-> import numpy as np
(Pdb) num examples: 111000
loss: 5.164388179779053,grad_norm: 0.9999998973538893, iteration: 1
loss: 4.9893364906311035,grad_norm: 0.9999997722661338, iteration: 2
loss: 4.9315924644470215,grad_norm: 0.999999834296327, iteration: 3
loss: 4.929351806640625,grad_norm: 0.9999999560927203, iteration: 4
loss: 5.019329071044922,grad_norm: 0.9999998891526373, iteration: 5
loss: 4.961483478546143,grad_norm: 0.9999998799332848, iteration: 6
loss: 4.894497394561768,grad_norm: 0.9999999602350926, iteration: 7
loss: 4.921554088592529,grad_norm: 0.9999999269102631, iteration: 8
loss: 5.016721725463867,grad_norm: 0.9999999252931565, iteration: 9
loss: 5.0564069747924805,grad_norm: 0.9999998181181688, iteration: 10
loss: 5.067516326904297,grad_norm: 0.9999998554843667, iteration: 11
loss: 4.9750657081604,grad_norm: 0.9999998851466795, iteration: 12
loss: 4.882284164428711,grad_norm: 0.9999998716764654, iteration: 13
loss: 4.862491130828857,grad_norm: 0.9999998950207808, iteration: 14
loss: 4.984943389892578,grad_norm: 0.9999999185000479, iteration: 15
loss: 4.951695919036865,grad_norm: 0.9999998974323463, iteration: 16
loss: 4.892328262329102,grad_norm: 0.9999999391743735, iteration: 17
loss: 4.949143886566162,grad_norm: 0.9999999739046165, iteration: 18
loss: 5.237498760223389,grad_norm: 0.999999953630998, iteration: 19
loss: 4.9419450759887695,grad_norm: 0.9999999119137437, iteration: 20
loss: 5.137335300445557,grad_norm: 0.9999999335195277, iteration: 21
loss: 5.137580871582031,grad_norm: 0.999999879482164, iteration: 22
loss: 4.887294292449951,grad_norm: 0.9999998951520763, iteration: 23
loss: 4.892258644104004,grad_norm: 0.9999999167872167, iteration: 24
loss: 4.961796760559082,grad_norm: 0.9999998525213494, iteration: 25
loss: 4.847148418426514,grad_norm: 0.9999998779861902, iteration: 26
loss: 5.000661849975586,grad_norm: 0.9999998815304645, iteration: 27
loss: 4.94817590713501,grad_norm: 0.999999908641617, iteration: 28
loss: 4.996826648712158,grad_norm: 0.9999998356291092, iteration: 29
loss: 4.994977951049805,grad_norm: 0.9999999403108543, iteration: 30
loss: 5.003725051879883,grad_norm: 0.9999998246643794, iteration: 31
loss: 4.96627140045166,grad_norm: 0.9999998322706543, iteration: 32
loss: 5.0240797996521,grad_norm: 0.999999965153483, iteration: 33
loss: 4.780527591705322,grad_norm: 0.9999998448563784, iteration: 34
loss: 4.984661102294922,grad_norm: 0.9999998334791328, iteration: 35
loss: 4.85759973526001,grad_norm: 0.9999998095938833, iteration: 36
loss: 4.909484386444092,grad_norm: 0.9999998765013567, iteration: 37
loss: 4.899479389190674,grad_norm: 0.9999999567831552, iteration: 38
loss: 4.949498653411865,grad_norm: 0.9999999249751466, iteration: 39
loss: 4.839416980743408,grad_norm: 0.999999931809456, iteration: 40
loss: 4.859468460083008,grad_norm: 0.9999998098098862, iteration: 41
loss: 4.741517066955566,grad_norm: 0.9999998446272671, iteration: 42
loss: 4.813564777374268,grad_norm: 0.9999998848877267, iteration: 43
loss: 4.949861526489258,grad_norm: 0.9999999001193177, iteration: 44
loss: 4.86319637298584,grad_norm: 0.9999999413384506, iteration: 45
loss: 4.773381233215332,grad_norm: 0.9999998530308472, iteration: 46
loss: 4.821852207183838,grad_norm: 0.9999998667070252, iteration: 47
loss: 4.664005279541016,grad_norm: 0.9999998705959741, iteration: 48
loss: 4.823391437530518,grad_norm: 0.9999998578828555, iteration: 49
loss: 4.772298812866211,grad_norm: 0.9999999499650091, iteration: 50
loss: 4.768184661865234,grad_norm: 0.9999999185559525, iteration: 51
loss: 4.615085124969482,grad_norm: 0.9999998791938762, iteration: 52
loss: 4.765554428100586,grad_norm: 0.999999941044244, iteration: 53
loss: 4.987277984619141,grad_norm: 0.9999998620431166, iteration: 54
loss: 4.83219575881958,grad_norm: 0.9999998456379621, iteration: 55
loss: 4.5882039070129395,grad_norm: 0.9999999370618666, iteration: 56
loss: 4.788951873779297,grad_norm: 0.999999944319565, iteration: 57
loss: 4.738214015960693,grad_norm: 0.9999999501873132, iteration: 58
loss: 4.634448051452637,grad_norm: 0.9999999592902241, iteration: 59
loss: 4.681836128234863,grad_norm: 0.9999998473191438, iteration: 60
loss: 4.555108547210693,grad_norm: 1.0000000354485976, iteration: 61
loss: 4.67002010345459,grad_norm: 0.9999998583122532, iteration: 62
loss: 4.829329490661621,grad_norm: 0.9999999285242253, iteration: 63
loss: 4.506229877471924,grad_norm: 0.9999999174786256, iteration: 64
loss: 4.725187301635742,grad_norm: 0.9999998847273903, iteration: 65
loss: 4.5942301750183105,grad_norm: 0.9999999334771732, iteration: 66
loss: 4.5236592292785645,grad_norm: 0.9999999639733663, iteration: 67
loss: 4.834641933441162,grad_norm: 0.9999999146459125, iteration: 68
loss: 4.617599964141846,grad_norm: 0.9999999442071564, iteration: 69
loss: 4.516733646392822,grad_norm: 0.9999999417073786, iteration: 70
loss: 4.638228416442871,grad_norm: 0.9999999483926698, iteration: 71
loss: 4.772814750671387,grad_norm: 0.9999999501226655, iteration: 72
loss: 4.568609237670898,grad_norm: 0.9999999516080066, iteration: 73
loss: 4.624960422515869,grad_norm: 0.9999999759667367, iteration: 74
loss: 4.533079624176025,grad_norm: 0.9999999637772197, iteration: 75
loss: 4.540821552276611,grad_norm: 0.999999956200128, iteration: 76
loss: 4.6075029373168945,grad_norm: 0.9999999503430625, iteration: 77
loss: 4.6039204597473145,grad_norm: 0.9999999910285943, iteration: 78
loss: 4.565996170043945,grad_norm: 0.9999999190984074, iteration: 79
loss: 4.585468292236328,grad_norm: 0.999999877360184, iteration: 80
loss: 4.519042015075684,grad_norm: 0.9999999761065931, iteration: 81
loss: 4.472079753875732,grad_norm: 0.999999915149945, iteration: 82
loss: 4.551773548126221,grad_norm: 0.9999999714871467, iteration: 83
loss: 4.621200084686279,grad_norm: 0.9999999480711091, iteration: 84
loss: 4.5394368171691895,grad_norm: 0.9999999264784536, iteration: 85
loss: 4.562015533447266,grad_norm: 0.9999998741710997, iteration: 86
loss: 4.506621837615967,grad_norm: 0.9999998998525633, iteration: 87
loss: 4.460455894470215,grad_norm: 0.9999999782982039, iteration: 88
loss: 4.495095729827881,grad_norm: 0.9999999367234577, iteration: 89
loss: 4.44947624206543,grad_norm: 0.9999999508392547, iteration: 90
loss: 4.427917957305908,grad_norm: 0.9999998877476401, iteration: 91
loss: 4.38228702545166,grad_norm: 1.0000000018501958, iteration: 92
loss: 4.469217777252197,grad_norm: 0.9999999317959354, iteration: 93
loss: 4.5239577293396,grad_norm: 0.9999999372592252, iteration: 94
loss: 4.336484432220459,grad_norm: 0.999999842110445, iteration: 95
loss: 4.492918014526367,grad_norm: 0.9999999074605039, iteration: 96
loss: 4.575221061706543,grad_norm: 0.9999998889503159, iteration: 97
loss: 4.479445934295654,grad_norm: 0.9999999692805229, iteration: 98
loss: 4.456832408905029,grad_norm: 0.9999999375067045, iteration: 99
loss: 4.370221138000488,grad_norm: 0.9999999302177912, iteration: 100
loss: 4.369932174682617,grad_norm: 0.9999999086639323, iteration: 101
loss: 4.335369110107422,grad_norm: 0.9999998844350563, iteration: 102
loss: 4.396744728088379,grad_norm: 0.9999999080802321, iteration: 103
loss: 4.506707668304443,grad_norm: 0.9999998902483769, iteration: 104
loss: 4.490350723266602,grad_norm: 0.9999999220426481, iteration: 105
loss: 4.3369059562683105,grad_norm: 1.000000004475053, iteration: 106
loss: 4.366447925567627,grad_norm: 0.999999841795606, iteration: 107
loss: 4.209244251251221,grad_norm: 1.000000019848301, iteration: 108
loss: 4.337088108062744,grad_norm: 0.9999998813827753, iteration: 109
loss: 4.37415075302124,grad_norm: 0.9999998940970986, iteration: 110
loss: 4.424482822418213,grad_norm: 0.9999999496794546, iteration: 111
loss: 4.314199447631836,grad_norm: 1.0000000188640386, iteration: 112
loss: 4.289524555206299,grad_norm: 0.9999999812777466, iteration: 113
loss: 4.487142086029053,grad_norm: 0.9999998958072106, iteration: 114
loss: 4.348061561584473,grad_norm: 0.9999999782034775, iteration: 115
loss: 4.34737491607666,grad_norm: 0.9999998446556905, iteration: 116
loss: 4.33046817779541,grad_norm: 0.9999999478007036, iteration: 117
loss: 4.276296138763428,grad_norm: 0.9999999175808328, iteration: 118
loss: 4.326846599578857,grad_norm: 0.9999999244190659, iteration: 119
loss: 4.407360553741455,grad_norm: 0.9999998753783962, iteration: 120
loss: 4.388187408447266,grad_norm: 0.9999999586681421, iteration: 121
loss: 4.323024749755859,grad_norm: 0.9999998707389691, iteration: 122
loss: 4.347353458404541,grad_norm: 0.9999998675944567, iteration: 123
loss: 4.285187244415283,grad_norm: 0.9999999486429725, iteration: 124
loss: 4.181972503662109,grad_norm: 0.9999999227699242, iteration: 125
loss: 4.287736415863037,grad_norm: 0.9999999453323263, iteration: 126
loss: 4.369395732879639,grad_norm: 0.9999999253101702, iteration: 127
loss: 4.302271366119385,grad_norm: 0.999999956626836, iteration: 128
loss: 4.278347969055176,grad_norm: 0.9999998886884768, iteration: 129
loss: 4.368886470794678,grad_norm: 0.9999999477659494, iteration: 130
loss: 4.368780612945557,grad_norm: 0.9999999053256419, iteration: 131
loss: 4.2700934410095215,grad_norm: 0.9999999729510849, iteration: 132
loss: 4.284014701843262,grad_norm: 0.9999998781424156, iteration: 133
loss: 4.348945140838623,grad_norm: 0.9999999259099268, iteration: 134
loss: 4.186235427856445,grad_norm: 0.9999999213041374, iteration: 135
loss: 4.255561351776123,grad_norm: 0.9999998777943792, iteration: 136
loss: 4.254049777984619,grad_norm: 0.9999999234367469, iteration: 137
loss: 4.233481407165527,grad_norm: 0.9999999217942471, iteration: 138
loss: 4.432783603668213,grad_norm: 0.9999999275692826, iteration: 139
loss: 4.258418560028076,grad_norm: 0.9999999485594321, iteration: 140
loss: 4.26560640335083,grad_norm: 0.9999999127393405, iteration: 141
loss: 4.317566394805908,grad_norm: 0.999999967782243, iteration: 142
loss: 4.324817180633545,grad_norm: 0.9999999595398203, iteration: 143
loss: 4.225551605224609,grad_norm: 0.999999976252441, iteration: 144
loss: 4.277548313140869,grad_norm: 0.9999999378574417, iteration: 145
loss: 4.192315578460693,grad_norm: 0.999999938169153, iteration: 146
loss: 4.279254913330078,grad_norm: 0.9999999379857084, iteration: 147
loss: 4.121175289154053,grad_norm: 0.9999998431407057, iteration: 148
loss: 4.220109939575195,grad_norm: 0.9999999592556984, iteration: 149
loss: 4.156722068786621,grad_norm: 0.9999998918128401, iteration: 150
loss: 4.252575874328613,grad_norm: 0.999999916035164, iteration: 151
loss: 4.269347667694092,grad_norm: 0.9999999458839585, iteration: 152
loss: 4.280887126922607,grad_norm: 0.9999998480320091, iteration: 153
loss: 4.295499324798584,grad_norm: 0.9999999443965842, iteration: 154
loss: 4.200572967529297,grad_norm: 0.9999998820325057, iteration: 155
loss: 4.121663570404053,grad_norm: 0.9999999538073561, iteration: 156
loss: 4.157379627227783,grad_norm: 0.9999998726313196, iteration: 157
loss: 4.219937324523926,grad_norm: 0.9999999161882738, iteration: 158
loss: 4.268049716949463,grad_norm: 0.999999855584873, iteration: 159
loss: 4.26502799987793,grad_norm: 0.9999999850024217, iteration: 160
loss: 4.243865013122559,grad_norm: 0.9999999759642189, iteration: 161
loss: 4.216798782348633,grad_norm: 0.9999998796045875, iteration: 162
loss: 4.1499457359313965,grad_norm: 0.9999998968215169, iteration: 163
loss: 4.285239219665527,grad_norm: 0.9999999813364874, iteration: 164
loss: 4.191648006439209,grad_norm: 0.9999998815200672, iteration: 165
loss: 4.186208248138428,grad_norm: 0.9999999328098906, iteration: 166
loss: 4.324325084686279,grad_norm: 0.9999998444593668, iteration: 167
loss: 4.191373825073242,grad_norm: 0.9999998645498143, iteration: 168
loss: 4.235793590545654,grad_norm: 0.9999998713804665, iteration: 169
loss: 4.234074592590332,grad_norm: 0.9999999202413903, iteration: 170
loss: 4.175952434539795,grad_norm: 0.9999998880766705, iteration: 171
loss: 4.1279778480529785,grad_norm: 0.9999998602161538, iteration: 172
loss: 4.1395583152771,grad_norm: 0.9999999147959668, iteration: 173
loss: 4.236241340637207,grad_norm: 0.9999998871693284, iteration: 174
loss: 4.3879780769348145,grad_norm: 0.9999999557279584, iteration: 175
loss: 4.184856414794922,grad_norm: 0.9999998643370391, iteration: 176
loss: 4.228030204772949,grad_norm: 0.9999998011514944, iteration: 177
loss: 4.218174934387207,grad_norm: 0.999999951911524, iteration: 178
loss: 4.1367878913879395,grad_norm: 0.9999999768291981, iteration: 179
loss: 4.123758792877197,grad_norm: 0.9999998781626911, iteration: 180
loss: 4.223617076873779,grad_norm: 0.9999999411072305, iteration: 181
loss: 4.304410934448242,grad_norm: 0.9999998981884283, iteration: 182
loss: 4.233401298522949,grad_norm: 0.9999998793516308, iteration: 183
loss: 4.2143378257751465,grad_norm: 0.9999999669743547, iteration: 184
loss: 4.264917373657227,grad_norm: 0.9999999085830599, iteration: 185
loss: 4.230868339538574,grad_norm: 0.9999999341746982, iteration: 186
loss: 4.193236827850342,grad_norm: 0.9999999463035452, iteration: 187
loss: 4.259521484375,grad_norm: 0.9999999204796648, iteration: 188
loss: 4.279033184051514,grad_norm: 0.9999999171087668, iteration: 189
loss: 4.281428813934326,grad_norm: 0.9999999461143548, iteration: 190
loss: 4.280811786651611,grad_norm: 0.9999999077865098, iteration: 191
loss: 4.202293395996094,grad_norm: 0.999999902659435, iteration: 192
loss: 4.161434173583984,grad_norm: 0.9999999757232645, iteration: 193
loss: 4.092471122741699,grad_norm: 0.99999982847492, iteration: 194
loss: 4.0781683921813965,grad_norm: 0.9999999586660763, iteration: 195
loss: 4.232519626617432,grad_norm: 0.9999998532413238, iteration: 196
loss: 4.100618362426758,grad_norm: 0.9999998069775853, iteration: 197
loss: 4.183506011962891,grad_norm: 0.999999927925507, iteration: 198
loss: 4.126577377319336,grad_norm: 0.9999999055028074, iteration: 199
loss: 4.164897918701172,grad_norm: 0.9999999099086797, iteration: 200
loss: 4.039880752563477,grad_norm: 0.999999917548714, iteration: 201
loss: 4.229883193969727,grad_norm: 0.9999999764180425, iteration: 202
loss: 4.185238361358643,grad_norm: 0.9999999038924373, iteration: 203
loss: 4.315871715545654,grad_norm: 0.9999998607131553, iteration: 204
loss: 4.23203182220459,grad_norm: 0.9999998329259808, iteration: 205
loss: 4.182300567626953,grad_norm: 0.9999999414545233, iteration: 206
loss: 4.256079196929932,grad_norm: 0.9999999190934256, iteration: 207
loss: 4.208299160003662,grad_norm: 0.9999998933780647, iteration: 208
loss: 4.227908611297607,grad_norm: 0.9999999568266414, iteration: 209
loss: 4.194168567657471,grad_norm: 0.9999999113455259, iteration: 210
loss: 4.107287883758545,grad_norm: 0.9999999341160765, iteration: 211
loss: 4.17399787902832,grad_norm: 0.9999999338578736, iteration: 212
loss: 4.237482070922852,grad_norm: 0.9999999176836759, iteration: 213
loss: 4.071930408477783,grad_norm: 0.9999999425114642, iteration: 214
loss: 4.112138271331787,grad_norm: 0.999999833732887, iteration: 215
loss: 4.18351411819458,grad_norm: 0.9999998057098043, iteration: 216
loss: 4.236755847930908,grad_norm: 0.999999958363743, iteration: 217
loss: 4.2339396476745605,grad_norm: 0.9999999472100619, iteration: 218
loss: 4.160976886749268,grad_norm: 0.9999998907301273, iteration: 219
loss: 4.168567657470703,grad_norm: 0.9999998198428913, iteration: 220
loss: 4.138073921203613,grad_norm: 0.9999999578097132, iteration: 221
loss: 4.224088668823242,grad_norm: 0.999999839746197, iteration: 222
loss: 4.219120502471924,grad_norm: 0.9999999548187473, iteration: 223
loss: 4.182824611663818,grad_norm: 0.9999998624645905, iteration: 224
loss: 4.1810760498046875,grad_norm: 0.9999998849447409, iteration: 225
loss: 3.9981234073638916,grad_norm: 0.9999999059090459, iteration: 226
loss: 4.227964878082275,grad_norm: 0.9999998048268522, iteration: 227
loss: 4.100610256195068,grad_norm: 0.9999999147061404, iteration: 228
loss: 4.1932806968688965,grad_norm: 0.9999998678786717, iteration: 229
loss: 4.316529750823975,grad_norm: 0.9999998584576133, iteration: 230
loss: 4.128730297088623,grad_norm: 0.9999998919928094, iteration: 231
loss: 4.199449062347412,grad_norm: 0.9999998559405558, iteration: 232
loss: 4.035175800323486,grad_norm: 0.9999998871057838, iteration: 233
loss: 4.146591663360596,grad_norm: 0.9999999319823076, iteration: 234
loss: 4.083034515380859,grad_norm: 0.9999997756414172, iteration: 235
loss: 4.098885536193848,grad_norm: 0.9999999236901328, iteration: 236
loss: 4.196094512939453,grad_norm: 0.9999998465039589, iteration: 237
loss: 4.207246780395508,grad_norm: 0.9999998177739363, iteration: 238
loss: 4.189093589782715,grad_norm: 0.9999998192541213, iteration: 239
loss: 4.2035393714904785,grad_norm: 0.9999997999706165, iteration: 240
loss: 4.228552341461182,grad_norm: 0.9999998381828766, iteration: 241
loss: 4.14823055267334,grad_norm: 0.9999999115965764, iteration: 242
loss: 4.0707011222839355,grad_norm: 0.9999999608956668, iteration: 243
loss: 4.102545738220215,grad_norm: 0.9999999312494621, iteration: 244
loss: 4.124112129211426,grad_norm: 0.9999998681015556, iteration: 245
loss: 4.095912933349609,grad_norm: 0.9999998650654368, iteration: 246
loss: 4.056370258331299,grad_norm: 0.9999998185908026, iteration: 247
loss: 4.147814750671387,grad_norm: 0.9999998982534479, iteration: 248
loss: 4.176321506500244,grad_norm: 0.9999999215988093, iteration: 249
loss: 4.021692752838135,grad_norm: 0.9999999151219461, iteration: 250
loss: 4.151568412780762,grad_norm: 0.9999998680449976, iteration: 251
loss: 4.1703290939331055,grad_norm: 0.9999998961217836, iteration: 252
loss: 4.0907368659973145,grad_norm: 0.9999999219763792, iteration: 253
loss: 4.100957870483398,grad_norm: 0.9999998915001357, iteration: 254
loss: 4.126827716827393,grad_norm: 0.9999998446253168, iteration: 255
loss: 4.182523250579834,grad_norm: 0.9999998931870312, iteration: 256
loss: 4.177639007568359,grad_norm: 0.9999998992017066, iteration: 257
loss: 4.193702220916748,grad_norm: 0.9999998346372808, iteration: 258
loss: 4.086606979370117,grad_norm: 0.9999998751973324, iteration: 259
loss: 4.051395416259766,grad_norm: 0.999999863652798, iteration: 260
loss: 4.119551181793213,grad_norm: 0.9999998750855511, iteration: 261
loss: 4.224619388580322,grad_norm: 0.9999999353211536, iteration: 262
loss: 4.163023948669434,grad_norm: 0.9999998356070174, iteration: 263
loss: 4.037951946258545,grad_norm: 0.9999998635054623, iteration: 264
loss: 4.160475254058838,grad_norm: 0.9999998466503506, iteration: 265
loss: 4.088841438293457,grad_norm: 0.9999999262791675, iteration: 266
loss: 4.077944278717041,grad_norm: 0.9999998019585248, iteration: 267
loss: 4.182228088378906,grad_norm: 0.9999999652069583, iteration: 268
loss: 4.102402210235596,grad_norm: 0.999999893824007, iteration: 269
loss: 4.125643730163574,grad_norm: 0.9999999232489539, iteration: 270
loss: 4.105215072631836,grad_norm: 0.9999998834419482, iteration: 271
loss: 4.096614360809326,grad_norm: 0.9999998607292484, iteration: 272
loss: 4.152337551116943,grad_norm: 0.9999998984323205, iteration: 273
loss: 4.01755952835083,grad_norm: 0.9999997951126653, iteration: 274
loss: 4.044510841369629,grad_norm: 0.9999998607967824, iteration: 275
loss: 4.099828720092773,grad_norm: 0.9999999197606256, iteration: 276
loss: 4.171793460845947,grad_norm: 0.9999998849719753, iteration: 277
loss: 4.16082239151001,grad_norm: 0.9999999223971551, iteration: 278
loss: 4.090988636016846,grad_norm: 0.9999999044892832, iteration: 279
loss: 4.050617218017578,grad_norm: 0.9999999440439467, iteration: 280
loss: 4.212301731109619,grad_norm: 0.9999999330467876, iteration: 281
loss: 4.211628437042236,grad_norm: 0.9999999861644161, iteration: 282
loss: 4.138372898101807,grad_norm: 0.9999998036943203, iteration: 283
loss: 4.154067039489746,grad_norm: 0.9999998613670953, iteration: 284
loss: 4.104514122009277,grad_norm: 0.9999999229845489, iteration: 285
loss: 4.1757659912109375,grad_norm: 0.9999998449934154, iteration: 286
loss: 4.029271125793457,grad_norm: 0.9999998106966694, iteration: 287
loss: 4.095881462097168,grad_norm: 0.9999998353586724, iteration: 288
loss: 4.169095039367676,grad_norm: 0.9999997981604062, iteration: 289
loss: 4.104060649871826,grad_norm: 0.9999998323444068, iteration: 290
loss: 4.09298849105835,grad_norm: 0.9999998791240495, iteration: 291
loss: 4.133347988128662,grad_norm: 0.9999998070977315, iteration: 292
loss: 4.114358425140381,grad_norm: 0.9999997877004282, iteration: 293
loss: 4.034520149230957,grad_norm: 0.9999998467286401, iteration: 294
loss: 4.132449626922607,grad_norm: 0.9999998895047046, iteration: 295
loss: 4.06608247756958,grad_norm: 0.999999860990977, iteration: 296
loss: 4.121326446533203,grad_norm: 0.9999998327540928, iteration: 297
loss: 4.058757305145264,grad_norm: 0.9999998723866185, iteration: 298
loss: 4.172337055206299,grad_norm: 0.9999998726353821, iteration: 299
loss: 4.046668529510498,grad_norm: 0.9999998944900188, iteration: 300
loss: 4.206432342529297,grad_norm: 0.9999999066598578, iteration: 301
loss: 4.057727336883545,grad_norm: 0.9999999051550821, iteration: 302
loss: 4.174527645111084,grad_norm: 0.9999998362537766, iteration: 303
loss: 4.038697719573975,grad_norm: 0.9999999220456077, iteration: 304
loss: 4.153744697570801,grad_norm: 0.9999999022111742, iteration: 305
loss: 4.079328536987305,grad_norm: 0.9999998521533826, iteration: 306
loss: 4.000757217407227,grad_norm: 0.9999998296813156, iteration: 307
loss: 4.071542263031006,grad_norm: 1.0000000026070526, iteration: 308
loss: 4.003232002258301,grad_norm: 0.9999998564393481, iteration: 309
loss: 4.125637531280518,grad_norm: 0.9999998554175298, iteration: 310
loss: 4.067626476287842,grad_norm: 0.9999997347170935, iteration: 311
loss: 4.157486915588379,grad_norm: 0.9999998300803528, iteration: 312
loss: 4.126284599304199,grad_norm: 0.9999998898507128, iteration: 313
loss: 4.045012950897217,grad_norm: 0.9999998655757474, iteration: 314
loss: 4.099694728851318,grad_norm: 0.999999889499628, iteration: 315
loss: 3.9774742126464844,grad_norm: 0.9999999248480097, iteration: 316
loss: 4.069728374481201,grad_norm: 0.9999999024530907, iteration: 317
loss: 4.119378566741943,grad_norm: 0.9999997826609399, iteration: 318
loss: 4.033240795135498,grad_norm: 0.9999999381567503, iteration: 319
loss: 4.058986663818359,grad_norm: 0.9999998678770441, iteration: 320
loss: 4.062457084655762,grad_norm: 0.9999998758867952, iteration: 321
loss: 4.053135395050049,grad_norm: 0.9999998544910748, iteration: 322
loss: 4.062041759490967,grad_norm: 0.9999998752864013, iteration: 323
loss: 4.139201641082764,grad_norm: 0.9999999554431066, iteration: 324
loss: 4.11878776550293,grad_norm: 0.9999997752163524, iteration: 325
loss: 4.092832565307617,grad_norm: 0.9999999015569583, iteration: 326
loss: 4.049365997314453,grad_norm: 0.9999998388186843, iteration: 327
loss: 4.096372604370117,grad_norm: 0.9999999736414704, iteration: 328
loss: 4.065576076507568,grad_norm: 0.9999999340136337, iteration: 329
loss: 4.0273661613464355,grad_norm: 0.999999858462838, iteration: 330
loss: 4.165306568145752,grad_norm: 0.9999998519709985, iteration: 331
loss: 4.003528594970703,grad_norm: 0.9999998553054238, iteration: 332
loss: 4.106637001037598,grad_norm: 0.999999861995738, iteration: 333
loss: 4.11482048034668,grad_norm: 0.9999997740392684, iteration: 334
loss: 3.9212398529052734,grad_norm: 0.9999998791075252, iteration: 335
loss: 4.056400775909424,grad_norm: 0.9999998653946401, iteration: 336
loss: 4.104612350463867,grad_norm: 0.9999999093824318, iteration: 337
loss: 4.093879699707031,grad_norm: 0.9999999233450548, iteration: 338
loss: 4.041128635406494,grad_norm: 0.9999998374185565, iteration: 339
loss: 4.1215715408325195,grad_norm: 0.9999998919565555, iteration: 340
loss: 4.157227993011475,grad_norm: 0.9999998227830954, iteration: 341
loss: 4.039646148681641,grad_norm: 0.9999998710239825, iteration: 342
loss: 4.1543731689453125,grad_norm: 0.9999998558657145, iteration: 343
loss: 4.098403453826904,grad_norm: 0.9999998480856497, iteration: 344
loss: 3.9643137454986572,grad_norm: 0.9999998700757233, iteration: 345
loss: 4.146407127380371,grad_norm: 0.9999998218453809, iteration: 346
loss: 4.1924943923950195,grad_norm: 0.9999998348538074, iteration: 347
loss: 4.092498779296875,grad_norm: 0.999999879465136, iteration: 348
loss: 4.084981441497803,grad_norm: 0.9999998442852386, iteration: 349
loss: 4.102357387542725,grad_norm: 0.9999999088672493, iteration: 350
loss: 4.088551998138428,grad_norm: 0.9999998942646079, iteration: 351
loss: 4.066981315612793,grad_norm: 0.999999798492656, iteration: 352
loss: 4.096282482147217,grad_norm: 0.9999998423134221, iteration: 353
loss: 4.159978866577148,grad_norm: 0.9999998218595594, iteration: 354
loss: 3.956646203994751,grad_norm: 0.9999998694734916, iteration: 355
loss: 3.990551471710205,grad_norm: 0.999999869983955, iteration: 356
loss: 3.998605966567993,grad_norm: 0.9999998115636982, iteration: 357
loss: 4.095233917236328,grad_norm: 0.9999998637778937, iteration: 358
loss: 4.057596683502197,grad_norm: 0.9999998827746663, iteration: 359
loss: 3.9793620109558105,grad_norm: 0.9999998367884875, iteration: 360
loss: 4.149987697601318,grad_norm: 0.9999998943208472, iteration: 361
loss: 4.08612060546875,grad_norm: 0.9999999066004494, iteration: 362
loss: 4.050779819488525,grad_norm: 0.9999998768888769, iteration: 363
loss: 4.113611698150635,grad_norm: 0.9999998550187907, iteration: 364
loss: 4.104781150817871,grad_norm: 0.9999997660965004, iteration: 365
loss: 4.118383884429932,grad_norm: 0.9999999717735227, iteration: 366
loss: 4.0387983322143555,grad_norm: 0.9999998411655551, iteration: 367
loss: 4.0994439125061035,grad_norm: 0.9999998274668752, iteration: 368
loss: 4.052586078643799,grad_norm: 0.9999998621278916, iteration: 369
loss: 4.167198181152344,grad_norm: 0.9999998218001805, iteration: 370
loss: 4.072840213775635,grad_norm: 0.9999998474681525, iteration: 371
loss: 4.027506351470947,grad_norm: 0.9999998665252391, iteration: 372
loss: 4.170835971832275,grad_norm: 0.9999998624870808, iteration: 373
loss: 4.0167341232299805,grad_norm: 0.9999998918546336, iteration: 374
loss: 4.057554721832275,grad_norm: 0.999999849978724, iteration: 375
loss: 4.126834869384766,grad_norm: 0.9999997843316502, iteration: 376
loss: 4.0434393882751465,grad_norm: 0.9999998292063815, iteration: 377
loss: 4.078723430633545,grad_norm: 0.999999795453366, iteration: 378
loss: 4.076068878173828,grad_norm: 0.9999999340796811, iteration: 379
loss: 3.952045202255249,grad_norm: 0.9999998902104401, iteration: 380
loss: 4.1565470695495605,grad_norm: 0.9999997451615167, iteration: 381
loss: 4.1467156410217285,grad_norm: 0.9999999110572058, iteration: 382
loss: 4.071720123291016,grad_norm: 0.9999997825950813, iteration: 383
loss: 4.099658012390137,grad_norm: 0.9999999207917032, iteration: 384
loss: 4.0461602210998535,grad_norm: 0.9999998320000221, iteration: 385
loss: 4.149837017059326,grad_norm: 0.9999998249503744, iteration: 386
loss: 3.9865007400512695,grad_norm: 0.9999998462500582, iteration: 387
loss: 4.108195781707764,grad_norm: 0.9999998308475909, iteration: 388
loss: 4.06337308883667,grad_norm: 0.9999998584865026, iteration: 389
loss: 4.032081604003906,grad_norm: 0.9999997815241235, iteration: 390
loss: 3.989351987838745,grad_norm: 0.999999931010698, iteration: 391
loss: 4.074162483215332,grad_norm: 0.9999998030957632, iteration: 392
loss: 4.0838942527771,grad_norm: 0.9999999292943943, iteration: 393
loss: 4.027711868286133,grad_norm: 0.9999997663284433, iteration: 394
loss: 3.9911088943481445,grad_norm: 0.9999998888875851, iteration: 395
loss: 4.065273761749268,grad_norm: 0.9999998720835517, iteration: 396
loss: 4.184048652648926,grad_norm: 0.9999997621529968, iteration: 397
loss: 4.128328800201416,grad_norm: 0.9999998679396398, iteration: 398
loss: 4.139715194702148,grad_norm: 0.9999998112215908, iteration: 399
loss: 4.04036283493042,grad_norm: 0.9999998251800257, iteration: 400
loss: 4.126485347747803,grad_norm: 0.9999998686275965, iteration: 401
loss: 4.110502243041992,grad_norm: 0.9999998147610273, iteration: 402
loss: 4.043516635894775,grad_norm: 0.9999998447722175, iteration: 403
loss: 4.074534893035889,grad_norm: 0.9999998527591675, iteration: 404
loss: 4.124213218688965,grad_norm: 0.999999858979139, iteration: 405
loss: 3.9941468238830566,grad_norm: 0.999999893454221, iteration: 406
loss: 4.017199516296387,grad_norm: 0.9999998047751693, iteration: 407
loss: 3.9827332496643066,grad_norm: 0.999999878986147, iteration: 408
loss: 4.089584827423096,grad_norm: 0.9999998977811545, iteration: 409
loss: 3.9794373512268066,grad_norm: 0.9999998003980832, iteration: 410
loss: 4.105533599853516,grad_norm: 0.9999998399644502, iteration: 411
loss: 4.073877334594727,grad_norm: 0.9999999010394558, iteration: 412
loss: 4.102399826049805,grad_norm: 0.9999999000412686, iteration: 413
loss: 4.1523566246032715,grad_norm: 0.9999998938139727, iteration: 414
loss: 4.0429558753967285,grad_norm: 0.9999998769123007, iteration: 415
loss: 4.000434875488281,grad_norm: 0.9999998246364147, iteration: 416
loss: 4.0277791023254395,grad_norm: 0.999999849665037, iteration: 417
loss: 4.122138977050781,grad_norm: 0.9999998414147763, iteration: 418
loss: 4.120336532592773,grad_norm: 0.9999998240790444, iteration: 419
loss: 4.047152996063232,grad_norm: 0.9999998477781801, iteration: 420
loss: 4.064761638641357,grad_norm: 0.9999999215913505, iteration: 421
loss: 4.0535993576049805,grad_norm: 0.9999998276809794, iteration: 422
loss: 4.036642551422119,grad_norm: 0.9999998538891581, iteration: 423
loss: 4.087743759155273,grad_norm: 0.9999998942131206, iteration: 424
loss: 4.0124006271362305,grad_norm: 0.9999998506283494, iteration: 425
loss: 4.099591255187988,grad_norm: 0.9999998650795896, iteration: 426
loss: 4.108142852783203,grad_norm: 0.9999999120998704, iteration: 427
loss: 4.057147026062012,grad_norm: 0.999999873695276, iteration: 428
loss: 4.00548791885376,grad_norm: 0.9999998733806109, iteration: 429
loss: 4.066258907318115,grad_norm: 0.9999998571070169, iteration: 430
loss: 3.993715286254883,grad_norm: 0.9999998947800746, iteration: 431
loss: 4.009376049041748,grad_norm: 0.999999852267068, iteration: 432
loss: 4.0331315994262695,grad_norm: 0.9999998724510841, iteration: 433
loss: 4.0845627784729,grad_norm: 0.99999987357103, iteration: 434
loss: 4.007453918457031,grad_norm: 0.9999997929023755, iteration: 435
loss: 4.006285190582275,grad_norm: 0.9999998905682751, iteration: 436
loss: 4.095475196838379,grad_norm: 0.9999998416844172, iteration: 437
loss: 4.03532600402832,grad_norm: 0.9999998357806671, iteration: 438
loss: 3.967690944671631,grad_norm: 0.9999998248826759, iteration: 439
loss: 4.061600208282471,grad_norm: 0.9999997672116463, iteration: 440
loss: 4.042688846588135,grad_norm: 0.9999999054043677, iteration: 441
loss: 4.013835906982422,grad_norm: 0.9999998406024095, iteration: 442
loss: 4.048696994781494,grad_norm: 0.9999998406329581, iteration: 443
loss: 4.050442218780518,grad_norm: 0.9999998633588674, iteration: 444
loss: 4.04817008972168,grad_norm: 0.9999998239604606, iteration: 445
loss: 4.001394748687744,grad_norm: 0.9999998304566436, iteration: 446
loss: 4.138092517852783,grad_norm: 0.9999998672723729, iteration: 447
loss: 4.03264856338501,grad_norm: 0.9999998502728519, iteration: 448
loss: 4.107926845550537,grad_norm: 0.9999999392459882, iteration: 449
loss: 4.174546241760254,grad_norm: 0.9999999572948627, iteration: 450
loss: 4.0388922691345215,grad_norm: 0.999999817092717, iteration: 451
loss: 4.073742389678955,grad_norm: 0.9999997953803811, iteration: 452
loss: 4.049803256988525,grad_norm: 0.9999997732733885, iteration: 453
loss: 4.100327968597412,grad_norm: 0.9999999275940048, iteration: 454
loss: 4.104332447052002,grad_norm: 0.9999998864575547, iteration: 455
loss: 4.086188793182373,grad_norm: 0.9999998736196161, iteration: 456
loss: 4.030476093292236,grad_norm: 0.9999998643939942, iteration: 457
loss: 4.005636692047119,grad_norm: 0.9999997733502198, iteration: 458
loss: 4.0694780349731445,grad_norm: 0.9999998472728433, iteration: 459
loss: 4.0815629959106445,grad_norm: 0.9999998296364336, iteration: 460
loss: 4.051650047302246,grad_norm: 0.9999998553063353, iteration: 461
loss: 4.0528740882873535,grad_norm: 0.9999998975303168, iteration: 462
loss: 4.023078441619873,grad_norm: 0.9999999049939798, iteration: 463
loss: 3.9871644973754883,grad_norm: 0.9999998938993743, iteration: 464
loss: 4.075711250305176,grad_norm: 0.9999998424100625, iteration: 465
loss: 4.103883743286133,grad_norm: 0.9999998809499847, iteration: 466
loss: 4.024865627288818,grad_norm: 0.9999998539831234, iteration: 467
loss: 4.054507732391357,grad_norm: 0.9999998594226926, iteration: 468
loss: 4.016203880310059,grad_norm: 0.9999997849238781, iteration: 469
loss: 4.037349700927734,grad_norm: 0.9999998083739855, iteration: 470
loss: 4.028474807739258,grad_norm: 0.9999998789033643, iteration: 471
loss: 4.146947860717773,grad_norm: 0.9999998747676142, iteration: 472
loss: 4.118032932281494,grad_norm: 0.9999998329826078, iteration: 473
loss: 3.9575843811035156,grad_norm: 0.9999998064770609, iteration: 474
loss: 4.079874038696289,grad_norm: 0.9999999342947846, iteration: 475
loss: 4.077339172363281,grad_norm: 0.9999998531777601, iteration: 476
loss: 3.998504400253296,grad_norm: 0.9999998141622286, iteration: 477
loss: 4.048956871032715,grad_norm: 0.999999835372058, iteration: 478
loss: 4.083226680755615,grad_norm: 0.9999998588099519, iteration: 479
loss: 4.088563919067383,grad_norm: 0.9999998295468382, iteration: 480
loss: 4.116326332092285,grad_norm: 0.9999998393473949, iteration: 481
loss: 4.062150001525879,grad_norm: 0.9999998812227269, iteration: 482
loss: 4.063345432281494,grad_norm: 0.9999998483376817, iteration: 483
loss: 3.940553903579712,grad_norm: 0.9999998593336681, iteration: 484
loss: 4.104716777801514,grad_norm: 0.9999997853467634, iteration: 485
loss: 4.0147385597229,grad_norm: 0.9999999182192275, iteration: 486
loss: 4.010225772857666,grad_norm: 0.9999997691196236, iteration: 487
loss: 4.038066387176514,grad_norm: 0.9999999025078405, iteration: 488
loss: 3.9794986248016357,grad_norm: 0.9999998302085998, iteration: 489
loss: 4.153781414031982,grad_norm: 0.9999998068539403, iteration: 490
loss: 4.120412826538086,grad_norm: 0.9999998513041269, iteration: 491
loss: 3.983980894088745,grad_norm: 0.9999997974965653, iteration: 492
loss: 4.056479454040527,grad_norm: 0.999999837700461, iteration: 493
loss: 4.035233497619629,grad_norm: 0.999999856069208, iteration: 494
loss: 4.099321365356445,grad_norm: 0.9999998569610573, iteration: 495
loss: 4.094451427459717,grad_norm: 0.9999997552795457, iteration: 496
loss: 4.021692276000977,grad_norm: 0.9999998192800934, iteration: 497
loss: 4.002061367034912,grad_norm: 0.9999998488592345, iteration: 498
loss: 4.166926383972168,grad_norm: 0.999999800608881, iteration: 499
loss: 3.986142635345459,grad_norm: 0.9999997454964814, iteration: 500
loss: 4.097427845001221,grad_norm: 0.9999998998327386, iteration: 501
loss: 4.16951322555542,grad_norm: 0.9999998142342442, iteration: 502
loss: 4.006394863128662,grad_norm: 0.9999997976675651, iteration: 503
loss: 4.0688395500183105,grad_norm: 0.9999997994319239, iteration: 504
loss: 4.163154602050781,grad_norm: 0.9999999411219418, iteration: 505
loss: 3.96697998046875,grad_norm: 0.9999998810299893, iteration: 506
loss: 4.074028968811035,grad_norm: 0.9999997531029666, iteration: 507
loss: 3.921907424926758,grad_norm: 0.9999998935640011, iteration: 508
loss: 4.0416483879089355,grad_norm: 0.9999998470943869, iteration: 509
loss: 4.0796709060668945,grad_norm: 0.9999998197683909, iteration: 510
loss: 4.0620832443237305,grad_norm: 0.9999998545264094, iteration: 511
loss: 4.045173645019531,grad_norm: 0.9999998839923263, iteration: 512
loss: 3.95253324508667,grad_norm: 0.9999998083957629, iteration: 513
loss: 4.067139148712158,grad_norm: 0.9999998761698934, iteration: 514
loss: 4.009936332702637,grad_norm: 0.999999809226977, iteration: 515
loss: 4.019825458526611,grad_norm: 0.999999690451858, iteration: 516
loss: 4.064694881439209,grad_norm: 0.9999998054461152, iteration: 517
loss: 4.013367176055908,grad_norm: 0.9999998288680225, iteration: 518
loss: 4.114780426025391,grad_norm: 0.9999998007116094, iteration: 519
loss: 4.001238822937012,grad_norm: 0.9999998271104951, iteration: 520
loss: 3.9962968826293945,grad_norm: 0.999999790369026, iteration: 521
loss: 4.023524761199951,grad_norm: 0.9999997675164238, iteration: 522
loss: 4.046851634979248,grad_norm: 0.9999998569300373, iteration: 523
loss: 4.045410633087158,grad_norm: 0.9999998435448281, iteration: 524
loss: 3.9968948364257812,grad_norm: 0.9999998196498865, iteration: 525
loss: 4.111011981964111,grad_norm: 0.9999998571686385, iteration: 526
loss: 4.007638454437256,grad_norm: 0.999999863225627, iteration: 527
loss: 4.021123886108398,grad_norm: 0.9999998291604457, iteration: 528
loss: 4.005414009094238,grad_norm: 0.9999998664539025, iteration: 529
loss: 3.9500656127929688,grad_norm: 0.9999997750890948, iteration: 530
loss: 4.0446953773498535,grad_norm: 0.9999997666408592, iteration: 531
loss: 4.054675579071045,grad_norm: 0.9999998118142106, iteration: 532
loss: 4.038891792297363,grad_norm: 0.9999998117669376, iteration: 533
loss: 4.007192611694336,grad_norm: 0.9999998010880249, iteration: 534
loss: 3.977304220199585,grad_norm: 0.9999997686291332, iteration: 535
loss: 4.022284984588623,grad_norm: 0.9999997420896913, iteration: 536
loss: 4.127900123596191,grad_norm: 0.9999998237138606, iteration: 537
loss: 3.998162031173706,grad_norm: 0.9999998510549635, iteration: 538
loss: 4.022427558898926,grad_norm: 0.9999998131605322, iteration: 539
loss: 4.064551830291748,grad_norm: 0.9999997964977091, iteration: 540
loss: 3.933025598526001,grad_norm: 0.9999997896727816, iteration: 541
loss: 3.946052312850952,grad_norm: 0.9999998454600408, iteration: 542
loss: 3.8672213554382324,grad_norm: 0.9999998487502324, iteration: 543
loss: 4.000633239746094,grad_norm: 0.9999998211968006, iteration: 544
loss: 4.077186107635498,grad_norm: 0.9999998347559175, iteration: 545
loss: 4.025413513183594,grad_norm: 0.999999765825028, iteration: 546
loss: 3.974142551422119,grad_norm: 0.9999998232865894, iteration: 547
loss: 3.968832015991211,grad_norm: 0.9999997643714383, iteration: 548
loss: 4.097672939300537,grad_norm: 0.999999823783506, iteration: 549
loss: 4.047155380249023,grad_norm: 0.9999998606324998, iteration: 550
loss: 3.9968316555023193,grad_norm: 0.9999998278905825, iteration: 551
loss: 3.9720940589904785,grad_norm: 0.9999997949531093, iteration: 552
loss: 3.999742031097412,grad_norm: 0.9999997852002744, iteration: 553
loss: 4.066732883453369,grad_norm: 0.9999997863378093, iteration: 554
loss: 3.9560110569000244,grad_norm: 0.9999998255277973, iteration: 555
loss: 4.057743072509766,grad_norm: 0.9999997175333155, iteration: 556
loss: 3.952633857727051,grad_norm: 0.9999998044207771, iteration: 557
loss: 4.024394512176514,grad_norm: 0.9999997861540755, iteration: 558
loss: 4.070191383361816,grad_norm: 0.9999997874885701, iteration: 559
loss: 3.9579131603240967,grad_norm: 0.999999911090528, iteration: 560
loss: 4.084668159484863,grad_norm: 0.9999998401516319, iteration: 561
loss: 4.099174976348877,grad_norm: 0.9999998782222856, iteration: 562
loss: 4.047069072723389,grad_norm: 0.9999998434181239, iteration: 563
loss: 4.080033302307129,grad_norm: 0.9999999251643213, iteration: 564
loss: 4.065811634063721,grad_norm: 0.9999997505914665, iteration: 565
loss: 4.036846160888672,grad_norm: 0.9999998246351148, iteration: 566
loss: 3.9891724586486816,grad_norm: 0.9999998297155649, iteration: 567
loss: 3.995392322540283,grad_norm: 0.9999998041514285, iteration: 568
loss: 4.028823375701904,grad_norm: 0.9999997682789392, iteration: 569
loss: 4.0021071434021,grad_norm: 0.9999997806144221, iteration: 570
loss: 3.983574151992798,grad_norm: 0.9999998686295472, iteration: 571
loss: 3.976457357406616,grad_norm: 0.9999998047521084, iteration: 572
loss: 4.028777599334717,grad_norm: 0.9999997076340638, iteration: 573
loss: 4.044354438781738,grad_norm: 0.9999998424926873, iteration: 574
loss: 3.975343942642212,grad_norm: 0.9999998550544797, iteration: 575
loss: 4.063818454742432,grad_norm: 0.9999998868826997, iteration: 576
loss: 4.060927391052246,grad_norm: 0.9999998170342024, iteration: 577
loss: 4.004611492156982,grad_norm: 0.9999998314565743, iteration: 578
loss: 3.9657695293426514,grad_norm: 0.9999997928195427, iteration: 579
loss: 3.9332807064056396,grad_norm: 0.9999997257030117, iteration: 580
loss: 4.137625217437744,grad_norm: 0.9999998266691797, iteration: 581
loss: 4.018651962280273,grad_norm: 0.9999997906331115, iteration: 582
loss: 4.042771816253662,grad_norm: 0.9999997194411674, iteration: 583
loss: 4.025334358215332,grad_norm: 0.9999998756506718, iteration: 584
loss: 4.048043727874756,grad_norm: 0.9999997202075367, iteration: 585
loss: 3.9952354431152344,grad_norm: 0.9999998112408442, iteration: 586
loss: 4.051321029663086,grad_norm: 0.9999997804987008, iteration: 587
loss: 4.034544944763184,grad_norm: 0.9999997507750833, iteration: 588
loss: 4.022969722747803,grad_norm: 0.9999998161380877, iteration: 589
loss: 3.9072093963623047,grad_norm: 0.9999998563190423, iteration: 590
loss: 3.9936182498931885,grad_norm: 0.9999997811652334, iteration: 591
loss: 4.05303955078125,grad_norm: 0.999999854862945, iteration: 592
loss: 3.9654760360717773,grad_norm: 0.9999998137219298, iteration: 593
loss: 3.9535179138183594,grad_norm: 0.9999998481356405, iteration: 594
loss: 3.9165685176849365,grad_norm: 0.999999872000316, iteration: 595
loss: 4.01150369644165,grad_norm: 0.9999997816009946, iteration: 596
loss: 4.026369094848633,grad_norm: 0.9999998572664125, iteration: 597
loss: 3.997642755508423,grad_norm: 0.9999998551490195, iteration: 598
loss: 3.928532600402832,grad_norm: 0.9999998815719889, iteration: 599
loss: 3.9636871814727783,grad_norm: 0.9999998016283371, iteration: 600
loss: 4.003464698791504,grad_norm: 0.9999998305627157, iteration: 601
loss: 3.9741806983947754,grad_norm: 0.9999998179265279, iteration: 602
loss: 4.045593738555908,grad_norm: 0.999999888633206, iteration: 603
loss: 4.045167922973633,grad_norm: 0.9999998270430575, iteration: 604
loss: 4.06854772567749,grad_norm: 0.9999997937045562, iteration: 605
loss: 4.092289447784424,grad_norm: 0.9999998116544497, iteration: 606
loss: 3.969454526901245,grad_norm: 0.9999997925581111, iteration: 607
loss: 4.027684211730957,grad_norm: 0.9999998213070441, iteration: 608
loss: 4.043047904968262,grad_norm: 0.999999740522314, iteration: 609
loss: 4.090700149536133,grad_norm: 0.9999997831524959, iteration: 610
loss: 4.088342189788818,grad_norm: 0.9999998184413472, iteration: 611
loss: 4.026026725769043,grad_norm: 0.9999999003118548, iteration: 612
loss: 3.958754301071167,grad_norm: 0.9999998847235128, iteration: 613
loss: 3.998814344406128,grad_norm: 0.99999981361241, iteration: 614
loss: 4.03284215927124,grad_norm: 0.9999997666002649, iteration: 615
loss: 4.009390830993652,grad_norm: 0.9999998300004893, iteration: 616
loss: 4.012299537658691,grad_norm: 0.9999997289507435, iteration: 617
loss: 3.941131830215454,grad_norm: 0.9999998391495983, iteration: 618
loss: 3.972644090652466,grad_norm: 0.9999997546741032, iteration: 619
loss: 4.034101963043213,grad_norm: 0.9999998671496015, iteration: 620
loss: 4.116690158843994,grad_norm: 0.9999997734154064, iteration: 621
loss: 4.050298690795898,grad_norm: 0.999999804415061, iteration: 622
loss: 3.9281609058380127,grad_norm: 0.999999743201713, iteration: 623
loss: 4.043433666229248,grad_norm: 0.9999997993074867, iteration: 624
loss: 4.059896945953369,grad_norm: 0.9999997655197919, iteration: 625
loss: 4.060878276824951,grad_norm: 0.9999998169058929, iteration: 626
loss: 4.003425121307373,grad_norm: 0.9999997827741385, iteration: 627
loss: 3.9477784633636475,grad_norm: 0.9999998611552087, iteration: 628
loss: 3.981313943862915,grad_norm: 0.999999784822889, iteration: 629
loss: 3.9611518383026123,grad_norm: 0.9999997875229462, iteration: 630
loss: 4.034292697906494,grad_norm: 0.9999998713614193, iteration: 631
loss: 4.004614353179932,grad_norm: 0.9999999208118748, iteration: 632
loss: 3.940737009048462,grad_norm: 0.9999998630249827, iteration: 633
loss: 4.0049309730529785,grad_norm: 0.9999997891171354, iteration: 634
loss: 3.937559127807617,grad_norm: 0.9999997867404756, iteration: 635
loss: 3.983746290206909,grad_norm: 0.9999998906306939, iteration: 636
loss: 3.999699831008911,grad_norm: 0.9999997702671406, iteration: 637
loss: 3.9874138832092285,grad_norm: 0.9999997560180798, iteration: 638
loss: 3.9854736328125,grad_norm: 0.9999997837792706, iteration: 639
loss: 4.012145042419434,grad_norm: 0.9999997839430712, iteration: 640
loss: 4.0382537841796875,grad_norm: 0.9999998722222261, iteration: 641
loss: 4.037718296051025,grad_norm: 0.999999791802826, iteration: 642
loss: 4.012933731079102,grad_norm: 0.9999998211198271, iteration: 643
loss: 4.020459175109863,grad_norm: 0.9999998166307467, iteration: 644
loss: 3.929806709289551,grad_norm: 0.9999997772227448, iteration: 645
loss: 3.962050437927246,grad_norm: 0.9999998125229818, iteration: 646
loss: 4.02176570892334,grad_norm: 0.9999998051277319, iteration: 647
loss: 4.058849334716797,grad_norm: 0.9999998148680831, iteration: 648
loss: 4.040678024291992,grad_norm: 0.9999998798895222, iteration: 649
loss: 3.9493026733398438,grad_norm: 0.9999998497764592, iteration: 650
loss: 4.036066055297852,grad_norm: 0.9999998695656168, iteration: 651
loss: 3.9086971282958984,grad_norm: 0.9999998672201111, iteration: 652
loss: 4.123383045196533,grad_norm: 0.9999998531642527, iteration: 653
loss: 4.050005912780762,grad_norm: 0.9999998418696723, iteration: 654
loss: 3.9410295486450195,grad_norm: 0.9999997769790641, iteration: 655
loss: 4.03480339050293,grad_norm: 0.999999910257424, iteration: 656
loss: 3.996107578277588,grad_norm: 0.9999997857725629, iteration: 657
loss: 4.076223850250244,grad_norm: 0.9999998395968039, iteration: 658
loss: 4.043685436248779,grad_norm: 0.9999997960820675, iteration: 659
loss: 3.9598262310028076,grad_norm: 0.9999997980633963, iteration: 660
loss: 3.985905647277832,grad_norm: 0.9999997669525628, iteration: 661
loss: 4.0434064865112305,grad_norm: 0.9999997773908791, iteration: 662
loss: 3.9565625190734863,grad_norm: 0.9999997896414516, iteration: 663
loss: 4.104630947113037,grad_norm: 0.9999998009942942, iteration: 664
loss: 4.027862071990967,grad_norm: 0.9999998274426178, iteration: 665
loss: 3.987900495529175,grad_norm: 0.9999998066418019, iteration: 666
loss: 4.002262115478516,grad_norm: 0.9999999038855342, iteration: 667
loss: 3.9353363513946533,grad_norm: 0.9999997631487022, iteration: 668
loss: 4.005856037139893,grad_norm: 0.999999821074655, iteration: 669
loss: 4.023813247680664,grad_norm: 0.9999998169490543, iteration: 670
loss: 3.9943044185638428,grad_norm: 0.99999983224374, iteration: 671
loss: 3.9572887420654297,grad_norm: 0.9999998425965886, iteration: 672
loss: 3.983278512954712,grad_norm: 0.9999998715739745, iteration: 673
loss: 3.9777321815490723,grad_norm: 0.9999998841803244, iteration: 674
loss: 3.9975545406341553,grad_norm: 0.9999998085662142, iteration: 675
loss: 4.048966884613037,grad_norm: 0.9999998757970553, iteration: 676
loss: 3.9771790504455566,grad_norm: 0.9999997805217185, iteration: 677
loss: 3.9504692554473877,grad_norm: 0.9999997869996392, iteration: 678
loss: 4.045271396636963,grad_norm: 0.9999998185720018, iteration: 679
loss: 3.9787065982818604,grad_norm: 0.9999997672164415, iteration: 680
loss: 3.9579615592956543,grad_norm: 0.9999998598590656, iteration: 681
loss: 3.996367931365967,grad_norm: 0.9999997807545083, iteration: 682
loss: 3.9206955432891846,grad_norm: 0.9999998041505062, iteration: 683
loss: 4.006750583648682,grad_norm: 0.9999998503706112, iteration: 684
loss: 4.027370929718018,grad_norm: 0.9999998425440367, iteration: 685
loss: 3.9046359062194824,grad_norm: 0.999999891959116, iteration: 686
loss: 4.082577705383301,grad_norm: 0.9999998413479216, iteration: 687
loss: 3.936150550842285,grad_norm: 0.9999997872475511, iteration: 688
loss: 3.9496121406555176,grad_norm: 0.9999998370941, iteration: 689
loss: 3.997285842895508,grad_norm: 0.999999830155653, iteration: 690
loss: 3.926396608352661,grad_norm: 0.9999997779731347, iteration: 691
loss: 3.9243557453155518,grad_norm: 0.9999998608715299, iteration: 692
loss: 3.975142240524292,grad_norm: 0.9999999139635183, iteration: 693
loss: 4.034382343292236,grad_norm: 0.9999998163982937, iteration: 694
loss: 4.000174522399902,grad_norm: 0.9999998384664791, iteration: 695
loss: 4.0525431632995605,grad_norm: 0.9999999233026928, iteration: 696
loss: 3.932140350341797,grad_norm: 0.999999839297673, iteration: 697
loss: 3.9439384937286377,grad_norm: 0.9999998409129641, iteration: 698
loss: 3.9750332832336426,grad_norm: 0.9999998839359866, iteration: 699
loss: 3.979067087173462,grad_norm: 0.9999997834573142, iteration: 700
loss: 3.9621121883392334,grad_norm: 0.999999873633476, iteration: 701
loss: 3.9697327613830566,grad_norm: 0.999999887910213, iteration: 702
loss: 3.935883045196533,grad_norm: 0.9999999059527449, iteration: 703
loss: 3.8749494552612305,grad_norm: 0.9999997708147655, iteration: 704
loss: 3.940887212753296,grad_norm: 0.9999997230024867, iteration: 705
loss: 3.94805908203125,grad_norm: 0.9999997675467274, iteration: 706
loss: 3.9476091861724854,grad_norm: 0.9999998722917287, iteration: 707
loss: 3.990363359451294,grad_norm: 0.9999998466470714, iteration: 708
loss: 3.9421043395996094,grad_norm: 0.9999997721697738, iteration: 709
loss: 3.982440948486328,grad_norm: 0.9999996934586104, iteration: 710
loss: 3.987419843673706,grad_norm: 0.9999998658291672, iteration: 711
loss: 3.8562049865722656,grad_norm: 0.9999998465040778, iteration: 712
loss: 3.9555282592773438,grad_norm: 0.9999998362625122, iteration: 713
loss: 3.970841884613037,grad_norm: 0.9999997760254974, iteration: 714
loss: 4.005908012390137,grad_norm: 0.9999997678540709, iteration: 715
loss: 3.9686317443847656,grad_norm: 0.9999998750116841, iteration: 716
loss: 4.019856929779053,grad_norm: 0.9999999249203204, iteration: 717
loss: 3.9227983951568604,grad_norm: 0.9999998001162252, iteration: 718
loss: 3.945686101913452,grad_norm: 0.9999998006662582, iteration: 719
loss: 3.9876320362091064,grad_norm: 0.9999997852197642, iteration: 720
loss: 3.92388653755188,grad_norm: 0.9999997959296298, iteration: 721
loss: 4.0015692710876465,grad_norm: 0.9999997846387166, iteration: 722
loss: 4.0024213790893555,grad_norm: 0.9999998538704212, iteration: 723
loss: 3.9907407760620117,grad_norm: 0.9999997946286712, iteration: 724
loss: 3.920675754547119,grad_norm: 0.9999998458029439, iteration: 725
loss: 3.9050536155700684,grad_norm: 0.9999998526746908, iteration: 726
loss: 4.020822525024414,grad_norm: 0.9999998376668231, iteration: 727
loss: 3.9862818717956543,grad_norm: 0.9999998110406787, iteration: 728
loss: 4.056185245513916,grad_norm: 0.9999998265761041, iteration: 729
loss: 4.016664505004883,grad_norm: 0.9999998501728564, iteration: 730
loss: 4.018091201782227,grad_norm: 0.9999998056898061, iteration: 731
loss: 3.8684637546539307,grad_norm: 0.9999998568157711, iteration: 732
loss: 3.8834502696990967,grad_norm: 0.9999997685518222, iteration: 733
loss: 3.905426025390625,grad_norm: 0.999999758314183, iteration: 734
loss: 3.9972994327545166,grad_norm: 0.9999998234995374, iteration: 735
loss: 3.95265793800354,grad_norm: 0.9999997405177042, iteration: 736
loss: 4.085378646850586,grad_norm: 0.9999998100821489, iteration: 737
loss: 4.016864776611328,grad_norm: 0.9999997588095516, iteration: 738
loss: 4.048177719116211,grad_norm: 0.9999997599427712, iteration: 739
loss: 3.8916921615600586,grad_norm: 0.9999997639614226, iteration: 740
loss: 4.009764194488525,grad_norm: 0.9999998894291542, iteration: 741
loss: 3.8882977962493896,grad_norm: 0.9999998059401596, iteration: 742
loss: 3.9868648052215576,grad_norm: 0.9999998087244168, iteration: 743
loss: 3.930983304977417,grad_norm: 0.999999824827197, iteration: 744
loss: 3.907151937484741,grad_norm: 0.9999997398292462, iteration: 745
loss: 3.9879422187805176,grad_norm: 0.9999997351416138, iteration: 746
loss: 3.9701602458953857,grad_norm: 0.9999997390575492, iteration: 747
loss: 4.108150005340576,grad_norm: 0.999999772871078, iteration: 748
loss: 3.918898344039917,grad_norm: 0.9999997983524349, iteration: 749
loss: 4.004781723022461,grad_norm: 0.9999998260714101, iteration: 750
loss: 4.04524040222168,grad_norm: 0.9999998288940006, iteration: 751
loss: 3.9086108207702637,grad_norm: 0.9999998060085575, iteration: 752
loss: 3.9668447971343994,grad_norm: 0.9999998176318149, iteration: 753
loss: 3.9421403408050537,grad_norm: 0.9999997980946527, iteration: 754
loss: 3.9195215702056885,grad_norm: 0.9999998896886826, iteration: 755
loss: 4.003018379211426,grad_norm: 0.999999819702882, iteration: 756
loss: 4.045064449310303,grad_norm: 0.9999997549935474, iteration: 757
loss: 3.9083046913146973,grad_norm: 0.9999998629176655, iteration: 758
loss: 4.023369312286377,grad_norm: 0.9999997960765872, iteration: 759
loss: 4.032841205596924,grad_norm: 0.9999998958604558, iteration: 760
loss: 3.8766818046569824,grad_norm: 0.9999997789198793, iteration: 761
loss: 4.033407211303711,grad_norm: 0.9999999071759176, iteration: 762
loss: 3.904207944869995,grad_norm: 0.99999980294375, iteration: 763
loss: 3.899005174636841,grad_norm: 0.9999998346786627, iteration: 764
loss: 3.9890544414520264,grad_norm: 0.9999998754882559, iteration: 765
loss: 3.9898793697357178,grad_norm: 0.9999998162062965, iteration: 766
loss: 3.9076128005981445,grad_norm: 0.9999998019345159, iteration: 767
loss: 3.958740711212158,grad_norm: 0.999999811337413, iteration: 768
loss: 3.974747896194458,grad_norm: 0.9999997531787916, iteration: 769
loss: 3.9692771434783936,grad_norm: 0.999999847827448, iteration: 770
loss: 3.896186113357544,grad_norm: 0.999999859808037, iteration: 771
loss: 3.994389772415161,grad_norm: 0.9999998732163982, iteration: 772
loss: 4.032051086425781,grad_norm: 0.9999997840382846, iteration: 773
loss: 4.003079891204834,grad_norm: 0.9999998163730783, iteration: 774
loss: 3.9892637729644775,grad_norm: 0.999999794741311, iteration: 775
loss: 3.9993529319763184,grad_norm: 0.9999998463624677, iteration: 776
loss: 4.029767036437988,grad_norm: 0.9999998078526963, iteration: 777
loss: 3.9464221000671387,grad_norm: 0.9999998269641779, iteration: 778
loss: 3.9411215782165527,grad_norm: 0.999999808033603, iteration: 779
loss: 3.9483909606933594,grad_norm: 0.999999854482576, iteration: 780
loss: 3.9300670623779297,grad_norm: 0.9999997926742228, iteration: 781
loss: 3.908944845199585,grad_norm: 0.9999997981816519, iteration: 782
loss: 3.9634835720062256,grad_norm: 0.9999998125089895, iteration: 783
loss: 4.061620712280273,grad_norm: 0.9999998746432949, iteration: 784
loss: 3.9174916744232178,grad_norm: 0.9999998863751459, iteration: 785
loss: 3.9098753929138184,grad_norm: 0.9999998423823578, iteration: 786
loss: 3.839984893798828,grad_norm: 0.9999998529321065, iteration: 787
loss: 4.008932590484619,grad_norm: 0.9999997445157656, iteration: 788
loss: 3.9240925312042236,grad_norm: 0.999999853443972, iteration: 789
loss: 3.895062208175659,grad_norm: 0.9999997804008774, iteration: 790
loss: 3.9230871200561523,grad_norm: 0.9999997701031984, iteration: 791
loss: 3.928007125854492,grad_norm: 0.9999997522823094, iteration: 792
loss: 3.9863271713256836,grad_norm: 0.99999976709669, iteration: 793
loss: 3.8965961933135986,grad_norm: 0.9999998484015208, iteration: 794
loss: 3.8981332778930664,grad_norm: 0.9999998795365462, iteration: 795
loss: 3.94803524017334,grad_norm: 0.9999998567248829, iteration: 796
loss: 3.985149383544922,grad_norm: 0.9999998406644716, iteration: 797
loss: 3.961979627609253,grad_norm: 0.9999998201067538, iteration: 798
loss: 3.8628525733947754,grad_norm: 0.9999998886852175, iteration: 799
loss: 3.9806180000305176,grad_norm: 0.9999998617305719, iteration: 800
loss: 3.952036142349243,grad_norm: 0.9999997358314937, iteration: 801
loss: 3.9279985427856445,grad_norm: 0.9999998428136645, iteration: 802
loss: 3.9504892826080322,grad_norm: 0.999999774067992, iteration: 803
loss: 3.9600908756256104,grad_norm: 0.9999997719390725, iteration: 804
loss: 3.909898281097412,grad_norm: 0.9999998309679303, iteration: 805
loss: 3.9448816776275635,grad_norm: 0.999999823518443, iteration: 806
loss: 3.8485679626464844,grad_norm: 0.9999997298331224, iteration: 807
loss: 3.882178544998169,grad_norm: 0.9999998700289152, iteration: 808
loss: 4.003289222717285,grad_norm: 0.9999997994584596, iteration: 809
loss: 3.9373066425323486,grad_norm: 0.9999998125999363, iteration: 810
loss: 3.8982994556427,grad_norm: 0.9999999306657604, iteration: 811
loss: 3.992532253265381,grad_norm: 0.9999997971385143, iteration: 812
loss: 3.8342838287353516,grad_norm: 0.9999998554741576, iteration: 813
loss: 3.9836244583129883,grad_norm: 0.9999997630574806, iteration: 814
loss: 3.8983991146087646,grad_norm: 0.9999998192822871, iteration: 815
loss: 3.951582193374634,grad_norm: 0.999999845880495, iteration: 816
loss: 3.924100637435913,grad_norm: 0.999999727845949, iteration: 817
loss: 3.865062713623047,grad_norm: 0.9999999382650555, iteration: 818
loss: 3.9594898223876953,grad_norm: 0.9999997713796434, iteration: 819
loss: 3.8782873153686523,grad_norm: 0.9999998693742059, iteration: 820
loss: 3.8768649101257324,grad_norm: 0.9999997609784276, iteration: 821
loss: 3.8710103034973145,grad_norm: 0.9999997314587914, iteration: 822
loss: 3.8935201168060303,grad_norm: 0.9999998194371293, iteration: 823
loss: 3.94940447807312,grad_norm: 0.999999767165715, iteration: 824
loss: 3.9787375926971436,grad_norm: 0.9999997711967539, iteration: 825
loss: 3.9665238857269287,grad_norm: 0.9999997297103864, iteration: 826
loss: 4.025939464569092,grad_norm: 0.999999805811808, iteration: 827
loss: 3.9620018005371094,grad_norm: 0.999999846389079, iteration: 828
loss: 3.9344940185546875,grad_norm: 0.9999999279765224, iteration: 829
loss: 3.917163610458374,grad_norm: 0.999999783258406, iteration: 830
loss: 3.9044666290283203,grad_norm: 0.9999997093109381, iteration: 831
loss: 3.915513515472412,grad_norm: 0.9999997791804117, iteration: 832
loss: 4.01332426071167,grad_norm: 0.9999997573606613, iteration: 833
loss: 3.941133975982666,grad_norm: 0.9999997450717897, iteration: 834
loss: 3.9767568111419678,grad_norm: 0.9999998494746317, iteration: 835
loss: 3.8839528560638428,grad_norm: 0.9999997733076171, iteration: 836
loss: 3.904062032699585,grad_norm: 0.9999997360085284, iteration: 837
loss: 3.910332679748535,grad_norm: 0.9999998518330099, iteration: 838
loss: 3.843341827392578,grad_norm: 0.9999998290577888, iteration: 839
loss: 3.9566574096679688,grad_norm: 0.9999998931000582, iteration: 840
loss: 3.807143211364746,grad_norm: 0.9999997742764907, iteration: 841
loss: 3.855989933013916,grad_norm: 0.9999997953435966, iteration: 842
loss: 3.908369541168213,grad_norm: 0.9999997713138132, iteration: 843
loss: 4.006180763244629,grad_norm: 0.9999999120857749, iteration: 844
loss: 3.892551898956299,grad_norm: 0.9999998890670815, iteration: 845
loss: 3.9902164936065674,grad_norm: 0.9999998173676775, iteration: 846
loss: 3.870051860809326,grad_norm: 0.9999998924911724, iteration: 847
loss: 3.9598588943481445,grad_norm: 0.9999998303282883, iteration: 848
loss: 3.9196155071258545,grad_norm: 0.9999998833477062, iteration: 849
loss: 3.9754042625427246,grad_norm: 0.9999998364426633, iteration: 850
loss: 3.8961141109466553,grad_norm: 0.9999998170299728, iteration: 851
loss: 3.9600250720977783,grad_norm: 0.9999998150259788, iteration: 852
loss: 3.841975212097168,grad_norm: 0.9999998546703702, iteration: 853
loss: 3.8934600353240967,grad_norm: 0.99999982007258, iteration: 854
loss: 3.9602510929107666,grad_norm: 0.9999998119299266, iteration: 855
loss: 3.8338866233825684,grad_norm: 0.9999998659917316, iteration: 856
loss: 3.9556238651275635,grad_norm: 0.9999998604732365, iteration: 857
loss: 3.862727165222168,grad_norm: 0.9999998697117587, iteration: 858
loss: 3.8699464797973633,grad_norm: 0.9999998437456382, iteration: 859
loss: 3.9255354404449463,grad_norm: 0.9999998208198776, iteration: 860
loss: 3.9509904384613037,grad_norm: 0.999999739068649, iteration: 861
loss: 4.02039909362793,grad_norm: 0.9999998449314157, iteration: 862
loss: 3.8785760402679443,grad_norm: 0.9999997822681117, iteration: 863
loss: 4.055242538452148,grad_norm: 0.9999998189805427, iteration: 864
loss: 4.0126543045043945,grad_norm: 0.9999998906551032, iteration: 865
loss: 3.8934624195098877,grad_norm: 0.9999998043429422, iteration: 866
loss: 3.8855860233306885,grad_norm: 0.9999997977909704, iteration: 867
loss: 3.854459047317505,grad_norm: 0.9999998931305834, iteration: 868
loss: 3.8832902908325195,grad_norm: 0.9999998489249194, iteration: 869
loss: 3.9807159900665283,grad_norm: 0.9999998347237388, iteration: 870
loss: 3.8620848655700684,grad_norm: 0.9999998052182829, iteration: 871
loss: 3.9616246223449707,grad_norm: 0.9999997775490342, iteration: 872
loss: 3.8984553813934326,grad_norm: 0.9999998627157899, iteration: 873
loss: 3.825556993484497,grad_norm: 0.9999997697361402, iteration: 874
loss: 3.9516963958740234,grad_norm: 0.9999998846285866, iteration: 875
loss: 3.844374179840088,grad_norm: 0.9999998470589724, iteration: 876
loss: 3.889522075653076,grad_norm: 0.9999997915018656, iteration: 877
loss: 3.8106181621551514,grad_norm: 0.9999997758031534, iteration: 878
loss: 3.941972255706787,grad_norm: 0.9999998405466538, iteration: 879
loss: 3.842252254486084,grad_norm: 0.9999997480687919, iteration: 880
loss: 3.8887252807617188,grad_norm: 0.9999998189017493, iteration: 881
loss: 3.9168641567230225,grad_norm: 0.9999998255602114, iteration: 882
loss: 3.9793262481689453,grad_norm: 0.9999998297020627, iteration: 883
loss: 3.9416344165802,grad_norm: 0.9999997973549825, iteration: 884
loss: 3.944225311279297,grad_norm: 0.9999998296216629, iteration: 885
loss: 3.986597776412964,grad_norm: 0.9999998534589111, iteration: 886
loss: 3.899183750152588,grad_norm: 0.9999998045440742, iteration: 887
loss: 3.826695680618286,grad_norm: 0.9999998476762678, iteration: 888
loss: 3.84655499458313,grad_norm: 0.9999997993599739, iteration: 889
loss: 3.9334628582000732,grad_norm: 0.9999998196415484, iteration: 890
loss: 4.0014238357543945,grad_norm: 0.9999998291739096, iteration: 891
loss: 3.978314161300659,grad_norm: 0.9999998661718977, iteration: 892
loss: 3.8720686435699463,grad_norm: 0.9999998112960963, iteration: 893
loss: 3.954176187515259,grad_norm: 0.9999997856668923, iteration: 894
loss: 3.919205904006958,grad_norm: 0.9999997827052759, iteration: 895
loss: 3.894646406173706,grad_norm: 0.999999840875629, iteration: 896
loss: 3.887498617172241,grad_norm: 0.9999998395982922, iteration: 897
loss: 3.847505569458008,grad_norm: 0.9999998973441742, iteration: 898
loss: 3.94680118560791,grad_norm: 0.9999997542296116, iteration: 899
loss: 3.8821709156036377,grad_norm: 0.9999998464102664, iteration: 900
loss: 3.9818191528320312,grad_norm: 0.9999998807918584, iteration: 901
loss: 3.8488659858703613,grad_norm: 0.999999870971691, iteration: 902
loss: 3.7870781421661377,grad_norm: 0.9999998367205801, iteration: 903
loss: 3.8377981185913086,grad_norm: 0.9999998320515492, iteration: 904
loss: 3.9237773418426514,grad_norm: 0.9999998838146804, iteration: 905
loss: 3.908439874649048,grad_norm: 0.999999830766749, iteration: 906
loss: 3.811478853225708,grad_norm: 0.9999998217010259, iteration: 907
loss: 3.880993604660034,grad_norm: 0.9999998473978317, iteration: 908
loss: 3.751894950866699,grad_norm: 0.9999998671118796, iteration: 909
loss: 3.8621511459350586,grad_norm: 0.9999997473765396, iteration: 910
loss: 3.912330389022827,grad_norm: 0.9999997898074896, iteration: 911
loss: 3.7805516719818115,grad_norm: 0.9999998384283855, iteration: 912
loss: 3.849241256713867,grad_norm: 0.9999998800104543, iteration: 913
loss: 3.847522258758545,grad_norm: 0.9999998155947535, iteration: 914
loss: 3.8844010829925537,grad_norm: 0.9999998644167608, iteration: 915
loss: 3.8690035343170166,grad_norm: 0.9999999108939921, iteration: 916
loss: 4.011127948760986,grad_norm: 0.9999997995291849, iteration: 917
loss: 3.96085262298584,grad_norm: 0.9999998364668948, iteration: 918
loss: 3.9298198223114014,grad_norm: 0.9999998110593665, iteration: 919
loss: 3.900848388671875,grad_norm: 0.9999998035906605, iteration: 920
loss: 3.843231439590454,grad_norm: 0.9999998455173419, iteration: 921
loss: 3.8937015533447266,grad_norm: 0.9999998150107756, iteration: 922
loss: 3.9369914531707764,grad_norm: 0.9999998040118896, iteration: 923
loss: 3.7774455547332764,grad_norm: 0.9999998280483275, iteration: 924
loss: 3.8854215145111084,grad_norm: 0.9999998301542119, iteration: 925
loss: 3.7958931922912598,grad_norm: 0.9999998687931761, iteration: 926
loss: 3.882493019104004,grad_norm: 0.9999998105932766, iteration: 927
loss: 3.8461720943450928,grad_norm: 0.999999845202913, iteration: 928
loss: 3.944179058074951,grad_norm: 0.9999998278285828, iteration: 929
loss: 3.84969162940979,grad_norm: 0.9999998578388094, iteration: 930
loss: 3.9150607585906982,grad_norm: 0.9999998205149061, iteration: 931
loss: 3.9760096073150635,grad_norm: 0.9999998631879765, iteration: 932
loss: 3.9302985668182373,grad_norm: 0.9999999006862426, iteration: 933
loss: 3.8533976078033447,grad_norm: 0.9999998474342711, iteration: 934
loss: 3.841768741607666,grad_norm: 0.9999998155703348, iteration: 935
loss: 3.7816460132598877,grad_norm: 0.9999997510933308, iteration: 936
loss: 3.8725838661193848,grad_norm: 0.9999998175546011, iteration: 937
loss: 3.936152458190918,grad_norm: 0.9999998760921929, iteration: 938
loss: 3.878185510635376,grad_norm: 0.9999998496685641, iteration: 939
loss: 3.9587817192077637,grad_norm: 0.9999998149939288, iteration: 940
loss: 3.8977015018463135,grad_norm: 0.9999997606053931, iteration: 941
loss: 3.8792169094085693,grad_norm: 0.9999998269503055, iteration: 942
loss: 3.9055044651031494,grad_norm: 0.9999998606280478, iteration: 943
loss: 3.8509716987609863,grad_norm: 0.9999997526515059, iteration: 944
loss: 3.7395246028900146,grad_norm: 0.9999998309347414, iteration: 945
loss: 3.8716506958007812,grad_norm: 0.999999720775491, iteration: 946
loss: 3.76240611076355,grad_norm: 0.9999997757203065, iteration: 947
loss: 3.883002281188965,grad_norm: 0.9999998915384314, iteration: 948
loss: 3.8630101680755615,grad_norm: 0.9999997701841205, iteration: 949
loss: 3.899425983428955,grad_norm: 0.9999997349889914, iteration: 950
loss: 3.9008514881134033,grad_norm: 0.9999998388223925, iteration: 951
loss: 3.8215491771698,grad_norm: 0.9999997866679777, iteration: 952
loss: 3.7399702072143555,grad_norm: 0.9999998516999942, iteration: 953
loss: 3.87292218208313,grad_norm: 0.9999998415215385, iteration: 954
loss: 3.796823501586914,grad_norm: 0.9999998886181153, iteration: 955
loss: 3.8635013103485107,grad_norm: 0.9999998176707202, iteration: 956
loss: 3.9008867740631104,grad_norm: 0.9999999070206834, iteration: 957
loss: 3.9628517627716064,grad_norm: 0.9999997957260162, iteration: 958
loss: 3.829476833343506,grad_norm: 0.9999998763512471, iteration: 959
loss: 3.840787649154663,grad_norm: 0.9999997619098562, iteration: 960
loss: 3.797546148300171,grad_norm: 0.9999997385090412, iteration: 961
loss: 3.841174840927124,grad_norm: 0.999999878254771, iteration: 962
loss: 3.7872774600982666,grad_norm: 0.9999998212403267, iteration: 963
loss: 3.8364181518554688,grad_norm: 0.9999999184627233, iteration: 964
loss: 3.8664286136627197,grad_norm: 0.9999998593535345, iteration: 965
loss: 3.8089144229888916,grad_norm: 0.9999998119700709, iteration: 966
loss: 3.7815353870391846,grad_norm: 0.9999998008558297, iteration: 967
loss: 3.7619903087615967,grad_norm: 0.999999810165994, iteration: 968
loss: 3.916660785675049,grad_norm: 0.9999997928899031, iteration: 969
loss: 3.7222132682800293,grad_norm: 0.9999999268288585, iteration: 970
loss: 3.839139461517334,grad_norm: 0.9999997901107799, iteration: 971
loss: 3.8930435180664062,grad_norm: 0.9999998873946359, iteration: 972
loss: 3.8021676540374756,grad_norm: 0.9999999010117269, iteration: 973
loss: 3.7158539295196533,grad_norm: 0.9999998166428278, iteration: 974
loss: 3.8000497817993164,grad_norm: 0.999999810816924, iteration: 975
loss: 3.7087066173553467,grad_norm: 0.9999999013561812, iteration: 976
loss: 3.7177023887634277,grad_norm: 0.9999997750471196, iteration: 977
loss: 3.8114969730377197,grad_norm: 0.9999997780230628, iteration: 978
loss: 3.750101327896118,grad_norm: 0.9999998095789745, iteration: 979
loss: 3.735250234603882,grad_norm: 0.9999998442332455, iteration: 980
loss: 3.772423028945923,grad_norm: 0.9999998634389053, iteration: 981
loss: 3.7462141513824463,grad_norm: 0.9999998370565264, iteration: 982
loss: 3.7901337146759033,grad_norm: 0.9999998920184981, iteration: 983
loss: 3.841920852661133,grad_norm: 0.9999997879981535, iteration: 984
loss: 3.6812126636505127,grad_norm: 0.9999998341380804, iteration: 985
loss: 3.727120876312256,grad_norm: 0.9999997928129235, iteration: 986
loss: 3.885425567626953,grad_norm: 0.9999997578030574, iteration: 987
loss: 3.8865737915039062,grad_norm: 0.9999999144324233, iteration: 988
loss: 3.795649766921997,grad_norm: 0.9999998711694893, iteration: 989
loss: 3.843669891357422,grad_norm: 0.9999998722087995, iteration: 990
loss: 3.779953956604004,grad_norm: 0.9999998082416366, iteration: 991
loss: 3.793520212173462,grad_norm: 0.9999997797331872, iteration: 992
loss: 3.9098052978515625,grad_norm: 0.9999998439996863, iteration: 993
loss: 3.6796085834503174,grad_norm: 0.9999998401917265, iteration: 994
loss: 3.803786039352417,grad_norm: 0.9999997711862352, iteration: 995
loss: 3.6579086780548096,grad_norm: 0.9999998114338683, iteration: 996
loss: 3.8098816871643066,grad_norm: 0.9999998456961025, iteration: 997
loss: 3.7213222980499268,grad_norm: 0.9999999022172473, iteration: 998
loss: 3.884669303894043,grad_norm: 0.9999998484214352, iteration: 999
loss: 3.8287267684936523,grad_norm: 0.9999998809256069, iteration: 1000
loss: 3.680133819580078,grad_norm: 0.9999998027113831, iteration: 1001
loss: 3.7198758125305176,grad_norm: 0.999999811515982, iteration: 1002
loss: 3.7338039875030518,grad_norm: 0.9999998363021673, iteration: 1003
loss: 3.7500131130218506,grad_norm: 0.9999999063924452, iteration: 1004
loss: 3.8318960666656494,grad_norm: 0.999999841021113, iteration: 1005
loss: 3.8227405548095703,grad_norm: 0.9999998386139335, iteration: 1006
loss: 3.8287978172302246,grad_norm: 0.9999998289979215, iteration: 1007
loss: 3.7453863620758057,grad_norm: 0.9999998183431652, iteration: 1008
loss: 3.6611576080322266,grad_norm: 0.9999998461682905, iteration: 1009
loss: 3.806894063949585,grad_norm: 0.9999998641762218, iteration: 1010
loss: 3.7848498821258545,grad_norm: 0.9999998313592446, iteration: 1011
loss: 3.702336072921753,grad_norm: 0.9999998608570765, iteration: 1012
loss: 3.7768640518188477,grad_norm: 0.9999999109540408, iteration: 1013
loss: 3.6769840717315674,grad_norm: 0.9999999197135362, iteration: 1014
loss: 3.774829864501953,grad_norm: 0.9999998490982447, iteration: 1015
loss: 3.7122154235839844,grad_norm: 0.9999998172748256, iteration: 1016
loss: 3.6816294193267822,grad_norm: 0.9999997575288886, iteration: 1017
loss: 3.7961111068725586,grad_norm: 0.9999998885120758, iteration: 1018
loss: 3.81101131439209,grad_norm: 0.9999997825716753, iteration: 1019
loss: 3.7364516258239746,grad_norm: 0.9999997642355885, iteration: 1020
loss: 3.8185017108917236,grad_norm: 0.9999998795970771, iteration: 1021
loss: 3.733877420425415,grad_norm: 0.9999997678811525, iteration: 1022
loss: 3.6593117713928223,grad_norm: 0.9999998770884101, iteration: 1023
loss: 3.742995023727417,grad_norm: 0.9999997755178536, iteration: 1024
loss: 3.708522319793701,grad_norm: 0.999999786951387, iteration: 1025
loss: 3.65618896484375,grad_norm: 0.9999998359996605, iteration: 1026
loss: 3.679941177368164,grad_norm: 0.9999997884976639, iteration: 1027
loss: 3.713693857192993,grad_norm: 0.9999997853488736, iteration: 1028
loss: 3.734961748123169,grad_norm: 0.999999874488867, iteration: 1029
loss: 3.6957108974456787,grad_norm: 0.999999952786288, iteration: 1030
loss: 3.7039430141448975,grad_norm: 0.9999998134676005, iteration: 1031
loss: 3.781050443649292,grad_norm: 0.999999896915781, iteration: 1032
loss: 3.6256916522979736,grad_norm: 0.9999998948671617, iteration: 1033
loss: 3.6837501525878906,grad_norm: 0.9999998109562656, iteration: 1034
loss: 3.6380248069763184,grad_norm: 0.9999997935570685, iteration: 1035
loss: 3.7229886054992676,grad_norm: 0.9999998833096054, iteration: 1036
loss: 3.64744234085083,grad_norm: 0.9999998893718123, iteration: 1037
loss: 3.7297754287719727,grad_norm: 0.9999998176344237, iteration: 1038
loss: 3.6951355934143066,grad_norm: 0.9999998396250592, iteration: 1039
loss: 3.769026041030884,grad_norm: 0.9999999388601105, iteration: 1040
loss: 3.623771905899048,grad_norm: 0.9999997767882319, iteration: 1041
loss: 3.6762375831604004,grad_norm: 0.9999999162699786, iteration: 1042
loss: 3.646183490753174,grad_norm: 0.9999998123515518, iteration: 1043
loss: 3.700908899307251,grad_norm: 0.9999998457870936, iteration: 1044
loss: 3.7009496688842773,grad_norm: 0.9999999009625596, iteration: 1045
loss: 3.674349784851074,grad_norm: 0.9999998309177516, iteration: 1046
loss: 3.6676886081695557,grad_norm: 0.9999998689725057, iteration: 1047
loss: 3.6333000659942627,grad_norm: 0.999999887075088, iteration: 1048
loss: 3.6649227142333984,grad_norm: 0.9999998767294781, iteration: 1049
loss: 3.5434722900390625,grad_norm: 0.9999998499462924, iteration: 1050
loss: 3.630850315093994,grad_norm: 0.9999998566176487, iteration: 1051
loss: 3.641392946243286,grad_norm: 0.9999997927941386, iteration: 1052
loss: 3.696681499481201,grad_norm: 0.9999998941228426, iteration: 1053
loss: 3.742109775543213,grad_norm: 0.9999998691662505, iteration: 1054
loss: 3.6786065101623535,grad_norm: 0.9999999019526943, iteration: 1055
loss: 3.7368581295013428,grad_norm: 0.9999998142365379, iteration: 1056
loss: 3.812584638595581,grad_norm: 0.9999998636766583, iteration: 1057
loss: 3.7072372436523438,grad_norm: 0.9999998653047866, iteration: 1058
loss: 3.697108268737793,grad_norm: 0.9999999023364942, iteration: 1059
loss: 3.578704357147217,grad_norm: 0.9999999185230635, iteration: 1060
loss: 3.653520345687866,grad_norm: 0.9999999070335591, iteration: 1061
loss: 3.57395601272583,grad_norm: 0.9999998633095277, iteration: 1062
loss: 3.730438470840454,grad_norm: 0.99999986099375, iteration: 1063
loss: 3.5800137519836426,grad_norm: 0.9999999219591479, iteration: 1064
loss: 3.6737301349639893,grad_norm: 0.9999997806357083, iteration: 1065
loss: 3.5225727558135986,grad_norm: 0.9999998698690109, iteration: 1066
loss: 3.55012583732605,grad_norm: 0.999999830376322, iteration: 1067
loss: 3.695913314819336,grad_norm: 0.9999997938757571, iteration: 1068
loss: 3.6007416248321533,grad_norm: 0.9999998039777543, iteration: 1069
loss: 3.634976625442505,grad_norm: 0.999999856627579, iteration: 1070
loss: 3.6687912940979004,grad_norm: 0.9999998231955971, iteration: 1071
loss: 3.782625436782837,grad_norm: 0.9999998914494894, iteration: 1072
loss: 3.6306843757629395,grad_norm: 0.9999997746614192, iteration: 1073
loss: 3.607071876525879,grad_norm: 0.9999998161637941, iteration: 1074
loss: 3.6295409202575684,grad_norm: 0.9999998972638735, iteration: 1075
loss: 3.6104509830474854,grad_norm: 0.9999998688388331, iteration: 1076
loss: 3.570183753967285,grad_norm: 0.9999998202756835, iteration: 1077
loss: 3.5521247386932373,grad_norm: 0.9999998749094915, iteration: 1078
loss: 3.5581560134887695,grad_norm: 0.9999998466822002, iteration: 1079
loss: 3.5973758697509766,grad_norm: 0.9999998374631583, iteration: 1080
loss: 3.6032304763793945,grad_norm: 0.9999999002924094, iteration: 1081
loss: 3.6404478549957275,grad_norm: 0.9999999364561281, iteration: 1082
loss: 3.623919725418091,grad_norm: 0.9999998246416429, iteration: 1083
loss: 3.5190069675445557,grad_norm: 0.9999998250397186, iteration: 1084
loss: 3.6734395027160645,grad_norm: 0.9999999056634228, iteration: 1085
loss: 3.605588674545288,grad_norm: 0.9999998798463039, iteration: 1086
loss: 3.602193832397461,grad_norm: 0.9999997949020365, iteration: 1087
loss: 3.6152501106262207,grad_norm: 0.9999998807898542, iteration: 1088
loss: 3.564610004425049,grad_norm: 0.9999998397115065, iteration: 1089
loss: 3.695136070251465,grad_norm: 0.9999997873672579, iteration: 1090
loss: 3.6721160411834717,grad_norm: 0.9999998704304207, iteration: 1091
loss: 3.5821027755737305,grad_norm: 0.999999834098814, iteration: 1092
loss: 3.5672481060028076,grad_norm: 0.9999998751945391, iteration: 1093
loss: 3.5884361267089844,grad_norm: 0.9999998583562905, iteration: 1094
loss: 3.6514105796813965,grad_norm: 0.9999998592766366, iteration: 1095
loss: 3.5755813121795654,grad_norm: 0.999999846975198, iteration: 1096
loss: 3.7279772758483887,grad_norm: 0.9999998069636782, iteration: 1097
loss: 3.5594844818115234,grad_norm: 0.9999998332458869, iteration: 1098
loss: 3.6073527336120605,grad_norm: 0.9999998431381525, iteration: 1099
loss: 3.581531286239624,grad_norm: 0.999999840677412, iteration: 1100
loss: 3.471794605255127,grad_norm: 0.9999998817362571, iteration: 1101
loss: 3.5735530853271484,grad_norm: 0.9999998752198268, iteration: 1102
loss: 3.673307180404663,grad_norm: 0.999999841559374, iteration: 1103
loss: 3.571120500564575,grad_norm: 0.9999998389942452, iteration: 1104
loss: 3.6482794284820557,grad_norm: 0.9999998138472818, iteration: 1105
loss: 3.553973436355591,grad_norm: 0.9999998256151427, iteration: 1106
loss: 3.6192195415496826,grad_norm: 0.9999998435459315, iteration: 1107
loss: 3.5477473735809326,grad_norm: 0.9999998691510654, iteration: 1108
loss: 3.468256711959839,grad_norm: 0.9999998747444648, iteration: 1109
loss: 3.579929828643799,grad_norm: 0.9999998102225509, iteration: 1110
loss: 3.584216594696045,grad_norm: 0.9999999158167825, iteration: 1111
loss: 3.5662548542022705,grad_norm: 0.9999999139989606, iteration: 1112
loss: 3.6607797145843506,grad_norm: 0.9999998282431062, iteration: 1113
loss: 3.4799036979675293,grad_norm: 0.9999998279519994, iteration: 1114
loss: 3.640779972076416,grad_norm: 0.9999998548234929, iteration: 1115
loss: 3.5787477493286133,grad_norm: 0.9999999227890952, iteration: 1116
loss: 3.6278581619262695,grad_norm: 0.9999999661084336, iteration: 1117
loss: 3.531355857849121,grad_norm: 0.9999998897062712, iteration: 1118
loss: 3.5238351821899414,grad_norm: 0.9999999121489279, iteration: 1119
loss: 3.62554931640625,grad_norm: 0.9999998357311157, iteration: 1120
loss: 3.660109519958496,grad_norm: 0.9999997589095727, iteration: 1121
loss: 3.433537721633911,grad_norm: 0.9999998903898039, iteration: 1122
loss: 3.513451099395752,grad_norm: 0.9999998210580077, iteration: 1123
loss: 3.5861237049102783,grad_norm: 0.9999997781012424, iteration: 1124
loss: 3.6383235454559326,grad_norm: 0.9999998311785548, iteration: 1125
loss: 3.6130120754241943,grad_norm: 0.9999998763667683, iteration: 1126
loss: 3.5475282669067383,grad_norm: 0.9999998862036266, iteration: 1127
loss: 3.652378559112549,grad_norm: 0.9999998929304083, iteration: 1128
loss: 3.608938455581665,grad_norm: 0.9999997618196552, iteration: 1129
loss: 3.5383050441741943,grad_norm: 0.999999803505725, iteration: 1130
loss: 3.6137359142303467,grad_norm: 0.9999997733942181, iteration: 1131
loss: 3.531162738800049,grad_norm: 0.9999998986495142, iteration: 1132
loss: 3.6069767475128174,grad_norm: 0.9999998453347596, iteration: 1133
loss: 3.4492969512939453,grad_norm: 0.9999998410462345, iteration: 1134
loss: 3.5450589656829834,grad_norm: 0.9999998554105454, iteration: 1135
loss: 3.5343966484069824,grad_norm: 0.9999998941510437, iteration: 1136
loss: 3.5615499019622803,grad_norm: 0.9999998643905608, iteration: 1137
loss: 3.514751672744751,grad_norm: 0.9999997996918311, iteration: 1138
loss: 3.411696195602417,grad_norm: 0.9999998775351896, iteration: 1139
loss: 3.558361053466797,grad_norm: 0.9999998530134476, iteration: 1140
loss: 3.5989248752593994,grad_norm: 0.9999999085052809, iteration: 1141
loss: 3.4122908115386963,grad_norm: 0.9999998275482639, iteration: 1142
loss: 3.5944602489471436,grad_norm: 0.9999998403400858, iteration: 1143
loss: 3.3886656761169434,grad_norm: 0.9999997919111272, iteration: 1144
loss: 3.4591660499572754,grad_norm: 0.999999796957874, iteration: 1145
loss: 3.5152595043182373,grad_norm: 0.9999998364032734, iteration: 1146
loss: 3.586272954940796,grad_norm: 0.9999998723192968, iteration: 1147
loss: 3.430192470550537,grad_norm: 0.9999998015127073, iteration: 1148
loss: 3.4982659816741943,grad_norm: 0.9999998033423554, iteration: 1149
loss: 3.4915499687194824,grad_norm: 0.9999998605548895, iteration: 1150
loss: 3.598583698272705,grad_norm: 0.9999997906654747, iteration: 1151
loss: 3.4850471019744873,grad_norm: 0.9999997536045488, iteration: 1152
loss: 3.5515968799591064,grad_norm: 0.9999999537893123, iteration: 1153
loss: 3.431795835494995,grad_norm: 0.9999999348867364, iteration: 1154
loss: 3.5469541549682617,grad_norm: 0.9999998299374686, iteration: 1155
loss: 3.54483699798584,grad_norm: 0.9999997772575823, iteration: 1156
loss: 3.5195250511169434,grad_norm: 0.9999998429715629, iteration: 1157
loss: 3.4124152660369873,grad_norm: 0.9999998393554975, iteration: 1158
loss: 3.5884933471679688,grad_norm: 0.9999998008284473, iteration: 1159
loss: 3.5065245628356934,grad_norm: 0.9999998588253453, iteration: 1160
loss: 3.5583128929138184,grad_norm: 0.9999998492790287, iteration: 1161
loss: 3.482262134552002,grad_norm: 0.9999998426053789, iteration: 1162
loss: 3.359342575073242,grad_norm: 0.9999998664105859, iteration: 1163
loss: 3.411994695663452,grad_norm: 0.9999997982728352, iteration: 1164
loss: 3.4041452407836914,grad_norm: 0.9999998237793165, iteration: 1165
loss: 3.5905508995056152,grad_norm: 0.9999997666574738, iteration: 1166
loss: 3.3726460933685303,grad_norm: 0.9999998747061878, iteration: 1167
loss: 3.517629623413086,grad_norm: 0.9999998689672723, iteration: 1168
loss: 3.557692527770996,grad_norm: 0.9999998268891475, iteration: 1169
loss: 3.6294214725494385,grad_norm: 0.9999998968721231, iteration: 1170
loss: 3.435800790786743,grad_norm: 0.9999997655754918, iteration: 1171
loss: 3.6265532970428467,grad_norm: 0.9999998011896254, iteration: 1172
loss: 3.3499014377593994,grad_norm: 0.9999997985779933, iteration: 1173
loss: 3.4526853561401367,grad_norm: 0.9999998511445861, iteration: 1174
loss: 3.519460916519165,grad_norm: 0.9999999163596757, iteration: 1175
loss: 3.545222282409668,grad_norm: 0.9999998671774826, iteration: 1176
loss: 3.360870838165283,grad_norm: 0.9999998654946773, iteration: 1177
loss: 3.4920756816864014,grad_norm: 0.9999999384911984, iteration: 1178
loss: 3.519484281539917,grad_norm: 0.9999998906258759, iteration: 1179
loss: 3.4547436237335205,grad_norm: 0.9999998229520247, iteration: 1180
loss: 3.4662928581237793,grad_norm: 0.9999998860619188, iteration: 1181
loss: 3.5203332901000977,grad_norm: 0.9999999355808449, iteration: 1182
loss: 3.469433307647705,grad_norm: 0.9999998131229122, iteration: 1183
loss: 3.5085957050323486,grad_norm: 0.9999997453492955, iteration: 1184
loss: 3.554786205291748,grad_norm: 0.999999819094211, iteration: 1185
loss: 3.546995162963867,grad_norm: 0.999999849488424, iteration: 1186
loss: 3.48067045211792,grad_norm: 0.9999998901810159, iteration: 1187
loss: 3.5505359172821045,grad_norm: 0.9999998510813921, iteration: 1188
loss: 3.513288736343384,grad_norm: 0.9999999353306056, iteration: 1189
loss: 3.557196617126465,grad_norm: 0.9999998616122535, iteration: 1190
loss: 3.5872080326080322,grad_norm: 0.9999998187988483, iteration: 1191
loss: 3.4776320457458496,grad_norm: 0.9999999096195175, iteration: 1192
loss: 3.4038915634155273,grad_norm: 0.9999998541932804, iteration: 1193
loss: 3.5486271381378174,grad_norm: 0.9999998824153375, iteration: 1194
loss: 3.5850393772125244,grad_norm: 0.9999998067196743, iteration: 1195
loss: 3.545417070388794,grad_norm: 0.9999998168408168, iteration: 1196
loss: 3.593568801879883,grad_norm: 0.9999998385859056, iteration: 1197
loss: 3.351760149002075,grad_norm: 0.9999998073525596, iteration: 1198
loss: 3.4855456352233887,grad_norm: 0.999999831828629, iteration: 1199
loss: 3.5026497840881348,grad_norm: 0.9999998825732672, iteration: 1200
loss: 3.4702489376068115,grad_norm: 0.9999998333664176, iteration: 1201
loss: 3.460557699203491,grad_norm: 0.9999999911760696, iteration: 1202
loss: 3.4750289916992188,grad_norm: 0.9999998868096798, iteration: 1203
loss: 3.534294605255127,grad_norm: 0.9999997995205122, iteration: 1204
loss: 3.4832143783569336,grad_norm: 0.9999998628844964, iteration: 1205
loss: 3.484792947769165,grad_norm: 0.9999998233473616, iteration: 1206
loss: 3.5035197734832764,grad_norm: 0.9999998885345001, iteration: 1207
loss: 3.4404659271240234,grad_norm: 0.999999835746875, iteration: 1208
loss: 3.4995129108428955,grad_norm: 0.9999998289603156, iteration: 1209
loss: 3.4330406188964844,grad_norm: 0.9999999258344591, iteration: 1210
loss: 3.4931938648223877,grad_norm: 0.9999998951153252, iteration: 1211
loss: 3.495088577270508,grad_norm: 0.9999998250003905, iteration: 1212
loss: 3.6781039237976074,grad_norm: 0.9999998190662835, iteration: 1213
loss: 3.514979600906372,grad_norm: 0.9999998038298038, iteration: 1214
loss: 3.5270063877105713,grad_norm: 0.9999997573275333, iteration: 1215
loss: 3.4749248027801514,grad_norm: 0.9999998120582126, iteration: 1216
loss: 3.4435713291168213,grad_norm: 0.9999998769089306, iteration: 1217
loss: 3.607489824295044,grad_norm: 0.9999999162877983, iteration: 1218
loss: 3.577098846435547,grad_norm: 0.9999997814549194, iteration: 1219
loss: 3.600224733352661,grad_norm: 0.9999998874910421, iteration: 1220
loss: 3.4916470050811768,grad_norm: 0.9999998058231124, iteration: 1221
loss: 3.4870755672454834,grad_norm: 0.999999882625042, iteration: 1222
loss: 3.505764961242676,grad_norm: 0.9999998329126984, iteration: 1223
loss: 3.6028597354888916,grad_norm: 0.9999998715640678, iteration: 1224
loss: 3.5275652408599854,grad_norm: 0.9999998237314361, iteration: 1225
loss: 3.395113945007324,grad_norm: 0.9999997909257106, iteration: 1226
loss: 3.350189208984375,grad_norm: 0.9999998709241817, iteration: 1227
loss: 3.5012729167938232,grad_norm: 0.9999998487472486, iteration: 1228
loss: 3.4298408031463623,grad_norm: 0.9999997803717791, iteration: 1229
loss: 3.4658379554748535,grad_norm: 0.9999998204441858, iteration: 1230
loss: 3.442633628845215,grad_norm: 0.9999998268978139, iteration: 1231
loss: 3.3433496952056885,grad_norm: 0.9999998908756248, iteration: 1232
loss: 3.4427738189697266,grad_norm: 0.9999998043656245, iteration: 1233
loss: 3.4586141109466553,grad_norm: 0.999999851780971, iteration: 1234
loss: 3.430203676223755,grad_norm: 0.9999998117593879, iteration: 1235
loss: 3.4330294132232666,grad_norm: 0.9999998519183443, iteration: 1236
loss: 3.461451292037964,grad_norm: 0.9999998228207323, iteration: 1237
loss: 3.584287166595459,grad_norm: 0.9999998517251465, iteration: 1238
loss: 3.389536142349243,grad_norm: 0.9999998578191127, iteration: 1239
loss: 3.361595630645752,grad_norm: 0.9999998409965131, iteration: 1240
loss: 3.396130323410034,grad_norm: 0.9999998630326219, iteration: 1241
loss: 3.469425678253174,grad_norm: 0.9999998274974975, iteration: 1242
loss: 3.337193250656128,grad_norm: 0.9999998913441149, iteration: 1243
loss: 3.380340337753296,grad_norm: 0.9999997391006874, iteration: 1244
loss: 3.4727063179016113,grad_norm: 0.9999998457010225, iteration: 1245
loss: 3.4399566650390625,grad_norm: 0.9999998960904048, iteration: 1246
loss: 3.4929697513580322,grad_norm: 0.9999998812887441, iteration: 1247
loss: 3.4065396785736084,grad_norm: 0.9999998156884564, iteration: 1248
loss: 3.3917057514190674,grad_norm: 0.9999998347722837, iteration: 1249
loss: 3.4361348152160645,grad_norm: 0.9999997692526399, iteration: 1250
loss: 3.4409689903259277,grad_norm: 0.9999997735840158, iteration: 1251
loss: 3.512204647064209,grad_norm: 0.99999978222301, iteration: 1252
loss: 3.4823548793792725,grad_norm: 0.9999998170314196, iteration: 1253
loss: 3.447014808654785,grad_norm: 0.9999998041938508, iteration: 1254
loss: 3.469158172607422,grad_norm: 0.9999998603856828, iteration: 1255
loss: 3.525158643722534,grad_norm: 0.9999998391154433, iteration: 1256
loss: 3.5346009731292725,grad_norm: 0.9999999185701534, iteration: 1257
loss: 3.335681200027466,grad_norm: 0.9999997709893393, iteration: 1258
loss: 3.415820360183716,grad_norm: 0.9999999253113148, iteration: 1259
loss: 3.3119804859161377,grad_norm: 0.9999999004098112, iteration: 1260
loss: 3.4587149620056152,grad_norm: 0.9999998325087213, iteration: 1261
loss: 3.5316691398620605,grad_norm: 0.999999792490994, iteration: 1262
loss: 3.408599376678467,grad_norm: 0.9999998548863205, iteration: 1263
loss: 3.396225929260254,grad_norm: 0.9999999181804262, iteration: 1264
loss: 3.460132598876953,grad_norm: 0.9999999267721111, iteration: 1265
loss: 3.2522358894348145,grad_norm: 0.9999998646413903, iteration: 1266
loss: 3.542602300643921,grad_norm: 0.9999998746626069, iteration: 1267
loss: 3.342437267303467,grad_norm: 0.9999998510020794, iteration: 1268
loss: 3.5793445110321045,grad_norm: 0.9999998476159566, iteration: 1269
loss: 3.281085968017578,grad_norm: 0.9999998706057145, iteration: 1270
loss: 3.340161085128784,grad_norm: 0.9999997756874874, iteration: 1271
loss: 3.270186185836792,grad_norm: 0.9999998819348797, iteration: 1272
loss: 3.3132128715515137,grad_norm: 0.9999997977161212, iteration: 1273
loss: 3.4707400798797607,grad_norm: 0.9999998673491282, iteration: 1274
loss: 3.4263298511505127,grad_norm: 0.9999998056209508, iteration: 1275
loss: 3.501856803894043,grad_norm: 0.9999998361379848, iteration: 1276
loss: 3.2847046852111816,grad_norm: 0.9999998550250984, iteration: 1277
loss: 3.352720260620117,grad_norm: 0.9999999134352682, iteration: 1278
loss: 3.5573549270629883,grad_norm: 0.9999998445853467, iteration: 1279
loss: 3.4271790981292725,grad_norm: 0.9999998607743757, iteration: 1280
loss: 3.2981503009796143,grad_norm: 0.9999998832130484, iteration: 1281
loss: 3.4336142539978027,grad_norm: 0.9999998185973246, iteration: 1282
loss: 3.3713245391845703,grad_norm: 0.9999998235617383, iteration: 1283
loss: 3.615604877471924,grad_norm: 0.9999998684302794, iteration: 1284
loss: 3.3849709033966064,grad_norm: 0.9999997669553596, iteration: 1285
loss: 3.3939695358276367,grad_norm: 0.9999997849994419, iteration: 1286
loss: 3.369795799255371,grad_norm: 0.9999998140220034, iteration: 1287
loss: 3.5076541900634766,grad_norm: 0.9999998297349432, iteration: 1288
loss: 3.3506839275360107,grad_norm: 0.9999998140928944, iteration: 1289
loss: 3.513359546661377,grad_norm: 0.9999998896313579, iteration: 1290
loss: 3.3922014236450195,grad_norm: 0.9999997642268094, iteration: 1291
loss: 3.433842897415161,grad_norm: 0.9999999171884446, iteration: 1292
loss: 3.4132626056671143,grad_norm: 0.9999998605426765, iteration: 1293
loss: 3.3616652488708496,grad_norm: 0.9999998933394192, iteration: 1294
loss: 3.463743209838867,grad_norm: 0.9999998721099507, iteration: 1295
loss: 3.4355549812316895,grad_norm: 0.9999998125704517, iteration: 1296
loss: 3.3786826133728027,grad_norm: 0.9999998862408402, iteration: 1297
loss: 3.3822929859161377,grad_norm: 0.9999997790325653, iteration: 1298
loss: 3.487440824508667,grad_norm: 0.9999998339293588, iteration: 1299
loss: 3.431453227996826,grad_norm: 0.9999998355883616, iteration: 1300
loss: 3.4271936416625977,grad_norm: 0.9999998635744123, iteration: 1301
loss: 3.3996613025665283,grad_norm: 0.9999997624755849, iteration: 1302
loss: 3.440016508102417,grad_norm: 0.9999997731341921, iteration: 1303
loss: 3.333630323410034,grad_norm: 0.99999984728407, iteration: 1304
loss: 3.4312047958374023,grad_norm: 0.999999912022884, iteration: 1305
loss: 3.461719274520874,grad_norm: 0.9999999593313973, iteration: 1306
loss: 3.4241044521331787,grad_norm: 0.9999997765015961, iteration: 1307
loss: 3.3440065383911133,grad_norm: 0.9999997866665304, iteration: 1308
loss: 3.4627315998077393,grad_norm: 0.9999998704171847, iteration: 1309
loss: 3.4159700870513916,grad_norm: 0.9999999245918407, iteration: 1310
loss: 3.358690023422241,grad_norm: 0.9999998679462148, iteration: 1311
loss: 3.4252185821533203,grad_norm: 0.9999998465558789, iteration: 1312
loss: 3.2150766849517822,grad_norm: 0.9999998720760952, iteration: 1313
loss: 3.4318881034851074,grad_norm: 0.999999874620003, iteration: 1314
loss: 3.4305975437164307,grad_norm: 0.9999999127375466, iteration: 1315
loss: 3.5031588077545166,grad_norm: 0.9999999091018875, iteration: 1316
loss: 3.3254966735839844,grad_norm: 0.9999997968792016, iteration: 1317
loss: 3.3743841648101807,grad_norm: 0.9999998622014243, iteration: 1318
loss: 3.296389102935791,grad_norm: 0.9999998791192288, iteration: 1319
loss: 3.4009339809417725,grad_norm: 0.9999997943513399, iteration: 1320
loss: 3.2755935192108154,grad_norm: 0.9999998102155747, iteration: 1321
loss: 3.2810158729553223,grad_norm: 0.9999999411539098, iteration: 1322
loss: 3.278646469116211,grad_norm: 0.9999998959234118, iteration: 1323
loss: 3.3168370723724365,grad_norm: 0.9999998168655441, iteration: 1324
loss: 3.3297324180603027,grad_norm: 0.9999999095209497, iteration: 1325
loss: 3.391080617904663,grad_norm: 0.9999998107766013, iteration: 1326
loss: 3.2776436805725098,grad_norm: 0.9999998066657675, iteration: 1327
loss: 3.362067699432373,grad_norm: 0.9999998187002447, iteration: 1328
loss: 3.4200191497802734,grad_norm: 0.9999997868120613, iteration: 1329
loss: 3.340404510498047,grad_norm: 0.9999998524113568, iteration: 1330
loss: 3.4625556468963623,grad_norm: 0.9999998876938507, iteration: 1331
loss: 3.4663236141204834,grad_norm: 0.9999998461708745, iteration: 1332
loss: 3.4946138858795166,grad_norm: 0.9999998839504589, iteration: 1333
loss: 3.4609203338623047,grad_norm: 0.9999998288661904, iteration: 1334
loss: 3.3614308834075928,grad_norm: 0.9999998329825907, iteration: 1335
loss: 3.3336801528930664,grad_norm: 0.9999998898171578, iteration: 1336
loss: 3.326505184173584,grad_norm: 0.9999998877916472, iteration: 1337
loss: 3.4580841064453125,grad_norm: 0.9999999344109006, iteration: 1338
loss: 3.2994065284729004,grad_norm: 0.9999998414418496, iteration: 1339
loss: 3.4576659202575684,grad_norm: 0.9999997976938334, iteration: 1340
loss: 3.328352689743042,grad_norm: 0.9999998602220647, iteration: 1341
loss: 3.31809401512146,grad_norm: 0.9999998714218946, iteration: 1342
loss: 3.277791738510132,grad_norm: 0.9999998384877488, iteration: 1343
loss: 3.378139019012451,grad_norm: 0.9999997789229971, iteration: 1344
loss: 3.3958523273468018,grad_norm: 0.999999810877884, iteration: 1345
loss: 3.519402503967285,grad_norm: 0.9999998655513427, iteration: 1346
loss: 3.4666459560394287,grad_norm: 0.9999998992920813, iteration: 1347
loss: 3.4512553215026855,grad_norm: 0.9999999006305651, iteration: 1348
loss: 3.366647481918335,grad_norm: 0.9999998853353779, iteration: 1349
loss: 3.333385467529297,grad_norm: 0.9999998171771621, iteration: 1350
loss: 3.5619020462036133,grad_norm: 0.9999998940417992, iteration: 1351
loss: 3.4908692836761475,grad_norm: 0.9999998856540773, iteration: 1352
loss: 3.3830161094665527,grad_norm: 0.999999820502796, iteration: 1353
loss: 3.313513994216919,grad_norm: 0.9999998406553136, iteration: 1354
loss: 3.4237639904022217,grad_norm: 0.9999998006204085, iteration: 1355
loss: 3.304448127746582,grad_norm: 0.9999998213647784, iteration: 1356
loss: 3.3279449939727783,grad_norm: 0.9999998587054265, iteration: 1357
loss: 3.338693141937256,grad_norm: 0.9999998579255244, iteration: 1358
loss: 3.4681923389434814,grad_norm: 0.9999999075239693, iteration: 1359
loss: 3.366441249847412,grad_norm: 0.9999998758624261, iteration: 1360
loss: 3.3722219467163086,grad_norm: 0.9999998392950041, iteration: 1361
loss: 3.37198543548584,grad_norm: 0.9999999079235135, iteration: 1362
loss: 3.493623733520508,grad_norm: 0.9999998585728144, iteration: 1363
loss: 3.3333795070648193,grad_norm: 0.9999998484849429, iteration: 1364
loss: 3.324366331100464,grad_norm: 0.9999998327335781, iteration: 1365
loss: 3.239572525024414,grad_norm: 0.9999998698305398, iteration: 1366
loss: 3.333512306213379,grad_norm: 0.999999804026049, iteration: 1367
loss: 3.4590349197387695,grad_norm: 0.9999998081307815, iteration: 1368
loss: 3.370885133743286,grad_norm: 0.9999998781540888, iteration: 1369
loss: 3.3594090938568115,grad_norm: 0.9999998915691295, iteration: 1370
loss: 3.375173568725586,grad_norm: 0.9999997450401086, iteration: 1371
loss: 3.0899581909179688,grad_norm: 0.9999998563992262, iteration: 1372
loss: 3.3884942531585693,grad_norm: 0.9999998921283042, iteration: 1373
loss: 3.344648838043213,grad_norm: 0.9999998088214453, iteration: 1374
loss: 3.2399301528930664,grad_norm: 0.9999998565808639, iteration: 1375
loss: 3.4388465881347656,grad_norm: 0.9999998608823377, iteration: 1376
loss: 3.3302247524261475,grad_norm: 0.9999998248427127, iteration: 1377
loss: 3.3902509212493896,grad_norm: 0.9999998871627891, iteration: 1378
loss: 3.0974411964416504,grad_norm: 0.9999999554374324, iteration: 1379
loss: 3.3328561782836914,grad_norm: 0.9999999126566784, iteration: 1380
loss: 3.2478127479553223,grad_norm: 0.999999876139664, iteration: 1381
loss: 3.2462706565856934,grad_norm: 0.9999999086345149, iteration: 1382
loss: 3.4287071228027344,grad_norm: 0.9999998053322652, iteration: 1383
loss: 3.2694716453552246,grad_norm: 0.999999875892194, iteration: 1384
loss: 3.286121129989624,grad_norm: 0.9999997948344218, iteration: 1385
loss: 3.59899640083313,grad_norm: 0.9999998161780923, iteration: 1386
loss: 3.3693735599517822,grad_norm: 0.9999998534660722, iteration: 1387
loss: 3.2898030281066895,grad_norm: 0.9999999071312972, iteration: 1388
loss: 3.160834312438965,grad_norm: 0.9999998532445896, iteration: 1389
loss: 3.312730312347412,grad_norm: 0.9999998238728898, iteration: 1390
loss: 3.3135790824890137,grad_norm: 0.9999998249987969, iteration: 1391
loss: 3.360913038253784,grad_norm: 0.9999998584263341, iteration: 1392
loss: 3.2653143405914307,grad_norm: 0.9999997827748142, iteration: 1393
loss: 3.1336026191711426,grad_norm: 0.9999998961839494, iteration: 1394
loss: 3.4331841468811035,grad_norm: 0.9999998786320694, iteration: 1395
loss: 3.0970187187194824,grad_norm: 0.9999998685832863, iteration: 1396
loss: 3.322770118713379,grad_norm: 0.9999999023338791, iteration: 1397
loss: 3.3041269779205322,grad_norm: 0.9999998956621003, iteration: 1398
loss: 3.246460199356079,grad_norm: 0.9999998908341625, iteration: 1399
loss: 3.3756117820739746,grad_norm: 0.999999882577827, iteration: 1400
loss: 3.291994571685791,grad_norm: 0.9999998198937285, iteration: 1401
loss: 3.3450324535369873,grad_norm: 0.9999998974379227, iteration: 1402
loss: 3.225270986557007,grad_norm: 0.9999998903712822, iteration: 1403
loss: 3.3552796840667725,grad_norm: 0.9999999339567619, iteration: 1404
loss: 3.290040969848633,grad_norm: 0.9999998359406534, iteration: 1405
loss: 3.3208625316619873,grad_norm: 0.999999823208059, iteration: 1406
loss: 3.4350790977478027,grad_norm: 0.9999998366061931, iteration: 1407
loss: 3.5006115436553955,grad_norm: 0.99999984681375, iteration: 1408
loss: 3.3173487186431885,grad_norm: 0.9999998486280367, iteration: 1409
loss: 3.37060546875,grad_norm: 0.9999997872454265, iteration: 1410
loss: 3.3363311290740967,grad_norm: 0.9999998046930654, iteration: 1411
loss: 3.346083164215088,grad_norm: 0.9999997893236408, iteration: 1412
loss: 3.4490597248077393,grad_norm: 0.9999998184837906, iteration: 1413
loss: 3.319711446762085,grad_norm: 0.9999998555421196, iteration: 1414
loss: 3.3379006385803223,grad_norm: 0.9999998431910158, iteration: 1415
loss: 3.2891926765441895,grad_norm: 0.9999998969669194, iteration: 1416
loss: 3.295809745788574,grad_norm: 0.9999998084102064, iteration: 1417
loss: 3.132664442062378,grad_norm: 0.9999999141295292, iteration: 1418
loss: 3.286267042160034,grad_norm: 0.9999997692731921, iteration: 1419
loss: 3.344658136367798,grad_norm: 0.9999999123361528, iteration: 1420
loss: 3.1042463779449463,grad_norm: 0.9999998363964768, iteration: 1421
loss: 3.287111520767212,grad_norm: 0.9999997994696844, iteration: 1422
loss: 3.2577295303344727,grad_norm: 0.999999813904096, iteration: 1423
loss: 3.2106399536132812,grad_norm: 0.9999998646532596, iteration: 1424
loss: 3.3444786071777344,grad_norm: 0.9999998058198429, iteration: 1425
loss: 3.326596736907959,grad_norm: 0.9999998752986808, iteration: 1426
loss: 3.230186939239502,grad_norm: 0.9999998945328067, iteration: 1427
loss: 3.291555404663086,grad_norm: 0.999999827208188, iteration: 1428
loss: 3.2264864444732666,grad_norm: 0.9999998277682791, iteration: 1429
loss: 3.3624792098999023,grad_norm: 0.9999998799094031, iteration: 1430
loss: 3.246657371520996,grad_norm: 0.9999998656109185, iteration: 1431
loss: 3.2350759506225586,grad_norm: 0.9999998272227213, iteration: 1432
loss: 3.2665696144104004,grad_norm: 0.9999998479504718, iteration: 1433
loss: 3.3611724376678467,grad_norm: 0.9999999239335741, iteration: 1434
loss: 3.246522903442383,grad_norm: 0.9999999180461144, iteration: 1435
loss: 3.2025654315948486,grad_norm: 0.9999998769498345, iteration: 1436
loss: 3.2989187240600586,grad_norm: 0.9999999001892746, iteration: 1437
loss: 3.3225297927856445,grad_norm: 0.9999999233151138, iteration: 1438
loss: 3.1810121536254883,grad_norm: 0.999999883589298, iteration: 1439
loss: 3.1657631397247314,grad_norm: 0.9999998636503246, iteration: 1440
loss: 3.147764205932617,grad_norm: 0.9999998201238847, iteration: 1441
loss: 3.351151943206787,grad_norm: 0.9999998523394726, iteration: 1442
loss: 3.2664530277252197,grad_norm: 0.9999998888466503, iteration: 1443
loss: 3.2819674015045166,grad_norm: 0.9999998476914272, iteration: 1444
loss: 3.20953631401062,grad_norm: 0.9999998581989241, iteration: 1445
loss: 3.208409070968628,grad_norm: 0.9999998993222569, iteration: 1446
loss: 3.268052577972412,grad_norm: 0.9999998659581072, iteration: 1447
loss: 3.291555643081665,grad_norm: 0.9999998612012431, iteration: 1448
loss: 3.2471044063568115,grad_norm: 0.9999998546223591, iteration: 1449
loss: 3.241600275039673,grad_norm: 0.9999998414568405, iteration: 1450
loss: 3.2149782180786133,grad_norm: 0.999999835492721, iteration: 1451
loss: 3.286405086517334,grad_norm: 0.9999998623479317, iteration: 1452
loss: 3.2531745433807373,grad_norm: 0.9999998756729198, iteration: 1453
loss: 3.2754485607147217,grad_norm: 0.9999998337003402, iteration: 1454
loss: 3.1889114379882812,grad_norm: 0.9999998361530227, iteration: 1455
loss: 3.245370864868164,grad_norm: 0.9999998675952279, iteration: 1456
loss: 3.283083200454712,grad_norm: 0.9999998749037246, iteration: 1457
loss: 3.1787068843841553,grad_norm: 0.9999999038152162, iteration: 1458
loss: 2.9414710998535156,grad_norm: 0.9999998618675816, iteration: 1459
loss: 3.2841293811798096,grad_norm: 0.9999998126974303, iteration: 1460
loss: 3.2127537727355957,grad_norm: 0.9999998578687771, iteration: 1461
loss: 3.215003490447998,grad_norm: 0.9999999202360577, iteration: 1462
loss: 3.342313528060913,grad_norm: 0.999999864563805, iteration: 1463
loss: 3.172870397567749,grad_norm: 0.999999844966278, iteration: 1464
loss: 2.9665040969848633,grad_norm: 0.9999998681845326, iteration: 1465
loss: 3.1512720584869385,grad_norm: 0.9999998138064679, iteration: 1466
loss: 3.2015597820281982,grad_norm: 0.9999999592187597, iteration: 1467
loss: 3.127225637435913,grad_norm: 0.9999998591455638, iteration: 1468
loss: 3.125371217727661,grad_norm: 0.999999885098449, iteration: 1469
loss: 3.1285016536712646,grad_norm: 0.9999998508550647, iteration: 1470
loss: 3.1818642616271973,grad_norm: 0.9999998921163691, iteration: 1471
loss: 3.270199775695801,grad_norm: 0.9999998599972321, iteration: 1472
loss: 3.0871517658233643,grad_norm: 0.9999999139441137, iteration: 1473
loss: 3.07273006439209,grad_norm: 0.9999998515144006, iteration: 1474
loss: 3.0961828231811523,grad_norm: 0.9999998354080264, iteration: 1475
loss: 3.150770902633667,grad_norm: 0.9999998538888418, iteration: 1476
loss: 3.1783576011657715,grad_norm: 0.9999998561273843, iteration: 1477
loss: 3.3468477725982666,grad_norm: 0.9999999029412299, iteration: 1478
loss: 3.3307809829711914,grad_norm: 0.9999998179733083, iteration: 1479
loss: 3.3635473251342773,grad_norm: 0.9999999052158131, iteration: 1480
loss: 3.2117791175842285,grad_norm: 0.9999998285388401, iteration: 1481
loss: 3.143388509750366,grad_norm: 0.9999999007403861, iteration: 1482
loss: 3.082927942276001,grad_norm: 0.9999998875454582, iteration: 1483
loss: 3.025073528289795,grad_norm: 0.999999937005005, iteration: 1484
loss: 3.1712706089019775,grad_norm: 0.9999999063626795, iteration: 1485
loss: 3.051804304122925,grad_norm: 0.9999999111931658, iteration: 1486
loss: 3.130507230758667,grad_norm: 0.9999997803860559, iteration: 1487
loss: 3.2818782329559326,grad_norm: 0.9999998561892712, iteration: 1488
loss: 3.0837314128875732,grad_norm: 0.999999884803056, iteration: 1489
loss: 3.1518311500549316,grad_norm: 0.9999999265393404, iteration: 1490
loss: 3.145167589187622,grad_norm: 0.9999998110006828, iteration: 1491
loss: 3.233954429626465,grad_norm: 0.9999998298808017, iteration: 1492
loss: 3.125537395477295,grad_norm: 0.9999998825976811, iteration: 1493
loss: 3.206347703933716,grad_norm: 0.9999998942971327, iteration: 1494
loss: 3.139815330505371,grad_norm: 0.9999999391587266, iteration: 1495
loss: 3.125767946243286,grad_norm: 0.9999998242880888, iteration: 1496
loss: 3.203489065170288,grad_norm: 0.9999998006092882, iteration: 1497
loss: 3.254382610321045,grad_norm: 0.9999998315746123, iteration: 1498
loss: 3.0740413665771484,grad_norm: 0.9999997589518643, iteration: 1499
loss: 3.2461633682250977,grad_norm: 0.9999998082419977, iteration: 1500
loss: 3.0542798042297363,grad_norm: 0.9999998652463504, iteration: 1501
loss: 3.1025266647338867,grad_norm: 0.9999998627045393, iteration: 1502
loss: 3.179699182510376,grad_norm: 0.9999999052376721, iteration: 1503
loss: 3.02067494392395,grad_norm: 0.9999999484648183, iteration: 1504
loss: 3.1091694831848145,grad_norm: 0.9999999160421482, iteration: 1505
loss: 3.0646955966949463,grad_norm: 0.9999998243272087, iteration: 1506
loss: 3.065458059310913,grad_norm: 0.999999854133074, iteration: 1507
loss: 3.025860071182251,grad_norm: 0.9999997805962991, iteration: 1508
loss: 3.042057752609253,grad_norm: 0.9999998835471059, iteration: 1509
loss: 3.1560938358306885,grad_norm: 0.999999939008077, iteration: 1510
loss: 3.243091344833374,grad_norm: 0.9999999156049698, iteration: 1511
loss: 3.0587193965911865,grad_norm: 0.9999998705345011, iteration: 1512
loss: 3.2045531272888184,grad_norm: 0.9999998851044036, iteration: 1513
loss: 3.1310415267944336,grad_norm: 0.9999997318323564, iteration: 1514
loss: 3.111023187637329,grad_norm: 0.999999795604876, iteration: 1515
loss: 3.079906940460205,grad_norm: 0.9999998004761408, iteration: 1516
loss: 3.15069317817688,grad_norm: 0.9999998367901357, iteration: 1517
loss: 3.0138747692108154,grad_norm: 0.9999998376018236, iteration: 1518
loss: 3.0929813385009766,grad_norm: 0.9999999073883332, iteration: 1519
loss: 2.9209444522857666,grad_norm: 0.999999916227842, iteration: 1520
loss: 3.0966007709503174,grad_norm: 0.9999998822936762, iteration: 1521
loss: 3.2252261638641357,grad_norm: 0.999999913115806, iteration: 1522
loss: 3.0137996673583984,grad_norm: 0.9999998954983582, iteration: 1523
loss: 3.0798206329345703,grad_norm: 0.9999998626967908, iteration: 1524
loss: 3.066333770751953,grad_norm: 0.9999998530554864, iteration: 1525
loss: 3.075087308883667,grad_norm: 0.9999998743736993, iteration: 1526
loss: 3.027822256088257,grad_norm: 0.9999998484799225, iteration: 1527
loss: 3.092270851135254,grad_norm: 0.9999998514841715, iteration: 1528
loss: 3.1748147010803223,grad_norm: 0.9999999131327172, iteration: 1529
loss: 3.060260057449341,grad_norm: 0.9999999316679405, iteration: 1530
loss: 3.080808162689209,grad_norm: 0.9999998186899148, iteration: 1531
loss: 3.0802130699157715,grad_norm: 0.9999999044485943, iteration: 1532
loss: 2.929094076156616,grad_norm: 0.9999998784364329, iteration: 1533
loss: 3.0506186485290527,grad_norm: 0.9999998577371125, iteration: 1534
loss: 3.1697022914886475,grad_norm: 0.9999998473460008, iteration: 1535
loss: 3.1238598823547363,grad_norm: 0.9999998717910039, iteration: 1536
loss: 3.108069658279419,grad_norm: 0.9999999032733615, iteration: 1537
loss: 2.905449628829956,grad_norm: 0.9999998684988332, iteration: 1538
loss: 2.993314504623413,grad_norm: 0.999999929677217, iteration: 1539
loss: 3.0585615634918213,grad_norm: 0.9999998481965843, iteration: 1540
loss: 3.089977264404297,grad_norm: 0.999999832546393, iteration: 1541
loss: 3.040417432785034,grad_norm: 0.9999998259442606, iteration: 1542
loss: 2.921722650527954,grad_norm: 0.9999998896503765, iteration: 1543
loss: 2.9401662349700928,grad_norm: 0.999999864207537, iteration: 1544
loss: 3.0806190967559814,grad_norm: 0.9999998915711642, iteration: 1545
loss: 3.0261924266815186,grad_norm: 0.9999998139230196, iteration: 1546
loss: 3.143526077270508,grad_norm: 0.9999998580132601, iteration: 1547
loss: 3.059166669845581,grad_norm: 0.9999998944188765, iteration: 1548
loss: 2.9404866695404053,grad_norm: 0.9999998290034037, iteration: 1549
loss: 3.0709774494171143,grad_norm: 0.9999999000750021, iteration: 1550
loss: 3.042503833770752,grad_norm: 0.9999999280910076, iteration: 1551
loss: 3.148768663406372,grad_norm: 0.9999998874483649, iteration: 1552
loss: 2.9235329627990723,grad_norm: 0.9999999073754481, iteration: 1553
loss: 2.962306022644043,grad_norm: 0.9999998792119481, iteration: 1554
loss: 2.9809482097625732,grad_norm: 0.9999998232331239, iteration: 1555
loss: 3.0075323581695557,grad_norm: 0.9999999153583249, iteration: 1556
loss: 2.970428466796875,grad_norm: 0.9999998682863176, iteration: 1557
loss: 2.964945077896118,grad_norm: 0.999999801099316, iteration: 1558
loss: 2.836669921875,grad_norm: 0.9999998178949836, iteration: 1559
loss: 2.9425253868103027,grad_norm: 0.9999998673086247, iteration: 1560
loss: 2.972113847732544,grad_norm: 0.9999997718938888, iteration: 1561
loss: 2.914914846420288,grad_norm: 0.9999998497132022, iteration: 1562
loss: 3.009323835372925,grad_norm: 0.9999998943842082, iteration: 1563
loss: 2.9727728366851807,grad_norm: 0.9999998785269439, iteration: 1564
loss: 3.130610466003418,grad_norm: 0.9999998289230011, iteration: 1565
loss: 2.9974329471588135,grad_norm: 0.9999998359877784, iteration: 1566
loss: 2.869518518447876,grad_norm: 0.9999998761832655, iteration: 1567
loss: 2.8956005573272705,grad_norm: 0.9999998736045903, iteration: 1568
loss: 3.05120587348938,grad_norm: 0.9999998394532941, iteration: 1569
loss: 2.98152494430542,grad_norm: 0.9999998342703242, iteration: 1570
loss: 3.0230720043182373,grad_norm: 0.999999837463575, iteration: 1571
loss: 3.0873544216156006,grad_norm: 0.9999999311314437, iteration: 1572
loss: 3.093817949295044,grad_norm: 0.9999999203488971, iteration: 1573
loss: 2.8297014236450195,grad_norm: 0.9999998687212684, iteration: 1574
loss: 3.008129835128784,grad_norm: 0.9999999014685541, iteration: 1575
loss: 2.9829773902893066,grad_norm: 0.999999849763681, iteration: 1576
loss: 2.912210464477539,grad_norm: 0.9999999215765959, iteration: 1577
loss: 3.0563547611236572,grad_norm: 0.9999999187655648, iteration: 1578
loss: 2.8314499855041504,grad_norm: 0.9999999372504527, iteration: 1579
loss: 2.98386812210083,grad_norm: 0.9999998415764815, iteration: 1580
loss: 2.9828457832336426,grad_norm: 0.9999998562528506, iteration: 1581
loss: 2.9552536010742188,grad_norm: 0.9999998396261204, iteration: 1582
loss: 2.9915060997009277,grad_norm: 0.999999879213868, iteration: 1583
loss: 3.003021240234375,grad_norm: 0.9999999345613027, iteration: 1584
loss: 3.0255684852600098,grad_norm: 0.9999997876340166, iteration: 1585
loss: 2.949192523956299,grad_norm: 0.9999998750908033, iteration: 1586
loss: 2.9941766262054443,grad_norm: 0.9999998496608015, iteration: 1587
loss: 2.843691110610962,grad_norm: 0.9999998755980682, iteration: 1588
loss: 2.869237184524536,grad_norm: 0.9999999470382086, iteration: 1589
loss: 2.9164750576019287,grad_norm: 0.9999998597793047, iteration: 1590
loss: 2.9203813076019287,grad_norm: 0.9999998917809503, iteration: 1591
loss: 2.7993435859680176,grad_norm: 0.9999998372484084, iteration: 1592
loss: 2.893714189529419,grad_norm: 0.9999998744358973, iteration: 1593
loss: 2.9372196197509766,grad_norm: 0.9999998381029227, iteration: 1594
loss: 2.8642942905426025,grad_norm: 0.9999998728480061, iteration: 1595
loss: 3.0720643997192383,grad_norm: 0.9999999139275808, iteration: 1596
loss: 2.9100615978240967,grad_norm: 0.9999999112660296, iteration: 1597
loss: 2.812873601913452,grad_norm: 0.9999998079123754, iteration: 1598
loss: 2.8784759044647217,grad_norm: 0.9999999501734679, iteration: 1599
loss: 2.8769662380218506,grad_norm: 0.9999998785858274, iteration: 1600
loss: 3.135028600692749,grad_norm: 0.9999998036765343, iteration: 1601
loss: 2.9116122722625732,grad_norm: 0.999999869537733, iteration: 1602
loss: 3.0127060413360596,grad_norm: 0.9999998904465232, iteration: 1603
loss: 2.986398458480835,grad_norm: 0.9999998876655746, iteration: 1604
loss: 2.858139991760254,grad_norm: 0.9999998140378866, iteration: 1605
loss: 2.8005826473236084,grad_norm: 0.9999999224550218, iteration: 1606
loss: 2.8110511302948,grad_norm: 0.9999998409670587, iteration: 1607
loss: 2.958610773086548,grad_norm: 0.9999999335253258, iteration: 1608
loss: 3.0698459148406982,grad_norm: 0.9999998874725764, iteration: 1609
loss: 2.7602622509002686,grad_norm: 0.9999999149807611, iteration: 1610
loss: 2.882009267807007,grad_norm: 0.9999999466249062, iteration: 1611
loss: 2.9892489910125732,grad_norm: 0.9999998845212131, iteration: 1612
loss: 2.8175156116485596,grad_norm: 0.9999998562455066, iteration: 1613
loss: 2.874065637588501,grad_norm: 0.999999935449537, iteration: 1614
loss: 2.842646598815918,grad_norm: 0.9999998785386446, iteration: 1615
loss: 2.8197712898254395,grad_norm: 0.9999999040119654, iteration: 1616
loss: 2.87974214553833,grad_norm: 0.9999998853140525, iteration: 1617
loss: 2.940541982650757,grad_norm: 0.9999999070323824, iteration: 1618
loss: 2.9689855575561523,grad_norm: 0.9999999350980264, iteration: 1619
loss: 2.788698196411133,grad_norm: 0.9999999632716109, iteration: 1620
loss: 2.806732177734375,grad_norm: 0.9999999030252567, iteration: 1621
loss: 2.8396174907684326,grad_norm: 0.9999999147340869, iteration: 1622
loss: 2.7638916969299316,grad_norm: 0.9999998971691529, iteration: 1623
loss: 2.8121166229248047,grad_norm: 0.9999998794631115, iteration: 1624
loss: 2.7006289958953857,grad_norm: 0.9999998229316034, iteration: 1625
loss: 3.0469753742218018,grad_norm: 0.9999998819324056, iteration: 1626
loss: 2.7769155502319336,grad_norm: 0.9999997939427266, iteration: 1627
loss: 2.8647079467773438,grad_norm: 0.9999999317409495, iteration: 1628
loss: 2.8193247318267822,grad_norm: 0.9999998808960792, iteration: 1629
loss: 3.012432336807251,grad_norm: 0.9999999486270044, iteration: 1630
loss: 2.7078402042388916,grad_norm: 0.9999999095156494, iteration: 1631
loss: 2.880236864089966,grad_norm: 0.9999998452940319, iteration: 1632
loss: 2.859966278076172,grad_norm: 0.9999998953929027, iteration: 1633
loss: 2.8830456733703613,grad_norm: 0.9999998721828476, iteration: 1634
loss: 2.78387451171875,grad_norm: 0.9999998478025458, iteration: 1635
loss: 2.7983040809631348,grad_norm: 0.9999997636363731, iteration: 1636
loss: 2.983806848526001,grad_norm: 0.9999998940365732, iteration: 1637
loss: 2.752455711364746,grad_norm: 0.9999998502283595, iteration: 1638
loss: 2.88844895362854,grad_norm: 0.9999999380141543, iteration: 1639
loss: 2.9297642707824707,grad_norm: 0.9999998692876134, iteration: 1640
loss: 2.759509325027466,grad_norm: 0.9999998468602479, iteration: 1641
loss: 2.7498817443847656,grad_norm: 0.9999999486154368, iteration: 1642
loss: 2.6982786655426025,grad_norm: 0.9999999207554323, iteration: 1643
loss: 2.840864896774292,grad_norm: 0.9999998426264115, iteration: 1644
loss: 2.7370433807373047,grad_norm: 0.9999998557544575, iteration: 1645
loss: 2.8865950107574463,grad_norm: 0.9999998659099312, iteration: 1646
loss: 2.729313850402832,grad_norm: 0.9999999554309535, iteration: 1647
loss: 2.6529970169067383,grad_norm: 0.999999849836314, iteration: 1648
loss: 2.814298391342163,grad_norm: 0.9999998460434587, iteration: 1649
loss: 2.7970802783966064,grad_norm: 0.9999999413599234, iteration: 1650
loss: 2.8942389488220215,grad_norm: 0.999999979788002, iteration: 1651
loss: 2.8071510791778564,grad_norm: 0.9999998952996905, iteration: 1652
loss: 2.9468841552734375,grad_norm: 0.9999998242018382, iteration: 1653
loss: 2.927513837814331,grad_norm: 0.9999998291344088, iteration: 1654
loss: 2.7743148803710938,grad_norm: 0.9999999840964111, iteration: 1655
loss: 2.850935220718384,grad_norm: 0.9999999474547561, iteration: 1656
loss: 2.7487080097198486,grad_norm: 0.9999998795101049, iteration: 1657
loss: 2.8341269493103027,grad_norm: 0.9999998481108807, iteration: 1658
loss: 2.6208858489990234,grad_norm: 0.9999999059496621, iteration: 1659
loss: 2.799785614013672,grad_norm: 0.9999998632583444, iteration: 1660
loss: 2.769005298614502,grad_norm: 0.9999998007661893, iteration: 1661
loss: 2.7126429080963135,grad_norm: 0.9999998501106792, iteration: 1662
loss: 2.7451117038726807,grad_norm: 0.9999999524726462, iteration: 1663
loss: 2.6617581844329834,grad_norm: 0.9999998943751731, iteration: 1664
loss: 2.72898006439209,grad_norm: 0.9999998133838731, iteration: 1665
loss: 2.794471502304077,grad_norm: 0.9999999194861635, iteration: 1666
loss: 2.838150978088379,grad_norm: 0.999999935840004, iteration: 1667
loss: 2.7099995613098145,grad_norm: 0.9999998864100904, iteration: 1668
loss: 2.7263832092285156,grad_norm: 0.9999999544552934, iteration: 1669
loss: 2.725040912628174,grad_norm: 0.9999999686553789, iteration: 1670
loss: 2.780367374420166,grad_norm: 0.9999999416279233, iteration: 1671
loss: 2.81176495552063,grad_norm: 0.9999998600956553, iteration: 1672
loss: 2.739983558654785,grad_norm: 0.9999998875891217, iteration: 1673
loss: 2.7159507274627686,grad_norm: 0.9999998310208505, iteration: 1674
loss: 2.67317795753479,grad_norm: 0.9999998504628035, iteration: 1675
loss: 2.8712267875671387,grad_norm: 0.9999998735346254, iteration: 1676
loss: 2.7336270809173584,grad_norm: 0.9999999213789211, iteration: 1677
loss: 2.8429133892059326,grad_norm: 0.9999998990523359, iteration: 1678
loss: 2.865065574645996,grad_norm: 0.9999997639941977, iteration: 1679
loss: 2.6625797748565674,grad_norm: 0.9999998927006273, iteration: 1680
loss: 2.690178155899048,grad_norm: 0.9999999177588832, iteration: 1681
loss: 2.7625954151153564,grad_norm: 0.9999998583995698, iteration: 1682
loss: 2.695390462875366,grad_norm: 0.9999998448733388, iteration: 1683
loss: 2.8131258487701416,grad_norm: 0.9999999534141119, iteration: 1684
loss: 2.715628147125244,grad_norm: 0.9999998496140329, iteration: 1685
loss: 2.5966289043426514,grad_norm: 0.9999999186877745, iteration: 1686
loss: 2.784345865249634,grad_norm: 0.9999999230631157, iteration: 1687
loss: 2.8346500396728516,grad_norm: 0.9999999246099878, iteration: 1688
loss: 2.5901057720184326,grad_norm: 0.9999999067764456, iteration: 1689
loss: 2.7913999557495117,grad_norm: 0.999999928043976, iteration: 1690
loss: 2.5986452102661133,grad_norm: 0.9999999529545089, iteration: 1691
loss: 2.7031354904174805,grad_norm: 0.9999999375026152, iteration: 1692
loss: 2.664728879928589,grad_norm: 0.9999998822459536, iteration: 1693
loss: 2.5115458965301514,grad_norm: 0.9999999386624067, iteration: 1694
loss: 2.6032302379608154,grad_norm: 0.999999850744843, iteration: 1695
loss: 2.67645525932312,grad_norm: 0.9999999270364628, iteration: 1696
loss: 2.897230386734009,grad_norm: 0.9999998405736964, iteration: 1697
loss: 2.6515049934387207,grad_norm: 0.9999998697499125, iteration: 1698
loss: 2.4266531467437744,grad_norm: 0.999999946295512, iteration: 1699
loss: 2.7415759563446045,grad_norm: 0.9999998569091436, iteration: 1700
loss: 2.4874279499053955,grad_norm: 0.9999998975818122, iteration: 1701
loss: 2.566516637802124,grad_norm: 0.999999965976871, iteration: 1702
loss: 2.4409306049346924,grad_norm: 0.9999998625115729, iteration: 1703
loss: 2.541990280151367,grad_norm: 0.9999998395830358, iteration: 1704
loss: 2.684190273284912,grad_norm: 0.9999998615612441, iteration: 1705
loss: 2.7440097332000732,grad_norm: 0.9999999080407825, iteration: 1706
loss: 2.68235182762146,grad_norm: 0.9999999800648368, iteration: 1707
loss: 2.7851293087005615,grad_norm: 0.9999999734274254, iteration: 1708
loss: 2.4822540283203125,grad_norm: 0.9999998474336909, iteration: 1709
loss: 2.641190528869629,grad_norm: 0.9999998733068455, iteration: 1710
loss: 2.5092079639434814,grad_norm: 0.9999998810142965, iteration: 1711
loss: 2.5755085945129395,grad_norm: 0.999999834236692, iteration: 1712
loss: 2.7072813510894775,grad_norm: 0.999999888661452, iteration: 1713
loss: 2.605351448059082,grad_norm: 0.9999999179842476, iteration: 1714
loss: 2.6344032287597656,grad_norm: 0.9999999145280298, iteration: 1715
loss: 2.5121164321899414,grad_norm: 0.9999998633953884, iteration: 1716
loss: 2.6419613361358643,grad_norm: 0.9999999604271661, iteration: 1717
loss: 2.7208220958709717,grad_norm: 0.9999998127991229, iteration: 1718
loss: 2.647740125656128,grad_norm: 0.9999998345302232, iteration: 1719
loss: 2.65334153175354,grad_norm: 0.999999825999892, iteration: 1720
loss: 2.5954091548919678,grad_norm: 0.9999998880817846, iteration: 1721
loss: 2.6490273475646973,grad_norm: 0.9999999152400902, iteration: 1722
loss: 2.495831251144409,grad_norm: 1.0000000136394056, iteration: 1723
loss: 2.6778531074523926,grad_norm: 0.9999998907661131, iteration: 1724
loss: 2.6814165115356445,grad_norm: 0.9999998961809136, iteration: 1725
loss: 2.598841667175293,grad_norm: 0.9999999343371488, iteration: 1726
loss: 2.543316125869751,grad_norm: 0.9999998016304074, iteration: 1727
loss: 2.6149988174438477,grad_norm: 0.9999999149491314, iteration: 1728
loss: 2.5634284019470215,grad_norm: 0.9999998424253045, iteration: 1729
loss: 2.530968189239502,grad_norm: 0.9999998839018498, iteration: 1730
loss: 2.526097059249878,grad_norm: 0.9999998703732924, iteration: 1731
loss: 2.5146777629852295,grad_norm: 0.9999998737000665, iteration: 1732
loss: 2.6131062507629395,grad_norm: 0.9999998284199688, iteration: 1733
loss: 2.5762104988098145,grad_norm: 0.9999998634663586, iteration: 1734
loss: 2.651183605194092,grad_norm: 0.999999981317911, iteration: 1735
loss: 2.6862268447875977,grad_norm: 0.9999999164887832, iteration: 1736
loss: 2.5242183208465576,grad_norm: 0.9999999234711396, iteration: 1737
loss: 2.5935914516448975,grad_norm: 0.9999999613342105, iteration: 1738
loss: 2.520780563354492,grad_norm: 0.9999998645773674, iteration: 1739
loss: 2.661041021347046,grad_norm: 0.9999999093100852, iteration: 1740
loss: 2.6840240955352783,grad_norm: 0.9999998421206425, iteration: 1741
loss: 2.653862953186035,grad_norm: 0.9999997786397516, iteration: 1742
loss: 2.621204137802124,grad_norm: 0.9999998086122249, iteration: 1743
loss: 2.554673671722412,grad_norm: 0.999999851808181, iteration: 1744
loss: 2.7222187519073486,grad_norm: 0.9999998537580772, iteration: 1745
loss: 2.501948118209839,grad_norm: 0.9999998448707574, iteration: 1746
loss: 2.4664673805236816,grad_norm: 0.9999998770486461, iteration: 1747
loss: 2.629249334335327,grad_norm: 0.9999999594051953, iteration: 1748
loss: 2.5087296962738037,grad_norm: 0.9999999223741927, iteration: 1749
loss: 2.585371494293213,grad_norm: 0.9999998986982382, iteration: 1750
loss: 2.6733131408691406,grad_norm: 0.9999999634538799, iteration: 1751
loss: 2.6600234508514404,grad_norm: 0.9999998780822316, iteration: 1752
loss: 2.695269823074341,grad_norm: 0.9999998972199737, iteration: 1753
loss: 2.7071681022644043,grad_norm: 0.9999998697401771, iteration: 1754
loss: 2.549619197845459,grad_norm: 0.999999903171797, iteration: 1755
loss: 2.539565324783325,grad_norm: 0.9999998927747785, iteration: 1756
loss: 2.4583306312561035,grad_norm: 0.9999998242741337, iteration: 1757
loss: 2.388455867767334,grad_norm: 0.9999998300404885, iteration: 1758
loss: 2.4328126907348633,grad_norm: 0.9999999063105595, iteration: 1759
loss: 2.6251726150512695,grad_norm: 0.9999998910297037, iteration: 1760
loss: 2.615306854248047,grad_norm: 0.9999998921293913, iteration: 1761
loss: 2.762375831604004,grad_norm: 0.9999998434387064, iteration: 1762
loss: 2.4615530967712402,grad_norm: 0.9999999484804779, iteration: 1763
loss: 2.610740900039673,grad_norm: 0.9999999097086848, iteration: 1764
loss: 2.608194589614868,grad_norm: 0.9999998115068296, iteration: 1765
loss: 2.457998514175415,grad_norm: 0.999999826649476, iteration: 1766
loss: 2.6014811992645264,grad_norm: 0.9999998733440335, iteration: 1767
loss: 2.4981207847595215,grad_norm: 0.9999998918945747, iteration: 1768
loss: 2.5782833099365234,grad_norm: 0.9999998556478611, iteration: 1769
loss: 2.566993474960327,grad_norm: 0.9999998800553376, iteration: 1770
loss: 2.4610214233398438,grad_norm: 0.9999998217969671, iteration: 1771
loss: 2.5042407512664795,grad_norm: 0.999999784431824, iteration: 1772
loss: 2.525268316268921,grad_norm: 0.9999999596696094, iteration: 1773
loss: 2.4980759620666504,grad_norm: 0.9999999045988083, iteration: 1774
loss: 2.5972445011138916,grad_norm: 0.9999999225199151, iteration: 1775
loss: 2.494340419769287,grad_norm: 0.9999999089108403, iteration: 1776
loss: 2.569486141204834,grad_norm: 0.9999999299715151, iteration: 1777
loss: 2.41909122467041,grad_norm: 0.9999998540075621, iteration: 1778
loss: 2.4996848106384277,grad_norm: 0.9999998932687663, iteration: 1779
loss: 2.5359222888946533,grad_norm: 0.9999999234275698, iteration: 1780
loss: 2.4675021171569824,grad_norm: 0.9999998201674504, iteration: 1781
loss: 2.394666910171509,grad_norm: 0.999999848843669, iteration: 1782
loss: 2.5122411251068115,grad_norm: 0.9999998653195704, iteration: 1783
loss: 2.381385087966919,grad_norm: 0.9999998734717996, iteration: 1784
loss: 2.5022025108337402,grad_norm: 1.0000000085996925, iteration: 1785
loss: 2.441102981567383,grad_norm: 0.9999999343578905, iteration: 1786
loss: 2.462775468826294,grad_norm: 0.9999998396271534, iteration: 1787
loss: 2.4247121810913086,grad_norm: 0.9999999230252783, iteration: 1788
loss: 2.396219253540039,grad_norm: 0.9999998597184628, iteration: 1789
loss: 2.4589717388153076,grad_norm: 0.99999989743481, iteration: 1790
loss: 2.4494807720184326,grad_norm: 0.9999998258124895, iteration: 1791
loss: 2.4793283939361572,grad_norm: 0.9999998765626673, iteration: 1792
loss: 2.5814242362976074,grad_norm: 0.9999999188076476, iteration: 1793
loss: 2.5508549213409424,grad_norm: 0.9999998479333879, iteration: 1794
loss: 2.533681631088257,grad_norm: 0.9999999093704345, iteration: 1795
loss: 2.527843475341797,grad_norm: 0.9999999251391043, iteration: 1796
loss: 2.3488876819610596,grad_norm: 0.9999998221036988, iteration: 1797
loss: 2.3989217281341553,grad_norm: 0.9999998439663114, iteration: 1798
loss: 2.5051395893096924,grad_norm: 0.9999998393202172, iteration: 1799
loss: 2.477259874343872,grad_norm: 0.9999998287438792, iteration: 1800
loss: 2.375518560409546,grad_norm: 0.9999998753517563, iteration: 1801
loss: 2.3843994140625,grad_norm: 0.9999998415549672, iteration: 1802
loss: 2.49285626411438,grad_norm: 0.999999845490611, iteration: 1803
loss: 2.4819185733795166,grad_norm: 0.9999998316714663, iteration: 1804
loss: 2.397602081298828,grad_norm: 0.9999998677276835, iteration: 1805
loss: 2.4362921714782715,grad_norm: 0.9999997752753147, iteration: 1806
loss: 2.354672908782959,grad_norm: 0.9999998628897592, iteration: 1807
loss: 2.462876319885254,grad_norm: 0.9999998709591563, iteration: 1808
loss: 2.4662363529205322,grad_norm: 0.9999999211535395, iteration: 1809
loss: 2.4547131061553955,grad_norm: 0.9999998831206299, iteration: 1810
loss: 2.2773873805999756,grad_norm: 0.999999939888072, iteration: 1811
loss: 2.4555439949035645,grad_norm: 0.9999999127545097, iteration: 1812
loss: 2.3398776054382324,grad_norm: 0.9999998901867404, iteration: 1813
loss: 2.3001067638397217,grad_norm: 0.9999999116743082, iteration: 1814
loss: 2.2597012519836426,grad_norm: 0.9999998574058444, iteration: 1815
loss: 2.359668016433716,grad_norm: 0.9999998761087058, iteration: 1816
loss: 2.5894811153411865,grad_norm: 0.9999997848131528, iteration: 1817
loss: 2.1981918811798096,grad_norm: 0.9999999405121582, iteration: 1818
loss: 2.489514112472534,grad_norm: 0.9999999203190392, iteration: 1819
loss: 2.3050501346588135,grad_norm: 0.9999998687956579, iteration: 1820
loss: 2.6651220321655273,grad_norm: 0.9999999046870127, iteration: 1821
loss: 2.388942003250122,grad_norm: 0.9999999187698909, iteration: 1822
loss: 2.36533522605896,grad_norm: 0.9999998275775726, iteration: 1823
loss: 2.285423755645752,grad_norm: 0.9999999328472448, iteration: 1824
loss: 2.3617916107177734,grad_norm: 0.9999998299757477, iteration: 1825
loss: 2.4112298488616943,grad_norm: 0.999999881687914, iteration: 1826
loss: 2.5055320262908936,grad_norm: 0.9999998953935908, iteration: 1827
loss: 2.2603068351745605,grad_norm: 0.9999999019074561, iteration: 1828
loss: 2.3794686794281006,grad_norm: 0.999999870559858, iteration: 1829
loss: 2.2617075443267822,grad_norm: 0.9999998711989004, iteration: 1830
loss: 2.3059000968933105,grad_norm: 0.9999999012080466, iteration: 1831
loss: 2.345482110977173,grad_norm: 0.9999998783243012, iteration: 1832
loss: 2.449138641357422,grad_norm: 0.9999998509063358, iteration: 1833
loss: 2.4972593784332275,grad_norm: 0.9999998674603432, iteration: 1834
loss: 2.3833179473876953,grad_norm: 0.9999998029931453, iteration: 1835
loss: 2.4318032264709473,grad_norm: 0.9999998317220368, iteration: 1836
loss: 2.3682620525360107,grad_norm: 0.999999846257608, iteration: 1837
loss: 2.5033836364746094,grad_norm: 0.9999998142679993, iteration: 1838
loss: 2.3530359268188477,grad_norm: 0.9999998751514017, iteration: 1839
loss: 2.233405113220215,grad_norm: 0.9999998819350874, iteration: 1840
loss: 2.3140597343444824,grad_norm: 0.9999998504793596, iteration: 1841
loss: 2.4959213733673096,grad_norm: 0.9999999528615593, iteration: 1842
loss: 2.350233793258667,grad_norm: 0.9999998863908903, iteration: 1843
loss: 2.5132944583892822,grad_norm: 0.9999999369064844, iteration: 1844
loss: 2.2753264904022217,grad_norm: 0.9999999042065306, iteration: 1845
loss: 2.4560558795928955,grad_norm: 0.9999999275690857, iteration: 1846
loss: 2.2944815158843994,grad_norm: 0.9999998572459264, iteration: 1847
loss: 2.3516159057617188,grad_norm: 0.9999998641862772, iteration: 1848
loss: 2.4536471366882324,grad_norm: 0.9999999007299458, iteration: 1849
loss: 2.4776558876037598,grad_norm: 0.9999998843050817, iteration: 1850
loss: 2.317692756652832,grad_norm: 0.9999999083892912, iteration: 1851
loss: 2.528728723526001,grad_norm: 0.9999998272616185, iteration: 1852
loss: 2.440901279449463,grad_norm: 0.9999998834869287, iteration: 1853
loss: 2.4440431594848633,grad_norm: 0.9999999906009054, iteration: 1854
loss: 2.4643781185150146,grad_norm: 0.9999998515678051, iteration: 1855
loss: 2.4572434425354004,grad_norm: 0.9999998352123253, iteration: 1856
loss: 2.372138261795044,grad_norm: 0.9999998124123375, iteration: 1857
loss: 2.4290268421173096,grad_norm: 0.9999999771900425, iteration: 1858
loss: 2.2490432262420654,grad_norm: 0.9999998826049794, iteration: 1859
loss: 2.322159767150879,grad_norm: 0.9999998478639945, iteration: 1860
loss: 2.4867465496063232,grad_norm: 0.9999998803359186, iteration: 1861
loss: 2.3781356811523438,grad_norm: 0.9999998714891836, iteration: 1862
loss: 2.4329030513763428,grad_norm: 0.9999999107437418, iteration: 1863
loss: 2.3571925163269043,grad_norm: 0.9999998794903029, iteration: 1864
loss: 2.394160509109497,grad_norm: 0.9999999492787903, iteration: 1865
loss: 2.412935495376587,grad_norm: 0.999999929414334, iteration: 1866
loss: 2.3587729930877686,grad_norm: 0.999999882446477, iteration: 1867
loss: 2.362959384918213,grad_norm: 0.9999998369847266, iteration: 1868
loss: 2.2852835655212402,grad_norm: 0.9999998527422947, iteration: 1869
loss: 2.3272697925567627,grad_norm: 0.9999998913088654, iteration: 1870
loss: 2.1953041553497314,grad_norm: 0.9999998406694418, iteration: 1871
loss: 2.4639761447906494,grad_norm: 0.999999848230001, iteration: 1872
loss: 2.381629467010498,grad_norm: 0.9999998776966236, iteration: 1873
loss: 2.383758544921875,grad_norm: 0.999999897486619, iteration: 1874
loss: 2.39117693901062,grad_norm: 0.9999998953683893, iteration: 1875
loss: 2.343270778656006,grad_norm: 0.9999999639619399, iteration: 1876
loss: 2.322732925415039,grad_norm: 0.9999998241095278, iteration: 1877
loss: 2.3278214931488037,grad_norm: 0.9999998975885847, iteration: 1878
loss: 2.364975690841675,grad_norm: 0.9999999505493963, iteration: 1879
loss: 2.356137752532959,grad_norm: 0.9999998995935737, iteration: 1880
loss: 2.4938368797302246,grad_norm: 0.999999833069257, iteration: 1881
loss: 2.199599504470825,grad_norm: 0.9999999430864039, iteration: 1882
loss: 2.3624026775360107,grad_norm: 0.9999999001698933, iteration: 1883
loss: 2.4060938358306885,grad_norm: 0.9999999433145503, iteration: 1884
loss: 2.3965394496917725,grad_norm: 0.9999999534563083, iteration: 1885
loss: 2.451704978942871,grad_norm: 0.9999999120545251, iteration: 1886
loss: 2.3746109008789062,grad_norm: 0.99999993328533, iteration: 1887
loss: 2.2113687992095947,grad_norm: 0.999999868205289, iteration: 1888
loss: 2.4388539791107178,grad_norm: 0.9999998405281468, iteration: 1889
loss: 2.299102306365967,grad_norm: 0.9999999013975281, iteration: 1890
loss: 2.3587214946746826,grad_norm: 0.9999999001051623, iteration: 1891
loss: 2.3019468784332275,grad_norm: 0.9999999375183456, iteration: 1892
loss: 2.3369529247283936,grad_norm: 0.9999998241638429, iteration: 1893
loss: 2.4492738246917725,grad_norm: 0.9999999097384568, iteration: 1894
loss: 2.3141660690307617,grad_norm: 0.9999998420311865, iteration: 1895
loss: 2.315701961517334,grad_norm: 0.9999998553970696, iteration: 1896
loss: 2.3244171142578125,grad_norm: 0.9999998635931188, iteration: 1897
loss: 2.217158317565918,grad_norm: 0.9999999243421267, iteration: 1898
loss: 2.3331620693206787,grad_norm: 0.999999881623963, iteration: 1899
loss: 2.358443021774292,grad_norm: 0.9999999555507587, iteration: 1900
loss: 2.5524485111236572,grad_norm: 0.999999868777793, iteration: 1901
loss: 2.4064602851867676,grad_norm: 0.9999998759386037, iteration: 1902
loss: 2.4098753929138184,grad_norm: 0.999999807813084, iteration: 1903
loss: 2.30145263671875,grad_norm: 0.9999999446834448, iteration: 1904
loss: 2.3206186294555664,grad_norm: 0.9999999255132499, iteration: 1905
loss: 2.226423978805542,grad_norm: 0.9999998872991407, iteration: 1906
loss: 2.369216203689575,grad_norm: 0.9999999155632925, iteration: 1907
loss: 2.338160276412964,grad_norm: 0.9999998829746454, iteration: 1908
loss: 2.3720943927764893,grad_norm: 0.9999997946814722, iteration: 1909
loss: 2.3177926540374756,grad_norm: 0.9999998439542201, iteration: 1910
loss: 2.3715758323669434,grad_norm: 0.9999998932303967, iteration: 1911
loss: 2.4259939193725586,grad_norm: 0.9999999448496287, iteration: 1912
loss: 2.3316099643707275,grad_norm: 0.9999998988070291, iteration: 1913
loss: 2.5083861351013184,grad_norm: 0.9999998251111888, iteration: 1914
loss: 2.3098204135894775,grad_norm: 0.9999999615202758, iteration: 1915
loss: 2.35512638092041,grad_norm: 0.999999864448963, iteration: 1916
loss: 2.3442435264587402,grad_norm: 0.999999849073446, iteration: 1917
loss: 2.284576892852783,grad_norm: 0.9999998578191832, iteration: 1918
loss: 2.2983040809631348,grad_norm: 0.9999999094644809, iteration: 1919
loss: 2.4214589595794678,grad_norm: 0.9999998028978245, iteration: 1920
loss: 2.3327252864837646,grad_norm: 0.9999999009835361, iteration: 1921
loss: 2.1709537506103516,grad_norm: 0.9999998457608806, iteration: 1922
loss: 2.384580135345459,grad_norm: 0.9999999608368126, iteration: 1923
loss: 2.3113272190093994,grad_norm: 0.9999999276613006, iteration: 1924
loss: 2.445481300354004,grad_norm: 0.9999998211196082, iteration: 1925
loss: 2.2602698802948,grad_norm: 0.9999999041370179, iteration: 1926
loss: 2.207913875579834,grad_norm: 0.9999997709585965, iteration: 1927
loss: 2.3029258251190186,grad_norm: 0.999999932799017, iteration: 1928
loss: 2.2952868938446045,grad_norm: 0.9999998386942447, iteration: 1929
loss: 2.308011054992676,grad_norm: 0.9999998985617246, iteration: 1930
loss: 2.318938970565796,grad_norm: 0.9999998193559066, iteration: 1931
loss: 2.3609883785247803,grad_norm: 0.9999998300390245, iteration: 1932
loss: 2.528796672821045,grad_norm: 0.9999998696810588, iteration: 1933
loss: 2.281968355178833,grad_norm: 0.9999999404773953, iteration: 1934
loss: 2.364107370376587,grad_norm: 0.9999997828236247, iteration: 1935
loss: 2.3286728858947754,grad_norm: 0.9999998758590668, iteration: 1936
loss: 2.283479690551758,grad_norm: 0.9999999078619771, iteration: 1937
loss: 2.2541723251342773,grad_norm: 0.9999999559742653, iteration: 1938
loss: 2.1114184856414795,grad_norm: 0.999999937154048, iteration: 1939
loss: 2.309373140335083,grad_norm: 0.999999863259078, iteration: 1940
loss: 2.2448298931121826,grad_norm: 0.9999998028590994, iteration: 1941
loss: 2.4031474590301514,grad_norm: 0.9999997933508024, iteration: 1942
loss: 2.414013624191284,grad_norm: 0.9999999152183937, iteration: 1943
loss: 2.2943832874298096,grad_norm: 0.999999870662719, iteration: 1944
loss: 2.2549326419830322,grad_norm: 0.9999998086572295, iteration: 1945
loss: 2.363642692565918,grad_norm: 0.9999997674940302, iteration: 1946
loss: 2.516885995864868,grad_norm: 0.999999846175989, iteration: 1947
loss: 2.181976318359375,grad_norm: 0.9999998686843502, iteration: 1948
loss: 2.266679048538208,grad_norm: 0.9999999352891211, iteration: 1949
loss: 2.429591655731201,grad_norm: 0.9999999657990973, iteration: 1950
loss: 2.300933361053467,grad_norm: 0.9999998505562491, iteration: 1951
loss: 2.3932948112487793,grad_norm: 0.9999997945074933, iteration: 1952
loss: 2.312938690185547,grad_norm: 0.9999998642693658, iteration: 1953
loss: 2.3918726444244385,grad_norm: 0.9999998466054757, iteration: 1954
loss: 2.3344876766204834,grad_norm: 0.9999998489591527, iteration: 1955
loss: 2.348418712615967,grad_norm: 0.9999998850839088, iteration: 1956
loss: 2.3005611896514893,grad_norm: 0.9999999065335525, iteration: 1957
loss: 2.318098306655884,grad_norm: 0.99999985082147, iteration: 1958
loss: 2.400285005569458,grad_norm: 0.9999998507152396, iteration: 1959
loss: 2.1992673873901367,grad_norm: 0.9999998985769458, iteration: 1960
loss: 2.1627466678619385,grad_norm: 0.9999998579870862, iteration: 1961
loss: 2.259916067123413,grad_norm: 0.9999998918849349, iteration: 1962
loss: 2.151855945587158,grad_norm: 0.9999998642462298, iteration: 1963
loss: 2.257333755493164,grad_norm: 0.9999998082383088, iteration: 1964
loss: 2.3283464908599854,grad_norm: 0.999999882665036, iteration: 1965
loss: 2.2843592166900635,grad_norm: 0.999999856746197, iteration: 1966
loss: 2.309833288192749,grad_norm: 0.9999999170148244, iteration: 1967
loss: 2.3811488151550293,grad_norm: 0.999999851157734, iteration: 1968
loss: 2.201052665710449,grad_norm: 0.9999999286358832, iteration: 1969
loss: 2.301635265350342,grad_norm: 0.9999998675224102, iteration: 1970
loss: 2.217384099960327,grad_norm: 0.9999998603516665, iteration: 1971
loss: 2.44370436668396,grad_norm: 0.9999998199876047, iteration: 1972
loss: 2.3456273078918457,grad_norm: 0.9999998910271468, iteration: 1973
loss: 2.353825807571411,grad_norm: 0.9999998949947683, iteration: 1974
loss: 2.386784791946411,grad_norm: 0.9999999108403907, iteration: 1975
loss: 2.323172092437744,grad_norm: 0.9999999118632688, iteration: 1976
loss: 2.270575523376465,grad_norm: 0.9999997800082839, iteration: 1977
loss: 2.207733154296875,grad_norm: 0.9999998375450962, iteration: 1978
loss: 2.388836622238159,grad_norm: 0.9999998764509173, iteration: 1979
loss: 2.2627387046813965,grad_norm: 0.9999997513741953, iteration: 1980
loss: 2.397122859954834,grad_norm: 0.9999999268636348, iteration: 1981
loss: 2.2465670108795166,grad_norm: 0.9999998924977636, iteration: 1982
loss: 2.435490131378174,grad_norm: 0.9999998448075128, iteration: 1983
loss: 2.1742424964904785,grad_norm: 0.9999998464545369, iteration: 1984
loss: 2.293133497238159,grad_norm: 0.9999998187632787, iteration: 1985
loss: 2.1906485557556152,grad_norm: 0.9999998368579488, iteration: 1986
loss: 2.2749297618865967,grad_norm: 0.999999889938135, iteration: 1987
loss: 2.272895097732544,grad_norm: 0.9999998092022837, iteration: 1988
loss: 2.3221113681793213,grad_norm: 0.9999999011956026, iteration: 1989
loss: 2.287548065185547,grad_norm: 0.9999999311143243, iteration: 1990
loss: 2.218116283416748,grad_norm: 0.9999998703761271, iteration: 1991
loss: 2.3052337169647217,grad_norm: 0.999999881328077, iteration: 1992
loss: 2.2624170780181885,grad_norm: 0.999999880676059, iteration: 1993
loss: 2.2588775157928467,grad_norm: 0.9999997854276992, iteration: 1994
loss: 2.186211585998535,grad_norm: 0.999999855614583, iteration: 1995
loss: 2.2575843334198,grad_norm: 0.9999998755545946, iteration: 1996
loss: 2.2379775047302246,grad_norm: 0.9999999020435169, iteration: 1997
loss: 2.2956581115722656,grad_norm: 0.9999997904982726, iteration: 1998
loss: 2.188126564025879,grad_norm: 0.9999998191909248, iteration: 1999
loss: 2.351475238800049,grad_norm: 0.9999998869803154, iteration: 2000
loss: 2.3702986240386963,grad_norm: 0.9999998511283985, iteration: 2001
loss: 2.300536632537842,grad_norm: 0.9999998105169619, iteration: 2002
loss: 2.244335889816284,grad_norm: 0.9999999006295158, iteration: 2003
loss: 2.1742494106292725,grad_norm: 0.9999998858759458, iteration: 2004
loss: 2.322458267211914,grad_norm: 0.9999998631848089, iteration: 2005
loss: 2.308459758758545,grad_norm: 0.9999998594186679, iteration: 2006
loss: 2.4504311084747314,grad_norm: 0.9999998957084955, iteration: 2007
loss: 2.3333077430725098,grad_norm: 0.9999999701987698, iteration: 2008
loss: 2.2321231365203857,grad_norm: 0.999999894495086, iteration: 2009
loss: 2.155625343322754,grad_norm: 0.9999998492549644, iteration: 2010
loss: 2.223945379257202,grad_norm: 0.9999998440631709, iteration: 2011
loss: 2.285132884979248,grad_norm: 0.9999998800641787, iteration: 2012
loss: 2.2698214054107666,grad_norm: 0.9999998055712792, iteration: 2013
loss: 2.22450852394104,grad_norm: 0.9999998280912265, iteration: 2014
loss: 2.3564293384552,grad_norm: 0.9999998502295042, iteration: 2015
loss: 2.416363000869751,grad_norm: 0.9999999189316704, iteration: 2016
loss: 2.2617745399475098,grad_norm: 0.999999784709607, iteration: 2017
loss: 2.209111213684082,grad_norm: 0.9999997857997294, iteration: 2018
loss: 2.2065234184265137,grad_norm: 0.999999875335219, iteration: 2019
loss: 2.180277109146118,grad_norm: 0.9999998765919367, iteration: 2020
loss: 2.3414058685302734,grad_norm: 0.9999997947328625, iteration: 2021
loss: 2.2815263271331787,grad_norm: 0.9999999807791449, iteration: 2022
loss: 2.224433660507202,grad_norm: 0.9999998885202346, iteration: 2023
loss: 2.372473955154419,grad_norm: 0.9999999070241798, iteration: 2024
loss: 2.3341636657714844,grad_norm: 0.9999999268346988, iteration: 2025
loss: 2.080573797225952,grad_norm: 0.9999998939139634, iteration: 2026
loss: 2.2651901245117188,grad_norm: 0.9999998419634256, iteration: 2027
loss: 2.2366621494293213,grad_norm: 0.999999882061862, iteration: 2028
loss: 2.2995433807373047,grad_norm: 0.999999826437426, iteration: 2029
loss: 2.250354528427124,grad_norm: 0.9999997785550505, iteration: 2030
loss: 2.289130449295044,grad_norm: 0.999999773397503, iteration: 2031
loss: 2.187704563140869,grad_norm: 0.9999998466804738, iteration: 2032
loss: 2.358574628829956,grad_norm: 0.9999998817197641, iteration: 2033
loss: 2.224888801574707,grad_norm: 0.9999999545613693, iteration: 2034
loss: 2.3335044384002686,grad_norm: 0.9999998874999689, iteration: 2035
loss: 2.226186990737915,grad_norm: 0.9999998503181488, iteration: 2036
loss: 2.171152114868164,grad_norm: 0.9999999112746639, iteration: 2037
loss: 2.246678590774536,grad_norm: 0.9999998777637795, iteration: 2038
loss: 2.223494052886963,grad_norm: 0.9999998458684408, iteration: 2039
loss: 2.169320583343506,grad_norm: 0.9999998861258921, iteration: 2040
loss: 2.355994701385498,grad_norm: 0.9999998866188768, iteration: 2041
loss: 2.3583824634552,grad_norm: 0.9999998776739207, iteration: 2042
loss: 2.3199617862701416,grad_norm: 0.9999998678561692, iteration: 2043
loss: 2.2980127334594727,grad_norm: 0.9999997973514574, iteration: 2044
loss: 2.294032335281372,grad_norm: 0.9999999028426164, iteration: 2045
loss: 2.3880605697631836,grad_norm: 0.9999997575387715, iteration: 2046
loss: 2.212872266769409,grad_norm: 0.9999998791932639, iteration: 2047
loss: 2.310288190841675,grad_norm: 0.9999997653196796, iteration: 2048
loss: 2.1910970211029053,grad_norm: 0.9999999169250471, iteration: 2049
loss: 2.135023355484009,grad_norm: 0.9999998260882731, iteration: 2050
loss: 2.26997447013855,grad_norm: 0.9999998940232216, iteration: 2051
loss: 2.235862970352173,grad_norm: 0.9999998488638318, iteration: 2052
loss: 2.0930583477020264,grad_norm: 0.999999882524957, iteration: 2053
loss: 2.4028520584106445,grad_norm: 0.9999998629571298, iteration: 2054
loss: 2.079981565475464,grad_norm: 0.9999998320699128, iteration: 2055
loss: 2.21211838722229,grad_norm: 0.9999998357162867, iteration: 2056
loss: 2.2392592430114746,grad_norm: 0.9999998320100962, iteration: 2057
loss: 2.267487049102783,grad_norm: 0.9999998567848797, iteration: 2058
loss: 2.2548389434814453,grad_norm: 0.9999998672359642, iteration: 2059
loss: 2.2515223026275635,grad_norm: 0.9999998254306615, iteration: 2060
loss: 2.2703471183776855,grad_norm: 0.999999915906624, iteration: 2061
loss: 2.307218551635742,grad_norm: 0.9999998924379597, iteration: 2062
loss: 2.3160767555236816,grad_norm: 0.9999998287872716, iteration: 2063
loss: 2.1860921382904053,grad_norm: 0.9999998820216411, iteration: 2064
loss: 2.321821689605713,grad_norm: 0.9999998840411566, iteration: 2065
loss: 2.333920478820801,grad_norm: 0.9999999112085232, iteration: 2066
loss: 2.397629976272583,grad_norm: 0.9999998157633447, iteration: 2067
loss: 2.1421003341674805,grad_norm: 0.999999800149764, iteration: 2068
loss: 2.180311918258667,grad_norm: 0.9999998776981759, iteration: 2069
loss: 2.0859274864196777,grad_norm: 0.9999998672743359, iteration: 2070
loss: 2.306028127670288,grad_norm: 0.9999997966705259, iteration: 2071
loss: 2.149702548980713,grad_norm: 0.9999998366923226, iteration: 2072
loss: 2.2917380332946777,grad_norm: 0.9999998146159109, iteration: 2073
loss: 2.1750612258911133,grad_norm: 0.999999880724256, iteration: 2074
loss: 2.2480223178863525,grad_norm: 0.9999998712141154, iteration: 2075
loss: 2.197272300720215,grad_norm: 0.9999998786809777, iteration: 2076
loss: 2.262162208557129,grad_norm: 0.9999998579281196, iteration: 2077
loss: 2.194100856781006,grad_norm: 0.9999998793061481, iteration: 2078
loss: 2.263254165649414,grad_norm: 0.9999999480280294, iteration: 2079
loss: 2.257591724395752,grad_norm: 0.9999998070475165, iteration: 2080
loss: 2.1640822887420654,grad_norm: 0.999999865109633, iteration: 2081
loss: 2.1118743419647217,grad_norm: 0.9999997765125447, iteration: 2082
loss: 2.178508996963501,grad_norm: 0.9999999221433875, iteration: 2083
loss: 2.3060379028320312,grad_norm: 0.9999999014270803, iteration: 2084
loss: 2.287095546722412,grad_norm: 0.9999998513630596, iteration: 2085
loss: 2.156672954559326,grad_norm: 0.9999998583821283, iteration: 2086
loss: 2.3470659255981445,grad_norm: 0.9999998608290851, iteration: 2087
loss: 2.2059500217437744,grad_norm: 0.9999998585891318, iteration: 2088
loss: 2.2353906631469727,grad_norm: 0.9999997741392483, iteration: 2089
loss: 2.159339427947998,grad_norm: 0.9999998513458674, iteration: 2090
loss: 2.183295249938965,grad_norm: 0.999999832080383, iteration: 2091
loss: 2.158444404602051,grad_norm: 0.9999998270101174, iteration: 2092
loss: 2.292963981628418,grad_norm: 0.9999998534621106, iteration: 2093
loss: 2.3072798252105713,grad_norm: 0.9999997988647737, iteration: 2094
loss: 2.3364362716674805,grad_norm: 0.9999998961239126, iteration: 2095
loss: 2.1708035469055176,grad_norm: 0.9999998344902019, iteration: 2096
loss: 2.2226531505584717,grad_norm: 0.9999998255840861, iteration: 2097
loss: 2.129542589187622,grad_norm: 0.9999997711320632, iteration: 2098
loss: 2.3393914699554443,grad_norm: 0.999999915096286, iteration: 2099
loss: 2.2171859741210938,grad_norm: 0.9999998371712923, iteration: 2100
loss: 2.2942373752593994,grad_norm: 0.9999998718517945, iteration: 2101
loss: 2.2448232173919678,grad_norm: 0.999999888212323, iteration: 2102
loss: 2.1784088611602783,grad_norm: 0.9999998389136265, iteration: 2103
loss: 2.3198516368865967,grad_norm: 0.9999999071910645, iteration: 2104
loss: 2.2437117099761963,grad_norm: 0.9999998767381374, iteration: 2105
loss: 2.3712027072906494,grad_norm: 0.9999998904879419, iteration: 2106
loss: 2.2622861862182617,grad_norm: 0.9999998456543084, iteration: 2107
loss: 2.3188815116882324,grad_norm: 0.9999999337955932, iteration: 2108
loss: 1.99062180519104,grad_norm: 0.9999998615314681, iteration: 2109
loss: 2.2160770893096924,grad_norm: 0.9999997834415795, iteration: 2110
loss: 2.3283941745758057,grad_norm: 0.9999999126529805, iteration: 2111
loss: 2.296764850616455,grad_norm: 0.9999998862946536, iteration: 2112
loss: 2.215440034866333,grad_norm: 0.9999998475524136, iteration: 2113
loss: 2.128899335861206,grad_norm: 0.9999998539235748, iteration: 2114
loss: 2.062793016433716,grad_norm: 0.9999998035478876, iteration: 2115
loss: 2.2251195907592773,grad_norm: 0.9999998462672236, iteration: 2116
loss: 2.14188814163208,grad_norm: 0.9999999339264058, iteration: 2117
loss: 2.2037782669067383,grad_norm: 0.9999997647684759, iteration: 2118
loss: 2.2831742763519287,grad_norm: 0.9999998137817069, iteration: 2119
loss: 2.1605629920959473,grad_norm: 0.9999998791736341, iteration: 2120
loss: 2.2011656761169434,grad_norm: 0.9999998504764347, iteration: 2121
loss: 2.1123735904693604,grad_norm: 0.9999998372330233, iteration: 2122
loss: 2.2184898853302,grad_norm: 0.9999998448775766, iteration: 2123
loss: 2.0862560272216797,grad_norm: 0.9999998312594953, iteration: 2124
loss: 2.195429801940918,grad_norm: 0.9999997771303059, iteration: 2125
loss: 2.3333992958068848,grad_norm: 0.9999998286335551, iteration: 2126
loss: 2.1455862522125244,grad_norm: 0.9999999283904646, iteration: 2127
loss: 2.182525634765625,grad_norm: 0.9999998073589309, iteration: 2128
loss: 2.0816893577575684,grad_norm: 0.999999836594814, iteration: 2129
loss: 2.17594575881958,grad_norm: 0.9999998703071338, iteration: 2130
loss: 2.229091167449951,grad_norm: 0.9999998136926366, iteration: 2131
loss: 2.1884987354278564,grad_norm: 0.9999998572315348, iteration: 2132
loss: 2.1472585201263428,grad_norm: 0.9999998490088159, iteration: 2133
loss: 2.056220769882202,grad_norm: 0.9999998647754739, iteration: 2134
loss: 2.187741279602051,grad_norm: 0.9999997919848639, iteration: 2135
loss: 2.1336135864257812,grad_norm: 0.9999998314701569, iteration: 2136
loss: 2.181842803955078,grad_norm: 0.9999998052350213, iteration: 2137
loss: 2.2383694648742676,grad_norm: 0.9999998688669256, iteration: 2138
loss: 2.245887517929077,grad_norm: 0.9999998059884663, iteration: 2139
loss: 2.1911144256591797,grad_norm: 0.9999998473684469, iteration: 2140
loss: 2.295498847961426,grad_norm: 0.9999998310370322, iteration: 2141
loss: 2.301995277404785,grad_norm: 0.9999998564145631, iteration: 2142
loss: 2.085092544555664,grad_norm: 0.9999998183201613, iteration: 2143
loss: 2.2302932739257812,grad_norm: 0.9999998531136579, iteration: 2144
loss: 2.160284996032715,grad_norm: 0.9999998539034651, iteration: 2145
loss: 2.171349287033081,grad_norm: 0.9999998122699958, iteration: 2146
loss: 2.167452335357666,grad_norm: 0.9999998747957474, iteration: 2147
loss: 2.193836212158203,grad_norm: 0.9999998673104068, iteration: 2148
loss: 2.2277355194091797,grad_norm: 0.9999998445242647, iteration: 2149
loss: 2.2777304649353027,grad_norm: 0.9999997766310963, iteration: 2150
loss: 2.2181458473205566,grad_norm: 0.9999998226600917, iteration: 2151
loss: 2.0161068439483643,grad_norm: 0.9999998655112723, iteration: 2152
loss: 2.280536651611328,grad_norm: 0.999999829512399, iteration: 2153
loss: 2.1995301246643066,grad_norm: 0.9999998620123884, iteration: 2154
loss: 2.0224227905273438,grad_norm: 0.9999998124050347, iteration: 2155
loss: 2.34025239944458,grad_norm: 0.9999998767353233, iteration: 2156
loss: 2.2343087196350098,grad_norm: 0.9999997984648273, iteration: 2157
loss: 2.120072841644287,grad_norm: 0.9999998115527567, iteration: 2158
loss: 2.119502067565918,grad_norm: 0.9999999024435413, iteration: 2159
loss: 2.1250476837158203,grad_norm: 0.9999998419689148, iteration: 2160
loss: 2.2107834815979004,grad_norm: 0.9999998252200388, iteration: 2161
loss: 2.255676746368408,grad_norm: 0.9999997332987082, iteration: 2162
loss: 2.220690965652466,grad_norm: 0.999999863908066, iteration: 2163
loss: 2.139927387237549,grad_norm: 0.9999998580892646, iteration: 2164
loss: 2.1569578647613525,grad_norm: 0.9999998431152706, iteration: 2165
loss: 2.2795820236206055,grad_norm: 0.9999998824511103, iteration: 2166
loss: 2.2296149730682373,grad_norm: 0.9999998482800703, iteration: 2167
loss: 2.2536122798919678,grad_norm: 0.9999998978684743, iteration: 2168
loss: 2.2693724632263184,grad_norm: 0.9999998135361887, iteration: 2169
loss: 2.193981170654297,grad_norm: 0.9999997889390906, iteration: 2170
loss: 2.1013944149017334,grad_norm: 0.9999998614046061, iteration: 2171
loss: 2.2464210987091064,grad_norm: 0.999999895527498, iteration: 2172
loss: 2.3492281436920166,grad_norm: 0.9999998900094932, iteration: 2173
loss: 2.1110284328460693,grad_norm: 0.9999998328499382, iteration: 2174
loss: 2.130272150039673,grad_norm: 0.9999998817523319, iteration: 2175
loss: 2.1576576232910156,grad_norm: 0.9999998039526594, iteration: 2176
loss: 2.2718095779418945,grad_norm: 0.9999999054760584, iteration: 2177
loss: 2.2475063800811768,grad_norm: 0.9999999032432009, iteration: 2178
loss: 2.1703851222991943,grad_norm: 0.9999998516280422, iteration: 2179
loss: 2.3263089656829834,grad_norm: 0.9999998638112719, iteration: 2180
loss: 2.325960159301758,grad_norm: 0.9999998475235627, iteration: 2181
loss: 2.325665235519409,grad_norm: 0.9999998706475344, iteration: 2182
loss: 2.1361899375915527,grad_norm: 0.9999998838193617, iteration: 2183
loss: 2.129011631011963,grad_norm: 0.9999998673557009, iteration: 2184
loss: 2.029618263244629,grad_norm: 0.99999984065622, iteration: 2185
loss: 2.1753008365631104,grad_norm: 0.9999998658877955, iteration: 2186
loss: 2.238701820373535,grad_norm: 0.999999894331966, iteration: 2187
loss: 2.2905492782592773,grad_norm: 0.9999998184783726, iteration: 2188
loss: 2.1435203552246094,grad_norm: 0.9999998600271907, iteration: 2189
loss: 2.2540786266326904,grad_norm: 0.999999808968752, iteration: 2190
loss: 2.207446336746216,grad_norm: 0.9999998386139961, iteration: 2191
loss: 2.263063907623291,grad_norm: 0.9999997348457407, iteration: 2192
loss: 2.1249735355377197,grad_norm: 0.999999901500111, iteration: 2193
loss: 2.015946865081787,grad_norm: 0.999999845977663, iteration: 2194
loss: 2.114921808242798,grad_norm: 0.9999998425696804, iteration: 2195
loss: 2.1913037300109863,grad_norm: 0.9999998699809795, iteration: 2196
loss: 2.0650134086608887,grad_norm: 0.9999998381364588, iteration: 2197
loss: 2.1572306156158447,grad_norm: 0.999999865976379, iteration: 2198
loss: 2.167205572128296,grad_norm: 0.9999998114359449, iteration: 2199
loss: 2.0555615425109863,grad_norm: 0.999999803752708, iteration: 2200
loss: 2.087616443634033,grad_norm: 0.9999998708782841, iteration: 2201
loss: 2.0851798057556152,grad_norm: 0.9999998555828031, iteration: 2202
loss: 2.333355665206909,grad_norm: 0.9999999077851607, iteration: 2203
loss: 2.1401009559631348,grad_norm: 0.9999998449002218, iteration: 2204
loss: 2.2081825733184814,grad_norm: 0.999999881407725, iteration: 2205
loss: 2.198275566101074,grad_norm: 0.9999997580633552, iteration: 2206
loss: 2.0181663036346436,grad_norm: 0.9999998083062647, iteration: 2207
loss: 2.194955587387085,grad_norm: 0.9999998903175785, iteration: 2208
loss: 2.11010479927063,grad_norm: 0.9999998867703465, iteration: 2209
loss: 2.20770001411438,grad_norm: 0.9999998349220284, iteration: 2210
loss: 2.228627920150757,grad_norm: 0.9999997909513048, iteration: 2211
loss: 2.185272455215454,grad_norm: 0.9999998127625624, iteration: 2212
loss: 2.14031720161438,grad_norm: 0.9999999003648238, iteration: 2213
loss: 2.098572015762329,grad_norm: 0.9999997690251672, iteration: 2214
loss: 2.2170445919036865,grad_norm: 0.9999998770625567, iteration: 2215
loss: 2.3121819496154785,grad_norm: 0.9999998543656026, iteration: 2216
loss: 2.142101764678955,grad_norm: 0.9999998330272355, iteration: 2217
loss: 2.2357800006866455,grad_norm: 0.9999998944846897, iteration: 2218
loss: 2.108104944229126,grad_norm: 0.9999998316317906, iteration: 2219
loss: 2.092442035675049,grad_norm: 0.9999998513760202, iteration: 2220
loss: 2.1011736392974854,grad_norm: 0.9999998730602515, iteration: 2221
loss: 2.337808132171631,grad_norm: 0.9999998751408055, iteration: 2222
loss: 2.2485640048980713,grad_norm: 0.9999998157764287, iteration: 2223
loss: 2.102431058883667,grad_norm: 0.9999998205154611, iteration: 2224
loss: 2.227598190307617,grad_norm: 0.9999997917280796, iteration: 2225
loss: 2.1425414085388184,grad_norm: 0.9999997975517142, iteration: 2226
loss: 2.334385633468628,grad_norm: 0.9999998728992661, iteration: 2227
loss: 2.355764627456665,grad_norm: 0.9999998962400977, iteration: 2228
loss: 2.1294400691986084,grad_norm: 0.9999999512968488, iteration: 2229
loss: 2.146998405456543,grad_norm: 0.9999998290232888, iteration: 2230
loss: 2.004267692565918,grad_norm: 0.9999998597234661, iteration: 2231
loss: 2.168405771255493,grad_norm: 0.9999998472894958, iteration: 2232
loss: 2.083073854446411,grad_norm: 0.9999998501005589, iteration: 2233
loss: 2.2148351669311523,grad_norm: 0.9999998946373752, iteration: 2234
loss: 2.0976574420928955,grad_norm: 0.9999997531256802, iteration: 2235
loss: 2.1492486000061035,grad_norm: 0.9999998328505216, iteration: 2236
loss: 2.1757991313934326,grad_norm: 0.9999998338394195, iteration: 2237
loss: 1.965002179145813,grad_norm: 0.9999999063526828, iteration: 2238
loss: 2.151710033416748,grad_norm: 0.9999999511091582, iteration: 2239
loss: 2.208669662475586,grad_norm: 0.9999998651369626, iteration: 2240
loss: 2.207902669906616,grad_norm: 0.9999999323827004, iteration: 2241
loss: 2.1838366985321045,grad_norm: 0.9999998355574865, iteration: 2242
loss: 2.192685604095459,grad_norm: 0.9999997937904079, iteration: 2243
loss: 2.0270490646362305,grad_norm: 0.9999998579716562, iteration: 2244
loss: 1.9781726598739624,grad_norm: 0.9999998201701238, iteration: 2245
loss: 2.1981396675109863,grad_norm: 0.9999998290138108, iteration: 2246
loss: 2.1074109077453613,grad_norm: 0.999999813179715, iteration: 2247
loss: 2.1527504920959473,grad_norm: 0.9999998444286367, iteration: 2248
loss: 2.1262848377227783,grad_norm: 0.9999998656768443, iteration: 2249
loss: 1.9930737018585205,grad_norm: 0.9999998234218209, iteration: 2250
loss: 2.108664035797119,grad_norm: 0.9999998135233061, iteration: 2251
loss: 2.13073992729187,grad_norm: 0.9999997798566503, iteration: 2252
loss: 2.1869289875030518,grad_norm: 0.9999998533270763, iteration: 2253
loss: 2.04191517829895,grad_norm: 0.9999998481320531, iteration: 2254
loss: 2.1260969638824463,grad_norm: 0.9999999111938501, iteration: 2255
loss: 2.2289786338806152,grad_norm: 0.9999998712210946, iteration: 2256
loss: 2.12496280670166,grad_norm: 0.9999998467806346, iteration: 2257
loss: 2.220672845840454,grad_norm: 0.9999997773163897, iteration: 2258
loss: 2.0841734409332275,grad_norm: 0.9999998264669425, iteration: 2259
loss: 2.0510454177856445,grad_norm: 0.9999998672868995, iteration: 2260
loss: 2.136659860610962,grad_norm: 0.9999998378810936, iteration: 2261
loss: 2.2024974822998047,grad_norm: 0.9999997584251689, iteration: 2262
loss: 2.0596261024475098,grad_norm: 0.999999841737626, iteration: 2263
loss: 2.105184316635132,grad_norm: 0.9999998530892185, iteration: 2264
loss: 2.0904998779296875,grad_norm: 0.9999998295303505, iteration: 2265
loss: 2.1881961822509766,grad_norm: 0.9999998687262184, iteration: 2266
loss: 2.1247293949127197,grad_norm: 0.9999998367258677, iteration: 2267
loss: 2.3397748470306396,grad_norm: 0.9999998762166966, iteration: 2268
loss: 2.1585733890533447,grad_norm: 0.9999998359848707, iteration: 2269
loss: 2.0279123783111572,grad_norm: 0.9999998250768829, iteration: 2270
loss: 2.0413870811462402,grad_norm: 0.9999998556445119, iteration: 2271
loss: 2.240417003631592,grad_norm: 0.999999795783499, iteration: 2272
loss: 2.203087091445923,grad_norm: 0.9999998549277866, iteration: 2273
loss: 2.2049245834350586,grad_norm: 0.9999998194520237, iteration: 2274
loss: 2.076570987701416,grad_norm: 0.9999998025328254, iteration: 2275
loss: 2.0640709400177,grad_norm: 0.9999998454388443, iteration: 2276
loss: 2.254859447479248,grad_norm: 0.9999997839247879, iteration: 2277
loss: 2.0315194129943848,grad_norm: 0.9999998569801172, iteration: 2278
loss: 2.0853071212768555,grad_norm: 0.9999998740660765, iteration: 2279
loss: 1.9551218748092651,grad_norm: 0.9999998328171492, iteration: 2280
loss: 2.188823699951172,grad_norm: 0.9999999397904387, iteration: 2281
loss: 2.266249418258667,grad_norm: 0.9999997752335719, iteration: 2282
loss: 2.0493717193603516,grad_norm: 0.9999998170279206, iteration: 2283
loss: 2.1490883827209473,grad_norm: 0.9999998990358909, iteration: 2284
loss: 2.0317752361297607,grad_norm: 0.999999863048908, iteration: 2285
loss: 2.1522305011749268,grad_norm: 0.9999998866884826, iteration: 2286
loss: 2.0864479541778564,grad_norm: 0.9999997911044353, iteration: 2287
loss: 2.206348180770874,grad_norm: 0.9999998624111379, iteration: 2288
loss: 2.067073345184326,grad_norm: 0.999999882778772, iteration: 2289
loss: 2.010815143585205,grad_norm: 0.9999999323578028, iteration: 2290
loss: 2.036364793777466,grad_norm: 0.9999998232205844, iteration: 2291
loss: 2.3856089115142822,grad_norm: 0.9999997653463558, iteration: 2292
loss: 2.149568796157837,grad_norm: 0.9999998285620519, iteration: 2293
loss: 2.159573793411255,grad_norm: 0.9999997746206418, iteration: 2294
loss: 2.2707431316375732,grad_norm: 0.9999998667626202, iteration: 2295
loss: 2.093384265899658,grad_norm: 0.9999997976317472, iteration: 2296
loss: 2.1159751415252686,grad_norm: 0.9999998281656506, iteration: 2297
loss: 2.049746513366699,grad_norm: 0.9999999011393887, iteration: 2298
loss: 2.1816773414611816,grad_norm: 0.9999997669645087, iteration: 2299
loss: 2.162019968032837,grad_norm: 0.9999998754029323, iteration: 2300
loss: 1.8973588943481445,grad_norm: 0.9999998265406723, iteration: 2301
loss: 2.210401773452759,grad_norm: 0.9999998344833833, iteration: 2302
loss: 2.0842089653015137,grad_norm: 0.9999998197105792, iteration: 2303
loss: 2.0865204334259033,grad_norm: 0.9999998664113982, iteration: 2304
loss: 2.1666409969329834,grad_norm: 0.9999998531375434, iteration: 2305
loss: 2.0692973136901855,grad_norm: 0.9999998340558968, iteration: 2306
loss: 2.0740318298339844,grad_norm: 0.9999998508117807, iteration: 2307
loss: 2.186539888381958,grad_norm: 0.9999998635437026, iteration: 2308
loss: 1.9862349033355713,grad_norm: 0.9999999262631681, iteration: 2309
loss: 2.0650150775909424,grad_norm: 0.999999891832442, iteration: 2310
loss: 1.9965336322784424,grad_norm: 0.9999997305677906, iteration: 2311
loss: 2.140446901321411,grad_norm: 0.999999845713521, iteration: 2312
loss: 1.943952202796936,grad_norm: 0.9999998112073482, iteration: 2313
loss: 2.051663637161255,grad_norm: 0.9999998452679997, iteration: 2314
loss: 1.8895506858825684,grad_norm: 0.9999999013428943, iteration: 2315
loss: 2.0783402919769287,grad_norm: 0.9999998811744386, iteration: 2316
loss: 2.0126969814300537,grad_norm: 0.9999998816272603, iteration: 2317
loss: 2.2402708530426025,grad_norm: 0.99999985118118, iteration: 2318
loss: 2.1337671279907227,grad_norm: 0.9999998726007523, iteration: 2319
loss: 2.0305821895599365,grad_norm: 0.9999998291407697, iteration: 2320
loss: 2.2818522453308105,grad_norm: 0.9999998508705745, iteration: 2321
loss: 2.1905019283294678,grad_norm: 0.9999998276242957, iteration: 2322
loss: 2.0529351234436035,grad_norm: 0.999999863959928, iteration: 2323
loss: 2.330061197280884,grad_norm: 0.9999997964330142, iteration: 2324
loss: 2.1651017665863037,grad_norm: 0.9999998270816871, iteration: 2325
loss: 2.0275683403015137,grad_norm: 0.9999998775720994, iteration: 2326
loss: 2.0193121433258057,grad_norm: 0.9999998575017122, iteration: 2327
loss: 2.1957318782806396,grad_norm: 0.9999999126791579, iteration: 2328
loss: 2.2398478984832764,grad_norm: 0.9999997707227548, iteration: 2329
loss: 2.0794808864593506,grad_norm: 0.9999998059790196, iteration: 2330
loss: 2.0012354850769043,grad_norm: 0.9999998938900349, iteration: 2331
loss: 2.1191766262054443,grad_norm: 0.99999988946756, iteration: 2332
loss: 2.13844895362854,grad_norm: 0.999999832739165, iteration: 2333
loss: 2.132678508758545,grad_norm: 0.9999999121490902, iteration: 2334
loss: 2.1473548412323,grad_norm: 0.9999997670404284, iteration: 2335
loss: 2.138498306274414,grad_norm: 0.9999997892766863, iteration: 2336
loss: 2.2167563438415527,grad_norm: 0.9999998445316567, iteration: 2337
loss: 2.0042834281921387,grad_norm: 0.9999998550014749, iteration: 2338
loss: 2.0926530361175537,grad_norm: 0.9999998334990201, iteration: 2339
loss: 2.0697877407073975,grad_norm: 0.9999998774210035, iteration: 2340
loss: 2.22377872467041,grad_norm: 0.9999998203415609, iteration: 2341
loss: 2.033769130706787,grad_norm: 0.9999998400078564, iteration: 2342
loss: 2.1619231700897217,grad_norm: 0.9999998564735679, iteration: 2343
loss: 2.0449752807617188,grad_norm: 0.9999997902414829, iteration: 2344
loss: 1.9377983808517456,grad_norm: 0.9999998483957702, iteration: 2345
loss: 2.0075740814208984,grad_norm: 0.9999998641875201, iteration: 2346
loss: 2.080800771713257,grad_norm: 0.9999998266486216, iteration: 2347
loss: 2.113422155380249,grad_norm: 0.9999998736903807, iteration: 2348
loss: 2.319852828979492,grad_norm: 0.9999998970837698, iteration: 2349
loss: 2.0908570289611816,grad_norm: 0.999999783412023, iteration: 2350
loss: 1.8791579008102417,grad_norm: 0.9999998117379193, iteration: 2351
loss: 2.1894917488098145,grad_norm: 0.999999872266849, iteration: 2352
loss: 2.0447797775268555,grad_norm: 0.9999998355229583, iteration: 2353
loss: 2.133086681365967,grad_norm: 0.9999998735487808, iteration: 2354
loss: 2.1453466415405273,grad_norm: 0.9999998224351756, iteration: 2355
loss: 2.0125176906585693,grad_norm: 0.9999999042963706, iteration: 2356
loss: 2.200573682785034,grad_norm: 0.9999998607012817, iteration: 2357
loss: 2.1660640239715576,grad_norm: 0.9999998136668378, iteration: 2358
loss: 2.2085230350494385,grad_norm: 0.9999998233992813, iteration: 2359
loss: 2.1023478507995605,grad_norm: 0.9999998781423164, iteration: 2360
loss: 2.2875537872314453,grad_norm: 0.9999998499824508, iteration: 2361
loss: 2.247906446456909,grad_norm: 0.9999998288469318, iteration: 2362
loss: 2.0895841121673584,grad_norm: 0.999999808100557, iteration: 2363
loss: 2.1820547580718994,grad_norm: 0.9999998707758218, iteration: 2364
loss: 2.0404834747314453,grad_norm: 0.9999998260955605, iteration: 2365
loss: 2.182969331741333,grad_norm: 0.9999997840896023, iteration: 2366
loss: 2.006657123565674,grad_norm: 0.9999998829584033, iteration: 2367
loss: 2.2083840370178223,grad_norm: 0.9999999232291509, iteration: 2368
loss: 2.1225757598876953,grad_norm: 0.9999999173487135, iteration: 2369
loss: 2.051649332046509,grad_norm: 0.9999998741795773, iteration: 2370
loss: 2.046175003051758,grad_norm: 0.9999997334523856, iteration: 2371
loss: 2.0140488147735596,grad_norm: 0.9999998274204769, iteration: 2372
loss: 2.0976767539978027,grad_norm: 0.9999998653382921, iteration: 2373
loss: 2.1642274856567383,grad_norm: 0.9999999351756401, iteration: 2374
loss: 1.9957451820373535,grad_norm: 0.999999872193401, iteration: 2375
loss: 2.184103488922119,grad_norm: 0.9999999014191316, iteration: 2376
loss: 1.9557315111160278,grad_norm: 0.9999998897778393, iteration: 2377
loss: 2.1252565383911133,grad_norm: 0.9999998792379464, iteration: 2378
loss: 2.223231792449951,grad_norm: 0.9999998545849162, iteration: 2379
loss: 2.3328237533569336,grad_norm: 0.9999998850408922, iteration: 2380
loss: 2.073410749435425,grad_norm: 0.9999998393410353, iteration: 2381
loss: 2.068341016769409,grad_norm: 0.999999816267069, iteration: 2382
loss: 1.916118860244751,grad_norm: 0.9999998567189147, iteration: 2383
loss: 2.2432379722595215,grad_norm: 0.9999998273630812, iteration: 2384
loss: 2.085944890975952,grad_norm: 0.99999976614883, iteration: 2385
loss: 2.101316452026367,grad_norm: 0.9999998210035574, iteration: 2386
loss: 2.1705334186553955,grad_norm: 0.9999998605522317, iteration: 2387
loss: 2.123072862625122,grad_norm: 0.9999998521575697, iteration: 2388
loss: 2.0501344203948975,grad_norm: 0.999999856895546, iteration: 2389
loss: 2.0489585399627686,grad_norm: 0.9999997895494922, iteration: 2390
loss: 1.929241418838501,grad_norm: 0.9999997898462895, iteration: 2391
loss: 2.014606475830078,grad_norm: 0.9999998066779503, iteration: 2392
loss: 2.0151283740997314,grad_norm: 0.9999998073659471, iteration: 2393
loss: 2.2152748107910156,grad_norm: 0.9999999050521701, iteration: 2394
loss: 2.1161911487579346,grad_norm: 0.9999997955281613, iteration: 2395
loss: 2.161475419998169,grad_norm: 0.9999998393976276, iteration: 2396
loss: 2.106774091720581,grad_norm: 0.9999998525178762, iteration: 2397
loss: 2.220151424407959,grad_norm: 0.9999998721083839, iteration: 2398
loss: 2.1899359226226807,grad_norm: 0.9999997944622939, iteration: 2399
loss: 2.2420082092285156,grad_norm: 0.9999998302433829, iteration: 2400
loss: 2.0815184116363525,grad_norm: 0.9999998118728367, iteration: 2401
loss: 2.2016732692718506,grad_norm: 0.9999997655383304, iteration: 2402
loss: 2.032139539718628,grad_norm: 0.9999998073908059, iteration: 2403
loss: 2.2791342735290527,grad_norm: 0.999999801450936, iteration: 2404
loss: 2.076903820037842,grad_norm: 0.9999998233531271, iteration: 2405
loss: 2.2021923065185547,grad_norm: 0.9999998623251689, iteration: 2406
loss: 2.0848748683929443,grad_norm: 0.9999998518502962, iteration: 2407
loss: 2.065549850463867,grad_norm: 0.9999998575688863, iteration: 2408
loss: 1.9931340217590332,grad_norm: 0.9999998437642836, iteration: 2409
loss: 2.0265300273895264,grad_norm: 0.999999834750727, iteration: 2410
loss: 1.9362584352493286,grad_norm: 0.9999999249543995, iteration: 2411
loss: 2.155449151992798,grad_norm: 0.9999998829040868, iteration: 2412
loss: 2.073864221572876,grad_norm: 0.9999998209344108, iteration: 2413
loss: 2.212419271469116,grad_norm: 0.9999998554894864, iteration: 2414
loss: 2.0563199520111084,grad_norm: 0.999999875975136, iteration: 2415
loss: 2.030632257461548,grad_norm: 0.99999980603515, iteration: 2416
loss: 2.236057996749878,grad_norm: 0.9999998253878876, iteration: 2417
loss: 2.090956211090088,grad_norm: 0.9999999274054968, iteration: 2418
loss: 2.2195627689361572,grad_norm: 0.9999999175618134, iteration: 2419
loss: 1.9532514810562134,grad_norm: 0.9999998650648841, iteration: 2420
loss: 2.068359375,grad_norm: 0.9999999155701255, iteration: 2421
loss: 2.0902745723724365,grad_norm: 0.9999998327002675, iteration: 2422
loss: 2.0714926719665527,grad_norm: 0.9999998504538132, iteration: 2423
loss: 2.0016872882843018,grad_norm: 0.9999998509675121, iteration: 2424
loss: 2.19045352935791,grad_norm: 0.9999999111079336, iteration: 2425
loss: 2.0922274589538574,grad_norm: 0.9999997941249131, iteration: 2426
loss: 2.1893410682678223,grad_norm: 0.9999998377311189, iteration: 2427
loss: 2.0356431007385254,grad_norm: 0.9999998885271402, iteration: 2428
loss: 2.1584689617156982,grad_norm: 0.9999998100032536, iteration: 2429
loss: 2.026887893676758,grad_norm: 0.9999997963759015, iteration: 2430
loss: 2.1410810947418213,grad_norm: 0.9999998176946254, iteration: 2431
loss: 2.167296886444092,grad_norm: 0.9999999172425156, iteration: 2432
loss: 2.1413681507110596,grad_norm: 0.9999998017695023, iteration: 2433
loss: 2.030200242996216,grad_norm: 0.9999998473230156, iteration: 2434
loss: 2.1341779232025146,grad_norm: 0.9999998298026187, iteration: 2435
loss: 2.1124460697174072,grad_norm: 0.9999999132234396, iteration: 2436
loss: 2.071855306625366,grad_norm: 0.9999997637979763, iteration: 2437
loss: 2.0461225509643555,grad_norm: 0.9999998217572468, iteration: 2438
loss: 2.0536720752716064,grad_norm: 0.9999998709731763, iteration: 2439
loss: 2.055290460586548,grad_norm: 0.9999998249361597, iteration: 2440
loss: 2.0540716648101807,grad_norm: 0.999999800770448, iteration: 2441
loss: 2.0735702514648438,grad_norm: 0.9999999000842741, iteration: 2442
loss: 2.2608110904693604,grad_norm: 0.9999998251526435, iteration: 2443
loss: 2.0178346633911133,grad_norm: 0.9999998413147293, iteration: 2444
loss: 2.0072572231292725,grad_norm: 0.9999998920027159, iteration: 2445
loss: 1.9206647872924805,grad_norm: 0.9999998161695657, iteration: 2446
loss: 2.168980598449707,grad_norm: 0.9999999031955055, iteration: 2447
loss: 2.1371352672576904,grad_norm: 0.9999998925229301, iteration: 2448
loss: 2.181575298309326,grad_norm: 0.9999999163140734, iteration: 2449
loss: 2.1216087341308594,grad_norm: 0.9999998534492578, iteration: 2450
loss: 2.0935609340667725,grad_norm: 0.9999998345810444, iteration: 2451
loss: 2.059589385986328,grad_norm: 0.9999998301878009, iteration: 2452
loss: 2.0583198070526123,grad_norm: 0.9999999135037778, iteration: 2453
loss: 2.037172555923462,grad_norm: 0.9999997741893965, iteration: 2454
loss: 2.0343189239501953,grad_norm: 0.9999998738601159, iteration: 2455
loss: 2.057037115097046,grad_norm: 0.9999998738267604, iteration: 2456
loss: 2.060544729232788,grad_norm: 0.9999998486709079, iteration: 2457
loss: 1.9377423524856567,grad_norm: 0.9999998036301737, iteration: 2458
loss: 2.101585626602173,grad_norm: 0.9999998354117413, iteration: 2459
loss: 2.055114984512329,grad_norm: 0.9999999093998675, iteration: 2460
loss: 2.2827706336975098,grad_norm: 0.9999998602425209, iteration: 2461
loss: 2.112414836883545,grad_norm: 0.9999999084695536, iteration: 2462
loss: 2.0038979053497314,grad_norm: 0.9999998445494142, iteration: 2463
loss: 2.080842971801758,grad_norm: 0.9999998792965092, iteration: 2464
loss: 2.0855062007904053,grad_norm: 0.9999998562387303, iteration: 2465
loss: 1.9182759523391724,grad_norm: 0.9999998844579002, iteration: 2466
loss: 2.0417823791503906,grad_norm: 0.9999998223880882, iteration: 2467
loss: 1.9498754739761353,grad_norm: 0.999999819860482, iteration: 2468
loss: 1.981912612915039,grad_norm: 0.9999998623811793, iteration: 2469
loss: 2.0854873657226562,grad_norm: 0.9999997799188021, iteration: 2470
loss: 2.113656759262085,grad_norm: 0.9999998277793252, iteration: 2471
loss: 1.9827685356140137,grad_norm: 0.9999998400018929, iteration: 2472
loss: 2.1076927185058594,grad_norm: 0.9999997898156483, iteration: 2473
loss: 2.018268585205078,grad_norm: 0.9999998662034975, iteration: 2474
loss: 2.2026476860046387,grad_norm: 0.9999998557348426, iteration: 2475
loss: 2.0287528038024902,grad_norm: 0.9999999009240417, iteration: 2476
loss: 2.039645195007324,grad_norm: 0.9999998155764616, iteration: 2477
loss: 2.1138932704925537,grad_norm: 0.9999998906784093, iteration: 2478
loss: 1.9892770051956177,grad_norm: 0.9999998026168798, iteration: 2479
loss: 2.002027750015259,grad_norm: 0.9999998260547202, iteration: 2480
loss: 1.93100106716156,grad_norm: 0.9999998279107732, iteration: 2481
loss: 2.1661252975463867,grad_norm: 0.9999998391131435, iteration: 2482
loss: 2.13362979888916,grad_norm: 0.9999998211980429, iteration: 2483
loss: 2.045741319656372,grad_norm: 0.9999998451528835, iteration: 2484
loss: 2.088660478591919,grad_norm: 0.9999999178715623, iteration: 2485
loss: 2.070833206176758,grad_norm: 0.9999998783854702, iteration: 2486
loss: 2.1073427200317383,grad_norm: 0.999999866092537, iteration: 2487
loss: 2.0843217372894287,grad_norm: 0.999999879249334, iteration: 2488
loss: 2.118155002593994,grad_norm: 0.999999757129265, iteration: 2489
loss: 2.0539498329162598,grad_norm: 0.9999997409193684, iteration: 2490
loss: 2.033137321472168,grad_norm: 0.9999998743878875, iteration: 2491
loss: 2.0057523250579834,grad_norm: 0.9999997805948292, iteration: 2492
loss: 1.988637089729309,grad_norm: 0.9999998236097043, iteration: 2493
loss: 2.093108892440796,grad_norm: 0.9999998833350019, iteration: 2494
loss: 2.094083070755005,grad_norm: 0.9999998736305783, iteration: 2495
loss: 2.0162670612335205,grad_norm: 0.9999998752573854, iteration: 2496
loss: 1.9814666509628296,grad_norm: 0.9999998743173876, iteration: 2497
loss: 2.0339627265930176,grad_norm: 0.9999998193798051, iteration: 2498
loss: 1.9097084999084473,grad_norm: 0.9999998332340471, iteration: 2499
loss: 2.1121740341186523,grad_norm: 0.9999997751459754, iteration: 2500
loss: 2.034421920776367,grad_norm: 0.9999998877255746, iteration: 2501
loss: 2.027998208999634,grad_norm: 0.9999998378630937, iteration: 2502
loss: 1.9876428842544556,grad_norm: 0.9999999176180635, iteration: 2503
loss: 2.0536744594573975,grad_norm: 0.9999999092117673, iteration: 2504
loss: 1.7888407707214355,grad_norm: 0.9999998594413038, iteration: 2505
loss: 2.0267930030822754,grad_norm: 0.9999999140207256, iteration: 2506
loss: 2.088266372680664,grad_norm: 0.9999998218447105, iteration: 2507
loss: 1.9273239374160767,grad_norm: 0.9999997989685432, iteration: 2508
loss: 1.9894933700561523,grad_norm: 0.9999998868731202, iteration: 2509
loss: 1.9384549856185913,grad_norm: 0.9999998283859448, iteration: 2510
loss: 2.0582683086395264,grad_norm: 0.9999998051786817, iteration: 2511
loss: 2.193739891052246,grad_norm: 0.9999998893728563, iteration: 2512
loss: 2.215731143951416,grad_norm: 0.9999997955449998, iteration: 2513
loss: 2.0917129516601562,grad_norm: 0.9999997896620334, iteration: 2514
loss: 2.115464210510254,grad_norm: 0.9999998827383154, iteration: 2515
loss: 2.166231393814087,grad_norm: 0.9999999009538159, iteration: 2516
loss: 2.1784048080444336,grad_norm: 0.9999998242675741, iteration: 2517
loss: 2.093959331512451,grad_norm: 0.9999997930718896, iteration: 2518
loss: 1.8993582725524902,grad_norm: 0.9999999013361354, iteration: 2519
loss: 2.0644166469573975,grad_norm: 0.9999997822690377, iteration: 2520
loss: 2.0399649143218994,grad_norm: 0.9999998324708625, iteration: 2521
loss: 2.0026473999023438,grad_norm: 0.9999998706371515, iteration: 2522
loss: 2.1488637924194336,grad_norm: 0.9999997787113112, iteration: 2523
loss: 2.148742198944092,grad_norm: 0.9999997750224232, iteration: 2524
loss: 2.072878360748291,grad_norm: 0.9999998775313272, iteration: 2525
loss: 2.1134402751922607,grad_norm: 0.9999997680744541, iteration: 2526
loss: 2.0764315128326416,grad_norm: 0.9999997623989789, iteration: 2527
loss: 2.024611234664917,grad_norm: 0.999999830516954, iteration: 2528
loss: 1.98902428150177,grad_norm: 0.9999998941823742, iteration: 2529
loss: 1.983010172843933,grad_norm: 0.9999998132414218, iteration: 2530
loss: 2.0727293491363525,grad_norm: 0.9999998137645668, iteration: 2531
loss: 2.011852502822876,grad_norm: 0.9999998772149687, iteration: 2532
loss: 2.153946876525879,grad_norm: 0.9999998245429131, iteration: 2533
loss: 2.2541677951812744,grad_norm: 0.9999998709848829, iteration: 2534
loss: 2.0203378200531006,grad_norm: 0.9999997700769715, iteration: 2535
loss: 2.126253128051758,grad_norm: 0.9999998172772192, iteration: 2536
loss: 2.0690982341766357,grad_norm: 0.9999998007301483, iteration: 2537
loss: 1.9936045408248901,grad_norm: 0.9999998674246042, iteration: 2538
loss: 2.083017349243164,grad_norm: 0.9999998101992716, iteration: 2539
loss: 2.0555973052978516,grad_norm: 0.9999998787453794, iteration: 2540
loss: 2.2571558952331543,grad_norm: 0.9999998848753977, iteration: 2541
loss: 2.222346782684326,grad_norm: 0.9999998067779021, iteration: 2542
loss: 2.005319595336914,grad_norm: 0.9999998079296119, iteration: 2543
loss: 2.0859339237213135,grad_norm: 0.9999998032288481, iteration: 2544
loss: 2.061702013015747,grad_norm: 0.9999997847026922, iteration: 2545
loss: 1.9366190433502197,grad_norm: 0.9999998353583206, iteration: 2546
loss: 1.9807919263839722,grad_norm: 0.9999998481365558, iteration: 2547
loss: 2.0292978286743164,grad_norm: 0.9999998307184869, iteration: 2548
loss: 2.0087053775787354,grad_norm: 0.9999998616660521, iteration: 2549
loss: 1.9981433153152466,grad_norm: 0.9999999367567975, iteration: 2550
loss: 1.9414390325546265,grad_norm: 0.9999998400827579, iteration: 2551
loss: 1.9436371326446533,grad_norm: 0.999999803146678, iteration: 2552
loss: 2.165529489517212,grad_norm: 0.9999997707907157, iteration: 2553
loss: 2.0434818267822266,grad_norm: 0.9999999019828616, iteration: 2554
loss: 1.9106266498565674,grad_norm: 0.9999998256914987, iteration: 2555
loss: 2.000214099884033,grad_norm: 0.9999998780749441, iteration: 2556
loss: 1.9940890073776245,grad_norm: 0.9999998205074938, iteration: 2557
loss: 2.0684547424316406,grad_norm: 0.9999998310929601, iteration: 2558
loss: 1.9202485084533691,grad_norm: 0.9999997994637688, iteration: 2559
loss: 2.130350112915039,grad_norm: 0.9999999001837296, iteration: 2560
loss: 2.0810883045196533,grad_norm: 0.9999997847209076, iteration: 2561
loss: 1.9657472372055054,grad_norm: 0.9999999223371807, iteration: 2562
loss: 1.9824235439300537,grad_norm: 0.9999998171551834, iteration: 2563
loss: 2.234468936920166,grad_norm: 0.9999998964658179, iteration: 2564
loss: 2.0153510570526123,grad_norm: 0.9999997789106287, iteration: 2565
loss: 2.030088424682617,grad_norm: 0.9999998894848078, iteration: 2566
loss: 1.878759503364563,grad_norm: 0.9999997543577899, iteration: 2567
loss: 1.9502829313278198,grad_norm: 0.9999998201280611, iteration: 2568
loss: 2.029571533203125,grad_norm: 0.9999998760556487, iteration: 2569
loss: 2.1636099815368652,grad_norm: 0.9999998148389261, iteration: 2570
loss: 2.103368043899536,grad_norm: 0.9999997731471313, iteration: 2571
loss: 1.9303327798843384,grad_norm: 0.9999998055236188, iteration: 2572
loss: 1.929677963256836,grad_norm: 0.9999998402498393, iteration: 2573
loss: 2.019211769104004,grad_norm: 0.9999999217845098, iteration: 2574
loss: 2.0842220783233643,grad_norm: 0.9999998424001554, iteration: 2575
loss: 2.0179240703582764,grad_norm: 0.9999998662586251, iteration: 2576
loss: 2.015610933303833,grad_norm: 0.9999998158169127, iteration: 2577
loss: 2.0850679874420166,grad_norm: 0.9999998311688566, iteration: 2578
loss: 2.1458678245544434,grad_norm: 0.9999997601968048, iteration: 2579
loss: 1.7461987733840942,grad_norm: 0.9999998038165837, iteration: 2580
loss: 2.048518657684326,grad_norm: 0.9999998733352629, iteration: 2581
loss: 2.000396728515625,grad_norm: 0.9999998470887372, iteration: 2582
loss: 2.058628797531128,grad_norm: 0.9999998151440715, iteration: 2583
loss: 1.9612411260604858,grad_norm: 0.9999998368807216, iteration: 2584
loss: 2.1382923126220703,grad_norm: 0.9999999280615424, iteration: 2585
loss: 2.0351927280426025,grad_norm: 0.9999998145132956, iteration: 2586
loss: 2.1233744621276855,grad_norm: 0.9999998690983789, iteration: 2587
loss: 2.021070718765259,grad_norm: 0.9999998659957878, iteration: 2588
loss: 1.922794222831726,grad_norm: 0.9999998008165121, iteration: 2589
loss: 2.0752758979797363,grad_norm: 0.9999998580458225, iteration: 2590
loss: 1.9416884183883667,grad_norm: 0.9999999102990758, iteration: 2591
loss: 2.0537850856781006,grad_norm: 0.9999998311472139, iteration: 2592
loss: 1.9767022132873535,grad_norm: 0.9999998087663078, iteration: 2593
loss: 2.0748865604400635,grad_norm: 0.999999911301188, iteration: 2594
loss: 2.0382776260375977,grad_norm: 0.9999998520660291, iteration: 2595
loss: 1.9132238626480103,grad_norm: 0.9999999034302429, iteration: 2596
loss: 1.9468212127685547,grad_norm: 0.9999997639897805, iteration: 2597
loss: 2.053764581680298,grad_norm: 0.9999999437205629, iteration: 2598
loss: 2.0825562477111816,grad_norm: 0.999999793192404, iteration: 2599
loss: 1.9685945510864258,grad_norm: 0.9999998372156577, iteration: 2600
loss: 2.019967555999756,grad_norm: 0.9999998762912947, iteration: 2601
loss: 2.056914806365967,grad_norm: 0.9999998073232403, iteration: 2602
loss: 2.146118640899658,grad_norm: 0.9999998696570455, iteration: 2603
loss: 2.076350212097168,grad_norm: 0.9999998305206542, iteration: 2604
loss: 1.9283742904663086,grad_norm: 0.9999998812597991, iteration: 2605
loss: 1.9830403327941895,grad_norm: 0.9999997689667649, iteration: 2606
loss: 1.8683735132217407,grad_norm: 0.9999998842372403, iteration: 2607
loss: 1.951409935951233,grad_norm: 0.9999997956924627, iteration: 2608
loss: 2.016810894012451,grad_norm: 0.9999997526544553, iteration: 2609
loss: 1.9472240209579468,grad_norm: 0.9999998846659028, iteration: 2610
loss: 1.9990431070327759,grad_norm: 0.9999998204639573, iteration: 2611
loss: 1.9367707967758179,grad_norm: 0.9999998192148731, iteration: 2612
loss: 1.9507461786270142,grad_norm: 0.9999998689316182, iteration: 2613
loss: 2.02109956741333,grad_norm: 0.9999998066154786, iteration: 2614
loss: 2.0262937545776367,grad_norm: 0.9999998023570289, iteration: 2615
loss: 1.9404528141021729,grad_norm: 0.9999997967331045, iteration: 2616
loss: 1.9250037670135498,grad_norm: 0.9999998689560716, iteration: 2617
loss: 2.2007088661193848,grad_norm: 0.9999998243898018, iteration: 2618
loss: 1.923007607460022,grad_norm: 0.9999998742753822, iteration: 2619
loss: 2.017703056335449,grad_norm: 0.9999998960580547, iteration: 2620
loss: 1.9334110021591187,grad_norm: 0.9999998005169629, iteration: 2621
loss: 1.96660315990448,grad_norm: 0.9999999497010512, iteration: 2622
loss: 2.091235876083374,grad_norm: 0.9999997980126095, iteration: 2623
loss: 2.078278064727783,grad_norm: 0.9999999221818976, iteration: 2624
loss: 2.0337586402893066,grad_norm: 0.9999999033992388, iteration: 2625
loss: 2.1066226959228516,grad_norm: 0.9999998564751875, iteration: 2626
loss: 1.920903205871582,grad_norm: 0.9999998484775251, iteration: 2627
loss: 1.9496859312057495,grad_norm: 0.9999999259928053, iteration: 2628
loss: 2.047537088394165,grad_norm: 0.9999998333555279, iteration: 2629
loss: 2.1171672344207764,grad_norm: 0.9999998688946924, iteration: 2630
loss: 1.9098507165908813,grad_norm: 0.999999891198682, iteration: 2631
loss: 1.99308443069458,grad_norm: 0.9999998794555293, iteration: 2632
loss: 2.0783586502075195,grad_norm: 0.999999790414956, iteration: 2633
loss: 2.0127127170562744,grad_norm: 0.9999998732248037, iteration: 2634
loss: 2.0845248699188232,grad_norm: 0.9999998501282705, iteration: 2635
loss: 2.0451419353485107,grad_norm: 0.9999998943871128, iteration: 2636
loss: 2.0768239498138428,grad_norm: 0.9999997543469081, iteration: 2637
loss: 1.941820740699768,grad_norm: 0.9999997976888244, iteration: 2638
loss: 1.9361872673034668,grad_norm: 0.9999999075051205, iteration: 2639
loss: 2.0847816467285156,grad_norm: 0.999999882681767, iteration: 2640
loss: 1.918583869934082,grad_norm: 0.999999863834612, iteration: 2641
loss: 2.117218017578125,grad_norm: 0.999999824688743, iteration: 2642
loss: 2.020346164703369,grad_norm: 0.999999805676053, iteration: 2643
loss: 2.139760971069336,grad_norm: 0.9999998844694081, iteration: 2644
loss: 1.9369488954544067,grad_norm: 0.9999998665513493, iteration: 2645
loss: 1.9343125820159912,grad_norm: 0.9999997877118433, iteration: 2646
loss: 1.8961700201034546,grad_norm: 0.9999997905173327, iteration: 2647
loss: 1.885033369064331,grad_norm: 0.9999999007294549, iteration: 2648
loss: 2.2458434104919434,grad_norm: 0.9999999036074982, iteration: 2649
loss: 1.9995254278182983,grad_norm: 0.9999998469604003, iteration: 2650
loss: 2.0678153038024902,grad_norm: 0.9999999198131871, iteration: 2651
loss: 2.0192954540252686,grad_norm: 0.999999816730767, iteration: 2652
loss: 1.93949556350708,grad_norm: 0.9999998484870048, iteration: 2653
loss: 1.9881535768508911,grad_norm: 0.9999999355279671, iteration: 2654
loss: 1.8909345865249634,grad_norm: 0.9999998108946201, iteration: 2655
loss: 2.0411484241485596,grad_norm: 0.9999998772694695, iteration: 2656
loss: 2.154616594314575,grad_norm: 0.9999998732770569, iteration: 2657
loss: 1.9542895555496216,grad_norm: 0.99999983085123, iteration: 2658
loss: 2.0245094299316406,grad_norm: 0.9999998981613435, iteration: 2659
loss: 1.9965004920959473,grad_norm: 0.9999997857146092, iteration: 2660
loss: 1.9941450357437134,grad_norm: 0.9999998586021248, iteration: 2661
loss: 2.0942795276641846,grad_norm: 0.9999998209253853, iteration: 2662
loss: 1.9854426383972168,grad_norm: 0.9999998301094954, iteration: 2663
loss: 1.8922560214996338,grad_norm: 0.9999998767029316, iteration: 2664
loss: 2.0088610649108887,grad_norm: 0.9999998084949222, iteration: 2665
loss: 2.036069393157959,grad_norm: 0.999999832552934, iteration: 2666
loss: 2.1555354595184326,grad_norm: 0.9999998311910174, iteration: 2667
loss: 2.029163360595703,grad_norm: 0.9999998713898909, iteration: 2668
loss: 2.1052372455596924,grad_norm: 0.999999840832737, iteration: 2669
loss: 2.056671380996704,grad_norm: 0.9999998011817092, iteration: 2670
loss: 1.9857871532440186,grad_norm: 0.9999998417817598, iteration: 2671
loss: 1.8849616050720215,grad_norm: 0.9999998381570613, iteration: 2672
loss: 2.049466371536255,grad_norm: 0.9999998519835465, iteration: 2673
loss: 2.0396902561187744,grad_norm: 0.9999998583208163, iteration: 2674
loss: 2.0680124759674072,grad_norm: 0.9999997742038512, iteration: 2675
loss: 1.9320513010025024,grad_norm: 0.9999997468197416, iteration: 2676
loss: 1.9376013278961182,grad_norm: 0.99999984084265, iteration: 2677
loss: 2.0010263919830322,grad_norm: 0.9999998169633862, iteration: 2678
loss: 1.823134183883667,grad_norm: 0.9999998648151784, iteration: 2679
loss: 1.8826658725738525,grad_norm: 0.9999998114117664, iteration: 2680
loss: 1.9272757768630981,grad_norm: 0.9999998357815858, iteration: 2681
loss: 1.9061691761016846,grad_norm: 0.999999796963951, iteration: 2682
loss: 1.9731318950653076,grad_norm: 0.9999997954116661, iteration: 2683
loss: 1.8878066539764404,grad_norm: 0.9999997623697886, iteration: 2684
loss: 1.9922012090682983,grad_norm: 0.9999998175029855, iteration: 2685
loss: 1.99770987033844,grad_norm: 0.999999898972695, iteration: 2686
loss: 2.0184357166290283,grad_norm: 0.9999999029012022, iteration: 2687
loss: 1.9485223293304443,grad_norm: 0.9999997956965332, iteration: 2688
loss: 2.158426284790039,grad_norm: 0.9999998627986528, iteration: 2689
loss: 2.0359530448913574,grad_norm: 0.9999998786314811, iteration: 2690
loss: 2.2075772285461426,grad_norm: 0.9999998626163115, iteration: 2691
loss: 1.9843523502349854,grad_norm: 0.9999999155892172, iteration: 2692
loss: 1.8464292287826538,grad_norm: 0.9999998697281507, iteration: 2693
loss: 2.052659749984741,grad_norm: 0.9999997566641589, iteration: 2694
loss: 1.954008936882019,grad_norm: 0.9999998279080801, iteration: 2695
loss: 2.0908312797546387,grad_norm: 0.9999997697896215, iteration: 2696
loss: 1.9625368118286133,grad_norm: 0.9999999048710597, iteration: 2697
loss: 2.0503549575805664,grad_norm: 0.9999998287261855, iteration: 2698
loss: 2.0138473510742188,grad_norm: 0.9999998727865009, iteration: 2699
loss: 2.0090701580047607,grad_norm: 0.9999998095688347, iteration: 2700
loss: 2.0734190940856934,grad_norm: 0.9999998535889054, iteration: 2701
loss: 1.9483669996261597,grad_norm: 0.9999997512236954, iteration: 2702
loss: 1.9183012247085571,grad_norm: 0.9999998590861009, iteration: 2703
loss: 1.9594335556030273,grad_norm: 0.9999998059107507, iteration: 2704
loss: 1.9505877494812012,grad_norm: 0.9999998595964006, iteration: 2705
loss: 2.04390287399292,grad_norm: 0.999999840312882, iteration: 2706
loss: 1.9237560033798218,grad_norm: 0.9999997758672847, iteration: 2707
loss: 2.062030076980591,grad_norm: 0.999999810079167, iteration: 2708
loss: 1.8345801830291748,grad_norm: 0.999999783617111, iteration: 2709
loss: 2.0487873554229736,grad_norm: 0.9999998414176425, iteration: 2710
loss: 2.0182530879974365,grad_norm: 0.9999998935209531, iteration: 2711
loss: 2.083707809448242,grad_norm: 0.9999998182113885, iteration: 2712
loss: 2.014357328414917,grad_norm: 0.999999820359226, iteration: 2713
loss: 2.027059316635132,grad_norm: 0.999999854600834, iteration: 2714
loss: 2.0985605716705322,grad_norm: 0.9999997658244819, iteration: 2715
loss: 1.9927349090576172,grad_norm: 0.9999999058524516, iteration: 2716
loss: 1.9760291576385498,grad_norm: 0.9999997515217682, iteration: 2717
loss: 1.9839295148849487,grad_norm: 0.9999997896221097, iteration: 2718
loss: 1.8088722229003906,grad_norm: 0.9999997995331459, iteration: 2719
loss: 2.1206090450286865,grad_norm: 0.9999998060795297, iteration: 2720
loss: 1.9797972440719604,grad_norm: 0.9999998464914933, iteration: 2721
loss: 1.986130952835083,grad_norm: 0.9999997623603333, iteration: 2722
loss: 2.051544427871704,grad_norm: 0.999999826990231, iteration: 2723
loss: 1.9712207317352295,grad_norm: 0.9999998452478829, iteration: 2724
loss: 2.136164903640747,grad_norm: 0.9999998539907257, iteration: 2725
loss: 1.8734042644500732,grad_norm: 0.9999997751867712, iteration: 2726
loss: 1.9919416904449463,grad_norm: 0.999999833118282, iteration: 2727
loss: 1.9559848308563232,grad_norm: 0.9999998001792907, iteration: 2728
loss: 1.8667733669281006,grad_norm: 0.9999999194973174, iteration: 2729
loss: 1.9878544807434082,grad_norm: 0.999999884333031, iteration: 2730
loss: 1.9014616012573242,grad_norm: 0.9999998806815018, iteration: 2731
loss: 2.0016329288482666,grad_norm: 0.9999997970093966, iteration: 2732
loss: 1.8337364196777344,grad_norm: 0.9999998142653161, iteration: 2733
loss: 2.0452804565429688,grad_norm: 0.9999998128365595, iteration: 2734
loss: 1.973726511001587,grad_norm: 0.9999998022293756, iteration: 2735
loss: 2.0013833045959473,grad_norm: 0.9999998419182272, iteration: 2736
loss: 2.072038173675537,grad_norm: 0.9999999550384427, iteration: 2737
loss: 2.141547918319702,grad_norm: 0.9999998735816749, iteration: 2738
loss: 1.9183037281036377,grad_norm: 0.9999998169413913, iteration: 2739
loss: 2.061901330947876,grad_norm: 0.9999998537033582, iteration: 2740
loss: 1.9162365198135376,grad_norm: 0.9999998567841535, iteration: 2741
loss: 1.9528416395187378,grad_norm: 0.9999999083477947, iteration: 2742
loss: 1.9787324666976929,grad_norm: 0.9999998230685487, iteration: 2743
loss: 1.8866580724716187,grad_norm: 0.9999998358608265, iteration: 2744
loss: 2.0516600608825684,grad_norm: 0.9999999018554195, iteration: 2745
loss: 1.8245707750320435,grad_norm: 0.9999998731242711, iteration: 2746
loss: 1.9616174697875977,grad_norm: 0.9999998688278741, iteration: 2747
loss: 2.0179200172424316,grad_norm: 0.9999997848694182, iteration: 2748
loss: 2.095032215118408,grad_norm: 0.9999998424450228, iteration: 2749
loss: 2.021477460861206,grad_norm: 0.9999998285892902, iteration: 2750
loss: 2.215153932571411,grad_norm: 0.9999998359153044, iteration: 2751
loss: 2.1141090393066406,grad_norm: 0.9999998142876871, iteration: 2752
loss: 2.115548849105835,grad_norm: 0.9999997594752686, iteration: 2753
loss: 1.8767378330230713,grad_norm: 0.9999998657102703, iteration: 2754
loss: 1.9580023288726807,grad_norm: 0.9999997815163422, iteration: 2755
loss: 1.9886410236358643,grad_norm: 0.999999925408502, iteration: 2756
loss: 1.9360036849975586,grad_norm: 0.9999998232509242, iteration: 2757
loss: 1.9853440523147583,grad_norm: 0.9999997782945416, iteration: 2758
loss: 2.1438772678375244,grad_norm: 0.9999998288507961, iteration: 2759
loss: 1.9846807718276978,grad_norm: 0.9999998431223166, iteration: 2760
loss: 2.010802984237671,grad_norm: 0.9999997730950428, iteration: 2761
loss: 1.923372507095337,grad_norm: 0.9999997685008192, iteration: 2762
loss: 1.840562105178833,grad_norm: 0.9999998113901032, iteration: 2763
loss: 2.1032190322875977,grad_norm: 0.9999998278939868, iteration: 2764
loss: 1.9600739479064941,grad_norm: 0.9999997731896095, iteration: 2765
loss: 2.0233263969421387,grad_norm: 0.9999997717582484, iteration: 2766
loss: 1.8745110034942627,grad_norm: 0.9999997431041129, iteration: 2767
loss: 1.8860975503921509,grad_norm: 0.9999997445224691, iteration: 2768
loss: 1.9376925230026245,grad_norm: 0.9999998240731754, iteration: 2769
loss: 2.1437973976135254,grad_norm: 0.999999867584593, iteration: 2770
loss: 2.0268335342407227,grad_norm: 0.9999999246827246, iteration: 2771
loss: 1.968244194984436,grad_norm: 0.9999998965082816, iteration: 2772
loss: 1.8536362648010254,grad_norm: 0.9999998724537684, iteration: 2773
loss: 1.9411181211471558,grad_norm: 0.9999997926102979, iteration: 2774
loss: 2.127965211868286,grad_norm: 0.9999998115698097, iteration: 2775
loss: 1.9248170852661133,grad_norm: 0.9999997647649954, iteration: 2776
loss: 1.848347783088684,grad_norm: 0.9999999474650397, iteration: 2777
loss: 1.97383451461792,grad_norm: 0.9999998902722702, iteration: 2778
loss: 2.079526424407959,grad_norm: 0.9999998933929005, iteration: 2779
loss: 1.6883065700531006,grad_norm: 0.9999998859757883, iteration: 2780
loss: 1.8160477876663208,grad_norm: 0.9999998856040657, iteration: 2781
loss: 1.8979586362838745,grad_norm: 0.9999998002318178, iteration: 2782
loss: 1.8812474012374878,grad_norm: 0.9999997678604754, iteration: 2783
loss: 1.890316367149353,grad_norm: 0.9999998893986842, iteration: 2784
loss: 1.7427486181259155,grad_norm: 0.99999978298353, iteration: 2785
loss: 1.8827927112579346,grad_norm: 0.9999998590992236, iteration: 2786
loss: 1.9797823429107666,grad_norm: 0.9999998835009677, iteration: 2787
loss: 2.033066511154175,grad_norm: 0.9999998047268869, iteration: 2788
loss: 2.0796425342559814,grad_norm: 0.999999820581889, iteration: 2789
loss: 1.7512913942337036,grad_norm: 0.9999997874867008, iteration: 2790
loss: 2.1206018924713135,grad_norm: 0.9999998223990777, iteration: 2791
loss: 1.914857029914856,grad_norm: 0.9999999157460807, iteration: 2792
loss: 2.0885891914367676,grad_norm: 0.9999998169784511, iteration: 2793
loss: 1.9345606565475464,grad_norm: 0.9999998656796093, iteration: 2794
loss: 1.9640262126922607,grad_norm: 0.9999998559659863, iteration: 2795
loss: 1.986181616783142,grad_norm: 0.9999998189802337, iteration: 2796
loss: 2.0133261680603027,grad_norm: 0.9999998345580424, iteration: 2797
loss: 1.9159963130950928,grad_norm: 0.9999998366373523, iteration: 2798
loss: 1.99285089969635,grad_norm: 0.9999998308494435, iteration: 2799
loss: 1.9624879360198975,grad_norm: 0.9999997733886493, iteration: 2800
loss: 1.8383086919784546,grad_norm: 0.9999998170529337, iteration: 2801
loss: 1.9384006261825562,grad_norm: 0.9999999436095731, iteration: 2802
loss: 1.8829156160354614,grad_norm: 0.9999998676364291, iteration: 2803
loss: 1.8602536916732788,grad_norm: 0.9999998380881351, iteration: 2804
loss: 1.768576979637146,grad_norm: 0.999999873212208, iteration: 2805
loss: 1.8943772315979004,grad_norm: 0.9999998144766022, iteration: 2806
loss: 1.8908336162567139,grad_norm: 0.9999998504240292, iteration: 2807
loss: 2.085096836090088,grad_norm: 0.9999997820442333, iteration: 2808
loss: 1.7708561420440674,grad_norm: 0.9999998208605961, iteration: 2809
loss: 1.7261273860931396,grad_norm: 0.9999997943737051, iteration: 2810
loss: 1.991296648979187,grad_norm: 0.9999997884246854, iteration: 2811
loss: 1.8701940774917603,grad_norm: 0.999999778521723, iteration: 2812
loss: 2.1019225120544434,grad_norm: 0.9999998767041376, iteration: 2813
loss: 1.9964238405227661,grad_norm: 0.9999998319611614, iteration: 2814
loss: 1.8115150928497314,grad_norm: 0.9999999173719317, iteration: 2815
loss: 1.9158436059951782,grad_norm: 0.9999998938462207, iteration: 2816
loss: 1.9155155420303345,grad_norm: 0.9999998995967162, iteration: 2817
loss: 1.9617403745651245,grad_norm: 0.9999998725503783, iteration: 2818
loss: 2.0725300312042236,grad_norm: 0.9999997926382669, iteration: 2819
loss: 1.9144604206085205,grad_norm: 0.9999998056541294, iteration: 2820
loss: 2.0107429027557373,grad_norm: 0.9999998408979484, iteration: 2821
loss: 1.9530327320098877,grad_norm: 0.9999998130502493, iteration: 2822
loss: 1.9320728778839111,grad_norm: 0.9999998014691012, iteration: 2823
loss: 2.0737411975860596,grad_norm: 0.9999998055412253, iteration: 2824
loss: 1.8364852666854858,grad_norm: 0.9999998027113153, iteration: 2825
loss: 1.9109877347946167,grad_norm: 0.999999781619512, iteration: 2826
loss: 1.9649275541305542,grad_norm: 0.9999998488945151, iteration: 2827
loss: 1.8581115007400513,grad_norm: 0.9999998112364035, iteration: 2828
loss: 2.0221028327941895,grad_norm: 0.9999998239199285, iteration: 2829
loss: 2.0025839805603027,grad_norm: 0.9999998627645987, iteration: 2830
loss: 1.9944974184036255,grad_norm: 0.9999998140720819, iteration: 2831
loss: 1.843803882598877,grad_norm: 0.9999998052865405, iteration: 2832
loss: 2.0387144088745117,grad_norm: 0.9999998929569271, iteration: 2833
loss: 1.828216552734375,grad_norm: 0.999999781933382, iteration: 2834
loss: 2.011882781982422,grad_norm: 0.9999998583725499, iteration: 2835
loss: 1.9152719974517822,grad_norm: 0.9999998453668766, iteration: 2836
loss: 2.0461037158966064,grad_norm: 0.9999999100926916, iteration: 2837
loss: 1.8919100761413574,grad_norm: 0.9999999110864067, iteration: 2838
loss: 1.9485002756118774,grad_norm: 0.9999997288133546, iteration: 2839
loss: 1.8538044691085815,grad_norm: 0.9999997589978152, iteration: 2840
loss: 2.063702344894409,grad_norm: 0.9999998507019571, iteration: 2841
loss: 1.907997965812683,grad_norm: 0.9999998424875566, iteration: 2842
loss: 1.8694814443588257,grad_norm: 0.9999998913542557, iteration: 2843
loss: 1.707938551902771,grad_norm: 0.9999999066762097, iteration: 2844
loss: 1.938676118850708,grad_norm: 0.9999998886117761, iteration: 2845
loss: 2.002798080444336,grad_norm: 0.9999997919089768, iteration: 2846
loss: 1.9849492311477661,grad_norm: 0.9999998609389746, iteration: 2847
loss: 2.0471150875091553,grad_norm: 0.9999998420192363, iteration: 2848
loss: 1.9673442840576172,grad_norm: 0.999999848280537, iteration: 2849
loss: 1.8615440130233765,grad_norm: 0.9999998808531733, iteration: 2850
loss: 1.8547955751419067,grad_norm: 0.9999998352262034, iteration: 2851
loss: 2.072376251220703,grad_norm: 0.999999833442204, iteration: 2852
loss: 1.932249903678894,grad_norm: 0.9999998416434341, iteration: 2853
loss: 2.1006686687469482,grad_norm: 0.9999998935059174, iteration: 2854
loss: 1.8191462755203247,grad_norm: 0.9999998532506444, iteration: 2855
loss: 1.996752142906189,grad_norm: 0.9999997976699704, iteration: 2856
loss: 1.8979125022888184,grad_norm: 0.9999998558954466, iteration: 2857
loss: 2.0079879760742188,grad_norm: 0.9999998574598481, iteration: 2858
loss: 1.9774171113967896,grad_norm: 0.999999804757673, iteration: 2859
loss: 1.9863916635513306,grad_norm: 0.9999997717742575, iteration: 2860
loss: 1.9454737901687622,grad_norm: 0.9999998678747429, iteration: 2861
loss: 1.970646619796753,grad_norm: 0.9999998042664712, iteration: 2862
loss: 2.0268757343292236,grad_norm: 0.9999999101459269, iteration: 2863
loss: 1.8249157667160034,grad_norm: 0.9999998615706363, iteration: 2864
loss: 2.02703595161438,grad_norm: 0.9999999091842914, iteration: 2865
loss: 2.0231213569641113,grad_norm: 0.9999998317183654, iteration: 2866
loss: 1.9812946319580078,grad_norm: 0.9999998164211027, iteration: 2867
loss: 1.9056285619735718,grad_norm: 0.999999811584115, iteration: 2868
loss: 1.7401609420776367,grad_norm: 0.9999998126043786, iteration: 2869
loss: 2.0416886806488037,grad_norm: 0.9999998646930709, iteration: 2870
loss: 1.951547384262085,grad_norm: 0.9999997880634477, iteration: 2871
loss: 1.9842729568481445,grad_norm: 0.9999998090703963, iteration: 2872
loss: 1.8429349660873413,grad_norm: 0.9999998284595057, iteration: 2873
loss: 2.0181162357330322,grad_norm: 0.9999998451299007, iteration: 2874
loss: 1.902174472808838,grad_norm: 0.9999998520176202, iteration: 2875
loss: 1.9371289014816284,grad_norm: 0.9999997720408978, iteration: 2876
loss: 2.02321720123291,grad_norm: 0.9999998287736873, iteration: 2877
loss: 2.0821914672851562,grad_norm: 0.9999998398214887, iteration: 2878
loss: 1.9497390985488892,grad_norm: 0.999999866817759, iteration: 2879
loss: 2.0530290603637695,grad_norm: 0.9999998764693806, iteration: 2880
loss: 1.9357309341430664,grad_norm: 0.999999839034746, iteration: 2881
loss: 1.9360041618347168,grad_norm: 0.9999998112162584, iteration: 2882
loss: 1.86198091506958,grad_norm: 0.9999997803121801, iteration: 2883
loss: 1.9587286710739136,grad_norm: 0.9999998246578795, iteration: 2884
loss: 2.028440475463867,grad_norm: 0.9999998724056606, iteration: 2885
loss: 1.8970166444778442,grad_norm: 0.9999997923609611, iteration: 2886
loss: 1.8805701732635498,grad_norm: 0.9999998992868896, iteration: 2887
loss: 1.8934834003448486,grad_norm: 0.9999998694262155, iteration: 2888
loss: 2.036442995071411,grad_norm: 0.9999998508598688, iteration: 2889
loss: 1.8662782907485962,grad_norm: 0.9999998547041409, iteration: 2890
loss: 1.9330390691757202,grad_norm: 0.99999974892698, iteration: 2891
loss: 1.9008325338363647,grad_norm: 0.9999998087378307, iteration: 2892
loss: 1.9635275602340698,grad_norm: 0.9999997657349206, iteration: 2893
loss: 1.9064465761184692,grad_norm: 0.9999998295781009, iteration: 2894
loss: 1.9090827703475952,grad_norm: 0.9999998536687599, iteration: 2895
loss: 2.0259509086608887,grad_norm: 0.9999998330889798, iteration: 2896
loss: 1.9521892070770264,grad_norm: 0.9999998348550079, iteration: 2897
loss: 1.9293203353881836,grad_norm: 0.9999998521434511, iteration: 2898
loss: 1.9299612045288086,grad_norm: 0.9999997777597498, iteration: 2899
loss: 1.9743915796279907,grad_norm: 0.9999998146376908, iteration: 2900
loss: 1.8187618255615234,grad_norm: 0.9999998128260986, iteration: 2901
loss: 1.9577172994613647,grad_norm: 0.9999998532889612, iteration: 2902
loss: 1.9097596406936646,grad_norm: 0.999999817079567, iteration: 2903
loss: 1.9082826375961304,grad_norm: 0.9999998736412523, iteration: 2904
loss: 1.950142741203308,grad_norm: 0.9999998033685703, iteration: 2905
loss: 1.9425808191299438,grad_norm: 0.9999997921207399, iteration: 2906
loss: 1.9173849821090698,grad_norm: 0.9999998236400126, iteration: 2907
loss: 1.9451278448104858,grad_norm: 0.9999998368431043, iteration: 2908
loss: 2.083667039871216,grad_norm: 0.999999856487038, iteration: 2909
loss: 1.943036437034607,grad_norm: 0.9999998665051752, iteration: 2910
loss: 1.9220924377441406,grad_norm: 0.9999998738904781, iteration: 2911
loss: 1.8768192529678345,grad_norm: 0.9999999193037349, iteration: 2912
loss: 1.9590845108032227,grad_norm: 0.9999998170670429, iteration: 2913
loss: 1.884093165397644,grad_norm: 0.9999998879551913, iteration: 2914
loss: 2.025885581970215,grad_norm: 0.9999999095670618, iteration: 2915
loss: 1.7523844242095947,grad_norm: 0.9999998350268182, iteration: 2916
loss: 1.975782036781311,grad_norm: 0.9999997930507877, iteration: 2917
loss: 1.8477022647857666,grad_norm: 0.9999998134910146, iteration: 2918
loss: 1.8887321949005127,grad_norm: 0.9999999096932025, iteration: 2919
loss: 1.9446378946304321,grad_norm: 0.999999862394056, iteration: 2920
loss: 2.023037910461426,grad_norm: 0.9999998860577521, iteration: 2921
loss: 1.8611119985580444,grad_norm: 0.9999998105335428, iteration: 2922
loss: 1.8491289615631104,grad_norm: 0.9999998136399377, iteration: 2923
loss: 1.9387975931167603,grad_norm: 0.9999998089913412, iteration: 2924
loss: 1.8828030824661255,grad_norm: 0.99999982155151, iteration: 2925
loss: 2.0154035091400146,grad_norm: 0.9999999403067928, iteration: 2926
loss: 1.8571797609329224,grad_norm: 0.9999999083043728, iteration: 2927
loss: 1.799192190170288,grad_norm: 0.9999997585607846, iteration: 2928
loss: 1.9659446477890015,grad_norm: 0.9999998175353304, iteration: 2929
loss: 1.9060946702957153,grad_norm: 0.9999999348968542, iteration: 2930
loss: 1.8581372499465942,grad_norm: 0.9999998522488724, iteration: 2931
loss: 1.9871480464935303,grad_norm: 0.9999997903579716, iteration: 2932
loss: 1.927153468132019,grad_norm: 0.9999998114377906, iteration: 2933
loss: 1.8461403846740723,grad_norm: 0.9999998242960717, iteration: 2934
loss: 1.8565130233764648,grad_norm: 0.9999998532286544, iteration: 2935
loss: 1.9115597009658813,grad_norm: 0.9999999274438663, iteration: 2936
loss: 1.8766742944717407,grad_norm: 0.9999998160111158, iteration: 2937
loss: 1.7667577266693115,grad_norm: 0.9999998781757109, iteration: 2938
loss: 1.6506776809692383,grad_norm: 0.9999998860553561, iteration: 2939
loss: 1.9828665256500244,grad_norm: 0.9999998198964241, iteration: 2940
loss: 1.7510126829147339,grad_norm: 0.9999998284141951, iteration: 2941
loss: 1.9558755159378052,grad_norm: 0.9999997846806352, iteration: 2942
loss: 1.9323817491531372,grad_norm: 0.9999998204059097, iteration: 2943
loss: 1.8643556833267212,grad_norm: 0.9999998709667206, iteration: 2944
loss: 1.889655351638794,grad_norm: 0.999999863186988, iteration: 2945
loss: 2.0998687744140625,grad_norm: 0.9999999021498389, iteration: 2946
loss: 1.798025369644165,grad_norm: 0.9999998353083457, iteration: 2947
loss: 1.9125648736953735,grad_norm: 0.9999998004160967, iteration: 2948
loss: 1.900140404701233,grad_norm: 0.9999997964453171, iteration: 2949
loss: 1.9242013692855835,grad_norm: 0.9999998425070105, iteration: 2950
loss: 1.9170551300048828,grad_norm: 0.9999998137400279, iteration: 2951
loss: 1.8505892753601074,grad_norm: 0.9999997678742343, iteration: 2952
loss: 1.9687358140945435,grad_norm: 0.9999997733416066, iteration: 2953
loss: 1.9443994760513306,grad_norm: 0.9999998295239793, iteration: 2954
loss: 1.8580129146575928,grad_norm: 0.9999998200162462, iteration: 2955
loss: 1.8360799551010132,grad_norm: 0.9999998425288026, iteration: 2956
loss: 2.0129244327545166,grad_norm: 0.9999998126715778, iteration: 2957
loss: 1.8100560903549194,grad_norm: 0.9999998382372838, iteration: 2958
loss: 1.8680485486984253,grad_norm: 0.9999997439256499, iteration: 2959
loss: 1.9321955442428589,grad_norm: 0.9999998287426746, iteration: 2960
loss: 2.0004475116729736,grad_norm: 0.9999998125772367, iteration: 2961
loss: 1.9060627222061157,grad_norm: 0.9999998070752061, iteration: 2962
loss: 1.8125492334365845,grad_norm: 0.9999998472044721, iteration: 2963
loss: 2.030846118927002,grad_norm: 0.9999999076682525, iteration: 2964
loss: 1.8720226287841797,grad_norm: 0.9999997799504243, iteration: 2965
loss: 1.9392993450164795,grad_norm: 0.9999998777396821, iteration: 2966
loss: 1.9352827072143555,grad_norm: 0.9999998533833924, iteration: 2967
loss: 1.7822331190109253,grad_norm: 0.9999998040470204, iteration: 2968
loss: 2.095980405807495,grad_norm: 0.9999998405898325, iteration: 2969
loss: 1.9152443408966064,grad_norm: 0.9999997420053698, iteration: 2970
loss: 1.8859785795211792,grad_norm: 0.9999997876439783, iteration: 2971
loss: 2.061108350753784,grad_norm: 0.9999998010073973, iteration: 2972
loss: 1.937050700187683,grad_norm: 0.9999997898240597, iteration: 2973
loss: 2.015441417694092,grad_norm: 0.9999998190747459, iteration: 2974
loss: 1.8597334623336792,grad_norm: 0.9999998259477899, iteration: 2975
loss: 2.003399610519409,grad_norm: 0.9999998611504745, iteration: 2976
loss: 2.0067355632781982,grad_norm: 0.9999998168636495, iteration: 2977
loss: 1.9313420057296753,grad_norm: 0.9999997046101551, iteration: 2978
loss: 2.006507158279419,grad_norm: 0.9999998481505811, iteration: 2979
loss: 1.9630382061004639,grad_norm: 0.9999998643504221, iteration: 2980
loss: 1.9848392009735107,grad_norm: 0.9999998652564714, iteration: 2981
loss: 1.7937583923339844,grad_norm: 0.999999844150848, iteration: 2982
loss: 1.878955364227295,grad_norm: 0.9999998099733114, iteration: 2983
loss: 1.891688346862793,grad_norm: 0.999999813052853, iteration: 2984
loss: 1.8721035718917847,grad_norm: 0.9999998132966245, iteration: 2985
loss: 1.7893898487091064,grad_norm: 0.9999998409570023, iteration: 2986
loss: 1.8882321119308472,grad_norm: 0.9999998210928595, iteration: 2987
loss: 1.9303412437438965,grad_norm: 0.9999998073049216, iteration: 2988
loss: 1.9858872890472412,grad_norm: 0.999999878188882, iteration: 2989
loss: 1.7903072834014893,grad_norm: 0.999999849807708, iteration: 2990
loss: 1.9462069272994995,grad_norm: 0.9999998204309367, iteration: 2991
loss: 1.8790979385375977,grad_norm: 0.9999998351131472, iteration: 2992
loss: 1.745955467224121,grad_norm: 0.9999997633385688, iteration: 2993
loss: 1.9993841648101807,grad_norm: 0.9999999035075772, iteration: 2994
loss: 1.9149954319000244,grad_norm: 0.9999997816826613, iteration: 2995
loss: 1.9516563415527344,grad_norm: 0.9999998206765421, iteration: 2996
loss: 1.8337022066116333,grad_norm: 0.9999997579584922, iteration: 2997
loss: 1.9310086965560913,grad_norm: 0.9999998034484173, iteration: 2998
loss: 1.9363058805465698,grad_norm: 0.9999998494629996, iteration: 2999
loss: 1.9655424356460571,grad_norm: 0.999999768889291, iteration: 3000
loss: 1.7799371480941772,grad_norm: 0.9999997932752589, iteration: 3001
loss: 1.9477365016937256,grad_norm: 0.9999997988501909, iteration: 3002
loss: 1.8579528331756592,grad_norm: 0.9999998621882097, iteration: 3003
loss: 1.801477313041687,grad_norm: 0.999999802790881, iteration: 3004
loss: 1.93948233127594,grad_norm: 0.9999998424638543, iteration: 3005
loss: 1.9448167085647583,grad_norm: 0.9999998844353619, iteration: 3006
loss: 2.0216472148895264,grad_norm: 0.9999997519091575, iteration: 3007
loss: 1.8098556995391846,grad_norm: 0.9999997811869589, iteration: 3008
loss: 1.8916231393814087,grad_norm: 0.9999998466886112, iteration: 3009
loss: 1.8009635210037231,grad_norm: 0.999999789140901, iteration: 3010
loss: 1.9079937934875488,grad_norm: 0.9999998601207389, iteration: 3011
loss: 1.8819115161895752,grad_norm: 0.9999997692944594, iteration: 3012
loss: 1.74441397190094,grad_norm: 0.9999998431053918, iteration: 3013
loss: 1.8221697807312012,grad_norm: 0.9999998151298082, iteration: 3014
loss: 1.8889634609222412,grad_norm: 0.9999998147058722, iteration: 3015
loss: 1.765977144241333,grad_norm: 0.9999998095000283, iteration: 3016
loss: 1.9880248308181763,grad_norm: 0.9999998178462769, iteration: 3017
loss: 1.7524513006210327,grad_norm: 0.9999998496801128, iteration: 3018
loss: 2.1079137325286865,grad_norm: 0.9999998587895068, iteration: 3019
loss: 1.8800426721572876,grad_norm: 0.9999998531730181, iteration: 3020
loss: 1.834894061088562,grad_norm: 0.9999998209534574, iteration: 3021
loss: 1.9421991109848022,grad_norm: 0.9999998753703231, iteration: 3022
loss: 1.8968223333358765,grad_norm: 0.9999998591814286, iteration: 3023
loss: 1.7642760276794434,grad_norm: 0.9999997979841988, iteration: 3024
loss: 1.754212737083435,grad_norm: 0.9999997988668622, iteration: 3025
loss: 1.8848038911819458,grad_norm: 0.9999998702023583, iteration: 3026
loss: 1.8223137855529785,grad_norm: 0.9999998192908152, iteration: 3027
loss: 2.0239901542663574,grad_norm: 0.9999998230991567, iteration: 3028
loss: 2.027994155883789,grad_norm: 0.9999998254434499, iteration: 3029
loss: 1.8456424474716187,grad_norm: 0.9999998276465631, iteration: 3030
loss: 1.937479019165039,grad_norm: 0.9999998269329993, iteration: 3031
loss: 1.9535846710205078,grad_norm: 0.9999998564206278, iteration: 3032
loss: 2.049853563308716,grad_norm: 0.9999998299766226, iteration: 3033
loss: 1.9139739274978638,grad_norm: 0.9999998285790184, iteration: 3034
loss: 1.893105387687683,grad_norm: 0.9999998885320149, iteration: 3035
loss: 1.8470067977905273,grad_norm: 0.9999998583474977, iteration: 3036
loss: 1.87253999710083,grad_norm: 0.9999997991162822, iteration: 3037
loss: 1.7813419103622437,grad_norm: 0.9999998140272767, iteration: 3038
loss: 1.7986336946487427,grad_norm: 0.9999998552246341, iteration: 3039
loss: 1.9250481128692627,grad_norm: 0.9999998485666487, iteration: 3040
loss: 1.8324791193008423,grad_norm: 0.9999998418796627, iteration: 3041
loss: 1.8794822692871094,grad_norm: 0.9999998454652389, iteration: 3042
loss: 1.7936614751815796,grad_norm: 0.9999997982714922, iteration: 3043
loss: 1.9496617317199707,grad_norm: 0.9999998454305141, iteration: 3044
loss: 1.9231113195419312,grad_norm: 0.999999880070543, iteration: 3045
loss: 1.846996784210205,grad_norm: 0.9999997508763793, iteration: 3046
loss: 1.7649775743484497,grad_norm: 0.9999998639422636, iteration: 3047
loss: 1.8723762035369873,grad_norm: 0.9999998513146733, iteration: 3048
loss: 2.0346009731292725,grad_norm: 0.9999997591016534, iteration: 3049
loss: 1.8241573572158813,grad_norm: 0.9999998390412335, iteration: 3050
loss: 1.7931627035140991,grad_norm: 0.9999998305824152, iteration: 3051
loss: 1.9104318618774414,grad_norm: 0.9999998416593887, iteration: 3052
loss: 1.8277287483215332,grad_norm: 0.9999997966662707, iteration: 3053
loss: 1.9287528991699219,grad_norm: 0.9999999260708184, iteration: 3054
loss: 1.9513269662857056,grad_norm: 0.9999998075375418, iteration: 3055
loss: 1.8644205331802368,grad_norm: 0.9999998618116227, iteration: 3056
loss: 1.830849051475525,grad_norm: 0.9999997868523063, iteration: 3057
loss: 1.986518383026123,grad_norm: 0.9999998019084385, iteration: 3058
loss: 1.798944115638733,grad_norm: 0.9999998563287874, iteration: 3059
loss: 1.8189685344696045,grad_norm: 0.9999998099160345, iteration: 3060
loss: 1.8317654132843018,grad_norm: 0.9999999031287102, iteration: 3061
loss: 1.800179362297058,grad_norm: 0.999999846372045, iteration: 3062
loss: 1.7213751077651978,grad_norm: 0.9999998006198516, iteration: 3063
loss: 1.8595428466796875,grad_norm: 0.9999998342475188, iteration: 3064
loss: 1.7821768522262573,grad_norm: 0.99999976604266, iteration: 3065
loss: 1.8985198736190796,grad_norm: 0.9999998607151176, iteration: 3066
loss: 1.920957326889038,grad_norm: 0.9999998221835202, iteration: 3067
loss: 1.9503848552703857,grad_norm: 0.9999999228242883, iteration: 3068
loss: 1.9233218431472778,grad_norm: 0.999999815598749, iteration: 3069
loss: 1.784114956855774,grad_norm: 0.9999998553741495, iteration: 3070
loss: 1.9273854494094849,grad_norm: 0.9999998590811183, iteration: 3071
loss: 1.8584849834442139,grad_norm: 0.9999998470696442, iteration: 3072
loss: 1.8882073163986206,grad_norm: 0.9999997493536295, iteration: 3073
loss: 1.9181554317474365,grad_norm: 0.9999998792975452, iteration: 3074
loss: 1.8697007894515991,grad_norm: 0.9999998476267016, iteration: 3075
loss: 1.746039867401123,grad_norm: 0.9999998158736709, iteration: 3076
loss: 1.8893548250198364,grad_norm: 0.9999998871570149, iteration: 3077
loss: 1.8926481008529663,grad_norm: 0.9999997857495617, iteration: 3078
loss: 1.9840917587280273,grad_norm: 0.9999997559495305, iteration: 3079
loss: 1.8546618223190308,grad_norm: 0.9999998854178026, iteration: 3080
loss: 1.9327040910720825,grad_norm: 0.9999998488948362, iteration: 3081
loss: 1.8863704204559326,grad_norm: 0.999999767285498, iteration: 3082
loss: 1.859639048576355,grad_norm: 0.9999997857068176, iteration: 3083
loss: 1.857999563217163,grad_norm: 0.9999997969450881, iteration: 3084
loss: 1.8291358947753906,grad_norm: 0.9999998669409998, iteration: 3085
loss: 1.8983736038208008,grad_norm: 0.999999880073873, iteration: 3086
loss: 1.7627698183059692,grad_norm: 0.9999998645707675, iteration: 3087
loss: 1.6872233152389526,grad_norm: 0.9999997819763925, iteration: 3088
loss: 2.027557373046875,grad_norm: 0.9999998956609477, iteration: 3089
loss: 1.7810178995132446,grad_norm: 0.999999799595733, iteration: 3090
loss: 1.8681772947311401,grad_norm: 0.9999998429397251, iteration: 3091
loss: 1.8293031454086304,grad_norm: 0.9999998423915539, iteration: 3092
loss: 1.9041590690612793,grad_norm: 0.999999894802258, iteration: 3093
loss: 1.9168485403060913,grad_norm: 0.9999998276913695, iteration: 3094
loss: 1.8710883855819702,grad_norm: 0.9999997768277997, iteration: 3095
loss: 1.945357084274292,grad_norm: 0.9999998337970661, iteration: 3096
loss: 1.8810962438583374,grad_norm: 0.9999997771876339, iteration: 3097
loss: 1.9140874147415161,grad_norm: 0.9999998339959744, iteration: 3098
loss: 1.693467617034912,grad_norm: 0.9999998184877954, iteration: 3099
loss: 1.8714406490325928,grad_norm: 0.9999998561852127, iteration: 3100
loss: 1.8697811365127563,grad_norm: 0.9999999413259699, iteration: 3101
loss: 1.8431155681610107,grad_norm: 0.9999997668340089, iteration: 3102
loss: 1.8757874965667725,grad_norm: 0.9999998562052401, iteration: 3103
loss: 1.7907472848892212,grad_norm: 0.9999998620904047, iteration: 3104
loss: 2.1157491207122803,grad_norm: 0.9999998062174382, iteration: 3105
loss: 1.7831937074661255,grad_norm: 0.999999865872296, iteration: 3106
loss: 1.8300886154174805,grad_norm: 0.9999998489381998, iteration: 3107
loss: 1.756089210510254,grad_norm: 0.9999997910959685, iteration: 3108
loss: 1.934486985206604,grad_norm: 0.9999997944325776, iteration: 3109
loss: 1.7658897638320923,grad_norm: 0.9999997729477097, iteration: 3110
loss: 1.7885075807571411,grad_norm: 0.999999804543109, iteration: 3111
loss: 1.916250228881836,grad_norm: 0.9999998085630667, iteration: 3112
loss: 1.8830851316452026,grad_norm: 0.9999998081115599, iteration: 3113
loss: 1.8670259714126587,grad_norm: 0.9999997620871306, iteration: 3114
loss: 1.855111837387085,grad_norm: 0.9999998504301253, iteration: 3115
loss: 1.855366587638855,grad_norm: 0.9999998254555131, iteration: 3116
loss: 1.8223439455032349,grad_norm: 0.9999998320295271, iteration: 3117
loss: 1.772140622138977,grad_norm: 0.9999998290352905, iteration: 3118
loss: 1.8102500438690186,grad_norm: 0.9999997908010823, iteration: 3119
loss: 1.8958319425582886,grad_norm: 0.9999997795504525, iteration: 3120
loss: 1.8459184169769287,grad_norm: 0.9999997420762958, iteration: 3121
loss: 1.678186058998108,grad_norm: 0.9999998264659469, iteration: 3122
loss: 1.9057477712631226,grad_norm: 0.9999998616552714, iteration: 3123
loss: 1.9307005405426025,grad_norm: 0.999999858294364, iteration: 3124
loss: 1.9601147174835205,grad_norm: 0.9999998321310761, iteration: 3125
loss: 1.8383214473724365,grad_norm: 0.9999998513716815, iteration: 3126
loss: 1.916709303855896,grad_norm: 0.999999841905213, iteration: 3127
loss: 1.8737245798110962,grad_norm: 0.999999827095796, iteration: 3128
loss: 1.8778702020645142,grad_norm: 0.9999997658083538, iteration: 3129
loss: 1.9487113952636719,grad_norm: 0.9999998246826565, iteration: 3130
loss: 1.6364161968231201,grad_norm: 0.9999997659068308, iteration: 3131
loss: 1.9192054271697998,grad_norm: 0.9999998267475004, iteration: 3132
loss: 1.7444573640823364,grad_norm: 0.9999998180170684, iteration: 3133
loss: 1.8320035934448242,grad_norm: 0.9999998222963684, iteration: 3134
loss: 1.8285599946975708,grad_norm: 0.9999997769299881, iteration: 3135
loss: 1.8858039379119873,grad_norm: 0.9999999264041701, iteration: 3136
loss: 1.688848614692688,grad_norm: 0.9999998266208778, iteration: 3137
loss: 1.909743309020996,grad_norm: 0.9999998673788159, iteration: 3138
loss: 1.9518152475357056,grad_norm: 0.9999998842587546, iteration: 3139
loss: 1.8469343185424805,grad_norm: 0.9999998134692598, iteration: 3140
loss: 1.9187041521072388,grad_norm: 0.9999998331476169, iteration: 3141
loss: 1.9189342260360718,grad_norm: 0.9999998129138091, iteration: 3142
loss: 1.9430885314941406,grad_norm: 0.9999998289055855, iteration: 3143
loss: 1.96311354637146,grad_norm: 0.9999999218342728, iteration: 3144
loss: 1.9295034408569336,grad_norm: 0.9999998384944281, iteration: 3145
loss: 1.9534674882888794,grad_norm: 0.9999997899730778, iteration: 3146
loss: 1.756369709968567,grad_norm: 0.9999998822883397, iteration: 3147
loss: 1.8898720741271973,grad_norm: 0.9999997638813305, iteration: 3148
loss: 1.8690646886825562,grad_norm: 0.9999998588584853, iteration: 3149
loss: 1.7117464542388916,grad_norm: 0.9999998742189213, iteration: 3150
loss: 1.7725551128387451,grad_norm: 0.9999998138355104, iteration: 3151
loss: 1.9622493982315063,grad_norm: 0.9999997795202121, iteration: 3152
loss: 1.8003785610198975,grad_norm: 0.9999998968449748, iteration: 3153
loss: 1.834046483039856,grad_norm: 0.9999997960742091, iteration: 3154
loss: 1.8182159662246704,grad_norm: 0.9999997895007476, iteration: 3155
loss: 1.663960576057434,grad_norm: 0.9999998390249849, iteration: 3156
loss: 2.1112608909606934,grad_norm: 0.9999998928995847, iteration: 3157
loss: 1.96540105342865,grad_norm: 0.9999998679600053, iteration: 3158
loss: 1.791656494140625,grad_norm: 0.9999998808234671, iteration: 3159
loss: 1.792915940284729,grad_norm: 0.9999998952229563, iteration: 3160
loss: 2.0769917964935303,grad_norm: 0.9999998982262297, iteration: 3161
loss: 1.741010069847107,grad_norm: 0.9999998500157755, iteration: 3162
loss: 1.9353866577148438,grad_norm: 0.9999998172679728, iteration: 3163
loss: 1.9029656648635864,grad_norm: 0.9999998445609686, iteration: 3164
loss: 1.8881394863128662,grad_norm: 0.9999998458874549, iteration: 3165
loss: 1.7993097305297852,grad_norm: 0.9999998824703018, iteration: 3166
loss: 1.9152212142944336,grad_norm: 0.9999998512071129, iteration: 3167
loss: 1.680212378501892,grad_norm: 0.9999998328477581, iteration: 3168
loss: 1.7540377378463745,grad_norm: 0.9999998233441617, iteration: 3169
loss: 1.8293782472610474,grad_norm: 0.9999998884008561, iteration: 3170
loss: 1.7918922901153564,grad_norm: 0.9999999440860801, iteration: 3171
loss: 1.8226679563522339,grad_norm: 0.9999997745853628, iteration: 3172
loss: 1.8412114381790161,grad_norm: 0.9999997828425542, iteration: 3173
loss: 1.8773536682128906,grad_norm: 0.99999984636636, iteration: 3174
loss: 1.9786417484283447,grad_norm: 0.9999997856132743, iteration: 3175
loss: 1.8084372282028198,grad_norm: 0.9999997790622381, iteration: 3176
loss: 1.8175175189971924,grad_norm: 0.9999998605865287, iteration: 3177
loss: 1.839246392250061,grad_norm: 0.9999998276246553, iteration: 3178
loss: 1.8925180435180664,grad_norm: 0.9999998230733057, iteration: 3179
loss: 1.787014365196228,grad_norm: 0.9999998057983641, iteration: 3180
loss: 1.7805156707763672,grad_norm: 0.9999999023622994, iteration: 3181
loss: 1.9024252891540527,grad_norm: 0.9999998274663956, iteration: 3182
loss: 1.816840648651123,grad_norm: 0.9999998671813287, iteration: 3183
loss: 1.793283462524414,grad_norm: 0.9999999135807899, iteration: 3184
loss: 2.0251989364624023,grad_norm: 0.9999998178457257, iteration: 3185
loss: 1.9249175786972046,grad_norm: 0.9999998552971078, iteration: 3186
loss: 1.6723428964614868,grad_norm: 0.9999998729403698, iteration: 3187
loss: 1.7694488763809204,grad_norm: 0.9999998887351973, iteration: 3188
loss: 1.7922724485397339,grad_norm: 0.999999781248742, iteration: 3189
loss: 1.8181376457214355,grad_norm: 0.9999997676535557, iteration: 3190
loss: 1.898127555847168,grad_norm: 0.999999895256138, iteration: 3191
loss: 1.9132730960845947,grad_norm: 0.9999998321913701, iteration: 3192
loss: 1.8028191328048706,grad_norm: 0.999999849271894, iteration: 3193
loss: 1.7598880529403687,grad_norm: 0.9999997802862524, iteration: 3194
loss: 1.732926607131958,grad_norm: 0.9999996875836721, iteration: 3195
loss: 1.8652368783950806,grad_norm: 0.9999998719027485, iteration: 3196
loss: 1.7563446760177612,grad_norm: 0.9999998733126801, iteration: 3197
loss: 1.8072807788848877,grad_norm: 0.9999997658455275, iteration: 3198
loss: 1.857576847076416,grad_norm: 0.9999998592797258, iteration: 3199
loss: 1.8462181091308594,grad_norm: 0.999999828419135, iteration: 3200
loss: 1.8157066106796265,grad_norm: 0.9999998615429534, iteration: 3201
loss: 1.7569829225540161,grad_norm: 0.9999999084386982, iteration: 3202
loss: 1.5827640295028687,grad_norm: 0.9999998799801229, iteration: 3203
loss: 1.796465516090393,grad_norm: 0.9999999425099206, iteration: 3204
loss: 2.0218842029571533,grad_norm: 0.9999999257773633, iteration: 3205
loss: 2.0690548419952393,grad_norm: 0.9999997529924992, iteration: 3206
loss: 1.661784052848816,grad_norm: 0.9999998688424762, iteration: 3207
loss: 1.7572710514068604,grad_norm: 0.9999998369033984, iteration: 3208
loss: 2.0285751819610596,grad_norm: 0.9999998441009611, iteration: 3209
loss: 1.9234572649002075,grad_norm: 0.9999998472251103, iteration: 3210
loss: 1.9391719102859497,grad_norm: 0.9999998261420416, iteration: 3211
loss: 1.788017749786377,grad_norm: 0.9999999026715782, iteration: 3212
loss: 1.7971458435058594,grad_norm: 0.9999998073423637, iteration: 3213
loss: 1.817455530166626,grad_norm: 0.9999998612440057, iteration: 3214
loss: 1.861681342124939,grad_norm: 0.9999998752173715, iteration: 3215
loss: 1.8525524139404297,grad_norm: 0.9999999011908538, iteration: 3216
loss: 1.9136509895324707,grad_norm: 0.999999846035766, iteration: 3217
loss: 1.7266923189163208,grad_norm: 0.9999998252000584, iteration: 3218
loss: 1.8677815198898315,grad_norm: 0.999999848214567, iteration: 3219
loss: 1.8373321294784546,grad_norm: 0.9999998393088728, iteration: 3220
loss: 1.9213967323303223,grad_norm: 0.9999997807776656, iteration: 3221
loss: 1.8459745645523071,grad_norm: 0.9999998902229917, iteration: 3222
loss: 1.8962019681930542,grad_norm: 0.9999997513584719, iteration: 3223
loss: 1.823263168334961,grad_norm: 0.9999998636686501, iteration: 3224
loss: 1.9583290815353394,grad_norm: 0.9999998781797516, iteration: 3225
loss: 1.755922555923462,grad_norm: 0.9999997793608357, iteration: 3226
loss: 1.8907655477523804,grad_norm: 0.9999998635401498, iteration: 3227
loss: 1.7995623350143433,grad_norm: 0.9999997929758577, iteration: 3228
loss: 1.744020700454712,grad_norm: 0.9999997706125643, iteration: 3229
loss: 1.825322151184082,grad_norm: 0.9999998524135434, iteration: 3230
loss: 1.7021985054016113,grad_norm: 0.9999998552510994, iteration: 3231
loss: 1.9571964740753174,grad_norm: 0.9999998077762685, iteration: 3232
loss: 1.7148975133895874,grad_norm: 0.999999741659069, iteration: 3233
loss: 1.8555519580841064,grad_norm: 0.9999998779488092, iteration: 3234
loss: 1.9629796743392944,grad_norm: 0.9999999235844517, iteration: 3235
loss: 1.8437504768371582,grad_norm: 0.999999796223229, iteration: 3236
loss: 1.9120652675628662,grad_norm: 0.9999997701354466, iteration: 3237
loss: 1.8512998819351196,grad_norm: 0.999999778591302, iteration: 3238
loss: 1.700516700744629,grad_norm: 0.9999998754560091, iteration: 3239
loss: 1.852877140045166,grad_norm: 0.9999998081297853, iteration: 3240
loss: 1.757395625114441,grad_norm: 0.9999998411131277, iteration: 3241
loss: 1.9045076370239258,grad_norm: 0.9999998352604417, iteration: 3242
loss: 1.896116018295288,grad_norm: 0.9999998510317265, iteration: 3243
loss: 1.84932279586792,grad_norm: 0.9999997583260672, iteration: 3244
loss: 1.8920307159423828,grad_norm: 0.9999997970634996, iteration: 3245
loss: 1.8298094272613525,grad_norm: 0.9999997949497867, iteration: 3246
loss: 1.743120789527893,grad_norm: 0.9999998702802387, iteration: 3247
loss: 1.893730640411377,grad_norm: 0.9999997583464229, iteration: 3248
loss: 1.8441516160964966,grad_norm: 0.9999998622922126, iteration: 3249
loss: 1.7860866785049438,grad_norm: 0.999999854515286, iteration: 3250
loss: 1.8570423126220703,grad_norm: 0.9999997917944659, iteration: 3251
loss: 1.931232213973999,grad_norm: 0.9999997886926685, iteration: 3252
loss: 1.9142812490463257,grad_norm: 0.9999997846500971, iteration: 3253
loss: 1.8852561712265015,grad_norm: 0.9999997752537879, iteration: 3254
loss: 1.877192497253418,grad_norm: 0.9999998645358169, iteration: 3255
loss: 1.7666107416152954,grad_norm: 0.9999998201737601, iteration: 3256
loss: 1.8159433603286743,grad_norm: 0.9999997570437227, iteration: 3257
loss: 1.834324598312378,grad_norm: 0.9999998359006199, iteration: 3258
loss: 1.9207550287246704,grad_norm: 0.9999998537284984, iteration: 3259
loss: 1.906012773513794,grad_norm: 0.9999998485905697, iteration: 3260
loss: 1.667574167251587,grad_norm: 0.9999998477763452, iteration: 3261
loss: 1.7321751117706299,grad_norm: 0.9999997884828447, iteration: 3262
loss: 1.7568700313568115,grad_norm: 0.9999998638832883, iteration: 3263
loss: 1.794737696647644,grad_norm: 0.9999998185414898, iteration: 3264
loss: 1.912100911140442,grad_norm: 0.9999998583747229, iteration: 3265
loss: 2.02956485748291,grad_norm: 0.9999998587221858, iteration: 3266
loss: 1.6746858358383179,grad_norm: 0.9999998159756808, iteration: 3267
loss: 1.922685146331787,grad_norm: 0.9999999289269685, iteration: 3268
loss: 1.794870376586914,grad_norm: 0.9999998262101778, iteration: 3269
loss: 1.5395945310592651,grad_norm: 0.9999998786596422, iteration: 3270
loss: 1.8164907693862915,grad_norm: 0.9999997313819591, iteration: 3271
loss: 1.857638955116272,grad_norm: 0.9999998441467449, iteration: 3272
loss: 1.8191697597503662,grad_norm: 0.999999839953503, iteration: 3273
loss: 2.087698221206665,grad_norm: 0.9999998099084316, iteration: 3274
loss: 1.8421574831008911,grad_norm: 0.9999998217024493, iteration: 3275
loss: 1.9396294355392456,grad_norm: 0.9999999276045014, iteration: 3276
loss: 1.9602493047714233,grad_norm: 0.9999998069133013, iteration: 3277
loss: 1.9139763116836548,grad_norm: 0.9999998467597707, iteration: 3278
loss: 1.8491687774658203,grad_norm: 0.9999998056324938, iteration: 3279
loss: 1.8535195589065552,grad_norm: 0.9999997892411754, iteration: 3280
loss: 1.9226415157318115,grad_norm: 0.9999997830585788, iteration: 3281
loss: 1.8509243726730347,grad_norm: 0.9999998235748325, iteration: 3282
loss: 1.8135322332382202,grad_norm: 0.9999998194309677, iteration: 3283
loss: 1.8535680770874023,grad_norm: 0.9999999142069496, iteration: 3284
loss: 1.8275507688522339,grad_norm: 0.999999805895317, iteration: 3285
loss: 1.7354949712753296,grad_norm: 0.9999998589353594, iteration: 3286
loss: 1.9201525449752808,grad_norm: 0.9999998502737502, iteration: 3287
loss: 1.7637871503829956,grad_norm: 0.9999998088049962, iteration: 3288
loss: 1.931445837020874,grad_norm: 0.9999998884728157, iteration: 3289
loss: 1.8075956106185913,grad_norm: 0.9999998548691638, iteration: 3290
loss: 1.6729556322097778,grad_norm: 0.9999997744813395, iteration: 3291
loss: 1.8198301792144775,grad_norm: 0.9999998680026887, iteration: 3292
loss: 1.896727442741394,grad_norm: 0.9999999314158963, iteration: 3293
loss: 1.8374711275100708,grad_norm: 0.9999998351387706, iteration: 3294
loss: 1.6964699029922485,grad_norm: 0.9999998105931326, iteration: 3295
loss: 1.8776262998580933,grad_norm: 0.999999917720033, iteration: 3296
loss: 1.9383291006088257,grad_norm: 0.9999998018492674, iteration: 3297
loss: 1.7901324033737183,grad_norm: 0.9999997677897456, iteration: 3298
loss: 1.8066887855529785,grad_norm: 0.999999782249114, iteration: 3299
loss: 1.7863926887512207,grad_norm: 0.9999998073901387, iteration: 3300
loss: 1.8962903022766113,grad_norm: 0.9999998237592364, iteration: 3301
loss: 1.7600135803222656,grad_norm: 0.9999998767548985, iteration: 3302
loss: 1.7912200689315796,grad_norm: 0.9999998604217917, iteration: 3303
loss: 1.7005354166030884,grad_norm: 0.9999998322730755, iteration: 3304
loss: 1.7889870405197144,grad_norm: 0.9999997966062034, iteration: 3305
loss: 1.7693103551864624,grad_norm: 0.9999997877706449, iteration: 3306
loss: 1.880031704902649,grad_norm: 0.9999998179380595, iteration: 3307
loss: 1.6510720252990723,grad_norm: 0.9999997913811159, iteration: 3308
loss: 1.7564619779586792,grad_norm: 0.9999998562608231, iteration: 3309
loss: 1.7142188549041748,grad_norm: 0.9999998320595698, iteration: 3310
loss: 1.8042069673538208,grad_norm: 0.9999998059339233, iteration: 3311
loss: 1.8195832967758179,grad_norm: 0.9999998898084619, iteration: 3312
loss: 1.813793420791626,grad_norm: 0.999999831598791, iteration: 3313
loss: 1.9491662979125977,grad_norm: 0.9999998288341172, iteration: 3314
loss: 1.6793103218078613,grad_norm: 0.9999998654490488, iteration: 3315
loss: 1.8085302114486694,grad_norm: 0.9999997744051421, iteration: 3316
loss: 1.8100138902664185,grad_norm: 0.9999998479670948, iteration: 3317
loss: 1.9065773487091064,grad_norm: 0.9999998392734443, iteration: 3318
loss: 1.8581836223602295,grad_norm: 0.9999998244404419, iteration: 3319
loss: 1.89235258102417,grad_norm: 0.9999997939309677, iteration: 3320
loss: 1.8511492013931274,grad_norm: 0.9999998244650077, iteration: 3321
loss: 1.8592575788497925,grad_norm: 0.9999998262954651, iteration: 3322
loss: 1.8989263772964478,grad_norm: 0.9999997874504484, iteration: 3323
loss: 1.8942314386367798,grad_norm: 0.9999998210911987, iteration: 3324
loss: 1.863425850868225,grad_norm: 0.9999997988739676, iteration: 3325
loss: 1.7464300394058228,grad_norm: 0.99999980470286, iteration: 3326
loss: 1.7729692459106445,grad_norm: 0.9999998184170741, iteration: 3327
loss: 1.794150471687317,grad_norm: 0.9999998458551465, iteration: 3328
loss: 1.7223888635635376,grad_norm: 0.9999997868471755, iteration: 3329
loss: 1.8933203220367432,grad_norm: 0.9999998695857486, iteration: 3330
loss: 1.8949694633483887,grad_norm: 0.9999998434943366, iteration: 3331
loss: 1.7599225044250488,grad_norm: 0.9999998222532719, iteration: 3332
loss: 1.9371308088302612,grad_norm: 0.9999998226328657, iteration: 3333
loss: 1.9138365983963013,grad_norm: 0.9999998401192235, iteration: 3334
loss: 1.8034135103225708,grad_norm: 0.9999998452261823, iteration: 3335
loss: 1.9098280668258667,grad_norm: 0.9999998483105585, iteration: 3336
loss: 1.8232762813568115,grad_norm: 0.9999998443541687, iteration: 3337
loss: 1.605944037437439,grad_norm: 0.999999816054232, iteration: 3338
loss: 1.7911109924316406,grad_norm: 0.9999997621178857, iteration: 3339
loss: 1.7409522533416748,grad_norm: 0.9999999050490719, iteration: 3340
loss: 1.7826857566833496,grad_norm: 0.9999998248754266, iteration: 3341
loss: 1.844089150428772,grad_norm: 0.9999998213383892, iteration: 3342
loss: 1.8478329181671143,grad_norm: 0.9999997356252193, iteration: 3343
loss: 1.8135181665420532,grad_norm: 0.9999997412879845, iteration: 3344
loss: 1.67116117477417,grad_norm: 0.9999998969843764, iteration: 3345
loss: 1.688046932220459,grad_norm: 0.9999999054101132, iteration: 3346
loss: 1.771685004234314,grad_norm: 0.9999998116123273, iteration: 3347
loss: 1.8822702169418335,grad_norm: 0.9999998272029148, iteration: 3348
loss: 1.9856979846954346,grad_norm: 0.9999998939666583, iteration: 3349
loss: 1.7961870431900024,grad_norm: 0.9999999020521793, iteration: 3350
loss: 1.8186620473861694,grad_norm: 0.9999998921449618, iteration: 3351
loss: 1.7752952575683594,grad_norm: 0.9999997755618644, iteration: 3352
loss: 1.8031892776489258,grad_norm: 0.9999997723918079, iteration: 3353
loss: 1.8585641384124756,grad_norm: 0.9999997788104159, iteration: 3354
loss: 1.8619365692138672,grad_norm: 0.9999998618656378, iteration: 3355
loss: 1.6444358825683594,grad_norm: 0.9999998580140363, iteration: 3356
loss: 1.673377275466919,grad_norm: 0.9999998634295757, iteration: 3357
loss: 1.7035937309265137,grad_norm: 0.9999997565968928, iteration: 3358
loss: 1.9155703783035278,grad_norm: 0.9999998696188271, iteration: 3359
loss: 1.7813969850540161,grad_norm: 0.9999997886224685, iteration: 3360
loss: 1.8703476190567017,grad_norm: 0.9999998484414478, iteration: 3361
loss: 1.696478247642517,grad_norm: 0.9999997834804022, iteration: 3362
loss: 1.81551194190979,grad_norm: 0.9999998443227592, iteration: 3363
loss: 1.7165122032165527,grad_norm: 0.9999998319316125, iteration: 3364
loss: 1.7178622484207153,grad_norm: 0.9999998764607997, iteration: 3365
loss: 1.8954633474349976,grad_norm: 0.9999998063426133, iteration: 3366
loss: 1.7364763021469116,grad_norm: 0.9999998599243609, iteration: 3367
loss: 1.7894750833511353,grad_norm: 0.9999998489783148, iteration: 3368
loss: 1.8487955331802368,grad_norm: 0.9999999058774859, iteration: 3369
loss: 1.8521807193756104,grad_norm: 0.9999998606342501, iteration: 3370
loss: 1.9570651054382324,grad_norm: 0.9999998211568226, iteration: 3371
loss: 1.8407163619995117,grad_norm: 0.9999998162350577, iteration: 3372
loss: 1.7502129077911377,grad_norm: 0.9999998702973907, iteration: 3373
loss: 1.7493983507156372,grad_norm: 0.9999997845709412, iteration: 3374
loss: 1.865911602973938,grad_norm: 0.9999998228254713, iteration: 3375
loss: 1.8317251205444336,grad_norm: 0.9999998175027931, iteration: 3376
loss: 1.683675765991211,grad_norm: 0.9999998321148662, iteration: 3377
loss: 1.9654098749160767,grad_norm: 0.9999998536083659, iteration: 3378
loss: 1.7442336082458496,grad_norm: 0.9999997957161813, iteration: 3379
loss: 1.862411379814148,grad_norm: 0.999999833087704, iteration: 3380
loss: 1.7767698764801025,grad_norm: 0.9999998164761601, iteration: 3381
loss: 1.6778706312179565,grad_norm: 0.9999997413052606, iteration: 3382
loss: 1.7583884000778198,grad_norm: 0.9999997600581844, iteration: 3383
loss: 1.940228819847107,grad_norm: 0.9999997725422219, iteration: 3384
loss: 1.8565466403961182,grad_norm: 0.9999997952118477, iteration: 3385
loss: 1.8799993991851807,grad_norm: 0.9999998660969209, iteration: 3386
loss: 1.8918571472167969,grad_norm: 0.9999998109215442, iteration: 3387
loss: 1.7023411989212036,grad_norm: 0.9999997723778257, iteration: 3388
loss: 1.7557920217514038,grad_norm: 0.9999997626672114, iteration: 3389
loss: 1.7616112232208252,grad_norm: 0.9999998314099913, iteration: 3390
loss: 1.7438774108886719,grad_norm: 0.9999997191801641, iteration: 3391
loss: 1.714984655380249,grad_norm: 0.9999998472198524, iteration: 3392
loss: 1.7489337921142578,grad_norm: 0.9999998322355552, iteration: 3393
loss: 1.785081386566162,grad_norm: 0.999999765067117, iteration: 3394
loss: 1.8138771057128906,grad_norm: 0.9999998423316024, iteration: 3395
loss: 1.6584434509277344,grad_norm: 0.9999998006007853, iteration: 3396
loss: 1.660649299621582,grad_norm: 0.9999998055968401, iteration: 3397
loss: 1.914039969444275,grad_norm: 0.999999865573316, iteration: 3398
loss: 1.8226025104522705,grad_norm: 0.999999799034747, iteration: 3399
loss: 1.8156312704086304,grad_norm: 0.9999997912043526, iteration: 3400
loss: 1.5289994478225708,grad_norm: 0.9999998338651936, iteration: 3401
loss: 1.960198163986206,grad_norm: 0.9999997957609456, iteration: 3402
loss: 1.6979519128799438,grad_norm: 0.9999998999844424, iteration: 3403
loss: 1.8108019828796387,grad_norm: 0.9999998630227034, iteration: 3404
loss: 1.5566986799240112,grad_norm: 0.99999974683059, iteration: 3405
loss: 1.6707955598831177,grad_norm: 0.999999817353099, iteration: 3406
loss: 1.7987362146377563,grad_norm: 0.9999998610791733, iteration: 3407
loss: 1.7981252670288086,grad_norm: 0.9999998416543677, iteration: 3408
loss: 1.9094470739364624,grad_norm: 0.9999998443878262, iteration: 3409
loss: 1.898164987564087,grad_norm: 0.9999998514756846, iteration: 3410
loss: 1.86387038230896,grad_norm: 0.999999761196014, iteration: 3411
loss: 1.8688997030258179,grad_norm: 0.9999997949335863, iteration: 3412
loss: 1.803640604019165,grad_norm: 0.9999997565697958, iteration: 3413
loss: 1.84632408618927,grad_norm: 0.9999998198254859, iteration: 3414
loss: 1.9079761505126953,grad_norm: 0.9999997982018243, iteration: 3415
loss: 1.65230131149292,grad_norm: 0.999999753653295, iteration: 3416
loss: 1.8500328063964844,grad_norm: 0.9999997891738915, iteration: 3417
loss: 1.8848737478256226,grad_norm: 0.9999998799074699, iteration: 3418
loss: 1.7125247716903687,grad_norm: 0.9999998206534277, iteration: 3419
loss: 1.7665644884109497,grad_norm: 0.999999873163641, iteration: 3420
loss: 1.7438243627548218,grad_norm: 0.999999827538352, iteration: 3421
loss: 1.7328003644943237,grad_norm: 0.9999997603353092, iteration: 3422
loss: 1.709026575088501,grad_norm: 0.9999999058340525, iteration: 3423
loss: 1.8782283067703247,grad_norm: 0.9999998604620752, iteration: 3424
loss: 1.6751822233200073,grad_norm: 0.9999998520790645, iteration: 3425
loss: 1.733228325843811,grad_norm: 0.9999998122765853, iteration: 3426
loss: 1.9282487630844116,grad_norm: 0.9999998286037054, iteration: 3427
loss: 1.8562191724777222,grad_norm: 0.9999998048405002, iteration: 3428
loss: 1.8666880130767822,grad_norm: 0.9999998580864219, iteration: 3429
loss: 1.770452618598938,grad_norm: 0.9999998287631436, iteration: 3430
loss: 1.7684600353240967,grad_norm: 0.9999998873336882, iteration: 3431
loss: 1.8271733522415161,grad_norm: 0.9999998017763196, iteration: 3432
loss: 1.9606705904006958,grad_norm: 0.9999997422010506, iteration: 3433
loss: 1.6238882541656494,grad_norm: 0.9999998537352068, iteration: 3434
loss: 1.772527813911438,grad_norm: 0.9999998015652986, iteration: 3435
loss: 1.8541159629821777,grad_norm: 0.9999998242703216, iteration: 3436
loss: 1.712181806564331,grad_norm: 0.999999713292816, iteration: 3437
loss: 1.6511924266815186,grad_norm: 0.9999998613474606, iteration: 3438
loss: 1.8127168416976929,grad_norm: 0.9999999017518048, iteration: 3439
loss: 1.7125637531280518,grad_norm: 0.9999998173496345, iteration: 3440
loss: 1.677707552909851,grad_norm: 0.999999759172237, iteration: 3441
loss: 1.7077327966690063,grad_norm: 0.9999998479761432, iteration: 3442
loss: 1.766198992729187,grad_norm: 0.999999808407258, iteration: 3443
loss: 1.8803728818893433,grad_norm: 0.999999858075578, iteration: 3444
loss: 1.8050520420074463,grad_norm: 0.9999999267161656, iteration: 3445
loss: 1.7426139116287231,grad_norm: 0.9999998227689278, iteration: 3446
loss: 1.8522523641586304,grad_norm: 0.9999998943935396, iteration: 3447
loss: 1.7630583047866821,grad_norm: 0.999999836729046, iteration: 3448
loss: 1.7275470495224,grad_norm: 0.9999998863490884, iteration: 3449
loss: 1.7430458068847656,grad_norm: 0.9999998576298323, iteration: 3450
loss: 1.811651587486267,grad_norm: 0.9999998331573001, iteration: 3451
loss: 1.8263356685638428,grad_norm: 0.9999998485122236, iteration: 3452
loss: 1.8611295223236084,grad_norm: 0.9999998444482175, iteration: 3453
loss: 1.9268269538879395,grad_norm: 0.9999998126692125, iteration: 3454
loss: 1.7464452981948853,grad_norm: 0.999999837765002, iteration: 3455
loss: 1.7850643396377563,grad_norm: 0.9999998097065519, iteration: 3456
loss: 1.6271659135818481,grad_norm: 0.9999999096793452, iteration: 3457
loss: 1.7587522268295288,grad_norm: 0.9999997949345124, iteration: 3458
loss: 1.894487738609314,grad_norm: 0.9999998395671182, iteration: 3459
loss: 1.720178246498108,grad_norm: 0.9999998531248753, iteration: 3460
loss: 1.6458271741867065,grad_norm: 0.9999997662978072, iteration: 3461
loss: 1.6508911848068237,grad_norm: 0.9999997784874116, iteration: 3462
loss: 1.7137398719787598,grad_norm: 0.9999998771636542, iteration: 3463
loss: 1.7722021341323853,grad_norm: 0.999999757996088, iteration: 3464
loss: 1.9243158102035522,grad_norm: 0.9999998576922647, iteration: 3465
loss: 1.8940999507904053,grad_norm: 0.9999998886607533, iteration: 3466
loss: 1.7118085622787476,grad_norm: 0.9999998908755103, iteration: 3467
loss: 1.7531898021697998,grad_norm: 0.9999997876113705, iteration: 3468
loss: 1.6816811561584473,grad_norm: 0.9999998007830125, iteration: 3469
loss: 1.8578263521194458,grad_norm: 0.999999813597803, iteration: 3470
loss: 1.627482533454895,grad_norm: 0.9999998145480155, iteration: 3471
loss: 1.9073339700698853,grad_norm: 0.9999997775080158, iteration: 3472
loss: 1.6915611028671265,grad_norm: 0.9999998023847435, iteration: 3473
loss: 1.807918667793274,grad_norm: 0.9999998161376648, iteration: 3474
loss: 1.8164745569229126,grad_norm: 0.9999998374829927, iteration: 3475
loss: 1.732008934020996,grad_norm: 0.999999764612486, iteration: 3476
loss: 1.8154020309448242,grad_norm: 0.9999997948778806, iteration: 3477
loss: 1.7437063455581665,grad_norm: 0.9999997527422491, iteration: 3478
loss: 1.7285388708114624,grad_norm: 0.99999978643742, iteration: 3479
loss: 1.7243781089782715,grad_norm: 0.9999998167342619, iteration: 3480
loss: 1.5363179445266724,grad_norm: 0.9999999070223705, iteration: 3481
loss: 1.8567692041397095,grad_norm: 0.9999997900418838, iteration: 3482
loss: 1.7363812923431396,grad_norm: 0.9999998163961503, iteration: 3483
loss: 1.6790359020233154,grad_norm: 0.9999997228888278, iteration: 3484
loss: 1.846638798713684,grad_norm: 0.999999908656754, iteration: 3485
loss: 1.5681610107421875,grad_norm: 0.9999998863320292, iteration: 3486
loss: 1.876888394355774,grad_norm: 0.9999998170169728, iteration: 3487
loss: 1.8204890489578247,grad_norm: 0.9999997697459712, iteration: 3488
loss: 1.7296717166900635,grad_norm: 0.9999997954340337, iteration: 3489
loss: 1.7790648937225342,grad_norm: 0.9999998656735757, iteration: 3490
loss: 1.69258451461792,grad_norm: 0.9999997681558123, iteration: 3491
loss: 1.8157228231430054,grad_norm: 0.9999998401312913, iteration: 3492
loss: 1.6847100257873535,grad_norm: 0.9999998743548494, iteration: 3493
loss: 1.8867870569229126,grad_norm: 0.9999998171142964, iteration: 3494
loss: 1.8393423557281494,grad_norm: 0.9999998158926495, iteration: 3495
loss: 1.7202473878860474,grad_norm: 0.9999998215548597, iteration: 3496
loss: 1.6896775960922241,grad_norm: 0.9999998396225053, iteration: 3497
loss: 1.6518170833587646,grad_norm: 0.9999997973184599, iteration: 3498
loss: 1.9033042192459106,grad_norm: 0.9999998283188779, iteration: 3499
loss: 1.8352044820785522,grad_norm: 0.999999876959661, iteration: 3500
loss: 1.8657927513122559,grad_norm: 0.9999999249895941, iteration: 3501
loss: 1.796769618988037,grad_norm: 0.9999998200441684, iteration: 3502
loss: 1.7584761381149292,grad_norm: 0.9999997968487528, iteration: 3503
loss: 1.7420614957809448,grad_norm: 0.9999998132912127, iteration: 3504
loss: 1.714766263961792,grad_norm: 0.9999998297282383, iteration: 3505
loss: 1.7790374755859375,grad_norm: 0.999999842955066, iteration: 3506
loss: 1.6569039821624756,grad_norm: 0.9999997954684122, iteration: 3507
loss: 1.585530161857605,grad_norm: 0.9999998472541033, iteration: 3508
loss: 1.7578363418579102,grad_norm: 0.9999998395308414, iteration: 3509
loss: 1.8163455724716187,grad_norm: 0.9999998477402883, iteration: 3510
loss: 1.70194673538208,grad_norm: 0.9999998464545833, iteration: 3511
loss: 1.9529646635055542,grad_norm: 0.9999999118949865, iteration: 3512
loss: 1.8021055459976196,grad_norm: 0.9999998151075167, iteration: 3513
loss: 1.7195664644241333,grad_norm: 0.999999763987062, iteration: 3514
loss: 1.8565185070037842,grad_norm: 0.9999998334240543, iteration: 3515
loss: 1.7560229301452637,grad_norm: 0.9999998104022557, iteration: 3516
loss: 1.9228177070617676,grad_norm: 0.9999997819742402, iteration: 3517
loss: 1.8418550491333008,grad_norm: 0.9999997987071362, iteration: 3518
loss: 1.8243356943130493,grad_norm: 0.999999862320384, iteration: 3519
loss: 1.7330443859100342,grad_norm: 0.9999998419909779, iteration: 3520
loss: 1.7110551595687866,grad_norm: 0.9999998650971715, iteration: 3521
loss: 1.8773565292358398,grad_norm: 0.999999869301996, iteration: 3522
loss: 1.8734464645385742,grad_norm: 0.9999997583561197, iteration: 3523
loss: 1.8565593957901,grad_norm: 0.9999998647069283, iteration: 3524
loss: 1.6957159042358398,grad_norm: 0.9999997581447531, iteration: 3525
loss: 1.7662913799285889,grad_norm: 0.9999998357737133, iteration: 3526
loss: 1.7681951522827148,grad_norm: 0.9999997888770414, iteration: 3527
loss: 1.8008571863174438,grad_norm: 0.9999997657911811, iteration: 3528
loss: 1.667449951171875,grad_norm: 0.9999998467659967, iteration: 3529
loss: 1.7833701372146606,grad_norm: 0.9999998446707059, iteration: 3530
loss: 1.7900266647338867,grad_norm: 0.9999999100819971, iteration: 3531
loss: 1.7428951263427734,grad_norm: 0.9999997849701096, iteration: 3532
loss: 1.7013392448425293,grad_norm: 0.9999997966302403, iteration: 3533
loss: 1.6715726852416992,grad_norm: 0.9999998536444807, iteration: 3534
loss: 1.646503210067749,grad_norm: 0.9999997678840724, iteration: 3535
loss: 1.658003807067871,grad_norm: 0.9999998620112391, iteration: 3536
loss: 1.5893675088882446,grad_norm: 0.9999998182341281, iteration: 3537
loss: 1.7801463603973389,grad_norm: 0.9999998498923532, iteration: 3538
loss: 1.8711230754852295,grad_norm: 0.9999998565910749, iteration: 3539
loss: 1.835914134979248,grad_norm: 0.9999998339237961, iteration: 3540
loss: 1.7693718671798706,grad_norm: 0.9999998717169599, iteration: 3541
loss: 1.7277214527130127,grad_norm: 0.9999999126025344, iteration: 3542
loss: 1.7721483707427979,grad_norm: 0.999999822683424, iteration: 3543
loss: 1.7105430364608765,grad_norm: 0.9999998139134861, iteration: 3544
loss: 1.688300371170044,grad_norm: 0.9999998296768591, iteration: 3545
loss: 1.7376428842544556,grad_norm: 0.9999997937410228, iteration: 3546
loss: 1.5880100727081299,grad_norm: 0.9999997888269699, iteration: 3547
loss: 1.7090762853622437,grad_norm: 0.9999997684133796, iteration: 3548
loss: 1.7074404954910278,grad_norm: 0.9999997431745309, iteration: 3549
loss: 1.6617319583892822,grad_norm: 0.9999998558283895, iteration: 3550
loss: 1.7740720510482788,grad_norm: 0.9999998754249159, iteration: 3551
loss: 1.746902346611023,grad_norm: 0.999999798777306, iteration: 3552
loss: 1.6947802305221558,grad_norm: 0.9999998584528034, iteration: 3553
loss: 1.8037140369415283,grad_norm: 0.9999998011636643, iteration: 3554
loss: 1.6839728355407715,grad_norm: 0.9999997778519993, iteration: 3555
loss: 1.7582955360412598,grad_norm: 0.9999999378891815, iteration: 3556
loss: 1.6775974035263062,grad_norm: 0.9999997717791198, iteration: 3557
loss: 1.665101408958435,grad_norm: 0.9999998291961947, iteration: 3558
loss: 1.9183766841888428,grad_norm: 0.9999998968551731, iteration: 3559
loss: 1.896528959274292,grad_norm: 0.9999998377688288, iteration: 3560
loss: 1.789258360862732,grad_norm: 0.9999998296472103, iteration: 3561
loss: 1.883052945137024,grad_norm: 0.9999998442972969, iteration: 3562
loss: 1.662000298500061,grad_norm: 0.9999998065911035, iteration: 3563
loss: 1.6867834329605103,grad_norm: 0.9999998558638209, iteration: 3564
loss: 1.7076022624969482,grad_norm: 0.9999997139978856, iteration: 3565
loss: 1.797192096710205,grad_norm: 0.9999999077420098, iteration: 3566
loss: 1.7690874338150024,grad_norm: 0.9999999034027841, iteration: 3567
loss: 1.7971763610839844,grad_norm: 0.999999804237504, iteration: 3568
loss: 1.7336369752883911,grad_norm: 0.9999998338876936, iteration: 3569
loss: 1.5850279331207275,grad_norm: 0.9999998142380562, iteration: 3570
loss: 1.7037564516067505,grad_norm: 0.9999999046826593, iteration: 3571
loss: 1.7631683349609375,grad_norm: 0.9999998864310375, iteration: 3572
loss: 1.743204116821289,grad_norm: 0.999999848086694, iteration: 3573
loss: 1.7150019407272339,grad_norm: 0.9999997872628804, iteration: 3574
loss: 1.6287593841552734,grad_norm: 0.9999998259042732, iteration: 3575
loss: 1.7746896743774414,grad_norm: 0.9999997623859804, iteration: 3576
loss: 1.7968281507492065,grad_norm: 0.9999998827253106, iteration: 3577
loss: 1.8099907636642456,grad_norm: 0.9999998426914395, iteration: 3578
loss: 1.7338109016418457,grad_norm: 0.9999997896887, iteration: 3579
loss: 1.7396037578582764,grad_norm: 0.9999998050674906, iteration: 3580
loss: 1.8573609590530396,grad_norm: 0.9999998405031129, iteration: 3581
loss: 1.7818262577056885,grad_norm: 0.9999998662594861, iteration: 3582
loss: 1.786537766456604,grad_norm: 0.9999998582928844, iteration: 3583
loss: 1.6632322072982788,grad_norm: 0.9999997839790397, iteration: 3584
loss: 1.6103681325912476,grad_norm: 0.9999998527892265, iteration: 3585
loss: 1.7386071681976318,grad_norm: 0.9999998244420385, iteration: 3586
loss: 1.5925369262695312,grad_norm: 0.9999998939915391, iteration: 3587
loss: 1.7166435718536377,grad_norm: 0.9999998705069025, iteration: 3588
loss: 1.733936071395874,grad_norm: 0.9999997538112537, iteration: 3589
loss: 1.6215078830718994,grad_norm: 0.9999998298733765, iteration: 3590
loss: 1.7532639503479004,grad_norm: 0.9999998371531114, iteration: 3591
loss: 1.747544288635254,grad_norm: 0.9999997813469174, iteration: 3592
loss: 1.6065940856933594,grad_norm: 0.999999802684699, iteration: 3593
loss: 1.6625880002975464,grad_norm: 0.9999998133211606, iteration: 3594
loss: 1.5803357362747192,grad_norm: 0.9999998799651465, iteration: 3595
loss: 1.7410430908203125,grad_norm: 0.9999999160346319, iteration: 3596
loss: 1.8022230863571167,grad_norm: 0.9999998141687474, iteration: 3597
loss: 1.764508605003357,grad_norm: 0.9999998348210575, iteration: 3598
loss: 1.5673011541366577,grad_norm: 0.9999998030865274, iteration: 3599
loss: 1.7691951990127563,grad_norm: 0.9999998353099113, iteration: 3600
loss: 1.8660902976989746,grad_norm: 0.999999833727492, iteration: 3601
loss: 1.7411956787109375,grad_norm: 0.9999997983951903, iteration: 3602
loss: 1.836946964263916,grad_norm: 0.9999998357094607, iteration: 3603
loss: 1.8102450370788574,grad_norm: 0.9999998099891574, iteration: 3604
loss: 1.7504589557647705,grad_norm: 0.9999997730706396, iteration: 3605
loss: 1.822374701499939,grad_norm: 0.9999998471484364, iteration: 3606
loss: 1.734368085861206,grad_norm: 0.9999999149860077, iteration: 3607
loss: 1.653118371963501,grad_norm: 0.9999998116992652, iteration: 3608
loss: 1.6389105319976807,grad_norm: 0.9999998283422568, iteration: 3609
loss: 1.7920050621032715,grad_norm: 0.9999998373026437, iteration: 3610
loss: 1.7348397970199585,grad_norm: 0.999999830105019, iteration: 3611
loss: 1.7665117979049683,grad_norm: 0.9999998395332286, iteration: 3612
loss: 1.7465013265609741,grad_norm: 0.9999998376178483, iteration: 3613
loss: 1.7167108058929443,grad_norm: 0.999999753699567, iteration: 3614
loss: 1.7024251222610474,grad_norm: 0.9999998044024291, iteration: 3615
loss: 1.7071340084075928,grad_norm: 0.9999998719325418, iteration: 3616
loss: 1.8212746381759644,grad_norm: 0.9999998587438778, iteration: 3617
loss: 1.8309297561645508,grad_norm: 0.9999998733204909, iteration: 3618
loss: 1.6946289539337158,grad_norm: 0.9999998181749069, iteration: 3619
loss: 1.6555516719818115,grad_norm: 0.9999998792761802, iteration: 3620
loss: 1.7914600372314453,grad_norm: 0.9999997896547222, iteration: 3621
loss: 1.6064269542694092,grad_norm: 0.9999998385539671, iteration: 3622
loss: 1.7223600149154663,grad_norm: 0.9999998237506292, iteration: 3623
loss: 1.6306043863296509,grad_norm: 0.9999998595947487, iteration: 3624
loss: 1.7067772150039673,grad_norm: 0.9999998815280198, iteration: 3625
loss: 1.6079747676849365,grad_norm: 0.9999997534241966, iteration: 3626
loss: 1.5720926523208618,grad_norm: 0.9999998177646295, iteration: 3627
loss: 1.5712847709655762,grad_norm: 0.9999998090964142, iteration: 3628
loss: 1.7597986459732056,grad_norm: 0.9999998694485612, iteration: 3629
loss: 1.698155403137207,grad_norm: 0.999999817405387, iteration: 3630
loss: 1.7409557104110718,grad_norm: 0.9999999103834509, iteration: 3631
loss: 1.819541096687317,grad_norm: 0.9999998347584149, iteration: 3632
loss: 1.716895580291748,grad_norm: 0.9999998603566131, iteration: 3633
loss: 1.8432323932647705,grad_norm: 0.9999998310196431, iteration: 3634
loss: 1.782688021659851,grad_norm: 0.9999997671403922, iteration: 3635
loss: 1.728497862815857,grad_norm: 0.999999859640777, iteration: 3636
loss: 1.765882134437561,grad_norm: 0.9999998736794937, iteration: 3637
loss: 1.8499797582626343,grad_norm: 0.9999998266507762, iteration: 3638
loss: 1.6880384683609009,grad_norm: 0.999999735455825, iteration: 3639
loss: 1.740512728691101,grad_norm: 0.9999998869516292, iteration: 3640
loss: 1.536594033241272,grad_norm: 0.9999997808829907, iteration: 3641
loss: 1.801315426826477,grad_norm: 0.9999998209013707, iteration: 3642
loss: 1.7798460721969604,grad_norm: 0.9999998832386361, iteration: 3643
loss: 1.6135843992233276,grad_norm: 0.9999998624802849, iteration: 3644
loss: 1.717515230178833,grad_norm: 0.9999998723233096, iteration: 3645
loss: 1.768955945968628,grad_norm: 0.999999829972441, iteration: 3646
loss: 1.7545182704925537,grad_norm: 0.9999998386160409, iteration: 3647
loss: 1.4712215662002563,grad_norm: 0.9999998389816988, iteration: 3648
loss: 1.768827199935913,grad_norm: 0.9999998620778372, iteration: 3649
loss: 1.8063023090362549,grad_norm: 0.9999998380433903, iteration: 3650
loss: 1.5133556127548218,grad_norm: 0.9999997775327769, iteration: 3651
loss: 1.6346558332443237,grad_norm: 0.9999998135200312, iteration: 3652
loss: 1.7684603929519653,grad_norm: 0.9999997696130297, iteration: 3653
loss: 1.6618423461914062,grad_norm: 0.9999998223267695, iteration: 3654
loss: 1.8596597909927368,grad_norm: 0.9999998877952031, iteration: 3655
loss: 1.7938282489776611,grad_norm: 0.9999998562466824, iteration: 3656
loss: 1.8107551336288452,grad_norm: 0.9999998009935759, iteration: 3657
loss: 1.6369099617004395,grad_norm: 0.9999997919203608, iteration: 3658
loss: 1.7177985906600952,grad_norm: 0.999999820131127, iteration: 3659
loss: 1.6650352478027344,grad_norm: 0.999999838514697, iteration: 3660
loss: 1.9151748418807983,grad_norm: 0.9999997747075088, iteration: 3661
loss: 1.7263720035552979,grad_norm: 0.9999999040310906, iteration: 3662
loss: 1.8128236532211304,grad_norm: 0.9999997858933085, iteration: 3663
loss: 1.6907132863998413,grad_norm: 0.9999998131897576, iteration: 3664
loss: 1.8006916046142578,grad_norm: 0.9999999409734104, iteration: 3665
loss: 1.763790249824524,grad_norm: 0.9999998871227469, iteration: 3666
loss: 1.8568600416183472,grad_norm: 0.9999998952660051, iteration: 3667
loss: 1.8136228322982788,grad_norm: 0.9999998980418959, iteration: 3668
loss: 1.6556109189987183,grad_norm: 0.9999998615554533, iteration: 3669
loss: 1.6044450998306274,grad_norm: 0.9999997852765623, iteration: 3670
loss: 1.6075782775878906,grad_norm: 0.9999998984140162, iteration: 3671
loss: 1.8075039386749268,grad_norm: 0.9999998782396886, iteration: 3672
loss: 1.682357668876648,grad_norm: 0.9999998692797797, iteration: 3673
loss: 1.736167550086975,grad_norm: 0.9999997298546388, iteration: 3674
loss: 1.7502169609069824,grad_norm: 0.9999998063840353, iteration: 3675
loss: 1.5669742822647095,grad_norm: 0.9999998438765189, iteration: 3676
loss: 1.7131434679031372,grad_norm: 0.9999998473877525, iteration: 3677
loss: 1.6958249807357788,grad_norm: 0.9999997983430547, iteration: 3678
loss: 1.5676051378250122,grad_norm: 0.9999997659517659, iteration: 3679
loss: 1.7757081985473633,grad_norm: 0.9999998462902792, iteration: 3680
loss: 1.6785342693328857,grad_norm: 0.9999998534181587, iteration: 3681
loss: 1.7226507663726807,grad_norm: 0.9999997964965823, iteration: 3682
loss: 1.7609013319015503,grad_norm: 0.9999997666813499, iteration: 3683
loss: 1.677862524986267,grad_norm: 0.9999998682485244, iteration: 3684
loss: 1.5627628564834595,grad_norm: 0.9999997650910856, iteration: 3685
loss: 1.6756421327590942,grad_norm: 0.9999997284636195, iteration: 3686
loss: 1.925288200378418,grad_norm: 0.9999997851497866, iteration: 3687
loss: 1.7619673013687134,grad_norm: 0.9999997803823543, iteration: 3688
loss: 1.737167239189148,grad_norm: 0.9999998310395058, iteration: 3689
loss: 1.6774958372116089,grad_norm: 0.999999866807102, iteration: 3690
loss: 1.670642614364624,grad_norm: 0.9999998352553013, iteration: 3691
loss: 1.6217838525772095,grad_norm: 0.999999820083352, iteration: 3692
loss: 1.6450263261795044,grad_norm: 0.9999997403042993, iteration: 3693
loss: 1.7255120277404785,grad_norm: 0.9999997594066281, iteration: 3694
loss: 1.6641631126403809,grad_norm: 0.9999998365197885, iteration: 3695
loss: 1.570429801940918,grad_norm: 0.9999998007665859, iteration: 3696
loss: 1.6315104961395264,grad_norm: 0.9999998090323702, iteration: 3697
loss: 1.718353271484375,grad_norm: 0.999999824876071, iteration: 3698
loss: 1.595273494720459,grad_norm: 0.9999997829485966, iteration: 3699
loss: 1.6748907566070557,grad_norm: 0.9999997778789708, iteration: 3700
loss: 1.7036528587341309,grad_norm: 0.9999998512659093, iteration: 3701
loss: 1.5857090950012207,grad_norm: 0.9999998391475444, iteration: 3702
loss: 1.6635117530822754,grad_norm: 0.9999997341897041, iteration: 3703
loss: 1.6646171808242798,grad_norm: 0.9999998301758499, iteration: 3704
loss: 1.5929101705551147,grad_norm: 0.9999997734095318, iteration: 3705
loss: 1.8114097118377686,grad_norm: 0.999999847431254, iteration: 3706
loss: 1.5869526863098145,grad_norm: 0.999999815092051, iteration: 3707
loss: 1.778348684310913,grad_norm: 0.9999997848222972, iteration: 3708
loss: 1.8608113527297974,grad_norm: 0.9999998730324033, iteration: 3709
loss: 1.6824555397033691,grad_norm: 0.9999997734622216, iteration: 3710
loss: 1.6742771863937378,grad_norm: 0.9999997290916639, iteration: 3711
loss: 1.6421597003936768,grad_norm: 0.9999998207454904, iteration: 3712
loss: 1.809418797492981,grad_norm: 0.9999998537562889, iteration: 3713
loss: 1.4969379901885986,grad_norm: 0.9999998115102915, iteration: 3714
loss: 1.6383075714111328,grad_norm: 0.9999998080270917, iteration: 3715
loss: 1.7024576663970947,grad_norm: 0.9999998970140483, iteration: 3716
loss: 1.8151246309280396,grad_norm: 0.9999997294887574, iteration: 3717
loss: 1.595984697341919,grad_norm: 0.9999997785233042, iteration: 3718
loss: 1.7923215627670288,grad_norm: 0.9999998369697782, iteration: 3719
loss: 1.8338747024536133,grad_norm: 0.9999999312943694, iteration: 3720
loss: 1.6518232822418213,grad_norm: 0.9999998703852093, iteration: 3721
loss: 1.5919368267059326,grad_norm: 0.9999998016096665, iteration: 3722
loss: 1.7331148386001587,grad_norm: 0.9999998554467834, iteration: 3723
loss: 1.863908290863037,grad_norm: 0.9999998750638138, iteration: 3724
loss: 1.7113046646118164,grad_norm: 0.9999998280820367, iteration: 3725
loss: 1.693208932876587,grad_norm: 0.9999997702430028, iteration: 3726
loss: 1.6569368839263916,grad_norm: 0.9999997671152213, iteration: 3727
loss: 1.7187281847000122,grad_norm: 0.9999998034031257, iteration: 3728
loss: 1.6146854162216187,grad_norm: 0.9999998729078616, iteration: 3729
loss: 1.7763705253601074,grad_norm: 0.9999997862143877, iteration: 3730
loss: 1.7168952226638794,grad_norm: 0.9999997792147338, iteration: 3731
loss: 1.579328179359436,grad_norm: 0.9999998218855034, iteration: 3732
loss: 1.5042866468429565,grad_norm: 0.9999997595692096, iteration: 3733
loss: 1.8644566535949707,grad_norm: 0.9999997925208748, iteration: 3734
loss: 1.7314581871032715,grad_norm: 0.9999998385107133, iteration: 3735
loss: 1.7471160888671875,grad_norm: 0.9999998066376521, iteration: 3736
loss: 1.6966718435287476,grad_norm: 0.9999997121778669, iteration: 3737
loss: 1.7070868015289307,grad_norm: 0.9999997465090475, iteration: 3738
loss: 1.6264878511428833,grad_norm: 0.9999998568839225, iteration: 3739
loss: 1.5388848781585693,grad_norm: 0.9999998218453106, iteration: 3740
loss: 1.7642617225646973,grad_norm: 0.9999998742634577, iteration: 3741
loss: 1.6721649169921875,grad_norm: 0.9999998132035717, iteration: 3742
loss: 1.6788884401321411,grad_norm: 0.999999806488969, iteration: 3743
loss: 1.5489383935928345,grad_norm: 0.9999997814293434, iteration: 3744
loss: 1.7306163311004639,grad_norm: 0.9999998280923665, iteration: 3745
loss: 1.66971755027771,grad_norm: 0.999999815025054, iteration: 3746
loss: 1.6134507656097412,grad_norm: 0.9999997510099605, iteration: 3747
loss: 1.7201240062713623,grad_norm: 0.9999998497401857, iteration: 3748
loss: 1.6697075366973877,grad_norm: 0.9999998272094056, iteration: 3749
loss: 1.5673221349716187,grad_norm: 0.9999998139825316, iteration: 3750
loss: 1.6630510091781616,grad_norm: 0.9999998759420939, iteration: 3751
loss: 1.7130229473114014,grad_norm: 0.999999837408715, iteration: 3752
loss: 1.6541893482208252,grad_norm: 0.9999997826387983, iteration: 3753
loss: 1.7049511671066284,grad_norm: 0.9999997671902568, iteration: 3754
loss: 1.634537696838379,grad_norm: 0.9999997661212829, iteration: 3755
loss: 1.8398945331573486,grad_norm: 0.9999997938060644, iteration: 3756
loss: 1.5314522981643677,grad_norm: 0.9999997951076566, iteration: 3757
loss: 1.759874701499939,grad_norm: 0.9999997822945423, iteration: 3758
loss: 1.7218806743621826,grad_norm: 0.999999797288272, iteration: 3759
loss: 1.6917033195495605,grad_norm: 0.9999998147581424, iteration: 3760
loss: 1.6504420042037964,grad_norm: 0.9999998596486174, iteration: 3761
loss: 1.6659938097000122,grad_norm: 0.999999901194909, iteration: 3762
loss: 1.7662298679351807,grad_norm: 0.9999999248535204, iteration: 3763
loss: 1.6950687170028687,grad_norm: 0.9999998117609096, iteration: 3764
loss: 1.641503095626831,grad_norm: 0.9999997524937025, iteration: 3765
loss: 1.836790919303894,grad_norm: 0.9999998378677286, iteration: 3766
loss: 1.655004858970642,grad_norm: 0.9999997836495651, iteration: 3767
loss: 1.669270634651184,grad_norm: 0.9999998399954685, iteration: 3768
loss: 1.5752339363098145,grad_norm: 0.999999839484613, iteration: 3769
loss: 1.8709384202957153,grad_norm: 0.9999998878449677, iteration: 3770
loss: 1.634753942489624,grad_norm: 0.9999998294846225, iteration: 3771
loss: 1.6858309507369995,grad_norm: 0.9999998813439405, iteration: 3772
loss: 1.6825101375579834,grad_norm: 0.999999829558832, iteration: 3773
loss: 1.6726709604263306,grad_norm: 0.9999997832428289, iteration: 3774
loss: 1.559374451637268,grad_norm: 0.9999998368001095, iteration: 3775
loss: 1.719188928604126,grad_norm: 0.9999998420069159, iteration: 3776
loss: 1.607412338256836,grad_norm: 0.9999998357700921, iteration: 3777
loss: 1.5231192111968994,grad_norm: 0.9999997097972836, iteration: 3778
loss: 1.5742931365966797,grad_norm: 0.9999997786566918, iteration: 3779
loss: 1.6766357421875,grad_norm: 0.9999997755788432, iteration: 3780
loss: 1.846834421157837,grad_norm: 0.9999997954485915, iteration: 3781
loss: 1.5832908153533936,grad_norm: 0.9999997916721162, iteration: 3782
loss: 1.6993272304534912,grad_norm: 0.9999998538821322, iteration: 3783
loss: 1.792633056640625,grad_norm: 0.9999998760325185, iteration: 3784
loss: 1.5597724914550781,grad_norm: 0.9999998377687156, iteration: 3785
loss: 1.6654348373413086,grad_norm: 0.9999998526578026, iteration: 3786
loss: 1.6148556470870972,grad_norm: 0.9999997870550295, iteration: 3787
loss: 1.7695902585983276,grad_norm: 0.999999847714686, iteration: 3788
loss: 1.7680273056030273,grad_norm: 0.9999998700289889, iteration: 3789
loss: 1.6344459056854248,grad_norm: 0.9999998027556525, iteration: 3790
loss: 1.5222374200820923,grad_norm: 0.9999998377687358, iteration: 3791
loss: 1.6763520240783691,grad_norm: 0.9999998683124164, iteration: 3792
loss: 1.7840850353240967,grad_norm: 0.9999998312232791, iteration: 3793
loss: 1.638459324836731,grad_norm: 0.9999997949456254, iteration: 3794
loss: 1.5766552686691284,grad_norm: 0.9999998097876907, iteration: 3795
loss: 1.6311367750167847,grad_norm: 0.9999998412788504, iteration: 3796
loss: 1.7128268480300903,grad_norm: 0.9999998836177021, iteration: 3797
loss: 1.6967673301696777,grad_norm: 0.9999998757750606, iteration: 3798
loss: 1.7389887571334839,grad_norm: 0.9999998572760211, iteration: 3799
loss: 1.747597098350525,grad_norm: 0.9999998283780848, iteration: 3800
loss: 1.6046159267425537,grad_norm: 0.9999997595864804, iteration: 3801
loss: 1.694400429725647,grad_norm: 0.9999998121484046, iteration: 3802
loss: 1.5951573848724365,grad_norm: 0.9999997960830903, iteration: 3803
loss: 1.6490689516067505,grad_norm: 0.9999997996110479, iteration: 3804
loss: 1.8356019258499146,grad_norm: 0.9999998468899063, iteration: 3805
loss: 1.5516469478607178,grad_norm: 0.9999997686534369, iteration: 3806
loss: 1.684836506843567,grad_norm: 0.9999997341671508, iteration: 3807
loss: 1.655426025390625,grad_norm: 0.9999998085845729, iteration: 3808
loss: 1.716782569885254,grad_norm: 0.9999998399413753, iteration: 3809
loss: 1.7436437606811523,grad_norm: 0.9999998211097629, iteration: 3810
loss: 1.6195353269577026,grad_norm: 0.9999997930395088, iteration: 3811
loss: 1.7329249382019043,grad_norm: 0.9999998795955517, iteration: 3812
loss: 1.8515056371688843,grad_norm: 0.9999998049106401, iteration: 3813
loss: 1.722292423248291,grad_norm: 0.9999997139705188, iteration: 3814
loss: 1.7070187330245972,grad_norm: 0.9999997971314573, iteration: 3815
loss: 1.7438167333602905,grad_norm: 0.9999998172383177, iteration: 3816
loss: 1.5948089361190796,grad_norm: 0.9999998545894281, iteration: 3817
loss: 1.5701050758361816,grad_norm: 0.9999997737631212, iteration: 3818
loss: 1.708725094795227,grad_norm: 0.9999997960281206, iteration: 3819
loss: 1.7366786003112793,grad_norm: 0.9999998230477586, iteration: 3820
loss: 1.66802179813385,grad_norm: 0.999999854871814, iteration: 3821
loss: 1.5792418718338013,grad_norm: 0.9999998738596493, iteration: 3822
loss: 1.6903820037841797,grad_norm: 0.999999815576844, iteration: 3823
loss: 1.7301326990127563,grad_norm: 0.9999997991468678, iteration: 3824
loss: 1.5608166456222534,grad_norm: 0.9999997344235374, iteration: 3825
loss: 1.6167975664138794,grad_norm: 0.9999998168912307, iteration: 3826
loss: 1.6788607835769653,grad_norm: 0.9999998449968763, iteration: 3827
loss: 1.760513424873352,grad_norm: 0.9999997538443051, iteration: 3828
loss: 1.649619698524475,grad_norm: 0.999999726898453, iteration: 3829
loss: 1.5721362829208374,grad_norm: 0.9999998583465751, iteration: 3830
loss: 1.6700667142868042,grad_norm: 0.9999997695378487, iteration: 3831
loss: 1.60684335231781,grad_norm: 0.9999998881494143, iteration: 3832
loss: 1.6382341384887695,grad_norm: 0.9999997934420827, iteration: 3833
loss: 1.6885837316513062,grad_norm: 0.9999997888329603, iteration: 3834
loss: 1.6950936317443848,grad_norm: 0.9999998076286732, iteration: 3835
loss: 1.5478273630142212,grad_norm: 0.9999997779007215, iteration: 3836
loss: 1.571481466293335,grad_norm: 0.9999998042915036, iteration: 3837
loss: 1.8102984428405762,grad_norm: 0.9999998263992453, iteration: 3838
loss: 1.7714236974716187,grad_norm: 0.9999998517764865, iteration: 3839
loss: 1.8212010860443115,grad_norm: 0.999999789256524, iteration: 3840
loss: 1.7496362924575806,grad_norm: 0.9999999025167109, iteration: 3841
loss: 1.7163949012756348,grad_norm: 0.9999998294654425, iteration: 3842
loss: 1.7134616374969482,grad_norm: 0.9999997651242521, iteration: 3843
loss: 1.59625244140625,grad_norm: 0.999999778425156, iteration: 3844
loss: 1.5695312023162842,grad_norm: 0.9999998246065299, iteration: 3845
loss: 1.6951380968093872,grad_norm: 0.9999998070997912, iteration: 3846
loss: 1.6174650192260742,grad_norm: 0.9999998400647959, iteration: 3847
loss: 1.6750558614730835,grad_norm: 0.999999768314234, iteration: 3848
loss: 1.6817981004714966,grad_norm: 0.9999998005253882, iteration: 3849
loss: 1.6020784378051758,grad_norm: 0.9999998053283856, iteration: 3850
loss: 1.7039681673049927,grad_norm: 0.9999998088592759, iteration: 3851
loss: 1.534693956375122,grad_norm: 0.9999998199920843, iteration: 3852
loss: 1.6287322044372559,grad_norm: 0.9999997861766862, iteration: 3853
loss: 1.724801778793335,grad_norm: 0.999999774435592, iteration: 3854
loss: 1.6853529214859009,grad_norm: 0.9999998558750834, iteration: 3855
loss: 1.6635934114456177,grad_norm: 0.9999997627584581, iteration: 3856
loss: 1.630352258682251,grad_norm: 0.9999998505296788, iteration: 3857
loss: 1.5536996126174927,grad_norm: 0.9999998298992413, iteration: 3858
loss: 1.9124504327774048,grad_norm: 0.9999998009354372, iteration: 3859
loss: 1.6785740852355957,grad_norm: 0.9999998114691547, iteration: 3860
loss: 1.6755726337432861,grad_norm: 0.9999997789500259, iteration: 3861
loss: 1.7506123781204224,grad_norm: 0.9999998532216894, iteration: 3862
loss: 1.692882776260376,grad_norm: 0.9999998021514696, iteration: 3863
loss: 1.707842469215393,grad_norm: 0.9999998062250945, iteration: 3864
loss: 1.4799257516860962,grad_norm: 0.9999998017867586, iteration: 3865
loss: 1.7423877716064453,grad_norm: 0.9999997967656933, iteration: 3866
loss: 1.576158881187439,grad_norm: 0.9999998587139661, iteration: 3867
loss: 1.7482796907424927,grad_norm: 0.9999998273503625, iteration: 3868
loss: 1.5661988258361816,grad_norm: 0.9999997558800321, iteration: 3869
loss: 1.6377007961273193,grad_norm: 0.9999997795061877, iteration: 3870
loss: 1.6958403587341309,grad_norm: 0.9999998163002279, iteration: 3871
loss: 1.786263108253479,grad_norm: 0.9999998407954492, iteration: 3872
loss: 1.590786099433899,grad_norm: 0.9999998787787269, iteration: 3873
loss: 1.9274977445602417,grad_norm: 0.9999998610223941, iteration: 3874
loss: 1.677842617034912,grad_norm: 0.9999998638307616, iteration: 3875
loss: 1.5438573360443115,grad_norm: 0.9999998156068812, iteration: 3876
loss: 1.6693685054779053,grad_norm: 0.9999998696414223, iteration: 3877
loss: 1.5800986289978027,grad_norm: 0.9999997856463525, iteration: 3878
loss: 1.6171637773513794,grad_norm: 0.9999998633555726, iteration: 3879
loss: 1.6343584060668945,grad_norm: 0.9999997902135564, iteration: 3880
loss: 1.649305820465088,grad_norm: 0.9999998413623268, iteration: 3881
loss: 1.7836201190948486,grad_norm: 0.9999997845478833, iteration: 3882
loss: 1.5697829723358154,grad_norm: 0.9999997347431758, iteration: 3883
loss: 1.7172907590866089,grad_norm: 0.9999997966917857, iteration: 3884
loss: 1.6382942199707031,grad_norm: 0.9999998253169488, iteration: 3885
loss: 1.5795754194259644,grad_norm: 0.9999997919129985, iteration: 3886
loss: 1.63432776927948,grad_norm: 0.9999998088041223, iteration: 3887
loss: 1.6120394468307495,grad_norm: 0.999999739542705, iteration: 3888
loss: 1.8814842700958252,grad_norm: 0.9999998019454382, iteration: 3889
loss: 1.7019453048706055,grad_norm: 0.9999998137634285, iteration: 3890
loss: 1.6247268915176392,grad_norm: 0.999999771815713, iteration: 3891
loss: 1.604619026184082,grad_norm: 0.9999998024040488, iteration: 3892
loss: 1.5772759914398193,grad_norm: 0.9999998195692039, iteration: 3893
loss: 1.6720088720321655,grad_norm: 0.9999998037844526, iteration: 3894
loss: 1.5400645732879639,grad_norm: 0.9999997629425327, iteration: 3895
loss: 1.8973082304000854,grad_norm: 0.9999998937216206, iteration: 3896
loss: 1.5404082536697388,grad_norm: 0.9999998308810685, iteration: 3897
loss: 1.7482314109802246,grad_norm: 0.9999997843484897, iteration: 3898
loss: 1.6464645862579346,grad_norm: 0.9999998511267085, iteration: 3899
loss: 1.7322702407836914,grad_norm: 0.9999997884206079, iteration: 3900
loss: 1.6172171831130981,grad_norm: 0.9999998295733933, iteration: 3901
loss: 1.686116337776184,grad_norm: 0.9999997548059358, iteration: 3902
loss: 1.6351191997528076,grad_norm: 0.9999997859948582, iteration: 3903
loss: 1.5862653255462646,grad_norm: 0.9999997546530984, iteration: 3904
loss: 1.6053187847137451,grad_norm: 0.9999997916069561, iteration: 3905
loss: 1.607993483543396,grad_norm: 0.9999998314703734, iteration: 3906
loss: 1.761419415473938,grad_norm: 0.9999997814988967, iteration: 3907
loss: 1.581924557685852,grad_norm: 0.999999755248146, iteration: 3908
loss: 1.5957566499710083,grad_norm: 0.9999997432282415, iteration: 3909
loss: 1.5586903095245361,grad_norm: 0.9999998760005385, iteration: 3910
loss: 1.6234862804412842,grad_norm: 0.9999998713184791, iteration: 3911
loss: 1.6264915466308594,grad_norm: 0.9999998223757746, iteration: 3912
loss: 1.65647292137146,grad_norm: 0.9999997822819263, iteration: 3913
loss: 1.7745126485824585,grad_norm: 0.9999998832027521, iteration: 3914
loss: 1.614288330078125,grad_norm: 0.9999998715509741, iteration: 3915
loss: 1.7355202436447144,grad_norm: 0.9999998085844507, iteration: 3916
loss: 1.4914438724517822,grad_norm: 0.9999997758553169, iteration: 3917
loss: 1.5813413858413696,grad_norm: 0.9999998529515208, iteration: 3918
loss: 1.677585244178772,grad_norm: 0.9999998196435136, iteration: 3919
loss: 1.6053324937820435,grad_norm: 0.9999998048300661, iteration: 3920
loss: 1.6477386951446533,grad_norm: 0.9999997171207865, iteration: 3921
loss: 1.6030751466751099,grad_norm: 0.9999997477318139, iteration: 3922
loss: 1.6024876832962036,grad_norm: 0.9999998604067716, iteration: 3923
loss: 1.6089633703231812,grad_norm: 0.9999999035723456, iteration: 3924
loss: 1.6951546669006348,grad_norm: 0.999999830000216, iteration: 3925
loss: 1.8077675104141235,grad_norm: 0.9999998307284783, iteration: 3926
loss: 1.5315080881118774,grad_norm: 0.9999998403698008, iteration: 3927
loss: 1.671079397201538,grad_norm: 0.9999999122498955, iteration: 3928
loss: 1.6753275394439697,grad_norm: 0.9999998732539185, iteration: 3929
loss: 1.6589672565460205,grad_norm: 0.999999820795268, iteration: 3930
loss: 1.6145564317703247,grad_norm: 0.9999997914350206, iteration: 3931
loss: 1.6770732402801514,grad_norm: 0.9999998745221218, iteration: 3932
loss: 1.7051892280578613,grad_norm: 0.9999997723455656, iteration: 3933
loss: 1.6406410932540894,grad_norm: 0.9999998611206171, iteration: 3934
loss: 1.704674482345581,grad_norm: 0.9999998622504446, iteration: 3935
loss: 1.724112868309021,grad_norm: 0.9999998890977434, iteration: 3936
loss: 1.7133231163024902,grad_norm: 0.9999998365763697, iteration: 3937
loss: 1.678030252456665,grad_norm: 0.9999997727585463, iteration: 3938
loss: 1.594443917274475,grad_norm: 0.9999997951918482, iteration: 3939
loss: 1.6232742071151733,grad_norm: 0.9999998345266895, iteration: 3940
loss: 1.6311275959014893,grad_norm: 0.9999997220644653, iteration: 3941
loss: 1.6522457599639893,grad_norm: 0.9999998240171908, iteration: 3942
loss: 1.6356858015060425,grad_norm: 0.9999997766776415, iteration: 3943
loss: 1.6162928342819214,grad_norm: 0.9999997796525018, iteration: 3944
loss: 1.575353741645813,grad_norm: 0.9999997946323872, iteration: 3945
loss: 1.6329582929611206,grad_norm: 0.9999997954894033, iteration: 3946
loss: 1.688978672027588,grad_norm: 0.9999998544580354, iteration: 3947
loss: 1.703072190284729,grad_norm: 0.9999997818368843, iteration: 3948
loss: 1.7367901802062988,grad_norm: 0.9999997828418062, iteration: 3949
loss: 1.5274516344070435,grad_norm: 0.9999998146379576, iteration: 3950
loss: 1.6373440027236938,grad_norm: 0.9999997803292605, iteration: 3951
loss: 1.5934308767318726,grad_norm: 0.9999997963843965, iteration: 3952
loss: 1.5505239963531494,grad_norm: 0.9999998112259039, iteration: 3953
loss: 1.609513521194458,grad_norm: 0.9999997784803347, iteration: 3954
loss: 1.7347872257232666,grad_norm: 0.9999998162499971, iteration: 3955
loss: 1.7249912023544312,grad_norm: 0.9999998494082627, iteration: 3956
loss: 1.5533666610717773,grad_norm: 0.9999997388102417, iteration: 3957
loss: 1.5985186100006104,grad_norm: 0.9999997132032445, iteration: 3958
loss: 1.6612365245819092,grad_norm: 0.9999997789346021, iteration: 3959
loss: 1.4855533838272095,grad_norm: 0.9999997719460995, iteration: 3960
loss: 1.7458364963531494,grad_norm: 0.999999885468637, iteration: 3961
loss: 1.6103304624557495,grad_norm: 0.9999998096805107, iteration: 3962
loss: 1.5984123945236206,grad_norm: 0.9999997953348744, iteration: 3963
loss: 1.5930120944976807,grad_norm: 0.999999832926524, iteration: 3964
loss: 1.5246565341949463,grad_norm: 0.9999998828889898, iteration: 3965
loss: 1.6666282415390015,grad_norm: 0.999999758630034, iteration: 3966
loss: 1.8303906917572021,grad_norm: 0.9999998197882942, iteration: 3967
loss: 1.6543047428131104,grad_norm: 0.9999997770821426, iteration: 3968
loss: 1.6443321704864502,grad_norm: 0.999999874355051, iteration: 3969
loss: 1.669978380203247,grad_norm: 0.9999998273685891, iteration: 3970
loss: 1.597419261932373,grad_norm: 0.9999998103501484, iteration: 3971
loss: 1.6709394454956055,grad_norm: 0.999999799382359, iteration: 3972
loss: 1.692097783088684,grad_norm: 0.9999998614279013, iteration: 3973
loss: 1.5469266176223755,grad_norm: 0.9999998166399414, iteration: 3974
loss: 1.5411148071289062,grad_norm: 0.9999998568929003, iteration: 3975
loss: 1.7080434560775757,grad_norm: 0.9999998927269631, iteration: 3976
loss: 1.543708324432373,grad_norm: 0.9999997751941354, iteration: 3977
loss: 1.6008061170578003,grad_norm: 0.999999765733869, iteration: 3978
loss: 1.5007648468017578,grad_norm: 0.9999998539671782, iteration: 3979
loss: 1.6975923776626587,grad_norm: 0.9999998458458275, iteration: 3980
loss: 1.5980420112609863,grad_norm: 0.9999997378554719, iteration: 3981
loss: 1.6628468036651611,grad_norm: 0.9999998567556933, iteration: 3982
loss: 1.7242900133132935,grad_norm: 0.9999998678022082, iteration: 3983
loss: 1.5856190919876099,grad_norm: 0.9999998605141907, iteration: 3984
loss: 1.6497762203216553,grad_norm: 0.999999764942516, iteration: 3985
loss: 1.6304588317871094,grad_norm: 0.9999998862316196, iteration: 3986
loss: 1.5506647825241089,grad_norm: 0.9999996414890829, iteration: 3987
loss: 1.5742360353469849,grad_norm: 0.9999998333741565, iteration: 3988
loss: 1.6498308181762695,grad_norm: 0.9999997826056778, iteration: 3989
loss: 1.5501207113265991,grad_norm: 0.9999998595358911, iteration: 3990
loss: 1.564684271812439,grad_norm: 0.9999997842024136, iteration: 3991
loss: 1.5821613073349,grad_norm: 0.9999997535267363, iteration: 3992
loss: 1.6339212656021118,grad_norm: 0.9999998318843834, iteration: 3993
loss: 1.664209246635437,grad_norm: 0.9999998444104657, iteration: 3994
loss: 1.737608551979065,grad_norm: 0.9999998718488541, iteration: 3995
loss: 1.6587082147598267,grad_norm: 0.9999998692638313, iteration: 3996
loss: 1.6260664463043213,grad_norm: 0.9999998128777944, iteration: 3997
loss: 1.6381832361221313,grad_norm: 0.999999777514153, iteration: 3998
loss: 1.6819666624069214,grad_norm: 0.9999998302959837, iteration: 3999
loss: 1.6228312253952026,grad_norm: 0.9999998360139499, iteration: 4000
loss: 1.4926809072494507,grad_norm: 0.9999997970278062, iteration: 4001
loss: 1.7421513795852661,grad_norm: 0.9999997687528351, iteration: 4002
loss: 1.651961088180542,grad_norm: 0.9999998174629029, iteration: 4003
loss: 1.5618674755096436,grad_norm: 0.9999998328250713, iteration: 4004
loss: 1.680954933166504,grad_norm: 0.9999997612947289, iteration: 4005
loss: 1.6500310897827148,grad_norm: 0.9999998605203715, iteration: 4006
loss: 1.6607974767684937,grad_norm: 0.9999997912795443, iteration: 4007
loss: 1.6000028848648071,grad_norm: 0.9999998480619366, iteration: 4008
loss: 1.628063440322876,grad_norm: 0.9999998400529717, iteration: 4009
loss: 1.7264007329940796,grad_norm: 0.9999998476058971, iteration: 4010
loss: 1.7032444477081299,grad_norm: 0.9999998759133962, iteration: 4011
loss: 1.5858378410339355,grad_norm: 0.9999997305606831, iteration: 4012
loss: 1.6635105609893799,grad_norm: 0.9999998500864575, iteration: 4013
loss: 1.6947695016860962,grad_norm: 0.9999998256970255, iteration: 4014
loss: 1.6102147102355957,grad_norm: 0.9999998135275533, iteration: 4015
loss: 1.4438252449035645,grad_norm: 0.9999998022261793, iteration: 4016
loss: 1.7467796802520752,grad_norm: 0.9999997697435132, iteration: 4017
loss: 1.638523817062378,grad_norm: 0.9999997216883569, iteration: 4018
loss: 1.5916553735733032,grad_norm: 0.9999998190935141, iteration: 4019
loss: 1.7076629400253296,grad_norm: 0.9999998265233326, iteration: 4020
loss: 1.5196988582611084,grad_norm: 0.9999998148356931, iteration: 4021
loss: 1.575911045074463,grad_norm: 0.9999998358069785, iteration: 4022
loss: 1.6170495748519897,grad_norm: 0.9999998209469201, iteration: 4023
loss: 1.636685848236084,grad_norm: 0.9999998439584933, iteration: 4024
loss: 1.6662198305130005,grad_norm: 0.9999999024840802, iteration: 4025
loss: 1.6423133611679077,grad_norm: 0.9999997867346648, iteration: 4026
loss: 1.5016292333602905,grad_norm: 0.9999997753660828, iteration: 4027
loss: 1.586402416229248,grad_norm: 0.9999998626801909, iteration: 4028
loss: 1.6996170282363892,grad_norm: 0.9999997120800753, iteration: 4029
loss: 1.6571964025497437,grad_norm: 0.9999998429476827, iteration: 4030
loss: 1.672377347946167,grad_norm: 0.9999998378517458, iteration: 4031
loss: 1.5585920810699463,grad_norm: 0.9999997504040916, iteration: 4032
loss: 1.601151704788208,grad_norm: 0.9999998065536577, iteration: 4033
loss: 1.579896092414856,grad_norm: 0.9999997688210603, iteration: 4034
loss: 1.420129418373108,grad_norm: 0.9999997648099052, iteration: 4035
loss: 1.6487479209899902,grad_norm: 0.9999998705234303, iteration: 4036
loss: 1.5438553094863892,grad_norm: 0.9999998064206064, iteration: 4037
loss: 1.6440212726593018,grad_norm: 0.9999997647282326, iteration: 4038
loss: 1.7886314392089844,grad_norm: 0.9999998585461711, iteration: 4039
loss: 1.4598147869110107,grad_norm: 0.9999997495756626, iteration: 4040
loss: 1.5776275396347046,grad_norm: 0.9999996774087581, iteration: 4041
loss: 1.6128604412078857,grad_norm: 0.9999997306031901, iteration: 4042
loss: 1.5730206966400146,grad_norm: 0.9999998251004203, iteration: 4043
loss: 1.8038413524627686,grad_norm: 0.9999999077319862, iteration: 4044
loss: 1.5831325054168701,grad_norm: 0.9999998819685834, iteration: 4045
loss: 1.5603420734405518,grad_norm: 0.9999998212297627, iteration: 4046
loss: 1.412203073501587,grad_norm: 0.9999997590646497, iteration: 4047
loss: 1.7348690032958984,grad_norm: 0.9999997789092685, iteration: 4048
loss: 1.7302905321121216,grad_norm: 0.9999998779612956, iteration: 4049
loss: 1.6820764541625977,grad_norm: 0.9999998102218443, iteration: 4050
loss: 1.543259620666504,grad_norm: 0.9999997228500519, iteration: 4051
loss: 1.6590054035186768,grad_norm: 0.9999998104416211, iteration: 4052
loss: 1.5535959005355835,grad_norm: 0.9999997917026501, iteration: 4053
loss: 1.6232463121414185,grad_norm: 0.9999998178319837, iteration: 4054
loss: 1.7437000274658203,grad_norm: 0.9999998269789465, iteration: 4055
loss: 1.6312859058380127,grad_norm: 0.9999997851532162, iteration: 4056
loss: 1.695624828338623,grad_norm: 0.9999998532779252, iteration: 4057
loss: 1.5584336519241333,grad_norm: 0.999999846748957, iteration: 4058
loss: 1.4609538316726685,grad_norm: 0.999999760581951, iteration: 4059
loss: 1.4789037704467773,grad_norm: 0.9999998541865721, iteration: 4060
loss: 1.6491938829421997,grad_norm: 0.9999998852305122, iteration: 4061
loss: 1.5921350717544556,grad_norm: 0.9999997810912336, iteration: 4062
loss: 1.7741249799728394,grad_norm: 0.9999998180055519, iteration: 4063
loss: 1.6752856969833374,grad_norm: 0.9999998309796719, iteration: 4064
loss: 1.7083510160446167,grad_norm: 0.9999998607374032, iteration: 4065
loss: 1.6257379055023193,grad_norm: 0.9999998028932572, iteration: 4066
loss: 1.6126950979232788,grad_norm: 0.9999997173175795, iteration: 4067
loss: 1.6109237670898438,grad_norm: 0.9999998337906556, iteration: 4068
loss: 1.5464816093444824,grad_norm: 0.9999998082551199, iteration: 4069
loss: 1.4319108724594116,grad_norm: 0.999999767212721, iteration: 4070
loss: 1.6119157075881958,grad_norm: 0.9999998214550088, iteration: 4071
loss: 1.6774147748947144,grad_norm: 0.9999997890722094, iteration: 4072
loss: 1.510595679283142,grad_norm: 0.9999997829228732, iteration: 4073
loss: 1.6635074615478516,grad_norm: 0.9999998594254998, iteration: 4074
loss: 1.5770918130874634,grad_norm: 0.9999999071391826, iteration: 4075
loss: 1.5730619430541992,grad_norm: 0.9999997338169876, iteration: 4076
loss: 1.6275572776794434,grad_norm: 0.9999998465603677, iteration: 4077
loss: 1.6213247776031494,grad_norm: 0.999999832429804, iteration: 4078
loss: 1.4554702043533325,grad_norm: 0.9999998285179792, iteration: 4079
loss: 1.5900019407272339,grad_norm: 0.999999870923779, iteration: 4080
loss: 1.6766676902770996,grad_norm: 0.9999997907744098, iteration: 4081
loss: 1.6429741382598877,grad_norm: 0.9999997880191609, iteration: 4082
loss: 1.651305913925171,grad_norm: 0.9999998416169856, iteration: 4083
loss: 1.6324408054351807,grad_norm: 0.9999998727767969, iteration: 4084
loss: 1.622819423675537,grad_norm: 0.9999998624507441, iteration: 4085
loss: 1.5358576774597168,grad_norm: 0.9999998129723952, iteration: 4086
loss: 1.5451464653015137,grad_norm: 0.999999779897095, iteration: 4087
loss: 1.5711815357208252,grad_norm: 0.9999997797488569, iteration: 4088
loss: 1.6672866344451904,grad_norm: 0.9999997668256531, iteration: 4089
loss: 1.4943934679031372,grad_norm: 0.999999806303207, iteration: 4090
loss: 1.5611623525619507,grad_norm: 0.9999998321035093, iteration: 4091
loss: 1.6584781408309937,grad_norm: 0.9999998263411836, iteration: 4092
loss: 1.7047138214111328,grad_norm: 0.9999997779894932, iteration: 4093
loss: 1.641588568687439,grad_norm: 0.9999998260848605, iteration: 4094
loss: 1.407232403755188,grad_norm: 0.9999997534175646, iteration: 4095
loss: 1.4926542043685913,grad_norm: 0.9999997798558825, iteration: 4096
loss: 1.4951626062393188,grad_norm: 0.9999998177514768, iteration: 4097
loss: 1.4092189073562622,grad_norm: 0.9999997551666422, iteration: 4098
loss: 1.5133980512619019,grad_norm: 0.999999833118263, iteration: 4099
loss: 1.4425833225250244,grad_norm: 0.9999998045006783, iteration: 4100
loss: 1.3986588716506958,grad_norm: 0.9999998232484714, iteration: 4101
loss: 1.4576928615570068,grad_norm: 0.9999998387491317, iteration: 4102
loss: 1.6235785484313965,grad_norm: 0.999999768468292, iteration: 4103
loss: 1.6290785074234009,grad_norm: 0.9999997816273098, iteration: 4104
loss: 1.5271247625350952,grad_norm: 0.9999997946726669, iteration: 4105
loss: 1.7303440570831299,grad_norm: 0.9999998984106716, iteration: 4106
loss: 1.674085021018982,grad_norm: 0.9999998104841765, iteration: 4107
loss: 1.6166936159133911,grad_norm: 0.999999840486175, iteration: 4108
loss: 1.7326184511184692,grad_norm: 0.9999998926795735, iteration: 4109
loss: 1.4639142751693726,grad_norm: 0.9999998124085829, iteration: 4110
loss: 1.580272912979126,grad_norm: 0.999999866858069, iteration: 4111
loss: 1.6774400472640991,grad_norm: 0.9999998074134594, iteration: 4112
loss: 1.671987533569336,grad_norm: 0.9999997692514975, iteration: 4113
loss: 1.5894882678985596,grad_norm: 0.9999998067891162, iteration: 4114
loss: 1.6152304410934448,grad_norm: 0.9999998054959429, iteration: 4115
loss: 1.6587929725646973,grad_norm: 0.9999997808968492, iteration: 4116
loss: 1.687283992767334,grad_norm: 0.9999998092212146, iteration: 4117
loss: 1.7490431070327759,grad_norm: 0.999999832623021, iteration: 4118
loss: 1.7841521501541138,grad_norm: 0.9999998625222403, iteration: 4119
loss: 1.5627472400665283,grad_norm: 0.9999997937087143, iteration: 4120
loss: 1.6676678657531738,grad_norm: 0.9999999218957069, iteration: 4121
loss: 1.7133506536483765,grad_norm: 0.9999997773223934, iteration: 4122
loss: 1.7435888051986694,grad_norm: 0.9999997350422398, iteration: 4123
loss: 1.534652829170227,grad_norm: 0.9999998094575788, iteration: 4124
loss: 1.6687016487121582,grad_norm: 0.9999998095535023, iteration: 4125
loss: 1.496705174446106,grad_norm: 0.999999788831001, iteration: 4126
loss: 1.7049994468688965,grad_norm: 0.9999998600935351, iteration: 4127
loss: 1.6522190570831299,grad_norm: 0.9999998623501813, iteration: 4128
loss: 1.4950428009033203,grad_norm: 0.9999997519654015, iteration: 4129
loss: 1.715783953666687,grad_norm: 0.9999998726896268, iteration: 4130
loss: 1.559075117111206,grad_norm: 0.999999789304502, iteration: 4131
loss: 1.6359443664550781,grad_norm: 0.9999998487161462, iteration: 4132
loss: 1.580322265625,grad_norm: 0.9999998006803419, iteration: 4133
loss: 1.5806256532669067,grad_norm: 0.9999998157767351, iteration: 4134
loss: 1.5490254163742065,grad_norm: 0.9999998060686922, iteration: 4135
loss: 1.5829484462738037,grad_norm: 0.9999998886504061, iteration: 4136
loss: 1.4641999006271362,grad_norm: 0.9999997944393868, iteration: 4137
loss: 1.409162163734436,grad_norm: 0.999999743492208, iteration: 4138
loss: 1.6797720193862915,grad_norm: 0.9999998186677164, iteration: 4139
loss: 1.546134352684021,grad_norm: 0.9999998124145959, iteration: 4140
loss: 1.6032302379608154,grad_norm: 0.9999997468962507, iteration: 4141
loss: 1.5598621368408203,grad_norm: 0.9999998133638438, iteration: 4142
loss: 1.5657192468643188,grad_norm: 0.9999997723446953, iteration: 4143
loss: 1.6061376333236694,grad_norm: 0.9999998169964553, iteration: 4144
loss: 1.4702898263931274,grad_norm: 0.9999997515458274, iteration: 4145
loss: 1.6668968200683594,grad_norm: 0.9999998113469429, iteration: 4146
loss: 1.5657539367675781,grad_norm: 0.9999998832330325, iteration: 4147
loss: 1.491695761680603,grad_norm: 0.9999998281627944, iteration: 4148
loss: 1.6382571458816528,grad_norm: 0.9999998329922738, iteration: 4149
loss: 1.5311923027038574,grad_norm: 0.9999998345898047, iteration: 4150
loss: 1.5925556421279907,grad_norm: 0.9999997918057878, iteration: 4151
loss: 1.7209333181381226,grad_norm: 0.9999998765513693, iteration: 4152
loss: 1.7628402709960938,grad_norm: 0.9999997923829033, iteration: 4153
loss: 1.8006349802017212,grad_norm: 0.99999981874985, iteration: 4154
loss: 1.4879100322723389,grad_norm: 0.9999998323974407, iteration: 4155
loss: 1.5903315544128418,grad_norm: 0.9999998190755586, iteration: 4156
loss: 1.5488109588623047,grad_norm: 0.9999997778372205, iteration: 4157
loss: 1.5313184261322021,grad_norm: 0.9999998552653903, iteration: 4158
loss: 1.5700504779815674,grad_norm: 0.9999997489638024, iteration: 4159
loss: 1.5301406383514404,grad_norm: 0.9999997858717634, iteration: 4160
loss: 1.6372754573822021,grad_norm: 0.9999998445895107, iteration: 4161
loss: 1.6474144458770752,grad_norm: 0.999999844895737, iteration: 4162
loss: 1.5458468198776245,grad_norm: 0.9999998793160201, iteration: 4163
loss: 1.5482158660888672,grad_norm: 0.9999998187422862, iteration: 4164
loss: 1.4441274404525757,grad_norm: 0.9999998110753558, iteration: 4165
loss: 1.6061776876449585,grad_norm: 0.999999792281175, iteration: 4166
loss: 1.4952231645584106,grad_norm: 0.9999997992518844, iteration: 4167
loss: 1.6073873043060303,grad_norm: 0.9999998953095246, iteration: 4168
loss: 1.5129836797714233,grad_norm: 0.9999998292740081, iteration: 4169
loss: 1.5328112840652466,grad_norm: 0.9999998082359582, iteration: 4170
loss: 1.5847223997116089,grad_norm: 0.9999998875780097, iteration: 4171
loss: 1.6015969514846802,grad_norm: 0.9999998376994459, iteration: 4172
loss: 1.3348711729049683,grad_norm: 0.999999734195348, iteration: 4173
loss: 1.4983124732971191,grad_norm: 0.9999997640024701, iteration: 4174
loss: 1.64701247215271,grad_norm: 0.9999998402981075, iteration: 4175
loss: 1.660541296005249,grad_norm: 0.9999998193593052, iteration: 4176
loss: 1.5802295207977295,grad_norm: 0.9999997665292648, iteration: 4177
loss: 1.5639265775680542,grad_norm: 0.9999998087709554, iteration: 4178
loss: 1.4349461793899536,grad_norm: 0.9999997455353891, iteration: 4179
loss: 1.6839288473129272,grad_norm: 0.9999997630929874, iteration: 4180
loss: 1.7185592651367188,grad_norm: 0.9999997522266666, iteration: 4181
loss: 1.5359177589416504,grad_norm: 0.9999998338189385, iteration: 4182
loss: 1.539367437362671,grad_norm: 0.9999997582233036, iteration: 4183
loss: 1.565400242805481,grad_norm: 0.9999997459970319, iteration: 4184
loss: 1.5636577606201172,grad_norm: 0.9999998084422914, iteration: 4185
loss: 1.7365057468414307,grad_norm: 0.9999997747728953, iteration: 4186
loss: 1.6843737363815308,grad_norm: 0.9999998458505414, iteration: 4187
loss: 1.6360061168670654,grad_norm: 0.9999997208879831, iteration: 4188
loss: 1.5329798460006714,grad_norm: 0.9999998545179805, iteration: 4189
loss: 1.5239847898483276,grad_norm: 0.9999997471136176, iteration: 4190
loss: 1.5785129070281982,grad_norm: 0.9999998254743689, iteration: 4191
loss: 1.5480197668075562,grad_norm: 0.9999998337486165, iteration: 4192
loss: 1.4608420133590698,grad_norm: 0.999999811712603, iteration: 4193
loss: 1.6167583465576172,grad_norm: 0.9999998487900918, iteration: 4194
loss: 1.5619093179702759,grad_norm: 0.9999998968380214, iteration: 4195
loss: 1.7180187702178955,grad_norm: 0.9999998486999231, iteration: 4196
loss: 1.720015525817871,grad_norm: 0.9999998185708899, iteration: 4197
loss: 1.6246248483657837,grad_norm: 0.9999998647765443, iteration: 4198
loss: 1.593570590019226,grad_norm: 0.9999997853759333, iteration: 4199
loss: 1.5425931215286255,grad_norm: 0.9999998220504714, iteration: 4200
loss: 1.5855610370635986,grad_norm: 0.9999998505159216, iteration: 4201
loss: 1.5860352516174316,grad_norm: 0.9999997756800423, iteration: 4202
loss: 1.5867726802825928,grad_norm: 0.9999998032399825, iteration: 4203
loss: 1.5799251794815063,grad_norm: 0.999999800152758, iteration: 4204
loss: 1.3878400325775146,grad_norm: 0.9999998218224811, iteration: 4205
loss: 1.5827335119247437,grad_norm: 0.9999997377923434, iteration: 4206
loss: 1.677008032798767,grad_norm: 0.9999998126355589, iteration: 4207
loss: 1.4418416023254395,grad_norm: 0.9999998135527868, iteration: 4208
loss: 1.5981733798980713,grad_norm: 0.9999998272657703, iteration: 4209
loss: 1.5519087314605713,grad_norm: 0.9999997951218863, iteration: 4210
loss: 1.6270911693572998,grad_norm: 0.9999998145451341, iteration: 4211
loss: 1.5417293310165405,grad_norm: 0.9999997745640077, iteration: 4212
loss: 1.4697954654693604,grad_norm: 0.9999998355362075, iteration: 4213
loss: 1.6369541883468628,grad_norm: 0.9999997264984735, iteration: 4214
loss: 1.640309453010559,grad_norm: 0.9999999462563459, iteration: 4215
loss: 1.7258515357971191,grad_norm: 0.9999998197843707, iteration: 4216
loss: 1.5893042087554932,grad_norm: 0.9999998761075425, iteration: 4217
loss: 1.662936806678772,grad_norm: 0.9999998674721894, iteration: 4218
loss: 1.6157011985778809,grad_norm: 0.9999998416052867, iteration: 4219
loss: 1.681307315826416,grad_norm: 0.9999998068397687, iteration: 4220
loss: 1.536308765411377,grad_norm: 0.9999997824162734, iteration: 4221
loss: 1.5378047227859497,grad_norm: 0.999999872455047, iteration: 4222
loss: 1.5816961526870728,grad_norm: 0.9999998944519303, iteration: 4223
loss: 1.6377822160720825,grad_norm: 0.9999997921551642, iteration: 4224
loss: 1.408443570137024,grad_norm: 0.9999997870917068, iteration: 4225
loss: 1.5664604902267456,grad_norm: 0.9999997325635998, iteration: 4226
loss: 1.59107506275177,grad_norm: 0.9999997899103634, iteration: 4227
loss: 1.4040944576263428,grad_norm: 0.999999836559552, iteration: 4228
loss: 1.50133216381073,grad_norm: 0.9999998219569912, iteration: 4229
loss: 1.521979808807373,grad_norm: 0.9999998545690618, iteration: 4230
loss: 1.6041699647903442,grad_norm: 0.999999879120812, iteration: 4231
loss: 1.6925137042999268,grad_norm: 0.9999997491172817, iteration: 4232
loss: 1.6158045530319214,grad_norm: 0.9999998030872517, iteration: 4233
loss: 1.4400473833084106,grad_norm: 0.9999997355055815, iteration: 4234
loss: 1.6343573331832886,grad_norm: 0.9999998519185757, iteration: 4235
loss: 1.5183695554733276,grad_norm: 0.9999998309098426, iteration: 4236
loss: 1.5035227537155151,grad_norm: 0.9999997924340176, iteration: 4237
loss: 1.6753028631210327,grad_norm: 0.999999745661801, iteration: 4238
loss: 1.569187045097351,grad_norm: 0.9999997367846669, iteration: 4239
loss: 1.5890388488769531,grad_norm: 0.9999998010584268, iteration: 4240
loss: 1.5243839025497437,grad_norm: 0.9999997925194964, iteration: 4241
loss: 1.5204973220825195,grad_norm: 0.9999998522204058, iteration: 4242
loss: 1.5634390115737915,grad_norm: 0.9999998561257848, iteration: 4243
loss: 1.6214724779129028,grad_norm: 0.9999997348752976, iteration: 4244
loss: 1.6385974884033203,grad_norm: 0.9999998845689309, iteration: 4245
loss: 1.5886342525482178,grad_norm: 0.9999999008923027, iteration: 4246
loss: 1.6088628768920898,grad_norm: 0.9999998502263675, iteration: 4247
loss: 1.5224868059158325,grad_norm: 0.9999997580371746, iteration: 4248
loss: 1.4309098720550537,grad_norm: 0.9999998188743159, iteration: 4249
loss: 1.5788583755493164,grad_norm: 0.9999998042733936, iteration: 4250
loss: 1.5688385963439941,grad_norm: 0.999999792867783, iteration: 4251
loss: 1.5533467531204224,grad_norm: 0.9999998062813548, iteration: 4252
loss: 1.518537998199463,grad_norm: 0.9999997939056164, iteration: 4253
loss: 1.5268685817718506,grad_norm: 0.9999997391454446, iteration: 4254
loss: 1.6320356130599976,grad_norm: 0.9999998188340462, iteration: 4255
loss: 1.5546605587005615,grad_norm: 0.9999998396813565, iteration: 4256
loss: 1.7134885787963867,grad_norm: 0.9999998674144629, iteration: 4257
loss: 1.5887715816497803,grad_norm: 0.9999998585431956, iteration: 4258
loss: 1.5012164115905762,grad_norm: 0.999999737334933, iteration: 4259
loss: 1.5930336713790894,grad_norm: 0.9999997587095748, iteration: 4260
loss: 1.4569813013076782,grad_norm: 0.9999998014168147, iteration: 4261
loss: 1.6603742837905884,grad_norm: 0.9999998050955361, iteration: 4262
loss: 1.5882643461227417,grad_norm: 0.9999997787905831, iteration: 4263
loss: 1.5197875499725342,grad_norm: 0.9999998446484852, iteration: 4264
loss: 1.5470013618469238,grad_norm: 0.999999785378465, iteration: 4265
loss: 1.5725449323654175,grad_norm: 0.9999998112387862, iteration: 4266
loss: 1.7660163640975952,grad_norm: 0.9999997984475533, iteration: 4267
loss: 1.666876196861267,grad_norm: 0.9999997674970693, iteration: 4268
loss: 1.5558723211288452,grad_norm: 0.999999829039471, iteration: 4269
loss: 1.656764268875122,grad_norm: 0.999999848746979, iteration: 4270
loss: 1.4929183721542358,grad_norm: 0.9999998082804576, iteration: 4271
loss: 1.5904983282089233,grad_norm: 0.9999998012764214, iteration: 4272
loss: 1.5389389991760254,grad_norm: 0.9999998713163084, iteration: 4273
loss: 1.4627788066864014,grad_norm: 0.9999998185362025, iteration: 4274
loss: 1.6617541313171387,grad_norm: 0.9999998989686628, iteration: 4275
loss: 1.496769905090332,grad_norm: 0.9999997614031817, iteration: 4276
loss: 1.564105749130249,grad_norm: 0.999999808049971, iteration: 4277
loss: 1.5411417484283447,grad_norm: 0.9999998522655706, iteration: 4278
loss: 1.51332688331604,grad_norm: 0.9999997588906716, iteration: 4279
loss: 1.4237439632415771,grad_norm: 0.9999998089773764, iteration: 4280
loss: 1.6442209482192993,grad_norm: 0.9999998019306359, iteration: 4281
loss: 1.590860366821289,grad_norm: 0.9999997764999176, iteration: 4282
loss: 1.5872644186019897,grad_norm: 0.9999998061421772, iteration: 4283
loss: 1.5433114767074585,grad_norm: 0.9999998086703514, iteration: 4284
loss: 1.6181503534317017,grad_norm: 0.9999998063620789, iteration: 4285
loss: 1.353506326675415,grad_norm: 0.999999797386174, iteration: 4286
loss: 1.498050332069397,grad_norm: 0.9999998185157358, iteration: 4287
loss: 1.5046095848083496,grad_norm: 0.9999997874342323, iteration: 4288
loss: 1.65416419506073,grad_norm: 0.9999998759607117, iteration: 4289
loss: 1.6883726119995117,grad_norm: 0.9999998997310834, iteration: 4290
loss: 1.5014572143554688,grad_norm: 0.9999997927866768, iteration: 4291
loss: 1.4450674057006836,grad_norm: 0.9999998198746776, iteration: 4292
loss: 1.478405237197876,grad_norm: 0.9999998539094207, iteration: 4293
loss: 1.5909242630004883,grad_norm: 0.9999998865693092, iteration: 4294
loss: 1.5658774375915527,grad_norm: 0.9999998556355352, iteration: 4295
loss: 1.6375670433044434,grad_norm: 0.9999998031922879, iteration: 4296
loss: 1.5778714418411255,grad_norm: 0.9999999212574698, iteration: 4297
loss: 1.4918209314346313,grad_norm: 0.9999998352043895, iteration: 4298
loss: 1.7007668018341064,grad_norm: 0.9999998534951369, iteration: 4299
loss: 1.4704214334487915,grad_norm: 0.9999998891990609, iteration: 4300
loss: 1.4296760559082031,grad_norm: 0.9999997083166984, iteration: 4301
loss: 1.5970534086227417,grad_norm: 0.9999997034253167, iteration: 4302
loss: 1.533649206161499,grad_norm: 0.9999997230881509, iteration: 4303
loss: 1.598665475845337,grad_norm: 0.9999998785064664, iteration: 4304
loss: 1.3870420455932617,grad_norm: 0.9999998039941272, iteration: 4305
loss: 1.6263483762741089,grad_norm: 0.9999997718105937, iteration: 4306
loss: 1.6641608476638794,grad_norm: 0.9999997498498793, iteration: 4307
loss: 1.5133520364761353,grad_norm: 0.999999807161846, iteration: 4308
loss: 1.557963252067566,grad_norm: 0.9999997882458643, iteration: 4309
loss: 1.6221883296966553,grad_norm: 0.9999998409787922, iteration: 4310
loss: 1.5811715126037598,grad_norm: 0.999999816190216, iteration: 4311
loss: 1.4903745651245117,grad_norm: 0.9999997821050639, iteration: 4312
loss: 1.5953369140625,grad_norm: 0.9999998739254304, iteration: 4313
loss: 1.5563502311706543,grad_norm: 0.9999998113749083, iteration: 4314
loss: 1.4300224781036377,grad_norm: 0.9999998786977646, iteration: 4315
loss: 1.6135907173156738,grad_norm: 0.9999998452283797, iteration: 4316
loss: 1.5117403268814087,grad_norm: 0.9999999055740588, iteration: 4317
loss: 1.4658218622207642,grad_norm: 0.9999997925208511, iteration: 4318
loss: 1.6169767379760742,grad_norm: 0.9999998584804627, iteration: 4319
loss: 1.3351869583129883,grad_norm: 0.9999998019129913, iteration: 4320
loss: 1.6509311199188232,grad_norm: 0.9999998256974079, iteration: 4321
loss: 1.6354238986968994,grad_norm: 0.9999998399159983, iteration: 4322
loss: 1.5911515951156616,grad_norm: 0.9999997548562801, iteration: 4323
loss: 1.5077697038650513,grad_norm: 0.9999997700432947, iteration: 4324
loss: 1.5547994375228882,grad_norm: 0.9999998221551568, iteration: 4325
loss: 1.5506837368011475,grad_norm: 0.999999817478761, iteration: 4326
loss: 1.5544426441192627,grad_norm: 0.9999998269222772, iteration: 4327
loss: 1.508923888206482,grad_norm: 0.9999998328697891, iteration: 4328
loss: 1.5960594415664673,grad_norm: 0.999999821541829, iteration: 4329
loss: 1.4934581518173218,grad_norm: 0.999999756821654, iteration: 4330
loss: 1.5438464879989624,grad_norm: 0.9999998169789132, iteration: 4331
loss: 1.5962320566177368,grad_norm: 0.9999997756619093, iteration: 4332
loss: 1.4105100631713867,grad_norm: 0.9999997664653675, iteration: 4333
loss: 1.62171471118927,grad_norm: 0.9999997485558851, iteration: 4334
loss: 1.5942912101745605,grad_norm: 0.9999998245551966, iteration: 4335
loss: 1.7512089014053345,grad_norm: 0.9999998359120306, iteration: 4336
loss: 1.6099164485931396,grad_norm: 0.9999998138845481, iteration: 4337
loss: 1.5827995538711548,grad_norm: 0.9999997549872047, iteration: 4338
loss: 1.564841628074646,grad_norm: 0.999999810730363, iteration: 4339
loss: 1.6274652481079102,grad_norm: 0.9999998002594982, iteration: 4340
loss: 1.4261236190795898,grad_norm: 0.9999997783367482, iteration: 4341
loss: 1.5815939903259277,grad_norm: 0.9999998298374401, iteration: 4342
loss: 1.475358486175537,grad_norm: 0.9999997831549539, iteration: 4343
loss: 1.5973670482635498,grad_norm: 0.9999999085703732, iteration: 4344
loss: 1.420143485069275,grad_norm: 0.9999998294732043, iteration: 4345
loss: 1.5176113843917847,grad_norm: 0.9999997499472076, iteration: 4346
loss: 1.7022243738174438,grad_norm: 0.999999870640561, iteration: 4347
loss: 1.4652599096298218,grad_norm: 0.99999986670028, iteration: 4348
loss: 1.5201027393341064,grad_norm: 0.9999997632454852, iteration: 4349
loss: 1.5425068140029907,grad_norm: 0.9999997442854257, iteration: 4350
loss: 1.4883131980895996,grad_norm: 0.9999999143955941, iteration: 4351
loss: 1.4184495210647583,grad_norm: 0.9999998039561057, iteration: 4352
loss: 1.5026609897613525,grad_norm: 0.9999997837080018, iteration: 4353
loss: 1.4758079051971436,grad_norm: 0.9999998783808505, iteration: 4354
loss: 1.6394389867782593,grad_norm: 0.9999998016834157, iteration: 4355
loss: 1.5984100103378296,grad_norm: 0.999999827631707, iteration: 4356
loss: 1.604183554649353,grad_norm: 0.9999997992150313, iteration: 4357
loss: 1.4408602714538574,grad_norm: 0.9999998410603068, iteration: 4358
loss: 1.6416659355163574,grad_norm: 0.9999997832584507, iteration: 4359
loss: 1.6654636859893799,grad_norm: 0.9999998088737039, iteration: 4360
loss: 1.4823126792907715,grad_norm: 0.9999998466155919, iteration: 4361
loss: 1.4449132680892944,grad_norm: 0.9999998789133068, iteration: 4362
loss: 1.4730699062347412,grad_norm: 0.9999998482187951, iteration: 4363
loss: 1.426731824874878,grad_norm: 0.9999998609531061, iteration: 4364
loss: 1.5296969413757324,grad_norm: 0.9999997773247755, iteration: 4365
loss: 1.6179436445236206,grad_norm: 0.999999751746583, iteration: 4366
loss: 1.4448070526123047,grad_norm: 0.9999999273816672, iteration: 4367
loss: 1.5561577081680298,grad_norm: 0.9999998489344338, iteration: 4368
loss: 1.5292264223098755,grad_norm: 0.9999998775942434, iteration: 4369
loss: 1.5022984743118286,grad_norm: 0.9999997547763384, iteration: 4370
loss: 1.5736194849014282,grad_norm: 0.9999998230173975, iteration: 4371
loss: 1.5332156419754028,grad_norm: 0.9999999013229962, iteration: 4372
loss: 1.483254313468933,grad_norm: 0.9999998376997754, iteration: 4373
loss: 1.4459128379821777,grad_norm: 0.9999998450011554, iteration: 4374
loss: 1.70321524143219,grad_norm: 0.9999998281653121, iteration: 4375
loss: 1.6607067584991455,grad_norm: 0.999999772024368, iteration: 4376
loss: 1.510041356086731,grad_norm: 0.9999997990885409, iteration: 4377
loss: 1.4963411092758179,grad_norm: 0.9999997048231425, iteration: 4378
loss: 1.4413596391677856,grad_norm: 0.9999997451049247, iteration: 4379
loss: 1.609203577041626,grad_norm: 0.9999998243617215, iteration: 4380
loss: 1.4948745965957642,grad_norm: 0.9999997748755888, iteration: 4381
loss: 1.5834300518035889,grad_norm: 0.9999998063358739, iteration: 4382
loss: 1.6323599815368652,grad_norm: 0.9999997666790659, iteration: 4383
loss: 1.522369384765625,grad_norm: 0.9999998282914967, iteration: 4384
loss: 1.535408616065979,grad_norm: 0.9999997846401987, iteration: 4385
loss: 1.4286003112792969,grad_norm: 0.9999997472697539, iteration: 4386
loss: 1.4380923509597778,grad_norm: 0.9999997636685695, iteration: 4387
loss: 1.6173326969146729,grad_norm: 0.9999998310087206, iteration: 4388
loss: 1.4714909791946411,grad_norm: 0.9999997630669952, iteration: 4389
loss: 1.556921124458313,grad_norm: 0.9999997699441213, iteration: 4390
loss: 1.643584966659546,grad_norm: 0.9999998431217431, iteration: 4391
loss: 1.5375455617904663,grad_norm: 0.9999998036970618, iteration: 4392
loss: 1.4929516315460205,grad_norm: 0.9999998050269225, iteration: 4393
loss: 1.5697658061981201,grad_norm: 0.9999998233339286, iteration: 4394
loss: 1.4412283897399902,grad_norm: 0.999999861799319, iteration: 4395
loss: 1.426353096961975,grad_norm: 0.9999998094135699, iteration: 4396
loss: 1.473639965057373,grad_norm: 0.9999997654240715, iteration: 4397
loss: 1.4139394760131836,grad_norm: 0.9999997987482709, iteration: 4398
loss: 1.5836985111236572,grad_norm: 0.9999998365118801, iteration: 4399
loss: 1.588570475578308,grad_norm: 0.9999998282784838, iteration: 4400
loss: 1.5066554546356201,grad_norm: 0.9999998323240269, iteration: 4401
loss: 1.4815406799316406,grad_norm: 0.9999998119964884, iteration: 4402
loss: 1.4889715909957886,grad_norm: 0.9999997775312754, iteration: 4403
loss: 1.5370229482650757,grad_norm: 0.9999997179642671, iteration: 4404
loss: 1.4896965026855469,grad_norm: 0.9999997076067696, iteration: 4405
loss: 1.4479761123657227,grad_norm: 0.9999997309061954, iteration: 4406
loss: 1.436893105506897,grad_norm: 0.9999998708858664, iteration: 4407
loss: 1.5109440088272095,grad_norm: 0.9999998249800645, iteration: 4408
loss: 1.5933867692947388,grad_norm: 0.9999998317994134, iteration: 4409
loss: 1.5928776264190674,grad_norm: 0.999999783085683, iteration: 4410
loss: 1.446616291999817,grad_norm: 0.9999997959036213, iteration: 4411
loss: 1.6669273376464844,grad_norm: 0.9999998502391869, iteration: 4412
loss: 1.5832173824310303,grad_norm: 0.999999724224232, iteration: 4413
loss: 1.6874268054962158,grad_norm: 0.9999997426773328, iteration: 4414
loss: 1.6077485084533691,grad_norm: 0.9999998091952504, iteration: 4415
loss: 1.6180802583694458,grad_norm: 0.9999997684508357, iteration: 4416
loss: 1.6427477598190308,grad_norm: 0.9999998763316889, iteration: 4417
loss: 1.5235992670059204,grad_norm: 0.9999998459177591, iteration: 4418
loss: 1.4478504657745361,grad_norm: 0.9999998360339263, iteration: 4419
loss: 1.609958291053772,grad_norm: 0.9999998189376166, iteration: 4420
loss: 1.5441458225250244,grad_norm: 0.9999997463168364, iteration: 4421
loss: 1.4187601804733276,grad_norm: 0.9999997722559945, iteration: 4422
loss: 1.5826514959335327,grad_norm: 0.9999997797593256, iteration: 4423
loss: 1.5953185558319092,grad_norm: 0.9999998087345995, iteration: 4424
loss: 1.510406494140625,grad_norm: 0.9999998291092966, iteration: 4425
loss: 1.4594790935516357,grad_norm: 0.9999998525420055, iteration: 4426
loss: 1.458954095840454,grad_norm: 0.9999998339493678, iteration: 4427
loss: 1.493937611579895,grad_norm: 0.9999998700918366, iteration: 4428
loss: 1.4499151706695557,grad_norm: 0.9999998250218239, iteration: 4429
loss: 1.5510069131851196,grad_norm: 0.9999998717192391, iteration: 4430
loss: 1.4910966157913208,grad_norm: 0.9999997503088818, iteration: 4431
loss: 1.360779047012329,grad_norm: 0.9999998321456761, iteration: 4432
loss: 1.489363193511963,grad_norm: 0.9999997932095862, iteration: 4433
loss: 1.5073050260543823,grad_norm: 0.9999998515325391, iteration: 4434
loss: 1.3924156427383423,grad_norm: 0.9999997977210853, iteration: 4435
loss: 1.5178793668746948,grad_norm: 0.9999998283704402, iteration: 4436
loss: 1.4959927797317505,grad_norm: 0.9999997637404184, iteration: 4437
loss: 1.5834338665008545,grad_norm: 0.9999997018002088, iteration: 4438
loss: 1.4258695840835571,grad_norm: 0.9999998244529732, iteration: 4439
loss: 1.5121514797210693,grad_norm: 0.9999997925587565, iteration: 4440
loss: 1.3434427976608276,grad_norm: 0.9999998294608635, iteration: 4441
loss: 1.542162537574768,grad_norm: 0.999999723097347, iteration: 4442
loss: 1.566663384437561,grad_norm: 0.9999998080422278, iteration: 4443
loss: 1.5054265260696411,grad_norm: 0.9999998405609268, iteration: 4444
loss: 1.5035951137542725,grad_norm: 0.9999998039375766, iteration: 4445
loss: 1.561267375946045,grad_norm: 0.9999998104660643, iteration: 4446
loss: 1.5923539400100708,grad_norm: 0.9999998513801318, iteration: 4447
loss: 1.448892593383789,grad_norm: 0.9999997739182951, iteration: 4448
loss: 1.5805799961090088,grad_norm: 0.9999998538710353, iteration: 4449
loss: 1.5875133275985718,grad_norm: 0.9999998123310105, iteration: 4450
loss: 1.3526479005813599,grad_norm: 0.9999998684179404, iteration: 4451
loss: 1.501803994178772,grad_norm: 0.9999997969186454, iteration: 4452
loss: 1.5552176237106323,grad_norm: 0.9999997649305664, iteration: 4453
loss: 1.531502366065979,grad_norm: 0.9999998268408619, iteration: 4454
loss: 1.4169317483901978,grad_norm: 0.9999998199505603, iteration: 4455
loss: 1.4314041137695312,grad_norm: 0.9999998171119042, iteration: 4456
loss: 1.2463809251785278,grad_norm: 0.9999998288181723, iteration: 4457
loss: 1.6314765214920044,grad_norm: 0.9999998544255274, iteration: 4458
loss: 1.4424513578414917,grad_norm: 0.9999998912091537, iteration: 4459
loss: 1.470690131187439,grad_norm: 0.9999998674977075, iteration: 4460
loss: 1.5033107995986938,grad_norm: 0.9999998672641368, iteration: 4461
loss: 1.4179095029830933,grad_norm: 0.9999997927728759, iteration: 4462
loss: 1.6487720012664795,grad_norm: 0.9999997736354246, iteration: 4463
loss: 1.4920017719268799,grad_norm: 0.9999997588641845, iteration: 4464
loss: 1.443922758102417,grad_norm: 0.9999997961483369, iteration: 4465
loss: 1.5367239713668823,grad_norm: 0.9999998727275802, iteration: 4466
loss: 1.4775893688201904,grad_norm: 0.9999997138167966, iteration: 4467
loss: 1.4159170389175415,grad_norm: 0.9999998310320569, iteration: 4468
loss: 1.501267671585083,grad_norm: 0.9999997798442501, iteration: 4469
loss: 1.5133521556854248,grad_norm: 0.9999998324792181, iteration: 4470
loss: 1.5488778352737427,grad_norm: 0.9999997808779334, iteration: 4471
loss: 1.4514580965042114,grad_norm: 0.9999998087588019, iteration: 4472
loss: 1.5170718431472778,grad_norm: 0.9999998009501707, iteration: 4473
loss: 1.5079048871994019,grad_norm: 0.9999997176302494, iteration: 4474
loss: 1.3874629735946655,grad_norm: 0.9999998151006416, iteration: 4475
loss: 1.434714674949646,grad_norm: 0.9999998333492681, iteration: 4476
loss: 1.348251461982727,grad_norm: 0.9999997955686554, iteration: 4477
loss: 1.7198563814163208,grad_norm: 0.9999999310250652, iteration: 4478
loss: 1.5002549886703491,grad_norm: 0.9999998348178076, iteration: 4479
loss: 1.5469154119491577,grad_norm: 0.9999998080702572, iteration: 4480
loss: 1.4670366048812866,grad_norm: 0.9999997859031358, iteration: 4481
loss: 1.422964096069336,grad_norm: 0.9999997659273541, iteration: 4482
loss: 1.518315076828003,grad_norm: 0.9999998404910678, iteration: 4483
loss: 1.5161901712417603,grad_norm: 0.99999977102859, iteration: 4484
loss: 1.3055368661880493,grad_norm: 0.9999998090593164, iteration: 4485
loss: 1.4539698362350464,grad_norm: 0.999999790476969, iteration: 4486
loss: 1.6792963743209839,grad_norm: 0.999999775854795, iteration: 4487
loss: 1.4858875274658203,grad_norm: 0.9999997695922355, iteration: 4488
loss: 1.5530049800872803,grad_norm: 0.9999997806036259, iteration: 4489
loss: 1.4289758205413818,grad_norm: 0.9999997906859619, iteration: 4490
loss: 1.4060142040252686,grad_norm: 0.999999761794519, iteration: 4491
loss: 1.5057891607284546,grad_norm: 0.999999852898827, iteration: 4492
loss: 1.5296452045440674,grad_norm: 0.9999998077103215, iteration: 4493
loss: 1.482964038848877,grad_norm: 0.9999998468805845, iteration: 4494
loss: 1.648257851600647,grad_norm: 0.9999998520772422, iteration: 4495
loss: 1.3198939561843872,grad_norm: 0.9999998276226603, iteration: 4496
loss: 1.619309663772583,grad_norm: 0.9999998647853967, iteration: 4497
loss: 1.5869718790054321,grad_norm: 0.9999997636639888, iteration: 4498
loss: 1.479470133781433,grad_norm: 0.9999999211348513, iteration: 4499
loss: 1.4696952104568481,grad_norm: 0.9999997596909095, iteration: 4500
loss: 1.4337832927703857,grad_norm: 0.999999833117901, iteration: 4501
loss: 1.5991109609603882,grad_norm: 0.9999998750574842, iteration: 4502
loss: 1.5011682510375977,grad_norm: 0.9999999378949602, iteration: 4503
loss: 1.5287892818450928,grad_norm: 0.9999997711415618, iteration: 4504
loss: 1.372706413269043,grad_norm: 0.9999997936830609, iteration: 4505
loss: 1.471098780632019,grad_norm: 0.9999998468649366, iteration: 4506
loss: 1.3088953495025635,grad_norm: 0.9999997195262432, iteration: 4507
loss: 1.412647008895874,grad_norm: 0.9999998308084185, iteration: 4508
loss: 1.4543147087097168,grad_norm: 0.9999998218698462, iteration: 4509
loss: 1.714129090309143,grad_norm: 0.9999998483761299, iteration: 4510
loss: 1.462477445602417,grad_norm: 0.9999997654729925, iteration: 4511
loss: 1.5531127452850342,grad_norm: 0.9999998375009477, iteration: 4512
loss: 1.3446754217147827,grad_norm: 0.9999998911508302, iteration: 4513
loss: 1.431136965751648,grad_norm: 0.99999981311294, iteration: 4514
loss: 1.3572170734405518,grad_norm: 0.9999998236424311, iteration: 4515
loss: 1.4309372901916504,grad_norm: 0.9999997860119431, iteration: 4516
loss: 1.5055381059646606,grad_norm: 0.9999997890636756, iteration: 4517
loss: 1.450781226158142,grad_norm: 0.9999998305006237, iteration: 4518
loss: 1.581851840019226,grad_norm: 0.9999998783747792, iteration: 4519
loss: 1.3880376815795898,grad_norm: 0.9999998144670755, iteration: 4520
loss: 1.544412612915039,grad_norm: 0.9999998307981741, iteration: 4521
loss: 1.390278935432434,grad_norm: 0.9999997916473918, iteration: 4522
loss: 1.4485224485397339,grad_norm: 0.9999997941456048, iteration: 4523
loss: 1.4489301443099976,grad_norm: 0.9999997327907548, iteration: 4524
loss: 1.5717636346817017,grad_norm: 0.9999998795398739, iteration: 4525
loss: 1.4254062175750732,grad_norm: 0.9999998017757574, iteration: 4526
loss: 1.4499293565750122,grad_norm: 0.999999795717469, iteration: 4527
loss: 1.3239808082580566,grad_norm: 0.9999997786994548, iteration: 4528
loss: 1.3898543119430542,grad_norm: 0.9999997835417745, iteration: 4529
loss: 1.3952406644821167,grad_norm: 0.9999998955971314, iteration: 4530
loss: 1.621939778327942,grad_norm: 0.9999997953373863, iteration: 4531
loss: 1.5483927726745605,grad_norm: 0.9999997981126668, iteration: 4532
loss: 1.48871910572052,grad_norm: 0.9999997858146966, iteration: 4533
loss: 1.5014241933822632,grad_norm: 0.9999998113860918, iteration: 4534
loss: 1.621079444885254,grad_norm: 0.9999998429841509, iteration: 4535
loss: 1.399330496788025,grad_norm: 0.9999998490648309, iteration: 4536
loss: 1.5419813394546509,grad_norm: 0.9999998276223004, iteration: 4537
loss: 1.681488037109375,grad_norm: 0.9999997896967826, iteration: 4538
loss: 1.5381251573562622,grad_norm: 0.9999998517362305, iteration: 4539
loss: 1.4604308605194092,grad_norm: 0.9999998901489167, iteration: 4540
loss: 1.45912766456604,grad_norm: 0.9999998289068629, iteration: 4541
loss: 1.4960451126098633,grad_norm: 0.999999805607609, iteration: 4542
loss: 1.5430670976638794,grad_norm: 0.9999998214407213, iteration: 4543
loss: 1.4815959930419922,grad_norm: 0.9999997840658785, iteration: 4544
loss: 1.4639792442321777,grad_norm: 0.9999997773912468, iteration: 4545
loss: 1.458603024482727,grad_norm: 0.9999997332986417, iteration: 4546
loss: 1.428324818611145,grad_norm: 0.9999998938798298, iteration: 4547
loss: 1.5505812168121338,grad_norm: 0.9999997802045384, iteration: 4548
loss: 1.4516633749008179,grad_norm: 0.9999998812703693, iteration: 4549
loss: 1.566766381263733,grad_norm: 0.9999998709254155, iteration: 4550
loss: 1.4991835355758667,grad_norm: 0.9999998674974244, iteration: 4551
loss: 1.473756194114685,grad_norm: 0.9999997371416967, iteration: 4552
loss: 1.3434284925460815,grad_norm: 0.9999997759840489, iteration: 4553
loss: 1.4020018577575684,grad_norm: 0.9999998286135598, iteration: 4554
loss: 1.5338842868804932,grad_norm: 0.9999998779310894, iteration: 4555
loss: 1.4173444509506226,grad_norm: 0.9999997890719869, iteration: 4556
loss: 1.3172471523284912,grad_norm: 0.9999997573369185, iteration: 4557
loss: 1.5241951942443848,grad_norm: 0.9999997874518672, iteration: 4558
loss: 1.40940260887146,grad_norm: 0.999999893414877, iteration: 4559
loss: 1.3632912635803223,grad_norm: 0.9999997976348903, iteration: 4560
loss: 1.4787267446517944,grad_norm: 0.9999997741346006, iteration: 4561
loss: 1.3355093002319336,grad_norm: 0.9999997634134841, iteration: 4562
loss: 1.4502867460250854,grad_norm: 0.9999998760070901, iteration: 4563
loss: 1.5402231216430664,grad_norm: 0.9999998022112995, iteration: 4564
loss: 1.495856523513794,grad_norm: 0.9999998014984841, iteration: 4565
loss: 1.5424168109893799,grad_norm: 0.9999997701823343, iteration: 4566
loss: 1.5502201318740845,grad_norm: 0.9999997916964566, iteration: 4567
loss: 1.3582535982131958,grad_norm: 0.9999998147781094, iteration: 4568
loss: 1.4910141229629517,grad_norm: 0.9999997917627726, iteration: 4569
loss: 1.4732160568237305,grad_norm: 0.9999998148527128, iteration: 4570
loss: 1.4787404537200928,grad_norm: 0.9999998006234846, iteration: 4571
loss: 1.5160359144210815,grad_norm: 0.9999998325002839, iteration: 4572
loss: 1.3865869045257568,grad_norm: 0.9999998264712714, iteration: 4573
loss: 1.3140727281570435,grad_norm: 0.9999998123703906, iteration: 4574
loss: 1.4409066438674927,grad_norm: 0.999999802829317, iteration: 4575
loss: 1.3879764080047607,grad_norm: 0.9999997969663547, iteration: 4576
loss: 1.5132275819778442,grad_norm: 0.9999998641555452, iteration: 4577
loss: 1.4433786869049072,grad_norm: 0.9999998707754274, iteration: 4578
loss: 1.6338086128234863,grad_norm: 0.9999998418142325, iteration: 4579
loss: 1.3688064813613892,grad_norm: 0.9999997703644953, iteration: 4580
loss: 1.3195666074752808,grad_norm: 0.9999997269146575, iteration: 4581
loss: 1.5127936601638794,grad_norm: 0.9999998623011632, iteration: 4582
loss: 1.3900214433670044,grad_norm: 0.9999998245247985, iteration: 4583
loss: 1.404509425163269,grad_norm: 0.9999997130363583, iteration: 4584
loss: 1.6760718822479248,grad_norm: 0.9999998139109398, iteration: 4585
loss: 1.5463569164276123,grad_norm: 0.9999997594064796, iteration: 4586
loss: 1.3242584466934204,grad_norm: 0.9999997975914754, iteration: 4587
loss: 1.4716392755508423,grad_norm: 0.9999998253164627, iteration: 4588
loss: 1.6385003328323364,grad_norm: 0.9999998889231451, iteration: 4589
loss: 1.5174070596694946,grad_norm: 0.9999998614237133, iteration: 4590
loss: 1.5246820449829102,grad_norm: 0.999999858769149, iteration: 4591
loss: 1.517338514328003,grad_norm: 0.9999998262351246, iteration: 4592
loss: 1.4098803997039795,grad_norm: 0.9999997633141664, iteration: 4593
loss: 1.5594171285629272,grad_norm: 0.9999998484664392, iteration: 4594
loss: 1.3583245277404785,grad_norm: 0.9999998044105011, iteration: 4595
loss: 1.4719957113265991,grad_norm: 0.9999998041124419, iteration: 4596
loss: 1.3596820831298828,grad_norm: 0.9999998602966457, iteration: 4597
loss: 1.3741233348846436,grad_norm: 0.9999997212073978, iteration: 4598
loss: 1.4242618083953857,grad_norm: 0.9999997236081001, iteration: 4599
loss: 1.414400577545166,grad_norm: 0.9999998561892492, iteration: 4600
loss: 1.4134856462478638,grad_norm: 0.9999998333318055, iteration: 4601
loss: 1.4272109270095825,grad_norm: 0.9999998125781214, iteration: 4602
loss: 1.5488594770431519,grad_norm: 0.9999998263870366, iteration: 4603
loss: 1.4217020273208618,grad_norm: 0.9999997452261022, iteration: 4604
loss: 1.3727717399597168,grad_norm: 0.9999998054664196, iteration: 4605
loss: 1.6521676778793335,grad_norm: 0.9999998737799234, iteration: 4606
loss: 1.4559335708618164,grad_norm: 0.9999998660775082, iteration: 4607
loss: 1.429760456085205,grad_norm: 0.9999997938966261, iteration: 4608
loss: 1.482844591140747,grad_norm: 0.9999998248219668, iteration: 4609
loss: 1.5141868591308594,grad_norm: 0.999999774736301, iteration: 4610
loss: 1.4205505847930908,grad_norm: 0.9999998755840914, iteration: 4611
loss: 1.5273901224136353,grad_norm: 0.9999998592655589, iteration: 4612
loss: 1.497872233390808,grad_norm: 0.9999998487105479, iteration: 4613
loss: 1.3471648693084717,grad_norm: 0.9999997437821934, iteration: 4614
loss: 1.5121108293533325,grad_norm: 0.9999997783206667, iteration: 4615
loss: 1.5539039373397827,grad_norm: 0.9999998911963457, iteration: 4616
loss: 1.3905283212661743,grad_norm: 0.9999997842913065, iteration: 4617
loss: 1.373117208480835,grad_norm: 0.9999998719319265, iteration: 4618
loss: 1.4956551790237427,grad_norm: 0.9999998677435732, iteration: 4619
loss: 1.3213118314743042,grad_norm: 0.9999997185556981, iteration: 4620
loss: 1.3132063150405884,grad_norm: 0.999999791871267, iteration: 4621
loss: 1.5392805337905884,grad_norm: 0.9999998468769645, iteration: 4622
loss: 1.5322589874267578,grad_norm: 0.9999998236696402, iteration: 4623
loss: 1.3989503383636475,grad_norm: 0.9999997617751956, iteration: 4624
loss: 1.4777981042861938,grad_norm: 0.9999997818444107, iteration: 4625
loss: 1.4174503087997437,grad_norm: 0.99999985824262, iteration: 4626
loss: 1.5303608179092407,grad_norm: 0.9999998976788033, iteration: 4627
loss: 1.5033613443374634,grad_norm: 0.9999998053359688, iteration: 4628
loss: 1.4462928771972656,grad_norm: 0.9999997827305714, iteration: 4629
loss: 1.4749077558517456,grad_norm: 0.9999998251344727, iteration: 4630
loss: 1.2864266633987427,grad_norm: 0.9999998246260746, iteration: 4631
loss: 1.3226040601730347,grad_norm: 0.999999788031689, iteration: 4632
loss: 1.67816162109375,grad_norm: 0.9999998789368332, iteration: 4633
loss: 1.4314098358154297,grad_norm: 0.999999867747817, iteration: 4634
loss: 1.4874706268310547,grad_norm: 0.9999998297225868, iteration: 4635
loss: 1.5079175233840942,grad_norm: 0.9999998370729819, iteration: 4636
loss: 1.5013805627822876,grad_norm: 0.9999998439679745, iteration: 4637
loss: 1.598236083984375,grad_norm: 0.9999998022968518, iteration: 4638
loss: 1.292853832244873,grad_norm: 0.9999998134236541, iteration: 4639
loss: 1.5041735172271729,grad_norm: 0.9999998506263392, iteration: 4640
loss: 1.3941400051116943,grad_norm: 0.9999996745244122, iteration: 4641
loss: 1.531625509262085,grad_norm: 0.999999833525034, iteration: 4642
loss: 1.4385442733764648,grad_norm: 0.9999997455902991, iteration: 4643
loss: 1.4872241020202637,grad_norm: 0.9999997015278745, iteration: 4644
loss: 1.6016439199447632,grad_norm: 0.9999998030936409, iteration: 4645
loss: 1.4384171962738037,grad_norm: 0.9999997542921444, iteration: 4646
loss: 1.5303574800491333,grad_norm: 0.9999997957285056, iteration: 4647
loss: 1.6083194017410278,grad_norm: 0.9999997975527946, iteration: 4648
loss: 1.5071122646331787,grad_norm: 0.9999998392746061, iteration: 4649
loss: 1.5528926849365234,grad_norm: 0.9999998583416243, iteration: 4650
loss: 1.540299415588379,grad_norm: 0.9999998185191872, iteration: 4651
loss: 1.4640655517578125,grad_norm: 0.9999998230524052, iteration: 4652
loss: 1.4702821969985962,grad_norm: 0.999999810495633, iteration: 4653
loss: 1.4336944818496704,grad_norm: 0.9999998818937038, iteration: 4654
loss: 1.3997660875320435,grad_norm: 0.9999997581125297, iteration: 4655
loss: 1.3504217863082886,grad_norm: 0.9999998644294427, iteration: 4656
loss: 1.4926764965057373,grad_norm: 0.9999997942674388, iteration: 4657
loss: 1.4476228952407837,grad_norm: 0.9999998345953758, iteration: 4658
loss: 1.4502644538879395,grad_norm: 0.9999997611305506, iteration: 4659
loss: 1.5581573247909546,grad_norm: 0.9999997974555814, iteration: 4660
loss: 1.4562244415283203,grad_norm: 0.9999998408988022, iteration: 4661
loss: 1.3253471851348877,grad_norm: 0.9999997377899094, iteration: 4662
loss: 1.4032721519470215,grad_norm: 0.9999998936551928, iteration: 4663
loss: 1.5175161361694336,grad_norm: 0.9999998548001948, iteration: 4664
loss: 1.4955650568008423,grad_norm: 0.9999998363789059, iteration: 4665
loss: 1.3667497634887695,grad_norm: 0.9999998748697757, iteration: 4666
loss: 1.4251843690872192,grad_norm: 0.9999999183098004, iteration: 4667
loss: 1.400209903717041,grad_norm: 0.9999997756635816, iteration: 4668
loss: 1.5326205492019653,grad_norm: 0.999999817178065, iteration: 4669
loss: 1.27909517288208,grad_norm: 0.9999998097027412, iteration: 4670
loss: 1.5640339851379395,grad_norm: 0.999999851042092, iteration: 4671
loss: 1.4632116556167603,grad_norm: 0.9999998327370772, iteration: 4672
loss: 1.2598762512207031,grad_norm: 0.999999808508228, iteration: 4673
loss: 1.5411906242370605,grad_norm: 0.9999998647303208, iteration: 4674
loss: 1.4804953336715698,grad_norm: 0.9999998444133388, iteration: 4675
loss: 1.281014323234558,grad_norm: 0.9999997989259344, iteration: 4676
loss: 1.4865591526031494,grad_norm: 0.9999997817717621, iteration: 4677
loss: 1.5156011581420898,grad_norm: 0.9999998526429854, iteration: 4678
loss: 1.607367992401123,grad_norm: 0.9999997909922745, iteration: 4679
loss: 1.49337637424469,grad_norm: 0.9999998323496789, iteration: 4680
loss: 1.4363653659820557,grad_norm: 0.9999998426323747, iteration: 4681
loss: 1.4508845806121826,grad_norm: 0.9999996983479628, iteration: 4682
loss: 1.326995611190796,grad_norm: 0.9999998140736523, iteration: 4683
loss: 1.4331499338150024,grad_norm: 0.9999998223000531, iteration: 4684
loss: 1.4753682613372803,grad_norm: 0.9999998133973595, iteration: 4685
loss: 1.4054750204086304,grad_norm: 0.9999998032480797, iteration: 4686
loss: 1.4745292663574219,grad_norm: 0.9999998413173574, iteration: 4687
loss: 1.435276746749878,grad_norm: 0.9999998520837274, iteration: 4688
loss: 1.440401554107666,grad_norm: 0.999999769142353, iteration: 4689
loss: 1.3289371728897095,grad_norm: 0.9999998633151838, iteration: 4690
loss: 1.5106717348098755,grad_norm: 0.9999998244328007, iteration: 4691
loss: 1.4300057888031006,grad_norm: 0.9999998442555921, iteration: 4692
loss: 1.3842136859893799,grad_norm: 0.9999997684358426, iteration: 4693
loss: 1.375783085823059,grad_norm: 0.9999998246570752, iteration: 4694
loss: 1.461355447769165,grad_norm: 0.9999997823864675, iteration: 4695
loss: 1.4599709510803223,grad_norm: 0.9999997874360014, iteration: 4696
loss: 1.4085804224014282,grad_norm: 0.9999997705775082, iteration: 4697
loss: 1.382448434829712,grad_norm: 0.999999931272414, iteration: 4698
loss: 1.536949872970581,grad_norm: 0.9999998869978555, iteration: 4699
loss: 1.5372380018234253,grad_norm: 0.9999997393373876, iteration: 4700
loss: 1.41732656955719,grad_norm: 0.9999998311141013, iteration: 4701
loss: 1.5131275653839111,grad_norm: 0.9999998780951438, iteration: 4702
loss: 1.416344404220581,grad_norm: 0.999999756609633, iteration: 4703
loss: 1.3945305347442627,grad_norm: 0.9999997236168972, iteration: 4704
loss: 1.4587697982788086,grad_norm: 0.9999997925593718, iteration: 4705
loss: 1.4192047119140625,grad_norm: 0.9999997670309986, iteration: 4706
loss: 1.4004877805709839,grad_norm: 0.9999997631179963, iteration: 4707
loss: 1.4426425695419312,grad_norm: 0.999999703643615, iteration: 4708
loss: 1.4628981351852417,grad_norm: 0.9999998153263269, iteration: 4709
loss: 1.4871689081192017,grad_norm: 0.9999997768399693, iteration: 4710
loss: 1.3908230066299438,grad_norm: 0.9999998065678178, iteration: 4711
loss: 1.4915865659713745,grad_norm: 0.9999998235733684, iteration: 4712
loss: 1.3214895725250244,grad_norm: 0.999999795040425, iteration: 4713
loss: 1.4032878875732422,grad_norm: 0.999999777324968, iteration: 4714
loss: 1.381243348121643,grad_norm: 0.9999997711763317, iteration: 4715
loss: 1.5157901048660278,grad_norm: 0.999999815688959, iteration: 4716
loss: 1.4072327613830566,grad_norm: 0.999999851471769, iteration: 4717
loss: 1.4678422212600708,grad_norm: 0.9999998223348122, iteration: 4718
loss: 1.487479567527771,grad_norm: 0.9999998402190411, iteration: 4719
loss: 1.3588736057281494,grad_norm: 0.9999998783359072, iteration: 4720
loss: 1.3466637134552002,grad_norm: 0.9999998075257441, iteration: 4721
loss: 1.518749713897705,grad_norm: 0.9999998797903038, iteration: 4722
loss: 1.397374153137207,grad_norm: 0.9999998425099067, iteration: 4723
loss: 1.4458754062652588,grad_norm: 0.9999998624402794, iteration: 4724
loss: 1.3686975240707397,grad_norm: 0.9999998665640495, iteration: 4725
loss: 1.5747870206832886,grad_norm: 0.9999998682871498, iteration: 4726
loss: 1.4874073266983032,grad_norm: 0.9999998296135632, iteration: 4727
loss: 1.3631213903427124,grad_norm: 0.9999997981980294, iteration: 4728
loss: 1.3759676218032837,grad_norm: 0.9999997602315287, iteration: 4729
loss: 1.3657891750335693,grad_norm: 0.9999997612187621, iteration: 4730
loss: 1.4049561023712158,grad_norm: 0.9999997717577928, iteration: 4731
loss: 1.4297103881835938,grad_norm: 0.9999998068350592, iteration: 4732
loss: 1.496495008468628,grad_norm: 0.9999998294176636, iteration: 4733
loss: 1.471638560295105,grad_norm: 0.9999997641352043, iteration: 4734
loss: 1.5002497434616089,grad_norm: 0.999999873156095, iteration: 4735
loss: 1.3830976486206055,grad_norm: 0.9999998002512958, iteration: 4736
loss: 1.3925448656082153,grad_norm: 0.9999998156889321, iteration: 4737
loss: 1.3447591066360474,grad_norm: 0.9999998061874479, iteration: 4738
loss: 1.474061131477356,grad_norm: 0.9999998551934245, iteration: 4739
loss: 1.5695313215255737,grad_norm: 0.9999998483489049, iteration: 4740
loss: 1.454785704612732,grad_norm: 0.999999788583987, iteration: 4741
loss: 1.4770071506500244,grad_norm: 0.99999979794209, iteration: 4742
loss: 1.3589421510696411,grad_norm: 0.9999997381577115, iteration: 4743
loss: 1.3901920318603516,grad_norm: 0.999999863722468, iteration: 4744
loss: 1.3628058433532715,grad_norm: 0.9999998885854999, iteration: 4745
loss: 1.362136721611023,grad_norm: 0.9999998445670315, iteration: 4746
loss: 1.3723188638687134,grad_norm: 0.9999998319818266, iteration: 4747
loss: 1.35724937915802,grad_norm: 0.9999997582785669, iteration: 4748
loss: 1.364274024963379,grad_norm: 0.9999998290097287, iteration: 4749
loss: 1.5611422061920166,grad_norm: 0.9999997946947755, iteration: 4750
loss: 1.4301382303237915,grad_norm: 0.9999998847421425, iteration: 4751
loss: 1.4044948816299438,grad_norm: 0.9999997938903508, iteration: 4752
loss: 1.3713239431381226,grad_norm: 0.9999998278276536, iteration: 4753
loss: 1.4627127647399902,grad_norm: 0.9999997540631335, iteration: 4754
loss: 1.4423582553863525,grad_norm: 0.9999997790037398, iteration: 4755
loss: 1.2628834247589111,grad_norm: 0.9999997403932783, iteration: 4756
loss: 1.4338854551315308,grad_norm: 0.9999998930425169, iteration: 4757
loss: 1.274116039276123,grad_norm: 0.9999997995135035, iteration: 4758
loss: 1.4984158277511597,grad_norm: 0.9999998572497961, iteration: 4759
loss: 1.567228078842163,grad_norm: 0.9999997964428146, iteration: 4760
loss: 1.5636171102523804,grad_norm: 0.9999997972464992, iteration: 4761
loss: 1.471976399421692,grad_norm: 0.9999997868936216, iteration: 4762
loss: 1.4687683582305908,grad_norm: 0.9999997311247817, iteration: 4763
loss: 1.316401481628418,grad_norm: 0.9999998745232397, iteration: 4764
loss: 1.4216456413269043,grad_norm: 0.9999998231748809, iteration: 4765
loss: 1.4616484642028809,grad_norm: 0.9999997697368374, iteration: 4766
loss: 1.4220880270004272,grad_norm: 0.9999998093959073, iteration: 4767
loss: 1.3698421716690063,grad_norm: 0.9999998271060306, iteration: 4768
loss: 1.3331607580184937,grad_norm: 0.9999997650542929, iteration: 4769
loss: 1.487872838973999,grad_norm: 0.9999997784115568, iteration: 4770
loss: 1.4093481302261353,grad_norm: 0.9999997756185972, iteration: 4771
loss: 1.4158737659454346,grad_norm: 0.9999997801484716, iteration: 4772
loss: 1.3946610689163208,grad_norm: 0.9999998038356815, iteration: 4773
loss: 1.499650478363037,grad_norm: 0.999999839903909, iteration: 4774
loss: 1.3621176481246948,grad_norm: 0.9999998179691697, iteration: 4775
loss: 1.536535382270813,grad_norm: 0.9999998837134644, iteration: 4776
loss: 1.5591082572937012,grad_norm: 0.9999998925090761, iteration: 4777
loss: 1.426209568977356,grad_norm: 0.9999997680278795, iteration: 4778
loss: 1.3375487327575684,grad_norm: 0.9999998380772447, iteration: 4779
loss: 1.6040048599243164,grad_norm: 0.9999998077097555, iteration: 4780
loss: 1.4436149597167969,grad_norm: 0.9999997885460352, iteration: 4781
loss: 1.6248050928115845,grad_norm: 0.9999998304655666, iteration: 4782
loss: 1.4768059253692627,grad_norm: 0.9999997950479231, iteration: 4783
loss: 1.3627249002456665,grad_norm: 0.9999998267831683, iteration: 4784
loss: 1.3935660123825073,grad_norm: 0.9999998207581718, iteration: 4785
loss: 1.4713029861450195,grad_norm: 0.9999998035149968, iteration: 4786
loss: 1.408368468284607,grad_norm: 0.9999997662756912, iteration: 4787
loss: 1.4768762588500977,grad_norm: 0.9999998626899808, iteration: 4788
loss: 1.4359885454177856,grad_norm: 0.9999998088381888, iteration: 4789
loss: 1.3110628128051758,grad_norm: 0.999999771906594, iteration: 4790
loss: 1.4497891664505005,grad_norm: 0.9999998539758616, iteration: 4791
loss: 1.472640872001648,grad_norm: 0.9999998396850275, iteration: 4792
loss: 1.437902569770813,grad_norm: 0.9999998111261145, iteration: 4793
loss: 1.3767751455307007,grad_norm: 0.999999781299925, iteration: 4794
loss: 1.3692294359207153,grad_norm: 0.9999998276770753, iteration: 4795
loss: 1.2693097591400146,grad_norm: 0.9999997802138947, iteration: 4796
loss: 1.4016315937042236,grad_norm: 0.9999998375090093, iteration: 4797
loss: 1.3744113445281982,grad_norm: 0.999999858496034, iteration: 4798
loss: 1.40034818649292,grad_norm: 0.9999998517746954, iteration: 4799
loss: 1.32187819480896,grad_norm: 0.9999997721724448, iteration: 4800
loss: 1.5124186277389526,grad_norm: 0.9999998020850068, iteration: 4801
loss: 1.385879397392273,grad_norm: 0.9999998580659525, iteration: 4802
loss: 1.460489273071289,grad_norm: 0.9999997724376076, iteration: 4803
loss: 1.2860565185546875,grad_norm: 0.9999998483142097, iteration: 4804
loss: 1.59483003616333,grad_norm: 0.9999998816741518, iteration: 4805
loss: 1.3198727369308472,grad_norm: 0.9999998166679346, iteration: 4806
loss: 1.414261817932129,grad_norm: 0.9999998289562627, iteration: 4807
loss: 1.35114586353302,grad_norm: 0.9999997623775203, iteration: 4808
loss: 1.5008611679077148,grad_norm: 0.9999997985310567, iteration: 4809
loss: 1.4509756565093994,grad_norm: 0.9999997898484804, iteration: 4810
loss: 1.4239777326583862,grad_norm: 0.9999999049633286, iteration: 4811
loss: 1.5449340343475342,grad_norm: 0.9999998066590773, iteration: 4812
loss: 1.4466333389282227,grad_norm: 0.9999998193642268, iteration: 4813
loss: 1.387287974357605,grad_norm: 0.9999998683784914, iteration: 4814
loss: 1.4425687789916992,grad_norm: 0.999999785168742, iteration: 4815
loss: 1.5052522420883179,grad_norm: 0.9999999065160445, iteration: 4816
loss: 1.2979589700698853,grad_norm: 0.9999997497531671, iteration: 4817
loss: 1.4587005376815796,grad_norm: 0.9999998327286859, iteration: 4818
loss: 1.4524918794631958,grad_norm: 0.9999997774396031, iteration: 4819
loss: 1.3362447023391724,grad_norm: 0.9999999003828413, iteration: 4820
loss: 1.3691638708114624,grad_norm: 0.9999997185105923, iteration: 4821
loss: 1.4968253374099731,grad_norm: 0.9999998536512397, iteration: 4822
loss: 1.3841394186019897,grad_norm: 0.9999998252949752, iteration: 4823
loss: 1.4094388484954834,grad_norm: 0.9999998289670948, iteration: 4824
loss: 1.458748459815979,grad_norm: 0.9999998324531482, iteration: 4825
loss: 1.5377647876739502,grad_norm: 0.9999997562119817, iteration: 4826
loss: 1.5037914514541626,grad_norm: 0.9999997216951783, iteration: 4827
loss: 1.352948546409607,grad_norm: 0.9999997760389805, iteration: 4828
loss: 1.3230317831039429,grad_norm: 0.9999998191731349, iteration: 4829
loss: 1.5218315124511719,grad_norm: 0.9999998624572031, iteration: 4830
loss: 1.6234456300735474,grad_norm: 0.999999778499665, iteration: 4831
loss: 1.4514079093933105,grad_norm: 0.9999998466557175, iteration: 4832
loss: 1.4001452922821045,grad_norm: 0.9999997726193954, iteration: 4833
loss: 1.3172423839569092,grad_norm: 0.9999998641938358, iteration: 4834
loss: 1.3003360033035278,grad_norm: 0.9999996940180363, iteration: 4835
loss: 1.3667188882827759,grad_norm: 0.9999997306699253, iteration: 4836
loss: 1.4584355354309082,grad_norm: 0.999999859730842, iteration: 4837
loss: 1.3372113704681396,grad_norm: 0.9999998597919745, iteration: 4838
loss: 1.4502143859863281,grad_norm: 0.9999998499758238, iteration: 4839
loss: 1.4224128723144531,grad_norm: 0.9999998035037185, iteration: 4840
loss: 1.4911364316940308,grad_norm: 0.9999998610635229, iteration: 4841
loss: 1.608781337738037,grad_norm: 0.9999998239903556, iteration: 4842
loss: 1.3612653017044067,grad_norm: 0.9999997966566297, iteration: 4843
loss: 1.3677836656570435,grad_norm: 0.9999997617452886, iteration: 4844
loss: 1.354512333869934,grad_norm: 0.9999998231015724, iteration: 4845
loss: 1.3343697786331177,grad_norm: 0.9999998249143265, iteration: 4846
loss: 1.4531468152999878,grad_norm: 0.9999997920975091, iteration: 4847
loss: 1.3733595609664917,grad_norm: 0.9999997948534972, iteration: 4848
loss: 1.446641206741333,grad_norm: 0.9999997952376712, iteration: 4849
loss: 1.4845622777938843,grad_norm: 0.9999998389628947, iteration: 4850
loss: 1.3688279390335083,grad_norm: 0.9999997739053667, iteration: 4851
loss: 1.4554189443588257,grad_norm: 0.9999998283418376, iteration: 4852
loss: 1.3721368312835693,grad_norm: 0.9999998465125052, iteration: 4853
loss: 1.460686445236206,grad_norm: 0.9999998255864392, iteration: 4854
loss: 1.3398083448410034,grad_norm: 0.9999997976555107, iteration: 4855
loss: 1.3376901149749756,grad_norm: 0.9999998115325408, iteration: 4856
loss: 1.3408421277999878,grad_norm: 0.99999974676694, iteration: 4857
loss: 1.4524391889572144,grad_norm: 0.9999997738835696, iteration: 4858
loss: 1.4119181632995605,grad_norm: 0.9999998150479882, iteration: 4859
loss: 1.3737848997116089,grad_norm: 0.9999997979362804, iteration: 4860
loss: 1.3178802728652954,grad_norm: 0.9999997938496101, iteration: 4861
loss: 1.591151475906372,grad_norm: 0.9999998672360504, iteration: 4862
loss: 1.4259333610534668,grad_norm: 0.9999998284584752, iteration: 4863
loss: 1.3710883855819702,grad_norm: 0.9999998506791409, iteration: 4864
loss: 1.4229117631912231,grad_norm: 0.9999997482330948, iteration: 4865
loss: 1.3655319213867188,grad_norm: 0.9999998543992543, iteration: 4866
loss: 1.446519136428833,grad_norm: 0.9999998721466944, iteration: 4867
loss: 1.347291350364685,grad_norm: 0.9999997786192663, iteration: 4868
loss: 1.3556112051010132,grad_norm: 0.9999997809340866, iteration: 4869
loss: 1.555495262145996,grad_norm: 0.999999825253816, iteration: 4870
loss: 1.4446407556533813,grad_norm: 0.9999998006392152, iteration: 4871
loss: 1.4590448141098022,grad_norm: 0.9999998961667911, iteration: 4872
loss: 1.4108585119247437,grad_norm: 0.9999998016453365, iteration: 4873
loss: 1.4366194009780884,grad_norm: 0.9999997954678725, iteration: 4874
loss: 1.2877253293991089,grad_norm: 0.9999998273071163, iteration: 4875
loss: 1.4573931694030762,grad_norm: 0.9999998254423016, iteration: 4876
loss: 1.4511146545410156,grad_norm: 0.9999998788720236, iteration: 4877
loss: 1.4083460569381714,grad_norm: 0.9999998722801438, iteration: 4878
loss: 1.3328819274902344,grad_norm: 0.9999997966290195, iteration: 4879
loss: 1.4109231233596802,grad_norm: 0.9999997271427657, iteration: 4880
loss: 1.3864604234695435,grad_norm: 0.9999997844311082, iteration: 4881
loss: 1.3988584280014038,grad_norm: 0.9999998811209823, iteration: 4882
loss: 1.4215646982192993,grad_norm: 0.9999998644901554, iteration: 4883
loss: 1.4272761344909668,grad_norm: 0.9999998800159856, iteration: 4884
loss: 1.4035676717758179,grad_norm: 0.9999997929711463, iteration: 4885
loss: 1.3474242687225342,grad_norm: 0.9999998572294507, iteration: 4886
loss: 1.5115242004394531,grad_norm: 0.999999846676746, iteration: 4887
loss: 1.4350775480270386,grad_norm: 0.9999998606274226, iteration: 4888
loss: 1.328989863395691,grad_norm: 0.9999997641637418, iteration: 4889
loss: 1.262675166130066,grad_norm: 0.9999998223574087, iteration: 4890
loss: 1.4029887914657593,grad_norm: 0.9999997357872665, iteration: 4891
loss: 1.3793394565582275,grad_norm: 0.9999998227365409, iteration: 4892
loss: 1.4573553800582886,grad_norm: 0.9999998602094711, iteration: 4893
loss: 1.452702283859253,grad_norm: 0.9999998772637522, iteration: 4894
loss: 1.3395628929138184,grad_norm: 0.9999998412634936, iteration: 4895
loss: 1.3499996662139893,grad_norm: 0.9999998980318278, iteration: 4896
loss: 1.2910526990890503,grad_norm: 0.9999997916294845, iteration: 4897
loss: 1.2855069637298584,grad_norm: 0.9999997926298555, iteration: 4898
loss: 1.3877090215682983,grad_norm: 0.9999997957279619, iteration: 4899
loss: 1.3311713933944702,grad_norm: 0.9999998235262875, iteration: 4900
loss: 1.3628302812576294,grad_norm: 0.9999998134285201, iteration: 4901
loss: 1.32179594039917,grad_norm: 0.9999997667759866, iteration: 4902
loss: 1.3793410062789917,grad_norm: 0.9999997389990631, iteration: 4903
loss: 1.4484772682189941,grad_norm: 0.9999999011088776, iteration: 4904
loss: 1.2606940269470215,grad_norm: 0.9999997625109082, iteration: 4905
loss: 1.4020702838897705,grad_norm: 0.999999826289581, iteration: 4906
loss: 1.5641999244689941,grad_norm: 0.9999998659697338, iteration: 4907
loss: 1.372043251991272,grad_norm: 0.9999997631968401, iteration: 4908
loss: 1.2279216051101685,grad_norm: 0.9999997174506139, iteration: 4909
loss: 1.545837163925171,grad_norm: 0.9999997746769353, iteration: 4910
loss: 1.3042141199111938,grad_norm: 0.9999998152869364, iteration: 4911
loss: 1.3583123683929443,grad_norm: 0.9999998302137042, iteration: 4912
loss: 1.326501727104187,grad_norm: 0.9999998353995465, iteration: 4913
loss: 1.453600287437439,grad_norm: 0.9999997507049654, iteration: 4914
loss: 1.3966344594955444,grad_norm: 0.9999997574799536, iteration: 4915
loss: 1.4874475002288818,grad_norm: 0.9999998600242583, iteration: 4916
loss: 1.466491460800171,grad_norm: 0.9999998029888292, iteration: 4917
loss: 1.4433952569961548,grad_norm: 0.9999998497827781, iteration: 4918
loss: 1.268953561782837,grad_norm: 0.9999997880147194, iteration: 4919
loss: 1.348362922668457,grad_norm: 0.9999998054913849, iteration: 4920
loss: 1.360652208328247,grad_norm: 0.9999997291863842, iteration: 4921
loss: 1.439213752746582,grad_norm: 0.9999998246079416, iteration: 4922
loss: 1.28643798828125,grad_norm: 0.9999998652233055, iteration: 4923
loss: 1.3517206907272339,grad_norm: 0.9999998288125144, iteration: 4924
loss: 1.414657711982727,grad_norm: 0.9999998015021869, iteration: 4925
loss: 1.381190299987793,grad_norm: 0.9999997382689585, iteration: 4926
loss: 1.2036967277526855,grad_norm: 0.9999997071695788, iteration: 4927
loss: 1.4576867818832397,grad_norm: 0.9999998841032448, iteration: 4928
loss: 1.4665203094482422,grad_norm: 0.9999998333579995, iteration: 4929
loss: 1.383109211921692,grad_norm: 0.9999998392658194, iteration: 4930
loss: 1.4635093212127686,grad_norm: 0.9999997863852135, iteration: 4931
loss: 1.3272435665130615,grad_norm: 0.9999998030943766, iteration: 4932
loss: 1.5119396448135376,grad_norm: 0.9999997507175055, iteration: 4933
loss: 1.341568112373352,grad_norm: 0.9999997691636082, iteration: 4934
loss: 1.3791102170944214,grad_norm: 0.9999999006346667, iteration: 4935
loss: 1.4578238725662231,grad_norm: 0.9999998238114017, iteration: 4936
loss: 1.3507115840911865,grad_norm: 0.999999868186606, iteration: 4937
loss: 1.3314507007598877,grad_norm: 0.9999998592710669, iteration: 4938
loss: 1.5176228284835815,grad_norm: 0.999999872454942, iteration: 4939
loss: 1.3989062309265137,grad_norm: 0.9999998407599461, iteration: 4940
loss: 1.1806527376174927,grad_norm: 0.9999997219286879, iteration: 4941
loss: 1.3515594005584717,grad_norm: 0.9999997371462711, iteration: 4942
loss: 1.3159443140029907,grad_norm: 0.9999998933239569, iteration: 4943
loss: 1.313174843788147,grad_norm: 0.9999997167737824, iteration: 4944
loss: 1.3146034479141235,grad_norm: 0.999999820579901, iteration: 4945
loss: 1.4902398586273193,grad_norm: 0.9999998529976596, iteration: 4946
loss: 1.4376966953277588,grad_norm: 0.9999998190261926, iteration: 4947
loss: 1.3813656568527222,grad_norm: 0.9999998166792148, iteration: 4948
loss: 1.4041017293930054,grad_norm: 0.9999998241829897, iteration: 4949
loss: 1.312635064125061,grad_norm: 0.9999999186752337, iteration: 4950
loss: 1.4181801080703735,grad_norm: 0.9999998415108403, iteration: 4951
loss: 1.407930612564087,grad_norm: 0.9999997661841116, iteration: 4952
loss: 1.398593544960022,grad_norm: 0.9999997866555422, iteration: 4953
loss: 1.2818745374679565,grad_norm: 0.999999796696804, iteration: 4954
loss: 1.3280690908432007,grad_norm: 0.9999997345943606, iteration: 4955
loss: 1.3825438022613525,grad_norm: 0.9999998207500232, iteration: 4956
loss: 1.3611860275268555,grad_norm: 0.9999997681444038, iteration: 4957
loss: 1.4535868167877197,grad_norm: 0.99999989655883, iteration: 4958
loss: 1.3847090005874634,grad_norm: 0.9999998531922577, iteration: 4959
loss: 1.2967456579208374,grad_norm: 0.9999998340172902, iteration: 4960
loss: 1.2185457944869995,grad_norm: 0.9999997849675257, iteration: 4961
loss: 1.469996690750122,grad_norm: 0.9999997723141402, iteration: 4962
loss: 1.429555892944336,grad_norm: 0.9999998237565135, iteration: 4963
loss: 1.5439767837524414,grad_norm: 0.9999998068399216, iteration: 4964
loss: 1.2776334285736084,grad_norm: 0.9999997578426982, iteration: 4965
loss: 1.4798365831375122,grad_norm: 0.9999997903669892, iteration: 4966
loss: 1.4073008298873901,grad_norm: 0.9999998610861819, iteration: 4967
loss: 1.2715519666671753,grad_norm: 0.9999999204691514, iteration: 4968
loss: 1.3296871185302734,grad_norm: 0.999999719869608, iteration: 4969
loss: 1.2978699207305908,grad_norm: 0.9999997294412931, iteration: 4970
loss: 1.3492282629013062,grad_norm: 0.9999998070708528, iteration: 4971
loss: 1.4048619270324707,grad_norm: 0.9999997860328034, iteration: 4972
loss: 1.3467637300491333,grad_norm: 0.999999885031316, iteration: 4973
loss: 1.2893329858779907,grad_norm: 0.9999998296357949, iteration: 4974
loss: 1.460192322731018,grad_norm: 0.9999997877674511, iteration: 4975
loss: 1.4920231103897095,grad_norm: 0.9999997276332814, iteration: 4976
loss: 1.3213785886764526,grad_norm: 0.9999997638231124, iteration: 4977
loss: 1.553180456161499,grad_norm: 0.9999997486454264, iteration: 4978
loss: 1.2328786849975586,grad_norm: 0.9999997910853747, iteration: 4979
loss: 1.4691046476364136,grad_norm: 0.9999998769142239, iteration: 4980
loss: 1.4968310594558716,grad_norm: 0.9999998607157272, iteration: 4981
loss: 1.3107010126113892,grad_norm: 0.9999998062235664, iteration: 4982
loss: 1.3275971412658691,grad_norm: 0.9999997834973111, iteration: 4983
loss: 1.3243186473846436,grad_norm: 0.9999998686763487, iteration: 4984
loss: 1.3659346103668213,grad_norm: 0.9999998650587806, iteration: 4985
loss: 1.372502326965332,grad_norm: 0.9999998677554972, iteration: 4986
loss: 1.2425329685211182,grad_norm: 0.9999997699344845, iteration: 4987
loss: 1.33268404006958,grad_norm: 0.9999998860169732, iteration: 4988
loss: 1.398060917854309,grad_norm: 0.9999998376770851, iteration: 4989
loss: 1.538118600845337,grad_norm: 0.9999998756537305, iteration: 4990
loss: 1.3099994659423828,grad_norm: 0.9999998591995701, iteration: 4991
loss: 1.3394850492477417,grad_norm: 0.9999998315268918, iteration: 4992
loss: 1.2498887777328491,grad_norm: 0.9999998542708359, iteration: 4993
loss: 1.2901479005813599,grad_norm: 0.9999997970044311, iteration: 4994
loss: 1.4172770977020264,grad_norm: 0.9999997900951101, iteration: 4995
loss: 1.3751451969146729,grad_norm: 0.9999998633114269, iteration: 4996
loss: 1.3636183738708496,grad_norm: 0.9999998825112159, iteration: 4997
loss: 1.3850640058517456,grad_norm: 0.9999997464409364, iteration: 4998
loss: 1.31229567527771,grad_norm: 0.9999998093020894, iteration: 4999
loss: 1.3718301057815552,grad_norm: 0.9999998066863526, iteration: 5000
loss: 1.286370873451233,grad_norm: 0.9999997474293221, iteration: 5001
loss: 1.3497023582458496,grad_norm: 0.9999997792919086, iteration: 5002
loss: 1.3675626516342163,grad_norm: 0.9999998366171056, iteration: 5003
loss: 1.4157828092575073,grad_norm: 0.9999997530476928, iteration: 5004
loss: 1.2717572450637817,grad_norm: 0.9999998393407449, iteration: 5005
loss: 1.3441418409347534,grad_norm: 0.999999782718992, iteration: 5006
loss: 1.3033411502838135,grad_norm: 0.9999997870148287, iteration: 5007
loss: 1.3832470178604126,grad_norm: 0.9999998098818785, iteration: 5008
loss: 1.3666775226593018,grad_norm: 0.9999998622686158, iteration: 5009
loss: 1.3117600679397583,grad_norm: 0.9999998479253974, iteration: 5010
loss: 1.356635570526123,grad_norm: 0.999999811313294, iteration: 5011
loss: 1.355576992034912,grad_norm: 0.9999997736819293, iteration: 5012
loss: 1.3942973613739014,grad_norm: 0.9999998503559693, iteration: 5013
loss: 1.4008243083953857,grad_norm: 0.9999998291593514, iteration: 5014
loss: 1.1447474956512451,grad_norm: 0.9999997357235977, iteration: 5015
loss: 1.4607264995574951,grad_norm: 0.9999998426823062, iteration: 5016
loss: 1.2906595468521118,grad_norm: 0.9999998470164484, iteration: 5017
loss: 1.2933251857757568,grad_norm: 0.999999777400105, iteration: 5018
loss: 1.194168210029602,grad_norm: 0.9999997691018131, iteration: 5019
loss: 1.4057834148406982,grad_norm: 0.9999998309130621, iteration: 5020
loss: 1.394835352897644,grad_norm: 0.9999997504551928, iteration: 5021
loss: 1.4216046333312988,grad_norm: 0.999999844557392, iteration: 5022
loss: 1.4104678630828857,grad_norm: 0.9999997839344198, iteration: 5023
loss: 1.2272616624832153,grad_norm: 0.9999997138252894, iteration: 5024
loss: 1.2217559814453125,grad_norm: 0.9999998052338441, iteration: 5025
loss: 1.4709018468856812,grad_norm: 0.999999763357298, iteration: 5026
loss: 1.301723599433899,grad_norm: 0.9999998195997338, iteration: 5027
loss: 1.2911128997802734,grad_norm: 0.9999997627141098, iteration: 5028
loss: 1.3430205583572388,grad_norm: 0.9999997888831044, iteration: 5029
loss: 1.3366453647613525,grad_norm: 0.9999997269506483, iteration: 5030
loss: 1.2750164270401,grad_norm: 0.9999999111764736, iteration: 5031
loss: 1.3618252277374268,grad_norm: 0.9999998233993826, iteration: 5032
loss: 1.3391257524490356,grad_norm: 0.9999998505418166, iteration: 5033
loss: 1.370108723640442,grad_norm: 0.9999998694483035, iteration: 5034
loss: 1.4052271842956543,grad_norm: 0.9999998064291958, iteration: 5035
loss: 1.3838422298431396,grad_norm: 0.9999998580835361, iteration: 5036
loss: 1.3840579986572266,grad_norm: 0.9999998380761558, iteration: 5037
loss: 1.387540340423584,grad_norm: 0.9999997833557409, iteration: 5038
loss: 1.2438924312591553,grad_norm: 0.9999997679516027, iteration: 5039
loss: 1.306394100189209,grad_norm: 0.999999893382576, iteration: 5040
loss: 1.3364918231964111,grad_norm: 0.9999997791312121, iteration: 5041
loss: 1.623781681060791,grad_norm: 0.9999998518109326, iteration: 5042
loss: 1.439361333847046,grad_norm: 0.999999804849513, iteration: 5043
loss: 1.381769061088562,grad_norm: 0.9999997525300388, iteration: 5044
loss: 1.3202530145645142,grad_norm: 0.9999997921580641, iteration: 5045
loss: 1.339842677116394,grad_norm: 0.9999997921080329, iteration: 5046
loss: 1.5217750072479248,grad_norm: 0.9999998353832308, iteration: 5047
loss: 1.3500028848648071,grad_norm: 0.9999998928216233, iteration: 5048
loss: 1.3086764812469482,grad_norm: 0.999999744345003, iteration: 5049
loss: 1.3673032522201538,grad_norm: 0.9999998335958624, iteration: 5050
loss: 1.3282705545425415,grad_norm: 0.9999998469263889, iteration: 5051
loss: 1.4511280059814453,grad_norm: 0.9999997776986541, iteration: 5052
loss: 1.3908238410949707,grad_norm: 0.9999998007514105, iteration: 5053
loss: 1.2048115730285645,grad_norm: 0.9999996965097702, iteration: 5054
loss: 1.4017010927200317,grad_norm: 0.9999998194595384, iteration: 5055
loss: 1.3523598909378052,grad_norm: 0.9999997496838686, iteration: 5056
loss: 1.3147462606430054,grad_norm: 0.9999997767917629, iteration: 5057
loss: 1.1574616432189941,grad_norm: 0.9999998091502285, iteration: 5058
loss: 1.381783366203308,grad_norm: 0.9999997845377349, iteration: 5059
loss: 1.2760207653045654,grad_norm: 0.9999997939089622, iteration: 5060
loss: 1.3485264778137207,grad_norm: 0.9999997431620682, iteration: 5061
loss: 1.398507833480835,grad_norm: 0.999999806117053, iteration: 5062
loss: 1.3182027339935303,grad_norm: 0.9999998199432792, iteration: 5063
loss: 1.3697818517684937,grad_norm: 0.9999997690385612, iteration: 5064
loss: 1.4690319299697876,grad_norm: 0.9999998142915696, iteration: 5065
loss: 1.2999016046524048,grad_norm: 0.9999997962421083, iteration: 5066
loss: 1.3046706914901733,grad_norm: 0.999999777058737, iteration: 5067
loss: 1.5315911769866943,grad_norm: 0.9999998100126603, iteration: 5068
loss: 1.3389924764633179,grad_norm: 0.999999778091998, iteration: 5069
loss: 1.4604538679122925,grad_norm: 0.9999997660630364, iteration: 5070
loss: 1.4594438076019287,grad_norm: 0.9999998075831338, iteration: 5071
loss: 1.2892481088638306,grad_norm: 0.999999738742763, iteration: 5072
loss: 1.2653812170028687,grad_norm: 0.9999997787031919, iteration: 5073
loss: 1.4105783700942993,grad_norm: 0.9999998338427205, iteration: 5074
loss: 1.3145076036453247,grad_norm: 0.9999997677075115, iteration: 5075
loss: 1.2551783323287964,grad_norm: 0.9999998251785109, iteration: 5076
loss: 1.3523281812667847,grad_norm: 0.9999998098472608, iteration: 5077
loss: 1.2196911573410034,grad_norm: 0.9999998049664272, iteration: 5078
loss: 1.3846666812896729,grad_norm: 0.9999997832460348, iteration: 5079
loss: 1.2766056060791016,grad_norm: 0.9999997350837815, iteration: 5080
loss: 1.139647364616394,grad_norm: 0.9999997144773597, iteration: 5081
loss: 1.5016570091247559,grad_norm: 0.9999998406972249, iteration: 5082
loss: 1.4032130241394043,grad_norm: 0.9999997685818471, iteration: 5083
loss: 1.4368212223052979,grad_norm: 0.9999998464058659, iteration: 5084
loss: 1.3786439895629883,grad_norm: 0.9999997269423907, iteration: 5085
loss: 1.2986167669296265,grad_norm: 0.9999997921356494, iteration: 5086
loss: 1.3670302629470825,grad_norm: 0.9999998158944643, iteration: 5087
loss: 1.2392271757125854,grad_norm: 0.9999998206291668, iteration: 5088
loss: 1.4125527143478394,grad_norm: 0.9999997766859876, iteration: 5089
loss: 1.3445143699645996,grad_norm: 0.999999787932348, iteration: 5090
loss: 1.2710202932357788,grad_norm: 0.9999997651300817, iteration: 5091
loss: 1.2831339836120605,grad_norm: 0.9999997661489959, iteration: 5092
loss: 1.302569031715393,grad_norm: 0.9999998332021084, iteration: 5093
loss: 1.2516725063323975,grad_norm: 0.9999998268408601, iteration: 5094
loss: 1.370640516281128,grad_norm: 0.9999998300775752, iteration: 5095
loss: 1.2411166429519653,grad_norm: 0.999999815169829, iteration: 5096
loss: 1.4802666902542114,grad_norm: 0.9999997760535364, iteration: 5097
loss: 1.4087368249893188,grad_norm: 0.9999998199672118, iteration: 5098
loss: 1.3589739799499512,grad_norm: 0.9999997880180783, iteration: 5099
loss: 1.3602017164230347,grad_norm: 0.9999998750351479, iteration: 5100
loss: 1.306040644645691,grad_norm: 0.9999997834345669, iteration: 5101
loss: 1.4382538795471191,grad_norm: 0.9999998151879774, iteration: 5102
loss: 1.5792109966278076,grad_norm: 0.9999998432230918, iteration: 5103
loss: 1.3379167318344116,grad_norm: 0.9999997656090216, iteration: 5104
loss: 1.4068357944488525,grad_norm: 0.9999998624320169, iteration: 5105
loss: 1.2785885334014893,grad_norm: 0.999999697936769, iteration: 5106
loss: 1.2668375968933105,grad_norm: 0.9999997984340536, iteration: 5107
loss: 1.2849631309509277,grad_norm: 0.999999780962478, iteration: 5108
loss: 1.2733114957809448,grad_norm: 0.999999823083038, iteration: 5109
loss: 1.3511855602264404,grad_norm: 0.9999997983500162, iteration: 5110
loss: 1.2880522012710571,grad_norm: 0.9999998171022921, iteration: 5111
loss: 1.3180298805236816,grad_norm: 0.9999997760042983, iteration: 5112
loss: 1.1342215538024902,grad_norm: 0.9999997990908491, iteration: 5113
loss: 1.3559703826904297,grad_norm: 0.999999755261169, iteration: 5114
loss: 1.4274457693099976,grad_norm: 0.9999997522853813, iteration: 5115
loss: 1.370710015296936,grad_norm: 0.9999997864439275, iteration: 5116
loss: 1.2592191696166992,grad_norm: 0.9999996615885468, iteration: 5117
loss: 1.4165862798690796,grad_norm: 0.9999997789689565, iteration: 5118
loss: 1.4944534301757812,grad_norm: 0.9999998307163896, iteration: 5119
loss: 1.4217349290847778,grad_norm: 0.9999997844149857, iteration: 5120
loss: 1.3924598693847656,grad_norm: 0.9999997805214814, iteration: 5121
loss: 1.3360322713851929,grad_norm: 0.9999997603603009, iteration: 5122
loss: 1.280387282371521,grad_norm: 0.9999997601974987, iteration: 5123
loss: 1.552378535270691,grad_norm: 0.9999998990126497, iteration: 5124
loss: 1.4209574460983276,grad_norm: 0.9999997866162461, iteration: 5125
loss: 1.2272552251815796,grad_norm: 0.9999997039738991, iteration: 5126
loss: 1.3870574235916138,grad_norm: 0.9999997772567294, iteration: 5127
loss: 1.3793970346450806,grad_norm: 0.9999998378634316, iteration: 5128
loss: 1.2350863218307495,grad_norm: 0.999999797706572, iteration: 5129
loss: 1.2741092443466187,grad_norm: 0.999999747024207, iteration: 5130
loss: 1.2934662103652954,grad_norm: 0.9999997931572199, iteration: 5131
loss: 1.1715160608291626,grad_norm: 0.9999998417698024, iteration: 5132
loss: 1.2540221214294434,grad_norm: 0.9999997684948746, iteration: 5133
loss: 1.338284969329834,grad_norm: 0.999999867090869, iteration: 5134
loss: 1.3179452419281006,grad_norm: 0.9999997414955625, iteration: 5135
loss: 1.3062281608581543,grad_norm: 0.9999998249010428, iteration: 5136
loss: 1.455843210220337,grad_norm: 0.999999887244413, iteration: 5137
loss: 1.3970921039581299,grad_norm: 0.9999999050457816, iteration: 5138
loss: 1.403282880783081,grad_norm: 0.999999836824714, iteration: 5139
loss: 1.158202052116394,grad_norm: 0.9999998059230041, iteration: 5140
loss: 1.2481845617294312,grad_norm: 0.999999686514543, iteration: 5141
loss: 1.2969598770141602,grad_norm: 0.99999977277097, iteration: 5142
loss: 1.3620619773864746,grad_norm: 0.9999998998956298, iteration: 5143
loss: 1.2928566932678223,grad_norm: 0.9999998002742719, iteration: 5144
loss: 1.3419872522354126,grad_norm: 0.9999997498013886, iteration: 5145
loss: 1.2845079898834229,grad_norm: 0.9999997478330345, iteration: 5146
loss: 1.1839746236801147,grad_norm: 0.999999784827959, iteration: 5147
loss: 1.4465477466583252,grad_norm: 0.9999997773349839, iteration: 5148
loss: 1.3176766633987427,grad_norm: 0.9999997625547352, iteration: 5149
loss: 1.3333390951156616,grad_norm: 0.9999997939173685, iteration: 5150
loss: 1.3847743272781372,grad_norm: 0.9999997979558738, iteration: 5151
loss: 1.411657691001892,grad_norm: 0.9999998297129794, iteration: 5152
loss: 1.4112130403518677,grad_norm: 0.9999998351426062, iteration: 5153
loss: 1.3282307386398315,grad_norm: 0.9999997770211889, iteration: 5154
loss: 1.3455432653427124,grad_norm: 0.9999997935003375, iteration: 5155
loss: 1.2851557731628418,grad_norm: 0.9999997820334383, iteration: 5156
loss: 1.3530179262161255,grad_norm: 0.9999998249957783, iteration: 5157
loss: 1.2314130067825317,grad_norm: 0.9999998094706355, iteration: 5158
loss: 1.22059965133667,grad_norm: 0.9999997590169101, iteration: 5159
loss: 1.2003614902496338,grad_norm: 0.9999998044997755, iteration: 5160
loss: 1.135712742805481,grad_norm: 0.9999997409434834, iteration: 5161
loss: 1.3516349792480469,grad_norm: 0.9999997702863604, iteration: 5162
loss: 1.2212296724319458,grad_norm: 0.9999998217352843, iteration: 5163
loss: 1.4795557260513306,grad_norm: 0.9999998352081337, iteration: 5164
loss: 1.2926273345947266,grad_norm: 0.9999997908494987, iteration: 5165
loss: 1.3746577501296997,grad_norm: 0.9999998604934587, iteration: 5166
loss: 1.3409488201141357,grad_norm: 0.9999998184126455, iteration: 5167
loss: 1.3443565368652344,grad_norm: 0.9999997965351869, iteration: 5168
loss: 1.4126157760620117,grad_norm: 0.9999998171659154, iteration: 5169
loss: 1.1625826358795166,grad_norm: 0.9999998078012305, iteration: 5170
loss: 1.2886956930160522,grad_norm: 0.9999999146655569, iteration: 5171
loss: 1.435343623161316,grad_norm: 0.999999801586436, iteration: 5172
loss: 1.3461105823516846,grad_norm: 0.9999997639415877, iteration: 5173
loss: 1.32868230342865,grad_norm: 0.9999998461123079, iteration: 5174
loss: 1.349853277206421,grad_norm: 0.9999998063856357, iteration: 5175
loss: 1.3239372968673706,grad_norm: 0.9999997796076635, iteration: 5176
loss: 1.2939242124557495,grad_norm: 0.9999997466577721, iteration: 5177
loss: 1.414526343345642,grad_norm: 0.9999998847812216, iteration: 5178
loss: 1.3988689184188843,grad_norm: 0.9999997983783296, iteration: 5179
loss: 1.3290197849273682,grad_norm: 0.9999997513476186, iteration: 5180
loss: 1.184549331665039,grad_norm: 0.999999735445492, iteration: 5181
loss: 1.3688437938690186,grad_norm: 0.9999997691349833, iteration: 5182
loss: 1.3645039796829224,grad_norm: 0.9999998169479557, iteration: 5183
loss: 1.3012992143630981,grad_norm: 0.9999998518165023, iteration: 5184
loss: 1.3739426136016846,grad_norm: 0.999999876146875, iteration: 5185
loss: 1.3115971088409424,grad_norm: 0.9999998457966858, iteration: 5186
loss: 1.3110458850860596,grad_norm: 0.9999998332189874, iteration: 5187
loss: 1.3544729948043823,grad_norm: 0.9999997852383447, iteration: 5188
loss: 1.355870008468628,grad_norm: 0.9999997746994701, iteration: 5189
loss: 1.4592852592468262,grad_norm: 0.9999997954388092, iteration: 5190
loss: 1.4206929206848145,grad_norm: 0.9999998057263197, iteration: 5191
loss: 1.3865485191345215,grad_norm: 0.9999997998143035, iteration: 5192
loss: 1.3931903839111328,grad_norm: 0.9999998309775662, iteration: 5193
loss: 1.359269380569458,grad_norm: 0.999999776784407, iteration: 5194
loss: 1.2976313829421997,grad_norm: 0.9999998170836987, iteration: 5195
loss: 1.4516500234603882,grad_norm: 0.9999998458321504, iteration: 5196
loss: 1.4458314180374146,grad_norm: 0.9999997508630472, iteration: 5197
loss: 1.3631104230880737,grad_norm: 0.9999998043225523, iteration: 5198
loss: 1.323322057723999,grad_norm: 0.999999743606738, iteration: 5199
loss: 1.3146703243255615,grad_norm: 0.9999998394712083, iteration: 5200
loss: 1.26216721534729,grad_norm: 0.9999997627716751, iteration: 5201
loss: 1.3732409477233887,grad_norm: 0.9999998211718524, iteration: 5202
loss: 1.2908530235290527,grad_norm: 0.9999997886064096, iteration: 5203
loss: 1.331114411354065,grad_norm: 0.999999742901539, iteration: 5204
loss: 1.2752023935317993,grad_norm: 0.9999997568255461, iteration: 5205
loss: 1.2473368644714355,grad_norm: 0.9999997321302437, iteration: 5206
loss: 1.1870543956756592,grad_norm: 0.9999996832542158, iteration: 5207
loss: 1.2538788318634033,grad_norm: 0.9999998035734371, iteration: 5208
loss: 1.3143665790557861,grad_norm: 0.999999758900914, iteration: 5209
loss: 1.2713881731033325,grad_norm: 0.9999998331658022, iteration: 5210
loss: 1.379164218902588,grad_norm: 0.9999998729613021, iteration: 5211
loss: 1.429957628250122,grad_norm: 0.9999998396351462, iteration: 5212
loss: 1.430171251296997,grad_norm: 0.9999997711308243, iteration: 5213
loss: 1.3038376569747925,grad_norm: 0.9999997471354786, iteration: 5214
loss: 1.304364562034607,grad_norm: 0.9999997451270303, iteration: 5215
loss: 1.482301115989685,grad_norm: 0.999999864904173, iteration: 5216
loss: 1.3092960119247437,grad_norm: 0.9999998560527741, iteration: 5217
loss: 1.3142929077148438,grad_norm: 0.999999764988216, iteration: 5218
loss: 1.2078096866607666,grad_norm: 0.9999997839869386, iteration: 5219
loss: 1.2497999668121338,grad_norm: 0.999999766776539, iteration: 5220
loss: 1.2013300657272339,grad_norm: 0.9999997768921577, iteration: 5221
loss: 1.2772356271743774,grad_norm: 0.9999997928463, iteration: 5222
loss: 1.270659327507019,grad_norm: 0.9999998544084904, iteration: 5223
loss: 1.231215000152588,grad_norm: 0.9999997128164279, iteration: 5224
loss: 1.4131476879119873,grad_norm: 0.9999998324047763, iteration: 5225
loss: 1.374821424484253,grad_norm: 0.9999998287091917, iteration: 5226
loss: 1.390596628189087,grad_norm: 0.9999997715563065, iteration: 5227
loss: 1.176375389099121,grad_norm: 0.9999997390089982, iteration: 5228
loss: 1.2654268741607666,grad_norm: 0.9999998423436638, iteration: 5229
loss: 1.090646505355835,grad_norm: 0.9999997198028954, iteration: 5230
loss: 1.359237790107727,grad_norm: 0.9999998755350943, iteration: 5231
loss: 1.5023534297943115,grad_norm: 0.9999997880764689, iteration: 5232
loss: 1.3700547218322754,grad_norm: 0.999999730934462, iteration: 5233
loss: 1.4001598358154297,grad_norm: 0.9999997498670848, iteration: 5234
loss: 1.3568787574768066,grad_norm: 0.9999998499745475, iteration: 5235
loss: 1.1714458465576172,grad_norm: 0.999999868827842, iteration: 5236
loss: 1.4169918298721313,grad_norm: 0.9999998567911275, iteration: 5237
loss: 1.1790037155151367,grad_norm: 0.9999997257723693, iteration: 5238
loss: 1.34324312210083,grad_norm: 0.9999998628094772, iteration: 5239
loss: 1.4278318881988525,grad_norm: 0.999999811795136, iteration: 5240
loss: 1.2517551183700562,grad_norm: 0.9999997637601415, iteration: 5241
loss: 1.334343671798706,grad_norm: 0.9999998871562844, iteration: 5242
loss: 1.3747164011001587,grad_norm: 0.9999998636747279, iteration: 5243
loss: 1.2265615463256836,grad_norm: 0.9999997751243159, iteration: 5244
loss: 1.281381607055664,grad_norm: 0.9999998282489212, iteration: 5245
loss: 1.2063992023468018,grad_norm: 0.9999998124324503, iteration: 5246
loss: 1.3697539567947388,grad_norm: 0.999999778161838, iteration: 5247
loss: 1.2560371160507202,grad_norm: 0.9999998040565125, iteration: 5248
loss: 1.3809151649475098,grad_norm: 0.9999998408747356, iteration: 5249
loss: 1.238621711730957,grad_norm: 0.9999998297543333, iteration: 5250
loss: 1.2470989227294922,grad_norm: 0.9999997396617215, iteration: 5251
loss: 1.4188344478607178,grad_norm: 0.9999998812554494, iteration: 5252
loss: 1.2699860334396362,grad_norm: 0.9999996854833427, iteration: 5253
loss: 1.2039027214050293,grad_norm: 0.9999998130423335, iteration: 5254
loss: 1.165008306503296,grad_norm: 0.9999997588078338, iteration: 5255
loss: 1.2429503202438354,grad_norm: 0.9999997912446656, iteration: 5256
loss: 1.283653736114502,grad_norm: 0.9999996838538618, iteration: 5257
loss: 1.3289941549301147,grad_norm: 0.9999997589395299, iteration: 5258
loss: 1.2781959772109985,grad_norm: 0.9999998100846026, iteration: 5259
loss: 1.3400578498840332,grad_norm: 0.9999998828117469, iteration: 5260
loss: 1.3050469160079956,grad_norm: 0.9999997957611351, iteration: 5261
loss: 1.2049392461776733,grad_norm: 0.9999997423437287, iteration: 5262
loss: 1.120030164718628,grad_norm: 0.9999996931505357, iteration: 5263
loss: 1.3511039018630981,grad_norm: 0.9999998532134786, iteration: 5264
loss: 1.2107856273651123,grad_norm: 0.999999839901268, iteration: 5265
loss: 1.347945213317871,grad_norm: 0.999999790380646, iteration: 5266
loss: 1.4350672960281372,grad_norm: 0.999999851524947, iteration: 5267
loss: 1.3774741888046265,grad_norm: 0.9999997718379604, iteration: 5268
loss: 1.1780494451522827,grad_norm: 0.9999997874492847, iteration: 5269
loss: 1.3646858930587769,grad_norm: 0.9999998120392749, iteration: 5270
loss: 1.3490313291549683,grad_norm: 0.9999997841117334, iteration: 5271
loss: 1.3721981048583984,grad_norm: 0.9999998096828767, iteration: 5272
loss: 1.4393023252487183,grad_norm: 0.9999997447883596, iteration: 5273
loss: 1.2381497621536255,grad_norm: 0.9999998490706259, iteration: 5274
loss: 1.474778652191162,grad_norm: 0.9999998418693818, iteration: 5275
loss: 1.2843499183654785,grad_norm: 0.9999997862524989, iteration: 5276
loss: 1.4595223665237427,grad_norm: 0.9999997767202307, iteration: 5277
loss: 1.3824546337127686,grad_norm: 0.9999998436552106, iteration: 5278
loss: 1.3514763116836548,grad_norm: 0.9999998478519759, iteration: 5279
loss: 1.3706365823745728,grad_norm: 0.9999997466554349, iteration: 5280
loss: 1.2370388507843018,grad_norm: 0.9999997123643777, iteration: 5281
loss: 1.3343852758407593,grad_norm: 0.9999998299013347, iteration: 5282
loss: 1.2586232423782349,grad_norm: 0.9999997653411232, iteration: 5283
loss: 1.2999476194381714,grad_norm: 0.9999997774453327, iteration: 5284
loss: 1.4489331245422363,grad_norm: 0.9999998160232445, iteration: 5285
loss: 1.3734575510025024,grad_norm: 0.9999998277335298, iteration: 5286
loss: 1.3794845342636108,grad_norm: 0.9999998168374793, iteration: 5287
loss: 1.3103382587432861,grad_norm: 0.9999998125802724, iteration: 5288
loss: 1.5132619142532349,grad_norm: 0.9999998975684977, iteration: 5289
loss: 1.3224767446517944,grad_norm: 0.9999998398549086, iteration: 5290
loss: 1.2434563636779785,grad_norm: 0.9999997527962238, iteration: 5291
loss: 1.271653652191162,grad_norm: 0.9999998536116219, iteration: 5292
loss: 1.3453160524368286,grad_norm: 0.9999998238800628, iteration: 5293
loss: 1.3066749572753906,grad_norm: 0.9999997805020433, iteration: 5294
loss: 1.2854788303375244,grad_norm: 0.9999997707899606, iteration: 5295
loss: 1.1894930601119995,grad_norm: 0.999999780669272, iteration: 5296
loss: 1.2608675956726074,grad_norm: 0.9999997864266597, iteration: 5297
loss: 1.1834096908569336,grad_norm: 0.9999998130633966, iteration: 5298
loss: 1.2753297090530396,grad_norm: 0.9999998263219261, iteration: 5299
loss: 1.2190437316894531,grad_norm: 0.9999998132791668, iteration: 5300
loss: 1.3190876245498657,grad_norm: 0.999999736364949, iteration: 5301
loss: 1.176226019859314,grad_norm: 0.9999998337239557, iteration: 5302
loss: 1.3576818704605103,grad_norm: 0.9999997044893696, iteration: 5303
loss: 1.3718308210372925,grad_norm: 0.9999998261539259, iteration: 5304
loss: 1.2305476665496826,grad_norm: 0.9999998031919034, iteration: 5305
loss: 1.3548996448516846,grad_norm: 0.9999998693122004, iteration: 5306
loss: 1.3499805927276611,grad_norm: 0.9999998028309265, iteration: 5307
loss: 1.3592044115066528,grad_norm: 0.9999998336260713, iteration: 5308
loss: 1.3037536144256592,grad_norm: 0.9999998260911017, iteration: 5309
loss: 1.298101544380188,grad_norm: 0.9999998204349007, iteration: 5310
loss: 1.2372112274169922,grad_norm: 0.999999738607229, iteration: 5311
loss: 1.2162679433822632,grad_norm: 0.9999997537786187, iteration: 5312
loss: 1.2712453603744507,grad_norm: 0.9999997772478075, iteration: 5313
loss: 1.4388854503631592,grad_norm: 0.9999998054748199, iteration: 5314
loss: 1.1433029174804688,grad_norm: 0.9999996694791701, iteration: 5315
loss: 1.2553644180297852,grad_norm: 0.9999998580893036, iteration: 5316
loss: 1.2849862575531006,grad_norm: 0.9999998163517748, iteration: 5317
loss: 1.2467589378356934,grad_norm: 0.999999795482314, iteration: 5318
loss: 1.2205159664154053,grad_norm: 0.9999997015880793, iteration: 5319
loss: 1.3543801307678223,grad_norm: 0.9999998412963997, iteration: 5320
loss: 1.210523009300232,grad_norm: 0.9999997396801741, iteration: 5321
loss: 1.3729143142700195,grad_norm: 0.9999997732446162, iteration: 5322
loss: 1.2606340646743774,grad_norm: 0.9999997731739065, iteration: 5323
loss: 1.2850755453109741,grad_norm: 0.9999997834474843, iteration: 5324
loss: 1.3520045280456543,grad_norm: 0.9999997593720609, iteration: 5325
loss: 1.299707055091858,grad_norm: 0.9999997788368792, iteration: 5326
loss: 1.2752645015716553,grad_norm: 0.9999997884751618, iteration: 5327
loss: 1.2496392726898193,grad_norm: 0.9999997857458888, iteration: 5328
loss: 1.278958797454834,grad_norm: 0.9999998378123278, iteration: 5329
loss: 1.2607556581497192,grad_norm: 0.9999997578192358, iteration: 5330
loss: 1.2826088666915894,grad_norm: 0.9999998085072929, iteration: 5331
loss: 1.2252342700958252,grad_norm: 0.9999997758689516, iteration: 5332
loss: 1.281347393989563,grad_norm: 0.9999998135946631, iteration: 5333
loss: 1.2424646615982056,grad_norm: 0.9999997897370569, iteration: 5334
loss: 1.1671888828277588,grad_norm: 0.9999997552638957, iteration: 5335
loss: 1.2646366357803345,grad_norm: 0.9999997213550514, iteration: 5336
loss: 1.2485015392303467,grad_norm: 0.9999998539192555, iteration: 5337
loss: 1.2579387426376343,grad_norm: 0.9999998783148117, iteration: 5338
loss: 1.222520112991333,grad_norm: 0.9999997399780881, iteration: 5339
loss: 1.30699622631073,grad_norm: 0.9999997264855046, iteration: 5340
loss: 1.2905768156051636,grad_norm: 0.9999997971379583, iteration: 5341
loss: 1.192093849182129,grad_norm: 0.9999997104816754, iteration: 5342
loss: 1.2664276361465454,grad_norm: 0.999999879267257, iteration: 5343
loss: 1.342793345451355,grad_norm: 0.9999997010229862, iteration: 5344
loss: 1.3485509157180786,grad_norm: 0.9999999133673738, iteration: 5345
loss: 1.4125921726226807,grad_norm: 0.9999997899426455, iteration: 5346
loss: 1.2515130043029785,grad_norm: 0.9999998007587194, iteration: 5347
loss: 1.2414084672927856,grad_norm: 0.9999997924223314, iteration: 5348
loss: 1.2177186012268066,grad_norm: 0.9999998051231893, iteration: 5349
loss: 1.2425135374069214,grad_norm: 0.9999997650545206, iteration: 5350
loss: 1.274922490119934,grad_norm: 0.9999997712583514, iteration: 5351
loss: 1.2335952520370483,grad_norm: 0.9999997679448989, iteration: 5352
loss: 1.135714054107666,grad_norm: 0.9999998348062296, iteration: 5353
loss: 1.4473893642425537,grad_norm: 0.9999998606224881, iteration: 5354
loss: 1.2941631078720093,grad_norm: 0.9999998256832251, iteration: 5355
loss: 1.2073795795440674,grad_norm: 0.9999997430144819, iteration: 5356
loss: 1.37080717086792,grad_norm: 0.9999998287482802, iteration: 5357
loss: 1.2676055431365967,grad_norm: 0.9999998656833067, iteration: 5358
loss: 1.35318922996521,grad_norm: 0.999999887804798, iteration: 5359
loss: 1.3186477422714233,grad_norm: 0.9999998003001104, iteration: 5360
loss: 1.298107624053955,grad_norm: 0.9999997930712143, iteration: 5361
loss: 1.4084829092025757,grad_norm: 0.9999998423311185, iteration: 5362
loss: 1.2531979084014893,grad_norm: 0.9999997724064945, iteration: 5363
loss: 1.3163877725601196,grad_norm: 0.9999998287937523, iteration: 5364
loss: 1.2945863008499146,grad_norm: 0.9999997325330485, iteration: 5365
loss: 1.457497477531433,grad_norm: 0.9999998021331192, iteration: 5366
loss: 1.4208259582519531,grad_norm: 0.9999998110122922, iteration: 5367
loss: 1.3252986669540405,grad_norm: 0.9999998200521832, iteration: 5368
loss: 1.1687109470367432,grad_norm: 0.9999997783300947, iteration: 5369
loss: 1.2920079231262207,grad_norm: 0.9999998041977146, iteration: 5370
loss: 1.3701773881912231,grad_norm: 0.9999998551802025, iteration: 5371
loss: 1.2920972108840942,grad_norm: 0.9999998762608774, iteration: 5372
loss: 1.1925197839736938,grad_norm: 0.9999997600942628, iteration: 5373
loss: 1.3876233100891113,grad_norm: 0.9999997772712024, iteration: 5374
loss: 1.2994227409362793,grad_norm: 0.9999998478176998, iteration: 5375
loss: 1.248648762702942,grad_norm: 0.9999998715879962, iteration: 5376
loss: 1.3840415477752686,grad_norm: 0.9999997970372061, iteration: 5377
loss: 1.3650810718536377,grad_norm: 0.9999998557861305, iteration: 5378
loss: 1.2797778844833374,grad_norm: 0.9999998756557095, iteration: 5379
loss: 1.1405946016311646,grad_norm: 0.9999996482283752, iteration: 5380
loss: 1.324501633644104,grad_norm: 0.9999998125248417, iteration: 5381
loss: 1.2315444946289062,grad_norm: 0.9999997230068389, iteration: 5382
loss: 1.271409273147583,grad_norm: 0.9999998036098111, iteration: 5383
loss: 1.1507251262664795,grad_norm: 0.9999997606248341, iteration: 5384
loss: 1.3009225130081177,grad_norm: 0.9999997821927439, iteration: 5385
loss: 1.2789989709854126,grad_norm: 0.999999770106402, iteration: 5386
loss: 1.3182897567749023,grad_norm: 0.9999998449519623, iteration: 5387
loss: 1.2724140882492065,grad_norm: 0.9999997752580038, iteration: 5388
loss: 1.2202764749526978,grad_norm: 0.9999997786296182, iteration: 5389
loss: 1.2726002931594849,grad_norm: 0.9999998418713522, iteration: 5390
loss: 1.424201250076294,grad_norm: 0.9999998718558322, iteration: 5391
loss: 1.250004529953003,grad_norm: 0.9999997079709707, iteration: 5392
loss: 1.2902954816818237,grad_norm: 0.9999997940091634, iteration: 5393
loss: 1.166954517364502,grad_norm: 0.9999997209944476, iteration: 5394
loss: 1.2196154594421387,grad_norm: 0.9999998263628496, iteration: 5395
loss: 1.2159924507141113,grad_norm: 0.9999997732486088, iteration: 5396
loss: 1.4987390041351318,grad_norm: 0.9999998170192144, iteration: 5397
loss: 1.314157247543335,grad_norm: 0.9999997623957995, iteration: 5398
loss: 1.2936999797821045,grad_norm: 0.9999997605864256, iteration: 5399
loss: 1.2832238674163818,grad_norm: 0.9999997503345524, iteration: 5400
loss: 1.2609031200408936,grad_norm: 0.9999997184732246, iteration: 5401
loss: 1.2144758701324463,grad_norm: 0.9999998360009767, iteration: 5402
loss: 1.3698508739471436,grad_norm: 0.9999999011498425, iteration: 5403
loss: 1.2160707712173462,grad_norm: 0.9999998534357265, iteration: 5404
loss: 1.190199613571167,grad_norm: 0.9999997782616271, iteration: 5405
loss: 1.2160245180130005,grad_norm: 0.9999996549031057, iteration: 5406
loss: 1.2473020553588867,grad_norm: 0.9999998414037456, iteration: 5407
loss: 1.3417086601257324,grad_norm: 0.9999998125932109, iteration: 5408
loss: 1.304088830947876,grad_norm: 0.9999997949016561, iteration: 5409
loss: 1.3612016439437866,grad_norm: 0.9999997408294091, iteration: 5410
loss: 1.352391004562378,grad_norm: 0.9999998106913744, iteration: 5411
loss: 1.2453303337097168,grad_norm: 0.9999997773672596, iteration: 5412
loss: 1.2077560424804688,grad_norm: 0.999999761853925, iteration: 5413
loss: 1.28181791305542,grad_norm: 0.9999998571850741, iteration: 5414
loss: 1.1486566066741943,grad_norm: 0.9999997521705337, iteration: 5415
loss: 1.1136831045150757,grad_norm: 0.9999998305251779, iteration: 5416
loss: 1.4503167867660522,grad_norm: 0.9999998495720887, iteration: 5417
loss: 1.209148645401001,grad_norm: 0.9999998282451593, iteration: 5418
loss: 1.1853222846984863,grad_norm: 0.999999792517935, iteration: 5419
loss: 1.2617417573928833,grad_norm: 0.9999998149480098, iteration: 5420
loss: 1.2199689149856567,grad_norm: 0.9999997142832665, iteration: 5421
loss: 1.4856420755386353,grad_norm: 0.9999998333142588, iteration: 5422
loss: 1.3498696088790894,grad_norm: 0.9999998826963038, iteration: 5423
loss: 1.2896829843521118,grad_norm: 0.9999998534350625, iteration: 5424
loss: 1.1851933002471924,grad_norm: 0.9999997917748998, iteration: 5425
loss: 1.1849207878112793,grad_norm: 0.999999717431298, iteration: 5426
loss: 1.1599711179733276,grad_norm: 0.9999998752002436, iteration: 5427
loss: 1.274855375289917,grad_norm: 0.9999997644457477, iteration: 5428
loss: 1.2749649286270142,grad_norm: 0.9999997828920969, iteration: 5429
loss: 1.2275868654251099,grad_norm: 0.9999997220523608, iteration: 5430
loss: 1.332216501235962,grad_norm: 0.999999900039572, iteration: 5431
loss: 1.390607237815857,grad_norm: 0.9999998052027702, iteration: 5432
loss: 1.1589680910110474,grad_norm: 0.9999997428887051, iteration: 5433
loss: 1.2264610528945923,grad_norm: 0.999999799402799, iteration: 5434
loss: 1.2821747064590454,grad_norm: 0.9999998351716045, iteration: 5435
loss: 1.253402590751648,grad_norm: 0.999999791032247, iteration: 5436
loss: 1.2331128120422363,grad_norm: 0.9999998257873803, iteration: 5437
loss: 1.245967984199524,grad_norm: 0.9999997533656868, iteration: 5438
loss: 1.2819238901138306,grad_norm: 0.9999998247207741, iteration: 5439
loss: 1.3077116012573242,grad_norm: 0.9999997414207387, iteration: 5440
loss: 1.296234369277954,grad_norm: 0.99999977803464, iteration: 5441
loss: 1.324039101600647,grad_norm: 0.9999998514547952, iteration: 5442
loss: 1.2960816621780396,grad_norm: 0.9999997825451827, iteration: 5443
loss: 1.3089776039123535,grad_norm: 0.9999998704222111, iteration: 5444
loss: 1.3740423917770386,grad_norm: 0.9999997375714613, iteration: 5445
loss: 1.2978147268295288,grad_norm: 0.9999997878010972, iteration: 5446
loss: 1.3140990734100342,grad_norm: 0.999999748357825, iteration: 5447
loss: 1.3976876735687256,grad_norm: 0.9999997420799585, iteration: 5448
loss: 1.3722425699234009,grad_norm: 0.9999998394813954, iteration: 5449
loss: 1.230765700340271,grad_norm: 0.9999998136348262, iteration: 5450
loss: 1.2642935514450073,grad_norm: 0.9999998066055096, iteration: 5451
loss: 1.1063770055770874,grad_norm: 0.9999998194177052, iteration: 5452
loss: 1.3463636636734009,grad_norm: 0.9999997275379875, iteration: 5453
loss: 1.3001192808151245,grad_norm: 0.9999997807393958, iteration: 5454
loss: 1.2591005563735962,grad_norm: 0.9999998514297496, iteration: 5455
loss: 1.3840879201889038,grad_norm: 0.9999997981701708, iteration: 5456
loss: 1.226556420326233,grad_norm: 0.9999997714470428, iteration: 5457
loss: 1.3144328594207764,grad_norm: 0.9999997335991787, iteration: 5458
loss: 1.3283566236495972,grad_norm: 0.9999997789024139, iteration: 5459
loss: 1.259106993675232,grad_norm: 0.999999808375219, iteration: 5460
loss: 1.3335543870925903,grad_norm: 0.9999997882793016, iteration: 5461
loss: 1.2663713693618774,grad_norm: 0.999999781964873, iteration: 5462
loss: 1.3260166645050049,grad_norm: 0.9999998024156251, iteration: 5463
loss: 1.340030550956726,grad_norm: 0.9999998158057081, iteration: 5464
loss: 1.2371978759765625,grad_norm: 0.9999998304305229, iteration: 5465
loss: 1.33419930934906,grad_norm: 0.9999997787052884, iteration: 5466
loss: 1.2269293069839478,grad_norm: 0.9999997793925621, iteration: 5467
loss: 1.319446325302124,grad_norm: 0.9999998761596848, iteration: 5468
loss: 1.3192652463912964,grad_norm: 0.9999998035103338, iteration: 5469
loss: 1.2630106210708618,grad_norm: 0.9999998298670497, iteration: 5470
loss: 1.2657002210617065,grad_norm: 0.9999997748223378, iteration: 5471
loss: 1.3069562911987305,grad_norm: 0.999999732727544, iteration: 5472
loss: 1.2729030847549438,grad_norm: 0.999999760702032, iteration: 5473
loss: 1.26749849319458,grad_norm: 0.9999996950963789, iteration: 5474
loss: 1.2576234340667725,grad_norm: 0.9999997782646884, iteration: 5475
loss: 1.2972644567489624,grad_norm: 0.9999997693049936, iteration: 5476
loss: 1.1676948070526123,grad_norm: 0.9999996912973121, iteration: 5477
loss: 1.2563117742538452,grad_norm: 0.9999998251043584, iteration: 5478
loss: 1.2740013599395752,grad_norm: 0.9999997953705803, iteration: 5479
loss: 1.2800180912017822,grad_norm: 0.999999739699419, iteration: 5480
loss: 1.2340096235275269,grad_norm: 0.9999996599872961, iteration: 5481
loss: 1.260405421257019,grad_norm: 0.9999997060634296, iteration: 5482
loss: 1.2221990823745728,grad_norm: 0.999999766764996, iteration: 5483
loss: 1.2332103252410889,grad_norm: 0.9999997978905779, iteration: 5484
loss: 1.2497525215148926,grad_norm: 0.9999998197618433, iteration: 5485
loss: 1.3531112670898438,grad_norm: 0.9999998047803729, iteration: 5486
loss: 1.2845427989959717,grad_norm: 0.9999997339317171, iteration: 5487
loss: 1.2663737535476685,grad_norm: 0.99999972390314, iteration: 5488
loss: 1.217255711555481,grad_norm: 0.9999997886812759, iteration: 5489
loss: 1.3013708591461182,grad_norm: 0.999999787517806, iteration: 5490
loss: 1.355832576751709,grad_norm: 0.9999997902898334, iteration: 5491
loss: 1.1530547142028809,grad_norm: 0.9999997592657691, iteration: 5492
loss: 1.256818175315857,grad_norm: 0.9999997327080012, iteration: 5493
loss: 1.2159079313278198,grad_norm: 0.9999997114088346, iteration: 5494
loss: 1.271213412284851,grad_norm: 0.9999997368223813, iteration: 5495
loss: 1.3120859861373901,grad_norm: 0.9999998257461798, iteration: 5496
loss: 1.2042182683944702,grad_norm: 0.9999997843119778, iteration: 5497
loss: 1.1834791898727417,grad_norm: 0.9999998168966093, iteration: 5498
loss: 1.3854706287384033,grad_norm: 0.999999831871662, iteration: 5499
loss: 1.285456895828247,grad_norm: 0.9999998435983527, iteration: 5500
loss: 1.3281079530715942,grad_norm: 0.9999998340525171, iteration: 5501
loss: 1.2563163042068481,grad_norm: 0.9999998083067897, iteration: 5502
loss: 1.3145955801010132,grad_norm: 0.9999998107365418, iteration: 5503
loss: 1.3116625547409058,grad_norm: 0.9999998035900745, iteration: 5504
loss: 1.2660551071166992,grad_norm: 0.9999997658947658, iteration: 5505
loss: 1.4447956085205078,grad_norm: 0.999999758855348, iteration: 5506
loss: 1.332462191581726,grad_norm: 0.9999997820428073, iteration: 5507
loss: 1.267697811126709,grad_norm: 0.9999998775676041, iteration: 5508
loss: 1.3634527921676636,grad_norm: 0.9999998485615849, iteration: 5509
loss: 1.307253122329712,grad_norm: 0.9999997622915441, iteration: 5510
loss: 1.358378291130066,grad_norm: 0.9999997445523984, iteration: 5511
loss: 1.356112003326416,grad_norm: 0.9999998667209837, iteration: 5512
loss: 1.292074203491211,grad_norm: 0.999999817352243, iteration: 5513
loss: 1.2424124479293823,grad_norm: 0.9999998158365279, iteration: 5514
loss: 1.3064345121383667,grad_norm: 0.9999997729714719, iteration: 5515
loss: 1.2306673526763916,grad_norm: 0.9999998525413875, iteration: 5516
loss: 1.420444369316101,grad_norm: 0.999999855891399, iteration: 5517
loss: 1.2799992561340332,grad_norm: 0.9999998459112103, iteration: 5518
loss: 1.24285888671875,grad_norm: 0.9999998311138155, iteration: 5519
loss: 1.2080858945846558,grad_norm: 0.9999997724248172, iteration: 5520
loss: 1.2397379875183105,grad_norm: 0.9999998020240322, iteration: 5521
loss: 1.1901001930236816,grad_norm: 0.9999998181767635, iteration: 5522
loss: 1.2701317071914673,grad_norm: 0.9999998503687616, iteration: 5523
loss: 1.392175316810608,grad_norm: 0.9999998666187994, iteration: 5524
loss: 1.160771131515503,grad_norm: 0.9999997560162072, iteration: 5525
loss: 1.2850788831710815,grad_norm: 0.9999997618483445, iteration: 5526
loss: 1.254880428314209,grad_norm: 0.9999997618555705, iteration: 5527
loss: 1.1123689413070679,grad_norm: 0.9999998188741309, iteration: 5528
loss: 1.1058917045593262,grad_norm: 0.9999997262664841, iteration: 5529
loss: 1.3129023313522339,grad_norm: 0.9999997683023042, iteration: 5530
loss: 1.3049943447113037,grad_norm: 0.999999889223137, iteration: 5531
loss: 1.1585272550582886,grad_norm: 0.9999997866263555, iteration: 5532
loss: 1.2349580526351929,grad_norm: 0.9999997484449216, iteration: 5533
loss: 1.2380454540252686,grad_norm: 0.9999997403792624, iteration: 5534
loss: 1.277753233909607,grad_norm: 0.9999997598649991, iteration: 5535
loss: 1.3342417478561401,grad_norm: 0.9999997533108972, iteration: 5536
loss: 1.3250622749328613,grad_norm: 0.9999998076409208, iteration: 5537
loss: 1.1394805908203125,grad_norm: 0.999999726170863, iteration: 5538
loss: 1.3338557481765747,grad_norm: 0.99999986934171, iteration: 5539
loss: 1.1190110445022583,grad_norm: 0.9999997211437072, iteration: 5540
loss: 1.2329267263412476,grad_norm: 0.999999736915524, iteration: 5541
loss: 1.2743797302246094,grad_norm: 0.9999998057950084, iteration: 5542
loss: 1.2795823812484741,grad_norm: 0.9999998552071957, iteration: 5543
loss: 1.1918220520019531,grad_norm: 0.9999998320253305, iteration: 5544
loss: 1.2160547971725464,grad_norm: 0.9999998196519183, iteration: 5545
loss: 1.2683087587356567,grad_norm: 0.9999998306806205, iteration: 5546
loss: 1.2101483345031738,grad_norm: 0.9999996875920889, iteration: 5547
loss: 1.1464428901672363,grad_norm: 0.9999998344816329, iteration: 5548
loss: 1.2150522470474243,grad_norm: 0.9999997996875005, iteration: 5549
loss: 1.2175227403640747,grad_norm: 0.9999998087080667, iteration: 5550
loss: 1.2130787372589111,grad_norm: 0.999999755953373, iteration: 5551
loss: 1.2297524213790894,grad_norm: 0.999999829296062, iteration: 5552
loss: 1.11650550365448,grad_norm: 0.9999996677351122, iteration: 5553
loss: 1.2509634494781494,grad_norm: 0.9999997726679942, iteration: 5554
loss: 1.3012137413024902,grad_norm: 0.9999998260483697, iteration: 5555
loss: 1.2312026023864746,grad_norm: 0.9999997881143177, iteration: 5556
loss: 1.2715877294540405,grad_norm: 0.9999998049314327, iteration: 5557
loss: 1.181150197982788,grad_norm: 0.9999997043031792, iteration: 5558
loss: 1.2755235433578491,grad_norm: 0.9999997962077535, iteration: 5559
loss: 1.169856071472168,grad_norm: 0.9999998604075928, iteration: 5560
loss: 1.2359750270843506,grad_norm: 0.9999998015089426, iteration: 5561
loss: 1.3072898387908936,grad_norm: 0.9999997455813877, iteration: 5562
loss: 1.2515584230422974,grad_norm: 0.9999997696164259, iteration: 5563
loss: 1.2198004722595215,grad_norm: 0.9999997530325185, iteration: 5564
loss: 1.2231980562210083,grad_norm: 0.9999997284699186, iteration: 5565
loss: 1.1702710390090942,grad_norm: 0.9999997450250253, iteration: 5566
loss: 1.378183126449585,grad_norm: 0.9999997876780637, iteration: 5567
loss: 1.2378283739089966,grad_norm: 0.9999997432775778, iteration: 5568
loss: 1.2899786233901978,grad_norm: 0.9999998164421061, iteration: 5569
loss: 1.2832722663879395,grad_norm: 0.9999998454952964, iteration: 5570
loss: 1.2448079586029053,grad_norm: 0.9999998065959114, iteration: 5571
loss: 1.258078932762146,grad_norm: 0.9999997740915243, iteration: 5572
loss: 1.200233817100525,grad_norm: 0.99999975439641, iteration: 5573
loss: 1.2377246618270874,grad_norm: 0.9999997769333603, iteration: 5574
loss: 1.328066110610962,grad_norm: 0.9999997345149758, iteration: 5575
loss: 1.244114875793457,grad_norm: 0.9999996919216521, iteration: 5576
loss: 1.2385460138320923,grad_norm: 0.9999997667580672, iteration: 5577
loss: 1.213518738746643,grad_norm: 0.9999997802384404, iteration: 5578
loss: 1.3385686874389648,grad_norm: 0.9999997986819934, iteration: 5579
loss: 1.137874722480774,grad_norm: 0.9999996926303926, iteration: 5580
loss: 1.2927135229110718,grad_norm: 0.9999998119218627, iteration: 5581
loss: 1.2957793474197388,grad_norm: 0.9999997208368533, iteration: 5582
loss: 1.1654049158096313,grad_norm: 0.9999997511049797, iteration: 5583
loss: 1.3140143156051636,grad_norm: 0.9999998190579092, iteration: 5584
loss: 1.116913080215454,grad_norm: 0.9999997701351713, iteration: 5585
loss: 1.2807003259658813,grad_norm: 0.9999997825125208, iteration: 5586
loss: 1.1160967350006104,grad_norm: 0.9999997444719617, iteration: 5587
loss: 1.2871220111846924,grad_norm: 0.9999997851245476, iteration: 5588
loss: 1.2831085920333862,grad_norm: 0.9999998408239625, iteration: 5589
loss: 1.285537838935852,grad_norm: 0.9999997597017732, iteration: 5590
loss: 1.2911027669906616,grad_norm: 0.9999997333094731, iteration: 5591
loss: 1.3086949586868286,grad_norm: 0.999999741302761, iteration: 5592
loss: 1.3021341562271118,grad_norm: 0.9999997708940873, iteration: 5593
loss: 1.3207697868347168,grad_norm: 0.9999997650411957, iteration: 5594
loss: 1.1995511054992676,grad_norm: 0.9999999096447095, iteration: 5595
loss: 1.193339228630066,grad_norm: 0.9999998375037792, iteration: 5596
loss: 1.2403987646102905,grad_norm: 0.9999998024345997, iteration: 5597
loss: 1.2240959405899048,grad_norm: 0.999999824071894, iteration: 5598
loss: 1.2415751218795776,grad_norm: 0.9999997653977282, iteration: 5599
loss: 1.188588261604309,grad_norm: 0.9999997533698206, iteration: 5600
loss: 1.186963438987732,grad_norm: 0.9999997896275274, iteration: 5601
loss: 1.2286407947540283,grad_norm: 0.9999998137957207, iteration: 5602
loss: 1.2433445453643799,grad_norm: 0.9999997600842517, iteration: 5603
loss: 1.1068037748336792,grad_norm: 0.9999997472295421, iteration: 5604
loss: 1.251442790031433,grad_norm: 0.9999998232705087, iteration: 5605
loss: 1.2770366668701172,grad_norm: 0.9999997891255145, iteration: 5606
loss: 1.4568538665771484,grad_norm: 0.9999998207941927, iteration: 5607
loss: 1.3250192403793335,grad_norm: 0.9999997923098459, iteration: 5608
loss: 1.2146183252334595,grad_norm: 0.999999842114723, iteration: 5609
loss: 1.259119987487793,grad_norm: 0.9999996880332936, iteration: 5610
loss: 1.3363314867019653,grad_norm: 0.9999998761319381, iteration: 5611
loss: 1.1908466815948486,grad_norm: 0.9999997537571227, iteration: 5612
loss: 1.237410306930542,grad_norm: 0.999999748380068, iteration: 5613
loss: 1.2694886922836304,grad_norm: 0.9999998142583841, iteration: 5614
loss: 1.185051679611206,grad_norm: 0.9999997013959285, iteration: 5615
loss: 1.2160612344741821,grad_norm: 0.9999997936353512, iteration: 5616
loss: 1.2282675504684448,grad_norm: 0.9999998329574462, iteration: 5617
loss: 1.3130074739456177,grad_norm: 0.9999998208104424, iteration: 5618
loss: 1.173606514930725,grad_norm: 0.9999997363024922, iteration: 5619
loss: 1.1544665098190308,grad_norm: 0.9999998384003891, iteration: 5620
loss: 1.1821057796478271,grad_norm: 0.9999997175751585, iteration: 5621
loss: 1.2053539752960205,grad_norm: 0.9999997959006298, iteration: 5622
loss: 1.2485374212265015,grad_norm: 0.999999818886651, iteration: 5623
loss: 1.173053503036499,grad_norm: 0.9999998116731145, iteration: 5624
loss: 1.1802488565444946,grad_norm: 0.999999777756643, iteration: 5625
loss: 1.269361972808838,grad_norm: 0.9999997341505436, iteration: 5626
loss: 1.1728419065475464,grad_norm: 0.9999997409150574, iteration: 5627
loss: 1.2389072179794312,grad_norm: 0.9999997993563242, iteration: 5628
loss: 1.2763934135437012,grad_norm: 0.9999997249663384, iteration: 5629
loss: 1.2236348390579224,grad_norm: 0.9999997702547834, iteration: 5630
loss: 1.22215735912323,grad_norm: 0.999999834234985, iteration: 5631
loss: 1.1641082763671875,grad_norm: 0.9999997395653023, iteration: 5632
loss: 1.169628620147705,grad_norm: 0.9999996944318408, iteration: 5633
loss: 1.2916994094848633,grad_norm: 0.9999998612821711, iteration: 5634
loss: 1.1997898817062378,grad_norm: 0.9999997185814564, iteration: 5635
loss: 1.1657108068466187,grad_norm: 0.9999997377576205, iteration: 5636
loss: 1.1804968118667603,grad_norm: 0.999999697880172, iteration: 5637
loss: 1.3388166427612305,grad_norm: 0.9999998912829601, iteration: 5638
loss: 1.3468828201293945,grad_norm: 0.9999998172256135, iteration: 5639
loss: 1.1736329793930054,grad_norm: 0.999999828533891, iteration: 5640
loss: 1.203517198562622,grad_norm: 0.999999837457185, iteration: 5641
loss: 1.150675892829895,grad_norm: 0.9999997521894257, iteration: 5642
loss: 1.1911362409591675,grad_norm: 0.9999998004079432, iteration: 5643
loss: 1.2020975351333618,grad_norm: 0.9999999044395448, iteration: 5644
loss: 1.2111471891403198,grad_norm: 0.9999999019123178, iteration: 5645
loss: 1.317552089691162,grad_norm: 0.9999997418928224, iteration: 5646
loss: 1.265284538269043,grad_norm: 0.9999997741347794, iteration: 5647
loss: 1.2359236478805542,grad_norm: 0.9999997880391862, iteration: 5648
loss: 1.2217024564743042,grad_norm: 0.9999998142942664, iteration: 5649
loss: 1.2280018329620361,grad_norm: 0.9999998703170186, iteration: 5650
loss: 1.0669353008270264,grad_norm: 0.9999997032024319, iteration: 5651
loss: 1.313948631286621,grad_norm: 0.9999997563871026, iteration: 5652
loss: 1.20807945728302,grad_norm: 0.9999997184365086, iteration: 5653
loss: 1.2228028774261475,grad_norm: 0.9999998177935497, iteration: 5654
loss: 1.3086413145065308,grad_norm: 0.999999790091428, iteration: 5655
loss: 1.1801615953445435,grad_norm: 0.9999997182084479, iteration: 5656
loss: 1.3271795511245728,grad_norm: 0.9999998455664255, iteration: 5657
loss: 1.2713215351104736,grad_norm: 0.9999998085987102, iteration: 5658
loss: 1.1641191244125366,grad_norm: 0.9999997805763899, iteration: 5659
loss: 1.208014726638794,grad_norm: 0.9999997886335565, iteration: 5660
loss: 1.29843270778656,grad_norm: 0.9999997743800405, iteration: 5661
loss: 1.3072876930236816,grad_norm: 0.9999998308652056, iteration: 5662
loss: 1.2935662269592285,grad_norm: 0.9999997441734036, iteration: 5663
loss: 1.263663411140442,grad_norm: 0.9999997889165957, iteration: 5664
loss: 1.1095147132873535,grad_norm: 0.999999770672331, iteration: 5665
loss: 1.2556428909301758,grad_norm: 0.9999998069255336, iteration: 5666
loss: 1.2914941310882568,grad_norm: 0.9999997598420409, iteration: 5667
loss: 1.1676251888275146,grad_norm: 0.9999998350001668, iteration: 5668
loss: 1.2202261686325073,grad_norm: 0.999999701589563, iteration: 5669
loss: 1.2312703132629395,grad_norm: 0.9999996801488924, iteration: 5670
loss: 1.2851990461349487,grad_norm: 0.9999998338817588, iteration: 5671
loss: 1.173795223236084,grad_norm: 0.9999998349617762, iteration: 5672
loss: 1.2058876752853394,grad_norm: 0.9999997386442265, iteration: 5673
loss: 1.2354822158813477,grad_norm: 0.9999997827326396, iteration: 5674
loss: 1.16829514503479,grad_norm: 0.9999997886955914, iteration: 5675
loss: 1.1365392208099365,grad_norm: 0.9999998352268459, iteration: 5676
loss: 1.2705496549606323,grad_norm: 0.9999998992346656, iteration: 5677
loss: 1.2812138795852661,grad_norm: 0.9999997563727386, iteration: 5678
loss: 1.2684835195541382,grad_norm: 0.9999998266534141, iteration: 5679
loss: 1.2004501819610596,grad_norm: 0.9999997221257604, iteration: 5680
loss: 1.2842761278152466,grad_norm: 0.9999997828547215, iteration: 5681
loss: 1.2863290309906006,grad_norm: 0.9999998235696538, iteration: 5682
loss: 1.274030089378357,grad_norm: 0.9999997640049155, iteration: 5683
loss: 1.3042019605636597,grad_norm: 0.999999771154511, iteration: 5684
loss: 1.0840044021606445,grad_norm: 0.9999997512878344, iteration: 5685
loss: 1.2533373832702637,grad_norm: 0.9999997853015061, iteration: 5686
loss: 1.1587705612182617,grad_norm: 0.9999997754024563, iteration: 5687
loss: 1.1990886926651,grad_norm: 0.9999997592220856, iteration: 5688
loss: 1.263085961341858,grad_norm: 0.9999998004569124, iteration: 5689
loss: 1.2103095054626465,grad_norm: 0.9999997747776957, iteration: 5690
loss: 1.1936700344085693,grad_norm: 0.9999997172478338, iteration: 5691
loss: 1.1778755187988281,grad_norm: 0.9999997461259528, iteration: 5692
loss: 1.2675710916519165,grad_norm: 0.9999997843584262, iteration: 5693
loss: 1.1848015785217285,grad_norm: 0.999999738450472, iteration: 5694
loss: 1.1192270517349243,grad_norm: 0.9999998130926883, iteration: 5695
loss: 1.161189317703247,grad_norm: 0.9999998026607042, iteration: 5696
loss: 1.1341853141784668,grad_norm: 0.9999997229420586, iteration: 5697
loss: 1.3190335035324097,grad_norm: 0.9999997745034355, iteration: 5698
loss: 1.2249424457550049,grad_norm: 0.9999998282794423, iteration: 5699
loss: 1.3483682870864868,grad_norm: 0.9999997861946734, iteration: 5700
loss: 1.1722846031188965,grad_norm: 0.9999997130458134, iteration: 5701
loss: 1.2228031158447266,grad_norm: 0.9999998677241425, iteration: 5702
loss: 1.2578322887420654,grad_norm: 0.9999998178403486, iteration: 5703
loss: 1.2180994749069214,grad_norm: 0.9999998114164271, iteration: 5704
loss: 1.2015384435653687,grad_norm: 0.9999998220638813, iteration: 5705
loss: 1.0852586030960083,grad_norm: 0.9999997888169552, iteration: 5706
loss: 1.1839123964309692,grad_norm: 0.9999997734601966, iteration: 5707
loss: 1.243514895439148,grad_norm: 0.9999998029685848, iteration: 5708
loss: 1.2900015115737915,grad_norm: 0.9999997787482701, iteration: 5709
loss: 1.110008955001831,grad_norm: 0.9999997294284129, iteration: 5710
loss: 1.1764193773269653,grad_norm: 0.9999997615628086, iteration: 5711
loss: 1.134834885597229,grad_norm: 0.9999996609665612, iteration: 5712
loss: 1.380448579788208,grad_norm: 0.9999999003901061, iteration: 5713
loss: 1.1839216947555542,grad_norm: 0.9999998053259089, iteration: 5714
loss: 1.15427827835083,grad_norm: 0.9999999449095959, iteration: 5715
loss: 1.2900606393814087,grad_norm: 0.9999997622909302, iteration: 5716
loss: 1.285304069519043,grad_norm: 0.9999997601986669, iteration: 5717
loss: 1.2541331052780151,grad_norm: 0.9999997352419839, iteration: 5718
loss: 1.2116583585739136,grad_norm: 0.9999997916253494, iteration: 5719
loss: 1.0597000122070312,grad_norm: 0.9999996921090913, iteration: 5720
loss: 1.1967133283615112,grad_norm: 0.9999998061944405, iteration: 5721
loss: 1.3040961027145386,grad_norm: 0.9999998206444107, iteration: 5722
loss: 1.2303297519683838,grad_norm: 0.9999997807553644, iteration: 5723
loss: 1.252085566520691,grad_norm: 0.9999998441206718, iteration: 5724
loss: 1.258540153503418,grad_norm: 0.9999997294604399, iteration: 5725
loss: 1.230946660041809,grad_norm: 0.9999999228665287, iteration: 5726
loss: 1.153694987297058,grad_norm: 0.9999996815674146, iteration: 5727
loss: 1.32029390335083,grad_norm: 0.9999998163999045, iteration: 5728
loss: 1.3502371311187744,grad_norm: 0.9999998468323943, iteration: 5729
loss: 1.1200146675109863,grad_norm: 0.9999997986838806, iteration: 5730
loss: 1.2038878202438354,grad_norm: 0.9999998267858767, iteration: 5731
loss: 1.1713430881500244,grad_norm: 0.9999996719869216, iteration: 5732
loss: 1.2301297187805176,grad_norm: 0.9999998843900374, iteration: 5733
loss: 1.3453718423843384,grad_norm: 0.9999998968514326, iteration: 5734
loss: 1.3134708404541016,grad_norm: 0.9999997535951225, iteration: 5735
loss: 1.0323899984359741,grad_norm: 0.9999996673469347, iteration: 5736
loss: 1.280234932899475,grad_norm: 0.9999997469954237, iteration: 5737
loss: 1.2670193910598755,grad_norm: 0.999999877965358, iteration: 5738
loss: 1.3204606771469116,grad_norm: 0.9999998663949735, iteration: 5739
loss: 1.2285892963409424,grad_norm: 0.9999997881223869, iteration: 5740
loss: 1.176821231842041,grad_norm: 0.9999997331022433, iteration: 5741
loss: 1.1923600435256958,grad_norm: 0.9999997270378238, iteration: 5742
loss: 1.2973114252090454,grad_norm: 0.9999997968999589, iteration: 5743
loss: 1.1536966562271118,grad_norm: 0.9999997081795121, iteration: 5744
loss: 1.08217453956604,grad_norm: 0.9999995969892987, iteration: 5745
loss: 1.1667473316192627,grad_norm: 0.9999997123172906, iteration: 5746
loss: 1.204764723777771,grad_norm: 0.99999975118559, iteration: 5747
loss: 1.1274865865707397,grad_norm: 0.9999997366683938, iteration: 5748
loss: 1.1553553342819214,grad_norm: 0.999999754916403, iteration: 5749
loss: 1.2246382236480713,grad_norm: 0.9999998181453931, iteration: 5750
loss: 1.4051822423934937,grad_norm: 0.9999998009581806, iteration: 5751
loss: 1.2780187129974365,grad_norm: 0.9999997542172198, iteration: 5752
loss: 1.2154300212860107,grad_norm: 0.9999996736867328, iteration: 5753
loss: 1.1487202644348145,grad_norm: 0.9999997166487631, iteration: 5754
loss: 1.2719463109970093,grad_norm: 0.9999998331944987, iteration: 5755
loss: 1.2109702825546265,grad_norm: 0.9999997687273919, iteration: 5756
loss: 1.1576731204986572,grad_norm: 0.9999997665501899, iteration: 5757
loss: 1.144978642463684,grad_norm: 0.9999996900650461, iteration: 5758
loss: 1.2003605365753174,grad_norm: 0.9999997263945423, iteration: 5759
loss: 1.16629159450531,grad_norm: 0.9999997795802946, iteration: 5760
loss: 1.2662899494171143,grad_norm: 0.99999971059197, iteration: 5761
loss: 1.26579749584198,grad_norm: 0.9999998051298553, iteration: 5762
loss: 1.1980581283569336,grad_norm: 0.9999997427813424, iteration: 5763
loss: 1.1771166324615479,grad_norm: 0.9999997136276546, iteration: 5764
loss: 1.174013376235962,grad_norm: 0.9999997197414362, iteration: 5765
loss: 1.2381826639175415,grad_norm: 0.9999997870734473, iteration: 5766
loss: 1.2041329145431519,grad_norm: 0.9999998367209053, iteration: 5767
loss: 1.2845243215560913,grad_norm: 0.9999996871241633, iteration: 5768
loss: 1.2809257507324219,grad_norm: 0.9999998095598106, iteration: 5769
loss: 1.1402446031570435,grad_norm: 0.9999997450532072, iteration: 5770
loss: 1.2743624448776245,grad_norm: 0.999999727864141, iteration: 5771
loss: 1.3001340627670288,grad_norm: 0.9999997767654228, iteration: 5772
loss: 1.1289122104644775,grad_norm: 0.999999835749246, iteration: 5773
loss: 1.273033618927002,grad_norm: 0.9999997367783435, iteration: 5774
loss: 1.147207260131836,grad_norm: 0.9999998027439687, iteration: 5775
loss: 1.1240568161010742,grad_norm: 0.9999997001325374, iteration: 5776
loss: 1.1559526920318604,grad_norm: 0.9999997528346928, iteration: 5777
loss: 1.22626793384552,grad_norm: 0.9999998344236971, iteration: 5778
loss: 1.0883214473724365,grad_norm: 0.9999996670938954, iteration: 5779
loss: 1.2714173793792725,grad_norm: 0.9999997786122663, iteration: 5780
loss: 1.16147780418396,grad_norm: 0.999999737297809, iteration: 5781
loss: 1.2417675256729126,grad_norm: 0.9999998028680541, iteration: 5782
loss: 1.2608283758163452,grad_norm: 0.9999996782609271, iteration: 5783
loss: 1.1717535257339478,grad_norm: 0.9999998096270319, iteration: 5784
loss: 1.178109884262085,grad_norm: 0.9999998132571086, iteration: 5785
loss: 1.1698520183563232,grad_norm: 0.9999996539376962, iteration: 5786
loss: 1.244796872138977,grad_norm: 0.9999999249491387, iteration: 5787
loss: 1.2991952896118164,grad_norm: 0.9999998606250609, iteration: 5788
loss: 1.106020450592041,grad_norm: 0.9999997045460973, iteration: 5789
loss: 1.1618618965148926,grad_norm: 0.999999820473537, iteration: 5790
loss: 1.2109272480010986,grad_norm: 0.9999997355687636, iteration: 5791
loss: 1.183577537536621,grad_norm: 0.9999996191544109, iteration: 5792
loss: 1.3022284507751465,grad_norm: 0.9999997436312384, iteration: 5793
loss: 1.2237516641616821,grad_norm: 0.9999998901368848, iteration: 5794
loss: 1.1126840114593506,grad_norm: 0.9999998366574814, iteration: 5795
loss: 1.3253237009048462,grad_norm: 0.9999999439710089, iteration: 5796
loss: 1.1560332775115967,grad_norm: 0.9999996996177387, iteration: 5797
loss: 1.1035487651824951,grad_norm: 0.9999996430714065, iteration: 5798
loss: 1.115309715270996,grad_norm: 0.9999996693775564, iteration: 5799
loss: 1.2567665576934814,grad_norm: 0.9999997097210236, iteration: 5800
loss: 1.394180178642273,grad_norm: 0.999999742455032, iteration: 5801
loss: 1.045538067817688,grad_norm: 0.9999997928926836, iteration: 5802
loss: 1.1409492492675781,grad_norm: 0.9999997186888053, iteration: 5803
loss: 1.098759412765503,grad_norm: 0.9999997056895059, iteration: 5804
loss: 1.180809736251831,grad_norm: 0.9999997100726029, iteration: 5805
loss: 1.1698518991470337,grad_norm: 0.9999997187762482, iteration: 5806
loss: 1.1847331523895264,grad_norm: 0.999999878052656, iteration: 5807
loss: 1.16257905960083,grad_norm: 0.9999997459744585, iteration: 5808
loss: 1.043641209602356,grad_norm: 0.9999996072756737, iteration: 5809
loss: 1.2026880979537964,grad_norm: 0.9999998297262369, iteration: 5810
loss: 1.2139850854873657,grad_norm: 0.9999998769978818, iteration: 5811
loss: 1.1949752569198608,grad_norm: 0.9999997813544695, iteration: 5812
loss: 1.2180743217468262,grad_norm: 0.9999997652140986, iteration: 5813
loss: 1.3312132358551025,grad_norm: 0.9999998644030313, iteration: 5814
loss: 1.1626414060592651,grad_norm: 0.9999997063258779, iteration: 5815
loss: 1.2108155488967896,grad_norm: 0.9999997585214712, iteration: 5816
loss: 1.213192105293274,grad_norm: 0.9999997857797978, iteration: 5817
loss: 1.2190181016921997,grad_norm: 0.9999998188062654, iteration: 5818
loss: 1.2210116386413574,grad_norm: 0.9999997927201059, iteration: 5819
loss: 1.1341909170150757,grad_norm: 0.9999997591697092, iteration: 5820
loss: 1.2225793600082397,grad_norm: 0.9999997361692831, iteration: 5821
loss: 1.3270139694213867,grad_norm: 0.9999998757062115, iteration: 5822
loss: 1.0745155811309814,grad_norm: 0.9999996672939437, iteration: 5823
loss: 1.1994359493255615,grad_norm: 0.9999998376978827, iteration: 5824
loss: 1.1852723360061646,grad_norm: 0.9999997048226273, iteration: 5825
loss: 1.2482469081878662,grad_norm: 0.9999997776357751, iteration: 5826
loss: 1.1099926233291626,grad_norm: 0.9999997314898921, iteration: 5827
loss: 1.1710361242294312,grad_norm: 0.9999998464952304, iteration: 5828
loss: 1.3007131814956665,grad_norm: 0.9999998507266875, iteration: 5829
loss: 1.2547008991241455,grad_norm: 0.9999998280581412, iteration: 5830
loss: 1.2613037824630737,grad_norm: 0.9999997569041271, iteration: 5831
loss: 1.1716947555541992,grad_norm: 0.9999997853624155, iteration: 5832
loss: 1.210193395614624,grad_norm: 0.9999997453974669, iteration: 5833
loss: 1.1315988302230835,grad_norm: 0.9999997185811302, iteration: 5834
loss: 1.1926109790802002,grad_norm: 0.9999997746834709, iteration: 5835
loss: 1.276719570159912,grad_norm: 0.9999997496008478, iteration: 5836
loss: 1.110852599143982,grad_norm: 0.9999997993171974, iteration: 5837
loss: 1.0835899114608765,grad_norm: 0.9999997883541539, iteration: 5838
loss: 1.168188452720642,grad_norm: 0.999999671769777, iteration: 5839
loss: 1.2430973052978516,grad_norm: 0.9999996846859329, iteration: 5840
loss: 1.1536146402359009,grad_norm: 0.9999998489009548, iteration: 5841
loss: 1.1878643035888672,grad_norm: 0.99999971418475, iteration: 5842
loss: 1.1730748414993286,grad_norm: 0.9999997918319444, iteration: 5843
loss: 1.2228742837905884,grad_norm: 0.9999997837454515, iteration: 5844
loss: 1.1967259645462036,grad_norm: 0.9999997660079727, iteration: 5845
loss: 1.2518401145935059,grad_norm: 0.9999997044094564, iteration: 5846
loss: 1.2920454740524292,grad_norm: 0.999999902166153, iteration: 5847
loss: 1.2267173528671265,grad_norm: 0.9999998753618563, iteration: 5848
loss: 1.2664408683776855,grad_norm: 0.9999997091993164, iteration: 5849
loss: 1.2448183298110962,grad_norm: 0.999999766255163, iteration: 5850
loss: 1.2341675758361816,grad_norm: 0.9999997084243202, iteration: 5851
loss: 1.2478641271591187,grad_norm: 0.9999998820692135, iteration: 5852
loss: 1.2239043712615967,grad_norm: 0.9999997606733477, iteration: 5853
loss: 1.2445735931396484,grad_norm: 0.9999998164184638, iteration: 5854
loss: 1.264691710472107,grad_norm: 0.9999997962865905, iteration: 5855
loss: 1.1264592409133911,grad_norm: 0.9999996683318708, iteration: 5856
loss: 1.1843562126159668,grad_norm: 0.9999996726851227, iteration: 5857
loss: 1.2497223615646362,grad_norm: 0.9999997719878164, iteration: 5858
loss: 1.2604438066482544,grad_norm: 0.9999997892411013, iteration: 5859
loss: 1.1736665964126587,grad_norm: 0.9999997700940024, iteration: 5860
loss: 1.1107971668243408,grad_norm: 0.9999997277192706, iteration: 5861
loss: 1.2852777242660522,grad_norm: 0.9999997189445962, iteration: 5862
loss: 1.2168693542480469,grad_norm: 0.9999998310488277, iteration: 5863
loss: 1.1107840538024902,grad_norm: 0.9999997150294057, iteration: 5864
loss: 1.277977466583252,grad_norm: 0.9999997916891294, iteration: 5865
loss: 1.119309902191162,grad_norm: 0.9999997470350396, iteration: 5866
loss: 1.1708358526229858,grad_norm: 0.9999998185983067, iteration: 5867
loss: 1.1233230829238892,grad_norm: 0.9999999263497612, iteration: 5868
loss: 1.2898658514022827,grad_norm: 0.9999997694593087, iteration: 5869
loss: 1.1402018070220947,grad_norm: 0.9999998342178474, iteration: 5870
loss: 1.2111135721206665,grad_norm: 0.9999997854113493, iteration: 5871
loss: 1.2151058912277222,grad_norm: 0.9999997060336374, iteration: 5872
loss: 1.2642443180084229,grad_norm: 0.9999997599994701, iteration: 5873
loss: 1.2086517810821533,grad_norm: 0.9999997020344056, iteration: 5874
loss: 1.2350232601165771,grad_norm: 0.9999997765503494, iteration: 5875
loss: 1.234880805015564,grad_norm: 0.9999997037602326, iteration: 5876
loss: 1.1604223251342773,grad_norm: 0.9999997075508378, iteration: 5877
loss: 1.1005576848983765,grad_norm: 0.9999997412528238, iteration: 5878
loss: 1.1949926614761353,grad_norm: 0.9999998140742411, iteration: 5879
loss: 1.1214518547058105,grad_norm: 0.9999997525149533, iteration: 5880
loss: 1.1782373189926147,grad_norm: 0.9999996993145976, iteration: 5881
loss: 1.1793473958969116,grad_norm: 0.9999996487434182, iteration: 5882
loss: 1.1567041873931885,grad_norm: 0.9999997313072951, iteration: 5883
loss: 1.1920111179351807,grad_norm: 0.9999996914915249, iteration: 5884
loss: 1.190424919128418,grad_norm: 0.9999998675778131, iteration: 5885
loss: 1.1743658781051636,grad_norm: 0.9999997914278662, iteration: 5886
loss: 1.1908506155014038,grad_norm: 0.9999996899892587, iteration: 5887
loss: 1.129874587059021,grad_norm: 0.9999997175512246, iteration: 5888
loss: 1.2304548025131226,grad_norm: 0.9999997486335965, iteration: 5889
loss: 1.204257607460022,grad_norm: 0.9999997543415005, iteration: 5890
loss: 1.1177712678909302,grad_norm: 0.9999997771909462, iteration: 5891
loss: 1.2303591966629028,grad_norm: 0.9999997952652934, iteration: 5892
loss: 1.2461903095245361,grad_norm: 0.9999998018561614, iteration: 5893
loss: 1.1991404294967651,grad_norm: 0.9999998182436958, iteration: 5894
loss: 1.18362557888031,grad_norm: 0.9999997854476921, iteration: 5895
loss: 1.1859711408615112,grad_norm: 0.9999996664792492, iteration: 5896
loss: 1.16746187210083,grad_norm: 0.9999997049135794, iteration: 5897
loss: 1.1879020929336548,grad_norm: 0.9999997851471387, iteration: 5898
loss: 1.1351089477539062,grad_norm: 0.9999997275752833, iteration: 5899
loss: 1.1754499673843384,grad_norm: 0.999999701527506, iteration: 5900
loss: 1.1412935256958008,grad_norm: 0.9999997454739659, iteration: 5901
loss: 1.2836601734161377,grad_norm: 0.999999856533176, iteration: 5902
loss: 1.159932255744934,grad_norm: 0.9999997855172112, iteration: 5903
loss: 1.1779307126998901,grad_norm: 0.9999997639994208, iteration: 5904
loss: 1.3264188766479492,grad_norm: 0.9999998383396771, iteration: 5905
loss: 1.2138447761535645,grad_norm: 0.9999997689714188, iteration: 5906
loss: 1.2054475545883179,grad_norm: 0.999999806524441, iteration: 5907
loss: 1.1763596534729004,grad_norm: 0.9999997333332473, iteration: 5908
loss: 1.1967109441757202,grad_norm: 0.9999997894115124, iteration: 5909
loss: 1.1777235269546509,grad_norm: 0.9999998121010277, iteration: 5910
loss: 1.1748312711715698,grad_norm: 0.9999997614677176, iteration: 5911
loss: 1.1772955656051636,grad_norm: 0.9999998027377666, iteration: 5912
loss: 1.138662576675415,grad_norm: 0.9999997189755307, iteration: 5913
loss: 1.2512569427490234,grad_norm: 0.9999998118657358, iteration: 5914
loss: 1.2645237445831299,grad_norm: 0.9999997994104813, iteration: 5915
loss: 1.3250612020492554,grad_norm: 0.9999998006317752, iteration: 5916
loss: 1.163407325744629,grad_norm: 0.9999997653897167, iteration: 5917
loss: 1.215656042098999,grad_norm: 0.9999997926735336, iteration: 5918
loss: 1.2499279975891113,grad_norm: 0.9999997276285025, iteration: 5919
loss: 1.190139889717102,grad_norm: 0.999999729901828, iteration: 5920
loss: 1.1804612874984741,grad_norm: 0.9999997229523055, iteration: 5921
loss: 1.1758795976638794,grad_norm: 0.9999997810209547, iteration: 5922
loss: 1.2316666841506958,grad_norm: 0.9999998254302254, iteration: 5923
loss: 1.2325432300567627,grad_norm: 0.999999735207903, iteration: 5924
loss: 1.1811126470565796,grad_norm: 0.9999997625897783, iteration: 5925
loss: 1.270972728729248,grad_norm: 0.9999998686798576, iteration: 5926
loss: 1.2287054061889648,grad_norm: 0.9999997713348823, iteration: 5927
loss: 1.1043906211853027,grad_norm: 0.9999997882017335, iteration: 5928
loss: 1.0928200483322144,grad_norm: 0.9999997699384391, iteration: 5929
loss: 1.05966055393219,grad_norm: 0.9999995339590722, iteration: 5930
loss: 1.2941752672195435,grad_norm: 0.9999997222179967, iteration: 5931
loss: 1.3307791948318481,grad_norm: 0.9999998272055399, iteration: 5932
loss: 1.1421873569488525,grad_norm: 0.9999998066094461, iteration: 5933
loss: 1.2429707050323486,grad_norm: 0.9999997700297221, iteration: 5934
loss: 1.0799651145935059,grad_norm: 0.9999997288768568, iteration: 5935
loss: 1.274322748184204,grad_norm: 0.9999997468470783, iteration: 5936
loss: 1.2492755651474,grad_norm: 0.9999997321798196, iteration: 5937
loss: 1.1713707447052002,grad_norm: 0.999999724364393, iteration: 5938
loss: 1.106105089187622,grad_norm: 0.9999997741882114, iteration: 5939
loss: 1.2190418243408203,grad_norm: 0.9999998158611356, iteration: 5940
loss: 1.18388831615448,grad_norm: 0.9999998286435313, iteration: 5941
loss: 1.1673003435134888,grad_norm: 0.9999997725621272, iteration: 5942
loss: 1.1418105363845825,grad_norm: 0.9999997296331349, iteration: 5943
loss: 1.1076468229293823,grad_norm: 0.9999996493035666, iteration: 5944
loss: 1.1726912260055542,grad_norm: 0.9999997249636501, iteration: 5945
loss: 1.146116852760315,grad_norm: 0.9999996425442798, iteration: 5946
loss: 1.238346815109253,grad_norm: 0.9999998078588352, iteration: 5947
loss: 1.1153528690338135,grad_norm: 0.9999997059230553, iteration: 5948
loss: 1.202501654624939,grad_norm: 0.99999974993912, iteration: 5949
loss: 1.1421674489974976,grad_norm: 0.9999996326952569, iteration: 5950
loss: 1.211707353591919,grad_norm: 0.9999998790748118, iteration: 5951
loss: 1.169174313545227,grad_norm: 0.9999998090963571, iteration: 5952
loss: 1.1494957208633423,grad_norm: 0.9999998057803208, iteration: 5953
loss: 1.2148829698562622,grad_norm: 0.9999997039869177, iteration: 5954
loss: 1.3090941905975342,grad_norm: 0.9999998310931252, iteration: 5955
loss: 1.2384638786315918,grad_norm: 0.9999997008378442, iteration: 5956
loss: 1.2682163715362549,grad_norm: 0.9999998566865267, iteration: 5957
loss: 1.1434385776519775,grad_norm: 0.9999997272255131, iteration: 5958
loss: 1.1161693334579468,grad_norm: 0.9999998545169508, iteration: 5959
loss: 1.1201666593551636,grad_norm: 0.9999996822649035, iteration: 5960
loss: 1.1783266067504883,grad_norm: 0.9999997935537575, iteration: 5961
loss: 1.162706971168518,grad_norm: 0.9999997596843466, iteration: 5962
loss: 1.15554940700531,grad_norm: 0.9999997549321281, iteration: 5963
loss: 1.1451162099838257,grad_norm: 0.9999997618082189, iteration: 5964
loss: 1.2132469415664673,grad_norm: 0.9999997587512531, iteration: 5965
loss: 1.199019193649292,grad_norm: 0.9999997653006923, iteration: 5966
loss: 1.2198809385299683,grad_norm: 0.9999996550466931, iteration: 5967
loss: 1.2213927507400513,grad_norm: 0.9999998658187603, iteration: 5968
loss: 1.1143661737442017,grad_norm: 0.9999996960645591, iteration: 5969
loss: 1.0684480667114258,grad_norm: 0.9999996594521591, iteration: 5970
loss: 1.11995530128479,grad_norm: 0.9999997334183605, iteration: 5971
loss: 1.1602588891983032,grad_norm: 0.9999998087157421, iteration: 5972
loss: 1.1418335437774658,grad_norm: 0.9999997766863084, iteration: 5973
loss: 1.2691618204116821,grad_norm: 0.9999997655719214, iteration: 5974
loss: 1.1624476909637451,grad_norm: 0.999999788145311, iteration: 5975
loss: 1.11891770362854,grad_norm: 0.9999997963844931, iteration: 5976
loss: 1.1211837530136108,grad_norm: 0.9999996352616564, iteration: 5977
loss: 1.1187849044799805,grad_norm: 0.9999995728573671, iteration: 5978
loss: 1.1314070224761963,grad_norm: 0.9999997690364019, iteration: 5979
loss: 1.184853196144104,grad_norm: 0.9999998521495463, iteration: 5980
loss: 1.2092089653015137,grad_norm: 0.9999997350562, iteration: 5981
loss: 1.2557082176208496,grad_norm: 0.9999997518837659, iteration: 5982
loss: 1.0925302505493164,grad_norm: 0.9999997604729051, iteration: 5983
loss: 1.267708420753479,grad_norm: 0.9999997555143263, iteration: 5984
loss: 1.234684944152832,grad_norm: 0.9999997521659701, iteration: 5985
loss: 1.0838731527328491,grad_norm: 0.999999728895053, iteration: 5986
loss: 1.1728761196136475,grad_norm: 0.999999764620288, iteration: 5987
loss: 1.1708062887191772,grad_norm: 0.9999997042024545, iteration: 5988
loss: 1.1486178636550903,grad_norm: 0.9999998407667039, iteration: 5989
loss: 1.2882914543151855,grad_norm: 0.9999998750900083, iteration: 5990
loss: 1.1730332374572754,grad_norm: 0.9999996589221533, iteration: 5991
loss: 1.1516072750091553,grad_norm: 0.9999996711473423, iteration: 5992
loss: 1.1786179542541504,grad_norm: 0.9999998267287736, iteration: 5993
loss: 1.22262442111969,grad_norm: 0.999999790146468, iteration: 5994
loss: 1.1978670358657837,grad_norm: 0.9999997122360776, iteration: 5995
loss: 1.1219000816345215,grad_norm: 0.9999997020374771, iteration: 5996
loss: 1.151311993598938,grad_norm: 0.9999999048740099, iteration: 5997
loss: 1.2500370740890503,grad_norm: 0.9999997930629861, iteration: 5998
loss: 1.2049455642700195,grad_norm: 0.9999997360816003, iteration: 5999
loss: 1.0894396305084229,grad_norm: 0.9999996982292733, iteration: 6000
loss: 1.115393042564392,grad_norm: 0.9999996236016344, iteration: 6001
loss: 1.2057844400405884,grad_norm: 0.9999997502288995, iteration: 6002
loss: 1.3147966861724854,grad_norm: 0.9999998592259323, iteration: 6003
loss: 1.1318702697753906,grad_norm: 0.9999996876034948, iteration: 6004
loss: 1.194280743598938,grad_norm: 0.9999997841968166, iteration: 6005
loss: 1.2313657999038696,grad_norm: 0.9999998413860538, iteration: 6006
loss: 1.2598927021026611,grad_norm: 0.9999997836289374, iteration: 6007
loss: 1.1319968700408936,grad_norm: 0.9999996769829227, iteration: 6008
loss: 1.1631213426589966,grad_norm: 0.999999727015868, iteration: 6009
loss: 1.3484967947006226,grad_norm: 0.9999998239608543, iteration: 6010
loss: 1.250547170639038,grad_norm: 0.9999998707530368, iteration: 6011
loss: 1.1253001689910889,grad_norm: 0.9999997688352983, iteration: 6012
loss: 1.1256518363952637,grad_norm: 0.9999996995340784, iteration: 6013
loss: 1.1058132648468018,grad_norm: 0.9999997624813515, iteration: 6014
loss: 1.278698444366455,grad_norm: 0.9999998714295643, iteration: 6015
loss: 1.0948694944381714,grad_norm: 0.9999996990734747, iteration: 6016
loss: 1.3143589496612549,grad_norm: 0.9999997580750317, iteration: 6017
loss: 1.089958667755127,grad_norm: 0.9999996578715522, iteration: 6018
loss: 1.1992450952529907,grad_norm: 0.999999673131284, iteration: 6019
loss: 1.103085994720459,grad_norm: 0.9999997340561128, iteration: 6020
loss: 1.1746485233306885,grad_norm: 0.9999998309445978, iteration: 6021
loss: 1.15947425365448,grad_norm: 0.9999996877817846, iteration: 6022
loss: 1.1536816358566284,grad_norm: 0.9999998000787406, iteration: 6023
loss: 1.1236169338226318,grad_norm: 0.9999997489230288, iteration: 6024
loss: 1.0790729522705078,grad_norm: 0.999999746161155, iteration: 6025
loss: 1.118919014930725,grad_norm: 0.9999996223771486, iteration: 6026
loss: 1.073300838470459,grad_norm: 0.9999996351224946, iteration: 6027
loss: 1.1879979372024536,grad_norm: 0.9999997433251536, iteration: 6028
loss: 1.0713776350021362,grad_norm: 0.9999996407157484, iteration: 6029
loss: 1.108147144317627,grad_norm: 0.9999996349702024, iteration: 6030
loss: 1.0594907999038696,grad_norm: 0.9999997107220089, iteration: 6031
loss: 1.1075160503387451,grad_norm: 0.9999996365945278, iteration: 6032
loss: 1.0933860540390015,grad_norm: 0.9999996857408564, iteration: 6033
loss: 1.1470615863800049,grad_norm: 0.9999996868685825, iteration: 6034
loss: 1.353674054145813,grad_norm: 0.9999997574307653, iteration: 6035
loss: 1.0658857822418213,grad_norm: 0.9999995974872364, iteration: 6036
loss: 1.1230509281158447,grad_norm: 0.9999997868364905, iteration: 6037
loss: 1.0637108087539673,grad_norm: 0.9999996690489453, iteration: 6038
loss: 1.030616283416748,grad_norm: 0.999999584716448, iteration: 6039
loss: 1.1008671522140503,grad_norm: 0.9999996621120824, iteration: 6040
loss: 1.273582100868225,grad_norm: 0.9999999012904748, iteration: 6041
loss: 1.287864327430725,grad_norm: 0.9999997191308533, iteration: 6042
loss: 1.2798957824707031,grad_norm: 0.9999997856799787, iteration: 6043
loss: 1.2600351572036743,grad_norm: 0.9999998375719359, iteration: 6044
loss: 1.3655180931091309,grad_norm: 0.9999997977828455, iteration: 6045
loss: 1.2583471536636353,grad_norm: 0.999999673114898, iteration: 6046
loss: 1.2285295724868774,grad_norm: 0.9999997987737499, iteration: 6047
loss: 1.1538060903549194,grad_norm: 0.9999997385693964, iteration: 6048
loss: 1.108315348625183,grad_norm: 0.9999997325744061, iteration: 6049
loss: 1.1479027271270752,grad_norm: 0.9999997150229971, iteration: 6050
loss: 1.1327046155929565,grad_norm: 0.9999997913126379, iteration: 6051
loss: 1.138046383857727,grad_norm: 0.9999997971763208, iteration: 6052
loss: 1.1321303844451904,grad_norm: 0.9999997543537886, iteration: 6053
loss: 1.1568491458892822,grad_norm: 0.9999996316161378, iteration: 6054
loss: 1.1358754634857178,grad_norm: 0.9999995899971131, iteration: 6055
loss: 1.238776445388794,grad_norm: 0.9999998041336775, iteration: 6056
loss: 1.1328712701797485,grad_norm: 0.9999998179771691, iteration: 6057
loss: 1.1522806882858276,grad_norm: 0.9999997216951556, iteration: 6058
loss: 1.1561987400054932,grad_norm: 0.9999996744778583, iteration: 6059
loss: 1.1205286979675293,grad_norm: 0.9999997075290696, iteration: 6060
loss: 1.1359057426452637,grad_norm: 0.9999997853621218, iteration: 6061
loss: 1.1312376260757446,grad_norm: 0.9999997358709349, iteration: 6062
loss: 1.1389622688293457,grad_norm: 0.9999996713868818, iteration: 6063
loss: 1.2423877716064453,grad_norm: 0.9999998212847663, iteration: 6064
loss: 1.1735484600067139,grad_norm: 0.999999719692558, iteration: 6065
loss: 1.1423718929290771,grad_norm: 0.9999997168537932, iteration: 6066
loss: 1.2274514436721802,grad_norm: 0.9999998059879647, iteration: 6067
loss: 1.1846301555633545,grad_norm: 0.9999997257677675, iteration: 6068
loss: 1.151633620262146,grad_norm: 0.9999997567965792, iteration: 6069
loss: 1.1218492984771729,grad_norm: 0.9999996840815105, iteration: 6070
loss: 1.1899713277816772,grad_norm: 0.9999998174881777, iteration: 6071
loss: 1.2488763332366943,grad_norm: 0.9999997160637597, iteration: 6072
loss: 1.1452761888504028,grad_norm: 0.9999997875006604, iteration: 6073
loss: 1.2579096555709839,grad_norm: 0.9999998376469974, iteration: 6074
loss: 1.0964967012405396,grad_norm: 0.99999965854844, iteration: 6075
loss: 1.1890482902526855,grad_norm: 0.9999997147266179, iteration: 6076
loss: 1.1436797380447388,grad_norm: 0.9999996623124325, iteration: 6077
loss: 1.1495312452316284,grad_norm: 0.9999997417883082, iteration: 6078
loss: 1.1513487100601196,grad_norm: 0.9999996995867237, iteration: 6079
loss: 1.1886347532272339,grad_norm: 0.9999998637728283, iteration: 6080
loss: 1.1610069274902344,grad_norm: 0.9999998057182782, iteration: 6081
loss: 1.1447126865386963,grad_norm: 0.9999997679940292, iteration: 6082
loss: 1.1745175123214722,grad_norm: 0.999999647230383, iteration: 6083
loss: 1.2102872133255005,grad_norm: 0.9999997888854165, iteration: 6084
loss: 1.1621685028076172,grad_norm: 0.9999998571463786, iteration: 6085
loss: 1.1219228506088257,grad_norm: 0.999999788378472, iteration: 6086
loss: 1.1463062763214111,grad_norm: 0.9999997884827045, iteration: 6087
loss: 1.1094523668289185,grad_norm: 0.9999996867601892, iteration: 6088
loss: 1.1044436693191528,grad_norm: 0.9999997230208085, iteration: 6089
loss: 1.1870232820510864,grad_norm: 0.9999997434787942, iteration: 6090
loss: 1.083073616027832,grad_norm: 0.9999996224619941, iteration: 6091
loss: 1.1708104610443115,grad_norm: 0.9999997169363625, iteration: 6092
loss: 1.1422373056411743,grad_norm: 0.999999796340041, iteration: 6093
loss: 1.1763001680374146,grad_norm: 0.9999997912423513, iteration: 6094
loss: 1.1771222352981567,grad_norm: 0.9999997247247396, iteration: 6095
loss: 1.0953902006149292,grad_norm: 0.9999996604049151, iteration: 6096
loss: 1.1214170455932617,grad_norm: 0.9999997471739008, iteration: 6097
loss: 1.2265561819076538,grad_norm: 0.9999998696437, iteration: 6098
loss: 1.1344295740127563,grad_norm: 0.9999997022909195, iteration: 6099
loss: 1.1647312641143799,grad_norm: 0.9999996713924475, iteration: 6100
loss: 1.1825077533721924,grad_norm: 0.9999998175628362, iteration: 6101
loss: 1.230663776397705,grad_norm: 0.9999997966922096, iteration: 6102
loss: 1.1687201261520386,grad_norm: 0.9999997572638821, iteration: 6103
loss: 1.2256850004196167,grad_norm: 0.9999996715228834, iteration: 6104
loss: 1.1397238969802856,grad_norm: 0.9999997204646003, iteration: 6105
loss: 1.1819190979003906,grad_norm: 0.9999997015263545, iteration: 6106
loss: 1.1998544931411743,grad_norm: 0.9999997509521249, iteration: 6107
loss: 1.3285578489303589,grad_norm: 0.9999997884759694, iteration: 6108
loss: 1.1518316268920898,grad_norm: 0.9999997945519473, iteration: 6109
loss: 1.1107547283172607,grad_norm: 0.9999995995656629, iteration: 6110
loss: 1.1884196996688843,grad_norm: 0.9999997667450138, iteration: 6111
loss: 1.1029736995697021,grad_norm: 0.999999643789411, iteration: 6112
loss: 1.1747978925704956,grad_norm: 0.9999998231097535, iteration: 6113
loss: 1.1951388120651245,grad_norm: 0.9999997897004539, iteration: 6114
loss: 1.2045741081237793,grad_norm: 0.9999998352017813, iteration: 6115
loss: 1.164278268814087,grad_norm: 0.9999997000182825, iteration: 6116
loss: 1.1416702270507812,grad_norm: 0.9999997096640842, iteration: 6117
loss: 1.2768265008926392,grad_norm: 0.999999830968706, iteration: 6118
loss: 1.277492880821228,grad_norm: 0.9999998110990597, iteration: 6119
loss: 1.1044247150421143,grad_norm: 0.9999998516503404, iteration: 6120
loss: 1.2206636667251587,grad_norm: 0.9999998256686865, iteration: 6121
loss: 1.1801971197128296,grad_norm: 0.9999997203630475, iteration: 6122
loss: 1.1473393440246582,grad_norm: 0.9999997975236365, iteration: 6123
loss: 1.1276957988739014,grad_norm: 0.9999997991403231, iteration: 6124
loss: 1.1126819849014282,grad_norm: 0.9999997732826306, iteration: 6125
loss: 1.116499900817871,grad_norm: 0.9999997489995853, iteration: 6126
loss: 1.1020084619522095,grad_norm: 0.999999700885348, iteration: 6127
loss: 1.1127241849899292,grad_norm: 0.999999696521694, iteration: 6128
loss: 1.2280879020690918,grad_norm: 0.9999997955665142, iteration: 6129
loss: 1.0457621812820435,grad_norm: 0.9999996700181912, iteration: 6130
loss: 1.0902446508407593,grad_norm: 0.9999998460518237, iteration: 6131
loss: 1.1676135063171387,grad_norm: 0.9999997547349829, iteration: 6132
loss: 1.1257253885269165,grad_norm: 0.9999997868474688, iteration: 6133
loss: 1.2614015340805054,grad_norm: 0.9999998094366631, iteration: 6134
loss: 1.1721218824386597,grad_norm: 0.9999998117752078, iteration: 6135
loss: 1.2134230136871338,grad_norm: 0.9999997335284276, iteration: 6136
loss: 1.0994157791137695,grad_norm: 0.9999997003070287, iteration: 6137
loss: 1.2829495668411255,grad_norm: 0.9999997862128359, iteration: 6138
loss: 1.1590619087219238,grad_norm: 0.9999997145106351, iteration: 6139
loss: 1.2712538242340088,grad_norm: 0.9999998844313591, iteration: 6140
loss: 1.2525687217712402,grad_norm: 0.9999997494065168, iteration: 6141
loss: 1.122772455215454,grad_norm: 0.9999997735570453, iteration: 6142
loss: 1.2386318445205688,grad_norm: 0.9999997567328637, iteration: 6143
loss: 1.1951674222946167,grad_norm: 0.9999998325326305, iteration: 6144
loss: 1.104546308517456,grad_norm: 0.9999997125691734, iteration: 6145
loss: 1.1392834186553955,grad_norm: 0.9999996669452013, iteration: 6146
loss: 1.134804129600525,grad_norm: 0.9999996870054036, iteration: 6147
loss: 1.1715582609176636,grad_norm: 0.999999784666933, iteration: 6148
loss: 1.249697208404541,grad_norm: 0.9999998080035587, iteration: 6149
loss: 1.1315888166427612,grad_norm: 0.9999996479571265, iteration: 6150
loss: 1.1818748712539673,grad_norm: 0.9999997963859669, iteration: 6151
loss: 1.1254284381866455,grad_norm: 0.999999673432704, iteration: 6152
loss: 1.090789794921875,grad_norm: 0.9999997217616332, iteration: 6153
loss: 1.1362475156784058,grad_norm: 0.9999996715026714, iteration: 6154
loss: 1.2008835077285767,grad_norm: 0.9999997744119057, iteration: 6155
loss: 1.133638620376587,grad_norm: 0.9999997645950566, iteration: 6156
loss: 1.230756402015686,grad_norm: 0.9999997053714191, iteration: 6157
loss: 1.1425151824951172,grad_norm: 0.9999996561950287, iteration: 6158
loss: 1.1904711723327637,grad_norm: 0.9999997174139827, iteration: 6159
loss: 1.2272826433181763,grad_norm: 0.9999998087657572, iteration: 6160
loss: 1.0925577878952026,grad_norm: 0.9999997630222095, iteration: 6161
loss: 1.1902052164077759,grad_norm: 0.9999997219173625, iteration: 6162
loss: 1.190368413925171,grad_norm: 0.9999997532410544, iteration: 6163
loss: 1.2508823871612549,grad_norm: 0.9999998767136168, iteration: 6164
loss: 1.2132478952407837,grad_norm: 0.9999997692330879, iteration: 6165
loss: 1.1455343961715698,grad_norm: 0.9999996881381568, iteration: 6166
loss: 1.0962367057800293,grad_norm: 0.9999997416380778, iteration: 6167
loss: 1.1959441900253296,grad_norm: 0.9999997465026678, iteration: 6168
loss: 1.159070611000061,grad_norm: 0.9999996137745131, iteration: 6169
loss: 1.2207950353622437,grad_norm: 0.9999997374539827, iteration: 6170
loss: 1.113646149635315,grad_norm: 0.9999996732504403, iteration: 6171
loss: 1.019433856010437,grad_norm: 0.9999996576127181, iteration: 6172
loss: 1.1794475317001343,grad_norm: 0.999999668358929, iteration: 6173
loss: 1.0815048217773438,grad_norm: 0.9999996681048872, iteration: 6174
loss: 1.1644747257232666,grad_norm: 0.9999996357212536, iteration: 6175
loss: 1.1269515752792358,grad_norm: 0.9999997547312734, iteration: 6176
loss: 1.1552557945251465,grad_norm: 0.9999997575793996, iteration: 6177
loss: 1.2403305768966675,grad_norm: 0.9999997257264358, iteration: 6178
loss: 1.1203805208206177,grad_norm: 0.9999997291240217, iteration: 6179
loss: 1.2055696249008179,grad_norm: 0.9999997058305511, iteration: 6180
loss: 1.1167675256729126,grad_norm: 0.9999996896988549, iteration: 6181
loss: 1.0878522396087646,grad_norm: 0.9999996061805496, iteration: 6182
loss: 1.0986309051513672,grad_norm: 0.999999727244188, iteration: 6183
loss: 1.1400374174118042,grad_norm: 0.9999996330189834, iteration: 6184
loss: 1.2709615230560303,grad_norm: 0.9999998022641757, iteration: 6185
loss: 1.1371185779571533,grad_norm: 0.9999998001159138, iteration: 6186
loss: 1.049466609954834,grad_norm: 0.9999996739697861, iteration: 6187
loss: 1.248351812362671,grad_norm: 0.9999998585823326, iteration: 6188
loss: 1.1165592670440674,grad_norm: 0.999999801942121, iteration: 6189
loss: 1.258721947669983,grad_norm: 0.9999997687263648, iteration: 6190
loss: 1.1117291450500488,grad_norm: 0.999999639068796, iteration: 6191
loss: 1.1100319623947144,grad_norm: 0.9999995783005508, iteration: 6192
loss: 1.105683445930481,grad_norm: 0.9999996445579691, iteration: 6193
loss: 1.1709128618240356,grad_norm: 0.9999998551228322, iteration: 6194
loss: 1.1213096380233765,grad_norm: 0.9999996754845775, iteration: 6195
loss: 1.2494674921035767,grad_norm: 0.9999998085068409, iteration: 6196
loss: 1.087660551071167,grad_norm: 0.9999996928839033, iteration: 6197
loss: 1.1588191986083984,grad_norm: 0.9999997049152536, iteration: 6198
loss: 1.0968437194824219,grad_norm: 0.999999650857056, iteration: 6199
loss: 1.2070695161819458,grad_norm: 0.999999809301199, iteration: 6200
loss: 1.1553188562393188,grad_norm: 0.999999794707331, iteration: 6201
loss: 1.098607063293457,grad_norm: 0.9999996680112163, iteration: 6202
loss: 1.2514150142669678,grad_norm: 0.9999998586857224, iteration: 6203
loss: 1.1898540258407593,grad_norm: 0.9999997105934567, iteration: 6204
loss: 1.0775117874145508,grad_norm: 0.9999996766726224, iteration: 6205
loss: 1.0986597537994385,grad_norm: 0.99999983798137, iteration: 6206
loss: 1.1313713788986206,grad_norm: 0.9999997633429576, iteration: 6207
loss: 1.1709789037704468,grad_norm: 0.9999998154855078, iteration: 6208
loss: 1.207017421722412,grad_norm: 0.9999998330146347, iteration: 6209
loss: 1.1698949337005615,grad_norm: 0.9999997582884099, iteration: 6210
loss: 1.1059914827346802,grad_norm: 0.9999997288017488, iteration: 6211
loss: 1.1305108070373535,grad_norm: 0.9999996388573832, iteration: 6212
loss: 1.188536286354065,grad_norm: 0.9999998763954498, iteration: 6213
loss: 1.2438517808914185,grad_norm: 0.9999997534528037, iteration: 6214
loss: 1.1898545026779175,grad_norm: 0.9999998107827132, iteration: 6215
loss: 1.2046525478363037,grad_norm: 0.9999998465346196, iteration: 6216
loss: 1.095339298248291,grad_norm: 0.9999997614607398, iteration: 6217
loss: 1.0385278463363647,grad_norm: 0.9999996349404627, iteration: 6218
loss: 1.1058341264724731,grad_norm: 0.9999996558435994, iteration: 6219
loss: 1.101319432258606,grad_norm: 0.9999996152841374, iteration: 6220
loss: 1.1851236820220947,grad_norm: 0.9999997530683691, iteration: 6221
loss: 1.1847587823867798,grad_norm: 0.9999997208487236, iteration: 6222
loss: 1.1370469331741333,grad_norm: 0.9999997044597287, iteration: 6223
loss: 1.1899482011795044,grad_norm: 0.9999998082843214, iteration: 6224
loss: 1.1955705881118774,grad_norm: 0.9999996762483346, iteration: 6225
loss: 1.1705251932144165,grad_norm: 0.9999997079592456, iteration: 6226
loss: 1.1286784410476685,grad_norm: 0.9999997279792786, iteration: 6227
loss: 1.244940996170044,grad_norm: 0.9999997861869583, iteration: 6228
loss: 1.102436900138855,grad_norm: 0.9999998104664615, iteration: 6229
loss: 1.2063230276107788,grad_norm: 0.9999998406355602, iteration: 6230
loss: 1.065250277519226,grad_norm: 0.9999995220912283, iteration: 6231
loss: 1.1463123559951782,grad_norm: 0.9999998202282983, iteration: 6232
loss: 1.1403381824493408,grad_norm: 0.9999997384894, iteration: 6233
loss: 1.080802321434021,grad_norm: 0.9999995587122762, iteration: 6234
loss: 1.1577450037002563,grad_norm: 0.9999996943869414, iteration: 6235
loss: 1.199616551399231,grad_norm: 0.999999758973939, iteration: 6236
loss: 1.230217456817627,grad_norm: 0.999999804550099, iteration: 6237
loss: 1.1947886943817139,grad_norm: 0.9999996078700434, iteration: 6238
loss: 1.093332290649414,grad_norm: 0.9999996340530328, iteration: 6239
loss: 1.14695143699646,grad_norm: 0.9999997594040677, iteration: 6240
loss: 1.1481914520263672,grad_norm: 0.9999996695923994, iteration: 6241
loss: 1.1520161628723145,grad_norm: 0.9999996196773021, iteration: 6242
loss: 1.124249815940857,grad_norm: 0.9999997413494016, iteration: 6243
loss: 1.113415002822876,grad_norm: 0.9999997653227989, iteration: 6244
loss: 1.1634399890899658,grad_norm: 0.9999997802284066, iteration: 6245
loss: 1.115728735923767,grad_norm: 0.99999967181079, iteration: 6246
loss: 1.1562731266021729,grad_norm: 0.9999997632333285, iteration: 6247
loss: 1.0909191370010376,grad_norm: 0.9999996953203935, iteration: 6248
loss: 1.1757044792175293,grad_norm: 0.9999998251253922, iteration: 6249
loss: 1.22772216796875,grad_norm: 0.9999997471367424, iteration: 6250
loss: 1.2443640232086182,grad_norm: 0.9999999162326136, iteration: 6251
loss: 1.187288522720337,grad_norm: 0.999999830914308, iteration: 6252
loss: 1.1637113094329834,grad_norm: 0.999999715612174, iteration: 6253
loss: 1.122769832611084,grad_norm: 0.9999996573731829, iteration: 6254
loss: 1.1398707628250122,grad_norm: 0.9999997352349901, iteration: 6255
loss: 1.085442304611206,grad_norm: 0.9999996965967024, iteration: 6256
loss: 1.2407385110855103,grad_norm: 0.9999996584271604, iteration: 6257
loss: 1.222835898399353,grad_norm: 0.9999998057595274, iteration: 6258
loss: 1.025413155555725,grad_norm: 0.9999997340580483, iteration: 6259
loss: 1.1057311296463013,grad_norm: 0.9999997161085958, iteration: 6260
loss: 1.1210342645645142,grad_norm: 0.9999997751529957, iteration: 6261
loss: 1.208330512046814,grad_norm: 0.9999998338682832, iteration: 6262
loss: 1.0911078453063965,grad_norm: 0.9999997124140422, iteration: 6263
loss: 1.1889289617538452,grad_norm: 0.9999996869747214, iteration: 6264
loss: 1.0732290744781494,grad_norm: 0.9999996362970677, iteration: 6265
loss: 1.093085765838623,grad_norm: 0.9999995899225191, iteration: 6266
loss: 1.1223140954971313,grad_norm: 0.9999997601382784, iteration: 6267
loss: 1.1272474527359009,grad_norm: 0.9999996499297379, iteration: 6268
loss: 1.2054922580718994,grad_norm: 0.9999998141594874, iteration: 6269
loss: 1.1392565965652466,grad_norm: 0.999999657933566, iteration: 6270
loss: 1.2310607433319092,grad_norm: 0.999999842930513, iteration: 6271
loss: 1.1422375440597534,grad_norm: 0.9999997570257889, iteration: 6272
loss: 1.1185613870620728,grad_norm: 0.9999997060747862, iteration: 6273
loss: 1.1181693077087402,grad_norm: 0.999999673373744, iteration: 6274
loss: 1.2199079990386963,grad_norm: 0.9999996552610724, iteration: 6275
loss: 1.1028478145599365,grad_norm: 0.9999997925455499, iteration: 6276
loss: 1.1436097621917725,grad_norm: 0.9999997624168653, iteration: 6277
loss: 1.102310061454773,grad_norm: 0.999999569016819, iteration: 6278
loss: 1.1537777185440063,grad_norm: 0.9999997732009058, iteration: 6279
loss: 1.1550885438919067,grad_norm: 0.9999997235047084, iteration: 6280
loss: 1.0889034271240234,grad_norm: 0.9999996781920044, iteration: 6281
loss: 1.1854208707809448,grad_norm: 0.9999996617217276, iteration: 6282
loss: 1.221321940422058,grad_norm: 0.9999997609865197, iteration: 6283
loss: 1.0924142599105835,grad_norm: 0.9999996038793452, iteration: 6284
loss: 1.1959043741226196,grad_norm: 0.9999998185000888, iteration: 6285
loss: 1.1001399755477905,grad_norm: 0.9999997299724788, iteration: 6286
loss: 1.1868680715560913,grad_norm: 0.9999997566532124, iteration: 6287
loss: 1.1379377841949463,grad_norm: 0.9999997700457325, iteration: 6288
loss: 1.1434305906295776,grad_norm: 0.9999996720851426, iteration: 6289
loss: 1.163187861442566,grad_norm: 0.9999997508892189, iteration: 6290
loss: 1.1874765157699585,grad_norm: 0.9999997574250588, iteration: 6291
loss: 1.1273034811019897,grad_norm: 0.999999641857137, iteration: 6292
loss: 1.1840647459030151,grad_norm: 0.9999997918206348, iteration: 6293
loss: 1.1370458602905273,grad_norm: 0.9999997466748449, iteration: 6294
loss: 1.1083035469055176,grad_norm: 0.9999996846803832, iteration: 6295
loss: 1.0816025733947754,grad_norm: 0.999999724537004, iteration: 6296
loss: 1.1055046319961548,grad_norm: 0.9999997856344142, iteration: 6297
loss: 1.2008461952209473,grad_norm: 0.9999998216278398, iteration: 6298
loss: 1.1265863180160522,grad_norm: 0.9999997775089398, iteration: 6299
loss: 1.250048041343689,grad_norm: 0.9999997674095925, iteration: 6300
loss: 1.1037499904632568,grad_norm: 0.9999997235169124, iteration: 6301
loss: 1.087431788444519,grad_norm: 0.9999996920246402, iteration: 6302
loss: 1.2153006792068481,grad_norm: 0.9999997022574431, iteration: 6303
loss: 1.0696831941604614,grad_norm: 0.999999678846192, iteration: 6304
loss: 1.3505423069000244,grad_norm: 0.9999999533110991, iteration: 6305
loss: 1.204771876335144,grad_norm: 0.9999998062965159, iteration: 6306
loss: 1.1581307649612427,grad_norm: 0.9999998335504696, iteration: 6307
loss: 0.9997112154960632,grad_norm: 0.999999693461206, iteration: 6308
loss: 1.1239150762557983,grad_norm: 0.999999747751255, iteration: 6309
loss: 1.1159518957138062,grad_norm: 0.9999996646638128, iteration: 6310
loss: 1.1767176389694214,grad_norm: 0.9999997291836826, iteration: 6311
loss: 1.166738748550415,grad_norm: 0.9999997296223896, iteration: 6312
loss: 1.154374122619629,grad_norm: 0.9999997710572573, iteration: 6313
loss: 1.1519436836242676,grad_norm: 0.9999998011537901, iteration: 6314
loss: 1.1270629167556763,grad_norm: 0.9999997424384329, iteration: 6315
loss: 1.149124026298523,grad_norm: 0.9999996730311181, iteration: 6316
loss: 1.1995517015457153,grad_norm: 0.999999787064742, iteration: 6317
loss: 1.1269525289535522,grad_norm: 0.9999996397717171, iteration: 6318
loss: 1.1880404949188232,grad_norm: 0.9999997999271842, iteration: 6319
loss: 1.1889923810958862,grad_norm: 0.9999997028546596, iteration: 6320
loss: 1.1344842910766602,grad_norm: 0.9999998211601429, iteration: 6321
loss: 1.1520910263061523,grad_norm: 0.9999997319231578, iteration: 6322
loss: 1.0785362720489502,grad_norm: 0.9999996296907822, iteration: 6323
loss: 1.1289373636245728,grad_norm: 0.9999997583920341, iteration: 6324
loss: 1.1213290691375732,grad_norm: 0.9999997606600893, iteration: 6325
loss: 1.0977543592453003,grad_norm: 0.9999996200941623, iteration: 6326
loss: 1.0607924461364746,grad_norm: 0.999999804221424, iteration: 6327
loss: 1.1498371362686157,grad_norm: 0.9999996602752336, iteration: 6328
loss: 1.147901177406311,grad_norm: 0.9999997416473562, iteration: 6329
loss: 1.1415690183639526,grad_norm: 0.9999997040977672, iteration: 6330
loss: 1.1582804918289185,grad_norm: 0.9999996596556531, iteration: 6331
loss: 1.054682731628418,grad_norm: 0.9999996115465849, iteration: 6332
loss: 1.0874418020248413,grad_norm: 0.9999996181519186, iteration: 6333
loss: 1.0893805027008057,grad_norm: 0.9999998166580426, iteration: 6334
loss: 1.1950706243515015,grad_norm: 0.9999997365134025, iteration: 6335
loss: 1.167149305343628,grad_norm: 0.999999703914536, iteration: 6336
loss: 1.010615587234497,grad_norm: 0.999999570497599, iteration: 6337
loss: 1.051753044128418,grad_norm: 0.9999995902313836, iteration: 6338
loss: 1.2030397653579712,grad_norm: 0.9999997649825599, iteration: 6339
loss: 1.2055827379226685,grad_norm: 0.9999998130676956, iteration: 6340
loss: 1.1914347410202026,grad_norm: 0.9999997514653034, iteration: 6341
loss: 1.0632619857788086,grad_norm: 0.9999997101421916, iteration: 6342
loss: 1.1785231828689575,grad_norm: 0.9999997086356105, iteration: 6343
loss: 1.1307954788208008,grad_norm: 0.9999997625542905, iteration: 6344
loss: 1.1362541913986206,grad_norm: 0.9999997931876208, iteration: 6345
loss: 1.0525550842285156,grad_norm: 0.9999996883020602, iteration: 6346
loss: 1.1344189643859863,grad_norm: 0.999999722491997, iteration: 6347
loss: 1.1590172052383423,grad_norm: 0.9999997497296284, iteration: 6348
loss: 1.0617903470993042,grad_norm: 0.9999996457459838, iteration: 6349
loss: 1.1164770126342773,grad_norm: 0.9999996649091512, iteration: 6350
loss: 1.121846318244934,grad_norm: 0.9999996537624639, iteration: 6351
loss: 1.0705361366271973,grad_norm: 0.9999997399352101, iteration: 6352
loss: 1.2102599143981934,grad_norm: 0.9999996990795966, iteration: 6353
loss: 1.1186206340789795,grad_norm: 0.9999997662010119, iteration: 6354
loss: 1.104928731918335,grad_norm: 0.999999677058277, iteration: 6355
loss: 1.1271954774856567,grad_norm: 0.9999996474327278, iteration: 6356
loss: 1.0999438762664795,grad_norm: 0.9999998293608944, iteration: 6357
loss: 1.3041356801986694,grad_norm: 0.9999997555056238, iteration: 6358
loss: 1.1894752979278564,grad_norm: 0.999999775166895, iteration: 6359
loss: 1.196352243423462,grad_norm: 0.9999996981646644, iteration: 6360
loss: 1.1797051429748535,grad_norm: 0.9999997013182974, iteration: 6361
loss: 1.1702467203140259,grad_norm: 0.999999689418595, iteration: 6362
loss: 1.1919530630111694,grad_norm: 0.9999996880292661, iteration: 6363
loss: 1.1085448265075684,grad_norm: 0.9999997798931098, iteration: 6364
loss: 1.1196190118789673,grad_norm: 0.9999997140463036, iteration: 6365
loss: 1.1613290309906006,grad_norm: 0.9999997614403747, iteration: 6366
loss: 1.1385713815689087,grad_norm: 0.9999995947893364, iteration: 6367
loss: 1.163806438446045,grad_norm: 0.999999798013927, iteration: 6368
loss: 1.0964206457138062,grad_norm: 0.999999701507225, iteration: 6369
loss: 1.103811502456665,grad_norm: 0.9999996399223261, iteration: 6370
loss: 1.1423369646072388,grad_norm: 0.9999997078857162, iteration: 6371
loss: 1.120320200920105,grad_norm: 0.9999996597500971, iteration: 6372
loss: 1.0249444246292114,grad_norm: 0.9999996635405626, iteration: 6373
loss: 1.1731220483779907,grad_norm: 0.9999997494607656, iteration: 6374
loss: 1.1037888526916504,grad_norm: 0.9999997300516343, iteration: 6375
loss: 1.1247642040252686,grad_norm: 0.9999995909126718, iteration: 6376
loss: 1.1458544731140137,grad_norm: 0.999999844218663, iteration: 6377
loss: 1.0870274305343628,grad_norm: 0.9999997327664416, iteration: 6378
loss: 1.1321302652359009,grad_norm: 0.9999996423835362, iteration: 6379
loss: 1.1395511627197266,grad_norm: 0.9999995887901368, iteration: 6380
loss: 1.2640718221664429,grad_norm: 0.9999998870539331, iteration: 6381
loss: 1.0964199304580688,grad_norm: 0.9999997787073956, iteration: 6382
loss: 1.200327754020691,grad_norm: 0.9999999166313955, iteration: 6383
loss: 1.1812968254089355,grad_norm: 0.9999997296796865, iteration: 6384
loss: 1.1273051500320435,grad_norm: 0.9999995759381132, iteration: 6385
loss: 1.0575202703475952,grad_norm: 0.9999996394804824, iteration: 6386
loss: 1.1117364168167114,grad_norm: 0.9999996076436787, iteration: 6387
loss: 1.1576464176177979,grad_norm: 0.9999998013425756, iteration: 6388
loss: 1.0524444580078125,grad_norm: 0.999999727978944, iteration: 6389
loss: 1.1163619756698608,grad_norm: 0.9999997089620968, iteration: 6390
loss: 1.0749013423919678,grad_norm: 0.999999672743827, iteration: 6391
loss: 1.1769356727600098,grad_norm: 0.9999997401688316, iteration: 6392
loss: 1.1359832286834717,grad_norm: 0.9999997649979395, iteration: 6393
loss: 1.2247698307037354,grad_norm: 0.9999997722729586, iteration: 6394
loss: 1.2150739431381226,grad_norm: 0.9999998250300837, iteration: 6395
loss: 1.1627957820892334,grad_norm: 0.9999998625476658, iteration: 6396
loss: 1.0902174711227417,grad_norm: 0.9999997090733698, iteration: 6397
loss: 1.100806474685669,grad_norm: 0.9999996032768464, iteration: 6398
loss: 1.1741162538528442,grad_norm: 0.9999996972140397, iteration: 6399
loss: 1.1457618474960327,grad_norm: 0.9999996992536018, iteration: 6400
loss: 1.127551555633545,grad_norm: 0.9999997641056914, iteration: 6401
loss: 1.1342244148254395,grad_norm: 0.9999997353558453, iteration: 6402
loss: 1.1839336156845093,grad_norm: 0.9999998550202646, iteration: 6403
loss: 1.2206629514694214,grad_norm: 0.9999997040343436, iteration: 6404
loss: 1.1768112182617188,grad_norm: 0.9999997501413603, iteration: 6405
loss: 1.1658436059951782,grad_norm: 0.9999997920148888, iteration: 6406
loss: 1.105298399925232,grad_norm: 0.9999997731987958, iteration: 6407
loss: 1.191155195236206,grad_norm: 0.9999997166623288, iteration: 6408
loss: 1.0893443822860718,grad_norm: 0.9999997269524843, iteration: 6409
loss: 1.0650838613510132,grad_norm: 0.9999996298347186, iteration: 6410
loss: 1.1263400316238403,grad_norm: 0.9999998009017793, iteration: 6411
loss: 1.135328769683838,grad_norm: 0.999999779252605, iteration: 6412
loss: 1.1261255741119385,grad_norm: 0.9999996954782714, iteration: 6413
loss: 1.188552737236023,grad_norm: 0.9999998273962895, iteration: 6414
loss: 1.0811617374420166,grad_norm: 0.9999997081371254, iteration: 6415
loss: 1.1922614574432373,grad_norm: 0.9999997903960995, iteration: 6416
loss: 1.1293647289276123,grad_norm: 0.9999998667545398, iteration: 6417
loss: 1.1271532773971558,grad_norm: 0.999999788134678, iteration: 6418
loss: 1.0567336082458496,grad_norm: 0.9999997753943551, iteration: 6419
loss: 1.0512381792068481,grad_norm: 0.9999996371456189, iteration: 6420
loss: 1.2230865955352783,grad_norm: 0.9999997935105557, iteration: 6421
loss: 1.1271696090698242,grad_norm: 0.9999996488063708, iteration: 6422
loss: 1.1364322900772095,grad_norm: 0.9999998171355342, iteration: 6423
loss: 1.120479941368103,grad_norm: 0.9999997465401207, iteration: 6424
loss: 1.1034690141677856,grad_norm: 0.999999766942989, iteration: 6425
loss: 1.1678414344787598,grad_norm: 0.9999997324549598, iteration: 6426
loss: 1.118677020072937,grad_norm: 0.9999997546453019, iteration: 6427
loss: 1.0902379751205444,grad_norm: 0.9999996270452839, iteration: 6428
loss: 1.20512056350708,grad_norm: 0.9999997660573302, iteration: 6429
loss: 1.108443260192871,grad_norm: 0.9999996719785701, iteration: 6430
loss: 1.0616735219955444,grad_norm: 0.9999997130764008, iteration: 6431
loss: 1.1861814260482788,grad_norm: 0.9999998341199333, iteration: 6432
loss: 1.116573691368103,grad_norm: 0.9999998216158621, iteration: 6433
loss: 1.1393816471099854,grad_norm: 0.9999996919638623, iteration: 6434
loss: 1.0641541481018066,grad_norm: 0.9999996197844445, iteration: 6435
loss: 1.160841703414917,grad_norm: 0.9999997378423576, iteration: 6436
loss: 1.1862856149673462,grad_norm: 0.9999997930813417, iteration: 6437
loss: 1.1733976602554321,grad_norm: 0.9999997078789923, iteration: 6438
loss: 1.1621854305267334,grad_norm: 0.999999692177711, iteration: 6439
loss: 1.1473734378814697,grad_norm: 0.9999996980748898, iteration: 6440
loss: 1.1650373935699463,grad_norm: 0.9999998180096226, iteration: 6441
loss: 1.1342577934265137,grad_norm: 0.9999996785157532, iteration: 6442
loss: 1.1823441982269287,grad_norm: 0.9999998743167547, iteration: 6443
loss: 1.0298794507980347,grad_norm: 0.9999995517910811, iteration: 6444
loss: 1.2090694904327393,grad_norm: 0.9999997157976928, iteration: 6445
loss: 1.0735764503479004,grad_norm: 0.999999642904791, iteration: 6446
loss: 1.0341960191726685,grad_norm: 0.999999612793547, iteration: 6447
loss: 1.0887417793273926,grad_norm: 0.9999995910868715, iteration: 6448
loss: 1.0701571702957153,grad_norm: 0.9999996673601181, iteration: 6449
loss: 1.1010260581970215,grad_norm: 0.9999996607788167, iteration: 6450
loss: 1.175010085105896,grad_norm: 0.999999744993024, iteration: 6451
loss: 1.2607868909835815,grad_norm: 0.9999999000466775, iteration: 6452
loss: 1.1396113634109497,grad_norm: 0.9999997661452011, iteration: 6453
loss: 1.1929036378860474,grad_norm: 0.9999997574221177, iteration: 6454
loss: 1.1746034622192383,grad_norm: 0.9999997382158557, iteration: 6455
loss: 1.1421900987625122,grad_norm: 0.999999668097598, iteration: 6456
loss: 1.0851237773895264,grad_norm: 0.9999997742938761, iteration: 6457
loss: 1.062286615371704,grad_norm: 0.9999998006571853, iteration: 6458
loss: 1.1641981601715088,grad_norm: 0.9999997652433569, iteration: 6459
loss: 1.199628472328186,grad_norm: 0.9999997289667747, iteration: 6460
loss: 1.1760159730911255,grad_norm: 0.9999998056809519, iteration: 6461
loss: 1.1891191005706787,grad_norm: 0.9999998188175375, iteration: 6462
loss: 1.1278644800186157,grad_norm: 0.9999995388657917, iteration: 6463
loss: 1.144538164138794,grad_norm: 0.9999997055994764, iteration: 6464
loss: 1.1041622161865234,grad_norm: 0.9999996649753925, iteration: 6465
loss: 1.194557547569275,grad_norm: 0.9999997404639654, iteration: 6466
loss: 1.1863588094711304,grad_norm: 0.9999998350391315, iteration: 6467
loss: 1.1424787044525146,grad_norm: 0.999999685374358, iteration: 6468
loss: 1.0938608646392822,grad_norm: 0.9999995837674515, iteration: 6469
loss: 1.152463674545288,grad_norm: 0.9999997657454672, iteration: 6470
loss: 1.1745965480804443,grad_norm: 0.9999998481756432, iteration: 6471
loss: 1.1840159893035889,grad_norm: 0.9999997648755502, iteration: 6472
loss: 1.1069765090942383,grad_norm: 0.9999997328213605, iteration: 6473
loss: 1.1191526651382446,grad_norm: 0.9999997434244582, iteration: 6474
loss: 1.1688326597213745,grad_norm: 0.9999998034265135, iteration: 6475
loss: 1.176615834236145,grad_norm: 0.9999996934727858, iteration: 6476
loss: 1.139991044998169,grad_norm: 0.9999996597545052, iteration: 6477
loss: 1.1243213415145874,grad_norm: 0.9999996764093708, iteration: 6478
loss: 1.0637996196746826,grad_norm: 0.9999997053812478, iteration: 6479
loss: 1.0711888074874878,grad_norm: 0.9999996331350766, iteration: 6480
loss: 1.1161270141601562,grad_norm: 0.9999996916607116, iteration: 6481
loss: 1.1339213848114014,grad_norm: 0.999999708732976, iteration: 6482
loss: 1.1340358257293701,grad_norm: 0.9999997460484432, iteration: 6483
loss: 1.252044677734375,grad_norm: 0.9999998225256945, iteration: 6484
loss: 1.2089887857437134,grad_norm: 0.9999997855177984, iteration: 6485
loss: 1.140911340713501,grad_norm: 0.9999996895949902, iteration: 6486
loss: 1.1942181587219238,grad_norm: 0.9999997975033114, iteration: 6487
loss: 1.1577212810516357,grad_norm: 0.9999996734268591, iteration: 6488
loss: 1.1476846933364868,grad_norm: 0.9999997553667986, iteration: 6489
loss: 1.1875982284545898,grad_norm: 0.9999997077189343, iteration: 6490
loss: 1.0822489261627197,grad_norm: 0.9999997159963697, iteration: 6491
loss: 1.064505696296692,grad_norm: 0.999999568551502, iteration: 6492
loss: 1.1228082180023193,grad_norm: 0.9999996110840621, iteration: 6493
loss: 1.057610273361206,grad_norm: 0.9999997505737296, iteration: 6494
loss: 1.0970182418823242,grad_norm: 0.9999996691782307, iteration: 6495
loss: 1.117684245109558,grad_norm: 0.9999996280997819, iteration: 6496
loss: 1.1273558139801025,grad_norm: 0.9999997153057438, iteration: 6497
loss: 1.111930012702942,grad_norm: 0.9999997607302734, iteration: 6498
loss: 1.1683026552200317,grad_norm: 0.9999997832816344, iteration: 6499
loss: 1.1777026653289795,grad_norm: 0.9999998262257941, iteration: 6500
loss: 1.1350066661834717,grad_norm: 0.9999997410931126, iteration: 6501
loss: 1.206034779548645,grad_norm: 0.9999997610299275, iteration: 6502
loss: 1.132895588874817,grad_norm: 0.999999673355265, iteration: 6503
loss: 1.170652985572815,grad_norm: 0.9999996920380645, iteration: 6504
loss: 1.0939749479293823,grad_norm: 0.9999996987276186, iteration: 6505
loss: 1.1370584964752197,grad_norm: 0.9999997718557804, iteration: 6506
loss: 1.086398959159851,grad_norm: 0.9999996847854153, iteration: 6507
loss: 1.1942138671875,grad_norm: 0.9999997914470413, iteration: 6508
loss: 1.1835166215896606,grad_norm: 0.9999998603230948, iteration: 6509
loss: 1.125504970550537,grad_norm: 0.9999996574463218, iteration: 6510
loss: 1.0611499547958374,grad_norm: 0.9999997218287815, iteration: 6511
loss: 1.1116870641708374,grad_norm: 0.9999997517382707, iteration: 6512
loss: 1.0251094102859497,grad_norm: 0.9999998048806436, iteration: 6513
loss: 1.1057549715042114,grad_norm: 0.9999998738884351, iteration: 6514
loss: 1.1294482946395874,grad_norm: 0.9999997219018739, iteration: 6515
loss: 1.1794061660766602,grad_norm: 0.9999997896260263, iteration: 6516
loss: 1.1233383417129517,grad_norm: 0.9999997688716816, iteration: 6517
loss: 1.1407030820846558,grad_norm: 0.9999997708862958, iteration: 6518
loss: 1.1442760229110718,grad_norm: 0.9999997478345755, iteration: 6519
loss: 1.2205387353897095,grad_norm: 0.9999997427766234, iteration: 6520
loss: 1.135005235671997,grad_norm: 0.9999995953646488, iteration: 6521
loss: 1.1880208253860474,grad_norm: 0.9999996756149824, iteration: 6522
loss: 1.165794014930725,grad_norm: 0.9999997561129645, iteration: 6523
loss: 1.0338895320892334,grad_norm: 0.9999994921591443, iteration: 6524
loss: 1.1286603212356567,grad_norm: 0.9999996915394936, iteration: 6525
loss: 1.1714543104171753,grad_norm: 0.9999997924006359, iteration: 6526
loss: 1.1482906341552734,grad_norm: 0.9999997779630183, iteration: 6527
loss: 1.144040822982788,grad_norm: 0.9999996662796128, iteration: 6528
loss: 1.1807597875595093,grad_norm: 0.9999997443857588, iteration: 6529
loss: 1.199746012687683,grad_norm: 0.9999997433193463, iteration: 6530
loss: 1.0967901945114136,grad_norm: 0.9999997562017479, iteration: 6531
loss: 1.1278380155563354,grad_norm: 0.9999997539774855, iteration: 6532
loss: 1.1875687837600708,grad_norm: 0.9999996681024218, iteration: 6533
loss: 1.066812515258789,grad_norm: 0.9999997769509275, iteration: 6534
loss: 1.1642367839813232,grad_norm: 0.9999997006934103, iteration: 6535
loss: 1.1830908060073853,grad_norm: 0.9999998429274318, iteration: 6536
loss: 1.0870808362960815,grad_norm: 0.9999995747930395, iteration: 6537
loss: 1.0934998989105225,grad_norm: 0.9999996876693641, iteration: 6538
loss: 1.0887115001678467,grad_norm: 0.9999996455063391, iteration: 6539
loss: 1.1725926399230957,grad_norm: 0.9999998195982698, iteration: 6540
loss: 1.0676429271697998,grad_norm: 0.9999995874809311, iteration: 6541
loss: 1.127437710762024,grad_norm: 0.9999998029152657, iteration: 6542
loss: 1.0902588367462158,grad_norm: 0.999999578334747, iteration: 6543
loss: 1.1600239276885986,grad_norm: 0.9999998151535945, iteration: 6544
loss: 1.0725669860839844,grad_norm: 0.9999997298818573, iteration: 6545
loss: 1.0037510395050049,grad_norm: 0.9999994803140014, iteration: 6546
loss: 1.0765289068222046,grad_norm: 0.9999996773923829, iteration: 6547
loss: 1.1132495403289795,grad_norm: 0.9999997819135034, iteration: 6548
loss: 1.1763474941253662,grad_norm: 0.9999997053622766, iteration: 6549
loss: 1.1491398811340332,grad_norm: 0.9999997532111732, iteration: 6550
loss: 1.1397392749786377,grad_norm: 0.999999727608704, iteration: 6551
loss: 1.1718796491622925,grad_norm: 0.9999997283615265, iteration: 6552
loss: 1.0701959133148193,grad_norm: 0.9999996557363482, iteration: 6553
loss: 1.1153134107589722,grad_norm: 0.9999997416338386, iteration: 6554
loss: 1.0269787311553955,grad_norm: 0.9999997465409078, iteration: 6555
loss: 1.048750400543213,grad_norm: 0.9999995957793046, iteration: 6556
loss: 1.3003393411636353,grad_norm: 0.9999998237816665, iteration: 6557
loss: 1.075515866279602,grad_norm: 0.9999996259703656, iteration: 6558
loss: 1.1966118812561035,grad_norm: 0.9999997104797994, iteration: 6559
loss: 1.2015122175216675,grad_norm: 0.9999998036712225, iteration: 6560
loss: 1.073904037475586,grad_norm: 0.9999995147363823, iteration: 6561
loss: 1.2778092622756958,grad_norm: 0.9999998416481967, iteration: 6562
loss: 1.1641987562179565,grad_norm: 0.9999997457097671, iteration: 6563
loss: 1.1039769649505615,grad_norm: 0.9999997093397949, iteration: 6564
loss: 1.022377610206604,grad_norm: 0.9999996614459268, iteration: 6565
loss: 1.1903965473175049,grad_norm: 0.9999997361692955, iteration: 6566
loss: 1.1322647333145142,grad_norm: 0.9999996757137519, iteration: 6567
loss: 1.182937741279602,grad_norm: 0.9999997595007788, iteration: 6568
loss: 1.150250792503357,grad_norm: 0.9999996233777132, iteration: 6569
loss: 1.1678080558776855,grad_norm: 0.9999997417724416, iteration: 6570
loss: 1.1276073455810547,grad_norm: 0.9999996444484849, iteration: 6571
loss: 1.1263278722763062,grad_norm: 0.999999824934397, iteration: 6572
loss: 1.055701732635498,grad_norm: 0.9999995863678309, iteration: 6573
loss: 1.1020047664642334,grad_norm: 0.999999713339351, iteration: 6574
loss: 1.2586205005645752,grad_norm: 0.9999998411666629, iteration: 6575
loss: 1.0386883020401,grad_norm: 0.9999996774453664, iteration: 6576
loss: 1.1014769077301025,grad_norm: 0.99999967261202, iteration: 6577
loss: 1.144363522529602,grad_norm: 0.9999996882866287, iteration: 6578
loss: 1.1161301136016846,grad_norm: 0.9999997073737255, iteration: 6579
loss: 1.2169545888900757,grad_norm: 0.9999997966424852, iteration: 6580
loss: 1.1666452884674072,grad_norm: 0.9999998463280398, iteration: 6581
loss: 1.136194109916687,grad_norm: 0.9999997979358674, iteration: 6582
loss: 1.0770329236984253,grad_norm: 0.9999996644643152, iteration: 6583
loss: 1.196691870689392,grad_norm: 0.9999998617911664, iteration: 6584
loss: 1.0923587083816528,grad_norm: 0.9999996691357654, iteration: 6585
loss: 1.1832808256149292,grad_norm: 0.9999997095737257, iteration: 6586
loss: 1.1578497886657715,grad_norm: 0.9999996418128133, iteration: 6587
loss: 1.0859439373016357,grad_norm: 0.9999995526438616, iteration: 6588
loss: 1.062307596206665,grad_norm: 0.9999995271303642, iteration: 6589
loss: 1.096440315246582,grad_norm: 0.9999996114321819, iteration: 6590
loss: 1.0900582075119019,grad_norm: 0.9999997477905146, iteration: 6591
loss: 1.1221160888671875,grad_norm: 0.9999995756958096, iteration: 6592
loss: 1.0514447689056396,grad_norm: 0.9999996732382187, iteration: 6593
loss: 1.0374782085418701,grad_norm: 0.9999996111972557, iteration: 6594
loss: 1.1460000276565552,grad_norm: 0.9999999122053868, iteration: 6595
loss: 1.0613266229629517,grad_norm: 0.9999994508822763, iteration: 6596
loss: 1.0904483795166016,grad_norm: 0.9999996856436298, iteration: 6597
loss: 1.189300775527954,grad_norm: 0.9999998248186088, iteration: 6598
loss: 1.212393045425415,grad_norm: 0.9999998611228049, iteration: 6599
loss: 1.1276705265045166,grad_norm: 0.9999997478081388, iteration: 6600
loss: 1.2174783945083618,grad_norm: 0.9999998094258903, iteration: 6601
loss: 1.1732876300811768,grad_norm: 0.9999996966174318, iteration: 6602
loss: 1.0397846698760986,grad_norm: 0.9999995813375068, iteration: 6603
loss: 1.0728002786636353,grad_norm: 0.9999996774660459, iteration: 6604
loss: 1.1605520248413086,grad_norm: 0.9999997403283325, iteration: 6605
loss: 1.2153289318084717,grad_norm: 0.9999997441890233, iteration: 6606
loss: 1.1143438816070557,grad_norm: 0.9999997101843939, iteration: 6607
loss: 1.1768299341201782,grad_norm: 0.9999997348718409, iteration: 6608
loss: 1.1569477319717407,grad_norm: 0.9999997492856836, iteration: 6609
loss: 1.099930763244629,grad_norm: 0.9999995433726602, iteration: 6610
loss: 1.1483582258224487,grad_norm: 0.9999998213615086, iteration: 6611
loss: 1.1051114797592163,grad_norm: 0.9999996726059022, iteration: 6612
loss: 1.0490778684616089,grad_norm: 0.9999995710129931, iteration: 6613
loss: 1.0559734106063843,grad_norm: 0.9999995790364556, iteration: 6614
loss: 1.20071280002594,grad_norm: 0.9999998528464024, iteration: 6615
loss: 1.1179633140563965,grad_norm: 0.9999997184098953, iteration: 6616
loss: 1.147253155708313,grad_norm: 0.9999997757660934, iteration: 6617
loss: 1.1681932210922241,grad_norm: 0.9999998468280646, iteration: 6618
loss: 1.0800236463546753,grad_norm: 0.9999996831189364, iteration: 6619
loss: 1.109059453010559,grad_norm: 0.9999998610321525, iteration: 6620
loss: 1.205061674118042,grad_norm: 0.9999997569679195, iteration: 6621
loss: 1.1858206987380981,grad_norm: 0.99999968080991, iteration: 6622
loss: 1.2765699625015259,grad_norm: 0.9999998071768068, iteration: 6623
loss: 1.1102681159973145,grad_norm: 0.9999998053484016, iteration: 6624
loss: 1.166863203048706,grad_norm: 0.9999997241686989, iteration: 6625
loss: 1.0776985883712769,grad_norm: 0.9999997274368023, iteration: 6626
loss: 1.115273356437683,grad_norm: 0.999999706717382, iteration: 6627
loss: 1.1538357734680176,grad_norm: 0.9999997146211658, iteration: 6628
loss: 1.1974749565124512,grad_norm: 0.9999997414456742, iteration: 6629
loss: 1.2003551721572876,grad_norm: 0.9999997056972383, iteration: 6630
loss: 1.087920069694519,grad_norm: 0.9999997131127462, iteration: 6631
loss: 1.0548964738845825,grad_norm: 0.9999995174407648, iteration: 6632
loss: 1.0941153764724731,grad_norm: 0.99999970008052, iteration: 6633
loss: 1.0792829990386963,grad_norm: 0.9999995485041707, iteration: 6634
loss: 1.1420546770095825,grad_norm: 0.9999996816423787, iteration: 6635
loss: 1.0972363948822021,grad_norm: 0.9999996014975021, iteration: 6636
loss: 1.1944639682769775,grad_norm: 0.9999998774454989, iteration: 6637
loss: 1.0734003782272339,grad_norm: 0.9999995724118493, iteration: 6638
loss: 1.085309386253357,grad_norm: 0.9999995728973823, iteration: 6639
loss: 1.1015610694885254,grad_norm: 0.9999996524378622, iteration: 6640
loss: 1.1316697597503662,grad_norm: 0.9999997967637226, iteration: 6641
loss: 1.0910065174102783,grad_norm: 0.9999995837810877, iteration: 6642
loss: 1.1104774475097656,grad_norm: 0.9999997541718694, iteration: 6643
loss: 1.1808786392211914,grad_norm: 0.9999997220045558, iteration: 6644
loss: 1.068995475769043,grad_norm: 0.9999997849002626, iteration: 6645
loss: 1.114383578300476,grad_norm: 0.9999996284947535, iteration: 6646
loss: 1.061720609664917,grad_norm: 0.9999997332203425, iteration: 6647
loss: 1.1217366456985474,grad_norm: 0.9999996174480571, iteration: 6648
loss: 1.161415696144104,grad_norm: 0.9999997769497408, iteration: 6649
loss: 1.1285991668701172,grad_norm: 0.9999996855039358, iteration: 6650
loss: 1.1181763410568237,grad_norm: 0.9999996944756232, iteration: 6651
loss: 1.1611545085906982,grad_norm: 0.9999997530671358, iteration: 6652
loss: 1.0887176990509033,grad_norm: 0.9999996412049331, iteration: 6653
loss: 1.1200823783874512,grad_norm: 0.9999996168839569, iteration: 6654
loss: 1.0460948944091797,grad_norm: 0.9999997303269555, iteration: 6655
loss: 1.068725824356079,grad_norm: 0.9999995418837218, iteration: 6656
loss: 1.188793659210205,grad_norm: 0.9999998163975151, iteration: 6657
loss: 1.1110152006149292,grad_norm: 0.9999997759305562, iteration: 6658
loss: 1.116762399673462,grad_norm: 0.9999997422878384, iteration: 6659
loss: 1.1510190963745117,grad_norm: 0.9999996703897489, iteration: 6660
loss: 1.1750491857528687,grad_norm: 0.9999997390445194, iteration: 6661
loss: 1.1427407264709473,grad_norm: 0.9999996407925357, iteration: 6662
loss: 1.113715648651123,grad_norm: 0.9999995937283039, iteration: 6663
loss: 1.047871708869934,grad_norm: 0.999999492558548, iteration: 6664
loss: 1.1773847341537476,grad_norm: 0.9999997967470265, iteration: 6665
loss: 1.0838708877563477,grad_norm: 0.9999997192632321, iteration: 6666
loss: 1.0967586040496826,grad_norm: 0.9999996495102235, iteration: 6667
loss: 1.0790834426879883,grad_norm: 0.9999996044997896, iteration: 6668
loss: 1.0959821939468384,grad_norm: 0.9999996624799232, iteration: 6669
loss: 1.0888499021530151,grad_norm: 0.9999996633695476, iteration: 6670
loss: 1.1004279851913452,grad_norm: 0.9999997397764228, iteration: 6671
loss: 1.0239261388778687,grad_norm: 0.9999996500993885, iteration: 6672
loss: 1.0918519496917725,grad_norm: 0.9999996737023069, iteration: 6673
loss: 1.0971440076828003,grad_norm: 0.9999994559794718, iteration: 6674
loss: 1.0561798810958862,grad_norm: 0.9999997035119169, iteration: 6675
loss: 1.118005394935608,grad_norm: 0.9999997315587301, iteration: 6676
loss: 1.1408050060272217,grad_norm: 0.9999995713648817, iteration: 6677
loss: 1.0549834966659546,grad_norm: 0.999999617977106, iteration: 6678
loss: 1.1501164436340332,grad_norm: 0.9999998626877762, iteration: 6679
loss: 1.1355173587799072,grad_norm: 0.9999997863392078, iteration: 6680
loss: 1.0247344970703125,grad_norm: 0.9999995976256552, iteration: 6681
loss: 1.1823616027832031,grad_norm: 0.9999998237043165, iteration: 6682
loss: 1.1202998161315918,grad_norm: 0.9999997222422842, iteration: 6683
loss: 1.113877773284912,grad_norm: 0.9999995973442027, iteration: 6684
loss: 1.1927965879440308,grad_norm: 0.9999998171950838, iteration: 6685
loss: 1.1652144193649292,grad_norm: 0.9999997089034104, iteration: 6686
loss: 1.1530394554138184,grad_norm: 0.9999996903643417, iteration: 6687
loss: 1.203202724456787,grad_norm: 0.9999996776886665, iteration: 6688
loss: 1.1651363372802734,grad_norm: 0.9999997364273461, iteration: 6689
loss: 1.1221978664398193,grad_norm: 0.9999996760844474, iteration: 6690
loss: 1.1205066442489624,grad_norm: 0.9999997295396686, iteration: 6691
loss: 1.0793802738189697,grad_norm: 0.9999996007291129, iteration: 6692
loss: 1.0929896831512451,grad_norm: 0.999999640659481, iteration: 6693
loss: 1.1321070194244385,grad_norm: 0.9999998781706853, iteration: 6694
loss: 1.1035699844360352,grad_norm: 0.9999996214173398, iteration: 6695
loss: 1.0778992176055908,grad_norm: 0.9999996557072217, iteration: 6696
loss: 1.17038094997406,grad_norm: 0.9999997460156143, iteration: 6697
loss: 1.0956532955169678,grad_norm: 0.9999995862377655, iteration: 6698
loss: 1.0855134725570679,grad_norm: 0.9999997014870091, iteration: 6699
loss: 1.028375506401062,grad_norm: 0.9999996510579792, iteration: 6700
loss: 1.197275161743164,grad_norm: 0.9999997324027137, iteration: 6701
loss: 1.157926082611084,grad_norm: 0.9999997394526399, iteration: 6702
loss: 1.02382230758667,grad_norm: 0.9999996370500682, iteration: 6703
loss: 1.168139934539795,grad_norm: 0.999999767420024, iteration: 6704
loss: 1.138029932975769,grad_norm: 0.9999996725185106, iteration: 6705
loss: 1.0990784168243408,grad_norm: 0.9999996661813934, iteration: 6706
loss: 1.1206837892532349,grad_norm: 0.9999997035239695, iteration: 6707
loss: 1.1517471075057983,grad_norm: 0.9999996885431036, iteration: 6708
loss: 1.0719292163848877,grad_norm: 0.9999996246923953, iteration: 6709
loss: 1.1603676080703735,grad_norm: 0.9999997194239216, iteration: 6710
loss: 1.0764786005020142,grad_norm: 0.9999994824681588, iteration: 6711
loss: 1.1432446241378784,grad_norm: 0.9999998144286756, iteration: 6712
loss: 1.1401761770248413,grad_norm: 0.9999997837734433, iteration: 6713
loss: 1.0886183977127075,grad_norm: 0.9999997614760094, iteration: 6714
loss: 1.1736799478530884,grad_norm: 0.9999997845509171, iteration: 6715
loss: 1.0778560638427734,grad_norm: 0.9999996584600178, iteration: 6716
loss: 1.198822021484375,grad_norm: 0.9999997443818226, iteration: 6717
loss: 1.0847749710083008,grad_norm: 0.9999995497683335, iteration: 6718
loss: 1.1508454084396362,grad_norm: 0.9999996395356812, iteration: 6719
loss: 1.1398601531982422,grad_norm: 0.999999656321465, iteration: 6720
loss: 1.1485278606414795,grad_norm: 0.9999996242800797, iteration: 6721
loss: 1.1019537448883057,grad_norm: 0.9999995776422738, iteration: 6722
loss: 1.0403368473052979,grad_norm: 0.9999997365417383, iteration: 6723
loss: 1.0890767574310303,grad_norm: 0.9999998107261696, iteration: 6724
loss: 1.120958924293518,grad_norm: 0.9999996454221415, iteration: 6725
loss: 1.1672172546386719,grad_norm: 0.9999997773132149, iteration: 6726
loss: 1.1387896537780762,grad_norm: 0.9999996650644362, iteration: 6727
loss: 1.0863338708877563,grad_norm: 0.9999996893793872, iteration: 6728
loss: 1.099151611328125,grad_norm: 0.9999996045349516, iteration: 6729
loss: 1.0579931735992432,grad_norm: 0.9999997166763934, iteration: 6730
loss: 1.0535593032836914,grad_norm: 0.9999995973051594, iteration: 6731
loss: 1.074831485748291,grad_norm: 0.9999995903544128, iteration: 6732
loss: 1.1001631021499634,grad_norm: 0.9999996578371344, iteration: 6733
loss: 1.0577363967895508,grad_norm: 0.9999994801553832, iteration: 6734
loss: 1.1035137176513672,grad_norm: 0.9999997058675201, iteration: 6735
loss: 1.1534658670425415,grad_norm: 0.9999997198020744, iteration: 6736
loss: 1.1982810497283936,grad_norm: 0.9999997199402094, iteration: 6737
loss: 1.164192795753479,grad_norm: 0.9999996648984485, iteration: 6738
loss: 1.0688692331314087,grad_norm: 0.9999996745836612, iteration: 6739
loss: 1.1576566696166992,grad_norm: 0.9999996548970906, iteration: 6740
loss: 1.154778003692627,grad_norm: 0.9999997001693256, iteration: 6741
loss: 1.1058919429779053,grad_norm: 0.9999997896485828, iteration: 6742
loss: 1.1258419752120972,grad_norm: 0.9999997407061845, iteration: 6743
loss: 1.1449077129364014,grad_norm: 0.9999997124843377, iteration: 6744
loss: 1.1164841651916504,grad_norm: 0.999999703173568, iteration: 6745
loss: 1.1822516918182373,grad_norm: 0.9999996572580645, iteration: 6746
loss: 1.102528691291809,grad_norm: 0.9999995749782964, iteration: 6747
loss: 1.1589207649230957,grad_norm: 0.9999997072997726, iteration: 6748
loss: 1.173632025718689,grad_norm: 0.9999996763436799, iteration: 6749
loss: 1.0636913776397705,grad_norm: 0.9999995984091031, iteration: 6750
loss: 1.1105067729949951,grad_norm: 0.9999996731228799, iteration: 6751
loss: 1.1110867261886597,grad_norm: 0.999999566444516, iteration: 6752
loss: 1.125970482826233,grad_norm: 0.999999738541999, iteration: 6753
loss: 1.1683056354522705,grad_norm: 0.9999997506121286, iteration: 6754
loss: 1.1457295417785645,grad_norm: 0.9999998123005717, iteration: 6755
loss: 1.1544805765151978,grad_norm: 0.9999997639740197, iteration: 6756
loss: 1.0310418605804443,grad_norm: 0.9999997635204252, iteration: 6757
loss: 1.1061333417892456,grad_norm: 0.999999742364255, iteration: 6758
loss: 1.1165963411331177,grad_norm: 0.9999997399387546, iteration: 6759
loss: 1.1619253158569336,grad_norm: 0.9999998516061742, iteration: 6760
loss: 1.0714521408081055,grad_norm: 0.9999996565282465, iteration: 6761
loss: 1.1275705099105835,grad_norm: 0.9999997486735, iteration: 6762
loss: 1.0201724767684937,grad_norm: 0.9999995402327326, iteration: 6763
loss: 1.125083565711975,grad_norm: 0.9999996956430536, iteration: 6764
loss: 1.124944806098938,grad_norm: 0.9999995677470371, iteration: 6765
loss: 1.0582104921340942,grad_norm: 0.9999995871605496, iteration: 6766
loss: 1.1644402742385864,grad_norm: 0.9999997358700616, iteration: 6767
loss: 1.104702115058899,grad_norm: 0.999999489134168, iteration: 6768
loss: 1.035513162612915,grad_norm: 0.9999995178983598, iteration: 6769
loss: 1.1044079065322876,grad_norm: 0.9999995684980099, iteration: 6770
loss: 1.1223735809326172,grad_norm: 0.9999996163356604, iteration: 6771
loss: 1.259683609008789,grad_norm: 0.9999998224844298, iteration: 6772
loss: 1.124419927597046,grad_norm: 0.9999996778271678, iteration: 6773
loss: 1.0911579132080078,grad_norm: 0.9999997927445552, iteration: 6774
loss: 1.112815022468567,grad_norm: 0.999999637176483, iteration: 6775
loss: 1.141768217086792,grad_norm: 0.999999569483719, iteration: 6776
loss: 1.0910853147506714,grad_norm: 0.9999996453462653, iteration: 6777
loss: 1.0642493963241577,grad_norm: 0.9999995953561314, iteration: 6778
loss: 1.1627944707870483,grad_norm: 0.9999997307334008, iteration: 6779
loss: 1.0510187149047852,grad_norm: 0.9999996198518363, iteration: 6780
loss: 1.1719005107879639,grad_norm: 0.9999998651309595, iteration: 6781
loss: 1.1076171398162842,grad_norm: 0.9999997745838137, iteration: 6782
loss: 1.2138370275497437,grad_norm: 0.9999998888874666, iteration: 6783
loss: 1.0781394243240356,grad_norm: 0.9999997201141203, iteration: 6784
loss: 1.1113048791885376,grad_norm: 0.9999996417345477, iteration: 6785
loss: 1.0514254570007324,grad_norm: 0.9999996206305147, iteration: 6786
loss: 1.1091036796569824,grad_norm: 0.9999996530279373, iteration: 6787
loss: 1.063117504119873,grad_norm: 0.9999995474874155, iteration: 6788
loss: 1.0789539813995361,grad_norm: 0.9999997540826575, iteration: 6789
loss: 0.9949352145195007,grad_norm: 0.9999994541292494, iteration: 6790
loss: 1.1697336435317993,grad_norm: 0.9999997265419674, iteration: 6791
loss: 1.0439704656600952,grad_norm: 0.9999994949632176, iteration: 6792
loss: 1.0658543109893799,grad_norm: 0.9999995899961085, iteration: 6793
loss: 1.0563788414001465,grad_norm: 0.9999997595953679, iteration: 6794
loss: 1.1080058813095093,grad_norm: 0.9999997050017104, iteration: 6795
loss: 1.0496128797531128,grad_norm: 0.9999995264070116, iteration: 6796
loss: 1.1100293397903442,grad_norm: 0.9999995843996038, iteration: 6797
loss: 1.1262394189834595,grad_norm: 0.9999996839361196, iteration: 6798
loss: 1.1034842729568481,grad_norm: 0.9999996890458245, iteration: 6799
loss: 1.1231420040130615,grad_norm: 0.999999683199355, iteration: 6800
loss: 1.194325566291809,grad_norm: 0.9999997815389382, iteration: 6801
loss: 1.0718225240707397,grad_norm: 0.9999996769485812, iteration: 6802
loss: 1.1473007202148438,grad_norm: 0.9999996620102228, iteration: 6803
loss: 1.0876851081848145,grad_norm: 0.9999996538833948, iteration: 6804
loss: 1.2142716646194458,grad_norm: 0.9999997629670374, iteration: 6805
loss: 1.0714486837387085,grad_norm: 0.9999996380540443, iteration: 6806
loss: 1.1578952074050903,grad_norm: 0.99999982432935, iteration: 6807
loss: 1.1814465522766113,grad_norm: 0.999999649253046, iteration: 6808
loss: 1.1573995351791382,grad_norm: 0.999999683741534, iteration: 6809
loss: 1.1199744939804077,grad_norm: 0.9999997201017252, iteration: 6810
loss: 1.0786296129226685,grad_norm: 0.9999995120031511, iteration: 6811
loss: 1.1039036512374878,grad_norm: 0.9999998134061225, iteration: 6812
loss: 1.1231708526611328,grad_norm: 0.9999996796018267, iteration: 6813
loss: 1.0415221452713013,grad_norm: 0.9999996506802455, iteration: 6814
loss: 1.1349315643310547,grad_norm: 0.9999998046243237, iteration: 6815
loss: 1.1495870351791382,grad_norm: 0.9999997884098782, iteration: 6816
loss: 1.183554768562317,grad_norm: 0.9999997908827891, iteration: 6817
loss: 1.090815544128418,grad_norm: 0.9999996724823556, iteration: 6818
loss: 1.1238595247268677,grad_norm: 0.9999997123350638, iteration: 6819
loss: 1.0442551374435425,grad_norm: 0.9999995180896464, iteration: 6820
loss: 1.0165307521820068,grad_norm: 0.9999996010572654, iteration: 6821
loss: 1.041702151298523,grad_norm: 0.9999996227960365, iteration: 6822
loss: 1.1439412832260132,grad_norm: 0.9999998527851367, iteration: 6823
loss: 1.1419880390167236,grad_norm: 0.9999997899239024, iteration: 6824
loss: 1.123806357383728,grad_norm: 0.9999996831151085, iteration: 6825
loss: 1.1769475936889648,grad_norm: 0.9999997462976096, iteration: 6826
loss: 1.1798681020736694,grad_norm: 0.9999997009970413, iteration: 6827
loss: 1.0712858438491821,grad_norm: 0.9999995196364446, iteration: 6828
loss: 1.1686527729034424,grad_norm: 0.9999997545275846, iteration: 6829
loss: 1.1745870113372803,grad_norm: 0.9999997909236148, iteration: 6830
loss: 1.0857652425765991,grad_norm: 0.9999997723590112, iteration: 6831
loss: 1.1134296655654907,grad_norm: 0.9999996606121062, iteration: 6832
loss: 1.1536208391189575,grad_norm: 0.9999996828115778, iteration: 6833
loss: 1.110232949256897,grad_norm: 0.9999996900844486, iteration: 6834
loss: 1.014132022857666,grad_norm: 0.9999995397620262, iteration: 6835
loss: 1.1643486022949219,grad_norm: 0.9999998560502172, iteration: 6836
loss: 1.0480931997299194,grad_norm: 0.9999996971367401, iteration: 6837
loss: 1.1063201427459717,grad_norm: 0.9999998264168353, iteration: 6838
loss: 1.0183919668197632,grad_norm: 0.9999996533955852, iteration: 6839
loss: 1.09684157371521,grad_norm: 0.9999997126606142, iteration: 6840
loss: 1.1051414012908936,grad_norm: 0.9999996944049397, iteration: 6841
loss: 1.0943020582199097,grad_norm: 0.9999995340073078, iteration: 6842
loss: 1.09868586063385,grad_norm: 0.9999996689579785, iteration: 6843
loss: 1.0478160381317139,grad_norm: 0.9999996111706949, iteration: 6844
loss: 1.111521601676941,grad_norm: 0.9999997856417397, iteration: 6845
loss: 1.1196647882461548,grad_norm: 0.999999706597039, iteration: 6846
loss: 1.17910897731781,grad_norm: 0.99999978558941, iteration: 6847
loss: 1.0624549388885498,grad_norm: 0.9999994483641799, iteration: 6848
loss: 1.158138632774353,grad_norm: 0.9999998242088446, iteration: 6849
loss: 1.0587289333343506,grad_norm: 0.9999997078151264, iteration: 6850
loss: 1.0962958335876465,grad_norm: 0.9999997189893225, iteration: 6851
loss: 1.0953729152679443,grad_norm: 0.9999997974303175, iteration: 6852
loss: 1.180857539176941,grad_norm: 0.9999997261337795, iteration: 6853
loss: 1.0364848375320435,grad_norm: 0.9999996911274167, iteration: 6854
loss: 1.0504401922225952,grad_norm: 0.9999995922070319, iteration: 6855
loss: 1.0989699363708496,grad_norm: 0.9999995135645025, iteration: 6856
loss: 1.146694540977478,grad_norm: 0.9999997488752057, iteration: 6857
loss: 1.1320650577545166,grad_norm: 0.9999996740510171, iteration: 6858
loss: 1.0840651988983154,grad_norm: 0.999999589517284, iteration: 6859
loss: 1.1996259689331055,grad_norm: 0.9999996764547475, iteration: 6860
loss: 1.1259691715240479,grad_norm: 0.9999997067049918, iteration: 6861
loss: 1.1087696552276611,grad_norm: 0.9999996620728983, iteration: 6862
loss: 1.0597655773162842,grad_norm: 0.999999549935219, iteration: 6863
loss: 1.0752015113830566,grad_norm: 0.9999997461956016, iteration: 6864
loss: 1.1183151006698608,grad_norm: 0.9999997028784061, iteration: 6865
loss: 1.1581928730010986,grad_norm: 0.9999996654826139, iteration: 6866
loss: 1.1422553062438965,grad_norm: 0.9999997997101884, iteration: 6867
loss: 1.161665439605713,grad_norm: 0.9999997016561541, iteration: 6868
loss: 1.0719677209854126,grad_norm: 0.9999995748894047, iteration: 6869
loss: 1.1976741552352905,grad_norm: 0.999999759241588, iteration: 6870
loss: 1.1127172708511353,grad_norm: 0.9999996045694186, iteration: 6871
loss: 1.1368471384048462,grad_norm: 0.9999996987757305, iteration: 6872
loss: 1.0745306015014648,grad_norm: 0.9999994647505092, iteration: 6873
loss: 1.1325480937957764,grad_norm: 0.999999810858451, iteration: 6874
loss: 1.0295175313949585,grad_norm: 0.9999998635478997, iteration: 6875
loss: 1.1330546140670776,grad_norm: 0.9999996405649203, iteration: 6876
loss: 1.0904229879379272,grad_norm: 0.9999995850777864, iteration: 6877
loss: 1.0755587816238403,grad_norm: 0.9999997356250366, iteration: 6878
loss: 1.0546996593475342,grad_norm: 0.9999996094437191, iteration: 6879
loss: 1.025088906288147,grad_norm: 0.9999996062444468, iteration: 6880
loss: 1.0975900888442993,grad_norm: 0.9999997277706233, iteration: 6881
loss: 1.0856128931045532,grad_norm: 0.9999995539558137, iteration: 6882
loss: 1.1208882331848145,grad_norm: 0.9999997404330928, iteration: 6883
loss: 1.1135375499725342,grad_norm: 0.9999995323769155, iteration: 6884
loss: 1.164222240447998,grad_norm: 0.9999997359687831, iteration: 6885
loss: 1.1877351999282837,grad_norm: 0.9999997129868844, iteration: 6886
loss: 1.1732200384140015,grad_norm: 0.9999997986751191, iteration: 6887
loss: 1.1062450408935547,grad_norm: 0.9999996731578823, iteration: 6888
loss: 1.1188349723815918,grad_norm: 0.9999995523623832, iteration: 6889
loss: 1.1385748386383057,grad_norm: 0.9999998490412899, iteration: 6890
loss: 1.0937455892562866,grad_norm: 0.9999997471472122, iteration: 6891
loss: 1.0803346633911133,grad_norm: 0.999999717647103, iteration: 6892
loss: 1.0263137817382812,grad_norm: 0.9999996117424864, iteration: 6893
loss: 1.124471664428711,grad_norm: 0.9999996465329799, iteration: 6894
loss: 1.1576098203659058,grad_norm: 0.9999998009097779, iteration: 6895
loss: 1.2003965377807617,grad_norm: 0.9999997313587188, iteration: 6896
loss: 1.1601955890655518,grad_norm: 0.9999997780842674, iteration: 6897
loss: 1.1376067399978638,grad_norm: 0.9999997615205929, iteration: 6898
loss: 1.1706079244613647,grad_norm: 0.9999998709656692, iteration: 6899
loss: 1.1254500150680542,grad_norm: 0.9999996582979667, iteration: 6900
loss: 1.106209397315979,grad_norm: 0.9999997028139301, iteration: 6901
loss: 1.0663050413131714,grad_norm: 0.9999996747428199, iteration: 6902
loss: 1.1235183477401733,grad_norm: 0.9999996700567538, iteration: 6903
loss: 1.1229274272918701,grad_norm: 0.9999997743071922, iteration: 6904
loss: 1.084769368171692,grad_norm: 0.9999998044171133, iteration: 6905
loss: 1.0883829593658447,grad_norm: 0.9999996693574422, iteration: 6906
loss: 1.0922833681106567,grad_norm: 0.9999997437698515, iteration: 6907
loss: 1.0994555950164795,grad_norm: 0.9999997145392799, iteration: 6908
loss: 1.0982940196990967,grad_norm: 0.9999995435192892, iteration: 6909
loss: 1.0829362869262695,grad_norm: 0.9999995534506441, iteration: 6910
loss: 1.0821256637573242,grad_norm: 0.9999995555989538, iteration: 6911
loss: 1.0845308303833008,grad_norm: 0.9999996022205891, iteration: 6912
loss: 1.1272779703140259,grad_norm: 0.999999621636706, iteration: 6913
loss: 1.0672965049743652,grad_norm: 0.9999995458585723, iteration: 6914
loss: 1.1266827583312988,grad_norm: 0.9999997451510932, iteration: 6915
loss: 1.1038788557052612,grad_norm: 0.9999997052707663, iteration: 6916
loss: 1.215253472328186,grad_norm: 0.999999849139507, iteration: 6917
loss: 1.1030482053756714,grad_norm: 0.9999997156230997, iteration: 6918
loss: 1.0143738985061646,grad_norm: 0.999999541369066, iteration: 6919
loss: 1.0205082893371582,grad_norm: 0.999999579317125, iteration: 6920
loss: 1.1008657217025757,grad_norm: 0.9999997739674985, iteration: 6921
loss: 1.097525954246521,grad_norm: 0.999999602995443, iteration: 6922
loss: 1.1079444885253906,grad_norm: 0.999999682544436, iteration: 6923
loss: 1.1721826791763306,grad_norm: 0.9999998463768912, iteration: 6924
loss: 1.0486159324645996,grad_norm: 0.9999997571687387, iteration: 6925
loss: 1.1402369737625122,grad_norm: 0.9999997583748592, iteration: 6926
loss: 1.032894492149353,grad_norm: 0.9999996534435024, iteration: 6927
loss: 1.065139889717102,grad_norm: 0.9999994586373888, iteration: 6928
loss: 1.0451548099517822,grad_norm: 0.9999996545425116, iteration: 6929
loss: 1.0592600107192993,grad_norm: 0.999999715139812, iteration: 6930
loss: 1.0341085195541382,grad_norm: 0.9999996644191226, iteration: 6931
loss: 1.1362372636795044,grad_norm: 0.9999997050540353, iteration: 6932
loss: 1.0645909309387207,grad_norm: 0.9999996082971386, iteration: 6933
loss: 1.0219343900680542,grad_norm: 0.9999995350145247, iteration: 6934
loss: 1.0537272691726685,grad_norm: 0.9999997405662377, iteration: 6935
loss: 1.1295466423034668,grad_norm: 0.9999996975871424, iteration: 6936
loss: 1.1172683238983154,grad_norm: 0.9999996209989067, iteration: 6937
loss: 1.0997892618179321,grad_norm: 0.9999997152655855, iteration: 6938
loss: 1.0927462577819824,grad_norm: 0.9999995962010609, iteration: 6939
loss: 1.0931521654129028,grad_norm: 0.9999996424107828, iteration: 6940
loss: 1.1445719003677368,grad_norm: 0.9999998253053922, iteration: 6941
loss: 1.127639889717102,grad_norm: 0.9999995317384724, iteration: 6942
loss: 1.0933529138565063,grad_norm: 0.9999996648703119, iteration: 6943
loss: 1.1597261428833008,grad_norm: 0.9999997810012131, iteration: 6944
loss: 1.1471457481384277,grad_norm: 0.9999997111602668, iteration: 6945
loss: 1.1453193426132202,grad_norm: 0.9999996649525337, iteration: 6946
loss: 1.04741370677948,grad_norm: 0.9999994447411449, iteration: 6947
loss: 1.1444333791732788,grad_norm: 0.9999997199170093, iteration: 6948
loss: 1.1291791200637817,grad_norm: 0.999999629647905, iteration: 6949
loss: 1.0590767860412598,grad_norm: 0.9999993906390271, iteration: 6950
loss: 1.0771982669830322,grad_norm: 0.9999995008500316, iteration: 6951
loss: 1.1474944353103638,grad_norm: 0.9999997266807708, iteration: 6952
loss: 1.0585416555404663,grad_norm: 0.9999994933406581, iteration: 6953
loss: 1.1279560327529907,grad_norm: 0.9999998660949437, iteration: 6954
loss: 1.098363995552063,grad_norm: 0.9999996757102432, iteration: 6955
loss: 1.1113263368606567,grad_norm: 0.9999996305194112, iteration: 6956
loss: 1.1991713047027588,grad_norm: 0.9999997608839271, iteration: 6957
loss: 1.153649926185608,grad_norm: 0.9999997449487574, iteration: 6958
loss: 1.109788417816162,grad_norm: 0.9999996919805055, iteration: 6959
loss: 1.1464132070541382,grad_norm: 0.9999997678642638, iteration: 6960
loss: 1.1571258306503296,grad_norm: 0.9999998474555895, iteration: 6961
loss: 1.1510519981384277,grad_norm: 0.9999997085826531, iteration: 6962
loss: 1.0941643714904785,grad_norm: 0.9999997702841111, iteration: 6963
loss: 1.0749993324279785,grad_norm: 0.9999996538514192, iteration: 6964
loss: 1.0770838260650635,grad_norm: 0.9999997962963926, iteration: 6965
loss: 1.1127314567565918,grad_norm: 0.9999996881336969, iteration: 6966
loss: 1.1001840829849243,grad_norm: 0.9999996544266561, iteration: 6967
loss: 1.1386778354644775,grad_norm: 0.9999996850867894, iteration: 6968
loss: 1.0604702234268188,grad_norm: 0.9999996339123709, iteration: 6969
loss: 1.108258843421936,grad_norm: 0.9999996163742897, iteration: 6970
loss: 1.0487827062606812,grad_norm: 0.9999997115385095, iteration: 6971
loss: 1.097314476966858,grad_norm: 0.9999993503198255, iteration: 6972
loss: 1.1408817768096924,grad_norm: 0.9999997991260977, iteration: 6973
loss: 1.0411858558654785,grad_norm: 0.9999996772557377, iteration: 6974
loss: 1.0085012912750244,grad_norm: 0.9999996366777529, iteration: 6975
loss: 1.0702965259552002,grad_norm: 0.9999996208332097, iteration: 6976
loss: 1.0764386653900146,grad_norm: 0.9999995535122156, iteration: 6977
loss: 1.2115893363952637,grad_norm: 0.999999772822878, iteration: 6978
loss: 1.1106847524642944,grad_norm: 0.9999995661628307, iteration: 6979
loss: 1.0645498037338257,grad_norm: 0.9999997606040555, iteration: 6980
loss: 1.079593300819397,grad_norm: 0.9999996753143662, iteration: 6981
loss: 1.1417300701141357,grad_norm: 0.9999996872892609, iteration: 6982
loss: 1.0195062160491943,grad_norm: 0.9999994900348855, iteration: 6983
loss: 1.1238664388656616,grad_norm: 0.9999997550870147, iteration: 6984
loss: 1.122011423110962,grad_norm: 0.9999996881962396, iteration: 6985
loss: 1.0853303670883179,grad_norm: 0.9999995826463142, iteration: 6986
loss: 1.1465203762054443,grad_norm: 0.9999997571847417, iteration: 6987
loss: 1.0647550821304321,grad_norm: 0.9999996279376144, iteration: 6988
loss: 1.087862253189087,grad_norm: 0.9999997170442553, iteration: 6989
loss: 1.0825941562652588,grad_norm: 0.9999995929557428, iteration: 6990
loss: 1.0778148174285889,grad_norm: 0.9999996525205181, iteration: 6991
loss: 1.0701030492782593,grad_norm: 0.9999997011621903, iteration: 6992
loss: 1.0543946027755737,grad_norm: 0.9999996933339417, iteration: 6993
loss: 1.1308900117874146,grad_norm: 0.9999998343111502, iteration: 6994
loss: 1.1710405349731445,grad_norm: 0.9999997743696265, iteration: 6995
loss: 1.063784122467041,grad_norm: 0.9999996346152072, iteration: 6996
loss: 1.0292869806289673,grad_norm: 0.9999995748795101, iteration: 6997
loss: 1.1695106029510498,grad_norm: 0.999999736463473, iteration: 6998
loss: 1.0479564666748047,grad_norm: 0.9999995795230517, iteration: 6999
loss: 1.0275455713272095,grad_norm: 0.9999996655396022, iteration: 7000
loss: 1.1899945735931396,grad_norm: 0.9999996162325584, iteration: 7001
loss: 1.1712594032287598,grad_norm: 0.9999998430381607, iteration: 7002
loss: 1.073703408241272,grad_norm: 0.9999996838141387, iteration: 7003
loss: 1.1059932708740234,grad_norm: 0.9999997144020787, iteration: 7004
loss: 1.0781364440917969,grad_norm: 0.9999997644627973, iteration: 7005
loss: 1.0552947521209717,grad_norm: 0.9999996274357987, iteration: 7006
loss: 1.0503126382827759,grad_norm: 0.9999997194964715, iteration: 7007
loss: 1.0707424879074097,grad_norm: 0.9999997091282711, iteration: 7008
loss: 1.0808186531066895,grad_norm: 0.9999996412145038, iteration: 7009
loss: 1.072709083557129,grad_norm: 0.999999790574407, iteration: 7010
loss: 1.039486289024353,grad_norm: 0.9999995031345317, iteration: 7011
loss: 1.0288605690002441,grad_norm: 0.999999530076454, iteration: 7012
loss: 1.11268150806427,grad_norm: 0.9999995233856815, iteration: 7013
loss: 1.1028965711593628,grad_norm: 0.9999997834475647, iteration: 7014
loss: 1.059122920036316,grad_norm: 0.9999995488990877, iteration: 7015
loss: 1.0730780363082886,grad_norm: 0.9999995913238279, iteration: 7016
loss: 1.126688003540039,grad_norm: 0.9999996801092432, iteration: 7017
loss: 1.1350172758102417,grad_norm: 0.9999996722557818, iteration: 7018
loss: 1.1549242734909058,grad_norm: 0.9999996684340536, iteration: 7019
loss: 1.1712021827697754,grad_norm: 0.9999998520959607, iteration: 7020
loss: 1.1251652240753174,grad_norm: 0.9999996373373299, iteration: 7021
loss: 1.0503615140914917,grad_norm: 0.9999995998056368, iteration: 7022
loss: 1.122309923171997,grad_norm: 0.9999997096815257, iteration: 7023
loss: 1.0867805480957031,grad_norm: 0.9999996311773421, iteration: 7024
loss: 1.0559993982315063,grad_norm: 0.9999996705945444, iteration: 7025
loss: 1.1089376211166382,grad_norm: 0.999999535079124, iteration: 7026
loss: 1.2731823921203613,grad_norm: 0.9999997552337352, iteration: 7027
loss: 1.020996332168579,grad_norm: 0.9999994374038043, iteration: 7028
loss: 1.1853629350662231,grad_norm: 0.9999997870241123, iteration: 7029
loss: 1.0849723815917969,grad_norm: 0.9999996649885905, iteration: 7030
loss: 1.1384657621383667,grad_norm: 0.9999997361012862, iteration: 7031
loss: 1.0790483951568604,grad_norm: 0.9999997538495685, iteration: 7032
loss: 1.1016875505447388,grad_norm: 0.9999994755086707, iteration: 7033
loss: 1.0584038496017456,grad_norm: 0.9999996330323134, iteration: 7034
loss: 1.0993362665176392,grad_norm: 0.9999996047104684, iteration: 7035
loss: 1.1445168256759644,grad_norm: 0.9999998095202633, iteration: 7036
loss: 1.1898785829544067,grad_norm: 0.9999998085438802, iteration: 7037
loss: 1.0742071866989136,grad_norm: 0.9999994853621678, iteration: 7038
loss: 1.123838186264038,grad_norm: 0.9999998150011414, iteration: 7039
loss: 1.0346086025238037,grad_norm: 0.999999574598452, iteration: 7040
loss: 1.1001677513122559,grad_norm: 0.9999996601123273, iteration: 7041
loss: 1.0291372537612915,grad_norm: 0.999999561910892, iteration: 7042
loss: 1.1328595876693726,grad_norm: 0.9999996773784983, iteration: 7043
loss: 1.0697782039642334,grad_norm: 0.9999996541015149, iteration: 7044
loss: 1.1176203489303589,grad_norm: 0.9999997942766959, iteration: 7045
loss: 1.1127890348434448,grad_norm: 0.9999996480489705, iteration: 7046
loss: 1.0479005575180054,grad_norm: 0.9999997155620005, iteration: 7047
loss: 1.1506271362304688,grad_norm: 0.9999996347423772, iteration: 7048
loss: 1.0994770526885986,grad_norm: 0.9999997655439791, iteration: 7049
loss: 1.1026867628097534,grad_norm: 0.9999996095043545, iteration: 7050
loss: 1.0845425128936768,grad_norm: 0.9999995206492627, iteration: 7051
loss: 1.0258268117904663,grad_norm: 0.9999994817457645, iteration: 7052
loss: 1.0535924434661865,grad_norm: 0.9999997500178324, iteration: 7053
loss: 1.054086446762085,grad_norm: 0.9999997207884873, iteration: 7054
loss: 1.0604490041732788,grad_norm: 0.9999998089483949, iteration: 7055
loss: 1.1093806028366089,grad_norm: 0.9999996042083167, iteration: 7056
loss: 1.1012089252471924,grad_norm: 0.9999993352121651, iteration: 7057
loss: 1.0596686601638794,grad_norm: 0.9999995444736135, iteration: 7058
loss: 1.0325781106948853,grad_norm: 0.9999995202329642, iteration: 7059
loss: 1.0700808763504028,grad_norm: 0.9999995647963257, iteration: 7060
loss: 1.1311944723129272,grad_norm: 0.9999996675074605, iteration: 7061
loss: 1.0510867834091187,grad_norm: 0.9999995426818155, iteration: 7062
loss: 1.1255662441253662,grad_norm: 0.9999995777182212, iteration: 7063
loss: 1.0838104486465454,grad_norm: 0.9999997117785168, iteration: 7064
loss: 1.0846797227859497,grad_norm: 0.9999996658035034, iteration: 7065
loss: 1.0862236022949219,grad_norm: 0.999999697069755, iteration: 7066
loss: 1.0931912660598755,grad_norm: 0.9999995517306433, iteration: 7067
loss: 1.0772594213485718,grad_norm: 0.9999997624302732, iteration: 7068
loss: 1.0673648118972778,grad_norm: 0.9999995863089781, iteration: 7069
loss: 1.1381747722625732,grad_norm: 0.999999724025652, iteration: 7070
loss: 1.086007833480835,grad_norm: 0.9999996085982834, iteration: 7071
loss: 1.061241865158081,grad_norm: 0.9999996591256379, iteration: 7072
loss: 1.152485966682434,grad_norm: 0.9999997159330846, iteration: 7073
loss: 1.0511001348495483,grad_norm: 0.9999995178322203, iteration: 7074
loss: 1.1416202783584595,grad_norm: 0.9999996843746171, iteration: 7075
loss: 1.1167099475860596,grad_norm: 0.9999996125703753, iteration: 7076
loss: 1.0964643955230713,grad_norm: 0.9999995715167506, iteration: 7077
loss: 1.143344759941101,grad_norm: 0.9999997486460244, iteration: 7078
loss: 1.0973103046417236,grad_norm: 0.9999997335120387, iteration: 7079
loss: 1.0860599279403687,grad_norm: 0.9999996133315288, iteration: 7080
loss: 1.0828056335449219,grad_norm: 0.9999996333736607, iteration: 7081
loss: 1.0257395505905151,grad_norm: 0.9999996341492962, iteration: 7082
loss: 1.1392388343811035,grad_norm: 0.9999996891907409, iteration: 7083
loss: 1.147887110710144,grad_norm: 0.99999979785108, iteration: 7084
loss: 1.0585885047912598,grad_norm: 0.9999997005066266, iteration: 7085
loss: 1.1553412675857544,grad_norm: 0.9999997301683039, iteration: 7086
loss: 1.0645887851715088,grad_norm: 0.9999997714982901, iteration: 7087
loss: 1.1605985164642334,grad_norm: 0.9999998110265303, iteration: 7088
loss: 1.069964051246643,grad_norm: 0.9999997804666478, iteration: 7089
loss: 1.0780162811279297,grad_norm: 0.9999996320590772, iteration: 7090
loss: 1.0959290266036987,grad_norm: 0.9999995441004881, iteration: 7091
loss: 1.0621366500854492,grad_norm: 0.9999995469685611, iteration: 7092
loss: 1.0934785604476929,grad_norm: 0.9999997846799292, iteration: 7093
loss: 1.1105842590332031,grad_norm: 0.999999609224031, iteration: 7094
loss: 1.2052617073059082,grad_norm: 0.9999997511604599, iteration: 7095
loss: 1.1729236841201782,grad_norm: 0.9999998110553584, iteration: 7096
loss: 1.0851410627365112,grad_norm: 0.9999996499366719, iteration: 7097
loss: 1.0803580284118652,grad_norm: 0.9999996560258269, iteration: 7098
loss: 1.0679115056991577,grad_norm: 0.9999996428122473, iteration: 7099
loss: 1.07380211353302,grad_norm: 0.9999996423594822, iteration: 7100
loss: 1.1098110675811768,grad_norm: 0.9999996263831193, iteration: 7101
loss: 1.0809274911880493,grad_norm: 0.9999996912361524, iteration: 7102
loss: 1.169858694076538,grad_norm: 0.99999968008309, iteration: 7103
loss: 1.1129971742630005,grad_norm: 0.999999644501407, iteration: 7104
loss: 1.0400400161743164,grad_norm: 0.999999509667587, iteration: 7105
loss: 1.1185911893844604,grad_norm: 0.9999997660791342, iteration: 7106
loss: 1.1159017086029053,grad_norm: 0.9999996775351758, iteration: 7107
loss: 1.1213303804397583,grad_norm: 0.9999996569334558, iteration: 7108
loss: 1.1124107837677002,grad_norm: 0.9999997117301201, iteration: 7109
loss: 1.0684165954589844,grad_norm: 0.9999997059502443, iteration: 7110
loss: 1.0861784219741821,grad_norm: 0.9999996574002066, iteration: 7111
loss: 1.044359564781189,grad_norm: 0.9999994751485294, iteration: 7112
loss: 1.0483980178833008,grad_norm: 0.999999685315853, iteration: 7113
loss: 1.0837239027023315,grad_norm: 0.9999997517231773, iteration: 7114
loss: 1.1064141988754272,grad_norm: 0.9999996560685057, iteration: 7115
loss: 1.032091498374939,grad_norm: 0.9999995536342582, iteration: 7116
loss: 1.0734710693359375,grad_norm: 0.9999995656942574, iteration: 7117
loss: 1.1206685304641724,grad_norm: 0.9999997093346339, iteration: 7118
loss: 1.13406503200531,grad_norm: 0.9999995815336733, iteration: 7119
loss: 1.0067685842514038,grad_norm: 0.9999994687472291, iteration: 7120
loss: 1.0995540618896484,grad_norm: 0.9999996951711343, iteration: 7121
loss: 1.0482043027877808,grad_norm: 0.9999996251536954, iteration: 7122
loss: 1.1488206386566162,grad_norm: 0.9999996363006222, iteration: 7123
loss: 1.2216944694519043,grad_norm: 0.9999997836567379, iteration: 7124
loss: 1.0959742069244385,grad_norm: 0.9999996196355624, iteration: 7125
loss: 1.0904207229614258,grad_norm: 0.9999995744116583, iteration: 7126
loss: 1.0577013492584229,grad_norm: 0.9999997875983367, iteration: 7127
loss: 1.0702306032180786,grad_norm: 0.999999675158695, iteration: 7128
loss: 1.0679324865341187,grad_norm: 0.999999371686172, iteration: 7129
loss: 1.160338044166565,grad_norm: 0.9999996431652917, iteration: 7130
loss: 1.0952528715133667,grad_norm: 0.999999699584176, iteration: 7131
loss: 1.0521430969238281,grad_norm: 0.9999997277274304, iteration: 7132
loss: 1.1764960289001465,grad_norm: 0.9999998348630488, iteration: 7133
loss: 1.1337789297103882,grad_norm: 0.9999997964703379, iteration: 7134
loss: 1.1508644819259644,grad_norm: 0.9999996430060637, iteration: 7135
loss: 1.1222763061523438,grad_norm: 0.9999997118767592, iteration: 7136
loss: 1.0977727174758911,grad_norm: 0.9999996135346496, iteration: 7137
loss: 1.0595245361328125,grad_norm: 0.9999996664627478, iteration: 7138
loss: 1.0798884630203247,grad_norm: 0.9999996164379776, iteration: 7139
loss: 1.0693403482437134,grad_norm: 0.9999996256824013, iteration: 7140
loss: 1.1286389827728271,grad_norm: 0.9999996668250121, iteration: 7141
loss: 1.0648119449615479,grad_norm: 0.9999996169023109, iteration: 7142
loss: 1.1662977933883667,grad_norm: 0.9999998558808084, iteration: 7143
loss: 1.0748668909072876,grad_norm: 0.9999995722426427, iteration: 7144
loss: 1.1318451166152954,grad_norm: 0.9999996731654043, iteration: 7145
loss: 1.1516296863555908,grad_norm: 0.9999996600917469, iteration: 7146
loss: 1.049993634223938,grad_norm: 0.9999994922800866, iteration: 7147
loss: 1.154105544090271,grad_norm: 0.9999997414708786, iteration: 7148
loss: 1.0547951459884644,grad_norm: 0.9999996734456957, iteration: 7149
loss: 1.0982383489608765,grad_norm: 0.9999996189332964, iteration: 7150
loss: 1.1824849843978882,grad_norm: 0.9999997093647488, iteration: 7151
loss: 1.0975412130355835,grad_norm: 0.9999996287298341, iteration: 7152
loss: 1.0704115629196167,grad_norm: 0.9999995220508842, iteration: 7153
loss: 1.0898133516311646,grad_norm: 0.9999996883655803, iteration: 7154
loss: 1.0690159797668457,grad_norm: 0.9999996234080543, iteration: 7155
loss: 1.044256329536438,grad_norm: 0.9999996575104764, iteration: 7156
loss: 1.143600583076477,grad_norm: 0.9999997781784448, iteration: 7157
loss: 1.1781455278396606,grad_norm: 0.9999996857433185, iteration: 7158
loss: 1.0785064697265625,grad_norm: 0.9999996610031375, iteration: 7159
loss: 1.05112886428833,grad_norm: 0.9999995792122003, iteration: 7160
loss: 1.0911210775375366,grad_norm: 0.9999997732829021, iteration: 7161
loss: 1.1449666023254395,grad_norm: 0.9999997065161925, iteration: 7162
loss: 1.0865898132324219,grad_norm: 0.9999997395266431, iteration: 7163
loss: 1.1324682235717773,grad_norm: 0.9999998026471283, iteration: 7164
loss: 1.1722354888916016,grad_norm: 0.9999997371496694, iteration: 7165
loss: 1.1072781085968018,grad_norm: 0.9999996783498933, iteration: 7166
loss: 1.1161359548568726,grad_norm: 0.9999997914926727, iteration: 7167
loss: 1.0945847034454346,grad_norm: 0.9999997336070542, iteration: 7168
loss: 1.1209743022918701,grad_norm: 0.9999996801916116, iteration: 7169
loss: 1.090685248374939,grad_norm: 0.9999995463557139, iteration: 7170
loss: 1.1240952014923096,grad_norm: 0.999999704431223, iteration: 7171
loss: 1.0583107471466064,grad_norm: 0.9999996732167998, iteration: 7172
loss: 1.0584460496902466,grad_norm: 0.9999994047572155, iteration: 7173
loss: 1.0558785200119019,grad_norm: 0.999999597725669, iteration: 7174
loss: 1.0141202211380005,grad_norm: 0.999999589796168, iteration: 7175
loss: 1.1419106721878052,grad_norm: 0.9999995921839874, iteration: 7176
loss: 1.0866373777389526,grad_norm: 0.9999997117168867, iteration: 7177
loss: 1.1856088638305664,grad_norm: 0.999999816918608, iteration: 7178
loss: 1.0282692909240723,grad_norm: 0.999999579784183, iteration: 7179
loss: 1.0759949684143066,grad_norm: 0.9999995340109393, iteration: 7180
loss: 1.0365056991577148,grad_norm: 0.9999995857855641, iteration: 7181
loss: 1.082855224609375,grad_norm: 0.9999994168966608, iteration: 7182
loss: 1.1213243007659912,grad_norm: 0.9999996177983426, iteration: 7183
loss: 1.1315796375274658,grad_norm: 0.9999996662016791, iteration: 7184
loss: 1.0715892314910889,grad_norm: 0.9999996369481696, iteration: 7185
loss: 1.1039774417877197,grad_norm: 0.9999994471077767, iteration: 7186
loss: 1.0215389728546143,grad_norm: 0.9999995780280243, iteration: 7187
loss: 1.0392478704452515,grad_norm: 0.999999618971546, iteration: 7188
loss: 1.0210384130477905,grad_norm: 0.9999996358435861, iteration: 7189
loss: 1.0118381977081299,grad_norm: 0.9999994912530591, iteration: 7190
loss: 1.0035573244094849,grad_norm: 0.9999994427566316, iteration: 7191
loss: 1.1434885263442993,grad_norm: 0.9999997144698509, iteration: 7192
loss: 1.1869300603866577,grad_norm: 0.9999997561805514, iteration: 7193
loss: 1.0746691226959229,grad_norm: 0.9999996504290455, iteration: 7194
loss: 1.0638235807418823,grad_norm: 0.9999995916885251, iteration: 7195
loss: 1.0323445796966553,grad_norm: 0.9999994229472183, iteration: 7196
loss: 1.1267457008361816,grad_norm: 0.9999995329870528, iteration: 7197
loss: 1.0313738584518433,grad_norm: 0.999999389582078, iteration: 7198
loss: 1.1462095975875854,grad_norm: 0.9999997154273962, iteration: 7199
loss: 1.1258043050765991,grad_norm: 0.9999996172669091, iteration: 7200
loss: 1.0804258584976196,grad_norm: 0.999999643783999, iteration: 7201
loss: 1.0798059701919556,grad_norm: 0.9999996446724317, iteration: 7202
loss: 1.1215583086013794,grad_norm: 0.9999996863164815, iteration: 7203
loss: 1.0687257051467896,grad_norm: 0.999999557765278, iteration: 7204
loss: 1.0577348470687866,grad_norm: 0.9999996401168799, iteration: 7205
loss: 1.0850228071212769,grad_norm: 0.9999996398251185, iteration: 7206
loss: 1.0907658338546753,grad_norm: 0.9999997193954742, iteration: 7207
loss: 1.1289955377578735,grad_norm: 0.9999995772397088, iteration: 7208
loss: 1.0776501893997192,grad_norm: 0.9999996819191384, iteration: 7209
loss: 1.1281728744506836,grad_norm: 0.9999996988326959, iteration: 7210
loss: 1.1815996170043945,grad_norm: 0.9999997716319365, iteration: 7211
loss: 1.1040129661560059,grad_norm: 0.9999998044604443, iteration: 7212
loss: 1.0799860954284668,grad_norm: 0.9999996719059548, iteration: 7213
loss: 1.068207859992981,grad_norm: 0.9999995431854433, iteration: 7214
loss: 1.3128015995025635,grad_norm: 0.9999998282221126, iteration: 7215
loss: 1.1699776649475098,grad_norm: 0.9999997301774939, iteration: 7216
loss: 1.1148406267166138,grad_norm: 0.9999994475454156, iteration: 7217
loss: 1.0274028778076172,grad_norm: 0.9999995905769093, iteration: 7218
loss: 1.0490374565124512,grad_norm: 0.9999995975061333, iteration: 7219
loss: 1.0247317552566528,grad_norm: 0.9999994110088333, iteration: 7220
loss: 1.0363743305206299,grad_norm: 0.9999994840883747, iteration: 7221
loss: 1.0764714479446411,grad_norm: 0.9999995933081878, iteration: 7222
loss: 1.1194374561309814,grad_norm: 0.9999996847652983, iteration: 7223
loss: 1.0409668684005737,grad_norm: 0.9999994889356493, iteration: 7224
loss: 1.07330322265625,grad_norm: 0.9999996038316782, iteration: 7225
loss: 1.128944754600525,grad_norm: 0.9999997309996617, iteration: 7226
loss: 1.065708041191101,grad_norm: 0.9999997352339371, iteration: 7227
loss: 1.1269640922546387,grad_norm: 0.9999996933256385, iteration: 7228
loss: 1.0829498767852783,grad_norm: 0.9999996661950119, iteration: 7229
loss: 1.171651840209961,grad_norm: 0.9999998403933522, iteration: 7230
loss: 1.1214994192123413,grad_norm: 0.9999998248647359, iteration: 7231
loss: 1.1680874824523926,grad_norm: 0.9999996927452626, iteration: 7232
loss: 1.119175910949707,grad_norm: 0.9999997311052414, iteration: 7233
loss: 1.0791923999786377,grad_norm: 0.9999994405247696, iteration: 7234
loss: 1.1356955766677856,grad_norm: 0.9999996706564048, iteration: 7235
loss: 1.0717225074768066,grad_norm: 0.9999995241179931, iteration: 7236
loss: 1.0866438150405884,grad_norm: 0.9999996851770262, iteration: 7237
loss: 1.176169514656067,grad_norm: 0.9999997959842183, iteration: 7238
loss: 1.108103632926941,grad_norm: 0.9999998086708278, iteration: 7239
loss: 1.172139286994934,grad_norm: 0.9999997428362053, iteration: 7240
loss: 1.1179579496383667,grad_norm: 0.9999997744982284, iteration: 7241
loss: 1.1045374870300293,grad_norm: 0.9999996327463004, iteration: 7242
loss: 1.1192474365234375,grad_norm: 0.9999994710496203, iteration: 7243
loss: 1.0720720291137695,grad_norm: 0.9999994128837972, iteration: 7244
loss: 1.0719928741455078,grad_norm: 0.9999996198088217, iteration: 7245
loss: 1.1822670698165894,grad_norm: 0.9999997160386477, iteration: 7246
loss: 1.1495494842529297,grad_norm: 0.9999996599913366, iteration: 7247
loss: 1.2122713327407837,grad_norm: 0.999999769354242, iteration: 7248
loss: 1.0297678709030151,grad_norm: 0.9999996274960794, iteration: 7249
loss: 1.0860097408294678,grad_norm: 0.9999996936785144, iteration: 7250
loss: 1.1231319904327393,grad_norm: 0.9999998817732673, iteration: 7251
loss: 1.0562056303024292,grad_norm: 0.9999994152613232, iteration: 7252
loss: 1.042122721672058,grad_norm: 0.9999995870910404, iteration: 7253
loss: 1.0679155588150024,grad_norm: 0.9999995523623847, iteration: 7254
loss: 1.0868474245071411,grad_norm: 0.9999995999732343, iteration: 7255
loss: 1.0922733545303345,grad_norm: 0.9999996552618944, iteration: 7256
loss: 1.08191978931427,grad_norm: 0.9999997616800551, iteration: 7257
loss: 1.07053542137146,grad_norm: 0.9999995554455521, iteration: 7258
loss: 1.1375291347503662,grad_norm: 0.999999827545236, iteration: 7259
loss: 1.1779755353927612,grad_norm: 0.9999997424441422, iteration: 7260
loss: 1.1210936307907104,grad_norm: 0.9999997431728183, iteration: 7261
loss: 1.1153564453125,grad_norm: 0.9999996717699065, iteration: 7262
loss: 1.0779287815093994,grad_norm: 0.9999997281946752, iteration: 7263
loss: 1.0984069108963013,grad_norm: 0.9999996282022223, iteration: 7264
loss: 1.0879442691802979,grad_norm: 0.9999996308046251, iteration: 7265
loss: 1.0284005403518677,grad_norm: 0.9999994674915461, iteration: 7266
loss: 1.0274170637130737,grad_norm: 0.9999993998798425, iteration: 7267
loss: 1.107143759727478,grad_norm: 0.999999658719917, iteration: 7268
loss: 1.0263745784759521,grad_norm: 0.9999994375955267, iteration: 7269
loss: 1.0698423385620117,grad_norm: 0.9999997962736242, iteration: 7270
loss: 1.1776652336120605,grad_norm: 0.9999997535006419, iteration: 7271
loss: 1.0419868230819702,grad_norm: 0.9999995521307704, iteration: 7272
loss: 1.011135458946228,grad_norm: 0.9999994802098697, iteration: 7273
loss: 1.0636826753616333,grad_norm: 0.9999995398104691, iteration: 7274
loss: 1.1117451190948486,grad_norm: 0.999999535699555, iteration: 7275
loss: 1.0971533060073853,grad_norm: 0.9999997115388052, iteration: 7276
loss: 1.0631661415100098,grad_norm: 0.9999996485879913, iteration: 7277
loss: 1.0523760318756104,grad_norm: 0.9999994951958293, iteration: 7278
loss: 1.079732060432434,grad_norm: 0.9999996376167617, iteration: 7279
loss: 1.049179196357727,grad_norm: 0.9999995976191878, iteration: 7280
loss: 1.1088979244232178,grad_norm: 0.9999995424276938, iteration: 7281
loss: 1.1211342811584473,grad_norm: 0.9999997436836675, iteration: 7282
loss: 1.0961374044418335,grad_norm: 0.9999995320810764, iteration: 7283
loss: 1.0975679159164429,grad_norm: 0.9999997107221592, iteration: 7284
loss: 1.1613755226135254,grad_norm: 0.9999997755823018, iteration: 7285
loss: 1.123756766319275,grad_norm: 0.999999555921849, iteration: 7286
loss: 1.0751785039901733,grad_norm: 0.9999995514024684, iteration: 7287
loss: 1.0608927011489868,grad_norm: 0.9999996299949343, iteration: 7288
loss: 1.092875599861145,grad_norm: 0.9999997226246629, iteration: 7289
loss: 1.069705843925476,grad_norm: 0.9999996296175829, iteration: 7290
loss: 1.0686917304992676,grad_norm: 0.9999995919023027, iteration: 7291
loss: 1.0527116060256958,grad_norm: 0.9999995995719207, iteration: 7292
loss: 1.0926165580749512,grad_norm: 0.9999996043439922, iteration: 7293
loss: 1.1486749649047852,grad_norm: 0.99999974365256, iteration: 7294
loss: 1.0965317487716675,grad_norm: 0.9999996021274616, iteration: 7295
loss: 1.1525806188583374,grad_norm: 0.999999791796676, iteration: 7296
loss: 1.1915963888168335,grad_norm: 0.9999997508537192, iteration: 7297
loss: 1.1041254997253418,grad_norm: 0.9999997177123576, iteration: 7298
loss: 1.0851309299468994,grad_norm: 0.9999996007823085, iteration: 7299
loss: 1.0984227657318115,grad_norm: 0.9999997019890732, iteration: 7300
loss: 1.1000531911849976,grad_norm: 0.9999996767832072, iteration: 7301
loss: 1.0491591691970825,grad_norm: 0.9999997028510853, iteration: 7302
loss: 1.167804479598999,grad_norm: 0.9999998974591043, iteration: 7303
loss: 1.076838731765747,grad_norm: 0.9999996217924051, iteration: 7304
loss: 1.02521812915802,grad_norm: 0.9999995161856648, iteration: 7305
loss: 1.0645790100097656,grad_norm: 0.9999996302767373, iteration: 7306
loss: 1.0791655778884888,grad_norm: 0.9999995752080741, iteration: 7307
loss: 1.0740042924880981,grad_norm: 0.9999995485920309, iteration: 7308
loss: 1.0051476955413818,grad_norm: 0.9999996409027236, iteration: 7309
loss: 1.012047290802002,grad_norm: 0.9999993569520047, iteration: 7310
loss: 1.0488746166229248,grad_norm: 0.9999995173757904, iteration: 7311
loss: 0.98046875,grad_norm: 0.9999995961521975, iteration: 7312
loss: 1.1038832664489746,grad_norm: 0.9999996257901563, iteration: 7313
loss: 1.0731174945831299,grad_norm: 0.9999995313529978, iteration: 7314
loss: 1.1787291765213013,grad_norm: 0.9999996541911372, iteration: 7315
loss: 1.1682599782943726,grad_norm: 0.9999996909378686, iteration: 7316
loss: 1.1125849485397339,grad_norm: 0.9999997345298475, iteration: 7317
loss: 1.0402675867080688,grad_norm: 0.9999993792607105, iteration: 7318
loss: 1.054620623588562,grad_norm: 0.9999994841609072, iteration: 7319
loss: 1.0607775449752808,grad_norm: 0.999999662499164, iteration: 7320
loss: 1.1023284196853638,grad_norm: 0.9999996559997004, iteration: 7321
loss: 1.1267975568771362,grad_norm: 0.9999999270137175, iteration: 7322
loss: 1.0629194974899292,grad_norm: 0.9999997364792583, iteration: 7323
loss: 1.1066426038742065,grad_norm: 0.999999711205651, iteration: 7324
loss: 0.9965305328369141,grad_norm: 0.9999994163288424, iteration: 7325
loss: 1.0393781661987305,grad_norm: 0.9999997154019538, iteration: 7326
loss: 1.061463713645935,grad_norm: 0.9999996417023056, iteration: 7327
loss: 1.091523289680481,grad_norm: 0.9999994286396757, iteration: 7328
loss: 1.1075104475021362,grad_norm: 0.9999995856662103, iteration: 7329
loss: 1.0482449531555176,grad_norm: 0.999999532075894, iteration: 7330
loss: 0.9923131465911865,grad_norm: 0.9999996845397164, iteration: 7331
loss: 1.0144076347351074,grad_norm: 0.9999995244693058, iteration: 7332
loss: 1.1148728132247925,grad_norm: 0.9999996867396043, iteration: 7333
loss: 1.1307374238967896,grad_norm: 0.9999997314712561, iteration: 7334
loss: 1.103136420249939,grad_norm: 0.9999996597204107, iteration: 7335
loss: 1.086016058921814,grad_norm: 0.9999998470754294, iteration: 7336
loss: 1.139280915260315,grad_norm: 0.9999997535302104, iteration: 7337
loss: 1.05485999584198,grad_norm: 0.9999995969305145, iteration: 7338
loss: 1.1386735439300537,grad_norm: 0.9999997027541137, iteration: 7339
loss: 1.0811667442321777,grad_norm: 0.9999995744878551, iteration: 7340
loss: 1.1533701419830322,grad_norm: 0.9999997920253462, iteration: 7341
loss: 1.0173934698104858,grad_norm: 0.9999997007459861, iteration: 7342
loss: 1.0979712009429932,grad_norm: 0.9999997857259514, iteration: 7343
loss: 1.061323881149292,grad_norm: 0.9999996411802736, iteration: 7344
loss: 1.0863817930221558,grad_norm: 0.9999996151471308, iteration: 7345
loss: 1.1216548681259155,grad_norm: 0.9999996280993023, iteration: 7346
loss: 1.3426042795181274,grad_norm: 0.9999997743026874, iteration: 7347
loss: 1.0652164220809937,grad_norm: 0.9999997284507726, iteration: 7348
loss: 1.0604606866836548,grad_norm: 0.9999997078162167, iteration: 7349
loss: 1.1235971450805664,grad_norm: 0.9999996623568731, iteration: 7350
loss: 1.1234841346740723,grad_norm: 0.9999997320996213, iteration: 7351
loss: 1.1121827363967896,grad_norm: 0.9999997223944215, iteration: 7352
loss: 1.1205952167510986,grad_norm: 0.9999996715034745, iteration: 7353
loss: 1.039764165878296,grad_norm: 0.9999994565922851, iteration: 7354
loss: 1.0835288763046265,grad_norm: 0.9999995407300664, iteration: 7355
loss: 1.079594373703003,grad_norm: 0.9999997399547592, iteration: 7356
loss: 1.07059907913208,grad_norm: 0.9999997281946182, iteration: 7357
loss: 1.108992576599121,grad_norm: 0.9999996188941785, iteration: 7358
loss: 1.049384593963623,grad_norm: 0.9999995888494084, iteration: 7359
loss: 1.0932132005691528,grad_norm: 0.9999995056724873, iteration: 7360
loss: 1.0296766757965088,grad_norm: 0.9999996614739691, iteration: 7361
loss: 1.016426920890808,grad_norm: 0.9999995671763955, iteration: 7362
loss: 1.0710207223892212,grad_norm: 0.9999996776001615, iteration: 7363
loss: 1.029910683631897,grad_norm: 0.9999993991823799, iteration: 7364
loss: 1.0810627937316895,grad_norm: 0.9999997219362924, iteration: 7365
loss: 1.0463873147964478,grad_norm: 0.9999995504790621, iteration: 7366
loss: 1.1186871528625488,grad_norm: 0.9999996247292728, iteration: 7367
loss: 1.1599183082580566,grad_norm: 0.9999997422165795, iteration: 7368
loss: 1.129684329032898,grad_norm: 0.9999996808745273, iteration: 7369
loss: 1.049600601196289,grad_norm: 0.9999996801402441, iteration: 7370
loss: 1.1631417274475098,grad_norm: 0.9999996951837333, iteration: 7371
loss: 1.1115608215332031,grad_norm: 0.9999997098667208, iteration: 7372
loss: 1.0786443948745728,grad_norm: 0.9999995238361391, iteration: 7373
loss: 1.101439356803894,grad_norm: 0.9999997400970616, iteration: 7374
loss: 1.0404564142227173,grad_norm: 0.9999995458462496, iteration: 7375
loss: 1.1003634929656982,grad_norm: 0.999999574087422, iteration: 7376
loss: 1.0769168138504028,grad_norm: 0.9999996355476953, iteration: 7377
loss: 1.1003729104995728,grad_norm: 0.9999997977740867, iteration: 7378
loss: 1.0313587188720703,grad_norm: 0.9999995256310187, iteration: 7379
loss: 1.1025818586349487,grad_norm: 0.9999996270031768, iteration: 7380
loss: 1.137682318687439,grad_norm: 0.9999996005656101, iteration: 7381
loss: 1.14350163936615,grad_norm: 0.9999997271918868, iteration: 7382
loss: 1.1250461339950562,grad_norm: 0.9999997608586815, iteration: 7383
loss: 1.1217154264450073,grad_norm: 0.999999709218506, iteration: 7384
loss: 1.0466251373291016,grad_norm: 0.9999997070082718, iteration: 7385
loss: 1.1260440349578857,grad_norm: 0.9999996370919342, iteration: 7386
loss: 1.0113036632537842,grad_norm: 0.999999381920277, iteration: 7387
loss: 1.0931317806243896,grad_norm: 0.999999614993064, iteration: 7388
loss: 1.0865780115127563,grad_norm: 0.9999996215413155, iteration: 7389
loss: 1.1390125751495361,grad_norm: 0.999999698806515, iteration: 7390
loss: 1.0529513359069824,grad_norm: 0.9999994798915218, iteration: 7391
loss: 1.0463218688964844,grad_norm: 0.9999994968631127, iteration: 7392
loss: 1.086723804473877,grad_norm: 0.9999996244532178, iteration: 7393
loss: 1.0975470542907715,grad_norm: 0.9999996331636111, iteration: 7394
loss: 1.0743529796600342,grad_norm: 0.999999550922312, iteration: 7395
loss: 1.1599860191345215,grad_norm: 0.9999998016047267, iteration: 7396
loss: 1.0793880224227905,grad_norm: 0.9999995729324396, iteration: 7397
loss: 1.0334947109222412,grad_norm: 0.9999995234223166, iteration: 7398
loss: 1.1385338306427002,grad_norm: 0.9999997431356566, iteration: 7399
loss: 1.1093717813491821,grad_norm: 0.9999995811188769, iteration: 7400
loss: 1.034360408782959,grad_norm: 0.9999994327489287, iteration: 7401
loss: 1.0980768203735352,grad_norm: 0.9999995795033406, iteration: 7402
loss: 1.048871397972107,grad_norm: 0.9999995995664693, iteration: 7403
loss: 1.08058762550354,grad_norm: 0.9999995617405366, iteration: 7404
loss: 1.1475051641464233,grad_norm: 0.9999996021547357, iteration: 7405
loss: 1.1080702543258667,grad_norm: 0.9999996969827418, iteration: 7406
loss: 1.098891258239746,grad_norm: 0.9999996112758721, iteration: 7407
loss: 1.1177234649658203,grad_norm: 0.9999998116590061, iteration: 7408
loss: 1.0642253160476685,grad_norm: 0.9999996564347723, iteration: 7409
loss: 1.123389720916748,grad_norm: 0.9999996289963924, iteration: 7410
loss: 1.074619174003601,grad_norm: 0.9999997407429718, iteration: 7411
loss: 1.0872727632522583,grad_norm: 0.9999997265444328, iteration: 7412
loss: 1.067784070968628,grad_norm: 0.9999995248414304, iteration: 7413
loss: 1.0627063512802124,grad_norm: 0.999999543188284, iteration: 7414
loss: 1.095075249671936,grad_norm: 0.9999996681723824, iteration: 7415
loss: 1.0861371755599976,grad_norm: 0.9999996043794421, iteration: 7416
loss: 1.1251606941223145,grad_norm: 0.9999996838887744, iteration: 7417
loss: 1.1503536701202393,grad_norm: 0.9999996846183766, iteration: 7418
loss: 1.050813913345337,grad_norm: 0.9999995718638885, iteration: 7419
loss: 1.0377905368804932,grad_norm: 0.9999997389688638, iteration: 7420
loss: 1.063050389289856,grad_norm: 0.9999995850932479, iteration: 7421
loss: 1.0944010019302368,grad_norm: 0.9999997585213262, iteration: 7422
loss: 1.1060484647750854,grad_norm: 0.9999996968122407, iteration: 7423
loss: 1.0869848728179932,grad_norm: 0.999999568214805, iteration: 7424
loss: 1.1451138257980347,grad_norm: 0.9999998228246079, iteration: 7425
loss: 1.0258722305297852,grad_norm: 0.9999995009356183, iteration: 7426
loss: 1.050258994102478,grad_norm: 0.9999994273019194, iteration: 7427
loss: 1.1127769947052002,grad_norm: 0.9999997631997706, iteration: 7428
loss: 1.088442325592041,grad_norm: 0.9999997705108865, iteration: 7429
loss: 1.0592397451400757,grad_norm: 0.9999996240096761, iteration: 7430
loss: 1.088908314704895,grad_norm: 0.9999996332249316, iteration: 7431
loss: 1.021937370300293,grad_norm: 0.999999617666789, iteration: 7432
loss: 1.0867849588394165,grad_norm: 0.9999995179819692, iteration: 7433
loss: 1.0963059663772583,grad_norm: 0.9999996788584765, iteration: 7434
loss: 1.1405006647109985,grad_norm: 0.9999998735390517, iteration: 7435
loss: 1.167399525642395,grad_norm: 0.9999997601085993, iteration: 7436
loss: 1.132690191268921,grad_norm: 0.9999996573995453, iteration: 7437
loss: 1.0684199333190918,grad_norm: 0.9999997363895292, iteration: 7438
loss: 1.1155036687850952,grad_norm: 0.9999994918327921, iteration: 7439
loss: 1.04909086227417,grad_norm: 0.9999996392278286, iteration: 7440
loss: 1.0379230976104736,grad_norm: 0.9999996413267727, iteration: 7441
loss: 1.1603230237960815,grad_norm: 0.999999767353485, iteration: 7442
loss: 1.0395442247390747,grad_norm: 0.9999996172544358, iteration: 7443
loss: 1.1128538846969604,grad_norm: 0.9999996065769232, iteration: 7444
loss: 1.0866385698318481,grad_norm: 0.9999995897797812, iteration: 7445
loss: 1.066811442375183,grad_norm: 0.9999997259668413, iteration: 7446
loss: 1.0032966136932373,grad_norm: 0.9999996818977623, iteration: 7447
loss: 1.2153888940811157,grad_norm: 0.9999998520313695, iteration: 7448
loss: 1.0895732641220093,grad_norm: 0.9999998141276669, iteration: 7449
loss: 1.0389560461044312,grad_norm: 0.9999996322644268, iteration: 7450
loss: 1.0679391622543335,grad_norm: 0.9999993589396838, iteration: 7451
loss: 1.0402560234069824,grad_norm: 0.9999997444121691, iteration: 7452
loss: 1.0900568962097168,grad_norm: 0.9999996219665186, iteration: 7453
loss: 1.0711709260940552,grad_norm: 0.9999995810791066, iteration: 7454
loss: 1.1720564365386963,grad_norm: 0.9999996663532243, iteration: 7455
loss: 1.148168921470642,grad_norm: 0.9999998246909289, iteration: 7456
loss: 1.1108647584915161,grad_norm: 0.9999996365934014, iteration: 7457
loss: 1.0976433753967285,grad_norm: 0.9999997239027789, iteration: 7458
loss: 1.121610164642334,grad_norm: 0.9999996954164794, iteration: 7459
loss: 1.0856218338012695,grad_norm: 0.9999995486894876, iteration: 7460
loss: 1.0870264768600464,grad_norm: 0.9999996114935177, iteration: 7461
loss: 1.1058186292648315,grad_norm: 0.999999652758235, iteration: 7462
loss: 1.1415255069732666,grad_norm: 0.9999997351469775, iteration: 7463
loss: 1.0719990730285645,grad_norm: 0.9999996136934615, iteration: 7464
loss: 1.1373255252838135,grad_norm: 0.9999996202424432, iteration: 7465
loss: 1.095060110092163,grad_norm: 0.9999995982862224, iteration: 7466
loss: 1.0677857398986816,grad_norm: 0.9999997363056874, iteration: 7467
loss: 1.0412715673446655,grad_norm: 0.9999996878919672, iteration: 7468
loss: 1.0801795721054077,grad_norm: 0.999999544859191, iteration: 7469
loss: 1.0771480798721313,grad_norm: 0.9999995956297486, iteration: 7470
loss: 1.1384981870651245,grad_norm: 0.9999997926831075, iteration: 7471
loss: 1.1053555011749268,grad_norm: 0.9999996672850123, iteration: 7472
loss: 1.0468350648880005,grad_norm: 0.9999995980626919, iteration: 7473
loss: 1.0338317155838013,grad_norm: 0.9999994800195424, iteration: 7474
loss: 1.0774343013763428,grad_norm: 0.9999996509946982, iteration: 7475
loss: 1.0944327116012573,grad_norm: 0.9999996472052624, iteration: 7476
loss: 1.0734354257583618,grad_norm: 0.9999997078432332, iteration: 7477
loss: 1.0410585403442383,grad_norm: 0.9999995963887854, iteration: 7478
loss: 1.145815134048462,grad_norm: 0.9999996177519028, iteration: 7479
loss: 1.1233259439468384,grad_norm: 0.9999995733246313, iteration: 7480
loss: 1.0770680904388428,grad_norm: 0.9999996473122574, iteration: 7481
loss: 1.0213615894317627,grad_norm: 0.9999995888400741, iteration: 7482
loss: 1.080100178718567,grad_norm: 0.9999994584621704, iteration: 7483
loss: 1.0894222259521484,grad_norm: 0.9999998916229729, iteration: 7484
loss: 1.044586420059204,grad_norm: 0.9999995370416781, iteration: 7485
loss: 1.0966330766677856,grad_norm: 0.9999996607842035, iteration: 7486
loss: 1.040607213973999,grad_norm: 0.9999994608922246, iteration: 7487
loss: 1.083149790763855,grad_norm: 0.9999996448024325, iteration: 7488
loss: 1.1554527282714844,grad_norm: 0.9999997935382747, iteration: 7489
loss: 1.031462550163269,grad_norm: 0.9999996191047912, iteration: 7490
loss: 1.051084041595459,grad_norm: 0.999999553248662, iteration: 7491
loss: 1.1013550758361816,grad_norm: 0.9999997807435752, iteration: 7492
loss: 1.1418895721435547,grad_norm: 0.9999997849496739, iteration: 7493
loss: 1.0532402992248535,grad_norm: 0.9999994507198197, iteration: 7494
loss: 1.1214485168457031,grad_norm: 0.9999996837618038, iteration: 7495
loss: 1.146553635597229,grad_norm: 0.9999995896317871, iteration: 7496
loss: 1.064732551574707,grad_norm: 0.9999996198289898, iteration: 7497
loss: 1.0737673044204712,grad_norm: 0.9999995476613478, iteration: 7498
loss: 1.0419641733169556,grad_norm: 0.999999551594277, iteration: 7499
loss: 1.126322627067566,grad_norm: 0.9999997207557954, iteration: 7500
loss: 1.1559700965881348,grad_norm: 0.9999997826272261, iteration: 7501
loss: 1.0875885486602783,grad_norm: 0.9999996592045951, iteration: 7502
loss: 1.0803766250610352,grad_norm: 0.9999997038689582, iteration: 7503
loss: 1.0760191679000854,grad_norm: 0.9999995732528565, iteration: 7504
loss: 1.0806740522384644,grad_norm: 0.9999996178912623, iteration: 7505
loss: 1.1309852600097656,grad_norm: 0.9999997136008266, iteration: 7506
loss: 1.2249988317489624,grad_norm: 0.9999997390609721, iteration: 7507
loss: 1.0535506010055542,grad_norm: 0.9999998558674442, iteration: 7508
loss: 1.0579801797866821,grad_norm: 0.9999996495026833, iteration: 7509
loss: 1.0515714883804321,grad_norm: 0.9999995836315115, iteration: 7510
loss: 1.0174742937088013,grad_norm: 0.9999995665886948, iteration: 7511
loss: 1.079552173614502,grad_norm: 0.9999996892568409, iteration: 7512
loss: 1.090113878250122,grad_norm: 0.9999996932639289, iteration: 7513
loss: 1.021611213684082,grad_norm: 0.9999996393559653, iteration: 7514
loss: 1.0518404245376587,grad_norm: 0.9999995672268713, iteration: 7515
loss: 1.1286927461624146,grad_norm: 0.999999429888665, iteration: 7516
loss: 1.105515718460083,grad_norm: 0.9999995288464045, iteration: 7517
loss: 1.0868868827819824,grad_norm: 0.9999995695515461, iteration: 7518
loss: 1.034558892250061,grad_norm: 0.9999996050195911, iteration: 7519
loss: 1.1805036067962646,grad_norm: 0.9999998015237835, iteration: 7520
loss: 1.01327383518219,grad_norm: 0.9999996229248171, iteration: 7521
loss: 1.0867280960083008,grad_norm: 0.9999997291977558, iteration: 7522
loss: 1.0091187953948975,grad_norm: 0.9999994753410834, iteration: 7523
loss: 1.1113146543502808,grad_norm: 0.9999996549174828, iteration: 7524
loss: 1.0689377784729004,grad_norm: 0.9999995521830966, iteration: 7525
loss: 1.140355110168457,grad_norm: 0.9999998134010634, iteration: 7526
loss: 1.0736641883850098,grad_norm: 0.999999523542069, iteration: 7527
loss: 1.1156586408615112,grad_norm: 0.9999996145074527, iteration: 7528
loss: 1.1042287349700928,grad_norm: 0.9999995698844515, iteration: 7529
loss: 1.068763256072998,grad_norm: 0.99999963190801, iteration: 7530
loss: 1.0431876182556152,grad_norm: 0.9999995988151856, iteration: 7531
loss: 1.1015256643295288,grad_norm: 0.9999997193084803, iteration: 7532
loss: 1.0753023624420166,grad_norm: 0.9999995614646765, iteration: 7533
loss: 1.0871845483779907,grad_norm: 0.9999997226513788, iteration: 7534
loss: 1.0183476209640503,grad_norm: 0.9999997428663439, iteration: 7535
loss: 1.0761418342590332,grad_norm: 0.9999996083534954, iteration: 7536
loss: 1.00546133518219,grad_norm: 0.999999719444919, iteration: 7537
loss: 1.0694116353988647,grad_norm: 0.9999995763390397, iteration: 7538
loss: 1.0529825687408447,grad_norm: 0.9999997755584137, iteration: 7539
loss: 1.1066091060638428,grad_norm: 0.9999997399116353, iteration: 7540
loss: 1.0394874811172485,grad_norm: 0.9999994705795711, iteration: 7541
loss: 1.1036690473556519,grad_norm: 0.999999840328272, iteration: 7542
loss: 1.0359292030334473,grad_norm: 0.9999997535392862, iteration: 7543
loss: 1.0936113595962524,grad_norm: 0.9999997452677298, iteration: 7544
loss: 1.0652508735656738,grad_norm: 0.9999995757935677, iteration: 7545
loss: 1.1901272535324097,grad_norm: 0.9999998610532121, iteration: 7546
loss: 1.0843698978424072,grad_norm: 0.9999995671009373, iteration: 7547
loss: 0.9865484237670898,grad_norm: 0.9999996586629103, iteration: 7548
loss: 1.0844807624816895,grad_norm: 0.9999994817801949, iteration: 7549
loss: 1.049424171447754,grad_norm: 0.9999995211859839, iteration: 7550
loss: 1.0442043542861938,grad_norm: 0.9999994935098345, iteration: 7551
loss: 1.089316487312317,grad_norm: 0.9999997200208396, iteration: 7552
loss: 1.0433399677276611,grad_norm: 0.9999998569916583, iteration: 7553
loss: 1.0633947849273682,grad_norm: 0.9999997011223848, iteration: 7554
loss: 1.0935059785842896,grad_norm: 0.9999997386693062, iteration: 7555
loss: 1.1079988479614258,grad_norm: 0.9999997218536983, iteration: 7556
loss: 1.0981159210205078,grad_norm: 0.9999997685356905, iteration: 7557
loss: 1.0887136459350586,grad_norm: 0.9999998881111005, iteration: 7558
loss: 1.087462306022644,grad_norm: 0.9999998721905363, iteration: 7559
loss: 1.121170163154602,grad_norm: 0.9999998411760745, iteration: 7560
loss: 1.1308927536010742,grad_norm: 0.9999996107411763, iteration: 7561
loss: 1.097886323928833,grad_norm: 0.9999996860188741, iteration: 7562
loss: 1.06857430934906,grad_norm: 0.999999694315914, iteration: 7563
loss: 1.0907562971115112,grad_norm: 0.9999995014709403, iteration: 7564
loss: 1.1358448266983032,grad_norm: 0.9999998153933872, iteration: 7565
loss: 1.127819538116455,grad_norm: 0.9999996248676742, iteration: 7566
loss: 1.131378173828125,grad_norm: 0.9999999213096017, iteration: 7567
loss: 1.1365524530410767,grad_norm: 0.999999681571385, iteration: 7568
loss: 1.1329383850097656,grad_norm: 0.9999997433122851, iteration: 7569
loss: 1.028823733329773,grad_norm: 0.9999995766224615, iteration: 7570
loss: 1.0379364490509033,grad_norm: 0.999999715373338, iteration: 7571
loss: 1.1584630012512207,grad_norm: 0.9999998559401027, iteration: 7572
loss: 1.0716496706008911,grad_norm: 0.9999994883258806, iteration: 7573
loss: 1.0571731328964233,grad_norm: 0.9999995156264605, iteration: 7574
loss: 1.0831198692321777,grad_norm: 0.9999996084723652, iteration: 7575
loss: 1.075358510017395,grad_norm: 0.9999996612487893, iteration: 7576
loss: 1.1184669733047485,grad_norm: 0.9999996762168183, iteration: 7577
loss: 1.0411598682403564,grad_norm: 0.9999996850465053, iteration: 7578
loss: 1.0577142238616943,grad_norm: 0.9999994061663475, iteration: 7579
loss: 1.0538175106048584,grad_norm: 0.9999997311972146, iteration: 7580
loss: 1.0833377838134766,grad_norm: 0.9999996601263912, iteration: 7581
loss: 1.0940186977386475,grad_norm: 0.9999998734615514, iteration: 7582
loss: 1.0458811521530151,grad_norm: 0.9999994151443007, iteration: 7583
loss: 1.1183969974517822,grad_norm: 0.9999998013066997, iteration: 7584
loss: 1.1459652185440063,grad_norm: 0.9999997544840408, iteration: 7585
loss: 1.1024442911148071,grad_norm: 0.999999593963814, iteration: 7586
loss: 1.1008626222610474,grad_norm: 0.999999738302798, iteration: 7587
loss: 1.0800509452819824,grad_norm: 0.9999996249187171, iteration: 7588
loss: 1.1221983432769775,grad_norm: 0.9999997444644335, iteration: 7589
loss: 1.0745919942855835,grad_norm: 0.9999996458637248, iteration: 7590
loss: 1.1555814743041992,grad_norm: 0.9999997492850148, iteration: 7591
loss: 1.0730605125427246,grad_norm: 0.9999994755072106, iteration: 7592
loss: 1.064822793006897,grad_norm: 0.9999997039500697, iteration: 7593
loss: 1.0764607191085815,grad_norm: 0.9999997474248442, iteration: 7594
loss: 1.077876329421997,grad_norm: 0.9999997623694078, iteration: 7595
loss: 1.0673635005950928,grad_norm: 0.9999995496942484, iteration: 7596
loss: 1.0505229234695435,grad_norm: 0.9999994188230606, iteration: 7597
loss: 1.0984644889831543,grad_norm: 0.9999997531820807, iteration: 7598
loss: 1.0632820129394531,grad_norm: 0.9999997988599226, iteration: 7599
loss: 1.1090288162231445,grad_norm: 0.999999800784771, iteration: 7600
loss: 1.0463762283325195,grad_norm: 0.9999997747772721, iteration: 7601
loss: 1.048671841621399,grad_norm: 0.999999642980998, iteration: 7602
loss: 1.0449124574661255,grad_norm: 0.999999476925786, iteration: 7603
loss: 1.0546176433563232,grad_norm: 0.9999996316798967, iteration: 7604
loss: 1.065617561340332,grad_norm: 0.9999996637963471, iteration: 7605
loss: 1.2381782531738281,grad_norm: 0.9999998244146562, iteration: 7606
loss: 1.0544078350067139,grad_norm: 0.9999997136580852, iteration: 7607
loss: 1.113608479499817,grad_norm: 0.9999998098286467, iteration: 7608
loss: 1.1646333932876587,grad_norm: 0.9999996737748589, iteration: 7609
loss: 1.117067575454712,grad_norm: 0.9999997150569597, iteration: 7610
loss: 1.073018193244934,grad_norm: 0.9999996402577502, iteration: 7611
loss: 1.072790265083313,grad_norm: 0.9999996840803125, iteration: 7612
loss: 1.1413956880569458,grad_norm: 0.9999997429897781, iteration: 7613
loss: 1.0842007398605347,grad_norm: 0.9999997522462499, iteration: 7614
loss: 1.0421199798583984,grad_norm: 0.9999995273785086, iteration: 7615
loss: 1.138956904411316,grad_norm: 0.9999995774809963, iteration: 7616
loss: 1.170042872428894,grad_norm: 0.9999996568839711, iteration: 7617
loss: 0.9560034871101379,grad_norm: 0.999999570799427, iteration: 7618
loss: 1.1233268976211548,grad_norm: 0.9999996081253415, iteration: 7619
loss: 1.091009259223938,grad_norm: 0.9999997695769264, iteration: 7620
loss: 1.09274423122406,grad_norm: 0.9999997003363699, iteration: 7621
loss: 1.0771334171295166,grad_norm: 0.9999996983638701, iteration: 7622
loss: 1.0892207622528076,grad_norm: 0.9999996764659961, iteration: 7623
loss: 1.0571904182434082,grad_norm: 0.9999996995320111, iteration: 7624
loss: 1.1196951866149902,grad_norm: 0.9999998084081193, iteration: 7625
loss: 1.065870761871338,grad_norm: 0.9999995811434954, iteration: 7626
loss: 1.1702834367752075,grad_norm: 0.9999996817950386, iteration: 7627
loss: 0.9955205917358398,grad_norm: 0.9999995446823844, iteration: 7628
loss: 1.0252724885940552,grad_norm: 0.9999995593850262, iteration: 7629
loss: 1.1143897771835327,grad_norm: 0.9999996049732226, iteration: 7630
loss: 1.1021348237991333,grad_norm: 0.9999996246990099, iteration: 7631
loss: 1.0851171016693115,grad_norm: 0.9999995517619831, iteration: 7632
loss: 1.0694382190704346,grad_norm: 0.9999995758309955, iteration: 7633
loss: 1.016772747039795,grad_norm: 0.9999995579819024, iteration: 7634
loss: 1.0884088277816772,grad_norm: 0.9999997473392811, iteration: 7635
loss: 1.0673680305480957,grad_norm: 0.9999995632883762, iteration: 7636
loss: 1.0524271726608276,grad_norm: 0.999999441153391, iteration: 7637
loss: 1.1141271591186523,grad_norm: 0.9999995727714871, iteration: 7638
loss: 1.060018539428711,grad_norm: 0.9999995071834038, iteration: 7639
loss: 1.0112916231155396,grad_norm: 0.9999994291557015, iteration: 7640
loss: 1.0842738151550293,grad_norm: 0.9999994735433823, iteration: 7641
loss: 1.0896189212799072,grad_norm: 0.9999995911579019, iteration: 7642
loss: 1.0191168785095215,grad_norm: 0.9999994286009418, iteration: 7643
loss: 1.1776965856552124,grad_norm: 0.9999998450287209, iteration: 7644
loss: 1.0503833293914795,grad_norm: 0.9999994199972205, iteration: 7645
loss: 1.089747428894043,grad_norm: 0.9999997944167812, iteration: 7646
loss: 1.0486830472946167,grad_norm: 0.9999996371045126, iteration: 7647
loss: 1.1436110734939575,grad_norm: 0.9999996850788406, iteration: 7648
loss: 1.0770214796066284,grad_norm: 0.9999996769025447, iteration: 7649
loss: 1.036405086517334,grad_norm: 0.9999994926775567, iteration: 7650
loss: 1.1061300039291382,grad_norm: 0.9999996061997442, iteration: 7651
loss: 1.0915201902389526,grad_norm: 0.9999995091919439, iteration: 7652
loss: 1.0876014232635498,grad_norm: 0.9999993820620837, iteration: 7653
loss: 1.0327023267745972,grad_norm: 0.9999996586832847, iteration: 7654
loss: 1.0564004182815552,grad_norm: 0.9999996437527696, iteration: 7655
loss: 1.0115091800689697,grad_norm: 0.99999955001002, iteration: 7656
loss: 1.1124347448349,grad_norm: 0.9999997042498611, iteration: 7657
loss: 1.0969386100769043,grad_norm: 0.999999701233941, iteration: 7658
loss: 1.1280385255813599,grad_norm: 0.9999995301099577, iteration: 7659
loss: 1.10243558883667,grad_norm: 0.9999995339359217, iteration: 7660
loss: 1.0265936851501465,grad_norm: 0.9999996352324527, iteration: 7661
loss: 1.0876160860061646,grad_norm: 0.9999996592060827, iteration: 7662
loss: 1.112532615661621,grad_norm: 0.9999997315406589, iteration: 7663
loss: 1.0606728792190552,grad_norm: 0.9999995381600145, iteration: 7664
loss: 1.1030831336975098,grad_norm: 0.9999997138819879, iteration: 7665
loss: 1.1366215944290161,grad_norm: 0.9999997321722442, iteration: 7666
loss: 1.008373737335205,grad_norm: 0.9999995270283678, iteration: 7667
loss: 1.0418003797531128,grad_norm: 0.9999994388397405, iteration: 7668
loss: 1.0297538042068481,grad_norm: 0.9999994999397566, iteration: 7669
loss: 1.0570117235183716,grad_norm: 0.999999582559139, iteration: 7670
loss: 1.0826979875564575,grad_norm: 0.9999996761245976, iteration: 7671
loss: 1.0808104276657104,grad_norm: 0.999999659736578, iteration: 7672
loss: 1.1193052530288696,grad_norm: 0.9999996570587146, iteration: 7673
loss: 1.0592130422592163,grad_norm: 0.9999994536390618, iteration: 7674
loss: 1.1931971311569214,grad_norm: 0.9999996923764084, iteration: 7675
loss: 1.132706880569458,grad_norm: 0.999999650484832, iteration: 7676
loss: 1.0377765893936157,grad_norm: 0.9999995122506925, iteration: 7677
loss: 1.0537066459655762,grad_norm: 0.9999995450421301, iteration: 7678
loss: 1.0289300680160522,grad_norm: 0.999999446425826, iteration: 7679
loss: 1.0618799924850464,grad_norm: 0.999999553286326, iteration: 7680
loss: 1.0687730312347412,grad_norm: 0.9999996284340105, iteration: 7681
loss: 1.039446473121643,grad_norm: 0.9999994071910732, iteration: 7682
loss: 1.020175576210022,grad_norm: 0.9999994913048763, iteration: 7683
loss: 1.0418983697891235,grad_norm: 0.9999995201110669, iteration: 7684
loss: 1.0604798793792725,grad_norm: 0.9999996835995836, iteration: 7685
loss: 1.0672138929367065,grad_norm: 0.999999606864514, iteration: 7686
loss: 1.093113899230957,grad_norm: 0.9999993766387888, iteration: 7687
loss: 1.1096687316894531,grad_norm: 0.9999997583692833, iteration: 7688
loss: 1.1459932327270508,grad_norm: 0.9999996597859552, iteration: 7689
loss: 0.9952710270881653,grad_norm: 0.9999995196062629, iteration: 7690
loss: 1.0430641174316406,grad_norm: 0.9999994339897482, iteration: 7691
loss: 1.1189913749694824,grad_norm: 0.9999996901049911, iteration: 7692
loss: 1.1156638860702515,grad_norm: 0.9999996409077822, iteration: 7693
loss: 1.0922383069992065,grad_norm: 0.9999996399983124, iteration: 7694
loss: 1.1435542106628418,grad_norm: 0.9999997512628053, iteration: 7695
loss: 1.1277254819869995,grad_norm: 0.9999998151780347, iteration: 7696
loss: 1.0783177614212036,grad_norm: 0.9999997493908998, iteration: 7697
loss: 1.1142387390136719,grad_norm: 0.9999996597129608, iteration: 7698
loss: 1.1351302862167358,grad_norm: 0.9999997626316989, iteration: 7699
loss: 1.089011549949646,grad_norm: 0.9999996185852106, iteration: 7700
loss: 1.0515285730361938,grad_norm: 0.9999994991295993, iteration: 7701
loss: 1.0877100229263306,grad_norm: 0.999999688042874, iteration: 7702
loss: 1.074997067451477,grad_norm: 0.999999494215144, iteration: 7703
loss: 0.9892050623893738,grad_norm: 0.9999993181180226, iteration: 7704
loss: 1.1049329042434692,grad_norm: 0.9999995894677329, iteration: 7705
loss: 1.0007429122924805,grad_norm: 0.9999993662617239, iteration: 7706
loss: 1.0400679111480713,grad_norm: 0.9999995198500526, iteration: 7707
loss: 1.1109890937805176,grad_norm: 0.9999996492749156, iteration: 7708
loss: 1.071112871170044,grad_norm: 0.9999996148590957, iteration: 7709
loss: 1.024819016456604,grad_norm: 0.9999994401634931, iteration: 7710
loss: 1.0932855606079102,grad_norm: 0.9999995767583312, iteration: 7711
loss: 1.043596625328064,grad_norm: 0.9999995244091034, iteration: 7712
loss: 1.0751560926437378,grad_norm: 0.9999997208702521, iteration: 7713
loss: 1.1522581577301025,grad_norm: 0.999999756489051, iteration: 7714
loss: 1.0894306898117065,grad_norm: 0.9999994916626181, iteration: 7715
loss: 1.0469051599502563,grad_norm: 0.9999995140898719, iteration: 7716
loss: 1.053907871246338,grad_norm: 0.9999994850987327, iteration: 7717
loss: 1.0976366996765137,grad_norm: 0.9999994443454133, iteration: 7718
loss: 1.119155764579773,grad_norm: 0.9999995878011002, iteration: 7719
loss: 1.0494662523269653,grad_norm: 0.9999996741499318, iteration: 7720
loss: 1.0914572477340698,grad_norm: 0.9999998594408966, iteration: 7721
loss: 1.060632348060608,grad_norm: 0.9999996615691364, iteration: 7722
loss: 1.0268951654434204,grad_norm: 0.9999995598490028, iteration: 7723
loss: 1.021458625793457,grad_norm: 0.9999995520334499, iteration: 7724
loss: 1.0693012475967407,grad_norm: 0.9999997445330122, iteration: 7725
loss: 1.0905388593673706,grad_norm: 0.9999996890070268, iteration: 7726
loss: 1.068669080734253,grad_norm: 0.9999996381760418, iteration: 7727
loss: 1.1063131093978882,grad_norm: 0.999999645044577, iteration: 7728
loss: 1.0867161750793457,grad_norm: 0.9999995977641485, iteration: 7729
loss: 1.1036521196365356,grad_norm: 0.99999982697703, iteration: 7730
loss: 1.0062973499298096,grad_norm: 0.9999996001721116, iteration: 7731
loss: 1.1022008657455444,grad_norm: 0.9999997200104829, iteration: 7732
loss: 1.0797618627548218,grad_norm: 0.999999591470914, iteration: 7733
loss: 1.1150823831558228,grad_norm: 0.9999998104044897, iteration: 7734
loss: 1.0714077949523926,grad_norm: 0.9999995439586283, iteration: 7735
loss: 1.0220707654953003,grad_norm: 0.9999995300868563, iteration: 7736
loss: 1.0799392461776733,grad_norm: 0.9999995297586367, iteration: 7737
loss: 1.1281967163085938,grad_norm: 0.9999996173201309, iteration: 7738
loss: 1.0582352876663208,grad_norm: 0.99999946150621, iteration: 7739
loss: 1.1080783605575562,grad_norm: 0.9999997160951966, iteration: 7740
loss: 1.0891307592391968,grad_norm: 0.9999996944185893, iteration: 7741
loss: 1.0058112144470215,grad_norm: 0.9999994954138406, iteration: 7742
loss: 1.1194732189178467,grad_norm: 0.999999758685639, iteration: 7743
loss: 1.2224608659744263,grad_norm: 0.9999997071137166, iteration: 7744
loss: 1.155320644378662,grad_norm: 0.9999997235619363, iteration: 7745
loss: 1.090592622756958,grad_norm: 0.9999997035461184, iteration: 7746
loss: 1.0439814329147339,grad_norm: 0.9999994856079985, iteration: 7747
loss: 1.049030065536499,grad_norm: 0.999999563671658, iteration: 7748
loss: 1.1149624586105347,grad_norm: 0.9999996596575957, iteration: 7749
loss: 1.06725013256073,grad_norm: 0.9999995250546947, iteration: 7750
loss: 1.1349517107009888,grad_norm: 0.999999691722068, iteration: 7751
loss: 1.0580832958221436,grad_norm: 0.9999997768316096, iteration: 7752
loss: 1.1231178045272827,grad_norm: 0.9999997711397718, iteration: 7753
loss: 1.023057222366333,grad_norm: 0.9999995473788551, iteration: 7754
loss: 1.0352500677108765,grad_norm: 0.9999996189257379, iteration: 7755
loss: 1.1220570802688599,grad_norm: 0.9999994752149743, iteration: 7756
loss: 1.1327029466629028,grad_norm: 0.9999997580606015, iteration: 7757
loss: 1.0594398975372314,grad_norm: 0.9999997538859695, iteration: 7758
loss: 0.9997794032096863,grad_norm: 0.9999995079274321, iteration: 7759
loss: 1.0393174886703491,grad_norm: 0.9999996787240499, iteration: 7760
loss: 1.049622654914856,grad_norm: 0.9999995241335964, iteration: 7761
loss: 1.1244133710861206,grad_norm: 0.99999968807508, iteration: 7762
loss: 1.0674396753311157,grad_norm: 0.999999326334168, iteration: 7763
loss: 1.1301261186599731,grad_norm: 0.9999997040555593, iteration: 7764
loss: 1.0773680210113525,grad_norm: 0.9999998709220485, iteration: 7765
loss: 0.9943764805793762,grad_norm: 0.999999528808258, iteration: 7766
loss: 1.0841974020004272,grad_norm: 0.9999996640197112, iteration: 7767
loss: 1.1606884002685547,grad_norm: 0.9999997747426234, iteration: 7768
loss: 1.1378177404403687,grad_norm: 0.9999997141783584, iteration: 7769
loss: 1.0406244993209839,grad_norm: 0.9999995540520047, iteration: 7770
loss: 1.0796078443527222,grad_norm: 0.9999996877380217, iteration: 7771
loss: 1.099121332168579,grad_norm: 0.9999997035332238, iteration: 7772
loss: 1.117824912071228,grad_norm: 0.9999997097779755, iteration: 7773
loss: 1.0797545909881592,grad_norm: 0.9999997297256887, iteration: 7774
loss: 1.1789710521697998,grad_norm: 0.9999998238071248, iteration: 7775
loss: 1.1024329662322998,grad_norm: 0.999999796656479, iteration: 7776
loss: 1.1154178380966187,grad_norm: 0.9999997233023424, iteration: 7777
loss: 1.0628968477249146,grad_norm: 0.9999995995924806, iteration: 7778
loss: 1.0861724615097046,grad_norm: 0.9999994869651183, iteration: 7779
loss: 1.1450016498565674,grad_norm: 0.9999997132872702, iteration: 7780
loss: 1.0524556636810303,grad_norm: 0.9999996232314229, iteration: 7781
loss: 1.1184377670288086,grad_norm: 0.9999996640691826, iteration: 7782
loss: 1.0427570343017578,grad_norm: 0.9999995655804834, iteration: 7783
loss: 1.0782644748687744,grad_norm: 0.9999997434671837, iteration: 7784
loss: 1.1049174070358276,grad_norm: 0.999999698214654, iteration: 7785
loss: 1.0826927423477173,grad_norm: 0.9999994795972019, iteration: 7786
loss: 1.1180495023727417,grad_norm: 0.9999995751144063, iteration: 7787
loss: 1.0607264041900635,grad_norm: 0.9999997626505266, iteration: 7788
loss: 1.0453118085861206,grad_norm: 0.9999996736904807, iteration: 7789
loss: 1.0917762517929077,grad_norm: 0.9999996602448238, iteration: 7790
loss: 1.121093511581421,grad_norm: 0.9999995188077228, iteration: 7791
loss: 1.0401082038879395,grad_norm: 0.9999996991171953, iteration: 7792
loss: 1.0834956169128418,grad_norm: 0.9999996405087875, iteration: 7793
loss: 1.0844719409942627,grad_norm: 0.9999995570897781, iteration: 7794
loss: 1.0901715755462646,grad_norm: 0.9999997666767237, iteration: 7795
loss: 1.0360171794891357,grad_norm: 0.9999994365895124, iteration: 7796
loss: 1.1001999378204346,grad_norm: 0.9999997470166251, iteration: 7797
loss: 1.0789923667907715,grad_norm: 0.9999998822458791, iteration: 7798
loss: 1.1092103719711304,grad_norm: 0.9999997291066632, iteration: 7799
loss: 1.0061379671096802,grad_norm: 0.999999618209987, iteration: 7800
loss: 1.0756990909576416,grad_norm: 0.9999994586014757, iteration: 7801
loss: 1.1549525260925293,grad_norm: 0.99999960063328, iteration: 7802
loss: 1.056485891342163,grad_norm: 0.9999996051255691, iteration: 7803
loss: 1.096253752708435,grad_norm: 0.9999994981477303, iteration: 7804
loss: 1.021438479423523,grad_norm: 0.9999996465163861, iteration: 7805
loss: 1.0439472198486328,grad_norm: 0.9999995558367728, iteration: 7806
loss: 1.07567298412323,grad_norm: 0.9999994968695767, iteration: 7807
loss: 1.0308175086975098,grad_norm: 0.9999998957609523, iteration: 7808
loss: 1.0286173820495605,grad_norm: 0.9999996962386304, iteration: 7809
loss: 1.0829823017120361,grad_norm: 0.9999998172125231, iteration: 7810
loss: 1.1673718690872192,grad_norm: 0.999999846361868, iteration: 7811
loss: 1.1054043769836426,grad_norm: 0.999999629440332, iteration: 7812
loss: 1.0011438131332397,grad_norm: 0.9999994857629891, iteration: 7813
loss: 0.9929822087287903,grad_norm: 0.9999993530840245, iteration: 7814
loss: 1.0196996927261353,grad_norm: 0.9999997353588674, iteration: 7815
loss: 1.0922698974609375,grad_norm: 0.999999731462476, iteration: 7816
loss: 1.122484564781189,grad_norm: 0.9999995577532617, iteration: 7817
loss: 1.0736621618270874,grad_norm: 0.9999997640198744, iteration: 7818
loss: 1.1263482570648193,grad_norm: 0.9999997368683604, iteration: 7819
loss: 1.0928376913070679,grad_norm: 0.9999995328530336, iteration: 7820
loss: 1.0703157186508179,grad_norm: 0.9999998197168176, iteration: 7821
loss: 1.101786494255066,grad_norm: 0.9999996575512751, iteration: 7822
loss: 1.1565126180648804,grad_norm: 0.9999997225061308, iteration: 7823
loss: 1.0047000646591187,grad_norm: 0.9999995364187382, iteration: 7824
loss: 1.0564192533493042,grad_norm: 0.999999611382324, iteration: 7825
loss: 1.041642427444458,grad_norm: 0.9999993524873017, iteration: 7826
loss: 1.1699599027633667,grad_norm: 0.9999996608602052, iteration: 7827
loss: 1.1586146354675293,grad_norm: 0.9999996977904305, iteration: 7828
loss: 1.059594750404358,grad_norm: 0.999999531427351, iteration: 7829
loss: 1.0748034715652466,grad_norm: 0.9999995158304394, iteration: 7830
loss: 1.0442792177200317,grad_norm: 0.9999994566929582, iteration: 7831
loss: 1.1054481267929077,grad_norm: 0.9999997046428178, iteration: 7832
loss: 1.0719760656356812,grad_norm: 0.999999918438739, iteration: 7833
loss: 1.0997463464736938,grad_norm: 0.9999995534819194, iteration: 7834
loss: 1.0623860359191895,grad_norm: 0.9999994773310811, iteration: 7835
loss: 1.0520806312561035,grad_norm: 0.9999996828531019, iteration: 7836
loss: 1.0763276815414429,grad_norm: 0.9999996336282891, iteration: 7837
loss: 1.1132549047470093,grad_norm: 0.9999997059837558, iteration: 7838
loss: 1.0364762544631958,grad_norm: 0.9999994796913223, iteration: 7839
loss: 1.0538238286972046,grad_norm: 0.9999993612859658, iteration: 7840
loss: 1.0571770668029785,grad_norm: 0.9999997999902764, iteration: 7841
loss: 1.1251033544540405,grad_norm: 0.9999995182752338, iteration: 7842
loss: 1.0588380098342896,grad_norm: 0.9999997552871333, iteration: 7843
loss: 1.0246139764785767,grad_norm: 0.9999995880887868, iteration: 7844
loss: 1.1231192350387573,grad_norm: 0.9999994836343905, iteration: 7845
loss: 1.0659197568893433,grad_norm: 0.9999993263389674, iteration: 7846
loss: 1.1030747890472412,grad_norm: 0.9999997366513309, iteration: 7847
loss: 1.1163134574890137,grad_norm: 0.9999996323363333, iteration: 7848
loss: 1.1164836883544922,grad_norm: 0.999999529720868, iteration: 7849
loss: 1.0852149724960327,grad_norm: 0.9999996031209465, iteration: 7850
loss: 1.1476373672485352,grad_norm: 0.9999997629673418, iteration: 7851
loss: 1.0759556293487549,grad_norm: 0.9999998153668713, iteration: 7852
loss: 1.085620403289795,grad_norm: 0.9999996639315962, iteration: 7853
loss: 1.0286787748336792,grad_norm: 0.9999997658393017, iteration: 7854
loss: 1.0521546602249146,grad_norm: 0.9999997016619432, iteration: 7855
loss: 1.0286086797714233,grad_norm: 0.9999995416112417, iteration: 7856
loss: 1.0753929615020752,grad_norm: 0.9999996452454762, iteration: 7857
loss: 1.0501708984375,grad_norm: 0.9999996094506874, iteration: 7858
loss: 1.0818411111831665,grad_norm: 0.9999995385865682, iteration: 7859
loss: 1.0580651760101318,grad_norm: 0.9999997408676664, iteration: 7860
loss: 1.0658456087112427,grad_norm: 0.9999996210881811, iteration: 7861
loss: 1.155851125717163,grad_norm: 0.9999997061783448, iteration: 7862
loss: 1.1105157136917114,grad_norm: 0.9999995585678249, iteration: 7863
loss: 1.095094084739685,grad_norm: 0.9999999133609379, iteration: 7864
loss: 1.0324174165725708,grad_norm: 0.9999996412074816, iteration: 7865
loss: 1.096010684967041,grad_norm: 0.999999767413325, iteration: 7866
loss: 1.0507123470306396,grad_norm: 0.9999995129189514, iteration: 7867
loss: 1.0221161842346191,grad_norm: 0.9999994434293302, iteration: 7868
loss: 1.091525912284851,grad_norm: 0.9999996456519996, iteration: 7869
loss: 0.9994125962257385,grad_norm: 0.9999995573648525, iteration: 7870
loss: 1.0369564294815063,grad_norm: 0.999999524126923, iteration: 7871
loss: 1.0986591577529907,grad_norm: 0.999999645213888, iteration: 7872
loss: 1.088358998298645,grad_norm: 0.9999995663968702, iteration: 7873
loss: 1.1795331239700317,grad_norm: 0.9999997970456852, iteration: 7874
loss: 1.0428972244262695,grad_norm: 0.9999996975168729, iteration: 7875
loss: 1.053061842918396,grad_norm: 0.9999993765944208, iteration: 7876
loss: 1.0242289304733276,grad_norm: 0.999999469476519, iteration: 7877
loss: 1.1265597343444824,grad_norm: 0.9999995725603041, iteration: 7878
loss: 1.0454732179641724,grad_norm: 0.9999993598745158, iteration: 7879
loss: 0.9695877432823181,grad_norm: 0.9999995569594061, iteration: 7880
loss: 1.0913203954696655,grad_norm: 0.9999995668044095, iteration: 7881
loss: 1.1301565170288086,grad_norm: 0.9999997369352482, iteration: 7882
loss: 1.104382038116455,grad_norm: 0.9999998219447848, iteration: 7883
loss: 1.1077135801315308,grad_norm: 0.9999997574649752, iteration: 7884
loss: 1.1288893222808838,grad_norm: 0.9999996421002619, iteration: 7885
loss: 1.110915184020996,grad_norm: 0.9999996235961504, iteration: 7886
loss: 1.060604453086853,grad_norm: 0.9999994428102629, iteration: 7887
loss: 1.1695352792739868,grad_norm: 0.9999997342661466, iteration: 7888
loss: 1.1407960653305054,grad_norm: 0.9999996796919814, iteration: 7889
loss: 1.0940864086151123,grad_norm: 0.999999617508224, iteration: 7890
loss: 1.1006560325622559,grad_norm: 0.9999998244517089, iteration: 7891
loss: 1.0547382831573486,grad_norm: 0.999999738842163, iteration: 7892
loss: 1.1348832845687866,grad_norm: 0.9999997319662766, iteration: 7893
loss: 1.1087353229522705,grad_norm: 0.9999996387877556, iteration: 7894
loss: 1.1108241081237793,grad_norm: 0.9999996399437479, iteration: 7895
loss: 1.0792800188064575,grad_norm: 0.9999995232076558, iteration: 7896
loss: 1.0402023792266846,grad_norm: 0.9999996040938615, iteration: 7897
loss: 1.0789340734481812,grad_norm: 0.9999994932660794, iteration: 7898
loss: 1.1052124500274658,grad_norm: 0.9999997376488148, iteration: 7899
loss: 1.0411875247955322,grad_norm: 0.9999996481663884, iteration: 7900
loss: 1.010744333267212,grad_norm: 0.9999995626245693, iteration: 7901
loss: 1.1798239946365356,grad_norm: 0.999999558385624, iteration: 7902
loss: 1.0508639812469482,grad_norm: 0.9999995180266413, iteration: 7903
loss: 1.0572720766067505,grad_norm: 0.999999668144537, iteration: 7904
loss: 1.078543782234192,grad_norm: 0.9999995575989853, iteration: 7905
loss: 1.150367259979248,grad_norm: 0.9999997198281142, iteration: 7906
loss: 1.0692955255508423,grad_norm: 0.9999996924848363, iteration: 7907
loss: 1.1456981897354126,grad_norm: 0.9999996730140449, iteration: 7908
loss: 1.1168397665023804,grad_norm: 0.9999996722960073, iteration: 7909
loss: 1.045276403427124,grad_norm: 0.9999995099955966, iteration: 7910
loss: 1.0809669494628906,grad_norm: 0.9999994784575403, iteration: 7911
loss: 1.0744881629943848,grad_norm: 0.9999996570689297, iteration: 7912
loss: 1.0240904092788696,grad_norm: 0.9999993567214117, iteration: 7913
loss: 1.0129176378250122,grad_norm: 0.9999994706035248, iteration: 7914
loss: 1.1211925745010376,grad_norm: 0.9999997279549734, iteration: 7915
loss: 1.1085737943649292,grad_norm: 0.9999996387803135, iteration: 7916
loss: 1.0434387922286987,grad_norm: 0.9999996072296038, iteration: 7917
loss: 1.0700013637542725,grad_norm: 0.9999995103644869, iteration: 7918
loss: 1.104852318763733,grad_norm: 0.9999996391308155, iteration: 7919
loss: 1.0829999446868896,grad_norm: 0.999999466719072, iteration: 7920
loss: 1.081351637840271,grad_norm: 0.999999685378436, iteration: 7921
loss: 1.0456215143203735,grad_norm: 0.9999994709557604, iteration: 7922
loss: 1.0455387830734253,grad_norm: 0.9999996314057334, iteration: 7923
loss: 1.0784533023834229,grad_norm: 0.9999995933659238, iteration: 7924
loss: 1.0394840240478516,grad_norm: 0.9999992944365526, iteration: 7925
loss: 1.077781319618225,grad_norm: 0.9999997985407124, iteration: 7926
loss: 1.0789306163787842,grad_norm: 0.999999627032821, iteration: 7927
loss: 1.0829488039016724,grad_norm: 0.9999997524724928, iteration: 7928
loss: 1.0526552200317383,grad_norm: 0.999999557599739, iteration: 7929
loss: 1.0762089490890503,grad_norm: 0.9999996064427212, iteration: 7930
loss: 1.055825114250183,grad_norm: 0.9999995745989092, iteration: 7931
loss: 1.0489524602890015,grad_norm: 0.9999994106829893, iteration: 7932
loss: 1.1101733446121216,grad_norm: 0.9999998363390052, iteration: 7933
loss: 1.0387346744537354,grad_norm: 0.9999996295489885, iteration: 7934
loss: 1.0505247116088867,grad_norm: 0.9999994661712072, iteration: 7935
loss: 1.0906368494033813,grad_norm: 0.9999998314792438, iteration: 7936
loss: 1.0143662691116333,grad_norm: 0.9999995460585873, iteration: 7937
loss: 1.1940592527389526,grad_norm: 0.9999997690398542, iteration: 7938
loss: 1.1232885122299194,grad_norm: 0.9999996156647282, iteration: 7939
loss: 1.0905723571777344,grad_norm: 0.9999998572435195, iteration: 7940
loss: 1.0524269342422485,grad_norm: 0.9999994002677909, iteration: 7941
loss: 1.1456425189971924,grad_norm: 0.9999997480213999, iteration: 7942
loss: 1.082210898399353,grad_norm: 0.9999995173793785, iteration: 7943
loss: 1.0828137397766113,grad_norm: 0.9999997365619564, iteration: 7944
loss: 1.0533512830734253,grad_norm: 0.9999996272283468, iteration: 7945
loss: 1.0597403049468994,grad_norm: 0.9999997191580795, iteration: 7946
loss: 1.0674504041671753,grad_norm: 0.9999994806403428, iteration: 7947
loss: 1.1394212245941162,grad_norm: 0.9999997605458903, iteration: 7948
loss: 1.0624961853027344,grad_norm: 0.9999997094156297, iteration: 7949
loss: 1.1008806228637695,grad_norm: 0.9999996569088532, iteration: 7950
loss: 1.0138721466064453,grad_norm: 0.9999994878831273, iteration: 7951
loss: 1.0686665773391724,grad_norm: 0.9999995060463903, iteration: 7952
loss: 1.0435692071914673,grad_norm: 0.999999592551075, iteration: 7953
loss: 1.1257351636886597,grad_norm: 0.9999998167558946, iteration: 7954
loss: 1.0560474395751953,grad_norm: 0.9999996731177784, iteration: 7955
loss: 1.0362722873687744,grad_norm: 0.9999996025882557, iteration: 7956
loss: 1.1098262071609497,grad_norm: 0.9999996564656783, iteration: 7957
loss: 1.109133005142212,grad_norm: 0.9999996244205865, iteration: 7958
loss: 1.0857553482055664,grad_norm: 0.9999996600510636, iteration: 7959
loss: 1.0631273984909058,grad_norm: 0.999999551697924, iteration: 7960
loss: 1.0602084398269653,grad_norm: 0.9999995270418095, iteration: 7961
loss: 1.1007862091064453,grad_norm: 0.9999997231930925, iteration: 7962
loss: 1.009335994720459,grad_norm: 0.9999994748872556, iteration: 7963
loss: 1.0813649892807007,grad_norm: 0.9999994719753824, iteration: 7964
loss: 1.0980570316314697,grad_norm: 0.9999995286441616, iteration: 7965
loss: 1.0798436403274536,grad_norm: 0.9999995151103476, iteration: 7966
loss: 1.067931890487671,grad_norm: 0.9999995640556013, iteration: 7967
loss: 1.071250557899475,grad_norm: 0.9999996356627436, iteration: 7968
loss: 1.0877078771591187,grad_norm: 0.9999997369937331, iteration: 7969
loss: 1.1195303201675415,grad_norm: 0.9999996659474747, iteration: 7970
loss: 1.0959510803222656,grad_norm: 0.9999994972370461, iteration: 7971
loss: 1.1136939525604248,grad_norm: 0.9999997021563637, iteration: 7972
loss: 1.0131624937057495,grad_norm: 0.9999993311603973, iteration: 7973
loss: 1.1764329671859741,grad_norm: 0.999999840715033, iteration: 7974
loss: 1.1904858350753784,grad_norm: 0.9999996857938916, iteration: 7975
loss: 1.120588779449463,grad_norm: 0.9999997832459623, iteration: 7976
loss: 1.202929139137268,grad_norm: 0.9999995628830581, iteration: 7977
loss: 1.1699427366256714,grad_norm: 0.9999997865995955, iteration: 7978
loss: 1.069744348526001,grad_norm: 0.9999994906337886, iteration: 7979
loss: 1.0769215822219849,grad_norm: 0.9999997677504465, iteration: 7980
loss: 1.079811692237854,grad_norm: 0.9999996659268935, iteration: 7981
loss: 1.05037522315979,grad_norm: 0.9999993655994617, iteration: 7982
loss: 1.129800796508789,grad_norm: 0.9999996314834758, iteration: 7983
loss: 1.1067453622817993,grad_norm: 0.9999996147274829, iteration: 7984
loss: 1.073153018951416,grad_norm: 0.9999994179982769, iteration: 7985
loss: 1.0565590858459473,grad_norm: 0.9999993530613461, iteration: 7986
loss: 1.0599406957626343,grad_norm: 0.9999994823980175, iteration: 7987
loss: 1.0101479291915894,grad_norm: 0.9999995533461022, iteration: 7988
loss: 1.0508016347885132,grad_norm: 0.9999995537421859, iteration: 7989
loss: 1.0642354488372803,grad_norm: 0.9999994852258655, iteration: 7990
loss: 1.0845093727111816,grad_norm: 0.9999996689908872, iteration: 7991
loss: 1.0355274677276611,grad_norm: 0.999999294251514, iteration: 7992
loss: 1.049782633781433,grad_norm: 0.9999994174957855, iteration: 7993
loss: 1.0869570970535278,grad_norm: 0.9999996640254918, iteration: 7994
loss: 1.0599805116653442,grad_norm: 0.9999993648507526, iteration: 7995
loss: 1.1383213996887207,grad_norm: 0.9999996066359639, iteration: 7996
loss: 1.0668679475784302,grad_norm: 0.9999996072647933, iteration: 7997
loss: 1.0625945329666138,grad_norm: 0.9999997824731093, iteration: 7998
loss: 1.0751879215240479,grad_norm: 0.9999997930425435, iteration: 7999
loss: 1.056806206703186,grad_norm: 0.9999994843308324, iteration: 8000
loss: 1.066975474357605,grad_norm: 0.9999997369990162, iteration: 8001
loss: 1.075282335281372,grad_norm: 0.9999996995526808, iteration: 8002
loss: 1.0363095998764038,grad_norm: 0.9999997438449513, iteration: 8003
loss: 1.21113920211792,grad_norm: 0.9999997185762998, iteration: 8004
loss: 1.055830717086792,grad_norm: 0.9999995691121157, iteration: 8005
loss: 1.027538537979126,grad_norm: 0.9999995204939277, iteration: 8006
loss: 1.0366562604904175,grad_norm: 0.9999994583095875, iteration: 8007
loss: 1.0604816675186157,grad_norm: 0.9999998160336977, iteration: 8008
loss: 1.058548092842102,grad_norm: 0.9999995043757541, iteration: 8009
loss: 1.106845498085022,grad_norm: 0.9999997164816251, iteration: 8010
loss: 1.1214152574539185,grad_norm: 0.999999616748241, iteration: 8011
loss: 1.0519038438796997,grad_norm: 0.999999841447121, iteration: 8012
loss: 1.1072096824645996,grad_norm: 0.9999995394840119, iteration: 8013
loss: 1.0666735172271729,grad_norm: 0.9999995505080592, iteration: 8014
loss: 1.0557245016098022,grad_norm: 0.9999995752743671, iteration: 8015
loss: 1.0584644079208374,grad_norm: 0.9999995188664086, iteration: 8016
loss: 1.1263201236724854,grad_norm: 0.9999996487969302, iteration: 8017
loss: 1.0533955097198486,grad_norm: 0.9999994918787394, iteration: 8018
loss: 1.0439469814300537,grad_norm: 0.9999997739474566, iteration: 8019
loss: 1.1112754344940186,grad_norm: 0.9999996820795541, iteration: 8020
loss: 1.0841400623321533,grad_norm: 0.9999996714826839, iteration: 8021
loss: 1.0329351425170898,grad_norm: 0.9999994252794295, iteration: 8022
loss: 1.0915671586990356,grad_norm: 0.9999997518378965, iteration: 8023
loss: 1.0564066171646118,grad_norm: 0.9999997925501453, iteration: 8024
loss: 1.1084833145141602,grad_norm: 0.9999997001895093, iteration: 8025
loss: 1.0171996355056763,grad_norm: 0.999999361580054, iteration: 8026
loss: 1.1481163501739502,grad_norm: 0.9999997868342992, iteration: 8027
loss: 1.0576319694519043,grad_norm: 0.9999996152044148, iteration: 8028
loss: 1.0691864490509033,grad_norm: 0.99999972696991, iteration: 8029
loss: 1.0784010887145996,grad_norm: 0.9999994731733256, iteration: 8030
loss: 1.1035857200622559,grad_norm: 0.9999995672311934, iteration: 8031
loss: 1.0573853254318237,grad_norm: 0.9999996684358609, iteration: 8032
loss: 1.0204461812973022,grad_norm: 0.9999995351450717, iteration: 8033
loss: 1.076163411140442,grad_norm: 0.9999993207867599, iteration: 8034
loss: 1.0521191358566284,grad_norm: 0.999999325766526, iteration: 8035
loss: 1.0380738973617554,grad_norm: 0.9999994613756144, iteration: 8036
loss: 1.0415891408920288,grad_norm: 0.9999994244294547, iteration: 8037
loss: 1.0658704042434692,grad_norm: 0.9999997369566122, iteration: 8038
loss: 1.0618609189987183,grad_norm: 0.9999995383633019, iteration: 8039
loss: 1.0337352752685547,grad_norm: 0.999999314628595, iteration: 8040
loss: 1.0754488706588745,grad_norm: 0.999999584563326, iteration: 8041
loss: 1.0442386865615845,grad_norm: 0.9999994320787124, iteration: 8042
loss: 1.1462218761444092,grad_norm: 0.9999995677042732, iteration: 8043
loss: 1.0876017808914185,grad_norm: 0.9999995915400118, iteration: 8044
loss: 1.078066349029541,grad_norm: 0.9999995747329203, iteration: 8045
loss: 1.1083561182022095,grad_norm: 0.9999997411093804, iteration: 8046
loss: 1.046960711479187,grad_norm: 0.9999996554387842, iteration: 8047
loss: 1.0475332736968994,grad_norm: 0.9999993979935482, iteration: 8048
loss: 1.000128984451294,grad_norm: 0.9999995433254876, iteration: 8049
loss: 1.1131490468978882,grad_norm: 0.9999996051888749, iteration: 8050
loss: 1.06998610496521,grad_norm: 0.9999998512649149, iteration: 8051
loss: 1.0529651641845703,grad_norm: 0.9999996405863346, iteration: 8052
loss: 1.0538296699523926,grad_norm: 0.9999995675854646, iteration: 8053
loss: 1.143990397453308,grad_norm: 0.9999997698678756, iteration: 8054
loss: 1.0468803644180298,grad_norm: 0.9999994753490528, iteration: 8055
loss: 1.063107967376709,grad_norm: 0.9999993426525015, iteration: 8056
loss: 1.0796780586242676,grad_norm: 0.9999996922664817, iteration: 8057
loss: 1.138721227645874,grad_norm: 0.9999997198350417, iteration: 8058
loss: 1.0849984884262085,grad_norm: 0.9999993582962738, iteration: 8059
loss: 0.9992401599884033,grad_norm: 0.9999993600388434, iteration: 8060
loss: 1.0937745571136475,grad_norm: 0.9999995234758479, iteration: 8061
loss: 1.026242971420288,grad_norm: 0.999999507841164, iteration: 8062
loss: 1.0339462757110596,grad_norm: 0.9999994306094365, iteration: 8063
loss: 1.0722131729125977,grad_norm: 0.9999997104010431, iteration: 8064
loss: 1.0580476522445679,grad_norm: 0.9999993973515052, iteration: 8065
loss: 1.0766996145248413,grad_norm: 0.9999998151442023, iteration: 8066
loss: 1.0577175617218018,grad_norm: 0.9999994228912601, iteration: 8067
loss: 1.0586568117141724,grad_norm: 0.9999995955681918, iteration: 8068
loss: 1.0532723665237427,grad_norm: 0.9999996989285521, iteration: 8069
loss: 1.0786787271499634,grad_norm: 0.9999994398046061, iteration: 8070
loss: 1.0852588415145874,grad_norm: 0.9999994536692656, iteration: 8071
loss: 1.074823021888733,grad_norm: 0.9999993670689167, iteration: 8072
loss: 1.0472824573516846,grad_norm: 0.9999995562745928, iteration: 8073
loss: 1.10535728931427,grad_norm: 0.9999996783795017, iteration: 8074
loss: 1.08785080909729,grad_norm: 0.9999996046113728, iteration: 8075
loss: 1.088514804840088,grad_norm: 0.9999994514451811, iteration: 8076
loss: 1.0824118852615356,grad_norm: 0.9999996238038352, iteration: 8077
loss: 1.0296632051467896,grad_norm: 0.9999994317297212, iteration: 8078
loss: 1.0967453718185425,grad_norm: 0.9999995887336718, iteration: 8079
loss: 1.0753661394119263,grad_norm: 0.9999997452609317, iteration: 8080
loss: 1.0298993587493896,grad_norm: 0.9999995917811947, iteration: 8081
loss: 1.1128188371658325,grad_norm: 0.9999997415240817, iteration: 8082
loss: 1.1037472486495972,grad_norm: 0.9999997475593334, iteration: 8083
loss: 1.069118618965149,grad_norm: 0.999999457618512, iteration: 8084
loss: 0.984646201133728,grad_norm: 0.9999993652956131, iteration: 8085
loss: 1.074975609779358,grad_norm: 0.9999996938386055, iteration: 8086
loss: 1.1090071201324463,grad_norm: 0.9999998757078494, iteration: 8087
loss: 1.0677329301834106,grad_norm: 0.9999997857414223, iteration: 8088
loss: 1.098860502243042,grad_norm: 0.9999997687162789, iteration: 8089
loss: 1.062656283378601,grad_norm: 0.9999997515802617, iteration: 8090
loss: 1.0611146688461304,grad_norm: 0.9999997478967044, iteration: 8091
loss: 1.096479892730713,grad_norm: 0.9999995770941056, iteration: 8092
loss: 1.0898733139038086,grad_norm: 0.9999994871867951, iteration: 8093
loss: 1.0898545980453491,grad_norm: 0.9999994319965542, iteration: 8094
loss: 1.1346529722213745,grad_norm: 0.9999998105296339, iteration: 8095
loss: 1.013002872467041,grad_norm: 0.9999995902244153, iteration: 8096
loss: 0.9920183420181274,grad_norm: 0.999999578133338, iteration: 8097
loss: 1.1242282390594482,grad_norm: 0.9999998106070017, iteration: 8098
loss: 1.1422895193099976,grad_norm: 0.9999997725915017, iteration: 8099
loss: 1.1083307266235352,grad_norm: 0.9999995692161753, iteration: 8100
loss: 1.0658867359161377,grad_norm: 0.9999994875983881, iteration: 8101
loss: 1.065749168395996,grad_norm: 0.9999994224208641, iteration: 8102
loss: 1.1126501560211182,grad_norm: 0.9999998178418528, iteration: 8103
loss: 1.0429518222808838,grad_norm: 0.9999996642113818, iteration: 8104
loss: 1.0327616930007935,grad_norm: 0.9999994398408286, iteration: 8105
loss: 1.0653092861175537,grad_norm: 0.9999995905641764, iteration: 8106
loss: 1.0071840286254883,grad_norm: 0.9999995286223949, iteration: 8107
loss: 1.0369441509246826,grad_norm: 0.999999615741102, iteration: 8108
loss: 1.059026837348938,grad_norm: 0.9999994402380469, iteration: 8109
loss: 1.0538511276245117,grad_norm: 0.9999994372304838, iteration: 8110
loss: 1.1398532390594482,grad_norm: 0.9999997409131774, iteration: 8111
loss: 1.1163973808288574,grad_norm: 0.9999998979234322, iteration: 8112
loss: 1.1010369062423706,grad_norm: 0.9999996031606588, iteration: 8113
loss: 0.9958265423774719,grad_norm: 0.9999995796737443, iteration: 8114
loss: 1.073590874671936,grad_norm: 0.9999994314145193, iteration: 8115
loss: 0.9907914400100708,grad_norm: 0.9999995085049973, iteration: 8116
loss: 1.1005446910858154,grad_norm: 0.9999995091824254, iteration: 8117
loss: 1.0840015411376953,grad_norm: 0.9999995934246318, iteration: 8118
loss: 1.0614702701568604,grad_norm: 0.9999995291151508, iteration: 8119
loss: 1.0297762155532837,grad_norm: 0.9999992991598321, iteration: 8120
loss: 1.0046393871307373,grad_norm: 0.9999993538398227, iteration: 8121
loss: 1.078957200050354,grad_norm: 0.9999996589508074, iteration: 8122
loss: 1.0846532583236694,grad_norm: 0.9999996960239449, iteration: 8123
loss: 1.022989273071289,grad_norm: 0.9999997911463493, iteration: 8124
loss: 1.0368926525115967,grad_norm: 0.9999994537581716, iteration: 8125
loss: 1.0827609300613403,grad_norm: 0.9999995254797082, iteration: 8126
loss: 1.0883567333221436,grad_norm: 0.9999996757343022, iteration: 8127
loss: 1.053708791732788,grad_norm: 0.999999741519717, iteration: 8128
loss: 1.0180881023406982,grad_norm: 0.9999995035684927, iteration: 8129
loss: 1.1135878562927246,grad_norm: 0.999999713742728, iteration: 8130
loss: 1.019026279449463,grad_norm: 0.9999994742187478, iteration: 8131
loss: 1.1125743389129639,grad_norm: 0.99999961854316, iteration: 8132
loss: 1.0810041427612305,grad_norm: 0.9999996736106931, iteration: 8133
loss: 1.1344166994094849,grad_norm: 0.9999997494936529, iteration: 8134
loss: 1.1523889303207397,grad_norm: 0.9999997933019352, iteration: 8135
loss: 1.114272952079773,grad_norm: 0.9999995111747079, iteration: 8136
loss: 0.9750754237174988,grad_norm: 0.9999995016542051, iteration: 8137
loss: 1.0660120248794556,grad_norm: 0.9999994853610593, iteration: 8138
loss: 1.0364631414413452,grad_norm: 0.9999994264841926, iteration: 8139
loss: 1.043631672859192,grad_norm: 0.999999621010032, iteration: 8140
loss: 1.0823777914047241,grad_norm: 0.9999995066383474, iteration: 8141
loss: 1.041741967201233,grad_norm: 0.9999995206401966, iteration: 8142
loss: 1.0225975513458252,grad_norm: 0.9999995317134709, iteration: 8143
loss: 1.040795087814331,grad_norm: 0.9999994982454505, iteration: 8144
loss: 1.1209588050842285,grad_norm: 0.9999997423159643, iteration: 8145
loss: 1.0490025281906128,grad_norm: 0.9999997862393633, iteration: 8146
loss: 1.0384130477905273,grad_norm: 0.9999994467477096, iteration: 8147
loss: 1.0764271020889282,grad_norm: 0.9999999231205161, iteration: 8148
loss: 1.0692201852798462,grad_norm: 0.9999997113890453, iteration: 8149
loss: 1.0121455192565918,grad_norm: 0.9999995314898508, iteration: 8150
loss: 1.080941081047058,grad_norm: 0.9999997965647538, iteration: 8151
loss: 1.043758749961853,grad_norm: 0.9999995062179768, iteration: 8152
loss: 1.0391616821289062,grad_norm: 0.9999996747199191, iteration: 8153
loss: 1.0352421998977661,grad_norm: 0.9999994964784102, iteration: 8154
loss: 1.1091047525405884,grad_norm: 0.9999993693356625, iteration: 8155
loss: 1.1012635231018066,grad_norm: 0.9999994547208735, iteration: 8156
loss: 1.143499493598938,grad_norm: 0.9999996906313005, iteration: 8157
loss: 1.040855884552002,grad_norm: 0.999999470368599, iteration: 8158
loss: 1.0333192348480225,grad_norm: 0.9999995041876893, iteration: 8159
loss: 1.098450779914856,grad_norm: 0.9999996095226488, iteration: 8160
loss: 1.0441124439239502,grad_norm: 0.9999995435263827, iteration: 8161
loss: 1.0878571271896362,grad_norm: 0.9999997552141561, iteration: 8162
loss: 1.0717066526412964,grad_norm: 0.9999997339291583, iteration: 8163
loss: 1.053892970085144,grad_norm: 0.9999997401390125, iteration: 8164
loss: 1.0871825218200684,grad_norm: 0.9999996360835964, iteration: 8165
loss: 1.1672877073287964,grad_norm: 0.9999996605866539, iteration: 8166
loss: 1.0313605070114136,grad_norm: 0.9999993286040895, iteration: 8167
loss: 1.0792508125305176,grad_norm: 0.9999995321322227, iteration: 8168
loss: 1.117680549621582,grad_norm: 0.9999996835721607, iteration: 8169
loss: 1.1134709119796753,grad_norm: 0.9999994592906285, iteration: 8170
loss: 1.0644235610961914,grad_norm: 0.9999993566506628, iteration: 8171
loss: 1.0537872314453125,grad_norm: 0.9999995936556838, iteration: 8172
loss: 1.1264594793319702,grad_norm: 0.9999998456318511, iteration: 8173
loss: 1.0831875801086426,grad_norm: 0.9999993290662769, iteration: 8174
loss: 1.1127862930297852,grad_norm: 0.999999868832269, iteration: 8175
loss: 1.063201665878296,grad_norm: 0.9999994413178795, iteration: 8176
loss: 1.059969186782837,grad_norm: 0.9999992982402377, iteration: 8177
loss: 1.0440254211425781,grad_norm: 0.999999337457059, iteration: 8178
loss: 1.022554636001587,grad_norm: 0.999999508170426, iteration: 8179
loss: 1.0357252359390259,grad_norm: 0.99999959107874, iteration: 8180
loss: 1.065040111541748,grad_norm: 0.9999997104488698, iteration: 8181
loss: 1.134634256362915,grad_norm: 0.99999949582239, iteration: 8182
loss: 1.0942295789718628,grad_norm: 0.999999513146195, iteration: 8183
loss: 1.0846188068389893,grad_norm: 0.9999994957322862, iteration: 8184
loss: 1.0047147274017334,grad_norm: 0.9999994257063918, iteration: 8185
loss: 1.119239330291748,grad_norm: 0.999999737528696, iteration: 8186
loss: 1.0595316886901855,grad_norm: 0.9999996691533376, iteration: 8187
loss: 1.0101513862609863,grad_norm: 0.9999992955679642, iteration: 8188
loss: 1.071286916732788,grad_norm: 0.9999995226812971, iteration: 8189
loss: 1.0504635572433472,grad_norm: 0.9999994214234523, iteration: 8190
loss: 1.0691447257995605,grad_norm: 0.9999996447724785, iteration: 8191
loss: 1.0423601865768433,grad_norm: 0.9999996092397488, iteration: 8192
loss: 1.0433306694030762,grad_norm: 0.999999525385197, iteration: 8193
loss: 1.0670990943908691,grad_norm: 0.9999995543901032, iteration: 8194
loss: 1.0570980310440063,grad_norm: 0.999999843949616, iteration: 8195
loss: 1.1319993734359741,grad_norm: 0.9999996389051343, iteration: 8196
loss: 1.0243302583694458,grad_norm: 0.9999994326244885, iteration: 8197
loss: 1.0525119304656982,grad_norm: 0.9999997970110339, iteration: 8198
loss: 1.0818085670471191,grad_norm: 0.9999995951199883, iteration: 8199
loss: 1.0703428983688354,grad_norm: 0.999999560532467, iteration: 8200
loss: 1.049573302268982,grad_norm: 0.9999996139308136, iteration: 8201
loss: 1.0206162929534912,grad_norm: 0.9999994860146305, iteration: 8202
loss: 1.0817698240280151,grad_norm: 0.9999995528335979, iteration: 8203
loss: 1.0678387880325317,grad_norm: 0.9999996599963578, iteration: 8204
loss: 1.0151046514511108,grad_norm: 0.9999993823265623, iteration: 8205
loss: 1.0727037191390991,grad_norm: 0.9999995375174989, iteration: 8206
loss: 1.1193151473999023,grad_norm: 0.9999996281516214, iteration: 8207
loss: 1.0449246168136597,grad_norm: 0.9999995681555568, iteration: 8208
loss: 1.0484062433242798,grad_norm: 0.9999993520338198, iteration: 8209
loss: 1.0836453437805176,grad_norm: 0.9999995982665587, iteration: 8210
loss: 1.070764422416687,grad_norm: 0.9999995168286323, iteration: 8211
loss: 1.0548627376556396,grad_norm: 0.99999946802292, iteration: 8212
loss: 1.016843318939209,grad_norm: 0.9999993524900851, iteration: 8213
loss: 1.0706725120544434,grad_norm: 0.9999996192222473, iteration: 8214
loss: 1.0471347570419312,grad_norm: 0.9999994959284927, iteration: 8215
loss: 1.0589823722839355,grad_norm: 0.9999994275891945, iteration: 8216
loss: 1.03312087059021,grad_norm: 0.9999993268941889, iteration: 8217
loss: 1.067144513130188,grad_norm: 0.9999995091246773, iteration: 8218
loss: 1.1296789646148682,grad_norm: 0.9999997580160628, iteration: 8219
loss: 1.0349879264831543,grad_norm: 0.9999997178304941, iteration: 8220
loss: 1.0060018301010132,grad_norm: 0.9999994404179884, iteration: 8221
loss: 1.056249976158142,grad_norm: 0.9999994675956693, iteration: 8222
loss: 1.0591332912445068,grad_norm: 0.9999994895687015, iteration: 8223
loss: 1.0928983688354492,grad_norm: 0.9999996853055378, iteration: 8224
loss: 1.1186778545379639,grad_norm: 0.999999727064859, iteration: 8225
loss: 1.123192310333252,grad_norm: 0.9999998506297184, iteration: 8226
loss: 1.0568838119506836,grad_norm: 0.9999995006241549, iteration: 8227
loss: 1.064037799835205,grad_norm: 0.9999997026255041, iteration: 8228
loss: 1.0478078126907349,grad_norm: 0.9999995628511448, iteration: 8229
loss: 1.0325191020965576,grad_norm: 0.9999995949253148, iteration: 8230
loss: 1.1268573999404907,grad_norm: 0.9999996042119939, iteration: 8231
loss: 1.1014798879623413,grad_norm: 0.9999998393173775, iteration: 8232
loss: 1.0928131341934204,grad_norm: 0.9999996471205451, iteration: 8233
loss: 1.0693213939666748,grad_norm: 0.9999994885195479, iteration: 8234
loss: 1.0994391441345215,grad_norm: 0.9999995392176616, iteration: 8235
loss: 1.060150146484375,grad_norm: 0.9999997403908762, iteration: 8236
loss: 1.1423040628433228,grad_norm: 0.9999998451639496, iteration: 8237
loss: 1.0725371837615967,grad_norm: 0.9999994592948814, iteration: 8238
loss: 1.0454845428466797,grad_norm: 0.9999996687725957, iteration: 8239
loss: 1.053495168685913,grad_norm: 0.9999995894947541, iteration: 8240
loss: 1.0469274520874023,grad_norm: 0.9999993931812244, iteration: 8241
loss: 1.1011987924575806,grad_norm: 0.9999997854830333, iteration: 8242
loss: 1.129022240638733,grad_norm: 0.9999998482212039, iteration: 8243
loss: 1.0595757961273193,grad_norm: 0.9999997263009525, iteration: 8244
loss: 1.0060189962387085,grad_norm: 0.9999994258415958, iteration: 8245
loss: 1.0629273653030396,grad_norm: 0.9999999197531663, iteration: 8246
loss: 1.1362013816833496,grad_norm: 0.9999997500055909, iteration: 8247
loss: 1.1133464574813843,grad_norm: 0.999999622475263, iteration: 8248
loss: 1.0544568300247192,grad_norm: 0.9999993681155643, iteration: 8249
loss: 1.1305875778198242,grad_norm: 0.9999996497309855, iteration: 8250
loss: 1.0586522817611694,grad_norm: 0.9999996112572496, iteration: 8251
loss: 1.1186586618423462,grad_norm: 0.9999996669398444, iteration: 8252
loss: 1.0403616428375244,grad_norm: 0.9999994873955199, iteration: 8253
loss: 1.0305708646774292,grad_norm: 0.9999993049341446, iteration: 8254
loss: 1.0481410026550293,grad_norm: 0.9999994810953352, iteration: 8255
loss: 1.0077836513519287,grad_norm: 0.9999994617738129, iteration: 8256
loss: 1.0604593753814697,grad_norm: 0.9999995219456999, iteration: 8257
loss: 1.1836339235305786,grad_norm: 0.9999997579858039, iteration: 8258
loss: 1.0571300983428955,grad_norm: 0.9999995652610586, iteration: 8259
loss: 1.0631102323532104,grad_norm: 0.9999996280597043, iteration: 8260
loss: 1.0856910943984985,grad_norm: 0.9999994731297762, iteration: 8261
loss: 1.0285696983337402,grad_norm: 0.9999995528750569, iteration: 8262
loss: 1.0750819444656372,grad_norm: 0.9999997439016879, iteration: 8263
loss: 1.1295825242996216,grad_norm: 0.9999994877042606, iteration: 8264
loss: 1.0730831623077393,grad_norm: 0.9999994369751287, iteration: 8265
loss: 1.0679607391357422,grad_norm: 0.9999995772940293, iteration: 8266
loss: 1.0781813859939575,grad_norm: 0.9999998387936921, iteration: 8267
loss: 0.9980525970458984,grad_norm: 0.9999996795193175, iteration: 8268
loss: 1.0867153406143188,grad_norm: 0.9999996011567015, iteration: 8269
loss: 1.0640919208526611,grad_norm: 0.999999429692021, iteration: 8270
loss: 1.0831104516983032,grad_norm: 0.9999993875041138, iteration: 8271
loss: 1.0474669933319092,grad_norm: 0.9999998016475558, iteration: 8272
loss: 1.0887603759765625,grad_norm: 0.9999996058351189, iteration: 8273
loss: 1.0791391134262085,grad_norm: 0.9999995965795762, iteration: 8274
loss: 1.143467903137207,grad_norm: 0.9999998005355714, iteration: 8275
loss: 1.1220680475234985,grad_norm: 0.999999640040206, iteration: 8276
loss: 1.1019034385681152,grad_norm: 0.999999559328747, iteration: 8277
loss: 1.0424667596817017,grad_norm: 0.9999995107131104, iteration: 8278
loss: 1.084198236465454,grad_norm: 0.9999996639211503, iteration: 8279
loss: 1.1406553983688354,grad_norm: 0.999999613822809, iteration: 8280
loss: 1.01457679271698,grad_norm: 0.9999994560627519, iteration: 8281
loss: 1.121315360069275,grad_norm: 0.9999997871600846, iteration: 8282
loss: 1.0803554058074951,grad_norm: 0.9999994109987346, iteration: 8283
loss: 1.1009703874588013,grad_norm: 0.9999995931435625, iteration: 8284
loss: 1.09541654586792,grad_norm: 0.9999995189310026, iteration: 8285
loss: 1.1531720161437988,grad_norm: 0.9999998348781308, iteration: 8286
loss: 1.0850231647491455,grad_norm: 0.9999995838376904, iteration: 8287
loss: 1.060779094696045,grad_norm: 0.9999995601668878, iteration: 8288
loss: 1.0664644241333008,grad_norm: 0.999999501417083, iteration: 8289
loss: 1.081505537033081,grad_norm: 0.9999997070719725, iteration: 8290
loss: 1.044114112854004,grad_norm: 0.999999617706692, iteration: 8291
loss: 1.0144925117492676,grad_norm: 0.9999994086559411, iteration: 8292
loss: 1.0579088926315308,grad_norm: 0.9999997156695603, iteration: 8293
loss: 1.0449250936508179,grad_norm: 0.9999997479884534, iteration: 8294
loss: 1.1027624607086182,grad_norm: 0.9999997221107917, iteration: 8295
loss: 1.0383166074752808,grad_norm: 0.999999479295599, iteration: 8296
loss: 1.043515920639038,grad_norm: 0.9999994234494656, iteration: 8297
loss: 1.030837059020996,grad_norm: 0.9999994492508062, iteration: 8298
loss: 1.0619992017745972,grad_norm: 0.9999994866856052, iteration: 8299
loss: 1.0687885284423828,grad_norm: 0.9999995707560053, iteration: 8300
loss: 1.0966218709945679,grad_norm: 0.9999994310658399, iteration: 8301
loss: 1.1006075143814087,grad_norm: 0.9999996240371686, iteration: 8302
loss: 1.050647258758545,grad_norm: 0.9999994668231648, iteration: 8303
loss: 1.0485291481018066,grad_norm: 0.999999486461097, iteration: 8304
loss: 1.0688170194625854,grad_norm: 0.999999481361787, iteration: 8305
loss: 1.029283046722412,grad_norm: 0.9999995201552997, iteration: 8306
loss: 1.0732961893081665,grad_norm: 0.9999995376169242, iteration: 8307
loss: 1.0925016403198242,grad_norm: 0.9999995173590781, iteration: 8308
loss: 1.0437273979187012,grad_norm: 0.9999995429177272, iteration: 8309
loss: 1.1134692430496216,grad_norm: 0.9999996972010872, iteration: 8310
loss: 1.1263834238052368,grad_norm: 0.9999997219954402, iteration: 8311
loss: 1.0825270414352417,grad_norm: 0.9999995984247866, iteration: 8312
loss: 1.126039743423462,grad_norm: 0.9999996345994299, iteration: 8313
loss: 1.0604209899902344,grad_norm: 0.9999993663596586, iteration: 8314
loss: 1.05646812915802,grad_norm: 0.9999993816448549, iteration: 8315
loss: 1.0593775510787964,grad_norm: 0.9999996574383851, iteration: 8316
loss: 1.0517853498458862,grad_norm: 0.9999995355321665, iteration: 8317
loss: 0.9970847964286804,grad_norm: 0.9999993033578992, iteration: 8318
loss: 1.057464361190796,grad_norm: 0.999999512937511, iteration: 8319
loss: 1.0472110509872437,grad_norm: 0.9999995948398538, iteration: 8320
loss: 1.0520461797714233,grad_norm: 0.9999993683170739, iteration: 8321
loss: 1.1605730056762695,grad_norm: 0.9999997993944996, iteration: 8322
loss: 1.1507607698440552,grad_norm: 0.99999964464951, iteration: 8323
loss: 1.1050761938095093,grad_norm: 0.9999996517016619, iteration: 8324
loss: 1.0758001804351807,grad_norm: 0.9999994786651335, iteration: 8325
loss: 1.0129798650741577,grad_norm: 0.9999994167260581, iteration: 8326
loss: 1.1039531230926514,grad_norm: 0.9999994329663584, iteration: 8327
loss: 1.0989538431167603,grad_norm: 0.999999766707547, iteration: 8328
loss: 1.0451023578643799,grad_norm: 0.9999998899626473, iteration: 8329
loss: 1.0354715585708618,grad_norm: 0.999999539900529, iteration: 8330
loss: 1.0329139232635498,grad_norm: 0.9999994976078873, iteration: 8331
loss: 1.0316507816314697,grad_norm: 0.9999994589180177, iteration: 8332
loss: 1.1141780614852905,grad_norm: 0.99999967582636, iteration: 8333
loss: 1.0978268384933472,grad_norm: 0.9999996213432887, iteration: 8334
loss: 1.0660674571990967,grad_norm: 0.9999995146288128, iteration: 8335
loss: 1.1172761917114258,grad_norm: 0.999999668671677, iteration: 8336
loss: 1.0668601989746094,grad_norm: 0.9999994627583484, iteration: 8337
loss: 1.0868213176727295,grad_norm: 0.9999995141425309, iteration: 8338
loss: 1.0045812129974365,grad_norm: 0.999999270540784, iteration: 8339
loss: 1.0235826969146729,grad_norm: 0.999999619261161, iteration: 8340
loss: 1.0671186447143555,grad_norm: 0.9999997445553128, iteration: 8341
loss: 0.9833968877792358,grad_norm: 0.9999994408454637, iteration: 8342
loss: 1.042199730873108,grad_norm: 0.9999995283387692, iteration: 8343
loss: 1.0405150651931763,grad_norm: 0.9999995344543596, iteration: 8344
loss: 1.0608526468276978,grad_norm: 0.9999996832748006, iteration: 8345
loss: 1.0987411737442017,grad_norm: 0.9999996054738584, iteration: 8346
loss: 1.0341699123382568,grad_norm: 0.9999995147199994, iteration: 8347
loss: 1.0974589586257935,grad_norm: 0.999999756848904, iteration: 8348
loss: 1.106742024421692,grad_norm: 0.9999994185344919, iteration: 8349
loss: 1.0797619819641113,grad_norm: 0.9999998111737576, iteration: 8350
loss: 1.0757291316986084,grad_norm: 0.9999993596874454, iteration: 8351
loss: 0.9955759048461914,grad_norm: 0.9999995354068005, iteration: 8352
loss: 1.0503090620040894,grad_norm: 0.9999996434285777, iteration: 8353
loss: 1.058922529220581,grad_norm: 0.9999994722538174, iteration: 8354
loss: 1.1192039251327515,grad_norm: 0.9999996468063809, iteration: 8355
loss: 1.0793101787567139,grad_norm: 0.9999994512449414, iteration: 8356
loss: 1.1000629663467407,grad_norm: 0.9999997136440669, iteration: 8357
loss: 1.0719417333602905,grad_norm: 0.999999572907004, iteration: 8358
loss: 1.1100724935531616,grad_norm: 0.9999993379377747, iteration: 8359
loss: 1.0666606426239014,grad_norm: 0.9999995248131877, iteration: 8360
loss: 1.0790468454360962,grad_norm: 0.9999992125222382, iteration: 8361
loss: 1.116439700126648,grad_norm: 0.9999997118877649, iteration: 8362
loss: 1.028001070022583,grad_norm: 0.9999996756521569, iteration: 8363
loss: 1.0625125169754028,grad_norm: 0.9999994921616098, iteration: 8364
loss: 1.0843267440795898,grad_norm: 0.9999996651293962, iteration: 8365
loss: 1.0732003450393677,grad_norm: 0.9999994229277409, iteration: 8366
loss: 1.0606800317764282,grad_norm: 0.9999994770831058, iteration: 8367
loss: 1.0796500444412231,grad_norm: 0.9999995314749538, iteration: 8368
loss: 1.104972004890442,grad_norm: 0.9999995955345321, iteration: 8369
loss: 1.0838189125061035,grad_norm: 0.9999995615354819, iteration: 8370
loss: 1.0548205375671387,grad_norm: 0.9999995142824804, iteration: 8371
loss: 1.1271787881851196,grad_norm: 0.99999979763459, iteration: 8372
loss: 1.0635349750518799,grad_norm: 0.999999737067044, iteration: 8373
loss: 1.0042157173156738,grad_norm: 0.99999971814529, iteration: 8374
loss: 1.0951650142669678,grad_norm: 0.9999996842806048, iteration: 8375
loss: 1.067578911781311,grad_norm: 0.9999993758451057, iteration: 8376
loss: 1.068822979927063,grad_norm: 0.999999469661751, iteration: 8377
loss: 1.0085358619689941,grad_norm: 0.9999993299835859, iteration: 8378
loss: 1.0645250082015991,grad_norm: 0.9999993663521813, iteration: 8379
loss: 1.117462396621704,grad_norm: 0.9999997016462918, iteration: 8380
loss: 1.0311249494552612,grad_norm: 0.9999996628339175, iteration: 8381
loss: 1.0528367757797241,grad_norm: 0.9999995093822903, iteration: 8382
loss: 1.1425695419311523,grad_norm: 0.9999997492713101, iteration: 8383
loss: 1.0807383060455322,grad_norm: 0.9999994663269748, iteration: 8384
loss: 0.9860502481460571,grad_norm: 0.9999995558995254, iteration: 8385
loss: 1.010446548461914,grad_norm: 0.9999993543608681, iteration: 8386
loss: 1.0893495082855225,grad_norm: 0.9999996034334813, iteration: 8387
loss: 1.0977439880371094,grad_norm: 0.9999996892042918, iteration: 8388
loss: 1.0826151371002197,grad_norm: 0.9999997190493009, iteration: 8389
loss: 1.0674173831939697,grad_norm: 0.9999996892671706, iteration: 8390
loss: 1.0254324674606323,grad_norm: 0.9999995750161286, iteration: 8391
loss: 1.1814054250717163,grad_norm: 0.9999997953979329, iteration: 8392
loss: 1.0402418375015259,grad_norm: 0.9999997092437283, iteration: 8393
loss: 1.0056191682815552,grad_norm: 0.9999993521293904, iteration: 8394
loss: 1.118021845817566,grad_norm: 0.9999995112163805, iteration: 8395
loss: 1.0526803731918335,grad_norm: 0.9999994747668813, iteration: 8396
loss: 1.0960763692855835,grad_norm: 0.9999994021959269, iteration: 8397
loss: 1.0204339027404785,grad_norm: 0.9999994315765655, iteration: 8398
loss: 1.0589489936828613,grad_norm: 0.9999995739708526, iteration: 8399
loss: 1.029453158378601,grad_norm: 0.9999996381983237, iteration: 8400
loss: 1.1827198266983032,grad_norm: 0.9999998096054736, iteration: 8401
loss: 1.0595910549163818,grad_norm: 0.9999997382078429, iteration: 8402
loss: 1.08843994140625,grad_norm: 0.9999996331728325, iteration: 8403
loss: 1.0702821016311646,grad_norm: 0.9999995077094462, iteration: 8404
loss: 1.0705732107162476,grad_norm: 0.9999996693730809, iteration: 8405
loss: 1.028667688369751,grad_norm: 0.9999995697742823, iteration: 8406
loss: 1.0396623611450195,grad_norm: 0.9999992904996146, iteration: 8407
loss: 1.1215639114379883,grad_norm: 0.9999996333914882, iteration: 8408
loss: 1.1278023719787598,grad_norm: 0.9999998412083146, iteration: 8409
loss: 1.037243127822876,grad_norm: 0.9999992985750205, iteration: 8410
loss: 1.06813645362854,grad_norm: 0.9999996224953505, iteration: 8411
loss: 1.0644254684448242,grad_norm: 0.999999596447687, iteration: 8412
loss: 1.0416713953018188,grad_norm: 0.9999995196356395, iteration: 8413
loss: 1.1097381114959717,grad_norm: 0.9999998439755463, iteration: 8414
loss: 1.0546473264694214,grad_norm: 0.9999996344444406, iteration: 8415
loss: 1.0899564027786255,grad_norm: 0.9999995654469459, iteration: 8416
loss: 1.126206874847412,grad_norm: 0.9999996071825893, iteration: 8417
loss: 1.069387435913086,grad_norm: 0.9999996891422839, iteration: 8418
loss: 1.03045654296875,grad_norm: 0.9999994269144031, iteration: 8419
loss: 1.0884290933609009,grad_norm: 0.9999995776494676, iteration: 8420
loss: 1.0288022756576538,grad_norm: 0.9999996109628108, iteration: 8421
loss: 1.104656457901001,grad_norm: 0.9999996792628902, iteration: 8422
loss: 1.0544954538345337,grad_norm: 0.9999996230891144, iteration: 8423
loss: 1.0936304330825806,grad_norm: 0.999999396856708, iteration: 8424
loss: 1.088280439376831,grad_norm: 0.9999994188473078, iteration: 8425
loss: 1.0285853147506714,grad_norm: 0.9999997980838997, iteration: 8426
loss: 1.044045329093933,grad_norm: 0.9999995332570202, iteration: 8427
loss: 1.019176959991455,grad_norm: 0.9999992794075523, iteration: 8428
loss: 1.0259501934051514,grad_norm: 0.9999997494241329, iteration: 8429
loss: 1.0460528135299683,grad_norm: 0.9999993583021154, iteration: 8430
loss: 1.002884030342102,grad_norm: 0.9999995209040988, iteration: 8431
loss: 1.016206979751587,grad_norm: 0.9999997210564747, iteration: 8432
loss: 1.035234808921814,grad_norm: 0.9999994729101094, iteration: 8433
loss: 1.1119176149368286,grad_norm: 0.999999776027768, iteration: 8434
loss: 1.0318732261657715,grad_norm: 0.9999994244858442, iteration: 8435
loss: 1.0865633487701416,grad_norm: 0.9999994803011717, iteration: 8436
loss: 1.0968235731124878,grad_norm: 0.9999994230882933, iteration: 8437
loss: 1.0294028520584106,grad_norm: 0.9999993647835117, iteration: 8438
loss: 1.1154531240463257,grad_norm: 0.9999997753509611, iteration: 8439
loss: 1.0436393022537231,grad_norm: 0.9999996172861743, iteration: 8440
loss: 1.0904489755630493,grad_norm: 0.9999993880405355, iteration: 8441
loss: 1.0615177154541016,grad_norm: 0.9999995290096847, iteration: 8442
loss: 1.0221421718597412,grad_norm: 0.9999996011584725, iteration: 8443
loss: 1.0180257558822632,grad_norm: 0.9999992748251189, iteration: 8444
loss: 1.0975884199142456,grad_norm: 0.9999995790768114, iteration: 8445
loss: 1.0168819427490234,grad_norm: 0.9999995721412515, iteration: 8446
loss: 1.09145987033844,grad_norm: 0.999999643603618, iteration: 8447
loss: 1.0441786050796509,grad_norm: 0.9999992050830779, iteration: 8448
loss: 1.1079787015914917,grad_norm: 0.9999995542780558, iteration: 8449
loss: 1.0722975730895996,grad_norm: 0.9999996427955263, iteration: 8450
loss: 1.0491610765457153,grad_norm: 0.9999995271479202, iteration: 8451
loss: 1.0502276420593262,grad_norm: 0.9999994466759785, iteration: 8452
loss: 0.997317910194397,grad_norm: 0.9999997392814725, iteration: 8453
loss: 1.1339709758758545,grad_norm: 0.9999998368107426, iteration: 8454
loss: 1.0491549968719482,grad_norm: 0.9999995867335896, iteration: 8455
loss: 1.0290828943252563,grad_norm: 0.9999993436078208, iteration: 8456
loss: 1.0814825296401978,grad_norm: 0.9999998168170912, iteration: 8457
loss: 1.1189872026443481,grad_norm: 0.99999963493098, iteration: 8458
loss: 1.0966700315475464,grad_norm: 0.99999979914671, iteration: 8459
loss: 1.2106049060821533,grad_norm: 0.9999999503034888, iteration: 8460
loss: 1.1048463582992554,grad_norm: 0.9999997083859689, iteration: 8461
loss: 1.0904690027236938,grad_norm: 0.9999996882177425, iteration: 8462
loss: 1.0827736854553223,grad_norm: 0.9999994753398046, iteration: 8463
loss: 1.0643043518066406,grad_norm: 0.9999996320813573, iteration: 8464
loss: 1.042873740196228,grad_norm: 0.9999997110280653, iteration: 8465
loss: 1.0243841409683228,grad_norm: 0.999999519287648, iteration: 8466
loss: 1.077706217765808,grad_norm: 0.9999997442703966, iteration: 8467
loss: 1.1251306533813477,grad_norm: 0.9999996169418983, iteration: 8468
loss: 1.0761898756027222,grad_norm: 0.9999995802112794, iteration: 8469
loss: 1.0570625066757202,grad_norm: 0.999999663837256, iteration: 8470
loss: 1.0612468719482422,grad_norm: 0.9999995734002922, iteration: 8471
loss: 1.0550026893615723,grad_norm: 0.9999993946238663, iteration: 8472
loss: 1.0722886323928833,grad_norm: 0.9999995485924946, iteration: 8473
loss: 1.0687335729599,grad_norm: 0.9999995638677385, iteration: 8474
loss: 1.0320580005645752,grad_norm: 0.9999996072992414, iteration: 8475
loss: 1.0059401988983154,grad_norm: 0.9999993888922779, iteration: 8476
loss: 1.0920493602752686,grad_norm: 0.9999994109573201, iteration: 8477
loss: 1.0425201654434204,grad_norm: 0.9999994521899891, iteration: 8478
loss: 1.0417447090148926,grad_norm: 0.9999994393903111, iteration: 8479
loss: 1.0595688819885254,grad_norm: 0.9999997265808659, iteration: 8480
loss: 1.0457854270935059,grad_norm: 0.9999994751660662, iteration: 8481
loss: 1.0807603597640991,grad_norm: 0.9999992415207168, iteration: 8482
loss: 1.1102920770645142,grad_norm: 0.9999994661479975, iteration: 8483
loss: 1.0454860925674438,grad_norm: 0.9999996480922149, iteration: 8484
loss: 1.051787257194519,grad_norm: 0.9999994063688246, iteration: 8485
loss: 1.1467936038970947,grad_norm: 0.9999997775806451, iteration: 8486
loss: 1.010162353515625,grad_norm: 0.9999993795820523, iteration: 8487
loss: 1.046077847480774,grad_norm: 0.9999995720283056, iteration: 8488
loss: 1.1364765167236328,grad_norm: 0.9999996940489317, iteration: 8489
loss: 1.0689120292663574,grad_norm: 0.9999996583336015, iteration: 8490
loss: 1.0188465118408203,grad_norm: 0.9999993869254626, iteration: 8491
loss: 1.0645393133163452,grad_norm: 0.9999996061926216, iteration: 8492
loss: 1.0604780912399292,grad_norm: 0.9999995414850679, iteration: 8493
loss: 1.0813440084457397,grad_norm: 0.999999628964786, iteration: 8494
loss: 1.0693804025650024,grad_norm: 0.9999995406835802, iteration: 8495
loss: 1.144558310508728,grad_norm: 0.9999998452814215, iteration: 8496
loss: 1.075873613357544,grad_norm: 0.9999996429036158, iteration: 8497
loss: 1.1350507736206055,grad_norm: 0.9999996644597209, iteration: 8498
loss: 1.0643268823623657,grad_norm: 0.9999996385745162, iteration: 8499
loss: 1.0639142990112305,grad_norm: 0.9999995300518193, iteration: 8500
loss: 1.0549616813659668,grad_norm: 0.999999440458154, iteration: 8501
loss: 1.0389765501022339,grad_norm: 0.9999995051534197, iteration: 8502
loss: 1.0529258251190186,grad_norm: 0.9999995323181162, iteration: 8503
loss: 1.1198203563690186,grad_norm: 0.9999996899963342, iteration: 8504
loss: 1.1065220832824707,grad_norm: 0.9999996445637088, iteration: 8505
loss: 1.0575156211853027,grad_norm: 0.9999998499609714, iteration: 8506
loss: 1.1141248941421509,grad_norm: 0.9999996569084886, iteration: 8507
loss: 1.0448694229125977,grad_norm: 0.9999996231273676, iteration: 8508
loss: 1.0598639249801636,grad_norm: 0.9999997191021737, iteration: 8509
loss: 1.0721015930175781,grad_norm: 0.999999363579895, iteration: 8510
loss: 1.1251037120819092,grad_norm: 0.9999996889233461, iteration: 8511
loss: 1.047624945640564,grad_norm: 0.9999995231026332, iteration: 8512
loss: 1.013346791267395,grad_norm: 0.9999996035273965, iteration: 8513
loss: 1.0408406257629395,grad_norm: 0.9999994268038712, iteration: 8514
loss: 1.0397990942001343,grad_norm: 0.999999514566521, iteration: 8515
loss: 1.103654146194458,grad_norm: 0.9999995038718213, iteration: 8516
loss: 1.069121241569519,grad_norm: 0.9999997123190074, iteration: 8517
loss: 1.0906798839569092,grad_norm: 0.9999995169979943, iteration: 8518
loss: 1.0691167116165161,grad_norm: 0.9999995199176469, iteration: 8519
loss: 1.0646100044250488,grad_norm: 0.9999992762146206, iteration: 8520
loss: 1.090743064880371,grad_norm: 0.9999998169877672, iteration: 8521
loss: 1.016991376876831,grad_norm: 0.9999994857638973, iteration: 8522
loss: 1.0322799682617188,grad_norm: 0.9999995687512715, iteration: 8523
loss: 1.057672142982483,grad_norm: 0.9999993745843125, iteration: 8524
loss: 1.074050784111023,grad_norm: 0.9999993453909582, iteration: 8525
loss: 1.0210283994674683,grad_norm: 0.9999993233744856, iteration: 8526
loss: 1.0688984394073486,grad_norm: 0.9999996431639868, iteration: 8527
loss: 1.0600248575210571,grad_norm: 0.9999995359783934, iteration: 8528
loss: 1.0535484552383423,grad_norm: 0.9999995336806328, iteration: 8529
loss: 1.0949031114578247,grad_norm: 0.9999993970003621, iteration: 8530
loss: 1.0662680864334106,grad_norm: 0.9999996311322779, iteration: 8531
loss: 1.0835031270980835,grad_norm: 0.9999996190919582, iteration: 8532
loss: 1.0275752544403076,grad_norm: 0.9999992934318478, iteration: 8533
loss: 1.016596794128418,grad_norm: 0.9999994813695247, iteration: 8534
loss: 1.0325864553451538,grad_norm: 0.9999995513591022, iteration: 8535
loss: 1.0819981098175049,grad_norm: 0.9999994673549661, iteration: 8536
loss: 1.049553632736206,grad_norm: 0.9999995641415949, iteration: 8537
loss: 1.023988127708435,grad_norm: 0.9999991921462544, iteration: 8538
loss: 1.0753101110458374,grad_norm: 0.9999994839851216, iteration: 8539
loss: 1.0793797969818115,grad_norm: 0.9999995565562412, iteration: 8540
loss: 1.1013325452804565,grad_norm: 0.9999996755003476, iteration: 8541
loss: 0.964961051940918,grad_norm: 0.9999994143077623, iteration: 8542
loss: 1.0383208990097046,grad_norm: 0.9999994974619079, iteration: 8543
loss: 1.0690988302230835,grad_norm: 0.9999996376316965, iteration: 8544
loss: 1.066898226737976,grad_norm: 0.9999993055932997, iteration: 8545
loss: 1.0283297300338745,grad_norm: 0.9999993129982145, iteration: 8546
loss: 1.0190761089324951,grad_norm: 0.9999994050114904, iteration: 8547
loss: 1.1037545204162598,grad_norm: 0.9999996693937911, iteration: 8548
loss: 1.0205514430999756,grad_norm: 0.9999996331462483, iteration: 8549
loss: 1.0674891471862793,grad_norm: 0.9999997193123352, iteration: 8550
loss: 0.9724658131599426,grad_norm: 0.9999995193421133, iteration: 8551
loss: 1.1103779077529907,grad_norm: 0.999999518828767, iteration: 8552
loss: 1.0940287113189697,grad_norm: 0.9999994123990825, iteration: 8553
loss: 1.0916738510131836,grad_norm: 0.9999993810272004, iteration: 8554
loss: 1.0698984861373901,grad_norm: 0.9999996472130183, iteration: 8555
loss: 1.0850272178649902,grad_norm: 0.9999993533689554, iteration: 8556
loss: 1.0164411067962646,grad_norm: 0.9999995842129721, iteration: 8557
loss: 1.072510838508606,grad_norm: 0.9999994500642498, iteration: 8558
loss: 1.0493372678756714,grad_norm: 0.9999995000895003, iteration: 8559
loss: 1.0789936780929565,grad_norm: 0.9999995125952766, iteration: 8560
loss: 1.1042795181274414,grad_norm: 0.9999991799551351, iteration: 8561
loss: 1.0969479084014893,grad_norm: 0.9999994004363937, iteration: 8562
loss: 1.0189913511276245,grad_norm: 0.9999996278559175, iteration: 8563
loss: 1.033939242362976,grad_norm: 0.999999296578818, iteration: 8564
loss: 1.0378779172897339,grad_norm: 0.9999997758478519, iteration: 8565
loss: 1.0988562107086182,grad_norm: 0.9999997163484303, iteration: 8566
loss: 1.1217474937438965,grad_norm: 0.9999996344577291, iteration: 8567
loss: 1.1117385625839233,grad_norm: 0.9999996327665397, iteration: 8568
loss: 1.0132650136947632,grad_norm: 0.9999996145832976, iteration: 8569
loss: 1.0935441255569458,grad_norm: 0.9999998260667284, iteration: 8570
loss: 1.0588622093200684,grad_norm: 0.9999994595487993, iteration: 8571
loss: 1.0487425327301025,grad_norm: 0.9999997178560842, iteration: 8572
loss: 1.1830734014511108,grad_norm: 0.9999998514626196, iteration: 8573
loss: 1.0525763034820557,grad_norm: 0.9999995900454989, iteration: 8574
loss: 1.0653926134109497,grad_norm: 0.9999995468602205, iteration: 8575
loss: 1.0542240142822266,grad_norm: 0.9999995805716582, iteration: 8576
loss: 1.148976445198059,grad_norm: 0.9999996404916045, iteration: 8577
loss: 1.0672919750213623,grad_norm: 0.9999998448737691, iteration: 8578
loss: 1.125489354133606,grad_norm: 0.9999995684404729, iteration: 8579
loss: 1.0562247037887573,grad_norm: 0.9999995050592199, iteration: 8580
loss: 1.0520657300949097,grad_norm: 0.9999996174145381, iteration: 8581
loss: 1.084818959236145,grad_norm: 0.9999996042992965, iteration: 8582
loss: 1.0917021036148071,grad_norm: 0.9999997100565007, iteration: 8583
loss: 0.9934752583503723,grad_norm: 0.9999995664100731, iteration: 8584
loss: 1.0643601417541504,grad_norm: 0.9999994787056415, iteration: 8585
loss: 1.0625277757644653,grad_norm: 0.9999995802864444, iteration: 8586
loss: 1.0366829633712769,grad_norm: 0.9999993009596125, iteration: 8587
loss: 1.0129566192626953,grad_norm: 0.9999994495159447, iteration: 8588
loss: 1.0404472351074219,grad_norm: 0.9999996718847731, iteration: 8589
loss: 1.0762916803359985,grad_norm: 0.9999995820913823, iteration: 8590
loss: 1.095320701599121,grad_norm: 0.9999995221401841, iteration: 8591
loss: 1.071679949760437,grad_norm: 0.9999997057659539, iteration: 8592
loss: 1.053983211517334,grad_norm: 0.9999994038284366, iteration: 8593
loss: 1.0664043426513672,grad_norm: 0.9999996302921372, iteration: 8594
loss: 1.1422806978225708,grad_norm: 0.9999998228942484, iteration: 8595
loss: 1.0343103408813477,grad_norm: 0.9999994827766444, iteration: 8596
loss: 0.9900252819061279,grad_norm: 0.9999995317053773, iteration: 8597
loss: 1.0846506357192993,grad_norm: 0.9999996597371764, iteration: 8598
loss: 1.0467052459716797,grad_norm: 0.9999992884628772, iteration: 8599
loss: 1.003806710243225,grad_norm: 0.9999994088065265, iteration: 8600
loss: 1.0590739250183105,grad_norm: 0.9999997100493182, iteration: 8601
loss: 0.9801503419876099,grad_norm: 0.9999992585977926, iteration: 8602
loss: 1.0393385887145996,grad_norm: 0.9999994910516067, iteration: 8603
loss: 1.1193046569824219,grad_norm: 0.9999997511029947, iteration: 8604
loss: 0.9875631332397461,grad_norm: 0.9999994707464158, iteration: 8605
loss: 1.0963196754455566,grad_norm: 0.9999995221420368, iteration: 8606
loss: 1.0793156623840332,grad_norm: 0.9999997877417797, iteration: 8607
loss: 1.0728718042373657,grad_norm: 0.9999997764383614, iteration: 8608
loss: 1.0880800485610962,grad_norm: 0.9999996106482373, iteration: 8609
loss: 1.011724591255188,grad_norm: 0.9999997110589186, iteration: 8610
loss: 1.166346788406372,grad_norm: 0.9999996925036413, iteration: 8611
loss: 1.0536808967590332,grad_norm: 0.9999994630399076, iteration: 8612
loss: 1.0390113592147827,grad_norm: 0.999999538851477, iteration: 8613
loss: 1.0265425443649292,grad_norm: 0.9999992351469569, iteration: 8614
loss: 1.0895804166793823,grad_norm: 0.999999375190623, iteration: 8615
loss: 1.1034678220748901,grad_norm: 0.9999996187480167, iteration: 8616
loss: 1.058341383934021,grad_norm: 0.9999995691941438, iteration: 8617
loss: 1.079744577407837,grad_norm: 0.9999996498884379, iteration: 8618
loss: 1.0259181261062622,grad_norm: 0.999999515175581, iteration: 8619
loss: 1.0595959424972534,grad_norm: 0.9999996671070435, iteration: 8620
loss: 1.063029170036316,grad_norm: 0.9999991799693742, iteration: 8621
loss: 1.0027949810028076,grad_norm: 0.9999993238362623, iteration: 8622
loss: 1.1114659309387207,grad_norm: 0.9999995686150696, iteration: 8623
loss: 1.137518286705017,grad_norm: 0.9999998095478987, iteration: 8624
loss: 1.064273715019226,grad_norm: 0.9999993927367404, iteration: 8625
loss: 1.0843764543533325,grad_norm: 0.9999997712196822, iteration: 8626
loss: 1.1027228832244873,grad_norm: 0.9999996504526436, iteration: 8627
loss: 1.0609955787658691,grad_norm: 0.9999997477445244, iteration: 8628
loss: 1.0798795223236084,grad_norm: 0.9999996293741182, iteration: 8629
loss: 1.0631171464920044,grad_norm: 0.9999996574606532, iteration: 8630
loss: 1.0952659845352173,grad_norm: 0.9999995918862425, iteration: 8631
loss: 1.0628734827041626,grad_norm: 0.9999993204878405, iteration: 8632
loss: 1.0794545412063599,grad_norm: 0.9999995260583512, iteration: 8633
loss: 1.0887655019760132,grad_norm: 0.9999997895771939, iteration: 8634
loss: 1.1358740329742432,grad_norm: 0.9999998292777151, iteration: 8635
loss: 1.0540415048599243,grad_norm: 0.9999995163924676, iteration: 8636
loss: 1.0984301567077637,grad_norm: 0.9999995428482698, iteration: 8637
loss: 1.0246169567108154,grad_norm: 0.9999992926107308, iteration: 8638
loss: 1.0686860084533691,grad_norm: 0.9999996262114962, iteration: 8639
loss: 1.1076807975769043,grad_norm: 0.999999643197226, iteration: 8640
loss: 1.0553988218307495,grad_norm: 0.9999996618857039, iteration: 8641
loss: 0.9969037175178528,grad_norm: 0.9999992600223307, iteration: 8642
loss: 1.0994607210159302,grad_norm: 0.9999995122171869, iteration: 8643
loss: 1.0490405559539795,grad_norm: 0.9999993668909232, iteration: 8644
loss: 1.0085384845733643,grad_norm: 0.9999994044577818, iteration: 8645
loss: 1.078458309173584,grad_norm: 0.9999996240916412, iteration: 8646
loss: 1.0891586542129517,grad_norm: 0.9999996067101865, iteration: 8647
loss: 1.0285221338272095,grad_norm: 0.9999994117551869, iteration: 8648
loss: 0.9877734184265137,grad_norm: 0.9999992874328496, iteration: 8649
loss: 1.002257227897644,grad_norm: 0.9999993649775246, iteration: 8650
loss: 1.217526912689209,grad_norm: 0.9999998304761415, iteration: 8651
loss: 1.1083347797393799,grad_norm: 0.9999997363053532, iteration: 8652
loss: 1.187550663948059,grad_norm: 0.9999998966277795, iteration: 8653
loss: 1.0032968521118164,grad_norm: 0.9999993335669167, iteration: 8654
loss: 1.0244606733322144,grad_norm: 0.9999996159040346, iteration: 8655
loss: 1.053053379058838,grad_norm: 0.9999995844070372, iteration: 8656
loss: 1.0574254989624023,grad_norm: 0.9999996941204239, iteration: 8657
loss: 0.9858179688453674,grad_norm: 0.9999992734364223, iteration: 8658
loss: 1.1164864301681519,grad_norm: 0.9999994362018951, iteration: 8659
loss: 1.0179346799850464,grad_norm: 0.9999993816303812, iteration: 8660
loss: 1.029046893119812,grad_norm: 0.9999994144737716, iteration: 8661
loss: 1.067873239517212,grad_norm: 0.9999993350359307, iteration: 8662
loss: 1.0377020835876465,grad_norm: 0.9999996735354285, iteration: 8663
loss: 1.0743046998977661,grad_norm: 0.9999997938025984, iteration: 8664
loss: 1.0982365608215332,grad_norm: 0.9999996583115779, iteration: 8665
loss: 1.142540693283081,grad_norm: 0.9999994640853216, iteration: 8666
loss: 1.0857315063476562,grad_norm: 0.9999994198861214, iteration: 8667
loss: 1.069618582725525,grad_norm: 0.9999995846580135, iteration: 8668
loss: 1.0439722537994385,grad_norm: 0.9999992875952979, iteration: 8669
loss: 1.0138955116271973,grad_norm: 0.9999994933704986, iteration: 8670
loss: 1.0496360063552856,grad_norm: 0.9999993744703807, iteration: 8671
loss: 1.0421276092529297,grad_norm: 0.9999993791860151, iteration: 8672
loss: 1.0251176357269287,grad_norm: 0.9999994292352463, iteration: 8673
loss: 1.096552848815918,grad_norm: 0.9999994743939854, iteration: 8674
loss: 1.0768272876739502,grad_norm: 0.9999994051620126, iteration: 8675
loss: 1.0524944067001343,grad_norm: 0.9999995406488011, iteration: 8676
loss: 1.074263572692871,grad_norm: 0.9999997714512251, iteration: 8677
loss: 0.9866775274276733,grad_norm: 0.9999993240008856, iteration: 8678
loss: 1.0118871927261353,grad_norm: 0.9999995603676752, iteration: 8679
loss: 1.002169132232666,grad_norm: 0.9999994411559139, iteration: 8680
loss: 1.0317484140396118,grad_norm: 0.9999996308525438, iteration: 8681
loss: 1.0403358936309814,grad_norm: 0.9999994767680487, iteration: 8682
loss: 1.03774893283844,grad_norm: 0.999999545415912, iteration: 8683
loss: 1.1040124893188477,grad_norm: 0.999999761137365, iteration: 8684
loss: 0.9907259345054626,grad_norm: 0.9999993027256718, iteration: 8685
loss: 1.008493423461914,grad_norm: 0.9999992894688001, iteration: 8686
loss: 1.100510835647583,grad_norm: 0.9999995464986152, iteration: 8687
loss: 1.0897942781448364,grad_norm: 0.9999997758757456, iteration: 8688
loss: 1.1319191455841064,grad_norm: 0.9999997905281487, iteration: 8689
loss: 1.0901042222976685,grad_norm: 0.9999996628133331, iteration: 8690
loss: 1.0065727233886719,grad_norm: 0.9999992884457684, iteration: 8691
loss: 1.0871608257293701,grad_norm: 0.9999993939655638, iteration: 8692
loss: 1.0806021690368652,grad_norm: 0.9999996311871246, iteration: 8693
loss: 1.106938362121582,grad_norm: 0.9999997175921881, iteration: 8694
loss: 1.103186011314392,grad_norm: 0.9999998489604692, iteration: 8695
loss: 1.1408017873764038,grad_norm: 0.9999997571847563, iteration: 8696
loss: 1.0153939723968506,grad_norm: 0.9999994700711824, iteration: 8697
loss: 1.0438324213027954,grad_norm: 0.9999997831889768, iteration: 8698
loss: 1.0455312728881836,grad_norm: 0.9999993899570668, iteration: 8699
loss: 1.0778554677963257,grad_norm: 0.9999996037627245, iteration: 8700
loss: 1.0460498332977295,grad_norm: 0.9999994802666207, iteration: 8701
loss: 1.111295223236084,grad_norm: 0.999999696332263, iteration: 8702
loss: 0.9937912225723267,grad_norm: 0.9999993705562435, iteration: 8703
loss: 1.0558345317840576,grad_norm: 0.9999997249573819, iteration: 8704
loss: 1.0696065425872803,grad_norm: 0.999999527714011, iteration: 8705
loss: 1.0618411302566528,grad_norm: 0.9999996452331924, iteration: 8706
loss: 1.093001127243042,grad_norm: 0.9999996279543694, iteration: 8707
loss: 0.98142009973526,grad_norm: 0.9999994706914112, iteration: 8708
loss: 1.0577207803726196,grad_norm: 0.9999992890641122, iteration: 8709
loss: 1.0113075971603394,grad_norm: 0.9999993024316464, iteration: 8710
loss: 1.0717222690582275,grad_norm: 0.9999997068757052, iteration: 8711
loss: 1.0169498920440674,grad_norm: 0.9999993139185709, iteration: 8712
loss: 1.1083673238754272,grad_norm: 0.9999996754933599, iteration: 8713
loss: 1.0704069137573242,grad_norm: 0.9999993894860442, iteration: 8714
loss: 1.0141857862472534,grad_norm: 0.9999994913137976, iteration: 8715
loss: 1.0295215845108032,grad_norm: 0.9999995309481294, iteration: 8716
loss: 1.0058737993240356,grad_norm: 0.9999993612467426, iteration: 8717
loss: 1.152329683303833,grad_norm: 0.9999997569593736, iteration: 8718
loss: 1.1041324138641357,grad_norm: 0.9999997264895949, iteration: 8719
loss: 1.0956813097000122,grad_norm: 0.9999996016111244, iteration: 8720
loss: 1.125825047492981,grad_norm: 0.9999997603990232, iteration: 8721
loss: 1.0605391263961792,grad_norm: 0.9999993199338756, iteration: 8722
loss: 1.0023714303970337,grad_norm: 0.9999996482377628, iteration: 8723
loss: 1.034125804901123,grad_norm: 0.9999994427530057, iteration: 8724
loss: 1.0587238073349,grad_norm: 0.9999996572386557, iteration: 8725
loss: 1.0690381526947021,grad_norm: 0.9999992876850523, iteration: 8726
loss: 1.0060251951217651,grad_norm: 0.9999994632336089, iteration: 8727
loss: 1.0604993104934692,grad_norm: 0.9999996605703885, iteration: 8728
loss: 1.0617307424545288,grad_norm: 0.9999994318950424, iteration: 8729
loss: 1.0688291788101196,grad_norm: 0.9999995729730057, iteration: 8730
loss: 1.0971179008483887,grad_norm: 0.9999996410735489, iteration: 8731
loss: 1.0955393314361572,grad_norm: 0.9999994495772466, iteration: 8732
loss: 1.077933669090271,grad_norm: 0.9999997874368547, iteration: 8733
loss: 1.0836869478225708,grad_norm: 0.9999995666316354, iteration: 8734
loss: 1.087519645690918,grad_norm: 0.9999995769439615, iteration: 8735
loss: 0.9974433779716492,grad_norm: 0.9999997086061416, iteration: 8736
loss: 1.0685617923736572,grad_norm: 0.9999997791203719, iteration: 8737
loss: 1.0071905851364136,grad_norm: 0.9999994549869005, iteration: 8738
loss: 1.0429322719573975,grad_norm: 0.999999464787186, iteration: 8739
loss: 1.0137871503829956,grad_norm: 0.9999994063759619, iteration: 8740
loss: 1.0838860273361206,grad_norm: 0.9999997191549572, iteration: 8741
loss: 1.0193235874176025,grad_norm: 0.9999994978232293, iteration: 8742
loss: 1.1529279947280884,grad_norm: 0.9999998751702217, iteration: 8743
loss: 1.0439813137054443,grad_norm: 0.9999996153657307, iteration: 8744
loss: 1.0562232732772827,grad_norm: 0.9999993625685256, iteration: 8745
loss: 1.0852723121643066,grad_norm: 0.9999997063674261, iteration: 8746
loss: 1.0784177780151367,grad_norm: 0.9999997328730501, iteration: 8747
loss: 1.0990759134292603,grad_norm: 0.9999995140950892, iteration: 8748
loss: 1.0536538362503052,grad_norm: 0.999999643208037, iteration: 8749
loss: 1.042114019393921,grad_norm: 0.9999996210688548, iteration: 8750
loss: 1.1021150350570679,grad_norm: 0.9999995669888939, iteration: 8751
loss: 1.023611307144165,grad_norm: 0.9999996785833974, iteration: 8752
loss: 1.0713099241256714,grad_norm: 0.9999995897107149, iteration: 8753
loss: 1.037814974784851,grad_norm: 0.9999996987869063, iteration: 8754
loss: 1.1396106481552124,grad_norm: 0.9999997866917381, iteration: 8755
loss: 1.0503127574920654,grad_norm: 0.999999512036736, iteration: 8756
loss: 1.0345828533172607,grad_norm: 0.9999993448591591, iteration: 8757
loss: 1.064208984375,grad_norm: 0.9999995012631155, iteration: 8758
loss: 1.0120365619659424,grad_norm: 0.9999993124721634, iteration: 8759
loss: 1.049006700515747,grad_norm: 0.9999995158369361, iteration: 8760
loss: 1.0703686475753784,grad_norm: 0.9999994688157933, iteration: 8761
loss: 1.102268099784851,grad_norm: 0.9999996530141768, iteration: 8762
loss: 0.998345136642456,grad_norm: 0.9999993479655747, iteration: 8763
loss: 1.1041063070297241,grad_norm: 0.9999993580622346, iteration: 8764
loss: 1.03541898727417,grad_norm: 0.9999995430347639, iteration: 8765
loss: 1.054840326309204,grad_norm: 0.9999996861472294, iteration: 8766
loss: 1.0617965459823608,grad_norm: 0.999999589929398, iteration: 8767
loss: 1.1091233491897583,grad_norm: 0.9999995822227472, iteration: 8768
loss: 1.0609877109527588,grad_norm: 0.9999994138315366, iteration: 8769
loss: 0.9995073676109314,grad_norm: 0.9999994568714639, iteration: 8770
loss: 1.0621955394744873,grad_norm: 0.9999993493959703, iteration: 8771
loss: 1.0078771114349365,grad_norm: 0.9999997732626124, iteration: 8772
loss: 1.0682377815246582,grad_norm: 0.9999993779651765, iteration: 8773
loss: 1.0779991149902344,grad_norm: 0.9999997867854357, iteration: 8774
loss: 1.0742303133010864,grad_norm: 0.9999996626386989, iteration: 8775
loss: 1.0056401491165161,grad_norm: 0.999999591089294, iteration: 8776
loss: 1.0370597839355469,grad_norm: 0.9999994675108662, iteration: 8777
loss: 1.0212510824203491,grad_norm: 0.9999996381122133, iteration: 8778
loss: 1.0157335996627808,grad_norm: 0.999999300582331, iteration: 8779
loss: 1.0664091110229492,grad_norm: 0.9999995280338495, iteration: 8780
loss: 1.0347694158554077,grad_norm: 0.9999995388632802, iteration: 8781
loss: 1.1043570041656494,grad_norm: 0.9999996702361466, iteration: 8782
loss: 1.1181714534759521,grad_norm: 0.9999996735787902, iteration: 8783
loss: 1.0510263442993164,grad_norm: 0.9999993820379154, iteration: 8784
loss: 1.0779078006744385,grad_norm: 0.9999994871385525, iteration: 8785
loss: 1.0263006687164307,grad_norm: 0.999999304532411, iteration: 8786
loss: 1.0919318199157715,grad_norm: 0.9999993017343094, iteration: 8787
loss: 1.061851978302002,grad_norm: 0.9999997083093076, iteration: 8788
loss: 1.0494333505630493,grad_norm: 0.999999355495481, iteration: 8789
loss: 1.0768301486968994,grad_norm: 0.9999997536394754, iteration: 8790
loss: 1.0492786169052124,grad_norm: 0.9999996904718025, iteration: 8791
loss: 1.0465649366378784,grad_norm: 0.9999993752129483, iteration: 8792
loss: 1.111872673034668,grad_norm: 0.9999995347470774, iteration: 8793
loss: 1.027574062347412,grad_norm: 0.9999994028537845, iteration: 8794
loss: 1.092557430267334,grad_norm: 0.9999996374531878, iteration: 8795
loss: 1.0459437370300293,grad_norm: 0.9999998352780655, iteration: 8796
loss: 1.094719648361206,grad_norm: 0.9999994536500583, iteration: 8797
loss: 1.0423935651779175,grad_norm: 0.9999995470524219, iteration: 8798
loss: 1.043842077255249,grad_norm: 0.9999994074270816, iteration: 8799
loss: 1.0595991611480713,grad_norm: 0.9999994824246325, iteration: 8800
loss: 1.0445222854614258,grad_norm: 0.9999993464392398, iteration: 8801
loss: 1.0944828987121582,grad_norm: 0.9999995875226759, iteration: 8802
loss: 1.017106533050537,grad_norm: 0.9999993861506806, iteration: 8803
loss: 1.031467318534851,grad_norm: 0.9999993900260358, iteration: 8804
loss: 1.1022238731384277,grad_norm: 0.9999998812905355, iteration: 8805
loss: 1.0611903667449951,grad_norm: 0.99999955328512, iteration: 8806
loss: 1.0456714630126953,grad_norm: 0.9999994830361688, iteration: 8807
loss: 1.0891201496124268,grad_norm: 0.99999967185412, iteration: 8808
loss: 1.0318479537963867,grad_norm: 0.999999420003014, iteration: 8809
loss: 1.0562667846679688,grad_norm: 0.999999687590481, iteration: 8810
loss: 1.1660935878753662,grad_norm: 0.9999996913531451, iteration: 8811
loss: 1.0900369882583618,grad_norm: 0.9999995994108914, iteration: 8812
loss: 1.0306456089019775,grad_norm: 0.9999994322086735, iteration: 8813
loss: 1.0495785474777222,grad_norm: 0.9999992991381051, iteration: 8814
loss: 1.0477721691131592,grad_norm: 0.9999995248419854, iteration: 8815
loss: 1.0366156101226807,grad_norm: 0.9999995072997394, iteration: 8816
loss: 1.0273159742355347,grad_norm: 0.9999995351466113, iteration: 8817
loss: 1.1735904216766357,grad_norm: 0.9999998675793758, iteration: 8818
loss: 1.0600355863571167,grad_norm: 0.9999993175005047, iteration: 8819
loss: 1.0627540349960327,grad_norm: 0.9999993930711825, iteration: 8820
loss: 1.0558220148086548,grad_norm: 0.9999994579353383, iteration: 8821
loss: 1.0252577066421509,grad_norm: 0.999999581764427, iteration: 8822
loss: 1.0487476587295532,grad_norm: 0.9999998769712977, iteration: 8823
loss: 1.013035535812378,grad_norm: 0.9999995371431977, iteration: 8824
loss: 1.0492700338363647,grad_norm: 0.9999995889000816, iteration: 8825
loss: 1.078298807144165,grad_norm: 0.9999997277685669, iteration: 8826
loss: 1.0558009147644043,grad_norm: 0.9999995222317247, iteration: 8827
loss: 1.070178747177124,grad_norm: 0.9999993660629719, iteration: 8828
loss: 1.0118863582611084,grad_norm: 0.9999993553008331, iteration: 8829
loss: 1.0186071395874023,grad_norm: 0.9999994158487742, iteration: 8830
loss: 1.0353368520736694,grad_norm: 0.9999995006621498, iteration: 8831
loss: 1.0626530647277832,grad_norm: 0.9999996923025088, iteration: 8832
loss: 1.1075164079666138,grad_norm: 0.999999924484448, iteration: 8833
loss: 1.1171700954437256,grad_norm: 0.999999736095755, iteration: 8834
loss: 1.0170888900756836,grad_norm: 0.9999995199168694, iteration: 8835
loss: 1.071489691734314,grad_norm: 0.9999996426596521, iteration: 8836
loss: 1.01167893409729,grad_norm: 0.9999996740092856, iteration: 8837
loss: 1.0666509866714478,grad_norm: 0.9999994525804667, iteration: 8838
loss: 0.9948518872261047,grad_norm: 0.9999994895473423, iteration: 8839
loss: 1.021240234375,grad_norm: 0.9999997704664221, iteration: 8840
loss: 1.0390069484710693,grad_norm: 0.9999992391185996, iteration: 8841
loss: 1.0601558685302734,grad_norm: 0.9999995998247168, iteration: 8842
loss: 1.0906656980514526,grad_norm: 0.9999993677872772, iteration: 8843
loss: 0.9945228099822998,grad_norm: 0.9999995370490283, iteration: 8844
loss: 1.1141389608383179,grad_norm: 0.9999995487029699, iteration: 8845
loss: 1.149780035018921,grad_norm: 0.9999995260627804, iteration: 8846
loss: 1.049949049949646,grad_norm: 0.9999994109153724, iteration: 8847
loss: 1.0621283054351807,grad_norm: 0.9999995268718086, iteration: 8848
loss: 1.135553240776062,grad_norm: 0.9999996881257374, iteration: 8849
loss: 0.9753557443618774,grad_norm: 0.9999994655670061, iteration: 8850
loss: 1.068816900253296,grad_norm: 0.9999993975229147, iteration: 8851
loss: 1.058700680732727,grad_norm: 0.9999996435721059, iteration: 8852
loss: 1.079879641532898,grad_norm: 0.9999994714020516, iteration: 8853
loss: 1.0571222305297852,grad_norm: 0.9999998819441979, iteration: 8854
loss: 1.0000851154327393,grad_norm: 0.9999994956326382, iteration: 8855
loss: 1.0774191617965698,grad_norm: 0.9999996388654985, iteration: 8856
loss: 1.087807536125183,grad_norm: 0.9999997394183529, iteration: 8857
loss: 1.0289822816848755,grad_norm: 0.9999997792363492, iteration: 8858
loss: 1.0783581733703613,grad_norm: 0.9999997984926817, iteration: 8859
loss: 1.1303290128707886,grad_norm: 0.9999996121508345, iteration: 8860
loss: 1.1451539993286133,grad_norm: 0.999999856634844, iteration: 8861
loss: 1.0155748128890991,grad_norm: 0.999999743142782, iteration: 8862
loss: 1.06488037109375,grad_norm: 0.9999993595805446, iteration: 8863
loss: 1.0933873653411865,grad_norm: 0.999999836190444, iteration: 8864
loss: 1.1036796569824219,grad_norm: 0.999999695181141, iteration: 8865
loss: 1.0806244611740112,grad_norm: 0.9999995792704907, iteration: 8866
loss: 1.0113734006881714,grad_norm: 0.9999995596528007, iteration: 8867
loss: 1.060001254081726,grad_norm: 0.9999995559749733, iteration: 8868
loss: 1.0643543004989624,grad_norm: 0.999999351805696, iteration: 8869
loss: 1.0750510692596436,grad_norm: 0.9999998889292749, iteration: 8870
loss: 1.1272318363189697,grad_norm: 0.9999996676062156, iteration: 8871
loss: 1.118301510810852,grad_norm: 0.9999998421464533, iteration: 8872
loss: 1.0800161361694336,grad_norm: 0.9999996153775208, iteration: 8873
loss: 1.1458566188812256,grad_norm: 0.9999997627182455, iteration: 8874
loss: 1.014180302619934,grad_norm: 0.9999995818316567, iteration: 8875
loss: 1.096793532371521,grad_norm: 0.9999994927563165, iteration: 8876
loss: 1.051529884338379,grad_norm: 0.9999994521087575, iteration: 8877
loss: 1.0334036350250244,grad_norm: 0.9999995296445426, iteration: 8878
loss: 1.074996829032898,grad_norm: 0.999999509348148, iteration: 8879
loss: 1.074742317199707,grad_norm: 0.9999994745126575, iteration: 8880
loss: 1.0136935710906982,grad_norm: 0.9999994952950356, iteration: 8881
loss: 1.045586347579956,grad_norm: 0.9999994194124605, iteration: 8882
loss: 1.0520926713943481,grad_norm: 0.9999996133913422, iteration: 8883
loss: 1.112391710281372,grad_norm: 0.9999996250441363, iteration: 8884
loss: 1.0183898210525513,grad_norm: 0.9999993441999002, iteration: 8885
loss: 1.0703959465026855,grad_norm: 0.9999996761974692, iteration: 8886
loss: 1.0024164915084839,grad_norm: 0.9999994917862158, iteration: 8887
loss: 1.057228446006775,grad_norm: 0.9999995499161061, iteration: 8888
loss: 1.0949664115905762,grad_norm: 0.9999996249680957, iteration: 8889
loss: 1.0559483766555786,grad_norm: 0.9999994103559977, iteration: 8890
loss: 1.111275553703308,grad_norm: 0.9999998521057686, iteration: 8891
loss: 1.0092785358428955,grad_norm: 0.9999991628748511, iteration: 8892
loss: 0.9962474703788757,grad_norm: 0.9999993915936873, iteration: 8893
loss: 1.051574945449829,grad_norm: 0.9999995228245961, iteration: 8894
loss: 1.1347965002059937,grad_norm: 0.9999999024903363, iteration: 8895
loss: 1.0522406101226807,grad_norm: 0.9999996515574836, iteration: 8896
loss: 1.1018720865249634,grad_norm: 0.9999996614503782, iteration: 8897
loss: 1.1198132038116455,grad_norm: 0.999999513197296, iteration: 8898
loss: 1.0215295553207397,grad_norm: 0.999999235165745, iteration: 8899
loss: 1.0604064464569092,grad_norm: 0.9999994997922138, iteration: 8900
loss: 1.0406023263931274,grad_norm: 0.999999380696518, iteration: 8901
loss: 1.0180164575576782,grad_norm: 0.9999996897775649, iteration: 8902
loss: 1.1114411354064941,grad_norm: 0.9999997494941057, iteration: 8903
loss: 1.02047860622406,grad_norm: 0.9999995808601793, iteration: 8904
loss: 1.0771032571792603,grad_norm: 0.9999996670300766, iteration: 8905
loss: 1.1346508264541626,grad_norm: 0.99999972071133, iteration: 8906
loss: 1.0986857414245605,grad_norm: 0.9999997539454796, iteration: 8907
loss: 1.0705597400665283,grad_norm: 0.9999995345201557, iteration: 8908
loss: 1.0523306131362915,grad_norm: 0.9999997920578547, iteration: 8909
loss: 1.0953185558319092,grad_norm: 0.9999996764357642, iteration: 8910
loss: 1.0333590507507324,grad_norm: 0.9999995208373107, iteration: 8911
loss: 1.1143126487731934,grad_norm: 0.9999998068584162, iteration: 8912
loss: 1.050803542137146,grad_norm: 0.9999994209140272, iteration: 8913
loss: 1.1445330381393433,grad_norm: 0.9999997532936096, iteration: 8914
loss: 1.0910670757293701,grad_norm: 0.999999618788676, iteration: 8915
loss: 0.9895676970481873,grad_norm: 0.999999357525852, iteration: 8916
loss: 1.077994465827942,grad_norm: 0.9999994602418463, iteration: 8917
loss: 1.0389007329940796,grad_norm: 0.9999994867500885, iteration: 8918
loss: 1.0236762762069702,grad_norm: 0.9999993847334937, iteration: 8919
loss: 1.1317269802093506,grad_norm: 0.999999771848223, iteration: 8920
loss: 1.0229789018630981,grad_norm: 0.999999465420511, iteration: 8921
loss: 1.0536892414093018,grad_norm: 0.9999995169877388, iteration: 8922
loss: 1.0759249925613403,grad_norm: 0.999999680808101, iteration: 8923
loss: 1.0478423833847046,grad_norm: 0.9999995791064522, iteration: 8924
loss: 1.0932096242904663,grad_norm: 0.9999995139227418, iteration: 8925
loss: 1.0742663145065308,grad_norm: 0.9999996140430518, iteration: 8926
loss: 1.0366359949111938,grad_norm: 0.9999992814743438, iteration: 8927
loss: 1.0667164325714111,grad_norm: 0.9999997946461304, iteration: 8928
loss: 1.050532579421997,grad_norm: 0.9999994627904769, iteration: 8929
loss: 1.0122190713882446,grad_norm: 0.9999992080896968, iteration: 8930
loss: 1.0663546323776245,grad_norm: 0.9999994916565175, iteration: 8931
loss: 1.0337892770767212,grad_norm: 0.9999996056868291, iteration: 8932
loss: 1.17488694190979,grad_norm: 0.9999998695666221, iteration: 8933
loss: 1.0992684364318848,grad_norm: 0.9999992713077719, iteration: 8934
loss: 1.050243854522705,grad_norm: 0.9999995708805249, iteration: 8935
loss: 1.0639690160751343,grad_norm: 0.9999996166813241, iteration: 8936
loss: 1.0149401426315308,grad_norm: 0.9999994867444347, iteration: 8937
loss: 1.0683693885803223,grad_norm: 0.9999997650111422, iteration: 8938
loss: 1.0648983716964722,grad_norm: 0.9999996091888013, iteration: 8939
loss: 1.0229793787002563,grad_norm: 0.9999993723790301, iteration: 8940
loss: 1.012503981590271,grad_norm: 0.9999994480400315, iteration: 8941
loss: 1.0328847169876099,grad_norm: 0.9999996641769434, iteration: 8942
loss: 1.035338282585144,grad_norm: 0.9999995190506131, iteration: 8943
loss: 1.057376742362976,grad_norm: 0.9999995329616426, iteration: 8944
loss: 1.0402332544326782,grad_norm: 0.999999514934143, iteration: 8945
loss: 1.0481188297271729,grad_norm: 0.9999994353641235, iteration: 8946
loss: 1.048704743385315,grad_norm: 0.9999997364099086, iteration: 8947
loss: 1.0686061382293701,grad_norm: 0.9999997596097487, iteration: 8948
loss: 1.0842474699020386,grad_norm: 0.9999996009168094, iteration: 8949
loss: 1.109415054321289,grad_norm: 0.9999996739972808, iteration: 8950
loss: 1.046203374862671,grad_norm: 0.9999996181238385, iteration: 8951
loss: 1.0877259969711304,grad_norm: 0.9999996101084522, iteration: 8952
loss: 1.073642373085022,grad_norm: 0.9999995031227011, iteration: 8953
loss: 1.1221235990524292,grad_norm: 0.9999996905650029, iteration: 8954
loss: 1.067163109779358,grad_norm: 0.9999995162215265, iteration: 8955
loss: 1.0512350797653198,grad_norm: 0.9999995280408738, iteration: 8956
loss: 1.0754340887069702,grad_norm: 0.9999997574042534, iteration: 8957
loss: 1.0552685260772705,grad_norm: 0.9999995921312504, iteration: 8958
loss: 1.1332722902297974,grad_norm: 0.999999653542647, iteration: 8959
loss: 1.0745360851287842,grad_norm: 0.9999994211834249, iteration: 8960
loss: 1.0347965955734253,grad_norm: 0.9999995682186574, iteration: 8961
loss: 1.033021330833435,grad_norm: 0.9999997229167916, iteration: 8962
loss: 1.0945912599563599,grad_norm: 0.9999995017095551, iteration: 8963
loss: 0.9934258460998535,grad_norm: 0.9999994180558549, iteration: 8964
loss: 1.0647754669189453,grad_norm: 0.9999993986151007, iteration: 8965
loss: 1.0657002925872803,grad_norm: 0.9999996805300834, iteration: 8966
loss: 1.0720680952072144,grad_norm: 0.9999996367190476, iteration: 8967
loss: 1.0219213962554932,grad_norm: 0.9999994782564979, iteration: 8968
loss: 1.0488758087158203,grad_norm: 0.9999996830533416, iteration: 8969
loss: 1.0197182893753052,grad_norm: 0.999999572296018, iteration: 8970
loss: 1.0913093090057373,grad_norm: 0.999999510886388, iteration: 8971
loss: 1.0672074556350708,grad_norm: 0.9999994895491408, iteration: 8972
loss: 1.0379921197891235,grad_norm: 0.9999996142680706, iteration: 8973
loss: 1.0212745666503906,grad_norm: 0.9999993426730841, iteration: 8974
loss: 1.0452426671981812,grad_norm: 0.9999994882561181, iteration: 8975
loss: 1.1098999977111816,grad_norm: 0.9999997392820961, iteration: 8976
loss: 1.062672734260559,grad_norm: 0.9999995594417859, iteration: 8977
loss: 1.0584065914154053,grad_norm: 0.9999995650902649, iteration: 8978
loss: 1.0354747772216797,grad_norm: 0.9999995515493902, iteration: 8979
loss: 1.0644875764846802,grad_norm: 0.9999994812801859, iteration: 8980
loss: 1.0570907592773438,grad_norm: 0.999999402388829, iteration: 8981
loss: 1.0147325992584229,grad_norm: 0.9999994081391246, iteration: 8982
loss: 1.0437321662902832,grad_norm: 0.9999992199623968, iteration: 8983
loss: 1.0539040565490723,grad_norm: 0.999999835038489, iteration: 8984
loss: 1.1345418691635132,grad_norm: 0.999999579571432, iteration: 8985
loss: 1.0373866558074951,grad_norm: 0.9999995698006394, iteration: 8986
loss: 1.0428597927093506,grad_norm: 0.9999996953869632, iteration: 8987
loss: 1.0780929327011108,grad_norm: 0.9999995937191234, iteration: 8988
loss: 1.1378806829452515,grad_norm: 0.9999996276927509, iteration: 8989
loss: 1.1202549934387207,grad_norm: 0.9999996795146004, iteration: 8990
loss: 1.1150298118591309,grad_norm: 0.9999996444660187, iteration: 8991
loss: 1.0762373208999634,grad_norm: 0.9999996931280714, iteration: 8992
loss: 1.0922311544418335,grad_norm: 0.9999996599593836, iteration: 8993
loss: 1.0830820798873901,grad_norm: 0.9999995171403248, iteration: 8994
loss: 1.0876315832138062,grad_norm: 0.9999995986866408, iteration: 8995
loss: 1.0282315015792847,grad_norm: 0.9999993426956971, iteration: 8996
loss: 1.0036460161209106,grad_norm: 0.9999995465088616, iteration: 8997
loss: 1.0607439279556274,grad_norm: 0.9999995013568477, iteration: 8998
loss: 1.1095131635665894,grad_norm: 0.9999995743159551, iteration: 8999
loss: 1.051021695137024,grad_norm: 0.9999994862545882, iteration: 9000
loss: 1.0863040685653687,grad_norm: 0.9999993696666043, iteration: 9001
loss: 1.0316507816314697,grad_norm: 0.9999994385715524, iteration: 9002
loss: 0.9856348037719727,grad_norm: 0.9999992691297623, iteration: 9003
loss: 1.020614743232727,grad_norm: 0.9999993783366053, iteration: 9004
loss: 1.0492507219314575,grad_norm: 0.9999995936910518, iteration: 9005
loss: 1.06907057762146,grad_norm: 0.9999995056911348, iteration: 9006
loss: 1.0860600471496582,grad_norm: 0.9999997869714978, iteration: 9007
loss: 1.0804998874664307,grad_norm: 0.9999994208866531, iteration: 9008
loss: 1.0175271034240723,grad_norm: 0.9999995955528462, iteration: 9009
loss: 1.115631341934204,grad_norm: 0.999999358671914, iteration: 9010
loss: 1.0218600034713745,grad_norm: 0.9999994517365719, iteration: 9011
loss: 1.056868076324463,grad_norm: 0.9999995064570746, iteration: 9012
loss: 1.0464260578155518,grad_norm: 0.9999994963780243, iteration: 9013
loss: 1.1031410694122314,grad_norm: 0.9999995941559482, iteration: 9014
loss: 0.9872963428497314,grad_norm: 0.999999345935329, iteration: 9015
loss: 1.07339346408844,grad_norm: 0.9999994368676427, iteration: 9016
loss: 1.0376529693603516,grad_norm: 0.9999994111399569, iteration: 9017
loss: 1.0505526065826416,grad_norm: 0.9999995279241262, iteration: 9018
loss: 0.9998295903205872,grad_norm: 0.9999993297930866, iteration: 9019
loss: 1.1241568326950073,grad_norm: 0.9999997557209643, iteration: 9020
loss: 1.0518770217895508,grad_norm: 0.9999992617635877, iteration: 9021
loss: 1.0901998281478882,grad_norm: 0.9999995293491297, iteration: 9022
loss: 1.1357814073562622,grad_norm: 0.9999996207959935, iteration: 9023
loss: 1.0569781064987183,grad_norm: 0.9999995660349236, iteration: 9024
loss: 1.0560696125030518,grad_norm: 0.9999997450684224, iteration: 9025
loss: 1.1119791269302368,grad_norm: 0.999999773991375, iteration: 9026
loss: 1.0376415252685547,grad_norm: 0.9999995296534936, iteration: 9027
loss: 1.0597543716430664,grad_norm: 0.9999993569109767, iteration: 9028
loss: 1.113199234008789,grad_norm: 0.9999996780598199, iteration: 9029
loss: 1.0780930519104004,grad_norm: 0.9999994079132118, iteration: 9030
loss: 1.0867952108383179,grad_norm: 0.9999995164110906, iteration: 9031
loss: 1.0305432081222534,grad_norm: 0.9999993865028807, iteration: 9032
loss: 1.0219006538391113,grad_norm: 0.9999994083841613, iteration: 9033
loss: 1.0234613418579102,grad_norm: 0.9999994506310247, iteration: 9034
loss: 1.065516710281372,grad_norm: 0.9999992778353946, iteration: 9035
loss: 1.130774974822998,grad_norm: 0.9999998155259066, iteration: 9036
loss: 1.0636056661605835,grad_norm: 0.9999993871082226, iteration: 9037
loss: 1.0053004026412964,grad_norm: 0.9999995276564002, iteration: 9038
loss: 1.0758417844772339,grad_norm: 0.9999994979114811, iteration: 9039
loss: 1.0975661277770996,grad_norm: 0.9999995876626115, iteration: 9040
loss: 1.1224292516708374,grad_norm: 0.9999995858096244, iteration: 9041
loss: 1.0063978433609009,grad_norm: 0.9999992761216417, iteration: 9042
loss: 1.0375910997390747,grad_norm: 0.9999995175658609, iteration: 9043
loss: 1.0612645149230957,grad_norm: 0.999999350221021, iteration: 9044
loss: 1.0198701620101929,grad_norm: 0.9999992874820559, iteration: 9045
loss: 1.0227768421173096,grad_norm: 0.9999993893760118, iteration: 9046
loss: 1.1426390409469604,grad_norm: 0.9999994030470102, iteration: 9047
loss: 1.0944586992263794,grad_norm: 0.9999994711396281, iteration: 9048
loss: 1.0852255821228027,grad_norm: 0.9999995745525492, iteration: 9049
loss: 1.110761046409607,grad_norm: 0.9999992950237099, iteration: 9050
loss: 1.0392603874206543,grad_norm: 0.9999995736605014, iteration: 9051
loss: 1.0679715871810913,grad_norm: 0.999999578742005, iteration: 9052
loss: 1.0413405895233154,grad_norm: 0.9999993625424002, iteration: 9053
loss: 1.0414395332336426,grad_norm: 0.999999462734647, iteration: 9054
loss: 1.1072947978973389,grad_norm: 0.9999996698429431, iteration: 9055
loss: 1.0189260244369507,grad_norm: 0.9999993552680928, iteration: 9056
loss: 1.0473765134811401,grad_norm: 0.9999994292577578, iteration: 9057
loss: 1.0099434852600098,grad_norm: 0.999999481177011, iteration: 9058
loss: 1.078031301498413,grad_norm: 0.9999996724095427, iteration: 9059
loss: 1.028146505355835,grad_norm: 0.9999993691662153, iteration: 9060
loss: 1.0456678867340088,grad_norm: 0.9999997205135374, iteration: 9061
loss: 1.0142780542373657,grad_norm: 0.9999995283991788, iteration: 9062
loss: 1.0527666807174683,grad_norm: 0.9999993342814048, iteration: 9063
loss: 1.113032341003418,grad_norm: 0.9999997914728481, iteration: 9064
loss: 1.0515564680099487,grad_norm: 0.9999995068677109, iteration: 9065
loss: 1.1001856327056885,grad_norm: 0.999999472485748, iteration: 9066
loss: 1.0808697938919067,grad_norm: 0.9999994642342874, iteration: 9067
loss: 1.0637943744659424,grad_norm: 0.9999995604924318, iteration: 9068
loss: 1.028356909751892,grad_norm: 0.9999991788103928, iteration: 9069
loss: 1.1105843782424927,grad_norm: 0.999999452650129, iteration: 9070
loss: 1.0607306957244873,grad_norm: 0.9999994426166181, iteration: 9071
loss: 1.0504862070083618,grad_norm: 0.9999997592680059, iteration: 9072
loss: 1.0750114917755127,grad_norm: 0.9999993552820291, iteration: 9073
loss: 1.0749359130859375,grad_norm: 0.9999995914819426, iteration: 9074
loss: 1.050917387008667,grad_norm: 0.9999996919397448, iteration: 9075
loss: 1.0578454732894897,grad_norm: 0.9999996122983591, iteration: 9076
loss: 1.0602439641952515,grad_norm: 0.9999996321172622, iteration: 9077
loss: 1.0742018222808838,grad_norm: 0.9999992812468205, iteration: 9078
loss: 1.1456997394561768,grad_norm: 0.9999997476907034, iteration: 9079
loss: 1.0282394886016846,grad_norm: 0.9999996276435559, iteration: 9080
loss: 1.0529004335403442,grad_norm: 0.9999994283504099, iteration: 9081
loss: 1.0589519739151,grad_norm: 0.999999482852577, iteration: 9082
loss: 1.078262209892273,grad_norm: 0.999999455809109, iteration: 9083
loss: 1.0497713088989258,grad_norm: 0.9999992355136061, iteration: 9084
loss: 1.061424970626831,grad_norm: 0.9999997256713383, iteration: 9085
loss: 1.1014986038208008,grad_norm: 0.9999997617243785, iteration: 9086
loss: 1.051619529724121,grad_norm: 0.9999995895132178, iteration: 9087
loss: 1.0508732795715332,grad_norm: 0.9999994851234418, iteration: 9088
loss: 1.1017521619796753,grad_norm: 0.9999994570375781, iteration: 9089
loss: 1.0909627676010132,grad_norm: 0.9999995902948724, iteration: 9090
loss: 1.0155775547027588,grad_norm: 0.9999994365651859, iteration: 9091
loss: 1.0860044956207275,grad_norm: 0.9999997031037987, iteration: 9092
loss: 0.995954155921936,grad_norm: 0.9999993162586784, iteration: 9093
loss: 1.1024819612503052,grad_norm: 0.9999994196110371, iteration: 9094
loss: 1.0412720441818237,grad_norm: 0.9999997505850169, iteration: 9095
loss: 0.9946444630622864,grad_norm: 0.9999994611910032, iteration: 9096
loss: 0.9744728207588196,grad_norm: 0.9999993860800979, iteration: 9097
loss: 1.1343584060668945,grad_norm: 0.9999996789067322, iteration: 9098
loss: 0.9866359233856201,grad_norm: 0.9999995246262195, iteration: 9099
loss: 1.1270495653152466,grad_norm: 0.9999997827558514, iteration: 9100
loss: 1.053799033164978,grad_norm: 0.999999521522398, iteration: 9101
loss: 1.0193804502487183,grad_norm: 0.9999994527963089, iteration: 9102
loss: 1.081578254699707,grad_norm: 0.9999994545544642, iteration: 9103
loss: 1.0634126663208008,grad_norm: 0.9999994175453919, iteration: 9104
loss: 1.1072523593902588,grad_norm: 0.9999996893283267, iteration: 9105
loss: 1.018499732017517,grad_norm: 0.9999993924867032, iteration: 9106
loss: 1.0940290689468384,grad_norm: 0.9999995728247131, iteration: 9107
loss: 1.0619853734970093,grad_norm: 0.9999994769839912, iteration: 9108
loss: 1.0134793519973755,grad_norm: 0.9999991427595745, iteration: 9109
loss: 1.0396584272384644,grad_norm: 0.9999995653987475, iteration: 9110
loss: 1.0787389278411865,grad_norm: 0.9999994547450689, iteration: 9111
loss: 1.0470812320709229,grad_norm: 0.9999994606105158, iteration: 9112
loss: 1.0319381952285767,grad_norm: 0.9999993309572938, iteration: 9113
loss: 1.081459879875183,grad_norm: 0.9999994872426358, iteration: 9114
loss: 1.0756620168685913,grad_norm: 0.9999993253785768, iteration: 9115
loss: 1.1218962669372559,grad_norm: 0.9999996704904017, iteration: 9116
loss: 1.0829180479049683,grad_norm: 0.9999993973860177, iteration: 9117
loss: 1.0720547437667847,grad_norm: 0.9999995275590903, iteration: 9118
loss: 1.059295654296875,grad_norm: 0.9999993351256068, iteration: 9119
loss: 1.1024740934371948,grad_norm: 0.9999994268044068, iteration: 9120
loss: 1.0530818700790405,grad_norm: 0.9999996650798135, iteration: 9121
loss: 1.0835082530975342,grad_norm: 0.9999996144223311, iteration: 9122
loss: 1.0794267654418945,grad_norm: 0.9999996151644795, iteration: 9123
loss: 1.1009687185287476,grad_norm: 0.9999994101909514, iteration: 9124
loss: 1.0600700378417969,grad_norm: 0.9999995235980144, iteration: 9125
loss: 1.0122487545013428,grad_norm: 0.9999995916422757, iteration: 9126
loss: 1.014947772026062,grad_norm: 0.9999995286687076, iteration: 9127
loss: 1.1421570777893066,grad_norm: 0.9999996332349562, iteration: 9128
loss: 1.0928622484207153,grad_norm: 0.9999996538046283, iteration: 9129
loss: 1.0938321352005005,grad_norm: 0.9999995548590636, iteration: 9130
loss: 1.0693979263305664,grad_norm: 0.999999409630791, iteration: 9131
loss: 1.0366466045379639,grad_norm: 0.9999995689012026, iteration: 9132
loss: 1.0771918296813965,grad_norm: 0.9999994957920639, iteration: 9133
loss: 1.0625542402267456,grad_norm: 0.9999997508612556, iteration: 9134
loss: 1.0602678060531616,grad_norm: 0.9999997198499643, iteration: 9135
loss: 1.0481274127960205,grad_norm: 0.9999995261589849, iteration: 9136
loss: 1.0105737447738647,grad_norm: 0.9999993186497763, iteration: 9137
loss: 1.0808436870574951,grad_norm: 0.9999996357820796, iteration: 9138
loss: 1.0976676940917969,grad_norm: 0.9999996095414836, iteration: 9139
loss: 1.0155012607574463,grad_norm: 0.9999992372719624, iteration: 9140
loss: 1.0223983526229858,grad_norm: 0.9999998063202036, iteration: 9141
loss: 1.0554563999176025,grad_norm: 0.9999994001975806, iteration: 9142
loss: 1.0069339275360107,grad_norm: 0.999999796731758, iteration: 9143
loss: 1.0110706090927124,grad_norm: 0.9999996289832704, iteration: 9144
loss: 1.0437984466552734,grad_norm: 0.9999992475972383, iteration: 9145
loss: 1.115409255027771,grad_norm: 0.9999995472540637, iteration: 9146
loss: 1.0515333414077759,grad_norm: 0.9999995446559364, iteration: 9147
loss: 1.0429245233535767,grad_norm: 0.9999997871010534, iteration: 9148
loss: 1.0651726722717285,grad_norm: 0.9999991873630585, iteration: 9149
loss: 1.1058498620986938,grad_norm: 0.9999996579806888, iteration: 9150
loss: 1.1316485404968262,grad_norm: 0.9999997045375522, iteration: 9151
loss: 1.0876176357269287,grad_norm: 0.9999993760896831, iteration: 9152
loss: 1.0533685684204102,grad_norm: 0.9999994806690847, iteration: 9153
loss: 0.9967947602272034,grad_norm: 0.9999996256967698, iteration: 9154
loss: 1.0559756755828857,grad_norm: 0.9999993547288443, iteration: 9155
loss: 1.1096301078796387,grad_norm: 0.9999997882717132, iteration: 9156
loss: 1.082241177558899,grad_norm: 0.9999998189188918, iteration: 9157
loss: 1.028937578201294,grad_norm: 0.9999997408967902, iteration: 9158
loss: 1.0507011413574219,grad_norm: 0.9999993665330877, iteration: 9159
loss: 1.0818053483963013,grad_norm: 0.9999998501389945, iteration: 9160
loss: 1.1159170866012573,grad_norm: 0.9999997586912116, iteration: 9161
loss: 1.0564888715744019,grad_norm: 0.9999993126240967, iteration: 9162
loss: 1.0428485870361328,grad_norm: 0.9999993772397201, iteration: 9163
loss: 1.0557550191879272,grad_norm: 0.9999994812435128, iteration: 9164
loss: 1.078241229057312,grad_norm: 0.999999614124125, iteration: 9165
loss: 1.0717417001724243,grad_norm: 0.9999996839287744, iteration: 9166
loss: 1.0596035718917847,grad_norm: 0.9999995339278008, iteration: 9167
loss: 1.0859577655792236,grad_norm: 0.999999643505073, iteration: 9168
loss: 1.0448673963546753,grad_norm: 0.9999997411876297, iteration: 9169
loss: 1.0806150436401367,grad_norm: 0.9999998001951022, iteration: 9170
loss: 1.0304932594299316,grad_norm: 0.9999995259746013, iteration: 9171
loss: 1.100234866142273,grad_norm: 0.9999997825864502, iteration: 9172
loss: 1.0959089994430542,grad_norm: 0.999999449061356, iteration: 9173
loss: 1.0302599668502808,grad_norm: 0.9999995913174342, iteration: 9174
loss: 1.081807255744934,grad_norm: 0.9999997159870787, iteration: 9175
loss: 1.0567889213562012,grad_norm: 0.9999994788032163, iteration: 9176
loss: 1.0335516929626465,grad_norm: 0.9999995654677717, iteration: 9177
loss: 1.0834438800811768,grad_norm: 0.999999462506476, iteration: 9178
loss: 1.0504028797149658,grad_norm: 0.99999945772207, iteration: 9179
loss: 1.0313546657562256,grad_norm: 0.999999234262398, iteration: 9180
loss: 1.1066533327102661,grad_norm: 0.9999995898795246, iteration: 9181
loss: 1.0592676401138306,grad_norm: 0.9999994533852273, iteration: 9182
loss: 1.0087295770645142,grad_norm: 0.9999992813811628, iteration: 9183
loss: 1.0289808511734009,grad_norm: 0.9999995902186244, iteration: 9184
loss: 1.0985438823699951,grad_norm: 0.9999998355388655, iteration: 9185
loss: 1.1018645763397217,grad_norm: 0.9999995531425725, iteration: 9186
loss: 1.0154930353164673,grad_norm: 0.9999994268734801, iteration: 9187
loss: 1.1157392263412476,grad_norm: 0.9999997117715692, iteration: 9188
loss: 1.0932399034500122,grad_norm: 0.9999997858100984, iteration: 9189
loss: 1.011374831199646,grad_norm: 0.9999993261667296, iteration: 9190
loss: 1.0737847089767456,grad_norm: 0.9999993869542914, iteration: 9191
loss: 1.1051619052886963,grad_norm: 0.9999998902477133, iteration: 9192
loss: 1.0428582429885864,grad_norm: 0.9999996223632708, iteration: 9193
loss: 1.0861808061599731,grad_norm: 0.9999994024251264, iteration: 9194
loss: 0.9941826462745667,grad_norm: 0.9999994071766284, iteration: 9195
loss: 1.047890543937683,grad_norm: 0.9999993155142408, iteration: 9196
loss: 1.0672051906585693,grad_norm: 0.9999993908104261, iteration: 9197
loss: 1.057602047920227,grad_norm: 0.9999993982086206, iteration: 9198
loss: 1.0373544692993164,grad_norm: 0.9999996134519179, iteration: 9199
loss: 1.0899159908294678,grad_norm: 0.9999997365276351, iteration: 9200
loss: 1.0338691473007202,grad_norm: 0.9999995823575342, iteration: 9201
loss: 1.0618267059326172,grad_norm: 0.9999996205717268, iteration: 9202
loss: 1.0600225925445557,grad_norm: 0.9999996983539325, iteration: 9203
loss: 1.0165143013000488,grad_norm: 0.9999996840552013, iteration: 9204
loss: 0.9936229586601257,grad_norm: 0.9999993031287587, iteration: 9205
loss: 1.0346438884735107,grad_norm: 0.9999997816117336, iteration: 9206
loss: 1.0407602787017822,grad_norm: 0.99999947772472, iteration: 9207
loss: 1.0605024099349976,grad_norm: 0.9999992850565417, iteration: 9208
loss: 1.060349702835083,grad_norm: 0.999999401268363, iteration: 9209
loss: 1.0697875022888184,grad_norm: 0.9999995536494888, iteration: 9210
loss: 1.0465809106826782,grad_norm: 0.9999997066580294, iteration: 9211
loss: 1.0256973505020142,grad_norm: 0.9999992929650089, iteration: 9212
loss: 1.002339243888855,grad_norm: 0.999999337110883, iteration: 9213
loss: 1.041959524154663,grad_norm: 0.9999995109571639, iteration: 9214
loss: 1.0129777193069458,grad_norm: 0.999999744766489, iteration: 9215
loss: 1.0648654699325562,grad_norm: 0.9999996591289637, iteration: 9216
loss: 1.0779038667678833,grad_norm: 0.9999996488694021, iteration: 9217
loss: 1.073132872581482,grad_norm: 0.9999993288236767, iteration: 9218
loss: 1.0617873668670654,grad_norm: 0.999999626055485, iteration: 9219
loss: 1.0575629472732544,grad_norm: 0.9999995636785833, iteration: 9220
loss: 1.0953277349472046,grad_norm: 0.9999994455082823, iteration: 9221
loss: 1.0231621265411377,grad_norm: 0.9999996840224942, iteration: 9222
loss: 1.0163335800170898,grad_norm: 0.999999503689169, iteration: 9223
loss: 1.051195502281189,grad_norm: 0.9999994361555595, iteration: 9224
loss: 0.976519763469696,grad_norm: 0.9999993399863847, iteration: 9225
loss: 1.0626050233840942,grad_norm: 0.9999994299513333, iteration: 9226
loss: 0.9969182014465332,grad_norm: 0.999999304369607, iteration: 9227
loss: 1.0312020778656006,grad_norm: 0.9999992638619258, iteration: 9228
loss: 1.0369383096694946,grad_norm: 0.9999993420145887, iteration: 9229
loss: 1.0608693361282349,grad_norm: 0.9999994783681662, iteration: 9230
loss: 1.0452497005462646,grad_norm: 0.9999995364826902, iteration: 9231
loss: 1.1087957620620728,grad_norm: 0.9999996631894614, iteration: 9232
loss: 1.0538266897201538,grad_norm: 0.9999994704363938, iteration: 9233
loss: 1.0919058322906494,grad_norm: 0.9999995557392779, iteration: 9234
loss: 1.0764343738555908,grad_norm: 0.9999993155283342, iteration: 9235
loss: 1.0542125701904297,grad_norm: 0.9999994103803164, iteration: 9236
loss: 1.0716297626495361,grad_norm: 0.999999520977866, iteration: 9237
loss: 1.0719190835952759,grad_norm: 0.9999995502628174, iteration: 9238
loss: 1.0343228578567505,grad_norm: 0.9999995785828892, iteration: 9239
loss: 1.0460022687911987,grad_norm: 0.9999997104945749, iteration: 9240
loss: 1.0970451831817627,grad_norm: 0.9999995861458295, iteration: 9241
loss: 1.065686583518982,grad_norm: 0.9999998681636223, iteration: 9242
loss: 1.0758980512619019,grad_norm: 0.9999994868101163, iteration: 9243
loss: 1.041870355606079,grad_norm: 0.9999996210007864, iteration: 9244
loss: 1.0367403030395508,grad_norm: 0.999999364993706, iteration: 9245
loss: 1.0556182861328125,grad_norm: 0.9999994551500708, iteration: 9246
loss: 1.1440407037734985,grad_norm: 0.9999995991928008, iteration: 9247
loss: 1.1160136461257935,grad_norm: 0.9999997015514103, iteration: 9248
loss: 0.9814314842224121,grad_norm: 0.9999995210960436, iteration: 9249
loss: 1.039385199546814,grad_norm: 0.999999167809808, iteration: 9250
loss: 1.0481609106063843,grad_norm: 0.9999997569049613, iteration: 9251
loss: 1.0429518222808838,grad_norm: 0.9999994770655167, iteration: 9252
loss: 1.1040481328964233,grad_norm: 0.9999992254408647, iteration: 9253
loss: 1.1138746738433838,grad_norm: 0.9999996222158032, iteration: 9254
loss: 1.0572092533111572,grad_norm: 0.99999951490422, iteration: 9255
loss: 1.057678461074829,grad_norm: 0.9999996649550746, iteration: 9256
loss: 1.0320031642913818,grad_norm: 0.9999991575323365, iteration: 9257
loss: 1.0706899166107178,grad_norm: 0.9999994268650464, iteration: 9258
loss: 0.978549063205719,grad_norm: 0.9999992585076383, iteration: 9259
loss: 1.0271261930465698,grad_norm: 0.9999992745254651, iteration: 9260
loss: 1.0442298650741577,grad_norm: 0.9999995192628609, iteration: 9261
loss: 1.1164323091506958,grad_norm: 0.9999996094672426, iteration: 9262
loss: 1.0536963939666748,grad_norm: 0.999999566020372, iteration: 9263
loss: 1.1107897758483887,grad_norm: 0.999999412858268, iteration: 9264
loss: 1.0790483951568604,grad_norm: 0.9999996846565219, iteration: 9265
loss: 1.0932611227035522,grad_norm: 0.9999996316149502, iteration: 9266
loss: 1.0099313259124756,grad_norm: 0.999999468395334, iteration: 9267
loss: 1.1100069284439087,grad_norm: 0.9999998321131074, iteration: 9268
loss: 1.0648612976074219,grad_norm: 0.999999383328052, iteration: 9269
loss: 1.0222221612930298,grad_norm: 0.999999726601441, iteration: 9270
loss: 1.0526342391967773,grad_norm: 0.999999267409218, iteration: 9271
loss: 1.118985891342163,grad_norm: 0.9999995991313572, iteration: 9272
loss: 1.0421239137649536,grad_norm: 0.9999997283398242, iteration: 9273
loss: 1.0652295351028442,grad_norm: 0.9999994002412091, iteration: 9274
loss: 1.096506118774414,grad_norm: 0.9999996071818014, iteration: 9275
loss: 1.0908827781677246,grad_norm: 0.9999997232843162, iteration: 9276
loss: 1.015275001525879,grad_norm: 0.9999997402686736, iteration: 9277
loss: 1.0722603797912598,grad_norm: 0.999999443194212, iteration: 9278
loss: 1.0259742736816406,grad_norm: 0.999999405745164, iteration: 9279
loss: 1.0343568325042725,grad_norm: 0.9999994446549798, iteration: 9280
loss: 1.0242935419082642,grad_norm: 0.9999993834921507, iteration: 9281
loss: 1.1166964769363403,grad_norm: 0.9999996113834977, iteration: 9282
loss: 1.0594207048416138,grad_norm: 0.9999996624387735, iteration: 9283
loss: 1.045022964477539,grad_norm: 0.9999993983672885, iteration: 9284
loss: 1.066300392150879,grad_norm: 0.9999995389588306, iteration: 9285
loss: 1.0371646881103516,grad_norm: 0.9999994951341161, iteration: 9286
loss: 1.0182231664657593,grad_norm: 0.999999376169303, iteration: 9287
loss: 1.0080825090408325,grad_norm: 0.9999995660339324, iteration: 9288
loss: 1.005531907081604,grad_norm: 0.9999994664448713, iteration: 9289
loss: 1.038299798965454,grad_norm: 0.9999995030104603, iteration: 9290
loss: 1.0658762454986572,grad_norm: 0.9999996936367942, iteration: 9291
loss: 1.0526392459869385,grad_norm: 0.9999993893846716, iteration: 9292
loss: 1.0664787292480469,grad_norm: 0.9999996345370893, iteration: 9293
loss: 1.0741714239120483,grad_norm: 0.9999993713858214, iteration: 9294
loss: 1.1123141050338745,grad_norm: 0.9999996913285525, iteration: 9295
loss: 1.0307643413543701,grad_norm: 0.9999993338620097, iteration: 9296
loss: 1.074205756187439,grad_norm: 0.9999997797204638, iteration: 9297
loss: 1.1126830577850342,grad_norm: 0.9999997374541376, iteration: 9298
loss: 1.0639522075653076,grad_norm: 0.999999345179786, iteration: 9299
loss: 1.1081074476242065,grad_norm: 0.9999997916627876, iteration: 9300
loss: 1.0282726287841797,grad_norm: 0.9999994643356578, iteration: 9301
loss: 1.074899435043335,grad_norm: 0.9999996390375181, iteration: 9302
loss: 1.0136334896087646,grad_norm: 0.9999993240367362, iteration: 9303
loss: 1.0857385396957397,grad_norm: 0.9999996303599602, iteration: 9304
loss: 1.0736943483352661,grad_norm: 0.9999995275416027, iteration: 9305
loss: 0.9962119460105896,grad_norm: 0.9999993570296137, iteration: 9306
loss: 1.058261752128601,grad_norm: 0.9999995320822552, iteration: 9307
loss: 1.0916582345962524,grad_norm: 0.999999477284963, iteration: 9308
loss: 1.1432008743286133,grad_norm: 0.9999997369008584, iteration: 9309
loss: 1.0348987579345703,grad_norm: 0.9999993478119609, iteration: 9310
loss: 1.0408743619918823,grad_norm: 0.9999998957371907, iteration: 9311
loss: 1.0043431520462036,grad_norm: 0.9999993892132144, iteration: 9312
loss: 1.0881037712097168,grad_norm: 0.9999995142449657, iteration: 9313
loss: 1.0417379140853882,grad_norm: 0.9999995967126817, iteration: 9314
loss: 1.2151830196380615,grad_norm: 0.9999996516709986, iteration: 9315
loss: 1.0984681844711304,grad_norm: 0.9999995315058787, iteration: 9316
loss: 1.1120550632476807,grad_norm: 0.9999998213184269, iteration: 9317
loss: 1.071812391281128,grad_norm: 0.9999995119570917, iteration: 9318
loss: 1.1006367206573486,grad_norm: 0.999999707262521, iteration: 9319
loss: 1.0103100538253784,grad_norm: 0.999999613290562, iteration: 9320
loss: 1.0365755558013916,grad_norm: 0.9999991850245996, iteration: 9321
loss: 1.0969585180282593,grad_norm: 0.9999997827783472, iteration: 9322
loss: 0.9896135330200195,grad_norm: 0.9999994255843474, iteration: 9323
loss: 1.0630418062210083,grad_norm: 0.9999995805010778, iteration: 9324
loss: 1.0509164333343506,grad_norm: 0.9999998960457704, iteration: 9325
loss: 1.085729956626892,grad_norm: 0.9999998338022648, iteration: 9326
loss: 1.063137173652649,grad_norm: 0.9999997411322196, iteration: 9327
loss: 1.0930472612380981,grad_norm: 0.9999995025179205, iteration: 9328
loss: 1.0551881790161133,grad_norm: 0.9999994059718048, iteration: 9329
loss: 1.085832953453064,grad_norm: 0.9999993743337899, iteration: 9330
loss: 1.036478042602539,grad_norm: 0.9999993356727735, iteration: 9331
loss: 1.0333635807037354,grad_norm: 0.9999992759628922, iteration: 9332
loss: 1.0866447687149048,grad_norm: 0.9999995315430728, iteration: 9333
loss: 1.0580830574035645,grad_norm: 0.9999997241497508, iteration: 9334
loss: 1.0560302734375,grad_norm: 0.9999995369050814, iteration: 9335
loss: 1.009634256362915,grad_norm: 0.9999993158929843, iteration: 9336
loss: 1.0282400846481323,grad_norm: 0.9999991894018738, iteration: 9337
loss: 1.0989919900894165,grad_norm: 0.9999996798189676, iteration: 9338
loss: 1.118984341621399,grad_norm: 0.9999994281633114, iteration: 9339
loss: 1.0775206089019775,grad_norm: 0.9999993564445346, iteration: 9340
loss: 1.0309953689575195,grad_norm: 0.999999389262753, iteration: 9341
loss: 1.04502272605896,grad_norm: 0.9999998259981344, iteration: 9342
loss: 1.0321487188339233,grad_norm: 0.9999993494630555, iteration: 9343
loss: 1.0425227880477905,grad_norm: 0.9999993117319873, iteration: 9344
loss: 1.0218696594238281,grad_norm: 0.9999994888242615, iteration: 9345
loss: 1.0117374658584595,grad_norm: 0.9999993537148764, iteration: 9346
loss: 1.0256084203720093,grad_norm: 0.999999431989511, iteration: 9347
loss: 1.0532152652740479,grad_norm: 0.9999992835497881, iteration: 9348
loss: 1.0321929454803467,grad_norm: 0.9999993517413961, iteration: 9349
loss: 1.0818613767623901,grad_norm: 0.9999995605611209, iteration: 9350
loss: 1.0786833763122559,grad_norm: 0.9999995109645033, iteration: 9351
loss: 1.0380088090896606,grad_norm: 0.9999994314164503, iteration: 9352
loss: 1.0335458517074585,grad_norm: 0.9999994184676406, iteration: 9353
loss: 1.0204005241394043,grad_norm: 0.999999514700736, iteration: 9354
loss: 1.0232089757919312,grad_norm: 0.9999995751142133, iteration: 9355
loss: 1.044981837272644,grad_norm: 0.9999992730031668, iteration: 9356
loss: 1.0327638387680054,grad_norm: 0.9999997967464643, iteration: 9357
loss: 1.100588083267212,grad_norm: 0.9999995423296235, iteration: 9358
loss: 1.0396592617034912,grad_norm: 0.9999993123356445, iteration: 9359
loss: 1.0687413215637207,grad_norm: 0.9999995844254526, iteration: 9360
loss: 1.1456184387207031,grad_norm: 0.9999995752005726, iteration: 9361
loss: 1.0798674821853638,grad_norm: 0.9999994554642276, iteration: 9362
loss: 1.0416916608810425,grad_norm: 0.9999993016762996, iteration: 9363
loss: 1.0292189121246338,grad_norm: 0.9999995176856619, iteration: 9364
loss: 1.0461684465408325,grad_norm: 0.9999993179774387, iteration: 9365
loss: 1.073109745979309,grad_norm: 0.999999374162557, iteration: 9366
loss: 1.051848292350769,grad_norm: 0.9999994662956988, iteration: 9367
loss: 1.0277713537216187,grad_norm: 0.9999994207026279, iteration: 9368
loss: 1.052937626838684,grad_norm: 0.9999996063571798, iteration: 9369
loss: 1.040097951889038,grad_norm: 0.9999995233959936, iteration: 9370
loss: 1.010855793952942,grad_norm: 0.9999994287176692, iteration: 9371
loss: 1.0143617391586304,grad_norm: 0.9999999047505247, iteration: 9372
loss: 1.082287073135376,grad_norm: 0.9999993937776399, iteration: 9373
loss: 1.0917330980300903,grad_norm: 0.9999995478618281, iteration: 9374
loss: 1.1215708255767822,grad_norm: 0.9999995957398439, iteration: 9375
loss: 1.0723748207092285,grad_norm: 0.9999996072969115, iteration: 9376
loss: 1.0186266899108887,grad_norm: 0.9999992600878823, iteration: 9377
loss: 0.9744275808334351,grad_norm: 0.9999993262292135, iteration: 9378
loss: 1.117124080657959,grad_norm: 0.9999997970736945, iteration: 9379
loss: 1.0207223892211914,grad_norm: 0.9999991933043415, iteration: 9380
loss: 1.0574475526809692,grad_norm: 0.9999995414392011, iteration: 9381
loss: 1.0167639255523682,grad_norm: 0.9999993108738037, iteration: 9382
loss: 1.013452410697937,grad_norm: 0.9999991683727202, iteration: 9383
loss: 1.0499169826507568,grad_norm: 0.9999996239891142, iteration: 9384
loss: 1.0921626091003418,grad_norm: 0.9999997456828316, iteration: 9385
loss: 1.0384362936019897,grad_norm: 0.9999993978879798, iteration: 9386
loss: 1.0741331577301025,grad_norm: 0.9999994386335886, iteration: 9387
loss: 1.0077229738235474,grad_norm: 0.9999992166317947, iteration: 9388
loss: 1.0625574588775635,grad_norm: 0.999999516363002, iteration: 9389
loss: 1.0793901681900024,grad_norm: 0.9999994594760365, iteration: 9390
loss: 1.0094141960144043,grad_norm: 0.999999580166923, iteration: 9391
loss: 1.1150763034820557,grad_norm: 0.9999996371772027, iteration: 9392
loss: 1.053658127784729,grad_norm: 0.9999995285604548, iteration: 9393
loss: 1.0813697576522827,grad_norm: 0.9999996009706232, iteration: 9394
loss: 1.1169854402542114,grad_norm: 0.9999995156024637, iteration: 9395
loss: 1.057611346244812,grad_norm: 0.9999993350499609, iteration: 9396
loss: 1.0357640981674194,grad_norm: 0.9999993454723122, iteration: 9397
loss: 1.086449384689331,grad_norm: 0.9999996002147775, iteration: 9398
loss: 1.119080662727356,grad_norm: 0.9999998030451955, iteration: 9399
loss: 1.0811991691589355,grad_norm: 0.9999994214060742, iteration: 9400
loss: 1.0947901010513306,grad_norm: 0.9999997464032648, iteration: 9401
loss: 1.1116397380828857,grad_norm: 0.999999557361951, iteration: 9402
loss: 1.0045806169509888,grad_norm: 0.9999995229761358, iteration: 9403
loss: 1.0628002882003784,grad_norm: 0.9999993399705865, iteration: 9404
loss: 1.0640943050384521,grad_norm: 0.9999994829070464, iteration: 9405
loss: 1.0410979986190796,grad_norm: 0.9999996706919394, iteration: 9406
loss: 0.9795423746109009,grad_norm: 0.9999993891927534, iteration: 9407
loss: 1.1791975498199463,grad_norm: 0.9999998446470421, iteration: 9408
loss: 1.018222451210022,grad_norm: 0.9999995789780225, iteration: 9409
loss: 1.053824782371521,grad_norm: 0.9999994442108014, iteration: 9410
loss: 1.0832974910736084,grad_norm: 0.9999995883378833, iteration: 9411
loss: 1.0540242195129395,grad_norm: 0.9999996541284502, iteration: 9412
loss: 1.0680140256881714,grad_norm: 0.9999994378125816, iteration: 9413
loss: 1.0554319620132446,grad_norm: 0.9999994979846519, iteration: 9414
loss: 1.0267640352249146,grad_norm: 0.9999994397684853, iteration: 9415
loss: 1.1899679899215698,grad_norm: 0.9999999335891192, iteration: 9416
loss: 1.178499460220337,grad_norm: 0.9999998012283793, iteration: 9417
loss: 1.0540227890014648,grad_norm: 0.9999997846045553, iteration: 9418
loss: 1.0876368284225464,grad_norm: 0.9999995301983519, iteration: 9419
loss: 1.0376359224319458,grad_norm: 0.9999995515863574, iteration: 9420
loss: 1.0941709280014038,grad_norm: 0.9999997453406136, iteration: 9421
loss: 1.0686042308807373,grad_norm: 0.9999998596966369, iteration: 9422
loss: 1.2023347616195679,grad_norm: 0.9999999251277936, iteration: 9423
loss: 1.0526288747787476,grad_norm: 0.9999993025480757, iteration: 9424
loss: 1.0328083038330078,grad_norm: 0.9999995750070585, iteration: 9425
loss: 1.0998491048812866,grad_norm: 0.9999999113931458, iteration: 9426
loss: 1.0654715299606323,grad_norm: 0.9999993228885272, iteration: 9427
loss: 1.0734126567840576,grad_norm: 0.9999995507339648, iteration: 9428
loss: 1.1312053203582764,grad_norm: 0.9999997482990984, iteration: 9429
loss: 1.0848920345306396,grad_norm: 0.9999997786350359, iteration: 9430
loss: 1.0811173915863037,grad_norm: 0.9999998579989084, iteration: 9431
loss: 1.0592132806777954,grad_norm: 0.9999995149275388, iteration: 9432
loss: 1.0485329627990723,grad_norm: 0.9999992817705116, iteration: 9433
loss: 1.050390362739563,grad_norm: 0.9999992826578067, iteration: 9434
loss: 1.1074236631393433,grad_norm: 0.9999999236935473, iteration: 9435
loss: 1.069267988204956,grad_norm: 0.9999995764713644, iteration: 9436
loss: 1.0656622648239136,grad_norm: 0.9999997104385536, iteration: 9437
loss: 1.0974891185760498,grad_norm: 0.9999995691182259, iteration: 9438
loss: 1.1277971267700195,grad_norm: 0.9999996896732378, iteration: 9439
loss: 1.0384612083435059,grad_norm: 0.9999992820106958, iteration: 9440
loss: 1.0562883615493774,grad_norm: 0.9999996258932973, iteration: 9441
loss: 1.0589864253997803,grad_norm: 0.9999995728152888, iteration: 9442
loss: 1.0653889179229736,grad_norm: 0.9999994787683615, iteration: 9443
loss: 1.0249649286270142,grad_norm: 0.9999994096670881, iteration: 9444
loss: 1.0112758874893188,grad_norm: 0.9999993202849299, iteration: 9445
loss: 1.071256399154663,grad_norm: 0.9999996204448718, iteration: 9446
loss: 1.0724468231201172,grad_norm: 0.9999996410878192, iteration: 9447
loss: 1.0569684505462646,grad_norm: 0.9999995453121074, iteration: 9448
loss: 1.1180495023727417,grad_norm: 0.9999997730516366, iteration: 9449
loss: 1.1426469087600708,grad_norm: 0.9999997588066888, iteration: 9450
loss: 1.0632902383804321,grad_norm: 0.9999996280597909, iteration: 9451
loss: 1.1003758907318115,grad_norm: 0.9999995967285235, iteration: 9452
loss: 1.131303310394287,grad_norm: 0.9999997192367821, iteration: 9453
loss: 0.9926013350486755,grad_norm: 0.9999992312972672, iteration: 9454
loss: 1.0197967290878296,grad_norm: 0.9999993338422055, iteration: 9455
loss: 0.9904299378395081,grad_norm: 0.9999992857600064, iteration: 9456
loss: 1.1703286170959473,grad_norm: 0.9999995434553252, iteration: 9457
loss: 1.030411958694458,grad_norm: 0.9999996575844755, iteration: 9458
loss: 1.0552445650100708,grad_norm: 0.9999995428779703, iteration: 9459
loss: 1.0906568765640259,grad_norm: 0.9999997171699778, iteration: 9460
loss: 1.0635290145874023,grad_norm: 0.9999995785257236, iteration: 9461
loss: 1.0053993463516235,grad_norm: 0.999999403288782, iteration: 9462
loss: 1.0692235231399536,grad_norm: 0.9999994567202862, iteration: 9463
loss: 1.0087050199508667,grad_norm: 0.9999995021002586, iteration: 9464
loss: 1.092098355293274,grad_norm: 0.9999995673011368, iteration: 9465
loss: 1.087374210357666,grad_norm: 0.9999999306159785, iteration: 9466
loss: 1.0682469606399536,grad_norm: 0.9999996136697302, iteration: 9467
loss: 1.0144680738449097,grad_norm: 0.9999994802462735, iteration: 9468
loss: 1.0869122743606567,grad_norm: 0.999999489465285, iteration: 9469
loss: 1.0331056118011475,grad_norm: 0.9999994266276017, iteration: 9470
loss: 1.0326741933822632,grad_norm: 0.9999997668826476, iteration: 9471
loss: 1.0148630142211914,grad_norm: 0.9999994299304793, iteration: 9472
loss: 1.0351794958114624,grad_norm: 0.9999995506801305, iteration: 9473
loss: 1.0192900896072388,grad_norm: 0.9999993511664618, iteration: 9474
loss: 1.0274772644042969,grad_norm: 0.999999477156351, iteration: 9475
loss: 1.107901692390442,grad_norm: 0.9999993861597457, iteration: 9476
loss: 1.0618687868118286,grad_norm: 0.9999996688749416, iteration: 9477
loss: 0.9916641712188721,grad_norm: 0.9999994308636679, iteration: 9478
loss: 1.0607759952545166,grad_norm: 0.9999996923844924, iteration: 9479
loss: 1.0825127363204956,grad_norm: 0.9999997040938928, iteration: 9480
loss: 1.1151260137557983,grad_norm: 0.9999996911305091, iteration: 9481
loss: 1.1258283853530884,grad_norm: 0.9999991945616974, iteration: 9482
loss: 1.0800191164016724,grad_norm: 0.9999996091444537, iteration: 9483
loss: 1.0011357069015503,grad_norm: 0.9999994538243585, iteration: 9484
loss: 1.0687940120697021,grad_norm: 0.9999994839107783, iteration: 9485
loss: 1.0350680351257324,grad_norm: 0.9999996183058626, iteration: 9486
loss: 1.0572896003723145,grad_norm: 0.9999996323377397, iteration: 9487
loss: 1.0448774099349976,grad_norm: 0.9999995082745508, iteration: 9488
loss: 1.0560557842254639,grad_norm: 0.9999994691593891, iteration: 9489
loss: 1.088149905204773,grad_norm: 0.9999997519282631, iteration: 9490
loss: 1.0536105632781982,grad_norm: 0.9999994150850698, iteration: 9491
loss: 1.0551949739456177,grad_norm: 0.9999997695946332, iteration: 9492
loss: 0.9945862889289856,grad_norm: 0.9999994404733782, iteration: 9493
loss: 1.0546517372131348,grad_norm: 0.999999412773406, iteration: 9494
loss: 1.1118943691253662,grad_norm: 0.9999996421588713, iteration: 9495
loss: 1.0663450956344604,grad_norm: 0.9999992452068187, iteration: 9496
loss: 1.0704272985458374,grad_norm: 0.9999997670293833, iteration: 9497
loss: 1.007806420326233,grad_norm: 0.9999994130053703, iteration: 9498
loss: 1.0439006090164185,grad_norm: 0.9999996251722685, iteration: 9499
loss: 1.062572717666626,grad_norm: 0.9999996528602197, iteration: 9500
loss: 1.1082472801208496,grad_norm: 0.9999997041413171, iteration: 9501
loss: 1.1194356679916382,grad_norm: 0.9999995663416446, iteration: 9502
loss: 1.2073051929473877,grad_norm: 0.9999998356845051, iteration: 9503
loss: 0.9900690913200378,grad_norm: 0.9999993945924642, iteration: 9504
loss: 1.0172884464263916,grad_norm: 0.9999996520517935, iteration: 9505
loss: 0.9753248691558838,grad_norm: 0.9999994989361293, iteration: 9506
loss: 1.0457919836044312,grad_norm: 0.9999996341882144, iteration: 9507
loss: 1.0923874378204346,grad_norm: 0.9999996035127251, iteration: 9508
loss: 1.1034454107284546,grad_norm: 0.9999998500907071, iteration: 9509
loss: 1.0458018779754639,grad_norm: 0.999999703395001, iteration: 9510
loss: 1.0240402221679688,grad_norm: 0.9999993964416567, iteration: 9511
loss: 1.0689477920532227,grad_norm: 0.9999996511661224, iteration: 9512
loss: 1.0704165697097778,grad_norm: 0.9999995883913282, iteration: 9513
loss: 1.121437668800354,grad_norm: 0.9999997260037063, iteration: 9514
loss: 1.1772890090942383,grad_norm: 0.999999764854814, iteration: 9515
loss: 1.0352323055267334,grad_norm: 0.9999996623210562, iteration: 9516
loss: 1.1068707704544067,grad_norm: 0.9999998661172961, iteration: 9517
loss: 1.0213713645935059,grad_norm: 0.999999643317653, iteration: 9518
loss: 1.043771505355835,grad_norm: 0.9999995289259933, iteration: 9519
loss: 0.9919100999832153,grad_norm: 0.9999993894871848, iteration: 9520
loss: 1.077321171760559,grad_norm: 0.9999996789198269, iteration: 9521
loss: 1.027226448059082,grad_norm: 0.9999995787306427, iteration: 9522
loss: 1.1421624422073364,grad_norm: 0.9999997645614715, iteration: 9523
loss: 0.9948024749755859,grad_norm: 0.9999992829124311, iteration: 9524
loss: 1.085279941558838,grad_norm: 0.999999657788947, iteration: 9525
loss: 1.0199363231658936,grad_norm: 0.9999993036560866, iteration: 9526
loss: 1.0727673768997192,grad_norm: 0.9999995601501986, iteration: 9527
loss: 1.1039789915084839,grad_norm: 0.9999998138656312, iteration: 9528
loss: 1.1275051832199097,grad_norm: 0.9999997598265292, iteration: 9529
loss: 1.0880672931671143,grad_norm: 0.9999996700479907, iteration: 9530
loss: 1.1161141395568848,grad_norm: 0.999999530678077, iteration: 9531
loss: 1.1337969303131104,grad_norm: 0.9999997538844057, iteration: 9532
loss: 1.1154921054840088,grad_norm: 0.9999999602942278, iteration: 9533
loss: 1.0402017831802368,grad_norm: 0.9999992425707777, iteration: 9534
loss: 1.0694692134857178,grad_norm: 0.9999997011206085, iteration: 9535
loss: 1.049171805381775,grad_norm: 0.9999993077441698, iteration: 9536
loss: 1.045465111732483,grad_norm: 0.9999995085274005, iteration: 9537
loss: 1.1093904972076416,grad_norm: 0.9999996734295201, iteration: 9538
loss: 1.001909613609314,grad_norm: 0.9999996018036754, iteration: 9539
loss: 1.0981725454330444,grad_norm: 0.9999996562154869, iteration: 9540
loss: 1.0440901517868042,grad_norm: 0.9999995487471852, iteration: 9541
loss: 1.053279995918274,grad_norm: 0.9999992726952752, iteration: 9542
loss: 1.046359896659851,grad_norm: 0.9999995536289693, iteration: 9543
loss: 1.002676010131836,grad_norm: 0.9999993082686417, iteration: 9544
loss: 1.078766942024231,grad_norm: 0.999999688736412, iteration: 9545
loss: 1.0460195541381836,grad_norm: 0.9999992219160863, iteration: 9546
loss: 1.0977946519851685,grad_norm: 0.9999997323318515, iteration: 9547
loss: 1.0246224403381348,grad_norm: 0.9999997246124176, iteration: 9548
loss: 1.02878737449646,grad_norm: 0.9999998307372354, iteration: 9549
loss: 1.0731570720672607,grad_norm: 0.9999994992948802, iteration: 9550
loss: 1.0529481172561646,grad_norm: 0.9999994558987265, iteration: 9551
loss: 1.0436642169952393,grad_norm: 0.9999992623318841, iteration: 9552
loss: 1.0922205448150635,grad_norm: 0.9999993966341191, iteration: 9553
loss: 1.0047600269317627,grad_norm: 0.9999995042138426, iteration: 9554
loss: 1.045843482017517,grad_norm: 0.9999994672229461, iteration: 9555
loss: 1.0056452751159668,grad_norm: 0.999999377650485, iteration: 9556
loss: 1.001452088356018,grad_norm: 0.9999994069220942, iteration: 9557
loss: 1.0844273567199707,grad_norm: 0.9999996105471053, iteration: 9558
loss: 1.049062967300415,grad_norm: 0.9999994733019495, iteration: 9559
loss: 1.0237370729446411,grad_norm: 0.9999994788877783, iteration: 9560
loss: 1.0216755867004395,grad_norm: 0.9999993915996725, iteration: 9561
loss: 1.1362916231155396,grad_norm: 0.9999997193374568, iteration: 9562
loss: 1.050544023513794,grad_norm: 0.9999993929199852, iteration: 9563
loss: 1.0670911073684692,grad_norm: 0.9999997952096698, iteration: 9564
loss: 1.0676236152648926,grad_norm: 0.9999995284782591, iteration: 9565
loss: 1.0351225137710571,grad_norm: 0.99999964748777, iteration: 9566
loss: 1.0628833770751953,grad_norm: 0.9999994115283836, iteration: 9567
loss: 1.0423182249069214,grad_norm: 0.9999992834913571, iteration: 9568
loss: 1.0901163816452026,grad_norm: 0.9999997112928726, iteration: 9569
loss: 1.1023871898651123,grad_norm: 0.9999996675862283, iteration: 9570
loss: 1.0400893688201904,grad_norm: 0.9999998076203043, iteration: 9571
loss: 1.0373225212097168,grad_norm: 0.9999993160837629, iteration: 9572
loss: 1.1022924184799194,grad_norm: 0.999999822177687, iteration: 9573
loss: 1.1186833381652832,grad_norm: 0.9999995640632062, iteration: 9574
loss: 1.0610932111740112,grad_norm: 0.9999995990249397, iteration: 9575
loss: 1.0611340999603271,grad_norm: 0.9999993653924772, iteration: 9576
loss: 1.096962571144104,grad_norm: 0.9999997688230854, iteration: 9577
loss: 1.1003282070159912,grad_norm: 0.9999996586168256, iteration: 9578
loss: 1.0960315465927124,grad_norm: 0.9999998213860225, iteration: 9579
loss: 1.067184567451477,grad_norm: 0.999999709106382, iteration: 9580
loss: 1.016027808189392,grad_norm: 0.9999995268174268, iteration: 9581
loss: 1.0822123289108276,grad_norm: 0.9999995845934945, iteration: 9582
loss: 1.040502667427063,grad_norm: 0.9999994286561366, iteration: 9583
loss: 1.0425585508346558,grad_norm: 0.9999995279652404, iteration: 9584
loss: 0.9797817468643188,grad_norm: 0.9999993700542226, iteration: 9585
loss: 1.1061393022537231,grad_norm: 0.9999996567925908, iteration: 9586
loss: 1.053459882736206,grad_norm: 0.9999994118438958, iteration: 9587
loss: 1.023353099822998,grad_norm: 0.9999993306008098, iteration: 9588
loss: 0.9833571314811707,grad_norm: 0.9999993423257464, iteration: 9589
loss: 1.0684064626693726,grad_norm: 0.9999992710992518, iteration: 9590
loss: 1.0693168640136719,grad_norm: 0.9999996262453396, iteration: 9591
loss: 1.0796537399291992,grad_norm: 0.9999992155529805, iteration: 9592
loss: 1.005302906036377,grad_norm: 0.9999995383127003, iteration: 9593
loss: 1.068593978881836,grad_norm: 0.9999996845035402, iteration: 9594
loss: 1.129331111907959,grad_norm: 0.9999995909784778, iteration: 9595
loss: 1.0645605325698853,grad_norm: 0.9999992529513871, iteration: 9596
loss: 0.9644041657447815,grad_norm: 0.999999225363886, iteration: 9597
loss: 0.9796162843704224,grad_norm: 0.9999993233744923, iteration: 9598
loss: 1.0377124547958374,grad_norm: 0.9999994720710005, iteration: 9599
loss: 1.0315923690795898,grad_norm: 0.9999994131055326, iteration: 9600
loss: 1.0347609519958496,grad_norm: 0.9999997351939826, iteration: 9601
loss: 1.02113938331604,grad_norm: 0.9999992583495214, iteration: 9602
loss: 1.0247161388397217,grad_norm: 0.9999997094382519, iteration: 9603
loss: 1.0299012660980225,grad_norm: 0.9999993089528894, iteration: 9604
loss: 1.106512188911438,grad_norm: 0.9999994055334447, iteration: 9605
loss: 1.0928572416305542,grad_norm: 0.9999994147273357, iteration: 9606
loss: 1.093106985092163,grad_norm: 0.9999994415943503, iteration: 9607
loss: 1.104554533958435,grad_norm: 0.999999492209752, iteration: 9608
loss: 1.1596988439559937,grad_norm: 0.9999997784928795, iteration: 9609
loss: 1.0181784629821777,grad_norm: 0.9999993102653537, iteration: 9610
loss: 1.0683391094207764,grad_norm: 0.9999996730853009, iteration: 9611
loss: 1.010118007659912,grad_norm: 0.999999593161244, iteration: 9612
loss: 1.0515252351760864,grad_norm: 0.9999993425839189, iteration: 9613
loss: 1.0668292045593262,grad_norm: 0.9999995515431402, iteration: 9614
loss: 1.0517709255218506,grad_norm: 0.9999995630122945, iteration: 9615
loss: 1.0704361200332642,grad_norm: 0.9999993311176525, iteration: 9616
loss: 1.0490403175354004,grad_norm: 0.9999993568371317, iteration: 9617
loss: 1.0706589221954346,grad_norm: 0.9999994354927272, iteration: 9618
loss: 1.0358445644378662,grad_norm: 0.9999994535966251, iteration: 9619
loss: 1.1027981042861938,grad_norm: 0.9999993805242479, iteration: 9620
loss: 1.0266873836517334,grad_norm: 0.9999993553427532, iteration: 9621
loss: 1.0126268863677979,grad_norm: 0.9999993359069878, iteration: 9622
loss: 1.0374245643615723,grad_norm: 0.9999992142463744, iteration: 9623
loss: 0.9885531663894653,grad_norm: 0.9999993332825713, iteration: 9624
loss: 1.054606318473816,grad_norm: 0.9999996776604683, iteration: 9625
loss: 0.9999096393585205,grad_norm: 0.9999997424681448, iteration: 9626
loss: 1.0328607559204102,grad_norm: 0.9999991569046063, iteration: 9627
loss: 1.0879993438720703,grad_norm: 0.9999997203115909, iteration: 9628
loss: 1.0917507410049438,grad_norm: 0.9999995665212336, iteration: 9629
loss: 1.1142606735229492,grad_norm: 0.9999997334210343, iteration: 9630
loss: 1.063698410987854,grad_norm: 0.9999995701897556, iteration: 9631
loss: 1.0148719549179077,grad_norm: 0.9999995082704344, iteration: 9632
loss: 1.092623233795166,grad_norm: 0.9999993995543733, iteration: 9633
loss: 1.0867775678634644,grad_norm: 0.9999994966200471, iteration: 9634
loss: 1.0574485063552856,grad_norm: 0.9999992878508407, iteration: 9635
loss: 1.0943207740783691,grad_norm: 0.9999995887331038, iteration: 9636
loss: 1.0436644554138184,grad_norm: 0.9999996130271548, iteration: 9637
loss: 1.1015220880508423,grad_norm: 0.9999997588838969, iteration: 9638
loss: 1.0528768301010132,grad_norm: 0.9999996046443794, iteration: 9639
loss: 1.0335557460784912,grad_norm: 0.9999995872811696, iteration: 9640
loss: 1.0657362937927246,grad_norm: 0.9999994165530025, iteration: 9641
loss: 1.0513979196548462,grad_norm: 0.9999996239985066, iteration: 9642
loss: 1.1086326837539673,grad_norm: 0.9999996297424738, iteration: 9643
loss: 1.0202924013137817,grad_norm: 0.9999993363753895, iteration: 9644
loss: 1.0414764881134033,grad_norm: 0.9999993921043817, iteration: 9645
loss: 1.005370855331421,grad_norm: 0.9999994540036942, iteration: 9646
loss: 1.0879446268081665,grad_norm: 0.9999995693142749, iteration: 9647
loss: 1.0867222547531128,grad_norm: 0.9999994821800834, iteration: 9648
loss: 1.0352232456207275,grad_norm: 0.9999998634451817, iteration: 9649
loss: 1.0643151998519897,grad_norm: 0.9999994881106172, iteration: 9650
loss: 0.9791055917739868,grad_norm: 0.9999993236452531, iteration: 9651
loss: 1.0598218441009521,grad_norm: 0.999999430780953, iteration: 9652
loss: 1.0650194883346558,grad_norm: 0.999999504400651, iteration: 9653
loss: 1.0188300609588623,grad_norm: 0.9999993325560909, iteration: 9654
loss: 1.0063635110855103,grad_norm: 0.9999995994388767, iteration: 9655
loss: 1.049553632736206,grad_norm: 0.9999992892587987, iteration: 9656
loss: 0.9957126975059509,grad_norm: 0.9999992077868538, iteration: 9657
loss: 1.0669751167297363,grad_norm: 0.9999995575894252, iteration: 9658
loss: 1.0564141273498535,grad_norm: 0.9999995721128839, iteration: 9659
loss: 1.004705548286438,grad_norm: 0.9999994141668715, iteration: 9660
loss: 1.0772404670715332,grad_norm: 0.9999996189111189, iteration: 9661
loss: 1.0882482528686523,grad_norm: 0.9999997320598384, iteration: 9662
loss: 1.0768026113510132,grad_norm: 0.9999993631873401, iteration: 9663
loss: 1.0300037860870361,grad_norm: 0.9999994529186396, iteration: 9664
loss: 1.055053949356079,grad_norm: 0.9999994846671264, iteration: 9665
loss: 1.070887804031372,grad_norm: 0.9999992820018513, iteration: 9666
loss: 0.9921888709068298,grad_norm: 0.9999997537352415, iteration: 9667
loss: 0.9938201904296875,grad_norm: 0.9999993929899251, iteration: 9668
loss: 1.0714553594589233,grad_norm: 0.9999995723227466, iteration: 9669
loss: 1.0309494733810425,grad_norm: 0.9999991487527103, iteration: 9670
loss: 1.0757713317871094,grad_norm: 0.999999285264112, iteration: 9671
loss: 1.0654401779174805,grad_norm: 0.999999572548012, iteration: 9672
loss: 1.1043996810913086,grad_norm: 0.9999998659011958, iteration: 9673
loss: 1.0744037628173828,grad_norm: 0.999999561072184, iteration: 9674
loss: 1.0925527811050415,grad_norm: 0.9999996439458965, iteration: 9675
loss: 1.1153498888015747,grad_norm: 0.9999995496346465, iteration: 9676
loss: 1.0517653226852417,grad_norm: 0.9999995026061617, iteration: 9677
loss: 1.0376683473587036,grad_norm: 0.9999995641663549, iteration: 9678
loss: 1.0348031520843506,grad_norm: 0.9999993724471372, iteration: 9679
loss: 1.103768229484558,grad_norm: 0.9999995345835534, iteration: 9680
loss: 1.0185630321502686,grad_norm: 0.9999995723623415, iteration: 9681
loss: 1.0087624788284302,grad_norm: 0.9999992284766028, iteration: 9682
loss: 1.094534158706665,grad_norm: 0.999999662402393, iteration: 9683
loss: 1.0549968481063843,grad_norm: 0.999999213037749, iteration: 9684
loss: 1.044156789779663,grad_norm: 0.9999995958793969, iteration: 9685
loss: 1.0647220611572266,grad_norm: 0.9999992821941953, iteration: 9686
loss: 1.1082326173782349,grad_norm: 0.9999997202895062, iteration: 9687
loss: 1.0434017181396484,grad_norm: 0.9999994201039463, iteration: 9688
loss: 1.0772204399108887,grad_norm: 0.9999995607060538, iteration: 9689
loss: 1.063708782196045,grad_norm: 0.9999993044039552, iteration: 9690
loss: 1.043044090270996,grad_norm: 0.9999994366832956, iteration: 9691
loss: 1.0291128158569336,grad_norm: 0.9999991860971993, iteration: 9692
loss: 1.031821846961975,grad_norm: 0.999999308944139, iteration: 9693
loss: 0.9990153312683105,grad_norm: 0.999999329531563, iteration: 9694
loss: 1.0515316724777222,grad_norm: 0.9999995156984645, iteration: 9695
loss: 1.0776327848434448,grad_norm: 0.9999996416115299, iteration: 9696
loss: 1.0813339948654175,grad_norm: 0.9999992755424006, iteration: 9697
loss: 1.019238829612732,grad_norm: 0.9999995722567123, iteration: 9698
loss: 1.061022162437439,grad_norm: 0.9999993661820716, iteration: 9699
loss: 1.0212665796279907,grad_norm: 0.9999992297155492, iteration: 9700
loss: 1.0715886354446411,grad_norm: 0.9999993862628358, iteration: 9701
loss: 1.0239198207855225,grad_norm: 0.999999234604423, iteration: 9702
loss: 1.0410712957382202,grad_norm: 0.9999993837675757, iteration: 9703
loss: 1.0796548128128052,grad_norm: 0.9999995770937596, iteration: 9704
loss: 1.0594795942306519,grad_norm: 0.9999992674683948, iteration: 9705
loss: 1.0259898900985718,grad_norm: 0.999999220991529, iteration: 9706
loss: 1.0577467679977417,grad_norm: 0.9999992414873715, iteration: 9707
loss: 1.0168347358703613,grad_norm: 0.999999449663548, iteration: 9708
loss: 1.0745950937271118,grad_norm: 0.9999997597349097, iteration: 9709
loss: 1.1142380237579346,grad_norm: 0.999999856389004, iteration: 9710
loss: 1.0969889163970947,grad_norm: 0.9999997629085804, iteration: 9711
loss: 1.0686284303665161,grad_norm: 0.9999993835823057, iteration: 9712
loss: 1.0946959257125854,grad_norm: 0.9999995109860725, iteration: 9713
loss: 1.0771183967590332,grad_norm: 0.9999992915120846, iteration: 9714
loss: 1.0690351724624634,grad_norm: 0.9999995454761977, iteration: 9715
loss: 1.0052821636199951,grad_norm: 0.9999992976332184, iteration: 9716
loss: 1.021102786064148,grad_norm: 0.9999995611353849, iteration: 9717
loss: 1.0350178480148315,grad_norm: 0.9999994018678422, iteration: 9718
loss: 1.0834378004074097,grad_norm: 0.9999994582856796, iteration: 9719
loss: 1.0203447341918945,grad_norm: 0.9999992650369948, iteration: 9720
loss: 1.0268057584762573,grad_norm: 0.9999995067089035, iteration: 9721
loss: 1.0641638040542603,grad_norm: 0.9999996717074595, iteration: 9722
loss: 1.0304118394851685,grad_norm: 0.9999993276120289, iteration: 9723
loss: 1.0009305477142334,grad_norm: 0.9999993555378128, iteration: 9724
loss: 0.9730128049850464,grad_norm: 0.9999992341406431, iteration: 9725
loss: 1.096558928489685,grad_norm: 0.9999994546095154, iteration: 9726
loss: 1.0048856735229492,grad_norm: 0.9999993462146105, iteration: 9727
loss: 1.060689091682434,grad_norm: 0.9999996312154134, iteration: 9728
loss: 1.0834767818450928,grad_norm: 0.9999996313233468, iteration: 9729
loss: 1.0626931190490723,grad_norm: 0.9999995376740101, iteration: 9730
loss: 1.0789648294448853,grad_norm: 0.9999996122105212, iteration: 9731
loss: 1.0362671613693237,grad_norm: 0.9999992475971204, iteration: 9732
loss: 1.097416877746582,grad_norm: 0.9999995494707914, iteration: 9733
loss: 1.0849312543869019,grad_norm: 0.9999996325862021, iteration: 9734
loss: 1.0359394550323486,grad_norm: 0.9999993954854522, iteration: 9735
loss: 1.0407415628433228,grad_norm: 0.9999998454174752, iteration: 9736
loss: 1.012719988822937,grad_norm: 0.9999994952921195, iteration: 9737
loss: 0.9989380240440369,grad_norm: 0.9999996914596352, iteration: 9738
loss: 1.1097525358200073,grad_norm: 0.9999997643946358, iteration: 9739
loss: 0.9957969188690186,grad_norm: 0.9999994692190856, iteration: 9740
loss: 1.1325507164001465,grad_norm: 0.9999997265821056, iteration: 9741
loss: 1.0441255569458008,grad_norm: 0.9999995753752219, iteration: 9742
loss: 1.0320237874984741,grad_norm: 0.99999938728792, iteration: 9743
loss: 1.0764427185058594,grad_norm: 0.9999995767528208, iteration: 9744
loss: 1.0663987398147583,grad_norm: 0.9999993382539155, iteration: 9745
loss: 1.0684930086135864,grad_norm: 0.9999997517245242, iteration: 9746
loss: 1.1334718465805054,grad_norm: 0.9999998459346102, iteration: 9747
loss: 1.0253217220306396,grad_norm: 0.9999996369435058, iteration: 9748
loss: 1.0169919729232788,grad_norm: 0.999999437118014, iteration: 9749
loss: 1.0046011209487915,grad_norm: 0.9999996478503244, iteration: 9750
loss: 1.0898075103759766,grad_norm: 0.9999998479626889, iteration: 9751
loss: 1.0818833112716675,grad_norm: 0.9999997005032828, iteration: 9752
loss: 1.0382235050201416,grad_norm: 0.9999998222723633, iteration: 9753
loss: 1.115732192993164,grad_norm: 0.9999994861786858, iteration: 9754
loss: 1.0084495544433594,grad_norm: 0.9999993957379676, iteration: 9755
loss: 1.0597871541976929,grad_norm: 0.9999995193050477, iteration: 9756
loss: 1.106916069984436,grad_norm: 0.9999996258244873, iteration: 9757
loss: 1.1656286716461182,grad_norm: 0.9999998193837649, iteration: 9758
loss: 1.1106592416763306,grad_norm: 0.9999995736123198, iteration: 9759
loss: 1.0184857845306396,grad_norm: 0.9999991700374942, iteration: 9760
loss: 1.0664383172988892,grad_norm: 0.9999992695288916, iteration: 9761
loss: 1.0881595611572266,grad_norm: 0.9999997307254036, iteration: 9762
loss: 1.0413048267364502,grad_norm: 0.9999992845105224, iteration: 9763
loss: 1.0500006675720215,grad_norm: 0.9999992800659684, iteration: 9764
loss: 1.0611079931259155,grad_norm: 0.9999994163308338, iteration: 9765
loss: 1.0394781827926636,grad_norm: 0.9999991952729864, iteration: 9766
loss: 1.0031872987747192,grad_norm: 0.9999993464442221, iteration: 9767
loss: 1.0531435012817383,grad_norm: 0.9999994171970031, iteration: 9768
loss: 1.0583480596542358,grad_norm: 0.9999993689348997, iteration: 9769
loss: 1.1089439392089844,grad_norm: 0.9999995358169442, iteration: 9770
loss: 1.1293476819992065,grad_norm: 0.9999994771907041, iteration: 9771
loss: 1.0928659439086914,grad_norm: 0.9999995524712478, iteration: 9772
loss: 1.0319275856018066,grad_norm: 0.9999991902851784, iteration: 9773
loss: 0.9887894988059998,grad_norm: 0.9999993936805609, iteration: 9774
loss: 1.0369914770126343,grad_norm: 0.9999997156651363, iteration: 9775
loss: 1.118019700050354,grad_norm: 0.9999997537394945, iteration: 9776
loss: 1.0661499500274658,grad_norm: 0.9999995752835044, iteration: 9777
loss: 1.1086398363113403,grad_norm: 0.9999996058582409, iteration: 9778
loss: 1.0239611864089966,grad_norm: 0.999999609031296, iteration: 9779
loss: 1.0190192461013794,grad_norm: 0.9999993293663858, iteration: 9780
loss: 1.0340702533721924,grad_norm: 0.9999996326533488, iteration: 9781
loss: 0.9979462027549744,grad_norm: 0.9999990572017147, iteration: 9782
loss: 1.0600547790527344,grad_norm: 0.9999993932565617, iteration: 9783
loss: 1.0428130626678467,grad_norm: 0.9999991498508535, iteration: 9784
loss: 1.1079206466674805,grad_norm: 0.9999997061125683, iteration: 9785
loss: 1.026634931564331,grad_norm: 0.9999994731961059, iteration: 9786
loss: 1.0141561031341553,grad_norm: 0.9999991092764877, iteration: 9787
loss: 1.0770397186279297,grad_norm: 0.9999993575287193, iteration: 9788
loss: 1.0067559480667114,grad_norm: 0.9999991142774581, iteration: 9789
loss: 1.0129777193069458,grad_norm: 0.9999997146776278, iteration: 9790
loss: 1.0608885288238525,grad_norm: 0.9999994093743328, iteration: 9791
loss: 1.0204598903656006,grad_norm: 0.9999995172853435, iteration: 9792
loss: 1.001095175743103,grad_norm: 0.9999991730821196, iteration: 9793
loss: 1.0507869720458984,grad_norm: 0.9999994093473983, iteration: 9794
loss: 0.9985048770904541,grad_norm: 0.9999993858607911, iteration: 9795
loss: 1.076327919960022,grad_norm: 0.9999992603439916, iteration: 9796
loss: 1.0394175052642822,grad_norm: 0.9999995403617299, iteration: 9797
loss: 1.0738234519958496,grad_norm: 0.9999997877098161, iteration: 9798
loss: 1.0664727687835693,grad_norm: 0.9999993905726206, iteration: 9799
loss: 1.0398467779159546,grad_norm: 0.9999993303206453, iteration: 9800
loss: 1.0495296716690063,grad_norm: 0.9999991609239708, iteration: 9801
loss: 1.038573145866394,grad_norm: 0.9999992329472017, iteration: 9802
loss: 1.055286169052124,grad_norm: 0.9999995376935052, iteration: 9803
loss: 1.04530668258667,grad_norm: 0.9999995204028547, iteration: 9804
loss: 0.9757171869277954,grad_norm: 0.9999994225037288, iteration: 9805
loss: 1.0210487842559814,grad_norm: 0.999999204123991, iteration: 9806
loss: 1.040008783340454,grad_norm: 0.9999993198974566, iteration: 9807
loss: 1.170348048210144,grad_norm: 0.9999998643021637, iteration: 9808
loss: 1.0288267135620117,grad_norm: 0.9999994864265818, iteration: 9809
loss: 0.989751935005188,grad_norm: 0.9999994418386557, iteration: 9810
loss: 1.0148019790649414,grad_norm: 0.999999232652757, iteration: 9811
loss: 1.0013482570648193,grad_norm: 0.9999992097130432, iteration: 9812
loss: 1.0188210010528564,grad_norm: 0.99999925266431, iteration: 9813
loss: 1.0460312366485596,grad_norm: 0.9999995156725116, iteration: 9814
loss: 1.067962408065796,grad_norm: 0.9999994521077488, iteration: 9815
loss: 1.0210442543029785,grad_norm: 0.999999375066826, iteration: 9816
loss: 1.0227588415145874,grad_norm: 0.9999993619909088, iteration: 9817
loss: 1.0803191661834717,grad_norm: 0.9999995360390163, iteration: 9818
loss: 1.0676934719085693,grad_norm: 0.9999995789888797, iteration: 9819
loss: 0.9721672534942627,grad_norm: 0.9999991947424712, iteration: 9820
loss: 1.0431002378463745,grad_norm: 0.9999992556355511, iteration: 9821
loss: 1.1226550340652466,grad_norm: 0.9999996497684468, iteration: 9822
loss: 0.9901472926139832,grad_norm: 0.9999993032414354, iteration: 9823
loss: 1.0069116353988647,grad_norm: 0.9999993392895101, iteration: 9824
loss: 1.1438326835632324,grad_norm: 0.9999995326132209, iteration: 9825
loss: 1.0709952116012573,grad_norm: 0.999999277243694, iteration: 9826
loss: 1.1154353618621826,grad_norm: 0.9999996124639927, iteration: 9827
loss: 1.038288950920105,grad_norm: 0.9999994044145679, iteration: 9828
loss: 1.072090983390808,grad_norm: 0.999999376438602, iteration: 9829
loss: 1.0625231266021729,grad_norm: 0.999999292764008, iteration: 9830
loss: 1.0698941946029663,grad_norm: 0.9999993022776953, iteration: 9831
loss: 0.9991803169250488,grad_norm: 0.9999992336052902, iteration: 9832
loss: 1.0487427711486816,grad_norm: 0.9999994266811282, iteration: 9833
loss: 1.080349087715149,grad_norm: 0.9999993850372957, iteration: 9834
loss: 1.05739426612854,grad_norm: 0.9999995123493381, iteration: 9835
loss: 1.0183905363082886,grad_norm: 0.9999993696613314, iteration: 9836
loss: 1.021287441253662,grad_norm: 0.9999995329413514, iteration: 9837
loss: 1.048389196395874,grad_norm: 0.9999993118217263, iteration: 9838
loss: 1.0093027353286743,grad_norm: 0.9999993160363583, iteration: 9839
loss: 1.0283280611038208,grad_norm: 0.9999993285304395, iteration: 9840
loss: 1.0907039642333984,grad_norm: 0.9999996524670985, iteration: 9841
loss: 1.069636583328247,grad_norm: 0.9999998011326212, iteration: 9842
loss: 1.055022120475769,grad_norm: 0.9999996867593771, iteration: 9843
loss: 1.0312162637710571,grad_norm: 0.9999993272511851, iteration: 9844
loss: 1.0555522441864014,grad_norm: 0.9999993247242607, iteration: 9845
loss: 1.1159706115722656,grad_norm: 0.9999997259147835, iteration: 9846
loss: 1.063201904296875,grad_norm: 0.9999996329564345, iteration: 9847
loss: 1.0833251476287842,grad_norm: 0.9999992526694287, iteration: 9848
loss: 1.084765076637268,grad_norm: 0.9999993495124994, iteration: 9849
loss: 1.0668506622314453,grad_norm: 0.9999993187176435, iteration: 9850
loss: 1.014045238494873,grad_norm: 0.999999255037619, iteration: 9851
loss: 1.017559289932251,grad_norm: 0.999999157953963, iteration: 9852
loss: 1.0980478525161743,grad_norm: 0.9999994136051561, iteration: 9853
loss: 1.0529272556304932,grad_norm: 0.9999995052219706, iteration: 9854
loss: 1.0200049877166748,grad_norm: 0.9999995800967876, iteration: 9855
loss: 1.098299503326416,grad_norm: 0.9999994823601115, iteration: 9856
loss: 1.0038286447525024,grad_norm: 0.9999994623526496, iteration: 9857
loss: 1.0329487323760986,grad_norm: 0.9999993935868837, iteration: 9858
loss: 1.0067492723464966,grad_norm: 0.9999995920338975, iteration: 9859
loss: 1.0359324216842651,grad_norm: 0.9999992537269246, iteration: 9860
loss: 1.0811959505081177,grad_norm: 0.9999996931395686, iteration: 9861
loss: 1.041561245918274,grad_norm: 0.9999993270881627, iteration: 9862
loss: 1.03574800491333,grad_norm: 0.9999994855720121, iteration: 9863
loss: 1.0559099912643433,grad_norm: 0.9999997750870583, iteration: 9864
loss: 1.1330796480178833,grad_norm: 0.9999998295886606, iteration: 9865
loss: 1.0529754161834717,grad_norm: 0.9999992664402182, iteration: 9866
loss: 1.0798665285110474,grad_norm: 0.9999994924139428, iteration: 9867
loss: 1.0856815576553345,grad_norm: 0.9999995759362277, iteration: 9868
loss: 1.080893874168396,grad_norm: 0.9999995094661092, iteration: 9869
loss: 1.073208212852478,grad_norm: 0.9999994701769973, iteration: 9870
loss: 1.0833210945129395,grad_norm: 0.9999992595546929, iteration: 9871
loss: 1.0255556106567383,grad_norm: 0.9999993881293577, iteration: 9872
loss: 1.0761405229568481,grad_norm: 0.9999994711213673, iteration: 9873
loss: 1.0254461765289307,grad_norm: 0.9999993291789625, iteration: 9874
loss: 1.0300607681274414,grad_norm: 0.9999993229156867, iteration: 9875
loss: 1.0415382385253906,grad_norm: 0.9999993956304187, iteration: 9876
loss: 0.9947481751441956,grad_norm: 0.9999994123576641, iteration: 9877
loss: 0.9802030324935913,grad_norm: 0.999999088819566, iteration: 9878
loss: 1.0681859254837036,grad_norm: 0.999999672605705, iteration: 9879
loss: 1.0746164321899414,grad_norm: 0.9999993196494572, iteration: 9880
loss: 0.9867839813232422,grad_norm: 0.999999186300072, iteration: 9881
loss: 1.039253830909729,grad_norm: 0.9999991620374769, iteration: 9882
loss: 1.1455984115600586,grad_norm: 0.9999996199839901, iteration: 9883
loss: 1.0427523851394653,grad_norm: 0.9999996022276694, iteration: 9884
loss: 1.0549874305725098,grad_norm: 0.9999993221729254, iteration: 9885
loss: 0.989000678062439,grad_norm: 0.9999993777247872, iteration: 9886
loss: 1.0646533966064453,grad_norm: 0.9999995075184294, iteration: 9887
loss: 1.1054425239562988,grad_norm: 0.999999440745143, iteration: 9888
loss: 1.0713517665863037,grad_norm: 0.9999998213315314, iteration: 9889
loss: 1.0221806764602661,grad_norm: 0.9999992753921741, iteration: 9890
loss: 1.0055675506591797,grad_norm: 0.9999993326044474, iteration: 9891
loss: 1.0457931756973267,grad_norm: 0.9999994593245579, iteration: 9892
loss: 1.0306446552276611,grad_norm: 0.9999994411045835, iteration: 9893
loss: 1.0394068956375122,grad_norm: 0.9999992396481099, iteration: 9894
loss: 1.0540820360183716,grad_norm: 0.9999991614823501, iteration: 9895
loss: 1.0785073041915894,grad_norm: 0.9999997514649744, iteration: 9896
loss: 1.02357017993927,grad_norm: 0.9999993723853652, iteration: 9897
loss: 1.1068116426467896,grad_norm: 0.9999995995453288, iteration: 9898
loss: 1.022376298904419,grad_norm: 0.999999785252503, iteration: 9899
loss: 1.0296597480773926,grad_norm: 0.9999993891537092, iteration: 9900
loss: 1.047531008720398,grad_norm: 0.9999997074276145, iteration: 9901
loss: 1.0969581604003906,grad_norm: 0.9999999656386706, iteration: 9902
loss: 1.041378378868103,grad_norm: 0.9999997667114418, iteration: 9903
loss: 1.049810528755188,grad_norm: 0.999999813621736, iteration: 9904
loss: 1.0735993385314941,grad_norm: 0.9999996570200208, iteration: 9905
loss: 0.9929878115653992,grad_norm: 0.9999992392284747, iteration: 9906
loss: 1.1000416278839111,grad_norm: 0.9999996844597381, iteration: 9907
loss: 0.9595555067062378,grad_norm: 0.9999991895492965, iteration: 9908
loss: 1.0646796226501465,grad_norm: 0.9999992579753701, iteration: 9909
loss: 1.0226877927780151,grad_norm: 0.9999994397666382, iteration: 9910
loss: 1.0655624866485596,grad_norm: 0.9999996452230735, iteration: 9911
loss: 1.0514527559280396,grad_norm: 0.9999996107396794, iteration: 9912
loss: 1.025874137878418,grad_norm: 0.9999993734095747, iteration: 9913
loss: 1.0352387428283691,grad_norm: 0.9999994715709386, iteration: 9914
loss: 1.0480402708053589,grad_norm: 0.9999994429285817, iteration: 9915
loss: 1.1277953386306763,grad_norm: 0.9999996703633828, iteration: 9916
loss: 1.135063886642456,grad_norm: 0.9999997191767375, iteration: 9917
loss: 1.0072888135910034,grad_norm: 0.9999993057242073, iteration: 9918
loss: 1.0684148073196411,grad_norm: 0.9999996743280308, iteration: 9919
loss: 1.0266293287277222,grad_norm: 0.9999997409743415, iteration: 9920
loss: 1.032765507698059,grad_norm: 0.9999994885343289, iteration: 9921
loss: 1.0339468717575073,grad_norm: 0.9999996696302278, iteration: 9922
loss: 1.0695751905441284,grad_norm: 0.9999996075069874, iteration: 9923
loss: 1.07057523727417,grad_norm: 0.9999995087871832, iteration: 9924
loss: 1.1151777505874634,grad_norm: 0.9999998207846669, iteration: 9925
loss: 1.056382417678833,grad_norm: 0.9999995192088726, iteration: 9926
loss: 1.09306001663208,grad_norm: 0.9999995370825887, iteration: 9927
loss: 1.0793195962905884,grad_norm: 0.9999996917537253, iteration: 9928
loss: 1.1353213787078857,grad_norm: 0.9999997255094804, iteration: 9929
loss: 1.0434391498565674,grad_norm: 0.9999992285216357, iteration: 9930
loss: 1.1076394319534302,grad_norm: 0.9999997421206622, iteration: 9931
loss: 1.116547703742981,grad_norm: 0.9999996598750548, iteration: 9932
loss: 1.029608130455017,grad_norm: 0.9999993121082731, iteration: 9933
loss: 1.0396548509597778,grad_norm: 0.999999319286258, iteration: 9934
loss: 1.0726580619812012,grad_norm: 0.9999995553511523, iteration: 9935
loss: 1.1073976755142212,grad_norm: 0.9999997834446991, iteration: 9936
loss: 1.067112684249878,grad_norm: 0.9999996426064138, iteration: 9937
loss: 1.0210429430007935,grad_norm: 0.9999992648008915, iteration: 9938
loss: 1.0469539165496826,grad_norm: 0.9999992403041702, iteration: 9939
loss: 1.0746937990188599,grad_norm: 0.9999996005731928, iteration: 9940
loss: 1.0367764234542847,grad_norm: 0.9999995310557087, iteration: 9941
loss: 1.059736967086792,grad_norm: 0.999999670630629, iteration: 9942
loss: 1.0490400791168213,grad_norm: 0.9999991870701493, iteration: 9943
loss: 1.0500153303146362,grad_norm: 0.9999992769324713, iteration: 9944
loss: 1.0915040969848633,grad_norm: 0.9999994882667732, iteration: 9945
loss: 1.0960783958435059,grad_norm: 0.9999996435138392, iteration: 9946
loss: 1.022653579711914,grad_norm: 0.9999993362592089, iteration: 9947
loss: 1.241276741027832,grad_norm: 0.9999999190415096, iteration: 9948
loss: 1.0647149085998535,grad_norm: 0.9999998633815204, iteration: 9949
loss: 1.1073024272918701,grad_norm: 0.9999995426613814, iteration: 9950
loss: 1.1356526613235474,grad_norm: 0.9999997245535187, iteration: 9951
loss: 1.1064869165420532,grad_norm: 0.9999994710130942, iteration: 9952
loss: 1.0666331052780151,grad_norm: 0.9999995445926521, iteration: 9953
loss: 1.0543231964111328,grad_norm: 0.9999991462952964, iteration: 9954
loss: 1.0412647724151611,grad_norm: 0.9999996330182892, iteration: 9955
loss: 1.0836971998214722,grad_norm: 0.9999997863078977, iteration: 9956
loss: 1.0565241575241089,grad_norm: 0.9999995456859161, iteration: 9957
loss: 1.1487385034561157,grad_norm: 0.99999970640899, iteration: 9958
loss: 1.0990428924560547,grad_norm: 0.9999998491069735, iteration: 9959
loss: 1.1226636171340942,grad_norm: 0.9999997711881968, iteration: 9960
loss: 1.0884292125701904,grad_norm: 0.9999998033069702, iteration: 9961
loss: 1.0417678356170654,grad_norm: 0.9999993010581101, iteration: 9962
loss: 1.0668737888336182,grad_norm: 0.9999995293675481, iteration: 9963
loss: 1.0930981636047363,grad_norm: 0.9999998570926418, iteration: 9964
loss: 1.040779948234558,grad_norm: 0.9999996439189599, iteration: 9965
loss: 1.0423672199249268,grad_norm: 0.9999992604877131, iteration: 9966
loss: 1.0020486116409302,grad_norm: 0.9999996604058591, iteration: 9967
loss: 1.0433783531188965,grad_norm: 0.9999993868185242, iteration: 9968
loss: 1.0577239990234375,grad_norm: 0.9999996161155787, iteration: 9969
loss: 1.0521161556243896,grad_norm: 0.999999761036783, iteration: 9970
loss: 1.0091884136199951,grad_norm: 0.9999994078318307, iteration: 9971
loss: 1.103778600692749,grad_norm: 0.9999998613990745, iteration: 9972
loss: 1.0546199083328247,grad_norm: 0.9999995068925807, iteration: 9973
loss: 1.079410195350647,grad_norm: 0.9999997472256538, iteration: 9974
loss: 1.0419540405273438,grad_norm: 0.9999995590776953, iteration: 9975
loss: 1.0384567975997925,grad_norm: 0.999999659316903, iteration: 9976
loss: 1.0947753190994263,grad_norm: 0.9999995775280872, iteration: 9977
loss: 1.022210717201233,grad_norm: 0.9999993856905255, iteration: 9978
loss: 1.1133326292037964,grad_norm: 0.9999997120609325, iteration: 9979
loss: 1.0618332624435425,grad_norm: 0.9999996105946776, iteration: 9980
loss: 1.0408662557601929,grad_norm: 0.9999997382207492, iteration: 9981
loss: 1.10210120677948,grad_norm: 0.9999998328532944, iteration: 9982
loss: 1.1160974502563477,grad_norm: 0.9999998708791199, iteration: 9983
loss: 1.017734169960022,grad_norm: 0.9999994963461889, iteration: 9984
loss: 1.1058865785598755,grad_norm: 0.9999994102864613, iteration: 9985
loss: 0.9626637697219849,grad_norm: 0.9999993485142199, iteration: 9986
loss: 1.010991096496582,grad_norm: 0.9999992965436969, iteration: 9987
loss: 1.0600507259368896,grad_norm: 0.9999992475050071, iteration: 9988
loss: 1.0392849445343018,grad_norm: 0.999999408103704, iteration: 9989
loss: 1.1273010969161987,grad_norm: 0.9999995835982299, iteration: 9990
loss: 1.092803716659546,grad_norm: 0.9999993831640092, iteration: 9991
loss: 1.0566251277923584,grad_norm: 0.9999995447424413, iteration: 9992
loss: 1.0903955698013306,grad_norm: 0.9999996548425821, iteration: 9993
loss: 1.0075483322143555,grad_norm: 0.9999995240661668, iteration: 9994
loss: 1.0537378787994385,grad_norm: 0.9999994778280354, iteration: 9995
loss: 1.0749446153640747,grad_norm: 0.9999995482060092, iteration: 9996
loss: 1.0552308559417725,grad_norm: 0.9999994730952836, iteration: 9997
loss: 1.1113849878311157,grad_norm: 0.999999643027194, iteration: 9998
loss: 1.0062742233276367,grad_norm: 0.9999997607301518, iteration: 9999
loss: 1.1012979745864868,grad_norm: 0.9999996630354568, iteration: 10000
Evaluating at step 10000
{'val': 1.0093021001666784, 'test': 2.7802651332209285}
loss: 1.0195058584213257,grad_norm: 0.9999995712083192, iteration: 10001
loss: 1.0346473455429077,grad_norm: 0.9999996097725972, iteration: 10002
loss: 1.0453053712844849,grad_norm: 0.9999995174304692, iteration: 10003
loss: 1.0383667945861816,grad_norm: 0.9999996222770676, iteration: 10004
loss: 1.0440133810043335,grad_norm: 0.9999994178650213, iteration: 10005
loss: 1.0913081169128418,grad_norm: 0.9999996524401348, iteration: 10006
loss: 1.0897451639175415,grad_norm: 0.9999996248489883, iteration: 10007
loss: 1.0502427816390991,grad_norm: 0.9999994785407054, iteration: 10008
loss: 1.1261146068572998,grad_norm: 0.9999996997398066, iteration: 10009
loss: 1.038894772529602,grad_norm: 0.9999996110738042, iteration: 10010
loss: 1.0804657936096191,grad_norm: 0.9999995784165152, iteration: 10011
loss: 1.021897792816162,grad_norm: 0.9999994189141431, iteration: 10012
loss: 1.0876250267028809,grad_norm: 0.9999996708334429, iteration: 10013
loss: 1.0771523714065552,grad_norm: 0.999999616078347, iteration: 10014
loss: 1.035790205001831,grad_norm: 0.9999993852528054, iteration: 10015
loss: 1.0622702836990356,grad_norm: 0.9999996570742443, iteration: 10016
loss: 1.0397754907608032,grad_norm: 0.9999993470846638, iteration: 10017
loss: 1.0569548606872559,grad_norm: 0.9999996022019457, iteration: 10018
loss: 1.0910378694534302,grad_norm: 0.9999998184886288, iteration: 10019
loss: 1.0096948146820068,grad_norm: 0.9999992468785727, iteration: 10020
loss: 1.0015259981155396,grad_norm: 0.9999993108232361, iteration: 10021
loss: 0.9862750768661499,grad_norm: 0.9999991484676092, iteration: 10022
loss: 1.0358556509017944,grad_norm: 0.9999998050196391, iteration: 10023
loss: 1.0502015352249146,grad_norm: 0.9999993648320732, iteration: 10024
loss: 1.0303294658660889,grad_norm: 0.9999994844921378, iteration: 10025
loss: 1.0541296005249023,grad_norm: 0.9999994599849859, iteration: 10026
loss: 0.9632329344749451,grad_norm: 0.9999992653030824, iteration: 10027
loss: 1.0917164087295532,grad_norm: 0.999999492595164, iteration: 10028
loss: 1.1018025875091553,grad_norm: 0.9999997103853446, iteration: 10029
loss: 1.100527048110962,grad_norm: 0.9999997479020423, iteration: 10030
loss: 1.0394599437713623,grad_norm: 0.9999992879825863, iteration: 10031
loss: 0.9885956645011902,grad_norm: 0.9999994608255937, iteration: 10032
loss: 0.9854525327682495,grad_norm: 0.9999990615226432, iteration: 10033
loss: 1.105122447013855,grad_norm: 0.9999995386433194, iteration: 10034
loss: 1.0661041736602783,grad_norm: 0.9999994816737171, iteration: 10035
loss: 1.1095871925354004,grad_norm: 0.9999997091473489, iteration: 10036
loss: 1.103102207183838,grad_norm: 0.9999996046084894, iteration: 10037
loss: 1.0367646217346191,grad_norm: 0.9999995395415329, iteration: 10038
loss: 1.0912623405456543,grad_norm: 0.9999994893203523, iteration: 10039
loss: 1.0773530006408691,grad_norm: 0.9999995801200484, iteration: 10040
loss: 1.0287624597549438,grad_norm: 0.9999992143508412, iteration: 10041
loss: 1.0729788541793823,grad_norm: 0.9999996699592278, iteration: 10042
loss: 1.0686652660369873,grad_norm: 0.9999996959212851, iteration: 10043
loss: 1.0853675603866577,grad_norm: 0.9999997822335588, iteration: 10044
loss: 1.0393949747085571,grad_norm: 0.9999996095075143, iteration: 10045
loss: 1.0332274436950684,grad_norm: 0.9999993540476362, iteration: 10046
loss: 1.0831071138381958,grad_norm: 0.9999995716276494, iteration: 10047
loss: 1.0804545879364014,grad_norm: 0.9999996522105375, iteration: 10048
loss: 1.1220955848693848,grad_norm: 0.9999995519360613, iteration: 10049
loss: 1.1393970251083374,grad_norm: 0.999999840541431, iteration: 10050
loss: 1.020457148551941,grad_norm: 0.9999993376043844, iteration: 10051
loss: 1.042467474937439,grad_norm: 0.9999994643685346, iteration: 10052
loss: 1.0391044616699219,grad_norm: 0.9999993310971381, iteration: 10053
loss: 1.0787612199783325,grad_norm: 0.9999996428325022, iteration: 10054
loss: 1.0364363193511963,grad_norm: 0.9999992237016115, iteration: 10055
loss: 1.0626031160354614,grad_norm: 0.9999992098712916, iteration: 10056
loss: 1.1806527376174927,grad_norm: 0.9999996350161502, iteration: 10057
loss: 1.0587446689605713,grad_norm: 0.99999960393453, iteration: 10058
loss: 1.0512892007827759,grad_norm: 0.9999992856901545, iteration: 10059
loss: 1.0157731771469116,grad_norm: 0.9999995999586506, iteration: 10060
loss: 1.0101150274276733,grad_norm: 0.9999994117000685, iteration: 10061
loss: 1.0936028957366943,grad_norm: 0.9999995495717515, iteration: 10062
loss: 1.0365629196166992,grad_norm: 0.9999993516677165, iteration: 10063
loss: 1.057174801826477,grad_norm: 0.9999995327716138, iteration: 10064
loss: 1.0201148986816406,grad_norm: 0.9999993632976234, iteration: 10065
loss: 1.0282127857208252,grad_norm: 0.9999997955122947, iteration: 10066
loss: 1.0660781860351562,grad_norm: 0.9999995634701563, iteration: 10067
loss: 0.9983433485031128,grad_norm: 0.999999395894788, iteration: 10068
loss: 1.1197160482406616,grad_norm: 0.9999997212374145, iteration: 10069
loss: 1.0252052545547485,grad_norm: 0.9999996592700122, iteration: 10070
loss: 1.0734001398086548,grad_norm: 0.9999994788333155, iteration: 10071
loss: 1.082999348640442,grad_norm: 0.9999994728610264, iteration: 10072
loss: 1.0536872148513794,grad_norm: 0.999999407775756, iteration: 10073
loss: 1.0764473676681519,grad_norm: 0.9999993014452851, iteration: 10074
loss: 1.064371109008789,grad_norm: 0.9999993841705376, iteration: 10075
loss: 1.024367332458496,grad_norm: 0.9999992741206203, iteration: 10076
loss: 1.1059151887893677,grad_norm: 0.9999997198758989, iteration: 10077
loss: 1.0414801836013794,grad_norm: 0.999999356191638, iteration: 10078
loss: 1.0155764818191528,grad_norm: 0.9999992845760464, iteration: 10079
loss: 1.03451406955719,grad_norm: 0.9999992383894964, iteration: 10080
loss: 1.0228513479232788,grad_norm: 0.9999995431876644, iteration: 10081
loss: 0.9937618374824524,grad_norm: 0.9999993377567142, iteration: 10082
loss: 1.003531575202942,grad_norm: 0.9999994304478235, iteration: 10083
loss: 1.0268722772598267,grad_norm: 0.9999994796188868, iteration: 10084
loss: 1.136626124382019,grad_norm: 0.9999995952758939, iteration: 10085
loss: 1.002241611480713,grad_norm: 0.999999316869197, iteration: 10086
loss: 1.095913052558899,grad_norm: 0.9999994059071454, iteration: 10087
loss: 1.0224697589874268,grad_norm: 0.9999994660747445, iteration: 10088
loss: 1.0407570600509644,grad_norm: 0.9999994296136689, iteration: 10089
loss: 1.0011831521987915,grad_norm: 0.9999994311169176, iteration: 10090
loss: 1.0074690580368042,grad_norm: 0.9999991388257016, iteration: 10091
loss: 1.0221890211105347,grad_norm: 0.9999994429567665, iteration: 10092
loss: 1.0362197160720825,grad_norm: 0.9999992093669205, iteration: 10093
loss: 1.0783722400665283,grad_norm: 0.9999994720098306, iteration: 10094
loss: 1.0100399255752563,grad_norm: 0.9999993085642965, iteration: 10095
loss: 1.0589609146118164,grad_norm: 0.9999995736832901, iteration: 10096
loss: 1.0877115726470947,grad_norm: 0.9999993505218596, iteration: 10097
loss: 1.0868630409240723,grad_norm: 0.9999994975680943, iteration: 10098
loss: 1.0140385627746582,grad_norm: 0.999999139021121, iteration: 10099
loss: 1.02433443069458,grad_norm: 0.9999992714202495, iteration: 10100
loss: 1.0011110305786133,grad_norm: 0.9999993432875122, iteration: 10101
loss: 1.070133924484253,grad_norm: 0.9999994166656122, iteration: 10102
loss: 1.0208543539047241,grad_norm: 0.9999994905107068, iteration: 10103
loss: 1.0341651439666748,grad_norm: 0.9999995249506564, iteration: 10104
loss: 1.0184106826782227,grad_norm: 0.999999271904695, iteration: 10105
loss: 1.025247573852539,grad_norm: 0.9999995326148233, iteration: 10106
loss: 1.0491690635681152,grad_norm: 0.9999995842271202, iteration: 10107
loss: 1.0354365110397339,grad_norm: 0.9999991735082275, iteration: 10108
loss: 1.0182205438613892,grad_norm: 0.9999994197549285, iteration: 10109
loss: 1.0740318298339844,grad_norm: 0.9999998041049476, iteration: 10110
loss: 1.0935924053192139,grad_norm: 0.9999993675073809, iteration: 10111
loss: 1.0774773359298706,grad_norm: 0.9999993407719421, iteration: 10112
loss: 1.016942024230957,grad_norm: 0.9999993613334296, iteration: 10113
loss: 0.997947096824646,grad_norm: 0.9999995100824027, iteration: 10114
loss: 0.9638634920120239,grad_norm: 0.9999991338667106, iteration: 10115
loss: 1.0593085289001465,grad_norm: 0.9999997399715279, iteration: 10116
loss: 1.0532022714614868,grad_norm: 0.99999927969451, iteration: 10117
loss: 1.0232378244400024,grad_norm: 0.9999993355185338, iteration: 10118
loss: 1.0499753952026367,grad_norm: 0.9999995070005616, iteration: 10119
loss: 1.0259249210357666,grad_norm: 0.9999996489170121, iteration: 10120
loss: 1.1092497110366821,grad_norm: 0.9999998267219103, iteration: 10121
loss: 1.1282131671905518,grad_norm: 0.9999996498961714, iteration: 10122
loss: 1.0616511106491089,grad_norm: 0.9999996966383976, iteration: 10123
loss: 1.0432889461517334,grad_norm: 0.9999993767305374, iteration: 10124
loss: 1.0604920387268066,grad_norm: 0.9999995935523913, iteration: 10125
loss: 1.0982166528701782,grad_norm: 0.9999993054402511, iteration: 10126
loss: 1.0420554876327515,grad_norm: 0.9999996805275339, iteration: 10127
loss: 1.0688813924789429,grad_norm: 0.9999996478719166, iteration: 10128
loss: 1.0622575283050537,grad_norm: 0.9999995381671904, iteration: 10129
loss: 1.0573033094406128,grad_norm: 0.9999995675818987, iteration: 10130
loss: 1.1103723049163818,grad_norm: 0.9999993159889011, iteration: 10131
loss: 1.0410999059677124,grad_norm: 0.9999993069064798, iteration: 10132
loss: 1.0667357444763184,grad_norm: 0.9999993096396943, iteration: 10133
loss: 1.066489338874817,grad_norm: 0.9999997322067077, iteration: 10134
loss: 1.1744706630706787,grad_norm: 0.9999996693379729, iteration: 10135
loss: 1.0646772384643555,grad_norm: 0.9999995448360273, iteration: 10136
loss: 1.1142886877059937,grad_norm: 0.999999668551929, iteration: 10137
loss: 1.0794765949249268,grad_norm: 0.9999994904199857, iteration: 10138
loss: 1.060407280921936,grad_norm: 0.9999994697330349, iteration: 10139
loss: 1.0602531433105469,grad_norm: 0.9999992919534635, iteration: 10140
loss: 1.060896873474121,grad_norm: 0.9999993401803622, iteration: 10141
loss: 1.0105886459350586,grad_norm: 0.999999235999594, iteration: 10142
loss: 1.031823754310608,grad_norm: 0.9999996863186759, iteration: 10143
loss: 1.0580031871795654,grad_norm: 0.9999994357474049, iteration: 10144
loss: 1.0931432247161865,grad_norm: 0.9999994603939293, iteration: 10145
loss: 1.0655417442321777,grad_norm: 0.9999993131899328, iteration: 10146
loss: 1.0299566984176636,grad_norm: 0.9999994400247665, iteration: 10147
loss: 1.010589838027954,grad_norm: 0.9999996433956466, iteration: 10148
loss: 1.0158107280731201,grad_norm: 0.999999374995631, iteration: 10149
loss: 1.0412404537200928,grad_norm: 0.99999960808386, iteration: 10150
loss: 1.0371854305267334,grad_norm: 0.9999994352291418, iteration: 10151
loss: 1.0663483142852783,grad_norm: 0.9999993950314845, iteration: 10152
loss: 0.9815137982368469,grad_norm: 0.9999993409599425, iteration: 10153
loss: 1.0722217559814453,grad_norm: 0.9999994816056742, iteration: 10154
loss: 1.0574969053268433,grad_norm: 0.9999994811653594, iteration: 10155
loss: 1.051563024520874,grad_norm: 0.9999996638707771, iteration: 10156
loss: 1.0162830352783203,grad_norm: 0.9999995057149158, iteration: 10157
loss: 1.0379509925842285,grad_norm: 0.9999993751074888, iteration: 10158
loss: 1.0176939964294434,grad_norm: 0.9999992189930192, iteration: 10159
loss: 1.0645852088928223,grad_norm: 0.9999993327597654, iteration: 10160
loss: 1.0411285161972046,grad_norm: 0.9999992738857827, iteration: 10161
loss: 1.1053005456924438,grad_norm: 0.9999996653078012, iteration: 10162
loss: 1.0468714237213135,grad_norm: 0.9999995855548445, iteration: 10163
loss: 1.0665302276611328,grad_norm: 0.9999995417005114, iteration: 10164
loss: 0.9983627796173096,grad_norm: 0.9999993391797324, iteration: 10165
loss: 1.0709069967269897,grad_norm: 0.9999993080220956, iteration: 10166
loss: 1.0726772546768188,grad_norm: 0.9999992910809419, iteration: 10167
loss: 1.120474934577942,grad_norm: 0.9999995894429929, iteration: 10168
loss: 1.0774494409561157,grad_norm: 0.9999994904038233, iteration: 10169
loss: 1.0020434856414795,grad_norm: 0.9999994415077317, iteration: 10170
loss: 1.0783413648605347,grad_norm: 0.9999993850635424, iteration: 10171
loss: 1.0516209602355957,grad_norm: 0.9999995111172347, iteration: 10172
loss: 1.0668376684188843,grad_norm: 0.9999994036566793, iteration: 10173
loss: 1.051973581314087,grad_norm: 0.999999187713376, iteration: 10174
loss: 1.0108624696731567,grad_norm: 0.9999992219011553, iteration: 10175
loss: 0.9990810751914978,grad_norm: 0.9999993213103721, iteration: 10176
loss: 1.0543956756591797,grad_norm: 0.9999994997613125, iteration: 10177
loss: 1.0672954320907593,grad_norm: 0.9999995409468742, iteration: 10178
loss: 1.0263299942016602,grad_norm: 0.9999993101327781, iteration: 10179
loss: 1.0651779174804688,grad_norm: 0.9999993612864091, iteration: 10180
loss: 1.0684539079666138,grad_norm: 0.999999728003058, iteration: 10181
loss: 1.036095380783081,grad_norm: 0.9999995832661268, iteration: 10182
loss: 1.0802497863769531,grad_norm: 0.9999992356889623, iteration: 10183
loss: 1.013480305671692,grad_norm: 0.9999993859329075, iteration: 10184
loss: 1.0192543268203735,grad_norm: 0.9999993959280432, iteration: 10185
loss: 1.0858039855957031,grad_norm: 0.999999363047411, iteration: 10186
loss: 1.132720708847046,grad_norm: 0.9999999120853463, iteration: 10187
loss: 1.020633339881897,grad_norm: 0.9999997469715857, iteration: 10188
loss: 1.0707100629806519,grad_norm: 0.9999997123918921, iteration: 10189
loss: 1.051439881324768,grad_norm: 0.999999383103674, iteration: 10190
loss: 1.0359941720962524,grad_norm: 0.999999146643357, iteration: 10191
loss: 1.0409220457077026,grad_norm: 0.9999992906593459, iteration: 10192
loss: 1.0179096460342407,grad_norm: 0.999999264577222, iteration: 10193
loss: 1.0481246709823608,grad_norm: 0.9999993744205641, iteration: 10194
loss: 1.0543758869171143,grad_norm: 0.9999992359214608, iteration: 10195
loss: 1.0694407224655151,grad_norm: 0.9999998081051289, iteration: 10196
loss: 1.04068922996521,grad_norm: 0.9999992605161023, iteration: 10197
loss: 1.10287344455719,grad_norm: 0.9999994922560953, iteration: 10198
loss: 1.0678352117538452,grad_norm: 0.9999997690767396, iteration: 10199
loss: 1.0303338766098022,grad_norm: 0.9999992816605182, iteration: 10200
loss: 1.0416545867919922,grad_norm: 0.9999992815061212, iteration: 10201
loss: 1.0453633069992065,grad_norm: 0.9999993730052779, iteration: 10202
loss: 1.0956286191940308,grad_norm: 0.9999994335481847, iteration: 10203
loss: 1.1035850048065186,grad_norm: 0.999999721648542, iteration: 10204
loss: 1.0424050092697144,grad_norm: 0.9999995061588066, iteration: 10205
loss: 1.0041470527648926,grad_norm: 0.9999994904787618, iteration: 10206
loss: 1.0155668258666992,grad_norm: 0.9999994052632631, iteration: 10207
loss: 1.0562317371368408,grad_norm: 0.9999998249949498, iteration: 10208
loss: 1.0793042182922363,grad_norm: 0.9999995865318538, iteration: 10209
loss: 1.048402190208435,grad_norm: 0.9999993105322974, iteration: 10210
loss: 1.0662448406219482,grad_norm: 0.9999994204900096, iteration: 10211
loss: 1.027773380279541,grad_norm: 0.9999992601958972, iteration: 10212
loss: 1.1017634868621826,grad_norm: 0.9999995948288342, iteration: 10213
loss: 1.0430599451065063,grad_norm: 0.9999995109595685, iteration: 10214
loss: 1.011289119720459,grad_norm: 0.9999991454610072, iteration: 10215
loss: 1.0462766885757446,grad_norm: 0.999999710976572, iteration: 10216
loss: 1.0141775608062744,grad_norm: 0.999999523443711, iteration: 10217
loss: 1.0088274478912354,grad_norm: 0.9999992421197368, iteration: 10218
loss: 1.0746461153030396,grad_norm: 0.9999996430995333, iteration: 10219
loss: 1.0282866954803467,grad_norm: 0.9999997684545981, iteration: 10220
loss: 1.0846996307373047,grad_norm: 0.9999994690567565, iteration: 10221
loss: 1.0439984798431396,grad_norm: 0.9999995217543141, iteration: 10222
loss: 1.0399516820907593,grad_norm: 0.9999994314724204, iteration: 10223
loss: 1.0141645669937134,grad_norm: 0.9999993838627977, iteration: 10224
loss: 1.0469567775726318,grad_norm: 0.9999993831200276, iteration: 10225
loss: 1.0515966415405273,grad_norm: 0.9999992524038855, iteration: 10226
loss: 1.1285408735275269,grad_norm: 0.9999996697105585, iteration: 10227
loss: 1.0629574060440063,grad_norm: 0.9999991332277309, iteration: 10228
loss: 0.9873909950256348,grad_norm: 0.9999992213874334, iteration: 10229
loss: 1.0643635988235474,grad_norm: 0.9999996021924228, iteration: 10230
loss: 1.0554053783416748,grad_norm: 0.9999991627256091, iteration: 10231
loss: 1.0757285356521606,grad_norm: 0.9999995727256575, iteration: 10232
loss: 1.0167838335037231,grad_norm: 0.9999993431382199, iteration: 10233
loss: 1.0321465730667114,grad_norm: 0.9999991968565513, iteration: 10234
loss: 1.0930238962173462,grad_norm: 0.999999858173581, iteration: 10235
loss: 1.060551404953003,grad_norm: 0.9999992177292184, iteration: 10236
loss: 1.067966103553772,grad_norm: 0.9999993016419325, iteration: 10237
loss: 1.0463892221450806,grad_norm: 0.999999260885774, iteration: 10238
loss: 1.0486944913864136,grad_norm: 0.9999998490000577, iteration: 10239
loss: 0.9995136260986328,grad_norm: 0.9999993336312435, iteration: 10240
loss: 1.0150130987167358,grad_norm: 0.999999164132257, iteration: 10241
loss: 1.0205684900283813,grad_norm: 0.9999992527380149, iteration: 10242
loss: 1.0500969886779785,grad_norm: 0.9999995883057291, iteration: 10243
loss: 1.0517700910568237,grad_norm: 0.9999993558330756, iteration: 10244
loss: 1.0451748371124268,grad_norm: 0.9999996206791892, iteration: 10245
loss: 1.0782036781311035,grad_norm: 0.999999811828726, iteration: 10246
loss: 1.0524258613586426,grad_norm: 0.9999998325674309, iteration: 10247
loss: 1.08229660987854,grad_norm: 0.9999997007442161, iteration: 10248
loss: 1.039542555809021,grad_norm: 0.9999994087561067, iteration: 10249
loss: 1.0609374046325684,grad_norm: 0.9999996336482497, iteration: 10250
loss: 1.0323173999786377,grad_norm: 0.9999993679066811, iteration: 10251
loss: 1.0387156009674072,grad_norm: 0.9999994008421454, iteration: 10252
loss: 1.0377833843231201,grad_norm: 0.9999997546859407, iteration: 10253
loss: 1.0696848630905151,grad_norm: 0.9999993443425925, iteration: 10254
loss: 1.0811278820037842,grad_norm: 0.9999997187973854, iteration: 10255
loss: 1.0756840705871582,grad_norm: 0.9999997832497352, iteration: 10256
loss: 1.0592889785766602,grad_norm: 0.9999998416101938, iteration: 10257
loss: 0.9869178533554077,grad_norm: 0.999999308865261, iteration: 10258
loss: 1.1330188512802124,grad_norm: 0.9999994647059163, iteration: 10259
loss: 0.982365608215332,grad_norm: 0.9999997990670726, iteration: 10260
loss: 1.0491143465042114,grad_norm: 0.9999994955318557, iteration: 10261
loss: 1.0673335790634155,grad_norm: 0.9999997030044762, iteration: 10262
loss: 1.0878854990005493,grad_norm: 0.9999997715975084, iteration: 10263
loss: 1.0537047386169434,grad_norm: 0.9999997872443538, iteration: 10264
loss: 1.0473544597625732,grad_norm: 0.9999995619880351, iteration: 10265
loss: 1.0894341468811035,grad_norm: 0.9999998812481441, iteration: 10266
loss: 1.0519657135009766,grad_norm: 0.9999992840393067, iteration: 10267
loss: 1.0342373847961426,grad_norm: 0.999999261104325, iteration: 10268
loss: 0.9966287016868591,grad_norm: 0.9999994756268855, iteration: 10269
loss: 1.0502766370773315,grad_norm: 0.9999994656045406, iteration: 10270
loss: 1.0248039960861206,grad_norm: 0.9999996218976244, iteration: 10271
loss: 1.101582646369934,grad_norm: 0.9999997830457246, iteration: 10272
loss: 1.032672643661499,grad_norm: 0.9999993807163242, iteration: 10273
loss: 1.07123601436615,grad_norm: 0.9999996242970088, iteration: 10274
loss: 1.028847575187683,grad_norm: 0.9999994075814583, iteration: 10275
loss: 0.9821648597717285,grad_norm: 0.9999991235684267, iteration: 10276
loss: 1.1161432266235352,grad_norm: 0.9999997108220989, iteration: 10277
loss: 1.0088756084442139,grad_norm: 0.9999992559236767, iteration: 10278
loss: 1.0993762016296387,grad_norm: 0.9999996952468659, iteration: 10279
loss: 1.0548547506332397,grad_norm: 0.9999997200912758, iteration: 10280
loss: 1.0579107999801636,grad_norm: 0.9999996560003585, iteration: 10281
loss: 1.0381044149398804,grad_norm: 0.99999927700631, iteration: 10282
loss: 1.0940979719161987,grad_norm: 0.9999997063704209, iteration: 10283
loss: 1.1069673299789429,grad_norm: 0.9999996539264688, iteration: 10284
loss: 1.0348913669586182,grad_norm: 0.9999994670521308, iteration: 10285
loss: 1.0455405712127686,grad_norm: 0.9999995428173196, iteration: 10286
loss: 1.0385099649429321,grad_norm: 0.9999992942882299, iteration: 10287
loss: 1.0365419387817383,grad_norm: 0.9999995135596594, iteration: 10288
loss: 1.0595803260803223,grad_norm: 0.9999995616732964, iteration: 10289
loss: 1.0637871026992798,grad_norm: 0.9999998622271465, iteration: 10290
loss: 1.0405033826828003,grad_norm: 0.9999995461160713, iteration: 10291
loss: 1.073123574256897,grad_norm: 0.9999995411056576, iteration: 10292
loss: 1.0424660444259644,grad_norm: 0.999999394500218, iteration: 10293
loss: 1.0736640691757202,grad_norm: 0.9999995756318836, iteration: 10294
loss: 1.0250725746154785,grad_norm: 0.9999992524000475, iteration: 10295
loss: 1.0494171380996704,grad_norm: 0.9999995052898136, iteration: 10296
loss: 0.9946825504302979,grad_norm: 0.9999992170797706, iteration: 10297
loss: 1.0704618692398071,grad_norm: 0.9999995001883257, iteration: 10298
loss: 1.0345215797424316,grad_norm: 0.9999993192413167, iteration: 10299
loss: 1.0575510263442993,grad_norm: 0.9999991623444341, iteration: 10300
loss: 1.0729085206985474,grad_norm: 0.9999996570203126, iteration: 10301
loss: 1.0965476036071777,grad_norm: 0.9999998919243244, iteration: 10302
loss: 1.035932183265686,grad_norm: 0.999999491803644, iteration: 10303
loss: 1.0214757919311523,grad_norm: 0.9999994387825832, iteration: 10304
loss: 1.0414706468582153,grad_norm: 0.9999993016324431, iteration: 10305
loss: 0.999376654624939,grad_norm: 0.9999995363402634, iteration: 10306
loss: 1.083897590637207,grad_norm: 0.9999993663117628, iteration: 10307
loss: 1.0309644937515259,grad_norm: 0.9999997576270367, iteration: 10308
loss: 1.032525658607483,grad_norm: 0.9999997566319513, iteration: 10309
loss: 1.1286660432815552,grad_norm: 0.9999998456655543, iteration: 10310
loss: 1.0642844438552856,grad_norm: 0.9999994826698024, iteration: 10311
loss: 1.0522302389144897,grad_norm: 0.9999992652885238, iteration: 10312
loss: 1.0443376302719116,grad_norm: 0.9999995643455079, iteration: 10313
loss: 0.9911301136016846,grad_norm: 0.9999993586201739, iteration: 10314
loss: 1.066472053527832,grad_norm: 0.999999175768897, iteration: 10315
loss: 0.9863486289978027,grad_norm: 0.9999994660523747, iteration: 10316
loss: 1.0760000944137573,grad_norm: 0.9999996838094877, iteration: 10317
loss: 1.011718511581421,grad_norm: 0.9999992701074462, iteration: 10318
loss: 1.063093900680542,grad_norm: 0.9999997772998571, iteration: 10319
loss: 1.0376379489898682,grad_norm: 0.9999993969530894, iteration: 10320
loss: 1.0026054382324219,grad_norm: 0.9999995587281769, iteration: 10321
loss: 0.9989083409309387,grad_norm: 0.9999997170336974, iteration: 10322
loss: 0.9742048382759094,grad_norm: 0.999999286320843, iteration: 10323
loss: 1.04475998878479,grad_norm: 0.9999994161127023, iteration: 10324
loss: 1.0641155242919922,grad_norm: 0.999999347407981, iteration: 10325
loss: 1.08013916015625,grad_norm: 0.9999998091790141, iteration: 10326
loss: 1.055709719657898,grad_norm: 0.9999995354762651, iteration: 10327
loss: 1.0891915559768677,grad_norm: 0.9999994778864831, iteration: 10328
loss: 1.0395928621292114,grad_norm: 0.9999996436108012, iteration: 10329
loss: 0.9955498576164246,grad_norm: 0.9999993579050634, iteration: 10330
loss: 1.0600810050964355,grad_norm: 0.9999995946032139, iteration: 10331
loss: 1.0616143941879272,grad_norm: 0.9999993340370867, iteration: 10332
loss: 1.0451246500015259,grad_norm: 0.9999996329415701, iteration: 10333
loss: 1.039635181427002,grad_norm: 0.9999997005276765, iteration: 10334
loss: 1.0478169918060303,grad_norm: 0.9999995436685454, iteration: 10335
loss: 0.9993543028831482,grad_norm: 0.9999994244088509, iteration: 10336
loss: 1.034863829612732,grad_norm: 0.9999992853471811, iteration: 10337
loss: 1.0422227382659912,grad_norm: 0.9999997553439162, iteration: 10338
loss: 1.2059504985809326,grad_norm: 0.9999998932206124, iteration: 10339
loss: 1.0449934005737305,grad_norm: 0.9999993340872092, iteration: 10340
loss: 1.0236942768096924,grad_norm: 0.9999994557348034, iteration: 10341
loss: 1.0291966199874878,grad_norm: 0.9999991129129674, iteration: 10342
loss: 1.1126515865325928,grad_norm: 0.99999966089212, iteration: 10343
loss: 1.074277400970459,grad_norm: 0.9999997123630487, iteration: 10344
loss: 1.0758553743362427,grad_norm: 0.9999997071194101, iteration: 10345
loss: 0.9789654016494751,grad_norm: 0.9999991013136424, iteration: 10346
loss: 1.1058616638183594,grad_norm: 0.9999995403790182, iteration: 10347
loss: 1.002449631690979,grad_norm: 0.9999994395731241, iteration: 10348
loss: 1.0202100276947021,grad_norm: 0.9999993582663634, iteration: 10349
loss: 1.0473581552505493,grad_norm: 0.9999997049966554, iteration: 10350
loss: 1.0246708393096924,grad_norm: 0.9999993173361615, iteration: 10351
loss: 1.00437331199646,grad_norm: 0.9999992848076914, iteration: 10352
loss: 1.0504347085952759,grad_norm: 0.9999992384904144, iteration: 10353
loss: 1.0563305616378784,grad_norm: 0.9999997160729005, iteration: 10354
loss: 0.9987407922744751,grad_norm: 0.9999994207569441, iteration: 10355
loss: 1.0782837867736816,grad_norm: 0.9999994137404113, iteration: 10356
loss: 1.0175623893737793,grad_norm: 0.9999992386912641, iteration: 10357
loss: 1.0177948474884033,grad_norm: 0.999999459059276, iteration: 10358
loss: 1.0504249334335327,grad_norm: 0.999999361798608, iteration: 10359
loss: 1.0124164819717407,grad_norm: 0.9999993336143614, iteration: 10360
loss: 1.0667213201522827,grad_norm: 0.9999995927011752, iteration: 10361
loss: 1.0015584230422974,grad_norm: 0.9999992393477529, iteration: 10362
loss: 1.0569732189178467,grad_norm: 0.9999997685198738, iteration: 10363
loss: 1.121539831161499,grad_norm: 0.9999995042077476, iteration: 10364
loss: 1.0984915494918823,grad_norm: 0.9999997864562695, iteration: 10365
loss: 1.0224710702896118,grad_norm: 0.9999994030689718, iteration: 10366
loss: 1.1725960969924927,grad_norm: 0.9999996943394183, iteration: 10367
loss: 1.121597170829773,grad_norm: 0.9999994826755435, iteration: 10368
loss: 1.0693260431289673,grad_norm: 0.999999776735269, iteration: 10369
loss: 1.000031590461731,grad_norm: 0.9999992473520446, iteration: 10370
loss: 1.0100924968719482,grad_norm: 0.9999994123561315, iteration: 10371
loss: 1.0586172342300415,grad_norm: 0.9999995832874166, iteration: 10372
loss: 1.030609130859375,grad_norm: 0.9999996219990688, iteration: 10373
loss: 1.1238200664520264,grad_norm: 0.9999995799554753, iteration: 10374
loss: 1.0739909410476685,grad_norm: 0.9999997468707558, iteration: 10375
loss: 1.0822457075119019,grad_norm: 0.999999696533438, iteration: 10376
loss: 1.042936086654663,grad_norm: 0.9999990841599696, iteration: 10377
loss: 1.0753554105758667,grad_norm: 0.9999995286907016, iteration: 10378
loss: 1.131304383277893,grad_norm: 0.9999995496408517, iteration: 10379
loss: 1.021831750869751,grad_norm: 0.999999486670466, iteration: 10380
loss: 1.0995420217514038,grad_norm: 0.9999994049116736, iteration: 10381
loss: 1.1049357652664185,grad_norm: 0.9999992604460437, iteration: 10382
loss: 1.0444248914718628,grad_norm: 0.9999996708354748, iteration: 10383
loss: 1.027535319328308,grad_norm: 0.999999478274403, iteration: 10384
loss: 1.060734510421753,grad_norm: 0.9999992913096076, iteration: 10385
loss: 1.1110060214996338,grad_norm: 0.9999994468799204, iteration: 10386
loss: 1.0646799802780151,grad_norm: 0.9999992535406592, iteration: 10387
loss: 1.0426996946334839,grad_norm: 0.9999992617543282, iteration: 10388
loss: 1.0182271003723145,grad_norm: 0.9999994698993383, iteration: 10389
loss: 1.089563012123108,grad_norm: 0.9999993983595226, iteration: 10390
loss: 0.9894999265670776,grad_norm: 0.9999994163786025, iteration: 10391
loss: 1.0391755104064941,grad_norm: 0.9999994782902057, iteration: 10392
loss: 1.0090924501419067,grad_norm: 0.9999992857799099, iteration: 10393
loss: 1.0470877885818481,grad_norm: 0.9999994570873995, iteration: 10394
loss: 1.0515637397766113,grad_norm: 0.9999994728102836, iteration: 10395
loss: 1.0509816408157349,grad_norm: 0.9999994026227876, iteration: 10396
loss: 1.0613270998001099,grad_norm: 0.9999991643831079, iteration: 10397
loss: 1.049317479133606,grad_norm: 0.9999994711656477, iteration: 10398
loss: 1.0329753160476685,grad_norm: 0.9999992200508174, iteration: 10399
loss: 1.0000216960906982,grad_norm: 0.9999995884431656, iteration: 10400
loss: 1.0839736461639404,grad_norm: 0.9999993819757075, iteration: 10401
loss: 1.1211925745010376,grad_norm: 0.9999995767844384, iteration: 10402
loss: 1.03783118724823,grad_norm: 0.9999993983070593, iteration: 10403
loss: 1.088671326637268,grad_norm: 0.9999997166119848, iteration: 10404
loss: 0.9921914339065552,grad_norm: 0.9999994109663937, iteration: 10405
loss: 1.1019954681396484,grad_norm: 0.9999996196018511, iteration: 10406
loss: 1.0101134777069092,grad_norm: 0.9999992831222552, iteration: 10407
loss: 1.0803056955337524,grad_norm: 0.9999998617095347, iteration: 10408
loss: 1.0853908061981201,grad_norm: 0.9999997551259794, iteration: 10409
loss: 1.04287588596344,grad_norm: 0.9999993604904657, iteration: 10410
loss: 1.0654946565628052,grad_norm: 0.9999993447631387, iteration: 10411
loss: 1.056065559387207,grad_norm: 0.9999993152518881, iteration: 10412
loss: 1.0165164470672607,grad_norm: 0.9999993534767243, iteration: 10413
loss: 1.030245065689087,grad_norm: 0.9999994877957559, iteration: 10414
loss: 1.0298835039138794,grad_norm: 0.9999992832246821, iteration: 10415
loss: 1.005046010017395,grad_norm: 0.9999991717725923, iteration: 10416
loss: 1.0834547281265259,grad_norm: 0.9999994645518016, iteration: 10417
loss: 1.0387083292007446,grad_norm: 0.999999293665915, iteration: 10418
loss: 1.0297443866729736,grad_norm: 0.9999993128622923, iteration: 10419
loss: 1.0273075103759766,grad_norm: 0.9999998012204627, iteration: 10420
loss: 1.0232785940170288,grad_norm: 0.9999994342735651, iteration: 10421
loss: 1.0501842498779297,grad_norm: 0.9999994367551264, iteration: 10422
loss: 1.0408018827438354,grad_norm: 0.9999998400850092, iteration: 10423
loss: 1.063909649848938,grad_norm: 0.9999996113197094, iteration: 10424
loss: 1.0622526407241821,grad_norm: 0.9999993328240493, iteration: 10425
loss: 1.0597240924835205,grad_norm: 0.9999993391512214, iteration: 10426
loss: 1.0493345260620117,grad_norm: 0.9999994859657754, iteration: 10427
loss: 1.0564250946044922,grad_norm: 0.9999993890906023, iteration: 10428
loss: 1.0991915464401245,grad_norm: 0.9999992924016348, iteration: 10429
loss: 1.0575157403945923,grad_norm: 0.9999998138964623, iteration: 10430
loss: 1.0362064838409424,grad_norm: 0.9999996735959771, iteration: 10431
loss: 1.0869048833847046,grad_norm: 0.9999994223950532, iteration: 10432
loss: 1.0515313148498535,grad_norm: 0.9999994222089867, iteration: 10433
loss: 1.0724972486495972,grad_norm: 0.9999995260017672, iteration: 10434
loss: 1.0250831842422485,grad_norm: 0.9999992078385186, iteration: 10435
loss: 1.0025149583816528,grad_norm: 0.9999991537586873, iteration: 10436
loss: 1.041584849357605,grad_norm: 0.9999992743422893, iteration: 10437
loss: 1.0874977111816406,grad_norm: 0.9999996058366998, iteration: 10438
loss: 1.095327377319336,grad_norm: 0.9999993362450099, iteration: 10439
loss: 1.0432053804397583,grad_norm: 0.9999995844943111, iteration: 10440
loss: 1.0663293600082397,grad_norm: 0.9999993932817003, iteration: 10441
loss: 1.0607575178146362,grad_norm: 0.9999992696961946, iteration: 10442
loss: 1.027369737625122,grad_norm: 0.999999464739747, iteration: 10443
loss: 1.0168310403823853,grad_norm: 0.999999461219792, iteration: 10444
loss: 1.034432291984558,grad_norm: 0.9999993856898236, iteration: 10445
loss: 1.0692954063415527,grad_norm: 0.9999996717787955, iteration: 10446
loss: 1.064386010169983,grad_norm: 0.9999995652737182, iteration: 10447
loss: 1.0341858863830566,grad_norm: 0.9999995256238683, iteration: 10448
loss: 1.030564546585083,grad_norm: 0.9999991433439948, iteration: 10449
loss: 1.0234036445617676,grad_norm: 0.999999291894227, iteration: 10450
loss: 1.0068153142929077,grad_norm: 0.9999993867824466, iteration: 10451
loss: 1.0832791328430176,grad_norm: 0.9999997884544161, iteration: 10452
loss: 1.065163254737854,grad_norm: 0.9999995019578793, iteration: 10453
loss: 1.0307303667068481,grad_norm: 0.9999992160853285, iteration: 10454
loss: 1.0376454591751099,grad_norm: 0.9999993954426409, iteration: 10455
loss: 1.0548303127288818,grad_norm: 0.9999994487493942, iteration: 10456
loss: 1.088274359703064,grad_norm: 0.99999936352251, iteration: 10457
loss: 1.0287522077560425,grad_norm: 0.9999992371440042, iteration: 10458
loss: 1.0439099073410034,grad_norm: 0.9999992679756865, iteration: 10459
loss: 0.9928266406059265,grad_norm: 0.9999993167962591, iteration: 10460
loss: 1.0416080951690674,grad_norm: 0.9999993817088693, iteration: 10461
loss: 1.0356309413909912,grad_norm: 0.9999998621101635, iteration: 10462
loss: 1.0993974208831787,grad_norm: 0.9999997513618964, iteration: 10463
loss: 1.0315113067626953,grad_norm: 0.9999995416232599, iteration: 10464
loss: 1.0434720516204834,grad_norm: 0.99999969099665, iteration: 10465
loss: 1.0322356224060059,grad_norm: 0.9999992064770366, iteration: 10466
loss: 1.0554332733154297,grad_norm: 0.9999997829407395, iteration: 10467
loss: 1.0355045795440674,grad_norm: 0.9999990981775568, iteration: 10468
loss: 1.0130763053894043,grad_norm: 0.9999995043044677, iteration: 10469
loss: 1.070456624031067,grad_norm: 0.9999995398876357, iteration: 10470
loss: 1.071115255355835,grad_norm: 0.9999995551460226, iteration: 10471
loss: 1.0533660650253296,grad_norm: 0.9999996428726707, iteration: 10472
loss: 0.9930981993675232,grad_norm: 0.9999991614278698, iteration: 10473
loss: 1.0856282711029053,grad_norm: 0.9999995652779805, iteration: 10474
loss: 1.0923686027526855,grad_norm: 0.9999998212431875, iteration: 10475
loss: 1.079611897468567,grad_norm: 0.9999997419765029, iteration: 10476
loss: 1.0539811849594116,grad_norm: 0.9999993248035567, iteration: 10477
loss: 1.0446149110794067,grad_norm: 0.9999997000588396, iteration: 10478
loss: 1.0868886709213257,grad_norm: 0.9999994881163273, iteration: 10479
loss: 0.9951412081718445,grad_norm: 0.9999997895812999, iteration: 10480
loss: 1.1077066659927368,grad_norm: 0.9999997731224046, iteration: 10481
loss: 1.03373384475708,grad_norm: 0.9999993852700721, iteration: 10482
loss: 1.0472464561462402,grad_norm: 0.9999993960179017, iteration: 10483
loss: 0.9969748854637146,grad_norm: 0.9999991639802064, iteration: 10484
loss: 1.0544953346252441,grad_norm: 0.999999759244221, iteration: 10485
loss: 1.0517451763153076,grad_norm: 0.9999993936663235, iteration: 10486
loss: 1.0126458406448364,grad_norm: 0.9999996318354712, iteration: 10487
loss: 1.1081745624542236,grad_norm: 0.9999995545265071, iteration: 10488
loss: 1.0432236194610596,grad_norm: 0.9999993451260547, iteration: 10489
loss: 1.0436803102493286,grad_norm: 0.9999996121098542, iteration: 10490
loss: 1.0739716291427612,grad_norm: 0.999999358743641, iteration: 10491
loss: 1.0154095888137817,grad_norm: 0.9999993431057651, iteration: 10492
loss: 1.0057127475738525,grad_norm: 0.9999995615747788, iteration: 10493
loss: 1.0325093269348145,grad_norm: 0.9999996119276201, iteration: 10494
loss: 1.0536890029907227,grad_norm: 0.9999994574257803, iteration: 10495
loss: 0.9623110890388489,grad_norm: 0.9999993088899326, iteration: 10496
loss: 1.0407791137695312,grad_norm: 0.9999995025649409, iteration: 10497
loss: 1.0304534435272217,grad_norm: 0.9999997522338152, iteration: 10498
loss: 1.0361939668655396,grad_norm: 0.9999998431623776, iteration: 10499
loss: 1.0129157304763794,grad_norm: 0.9999992683155293, iteration: 10500
loss: 1.034197211265564,grad_norm: 0.9999993743704804, iteration: 10501
loss: 1.0441454648971558,grad_norm: 0.9999993257199888, iteration: 10502
loss: 1.0600433349609375,grad_norm: 0.9999991393044616, iteration: 10503
loss: 1.0877598524093628,grad_norm: 0.9999994161836809, iteration: 10504
loss: 1.0680181980133057,grad_norm: 0.9999994965050556, iteration: 10505
loss: 1.079815149307251,grad_norm: 0.9999993421571719, iteration: 10506
loss: 1.062902808189392,grad_norm: 0.999999548198646, iteration: 10507
loss: 1.057258129119873,grad_norm: 0.9999996194256909, iteration: 10508
loss: 1.0300298929214478,grad_norm: 0.9999995082496786, iteration: 10509
loss: 1.098729133605957,grad_norm: 0.9999993871099568, iteration: 10510
loss: 1.1039716005325317,grad_norm: 0.9999997719783911, iteration: 10511
loss: 1.1003563404083252,grad_norm: 0.9999993282779333, iteration: 10512
loss: 1.047265648841858,grad_norm: 0.9999995800909722, iteration: 10513
loss: 1.0728490352630615,grad_norm: 0.9999992356281293, iteration: 10514
loss: 1.007808804512024,grad_norm: 0.9999992636209771, iteration: 10515
loss: 1.1419755220413208,grad_norm: 0.9999996567186388, iteration: 10516
loss: 1.1241340637207031,grad_norm: 0.9999997889102219, iteration: 10517
loss: 1.0086551904678345,grad_norm: 0.9999995231441795, iteration: 10518
loss: 1.0676014423370361,grad_norm: 0.9999996703127326, iteration: 10519
loss: 1.010330319404602,grad_norm: 0.9999993084237467, iteration: 10520
loss: 1.0352857112884521,grad_norm: 0.9999992424945394, iteration: 10521
loss: 1.114626407623291,grad_norm: 0.9999997363267019, iteration: 10522
loss: 1.069140911102295,grad_norm: 0.9999995876736305, iteration: 10523
loss: 1.1454747915267944,grad_norm: 0.9999996457330647, iteration: 10524
loss: 1.062117099761963,grad_norm: 0.9999996354099957, iteration: 10525
loss: 1.0790406465530396,grad_norm: 0.9999993440142808, iteration: 10526
loss: 1.036077857017517,grad_norm: 0.9999991999991927, iteration: 10527
loss: 1.0464363098144531,grad_norm: 0.9999994112863996, iteration: 10528
loss: 0.9972935914993286,grad_norm: 0.9999994007467572, iteration: 10529
loss: 1.04450523853302,grad_norm: 0.9999997726900421, iteration: 10530
loss: 1.0796509981155396,grad_norm: 0.9999994485031157, iteration: 10531
loss: 0.9885252118110657,grad_norm: 0.9999992238017527, iteration: 10532
loss: 1.0016099214553833,grad_norm: 0.9999993400043876, iteration: 10533
loss: 1.0542607307434082,grad_norm: 0.9999996798331214, iteration: 10534
loss: 1.08562171459198,grad_norm: 0.999999679769893, iteration: 10535
loss: 1.0418517589569092,grad_norm: 0.9999992587040958, iteration: 10536
loss: 1.0540895462036133,grad_norm: 0.9999994415117959, iteration: 10537
loss: 1.025099277496338,grad_norm: 0.9999996111823102, iteration: 10538
loss: 1.0591381788253784,grad_norm: 0.9999995900736123, iteration: 10539
loss: 1.0404086112976074,grad_norm: 0.9999992025468536, iteration: 10540
loss: 1.0829248428344727,grad_norm: 0.9999998673089485, iteration: 10541
loss: 1.046769618988037,grad_norm: 0.999999572941066, iteration: 10542
loss: 1.062015175819397,grad_norm: 0.9999995984656967, iteration: 10543
loss: 0.9856612682342529,grad_norm: 0.9999992767756406, iteration: 10544
loss: 1.1345750093460083,grad_norm: 0.9999994792120045, iteration: 10545
loss: 1.0881940126419067,grad_norm: 0.9999996911089062, iteration: 10546
loss: 1.1009961366653442,grad_norm: 0.9999992590287847, iteration: 10547
loss: 1.0975303649902344,grad_norm: 0.9999994166637192, iteration: 10548
loss: 1.0530126094818115,grad_norm: 0.9999998216595442, iteration: 10549
loss: 0.9760966300964355,grad_norm: 0.999999333929774, iteration: 10550
loss: 1.0902338027954102,grad_norm: 0.9999997055801746, iteration: 10551
loss: 1.046646237373352,grad_norm: 0.9999997956269236, iteration: 10552
loss: 1.1206121444702148,grad_norm: 0.999999864179982, iteration: 10553
loss: 1.0327420234680176,grad_norm: 0.999999615198048, iteration: 10554
loss: 0.9752161502838135,grad_norm: 0.9999994046967061, iteration: 10555
loss: 1.0502128601074219,grad_norm: 0.9999992795958489, iteration: 10556
loss: 1.0688682794570923,grad_norm: 0.9999991367393612, iteration: 10557
loss: 1.0750699043273926,grad_norm: 0.9999995778525396, iteration: 10558
loss: 1.0587866306304932,grad_norm: 0.9999995206017568, iteration: 10559
loss: 1.053868293762207,grad_norm: 0.9999993320307892, iteration: 10560
loss: 1.0809338092803955,grad_norm: 0.9999997801139417, iteration: 10561
loss: 1.1279810667037964,grad_norm: 0.9999998216870674, iteration: 10562
loss: 1.088390827178955,grad_norm: 0.9999994373287805, iteration: 10563
loss: 1.0252877473831177,grad_norm: 0.9999994742235461, iteration: 10564
loss: 1.056951880455017,grad_norm: 0.9999994088514561, iteration: 10565
loss: 1.028629183769226,grad_norm: 0.9999993766280763, iteration: 10566
loss: 1.058664321899414,grad_norm: 0.9999994168781318, iteration: 10567
loss: 1.1000268459320068,grad_norm: 0.9999996257756318, iteration: 10568
loss: 1.0518170595169067,grad_norm: 0.9999993328193905, iteration: 10569
loss: 1.0349125862121582,grad_norm: 0.9999994853445516, iteration: 10570
loss: 1.026169776916504,grad_norm: 0.9999991877617554, iteration: 10571
loss: 1.0282591581344604,grad_norm: 0.999999352116934, iteration: 10572
loss: 1.0190339088439941,grad_norm: 0.9999998866674897, iteration: 10573
loss: 1.0851328372955322,grad_norm: 0.9999997516320713, iteration: 10574
loss: 1.03765070438385,grad_norm: 0.9999993573315401, iteration: 10575
loss: 1.0230624675750732,grad_norm: 0.9999995060638944, iteration: 10576
loss: 1.0783836841583252,grad_norm: 0.999999620263215, iteration: 10577
loss: 1.0247288942337036,grad_norm: 0.9999995223918163, iteration: 10578
loss: 1.0482041835784912,grad_norm: 0.9999992915744017, iteration: 10579
loss: 1.0582739114761353,grad_norm: 0.9999994247553282, iteration: 10580
loss: 1.0498547554016113,grad_norm: 0.9999992921354153, iteration: 10581
loss: 1.0974202156066895,grad_norm: 0.9999997288232152, iteration: 10582
loss: 1.0589429140090942,grad_norm: 0.999999828479569, iteration: 10583
loss: 1.0704604387283325,grad_norm: 0.999999792042847, iteration: 10584
loss: 1.0252015590667725,grad_norm: 0.9999992806247952, iteration: 10585
loss: 1.0226792097091675,grad_norm: 0.999999522780629, iteration: 10586
loss: 1.0630775690078735,grad_norm: 0.9999993480263284, iteration: 10587
loss: 1.0488789081573486,grad_norm: 0.9999995428627267, iteration: 10588
loss: 1.044406533241272,grad_norm: 0.9999994106290595, iteration: 10589
loss: 1.0045369863510132,grad_norm: 0.9999991556182097, iteration: 10590
loss: 1.0203311443328857,grad_norm: 0.9999993436041821, iteration: 10591
loss: 1.0527973175048828,grad_norm: 0.9999993955869315, iteration: 10592
loss: 1.106706976890564,grad_norm: 0.9999997208345355, iteration: 10593
loss: 1.057963490486145,grad_norm: 0.9999992206379008, iteration: 10594
loss: 1.0497280359268188,grad_norm: 0.9999992749381861, iteration: 10595
loss: 1.0736275911331177,grad_norm: 0.9999995482778797, iteration: 10596
loss: 1.0637613534927368,grad_norm: 0.999999617222994, iteration: 10597
loss: 1.040619969367981,grad_norm: 0.9999996419305345, iteration: 10598
loss: 1.0410921573638916,grad_norm: 0.9999993732881257, iteration: 10599
loss: 1.030136227607727,grad_norm: 0.9999992542974653, iteration: 10600
loss: 1.047829031944275,grad_norm: 0.9999995114593738, iteration: 10601
loss: 1.0500046014785767,grad_norm: 0.9999994676171668, iteration: 10602
loss: 1.0390089750289917,grad_norm: 0.9999994515160147, iteration: 10603
loss: 1.1412208080291748,grad_norm: 0.9999998535007227, iteration: 10604
loss: 1.025001883506775,grad_norm: 0.9999993086394072, iteration: 10605
loss: 1.0388593673706055,grad_norm: 0.9999991817744859, iteration: 10606
loss: 1.0803486108779907,grad_norm: 0.9999995053495867, iteration: 10607
loss: 1.1377198696136475,grad_norm: 0.9999996152564888, iteration: 10608
loss: 1.0618219375610352,grad_norm: 0.9999993061884177, iteration: 10609
loss: 1.0154519081115723,grad_norm: 0.9999993925265478, iteration: 10610
loss: 1.0252795219421387,grad_norm: 0.9999994895200076, iteration: 10611
loss: 1.0496340990066528,grad_norm: 0.9999995618739628, iteration: 10612
loss: 1.0183695554733276,grad_norm: 0.9999992570182118, iteration: 10613
loss: 1.0456596612930298,grad_norm: 0.9999994732181883, iteration: 10614
loss: 0.9890971779823303,grad_norm: 0.9999994542053129, iteration: 10615
loss: 0.9655647873878479,grad_norm: 0.999999208855292, iteration: 10616
loss: 1.084533452987671,grad_norm: 0.9999993870951636, iteration: 10617
loss: 1.04730224609375,grad_norm: 0.9999993768529313, iteration: 10618
loss: 1.0763853788375854,grad_norm: 0.9999994771393118, iteration: 10619
loss: 1.1147243976593018,grad_norm: 0.9999998155110089, iteration: 10620
loss: 1.074651837348938,grad_norm: 0.9999994359202089, iteration: 10621
loss: 1.0164320468902588,grad_norm: 0.9999993775446453, iteration: 10622
loss: 0.9961393475532532,grad_norm: 0.9999995215322689, iteration: 10623
loss: 1.1137391328811646,grad_norm: 0.9999999360271731, iteration: 10624
loss: 1.0525028705596924,grad_norm: 0.9999998354112463, iteration: 10625
loss: 1.0182580947875977,grad_norm: 0.9999992461061675, iteration: 10626
loss: 1.042959451675415,grad_norm: 0.999999540200162, iteration: 10627
loss: 1.069990873336792,grad_norm: 0.9999994237590785, iteration: 10628
loss: 1.080955982208252,grad_norm: 0.999999339498528, iteration: 10629
loss: 1.0035088062286377,grad_norm: 0.9999993055981555, iteration: 10630
loss: 1.0824108123779297,grad_norm: 0.9999998093748427, iteration: 10631
loss: 1.0123368501663208,grad_norm: 0.9999994847910711, iteration: 10632
loss: 1.0457979440689087,grad_norm: 0.9999992024440578, iteration: 10633
loss: 1.0777113437652588,grad_norm: 0.9999996061935823, iteration: 10634
loss: 1.0492603778839111,grad_norm: 0.9999994025035812, iteration: 10635
loss: 1.0467833280563354,grad_norm: 0.9999996412764708, iteration: 10636
loss: 0.9822478890419006,grad_norm: 0.9999992030831406, iteration: 10637
loss: 1.0832220315933228,grad_norm: 0.9999995779533667, iteration: 10638
loss: 1.1257680654525757,grad_norm: 0.9999995793537072, iteration: 10639
loss: 1.0113309621810913,grad_norm: 0.9999993245624378, iteration: 10640
loss: 1.1194804906845093,grad_norm: 0.9999998920758176, iteration: 10641
loss: 1.040852665901184,grad_norm: 0.999999306454663, iteration: 10642
loss: 1.0868165493011475,grad_norm: 0.9999996439235891, iteration: 10643
loss: 1.1019866466522217,grad_norm: 0.9999994495964077, iteration: 10644
loss: 0.9895704388618469,grad_norm: 0.9999991316820793, iteration: 10645
loss: 1.06211256980896,grad_norm: 0.999999370685966, iteration: 10646
loss: 1.0338822603225708,grad_norm: 0.9999994163156167, iteration: 10647
loss: 0.9824638366699219,grad_norm: 0.9999993398233635, iteration: 10648
loss: 1.0779025554656982,grad_norm: 0.9999995909111289, iteration: 10649
loss: 1.026621699333191,grad_norm: 0.9999993912124908, iteration: 10650
loss: 1.0440303087234497,grad_norm: 0.9999995057506903, iteration: 10651
loss: 1.0814626216888428,grad_norm: 0.9999993524356077, iteration: 10652
loss: 1.0487792491912842,grad_norm: 0.99999959291439, iteration: 10653
loss: 1.1412981748580933,grad_norm: 0.9999998225780391, iteration: 10654
loss: 1.0374644994735718,grad_norm: 0.9999996473051601, iteration: 10655
loss: 1.0779893398284912,grad_norm: 0.999999834979219, iteration: 10656
loss: 1.1087656021118164,grad_norm: 0.9999998678948866, iteration: 10657
loss: 1.1404860019683838,grad_norm: 0.9999997205754666, iteration: 10658
loss: 1.0781909227371216,grad_norm: 0.9999992768136927, iteration: 10659
loss: 1.0461571216583252,grad_norm: 0.9999993382799642, iteration: 10660
loss: 1.0035102367401123,grad_norm: 0.999999857214311, iteration: 10661
loss: 1.1146329641342163,grad_norm: 0.9999998180251713, iteration: 10662
loss: 1.092890977859497,grad_norm: 0.9999995067799134, iteration: 10663
loss: 1.0729495286941528,grad_norm: 0.9999998433670607, iteration: 10664
loss: 0.9976744055747986,grad_norm: 0.9999992934292585, iteration: 10665
loss: 1.0555306673049927,grad_norm: 0.9999995427840976, iteration: 10666
loss: 1.0628783702850342,grad_norm: 0.9999997630286531, iteration: 10667
loss: 1.0436662435531616,grad_norm: 0.999999238776416, iteration: 10668
loss: 1.051706314086914,grad_norm: 0.9999995774085381, iteration: 10669
loss: 1.0264796018600464,grad_norm: 0.9999991709837449, iteration: 10670
loss: 0.9829803705215454,grad_norm: 0.9999996500518229, iteration: 10671
loss: 1.1054892539978027,grad_norm: 0.9999995567714932, iteration: 10672
loss: 1.0444711446762085,grad_norm: 0.9999993142012223, iteration: 10673
loss: 1.0449446439743042,grad_norm: 0.9999993769700279, iteration: 10674
loss: 1.093240737915039,grad_norm: 0.9999996908782046, iteration: 10675
loss: 1.094844937324524,grad_norm: 0.999999859251805, iteration: 10676
loss: 1.0808393955230713,grad_norm: 0.9999991988330769, iteration: 10677
loss: 1.0950931310653687,grad_norm: 0.9999995317187182, iteration: 10678
loss: 0.9825633764266968,grad_norm: 0.9999993634439212, iteration: 10679
loss: 0.996874988079071,grad_norm: 0.999999462575482, iteration: 10680
loss: 1.0176384449005127,grad_norm: 0.9999992322981904, iteration: 10681
loss: 1.021679162979126,grad_norm: 0.9999998480678486, iteration: 10682
loss: 1.0794118642807007,grad_norm: 0.9999993291778523, iteration: 10683
loss: 1.0102506875991821,grad_norm: 0.9999992897324195, iteration: 10684
loss: 1.0221948623657227,grad_norm: 0.9999995849046166, iteration: 10685
loss: 1.0637884140014648,grad_norm: 0.999999627928742, iteration: 10686
loss: 1.0397447347640991,grad_norm: 0.999999641073001, iteration: 10687
loss: 1.0366647243499756,grad_norm: 0.9999996281938903, iteration: 10688
loss: 1.044074535369873,grad_norm: 0.9999994797250419, iteration: 10689
loss: 1.094756007194519,grad_norm: 0.9999993393998855, iteration: 10690
loss: 1.071033000946045,grad_norm: 0.9999996224139177, iteration: 10691
loss: 1.0321711301803589,grad_norm: 0.9999993090345763, iteration: 10692
loss: 1.106721043586731,grad_norm: 0.9999993737998819, iteration: 10693
loss: 1.0612508058547974,grad_norm: 0.9999995200686198, iteration: 10694
loss: 1.089311122894287,grad_norm: 0.9999993668940724, iteration: 10695
loss: 1.038106918334961,grad_norm: 0.9999995507718311, iteration: 10696
loss: 1.0485315322875977,grad_norm: 0.9999993600388544, iteration: 10697
loss: 1.0023757219314575,grad_norm: 0.9999990813032349, iteration: 10698
loss: 1.0091581344604492,grad_norm: 0.9999993853448502, iteration: 10699
loss: 1.0442869663238525,grad_norm: 0.9999993336203844, iteration: 10700
loss: 1.1162837743759155,grad_norm: 0.9999999227425354, iteration: 10701
loss: 1.06417977809906,grad_norm: 0.9999995386937004, iteration: 10702
loss: 1.0082768201828003,grad_norm: 0.999999389193872, iteration: 10703
loss: 1.126397728919983,grad_norm: 0.9999996668812454, iteration: 10704
loss: 1.028385877609253,grad_norm: 0.9999995861780389, iteration: 10705
loss: 1.0387660264968872,grad_norm: 0.9999992417898361, iteration: 10706
loss: 1.1158859729766846,grad_norm: 0.9999994692946633, iteration: 10707
loss: 1.0158133506774902,grad_norm: 0.999999239744693, iteration: 10708
loss: 1.0307281017303467,grad_norm: 0.9999996721397383, iteration: 10709
loss: 1.097691535949707,grad_norm: 0.9999994472541803, iteration: 10710
loss: 1.0478944778442383,grad_norm: 0.9999993358889981, iteration: 10711
loss: 1.0871250629425049,grad_norm: 0.9999992284843346, iteration: 10712
loss: 1.0276165008544922,grad_norm: 0.9999993511179877, iteration: 10713
loss: 0.9994236826896667,grad_norm: 0.9999991897136609, iteration: 10714
loss: 1.0533177852630615,grad_norm: 0.9999991477847209, iteration: 10715
loss: 1.0210572481155396,grad_norm: 0.9999992731232431, iteration: 10716
loss: 0.9942880868911743,grad_norm: 0.9999992380253385, iteration: 10717
loss: 1.0982354879379272,grad_norm: 0.9999996076741507, iteration: 10718
loss: 1.0526021718978882,grad_norm: 0.9999992890137813, iteration: 10719
loss: 1.0842472314834595,grad_norm: 0.9999997929014557, iteration: 10720
loss: 1.0683904886245728,grad_norm: 0.9999993246854816, iteration: 10721
loss: 0.9990726113319397,grad_norm: 0.9999993514551688, iteration: 10722
loss: 1.0910149812698364,grad_norm: 0.9999996931524602, iteration: 10723
loss: 1.0504893064498901,grad_norm: 0.9999994096461042, iteration: 10724
loss: 1.141860842704773,grad_norm: 0.999999460290137, iteration: 10725
loss: 1.0625476837158203,grad_norm: 0.9999995456369213, iteration: 10726
loss: 1.063163161277771,grad_norm: 0.9999992941622934, iteration: 10727
loss: 1.0377451181411743,grad_norm: 0.9999990352617181, iteration: 10728
loss: 1.054460048675537,grad_norm: 0.9999992174834765, iteration: 10729
loss: 1.0471436977386475,grad_norm: 0.9999994824596594, iteration: 10730
loss: 0.9823065400123596,grad_norm: 0.9999993369708284, iteration: 10731
loss: 1.0436128377914429,grad_norm: 0.999999562426582, iteration: 10732
loss: 1.097684383392334,grad_norm: 0.999999631604317, iteration: 10733
loss: 1.0150412321090698,grad_norm: 0.9999991639269094, iteration: 10734
loss: 1.1818203926086426,grad_norm: 0.9999998796951142, iteration: 10735
loss: 1.0935847759246826,grad_norm: 0.9999994790158572, iteration: 10736
loss: 1.0274851322174072,grad_norm: 0.9999992393946958, iteration: 10737
loss: 1.0353542566299438,grad_norm: 0.9999992582681843, iteration: 10738
loss: 1.0666533708572388,grad_norm: 0.9999993532132457, iteration: 10739
loss: 1.02436101436615,grad_norm: 0.9999993326220598, iteration: 10740
loss: 1.0911720991134644,grad_norm: 0.9999992474561428, iteration: 10741
loss: 1.0493943691253662,grad_norm: 0.9999993358088185, iteration: 10742
loss: 1.0323190689086914,grad_norm: 0.9999992930420224, iteration: 10743
loss: 0.9925802946090698,grad_norm: 0.999999280460537, iteration: 10744
loss: 1.1101765632629395,grad_norm: 0.9999995770135096, iteration: 10745
loss: 0.9692482948303223,grad_norm: 0.9999991620250442, iteration: 10746
loss: 1.0454986095428467,grad_norm: 0.999999266562395, iteration: 10747
loss: 1.0650373697280884,grad_norm: 0.999999451786077, iteration: 10748
loss: 1.0278879404067993,grad_norm: 0.9999993601321941, iteration: 10749
loss: 1.037354826927185,grad_norm: 0.9999994852662494, iteration: 10750
loss: 1.0638201236724854,grad_norm: 0.9999991963566636, iteration: 10751
loss: 1.0577222108840942,grad_norm: 0.9999992118914984, iteration: 10752
loss: 1.0600844621658325,grad_norm: 0.99999966873186, iteration: 10753
loss: 1.049522876739502,grad_norm: 0.9999992303280044, iteration: 10754
loss: 1.0363695621490479,grad_norm: 0.9999991356015862, iteration: 10755
loss: 1.0357277393341064,grad_norm: 0.9999996568207857, iteration: 10756
loss: 1.0170866250991821,grad_norm: 0.9999992242853943, iteration: 10757
loss: 1.0902907848358154,grad_norm: 0.999999346856398, iteration: 10758
loss: 1.1014370918273926,grad_norm: 0.9999995234271895, iteration: 10759
loss: 1.054261326789856,grad_norm: 0.9999994641407991, iteration: 10760
loss: 1.017769694328308,grad_norm: 0.9999991537384595, iteration: 10761
loss: 1.0114009380340576,grad_norm: 0.9999992572235281, iteration: 10762
loss: 1.0772840976715088,grad_norm: 0.9999998473278117, iteration: 10763
loss: 1.0825272798538208,grad_norm: 0.9999995917654599, iteration: 10764
loss: 1.0104713439941406,grad_norm: 0.9999992784981695, iteration: 10765
loss: 1.0425995588302612,grad_norm: 0.999999321093928, iteration: 10766
loss: 1.0105355978012085,grad_norm: 0.9999994436227638, iteration: 10767
loss: 1.2510709762573242,grad_norm: 0.9999996885669207, iteration: 10768
loss: 1.022431492805481,grad_norm: 0.9999995651636481, iteration: 10769
loss: 1.1828330755233765,grad_norm: 0.9999995469127497, iteration: 10770
loss: 1.072265863418579,grad_norm: 0.9999998218136442, iteration: 10771
loss: 1.1261982917785645,grad_norm: 1.0000000593331382, iteration: 10772
loss: 0.9687308073043823,grad_norm: 0.9999993451801694, iteration: 10773
loss: 1.010149598121643,grad_norm: 0.9999994462389388, iteration: 10774
loss: 1.0588499307632446,grad_norm: 0.9999995114454698, iteration: 10775
loss: 1.1157574653625488,grad_norm: 0.9999996500211222, iteration: 10776
loss: 1.0689102411270142,grad_norm: 0.9999994716313008, iteration: 10777
loss: 1.115525484085083,grad_norm: 0.9999996773934235, iteration: 10778
loss: 1.0696282386779785,grad_norm: 0.9999994934969272, iteration: 10779
loss: 1.046281337738037,grad_norm: 0.9999994168201748, iteration: 10780
loss: 1.0962367057800293,grad_norm: 0.9999994067690238, iteration: 10781
loss: 1.0458108186721802,grad_norm: 0.999999692629563, iteration: 10782
loss: 1.0076018571853638,grad_norm: 0.9999994176322281, iteration: 10783
loss: 1.134690523147583,grad_norm: 0.999999938538289, iteration: 10784
loss: 1.0383265018463135,grad_norm: 0.9999992118202169, iteration: 10785
loss: 1.0681687593460083,grad_norm: 0.9999991176221943, iteration: 10786
loss: 1.0243624448776245,grad_norm: 0.9999995228281615, iteration: 10787
loss: 1.0603538751602173,grad_norm: 0.9999995978835844, iteration: 10788
loss: 1.0420559644699097,grad_norm: 0.9999997827795817, iteration: 10789
loss: 1.0398821830749512,grad_norm: 0.9999994058994485, iteration: 10790
loss: 1.023840308189392,grad_norm: 0.9999993599881596, iteration: 10791
loss: 1.0219769477844238,grad_norm: 0.9999994288617151, iteration: 10792
loss: 1.0236735343933105,grad_norm: 0.9999992179603662, iteration: 10793
loss: 1.049328327178955,grad_norm: 0.9999995065194214, iteration: 10794
loss: 1.0189911127090454,grad_norm: 0.9999993778480571, iteration: 10795
loss: 1.0494437217712402,grad_norm: 0.999999179692081, iteration: 10796
loss: 1.030352234840393,grad_norm: 0.9999993775989133, iteration: 10797
loss: 1.0622529983520508,grad_norm: 0.999999766838428, iteration: 10798
loss: 1.0369352102279663,grad_norm: 0.9999998382132331, iteration: 10799
loss: 1.0359948873519897,grad_norm: 0.9999992963675908, iteration: 10800
loss: 0.9877498149871826,grad_norm: 0.9999992223898652, iteration: 10801
loss: 1.0818980932235718,grad_norm: 0.9999993519665307, iteration: 10802
loss: 1.0777875185012817,grad_norm: 0.9999993960417987, iteration: 10803
loss: 1.037710189819336,grad_norm: 0.9999993415032161, iteration: 10804
loss: 0.9858807325363159,grad_norm: 0.9999995302520197, iteration: 10805
loss: 0.9807534217834473,grad_norm: 0.9999990932081355, iteration: 10806
loss: 1.0333969593048096,grad_norm: 0.9999997711057897, iteration: 10807
loss: 1.0200001001358032,grad_norm: 0.9999994941855297, iteration: 10808
loss: 1.036044955253601,grad_norm: 0.9999993355327169, iteration: 10809
loss: 1.0353676080703735,grad_norm: 0.9999993442875093, iteration: 10810
loss: 1.0364043712615967,grad_norm: 0.9999991554267502, iteration: 10811
loss: 1.0390135049819946,grad_norm: 0.9999996725890542, iteration: 10812
loss: 1.0243216753005981,grad_norm: 0.9999993243634971, iteration: 10813
loss: 0.9945023059844971,grad_norm: 0.9999990824412832, iteration: 10814
loss: 1.0570838451385498,grad_norm: 0.999999282149553, iteration: 10815
loss: 1.0440349578857422,grad_norm: 0.9999996638755193, iteration: 10816
loss: 1.0539395809173584,grad_norm: 0.9999994057042797, iteration: 10817
loss: 1.0553076267242432,grad_norm: 0.9999992123892786, iteration: 10818
loss: 1.01224684715271,grad_norm: 0.9999998888929403, iteration: 10819
loss: 1.0545921325683594,grad_norm: 0.9999996589018015, iteration: 10820
loss: 1.0068973302841187,grad_norm: 0.9999993916995135, iteration: 10821
loss: 1.01229989528656,grad_norm: 0.9999993156420091, iteration: 10822
loss: 0.9975479245185852,grad_norm: 0.9999994436653756, iteration: 10823
loss: 1.0616730451583862,grad_norm: 0.9999998205481149, iteration: 10824
loss: 1.0471165180206299,grad_norm: 0.9999998009222009, iteration: 10825
loss: 1.0377531051635742,grad_norm: 0.9999995535152546, iteration: 10826
loss: 1.0293041467666626,grad_norm: 0.9999992040687957, iteration: 10827
loss: 1.0401109457015991,grad_norm: 0.9999997610569265, iteration: 10828
loss: 1.048796534538269,grad_norm: 0.9999995251479284, iteration: 10829
loss: 1.070615291595459,grad_norm: 0.999999347813333, iteration: 10830
loss: 1.087438941001892,grad_norm: 0.999999710598168, iteration: 10831
loss: 1.0650891065597534,grad_norm: 0.9999995860432858, iteration: 10832
loss: 0.9776270389556885,grad_norm: 0.999999248396549, iteration: 10833
loss: 1.0422953367233276,grad_norm: 0.9999997235634863, iteration: 10834
loss: 1.022782564163208,grad_norm: 0.9999996899574094, iteration: 10835
loss: 1.0571954250335693,grad_norm: 0.9999995210144923, iteration: 10836
loss: 1.0471843481063843,grad_norm: 0.9999993952379651, iteration: 10837
loss: 1.0719050168991089,grad_norm: 0.9999998387735918, iteration: 10838
loss: 1.0942249298095703,grad_norm: 0.9999996322041675, iteration: 10839
loss: 1.033050298690796,grad_norm: 0.9999993948005266, iteration: 10840
loss: 1.0198709964752197,grad_norm: 0.9999997056021178, iteration: 10841
loss: 1.020236611366272,grad_norm: 0.999999276100498, iteration: 10842
loss: 1.0613828897476196,grad_norm: 0.9999993413827182, iteration: 10843
loss: 1.0283621549606323,grad_norm: 0.9999992275989018, iteration: 10844
loss: 1.0672686100006104,grad_norm: 0.9999997700101665, iteration: 10845
loss: 0.9951118230819702,grad_norm: 0.9999993002331368, iteration: 10846
loss: 1.0371034145355225,grad_norm: 0.9999995311735113, iteration: 10847
loss: 1.004241943359375,grad_norm: 0.9999994400352678, iteration: 10848
loss: 1.0531498193740845,grad_norm: 0.9999997623381752, iteration: 10849
loss: 1.0815833806991577,grad_norm: 0.9999994831545017, iteration: 10850
loss: 1.0907098054885864,grad_norm: 0.9999993792282544, iteration: 10851
loss: 1.0613346099853516,grad_norm: 0.9999995899138246, iteration: 10852
loss: 1.0140587091445923,grad_norm: 0.9999990859572672, iteration: 10853
loss: 1.0371320247650146,grad_norm: 0.9999996714849215, iteration: 10854
loss: 1.0160220861434937,grad_norm: 0.999999540979, iteration: 10855
loss: 1.0566685199737549,grad_norm: 0.9999995881646425, iteration: 10856
loss: 1.0514203310012817,grad_norm: 0.9999995909074813, iteration: 10857
loss: 1.093077540397644,grad_norm: 0.9999995749565069, iteration: 10858
loss: 1.1469966173171997,grad_norm: 0.9999996679680762, iteration: 10859
loss: 1.0311167240142822,grad_norm: 0.9999994548404585, iteration: 10860
loss: 1.0423784255981445,grad_norm: 0.9999993571670638, iteration: 10861
loss: 1.0384079217910767,grad_norm: 0.9999992595962994, iteration: 10862
loss: 1.0178234577178955,grad_norm: 0.9999994066428449, iteration: 10863
loss: 1.0940653085708618,grad_norm: 0.9999998213152022, iteration: 10864
loss: 1.105155110359192,grad_norm: 0.9999996879222337, iteration: 10865
loss: 1.113667607307434,grad_norm: 0.9999998027956191, iteration: 10866
loss: 1.029281735420227,grad_norm: 0.999999859157977, iteration: 10867
loss: 1.0508595705032349,grad_norm: 0.9999994379825918, iteration: 10868
loss: 1.1176859140396118,grad_norm: 0.9999997450964221, iteration: 10869
loss: 1.0669755935668945,grad_norm: 0.9999995476164247, iteration: 10870
loss: 1.031669020652771,grad_norm: 0.9999996375298852, iteration: 10871
loss: 1.0518990755081177,grad_norm: 0.999999138940566, iteration: 10872
loss: 1.0480090379714966,grad_norm: 0.9999996906523675, iteration: 10873
loss: 1.014290690422058,grad_norm: 0.999999264283729, iteration: 10874
loss: 1.0981601476669312,grad_norm: 0.9999996898124791, iteration: 10875
loss: 1.044903039932251,grad_norm: 0.9999996191447077, iteration: 10876
loss: 1.047141194343567,grad_norm: 0.9999991178814673, iteration: 10877
loss: 1.020412802696228,grad_norm: 0.9999993386346653, iteration: 10878
loss: 1.1298185586929321,grad_norm: 0.9999998334933343, iteration: 10879
loss: 1.0186585187911987,grad_norm: 0.9999993059533516, iteration: 10880
loss: 1.0318883657455444,grad_norm: 0.9999991195964228, iteration: 10881
loss: 1.1245129108428955,grad_norm: 0.9999998499889987, iteration: 10882
loss: 1.0631881952285767,grad_norm: 0.9999996049853305, iteration: 10883
loss: 1.0297542810440063,grad_norm: 0.9999996727440577, iteration: 10884
loss: 1.0761860609054565,grad_norm: 0.9999995731814448, iteration: 10885
loss: 1.0124046802520752,grad_norm: 0.9999993624681076, iteration: 10886
loss: 1.072546362876892,grad_norm: 0.999999625278127, iteration: 10887
loss: 1.0467358827590942,grad_norm: 0.9999996881334855, iteration: 10888
loss: 0.9831724166870117,grad_norm: 0.9999992665691723, iteration: 10889
loss: 1.037654161453247,grad_norm: 0.9999994318202308, iteration: 10890
loss: 1.0949795246124268,grad_norm: 0.9999998491159414, iteration: 10891
loss: 1.05351722240448,grad_norm: 0.9999995083768163, iteration: 10892
loss: 1.0181190967559814,grad_norm: 0.9999991912422178, iteration: 10893
loss: 1.061951756477356,grad_norm: 0.9999996975421168, iteration: 10894
loss: 1.0441639423370361,grad_norm: 0.9999996452644255, iteration: 10895
loss: 1.053023099899292,grad_norm: 0.9999993555938067, iteration: 10896
loss: 1.0636959075927734,grad_norm: 0.9999993075924387, iteration: 10897
loss: 1.0147989988327026,grad_norm: 0.9999995141697964, iteration: 10898
loss: 0.9908694624900818,grad_norm: 0.9999991359414097, iteration: 10899
loss: 1.116904616355896,grad_norm: 0.9999997970414155, iteration: 10900
loss: 1.0677082538604736,grad_norm: 0.9999994190515962, iteration: 10901
loss: 1.0411540269851685,grad_norm: 0.9999993710318961, iteration: 10902
loss: 1.0178524255752563,grad_norm: 0.9999993592362412, iteration: 10903
loss: 1.0711722373962402,grad_norm: 0.999999305419004, iteration: 10904
loss: 1.0503894090652466,grad_norm: 0.9999993574101136, iteration: 10905
loss: 1.0187761783599854,grad_norm: 0.9999993385907433, iteration: 10906
loss: 1.092520833015442,grad_norm: 0.9999998675640183, iteration: 10907
loss: 1.081495761871338,grad_norm: 0.9999998725565818, iteration: 10908
loss: 1.0773935317993164,grad_norm: 0.9999992234188244, iteration: 10909
loss: 1.0763202905654907,grad_norm: 0.9999994202864653, iteration: 10910
loss: 1.0517884492874146,grad_norm: 0.9999994219486057, iteration: 10911
loss: 1.0201314687728882,grad_norm: 0.9999992898666306, iteration: 10912
loss: 1.0585546493530273,grad_norm: 0.9999996719661075, iteration: 10913
loss: 0.9964187145233154,grad_norm: 0.9999994631180504, iteration: 10914
loss: 1.0186868906021118,grad_norm: 0.999999236251869, iteration: 10915
loss: 1.022099494934082,grad_norm: 0.9999993251926926, iteration: 10916
loss: 0.9930943846702576,grad_norm: 0.9999992667542634, iteration: 10917
loss: 1.116498589515686,grad_norm: 0.9999996450261204, iteration: 10918
loss: 1.0203702449798584,grad_norm: 0.9999996495723418, iteration: 10919
loss: 1.1112018823623657,grad_norm: 0.99999965654474, iteration: 10920
loss: 1.0359971523284912,grad_norm: 0.9999995226400448, iteration: 10921
loss: 1.0836856365203857,grad_norm: 0.9999994494688107, iteration: 10922
loss: 1.044694423675537,grad_norm: 0.9999993844772708, iteration: 10923
loss: 1.0703175067901611,grad_norm: 0.9999993966960825, iteration: 10924
loss: 1.0433838367462158,grad_norm: 0.9999992411105617, iteration: 10925
loss: 1.0350849628448486,grad_norm: 0.9999994087194998, iteration: 10926
loss: 1.0721862316131592,grad_norm: 0.9999996958412223, iteration: 10927
loss: 1.0375943183898926,grad_norm: 0.9999994148934085, iteration: 10928
loss: 1.0444607734680176,grad_norm: 0.9999992127856911, iteration: 10929
loss: 1.0946471691131592,grad_norm: 0.999999804306964, iteration: 10930
loss: 1.0432764291763306,grad_norm: 0.9999991827391843, iteration: 10931
loss: 1.0718598365783691,grad_norm: 0.9999995515098589, iteration: 10932
loss: 1.0296775102615356,grad_norm: 0.9999996350427638, iteration: 10933
loss: 1.1302605867385864,grad_norm: 0.9999997850177831, iteration: 10934
loss: 1.0537523031234741,grad_norm: 0.9999995538643024, iteration: 10935
loss: 1.0235941410064697,grad_norm: 0.9999994030856696, iteration: 10936
loss: 1.0590020418167114,grad_norm: 0.9999996029615642, iteration: 10937
loss: 1.0472137928009033,grad_norm: 0.999999373128014, iteration: 10938
loss: 1.0307821035385132,grad_norm: 0.9999991409824124, iteration: 10939
loss: 1.0402928590774536,grad_norm: 0.9999993417924785, iteration: 10940
loss: 0.9985565543174744,grad_norm: 0.9999993588373816, iteration: 10941
loss: 1.0528093576431274,grad_norm: 0.9999998117250947, iteration: 10942
loss: 0.989666759967804,grad_norm: 0.9999992205049197, iteration: 10943
loss: 0.9651844501495361,grad_norm: 0.9999993962641405, iteration: 10944
loss: 1.0489379167556763,grad_norm: 0.9999993358366503, iteration: 10945
loss: 1.103314757347107,grad_norm: 0.9999994073811392, iteration: 10946
loss: 1.0974353551864624,grad_norm: 0.9999996501609784, iteration: 10947
loss: 1.011134386062622,grad_norm: 0.9999993703571337, iteration: 10948
loss: 1.0485808849334717,grad_norm: 0.9999991745925638, iteration: 10949
loss: 1.0341215133666992,grad_norm: 0.9999993802457873, iteration: 10950
loss: 1.021518588066101,grad_norm: 0.999999306249027, iteration: 10951
loss: 1.0289338827133179,grad_norm: 0.9999993023029848, iteration: 10952
loss: 1.0162912607192993,grad_norm: 0.9999994168225302, iteration: 10953
loss: 0.9937508702278137,grad_norm: 0.999999234939096, iteration: 10954
loss: 1.0317448377609253,grad_norm: 0.9999992201318537, iteration: 10955
loss: 1.1282247304916382,grad_norm: 0.9999996664140761, iteration: 10956
loss: 1.061063528060913,grad_norm: 0.9999998493087636, iteration: 10957
loss: 1.046022891998291,grad_norm: 0.9999997083999834, iteration: 10958
loss: 1.0441902875900269,grad_norm: 0.9999993900047591, iteration: 10959
loss: 1.0462217330932617,grad_norm: 0.9999994588944517, iteration: 10960
loss: 1.0081737041473389,grad_norm: 0.9999993016271028, iteration: 10961
loss: 1.0447864532470703,grad_norm: 0.9999997868511377, iteration: 10962
loss: 1.0301874876022339,grad_norm: 0.9999995784890706, iteration: 10963
loss: 1.0208051204681396,grad_norm: 0.9999991703399429, iteration: 10964
loss: 0.9840577840805054,grad_norm: 0.9999993279332537, iteration: 10965
loss: 1.061697244644165,grad_norm: 0.999999356511092, iteration: 10966
loss: 1.0546681880950928,grad_norm: 0.9999995207602809, iteration: 10967
loss: 1.0704479217529297,grad_norm: 0.9999990546170656, iteration: 10968
loss: 0.9853149652481079,grad_norm: 0.9999991490511367, iteration: 10969
loss: 1.044899821281433,grad_norm: 0.999999377060832, iteration: 10970
loss: 1.1853454113006592,grad_norm: 0.9999998712217543, iteration: 10971
loss: 1.0196340084075928,grad_norm: 0.9999999438335263, iteration: 10972
loss: 1.06787109375,grad_norm: 0.9999995156610626, iteration: 10973
loss: 1.0495792627334595,grad_norm: 0.9999994357190942, iteration: 10974
loss: 1.0659303665161133,grad_norm: 0.9999994650333107, iteration: 10975
loss: 1.0470314025878906,grad_norm: 0.9999995136900515, iteration: 10976
loss: 1.0282169580459595,grad_norm: 0.9999994502891071, iteration: 10977
loss: 1.0398262739181519,grad_norm: 0.9999993137612063, iteration: 10978
loss: 1.0585250854492188,grad_norm: 0.9999993580265588, iteration: 10979
loss: 1.0199947357177734,grad_norm: 0.9999995204869248, iteration: 10980
loss: 1.0650838613510132,grad_norm: 0.9999994584875187, iteration: 10981
loss: 1.0395920276641846,grad_norm: 0.9999992939656126, iteration: 10982
loss: 1.0610146522521973,grad_norm: 0.9999997608565832, iteration: 10983
loss: 1.0215458869934082,grad_norm: 0.9999990521646266, iteration: 10984
loss: 1.150793194770813,grad_norm: 0.9999996073212579, iteration: 10985
loss: 1.0035942792892456,grad_norm: 0.999999485881938, iteration: 10986
loss: 1.020751953125,grad_norm: 0.9999991300326075, iteration: 10987
loss: 1.043656587600708,grad_norm: 0.9999992152060723, iteration: 10988
loss: 1.1523597240447998,grad_norm: 0.999999585123299, iteration: 10989
loss: 1.093348741531372,grad_norm: 0.9999996429144764, iteration: 10990
loss: 1.0514097213745117,grad_norm: 0.9999993589505264, iteration: 10991
loss: 1.0336987972259521,grad_norm: 0.9999996370050552, iteration: 10992
loss: 1.0117512941360474,grad_norm: 0.9999991267030977, iteration: 10993
loss: 1.0728888511657715,grad_norm: 0.9999997880334851, iteration: 10994
loss: 1.06588876247406,grad_norm: 0.999999440735351, iteration: 10995
loss: 1.0833091735839844,grad_norm: 0.999999540288265, iteration: 10996
loss: 1.035799264907837,grad_norm: 0.9999992682491049, iteration: 10997
loss: 1.0431574583053589,grad_norm: 0.9999992267946586, iteration: 10998
loss: 1.0085126161575317,grad_norm: 0.9999997618939259, iteration: 10999
loss: 1.0232619047164917,grad_norm: 0.9999995504530914, iteration: 11000
loss: 1.0025571584701538,grad_norm: 0.9999990400769129, iteration: 11001
loss: 1.0211830139160156,grad_norm: 0.9999990920683729, iteration: 11002
loss: 1.0633164644241333,grad_norm: 0.9999990939962038, iteration: 11003
loss: 0.9724612236022949,grad_norm: 0.9999997808592996, iteration: 11004
loss: 1.01320481300354,grad_norm: 0.9999991717616274, iteration: 11005
loss: 1.0584827661514282,grad_norm: 0.9999994861018088, iteration: 11006
loss: 1.050398826599121,grad_norm: 0.9999995682817937, iteration: 11007
loss: 1.0967128276824951,grad_norm: 0.9999995171605196, iteration: 11008
loss: 1.019457221031189,grad_norm: 0.9999992136861335, iteration: 11009
loss: 0.9665622115135193,grad_norm: 0.9999993365090543, iteration: 11010
loss: 1.0379594564437866,grad_norm: 0.9999992387651887, iteration: 11011
loss: 1.0225145816802979,grad_norm: 0.9999994853095583, iteration: 11012
loss: 1.0700937509536743,grad_norm: 0.9999995223411101, iteration: 11013
loss: 1.0352786779403687,grad_norm: 0.9999996122037758, iteration: 11014
loss: 1.0409221649169922,grad_norm: 0.9999993754907179, iteration: 11015
loss: 1.0831058025360107,grad_norm: 0.9999994846841417, iteration: 11016
loss: 1.0793498754501343,grad_norm: 0.9999992120385089, iteration: 11017
loss: 1.0168110132217407,grad_norm: 0.9999994569233538, iteration: 11018
loss: 1.0520304441452026,grad_norm: 0.9999993468843974, iteration: 11019
loss: 1.0177465677261353,grad_norm: 0.9999992526543959, iteration: 11020
loss: 1.053324818611145,grad_norm: 0.9999994059902279, iteration: 11021
loss: 0.9920270442962646,grad_norm: 0.9999997587938548, iteration: 11022
loss: 1.0652729272842407,grad_norm: 0.9999999288704285, iteration: 11023
loss: 0.9756351709365845,grad_norm: 0.9999991776977012, iteration: 11024
loss: 1.0908803939819336,grad_norm: 0.9999998821599158, iteration: 11025
loss: 1.0733373165130615,grad_norm: 0.9999998386446836, iteration: 11026
loss: 1.0562843084335327,grad_norm: 0.999999584835185, iteration: 11027
loss: 1.0275077819824219,grad_norm: 0.9999996991753066, iteration: 11028
loss: 1.07418692111969,grad_norm: 0.999999474525943, iteration: 11029
loss: 1.1025527715682983,grad_norm: 0.9999995558005836, iteration: 11030
loss: 1.0607191324234009,grad_norm: 0.9999994834311239, iteration: 11031
loss: 1.0823559761047363,grad_norm: 0.9999995662465924, iteration: 11032
loss: 1.0029640197753906,grad_norm: 0.9999992811364741, iteration: 11033
loss: 1.0119856595993042,grad_norm: 0.9999992268943328, iteration: 11034
loss: 1.0222299098968506,grad_norm: 0.9999994652274398, iteration: 11035
loss: 1.0309193134307861,grad_norm: 0.999999122196738, iteration: 11036
loss: 1.0781506299972534,grad_norm: 0.9999994289532297, iteration: 11037
loss: 1.0563547611236572,grad_norm: 0.9999993732651543, iteration: 11038
loss: 1.0361498594284058,grad_norm: 0.9999994576145149, iteration: 11039
loss: 1.0139679908752441,grad_norm: 0.9999994847630763, iteration: 11040
loss: 1.0352904796600342,grad_norm: 0.9999992648027874, iteration: 11041
loss: 1.0433207750320435,grad_norm: 0.9999991948252476, iteration: 11042
loss: 1.0329135656356812,grad_norm: 0.9999993054930123, iteration: 11043
loss: 1.0345655679702759,grad_norm: 0.9999994565785418, iteration: 11044
loss: 1.0177563428878784,grad_norm: 0.9999992703111008, iteration: 11045
loss: 1.1184730529785156,grad_norm: 0.9999996361219963, iteration: 11046
loss: 1.0265473127365112,grad_norm: 0.9999992806629918, iteration: 11047
loss: 0.9860203862190247,grad_norm: 0.9999992581506015, iteration: 11048
loss: 1.0741183757781982,grad_norm: 0.9999993640515035, iteration: 11049
loss: 0.9822039008140564,grad_norm: 0.9999991838369054, iteration: 11050
loss: 1.1206077337265015,grad_norm: 0.9999998529801305, iteration: 11051
loss: 1.0617605447769165,grad_norm: 0.999999511661096, iteration: 11052
loss: 1.0355340242385864,grad_norm: 0.9999991517764258, iteration: 11053
loss: 1.0384042263031006,grad_norm: 0.9999993699791324, iteration: 11054
loss: 1.076316237449646,grad_norm: 0.999999338145408, iteration: 11055
loss: 1.0273672342300415,grad_norm: 0.9999993505050705, iteration: 11056
loss: 1.039652705192566,grad_norm: 0.9999996178591756, iteration: 11057
loss: 1.0869135856628418,grad_norm: 0.9999995193071883, iteration: 11058
loss: 1.1789817810058594,grad_norm: 0.999999660159456, iteration: 11059
loss: 1.010661244392395,grad_norm: 0.9999991058659731, iteration: 11060
loss: 1.0113660097122192,grad_norm: 0.9999995267446776, iteration: 11061
loss: 0.9882608652114868,grad_norm: 0.9999992650715023, iteration: 11062
loss: 1.002902865409851,grad_norm: 0.99999952396425, iteration: 11063
loss: 0.9897004961967468,grad_norm: 0.9999996275782308, iteration: 11064
loss: 1.0469837188720703,grad_norm: 0.9999992828711873, iteration: 11065
loss: 1.0886166095733643,grad_norm: 0.9999993332653105, iteration: 11066
loss: 1.0537575483322144,grad_norm: 0.9999995496816789, iteration: 11067
loss: 1.0198909044265747,grad_norm: 0.9999993686544645, iteration: 11068
loss: 1.0647131204605103,grad_norm: 0.9999993276312974, iteration: 11069
loss: 1.0535640716552734,grad_norm: 0.9999992719622229, iteration: 11070
loss: 1.0033981800079346,grad_norm: 0.9999994616357684, iteration: 11071
loss: 1.0704833269119263,grad_norm: 0.9999998038915235, iteration: 11072
loss: 1.1075912714004517,grad_norm: 0.9999991956043134, iteration: 11073
loss: 1.0018187761306763,grad_norm: 0.9999994117341617, iteration: 11074
loss: 1.0338568687438965,grad_norm: 0.9999993915116823, iteration: 11075
loss: 1.0288089513778687,grad_norm: 0.999999368277617, iteration: 11076
loss: 1.0617929697036743,grad_norm: 0.9999992105821652, iteration: 11077
loss: 1.1139379739761353,grad_norm: 0.9999998291675176, iteration: 11078
loss: 1.0202264785766602,grad_norm: 0.9999992209263988, iteration: 11079
loss: 1.0545494556427002,grad_norm: 0.9999995630283374, iteration: 11080
loss: 1.0553512573242188,grad_norm: 0.9999996283420433, iteration: 11081
loss: 1.0421724319458008,grad_norm: 0.9999992767030476, iteration: 11082
loss: 1.0118224620819092,grad_norm: 0.9999991150642684, iteration: 11083
loss: 1.0270700454711914,grad_norm: 0.9999991860792065, iteration: 11084
loss: 0.9996922016143799,grad_norm: 0.9999991739528243, iteration: 11085
loss: 1.0193190574645996,grad_norm: 0.9999992225881448, iteration: 11086
loss: 0.9674004316329956,grad_norm: 0.9999991941167861, iteration: 11087
loss: 1.0546016693115234,grad_norm: 0.9999997707802091, iteration: 11088
loss: 1.080014944076538,grad_norm: 0.9999995921160639, iteration: 11089
loss: 1.0882036685943604,grad_norm: 0.9999992116818808, iteration: 11090
loss: 1.0300930738449097,grad_norm: 0.9999993781194305, iteration: 11091
loss: 0.989220380783081,grad_norm: 0.9999992729566564, iteration: 11092
loss: 1.075329065322876,grad_norm: 0.9999997929710482, iteration: 11093
loss: 1.0278788805007935,grad_norm: 0.9999993719534487, iteration: 11094
loss: 1.0356557369232178,grad_norm: 0.9999996383011096, iteration: 11095
loss: 0.9727835655212402,grad_norm: 0.9999995052196354, iteration: 11096
loss: 1.0479496717453003,grad_norm: 0.9999994709857448, iteration: 11097
loss: 1.1049225330352783,grad_norm: 0.9999994819035649, iteration: 11098
loss: 1.0600090026855469,grad_norm: 0.9999993124051498, iteration: 11099
loss: 1.1548410654067993,grad_norm: 0.9999998575127051, iteration: 11100
loss: 1.0164130926132202,grad_norm: 0.9999992997740519, iteration: 11101
loss: 1.0414067506790161,grad_norm: 0.9999992371500486, iteration: 11102
loss: 1.027763843536377,grad_norm: 0.9999995296864969, iteration: 11103
loss: 1.1118860244750977,grad_norm: 0.9999998010433141, iteration: 11104
loss: 0.9755954742431641,grad_norm: 0.9999991263958777, iteration: 11105
loss: 0.9978892803192139,grad_norm: 0.9999992455090534, iteration: 11106
loss: 1.0092066526412964,grad_norm: 0.9999991602147357, iteration: 11107
loss: 0.988495409488678,grad_norm: 0.9999993865541189, iteration: 11108
loss: 1.1263599395751953,grad_norm: 0.9999997820970059, iteration: 11109
loss: 0.9983207583427429,grad_norm: 0.9999994534446351, iteration: 11110
loss: 1.0318611860275269,grad_norm: 0.9999996085074997, iteration: 11111
loss: 1.0574318170547485,grad_norm: 0.9999994999878061, iteration: 11112
loss: 1.0247057676315308,grad_norm: 0.9999992119086382, iteration: 11113
loss: 1.0009803771972656,grad_norm: 0.9999991025532989, iteration: 11114
loss: 1.1048778295516968,grad_norm: 0.9999997080733509, iteration: 11115
loss: 0.9919726848602295,grad_norm: 0.9999992758208759, iteration: 11116
loss: 1.027587652206421,grad_norm: 0.9999996902131114, iteration: 11117
loss: 0.9977920651435852,grad_norm: 0.9999992905801997, iteration: 11118
loss: 1.0596867799758911,grad_norm: 0.9999993447221112, iteration: 11119
loss: 1.0102113485336304,grad_norm: 0.9999996891111772, iteration: 11120
loss: 1.0223337411880493,grad_norm: 0.9999994111331857, iteration: 11121
loss: 1.0740751028060913,grad_norm: 0.9999996142319324, iteration: 11122
loss: 1.073405146598816,grad_norm: 0.9999994195360811, iteration: 11123
loss: 1.0391242504119873,grad_norm: 0.9999996172995852, iteration: 11124
loss: 1.033835768699646,grad_norm: 0.9999994984229297, iteration: 11125
loss: 1.0246257781982422,grad_norm: 0.9999993649187078, iteration: 11126
loss: 1.0248377323150635,grad_norm: 0.9999992103157702, iteration: 11127
loss: 1.1260035037994385,grad_norm: 0.9999996363934344, iteration: 11128
loss: 1.0256590843200684,grad_norm: 0.9999994902268812, iteration: 11129
loss: 1.046122670173645,grad_norm: 0.9999994702375103, iteration: 11130
loss: 1.0491681098937988,grad_norm: 0.999999339663898, iteration: 11131
loss: 1.0370458364486694,grad_norm: 0.9999994230283578, iteration: 11132
loss: 0.9921090006828308,grad_norm: 0.9999993121242176, iteration: 11133
loss: 1.0083072185516357,grad_norm: 0.9999994889980318, iteration: 11134
loss: 1.0325157642364502,grad_norm: 0.9999992741208746, iteration: 11135
loss: 0.9706219434738159,grad_norm: 0.9999991657615156, iteration: 11136
loss: 1.0285340547561646,grad_norm: 0.9999995140543884, iteration: 11137
loss: 1.0513874292373657,grad_norm: 0.9999997957337361, iteration: 11138
loss: 1.076471209526062,grad_norm: 0.9999995391890303, iteration: 11139
loss: 1.0439532995224,grad_norm: 0.9999991717349695, iteration: 11140
loss: 1.0637561082839966,grad_norm: 0.9999996281674289, iteration: 11141
loss: 1.057425856590271,grad_norm: 0.9999995587712105, iteration: 11142
loss: 1.1016714572906494,grad_norm: 0.9999996256093322, iteration: 11143
loss: 1.0292987823486328,grad_norm: 0.999999419012838, iteration: 11144
loss: 1.0252982378005981,grad_norm: 0.9999994208725022, iteration: 11145
loss: 1.045206904411316,grad_norm: 0.9999996624476584, iteration: 11146
loss: 1.0027358531951904,grad_norm: 0.9999994342020913, iteration: 11147
loss: 1.0565104484558105,grad_norm: 0.9999992739262323, iteration: 11148
loss: 1.0070527791976929,grad_norm: 0.9738057800167542, iteration: 11149
loss: 1.093855381011963,grad_norm: 0.99999923035195, iteration: 11150
loss: 1.0390903949737549,grad_norm: 0.999999248021424, iteration: 11151
loss: 1.1004431247711182,grad_norm: 0.9999997421022367, iteration: 11152
loss: 1.082648754119873,grad_norm: 0.9999991694330967, iteration: 11153
loss: 1.0050305128097534,grad_norm: 0.9999994388843116, iteration: 11154
loss: 1.0523555278778076,grad_norm: 0.999999330868175, iteration: 11155
loss: 0.9833502769470215,grad_norm: 0.9999997128417925, iteration: 11156
loss: 0.9868519902229309,grad_norm: 0.9999992764535967, iteration: 11157
loss: 1.0130462646484375,grad_norm: 0.9999991259777581, iteration: 11158
loss: 1.0206159353256226,grad_norm: 0.9999992477442836, iteration: 11159
loss: 1.0673984289169312,grad_norm: 0.9999992030088679, iteration: 11160
loss: 1.0001802444458008,grad_norm: 0.9999996440640943, iteration: 11161
loss: 1.0489506721496582,grad_norm: 0.9999996079268942, iteration: 11162
loss: 1.0846434831619263,grad_norm: 0.9999993828838042, iteration: 11163
loss: 1.022434949874878,grad_norm: 0.9999993820961154, iteration: 11164
loss: 1.0893572568893433,grad_norm: 0.9999998969900536, iteration: 11165
loss: 1.0723639726638794,grad_norm: 0.999999372960133, iteration: 11166
loss: 1.0663782358169556,grad_norm: 0.9999994177210285, iteration: 11167
loss: 1.1204802989959717,grad_norm: 0.9999999856717173, iteration: 11168
loss: 1.019500970840454,grad_norm: 0.9999991067579439, iteration: 11169
loss: 1.0106189250946045,grad_norm: 0.999999457999735, iteration: 11170
loss: 1.048186182975769,grad_norm: 0.9999992775427756, iteration: 11171
loss: 1.0020009279251099,grad_norm: 0.9999993766742633, iteration: 11172
loss: 1.0314189195632935,grad_norm: 0.9999992862630535, iteration: 11173
loss: 1.0177570581436157,grad_norm: 0.9999997849129548, iteration: 11174
loss: 0.9913597702980042,grad_norm: 0.9999993502641391, iteration: 11175
loss: 1.049911379814148,grad_norm: 0.9999991386091682, iteration: 11176
loss: 1.0330009460449219,grad_norm: 0.9999990772537591, iteration: 11177
loss: 1.0285305976867676,grad_norm: 0.9999992916766335, iteration: 11178
loss: 1.0455695390701294,grad_norm: 0.9999995719830935, iteration: 11179
loss: 1.0365017652511597,grad_norm: 0.9999993283440205, iteration: 11180
loss: 1.0089329481124878,grad_norm: 0.9999994112250716, iteration: 11181
loss: 1.018410325050354,grad_norm: 0.9999995025406669, iteration: 11182
loss: 1.000078558921814,grad_norm: 0.9999992880229644, iteration: 11183
loss: 1.0248445272445679,grad_norm: 0.9999993470475891, iteration: 11184
loss: 1.051008701324463,grad_norm: 0.9999995740544505, iteration: 11185
loss: 1.0617552995681763,grad_norm: 0.9999993332238698, iteration: 11186
loss: 1.0138733386993408,grad_norm: 0.9999990798746181, iteration: 11187
loss: 1.0294069051742554,grad_norm: 0.999999859631683, iteration: 11188
loss: 1.0528285503387451,grad_norm: 0.9999994092020055, iteration: 11189
loss: 1.0625680685043335,grad_norm: 0.9999993537417201, iteration: 11190
loss: 1.1057063341140747,grad_norm: 0.9999996803288878, iteration: 11191
loss: 1.088976263999939,grad_norm: 0.999999736996571, iteration: 11192
loss: 1.0211234092712402,grad_norm: 0.9999993982809214, iteration: 11193
loss: 1.0405464172363281,grad_norm: 0.9999994467934573, iteration: 11194
loss: 1.0605628490447998,grad_norm: 0.9999995153567719, iteration: 11195
loss: 1.0211173295974731,grad_norm: 0.9999993945015563, iteration: 11196
loss: 1.0755964517593384,grad_norm: 0.999999565289051, iteration: 11197
loss: 1.0184866189956665,grad_norm: 0.9999995061787224, iteration: 11198
loss: 1.0769197940826416,grad_norm: 0.9999994734387195, iteration: 11199
loss: 1.034148931503296,grad_norm: 0.999999500777748, iteration: 11200
loss: 1.0920926332473755,grad_norm: 0.9999995787652005, iteration: 11201
loss: 1.0195865631103516,grad_norm: 0.9999992553464865, iteration: 11202
loss: 1.0169333219528198,grad_norm: 0.9999993354329471, iteration: 11203
loss: 1.0912171602249146,grad_norm: 0.999999630395603, iteration: 11204
loss: 1.0784943103790283,grad_norm: 0.9999996250581804, iteration: 11205
loss: 1.0976779460906982,grad_norm: 0.9999996888813998, iteration: 11206
loss: 1.0443955659866333,grad_norm: 0.9999992884219723, iteration: 11207
loss: 1.0098695755004883,grad_norm: 0.9999994297465791, iteration: 11208
loss: 0.998091995716095,grad_norm: 0.9999992262213443, iteration: 11209
loss: 1.0301196575164795,grad_norm: 0.9999991169999066, iteration: 11210
loss: 1.0508273839950562,grad_norm: 0.9999995368660937, iteration: 11211
loss: 1.0309052467346191,grad_norm: 0.9999994153536145, iteration: 11212
loss: 1.038851261138916,grad_norm: 0.9999992400539192, iteration: 11213
loss: 1.0404659509658813,grad_norm: 0.9999992109616987, iteration: 11214
loss: 1.043932557106018,grad_norm: 0.9999996050804625, iteration: 11215
loss: 1.034069299697876,grad_norm: 0.9999993193309809, iteration: 11216
loss: 1.0395137071609497,grad_norm: 0.9999996603774688, iteration: 11217
loss: 1.08154296875,grad_norm: 0.9999997464256627, iteration: 11218
loss: 0.9888447523117065,grad_norm: 0.9999995515639248, iteration: 11219
loss: 1.0346894264221191,grad_norm: 0.9999993723600807, iteration: 11220
loss: 0.9763438105583191,grad_norm: 0.9999992126007344, iteration: 11221
loss: 1.032071590423584,grad_norm: 0.9999994764051002, iteration: 11222
loss: 1.0658152103424072,grad_norm: 0.9999993570640686, iteration: 11223
loss: 1.0553956031799316,grad_norm: 0.9999997052317025, iteration: 11224
loss: 1.0650391578674316,grad_norm: 0.9999993213579934, iteration: 11225
loss: 1.034050464630127,grad_norm: 0.9999998733461615, iteration: 11226
loss: 1.0392894744873047,grad_norm: 0.9999994585349185, iteration: 11227
loss: 1.0528210401535034,grad_norm: 0.9999993463176328, iteration: 11228
loss: 0.9684787392616272,grad_norm: 0.9999992878894325, iteration: 11229
loss: 0.9895958304405212,grad_norm: 0.9999992161161279, iteration: 11230
loss: 1.042372703552246,grad_norm: 0.9999997593553324, iteration: 11231
loss: 1.0577607154846191,grad_norm: 0.999999507390504, iteration: 11232
loss: 1.051194667816162,grad_norm: 0.999999436307369, iteration: 11233
loss: 1.0359922647476196,grad_norm: 0.9999993729832235, iteration: 11234
loss: 1.0709513425827026,grad_norm: 0.9999993836827132, iteration: 11235
loss: 1.1147831678390503,grad_norm: 0.9999997642619919, iteration: 11236
loss: 1.035280704498291,grad_norm: 0.9999992947217459, iteration: 11237
loss: 1.0598093271255493,grad_norm: 0.9999992384377636, iteration: 11238
loss: 1.0495883226394653,grad_norm: 0.9999994405325844, iteration: 11239
loss: 0.9755870699882507,grad_norm: 0.9999994644575894, iteration: 11240
loss: 1.0765256881713867,grad_norm: 0.9999996930344083, iteration: 11241
loss: 1.085662603378296,grad_norm: 0.9999991070205876, iteration: 11242
loss: 1.0218660831451416,grad_norm: 0.9999992792458352, iteration: 11243
loss: 1.0483530759811401,grad_norm: 0.9999992217547512, iteration: 11244
loss: 0.9909985065460205,grad_norm: 0.9999991578678495, iteration: 11245
loss: 0.9995984435081482,grad_norm: 0.9999993189650956, iteration: 11246
loss: 1.088352918624878,grad_norm: 0.9999993977126582, iteration: 11247
loss: 0.9790932536125183,grad_norm: 0.9999995160646661, iteration: 11248
loss: 1.030314326286316,grad_norm: 0.999999328125703, iteration: 11249
loss: 1.0769678354263306,grad_norm: 0.999999280345546, iteration: 11250
loss: 0.9866917729377747,grad_norm: 0.9999992480586953, iteration: 11251
loss: 1.0649343729019165,grad_norm: 0.9999995236700836, iteration: 11252
loss: 1.0990869998931885,grad_norm: 0.9999995899264922, iteration: 11253
loss: 1.019025444984436,grad_norm: 0.9999991968735105, iteration: 11254
loss: 1.0547629594802856,grad_norm: 0.999999689055894, iteration: 11255
loss: 1.016392707824707,grad_norm: 0.9999993841817444, iteration: 11256
loss: 1.0146374702453613,grad_norm: 0.9999997164810529, iteration: 11257
loss: 1.0328940153121948,grad_norm: 0.9999995186861261, iteration: 11258
loss: 1.0498390197753906,grad_norm: 0.9999991210402636, iteration: 11259
loss: 1.0686732530593872,grad_norm: 0.9999994801528732, iteration: 11260
loss: 1.0437595844268799,grad_norm: 0.999999179319004, iteration: 11261
loss: 1.0406780242919922,grad_norm: 0.9999994739843961, iteration: 11262
loss: 1.0834102630615234,grad_norm: 0.9999997163862814, iteration: 11263
loss: 1.0680238008499146,grad_norm: 0.9999991646287804, iteration: 11264
loss: 1.0235271453857422,grad_norm: 0.9999991747377798, iteration: 11265
loss: 1.0571751594543457,grad_norm: 0.9999996601492118, iteration: 11266
loss: 1.0355429649353027,grad_norm: 0.9999998476498498, iteration: 11267
loss: 1.00545072555542,grad_norm: 0.9999992675648024, iteration: 11268
loss: 1.0136373043060303,grad_norm: 0.9999992097665087, iteration: 11269
loss: 1.0238574743270874,grad_norm: 0.9999993368556993, iteration: 11270
loss: 1.0791349411010742,grad_norm: 0.999999600436618, iteration: 11271
loss: 1.0494745969772339,grad_norm: 0.9999992745625781, iteration: 11272
loss: 1.0224074125289917,grad_norm: 0.999999195110171, iteration: 11273
loss: 1.0137823820114136,grad_norm: 0.9999991661536102, iteration: 11274
loss: 1.012430191040039,grad_norm: 0.9999993508765882, iteration: 11275
loss: 1.0474052429199219,grad_norm: 0.9999998034431107, iteration: 11276
loss: 1.0721383094787598,grad_norm: 0.9999997402938827, iteration: 11277
loss: 0.9741629362106323,grad_norm: 0.9999992366358683, iteration: 11278
loss: 1.0181092023849487,grad_norm: 0.9999993886915238, iteration: 11279
loss: 1.001021385192871,grad_norm: 0.9999992732803242, iteration: 11280
loss: 1.1095517873764038,grad_norm: 0.9999995774836966, iteration: 11281
loss: 1.0249967575073242,grad_norm: 0.9999993419537863, iteration: 11282
loss: 1.0224493741989136,grad_norm: 0.9999991681525006, iteration: 11283
loss: 1.0241003036499023,grad_norm: 0.9999992813755721, iteration: 11284
loss: 1.036321759223938,grad_norm: 0.9999991303275559, iteration: 11285
loss: 1.0385750532150269,grad_norm: 0.9999991772633264, iteration: 11286
loss: 1.0419420003890991,grad_norm: 0.9999995273783778, iteration: 11287
loss: 1.064624309539795,grad_norm: 0.9999993865444097, iteration: 11288
loss: 0.9779983758926392,grad_norm: 0.9999993624520476, iteration: 11289
loss: 1.067389726638794,grad_norm: 0.9999995669045054, iteration: 11290
loss: 1.0205237865447998,grad_norm: 0.9999994084967955, iteration: 11291
loss: 1.0733052492141724,grad_norm: 0.9999993853921894, iteration: 11292
loss: 1.1043038368225098,grad_norm: 0.9999995965522219, iteration: 11293
loss: 0.9973871111869812,grad_norm: 0.999999048581452, iteration: 11294
loss: 1.019496202468872,grad_norm: 0.9999996447683808, iteration: 11295
loss: 1.027141809463501,grad_norm: 0.9999991616535442, iteration: 11296
loss: 1.0635128021240234,grad_norm: 0.9999992839009063, iteration: 11297
loss: 1.0523778200149536,grad_norm: 0.9999994208860346, iteration: 11298
loss: 1.0688163042068481,grad_norm: 0.9999997759405387, iteration: 11299
loss: 1.0399847030639648,grad_norm: 0.9999996261904933, iteration: 11300
loss: 1.0780448913574219,grad_norm: 0.9999993928166537, iteration: 11301
loss: 1.0619151592254639,grad_norm: 0.9999995815738362, iteration: 11302
loss: 1.0340925455093384,grad_norm: 0.9999994386639224, iteration: 11303
loss: 1.0517069101333618,grad_norm: 0.9999994024418982, iteration: 11304
loss: 1.0789283514022827,grad_norm: 0.9999998328783686, iteration: 11305
loss: 1.0489349365234375,grad_norm: 0.9999998559959313, iteration: 11306
loss: 1.0123035907745361,grad_norm: 0.9999990808741657, iteration: 11307
loss: 1.0100477933883667,grad_norm: 0.9999995494554937, iteration: 11308
loss: 1.0832116603851318,grad_norm: 0.9999996574916442, iteration: 11309
loss: 1.0569307804107666,grad_norm: 0.9999993647197409, iteration: 11310
loss: 1.0591033697128296,grad_norm: 0.999999379368435, iteration: 11311
loss: 1.0334465503692627,grad_norm: 0.9999993102327848, iteration: 11312
loss: 1.0963332653045654,grad_norm: 0.9999998621190985, iteration: 11313
loss: 1.0632950067520142,grad_norm: 0.9999994972165657, iteration: 11314
loss: 1.01967453956604,grad_norm: 0.999999128905482, iteration: 11315
loss: 1.0574965476989746,grad_norm: 0.9999993179138911, iteration: 11316
loss: 1.0418705940246582,grad_norm: 0.9999997083149169, iteration: 11317
loss: 1.0150452852249146,grad_norm: 0.9999994601086558, iteration: 11318
loss: 1.0393223762512207,grad_norm: 0.9999993049250888, iteration: 11319
loss: 1.0334923267364502,grad_norm: 0.9999993115005871, iteration: 11320
loss: 1.0110955238342285,grad_norm: 0.9999991682672372, iteration: 11321
loss: 1.1512917280197144,grad_norm: 0.9999997268367239, iteration: 11322
loss: 1.0838303565979004,grad_norm: 0.9999996345405845, iteration: 11323
loss: 0.9997855424880981,grad_norm: 0.9999992504808047, iteration: 11324
loss: 1.0741522312164307,grad_norm: 0.999999516523541, iteration: 11325
loss: 1.0932174921035767,grad_norm: 0.9999998165331344, iteration: 11326
loss: 1.002690076828003,grad_norm: 0.999999250093079, iteration: 11327
loss: 1.0620256662368774,grad_norm: 0.9999993092453638, iteration: 11328
loss: 1.0246669054031372,grad_norm: 0.9999992094408164, iteration: 11329
loss: 1.0549298524856567,grad_norm: 0.9999990939817557, iteration: 11330
loss: 1.0199949741363525,grad_norm: 0.9999993678034999, iteration: 11331
loss: 1.094525933265686,grad_norm: 0.9999993283199405, iteration: 11332
loss: 1.0845904350280762,grad_norm: 0.9999992219903611, iteration: 11333
loss: 1.0666954517364502,grad_norm: 0.9999996404003133, iteration: 11334
loss: 1.0283056497573853,grad_norm: 0.9999992535251141, iteration: 11335
loss: 1.1067534685134888,grad_norm: 0.999999636836044, iteration: 11336
loss: 1.0867892503738403,grad_norm: 0.9999997104496932, iteration: 11337
loss: 1.0232820510864258,grad_norm: 0.99999939254202, iteration: 11338
loss: 1.0831109285354614,grad_norm: 0.9999998213737282, iteration: 11339
loss: 1.0570576190948486,grad_norm: 0.999999110822016, iteration: 11340
loss: 1.0394262075424194,grad_norm: 0.9999991502100968, iteration: 11341
loss: 1.070237636566162,grad_norm: 0.9999994374543614, iteration: 11342
loss: 1.0349165201187134,grad_norm: 0.9999991984522469, iteration: 11343
loss: 1.052848219871521,grad_norm: 0.9999994550204337, iteration: 11344
loss: 1.1257057189941406,grad_norm: 0.9999995742071952, iteration: 11345
loss: 1.0274748802185059,grad_norm: 0.9999994226485628, iteration: 11346
loss: 1.0351085662841797,grad_norm: 0.9999994945266698, iteration: 11347
loss: 1.028964638710022,grad_norm: 0.9999992066271114, iteration: 11348
loss: 1.0547494888305664,grad_norm: 0.9999995022440181, iteration: 11349
loss: 1.0351462364196777,grad_norm: 0.999999321875187, iteration: 11350
loss: 1.012786626815796,grad_norm: 0.9999994883906367, iteration: 11351
loss: 1.1139881610870361,grad_norm: 0.9999998928248824, iteration: 11352
loss: 0.9815362691879272,grad_norm: 0.9999996237435068, iteration: 11353
loss: 1.006505012512207,grad_norm: 0.9999994552959384, iteration: 11354
loss: 1.0237715244293213,grad_norm: 0.9999995795208064, iteration: 11355
loss: 1.035415530204773,grad_norm: 0.999999249699431, iteration: 11356
loss: 1.0560804605484009,grad_norm: 0.9999992173035905, iteration: 11357
loss: 1.105790376663208,grad_norm: 0.9999994078589288, iteration: 11358
loss: 1.009697675704956,grad_norm: 0.999999147978301, iteration: 11359
loss: 0.9783338308334351,grad_norm: 0.9999995563745322, iteration: 11360
loss: 1.0427520275115967,grad_norm: 0.9999996633574455, iteration: 11361
loss: 1.042218804359436,grad_norm: 0.999999415818161, iteration: 11362
loss: 1.0665390491485596,grad_norm: 0.9999993864014413, iteration: 11363
loss: 1.0300551652908325,grad_norm: 0.9999992693830527, iteration: 11364
loss: 1.0566070079803467,grad_norm: 0.9999993322998759, iteration: 11365
loss: 1.0085957050323486,grad_norm: 0.9999992942150527, iteration: 11366
loss: 1.1128486394882202,grad_norm: 0.9999994722406678, iteration: 11367
loss: 1.0765742063522339,grad_norm: 0.9999996734112846, iteration: 11368
loss: 1.07645845413208,grad_norm: 0.9999998072628309, iteration: 11369
loss: 1.09597909450531,grad_norm: 0.999999842359051, iteration: 11370
loss: 1.0189404487609863,grad_norm: 0.999999299203877, iteration: 11371
loss: 1.01996910572052,grad_norm: 0.9999993309280366, iteration: 11372
loss: 1.0414214134216309,grad_norm: 0.9999993051469909, iteration: 11373
loss: 1.07673978805542,grad_norm: 0.9999995513287553, iteration: 11374
loss: 1.0599344968795776,grad_norm: 0.9999993002493962, iteration: 11375
loss: 1.0121700763702393,grad_norm: 0.9999997083705179, iteration: 11376
loss: 0.97065269947052,grad_norm: 0.9999994481748591, iteration: 11377
loss: 1.0276696681976318,grad_norm: 0.9999992468285057, iteration: 11378
loss: 1.0588840246200562,grad_norm: 0.9999993179898151, iteration: 11379
loss: 1.0074732303619385,grad_norm: 0.9999991357755321, iteration: 11380
loss: 1.0580666065216064,grad_norm: 0.9999992545163259, iteration: 11381
loss: 1.0531573295593262,grad_norm: 0.9999992823036018, iteration: 11382
loss: 1.074201226234436,grad_norm: 0.9999996777431049, iteration: 11383
loss: 0.9989984035491943,grad_norm: 0.9999991923665562, iteration: 11384
loss: 1.0662528276443481,grad_norm: 0.9999994988756123, iteration: 11385
loss: 1.0104411840438843,grad_norm: 0.9999993580013847, iteration: 11386
loss: 1.0929429531097412,grad_norm: 0.9999997555390889, iteration: 11387
loss: 1.04572331905365,grad_norm: 0.999999426742841, iteration: 11388
loss: 0.9860000610351562,grad_norm: 0.9999997518186534, iteration: 11389
loss: 1.0134696960449219,grad_norm: 0.9999993286843998, iteration: 11390
loss: 1.0264257192611694,grad_norm: 0.9999991543934722, iteration: 11391
loss: 1.089742660522461,grad_norm: 0.9999994986162927, iteration: 11392
loss: 1.014764428138733,grad_norm: 0.9999993400410795, iteration: 11393
loss: 1.0717254877090454,grad_norm: 0.999999513188987, iteration: 11394
loss: 1.1168906688690186,grad_norm: 0.9999997341118742, iteration: 11395
loss: 1.0376783609390259,grad_norm: 0.9999994834579968, iteration: 11396
loss: 1.1106244325637817,grad_norm: 0.9999995164012825, iteration: 11397
loss: 1.0710701942443848,grad_norm: 0.999999275453042, iteration: 11398
loss: 1.0481034517288208,grad_norm: 0.9999995485413745, iteration: 11399
loss: 1.0528732538223267,grad_norm: 0.9999994211021092, iteration: 11400
loss: 1.0355254411697388,grad_norm: 0.9999991778331354, iteration: 11401
loss: 1.0248994827270508,grad_norm: 0.9999993065169294, iteration: 11402
loss: 1.0610361099243164,grad_norm: 0.9999998715470219, iteration: 11403
loss: 1.0450397729873657,grad_norm: 0.9999996724626222, iteration: 11404
loss: 0.9730626940727234,grad_norm: 0.9999994646417494, iteration: 11405
loss: 0.9946052432060242,grad_norm: 0.9999992194889403, iteration: 11406
loss: 1.0100553035736084,grad_norm: 0.9999993372398112, iteration: 11407
loss: 1.009334683418274,grad_norm: 0.9999994613718884, iteration: 11408
loss: 1.0729581117630005,grad_norm: 0.9999995231112749, iteration: 11409
loss: 1.0609172582626343,grad_norm: 0.9999993019931039, iteration: 11410
loss: 1.0350350141525269,grad_norm: 0.9999992561950797, iteration: 11411
loss: 1.0595816373825073,grad_norm: 0.9999996599390261, iteration: 11412
loss: 1.0844473838806152,grad_norm: 0.9999992245839237, iteration: 11413
loss: 1.060439109802246,grad_norm: 0.9999993876567571, iteration: 11414
loss: 1.0900471210479736,grad_norm: 0.999999200488983, iteration: 11415
loss: 1.0152510404586792,grad_norm: 0.9999993957214429, iteration: 11416
loss: 1.0361435413360596,grad_norm: 0.9999992999587111, iteration: 11417
loss: 1.0058773756027222,grad_norm: 0.9999998074756288, iteration: 11418
loss: 1.0095343589782715,grad_norm: 0.9999995039194639, iteration: 11419
loss: 1.0533978939056396,grad_norm: 0.9999995407240704, iteration: 11420
loss: 1.214126706123352,grad_norm: 0.9999997841821767, iteration: 11421
loss: 1.016901969909668,grad_norm: 0.9999992008808963, iteration: 11422
loss: 1.0318777561187744,grad_norm: 0.9999990936904467, iteration: 11423
loss: 1.0470240116119385,grad_norm: 0.9999993320112754, iteration: 11424
loss: 1.0376317501068115,grad_norm: 0.9999996657682481, iteration: 11425
loss: 1.1172301769256592,grad_norm: 0.9999995072027723, iteration: 11426
loss: 0.9861661791801453,grad_norm: 0.9999991931118821, iteration: 11427
loss: 1.0181783437728882,grad_norm: 0.9813916710424166, iteration: 11428
loss: 1.0001486539840698,grad_norm: 0.9999991147705567, iteration: 11429
loss: 1.0374987125396729,grad_norm: 0.9999995004507308, iteration: 11430
loss: 1.0732148885726929,grad_norm: 0.999999429770063, iteration: 11431
loss: 1.061689853668213,grad_norm: 0.9999992079563057, iteration: 11432
loss: 1.0735058784484863,grad_norm: 0.9999994982510783, iteration: 11433
loss: 0.9944054484367371,grad_norm: 0.9999993674735236, iteration: 11434
loss: 1.0553374290466309,grad_norm: 0.9999995734010507, iteration: 11435
loss: 1.010867953300476,grad_norm: 0.9999993508506273, iteration: 11436
loss: 1.0194634199142456,grad_norm: 0.9999991512570418, iteration: 11437
loss: 1.0194109678268433,grad_norm: 0.9999992263397028, iteration: 11438
loss: 1.01948082447052,grad_norm: 0.9999993468483135, iteration: 11439
loss: 1.0377168655395508,grad_norm: 0.9999996885369764, iteration: 11440
loss: 1.0381816625595093,grad_norm: 0.9999991183853288, iteration: 11441
loss: 1.0381234884262085,grad_norm: 0.9999992294231033, iteration: 11442
loss: 0.9963758587837219,grad_norm: 0.9999992663383914, iteration: 11443
loss: 1.0342134237289429,grad_norm: 0.9999992888624372, iteration: 11444
loss: 1.041756510734558,grad_norm: 0.9999993963737303, iteration: 11445
loss: 1.0505776405334473,grad_norm: 0.9999992468981717, iteration: 11446
loss: 1.0636587142944336,grad_norm: 0.9999992380740639, iteration: 11447
loss: 1.080931305885315,grad_norm: 0.999999164764214, iteration: 11448
loss: 1.0001366138458252,grad_norm: 0.9999993963462868, iteration: 11449
loss: 1.0228450298309326,grad_norm: 0.9999991889392975, iteration: 11450
loss: 1.0678447484970093,grad_norm: 0.999999476105782, iteration: 11451
loss: 1.0243744850158691,grad_norm: 0.9999992442752835, iteration: 11452
loss: 0.9959476590156555,grad_norm: 0.9999993064897331, iteration: 11453
loss: 0.99494868516922,grad_norm: 0.9999996697433883, iteration: 11454
loss: 1.0105006694793701,grad_norm: 0.9999995951943116, iteration: 11455
loss: 1.0277729034423828,grad_norm: 0.9999992965270335, iteration: 11456
loss: 1.0288546085357666,grad_norm: 0.9999993141345933, iteration: 11457
loss: 1.0590674877166748,grad_norm: 0.9999995988604582, iteration: 11458
loss: 1.0378669500350952,grad_norm: 0.9999994960297359, iteration: 11459
loss: 0.9890480041503906,grad_norm: 0.9999992884876004, iteration: 11460
loss: 1.076461911201477,grad_norm: 0.9999995725498563, iteration: 11461
loss: 1.0370882749557495,grad_norm: 0.9999994317221593, iteration: 11462
loss: 1.0592875480651855,grad_norm: 0.9999993407761492, iteration: 11463
loss: 1.0373682975769043,grad_norm: 0.9999992741240783, iteration: 11464
loss: 1.0054070949554443,grad_norm: 0.9999993919214574, iteration: 11465
loss: 1.036811113357544,grad_norm: 0.9999994093207842, iteration: 11466
loss: 1.0471980571746826,grad_norm: 0.9999992729716772, iteration: 11467
loss: 1.0071890354156494,grad_norm: 0.9999994179576749, iteration: 11468
loss: 1.0404309034347534,grad_norm: 0.9999998070142726, iteration: 11469
loss: 1.0176483392715454,grad_norm: 0.9999995170842934, iteration: 11470
loss: 1.0230376720428467,grad_norm: 0.9999993136254692, iteration: 11471
loss: 1.0086402893066406,grad_norm: 0.9999994112558382, iteration: 11472
loss: 1.0479905605316162,grad_norm: 0.99999940388558, iteration: 11473
loss: 1.0721379518508911,grad_norm: 0.9999997701662766, iteration: 11474
loss: 1.122984766960144,grad_norm: 0.9999996929000055, iteration: 11475
loss: 1.0594731569290161,grad_norm: 0.9999992493457109, iteration: 11476
loss: 1.085978627204895,grad_norm: 0.9999997633377504, iteration: 11477
loss: 1.0427907705307007,grad_norm: 0.9999992850420509, iteration: 11478
loss: 1.0354549884796143,grad_norm: 0.9999990901333459, iteration: 11479
loss: 1.0952128171920776,grad_norm: 0.999999313694533, iteration: 11480
loss: 1.0230127573013306,grad_norm: 0.9999992198231958, iteration: 11481
loss: 1.0374051332473755,grad_norm: 0.999999273909446, iteration: 11482
loss: 0.9883139729499817,grad_norm: 0.9999992749898837, iteration: 11483
loss: 1.0740312337875366,grad_norm: 0.9999994278604093, iteration: 11484
loss: 0.9817772507667542,grad_norm: 0.9999995894266652, iteration: 11485
loss: 1.0476380586624146,grad_norm: 0.9999992046360211, iteration: 11486
loss: 1.0600756406784058,grad_norm: 0.9999995051098026, iteration: 11487
loss: 1.0592952966690063,grad_norm: 0.999999358259808, iteration: 11488
loss: 1.0465937852859497,grad_norm: 0.9999995413784676, iteration: 11489
loss: 1.0442923307418823,grad_norm: 0.9999993229618477, iteration: 11490
loss: 1.0167361497879028,grad_norm: 0.9999992027238213, iteration: 11491
loss: 1.0646886825561523,grad_norm: 0.9999996316872841, iteration: 11492
loss: 1.019437551498413,grad_norm: 0.9999995929044151, iteration: 11493
loss: 1.0264009237289429,grad_norm: 0.999999246249563, iteration: 11494
loss: 1.0347566604614258,grad_norm: 0.9999996378745097, iteration: 11495
loss: 1.030288815498352,grad_norm: 0.9999994452178561, iteration: 11496
loss: 1.052565336227417,grad_norm: 0.9999994668589914, iteration: 11497
loss: 1.06015944480896,grad_norm: 0.9999995245424976, iteration: 11498
loss: 1.045235514640808,grad_norm: 0.9999991963042234, iteration: 11499
loss: 1.01941978931427,grad_norm: 0.999999397073433, iteration: 11500
loss: 1.0825752019882202,grad_norm: 0.9999993895878684, iteration: 11501
loss: 1.0197542905807495,grad_norm: 0.999999306197567, iteration: 11502
loss: 1.0462368726730347,grad_norm: 0.9999994605486288, iteration: 11503
loss: 1.0184965133666992,grad_norm: 0.9999993817510724, iteration: 11504
loss: 1.0569368600845337,grad_norm: 0.9999995619433325, iteration: 11505
loss: 1.0555943250656128,grad_norm: 0.9999993238037405, iteration: 11506
loss: 1.0169216394424438,grad_norm: 0.9999995166041509, iteration: 11507
loss: 1.0463312864303589,grad_norm: 0.9999991522424322, iteration: 11508
loss: 1.0452618598937988,grad_norm: 0.9999991699850972, iteration: 11509
loss: 1.0502756834030151,grad_norm: 0.9999993624493282, iteration: 11510
loss: 1.0521509647369385,grad_norm: 0.999999367926872, iteration: 11511
loss: 1.0207468271255493,grad_norm: 0.999999273359776, iteration: 11512
loss: 1.001368522644043,grad_norm: 0.9999994039937029, iteration: 11513
loss: 1.0576090812683105,grad_norm: 0.9999995649602578, iteration: 11514
loss: 1.0399043560028076,grad_norm: 0.999999341018855, iteration: 11515
loss: 0.985235333442688,grad_norm: 0.9999993289653384, iteration: 11516
loss: 1.0115501880645752,grad_norm: 0.9999991502887475, iteration: 11517
loss: 1.0488972663879395,grad_norm: 0.999999688909098, iteration: 11518
loss: 1.1802754402160645,grad_norm: 0.9999997645613741, iteration: 11519
loss: 1.085971474647522,grad_norm: 0.9999994187400265, iteration: 11520
loss: 1.025944471359253,grad_norm: 0.9999991174564673, iteration: 11521
loss: 1.0388679504394531,grad_norm: 0.999999325444604, iteration: 11522
loss: 1.0472487211227417,grad_norm: 0.9999993410684984, iteration: 11523
loss: 1.0639913082122803,grad_norm: 0.9999992863087203, iteration: 11524
loss: 1.0113954544067383,grad_norm: 0.9999994453202031, iteration: 11525
loss: 1.0148451328277588,grad_norm: 0.9999993163558561, iteration: 11526
loss: 1.035544514656067,grad_norm: 0.9999991407240022, iteration: 11527
loss: 1.0450234413146973,grad_norm: 0.9999992587042491, iteration: 11528
loss: 0.9909830689430237,grad_norm: 0.9999993252004197, iteration: 11529
loss: 1.0396840572357178,grad_norm: 0.9999990177683414, iteration: 11530
loss: 1.0421347618103027,grad_norm: 0.9999997194591264, iteration: 11531
loss: 0.9888426661491394,grad_norm: 0.9999992124718377, iteration: 11532
loss: 1.0075520277023315,grad_norm: 0.9999991444852052, iteration: 11533
loss: 0.9782604575157166,grad_norm: 0.999999218822126, iteration: 11534
loss: 1.1059330701828003,grad_norm: 0.9999996225432196, iteration: 11535
loss: 1.0595065355300903,grad_norm: 0.999999580972135, iteration: 11536
loss: 1.0420254468917847,grad_norm: 0.9999992453640071, iteration: 11537
loss: 1.0588208436965942,grad_norm: 0.9999992817307422, iteration: 11538
loss: 1.0318882465362549,grad_norm: 0.9999992421204631, iteration: 11539
loss: 1.0394172668457031,grad_norm: 0.9999993723784306, iteration: 11540
loss: 1.0417503118515015,grad_norm: 0.9999993911768743, iteration: 11541
loss: 1.0216162204742432,grad_norm: 0.9999991559443969, iteration: 11542
loss: 0.9974436163902283,grad_norm: 0.9999991553519028, iteration: 11543
loss: 1.039925456047058,grad_norm: 0.9999992275647072, iteration: 11544
loss: 1.0128196477890015,grad_norm: 0.999999468781525, iteration: 11545
loss: 1.1002540588378906,grad_norm: 0.9999996183720845, iteration: 11546
loss: 1.0293582677841187,grad_norm: 0.999999275424756, iteration: 11547
loss: 1.0928380489349365,grad_norm: 0.9999996543205439, iteration: 11548
loss: 1.000388741493225,grad_norm: 0.999999134628425, iteration: 11549
loss: 1.1112877130508423,grad_norm: 0.999999779611682, iteration: 11550
loss: 1.0023751258850098,grad_norm: 0.9999994999912045, iteration: 11551
loss: 1.0559563636779785,grad_norm: 0.9999993012634608, iteration: 11552
loss: 1.0247390270233154,grad_norm: 0.9999991864970749, iteration: 11553
loss: 1.025174617767334,grad_norm: 0.9999997209201764, iteration: 11554
loss: 1.048028826713562,grad_norm: 0.999999175059866, iteration: 11555
loss: 1.0566246509552002,grad_norm: 0.9999991713895016, iteration: 11556
loss: 1.0009552240371704,grad_norm: 0.9999991633032916, iteration: 11557
loss: 1.0100528001785278,grad_norm: 0.9999991571507943, iteration: 11558
loss: 1.0314253568649292,grad_norm: 0.9999992687251782, iteration: 11559
loss: 0.9914101362228394,grad_norm: 0.9999992396808475, iteration: 11560
loss: 0.9958255290985107,grad_norm: 0.9999991997934152, iteration: 11561
loss: 1.0573493242263794,grad_norm: 0.9999994527890471, iteration: 11562
loss: 1.0772500038146973,grad_norm: 0.9999995050791378, iteration: 11563
loss: 1.0681126117706299,grad_norm: 0.9999993787714873, iteration: 11564
loss: 1.057781457901001,grad_norm: 0.999999383126918, iteration: 11565
loss: 0.9884898662567139,grad_norm: 0.9999992494420382, iteration: 11566
loss: 1.067559003829956,grad_norm: 0.9999992388818427, iteration: 11567
loss: 1.0397038459777832,grad_norm: 0.9999994206790715, iteration: 11568
loss: 1.0635817050933838,grad_norm: 0.9999994835420131, iteration: 11569
loss: 1.0658543109893799,grad_norm: 0.999999408694866, iteration: 11570
loss: 1.092197299003601,grad_norm: 0.9999994776408688, iteration: 11571
loss: 1.0579910278320312,grad_norm: 0.999999493365333, iteration: 11572
loss: 1.0843141078948975,grad_norm: 0.9999996740447237, iteration: 11573
loss: 1.088826298713684,grad_norm: 0.9999997069012782, iteration: 11574
loss: 1.0059318542480469,grad_norm: 0.9999993202567825, iteration: 11575
loss: 1.0305531024932861,grad_norm: 0.9999992380928867, iteration: 11576
loss: 1.0578144788742065,grad_norm: 0.9999994317104874, iteration: 11577
loss: 1.0504971742630005,grad_norm: 0.9999997264849193, iteration: 11578
loss: 1.1118661165237427,grad_norm: 0.9999997200859817, iteration: 11579
loss: 1.0206221342086792,grad_norm: 0.9999992655635674, iteration: 11580
loss: 1.015143632888794,grad_norm: 0.9999994411603724, iteration: 11581
loss: 0.9894711971282959,grad_norm: 0.9999992818948779, iteration: 11582
loss: 1.0068107843399048,grad_norm: 0.9999992778902553, iteration: 11583
loss: 1.0409705638885498,grad_norm: 0.9999991484116202, iteration: 11584
loss: 1.0197737216949463,grad_norm: 0.999999181767646, iteration: 11585
loss: 1.0327935218811035,grad_norm: 0.9999993718176059, iteration: 11586
loss: 1.0585706233978271,grad_norm: 0.9999993564857079, iteration: 11587
loss: 1.0065147876739502,grad_norm: 0.9999998663637303, iteration: 11588
loss: 1.0459060668945312,grad_norm: 0.9999993805799275, iteration: 11589
loss: 1.0254931449890137,grad_norm: 0.9999994376457817, iteration: 11590
loss: 1.042783260345459,grad_norm: 0.9999992548022357, iteration: 11591
loss: 1.047252893447876,grad_norm: 0.9999992030030891, iteration: 11592
loss: 0.9854303002357483,grad_norm: 0.9999992949886953, iteration: 11593
loss: 0.9803115725517273,grad_norm: 0.9999991006792461, iteration: 11594
loss: 0.9748770594596863,grad_norm: 0.9999992698447759, iteration: 11595
loss: 1.0182703733444214,grad_norm: 0.9999996189231133, iteration: 11596
loss: 1.0690970420837402,grad_norm: 0.9999996743890692, iteration: 11597
loss: 0.9716387987136841,grad_norm: 0.9999992965267651, iteration: 11598
loss: 1.0003279447555542,grad_norm: 0.9999993315444164, iteration: 11599
loss: 1.018496036529541,grad_norm: 0.9999994097954763, iteration: 11600
loss: 1.0140974521636963,grad_norm: 0.9999995055966591, iteration: 11601
loss: 1.0704345703125,grad_norm: 0.9999993728096094, iteration: 11602
loss: 1.0603879690170288,grad_norm: 0.999999284301636, iteration: 11603
loss: 0.9860275387763977,grad_norm: 0.999999156029828, iteration: 11604
loss: 0.9216769337654114,grad_norm: 0.9999993540859704, iteration: 11605
loss: 1.0379416942596436,grad_norm: 0.9999996109644561, iteration: 11606
loss: 1.0076746940612793,grad_norm: 0.9999992045485644, iteration: 11607
loss: 1.0292495489120483,grad_norm: 0.9999991969269166, iteration: 11608
loss: 1.1068357229232788,grad_norm: 0.9999998281704002, iteration: 11609
loss: 1.0404295921325684,grad_norm: 0.9999995562274128, iteration: 11610
loss: 1.0141139030456543,grad_norm: 0.9999992578347823, iteration: 11611
loss: 0.997491180896759,grad_norm: 0.9999990784466856, iteration: 11612
loss: 1.0054742097854614,grad_norm: 0.9999993828959326, iteration: 11613
loss: 0.9891582727432251,grad_norm: 0.9999993570449366, iteration: 11614
loss: 1.0961716175079346,grad_norm: 0.9999995120406714, iteration: 11615
loss: 1.1307944059371948,grad_norm: 0.9999997763720749, iteration: 11616
loss: 1.0256142616271973,grad_norm: 0.9999992747584588, iteration: 11617
loss: 1.1685971021652222,grad_norm: 0.999999643285418, iteration: 11618
loss: 1.0536922216415405,grad_norm: 0.9999993706557344, iteration: 11619
loss: 1.0740106105804443,grad_norm: 0.9999995047462484, iteration: 11620
loss: 0.9899338483810425,grad_norm: 0.9999993077527147, iteration: 11621
loss: 1.0416871309280396,grad_norm: 0.9999993405656652, iteration: 11622
loss: 1.0350943803787231,grad_norm: 0.999999681745251, iteration: 11623
loss: 1.0938504934310913,grad_norm: 0.9999997537583386, iteration: 11624
loss: 1.04830002784729,grad_norm: 0.999999588132387, iteration: 11625
loss: 1.0621049404144287,grad_norm: 0.9999992477510041, iteration: 11626
loss: 1.0418673753738403,grad_norm: 0.9999992211922929, iteration: 11627
loss: 1.0985575914382935,grad_norm: 0.999999486231211, iteration: 11628
loss: 1.0321710109710693,grad_norm: 0.99999935014363, iteration: 11629
loss: 1.0192654132843018,grad_norm: 0.9999991124277886, iteration: 11630
loss: 1.13612699508667,grad_norm: 0.9999998246949985, iteration: 11631
loss: 1.044001579284668,grad_norm: 0.9999996935560984, iteration: 11632
loss: 1.0820828676223755,grad_norm: 0.99999959082446, iteration: 11633
loss: 0.9992319345474243,grad_norm: 0.9999991813093623, iteration: 11634
loss: 0.9968154430389404,grad_norm: 0.9999992170322093, iteration: 11635
loss: 0.9885050654411316,grad_norm: 0.9999996292781421, iteration: 11636
loss: 1.002260446548462,grad_norm: 0.9999990692866603, iteration: 11637
loss: 0.9856513142585754,grad_norm: 0.9999991733789239, iteration: 11638
loss: 1.0288214683532715,grad_norm: 0.9999993040901135, iteration: 11639
loss: 0.9948081374168396,grad_norm: 0.9999992684901476, iteration: 11640
loss: 1.0239232778549194,grad_norm: 0.9999992767808881, iteration: 11641
loss: 1.0296204090118408,grad_norm: 0.9999993915986553, iteration: 11642
loss: 0.9694086313247681,grad_norm: 0.9999990873591575, iteration: 11643
loss: 0.9849609732627869,grad_norm: 0.9999992104265997, iteration: 11644
loss: 1.0299274921417236,grad_norm: 0.9999991599145103, iteration: 11645
loss: 1.0795456171035767,grad_norm: 0.9999992724344114, iteration: 11646
loss: 0.99763423204422,grad_norm: 0.9999992615413539, iteration: 11647
loss: 1.0486834049224854,grad_norm: 0.9999992027223422, iteration: 11648
loss: 1.0194321870803833,grad_norm: 0.9999993619343077, iteration: 11649
loss: 1.0427888631820679,grad_norm: 0.9999998772661691, iteration: 11650
loss: 1.0146799087524414,grad_norm: 0.9999993918315822, iteration: 11651
loss: 1.0831226110458374,grad_norm: 0.9999992318377302, iteration: 11652
loss: 1.0406800508499146,grad_norm: 0.9999990944856689, iteration: 11653
loss: 1.020553469657898,grad_norm: 0.9999991182766146, iteration: 11654
loss: 1.0307327508926392,grad_norm: 0.9999992757175715, iteration: 11655
loss: 1.0215014219284058,grad_norm: 0.9999993253026953, iteration: 11656
loss: 0.9607774615287781,grad_norm: 0.9999993117074819, iteration: 11657
loss: 1.0358145236968994,grad_norm: 0.9999992537935972, iteration: 11658
loss: 1.0181885957717896,grad_norm: 0.9999993520049089, iteration: 11659
loss: 1.0237510204315186,grad_norm: 0.9999995323672425, iteration: 11660
loss: 1.0460848808288574,grad_norm: 0.9999990142455256, iteration: 11661
loss: 1.027481198310852,grad_norm: 0.9999995626236641, iteration: 11662
loss: 1.0286951065063477,grad_norm: 0.9999990483140954, iteration: 11663
loss: 1.1132887601852417,grad_norm: 0.9999998973293679, iteration: 11664
loss: 0.9504996538162231,grad_norm: 0.9999993951821476, iteration: 11665
loss: 1.0493451356887817,grad_norm: 0.9999993387257147, iteration: 11666
loss: 1.0417439937591553,grad_norm: 0.9999994356182256, iteration: 11667
loss: 1.0799647569656372,grad_norm: 0.9999994758543646, iteration: 11668
loss: 0.9978818893432617,grad_norm: 0.9999994259321965, iteration: 11669
loss: 0.9762598276138306,grad_norm: 0.999999250238029, iteration: 11670
loss: 1.0146600008010864,grad_norm: 0.9999992763668997, iteration: 11671
loss: 1.0426543951034546,grad_norm: 0.9999993015445314, iteration: 11672
loss: 1.046406626701355,grad_norm: 0.999999688105326, iteration: 11673
loss: 1.0553228855133057,grad_norm: 0.999999217604606, iteration: 11674
loss: 1.0294055938720703,grad_norm: 0.9999992661721682, iteration: 11675
loss: 1.0449678897857666,grad_norm: 0.9999992163470719, iteration: 11676
loss: 1.0546927452087402,grad_norm: 0.9999993518144499, iteration: 11677
loss: 1.0835596323013306,grad_norm: 0.9999998281063133, iteration: 11678
loss: 1.0694541931152344,grad_norm: 0.999999311142117, iteration: 11679
loss: 1.0490691661834717,grad_norm: 0.9999996847845923, iteration: 11680
loss: 1.0204544067382812,grad_norm: 0.9999992777779856, iteration: 11681
loss: 1.0251612663269043,grad_norm: 0.9999996973509748, iteration: 11682
loss: 1.0400866270065308,grad_norm: 0.9999995007408518, iteration: 11683
loss: 0.9847294688224792,grad_norm: 0.9999992397227229, iteration: 11684
loss: 1.0842519998550415,grad_norm: 0.999999722533178, iteration: 11685
loss: 1.0465408563613892,grad_norm: 0.9999993322972341, iteration: 11686
loss: 1.0706608295440674,grad_norm: 0.9999998632900494, iteration: 11687
loss: 1.0343186855316162,grad_norm: 0.9999991389811529, iteration: 11688
loss: 1.040437936782837,grad_norm: 0.9999996588358576, iteration: 11689
loss: 1.035357117652893,grad_norm: 0.999999191498796, iteration: 11690
loss: 1.051689624786377,grad_norm: 0.9999994874137225, iteration: 11691
loss: 0.9941881895065308,grad_norm: 0.9999991445333951, iteration: 11692
loss: 1.0933427810668945,grad_norm: 0.999999219085118, iteration: 11693
loss: 1.0305782556533813,grad_norm: 0.9999994418594498, iteration: 11694
loss: 1.0561960935592651,grad_norm: 0.9999996799055355, iteration: 11695
loss: 1.049027681350708,grad_norm: 0.9999992486740804, iteration: 11696
loss: 1.0205200910568237,grad_norm: 0.9999993719683948, iteration: 11697
loss: 1.0069973468780518,grad_norm: 0.9999990940955601, iteration: 11698
loss: 1.081931471824646,grad_norm: 0.9999991376138804, iteration: 11699
loss: 1.0287889242172241,grad_norm: 0.999999230263774, iteration: 11700
loss: 0.9876173138618469,grad_norm: 0.9999991859149373, iteration: 11701
loss: 1.0318963527679443,grad_norm: 0.9999996310028963, iteration: 11702
loss: 1.0082088708877563,grad_norm: 0.999999215186968, iteration: 11703
loss: 1.0251882076263428,grad_norm: 0.999999322496091, iteration: 11704
loss: 1.0183405876159668,grad_norm: 0.99999910030786, iteration: 11705
loss: 1.0737000703811646,grad_norm: 0.9999992637468804, iteration: 11706
loss: 1.0322176218032837,grad_norm: 0.9999994408602318, iteration: 11707
loss: 0.9935979247093201,grad_norm: 0.9999996229460629, iteration: 11708
loss: 1.1449077129364014,grad_norm: 0.999999684518878, iteration: 11709
loss: 1.009239673614502,grad_norm: 0.9999995372424181, iteration: 11710
loss: 1.0338095426559448,grad_norm: 0.999999300812726, iteration: 11711
loss: 1.0588048696517944,grad_norm: 0.9999992437913002, iteration: 11712
loss: 1.0372982025146484,grad_norm: 0.9999999242754468, iteration: 11713
loss: 1.040124535560608,grad_norm: 0.9999993179223851, iteration: 11714
loss: 1.0016611814498901,grad_norm: 0.9999991706298785, iteration: 11715
loss: 1.0606497526168823,grad_norm: 0.9999996665684658, iteration: 11716
loss: 1.0587824583053589,grad_norm: 0.9999997286526415, iteration: 11717
loss: 1.039553165435791,grad_norm: 0.9999993151591736, iteration: 11718
loss: 1.0187981128692627,grad_norm: 0.9999995870577145, iteration: 11719
loss: 1.0568991899490356,grad_norm: 0.999999697399503, iteration: 11720
loss: 1.024621605873108,grad_norm: 0.9999996361531475, iteration: 11721
loss: 1.0559039115905762,grad_norm: 0.9999996953719992, iteration: 11722
loss: 1.0504062175750732,grad_norm: 0.9999994974961585, iteration: 11723
loss: 1.0227454900741577,grad_norm: 0.9999994216244694, iteration: 11724
loss: 1.0207092761993408,grad_norm: 0.9999995588608577, iteration: 11725
loss: 1.090406060218811,grad_norm: 0.9999997787717003, iteration: 11726
loss: 0.9897564053535461,grad_norm: 0.9999996539385652, iteration: 11727
loss: 1.0195443630218506,grad_norm: 0.9999995312829886, iteration: 11728
loss: 1.0903303623199463,grad_norm: 0.9999996935455134, iteration: 11729
loss: 1.0803529024124146,grad_norm: 0.9999997021979917, iteration: 11730
loss: 1.1080737113952637,grad_norm: 0.999999897699924, iteration: 11731
loss: 1.088879108428955,grad_norm: 0.9999995897490058, iteration: 11732
loss: 0.9827120900154114,grad_norm: 0.9999993925746428, iteration: 11733
loss: 1.0357632637023926,grad_norm: 0.9999991716859506, iteration: 11734
loss: 1.0804287195205688,grad_norm: 0.9999996485198425, iteration: 11735
loss: 0.9903613328933716,grad_norm: 0.9999992885286043, iteration: 11736
loss: 1.1096769571304321,grad_norm: 0.9999997824786915, iteration: 11737
loss: 0.994432806968689,grad_norm: 0.9999991410550847, iteration: 11738
loss: 1.0515682697296143,grad_norm: 0.9999995305963714, iteration: 11739
loss: 1.088966727256775,grad_norm: 0.999999542794179, iteration: 11740
loss: 1.0250182151794434,grad_norm: 0.9999995740789075, iteration: 11741
loss: 1.0892735719680786,grad_norm: 0.999999568817148, iteration: 11742
loss: 1.0473291873931885,grad_norm: 0.9999997534836726, iteration: 11743
loss: 1.0733293294906616,grad_norm: 0.9999993108777147, iteration: 11744
loss: 1.0806082487106323,grad_norm: 0.9999994941588832, iteration: 11745
loss: 1.0421762466430664,grad_norm: 0.9999995763470544, iteration: 11746
loss: 1.031518578529358,grad_norm: 0.9902924268021066, iteration: 11747
loss: 1.0660293102264404,grad_norm: 0.999999500314477, iteration: 11748
loss: 1.0498292446136475,grad_norm: 0.99999962591872, iteration: 11749
loss: 1.060582160949707,grad_norm: 0.999999511533817, iteration: 11750
loss: 0.9892820119857788,grad_norm: 0.9999991168137544, iteration: 11751
loss: 1.0136363506317139,grad_norm: 0.9999992178013197, iteration: 11752
loss: 1.0735796689987183,grad_norm: 0.9999996259645755, iteration: 11753
loss: 1.0438870191574097,grad_norm: 0.999999630969705, iteration: 11754
loss: 1.0396721363067627,grad_norm: 0.9999993256439061, iteration: 11755
loss: 1.0337724685668945,grad_norm: 0.9999991516909427, iteration: 11756
loss: 1.0397863388061523,grad_norm: 0.9999992408720577, iteration: 11757
loss: 1.0846853256225586,grad_norm: 0.9999995124345392, iteration: 11758
loss: 1.0436886548995972,grad_norm: 0.9999992276392724, iteration: 11759
loss: 1.0341572761535645,grad_norm: 0.9999995968211194, iteration: 11760
loss: 1.1428958177566528,grad_norm: 0.999999833370383, iteration: 11761
loss: 1.05152428150177,grad_norm: 0.9999991905637238, iteration: 11762
loss: 1.058842658996582,grad_norm: 0.9999994924687413, iteration: 11763
loss: 1.0556334257125854,grad_norm: 0.9999992316787262, iteration: 11764
loss: 1.0348507165908813,grad_norm: 0.999999589709766, iteration: 11765
loss: 1.0228643417358398,grad_norm: 0.9999991981320226, iteration: 11766
loss: 1.0419024229049683,grad_norm: 0.9999991423663828, iteration: 11767
loss: 1.040770173072815,grad_norm: 0.9999996992731547, iteration: 11768
loss: 1.0604358911514282,grad_norm: 0.9999992901803736, iteration: 11769
loss: 1.0508965253829956,grad_norm: 0.9999995559630653, iteration: 11770
loss: 1.0475986003875732,grad_norm: 0.9999993923287099, iteration: 11771
loss: 1.0312906503677368,grad_norm: 0.9999996859541512, iteration: 11772
loss: 1.009559988975525,grad_norm: 0.9999993343077708, iteration: 11773
loss: 1.0514562129974365,grad_norm: 0.9999993923617949, iteration: 11774
loss: 1.0123004913330078,grad_norm: 0.9999991165463971, iteration: 11775
loss: 1.0536675453186035,grad_norm: 0.9999995071371602, iteration: 11776
loss: 1.054577350616455,grad_norm: 0.9999992953424062, iteration: 11777
loss: 1.0774089097976685,grad_norm: 0.9999993576136273, iteration: 11778
loss: 0.9839425683021545,grad_norm: 0.9999992989037998, iteration: 11779
loss: 1.0282856225967407,grad_norm: 0.9999992693196869, iteration: 11780
loss: 1.01236891746521,grad_norm: 0.9999994849368882, iteration: 11781
loss: 1.060221791267395,grad_norm: 0.999999584367618, iteration: 11782
loss: 1.0798650979995728,grad_norm: 0.9999999251628288, iteration: 11783
loss: 1.0385072231292725,grad_norm: 0.9999995393813307, iteration: 11784
loss: 1.0422906875610352,grad_norm: 0.9999995181396294, iteration: 11785
loss: 1.0127997398376465,grad_norm: 0.9999993428335875, iteration: 11786
loss: 0.9944326877593994,grad_norm: 0.9999993166622996, iteration: 11787
loss: 1.0653419494628906,grad_norm: 0.9999992056523872, iteration: 11788
loss: 1.030388355255127,grad_norm: 0.9999992850255282, iteration: 11789
loss: 0.97105872631073,grad_norm: 0.9999991732848249, iteration: 11790
loss: 1.031431794166565,grad_norm: 0.9999992745150806, iteration: 11791
loss: 1.0094554424285889,grad_norm: 0.9999993469871896, iteration: 11792
loss: 1.050991415977478,grad_norm: 0.9999992187710768, iteration: 11793
loss: 0.9948475956916809,grad_norm: 0.9999994873360187, iteration: 11794
loss: 1.0683722496032715,grad_norm: 0.9999994370335732, iteration: 11795
loss: 1.3028247356414795,grad_norm: 0.9999997826253637, iteration: 11796
loss: 1.0019774436950684,grad_norm: 0.999999296391038, iteration: 11797
loss: 1.1295490264892578,grad_norm: 0.9999993848186874, iteration: 11798
loss: 1.0568857192993164,grad_norm: 0.9999996526595526, iteration: 11799
loss: 1.009260654449463,grad_norm: 0.9999999057745237, iteration: 11800
loss: 0.9785959720611572,grad_norm: 0.9999993762106478, iteration: 11801
loss: 1.0576878786087036,grad_norm: 0.9999995053510824, iteration: 11802
loss: 0.9878886938095093,grad_norm: 0.9999992359196851, iteration: 11803
loss: 0.9653039574623108,grad_norm: 0.9999991383986394, iteration: 11804
loss: 1.0564416646957397,grad_norm: 0.9999992886848158, iteration: 11805
loss: 1.0865236520767212,grad_norm: 0.9999994754852362, iteration: 11806
loss: 1.0368319749832153,grad_norm: 0.9999998806425614, iteration: 11807
loss: 1.0842190980911255,grad_norm: 0.9999998018465295, iteration: 11808
loss: 1.0272340774536133,grad_norm: 0.9999993252497179, iteration: 11809
loss: 1.0620335340499878,grad_norm: 0.9999993333633137, iteration: 11810
loss: 1.0716043710708618,grad_norm: 0.9999994578521367, iteration: 11811
loss: 1.0815646648406982,grad_norm: 0.999999861479868, iteration: 11812
loss: 1.0175138711929321,grad_norm: 0.9999997285303855, iteration: 11813
loss: 1.1563862562179565,grad_norm: 0.9999997219655077, iteration: 11814
loss: 1.0440651178359985,grad_norm: 0.9999994667259603, iteration: 11815
loss: 1.0373950004577637,grad_norm: 0.9999997168368138, iteration: 11816
loss: 1.0410103797912598,grad_norm: 0.9999997953792162, iteration: 11817
loss: 1.0256410837173462,grad_norm: 0.9999992547541178, iteration: 11818
loss: 1.0309624671936035,grad_norm: 0.9999999271623113, iteration: 11819
loss: 1.0376713275909424,grad_norm: 0.9999993548608452, iteration: 11820
loss: 1.0814595222473145,grad_norm: 0.9999997967454198, iteration: 11821
loss: 1.0716561079025269,grad_norm: 0.999999669795035, iteration: 11822
loss: 1.0093430280685425,grad_norm: 0.999999340817014, iteration: 11823
loss: 1.0685515403747559,grad_norm: 0.9999995780405707, iteration: 11824
loss: 1.0468463897705078,grad_norm: 0.999999275081274, iteration: 11825
loss: 1.0898469686508179,grad_norm: 0.9999993015750765, iteration: 11826
loss: 1.048941969871521,grad_norm: 0.9999996463594971, iteration: 11827
loss: 1.0844122171401978,grad_norm: 0.9999995911590726, iteration: 11828
loss: 1.026013731956482,grad_norm: 0.9999993497681706, iteration: 11829
loss: 1.0785377025604248,grad_norm: 0.9999993523576094, iteration: 11830
loss: 1.0095107555389404,grad_norm: 0.9999996618323485, iteration: 11831
loss: 0.9964977502822876,grad_norm: 0.9999992245610663, iteration: 11832
loss: 1.0735547542572021,grad_norm: 0.9999996674784437, iteration: 11833
loss: 1.0606005191802979,grad_norm: 0.9999992628636278, iteration: 11834
loss: 1.1073094606399536,grad_norm: 0.9999995295661495, iteration: 11835
loss: 1.0212633609771729,grad_norm: 0.9999995940732989, iteration: 11836
loss: 1.0281659364700317,grad_norm: 0.999999349879483, iteration: 11837
loss: 1.0763670206069946,grad_norm: 0.999999715851148, iteration: 11838
loss: 1.0399961471557617,grad_norm: 0.9999996167994337, iteration: 11839
loss: 1.0921895503997803,grad_norm: 0.9999993448583427, iteration: 11840
loss: 1.0332039594650269,grad_norm: 0.9903849027927257, iteration: 11841
loss: 1.042096495628357,grad_norm: 0.9999992372018278, iteration: 11842
loss: 1.052667498588562,grad_norm: 0.9999991610295046, iteration: 11843
loss: 1.0297743082046509,grad_norm: 0.9999997580500339, iteration: 11844
loss: 0.9559198021888733,grad_norm: 0.9999991377663111, iteration: 11845
loss: 1.0253617763519287,grad_norm: 0.9999991621633243, iteration: 11846
loss: 1.0303716659545898,grad_norm: 0.9999993407084032, iteration: 11847
loss: 0.9985163807868958,grad_norm: 0.9999992264484745, iteration: 11848
loss: 1.006056308746338,grad_norm: 0.9999993661699274, iteration: 11849
loss: 1.0378965139389038,grad_norm: 0.9999992946799553, iteration: 11850
loss: 1.0279055833816528,grad_norm: 0.9999992699752148, iteration: 11851
loss: 0.994092583656311,grad_norm: 0.9999994220396745, iteration: 11852
loss: 1.046676754951477,grad_norm: 0.9999995751064715, iteration: 11853
loss: 1.0323231220245361,grad_norm: 0.999999361533915, iteration: 11854
loss: 1.0336201190948486,grad_norm: 0.9999993266550871, iteration: 11855
loss: 1.0757542848587036,grad_norm: 0.9999993319889882, iteration: 11856
loss: 1.0198578834533691,grad_norm: 0.9999991400189054, iteration: 11857
loss: 1.0052067041397095,grad_norm: 0.9999992540092881, iteration: 11858
loss: 1.0925652980804443,grad_norm: 0.9999997658492804, iteration: 11859
loss: 1.0456336736679077,grad_norm: 0.9999991172096528, iteration: 11860
loss: 0.976508378982544,grad_norm: 0.9999991756466392, iteration: 11861
loss: 0.9639453291893005,grad_norm: 0.9999991153382881, iteration: 11862
loss: 1.048094391822815,grad_norm: 0.9999997375387879, iteration: 11863
loss: 1.0965287685394287,grad_norm: 0.9999996294965555, iteration: 11864
loss: 0.9944934844970703,grad_norm: 0.9999993162800395, iteration: 11865
loss: 1.0065569877624512,grad_norm: 0.9999992077199332, iteration: 11866
loss: 1.0038872957229614,grad_norm: 0.999999327144185, iteration: 11867
loss: 1.0290935039520264,grad_norm: 0.9999992559637338, iteration: 11868
loss: 1.0518794059753418,grad_norm: 0.9999995148923811, iteration: 11869
loss: 1.0150420665740967,grad_norm: 0.9999992689410848, iteration: 11870
loss: 1.0379339456558228,grad_norm: 0.9999993859799894, iteration: 11871
loss: 1.0293877124786377,grad_norm: 0.999999400169745, iteration: 11872
loss: 1.0009543895721436,grad_norm: 0.9999993246128165, iteration: 11873
loss: 1.0663338899612427,grad_norm: 0.9999995935362697, iteration: 11874
loss: 1.0195057392120361,grad_norm: 0.9999991960089598, iteration: 11875
loss: 1.0657151937484741,grad_norm: 0.999999639445736, iteration: 11876
loss: 1.0818754434585571,grad_norm: 0.9999995413539458, iteration: 11877
loss: 1.0335431098937988,grad_norm: 0.9999992616736683, iteration: 11878
loss: 1.030225396156311,grad_norm: 0.9999993588505095, iteration: 11879
loss: 1.0699622631072998,grad_norm: 0.999999479829268, iteration: 11880
loss: 1.0479850769042969,grad_norm: 0.9999995782726562, iteration: 11881
loss: 0.9654544591903687,grad_norm: 0.9999993637527222, iteration: 11882
loss: 1.04006826877594,grad_norm: 0.9999995873974684, iteration: 11883
loss: 0.990581214427948,grad_norm: 0.9999994931502995, iteration: 11884
loss: 1.0368568897247314,grad_norm: 0.9999993211379616, iteration: 11885
loss: 1.039720058441162,grad_norm: 0.9999995849336377, iteration: 11886
loss: 0.9993215799331665,grad_norm: 0.9999990618176359, iteration: 11887
loss: 1.0243953466415405,grad_norm: 0.9999991637332497, iteration: 11888
loss: 1.299829363822937,grad_norm: 0.9999997289583471, iteration: 11889
loss: 1.0276882648468018,grad_norm: 0.9999992308886826, iteration: 11890
loss: 1.0068578720092773,grad_norm: 0.9999993204849665, iteration: 11891
loss: 1.0137680768966675,grad_norm: 0.9999991891502614, iteration: 11892
loss: 1.0416920185089111,grad_norm: 0.9999998362857045, iteration: 11893
loss: 1.084490418434143,grad_norm: 0.9999994982656029, iteration: 11894
loss: 1.018462896347046,grad_norm: 0.9999995796085006, iteration: 11895
loss: 1.0452715158462524,grad_norm: 0.9999992547070139, iteration: 11896
loss: 1.014710545539856,grad_norm: 0.9999993538048363, iteration: 11897
loss: 1.0564411878585815,grad_norm: 0.9999992101045511, iteration: 11898
loss: 0.9726492166519165,grad_norm: 0.9999991858257263, iteration: 11899
loss: 1.0825512409210205,grad_norm: 0.9999998736452271, iteration: 11900
loss: 1.061479926109314,grad_norm: 0.9999993756609774, iteration: 11901
loss: 1.0607261657714844,grad_norm: 0.9999996239143972, iteration: 11902
loss: 1.0297448635101318,grad_norm: 0.9999992461253844, iteration: 11903
loss: 1.0505295991897583,grad_norm: 0.9999992343123383, iteration: 11904
loss: 1.0473493337631226,grad_norm: 0.9999993311601538, iteration: 11905
loss: 1.0610564947128296,grad_norm: 0.9999993789969444, iteration: 11906
loss: 1.0671653747558594,grad_norm: 0.9999996322359909, iteration: 11907
loss: 1.0622533559799194,grad_norm: 0.9999991359336634, iteration: 11908
loss: 1.0718882083892822,grad_norm: 0.9999993163743154, iteration: 11909
loss: 1.0147645473480225,grad_norm: 0.9999998111824406, iteration: 11910
loss: 1.0346699953079224,grad_norm: 0.999999568740797, iteration: 11911
loss: 0.9997606873512268,grad_norm: 0.9999997111963917, iteration: 11912
loss: 1.0194392204284668,grad_norm: 0.9999993051356535, iteration: 11913
loss: 1.0356907844543457,grad_norm: 0.9999994861116908, iteration: 11914
loss: 1.0407625436782837,grad_norm: 0.999999287343466, iteration: 11915
loss: 1.1617883443832397,grad_norm: 0.9999997716091356, iteration: 11916
loss: 1.0193105936050415,grad_norm: 0.9999997753257913, iteration: 11917
loss: 1.0480530261993408,grad_norm: 0.9999995677819546, iteration: 11918
loss: 1.0781376361846924,grad_norm: 0.9999993061354547, iteration: 11919
loss: 1.1099971532821655,grad_norm: 0.9999995087498847, iteration: 11920
loss: 1.068532109260559,grad_norm: 0.9999994351853498, iteration: 11921
loss: 1.0043799877166748,grad_norm: 0.9999992959637375, iteration: 11922
loss: 1.1079590320587158,grad_norm: 0.9999995618359198, iteration: 11923
loss: 1.0636978149414062,grad_norm: 0.9999995007272316, iteration: 11924
loss: 1.003580927848816,grad_norm: 0.9999991245344585, iteration: 11925
loss: 1.0793472528457642,grad_norm: 0.999999521508733, iteration: 11926
loss: 1.0352684259414673,grad_norm: 0.9999997574476628, iteration: 11927
loss: 1.023080587387085,grad_norm: 0.9999991952447114, iteration: 11928
loss: 1.0130137205123901,grad_norm: 0.9999993648378535, iteration: 11929
loss: 1.0913019180297852,grad_norm: 0.9999996915507151, iteration: 11930
loss: 1.035756230354309,grad_norm: 0.9999993246942692, iteration: 11931
loss: 1.0820746421813965,grad_norm: 0.9999994294560757, iteration: 11932
loss: 1.0121197700500488,grad_norm: 0.9999994730961329, iteration: 11933
loss: 1.08042311668396,grad_norm: 0.9999994470485813, iteration: 11934
loss: 1.0205163955688477,grad_norm: 0.99999930073237, iteration: 11935
loss: 1.0125600099563599,grad_norm: 0.9999993079060777, iteration: 11936
loss: 1.067221999168396,grad_norm: 0.999999747094735, iteration: 11937
loss: 1.1082847118377686,grad_norm: 0.9999998419046984, iteration: 11938
loss: 1.0227997303009033,grad_norm: 0.9999994431319607, iteration: 11939
loss: 1.0871953964233398,grad_norm: 0.9999993865049679, iteration: 11940
loss: 1.0718715190887451,grad_norm: 0.999999272941029, iteration: 11941
loss: 1.0006108283996582,grad_norm: 0.9999992299729811, iteration: 11942
loss: 1.052971363067627,grad_norm: 0.9999992142613794, iteration: 11943
loss: 1.065109372138977,grad_norm: 0.999999159567872, iteration: 11944
loss: 1.0426630973815918,grad_norm: 0.9999992483163294, iteration: 11945
loss: 0.9824450016021729,grad_norm: 0.9999992097237499, iteration: 11946
loss: 0.9685662984848022,grad_norm: 0.999999365048179, iteration: 11947
loss: 1.0331916809082031,grad_norm: 0.9999991886833982, iteration: 11948
loss: 1.077993392944336,grad_norm: 0.9999996185920185, iteration: 11949
loss: 1.0142097473144531,grad_norm: 0.9999993196573175, iteration: 11950
loss: 1.1328766345977783,grad_norm: 0.9999996654777095, iteration: 11951
loss: 1.0373713970184326,grad_norm: 0.9999996262987062, iteration: 11952
loss: 1.0253033638000488,grad_norm: 0.9999992436689642, iteration: 11953
loss: 1.0000723600387573,grad_norm: 0.99999929967844, iteration: 11954
loss: 1.0751912593841553,grad_norm: 0.9999991090870906, iteration: 11955
loss: 1.0484387874603271,grad_norm: 0.9999996937244865, iteration: 11956
loss: 1.119807481765747,grad_norm: 0.9999994655734528, iteration: 11957
loss: 1.0250294208526611,grad_norm: 0.9999991621212903, iteration: 11958
loss: 1.0452330112457275,grad_norm: 0.9999993300812499, iteration: 11959
loss: 0.996959924697876,grad_norm: 0.9999992183993405, iteration: 11960
loss: 1.0930266380310059,grad_norm: 0.9999998183495852, iteration: 11961
loss: 1.0488219261169434,grad_norm: 0.9999993756854071, iteration: 11962
loss: 0.9847351312637329,grad_norm: 0.999999282584104, iteration: 11963
loss: 1.039219856262207,grad_norm: 0.9999991554470626, iteration: 11964
loss: 1.0790972709655762,grad_norm: 0.9999998298547025, iteration: 11965
loss: 1.0361573696136475,grad_norm: 0.9999992953969727, iteration: 11966
loss: 1.0494475364685059,grad_norm: 0.999999370192129, iteration: 11967
loss: 1.063955307006836,grad_norm: 0.9999994716793822, iteration: 11968
loss: 1.0472091436386108,grad_norm: 0.999999590065923, iteration: 11969
loss: 0.9728743433952332,grad_norm: 0.9999993060658141, iteration: 11970
loss: 1.0134142637252808,grad_norm: 0.9612337137586819, iteration: 11971
loss: 1.0550791025161743,grad_norm: 0.9999995979944172, iteration: 11972
loss: 1.139231562614441,grad_norm: 0.9999998224796602, iteration: 11973
loss: 1.0546129941940308,grad_norm: 0.9999990953509349, iteration: 11974
loss: 1.0729206800460815,grad_norm: 0.9999999673668732, iteration: 11975
loss: 1.008466124534607,grad_norm: 0.9999992415317626, iteration: 11976
loss: 1.0453144311904907,grad_norm: 0.9999993361902686, iteration: 11977
loss: 1.0959700345993042,grad_norm: 0.9999998397013764, iteration: 11978
loss: 1.0177137851715088,grad_norm: 0.9999995341827267, iteration: 11979
loss: 1.0396029949188232,grad_norm: 0.9999995448816172, iteration: 11980
loss: 1.0987682342529297,grad_norm: 0.9999995338915741, iteration: 11981
loss: 1.1321349143981934,grad_norm: 0.9999996466766788, iteration: 11982
loss: 1.0322036743164062,grad_norm: 0.9999995311811379, iteration: 11983
loss: 1.067846655845642,grad_norm: 0.9999994520274068, iteration: 11984
loss: 1.0107841491699219,grad_norm: 0.9999994550524176, iteration: 11985
loss: 1.051599144935608,grad_norm: 0.9999996258384526, iteration: 11986
loss: 1.0314475297927856,grad_norm: 0.9999994900547607, iteration: 11987
loss: 0.9918379187583923,grad_norm: 0.9999992347462454, iteration: 11988
loss: 1.0158003568649292,grad_norm: 0.999999146886447, iteration: 11989
loss: 1.0594396591186523,grad_norm: 0.9999996986332955, iteration: 11990
loss: 1.0087743997573853,grad_norm: 0.9999993715343591, iteration: 11991
loss: 1.087368130683899,grad_norm: 0.9999995469039522, iteration: 11992
loss: 1.0448442697525024,grad_norm: 0.9999994870370975, iteration: 11993
loss: 1.0505119562149048,grad_norm: 0.9999994384561749, iteration: 11994
loss: 1.017972707748413,grad_norm: 0.9999992269809768, iteration: 11995
loss: 1.0406761169433594,grad_norm: 0.9999993607742023, iteration: 11996
loss: 1.0040671825408936,grad_norm: 0.999999180952012, iteration: 11997
loss: 1.050192952156067,grad_norm: 0.9999993329143733, iteration: 11998
loss: 1.0191643238067627,grad_norm: 0.9999992981645663, iteration: 11999
loss: 1.021414041519165,grad_norm: 0.9999992311136415, iteration: 12000
loss: 1.0433708429336548,grad_norm: 0.9999993403789947, iteration: 12001
loss: 0.9898810386657715,grad_norm: 0.999999386708017, iteration: 12002
loss: 1.0229363441467285,grad_norm: 0.9999993251834479, iteration: 12003
loss: 0.9981778264045715,grad_norm: 0.9999992567620154, iteration: 12004
loss: 0.9880063533782959,grad_norm: 0.9999992815320696, iteration: 12005
loss: 0.9913138747215271,grad_norm: 0.9999992151107586, iteration: 12006
loss: 1.1614329814910889,grad_norm: 0.999999660192757, iteration: 12007
loss: 1.0507296323776245,grad_norm: 0.9999995874905057, iteration: 12008
loss: 1.0237027406692505,grad_norm: 0.9999994051875668, iteration: 12009
loss: 0.9858440160751343,grad_norm: 0.999999238069747, iteration: 12010
loss: 1.1157050132751465,grad_norm: 0.9999995551359437, iteration: 12011
loss: 1.0257434844970703,grad_norm: 0.9999994751803365, iteration: 12012
loss: 1.0543177127838135,grad_norm: 0.9999995798464413, iteration: 12013
loss: 1.0569360256195068,grad_norm: 0.9999992297096463, iteration: 12014
loss: 0.9779828190803528,grad_norm: 0.999999325645561, iteration: 12015
loss: 1.0507493019104004,grad_norm: 0.9999995619588674, iteration: 12016
loss: 1.0527361631393433,grad_norm: 0.9999992442428061, iteration: 12017
loss: 1.0549216270446777,grad_norm: 0.9999994981237368, iteration: 12018
loss: 1.0227121114730835,grad_norm: 0.9999993950270892, iteration: 12019
loss: 0.9985783100128174,grad_norm: 0.9999991706435913, iteration: 12020
loss: 1.0147502422332764,grad_norm: 0.9999994603433805, iteration: 12021
loss: 1.0438885688781738,grad_norm: 0.9999992964215808, iteration: 12022
loss: 1.0169222354888916,grad_norm: 0.9999990689525039, iteration: 12023
loss: 1.0504059791564941,grad_norm: 0.9999990408675263, iteration: 12024
loss: 1.0018669366836548,grad_norm: 0.9999993128723322, iteration: 12025
loss: 1.0108184814453125,grad_norm: 0.9999991080769779, iteration: 12026
loss: 0.976499617099762,grad_norm: 0.9999993644643015, iteration: 12027
loss: 1.0687793493270874,grad_norm: 0.9999993159412207, iteration: 12028
loss: 1.0792678594589233,grad_norm: 0.9999994294557558, iteration: 12029
loss: 1.0476431846618652,grad_norm: 0.9999993809177445, iteration: 12030
loss: 1.043291449546814,grad_norm: 0.9999992892616634, iteration: 12031
loss: 1.078684687614441,grad_norm: 0.9999997703322104, iteration: 12032
loss: 1.0523754358291626,grad_norm: 0.9999994641847457, iteration: 12033
loss: 1.027195930480957,grad_norm: 0.9999991367081085, iteration: 12034
loss: 1.0856329202651978,grad_norm: 0.9999994648005238, iteration: 12035
loss: 1.027849793434143,grad_norm: 0.999999230971524, iteration: 12036
loss: 1.0383994579315186,grad_norm: 0.9999991179833745, iteration: 12037
loss: 1.1178693771362305,grad_norm: 0.9999994082061553, iteration: 12038
loss: 1.0737587213516235,grad_norm: 0.9999994536470753, iteration: 12039
loss: 1.0320597887039185,grad_norm: 0.9999993235060499, iteration: 12040
loss: 1.023606300354004,grad_norm: 0.9999991345082667, iteration: 12041
loss: 1.1274253129959106,grad_norm: 0.9999996709913758, iteration: 12042
loss: 1.032605528831482,grad_norm: 0.9999994179624818, iteration: 12043
loss: 1.025891661643982,grad_norm: 0.9999994383650388, iteration: 12044
loss: 1.000536322593689,grad_norm: 0.9999991786974785, iteration: 12045
loss: 1.0513893365859985,grad_norm: 0.9999994035321325, iteration: 12046
loss: 1.0196926593780518,grad_norm: 0.999999170832257, iteration: 12047
loss: 1.0220290422439575,grad_norm: 0.9999991210478233, iteration: 12048
loss: 1.0277937650680542,grad_norm: 0.9999991763114809, iteration: 12049
loss: 1.0544617176055908,grad_norm: 0.9999993543827937, iteration: 12050
loss: 1.0125946998596191,grad_norm: 0.9999991330314285, iteration: 12051
loss: 1.0422627925872803,grad_norm: 0.9999991831361758, iteration: 12052
loss: 1.065966248512268,grad_norm: 0.9999996291036889, iteration: 12053
loss: 1.0317424535751343,grad_norm: 0.999999284507677, iteration: 12054
loss: 1.0330531597137451,grad_norm: 0.9999992277693712, iteration: 12055
loss: 0.9849382638931274,grad_norm: 0.9999994370801039, iteration: 12056
loss: 1.0617128610610962,grad_norm: 0.9999992311408242, iteration: 12057
loss: 1.0919654369354248,grad_norm: 0.9999996063142877, iteration: 12058
loss: 0.9884757399559021,grad_norm: 0.999998981853244, iteration: 12059
loss: 1.020307183265686,grad_norm: 0.9999993584590025, iteration: 12060
loss: 1.0130833387374878,grad_norm: 0.999999456750998, iteration: 12061
loss: 1.0243958234786987,grad_norm: 0.9999994115108596, iteration: 12062
loss: 1.0495606660842896,grad_norm: 0.9999994012039769, iteration: 12063
loss: 0.9927147626876831,grad_norm: 0.9999992476668773, iteration: 12064
loss: 1.040952205657959,grad_norm: 0.9999992422729526, iteration: 12065
loss: 1.019027590751648,grad_norm: 0.9999990488257388, iteration: 12066
loss: 1.0127720832824707,grad_norm: 0.9999992596929439, iteration: 12067
loss: 1.0418760776519775,grad_norm: 0.9999993519777809, iteration: 12068
loss: 1.0387961864471436,grad_norm: 0.9999992487599132, iteration: 12069
loss: 1.0078041553497314,grad_norm: 0.9999992499143519, iteration: 12070
loss: 1.0310198068618774,grad_norm: 0.9999994058428116, iteration: 12071
loss: 1.040035605430603,grad_norm: 0.9999995533351018, iteration: 12072
loss: 1.027398943901062,grad_norm: 0.9999994160379594, iteration: 12073
loss: 1.0362409353256226,grad_norm: 0.9999991318595625, iteration: 12074
loss: 1.0696728229522705,grad_norm: 0.9999995744960769, iteration: 12075
loss: 1.0882601737976074,grad_norm: 0.9999995374319723, iteration: 12076
loss: 1.0532923936843872,grad_norm: 0.9999993575242714, iteration: 12077
loss: 1.1206721067428589,grad_norm: 0.9999997091569783, iteration: 12078
loss: 1.0614182949066162,grad_norm: 0.9999992011279712, iteration: 12079
loss: 1.0391242504119873,grad_norm: 0.9999993098023053, iteration: 12080
loss: 1.0556702613830566,grad_norm: 0.999999209971269, iteration: 12081
loss: 1.0056577920913696,grad_norm: 0.9999992291735829, iteration: 12082
loss: 1.049193263053894,grad_norm: 0.999999396828461, iteration: 12083
loss: 1.0272372961044312,grad_norm: 0.9999993557848201, iteration: 12084
loss: 1.0296657085418701,grad_norm: 0.9999997153194299, iteration: 12085
loss: 1.029188632965088,grad_norm: 0.9999992451358433, iteration: 12086
loss: 1.0372120141983032,grad_norm: 0.9999995336736185, iteration: 12087
loss: 1.0274620056152344,grad_norm: 0.9999992925432865, iteration: 12088
loss: 1.0116064548492432,grad_norm: 0.9999992747825533, iteration: 12089
loss: 1.054272174835205,grad_norm: 0.9999992096391548, iteration: 12090
loss: 0.9896039366722107,grad_norm: 0.999999659285088, iteration: 12091
loss: 1.0593615770339966,grad_norm: 0.999999275332749, iteration: 12092
loss: 1.0512858629226685,grad_norm: 0.9999992844711754, iteration: 12093
loss: 1.0012377500534058,grad_norm: 0.9999992991078471, iteration: 12094
loss: 1.0378131866455078,grad_norm: 0.9999995165513967, iteration: 12095
loss: 1.1425575017929077,grad_norm: 0.9999996036599146, iteration: 12096
loss: 1.016465663909912,grad_norm: 0.9999990529333104, iteration: 12097
loss: 1.077332854270935,grad_norm: 0.9999991372086998, iteration: 12098
loss: 1.0337650775909424,grad_norm: 0.9999994651541677, iteration: 12099
loss: 1.0365580320358276,grad_norm: 0.9999991289895981, iteration: 12100
loss: 1.0184177160263062,grad_norm: 0.9999993312578661, iteration: 12101
loss: 1.039944052696228,grad_norm: 0.9999993350569943, iteration: 12102
loss: 1.0130432844161987,grad_norm: 0.9999995334428694, iteration: 12103
loss: 1.0351076126098633,grad_norm: 0.9999992071133015, iteration: 12104
loss: 1.0445802211761475,grad_norm: 0.9999993562720436, iteration: 12105
loss: 1.0057114362716675,grad_norm: 0.9999995578669304, iteration: 12106
loss: 1.0565600395202637,grad_norm: 0.999999468569334, iteration: 12107
loss: 1.0355077981948853,grad_norm: 0.9999996295743879, iteration: 12108
loss: 1.0948456525802612,grad_norm: 0.999999871560369, iteration: 12109
loss: 1.0107368230819702,grad_norm: 0.9999991123098709, iteration: 12110
loss: 1.0450471639633179,grad_norm: 0.9999992543445134, iteration: 12111
loss: 1.0060901641845703,grad_norm: 0.9999993641791798, iteration: 12112
loss: 0.9880402088165283,grad_norm: 0.999999114697234, iteration: 12113
loss: 0.9837731719017029,grad_norm: 0.9999993165061346, iteration: 12114
loss: 1.060469388961792,grad_norm: 0.9999993263982329, iteration: 12115
loss: 1.0745625495910645,grad_norm: 0.9999995491465181, iteration: 12116
loss: 0.9926755428314209,grad_norm: 0.9999993465431442, iteration: 12117
loss: 1.0769963264465332,grad_norm: 0.9999995679536203, iteration: 12118
loss: 1.0546568632125854,grad_norm: 0.9999996808573315, iteration: 12119
loss: 1.1318684816360474,grad_norm: 0.9999996509661488, iteration: 12120
loss: 1.0373661518096924,grad_norm: 0.9999994779730981, iteration: 12121
loss: 1.006372094154358,grad_norm: 0.9999992459854612, iteration: 12122
loss: 1.0170152187347412,grad_norm: 0.9999992734492437, iteration: 12123
loss: 1.0240628719329834,grad_norm: 0.9999993092137863, iteration: 12124
loss: 1.0910391807556152,grad_norm: 0.9999995969057138, iteration: 12125
loss: 1.0666719675064087,grad_norm: 0.999999378076811, iteration: 12126
loss: 1.0588575601577759,grad_norm: 0.9999993013183032, iteration: 12127
loss: 1.0453214645385742,grad_norm: 0.9999992980476946, iteration: 12128
loss: 1.011228084564209,grad_norm: 0.9999992869150011, iteration: 12129
loss: 1.0439047813415527,grad_norm: 0.9999997597091969, iteration: 12130
loss: 1.082770586013794,grad_norm: 0.9999992764539759, iteration: 12131
loss: 1.0365564823150635,grad_norm: 0.9999996898221754, iteration: 12132
loss: 1.0395534038543701,grad_norm: 0.9999990620327068, iteration: 12133
loss: 1.0222581624984741,grad_norm: 0.9999991503761809, iteration: 12134
loss: 1.0139044523239136,grad_norm: 0.9999990493436546, iteration: 12135
loss: 1.0560671091079712,grad_norm: 0.9999993498007312, iteration: 12136
loss: 1.0736452341079712,grad_norm: 0.9999996030363277, iteration: 12137
loss: 1.0747840404510498,grad_norm: 0.999999693539756, iteration: 12138
loss: 1.0681687593460083,grad_norm: 0.9999992722522858, iteration: 12139
loss: 1.012804388999939,grad_norm: 0.9999992349169167, iteration: 12140
loss: 1.0118036270141602,grad_norm: 0.9999992118758603, iteration: 12141
loss: 1.0608097314834595,grad_norm: 0.9511564788451644, iteration: 12142
loss: 1.0093275308609009,grad_norm: 0.9999990609861342, iteration: 12143
loss: 1.0184956789016724,grad_norm: 0.999999212007602, iteration: 12144
loss: 0.9778650999069214,grad_norm: 0.9999994845250283, iteration: 12145
loss: 1.0175913572311401,grad_norm: 0.9999992938456793, iteration: 12146
loss: 1.0756311416625977,grad_norm: 0.999999592528168, iteration: 12147
loss: 1.0899882316589355,grad_norm: 0.9999995126696124, iteration: 12148
loss: 1.0584107637405396,grad_norm: 0.9999995051111965, iteration: 12149
loss: 1.1086492538452148,grad_norm: 0.9999997324505553, iteration: 12150
loss: 1.022300362586975,grad_norm: 0.999999270207151, iteration: 12151
loss: 1.0528937578201294,grad_norm: 0.9999996302398777, iteration: 12152
loss: 1.087793231010437,grad_norm: 0.9999996511297314, iteration: 12153
loss: 0.999074399471283,grad_norm: 0.9999991487964992, iteration: 12154
loss: 1.0092133283615112,grad_norm: 0.9999992338118927, iteration: 12155
loss: 1.1047970056533813,grad_norm: 0.9999996806333579, iteration: 12156
loss: 1.045210838317871,grad_norm: 0.9999993435422995, iteration: 12157
loss: 1.0601179599761963,grad_norm: 0.9999996353298076, iteration: 12158
loss: 1.01396906375885,grad_norm: 0.9999991877987748, iteration: 12159
loss: 1.0804311037063599,grad_norm: 0.9999993053079278, iteration: 12160
loss: 1.031063437461853,grad_norm: 0.9999994834012614, iteration: 12161
loss: 1.0328319072723389,grad_norm: 0.9999996685516482, iteration: 12162
loss: 1.006012201309204,grad_norm: 0.9999992639093597, iteration: 12163
loss: 1.0221203565597534,grad_norm: 0.999999275263795, iteration: 12164
loss: 1.0629510879516602,grad_norm: 0.9999991770030382, iteration: 12165
loss: 1.0408869981765747,grad_norm: 0.9999997602750882, iteration: 12166
loss: 1.0190519094467163,grad_norm: 0.9999990999923275, iteration: 12167
loss: 1.0016705989837646,grad_norm: 0.9999992552181525, iteration: 12168
loss: 1.0221965312957764,grad_norm: 0.9999993597687006, iteration: 12169
loss: 1.003724217414856,grad_norm: 0.968919042556154, iteration: 12170
loss: 1.0590927600860596,grad_norm: 0.9999992040259095, iteration: 12171
loss: 1.0617225170135498,grad_norm: 0.9999994989091275, iteration: 12172
loss: 1.0462892055511475,grad_norm: 0.9999992558770819, iteration: 12173
loss: 1.020536184310913,grad_norm: 0.9999994570821913, iteration: 12174
loss: 1.015032410621643,grad_norm: 0.9999993766001443, iteration: 12175
loss: 1.0114481449127197,grad_norm: 0.9999992649605103, iteration: 12176
loss: 1.0317161083221436,grad_norm: 0.9999991073911838, iteration: 12177
loss: 0.9532271027565002,grad_norm: 0.9999992631372235, iteration: 12178
loss: 0.9924268126487732,grad_norm: 0.9999992337965993, iteration: 12179
loss: 1.0243886709213257,grad_norm: 0.9999992246826451, iteration: 12180
loss: 1.015149474143982,grad_norm: 0.9999994930250888, iteration: 12181
loss: 1.0344210863113403,grad_norm: 0.9999991362692807, iteration: 12182
loss: 0.9603402018547058,grad_norm: 0.9999996310252661, iteration: 12183
loss: 1.0172933340072632,grad_norm: 0.999999274405496, iteration: 12184
loss: 1.1010888814926147,grad_norm: 0.9999997039722589, iteration: 12185
loss: 1.087589144706726,grad_norm: 0.9999996585296073, iteration: 12186
loss: 0.9848043322563171,grad_norm: 0.9999992744157516, iteration: 12187
loss: 1.0394952297210693,grad_norm: 0.9999993392709381, iteration: 12188
loss: 1.0443997383117676,grad_norm: 0.9999992396337731, iteration: 12189
loss: 1.0041899681091309,grad_norm: 0.9999990686790121, iteration: 12190
loss: 1.0355278253555298,grad_norm: 0.9999993809360574, iteration: 12191
loss: 1.0101252794265747,grad_norm: 0.9999991283267889, iteration: 12192
loss: 0.9771657586097717,grad_norm: 0.9999997174152395, iteration: 12193
loss: 1.0552213191986084,grad_norm: 0.9999992913775774, iteration: 12194
loss: 1.0578800439834595,grad_norm: 0.9999993874885362, iteration: 12195
loss: 1.0841459035873413,grad_norm: 0.999999561582794, iteration: 12196
loss: 1.0544815063476562,grad_norm: 0.999999267471574, iteration: 12197
loss: 1.0734730958938599,grad_norm: 0.9999994816640605, iteration: 12198
loss: 1.0142284631729126,grad_norm: 0.999999302727947, iteration: 12199
loss: 1.1035196781158447,grad_norm: 0.9999995711925644, iteration: 12200
loss: 1.0371915102005005,grad_norm: 0.9999993194597877, iteration: 12201
loss: 1.0318397283554077,grad_norm: 0.9999996832140586, iteration: 12202
loss: 1.073056936264038,grad_norm: 0.9999996262152852, iteration: 12203
loss: 0.9969690442085266,grad_norm: 0.999999193845877, iteration: 12204
loss: 1.055273175239563,grad_norm: 0.9999995545190105, iteration: 12205
loss: 1.0525293350219727,grad_norm: 0.9999996410807852, iteration: 12206
loss: 1.0301481485366821,grad_norm: 0.9999995582493179, iteration: 12207
loss: 1.033266544342041,grad_norm: 0.9523743789041054, iteration: 12208
loss: 1.079932689666748,grad_norm: 0.9999996293894206, iteration: 12209
loss: 1.0404971837997437,grad_norm: 0.9999995324662385, iteration: 12210
loss: 1.0703785419464111,grad_norm: 0.9999992975213905, iteration: 12211
loss: 0.9838224649429321,grad_norm: 0.9999992110183424, iteration: 12212
loss: 1.0472378730773926,grad_norm: 0.9999990271109028, iteration: 12213
loss: 1.0335440635681152,grad_norm: 0.9999992848907057, iteration: 12214
loss: 1.0825291872024536,grad_norm: 0.9999994435487547, iteration: 12215
loss: 1.0411485433578491,grad_norm: 0.9999993172020958, iteration: 12216
loss: 1.0258368253707886,grad_norm: 0.9999991761164905, iteration: 12217
loss: 1.024912714958191,grad_norm: 0.9999991892837324, iteration: 12218
loss: 0.9761786460876465,grad_norm: 0.9999990831824807, iteration: 12219
loss: 1.0855481624603271,grad_norm: 0.9999996833740625, iteration: 12220
loss: 1.0566630363464355,grad_norm: 0.9999992087338565, iteration: 12221
loss: 1.0194474458694458,grad_norm: 0.9999993810786566, iteration: 12222
loss: 1.0121221542358398,grad_norm: 0.9999993337559737, iteration: 12223
loss: 1.0456761121749878,grad_norm: 0.9999993598961067, iteration: 12224
loss: 1.0423297882080078,grad_norm: 0.9999992359676474, iteration: 12225
loss: 1.053523063659668,grad_norm: 0.9999994307764632, iteration: 12226
loss: 1.0635130405426025,grad_norm: 0.9999993741196659, iteration: 12227
loss: 1.0041559934616089,grad_norm: 0.9999993954564184, iteration: 12228
loss: 1.0844162702560425,grad_norm: 0.9999994778534405, iteration: 12229
loss: 1.0336472988128662,grad_norm: 0.9999994772089515, iteration: 12230
loss: 1.029404640197754,grad_norm: 0.9999996543521326, iteration: 12231
loss: 1.0416735410690308,grad_norm: 0.9999994505139563, iteration: 12232
loss: 0.9903644323348999,grad_norm: 0.9999995169846981, iteration: 12233
loss: 1.0861501693725586,grad_norm: 0.9999994848189319, iteration: 12234
loss: 1.0577272176742554,grad_norm: 0.9999997772034932, iteration: 12235
loss: 1.0196353197097778,grad_norm: 0.9999994837974795, iteration: 12236
loss: 1.0193185806274414,grad_norm: 0.9999999071192239, iteration: 12237
loss: 0.9851070642471313,grad_norm: 0.9999991099147839, iteration: 12238
loss: 1.0626676082611084,grad_norm: 0.9999993992343482, iteration: 12239
loss: 1.0903092622756958,grad_norm: 0.9999995182666421, iteration: 12240
loss: 1.0545402765274048,grad_norm: 0.9999993134019625, iteration: 12241
loss: 1.0447438955307007,grad_norm: 0.9999998225679817, iteration: 12242
loss: 1.0114209651947021,grad_norm: 0.9999993879381182, iteration: 12243
loss: 1.0378096103668213,grad_norm: 0.9999992303386817, iteration: 12244
loss: 1.0313527584075928,grad_norm: 0.9999996325290625, iteration: 12245
loss: 1.0341100692749023,grad_norm: 0.9999997393685526, iteration: 12246
loss: 1.0435513257980347,grad_norm: 0.9999992886710859, iteration: 12247
loss: 1.0254342555999756,grad_norm: 0.9999994338453498, iteration: 12248
loss: 1.0605345964431763,grad_norm: 0.9999993442348593, iteration: 12249
loss: 1.060888409614563,grad_norm: 0.9999997104712783, iteration: 12250
loss: 1.0448803901672363,grad_norm: 0.9999996542811375, iteration: 12251
loss: 1.0287734270095825,grad_norm: 0.9999993652048873, iteration: 12252
loss: 1.0252015590667725,grad_norm: 0.9999994371140059, iteration: 12253
loss: 1.0879933834075928,grad_norm: 0.9999996437456237, iteration: 12254
loss: 1.0186048746109009,grad_norm: 0.9999995218646414, iteration: 12255
loss: 1.0603636503219604,grad_norm: 0.999999691717828, iteration: 12256
loss: 1.024432897567749,grad_norm: 0.9999993488059519, iteration: 12257
loss: 1.038716435432434,grad_norm: 0.9999997370264765, iteration: 12258
loss: 1.020697832107544,grad_norm: 0.9999990300318067, iteration: 12259
loss: 1.038838505744934,grad_norm: 0.9999991327843332, iteration: 12260
loss: 1.046212911605835,grad_norm: 0.9999995815302296, iteration: 12261
loss: 1.0179011821746826,grad_norm: 0.9999991353352348, iteration: 12262
loss: 1.0874909162521362,grad_norm: 0.9999995708065211, iteration: 12263
loss: 1.0474737882614136,grad_norm: 0.9999992910981775, iteration: 12264
loss: 1.0536434650421143,grad_norm: 0.99999941647163, iteration: 12265
loss: 0.9792734980583191,grad_norm: 0.9999992036590449, iteration: 12266
loss: 1.0103987455368042,grad_norm: 0.9999994351998313, iteration: 12267
loss: 0.9999303221702576,grad_norm: 0.9999992592614838, iteration: 12268
loss: 1.015256643295288,grad_norm: 0.9999991556586713, iteration: 12269
loss: 1.0691808462142944,grad_norm: 0.9999994505870624, iteration: 12270
loss: 1.0831650495529175,grad_norm: 0.9999995021216429, iteration: 12271
loss: 1.0116416215896606,grad_norm: 0.9999992429865937, iteration: 12272
loss: 1.0199624300003052,grad_norm: 0.9999995047986214, iteration: 12273
loss: 1.0137152671813965,grad_norm: 0.9999993900293124, iteration: 12274
loss: 1.035199761390686,grad_norm: 0.999999269301398, iteration: 12275
loss: 1.0178370475769043,grad_norm: 0.9999993165117457, iteration: 12276
loss: 1.0054556131362915,grad_norm: 0.9999992417097402, iteration: 12277
loss: 1.0571675300598145,grad_norm: 0.9999996046261268, iteration: 12278
loss: 1.0113856792449951,grad_norm: 0.9999990996703602, iteration: 12279
loss: 1.0716755390167236,grad_norm: 0.9999996110445232, iteration: 12280
loss: 1.0239514112472534,grad_norm: 0.9999993745353524, iteration: 12281
loss: 1.048903226852417,grad_norm: 0.9999996409188967, iteration: 12282
loss: 1.0798522233963013,grad_norm: 0.9999996152655437, iteration: 12283
loss: 1.0581245422363281,grad_norm: 0.9999993786512679, iteration: 12284
loss: 1.0977532863616943,grad_norm: 0.9999995262550765, iteration: 12285
loss: 1.0507726669311523,grad_norm: 0.9999993779731079, iteration: 12286
loss: 1.0711244344711304,grad_norm: 0.9999993493980481, iteration: 12287
loss: 1.0188478231430054,grad_norm: 0.9999991782177124, iteration: 12288
loss: 0.9949392676353455,grad_norm: 0.9999992003282568, iteration: 12289
loss: 1.0071107149124146,grad_norm: 0.9999992685014497, iteration: 12290
loss: 0.9952501654624939,grad_norm: 0.999999211684023, iteration: 12291
loss: 0.9869434237480164,grad_norm: 0.9999992971439705, iteration: 12292
loss: 1.0801136493682861,grad_norm: 0.9999996273918104, iteration: 12293
loss: 1.0368545055389404,grad_norm: 0.9999995854215706, iteration: 12294
loss: 0.9568501710891724,grad_norm: 0.9999994389625589, iteration: 12295
loss: 1.0136116743087769,grad_norm: 0.9999990411716821, iteration: 12296
loss: 1.0613099336624146,grad_norm: 0.9999993989262191, iteration: 12297
loss: 1.0016400814056396,grad_norm: 0.9999993268872529, iteration: 12298
loss: 1.056050419807434,grad_norm: 0.9999993486739258, iteration: 12299
loss: 1.0328865051269531,grad_norm: 0.9999992640356504, iteration: 12300
loss: 1.0099891424179077,grad_norm: 0.9999990822048106, iteration: 12301
loss: 1.0596094131469727,grad_norm: 0.9999994814906877, iteration: 12302
loss: 0.9925930500030518,grad_norm: 0.9999993559306364, iteration: 12303
loss: 1.0009251832962036,grad_norm: 0.9999994351167619, iteration: 12304
loss: 1.0821633338928223,grad_norm: 0.9999993538061671, iteration: 12305
loss: 1.0939644575119019,grad_norm: 0.9999999041825917, iteration: 12306
loss: 0.9844951629638672,grad_norm: 0.9999993082499545, iteration: 12307
loss: 1.0427579879760742,grad_norm: 0.999999389723963, iteration: 12308
loss: 1.034509539604187,grad_norm: 0.9999994682493352, iteration: 12309
loss: 1.0253803730010986,grad_norm: 0.9999991116874715, iteration: 12310
loss: 1.0372767448425293,grad_norm: 0.9999991044767444, iteration: 12311
loss: 1.0344727039337158,grad_norm: 0.99999942700485, iteration: 12312
loss: 1.0892797708511353,grad_norm: 0.9999995734013418, iteration: 12313
loss: 1.0946890115737915,grad_norm: 0.9999999241846566, iteration: 12314
loss: 1.0088400840759277,grad_norm: 0.9999992212494834, iteration: 12315
loss: 1.0836620330810547,grad_norm: 0.9999997077923665, iteration: 12316
loss: 1.038662075996399,grad_norm: 0.9999991259347335, iteration: 12317
loss: 0.9775310158729553,grad_norm: 0.9999990885113162, iteration: 12318
loss: 1.1295510530471802,grad_norm: 0.9999997046718652, iteration: 12319
loss: 1.0461112260818481,grad_norm: 0.9999993041799564, iteration: 12320
loss: 1.045571208000183,grad_norm: 0.999999053225219, iteration: 12321
loss: 1.0278605222702026,grad_norm: 0.9999992171999955, iteration: 12322
loss: 0.981355607509613,grad_norm: 0.990388636815105, iteration: 12323
loss: 1.0196025371551514,grad_norm: 0.9999992734519214, iteration: 12324
loss: 1.0332335233688354,grad_norm: 0.9999991152233675, iteration: 12325
loss: 1.1001811027526855,grad_norm: 0.9999997085133993, iteration: 12326
loss: 0.9339949488639832,grad_norm: 0.9999992848830139, iteration: 12327
loss: 1.016979455947876,grad_norm: 0.9999991688763784, iteration: 12328
loss: 1.0229551792144775,grad_norm: 0.9999992522068215, iteration: 12329
loss: 1.0568995475769043,grad_norm: 0.9999991320098329, iteration: 12330
loss: 1.0235785245895386,grad_norm: 0.9999995466013069, iteration: 12331
loss: 1.0433942079544067,grad_norm: 0.9999994172934878, iteration: 12332
loss: 1.0140039920806885,grad_norm: 0.999999060565597, iteration: 12333
loss: 1.0344421863555908,grad_norm: 0.9999994035686276, iteration: 12334
loss: 0.986463725566864,grad_norm: 0.9999992775649531, iteration: 12335
loss: 1.0231008529663086,grad_norm: 0.9999992357572351, iteration: 12336
loss: 1.0070068836212158,grad_norm: 0.9999992722977495, iteration: 12337
loss: 1.0494685173034668,grad_norm: 0.9999992224791165, iteration: 12338
loss: 1.0187162160873413,grad_norm: 0.9999991664418408, iteration: 12339
loss: 1.0663940906524658,grad_norm: 0.9999991545311777, iteration: 12340
loss: 0.994260311126709,grad_norm: 0.9786596485517854, iteration: 12341
loss: 1.0271430015563965,grad_norm: 0.9999991614574055, iteration: 12342
loss: 1.0501965284347534,grad_norm: 0.9999992623836255, iteration: 12343
loss: 1.0295968055725098,grad_norm: 0.9999993392753103, iteration: 12344
loss: 1.0125024318695068,grad_norm: 0.999999153053669, iteration: 12345
loss: 1.0649579763412476,grad_norm: 0.9999998037866678, iteration: 12346
loss: 0.9894168376922607,grad_norm: 0.9999991262943952, iteration: 12347
loss: 1.109667420387268,grad_norm: 0.9999996163742458, iteration: 12348
loss: 1.0336298942565918,grad_norm: 0.9999994589235371, iteration: 12349
loss: 1.0060081481933594,grad_norm: 0.9446257302871557, iteration: 12350
loss: 1.0125393867492676,grad_norm: 0.9999991996622923, iteration: 12351
loss: 1.0624780654907227,grad_norm: 0.9999991467829178, iteration: 12352
loss: 1.050580382347107,grad_norm: 0.9999992745140407, iteration: 12353
loss: 1.014762043952942,grad_norm: 0.9999992189295015, iteration: 12354
loss: 1.0242263078689575,grad_norm: 0.9999993098451273, iteration: 12355
loss: 1.0127149820327759,grad_norm: 0.999999302922145, iteration: 12356
loss: 1.1036416292190552,grad_norm: 0.9999992723181216, iteration: 12357
loss: 1.0645426511764526,grad_norm: 0.9999990789643326, iteration: 12358
loss: 1.0562819242477417,grad_norm: 0.999999747806466, iteration: 12359
loss: 1.0299263000488281,grad_norm: 0.999999352292601, iteration: 12360
loss: 1.0259560346603394,grad_norm: 0.999999355964323, iteration: 12361
loss: 1.0961970090866089,grad_norm: 0.9999996479144494, iteration: 12362
loss: 1.0041816234588623,grad_norm: 0.9999990334895644, iteration: 12363
loss: 0.9953458905220032,grad_norm: 0.9999994763595949, iteration: 12364
loss: 1.005164384841919,grad_norm: 0.9999990375500905, iteration: 12365
loss: 0.9881046414375305,grad_norm: 0.9999993483394619, iteration: 12366
loss: 1.0401004552841187,grad_norm: 0.9999992866985519, iteration: 12367
loss: 1.029298186302185,grad_norm: 0.9999992165899485, iteration: 12368
loss: 1.0991990566253662,grad_norm: 0.9999995642188532, iteration: 12369
loss: 0.9991404414176941,grad_norm: 0.9999991480783864, iteration: 12370
loss: 1.00780189037323,grad_norm: 0.9999991453619607, iteration: 12371
loss: 1.0443207025527954,grad_norm: 0.9999992890035342, iteration: 12372
loss: 1.0474120378494263,grad_norm: 0.9999996615103736, iteration: 12373
loss: 1.0085018873214722,grad_norm: 0.9999997455271281, iteration: 12374
loss: 1.0627119541168213,grad_norm: 1.0000000107488507, iteration: 12375
loss: 1.0137766599655151,grad_norm: 0.9999992261126992, iteration: 12376
loss: 1.0122796297073364,grad_norm: 0.9999993069822702, iteration: 12377
loss: 1.0897574424743652,grad_norm: 0.9999995281612134, iteration: 12378
loss: 1.030290961265564,grad_norm: 0.9999994361579454, iteration: 12379
loss: 1.1090375185012817,grad_norm: 0.9999996766219974, iteration: 12380
loss: 1.0513193607330322,grad_norm: 0.9999991885967792, iteration: 12381
loss: 1.0550609827041626,grad_norm: 0.9999991385385756, iteration: 12382
loss: 1.0320944786071777,grad_norm: 0.999999189626246, iteration: 12383
loss: 1.0137830972671509,grad_norm: 0.9999990965789067, iteration: 12384
loss: 1.0141685009002686,grad_norm: 0.9999994022315173, iteration: 12385
loss: 1.0191233158111572,grad_norm: 0.9999992336097091, iteration: 12386
loss: 1.0103354454040527,grad_norm: 0.99999926725785, iteration: 12387
loss: 1.0778228044509888,grad_norm: 0.9999996428395095, iteration: 12388
loss: 1.0869932174682617,grad_norm: 0.9999991396907681, iteration: 12389
loss: 1.075435757637024,grad_norm: 0.9999996377441954, iteration: 12390
loss: 0.9588106870651245,grad_norm: 0.9999991178431961, iteration: 12391
loss: 1.0292879343032837,grad_norm: 0.9999991686041487, iteration: 12392
loss: 0.990989089012146,grad_norm: 0.9999991796652739, iteration: 12393
loss: 1.027203917503357,grad_norm: 0.9999993190435165, iteration: 12394
loss: 1.0350326299667358,grad_norm: 0.9999991830744078, iteration: 12395
loss: 1.0252012014389038,grad_norm: 0.9999993570006811, iteration: 12396
loss: 1.0495223999023438,grad_norm: 0.9999992032805931, iteration: 12397
loss: 1.0504266023635864,grad_norm: 0.9999993417259047, iteration: 12398
loss: 1.0209336280822754,grad_norm: 0.9999993525601685, iteration: 12399
loss: 0.9995847344398499,grad_norm: 0.9999997789287717, iteration: 12400
loss: 1.0088849067687988,grad_norm: 0.9999992355766892, iteration: 12401
loss: 1.021546483039856,grad_norm: 0.9999991998773831, iteration: 12402
loss: 1.0034931898117065,grad_norm: 0.999999067145322, iteration: 12403
loss: 1.029949426651001,grad_norm: 0.9999992708154497, iteration: 12404
loss: 1.0597196817398071,grad_norm: 0.9999991931022232, iteration: 12405
loss: 1.0633805990219116,grad_norm: 0.999999197062547, iteration: 12406
loss: 0.9810932874679565,grad_norm: 0.999999233316837, iteration: 12407
loss: 1.0090912580490112,grad_norm: 0.9999991831710684, iteration: 12408
loss: 1.0372406244277954,grad_norm: 0.999999203965185, iteration: 12409
loss: 1.0228853225708008,grad_norm: 0.9999998673088663, iteration: 12410
loss: 1.0722748041152954,grad_norm: 0.9999990754235598, iteration: 12411
loss: 1.0373095273971558,grad_norm: 0.9999992891655821, iteration: 12412
loss: 0.9951915740966797,grad_norm: 0.9999992263247934, iteration: 12413
loss: 0.9719934463500977,grad_norm: 0.9999992279591269, iteration: 12414
loss: 1.0614733695983887,grad_norm: 0.9999996892126976, iteration: 12415
loss: 1.0154496431350708,grad_norm: 0.9999992681446824, iteration: 12416
loss: 1.0708987712860107,grad_norm: 0.9999992857178471, iteration: 12417
loss: 0.9799509644508362,grad_norm: 0.9999992790869778, iteration: 12418
loss: 1.0297036170959473,grad_norm: 0.9999994442565848, iteration: 12419
loss: 1.0449283123016357,grad_norm: 0.9999992171700394, iteration: 12420
loss: 1.0248253345489502,grad_norm: 0.999999204004888, iteration: 12421
loss: 1.0188555717468262,grad_norm: 0.999999263874589, iteration: 12422
loss: 0.9896544814109802,grad_norm: 0.9999998247600134, iteration: 12423
loss: 1.0260894298553467,grad_norm: 0.999999339601312, iteration: 12424
loss: 1.0331611633300781,grad_norm: 0.9999996771903806, iteration: 12425
loss: 1.1231743097305298,grad_norm: 0.9999996295616939, iteration: 12426
loss: 1.0235878229141235,grad_norm: 0.9999991630410671, iteration: 12427
loss: 1.0282822847366333,grad_norm: 0.9999994633020564, iteration: 12428
loss: 1.0635905265808105,grad_norm: 0.999999240474047, iteration: 12429
loss: 1.0985413789749146,grad_norm: 0.9999996889054957, iteration: 12430
loss: 1.041528344154358,grad_norm: 0.9999994394259725, iteration: 12431
loss: 1.00834321975708,grad_norm: 0.9999991484469499, iteration: 12432
loss: 1.0125521421432495,grad_norm: 0.9999993699797359, iteration: 12433
loss: 0.9840533137321472,grad_norm: 0.999999238155567, iteration: 12434
loss: 0.9931244254112244,grad_norm: 0.9999991949010396, iteration: 12435
loss: 0.9744556546211243,grad_norm: 0.9999993574762648, iteration: 12436
loss: 1.0159164667129517,grad_norm: 0.9999993086791091, iteration: 12437
loss: 1.0451593399047852,grad_norm: 0.9999996320700071, iteration: 12438
loss: 1.1281325817108154,grad_norm: 0.9999994494622386, iteration: 12439
loss: 1.0125747919082642,grad_norm: 0.9999992770063811, iteration: 12440
loss: 1.05663001537323,grad_norm: 0.9999990887649439, iteration: 12441
loss: 0.9886535406112671,grad_norm: 0.9999990955470567, iteration: 12442
loss: 1.1263328790664673,grad_norm: 0.9999995849363271, iteration: 12443
loss: 1.0323823690414429,grad_norm: 0.9999992512583155, iteration: 12444
loss: 1.0739623308181763,grad_norm: 0.9999992038442843, iteration: 12445
loss: 0.9846012592315674,grad_norm: 0.9999993632734171, iteration: 12446
loss: 1.0721062421798706,grad_norm: 0.9999995958080057, iteration: 12447
loss: 1.0131829977035522,grad_norm: 0.9999992807228905, iteration: 12448
loss: 1.0311399698257446,grad_norm: 0.9999993493270345, iteration: 12449
loss: 1.0616589784622192,grad_norm: 0.9999993449167879, iteration: 12450
loss: 1.1185749769210815,grad_norm: 0.9999995452401595, iteration: 12451
loss: 1.018640398979187,grad_norm: 0.999999373248693, iteration: 12452
loss: 1.047340750694275,grad_norm: 0.9999990862596828, iteration: 12453
loss: 1.0101916790008545,grad_norm: 0.9999995626089099, iteration: 12454
loss: 1.0171622037887573,grad_norm: 0.9999991434845834, iteration: 12455
loss: 1.0596132278442383,grad_norm: 0.9999995309082665, iteration: 12456
loss: 0.9967531561851501,grad_norm: 0.9999991812600854, iteration: 12457
loss: 1.0405163764953613,grad_norm: 0.9999993377758543, iteration: 12458
loss: 1.0138304233551025,grad_norm: 0.9999991199504715, iteration: 12459
loss: 1.0270143747329712,grad_norm: 0.9999993893460304, iteration: 12460
loss: 1.0881675481796265,grad_norm: 0.9999998177425775, iteration: 12461
loss: 1.0260878801345825,grad_norm: 0.9999993492711663, iteration: 12462
loss: 1.0150306224822998,grad_norm: 0.9999992175287885, iteration: 12463
loss: 1.1120414733886719,grad_norm: 0.9999992884946545, iteration: 12464
loss: 1.0657804012298584,grad_norm: 0.9999997784342853, iteration: 12465
loss: 1.0430033206939697,grad_norm: 0.9999996494767586, iteration: 12466
loss: 1.0536643266677856,grad_norm: 0.9999994758790157, iteration: 12467
loss: 1.0246386528015137,grad_norm: 0.9999992229865761, iteration: 12468
loss: 1.0289843082427979,grad_norm: 0.9999994230403769, iteration: 12469
loss: 1.0595000982284546,grad_norm: 0.9999994321051721, iteration: 12470
loss: 1.0253520011901855,grad_norm: 0.9999995309481177, iteration: 12471
loss: 1.069512963294983,grad_norm: 0.999999604105967, iteration: 12472
loss: 1.041250228881836,grad_norm: 0.999999549918023, iteration: 12473
loss: 1.090340495109558,grad_norm: 0.9999995159235405, iteration: 12474
loss: 1.1008195877075195,grad_norm: 0.9999996034627479, iteration: 12475
loss: 1.083238959312439,grad_norm: 0.9999996906159103, iteration: 12476
loss: 1.0711119174957275,grad_norm: 0.9999993786961917, iteration: 12477
loss: 1.0227476358413696,grad_norm: 0.9999993886482801, iteration: 12478
loss: 1.037213921546936,grad_norm: 0.9999998483464742, iteration: 12479
loss: 1.0719170570373535,grad_norm: 0.999999664317292, iteration: 12480
loss: 1.133734941482544,grad_norm: 0.9999994923079322, iteration: 12481
loss: 0.9569072127342224,grad_norm: 0.9999992885651527, iteration: 12482
loss: 1.0426448583602905,grad_norm: 0.9999991482566982, iteration: 12483
loss: 1.0190500020980835,grad_norm: 0.9999992019233827, iteration: 12484
loss: 1.0124452114105225,grad_norm: 0.9999992556509447, iteration: 12485
loss: 1.05122709274292,grad_norm: 0.9999993963450728, iteration: 12486
loss: 0.9844971299171448,grad_norm: 0.9999992079615189, iteration: 12487
loss: 1.1263554096221924,grad_norm: 0.9999998366315176, iteration: 12488
loss: 1.0824053287506104,grad_norm: 0.9999998398410415, iteration: 12489
loss: 1.0166435241699219,grad_norm: 0.9999993361620187, iteration: 12490
loss: 1.054497241973877,grad_norm: 0.9999995673530364, iteration: 12491
loss: 1.0188406705856323,grad_norm: 0.9999999021400229, iteration: 12492
loss: 1.0220848321914673,grad_norm: 0.9999997632644092, iteration: 12493
loss: 1.0440250635147095,grad_norm: 0.9999991164703353, iteration: 12494
loss: 1.0326218605041504,grad_norm: 0.9999991747234768, iteration: 12495
loss: 1.029965877532959,grad_norm: 0.9999994697238228, iteration: 12496
loss: 1.0269579887390137,grad_norm: 0.9999991859670058, iteration: 12497
loss: 1.025221824645996,grad_norm: 0.9999991422511052, iteration: 12498
loss: 1.014967918395996,grad_norm: 0.9999989794845574, iteration: 12499
loss: 1.0413320064544678,grad_norm: 0.9999994355730234, iteration: 12500
loss: 1.0272475481033325,grad_norm: 0.9999992220044377, iteration: 12501
loss: 0.973997950553894,grad_norm: 0.9999994638357591, iteration: 12502
loss: 1.0630632638931274,grad_norm: 0.9999998307999324, iteration: 12503
loss: 1.0588589906692505,grad_norm: 0.9999994790285582, iteration: 12504
loss: 1.0487138032913208,grad_norm: 0.9999993572002662, iteration: 12505
loss: 1.0562528371810913,grad_norm: 0.9999994980826474, iteration: 12506
loss: 1.0275228023529053,grad_norm: 0.9999990698292942, iteration: 12507
loss: 1.0520659685134888,grad_norm: 0.9999993274704821, iteration: 12508
loss: 1.0882277488708496,grad_norm: 0.9999991892724361, iteration: 12509
loss: 1.0289490222930908,grad_norm: 0.9999993373224987, iteration: 12510
loss: 1.0416250228881836,grad_norm: 0.9999995885111477, iteration: 12511
loss: 1.0665957927703857,grad_norm: 0.9999992432579382, iteration: 12512
loss: 1.0409812927246094,grad_norm: 0.9999992498584772, iteration: 12513
loss: 1.0014547109603882,grad_norm: 0.9999992350939092, iteration: 12514
loss: 1.0661574602127075,grad_norm: 0.9999996806537731, iteration: 12515
loss: 1.0892469882965088,grad_norm: 0.9999993772037163, iteration: 12516
loss: 1.0006340742111206,grad_norm: 0.99999932921885, iteration: 12517
loss: 1.0357743501663208,grad_norm: 0.9999993876671176, iteration: 12518
loss: 1.0821834802627563,grad_norm: 0.9999996224034932, iteration: 12519
loss: 1.013121485710144,grad_norm: 0.9999991852034054, iteration: 12520
loss: 1.0314364433288574,grad_norm: 0.9999991892909816, iteration: 12521
loss: 1.0586292743682861,grad_norm: 0.9999994294960317, iteration: 12522
loss: 1.0161038637161255,grad_norm: 0.999999316243232, iteration: 12523
loss: 1.0508067607879639,grad_norm: 0.9999997630890654, iteration: 12524
loss: 1.0522044897079468,grad_norm: 0.9999993200920827, iteration: 12525
loss: 1.1416735649108887,grad_norm: 0.9999996641461601, iteration: 12526
loss: 1.0664613246917725,grad_norm: 0.9999993700317681, iteration: 12527
loss: 1.0133262872695923,grad_norm: 0.9973099568621585, iteration: 12528
loss: 1.044371485710144,grad_norm: 0.9999997023368355, iteration: 12529
loss: 1.0688586235046387,grad_norm: 0.9999997247913307, iteration: 12530
loss: 1.0212864875793457,grad_norm: 0.9999992101202955, iteration: 12531
loss: 1.026861548423767,grad_norm: 0.9999991759446266, iteration: 12532
loss: 1.054772138595581,grad_norm: 0.9999998692754816, iteration: 12533
loss: 0.9970018863677979,grad_norm: 0.9999992058027375, iteration: 12534
loss: 1.106612205505371,grad_norm: 0.9999997802497261, iteration: 12535
loss: 1.0327214002609253,grad_norm: 0.9999992984511595, iteration: 12536
loss: 0.9842721223831177,grad_norm: 0.9999990227189052, iteration: 12537
loss: 1.0470844507217407,grad_norm: 0.9999994257131629, iteration: 12538
loss: 1.0217976570129395,grad_norm: 0.9999997931423262, iteration: 12539
loss: 1.0040020942687988,grad_norm: 0.9999991272080366, iteration: 12540
loss: 1.060335636138916,grad_norm: 0.9999997627515468, iteration: 12541
loss: 1.0196528434753418,grad_norm: 0.999999306991181, iteration: 12542
loss: 1.0287195444107056,grad_norm: 0.9999994349206875, iteration: 12543
loss: 1.0143924951553345,grad_norm: 0.9999993667273003, iteration: 12544
loss: 1.0670585632324219,grad_norm: 0.999999296385277, iteration: 12545
loss: 1.1262094974517822,grad_norm: 0.9999998039284674, iteration: 12546
loss: 1.0416667461395264,grad_norm: 0.9999994079131626, iteration: 12547
loss: 0.9870566725730896,grad_norm: 0.9999993226098135, iteration: 12548
loss: 1.0092378854751587,grad_norm: 0.9999996721373555, iteration: 12549
loss: 1.0302088260650635,grad_norm: 0.9999992769911309, iteration: 12550
loss: 1.0594319105148315,grad_norm: 0.9999991474480979, iteration: 12551
loss: 0.9844400882720947,grad_norm: 0.9999991496608607, iteration: 12552
loss: 0.9997472763061523,grad_norm: 0.9999991539281202, iteration: 12553
loss: 1.0385546684265137,grad_norm: 0.9999992078186821, iteration: 12554
loss: 1.057061791419983,grad_norm: 0.9999995839077861, iteration: 12555
loss: 1.0004013776779175,grad_norm: 0.9999992046198404, iteration: 12556
loss: 1.104354739189148,grad_norm: 0.9999996744449902, iteration: 12557
loss: 1.0949496030807495,grad_norm: 0.9999996230834403, iteration: 12558
loss: 1.0824137926101685,grad_norm: 0.9999997829351609, iteration: 12559
loss: 1.0240046977996826,grad_norm: 0.9999991975368187, iteration: 12560
loss: 1.0730177164077759,grad_norm: 0.9999999527405202, iteration: 12561
loss: 1.009077548980713,grad_norm: 0.99999919465616, iteration: 12562
loss: 1.05543053150177,grad_norm: 0.9999995942309592, iteration: 12563
loss: 0.983340322971344,grad_norm: 0.999999331357151, iteration: 12564
loss: 0.9900609254837036,grad_norm: 0.9999991914841255, iteration: 12565
loss: 1.0066778659820557,grad_norm: 0.9999993357130782, iteration: 12566
loss: 1.0408859252929688,grad_norm: 0.9999993218540777, iteration: 12567
loss: 1.0155478715896606,grad_norm: 0.9999994564313136, iteration: 12568
loss: 1.0480657815933228,grad_norm: 0.9999995831322089, iteration: 12569
loss: 0.9532467722892761,grad_norm: 0.9999992213076879, iteration: 12570
loss: 1.0607556104660034,grad_norm: 0.9999993247799817, iteration: 12571
loss: 1.0403718948364258,grad_norm: 0.9999997853858791, iteration: 12572
loss: 1.0800353288650513,grad_norm: 0.9999997074962522, iteration: 12573
loss: 1.165126919746399,grad_norm: 0.9999996907638053, iteration: 12574
loss: 1.007519245147705,grad_norm: 0.9999992618271424, iteration: 12575
loss: 1.0097287893295288,grad_norm: 0.9999996107990298, iteration: 12576
loss: 1.062631607055664,grad_norm: 0.9999996217107461, iteration: 12577
loss: 1.0282624959945679,grad_norm: 0.999999269239406, iteration: 12578
loss: 1.0693295001983643,grad_norm: 0.9999994415680696, iteration: 12579
loss: 1.0296005010604858,grad_norm: 0.9999990963322183, iteration: 12580
loss: 0.9904239177703857,grad_norm: 0.9999994800574774, iteration: 12581
loss: 1.0315361022949219,grad_norm: 0.9999993445583983, iteration: 12582
loss: 1.0046674013137817,grad_norm: 0.9999993917619577, iteration: 12583
loss: 1.0335296392440796,grad_norm: 0.9999993493030995, iteration: 12584
loss: 1.0459562540054321,grad_norm: 0.999999796640789, iteration: 12585
loss: 1.0129830837249756,grad_norm: 0.9999992996849204, iteration: 12586
loss: 1.0444570779800415,grad_norm: 0.9999991700435206, iteration: 12587
loss: 1.0590052604675293,grad_norm: 0.9999998319209698, iteration: 12588
loss: 1.0297232866287231,grad_norm: 0.9999993457627133, iteration: 12589
loss: 1.032362937927246,grad_norm: 0.9999993424444723, iteration: 12590
loss: 0.9656074643135071,grad_norm: 0.9999992726891199, iteration: 12591
loss: 1.0412201881408691,grad_norm: 0.9999996734896415, iteration: 12592
loss: 1.0181885957717896,grad_norm: 0.9999992616900091, iteration: 12593
loss: 1.02583646774292,grad_norm: 0.9999996065256279, iteration: 12594
loss: 1.0346899032592773,grad_norm: 0.9999993319598239, iteration: 12595
loss: 1.0730195045471191,grad_norm: 0.9999997011104192, iteration: 12596
loss: 1.004002571105957,grad_norm: 0.9999993300502797, iteration: 12597
loss: 1.0605159997940063,grad_norm: 0.9999996238908335, iteration: 12598
loss: 1.161577820777893,grad_norm: 0.9999998382346075, iteration: 12599
loss: 1.0828099250793457,grad_norm: 0.9999998730989831, iteration: 12600
loss: 1.0210784673690796,grad_norm: 0.9999991012506037, iteration: 12601
loss: 1.1253137588500977,grad_norm: 0.9999997096875077, iteration: 12602
loss: 1.0014103651046753,grad_norm: 0.999999452561149, iteration: 12603
loss: 1.0048149824142456,grad_norm: 0.9999997471158791, iteration: 12604
loss: 1.0285407304763794,grad_norm: 0.9999995877983282, iteration: 12605
loss: 0.9712061882019043,grad_norm: 0.999999252442219, iteration: 12606
loss: 1.0318044424057007,grad_norm: 0.9999994815062699, iteration: 12607
loss: 1.0466690063476562,grad_norm: 0.9999992764942672, iteration: 12608
loss: 1.0014015436172485,grad_norm: 0.9999993848203725, iteration: 12609
loss: 1.0163670778274536,grad_norm: 0.999999547780689, iteration: 12610
loss: 1.0540196895599365,grad_norm: 0.9999997722182158, iteration: 12611
loss: 1.026267409324646,grad_norm: 0.9999992371041704, iteration: 12612
loss: 0.9862475991249084,grad_norm: 0.9999993198984254, iteration: 12613
loss: 1.0360215902328491,grad_norm: 0.9999992034715066, iteration: 12614
loss: 1.0583641529083252,grad_norm: 0.9999993471965785, iteration: 12615
loss: 1.0209639072418213,grad_norm: 0.9999993585012164, iteration: 12616
loss: 1.033859133720398,grad_norm: 0.999999407723927, iteration: 12617
loss: 1.0047575235366821,grad_norm: 0.9999992472341763, iteration: 12618
loss: 1.03291916847229,grad_norm: 0.9999995683925704, iteration: 12619
loss: 1.0366742610931396,grad_norm: 0.9999996080420176, iteration: 12620
loss: 1.0194686651229858,grad_norm: 0.9999990888939585, iteration: 12621
loss: 1.0619618892669678,grad_norm: 0.9999997992066232, iteration: 12622
loss: 1.0473061800003052,grad_norm: 0.9999992833150919, iteration: 12623
loss: 1.0636613368988037,grad_norm: 0.9999992588602471, iteration: 12624
loss: 1.0044472217559814,grad_norm: 0.9999991527669603, iteration: 12625
loss: 1.0404975414276123,grad_norm: 0.9999993437250386, iteration: 12626
loss: 1.0090720653533936,grad_norm: 0.9999993815874492, iteration: 12627
loss: 1.0300039052963257,grad_norm: 0.9999991823930604, iteration: 12628
loss: 1.0300599336624146,grad_norm: 0.9999994740025027, iteration: 12629
loss: 1.0758765935897827,grad_norm: 0.9999996815424795, iteration: 12630
loss: 1.0199196338653564,grad_norm: 0.9999995296654636, iteration: 12631
loss: 1.0695571899414062,grad_norm: 0.9999992587669201, iteration: 12632
loss: 0.9912819266319275,grad_norm: 0.9999993361072259, iteration: 12633
loss: 1.0811986923217773,grad_norm: 0.9999997911672608, iteration: 12634
loss: 1.0845367908477783,grad_norm: 0.90648882191715, iteration: 12635
loss: 1.0634355545043945,grad_norm: 0.9999993897440203, iteration: 12636
loss: 0.9879311323165894,grad_norm: 0.9999990362262227, iteration: 12637
loss: 1.0520750284194946,grad_norm: 0.99999943908132, iteration: 12638
loss: 1.044603943824768,grad_norm: 0.9999995566710207, iteration: 12639
loss: 1.077748417854309,grad_norm: 0.999999629637931, iteration: 12640
loss: 1.0330740213394165,grad_norm: 0.9999992671921719, iteration: 12641
loss: 1.0374822616577148,grad_norm: 0.9999993642565114, iteration: 12642
loss: 1.035072922706604,grad_norm: 0.9999992096700204, iteration: 12643
loss: 1.0490803718566895,grad_norm: 0.999999718395136, iteration: 12644
loss: 1.0659176111221313,grad_norm: 0.9999994546968601, iteration: 12645
loss: 1.0129541158676147,grad_norm: 0.9999990390635235, iteration: 12646
loss: 1.0096098184585571,grad_norm: 0.9999994888928343, iteration: 12647
loss: 0.9889610409736633,grad_norm: 0.9999991428224159, iteration: 12648
loss: 1.0717384815216064,grad_norm: 0.9999996516403035, iteration: 12649
loss: 1.0018059015274048,grad_norm: 0.9999994911763403, iteration: 12650
loss: 1.017327070236206,grad_norm: 0.999999455531845, iteration: 12651
loss: 1.0389477014541626,grad_norm: 0.9999993912919379, iteration: 12652
loss: 1.1466500759124756,grad_norm: 0.9999996963205697, iteration: 12653
loss: 1.0143321752548218,grad_norm: 0.9999993429645573, iteration: 12654
loss: 1.082629919052124,grad_norm: 0.9999995742616394, iteration: 12655
loss: 1.0611135959625244,grad_norm: 0.999999418547107, iteration: 12656
loss: 1.0815719366073608,grad_norm: 0.9999997235115196, iteration: 12657
loss: 1.0280317068099976,grad_norm: 0.9999991284481891, iteration: 12658
loss: 1.0651402473449707,grad_norm: 0.9999996739622083, iteration: 12659
loss: 1.0606286525726318,grad_norm: 0.9999993262291227, iteration: 12660
loss: 1.03084397315979,grad_norm: 0.9999995696530927, iteration: 12661
loss: 1.0089722871780396,grad_norm: 0.9999991115019704, iteration: 12662
loss: 1.0745341777801514,grad_norm: 0.9999994955747714, iteration: 12663
loss: 1.0221062898635864,grad_norm: 0.9999996136101902, iteration: 12664
loss: 1.0924314260482788,grad_norm: 0.9999998348569042, iteration: 12665
loss: 1.037994146347046,grad_norm: 0.9999993477931531, iteration: 12666
loss: 1.03438401222229,grad_norm: 0.9999994733595601, iteration: 12667
loss: 1.061142921447754,grad_norm: 0.9999991137968994, iteration: 12668
loss: 1.0429072380065918,grad_norm: 0.9999994068131219, iteration: 12669
loss: 1.0142474174499512,grad_norm: 0.999999624359053, iteration: 12670
loss: 1.1111253499984741,grad_norm: 0.9999997290938021, iteration: 12671
loss: 1.003436803817749,grad_norm: 0.9999996036936902, iteration: 12672
loss: 1.0682107210159302,grad_norm: 0.9999997572462492, iteration: 12673
loss: 0.9714328050613403,grad_norm: 0.9999992725123922, iteration: 12674
loss: 1.0215270519256592,grad_norm: 0.9999994617632502, iteration: 12675
loss: 1.0385977029800415,grad_norm: 0.9999992581522633, iteration: 12676
loss: 1.0603344440460205,grad_norm: 0.9999996064617962, iteration: 12677
loss: 1.0219570398330688,grad_norm: 0.999999161230472, iteration: 12678
loss: 1.0561176538467407,grad_norm: 0.9999993308306131, iteration: 12679
loss: 1.0730071067810059,grad_norm: 0.999999686071172, iteration: 12680
loss: 1.0874035358428955,grad_norm: 0.9999991964419834, iteration: 12681
loss: 1.0481147766113281,grad_norm: 0.9999991371321678, iteration: 12682
loss: 1.0608525276184082,grad_norm: 0.9999993523807499, iteration: 12683
loss: 1.1156879663467407,grad_norm: 0.9999997305182003, iteration: 12684
loss: 1.1284728050231934,grad_norm: 0.9999997604588521, iteration: 12685
loss: 1.1288172006607056,grad_norm: 0.9999999268761165, iteration: 12686
loss: 1.0421562194824219,grad_norm: 0.9999993709986383, iteration: 12687
loss: 1.0798649787902832,grad_norm: 0.9999994765875576, iteration: 12688
loss: 0.9926251173019409,grad_norm: 0.9999991768518633, iteration: 12689
loss: 1.0992902517318726,grad_norm: 0.9999996436335324, iteration: 12690
loss: 1.032840371131897,grad_norm: 0.9999991989368221, iteration: 12691
loss: 0.996551513671875,grad_norm: 0.973632520958751, iteration: 12692
loss: 1.038063645362854,grad_norm: 0.9999993160345437, iteration: 12693
loss: 1.0316249132156372,grad_norm: 0.9999992557174313, iteration: 12694
loss: 0.9846673011779785,grad_norm: 0.9999992118075982, iteration: 12695
loss: 1.0204601287841797,grad_norm: 0.9999995173557399, iteration: 12696
loss: 1.0368953943252563,grad_norm: 0.9999992788581537, iteration: 12697
loss: 0.9938604831695557,grad_norm: 0.9999991621730806, iteration: 12698
loss: 0.9822878241539001,grad_norm: 0.9999993718167053, iteration: 12699
loss: 1.112662434577942,grad_norm: 0.9999997879006443, iteration: 12700
loss: 0.9865339398384094,grad_norm: 0.9999991755199096, iteration: 12701
loss: 1.053538203239441,grad_norm: 0.9999996114368237, iteration: 12702
loss: 1.046310544013977,grad_norm: 0.9999994678154177, iteration: 12703
loss: 1.0349996089935303,grad_norm: 0.999999475396223, iteration: 12704
loss: 1.0155731439590454,grad_norm: 0.999999130543891, iteration: 12705
loss: 1.0768930912017822,grad_norm: 0.9999997479314101, iteration: 12706
loss: 1.0290966033935547,grad_norm: 0.9999992868953426, iteration: 12707
loss: 1.0259462594985962,grad_norm: 0.9999998319614664, iteration: 12708
loss: 1.0326725244522095,grad_norm: 0.9999992693219926, iteration: 12709
loss: 1.0387107133865356,grad_norm: 0.9999991352593963, iteration: 12710
loss: 1.0770440101623535,grad_norm: 0.9999992604667557, iteration: 12711
loss: 1.0614968538284302,grad_norm: 0.9999994228146498, iteration: 12712
loss: 1.054625391960144,grad_norm: 0.9999997804519984, iteration: 12713
loss: 1.0188792943954468,grad_norm: 0.999999208859544, iteration: 12714
loss: 1.0297218561172485,grad_norm: 0.9999996830543787, iteration: 12715
loss: 1.1651573181152344,grad_norm: 0.999999834913058, iteration: 12716
loss: 1.0394787788391113,grad_norm: 0.9999992692216358, iteration: 12717
loss: 1.0128450393676758,grad_norm: 0.9999992763700645, iteration: 12718
loss: 1.0236464738845825,grad_norm: 0.9999993941349556, iteration: 12719
loss: 1.0066052675247192,grad_norm: 0.9999992544405572, iteration: 12720
loss: 0.9889699220657349,grad_norm: 0.999999247975618, iteration: 12721
loss: 1.03105890750885,grad_norm: 0.9999993601897406, iteration: 12722
loss: 1.001721978187561,grad_norm: 0.9999990399950873, iteration: 12723
loss: 1.0387383699417114,grad_norm: 0.9999991124014466, iteration: 12724
loss: 1.0247291326522827,grad_norm: 0.9999990992712017, iteration: 12725
loss: 1.0385750532150269,grad_norm: 0.9999996532121762, iteration: 12726
loss: 1.0560072660446167,grad_norm: 0.9999993895157342, iteration: 12727
loss: 1.0326071977615356,grad_norm: 0.9999996325258611, iteration: 12728
loss: 1.0771327018737793,grad_norm: 0.9999993263091784, iteration: 12729
loss: 1.019290804862976,grad_norm: 0.9999992909845462, iteration: 12730
loss: 1.0441666841506958,grad_norm: 0.999999321616356, iteration: 12731
loss: 1.0104479789733887,grad_norm: 0.9999993486644663, iteration: 12732
loss: 1.0291744470596313,grad_norm: 0.9999995160593302, iteration: 12733
loss: 0.9941901564598083,grad_norm: 0.9999992487950133, iteration: 12734
loss: 1.0747230052947998,grad_norm: 0.9999997416830384, iteration: 12735
loss: 1.0296125411987305,grad_norm: 0.9999992779931068, iteration: 12736
loss: 1.0306695699691772,grad_norm: 0.9999995352121549, iteration: 12737
loss: 1.0513991117477417,grad_norm: 0.9999992561162088, iteration: 12738
loss: 1.062676191329956,grad_norm: 0.9999995896477298, iteration: 12739
loss: 1.031399130821228,grad_norm: 0.9999992244093711, iteration: 12740
loss: 1.0771762132644653,grad_norm: 0.9999995118459275, iteration: 12741
loss: 1.0793168544769287,grad_norm: 0.9999992264062658, iteration: 12742
loss: 0.9949999451637268,grad_norm: 0.9999994946425222, iteration: 12743
loss: 1.0279350280761719,grad_norm: 0.9999994660530914, iteration: 12744
loss: 1.0389102697372437,grad_norm: 0.999999206012401, iteration: 12745
loss: 1.045336127281189,grad_norm: 0.9999992420333397, iteration: 12746
loss: 1.0225385427474976,grad_norm: 0.9999998787996456, iteration: 12747
loss: 1.0097554922103882,grad_norm: 0.9999992955362016, iteration: 12748
loss: 1.0303738117218018,grad_norm: 0.9999995451776703, iteration: 12749
loss: 0.986748218536377,grad_norm: 0.9999991930082075, iteration: 12750
loss: 1.0557817220687866,grad_norm: 0.9999995940053953, iteration: 12751
loss: 1.0219346284866333,grad_norm: 0.9999994622003721, iteration: 12752
loss: 1.0035114288330078,grad_norm: 0.9999992429197878, iteration: 12753
loss: 1.0329715013504028,grad_norm: 0.9999992687817737, iteration: 12754
loss: 1.035228967666626,grad_norm: 0.9999993594563245, iteration: 12755
loss: 1.0574027299880981,grad_norm: 0.9999993011537837, iteration: 12756
loss: 1.042996883392334,grad_norm: 0.99999919253943, iteration: 12757
loss: 1.0373966693878174,grad_norm: 0.9999998556812254, iteration: 12758
loss: 1.0025444030761719,grad_norm: 0.999999376288343, iteration: 12759
loss: 1.0282760858535767,grad_norm: 0.9999991328168412, iteration: 12760
loss: 1.00954270362854,grad_norm: 0.9999995532503104, iteration: 12761
loss: 1.0421240329742432,grad_norm: 0.9999992892166271, iteration: 12762
loss: 1.003284215927124,grad_norm: 0.9999994681785195, iteration: 12763
loss: 1.0290988683700562,grad_norm: 0.9999991272848893, iteration: 12764
loss: 0.9688443541526794,grad_norm: 0.9999992372965596, iteration: 12765
loss: 1.0029646158218384,grad_norm: 0.9999991908919208, iteration: 12766
loss: 1.0575408935546875,grad_norm: 0.9999994500290214, iteration: 12767
loss: 1.0031731128692627,grad_norm: 0.9999991757545884, iteration: 12768
loss: 1.057530164718628,grad_norm: 0.9999997765820609, iteration: 12769
loss: 1.0218126773834229,grad_norm: 0.9999990937132573, iteration: 12770
loss: 1.030220627784729,grad_norm: 0.9999991897899658, iteration: 12771
loss: 1.0477604866027832,grad_norm: 0.9999997404968896, iteration: 12772
loss: 1.0694586038589478,grad_norm: 0.9999996578091399, iteration: 12773
loss: 1.0292692184448242,grad_norm: 0.999999128191733, iteration: 12774
loss: 1.0588490962982178,grad_norm: 0.9999993134452795, iteration: 12775
loss: 1.0503078699111938,grad_norm: 0.9999992951137499, iteration: 12776
loss: 1.0397361516952515,grad_norm: 0.9999991967027286, iteration: 12777
loss: 1.0817208290100098,grad_norm: 0.9999993057933371, iteration: 12778
loss: 1.031749963760376,grad_norm: 0.9999990366203017, iteration: 12779
loss: 1.033171534538269,grad_norm: 0.999999288514696, iteration: 12780
loss: 1.057783603668213,grad_norm: 0.999999194628261, iteration: 12781
loss: 1.0772559642791748,grad_norm: 0.99999982409826, iteration: 12782
loss: 1.0349050760269165,grad_norm: 0.9999992350703172, iteration: 12783
loss: 1.0526422262191772,grad_norm: 0.9999990274924943, iteration: 12784
loss: 1.039617896080017,grad_norm: 0.9999997443968397, iteration: 12785
loss: 1.055598497390747,grad_norm: 0.9999998158771767, iteration: 12786
loss: 1.1304471492767334,grad_norm: 0.9999998337670011, iteration: 12787
loss: 0.9927844405174255,grad_norm: 0.9999993306973294, iteration: 12788
loss: 1.0239254236221313,grad_norm: 0.9999991937868871, iteration: 12789
loss: 1.0363531112670898,grad_norm: 0.9999993162179459, iteration: 12790
loss: 1.018629550933838,grad_norm: 0.9999991939100625, iteration: 12791
loss: 1.0122939348220825,grad_norm: 0.9999992893063362, iteration: 12792
loss: 1.0361583232879639,grad_norm: 0.9999991584512301, iteration: 12793
loss: 1.0318188667297363,grad_norm: 0.9999993675787686, iteration: 12794
loss: 0.9984854459762573,grad_norm: 0.9999993968434282, iteration: 12795
loss: 1.0270620584487915,grad_norm: 0.9999991715908192, iteration: 12796
loss: 0.9923016428947449,grad_norm: 0.9999991214320135, iteration: 12797
loss: 1.0419245958328247,grad_norm: 0.9999992254040467, iteration: 12798
loss: 1.0071924924850464,grad_norm: 0.9999993099451533, iteration: 12799
loss: 1.0126402378082275,grad_norm: 0.9999993694037991, iteration: 12800
loss: 1.0284807682037354,grad_norm: 0.927718627502582, iteration: 12801
loss: 0.9944204688072205,grad_norm: 0.9999991310252087, iteration: 12802
loss: 1.057848572731018,grad_norm: 0.9999990746229328, iteration: 12803
loss: 1.0706294775009155,grad_norm: 0.9999993005128562, iteration: 12804
loss: 1.0633693933486938,grad_norm: 0.999999569397374, iteration: 12805
loss: 0.9949946403503418,grad_norm: 0.999999372242986, iteration: 12806
loss: 1.0359584093093872,grad_norm: 0.9999994248073026, iteration: 12807
loss: 1.0150187015533447,grad_norm: 0.9999993222587391, iteration: 12808
loss: 1.0057106018066406,grad_norm: 0.9999993037565044, iteration: 12809
loss: 0.9845533967018127,grad_norm: 0.9999992049523414, iteration: 12810
loss: 1.056343913078308,grad_norm: 0.9999993689645117, iteration: 12811
loss: 1.008061408996582,grad_norm: 0.999999406022252, iteration: 12812
loss: 1.0378756523132324,grad_norm: 0.9999992191947279, iteration: 12813
loss: 1.048844575881958,grad_norm: 0.9999993230751717, iteration: 12814
loss: 1.0065349340438843,grad_norm: 0.9999993193439349, iteration: 12815
loss: 0.9913508892059326,grad_norm: 0.9999992116390465, iteration: 12816
loss: 0.9798034429550171,grad_norm: 0.9999992017097882, iteration: 12817
loss: 1.0292943716049194,grad_norm: 0.9999992583395344, iteration: 12818
loss: 0.9740538597106934,grad_norm: 0.9999992682874056, iteration: 12819
loss: 1.011683702468872,grad_norm: 0.9999992081783673, iteration: 12820
loss: 1.0542817115783691,grad_norm: 0.999999298312579, iteration: 12821
loss: 0.9745020270347595,grad_norm: 0.999999171368633, iteration: 12822
loss: 1.0390156507492065,grad_norm: 0.9999992779788802, iteration: 12823
loss: 1.023391604423523,grad_norm: 0.9999992256838804, iteration: 12824
loss: 1.0388654470443726,grad_norm: 0.9999992705479113, iteration: 12825
loss: 1.0346850156784058,grad_norm: 0.9999993940417257, iteration: 12826
loss: 0.9925881624221802,grad_norm: 0.9999991799347365, iteration: 12827
loss: 1.0428162813186646,grad_norm: 0.999999267185579, iteration: 12828
loss: 1.0424948930740356,grad_norm: 0.9999992977769183, iteration: 12829
loss: 0.9579451680183411,grad_norm: 0.9999991226129392, iteration: 12830
loss: 1.0512722730636597,grad_norm: 0.999999610952438, iteration: 12831
loss: 1.0378779172897339,grad_norm: 0.9999991615253627, iteration: 12832
loss: 1.0731315612792969,grad_norm: 0.9911762011521519, iteration: 12833
loss: 1.044116735458374,grad_norm: 0.9999992322804114, iteration: 12834
loss: 1.0462875366210938,grad_norm: 0.999999047474812, iteration: 12835
loss: 1.0612907409667969,grad_norm: 0.9999990955414158, iteration: 12836
loss: 1.0069630146026611,grad_norm: 0.999999398544729, iteration: 12837
loss: 1.0482321977615356,grad_norm: 0.9999991339046312, iteration: 12838
loss: 1.0466331243515015,grad_norm: 0.9999991947321468, iteration: 12839
loss: 1.014992594718933,grad_norm: 0.9999992454767284, iteration: 12840
loss: 1.008393406867981,grad_norm: 0.9999991117417059, iteration: 12841
loss: 0.9691263437271118,grad_norm: 0.9999991004553478, iteration: 12842
loss: 1.018122673034668,grad_norm: 0.9999993941352588, iteration: 12843
loss: 1.021059513092041,grad_norm: 0.9999991450112512, iteration: 12844
loss: 1.0799201726913452,grad_norm: 0.9999994493727506, iteration: 12845
loss: 1.0171599388122559,grad_norm: 0.9999992160973664, iteration: 12846
loss: 1.0048468112945557,grad_norm: 0.9999992232342741, iteration: 12847
loss: 1.0241883993148804,grad_norm: 0.9999992666963132, iteration: 12848
loss: 1.0403294563293457,grad_norm: 0.9999990802917706, iteration: 12849
loss: 1.067978024482727,grad_norm: 0.9999991784086696, iteration: 12850
loss: 1.0803533792495728,grad_norm: 0.9999994670068805, iteration: 12851
loss: 0.9974217414855957,grad_norm: 0.9999994335336007, iteration: 12852
loss: 1.0550509691238403,grad_norm: 0.9999991471888028, iteration: 12853
loss: 1.0367861986160278,grad_norm: 0.9999991654749137, iteration: 12854
loss: 0.9801104664802551,grad_norm: 0.9999990533143348, iteration: 12855
loss: 1.0064460039138794,grad_norm: 0.9999995208995575, iteration: 12856
loss: 1.0346782207489014,grad_norm: 0.9999991985061821, iteration: 12857
loss: 1.0105235576629639,grad_norm: 0.9999991117135205, iteration: 12858
loss: 1.0904043912887573,grad_norm: 0.9999995600415434, iteration: 12859
loss: 1.0622227191925049,grad_norm: 0.9999992832965445, iteration: 12860
loss: 1.068929672241211,grad_norm: 0.9999994747284877, iteration: 12861
loss: 1.1007238626480103,grad_norm: 0.9999995459767951, iteration: 12862
loss: 0.9852635860443115,grad_norm: 0.9999991101192814, iteration: 12863
loss: 1.0638453960418701,grad_norm: 0.9999993612869832, iteration: 12864
loss: 1.031847596168518,grad_norm: 0.9999995434518468, iteration: 12865
loss: 1.057541012763977,grad_norm: 0.9999996503567459, iteration: 12866
loss: 1.0259881019592285,grad_norm: 0.9999992651809804, iteration: 12867
loss: 1.0540891885757446,grad_norm: 0.9999994867498256, iteration: 12868
loss: 1.092695951461792,grad_norm: 0.999999464262751, iteration: 12869
loss: 0.9715400338172913,grad_norm: 0.9999994037645346, iteration: 12870
loss: 1.0864964723587036,grad_norm: 0.9999994409840797, iteration: 12871
loss: 1.0013912916183472,grad_norm: 0.9999992041848293, iteration: 12872
loss: 1.0206745862960815,grad_norm: 0.9999993244649865, iteration: 12873
loss: 1.0205923318862915,grad_norm: 0.9999996871577043, iteration: 12874
loss: 1.0467231273651123,grad_norm: 0.9999992852915967, iteration: 12875
loss: 1.0561097860336304,grad_norm: 0.9999995829645424, iteration: 12876
loss: 1.0027387142181396,grad_norm: 0.9999992946488442, iteration: 12877
loss: 1.0363225936889648,grad_norm: 0.9999995650786302, iteration: 12878
loss: 0.999650776386261,grad_norm: 0.9999991625354911, iteration: 12879
loss: 1.081217646598816,grad_norm: 0.9999994400052302, iteration: 12880
loss: 1.0044761896133423,grad_norm: 0.9869770025797353, iteration: 12881
loss: 1.054007887840271,grad_norm: 0.999999186974876, iteration: 12882
loss: 1.0132248401641846,grad_norm: 0.9999994117671682, iteration: 12883
loss: 1.0726354122161865,grad_norm: 0.9999995684517721, iteration: 12884
loss: 1.0316462516784668,grad_norm: 0.9999992257744759, iteration: 12885
loss: 0.9794283509254456,grad_norm: 0.9999994049524916, iteration: 12886
loss: 1.0289915800094604,grad_norm: 0.9999993653597475, iteration: 12887
loss: 1.0698564052581787,grad_norm: 0.9999993640672844, iteration: 12888
loss: 1.0056743621826172,grad_norm: 0.9999993959768778, iteration: 12889
loss: 1.0147860050201416,grad_norm: 0.9999992661197881, iteration: 12890
loss: 1.0473346710205078,grad_norm: 0.9999993815624039, iteration: 12891
loss: 1.0173676013946533,grad_norm: 0.9999994534851553, iteration: 12892
loss: 1.022162675857544,grad_norm: 0.9999995811789243, iteration: 12893
loss: 1.0195235013961792,grad_norm: 0.9999995285097671, iteration: 12894
loss: 1.0432394742965698,grad_norm: 0.9999992399084828, iteration: 12895
loss: 0.9803314805030823,grad_norm: 0.9999992525347712, iteration: 12896
loss: 0.9595476388931274,grad_norm: 0.9999994285271968, iteration: 12897
loss: 1.006319522857666,grad_norm: 0.999999309604834, iteration: 12898
loss: 1.0420111417770386,grad_norm: 0.9999994371924685, iteration: 12899
loss: 1.0147875547409058,grad_norm: 0.9999993743871083, iteration: 12900
loss: 1.0581908226013184,grad_norm: 0.9999992886871956, iteration: 12901
loss: 1.039645791053772,grad_norm: 0.9999994841106027, iteration: 12902
loss: 1.139423131942749,grad_norm: 0.9999998455024228, iteration: 12903
loss: 0.9889729022979736,grad_norm: 0.9999992636713294, iteration: 12904
loss: 1.0825875997543335,grad_norm: 0.999999755861255, iteration: 12905
loss: 1.0192302465438843,grad_norm: 0.9999997597922851, iteration: 12906
loss: 1.0587173700332642,grad_norm: 0.9885978657856288, iteration: 12907
loss: 1.0604957342147827,grad_norm: 0.9999994454059649, iteration: 12908
loss: 0.9992094039916992,grad_norm: 0.9999992660590565, iteration: 12909
loss: 1.0673922300338745,grad_norm: 0.9999993273092094, iteration: 12910
loss: 1.0056527853012085,grad_norm: 0.9999990205584175, iteration: 12911
loss: 1.0370510816574097,grad_norm: 0.9999994523210939, iteration: 12912
loss: 1.0138705968856812,grad_norm: 0.999999489955596, iteration: 12913
loss: 1.0508239269256592,grad_norm: 0.9999994373948852, iteration: 12914
loss: 0.9896587133407593,grad_norm: 0.9999994429500276, iteration: 12915
loss: 1.0591589212417603,grad_norm: 0.9999991400444561, iteration: 12916
loss: 1.0220520496368408,grad_norm: 0.9999992582924483, iteration: 12917
loss: 1.0424251556396484,grad_norm: 0.999999158122154, iteration: 12918
loss: 1.036410927772522,grad_norm: 0.9999994011917835, iteration: 12919
loss: 1.0323519706726074,grad_norm: 0.9999992659274183, iteration: 12920
loss: 0.9547777771949768,grad_norm: 0.9999991687817852, iteration: 12921
loss: 1.0766689777374268,grad_norm: 0.9999997212314592, iteration: 12922
loss: 1.0390344858169556,grad_norm: 0.9999991907971312, iteration: 12923
loss: 1.0504406690597534,grad_norm: 0.9999991896529784, iteration: 12924
loss: 1.022199034690857,grad_norm: 0.9999992454930271, iteration: 12925
loss: 1.0449273586273193,grad_norm: 0.9999994908370349, iteration: 12926
loss: 1.0517722368240356,grad_norm: 0.9999994437907039, iteration: 12927
loss: 1.046901822090149,grad_norm: 0.9999993268121026, iteration: 12928
loss: 1.0091801881790161,grad_norm: 0.9999991683584457, iteration: 12929
loss: 1.04612135887146,grad_norm: 0.9999992287927313, iteration: 12930
loss: 1.022473931312561,grad_norm: 0.9999993078237799, iteration: 12931
loss: 1.0401732921600342,grad_norm: 0.9434375409682836, iteration: 12932
loss: 0.9969987273216248,grad_norm: 0.9999991692283035, iteration: 12933
loss: 1.021926760673523,grad_norm: 0.9999993498944294, iteration: 12934
loss: 1.0460517406463623,grad_norm: 0.9999995030995987, iteration: 12935
loss: 1.024278998374939,grad_norm: 0.9999991800326196, iteration: 12936
loss: 1.0494033098220825,grad_norm: 0.9999994093434947, iteration: 12937
loss: 1.0819302797317505,grad_norm: 0.9999997372245966, iteration: 12938
loss: 1.026758074760437,grad_norm: 0.9999990100326727, iteration: 12939
loss: 1.0144798755645752,grad_norm: 0.9999998464663629, iteration: 12940
loss: 1.0427483320236206,grad_norm: 0.9999995464492135, iteration: 12941
loss: 1.0530719757080078,grad_norm: 0.9999996624940667, iteration: 12942
loss: 1.0272443294525146,grad_norm: 0.9999992029399779, iteration: 12943
loss: 0.9940463900566101,grad_norm: 0.9999993035962359, iteration: 12944
loss: 1.050729513168335,grad_norm: 0.9999992323479396, iteration: 12945
loss: 1.0481213331222534,grad_norm: 0.9999995570296902, iteration: 12946
loss: 1.015330195426941,grad_norm: 0.9999990987238042, iteration: 12947
loss: 1.0180164575576782,grad_norm: 0.9999993313318546, iteration: 12948
loss: 1.0557351112365723,grad_norm: 0.9999997446488078, iteration: 12949
loss: 1.0257998704910278,grad_norm: 0.9999992628736426, iteration: 12950
loss: 1.1325139999389648,grad_norm: 0.9999996787506483, iteration: 12951
loss: 1.0763676166534424,grad_norm: 0.9999996761770152, iteration: 12952
loss: 1.1704849004745483,grad_norm: 0.9999997640018695, iteration: 12953
loss: 1.244733214378357,grad_norm: 0.9999998660750303, iteration: 12954
loss: 1.032968521118164,grad_norm: 0.9999992223150084, iteration: 12955
loss: 1.0804271697998047,grad_norm: 0.9999991622611152, iteration: 12956
loss: 1.0585936307907104,grad_norm: 0.9999992239435516, iteration: 12957
loss: 0.9874470829963684,grad_norm: 0.9999993661862469, iteration: 12958
loss: 1.0411512851715088,grad_norm: 0.9999991824692518, iteration: 12959
loss: 0.9768847823143005,grad_norm: 0.9999994336156685, iteration: 12960
loss: 1.1166232824325562,grad_norm: 0.9999996185916209, iteration: 12961
loss: 1.052992582321167,grad_norm: 0.9999995567667272, iteration: 12962
loss: 0.9844499230384827,grad_norm: 0.9999991518289745, iteration: 12963
loss: 1.005738615989685,grad_norm: 0.9999992220821882, iteration: 12964
loss: 1.015918493270874,grad_norm: 0.9999992607885078, iteration: 12965
loss: 1.0015772581100464,grad_norm: 0.999999296331613, iteration: 12966
loss: 1.0191209316253662,grad_norm: 0.9999991931040538, iteration: 12967
loss: 1.0316698551177979,grad_norm: 0.9999991569373321, iteration: 12968
loss: 0.9935708045959473,grad_norm: 0.9999991201344876, iteration: 12969
loss: 1.0508317947387695,grad_norm: 0.9999997458749412, iteration: 12970
loss: 1.0569032430648804,grad_norm: 0.9999992442153842, iteration: 12971
loss: 1.0813443660736084,grad_norm: 0.9999997934868712, iteration: 12972
loss: 1.0174371004104614,grad_norm: 0.9999998869680046, iteration: 12973
loss: 1.0202568769454956,grad_norm: 0.9999991750357384, iteration: 12974
loss: 0.9955358505249023,grad_norm: 0.9999991090706125, iteration: 12975
loss: 1.0390841960906982,grad_norm: 0.9999991589230385, iteration: 12976
loss: 0.9762362241744995,grad_norm: 0.9999991221955078, iteration: 12977
loss: 1.036566972732544,grad_norm: 0.9999995644969852, iteration: 12978
loss: 1.0522412061691284,grad_norm: 0.999999669060679, iteration: 12979
loss: 1.0534149408340454,grad_norm: 0.9999991312101674, iteration: 12980
loss: 1.0504854917526245,grad_norm: 0.9999991848559496, iteration: 12981
loss: 1.0235515832901,grad_norm: 0.9999995366291056, iteration: 12982
loss: 0.9917396903038025,grad_norm: 0.9999994082425684, iteration: 12983
loss: 0.9609800577163696,grad_norm: 0.9999990262953546, iteration: 12984
loss: 1.004149317741394,grad_norm: 0.9999995039274295, iteration: 12985
loss: 1.092392086982727,grad_norm: 0.9999996127074595, iteration: 12986
loss: 1.0500739812850952,grad_norm: 0.9999993186806729, iteration: 12987
loss: 1.0436513423919678,grad_norm: 0.9999993478709711, iteration: 12988
loss: 1.0033632516860962,grad_norm: 0.9999991755295325, iteration: 12989
loss: 1.0202995538711548,grad_norm: 0.9999994922729152, iteration: 12990
loss: 1.030368685722351,grad_norm: 0.9999991914691033, iteration: 12991
loss: 1.0238869190216064,grad_norm: 0.9999991171535648, iteration: 12992
loss: 1.0951906442642212,grad_norm: 0.9999995746109521, iteration: 12993
loss: 1.0478696823120117,grad_norm: 0.9999995971818737, iteration: 12994
loss: 1.0185939073562622,grad_norm: 0.9999994238691836, iteration: 12995
loss: 1.0353206396102905,grad_norm: 0.9999991221315032, iteration: 12996
loss: 1.0100258588790894,grad_norm: 0.999999125183463, iteration: 12997
loss: 0.9803441166877747,grad_norm: 0.9999990543354746, iteration: 12998
loss: 1.0158711671829224,grad_norm: 0.9999992167390922, iteration: 12999
loss: 1.0528573989868164,grad_norm: 0.9999991151864269, iteration: 13000
loss: 1.0101913213729858,grad_norm: 0.9999995036638409, iteration: 13001
loss: 0.9973425269126892,grad_norm: 0.9999990806163948, iteration: 13002
loss: 1.0286659002304077,grad_norm: 0.9999992722519844, iteration: 13003
loss: 1.0439437627792358,grad_norm: 0.999999497398634, iteration: 13004
loss: 1.0460981130599976,grad_norm: 0.9999991845856634, iteration: 13005
loss: 0.9958397150039673,grad_norm: 0.9999992783310231, iteration: 13006
loss: 1.0128005743026733,grad_norm: 0.9999991020593253, iteration: 13007
loss: 1.000908374786377,grad_norm: 0.9999991134783964, iteration: 13008
loss: 0.9987855553627014,grad_norm: 0.9999990603884639, iteration: 13009
loss: 1.085766315460205,grad_norm: 0.9999994942433167, iteration: 13010
loss: 0.9876198768615723,grad_norm: 0.9999992780110312, iteration: 13011
loss: 1.0467584133148193,grad_norm: 0.999999307970549, iteration: 13012
loss: 1.0523489713668823,grad_norm: 0.9999992561639205, iteration: 13013
loss: 1.04550039768219,grad_norm: 0.9999990760582163, iteration: 13014
loss: 1.0186779499053955,grad_norm: 0.9999992242543668, iteration: 13015
loss: 1.0438932180404663,grad_norm: 0.9999992000589332, iteration: 13016
loss: 1.0610238313674927,grad_norm: 0.9999994404779318, iteration: 13017
loss: 1.033180832862854,grad_norm: 0.9999993679675215, iteration: 13018
loss: 1.0276745557785034,grad_norm: 0.9999996127379139, iteration: 13019
loss: 1.0125848054885864,grad_norm: 0.9999995290175945, iteration: 13020
loss: 1.116416573524475,grad_norm: 0.9999992956158763, iteration: 13021
loss: 0.9721271395683289,grad_norm: 0.9999992866441081, iteration: 13022
loss: 1.100063443183899,grad_norm: 0.9999995368250149, iteration: 13023
loss: 1.0086019039154053,grad_norm: 0.9999992505603162, iteration: 13024
loss: 1.0040438175201416,grad_norm: 0.9668213895562748, iteration: 13025
loss: 1.072972059249878,grad_norm: 0.9999996730230384, iteration: 13026
loss: 0.9913060069084167,grad_norm: 0.9999991116656496, iteration: 13027
loss: 1.0336729288101196,grad_norm: 0.999999208172855, iteration: 13028
loss: 1.0364373922348022,grad_norm: 0.9999993005820793, iteration: 13029
loss: 1.0364365577697754,grad_norm: 0.9999992321573578, iteration: 13030
loss: 1.0540053844451904,grad_norm: 0.9999991775409165, iteration: 13031
loss: 1.0992947816848755,grad_norm: 0.9999995311126867, iteration: 13032
loss: 1.0338644981384277,grad_norm: 0.9999993220844797, iteration: 13033
loss: 1.0373448133468628,grad_norm: 0.9999992404238922, iteration: 13034
loss: 1.0094350576400757,grad_norm: 0.9999994604141647, iteration: 13035
loss: 1.0000582933425903,grad_norm: 0.999999158235211, iteration: 13036
loss: 1.0247477293014526,grad_norm: 0.9999993522575761, iteration: 13037
loss: 0.9793261885643005,grad_norm: 0.9999993827323936, iteration: 13038
loss: 1.0216442346572876,grad_norm: 0.999999133422016, iteration: 13039
loss: 1.0545921325683594,grad_norm: 0.9999993846344484, iteration: 13040
loss: 1.088074803352356,grad_norm: 0.9999996513776287, iteration: 13041
loss: 1.014394998550415,grad_norm: 0.9999994074116224, iteration: 13042
loss: 1.048895239830017,grad_norm: 0.9999996345836979, iteration: 13043
loss: 1.0818175077438354,grad_norm: 0.9999994825011811, iteration: 13044
loss: 1.0672351121902466,grad_norm: 0.9999993266260868, iteration: 13045
loss: 1.094415545463562,grad_norm: 0.9999996509639101, iteration: 13046
loss: 1.0001685619354248,grad_norm: 0.9999994517547367, iteration: 13047
loss: 1.0129907131195068,grad_norm: 0.9602898902166646, iteration: 13048
loss: 1.0398997068405151,grad_norm: 0.9999992216407517, iteration: 13049
loss: 1.0822954177856445,grad_norm: 0.9999995894670489, iteration: 13050
loss: 1.098833441734314,grad_norm: 0.999999558364402, iteration: 13051
loss: 1.0844889879226685,grad_norm: 0.9999993747699921, iteration: 13052
loss: 1.075144648551941,grad_norm: 0.9999995856549043, iteration: 13053
loss: 1.0834295749664307,grad_norm: 0.9999996306132689, iteration: 13054
loss: 1.0564337968826294,grad_norm: 0.9999993791185643, iteration: 13055
loss: 0.9931231141090393,grad_norm: 0.9999993389194759, iteration: 13056
loss: 1.0220481157302856,grad_norm: 0.9999990544529388, iteration: 13057
loss: 1.0126652717590332,grad_norm: 0.9676293990765867, iteration: 13058
loss: 1.0337636470794678,grad_norm: 0.9999996262806184, iteration: 13059
loss: 1.02369225025177,grad_norm: 0.9999990929818162, iteration: 13060
loss: 1.096962809562683,grad_norm: 0.9999995031769617, iteration: 13061
loss: 1.053934931755066,grad_norm: 0.9999992192507802, iteration: 13062
loss: 0.9713260531425476,grad_norm: 0.9999998001212745, iteration: 13063
loss: 1.0519737005233765,grad_norm: 0.9999996220927669, iteration: 13064
loss: 1.0115808248519897,grad_norm: 0.999999113880986, iteration: 13065
loss: 1.0333362817764282,grad_norm: 0.9999992236103891, iteration: 13066
loss: 1.0087391138076782,grad_norm: 0.9999991546721607, iteration: 13067
loss: 1.035575270652771,grad_norm: 0.9999994090673623, iteration: 13068
loss: 1.022569179534912,grad_norm: 0.9999991620484193, iteration: 13069
loss: 1.0685393810272217,grad_norm: 0.999999674100925, iteration: 13070
loss: 1.0281153917312622,grad_norm: 0.999999583389219, iteration: 13071
loss: 1.0309560298919678,grad_norm: 0.9999994783559963, iteration: 13072
loss: 1.038881540298462,grad_norm: 0.9999996724489907, iteration: 13073
loss: 1.0310168266296387,grad_norm: 0.9999991072237442, iteration: 13074
loss: 1.0562567710876465,grad_norm: 0.9999992742331532, iteration: 13075
loss: 1.0343892574310303,grad_norm: 0.9999991097920461, iteration: 13076
loss: 1.0050580501556396,grad_norm: 0.9999991549515367, iteration: 13077
loss: 1.0816229581832886,grad_norm: 0.9999996695959965, iteration: 13078
loss: 1.0672695636749268,grad_norm: 0.9999996059874505, iteration: 13079
loss: 0.9942092895507812,grad_norm: 0.9999990332236893, iteration: 13080
loss: 1.0081117153167725,grad_norm: 0.9999991240081727, iteration: 13081
loss: 1.0557738542556763,grad_norm: 0.9999994677200528, iteration: 13082
loss: 1.0719153881072998,grad_norm: 0.9999996617103439, iteration: 13083
loss: 1.0956798791885376,grad_norm: 0.9999994926393058, iteration: 13084
loss: 1.0430700778961182,grad_norm: 0.9999995802125117, iteration: 13085
loss: 1.0777226686477661,grad_norm: 0.9999996869698395, iteration: 13086
loss: 1.0252314805984497,grad_norm: 0.9999993217173814, iteration: 13087
loss: 0.9789955019950867,grad_norm: 0.9999992010478845, iteration: 13088
loss: 1.0256386995315552,grad_norm: 0.9999993676125307, iteration: 13089
loss: 1.0313829183578491,grad_norm: 0.9999991809474871, iteration: 13090
loss: 1.0260871648788452,grad_norm: 0.9999992482800116, iteration: 13091
loss: 1.0128203630447388,grad_norm: 0.9999992436427977, iteration: 13092
loss: 1.0363500118255615,grad_norm: 0.9999997625037865, iteration: 13093
loss: 1.0381460189819336,grad_norm: 0.9999994019365158, iteration: 13094
loss: 1.0507512092590332,grad_norm: 0.999999474357118, iteration: 13095
loss: 1.0733911991119385,grad_norm: 0.9999994771801203, iteration: 13096
loss: 1.03364896774292,grad_norm: 0.9999991990657975, iteration: 13097
loss: 1.016719102859497,grad_norm: 0.9999991423967249, iteration: 13098
loss: 1.0615183115005493,grad_norm: 0.9999995583102456, iteration: 13099
loss: 1.0158851146697998,grad_norm: 0.999999852067445, iteration: 13100
loss: 1.0374482870101929,grad_norm: 0.9999997209385628, iteration: 13101
loss: 1.0167027711868286,grad_norm: 0.999999253216591, iteration: 13102
loss: 1.0774794816970825,grad_norm: 0.9999991711541, iteration: 13103
loss: 1.0183770656585693,grad_norm: 0.9999996700906869, iteration: 13104
loss: 1.020275592803955,grad_norm: 0.9999992961979934, iteration: 13105
loss: 1.0790095329284668,grad_norm: 0.9999994532806962, iteration: 13106
loss: 1.0138717889785767,grad_norm: 0.9999993268390368, iteration: 13107
loss: 1.0249080657958984,grad_norm: 0.9999996647478734, iteration: 13108
loss: 0.993768572807312,grad_norm: 0.9999992949520683, iteration: 13109
loss: 1.028342843055725,grad_norm: 0.9999991955406844, iteration: 13110
loss: 1.0457723140716553,grad_norm: 0.9999990047073553, iteration: 13111
loss: 1.089937448501587,grad_norm: 0.9999996876348272, iteration: 13112
loss: 1.037031888961792,grad_norm: 0.9999990926245701, iteration: 13113
loss: 1.0219762325286865,grad_norm: 0.9999991888412637, iteration: 13114
loss: 1.0290093421936035,grad_norm: 0.999999190759965, iteration: 13115
loss: 1.0523680448532104,grad_norm: 0.9999996507522305, iteration: 13116
loss: 1.0373048782348633,grad_norm: 0.9999992229099597, iteration: 13117
loss: 1.0322761535644531,grad_norm: 0.9999991885998637, iteration: 13118
loss: 1.011419653892517,grad_norm: 0.9999992894570804, iteration: 13119
loss: 1.003444790840149,grad_norm: 0.9999994198049194, iteration: 13120
loss: 1.0466163158416748,grad_norm: 0.9999993694667513, iteration: 13121
loss: 1.0763044357299805,grad_norm: 0.9999995789717706, iteration: 13122
loss: 1.0424846410751343,grad_norm: 0.9999996831752648, iteration: 13123
loss: 1.0902823209762573,grad_norm: 0.9999995805208894, iteration: 13124
loss: 0.9743073582649231,grad_norm: 0.9999992262051759, iteration: 13125
loss: 1.034899353981018,grad_norm: 0.9999994202445407, iteration: 13126
loss: 1.0432523488998413,grad_norm: 0.9999995381173903, iteration: 13127
loss: 1.0615286827087402,grad_norm: 0.9999994462352435, iteration: 13128
loss: 1.0243149995803833,grad_norm: 0.9999995224446571, iteration: 13129
loss: 0.9664817452430725,grad_norm: 0.9999994769943519, iteration: 13130
loss: 1.0480608940124512,grad_norm: 0.9999991432009494, iteration: 13131
loss: 1.025142788887024,grad_norm: 0.999999247330433, iteration: 13132
loss: 1.0260547399520874,grad_norm: 0.9999993075233765, iteration: 13133
loss: 0.9907408356666565,grad_norm: 0.999999476724683, iteration: 13134
loss: 1.0653318166732788,grad_norm: 0.9999990753699873, iteration: 13135
loss: 1.0606094598770142,grad_norm: 0.9999992918823046, iteration: 13136
loss: 0.9999792575836182,grad_norm: 0.999999027879327, iteration: 13137
loss: 1.063075065612793,grad_norm: 0.9999992219664245, iteration: 13138
loss: 1.0341798067092896,grad_norm: 0.9999994901990232, iteration: 13139
loss: 1.020149827003479,grad_norm: 0.9999991031279881, iteration: 13140
loss: 1.0593335628509521,grad_norm: 0.9999992708680469, iteration: 13141
loss: 1.0216705799102783,grad_norm: 0.9999992536739094, iteration: 13142
loss: 1.0373492240905762,grad_norm: 0.999999518506804, iteration: 13143
loss: 0.9594933390617371,grad_norm: 0.9999992006405847, iteration: 13144
loss: 1.0445808172225952,grad_norm: 0.9999991525339779, iteration: 13145
loss: 1.007473111152649,grad_norm: 0.9999993083551949, iteration: 13146
loss: 1.0094398260116577,grad_norm: 0.9999991488707192, iteration: 13147
loss: 0.9854487776756287,grad_norm: 0.9999990730404933, iteration: 13148
loss: 0.9871041774749756,grad_norm: 0.9999991580181302, iteration: 13149
loss: 0.9716333746910095,grad_norm: 0.9999995018775208, iteration: 13150
loss: 1.053619623184204,grad_norm: 0.9999998768368258, iteration: 13151
loss: 1.0733617544174194,grad_norm: 0.999999810533071, iteration: 13152
loss: 1.0998473167419434,grad_norm: 0.9999998371643891, iteration: 13153
loss: 1.0586570501327515,grad_norm: 0.9999992453908286, iteration: 13154
loss: 1.052259087562561,grad_norm: 0.9999993317107223, iteration: 13155
loss: 1.0554375648498535,grad_norm: 0.9999996105523341, iteration: 13156
loss: 1.0590903759002686,grad_norm: 0.9999995425116228, iteration: 13157
loss: 1.054694414138794,grad_norm: 0.999999168896796, iteration: 13158
loss: 1.0310440063476562,grad_norm: 0.9999992816084665, iteration: 13159
loss: 1.0306508541107178,grad_norm: 0.9999993352968137, iteration: 13160
loss: 1.021587610244751,grad_norm: 0.9999993652612554, iteration: 13161
loss: 1.0382474660873413,grad_norm: 0.9999993156919077, iteration: 13162
loss: 1.0320796966552734,grad_norm: 0.9999992144689491, iteration: 13163
loss: 1.036146879196167,grad_norm: 0.9999997483384976, iteration: 13164
loss: 1.0735185146331787,grad_norm: 0.9999996211151927, iteration: 13165
loss: 1.0448546409606934,grad_norm: 0.9999995665461779, iteration: 13166
loss: 1.027187705039978,grad_norm: 0.9999994973403213, iteration: 13167
loss: 1.0538997650146484,grad_norm: 0.999999081860816, iteration: 13168
loss: 1.0482929944992065,grad_norm: 0.999999100412107, iteration: 13169
loss: 1.028267502784729,grad_norm: 0.9999992212588412, iteration: 13170
loss: 1.0157047510147095,grad_norm: 0.9999992351348669, iteration: 13171
loss: 1.0418834686279297,grad_norm: 0.9999993790042032, iteration: 13172
loss: 1.0162689685821533,grad_norm: 0.9999993247758993, iteration: 13173
loss: 0.9865578413009644,grad_norm: 0.9999991619879817, iteration: 13174
loss: 1.0161786079406738,grad_norm: 0.9999992764579344, iteration: 13175
loss: 1.0536174774169922,grad_norm: 0.9999995831482251, iteration: 13176
loss: 1.0591380596160889,grad_norm: 0.9999992989337474, iteration: 13177
loss: 1.0827621221542358,grad_norm: 0.9999992981086723, iteration: 13178
loss: 1.0249881744384766,grad_norm: 0.9999992723170901, iteration: 13179
loss: 0.9927730560302734,grad_norm: 0.9999991882447703, iteration: 13180
loss: 1.0516740083694458,grad_norm: 0.9999997773787008, iteration: 13181
loss: 0.9972148537635803,grad_norm: 0.9999994004647226, iteration: 13182
loss: 1.0139594078063965,grad_norm: 0.9999992963884703, iteration: 13183
loss: 1.0688766241073608,grad_norm: 0.9999991099185697, iteration: 13184
loss: 1.0366872549057007,grad_norm: 0.9999993018355542, iteration: 13185
loss: 1.0625476837158203,grad_norm: 0.999999445765614, iteration: 13186
loss: 0.9877870082855225,grad_norm: 0.9999990588081608, iteration: 13187
loss: 1.0478954315185547,grad_norm: 0.9999992769262971, iteration: 13188
loss: 1.0364922285079956,grad_norm: 0.9999994704900332, iteration: 13189
loss: 1.0002999305725098,grad_norm: 0.9999991786892904, iteration: 13190
loss: 1.0510128736495972,grad_norm: 0.9999991580993121, iteration: 13191
loss: 1.0499083995819092,grad_norm: 0.9999993653644684, iteration: 13192
loss: 1.0761157274246216,grad_norm: 0.9999993415905478, iteration: 13193
loss: 1.023152470588684,grad_norm: 0.9999992303909712, iteration: 13194
loss: 1.0053398609161377,grad_norm: 0.9999990982910852, iteration: 13195
loss: 1.0642167329788208,grad_norm: 0.9999994593278482, iteration: 13196
loss: 1.0659598112106323,grad_norm: 0.9999996978863962, iteration: 13197
loss: 1.0178167819976807,grad_norm: 0.9579323730691542, iteration: 13198
loss: 1.0243949890136719,grad_norm: 0.9999996127236501, iteration: 13199
loss: 1.076345443725586,grad_norm: 0.9999994233256531, iteration: 13200
loss: 1.0546869039535522,grad_norm: 0.9999995592261116, iteration: 13201
loss: 1.037767767906189,grad_norm: 0.9999990892956349, iteration: 13202
loss: 1.1397979259490967,grad_norm: 0.9999996565317302, iteration: 13203
loss: 1.016579270362854,grad_norm: 0.9999994873832987, iteration: 13204
loss: 1.0559370517730713,grad_norm: 0.9999991176320744, iteration: 13205
loss: 1.0111685991287231,grad_norm: 0.9999992997716979, iteration: 13206
loss: 1.0666426420211792,grad_norm: 0.9999992430792095, iteration: 13207
loss: 1.0338077545166016,grad_norm: 0.9999992512364394, iteration: 13208
loss: 1.032090187072754,grad_norm: 0.9999990764902896, iteration: 13209
loss: 1.031524658203125,grad_norm: 0.9999996294066806, iteration: 13210
loss: 1.043239951133728,grad_norm: 0.9999993843935894, iteration: 13211
loss: 1.0188062191009521,grad_norm: 0.9999991547953905, iteration: 13212
loss: 1.0243099927902222,grad_norm: 0.9999991911580142, iteration: 13213
loss: 1.0403473377227783,grad_norm: 0.9999992027476807, iteration: 13214
loss: 1.025846242904663,grad_norm: 0.9999992244112338, iteration: 13215
loss: 1.0305793285369873,grad_norm: 0.9999992098769482, iteration: 13216
loss: 1.0170795917510986,grad_norm: 0.9231105624495598, iteration: 13217
loss: 1.0243066549301147,grad_norm: 0.9999993052002618, iteration: 13218
loss: 0.9800770878791809,grad_norm: 0.9999991642757678, iteration: 13219
loss: 1.0263431072235107,grad_norm: 0.9999993291743455, iteration: 13220
loss: 0.9860367774963379,grad_norm: 0.9999995662103657, iteration: 13221
loss: 1.0662152767181396,grad_norm: 0.9999993120909249, iteration: 13222
loss: 1.0306907892227173,grad_norm: 0.9999995122328119, iteration: 13223
loss: 1.026487946510315,grad_norm: 0.9999992996930522, iteration: 13224
loss: 1.0209170579910278,grad_norm: 0.999999166770273, iteration: 13225
loss: 1.0170150995254517,grad_norm: 0.9999992221193416, iteration: 13226
loss: 0.9975030422210693,grad_norm: 0.9999993817344451, iteration: 13227
loss: 0.9658234119415283,grad_norm: 0.9999992196590065, iteration: 13228
loss: 0.999340832233429,grad_norm: 0.9999991999266311, iteration: 13229
loss: 1.0099005699157715,grad_norm: 0.9999990352190778, iteration: 13230
loss: 1.023376703262329,grad_norm: 0.999999125105702, iteration: 13231
loss: 0.9775902628898621,grad_norm: 0.9999990984413779, iteration: 13232
loss: 1.0210702419281006,grad_norm: 0.9999990516748983, iteration: 13233
loss: 1.0652174949645996,grad_norm: 0.9999994000739559, iteration: 13234
loss: 1.0104180574417114,grad_norm: 0.9999991926085156, iteration: 13235
loss: 1.0071425437927246,grad_norm: 0.9999992525324028, iteration: 13236
loss: 1.0294415950775146,grad_norm: 0.9999993830204019, iteration: 13237
loss: 1.0103974342346191,grad_norm: 0.9999993859918955, iteration: 13238
loss: 0.9844062924385071,grad_norm: 0.9999991163164457, iteration: 13239
loss: 1.0810575485229492,grad_norm: 0.9999999119945887, iteration: 13240
loss: 1.02224862575531,grad_norm: 0.9407422022048366, iteration: 13241
loss: 0.9813401699066162,grad_norm: 0.9999995036574292, iteration: 13242
loss: 1.0016436576843262,grad_norm: 0.9999993051518316, iteration: 13243
loss: 1.0777995586395264,grad_norm: 0.9999991924189643, iteration: 13244
loss: 1.0700950622558594,grad_norm: 0.9999994746708555, iteration: 13245
loss: 1.053843379020691,grad_norm: 0.9999992124213885, iteration: 13246
loss: 0.9947641491889954,grad_norm: 0.999999264436103, iteration: 13247
loss: 0.9843921065330505,grad_norm: 0.9999992232670889, iteration: 13248
loss: 0.9929884076118469,grad_norm: 0.9999992183128851, iteration: 13249
loss: 1.0216771364212036,grad_norm: 0.9999991677777916, iteration: 13250
loss: 0.9720771312713623,grad_norm: 0.9999991989813077, iteration: 13251
loss: 1.0503970384597778,grad_norm: 0.9999990803604472, iteration: 13252
loss: 1.0452115535736084,grad_norm: 0.9999993231625144, iteration: 13253
loss: 1.013130784034729,grad_norm: 0.999999235427826, iteration: 13254
loss: 1.0051567554473877,grad_norm: 0.9999991702042135, iteration: 13255
loss: 0.9903513789176941,grad_norm: 0.9999991362454529, iteration: 13256
loss: 1.0833410024642944,grad_norm: 0.9999997395544679, iteration: 13257
loss: 1.0748244524002075,grad_norm: 0.9999991733923957, iteration: 13258
loss: 0.9852417707443237,grad_norm: 0.9999992740906576, iteration: 13259
loss: 0.9993245601654053,grad_norm: 0.9999993681340186, iteration: 13260
loss: 1.029492974281311,grad_norm: 0.9999991478954237, iteration: 13261
loss: 1.0130232572555542,grad_norm: 0.9926285606048435, iteration: 13262
loss: 1.0106074810028076,grad_norm: 0.999999101377448, iteration: 13263
loss: 1.0200330018997192,grad_norm: 0.9999990372515845, iteration: 13264
loss: 0.9810532331466675,grad_norm: 0.9999992133143404, iteration: 13265
loss: 1.0016156435012817,grad_norm: 0.999999325882354, iteration: 13266
loss: 1.034857988357544,grad_norm: 0.9999990786390949, iteration: 13267
loss: 1.0398658514022827,grad_norm: 0.999999162702518, iteration: 13268
loss: 0.997283935546875,grad_norm: 0.9999990873750308, iteration: 13269
loss: 1.062208652496338,grad_norm: 0.9999991442176336, iteration: 13270
loss: 1.0143156051635742,grad_norm: 0.9999990731147396, iteration: 13271
loss: 1.0288046598434448,grad_norm: 0.9999991269000984, iteration: 13272
loss: 1.0169543027877808,grad_norm: 0.9999997513736727, iteration: 13273
loss: 1.0814831256866455,grad_norm: 0.9999996828716567, iteration: 13274
loss: 1.0217641592025757,grad_norm: 0.9999992630066362, iteration: 13275
loss: 1.0051268339157104,grad_norm: 0.9999993763319849, iteration: 13276
loss: 1.0682497024536133,grad_norm: 0.9999992337426548, iteration: 13277
loss: 1.1250852346420288,grad_norm: 0.9999998477374209, iteration: 13278
loss: 1.0356091260910034,grad_norm: 0.9999990743479511, iteration: 13279
loss: 1.0578604936599731,grad_norm: 0.9999992585054897, iteration: 13280
loss: 1.0450764894485474,grad_norm: 0.9999993364306814, iteration: 13281
loss: 1.0510798692703247,grad_norm: 0.9999997138091248, iteration: 13282
loss: 1.052081823348999,grad_norm: 0.9999993107006457, iteration: 13283
loss: 1.0481981039047241,grad_norm: 0.9999992614214156, iteration: 13284
loss: 1.0548498630523682,grad_norm: 0.9999996005585193, iteration: 13285
loss: 1.0550967454910278,grad_norm: 0.9999994879907187, iteration: 13286
loss: 1.064937710762024,grad_norm: 0.9999992803464004, iteration: 13287
loss: 1.010208010673523,grad_norm: 0.9999991733342568, iteration: 13288
loss: 1.0287666320800781,grad_norm: 0.9999996109742576, iteration: 13289
loss: 1.0895580053329468,grad_norm: 0.9999997606297991, iteration: 13290
loss: 1.021592140197754,grad_norm: 0.9999995888635095, iteration: 13291
loss: 1.085042119026184,grad_norm: 0.9999997313145405, iteration: 13292
loss: 1.012729525566101,grad_norm: 0.9999993332677184, iteration: 13293
loss: 1.0083794593811035,grad_norm: 0.9999991553452827, iteration: 13294
loss: 1.0280482769012451,grad_norm: 0.9999995811686297, iteration: 13295
loss: 1.0400785207748413,grad_norm: 0.9999991194770373, iteration: 13296
loss: 1.0125013589859009,grad_norm: 0.9999992485537585, iteration: 13297
loss: 1.0781902074813843,grad_norm: 0.9999998461115048, iteration: 13298
loss: 1.0416945219039917,grad_norm: 0.9999996953404583, iteration: 13299
loss: 0.9959026575088501,grad_norm: 0.9999992954384189, iteration: 13300
loss: 0.9969327449798584,grad_norm: 0.9999993210682337, iteration: 13301
loss: 1.0845062732696533,grad_norm: 0.9999993131090686, iteration: 13302
loss: 1.0758857727050781,grad_norm: 0.9999996748968596, iteration: 13303
loss: 1.0526591539382935,grad_norm: 0.9999992648042665, iteration: 13304
loss: 1.0432180166244507,grad_norm: 0.9999996669242156, iteration: 13305
loss: 1.0127999782562256,grad_norm: 0.9999992654901833, iteration: 13306
loss: 1.087857961654663,grad_norm: 0.9999997010252307, iteration: 13307
loss: 1.0212478637695312,grad_norm: 0.9999993063620429, iteration: 13308
loss: 1.0388273000717163,grad_norm: 0.9999993507385633, iteration: 13309
loss: 1.0695478916168213,grad_norm: 0.999999397260366, iteration: 13310
loss: 1.0534794330596924,grad_norm: 0.9999997224435141, iteration: 13311
loss: 1.0505187511444092,grad_norm: 0.9999994776074742, iteration: 13312
loss: 1.0593822002410889,grad_norm: 0.9999994939683681, iteration: 13313
loss: 0.9921707510948181,grad_norm: 0.9999993263193446, iteration: 13314
loss: 1.0192762613296509,grad_norm: 0.9999995009594017, iteration: 13315
loss: 1.0961886644363403,grad_norm: 0.9999996092543487, iteration: 13316
loss: 1.0008134841918945,grad_norm: 0.9999991002479602, iteration: 13317
loss: 1.0282866954803467,grad_norm: 0.9999993947983322, iteration: 13318
loss: 1.101207971572876,grad_norm: 0.9999994868109259, iteration: 13319
loss: 1.0901912450790405,grad_norm: 0.9999997414962704, iteration: 13320
loss: 1.0444793701171875,grad_norm: 0.9999991768373035, iteration: 13321
loss: 1.0261088609695435,grad_norm: 0.999999277127115, iteration: 13322
loss: 1.0211515426635742,grad_norm: 0.9999994354178969, iteration: 13323
loss: 1.0327730178833008,grad_norm: 0.9999992573369162, iteration: 13324
loss: 1.060365915298462,grad_norm: 0.9999993371155962, iteration: 13325
loss: 1.0214476585388184,grad_norm: 0.9999995554840055, iteration: 13326
loss: 1.0723658800125122,grad_norm: 0.999999490056291, iteration: 13327
loss: 1.04598069190979,grad_norm: 0.9999992989456282, iteration: 13328
loss: 0.9898257851600647,grad_norm: 0.9999992622503113, iteration: 13329
loss: 1.0723910331726074,grad_norm: 0.9999995059826797, iteration: 13330
loss: 1.0374072790145874,grad_norm: 0.9999993591487666, iteration: 13331
loss: 1.0481518507003784,grad_norm: 0.9999991601717896, iteration: 13332
loss: 1.0344399213790894,grad_norm: 0.9999992096405951, iteration: 13333
loss: 1.0328129529953003,grad_norm: 0.9999996080137592, iteration: 13334
loss: 1.0288944244384766,grad_norm: 0.9999991904734913, iteration: 13335
loss: 0.9910001158714294,grad_norm: 0.9081832416569973, iteration: 13336
loss: 1.0227773189544678,grad_norm: 0.9999991156865856, iteration: 13337
loss: 1.0326327085494995,grad_norm: 0.9999992697943011, iteration: 13338
loss: 1.02463960647583,grad_norm: 0.9999994160214464, iteration: 13339
loss: 1.093180537223816,grad_norm: 0.9999995506564469, iteration: 13340
loss: 1.028958797454834,grad_norm: 0.9999991238278471, iteration: 13341
loss: 1.0357165336608887,grad_norm: 0.9999990278231033, iteration: 13342
loss: 1.0416985750198364,grad_norm: 0.9999997837327512, iteration: 13343
loss: 1.0121585130691528,grad_norm: 0.9999991731987236, iteration: 13344
loss: 0.9562027454376221,grad_norm: 0.999999389129669, iteration: 13345
loss: 1.0232104063034058,grad_norm: 0.9999994082213944, iteration: 13346
loss: 1.0194859504699707,grad_norm: 0.999999019577384, iteration: 13347
loss: 1.03140127658844,grad_norm: 0.9999994094028977, iteration: 13348
loss: 0.984317421913147,grad_norm: 0.9999992460757249, iteration: 13349
loss: 1.096516728401184,grad_norm: 0.9999993265776724, iteration: 13350
loss: 1.0284249782562256,grad_norm: 0.9999991441532033, iteration: 13351
loss: 0.986398458480835,grad_norm: 0.999999262416994, iteration: 13352
loss: 1.0042054653167725,grad_norm: 0.9999990858206194, iteration: 13353
loss: 1.076495885848999,grad_norm: 0.9999991695640779, iteration: 13354
loss: 1.0707908868789673,grad_norm: 0.9999993120114662, iteration: 13355
loss: 1.024392008781433,grad_norm: 0.9999994150960767, iteration: 13356
loss: 1.0404058694839478,grad_norm: 0.9999993654571684, iteration: 13357
loss: 0.9825568199157715,grad_norm: 0.9999992168322464, iteration: 13358
loss: 1.0218268632888794,grad_norm: 0.9999992066092606, iteration: 13359
loss: 1.0515743494033813,grad_norm: 0.8692683825217666, iteration: 13360
loss: 1.0579779148101807,grad_norm: 0.9999989672287, iteration: 13361
loss: 1.0059984922409058,grad_norm: 0.9999990557874067, iteration: 13362
loss: 1.0352709293365479,grad_norm: 0.9999993003483291, iteration: 13363
loss: 1.0238285064697266,grad_norm: 0.9999991917238109, iteration: 13364
loss: 0.9987488389015198,grad_norm: 0.9999992652323811, iteration: 13365
loss: 1.034247875213623,grad_norm: 0.9999996125302787, iteration: 13366
loss: 1.0479910373687744,grad_norm: 0.9999990334166579, iteration: 13367
loss: 1.075251579284668,grad_norm: 0.9999991373432783, iteration: 13368
loss: 1.0550401210784912,grad_norm: 0.9999994284225294, iteration: 13369
loss: 1.0216641426086426,grad_norm: 0.9742362897198178, iteration: 13370
loss: 1.0563628673553467,grad_norm: 0.9999993360320132, iteration: 13371
loss: 1.0053452253341675,grad_norm: 0.9999993230681531, iteration: 13372
loss: 0.9764302968978882,grad_norm: 0.9999991330450941, iteration: 13373
loss: 1.0486769676208496,grad_norm: 0.999999534696116, iteration: 13374
loss: 1.020022988319397,grad_norm: 0.9999992207130197, iteration: 13375
loss: 1.0441123247146606,grad_norm: 0.9999990876871997, iteration: 13376
loss: 1.0053788423538208,grad_norm: 0.999999219503778, iteration: 13377
loss: 0.9739416837692261,grad_norm: 0.9999991029781887, iteration: 13378
loss: 1.0255717039108276,grad_norm: 0.9999991311640672, iteration: 13379
loss: 1.0079560279846191,grad_norm: 0.9999994486557339, iteration: 13380
loss: 1.0391077995300293,grad_norm: 0.9999992001304283, iteration: 13381
loss: 1.0099754333496094,grad_norm: 0.9999991036664938, iteration: 13382
loss: 1.0375218391418457,grad_norm: 0.9999996494931763, iteration: 13383
loss: 1.0091123580932617,grad_norm: 0.9999993847063693, iteration: 13384
loss: 1.0645703077316284,grad_norm: 0.9999993578699151, iteration: 13385
loss: 1.0344730615615845,grad_norm: 0.9999991749787532, iteration: 13386
loss: 1.0945582389831543,grad_norm: 0.9999996147923367, iteration: 13387
loss: 1.0184366703033447,grad_norm: 0.9999993330666225, iteration: 13388
loss: 1.0148285627365112,grad_norm: 0.999999268701504, iteration: 13389
loss: 1.0217623710632324,grad_norm: 0.9999992567227309, iteration: 13390
loss: 1.0089739561080933,grad_norm: 0.9999998720053618, iteration: 13391
loss: 1.0889626741409302,grad_norm: 0.9999993333215844, iteration: 13392
loss: 1.1180518865585327,grad_norm: 0.999999712775129, iteration: 13393
loss: 1.0807911157608032,grad_norm: 0.9999997934025244, iteration: 13394
loss: 1.0465455055236816,grad_norm: 0.9999992501799956, iteration: 13395
loss: 1.0521621704101562,grad_norm: 0.9999990443306218, iteration: 13396
loss: 0.9723321795463562,grad_norm: 0.9999991672716738, iteration: 13397
loss: 1.0428969860076904,grad_norm: 0.9999994521140863, iteration: 13398
loss: 1.0605143308639526,grad_norm: 0.9999991942692995, iteration: 13399
loss: 1.0728685855865479,grad_norm: 0.9999995173529209, iteration: 13400
loss: 1.0563135147094727,grad_norm: 0.9999992057186183, iteration: 13401
loss: 1.0370490550994873,grad_norm: 0.9999995015649012, iteration: 13402
loss: 1.033354640007019,grad_norm: 0.999999369501971, iteration: 13403
loss: 1.0576242208480835,grad_norm: 0.9999993441978272, iteration: 13404
loss: 1.0546303987503052,grad_norm: 0.9999995558065287, iteration: 13405
loss: 1.0160439014434814,grad_norm: 0.9999995621537956, iteration: 13406
loss: 1.0294731855392456,grad_norm: 0.9999991842621684, iteration: 13407
loss: 1.0401936769485474,grad_norm: 0.9999993094699725, iteration: 13408
loss: 1.1179423332214355,grad_norm: 0.9999995675828678, iteration: 13409
loss: 1.0289186239242554,grad_norm: 0.999999538720424, iteration: 13410
loss: 0.9907276034355164,grad_norm: 0.9999990635770483, iteration: 13411
loss: 1.0527496337890625,grad_norm: 0.9999992859215626, iteration: 13412
loss: 1.0331803560256958,grad_norm: 0.9999993833118689, iteration: 13413
loss: 1.0607714653015137,grad_norm: 0.999999203281188, iteration: 13414
loss: 1.0512609481811523,grad_norm: 0.9999994070362309, iteration: 13415
loss: 1.0102521181106567,grad_norm: 0.9999992663431977, iteration: 13416
loss: 1.0264259576797485,grad_norm: 0.9999991792618144, iteration: 13417
loss: 1.0211025476455688,grad_norm: 0.9999994400159697, iteration: 13418
loss: 1.0679115056991577,grad_norm: 0.9999997208716974, iteration: 13419
loss: 1.0901328325271606,grad_norm: 0.999999423489496, iteration: 13420
loss: 1.030285358428955,grad_norm: 0.9999993089296897, iteration: 13421
loss: 1.0402731895446777,grad_norm: 0.9999996331919838, iteration: 13422
loss: 1.0002849102020264,grad_norm: 0.9999990562697346, iteration: 13423
loss: 1.0472391843795776,grad_norm: 0.9999995234966893, iteration: 13424
loss: 1.0248996019363403,grad_norm: 0.9999995787365306, iteration: 13425
loss: 1.0095720291137695,grad_norm: 0.9999995234449407, iteration: 13426
loss: 1.0832265615463257,grad_norm: 0.9999998116060844, iteration: 13427
loss: 1.0380853414535522,grad_norm: 0.999999278500043, iteration: 13428
loss: 1.034905195236206,grad_norm: 0.9999995059643978, iteration: 13429
loss: 1.028109073638916,grad_norm: 0.9999998372404829, iteration: 13430
loss: 1.064203143119812,grad_norm: 0.9999993839709693, iteration: 13431
loss: 1.0539171695709229,grad_norm: 0.999999206433648, iteration: 13432
loss: 1.071500301361084,grad_norm: 0.9999998265003401, iteration: 13433
loss: 1.037566065788269,grad_norm: 0.9999993659172236, iteration: 13434
loss: 1.027155876159668,grad_norm: 0.9999990481007555, iteration: 13435
loss: 1.0536694526672363,grad_norm: 0.9999993123233107, iteration: 13436
loss: 1.0267534255981445,grad_norm: 0.9999990549298953, iteration: 13437
loss: 1.0496803522109985,grad_norm: 0.9999990558533155, iteration: 13438
loss: 1.019823431968689,grad_norm: 0.9999992868067245, iteration: 13439
loss: 1.0630865097045898,grad_norm: 0.9999992731776297, iteration: 13440
loss: 1.0219565629959106,grad_norm: 0.9999993280343813, iteration: 13441
loss: 1.0611090660095215,grad_norm: 0.9999998493180561, iteration: 13442
loss: 1.062927484512329,grad_norm: 0.9999999899814056, iteration: 13443
loss: 1.1086004972457886,grad_norm: 0.9999996392129018, iteration: 13444
loss: 1.071372151374817,grad_norm: 0.9999991883699991, iteration: 13445
loss: 1.0286695957183838,grad_norm: 0.9620599772792039, iteration: 13446
loss: 0.9955527782440186,grad_norm: 0.999999177345708, iteration: 13447
loss: 1.056502103805542,grad_norm: 0.9999995498296627, iteration: 13448
loss: 1.009158730506897,grad_norm: 0.999999247859395, iteration: 13449
loss: 1.040718674659729,grad_norm: 0.9999991078967433, iteration: 13450
loss: 0.9972174167633057,grad_norm: 0.9999992062613285, iteration: 13451
loss: 1.0353147983551025,grad_norm: 0.9999997940924925, iteration: 13452
loss: 0.9785935878753662,grad_norm: 0.9999993056072897, iteration: 13453
loss: 1.0280885696411133,grad_norm: 0.9999991919002003, iteration: 13454
loss: 1.0320385694503784,grad_norm: 0.9999994780783332, iteration: 13455
loss: 0.9840257167816162,grad_norm: 0.9999991127096151, iteration: 13456
loss: 1.04327392578125,grad_norm: 0.9999992511783063, iteration: 13457
loss: 1.0897018909454346,grad_norm: 0.9999998995783552, iteration: 13458
loss: 1.0679649114608765,grad_norm: 0.9999997539444322, iteration: 13459
loss: 1.0414412021636963,grad_norm: 0.9999992219883792, iteration: 13460
loss: 1.0171446800231934,grad_norm: 0.9999992807604438, iteration: 13461
loss: 1.0435880422592163,grad_norm: 0.9999993011461619, iteration: 13462
loss: 0.9831528663635254,grad_norm: 0.9999991867136192, iteration: 13463
loss: 1.0279847383499146,grad_norm: 0.9999994374273785, iteration: 13464
loss: 1.0405253171920776,grad_norm: 0.9999991719794401, iteration: 13465
loss: 1.053123950958252,grad_norm: 0.9999990082699098, iteration: 13466
loss: 1.042869210243225,grad_norm: 0.9999992405402334, iteration: 13467
loss: 1.0330146551132202,grad_norm: 0.9999991389578894, iteration: 13468
loss: 1.0403428077697754,grad_norm: 0.9999992902897782, iteration: 13469
loss: 1.0466307401657104,grad_norm: 0.99999955507155, iteration: 13470
loss: 1.0409120321273804,grad_norm: 0.999999172464429, iteration: 13471
loss: 1.1224329471588135,grad_norm: 0.999999734805721, iteration: 13472
loss: 1.0572283267974854,grad_norm: 0.9999991734217853, iteration: 13473
loss: 1.0353055000305176,grad_norm: 0.9999990155312762, iteration: 13474
loss: 1.0094000101089478,grad_norm: 0.9999991218320782, iteration: 13475
loss: 0.9871627688407898,grad_norm: 0.9999991539968018, iteration: 13476
loss: 1.0126110315322876,grad_norm: 0.9999994900526691, iteration: 13477
loss: 1.035448670387268,grad_norm: 0.9999992675113246, iteration: 13478
loss: 0.997340738773346,grad_norm: 0.9999992060209381, iteration: 13479
loss: 1.0471161603927612,grad_norm: 0.9999993457359837, iteration: 13480
loss: 1.0640592575073242,grad_norm: 0.9999992295402784, iteration: 13481
loss: 1.041247844696045,grad_norm: 0.9999991474693921, iteration: 13482
loss: 1.047590732574463,grad_norm: 0.9999990983181346, iteration: 13483
loss: 1.0363510847091675,grad_norm: 0.9999992180770239, iteration: 13484
loss: 1.0311518907546997,grad_norm: 0.9999991084245047, iteration: 13485
loss: 1.067366361618042,grad_norm: 0.9999993318076914, iteration: 13486
loss: 0.9841188788414001,grad_norm: 0.9999992268888512, iteration: 13487
loss: 1.0164825916290283,grad_norm: 0.9999991841065642, iteration: 13488
loss: 1.0397374629974365,grad_norm: 0.9999994017506356, iteration: 13489
loss: 1.0425658226013184,grad_norm: 0.999999326113901, iteration: 13490
loss: 1.0137569904327393,grad_norm: 0.9999992424375935, iteration: 13491
loss: 1.0604450702667236,grad_norm: 0.999999636909466, iteration: 13492
loss: 1.021572232246399,grad_norm: 0.9999993198812447, iteration: 13493
loss: 1.0042775869369507,grad_norm: 0.9999993556637331, iteration: 13494
loss: 1.1367212533950806,grad_norm: 0.9999997579490195, iteration: 13495
loss: 1.0536551475524902,grad_norm: 0.9999992279754902, iteration: 13496
loss: 1.0903993844985962,grad_norm: 0.9999996334050257, iteration: 13497
loss: 1.0182727575302124,grad_norm: 0.999999134137189, iteration: 13498
loss: 1.0418998003005981,grad_norm: 0.9999993128397842, iteration: 13499
loss: 1.0998027324676514,grad_norm: 0.999999734131217, iteration: 13500
loss: 1.0648411512374878,grad_norm: 0.9999998820871829, iteration: 13501
loss: 1.069008708000183,grad_norm: 0.9999996219844691, iteration: 13502
loss: 1.059396743774414,grad_norm: 0.999999473013257, iteration: 13503
loss: 0.9830449819564819,grad_norm: 0.9999993865012624, iteration: 13504
loss: 0.991480827331543,grad_norm: 0.9999993704321088, iteration: 13505
loss: 0.9989108443260193,grad_norm: 0.9999992839198704, iteration: 13506
loss: 1.0237549543380737,grad_norm: 0.9999991828007148, iteration: 13507
loss: 1.0263947248458862,grad_norm: 0.9999993695223276, iteration: 13508
loss: 1.0153709650039673,grad_norm: 0.9999993329381499, iteration: 13509
loss: 1.004885196685791,grad_norm: 0.9999994993237836, iteration: 13510
loss: 1.0598005056381226,grad_norm: 0.999999448912804, iteration: 13511
loss: 1.0391544103622437,grad_norm: 0.9999994454420387, iteration: 13512
loss: 1.0516738891601562,grad_norm: 0.999999446148047, iteration: 13513
loss: 1.0099000930786133,grad_norm: 0.9999993740717966, iteration: 13514
loss: 1.0347105264663696,grad_norm: 0.9999993996638783, iteration: 13515
loss: 0.9874381422996521,grad_norm: 0.9727408101049693, iteration: 13516
loss: 1.0394270420074463,grad_norm: 0.9999992712079426, iteration: 13517
loss: 1.0924760103225708,grad_norm: 0.9999996239069029, iteration: 13518
loss: 1.0512837171554565,grad_norm: 0.9999993956950918, iteration: 13519
loss: 1.0186339616775513,grad_norm: 0.9999993631760546, iteration: 13520
loss: 0.986028254032135,grad_norm: 0.9999991087513007, iteration: 13521
loss: 1.0223191976547241,grad_norm: 0.9999993036573445, iteration: 13522
loss: 1.0403114557266235,grad_norm: 0.9999997279713553, iteration: 13523
loss: 1.0069552659988403,grad_norm: 0.9999995152749788, iteration: 13524
loss: 1.010157585144043,grad_norm: 0.9999994688736603, iteration: 13525
loss: 1.0958373546600342,grad_norm: 0.9999996332052321, iteration: 13526
loss: 1.0623878240585327,grad_norm: 0.9999995397727419, iteration: 13527
loss: 0.9614200592041016,grad_norm: 0.9999990797153521, iteration: 13528
loss: 1.0682297945022583,grad_norm: 0.9999992238913831, iteration: 13529
loss: 1.0037150382995605,grad_norm: 0.9999995552757797, iteration: 13530
loss: 0.9547197818756104,grad_norm: 0.9999992898535051, iteration: 13531
loss: 1.0147433280944824,grad_norm: 0.9703032223572581, iteration: 13532
loss: 1.0346245765686035,grad_norm: 0.9999992205815474, iteration: 13533
loss: 1.0139976739883423,grad_norm: 0.9637033016622298, iteration: 13534
loss: 1.0373892784118652,grad_norm: 0.9999994979497155, iteration: 13535
loss: 0.9813776612281799,grad_norm: 0.9999992658193455, iteration: 13536
loss: 1.0307809114456177,grad_norm: 0.9999993030794261, iteration: 13537
loss: 1.0338822603225708,grad_norm: 0.9999990966096697, iteration: 13538
loss: 1.0455468893051147,grad_norm: 0.9999995655680938, iteration: 13539
loss: 1.024120807647705,grad_norm: 0.9999996106385591, iteration: 13540
loss: 1.084035038948059,grad_norm: 0.9999994530361689, iteration: 13541
loss: 1.010975956916809,grad_norm: 0.9999991553054595, iteration: 13542
loss: 1.023869514465332,grad_norm: 0.9999997147919624, iteration: 13543
loss: 1.0405592918395996,grad_norm: 0.9999994128372274, iteration: 13544
loss: 1.0798299312591553,grad_norm: 0.999999637890077, iteration: 13545
loss: 0.9864662289619446,grad_norm: 0.9999991038542888, iteration: 13546
loss: 0.9677625894546509,grad_norm: 0.9999991895915715, iteration: 13547
loss: 1.0207632780075073,grad_norm: 0.9999994265816692, iteration: 13548
loss: 1.055898904800415,grad_norm: 0.9999995325320357, iteration: 13549
loss: 1.004503846168518,grad_norm: 0.9999994169130432, iteration: 13550
loss: 1.0687074661254883,grad_norm: 0.9999992899202027, iteration: 13551
loss: 1.0509977340698242,grad_norm: 0.9999993205124322, iteration: 13552
loss: 1.0660309791564941,grad_norm: 0.9999993289633273, iteration: 13553
loss: 1.0614179372787476,grad_norm: 0.9999991666039663, iteration: 13554
loss: 1.0387365818023682,grad_norm: 0.9999998265558473, iteration: 13555
loss: 1.061446189880371,grad_norm: 0.9999996765768512, iteration: 13556
loss: 1.073681116104126,grad_norm: 0.9999997931943732, iteration: 13557
loss: 0.9722500443458557,grad_norm: 0.9999991350623731, iteration: 13558
loss: 1.08969247341156,grad_norm: 0.999999719912241, iteration: 13559
loss: 1.0850520133972168,grad_norm: 0.9999997018254412, iteration: 13560
loss: 1.025777816772461,grad_norm: 0.9999991661594617, iteration: 13561
loss: 1.048478603363037,grad_norm: 0.9999995211416276, iteration: 13562
loss: 1.0524885654449463,grad_norm: 0.9999994153913236, iteration: 13563
loss: 1.0303210020065308,grad_norm: 0.9999991839701764, iteration: 13564
loss: 0.9768399596214294,grad_norm: 0.9999991610709545, iteration: 13565
loss: 1.007828950881958,grad_norm: 0.9999991320901818, iteration: 13566
loss: 1.0607693195343018,grad_norm: 0.999999419330103, iteration: 13567
loss: 1.0388214588165283,grad_norm: 0.9999992926420627, iteration: 13568
loss: 1.0072176456451416,grad_norm: 0.9999993978858495, iteration: 13569
loss: 1.061352252960205,grad_norm: 0.999999831910357, iteration: 13570
loss: 1.0766156911849976,grad_norm: 0.9999995412410807, iteration: 13571
loss: 1.0783605575561523,grad_norm: 0.9999991583220184, iteration: 13572
loss: 1.0457863807678223,grad_norm: 0.9999996172163672, iteration: 13573
loss: 1.0118420124053955,grad_norm: 0.9999992003100854, iteration: 13574
loss: 0.9676522016525269,grad_norm: 0.9999992983625655, iteration: 13575
loss: 1.0173579454421997,grad_norm: 0.9999994352196243, iteration: 13576
loss: 1.0929793119430542,grad_norm: 0.9999992078728187, iteration: 13577
loss: 1.0417026281356812,grad_norm: 0.9999994103494663, iteration: 13578
loss: 1.0087782144546509,grad_norm: 0.9999992578105554, iteration: 13579
loss: 1.0307730436325073,grad_norm: 0.9999992109731131, iteration: 13580
loss: 1.0686181783676147,grad_norm: 0.9999997626441528, iteration: 13581
loss: 1.0488773584365845,grad_norm: 0.9999993127218997, iteration: 13582
loss: 1.1083050966262817,grad_norm: 0.9999994127453494, iteration: 13583
loss: 1.0144826173782349,grad_norm: 0.9999992562636184, iteration: 13584
loss: 0.9680854678153992,grad_norm: 0.9999992221944102, iteration: 13585
loss: 1.0629249811172485,grad_norm: 0.9999994905377262, iteration: 13586
loss: 1.0207111835479736,grad_norm: 0.9999996786309061, iteration: 13587
loss: 1.0015530586242676,grad_norm: 0.9999994076477489, iteration: 13588
loss: 1.0308291912078857,grad_norm: 0.999999324596475, iteration: 13589
loss: 1.0149933099746704,grad_norm: 0.9999992493920543, iteration: 13590
loss: 0.9919551014900208,grad_norm: 0.9999991330133224, iteration: 13591
loss: 1.0181291103363037,grad_norm: 0.9999996792488427, iteration: 13592
loss: 1.0177615880966187,grad_norm: 0.9999992310560053, iteration: 13593
loss: 1.0664829015731812,grad_norm: 0.999999196608324, iteration: 13594
loss: 1.0355221033096313,grad_norm: 0.9999990683711505, iteration: 13595
loss: 1.0203410387039185,grad_norm: 0.999999351858602, iteration: 13596
loss: 1.0646299123764038,grad_norm: 0.999999418088544, iteration: 13597
loss: 1.0757129192352295,grad_norm: 0.9999998142608684, iteration: 13598
loss: 1.0188815593719482,grad_norm: 0.9999993185989323, iteration: 13599
loss: 1.1077375411987305,grad_norm: 0.9999996898923433, iteration: 13600
loss: 1.0021896362304688,grad_norm: 0.9999996240373281, iteration: 13601
loss: 1.0563393831253052,grad_norm: 0.9999996150852983, iteration: 13602
loss: 1.0311279296875,grad_norm: 0.9999994518555433, iteration: 13603
loss: 1.0149697065353394,grad_norm: 0.9999991716397034, iteration: 13604
loss: 0.9937288165092468,grad_norm: 0.9999992260323923, iteration: 13605
loss: 1.0615668296813965,grad_norm: 0.9999999132109954, iteration: 13606
loss: 0.9926385283470154,grad_norm: 0.9999994395288563, iteration: 13607
loss: 1.0502182245254517,grad_norm: 0.9999994015786275, iteration: 13608
loss: 1.0319665670394897,grad_norm: 0.9999991221591192, iteration: 13609
loss: 1.0167791843414307,grad_norm: 0.9999993466506828, iteration: 13610
loss: 1.0271000862121582,grad_norm: 0.9999995133776692, iteration: 13611
loss: 1.0624829530715942,grad_norm: 0.9999994634006563, iteration: 13612
loss: 0.9973291158676147,grad_norm: 0.9999992919735967, iteration: 13613
loss: 1.0209506750106812,grad_norm: 0.9999992593559257, iteration: 13614
loss: 1.0183357000350952,grad_norm: 0.9999992689434529, iteration: 13615
loss: 1.0135865211486816,grad_norm: 0.9999991567330434, iteration: 13616
loss: 0.998992919921875,grad_norm: 0.9999996314721482, iteration: 13617
loss: 1.0476181507110596,grad_norm: 0.9999994923440259, iteration: 13618
loss: 1.0112724304199219,grad_norm: 0.9999993957202721, iteration: 13619
loss: 1.0691404342651367,grad_norm: 0.9999998280884025, iteration: 13620
loss: 1.0640958547592163,grad_norm: 0.9999994903697799, iteration: 13621
loss: 1.0196967124938965,grad_norm: 0.9999991560612758, iteration: 13622
loss: 1.0197278261184692,grad_norm: 0.9999994579251072, iteration: 13623
loss: 1.0070738792419434,grad_norm: 0.9999994791373675, iteration: 13624
loss: 1.0431854724884033,grad_norm: 0.9999993026724331, iteration: 13625
loss: 0.9988186359405518,grad_norm: 0.9999991465335863, iteration: 13626
loss: 1.0020729303359985,grad_norm: 0.999999244175394, iteration: 13627
loss: 1.0467184782028198,grad_norm: 0.9999994295461876, iteration: 13628
loss: 1.013231873512268,grad_norm: 0.9999992762242333, iteration: 13629
loss: 1.054457426071167,grad_norm: 0.999999816690706, iteration: 13630
loss: 1.0547528266906738,grad_norm: 0.9999998380927986, iteration: 13631
loss: 1.0065170526504517,grad_norm: 0.9999991357239688, iteration: 13632
loss: 1.033403992652893,grad_norm: 0.9082089187209953, iteration: 13633
loss: 1.086077094078064,grad_norm: 0.9999996496787181, iteration: 13634
loss: 1.0718356370925903,grad_norm: 0.9999994059577099, iteration: 13635
loss: 1.0832206010818481,grad_norm: 0.9999997799689184, iteration: 13636
loss: 1.0238006114959717,grad_norm: 0.9999995396616697, iteration: 13637
loss: 1.0528361797332764,grad_norm: 0.9999997595267336, iteration: 13638
loss: 0.983819842338562,grad_norm: 0.9999992727868942, iteration: 13639
loss: 1.0504647493362427,grad_norm: 0.999999071211413, iteration: 13640
loss: 1.0580317974090576,grad_norm: 0.9999994480510289, iteration: 13641
loss: 1.03989577293396,grad_norm: 0.9999991570400336, iteration: 13642
loss: 1.0139621496200562,grad_norm: 0.9999991063870022, iteration: 13643
loss: 1.012646198272705,grad_norm: 0.9999992690974473, iteration: 13644
loss: 1.0153605937957764,grad_norm: 0.999999344883663, iteration: 13645
loss: 1.1268037557601929,grad_norm: 0.9999998385114692, iteration: 13646
loss: 1.0123156309127808,grad_norm: 0.9999992506998187, iteration: 13647
loss: 1.0280033349990845,grad_norm: 0.9999998477357127, iteration: 13648
loss: 1.0392426252365112,grad_norm: 0.9999990921338807, iteration: 13649
loss: 1.0304539203643799,grad_norm: 0.9999993213177617, iteration: 13650
loss: 1.018871545791626,grad_norm: 0.9999992973267996, iteration: 13651
loss: 1.0204715728759766,grad_norm: 0.9999995474657315, iteration: 13652
loss: 1.001440405845642,grad_norm: 0.9999993103820337, iteration: 13653
loss: 1.0777559280395508,grad_norm: 0.9999998915901681, iteration: 13654
loss: 1.0517737865447998,grad_norm: 0.9999997213866018, iteration: 13655
loss: 1.0076117515563965,grad_norm: 0.9999991853016404, iteration: 13656
loss: 1.0263235569000244,grad_norm: 0.9999991577858178, iteration: 13657
loss: 1.0338162183761597,grad_norm: 0.9927036229865487, iteration: 13658
loss: 1.0502725839614868,grad_norm: 0.9999994182518629, iteration: 13659
loss: 1.031282663345337,grad_norm: 0.9999993210794295, iteration: 13660
loss: 1.194076418876648,grad_norm: 0.9999996514075772, iteration: 13661
loss: 1.0754144191741943,grad_norm: 0.9999997633393664, iteration: 13662
loss: 1.008655071258545,grad_norm: 0.9999991628853555, iteration: 13663
loss: 1.0049842596054077,grad_norm: 0.999999801428219, iteration: 13664
loss: 1.1092652082443237,grad_norm: 0.9999996699764995, iteration: 13665
loss: 1.0550074577331543,grad_norm: 0.999999434195787, iteration: 13666
loss: 0.9739477038383484,grad_norm: 0.9999996727531357, iteration: 13667
loss: 1.0782259702682495,grad_norm: 0.9999998420967485, iteration: 13668
loss: 1.0045491456985474,grad_norm: 0.9999993910737872, iteration: 13669
loss: 1.0522135496139526,grad_norm: 0.999999720787277, iteration: 13670
loss: 1.0319910049438477,grad_norm: 0.9999994297468325, iteration: 13671
loss: 1.032299518585205,grad_norm: 0.9999991080543663, iteration: 13672
loss: 0.9970917701721191,grad_norm: 0.9999989963272672, iteration: 13673
loss: 1.0224354267120361,grad_norm: 0.9999996620298328, iteration: 13674
loss: 0.9917604923248291,grad_norm: 0.9999993635737343, iteration: 13675
loss: 1.076027274131775,grad_norm: 0.9999997068614184, iteration: 13676
loss: 1.1036971807479858,grad_norm: 0.9999997153687766, iteration: 13677
loss: 1.0559550523757935,grad_norm: 0.9999992781774463, iteration: 13678
loss: 1.044207215309143,grad_norm: 0.9999996538562252, iteration: 13679
loss: 1.035189151763916,grad_norm: 0.9999991299242393, iteration: 13680
loss: 1.0676406621932983,grad_norm: 0.999999040357577, iteration: 13681
loss: 1.0042043924331665,grad_norm: 0.9999995223607314, iteration: 13682
loss: 1.039838194847107,grad_norm: 0.9999992237125134, iteration: 13683
loss: 1.1355063915252686,grad_norm: 0.9999995655707138, iteration: 13684
loss: 1.0469104051589966,grad_norm: 0.9999996331956651, iteration: 13685
loss: 1.0220417976379395,grad_norm: 0.9999996484271545, iteration: 13686
loss: 1.0236217975616455,grad_norm: 0.9999993544109367, iteration: 13687
loss: 0.9990773797035217,grad_norm: 0.9088945590903242, iteration: 13688
loss: 1.048522710800171,grad_norm: 0.9999992187093527, iteration: 13689
loss: 1.0173946619033813,grad_norm: 0.9999999607710582, iteration: 13690
loss: 1.0464426279067993,grad_norm: 0.9999990919431757, iteration: 13691
loss: 1.0327529907226562,grad_norm: 0.9642696537304328, iteration: 13692
loss: 1.0161422491073608,grad_norm: 0.9999994671162707, iteration: 13693
loss: 1.0364636182785034,grad_norm: 0.999999738732132, iteration: 13694
loss: 1.0266846418380737,grad_norm: 0.9999992949413018, iteration: 13695
loss: 1.0703414678573608,grad_norm: 0.9999996089280173, iteration: 13696
loss: 1.0118637084960938,grad_norm: 0.9999991481902853, iteration: 13697
loss: 1.1208280324935913,grad_norm: 0.9999998448564974, iteration: 13698
loss: 1.022668480873108,grad_norm: 0.9861433657073831, iteration: 13699
loss: 1.0639002323150635,grad_norm: 0.9999992754536113, iteration: 13700
loss: 1.018842339515686,grad_norm: 0.9999996852394062, iteration: 13701
loss: 0.9983630776405334,grad_norm: 0.9999995552227285, iteration: 13702
loss: 1.0332602262496948,grad_norm: 0.9999996470420813, iteration: 13703
loss: 1.0542880296707153,grad_norm: 0.9999998361630588, iteration: 13704
loss: 1.0119309425354004,grad_norm: 0.9999990236206369, iteration: 13705
loss: 1.0598065853118896,grad_norm: 0.9999997739934995, iteration: 13706
loss: 1.0223156213760376,grad_norm: 0.999999425133438, iteration: 13707
loss: 1.053850531578064,grad_norm: 0.9999992053272234, iteration: 13708
loss: 1.015617847442627,grad_norm: 0.9999991172707265, iteration: 13709
loss: 1.0248467922210693,grad_norm: 0.9999994119510944, iteration: 13710
loss: 1.007575511932373,grad_norm: 0.9999992536858736, iteration: 13711
loss: 1.0019375085830688,grad_norm: 0.9999992949899791, iteration: 13712
loss: 1.043843388557434,grad_norm: 0.9999992928155539, iteration: 13713
loss: 1.0288721323013306,grad_norm: 0.99999931542202, iteration: 13714
loss: 1.0404400825500488,grad_norm: 0.9999994069922379, iteration: 13715
loss: 1.0384668111801147,grad_norm: 0.9999994299296628, iteration: 13716
loss: 1.0372846126556396,grad_norm: 0.9999993506790434, iteration: 13717
loss: 1.0577342510223389,grad_norm: 0.9999994512622795, iteration: 13718
loss: 1.123119831085205,grad_norm: 0.9999995189405778, iteration: 13719
loss: 0.9792123436927795,grad_norm: 0.999999221924037, iteration: 13720
loss: 1.0022032260894775,grad_norm: 0.9938925634303907, iteration: 13721
loss: 0.9877926111221313,grad_norm: 0.9999991725412516, iteration: 13722
loss: 1.016467571258545,grad_norm: 0.9999995443138711, iteration: 13723
loss: 1.0231542587280273,grad_norm: 0.9999991755195641, iteration: 13724
loss: 1.0363211631774902,grad_norm: 0.999999373821511, iteration: 13725
loss: 1.0560097694396973,grad_norm: 0.9999990730421343, iteration: 13726
loss: 1.0538744926452637,grad_norm: 0.9999991370964769, iteration: 13727
loss: 1.039826512336731,grad_norm: 0.9999992965367314, iteration: 13728
loss: 1.015784740447998,grad_norm: 0.9999991427606923, iteration: 13729
loss: 1.03147554397583,grad_norm: 0.9999996233923456, iteration: 13730
loss: 1.014861822128296,grad_norm: 0.9999991955037079, iteration: 13731
loss: 1.014610767364502,grad_norm: 0.9999990267005299, iteration: 13732
loss: 1.0067040920257568,grad_norm: 0.9999992768920781, iteration: 13733
loss: 0.9929050207138062,grad_norm: 0.9999991304187742, iteration: 13734
loss: 0.9947757720947266,grad_norm: 0.9999994309972721, iteration: 13735
loss: 1.0255119800567627,grad_norm: 0.9999993083665782, iteration: 13736
loss: 1.039286494255066,grad_norm: 0.9999991421143334, iteration: 13737
loss: 0.9991088509559631,grad_norm: 0.9999990844368988, iteration: 13738
loss: 1.1191548109054565,grad_norm: 0.9999999463172009, iteration: 13739
loss: 1.0178518295288086,grad_norm: 0.9999991590476791, iteration: 13740
loss: 1.030146837234497,grad_norm: 0.999999120231315, iteration: 13741
loss: 1.0076930522918701,grad_norm: 0.9999991260197731, iteration: 13742
loss: 1.098387598991394,grad_norm: 0.9999998267831047, iteration: 13743
loss: 1.0308287143707275,grad_norm: 0.9999993371423781, iteration: 13744
loss: 1.0411821603775024,grad_norm: 0.9999991629476095, iteration: 13745
loss: 1.0383577346801758,grad_norm: 0.9999990673319643, iteration: 13746
loss: 1.0645949840545654,grad_norm: 0.9999992766971896, iteration: 13747
loss: 1.0408278703689575,grad_norm: 0.9999991485641582, iteration: 13748
loss: 0.9780418872833252,grad_norm: 0.9999990706145087, iteration: 13749
loss: 1.0483226776123047,grad_norm: 0.9999994507607883, iteration: 13750
loss: 1.0158939361572266,grad_norm: 0.9999991209377905, iteration: 13751
loss: 1.0310026407241821,grad_norm: 0.9999991433870087, iteration: 13752
loss: 1.0118643045425415,grad_norm: 0.9999992359725688, iteration: 13753
loss: 1.003209114074707,grad_norm: 0.9999995200014496, iteration: 13754
loss: 1.0198922157287598,grad_norm: 0.9999992891984774, iteration: 13755
loss: 1.0767693519592285,grad_norm: 0.9999998270453733, iteration: 13756
loss: 1.0504891872406006,grad_norm: 0.9999997656337581, iteration: 13757
loss: 0.9784590601921082,grad_norm: 0.9999991259360901, iteration: 13758
loss: 1.0328240394592285,grad_norm: 0.9999993763236764, iteration: 13759
loss: 1.0755248069763184,grad_norm: 0.999999584440868, iteration: 13760
loss: 1.04293692111969,grad_norm: 0.9999993234594631, iteration: 13761
loss: 1.0637545585632324,grad_norm: 0.9999994314933557, iteration: 13762
loss: 1.040083646774292,grad_norm: 0.999999468202106, iteration: 13763
loss: 1.027872920036316,grad_norm: 0.9999993642024595, iteration: 13764
loss: 0.9926658272743225,grad_norm: 0.999999045773799, iteration: 13765
loss: 1.0951286554336548,grad_norm: 0.9999997471086305, iteration: 13766
loss: 1.0558645725250244,grad_norm: 0.999999704237861, iteration: 13767
loss: 0.9842536449432373,grad_norm: 0.9999992552138675, iteration: 13768
loss: 1.1026026010513306,grad_norm: 0.9999996109301277, iteration: 13769
loss: 1.0073741674423218,grad_norm: 0.9999992428598993, iteration: 13770
loss: 1.0594751834869385,grad_norm: 0.9999992402167854, iteration: 13771
loss: 0.9888561367988586,grad_norm: 0.9999991779890501, iteration: 13772
loss: 1.035403847694397,grad_norm: 0.99999987825308, iteration: 13773
loss: 1.126932144165039,grad_norm: 0.9999997524998866, iteration: 13774
loss: 1.0254871845245361,grad_norm: 0.9999995341281912, iteration: 13775
loss: 1.0116666555404663,grad_norm: 0.9999999906193007, iteration: 13776
loss: 1.000598430633545,grad_norm: 0.9999991166553498, iteration: 13777
loss: 0.994874894618988,grad_norm: 0.9977134433012352, iteration: 13778
loss: 1.0257127285003662,grad_norm: 0.9999990432947105, iteration: 13779
loss: 1.0499368906021118,grad_norm: 0.9999992792606521, iteration: 13780
loss: 0.9884288907051086,grad_norm: 0.9999991845497687, iteration: 13781
loss: 1.015436053276062,grad_norm: 0.9999994191261669, iteration: 13782
loss: 1.0096501111984253,grad_norm: 0.9999993436657948, iteration: 13783
loss: 1.0237900018692017,grad_norm: 0.9999997567109156, iteration: 13784
loss: 1.0205976963043213,grad_norm: 0.9999991286444392, iteration: 13785
loss: 1.0697726011276245,grad_norm: 0.9999994345601164, iteration: 13786
loss: 1.0307142734527588,grad_norm: 0.9999994113832984, iteration: 13787
loss: 1.0581218004226685,grad_norm: 0.9999997468296618, iteration: 13788
loss: 0.9968674182891846,grad_norm: 0.9999991105465559, iteration: 13789
loss: 1.0197296142578125,grad_norm: 0.9999995009409727, iteration: 13790
loss: 1.0210411548614502,grad_norm: 0.9999993729035933, iteration: 13791
loss: 1.0278724431991577,grad_norm: 0.9999993176651072, iteration: 13792
loss: 0.9881343841552734,grad_norm: 0.9999992255558598, iteration: 13793
loss: 1.0725358724594116,grad_norm: 0.999999695637856, iteration: 13794
loss: 0.995032548904419,grad_norm: 0.9999991609424649, iteration: 13795
loss: 1.0787659883499146,grad_norm: 0.9999994124010655, iteration: 13796
loss: 1.0041828155517578,grad_norm: 0.9999995878918382, iteration: 13797
loss: 1.0080589056015015,grad_norm: 0.9999995586838845, iteration: 13798
loss: 1.0078496932983398,grad_norm: 0.9999993243551031, iteration: 13799
loss: 1.11441171169281,grad_norm: 0.9999996433780352, iteration: 13800
loss: 1.0308347940444946,grad_norm: 0.9999991524801517, iteration: 13801
loss: 1.0524168014526367,grad_norm: 0.9999992509232427, iteration: 13802
loss: 1.0633666515350342,grad_norm: 0.9999993260591357, iteration: 13803
loss: 1.0321626663208008,grad_norm: 0.9999994325451228, iteration: 13804
loss: 1.0223007202148438,grad_norm: 0.9999991797707217, iteration: 13805
loss: 1.0453481674194336,grad_norm: 0.9999993373561649, iteration: 13806
loss: 1.0388182401657104,grad_norm: 0.9999996831893816, iteration: 13807
loss: 1.0536130666732788,grad_norm: 0.9999992987056, iteration: 13808
loss: 1.051590919494629,grad_norm: 0.9999994837417083, iteration: 13809
loss: 1.0219919681549072,grad_norm: 0.9999992136073612, iteration: 13810
loss: 1.0251595973968506,grad_norm: 0.99999923633523, iteration: 13811
loss: 1.0609846115112305,grad_norm: 0.9999993127769361, iteration: 13812
loss: 0.9986525774002075,grad_norm: 0.999999660328993, iteration: 13813
loss: 0.9912948608398438,grad_norm: 0.9999993196364423, iteration: 13814
loss: 1.0864781141281128,grad_norm: 0.9999995312073109, iteration: 13815
loss: 1.0272417068481445,grad_norm: 0.9999993352075995, iteration: 13816
loss: 1.0737009048461914,grad_norm: 0.9999991729802978, iteration: 13817
loss: 1.0219135284423828,grad_norm: 0.9999994098348618, iteration: 13818
loss: 1.0009043216705322,grad_norm: 0.9999995697555238, iteration: 13819
loss: 1.0414067506790161,grad_norm: 0.9999992243830063, iteration: 13820
loss: 1.1915531158447266,grad_norm: 0.9999995911334487, iteration: 13821
loss: 1.0280357599258423,grad_norm: 0.999999567579067, iteration: 13822
loss: 1.0576226711273193,grad_norm: 0.9999995250500235, iteration: 13823
loss: 1.0581517219543457,grad_norm: 0.9999993291670584, iteration: 13824
loss: 1.044809103012085,grad_norm: 0.9999995638741624, iteration: 13825
loss: 0.9987080097198486,grad_norm: 0.9999995680438007, iteration: 13826
loss: 1.0243821144104004,grad_norm: 0.9999995659116088, iteration: 13827
loss: 1.0226528644561768,grad_norm: 0.9999992437520001, iteration: 13828
loss: 1.002150297164917,grad_norm: 0.9999992988204156, iteration: 13829
loss: 1.0233280658721924,grad_norm: 0.9999992633013463, iteration: 13830
loss: 1.0437545776367188,grad_norm: 0.9999995850407501, iteration: 13831
loss: 1.0028467178344727,grad_norm: 0.9999995272577541, iteration: 13832
loss: 1.0436304807662964,grad_norm: 0.999999573477076, iteration: 13833
loss: 1.0279780626296997,grad_norm: 0.9999998700053542, iteration: 13834
loss: 1.0223194360733032,grad_norm: 0.9999993779199481, iteration: 13835
loss: 1.0501766204833984,grad_norm: 0.9999994217071584, iteration: 13836
loss: 1.0359313488006592,grad_norm: 0.9999991233380816, iteration: 13837
loss: 1.0022293329238892,grad_norm: 0.9999993560557131, iteration: 13838
loss: 1.0325101613998413,grad_norm: 0.9999992815420493, iteration: 13839
loss: 1.0614838600158691,grad_norm: 0.9999992570629179, iteration: 13840
loss: 1.0069087743759155,grad_norm: 0.9754823769487171, iteration: 13841
loss: 1.0285881757736206,grad_norm: 0.9999992507662124, iteration: 13842
loss: 1.1161580085754395,grad_norm: 0.9999996367380547, iteration: 13843
loss: 1.075642704963684,grad_norm: 0.9999996939914775, iteration: 13844
loss: 1.0211656093597412,grad_norm: 0.9999991031160589, iteration: 13845
loss: 1.0697427988052368,grad_norm: 0.9999994523197115, iteration: 13846
loss: 1.0326757431030273,grad_norm: 0.9999992289002371, iteration: 13847
loss: 1.0268604755401611,grad_norm: 0.9999998036703605, iteration: 13848
loss: 1.0835188627243042,grad_norm: 0.999999198326958, iteration: 13849
loss: 1.0017625093460083,grad_norm: 0.9999991559905876, iteration: 13850
loss: 1.037660837173462,grad_norm: 0.999999500051516, iteration: 13851
loss: 1.0625085830688477,grad_norm: 0.999999740775489, iteration: 13852
loss: 1.0427452325820923,grad_norm: 0.999999702151955, iteration: 13853
loss: 1.0185837745666504,grad_norm: 0.999999551150505, iteration: 13854
loss: 1.0950251817703247,grad_norm: 0.9999999237763717, iteration: 13855
loss: 1.050605297088623,grad_norm: 0.9999990955176533, iteration: 13856
loss: 1.0788567066192627,grad_norm: 0.9999996354319534, iteration: 13857
loss: 1.0827581882476807,grad_norm: 0.9999994144565143, iteration: 13858
loss: 1.052722454071045,grad_norm: 0.9999994292439364, iteration: 13859
loss: 1.0258764028549194,grad_norm: 0.999999522623521, iteration: 13860
loss: 1.0209944248199463,grad_norm: 0.9999996140894384, iteration: 13861
loss: 1.042319893836975,grad_norm: 0.9999995950322849, iteration: 13862
loss: 1.0351672172546387,grad_norm: 0.9999991833509285, iteration: 13863
loss: 1.0654542446136475,grad_norm: 0.9999996380773499, iteration: 13864
loss: 1.0327321290969849,grad_norm: 0.9999994402506364, iteration: 13865
loss: 1.041202425956726,grad_norm: 0.9999993113441339, iteration: 13866
loss: 1.0121846199035645,grad_norm: 0.9999991231534068, iteration: 13867
loss: 1.0575358867645264,grad_norm: 0.9999993303600196, iteration: 13868
loss: 1.0268070697784424,grad_norm: 0.9999991827554919, iteration: 13869
loss: 1.0031119585037231,grad_norm: 0.9999992885849208, iteration: 13870
loss: 1.0155677795410156,grad_norm: 0.9999993731021626, iteration: 13871
loss: 1.0549968481063843,grad_norm: 0.9999992025365694, iteration: 13872
loss: 1.059367060661316,grad_norm: 0.9999995508730516, iteration: 13873
loss: 1.0392121076583862,grad_norm: 0.999999155981482, iteration: 13874
loss: 0.9928577542304993,grad_norm: 0.9999992232951838, iteration: 13875
loss: 1.0476890802383423,grad_norm: 0.9999990905320989, iteration: 13876
loss: 1.0596067905426025,grad_norm: 0.9999994428850065, iteration: 13877
loss: 1.03292715549469,grad_norm: 0.9545214322507729, iteration: 13878
loss: 1.0905169248580933,grad_norm: 0.9999993280946161, iteration: 13879
loss: 1.044451355934143,grad_norm: 0.999999380082789, iteration: 13880
loss: 1.051445484161377,grad_norm: 0.9999994550705883, iteration: 13881
loss: 1.0310598611831665,grad_norm: 0.999999603221728, iteration: 13882
loss: 1.0466105937957764,grad_norm: 0.9999991756860798, iteration: 13883
loss: 1.015960454940796,grad_norm: 0.9999992015955178, iteration: 13884
loss: 1.0593678951263428,grad_norm: 0.99999981710234, iteration: 13885
loss: 1.0273712873458862,grad_norm: 0.99999918475798, iteration: 13886
loss: 1.0883524417877197,grad_norm: 0.9999996361168996, iteration: 13887
loss: 1.0298982858657837,grad_norm: 0.999999530153899, iteration: 13888
loss: 1.101460576057434,grad_norm: 0.9999997445269831, iteration: 13889
loss: 0.9947001338005066,grad_norm: 0.9999991673879124, iteration: 13890
loss: 1.0268611907958984,grad_norm: 0.9999991801745672, iteration: 13891
loss: 1.0110145807266235,grad_norm: 0.9999993267058586, iteration: 13892
loss: 1.0241401195526123,grad_norm: 0.9999990386868538, iteration: 13893
loss: 0.998715877532959,grad_norm: 0.9572278773181199, iteration: 13894
loss: 1.018017292022705,grad_norm: 0.9999991394443699, iteration: 13895
loss: 1.0151078701019287,grad_norm: 0.9999990930771804, iteration: 13896
loss: 1.0083543062210083,grad_norm: 0.9999989940144866, iteration: 13897
loss: 1.1015329360961914,grad_norm: 0.9999999407224052, iteration: 13898
loss: 1.0491135120391846,grad_norm: 0.9999991483040384, iteration: 13899
loss: 1.0180702209472656,grad_norm: 0.9999989988208776, iteration: 13900
loss: 1.0621675252914429,grad_norm: 0.9999996073782448, iteration: 13901
loss: 1.0354067087173462,grad_norm: 0.9999996194440449, iteration: 13902
loss: 1.0605106353759766,grad_norm: 0.999999288773505, iteration: 13903
loss: 1.0023791790008545,grad_norm: 0.999999334379039, iteration: 13904
loss: 1.0911967754364014,grad_norm: 0.9999995390809135, iteration: 13905
loss: 0.9800896048545837,grad_norm: 0.9999995047542107, iteration: 13906
loss: 0.9986753463745117,grad_norm: 0.9999993309618908, iteration: 13907
loss: 1.1170965433120728,grad_norm: 0.9999993657967383, iteration: 13908
loss: 1.0402264595031738,grad_norm: 0.9999991145140256, iteration: 13909
loss: 1.0359992980957031,grad_norm: 0.9999998892403917, iteration: 13910
loss: 1.0847264528274536,grad_norm: 0.9999992808079158, iteration: 13911
loss: 1.0085463523864746,grad_norm: 0.9999993206386175, iteration: 13912
loss: 1.0546667575836182,grad_norm: 0.999999348510487, iteration: 13913
loss: 1.0514596700668335,grad_norm: 0.9999994901727042, iteration: 13914
loss: 1.0147430896759033,grad_norm: 0.9999994048078088, iteration: 13915
loss: 1.0147866010665894,grad_norm: 0.9999990717751791, iteration: 13916
loss: 1.0206619501113892,grad_norm: 0.9999991515713736, iteration: 13917
loss: 1.0113003253936768,grad_norm: 0.9999993318247312, iteration: 13918
loss: 1.0292342901229858,grad_norm: 0.9999996334886162, iteration: 13919
loss: 0.966781735420227,grad_norm: 0.9999990686683238, iteration: 13920
loss: 1.0306053161621094,grad_norm: 0.9999994308289051, iteration: 13921
loss: 0.9895493984222412,grad_norm: 0.9999992768806256, iteration: 13922
loss: 1.033043622970581,grad_norm: 0.999999007201986, iteration: 13923
loss: 1.008877158164978,grad_norm: 0.9999992402935232, iteration: 13924
loss: 1.0198100805282593,grad_norm: 0.9999993131583412, iteration: 13925
loss: 1.0400561094284058,grad_norm: 0.9999993504791357, iteration: 13926
loss: 1.0518344640731812,grad_norm: 0.9999992279974355, iteration: 13927
loss: 0.9980111718177795,grad_norm: 0.999999805332645, iteration: 13928
loss: 1.062614917755127,grad_norm: 0.9999995124669472, iteration: 13929
loss: 1.0663738250732422,grad_norm: 0.9999994455484371, iteration: 13930
loss: 1.034074306488037,grad_norm: 0.9999991724789595, iteration: 13931
loss: 1.001031517982483,grad_norm: 0.9999991886368732, iteration: 13932
loss: 1.0251307487487793,grad_norm: 0.9999990807256686, iteration: 13933
loss: 1.0665271282196045,grad_norm: 0.9999990164892107, iteration: 13934
loss: 1.0424884557724,grad_norm: 0.9999991485274439, iteration: 13935
loss: 1.0013656616210938,grad_norm: 0.9999993515452064, iteration: 13936
loss: 1.0383878946304321,grad_norm: 0.9999994610503236, iteration: 13937
loss: 1.0676648616790771,grad_norm: 0.9999995550355901, iteration: 13938
loss: 1.0277761220932007,grad_norm: 0.9999992066832203, iteration: 13939
loss: 1.0333189964294434,grad_norm: 0.9494139127395509, iteration: 13940
loss: 0.9988122582435608,grad_norm: 0.9999994467393929, iteration: 13941
loss: 0.9983381628990173,grad_norm: 0.9999991642184347, iteration: 13942
loss: 0.9929047226905823,grad_norm: 0.9999992525208101, iteration: 13943
loss: 1.028374195098877,grad_norm: 0.9999992289509502, iteration: 13944
loss: 1.0172076225280762,grad_norm: 0.9999990681743551, iteration: 13945
loss: 1.0216827392578125,grad_norm: 0.999999189739656, iteration: 13946
loss: 1.0098044872283936,grad_norm: 0.9999992053729359, iteration: 13947
loss: 1.0174434185028076,grad_norm: 0.9999991281720886, iteration: 13948
loss: 1.0063543319702148,grad_norm: 0.9999993660147265, iteration: 13949
loss: 1.0149189233779907,grad_norm: 0.9999999394084155, iteration: 13950
loss: 1.0412628650665283,grad_norm: 0.99999972329514, iteration: 13951
loss: 1.0404677391052246,grad_norm: 0.9999995077640973, iteration: 13952
loss: 0.996652364730835,grad_norm: 0.9999991643055228, iteration: 13953
loss: 1.0169600248336792,grad_norm: 0.9999992475892934, iteration: 13954
loss: 1.1310714483261108,grad_norm: 0.9999996412340224, iteration: 13955
loss: 1.081653356552124,grad_norm: 0.9999995893471031, iteration: 13956
loss: 1.0501807928085327,grad_norm: 0.9999993408381872, iteration: 13957
loss: 1.02729070186615,grad_norm: 0.9999994975077996, iteration: 13958
loss: 1.0574498176574707,grad_norm: 0.9999995483330275, iteration: 13959
loss: 1.098603367805481,grad_norm: 0.9999999401924108, iteration: 13960
loss: 0.9767584800720215,grad_norm: 0.9999995207670229, iteration: 13961
loss: 1.0805875062942505,grad_norm: 0.9999996427633435, iteration: 13962
loss: 1.0903143882751465,grad_norm: 0.9999997713655181, iteration: 13963
loss: 1.0171887874603271,grad_norm: 0.999999267843806, iteration: 13964
loss: 1.0438108444213867,grad_norm: 0.9999995004870088, iteration: 13965
loss: 1.0725533962249756,grad_norm: 0.9999994905308814, iteration: 13966
loss: 1.0600922107696533,grad_norm: 0.999999099162241, iteration: 13967
loss: 1.1417434215545654,grad_norm: 0.9999997797635913, iteration: 13968
loss: 1.0586038827896118,grad_norm: 0.9999991889022346, iteration: 13969
loss: 1.0239989757537842,grad_norm: 0.9999994018285109, iteration: 13970
loss: 1.0061384439468384,grad_norm: 0.9999993681897384, iteration: 13971
loss: 1.0035207271575928,grad_norm: 0.9999990771672707, iteration: 13972
loss: 1.0753241777420044,grad_norm: 0.9999997180400961, iteration: 13973
loss: 1.1551622152328491,grad_norm: 0.9999999445011093, iteration: 13974
loss: 1.0274766683578491,grad_norm: 0.9999991317020723, iteration: 13975
loss: 1.0394279956817627,grad_norm: 0.9999992653517377, iteration: 13976
loss: 1.017764687538147,grad_norm: 0.9999996646410076, iteration: 13977
loss: 1.0010383129119873,grad_norm: 0.9999991107045595, iteration: 13978
loss: 1.0262243747711182,grad_norm: 0.9999994427536092, iteration: 13979
loss: 1.082281231880188,grad_norm: 0.999999880730313, iteration: 13980
loss: 1.06195867061615,grad_norm: 0.9999993616663172, iteration: 13981
loss: 1.0953959226608276,grad_norm: 0.9999992558929909, iteration: 13982
loss: 1.0719892978668213,grad_norm: 0.9999993013832881, iteration: 13983
loss: 1.0098477602005005,grad_norm: 0.9999994987282046, iteration: 13984
loss: 0.995894193649292,grad_norm: 0.9999993328143879, iteration: 13985
loss: 1.036238670349121,grad_norm: 0.9999998220967746, iteration: 13986
loss: 0.9794996976852417,grad_norm: 0.9999992535729213, iteration: 13987
loss: 1.0689834356307983,grad_norm: 0.9999991803608536, iteration: 13988
loss: 1.031948208808899,grad_norm: 0.999999202743066, iteration: 13989
loss: 1.0677794218063354,grad_norm: 0.999999647740241, iteration: 13990
loss: 1.0540508031845093,grad_norm: 0.9999996907313937, iteration: 13991
loss: 1.0280579328536987,grad_norm: 0.9999997948622906, iteration: 13992
loss: 1.0592989921569824,grad_norm: 0.9999995184618982, iteration: 13993
loss: 1.0383933782577515,grad_norm: 0.9999993050445635, iteration: 13994
loss: 1.0635544061660767,grad_norm: 0.9999992501916691, iteration: 13995
loss: 1.0089192390441895,grad_norm: 0.9999992692747492, iteration: 13996
loss: 1.0212119817733765,grad_norm: 0.9999994652315021, iteration: 13997
loss: 1.0144809484481812,grad_norm: 0.999999188654015, iteration: 13998
loss: 1.0051583051681519,grad_norm: 0.9999991373485067, iteration: 13999
loss: 1.0177637338638306,grad_norm: 0.9999996798885439, iteration: 14000
loss: 1.031427264213562,grad_norm: 0.999999385392068, iteration: 14001
loss: 1.0019192695617676,grad_norm: 0.9999989400140258, iteration: 14002
loss: 1.0012714862823486,grad_norm: 0.9999993179884558, iteration: 14003
loss: 1.0377628803253174,grad_norm: 0.999999399808478, iteration: 14004
loss: 1.037001132965088,grad_norm: 0.9999999101108205, iteration: 14005
loss: 1.0364218950271606,grad_norm: 0.9999996468499779, iteration: 14006
loss: 1.121897578239441,grad_norm: 0.9999995465194528, iteration: 14007
loss: 1.0314936637878418,grad_norm: 0.9999991157633543, iteration: 14008
loss: 1.0651576519012451,grad_norm: 0.999999841298181, iteration: 14009
loss: 1.0769456624984741,grad_norm: 0.999999888961504, iteration: 14010
loss: 1.0589600801467896,grad_norm: 0.9999994891544872, iteration: 14011
loss: 1.0342872142791748,grad_norm: 0.9999996129823262, iteration: 14012
loss: 1.1208529472351074,grad_norm: 0.999999761821229, iteration: 14013
loss: 1.0339640378952026,grad_norm: 0.9999992591323799, iteration: 14014
loss: 1.0265452861785889,grad_norm: 0.9815293591792025, iteration: 14015
loss: 1.0675026178359985,grad_norm: 0.9999995707627641, iteration: 14016
loss: 1.076979398727417,grad_norm: 0.9999994480896987, iteration: 14017
loss: 1.007058024406433,grad_norm: 0.9999991680892543, iteration: 14018
loss: 1.007706642150879,grad_norm: 0.9999991026448499, iteration: 14019
loss: 1.0320302248001099,grad_norm: 0.9999994727531422, iteration: 14020
loss: 1.080261468887329,grad_norm: 0.9999994842482787, iteration: 14021
loss: 1.0084706544876099,grad_norm: 0.9999995275551747, iteration: 14022
loss: 1.024848461151123,grad_norm: 0.9999989532931569, iteration: 14023
loss: 1.0479437112808228,grad_norm: 0.9999996480225637, iteration: 14024
loss: 1.0045541524887085,grad_norm: 0.9999992743819427, iteration: 14025
loss: 1.0605539083480835,grad_norm: 0.999999501021911, iteration: 14026
loss: 1.0424813032150269,grad_norm: 0.9999998300567348, iteration: 14027
loss: 1.050074577331543,grad_norm: 0.9999992791281643, iteration: 14028
loss: 1.0590338706970215,grad_norm: 0.9999996871044552, iteration: 14029
loss: 1.2455676794052124,grad_norm: 0.9999999447227508, iteration: 14030
loss: 1.0627676248550415,grad_norm: 0.9999997253155025, iteration: 14031
loss: 1.0972744226455688,grad_norm: 0.9999997006702301, iteration: 14032
loss: 1.0855696201324463,grad_norm: 0.9999998094395052, iteration: 14033
loss: 1.0408371686935425,grad_norm: 0.9999994952423146, iteration: 14034
loss: 1.0297931432724,grad_norm: 0.9999991644742011, iteration: 14035
loss: 1.0603724718093872,grad_norm: 0.9999994178490589, iteration: 14036
loss: 1.0546437501907349,grad_norm: 0.9999995957474013, iteration: 14037
loss: 1.0287189483642578,grad_norm: 0.9999992553106414, iteration: 14038
loss: 1.0429315567016602,grad_norm: 0.9999993360903816, iteration: 14039
loss: 0.9504895806312561,grad_norm: 0.9999992873608989, iteration: 14040
loss: 1.1029448509216309,grad_norm: 0.9999997933161541, iteration: 14041
loss: 1.0210037231445312,grad_norm: 0.9999992133851184, iteration: 14042
loss: 1.017953872680664,grad_norm: 0.9999997339060549, iteration: 14043
loss: 0.9770408272743225,grad_norm: 0.9999991991014482, iteration: 14044
loss: 0.9736009240150452,grad_norm: 0.9999989451420572, iteration: 14045
loss: 0.9890129566192627,grad_norm: 0.9999993402561373, iteration: 14046
loss: 1.0445774793624878,grad_norm: 0.9999991964367703, iteration: 14047
loss: 0.9978241920471191,grad_norm: 0.9999991539946943, iteration: 14048
loss: 1.0375968217849731,grad_norm: 0.9999992637306703, iteration: 14049
loss: 0.9656780958175659,grad_norm: 0.9999995601402473, iteration: 14050
loss: 1.03073251247406,grad_norm: 0.9999993056223672, iteration: 14051
loss: 1.045047402381897,grad_norm: 0.9999991382074828, iteration: 14052
loss: 1.0268285274505615,grad_norm: 0.9999992279016156, iteration: 14053
loss: 0.9862457513809204,grad_norm: 0.999999151757785, iteration: 14054
loss: 0.9965289235115051,grad_norm: 0.9999995961370356, iteration: 14055
loss: 1.0941449403762817,grad_norm: 0.9999993307090725, iteration: 14056
loss: 1.0708293914794922,grad_norm: 0.999999489456096, iteration: 14057
loss: 1.0067014694213867,grad_norm: 0.9999993467868313, iteration: 14058
loss: 1.0413126945495605,grad_norm: 0.9999995089028378, iteration: 14059
loss: 0.9811176657676697,grad_norm: 0.9999993915061811, iteration: 14060
loss: 1.0593576431274414,grad_norm: 0.9999992723412894, iteration: 14061
loss: 1.010387897491455,grad_norm: 0.9999994198796729, iteration: 14062
loss: 0.9904604554176331,grad_norm: 0.9999996989782474, iteration: 14063
loss: 1.0048400163650513,grad_norm: 0.9999991724065771, iteration: 14064
loss: 1.0277436971664429,grad_norm: 0.9999992102107838, iteration: 14065
loss: 1.0689568519592285,grad_norm: 0.9999997124624146, iteration: 14066
loss: 1.0514999628067017,grad_norm: 0.9999997551371278, iteration: 14067
loss: 1.0692203044891357,grad_norm: 0.9999997696304033, iteration: 14068
loss: 1.0013031959533691,grad_norm: 0.9999993922078892, iteration: 14069
loss: 1.0186659097671509,grad_norm: 0.9999993411649934, iteration: 14070
loss: 1.0200523138046265,grad_norm: 0.99999924203971, iteration: 14071
loss: 0.9922655820846558,grad_norm: 0.9999992030937866, iteration: 14072
loss: 1.004684329032898,grad_norm: 0.9999992756516239, iteration: 14073
loss: 1.0424171686172485,grad_norm: 0.9999993570109802, iteration: 14074
loss: 1.0540200471878052,grad_norm: 0.9999996548555857, iteration: 14075
loss: 1.0730111598968506,grad_norm: 0.9999996905312449, iteration: 14076
loss: 1.0400755405426025,grad_norm: 0.9999996374190976, iteration: 14077
loss: 1.063924789428711,grad_norm: 0.9999994020978911, iteration: 14078
loss: 1.037150502204895,grad_norm: 0.9999993654132363, iteration: 14079
loss: 1.0114423036575317,grad_norm: 0.9999993687957125, iteration: 14080
loss: 1.0805269479751587,grad_norm: 0.9999992563168939, iteration: 14081
loss: 1.030927300453186,grad_norm: 0.9341715625842691, iteration: 14082
loss: 1.0371642112731934,grad_norm: 0.9999993634957378, iteration: 14083
loss: 1.0563578605651855,grad_norm: 0.9999992543061306, iteration: 14084
loss: 0.9999488592147827,grad_norm: 0.9999993866481514, iteration: 14085
loss: 1.075348973274231,grad_norm: 0.9999993061812625, iteration: 14086
loss: 1.0123258829116821,grad_norm: 0.9999992970269613, iteration: 14087
loss: 1.040064811706543,grad_norm: 0.9999995247397537, iteration: 14088
loss: 1.052732229232788,grad_norm: 0.9753435674830063, iteration: 14089
loss: 1.065893530845642,grad_norm: 0.9999996434317483, iteration: 14090
loss: 1.0262689590454102,grad_norm: 0.9999994423371124, iteration: 14091
loss: 0.98306804895401,grad_norm: 0.9999990967405322, iteration: 14092
loss: 1.2135043144226074,grad_norm: 0.9999997238404083, iteration: 14093
loss: 0.9788122773170471,grad_norm: 0.9752765650696185, iteration: 14094
loss: 1.0901143550872803,grad_norm: 0.9999994680950651, iteration: 14095
loss: 1.022544026374817,grad_norm: 0.9999995406185862, iteration: 14096
loss: 1.0892730951309204,grad_norm: 0.999999701489682, iteration: 14097
loss: 1.1369234323501587,grad_norm: 0.9999996949911129, iteration: 14098
loss: 1.001312017440796,grad_norm: 0.9884119399731489, iteration: 14099
loss: 0.9666337966918945,grad_norm: 0.9999993657677845, iteration: 14100
loss: 1.0404384136199951,grad_norm: 0.9999991542179636, iteration: 14101
loss: 1.0192838907241821,grad_norm: 0.9999994134122795, iteration: 14102
loss: 1.026671290397644,grad_norm: 0.9999996578703509, iteration: 14103
loss: 1.0723096132278442,grad_norm: 0.9999993517046577, iteration: 14104
loss: 1.0243055820465088,grad_norm: 0.9999992063488945, iteration: 14105
loss: 1.0174567699432373,grad_norm: 0.9999994266100951, iteration: 14106
loss: 1.0983864068984985,grad_norm: 0.9999998560650654, iteration: 14107
loss: 1.0566142797470093,grad_norm: 0.9999995840339609, iteration: 14108
loss: 1.075143575668335,grad_norm: 0.9999994523609458, iteration: 14109
loss: 1.0090985298156738,grad_norm: 0.9999995126147015, iteration: 14110
loss: 1.01994788646698,grad_norm: 0.9999991108266836, iteration: 14111
loss: 1.0146898031234741,grad_norm: 0.9999992287085318, iteration: 14112
loss: 1.0018863677978516,grad_norm: 0.9999992415596863, iteration: 14113
loss: 1.0818480253219604,grad_norm: 0.9999997592250545, iteration: 14114
loss: 1.052351951599121,grad_norm: 0.9999992392220111, iteration: 14115
loss: 1.0280994176864624,grad_norm: 0.9999991843501679, iteration: 14116
loss: 1.0236127376556396,grad_norm: 0.999999116627807, iteration: 14117
loss: 1.023515224456787,grad_norm: 0.9999993014947942, iteration: 14118
loss: 1.0407949686050415,grad_norm: 0.9999994153842214, iteration: 14119
loss: 1.0083528757095337,grad_norm: 0.9999992728351376, iteration: 14120
loss: 1.0249323844909668,grad_norm: 0.9999992350360752, iteration: 14121
loss: 1.0685213804244995,grad_norm: 0.9999991742962968, iteration: 14122
loss: 1.01526939868927,grad_norm: 0.9999989681054622, iteration: 14123
loss: 1.016133189201355,grad_norm: 0.9999990355041489, iteration: 14124
loss: 1.0640815496444702,grad_norm: 0.9999998128833625, iteration: 14125
loss: 0.999710202217102,grad_norm: 0.999999191655728, iteration: 14126
loss: 1.0354679822921753,grad_norm: 0.9999994927062639, iteration: 14127
loss: 1.043442726135254,grad_norm: 0.999999566094854, iteration: 14128
loss: 1.0921632051467896,grad_norm: 0.9999995198120466, iteration: 14129
loss: 0.9825126528739929,grad_norm: 0.9999991240551407, iteration: 14130
loss: 1.0319136381149292,grad_norm: 0.9999991988586737, iteration: 14131
loss: 1.032371997833252,grad_norm: 0.9999991249413657, iteration: 14132
loss: 1.0208570957183838,grad_norm: 0.9999992292911372, iteration: 14133
loss: 1.0212626457214355,grad_norm: 0.9999991119328364, iteration: 14134
loss: 0.9746974110603333,grad_norm: 0.9999992681217046, iteration: 14135
loss: 1.034317970275879,grad_norm: 0.9999994396437113, iteration: 14136
loss: 0.9882518649101257,grad_norm: 0.9999991313212037, iteration: 14137
loss: 1.0386507511138916,grad_norm: 0.9999992783774571, iteration: 14138
loss: 1.0258110761642456,grad_norm: 0.999999199166285, iteration: 14139
loss: 0.9990174174308777,grad_norm: 0.9999993904258799, iteration: 14140
loss: 1.04548978805542,grad_norm: 0.999999323891673, iteration: 14141
loss: 1.0003252029418945,grad_norm: 0.9999993481469172, iteration: 14142
loss: 1.027515172958374,grad_norm: 0.9916028871541589, iteration: 14143
loss: 1.0377063751220703,grad_norm: 0.9999995100114943, iteration: 14144
loss: 1.023462176322937,grad_norm: 0.999999077048071, iteration: 14145
loss: 0.9541845917701721,grad_norm: 0.9999991246493629, iteration: 14146
loss: 1.122231125831604,grad_norm: 0.9999995731720591, iteration: 14147
loss: 1.0219166278839111,grad_norm: 0.9999991342082776, iteration: 14148
loss: 1.1159086227416992,grad_norm: 0.9999994416801521, iteration: 14149
loss: 1.0591161251068115,grad_norm: 0.9999993071818539, iteration: 14150
loss: 0.9686865210533142,grad_norm: 0.9999992940884667, iteration: 14151
loss: 1.047580599784851,grad_norm: 0.9999994549556004, iteration: 14152
loss: 1.0702406167984009,grad_norm: 0.9999995716035922, iteration: 14153
loss: 1.0084102153778076,grad_norm: 0.9999992682299518, iteration: 14154
loss: 0.9656127095222473,grad_norm: 0.9999994236343686, iteration: 14155
loss: 1.0168952941894531,grad_norm: 0.9999993974963918, iteration: 14156
loss: 1.0172439813613892,grad_norm: 0.9999991603611992, iteration: 14157
loss: 1.075702428817749,grad_norm: 0.9999992250417404, iteration: 14158
loss: 1.0163652896881104,grad_norm: 0.9999991202884698, iteration: 14159
loss: 0.9987998604774475,grad_norm: 0.9999995936904013, iteration: 14160
loss: 1.0363202095031738,grad_norm: 0.9999994841088449, iteration: 14161
loss: 1.1059281826019287,grad_norm: 0.9999996343012564, iteration: 14162
loss: 1.0118368864059448,grad_norm: 0.9999992350398056, iteration: 14163
loss: 1.007817029953003,grad_norm: 0.9999990649198376, iteration: 14164
loss: 1.0248993635177612,grad_norm: 0.9999992102910783, iteration: 14165
loss: 1.0341553688049316,grad_norm: 0.9999993818131024, iteration: 14166
loss: 1.068452000617981,grad_norm: 0.9999994595054323, iteration: 14167
loss: 1.0404889583587646,grad_norm: 0.9999996485926734, iteration: 14168
loss: 1.0491348505020142,grad_norm: 0.9999995559322358, iteration: 14169
loss: 1.0244661569595337,grad_norm: 0.9999992607622009, iteration: 14170
loss: 1.0527105331420898,grad_norm: 0.9999993523934279, iteration: 14171
loss: 1.0062628984451294,grad_norm: 0.9999991653182525, iteration: 14172
loss: 1.0477702617645264,grad_norm: 0.9999995656028708, iteration: 14173
loss: 1.060240387916565,grad_norm: 0.9999997834697207, iteration: 14174
loss: 1.0539017915725708,grad_norm: 0.9999990926780403, iteration: 14175
loss: 1.0735520124435425,grad_norm: 0.9999993786178841, iteration: 14176
loss: 1.0186736583709717,grad_norm: 0.9999993540255371, iteration: 14177
loss: 1.062798023223877,grad_norm: 0.999999510835902, iteration: 14178
loss: 1.0117913484573364,grad_norm: 0.9999992468439839, iteration: 14179
loss: 1.0374109745025635,grad_norm: 0.9999997431308976, iteration: 14180
loss: 1.0382601022720337,grad_norm: 0.9999991820688185, iteration: 14181
loss: 1.0356029272079468,grad_norm: 0.9999993232456104, iteration: 14182
loss: 1.0751309394836426,grad_norm: 0.9999994209487704, iteration: 14183
loss: 1.0241867303848267,grad_norm: 0.9999990674361127, iteration: 14184
loss: 1.0210570096969604,grad_norm: 0.999999219739885, iteration: 14185
loss: 1.0340176820755005,grad_norm: 0.999999435187975, iteration: 14186
loss: 1.0128908157348633,grad_norm: 0.9999991366841418, iteration: 14187
loss: 0.9825718998908997,grad_norm: 0.9999990817993639, iteration: 14188
loss: 1.0389295816421509,grad_norm: 0.9999993087471772, iteration: 14189
loss: 0.9994462728500366,grad_norm: 0.9999990711149674, iteration: 14190
loss: 1.0068302154541016,grad_norm: 0.999999265311264, iteration: 14191
loss: 1.057682752609253,grad_norm: 0.9999997228013868, iteration: 14192
loss: 1.001558780670166,grad_norm: 0.9797563196904087, iteration: 14193
loss: 1.0357649326324463,grad_norm: 0.9999992088041856, iteration: 14194
loss: 1.1233165264129639,grad_norm: 0.9999997856980637, iteration: 14195
loss: 1.03244149684906,grad_norm: 0.9999992388976686, iteration: 14196
loss: 0.9548880457878113,grad_norm: 0.9999991655059282, iteration: 14197
loss: 1.028434157371521,grad_norm: 0.9999992173619866, iteration: 14198
loss: 1.0085692405700684,grad_norm: 0.999999347438199, iteration: 14199
loss: 1.037998914718628,grad_norm: 0.9428062448528977, iteration: 14200
loss: 1.0395816564559937,grad_norm: 0.9999991191716299, iteration: 14201
loss: 1.056544542312622,grad_norm: 0.999999145391027, iteration: 14202
loss: 1.0212886333465576,grad_norm: 0.9999990997648253, iteration: 14203
loss: 1.0341383218765259,grad_norm: 0.9999991035460822, iteration: 14204
loss: 1.0051194429397583,grad_norm: 0.9999999147230025, iteration: 14205
loss: 1.0794038772583008,grad_norm: 0.9999993873984548, iteration: 14206
loss: 1.0604225397109985,grad_norm: 0.9999992929743188, iteration: 14207
loss: 1.0562689304351807,grad_norm: 0.9999997391616608, iteration: 14208
loss: 1.0319029092788696,grad_norm: 0.9999991176772192, iteration: 14209
loss: 1.013001799583435,grad_norm: 0.9999991159502152, iteration: 14210
loss: 1.0848987102508545,grad_norm: 0.999999385503651, iteration: 14211
loss: 1.0233218669891357,grad_norm: 0.999999240502676, iteration: 14212
loss: 1.0043586492538452,grad_norm: 0.9999991955120725, iteration: 14213
loss: 1.0243687629699707,grad_norm: 0.9999993356163964, iteration: 14214
loss: 1.0231389999389648,grad_norm: 0.999999424393054, iteration: 14215
loss: 0.9860264658927917,grad_norm: 0.9999993790913576, iteration: 14216
loss: 0.9719521403312683,grad_norm: 0.9999991423516997, iteration: 14217
loss: 1.0164700746536255,grad_norm: 0.9999994603046246, iteration: 14218
loss: 0.99024498462677,grad_norm: 0.9999991365833178, iteration: 14219
loss: 1.074129581451416,grad_norm: 0.999999195218565, iteration: 14220
loss: 1.0558819770812988,grad_norm: 0.9999992840261636, iteration: 14221
loss: 0.9776020050048828,grad_norm: 0.9999993606658758, iteration: 14222
loss: 1.0110499858856201,grad_norm: 0.9999991099150236, iteration: 14223
loss: 1.0116252899169922,grad_norm: 0.99201445429196, iteration: 14224
loss: 1.035459280014038,grad_norm: 0.9999991689792952, iteration: 14225
loss: 1.0166668891906738,grad_norm: 0.9999991851008927, iteration: 14226
loss: 1.0536574125289917,grad_norm: 0.9999994790251535, iteration: 14227
loss: 1.056153655052185,grad_norm: 0.9999992388233966, iteration: 14228
loss: 1.0458531379699707,grad_norm: 0.9999991975699977, iteration: 14229
loss: 0.9868013262748718,grad_norm: 0.9999992327126064, iteration: 14230
loss: 0.9696598649024963,grad_norm: 0.999999152184101, iteration: 14231
loss: 0.986355721950531,grad_norm: 0.9999992120576164, iteration: 14232
loss: 1.0354505777359009,grad_norm: 0.9999992077151832, iteration: 14233
loss: 1.000751256942749,grad_norm: 0.9999992393228752, iteration: 14234
loss: 1.0398790836334229,grad_norm: 0.9999997050892057, iteration: 14235
loss: 1.0042353868484497,grad_norm: 0.9999992622475594, iteration: 14236
loss: 1.0419236421585083,grad_norm: 0.9999996493544455, iteration: 14237
loss: 1.063604712486267,grad_norm: 0.9999994526253054, iteration: 14238
loss: 1.0265177488327026,grad_norm: 0.9999991533829679, iteration: 14239
loss: 1.0440510511398315,grad_norm: 0.9999991859254216, iteration: 14240
loss: 0.9558655619621277,grad_norm: 0.9999991178650669, iteration: 14241
loss: 1.0576906204223633,grad_norm: 0.9999998429280874, iteration: 14242
loss: 1.0475414991378784,grad_norm: 0.9999992232304066, iteration: 14243
loss: 1.0680912733078003,grad_norm: 0.9999998150907615, iteration: 14244
loss: 1.0251193046569824,grad_norm: 0.9999991787313781, iteration: 14245
loss: 0.9949196577072144,grad_norm: 0.9999992549873387, iteration: 14246
loss: 1.0515185594558716,grad_norm: 0.9999992012510437, iteration: 14247
loss: 0.9890016317367554,grad_norm: 0.999999207552918, iteration: 14248
loss: 1.0313971042633057,grad_norm: 0.9999991475930101, iteration: 14249
loss: 1.0319346189498901,grad_norm: 0.999999288234498, iteration: 14250
loss: 1.0124157667160034,grad_norm: 0.9999993834648923, iteration: 14251
loss: 1.0230796337127686,grad_norm: 0.9999998448319556, iteration: 14252
loss: 1.0645626783370972,grad_norm: 0.9999995333380807, iteration: 14253
loss: 1.0661673545837402,grad_norm: 0.999999849182783, iteration: 14254
loss: 1.0418329238891602,grad_norm: 0.9999992860115576, iteration: 14255
loss: 1.0067745447158813,grad_norm: 0.9999993827809024, iteration: 14256
loss: 1.0628119707107544,grad_norm: 0.9999992212115324, iteration: 14257
loss: 1.031569004058838,grad_norm: 0.9452505805081063, iteration: 14258
loss: 1.0560804605484009,grad_norm: 0.9999993036951005, iteration: 14259
loss: 1.0442126989364624,grad_norm: 0.9999991743709934, iteration: 14260
loss: 1.0313063859939575,grad_norm: 0.9999991425451391, iteration: 14261
loss: 1.0351552963256836,grad_norm: 0.9999991501927548, iteration: 14262
loss: 1.0601757764816284,grad_norm: 0.9999993038781071, iteration: 14263
loss: 1.0179167985916138,grad_norm: 0.9999998766479511, iteration: 14264
loss: 1.0336369276046753,grad_norm: 0.9999991784143986, iteration: 14265
loss: 1.0727875232696533,grad_norm: 0.9999996166312163, iteration: 14266
loss: 1.046554446220398,grad_norm: 0.9999990758577884, iteration: 14267
loss: 1.0097365379333496,grad_norm: 0.9903072777343338, iteration: 14268
loss: 1.0365920066833496,grad_norm: 0.9999993743199571, iteration: 14269
loss: 1.04771089553833,grad_norm: 0.9948804893046049, iteration: 14270
loss: 1.006292700767517,grad_norm: 0.9999991969340849, iteration: 14271
loss: 1.046543002128601,grad_norm: 0.9999991655738306, iteration: 14272
loss: 0.980750322341919,grad_norm: 0.9999992475266101, iteration: 14273
loss: 1.0217909812927246,grad_norm: 0.9999991545882659, iteration: 14274
loss: 1.0496443510055542,grad_norm: 0.999999160498678, iteration: 14275
loss: 1.0069925785064697,grad_norm: 0.9999991193398433, iteration: 14276
loss: 1.0251765251159668,grad_norm: 0.9999992759539905, iteration: 14277
loss: 1.0358792543411255,grad_norm: 0.9999991739970181, iteration: 14278
loss: 0.9991369843482971,grad_norm: 0.9999992539748772, iteration: 14279
loss: 1.0534157752990723,grad_norm: 0.9999992150002446, iteration: 14280
loss: 1.029226541519165,grad_norm: 0.999999435797674, iteration: 14281
loss: 1.0412760972976685,grad_norm: 0.9999991528823616, iteration: 14282
loss: 1.072026252746582,grad_norm: 0.9999993951919011, iteration: 14283
loss: 1.0394567251205444,grad_norm: 0.999999220070134, iteration: 14284
loss: 1.0573190450668335,grad_norm: 0.9999997531675331, iteration: 14285
loss: 1.0800617933273315,grad_norm: 0.9999993733442578, iteration: 14286
loss: 1.0472339391708374,grad_norm: 0.9999993732756466, iteration: 14287
loss: 1.0419923067092896,grad_norm: 0.9999998892207593, iteration: 14288
loss: 1.088407278060913,grad_norm: 0.9999995704193543, iteration: 14289
loss: 1.0059149265289307,grad_norm: 0.9208961686294883, iteration: 14290
loss: 1.0251868963241577,grad_norm: 0.9999989968325297, iteration: 14291
loss: 0.9924281239509583,grad_norm: 0.9906053015850627, iteration: 14292
loss: 1.083720088005066,grad_norm: 0.999999476088764, iteration: 14293
loss: 1.0216572284698486,grad_norm: 0.9999990295376805, iteration: 14294
loss: 0.989513099193573,grad_norm: 0.9522754978321943, iteration: 14295
loss: 0.9891389012336731,grad_norm: 0.9583039107227163, iteration: 14296
loss: 1.043060302734375,grad_norm: 0.9999995811488381, iteration: 14297
loss: 1.0288779735565186,grad_norm: 0.999999358549552, iteration: 14298
loss: 1.0235956907272339,grad_norm: 0.9999991743782998, iteration: 14299
loss: 0.9970209002494812,grad_norm: 0.9999992750060769, iteration: 14300
loss: 1.0271661281585693,grad_norm: 0.9999991983229766, iteration: 14301
loss: 1.004665732383728,grad_norm: 0.9999991301244806, iteration: 14302
loss: 1.0397980213165283,grad_norm: 0.999999065010032, iteration: 14303
loss: 1.0412731170654297,grad_norm: 0.9999994814819898, iteration: 14304
loss: 1.0478250980377197,grad_norm: 0.9938120086676158, iteration: 14305
loss: 1.0614625215530396,grad_norm: 0.999999353930216, iteration: 14306
loss: 1.0541386604309082,grad_norm: 0.9999995268246901, iteration: 14307
loss: 1.0218067169189453,grad_norm: 0.9999997104690486, iteration: 14308
loss: 1.0160764455795288,grad_norm: 0.9999995594568322, iteration: 14309
loss: 1.0306966304779053,grad_norm: 0.9999998350379347, iteration: 14310
loss: 1.0543655157089233,grad_norm: 0.9999995973825635, iteration: 14311
loss: 1.0520814657211304,grad_norm: 0.9999992885343849, iteration: 14312
loss: 1.043312430381775,grad_norm: 0.99999932341706, iteration: 14313
loss: 0.967262327671051,grad_norm: 0.9999993483504045, iteration: 14314
loss: 1.0355333089828491,grad_norm: 0.9999997185002311, iteration: 14315
loss: 1.0279077291488647,grad_norm: 0.9999993322363707, iteration: 14316
loss: 1.0285431146621704,grad_norm: 0.999999715172086, iteration: 14317
loss: 1.052274465560913,grad_norm: 0.9999991896218517, iteration: 14318
loss: 0.9958233833312988,grad_norm: 0.9999992185542104, iteration: 14319
loss: 0.9757944941520691,grad_norm: 0.9412874615956894, iteration: 14320
loss: 0.995344340801239,grad_norm: 0.9999991780799641, iteration: 14321
loss: 1.069250464439392,grad_norm: 0.9999997205219194, iteration: 14322
loss: 1.0291367769241333,grad_norm: 0.9999991896059628, iteration: 14323
loss: 1.0801632404327393,grad_norm: 0.9999993696148375, iteration: 14324
loss: 1.0092666149139404,grad_norm: 0.9999992134677018, iteration: 14325
loss: 1.0291798114776611,grad_norm: 0.9999991171550653, iteration: 14326
loss: 1.0242398977279663,grad_norm: 0.9999996789255435, iteration: 14327
loss: 1.0002578496932983,grad_norm: 0.9668931524578929, iteration: 14328
loss: 1.033661127090454,grad_norm: 0.999999060592558, iteration: 14329
loss: 0.9794350862503052,grad_norm: 0.9999991371280929, iteration: 14330
loss: 1.041459560394287,grad_norm: 0.9999992272665429, iteration: 14331
loss: 1.0207372903823853,grad_norm: 0.9999994102240848, iteration: 14332
loss: 1.0763568878173828,grad_norm: 0.9999991921593505, iteration: 14333
loss: 1.0795292854309082,grad_norm: 0.9999993282287146, iteration: 14334
loss: 1.047909140586853,grad_norm: 0.999999324024387, iteration: 14335
loss: 1.0278780460357666,grad_norm: 0.99999929511671, iteration: 14336
loss: 1.0553264617919922,grad_norm: 0.9999995571365191, iteration: 14337
loss: 1.0202372074127197,grad_norm: 0.999999364371609, iteration: 14338
loss: 0.9717375636100769,grad_norm: 0.9999992799648941, iteration: 14339
loss: 1.0624483823776245,grad_norm: 0.9999994143440044, iteration: 14340
loss: 1.0234066247940063,grad_norm: 0.9999994755188364, iteration: 14341
loss: 1.0427542924880981,grad_norm: 0.9999992031671869, iteration: 14342
loss: 1.0052902698516846,grad_norm: 0.9999991824929577, iteration: 14343
loss: 1.05983304977417,grad_norm: 0.9999992880122885, iteration: 14344
loss: 1.0415905714035034,grad_norm: 0.9999997366345144, iteration: 14345
loss: 1.1167198419570923,grad_norm: 0.9999996033803207, iteration: 14346
loss: 0.9829453229904175,grad_norm: 0.99999907442682, iteration: 14347
loss: 1.0507770776748657,grad_norm: 0.9999993330740587, iteration: 14348
loss: 1.0071734189987183,grad_norm: 0.9999991577828564, iteration: 14349
loss: 1.0182855129241943,grad_norm: 0.9999992399922539, iteration: 14350
loss: 1.0294668674468994,grad_norm: 0.9999995790734206, iteration: 14351
loss: 1.0691558122634888,grad_norm: 0.9999991155723241, iteration: 14352
loss: 1.1513943672180176,grad_norm: 0.999999793537834, iteration: 14353
loss: 1.054187297821045,grad_norm: 0.9999994497588491, iteration: 14354
loss: 1.008231282234192,grad_norm: 0.9999991159525431, iteration: 14355
loss: 1.0089163780212402,grad_norm: 0.9999992215798491, iteration: 14356
loss: 1.1246140003204346,grad_norm: 0.9999994130389536, iteration: 14357
loss: 1.0671603679656982,grad_norm: 0.999999372921472, iteration: 14358
loss: 1.0275064706802368,grad_norm: 0.9999991573202622, iteration: 14359
loss: 1.034900426864624,grad_norm: 0.99999921174312, iteration: 14360
loss: 0.9946476817131042,grad_norm: 0.9999992814978861, iteration: 14361
loss: 1.0542436838150024,grad_norm: 0.9999992922994214, iteration: 14362
loss: 1.052096962928772,grad_norm: 0.9999995538423453, iteration: 14363
loss: 1.02061128616333,grad_norm: 0.9999991642889116, iteration: 14364
loss: 1.0140315294265747,grad_norm: 0.9999992273876463, iteration: 14365
loss: 1.1259775161743164,grad_norm: 0.9999996703675084, iteration: 14366
loss: 0.972148597240448,grad_norm: 0.9999992396648891, iteration: 14367
loss: 1.0158283710479736,grad_norm: 0.9999993795250801, iteration: 14368
loss: 0.9882704019546509,grad_norm: 0.9999991808584947, iteration: 14369
loss: 0.9654471278190613,grad_norm: 0.9999992747134007, iteration: 14370
loss: 0.9976472854614258,grad_norm: 0.9999991554722034, iteration: 14371
loss: 1.003259301185608,grad_norm: 0.9999995015627395, iteration: 14372
loss: 1.0565115213394165,grad_norm: 0.9999993063743956, iteration: 14373
loss: 1.0047986507415771,grad_norm: 0.9999994279566712, iteration: 14374
loss: 1.024452805519104,grad_norm: 0.999999233580297, iteration: 14375
loss: 1.037807583808899,grad_norm: 0.9999999481705064, iteration: 14376
loss: 1.0257924795150757,grad_norm: 0.9999992587005637, iteration: 14377
loss: 1.0499337911605835,grad_norm: 0.999999216852444, iteration: 14378
loss: 1.0300109386444092,grad_norm: 0.9999991705203015, iteration: 14379
loss: 0.9900595545768738,grad_norm: 0.9999991247442849, iteration: 14380
loss: 1.0177398920059204,grad_norm: 0.9999997611234748, iteration: 14381
loss: 1.0530897378921509,grad_norm: 0.999999275835863, iteration: 14382
loss: 1.092230200767517,grad_norm: 0.9999998954277519, iteration: 14383
loss: 0.9889320731163025,grad_norm: 0.9999992303027827, iteration: 14384
loss: 1.0245962142944336,grad_norm: 0.9955704612146229, iteration: 14385
loss: 1.435793161392212,grad_norm: 0.999999678891621, iteration: 14386
loss: 0.9988176226615906,grad_norm: 0.9999991353517468, iteration: 14387
loss: 1.0314364433288574,grad_norm: 0.9999990539906818, iteration: 14388
loss: 1.0398039817810059,grad_norm: 0.9999989507101734, iteration: 14389
loss: 1.050487756729126,grad_norm: 0.9999994015278502, iteration: 14390
loss: 1.0474495887756348,grad_norm: 0.9999991951209818, iteration: 14391
loss: 1.0484991073608398,grad_norm: 0.9999994170980501, iteration: 14392
loss: 1.0604922771453857,grad_norm: 0.9999991645655529, iteration: 14393
loss: 1.0094951391220093,grad_norm: 0.9999990767247355, iteration: 14394
loss: 0.9995977282524109,grad_norm: 0.9992846238184798, iteration: 14395
loss: 1.013039469718933,grad_norm: 0.999999203184572, iteration: 14396
loss: 1.0198177099227905,grad_norm: 0.9999992269634078, iteration: 14397
loss: 0.999267578125,grad_norm: 0.9999992827366293, iteration: 14398
loss: 1.0550919771194458,grad_norm: 0.9999991659764679, iteration: 14399
loss: 1.0386686325073242,grad_norm: 0.9999992372933918, iteration: 14400
loss: 1.0162380933761597,grad_norm: 0.9999991181658945, iteration: 14401
loss: 1.0298140048980713,grad_norm: 0.9999992409356838, iteration: 14402
loss: 1.127901315689087,grad_norm: 0.9999993326538344, iteration: 14403
loss: 1.0315546989440918,grad_norm: 0.9999993903914097, iteration: 14404
loss: 1.0575908422470093,grad_norm: 0.9999996719321655, iteration: 14405
loss: 0.9934329390525818,grad_norm: 0.9999991220974579, iteration: 14406
loss: 1.0171747207641602,grad_norm: 0.9999991778750421, iteration: 14407
loss: 1.043469786643982,grad_norm: 0.9999992229848447, iteration: 14408
loss: 0.9974775910377502,grad_norm: 0.9282270125011973, iteration: 14409
loss: 1.0337638854980469,grad_norm: 0.9999992612141568, iteration: 14410
loss: 1.0256931781768799,grad_norm: 0.9999996133035293, iteration: 14411
loss: 1.010025143623352,grad_norm: 0.9999991728312665, iteration: 14412
loss: 1.0489000082015991,grad_norm: 0.9999991608182484, iteration: 14413
loss: 1.0273908376693726,grad_norm: 0.9999993508731599, iteration: 14414
loss: 1.0200387239456177,grad_norm: 0.9999992303217236, iteration: 14415
loss: 1.0266109704971313,grad_norm: 0.9999992702936905, iteration: 14416
loss: 1.0460389852523804,grad_norm: 0.9999991383707113, iteration: 14417
loss: 1.0202181339263916,grad_norm: 0.9999992058041292, iteration: 14418
loss: 1.0423355102539062,grad_norm: 0.9999992311187544, iteration: 14419
loss: 1.0171011686325073,grad_norm: 0.9999991898314998, iteration: 14420
loss: 0.9994205832481384,grad_norm: 0.9999992331561172, iteration: 14421
loss: 1.0249912738800049,grad_norm: 0.9999990527566077, iteration: 14422
loss: 1.0485919713974,grad_norm: 0.9999994742224976, iteration: 14423
loss: 1.0252294540405273,grad_norm: 0.9999993577098986, iteration: 14424
loss: 0.9920641183853149,grad_norm: 0.9999992655859354, iteration: 14425
loss: 1.017533302307129,grad_norm: 0.9999991266474506, iteration: 14426
loss: 1.0270553827285767,grad_norm: 0.999999272160222, iteration: 14427
loss: 1.020161747932434,grad_norm: 0.9999990688483887, iteration: 14428
loss: 1.023715615272522,grad_norm: 0.9999993636356806, iteration: 14429
loss: 1.0419714450836182,grad_norm: 0.9999990655963056, iteration: 14430
loss: 1.0589569807052612,grad_norm: 0.9999991175593009, iteration: 14431
loss: 1.0204854011535645,grad_norm: 0.9999990828436927, iteration: 14432
loss: 1.0010071992874146,grad_norm: 0.9999991658513376, iteration: 14433
loss: 1.0497477054595947,grad_norm: 0.9999994723746142, iteration: 14434
loss: 1.0462418794631958,grad_norm: 0.9999990241633548, iteration: 14435
loss: 1.059931755065918,grad_norm: 0.9999997806487365, iteration: 14436
loss: 1.0639803409576416,grad_norm: 0.9999996920886588, iteration: 14437
loss: 1.0164674520492554,grad_norm: 0.9999991811537527, iteration: 14438
loss: 1.0523319244384766,grad_norm: 0.9999997122059574, iteration: 14439
loss: 0.9986704587936401,grad_norm: 0.9999991399373181, iteration: 14440
loss: 1.0020583868026733,grad_norm: 0.9999991539442044, iteration: 14441
loss: 1.0144507884979248,grad_norm: 0.9999991290778498, iteration: 14442
loss: 1.0191375017166138,grad_norm: 0.9999990572151789, iteration: 14443
loss: 1.0429925918579102,grad_norm: 0.9999992475515072, iteration: 14444
loss: 1.0196783542633057,grad_norm: 0.9999992089688732, iteration: 14445
loss: 0.9698417782783508,grad_norm: 0.9999991271570609, iteration: 14446
loss: 1.031714916229248,grad_norm: 0.9999991304187764, iteration: 14447
loss: 1.048250675201416,grad_norm: 0.9765496669309787, iteration: 14448
loss: 1.001276969909668,grad_norm: 0.9999996508535551, iteration: 14449
loss: 0.9521756172180176,grad_norm: 0.9999991199150857, iteration: 14450
loss: 0.987101674079895,grad_norm: 0.9999993662487938, iteration: 14451
loss: 0.9974319338798523,grad_norm: 0.9576976161177527, iteration: 14452
loss: 1.038540005683899,grad_norm: 0.9999996004690663, iteration: 14453
loss: 1.0747246742248535,grad_norm: 0.9999992968761995, iteration: 14454
loss: 1.008188247680664,grad_norm: 0.9470578624720385, iteration: 14455
loss: 0.9868074059486389,grad_norm: 0.9999996564619797, iteration: 14456
loss: 1.0127595663070679,grad_norm: 0.9999992180956618, iteration: 14457
loss: 1.0046995878219604,grad_norm: 0.9999992996984548, iteration: 14458
loss: 1.0562701225280762,grad_norm: 0.9999995993741287, iteration: 14459
loss: 1.0003786087036133,grad_norm: 0.9999990485150223, iteration: 14460
loss: 1.057033658027649,grad_norm: 0.9999995070372272, iteration: 14461
loss: 1.0452159643173218,grad_norm: 0.9999990652420275, iteration: 14462
loss: 0.9944178462028503,grad_norm: 0.999999224025995, iteration: 14463
loss: 1.0298190116882324,grad_norm: 0.9999997583224289, iteration: 14464
loss: 1.0188246965408325,grad_norm: 0.9999992771367563, iteration: 14465
loss: 1.0052753686904907,grad_norm: 0.9999993247299037, iteration: 14466
loss: 1.0739837884902954,grad_norm: 0.9999992568765409, iteration: 14467
loss: 0.9845790863037109,grad_norm: 0.9999991616920381, iteration: 14468
loss: 1.0372368097305298,grad_norm: 0.9999991087451447, iteration: 14469
loss: 0.981971263885498,grad_norm: 0.999999287636198, iteration: 14470
loss: 1.050532341003418,grad_norm: 0.9999995366201404, iteration: 14471
loss: 1.0161192417144775,grad_norm: 0.9999991445315458, iteration: 14472
loss: 1.0265264511108398,grad_norm: 0.9999994658248772, iteration: 14473
loss: 1.0329960584640503,grad_norm: 0.9999991922581482, iteration: 14474
loss: 1.0533629655838013,grad_norm: 0.9999991200152584, iteration: 14475
loss: 1.0424495935440063,grad_norm: 0.9999992728408712, iteration: 14476
loss: 0.9998223781585693,grad_norm: 0.9999990909685813, iteration: 14477
loss: 1.012068510055542,grad_norm: 0.9999992780038498, iteration: 14478
loss: 1.0797137022018433,grad_norm: 0.9999993317755956, iteration: 14479
loss: 1.0369423627853394,grad_norm: 0.9999992523792275, iteration: 14480
loss: 1.0467679500579834,grad_norm: 0.9999991155245594, iteration: 14481
loss: 1.0411604642868042,grad_norm: 0.999999220812463, iteration: 14482
loss: 1.0325560569763184,grad_norm: 0.9999991238136134, iteration: 14483
loss: 1.0032838582992554,grad_norm: 0.9999993099439892, iteration: 14484
loss: 1.0326977968215942,grad_norm: 0.9999990503268754, iteration: 14485
loss: 0.9442285299301147,grad_norm: 0.9999991344024974, iteration: 14486
loss: 1.0517678260803223,grad_norm: 0.9999990647449378, iteration: 14487
loss: 1.0395630598068237,grad_norm: 0.9999994180703429, iteration: 14488
loss: 1.0169248580932617,grad_norm: 0.9999990763356127, iteration: 14489
loss: 1.020821452140808,grad_norm: 0.9999993655039734, iteration: 14490
loss: 1.0892565250396729,grad_norm: 0.9999996033349712, iteration: 14491
loss: 1.0560697317123413,grad_norm: 0.9999996622576823, iteration: 14492
loss: 1.003471851348877,grad_norm: 0.9999991765855042, iteration: 14493
loss: 1.0230567455291748,grad_norm: 0.9999992932463517, iteration: 14494
loss: 1.070122241973877,grad_norm: 0.9999991752525967, iteration: 14495
loss: 1.0349156856536865,grad_norm: 0.9999998466322273, iteration: 14496
loss: 1.0287100076675415,grad_norm: 0.9999992880161034, iteration: 14497
loss: 1.048951268196106,grad_norm: 0.9999990916965031, iteration: 14498
loss: 1.0271267890930176,grad_norm: 0.9999991477615557, iteration: 14499
loss: 1.0381474494934082,grad_norm: 0.9999992505720499, iteration: 14500
loss: 0.9709737300872803,grad_norm: 0.9999991916487997, iteration: 14501
loss: 1.0793390274047852,grad_norm: 0.999999409486478, iteration: 14502
loss: 0.9973649978637695,grad_norm: 0.9999992833876669, iteration: 14503
loss: 1.0078707933425903,grad_norm: 0.9999991305718112, iteration: 14504
loss: 1.034602165222168,grad_norm: 0.9999995497480446, iteration: 14505
loss: 1.0674829483032227,grad_norm: 0.999999867433065, iteration: 14506
loss: 1.0129555463790894,grad_norm: 0.9999992772682512, iteration: 14507
loss: 1.002273678779602,grad_norm: 0.9999990726225003, iteration: 14508
loss: 1.0255111455917358,grad_norm: 0.9999994286846107, iteration: 14509
loss: 0.9983857870101929,grad_norm: 0.9999994293447693, iteration: 14510
loss: 1.0451741218566895,grad_norm: 0.9999992386624863, iteration: 14511
loss: 1.001278281211853,grad_norm: 0.9999995452461625, iteration: 14512
loss: 0.9996340274810791,grad_norm: 0.9999992183047773, iteration: 14513
loss: 1.0363980531692505,grad_norm: 0.999999381482572, iteration: 14514
loss: 1.0654606819152832,grad_norm: 0.9999998266578244, iteration: 14515
loss: 1.0427026748657227,grad_norm: 0.9999993766237428, iteration: 14516
loss: 1.0218044519424438,grad_norm: 0.9999991638768453, iteration: 14517
loss: 0.9650136232376099,grad_norm: 0.9999993072418008, iteration: 14518
loss: 1.0070894956588745,grad_norm: 0.9602259825143148, iteration: 14519
loss: 1.0723984241485596,grad_norm: 0.9999998844936253, iteration: 14520
loss: 1.0500330924987793,grad_norm: 0.9999996230439107, iteration: 14521
loss: 1.0601564645767212,grad_norm: 0.9999994989239444, iteration: 14522
loss: 0.9915100336074829,grad_norm: 0.9999993835792399, iteration: 14523
loss: 1.0592228174209595,grad_norm: 0.9999993287180818, iteration: 14524
loss: 0.9983479976654053,grad_norm: 0.9999992559439929, iteration: 14525
loss: 1.0058579444885254,grad_norm: 0.999999153598637, iteration: 14526
loss: 1.0260374546051025,grad_norm: 0.9999991694761511, iteration: 14527
loss: 0.9958096742630005,grad_norm: 0.9999997126267302, iteration: 14528
loss: 1.0616374015808105,grad_norm: 0.9999993415038378, iteration: 14529
loss: 1.1604148149490356,grad_norm: 0.9999998595113814, iteration: 14530
loss: 1.0706806182861328,grad_norm: 0.9957483959052474, iteration: 14531
loss: 1.043341040611267,grad_norm: 0.999999196144806, iteration: 14532
loss: 1.0081701278686523,grad_norm: 0.9999992480956507, iteration: 14533
loss: 1.0139816999435425,grad_norm: 0.9999991019112652, iteration: 14534
loss: 1.0345654487609863,grad_norm: 0.9316225154194442, iteration: 14535
loss: 0.981696605682373,grad_norm: 0.9999993602497337, iteration: 14536
loss: 1.0284186601638794,grad_norm: 0.9999993298473072, iteration: 14537
loss: 0.9818587899208069,grad_norm: 0.9999994691634536, iteration: 14538
loss: 1.0062628984451294,grad_norm: 0.9999993560868154, iteration: 14539
loss: 1.1087234020233154,grad_norm: 0.9999995741015474, iteration: 14540
loss: 1.0551702976226807,grad_norm: 0.9999993671359786, iteration: 14541
loss: 1.0104601383209229,grad_norm: 0.9999990220650545, iteration: 14542
loss: 1.0052262544631958,grad_norm: 0.9999991941460662, iteration: 14543
loss: 1.0094836950302124,grad_norm: 0.9999990955571574, iteration: 14544
loss: 1.0284909009933472,grad_norm: 0.9999991203196771, iteration: 14545
loss: 1.0119632482528687,grad_norm: 0.9999991711666725, iteration: 14546
loss: 0.9748196601867676,grad_norm: 0.9999994963730635, iteration: 14547
loss: 0.9873538017272949,grad_norm: 0.9999992019116826, iteration: 14548
loss: 1.0400347709655762,grad_norm: 0.9999992377696836, iteration: 14549
loss: 1.0204849243164062,grad_norm: 0.9999991243700876, iteration: 14550
loss: 1.0603851079940796,grad_norm: 0.9999991103980758, iteration: 14551
loss: 1.0390043258666992,grad_norm: 0.9999995332606822, iteration: 14552
loss: 0.9903538823127747,grad_norm: 0.9999991183942691, iteration: 14553
loss: 1.0257165431976318,grad_norm: 0.9999995951056823, iteration: 14554
loss: 0.9780198931694031,grad_norm: 0.9989473878844614, iteration: 14555
loss: 1.0445733070373535,grad_norm: 0.9999992609628471, iteration: 14556
loss: 0.9880573153495789,grad_norm: 0.9999990838966718, iteration: 14557
loss: 1.050367832183838,grad_norm: 0.9999989871089673, iteration: 14558
loss: 1.050430417060852,grad_norm: 0.9999991723681874, iteration: 14559
loss: 0.9807428121566772,grad_norm: 0.926726197289343, iteration: 14560
loss: 1.0678631067276,grad_norm: 0.9999995449361819, iteration: 14561
loss: 1.02920401096344,grad_norm: 0.9999996160369956, iteration: 14562
loss: 0.9889881610870361,grad_norm: 0.99999911698818, iteration: 14563
loss: 1.0551154613494873,grad_norm: 0.9999992535339799, iteration: 14564
loss: 1.0086374282836914,grad_norm: 0.9999992642492235, iteration: 14565
loss: 1.0314664840698242,grad_norm: 0.9876245347696254, iteration: 14566
loss: 0.9797344207763672,grad_norm: 0.9999992070937401, iteration: 14567
loss: 1.0407085418701172,grad_norm: 0.9999993397781556, iteration: 14568
loss: 1.0392361879348755,grad_norm: 0.9999995691264498, iteration: 14569
loss: 1.0230518579483032,grad_norm: 0.9999992329553112, iteration: 14570
loss: 1.0386321544647217,grad_norm: 0.9999991134129802, iteration: 14571
loss: 1.0111665725708008,grad_norm: 0.9999991330060501, iteration: 14572
loss: 1.034523367881775,grad_norm: 0.9999991519712007, iteration: 14573
loss: 1.0118119716644287,grad_norm: 0.9999992941181588, iteration: 14574
loss: 1.0641542673110962,grad_norm: 0.999999135894245, iteration: 14575
loss: 1.0103981494903564,grad_norm: 0.9999991087977737, iteration: 14576
loss: 1.059433102607727,grad_norm: 0.9999992908136219, iteration: 14577
loss: 1.0544134378433228,grad_norm: 0.9999990663864937, iteration: 14578
loss: 1.0533734560012817,grad_norm: 0.9999998713954934, iteration: 14579
loss: 0.9975697994232178,grad_norm: 0.9999992886185947, iteration: 14580
loss: 1.0577301979064941,grad_norm: 0.99999928650243, iteration: 14581
loss: 1.004024863243103,grad_norm: 0.9999992219650011, iteration: 14582
loss: 1.034098744392395,grad_norm: 0.9999991054501666, iteration: 14583
loss: 1.0530506372451782,grad_norm: 0.9999990472094291, iteration: 14584
loss: 1.074974536895752,grad_norm: 0.999999596280832, iteration: 14585
loss: 1.04468834400177,grad_norm: 0.9999992075073194, iteration: 14586
loss: 1.0119999647140503,grad_norm: 0.9999992747367489, iteration: 14587
loss: 0.9995622038841248,grad_norm: 0.999999075563703, iteration: 14588
loss: 1.0343745946884155,grad_norm: 0.999999666766554, iteration: 14589
loss: 1.0577905178070068,grad_norm: 0.9999992852248494, iteration: 14590
loss: 1.037479043006897,grad_norm: 0.9999992239907883, iteration: 14591
loss: 1.009475827217102,grad_norm: 0.9694661685468248, iteration: 14592
loss: 1.0932596921920776,grad_norm: 0.9999995452268308, iteration: 14593
loss: 1.0285027027130127,grad_norm: 0.9999991157216057, iteration: 14594
loss: 0.9944191575050354,grad_norm: 0.9999990821880571, iteration: 14595
loss: 1.0531911849975586,grad_norm: 0.9999996031816127, iteration: 14596
loss: 1.019453525543213,grad_norm: 0.9999992526459751, iteration: 14597
loss: 1.011949062347412,grad_norm: 0.9999993869734563, iteration: 14598
loss: 1.054447054862976,grad_norm: 0.9999996301915095, iteration: 14599
loss: 1.0320219993591309,grad_norm: 0.9999996332667254, iteration: 14600
loss: 1.0433374643325806,grad_norm: 0.9999998603766889, iteration: 14601
loss: 1.0161691904067993,grad_norm: 0.9999992772051576, iteration: 14602
loss: 1.0999003648757935,grad_norm: 0.9999993219193759, iteration: 14603
loss: 1.0055235624313354,grad_norm: 0.9999992012231677, iteration: 14604
loss: 0.9984012246131897,grad_norm: 0.999999239591082, iteration: 14605
loss: 1.0532149076461792,grad_norm: 0.9999993657448903, iteration: 14606
loss: 0.9762003421783447,grad_norm: 0.999999263438846, iteration: 14607
loss: 1.0181686878204346,grad_norm: 0.9999993091657918, iteration: 14608
loss: 1.0378586053848267,grad_norm: 0.999999326499771, iteration: 14609
loss: 1.0014231204986572,grad_norm: 0.9999997043276108, iteration: 14610
loss: 1.0586291551589966,grad_norm: 0.9999990791101078, iteration: 14611
loss: 1.0446676015853882,grad_norm: 0.9999991900113832, iteration: 14612
loss: 1.0097579956054688,grad_norm: 0.999999293243201, iteration: 14613
loss: 1.0423227548599243,grad_norm: 0.9999991866322139, iteration: 14614
loss: 1.0184776782989502,grad_norm: 0.9999992268297669, iteration: 14615
loss: 1.0374897718429565,grad_norm: 0.9999995822297246, iteration: 14616
loss: 0.9912054538726807,grad_norm: 0.9999992320154782, iteration: 14617
loss: 0.9885295033454895,grad_norm: 0.9999995379404982, iteration: 14618
loss: 1.0060465335845947,grad_norm: 0.9999989665284359, iteration: 14619
loss: 1.0614320039749146,grad_norm: 0.9999995121961847, iteration: 14620
loss: 1.0453616380691528,grad_norm: 0.9999992539460422, iteration: 14621
loss: 1.0988545417785645,grad_norm: 0.9999997931565506, iteration: 14622
loss: 1.0385791063308716,grad_norm: 0.9999992631256821, iteration: 14623
loss: 0.989345371723175,grad_norm: 0.9999995867455828, iteration: 14624
loss: 1.0467873811721802,grad_norm: 0.9999992958293565, iteration: 14625
loss: 1.045135498046875,grad_norm: 0.9999992899324985, iteration: 14626
loss: 1.029698371887207,grad_norm: 0.8971835708286346, iteration: 14627
loss: 1.0271086692810059,grad_norm: 0.9999991346661314, iteration: 14628
loss: 1.0226905345916748,grad_norm: 0.9999991564295935, iteration: 14629
loss: 1.0510884523391724,grad_norm: 0.9999996332588963, iteration: 14630
loss: 1.0515832901000977,grad_norm: 0.9999995583653799, iteration: 14631
loss: 1.0455729961395264,grad_norm: 0.999999123747831, iteration: 14632
loss: 1.020662784576416,grad_norm: 0.9999993737098445, iteration: 14633
loss: 1.1013127565383911,grad_norm: 0.9999997231521789, iteration: 14634
loss: 1.0096317529678345,grad_norm: 0.9999991195251574, iteration: 14635
loss: 1.0367721319198608,grad_norm: 0.9999990382580238, iteration: 14636
loss: 1.0284873247146606,grad_norm: 0.9999992073751346, iteration: 14637
loss: 1.0699241161346436,grad_norm: 0.9999990733167988, iteration: 14638
loss: 1.0098938941955566,grad_norm: 0.9999998220514893, iteration: 14639
loss: 1.0558102130889893,grad_norm: 0.9999994714369634, iteration: 14640
loss: 1.0378966331481934,grad_norm: 0.9999991396538627, iteration: 14641
loss: 1.0190318822860718,grad_norm: 0.9150151734349079, iteration: 14642
loss: 1.0013190507888794,grad_norm: 0.9999990233595069, iteration: 14643
loss: 1.0527832508087158,grad_norm: 0.9999991960769956, iteration: 14644
loss: 1.0780283212661743,grad_norm: 0.9999998029456472, iteration: 14645
loss: 1.075341820716858,grad_norm: 0.9999993865736005, iteration: 14646
loss: 1.0427892208099365,grad_norm: 0.9999994198309283, iteration: 14647
loss: 1.0935587882995605,grad_norm: 0.9999998322714986, iteration: 14648
loss: 1.085146427154541,grad_norm: 0.9999994478454507, iteration: 14649
loss: 1.054313063621521,grad_norm: 0.9999990479009422, iteration: 14650
loss: 1.0517770051956177,grad_norm: 0.9999997028838234, iteration: 14651
loss: 1.0406633615493774,grad_norm: 0.9999995323808475, iteration: 14652
loss: 1.0220491886138916,grad_norm: 0.9999991197654297, iteration: 14653
loss: 1.006679892539978,grad_norm: 0.9999998099712153, iteration: 14654
loss: 1.0177651643753052,grad_norm: 0.9999994892549403, iteration: 14655
loss: 1.008212685585022,grad_norm: 0.9999992252184928, iteration: 14656
loss: 1.0328197479248047,grad_norm: 0.9999997289480445, iteration: 14657
loss: 1.0022549629211426,grad_norm: 0.9999997274390897, iteration: 14658
loss: 1.0586518049240112,grad_norm: 0.9999995854389494, iteration: 14659
loss: 0.9966752529144287,grad_norm: 0.9999990826749606, iteration: 14660
loss: 1.022750973701477,grad_norm: 0.999999217788746, iteration: 14661
loss: 1.049975872039795,grad_norm: 0.9999993355784317, iteration: 14662
loss: 0.997416079044342,grad_norm: 0.9999993743115408, iteration: 14663
loss: 1.0043256282806396,grad_norm: 0.999999610249064, iteration: 14664
loss: 1.0930901765823364,grad_norm: 0.9999991720458525, iteration: 14665
loss: 1.0646979808807373,grad_norm: 0.9999992323817739, iteration: 14666
loss: 0.9957306981086731,grad_norm: 0.9999994379343647, iteration: 14667
loss: 1.0208736658096313,grad_norm: 0.9999995239514053, iteration: 14668
loss: 1.053406834602356,grad_norm: 0.9999991610319532, iteration: 14669
loss: 1.0109730958938599,grad_norm: 0.9999994217630023, iteration: 14670
loss: 1.0118924379348755,grad_norm: 0.9999990132661213, iteration: 14671
loss: 1.046784520149231,grad_norm: 0.9999991078706689, iteration: 14672
loss: 1.017013669013977,grad_norm: 0.9999996712080972, iteration: 14673
loss: 0.984281599521637,grad_norm: 0.9999990861234173, iteration: 14674
loss: 1.017224907875061,grad_norm: 0.9999991494823706, iteration: 14675
loss: 1.0362584590911865,grad_norm: 0.999999285482939, iteration: 14676
loss: 1.056962490081787,grad_norm: 0.9999994387029053, iteration: 14677
loss: 1.079592227935791,grad_norm: 0.9999992724632721, iteration: 14678
loss: 1.0387256145477295,grad_norm: 0.999999126586046, iteration: 14679
loss: 1.0662509202957153,grad_norm: 0.9999992691124935, iteration: 14680
loss: 1.0336768627166748,grad_norm: 0.9999991905130802, iteration: 14681
loss: 1.0776119232177734,grad_norm: 0.9999996846961231, iteration: 14682
loss: 0.9708442091941833,grad_norm: 0.9999995332492015, iteration: 14683
loss: 1.0143613815307617,grad_norm: 0.922322597613165, iteration: 14684
loss: 1.05996572971344,grad_norm: 0.9999992910210397, iteration: 14685
loss: 1.0236409902572632,grad_norm: 0.999999361238385, iteration: 14686
loss: 1.0268570184707642,grad_norm: 0.9999992043069055, iteration: 14687
loss: 0.9845351576805115,grad_norm: 0.9999990778750245, iteration: 14688
loss: 1.0290380716323853,grad_norm: 0.9999990501613287, iteration: 14689
loss: 1.0199894905090332,grad_norm: 0.9999993409488769, iteration: 14690
loss: 1.0402032136917114,grad_norm: 0.9999991887309395, iteration: 14691
loss: 1.0368365049362183,grad_norm: 0.9964082162045165, iteration: 14692
loss: 1.012390375137329,grad_norm: 0.9999991921389354, iteration: 14693
loss: 1.0404293537139893,grad_norm: 0.9999993765089489, iteration: 14694
loss: 1.045475721359253,grad_norm: 0.9999992507456411, iteration: 14695
loss: 1.03154718875885,grad_norm: 0.9999991574842436, iteration: 14696
loss: 1.024808406829834,grad_norm: 0.9999997656513612, iteration: 14697
loss: 1.0385806560516357,grad_norm: 0.9999992440099938, iteration: 14698
loss: 0.9753068089485168,grad_norm: 0.9999991606917306, iteration: 14699
loss: 1.0513428449630737,grad_norm: 0.9999992046786629, iteration: 14700
loss: 1.0310508012771606,grad_norm: 0.9999994987641059, iteration: 14701
loss: 1.0450745820999146,grad_norm: 0.9999992184300852, iteration: 14702
loss: 1.0562633275985718,grad_norm: 0.9999989785526094, iteration: 14703
loss: 0.9932547211647034,grad_norm: 0.9999993616460021, iteration: 14704
loss: 1.0485365390777588,grad_norm: 0.9999992942969014, iteration: 14705
loss: 1.0492967367172241,grad_norm: 0.9999992139727927, iteration: 14706
loss: 1.017546534538269,grad_norm: 0.9999992199897546, iteration: 14707
loss: 1.0114728212356567,grad_norm: 0.9999990832334662, iteration: 14708
loss: 0.9920353889465332,grad_norm: 0.9999994737155756, iteration: 14709
loss: 1.023937463760376,grad_norm: 0.99999946818557, iteration: 14710
loss: 1.0340923070907593,grad_norm: 0.9999991951677641, iteration: 14711
loss: 1.0023927688598633,grad_norm: 0.9999992405405732, iteration: 14712
loss: 1.0479779243469238,grad_norm: 0.99999911029493, iteration: 14713
loss: 1.007616400718689,grad_norm: 0.9999992169382605, iteration: 14714
loss: 1.1272042989730835,grad_norm: 0.9999997696385284, iteration: 14715
loss: 1.0770601034164429,grad_norm: 0.9999992263153303, iteration: 14716
loss: 1.049831509590149,grad_norm: 0.9999998498321975, iteration: 14717
loss: 1.055930495262146,grad_norm: 0.9999995750512295, iteration: 14718
loss: 1.1289255619049072,grad_norm: 0.9999997532149109, iteration: 14719
loss: 1.062301516532898,grad_norm: 0.9999990193886731, iteration: 14720
loss: 1.053069829940796,grad_norm: 0.999999664790448, iteration: 14721
loss: 1.034331202507019,grad_norm: 0.9999992878441673, iteration: 14722
loss: 1.044181227684021,grad_norm: 0.9999991175564202, iteration: 14723
loss: 1.0206421613693237,grad_norm: 0.9999992459317709, iteration: 14724
loss: 1.0665122270584106,grad_norm: 0.9999992877548743, iteration: 14725
loss: 1.0988267660140991,grad_norm: 0.999999433149158, iteration: 14726
loss: 1.0067219734191895,grad_norm: 0.9999991050940099, iteration: 14727
loss: 1.0187530517578125,grad_norm: 0.9999997448776723, iteration: 14728
loss: 1.054793119430542,grad_norm: 0.9999996425498667, iteration: 14729
loss: 1.0083881616592407,grad_norm: 0.9999991965174237, iteration: 14730
loss: 1.0468101501464844,grad_norm: 0.9999991521164969, iteration: 14731
loss: 1.0109920501708984,grad_norm: 0.9999996449076117, iteration: 14732
loss: 1.0098532438278198,grad_norm: 0.9999996980132676, iteration: 14733
loss: 1.0475780963897705,grad_norm: 0.9999993096803194, iteration: 14734
loss: 1.1284390687942505,grad_norm: 0.9999994325023074, iteration: 14735
loss: 1.0370893478393555,grad_norm: 0.9686089532029145, iteration: 14736
loss: 1.0495234727859497,grad_norm: 0.9999997004089374, iteration: 14737
loss: 0.985890805721283,grad_norm: 0.9999996668228528, iteration: 14738
loss: 1.0833971500396729,grad_norm: 0.9999993543672558, iteration: 14739
loss: 1.0237294435501099,grad_norm: 0.9999993553926495, iteration: 14740
loss: 1.0507851839065552,grad_norm: 0.9999996002874613, iteration: 14741
loss: 1.0469835996627808,grad_norm: 0.9999991911059558, iteration: 14742
loss: 0.9797366857528687,grad_norm: 0.9642964208367013, iteration: 14743
loss: 1.0448137521743774,grad_norm: 0.9999995607137939, iteration: 14744
loss: 0.9790825843811035,grad_norm: 0.9826664596291466, iteration: 14745
loss: 1.0018019676208496,grad_norm: 0.9999994937090279, iteration: 14746
loss: 1.002000331878662,grad_norm: 0.9999992334042268, iteration: 14747
loss: 1.0404276847839355,grad_norm: 0.99999955657754, iteration: 14748
loss: 1.052796721458435,grad_norm: 0.9924695734221602, iteration: 14749
loss: 0.9908857941627502,grad_norm: 0.9999990333584602, iteration: 14750
loss: 0.9946516156196594,grad_norm: 0.9819956363633798, iteration: 14751
loss: 1.0270129442214966,grad_norm: 0.9999994742615246, iteration: 14752
loss: 1.0442363023757935,grad_norm: 0.9999992675697488, iteration: 14753
loss: 1.018381953239441,grad_norm: 0.9999992000287148, iteration: 14754
loss: 1.0361281633377075,grad_norm: 0.9999992142367943, iteration: 14755
loss: 1.0184476375579834,grad_norm: 0.9999991760892538, iteration: 14756
loss: 1.0878385305404663,grad_norm: 0.9999996592693574, iteration: 14757
loss: 0.9929323792457581,grad_norm: 0.9999991840924524, iteration: 14758
loss: 0.9639561176300049,grad_norm: 0.9999995781398086, iteration: 14759
loss: 1.0182417631149292,grad_norm: 0.9999992241963053, iteration: 14760
loss: 0.9923951029777527,grad_norm: 0.9999991077227157, iteration: 14761
loss: 1.009389877319336,grad_norm: 0.9999991416010486, iteration: 14762
loss: 1.0129717588424683,grad_norm: 0.9999997150177274, iteration: 14763
loss: 1.0079361200332642,grad_norm: 0.9999998902132085, iteration: 14764
loss: 1.0938032865524292,grad_norm: 0.9999997417205498, iteration: 14765
loss: 1.0426944494247437,grad_norm: 0.9999991896338102, iteration: 14766
loss: 1.065413475036621,grad_norm: 0.9999993105759671, iteration: 14767
loss: 1.050639271736145,grad_norm: 0.9999993070099792, iteration: 14768
loss: 1.0143650770187378,grad_norm: 0.9999991791205275, iteration: 14769
loss: 1.0611764192581177,grad_norm: 0.9999996623086901, iteration: 14770
loss: 1.0373163223266602,grad_norm: 0.9999997873757639, iteration: 14771
loss: 1.045556902885437,grad_norm: 0.9999994598323925, iteration: 14772
loss: 1.0339688062667847,grad_norm: 0.9999992631052376, iteration: 14773
loss: 1.0426443815231323,grad_norm: 0.9999994185439521, iteration: 14774
loss: 1.0169440507888794,grad_norm: 0.9999993820737878, iteration: 14775
loss: 0.993176281452179,grad_norm: 0.9999993487241656, iteration: 14776
loss: 1.0006102323532104,grad_norm: 0.9999991111193558, iteration: 14777
loss: 1.0082855224609375,grad_norm: 0.9999990719246276, iteration: 14778
loss: 1.042432427406311,grad_norm: 0.9999990683273069, iteration: 14779
loss: 1.041776418685913,grad_norm: 0.9999993107571958, iteration: 14780
loss: 1.0222657918930054,grad_norm: 0.9999991747951231, iteration: 14781
loss: 0.9890540242195129,grad_norm: 0.9999992899691149, iteration: 14782
loss: 1.0525305271148682,grad_norm: 0.999999557978173, iteration: 14783
loss: 1.0644936561584473,grad_norm: 0.999999621227504, iteration: 14784
loss: 1.0177154541015625,grad_norm: 0.9999997808230078, iteration: 14785
loss: 1.011048674583435,grad_norm: 0.9999996464669862, iteration: 14786
loss: 1.023397445678711,grad_norm: 0.9999990847815279, iteration: 14787
loss: 1.0102883577346802,grad_norm: 0.9999992068454648, iteration: 14788
loss: 1.0629301071166992,grad_norm: 0.999999391350809, iteration: 14789
loss: 1.0331164598464966,grad_norm: 0.9999992053787824, iteration: 14790
loss: 1.0332159996032715,grad_norm: 0.999999429001265, iteration: 14791
loss: 0.991476833820343,grad_norm: 0.9999991439503709, iteration: 14792
loss: 1.050135612487793,grad_norm: 0.999999029350399, iteration: 14793
loss: 1.0917751789093018,grad_norm: 0.9999996525496561, iteration: 14794
loss: 1.0074113607406616,grad_norm: 0.9999993724816751, iteration: 14795
loss: 1.0504429340362549,grad_norm: 0.9999996281354092, iteration: 14796
loss: 1.0536549091339111,grad_norm: 0.9999993889973672, iteration: 14797
loss: 1.0485763549804688,grad_norm: 0.9999994097545412, iteration: 14798
loss: 1.0647505521774292,grad_norm: 0.9999997699586074, iteration: 14799
loss: 1.0661842823028564,grad_norm: 0.9999994797370292, iteration: 14800
loss: 1.0112807750701904,grad_norm: 0.9999994847996341, iteration: 14801
loss: 1.045462965965271,grad_norm: 0.9999992691674214, iteration: 14802
loss: 1.0236061811447144,grad_norm: 0.9999993151572965, iteration: 14803
loss: 0.980126678943634,grad_norm: 0.9999992201418291, iteration: 14804
loss: 0.9965788722038269,grad_norm: 0.999999065619494, iteration: 14805
loss: 1.0099897384643555,grad_norm: 0.9999992978419214, iteration: 14806
loss: 1.0336551666259766,grad_norm: 0.9999993666193232, iteration: 14807
loss: 1.070433259010315,grad_norm: 0.9999996094316254, iteration: 14808
loss: 1.0051945447921753,grad_norm: 0.9999991204171742, iteration: 14809
loss: 1.0350310802459717,grad_norm: 0.9999994550258239, iteration: 14810
loss: 1.1015636920928955,grad_norm: 0.9999998192357498, iteration: 14811
loss: 1.0573556423187256,grad_norm: 0.9999992754943612, iteration: 14812
loss: 1.0514169931411743,grad_norm: 0.9999992839780305, iteration: 14813
loss: 1.0530905723571777,grad_norm: 0.9999990607173918, iteration: 14814
loss: 1.075523853302002,grad_norm: 0.999999314840432, iteration: 14815
loss: 1.0696053504943848,grad_norm: 0.9999992667770977, iteration: 14816
loss: 1.0318092107772827,grad_norm: 0.9691213196504954, iteration: 14817
loss: 1.0588778257369995,grad_norm: 0.9999995300947601, iteration: 14818
loss: 1.0478429794311523,grad_norm: 0.9999993192250248, iteration: 14819
loss: 1.0301283597946167,grad_norm: 0.9999998127615639, iteration: 14820
loss: 1.0071051120758057,grad_norm: 0.9999992996899276, iteration: 14821
loss: 1.038379192352295,grad_norm: 0.9999991823422523, iteration: 14822
loss: 1.0564478635787964,grad_norm: 0.9999994653631878, iteration: 14823
loss: 1.0126080513000488,grad_norm: 0.9999992595605056, iteration: 14824
loss: 1.0537189245224,grad_norm: 0.9999991925097983, iteration: 14825
loss: 1.007887601852417,grad_norm: 0.9999993308658318, iteration: 14826
loss: 1.0218870639801025,grad_norm: 0.9999991001354855, iteration: 14827
loss: 1.0079319477081299,grad_norm: 0.9999991084828297, iteration: 14828
loss: 1.0330597162246704,grad_norm: 0.9999996679644715, iteration: 14829
loss: 1.0253759622573853,grad_norm: 0.999999570838554, iteration: 14830
loss: 1.0081703662872314,grad_norm: 0.9999993369466947, iteration: 14831
loss: 1.0158189535140991,grad_norm: 0.999999081736571, iteration: 14832
loss: 1.0534394979476929,grad_norm: 0.999999174333456, iteration: 14833
loss: 1.0422444343566895,grad_norm: 0.9999995741104913, iteration: 14834
loss: 0.9666881561279297,grad_norm: 0.999999111673771, iteration: 14835
loss: 1.0097383260726929,grad_norm: 0.9999990792896533, iteration: 14836
loss: 1.0874602794647217,grad_norm: 0.9999995549374706, iteration: 14837
loss: 1.0569182634353638,grad_norm: 0.9999997480213669, iteration: 14838
loss: 1.0428251028060913,grad_norm: 0.9999990985845889, iteration: 14839
loss: 1.029819369316101,grad_norm: 0.9999991404632297, iteration: 14840
loss: 1.0142205953598022,grad_norm: 0.9999993734507198, iteration: 14841
loss: 1.051630973815918,grad_norm: 0.9999994390073047, iteration: 14842
loss: 0.9957378506660461,grad_norm: 0.999999165756437, iteration: 14843
loss: 1.0613919496536255,grad_norm: 0.9999996212574026, iteration: 14844
loss: 1.0119819641113281,grad_norm: 0.9999992773642784, iteration: 14845
loss: 0.9879851937294006,grad_norm: 0.99999961853433, iteration: 14846
loss: 1.0081385374069214,grad_norm: 0.9999992594862334, iteration: 14847
loss: 0.9980323910713196,grad_norm: 0.9999992725049093, iteration: 14848
loss: 1.0599294900894165,grad_norm: 0.9999991749248005, iteration: 14849
loss: 0.9817800521850586,grad_norm: 0.9999993540816284, iteration: 14850
loss: 1.0070871114730835,grad_norm: 0.999999173301402, iteration: 14851
loss: 1.0262465476989746,grad_norm: 0.9999993120062014, iteration: 14852
loss: 1.0446233749389648,grad_norm: 0.9999995138602827, iteration: 14853
loss: 1.0702948570251465,grad_norm: 0.9999996736025101, iteration: 14854
loss: 1.0188719034194946,grad_norm: 0.9999995137303485, iteration: 14855
loss: 1.0298075675964355,grad_norm: 0.9999994996227644, iteration: 14856
loss: 1.0642648935317993,grad_norm: 0.9999991958452717, iteration: 14857
loss: 1.038102149963379,grad_norm: 0.9999993083737899, iteration: 14858
loss: 1.0056898593902588,grad_norm: 0.999999094374968, iteration: 14859
loss: 0.9523699283599854,grad_norm: 0.9999990913838575, iteration: 14860
loss: 1.0595288276672363,grad_norm: 0.9999994006814196, iteration: 14861
loss: 1.0576144456863403,grad_norm: 0.9999996624600255, iteration: 14862
loss: 1.0432994365692139,grad_norm: 0.9999991777096595, iteration: 14863
loss: 1.0489758253097534,grad_norm: 0.9999994973809939, iteration: 14864
loss: 1.0936732292175293,grad_norm: 0.9562815070831341, iteration: 14865
loss: 0.9968037009239197,grad_norm: 0.9999991854588872, iteration: 14866
loss: 1.0327903032302856,grad_norm: 0.9999994041652571, iteration: 14867
loss: 1.0307923555374146,grad_norm: 0.999999345022274, iteration: 14868
loss: 1.0224977731704712,grad_norm: 0.999999360279929, iteration: 14869
loss: 1.074232578277588,grad_norm: 0.9999996536752971, iteration: 14870
loss: 1.0865693092346191,grad_norm: 0.999999544381028, iteration: 14871
loss: 1.021101474761963,grad_norm: 0.9999992137753566, iteration: 14872
loss: 1.0183453559875488,grad_norm: 0.9999996644924685, iteration: 14873
loss: 1.0480562448501587,grad_norm: 0.9999991787397221, iteration: 14874
loss: 1.01436185836792,grad_norm: 0.9999991907400412, iteration: 14875
loss: 1.029119849205017,grad_norm: 0.9999993925250731, iteration: 14876
loss: 1.0390087366104126,grad_norm: 0.9999991927862514, iteration: 14877
loss: 1.0593714714050293,grad_norm: 0.9999995889964384, iteration: 14878
loss: 0.9908112287521362,grad_norm: 0.9999996488339623, iteration: 14879
loss: 1.0454742908477783,grad_norm: 0.9999992714793654, iteration: 14880
loss: 1.038638949394226,grad_norm: 0.9999992968014135, iteration: 14881
loss: 0.9627474546432495,grad_norm: 0.9999991588101603, iteration: 14882
loss: 1.0053141117095947,grad_norm: 0.9999994765803073, iteration: 14883
loss: 1.057239294052124,grad_norm: 0.9999994835101823, iteration: 14884
loss: 0.9746478796005249,grad_norm: 0.9999993459973191, iteration: 14885
loss: 1.0306246280670166,grad_norm: 0.999999538486784, iteration: 14886
loss: 1.0285874605178833,grad_norm: 0.9999990279116374, iteration: 14887
loss: 1.0838137865066528,grad_norm: 0.9999994197939202, iteration: 14888
loss: 1.0419578552246094,grad_norm: 0.9999990354650132, iteration: 14889
loss: 1.0441969633102417,grad_norm: 0.9552131770753185, iteration: 14890
loss: 1.061975121498108,grad_norm: 0.9999999143323667, iteration: 14891
loss: 0.9995954036712646,grad_norm: 0.9999995466126099, iteration: 14892
loss: 1.1019071340560913,grad_norm: 0.9999996348987416, iteration: 14893
loss: 1.022873044013977,grad_norm: 0.9999992563342823, iteration: 14894
loss: 1.083199143409729,grad_norm: 0.9999997398724624, iteration: 14895
loss: 1.0153661966323853,grad_norm: 0.9999992242183606, iteration: 14896
loss: 1.058024287223816,grad_norm: 0.9999992775222163, iteration: 14897
loss: 1.1288466453552246,grad_norm: 0.9999997436365982, iteration: 14898
loss: 1.0548744201660156,grad_norm: 0.9999993010158535, iteration: 14899
loss: 1.0274125337600708,grad_norm: 0.9999991836568914, iteration: 14900
loss: 1.001477837562561,grad_norm: 0.9999991241069063, iteration: 14901
loss: 1.0429329872131348,grad_norm: 0.9999992045700772, iteration: 14902
loss: 1.0095278024673462,grad_norm: 0.9999993118204502, iteration: 14903
loss: 1.0372812747955322,grad_norm: 0.9999991333015779, iteration: 14904
loss: 1.0553464889526367,grad_norm: 0.9999993159605746, iteration: 14905
loss: 1.0179967880249023,grad_norm: 0.9999994404973227, iteration: 14906
loss: 1.0287855863571167,grad_norm: 0.999999748803016, iteration: 14907
loss: 1.041348934173584,grad_norm: 0.9999990913717705, iteration: 14908
loss: 0.9994987845420837,grad_norm: 0.999999434295295, iteration: 14909
loss: 1.0102976560592651,grad_norm: 0.9999992562065013, iteration: 14910
loss: 1.0342353582382202,grad_norm: 0.9999996816971056, iteration: 14911
loss: 1.0522189140319824,grad_norm: 0.9999993219660673, iteration: 14912
loss: 1.0560256242752075,grad_norm: 0.9999995419005372, iteration: 14913
loss: 1.006951928138733,grad_norm: 0.9999993193897218, iteration: 14914
loss: 1.0133663415908813,grad_norm: 0.9999998064840401, iteration: 14915
loss: 1.0009907484054565,grad_norm: 0.9999997938438392, iteration: 14916
loss: 1.047818899154663,grad_norm: 0.9999992686385751, iteration: 14917
loss: 1.0506017208099365,grad_norm: 0.9999993110714867, iteration: 14918
loss: 1.0319817066192627,grad_norm: 0.9999991510018517, iteration: 14919
loss: 1.0252711772918701,grad_norm: 0.9999993152800433, iteration: 14920
loss: 1.0377402305603027,grad_norm: 0.9999997604116966, iteration: 14921
loss: 0.9891079068183899,grad_norm: 0.9999992864687565, iteration: 14922
loss: 1.1192185878753662,grad_norm: 0.999999532160275, iteration: 14923
loss: 0.9865502119064331,grad_norm: 0.9999991361626889, iteration: 14924
loss: 1.0403380393981934,grad_norm: 0.9999990120553364, iteration: 14925
loss: 0.9816398620605469,grad_norm: 0.9821336144685726, iteration: 14926
loss: 1.050148367881775,grad_norm: 0.9999994097746505, iteration: 14927
loss: 0.9802048802375793,grad_norm: 0.9999992185776695, iteration: 14928
loss: 1.0252485275268555,grad_norm: 0.9999992822374811, iteration: 14929
loss: 1.0712835788726807,grad_norm: 0.9999994705306144, iteration: 14930
loss: 1.038101315498352,grad_norm: 0.9999997445283889, iteration: 14931
loss: 1.0094784498214722,grad_norm: 0.9999991774356015, iteration: 14932
loss: 1.0028685331344604,grad_norm: 0.9999992288280954, iteration: 14933
loss: 1.060930609703064,grad_norm: 0.9999993275935654, iteration: 14934
loss: 1.0160434246063232,grad_norm: 0.9919433721809122, iteration: 14935
loss: 0.9621429443359375,grad_norm: 0.9999992675215584, iteration: 14936
loss: 1.062243938446045,grad_norm: 0.9999992586418079, iteration: 14937
loss: 0.9933187365531921,grad_norm: 0.9999993435307468, iteration: 14938
loss: 0.9930979013442993,grad_norm: 0.9999998238097874, iteration: 14939
loss: 1.003891944885254,grad_norm: 0.9999992444121836, iteration: 14940
loss: 1.018540620803833,grad_norm: 0.9999998138604189, iteration: 14941
loss: 1.0387606620788574,grad_norm: 0.9999993153019903, iteration: 14942
loss: 1.048869013786316,grad_norm: 0.9999992405016912, iteration: 14943
loss: 1.0176838636398315,grad_norm: 0.9999991986994794, iteration: 14944
loss: 0.9750485420227051,grad_norm: 0.9999994267612381, iteration: 14945
loss: 1.037295937538147,grad_norm: 0.999999172695601, iteration: 14946
loss: 1.0050238370895386,grad_norm: 0.9999992183369484, iteration: 14947
loss: 1.0995019674301147,grad_norm: 0.9999994116940208, iteration: 14948
loss: 1.0745435953140259,grad_norm: 0.9999996223119205, iteration: 14949
loss: 1.0012142658233643,grad_norm: 0.9999991949366488, iteration: 14950
loss: 1.0578422546386719,grad_norm: 0.9999991158067935, iteration: 14951
loss: 0.992276132106781,grad_norm: 0.9999992469900376, iteration: 14952
loss: 1.0227292776107788,grad_norm: 0.9999992136624128, iteration: 14953
loss: 1.0305808782577515,grad_norm: 0.9999991357280612, iteration: 14954
loss: 1.0572830438613892,grad_norm: 0.9999990824667697, iteration: 14955
loss: 1.016517996788025,grad_norm: 0.9999995297150132, iteration: 14956
loss: 1.0155690908432007,grad_norm: 0.9999994094626702, iteration: 14957
loss: 1.0117065906524658,grad_norm: 0.9999990943990893, iteration: 14958
loss: 0.9655667543411255,grad_norm: 0.9999994673890369, iteration: 14959
loss: 1.0323106050491333,grad_norm: 0.9999996084041497, iteration: 14960
loss: 0.945318877696991,grad_norm: 0.9999991731670987, iteration: 14961
loss: 1.017459750175476,grad_norm: 0.9999992561410717, iteration: 14962
loss: 1.0442250967025757,grad_norm: 0.999999816245652, iteration: 14963
loss: 0.9986423254013062,grad_norm: 0.999999425628362, iteration: 14964
loss: 1.0147408246994019,grad_norm: 0.99999946157811, iteration: 14965
loss: 1.0019111633300781,grad_norm: 0.9999993131899102, iteration: 14966
loss: 1.019711971282959,grad_norm: 0.9999992679591742, iteration: 14967
loss: 1.02874755859375,grad_norm: 0.9999991045627558, iteration: 14968
loss: 0.9903377890586853,grad_norm: 0.9999992497328647, iteration: 14969
loss: 1.050744891166687,grad_norm: 0.9999993668896289, iteration: 14970
loss: 1.0573701858520508,grad_norm: 0.9999992133661747, iteration: 14971
loss: 1.0564271211624146,grad_norm: 0.9999992119286066, iteration: 14972
loss: 1.0437190532684326,grad_norm: 0.9999994001628337, iteration: 14973
loss: 0.9986486434936523,grad_norm: 0.9999991271948043, iteration: 14974
loss: 1.0505924224853516,grad_norm: 0.9999992259096345, iteration: 14975
loss: 1.0494451522827148,grad_norm: 0.999999552069773, iteration: 14976
loss: 1.0854051113128662,grad_norm: 0.9999994124064333, iteration: 14977
loss: 1.0675715208053589,grad_norm: 0.9999995881939447, iteration: 14978
loss: 0.9916483759880066,grad_norm: 0.8604034531997816, iteration: 14979
loss: 1.069105863571167,grad_norm: 0.999999285013706, iteration: 14980
loss: 1.0490175485610962,grad_norm: 0.999999276542472, iteration: 14981
loss: 0.9998898506164551,grad_norm: 0.9999993049618586, iteration: 14982
loss: 1.0206596851348877,grad_norm: 0.9999997025585928, iteration: 14983
loss: 1.0029563903808594,grad_norm: 0.9999991025089848, iteration: 14984
loss: 1.057815432548523,grad_norm: 0.9999995743029515, iteration: 14985
loss: 1.0801031589508057,grad_norm: 0.999999409368102, iteration: 14986
loss: 1.051893949508667,grad_norm: 0.9999993050136018, iteration: 14987
loss: 0.9417006373405457,grad_norm: 0.9999992889691964, iteration: 14988
loss: 1.017991304397583,grad_norm: 0.9999991868414804, iteration: 14989
loss: 1.0232418775558472,grad_norm: 0.9572983989908019, iteration: 14990
loss: 1.0168657302856445,grad_norm: 0.9999992413443846, iteration: 14991
loss: 1.007369875907898,grad_norm: 0.9999991653907863, iteration: 14992
loss: 0.98481684923172,grad_norm: 0.9999995985551602, iteration: 14993
loss: 1.0800870656967163,grad_norm: 0.9999994075147626, iteration: 14994
loss: 1.0138951539993286,grad_norm: 0.9999997091572187, iteration: 14995
loss: 0.9718896746635437,grad_norm: 0.9999991925012113, iteration: 14996
loss: 1.0244256258010864,grad_norm: 0.9747543787270692, iteration: 14997
loss: 1.033026099205017,grad_norm: 0.9999992796915993, iteration: 14998
loss: 1.0066312551498413,grad_norm: 0.9999993979651338, iteration: 14999
loss: 1.026505470275879,grad_norm: 0.9846290625774772, iteration: 15000
loss: 1.0114946365356445,grad_norm: 0.9999991442304521, iteration: 15001
loss: 1.0290744304656982,grad_norm: 0.9999994694368103, iteration: 15002
loss: 1.0468143224716187,grad_norm: 0.9999995994490581, iteration: 15003
loss: 0.9751616716384888,grad_norm: 0.9999992901030197, iteration: 15004
loss: 1.0183916091918945,grad_norm: 0.9706881489049, iteration: 15005
loss: 1.0124480724334717,grad_norm: 0.9999992262420185, iteration: 15006
loss: 1.0376880168914795,grad_norm: 0.999999211479163, iteration: 15007
loss: 1.0443015098571777,grad_norm: 0.999998988007628, iteration: 15008
loss: 1.0186165571212769,grad_norm: 0.9435785584376827, iteration: 15009
loss: 1.0265487432479858,grad_norm: 0.9999992577551439, iteration: 15010
loss: 1.0051746368408203,grad_norm: 0.9999991596974142, iteration: 15011
loss: 1.0193320512771606,grad_norm: 0.9999991214679004, iteration: 15012
loss: 1.066443920135498,grad_norm: 0.9999992330935272, iteration: 15013
loss: 1.0251191854476929,grad_norm: 0.9992110442308428, iteration: 15014
loss: 1.0334829092025757,grad_norm: 0.9999991183800886, iteration: 15015
loss: 1.0540093183517456,grad_norm: 0.9999995748752745, iteration: 15016
loss: 1.0423364639282227,grad_norm: 0.9985127548739419, iteration: 15017
loss: 1.0543123483657837,grad_norm: 0.9999995584713304, iteration: 15018
loss: 0.9879978895187378,grad_norm: 0.9999993322475509, iteration: 15019
loss: 1.0255764722824097,grad_norm: 0.9999993107864517, iteration: 15020
loss: 1.0225589275360107,grad_norm: 0.9999992660229623, iteration: 15021
loss: 1.0218262672424316,grad_norm: 0.999999184571441, iteration: 15022
loss: 1.029379963874817,grad_norm: 0.9999990126622939, iteration: 15023
loss: 1.018201231956482,grad_norm: 0.9999991872669884, iteration: 15024
loss: 1.0031527280807495,grad_norm: 0.9999995905636658, iteration: 15025
loss: 1.0240896940231323,grad_norm: 0.9999992600759862, iteration: 15026
loss: 0.9849026203155518,grad_norm: 0.9858900820030632, iteration: 15027
loss: 1.032596230506897,grad_norm: 0.9999990632526375, iteration: 15028
loss: 1.0075620412826538,grad_norm: 0.9687073700490239, iteration: 15029
loss: 1.0070030689239502,grad_norm: 0.984397970800966, iteration: 15030
loss: 1.0906360149383545,grad_norm: 0.9999997301351642, iteration: 15031
loss: 1.0058112144470215,grad_norm: 0.9999990064381825, iteration: 15032
loss: 1.0147416591644287,grad_norm: 0.9999991357172561, iteration: 15033
loss: 0.9921106696128845,grad_norm: 0.9999990640801613, iteration: 15034
loss: 1.0464341640472412,grad_norm: 0.9999992949655064, iteration: 15035
loss: 1.0519647598266602,grad_norm: 0.999999146126362, iteration: 15036
loss: 1.0197087526321411,grad_norm: 0.9864076693012074, iteration: 15037
loss: 1.0324286222457886,grad_norm: 0.9999996484321055, iteration: 15038
loss: 1.002590537071228,grad_norm: 0.999999196354643, iteration: 15039
loss: 1.037928819656372,grad_norm: 0.9999991116480514, iteration: 15040
loss: 0.9976146221160889,grad_norm: 0.9999992608892507, iteration: 15041
loss: 1.0426723957061768,grad_norm: 0.9999992655354735, iteration: 15042
loss: 1.076030969619751,grad_norm: 0.9999991462575362, iteration: 15043
loss: 0.9995622038841248,grad_norm: 0.9999991292455865, iteration: 15044
loss: 0.9740194082260132,grad_norm: 0.9831069950755688, iteration: 15045
loss: 1.0462517738342285,grad_norm: 0.999999474485625, iteration: 15046
loss: 1.0446211099624634,grad_norm: 0.9999993546273492, iteration: 15047
loss: 1.033352255821228,grad_norm: 0.9999991890314691, iteration: 15048
loss: 1.0807561874389648,grad_norm: 0.9999992260715207, iteration: 15049
loss: 1.044666051864624,grad_norm: 0.9999997906784114, iteration: 15050
loss: 1.0306437015533447,grad_norm: 0.9999991206610785, iteration: 15051
loss: 1.006833553314209,grad_norm: 0.9522472094801285, iteration: 15052
loss: 1.05048668384552,grad_norm: 0.999999742271225, iteration: 15053
loss: 1.0118229389190674,grad_norm: 0.9999991313827169, iteration: 15054
loss: 1.0021189451217651,grad_norm: 0.9999993515441656, iteration: 15055
loss: 1.0080171823501587,grad_norm: 0.9999991616467658, iteration: 15056
loss: 1.025089144706726,grad_norm: 0.9857538321712607, iteration: 15057
loss: 0.9876090884208679,grad_norm: 0.9999990502412335, iteration: 15058
loss: 1.0408306121826172,grad_norm: 0.9999991036538617, iteration: 15059
loss: 0.9890385270118713,grad_norm: 0.9999991602517309, iteration: 15060
loss: 1.0255303382873535,grad_norm: 0.9999992229012729, iteration: 15061
loss: 1.0043669939041138,grad_norm: 0.9999989751857261, iteration: 15062
loss: 0.9914993047714233,grad_norm: 0.9999992433230946, iteration: 15063
loss: 1.0282853841781616,grad_norm: 0.9999996409280694, iteration: 15064
loss: 1.0423561334609985,grad_norm: 0.9999992392574016, iteration: 15065
loss: 1.003502368927002,grad_norm: 0.9999990744020675, iteration: 15066
loss: 1.003635287284851,grad_norm: 0.9999993212461206, iteration: 15067
loss: 1.0102488994598389,grad_norm: 0.9999998658809108, iteration: 15068
loss: 1.0487040281295776,grad_norm: 0.9999995224972186, iteration: 15069
loss: 1.0307815074920654,grad_norm: 0.9999996599754848, iteration: 15070
loss: 0.9997498393058777,grad_norm: 0.9999991389527264, iteration: 15071
loss: 1.0439354181289673,grad_norm: 0.9999991862398101, iteration: 15072
loss: 0.9798135161399841,grad_norm: 0.9999992114622482, iteration: 15073
loss: 0.9945003390312195,grad_norm: 0.9525619104263611, iteration: 15074
loss: 1.062734842300415,grad_norm: 0.99999937975411, iteration: 15075
loss: 0.9948561191558838,grad_norm: 0.999999217431147, iteration: 15076
loss: 1.0617921352386475,grad_norm: 0.9999990730823182, iteration: 15077
loss: 1.0163074731826782,grad_norm: 0.9999990445468574, iteration: 15078
loss: 1.0322067737579346,grad_norm: 0.9999992444551266, iteration: 15079
loss: 1.0262089967727661,grad_norm: 0.9999994284402056, iteration: 15080
loss: 1.016570806503296,grad_norm: 0.9999993111085436, iteration: 15081
loss: 1.070853352546692,grad_norm: 0.9999996046020188, iteration: 15082
loss: 1.1011106967926025,grad_norm: 0.9999994335825364, iteration: 15083
loss: 1.0498250722885132,grad_norm: 0.9999995626445191, iteration: 15084
loss: 1.0077186822891235,grad_norm: 0.9999992824413939, iteration: 15085
loss: 0.9801841378211975,grad_norm: 0.999999667110868, iteration: 15086
loss: 0.991335391998291,grad_norm: 0.9986941020332151, iteration: 15087
loss: 1.021541714668274,grad_norm: 0.9999997587066909, iteration: 15088
loss: 1.1105198860168457,grad_norm: 0.9999996546875248, iteration: 15089
loss: 1.002776861190796,grad_norm: 0.9999993714784722, iteration: 15090
loss: 1.0261093378067017,grad_norm: 0.9999992767490323, iteration: 15091
loss: 1.0457395315170288,grad_norm: 0.9999993080064654, iteration: 15092
loss: 1.057044267654419,grad_norm: 0.9999992976939515, iteration: 15093
loss: 1.0232644081115723,grad_norm: 0.9999998983215849, iteration: 15094
loss: 1.0669732093811035,grad_norm: 0.9999999422662066, iteration: 15095
loss: 0.9717560410499573,grad_norm: 0.9999992570524154, iteration: 15096
loss: 1.0130155086517334,grad_norm: 0.9999991426593055, iteration: 15097
loss: 1.0229989290237427,grad_norm: 0.9999993390406342, iteration: 15098
loss: 1.0113551616668701,grad_norm: 0.9999993252144513, iteration: 15099
loss: 1.0808976888656616,grad_norm: 0.9999997236559007, iteration: 15100
loss: 0.9541662335395813,grad_norm: 0.999999327818702, iteration: 15101
loss: 0.9952201247215271,grad_norm: 0.9999993195541409, iteration: 15102
loss: 1.0256752967834473,grad_norm: 0.9999995038305606, iteration: 15103
loss: 1.0669288635253906,grad_norm: 0.9999997132108687, iteration: 15104
loss: 1.0768266916275024,grad_norm: 0.9999997683975691, iteration: 15105
loss: 0.9800031781196594,grad_norm: 0.9999992256982472, iteration: 15106
loss: 1.004949927330017,grad_norm: 0.9999996133238962, iteration: 15107
loss: 1.0092518329620361,grad_norm: 0.9579709487945073, iteration: 15108
loss: 1.0264263153076172,grad_norm: 0.9999991053699139, iteration: 15109
loss: 1.07315993309021,grad_norm: 0.9999997072733664, iteration: 15110
loss: 1.033400297164917,grad_norm: 0.999999436769571, iteration: 15111
loss: 1.0325406789779663,grad_norm: 0.9999992718253141, iteration: 15112
loss: 1.0188593864440918,grad_norm: 0.9999991771652998, iteration: 15113
loss: 1.0476285219192505,grad_norm: 0.9999993734725992, iteration: 15114
loss: 1.009074091911316,grad_norm: 0.9999992163771912, iteration: 15115
loss: 1.0829399824142456,grad_norm: 0.9999996742230357, iteration: 15116
loss: 1.0422383546829224,grad_norm: 0.9999992511444827, iteration: 15117
loss: 1.057909369468689,grad_norm: 0.9999998143550072, iteration: 15118
loss: 1.0511962175369263,grad_norm: 0.9999994796458332, iteration: 15119
loss: 1.0159614086151123,grad_norm: 0.9999996886586172, iteration: 15120
loss: 1.0379410982131958,grad_norm: 0.9999990815588026, iteration: 15121
loss: 0.9580338597297668,grad_norm: 0.9999992608509629, iteration: 15122
loss: 1.02351713180542,grad_norm: 0.9450634432077601, iteration: 15123
loss: 1.0307071208953857,grad_norm: 0.9999994504181142, iteration: 15124
loss: 1.0515724420547485,grad_norm: 0.9999998273168194, iteration: 15125
loss: 0.9725504517555237,grad_norm: 0.9999990674062503, iteration: 15126
loss: 1.051699161529541,grad_norm: 0.9999990845375808, iteration: 15127
loss: 1.0220190286636353,grad_norm: 0.9999992625927651, iteration: 15128
loss: 0.9938815832138062,grad_norm: 0.9999991110966049, iteration: 15129
loss: 1.1263612508773804,grad_norm: 0.9999997583880249, iteration: 15130
loss: 1.0183959007263184,grad_norm: 0.9999995280462792, iteration: 15131
loss: 1.0169776678085327,grad_norm: 0.9999993020555881, iteration: 15132
loss: 1.0292977094650269,grad_norm: 0.9999995455272015, iteration: 15133
loss: 1.046010136604309,grad_norm: 0.9999990341459182, iteration: 15134
loss: 1.0261921882629395,grad_norm: 0.9999992414010983, iteration: 15135
loss: 0.9814023375511169,grad_norm: 0.9999991252736752, iteration: 15136
loss: 1.0244723558425903,grad_norm: 0.9999992054125209, iteration: 15137
loss: 1.029865026473999,grad_norm: 0.9999997088443163, iteration: 15138
loss: 0.9820136427879333,grad_norm: 0.8912961099914318, iteration: 15139
loss: 1.0701438188552856,grad_norm: 0.9999995273341769, iteration: 15140
loss: 1.120126724243164,grad_norm: 0.9999996022870248, iteration: 15141
loss: 1.013412356376648,grad_norm: 0.9999991111101025, iteration: 15142
loss: 1.0181934833526611,grad_norm: 0.9999992044152416, iteration: 15143
loss: 0.9811985492706299,grad_norm: 0.99999900348221, iteration: 15144
loss: 1.0942522287368774,grad_norm: 0.9999994775944236, iteration: 15145
loss: 1.0308071374893188,grad_norm: 0.999999405172155, iteration: 15146
loss: 1.0463985204696655,grad_norm: 0.9999992316597273, iteration: 15147
loss: 1.015529990196228,grad_norm: 0.9999991577749408, iteration: 15148
loss: 1.0092289447784424,grad_norm: 0.9999992758355374, iteration: 15149
loss: 1.0088493824005127,grad_norm: 0.9999990341239913, iteration: 15150
loss: 1.0181519985198975,grad_norm: 0.9999992395585746, iteration: 15151
loss: 0.9834626317024231,grad_norm: 0.9999990337775595, iteration: 15152
loss: 0.9960623979568481,grad_norm: 0.999999412113341, iteration: 15153
loss: 1.0231891870498657,grad_norm: 0.9999996721825637, iteration: 15154
loss: 1.0219062566757202,grad_norm: 0.9999992355502808, iteration: 15155
loss: 1.0594232082366943,grad_norm: 0.9999992467352168, iteration: 15156
loss: 0.9892658591270447,grad_norm: 0.9999991738902515, iteration: 15157
loss: 1.0825215578079224,grad_norm: 0.9999997938513485, iteration: 15158
loss: 1.0289287567138672,grad_norm: 0.9999991656446734, iteration: 15159
loss: 1.076921820640564,grad_norm: 0.9999994999806742, iteration: 15160
loss: 1.0497379302978516,grad_norm: 0.9999992703197828, iteration: 15161
loss: 1.0140948295593262,grad_norm: 0.9999990954137893, iteration: 15162
loss: 1.041306734085083,grad_norm: 0.9999993052482107, iteration: 15163
loss: 1.0218186378479004,grad_norm: 0.9999990626102345, iteration: 15164
loss: 1.0649652481079102,grad_norm: 0.9999992463266087, iteration: 15165
loss: 1.0340745449066162,grad_norm: 0.9999991714579349, iteration: 15166
loss: 0.9959376454353333,grad_norm: 0.9999996847925003, iteration: 15167
loss: 1.0475335121154785,grad_norm: 0.9999993040441919, iteration: 15168
loss: 1.0419174432754517,grad_norm: 0.9999995373756283, iteration: 15169
loss: 1.0011513233184814,grad_norm: 0.9999994381865729, iteration: 15170
loss: 1.0285779237747192,grad_norm: 0.9999991721894869, iteration: 15171
loss: 0.9715899229049683,grad_norm: 0.9999992380220906, iteration: 15172
loss: 1.0302547216415405,grad_norm: 0.9999996192594899, iteration: 15173
loss: 0.9906027317047119,grad_norm: 0.9999991010451829, iteration: 15174
loss: 1.0244157314300537,grad_norm: 0.9790319372621236, iteration: 15175
loss: 1.076505184173584,grad_norm: 0.9999997370553877, iteration: 15176
loss: 1.034042239189148,grad_norm: 0.9999995048141371, iteration: 15177
loss: 1.09775972366333,grad_norm: 0.9999997062616084, iteration: 15178
loss: 1.0466270446777344,grad_norm: 0.9999994227915696, iteration: 15179
loss: 1.0112727880477905,grad_norm: 0.9999993446688225, iteration: 15180
loss: 1.041067123413086,grad_norm: 0.9999994868370913, iteration: 15181
loss: 1.0180907249450684,grad_norm: 0.9999991116706084, iteration: 15182
loss: 1.0663323402404785,grad_norm: 0.9999997690763034, iteration: 15183
loss: 1.0991754531860352,grad_norm: 0.9999995095186437, iteration: 15184
loss: 1.0237826108932495,grad_norm: 0.9999993526706349, iteration: 15185
loss: 1.015529751777649,grad_norm: 0.999999607507285, iteration: 15186
loss: 1.0130891799926758,grad_norm: 0.9999992059406871, iteration: 15187
loss: 1.0433595180511475,grad_norm: 0.9999992671442322, iteration: 15188
loss: 1.1433162689208984,grad_norm: 0.9999997337844194, iteration: 15189
loss: 0.9914368391036987,grad_norm: 0.999999372654702, iteration: 15190
loss: 0.9910831451416016,grad_norm: 0.9999992042326157, iteration: 15191
loss: 1.0262904167175293,grad_norm: 0.9999992164236018, iteration: 15192
loss: 1.0690492391586304,grad_norm: 0.9999996096425465, iteration: 15193
loss: 1.0044301748275757,grad_norm: 0.9999992889863927, iteration: 15194
loss: 1.031803011894226,grad_norm: 0.9999993428383204, iteration: 15195
loss: 1.0151798725128174,grad_norm: 0.9999998568451399, iteration: 15196
loss: 1.0601837635040283,grad_norm: 0.9999992011234761, iteration: 15197
loss: 1.0116750001907349,grad_norm: 0.9999991030145476, iteration: 15198
loss: 0.9946039915084839,grad_norm: 0.9999998002212115, iteration: 15199
loss: 1.040335774421692,grad_norm: 0.9999991557986784, iteration: 15200
loss: 1.0059266090393066,grad_norm: 0.9999995818351495, iteration: 15201
loss: 1.0455970764160156,grad_norm: 0.9999997235831042, iteration: 15202
loss: 0.9815136194229126,grad_norm: 0.9999991411956163, iteration: 15203
loss: 1.0496890544891357,grad_norm: 0.9999992937695108, iteration: 15204
loss: 1.0381864309310913,grad_norm: 0.9999993781360765, iteration: 15205
loss: 1.0615848302841187,grad_norm: 0.9999993580060508, iteration: 15206
loss: 1.0052695274353027,grad_norm: 0.9999991864567895, iteration: 15207
loss: 1.070158839225769,grad_norm: 0.9999996378506809, iteration: 15208
loss: 1.0217751264572144,grad_norm: 0.9999990915195018, iteration: 15209
loss: 1.0704504251480103,grad_norm: 0.99999972678369, iteration: 15210
loss: 1.0431808233261108,grad_norm: 0.9999991221657845, iteration: 15211
loss: 1.0484848022460938,grad_norm: 0.9999991613846811, iteration: 15212
loss: 1.0410457849502563,grad_norm: 0.9999993787866862, iteration: 15213
loss: 1.0545680522918701,grad_norm: 0.9999993969118222, iteration: 15214
loss: 1.015977382659912,grad_norm: 0.999999739506568, iteration: 15215
loss: 1.049379587173462,grad_norm: 0.9999992652278659, iteration: 15216
loss: 1.0202008485794067,grad_norm: 0.9999991699892602, iteration: 15217
loss: 1.0138663053512573,grad_norm: 0.9999992046435381, iteration: 15218
loss: 1.0304983854293823,grad_norm: 0.999999257441487, iteration: 15219
loss: 1.0157612562179565,grad_norm: 0.9999992753640083, iteration: 15220
loss: 1.006335973739624,grad_norm: 0.9999993918014699, iteration: 15221
loss: 0.9936429858207703,grad_norm: 0.9999991492157382, iteration: 15222
loss: 1.037137746810913,grad_norm: 0.9999990741422534, iteration: 15223
loss: 1.0381824970245361,grad_norm: 0.9999992091712139, iteration: 15224
loss: 0.9871319532394409,grad_norm: 0.9999992775418789, iteration: 15225
loss: 0.9848161339759827,grad_norm: 0.9999993243298173, iteration: 15226
loss: 0.9947217106819153,grad_norm: 0.9999989943097504, iteration: 15227
loss: 1.0032469034194946,grad_norm: 0.9999991380690598, iteration: 15228
loss: 1.003303050994873,grad_norm: 0.9999992072789472, iteration: 15229
loss: 0.9695848226547241,grad_norm: 0.9999993922557668, iteration: 15230
loss: 1.0092971324920654,grad_norm: 0.9999994960011781, iteration: 15231
loss: 1.0693025588989258,grad_norm: 0.9999992152510748, iteration: 15232
loss: 1.092767357826233,grad_norm: 0.999999669416699, iteration: 15233
loss: 1.0224859714508057,grad_norm: 0.9999993025835293, iteration: 15234
loss: 0.9937857389450073,grad_norm: 0.9999996511077344, iteration: 15235
loss: 0.9928880333900452,grad_norm: 0.9999990826250911, iteration: 15236
loss: 1.0277233123779297,grad_norm: 0.914591467387362, iteration: 15237
loss: 1.0240594148635864,grad_norm: 0.9999996349105931, iteration: 15238
loss: 0.9863836169242859,grad_norm: 0.9999991576369851, iteration: 15239
loss: 1.0350341796875,grad_norm: 0.9999991769223049, iteration: 15240
loss: 1.0817679166793823,grad_norm: 0.9999992901293048, iteration: 15241
loss: 1.0323572158813477,grad_norm: 0.9999993873080989, iteration: 15242
loss: 1.0611820220947266,grad_norm: 0.999999228437241, iteration: 15243
loss: 0.983951985836029,grad_norm: 0.9999992225196787, iteration: 15244
loss: 1.0100393295288086,grad_norm: 0.999999081323353, iteration: 15245
loss: 1.0271443128585815,grad_norm: 0.9999993311826751, iteration: 15246
loss: 1.0265194177627563,grad_norm: 0.9999995177048401, iteration: 15247
loss: 0.9887701272964478,grad_norm: 0.999999143850489, iteration: 15248
loss: 0.9971064329147339,grad_norm: 0.9999993051366236, iteration: 15249
loss: 1.016282320022583,grad_norm: 0.9999989702117698, iteration: 15250
loss: 0.9900158643722534,grad_norm: 0.999999050138105, iteration: 15251
loss: 1.0784850120544434,grad_norm: 0.9999994171253577, iteration: 15252
loss: 1.0630974769592285,grad_norm: 0.999999211397962, iteration: 15253
loss: 1.0130908489227295,grad_norm: 0.9999993851893559, iteration: 15254
loss: 1.0688430070877075,grad_norm: 0.9999990978266311, iteration: 15255
loss: 1.0082777738571167,grad_norm: 0.9999992449859684, iteration: 15256
loss: 1.0157496929168701,grad_norm: 0.9999993854779965, iteration: 15257
loss: 1.054018259048462,grad_norm: 0.9999997534143197, iteration: 15258
loss: 0.9984717965126038,grad_norm: 0.9999990732693662, iteration: 15259
loss: 1.0439680814743042,grad_norm: 0.9999995322736598, iteration: 15260
loss: 1.051626205444336,grad_norm: 0.9999998785732086, iteration: 15261
loss: 1.116875171661377,grad_norm: 0.9999998000531028, iteration: 15262
loss: 1.0115753412246704,grad_norm: 0.9999991947852489, iteration: 15263
loss: 1.0014909505844116,grad_norm: 0.9999989720321334, iteration: 15264
loss: 1.0142714977264404,grad_norm: 0.9999992320920122, iteration: 15265
loss: 1.0256366729736328,grad_norm: 0.9999989855217545, iteration: 15266
loss: 1.0316743850708008,grad_norm: 0.9999992865005695, iteration: 15267
loss: 1.0026918649673462,grad_norm: 0.9757018644913447, iteration: 15268
loss: 1.0106011629104614,grad_norm: 0.9463340721910712, iteration: 15269
loss: 1.0288748741149902,grad_norm: 0.9947352440816942, iteration: 15270
loss: 1.0300567150115967,grad_norm: 0.9999991853455994, iteration: 15271
loss: 1.0362292528152466,grad_norm: 0.9999996927747519, iteration: 15272
loss: 1.0170680284500122,grad_norm: 0.9999993762331787, iteration: 15273
loss: 1.0197862386703491,grad_norm: 0.999999201859892, iteration: 15274
loss: 1.0300853252410889,grad_norm: 0.999999094870399, iteration: 15275
loss: 1.0408709049224854,grad_norm: 0.9999994587690899, iteration: 15276
loss: 1.0141345262527466,grad_norm: 0.9999991262952413, iteration: 15277
loss: 1.045685887336731,grad_norm: 0.9999991444280827, iteration: 15278
loss: 1.0609122514724731,grad_norm: 0.9999995147257111, iteration: 15279
loss: 1.054431676864624,grad_norm: 0.9999995282883288, iteration: 15280
loss: 1.1799342632293701,grad_norm: 0.9999997540270872, iteration: 15281
loss: 1.019885540008545,grad_norm: 0.9999995133035771, iteration: 15282
loss: 1.0324617624282837,grad_norm: 0.9999992336832895, iteration: 15283
loss: 1.0813078880310059,grad_norm: 0.9999994194241163, iteration: 15284
loss: 1.0481432676315308,grad_norm: 0.9999994432211889, iteration: 15285
loss: 1.0500783920288086,grad_norm: 0.9999989655096685, iteration: 15286
loss: 1.123262882232666,grad_norm: 0.9999998870439994, iteration: 15287
loss: 1.085095763206482,grad_norm: 1.0000000085620184, iteration: 15288
loss: 1.0708162784576416,grad_norm: 0.999999373161368, iteration: 15289
loss: 1.0715972185134888,grad_norm: 0.9999996805673749, iteration: 15290
loss: 1.0244369506835938,grad_norm: 0.9999991909582643, iteration: 15291
loss: 1.0407823324203491,grad_norm: 0.9999997232938908, iteration: 15292
loss: 0.9915987849235535,grad_norm: 0.9999992004539081, iteration: 15293
loss: 1.0554157495498657,grad_norm: 0.9999995342086575, iteration: 15294
loss: 1.0497645139694214,grad_norm: 0.9999990939988993, iteration: 15295
loss: 1.0213884115219116,grad_norm: 0.9999990367038553, iteration: 15296
loss: 1.0346184968948364,grad_norm: 0.9999991207411897, iteration: 15297
loss: 1.0346364974975586,grad_norm: 0.9999996320343395, iteration: 15298
loss: 1.038415551185608,grad_norm: 0.9999993809622262, iteration: 15299
loss: 1.0170907974243164,grad_norm: 0.9999991224388255, iteration: 15300
loss: 1.060930848121643,grad_norm: 0.9999996558613096, iteration: 15301
loss: 1.0005500316619873,grad_norm: 0.999999133355078, iteration: 15302
loss: 1.0518720149993896,grad_norm: 0.9999994287155403, iteration: 15303
loss: 0.9992489218711853,grad_norm: 0.9999991886785768, iteration: 15304
loss: 1.043792963027954,grad_norm: 0.9999997298913984, iteration: 15305
loss: 1.062925934791565,grad_norm: 0.9999993452576962, iteration: 15306
loss: 1.027353286743164,grad_norm: 0.9999993738540618, iteration: 15307
loss: 0.978933572769165,grad_norm: 0.9999992321371752, iteration: 15308
loss: 1.0030980110168457,grad_norm: 0.9999996545939659, iteration: 15309
loss: 1.041536569595337,grad_norm: 0.9999993692562462, iteration: 15310
loss: 1.052288293838501,grad_norm: 0.9999995175148042, iteration: 15311
loss: 1.0301687717437744,grad_norm: 0.99999964938376, iteration: 15312
loss: 1.06956148147583,grad_norm: 0.9999994910559836, iteration: 15313
loss: 1.0987029075622559,grad_norm: 0.9999992036020845, iteration: 15314
loss: 0.9691619873046875,grad_norm: 0.9999992423300026, iteration: 15315
loss: 0.9964526891708374,grad_norm: 0.9999992186440088, iteration: 15316
loss: 1.0108578205108643,grad_norm: 0.999999207255172, iteration: 15317
loss: 0.9905813336372375,grad_norm: 0.9999999274756625, iteration: 15318
loss: 1.0687642097473145,grad_norm: 0.9999993522909585, iteration: 15319
loss: 1.107210636138916,grad_norm: 0.9999996014959719, iteration: 15320
loss: 1.0079187154769897,grad_norm: 0.9999993910428389, iteration: 15321
loss: 1.0135464668273926,grad_norm: 0.9999990022375479, iteration: 15322
loss: 1.009050965309143,grad_norm: 0.9999990788537098, iteration: 15323
loss: 1.0014574527740479,grad_norm: 0.999999278362651, iteration: 15324
loss: 1.0563911199569702,grad_norm: 0.999999940937018, iteration: 15325
loss: 1.0161094665527344,grad_norm: 0.9999994252589269, iteration: 15326
loss: 0.9765302538871765,grad_norm: 0.9999995180259714, iteration: 15327
loss: 1.003045916557312,grad_norm: 0.9999990706733608, iteration: 15328
loss: 1.099555253982544,grad_norm: 0.9999997584057123, iteration: 15329
loss: 1.0557652711868286,grad_norm: 0.9999998219867833, iteration: 15330
loss: 1.1704332828521729,grad_norm: 0.9999997876641394, iteration: 15331
loss: 1.042231798171997,grad_norm: 0.9999993539484054, iteration: 15332
loss: 1.048638939857483,grad_norm: 0.981171339328918, iteration: 15333
loss: 1.0968897342681885,grad_norm: 0.9999993126557313, iteration: 15334
loss: 1.0290915966033936,grad_norm: 0.9999993040029658, iteration: 15335
loss: 1.0285934209823608,grad_norm: 0.9999993799464256, iteration: 15336
loss: 1.0741665363311768,grad_norm: 0.9999994616783432, iteration: 15337
loss: 1.0424425601959229,grad_norm: 0.9999998667910234, iteration: 15338
loss: 1.127235770225525,grad_norm: 0.9999995212233705, iteration: 15339
loss: 1.050320029258728,grad_norm: 0.9999995256402682, iteration: 15340
loss: 0.9838943481445312,grad_norm: 0.99999926051742, iteration: 15341
loss: 0.9931766390800476,grad_norm: 0.9999997633820262, iteration: 15342
loss: 0.960069477558136,grad_norm: 0.999999176949293, iteration: 15343
loss: 1.0543348789215088,grad_norm: 0.9999992381465934, iteration: 15344
loss: 1.0217773914337158,grad_norm: 0.9999998592897609, iteration: 15345
loss: 1.0399219989776611,grad_norm: 0.9999996090893265, iteration: 15346
loss: 1.0079894065856934,grad_norm: 0.9999993113207365, iteration: 15347
loss: 1.0206784009933472,grad_norm: 0.9999996198299401, iteration: 15348
loss: 1.1110738515853882,grad_norm: 0.9999995001567493, iteration: 15349
loss: 1.0367354154586792,grad_norm: 0.9999992195813524, iteration: 15350
loss: 1.0120006799697876,grad_norm: 0.9999993092667476, iteration: 15351
loss: 1.0587794780731201,grad_norm: 0.9999995693158238, iteration: 15352
loss: 1.0258891582489014,grad_norm: 0.9999992100591157, iteration: 15353
loss: 0.9749747514724731,grad_norm: 0.9999992477912806, iteration: 15354
loss: 1.0337917804718018,grad_norm: 0.9999995086549159, iteration: 15355
loss: 1.0889333486557007,grad_norm: 0.9999993891495182, iteration: 15356
loss: 1.0594018697738647,grad_norm: 0.999999557114073, iteration: 15357
loss: 1.0451442003250122,grad_norm: 0.9999992051499372, iteration: 15358
loss: 1.0227988958358765,grad_norm: 0.9999996128120154, iteration: 15359
loss: 1.0876045227050781,grad_norm: 0.999999782694754, iteration: 15360
loss: 1.0208059549331665,grad_norm: 0.9999996826115122, iteration: 15361
loss: 1.019378423690796,grad_norm: 0.9999992136190196, iteration: 15362
loss: 1.0229017734527588,grad_norm: 0.9999994845751606, iteration: 15363
loss: 0.9812299013137817,grad_norm: 0.9999990923210126, iteration: 15364
loss: 1.0570868253707886,grad_norm: 0.9999993996100717, iteration: 15365
loss: 1.0787770748138428,grad_norm: 0.9999995806367724, iteration: 15366
loss: 1.05739426612854,grad_norm: 0.9999995431937847, iteration: 15367
loss: 1.058982253074646,grad_norm: 0.9999991208792813, iteration: 15368
loss: 1.0511776208877563,grad_norm: 0.9999992276524958, iteration: 15369
loss: 1.0267775058746338,grad_norm: 0.99999926864228, iteration: 15370
loss: 1.0083659887313843,grad_norm: 0.9999992401849533, iteration: 15371
loss: 0.9598986506462097,grad_norm: 0.9999994088291014, iteration: 15372
loss: 1.0348149538040161,grad_norm: 0.999999925080058, iteration: 15373
loss: 1.0584627389907837,grad_norm: 0.9999994130596234, iteration: 15374
loss: 1.0174190998077393,grad_norm: 0.9999993482202065, iteration: 15375
loss: 0.9966526031494141,grad_norm: 0.9999992394134172, iteration: 15376
loss: 1.0384941101074219,grad_norm: 0.9999991461912668, iteration: 15377
loss: 1.0142168998718262,grad_norm: 0.9999995200472499, iteration: 15378
loss: 1.0581576824188232,grad_norm: 0.9999994608568074, iteration: 15379
loss: 0.9892643690109253,grad_norm: 0.999999280772293, iteration: 15380
loss: 1.0036356449127197,grad_norm: 0.9999993882020439, iteration: 15381
loss: 1.1172823905944824,grad_norm: 0.9999992195421573, iteration: 15382
loss: 1.0310988426208496,grad_norm: 0.9999992911542809, iteration: 15383
loss: 1.125572681427002,grad_norm: 0.9999993787630991, iteration: 15384
loss: 1.042824387550354,grad_norm: 0.9999992539217094, iteration: 15385
loss: 1.0416792631149292,grad_norm: 0.999999082124698, iteration: 15386
loss: 1.1400558948516846,grad_norm: 0.9999998910481809, iteration: 15387
loss: 0.981198787689209,grad_norm: 0.9999991538474757, iteration: 15388
loss: 1.070319652557373,grad_norm: 0.9999993671765209, iteration: 15389
loss: 1.0776445865631104,grad_norm: 0.9999992545189078, iteration: 15390
loss: 1.0381460189819336,grad_norm: 0.9999997553707182, iteration: 15391
loss: 1.00934898853302,grad_norm: 0.9999991600491683, iteration: 15392
loss: 1.0926927328109741,grad_norm: 0.999999277641875, iteration: 15393
loss: 1.0633548498153687,grad_norm: 0.9999997689896929, iteration: 15394
loss: 0.9948106408119202,grad_norm: 0.999999622999755, iteration: 15395
loss: 1.062955379486084,grad_norm: 0.9999999305734475, iteration: 15396
loss: 1.0541225671768188,grad_norm: 0.9999995371730643, iteration: 15397
loss: 1.0659407377243042,grad_norm: 0.999999367805568, iteration: 15398
loss: 1.0116699934005737,grad_norm: 0.9999992944210484, iteration: 15399
loss: 1.1188805103302002,grad_norm: 0.9999995430072314, iteration: 15400
loss: 1.0349750518798828,grad_norm: 0.9999997959032578, iteration: 15401
loss: 1.0034425258636475,grad_norm: 0.9999995811535504, iteration: 15402
loss: 1.0020767450332642,grad_norm: 0.999999115647788, iteration: 15403
loss: 1.089253306388855,grad_norm: 0.9999998241889738, iteration: 15404
loss: 1.0565528869628906,grad_norm: 0.99999944449475, iteration: 15405
loss: 1.0283992290496826,grad_norm: 0.9999990309715378, iteration: 15406
loss: 1.0396997928619385,grad_norm: 0.9999996615184479, iteration: 15407
loss: 1.055539608001709,grad_norm: 0.999999740453125, iteration: 15408
loss: 1.0556371212005615,grad_norm: 0.9999991798840788, iteration: 15409
loss: 1.0166901350021362,grad_norm: 0.9999994068416229, iteration: 15410
loss: 1.053125023841858,grad_norm: 0.9999991808019117, iteration: 15411
loss: 1.0687320232391357,grad_norm: 0.9999995647138531, iteration: 15412
loss: 1.0452524423599243,grad_norm: 0.999999295042182, iteration: 15413
loss: 1.1025996208190918,grad_norm: 0.9999997685454516, iteration: 15414
loss: 1.0409001111984253,grad_norm: 0.9999993479716458, iteration: 15415
loss: 1.0423352718353271,grad_norm: 0.9999992646871753, iteration: 15416
loss: 0.9804226160049438,grad_norm: 0.9999992400958292, iteration: 15417
loss: 1.0221370458602905,grad_norm: 0.9999992267915153, iteration: 15418
loss: 1.0316892862319946,grad_norm: 0.999999250969018, iteration: 15419
loss: 1.0198606252670288,grad_norm: 0.999998991890182, iteration: 15420
loss: 1.0618298053741455,grad_norm: 0.9999998956472275, iteration: 15421
loss: 0.978424608707428,grad_norm: 0.9398019436897956, iteration: 15422
loss: 1.08683443069458,grad_norm: 0.9999994953257089, iteration: 15423
loss: 1.039899230003357,grad_norm: 0.9999992195253575, iteration: 15424
loss: 1.0237009525299072,grad_norm: 0.9999994176912684, iteration: 15425
loss: 1.0204131603240967,grad_norm: 0.9999996746316495, iteration: 15426
loss: 1.169277310371399,grad_norm: 0.9999996925951946, iteration: 15427
loss: 1.1140319108963013,grad_norm: 0.9999996510593704, iteration: 15428
loss: 0.9795169830322266,grad_norm: 0.9999991075749474, iteration: 15429
loss: 1.094565987586975,grad_norm: 0.9999997397062217, iteration: 15430
loss: 1.0315155982971191,grad_norm: 0.9999991388397987, iteration: 15431
loss: 1.0424565076828003,grad_norm: 0.9999991406885967, iteration: 15432
loss: 0.987106442451477,grad_norm: 0.9999993296955427, iteration: 15433
loss: 1.0401606559753418,grad_norm: 0.9999990893912921, iteration: 15434
loss: 1.0206811428070068,grad_norm: 0.9999991400512507, iteration: 15435
loss: 1.0299607515335083,grad_norm: 0.9999990396369525, iteration: 15436
loss: 1.0353456735610962,grad_norm: 0.9999992179656884, iteration: 15437
loss: 1.043506145477295,grad_norm: 0.9937936588885344, iteration: 15438
loss: 1.0406421422958374,grad_norm: 0.9999995554372733, iteration: 15439
loss: 1.0853471755981445,grad_norm: 0.9999996536139935, iteration: 15440
loss: 0.9715890288352966,grad_norm: 0.9999992178299122, iteration: 15441
loss: 1.005454421043396,grad_norm: 0.9999993052422448, iteration: 15442
loss: 0.9766468405723572,grad_norm: 0.9999991458386748, iteration: 15443
loss: 1.017133116722107,grad_norm: 0.9999990395251745, iteration: 15444
loss: 1.0785592794418335,grad_norm: 0.9999992132475217, iteration: 15445
loss: 0.9770665168762207,grad_norm: 0.9999991607231136, iteration: 15446
loss: 1.0049324035644531,grad_norm: 0.9999990235651341, iteration: 15447
loss: 1.0606541633605957,grad_norm: 0.9999998184493807, iteration: 15448
loss: 1.0427037477493286,grad_norm: 0.9999995646745607, iteration: 15449
loss: 1.065700650215149,grad_norm: 0.9999991520599436, iteration: 15450
loss: 1.025612711906433,grad_norm: 0.9999992627372172, iteration: 15451
loss: 1.0190476179122925,grad_norm: 0.999999179624363, iteration: 15452
loss: 1.0247715711593628,grad_norm: 0.999999418399284, iteration: 15453
loss: 1.0794014930725098,grad_norm: 0.9999992402830116, iteration: 15454
loss: 0.983443558216095,grad_norm: 0.9999990819543787, iteration: 15455
loss: 1.02803373336792,grad_norm: 0.9999991964373804, iteration: 15456
loss: 0.9887821078300476,grad_norm: 0.999999332386512, iteration: 15457
loss: 1.0518841743469238,grad_norm: 0.9999998049251205, iteration: 15458
loss: 1.0163642168045044,grad_norm: 0.9999991868969333, iteration: 15459
loss: 1.024150013923645,grad_norm: 0.9999991026934236, iteration: 15460
loss: 1.0153173208236694,grad_norm: 0.9999989874392695, iteration: 15461
loss: 1.0190308094024658,grad_norm: 0.9999990376257114, iteration: 15462
loss: 1.0537281036376953,grad_norm: 0.9999991838043176, iteration: 15463
loss: 1.0151301622390747,grad_norm: 0.9999991519151155, iteration: 15464
loss: 1.0019257068634033,grad_norm: 0.9999990155900355, iteration: 15465
loss: 1.0379793643951416,grad_norm: 0.9999993180897647, iteration: 15466
loss: 1.0734761953353882,grad_norm: 0.9999996422609261, iteration: 15467
loss: 0.991697371006012,grad_norm: 0.9999989745426869, iteration: 15468
loss: 1.0355240106582642,grad_norm: 0.9999996593617916, iteration: 15469
loss: 0.9722120761871338,grad_norm: 0.999999173562369, iteration: 15470
loss: 1.0008628368377686,grad_norm: 0.9999993723631911, iteration: 15471
loss: 0.9816555380821228,grad_norm: 0.9999993561291012, iteration: 15472
loss: 1.0073636770248413,grad_norm: 0.9999992523320509, iteration: 15473
loss: 1.118482232093811,grad_norm: 0.9999994810471976, iteration: 15474
loss: 1.0962001085281372,grad_norm: 0.9999997114947506, iteration: 15475
loss: 1.0464720726013184,grad_norm: 0.9999991359489036, iteration: 15476
loss: 0.9992515444755554,grad_norm: 0.9999991993997098, iteration: 15477
loss: 1.017568588256836,grad_norm: 0.9999990877176064, iteration: 15478
loss: 1.0435127019882202,grad_norm: 0.9999997925994186, iteration: 15479
loss: 1.0783847570419312,grad_norm: 0.9999991095224497, iteration: 15480
loss: 0.9752597808837891,grad_norm: 0.9999992515514812, iteration: 15481
loss: 0.9988641738891602,grad_norm: 0.9999993055357626, iteration: 15482
loss: 1.0592092275619507,grad_norm: 0.9999996459759692, iteration: 15483
loss: 1.0222111940383911,grad_norm: 0.9999991717674152, iteration: 15484
loss: 1.0506598949432373,grad_norm: 0.9999990507085234, iteration: 15485
loss: 1.0329991579055786,grad_norm: 0.9999991816975436, iteration: 15486
loss: 0.9943565130233765,grad_norm: 0.9999992069733026, iteration: 15487
loss: 0.9888224601745605,grad_norm: 0.9999992253018345, iteration: 15488
loss: 1.0719547271728516,grad_norm: 0.999998993368329, iteration: 15489
loss: 1.0403324365615845,grad_norm: 0.999999171361161, iteration: 15490
loss: 1.046182632446289,grad_norm: 0.9999991780507757, iteration: 15491
loss: 1.0055148601531982,grad_norm: 0.9999991263932039, iteration: 15492
loss: 1.026934027671814,grad_norm: 0.9978661767069071, iteration: 15493
loss: 1.023118019104004,grad_norm: 0.9999992783277591, iteration: 15494
loss: 1.0231540203094482,grad_norm: 0.9999991103336926, iteration: 15495
loss: 1.028123378753662,grad_norm: 0.9999991357003977, iteration: 15496
loss: 1.022162675857544,grad_norm: 0.9999992756233852, iteration: 15497
loss: 1.0543746948242188,grad_norm: 0.9999992217984064, iteration: 15498
loss: 1.0352363586425781,grad_norm: 0.9999991582186272, iteration: 15499
loss: 0.9924634695053101,grad_norm: 0.976086489249606, iteration: 15500
loss: 1.040995478630066,grad_norm: 0.9999994041209598, iteration: 15501
loss: 1.0626026391983032,grad_norm: 0.9999994081001428, iteration: 15502
loss: 1.0533174276351929,grad_norm: 0.9999997255939199, iteration: 15503
loss: 1.0835288763046265,grad_norm: 1.000000011589752, iteration: 15504
loss: 1.0589333772659302,grad_norm: 0.9999994826577986, iteration: 15505
loss: 1.0190038681030273,grad_norm: 0.9999991501122306, iteration: 15506
loss: 1.0441874265670776,grad_norm: 0.9999993564241278, iteration: 15507
loss: 1.053951382637024,grad_norm: 0.9999990152102387, iteration: 15508
loss: 1.0445393323898315,grad_norm: 0.9261455426722532, iteration: 15509
loss: 1.0945018529891968,grad_norm: 0.9999997724973765, iteration: 15510
loss: 1.0435878038406372,grad_norm: 0.9999992024574483, iteration: 15511
loss: 1.0136884450912476,grad_norm: 0.9999991426463078, iteration: 15512
loss: 1.038535237312317,grad_norm: 0.9999991582752227, iteration: 15513
loss: 1.0426936149597168,grad_norm: 0.9999992608758799, iteration: 15514
loss: 0.9747446179389954,grad_norm: 0.8923112020242143, iteration: 15515
loss: 1.038330078125,grad_norm: 0.999999089321859, iteration: 15516
loss: 1.039258360862732,grad_norm: 0.99999905250995, iteration: 15517
loss: 1.0223255157470703,grad_norm: 0.9999993517205872, iteration: 15518
loss: 1.047336459159851,grad_norm: 0.9503675233810518, iteration: 15519
loss: 1.0569653511047363,grad_norm: 0.999999415883424, iteration: 15520
loss: 1.0344723463058472,grad_norm: 0.999999589523485, iteration: 15521
loss: 1.0268937349319458,grad_norm: 0.999999117970493, iteration: 15522
loss: 1.073920488357544,grad_norm: 0.9999994806054059, iteration: 15523
loss: 1.0644326210021973,grad_norm: 0.999999466874931, iteration: 15524
loss: 1.0120131969451904,grad_norm: 0.9999993749222047, iteration: 15525
loss: 1.0760201215744019,grad_norm: 0.9999992904728723, iteration: 15526
loss: 0.9916362762451172,grad_norm: 0.9999992743717407, iteration: 15527
loss: 1.0249394178390503,grad_norm: 0.9999991772840343, iteration: 15528
loss: 1.0062450170516968,grad_norm: 0.9999991218480573, iteration: 15529
loss: 1.0144565105438232,grad_norm: 0.9866192951451421, iteration: 15530
loss: 1.0962550640106201,grad_norm: 0.9999999412085777, iteration: 15531
loss: 1.0014041662216187,grad_norm: 0.9999996041546911, iteration: 15532
loss: 1.0482354164123535,grad_norm: 0.999999412996795, iteration: 15533
loss: 0.9887182116508484,grad_norm: 0.9999990597740056, iteration: 15534
loss: 1.0551648139953613,grad_norm: 0.999999179763401, iteration: 15535
loss: 1.0340980291366577,grad_norm: 0.9999991807579197, iteration: 15536
loss: 1.0059758424758911,grad_norm: 0.9999995762155349, iteration: 15537
loss: 1.006767988204956,grad_norm: 0.9999991892390041, iteration: 15538
loss: 1.0494325160980225,grad_norm: 0.9999992630981988, iteration: 15539
loss: 1.0255495309829712,grad_norm: 0.9345662872597422, iteration: 15540
loss: 1.023352861404419,grad_norm: 0.9999992261726934, iteration: 15541
loss: 1.2338119745254517,grad_norm: 0.9999998396187028, iteration: 15542
loss: 1.1133404970169067,grad_norm: 0.9999996864888904, iteration: 15543
loss: 1.0349122285842896,grad_norm: 0.9999990505553428, iteration: 15544
loss: 1.0638976097106934,grad_norm: 0.9999994786522213, iteration: 15545
loss: 0.9902820587158203,grad_norm: 0.9999992594690398, iteration: 15546
loss: 1.0321513414382935,grad_norm: 0.9915182789023744, iteration: 15547
loss: 0.9999229907989502,grad_norm: 0.9999990702777263, iteration: 15548
loss: 1.0160750150680542,grad_norm: 0.9999994379944475, iteration: 15549
loss: 1.0350507497787476,grad_norm: 0.9999991941578329, iteration: 15550
loss: 0.99257892370224,grad_norm: 0.9999990950276403, iteration: 15551
loss: 1.0110498666763306,grad_norm: 0.9999992526084761, iteration: 15552
loss: 1.053244709968567,grad_norm: 0.9999992375747462, iteration: 15553
loss: 1.0043463706970215,grad_norm: 0.9999990314265917, iteration: 15554
loss: 1.0001686811447144,grad_norm: 0.9999993494296963, iteration: 15555
loss: 1.0068111419677734,grad_norm: 0.9999995397528867, iteration: 15556
loss: 1.0451081991195679,grad_norm: 0.9999991187052931, iteration: 15557
loss: 1.0710649490356445,grad_norm: 0.9999996652229378, iteration: 15558
loss: 0.9908214807510376,grad_norm: 0.9520363613139744, iteration: 15559
loss: 0.9556307792663574,grad_norm: 0.9999991573260607, iteration: 15560
loss: 1.0343055725097656,grad_norm: 0.9999995436223116, iteration: 15561
loss: 1.0224798917770386,grad_norm: 0.9999995184767331, iteration: 15562
loss: 1.0287998914718628,grad_norm: 0.9400002245807394, iteration: 15563
loss: 0.9879044890403748,grad_norm: 0.9999991179261767, iteration: 15564
loss: 1.0086603164672852,grad_norm: 0.9999993179213992, iteration: 15565
loss: 1.05307936668396,grad_norm: 0.9999996776040097, iteration: 15566
loss: 1.0396137237548828,grad_norm: 0.9999996537990365, iteration: 15567
loss: 0.9963916540145874,grad_norm: 0.999999321886571, iteration: 15568
loss: 1.0423702001571655,grad_norm: 0.9999992133483906, iteration: 15569
loss: 1.0605418682098389,grad_norm: 0.999999341715539, iteration: 15570
loss: 1.145129919052124,grad_norm: 0.9999999264017505, iteration: 15571
loss: 1.0316603183746338,grad_norm: 0.9999991141187607, iteration: 15572
loss: 1.0580307245254517,grad_norm: 0.999999115430489, iteration: 15573
loss: 1.0194058418273926,grad_norm: 0.9999991760217674, iteration: 15574
loss: 0.9926503896713257,grad_norm: 0.9999993753098656, iteration: 15575
loss: 1.0574462413787842,grad_norm: 0.9999991910754727, iteration: 15576
loss: 1.0099058151245117,grad_norm: 0.9999992309562824, iteration: 15577
loss: 0.9980261921882629,grad_norm: 0.9999990277204116, iteration: 15578
loss: 1.1062103509902954,grad_norm: 0.99999958871094, iteration: 15579
loss: 1.0106704235076904,grad_norm: 0.9999990898004575, iteration: 15580
loss: 1.0510939359664917,grad_norm: 0.9999994223819793, iteration: 15581
loss: 1.0499426126480103,grad_norm: 0.9999992044476078, iteration: 15582
loss: 1.0741745233535767,grad_norm: 0.9999995468261849, iteration: 15583
loss: 1.0553691387176514,grad_norm: 0.999999147913267, iteration: 15584
loss: 1.0656301975250244,grad_norm: 0.9999997904851294, iteration: 15585
loss: 0.9889481663703918,grad_norm: 0.9999991898315697, iteration: 15586
loss: 1.0521838665008545,grad_norm: 0.9999994340145363, iteration: 15587
loss: 1.0111078023910522,grad_norm: 0.9999991253554372, iteration: 15588
loss: 1.0525051355361938,grad_norm: 0.9999995146530362, iteration: 15589
loss: 1.0283135175704956,grad_norm: 0.9999993769916377, iteration: 15590
loss: 1.031667947769165,grad_norm: 0.9999992249586845, iteration: 15591
loss: 1.1238045692443848,grad_norm: 0.9999997847917049, iteration: 15592
loss: 1.0933070182800293,grad_norm: 0.9999996310638527, iteration: 15593
loss: 1.0074050426483154,grad_norm: 0.9999993304742141, iteration: 15594
loss: 1.0094062089920044,grad_norm: 0.8660702866320412, iteration: 15595
loss: 1.0144500732421875,grad_norm: 0.9999989998363866, iteration: 15596
loss: 1.0505127906799316,grad_norm: 0.9999993058306943, iteration: 15597
loss: 1.0112279653549194,grad_norm: 0.9999993109932598, iteration: 15598
loss: 0.9902237057685852,grad_norm: 0.9999992687501217, iteration: 15599
loss: 1.0824763774871826,grad_norm: 0.9999995400293383, iteration: 15600
loss: 1.0557730197906494,grad_norm: 0.9999994800267743, iteration: 15601
loss: 1.0119112730026245,grad_norm: 0.999999777113019, iteration: 15602
loss: 0.9897946715354919,grad_norm: 0.9999992452810934, iteration: 15603
loss: 1.0694501399993896,grad_norm: 0.9999998669748816, iteration: 15604
loss: 1.0443129539489746,grad_norm: 0.9942696841515436, iteration: 15605
loss: 0.993196427822113,grad_norm: 0.9999994191692726, iteration: 15606
loss: 1.0816580057144165,grad_norm: 0.9999991666234441, iteration: 15607
loss: 1.0410913228988647,grad_norm: 0.9999989928623276, iteration: 15608
loss: 0.996967077255249,grad_norm: 0.9999991645596893, iteration: 15609
loss: 1.049536943435669,grad_norm: 0.9999996109735944, iteration: 15610
loss: 0.997373104095459,grad_norm: 0.9281497827267061, iteration: 15611
loss: 1.0438575744628906,grad_norm: 0.9999992276224471, iteration: 15612
loss: 1.0534249544143677,grad_norm: 0.9999994595764359, iteration: 15613
loss: 1.0294653177261353,grad_norm: 0.9999993964694208, iteration: 15614
loss: 0.9810265898704529,grad_norm: 0.9999990861320752, iteration: 15615
loss: 1.0003726482391357,grad_norm: 0.9999994114254052, iteration: 15616
loss: 1.052804708480835,grad_norm: 0.9999992804712653, iteration: 15617
loss: 1.0077577829360962,grad_norm: 0.9999993668429097, iteration: 15618
loss: 0.9891058802604675,grad_norm: 0.9087793500042054, iteration: 15619
loss: 1.0684322118759155,grad_norm: 0.9999993751344075, iteration: 15620
loss: 1.0639036893844604,grad_norm: 0.999999463167616, iteration: 15621
loss: 1.0441569089889526,grad_norm: 0.9999996742589119, iteration: 15622
loss: 1.1236573457717896,grad_norm: 0.9999997011315422, iteration: 15623
loss: 1.012171983718872,grad_norm: 0.9999992607411502, iteration: 15624
loss: 1.0187458992004395,grad_norm: 0.9999995401463727, iteration: 15625
loss: 1.0702061653137207,grad_norm: 0.9999998137240561, iteration: 15626
loss: 1.038590908050537,grad_norm: 0.999999094168699, iteration: 15627
loss: 1.044836401939392,grad_norm: 0.9999992512015532, iteration: 15628
loss: 0.969226598739624,grad_norm: 0.9999996291002929, iteration: 15629
loss: 0.9916477203369141,grad_norm: 0.8986473609776242, iteration: 15630
loss: 0.999902606010437,grad_norm: 0.9999992237701238, iteration: 15631
loss: 1.0638893842697144,grad_norm: 0.9999990770027012, iteration: 15632
loss: 1.0103178024291992,grad_norm: 0.9999991270802366, iteration: 15633
loss: 0.9864274859428406,grad_norm: 0.9999997655109214, iteration: 15634
loss: 1.0235265493392944,grad_norm: 0.9999991671158036, iteration: 15635
loss: 0.982962429523468,grad_norm: 0.9999994770374504, iteration: 15636
loss: 0.9910603165626526,grad_norm: 0.9999992814055382, iteration: 15637
loss: 1.0686792135238647,grad_norm: 0.9999996725394943, iteration: 15638
loss: 1.0623317956924438,grad_norm: 0.9999994612156731, iteration: 15639
loss: 1.0494416952133179,grad_norm: 0.9999996058058235, iteration: 15640
loss: 1.045405387878418,grad_norm: 0.9999996493072535, iteration: 15641
loss: 1.0790722370147705,grad_norm: 0.999999686829718, iteration: 15642
loss: 1.0178430080413818,grad_norm: 0.9999990765017904, iteration: 15643
loss: 0.982685923576355,grad_norm: 0.9999991789722863, iteration: 15644
loss: 0.9971952438354492,grad_norm: 0.9999992289529709, iteration: 15645
loss: 0.9942804574966431,grad_norm: 0.9999991914796321, iteration: 15646
loss: 1.0318069458007812,grad_norm: 0.9999992497519008, iteration: 15647
loss: 1.0726057291030884,grad_norm: 0.9999990209238171, iteration: 15648
loss: 1.0919159650802612,grad_norm: 0.9999993458045766, iteration: 15649
loss: 1.0412629842758179,grad_norm: 0.9999990129529105, iteration: 15650
loss: 1.0442492961883545,grad_norm: 0.9999996713863354, iteration: 15651
loss: 1.0020716190338135,grad_norm: 0.9999990901555842, iteration: 15652
loss: 1.0277035236358643,grad_norm: 0.9999995709370746, iteration: 15653
loss: 1.0284643173217773,grad_norm: 0.9999991529467761, iteration: 15654
loss: 1.080696940422058,grad_norm: 0.9999993752128149, iteration: 15655
loss: 1.0067058801651,grad_norm: 0.999999770249791, iteration: 15656
loss: 1.0315423011779785,grad_norm: 0.9999997699604689, iteration: 15657
loss: 0.9565877318382263,grad_norm: 0.9999992697020612, iteration: 15658
loss: 1.0127348899841309,grad_norm: 0.999999160788138, iteration: 15659
loss: 1.0404802560806274,grad_norm: 0.9999997043064736, iteration: 15660
loss: 1.0395078659057617,grad_norm: 0.9999993131210709, iteration: 15661
loss: 1.033122181892395,grad_norm: 0.9999998505238376, iteration: 15662
loss: 1.008530855178833,grad_norm: 0.9999991028655124, iteration: 15663
loss: 1.0280675888061523,grad_norm: 0.9970837334759902, iteration: 15664
loss: 1.024039387702942,grad_norm: 0.9999990225219636, iteration: 15665
loss: 1.0014772415161133,grad_norm: 0.9999991750312794, iteration: 15666
loss: 0.9741092920303345,grad_norm: 0.999999185885724, iteration: 15667
loss: 1.0679391622543335,grad_norm: 0.9999995006634127, iteration: 15668
loss: 0.985137939453125,grad_norm: 0.9999991208700486, iteration: 15669
loss: 1.046041488647461,grad_norm: 0.99999925973864, iteration: 15670
loss: 1.0385404825210571,grad_norm: 0.9999996260104457, iteration: 15671
loss: 1.0260794162750244,grad_norm: 0.9999992157826428, iteration: 15672
loss: 1.0682191848754883,grad_norm: 0.9999994900918384, iteration: 15673
loss: 1.0223863124847412,grad_norm: 0.9999991422420582, iteration: 15674
loss: 1.019262671470642,grad_norm: 0.9999992955891464, iteration: 15675
loss: 1.0159313678741455,grad_norm: 0.9999991458194397, iteration: 15676
loss: 1.0328410863876343,grad_norm: 0.9999994574378269, iteration: 15677
loss: 1.0054175853729248,grad_norm: 0.999999523407468, iteration: 15678
loss: 1.0329631567001343,grad_norm: 0.9999991184372605, iteration: 15679
loss: 1.0618696212768555,grad_norm: 0.9999991689131655, iteration: 15680
loss: 0.9995161890983582,grad_norm: 0.9999993047225259, iteration: 15681
loss: 1.0148563385009766,grad_norm: 0.9999991578206228, iteration: 15682
loss: 1.0238256454467773,grad_norm: 0.9999991450557741, iteration: 15683
loss: 1.0213310718536377,grad_norm: 0.9999991214100983, iteration: 15684
loss: 1.0632044076919556,grad_norm: 0.9999995251005434, iteration: 15685
loss: 1.067533016204834,grad_norm: 0.9999996913369714, iteration: 15686
loss: 1.057371735572815,grad_norm: 0.9999994980572865, iteration: 15687
loss: 1.0339105129241943,grad_norm: 0.9999997101324247, iteration: 15688
loss: 1.0207390785217285,grad_norm: 0.9999994951899882, iteration: 15689
loss: 1.0186219215393066,grad_norm: 0.9999994953968191, iteration: 15690
loss: 1.0799071788787842,grad_norm: 0.9999992562825458, iteration: 15691
loss: 0.9982671737670898,grad_norm: 0.9999991754050513, iteration: 15692
loss: 1.0916637182235718,grad_norm: 0.9999995668839642, iteration: 15693
loss: 1.0290032625198364,grad_norm: 0.9999991189148755, iteration: 15694
loss: 1.0254075527191162,grad_norm: 0.9999990355984036, iteration: 15695
loss: 1.0276532173156738,grad_norm: 0.9999993713146744, iteration: 15696
loss: 1.029608130455017,grad_norm: 0.9999996357497061, iteration: 15697
loss: 1.0788880586624146,grad_norm: 0.999999379959376, iteration: 15698
loss: 1.047516942024231,grad_norm: 0.9999990342036207, iteration: 15699
loss: 1.0771005153656006,grad_norm: 0.9999992062180013, iteration: 15700
loss: 1.006535291671753,grad_norm: 0.9999991943253045, iteration: 15701
loss: 1.003989577293396,grad_norm: 0.991342959645647, iteration: 15702
loss: 1.034449577331543,grad_norm: 0.9999992777461806, iteration: 15703
loss: 1.0170162916183472,grad_norm: 0.9999994184393919, iteration: 15704
loss: 0.9990385174751282,grad_norm: 0.999999126578975, iteration: 15705
loss: 1.0586137771606445,grad_norm: 0.9999992329131475, iteration: 15706
loss: 1.081178903579712,grad_norm: 0.9999996966348388, iteration: 15707
loss: 1.0364065170288086,grad_norm: 0.9999994952991021, iteration: 15708
loss: 1.040291428565979,grad_norm: 0.9999996334748714, iteration: 15709
loss: 1.000428557395935,grad_norm: 0.9999994693676841, iteration: 15710
loss: 1.023638367652893,grad_norm: 0.9999990554075701, iteration: 15711
loss: 1.0944029092788696,grad_norm: 0.9999998738067005, iteration: 15712
loss: 1.028700828552246,grad_norm: 0.9999991052813559, iteration: 15713
loss: 1.0054985284805298,grad_norm: 0.9999990158843323, iteration: 15714
loss: 1.0370591878890991,grad_norm: 0.9999997287260826, iteration: 15715
loss: 1.0113632678985596,grad_norm: 0.9999993368962649, iteration: 15716
loss: 1.0385737419128418,grad_norm: 0.9999991986859509, iteration: 15717
loss: 0.9899968504905701,grad_norm: 0.9999992594062378, iteration: 15718
loss: 1.0601317882537842,grad_norm: 0.9999994766409157, iteration: 15719
loss: 1.0396839380264282,grad_norm: 0.9999991498711528, iteration: 15720
loss: 1.000880479812622,grad_norm: 0.9999991967641568, iteration: 15721
loss: 0.9691071510314941,grad_norm: 0.9999990674521728, iteration: 15722
loss: 0.9695802330970764,grad_norm: 0.999999026851845, iteration: 15723
loss: 0.9783300161361694,grad_norm: 0.9999991260475796, iteration: 15724
loss: 0.9968796968460083,grad_norm: 0.9999991851615175, iteration: 15725
loss: 1.0793390274047852,grad_norm: 0.9999997930054069, iteration: 15726
loss: 1.0619184970855713,grad_norm: 0.9999993567728721, iteration: 15727
loss: 1.0537917613983154,grad_norm: 0.999999780454011, iteration: 15728
loss: 1.1420907974243164,grad_norm: 0.9999998510616014, iteration: 15729
loss: 1.0589288473129272,grad_norm: 0.9999992097186648, iteration: 15730
loss: 1.0450996160507202,grad_norm: 0.9999992334109344, iteration: 15731
loss: 1.0020772218704224,grad_norm: 0.9999990864308117, iteration: 15732
loss: 1.029881238937378,grad_norm: 0.9999991897884912, iteration: 15733
loss: 0.9950497150421143,grad_norm: 0.9999993430000799, iteration: 15734
loss: 1.048822283744812,grad_norm: 0.999999413107178, iteration: 15735
loss: 1.0389987230300903,grad_norm: 0.9999994288586511, iteration: 15736
loss: 0.936737060546875,grad_norm: 0.9999992828739295, iteration: 15737
loss: 0.9634393453598022,grad_norm: 0.9999991325570621, iteration: 15738
loss: 1.0513079166412354,grad_norm: 0.9999997346221845, iteration: 15739
loss: 1.0429767370224,grad_norm: 0.9999993501509277, iteration: 15740
loss: 0.9859695434570312,grad_norm: 0.9999990699903002, iteration: 15741
loss: 0.9810682535171509,grad_norm: 0.9999992095986637, iteration: 15742
loss: 1.0315601825714111,grad_norm: 0.9999993061873163, iteration: 15743
loss: 1.0168429613113403,grad_norm: 0.9999997080484616, iteration: 15744
loss: 1.0328689813613892,grad_norm: 0.9999993725415844, iteration: 15745
loss: 0.9726672768592834,grad_norm: 0.9999993331389269, iteration: 15746
loss: 1.0188723802566528,grad_norm: 0.9999994151051377, iteration: 15747
loss: 1.0395302772521973,grad_norm: 0.9999992950159906, iteration: 15748
loss: 1.0025080442428589,grad_norm: 0.9999992620622112, iteration: 15749
loss: 1.0610660314559937,grad_norm: 0.9999990029201337, iteration: 15750
loss: 1.0659170150756836,grad_norm: 0.9999993896987569, iteration: 15751
loss: 1.0382437705993652,grad_norm: 0.999999441903624, iteration: 15752
loss: 1.0143482685089111,grad_norm: 0.9999997071702946, iteration: 15753
loss: 1.034461498260498,grad_norm: 0.9999991203208916, iteration: 15754
loss: 1.0242284536361694,grad_norm: 0.9999992399658166, iteration: 15755
loss: 1.090800166130066,grad_norm: 0.9999993005570996, iteration: 15756
loss: 1.120978593826294,grad_norm: 0.9999998274822005, iteration: 15757
loss: 1.0082727670669556,grad_norm: 0.9999992752087503, iteration: 15758
loss: 1.003421664237976,grad_norm: 0.9999992176432807, iteration: 15759
loss: 1.0018270015716553,grad_norm: 0.9999990901250319, iteration: 15760
loss: 1.0147008895874023,grad_norm: 0.9999996509116572, iteration: 15761
loss: 1.0421607494354248,grad_norm: 0.9999990960366155, iteration: 15762
loss: 1.0210696458816528,grad_norm: 0.9999992081011035, iteration: 15763
loss: 1.029126763343811,grad_norm: 0.9999991558068575, iteration: 15764
loss: 1.0904345512390137,grad_norm: 0.9999992644517978, iteration: 15765
loss: 1.0268632173538208,grad_norm: 0.9999993193944059, iteration: 15766
loss: 1.0192813873291016,grad_norm: 0.9999991520213967, iteration: 15767
loss: 1.0643019676208496,grad_norm: 0.999999236072532, iteration: 15768
loss: 1.035672664642334,grad_norm: 0.9999992046297839, iteration: 15769
loss: 1.0259544849395752,grad_norm: 0.9999996167795819, iteration: 15770
loss: 1.0006077289581299,grad_norm: 0.9999992430132956, iteration: 15771
loss: 1.0404750108718872,grad_norm: 0.9995273572738563, iteration: 15772
loss: 1.0503885746002197,grad_norm: 0.9999996614991006, iteration: 15773
loss: 1.0420476198196411,grad_norm: 0.9999993475542475, iteration: 15774
loss: 1.017208456993103,grad_norm: 0.9808395306577212, iteration: 15775
loss: 1.0241440534591675,grad_norm: 0.9999992914361202, iteration: 15776
loss: 0.9880611300468445,grad_norm: 0.9999991115811684, iteration: 15777
loss: 0.9804722666740417,grad_norm: 0.9999991281756473, iteration: 15778
loss: 1.026509404182434,grad_norm: 0.9999991586726431, iteration: 15779
loss: 1.0042349100112915,grad_norm: 0.9999991675761627, iteration: 15780
loss: 1.0183510780334473,grad_norm: 0.9999991852510904, iteration: 15781
loss: 0.996788740158081,grad_norm: 0.999999311228685, iteration: 15782
loss: 1.0330344438552856,grad_norm: 0.9999991323144973, iteration: 15783
loss: 0.9885363578796387,grad_norm: 0.9999995105247039, iteration: 15784
loss: 1.0239431858062744,grad_norm: 0.9999992521084948, iteration: 15785
loss: 0.9565686583518982,grad_norm: 0.9999991623905028, iteration: 15786
loss: 1.0148289203643799,grad_norm: 0.9999992224196839, iteration: 15787
loss: 1.0095090866088867,grad_norm: 0.9999991793201457, iteration: 15788
loss: 1.0059633255004883,grad_norm: 0.9999990444416087, iteration: 15789
loss: 1.0276795625686646,grad_norm: 0.9999992197675588, iteration: 15790
loss: 1.2991359233856201,grad_norm: 0.9999999521340613, iteration: 15791
loss: 1.0360640287399292,grad_norm: 0.9999995373781142, iteration: 15792
loss: 1.0327038764953613,grad_norm: 0.9999991288471289, iteration: 15793
loss: 1.0423645973205566,grad_norm: 0.9999992014263012, iteration: 15794
loss: 1.0704649686813354,grad_norm: 0.9999992935878657, iteration: 15795
loss: 1.0056105852127075,grad_norm: 0.9999994014014749, iteration: 15796
loss: 0.9713106751441956,grad_norm: 0.9229493253385835, iteration: 15797
loss: 1.0493457317352295,grad_norm: 0.9999991200303394, iteration: 15798
loss: 0.9778187274932861,grad_norm: 0.9999991657398217, iteration: 15799
loss: 1.0334734916687012,grad_norm: 0.9999991289539149, iteration: 15800
loss: 1.0025267601013184,grad_norm: 0.9943960945371263, iteration: 15801
loss: 1.0081537961959839,grad_norm: 0.9999990551771646, iteration: 15802
loss: 0.9782345294952393,grad_norm: 0.9999990000919412, iteration: 15803
loss: 0.9994545578956604,grad_norm: 0.9999990507970578, iteration: 15804
loss: 0.9831892848014832,grad_norm: 0.9999992446401768, iteration: 15805
loss: 1.0268634557724,grad_norm: 0.9999998196399079, iteration: 15806
loss: 1.0202127695083618,grad_norm: 0.9999992073889308, iteration: 15807
loss: 1.0221461057662964,grad_norm: 0.9913599183644441, iteration: 15808
loss: 0.9791982769966125,grad_norm: 0.9999991488278, iteration: 15809
loss: 1.0601731538772583,grad_norm: 0.9999991714162321, iteration: 15810
loss: 0.986272931098938,grad_norm: 0.9999992331167217, iteration: 15811
loss: 0.9925183057785034,grad_norm: 0.9999990642712052, iteration: 15812
loss: 1.059333086013794,grad_norm: 0.9999992376241879, iteration: 15813
loss: 1.031283974647522,grad_norm: 0.9999992409788346, iteration: 15814
loss: 1.0422683954238892,grad_norm: 0.9999992744680924, iteration: 15815
loss: 1.0230739116668701,grad_norm: 0.9999992545322528, iteration: 15816
loss: 1.0618693828582764,grad_norm: 0.9999993546619003, iteration: 15817
loss: 1.0849206447601318,grad_norm: 0.9999993464974446, iteration: 15818
loss: 1.0373377799987793,grad_norm: 0.9999995988866556, iteration: 15819
loss: 1.0314815044403076,grad_norm: 0.9999991111009389, iteration: 15820
loss: 1.0283966064453125,grad_norm: 0.999999162679565, iteration: 15821
loss: 1.0668883323669434,grad_norm: 0.9999994395839309, iteration: 15822
loss: 1.0195074081420898,grad_norm: 0.9999992145557355, iteration: 15823
loss: 1.0656046867370605,grad_norm: 0.9999996293473163, iteration: 15824
loss: 1.0554254055023193,grad_norm: 0.9999994554251652, iteration: 15825
loss: 1.0478779077529907,grad_norm: 0.9999996045628572, iteration: 15826
loss: 1.085524559020996,grad_norm: 0.9999993028713887, iteration: 15827
loss: 1.0551340579986572,grad_norm: 0.9999993615099306, iteration: 15828
loss: 1.0680538415908813,grad_norm: 0.9999995466106104, iteration: 15829
loss: 1.107012152671814,grad_norm: 0.9999993620819292, iteration: 15830
loss: 1.0271165370941162,grad_norm: 0.999999221287273, iteration: 15831
loss: 0.9961026906967163,grad_norm: 0.9981900818009941, iteration: 15832
loss: 0.9910897016525269,grad_norm: 0.9999993195387857, iteration: 15833
loss: 1.1128326654434204,grad_norm: 0.9999999697327557, iteration: 15834
loss: 1.0112853050231934,grad_norm: 0.9999993573673106, iteration: 15835
loss: 0.9732009768486023,grad_norm: 0.999999367686833, iteration: 15836
loss: 1.0285121202468872,grad_norm: 0.9999995951072106, iteration: 15837
loss: 1.0367794036865234,grad_norm: 0.9999993307650289, iteration: 15838
loss: 1.022840976715088,grad_norm: 0.9999991628700236, iteration: 15839
loss: 0.9888003468513489,grad_norm: 0.9230594636262809, iteration: 15840
loss: 1.0039616823196411,grad_norm: 0.9999991515997462, iteration: 15841
loss: 1.0220059156417847,grad_norm: 0.9999995775585137, iteration: 15842
loss: 0.9904918074607849,grad_norm: 0.9999992592591052, iteration: 15843
loss: 1.0056899785995483,grad_norm: 0.9857419230373048, iteration: 15844
loss: 1.042358636856079,grad_norm: 0.9999993729195517, iteration: 15845
loss: 1.0587173700332642,grad_norm: 0.9999992819973069, iteration: 15846
loss: 1.0682437419891357,grad_norm: 0.9999996615124993, iteration: 15847
loss: 1.048511266708374,grad_norm: 0.9999993412058612, iteration: 15848
loss: 1.018347978591919,grad_norm: 0.9999993591237001, iteration: 15849
loss: 1.011427879333496,grad_norm: 0.9999997463269099, iteration: 15850
loss: 1.0226175785064697,grad_norm: 0.9999991301450206, iteration: 15851
loss: 1.118743658065796,grad_norm: 0.9999997700014782, iteration: 15852
loss: 1.050087571144104,grad_norm: 0.9999993397362775, iteration: 15853
loss: 0.9975423812866211,grad_norm: 0.9999990838645765, iteration: 15854
loss: 1.045928716659546,grad_norm: 0.9977507311644461, iteration: 15855
loss: 0.9630252122879028,grad_norm: 0.999999129521503, iteration: 15856
loss: 1.0274924039840698,grad_norm: 0.9999991569242772, iteration: 15857
loss: 1.0964477062225342,grad_norm: 0.9999997737226114, iteration: 15858
loss: 1.019309639930725,grad_norm: 0.9999993438542176, iteration: 15859
loss: 1.0156704187393188,grad_norm: 0.9999990188039187, iteration: 15860
loss: 1.0406643152236938,grad_norm: 0.999999259699713, iteration: 15861
loss: 1.0620265007019043,grad_norm: 0.999999096264368, iteration: 15862
loss: 1.0815057754516602,grad_norm: 0.9999993495677064, iteration: 15863
loss: 1.0250487327575684,grad_norm: 0.9999995130140293, iteration: 15864
loss: 1.1161620616912842,grad_norm: 0.9999996541792433, iteration: 15865
loss: 0.9963893294334412,grad_norm: 0.9999991918598564, iteration: 15866
loss: 1.0022727251052856,grad_norm: 0.9999991448309232, iteration: 15867
loss: 1.042003870010376,grad_norm: 0.9999991608886797, iteration: 15868
loss: 1.1162298917770386,grad_norm: 0.9999996123715358, iteration: 15869
loss: 1.0666300058364868,grad_norm: 0.9999994173297372, iteration: 15870
loss: 1.039901614189148,grad_norm: 0.9999992412633898, iteration: 15871
loss: 1.071945071220398,grad_norm: 0.999999514496252, iteration: 15872
loss: 1.0215955972671509,grad_norm: 0.9999992270421837, iteration: 15873
loss: 0.9889101386070251,grad_norm: 0.9840010401686159, iteration: 15874
loss: 1.0670862197875977,grad_norm: 0.9998977052888992, iteration: 15875
loss: 1.0381078720092773,grad_norm: 0.9999996882113229, iteration: 15876
loss: 1.0211257934570312,grad_norm: 0.9999992613913188, iteration: 15877
loss: 1.0439485311508179,grad_norm: 0.9999993028206631, iteration: 15878
loss: 1.0497756004333496,grad_norm: 0.999999440101609, iteration: 15879
loss: 1.006446123123169,grad_norm: 0.9999992351189204, iteration: 15880
loss: 1.0523898601531982,grad_norm: 0.9999996773050295, iteration: 15881
loss: 1.06745183467865,grad_norm: 0.9999996798165408, iteration: 15882
loss: 0.9875028729438782,grad_norm: 0.9999991983952371, iteration: 15883
loss: 1.0248923301696777,grad_norm: 0.9999993057379108, iteration: 15884
loss: 1.0722947120666504,grad_norm: 0.9999997789808562, iteration: 15885
loss: 1.0948429107666016,grad_norm: 0.9999997900649675, iteration: 15886
loss: 1.0691577196121216,grad_norm: 0.9999997934268816, iteration: 15887
loss: 1.0445863008499146,grad_norm: 0.9999993006379808, iteration: 15888
loss: 1.009252667427063,grad_norm: 0.9999994959620891, iteration: 15889
loss: 1.0344417095184326,grad_norm: 0.9999993536397757, iteration: 15890
loss: 1.022664189338684,grad_norm: 0.9999993677463254, iteration: 15891
loss: 1.0187392234802246,grad_norm: 0.9999991463812092, iteration: 15892
loss: 1.0322903394699097,grad_norm: 0.9999993490035489, iteration: 15893
loss: 1.006946086883545,grad_norm: 0.9999992319590721, iteration: 15894
loss: 0.9851693511009216,grad_norm: 0.9999994725482796, iteration: 15895
loss: 1.063860297203064,grad_norm: 0.9999993612156696, iteration: 15896
loss: 0.9246997833251953,grad_norm: 0.9999990181963098, iteration: 15897
loss: 1.068906545639038,grad_norm: 0.9999993273147127, iteration: 15898
loss: 1.0424175262451172,grad_norm: 0.9999993633592421, iteration: 15899
loss: 1.0236756801605225,grad_norm: 0.9999993566637936, iteration: 15900
loss: 1.0246418714523315,grad_norm: 0.9999995365538672, iteration: 15901
loss: 1.0293548107147217,grad_norm: 0.9999991990341743, iteration: 15902
loss: 1.0106123685836792,grad_norm: 0.9999996089245718, iteration: 15903
loss: 1.0347449779510498,grad_norm: 0.9999990873080095, iteration: 15904
loss: 1.05601966381073,grad_norm: 0.9999991869749453, iteration: 15905
loss: 1.025827169418335,grad_norm: 0.9999991353898141, iteration: 15906
loss: 1.0990742444992065,grad_norm: 0.9999997953201168, iteration: 15907
loss: 1.0097951889038086,grad_norm: 0.999999249021774, iteration: 15908
loss: 1.0284360647201538,grad_norm: 0.9999999211585506, iteration: 15909
loss: 1.04764723777771,grad_norm: 0.9999997285105545, iteration: 15910
loss: 1.0160058736801147,grad_norm: 0.9999991208839779, iteration: 15911
loss: 1.0569063425064087,grad_norm: 0.9999995559201191, iteration: 15912
loss: 1.0368763208389282,grad_norm: 0.9999995231437904, iteration: 15913
loss: 1.0499939918518066,grad_norm: 0.9999991414440689, iteration: 15914
loss: 1.063673496246338,grad_norm: 0.9999992619114995, iteration: 15915
loss: 1.0193922519683838,grad_norm: 0.9999992998780092, iteration: 15916
loss: 1.0975059270858765,grad_norm: 0.9999994026723369, iteration: 15917
loss: 1.0042369365692139,grad_norm: 0.9597292007785232, iteration: 15918
loss: 1.0045276880264282,grad_norm: 0.9999991680437282, iteration: 15919
loss: 1.0800541639328003,grad_norm: 0.9999993676949471, iteration: 15920
loss: 1.0142958164215088,grad_norm: 0.9999994024000946, iteration: 15921
loss: 1.0196551084518433,grad_norm: 0.9999998913854883, iteration: 15922
loss: 0.9951292276382446,grad_norm: 0.999999253501437, iteration: 15923
loss: 1.0136339664459229,grad_norm: 0.999999222894477, iteration: 15924
loss: 1.055661678314209,grad_norm: 0.9999997987462198, iteration: 15925
loss: 1.0194023847579956,grad_norm: 0.9999991677589699, iteration: 15926
loss: 1.0658570528030396,grad_norm: 0.9999994934724703, iteration: 15927
loss: 1.1281962394714355,grad_norm: 0.9999995796964086, iteration: 15928
loss: 1.0268105268478394,grad_norm: 0.9999994965668232, iteration: 15929
loss: 1.0261554718017578,grad_norm: 0.9999996233966063, iteration: 15930
loss: 0.9824603796005249,grad_norm: 0.9999991801364521, iteration: 15931
loss: 1.0689020156860352,grad_norm: 0.9999995873224463, iteration: 15932
loss: 0.995717465877533,grad_norm: 0.9735280313827405, iteration: 15933
loss: 1.0370111465454102,grad_norm: 0.9999996267418146, iteration: 15934
loss: 1.0303748846054077,grad_norm: 0.9999995170286321, iteration: 15935
loss: 1.0168029069900513,grad_norm: 0.9999992505508124, iteration: 15936
loss: 1.0359166860580444,grad_norm: 0.8162884351119591, iteration: 15937
loss: 1.044678807258606,grad_norm: 0.9999990676235105, iteration: 15938
loss: 0.9761102795600891,grad_norm: 0.9615018573062075, iteration: 15939
loss: 1.0123291015625,grad_norm: 0.9999992774861871, iteration: 15940
loss: 0.9747040271759033,grad_norm: 0.9999989747364741, iteration: 15941
loss: 1.0117253065109253,grad_norm: 0.9999990704769641, iteration: 15942
loss: 0.9668218493461609,grad_norm: 0.9999991825224331, iteration: 15943
loss: 1.0518769025802612,grad_norm: 0.9999992709103207, iteration: 15944
loss: 0.9951923489570618,grad_norm: 0.9999991088138025, iteration: 15945
loss: 1.0389389991760254,grad_norm: 0.9999995510779498, iteration: 15946
loss: 1.0329399108886719,grad_norm: 0.9999995072408701, iteration: 15947
loss: 1.0164010524749756,grad_norm: 0.9999993753249256, iteration: 15948
loss: 0.9897372126579285,grad_norm: 0.9999991252638434, iteration: 15949
loss: 1.0663193464279175,grad_norm: 0.9999993401977466, iteration: 15950
loss: 1.0730667114257812,grad_norm: 0.9999994366823596, iteration: 15951
loss: 0.9599559903144836,grad_norm: 0.9999990642095811, iteration: 15952
loss: 1.0442256927490234,grad_norm: 0.9999994739999738, iteration: 15953
loss: 0.9928860068321228,grad_norm: 0.9999990923888751, iteration: 15954
loss: 1.0391842126846313,grad_norm: 0.9999993735311634, iteration: 15955
loss: 1.053141474723816,grad_norm: 0.9999991261400194, iteration: 15956
loss: 1.0105180740356445,grad_norm: 0.9999994405064232, iteration: 15957
loss: 1.010300636291504,grad_norm: 0.9999994164256238, iteration: 15958
loss: 0.9615499973297119,grad_norm: 0.9999989302441585, iteration: 15959
loss: 1.0233988761901855,grad_norm: 0.9999996416242467, iteration: 15960
loss: 1.0056387186050415,grad_norm: 0.9999995026219076, iteration: 15961
loss: 1.0016896724700928,grad_norm: 0.9290293595496831, iteration: 15962
loss: 1.0105730295181274,grad_norm: 0.9999993864146677, iteration: 15963
loss: 1.0312756299972534,grad_norm: 0.9999989891655147, iteration: 15964
loss: 1.106487512588501,grad_norm: 0.9999997082471443, iteration: 15965
loss: 1.077519416809082,grad_norm: 0.9999997911541196, iteration: 15966
loss: 0.988099217414856,grad_norm: 0.995720822564665, iteration: 15967
loss: 1.05288565158844,grad_norm: 0.9999990605271615, iteration: 15968
loss: 1.0293995141983032,grad_norm: 0.9999992742042823, iteration: 15969
loss: 1.0777920484542847,grad_norm: 0.9999996746553945, iteration: 15970
loss: 1.0477274656295776,grad_norm: 0.9999993050352937, iteration: 15971
loss: 1.0210684537887573,grad_norm: 0.9999992203121564, iteration: 15972
loss: 1.064375400543213,grad_norm: 0.9999994893300763, iteration: 15973
loss: 1.0468558073043823,grad_norm: 0.9999998533926434, iteration: 15974
loss: 0.9749171137809753,grad_norm: 0.9999992755083691, iteration: 15975
loss: 0.997418999671936,grad_norm: 0.99999917901656, iteration: 15976
loss: 1.0048017501831055,grad_norm: 0.9999995577810541, iteration: 15977
loss: 1.0656307935714722,grad_norm: 0.9999997341411, iteration: 15978
loss: 1.0317174196243286,grad_norm: 0.9999990691261567, iteration: 15979
loss: 1.0379544496536255,grad_norm: 0.9999992361224769, iteration: 15980
loss: 1.0070639848709106,grad_norm: 0.9999993480917627, iteration: 15981
loss: 0.9960734844207764,grad_norm: 0.9999993856489398, iteration: 15982
loss: 1.073006510734558,grad_norm: 0.9999995198657974, iteration: 15983
loss: 1.010074257850647,grad_norm: 0.9999992367910472, iteration: 15984
loss: 1.0680760145187378,grad_norm: 0.9999994734685603, iteration: 15985
loss: 1.0167570114135742,grad_norm: 0.9999990857654004, iteration: 15986
loss: 1.0093684196472168,grad_norm: 0.99999920182199, iteration: 15987
loss: 1.0324923992156982,grad_norm: 0.9999999252194909, iteration: 15988
loss: 1.0064163208007812,grad_norm: 0.9999991480056528, iteration: 15989
loss: 0.9827670454978943,grad_norm: 0.9999989811086338, iteration: 15990
loss: 1.020316481590271,grad_norm: 0.9999991144414844, iteration: 15991
loss: 1.0198408365249634,grad_norm: 0.9999992531111491, iteration: 15992
loss: 1.0691438913345337,grad_norm: 0.9999992485907784, iteration: 15993
loss: 1.0331968069076538,grad_norm: 0.9999991333441949, iteration: 15994
loss: 1.0235496759414673,grad_norm: 0.9999991614863797, iteration: 15995
loss: 1.0237988233566284,grad_norm: 0.9726048748750228, iteration: 15996
loss: 1.094053030014038,grad_norm: 0.9999996087561921, iteration: 15997
loss: 1.075351595878601,grad_norm: 0.9999994932162709, iteration: 15998
loss: 0.9778207540512085,grad_norm: 0.9999992052249635, iteration: 15999
loss: 1.0277113914489746,grad_norm: 0.9999991430546594, iteration: 16000
loss: 1.02211594581604,grad_norm: 0.9999993281188532, iteration: 16001
loss: 1.018883466720581,grad_norm: 0.9999991756524368, iteration: 16002
loss: 1.0826879739761353,grad_norm: 0.9999991101013317, iteration: 16003
loss: 1.0813333988189697,grad_norm: 0.999999571651976, iteration: 16004
loss: 1.0138155221939087,grad_norm: 0.9999993735430186, iteration: 16005
loss: 1.0388407707214355,grad_norm: 0.9999993871181799, iteration: 16006
loss: 1.0331603288650513,grad_norm: 0.9999996142080628, iteration: 16007
loss: 0.9840442538261414,grad_norm: 0.9999996831863146, iteration: 16008
loss: 1.0343154668807983,grad_norm: 0.9999995770118685, iteration: 16009
loss: 1.0442553758621216,grad_norm: 0.9999992099051214, iteration: 16010
loss: 1.0110528469085693,grad_norm: 0.9999989511037971, iteration: 16011
loss: 1.0288403034210205,grad_norm: 0.9999992664995063, iteration: 16012
loss: 1.058077096939087,grad_norm: 0.9999990198013704, iteration: 16013
loss: 0.9757378697395325,grad_norm: 0.9999990319138895, iteration: 16014
loss: 1.0299487113952637,grad_norm: 0.9999991653381188, iteration: 16015
loss: 1.0368660688400269,grad_norm: 0.999999358595831, iteration: 16016
loss: 0.9974253177642822,grad_norm: 0.9999994126856118, iteration: 16017
loss: 1.0452556610107422,grad_norm: 0.9999990127110673, iteration: 16018
loss: 1.0594843626022339,grad_norm: 0.9999992578832617, iteration: 16019
loss: 1.0125842094421387,grad_norm: 0.9999993244185574, iteration: 16020
loss: 1.0878689289093018,grad_norm: 0.9999996811439877, iteration: 16021
loss: 1.010456919670105,grad_norm: 0.9233215848911774, iteration: 16022
loss: 1.0205820798873901,grad_norm: 0.9999991657001784, iteration: 16023
loss: 1.0396257638931274,grad_norm: 0.9999990438066927, iteration: 16024
loss: 1.0077420473098755,grad_norm: 0.9999992932629682, iteration: 16025
loss: 1.0333383083343506,grad_norm: 0.9999995520176375, iteration: 16026
loss: 1.0475844144821167,grad_norm: 0.9999996109343199, iteration: 16027
loss: 1.0031152963638306,grad_norm: 0.9999992116625502, iteration: 16028
loss: 1.027269959449768,grad_norm: 0.9999991680808373, iteration: 16029
loss: 1.0471223592758179,grad_norm: 0.9999994050772886, iteration: 16030
loss: 1.0028481483459473,grad_norm: 0.9999991156143077, iteration: 16031
loss: 0.980720579624176,grad_norm: 0.9910946839772475, iteration: 16032
loss: 1.0306168794631958,grad_norm: 0.9161480735932951, iteration: 16033
loss: 1.0461865663528442,grad_norm: 0.9999998108450944, iteration: 16034
loss: 1.0129163265228271,grad_norm: 0.9999990404086612, iteration: 16035
loss: 1.0598223209381104,grad_norm: 0.9999991342871319, iteration: 16036
loss: 1.0260570049285889,grad_norm: 0.9999992307586405, iteration: 16037
loss: 1.0530141592025757,grad_norm: 0.9715764088829569, iteration: 16038
loss: 1.108680248260498,grad_norm: 0.9999997462627237, iteration: 16039
loss: 0.9685538411140442,grad_norm: 0.999999266698341, iteration: 16040
loss: 1.029402256011963,grad_norm: 0.9999992063135762, iteration: 16041
loss: 1.016896367073059,grad_norm: 0.9999995242466839, iteration: 16042
loss: 1.0636041164398193,grad_norm: 1.0000000019816817, iteration: 16043
loss: 1.035368800163269,grad_norm: 0.9999991754672943, iteration: 16044
loss: 1.0747700929641724,grad_norm: 0.9999994003014591, iteration: 16045
loss: 1.0401448011398315,grad_norm: 0.9999991930478963, iteration: 16046
loss: 1.0529272556304932,grad_norm: 0.9999995862762548, iteration: 16047
loss: 1.036497950553894,grad_norm: 0.9999994048044645, iteration: 16048
loss: 1.066549301147461,grad_norm: 0.9999990835021857, iteration: 16049
loss: 1.1202057600021362,grad_norm: 0.9999995973644898, iteration: 16050
loss: 1.0069525241851807,grad_norm: 0.9999995880043586, iteration: 16051
loss: 1.0466697216033936,grad_norm: 0.9999993274504243, iteration: 16052
loss: 1.0691914558410645,grad_norm: 0.9999996040710024, iteration: 16053
loss: 1.1082557439804077,grad_norm: 0.999999860780472, iteration: 16054
loss: 0.9892474412918091,grad_norm: 0.994998256741166, iteration: 16055
loss: 1.0331437587738037,grad_norm: 1.0000000100995, iteration: 16056
loss: 1.047207236289978,grad_norm: 0.9999990784836816, iteration: 16057
loss: 1.0050114393234253,grad_norm: 0.9999993246219034, iteration: 16058
loss: 1.017638087272644,grad_norm: 0.9999993540538267, iteration: 16059
loss: 1.005980372428894,grad_norm: 0.9999991599281473, iteration: 16060
loss: 1.0184152126312256,grad_norm: 0.9999991610707795, iteration: 16061
loss: 1.0282435417175293,grad_norm: 0.9999990791705314, iteration: 16062
loss: 1.1100412607192993,grad_norm: 0.9999996799467818, iteration: 16063
loss: 1.0257536172866821,grad_norm: 0.9999993064421587, iteration: 16064
loss: 1.0626451969146729,grad_norm: 0.9999996432860597, iteration: 16065
loss: 1.028923511505127,grad_norm: 0.9999998716226456, iteration: 16066
loss: 1.015913724899292,grad_norm: 0.9999992045962137, iteration: 16067
loss: 1.1172430515289307,grad_norm: 0.9999998888315174, iteration: 16068
loss: 1.0522865056991577,grad_norm: 0.9999991799261291, iteration: 16069
loss: 1.0530016422271729,grad_norm: 0.9999996861275967, iteration: 16070
loss: 1.0030202865600586,grad_norm: 0.9999989978175873, iteration: 16071
loss: 1.0602136850357056,grad_norm: 0.9507311252031662, iteration: 16072
loss: 1.020183801651001,grad_norm: 0.9999991023799278, iteration: 16073
loss: 0.9985491037368774,grad_norm: 0.9999990256085957, iteration: 16074
loss: 1.0092201232910156,grad_norm: 0.9999994427338731, iteration: 16075
loss: 1.0506200790405273,grad_norm: 0.9999993421103149, iteration: 16076
loss: 0.9921626448631287,grad_norm: 0.9999992408271282, iteration: 16077
loss: 1.0863804817199707,grad_norm: 0.9999995032192543, iteration: 16078
loss: 1.0300170183181763,grad_norm: 0.9999991202259381, iteration: 16079
loss: 1.0191314220428467,grad_norm: 0.9999993654649324, iteration: 16080
loss: 1.111238956451416,grad_norm: 0.9999994735819062, iteration: 16081
loss: 1.018101692199707,grad_norm: 0.9999992151586988, iteration: 16082
loss: 0.9942812323570251,grad_norm: 0.9999991318484207, iteration: 16083
loss: 1.0379786491394043,grad_norm: 0.9999996770556107, iteration: 16084
loss: 0.9994417428970337,grad_norm: 0.9999991184814353, iteration: 16085
loss: 1.0109714269638062,grad_norm: 0.9999997786996209, iteration: 16086
loss: 1.0630602836608887,grad_norm: 0.9999992762573658, iteration: 16087
loss: 1.0063316822052002,grad_norm: 0.9999993709459036, iteration: 16088
loss: 1.0138972997665405,grad_norm: 0.9999994453013477, iteration: 16089
loss: 1.0500023365020752,grad_norm: 0.9999996487569488, iteration: 16090
loss: 0.9886950850486755,grad_norm: 0.9999992608128978, iteration: 16091
loss: 1.031442642211914,grad_norm: 0.9999991322346803, iteration: 16092
loss: 1.0055452585220337,grad_norm: 0.999999147621971, iteration: 16093
loss: 1.0489568710327148,grad_norm: 0.999999248938935, iteration: 16094
loss: 1.0399237871170044,grad_norm: 0.8655197274145258, iteration: 16095
loss: 1.129104733467102,grad_norm: 0.9999993599150728, iteration: 16096
loss: 1.034804105758667,grad_norm: 0.9999993437174212, iteration: 16097
loss: 1.0259895324707031,grad_norm: 0.9999991342460741, iteration: 16098
loss: 1.037070870399475,grad_norm: 0.9905988067148086, iteration: 16099
loss: 0.9662264585494995,grad_norm: 0.9077378310124635, iteration: 16100
loss: 1.065544605255127,grad_norm: 0.9999997551063241, iteration: 16101
loss: 1.0511568784713745,grad_norm: 0.9999995028393281, iteration: 16102
loss: 1.0276784896850586,grad_norm: 0.9999995665852034, iteration: 16103
loss: 0.9999565482139587,grad_norm: 0.9819357909717478, iteration: 16104
loss: 0.9973767995834351,grad_norm: 0.9999992074494173, iteration: 16105
loss: 1.0154993534088135,grad_norm: 0.9999991784805099, iteration: 16106
loss: 1.0249212980270386,grad_norm: 0.9999991240777221, iteration: 16107
loss: 1.0676630735397339,grad_norm: 0.9999993204062472, iteration: 16108
loss: 0.9626498818397522,grad_norm: 0.999999215101818, iteration: 16109
loss: 1.0324516296386719,grad_norm: 0.9999990485009352, iteration: 16110
loss: 1.0038155317306519,grad_norm: 0.9999992472019084, iteration: 16111
loss: 0.9904675483703613,grad_norm: 0.9999989672960101, iteration: 16112
loss: 1.0350439548492432,grad_norm: 0.9999993186682817, iteration: 16113
loss: 1.060215950012207,grad_norm: 0.9999990656553078, iteration: 16114
loss: 0.9867045879364014,grad_norm: 0.993625352853828, iteration: 16115
loss: 1.022018551826477,grad_norm: 0.9999993097179269, iteration: 16116
loss: 1.0299186706542969,grad_norm: 0.9999990983452035, iteration: 16117
loss: 1.0455776453018188,grad_norm: 0.9999993654183934, iteration: 16118
loss: 1.0335958003997803,grad_norm: 0.9999992124664723, iteration: 16119
loss: 1.0243152379989624,grad_norm: 0.9999992379191304, iteration: 16120
loss: 1.0167020559310913,grad_norm: 0.999999082742839, iteration: 16121
loss: 1.0579251050949097,grad_norm: 0.9999993471629809, iteration: 16122
loss: 1.0615692138671875,grad_norm: 0.9999996488631233, iteration: 16123
loss: 1.031620979309082,grad_norm: 0.9999990703936903, iteration: 16124
loss: 0.9824486374855042,grad_norm: 0.9999991526690439, iteration: 16125
loss: 1.0649734735488892,grad_norm: 0.9999991648042404, iteration: 16126
loss: 1.0356138944625854,grad_norm: 0.9999993589411156, iteration: 16127
loss: 1.0547598600387573,grad_norm: 0.9999999400219151, iteration: 16128
loss: 1.0464143753051758,grad_norm: 0.999999075192119, iteration: 16129
loss: 1.0234205722808838,grad_norm: 0.9999997278371028, iteration: 16130
loss: 1.0318291187286377,grad_norm: 0.9999991756799793, iteration: 16131
loss: 1.0133135318756104,grad_norm: 0.9999995929827886, iteration: 16132
loss: 1.0586140155792236,grad_norm: 0.9999994325772966, iteration: 16133
loss: 1.070533037185669,grad_norm: 0.9999995174240845, iteration: 16134
loss: 1.0233656167984009,grad_norm: 0.9999991262735292, iteration: 16135
loss: 1.016877293586731,grad_norm: 0.981249115972559, iteration: 16136
loss: 1.0255682468414307,grad_norm: 0.9999992348124269, iteration: 16137
loss: 1.015384554862976,grad_norm: 0.9999991871814758, iteration: 16138
loss: 1.011124610900879,grad_norm: 0.9999992306969911, iteration: 16139
loss: 0.9954884052276611,grad_norm: 0.999999211072362, iteration: 16140
loss: 1.0433217287063599,grad_norm: 0.9999991572646002, iteration: 16141
loss: 1.029097080230713,grad_norm: 0.9999992700728327, iteration: 16142
loss: 1.0136971473693848,grad_norm: 0.9999990944272575, iteration: 16143
loss: 1.0242136716842651,grad_norm: 0.9999993889419144, iteration: 16144
loss: 1.0560370683670044,grad_norm: 0.9999998295660482, iteration: 16145
loss: 1.0257514715194702,grad_norm: 0.9999991903976256, iteration: 16146
loss: 1.1200436353683472,grad_norm: 0.9999992832196481, iteration: 16147
loss: 1.0298243761062622,grad_norm: 0.9999990957956468, iteration: 16148
loss: 1.0871671438217163,grad_norm: 0.999999526438204, iteration: 16149
loss: 1.0313682556152344,grad_norm: 0.9999992724595338, iteration: 16150
loss: 1.0459847450256348,grad_norm: 0.9999990693868944, iteration: 16151
loss: 1.0033756494522095,grad_norm: 0.9999990280885417, iteration: 16152
loss: 1.0086350440979004,grad_norm: 0.9999992094090545, iteration: 16153
loss: 1.0428075790405273,grad_norm: 0.999999181297762, iteration: 16154
loss: 1.0774202346801758,grad_norm: 0.9999994258551649, iteration: 16155
loss: 1.0263175964355469,grad_norm: 0.9999992172231323, iteration: 16156
loss: 1.063636302947998,grad_norm: 0.9999996204830639, iteration: 16157
loss: 1.040897011756897,grad_norm: 0.999999193323692, iteration: 16158
loss: 1.0535402297973633,grad_norm: 0.9999996727783481, iteration: 16159
loss: 1.0723925828933716,grad_norm: 0.9999992342655054, iteration: 16160
loss: 1.0662479400634766,grad_norm: 0.9999991808535311, iteration: 16161
loss: 1.0451147556304932,grad_norm: 0.9999991962644361, iteration: 16162
loss: 0.9893666505813599,grad_norm: 0.9999991931262409, iteration: 16163
loss: 0.9741502404212952,grad_norm: 0.9999993629199424, iteration: 16164
loss: 1.0446125268936157,grad_norm: 0.999999231285228, iteration: 16165
loss: 1.0134023427963257,grad_norm: 0.9999989941163667, iteration: 16166
loss: 1.0402497053146362,grad_norm: 0.9999994309101821, iteration: 16167
loss: 1.0264149904251099,grad_norm: 0.9999993030724863, iteration: 16168
loss: 1.048690676689148,grad_norm: 0.9999999920185544, iteration: 16169
loss: 1.0130116939544678,grad_norm: 0.9999992476002851, iteration: 16170
loss: 1.0027745962142944,grad_norm: 0.9999998486587661, iteration: 16171
loss: 1.0200817584991455,grad_norm: 0.9700293348946298, iteration: 16172
loss: 0.9912576079368591,grad_norm: 0.9999992004895232, iteration: 16173
loss: 1.0219236612319946,grad_norm: 0.9999991134090223, iteration: 16174
loss: 1.0403845310211182,grad_norm: 0.9999992644065724, iteration: 16175
loss: 0.9780863523483276,grad_norm: 0.9999993196445546, iteration: 16176
loss: 1.0474718809127808,grad_norm: 0.9999991309821167, iteration: 16177
loss: 1.0500160455703735,grad_norm: 0.9999991536204887, iteration: 16178
loss: 0.9906119704246521,grad_norm: 0.9999992063025677, iteration: 16179
loss: 1.0303632020950317,grad_norm: 0.9999991793166129, iteration: 16180
loss: 1.0065425634384155,grad_norm: 0.9999990478383353, iteration: 16181
loss: 1.0479958057403564,grad_norm: 0.9999993490860883, iteration: 16182
loss: 1.0152121782302856,grad_norm: 0.999999269349542, iteration: 16183
loss: 1.02306067943573,grad_norm: 0.9999992635621077, iteration: 16184
loss: 1.0317087173461914,grad_norm: 0.9999991201473887, iteration: 16185
loss: 1.0048155784606934,grad_norm: 0.9999992136039639, iteration: 16186
loss: 1.0115716457366943,grad_norm: 0.9999994747837972, iteration: 16187
loss: 1.0594311952590942,grad_norm: 0.9999992983584785, iteration: 16188
loss: 1.0278643369674683,grad_norm: 0.9999992535831689, iteration: 16189
loss: 1.000555396080017,grad_norm: 0.9671386565066824, iteration: 16190
loss: 1.0065298080444336,grad_norm: 0.9999993882963831, iteration: 16191
loss: 1.0427608489990234,grad_norm: 0.999999788261684, iteration: 16192
loss: 1.0086857080459595,grad_norm: 0.9999992659626786, iteration: 16193
loss: 0.989432156085968,grad_norm: 0.9999990545922413, iteration: 16194
loss: 1.0012705326080322,grad_norm: 0.9999989940032422, iteration: 16195
loss: 1.008796215057373,grad_norm: 0.9999992398895294, iteration: 16196
loss: 1.0354338884353638,grad_norm: 0.9999997045838349, iteration: 16197
loss: 1.0545086860656738,grad_norm: 0.99999973108165, iteration: 16198
loss: 1.0045760869979858,grad_norm: 0.9825113346793521, iteration: 16199
loss: 1.0864508152008057,grad_norm: 0.9999993965071171, iteration: 16200
loss: 1.0751512050628662,grad_norm: 0.9999991233212464, iteration: 16201
loss: 1.0412088632583618,grad_norm: 0.9999993648752963, iteration: 16202
loss: 1.0609718561172485,grad_norm: 0.9999992380729785, iteration: 16203
loss: 0.9931305050849915,grad_norm: 0.9999992463148468, iteration: 16204
loss: 1.043336272239685,grad_norm: 0.999999002375544, iteration: 16205
loss: 0.9879423975944519,grad_norm: 0.9999991457417574, iteration: 16206
loss: 1.1059117317199707,grad_norm: 0.9999994612724885, iteration: 16207
loss: 0.9701178073883057,grad_norm: 0.9999990836378373, iteration: 16208
loss: 0.9878345131874084,grad_norm: 0.9337678467550564, iteration: 16209
loss: 1.0083891153335571,grad_norm: 0.9999997296529157, iteration: 16210
loss: 1.0166538953781128,grad_norm: 0.9999990465497371, iteration: 16211
loss: 1.0334680080413818,grad_norm: 0.999999470165122, iteration: 16212
loss: 1.1569933891296387,grad_norm: 0.9999996047384909, iteration: 16213
loss: 1.0093035697937012,grad_norm: 0.9999992467007169, iteration: 16214
loss: 1.0600004196166992,grad_norm: 0.9999997114281635, iteration: 16215
loss: 0.9933431148529053,grad_norm: 0.9999991584627939, iteration: 16216
loss: 1.0484334230422974,grad_norm: 0.9999994557758729, iteration: 16217
loss: 1.0263113975524902,grad_norm: 0.9999992772987404, iteration: 16218
loss: 1.0366381406784058,grad_norm: 0.9999990649156523, iteration: 16219
loss: 0.9995967149734497,grad_norm: 0.9999991460864016, iteration: 16220
loss: 1.0211292505264282,grad_norm: 0.9999992946600349, iteration: 16221
loss: 1.1112847328186035,grad_norm: 0.999999328310922, iteration: 16222
loss: 0.9871723055839539,grad_norm: 0.9999990986969881, iteration: 16223
loss: 1.0267317295074463,grad_norm: 0.9999992111485262, iteration: 16224
loss: 1.0611644983291626,grad_norm: 0.9999998794585991, iteration: 16225
loss: 0.9974668622016907,grad_norm: 0.9999993491978787, iteration: 16226
loss: 1.0332900285720825,grad_norm: 0.9999996476510079, iteration: 16227
loss: 1.1071449518203735,grad_norm: 0.9999995933314001, iteration: 16228
loss: 1.0468579530715942,grad_norm: 0.9999995350936005, iteration: 16229
loss: 1.0445523262023926,grad_norm: 0.9999996082208092, iteration: 16230
loss: 1.0510191917419434,grad_norm: 0.9999994067511959, iteration: 16231
loss: 0.9833481907844543,grad_norm: 0.9999993910647836, iteration: 16232
loss: 1.0370630025863647,grad_norm: 0.9999992172814206, iteration: 16233
loss: 1.0154696702957153,grad_norm: 0.9999991176132956, iteration: 16234
loss: 1.048228144645691,grad_norm: 0.9999993887447074, iteration: 16235
loss: 1.0056911706924438,grad_norm: 0.9999989799296914, iteration: 16236
loss: 1.033748984336853,grad_norm: 0.9999992698811573, iteration: 16237
loss: 1.052173376083374,grad_norm: 0.999999472724291, iteration: 16238
loss: 1.0382013320922852,grad_norm: 0.9999990536453882, iteration: 16239
loss: 0.9876503348350525,grad_norm: 0.9999991366289611, iteration: 16240
loss: 1.1114461421966553,grad_norm: 0.9999999140547545, iteration: 16241
loss: 1.0337843894958496,grad_norm: 0.9999989838507067, iteration: 16242
loss: 0.9925996661186218,grad_norm: 0.9999989997011759, iteration: 16243
loss: 0.9854705333709717,grad_norm: 0.9999992854741775, iteration: 16244
loss: 1.0385167598724365,grad_norm: 0.9999993924565058, iteration: 16245
loss: 1.004565715789795,grad_norm: 0.9999991053156624, iteration: 16246
loss: 1.0098974704742432,grad_norm: 0.9999992658735867, iteration: 16247
loss: 1.105121374130249,grad_norm: 0.9999997527969469, iteration: 16248
loss: 1.0472642183303833,grad_norm: 0.9999995059807556, iteration: 16249
loss: 1.0418967008590698,grad_norm: 0.9999992398411184, iteration: 16250
loss: 1.0198190212249756,grad_norm: 0.9284766073068577, iteration: 16251
loss: 0.99388587474823,grad_norm: 0.9999992078500034, iteration: 16252
loss: 1.0359119176864624,grad_norm: 0.8760264411147959, iteration: 16253
loss: 1.0700756311416626,grad_norm: 0.9999992748635862, iteration: 16254
loss: 1.0087753534317017,grad_norm: 0.9999993289428029, iteration: 16255
loss: 1.0392544269561768,grad_norm: 0.9999994752068112, iteration: 16256
loss: 1.028328537940979,grad_norm: 0.9999998927721113, iteration: 16257
loss: 1.0039666891098022,grad_norm: 0.9999997058637606, iteration: 16258
loss: 0.9515306353569031,grad_norm: 0.999999790585188, iteration: 16259
loss: 1.079281210899353,grad_norm: 0.9999991732231658, iteration: 16260
loss: 0.9637037515640259,grad_norm: 0.9999990045462861, iteration: 16261
loss: 1.066204309463501,grad_norm: 0.999999097284803, iteration: 16262
loss: 1.0233008861541748,grad_norm: 0.9999990441199036, iteration: 16263
loss: 1.0221889019012451,grad_norm: 0.9999991286511429, iteration: 16264
loss: 1.0142107009887695,grad_norm: 0.9999995483937747, iteration: 16265
loss: 1.0154318809509277,grad_norm: 0.9999996192512888, iteration: 16266
loss: 1.0059832334518433,grad_norm: 0.999999129604159, iteration: 16267
loss: 1.0226823091506958,grad_norm: 0.9999990729617697, iteration: 16268
loss: 1.0086545944213867,grad_norm: 0.9999990514183965, iteration: 16269
loss: 1.0938003063201904,grad_norm: 0.9999996480246734, iteration: 16270
loss: 1.0391640663146973,grad_norm: 0.9999994770723126, iteration: 16271
loss: 0.9860161542892456,grad_norm: 0.9999993203371379, iteration: 16272
loss: 1.1370545625686646,grad_norm: 0.9999997137221501, iteration: 16273
loss: 1.0369627475738525,grad_norm: 0.9999991631751854, iteration: 16274
loss: 0.9884687066078186,grad_norm: 0.9999992718611221, iteration: 16275
loss: 0.9994497895240784,grad_norm: 0.9999991008979845, iteration: 16276
loss: 1.0046354532241821,grad_norm: 0.9999991766433365, iteration: 16277
loss: 0.987101137638092,grad_norm: 0.9999990982360148, iteration: 16278
loss: 1.0206587314605713,grad_norm: 0.9999989386681167, iteration: 16279
loss: 1.028800368309021,grad_norm: 0.9999991091103756, iteration: 16280
loss: 1.0953854322433472,grad_norm: 0.9999995614285596, iteration: 16281
loss: 1.0643864870071411,grad_norm: 0.999999127324923, iteration: 16282
loss: 1.035215973854065,grad_norm: 0.9999993686726448, iteration: 16283
loss: 1.040468454360962,grad_norm: 0.9999992433042163, iteration: 16284
loss: 1.0347223281860352,grad_norm: 0.9999992027630992, iteration: 16285
loss: 1.0364177227020264,grad_norm: 0.9999991155977885, iteration: 16286
loss: 1.0414329767227173,grad_norm: 0.9999991747709459, iteration: 16287
loss: 1.028183937072754,grad_norm: 0.9276592277936699, iteration: 16288
loss: 0.9826059937477112,grad_norm: 0.9368934607492057, iteration: 16289
loss: 1.0594217777252197,grad_norm: 0.9999993502087249, iteration: 16290
loss: 1.023831844329834,grad_norm: 0.9999993300734832, iteration: 16291
loss: 1.0399489402770996,grad_norm: 0.9999992674718423, iteration: 16292
loss: 1.016758680343628,grad_norm: 0.8857662421738389, iteration: 16293
loss: 1.0478438138961792,grad_norm: 0.9999990304598083, iteration: 16294
loss: 1.0267784595489502,grad_norm: 0.9999997336901322, iteration: 16295
loss: 1.0226397514343262,grad_norm: 0.9999992645417537, iteration: 16296
loss: 1.051801323890686,grad_norm: 0.9999994705852049, iteration: 16297
loss: 1.0353375673294067,grad_norm: 0.9999992043790868, iteration: 16298
loss: 1.0168066024780273,grad_norm: 0.9999992561475005, iteration: 16299
loss: 1.0409355163574219,grad_norm: 0.9999993057363088, iteration: 16300
loss: 1.0365041494369507,grad_norm: 0.9999992219262955, iteration: 16301
loss: 1.0552215576171875,grad_norm: 0.9999992547684576, iteration: 16302
loss: 1.0353169441223145,grad_norm: 0.9999991726299207, iteration: 16303
loss: 1.0219144821166992,grad_norm: 0.9999994307045863, iteration: 16304
loss: 1.0297681093215942,grad_norm: 0.9999991603721239, iteration: 16305
loss: 1.0345860719680786,grad_norm: 0.9999992222739851, iteration: 16306
loss: 0.9757957458496094,grad_norm: 0.9999990042555248, iteration: 16307
loss: 1.0253599882125854,grad_norm: 0.9999991346496051, iteration: 16308
loss: 1.055104374885559,grad_norm: 0.9999993151513376, iteration: 16309
loss: 1.0462063550949097,grad_norm: 0.9999994553170634, iteration: 16310
loss: 0.9954550862312317,grad_norm: 0.999999022665862, iteration: 16311
loss: 1.0736117362976074,grad_norm: 0.9999993403435091, iteration: 16312
loss: 1.004357099533081,grad_norm: 0.9999992733877284, iteration: 16313
loss: 1.0220563411712646,grad_norm: 0.999999415888145, iteration: 16314
loss: 1.026800513267517,grad_norm: 0.9999991586110683, iteration: 16315
loss: 1.0068819522857666,grad_norm: 0.999999298510441, iteration: 16316
loss: 1.0553616285324097,grad_norm: 0.99999955281183, iteration: 16317
loss: 1.0159411430358887,grad_norm: 0.9999992756874656, iteration: 16318
loss: 1.0209499597549438,grad_norm: 0.9999991923166872, iteration: 16319
loss: 1.0423394441604614,grad_norm: 0.9999991585653366, iteration: 16320
loss: 1.0153976678848267,grad_norm: 0.999999288240873, iteration: 16321
loss: 1.0243059396743774,grad_norm: 0.9999992678957278, iteration: 16322
loss: 1.0578629970550537,grad_norm: 0.9999994094157082, iteration: 16323
loss: 1.0808943510055542,grad_norm: 0.9736455507817635, iteration: 16324
loss: 0.992756724357605,grad_norm: 0.9999991199141569, iteration: 16325
loss: 0.9868113398551941,grad_norm: 0.9999992448516526, iteration: 16326
loss: 1.0082154273986816,grad_norm: 0.9999992246806255, iteration: 16327
loss: 1.0196691751480103,grad_norm: 0.9999995408887575, iteration: 16328
loss: 1.0241583585739136,grad_norm: 0.9690179627032527, iteration: 16329
loss: 1.0412962436676025,grad_norm: 0.9999992932258356, iteration: 16330
loss: 1.0330376625061035,grad_norm: 0.9999990490960499, iteration: 16331
loss: 1.0258408784866333,grad_norm: 0.9999996336813778, iteration: 16332
loss: 1.0333548784255981,grad_norm: 0.9999994173320947, iteration: 16333
loss: 1.0245215892791748,grad_norm: 0.9999992818917706, iteration: 16334
loss: 1.028626561164856,grad_norm: 0.9795396491302893, iteration: 16335
loss: 1.0315927267074585,grad_norm: 0.9999990095718163, iteration: 16336
loss: 1.0592079162597656,grad_norm: 0.9999994836920459, iteration: 16337
loss: 1.005204677581787,grad_norm: 0.999999660304919, iteration: 16338
loss: 1.0862375497817993,grad_norm: 0.9999997040066062, iteration: 16339
loss: 0.9671077132225037,grad_norm: 0.9999990773741267, iteration: 16340
loss: 1.0732970237731934,grad_norm: 0.9999997291264009, iteration: 16341
loss: 1.052133321762085,grad_norm: 0.9999991233443949, iteration: 16342
loss: 1.0380734205245972,grad_norm: 0.999999056603023, iteration: 16343
loss: 1.0109542608261108,grad_norm: 0.9999997118941941, iteration: 16344
loss: 1.0310055017471313,grad_norm: 0.9999993891011573, iteration: 16345
loss: 1.0592074394226074,grad_norm: 0.9999992860334165, iteration: 16346
loss: 1.011834979057312,grad_norm: 0.9999996563397131, iteration: 16347
loss: 1.0345289707183838,grad_norm: 0.9999992846316755, iteration: 16348
loss: 0.960972785949707,grad_norm: 0.9999990483787572, iteration: 16349
loss: 1.070850133895874,grad_norm: 0.9999990454242825, iteration: 16350
loss: 1.0459007024765015,grad_norm: 0.9999991873117307, iteration: 16351
loss: 0.9852527379989624,grad_norm: 0.9999992289846431, iteration: 16352
loss: 0.9709092378616333,grad_norm: 0.9999990951651785, iteration: 16353
loss: 1.0668144226074219,grad_norm: 0.9999995597555184, iteration: 16354
loss: 0.9949848055839539,grad_norm: 0.9765210556263211, iteration: 16355
loss: 0.9906964898109436,grad_norm: 0.9999992616266773, iteration: 16356
loss: 1.0140570402145386,grad_norm: 0.9968918871876377, iteration: 16357
loss: 0.9802084565162659,grad_norm: 0.9999992688234455, iteration: 16358
loss: 1.026814579963684,grad_norm: 0.9999991576453204, iteration: 16359
loss: 0.9984270930290222,grad_norm: 0.9999997836603792, iteration: 16360
loss: 1.0098122358322144,grad_norm: 0.9999993010948228, iteration: 16361
loss: 0.9951872229576111,grad_norm: 0.922981068818387, iteration: 16362
loss: 1.0053492784500122,grad_norm: 0.9999996569123595, iteration: 16363
loss: 1.0246583223342896,grad_norm: 0.9999997583605924, iteration: 16364
loss: 1.020155668258667,grad_norm: 0.9999993926316993, iteration: 16365
loss: 1.0338746309280396,grad_norm: 0.9999991850415649, iteration: 16366
loss: 1.0310871601104736,grad_norm: 0.9999990690950209, iteration: 16367
loss: 1.0381627082824707,grad_norm: 0.999999195268302, iteration: 16368
loss: 1.0234694480895996,grad_norm: 0.9999991065900983, iteration: 16369
loss: 1.0315724611282349,grad_norm: 0.9999995636721112, iteration: 16370
loss: 1.0535974502563477,grad_norm: 0.9999991774646586, iteration: 16371
loss: 1.0202564001083374,grad_norm: 0.9999993258053129, iteration: 16372
loss: 0.9852433800697327,grad_norm: 0.999999370353095, iteration: 16373
loss: 1.0231667757034302,grad_norm: 0.9999990927872159, iteration: 16374
loss: 0.991969645023346,grad_norm: 0.9999993514286836, iteration: 16375
loss: 1.070867896080017,grad_norm: 0.999999562613387, iteration: 16376
loss: 0.978571891784668,grad_norm: 0.9999990014210062, iteration: 16377
loss: 1.0651408433914185,grad_norm: 0.9999993679314133, iteration: 16378
loss: 1.0670127868652344,grad_norm: 0.9294524158467784, iteration: 16379
loss: 1.0374202728271484,grad_norm: 0.9999995984534865, iteration: 16380
loss: 0.9943656921386719,grad_norm: 0.9999996798687374, iteration: 16381
loss: 0.9975100159645081,grad_norm: 0.9799426413838241, iteration: 16382
loss: 1.091037631034851,grad_norm: 0.9999992137093847, iteration: 16383
loss: 1.0344536304473877,grad_norm: 0.9999991910055852, iteration: 16384
loss: 0.993460476398468,grad_norm: 0.968886998879542, iteration: 16385
loss: 1.0410786867141724,grad_norm: 0.9999991020163856, iteration: 16386
loss: 1.0496872663497925,grad_norm: 0.9999993953829062, iteration: 16387
loss: 1.0343683958053589,grad_norm: 0.9999993378469734, iteration: 16388
loss: 1.0294989347457886,grad_norm: 0.9999993774252397, iteration: 16389
loss: 1.0054638385772705,grad_norm: 0.9999992422171794, iteration: 16390
loss: 1.0349290370941162,grad_norm: 0.9999990633402265, iteration: 16391
loss: 0.9985293745994568,grad_norm: 0.9790939488628037, iteration: 16392
loss: 1.0230776071548462,grad_norm: 0.999999367078959, iteration: 16393
loss: 1.0571677684783936,grad_norm: 0.9999998296874343, iteration: 16394
loss: 1.0299811363220215,grad_norm: 0.9999992851146816, iteration: 16395
loss: 1.0262000560760498,grad_norm: 0.983024609350611, iteration: 16396
loss: 1.0956145524978638,grad_norm: 0.9999998080994145, iteration: 16397
loss: 1.0775072574615479,grad_norm: 0.9999992841703274, iteration: 16398
loss: 1.0421545505523682,grad_norm: 0.9945491170357276, iteration: 16399
loss: 1.0323715209960938,grad_norm: 0.999999152945451, iteration: 16400
loss: 1.0406160354614258,grad_norm: 0.9999992096295605, iteration: 16401
loss: 1.041921615600586,grad_norm: 0.9999992910902827, iteration: 16402
loss: 1.0139533281326294,grad_norm: 0.9999991459550217, iteration: 16403
loss: 1.0219621658325195,grad_norm: 0.9999992913506346, iteration: 16404
loss: 1.0520353317260742,grad_norm: 0.9999993273832875, iteration: 16405
loss: 1.1901382207870483,grad_norm: 1.0000000081492593, iteration: 16406
loss: 0.9994786977767944,grad_norm: 0.9999992877085404, iteration: 16407
loss: 1.0374385118484497,grad_norm: 0.9999991174727334, iteration: 16408
loss: 1.0308959484100342,grad_norm: 0.999999262550737, iteration: 16409
loss: 1.0334903001785278,grad_norm: 0.9999990794502333, iteration: 16410
loss: 1.0172642469406128,grad_norm: 0.9999991711886816, iteration: 16411
loss: 1.0312528610229492,grad_norm: 0.9999990883420756, iteration: 16412
loss: 0.9810978174209595,grad_norm: 0.9999990365483336, iteration: 16413
loss: 0.9793767929077148,grad_norm: 0.9999990353390823, iteration: 16414
loss: 1.054531216621399,grad_norm: 0.9999993057046713, iteration: 16415
loss: 0.9823842644691467,grad_norm: 0.9999988232835666, iteration: 16416
loss: 1.0436105728149414,grad_norm: 0.9999995215461305, iteration: 16417
loss: 1.0354005098342896,grad_norm: 0.9999989557422085, iteration: 16418
loss: 1.0178751945495605,grad_norm: 0.9999991346653657, iteration: 16419
loss: 1.0818485021591187,grad_norm: 0.9999991189608931, iteration: 16420
loss: 0.9955984354019165,grad_norm: 0.9999994155562535, iteration: 16421
loss: 1.0787150859832764,grad_norm: 0.9999997542058864, iteration: 16422
loss: 1.0385316610336304,grad_norm: 0.99999952215978, iteration: 16423
loss: 0.9898471832275391,grad_norm: 0.8556710906372392, iteration: 16424
loss: 1.0229307413101196,grad_norm: 0.9999990827288031, iteration: 16425
loss: 1.054078459739685,grad_norm: 0.999999179991719, iteration: 16426
loss: 1.0600091218948364,grad_norm: 0.9999992173291375, iteration: 16427
loss: 1.0019690990447998,grad_norm: 0.962304664873489, iteration: 16428
loss: 1.0598374605178833,grad_norm: 0.999999671597656, iteration: 16429
loss: 1.0337005853652954,grad_norm: 0.9194369968313325, iteration: 16430
loss: 1.03932785987854,grad_norm: 0.9999992952419524, iteration: 16431
loss: 0.9889900088310242,grad_norm: 0.9999991322171988, iteration: 16432
loss: 1.0210713148117065,grad_norm: 0.9999994534917603, iteration: 16433
loss: 1.0220506191253662,grad_norm: 0.999999149595384, iteration: 16434
loss: 1.034738302230835,grad_norm: 0.9999993730463113, iteration: 16435
loss: 1.0251284837722778,grad_norm: 0.999999177465297, iteration: 16436
loss: 1.0016576051712036,grad_norm: 0.9999995039465743, iteration: 16437
loss: 1.0514463186264038,grad_norm: 0.9999998114309328, iteration: 16438
loss: 1.0567660331726074,grad_norm: 0.9999991296445833, iteration: 16439
loss: 1.1342525482177734,grad_norm: 0.9999991134596415, iteration: 16440
loss: 1.0173338651657104,grad_norm: 0.9999991105931928, iteration: 16441
loss: 1.1763393878936768,grad_norm: 0.9999997289007088, iteration: 16442
loss: 1.0392186641693115,grad_norm: 0.9999991727650773, iteration: 16443
loss: 1.0221269130706787,grad_norm: 0.9999994031198383, iteration: 16444
loss: 1.0559686422348022,grad_norm: 0.9999993179206268, iteration: 16445
loss: 1.0221140384674072,grad_norm: 0.9999994036800695, iteration: 16446
loss: 1.0260730981826782,grad_norm: 0.9999991634876676, iteration: 16447
loss: 1.0410358905792236,grad_norm: 0.9999992447052577, iteration: 16448
loss: 1.0338075160980225,grad_norm: 0.9999991735768068, iteration: 16449
loss: 1.0476680994033813,grad_norm: 0.9999989856350127, iteration: 16450
loss: 0.9815629720687866,grad_norm: 0.9999992215791237, iteration: 16451
loss: 1.0176860094070435,grad_norm: 0.9999994836148534, iteration: 16452
loss: 1.042112946510315,grad_norm: 0.9999994141534067, iteration: 16453
loss: 1.050645351409912,grad_norm: 0.9999992363819439, iteration: 16454
loss: 1.030579686164856,grad_norm: 0.9999993207500576, iteration: 16455
loss: 1.0231754779815674,grad_norm: 0.9999992741325899, iteration: 16456
loss: 0.9869286417961121,grad_norm: 0.9183373426578004, iteration: 16457
loss: 1.0273703336715698,grad_norm: 0.9999990808859809, iteration: 16458
loss: 1.003490924835205,grad_norm: 0.999999198687163, iteration: 16459
loss: 0.9932206869125366,grad_norm: 0.999999252425709, iteration: 16460
loss: 0.9935962557792664,grad_norm: 0.9999991089242408, iteration: 16461
loss: 1.0471091270446777,grad_norm: 0.999999233439337, iteration: 16462
loss: 1.0204038619995117,grad_norm: 0.999999089171471, iteration: 16463
loss: 1.0111569166183472,grad_norm: 0.9999995553478199, iteration: 16464
loss: 1.05690336227417,grad_norm: 0.9999992875229283, iteration: 16465
loss: 1.0265291929244995,grad_norm: 0.9999991899794782, iteration: 16466
loss: 1.0607072114944458,grad_norm: 0.9999994387143251, iteration: 16467
loss: 1.0333627462387085,grad_norm: 0.9999990179852074, iteration: 16468
loss: 1.0389224290847778,grad_norm: 0.9999992532995848, iteration: 16469
loss: 1.030571460723877,grad_norm: 0.9999992648132501, iteration: 16470
loss: 1.0211842060089111,grad_norm: 0.9999990907424442, iteration: 16471
loss: 1.0532461404800415,grad_norm: 0.999999695693425, iteration: 16472
loss: 1.073071002960205,grad_norm: 0.9999997966628138, iteration: 16473
loss: 0.9996223449707031,grad_norm: 0.9999991138473892, iteration: 16474
loss: 1.0147324800491333,grad_norm: 0.9999990497638303, iteration: 16475
loss: 0.9702445864677429,grad_norm: 0.9999989767963201, iteration: 16476
loss: 1.0632730722427368,grad_norm: 0.9999993777878488, iteration: 16477
loss: 1.0262057781219482,grad_norm: 0.9999991500467703, iteration: 16478
loss: 1.0323783159255981,grad_norm: 0.999999070875507, iteration: 16479
loss: 1.0738948583602905,grad_norm: 0.9999998503373506, iteration: 16480
loss: 1.0399307012557983,grad_norm: 0.9999995352677464, iteration: 16481
loss: 1.0366634130477905,grad_norm: 0.9999993250924002, iteration: 16482
loss: 1.056933045387268,grad_norm: 0.9999993155487316, iteration: 16483
loss: 0.9711079001426697,grad_norm: 0.9999997059261, iteration: 16484
loss: 1.0472182035446167,grad_norm: 0.9999998843722842, iteration: 16485
loss: 1.0825639963150024,grad_norm: 0.9999993067866081, iteration: 16486
loss: 1.0063748359680176,grad_norm: 0.9999991521202845, iteration: 16487
loss: 1.0516165494918823,grad_norm: 0.9999995926552893, iteration: 16488
loss: 0.9701328277587891,grad_norm: 0.9999989938019959, iteration: 16489
loss: 0.985909104347229,grad_norm: 0.9999991890868559, iteration: 16490
loss: 1.050703763961792,grad_norm: 0.9999996512985259, iteration: 16491
loss: 1.0005122423171997,grad_norm: 0.9999993283659179, iteration: 16492
loss: 1.0157358646392822,grad_norm: 0.9999996315771928, iteration: 16493
loss: 1.0383821725845337,grad_norm: 0.999999081390228, iteration: 16494
loss: 1.0906190872192383,grad_norm: 0.9999998536046539, iteration: 16495
loss: 0.9998641610145569,grad_norm: 0.9999992176857262, iteration: 16496
loss: 1.013190507888794,grad_norm: 0.8699906194956748, iteration: 16497
loss: 1.0384117364883423,grad_norm: 0.9999990831853194, iteration: 16498
loss: 0.9473469853401184,grad_norm: 0.9999993648485647, iteration: 16499
loss: 0.9915522933006287,grad_norm: 0.9999990540675207, iteration: 16500
loss: 1.0754584074020386,grad_norm: 0.9999994253083017, iteration: 16501
loss: 1.0254682302474976,grad_norm: 0.9999991131662173, iteration: 16502
loss: 1.035660982131958,grad_norm: 0.9146623575763821, iteration: 16503
loss: 1.010690689086914,grad_norm: 0.9999996142941041, iteration: 16504
loss: 1.0527608394622803,grad_norm: 0.9999992022186598, iteration: 16505
loss: 1.0325831174850464,grad_norm: 0.9999991378476626, iteration: 16506
loss: 0.9994303584098816,grad_norm: 0.9999992649664405, iteration: 16507
loss: 1.0105981826782227,grad_norm: 0.9999990869251993, iteration: 16508
loss: 1.0189052820205688,grad_norm: 0.9999990224948636, iteration: 16509
loss: 1.0233099460601807,grad_norm: 0.9999994453984729, iteration: 16510
loss: 1.0407754182815552,grad_norm: 0.999999509880286, iteration: 16511
loss: 1.0362725257873535,grad_norm: 0.999999074936862, iteration: 16512
loss: 1.0508654117584229,grad_norm: 0.9999994024420108, iteration: 16513
loss: 0.9924875497817993,grad_norm: 0.9999991263808735, iteration: 16514
loss: 1.0768303871154785,grad_norm: 0.9999997175218641, iteration: 16515
loss: 1.1101306676864624,grad_norm: 0.9999993918523473, iteration: 16516
loss: 1.0049011707305908,grad_norm: 0.9999991378113198, iteration: 16517
loss: 1.0380992889404297,grad_norm: 0.9999993915035335, iteration: 16518
loss: 1.07364821434021,grad_norm: 0.9999993670257031, iteration: 16519
loss: 1.046451210975647,grad_norm: 0.999999252305978, iteration: 16520
loss: 1.0384612083435059,grad_norm: 0.9999996205479835, iteration: 16521
loss: 1.030328392982483,grad_norm: 0.9999993554256121, iteration: 16522
loss: 1.0696390867233276,grad_norm: 0.9999992073742613, iteration: 16523
loss: 1.0294171571731567,grad_norm: 0.9999993142780674, iteration: 16524
loss: 1.0513495206832886,grad_norm: 0.9999995009849959, iteration: 16525
loss: 1.026564598083496,grad_norm: 0.9646257229309781, iteration: 16526
loss: 1.0478135347366333,grad_norm: 0.9987366322029554, iteration: 16527
loss: 1.0733957290649414,grad_norm: 0.9999996754442195, iteration: 16528
loss: 1.0112191438674927,grad_norm: 0.999999184186182, iteration: 16529
loss: 0.99936842918396,grad_norm: 0.9999991137717508, iteration: 16530
loss: 1.0814714431762695,grad_norm: 0.9999997542174093, iteration: 16531
loss: 1.0282793045043945,grad_norm: 0.9999994070228218, iteration: 16532
loss: 1.026960015296936,grad_norm: 0.9999991486903408, iteration: 16533
loss: 1.0157321691513062,grad_norm: 0.999999154623036, iteration: 16534
loss: 1.0230152606964111,grad_norm: 0.9999992920470671, iteration: 16535
loss: 1.0419511795043945,grad_norm: 0.9831057873924505, iteration: 16536
loss: 1.004539132118225,grad_norm: 0.9999995872972676, iteration: 16537
loss: 1.0685627460479736,grad_norm: 0.9999990806260272, iteration: 16538
loss: 1.045423984527588,grad_norm: 0.9999992925496746, iteration: 16539
loss: 1.0416886806488037,grad_norm: 0.999999240317521, iteration: 16540
loss: 0.9867743849754333,grad_norm: 0.9999991940858077, iteration: 16541
loss: 1.0520719289779663,grad_norm: 0.9999992718643661, iteration: 16542
loss: 1.0357956886291504,grad_norm: 0.9999990456393977, iteration: 16543
loss: 0.995769202709198,grad_norm: 0.999999351916141, iteration: 16544
loss: 1.0015332698822021,grad_norm: 0.9770208625081281, iteration: 16545
loss: 0.9911798238754272,grad_norm: 0.9999992885048332, iteration: 16546
loss: 0.9814914464950562,grad_norm: 0.964448555137093, iteration: 16547
loss: 0.9991012811660767,grad_norm: 0.9421052663674896, iteration: 16548
loss: 0.984825074672699,grad_norm: 0.9999992307592365, iteration: 16549
loss: 1.0703461170196533,grad_norm: 0.9999993315695973, iteration: 16550
loss: 1.0388951301574707,grad_norm: 0.999999009034824, iteration: 16551
loss: 0.9862523674964905,grad_norm: 0.9901371063632478, iteration: 16552
loss: 1.03014075756073,grad_norm: 0.9999992686434479, iteration: 16553
loss: 1.0271834135055542,grad_norm: 0.9999989932505576, iteration: 16554
loss: 1.028213381767273,grad_norm: 0.9999993078970931, iteration: 16555
loss: 0.9918320775032043,grad_norm: 0.9999992807286032, iteration: 16556
loss: 1.0031660795211792,grad_norm: 0.9916739659079531, iteration: 16557
loss: 1.0194796323776245,grad_norm: 0.9999996921852569, iteration: 16558
loss: 1.069301724433899,grad_norm: 0.9999992199115202, iteration: 16559
loss: 1.0127344131469727,grad_norm: 0.9999992740012016, iteration: 16560
loss: 0.9893231391906738,grad_norm: 0.9999994104350889, iteration: 16561
loss: 1.014496922492981,grad_norm: 0.9999993561321886, iteration: 16562
loss: 0.9670085310935974,grad_norm: 0.9999990877052602, iteration: 16563
loss: 1.04317307472229,grad_norm: 0.9999990826784401, iteration: 16564
loss: 0.9480811953544617,grad_norm: 0.999999134314364, iteration: 16565
loss: 1.0362337827682495,grad_norm: 0.9999991650311719, iteration: 16566
loss: 1.0353134870529175,grad_norm: 0.999999219618057, iteration: 16567
loss: 1.0444499254226685,grad_norm: 0.9999994781716761, iteration: 16568
loss: 1.0344045162200928,grad_norm: 0.9922808777867222, iteration: 16569
loss: 1.0363852977752686,grad_norm: 0.9999992818231965, iteration: 16570
loss: 1.0211907625198364,grad_norm: 0.9999991966234748, iteration: 16571
loss: 1.0089762210845947,grad_norm: 0.9999991371961775, iteration: 16572
loss: 1.0712478160858154,grad_norm: 0.9999995956538112, iteration: 16573
loss: 0.9988804459571838,grad_norm: 0.9999992189969622, iteration: 16574
loss: 1.0397980213165283,grad_norm: 0.9999996166296786, iteration: 16575
loss: 1.088326096534729,grad_norm: 0.9999992600136656, iteration: 16576
loss: 1.015450119972229,grad_norm: 0.9999992534040987, iteration: 16577
loss: 1.010336995124817,grad_norm: 0.9999992079433658, iteration: 16578
loss: 0.995954155921936,grad_norm: 0.9999991193114498, iteration: 16579
loss: 1.0246926546096802,grad_norm: 0.9999991703692328, iteration: 16580
loss: 1.027005672454834,grad_norm: 0.9999994833198129, iteration: 16581
loss: 0.9952954649925232,grad_norm: 0.9999993653012531, iteration: 16582
loss: 1.0751951932907104,grad_norm: 0.9999993999787002, iteration: 16583
loss: 1.02628755569458,grad_norm: 0.9999993066146381, iteration: 16584
loss: 1.105522871017456,grad_norm: 0.9999993353455492, iteration: 16585
loss: 1.0377111434936523,grad_norm: 0.9999991737984841, iteration: 16586
loss: 1.0524455308914185,grad_norm: 0.9999990830430148, iteration: 16587
loss: 1.015812873840332,grad_norm: 0.9999994775656017, iteration: 16588
loss: 1.0254298448562622,grad_norm: 0.9999994764684265, iteration: 16589
loss: 1.0391172170639038,grad_norm: 0.9920906020528643, iteration: 16590
loss: 1.0216244459152222,grad_norm: 0.9999991855223327, iteration: 16591
loss: 1.0295311212539673,grad_norm: 0.9999991334958006, iteration: 16592
loss: 1.0101722478866577,grad_norm: 0.9999992456111931, iteration: 16593
loss: 1.0303573608398438,grad_norm: 0.9999991805431961, iteration: 16594
loss: 1.0205942392349243,grad_norm: 0.9982041152298191, iteration: 16595
loss: 1.0522713661193848,grad_norm: 0.9999994358629396, iteration: 16596
loss: 1.0401830673217773,grad_norm: 0.9999991260819079, iteration: 16597
loss: 0.9782242178916931,grad_norm: 0.9999990602960854, iteration: 16598
loss: 1.0240663290023804,grad_norm: 0.999999526736381, iteration: 16599
loss: 1.053604006767273,grad_norm: 0.99999934975553, iteration: 16600
loss: 1.0236706733703613,grad_norm: 0.9999993183847686, iteration: 16601
loss: 1.0639177560806274,grad_norm: 0.9999996694716009, iteration: 16602
loss: 1.018384337425232,grad_norm: 0.9999992057137833, iteration: 16603
loss: 1.012543797492981,grad_norm: 0.8727717270745086, iteration: 16604
loss: 1.079250693321228,grad_norm: 0.9999992959893161, iteration: 16605
loss: 0.9932796359062195,grad_norm: 0.999999122296836, iteration: 16606
loss: 1.0375897884368896,grad_norm: 0.9999995571514378, iteration: 16607
loss: 1.0238031148910522,grad_norm: 0.9999991894258462, iteration: 16608
loss: 1.0027592182159424,grad_norm: 0.9999993686669995, iteration: 16609
loss: 1.0045207738876343,grad_norm: 0.9999992089897087, iteration: 16610
loss: 0.9796363711357117,grad_norm: 0.9999991469228112, iteration: 16611
loss: 1.063046932220459,grad_norm: 0.99999993585923, iteration: 16612
loss: 0.9866616725921631,grad_norm: 0.9999990821524327, iteration: 16613
loss: 1.0642802715301514,grad_norm: 0.9999993631501405, iteration: 16614
loss: 1.0740940570831299,grad_norm: 0.9999995996508702, iteration: 16615
loss: 1.058610439300537,grad_norm: 0.9999992153178626, iteration: 16616
loss: 1.0045928955078125,grad_norm: 0.9999991641542979, iteration: 16617
loss: 0.9878138899803162,grad_norm: 0.9999991984263057, iteration: 16618
loss: 1.0279865264892578,grad_norm: 0.9999992487700993, iteration: 16619
loss: 1.0107148885726929,grad_norm: 0.9113380505132908, iteration: 16620
loss: 0.9852796792984009,grad_norm: 0.9999990523601751, iteration: 16621
loss: 1.038262128829956,grad_norm: 0.9999991818504835, iteration: 16622
loss: 0.9917026162147522,grad_norm: 0.9999995958486068, iteration: 16623
loss: 0.9915362000465393,grad_norm: 0.9999994266252737, iteration: 16624
loss: 1.046234130859375,grad_norm: 0.9999994375551122, iteration: 16625
loss: 1.0069222450256348,grad_norm: 0.9841122375376813, iteration: 16626
loss: 1.0520588159561157,grad_norm: 0.9999993317780588, iteration: 16627
loss: 1.0093075037002563,grad_norm: 0.9999994136912254, iteration: 16628
loss: 1.045044183731079,grad_norm: 0.9999991516476606, iteration: 16629
loss: 1.006376028060913,grad_norm: 0.9999991766088898, iteration: 16630
loss: 0.9922580122947693,grad_norm: 0.9999990402516571, iteration: 16631
loss: 1.0443906784057617,grad_norm: 0.999999170193381, iteration: 16632
loss: 1.0833004713058472,grad_norm: 0.9999999157458117, iteration: 16633
loss: 1.0694464445114136,grad_norm: 0.9999993834680833, iteration: 16634
loss: 1.044182300567627,grad_norm: 0.9999997448504161, iteration: 16635
loss: 1.0267449617385864,grad_norm: 0.9999992114013349, iteration: 16636
loss: 0.982979953289032,grad_norm: 0.9999992708042739, iteration: 16637
loss: 1.0319918394088745,grad_norm: 0.9999992985458066, iteration: 16638
loss: 1.028214454650879,grad_norm: 0.9999998575402954, iteration: 16639
loss: 1.0259970426559448,grad_norm: 0.9999992713786475, iteration: 16640
loss: 1.0336958169937134,grad_norm: 0.9999993158603488, iteration: 16641
loss: 1.0375391244888306,grad_norm: 0.9999991937664954, iteration: 16642
loss: 0.9688550233840942,grad_norm: 0.9944765120811502, iteration: 16643
loss: 1.0048532485961914,grad_norm: 0.854453345905185, iteration: 16644
loss: 1.0620695352554321,grad_norm: 0.9999995534467266, iteration: 16645
loss: 1.061364769935608,grad_norm: 0.9999991559331493, iteration: 16646
loss: 1.0344277620315552,grad_norm: 0.9999996856151728, iteration: 16647
loss: 0.9982720613479614,grad_norm: 0.9999999999972441, iteration: 16648
loss: 1.117826223373413,grad_norm: 0.9999996257685917, iteration: 16649
loss: 1.0300408601760864,grad_norm: 0.9999991654424313, iteration: 16650
loss: 0.9761220216751099,grad_norm: 0.9999998839987914, iteration: 16651
loss: 1.0520503520965576,grad_norm: 0.9999991674327329, iteration: 16652
loss: 1.050208568572998,grad_norm: 0.9999994250829934, iteration: 16653
loss: 1.039238452911377,grad_norm: 0.9999992305893461, iteration: 16654
loss: 1.084709882736206,grad_norm: 0.9999991766666841, iteration: 16655
loss: 1.0279979705810547,grad_norm: 0.9999991249592932, iteration: 16656
loss: 1.0239295959472656,grad_norm: 0.999999171242676, iteration: 16657
loss: 1.048211932182312,grad_norm: 0.9999998168258183, iteration: 16658
loss: 1.071704626083374,grad_norm: 0.9999989969329371, iteration: 16659
loss: 1.0741077661514282,grad_norm: 0.9999996501423318, iteration: 16660
loss: 1.0354335308074951,grad_norm: 0.9999993610364564, iteration: 16661
loss: 1.0150946378707886,grad_norm: 0.9999990984143099, iteration: 16662
loss: 1.0274713039398193,grad_norm: 0.9999993979830002, iteration: 16663
loss: 1.049168348312378,grad_norm: 0.9999991935093284, iteration: 16664
loss: 1.036630630493164,grad_norm: 0.9999991843703294, iteration: 16665
loss: 1.0070692300796509,grad_norm: 0.9999990610241637, iteration: 16666
loss: 0.9700610637664795,grad_norm: 0.9380058883077588, iteration: 16667
loss: 1.013540506362915,grad_norm: 0.9999992188767757, iteration: 16668
loss: 1.0234342813491821,grad_norm: 0.9999993746889226, iteration: 16669
loss: 1.061415672302246,grad_norm: 0.999999221224412, iteration: 16670
loss: 1.0463924407958984,grad_norm: 0.9999993505894653, iteration: 16671
loss: 1.0415401458740234,grad_norm: 0.9999992529385623, iteration: 16672
loss: 0.9694021940231323,grad_norm: 0.9999990346100381, iteration: 16673
loss: 0.9933903217315674,grad_norm: 0.9999992645475617, iteration: 16674
loss: 1.016548991203308,grad_norm: 0.9999994257905979, iteration: 16675
loss: 1.0095250606536865,grad_norm: 0.9999991785153898, iteration: 16676
loss: 1.0115365982055664,grad_norm: 0.9999990851354144, iteration: 16677
loss: 1.0458786487579346,grad_norm: 0.9999997877707143, iteration: 16678
loss: 1.0345044136047363,grad_norm: 0.9999997687397723, iteration: 16679
loss: 0.9737961888313293,grad_norm: 0.9999994714382625, iteration: 16680
loss: 1.0208563804626465,grad_norm: 0.9999990925572627, iteration: 16681
loss: 1.0188214778900146,grad_norm: 0.9999991415308163, iteration: 16682
loss: 0.9985912442207336,grad_norm: 0.9999992050682869, iteration: 16683
loss: 1.03358793258667,grad_norm: 0.999999381774216, iteration: 16684
loss: 0.998332679271698,grad_norm: 0.9999992396042774, iteration: 16685
loss: 1.060726284980774,grad_norm: 0.9999995590557141, iteration: 16686
loss: 1.044298768043518,grad_norm: 0.9999991368695126, iteration: 16687
loss: 1.0113152265548706,grad_norm: 0.99999918633151, iteration: 16688
loss: 1.020763874053955,grad_norm: 0.9425141328929069, iteration: 16689
loss: 1.027748703956604,grad_norm: 0.9999994389609107, iteration: 16690
loss: 0.9891940951347351,grad_norm: 0.9999990376830131, iteration: 16691
loss: 1.025309681892395,grad_norm: 0.9999991684368139, iteration: 16692
loss: 1.0652374029159546,grad_norm: 0.9999996891425705, iteration: 16693
loss: 1.028293490409851,grad_norm: 0.9999993655353508, iteration: 16694
loss: 1.0124459266662598,grad_norm: 0.9999990444021436, iteration: 16695
loss: 1.0386852025985718,grad_norm: 0.9999991451903224, iteration: 16696
loss: 1.028967261314392,grad_norm: 0.999999082996045, iteration: 16697
loss: 0.999553382396698,grad_norm: 0.9999990966661364, iteration: 16698
loss: 1.0532819032669067,grad_norm: 0.9999999202331099, iteration: 16699
loss: 0.9465492963790894,grad_norm: 0.9999992206784252, iteration: 16700
loss: 1.0253612995147705,grad_norm: 0.9999991364873339, iteration: 16701
loss: 1.100388526916504,grad_norm: 0.9999996972808401, iteration: 16702
loss: 1.0373841524124146,grad_norm: 0.9999991764978485, iteration: 16703
loss: 1.0719610452651978,grad_norm: 0.9999998425728799, iteration: 16704
loss: 1.0226703882217407,grad_norm: 0.9999993590996664, iteration: 16705
loss: 1.0659444332122803,grad_norm: 0.9999994125384318, iteration: 16706
loss: 1.0483484268188477,grad_norm: 0.9999998222654877, iteration: 16707
loss: 0.9628334641456604,grad_norm: 0.9999991447668166, iteration: 16708
loss: 1.022108793258667,grad_norm: 0.9999991979290049, iteration: 16709
loss: 1.0165376663208008,grad_norm: 0.9999991593555579, iteration: 16710
loss: 1.0560102462768555,grad_norm: 0.9999991790390009, iteration: 16711
loss: 0.9852309823036194,grad_norm: 0.9999992583273709, iteration: 16712
loss: 1.0542073249816895,grad_norm: 0.9646154082880237, iteration: 16713
loss: 1.3460359573364258,grad_norm: 0.9999997768116002, iteration: 16714
loss: 1.0573441982269287,grad_norm: 0.999999664171179, iteration: 16715
loss: 1.0098801851272583,grad_norm: 0.9999990704403265, iteration: 16716
loss: 0.9571204781532288,grad_norm: 0.9999991710198501, iteration: 16717
loss: 1.020906925201416,grad_norm: 0.9999992754138687, iteration: 16718
loss: 1.0343765020370483,grad_norm: 0.9999996280723606, iteration: 16719
loss: 0.9918197393417358,grad_norm: 0.9999993550231421, iteration: 16720
loss: 1.0533250570297241,grad_norm: 0.9623493245431002, iteration: 16721
loss: 1.0313185453414917,grad_norm: 0.9999992189714808, iteration: 16722
loss: 1.0733357667922974,grad_norm: 0.9999994501197041, iteration: 16723
loss: 1.0098538398742676,grad_norm: 0.9999991869376263, iteration: 16724
loss: 1.0217726230621338,grad_norm: 0.9999992359735871, iteration: 16725
loss: 1.0336400270462036,grad_norm: 0.9999994193409737, iteration: 16726
loss: 1.0121090412139893,grad_norm: 0.9999990493033556, iteration: 16727
loss: 1.0035569667816162,grad_norm: 0.9999991389307699, iteration: 16728
loss: 1.0571361780166626,grad_norm: 0.999999128094991, iteration: 16729
loss: 1.02292799949646,grad_norm: 0.999999479593726, iteration: 16730
loss: 0.9882903099060059,grad_norm: 0.9999991592166274, iteration: 16731
loss: 1.0253514051437378,grad_norm: 0.9999991334339867, iteration: 16732
loss: 1.0230754613876343,grad_norm: 0.9999995217628637, iteration: 16733
loss: 1.005855917930603,grad_norm: 0.9999993052007858, iteration: 16734
loss: 1.1004858016967773,grad_norm: 0.9999993333650172, iteration: 16735
loss: 0.9959157705307007,grad_norm: 0.9999993821397835, iteration: 16736
loss: 1.0134437084197998,grad_norm: 0.9999994657537177, iteration: 16737
loss: 0.9913201332092285,grad_norm: 0.9999992612721902, iteration: 16738
loss: 1.0562820434570312,grad_norm: 0.9999996475141797, iteration: 16739
loss: 1.0004359483718872,grad_norm: 0.9999991324275199, iteration: 16740
loss: 1.1071346998214722,grad_norm: 0.999999716769172, iteration: 16741
loss: 1.0159472227096558,grad_norm: 0.9999996421157291, iteration: 16742
loss: 1.076236367225647,grad_norm: 0.9999994074236906, iteration: 16743
loss: 0.9967420697212219,grad_norm: 0.9999993978187596, iteration: 16744
loss: 0.9535810351371765,grad_norm: 0.9999989860359223, iteration: 16745
loss: 0.9829877018928528,grad_norm: 0.9999992936946933, iteration: 16746
loss: 0.9981333017349243,grad_norm: 0.9999993335881776, iteration: 16747
loss: 1.0537633895874023,grad_norm: 0.9999991204703517, iteration: 16748
loss: 1.0596554279327393,grad_norm: 0.9999992658311023, iteration: 16749
loss: 1.0226272344589233,grad_norm: 0.9999992045489395, iteration: 16750
loss: 1.0573911666870117,grad_norm: 0.9999997807251839, iteration: 16751
loss: 1.0299557447433472,grad_norm: 0.9999995397669708, iteration: 16752
loss: 1.0463000535964966,grad_norm: 0.9999993939963112, iteration: 16753
loss: 1.011699914932251,grad_norm: 0.9999994578155632, iteration: 16754
loss: 0.9990005493164062,grad_norm: 0.9999990832810232, iteration: 16755
loss: 1.0321685075759888,grad_norm: 0.9999999365895574, iteration: 16756
loss: 1.028058409690857,grad_norm: 0.999999119685606, iteration: 16757
loss: 0.9976838827133179,grad_norm: 0.9999994910279987, iteration: 16758
loss: 1.0162087678909302,grad_norm: 0.9999993833865997, iteration: 16759
loss: 1.0494900941848755,grad_norm: 0.9999992822264002, iteration: 16760
loss: 0.9910827875137329,grad_norm: 0.9999991854998563, iteration: 16761
loss: 0.9935551285743713,grad_norm: 0.9999991651720667, iteration: 16762
loss: 1.0175493955612183,grad_norm: 0.9999993573189201, iteration: 16763
loss: 1.0395374298095703,grad_norm: 0.9999997084183039, iteration: 16764
loss: 1.0406019687652588,grad_norm: 0.9999991569452582, iteration: 16765
loss: 1.0502264499664307,grad_norm: 0.9999998422803843, iteration: 16766
loss: 1.0673750638961792,grad_norm: 0.999999081197713, iteration: 16767
loss: 1.0293751955032349,grad_norm: 0.999999079143142, iteration: 16768
loss: 1.109240174293518,grad_norm: 0.9999996017498163, iteration: 16769
loss: 1.0003278255462646,grad_norm: 0.9999997141956382, iteration: 16770
loss: 1.095478892326355,grad_norm: 0.9999996985891821, iteration: 16771
loss: 1.013601541519165,grad_norm: 0.9999992906261164, iteration: 16772
loss: 1.0317368507385254,grad_norm: 0.999999061887455, iteration: 16773
loss: 1.0807366371154785,grad_norm: 0.9999997391706023, iteration: 16774
loss: 1.0671135187149048,grad_norm: 0.9999993124803084, iteration: 16775
loss: 1.0186245441436768,grad_norm: 0.9999991765314619, iteration: 16776
loss: 0.9804988503456116,grad_norm: 0.9117579167347533, iteration: 16777
loss: 1.0350160598754883,grad_norm: 0.9999993847995504, iteration: 16778
loss: 1.0171246528625488,grad_norm: 0.989215641271417, iteration: 16779
loss: 1.0313643217086792,grad_norm: 0.9999991575488927, iteration: 16780
loss: 1.0501457452774048,grad_norm: 0.9999995699822382, iteration: 16781
loss: 0.9849828481674194,grad_norm: 0.9999996362308947, iteration: 16782
loss: 1.0370057821273804,grad_norm: 0.9999994468689413, iteration: 16783
loss: 1.035174012184143,grad_norm: 0.9999996435535957, iteration: 16784
loss: 1.0125764608383179,grad_norm: 0.999999076140611, iteration: 16785
loss: 1.0508604049682617,grad_norm: 0.9999991447096332, iteration: 16786
loss: 1.0654977560043335,grad_norm: 0.9999996571840113, iteration: 16787
loss: 1.0487123727798462,grad_norm: 0.9999990779659463, iteration: 16788
loss: 0.9887184500694275,grad_norm: 0.9999995381062616, iteration: 16789
loss: 1.0005137920379639,grad_norm: 0.9999991858362354, iteration: 16790
loss: 1.06719970703125,grad_norm: 0.9999994301382296, iteration: 16791
loss: 1.0253974199295044,grad_norm: 0.9999990695802153, iteration: 16792
loss: 1.033113718032837,grad_norm: 0.999999174003727, iteration: 16793
loss: 1.0559033155441284,grad_norm: 0.999999178667318, iteration: 16794
loss: 1.0149561166763306,grad_norm: 0.9093898355618429, iteration: 16795
loss: 1.0187184810638428,grad_norm: 0.9999991767615721, iteration: 16796
loss: 1.0704286098480225,grad_norm: 0.9999993527753878, iteration: 16797
loss: 1.0310829877853394,grad_norm: 0.9999993169734132, iteration: 16798
loss: 0.96169513463974,grad_norm: 0.999999232965183, iteration: 16799
loss: 1.0279260873794556,grad_norm: 0.9999991535298101, iteration: 16800
loss: 1.0131704807281494,grad_norm: 0.9999997443267714, iteration: 16801
loss: 1.0339651107788086,grad_norm: 0.9999990609266635, iteration: 16802
loss: 1.066799283027649,grad_norm: 0.9999992564918638, iteration: 16803
loss: 1.016045093536377,grad_norm: 0.9964764816133641, iteration: 16804
loss: 0.9785318970680237,grad_norm: 0.9999992358042707, iteration: 16805
loss: 1.018922209739685,grad_norm: 0.999999150220916, iteration: 16806
loss: 1.0192052125930786,grad_norm: 0.9999997948354366, iteration: 16807
loss: 1.016818642616272,grad_norm: 0.9999992497660238, iteration: 16808
loss: 1.0129953622817993,grad_norm: 0.9999995482562024, iteration: 16809
loss: 0.9839097261428833,grad_norm: 0.9999991716860374, iteration: 16810
loss: 1.0299919843673706,grad_norm: 0.9999991754431818, iteration: 16811
loss: 1.0845513343811035,grad_norm: 0.9999993329949861, iteration: 16812
loss: 1.0187588930130005,grad_norm: 0.9999992527873967, iteration: 16813
loss: 1.0158478021621704,grad_norm: 0.9999992980133365, iteration: 16814
loss: 1.0261142253875732,grad_norm: 0.9999991265818385, iteration: 16815
loss: 1.0224297046661377,grad_norm: 0.9999991522129109, iteration: 16816
loss: 1.0187269449234009,grad_norm: 0.9999993800205742, iteration: 16817
loss: 1.042946457862854,grad_norm: 0.9999997306776557, iteration: 16818
loss: 1.0625971555709839,grad_norm: 0.99999895152977, iteration: 16819
loss: 1.009549617767334,grad_norm: 0.9999990248926997, iteration: 16820
loss: 1.0254241228103638,grad_norm: 0.9999991498747207, iteration: 16821
loss: 1.0320615768432617,grad_norm: 0.999999279738343, iteration: 16822
loss: 0.9649527668952942,grad_norm: 0.9999994343347728, iteration: 16823
loss: 1.0576043128967285,grad_norm: 0.9999991995350817, iteration: 16824
loss: 0.9968544244766235,grad_norm: 0.9866731423997914, iteration: 16825
loss: 1.0077399015426636,grad_norm: 0.9999990481928439, iteration: 16826
loss: 0.989877462387085,grad_norm: 0.9999991408781972, iteration: 16827
loss: 1.084112286567688,grad_norm: 0.9999995948339804, iteration: 16828
loss: 0.9532886147499084,grad_norm: 0.9999992681852964, iteration: 16829
loss: 0.9937371611595154,grad_norm: 0.9999991735057855, iteration: 16830
loss: 1.0538854598999023,grad_norm: 0.9999991151820111, iteration: 16831
loss: 0.9975830316543579,grad_norm: 0.9999991017012295, iteration: 16832
loss: 1.0348774194717407,grad_norm: 0.999999201128962, iteration: 16833
loss: 1.0206929445266724,grad_norm: 0.9999992721197867, iteration: 16834
loss: 1.0569076538085938,grad_norm: 0.9999998552331297, iteration: 16835
loss: 1.0382916927337646,grad_norm: 0.9999992046595666, iteration: 16836
loss: 1.0496641397476196,grad_norm: 0.9999992938960325, iteration: 16837
loss: 0.997155487537384,grad_norm: 0.9197520411385122, iteration: 16838
loss: 0.9907497763633728,grad_norm: 0.9999990453184159, iteration: 16839
loss: 1.026910424232483,grad_norm: 0.9999991513858496, iteration: 16840
loss: 1.0626287460327148,grad_norm: 0.9999994672267868, iteration: 16841
loss: 0.9944913983345032,grad_norm: 0.9999992514898229, iteration: 16842
loss: 1.042594075202942,grad_norm: 0.9999993865379806, iteration: 16843
loss: 0.9762876629829407,grad_norm: 0.999999066575417, iteration: 16844
loss: 1.0960015058517456,grad_norm: 0.9999994495572189, iteration: 16845
loss: 1.0351718664169312,grad_norm: 0.9999992919780796, iteration: 16846
loss: 1.007295846939087,grad_norm: 0.9999991837777242, iteration: 16847
loss: 1.0091584920883179,grad_norm: 0.9062414460974979, iteration: 16848
loss: 1.0015169382095337,grad_norm: 0.8955846224355364, iteration: 16849
loss: 1.0365937948226929,grad_norm: 0.9999995178519747, iteration: 16850
loss: 0.997347891330719,grad_norm: 0.9999992778949538, iteration: 16851
loss: 1.0487167835235596,grad_norm: 0.9999991475796396, iteration: 16852
loss: 1.003794550895691,grad_norm: 0.9999991105624321, iteration: 16853
loss: 0.9837136268615723,grad_norm: 0.9345299116641718, iteration: 16854
loss: 1.0271152257919312,grad_norm: 0.9999992002116452, iteration: 16855
loss: 1.0028903484344482,grad_norm: 0.9999991113975832, iteration: 16856
loss: 1.073057770729065,grad_norm: 0.9999997223459579, iteration: 16857
loss: 0.988269567489624,grad_norm: 0.9999991438269359, iteration: 16858
loss: 0.975403904914856,grad_norm: 0.9999990527565161, iteration: 16859
loss: 1.0080113410949707,grad_norm: 0.9999993434548947, iteration: 16860
loss: 1.0242054462432861,grad_norm: 0.9999991105524709, iteration: 16861
loss: 1.1150184869766235,grad_norm: 0.9999995497741172, iteration: 16862
loss: 1.0785434246063232,grad_norm: 0.9999994689565181, iteration: 16863
loss: 1.0260616540908813,grad_norm: 0.9999991439732632, iteration: 16864
loss: 0.9970381855964661,grad_norm: 0.9871273563064783, iteration: 16865
loss: 1.0746384859085083,grad_norm: 0.9999991402441272, iteration: 16866
loss: 1.0589853525161743,grad_norm: 0.9999996167281853, iteration: 16867
loss: 1.0122824907302856,grad_norm: 0.9999991412168611, iteration: 16868
loss: 1.0039796829223633,grad_norm: 0.999999109617813, iteration: 16869
loss: 1.0284905433654785,grad_norm: 0.9999991747678428, iteration: 16870
loss: 0.9905218482017517,grad_norm: 0.999999143427629, iteration: 16871
loss: 1.0616086721420288,grad_norm: 0.9999997401184174, iteration: 16872
loss: 1.0425976514816284,grad_norm: 0.999999513783667, iteration: 16873
loss: 1.042013168334961,grad_norm: 0.9999990870354938, iteration: 16874
loss: 1.0404893159866333,grad_norm: 0.8878334076659391, iteration: 16875
loss: 1.0212953090667725,grad_norm: 0.9999995072110821, iteration: 16876
loss: 1.0002838373184204,grad_norm: 0.9999992863808275, iteration: 16877
loss: 0.9738345146179199,grad_norm: 0.9999991145583803, iteration: 16878
loss: 1.0185024738311768,grad_norm: 0.999999190956844, iteration: 16879
loss: 1.017341136932373,grad_norm: 0.9999991909978526, iteration: 16880
loss: 1.1373882293701172,grad_norm: 0.9999994974480012, iteration: 16881
loss: 1.021480917930603,grad_norm: 0.999999099868928, iteration: 16882
loss: 1.0290279388427734,grad_norm: 0.9999994706473254, iteration: 16883
loss: 1.0246142148971558,grad_norm: 0.9999991771374749, iteration: 16884
loss: 1.0482405424118042,grad_norm: 0.9999992051565219, iteration: 16885
loss: 1.0028173923492432,grad_norm: 0.999999276178663, iteration: 16886
loss: 1.0112066268920898,grad_norm: 0.9999991423741669, iteration: 16887
loss: 1.0711222887039185,grad_norm: 0.9999991611453458, iteration: 16888
loss: 1.0477042198181152,grad_norm: 0.9999994545823176, iteration: 16889
loss: 0.9974561333656311,grad_norm: 0.9999990589387852, iteration: 16890
loss: 1.0149855613708496,grad_norm: 0.9999992626151826, iteration: 16891
loss: 1.0599205493927002,grad_norm: 0.9999996386725276, iteration: 16892
loss: 1.0241161584854126,grad_norm: 0.9999997861554785, iteration: 16893
loss: 1.0102812051773071,grad_norm: 0.9999991753462005, iteration: 16894
loss: 1.0349098443984985,grad_norm: 0.9999995245918903, iteration: 16895
loss: 1.030055046081543,grad_norm: 0.9999992961838069, iteration: 16896
loss: 1.029544711112976,grad_norm: 0.9999996423889101, iteration: 16897
loss: 1.0154017210006714,grad_norm: 0.9601996549137403, iteration: 16898
loss: 0.991757333278656,grad_norm: 0.9999990725918835, iteration: 16899
loss: 0.9875890612602234,grad_norm: 0.9999998110227619, iteration: 16900
loss: 1.011692762374878,grad_norm: 0.9999992268020221, iteration: 16901
loss: 1.03003990650177,grad_norm: 0.9999994789778149, iteration: 16902
loss: 1.0214570760726929,grad_norm: 0.9751965735436409, iteration: 16903
loss: 1.0233867168426514,grad_norm: 0.999999454310606, iteration: 16904
loss: 0.9855943322181702,grad_norm: 0.9999991340165246, iteration: 16905
loss: 0.9949507117271423,grad_norm: 0.9999992871980591, iteration: 16906
loss: 1.0427876710891724,grad_norm: 0.9999992100473609, iteration: 16907
loss: 1.0585315227508545,grad_norm: 0.9999996334106509, iteration: 16908
loss: 1.0522013902664185,grad_norm: 0.9999991601644987, iteration: 16909
loss: 1.0974074602127075,grad_norm: 0.9999997152724367, iteration: 16910
loss: 0.9967138171195984,grad_norm: 0.9999993919757404, iteration: 16911
loss: 1.0038319826126099,grad_norm: 0.9999993240047591, iteration: 16912
loss: 1.0473495721817017,grad_norm: 0.999999646085449, iteration: 16913
loss: 1.0355006456375122,grad_norm: 0.9999992894167292, iteration: 16914
loss: 1.0259196758270264,grad_norm: 0.9999993186346471, iteration: 16915
loss: 1.0391966104507446,grad_norm: 0.9999994801800726, iteration: 16916
loss: 1.0470268726348877,grad_norm: 0.9999992090073517, iteration: 16917
loss: 1.002063512802124,grad_norm: 0.9999990986334566, iteration: 16918
loss: 1.0386549234390259,grad_norm: 0.9999990582792821, iteration: 16919
loss: 1.0671648979187012,grad_norm: 0.9999993842207126, iteration: 16920
loss: 0.9728187918663025,grad_norm: 0.999999155387956, iteration: 16921
loss: 1.017008662223816,grad_norm: 0.9999991369603862, iteration: 16922
loss: 1.053635597229004,grad_norm: 0.9999994734380659, iteration: 16923
loss: 0.9995306730270386,grad_norm: 0.9999990824222129, iteration: 16924
loss: 1.006576418876648,grad_norm: 0.9999993100685345, iteration: 16925
loss: 1.0202385187149048,grad_norm: 0.9595995312485022, iteration: 16926
loss: 1.0097414255142212,grad_norm: 0.9760590864524957, iteration: 16927
loss: 1.0097006559371948,grad_norm: 0.9999996416062498, iteration: 16928
loss: 1.0447279214859009,grad_norm: 0.9999991802365457, iteration: 16929
loss: 0.9838401675224304,grad_norm: 0.9999995104236521, iteration: 16930
loss: 0.9727739691734314,grad_norm: 0.9999993779033095, iteration: 16931
loss: 1.0246344804763794,grad_norm: 0.9565617828125142, iteration: 16932
loss: 1.056533932685852,grad_norm: 0.9999994911357638, iteration: 16933
loss: 1.0131418704986572,grad_norm: 0.9999992900424626, iteration: 16934
loss: 1.0251847505569458,grad_norm: 0.9999991823414384, iteration: 16935
loss: 1.0180630683898926,grad_norm: 0.9999992077815513, iteration: 16936
loss: 1.0720363855361938,grad_norm: 0.9999991885467321, iteration: 16937
loss: 1.013370156288147,grad_norm: 0.9999992415034331, iteration: 16938
loss: 0.9656869769096375,grad_norm: 0.999999116477439, iteration: 16939
loss: 1.0324212312698364,grad_norm: 0.9999991933464557, iteration: 16940
loss: 0.9929285645484924,grad_norm: 0.9999995747860162, iteration: 16941
loss: 0.9563019275665283,grad_norm: 0.9999992731728412, iteration: 16942
loss: 1.0138816833496094,grad_norm: 0.999999367010316, iteration: 16943
loss: 1.0015419721603394,grad_norm: 0.9499420881427107, iteration: 16944
loss: 1.0153001546859741,grad_norm: 0.9999995711157138, iteration: 16945
loss: 0.9894208312034607,grad_norm: 0.9999991495476678, iteration: 16946
loss: 1.0160624980926514,grad_norm: 0.9999993368339488, iteration: 16947
loss: 1.0066611766815186,grad_norm: 0.999999713748058, iteration: 16948
loss: 1.0405306816101074,grad_norm: 0.999999341286945, iteration: 16949
loss: 0.9629314541816711,grad_norm: 0.9999990559590939, iteration: 16950
loss: 1.010208010673523,grad_norm: 0.9999991157478207, iteration: 16951
loss: 1.0057792663574219,grad_norm: 0.9999990255903383, iteration: 16952
loss: 1.0431666374206543,grad_norm: 0.9999994195402068, iteration: 16953
loss: 1.0356098413467407,grad_norm: 0.9999991606882662, iteration: 16954
loss: 1.0271821022033691,grad_norm: 0.9999990166684265, iteration: 16955
loss: 1.0207793712615967,grad_norm: 0.9999993561738547, iteration: 16956
loss: 0.9904863834381104,grad_norm: 0.9999991165716996, iteration: 16957
loss: 1.05852472782135,grad_norm: 0.9999993182543944, iteration: 16958
loss: 1.0697612762451172,grad_norm: 0.9999992968172589, iteration: 16959
loss: 1.0417553186416626,grad_norm: 0.9999991156792, iteration: 16960
loss: 0.9865320920944214,grad_norm: 0.9311312713497244, iteration: 16961
loss: 1.023269534111023,grad_norm: 0.9999990177777178, iteration: 16962
loss: 1.0477778911590576,grad_norm: 0.9999992975720728, iteration: 16963
loss: 1.0261369943618774,grad_norm: 0.980141520556971, iteration: 16964
loss: 1.0338311195373535,grad_norm: 0.9999993259509449, iteration: 16965
loss: 1.0194921493530273,grad_norm: 0.9999997571174847, iteration: 16966
loss: 0.9783015251159668,grad_norm: 0.9999993543843199, iteration: 16967
loss: 1.0389058589935303,grad_norm: 0.9999992556124747, iteration: 16968
loss: 1.028752088546753,grad_norm: 0.9999992167218502, iteration: 16969
loss: 1.029718279838562,grad_norm: 0.9999993494288164, iteration: 16970
loss: 1.0268906354904175,grad_norm: 0.9999992155904426, iteration: 16971
loss: 1.080844521522522,grad_norm: 0.9999991977873112, iteration: 16972
loss: 1.0019418001174927,grad_norm: 0.9999990666770311, iteration: 16973
loss: 1.0338866710662842,grad_norm: 0.9999995473422268, iteration: 16974
loss: 1.0230224132537842,grad_norm: 0.999999135587495, iteration: 16975
loss: 1.0152297019958496,grad_norm: 0.9034671094268195, iteration: 16976
loss: 1.0363720655441284,grad_norm: 0.9999992412182716, iteration: 16977
loss: 1.084654688835144,grad_norm: 0.9999995811530021, iteration: 16978
loss: 1.0129319429397583,grad_norm: 0.9999990792560156, iteration: 16979
loss: 1.0132280588150024,grad_norm: 0.9456025874151848, iteration: 16980
loss: 1.0541011095046997,grad_norm: 0.9999991862082529, iteration: 16981
loss: 1.0115290880203247,grad_norm: 0.9999994138020143, iteration: 16982
loss: 1.040217399597168,grad_norm: 0.9999993226006415, iteration: 16983
loss: 1.0125750303268433,grad_norm: 0.9999994646299377, iteration: 16984
loss: 1.0485965013504028,grad_norm: 0.9999992901059434, iteration: 16985
loss: 1.0106571912765503,grad_norm: 0.9999991316834657, iteration: 16986
loss: 0.9605185389518738,grad_norm: 0.9921275811634599, iteration: 16987
loss: 1.04542875289917,grad_norm: 0.999999188297384, iteration: 16988
loss: 1.0299220085144043,grad_norm: 0.9999993460863038, iteration: 16989
loss: 1.0138059854507446,grad_norm: 0.9999991953546338, iteration: 16990
loss: 1.0534634590148926,grad_norm: 0.999999225698287, iteration: 16991
loss: 1.0338791608810425,grad_norm: 0.9999991880372034, iteration: 16992
loss: 1.0760389566421509,grad_norm: 0.9999993323124607, iteration: 16993
loss: 1.0339570045471191,grad_norm: 0.9999991774806367, iteration: 16994
loss: 1.018886923789978,grad_norm: 0.9999991261530725, iteration: 16995
loss: 1.0094094276428223,grad_norm: 0.9490633985806686, iteration: 16996
loss: 1.0398575067520142,grad_norm: 0.9668690482356991, iteration: 16997
loss: 1.1171865463256836,grad_norm: 0.9999997782563026, iteration: 16998
loss: 1.0322273969650269,grad_norm: 0.9999992855826068, iteration: 16999
loss: 1.0089489221572876,grad_norm: 0.9999992053120892, iteration: 17000
loss: 1.0322489738464355,grad_norm: 0.9999992112742815, iteration: 17001
loss: 1.0318419933319092,grad_norm: 0.9999994609648906, iteration: 17002
loss: 1.0914438962936401,grad_norm: 0.9999993191186239, iteration: 17003
loss: 1.0613148212432861,grad_norm: 0.9999996596477323, iteration: 17004
loss: 0.9789040088653564,grad_norm: 0.9999991305141239, iteration: 17005
loss: 1.0284888744354248,grad_norm: 0.9999996026434312, iteration: 17006
loss: 1.0172157287597656,grad_norm: 0.9999990950924542, iteration: 17007
loss: 1.000036358833313,grad_norm: 0.9999993421448086, iteration: 17008
loss: 0.996186375617981,grad_norm: 0.9999990344778155, iteration: 17009
loss: 1.0597652196884155,grad_norm: 0.9999997359495498, iteration: 17010
loss: 1.0580236911773682,grad_norm: 0.9999992225581771, iteration: 17011
loss: 1.00259268283844,grad_norm: 0.9999989820598942, iteration: 17012
loss: 1.0418275594711304,grad_norm: 0.999999625495065, iteration: 17013
loss: 1.0421011447906494,grad_norm: 0.9999996117138675, iteration: 17014
loss: 1.0216344594955444,grad_norm: 0.9999991196124117, iteration: 17015
loss: 1.0172717571258545,grad_norm: 0.9999990272814572, iteration: 17016
loss: 1.0229393243789673,grad_norm: 0.999999323874593, iteration: 17017
loss: 1.1074599027633667,grad_norm: 0.9999998160440354, iteration: 17018
loss: 0.9949477910995483,grad_norm: 0.9563170546877333, iteration: 17019
loss: 1.0404088497161865,grad_norm: 0.9999997250718282, iteration: 17020
loss: 1.0238887071609497,grad_norm: 0.9999989951605773, iteration: 17021
loss: 1.0649161338806152,grad_norm: 0.9999990927781107, iteration: 17022
loss: 1.0120444297790527,grad_norm: 0.9999993345792492, iteration: 17023
loss: 1.0437986850738525,grad_norm: 0.9999998282833721, iteration: 17024
loss: 1.0612587928771973,grad_norm: 0.9999992850112647, iteration: 17025
loss: 1.0231921672821045,grad_norm: 0.9999997417775467, iteration: 17026
loss: 1.0362156629562378,grad_norm: 0.9999992143912346, iteration: 17027
loss: 1.053011417388916,grad_norm: 0.9999990704506037, iteration: 17028
loss: 1.0048965215682983,grad_norm: 0.999999393153028, iteration: 17029
loss: 0.9848054647445679,grad_norm: 0.9999991126017587, iteration: 17030
loss: 1.0670703649520874,grad_norm: 0.999999814854646, iteration: 17031
loss: 1.0208427906036377,grad_norm: 0.9999991836306159, iteration: 17032
loss: 1.0504131317138672,grad_norm: 0.9999994733479046, iteration: 17033
loss: 1.0511831045150757,grad_norm: 0.9999994357537331, iteration: 17034
loss: 0.9746147990226746,grad_norm: 0.9999990736614941, iteration: 17035
loss: 1.0211501121520996,grad_norm: 0.9999993433271652, iteration: 17036
loss: 1.076386570930481,grad_norm: 0.9999994238452516, iteration: 17037
loss: 0.9696773290634155,grad_norm: 0.9999991665417114, iteration: 17038
loss: 1.0722163915634155,grad_norm: 0.9999994934156006, iteration: 17039
loss: 0.9941715598106384,grad_norm: 0.9999993424417093, iteration: 17040
loss: 1.0297937393188477,grad_norm: 0.9999995970130245, iteration: 17041
loss: 0.9943191409111023,grad_norm: 0.9415014387793553, iteration: 17042
loss: 1.0185621976852417,grad_norm: 0.999999474661256, iteration: 17043
loss: 1.0315513610839844,grad_norm: 0.9999990974741597, iteration: 17044
loss: 1.004425287246704,grad_norm: 0.9999992695202564, iteration: 17045
loss: 0.9671361446380615,grad_norm: 0.9999991319185804, iteration: 17046
loss: 1.0008147954940796,grad_norm: 0.9999995474045008, iteration: 17047
loss: 1.0776660442352295,grad_norm: 0.9999996491750839, iteration: 17048
loss: 1.045005440711975,grad_norm: 0.9999997294971925, iteration: 17049
loss: 1.0836387872695923,grad_norm: 0.9999999021036876, iteration: 17050
loss: 1.0410963296890259,grad_norm: 0.9999996241367975, iteration: 17051
loss: 1.0123872756958008,grad_norm: 0.9999992241246825, iteration: 17052
loss: 1.0345203876495361,grad_norm: 0.9999995540620027, iteration: 17053
loss: 1.0574203729629517,grad_norm: 0.9999995436971242, iteration: 17054
loss: 1.0703920125961304,grad_norm: 0.9999991472253632, iteration: 17055
loss: 1.0160601139068604,grad_norm: 0.9999990833857768, iteration: 17056
loss: 1.014785647392273,grad_norm: 0.9999991921651817, iteration: 17057
loss: 1.0762372016906738,grad_norm: 0.9999995582901042, iteration: 17058
loss: 1.0459498167037964,grad_norm: 0.9999993640793426, iteration: 17059
loss: 1.1313161849975586,grad_norm: 0.9999993508046628, iteration: 17060
loss: 1.0418386459350586,grad_norm: 0.9999991575133816, iteration: 17061
loss: 1.0357307195663452,grad_norm: 0.9999996367802387, iteration: 17062
loss: 1.0446326732635498,grad_norm: 0.9999995034669618, iteration: 17063
loss: 1.083757996559143,grad_norm: 0.9999997000069547, iteration: 17064
loss: 0.9636726975440979,grad_norm: 0.9999993923347036, iteration: 17065
loss: 1.0317168235778809,grad_norm: 0.9999991835789748, iteration: 17066
loss: 1.0158495903015137,grad_norm: 0.9999990231741493, iteration: 17067
loss: 0.9861482381820679,grad_norm: 0.9999992388683017, iteration: 17068
loss: 1.0250897407531738,grad_norm: 0.9999997754061636, iteration: 17069
loss: 1.0415806770324707,grad_norm: 0.9999990892462293, iteration: 17070
loss: 1.0530413389205933,grad_norm: 0.9999992084326236, iteration: 17071
loss: 1.061710000038147,grad_norm: 0.9999990264202044, iteration: 17072
loss: 1.0881880521774292,grad_norm: 0.9999993470469071, iteration: 17073
loss: 1.0179747343063354,grad_norm: 0.9999990433382804, iteration: 17074
loss: 1.0217407941818237,grad_norm: 0.9999991413429142, iteration: 17075
loss: 1.028762698173523,grad_norm: 0.9999992654703835, iteration: 17076
loss: 1.0864673852920532,grad_norm: 0.9999998117560666, iteration: 17077
loss: 1.0482693910598755,grad_norm: 0.9248920792360649, iteration: 17078
loss: 1.0149824619293213,grad_norm: 0.9999992536706038, iteration: 17079
loss: 1.016015887260437,grad_norm: 0.9999991653343255, iteration: 17080
loss: 1.0700969696044922,grad_norm: 0.9999995639193257, iteration: 17081
loss: 1.0409140586853027,grad_norm: 0.9999992416771033, iteration: 17082
loss: 1.030157208442688,grad_norm: 0.9999994214100258, iteration: 17083
loss: 0.9874100089073181,grad_norm: 0.9999991925048297, iteration: 17084
loss: 1.0065128803253174,grad_norm: 0.9999991615145577, iteration: 17085
loss: 1.0344318151474,grad_norm: 0.9999998323277669, iteration: 17086
loss: 1.0028228759765625,grad_norm: 0.9999993164478824, iteration: 17087
loss: 1.0137014389038086,grad_norm: 0.9999992338019688, iteration: 17088
loss: 1.0164388418197632,grad_norm: 0.9999990765882109, iteration: 17089
loss: 1.007232427597046,grad_norm: 0.9999992343424451, iteration: 17090
loss: 1.0238531827926636,grad_norm: 0.9999996196256625, iteration: 17091
loss: 0.9889549612998962,grad_norm: 0.9999993033978081, iteration: 17092
loss: 1.0573537349700928,grad_norm: 0.9999992586177177, iteration: 17093
loss: 1.0209561586380005,grad_norm: 0.9999996938340224, iteration: 17094
loss: 1.0700469017028809,grad_norm: 0.9999993946496036, iteration: 17095
loss: 1.0481030941009521,grad_norm: 0.9999995693715604, iteration: 17096
loss: 0.9840530753135681,grad_norm: 0.9999995886102826, iteration: 17097
loss: 1.0457161664962769,grad_norm: 0.9206080290265585, iteration: 17098
loss: 1.0501536130905151,grad_norm: 0.9999997656023403, iteration: 17099
loss: 0.9984152913093567,grad_norm: 0.9999992100577919, iteration: 17100
loss: 1.0143234729766846,grad_norm: 0.9999990855084394, iteration: 17101
loss: 0.9824132919311523,grad_norm: 0.9999990704687035, iteration: 17102
loss: 1.03575599193573,grad_norm: 0.9999992374751264, iteration: 17103
loss: 1.045631766319275,grad_norm: 0.9999994858709865, iteration: 17104
loss: 0.9750121831893921,grad_norm: 0.9999990901287162, iteration: 17105
loss: 1.0654332637786865,grad_norm: 0.999999383537493, iteration: 17106
loss: 1.0313897132873535,grad_norm: 0.9999998199601858, iteration: 17107
loss: 1.029524326324463,grad_norm: 0.9999992347956119, iteration: 17108
loss: 1.0469025373458862,grad_norm: 0.9999995942043458, iteration: 17109
loss: 1.0002477169036865,grad_norm: 0.9999993019197183, iteration: 17110
loss: 1.0067347288131714,grad_norm: 0.9999991064336585, iteration: 17111
loss: 0.9862213134765625,grad_norm: 0.9999997594990984, iteration: 17112
loss: 1.0227068662643433,grad_norm: 0.9999997128329458, iteration: 17113
loss: 0.9692649841308594,grad_norm: 0.9999991230638531, iteration: 17114
loss: 0.9814490079879761,grad_norm: 0.9874490613909168, iteration: 17115
loss: 1.04909086227417,grad_norm: 0.999999795870329, iteration: 17116
loss: 1.015407919883728,grad_norm: 0.9999991155019835, iteration: 17117
loss: 1.0871775150299072,grad_norm: 0.9999994232423157, iteration: 17118
loss: 1.1087760925292969,grad_norm: 0.9999993682708419, iteration: 17119
loss: 1.0677562952041626,grad_norm: 0.9999997651581622, iteration: 17120
loss: 0.9936272501945496,grad_norm: 0.9832245507232522, iteration: 17121
loss: 1.0152785778045654,grad_norm: 0.9999993141357901, iteration: 17122
loss: 1.0883837938308716,grad_norm: 0.9999998798222105, iteration: 17123
loss: 1.0464140176773071,grad_norm: 0.9999999082404017, iteration: 17124
loss: 1.0356311798095703,grad_norm: 0.9999993288032982, iteration: 17125
loss: 1.059240698814392,grad_norm: 0.9999995251259601, iteration: 17126
loss: 1.069634199142456,grad_norm: 0.9999993450672355, iteration: 17127
loss: 1.0325689315795898,grad_norm: 0.9999995508909368, iteration: 17128
loss: 1.015000343322754,grad_norm: 0.9572672506566862, iteration: 17129
loss: 1.0724782943725586,grad_norm: 0.9999998470143004, iteration: 17130
loss: 1.0480371713638306,grad_norm: 0.9999992839644862, iteration: 17131
loss: 1.069098949432373,grad_norm: 0.9999994324410241, iteration: 17132
loss: 1.0493872165679932,grad_norm: 0.9999994581101077, iteration: 17133
loss: 1.0581449270248413,grad_norm: 0.9999994967367059, iteration: 17134
loss: 1.0982860326766968,grad_norm: 0.9999996828932998, iteration: 17135
loss: 1.0633478164672852,grad_norm: 0.9999997114373677, iteration: 17136
loss: 1.163089632987976,grad_norm: 0.9999997948773452, iteration: 17137
loss: 1.0177806615829468,grad_norm: 0.9999992411319177, iteration: 17138
loss: 1.162459373474121,grad_norm: 0.9999998100570795, iteration: 17139
loss: 1.0607014894485474,grad_norm: 0.9999993438958964, iteration: 17140
loss: 1.03434419631958,grad_norm: 0.9999991338890715, iteration: 17141
loss: 1.0059871673583984,grad_norm: 0.9999992309617368, iteration: 17142
loss: 1.011134386062622,grad_norm: 0.9999990570772745, iteration: 17143
loss: 1.012945294380188,grad_norm: 0.9999990545502928, iteration: 17144
loss: 1.087354063987732,grad_norm: 0.9999992802165301, iteration: 17145
loss: 1.027986764907837,grad_norm: 0.9999990854968849, iteration: 17146
loss: 1.0419576168060303,grad_norm: 0.9999995795612809, iteration: 17147
loss: 1.0316380262374878,grad_norm: 0.9999993390583822, iteration: 17148
loss: 0.9844486117362976,grad_norm: 0.9999991927067947, iteration: 17149
loss: 0.9858020544052124,grad_norm: 0.9999992316601792, iteration: 17150
loss: 1.1014103889465332,grad_norm: 0.9999998041084264, iteration: 17151
loss: 1.0148992538452148,grad_norm: 0.9999991887467904, iteration: 17152
loss: 1.024304747581482,grad_norm: 0.9999991306298512, iteration: 17153
loss: 1.0543155670166016,grad_norm: 0.9999995617192428, iteration: 17154
loss: 1.0183244943618774,grad_norm: 0.9999994029863212, iteration: 17155
loss: 1.0622974634170532,grad_norm: 0.9999996086374887, iteration: 17156
loss: 1.0773924589157104,grad_norm: 0.9999997001829832, iteration: 17157
loss: 1.0249773263931274,grad_norm: 0.9999994026258517, iteration: 17158
loss: 1.0314521789550781,grad_norm: 0.9999992555509802, iteration: 17159
loss: 1.0053051710128784,grad_norm: 0.999999637385941, iteration: 17160
loss: 1.0537915229797363,grad_norm: 0.9999992710715143, iteration: 17161
loss: 0.9735391736030579,grad_norm: 0.9999991860703955, iteration: 17162
loss: 1.0351814031600952,grad_norm: 0.9999992295341843, iteration: 17163
loss: 1.0156735181808472,grad_norm: 0.9999992667275018, iteration: 17164
loss: 1.051500678062439,grad_norm: 0.9999991405363324, iteration: 17165
loss: 1.040054202079773,grad_norm: 0.9999990353227257, iteration: 17166
loss: 1.033078670501709,grad_norm: 0.9999990518147723, iteration: 17167
loss: 1.007698655128479,grad_norm: 0.9999991513275756, iteration: 17168
loss: 1.0388755798339844,grad_norm: 0.999999312078996, iteration: 17169
loss: 0.9881201386451721,grad_norm: 0.999999080891044, iteration: 17170
loss: 0.9995880722999573,grad_norm: 0.9999992208776324, iteration: 17171
loss: 1.004697561264038,grad_norm: 0.9999995054810841, iteration: 17172
loss: 1.0105701684951782,grad_norm: 0.9999994266370076, iteration: 17173
loss: 0.9963210821151733,grad_norm: 0.9999989860760663, iteration: 17174
loss: 1.0502688884735107,grad_norm: 0.9999991176953583, iteration: 17175
loss: 1.051058053970337,grad_norm: 0.9999993770770477, iteration: 17176
loss: 1.0267940759658813,grad_norm: 0.9999993051008289, iteration: 17177
loss: 1.0500074625015259,grad_norm: 0.9999996482511365, iteration: 17178
loss: 1.0171326398849487,grad_norm: 0.999999117343194, iteration: 17179
loss: 0.9925613403320312,grad_norm: 0.9999991271706471, iteration: 17180
loss: 1.0902156829833984,grad_norm: 0.9999994041010482, iteration: 17181
loss: 1.061392903327942,grad_norm: 0.9999993064529104, iteration: 17182
loss: 1.0726886987686157,grad_norm: 0.9999994883968474, iteration: 17183
loss: 0.9971243143081665,grad_norm: 0.9999991966034159, iteration: 17184
loss: 1.0497348308563232,grad_norm: 0.9999991861846418, iteration: 17185
loss: 1.0157114267349243,grad_norm: 0.9999990255898408, iteration: 17186
loss: 1.0079396963119507,grad_norm: 0.9999992048922005, iteration: 17187
loss: 1.0110540390014648,grad_norm: 0.962409345356943, iteration: 17188
loss: 0.9856218099594116,grad_norm: 0.9999990351864705, iteration: 17189
loss: 1.0188939571380615,grad_norm: 0.9999995720690292, iteration: 17190
loss: 0.965246319770813,grad_norm: 0.9999991521332563, iteration: 17191
loss: 1.0208911895751953,grad_norm: 0.999999046475116, iteration: 17192
loss: 1.064428687095642,grad_norm: 0.9999991427578541, iteration: 17193
loss: 1.0047367811203003,grad_norm: 0.9999991738242645, iteration: 17194
loss: 1.0431411266326904,grad_norm: 0.9999992472449231, iteration: 17195
loss: 1.028519868850708,grad_norm: 0.9999992443795129, iteration: 17196
loss: 1.0132842063903809,grad_norm: 0.9999990226134502, iteration: 17197
loss: 1.0320066213607788,grad_norm: 0.9746212813364568, iteration: 17198
loss: 1.022774338722229,grad_norm: 0.9999991982930152, iteration: 17199
loss: 1.0197194814682007,grad_norm: 0.9999990000642087, iteration: 17200
loss: 1.0340118408203125,grad_norm: 0.9999991629179596, iteration: 17201
loss: 1.0119504928588867,grad_norm: 0.9999990668714746, iteration: 17202
loss: 1.0540932416915894,grad_norm: 0.9999992930602265, iteration: 17203
loss: 1.001622200012207,grad_norm: 0.9999992272863647, iteration: 17204
loss: 1.008150339126587,grad_norm: 0.9999994995434498, iteration: 17205
loss: 0.9835201501846313,grad_norm: 0.9999990921699797, iteration: 17206
loss: 1.0616445541381836,grad_norm: 0.9999991721456984, iteration: 17207
loss: 0.9835631847381592,grad_norm: 0.9999993268163818, iteration: 17208
loss: 1.0127581357955933,grad_norm: 0.9999996024222927, iteration: 17209
loss: 0.976701557636261,grad_norm: 0.9999992346314918, iteration: 17210
loss: 1.0173860788345337,grad_norm: 0.9999996125648657, iteration: 17211
loss: 1.0497134923934937,grad_norm: 0.9999990444662434, iteration: 17212
loss: 1.0290755033493042,grad_norm: 0.9999989952061181, iteration: 17213
loss: 1.0475482940673828,grad_norm: 0.9988962644606105, iteration: 17214
loss: 1.0151877403259277,grad_norm: 0.9999993959793858, iteration: 17215
loss: 1.013404130935669,grad_norm: 0.9999992938800887, iteration: 17216
loss: 0.9931862950325012,grad_norm: 0.9999992412288842, iteration: 17217
loss: 1.0114672183990479,grad_norm: 0.9999991033849805, iteration: 17218
loss: 1.000862717628479,grad_norm: 0.9660438442365943, iteration: 17219
loss: 1.00373375415802,grad_norm: 0.9999991302538603, iteration: 17220
loss: 0.9937336444854736,grad_norm: 0.9999996542613413, iteration: 17221
loss: 0.9957017302513123,grad_norm: 0.9999995056950565, iteration: 17222
loss: 1.089645504951477,grad_norm: 0.9999992122108673, iteration: 17223
loss: 1.0039095878601074,grad_norm: 0.9999994205406222, iteration: 17224
loss: 1.0513710975646973,grad_norm: 0.9999994713742951, iteration: 17225
loss: 1.0064696073532104,grad_norm: 0.9999995815901859, iteration: 17226
loss: 1.0564112663269043,grad_norm: 0.9999990280263507, iteration: 17227
loss: 0.9772002696990967,grad_norm: 0.9999991148585028, iteration: 17228
loss: 1.0010205507278442,grad_norm: 0.999999057459496, iteration: 17229
loss: 1.0289255380630493,grad_norm: 0.9999993926457608, iteration: 17230
loss: 1.029381275177002,grad_norm: 0.9999991149024682, iteration: 17231
loss: 1.0182853937149048,grad_norm: 0.9999997502515677, iteration: 17232
loss: 1.0786298513412476,grad_norm: 0.9999996782347165, iteration: 17233
loss: 0.9880678057670593,grad_norm: 0.9999992378878851, iteration: 17234
loss: 0.9576186537742615,grad_norm: 0.9867920204501873, iteration: 17235
loss: 1.0183590650558472,grad_norm: 0.9999993475928465, iteration: 17236
loss: 1.048261046409607,grad_norm: 0.9999991086890628, iteration: 17237
loss: 1.0048421621322632,grad_norm: 0.9999991530980548, iteration: 17238
loss: 1.0339902639389038,grad_norm: 0.9999990384715198, iteration: 17239
loss: 0.9798411726951599,grad_norm: 0.9165239874111007, iteration: 17240
loss: 1.033437967300415,grad_norm: 0.9999997065767429, iteration: 17241
loss: 1.08647882938385,grad_norm: 0.9999998318172106, iteration: 17242
loss: 0.9819689989089966,grad_norm: 0.9999994818130824, iteration: 17243
loss: 1.0002251863479614,grad_norm: 0.9999993577232911, iteration: 17244
loss: 1.0015093088150024,grad_norm: 0.9999991857414847, iteration: 17245
loss: 0.9998266100883484,grad_norm: 0.9999996684499262, iteration: 17246
loss: 0.9921088218688965,grad_norm: 0.9999991090739566, iteration: 17247
loss: 1.0741521120071411,grad_norm: 0.9999995877869553, iteration: 17248
loss: 1.0329837799072266,grad_norm: 0.9999991555306422, iteration: 17249
loss: 1.0239423513412476,grad_norm: 0.9999992951613405, iteration: 17250
loss: 1.018310785293579,grad_norm: 0.9999990143064609, iteration: 17251
loss: 1.049033522605896,grad_norm: 0.9999992552034754, iteration: 17252
loss: 1.0002093315124512,grad_norm: 0.9999993605743518, iteration: 17253
loss: 1.0584241151809692,grad_norm: 0.9999997043273314, iteration: 17254
loss: 1.101880669593811,grad_norm: 0.9999993256950586, iteration: 17255
loss: 0.9711775183677673,grad_norm: 0.9999992115661894, iteration: 17256
loss: 0.9941784143447876,grad_norm: 0.9999990508901836, iteration: 17257
loss: 1.0494533777236938,grad_norm: 0.9999989495096576, iteration: 17258
loss: 1.0279507637023926,grad_norm: 0.999999350130876, iteration: 17259
loss: 1.0268340110778809,grad_norm: 0.9999995829824796, iteration: 17260
loss: 1.0147606134414673,grad_norm: 0.9999994854367743, iteration: 17261
loss: 1.0387295484542847,grad_norm: 0.9999994820614433, iteration: 17262
loss: 1.0275241136550903,grad_norm: 0.9999994296468793, iteration: 17263
loss: 1.0737799406051636,grad_norm: 0.9999999351654948, iteration: 17264
loss: 1.034434199333191,grad_norm: 0.9999992236661625, iteration: 17265
loss: 1.0731823444366455,grad_norm: 0.9999993504537384, iteration: 17266
loss: 0.9958534240722656,grad_norm: 0.7814394607434768, iteration: 17267
loss: 1.0833992958068848,grad_norm: 0.9999997733188459, iteration: 17268
loss: 1.0017461776733398,grad_norm: 0.9810884690023299, iteration: 17269
loss: 1.0524413585662842,grad_norm: 0.9644852794867896, iteration: 17270
loss: 1.0076714754104614,grad_norm: 0.9999993623016762, iteration: 17271
loss: 1.0748345851898193,grad_norm: 0.9999995764878956, iteration: 17272
loss: 1.0337884426116943,grad_norm: 0.9999990024129854, iteration: 17273
loss: 1.026397705078125,grad_norm: 0.9999991257988213, iteration: 17274
loss: 1.0348033905029297,grad_norm: 0.9999993877921954, iteration: 17275
loss: 1.052836537361145,grad_norm: 0.9999994456743021, iteration: 17276
loss: 1.040753960609436,grad_norm: 0.9999992056868573, iteration: 17277
loss: 0.9774740934371948,grad_norm: 0.999999137529646, iteration: 17278
loss: 0.9954068064689636,grad_norm: 0.9999992598837543, iteration: 17279
loss: 1.0270406007766724,grad_norm: 0.9999993910768751, iteration: 17280
loss: 1.0616750717163086,grad_norm: 0.999999190227911, iteration: 17281
loss: 1.0197705030441284,grad_norm: 0.9999991904794907, iteration: 17282
loss: 1.0110774040222168,grad_norm: 0.9999992260724879, iteration: 17283
loss: 0.9908246397972107,grad_norm: 0.9999991348937294, iteration: 17284
loss: 1.0368785858154297,grad_norm: 0.9999994689226404, iteration: 17285
loss: 1.0235207080841064,grad_norm: 0.9999990854922323, iteration: 17286
loss: 1.0539021492004395,grad_norm: 0.9999991149978376, iteration: 17287
loss: 1.1311111450195312,grad_norm: 0.9999996858568478, iteration: 17288
loss: 1.0142902135849,grad_norm: 0.9999990633375239, iteration: 17289
loss: 1.0546218156814575,grad_norm: 0.9999995399655892, iteration: 17290
loss: 1.0031774044036865,grad_norm: 0.9999995017870106, iteration: 17291
loss: 0.9848137497901917,grad_norm: 0.9999991689113614, iteration: 17292
loss: 0.9972058534622192,grad_norm: 0.9999990199278777, iteration: 17293
loss: 1.00227952003479,grad_norm: 0.9999992245083451, iteration: 17294
loss: 1.0596086978912354,grad_norm: 0.9999997093400592, iteration: 17295
loss: 1.071462631225586,grad_norm: 0.9999997122551543, iteration: 17296
loss: 1.0201481580734253,grad_norm: 0.9999994002388004, iteration: 17297
loss: 1.0581620931625366,grad_norm: 0.9999997560212944, iteration: 17298
loss: 1.1141453981399536,grad_norm: 0.9999999090132148, iteration: 17299
loss: 1.051674485206604,grad_norm: 0.9999993190115538, iteration: 17300
loss: 1.037713646888733,grad_norm: 0.9999992892002573, iteration: 17301
loss: 1.0358505249023438,grad_norm: 0.999999449949835, iteration: 17302
loss: 1.040792465209961,grad_norm: 0.9999993181196138, iteration: 17303
loss: 1.0130451917648315,grad_norm: 0.999999323087496, iteration: 17304
loss: 1.0265840291976929,grad_norm: 0.9999991428641902, iteration: 17305
loss: 1.0517972707748413,grad_norm: 0.9999998231631768, iteration: 17306
loss: 1.0649220943450928,grad_norm: 0.9999997389480854, iteration: 17307
loss: 1.0220390558242798,grad_norm: 0.9999994475626446, iteration: 17308
loss: 1.0218833684921265,grad_norm: 0.9567981589092946, iteration: 17309
loss: 1.0535703897476196,grad_norm: 0.9999997117619384, iteration: 17310
loss: 1.0009198188781738,grad_norm: 0.9999992162784102, iteration: 17311
loss: 1.0321240425109863,grad_norm: 0.9999994453864269, iteration: 17312
loss: 1.0626095533370972,grad_norm: 0.9999992410369964, iteration: 17313
loss: 1.1011605262756348,grad_norm: 0.9999994058822846, iteration: 17314
loss: 1.0197454690933228,grad_norm: 0.8852429675915828, iteration: 17315
loss: 0.9819439649581909,grad_norm: 0.9999991730282703, iteration: 17316
loss: 1.018395185470581,grad_norm: 0.99999932059335, iteration: 17317
loss: 1.009605050086975,grad_norm: 0.9999994802511052, iteration: 17318
loss: 1.0154633522033691,grad_norm: 0.9999997141622653, iteration: 17319
loss: 0.9959312677383423,grad_norm: 0.9999991421384862, iteration: 17320
loss: 1.0104968547821045,grad_norm: 0.9999991441759551, iteration: 17321
loss: 1.0722743272781372,grad_norm: 0.9999997638330465, iteration: 17322
loss: 1.0248682498931885,grad_norm: 0.9999991764737364, iteration: 17323
loss: 1.004906177520752,grad_norm: 0.9999991002624249, iteration: 17324
loss: 1.0456949472427368,grad_norm: 0.9999996297380038, iteration: 17325
loss: 1.0130820274353027,grad_norm: 0.9999990367524658, iteration: 17326
loss: 1.0431973934173584,grad_norm: 0.9999992835390829, iteration: 17327
loss: 1.0170999765396118,grad_norm: 0.9483370798909494, iteration: 17328
loss: 1.0176259279251099,grad_norm: 0.9999990536445817, iteration: 17329
loss: 0.999491274356842,grad_norm: 0.9906621674223308, iteration: 17330
loss: 1.0278542041778564,grad_norm: 0.9990336484258403, iteration: 17331
loss: 0.9997700452804565,grad_norm: 0.9999990502806769, iteration: 17332
loss: 0.9738705158233643,grad_norm: 0.9999993797039566, iteration: 17333
loss: 1.0359116792678833,grad_norm: 0.9999990823490015, iteration: 17334
loss: 0.9927840232849121,grad_norm: 0.9999991219838716, iteration: 17335
loss: 1.0100700855255127,grad_norm: 0.9999991307389566, iteration: 17336
loss: 1.013608694076538,grad_norm: 0.9999995958077939, iteration: 17337
loss: 1.017515778541565,grad_norm: 0.9999997193781321, iteration: 17338
loss: 1.004907488822937,grad_norm: 0.9999991147523473, iteration: 17339
loss: 0.9868430495262146,grad_norm: 0.9999992542454741, iteration: 17340
loss: 1.054358720779419,grad_norm: 0.9999991570339894, iteration: 17341
loss: 1.0229467153549194,grad_norm: 0.9999992997692884, iteration: 17342
loss: 0.9816173315048218,grad_norm: 0.9999990408089315, iteration: 17343
loss: 0.9963572025299072,grad_norm: 0.9999998786601391, iteration: 17344
loss: 1.0699585676193237,grad_norm: 0.9999996180023071, iteration: 17345
loss: 1.0228912830352783,grad_norm: 0.9999990988141224, iteration: 17346
loss: 1.0240046977996826,grad_norm: 0.9999991510064998, iteration: 17347
loss: 1.054756999015808,grad_norm: 0.9999996728816462, iteration: 17348
loss: 1.0859594345092773,grad_norm: 0.9999995965585315, iteration: 17349
loss: 1.0254780054092407,grad_norm: 0.9999990237500647, iteration: 17350
loss: 0.9795469045639038,grad_norm: 0.9999990493832265, iteration: 17351
loss: 0.9647791385650635,grad_norm: 0.9999990901558218, iteration: 17352
loss: 1.005753993988037,grad_norm: 0.9999992723731939, iteration: 17353
loss: 1.1602556705474854,grad_norm: 0.9999999089750876, iteration: 17354
loss: 0.9593910574913025,grad_norm: 0.9999991831430848, iteration: 17355
loss: 1.0744062662124634,grad_norm: 0.9999991583548722, iteration: 17356
loss: 0.9944691061973572,grad_norm: 0.9999992854699594, iteration: 17357
loss: 0.9902860522270203,grad_norm: 0.9999993519227038, iteration: 17358
loss: 0.9958864450454712,grad_norm: 0.9999992284162701, iteration: 17359
loss: 0.994745135307312,grad_norm: 0.9999991074386386, iteration: 17360
loss: 1.051400899887085,grad_norm: 0.9999993087202964, iteration: 17361
loss: 1.0750943422317505,grad_norm: 0.9999996664772872, iteration: 17362
loss: 1.004729151725769,grad_norm: 0.9999990570851619, iteration: 17363
loss: 1.0037214756011963,grad_norm: 0.9999991721582998, iteration: 17364
loss: 1.0412203073501587,grad_norm: 0.9999992858515712, iteration: 17365
loss: 0.9836241006851196,grad_norm: 0.9999992228133321, iteration: 17366
loss: 1.013933777809143,grad_norm: 0.9999994704477132, iteration: 17367
loss: 0.9785364270210266,grad_norm: 0.9999994392060031, iteration: 17368
loss: 1.1228153705596924,grad_norm: 0.9999994889958638, iteration: 17369
loss: 1.1130410432815552,grad_norm: 0.9999996127960894, iteration: 17370
loss: 1.0635912418365479,grad_norm: 0.9999992157657641, iteration: 17371
loss: 0.9995185136795044,grad_norm: 0.9704787249947487, iteration: 17372
loss: 1.0368293523788452,grad_norm: 0.9999994926013347, iteration: 17373
loss: 1.040198802947998,grad_norm: 0.9999993682293729, iteration: 17374
loss: 0.985451877117157,grad_norm: 0.9999994227339699, iteration: 17375
loss: 1.0476455688476562,grad_norm: 0.9999992033740066, iteration: 17376
loss: 1.0559661388397217,grad_norm: 0.9999992919108436, iteration: 17377
loss: 1.0146487951278687,grad_norm: 0.9999991381828123, iteration: 17378
loss: 1.0202345848083496,grad_norm: 0.9999991089869742, iteration: 17379
loss: 0.9897110462188721,grad_norm: 0.9999991912880617, iteration: 17380
loss: 1.0312919616699219,grad_norm: 0.9999994693480474, iteration: 17381
loss: 1.0156149864196777,grad_norm: 0.9999991085273744, iteration: 17382
loss: 1.0379393100738525,grad_norm: 0.9999995477936746, iteration: 17383
loss: 1.0171903371810913,grad_norm: 0.9999990006930931, iteration: 17384
loss: 0.9865571856498718,grad_norm: 0.9999991835278779, iteration: 17385
loss: 1.033389687538147,grad_norm: 0.9999994275351469, iteration: 17386
loss: 0.9970538020133972,grad_norm: 0.9999991084642784, iteration: 17387
loss: 1.0192338228225708,grad_norm: 0.9999991171100633, iteration: 17388
loss: 1.0510448217391968,grad_norm: 0.9999990100308774, iteration: 17389
loss: 1.0656906366348267,grad_norm: 0.9999992755510371, iteration: 17390
loss: 1.0469762086868286,grad_norm: 0.9999994068766747, iteration: 17391
loss: 1.0574544668197632,grad_norm: 0.9999994148754175, iteration: 17392
loss: 1.0233055353164673,grad_norm: 0.999999152595083, iteration: 17393
loss: 0.9987882375717163,grad_norm: 0.9800598192120282, iteration: 17394
loss: 0.9792104363441467,grad_norm: 0.9999989877709049, iteration: 17395
loss: 1.0654971599578857,grad_norm: 0.9999994421396073, iteration: 17396
loss: 1.0292329788208008,grad_norm: 0.999999112351617, iteration: 17397
loss: 1.024962306022644,grad_norm: 0.9999991124770522, iteration: 17398
loss: 1.005517601966858,grad_norm: 0.9999991900391351, iteration: 17399
loss: 1.0447465181350708,grad_norm: 0.9999991728224933, iteration: 17400
loss: 1.0059421062469482,grad_norm: 0.9999997852211764, iteration: 17401
loss: 1.0744953155517578,grad_norm: 0.9999997044040329, iteration: 17402
loss: 0.9989534616470337,grad_norm: 0.9999990071842954, iteration: 17403
loss: 1.0315141677856445,grad_norm: 0.9999991682099646, iteration: 17404
loss: 1.0395464897155762,grad_norm: 0.9999991339301015, iteration: 17405
loss: 1.1143732070922852,grad_norm: 0.9999995802705174, iteration: 17406
loss: 1.028228759765625,grad_norm: 0.9999992670318815, iteration: 17407
loss: 1.0317190885543823,grad_norm: 0.9372890460665885, iteration: 17408
loss: 1.0192445516586304,grad_norm: 0.9999991823500164, iteration: 17409
loss: 1.0073392391204834,grad_norm: 0.9999992329118096, iteration: 17410
loss: 1.047573208808899,grad_norm: 0.9999996499127712, iteration: 17411
loss: 1.0485267639160156,grad_norm: 0.9645432640629488, iteration: 17412
loss: 1.0070053339004517,grad_norm: 0.9999991004689498, iteration: 17413
loss: 1.0129196643829346,grad_norm: 0.9999993540927476, iteration: 17414
loss: 0.9921825528144836,grad_norm: 0.9999991317878348, iteration: 17415
loss: 1.0164693593978882,grad_norm: 0.9999993530484881, iteration: 17416
loss: 1.0056517124176025,grad_norm: 0.9999996330974669, iteration: 17417
loss: 1.0434702634811401,grad_norm: 0.999999303054172, iteration: 17418
loss: 1.010123372077942,grad_norm: 0.9999994285637887, iteration: 17419
loss: 1.0143522024154663,grad_norm: 0.999999362031894, iteration: 17420
loss: 1.0284130573272705,grad_norm: 0.999999306304445, iteration: 17421
loss: 1.0117137432098389,grad_norm: 0.999999328569306, iteration: 17422
loss: 1.0122451782226562,grad_norm: 0.9999992826753493, iteration: 17423
loss: 1.0084428787231445,grad_norm: 0.9999993243460696, iteration: 17424
loss: 1.038489818572998,grad_norm: 0.9999991073736021, iteration: 17425
loss: 1.0436826944351196,grad_norm: 0.9999990898576202, iteration: 17426
loss: 1.0622082948684692,grad_norm: 0.999999345663106, iteration: 17427
loss: 1.088258981704712,grad_norm: 0.9999995448201217, iteration: 17428
loss: 1.0209438800811768,grad_norm: 0.999999415283439, iteration: 17429
loss: 1.034485101699829,grad_norm: 0.9999993899571089, iteration: 17430
loss: 0.9585670828819275,grad_norm: 0.9999993418835792, iteration: 17431
loss: 1.0211381912231445,grad_norm: 0.9999992016267526, iteration: 17432
loss: 1.0157498121261597,grad_norm: 0.999999187311103, iteration: 17433
loss: 1.0259861946105957,grad_norm: 1.0000000169827392, iteration: 17434
loss: 1.0008081197738647,grad_norm: 0.9999991751606774, iteration: 17435
loss: 1.0106357336044312,grad_norm: 0.9999990020425733, iteration: 17436
loss: 1.053719401359558,grad_norm: 0.978067715272538, iteration: 17437
loss: 1.005896806716919,grad_norm: 0.9547456889040986, iteration: 17438
loss: 1.0019866228103638,grad_norm: 0.9965259540109841, iteration: 17439
loss: 1.0407321453094482,grad_norm: 0.9999993426990134, iteration: 17440
loss: 1.0572869777679443,grad_norm: 0.9999990913117922, iteration: 17441
loss: 1.07173752784729,grad_norm: 0.9999993264330529, iteration: 17442
loss: 1.0299959182739258,grad_norm: 0.9935075759396313, iteration: 17443
loss: 1.0394161939620972,grad_norm: 0.9999994970525119, iteration: 17444
loss: 1.0311801433563232,grad_norm: 0.9999990157135471, iteration: 17445
loss: 0.9961861371994019,grad_norm: 0.9999991653184308, iteration: 17446
loss: 0.982122004032135,grad_norm: 0.9999990325714834, iteration: 17447
loss: 1.0261610746383667,grad_norm: 0.9999993209588097, iteration: 17448
loss: 1.0335733890533447,grad_norm: 0.9999989744892182, iteration: 17449
loss: 1.0578038692474365,grad_norm: 0.9999992249524221, iteration: 17450
loss: 1.0523923635482788,grad_norm: 0.9999992134150723, iteration: 17451
loss: 0.9869574308395386,grad_norm: 0.9999990772768066, iteration: 17452
loss: 1.0465995073318481,grad_norm: 0.99999919171072, iteration: 17453
loss: 1.0244537591934204,grad_norm: 0.9999992619162886, iteration: 17454
loss: 1.0574134588241577,grad_norm: 0.9999996801489777, iteration: 17455
loss: 1.0624008178710938,grad_norm: 0.9999998136404477, iteration: 17456
loss: 1.0210193395614624,grad_norm: 0.9999993190028235, iteration: 17457
loss: 0.9972076416015625,grad_norm: 0.9999993902324108, iteration: 17458
loss: 1.0766496658325195,grad_norm: 0.999999721708711, iteration: 17459
loss: 1.0516873598098755,grad_norm: 0.999999697170244, iteration: 17460
loss: 1.0327696800231934,grad_norm: 0.9483465068456461, iteration: 17461
loss: 1.0179654359817505,grad_norm: 0.9999992771307562, iteration: 17462
loss: 0.9853313565254211,grad_norm: 0.9999991049259813, iteration: 17463
loss: 1.0363247394561768,grad_norm: 0.9347996360352135, iteration: 17464
loss: 1.0290441513061523,grad_norm: 0.9999999539120948, iteration: 17465
loss: 1.0590788125991821,grad_norm: 0.9999991989684364, iteration: 17466
loss: 1.0235106945037842,grad_norm: 0.9999992592728588, iteration: 17467
loss: 1.0490365028381348,grad_norm: 0.9999992388404377, iteration: 17468
loss: 0.9974241852760315,grad_norm: 0.999999209028081, iteration: 17469
loss: 1.0383907556533813,grad_norm: 0.999999461207032, iteration: 17470
loss: 1.0750386714935303,grad_norm: 0.9999993368684805, iteration: 17471
loss: 1.0217183828353882,grad_norm: 0.9999994670564601, iteration: 17472
loss: 1.0525918006896973,grad_norm: 0.9999991309417776, iteration: 17473
loss: 1.0516159534454346,grad_norm: 0.9999990694313128, iteration: 17474
loss: 1.0391250848770142,grad_norm: 0.9537567841426815, iteration: 17475
loss: 1.0657302141189575,grad_norm: 0.9999996559329648, iteration: 17476
loss: 1.006022334098816,grad_norm: 0.9999990914820173, iteration: 17477
loss: 1.024027705192566,grad_norm: 0.9999993230546877, iteration: 17478
loss: 1.0385745763778687,grad_norm: 0.9999990852308721, iteration: 17479
loss: 1.0339162349700928,grad_norm: 0.979031932775726, iteration: 17480
loss: 1.004154086112976,grad_norm: 0.9690400375891834, iteration: 17481
loss: 1.0114065408706665,grad_norm: 0.9999996921596459, iteration: 17482
loss: 1.0465325117111206,grad_norm: 0.9999993421185762, iteration: 17483
loss: 1.0016841888427734,grad_norm: 0.9999996299709151, iteration: 17484
loss: 1.0585705041885376,grad_norm: 0.9999995596830364, iteration: 17485
loss: 1.043692708015442,grad_norm: 0.9999995551606925, iteration: 17486
loss: 1.0245628356933594,grad_norm: 0.9999994282481133, iteration: 17487
loss: 1.0420773029327393,grad_norm: 0.9999994176064395, iteration: 17488
loss: 0.97260981798172,grad_norm: 0.9367218321212583, iteration: 17489
loss: 1.0264054536819458,grad_norm: 0.9999994094185404, iteration: 17490
loss: 1.0225871801376343,grad_norm: 0.9999992659131369, iteration: 17491
loss: 1.018001914024353,grad_norm: 0.9999991784498327, iteration: 17492
loss: 1.0435107946395874,grad_norm: 0.9999990871159511, iteration: 17493
loss: 1.049001693725586,grad_norm: 0.9999990496859091, iteration: 17494
loss: 1.035063624382019,grad_norm: 0.9999991091532163, iteration: 17495
loss: 1.0552424192428589,grad_norm: 0.9999990406832298, iteration: 17496
loss: 0.9790827631950378,grad_norm: 0.9999992058884656, iteration: 17497
loss: 1.0576343536376953,grad_norm: 0.9999993515790588, iteration: 17498
loss: 1.0833741426467896,grad_norm: 0.99999937425383, iteration: 17499
loss: 0.9798377752304077,grad_norm: 0.9999991303319354, iteration: 17500
loss: 1.0233858823776245,grad_norm: 0.9999992622465118, iteration: 17501
loss: 1.0057483911514282,grad_norm: 0.9999996032123418, iteration: 17502
loss: 0.9975340962409973,grad_norm: 0.9999993242836991, iteration: 17503
loss: 1.0364893674850464,grad_norm: 0.9999998461932762, iteration: 17504
loss: 1.0427707433700562,grad_norm: 0.9999993224860676, iteration: 17505
loss: 1.0030561685562134,grad_norm: 0.9881298695512665, iteration: 17506
loss: 1.012621283531189,grad_norm: 0.9999993300535351, iteration: 17507
loss: 1.0343762636184692,grad_norm: 0.9999990919490539, iteration: 17508
loss: 1.0265558958053589,grad_norm: 0.9999992725833162, iteration: 17509
loss: 1.0100767612457275,grad_norm: 0.999999027689866, iteration: 17510
loss: 1.0286815166473389,grad_norm: 0.9999995623239364, iteration: 17511
loss: 0.9969325661659241,grad_norm: 0.965430806879495, iteration: 17512
loss: 0.9856449961662292,grad_norm: 0.9999991779254435, iteration: 17513
loss: 1.1254231929779053,grad_norm: 0.9999998628500276, iteration: 17514
loss: 1.023380994796753,grad_norm: 0.9999992615205516, iteration: 17515
loss: 1.1186046600341797,grad_norm: 0.9999993299723453, iteration: 17516
loss: 1.0167450904846191,grad_norm: 0.9999992626015449, iteration: 17517
loss: 1.0154035091400146,grad_norm: 0.9999991164364528, iteration: 17518
loss: 1.0249699354171753,grad_norm: 0.9999991645859527, iteration: 17519
loss: 0.9917472004890442,grad_norm: 0.9999990512985655, iteration: 17520
loss: 1.0317529439926147,grad_norm: 0.9999990789454004, iteration: 17521
loss: 1.0401588678359985,grad_norm: 0.9999992850267434, iteration: 17522
loss: 1.0364699363708496,grad_norm: 0.9999992389470276, iteration: 17523
loss: 1.0354338884353638,grad_norm: 0.904060435341004, iteration: 17524
loss: 1.04705810546875,grad_norm: 0.999999088070172, iteration: 17525
loss: 1.0007727146148682,grad_norm: 0.9999994278744829, iteration: 17526
loss: 1.0124088525772095,grad_norm: 0.999999106031948, iteration: 17527
loss: 1.0326675176620483,grad_norm: 0.9999991055461788, iteration: 17528
loss: 1.0315396785736084,grad_norm: 0.9999993310521484, iteration: 17529
loss: 0.9741871356964111,grad_norm: 0.999999269044349, iteration: 17530
loss: 1.0228729248046875,grad_norm: 0.9999993633060182, iteration: 17531
loss: 1.0306787490844727,grad_norm: 0.999999541627152, iteration: 17532
loss: 0.9885131120681763,grad_norm: 0.9999991255729699, iteration: 17533
loss: 0.9719361066818237,grad_norm: 0.9999990575265443, iteration: 17534
loss: 0.9997368454933167,grad_norm: 0.9999991604674072, iteration: 17535
loss: 1.0508065223693848,grad_norm: 0.999999353685558, iteration: 17536
loss: 0.9897563457489014,grad_norm: 0.9918861583621538, iteration: 17537
loss: 1.0212339162826538,grad_norm: 0.999999071308083, iteration: 17538
loss: 1.0359811782836914,grad_norm: 0.9999992027288996, iteration: 17539
loss: 0.9986444711685181,grad_norm: 0.9926835137865437, iteration: 17540
loss: 1.020302414894104,grad_norm: 0.9999990925619302, iteration: 17541
loss: 1.0131006240844727,grad_norm: 0.9999991096009188, iteration: 17542
loss: 1.0026754140853882,grad_norm: 0.999999564310997, iteration: 17543
loss: 1.0192880630493164,grad_norm: 0.9999991723945165, iteration: 17544
loss: 0.9989257454872131,grad_norm: 0.9999990123720714, iteration: 17545
loss: 1.0974587202072144,grad_norm: 0.9999994647368807, iteration: 17546
loss: 1.0271270275115967,grad_norm: 0.9999992283978139, iteration: 17547
loss: 1.015787124633789,grad_norm: 0.9999992165584891, iteration: 17548
loss: 1.0499591827392578,grad_norm: 0.9999998052081286, iteration: 17549
loss: 1.0338104963302612,grad_norm: 0.9171012994050889, iteration: 17550
loss: 1.0489020347595215,grad_norm: 0.9999990536753307, iteration: 17551
loss: 1.0424555540084839,grad_norm: 0.9999997475944821, iteration: 17552
loss: 1.0234087705612183,grad_norm: 0.9999996263901737, iteration: 17553
loss: 1.012887954711914,grad_norm: 0.9999991107319385, iteration: 17554
loss: 1.039926290512085,grad_norm: 0.9999993938682935, iteration: 17555
loss: 1.0262250900268555,grad_norm: 0.9999992751264083, iteration: 17556
loss: 1.0034812688827515,grad_norm: 0.9999991630842483, iteration: 17557
loss: 1.0138938426971436,grad_norm: 0.9999993686830825, iteration: 17558
loss: 1.0444676876068115,grad_norm: 0.9999993236319661, iteration: 17559
loss: 0.9847745895385742,grad_norm: 0.9999991145794368, iteration: 17560
loss: 1.0412306785583496,grad_norm: 0.9999992647973814, iteration: 17561
loss: 1.0237423181533813,grad_norm: 0.9999990730972272, iteration: 17562
loss: 1.0332478284835815,grad_norm: 0.999999303056504, iteration: 17563
loss: 0.9875979423522949,grad_norm: 0.9999991879174163, iteration: 17564
loss: 1.025036334991455,grad_norm: 0.9999990762117784, iteration: 17565
loss: 0.9855546355247498,grad_norm: 0.9999991058320125, iteration: 17566
loss: 1.0359807014465332,grad_norm: 0.9999997371997381, iteration: 17567
loss: 1.000576376914978,grad_norm: 0.9999990035588157, iteration: 17568
loss: 1.0362471342086792,grad_norm: 0.9999992537477567, iteration: 17569
loss: 1.0164862871170044,grad_norm: 0.9999993456992166, iteration: 17570
loss: 0.9753408432006836,grad_norm: 0.9999990530845924, iteration: 17571
loss: 0.9851841926574707,grad_norm: 0.9462806899879955, iteration: 17572
loss: 1.074775218963623,grad_norm: 0.9999995428393048, iteration: 17573
loss: 0.9987823963165283,grad_norm: 0.999999155365395, iteration: 17574
loss: 0.9621737599372864,grad_norm: 0.9999991783008322, iteration: 17575
loss: 1.016403317451477,grad_norm: 0.999999117194793, iteration: 17576
loss: 1.049068808555603,grad_norm: 0.9999996926333913, iteration: 17577
loss: 1.0449550151824951,grad_norm: 0.9999998432690381, iteration: 17578
loss: 1.0258339643478394,grad_norm: 0.9999990210209819, iteration: 17579
loss: 0.9622352123260498,grad_norm: 0.9999991631245052, iteration: 17580
loss: 1.0744454860687256,grad_norm: 0.9999991074273038, iteration: 17581
loss: 1.047306776046753,grad_norm: 0.9999992357553764, iteration: 17582
loss: 1.0341205596923828,grad_norm: 0.9999990019285151, iteration: 17583
loss: 1.0411498546600342,grad_norm: 0.9999992478377241, iteration: 17584
loss: 1.0500584840774536,grad_norm: 0.9999990987156802, iteration: 17585
loss: 1.0594143867492676,grad_norm: 0.999999714776162, iteration: 17586
loss: 1.0787062644958496,grad_norm: 0.9999997841627429, iteration: 17587
loss: 1.0389617681503296,grad_norm: 0.9835649064860686, iteration: 17588
loss: 1.0318355560302734,grad_norm: 0.9999992036408673, iteration: 17589
loss: 1.0407085418701172,grad_norm: 0.9999993439749263, iteration: 17590
loss: 1.0636745691299438,grad_norm: 0.9999996289811942, iteration: 17591
loss: 0.9982895255088806,grad_norm: 0.9808419941137156, iteration: 17592
loss: 1.0554695129394531,grad_norm: 0.9999995740778792, iteration: 17593
loss: 0.9903313517570496,grad_norm: 0.9612034188948977, iteration: 17594
loss: 1.0295655727386475,grad_norm: 0.9999997722275689, iteration: 17595
loss: 1.026969075202942,grad_norm: 0.9999990663298599, iteration: 17596
loss: 0.9997384548187256,grad_norm: 0.994145628259024, iteration: 17597
loss: 1.0250029563903809,grad_norm: 0.9757064552089159, iteration: 17598
loss: 1.0407253503799438,grad_norm: 0.9999993724474736, iteration: 17599
loss: 1.052470326423645,grad_norm: 0.9999993102023125, iteration: 17600
loss: 1.0352643728256226,grad_norm: 0.991043616265926, iteration: 17601
loss: 1.0776152610778809,grad_norm: 0.9999994563507587, iteration: 17602
loss: 1.0178382396697998,grad_norm: 0.9088703732597025, iteration: 17603
loss: 0.9853801727294922,grad_norm: 0.9999992226226103, iteration: 17604
loss: 1.0228314399719238,grad_norm: 0.999999386928922, iteration: 17605
loss: 1.0346713066101074,grad_norm: 0.999999470234184, iteration: 17606
loss: 0.987694501876831,grad_norm: 0.9999991421814592, iteration: 17607
loss: 1.0163030624389648,grad_norm: 0.9999994195787049, iteration: 17608
loss: 1.0296072959899902,grad_norm: 0.999999143662262, iteration: 17609
loss: 0.9874238967895508,grad_norm: 0.9999996577003142, iteration: 17610
loss: 1.040950059890747,grad_norm: 0.9999994966219794, iteration: 17611
loss: 1.0170725584030151,grad_norm: 0.999999157516879, iteration: 17612
loss: 1.0530810356140137,grad_norm: 0.9999993927024737, iteration: 17613
loss: 1.0085281133651733,grad_norm: 0.9696437398132619, iteration: 17614
loss: 1.0203126668930054,grad_norm: 0.9810316793999638, iteration: 17615
loss: 1.0070948600769043,grad_norm: 0.9999996984921365, iteration: 17616
loss: 1.0065052509307861,grad_norm: 0.9999998955890536, iteration: 17617
loss: 0.9511876106262207,grad_norm: 0.9999992048615474, iteration: 17618
loss: 1.1005796194076538,grad_norm: 0.9999997190945444, iteration: 17619
loss: 1.0078080892562866,grad_norm: 0.9999996617631192, iteration: 17620
loss: 1.0194448232650757,grad_norm: 0.9999990924004677, iteration: 17621
loss: 0.9757655262947083,grad_norm: 0.9999991886225956, iteration: 17622
loss: 1.050665020942688,grad_norm: 0.9999998176380787, iteration: 17623
loss: 1.0292139053344727,grad_norm: 0.999999015216886, iteration: 17624
loss: 1.0560706853866577,grad_norm: 0.9999994539345114, iteration: 17625
loss: 1.1241952180862427,grad_norm: 0.9999995743729765, iteration: 17626
loss: 1.1613929271697998,grad_norm: 0.999999857518169, iteration: 17627
loss: 1.0210916996002197,grad_norm: 0.9999991536820986, iteration: 17628
loss: 1.0105141401290894,grad_norm: 0.9999997688995382, iteration: 17629
loss: 1.019455909729004,grad_norm: 0.999999898287336, iteration: 17630
loss: 1.005942463874817,grad_norm: 0.9999993003801004, iteration: 17631
loss: 1.025421142578125,grad_norm: 0.9999997137160698, iteration: 17632
loss: 1.0116262435913086,grad_norm: 0.9999992816367298, iteration: 17633
loss: 1.038894534111023,grad_norm: 0.9999990837637919, iteration: 17634
loss: 1.0130733251571655,grad_norm: 0.999999413816599, iteration: 17635
loss: 0.9842067956924438,grad_norm: 0.9999992611489258, iteration: 17636
loss: 1.0369532108306885,grad_norm: 0.9999992473051293, iteration: 17637
loss: 1.0425595045089722,grad_norm: 0.9999990829780772, iteration: 17638
loss: 0.9886803030967712,grad_norm: 0.9999997437593333, iteration: 17639
loss: 1.0389810800552368,grad_norm: 0.9999992142135394, iteration: 17640
loss: 1.0528208017349243,grad_norm: 0.999999065276976, iteration: 17641
loss: 1.0480598211288452,grad_norm: 0.9999991804105894, iteration: 17642
loss: 1.0553271770477295,grad_norm: 0.9999991063965196, iteration: 17643
loss: 1.0452500581741333,grad_norm: 0.924225068492898, iteration: 17644
loss: 1.015151858329773,grad_norm: 0.999999340965245, iteration: 17645
loss: 0.9992631673812866,grad_norm: 0.9837575033653693, iteration: 17646
loss: 1.026478886604309,grad_norm: 0.9999990685981024, iteration: 17647
loss: 0.988018810749054,grad_norm: 0.9999991829659448, iteration: 17648
loss: 1.0508856773376465,grad_norm: 0.9999994069007937, iteration: 17649
loss: 1.0005056858062744,grad_norm: 0.9362128047500047, iteration: 17650
loss: 1.0303183794021606,grad_norm: 0.9999989743188381, iteration: 17651
loss: 1.0304349660873413,grad_norm: 0.9999991839766716, iteration: 17652
loss: 1.022089958190918,grad_norm: 0.9999994144469522, iteration: 17653
loss: 0.9715549349784851,grad_norm: 0.9999990137148386, iteration: 17654
loss: 1.0155792236328125,grad_norm: 0.9999990727236284, iteration: 17655
loss: 1.1001449823379517,grad_norm: 0.999999363422547, iteration: 17656
loss: 1.0118011236190796,grad_norm: 0.9999992256954978, iteration: 17657
loss: 1.0041890144348145,grad_norm: 0.999999295446841, iteration: 17658
loss: 1.0270150899887085,grad_norm: 0.9757110819813469, iteration: 17659
loss: 0.9956899881362915,grad_norm: 0.999999039721673, iteration: 17660
loss: 1.0348742008209229,grad_norm: 0.9999993235605219, iteration: 17661
loss: 1.0315171480178833,grad_norm: 0.9999993752689728, iteration: 17662
loss: 1.0089458227157593,grad_norm: 0.9999989602836079, iteration: 17663
loss: 1.076224684715271,grad_norm: 0.9999997950478511, iteration: 17664
loss: 1.0428593158721924,grad_norm: 0.9999991622556786, iteration: 17665
loss: 1.0878483057022095,grad_norm: 0.9999996845485501, iteration: 17666
loss: 1.0034657716751099,grad_norm: 0.9999993425301107, iteration: 17667
loss: 1.064454197883606,grad_norm: 0.9999994153654114, iteration: 17668
loss: 1.0177005529403687,grad_norm: 0.9999993842588155, iteration: 17669
loss: 0.9988676905632019,grad_norm: 0.98650047130131, iteration: 17670
loss: 1.0714179277420044,grad_norm: 0.999999165740261, iteration: 17671
loss: 0.9815216660499573,grad_norm: 0.999999078922773, iteration: 17672
loss: 1.026620626449585,grad_norm: 0.8991133238176557, iteration: 17673
loss: 1.0269619226455688,grad_norm: 0.999999472374524, iteration: 17674
loss: 1.0139602422714233,grad_norm: 0.9999992770839351, iteration: 17675
loss: 1.1000077724456787,grad_norm: 0.9999993883966123, iteration: 17676
loss: 1.02724027633667,grad_norm: 0.9999995834855903, iteration: 17677
loss: 0.9959728121757507,grad_norm: 0.9999990227968893, iteration: 17678
loss: 1.0061707496643066,grad_norm: 0.9999995407127064, iteration: 17679
loss: 1.0529558658599854,grad_norm: 0.9999994682319218, iteration: 17680
loss: 1.0427130460739136,grad_norm: 0.9999994175243248, iteration: 17681
loss: 1.0377713441848755,grad_norm: 0.9999992981189041, iteration: 17682
loss: 1.0487544536590576,grad_norm: 0.9999994782471144, iteration: 17683
loss: 1.0009046792984009,grad_norm: 0.999999348818668, iteration: 17684
loss: 0.973158597946167,grad_norm: 0.9999991749657778, iteration: 17685
loss: 1.048185110092163,grad_norm: 0.9999998455325033, iteration: 17686
loss: 0.9936704039573669,grad_norm: 0.9999992284676713, iteration: 17687
loss: 0.9961339831352234,grad_norm: 0.9999992795357351, iteration: 17688
loss: 1.0336283445358276,grad_norm: 0.9999996088452269, iteration: 17689
loss: 1.0101642608642578,grad_norm: 0.9999995373392976, iteration: 17690
loss: 1.0922452211380005,grad_norm: 0.9999994171418086, iteration: 17691
loss: 1.0049515962600708,grad_norm: 0.9999991806247105, iteration: 17692
loss: 1.0200669765472412,grad_norm: 0.9999993097477268, iteration: 17693
loss: 1.035150170326233,grad_norm: 0.9999995773716893, iteration: 17694
loss: 1.0346038341522217,grad_norm: 0.9999991525504305, iteration: 17695
loss: 0.9978591203689575,grad_norm: 0.9999995140335828, iteration: 17696
loss: 1.0036360025405884,grad_norm: 0.9999992253862611, iteration: 17697
loss: 1.0306578874588013,grad_norm: 0.9755791011818679, iteration: 17698
loss: 1.0515763759613037,grad_norm: 0.9999993688884168, iteration: 17699
loss: 1.0076892375946045,grad_norm: 0.9999990655230658, iteration: 17700
loss: 1.0593031644821167,grad_norm: 0.9999996291583734, iteration: 17701
loss: 0.9890324473381042,grad_norm: 0.9999992999502665, iteration: 17702
loss: 1.0005046129226685,grad_norm: 0.9999992403669345, iteration: 17703
loss: 1.0621050596237183,grad_norm: 0.9999990468803414, iteration: 17704
loss: 1.0100394487380981,grad_norm: 0.9999995836812795, iteration: 17705
loss: 0.9942613840103149,grad_norm: 0.9034899421041207, iteration: 17706
loss: 0.9807517528533936,grad_norm: 0.9999992556840231, iteration: 17707
loss: 1.0561643838882446,grad_norm: 0.9999994272874092, iteration: 17708
loss: 1.0471049547195435,grad_norm: 0.9999993635105335, iteration: 17709
loss: 1.0385972261428833,grad_norm: 0.9999996680668964, iteration: 17710
loss: 1.0927371978759766,grad_norm: 0.9999994314184142, iteration: 17711
loss: 1.040735125541687,grad_norm: 0.9999993130634626, iteration: 17712
loss: 1.0488053560256958,grad_norm: 0.9999997779660603, iteration: 17713
loss: 1.0721479654312134,grad_norm: 0.9999997856642153, iteration: 17714
loss: 1.0677776336669922,grad_norm: 0.9999995369396224, iteration: 17715
loss: 0.9850873351097107,grad_norm: 0.9999990661538946, iteration: 17716
loss: 1.0197240114212036,grad_norm: 0.9999993192355411, iteration: 17717
loss: 1.0416361093521118,grad_norm: 0.9999994262375616, iteration: 17718
loss: 1.0341646671295166,grad_norm: 0.9999993499771263, iteration: 17719
loss: 0.990993082523346,grad_norm: 0.9999990868577758, iteration: 17720
loss: 1.0528029203414917,grad_norm: 0.9145835542844132, iteration: 17721
loss: 1.0084997415542603,grad_norm: 0.9999991482868965, iteration: 17722
loss: 1.0530961751937866,grad_norm: 0.9999995639262591, iteration: 17723
loss: 1.0244296789169312,grad_norm: 0.99999913005697, iteration: 17724
loss: 1.0389256477355957,grad_norm: 0.9999991173278291, iteration: 17725
loss: 1.0585912466049194,grad_norm: 0.9999993200774051, iteration: 17726
loss: 0.9829779267311096,grad_norm: 0.9790491159621392, iteration: 17727
loss: 1.0179792642593384,grad_norm: 0.9999991936552863, iteration: 17728
loss: 1.007246732711792,grad_norm: 0.9999991011779202, iteration: 17729
loss: 1.0113239288330078,grad_norm: 0.9999991623495351, iteration: 17730
loss: 1.0332121849060059,grad_norm: 0.910304734382804, iteration: 17731
loss: 1.00606107711792,grad_norm: 0.9999997222017957, iteration: 17732
loss: 0.9880837798118591,grad_norm: 0.9999992082791925, iteration: 17733
loss: 1.058707594871521,grad_norm: 0.9999997835253188, iteration: 17734
loss: 1.0760818719863892,grad_norm: 0.9882937129343343, iteration: 17735
loss: 1.0488836765289307,grad_norm: 0.9999991067307, iteration: 17736
loss: 1.0274626016616821,grad_norm: 0.99999923918191, iteration: 17737
loss: 1.0581520795822144,grad_norm: 0.9999992904962115, iteration: 17738
loss: 1.0015277862548828,grad_norm: 0.9999990772658842, iteration: 17739
loss: 1.0263274908065796,grad_norm: 0.9999991432112858, iteration: 17740
loss: 1.0907471179962158,grad_norm: 0.9999991480194463, iteration: 17741
loss: 1.0013418197631836,grad_norm: 0.999999430382189, iteration: 17742
loss: 0.9822655320167542,grad_norm: 0.9999990870854081, iteration: 17743
loss: 1.0081236362457275,grad_norm: 0.9227824560046906, iteration: 17744
loss: 0.9855973720550537,grad_norm: 0.9999992190744748, iteration: 17745
loss: 1.0364209413528442,grad_norm: 0.9999993258802773, iteration: 17746
loss: 1.0208320617675781,grad_norm: 0.9999990644906908, iteration: 17747
loss: 0.9805862903594971,grad_norm: 0.9999992018566803, iteration: 17748
loss: 1.0259852409362793,grad_norm: 0.9999991817938102, iteration: 17749
loss: 1.0209838151931763,grad_norm: 0.9999991316376903, iteration: 17750
loss: 1.1196550130844116,grad_norm: 0.9999994346279226, iteration: 17751
loss: 0.9933027029037476,grad_norm: 0.9999990577859101, iteration: 17752
loss: 1.0008388757705688,grad_norm: 0.9999989947074347, iteration: 17753
loss: 1.0374547243118286,grad_norm: 0.9999992406062468, iteration: 17754
loss: 1.057868480682373,grad_norm: 0.9999991027244413, iteration: 17755
loss: 1.025168776512146,grad_norm: 0.9999992522593579, iteration: 17756
loss: 0.9950674176216125,grad_norm: 0.9999992489074147, iteration: 17757
loss: 0.9785057306289673,grad_norm: 0.9999993161601802, iteration: 17758
loss: 1.1296147108078003,grad_norm: 0.9999993790209347, iteration: 17759
loss: 1.0673872232437134,grad_norm: 0.9999995627321224, iteration: 17760
loss: 1.0277191400527954,grad_norm: 0.9999995036340478, iteration: 17761
loss: 1.0495891571044922,grad_norm: 0.9999991117210218, iteration: 17762
loss: 1.0271570682525635,grad_norm: 0.928694211942898, iteration: 17763
loss: 1.0094746351242065,grad_norm: 0.9999990485789352, iteration: 17764
loss: 1.0456387996673584,grad_norm: 0.9999997482831869, iteration: 17765
loss: 1.037493348121643,grad_norm: 0.9999992881293139, iteration: 17766
loss: 1.0737621784210205,grad_norm: 0.9999992911130412, iteration: 17767
loss: 1.0631948709487915,grad_norm: 0.9999991377779474, iteration: 17768
loss: 1.10332190990448,grad_norm: 0.9999992133062515, iteration: 17769
loss: 1.0428969860076904,grad_norm: 0.9906924544805354, iteration: 17770
loss: 1.0154318809509277,grad_norm: 0.9999994113115277, iteration: 17771
loss: 0.9935362339019775,grad_norm: 0.9999991793385626, iteration: 17772
loss: 1.0558013916015625,grad_norm: 0.9599535169916265, iteration: 17773
loss: 1.070881724357605,grad_norm: 0.9999995073299367, iteration: 17774
loss: 1.0292096138000488,grad_norm: 0.9999991641282273, iteration: 17775
loss: 0.9582489728927612,grad_norm: 0.9999993283429705, iteration: 17776
loss: 1.0275778770446777,grad_norm: 0.9999994565788057, iteration: 17777
loss: 1.027206540107727,grad_norm: 0.999999332276763, iteration: 17778
loss: 0.9951515197753906,grad_norm: 0.999999212222015, iteration: 17779
loss: 1.048251986503601,grad_norm: 0.9999991842085485, iteration: 17780
loss: 1.0220592021942139,grad_norm: 0.999999208753132, iteration: 17781
loss: 1.014184832572937,grad_norm: 0.9999994387314672, iteration: 17782
loss: 0.9784929156303406,grad_norm: 0.9999991589978373, iteration: 17783
loss: 1.0238451957702637,grad_norm: 0.9999991814302053, iteration: 17784
loss: 0.9559064507484436,grad_norm: 0.9999992447903122, iteration: 17785
loss: 1.0230417251586914,grad_norm: 0.9999991061833561, iteration: 17786
loss: 1.0088900327682495,grad_norm: 0.9999991520929684, iteration: 17787
loss: 1.0326831340789795,grad_norm: 0.9999991555806904, iteration: 17788
loss: 1.0190006494522095,grad_norm: 0.9999996094902703, iteration: 17789
loss: 0.9987033605575562,grad_norm: 0.9999991188062611, iteration: 17790
loss: 1.0617057085037231,grad_norm: 0.9999994299052948, iteration: 17791
loss: 1.07582426071167,grad_norm: 0.9999990778100264, iteration: 17792
loss: 1.0267387628555298,grad_norm: 0.9999992214434644, iteration: 17793
loss: 0.9721373915672302,grad_norm: 0.9999990807956263, iteration: 17794
loss: 0.9864234924316406,grad_norm: 0.9520235789073527, iteration: 17795
loss: 1.0320470333099365,grad_norm: 0.9999994647335269, iteration: 17796
loss: 0.990670919418335,grad_norm: 0.9775601701048703, iteration: 17797
loss: 1.0204434394836426,grad_norm: 0.9999990798575757, iteration: 17798
loss: 1.035211205482483,grad_norm: 0.9999992019748557, iteration: 17799
loss: 1.038401484489441,grad_norm: 0.9999991541247927, iteration: 17800
loss: 0.997215747833252,grad_norm: 0.9999992614629808, iteration: 17801
loss: 1.0476515293121338,grad_norm: 0.9999999285255422, iteration: 17802
loss: 1.0347046852111816,grad_norm: 0.9999989890924906, iteration: 17803
loss: 1.0495377779006958,grad_norm: 0.9999995768713307, iteration: 17804
loss: 1.0317201614379883,grad_norm: 0.9999990424103475, iteration: 17805
loss: 1.0077468156814575,grad_norm: 0.9999993219112737, iteration: 17806
loss: 1.0264300107955933,grad_norm: 0.9999990944356258, iteration: 17807
loss: 1.0146390199661255,grad_norm: 0.9999991886128071, iteration: 17808
loss: 1.0380208492279053,grad_norm: 0.9999991843989747, iteration: 17809
loss: 1.0015246868133545,grad_norm: 0.99845806408387, iteration: 17810
loss: 1.039620280265808,grad_norm: 0.9999993101780408, iteration: 17811
loss: 1.008388876914978,grad_norm: 0.9440391797552208, iteration: 17812
loss: 1.0336792469024658,grad_norm: 0.973966655356433, iteration: 17813
loss: 0.992287814617157,grad_norm: 0.9999991122913714, iteration: 17814
loss: 1.0260355472564697,grad_norm: 0.999999103435672, iteration: 17815
loss: 1.00418221950531,grad_norm: 0.9999993618994956, iteration: 17816
loss: 0.9610634446144104,grad_norm: 0.9999994500645889, iteration: 17817
loss: 1.080776333808899,grad_norm: 0.9690102942649255, iteration: 17818
loss: 1.0234447717666626,grad_norm: 0.9999996924803927, iteration: 17819
loss: 1.0252386331558228,grad_norm: 0.9999990369107818, iteration: 17820
loss: 0.9932441711425781,grad_norm: 0.9999992926488384, iteration: 17821
loss: 1.008636474609375,grad_norm: 0.9999995756055889, iteration: 17822
loss: 1.0324097871780396,grad_norm: 0.9999991002967193, iteration: 17823
loss: 1.029945731163025,grad_norm: 0.9750886565368252, iteration: 17824
loss: 1.0204914808273315,grad_norm: 0.9999992511800477, iteration: 17825
loss: 1.0038857460021973,grad_norm: 0.9799934960185878, iteration: 17826
loss: 0.9827911853790283,grad_norm: 0.9999991949663405, iteration: 17827
loss: 1.0023566484451294,grad_norm: 0.9999992105965454, iteration: 17828
loss: 1.050979495048523,grad_norm: 0.9999995415248542, iteration: 17829
loss: 1.0260100364685059,grad_norm: 0.9999992336434196, iteration: 17830
loss: 1.0483139753341675,grad_norm: 0.9999990400986878, iteration: 17831
loss: 1.0381767749786377,grad_norm: 0.9999990639037639, iteration: 17832
loss: 1.0058341026306152,grad_norm: 0.9999990379600502, iteration: 17833
loss: 1.0617903470993042,grad_norm: 0.999999666972104, iteration: 17834
loss: 0.9936599731445312,grad_norm: 0.9999991765569106, iteration: 17835
loss: 1.031213641166687,grad_norm: 0.9999993930044113, iteration: 17836
loss: 1.0372763872146606,grad_norm: 0.9837421614681671, iteration: 17837
loss: 1.0462229251861572,grad_norm: 0.9999994064550247, iteration: 17838
loss: 1.0267982482910156,grad_norm: 0.9999991631615494, iteration: 17839
loss: 1.08687424659729,grad_norm: 0.999999142088666, iteration: 17840
loss: 1.0036495923995972,grad_norm: 0.9999995428811266, iteration: 17841
loss: 1.0169563293457031,grad_norm: 0.9999992445334056, iteration: 17842
loss: 1.014275312423706,grad_norm: 0.999999159275204, iteration: 17843
loss: 1.04959237575531,grad_norm: 0.9999990621401759, iteration: 17844
loss: 1.029486894607544,grad_norm: 0.999999237728002, iteration: 17845
loss: 1.0308623313903809,grad_norm: 0.9999992380461691, iteration: 17846
loss: 1.0230042934417725,grad_norm: 0.9999993809347943, iteration: 17847
loss: 1.0035918951034546,grad_norm: 0.9999991405141333, iteration: 17848
loss: 1.0219542980194092,grad_norm: 0.9999991402770814, iteration: 17849
loss: 0.9932374954223633,grad_norm: 0.9999989903423369, iteration: 17850
loss: 1.0022035837173462,grad_norm: 0.9999990300818817, iteration: 17851
loss: 1.0408730506896973,grad_norm: 0.9999992674229679, iteration: 17852
loss: 1.0134450197219849,grad_norm: 0.9999990986421968, iteration: 17853
loss: 1.0088096857070923,grad_norm: 0.9999996626499651, iteration: 17854
loss: 1.0747414827346802,grad_norm: 0.9999999040083242, iteration: 17855
loss: 1.0086510181427002,grad_norm: 0.9999991285286574, iteration: 17856
loss: 1.0280390977859497,grad_norm: 0.9999996101345191, iteration: 17857
loss: 1.0979382991790771,grad_norm: 0.9999996325162528, iteration: 17858
loss: 1.0538547039031982,grad_norm: 0.9999993938697158, iteration: 17859
loss: 0.9972128868103027,grad_norm: 0.9999991124874955, iteration: 17860
loss: 1.0380570888519287,grad_norm: 0.9999997465468898, iteration: 17861
loss: 1.022615671157837,grad_norm: 0.9999993430370411, iteration: 17862
loss: 1.1040995121002197,grad_norm: 0.9999994441858696, iteration: 17863
loss: 1.0245004892349243,grad_norm: 0.9999993028277158, iteration: 17864
loss: 1.0186113119125366,grad_norm: 0.9999990544114147, iteration: 17865
loss: 1.0002800226211548,grad_norm: 0.9999992114287362, iteration: 17866
loss: 1.044141173362732,grad_norm: 0.9999994302055605, iteration: 17867
loss: 1.0323673486709595,grad_norm: 0.9999990607722427, iteration: 17868
loss: 1.0019371509552002,grad_norm: 0.9999995490956347, iteration: 17869
loss: 1.0184160470962524,grad_norm: 0.9999991998542392, iteration: 17870
loss: 0.997685432434082,grad_norm: 0.9999989639055565, iteration: 17871
loss: 1.0632339715957642,grad_norm: 0.9999993923931683, iteration: 17872
loss: 1.007499098777771,grad_norm: 0.9999992699190502, iteration: 17873
loss: 0.9955910444259644,grad_norm: 0.9999990780877999, iteration: 17874
loss: 1.0392487049102783,grad_norm: 0.9999992574380406, iteration: 17875
loss: 1.1712157726287842,grad_norm: 0.9999997955419847, iteration: 17876
loss: 1.01516854763031,grad_norm: 0.9999996175568617, iteration: 17877
loss: 1.037533164024353,grad_norm: 0.9999993585284409, iteration: 17878
loss: 0.9906486868858337,grad_norm: 0.9999993129909643, iteration: 17879
loss: 1.017728567123413,grad_norm: 0.9999992545561946, iteration: 17880
loss: 0.9890300035476685,grad_norm: 0.9999992476174008, iteration: 17881
loss: 1.023882269859314,grad_norm: 0.9999990830619749, iteration: 17882
loss: 1.0607436895370483,grad_norm: 0.9999995350144028, iteration: 17883
loss: 1.1079121828079224,grad_norm: 0.9999992950058076, iteration: 17884
loss: 1.057456374168396,grad_norm: 0.9999996585679607, iteration: 17885
loss: 1.0102587938308716,grad_norm: 0.9999990176817023, iteration: 17886
loss: 1.014167308807373,grad_norm: 0.9344064235294889, iteration: 17887
loss: 1.0053695440292358,grad_norm: 0.9999993341253623, iteration: 17888
loss: 1.0260213613510132,grad_norm: 0.9999990929440332, iteration: 17889
loss: 1.0047529935836792,grad_norm: 0.9999991877666649, iteration: 17890
loss: 1.0056607723236084,grad_norm: 0.9999990272972783, iteration: 17891
loss: 1.0255262851715088,grad_norm: 0.9999993341797816, iteration: 17892
loss: 0.9726253747940063,grad_norm: 0.9999992071592224, iteration: 17893
loss: 1.0488500595092773,grad_norm: 0.9999994292047599, iteration: 17894
loss: 1.0496107339859009,grad_norm: 0.9999991000174323, iteration: 17895
loss: 1.0153368711471558,grad_norm: 0.9999990442712345, iteration: 17896
loss: 0.988135576248169,grad_norm: 0.9999991593316369, iteration: 17897
loss: 1.0391229391098022,grad_norm: 0.9999992825823139, iteration: 17898
loss: 1.0255942344665527,grad_norm: 0.999999250905311, iteration: 17899
loss: 1.003709077835083,grad_norm: 0.9999991180580199, iteration: 17900
loss: 1.0199880599975586,grad_norm: 0.9999989710464646, iteration: 17901
loss: 1.0433024168014526,grad_norm: 0.9999994980380479, iteration: 17902
loss: 1.0230070352554321,grad_norm: 0.9999998634278916, iteration: 17903
loss: 1.027730107307434,grad_norm: 0.9963819824108283, iteration: 17904
loss: 0.9848482012748718,grad_norm: 0.9999990605423903, iteration: 17905
loss: 1.0369248390197754,grad_norm: 0.9999990169995244, iteration: 17906
loss: 1.0229567289352417,grad_norm: 0.9999991687070647, iteration: 17907
loss: 1.0023795366287231,grad_norm: 0.9999991237406594, iteration: 17908
loss: 0.9984443187713623,grad_norm: 0.9999992184513362, iteration: 17909
loss: 1.0130338668823242,grad_norm: 0.9999992045796668, iteration: 17910
loss: 0.9995285868644714,grad_norm: 0.9999991859808405, iteration: 17911
loss: 1.0235044956207275,grad_norm: 0.999999159173235, iteration: 17912
loss: 0.9588336944580078,grad_norm: 0.9999993099246333, iteration: 17913
loss: 0.9887478947639465,grad_norm: 0.9999992149759107, iteration: 17914
loss: 1.0101585388183594,grad_norm: 0.9999990892279231, iteration: 17915
loss: 1.0298818349838257,grad_norm: 0.9999991179939545, iteration: 17916
loss: 1.0523579120635986,grad_norm: 0.9999996451593832, iteration: 17917
loss: 1.0390949249267578,grad_norm: 0.9999991886899912, iteration: 17918
loss: 1.0187538862228394,grad_norm: 0.9999993035709982, iteration: 17919
loss: 1.0005022287368774,grad_norm: 0.9999991256711613, iteration: 17920
loss: 1.0175262689590454,grad_norm: 0.9999991121965466, iteration: 17921
loss: 0.9892372488975525,grad_norm: 0.9999989769464435, iteration: 17922
loss: 1.006842851638794,grad_norm: 0.9999993285816514, iteration: 17923
loss: 1.0221599340438843,grad_norm: 0.999999269430089, iteration: 17924
loss: 0.9870491027832031,grad_norm: 0.999999272890038, iteration: 17925
loss: 0.9661422967910767,grad_norm: 0.9999991493090623, iteration: 17926
loss: 0.9788693785667419,grad_norm: 0.9999991744180063, iteration: 17927
loss: 1.0450621843338013,grad_norm: 0.9999990885515532, iteration: 17928
loss: 1.0520886182785034,grad_norm: 0.9999993533590639, iteration: 17929
loss: 1.040661096572876,grad_norm: 0.9999992462948722, iteration: 17930
loss: 1.0674057006835938,grad_norm: 0.9999994070396896, iteration: 17931
loss: 1.05704927444458,grad_norm: 0.9999995300927284, iteration: 17932
loss: 1.0476422309875488,grad_norm: 0.9999990843171004, iteration: 17933
loss: 1.0447595119476318,grad_norm: 0.9805740493263924, iteration: 17934
loss: 1.0344403982162476,grad_norm: 0.9999994893635361, iteration: 17935
loss: 0.9850109219551086,grad_norm: 0.9999997990423318, iteration: 17936
loss: 1.0338186025619507,grad_norm: 0.9756122431114285, iteration: 17937
loss: 1.012876033782959,grad_norm: 0.9999990687847216, iteration: 17938
loss: 1.0065500736236572,grad_norm: 0.9886042389209933, iteration: 17939
loss: 1.024902105331421,grad_norm: 0.9999994988489662, iteration: 17940
loss: 1.0114350318908691,grad_norm: 0.9999995372178242, iteration: 17941
loss: 0.9841241836547852,grad_norm: 0.9999990611832261, iteration: 17942
loss: 0.9623502492904663,grad_norm: 0.9999992029421405, iteration: 17943
loss: 1.0078539848327637,grad_norm: 0.9506535427762655, iteration: 17944
loss: 1.0000461339950562,grad_norm: 0.999999292696352, iteration: 17945
loss: 1.0122085809707642,grad_norm: 0.9999990624277102, iteration: 17946
loss: 1.040349006652832,grad_norm: 0.9999992711329978, iteration: 17947
loss: 1.009763240814209,grad_norm: 0.999999139288123, iteration: 17948
loss: 1.044754147529602,grad_norm: 0.9999991002677041, iteration: 17949
loss: 0.9887344241142273,grad_norm: 0.9999990249840212, iteration: 17950
loss: 1.0040408372879028,grad_norm: 0.999999119366448, iteration: 17951
loss: 1.0237139463424683,grad_norm: 0.9999991062853278, iteration: 17952
loss: 1.0064350366592407,grad_norm: 0.9696502413618148, iteration: 17953
loss: 1.0648980140686035,grad_norm: 0.9999992182141292, iteration: 17954
loss: 0.9959983825683594,grad_norm: 0.9999991844666581, iteration: 17955
loss: 1.068549633026123,grad_norm: 0.9999997961649063, iteration: 17956
loss: 1.003851294517517,grad_norm: 0.9999990997721374, iteration: 17957
loss: 1.0364410877227783,grad_norm: 0.9999991390713808, iteration: 17958
loss: 1.0999139547348022,grad_norm: 0.9999998366363372, iteration: 17959
loss: 1.0425517559051514,grad_norm: 0.9999996013464059, iteration: 17960
loss: 1.03616201877594,grad_norm: 0.999999097502147, iteration: 17961
loss: 1.033988118171692,grad_norm: 0.9999994218587224, iteration: 17962
loss: 1.0173319578170776,grad_norm: 0.9999992768659285, iteration: 17963
loss: 1.0495920181274414,grad_norm: 0.9999991374407754, iteration: 17964
loss: 1.0123859643936157,grad_norm: 0.9999991738097597, iteration: 17965
loss: 0.9946305155754089,grad_norm: 0.9999992548212139, iteration: 17966
loss: 1.0665159225463867,grad_norm: 0.9999994234431642, iteration: 17967
loss: 0.9974241852760315,grad_norm: 0.9999990290368576, iteration: 17968
loss: 0.9903355836868286,grad_norm: 0.8362119226261344, iteration: 17969
loss: 1.109837293624878,grad_norm: 0.9999995260254856, iteration: 17970
loss: 1.0499581098556519,grad_norm: 0.9999994129732616, iteration: 17971
loss: 1.0267261266708374,grad_norm: 0.9999992032564732, iteration: 17972
loss: 1.0376640558242798,grad_norm: 0.9999994593935241, iteration: 17973
loss: 0.9976904988288879,grad_norm: 0.9999990239604555, iteration: 17974
loss: 1.0230462551116943,grad_norm: 0.9999991269044681, iteration: 17975
loss: 1.0812644958496094,grad_norm: 0.9999996741941554, iteration: 17976
loss: 1.0071529150009155,grad_norm: 0.9547202197010594, iteration: 17977
loss: 0.998877763748169,grad_norm: 0.9885386955534876, iteration: 17978
loss: 0.9830322265625,grad_norm: 0.9999990844627077, iteration: 17979
loss: 1.0425972938537598,grad_norm: 0.9383102455791634, iteration: 17980
loss: 1.1039644479751587,grad_norm: 0.9999999176157156, iteration: 17981
loss: 1.0252517461776733,grad_norm: 0.9999992094283433, iteration: 17982
loss: 1.027967095375061,grad_norm: 0.9999990781959125, iteration: 17983
loss: 1.0273876190185547,grad_norm: 0.9414821927300319, iteration: 17984
loss: 1.0166536569595337,grad_norm: 0.9999989816853935, iteration: 17985
loss: 1.022912859916687,grad_norm: 0.9999991906743363, iteration: 17986
loss: 0.9874517917633057,grad_norm: 0.9973175448515761, iteration: 17987
loss: 1.0121146440505981,grad_norm: 0.9999992888576366, iteration: 17988
loss: 0.9938616156578064,grad_norm: 0.9999991588538362, iteration: 17989
loss: 1.0082424879074097,grad_norm: 0.9203161661934864, iteration: 17990
loss: 1.006081461906433,grad_norm: 0.9999991676773319, iteration: 17991
loss: 1.043633222579956,grad_norm: 0.9999993178499108, iteration: 17992
loss: 1.0074620246887207,grad_norm: 0.9999991817432353, iteration: 17993
loss: 1.0105704069137573,grad_norm: 0.9999992554283275, iteration: 17994
loss: 1.0107654333114624,grad_norm: 0.9522048518596499, iteration: 17995
loss: 1.0308866500854492,grad_norm: 0.9999991268528196, iteration: 17996
loss: 1.0469262599945068,grad_norm: 0.9999995126257356, iteration: 17997
loss: 1.0570259094238281,grad_norm: 0.9999990460266855, iteration: 17998
loss: 1.0069479942321777,grad_norm: 0.9999990340314544, iteration: 17999
loss: 0.9849977493286133,grad_norm: 0.9999993726633502, iteration: 18000
loss: 1.0936018228530884,grad_norm: 0.9999994817809582, iteration: 18001
loss: 1.0377341508865356,grad_norm: 0.9999995332997411, iteration: 18002
loss: 1.0166590213775635,grad_norm: 0.9999992755427619, iteration: 18003
loss: 1.0558269023895264,grad_norm: 0.9999991397908614, iteration: 18004
loss: 0.9946088194847107,grad_norm: 0.9999992518107048, iteration: 18005
loss: 1.0324065685272217,grad_norm: 0.9999994709761008, iteration: 18006
loss: 0.9890233874320984,grad_norm: 0.9549515625622444, iteration: 18007
loss: 1.0373386144638062,grad_norm: 0.9999990556751925, iteration: 18008
loss: 1.0178509950637817,grad_norm: 0.9999990775831515, iteration: 18009
loss: 1.0147205591201782,grad_norm: 0.9999991515795106, iteration: 18010
loss: 1.0658577680587769,grad_norm: 0.9999996048871944, iteration: 18011
loss: 1.0075289011001587,grad_norm: 0.9764557027961007, iteration: 18012
loss: 1.0419572591781616,grad_norm: 0.9999993484361418, iteration: 18013
loss: 1.0228137969970703,grad_norm: 0.9999990647586691, iteration: 18014
loss: 1.1419899463653564,grad_norm: 1.0000000874510317, iteration: 18015
loss: 0.9643038511276245,grad_norm: 0.953699463476142, iteration: 18016
loss: 0.9860759973526001,grad_norm: 0.9999989755893527, iteration: 18017
loss: 1.0299254655838013,grad_norm: 0.951464341143459, iteration: 18018
loss: 1.0287692546844482,grad_norm: 0.9999993767768027, iteration: 18019
loss: 1.0148929357528687,grad_norm: 0.9999997111165079, iteration: 18020
loss: 0.9541030526161194,grad_norm: 0.9999991505783754, iteration: 18021
loss: 1.0149286985397339,grad_norm: 0.9999991694916914, iteration: 18022
loss: 1.024490237236023,grad_norm: 0.9999990481219446, iteration: 18023
loss: 0.9767152667045593,grad_norm: 0.9789484017205013, iteration: 18024
loss: 0.967017650604248,grad_norm: 0.9999991196067076, iteration: 18025
loss: 0.9998524188995361,grad_norm: 0.9999990709683478, iteration: 18026
loss: 1.0888237953186035,grad_norm: 0.9999997357224406, iteration: 18027
loss: 0.9769173860549927,grad_norm: 0.9999992915877512, iteration: 18028
loss: 1.0041583776474,grad_norm: 0.9999998729174749, iteration: 18029
loss: 1.0201787948608398,grad_norm: 0.8814186212361876, iteration: 18030
loss: 1.0021997690200806,grad_norm: 0.9687370833860268, iteration: 18031
loss: 0.9970576167106628,grad_norm: 0.999999029200877, iteration: 18032
loss: 1.0008951425552368,grad_norm: 0.9156776373168442, iteration: 18033
loss: 0.9878121614456177,grad_norm: 0.999999138415813, iteration: 18034
loss: 1.1593220233917236,grad_norm: 0.9999997070498915, iteration: 18035
loss: 0.9977530837059021,grad_norm: 0.983366881439322, iteration: 18036
loss: 1.0852121114730835,grad_norm: 0.9999993459172436, iteration: 18037
loss: 0.9650148153305054,grad_norm: 0.9131227797544649, iteration: 18038
loss: 1.0206553936004639,grad_norm: 0.9999992481596498, iteration: 18039
loss: 0.9784607887268066,grad_norm: 0.9999992846660909, iteration: 18040
loss: 1.0352277755737305,grad_norm: 0.999999940808275, iteration: 18041
loss: 1.0420533418655396,grad_norm: 0.9999996501854741, iteration: 18042
loss: 1.0702449083328247,grad_norm: 0.9999997508533272, iteration: 18043
loss: 1.0524922609329224,grad_norm: 0.9761566083504165, iteration: 18044
loss: 1.0667164325714111,grad_norm: 0.9999994235629532, iteration: 18045
loss: 1.0003557205200195,grad_norm: 0.9999991856316027, iteration: 18046
loss: 1.009574055671692,grad_norm: 0.9999990367292836, iteration: 18047
loss: 1.0178018808364868,grad_norm: 0.9999995893002402, iteration: 18048
loss: 0.9906688928604126,grad_norm: 0.999999186603741, iteration: 18049
loss: 0.9936928749084473,grad_norm: 0.9999991028590336, iteration: 18050
loss: 1.0123755931854248,grad_norm: 0.9999995918095883, iteration: 18051
loss: 1.0262449979782104,grad_norm: 0.999999177139154, iteration: 18052
loss: 1.0004637241363525,grad_norm: 0.9999991403980842, iteration: 18053
loss: 1.0465173721313477,grad_norm: 0.9999997538552372, iteration: 18054
loss: 0.9874626994132996,grad_norm: 0.999999040249476, iteration: 18055
loss: 1.0407540798187256,grad_norm: 0.9999994923893598, iteration: 18056
loss: 1.0180039405822754,grad_norm: 0.9999990726447808, iteration: 18057
loss: 0.9990484118461609,grad_norm: 0.9999997233595533, iteration: 18058
loss: 0.9729362726211548,grad_norm: 0.9999994682273206, iteration: 18059
loss: 1.0960992574691772,grad_norm: 0.9999995630123863, iteration: 18060
loss: 1.0388031005859375,grad_norm: 0.9999995315661359, iteration: 18061
loss: 1.090417742729187,grad_norm: 0.9999996694340868, iteration: 18062
loss: 1.0584995746612549,grad_norm: 0.9999995046370146, iteration: 18063
loss: 1.0054060220718384,grad_norm: 0.983990272785444, iteration: 18064
loss: 1.0649824142456055,grad_norm: 0.9999997749819254, iteration: 18065
loss: 0.9995359182357788,grad_norm: 0.9999996075440358, iteration: 18066
loss: 0.985472559928894,grad_norm: 0.999999105542348, iteration: 18067
loss: 1.0215156078338623,grad_norm: 0.9999998658713871, iteration: 18068
loss: 0.9920076727867126,grad_norm: 0.9137011474959661, iteration: 18069
loss: 1.0455820560455322,grad_norm: 0.9999990954523941, iteration: 18070
loss: 1.0475029945373535,grad_norm: 0.9999992385702687, iteration: 18071
loss: 1.0172239542007446,grad_norm: 0.9999992310541849, iteration: 18072
loss: 0.9970718026161194,grad_norm: 0.9999992021500261, iteration: 18073
loss: 1.0652285814285278,grad_norm: 0.9999991825679411, iteration: 18074
loss: 1.0102273225784302,grad_norm: 0.9558644007296728, iteration: 18075
loss: 1.0225659608840942,grad_norm: 0.8586811675780144, iteration: 18076
loss: 1.009548544883728,grad_norm: 0.999999311540688, iteration: 18077
loss: 1.014063835144043,grad_norm: 0.9999991488317835, iteration: 18078
loss: 1.0029240846633911,grad_norm: 0.9999990728583272, iteration: 18079
loss: 0.9986020922660828,grad_norm: 0.999999065719151, iteration: 18080
loss: 1.0400031805038452,grad_norm: 0.9999991850113574, iteration: 18081
loss: 0.9632063508033752,grad_norm: 0.9999991310727631, iteration: 18082
loss: 1.014144778251648,grad_norm: 0.9999994809080123, iteration: 18083
loss: 1.0162252187728882,grad_norm: 0.9999996905457191, iteration: 18084
loss: 1.016829252243042,grad_norm: 0.9999991708961815, iteration: 18085
loss: 1.0108250379562378,grad_norm: 0.9999997797407212, iteration: 18086
loss: 1.006986141204834,grad_norm: 0.999999068489835, iteration: 18087
loss: 0.9324262142181396,grad_norm: 0.999999117989236, iteration: 18088
loss: 1.028825044631958,grad_norm: 0.8679198113998756, iteration: 18089
loss: 1.033584475517273,grad_norm: 0.9999992045059972, iteration: 18090
loss: 1.0094835758209229,grad_norm: 0.9999993221777658, iteration: 18091
loss: 1.0229629278182983,grad_norm: 0.9999990394114391, iteration: 18092
loss: 0.9726073145866394,grad_norm: 0.9999992883967197, iteration: 18093
loss: 1.0408703088760376,grad_norm: 0.9838179963575888, iteration: 18094
loss: 1.038591742515564,grad_norm: 0.9999990401331174, iteration: 18095
loss: 1.03060781955719,grad_norm: 0.9999992784489422, iteration: 18096
loss: 1.1436041593551636,grad_norm: 0.9999994921128754, iteration: 18097
loss: 0.9959551095962524,grad_norm: 0.9999991950110654, iteration: 18098
loss: 1.0641320943832397,grad_norm: 0.999999376750912, iteration: 18099
loss: 1.0174566507339478,grad_norm: 0.999999247121876, iteration: 18100
loss: 1.0102064609527588,grad_norm: 0.9999992787239651, iteration: 18101
loss: 1.029012680053711,grad_norm: 0.8929678143016411, iteration: 18102
loss: 1.0825347900390625,grad_norm: 0.9999997208837266, iteration: 18103
loss: 0.9811749458312988,grad_norm: 0.9999990504259115, iteration: 18104
loss: 0.9972699284553528,grad_norm: 0.9999990676368288, iteration: 18105
loss: 1.0715184211730957,grad_norm: 0.9999994841377179, iteration: 18106
loss: 1.029198169708252,grad_norm: 0.9999993358734117, iteration: 18107
loss: 1.009036898612976,grad_norm: 0.9999990117653673, iteration: 18108
loss: 1.104506015777588,grad_norm: 0.9999994036550929, iteration: 18109
loss: 1.0402812957763672,grad_norm: 0.9607889477360467, iteration: 18110
loss: 1.043848991394043,grad_norm: 0.9999998671709371, iteration: 18111
loss: 1.012237787246704,grad_norm: 0.9999994777107473, iteration: 18112
loss: 1.0392717123031616,grad_norm: 0.9999996518962572, iteration: 18113
loss: 1.021159052848816,grad_norm: 0.9999991311943863, iteration: 18114
loss: 1.1183605194091797,grad_norm: 0.9999995715555705, iteration: 18115
loss: 1.0573368072509766,grad_norm: 0.9999995422777097, iteration: 18116
loss: 1.0454459190368652,grad_norm: 0.8805785358446724, iteration: 18117
loss: 1.031079888343811,grad_norm: 0.9999990835561506, iteration: 18118
loss: 1.0254390239715576,grad_norm: 0.9999994036220996, iteration: 18119
loss: 1.0119363069534302,grad_norm: 0.9754929367496945, iteration: 18120
loss: 1.0201960802078247,grad_norm: 0.9999992977493413, iteration: 18121
loss: 1.0824837684631348,grad_norm: 0.9675121890899377, iteration: 18122
loss: 1.0481419563293457,grad_norm: 0.9999991296536325, iteration: 18123
loss: 1.0018671751022339,grad_norm: 0.9694888345475675, iteration: 18124
loss: 1.0281990766525269,grad_norm: 0.9999993957496742, iteration: 18125
loss: 1.014682650566101,grad_norm: 0.9999994795522249, iteration: 18126
loss: 1.0092679262161255,grad_norm: 0.9999993332321363, iteration: 18127
loss: 1.0096778869628906,grad_norm: 0.9999993365256842, iteration: 18128
loss: 1.0629268884658813,grad_norm: 0.9999997721092958, iteration: 18129
loss: 1.014207363128662,grad_norm: 0.999999372077946, iteration: 18130
loss: 1.0216734409332275,grad_norm: 0.9999993454486104, iteration: 18131
loss: 1.0100390911102295,grad_norm: 0.9999990298091158, iteration: 18132
loss: 1.0423405170440674,grad_norm: 0.9999992494820997, iteration: 18133
loss: 1.0310031175613403,grad_norm: 0.9999991907544602, iteration: 18134
loss: 1.0561836957931519,grad_norm: 0.9999997740929847, iteration: 18135
loss: 0.9985669255256653,grad_norm: 0.9782102412950622, iteration: 18136
loss: 1.0691163539886475,grad_norm: 0.9999990901277769, iteration: 18137
loss: 0.9897804260253906,grad_norm: 0.9441741157020738, iteration: 18138
loss: 1.040623664855957,grad_norm: 0.9999993586913513, iteration: 18139
loss: 1.020752191543579,grad_norm: 0.9999992323862449, iteration: 18140
loss: 1.0844736099243164,grad_norm: 0.9999992464478691, iteration: 18141
loss: 1.0103704929351807,grad_norm: 0.9999992934064226, iteration: 18142
loss: 1.0219149589538574,grad_norm: 0.9999996866682399, iteration: 18143
loss: 1.0147833824157715,grad_norm: 0.9999998335768411, iteration: 18144
loss: 1.1193511486053467,grad_norm: 0.9999996482712258, iteration: 18145
loss: 0.9878374338150024,grad_norm: 0.9465790345008183, iteration: 18146
loss: 1.0741543769836426,grad_norm: 0.9999993645727867, iteration: 18147
loss: 1.0462456941604614,grad_norm: 0.999999210555167, iteration: 18148
loss: 1.0532231330871582,grad_norm: 0.9999992093214184, iteration: 18149
loss: 0.9987941980361938,grad_norm: 0.999999336531461, iteration: 18150
loss: 1.0245201587677002,grad_norm: 0.9999990094458839, iteration: 18151
loss: 1.0141124725341797,grad_norm: 0.9999992466235155, iteration: 18152
loss: 1.0573054552078247,grad_norm: 0.9999991948138562, iteration: 18153
loss: 1.1623413562774658,grad_norm: 0.999999721384723, iteration: 18154
loss: 1.0527769327163696,grad_norm: 0.9999992776305615, iteration: 18155
loss: 1.0179890394210815,grad_norm: 0.9999998611517962, iteration: 18156
loss: 1.0503135919570923,grad_norm: 0.9999998992974363, iteration: 18157
loss: 1.053062915802002,grad_norm: 0.9999991933584881, iteration: 18158
loss: 1.0393954515457153,grad_norm: 0.9999992702680818, iteration: 18159
loss: 1.039732813835144,grad_norm: 0.9999993074604013, iteration: 18160
loss: 0.9891320466995239,grad_norm: 0.9999993015519684, iteration: 18161
loss: 1.012174367904663,grad_norm: 0.9999992789282341, iteration: 18162
loss: 0.9764963388442993,grad_norm: 0.9999991434368948, iteration: 18163
loss: 1.0158478021621704,grad_norm: 0.987599871602994, iteration: 18164
loss: 1.1297463178634644,grad_norm: 0.9999992991497682, iteration: 18165
loss: 1.0350592136383057,grad_norm: 0.9999997332218786, iteration: 18166
loss: 1.0248229503631592,grad_norm: 0.9999996480685198, iteration: 18167
loss: 1.075650691986084,grad_norm: 0.9999998738325507, iteration: 18168
loss: 1.0050972700119019,grad_norm: 0.9678533151357269, iteration: 18169
loss: 0.986054003238678,grad_norm: 0.9999993248828013, iteration: 18170
loss: 0.9950113892555237,grad_norm: 0.9999994047684654, iteration: 18171
loss: 1.0134202241897583,grad_norm: 0.9999990909629798, iteration: 18172
loss: 0.9789508581161499,grad_norm: 0.9999991132963622, iteration: 18173
loss: 0.9990900754928589,grad_norm: 0.999999173397178, iteration: 18174
loss: 1.0340546369552612,grad_norm: 0.9999995785210092, iteration: 18175
loss: 1.020945429801941,grad_norm: 0.9999992964905469, iteration: 18176
loss: 1.0548970699310303,grad_norm: 0.999999162381147, iteration: 18177
loss: 1.0275167226791382,grad_norm: 0.9999991792201254, iteration: 18178
loss: 1.0269508361816406,grad_norm: 0.9999997453790812, iteration: 18179
loss: 1.048640251159668,grad_norm: 0.9999992378477879, iteration: 18180
loss: 1.0414938926696777,grad_norm: 0.9999991376448848, iteration: 18181
loss: 1.030572533607483,grad_norm: 0.9999992389748604, iteration: 18182
loss: 1.1022454500198364,grad_norm: 0.9999997558709143, iteration: 18183
loss: 1.0116620063781738,grad_norm: 0.7869664653673473, iteration: 18184
loss: 1.0247995853424072,grad_norm: 0.9999993973909539, iteration: 18185
loss: 1.0057779550552368,grad_norm: 0.9999992317931556, iteration: 18186
loss: 0.991723358631134,grad_norm: 0.999999211229827, iteration: 18187
loss: 1.011393666267395,grad_norm: 0.9999992975948797, iteration: 18188
loss: 1.022876501083374,grad_norm: 0.9999993116220287, iteration: 18189
loss: 0.9666625261306763,grad_norm: 0.9999991659671843, iteration: 18190
loss: 1.0220736265182495,grad_norm: 0.999999132576903, iteration: 18191
loss: 1.004705786705017,grad_norm: 0.9999991158342517, iteration: 18192
loss: 1.032129168510437,grad_norm: 0.920163163807359, iteration: 18193
loss: 1.0146377086639404,grad_norm: 0.9999990359918316, iteration: 18194
loss: 1.0290642976760864,grad_norm: 0.9999995964951295, iteration: 18195
loss: 1.0355889797210693,grad_norm: 0.9999995196863187, iteration: 18196
loss: 1.0547096729278564,grad_norm: 0.9999991326217226, iteration: 18197
loss: 0.9938781261444092,grad_norm: 0.9999991857364683, iteration: 18198
loss: 1.0337510108947754,grad_norm: 0.9999993257276458, iteration: 18199
loss: 1.0060861110687256,grad_norm: 0.9207602440519942, iteration: 18200
loss: 0.9644352197647095,grad_norm: 0.9999995911732663, iteration: 18201
loss: 0.9706582427024841,grad_norm: 0.9999992465799168, iteration: 18202
loss: 1.0495611429214478,grad_norm: 0.9999993139837613, iteration: 18203
loss: 0.9853894114494324,grad_norm: 0.999999144747435, iteration: 18204
loss: 1.0043513774871826,grad_norm: 0.9999992423619525, iteration: 18205
loss: 1.0354387760162354,grad_norm: 0.9999992480415109, iteration: 18206
loss: 0.9902135729789734,grad_norm: 0.9999994109029633, iteration: 18207
loss: 1.014106273651123,grad_norm: 0.9999992079947054, iteration: 18208
loss: 0.9964419007301331,grad_norm: 0.9999991754648121, iteration: 18209
loss: 0.9860158562660217,grad_norm: 0.9999990881938006, iteration: 18210
loss: 1.0653316974639893,grad_norm: 0.9999990816127194, iteration: 18211
loss: 1.0192859172821045,grad_norm: 0.9999990657560445, iteration: 18212
loss: 1.0750504732131958,grad_norm: 0.9999994610014304, iteration: 18213
loss: 1.024239182472229,grad_norm: 0.9999992576548986, iteration: 18214
loss: 1.0015904903411865,grad_norm: 0.8786659706974983, iteration: 18215
loss: 0.9932129979133606,grad_norm: 0.9999991602710693, iteration: 18216
loss: 1.0061365365982056,grad_norm: 0.9999990931252383, iteration: 18217
loss: 1.0726882219314575,grad_norm: 0.9999995627960861, iteration: 18218
loss: 1.0222036838531494,grad_norm: 0.9025133805123503, iteration: 18219
loss: 1.0078794956207275,grad_norm: 0.9999991941358638, iteration: 18220
loss: 0.9752655625343323,grad_norm: 0.9999991943376368, iteration: 18221
loss: 0.961081326007843,grad_norm: 0.9398320622009773, iteration: 18222
loss: 1.0538533926010132,grad_norm: 0.9999992864486441, iteration: 18223
loss: 1.0247218608856201,grad_norm: 0.999999174211206, iteration: 18224
loss: 1.0381232500076294,grad_norm: 0.9999997770512637, iteration: 18225
loss: 1.0390809774398804,grad_norm: 0.9999993192428451, iteration: 18226
loss: 1.0134046077728271,grad_norm: 0.9999990347537537, iteration: 18227
loss: 1.0019313097000122,grad_norm: 0.9999990638159533, iteration: 18228
loss: 0.9953528046607971,grad_norm: 0.9999991230921303, iteration: 18229
loss: 1.047645926475525,grad_norm: 0.9999993514088668, iteration: 18230
loss: 1.0222280025482178,grad_norm: 0.9999991737649561, iteration: 18231
loss: 1.0215234756469727,grad_norm: 0.9999991498540354, iteration: 18232
loss: 1.0239441394805908,grad_norm: 0.9999992881672126, iteration: 18233
loss: 0.9808827638626099,grad_norm: 0.9796829458206252, iteration: 18234
loss: 1.0213679075241089,grad_norm: 0.9999993299767371, iteration: 18235
loss: 1.005118489265442,grad_norm: 0.9999992114677164, iteration: 18236
loss: 1.0085885524749756,grad_norm: 0.9999997017384684, iteration: 18237
loss: 1.0451064109802246,grad_norm: 0.9999992206129976, iteration: 18238
loss: 0.9881054759025574,grad_norm: 0.9679778151061641, iteration: 18239
loss: 1.0193979740142822,grad_norm: 0.9999991810848045, iteration: 18240
loss: 1.0120837688446045,grad_norm: 0.9999997140391073, iteration: 18241
loss: 1.0254253149032593,grad_norm: 0.9999992948905845, iteration: 18242
loss: 1.0101927518844604,grad_norm: 0.9999997355642859, iteration: 18243
loss: 0.9886059761047363,grad_norm: 0.9999992056724661, iteration: 18244
loss: 0.9956903457641602,grad_norm: 0.9517917766668424, iteration: 18245
loss: 1.0444426536560059,grad_norm: 0.99999908208093, iteration: 18246
loss: 1.0238196849822998,grad_norm: 0.9999991712928009, iteration: 18247
loss: 1.0147202014923096,grad_norm: 0.9999990731990606, iteration: 18248
loss: 1.0456066131591797,grad_norm: 0.999999280818699, iteration: 18249
loss: 1.0093575716018677,grad_norm: 0.9999991155546162, iteration: 18250
loss: 1.0729916095733643,grad_norm: 0.9999991601524996, iteration: 18251
loss: 0.9906927943229675,grad_norm: 0.9999992737676761, iteration: 18252
loss: 0.9866212010383606,grad_norm: 0.9999993079282382, iteration: 18253
loss: 1.0873768329620361,grad_norm: 0.9999996384343419, iteration: 18254
loss: 0.9915724992752075,grad_norm: 0.999998979478568, iteration: 18255
loss: 1.019627571105957,grad_norm: 0.9999990837309193, iteration: 18256
loss: 1.0080236196517944,grad_norm: 0.9999992202684582, iteration: 18257
loss: 1.0335030555725098,grad_norm: 0.9999994042344084, iteration: 18258
loss: 1.1035723686218262,grad_norm: 0.9999995073456064, iteration: 18259
loss: 1.0143479108810425,grad_norm: 0.9999991493190412, iteration: 18260
loss: 1.0241377353668213,grad_norm: 0.9999992518195091, iteration: 18261
loss: 1.1118417978286743,grad_norm: 0.999999783413688, iteration: 18262
loss: 1.0431575775146484,grad_norm: 0.9999993519717579, iteration: 18263
loss: 1.0412805080413818,grad_norm: 0.9999993096346663, iteration: 18264
loss: 1.0259793996810913,grad_norm: 0.9999994885253857, iteration: 18265
loss: 1.018413782119751,grad_norm: 0.9999991383325084, iteration: 18266
loss: 1.0238978862762451,grad_norm: 0.9146928633075592, iteration: 18267
loss: 1.0191832780838013,grad_norm: 0.99999912102302, iteration: 18268
loss: 0.9925016760826111,grad_norm: 0.9999989392278793, iteration: 18269
loss: 1.038975477218628,grad_norm: 0.9999992604798845, iteration: 18270
loss: 0.9955394864082336,grad_norm: 0.899673620692564, iteration: 18271
loss: 1.0415838956832886,grad_norm: 0.9999995040903271, iteration: 18272
loss: 1.055679440498352,grad_norm: 0.9999992731958635, iteration: 18273
loss: 1.0365597009658813,grad_norm: 0.999999035330531, iteration: 18274
loss: 1.041717529296875,grad_norm: 0.9999992672736983, iteration: 18275
loss: 1.0127724409103394,grad_norm: 0.9999991447266902, iteration: 18276
loss: 0.9905934929847717,grad_norm: 0.9999994602562295, iteration: 18277
loss: 1.0156804323196411,grad_norm: 0.9999990017884398, iteration: 18278
loss: 1.026523232460022,grad_norm: 0.9999992869456616, iteration: 18279
loss: 1.0141133069992065,grad_norm: 0.9999994516259422, iteration: 18280
loss: 1.043925166130066,grad_norm: 0.999999267834732, iteration: 18281
loss: 1.0318162441253662,grad_norm: 0.9999991601877515, iteration: 18282
loss: 0.998154878616333,grad_norm: 0.9194645116905157, iteration: 18283
loss: 1.0411794185638428,grad_norm: 0.9999992288618816, iteration: 18284
loss: 1.0767773389816284,grad_norm: 0.9999997634773402, iteration: 18285
loss: 1.0099282264709473,grad_norm: 0.9999990939799617, iteration: 18286
loss: 0.9768725633621216,grad_norm: 0.9999992535781992, iteration: 18287
loss: 1.0426032543182373,grad_norm: 0.9999994219499712, iteration: 18288
loss: 0.9893471598625183,grad_norm: 0.9999991653024296, iteration: 18289
loss: 1.0315321683883667,grad_norm: 0.999999257687483, iteration: 18290
loss: 1.0116091966629028,grad_norm: 0.9999991174871157, iteration: 18291
loss: 1.003235936164856,grad_norm: 0.9778208673978329, iteration: 18292
loss: 1.019361138343811,grad_norm: 0.9999991774126801, iteration: 18293
loss: 1.036742091178894,grad_norm: 0.9999997786100667, iteration: 18294
loss: 1.033408522605896,grad_norm: 0.9999991426234982, iteration: 18295
loss: 1.0574398040771484,grad_norm: 0.9513081862626785, iteration: 18296
loss: 1.0265254974365234,grad_norm: 0.9999996702439475, iteration: 18297
loss: 0.97743159532547,grad_norm: 0.9999991925997563, iteration: 18298
loss: 1.0235583782196045,grad_norm: 0.99999939598587, iteration: 18299
loss: 1.0233559608459473,grad_norm: 0.9999992669808088, iteration: 18300
loss: 1.0455026626586914,grad_norm: 0.9999991410441404, iteration: 18301
loss: 1.029180884361267,grad_norm: 0.937816192010384, iteration: 18302
loss: 0.9989444017410278,grad_norm: 0.9999991850114888, iteration: 18303
loss: 1.0073997974395752,grad_norm: 0.9999993639644129, iteration: 18304
loss: 1.0046706199645996,grad_norm: 0.9999994611323257, iteration: 18305
loss: 1.0206241607666016,grad_norm: 0.9999994911098357, iteration: 18306
loss: 1.0414568185806274,grad_norm: 0.9999991374955938, iteration: 18307
loss: 1.0145957469940186,grad_norm: 0.9999990727396401, iteration: 18308
loss: 0.9948710799217224,grad_norm: 0.9999990544407287, iteration: 18309
loss: 1.0281726121902466,grad_norm: 0.9999990356260631, iteration: 18310
loss: 0.9682984948158264,grad_norm: 0.9999995354989579, iteration: 18311
loss: 0.991180956363678,grad_norm: 0.9999990234657423, iteration: 18312
loss: 0.9784001111984253,grad_norm: 0.9999992380221644, iteration: 18313
loss: 1.058205246925354,grad_norm: 0.993244904286069, iteration: 18314
loss: 1.0165683031082153,grad_norm: 0.9999991073165894, iteration: 18315
loss: 1.0281596183776855,grad_norm: 0.9999991433042333, iteration: 18316
loss: 1.0178027153015137,grad_norm: 0.999999141077795, iteration: 18317
loss: 0.9863537549972534,grad_norm: 0.9999991775921911, iteration: 18318
loss: 1.00236177444458,grad_norm: 0.9999991479758401, iteration: 18319
loss: 1.0305120944976807,grad_norm: 0.9999993755108867, iteration: 18320
loss: 1.024011492729187,grad_norm: 0.9933491372880661, iteration: 18321
loss: 1.008774757385254,grad_norm: 0.999999155160459, iteration: 18322
loss: 0.999989926815033,grad_norm: 0.9999991539167873, iteration: 18323
loss: 1.0178166627883911,grad_norm: 0.9999995381182379, iteration: 18324
loss: 1.0523931980133057,grad_norm: 0.999999031789834, iteration: 18325
loss: 1.0408519506454468,grad_norm: 0.9999991001410304, iteration: 18326
loss: 1.034936785697937,grad_norm: 0.9534699206212569, iteration: 18327
loss: 1.036224126815796,grad_norm: 0.9462986909911747, iteration: 18328
loss: 0.9708343148231506,grad_norm: 0.9999991570399518, iteration: 18329
loss: 1.0376534461975098,grad_norm: 0.9999992999717271, iteration: 18330
loss: 1.0587081909179688,grad_norm: 0.9999996280383404, iteration: 18331
loss: 1.0317474603652954,grad_norm: 0.9999990234411975, iteration: 18332
loss: 1.016823410987854,grad_norm: 0.9999992241466442, iteration: 18333
loss: 1.0298302173614502,grad_norm: 0.9345313227412944, iteration: 18334
loss: 0.9885784387588501,grad_norm: 0.9980500148719912, iteration: 18335
loss: 1.0161759853363037,grad_norm: 0.8153798261213049, iteration: 18336
loss: 1.0489572286605835,grad_norm: 0.9999994135997808, iteration: 18337
loss: 0.9962234497070312,grad_norm: 0.9999991167969954, iteration: 18338
loss: 1.032307744026184,grad_norm: 0.9999990504966414, iteration: 18339
loss: 1.049519419670105,grad_norm: 0.9999991916652794, iteration: 18340
loss: 0.9963312149047852,grad_norm: 0.9999991192828726, iteration: 18341
loss: 0.9823743104934692,grad_norm: 0.9999991335787597, iteration: 18342
loss: 1.0151723623275757,grad_norm: 0.9999990684622562, iteration: 18343
loss: 1.0498099327087402,grad_norm: 0.9999990691555779, iteration: 18344
loss: 1.0495117902755737,grad_norm: 0.9999993496035965, iteration: 18345
loss: 1.0043895244598389,grad_norm: 0.9999993036743661, iteration: 18346
loss: 1.02781081199646,grad_norm: 0.9999995489722153, iteration: 18347
loss: 0.9889905452728271,grad_norm: 0.9744156828360747, iteration: 18348
loss: 0.9943062663078308,grad_norm: 0.9999991405214446, iteration: 18349
loss: 0.9698188900947571,grad_norm: 0.9999990722405984, iteration: 18350
loss: 1.0382744073867798,grad_norm: 0.9999991386447621, iteration: 18351
loss: 1.0443120002746582,grad_norm: 0.983481350501499, iteration: 18352
loss: 1.0788726806640625,grad_norm: 0.9999996038317122, iteration: 18353
loss: 1.0390346050262451,grad_norm: 0.9002869983477826, iteration: 18354
loss: 1.0391587018966675,grad_norm: 0.9999992034871109, iteration: 18355
loss: 1.0195436477661133,grad_norm: 0.9999990351264683, iteration: 18356
loss: 1.0052683353424072,grad_norm: 0.8484322660569054, iteration: 18357
loss: 1.0179829597473145,grad_norm: 0.9999990981548224, iteration: 18358
loss: 1.0594333410263062,grad_norm: 0.9999991737932742, iteration: 18359
loss: 1.0124620199203491,grad_norm: 0.9844673899537266, iteration: 18360
loss: 1.04094398021698,grad_norm: 0.9999990039358237, iteration: 18361
loss: 1.0231523513793945,grad_norm: 0.9999994984633712, iteration: 18362
loss: 0.9964032173156738,grad_norm: 0.9999991644102405, iteration: 18363
loss: 1.0284439325332642,grad_norm: 0.9999995249563537, iteration: 18364
loss: 0.9815568327903748,grad_norm: 0.9999990473855261, iteration: 18365
loss: 1.0251617431640625,grad_norm: 0.9999992683457917, iteration: 18366
loss: 1.019262671470642,grad_norm: 0.99999929359042, iteration: 18367
loss: 1.049514889717102,grad_norm: 0.9999992551944371, iteration: 18368
loss: 1.0841206312179565,grad_norm: 0.9999995401811312, iteration: 18369
loss: 1.0458422899246216,grad_norm: 0.999999433492953, iteration: 18370
loss: 1.0587102174758911,grad_norm: 0.9999990520540357, iteration: 18371
loss: 1.0608073472976685,grad_norm: 0.9999995478297453, iteration: 18372
loss: 1.0014832019805908,grad_norm: 0.9999994079895235, iteration: 18373
loss: 1.0185266733169556,grad_norm: 0.9999992843670712, iteration: 18374
loss: 1.0007562637329102,grad_norm: 0.9999990500876946, iteration: 18375
loss: 1.0117281675338745,grad_norm: 0.9999992014318295, iteration: 18376
loss: 0.9836560487747192,grad_norm: 0.9999991569442024, iteration: 18377
loss: 1.004009485244751,grad_norm: 0.9999991680438478, iteration: 18378
loss: 1.02306067943573,grad_norm: 0.999999547542657, iteration: 18379
loss: 1.0204724073410034,grad_norm: 0.9999990294996556, iteration: 18380
loss: 1.018510103225708,grad_norm: 0.9999991474604935, iteration: 18381
loss: 1.0390900373458862,grad_norm: 0.9999993427061183, iteration: 18382
loss: 1.0097745656967163,grad_norm: 0.9999992628305896, iteration: 18383
loss: 1.0044924020767212,grad_norm: 0.9999992083852728, iteration: 18384
loss: 1.0320755243301392,grad_norm: 0.9999993187810273, iteration: 18385
loss: 1.01564359664917,grad_norm: 0.9680801952186457, iteration: 18386
loss: 1.029009461402893,grad_norm: 0.9999989995862015, iteration: 18387
loss: 1.0070521831512451,grad_norm: 0.9999991141216327, iteration: 18388
loss: 1.0488117933273315,grad_norm: 0.9999993200137659, iteration: 18389
loss: 1.1072475910186768,grad_norm: 0.9999997045527406, iteration: 18390
loss: 1.0598764419555664,grad_norm: 0.9999994398521943, iteration: 18391
loss: 1.0219119787216187,grad_norm: 0.9999994270753464, iteration: 18392
loss: 0.9925837516784668,grad_norm: 0.9999992013304418, iteration: 18393
loss: 1.0281070470809937,grad_norm: 0.9560674218206372, iteration: 18394
loss: 1.0122495889663696,grad_norm: 0.9999992355183859, iteration: 18395
loss: 1.053086519241333,grad_norm: 0.9999992818595118, iteration: 18396
loss: 0.9747187495231628,grad_norm: 0.9999992312519331, iteration: 18397
loss: 1.063161849975586,grad_norm: 0.9999992916379553, iteration: 18398
loss: 0.9970752596855164,grad_norm: 0.999999170197888, iteration: 18399
loss: 1.0091160535812378,grad_norm: 0.999999684565206, iteration: 18400
loss: 1.0401337146759033,grad_norm: 0.9227841077384368, iteration: 18401
loss: 1.0153272151947021,grad_norm: 0.9999990597390789, iteration: 18402
loss: 0.9921275973320007,grad_norm: 0.9999995172484659, iteration: 18403
loss: 1.0485097169876099,grad_norm: 0.9176544011846917, iteration: 18404
loss: 1.0069308280944824,grad_norm: 0.9999992338770051, iteration: 18405
loss: 0.9863638877868652,grad_norm: 0.8497559919755144, iteration: 18406
loss: 0.9780594110488892,grad_norm: 0.9999990234067931, iteration: 18407
loss: 0.994665265083313,grad_norm: 0.9904119943597335, iteration: 18408
loss: 0.9932292699813843,grad_norm: 0.9999991750319043, iteration: 18409
loss: 1.0075817108154297,grad_norm: 0.9999992963968577, iteration: 18410
loss: 1.009575366973877,grad_norm: 0.9930827232704794, iteration: 18411
loss: 1.0347957611083984,grad_norm: 0.9999993520359157, iteration: 18412
loss: 1.0252515077590942,grad_norm: 0.9999995168981015, iteration: 18413
loss: 1.068421483039856,grad_norm: 0.9999990274836403, iteration: 18414
loss: 1.0548944473266602,grad_norm: 0.9999990602240459, iteration: 18415
loss: 1.003485083580017,grad_norm: 0.9999994296165271, iteration: 18416
loss: 0.9975214600563049,grad_norm: 0.9999991800724272, iteration: 18417
loss: 1.0178204774856567,grad_norm: 0.9999991168401561, iteration: 18418
loss: 1.0161025524139404,grad_norm: 0.9999991077023207, iteration: 18419
loss: 1.016558289527893,grad_norm: 0.9999991054932157, iteration: 18420
loss: 0.9813140034675598,grad_norm: 0.9999991054643037, iteration: 18421
loss: 1.085293173789978,grad_norm: 0.9999998042332153, iteration: 18422
loss: 0.9754906892776489,grad_norm: 0.9999998007424712, iteration: 18423
loss: 1.055619716644287,grad_norm: 0.9999991167082114, iteration: 18424
loss: 1.0124151706695557,grad_norm: 0.9999992168640844, iteration: 18425
loss: 1.0311199426651,grad_norm: 0.9999994329662246, iteration: 18426
loss: 0.9809911251068115,grad_norm: 0.9999991557647068, iteration: 18427
loss: 1.0254300832748413,grad_norm: 0.9999992059267843, iteration: 18428
loss: 1.0795060396194458,grad_norm: 0.9999992559746359, iteration: 18429
loss: 0.9913995862007141,grad_norm: 0.9999998490212194, iteration: 18430
loss: 0.9799413681030273,grad_norm: 0.9999991003920686, iteration: 18431
loss: 1.0217092037200928,grad_norm: 0.9999992368320424, iteration: 18432
loss: 1.0424820184707642,grad_norm: 0.9999989630658142, iteration: 18433
loss: 1.013899326324463,grad_norm: 0.9583849219783613, iteration: 18434
loss: 1.0345616340637207,grad_norm: 0.9560089264358798, iteration: 18435
loss: 1.0458241701126099,grad_norm: 0.9999997494826178, iteration: 18436
loss: 1.0266530513763428,grad_norm: 0.9999993281039137, iteration: 18437
loss: 1.0100879669189453,grad_norm: 0.9999993683435364, iteration: 18438
loss: 1.001948595046997,grad_norm: 0.9999991672638019, iteration: 18439
loss: 1.0542799234390259,grad_norm: 0.9999995295330879, iteration: 18440
loss: 1.0403012037277222,grad_norm: 0.9540900585944144, iteration: 18441
loss: 1.0228534936904907,grad_norm: 0.9999994907949016, iteration: 18442
loss: 1.0213929414749146,grad_norm: 0.9999997509325445, iteration: 18443
loss: 1.0332432985305786,grad_norm: 0.9718452182585802, iteration: 18444
loss: 1.010252594947815,grad_norm: 0.999999126057011, iteration: 18445
loss: 1.0461124181747437,grad_norm: 0.955282011073741, iteration: 18446
loss: 1.0395375490188599,grad_norm: 0.9999993132284877, iteration: 18447
loss: 1.0543056726455688,grad_norm: 0.99999958871032, iteration: 18448
loss: 1.0229260921478271,grad_norm: 0.9999991216248186, iteration: 18449
loss: 0.9871671795845032,grad_norm: 0.9999992468181962, iteration: 18450
loss: 1.0436614751815796,grad_norm: 0.9999992865929626, iteration: 18451
loss: 1.052172064781189,grad_norm: 0.9999992329430861, iteration: 18452
loss: 0.9802303314208984,grad_norm: 0.9999990041895295, iteration: 18453
loss: 1.0310999155044556,grad_norm: 0.9105109414873072, iteration: 18454
loss: 0.9994910359382629,grad_norm: 0.9999990431753278, iteration: 18455
loss: 1.0165095329284668,grad_norm: 0.9999990958325666, iteration: 18456
loss: 0.9886727333068848,grad_norm: 0.9999992515736906, iteration: 18457
loss: 1.065376877784729,grad_norm: 0.999999445098629, iteration: 18458
loss: 1.0278058052062988,grad_norm: 0.9999991566451848, iteration: 18459
loss: 1.0580955743789673,grad_norm: 0.9999993103961158, iteration: 18460
loss: 1.0123924016952515,grad_norm: 0.9999990852627425, iteration: 18461
loss: 1.0292295217514038,grad_norm: 0.8675707755718902, iteration: 18462
loss: 1.0438203811645508,grad_norm: 0.9999991206704665, iteration: 18463
loss: 0.9889211654663086,grad_norm: 0.9999992207292075, iteration: 18464
loss: 1.085863709449768,grad_norm: 0.9999992263524973, iteration: 18465
loss: 1.0537699460983276,grad_norm: 0.9999992276294001, iteration: 18466
loss: 1.1359502077102661,grad_norm: 0.9999998410831196, iteration: 18467
loss: 1.051741123199463,grad_norm: 0.9999993398036038, iteration: 18468
loss: 1.1038497686386108,grad_norm: 0.9999991112985865, iteration: 18469
loss: 1.125648856163025,grad_norm: 0.9999993179286016, iteration: 18470
loss: 1.065954566001892,grad_norm: 0.9999996765580026, iteration: 18471
loss: 1.0459398031234741,grad_norm: 0.9999990577333429, iteration: 18472
loss: 1.0377399921417236,grad_norm: 0.9559752603006497, iteration: 18473
loss: 1.0390136241912842,grad_norm: 0.9999990334104337, iteration: 18474
loss: 1.008283019065857,grad_norm: 0.9999994165221321, iteration: 18475
loss: 1.0010377168655396,grad_norm: 0.9999990372862672, iteration: 18476
loss: 1.0088882446289062,grad_norm: 0.999999175718265, iteration: 18477
loss: 1.0302070379257202,grad_norm: 0.9999990758868147, iteration: 18478
loss: 1.0036256313323975,grad_norm: 0.8869391217423366, iteration: 18479
loss: 1.0020923614501953,grad_norm: 0.999999189773772, iteration: 18480
loss: 1.0135947465896606,grad_norm: 0.9999991599230216, iteration: 18481
loss: 1.0030375719070435,grad_norm: 0.9999997533558531, iteration: 18482
loss: 0.9746737480163574,grad_norm: 0.8760070749007265, iteration: 18483
loss: 0.996837854385376,grad_norm: 0.9999990960048081, iteration: 18484
loss: 1.021200180053711,grad_norm: 0.9999991812731086, iteration: 18485
loss: 1.052229881286621,grad_norm: 0.999999443105331, iteration: 18486
loss: 1.023582100868225,grad_norm: 0.999999206309791, iteration: 18487
loss: 1.0529768466949463,grad_norm: 0.9999991441038213, iteration: 18488
loss: 1.0221126079559326,grad_norm: 0.9999991423409612, iteration: 18489
loss: 1.0033594369888306,grad_norm: 0.9999992931892339, iteration: 18490
loss: 1.013779878616333,grad_norm: 0.9938850169919464, iteration: 18491
loss: 0.9899482131004333,grad_norm: 0.9999993610410899, iteration: 18492
loss: 1.0466578006744385,grad_norm: 0.9999995474989202, iteration: 18493
loss: 1.0179214477539062,grad_norm: 0.999998965630783, iteration: 18494
loss: 0.9614853858947754,grad_norm: 0.9999991614541673, iteration: 18495
loss: 1.0455710887908936,grad_norm: 0.9999993465665407, iteration: 18496
loss: 1.035500407218933,grad_norm: 0.9999990882135324, iteration: 18497
loss: 1.0214471817016602,grad_norm: 0.8911976476146912, iteration: 18498
loss: 1.0246230363845825,grad_norm: 0.9999991329211301, iteration: 18499
loss: 1.0535446405410767,grad_norm: 0.9999991288037184, iteration: 18500
loss: 1.0647093057632446,grad_norm: 0.9999991172213588, iteration: 18501
loss: 1.0102894306182861,grad_norm: 0.9999997157121031, iteration: 18502
loss: 1.0350487232208252,grad_norm: 0.99999914945363, iteration: 18503
loss: 1.06128990650177,grad_norm: 0.9999992518508352, iteration: 18504
loss: 1.0320265293121338,grad_norm: 0.9999992751875302, iteration: 18505
loss: 1.01654851436615,grad_norm: 0.9999991667900384, iteration: 18506
loss: 1.020560622215271,grad_norm: 0.9999992039458943, iteration: 18507
loss: 1.0211395025253296,grad_norm: 0.999999866080707, iteration: 18508
loss: 1.0188822746276855,grad_norm: 0.999999102116621, iteration: 18509
loss: 1.0226891040802002,grad_norm: 0.9999990976168445, iteration: 18510
loss: 0.9782118797302246,grad_norm: 0.8967466907341128, iteration: 18511
loss: 1.0314003229141235,grad_norm: 0.9999992662502816, iteration: 18512
loss: 1.0487949848175049,grad_norm: 0.9999992676862325, iteration: 18513
loss: 1.0361967086791992,grad_norm: 0.9999993744327721, iteration: 18514
loss: 1.024858832359314,grad_norm: 0.9999994940843764, iteration: 18515
loss: 1.0092473030090332,grad_norm: 0.9788771056601437, iteration: 18516
loss: 1.0189570188522339,grad_norm: 0.9999991192992594, iteration: 18517
loss: 1.0333502292633057,grad_norm: 0.9963857967226778, iteration: 18518
loss: 1.0439354181289673,grad_norm: 0.9999991128028266, iteration: 18519
loss: 1.1071386337280273,grad_norm: 0.9999994464763272, iteration: 18520
loss: 1.0386258363723755,grad_norm: 0.9999992170908997, iteration: 18521
loss: 0.9610546231269836,grad_norm: 0.9999989674148707, iteration: 18522
loss: 1.0619021654129028,grad_norm: 0.9999990836924365, iteration: 18523
loss: 1.0385619401931763,grad_norm: 0.9327135102325937, iteration: 18524
loss: 1.0402977466583252,grad_norm: 0.9999997043532437, iteration: 18525
loss: 1.0639128684997559,grad_norm: 0.9999991742603309, iteration: 18526
loss: 1.0192162990570068,grad_norm: 0.9999993150103769, iteration: 18527
loss: 1.1075063943862915,grad_norm: 0.9999994642984105, iteration: 18528
loss: 0.9711242318153381,grad_norm: 0.9999990589546193, iteration: 18529
loss: 1.0088257789611816,grad_norm: 0.9999994413518833, iteration: 18530
loss: 0.9589709639549255,grad_norm: 0.9999992561126265, iteration: 18531
loss: 1.0290303230285645,grad_norm: 0.999999025662497, iteration: 18532
loss: 1.009243369102478,grad_norm: 0.9999990653397995, iteration: 18533
loss: 0.9681547284126282,grad_norm: 0.8399704718311468, iteration: 18534
loss: 1.0423123836517334,grad_norm: 0.9999992430368234, iteration: 18535
loss: 0.9781631827354431,grad_norm: 0.9999997874507537, iteration: 18536
loss: 1.0367647409439087,grad_norm: 0.9999992333417116, iteration: 18537
loss: 0.993495762348175,grad_norm: 0.9999990784109596, iteration: 18538
loss: 1.0457123517990112,grad_norm: 0.9109736026975845, iteration: 18539
loss: 1.013399600982666,grad_norm: 0.9671662924328586, iteration: 18540
loss: 1.0266733169555664,grad_norm: 0.9999990383134476, iteration: 18541
loss: 0.9934488534927368,grad_norm: 0.9999993233184641, iteration: 18542
loss: 1.1091455221176147,grad_norm: 0.999999530881297, iteration: 18543
loss: 1.0600382089614868,grad_norm: 0.9999993244893373, iteration: 18544
loss: 1.018035888671875,grad_norm: 0.9999993028032613, iteration: 18545
loss: 1.0176942348480225,grad_norm: 0.9999996809904762, iteration: 18546
loss: 1.0546138286590576,grad_norm: 0.9999992219913219, iteration: 18547
loss: 1.0130029916763306,grad_norm: 0.9999992025698912, iteration: 18548
loss: 0.9713664650917053,grad_norm: 0.9999991873914391, iteration: 18549
loss: 1.0099761486053467,grad_norm: 0.9999991619432612, iteration: 18550
loss: 0.9510402679443359,grad_norm: 0.9999992163606753, iteration: 18551
loss: 1.013983964920044,grad_norm: 0.8903822931145653, iteration: 18552
loss: 1.013076663017273,grad_norm: 0.9999991011099633, iteration: 18553
loss: 0.9969422221183777,grad_norm: 0.9999991693246649, iteration: 18554
loss: 1.0005615949630737,grad_norm: 0.999999153328507, iteration: 18555
loss: 1.0217293500900269,grad_norm: 0.9999994811093148, iteration: 18556
loss: 0.9741107821464539,grad_norm: 0.9999992139448471, iteration: 18557
loss: 1.029000997543335,grad_norm: 0.9999993056380919, iteration: 18558
loss: 1.0380553007125854,grad_norm: 0.961955524233036, iteration: 18559
loss: 0.970986545085907,grad_norm: 0.9999991434199637, iteration: 18560
loss: 1.0404489040374756,grad_norm: 0.986863053105484, iteration: 18561
loss: 1.0306510925292969,grad_norm: 0.9999990833660063, iteration: 18562
loss: 1.0688846111297607,grad_norm: 0.9999992323074214, iteration: 18563
loss: 1.0165135860443115,grad_norm: 0.9999992288866348, iteration: 18564
loss: 0.9954077005386353,grad_norm: 0.9999993722840901, iteration: 18565
loss: 1.0280547142028809,grad_norm: 0.9999993567286679, iteration: 18566
loss: 1.0153807401657104,grad_norm: 0.9999990475510454, iteration: 18567
loss: 1.0157287120819092,grad_norm: 0.999999222957189, iteration: 18568
loss: 1.0160939693450928,grad_norm: 0.9999990530843572, iteration: 18569
loss: 1.0711380243301392,grad_norm: 0.99999944417662, iteration: 18570
loss: 1.0159586668014526,grad_norm: 0.9999993617877889, iteration: 18571
loss: 1.0533804893493652,grad_norm: 0.9999998170594899, iteration: 18572
loss: 1.052396297454834,grad_norm: 0.9999996003446251, iteration: 18573
loss: 0.9995487332344055,grad_norm: 0.9999992156415002, iteration: 18574
loss: 0.9912257790565491,grad_norm: 0.9999990163265563, iteration: 18575
loss: 1.0374444723129272,grad_norm: 0.9999997158917674, iteration: 18576
loss: 0.9781757593154907,grad_norm: 0.9999991519991905, iteration: 18577
loss: 1.0237325429916382,grad_norm: 0.9999990673607336, iteration: 18578
loss: 1.0199458599090576,grad_norm: 0.9999990682947303, iteration: 18579
loss: 1.0480023622512817,grad_norm: 0.9999992468641833, iteration: 18580
loss: 1.0432196855545044,grad_norm: 0.9999992992584876, iteration: 18581
loss: 1.0134354829788208,grad_norm: 0.9999995655595348, iteration: 18582
loss: 1.0213326215744019,grad_norm: 0.9271045366512687, iteration: 18583
loss: 0.9901443123817444,grad_norm: 0.9999990530311826, iteration: 18584
loss: 1.020127773284912,grad_norm: 0.9999990755169437, iteration: 18585
loss: 1.0547572374343872,grad_norm: 0.9999997112519863, iteration: 18586
loss: 1.1447393894195557,grad_norm: 0.9999998111716483, iteration: 18587
loss: 0.9892215728759766,grad_norm: 0.8879233149628318, iteration: 18588
loss: 1.0905392169952393,grad_norm: 0.9999991919669086, iteration: 18589
loss: 1.0772103071212769,grad_norm: 0.9999996790934798, iteration: 18590
loss: 1.0593715906143188,grad_norm: 0.9999990568724562, iteration: 18591
loss: 1.0177867412567139,grad_norm: 0.943399888255372, iteration: 18592
loss: 1.001974105834961,grad_norm: 0.9999994475489069, iteration: 18593
loss: 0.9889428615570068,grad_norm: 0.9999990697889738, iteration: 18594
loss: 1.080716848373413,grad_norm: 0.9999992964029155, iteration: 18595
loss: 1.0011587142944336,grad_norm: 0.9999991590984446, iteration: 18596
loss: 1.111992359161377,grad_norm: 0.9999997439158977, iteration: 18597
loss: 1.019610047340393,grad_norm: 0.9999991525843748, iteration: 18598
loss: 0.9982947111129761,grad_norm: 0.9999992660122908, iteration: 18599
loss: 1.0401374101638794,grad_norm: 0.999999199805917, iteration: 18600
loss: 1.0053805112838745,grad_norm: 0.9999991684046534, iteration: 18601
loss: 1.005079746246338,grad_norm: 0.9999995225290322, iteration: 18602
loss: 0.9759498834609985,grad_norm: 0.9999992194800261, iteration: 18603
loss: 1.0101977586746216,grad_norm: 0.9999990772813737, iteration: 18604
loss: 1.0599931478500366,grad_norm: 0.9999993578707987, iteration: 18605
loss: 1.041417121887207,grad_norm: 0.999999416809506, iteration: 18606
loss: 1.0329444408416748,grad_norm: 0.9999991253279628, iteration: 18607
loss: 1.0507447719573975,grad_norm: 0.9999992448209901, iteration: 18608
loss: 0.9919770359992981,grad_norm: 0.9999995967492348, iteration: 18609
loss: 1.0223349332809448,grad_norm: 0.9721837677165923, iteration: 18610
loss: 0.9602900743484497,grad_norm: 0.9999991164681451, iteration: 18611
loss: 1.0239747762680054,grad_norm: 0.8555176326664373, iteration: 18612
loss: 1.0255153179168701,grad_norm: 0.9999992280130884, iteration: 18613
loss: 1.0020439624786377,grad_norm: 0.9999992696637747, iteration: 18614
loss: 0.9750215411186218,grad_norm: 0.9999991999411275, iteration: 18615
loss: 1.0265134572982788,grad_norm: 0.9999991580491013, iteration: 18616
loss: 1.0124925374984741,grad_norm: 0.999999257853466, iteration: 18617
loss: 1.0448951721191406,grad_norm: 0.8635366187475165, iteration: 18618
loss: 0.991992712020874,grad_norm: 0.9999990337663982, iteration: 18619
loss: 0.9840761423110962,grad_norm: 0.9999991634327077, iteration: 18620
loss: 1.0009580850601196,grad_norm: 0.9999989730081981, iteration: 18621
loss: 1.0117071866989136,grad_norm: 0.999999342691528, iteration: 18622
loss: 1.0071717500686646,grad_norm: 0.999999357932831, iteration: 18623
loss: 1.0227186679840088,grad_norm: 0.9999991856862156, iteration: 18624
loss: 1.0067386627197266,grad_norm: 0.9999990060647472, iteration: 18625
loss: 1.0370349884033203,grad_norm: 0.9999991208874582, iteration: 18626
loss: 1.026689052581787,grad_norm: 0.9999994238465653, iteration: 18627
loss: 1.0215528011322021,grad_norm: 0.9999990434870878, iteration: 18628
loss: 1.0345160961151123,grad_norm: 0.9999992050769333, iteration: 18629
loss: 1.0344411134719849,grad_norm: 0.9769808189344777, iteration: 18630
loss: 0.9999939203262329,grad_norm: 0.9999991257167123, iteration: 18631
loss: 0.9772011637687683,grad_norm: 0.999999321619986, iteration: 18632
loss: 1.0629302263259888,grad_norm: 0.999999578528787, iteration: 18633
loss: 1.0091853141784668,grad_norm: 0.9999991007294349, iteration: 18634
loss: 1.00188410282135,grad_norm: 0.9999990503197385, iteration: 18635
loss: 1.0671522617340088,grad_norm: 0.9999993665492356, iteration: 18636
loss: 1.014146089553833,grad_norm: 0.9999992929104685, iteration: 18637
loss: 0.990395188331604,grad_norm: 0.8884468376277341, iteration: 18638
loss: 1.0456993579864502,grad_norm: 0.999999181965663, iteration: 18639
loss: 1.0909923315048218,grad_norm: 0.9999995526599024, iteration: 18640
loss: 1.0208256244659424,grad_norm: 0.9999992597997646, iteration: 18641
loss: 1.0124772787094116,grad_norm: 0.998520291524346, iteration: 18642
loss: 1.0174533128738403,grad_norm: 0.9361359796515923, iteration: 18643
loss: 1.0188087224960327,grad_norm: 0.9999990486894541, iteration: 18644
loss: 0.9807100296020508,grad_norm: 0.9999994433837112, iteration: 18645
loss: 0.9915920495986938,grad_norm: 0.9999991180792525, iteration: 18646
loss: 1.0138777494430542,grad_norm: 0.9999991729628287, iteration: 18647
loss: 1.053369164466858,grad_norm: 0.9999996132410467, iteration: 18648
loss: 1.0401924848556519,grad_norm: 0.9999997229957178, iteration: 18649
loss: 1.0538034439086914,grad_norm: 0.9999995771191651, iteration: 18650
loss: 1.0328553915023804,grad_norm: 0.999999013081507, iteration: 18651
loss: 0.9779695868492126,grad_norm: 0.9999990263043961, iteration: 18652
loss: 1.0287431478500366,grad_norm: 0.999999044219179, iteration: 18653
loss: 1.008559226989746,grad_norm: 0.9999989830212325, iteration: 18654
loss: 0.9548346400260925,grad_norm: 0.9999996271662038, iteration: 18655
loss: 1.0051922798156738,grad_norm: 0.9999992193875816, iteration: 18656
loss: 1.005824089050293,grad_norm: 0.9969395337177968, iteration: 18657
loss: 1.0118629932403564,grad_norm: 0.9999994755224557, iteration: 18658
loss: 1.0210329294204712,grad_norm: 0.9999990909864697, iteration: 18659
loss: 1.0343648195266724,grad_norm: 0.999999123144424, iteration: 18660
loss: 1.044443130493164,grad_norm: 0.9999989964002987, iteration: 18661
loss: 1.0568739175796509,grad_norm: 0.9999992749256003, iteration: 18662
loss: 1.0076985359191895,grad_norm: 0.999999604083201, iteration: 18663
loss: 1.0087157487869263,grad_norm: 0.999999622482036, iteration: 18664
loss: 0.9962967038154602,grad_norm: 0.9999991752296398, iteration: 18665
loss: 1.0084397792816162,grad_norm: 0.9953974979965668, iteration: 18666
loss: 1.0084117650985718,grad_norm: 0.9999991405550124, iteration: 18667
loss: 1.0502201318740845,grad_norm: 0.9999997996555569, iteration: 18668
loss: 1.0104737281799316,grad_norm: 0.9999991037523106, iteration: 18669
loss: 0.9848676919937134,grad_norm: 0.9999991273559862, iteration: 18670
loss: 1.07575261592865,grad_norm: 0.9999991406203206, iteration: 18671
loss: 1.03123939037323,grad_norm: 0.990717319922363, iteration: 18672
loss: 1.0065829753875732,grad_norm: 0.9999997291128625, iteration: 18673
loss: 1.0445809364318848,grad_norm: 0.9999999016942892, iteration: 18674
loss: 0.9768245816230774,grad_norm: 0.9999993059101294, iteration: 18675
loss: 1.0360345840454102,grad_norm: 0.9999993812792413, iteration: 18676
loss: 0.9957526922225952,grad_norm: 0.9999993070846865, iteration: 18677
loss: 0.9767695069313049,grad_norm: 0.9999993461394862, iteration: 18678
loss: 1.0347514152526855,grad_norm: 0.999999293993546, iteration: 18679
loss: 0.9556786417961121,grad_norm: 0.9999991508726772, iteration: 18680
loss: 1.0368711948394775,grad_norm: 0.999999380260675, iteration: 18681
loss: 1.0101091861724854,grad_norm: 0.9757356093419046, iteration: 18682
loss: 1.0880595445632935,grad_norm: 0.9999996354223659, iteration: 18683
loss: 1.0456653833389282,grad_norm: 0.9999995309439443, iteration: 18684
loss: 1.0327930450439453,grad_norm: 0.9999992471319458, iteration: 18685
loss: 1.035807490348816,grad_norm: 0.9999994105459123, iteration: 18686
loss: 0.9857352375984192,grad_norm: 0.9999991468609477, iteration: 18687
loss: 0.9271746277809143,grad_norm: 0.9999993550237285, iteration: 18688
loss: 1.0324242115020752,grad_norm: 0.9999990758506472, iteration: 18689
loss: 1.0910453796386719,grad_norm: 0.999999764373398, iteration: 18690
loss: 1.055927038192749,grad_norm: 0.9999991473236373, iteration: 18691
loss: 1.1110092401504517,grad_norm: 1.000000044822844, iteration: 18692
loss: 1.0082488059997559,grad_norm: 0.861917714746478, iteration: 18693
loss: 0.9893392324447632,grad_norm: 0.976814180457681, iteration: 18694
loss: 1.0270154476165771,grad_norm: 0.9999994918178214, iteration: 18695
loss: 1.022269368171692,grad_norm: 0.9999992567218515, iteration: 18696
loss: 1.0086828470230103,grad_norm: 0.99999925117678, iteration: 18697
loss: 1.0513218641281128,grad_norm: 0.9999998284984775, iteration: 18698
loss: 0.9856387972831726,grad_norm: 0.9999991884790761, iteration: 18699
loss: 1.1163612604141235,grad_norm: 0.999999537760829, iteration: 18700
loss: 1.0011985301971436,grad_norm: 0.9999993154320613, iteration: 18701
loss: 0.9876952171325684,grad_norm: 0.9999992287796835, iteration: 18702
loss: 0.9963865876197815,grad_norm: 0.9999992118215939, iteration: 18703
loss: 1.0342270135879517,grad_norm: 0.9999992552236875, iteration: 18704
loss: 1.015736699104309,grad_norm: 0.9999992400340632, iteration: 18705
loss: 1.0172067880630493,grad_norm: 0.9999992673652032, iteration: 18706
loss: 1.0376297235488892,grad_norm: 0.9999990564455877, iteration: 18707
loss: 1.012475609779358,grad_norm: 0.8500644520612557, iteration: 18708
loss: 1.0503885746002197,grad_norm: 0.9999994819790203, iteration: 18709
loss: 1.0030180215835571,grad_norm: 0.9154848420333435, iteration: 18710
loss: 1.057761311531067,grad_norm: 0.9999990676869126, iteration: 18711
loss: 1.0365865230560303,grad_norm: 0.9999990765734472, iteration: 18712
loss: 1.0109745264053345,grad_norm: 0.990836972741267, iteration: 18713
loss: 1.0546870231628418,grad_norm: 0.9999998071583028, iteration: 18714
loss: 1.0223801136016846,grad_norm: 0.9999990610396772, iteration: 18715
loss: 1.0050771236419678,grad_norm: 0.999999198079188, iteration: 18716
loss: 1.0463454723358154,grad_norm: 0.9999992237839977, iteration: 18717
loss: 1.0009475946426392,grad_norm: 0.9999992221148235, iteration: 18718
loss: 0.9570989608764648,grad_norm: 0.9999992180400219, iteration: 18719
loss: 1.0349963903427124,grad_norm: 0.9999992436927455, iteration: 18720
loss: 1.0452862977981567,grad_norm: 0.99999923375431, iteration: 18721
loss: 0.9991589784622192,grad_norm: 0.8930439557382546, iteration: 18722
loss: 1.1591414213180542,grad_norm: 0.9999997574050403, iteration: 18723
loss: 1.0240470170974731,grad_norm: 0.9999992792794354, iteration: 18724
loss: 1.005401372909546,grad_norm: 0.9999991335958247, iteration: 18725
loss: 1.00144624710083,grad_norm: 0.9999993824748614, iteration: 18726
loss: 1.0565571784973145,grad_norm: 0.9999997355626843, iteration: 18727
loss: 1.0247275829315186,grad_norm: 0.955999875334476, iteration: 18728
loss: 0.9748519659042358,grad_norm: 0.9999992923937756, iteration: 18729
loss: 0.9813060164451599,grad_norm: 0.9999990009797146, iteration: 18730
loss: 1.0231034755706787,grad_norm: 0.9999990503133637, iteration: 18731
loss: 0.988445520401001,grad_norm: 0.9999990939545634, iteration: 18732
loss: 0.9884295463562012,grad_norm: 0.9999992100716206, iteration: 18733
loss: 1.0057709217071533,grad_norm: 0.9999992447283719, iteration: 18734
loss: 1.0104972124099731,grad_norm: 0.9999991095947066, iteration: 18735
loss: 1.03278386592865,grad_norm: 0.99999919975149, iteration: 18736
loss: 0.9970018267631531,grad_norm: 0.9999992093520632, iteration: 18737
loss: 1.0431222915649414,grad_norm: 0.9282308521988728, iteration: 18738
loss: 1.0257227420806885,grad_norm: 0.9921377516706285, iteration: 18739
loss: 1.0507395267486572,grad_norm: 0.9821308404178449, iteration: 18740
loss: 1.054614543914795,grad_norm: 0.9999995672312793, iteration: 18741
loss: 1.023667573928833,grad_norm: 0.9999992331765181, iteration: 18742
loss: 1.004765272140503,grad_norm: 0.9999990614234812, iteration: 18743
loss: 1.0287398099899292,grad_norm: 0.9999991665778583, iteration: 18744
loss: 1.0427659749984741,grad_norm: 0.9565702271458342, iteration: 18745
loss: 1.0309003591537476,grad_norm: 0.9999996086200925, iteration: 18746
loss: 0.9966791868209839,grad_norm: 0.9480049371047492, iteration: 18747
loss: 0.9936622381210327,grad_norm: 0.999999168530129, iteration: 18748
loss: 1.0299601554870605,grad_norm: 0.9999994029465269, iteration: 18749
loss: 0.9692614078521729,grad_norm: 0.9999993244029245, iteration: 18750
loss: 1.03734290599823,grad_norm: 0.9999992555274738, iteration: 18751
loss: 1.0047787427902222,grad_norm: 0.999999291993697, iteration: 18752
loss: 1.0359433889389038,grad_norm: 0.9999992382144203, iteration: 18753
loss: 1.0014679431915283,grad_norm: 0.9582021217451607, iteration: 18754
loss: 1.0778573751449585,grad_norm: 0.9999989940409655, iteration: 18755
loss: 0.9973350763320923,grad_norm: 0.9999993523215336, iteration: 18756
loss: 1.007983684539795,grad_norm: 0.9999991269598079, iteration: 18757
loss: 1.044148325920105,grad_norm: 0.9999993031038669, iteration: 18758
loss: 1.026432752609253,grad_norm: 0.999998995830997, iteration: 18759
loss: 1.0194461345672607,grad_norm: 0.9999994374857926, iteration: 18760
loss: 1.047602653503418,grad_norm: 0.8932924564178872, iteration: 18761
loss: 0.9897878170013428,grad_norm: 0.9999991973143161, iteration: 18762
loss: 1.006771445274353,grad_norm: 0.9999991100198011, iteration: 18763
loss: 0.9531320333480835,grad_norm: 0.9999990438758352, iteration: 18764
loss: 1.0097968578338623,grad_norm: 0.9999992340364866, iteration: 18765
loss: 1.0318467617034912,grad_norm: 0.9999997949415766, iteration: 18766
loss: 0.978424072265625,grad_norm: 0.9999991504728242, iteration: 18767
loss: 1.073020339012146,grad_norm: 0.9999992552525883, iteration: 18768
loss: 0.978119969367981,grad_norm: 0.9999997322655565, iteration: 18769
loss: 0.9716407060623169,grad_norm: 0.9999991288489484, iteration: 18770
loss: 1.0100107192993164,grad_norm: 0.888819866129561, iteration: 18771
loss: 1.0687048435211182,grad_norm: 0.9999993122067241, iteration: 18772
loss: 0.9727692008018494,grad_norm: 0.9999990868411804, iteration: 18773
loss: 1.011244773864746,grad_norm: 0.9999995335410679, iteration: 18774
loss: 1.0494776964187622,grad_norm: 0.9999991103345746, iteration: 18775
loss: 1.002929449081421,grad_norm: 0.9999991523393067, iteration: 18776
loss: 1.0002981424331665,grad_norm: 0.9999991542411358, iteration: 18777
loss: 1.0117859840393066,grad_norm: 0.9561826937373098, iteration: 18778
loss: 1.0383005142211914,grad_norm: 0.9999992985481495, iteration: 18779
loss: 0.9928370714187622,grad_norm: 0.9999992048915173, iteration: 18780
loss: 1.0423634052276611,grad_norm: 0.8446100333672948, iteration: 18781
loss: 0.9687427282333374,grad_norm: 0.999998989698222, iteration: 18782
loss: 1.0292773246765137,grad_norm: 0.9999989966148587, iteration: 18783
loss: 1.023923397064209,grad_norm: 0.999999025364623, iteration: 18784
loss: 1.0131843090057373,grad_norm: 0.9653533115518879, iteration: 18785
loss: 0.9944509267807007,grad_norm: 0.9999990338709849, iteration: 18786
loss: 1.0379235744476318,grad_norm: 0.9155490227535683, iteration: 18787
loss: 0.9996899962425232,grad_norm: 0.9999990874442181, iteration: 18788
loss: 1.0403032302856445,grad_norm: 0.9999991291609386, iteration: 18789
loss: 1.0262280702590942,grad_norm: 0.9999991695812313, iteration: 18790
loss: 1.0259053707122803,grad_norm: 0.9999992104983036, iteration: 18791
loss: 1.015041708946228,grad_norm: 0.9999992356654571, iteration: 18792
loss: 1.022164225578308,grad_norm: 0.9999997303208714, iteration: 18793
loss: 0.9965852499008179,grad_norm: 0.9999992307591986, iteration: 18794
loss: 1.0223963260650635,grad_norm: 0.9999997291314633, iteration: 18795
loss: 1.042182207107544,grad_norm: 0.9999992670495063, iteration: 18796
loss: 1.0266032218933105,grad_norm: 0.9999995550712788, iteration: 18797
loss: 1.0203540325164795,grad_norm: 0.9999992852414333, iteration: 18798
loss: 1.0718367099761963,grad_norm: 0.9999996077945279, iteration: 18799
loss: 0.9979347586631775,grad_norm: 0.9999992540597624, iteration: 18800
loss: 1.110727071762085,grad_norm: 0.9999993214833168, iteration: 18801
loss: 1.0447163581848145,grad_norm: 0.9999991097219663, iteration: 18802
loss: 1.0588608980178833,grad_norm: 0.9999998013960799, iteration: 18803
loss: 0.9775591492652893,grad_norm: 0.9999993467404087, iteration: 18804
loss: 0.9909766316413879,grad_norm: 0.9999991184259186, iteration: 18805
loss: 1.0296190977096558,grad_norm: 0.9999994535933936, iteration: 18806
loss: 1.0884093046188354,grad_norm: 0.999999706947935, iteration: 18807
loss: 1.000769019126892,grad_norm: 0.9999992357035232, iteration: 18808
loss: 1.026492714881897,grad_norm: 0.9999994554176611, iteration: 18809
loss: 1.0611790418624878,grad_norm: 0.9999991127597941, iteration: 18810
loss: 1.0311754941940308,grad_norm: 0.9999992458976575, iteration: 18811
loss: 1.019210934638977,grad_norm: 0.9999994826027383, iteration: 18812
loss: 1.0031155347824097,grad_norm: 0.9999992182972426, iteration: 18813
loss: 1.0115628242492676,grad_norm: 0.9999994358829016, iteration: 18814
loss: 1.0024546384811401,grad_norm: 0.9211017375740161, iteration: 18815
loss: 1.0307024717330933,grad_norm: 0.9999993247060208, iteration: 18816
loss: 1.1847752332687378,grad_norm: 0.9999999812510882, iteration: 18817
loss: 0.9934666156768799,grad_norm: 0.9999999293493813, iteration: 18818
loss: 1.0312831401824951,grad_norm: 0.9999989785233384, iteration: 18819
loss: 1.0576599836349487,grad_norm: 0.9999999693772998, iteration: 18820
loss: 1.0255364179611206,grad_norm: 0.9732240854068634, iteration: 18821
loss: 1.029994010925293,grad_norm: 0.9999994032614635, iteration: 18822
loss: 1.06145179271698,grad_norm: 0.9999993397497796, iteration: 18823
loss: 0.9888600707054138,grad_norm: 0.9999990553777657, iteration: 18824
loss: 0.97591632604599,grad_norm: 0.9999992753084234, iteration: 18825
loss: 1.047380805015564,grad_norm: 0.9999992413833312, iteration: 18826
loss: 0.9727350473403931,grad_norm: 0.9676091303779608, iteration: 18827
loss: 1.0397546291351318,grad_norm: 0.9999993864020124, iteration: 18828
loss: 1.0075080394744873,grad_norm: 0.9999991841887784, iteration: 18829
loss: 1.031727910041809,grad_norm: 0.9999992564379264, iteration: 18830
loss: 1.0004140138626099,grad_norm: 0.9999991706107487, iteration: 18831
loss: 1.0443687438964844,grad_norm: 0.9999990724627995, iteration: 18832
loss: 1.0353193283081055,grad_norm: 0.9242300952107418, iteration: 18833
loss: 0.9720049500465393,grad_norm: 0.9999992981689381, iteration: 18834
loss: 1.0214481353759766,grad_norm: 0.9894003418180684, iteration: 18835
loss: 0.9990044236183167,grad_norm: 0.9999990044910603, iteration: 18836
loss: 1.0329391956329346,grad_norm: 0.999999672015338, iteration: 18837
loss: 1.0477231740951538,grad_norm: 0.9999991384574097, iteration: 18838
loss: 1.0016157627105713,grad_norm: 0.9999991715795004, iteration: 18839
loss: 1.026678442955017,grad_norm: 0.9999992722369209, iteration: 18840
loss: 1.0892071723937988,grad_norm: 0.9999996798228946, iteration: 18841
loss: 1.0984209775924683,grad_norm: 0.9999995349962655, iteration: 18842
loss: 1.0253690481185913,grad_norm: 0.9999992342951669, iteration: 18843
loss: 1.0208786725997925,grad_norm: 0.999998995630945, iteration: 18844
loss: 1.1239925622940063,grad_norm: 0.9999994173269418, iteration: 18845
loss: 1.0601733922958374,grad_norm: 0.9999993532469347, iteration: 18846
loss: 1.0202901363372803,grad_norm: 0.9999993955662807, iteration: 18847
loss: 1.0264579057693481,grad_norm: 0.9999997917938124, iteration: 18848
loss: 1.054055094718933,grad_norm: 0.9999994951642049, iteration: 18849
loss: 1.0468132495880127,grad_norm: 0.999999142573828, iteration: 18850
loss: 0.9960578083992004,grad_norm: 0.9999992365228427, iteration: 18851
loss: 1.0035992860794067,grad_norm: 0.9084771331821744, iteration: 18852
loss: 1.0095863342285156,grad_norm: 0.9999992903435148, iteration: 18853
loss: 1.0038187503814697,grad_norm: 0.9999992323380371, iteration: 18854
loss: 0.9987624883651733,grad_norm: 0.9623420240508191, iteration: 18855
loss: 1.0146771669387817,grad_norm: 0.8760972738555871, iteration: 18856
loss: 1.0788626670837402,grad_norm: 0.9999994362817759, iteration: 18857
loss: 0.9858572483062744,grad_norm: 0.9999992085688162, iteration: 18858
loss: 1.0123591423034668,grad_norm: 0.9999993748561988, iteration: 18859
loss: 1.0588828325271606,grad_norm: 0.9999997059203927, iteration: 18860
loss: 0.993939995765686,grad_norm: 0.9510340941014398, iteration: 18861
loss: 1.0443170070648193,grad_norm: 0.9960649223748272, iteration: 18862
loss: 1.0707347393035889,grad_norm: 0.9999998422298028, iteration: 18863
loss: 0.9955708980560303,grad_norm: 0.987459612346416, iteration: 18864
loss: 0.993709146976471,grad_norm: 0.9999996439442904, iteration: 18865
loss: 0.9823684096336365,grad_norm: 0.9999991739886614, iteration: 18866
loss: 1.0865657329559326,grad_norm: 0.9999998428369626, iteration: 18867
loss: 0.9641799330711365,grad_norm: 0.9999992192548135, iteration: 18868
loss: 1.0159891843795776,grad_norm: 0.9999997436764672, iteration: 18869
loss: 0.9986041188240051,grad_norm: 0.9999991669740117, iteration: 18870
loss: 1.0026922225952148,grad_norm: 0.9999992247364187, iteration: 18871
loss: 1.0106873512268066,grad_norm: 0.9999989763477535, iteration: 18872
loss: 0.9808660745620728,grad_norm: 0.9999992485232977, iteration: 18873
loss: 1.0024726390838623,grad_norm: 0.9999989660685179, iteration: 18874
loss: 0.9956878423690796,grad_norm: 0.9262391500100549, iteration: 18875
loss: 1.0137040615081787,grad_norm: 0.9999993024965832, iteration: 18876
loss: 1.0205379724502563,grad_norm: 0.9999991921741567, iteration: 18877
loss: 1.0027399063110352,grad_norm: 0.9999991727520308, iteration: 18878
loss: 1.0430803298950195,grad_norm: 0.8907239854516454, iteration: 18879
loss: 1.011095643043518,grad_norm: 0.999999210482049, iteration: 18880
loss: 1.021989345550537,grad_norm: 0.9999995254375967, iteration: 18881
loss: 0.9774210453033447,grad_norm: 0.9999991384452176, iteration: 18882
loss: 0.987833559513092,grad_norm: 0.9999990548420877, iteration: 18883
loss: 1.0840169191360474,grad_norm: 0.9999996292272344, iteration: 18884
loss: 1.0407168865203857,grad_norm: 0.9999992119217309, iteration: 18885
loss: 1.0212974548339844,grad_norm: 0.9999992381521355, iteration: 18886
loss: 0.9938113689422607,grad_norm: 0.9999993866249831, iteration: 18887
loss: 1.013102412223816,grad_norm: 0.9635884708628607, iteration: 18888
loss: 0.9434941411018372,grad_norm: 0.9999991824024188, iteration: 18889
loss: 0.9502277374267578,grad_norm: 0.9999990871093782, iteration: 18890
loss: 0.9971242547035217,grad_norm: 0.9532399358596455, iteration: 18891
loss: 0.9795072674751282,grad_norm: 0.9321314862382079, iteration: 18892
loss: 1.0046627521514893,grad_norm: 0.9999991021937861, iteration: 18893
loss: 0.9898508191108704,grad_norm: 0.9570992355329389, iteration: 18894
loss: 0.9814186692237854,grad_norm: 0.9999991920562783, iteration: 18895
loss: 1.0255012512207031,grad_norm: 0.9999990982192498, iteration: 18896
loss: 1.0427958965301514,grad_norm: 0.9999990767278151, iteration: 18897
loss: 1.0340806245803833,grad_norm: 0.9999990565659119, iteration: 18898
loss: 1.024442434310913,grad_norm: 0.9999992213134185, iteration: 18899
loss: 1.0456078052520752,grad_norm: 0.9999996904431689, iteration: 18900
loss: 1.009198546409607,grad_norm: 0.9999994060363494, iteration: 18901
loss: 1.0554355382919312,grad_norm: 0.9999992813368411, iteration: 18902
loss: 0.9863460063934326,grad_norm: 0.9453242654715527, iteration: 18903
loss: 0.9664464592933655,grad_norm: 0.9515039345710533, iteration: 18904
loss: 1.037848949432373,grad_norm: 0.9999997253707684, iteration: 18905
loss: 1.0089349746704102,grad_norm: 0.9999991025357615, iteration: 18906
loss: 1.030269980430603,grad_norm: 0.9999991605336392, iteration: 18907
loss: 0.9931953549385071,grad_norm: 0.9167135388912934, iteration: 18908
loss: 0.998487114906311,grad_norm: 0.9999991180023396, iteration: 18909
loss: 1.0109390020370483,grad_norm: 0.999999153735597, iteration: 18910
loss: 1.0097543001174927,grad_norm: 0.9686453676607972, iteration: 18911
loss: 1.0087249279022217,grad_norm: 0.999999222034939, iteration: 18912
loss: 1.0133978128433228,grad_norm: 0.860920381762884, iteration: 18913
loss: 0.9571148157119751,grad_norm: 0.9999992322662794, iteration: 18914
loss: 1.0333784818649292,grad_norm: 0.9999990666477238, iteration: 18915
loss: 1.0070490837097168,grad_norm: 0.9999995989187705, iteration: 18916
loss: 0.9812669157981873,grad_norm: 0.9711216381043367, iteration: 18917
loss: 1.000324010848999,grad_norm: 0.9999990582277034, iteration: 18918
loss: 1.0146701335906982,grad_norm: 0.9999995195516775, iteration: 18919
loss: 1.0265370607376099,grad_norm: 0.9999995354074134, iteration: 18920
loss: 1.0645514726638794,grad_norm: 0.999999119892913, iteration: 18921
loss: 1.0780911445617676,grad_norm: 0.9999997861698331, iteration: 18922
loss: 1.0407629013061523,grad_norm: 0.9999993548374012, iteration: 18923
loss: 1.0325145721435547,grad_norm: 0.9999995021978487, iteration: 18924
loss: 1.008429765701294,grad_norm: 0.9999994103302983, iteration: 18925
loss: 0.9850637912750244,grad_norm: 0.9999991181421511, iteration: 18926
loss: 0.9839602708816528,grad_norm: 0.9999992818592858, iteration: 18927
loss: 0.9908599853515625,grad_norm: 0.9999992842241343, iteration: 18928
loss: 1.0590177774429321,grad_norm: 0.9999992703246664, iteration: 18929
loss: 1.025283694267273,grad_norm: 0.9999992281168357, iteration: 18930
loss: 0.9925785660743713,grad_norm: 0.9570952075747952, iteration: 18931
loss: 1.0191144943237305,grad_norm: 0.9999992366540065, iteration: 18932
loss: 1.06557035446167,grad_norm: 0.9999995125591052, iteration: 18933
loss: 1.0518181324005127,grad_norm: 0.9999991304570174, iteration: 18934
loss: 1.0759309530258179,grad_norm: 0.9999997180856349, iteration: 18935
loss: 1.0160789489746094,grad_norm: 0.99999927570427, iteration: 18936
loss: 1.0318231582641602,grad_norm: 0.9999996029706634, iteration: 18937
loss: 1.0064054727554321,grad_norm: 0.9999992403563382, iteration: 18938
loss: 0.9716066122055054,grad_norm: 0.9999991106309523, iteration: 18939
loss: 1.0196619033813477,grad_norm: 0.9999993080358955, iteration: 18940
loss: 1.0265215635299683,grad_norm: 0.9999996543734133, iteration: 18941
loss: 1.0065792798995972,grad_norm: 0.9806260012019438, iteration: 18942
loss: 1.055132508277893,grad_norm: 0.9999993291477917, iteration: 18943
loss: 1.0539523363113403,grad_norm: 0.9999994299222287, iteration: 18944
loss: 0.9776887893676758,grad_norm: 0.9999992685929568, iteration: 18945
loss: 1.0303547382354736,grad_norm: 0.9999992171611336, iteration: 18946
loss: 1.0092408657073975,grad_norm: 0.9999991478486188, iteration: 18947
loss: 1.036582350730896,grad_norm: 0.999999209272611, iteration: 18948
loss: 1.0149836540222168,grad_norm: 0.9865262524851665, iteration: 18949
loss: 0.977034330368042,grad_norm: 0.9655695758321488, iteration: 18950
loss: 0.9783802628517151,grad_norm: 0.9999991895147862, iteration: 18951
loss: 1.0406804084777832,grad_norm: 0.9999993684762031, iteration: 18952
loss: 1.009721040725708,grad_norm: 0.999999099140383, iteration: 18953
loss: 1.0030932426452637,grad_norm: 0.9999992521507419, iteration: 18954
loss: 0.9781053066253662,grad_norm: 0.9999991024962341, iteration: 18955
loss: 1.0145750045776367,grad_norm: 0.9999991720531917, iteration: 18956
loss: 0.9900389313697815,grad_norm: 0.9999992819964288, iteration: 18957
loss: 1.0238806009292603,grad_norm: 0.9999993590012877, iteration: 18958
loss: 1.0383782386779785,grad_norm: 0.9986552017415822, iteration: 18959
loss: 0.9895277619361877,grad_norm: 0.9999990537154191, iteration: 18960
loss: 0.9826822876930237,grad_norm: 0.9254496594579722, iteration: 18961
loss: 1.026246190071106,grad_norm: 0.9999990256321166, iteration: 18962
loss: 0.9964587688446045,grad_norm: 0.9999990589998873, iteration: 18963
loss: 1.0249792337417603,grad_norm: 0.9999991652856254, iteration: 18964
loss: 0.9821003079414368,grad_norm: 0.9999990830195541, iteration: 18965
loss: 1.0137555599212646,grad_norm: 0.9999990074596775, iteration: 18966
loss: 1.020655632019043,grad_norm: 0.9999990895002435, iteration: 18967
loss: 1.0254908800125122,grad_norm: 0.9999993339802027, iteration: 18968
loss: 1.0036946535110474,grad_norm: 0.9999991184698087, iteration: 18969
loss: 1.0435131788253784,grad_norm: 0.9999992318881563, iteration: 18970
loss: 1.0021674633026123,grad_norm: 0.9999991398506481, iteration: 18971
loss: 0.9941503405570984,grad_norm: 0.9999991301323065, iteration: 18972
loss: 1.0385534763336182,grad_norm: 0.999999533695117, iteration: 18973
loss: 1.0230202674865723,grad_norm: 0.9999991434754077, iteration: 18974
loss: 0.990007758140564,grad_norm: 0.9999994671791498, iteration: 18975
loss: 0.9942387342453003,grad_norm: 0.9081383991128725, iteration: 18976
loss: 1.0887176990509033,grad_norm: 0.9999995960176572, iteration: 18977
loss: 1.0349162817001343,grad_norm: 0.8229666598591876, iteration: 18978
loss: 0.9970297813415527,grad_norm: 0.999999232886376, iteration: 18979
loss: 1.0469845533370972,grad_norm: 0.9999992667920359, iteration: 18980
loss: 1.0158538818359375,grad_norm: 0.999999139055766, iteration: 18981
loss: 0.9920342564582825,grad_norm: 0.9999997117278526, iteration: 18982
loss: 0.9920128583908081,grad_norm: 0.9999993728972613, iteration: 18983
loss: 1.0361018180847168,grad_norm: 0.9999990871372116, iteration: 18984
loss: 1.03956139087677,grad_norm: 0.9770317570271521, iteration: 18985
loss: 0.9786171913146973,grad_norm: 0.9999991038722335, iteration: 18986
loss: 1.072211742401123,grad_norm: 0.9999994338808873, iteration: 18987
loss: 1.0185199975967407,grad_norm: 0.8768821638939542, iteration: 18988
loss: 0.9913733601570129,grad_norm: 0.9999990703623811, iteration: 18989
loss: 1.0153582096099854,grad_norm: 0.9896445577806005, iteration: 18990
loss: 0.9765862226486206,grad_norm: 0.982842516141757, iteration: 18991
loss: 1.0600271224975586,grad_norm: 0.9519979396178192, iteration: 18992
loss: 1.0036489963531494,grad_norm: 0.8869343527520613, iteration: 18993
loss: 1.0317068099975586,grad_norm: 0.9134252861175556, iteration: 18994
loss: 1.006555438041687,grad_norm: 0.9999990631015204, iteration: 18995
loss: 1.0314605236053467,grad_norm: 0.9999992910446187, iteration: 18996
loss: 0.9710142016410828,grad_norm: 0.9999991636926245, iteration: 18997
loss: 1.0040777921676636,grad_norm: 0.9999991945811891, iteration: 18998
loss: 0.9992871880531311,grad_norm: 0.9999994890768643, iteration: 18999
loss: 1.0318853855133057,grad_norm: 0.9999993160429482, iteration: 19000
loss: 0.9831789135932922,grad_norm: 0.9999992256307126, iteration: 19001
loss: 0.9884754419326782,grad_norm: 0.9999992982296138, iteration: 19002
loss: 1.0079004764556885,grad_norm: 0.9999990486160366, iteration: 19003
loss: 0.9844008088111877,grad_norm: 0.8916732466280096, iteration: 19004
loss: 1.0227402448654175,grad_norm: 0.9999991082527576, iteration: 19005
loss: 1.061112403869629,grad_norm: 0.9999991065344596, iteration: 19006
loss: 1.026858925819397,grad_norm: 0.9999992102937535, iteration: 19007
loss: 0.9900794625282288,grad_norm: 0.9643564539332508, iteration: 19008
loss: 0.9925716519355774,grad_norm: 0.9999992237187222, iteration: 19009
loss: 0.9630921483039856,grad_norm: 0.9999990602948914, iteration: 19010
loss: 1.0168142318725586,grad_norm: 0.9999993045560006, iteration: 19011
loss: 1.005881905555725,grad_norm: 0.9999996865952687, iteration: 19012
loss: 0.9692224264144897,grad_norm: 0.9090035613460229, iteration: 19013
loss: 1.033525824546814,grad_norm: 0.9999994459036303, iteration: 19014
loss: 1.058320164680481,grad_norm: 0.9999994604284228, iteration: 19015
loss: 1.0244975090026855,grad_norm: 0.9999993513920143, iteration: 19016
loss: 0.9697147607803345,grad_norm: 0.9999991118534556, iteration: 19017
loss: 1.062776803970337,grad_norm: 0.9999998723671599, iteration: 19018
loss: 1.0200519561767578,grad_norm: 0.9999991672617559, iteration: 19019
loss: 0.9953056573867798,grad_norm: 0.949010181752383, iteration: 19020
loss: 1.0434238910675049,grad_norm: 0.9623538642974692, iteration: 19021
loss: 1.046354055404663,grad_norm: 0.9999992493440799, iteration: 19022
loss: 1.0298627614974976,grad_norm: 0.999999118626104, iteration: 19023
loss: 1.0313217639923096,grad_norm: 0.9999994540651941, iteration: 19024
loss: 1.0053541660308838,grad_norm: 0.9999992112010404, iteration: 19025
loss: 1.0148022174835205,grad_norm: 0.9999995085130449, iteration: 19026
loss: 1.0412607192993164,grad_norm: 0.9999996385020127, iteration: 19027
loss: 1.0239113569259644,grad_norm: 0.9159337331872295, iteration: 19028
loss: 1.0304853916168213,grad_norm: 0.9999991344649348, iteration: 19029
loss: 1.0526032447814941,grad_norm: 0.9999993944316734, iteration: 19030
loss: 0.9542844891548157,grad_norm: 0.9999990772356103, iteration: 19031
loss: 1.0051823854446411,grad_norm: 0.9999993077368797, iteration: 19032
loss: 1.010504961013794,grad_norm: 0.9999990609768452, iteration: 19033
loss: 0.9934569597244263,grad_norm: 0.9999992811187676, iteration: 19034
loss: 0.997225284576416,grad_norm: 0.9999993704780612, iteration: 19035
loss: 1.0288304090499878,grad_norm: 0.999999170919908, iteration: 19036
loss: 1.0331238508224487,grad_norm: 0.9999990530976296, iteration: 19037
loss: 1.1008713245391846,grad_norm: 0.9999999247358788, iteration: 19038
loss: 1.066821813583374,grad_norm: 0.9999995712060004, iteration: 19039
loss: 1.0090044736862183,grad_norm: 0.9999990344935908, iteration: 19040
loss: 1.0406712293624878,grad_norm: 0.9999991145158612, iteration: 19041
loss: 0.9759370684623718,grad_norm: 0.9999991126607876, iteration: 19042
loss: 1.0255944728851318,grad_norm: 0.9999993005478662, iteration: 19043
loss: 1.0587679147720337,grad_norm: 0.9999994366445508, iteration: 19044
loss: 0.9993453025817871,grad_norm: 0.9999990744724998, iteration: 19045
loss: 1.1053279638290405,grad_norm: 0.9999997185128122, iteration: 19046
loss: 1.0270729064941406,grad_norm: 0.9999994192039019, iteration: 19047
loss: 1.018239974975586,grad_norm: 0.9999989914080598, iteration: 19048
loss: 1.0257422924041748,grad_norm: 0.9999991022342772, iteration: 19049
loss: 1.017148494720459,grad_norm: 0.9999991196685801, iteration: 19050
loss: 1.0301166772842407,grad_norm: 0.9696885749364613, iteration: 19051
loss: 1.0053611993789673,grad_norm: 0.778282015486876, iteration: 19052
loss: 1.0159541368484497,grad_norm: 0.9208256849575607, iteration: 19053
loss: 1.0141927003860474,grad_norm: 0.9999990957961062, iteration: 19054
loss: 1.0842360258102417,grad_norm: 0.9999996336867554, iteration: 19055
loss: 1.0014020204544067,grad_norm: 0.9999991232324765, iteration: 19056
loss: 1.0051549673080444,grad_norm: 0.8886814847522325, iteration: 19057
loss: 0.985887348651886,grad_norm: 0.9999992648506302, iteration: 19058
loss: 1.0120536088943481,grad_norm: 0.9999991455515933, iteration: 19059
loss: 1.011345386505127,grad_norm: 0.9999992473964988, iteration: 19060
loss: 1.014389991760254,grad_norm: 0.9999992036848006, iteration: 19061
loss: 1.0407973527908325,grad_norm: 0.9999991906554099, iteration: 19062
loss: 1.0637367963790894,grad_norm: 0.9999991791073581, iteration: 19063
loss: 1.0700860023498535,grad_norm: 0.9999992747664324, iteration: 19064
loss: 1.0830210447311401,grad_norm: 0.9999991987202224, iteration: 19065
loss: 1.028995394706726,grad_norm: 0.9999990634880197, iteration: 19066
loss: 1.047403335571289,grad_norm: 0.9999991508122562, iteration: 19067
loss: 1.1236366033554077,grad_norm: 0.9999994885618294, iteration: 19068
loss: 1.008633017539978,grad_norm: 0.9999992656474418, iteration: 19069
loss: 1.0075318813323975,grad_norm: 0.9327306171636396, iteration: 19070
loss: 0.9921544790267944,grad_norm: 0.9441707441095282, iteration: 19071
loss: 1.0183600187301636,grad_norm: 0.9999993910547219, iteration: 19072
loss: 1.028920292854309,grad_norm: 0.9999991333672003, iteration: 19073
loss: 1.022614598274231,grad_norm: 0.9999992576097552, iteration: 19074
loss: 1.0299166440963745,grad_norm: 0.9999991365518252, iteration: 19075
loss: 0.987119197845459,grad_norm: 0.9999990809362476, iteration: 19076
loss: 1.0537285804748535,grad_norm: 0.9999994892451758, iteration: 19077
loss: 1.0609338283538818,grad_norm: 0.9999993478538562, iteration: 19078
loss: 1.0464599132537842,grad_norm: 0.9999997818718386, iteration: 19079
loss: 0.9821712374687195,grad_norm: 0.9999997647197781, iteration: 19080
loss: 1.0055049657821655,grad_norm: 0.9814101207620828, iteration: 19081
loss: 0.9779995679855347,grad_norm: 0.9999991759572009, iteration: 19082
loss: 1.031294584274292,grad_norm: 0.9999990152752899, iteration: 19083
loss: 0.9907069802284241,grad_norm: 0.9481336701914359, iteration: 19084
loss: 1.0331196784973145,grad_norm: 0.9999991549887912, iteration: 19085
loss: 1.0239908695220947,grad_norm: 0.9999992254552337, iteration: 19086
loss: 1.0159082412719727,grad_norm: 0.8737485550860916, iteration: 19087
loss: 1.0257328748703003,grad_norm: 0.9654386952602255, iteration: 19088
loss: 1.0330981016159058,grad_norm: 0.9999990864230266, iteration: 19089
loss: 1.0572800636291504,grad_norm: 0.9999997052517059, iteration: 19090
loss: 1.006378412246704,grad_norm: 0.9999995639039198, iteration: 19091
loss: 1.0357619524002075,grad_norm: 0.9999991771076113, iteration: 19092
loss: 1.00362229347229,grad_norm: 0.9999991709504419, iteration: 19093
loss: 0.9986329674720764,grad_norm: 0.9999990518422363, iteration: 19094
loss: 1.1222566366195679,grad_norm: 0.9999992836375898, iteration: 19095
loss: 1.017803430557251,grad_norm: 0.9999994150572619, iteration: 19096
loss: 1.100416660308838,grad_norm: 0.9999993986546527, iteration: 19097
loss: 1.006550669670105,grad_norm: 0.8986539204543629, iteration: 19098
loss: 1.0341848134994507,grad_norm: 0.9999991391161521, iteration: 19099
loss: 1.0222216844558716,grad_norm: 0.99999917838403, iteration: 19100
loss: 1.0610623359680176,grad_norm: 0.9999994832675958, iteration: 19101
loss: 1.036217212677002,grad_norm: 0.9999995524631465, iteration: 19102
loss: 0.9677789211273193,grad_norm: 0.9999991328537825, iteration: 19103
loss: 1.0070858001708984,grad_norm: 0.9843122436209482, iteration: 19104
loss: 1.0428932905197144,grad_norm: 0.9999991261862888, iteration: 19105
loss: 1.0096040964126587,grad_norm: 0.9999993361171219, iteration: 19106
loss: 0.9874197244644165,grad_norm: 0.9999990223435984, iteration: 19107
loss: 1.0005009174346924,grad_norm: 0.9905971950906955, iteration: 19108
loss: 1.0386412143707275,grad_norm: 0.941763172392284, iteration: 19109
loss: 1.025692105293274,grad_norm: 0.9999992393445756, iteration: 19110
loss: 1.0268176794052124,grad_norm: 0.9916783894035407, iteration: 19111
loss: 1.0786610841751099,grad_norm: 0.9999997147774746, iteration: 19112
loss: 0.9801591038703918,grad_norm: 0.9793220391870008, iteration: 19113
loss: 0.9843304753303528,grad_norm: 0.9310312488102928, iteration: 19114
loss: 1.0100090503692627,grad_norm: 0.9999991852397767, iteration: 19115
loss: 1.0271861553192139,grad_norm: 0.9999993794815214, iteration: 19116
loss: 1.0415602922439575,grad_norm: 0.9999991143175817, iteration: 19117
loss: 1.0191422700881958,grad_norm: 0.9999992869624615, iteration: 19118
loss: 1.0350457429885864,grad_norm: 0.9999998631421313, iteration: 19119
loss: 0.9995595812797546,grad_norm: 0.9889991148091037, iteration: 19120
loss: 1.0386897325515747,grad_norm: 0.9999997010078595, iteration: 19121
loss: 0.9959761500358582,grad_norm: 0.9999990482290739, iteration: 19122
loss: 0.9773752689361572,grad_norm: 0.9999990542700548, iteration: 19123
loss: 1.043066143989563,grad_norm: 0.9999991017721374, iteration: 19124
loss: 1.066348910331726,grad_norm: 0.9999991590735235, iteration: 19125
loss: 1.0043511390686035,grad_norm: 0.9999992317614919, iteration: 19126
loss: 1.010175347328186,grad_norm: 0.999999140492461, iteration: 19127
loss: 0.9856685400009155,grad_norm: 0.9999990290303473, iteration: 19128
loss: 1.0005205869674683,grad_norm: 0.9999992483834681, iteration: 19129
loss: 1.0034465789794922,grad_norm: 0.9999995107064346, iteration: 19130
loss: 1.0213255882263184,grad_norm: 0.9999991188057746, iteration: 19131
loss: 1.0798935890197754,grad_norm: 0.9999995194720298, iteration: 19132
loss: 1.0041457414627075,grad_norm: 0.9999994142829358, iteration: 19133
loss: 1.0274038314819336,grad_norm: 0.9999992720330019, iteration: 19134
loss: 1.0951546430587769,grad_norm: 0.9999995248611424, iteration: 19135
loss: 1.0484336614608765,grad_norm: 0.9999996808746451, iteration: 19136
loss: 1.0029315948486328,grad_norm: 0.9999991242917852, iteration: 19137
loss: 1.0176775455474854,grad_norm: 0.9999992991056119, iteration: 19138
loss: 0.9720746278762817,grad_norm: 0.9999995874831742, iteration: 19139
loss: 1.0155227184295654,grad_norm: 0.9999993603323231, iteration: 19140
loss: 1.039971947669983,grad_norm: 0.9999993121560731, iteration: 19141
loss: 1.1411596536636353,grad_norm: 0.9999997007972972, iteration: 19142
loss: 1.005890130996704,grad_norm: 0.942660455764098, iteration: 19143
loss: 1.1009942293167114,grad_norm: 0.9999994024123826, iteration: 19144
loss: 0.9926668405532837,grad_norm: 0.9999990856631982, iteration: 19145
loss: 0.9870911836624146,grad_norm: 0.9999991716562764, iteration: 19146
loss: 1.015830159187317,grad_norm: 0.9999991620435689, iteration: 19147
loss: 1.0136868953704834,grad_norm: 0.8970561984625793, iteration: 19148
loss: 1.0420105457305908,grad_norm: 0.9999991105956767, iteration: 19149
loss: 1.038426399230957,grad_norm: 0.976826093500692, iteration: 19150
loss: 0.9975040555000305,grad_norm: 0.9999992579240796, iteration: 19151
loss: 1.0950058698654175,grad_norm: 0.9999996982710121, iteration: 19152
loss: 0.9940851926803589,grad_norm: 0.9999996473091362, iteration: 19153
loss: 1.0042195320129395,grad_norm: 0.9999994623948574, iteration: 19154
loss: 1.092808723449707,grad_norm: 0.9999996651198636, iteration: 19155
loss: 1.024427890777588,grad_norm: 0.8734217372257395, iteration: 19156
loss: 0.9792188405990601,grad_norm: 0.9999991302387877, iteration: 19157
loss: 1.0161551237106323,grad_norm: 0.993141946225618, iteration: 19158
loss: 1.0240845680236816,grad_norm: 0.9999995785039419, iteration: 19159
loss: 1.1121655702590942,grad_norm: 0.9999995195629421, iteration: 19160
loss: 1.0670467615127563,grad_norm: 0.9999993057706739, iteration: 19161
loss: 1.0425994396209717,grad_norm: 0.999999453500451, iteration: 19162
loss: 0.9894570708274841,grad_norm: 0.9809256353825802, iteration: 19163
loss: 1.0636216402053833,grad_norm: 0.9999993008630863, iteration: 19164
loss: 1.0220122337341309,grad_norm: 0.999999321787084, iteration: 19165
loss: 0.9964027404785156,grad_norm: 0.8780712532096954, iteration: 19166
loss: 1.0345203876495361,grad_norm: 0.9999992516205959, iteration: 19167
loss: 1.0410854816436768,grad_norm: 0.9999993220647843, iteration: 19168
loss: 1.0636245012283325,grad_norm: 0.9999996557753785, iteration: 19169
loss: 1.052661418914795,grad_norm: 0.9999995513096657, iteration: 19170
loss: 1.0680731534957886,grad_norm: 0.9999994119074677, iteration: 19171
loss: 1.029909610748291,grad_norm: 0.9999990069023422, iteration: 19172
loss: 1.0327110290527344,grad_norm: 0.9999994931864131, iteration: 19173
loss: 1.0454189777374268,grad_norm: 0.9999992009505799, iteration: 19174
loss: 1.0448830127716064,grad_norm: 0.99999915576404, iteration: 19175
loss: 1.0241150856018066,grad_norm: 0.9999990258995755, iteration: 19176
loss: 0.9850966334342957,grad_norm: 0.9999990975938924, iteration: 19177
loss: 1.0261175632476807,grad_norm: 0.9999989876830078, iteration: 19178
loss: 1.0590274333953857,grad_norm: 0.999999822830412, iteration: 19179
loss: 1.030298113822937,grad_norm: 0.9999997614952276, iteration: 19180
loss: 1.039738416671753,grad_norm: 0.9999993737220351, iteration: 19181
loss: 1.0746163129806519,grad_norm: 0.9999993602599134, iteration: 19182
loss: 1.0462511777877808,grad_norm: 0.9999992783587508, iteration: 19183
loss: 1.033020257949829,grad_norm: 0.9999992322767552, iteration: 19184
loss: 1.0301430225372314,grad_norm: 0.8982655472758532, iteration: 19185
loss: 0.9807245135307312,grad_norm: 0.9999997170304329, iteration: 19186
loss: 1.045448660850525,grad_norm: 0.9999991199533983, iteration: 19187
loss: 0.9809966683387756,grad_norm: 0.9999991363548608, iteration: 19188
loss: 1.0777748823165894,grad_norm: 0.9999996507779156, iteration: 19189
loss: 1.019537091255188,grad_norm: 0.9999994707725953, iteration: 19190
loss: 1.1171144247055054,grad_norm: 0.9999997175897511, iteration: 19191
loss: 1.037717342376709,grad_norm: 0.9999997401077184, iteration: 19192
loss: 1.0054630041122437,grad_norm: 0.9224754805395549, iteration: 19193
loss: 0.9914861917495728,grad_norm: 0.9476871800313923, iteration: 19194
loss: 1.0104631185531616,grad_norm: 0.899321234060159, iteration: 19195
loss: 1.0657488107681274,grad_norm: 0.9999991457623241, iteration: 19196
loss: 0.9987818002700806,grad_norm: 0.9257287407726562, iteration: 19197
loss: 0.9962959289550781,grad_norm: 0.9999996376508588, iteration: 19198
loss: 1.007331371307373,grad_norm: 0.9999994271235697, iteration: 19199
loss: 0.9827771186828613,grad_norm: 0.9999996606513802, iteration: 19200
loss: 1.0233957767486572,grad_norm: 0.9259282295127393, iteration: 19201
loss: 1.079585313796997,grad_norm: 0.9999995387049141, iteration: 19202
loss: 1.023579716682434,grad_norm: 0.9999991980652583, iteration: 19203
loss: 0.9880511164665222,grad_norm: 0.9999999171429045, iteration: 19204
loss: 1.012984037399292,grad_norm: 0.9999995084518437, iteration: 19205
loss: 1.0892075300216675,grad_norm: 0.9999999400048878, iteration: 19206
loss: 0.9952877163887024,grad_norm: 0.9999992972867459, iteration: 19207
loss: 1.0571235418319702,grad_norm: 0.9999995453734457, iteration: 19208
loss: 1.0372024774551392,grad_norm: 0.9999992752917283, iteration: 19209
loss: 1.0764644145965576,grad_norm: 0.9999996487021648, iteration: 19210
loss: 1.0237436294555664,grad_norm: 0.9999991689684469, iteration: 19211
loss: 1.0309065580368042,grad_norm: 0.9999992192751018, iteration: 19212
loss: 1.035045862197876,grad_norm: 0.9999994024653219, iteration: 19213
loss: 1.0483022928237915,grad_norm: 0.9999998024949709, iteration: 19214
loss: 1.035218358039856,grad_norm: 0.9999992786930572, iteration: 19215
loss: 0.9720680713653564,grad_norm: 0.999999127469387, iteration: 19216
loss: 1.0726203918457031,grad_norm: 0.9999994477611145, iteration: 19217
loss: 1.0211901664733887,grad_norm: 0.9999991770691157, iteration: 19218
loss: 0.9901792407035828,grad_norm: 0.9999991226581727, iteration: 19219
loss: 1.0351026058197021,grad_norm: 0.9999991265394778, iteration: 19220
loss: 1.0176299810409546,grad_norm: 0.9999991994185645, iteration: 19221
loss: 1.0576109886169434,grad_norm: 0.9497462034698684, iteration: 19222
loss: 1.0252958536148071,grad_norm: 0.9999991046190744, iteration: 19223
loss: 1.0505841970443726,grad_norm: 0.9999994694653972, iteration: 19224
loss: 1.0272011756896973,grad_norm: 0.9933177449676659, iteration: 19225
loss: 0.9959276914596558,grad_norm: 0.9314873222175106, iteration: 19226
loss: 1.0693868398666382,grad_norm: 0.9999996193683781, iteration: 19227
loss: 1.054430365562439,grad_norm: 0.9999992344137916, iteration: 19228
loss: 1.1006783246994019,grad_norm: 0.9999995640061125, iteration: 19229
loss: 0.9849227070808411,grad_norm: 0.9999991933026889, iteration: 19230
loss: 1.0084298849105835,grad_norm: 0.8501648876467953, iteration: 19231
loss: 0.9919146299362183,grad_norm: 0.9999992095155881, iteration: 19232
loss: 1.0113866329193115,grad_norm: 0.9999990724803212, iteration: 19233
loss: 1.0210109949111938,grad_norm: 0.9999992608756672, iteration: 19234
loss: 1.0587373971939087,grad_norm: 0.9999994366678966, iteration: 19235
loss: 0.9945785403251648,grad_norm: 0.9999992088747364, iteration: 19236
loss: 0.9833717942237854,grad_norm: 0.9999991218617766, iteration: 19237
loss: 1.0826125144958496,grad_norm: 0.9999996403247468, iteration: 19238
loss: 1.026219129562378,grad_norm: 0.9999997896601274, iteration: 19239
loss: 1.0217362642288208,grad_norm: 0.9999996912884306, iteration: 19240
loss: 1.0358786582946777,grad_norm: 0.9999997612676121, iteration: 19241
loss: 1.012589693069458,grad_norm: 0.946387582541277, iteration: 19242
loss: 1.0504212379455566,grad_norm: 0.999999206680174, iteration: 19243
loss: 1.0108474493026733,grad_norm: 0.9925609469714577, iteration: 19244
loss: 0.9860970377922058,grad_norm: 0.9999996518092952, iteration: 19245
loss: 1.0084502696990967,grad_norm: 0.999999337387064, iteration: 19246
loss: 1.0269156694412231,grad_norm: 0.9999996120042781, iteration: 19247
loss: 0.9961987733840942,grad_norm: 0.9999992421061082, iteration: 19248
loss: 0.9597451686859131,grad_norm: 0.9920065724870601, iteration: 19249
loss: 1.0460397005081177,grad_norm: 0.9999992960393703, iteration: 19250
loss: 1.0179228782653809,grad_norm: 0.9031298903737406, iteration: 19251
loss: 1.0211445093154907,grad_norm: 0.9999991537641286, iteration: 19252
loss: 1.019303321838379,grad_norm: 0.999999266667551, iteration: 19253
loss: 0.9947202801704407,grad_norm: 0.9999992283134523, iteration: 19254
loss: 0.9939730763435364,grad_norm: 0.9999994662645013, iteration: 19255
loss: 0.9952239990234375,grad_norm: 0.9999992170911882, iteration: 19256
loss: 1.0007389783859253,grad_norm: 0.9999992078019977, iteration: 19257
loss: 1.152374505996704,grad_norm: 0.9999997476890189, iteration: 19258
loss: 1.030963659286499,grad_norm: 0.929862167538562, iteration: 19259
loss: 1.0514132976531982,grad_norm: 0.9999994966586682, iteration: 19260
loss: 1.0094960927963257,grad_norm: 0.9999994686687038, iteration: 19261
loss: 0.9753435850143433,grad_norm: 0.9999991604213583, iteration: 19262
loss: 1.0244107246398926,grad_norm: 0.9999991347624015, iteration: 19263
loss: 0.9977428913116455,grad_norm: 0.9999996597835334, iteration: 19264
loss: 0.9936625957489014,grad_norm: 0.9999996348009796, iteration: 19265
loss: 1.0368326902389526,grad_norm: 0.999999584306301, iteration: 19266
loss: 0.9958015084266663,grad_norm: 0.8969331199866001, iteration: 19267
loss: 1.0229310989379883,grad_norm: 0.9999993298095283, iteration: 19268
loss: 1.0632545948028564,grad_norm: 0.9999997833154115, iteration: 19269
loss: 1.0205738544464111,grad_norm: 0.9999993792179553, iteration: 19270
loss: 1.0321918725967407,grad_norm: 0.9999993037183013, iteration: 19271
loss: 1.0424860715866089,grad_norm: 0.9999995911710842, iteration: 19272
loss: 1.0203657150268555,grad_norm: 0.9999992792569903, iteration: 19273
loss: 0.9927685260772705,grad_norm: 0.999999442381154, iteration: 19274
loss: 0.9939731359481812,grad_norm: 0.9999991607546117, iteration: 19275
loss: 0.9973194599151611,grad_norm: 0.9999991909729689, iteration: 19276
loss: 1.0098658800125122,grad_norm: 0.9683981011575556, iteration: 19277
loss: 1.0267412662506104,grad_norm: 0.9999992226356159, iteration: 19278
loss: 1.061050534248352,grad_norm: 0.9999997432262959, iteration: 19279
loss: 1.043751835823059,grad_norm: 0.9999992312385771, iteration: 19280
loss: 1.0020583868026733,grad_norm: 0.9668925087985978, iteration: 19281
loss: 0.9490808844566345,grad_norm: 0.9999992083803251, iteration: 19282
loss: 1.106603741645813,grad_norm: 0.9999997772747865, iteration: 19283
loss: 1.0237783193588257,grad_norm: 0.9999992325336401, iteration: 19284
loss: 1.0110615491867065,grad_norm: 0.9999994396601358, iteration: 19285
loss: 1.0733487606048584,grad_norm: 0.9999997948043523, iteration: 19286
loss: 1.0106480121612549,grad_norm: 0.9999992750594291, iteration: 19287
loss: 1.0880900621414185,grad_norm: 0.9999994153575716, iteration: 19288
loss: 1.193070650100708,grad_norm: 0.9999999086547453, iteration: 19289
loss: 1.035994291305542,grad_norm: 0.9999992710152337, iteration: 19290
loss: 1.0183162689208984,grad_norm: 0.9999989915067514, iteration: 19291
loss: 1.023029088973999,grad_norm: 0.9999994792783674, iteration: 19292
loss: 1.0102393627166748,grad_norm: 0.9999995466089483, iteration: 19293
loss: 1.0214165449142456,grad_norm: 0.999999070257867, iteration: 19294
loss: 0.9873253703117371,grad_norm: 0.999999181829758, iteration: 19295
loss: 1.0123002529144287,grad_norm: 0.9999992349211073, iteration: 19296
loss: 1.0695207118988037,grad_norm: 0.9999998482769072, iteration: 19297
loss: 1.082312822341919,grad_norm: 0.9999997158949111, iteration: 19298
loss: 1.0145925283432007,grad_norm: 0.9999995850992833, iteration: 19299
loss: 1.0701091289520264,grad_norm: 0.9999992579175402, iteration: 19300
loss: 1.0716687440872192,grad_norm: 0.9999997788336894, iteration: 19301
loss: 1.0049868822097778,grad_norm: 0.9999990790488678, iteration: 19302
loss: 0.9841635227203369,grad_norm: 0.9999992323571025, iteration: 19303
loss: 1.0077722072601318,grad_norm: 0.9999993865597966, iteration: 19304
loss: 1.0036201477050781,grad_norm: 0.9999990372626959, iteration: 19305
loss: 1.0070710182189941,grad_norm: 0.9999996468438663, iteration: 19306
loss: 0.9861431121826172,grad_norm: 0.9999991590323403, iteration: 19307
loss: 0.9691985249519348,grad_norm: 0.9999991244369728, iteration: 19308
loss: 1.0825064182281494,grad_norm: 0.9999993851619903, iteration: 19309
loss: 1.0491806268692017,grad_norm: 0.9999995622645457, iteration: 19310
loss: 1.1062818765640259,grad_norm: 0.9999995434452952, iteration: 19311
loss: 1.0353403091430664,grad_norm: 0.9999996950343255, iteration: 19312
loss: 1.0033897161483765,grad_norm: 0.9775152253741766, iteration: 19313
loss: 1.00897216796875,grad_norm: 0.9999992184879908, iteration: 19314
loss: 1.0430049896240234,grad_norm: 0.9999990960185975, iteration: 19315
loss: 1.0244470834732056,grad_norm: 0.9999991262336038, iteration: 19316
loss: 1.0072832107543945,grad_norm: 0.9999990184179898, iteration: 19317
loss: 0.9703115820884705,grad_norm: 0.9999998259643085, iteration: 19318
loss: 1.0447146892547607,grad_norm: 0.9999994165274068, iteration: 19319
loss: 1.0204566717147827,grad_norm: 0.9999992270718959, iteration: 19320
loss: 1.0461148023605347,grad_norm: 0.9999990559699946, iteration: 19321
loss: 1.0180928707122803,grad_norm: 0.9999991944689168, iteration: 19322
loss: 1.013875126838684,grad_norm: 0.9999991736347716, iteration: 19323
loss: 0.9720817804336548,grad_norm: 0.9999991475378657, iteration: 19324
loss: 1.0185887813568115,grad_norm: 0.9810902121169739, iteration: 19325
loss: 1.026218056678772,grad_norm: 0.9999991011676344, iteration: 19326
loss: 1.0059212446212769,grad_norm: 0.9999992352073889, iteration: 19327
loss: 0.9916991591453552,grad_norm: 0.9999995013423167, iteration: 19328
loss: 1.1126773357391357,grad_norm: 0.999999805986913, iteration: 19329
loss: 1.0081322193145752,grad_norm: 0.9999994205505303, iteration: 19330
loss: 1.0260565280914307,grad_norm: 0.9999992000751373, iteration: 19331
loss: 1.0394502878189087,grad_norm: 0.9999995002970585, iteration: 19332
loss: 1.008590579032898,grad_norm: 0.9999992846018276, iteration: 19333
loss: 1.032381534576416,grad_norm: 0.9999997268780176, iteration: 19334
loss: 0.9419749975204468,grad_norm: 0.999999180045511, iteration: 19335
loss: 1.0444449186325073,grad_norm: 0.9999993597007278, iteration: 19336
loss: 0.9897773861885071,grad_norm: 0.9999991726557871, iteration: 19337
loss: 1.0407164096832275,grad_norm: 0.8576452498616807, iteration: 19338
loss: 1.0101101398468018,grad_norm: 0.9999991921754001, iteration: 19339
loss: 0.9707973003387451,grad_norm: 0.9999992402610574, iteration: 19340
loss: 1.0979864597320557,grad_norm: 0.999999931610171, iteration: 19341
loss: 1.0180858373641968,grad_norm: 0.9999993476459587, iteration: 19342
loss: 1.002742052078247,grad_norm: 0.8480755740976191, iteration: 19343
loss: 0.9613075256347656,grad_norm: 0.9999993383332731, iteration: 19344
loss: 1.0097383260726929,grad_norm: 0.9999993272226566, iteration: 19345
loss: 1.065142035484314,grad_norm: 0.9999993116987329, iteration: 19346
loss: 1.0210546255111694,grad_norm: 0.9553595533167692, iteration: 19347
loss: 1.0320078134536743,grad_norm: 0.9999991399959324, iteration: 19348
loss: 1.022108554840088,grad_norm: 0.9999993331155985, iteration: 19349
loss: 1.0033879280090332,grad_norm: 0.9999991328254809, iteration: 19350
loss: 1.037989854812622,grad_norm: 0.9999992342201196, iteration: 19351
loss: 0.9830989837646484,grad_norm: 0.9999996170715277, iteration: 19352
loss: 1.022015929222107,grad_norm: 0.9518744938022435, iteration: 19353
loss: 1.0380724668502808,grad_norm: 0.9265699412985167, iteration: 19354
loss: 0.9864864945411682,grad_norm: 0.9999994483124273, iteration: 19355
loss: 1.0285661220550537,grad_norm: 0.9999992786594669, iteration: 19356
loss: 1.0168547630310059,grad_norm: 0.9999996616228615, iteration: 19357
loss: 0.9904588460922241,grad_norm: 0.9617535123696008, iteration: 19358
loss: 1.0578659772872925,grad_norm: 0.9999990508489501, iteration: 19359
loss: 0.9844644665718079,grad_norm: 0.9999990272586446, iteration: 19360
loss: 1.0103600025177002,grad_norm: 0.9999990900053295, iteration: 19361
loss: 0.9885445833206177,grad_norm: 0.9999990416001449, iteration: 19362
loss: 1.070631980895996,grad_norm: 0.9999996230141828, iteration: 19363
loss: 0.9714498519897461,grad_norm: 0.9592052187190955, iteration: 19364
loss: 1.075117588043213,grad_norm: 0.999999513300663, iteration: 19365
loss: 1.0289660692214966,grad_norm: 0.9999990579714343, iteration: 19366
loss: 0.9908987879753113,grad_norm: 0.999999193653424, iteration: 19367
loss: 1.018574595451355,grad_norm: 0.8787515347856345, iteration: 19368
loss: 0.9989411234855652,grad_norm: 0.9999990393253557, iteration: 19369
loss: 1.01273775100708,grad_norm: 0.9999995963700802, iteration: 19370
loss: 1.007878303527832,grad_norm: 0.999999181379274, iteration: 19371
loss: 1.0158613920211792,grad_norm: 0.9999991182837821, iteration: 19372
loss: 1.0266302824020386,grad_norm: 0.9999990132895801, iteration: 19373
loss: 1.0223480463027954,grad_norm: 0.9085106222921167, iteration: 19374
loss: 1.0164241790771484,grad_norm: 0.9999994423364003, iteration: 19375
loss: 1.1052680015563965,grad_norm: 0.9999995964008543, iteration: 19376
loss: 1.017809510231018,grad_norm: 0.9999991752315502, iteration: 19377
loss: 1.0223084688186646,grad_norm: 0.9999992711824911, iteration: 19378
loss: 1.0390914678573608,grad_norm: 0.9999990295818446, iteration: 19379
loss: 1.0457385778427124,grad_norm: 0.9999996493326899, iteration: 19380
loss: 1.010520577430725,grad_norm: 0.9999993314738351, iteration: 19381
loss: 1.0033921003341675,grad_norm: 0.9999992444130527, iteration: 19382
loss: 0.986624538898468,grad_norm: 0.9999990625310698, iteration: 19383
loss: 0.9954104423522949,grad_norm: 0.9999992786363786, iteration: 19384
loss: 1.004920244216919,grad_norm: 0.9999992455905736, iteration: 19385
loss: 1.0917445421218872,grad_norm: 0.9999997061797219, iteration: 19386
loss: 1.0150885581970215,grad_norm: 0.9999994092254494, iteration: 19387
loss: 1.049222707748413,grad_norm: 0.9999998371427731, iteration: 19388
loss: 1.0561238527297974,grad_norm: 0.9999990860097923, iteration: 19389
loss: 1.0750960111618042,grad_norm: 0.9999994591646021, iteration: 19390
loss: 1.0145961046218872,grad_norm: 0.9999992587094559, iteration: 19391
loss: 1.010436773300171,grad_norm: 0.9999991989324006, iteration: 19392
loss: 1.0431855916976929,grad_norm: 0.9999995642241103, iteration: 19393
loss: 1.06550931930542,grad_norm: 0.9999993363074257, iteration: 19394
loss: 1.0233234167099,grad_norm: 0.9999992147827643, iteration: 19395
loss: 1.0476853847503662,grad_norm: 0.9999991253122325, iteration: 19396
loss: 1.0274534225463867,grad_norm: 0.999999047298618, iteration: 19397
loss: 1.0215213298797607,grad_norm: 0.9999996538142784, iteration: 19398
loss: 1.0162726640701294,grad_norm: 0.9999990533327352, iteration: 19399
loss: 1.0191826820373535,grad_norm: 0.9999993512724181, iteration: 19400
loss: 0.9976236820220947,grad_norm: 0.9937479028353781, iteration: 19401
loss: 1.0080204010009766,grad_norm: 0.9999997001892942, iteration: 19402
loss: 0.992675244808197,grad_norm: 0.9999992393341536, iteration: 19403
loss: 1.0318117141723633,grad_norm: 0.9011733947496354, iteration: 19404
loss: 1.0223485231399536,grad_norm: 0.999999406834281, iteration: 19405
loss: 1.0241397619247437,grad_norm: 0.9999992263153129, iteration: 19406
loss: 0.9906100034713745,grad_norm: 0.9999993283769032, iteration: 19407
loss: 1.0329047441482544,grad_norm: 0.9999995818412605, iteration: 19408
loss: 1.1164770126342773,grad_norm: 0.999999537393514, iteration: 19409
loss: 1.0776807069778442,grad_norm: 0.9999994904686736, iteration: 19410
loss: 0.9942949414253235,grad_norm: 0.9111485517755289, iteration: 19411
loss: 1.005958914756775,grad_norm: 0.9999989789957452, iteration: 19412
loss: 1.0580217838287354,grad_norm: 0.9999992592540101, iteration: 19413
loss: 1.0113657712936401,grad_norm: 0.9999992395250624, iteration: 19414
loss: 0.9983381032943726,grad_norm: 0.9999990764697355, iteration: 19415
loss: 1.03681218624115,grad_norm: 0.9999991653847813, iteration: 19416
loss: 0.9985122084617615,grad_norm: 0.9999993534198929, iteration: 19417
loss: 0.959341287612915,grad_norm: 0.9999991846718594, iteration: 19418
loss: 1.0623661279678345,grad_norm: 0.9999990662039197, iteration: 19419
loss: 0.9882963299751282,grad_norm: 0.999999064823915, iteration: 19420
loss: 1.022748351097107,grad_norm: 0.999999376474862, iteration: 19421
loss: 1.0098494291305542,grad_norm: 0.9999991445963068, iteration: 19422
loss: 1.0319985151290894,grad_norm: 0.999999144700154, iteration: 19423
loss: 0.9545258283615112,grad_norm: 0.9999992419368949, iteration: 19424
loss: 0.9879741072654724,grad_norm: 0.9999993691063963, iteration: 19425
loss: 0.9755271077156067,grad_norm: 0.9999989632519843, iteration: 19426
loss: 0.9626085162162781,grad_norm: 0.8833337002169829, iteration: 19427
loss: 1.0460342168807983,grad_norm: 0.9832799091139982, iteration: 19428
loss: 0.9539520144462585,grad_norm: 0.9999991281966683, iteration: 19429
loss: 1.0935944318771362,grad_norm: 0.9980594001266774, iteration: 19430
loss: 1.0383756160736084,grad_norm: 0.9999994729093226, iteration: 19431
loss: 1.0337111949920654,grad_norm: 0.9999991977653554, iteration: 19432
loss: 1.0693986415863037,grad_norm: 0.9999994125183398, iteration: 19433
loss: 1.0221307277679443,grad_norm: 0.9999991398984972, iteration: 19434
loss: 0.9751898050308228,grad_norm: 0.8939537904464496, iteration: 19435
loss: 1.0196127891540527,grad_norm: 0.9147528450656559, iteration: 19436
loss: 1.0305328369140625,grad_norm: 0.9999999246576132, iteration: 19437
loss: 0.9910664558410645,grad_norm: 0.9072464769612735, iteration: 19438
loss: 1.0009299516677856,grad_norm: 0.964750973802255, iteration: 19439
loss: 0.9786667227745056,grad_norm: 0.9256574587571076, iteration: 19440
loss: 1.0396769046783447,grad_norm: 0.9999992457824287, iteration: 19441
loss: 1.009533405303955,grad_norm: 0.9999989754682002, iteration: 19442
loss: 1.028741478919983,grad_norm: 0.9999990108423888, iteration: 19443
loss: 1.0320910215377808,grad_norm: 0.992833731605369, iteration: 19444
loss: 1.0154484510421753,grad_norm: 0.9999992463220926, iteration: 19445
loss: 1.0418649911880493,grad_norm: 0.9999991328422406, iteration: 19446
loss: 0.9741383790969849,grad_norm: 0.9999994954889415, iteration: 19447
loss: 1.0108387470245361,grad_norm: 0.9999990378042247, iteration: 19448
loss: 1.0335744619369507,grad_norm: 0.9999991249510836, iteration: 19449
loss: 1.0414009094238281,grad_norm: 0.999999233072808, iteration: 19450
loss: 1.0129729509353638,grad_norm: 0.9154303343120703, iteration: 19451
loss: 1.0126897096633911,grad_norm: 0.9999991983318953, iteration: 19452
loss: 1.0622576475143433,grad_norm: 0.9999993491425803, iteration: 19453
loss: 1.0132611989974976,grad_norm: 0.9999990701119692, iteration: 19454
loss: 1.0065935850143433,grad_norm: 0.9999992101555296, iteration: 19455
loss: 1.0250753164291382,grad_norm: 0.9999990625139823, iteration: 19456
loss: 0.994749903678894,grad_norm: 0.9999996614591211, iteration: 19457
loss: 1.0090502500534058,grad_norm: 0.9331305747957958, iteration: 19458
loss: 1.1453145742416382,grad_norm: 0.999999754131511, iteration: 19459
loss: 1.0170403718948364,grad_norm: 0.9999998390939646, iteration: 19460
loss: 1.014077067375183,grad_norm: 0.9698582082768086, iteration: 19461
loss: 1.0353453159332275,grad_norm: 0.9999993902314589, iteration: 19462
loss: 1.0099971294403076,grad_norm: 0.9999990299902212, iteration: 19463
loss: 0.9995787143707275,grad_norm: 0.9999991783408227, iteration: 19464
loss: 1.0520687103271484,grad_norm: 0.999999526204309, iteration: 19465
loss: 1.048012375831604,grad_norm: 0.9999991174888186, iteration: 19466
loss: 1.0476402044296265,grad_norm: 0.9999999386776274, iteration: 19467
loss: 1.0490657091140747,grad_norm: 0.9999991143454661, iteration: 19468
loss: 1.0215129852294922,grad_norm: 0.9646650772174529, iteration: 19469
loss: 1.069071650505066,grad_norm: 0.9999996786343502, iteration: 19470
loss: 0.9775609374046326,grad_norm: 0.9421523794860053, iteration: 19471
loss: 1.0203464031219482,grad_norm: 0.999999057540607, iteration: 19472
loss: 0.9884734749794006,grad_norm: 0.9999993535776034, iteration: 19473
loss: 1.1192231178283691,grad_norm: 0.9999998718026165, iteration: 19474
loss: 1.0205754041671753,grad_norm: 0.9999991094221242, iteration: 19475
loss: 1.0413402318954468,grad_norm: 0.9999990047860354, iteration: 19476
loss: 1.0212016105651855,grad_norm: 0.9999992726912955, iteration: 19477
loss: 1.0310577154159546,grad_norm: 0.9999992225610834, iteration: 19478
loss: 1.000689148902893,grad_norm: 0.9611233012480866, iteration: 19479
loss: 1.0086082220077515,grad_norm: 0.9999990597213535, iteration: 19480
loss: 1.0306833982467651,grad_norm: 0.9999990257457027, iteration: 19481
loss: 1.017824411392212,grad_norm: 0.9486994266280775, iteration: 19482
loss: 1.0790029764175415,grad_norm: 0.9999992950953704, iteration: 19483
loss: 1.0295802354812622,grad_norm: 0.9999992788153061, iteration: 19484
loss: 1.0768351554870605,grad_norm: 0.9999990217750371, iteration: 19485
loss: 1.002759575843811,grad_norm: 0.9999991343371061, iteration: 19486
loss: 1.021386981010437,grad_norm: 0.9999993518201028, iteration: 19487
loss: 0.9982286691665649,grad_norm: 0.999999072756138, iteration: 19488
loss: 1.1107591390609741,grad_norm: 0.9999996839380427, iteration: 19489
loss: 1.0415650606155396,grad_norm: 0.9999991320429479, iteration: 19490
loss: 0.9692214727401733,grad_norm: 0.9554579345107974, iteration: 19491
loss: 0.9878017902374268,grad_norm: 0.9793248609616425, iteration: 19492
loss: 1.027294397354126,grad_norm: 0.9448086338421751, iteration: 19493
loss: 0.9987270832061768,grad_norm: 0.9999989574971295, iteration: 19494
loss: 1.015372395515442,grad_norm: 0.9999992143998478, iteration: 19495
loss: 1.0543467998504639,grad_norm: 0.9999994965496113, iteration: 19496
loss: 1.0322153568267822,grad_norm: 0.9911878411641589, iteration: 19497
loss: 1.0386039018630981,grad_norm: 0.9999992185943304, iteration: 19498
loss: 1.0246645212173462,grad_norm: 0.9999989594108802, iteration: 19499
loss: 1.0163419246673584,grad_norm: 0.9999990811712327, iteration: 19500
loss: 1.016567587852478,grad_norm: 0.9999992858290296, iteration: 19501
loss: 1.1407901048660278,grad_norm: 0.9999998075257865, iteration: 19502
loss: 1.0029176473617554,grad_norm: 0.9992740164917707, iteration: 19503
loss: 1.0032657384872437,grad_norm: 0.9999993313827789, iteration: 19504
loss: 1.018045425415039,grad_norm: 0.9437847239598552, iteration: 19505
loss: 0.9621388912200928,grad_norm: 0.9999993888224759, iteration: 19506
loss: 1.0265498161315918,grad_norm: 0.974672419887155, iteration: 19507
loss: 1.0124459266662598,grad_norm: 0.9999991325686715, iteration: 19508
loss: 1.0105137825012207,grad_norm: 0.999999116946346, iteration: 19509
loss: 1.1165320873260498,grad_norm: 0.9999994424316274, iteration: 19510
loss: 1.0249196290969849,grad_norm: 0.9999991053702392, iteration: 19511
loss: 1.0004117488861084,grad_norm: 0.9999990884503506, iteration: 19512
loss: 1.0093340873718262,grad_norm: 0.9449968635408436, iteration: 19513
loss: 1.0121628046035767,grad_norm: 0.9999991123818007, iteration: 19514
loss: 1.005109190940857,grad_norm: 0.999999217680013, iteration: 19515
loss: 1.0371532440185547,grad_norm: 0.9999992360694537, iteration: 19516
loss: 1.0558620691299438,grad_norm: 0.9999991317036082, iteration: 19517
loss: 1.0342507362365723,grad_norm: 0.999999031453416, iteration: 19518
loss: 1.0203871726989746,grad_norm: 0.997946317792854, iteration: 19519
loss: 1.0210614204406738,grad_norm: 0.9936390805513406, iteration: 19520
loss: 0.9956125020980835,grad_norm: 0.9256860929042793, iteration: 19521
loss: 1.0385786294937134,grad_norm: 0.999999022848696, iteration: 19522
loss: 0.9619651436805725,grad_norm: 0.9835425903328356, iteration: 19523
loss: 0.9763767719268799,grad_norm: 0.9999991979712992, iteration: 19524
loss: 0.9959920644760132,grad_norm: 0.9999992329821085, iteration: 19525
loss: 0.9800354242324829,grad_norm: 0.9999992367147974, iteration: 19526
loss: 1.0774970054626465,grad_norm: 0.9999993432750043, iteration: 19527
loss: 1.0164244174957275,grad_norm: 0.9999993801315625, iteration: 19528
loss: 1.0031797885894775,grad_norm: 0.9828243184747733, iteration: 19529
loss: 1.1353825330734253,grad_norm: 0.9999996840749397, iteration: 19530
loss: 1.0659152269363403,grad_norm: 0.9850462622228776, iteration: 19531
loss: 0.9906796216964722,grad_norm: 0.9999991081102502, iteration: 19532
loss: 0.9869664907455444,grad_norm: 0.9999990527158792, iteration: 19533
loss: 1.0413670539855957,grad_norm: 0.9999995026717464, iteration: 19534
loss: 1.0784176588058472,grad_norm: 0.9999993402979963, iteration: 19535
loss: 1.0067280530929565,grad_norm: 0.9248493648899891, iteration: 19536
loss: 1.1520401239395142,grad_norm: 0.9999992504188836, iteration: 19537
loss: 1.0289227962493896,grad_norm: 0.9640409206094118, iteration: 19538
loss: 1.0025439262390137,grad_norm: 0.9999994394504175, iteration: 19539
loss: 1.0606507062911987,grad_norm: 0.9999995003445391, iteration: 19540
loss: 0.9962085485458374,grad_norm: 0.9999992548029051, iteration: 19541
loss: 1.018763780593872,grad_norm: 0.9999991567836388, iteration: 19542
loss: 1.0419470071792603,grad_norm: 0.9999993165590695, iteration: 19543
loss: 0.9899137616157532,grad_norm: 0.9999992021054718, iteration: 19544
loss: 1.021194577217102,grad_norm: 0.9999993968232151, iteration: 19545
loss: 0.9838542938232422,grad_norm: 0.999999167474484, iteration: 19546
loss: 1.0555768013000488,grad_norm: 0.9999998854052636, iteration: 19547
loss: 1.0468322038650513,grad_norm: 0.999999375683076, iteration: 19548
loss: 1.0285258293151855,grad_norm: 0.9999991387087919, iteration: 19549
loss: 1.0122216939926147,grad_norm: 0.9999992480308693, iteration: 19550
loss: 1.0686967372894287,grad_norm: 0.9999999262952841, iteration: 19551
loss: 1.0348784923553467,grad_norm: 0.9586993328871427, iteration: 19552
loss: 0.993501603603363,grad_norm: 0.9999992989429628, iteration: 19553
loss: 0.9752763509750366,grad_norm: 0.9999991129283262, iteration: 19554
loss: 0.9946985244750977,grad_norm: 0.999999253006384, iteration: 19555
loss: 1.0684044361114502,grad_norm: 0.9999992851842039, iteration: 19556
loss: 1.0603060722351074,grad_norm: 0.999999282092493, iteration: 19557
loss: 1.1370186805725098,grad_norm: 0.9999992342252892, iteration: 19558
loss: 0.9826952815055847,grad_norm: 0.9999991491651102, iteration: 19559
loss: 1.0459938049316406,grad_norm: 0.9999995300742345, iteration: 19560
loss: 1.0406115055084229,grad_norm: 0.8678154615443892, iteration: 19561
loss: 1.034145712852478,grad_norm: 0.9999992493636349, iteration: 19562
loss: 1.0278400182724,grad_norm: 0.9665667168578715, iteration: 19563
loss: 1.0301755666732788,grad_norm: 0.9999991410366476, iteration: 19564
loss: 0.9913782477378845,grad_norm: 0.927670793077281, iteration: 19565
loss: 0.9853641390800476,grad_norm: 0.9999991715810422, iteration: 19566
loss: 1.1202517747879028,grad_norm: 0.9999998837066492, iteration: 19567
loss: 1.0573537349700928,grad_norm: 0.9999997857109524, iteration: 19568
loss: 1.0038976669311523,grad_norm: 0.9999998105043487, iteration: 19569
loss: 0.9764957427978516,grad_norm: 0.9999990557431598, iteration: 19570
loss: 1.0411441326141357,grad_norm: 0.9999992247823805, iteration: 19571
loss: 0.978348970413208,grad_norm: 0.9999990480245553, iteration: 19572
loss: 1.0292547941207886,grad_norm: 0.9999991640004171, iteration: 19573
loss: 0.9699816703796387,grad_norm: 0.9797763171296284, iteration: 19574
loss: 1.060311198234558,grad_norm: 0.9999994016147205, iteration: 19575
loss: 1.020600438117981,grad_norm: 0.9999991085258849, iteration: 19576
loss: 1.0084657669067383,grad_norm: 0.999999306482702, iteration: 19577
loss: 0.996767520904541,grad_norm: 0.999999327427305, iteration: 19578
loss: 1.052211880683899,grad_norm: 0.9999992008416559, iteration: 19579
loss: 1.0116229057312012,grad_norm: 0.9238672185381551, iteration: 19580
loss: 1.0995532274246216,grad_norm: 0.9999990388977638, iteration: 19581
loss: 1.004827857017517,grad_norm: 0.9999990908607248, iteration: 19582
loss: 0.9898467659950256,grad_norm: 0.9468464128087302, iteration: 19583
loss: 1.0045078992843628,grad_norm: 0.999999128022505, iteration: 19584
loss: 1.0126653909683228,grad_norm: 0.88086529531431, iteration: 19585
loss: 1.0540525913238525,grad_norm: 0.999999739481351, iteration: 19586
loss: 0.9847926497459412,grad_norm: 0.9999996462632519, iteration: 19587
loss: 1.0277643203735352,grad_norm: 0.9999993125582467, iteration: 19588
loss: 1.0176353454589844,grad_norm: 0.9999993775595108, iteration: 19589
loss: 0.99058598279953,grad_norm: 0.9999991295399475, iteration: 19590
loss: 1.0449248552322388,grad_norm: 0.9999997851294353, iteration: 19591
loss: 1.0706886053085327,grad_norm: 0.999999885086241, iteration: 19592
loss: 1.0247974395751953,grad_norm: 0.9999992371207619, iteration: 19593
loss: 0.9972506165504456,grad_norm: 0.9999989344948662, iteration: 19594
loss: 1.0574369430541992,grad_norm: 0.9999993099473222, iteration: 19595
loss: 1.0247385501861572,grad_norm: 0.9999992306135106, iteration: 19596
loss: 1.009706735610962,grad_norm: 0.9999990178913588, iteration: 19597
loss: 1.0395883321762085,grad_norm: 0.9999992286898164, iteration: 19598
loss: 1.0262178182601929,grad_norm: 0.9028083532852768, iteration: 19599
loss: 1.022240400314331,grad_norm: 0.9999993813533943, iteration: 19600
loss: 1.0226823091506958,grad_norm: 0.9999991209309319, iteration: 19601
loss: 1.0579862594604492,grad_norm: 0.9999997728819802, iteration: 19602
loss: 0.9840389490127563,grad_norm: 0.9999990839218198, iteration: 19603
loss: 1.0014971494674683,grad_norm: 0.9776491534606911, iteration: 19604
loss: 1.0744301080703735,grad_norm: 0.9999999579814143, iteration: 19605
loss: 0.9983528852462769,grad_norm: 0.9999995755424445, iteration: 19606
loss: 1.0124685764312744,grad_norm: 0.9999993363039089, iteration: 19607
loss: 1.0172667503356934,grad_norm: 0.8829686742853956, iteration: 19608
loss: 1.0246728658676147,grad_norm: 0.8284854988617265, iteration: 19609
loss: 0.9882975816726685,grad_norm: 0.999999088701447, iteration: 19610
loss: 1.063194990158081,grad_norm: 0.9999993287271554, iteration: 19611
loss: 1.0380001068115234,grad_norm: 0.9999992749411866, iteration: 19612
loss: 0.9732401967048645,grad_norm: 0.999999093979734, iteration: 19613
loss: 1.0119436979293823,grad_norm: 0.8780140701722933, iteration: 19614
loss: 1.0335063934326172,grad_norm: 0.9999992821416819, iteration: 19615
loss: 1.0514756441116333,grad_norm: 0.9999997781080062, iteration: 19616
loss: 1.0041289329528809,grad_norm: 0.9999996034518168, iteration: 19617
loss: 0.9742564558982849,grad_norm: 0.9999990833096436, iteration: 19618
loss: 1.028921365737915,grad_norm: 0.999999496770122, iteration: 19619
loss: 1.0161253213882446,grad_norm: 0.9999990573051617, iteration: 19620
loss: 1.0756049156188965,grad_norm: 0.9999992355836427, iteration: 19621
loss: 1.0047308206558228,grad_norm: 0.9999993468932442, iteration: 19622
loss: 1.015703797340393,grad_norm: 0.9999995927292763, iteration: 19623
loss: 1.041783094406128,grad_norm: 0.9999991026228735, iteration: 19624
loss: 1.0266637802124023,grad_norm: 0.9999991537646634, iteration: 19625
loss: 1.0261765718460083,grad_norm: 0.9999991391816645, iteration: 19626
loss: 1.0070244073867798,grad_norm: 0.9999992584418462, iteration: 19627
loss: 0.9883426427841187,grad_norm: 0.9999993717118798, iteration: 19628
loss: 0.9755866527557373,grad_norm: 0.999999082928827, iteration: 19629
loss: 0.9852204918861389,grad_norm: 0.9898287581932544, iteration: 19630
loss: 1.0236725807189941,grad_norm: 0.9999989927641236, iteration: 19631
loss: 0.989250898361206,grad_norm: 0.9999991099063337, iteration: 19632
loss: 1.0148528814315796,grad_norm: 0.9999992486309107, iteration: 19633
loss: 0.9989347457885742,grad_norm: 0.9999990943588853, iteration: 19634
loss: 1.0043812990188599,grad_norm: 0.999999038445833, iteration: 19635
loss: 1.0779601335525513,grad_norm: 0.9999993994684127, iteration: 19636
loss: 1.00772225856781,grad_norm: 0.9999993195754366, iteration: 19637
loss: 1.004264235496521,grad_norm: 0.9999991400100786, iteration: 19638
loss: 0.9973034858703613,grad_norm: 0.9999990865023992, iteration: 19639
loss: 1.0650783777236938,grad_norm: 0.9999992381631705, iteration: 19640
loss: 1.0896166563034058,grad_norm: 0.9999998237680205, iteration: 19641
loss: 1.0295146703720093,grad_norm: 0.9999990130178171, iteration: 19642
loss: 0.9964423179626465,grad_norm: 0.9999992378287378, iteration: 19643
loss: 0.993018627166748,grad_norm: 0.9999990532116773, iteration: 19644
loss: 1.008094310760498,grad_norm: 0.9905379578566644, iteration: 19645
loss: 0.9970283508300781,grad_norm: 0.9999992438698792, iteration: 19646
loss: 0.9893396496772766,grad_norm: 0.9999991689216309, iteration: 19647
loss: 1.0341949462890625,grad_norm: 0.9999991710383431, iteration: 19648
loss: 1.0073130130767822,grad_norm: 0.9999991595930579, iteration: 19649
loss: 1.0373694896697998,grad_norm: 0.9999991954691233, iteration: 19650
loss: 0.9757317304611206,grad_norm: 0.9999991810467966, iteration: 19651
loss: 1.027947187423706,grad_norm: 0.9999992841757812, iteration: 19652
loss: 1.0139524936676025,grad_norm: 0.9999991552258389, iteration: 19653
loss: 0.9574966430664062,grad_norm: 0.9999998627425299, iteration: 19654
loss: 1.007392406463623,grad_norm: 0.999999346216156, iteration: 19655
loss: 1.0078222751617432,grad_norm: 0.9999991744337474, iteration: 19656
loss: 1.0565682649612427,grad_norm: 0.999999242678081, iteration: 19657
loss: 1.0192179679870605,grad_norm: 0.9295237090745754, iteration: 19658
loss: 1.0848913192749023,grad_norm: 0.9999994309964441, iteration: 19659
loss: 0.9726239442825317,grad_norm: 0.9684190226732377, iteration: 19660
loss: 1.1276414394378662,grad_norm: 0.9999999416893476, iteration: 19661
loss: 0.9683493971824646,grad_norm: 0.9999991381905682, iteration: 19662
loss: 1.0148588418960571,grad_norm: 0.9999991699859151, iteration: 19663
loss: 1.0258909463882446,grad_norm: 0.9999993153431707, iteration: 19664
loss: 1.0856349468231201,grad_norm: 0.9999998703694927, iteration: 19665
loss: 1.0217251777648926,grad_norm: 0.9793332209488456, iteration: 19666
loss: 1.0109871625900269,grad_norm: 0.9999991668380837, iteration: 19667
loss: 0.9782214760780334,grad_norm: 0.9999990775220944, iteration: 19668
loss: 0.9986429810523987,grad_norm: 0.9999991682083726, iteration: 19669
loss: 1.0298534631729126,grad_norm: 0.999999444965961, iteration: 19670
loss: 1.0497822761535645,grad_norm: 0.9999996732059342, iteration: 19671
loss: 1.0208362340927124,grad_norm: 0.9999992388621202, iteration: 19672
loss: 1.036150336265564,grad_norm: 0.9068191913055884, iteration: 19673
loss: 0.9997783899307251,grad_norm: 0.999999220177836, iteration: 19674
loss: 1.041438102722168,grad_norm: 0.9999992962527086, iteration: 19675
loss: 1.031066656112671,grad_norm: 0.9999992568613665, iteration: 19676
loss: 1.0380327701568604,grad_norm: 0.999999223918718, iteration: 19677
loss: 1.035636305809021,grad_norm: 0.9999993147180942, iteration: 19678
loss: 1.0371185541152954,grad_norm: 0.9999996786514412, iteration: 19679
loss: 1.0088542699813843,grad_norm: 0.9999995036575147, iteration: 19680
loss: 1.1192498207092285,grad_norm: 0.9999993836598913, iteration: 19681
loss: 1.0873026847839355,grad_norm: 0.9070545145714985, iteration: 19682
loss: 1.0001274347305298,grad_norm: 0.9126427339806533, iteration: 19683
loss: 1.0518476963043213,grad_norm: 0.999999781944523, iteration: 19684
loss: 0.9624935984611511,grad_norm: 0.9999990554514686, iteration: 19685
loss: 1.0289736986160278,grad_norm: 0.9711161762080789, iteration: 19686
loss: 1.044472575187683,grad_norm: 0.9999992349020326, iteration: 19687
loss: 1.011601209640503,grad_norm: 0.9999991590373973, iteration: 19688
loss: 1.0413042306900024,grad_norm: 0.9999994268229377, iteration: 19689
loss: 0.9908475875854492,grad_norm: 0.9999992804802353, iteration: 19690
loss: 1.0164755582809448,grad_norm: 0.9999990866437676, iteration: 19691
loss: 1.058324933052063,grad_norm: 0.9999993544117474, iteration: 19692
loss: 0.9949027299880981,grad_norm: 0.9965511414742967, iteration: 19693
loss: 1.0113558769226074,grad_norm: 0.9999993087247534, iteration: 19694
loss: 1.0343513488769531,grad_norm: 0.9999992136295072, iteration: 19695
loss: 1.038050651550293,grad_norm: 0.9999992151308865, iteration: 19696
loss: 1.0659668445587158,grad_norm: 0.9999994398738379, iteration: 19697
loss: 1.0135160684585571,grad_norm: 0.9999992525692845, iteration: 19698
loss: 0.987784206867218,grad_norm: 0.9579779995778238, iteration: 19699
loss: 1.0032254457473755,grad_norm: 0.9999991770525278, iteration: 19700
loss: 1.0131232738494873,grad_norm: 0.999999484018897, iteration: 19701
loss: 1.0062655210494995,grad_norm: 0.9999990508593194, iteration: 19702
loss: 1.0029397010803223,grad_norm: 0.9999992820578294, iteration: 19703
loss: 1.0293031930923462,grad_norm: 0.9999991050099595, iteration: 19704
loss: 1.0646027326583862,grad_norm: 0.9999991465873226, iteration: 19705
loss: 1.0101397037506104,grad_norm: 0.9809112462183353, iteration: 19706
loss: 0.9950916171073914,grad_norm: 0.9999992442309782, iteration: 19707
loss: 1.0677021741867065,grad_norm: 0.9999999139043331, iteration: 19708
loss: 1.0283011198043823,grad_norm: 0.966275565490032, iteration: 19709
loss: 1.0411040782928467,grad_norm: 0.999999087673461, iteration: 19710
loss: 1.0329694747924805,grad_norm: 0.9999992272369469, iteration: 19711
loss: 1.020713448524475,grad_norm: 0.999999706932638, iteration: 19712
loss: 1.050024390220642,grad_norm: 0.9999991359311493, iteration: 19713
loss: 1.0169801712036133,grad_norm: 0.9999993113031455, iteration: 19714
loss: 1.077288269996643,grad_norm: 0.9999991649446031, iteration: 19715
loss: 1.0149903297424316,grad_norm: 0.999999545701825, iteration: 19716
loss: 1.066791296005249,grad_norm: 0.9999991863526259, iteration: 19717
loss: 1.0425621271133423,grad_norm: 0.9999992445258346, iteration: 19718
loss: 1.0387451648712158,grad_norm: 0.9999991995357079, iteration: 19719
loss: 1.035788655281067,grad_norm: 0.9999994431237331, iteration: 19720
loss: 1.0317747592926025,grad_norm: 0.9999996271907667, iteration: 19721
loss: 1.0384039878845215,grad_norm: 0.99999903388347, iteration: 19722
loss: 0.9933802485466003,grad_norm: 0.9999990124474868, iteration: 19723
loss: 1.0266302824020386,grad_norm: 0.9999996737881754, iteration: 19724
loss: 1.0143659114837646,grad_norm: 0.9999991970369742, iteration: 19725
loss: 1.0076934099197388,grad_norm: 0.9999990926869735, iteration: 19726
loss: 1.010902762413025,grad_norm: 0.8496242898984698, iteration: 19727
loss: 1.014535903930664,grad_norm: 0.9999992138270122, iteration: 19728
loss: 1.0559359788894653,grad_norm: 0.999999573329198, iteration: 19729
loss: 1.0316227674484253,grad_norm: 0.9999992409957962, iteration: 19730
loss: 1.0279345512390137,grad_norm: 0.9999994021693234, iteration: 19731
loss: 1.0529597997665405,grad_norm: 0.9999996812369861, iteration: 19732
loss: 1.0495041608810425,grad_norm: 0.9999996142333477, iteration: 19733
loss: 0.974587619304657,grad_norm: 0.9999991746186416, iteration: 19734
loss: 1.018649697303772,grad_norm: 0.9999993641836608, iteration: 19735
loss: 1.011559247970581,grad_norm: 0.999999042132465, iteration: 19736
loss: 1.0326974391937256,grad_norm: 0.999999517900952, iteration: 19737
loss: 0.9890655279159546,grad_norm: 0.9183388468137867, iteration: 19738
loss: 1.0188896656036377,grad_norm: 0.9999997420000092, iteration: 19739
loss: 1.1046265363693237,grad_norm: 0.9999994583619217, iteration: 19740
loss: 0.9917097687721252,grad_norm: 0.9999988625141778, iteration: 19741
loss: 1.0426461696624756,grad_norm: 0.9999994845273428, iteration: 19742
loss: 1.0123764276504517,grad_norm: 0.9999993177679946, iteration: 19743
loss: 0.9915576577186584,grad_norm: 0.9999992318171201, iteration: 19744
loss: 1.0765836238861084,grad_norm: 0.9999997401146337, iteration: 19745
loss: 1.0062801837921143,grad_norm: 0.9999990215577107, iteration: 19746
loss: 1.0032719373703003,grad_norm: 0.9271867794909222, iteration: 19747
loss: 0.9900340437889099,grad_norm: 0.999999676822903, iteration: 19748
loss: 1.0085011720657349,grad_norm: 0.9999991354132981, iteration: 19749
loss: 0.9892592430114746,grad_norm: 0.9999996752533733, iteration: 19750
loss: 1.0357136726379395,grad_norm: 0.9999995346533157, iteration: 19751
loss: 1.0103780031204224,grad_norm: 0.9999995933214374, iteration: 19752
loss: 1.038489818572998,grad_norm: 0.9999991189352934, iteration: 19753
loss: 1.0041581392288208,grad_norm: 0.9999994888613455, iteration: 19754
loss: 1.0692834854125977,grad_norm: 0.9999992122559711, iteration: 19755
loss: 0.9692186713218689,grad_norm: 0.9999993476004546, iteration: 19756
loss: 1.0157523155212402,grad_norm: 0.9999994859297757, iteration: 19757
loss: 1.0383285284042358,grad_norm: 0.9999991735035588, iteration: 19758
loss: 0.9738300442695618,grad_norm: 0.999999155755526, iteration: 19759
loss: 1.0277186632156372,grad_norm: 0.9999992150472683, iteration: 19760
loss: 1.0043398141860962,grad_norm: 0.9999992759783579, iteration: 19761
loss: 1.018555998802185,grad_norm: 0.9999993239385264, iteration: 19762
loss: 1.0330394506454468,grad_norm: 0.9999991851670719, iteration: 19763
loss: 1.0041919946670532,grad_norm: 0.9476881895462805, iteration: 19764
loss: 1.0410877466201782,grad_norm: 0.9999992277774291, iteration: 19765
loss: 1.0517823696136475,grad_norm: 0.9999991438108093, iteration: 19766
loss: 1.0335077047348022,grad_norm: 0.9999990985002345, iteration: 19767
loss: 1.017768383026123,grad_norm: 0.9999994595143316, iteration: 19768
loss: 1.0046573877334595,grad_norm: 0.9999992645100763, iteration: 19769
loss: 1.0261369943618774,grad_norm: 0.9999998839071442, iteration: 19770
loss: 0.9712801575660706,grad_norm: 0.999999158909317, iteration: 19771
loss: 1.0157755613327026,grad_norm: 0.9999994118209686, iteration: 19772
loss: 0.9746622443199158,grad_norm: 0.9789167694630696, iteration: 19773
loss: 1.0196160078048706,grad_norm: 0.9999991058182655, iteration: 19774
loss: 1.0691131353378296,grad_norm: 0.9999994865090094, iteration: 19775
loss: 0.9992543458938599,grad_norm: 0.9999997243969736, iteration: 19776
loss: 0.9849539995193481,grad_norm: 0.9999992118610229, iteration: 19777
loss: 1.1040148735046387,grad_norm: 0.9999992416073394, iteration: 19778
loss: 0.9949246644973755,grad_norm: 0.9999990922974464, iteration: 19779
loss: 1.048599123954773,grad_norm: 0.9999995415737529, iteration: 19780
loss: 1.0506986379623413,grad_norm: 0.9999992686861392, iteration: 19781
loss: 1.0717358589172363,grad_norm: 0.9999992313723463, iteration: 19782
loss: 1.1063358783721924,grad_norm: 0.9999994100866613, iteration: 19783
loss: 1.0670982599258423,grad_norm: 0.9999991056738521, iteration: 19784
loss: 1.0623269081115723,grad_norm: 0.9999993240310765, iteration: 19785
loss: 1.0105396509170532,grad_norm: 0.9288904845384822, iteration: 19786
loss: 0.9975753426551819,grad_norm: 0.9999991961471556, iteration: 19787
loss: 1.029919981956482,grad_norm: 0.9999992308566144, iteration: 19788
loss: 1.0049927234649658,grad_norm: 0.9999990445003091, iteration: 19789
loss: 0.9756015539169312,grad_norm: 0.9999990982862731, iteration: 19790
loss: 1.0070661306381226,grad_norm: 0.9999990972175116, iteration: 19791
loss: 1.0302833318710327,grad_norm: 0.999999423484275, iteration: 19792
loss: 1.0485634803771973,grad_norm: 0.9999991864256741, iteration: 19793
loss: 1.0485435724258423,grad_norm: 0.9999994961161015, iteration: 19794
loss: 1.0726423263549805,grad_norm: 0.9999993973148975, iteration: 19795
loss: 1.0204414129257202,grad_norm: 0.9999991575422303, iteration: 19796
loss: 0.9805816411972046,grad_norm: 0.9999991427111605, iteration: 19797
loss: 1.0007821321487427,grad_norm: 0.9999990136423004, iteration: 19798
loss: 1.0420033931732178,grad_norm: 0.9999993599828468, iteration: 19799
loss: 0.9864218235015869,grad_norm: 0.999999197395198, iteration: 19800
loss: 1.0179599523544312,grad_norm: 0.9999991856442076, iteration: 19801
loss: 1.0108616352081299,grad_norm: 0.9999993876099411, iteration: 19802
loss: 1.0364129543304443,grad_norm: 0.9999989632522831, iteration: 19803
loss: 0.9918026328086853,grad_norm: 0.9999995514612022, iteration: 19804
loss: 1.0144200325012207,grad_norm: 0.9609774852286779, iteration: 19805
loss: 1.010122537612915,grad_norm: 0.9999991176224684, iteration: 19806
loss: 1.025743842124939,grad_norm: 0.9999992197587623, iteration: 19807
loss: 1.0194705724716187,grad_norm: 0.9999990205061511, iteration: 19808
loss: 1.0403878688812256,grad_norm: 0.9999994876094146, iteration: 19809
loss: 0.9812189936637878,grad_norm: 0.9946786875524021, iteration: 19810
loss: 1.0211219787597656,grad_norm: 0.9999991471091626, iteration: 19811
loss: 1.025672197341919,grad_norm: 0.9878736633847999, iteration: 19812
loss: 1.0266294479370117,grad_norm: 0.9999992010823002, iteration: 19813
loss: 1.0095983743667603,grad_norm: 0.9999992411461641, iteration: 19814
loss: 0.9733614325523376,grad_norm: 0.9034471279935843, iteration: 19815
loss: 1.047802448272705,grad_norm: 0.9999993120419859, iteration: 19816
loss: 1.048478364944458,grad_norm: 0.9999992985082904, iteration: 19817
loss: 1.0297657251358032,grad_norm: 0.9999994193622915, iteration: 19818
loss: 0.9975221753120422,grad_norm: 0.8920920418413621, iteration: 19819
loss: 0.9903186559677124,grad_norm: 0.9999991420923967, iteration: 19820
loss: 1.0217865705490112,grad_norm: 0.9999991922333605, iteration: 19821
loss: 1.0105279684066772,grad_norm: 0.9999992739290202, iteration: 19822
loss: 1.0542690753936768,grad_norm: 0.9999996208922235, iteration: 19823
loss: 1.0621730089187622,grad_norm: 0.9999994676849373, iteration: 19824
loss: 1.0142351388931274,grad_norm: 0.9999992550831877, iteration: 19825
loss: 1.0244446992874146,grad_norm: 0.9120131519108179, iteration: 19826
loss: 1.0176005363464355,grad_norm: 0.9999992521313867, iteration: 19827
loss: 1.0054068565368652,grad_norm: 0.9322343307813649, iteration: 19828
loss: 1.0214036703109741,grad_norm: 0.9317721686668459, iteration: 19829
loss: 1.012399435043335,grad_norm: 0.9999996294328801, iteration: 19830
loss: 1.0651285648345947,grad_norm: 0.9999993953674905, iteration: 19831
loss: 1.0481988191604614,grad_norm: 0.9999997170384917, iteration: 19832
loss: 0.9747099280357361,grad_norm: 0.9999992125445017, iteration: 19833
loss: 0.9895963072776794,grad_norm: 0.9046689786372485, iteration: 19834
loss: 1.0228009223937988,grad_norm: 0.999999203990819, iteration: 19835
loss: 0.9732738733291626,grad_norm: 0.9999990994879131, iteration: 19836
loss: 1.0400029420852661,grad_norm: 0.9999991499566026, iteration: 19837
loss: 0.9979656934738159,grad_norm: 0.9999991245177261, iteration: 19838
loss: 1.0697176456451416,grad_norm: 0.999999496740109, iteration: 19839
loss: 1.006837248802185,grad_norm: 0.8947518919714124, iteration: 19840
loss: 1.0101139545440674,grad_norm: 0.9312249807644424, iteration: 19841
loss: 1.0543274879455566,grad_norm: 0.99999963636135, iteration: 19842
loss: 1.0365817546844482,grad_norm: 0.9999990465956012, iteration: 19843
loss: 1.0592186450958252,grad_norm: 0.9999993630461145, iteration: 19844
loss: 1.0007802248001099,grad_norm: 0.9999992616189687, iteration: 19845
loss: 0.9670959115028381,grad_norm: 0.9999990341975142, iteration: 19846
loss: 1.0029973983764648,grad_norm: 0.9528618251841015, iteration: 19847
loss: 1.0171855688095093,grad_norm: 0.9895034969466312, iteration: 19848
loss: 0.9798736572265625,grad_norm: 0.9999992852209931, iteration: 19849
loss: 1.0267772674560547,grad_norm: 0.9999992274545022, iteration: 19850
loss: 1.0484751462936401,grad_norm: 0.9999995533501899, iteration: 19851
loss: 1.0248385667800903,grad_norm: 0.9999991751741778, iteration: 19852
loss: 1.020433783531189,grad_norm: 0.8490134793919173, iteration: 19853
loss: 1.0048788785934448,grad_norm: 0.9999990717905016, iteration: 19854
loss: 1.0143799781799316,grad_norm: 0.9999994727450051, iteration: 19855
loss: 1.0239498615264893,grad_norm: 0.8101433958198457, iteration: 19856
loss: 1.0333932638168335,grad_norm: 0.9657371112948604, iteration: 19857
loss: 1.0752475261688232,grad_norm: 0.9999993902155717, iteration: 19858
loss: 1.0665282011032104,grad_norm: 0.999999505108152, iteration: 19859
loss: 0.9784675240516663,grad_norm: 0.9999991271659621, iteration: 19860
loss: 1.0080865621566772,grad_norm: 0.9085368850905712, iteration: 19861
loss: 0.9962329268455505,grad_norm: 0.999999029112007, iteration: 19862
loss: 1.0275124311447144,grad_norm: 0.999999536422888, iteration: 19863
loss: 1.0369025468826294,grad_norm: 0.9118089447693645, iteration: 19864
loss: 1.0275176763534546,grad_norm: 0.9999992412665935, iteration: 19865
loss: 1.0239912271499634,grad_norm: 0.999999726067682, iteration: 19866
loss: 1.0595439672470093,grad_norm: 0.999999225367853, iteration: 19867
loss: 1.031978964805603,grad_norm: 0.9999989656904131, iteration: 19868
loss: 1.0273576974868774,grad_norm: 0.9999991165346778, iteration: 19869
loss: 1.0098856687545776,grad_norm: 0.9663562966657324, iteration: 19870
loss: 1.016522765159607,grad_norm: 0.999999390338927, iteration: 19871
loss: 1.0565887689590454,grad_norm: 0.9999995995763211, iteration: 19872
loss: 1.0150901079177856,grad_norm: 0.968972076926822, iteration: 19873
loss: 0.9777137041091919,grad_norm: 0.9999990671238246, iteration: 19874
loss: 1.0367608070373535,grad_norm: 0.9999992337065248, iteration: 19875
loss: 1.049721360206604,grad_norm: 0.9999992482168315, iteration: 19876
loss: 1.0237950086593628,grad_norm: 0.9999992264673552, iteration: 19877
loss: 1.014244794845581,grad_norm: 0.9999995608330438, iteration: 19878
loss: 0.9784345626831055,grad_norm: 0.9999990892260308, iteration: 19879
loss: 1.0209256410598755,grad_norm: 0.999999179441043, iteration: 19880
loss: 1.0241249799728394,grad_norm: 0.9999992290333111, iteration: 19881
loss: 1.0106550455093384,grad_norm: 0.9999992628640177, iteration: 19882
loss: 1.0505790710449219,grad_norm: 0.9999995125096881, iteration: 19883
loss: 1.0204522609710693,grad_norm: 0.9999996845871314, iteration: 19884
loss: 0.9535665512084961,grad_norm: 0.9999991559197688, iteration: 19885
loss: 1.0268441438674927,grad_norm: 0.9999992653713013, iteration: 19886
loss: 1.013929843902588,grad_norm: 0.8493386158503786, iteration: 19887
loss: 1.04771888256073,grad_norm: 0.9999990906685622, iteration: 19888
loss: 1.0272029638290405,grad_norm: 0.9999993062779197, iteration: 19889
loss: 0.9826029539108276,grad_norm: 0.9999990869859736, iteration: 19890
loss: 1.0183451175689697,grad_norm: 0.9999992440455223, iteration: 19891
loss: 1.0314946174621582,grad_norm: 0.9999998680670449, iteration: 19892
loss: 1.0030220746994019,grad_norm: 0.9999992432975083, iteration: 19893
loss: 1.047580599784851,grad_norm: 0.9999996431574788, iteration: 19894
loss: 0.9759078621864319,grad_norm: 0.9999992526447299, iteration: 19895
loss: 1.0406774282455444,grad_norm: 0.9999990825702825, iteration: 19896
loss: 1.0018463134765625,grad_norm: 0.998969748979751, iteration: 19897
loss: 1.0592759847640991,grad_norm: 0.9999997037556436, iteration: 19898
loss: 0.9960890412330627,grad_norm: 0.99999912183102, iteration: 19899
loss: 1.015559196472168,grad_norm: 0.9999990324873712, iteration: 19900
loss: 0.9863296151161194,grad_norm: 0.9999991871384413, iteration: 19901
loss: 1.0945255756378174,grad_norm: 0.9999997467589147, iteration: 19902
loss: 1.0158449411392212,grad_norm: 0.9999998627792952, iteration: 19903
loss: 1.0693175792694092,grad_norm: 0.9999992255305089, iteration: 19904
loss: 1.0137068033218384,grad_norm: 0.9999993586521829, iteration: 19905
loss: 1.053958773612976,grad_norm: 0.9999990295397135, iteration: 19906
loss: 1.0245441198349,grad_norm: 0.9999991221903949, iteration: 19907
loss: 1.029921054840088,grad_norm: 0.999999223049462, iteration: 19908
loss: 1.0030323266983032,grad_norm: 0.9999989799106375, iteration: 19909
loss: 1.0654559135437012,grad_norm: 0.9999991142383313, iteration: 19910
loss: 1.0450295209884644,grad_norm: 0.9999991011739742, iteration: 19911
loss: 1.017568826675415,grad_norm: 0.9999990832735568, iteration: 19912
loss: 0.9864511489868164,grad_norm: 0.9999990729175331, iteration: 19913
loss: 0.9742639064788818,grad_norm: 0.9291333226825395, iteration: 19914
loss: 1.021532416343689,grad_norm: 0.9999996121659928, iteration: 19915
loss: 1.004331111907959,grad_norm: 0.9999991438694181, iteration: 19916
loss: 1.0457533597946167,grad_norm: 0.9999993472013526, iteration: 19917
loss: 1.0149199962615967,grad_norm: 0.9999994629746785, iteration: 19918
loss: 1.0629353523254395,grad_norm: 0.9999991985756596, iteration: 19919
loss: 1.0311806201934814,grad_norm: 0.9999997488101984, iteration: 19920
loss: 1.0326485633850098,grad_norm: 0.9999995001888266, iteration: 19921
loss: 0.9783945083618164,grad_norm: 0.9999991362902296, iteration: 19922
loss: 0.9875434637069702,grad_norm: 0.9999993648005868, iteration: 19923
loss: 1.0334604978561401,grad_norm: 0.9999993877184458, iteration: 19924
loss: 1.0063506364822388,grad_norm: 0.999999287254602, iteration: 19925
loss: 1.0451117753982544,grad_norm: 0.9999992690668282, iteration: 19926
loss: 1.0288581848144531,grad_norm: 0.9999992643085681, iteration: 19927
loss: 1.0513694286346436,grad_norm: 0.99999934540166, iteration: 19928
loss: 1.097385287284851,grad_norm: 0.9999996688031841, iteration: 19929
loss: 1.0189340114593506,grad_norm: 0.9999995077093237, iteration: 19930
loss: 1.0315784215927124,grad_norm: 0.9999993406987119, iteration: 19931
loss: 1.0465754270553589,grad_norm: 0.9999991739505144, iteration: 19932
loss: 1.024848222732544,grad_norm: 0.9999990320976675, iteration: 19933
loss: 1.0273288488388062,grad_norm: 0.9999990875786258, iteration: 19934
loss: 1.0310579538345337,grad_norm: 0.9846238139287161, iteration: 19935
loss: 1.0111709833145142,grad_norm: 0.9999992659489364, iteration: 19936
loss: 1.04319167137146,grad_norm: 0.9999994608046165, iteration: 19937
loss: 0.9916316866874695,grad_norm: 0.9999991460814797, iteration: 19938
loss: 1.0202853679656982,grad_norm: 0.9999992731550956, iteration: 19939
loss: 1.0038888454437256,grad_norm: 0.9999991785657557, iteration: 19940
loss: 1.012210488319397,grad_norm: 0.9999990045224839, iteration: 19941
loss: 1.081092357635498,grad_norm: 0.9999996831076816, iteration: 19942
loss: 0.9917877316474915,grad_norm: 0.9999991667508875, iteration: 19943
loss: 0.9934700131416321,grad_norm: 0.9945698784417504, iteration: 19944
loss: 1.0061758756637573,grad_norm: 0.999999670104599, iteration: 19945
loss: 0.9873380661010742,grad_norm: 0.9999995675379633, iteration: 19946
loss: 0.9938722252845764,grad_norm: 0.9999989783163017, iteration: 19947
loss: 0.9649837017059326,grad_norm: 0.9999991832470001, iteration: 19948
loss: 1.000584602355957,grad_norm: 0.9999992292540911, iteration: 19949
loss: 1.027061104774475,grad_norm: 0.9999990313703732, iteration: 19950
loss: 1.0410199165344238,grad_norm: 0.999999342770294, iteration: 19951
loss: 1.0113935470581055,grad_norm: 0.941648812927751, iteration: 19952
loss: 1.0628553628921509,grad_norm: 0.9999996224305695, iteration: 19953
loss: 1.0489641427993774,grad_norm: 0.9999996313158744, iteration: 19954
loss: 1.0447266101837158,grad_norm: 0.9999992344730246, iteration: 19955
loss: 1.0290802717208862,grad_norm: 0.999999277019585, iteration: 19956
loss: 0.9705183506011963,grad_norm: 0.9999996434006737, iteration: 19957
loss: 0.9920389652252197,grad_norm: 0.9999993072091387, iteration: 19958
loss: 1.0511962175369263,grad_norm: 0.9999990522611142, iteration: 19959
loss: 1.0478558540344238,grad_norm: 0.999999632085053, iteration: 19960
loss: 1.0120218992233276,grad_norm: 0.9999992226873576, iteration: 19961
loss: 1.0486160516738892,grad_norm: 0.9999992591302109, iteration: 19962
loss: 1.0359808206558228,grad_norm: 0.9999993968022531, iteration: 19963
loss: 0.9820062518119812,grad_norm: 0.9999993061518794, iteration: 19964
loss: 0.9956833720207214,grad_norm: 0.9999991013965995, iteration: 19965
loss: 1.0218322277069092,grad_norm: 0.9999995399110003, iteration: 19966
loss: 1.0543243885040283,grad_norm: 0.9999992146832102, iteration: 19967
loss: 1.0302071571350098,grad_norm: 0.9999989870668967, iteration: 19968
loss: 1.0435287952423096,grad_norm: 0.9999993255627845, iteration: 19969
loss: 1.0259028673171997,grad_norm: 0.9999991391488995, iteration: 19970
loss: 1.041162371635437,grad_norm: 0.9999991502348999, iteration: 19971
loss: 1.0330708026885986,grad_norm: 0.9999991690213399, iteration: 19972
loss: 1.0522568225860596,grad_norm: 0.9999992384198249, iteration: 19973
loss: 1.0462690591812134,grad_norm: 0.9165370482125026, iteration: 19974
loss: 1.0529656410217285,grad_norm: 0.9999997778388661, iteration: 19975
loss: 1.0341047048568726,grad_norm: 0.9999993343605114, iteration: 19976
loss: 1.0491013526916504,grad_norm: 0.9999996986987, iteration: 19977
loss: 1.0114262104034424,grad_norm: 0.9999990589138941, iteration: 19978
loss: 1.0275678634643555,grad_norm: 0.9999993798653589, iteration: 19979
loss: 0.9982412457466125,grad_norm: 0.9999992071459898, iteration: 19980
loss: 0.9736457467079163,grad_norm: 0.9999996414838318, iteration: 19981
loss: 1.020804762840271,grad_norm: 0.9999993429223385, iteration: 19982
loss: 1.0642837285995483,grad_norm: 0.999999333825026, iteration: 19983
loss: 1.0605865716934204,grad_norm: 0.9999992280313094, iteration: 19984
loss: 1.0051391124725342,grad_norm: 0.9999993834789325, iteration: 19985
loss: 1.0374155044555664,grad_norm: 0.9999991113508367, iteration: 19986
loss: 1.000181794166565,grad_norm: 0.9999990543088221, iteration: 19987
loss: 0.9482499957084656,grad_norm: 0.9999993228789503, iteration: 19988
loss: 1.116239070892334,grad_norm: 0.999999927942451, iteration: 19989
loss: 1.0288974046707153,grad_norm: 0.9999993219097705, iteration: 19990
loss: 1.0502424240112305,grad_norm: 0.9999993627744631, iteration: 19991
loss: 1.065969705581665,grad_norm: 0.9999996892714614, iteration: 19992
loss: 1.0887154340744019,grad_norm: 0.9999998458315117, iteration: 19993
loss: 1.0580064058303833,grad_norm: 0.9999992963643762, iteration: 19994
loss: 1.0195553302764893,grad_norm: 0.973871084295121, iteration: 19995
loss: 1.0204397439956665,grad_norm: 0.9999991935169384, iteration: 19996
loss: 1.080032467842102,grad_norm: 0.9999991248485075, iteration: 19997
loss: 0.9736413955688477,grad_norm: 0.9557133403229482, iteration: 19998
loss: 1.0067130327224731,grad_norm: 0.9999991328025842, iteration: 19999
loss: 1.0086649656295776,grad_norm: 0.9999993649122224, iteration: 20000
Evaluating at step 20000
{'val': 1.005062511190772, 'test': 2.9752878129672697}
loss: 0.9917349815368652,grad_norm: 0.9999991753419644, iteration: 20001
loss: 1.0243562459945679,grad_norm: 0.9999991563054088, iteration: 20002
loss: 0.9910558462142944,grad_norm: 0.9999991268775706, iteration: 20003
loss: 1.043677806854248,grad_norm: 0.9999993421375772, iteration: 20004
loss: 1.020853877067566,grad_norm: 0.9608279424461611, iteration: 20005
loss: 1.2046984434127808,grad_norm: 0.9999997469443226, iteration: 20006
loss: 1.0204495191574097,grad_norm: 0.9999991260112052, iteration: 20007
loss: 1.0074204206466675,grad_norm: 0.9999992471118664, iteration: 20008
loss: 1.0489312410354614,grad_norm: 0.9999991895184096, iteration: 20009
loss: 0.999611496925354,grad_norm: 0.9999992967501743, iteration: 20010
loss: 1.0079549551010132,grad_norm: 0.9962548403839905, iteration: 20011
loss: 1.039965033531189,grad_norm: 0.999999016804853, iteration: 20012
loss: 1.0366384983062744,grad_norm: 0.9999998880055049, iteration: 20013
loss: 1.0189032554626465,grad_norm: 0.9999991394277203, iteration: 20014
loss: 0.9927809238433838,grad_norm: 0.9999992050213471, iteration: 20015
loss: 0.9648665189743042,grad_norm: 0.9931251690143338, iteration: 20016
loss: 1.0179873704910278,grad_norm: 0.9999998290089418, iteration: 20017
loss: 1.056072473526001,grad_norm: 0.9999991957969865, iteration: 20018
loss: 0.9938712120056152,grad_norm: 0.7900743985767927, iteration: 20019
loss: 1.0340416431427002,grad_norm: 0.9999992460417569, iteration: 20020
loss: 0.997406005859375,grad_norm: 0.9999990191487159, iteration: 20021
loss: 0.9962307214736938,grad_norm: 0.9999991405873323, iteration: 20022
loss: 1.0399531126022339,grad_norm: 0.9999991720730931, iteration: 20023
loss: 1.0184766054153442,grad_norm: 0.9160020119315275, iteration: 20024
loss: 1.021681308746338,grad_norm: 0.9999992345179891, iteration: 20025
loss: 1.0437952280044556,grad_norm: 0.9999991560418462, iteration: 20026
loss: 0.9959288239479065,grad_norm: 0.9829812872310932, iteration: 20027
loss: 1.062978982925415,grad_norm: 0.9999991805215769, iteration: 20028
loss: 0.9841046929359436,grad_norm: 0.9999990385631232, iteration: 20029
loss: 1.0142722129821777,grad_norm: 0.9999990559958408, iteration: 20030
loss: 1.0102434158325195,grad_norm: 0.9999991732534443, iteration: 20031
loss: 1.0488629341125488,grad_norm: 0.9451613780197345, iteration: 20032
loss: 1.0653178691864014,grad_norm: 0.9999990529926994, iteration: 20033
loss: 1.08917236328125,grad_norm: 0.9999990721229293, iteration: 20034
loss: 1.0149577856063843,grad_norm: 0.9999994727500707, iteration: 20035
loss: 1.0114712715148926,grad_norm: 0.9999991702009999, iteration: 20036
loss: 1.005483627319336,grad_norm: 0.9999991499854741, iteration: 20037
loss: 1.0230717658996582,grad_norm: 0.9999991299447144, iteration: 20038
loss: 1.0051820278167725,grad_norm: 0.9999992224347952, iteration: 20039
loss: 1.0553641319274902,grad_norm: 0.9999990469571532, iteration: 20040
loss: 1.0448464155197144,grad_norm: 0.9999991660115864, iteration: 20041
loss: 0.9808028340339661,grad_norm: 0.9999992414920021, iteration: 20042
loss: 1.0647072792053223,grad_norm: 1.0000000613575715, iteration: 20043
loss: 1.0239412784576416,grad_norm: 0.9999993027562661, iteration: 20044
loss: 1.0149624347686768,grad_norm: 0.9999997623741019, iteration: 20045
loss: 1.058264136314392,grad_norm: 0.9999996335274614, iteration: 20046
loss: 1.014217495918274,grad_norm: 0.9999992044882529, iteration: 20047
loss: 1.033574104309082,grad_norm: 0.8360628429613421, iteration: 20048
loss: 1.02814519405365,grad_norm: 0.91489811281348, iteration: 20049
loss: 1.0619776248931885,grad_norm: 0.9999992615862152, iteration: 20050
loss: 0.9972870945930481,grad_norm: 0.9999993015645309, iteration: 20051
loss: 1.0105301141738892,grad_norm: 0.9177180532837115, iteration: 20052
loss: 1.039647102355957,grad_norm: 0.999999094333831, iteration: 20053
loss: 1.031358003616333,grad_norm: 0.99999924708976, iteration: 20054
loss: 1.0521585941314697,grad_norm: 0.9999994933915838, iteration: 20055
loss: 1.0290261507034302,grad_norm: 0.9999989877434329, iteration: 20056
loss: 1.0177555084228516,grad_norm: 0.9999990424065746, iteration: 20057
loss: 1.0026875734329224,grad_norm: 0.9757228254046003, iteration: 20058
loss: 1.0615262985229492,grad_norm: 0.9999992098620784, iteration: 20059
loss: 1.026413917541504,grad_norm: 0.9999992648166404, iteration: 20060
loss: 0.996374785900116,grad_norm: 0.9614254038374092, iteration: 20061
loss: 1.0479532480239868,grad_norm: 0.9999999770629142, iteration: 20062
loss: 1.016554355621338,grad_norm: 0.9999993960015505, iteration: 20063
loss: 0.9672154188156128,grad_norm: 0.9999991378190941, iteration: 20064
loss: 1.0246151685714722,grad_norm: 0.9999991097732469, iteration: 20065
loss: 1.0762876272201538,grad_norm: 0.9999997437521622, iteration: 20066
loss: 1.0854426622390747,grad_norm: 0.9999999490232522, iteration: 20067
loss: 1.0194920301437378,grad_norm: 0.9999993975739067, iteration: 20068
loss: 1.0160274505615234,grad_norm: 0.9999992528722246, iteration: 20069
loss: 0.989949643611908,grad_norm: 0.9999992564934074, iteration: 20070
loss: 1.023280382156372,grad_norm: 0.9999991673764699, iteration: 20071
loss: 1.0160484313964844,grad_norm: 0.9895577628709143, iteration: 20072
loss: 1.022971272468567,grad_norm: 0.929838709764202, iteration: 20073
loss: 1.0273141860961914,grad_norm: 0.935004483907295, iteration: 20074
loss: 0.9994735717773438,grad_norm: 0.9478932210210581, iteration: 20075
loss: 0.9839651584625244,grad_norm: 0.9999991275967427, iteration: 20076
loss: 1.017776370048523,grad_norm: 0.9999990032266161, iteration: 20077
loss: 1.0281866788864136,grad_norm: 0.9999997605222949, iteration: 20078
loss: 1.0026880502700806,grad_norm: 0.9999991836673814, iteration: 20079
loss: 1.0186429023742676,grad_norm: 0.999999458723879, iteration: 20080
loss: 1.0092649459838867,grad_norm: 0.9999991445315428, iteration: 20081
loss: 1.0188806056976318,grad_norm: 0.9999992838652312, iteration: 20082
loss: 1.0220636129379272,grad_norm: 0.9999992718630552, iteration: 20083
loss: 1.0108397006988525,grad_norm: 0.9999992394261134, iteration: 20084
loss: 1.0541378259658813,grad_norm: 0.9999998918910528, iteration: 20085
loss: 1.0011928081512451,grad_norm: 0.9380278875157438, iteration: 20086
loss: 1.0671356916427612,grad_norm: 0.9999996366864425, iteration: 20087
loss: 1.0224517583847046,grad_norm: 0.9999992204121637, iteration: 20088
loss: 1.0180401802062988,grad_norm: 0.9999992765545365, iteration: 20089
loss: 1.0819199085235596,grad_norm: 0.9999991729695289, iteration: 20090
loss: 1.0587375164031982,grad_norm: 0.9698096707463135, iteration: 20091
loss: 1.0796865224838257,grad_norm: 1.0000000380714398, iteration: 20092
loss: 1.2341583967208862,grad_norm: 1.0000000286235189, iteration: 20093
loss: 1.014095664024353,grad_norm: 0.9759832376144989, iteration: 20094
loss: 0.9923111200332642,grad_norm: 0.9999991832149244, iteration: 20095
loss: 1.066375732421875,grad_norm: 0.9999996979395273, iteration: 20096
loss: 1.040645718574524,grad_norm: 0.93226268258885, iteration: 20097
loss: 1.030640959739685,grad_norm: 0.9999990707410908, iteration: 20098
loss: 1.3074249029159546,grad_norm: 0.9999999630622838, iteration: 20099
loss: 1.0191642045974731,grad_norm: 0.9999991371306852, iteration: 20100
loss: 1.018502116203308,grad_norm: 0.999999597156431, iteration: 20101
loss: 1.032674789428711,grad_norm: 0.9999991631517204, iteration: 20102
loss: 1.0244715213775635,grad_norm: 0.9999991165652361, iteration: 20103
loss: 0.9716284871101379,grad_norm: 0.8709821819290662, iteration: 20104
loss: 1.0096122026443481,grad_norm: 0.9999993497799836, iteration: 20105
loss: 0.9979298710823059,grad_norm: 0.9999993103435802, iteration: 20106
loss: 1.0756781101226807,grad_norm: 0.9999994701898793, iteration: 20107
loss: 1.0081738233566284,grad_norm: 0.8711958110982596, iteration: 20108
loss: 1.0224683284759521,grad_norm: 0.9999991233489227, iteration: 20109
loss: 1.011246681213379,grad_norm: 0.9645818265765101, iteration: 20110
loss: 1.0323290824890137,grad_norm: 0.9730295917876672, iteration: 20111
loss: 1.061299443244934,grad_norm: 0.9999995600915209, iteration: 20112
loss: 1.075655221939087,grad_norm: 0.9999991894277866, iteration: 20113
loss: 1.0395570993423462,grad_norm: 0.9999997703484218, iteration: 20114
loss: 1.0268745422363281,grad_norm: 0.9999990176687037, iteration: 20115
loss: 1.0168735980987549,grad_norm: 0.9999992205620201, iteration: 20116
loss: 1.0600236654281616,grad_norm: 0.9999998571360502, iteration: 20117
loss: 1.0245918035507202,grad_norm: 0.99999919289457, iteration: 20118
loss: 1.0161988735198975,grad_norm: 0.9999991611058006, iteration: 20119
loss: 1.0729238986968994,grad_norm: 0.9999996437466688, iteration: 20120
loss: 1.1488585472106934,grad_norm: 0.999999594315979, iteration: 20121
loss: 1.0645501613616943,grad_norm: 0.9999998869509731, iteration: 20122
loss: 1.090408444404602,grad_norm: 0.9999995840878099, iteration: 20123
loss: 0.9999789595603943,grad_norm: 0.9999997103907096, iteration: 20124
loss: 1.0026596784591675,grad_norm: 0.9999994359491415, iteration: 20125
loss: 1.0418733358383179,grad_norm: 0.9999993990251146, iteration: 20126
loss: 1.0282357931137085,grad_norm: 0.9999991802139867, iteration: 20127
loss: 1.0375486612319946,grad_norm: 0.9999991147719519, iteration: 20128
loss: 1.0024558305740356,grad_norm: 0.9999992090326366, iteration: 20129
loss: 1.062584400177002,grad_norm: 0.9999995185896255, iteration: 20130
loss: 1.0053523778915405,grad_norm: 0.9999992610090342, iteration: 20131
loss: 1.0410133600234985,grad_norm: 0.9999994327269575, iteration: 20132
loss: 1.0816717147827148,grad_norm: 0.9999994997500811, iteration: 20133
loss: 0.9971434473991394,grad_norm: 0.9196157720974377, iteration: 20134
loss: 1.0284301042556763,grad_norm: 0.9999997138208887, iteration: 20135
loss: 1.1018584966659546,grad_norm: 0.999999767087471, iteration: 20136
loss: 0.9734600782394409,grad_norm: 0.9999992818009262, iteration: 20137
loss: 1.0510263442993164,grad_norm: 0.999999795200943, iteration: 20138
loss: 0.9603743553161621,grad_norm: 0.9999990810789067, iteration: 20139
loss: 1.0070911645889282,grad_norm: 0.9999991323287327, iteration: 20140
loss: 1.120855450630188,grad_norm: 0.9999999790298136, iteration: 20141
loss: 1.0015242099761963,grad_norm: 0.9999993768340723, iteration: 20142
loss: 1.0552589893341064,grad_norm: 0.9999992499889464, iteration: 20143
loss: 1.0247901678085327,grad_norm: 0.9999991713665551, iteration: 20144
loss: 1.0258815288543701,grad_norm: 0.9999992216650464, iteration: 20145
loss: 1.014949083328247,grad_norm: 0.9999990252590565, iteration: 20146
loss: 1.0133566856384277,grad_norm: 0.9999993770060847, iteration: 20147
loss: 0.9892959594726562,grad_norm: 0.9999993625909324, iteration: 20148
loss: 1.0667752027511597,grad_norm: 0.9999994254417, iteration: 20149
loss: 1.0182591676712036,grad_norm: 0.9999994574990501, iteration: 20150
loss: 0.9872525334358215,grad_norm: 0.9999990572502804, iteration: 20151
loss: 0.9964951276779175,grad_norm: 0.977250413296243, iteration: 20152
loss: 1.0009167194366455,grad_norm: 0.9545831855628703, iteration: 20153
loss: 0.9821895360946655,grad_norm: 0.9999994647696416, iteration: 20154
loss: 1.0072684288024902,grad_norm: 0.9999992142964241, iteration: 20155
loss: 1.0442637205123901,grad_norm: 0.999999660831861, iteration: 20156
loss: 1.0138972997665405,grad_norm: 0.9999999576503694, iteration: 20157
loss: 1.0689067840576172,grad_norm: 0.9999994456838909, iteration: 20158
loss: 1.0804909467697144,grad_norm: 0.9999997348720246, iteration: 20159
loss: 0.9703471660614014,grad_norm: 0.999999388273676, iteration: 20160
loss: 0.9943101406097412,grad_norm: 0.9999990356281347, iteration: 20161
loss: 1.0484269857406616,grad_norm: 0.9999993507013993, iteration: 20162
loss: 1.0217112302780151,grad_norm: 0.999999282889949, iteration: 20163
loss: 1.0562543869018555,grad_norm: 0.9999994193566123, iteration: 20164
loss: 1.0235037803649902,grad_norm: 0.9999992255628221, iteration: 20165
loss: 1.0083857774734497,grad_norm: 0.9999996555855988, iteration: 20166
loss: 1.045411229133606,grad_norm: 0.9999991886573852, iteration: 20167
loss: 1.0334086418151855,grad_norm: 0.9999990447536865, iteration: 20168
loss: 1.039575219154358,grad_norm: 0.983159381572069, iteration: 20169
loss: 1.0197418928146362,grad_norm: 0.9999994006739397, iteration: 20170
loss: 1.0324305295944214,grad_norm: 0.9999994975805985, iteration: 20171
loss: 1.1020927429199219,grad_norm: 0.9999996515711487, iteration: 20172
loss: 1.0575635433197021,grad_norm: 0.9999998472776099, iteration: 20173
loss: 0.9864155650138855,grad_norm: 0.9999990729611062, iteration: 20174
loss: 1.0024902820587158,grad_norm: 0.9557954003429388, iteration: 20175
loss: 1.049712896347046,grad_norm: 0.9999991214225576, iteration: 20176
loss: 1.0157512426376343,grad_norm: 0.9999996776954833, iteration: 20177
loss: 0.9961023330688477,grad_norm: 0.9999995560165295, iteration: 20178
loss: 0.975385308265686,grad_norm: 0.9999993275720773, iteration: 20179
loss: 1.047284483909607,grad_norm: 0.9999999770841292, iteration: 20180
loss: 0.9942694306373596,grad_norm: 0.999999301356656, iteration: 20181
loss: 1.0182846784591675,grad_norm: 0.9999992089969617, iteration: 20182
loss: 0.9906603097915649,grad_norm: 0.9999990852229624, iteration: 20183
loss: 1.0312607288360596,grad_norm: 0.9999993908996474, iteration: 20184
loss: 1.0570147037506104,grad_norm: 0.9999997849682422, iteration: 20185
loss: 1.0244321823120117,grad_norm: 0.9586991502135089, iteration: 20186
loss: 1.018994688987732,grad_norm: 0.9999992251250496, iteration: 20187
loss: 0.9796536564826965,grad_norm: 0.9999990774662832, iteration: 20188
loss: 1.0098124742507935,grad_norm: 0.9999990659587581, iteration: 20189
loss: 1.0351722240447998,grad_norm: 0.9999992479642373, iteration: 20190
loss: 1.0103498697280884,grad_norm: 0.9999991993202634, iteration: 20191
loss: 1.048507571220398,grad_norm: 0.9999991432447924, iteration: 20192
loss: 1.0578668117523193,grad_norm: 0.9999996257616561, iteration: 20193
loss: 0.9983140826225281,grad_norm: 0.9999991594933663, iteration: 20194
loss: 1.0650628805160522,grad_norm: 0.9999993257856069, iteration: 20195
loss: 1.0708948373794556,grad_norm: 0.9999991083365493, iteration: 20196
loss: 1.0798590183258057,grad_norm: 0.999999281824247, iteration: 20197
loss: 1.0033588409423828,grad_norm: 0.9999989209505699, iteration: 20198
loss: 0.9961912631988525,grad_norm: 0.9935473214203443, iteration: 20199
loss: 1.0049564838409424,grad_norm: 0.9999991721599573, iteration: 20200
loss: 1.0045710802078247,grad_norm: 0.9999991840824858, iteration: 20201
loss: 0.9816937446594238,grad_norm: 0.9999989081775287, iteration: 20202
loss: 1.0653536319732666,grad_norm: 0.9999990736800736, iteration: 20203
loss: 1.0157510042190552,grad_norm: 0.9999993293944585, iteration: 20204
loss: 1.0518676042556763,grad_norm: 0.9981229964191239, iteration: 20205
loss: 1.0239754915237427,grad_norm: 0.8657233183029016, iteration: 20206
loss: 1.0148162841796875,grad_norm: 0.9999992156746101, iteration: 20207
loss: 1.024570107460022,grad_norm: 0.9729024030793609, iteration: 20208
loss: 0.9882888197898865,grad_norm: 0.9864147790337057, iteration: 20209
loss: 1.0016049146652222,grad_norm: 0.9613701024295646, iteration: 20210
loss: 0.9853436350822449,grad_norm: 0.9999996488331065, iteration: 20211
loss: 1.0465142726898193,grad_norm: 0.9999996029337541, iteration: 20212
loss: 1.018520474433899,grad_norm: 0.9999992345619594, iteration: 20213
loss: 0.9819698333740234,grad_norm: 0.9999995470271851, iteration: 20214
loss: 1.0166385173797607,grad_norm: 0.9999992164295514, iteration: 20215
loss: 0.9765217304229736,grad_norm: 0.956061703292561, iteration: 20216
loss: 1.0378795862197876,grad_norm: 0.8509608772024475, iteration: 20217
loss: 0.9976218938827515,grad_norm: 0.9785283740381474, iteration: 20218
loss: 0.9845498204231262,grad_norm: 0.999999327258296, iteration: 20219
loss: 0.9805863499641418,grad_norm: 0.9999992560112403, iteration: 20220
loss: 0.9915261268615723,grad_norm: 0.9999989888384679, iteration: 20221
loss: 1.0193730592727661,grad_norm: 0.9999991406476751, iteration: 20222
loss: 1.0002597570419312,grad_norm: 0.99999907623446, iteration: 20223
loss: 1.024514079093933,grad_norm: 0.9999989995394489, iteration: 20224
loss: 1.0505748987197876,grad_norm: 0.9999993690901035, iteration: 20225
loss: 1.001874566078186,grad_norm: 0.9999990286438178, iteration: 20226
loss: 0.9910405874252319,grad_norm: 0.9999990948771141, iteration: 20227
loss: 1.0204061269760132,grad_norm: 0.9999992803831494, iteration: 20228
loss: 1.0579676628112793,grad_norm: 0.9999990990431277, iteration: 20229
loss: 0.999744713306427,grad_norm: 0.9999991181439741, iteration: 20230
loss: 1.0054436922073364,grad_norm: 0.9999991655765409, iteration: 20231
loss: 0.9938483238220215,grad_norm: 0.9999990262870763, iteration: 20232
loss: 1.040130615234375,grad_norm: 0.9999991903272142, iteration: 20233
loss: 1.0502277612686157,grad_norm: 0.9999990626673375, iteration: 20234
loss: 0.9820076823234558,grad_norm: 0.9999992732507215, iteration: 20235
loss: 1.004844307899475,grad_norm: 0.9999991546878717, iteration: 20236
loss: 1.0112011432647705,grad_norm: 0.9999992766333055, iteration: 20237
loss: 0.987966775894165,grad_norm: 0.9999991457945998, iteration: 20238
loss: 1.0013595819473267,grad_norm: 0.9999992046035379, iteration: 20239
loss: 1.0250154733657837,grad_norm: 0.9013314030842594, iteration: 20240
loss: 1.0816200971603394,grad_norm: 0.999999826712301, iteration: 20241
loss: 1.0150145292282104,grad_norm: 0.9999990747734462, iteration: 20242
loss: 0.9611275792121887,grad_norm: 0.9999992251971088, iteration: 20243
loss: 1.031130075454712,grad_norm: 0.999999126021095, iteration: 20244
loss: 1.0386371612548828,grad_norm: 0.9999994809520408, iteration: 20245
loss: 1.0482330322265625,grad_norm: 0.9999990656307597, iteration: 20246
loss: 1.214067816734314,grad_norm: 0.999999922248096, iteration: 20247
loss: 1.0207751989364624,grad_norm: 0.997225410710768, iteration: 20248
loss: 1.039582371711731,grad_norm: 0.9999992217514836, iteration: 20249
loss: 0.989990770816803,grad_norm: 0.8916837890265457, iteration: 20250
loss: 1.080981731414795,grad_norm: 0.9999991547441999, iteration: 20251
loss: 1.0307379961013794,grad_norm: 0.9999996387632228, iteration: 20252
loss: 1.1154682636260986,grad_norm: 0.999999625971437, iteration: 20253
loss: 1.0074834823608398,grad_norm: 0.9999991313193584, iteration: 20254
loss: 1.0134644508361816,grad_norm: 0.9999990528323602, iteration: 20255
loss: 1.0608928203582764,grad_norm: 0.9999993159896582, iteration: 20256
loss: 1.0609947443008423,grad_norm: 0.9812151135847494, iteration: 20257
loss: 1.0361974239349365,grad_norm: 0.9999995905887243, iteration: 20258
loss: 1.0474377870559692,grad_norm: 0.9999993654336652, iteration: 20259
loss: 1.0197243690490723,grad_norm: 0.9999992855309947, iteration: 20260
loss: 0.9924722909927368,grad_norm: 0.9999990249843381, iteration: 20261
loss: 1.0103319883346558,grad_norm: 0.8925444748158633, iteration: 20262
loss: 1.0148990154266357,grad_norm: 0.9856637849220807, iteration: 20263
loss: 1.0216609239578247,grad_norm: 0.9999992079768947, iteration: 20264
loss: 1.035449504852295,grad_norm: 0.9999993036479637, iteration: 20265
loss: 1.0884819030761719,grad_norm: 0.9999993707760741, iteration: 20266
loss: 1.0397570133209229,grad_norm: 0.9999991232877574, iteration: 20267
loss: 1.0156826972961426,grad_norm: 0.9999995250420455, iteration: 20268
loss: 1.0320769548416138,grad_norm: 0.8418423938841206, iteration: 20269
loss: 1.06289541721344,grad_norm: 0.9999990829226452, iteration: 20270
loss: 1.0338430404663086,grad_norm: 0.9999997940320964, iteration: 20271
loss: 1.0560705661773682,grad_norm: 0.9999995305809646, iteration: 20272
loss: 0.9919944405555725,grad_norm: 0.9564395383277664, iteration: 20273
loss: 1.0274416208267212,grad_norm: 0.9999991078807904, iteration: 20274
loss: 1.0387489795684814,grad_norm: 0.9999989978404651, iteration: 20275
loss: 1.0703108310699463,grad_norm: 0.9999997982301666, iteration: 20276
loss: 0.9753934144973755,grad_norm: 0.9999990121666468, iteration: 20277
loss: 1.0115830898284912,grad_norm: 0.9999991381030886, iteration: 20278
loss: 1.0336345434188843,grad_norm: 0.9999991993944914, iteration: 20279
loss: 0.994665801525116,grad_norm: 0.9781765496185754, iteration: 20280
loss: 1.0117807388305664,grad_norm: 0.9999991825588047, iteration: 20281
loss: 1.0337921380996704,grad_norm: 0.9999991150016091, iteration: 20282
loss: 0.9976224303245544,grad_norm: 0.8518097764003677, iteration: 20283
loss: 1.058437466621399,grad_norm: 0.9999992536129035, iteration: 20284
loss: 1.0112781524658203,grad_norm: 0.9999991240681604, iteration: 20285
loss: 1.0226678848266602,grad_norm: 0.9999991080921777, iteration: 20286
loss: 1.0235222578048706,grad_norm: 0.9378466072450432, iteration: 20287
loss: 1.016167402267456,grad_norm: 0.9325200319987916, iteration: 20288
loss: 0.9977720379829407,grad_norm: 0.9999991263191874, iteration: 20289
loss: 0.9795572757720947,grad_norm: 0.9999991586860802, iteration: 20290
loss: 1.0067918300628662,grad_norm: 0.9999989771759247, iteration: 20291
loss: 1.0637941360473633,grad_norm: 0.9999997294260191, iteration: 20292
loss: 0.9615533351898193,grad_norm: 0.9999992514057849, iteration: 20293
loss: 1.0302320718765259,grad_norm: 0.9598934669441607, iteration: 20294
loss: 1.000327229499817,grad_norm: 0.9999992055083925, iteration: 20295
loss: 1.0062834024429321,grad_norm: 0.9999991794660135, iteration: 20296
loss: 1.0343961715698242,grad_norm: 0.9520104998472482, iteration: 20297
loss: 1.0487555265426636,grad_norm: 0.999999146525541, iteration: 20298
loss: 1.0322165489196777,grad_norm: 0.9911958213746879, iteration: 20299
loss: 1.0114151239395142,grad_norm: 0.9999992090464986, iteration: 20300
loss: 1.0903970003128052,grad_norm: 0.999999732864025, iteration: 20301
loss: 1.0025032758712769,grad_norm: 0.9999990968814996, iteration: 20302
loss: 1.132971167564392,grad_norm: 0.9999997516219662, iteration: 20303
loss: 1.0621731281280518,grad_norm: 0.9999996096480978, iteration: 20304
loss: 1.0614030361175537,grad_norm: 0.9999993855221935, iteration: 20305
loss: 1.0445328950881958,grad_norm: 0.999999131601456, iteration: 20306
loss: 1.0851858854293823,grad_norm: 0.9999992640303221, iteration: 20307
loss: 1.018202543258667,grad_norm: 0.9999991569831983, iteration: 20308
loss: 1.013868808746338,grad_norm: 0.9999991528892342, iteration: 20309
loss: 0.9966456890106201,grad_norm: 0.9343263266634577, iteration: 20310
loss: 1.0047441720962524,grad_norm: 0.9999990372157277, iteration: 20311
loss: 1.059527039527893,grad_norm: 0.999999596326197, iteration: 20312
loss: 1.0247423648834229,grad_norm: 0.9999992303447862, iteration: 20313
loss: 1.0182814598083496,grad_norm: 0.999999264929359, iteration: 20314
loss: 1.0051697492599487,grad_norm: 0.9999991998691348, iteration: 20315
loss: 0.9724282026290894,grad_norm: 0.9492899955974342, iteration: 20316
loss: 1.0217214822769165,grad_norm: 0.9999990917591681, iteration: 20317
loss: 1.0018407106399536,grad_norm: 0.9728475543100497, iteration: 20318
loss: 1.0094114542007446,grad_norm: 0.9999989557850666, iteration: 20319
loss: 1.0501418113708496,grad_norm: 0.9999994847843389, iteration: 20320
loss: 0.9843457341194153,grad_norm: 0.9999990009491687, iteration: 20321
loss: 1.007971167564392,grad_norm: 0.9783622973830826, iteration: 20322
loss: 1.0201380252838135,grad_norm: 0.99068760886773, iteration: 20323
loss: 1.0277165174484253,grad_norm: 0.8684788174324618, iteration: 20324
loss: 1.0231083631515503,grad_norm: 0.9999993338604906, iteration: 20325
loss: 1.0262600183486938,grad_norm: 0.9999990878860419, iteration: 20326
loss: 1.0347768068313599,grad_norm: 0.9999991072233456, iteration: 20327
loss: 1.0413293838500977,grad_norm: 0.9518177376143919, iteration: 20328
loss: 1.0159039497375488,grad_norm: 0.9999990785085902, iteration: 20329
loss: 1.0164999961853027,grad_norm: 0.9999997711786546, iteration: 20330
loss: 1.1060360670089722,grad_norm: 0.9999998831758236, iteration: 20331
loss: 0.9783835411071777,grad_norm: 0.9999994793376916, iteration: 20332
loss: 0.9922116994857788,grad_norm: 0.9999991148610121, iteration: 20333
loss: 1.0044735670089722,grad_norm: 0.9999993445067722, iteration: 20334
loss: 0.9835124015808105,grad_norm: 0.9552008996869473, iteration: 20335
loss: 1.005053162574768,grad_norm: 0.9999991095011451, iteration: 20336
loss: 1.0173925161361694,grad_norm: 0.9999991028812751, iteration: 20337
loss: 1.0322104692459106,grad_norm: 0.9999989557503305, iteration: 20338
loss: 1.0241296291351318,grad_norm: 0.9999990655269902, iteration: 20339
loss: 1.041649580001831,grad_norm: 0.999999477028328, iteration: 20340
loss: 1.04781174659729,grad_norm: 0.9999996887359162, iteration: 20341
loss: 1.0134308338165283,grad_norm: 0.999999159919409, iteration: 20342
loss: 1.0227850675582886,grad_norm: 0.9320426333185109, iteration: 20343
loss: 0.9756280183792114,grad_norm: 0.9999992883666984, iteration: 20344
loss: 1.0500057935714722,grad_norm: 0.9999992969129516, iteration: 20345
loss: 1.142063856124878,grad_norm: 0.999999650485163, iteration: 20346
loss: 0.9884356260299683,grad_norm: 0.9999991120742635, iteration: 20347
loss: 1.0983420610427856,grad_norm: 0.9999991583147418, iteration: 20348
loss: 0.9959825873374939,grad_norm: 0.9999991919957848, iteration: 20349
loss: 1.0450886487960815,grad_norm: 0.999999101595475, iteration: 20350
loss: 1.0188325643539429,grad_norm: 0.9999993503922945, iteration: 20351
loss: 1.054146409034729,grad_norm: 0.9999990958123619, iteration: 20352
loss: 1.0079302787780762,grad_norm: 0.9999990992238711, iteration: 20353
loss: 1.035778284072876,grad_norm: 0.9999991215702205, iteration: 20354
loss: 1.0025185346603394,grad_norm: 0.9999992998227778, iteration: 20355
loss: 1.0356718301773071,grad_norm: 0.986664658206766, iteration: 20356
loss: 0.9586666822433472,grad_norm: 0.9999996408784698, iteration: 20357
loss: 1.083901047706604,grad_norm: 0.9999994195643757, iteration: 20358
loss: 1.0091373920440674,grad_norm: 0.9999992560059855, iteration: 20359
loss: 1.0155662298202515,grad_norm: 0.9999990894850077, iteration: 20360
loss: 1.0078181028366089,grad_norm: 0.999999123541866, iteration: 20361
loss: 1.0367424488067627,grad_norm: 0.9999996756082835, iteration: 20362
loss: 0.9844204187393188,grad_norm: 0.9276695393548321, iteration: 20363
loss: 1.0092304944992065,grad_norm: 0.9703794402638638, iteration: 20364
loss: 1.0162601470947266,grad_norm: 0.9999997622874778, iteration: 20365
loss: 1.026353120803833,grad_norm: 0.9999998328811872, iteration: 20366
loss: 1.037779688835144,grad_norm: 0.9999997197881578, iteration: 20367
loss: 1.000494360923767,grad_norm: 0.9999991513999118, iteration: 20368
loss: 1.090935468673706,grad_norm: 0.9999998188873042, iteration: 20369
loss: 0.9802666306495667,grad_norm: 0.9999991466941177, iteration: 20370
loss: 1.015316367149353,grad_norm: 0.9999993507873565, iteration: 20371
loss: 0.9782090187072754,grad_norm: 0.9528776620588608, iteration: 20372
loss: 1.065608024597168,grad_norm: 0.9999993072883223, iteration: 20373
loss: 1.0158723592758179,grad_norm: 0.9498548214437361, iteration: 20374
loss: 0.9958319067955017,grad_norm: 0.9918130902659871, iteration: 20375
loss: 1.053263783454895,grad_norm: 0.9999992101287049, iteration: 20376
loss: 1.0401358604431152,grad_norm: 0.952627446864044, iteration: 20377
loss: 1.058924674987793,grad_norm: 0.9999994637727496, iteration: 20378
loss: 1.0865466594696045,grad_norm: 0.9999992280433984, iteration: 20379
loss: 1.0108568668365479,grad_norm: 0.9999991214145187, iteration: 20380
loss: 1.0349456071853638,grad_norm: 0.9999999290578805, iteration: 20381
loss: 0.9638283848762512,grad_norm: 0.99999915494683, iteration: 20382
loss: 1.015538215637207,grad_norm: 0.9999994659524475, iteration: 20383
loss: 1.031363844871521,grad_norm: 0.9999995486878481, iteration: 20384
loss: 1.0132251977920532,grad_norm: 0.9999998031517445, iteration: 20385
loss: 1.0373106002807617,grad_norm: 0.9999991907211828, iteration: 20386
loss: 1.0205113887786865,grad_norm: 0.881438653056926, iteration: 20387
loss: 1.015822410583496,grad_norm: 0.9999991590852713, iteration: 20388
loss: 1.0023123025894165,grad_norm: 0.9393277366260246, iteration: 20389
loss: 1.035178303718567,grad_norm: 0.9999993426977516, iteration: 20390
loss: 1.0214042663574219,grad_norm: 0.9999992627147116, iteration: 20391
loss: 1.041401982307434,grad_norm: 0.9999997523805292, iteration: 20392
loss: 1.0495120286941528,grad_norm: 0.999999079091663, iteration: 20393
loss: 1.094795823097229,grad_norm: 0.9999997349915162, iteration: 20394
loss: 1.140200138092041,grad_norm: 0.9999996062827724, iteration: 20395
loss: 1.1774189472198486,grad_norm: 0.9999997856504539, iteration: 20396
loss: 0.9955359101295471,grad_norm: 0.9999991692819883, iteration: 20397
loss: 1.010756254196167,grad_norm: 0.9226545753033929, iteration: 20398
loss: 1.0498734712600708,grad_norm: 0.9999993906866512, iteration: 20399
loss: 1.0193665027618408,grad_norm: 0.9999989534479305, iteration: 20400
loss: 1.032999038696289,grad_norm: 0.9999997454408673, iteration: 20401
loss: 1.057066798210144,grad_norm: 0.9999997782331281, iteration: 20402
loss: 1.0064244270324707,grad_norm: 0.9999994811368836, iteration: 20403
loss: 1.0271023511886597,grad_norm: 0.9999991945083735, iteration: 20404
loss: 1.0136693716049194,grad_norm: 0.9999994445021279, iteration: 20405
loss: 1.0213598012924194,grad_norm: 0.999998992200872, iteration: 20406
loss: 1.0168582201004028,grad_norm: 0.927082978705681, iteration: 20407
loss: 1.0177065134048462,grad_norm: 0.8855975931398264, iteration: 20408
loss: 1.00203537940979,grad_norm: 0.9999990870650693, iteration: 20409
loss: 1.0509799718856812,grad_norm: 0.9503700597136064, iteration: 20410
loss: 1.019737958908081,grad_norm: 0.9999993429377537, iteration: 20411
loss: 1.030014157295227,grad_norm: 0.9999991421841073, iteration: 20412
loss: 1.008732795715332,grad_norm: 0.9999990193249672, iteration: 20413
loss: 1.045238733291626,grad_norm: 0.9999994790602874, iteration: 20414
loss: 0.9960092306137085,grad_norm: 0.9999991011961152, iteration: 20415
loss: 1.0087000131607056,grad_norm: 0.9999991974909486, iteration: 20416
loss: 1.0284689664840698,grad_norm: 0.9999993462948836, iteration: 20417
loss: 0.9960632920265198,grad_norm: 0.9582879877272373, iteration: 20418
loss: 1.0250449180603027,grad_norm: 0.9999991995598742, iteration: 20419
loss: 1.021917462348938,grad_norm: 0.999999219144105, iteration: 20420
loss: 1.0121961832046509,grad_norm: 0.9999993727569092, iteration: 20421
loss: 1.016050100326538,grad_norm: 0.99999903800599, iteration: 20422
loss: 1.0095593929290771,grad_norm: 0.8120249308926816, iteration: 20423
loss: 1.0935479402542114,grad_norm: 0.9999991941368679, iteration: 20424
loss: 1.1296334266662598,grad_norm: 0.9999993617044642, iteration: 20425
loss: 1.172329306602478,grad_norm: 0.9999997235809687, iteration: 20426
loss: 1.0012638568878174,grad_norm: 0.9999991672517831, iteration: 20427
loss: 1.0201385021209717,grad_norm: 0.8753310678883952, iteration: 20428
loss: 1.0067317485809326,grad_norm: 0.999999009006148, iteration: 20429
loss: 1.0669533014297485,grad_norm: 0.9999989612826178, iteration: 20430
loss: 1.0414608716964722,grad_norm: 0.9999994117557423, iteration: 20431
loss: 1.0015019178390503,grad_norm: 0.9999994898107247, iteration: 20432
loss: 1.03736412525177,grad_norm: 0.9999990615597116, iteration: 20433
loss: 1.0242167711257935,grad_norm: 0.9999993344191758, iteration: 20434
loss: 1.0601234436035156,grad_norm: 0.9671717992570712, iteration: 20435
loss: 1.0063363313674927,grad_norm: 0.9999990576696097, iteration: 20436
loss: 1.0025607347488403,grad_norm: 0.9874822931751179, iteration: 20437
loss: 1.0326470136642456,grad_norm: 0.999999141028252, iteration: 20438
loss: 1.0197393894195557,grad_norm: 0.9999994335115542, iteration: 20439
loss: 1.0340524911880493,grad_norm: 0.9999990300771311, iteration: 20440
loss: 1.0589101314544678,grad_norm: 0.9999996278784506, iteration: 20441
loss: 1.057697057723999,grad_norm: 0.9999994708318807, iteration: 20442
loss: 0.9967692494392395,grad_norm: 0.9999991329009099, iteration: 20443
loss: 1.0347143411636353,grad_norm: 0.9999992615189379, iteration: 20444
loss: 1.00128173828125,grad_norm: 0.9393361485838815, iteration: 20445
loss: 1.0351283550262451,grad_norm: 0.9999989666504701, iteration: 20446
loss: 1.0746275186538696,grad_norm: 0.999999236631672, iteration: 20447
loss: 1.0341185331344604,grad_norm: 0.999999162937053, iteration: 20448
loss: 1.0834225416183472,grad_norm: 0.9999995462515794, iteration: 20449
loss: 1.0480183362960815,grad_norm: 0.9999992932127639, iteration: 20450
loss: 0.994874894618988,grad_norm: 0.999999161716707, iteration: 20451
loss: 1.0297858715057373,grad_norm: 0.9869838270507179, iteration: 20452
loss: 0.9981721043586731,grad_norm: 0.999999191009944, iteration: 20453
loss: 1.0135184526443481,grad_norm: 0.999999206405407, iteration: 20454
loss: 1.0306403636932373,grad_norm: 0.9999991190777009, iteration: 20455
loss: 1.0301454067230225,grad_norm: 0.999998999388071, iteration: 20456
loss: 1.0578428506851196,grad_norm: 0.9863061513996872, iteration: 20457
loss: 1.0244423151016235,grad_norm: 0.9999990081997986, iteration: 20458
loss: 1.043739676475525,grad_norm: 0.9999994674072834, iteration: 20459
loss: 0.9639728665351868,grad_norm: 0.9999991421941355, iteration: 20460
loss: 1.0206810235977173,grad_norm: 0.9778072833827953, iteration: 20461
loss: 1.0029535293579102,grad_norm: 0.9999991362898804, iteration: 20462
loss: 1.0428438186645508,grad_norm: 0.9999992305275293, iteration: 20463
loss: 1.0096596479415894,grad_norm: 0.9999990967651516, iteration: 20464
loss: 0.9895637035369873,grad_norm: 0.9999992475564028, iteration: 20465
loss: 1.0382214784622192,grad_norm: 0.9999990369190743, iteration: 20466
loss: 1.0337423086166382,grad_norm: 0.9999990123342828, iteration: 20467
loss: 1.0207819938659668,grad_norm: 0.9361427886845785, iteration: 20468
loss: 0.9828693866729736,grad_norm: 0.9999993742281427, iteration: 20469
loss: 1.000352144241333,grad_norm: 0.9999990229238276, iteration: 20470
loss: 1.0105571746826172,grad_norm: 0.9999991143304701, iteration: 20471
loss: 1.0332518815994263,grad_norm: 0.9999992069719718, iteration: 20472
loss: 1.0230790376663208,grad_norm: 0.9999990181337945, iteration: 20473
loss: 1.0515880584716797,grad_norm: 0.9650589892101126, iteration: 20474
loss: 1.0263022184371948,grad_norm: 0.999999228675169, iteration: 20475
loss: 1.0135968923568726,grad_norm: 0.9999993342202658, iteration: 20476
loss: 1.013472080230713,grad_norm: 0.9999993457283602, iteration: 20477
loss: 1.0324208736419678,grad_norm: 0.9166595588798878, iteration: 20478
loss: 1.0470417737960815,grad_norm: 0.9999992676070564, iteration: 20479
loss: 1.0176180601119995,grad_norm: 0.999999294904937, iteration: 20480
loss: 1.0401242971420288,grad_norm: 0.9999996500204638, iteration: 20481
loss: 1.0198838710784912,grad_norm: 0.9999994507561369, iteration: 20482
loss: 1.0401883125305176,grad_norm: 0.9999991617122688, iteration: 20483
loss: 1.019039273262024,grad_norm: 0.9999997895847048, iteration: 20484
loss: 1.0597045421600342,grad_norm: 0.999999496099185, iteration: 20485
loss: 1.0303417444229126,grad_norm: 0.9288378701750212, iteration: 20486
loss: 0.9887450933456421,grad_norm: 0.9999990841035451, iteration: 20487
loss: 1.0019978284835815,grad_norm: 0.9999992000260919, iteration: 20488
loss: 1.056774616241455,grad_norm: 0.9999990984397791, iteration: 20489
loss: 1.0417619943618774,grad_norm: 0.999999489654516, iteration: 20490
loss: 1.0174031257629395,grad_norm: 0.9332311923189909, iteration: 20491
loss: 1.053939938545227,grad_norm: 0.999999232294966, iteration: 20492
loss: 1.0638798475265503,grad_norm: 0.9999994436559445, iteration: 20493
loss: 0.9995774626731873,grad_norm: 0.9999989951071698, iteration: 20494
loss: 1.0403684377670288,grad_norm: 0.9999990536179779, iteration: 20495
loss: 0.9809361100196838,grad_norm: 0.9999991957678611, iteration: 20496
loss: 1.030678629875183,grad_norm: 0.9999992058149276, iteration: 20497
loss: 1.0166373252868652,grad_norm: 0.9999991775908946, iteration: 20498
loss: 0.9878820776939392,grad_norm: 0.9598852154635944, iteration: 20499
loss: 1.0463720560073853,grad_norm: 0.9999996378686372, iteration: 20500
loss: 1.0219804048538208,grad_norm: 0.9999992943510171, iteration: 20501
loss: 1.007202386856079,grad_norm: 0.9999995689408782, iteration: 20502
loss: 1.000332236289978,grad_norm: 0.9960164584216259, iteration: 20503
loss: 1.0184942483901978,grad_norm: 0.9999991733216196, iteration: 20504
loss: 1.0546506643295288,grad_norm: 0.9999992374162967, iteration: 20505
loss: 1.0654832124710083,grad_norm: 0.9999997446420171, iteration: 20506
loss: 1.0346037149429321,grad_norm: 0.9999990677424947, iteration: 20507
loss: 1.0342649221420288,grad_norm: 0.9999991063765731, iteration: 20508
loss: 1.0188795328140259,grad_norm: 0.9999992508615498, iteration: 20509
loss: 1.133916974067688,grad_norm: 0.9999999404711352, iteration: 20510
loss: 1.0315231084823608,grad_norm: 0.9505755578536333, iteration: 20511
loss: 0.9846646189689636,grad_norm: 0.9999990618370646, iteration: 20512
loss: 1.0103569030761719,grad_norm: 0.9999993029640819, iteration: 20513
loss: 1.029584527015686,grad_norm: 0.999999567507932, iteration: 20514
loss: 0.9911208748817444,grad_norm: 0.9999991013858155, iteration: 20515
loss: 1.0486255884170532,grad_norm: 0.9999994057804004, iteration: 20516
loss: 1.0539203882217407,grad_norm: 0.9999996417063514, iteration: 20517
loss: 0.9830343723297119,grad_norm: 0.854687796906766, iteration: 20518
loss: 0.9627830982208252,grad_norm: 0.9999992192119842, iteration: 20519
loss: 1.0155997276306152,grad_norm: 0.9999990273658965, iteration: 20520
loss: 1.0338237285614014,grad_norm: 0.9999994321780988, iteration: 20521
loss: 1.0520119667053223,grad_norm: 0.9999992210341256, iteration: 20522
loss: 1.0307366847991943,grad_norm: 0.9999995989128116, iteration: 20523
loss: 0.9822528958320618,grad_norm: 0.9999991602856387, iteration: 20524
loss: 1.0249857902526855,grad_norm: 0.999999611277095, iteration: 20525
loss: 0.9898995161056519,grad_norm: 0.9999993693181752, iteration: 20526
loss: 1.0252013206481934,grad_norm: 0.9999992574098939, iteration: 20527
loss: 1.0474971532821655,grad_norm: 0.999999450150768, iteration: 20528
loss: 1.0098249912261963,grad_norm: 0.9999991873418999, iteration: 20529
loss: 1.0239036083221436,grad_norm: 0.9875920060806299, iteration: 20530
loss: 1.0423935651779175,grad_norm: 0.9999995235262915, iteration: 20531
loss: 1.0080026388168335,grad_norm: 0.9999997856892514, iteration: 20532
loss: 0.9992222189903259,grad_norm: 0.9999989838373009, iteration: 20533
loss: 1.0100643634796143,grad_norm: 0.9999990607160005, iteration: 20534
loss: 0.9835362434387207,grad_norm: 0.9999992556965841, iteration: 20535
loss: 1.0368118286132812,grad_norm: 0.999999746994302, iteration: 20536
loss: 1.034670352935791,grad_norm: 0.9999995683109744, iteration: 20537
loss: 1.0693678855895996,grad_norm: 0.9999993449151726, iteration: 20538
loss: 1.0072158575057983,grad_norm: 0.9999994848331049, iteration: 20539
loss: 0.9585784077644348,grad_norm: 0.9999991979693469, iteration: 20540
loss: 0.9900702834129333,grad_norm: 0.9999992949682837, iteration: 20541
loss: 0.9920343160629272,grad_norm: 0.8714009641624216, iteration: 20542
loss: 0.9770833253860474,grad_norm: 0.9999991584355126, iteration: 20543
loss: 1.083095908164978,grad_norm: 0.9999994237287722, iteration: 20544
loss: 1.0196973085403442,grad_norm: 0.999999281562563, iteration: 20545
loss: 0.9916850924491882,grad_norm: 0.999999139692604, iteration: 20546
loss: 1.0637646913528442,grad_norm: 0.9999996184732877, iteration: 20547
loss: 1.0135911703109741,grad_norm: 0.9999992462106598, iteration: 20548
loss: 1.000118613243103,grad_norm: 0.999999323032805, iteration: 20549
loss: 1.0268744230270386,grad_norm: 0.9999991706440419, iteration: 20550
loss: 1.0339704751968384,grad_norm: 0.927295315165434, iteration: 20551
loss: 1.0176880359649658,grad_norm: 0.9999993284266294, iteration: 20552
loss: 0.992493748664856,grad_norm: 0.9999990975338445, iteration: 20553
loss: 1.0120893716812134,grad_norm: 0.9781167766027172, iteration: 20554
loss: 0.9677462577819824,grad_norm: 0.8919080938356877, iteration: 20555
loss: 1.0272583961486816,grad_norm: 0.9999989532001291, iteration: 20556
loss: 1.002442479133606,grad_norm: 0.9999993339141953, iteration: 20557
loss: 1.0395134687423706,grad_norm: 0.9999996105052963, iteration: 20558
loss: 1.0400129556655884,grad_norm: 0.9999998264782833, iteration: 20559
loss: 1.2256338596343994,grad_norm: 0.999999697685187, iteration: 20560
loss: 1.012087345123291,grad_norm: 0.9999993632573023, iteration: 20561
loss: 1.002416729927063,grad_norm: 0.9999991648590716, iteration: 20562
loss: 1.0376884937286377,grad_norm: 0.999999087587629, iteration: 20563
loss: 1.053214192390442,grad_norm: 0.9999991159818703, iteration: 20564
loss: 1.0650887489318848,grad_norm: 0.9999997599615171, iteration: 20565
loss: 1.0287266969680786,grad_norm: 0.9583413212782146, iteration: 20566
loss: 1.0255447626113892,grad_norm: 0.9999995844951367, iteration: 20567
loss: 1.0174254179000854,grad_norm: 0.999999432782899, iteration: 20568
loss: 1.0472861528396606,grad_norm: 0.9999990821688298, iteration: 20569
loss: 1.0128494501113892,grad_norm: 0.9999990260672845, iteration: 20570
loss: 1.0353695154190063,grad_norm: 0.9999992312657436, iteration: 20571
loss: 1.0308990478515625,grad_norm: 0.9999992667362834, iteration: 20572
loss: 0.9922603368759155,grad_norm: 0.9999992273788647, iteration: 20573
loss: 1.036346197128296,grad_norm: 0.9999992950357672, iteration: 20574
loss: 0.9964327812194824,grad_norm: 0.9999991533617741, iteration: 20575
loss: 1.010504126548767,grad_norm: 0.9180615305549621, iteration: 20576
loss: 0.987544059753418,grad_norm: 0.9999996169237942, iteration: 20577
loss: 1.0235354900360107,grad_norm: 0.999999058652488, iteration: 20578
loss: 1.0267398357391357,grad_norm: 0.9999989373282088, iteration: 20579
loss: 1.115025281906128,grad_norm: 0.9999997528054595, iteration: 20580
loss: 1.1545841693878174,grad_norm: 0.9999994564197534, iteration: 20581
loss: 1.0424354076385498,grad_norm: 0.9999992498492705, iteration: 20582
loss: 0.9786868095397949,grad_norm: 0.9999990913975474, iteration: 20583
loss: 0.996372401714325,grad_norm: 0.999999257129159, iteration: 20584
loss: 0.9925501942634583,grad_norm: 0.999999073271498, iteration: 20585
loss: 0.9807668924331665,grad_norm: 0.9999991582745605, iteration: 20586
loss: 1.0562547445297241,grad_norm: 0.9999991994019685, iteration: 20587
loss: 1.0161319971084595,grad_norm: 0.9992151523042636, iteration: 20588
loss: 1.0503182411193848,grad_norm: 0.9999996629910736, iteration: 20589
loss: 1.0284373760223389,grad_norm: 0.9999992226421383, iteration: 20590
loss: 1.0162934064865112,grad_norm: 0.9999992965986197, iteration: 20591
loss: 1.0163861513137817,grad_norm: 0.9999991120420549, iteration: 20592
loss: 1.036890983581543,grad_norm: 0.934815820303948, iteration: 20593
loss: 1.0501295328140259,grad_norm: 0.9999993371793927, iteration: 20594
loss: 1.0538198947906494,grad_norm: 0.9999992570674051, iteration: 20595
loss: 1.0215027332305908,grad_norm: 0.9999991637000828, iteration: 20596
loss: 1.0353124141693115,grad_norm: 0.9999991348853372, iteration: 20597
loss: 1.0105478763580322,grad_norm: 0.9999991511810559, iteration: 20598
loss: 1.0125467777252197,grad_norm: 0.9999993094486586, iteration: 20599
loss: 1.0381790399551392,grad_norm: 0.9999993681018069, iteration: 20600
loss: 1.0761127471923828,grad_norm: 0.9999994222668075, iteration: 20601
loss: 1.0182087421417236,grad_norm: 0.9999990643307742, iteration: 20602
loss: 0.9419761896133423,grad_norm: 0.9999992226516766, iteration: 20603
loss: 1.0292940139770508,grad_norm: 0.9999991958106259, iteration: 20604
loss: 0.9968365430831909,grad_norm: 0.8467105190265959, iteration: 20605
loss: 1.031116247177124,grad_norm: 0.9999995505129936, iteration: 20606
loss: 1.0007308721542358,grad_norm: 0.9122038003506194, iteration: 20607
loss: 1.0238655805587769,grad_norm: 0.999999223156376, iteration: 20608
loss: 0.9796386361122131,grad_norm: 0.9999993405006231, iteration: 20609
loss: 1.0135587453842163,grad_norm: 0.9999990156913233, iteration: 20610
loss: 0.9901174306869507,grad_norm: 0.9999993601324689, iteration: 20611
loss: 1.0057735443115234,grad_norm: 0.9999990948189105, iteration: 20612
loss: 0.9942206144332886,grad_norm: 0.9934925960368114, iteration: 20613
loss: 0.9741007685661316,grad_norm: 0.9999990780905982, iteration: 20614
loss: 1.0318427085876465,grad_norm: 0.9999990863078366, iteration: 20615
loss: 1.0205830335617065,grad_norm: 0.9919311685384932, iteration: 20616
loss: 0.984665036201477,grad_norm: 0.9999990568507608, iteration: 20617
loss: 1.0254472494125366,grad_norm: 0.9041500362841813, iteration: 20618
loss: 1.0034527778625488,grad_norm: 0.9999992680339067, iteration: 20619
loss: 0.9968016147613525,grad_norm: 0.9999993721704219, iteration: 20620
loss: 0.9815643429756165,grad_norm: 0.9566976484412867, iteration: 20621
loss: 0.997538149356842,grad_norm: 0.9654199901635658, iteration: 20622
loss: 0.9812975525856018,grad_norm: 0.9999990301958482, iteration: 20623
loss: 1.0160008668899536,grad_norm: 0.9999991037843012, iteration: 20624
loss: 1.0360069274902344,grad_norm: 0.9999990715829283, iteration: 20625
loss: 0.9965888261795044,grad_norm: 0.9999992466663927, iteration: 20626
loss: 1.0233848094940186,grad_norm: 0.9481914808484827, iteration: 20627
loss: 0.9955905675888062,grad_norm: 0.9999991861415042, iteration: 20628
loss: 1.012664556503296,grad_norm: 0.9999992145545522, iteration: 20629
loss: 1.0360596179962158,grad_norm: 0.9999991821602905, iteration: 20630
loss: 1.038448452949524,grad_norm: 0.9999991653399115, iteration: 20631
loss: 1.0249170064926147,grad_norm: 0.9999991531456333, iteration: 20632
loss: 1.0118451118469238,grad_norm: 0.9999992360052704, iteration: 20633
loss: 1.0688093900680542,grad_norm: 0.9999995869342476, iteration: 20634
loss: 0.9762338399887085,grad_norm: 0.9999990210240092, iteration: 20635
loss: 1.0014183521270752,grad_norm: 0.8307973772137248, iteration: 20636
loss: 1.065802812576294,grad_norm: 0.9999991083911822, iteration: 20637
loss: 0.9742479920387268,grad_norm: 0.9999991452732547, iteration: 20638
loss: 0.9916355609893799,grad_norm: 0.9999991187953683, iteration: 20639
loss: 1.0044119358062744,grad_norm: 0.9999992858064933, iteration: 20640
loss: 0.9780855774879456,grad_norm: 0.8729843436943722, iteration: 20641
loss: 1.0227932929992676,grad_norm: 0.9999992666596712, iteration: 20642
loss: 1.05619478225708,grad_norm: 0.9999993986483279, iteration: 20643
loss: 1.0081630945205688,grad_norm: 0.9999991861240395, iteration: 20644
loss: 1.0303605794906616,grad_norm: 0.9999994807611196, iteration: 20645
loss: 1.0485278367996216,grad_norm: 0.9999996078530405, iteration: 20646
loss: 0.9905779361724854,grad_norm: 0.9999990652934008, iteration: 20647
loss: 1.069408893585205,grad_norm: 0.9999990519528424, iteration: 20648
loss: 1.0150139331817627,grad_norm: 0.9977665460195844, iteration: 20649
loss: 1.0059750080108643,grad_norm: 0.9999991085605577, iteration: 20650
loss: 1.006034255027771,grad_norm: 0.9999992088885128, iteration: 20651
loss: 0.9853094816207886,grad_norm: 0.9999992316112044, iteration: 20652
loss: 1.0288174152374268,grad_norm: 0.999999259046667, iteration: 20653
loss: 1.0287927389144897,grad_norm: 0.8741009858818068, iteration: 20654
loss: 1.0206702947616577,grad_norm: 0.9999990530244688, iteration: 20655
loss: 1.0147340297698975,grad_norm: 0.9999991904183827, iteration: 20656
loss: 1.0392532348632812,grad_norm: 0.897020372742184, iteration: 20657
loss: 1.041277527809143,grad_norm: 0.897115635715635, iteration: 20658
loss: 1.037588357925415,grad_norm: 0.9999990990170796, iteration: 20659
loss: 0.9773588180541992,grad_norm: 0.9999992399587712, iteration: 20660
loss: 0.9792801141738892,grad_norm: 0.9903530325153734, iteration: 20661
loss: 0.9889122843742371,grad_norm: 0.9322323156462567, iteration: 20662
loss: 0.9777256846427917,grad_norm: 0.9999991284577103, iteration: 20663
loss: 1.0165921449661255,grad_norm: 0.999999096566576, iteration: 20664
loss: 0.9658922553062439,grad_norm: 0.9999990897643786, iteration: 20665
loss: 1.0462403297424316,grad_norm: 0.9999996841839026, iteration: 20666
loss: 1.009811282157898,grad_norm: 0.9999994142571109, iteration: 20667
loss: 1.029848575592041,grad_norm: 0.9999994246694185, iteration: 20668
loss: 1.022656798362732,grad_norm: 0.9999991556302983, iteration: 20669
loss: 1.0646603107452393,grad_norm: 0.9999994229800433, iteration: 20670
loss: 1.0045222043991089,grad_norm: 0.9999991841146998, iteration: 20671
loss: 0.9968562126159668,grad_norm: 0.9979392569039698, iteration: 20672
loss: 0.9403043985366821,grad_norm: 0.9828908913352172, iteration: 20673
loss: 1.0068317651748657,grad_norm: 0.994439920032697, iteration: 20674
loss: 0.9828336834907532,grad_norm: 0.9999990326791507, iteration: 20675
loss: 1.042555332183838,grad_norm: 0.9999992605059429, iteration: 20676
loss: 1.0576869249343872,grad_norm: 0.9999994898069895, iteration: 20677
loss: 0.9954338669776917,grad_norm: 0.9639064659090515, iteration: 20678
loss: 0.9799039959907532,grad_norm: 0.9999990325071966, iteration: 20679
loss: 1.1106865406036377,grad_norm: 0.9999998882611708, iteration: 20680
loss: 1.0059468746185303,grad_norm: 0.999999183179849, iteration: 20681
loss: 0.984933614730835,grad_norm: 0.9854250448391055, iteration: 20682
loss: 1.0407021045684814,grad_norm: 0.9999991239446036, iteration: 20683
loss: 0.9810149669647217,grad_norm: 0.9999994142590626, iteration: 20684
loss: 0.9986329674720764,grad_norm: 0.9999990715425047, iteration: 20685
loss: 1.0357578992843628,grad_norm: 0.909899768802722, iteration: 20686
loss: 1.0804988145828247,grad_norm: 0.9999998398591536, iteration: 20687
loss: 1.0039092302322388,grad_norm: 0.9541502233036899, iteration: 20688
loss: 1.0350415706634521,grad_norm: 0.9999993194079162, iteration: 20689
loss: 1.096352458000183,grad_norm: 0.9999992892507988, iteration: 20690
loss: 1.0513203144073486,grad_norm: 0.9999995908622168, iteration: 20691
loss: 0.9705700278282166,grad_norm: 0.9680112739943241, iteration: 20692
loss: 1.0355533361434937,grad_norm: 0.9999993276477082, iteration: 20693
loss: 1.0361075401306152,grad_norm: 0.9999995313553663, iteration: 20694
loss: 1.0443261861801147,grad_norm: 0.9999995222364743, iteration: 20695
loss: 1.0518215894699097,grad_norm: 0.999999215243833, iteration: 20696
loss: 0.9729624390602112,grad_norm: 0.9855798367368775, iteration: 20697
loss: 1.0252981185913086,grad_norm: 0.9999992334337854, iteration: 20698
loss: 0.9909526109695435,grad_norm: 0.9999990927392354, iteration: 20699
loss: 0.9847937226295471,grad_norm: 0.9999992595872828, iteration: 20700
loss: 1.002684235572815,grad_norm: 0.9168779147738769, iteration: 20701
loss: 1.0104502439498901,grad_norm: 0.9999991898812617, iteration: 20702
loss: 0.997812807559967,grad_norm: 0.9999991211573386, iteration: 20703
loss: 1.0028260946273804,grad_norm: 0.9999992141756084, iteration: 20704
loss: 1.014043927192688,grad_norm: 0.8746744297406012, iteration: 20705
loss: 1.0266977548599243,grad_norm: 0.9549065789623311, iteration: 20706
loss: 0.9928403496742249,grad_norm: 0.9999992352002286, iteration: 20707
loss: 1.0204700231552124,grad_norm: 0.9999992732406638, iteration: 20708
loss: 0.9933823347091675,grad_norm: 0.999999182222005, iteration: 20709
loss: 1.0963706970214844,grad_norm: 0.9999995583466481, iteration: 20710
loss: 0.9542693495750427,grad_norm: 0.999999125955656, iteration: 20711
loss: 0.9873336553573608,grad_norm: 0.999999239861649, iteration: 20712
loss: 1.0421637296676636,grad_norm: 0.9999990467241464, iteration: 20713
loss: 1.0064396858215332,grad_norm: 0.8436982466881932, iteration: 20714
loss: 1.0728912353515625,grad_norm: 0.9999998274417834, iteration: 20715
loss: 1.0334073305130005,grad_norm: 0.9999989684721161, iteration: 20716
loss: 1.025793194770813,grad_norm: 0.9999992378188662, iteration: 20717
loss: 1.0551154613494873,grad_norm: 0.9999994445044996, iteration: 20718
loss: 1.0702639818191528,grad_norm: 0.9999991055450276, iteration: 20719
loss: 1.0369035005569458,grad_norm: 0.9999990830099083, iteration: 20720
loss: 1.0596927404403687,grad_norm: 0.9999992550048233, iteration: 20721
loss: 1.0230058431625366,grad_norm: 0.9999990413919818, iteration: 20722
loss: 0.9943509101867676,grad_norm: 0.9132910088842419, iteration: 20723
loss: 0.9858886003494263,grad_norm: 0.9999990409569105, iteration: 20724
loss: 0.9721841812133789,grad_norm: 0.927414597157751, iteration: 20725
loss: 1.0294679403305054,grad_norm: 0.9999992055655553, iteration: 20726
loss: 1.0175364017486572,grad_norm: 0.9984303400728559, iteration: 20727
loss: 1.0322120189666748,grad_norm: 0.9593664168040842, iteration: 20728
loss: 1.0181807279586792,grad_norm: 0.999999069842682, iteration: 20729
loss: 1.0121821165084839,grad_norm: 0.9999991561144582, iteration: 20730
loss: 1.0058248043060303,grad_norm: 0.9999992583331808, iteration: 20731
loss: 0.9973160028457642,grad_norm: 0.999999066740767, iteration: 20732
loss: 1.014701247215271,grad_norm: 0.9999989829689641, iteration: 20733
loss: 0.9725465774536133,grad_norm: 0.9999991511969852, iteration: 20734
loss: 1.0465444326400757,grad_norm: 0.9999997056678482, iteration: 20735
loss: 0.9858406782150269,grad_norm: 0.9350563926918242, iteration: 20736
loss: 1.0257134437561035,grad_norm: 0.9999991387872273, iteration: 20737
loss: 1.0210018157958984,grad_norm: 0.9295414801517072, iteration: 20738
loss: 1.0198432207107544,grad_norm: 0.9999992319506436, iteration: 20739
loss: 0.9707395434379578,grad_norm: 0.9999992897247079, iteration: 20740
loss: 1.0283315181732178,grad_norm: 0.9607609950078108, iteration: 20741
loss: 0.9918668866157532,grad_norm: 0.9999992013018275, iteration: 20742
loss: 1.0223873853683472,grad_norm: 0.999999042919878, iteration: 20743
loss: 1.0083616971969604,grad_norm: 0.9999991224665518, iteration: 20744
loss: 1.0192975997924805,grad_norm: 0.9810792450283717, iteration: 20745
loss: 1.0146095752716064,grad_norm: 0.9674029638457491, iteration: 20746
loss: 0.9939559102058411,grad_norm: 0.9999990997583038, iteration: 20747
loss: 1.0145477056503296,grad_norm: 0.9999991147437725, iteration: 20748
loss: 1.0049282312393188,grad_norm: 0.9711687136020165, iteration: 20749
loss: 1.0154824256896973,grad_norm: 0.9522178387464517, iteration: 20750
loss: 1.0330313444137573,grad_norm: 0.9999990392845698, iteration: 20751
loss: 0.9825872778892517,grad_norm: 0.9999991723729482, iteration: 20752
loss: 0.9923774600028992,grad_norm: 0.9999990486860865, iteration: 20753
loss: 1.007797122001648,grad_norm: 0.9469691780519796, iteration: 20754
loss: 1.0374358892440796,grad_norm: 0.9999993229977536, iteration: 20755
loss: 1.0383597612380981,grad_norm: 0.9920487175317151, iteration: 20756
loss: 0.9877484440803528,grad_norm: 0.9897165476971027, iteration: 20757
loss: 0.9953794479370117,grad_norm: 0.935531860610745, iteration: 20758
loss: 1.0176790952682495,grad_norm: 0.9999995374602136, iteration: 20759
loss: 0.9733976721763611,grad_norm: 0.9742972742372817, iteration: 20760
loss: 1.0083377361297607,grad_norm: 0.9999995152439409, iteration: 20761
loss: 1.0281957387924194,grad_norm: 0.9999995713541456, iteration: 20762
loss: 0.977927029132843,grad_norm: 0.9999990727727601, iteration: 20763
loss: 1.0184268951416016,grad_norm: 0.999998979193191, iteration: 20764
loss: 1.0013291835784912,grad_norm: 0.999850278770384, iteration: 20765
loss: 1.0461009740829468,grad_norm: 0.9999990768640021, iteration: 20766
loss: 1.0280239582061768,grad_norm: 0.9186962734444304, iteration: 20767
loss: 0.9908274412155151,grad_norm: 0.785863622382477, iteration: 20768
loss: 1.01978600025177,grad_norm: 0.9999997856546647, iteration: 20769
loss: 1.0191493034362793,grad_norm: 0.9999991870184497, iteration: 20770
loss: 1.0067265033721924,grad_norm: 0.9490547646493175, iteration: 20771
loss: 1.0349643230438232,grad_norm: 0.9999992371745366, iteration: 20772
loss: 0.9706370234489441,grad_norm: 0.8969877037970713, iteration: 20773
loss: 1.079641580581665,grad_norm: 0.9999992909849011, iteration: 20774
loss: 1.0459495782852173,grad_norm: 0.9999990506095353, iteration: 20775
loss: 1.0206549167633057,grad_norm: 0.9887747810757281, iteration: 20776
loss: 0.993880569934845,grad_norm: 0.9999990571274086, iteration: 20777
loss: 1.044493317604065,grad_norm: 0.9999993395749074, iteration: 20778
loss: 1.0221936702728271,grad_norm: 0.9999991191287204, iteration: 20779
loss: 1.044854760169983,grad_norm: 0.9999991432071856, iteration: 20780
loss: 1.0309683084487915,grad_norm: 0.9999993471732898, iteration: 20781
loss: 1.0193421840667725,grad_norm: 0.991661137468077, iteration: 20782
loss: 1.0316507816314697,grad_norm: 0.9316804472955462, iteration: 20783
loss: 1.0358110666275024,grad_norm: 0.9999993971240082, iteration: 20784
loss: 1.037073016166687,grad_norm: 0.9999992370872867, iteration: 20785
loss: 1.1264926195144653,grad_norm: 0.9999996203196914, iteration: 20786
loss: 1.0018972158432007,grad_norm: 0.9681961117568146, iteration: 20787
loss: 1.0274485349655151,grad_norm: 0.9999991934924698, iteration: 20788
loss: 0.9844343066215515,grad_norm: 0.9999991210294891, iteration: 20789
loss: 1.0336480140686035,grad_norm: 0.9999990338499478, iteration: 20790
loss: 1.0441230535507202,grad_norm: 0.9999992639943902, iteration: 20791
loss: 1.0314199924468994,grad_norm: 0.9999989819872532, iteration: 20792
loss: 1.0501118898391724,grad_norm: 0.9999989511201219, iteration: 20793
loss: 1.0203129053115845,grad_norm: 0.9999994875768153, iteration: 20794
loss: 1.0117840766906738,grad_norm: 0.8712313389968457, iteration: 20795
loss: 1.0630136728286743,grad_norm: 0.9528054637066603, iteration: 20796
loss: 1.0632741451263428,grad_norm: 0.9999993391174273, iteration: 20797
loss: 1.0155383348464966,grad_norm: 0.9999992508800712, iteration: 20798
loss: 1.0600792169570923,grad_norm: 0.9999999380577377, iteration: 20799
loss: 0.9879556894302368,grad_norm: 0.9999992702349172, iteration: 20800
loss: 0.9984025359153748,grad_norm: 0.9999992108860143, iteration: 20801
loss: 1.0388461351394653,grad_norm: 0.9999991642806396, iteration: 20802
loss: 0.9960383772850037,grad_norm: 0.9999991399832805, iteration: 20803
loss: 0.9960388541221619,grad_norm: 0.9999990769613395, iteration: 20804
loss: 1.025089144706726,grad_norm: 0.9999993002341433, iteration: 20805
loss: 0.9915151000022888,grad_norm: 0.8554099061141038, iteration: 20806
loss: 1.0145823955535889,grad_norm: 0.9999991465163101, iteration: 20807
loss: 1.017127513885498,grad_norm: 0.999999168373774, iteration: 20808
loss: 1.0360264778137207,grad_norm: 0.999998960257076, iteration: 20809
loss: 1.0258996486663818,grad_norm: 0.9999991177755128, iteration: 20810
loss: 1.0244396924972534,grad_norm: 0.9999994910300705, iteration: 20811
loss: 1.0536037683486938,grad_norm: 0.9999991839993542, iteration: 20812
loss: 1.0415942668914795,grad_norm: 0.9999995969566525, iteration: 20813
loss: 1.0131412744522095,grad_norm: 0.999999108884147, iteration: 20814
loss: 1.028818130493164,grad_norm: 0.9598055124921173, iteration: 20815
loss: 1.0169259309768677,grad_norm: 0.999999126174503, iteration: 20816
loss: 1.046440601348877,grad_norm: 0.9999992480991914, iteration: 20817
loss: 1.0100005865097046,grad_norm: 0.9999993240360445, iteration: 20818
loss: 1.0300930738449097,grad_norm: 0.9999990414707144, iteration: 20819
loss: 0.9432817697525024,grad_norm: 0.9999994223628793, iteration: 20820
loss: 1.0722272396087646,grad_norm: 0.9999999675573173, iteration: 20821
loss: 1.067662239074707,grad_norm: 0.9999994243963907, iteration: 20822
loss: 1.0327285528182983,grad_norm: 0.9999990702040904, iteration: 20823
loss: 1.0666918754577637,grad_norm: 0.9999994935502333, iteration: 20824
loss: 0.9961718320846558,grad_norm: 0.9803237058729509, iteration: 20825
loss: 1.077358365058899,grad_norm: 0.9999996671116032, iteration: 20826
loss: 1.0395673513412476,grad_norm: 0.9999991043364017, iteration: 20827
loss: 1.0219290256500244,grad_norm: 0.9562989635528027, iteration: 20828
loss: 0.9918396472930908,grad_norm: 0.9999990380547173, iteration: 20829
loss: 1.0066200494766235,grad_norm: 0.9999990684567651, iteration: 20830
loss: 0.9980370402336121,grad_norm: 0.9484100310930532, iteration: 20831
loss: 1.0247013568878174,grad_norm: 0.9999992491029681, iteration: 20832
loss: 0.9956187009811401,grad_norm: 0.9999990955185564, iteration: 20833
loss: 1.0061073303222656,grad_norm: 0.9999991999635652, iteration: 20834
loss: 1.0092206001281738,grad_norm: 0.9999991339243544, iteration: 20835
loss: 1.050847053527832,grad_norm: 0.9999991650383858, iteration: 20836
loss: 1.0230507850646973,grad_norm: 0.9999991462689903, iteration: 20837
loss: 1.0145421028137207,grad_norm: 0.9261486924956491, iteration: 20838
loss: 1.0054504871368408,grad_norm: 0.9936061686729121, iteration: 20839
loss: 1.05092191696167,grad_norm: 0.9999991305628042, iteration: 20840
loss: 1.0168989896774292,grad_norm: 0.9255292729745368, iteration: 20841
loss: 1.0003337860107422,grad_norm: 0.9999991236479007, iteration: 20842
loss: 1.0073988437652588,grad_norm: 0.9999991314537604, iteration: 20843
loss: 1.0196046829223633,grad_norm: 0.9928658149467899, iteration: 20844
loss: 0.9585277438163757,grad_norm: 0.9940476358425947, iteration: 20845
loss: 1.0275402069091797,grad_norm: 0.9999990919977563, iteration: 20846
loss: 1.0132198333740234,grad_norm: 0.9999991989632722, iteration: 20847
loss: 1.0140599012374878,grad_norm: 0.9024119317636812, iteration: 20848
loss: 0.977283239364624,grad_norm: 0.9982437240263078, iteration: 20849
loss: 1.0402165651321411,grad_norm: 0.9999991244204824, iteration: 20850
loss: 1.0264506340026855,grad_norm: 0.9999992265240891, iteration: 20851
loss: 1.0400816202163696,grad_norm: 0.9999996696097739, iteration: 20852
loss: 1.0362250804901123,grad_norm: 0.9999993583740169, iteration: 20853
loss: 1.046284794807434,grad_norm: 0.9999993345447057, iteration: 20854
loss: 0.9938890337944031,grad_norm: 0.9796759501675039, iteration: 20855
loss: 0.9802143573760986,grad_norm: 0.8920657362417292, iteration: 20856
loss: 1.0789802074432373,grad_norm: 0.9999991021265885, iteration: 20857
loss: 1.0351744890213013,grad_norm: 0.8101088005858513, iteration: 20858
loss: 1.0203251838684082,grad_norm: 0.9999990750867657, iteration: 20859
loss: 1.0201777219772339,grad_norm: 0.9999992119549634, iteration: 20860
loss: 1.0001555681228638,grad_norm: 0.9999990938948286, iteration: 20861
loss: 1.0311890840530396,grad_norm: 0.999999227533777, iteration: 20862
loss: 1.0121275186538696,grad_norm: 0.9999991190261874, iteration: 20863
loss: 0.9744318723678589,grad_norm: 0.9999991645018578, iteration: 20864
loss: 0.990572988986969,grad_norm: 0.9999992874457309, iteration: 20865
loss: 0.9947752356529236,grad_norm: 0.9917877278186807, iteration: 20866
loss: 1.0198407173156738,grad_norm: 0.9346979769585055, iteration: 20867
loss: 1.084141492843628,grad_norm: 0.9999996773850126, iteration: 20868
loss: 1.029811143875122,grad_norm: 0.9999991847487254, iteration: 20869
loss: 1.0300370454788208,grad_norm: 0.9999991367926347, iteration: 20870
loss: 1.0028414726257324,grad_norm: 0.9999991144416944, iteration: 20871
loss: 1.0421710014343262,grad_norm: 0.9999996444095974, iteration: 20872
loss: 0.9861606955528259,grad_norm: 0.9999991689474722, iteration: 20873
loss: 0.9827789068222046,grad_norm: 0.9999991556571792, iteration: 20874
loss: 1.0083755254745483,grad_norm: 0.9999992612966899, iteration: 20875
loss: 1.0382168292999268,grad_norm: 0.9999991088338828, iteration: 20876
loss: 1.0655931234359741,grad_norm: 0.967033926442058, iteration: 20877
loss: 1.02626371383667,grad_norm: 0.9999995575139445, iteration: 20878
loss: 1.0196971893310547,grad_norm: 0.9218234857938326, iteration: 20879
loss: 0.9919446706771851,grad_norm: 0.9999991164498971, iteration: 20880
loss: 1.102957010269165,grad_norm: 0.9999998738411935, iteration: 20881
loss: 1.0200457572937012,grad_norm: 0.9999991219011053, iteration: 20882
loss: 1.0317198038101196,grad_norm: 0.9999991106749774, iteration: 20883
loss: 1.0639514923095703,grad_norm: 0.999999321638133, iteration: 20884
loss: 1.047042965888977,grad_norm: 0.9999994857047616, iteration: 20885
loss: 0.998104989528656,grad_norm: 0.9636090562942328, iteration: 20886
loss: 1.0108771324157715,grad_norm: 0.999999154674466, iteration: 20887
loss: 0.9658673405647278,grad_norm: 0.9999993006057581, iteration: 20888
loss: 0.9987826943397522,grad_norm: 0.9999991147531719, iteration: 20889
loss: 1.0117017030715942,grad_norm: 0.9999990543941198, iteration: 20890
loss: 1.0552493333816528,grad_norm: 0.9254539861207894, iteration: 20891
loss: 0.9958868622779846,grad_norm: 0.9999991748223801, iteration: 20892
loss: 0.980975329875946,grad_norm: 0.9999992265410379, iteration: 20893
loss: 1.041002869606018,grad_norm: 0.9497585601115057, iteration: 20894
loss: 1.0432170629501343,grad_norm: 0.9999991172243702, iteration: 20895
loss: 0.9817859530448914,grad_norm: 0.9999990540097755, iteration: 20896
loss: 1.040764570236206,grad_norm: 0.9999993707628316, iteration: 20897
loss: 1.022308588027954,grad_norm: 0.9999991782141168, iteration: 20898
loss: 1.0567824840545654,grad_norm: 0.9999993008441674, iteration: 20899
loss: 0.9697981476783752,grad_norm: 0.9458480293362912, iteration: 20900
loss: 1.003955364227295,grad_norm: 0.970215396842097, iteration: 20901
loss: 1.003118872642517,grad_norm: 0.9739077085103904, iteration: 20902
loss: 1.0341999530792236,grad_norm: 0.8668954000912944, iteration: 20903
loss: 1.0400630235671997,grad_norm: 0.9999991833060976, iteration: 20904
loss: 1.0271815061569214,grad_norm: 0.9153437190270367, iteration: 20905
loss: 1.0396127700805664,grad_norm: 0.9999996085877173, iteration: 20906
loss: 1.0938160419464111,grad_norm: 0.9999997903081411, iteration: 20907
loss: 1.0321837663650513,grad_norm: 0.9999991441391443, iteration: 20908
loss: 1.004355788230896,grad_norm: 0.8908393893354487, iteration: 20909
loss: 1.0154954195022583,grad_norm: 0.9999991464339287, iteration: 20910
loss: 1.0223630666732788,grad_norm: 0.9211487791023714, iteration: 20911
loss: 0.9918025732040405,grad_norm: 0.8806581740199741, iteration: 20912
loss: 1.003378987312317,grad_norm: 0.9999990473725516, iteration: 20913
loss: 1.0315433740615845,grad_norm: 0.9999990876584662, iteration: 20914
loss: 1.0278940200805664,grad_norm: 0.8610335227080742, iteration: 20915
loss: 1.0005794763565063,grad_norm: 0.9999989935608262, iteration: 20916
loss: 1.0116724967956543,grad_norm: 0.9999991837681104, iteration: 20917
loss: 1.0393850803375244,grad_norm: 0.9077741125066469, iteration: 20918
loss: 1.0152709484100342,grad_norm: 0.970974342108827, iteration: 20919
loss: 1.0585886240005493,grad_norm: 0.9922301165496633, iteration: 20920
loss: 0.9710484147071838,grad_norm: 0.9999990839902829, iteration: 20921
loss: 1.0533602237701416,grad_norm: 0.999999087945644, iteration: 20922
loss: 0.9887077808380127,grad_norm: 0.9999991746972398, iteration: 20923
loss: 1.0089002847671509,grad_norm: 0.9999989877666895, iteration: 20924
loss: 1.02186119556427,grad_norm: 0.9999998347672324, iteration: 20925
loss: 1.0897995233535767,grad_norm: 0.9999993488559198, iteration: 20926
loss: 1.0246089696884155,grad_norm: 0.853790728483839, iteration: 20927
loss: 1.022436261177063,grad_norm: 0.9324841814975542, iteration: 20928
loss: 1.017128586769104,grad_norm: 0.9674723350232504, iteration: 20929
loss: 1.0968977212905884,grad_norm: 0.9999996286134626, iteration: 20930
loss: 1.0175042152404785,grad_norm: 0.9999990457564296, iteration: 20931
loss: 1.0104390382766724,grad_norm: 0.9999991755204083, iteration: 20932
loss: 1.0314216613769531,grad_norm: 0.9999990554469373, iteration: 20933
loss: 0.9837006330490112,grad_norm: 0.8679964341557582, iteration: 20934
loss: 0.9807248115539551,grad_norm: 0.9999994894071709, iteration: 20935
loss: 1.0182868242263794,grad_norm: 0.999999110411309, iteration: 20936
loss: 1.0311055183410645,grad_norm: 0.9081910966330964, iteration: 20937
loss: 1.019902229309082,grad_norm: 0.9999992430311508, iteration: 20938
loss: 1.0412952899932861,grad_norm: 0.9999991706691689, iteration: 20939
loss: 1.0434702634811401,grad_norm: 0.9999994123375321, iteration: 20940
loss: 0.9899706244468689,grad_norm: 0.9244509438817916, iteration: 20941
loss: 1.034284234046936,grad_norm: 0.9999995867633258, iteration: 20942
loss: 0.949665904045105,grad_norm: 0.9999991840057934, iteration: 20943
loss: 1.0275684595108032,grad_norm: 0.9999990616423597, iteration: 20944
loss: 1.0120515823364258,grad_norm: 0.9999994535737452, iteration: 20945
loss: 0.9911558628082275,grad_norm: 0.9999991872282119, iteration: 20946
loss: 0.9869437217712402,grad_norm: 0.9550129868737148, iteration: 20947
loss: 1.0352675914764404,grad_norm: 0.9999994768236498, iteration: 20948
loss: 1.0339463949203491,grad_norm: 0.9999996546797959, iteration: 20949
loss: 1.030941128730774,grad_norm: 0.9881297465946798, iteration: 20950
loss: 1.007569432258606,grad_norm: 0.999999011898254, iteration: 20951
loss: 1.0111290216445923,grad_norm: 0.923807972795348, iteration: 20952
loss: 1.0561449527740479,grad_norm: 0.9999991380569817, iteration: 20953
loss: 1.0133659839630127,grad_norm: 0.9999991766771564, iteration: 20954
loss: 1.0428454875946045,grad_norm: 0.9999994569866724, iteration: 20955
loss: 0.9918044805526733,grad_norm: 0.9953744983454497, iteration: 20956
loss: 0.9768759608268738,grad_norm: 0.999999071763387, iteration: 20957
loss: 1.038259506225586,grad_norm: 0.9999994697959182, iteration: 20958
loss: 1.009109377861023,grad_norm: 0.9999992151678898, iteration: 20959
loss: 1.0338183641433716,grad_norm: 0.9999990385621166, iteration: 20960
loss: 1.044765830039978,grad_norm: 0.9999990872484344, iteration: 20961
loss: 1.0772792100906372,grad_norm: 0.9999992682807355, iteration: 20962
loss: 1.0071933269500732,grad_norm: 0.9999990810808234, iteration: 20963
loss: 1.0422037839889526,grad_norm: 0.9999997336995917, iteration: 20964
loss: 1.0327928066253662,grad_norm: 0.9999996143529335, iteration: 20965
loss: 0.9855598211288452,grad_norm: 0.9999990997114988, iteration: 20966
loss: 1.0264291763305664,grad_norm: 0.9999991818136368, iteration: 20967
loss: 1.0116569995880127,grad_norm: 0.9999991481320804, iteration: 20968
loss: 1.0463738441467285,grad_norm: 0.9999994720082329, iteration: 20969
loss: 0.9839504957199097,grad_norm: 0.9999992099289617, iteration: 20970
loss: 1.0239046812057495,grad_norm: 0.9999992721985468, iteration: 20971
loss: 0.9973115921020508,grad_norm: 0.9999997169415703, iteration: 20972
loss: 1.0525156259536743,grad_norm: 0.9999993289725146, iteration: 20973
loss: 1.006623387336731,grad_norm: 0.952935793917313, iteration: 20974
loss: 0.9858764410018921,grad_norm: 0.9999993950163552, iteration: 20975
loss: 0.9867106080055237,grad_norm: 0.9999991879405615, iteration: 20976
loss: 1.0130746364593506,grad_norm: 0.9999990785024779, iteration: 20977
loss: 1.0598976612091064,grad_norm: 0.9999994613376268, iteration: 20978
loss: 1.0059903860092163,grad_norm: 0.9999992016997787, iteration: 20979
loss: 1.0562447309494019,grad_norm: 0.9999990644406188, iteration: 20980
loss: 1.0051170587539673,grad_norm: 0.9817735826276899, iteration: 20981
loss: 1.002704381942749,grad_norm: 0.9999992910807177, iteration: 20982
loss: 1.013047456741333,grad_norm: 0.9999990557367354, iteration: 20983
loss: 0.9942362904548645,grad_norm: 0.9999991486201351, iteration: 20984
loss: 1.069865107536316,grad_norm: 0.9932856757848355, iteration: 20985
loss: 1.005737066268921,grad_norm: 0.9999990661545367, iteration: 20986
loss: 1.023223638534546,grad_norm: 0.9999995139291695, iteration: 20987
loss: 0.9752519726753235,grad_norm: 0.9906889360004928, iteration: 20988
loss: 1.0287140607833862,grad_norm: 0.9999994591093875, iteration: 20989
loss: 1.1327773332595825,grad_norm: 0.9999991949850533, iteration: 20990
loss: 1.0171949863433838,grad_norm: 0.999999256153964, iteration: 20991
loss: 1.0132970809936523,grad_norm: 0.9999990168880135, iteration: 20992
loss: 1.0111865997314453,grad_norm: 0.9999991581405876, iteration: 20993
loss: 1.0211198329925537,grad_norm: 0.9999992709137958, iteration: 20994
loss: 1.0851762294769287,grad_norm: 0.9999999686989366, iteration: 20995
loss: 1.041471242904663,grad_norm: 0.9999995462900905, iteration: 20996
loss: 1.0467278957366943,grad_norm: 0.9999990781319245, iteration: 20997
loss: 1.044621229171753,grad_norm: 0.9955825335043689, iteration: 20998
loss: 0.9964445233345032,grad_norm: 0.9999992242137571, iteration: 20999
loss: 1.0751882791519165,grad_norm: 0.9999995017852142, iteration: 21000
loss: 0.9763720631599426,grad_norm: 0.99999921904003, iteration: 21001
loss: 0.9894801378250122,grad_norm: 0.9999992453675733, iteration: 21002
loss: 0.9886837005615234,grad_norm: 0.9999992453980617, iteration: 21003
loss: 1.0480101108551025,grad_norm: 0.9999992183246398, iteration: 21004
loss: 1.0360665321350098,grad_norm: 0.9999993976805036, iteration: 21005
loss: 1.1530468463897705,grad_norm: 0.9999996952797391, iteration: 21006
loss: 1.0740269422531128,grad_norm: 0.9999992417740665, iteration: 21007
loss: 1.0070669651031494,grad_norm: 0.9190811643755137, iteration: 21008
loss: 1.0503326654434204,grad_norm: 0.9999990171672003, iteration: 21009
loss: 1.0418024063110352,grad_norm: 0.9999992138259202, iteration: 21010
loss: 1.0340911149978638,grad_norm: 0.999999148523104, iteration: 21011
loss: 0.9887701272964478,grad_norm: 0.9999991611206166, iteration: 21012
loss: 1.01156485080719,grad_norm: 0.9999992149529127, iteration: 21013
loss: 0.9708111882209778,grad_norm: 0.9123387948082006, iteration: 21014
loss: 1.143191933631897,grad_norm: 0.9999993209619047, iteration: 21015
loss: 1.0252562761306763,grad_norm: 0.9999996485241935, iteration: 21016
loss: 0.9861916303634644,grad_norm: 0.9999992460230283, iteration: 21017
loss: 1.0355833768844604,grad_norm: 0.9999995431983596, iteration: 21018
loss: 1.0085341930389404,grad_norm: 0.9999991060966048, iteration: 21019
loss: 1.0346505641937256,grad_norm: 0.999999242086924, iteration: 21020
loss: 0.9956398606300354,grad_norm: 0.9999992077073074, iteration: 21021
loss: 1.0371614694595337,grad_norm: 0.9999994980151715, iteration: 21022
loss: 0.9807095527648926,grad_norm: 0.9999992646117226, iteration: 21023
loss: 1.0306366682052612,grad_norm: 0.9882626713263761, iteration: 21024
loss: 1.0117770433425903,grad_norm: 0.9999991497716768, iteration: 21025
loss: 1.028084635734558,grad_norm: 0.9999995752664269, iteration: 21026
loss: 1.0485804080963135,grad_norm: 0.9999991023548837, iteration: 21027
loss: 1.0258433818817139,grad_norm: 0.9430151908515819, iteration: 21028
loss: 1.0995547771453857,grad_norm: 0.9999996042486308, iteration: 21029
loss: 1.0157197713851929,grad_norm: 0.9999991122994631, iteration: 21030
loss: 1.038591742515564,grad_norm: 0.9270685635369768, iteration: 21031
loss: 1.0588161945343018,grad_norm: 0.9999991236336744, iteration: 21032
loss: 1.017910122871399,grad_norm: 0.9999991206057126, iteration: 21033
loss: 1.0513756275177002,grad_norm: 0.9999993190210827, iteration: 21034
loss: 1.019364356994629,grad_norm: 0.999999166058351, iteration: 21035
loss: 1.0139410495758057,grad_norm: 0.999999186156893, iteration: 21036
loss: 1.0473424196243286,grad_norm: 0.9999998118072521, iteration: 21037
loss: 1.025132656097412,grad_norm: 0.9999996068480848, iteration: 21038
loss: 1.0083249807357788,grad_norm: 0.8936629080282051, iteration: 21039
loss: 0.9985814690589905,grad_norm: 0.9423210303741313, iteration: 21040
loss: 1.08482825756073,grad_norm: 0.9999993236605645, iteration: 21041
loss: 0.9729101657867432,grad_norm: 0.9999996026134739, iteration: 21042
loss: 0.9877451658248901,grad_norm: 0.9999991243160294, iteration: 21043
loss: 1.0091029405593872,grad_norm: 0.8805160391663293, iteration: 21044
loss: 1.0059806108474731,grad_norm: 0.9999992572539337, iteration: 21045
loss: 0.9677032828330994,grad_norm: 0.9999991711079025, iteration: 21046
loss: 1.016154170036316,grad_norm: 0.9999992757744333, iteration: 21047
loss: 1.022491693496704,grad_norm: 0.9999991676790113, iteration: 21048
loss: 1.0609408617019653,grad_norm: 0.9999991023001285, iteration: 21049
loss: 1.0012891292572021,grad_norm: 0.9890416353855037, iteration: 21050
loss: 1.0073381662368774,grad_norm: 0.9999990960524204, iteration: 21051
loss: 0.9889901280403137,grad_norm: 0.9756969306812083, iteration: 21052
loss: 1.0542007684707642,grad_norm: 0.999999385264291, iteration: 21053
loss: 1.0295504331588745,grad_norm: 0.9999990981255598, iteration: 21054
loss: 1.0585817098617554,grad_norm: 0.9999992771258703, iteration: 21055
loss: 0.9879380464553833,grad_norm: 0.9999989792192243, iteration: 21056
loss: 0.9941807389259338,grad_norm: 0.9393649739379213, iteration: 21057
loss: 0.9920440316200256,grad_norm: 0.999999242904824, iteration: 21058
loss: 1.0191644430160522,grad_norm: 0.9999992230971424, iteration: 21059
loss: 1.006190299987793,grad_norm: 0.9675611139689111, iteration: 21060
loss: 1.0166431665420532,grad_norm: 0.999999245242444, iteration: 21061
loss: 0.9861624240875244,grad_norm: 0.9523960458425371, iteration: 21062
loss: 1.0672887563705444,grad_norm: 0.9999991627741656, iteration: 21063
loss: 1.0464963912963867,grad_norm: 0.999999051742392, iteration: 21064
loss: 1.0371649265289307,grad_norm: 0.9999989629252191, iteration: 21065
loss: 1.032584309577942,grad_norm: 0.9999992387043262, iteration: 21066
loss: 1.0125195980072021,grad_norm: 0.9999992146363398, iteration: 21067
loss: 1.0113471746444702,grad_norm: 0.9999994487026751, iteration: 21068
loss: 1.0564956665039062,grad_norm: 0.9999992031031252, iteration: 21069
loss: 0.9981619119644165,grad_norm: 0.9961279461329422, iteration: 21070
loss: 1.0592725276947021,grad_norm: 0.9999991745108824, iteration: 21071
loss: 1.0264701843261719,grad_norm: 0.9999992336082609, iteration: 21072
loss: 1.0383100509643555,grad_norm: 0.9999997040452735, iteration: 21073
loss: 1.006908893585205,grad_norm: 0.9999991021535153, iteration: 21074
loss: 1.0224729776382446,grad_norm: 0.999999424185157, iteration: 21075
loss: 1.030056357383728,grad_norm: 0.9999990763900534, iteration: 21076
loss: 1.0310245752334595,grad_norm: 0.9999991111457742, iteration: 21077
loss: 1.016493558883667,grad_norm: 0.999999161592621, iteration: 21078
loss: 0.9906896352767944,grad_norm: 0.964571948258184, iteration: 21079
loss: 1.0614224672317505,grad_norm: 0.9999995081392721, iteration: 21080
loss: 1.0257444381713867,grad_norm: 0.9999997050718785, iteration: 21081
loss: 1.072986364364624,grad_norm: 0.9999994437094765, iteration: 21082
loss: 1.0391910076141357,grad_norm: 0.9999997681067977, iteration: 21083
loss: 1.016647219657898,grad_norm: 0.9999992784994947, iteration: 21084
loss: 0.9845055341720581,grad_norm: 0.9999992380612992, iteration: 21085
loss: 0.9831370711326599,grad_norm: 0.8614069734042555, iteration: 21086
loss: 1.0054452419281006,grad_norm: 0.992314613643571, iteration: 21087
loss: 1.0233452320098877,grad_norm: 0.9999995431977845, iteration: 21088
loss: 1.031101942062378,grad_norm: 0.999999175433357, iteration: 21089
loss: 1.018020749092102,grad_norm: 0.8302076217580797, iteration: 21090
loss: 1.0323903560638428,grad_norm: 0.9633144200314077, iteration: 21091
loss: 1.048729419708252,grad_norm: 0.9999994921055171, iteration: 21092
loss: 1.0125081539154053,grad_norm: 0.9938278839905359, iteration: 21093
loss: 1.0022739171981812,grad_norm: 0.9928041583524218, iteration: 21094
loss: 1.023178219795227,grad_norm: 0.9999992085298085, iteration: 21095
loss: 1.0352195501327515,grad_norm: 0.9999992031857821, iteration: 21096
loss: 1.0015299320220947,grad_norm: 0.9999993802034037, iteration: 21097
loss: 1.0374559164047241,grad_norm: 0.857191498899483, iteration: 21098
loss: 1.0298538208007812,grad_norm: 0.9999989885466584, iteration: 21099
loss: 0.9742083549499512,grad_norm: 0.9999990232028255, iteration: 21100
loss: 1.0393238067626953,grad_norm: 0.9999999697188809, iteration: 21101
loss: 1.0113377571105957,grad_norm: 0.9999991764769834, iteration: 21102
loss: 0.9964560270309448,grad_norm: 0.9999990380877108, iteration: 21103
loss: 1.0497628450393677,grad_norm: 0.9999993154148098, iteration: 21104
loss: 1.0077800750732422,grad_norm: 0.9999990669934623, iteration: 21105
loss: 0.9979113936424255,grad_norm: 0.9999991722604463, iteration: 21106
loss: 1.008333683013916,grad_norm: 0.999999044755393, iteration: 21107
loss: 0.9938859343528748,grad_norm: 0.9999990512268849, iteration: 21108
loss: 0.9644593000411987,grad_norm: 0.9999992870983795, iteration: 21109
loss: 0.9780170917510986,grad_norm: 0.8415635489049408, iteration: 21110
loss: 1.0297781229019165,grad_norm: 0.9999989995164286, iteration: 21111
loss: 1.0697320699691772,grad_norm: 0.9999992725780843, iteration: 21112
loss: 1.0125856399536133,grad_norm: 0.9477778327713288, iteration: 21113
loss: 1.0982767343521118,grad_norm: 0.999999380304496, iteration: 21114
loss: 1.0255701541900635,grad_norm: 0.9999992324185569, iteration: 21115
loss: 1.0225799083709717,grad_norm: 0.9999995149033183, iteration: 21116
loss: 1.004412055015564,grad_norm: 0.9999991844317853, iteration: 21117
loss: 1.0588510036468506,grad_norm: 0.9999993731629226, iteration: 21118
loss: 1.0120971202850342,grad_norm: 0.8238314380142978, iteration: 21119
loss: 1.0457398891448975,grad_norm: 0.9999991910613607, iteration: 21120
loss: 1.027979850769043,grad_norm: 0.9999989778302086, iteration: 21121
loss: 1.0025585889816284,grad_norm: 0.9999991330730638, iteration: 21122
loss: 1.0271483659744263,grad_norm: 0.999999542233991, iteration: 21123
loss: 1.044724464416504,grad_norm: 0.9999995999737026, iteration: 21124
loss: 1.0037027597427368,grad_norm: 0.9598301190332621, iteration: 21125
loss: 1.0077437162399292,grad_norm: 0.9411954013854709, iteration: 21126
loss: 1.1120718717575073,grad_norm: 0.9999997220722314, iteration: 21127
loss: 1.0244520902633667,grad_norm: 0.999999390786107, iteration: 21128
loss: 0.971808671951294,grad_norm: 0.9999990565575037, iteration: 21129
loss: 1.028542160987854,grad_norm: 0.9999992812047688, iteration: 21130
loss: 1.0170320272445679,grad_norm: 0.9999991301521673, iteration: 21131
loss: 0.9992154836654663,grad_norm: 0.9999996380344893, iteration: 21132
loss: 1.0453009605407715,grad_norm: 0.9999992643228179, iteration: 21133
loss: 0.9582685232162476,grad_norm: 0.999999182849466, iteration: 21134
loss: 1.0497446060180664,grad_norm: 0.9999992329166313, iteration: 21135
loss: 1.0435514450073242,grad_norm: 0.999999401306725, iteration: 21136
loss: 1.0343486070632935,grad_norm: 0.9271003828704477, iteration: 21137
loss: 0.9873130917549133,grad_norm: 0.9999990218571629, iteration: 21138
loss: 1.0246745347976685,grad_norm: 0.9999991552061643, iteration: 21139
loss: 1.0066207647323608,grad_norm: 0.982872002961308, iteration: 21140
loss: 1.0256088972091675,grad_norm: 0.9999992963011279, iteration: 21141
loss: 1.0212222337722778,grad_norm: 0.9895111677466062, iteration: 21142
loss: 0.9822141528129578,grad_norm: 0.9999990720301689, iteration: 21143
loss: 1.0152186155319214,grad_norm: 0.9999992137783725, iteration: 21144
loss: 1.0366175174713135,grad_norm: 0.9999992568272822, iteration: 21145
loss: 1.05250084400177,grad_norm: 0.9999991901186882, iteration: 21146
loss: 1.009323000907898,grad_norm: 0.9999995653550751, iteration: 21147
loss: 0.9872585535049438,grad_norm: 0.8775041891099142, iteration: 21148
loss: 1.0062477588653564,grad_norm: 0.9999992307917059, iteration: 21149
loss: 1.0487017631530762,grad_norm: 0.9111146971929855, iteration: 21150
loss: 1.0103975534439087,grad_norm: 0.9999990568204048, iteration: 21151
loss: 1.0310239791870117,grad_norm: 0.9109951915306814, iteration: 21152
loss: 1.0455094575881958,grad_norm: 0.9999990685468648, iteration: 21153
loss: 1.0236034393310547,grad_norm: 0.9999990560225044, iteration: 21154
loss: 1.0445908308029175,grad_norm: 0.9999997221444096, iteration: 21155
loss: 1.0402929782867432,grad_norm: 0.999999365560795, iteration: 21156
loss: 1.0094765424728394,grad_norm: 0.9842152914737922, iteration: 21157
loss: 1.0006428956985474,grad_norm: 0.9999992799246263, iteration: 21158
loss: 0.9852691888809204,grad_norm: 0.9079154692988899, iteration: 21159
loss: 1.017781376838684,grad_norm: 0.9999993820071986, iteration: 21160
loss: 1.004221796989441,grad_norm: 0.9999991535448673, iteration: 21161
loss: 1.0144283771514893,grad_norm: 0.9999991277963288, iteration: 21162
loss: 1.0171113014221191,grad_norm: 0.999998994734431, iteration: 21163
loss: 0.9965633749961853,grad_norm: 0.9999991535549428, iteration: 21164
loss: 1.0898395776748657,grad_norm: 0.9999994165308841, iteration: 21165
loss: 1.00257408618927,grad_norm: 0.9999994924621315, iteration: 21166
loss: 1.0307673215866089,grad_norm: 0.9999991800136142, iteration: 21167
loss: 1.0607868432998657,grad_norm: 0.9999997057865566, iteration: 21168
loss: 1.02034330368042,grad_norm: 0.999999210589359, iteration: 21169
loss: 1.0114904642105103,grad_norm: 0.9999993179664607, iteration: 21170
loss: 1.0595983266830444,grad_norm: 0.9999995516450794, iteration: 21171
loss: 1.0466324090957642,grad_norm: 0.9999992906346572, iteration: 21172
loss: 0.9831224679946899,grad_norm: 0.9999989779972516, iteration: 21173
loss: 1.0065028667449951,grad_norm: 0.9999990971319338, iteration: 21174
loss: 0.9680044054985046,grad_norm: 0.9737755940652909, iteration: 21175
loss: 0.9971591830253601,grad_norm: 0.9111847952242735, iteration: 21176
loss: 1.0117368698120117,grad_norm: 0.9875645686249125, iteration: 21177
loss: 1.0141273736953735,grad_norm: 0.9999996916270121, iteration: 21178
loss: 1.0752019882202148,grad_norm: 0.9999997933504094, iteration: 21179
loss: 1.012116551399231,grad_norm: 0.9999989705296145, iteration: 21180
loss: 1.0225759744644165,grad_norm: 0.9999991237165275, iteration: 21181
loss: 1.0136793851852417,grad_norm: 0.9999991285871396, iteration: 21182
loss: 1.0251636505126953,grad_norm: 0.9999990644345212, iteration: 21183
loss: 0.9726778268814087,grad_norm: 0.9109159847906826, iteration: 21184
loss: 1.0466804504394531,grad_norm: 0.9999993790679138, iteration: 21185
loss: 0.9986889362335205,grad_norm: 0.9999991966726232, iteration: 21186
loss: 1.0077972412109375,grad_norm: 0.9931625738399862, iteration: 21187
loss: 1.0167464017868042,grad_norm: 0.9999990479642262, iteration: 21188
loss: 0.9791698455810547,grad_norm: 0.999999109623035, iteration: 21189
loss: 1.0150096416473389,grad_norm: 0.999999133350586, iteration: 21190
loss: 1.0394504070281982,grad_norm: 0.9999990506814255, iteration: 21191
loss: 0.9853516221046448,grad_norm: 0.9999995229392497, iteration: 21192
loss: 1.139894723892212,grad_norm: 0.9999997671439806, iteration: 21193
loss: 1.049268126487732,grad_norm: 0.9999991923861099, iteration: 21194
loss: 1.0799649953842163,grad_norm: 0.9999993232144316, iteration: 21195
loss: 1.0692614316940308,grad_norm: 0.9999991375241207, iteration: 21196
loss: 1.0130107402801514,grad_norm: 0.9999992546310712, iteration: 21197
loss: 1.0523324012756348,grad_norm: 0.9999996281239855, iteration: 21198
loss: 1.0585087537765503,grad_norm: 0.9999991226971312, iteration: 21199
loss: 0.9791611433029175,grad_norm: 0.9999999004303362, iteration: 21200
loss: 0.9368071556091309,grad_norm: 0.999999153108441, iteration: 21201
loss: 1.0226467847824097,grad_norm: 0.9999991425076177, iteration: 21202
loss: 1.015058994293213,grad_norm: 0.999999600350786, iteration: 21203
loss: 1.0140585899353027,grad_norm: 0.9999995821881325, iteration: 21204
loss: 1.0219321250915527,grad_norm: 0.9999991593732743, iteration: 21205
loss: 1.0274111032485962,grad_norm: 0.8317805819600166, iteration: 21206
loss: 1.0305123329162598,grad_norm: 0.999999121407449, iteration: 21207
loss: 1.0290226936340332,grad_norm: 0.8617417455279268, iteration: 21208
loss: 1.0011112689971924,grad_norm: 0.9999991328026038, iteration: 21209
loss: 1.0383611917495728,grad_norm: 0.9953240402657901, iteration: 21210
loss: 0.9697187542915344,grad_norm: 0.9999994275190561, iteration: 21211
loss: 1.0349245071411133,grad_norm: 0.9999997405971831, iteration: 21212
loss: 1.0306544303894043,grad_norm: 0.999999041779491, iteration: 21213
loss: 0.9991573095321655,grad_norm: 0.9999992033170371, iteration: 21214
loss: 1.042879581451416,grad_norm: 0.9999991700510017, iteration: 21215
loss: 0.9872204661369324,grad_norm: 0.7955753791087347, iteration: 21216
loss: 0.9655025601387024,grad_norm: 0.9999991949257513, iteration: 21217
loss: 1.0209431648254395,grad_norm: 0.999999058076593, iteration: 21218
loss: 1.0796328783035278,grad_norm: 0.9999996027013206, iteration: 21219
loss: 1.0424230098724365,grad_norm: 0.9432987144339726, iteration: 21220
loss: 1.023998498916626,grad_norm: 0.9999993945121742, iteration: 21221
loss: 1.0086321830749512,grad_norm: 0.8791869483504401, iteration: 21222
loss: 1.0210894346237183,grad_norm: 0.9999992117392633, iteration: 21223
loss: 0.9932031035423279,grad_norm: 0.9511454068655741, iteration: 21224
loss: 1.0002909898757935,grad_norm: 0.9999989965270925, iteration: 21225
loss: 1.0438493490219116,grad_norm: 0.9999993503974753, iteration: 21226
loss: 0.9745497107505798,grad_norm: 0.9999991296752297, iteration: 21227
loss: 1.0382200479507446,grad_norm: 0.9999992654814265, iteration: 21228
loss: 1.0243741273880005,grad_norm: 0.9999990690736368, iteration: 21229
loss: 1.031495451927185,grad_norm: 0.8322769538271999, iteration: 21230
loss: 1.008705496788025,grad_norm: 0.9999992820049082, iteration: 21231
loss: 0.9897975921630859,grad_norm: 0.9999991147349094, iteration: 21232
loss: 0.9971792101860046,grad_norm: 0.9999991925404041, iteration: 21233
loss: 1.0054925680160522,grad_norm: 0.99999904118852, iteration: 21234
loss: 0.9916253089904785,grad_norm: 0.9999991650313206, iteration: 21235
loss: 0.9942035675048828,grad_norm: 0.9999990429803999, iteration: 21236
loss: 1.0105942487716675,grad_norm: 0.9999991051752914, iteration: 21237
loss: 1.0174556970596313,grad_norm: 0.9999991353154534, iteration: 21238
loss: 0.9949418902397156,grad_norm: 0.9999995570811513, iteration: 21239
loss: 1.0274665355682373,grad_norm: 0.9999991122614111, iteration: 21240
loss: 0.9601443409919739,grad_norm: 0.9999990562965101, iteration: 21241
loss: 1.0250718593597412,grad_norm: 0.9999991290368808, iteration: 21242
loss: 0.9886066317558289,grad_norm: 0.9999990201405934, iteration: 21243
loss: 1.0295230150222778,grad_norm: 0.9999991862553719, iteration: 21244
loss: 1.0046159029006958,grad_norm: 0.9999992754491296, iteration: 21245
loss: 1.0213831663131714,grad_norm: 0.9999992257451958, iteration: 21246
loss: 1.0468289852142334,grad_norm: 0.9999997230377463, iteration: 21247
loss: 0.9966326355934143,grad_norm: 0.9999993391565797, iteration: 21248
loss: 1.033311128616333,grad_norm: 0.9999990447338357, iteration: 21249
loss: 1.0120283365249634,grad_norm: 0.9936206769934669, iteration: 21250
loss: 1.0110317468643188,grad_norm: 0.9734654466314987, iteration: 21251
loss: 1.0042012929916382,grad_norm: 0.9999991186827091, iteration: 21252
loss: 1.0412445068359375,grad_norm: 0.9999990634709338, iteration: 21253
loss: 0.9871090054512024,grad_norm: 0.9999992735768636, iteration: 21254
loss: 1.0808162689208984,grad_norm: 0.9999993376867691, iteration: 21255
loss: 1.0558100938796997,grad_norm: 0.9999992163062642, iteration: 21256
loss: 0.986663818359375,grad_norm: 0.9460851798867912, iteration: 21257
loss: 1.0286792516708374,grad_norm: 0.9999991310934523, iteration: 21258
loss: 1.0234910249710083,grad_norm: 0.9999992443533339, iteration: 21259
loss: 1.0360639095306396,grad_norm: 0.9999999426801649, iteration: 21260
loss: 1.026442527770996,grad_norm: 0.9638983129053045, iteration: 21261
loss: 1.028603196144104,grad_norm: 0.9999991747117535, iteration: 21262
loss: 1.0081357955932617,grad_norm: 0.9723470354871753, iteration: 21263
loss: 1.0248465538024902,grad_norm: 0.9999993670152582, iteration: 21264
loss: 1.0050387382507324,grad_norm: 0.9999994998426481, iteration: 21265
loss: 1.0477855205535889,grad_norm: 0.999999005431155, iteration: 21266
loss: 1.0123692750930786,grad_norm: 0.9999993894373155, iteration: 21267
loss: 1.0388342142105103,grad_norm: 0.9959667937687845, iteration: 21268
loss: 0.9944970607757568,grad_norm: 0.9513210953811906, iteration: 21269
loss: 1.0504708290100098,grad_norm: 0.9819150163226043, iteration: 21270
loss: 1.0533503293991089,grad_norm: 0.8933953125567295, iteration: 21271
loss: 1.0038487911224365,grad_norm: 0.8972364777112297, iteration: 21272
loss: 0.9676037430763245,grad_norm: 0.9999992282667022, iteration: 21273
loss: 1.0045132637023926,grad_norm: 0.9999991113783263, iteration: 21274
loss: 1.0256623029708862,grad_norm: 0.9444598737917463, iteration: 21275
loss: 1.02173912525177,grad_norm: 0.9999998404944398, iteration: 21276
loss: 1.0766445398330688,grad_norm: 0.9999996457182929, iteration: 21277
loss: 0.9912910461425781,grad_norm: 0.9999990379662115, iteration: 21278
loss: 1.0259482860565186,grad_norm: 0.9999998727005204, iteration: 21279
loss: 1.014939308166504,grad_norm: 0.8412753579765297, iteration: 21280
loss: 1.0446528196334839,grad_norm: 0.999999371683568, iteration: 21281
loss: 0.9885473847389221,grad_norm: 0.9999991224064559, iteration: 21282
loss: 1.0316215753555298,grad_norm: 0.9999993629606581, iteration: 21283
loss: 1.050842046737671,grad_norm: 0.9999995920998905, iteration: 21284
loss: 1.0299160480499268,grad_norm: 0.9999995665116101, iteration: 21285
loss: 1.0812835693359375,grad_norm: 0.9999995717661414, iteration: 21286
loss: 1.048893690109253,grad_norm: 0.9999990913060665, iteration: 21287
loss: 0.9888628721237183,grad_norm: 0.9999990859816457, iteration: 21288
loss: 1.096956491470337,grad_norm: 0.9999997455603702, iteration: 21289
loss: 1.0581519603729248,grad_norm: 0.9999991950815339, iteration: 21290
loss: 1.0502885580062866,grad_norm: 0.9999996754397007, iteration: 21291
loss: 1.0076473951339722,grad_norm: 0.8997452338060199, iteration: 21292
loss: 1.0528631210327148,grad_norm: 0.9999997111166665, iteration: 21293
loss: 1.0153790712356567,grad_norm: 0.9999994372659027, iteration: 21294
loss: 0.9940941333770752,grad_norm: 0.9999991592359962, iteration: 21295
loss: 1.0217314958572388,grad_norm: 0.9999991578287026, iteration: 21296
loss: 0.9686732292175293,grad_norm: 0.9299674479031409, iteration: 21297
loss: 1.0186277627944946,grad_norm: 0.9999989179078317, iteration: 21298
loss: 1.0290396213531494,grad_norm: 0.9999991151911064, iteration: 21299
loss: 1.1494156122207642,grad_norm: 0.9999998165707956, iteration: 21300
loss: 1.0066155195236206,grad_norm: 0.9999990584716412, iteration: 21301
loss: 1.0157220363616943,grad_norm: 0.8990349731470749, iteration: 21302
loss: 0.9836032390594482,grad_norm: 0.9999989748430693, iteration: 21303
loss: 1.012636661529541,grad_norm: 0.9510273429865286, iteration: 21304
loss: 1.0258855819702148,grad_norm: 0.9567126444164931, iteration: 21305
loss: 1.0158311128616333,grad_norm: 0.8941234989060415, iteration: 21306
loss: 1.0079437494277954,grad_norm: 0.9999990154037458, iteration: 21307
loss: 0.9950425028800964,grad_norm: 0.9545086873620013, iteration: 21308
loss: 1.0205848217010498,grad_norm: 0.9999991719566487, iteration: 21309
loss: 1.0088862180709839,grad_norm: 0.9999992694234294, iteration: 21310
loss: 0.9846162796020508,grad_norm: 0.9393279880159456, iteration: 21311
loss: 0.9860905408859253,grad_norm: 0.9926245842225808, iteration: 21312
loss: 1.051154375076294,grad_norm: 0.999999189418609, iteration: 21313
loss: 0.9828040599822998,grad_norm: 0.9999995543773113, iteration: 21314
loss: 1.0290684700012207,grad_norm: 0.999999319171422, iteration: 21315
loss: 1.0356380939483643,grad_norm: 0.9999998094059456, iteration: 21316
loss: 0.9929639101028442,grad_norm: 0.9729452715171892, iteration: 21317
loss: 0.9521695971488953,grad_norm: 0.999999146778343, iteration: 21318
loss: 0.9815245270729065,grad_norm: 0.9744590808561573, iteration: 21319
loss: 1.0529804229736328,grad_norm: 0.9999994249169013, iteration: 21320
loss: 0.9860999584197998,grad_norm: 0.9999992546124875, iteration: 21321
loss: 1.0317245721817017,grad_norm: 0.9281041194729579, iteration: 21322
loss: 1.0606422424316406,grad_norm: 0.9999993909198396, iteration: 21323
loss: 1.108699917793274,grad_norm: 0.9999996559532466, iteration: 21324
loss: 1.0209550857543945,grad_norm: 0.9999991325946668, iteration: 21325
loss: 1.0225249528884888,grad_norm: 0.999999310800483, iteration: 21326
loss: 0.9672269225120544,grad_norm: 0.9999991973631301, iteration: 21327
loss: 1.0262110233306885,grad_norm: 0.9999992910157207, iteration: 21328
loss: 0.9885135293006897,grad_norm: 0.9438063198308865, iteration: 21329
loss: 1.0336803197860718,grad_norm: 0.9999992616483323, iteration: 21330
loss: 1.05413818359375,grad_norm: 0.9999994050551682, iteration: 21331
loss: 0.9555956125259399,grad_norm: 0.9999990562524523, iteration: 21332
loss: 1.0163869857788086,grad_norm: 0.8360214934173901, iteration: 21333
loss: 0.9830329418182373,grad_norm: 0.9854230378476286, iteration: 21334
loss: 0.9988382458686829,grad_norm: 0.9999992607415551, iteration: 21335
loss: 1.0656081438064575,grad_norm: 0.9999993699240806, iteration: 21336
loss: 1.015542984008789,grad_norm: 0.99999956848102, iteration: 21337
loss: 0.9305624961853027,grad_norm: 0.9999993157667244, iteration: 21338
loss: 1.046105980873108,grad_norm: 0.9999991669223628, iteration: 21339
loss: 0.9475156664848328,grad_norm: 0.9999993030056298, iteration: 21340
loss: 1.039332389831543,grad_norm: 0.9999991843921903, iteration: 21341
loss: 1.031223177909851,grad_norm: 0.9741379375418147, iteration: 21342
loss: 1.0190894603729248,grad_norm: 0.9999995046036871, iteration: 21343
loss: 1.0216538906097412,grad_norm: 0.989923053456933, iteration: 21344
loss: 1.0169434547424316,grad_norm: 0.999999237335486, iteration: 21345
loss: 1.0536410808563232,grad_norm: 0.9999996947263111, iteration: 21346
loss: 1.0340549945831299,grad_norm: 0.9999993179000204, iteration: 21347
loss: 1.0345948934555054,grad_norm: 0.8418747814737356, iteration: 21348
loss: 1.0012266635894775,grad_norm: 0.9999991675087889, iteration: 21349
loss: 1.0102849006652832,grad_norm: 0.8533691689634685, iteration: 21350
loss: 1.042539119720459,grad_norm: 0.9999995733671786, iteration: 21351
loss: 1.0146538019180298,grad_norm: 0.9999990992889326, iteration: 21352
loss: 1.103779911994934,grad_norm: 0.9999997137801707, iteration: 21353
loss: 1.1091101169586182,grad_norm: 0.9999996476383286, iteration: 21354
loss: 1.0163159370422363,grad_norm: 0.9292560412168436, iteration: 21355
loss: 1.00887131690979,grad_norm: 0.9462296853106886, iteration: 21356
loss: 1.0046237707138062,grad_norm: 0.9999990633097496, iteration: 21357
loss: 1.0691524744033813,grad_norm: 0.9999995820273389, iteration: 21358
loss: 1.0551236867904663,grad_norm: 0.9999991747517527, iteration: 21359
loss: 1.0439859628677368,grad_norm: 0.9999991252787599, iteration: 21360
loss: 1.0378881692886353,grad_norm: 0.8100998361437326, iteration: 21361
loss: 0.9754562377929688,grad_norm: 0.9999991003174779, iteration: 21362
loss: 1.012257695198059,grad_norm: 0.899711788400295, iteration: 21363
loss: 1.0046453475952148,grad_norm: 0.9999990817836781, iteration: 21364
loss: 0.9971938729286194,grad_norm: 0.9999994585286835, iteration: 21365
loss: 1.0248122215270996,grad_norm: 0.9999993762955236, iteration: 21366
loss: 0.9954587817192078,grad_norm: 0.8647627260462475, iteration: 21367
loss: 1.0298913717269897,grad_norm: 0.9999995170181785, iteration: 21368
loss: 1.00004243850708,grad_norm: 0.9999993096945822, iteration: 21369
loss: 1.083611011505127,grad_norm: 0.9999993000704668, iteration: 21370
loss: 1.0148972272872925,grad_norm: 0.9999995690471707, iteration: 21371
loss: 0.9987106919288635,grad_norm: 0.9999994679292814, iteration: 21372
loss: 1.0237929821014404,grad_norm: 0.9699855147226834, iteration: 21373
loss: 1.0028716325759888,grad_norm: 0.9504571420596362, iteration: 21374
loss: 1.1254708766937256,grad_norm: 0.9999994434619405, iteration: 21375
loss: 1.019639253616333,grad_norm: 0.9999991302544461, iteration: 21376
loss: 1.0038254261016846,grad_norm: 0.9999994965337455, iteration: 21377
loss: 1.0684114694595337,grad_norm: 0.9999997321189114, iteration: 21378
loss: 1.0590598583221436,grad_norm: 0.9999994729808356, iteration: 21379
loss: 0.9836286902427673,grad_norm: 0.9999995579865026, iteration: 21380
loss: 1.0244604349136353,grad_norm: 0.999999167625197, iteration: 21381
loss: 1.1024199724197388,grad_norm: 0.9999998186046782, iteration: 21382
loss: 0.9951097369194031,grad_norm: 0.9999993191139158, iteration: 21383
loss: 1.0129557847976685,grad_norm: 0.9999993156318977, iteration: 21384
loss: 1.0131281614303589,grad_norm: 0.9999993631490042, iteration: 21385
loss: 1.1189255714416504,grad_norm: 0.9999993976399847, iteration: 21386
loss: 1.0280028581619263,grad_norm: 0.985326720372294, iteration: 21387
loss: 1.1146740913391113,grad_norm: 0.9999999314933119, iteration: 21388
loss: 1.0379441976547241,grad_norm: 0.9999990834884553, iteration: 21389
loss: 1.0853286981582642,grad_norm: 0.9999992759561486, iteration: 21390
loss: 1.0018638372421265,grad_norm: 0.9999992162997936, iteration: 21391
loss: 1.0246220827102661,grad_norm: 0.9999994676470596, iteration: 21392
loss: 1.1112539768218994,grad_norm: 0.9999999626512333, iteration: 21393
loss: 0.9992958307266235,grad_norm: 0.9999991601002312, iteration: 21394
loss: 1.0311599969863892,grad_norm: 0.9999990507550621, iteration: 21395
loss: 0.9930264353752136,grad_norm: 0.9071050448781436, iteration: 21396
loss: 1.024954080581665,grad_norm: 0.9999995555251314, iteration: 21397
loss: 1.0732280015945435,grad_norm: 0.999999737372612, iteration: 21398
loss: 1.0153599977493286,grad_norm: 0.9999992627461112, iteration: 21399
loss: 0.9579123854637146,grad_norm: 0.9999991907473849, iteration: 21400
loss: 0.9396388530731201,grad_norm: 0.999999136242219, iteration: 21401
loss: 1.0457890033721924,grad_norm: 0.9999991502142935, iteration: 21402
loss: 0.9988366961479187,grad_norm: 0.9999990720441229, iteration: 21403
loss: 0.9952981472015381,grad_norm: 0.9495875211479209, iteration: 21404
loss: 0.9951240420341492,grad_norm: 0.9999995770536088, iteration: 21405
loss: 1.0092524290084839,grad_norm: 0.9999991416872502, iteration: 21406
loss: 1.0117138624191284,grad_norm: 0.9999991702277574, iteration: 21407
loss: 0.9750352501869202,grad_norm: 0.9999991653499154, iteration: 21408
loss: 1.1008716821670532,grad_norm: 0.9999995444417636, iteration: 21409
loss: 1.0173441171646118,grad_norm: 0.9999992952089655, iteration: 21410
loss: 0.9915664792060852,grad_norm: 0.9999997376063116, iteration: 21411
loss: 0.9953731298446655,grad_norm: 0.9999990991592654, iteration: 21412
loss: 1.009812593460083,grad_norm: 0.9999990392317504, iteration: 21413
loss: 1.1023167371749878,grad_norm: 0.9999995680892594, iteration: 21414
loss: 0.989362359046936,grad_norm: 0.9605701829189227, iteration: 21415
loss: 1.0398286581039429,grad_norm: 0.999999099781592, iteration: 21416
loss: 1.0262489318847656,grad_norm: 0.9999990896244682, iteration: 21417
loss: 1.0676978826522827,grad_norm: 0.9999995964620244, iteration: 21418
loss: 1.077328085899353,grad_norm: 0.9999994285745453, iteration: 21419
loss: 1.0370391607284546,grad_norm: 0.9999991556922722, iteration: 21420
loss: 1.0233240127563477,grad_norm: 0.9999994055492074, iteration: 21421
loss: 1.0412354469299316,grad_norm: 0.999999443492174, iteration: 21422
loss: 1.0437877178192139,grad_norm: 0.999999082088897, iteration: 21423
loss: 1.0495067834854126,grad_norm: 0.9999998992439469, iteration: 21424
loss: 1.0967620611190796,grad_norm: 0.9999995081815083, iteration: 21425
loss: 1.022035837173462,grad_norm: 0.9999993396394389, iteration: 21426
loss: 0.9983713626861572,grad_norm: 0.9999994633329878, iteration: 21427
loss: 0.9939699769020081,grad_norm: 0.9999992507708718, iteration: 21428
loss: 0.9770691990852356,grad_norm: 0.9999991711750666, iteration: 21429
loss: 1.007592797279358,grad_norm: 0.999999568437105, iteration: 21430
loss: 1.0146888494491577,grad_norm: 0.9780314685697576, iteration: 21431
loss: 1.0021377801895142,grad_norm: 0.9999993903905603, iteration: 21432
loss: 1.0316886901855469,grad_norm: 0.9999994398211586, iteration: 21433
loss: 1.0967544317245483,grad_norm: 0.999999413487555, iteration: 21434
loss: 1.0300085544586182,grad_norm: 0.8869867600445206, iteration: 21435
loss: 1.0920929908752441,grad_norm: 0.9999999255761586, iteration: 21436
loss: 0.9892508387565613,grad_norm: 0.9461486240230492, iteration: 21437
loss: 0.9936895370483398,grad_norm: 0.955440262189821, iteration: 21438
loss: 1.037838339805603,grad_norm: 0.9999992284250918, iteration: 21439
loss: 1.0438605546951294,grad_norm: 0.9611014249362131, iteration: 21440
loss: 1.0086325407028198,grad_norm: 0.9602652523784505, iteration: 21441
loss: 1.0229030847549438,grad_norm: 0.914485556220585, iteration: 21442
loss: 1.0771158933639526,grad_norm: 0.999999497403306, iteration: 21443
loss: 1.007291555404663,grad_norm: 0.9999992288086313, iteration: 21444
loss: 1.0699095726013184,grad_norm: 0.9999995878817288, iteration: 21445
loss: 1.0013314485549927,grad_norm: 0.9999993581803923, iteration: 21446
loss: 1.1375482082366943,grad_norm: 0.9999999400286661, iteration: 21447
loss: 1.0496665239334106,grad_norm: 0.9999991709232003, iteration: 21448
loss: 1.0422133207321167,grad_norm: 0.9999999051030226, iteration: 21449
loss: 1.1016486883163452,grad_norm: 0.9999996779193483, iteration: 21450
loss: 0.9780677556991577,grad_norm: 0.9999992085416795, iteration: 21451
loss: 1.0368213653564453,grad_norm: 0.9999995776595015, iteration: 21452
loss: 1.0250980854034424,grad_norm: 0.9999990656708153, iteration: 21453
loss: 1.052809715270996,grad_norm: 0.9999992713576407, iteration: 21454
loss: 1.0522533655166626,grad_norm: 0.9999992534170529, iteration: 21455
loss: 1.0476791858673096,grad_norm: 0.9999998909945348, iteration: 21456
loss: 1.096058964729309,grad_norm: 0.9999998880433221, iteration: 21457
loss: 0.9975234270095825,grad_norm: 0.9999995251278536, iteration: 21458
loss: 1.0163406133651733,grad_norm: 0.9999992419390232, iteration: 21459
loss: 1.032361626625061,grad_norm: 0.9999991030448635, iteration: 21460
loss: 1.003749966621399,grad_norm: 0.999999727049282, iteration: 21461
loss: 1.0380418300628662,grad_norm: 0.9999991287719333, iteration: 21462
loss: 1.0266932249069214,grad_norm: 0.9999996508776132, iteration: 21463
loss: 0.985927939414978,grad_norm: 0.8335316785723756, iteration: 21464
loss: 1.0019375085830688,grad_norm: 0.9999991303205229, iteration: 21465
loss: 1.023078203201294,grad_norm: 0.9999992032798993, iteration: 21466
loss: 0.9791467785835266,grad_norm: 0.9999991772828583, iteration: 21467
loss: 1.063544511795044,grad_norm: 0.9999989660271834, iteration: 21468
loss: 1.0564080476760864,grad_norm: 0.9999993347280158, iteration: 21469
loss: 1.020546793937683,grad_norm: 0.9999991361997906, iteration: 21470
loss: 0.9872170090675354,grad_norm: 0.9200772504000815, iteration: 21471
loss: 0.9649815559387207,grad_norm: 0.9999996093617357, iteration: 21472
loss: 1.028792142868042,grad_norm: 0.9999996130266875, iteration: 21473
loss: 1.0226649045944214,grad_norm: 0.9999991218858115, iteration: 21474
loss: 1.0215775966644287,grad_norm: 0.9999998452601014, iteration: 21475
loss: 1.038078784942627,grad_norm: 0.9999991128317439, iteration: 21476
loss: 1.006062626838684,grad_norm: 0.9305369633931393, iteration: 21477
loss: 1.0075803995132446,grad_norm: 0.898033153670632, iteration: 21478
loss: 1.0037717819213867,grad_norm: 0.9999991932745836, iteration: 21479
loss: 1.0240665674209595,grad_norm: 0.9999995328179367, iteration: 21480
loss: 1.001292109489441,grad_norm: 0.9506448183552334, iteration: 21481
loss: 1.0378588438034058,grad_norm: 0.9999990581239494, iteration: 21482
loss: 1.0125823020935059,grad_norm: 0.9999991792594124, iteration: 21483
loss: 1.003675103187561,grad_norm: 0.9999991096091675, iteration: 21484
loss: 1.0746253728866577,grad_norm: 0.9999996740530714, iteration: 21485
loss: 1.025227427482605,grad_norm: 0.999999847230418, iteration: 21486
loss: 0.9685473442077637,grad_norm: 0.9999990893936878, iteration: 21487
loss: 1.0140161514282227,grad_norm: 0.8509253827692083, iteration: 21488
loss: 1.0118522644042969,grad_norm: 0.9999990187248571, iteration: 21489
loss: 0.9863731861114502,grad_norm: 0.9999991064849619, iteration: 21490
loss: 1.0383493900299072,grad_norm: 0.9999993794614505, iteration: 21491
loss: 0.9944823980331421,grad_norm: 0.9999992162259, iteration: 21492
loss: 0.9643265008926392,grad_norm: 0.9999991612928602, iteration: 21493
loss: 0.9516378045082092,grad_norm: 0.9999996116997638, iteration: 21494
loss: 1.0178512334823608,grad_norm: 0.9433029168835749, iteration: 21495
loss: 1.0595483779907227,grad_norm: 0.9999996303025003, iteration: 21496
loss: 1.061277985572815,grad_norm: 0.999999396711263, iteration: 21497
loss: 1.0062915086746216,grad_norm: 0.99999918160528, iteration: 21498
loss: 1.0425094366073608,grad_norm: 0.9999992942691757, iteration: 21499
loss: 1.0178571939468384,grad_norm: 0.9999992030438956, iteration: 21500
loss: 1.029985785484314,grad_norm: 0.9999996681396013, iteration: 21501
loss: 0.9872467517852783,grad_norm: 0.9999994782091388, iteration: 21502
loss: 0.9863435626029968,grad_norm: 0.9999994876047323, iteration: 21503
loss: 1.0608075857162476,grad_norm: 0.9999995139033411, iteration: 21504
loss: 1.086917757987976,grad_norm: 0.9999997618852132, iteration: 21505
loss: 1.0973881483078003,grad_norm: 0.9999997248225233, iteration: 21506
loss: 1.005743384361267,grad_norm: 0.9484059637872917, iteration: 21507
loss: 1.0062568187713623,grad_norm: 0.9999991017434072, iteration: 21508
loss: 1.0200566053390503,grad_norm: 0.9999992553384682, iteration: 21509
loss: 1.0235518217086792,grad_norm: 0.9999991833584503, iteration: 21510
loss: 1.0168216228485107,grad_norm: 0.9592119468076199, iteration: 21511
loss: 0.9901549816131592,grad_norm: 0.9999992385977391, iteration: 21512
loss: 0.9955006241798401,grad_norm: 0.9877700719347848, iteration: 21513
loss: 1.0557385683059692,grad_norm: 0.9999997892297802, iteration: 21514
loss: 1.0018008947372437,grad_norm: 0.9999996034919589, iteration: 21515
loss: 1.0206197500228882,grad_norm: 0.9999994032851464, iteration: 21516
loss: 1.0341213941574097,grad_norm: 0.9999991945612029, iteration: 21517
loss: 0.993743896484375,grad_norm: 0.9999990992627279, iteration: 21518
loss: 1.046829104423523,grad_norm: 0.9999994368911429, iteration: 21519
loss: 1.035687804222107,grad_norm: 0.9999992384139993, iteration: 21520
loss: 1.011069655418396,grad_norm: 0.9999993016662299, iteration: 21521
loss: 0.9964920282363892,grad_norm: 0.9408588478292756, iteration: 21522
loss: 1.0205613374710083,grad_norm: 0.999999848671134, iteration: 21523
loss: 1.0850640535354614,grad_norm: 0.9999996576722587, iteration: 21524
loss: 1.013800024986267,grad_norm: 0.9999991896194154, iteration: 21525
loss: 1.0222359895706177,grad_norm: 0.9999990748010469, iteration: 21526
loss: 1.0170879364013672,grad_norm: 0.9999993093544771, iteration: 21527
loss: 0.9956843852996826,grad_norm: 0.9868257531748473, iteration: 21528
loss: 1.0261666774749756,grad_norm: 0.8269618227766536, iteration: 21529
loss: 1.0286144018173218,grad_norm: 0.9999994576205504, iteration: 21530
loss: 1.0016382932662964,grad_norm: 0.9999991934280025, iteration: 21531
loss: 1.038912296295166,grad_norm: 0.9999991149324052, iteration: 21532
loss: 1.0301376581192017,grad_norm: 0.9999994548440795, iteration: 21533
loss: 1.0336546897888184,grad_norm: 0.9999994530872462, iteration: 21534
loss: 1.022418737411499,grad_norm: 0.9999994088369984, iteration: 21535
loss: 1.007706880569458,grad_norm: 0.99999921011733, iteration: 21536
loss: 1.0092520713806152,grad_norm: 0.9999992008663507, iteration: 21537
loss: 1.0263458490371704,grad_norm: 0.9999990049460786, iteration: 21538
loss: 0.9694440960884094,grad_norm: 0.942036766902041, iteration: 21539
loss: 1.013236403465271,grad_norm: 0.9999992690118333, iteration: 21540
loss: 0.9877767562866211,grad_norm: 0.968157360390068, iteration: 21541
loss: 1.0114597082138062,grad_norm: 0.9999993477822526, iteration: 21542
loss: 0.9783076643943787,grad_norm: 0.9999995794205602, iteration: 21543
loss: 0.9711475372314453,grad_norm: 0.92275490487081, iteration: 21544
loss: 1.031775951385498,grad_norm: 0.9999990532605496, iteration: 21545
loss: 0.9845527410507202,grad_norm: 0.9999990977980162, iteration: 21546
loss: 1.0602596998214722,grad_norm: 0.9999997087011001, iteration: 21547
loss: 0.9825897216796875,grad_norm: 0.9069959435695643, iteration: 21548
loss: 1.0136271715164185,grad_norm: 0.9999992410486628, iteration: 21549
loss: 1.09947669506073,grad_norm: 0.9999997440967012, iteration: 21550
loss: 1.029181957244873,grad_norm: 0.9999989768112789, iteration: 21551
loss: 0.9569571018218994,grad_norm: 0.9999994322137762, iteration: 21552
loss: 1.0168416500091553,grad_norm: 0.929934779056125, iteration: 21553
loss: 0.967004656791687,grad_norm: 0.9999991100551416, iteration: 21554
loss: 1.0262763500213623,grad_norm: 0.9999993099502807, iteration: 21555
loss: 0.9900811314582825,grad_norm: 0.9035662229914734, iteration: 21556
loss: 0.9871004223823547,grad_norm: 0.9772716057644906, iteration: 21557
loss: 1.022006869316101,grad_norm: 0.9999991153024892, iteration: 21558
loss: 1.019707441329956,grad_norm: 0.9999994585024478, iteration: 21559
loss: 0.9830098748207092,grad_norm: 0.9999990225564405, iteration: 21560
loss: 1.037146806716919,grad_norm: 0.9999990948653544, iteration: 21561
loss: 1.0005946159362793,grad_norm: 0.9463636721812799, iteration: 21562
loss: 1.0170443058013916,grad_norm: 0.9999992215778072, iteration: 21563
loss: 1.0228604078292847,grad_norm: 0.9999990682029785, iteration: 21564
loss: 0.9856856465339661,grad_norm: 0.9999994077828315, iteration: 21565
loss: 1.0744290351867676,grad_norm: 0.9999992707661731, iteration: 21566
loss: 1.0175869464874268,grad_norm: 0.9999990867673116, iteration: 21567
loss: 1.0432541370391846,grad_norm: 0.9999993035035353, iteration: 21568
loss: 1.015010952949524,grad_norm: 0.9999992795124408, iteration: 21569
loss: 1.0204429626464844,grad_norm: 0.999999270687863, iteration: 21570
loss: 0.9838683605194092,grad_norm: 0.9999993783721406, iteration: 21571
loss: 0.9893655180931091,grad_norm: 0.9999991205647664, iteration: 21572
loss: 1.004531979560852,grad_norm: 0.9999991205754004, iteration: 21573
loss: 0.9852374196052551,grad_norm: 0.9999995819196844, iteration: 21574
loss: 1.0053619146347046,grad_norm: 0.9999994103228345, iteration: 21575
loss: 1.019042730331421,grad_norm: 0.9999996246765783, iteration: 21576
loss: 1.0451632738113403,grad_norm: 0.9999989697542003, iteration: 21577
loss: 1.0364066362380981,grad_norm: 0.9999994969043682, iteration: 21578
loss: 1.0126354694366455,grad_norm: 0.9999992549453753, iteration: 21579
loss: 1.032060146331787,grad_norm: 0.9999994307858219, iteration: 21580
loss: 1.0850679874420166,grad_norm: 0.9999990894961177, iteration: 21581
loss: 1.0325839519500732,grad_norm: 0.9817814093320425, iteration: 21582
loss: 1.0244063138961792,grad_norm: 0.9999990745602286, iteration: 21583
loss: 0.9756044149398804,grad_norm: 0.9999991868270689, iteration: 21584
loss: 1.0619161128997803,grad_norm: 0.9999997266754276, iteration: 21585
loss: 1.0556840896606445,grad_norm: 0.9999990709790366, iteration: 21586
loss: 0.9998040795326233,grad_norm: 0.9999991581391354, iteration: 21587
loss: 0.9973409175872803,grad_norm: 0.9999992035561905, iteration: 21588
loss: 1.015042781829834,grad_norm: 0.9999991168143458, iteration: 21589
loss: 0.9927408695220947,grad_norm: 0.9999990783441954, iteration: 21590
loss: 0.9895871877670288,grad_norm: 0.999999533275489, iteration: 21591
loss: 0.9896007180213928,grad_norm: 0.9437230040156169, iteration: 21592
loss: 1.067415475845337,grad_norm: 0.999999367074791, iteration: 21593
loss: 0.9700037837028503,grad_norm: 0.9911763686735423, iteration: 21594
loss: 1.0094058513641357,grad_norm: 0.9999990196452061, iteration: 21595
loss: 0.970231294631958,grad_norm: 0.9999992174473679, iteration: 21596
loss: 1.0197666883468628,grad_norm: 0.9999995412297343, iteration: 21597
loss: 0.9785411953926086,grad_norm: 0.9548748246068057, iteration: 21598
loss: 1.0542482137680054,grad_norm: 0.9999991706593675, iteration: 21599
loss: 1.0577510595321655,grad_norm: 0.9999993726373471, iteration: 21600
loss: 0.9526400566101074,grad_norm: 0.9999994337029374, iteration: 21601
loss: 1.0117775201797485,grad_norm: 0.9999990389657082, iteration: 21602
loss: 0.950007975101471,grad_norm: 0.9922698493149632, iteration: 21603
loss: 1.1311382055282593,grad_norm: 0.9999999505578087, iteration: 21604
loss: 1.011698842048645,grad_norm: 0.999999141224427, iteration: 21605
loss: 0.9896887540817261,grad_norm: 0.848329872853044, iteration: 21606
loss: 1.0301426649093628,grad_norm: 0.9999991118983761, iteration: 21607
loss: 1.0240062475204468,grad_norm: 0.9999992392485318, iteration: 21608
loss: 1.033065676689148,grad_norm: 0.9999993516092438, iteration: 21609
loss: 1.0534480810165405,grad_norm: 0.9999997551210181, iteration: 21610
loss: 1.0293985605239868,grad_norm: 0.9971650588622915, iteration: 21611
loss: 1.0157921314239502,grad_norm: 0.9302378280831425, iteration: 21612
loss: 1.0126992464065552,grad_norm: 0.9999990289741525, iteration: 21613
loss: 1.0270158052444458,grad_norm: 0.9999993992976061, iteration: 21614
loss: 1.052510142326355,grad_norm: 0.989489751205832, iteration: 21615
loss: 0.9858832955360413,grad_norm: 0.9999989450184065, iteration: 21616
loss: 1.0679044723510742,grad_norm: 0.9999993587855455, iteration: 21617
loss: 1.0698574781417847,grad_norm: 0.9999993159979235, iteration: 21618
loss: 0.9897822141647339,grad_norm: 0.9848740585078227, iteration: 21619
loss: 1.004936933517456,grad_norm: 0.9999993979692893, iteration: 21620
loss: 1.0015777349472046,grad_norm: 0.9927657229394301, iteration: 21621
loss: 1.0110900402069092,grad_norm: 0.9713529751937074, iteration: 21622
loss: 0.9753060340881348,grad_norm: 0.9999993223656723, iteration: 21623
loss: 1.0169835090637207,grad_norm: 0.8962248686046748, iteration: 21624
loss: 1.0133837461471558,grad_norm: 0.9999991995789478, iteration: 21625
loss: 0.9894675612449646,grad_norm: 0.9980644910910433, iteration: 21626
loss: 1.02301025390625,grad_norm: 0.9999992237049993, iteration: 21627
loss: 1.0441765785217285,grad_norm: 0.9999997286916613, iteration: 21628
loss: 1.0054718255996704,grad_norm: 0.9999992908719118, iteration: 21629
loss: 1.0387438535690308,grad_norm: 0.9514176648252217, iteration: 21630
loss: 1.199110984802246,grad_norm: 0.999999778323113, iteration: 21631
loss: 1.062792181968689,grad_norm: 0.9999996875717121, iteration: 21632
loss: 1.0924664735794067,grad_norm: 0.9920742695715834, iteration: 21633
loss: 1.0277271270751953,grad_norm: 0.9999992551040905, iteration: 21634
loss: 1.0040862560272217,grad_norm: 0.9910368276116861, iteration: 21635
loss: 0.9961960911750793,grad_norm: 0.9999991409172083, iteration: 21636
loss: 1.0634771585464478,grad_norm: 0.999999339538198, iteration: 21637
loss: 0.9649286866188049,grad_norm: 0.999999036220855, iteration: 21638
loss: 1.0501019954681396,grad_norm: 0.9999990180683919, iteration: 21639
loss: 1.00360906124115,grad_norm: 0.9999992717797841, iteration: 21640
loss: 1.0538897514343262,grad_norm: 0.999999725143354, iteration: 21641
loss: 1.0546116828918457,grad_norm: 0.9999994577862649, iteration: 21642
loss: 1.0430138111114502,grad_norm: 0.9999996573393833, iteration: 21643
loss: 1.003098964691162,grad_norm: 0.9999991355612943, iteration: 21644
loss: 0.9987149834632874,grad_norm: 0.9516373707487403, iteration: 21645
loss: 1.0417776107788086,grad_norm: 0.948419630756594, iteration: 21646
loss: 1.0495535135269165,grad_norm: 0.9999991250085645, iteration: 21647
loss: 1.0282377004623413,grad_norm: 0.999999737626745, iteration: 21648
loss: 0.9816542267799377,grad_norm: 0.9999991565261502, iteration: 21649
loss: 1.0269147157669067,grad_norm: 0.8823898999972767, iteration: 21650
loss: 1.0508331060409546,grad_norm: 0.9999997588508296, iteration: 21651
loss: 1.1061172485351562,grad_norm: 0.9999992197111754, iteration: 21652
loss: 1.0533682107925415,grad_norm: 0.9999995324362257, iteration: 21653
loss: 1.0260956287384033,grad_norm: 0.9999998886309852, iteration: 21654
loss: 1.0398885011672974,grad_norm: 0.999999270112347, iteration: 21655
loss: 0.9823114275932312,grad_norm: 0.9897893018237618, iteration: 21656
loss: 1.0645400285720825,grad_norm: 0.999999507298768, iteration: 21657
loss: 1.1189320087432861,grad_norm: 0.999999598308298, iteration: 21658
loss: 1.0300315618515015,grad_norm: 0.9999991699785753, iteration: 21659
loss: 0.9578877091407776,grad_norm: 0.999999218470936, iteration: 21660
loss: 1.0029807090759277,grad_norm: 0.9681683148254112, iteration: 21661
loss: 1.040486216545105,grad_norm: 0.999999481007163, iteration: 21662
loss: 1.0361220836639404,grad_norm: 0.9999993571694965, iteration: 21663
loss: 0.9713526368141174,grad_norm: 0.9238908172596955, iteration: 21664
loss: 1.0283148288726807,grad_norm: 0.9226551257681719, iteration: 21665
loss: 1.008535623550415,grad_norm: 0.9999993714213222, iteration: 21666
loss: 1.018168330192566,grad_norm: 0.9999990936388437, iteration: 21667
loss: 1.0527619123458862,grad_norm: 0.8323592450839193, iteration: 21668
loss: 0.9811689257621765,grad_norm: 0.9999990985693687, iteration: 21669
loss: 1.0188188552856445,grad_norm: 0.937417220734209, iteration: 21670
loss: 1.096863031387329,grad_norm: 0.9999994123069614, iteration: 21671
loss: 0.9801806807518005,grad_norm: 0.9999991895565423, iteration: 21672
loss: 1.0375474691390991,grad_norm: 0.9999990874749162, iteration: 21673
loss: 1.0232818126678467,grad_norm: 0.986009777120377, iteration: 21674
loss: 1.0372021198272705,grad_norm: 0.9999990569783884, iteration: 21675
loss: 1.0550745725631714,grad_norm: 0.9999992470856275, iteration: 21676
loss: 1.013445496559143,grad_norm: 0.9999997365282579, iteration: 21677
loss: 1.055026650428772,grad_norm: 0.9999993818105092, iteration: 21678
loss: 1.0010390281677246,grad_norm: 0.9999996085902402, iteration: 21679
loss: 1.022082805633545,grad_norm: 0.9999990690497134, iteration: 21680
loss: 0.9904319643974304,grad_norm: 0.9112155541144363, iteration: 21681
loss: 1.045520305633545,grad_norm: 0.9999994910454094, iteration: 21682
loss: 1.004465103149414,grad_norm: 0.9999992343600039, iteration: 21683
loss: 1.0611424446105957,grad_norm: 0.9999990292512614, iteration: 21684
loss: 1.0246515274047852,grad_norm: 0.9999991095417095, iteration: 21685
loss: 1.0224676132202148,grad_norm: 0.9983489887509671, iteration: 21686
loss: 1.0489743947982788,grad_norm: 0.9999995710433573, iteration: 21687
loss: 0.9863285422325134,grad_norm: 0.9999992987384749, iteration: 21688
loss: 1.0241880416870117,grad_norm: 0.9999995961276763, iteration: 21689
loss: 1.100567102432251,grad_norm: 0.9999999515328949, iteration: 21690
loss: 1.0083363056182861,grad_norm: 0.9999996365409678, iteration: 21691
loss: 1.0231963396072388,grad_norm: 0.9999992620976069, iteration: 21692
loss: 1.0295825004577637,grad_norm: 0.9038544872413244, iteration: 21693
loss: 1.0470998287200928,grad_norm: 0.9999993985599998, iteration: 21694
loss: 1.0082401037216187,grad_norm: 0.9999996423028239, iteration: 21695
loss: 1.1200110912322998,grad_norm: 0.9999996200759466, iteration: 21696
loss: 1.0176153182983398,grad_norm: 0.9999996227149743, iteration: 21697
loss: 0.9815248847007751,grad_norm: 0.9999991509081082, iteration: 21698
loss: 0.9923027753829956,grad_norm: 0.9999991091935514, iteration: 21699
loss: 0.9746160507202148,grad_norm: 0.9999991821073749, iteration: 21700
loss: 1.0795694589614868,grad_norm: 0.9999995980134015, iteration: 21701
loss: 0.9774830937385559,grad_norm: 0.9434806110198642, iteration: 21702
loss: 1.0281999111175537,grad_norm: 0.9999991509625578, iteration: 21703
loss: 0.9895979166030884,grad_norm: 0.999999299973875, iteration: 21704
loss: 0.9807348251342773,grad_norm: 0.9999991658167218, iteration: 21705
loss: 0.996675431728363,grad_norm: 0.9999991081484157, iteration: 21706
loss: 1.0337375402450562,grad_norm: 0.9999990936370746, iteration: 21707
loss: 0.9786902070045471,grad_norm: 0.9999990918990019, iteration: 21708
loss: 1.002583622932434,grad_norm: 0.9999990857296952, iteration: 21709
loss: 1.034986138343811,grad_norm: 0.9999996751768733, iteration: 21710
loss: 0.9840989112854004,grad_norm: 0.9343607487055904, iteration: 21711
loss: 1.0893608331680298,grad_norm: 0.9999995499223615, iteration: 21712
loss: 1.0257512331008911,grad_norm: 0.9999991407057233, iteration: 21713
loss: 1.003058910369873,grad_norm: 0.9999993325338509, iteration: 21714
loss: 1.0115331411361694,grad_norm: 0.9999992981810838, iteration: 21715
loss: 1.0439356565475464,grad_norm: 0.9455004367409565, iteration: 21716
loss: 0.9947379231452942,grad_norm: 0.9305600187907913, iteration: 21717
loss: 0.9991267323493958,grad_norm: 0.9999992765465691, iteration: 21718
loss: 1.0286372900009155,grad_norm: 0.9999992289180606, iteration: 21719
loss: 1.0191748142242432,grad_norm: 0.9999993236956278, iteration: 21720
loss: 1.053667426109314,grad_norm: 0.9999995861330765, iteration: 21721
loss: 0.9881782531738281,grad_norm: 0.9999992365799081, iteration: 21722
loss: 1.0289855003356934,grad_norm: 0.999999317878044, iteration: 21723
loss: 0.9757854342460632,grad_norm: 0.9999991602714846, iteration: 21724
loss: 1.023193597793579,grad_norm: 0.9999994717020673, iteration: 21725
loss: 0.9612342119216919,grad_norm: 0.9999990616690485, iteration: 21726
loss: 0.9750180244445801,grad_norm: 0.8601295398641668, iteration: 21727
loss: 1.0701237916946411,grad_norm: 0.9999992849594149, iteration: 21728
loss: 1.024552822113037,grad_norm: 0.999999053337833, iteration: 21729
loss: 1.0066442489624023,grad_norm: 0.9999992864292787, iteration: 21730
loss: 1.0165983438491821,grad_norm: 0.9999995510385573, iteration: 21731
loss: 1.0261437892913818,grad_norm: 0.9999993521159694, iteration: 21732
loss: 0.9989634156227112,grad_norm: 0.9999991571940228, iteration: 21733
loss: 1.0008618831634521,grad_norm: 0.9999990373676021, iteration: 21734
loss: 1.0021470785140991,grad_norm: 0.9999994283707139, iteration: 21735
loss: 1.0264079570770264,grad_norm: 0.9999992314156789, iteration: 21736
loss: 0.9418900609016418,grad_norm: 0.9999993507633189, iteration: 21737
loss: 1.039106011390686,grad_norm: 0.9999994477188774, iteration: 21738
loss: 1.0099166631698608,grad_norm: 0.9999993049200704, iteration: 21739
loss: 1.070165753364563,grad_norm: 0.999999369610605, iteration: 21740
loss: 1.0150070190429688,grad_norm: 0.9999990549361527, iteration: 21741
loss: 1.026990532875061,grad_norm: 0.999999183772908, iteration: 21742
loss: 0.987848699092865,grad_norm: 0.9999993765498747, iteration: 21743
loss: 1.0177326202392578,grad_norm: 0.9999992470702025, iteration: 21744
loss: 0.9959702491760254,grad_norm: 0.9999997203434705, iteration: 21745
loss: 0.9901221394538879,grad_norm: 0.9999990739519342, iteration: 21746
loss: 1.0194870233535767,grad_norm: 0.9999993861789221, iteration: 21747
loss: 1.0540788173675537,grad_norm: 0.999999194416418, iteration: 21748
loss: 1.001980185508728,grad_norm: 0.9999989474480875, iteration: 21749
loss: 0.9904111623764038,grad_norm: 0.907158469941319, iteration: 21750
loss: 0.9770556688308716,grad_norm: 0.9423399042330164, iteration: 21751
loss: 1.0381977558135986,grad_norm: 0.9999989644574535, iteration: 21752
loss: 1.0498820543289185,grad_norm: 0.9999994011312171, iteration: 21753
loss: 1.0364618301391602,grad_norm: 0.9999992621660917, iteration: 21754
loss: 1.0439802408218384,grad_norm: 0.9999995570999015, iteration: 21755
loss: 1.0222498178482056,grad_norm: 0.9334457749553058, iteration: 21756
loss: 0.934717059135437,grad_norm: 0.9999991031107991, iteration: 21757
loss: 1.0218335390090942,grad_norm: 0.9999993263565191, iteration: 21758
loss: 1.0074462890625,grad_norm: 0.9999993654188477, iteration: 21759
loss: 1.0074763298034668,grad_norm: 0.9999989923035714, iteration: 21760
loss: 0.9798346161842346,grad_norm: 0.9187057711858477, iteration: 21761
loss: 1.0190279483795166,grad_norm: 0.9999991134180148, iteration: 21762
loss: 0.9926254153251648,grad_norm: 0.8979467931102969, iteration: 21763
loss: 0.9758740663528442,grad_norm: 0.9999992484145812, iteration: 21764
loss: 1.0471762418746948,grad_norm: 0.9999997487674644, iteration: 21765
loss: 1.029802918434143,grad_norm: 0.9999992323767439, iteration: 21766
loss: 1.0294239521026611,grad_norm: 0.9999996409906394, iteration: 21767
loss: 1.0688633918762207,grad_norm: 0.999999878185107, iteration: 21768
loss: 0.9970777630805969,grad_norm: 0.9999994534611975, iteration: 21769
loss: 1.02872896194458,grad_norm: 0.9999991665587871, iteration: 21770
loss: 1.0137511491775513,grad_norm: 0.9999995724655397, iteration: 21771
loss: 1.039859414100647,grad_norm: 0.999999115983528, iteration: 21772
loss: 1.020973563194275,grad_norm: 0.9999992741748178, iteration: 21773
loss: 0.9830938577651978,grad_norm: 0.8462640309362458, iteration: 21774
loss: 0.9965042471885681,grad_norm: 0.9095407506954465, iteration: 21775
loss: 1.0245381593704224,grad_norm: 0.8808453747444697, iteration: 21776
loss: 0.9895421862602234,grad_norm: 0.9999991992869615, iteration: 21777
loss: 1.0700322389602661,grad_norm: 0.9999996475063221, iteration: 21778
loss: 1.023474931716919,grad_norm: 0.9999991550553309, iteration: 21779
loss: 1.036962628364563,grad_norm: 0.9999993071825624, iteration: 21780
loss: 1.0167113542556763,grad_norm: 0.9715613817489672, iteration: 21781
loss: 1.0195878744125366,grad_norm: 0.9999992202854397, iteration: 21782
loss: 1.0518853664398193,grad_norm: 0.9999996072685015, iteration: 21783
loss: 1.003130555152893,grad_norm: 0.9999990379481875, iteration: 21784
loss: 1.0122122764587402,grad_norm: 0.9999990624881364, iteration: 21785
loss: 1.0633618831634521,grad_norm: 0.9999990619867865, iteration: 21786
loss: 0.990088164806366,grad_norm: 0.9999991761640076, iteration: 21787
loss: 1.0034531354904175,grad_norm: 0.9135926864289616, iteration: 21788
loss: 1.0163313150405884,grad_norm: 0.9806758652866524, iteration: 21789
loss: 1.023808240890503,grad_norm: 0.8171091496023696, iteration: 21790
loss: 1.038683533668518,grad_norm: 0.9999992638231618, iteration: 21791
loss: 1.052544355392456,grad_norm: 0.9999992555745709, iteration: 21792
loss: 1.0164408683776855,grad_norm: 0.999999214168901, iteration: 21793
loss: 1.0205743312835693,grad_norm: 0.9174033735745725, iteration: 21794
loss: 1.0204708576202393,grad_norm: 0.9999996764653438, iteration: 21795
loss: 1.0152186155319214,grad_norm: 0.9999990927726862, iteration: 21796
loss: 1.0640196800231934,grad_norm: 0.9999996402232858, iteration: 21797
loss: 0.9712534546852112,grad_norm: 0.999999119637672, iteration: 21798
loss: 1.0008045434951782,grad_norm: 0.9999991923226462, iteration: 21799
loss: 1.0328658819198608,grad_norm: 0.9999997048012189, iteration: 21800
loss: 0.994404673576355,grad_norm: 0.9999992985599604, iteration: 21801
loss: 1.0648897886276245,grad_norm: 0.9999997854466884, iteration: 21802
loss: 0.9974252581596375,grad_norm: 0.9999991795429576, iteration: 21803
loss: 0.9783939123153687,grad_norm: 0.9999992509384756, iteration: 21804
loss: 1.0030977725982666,grad_norm: 0.9756606241330951, iteration: 21805
loss: 1.0479352474212646,grad_norm: 0.9999995743327361, iteration: 21806
loss: 1.0310416221618652,grad_norm: 0.9999989909646133, iteration: 21807
loss: 0.9856672286987305,grad_norm: 0.9999992029297591, iteration: 21808
loss: 0.9722843766212463,grad_norm: 0.9561061235642573, iteration: 21809
loss: 0.9945189952850342,grad_norm: 0.9999991237201701, iteration: 21810
loss: 1.0108481645584106,grad_norm: 0.999999155802938, iteration: 21811
loss: 1.041699767112732,grad_norm: 0.9999994870789414, iteration: 21812
loss: 1.1147243976593018,grad_norm: 0.9999993586688829, iteration: 21813
loss: 1.0065639019012451,grad_norm: 0.9999990371434837, iteration: 21814
loss: 1.0376073122024536,grad_norm: 0.9999993473260291, iteration: 21815
loss: 1.0476031303405762,grad_norm: 0.9999990961378742, iteration: 21816
loss: 0.9880344867706299,grad_norm: 0.9999992945563579, iteration: 21817
loss: 1.0059341192245483,grad_norm: 0.9940596227098434, iteration: 21818
loss: 1.062578558921814,grad_norm: 0.9999995947101383, iteration: 21819
loss: 1.0337127447128296,grad_norm: 0.9999990751585499, iteration: 21820
loss: 0.9735826253890991,grad_norm: 0.9999989490004981, iteration: 21821
loss: 0.9896755218505859,grad_norm: 0.9999992701989624, iteration: 21822
loss: 1.0325570106506348,grad_norm: 0.9743800465341951, iteration: 21823
loss: 0.9921687841415405,grad_norm: 0.9999991397741187, iteration: 21824
loss: 0.9780843257904053,grad_norm: 0.9999991306655981, iteration: 21825
loss: 1.0084526538848877,grad_norm: 0.9999990819814559, iteration: 21826
loss: 1.0400053262710571,grad_norm: 0.9999990485551259, iteration: 21827
loss: 0.9965794086456299,grad_norm: 0.9999991879189584, iteration: 21828
loss: 0.9635990262031555,grad_norm: 0.8207292463133888, iteration: 21829
loss: 1.0214285850524902,grad_norm: 0.9673220262690863, iteration: 21830
loss: 1.0130324363708496,grad_norm: 0.9999991493604675, iteration: 21831
loss: 1.0404791831970215,grad_norm: 0.8705206441687031, iteration: 21832
loss: 0.9929952025413513,grad_norm: 0.99999915732882, iteration: 21833
loss: 0.9950646162033081,grad_norm: 0.9999991509624492, iteration: 21834
loss: 1.0329294204711914,grad_norm: 0.8973633116131029, iteration: 21835
loss: 1.0269426107406616,grad_norm: 0.9999990703918703, iteration: 21836
loss: 0.9844223260879517,grad_norm: 0.999999140124234, iteration: 21837
loss: 1.0074353218078613,grad_norm: 0.9999991382083196, iteration: 21838
loss: 1.004777431488037,grad_norm: 0.9999998638468566, iteration: 21839
loss: 1.0374631881713867,grad_norm: 0.9404258789784864, iteration: 21840
loss: 1.0217026472091675,grad_norm: 0.8181043495020475, iteration: 21841
loss: 0.952459990978241,grad_norm: 0.9999990810242912, iteration: 21842
loss: 1.0193439722061157,grad_norm: 0.9999991118711816, iteration: 21843
loss: 0.9840832352638245,grad_norm: 0.9083802243108938, iteration: 21844
loss: 1.0905014276504517,grad_norm: 0.9999993666583048, iteration: 21845
loss: 1.0095674991607666,grad_norm: 0.9999990832591094, iteration: 21846
loss: 1.00874662399292,grad_norm: 0.9318053720050429, iteration: 21847
loss: 1.0044087171554565,grad_norm: 0.999999126996667, iteration: 21848
loss: 0.9865025281906128,grad_norm: 0.9999991831066816, iteration: 21849
loss: 1.0340893268585205,grad_norm: 0.9999995826774171, iteration: 21850
loss: 0.9961238503456116,grad_norm: 0.999999065152213, iteration: 21851
loss: 1.0148122310638428,grad_norm: 0.9999992456568064, iteration: 21852
loss: 1.0104767084121704,grad_norm: 0.9999990338487141, iteration: 21853
loss: 1.026646614074707,grad_norm: 0.9136340895431043, iteration: 21854
loss: 0.9893225431442261,grad_norm: 0.9044017277344759, iteration: 21855
loss: 1.0974656343460083,grad_norm: 0.9999997129952337, iteration: 21856
loss: 1.0126330852508545,grad_norm: 0.999999073485227, iteration: 21857
loss: 1.0093011856079102,grad_norm: 0.983664618982328, iteration: 21858
loss: 1.1017786264419556,grad_norm: 0.9999993031250916, iteration: 21859
loss: 1.005645990371704,grad_norm: 0.9999990911411069, iteration: 21860
loss: 1.0235971212387085,grad_norm: 0.99999981704038, iteration: 21861
loss: 1.0441607236862183,grad_norm: 0.9999999464086397, iteration: 21862
loss: 1.0310708284378052,grad_norm: 0.999999145254233, iteration: 21863
loss: 1.0361014604568481,grad_norm: 0.9999993086526624, iteration: 21864
loss: 1.0543534755706787,grad_norm: 0.9999992922263464, iteration: 21865
loss: 1.0716335773468018,grad_norm: 0.9999994452728471, iteration: 21866
loss: 1.026002287864685,grad_norm: 0.9999992002642767, iteration: 21867
loss: 1.033900260925293,grad_norm: 0.9889658948695759, iteration: 21868
loss: 0.9961009621620178,grad_norm: 0.9999991702970338, iteration: 21869
loss: 1.1084785461425781,grad_norm: 0.9999995674695918, iteration: 21870
loss: 0.9931301474571228,grad_norm: 0.9999997564726056, iteration: 21871
loss: 1.0527905225753784,grad_norm: 0.9948829030641062, iteration: 21872
loss: 1.0243664979934692,grad_norm: 0.9999995167520401, iteration: 21873
loss: 1.0157872438430786,grad_norm: 0.9999994272348085, iteration: 21874
loss: 1.0690819025039673,grad_norm: 0.9999992561885443, iteration: 21875
loss: 1.0219202041625977,grad_norm: 0.9999994026790587, iteration: 21876
loss: 1.0162405967712402,grad_norm: 0.9999991740669161, iteration: 21877
loss: 1.0212299823760986,grad_norm: 0.9861586322908783, iteration: 21878
loss: 1.004635214805603,grad_norm: 0.9999991358451973, iteration: 21879
loss: 1.0416189432144165,grad_norm: 0.9999992201880434, iteration: 21880
loss: 1.0609054565429688,grad_norm: 0.9999994769088227, iteration: 21881
loss: 1.0009595155715942,grad_norm: 0.9627438565430727, iteration: 21882
loss: 1.0051780939102173,grad_norm: 0.9999990597572522, iteration: 21883
loss: 1.0207761526107788,grad_norm: 0.936306708784495, iteration: 21884
loss: 1.0791651010513306,grad_norm: 0.9999995824724821, iteration: 21885
loss: 1.0123814344406128,grad_norm: 0.9999991533211391, iteration: 21886
loss: 1.0399432182312012,grad_norm: 0.9999991152776655, iteration: 21887
loss: 1.0350075960159302,grad_norm: 0.9999994016354813, iteration: 21888
loss: 1.0414385795593262,grad_norm: 0.9999993980043299, iteration: 21889
loss: 0.9990061521530151,grad_norm: 0.9999991916399236, iteration: 21890
loss: 1.0083062648773193,grad_norm: 0.9329827107961506, iteration: 21891
loss: 1.0292651653289795,grad_norm: 0.9999992288163218, iteration: 21892
loss: 1.0238887071609497,grad_norm: 0.9999990482731088, iteration: 21893
loss: 1.0068589448928833,grad_norm: 0.9999990217465429, iteration: 21894
loss: 0.9815962910652161,grad_norm: 0.9999991598358109, iteration: 21895
loss: 1.032827377319336,grad_norm: 0.9999991687470878, iteration: 21896
loss: 1.005163550376892,grad_norm: 0.9591134083805861, iteration: 21897
loss: 1.034351110458374,grad_norm: 0.9999992373443453, iteration: 21898
loss: 1.031785488128662,grad_norm: 0.9999990575746003, iteration: 21899
loss: 1.0360088348388672,grad_norm: 0.9999993359997771, iteration: 21900
loss: 1.0313202142715454,grad_norm: 0.9999993451058412, iteration: 21901
loss: 1.032537817955017,grad_norm: 0.9999994172484551, iteration: 21902
loss: 1.0462615489959717,grad_norm: 0.9999990732123337, iteration: 21903
loss: 1.052793264389038,grad_norm: 0.9999991420485489, iteration: 21904
loss: 1.0166271924972534,grad_norm: 0.9365486439881687, iteration: 21905
loss: 1.031767725944519,grad_norm: 0.9999993139787688, iteration: 21906
loss: 0.9954391717910767,grad_norm: 0.9635827705699858, iteration: 21907
loss: 0.9954215288162231,grad_norm: 0.9999993387203464, iteration: 21908
loss: 1.0420935153961182,grad_norm: 0.9999993827502299, iteration: 21909
loss: 1.0677095651626587,grad_norm: 0.9999994272905691, iteration: 21910
loss: 0.9768868088722229,grad_norm: 0.999998997112091, iteration: 21911
loss: 0.9920420050621033,grad_norm: 0.999999387913969, iteration: 21912
loss: 1.0491405725479126,grad_norm: 0.9999989606391159, iteration: 21913
loss: 1.0677021741867065,grad_norm: 0.9999991105365237, iteration: 21914
loss: 1.0220437049865723,grad_norm: 0.9089994254670514, iteration: 21915
loss: 0.9736693501472473,grad_norm: 0.9999990344275375, iteration: 21916
loss: 1.0286399126052856,grad_norm: 0.9458850004967831, iteration: 21917
loss: 0.9831066131591797,grad_norm: 0.9765399902275357, iteration: 21918
loss: 0.9891417622566223,grad_norm: 0.9999997969305338, iteration: 21919
loss: 0.9929587244987488,grad_norm: 0.9665877490192173, iteration: 21920
loss: 1.0107207298278809,grad_norm: 0.9999994784531667, iteration: 21921
loss: 1.0311272144317627,grad_norm: 0.9999995887979874, iteration: 21922
loss: 1.0122497081756592,grad_norm: 0.9999991659224908, iteration: 21923
loss: 1.0255736112594604,grad_norm: 0.9999997930564107, iteration: 21924
loss: 1.0131282806396484,grad_norm: 0.9999991740452373, iteration: 21925
loss: 0.9592459797859192,grad_norm: 0.9999990154403703, iteration: 21926
loss: 1.0009636878967285,grad_norm: 0.999999370545811, iteration: 21927
loss: 1.0104283094406128,grad_norm: 0.9999990496691111, iteration: 21928
loss: 1.0029747486114502,grad_norm: 0.9999991027244487, iteration: 21929
loss: 0.9843184351921082,grad_norm: 0.9999994621700702, iteration: 21930
loss: 1.0022746324539185,grad_norm: 0.999999375538983, iteration: 21931
loss: 1.0459619760513306,grad_norm: 0.9999992811221473, iteration: 21932
loss: 1.0574785470962524,grad_norm: 0.9266027783452682, iteration: 21933
loss: 1.072107195854187,grad_norm: 0.9999992902785847, iteration: 21934
loss: 0.9779140949249268,grad_norm: 0.9999990471230277, iteration: 21935
loss: 1.050456166267395,grad_norm: 0.999999314200846, iteration: 21936
loss: 1.0477871894836426,grad_norm: 0.9999990555929722, iteration: 21937
loss: 1.0478830337524414,grad_norm: 0.9999992264336548, iteration: 21938
loss: 1.0367316007614136,grad_norm: 0.9384807348793694, iteration: 21939
loss: 1.0683214664459229,grad_norm: 0.9999992062532703, iteration: 21940
loss: 1.0445151329040527,grad_norm: 0.9999993762772951, iteration: 21941
loss: 1.0282992124557495,grad_norm: 0.9980513487660999, iteration: 21942
loss: 1.0219498872756958,grad_norm: 0.9999994234106653, iteration: 21943
loss: 1.0075347423553467,grad_norm: 0.9772712966938701, iteration: 21944
loss: 1.0144233703613281,grad_norm: 0.9999992489363358, iteration: 21945
loss: 1.000766396522522,grad_norm: 0.9434045644108021, iteration: 21946
loss: 1.1253162622451782,grad_norm: 0.999999620509895, iteration: 21947
loss: 1.0328000783920288,grad_norm: 0.9797158756397892, iteration: 21948
loss: 1.0284005403518677,grad_norm: 0.9999994256580577, iteration: 21949
loss: 1.000126600265503,grad_norm: 0.9999991369433806, iteration: 21950
loss: 1.0061464309692383,grad_norm: 0.9999991869818419, iteration: 21951
loss: 1.0055792331695557,grad_norm: 0.9999989785226282, iteration: 21952
loss: 1.0614044666290283,grad_norm: 0.9999992671099655, iteration: 21953
loss: 1.0344163179397583,grad_norm: 0.9641619409709218, iteration: 21954
loss: 1.0306775569915771,grad_norm: 0.9999992903062299, iteration: 21955
loss: 1.034764289855957,grad_norm: 0.9999989532265597, iteration: 21956
loss: 0.9951397180557251,grad_norm: 0.999999020934478, iteration: 21957
loss: 1.0128977298736572,grad_norm: 0.9999992971393445, iteration: 21958
loss: 1.028807282447815,grad_norm: 0.9999990223204114, iteration: 21959
loss: 1.019614815711975,grad_norm: 0.9999993195166326, iteration: 21960
loss: 1.0056564807891846,grad_norm: 0.9999992507451646, iteration: 21961
loss: 0.9998360276222229,grad_norm: 0.8703989092233183, iteration: 21962
loss: 1.0012069940567017,grad_norm: 0.9870365130812013, iteration: 21963
loss: 1.0024698972702026,grad_norm: 0.9884902490268543, iteration: 21964
loss: 1.0000838041305542,grad_norm: 0.8605912580131881, iteration: 21965
loss: 1.01984441280365,grad_norm: 0.9999993879163158, iteration: 21966
loss: 0.9854946732521057,grad_norm: 0.9220788677690895, iteration: 21967
loss: 1.0033161640167236,grad_norm: 0.9734849641921857, iteration: 21968
loss: 0.9828268885612488,grad_norm: 0.9999991184072228, iteration: 21969
loss: 1.007165551185608,grad_norm: 0.999999704704148, iteration: 21970
loss: 0.9608671069145203,grad_norm: 0.9563517884418877, iteration: 21971
loss: 1.0099955797195435,grad_norm: 0.9207739591806443, iteration: 21972
loss: 1.0032182931900024,grad_norm: 0.9999992208453692, iteration: 21973
loss: 1.0104880332946777,grad_norm: 0.9999993002573355, iteration: 21974
loss: 1.0061794519424438,grad_norm: 0.9999990354219226, iteration: 21975
loss: 1.014915108680725,grad_norm: 0.9999991445004831, iteration: 21976
loss: 1.0033167600631714,grad_norm: 0.999999272781882, iteration: 21977
loss: 1.0460898876190186,grad_norm: 0.9999991032392344, iteration: 21978
loss: 0.9482174515724182,grad_norm: 0.9997845094932487, iteration: 21979
loss: 1.0065910816192627,grad_norm: 0.9999989872157796, iteration: 21980
loss: 1.0131343603134155,grad_norm: 0.9999990048603526, iteration: 21981
loss: 0.9643621444702148,grad_norm: 0.9999990902668364, iteration: 21982
loss: 1.0093791484832764,grad_norm: 0.9291399104455651, iteration: 21983
loss: 1.0416840314865112,grad_norm: 0.9999991499316861, iteration: 21984
loss: 1.013229489326477,grad_norm: 0.9216099048429488, iteration: 21985
loss: 1.01641047000885,grad_norm: 0.9999991607078829, iteration: 21986
loss: 1.0523505210876465,grad_norm: 0.9999993612715407, iteration: 21987
loss: 1.0207054615020752,grad_norm: 0.9999990341272155, iteration: 21988
loss: 1.0169506072998047,grad_norm: 0.99999909623832, iteration: 21989
loss: 1.0560845136642456,grad_norm: 0.9999993375541867, iteration: 21990
loss: 1.0718127489089966,grad_norm: 0.9999994725956634, iteration: 21991
loss: 1.0194389820098877,grad_norm: 0.9999992434853959, iteration: 21992
loss: 0.9925552010536194,grad_norm: 0.9498860033132304, iteration: 21993
loss: 0.9865017533302307,grad_norm: 0.999999017754709, iteration: 21994
loss: 1.0422213077545166,grad_norm: 0.9239432821376742, iteration: 21995
loss: 0.9813684821128845,grad_norm: 0.9253098410747245, iteration: 21996
loss: 1.0382870435714722,grad_norm: 0.9999994816101834, iteration: 21997
loss: 1.01933753490448,grad_norm: 0.9541782803041184, iteration: 21998
loss: 1.010371446609497,grad_norm: 0.9999992486114618, iteration: 21999
loss: 1.0050127506256104,grad_norm: 0.9999991611672577, iteration: 22000
loss: 0.9893388152122498,grad_norm: 0.9485592315396044, iteration: 22001
loss: 0.9984845519065857,grad_norm: 0.9999991115479674, iteration: 22002
loss: 1.051368236541748,grad_norm: 0.9999993330321119, iteration: 22003
loss: 1.0146255493164062,grad_norm: 0.999999012715087, iteration: 22004
loss: 1.0475752353668213,grad_norm: 0.999999901882379, iteration: 22005
loss: 1.0046985149383545,grad_norm: 0.9607918777683757, iteration: 22006
loss: 1.0177152156829834,grad_norm: 0.9999991240698024, iteration: 22007
loss: 1.0815376043319702,grad_norm: 0.9999994448939437, iteration: 22008
loss: 1.0218933820724487,grad_norm: 0.9999992661484309, iteration: 22009
loss: 1.0156192779541016,grad_norm: 0.9999997641115301, iteration: 22010
loss: 1.0032485723495483,grad_norm: 0.99999912776863, iteration: 22011
loss: 1.0091816186904907,grad_norm: 0.9999991858071551, iteration: 22012
loss: 1.026607871055603,grad_norm: 0.9999991259845018, iteration: 22013
loss: 0.9771187901496887,grad_norm: 0.9999991254705953, iteration: 22014
loss: 1.0471551418304443,grad_norm: 0.9999991195251376, iteration: 22015
loss: 1.0237643718719482,grad_norm: 0.9999993504396882, iteration: 22016
loss: 1.027818202972412,grad_norm: 0.9477044435914548, iteration: 22017
loss: 1.0227608680725098,grad_norm: 0.9999992291085008, iteration: 22018
loss: 1.0399339199066162,grad_norm: 0.9999993006752007, iteration: 22019
loss: 0.9917662739753723,grad_norm: 0.9999991455294372, iteration: 22020
loss: 1.0068455934524536,grad_norm: 0.9999991704191056, iteration: 22021
loss: 1.0448137521743774,grad_norm: 0.9999992503640133, iteration: 22022
loss: 0.9898087382316589,grad_norm: 0.9999991084348241, iteration: 22023
loss: 1.0545525550842285,grad_norm: 0.9999992997757702, iteration: 22024
loss: 0.993806779384613,grad_norm: 0.8876222567680283, iteration: 22025
loss: 1.0086325407028198,grad_norm: 0.9999994954233017, iteration: 22026
loss: 1.0017576217651367,grad_norm: 0.9999996404498139, iteration: 22027
loss: 1.0241284370422363,grad_norm: 0.999999143177036, iteration: 22028
loss: 0.9722867012023926,grad_norm: 0.9630697091489816, iteration: 22029
loss: 1.0574851036071777,grad_norm: 0.9999991259004165, iteration: 22030
loss: 1.0707223415374756,grad_norm: 0.9999998674337908, iteration: 22031
loss: 1.0046231746673584,grad_norm: 0.999999112740708, iteration: 22032
loss: 0.9956705570220947,grad_norm: 0.8995519623142672, iteration: 22033
loss: 1.0584098100662231,grad_norm: 0.9633594014334712, iteration: 22034
loss: 1.003901720046997,grad_norm: 0.9999993450585282, iteration: 22035
loss: 1.1810121536254883,grad_norm: 0.9999994811717688, iteration: 22036
loss: 1.022377848625183,grad_norm: 0.9999992013498685, iteration: 22037
loss: 1.0924367904663086,grad_norm: 0.9999993229640267, iteration: 22038
loss: 0.9920966029167175,grad_norm: 0.9999996525883496, iteration: 22039
loss: 1.0077245235443115,grad_norm: 0.9999990772728747, iteration: 22040
loss: 1.086722493171692,grad_norm: 0.9999994734683119, iteration: 22041
loss: 1.0098862648010254,grad_norm: 0.9752689483352966, iteration: 22042
loss: 1.0597490072250366,grad_norm: 0.9999993531597515, iteration: 22043
loss: 0.9859336614608765,grad_norm: 0.9865173167036296, iteration: 22044
loss: 0.9703729152679443,grad_norm: 0.9999991842628668, iteration: 22045
loss: 1.0866631269454956,grad_norm: 0.9999997956919894, iteration: 22046
loss: 1.0519578456878662,grad_norm: 0.9999994053241817, iteration: 22047
loss: 0.9711648225784302,grad_norm: 0.9999993043108747, iteration: 22048
loss: 1.0221909284591675,grad_norm: 0.9999992391791104, iteration: 22049
loss: 1.041135549545288,grad_norm: 0.9999993084149374, iteration: 22050
loss: 1.0370033979415894,grad_norm: 0.9999990956952283, iteration: 22051
loss: 1.0315967798233032,grad_norm: 0.9999992493897168, iteration: 22052
loss: 1.0302807092666626,grad_norm: 0.9999994241496089, iteration: 22053
loss: 0.9559418559074402,grad_norm: 0.9945920674868206, iteration: 22054
loss: 0.9918952584266663,grad_norm: 0.9890225314006522, iteration: 22055
loss: 1.0207070112228394,grad_norm: 0.9999990066398506, iteration: 22056
loss: 1.0481985807418823,grad_norm: 0.9999990708983767, iteration: 22057
loss: 1.0145138502120972,grad_norm: 0.9999996555864289, iteration: 22058
loss: 1.055478811264038,grad_norm: 0.9999990759844457, iteration: 22059
loss: 1.0094267129898071,grad_norm: 0.9999993403624853, iteration: 22060
loss: 1.0547865629196167,grad_norm: 0.9999991764060938, iteration: 22061
loss: 1.0133165121078491,grad_norm: 0.8954543257279378, iteration: 22062
loss: 0.9740530848503113,grad_norm: 0.9999989924197616, iteration: 22063
loss: 1.0687226057052612,grad_norm: 0.9999997291908269, iteration: 22064
loss: 1.0107311010360718,grad_norm: 0.9732389462755727, iteration: 22065
loss: 1.013292670249939,grad_norm: 0.999999322321683, iteration: 22066
loss: 1.031895399093628,grad_norm: 0.999999234236037, iteration: 22067
loss: 1.0062274932861328,grad_norm: 0.9999991716972336, iteration: 22068
loss: 1.0861237049102783,grad_norm: 0.9999993052279444, iteration: 22069
loss: 1.0044052600860596,grad_norm: 0.9999990907482449, iteration: 22070
loss: 1.030282974243164,grad_norm: 0.999999202130161, iteration: 22071
loss: 0.9742919206619263,grad_norm: 0.9999991050281442, iteration: 22072
loss: 1.0511298179626465,grad_norm: 0.9999994780821446, iteration: 22073
loss: 1.0202163457870483,grad_norm: 0.9999993047049407, iteration: 22074
loss: 1.0726248025894165,grad_norm: 0.9999991489724577, iteration: 22075
loss: 1.0237853527069092,grad_norm: 0.9871703774634506, iteration: 22076
loss: 1.0006884336471558,grad_norm: 0.9703653921797513, iteration: 22077
loss: 1.0020495653152466,grad_norm: 0.9999991972883383, iteration: 22078
loss: 1.0089460611343384,grad_norm: 0.9999990907300056, iteration: 22079
loss: 1.0260264873504639,grad_norm: 0.927012716864033, iteration: 22080
loss: 1.0907580852508545,grad_norm: 0.9999993252227857, iteration: 22081
loss: 1.0719715356826782,grad_norm: 0.9999994315887671, iteration: 22082
loss: 0.9935328364372253,grad_norm: 0.9999992833242182, iteration: 22083
loss: 1.0473217964172363,grad_norm: 0.9999992395312909, iteration: 22084
loss: 1.014777421951294,grad_norm: 0.999999206887197, iteration: 22085
loss: 1.0093804597854614,grad_norm: 0.999999036328199, iteration: 22086
loss: 1.0110145807266235,grad_norm: 0.9999990831718621, iteration: 22087
loss: 1.0023949146270752,grad_norm: 0.9999992515692915, iteration: 22088
loss: 1.0137325525283813,grad_norm: 0.9555031395943356, iteration: 22089
loss: 1.0030204057693481,grad_norm: 0.9999992841005438, iteration: 22090
loss: 0.9598855376243591,grad_norm: 0.9200357633742354, iteration: 22091
loss: 1.0230793952941895,grad_norm: 0.9999990537016121, iteration: 22092
loss: 1.0175881385803223,grad_norm: 0.9999995321093746, iteration: 22093
loss: 1.0113760232925415,grad_norm: 0.9911198159444176, iteration: 22094
loss: 1.0519287586212158,grad_norm: 0.9999992019275272, iteration: 22095
loss: 1.0125483274459839,grad_norm: 0.9999989540800123, iteration: 22096
loss: 1.0445640087127686,grad_norm: 0.9931447697373852, iteration: 22097
loss: 1.0145595073699951,grad_norm: 0.9999993117667327, iteration: 22098
loss: 1.0039873123168945,grad_norm: 0.9215462361184664, iteration: 22099
loss: 0.993385374546051,grad_norm: 0.9999991700359797, iteration: 22100
loss: 1.0380308628082275,grad_norm: 0.9999995637774906, iteration: 22101
loss: 1.0311224460601807,grad_norm: 0.9999994153362206, iteration: 22102
loss: 1.0442943572998047,grad_norm: 0.9999991913354782, iteration: 22103
loss: 1.013561725616455,grad_norm: 0.9999993035393995, iteration: 22104
loss: 0.9514883756637573,grad_norm: 0.9759965967417672, iteration: 22105
loss: 1.015070915222168,grad_norm: 0.999999035868259, iteration: 22106
loss: 1.0253784656524658,grad_norm: 0.9999990996032392, iteration: 22107
loss: 1.0028445720672607,grad_norm: 0.8560344703762403, iteration: 22108
loss: 1.0538405179977417,grad_norm: 0.9999997643708196, iteration: 22109
loss: 0.9980472326278687,grad_norm: 0.9999990808301799, iteration: 22110
loss: 1.0599209070205688,grad_norm: 0.9999991586630994, iteration: 22111
loss: 0.9927169680595398,grad_norm: 0.9999991917706267, iteration: 22112
loss: 0.9440045952796936,grad_norm: 0.9999992427393217, iteration: 22113
loss: 1.0217264890670776,grad_norm: 0.9999992351771658, iteration: 22114
loss: 1.071372389793396,grad_norm: 0.9999993722400532, iteration: 22115
loss: 1.0353193283081055,grad_norm: 0.9999991676754615, iteration: 22116
loss: 1.0688834190368652,grad_norm: 0.9999996300368944, iteration: 22117
loss: 1.0148048400878906,grad_norm: 0.9283692329429026, iteration: 22118
loss: 1.005690097808838,grad_norm: 0.9999992283937257, iteration: 22119
loss: 1.0163551568984985,grad_norm: 0.999999270093632, iteration: 22120
loss: 1.0238087177276611,grad_norm: 0.999999138938899, iteration: 22121
loss: 1.037567138671875,grad_norm: 0.9999991832775176, iteration: 22122
loss: 1.0106282234191895,grad_norm: 0.9999989657608738, iteration: 22123
loss: 1.0043610334396362,grad_norm: 0.9999991867303037, iteration: 22124
loss: 1.072090983390808,grad_norm: 0.9999995294360637, iteration: 22125
loss: 1.0235450267791748,grad_norm: 0.9209757576881572, iteration: 22126
loss: 0.9882182478904724,grad_norm: 0.9999991108071568, iteration: 22127
loss: 1.038520097732544,grad_norm: 0.9999990812477517, iteration: 22128
loss: 0.9975905418395996,grad_norm: 0.9999990298851122, iteration: 22129
loss: 1.0359441041946411,grad_norm: 0.9999992185008163, iteration: 22130
loss: 1.0483956336975098,grad_norm: 0.999999304655289, iteration: 22131
loss: 0.9911534190177917,grad_norm: 0.9240111050903036, iteration: 22132
loss: 1.0284452438354492,grad_norm: 0.9999992430327976, iteration: 22133
loss: 1.0103662014007568,grad_norm: 0.9999991472240555, iteration: 22134
loss: 1.0527530908584595,grad_norm: 0.925560542103257, iteration: 22135
loss: 1.0020084381103516,grad_norm: 0.9999989471971236, iteration: 22136
loss: 1.0040515661239624,grad_norm: 0.9999989322972679, iteration: 22137
loss: 1.0334686040878296,grad_norm: 0.9999992219569032, iteration: 22138
loss: 0.98621666431427,grad_norm: 0.9048033951278566, iteration: 22139
loss: 0.9903269410133362,grad_norm: 0.9999991220906659, iteration: 22140
loss: 0.9868232011795044,grad_norm: 0.9999989893180614, iteration: 22141
loss: 1.065914511680603,grad_norm: 0.9999995111591702, iteration: 22142
loss: 0.9881418943405151,grad_norm: 0.93282681983133, iteration: 22143
loss: 1.01123046875,grad_norm: 0.999999040195899, iteration: 22144
loss: 1.0248087644577026,grad_norm: 0.9999992883284264, iteration: 22145
loss: 0.9854940176010132,grad_norm: 0.9999992091911764, iteration: 22146
loss: 0.9847397208213806,grad_norm: 0.9999991133963831, iteration: 22147
loss: 1.005881667137146,grad_norm: 0.9294609619814277, iteration: 22148
loss: 1.0530463457107544,grad_norm: 0.9999991632926006, iteration: 22149
loss: 1.036067247390747,grad_norm: 0.9999991998297779, iteration: 22150
loss: 1.0028449296951294,grad_norm: 0.999999068834789, iteration: 22151
loss: 1.0711932182312012,grad_norm: 0.999999247566245, iteration: 22152
loss: 1.050391674041748,grad_norm: 0.9999996546000172, iteration: 22153
loss: 1.0283164978027344,grad_norm: 0.9999992702207556, iteration: 22154
loss: 1.028985857963562,grad_norm: 0.925713770646084, iteration: 22155
loss: 1.0026668310165405,grad_norm: 0.9999995227014685, iteration: 22156
loss: 1.0051597356796265,grad_norm: 0.9999991787776709, iteration: 22157
loss: 1.047974944114685,grad_norm: 0.9967222766120518, iteration: 22158
loss: 1.0623869895935059,grad_norm: 0.9999994176312749, iteration: 22159
loss: 1.0295517444610596,grad_norm: 0.9999992664390599, iteration: 22160
loss: 1.0110844373703003,grad_norm: 0.9999991539830702, iteration: 22161
loss: 1.0235755443572998,grad_norm: 0.9999992097437957, iteration: 22162
loss: 1.0238615274429321,grad_norm: 0.9208553326116989, iteration: 22163
loss: 0.9702694416046143,grad_norm: 0.9999991121807762, iteration: 22164
loss: 1.0313268899917603,grad_norm: 0.9999991805845599, iteration: 22165
loss: 1.0336624383926392,grad_norm: 0.9999992338618985, iteration: 22166
loss: 1.006071925163269,grad_norm: 0.9999991273275172, iteration: 22167
loss: 0.9804522395133972,grad_norm: 0.9999992480011233, iteration: 22168
loss: 1.0161564350128174,grad_norm: 0.9999992421717705, iteration: 22169
loss: 1.0233296155929565,grad_norm: 0.9999991348660439, iteration: 22170
loss: 0.9963827729225159,grad_norm: 0.9999995833205871, iteration: 22171
loss: 1.034623384475708,grad_norm: 0.9999997043468141, iteration: 22172
loss: 1.0413473844528198,grad_norm: 0.9508913097644345, iteration: 22173
loss: 0.971692681312561,grad_norm: 0.999999168187146, iteration: 22174
loss: 1.0274672508239746,grad_norm: 0.9999994536016982, iteration: 22175
loss: 0.9742462635040283,grad_norm: 0.999999220271397, iteration: 22176
loss: 1.0042401552200317,grad_norm: 0.9132720658984849, iteration: 22177
loss: 0.9965856075286865,grad_norm: 0.9999993195873893, iteration: 22178
loss: 1.008975625038147,grad_norm: 0.9086199685033142, iteration: 22179
loss: 0.9947676062583923,grad_norm: 0.9999990775533131, iteration: 22180
loss: 0.9847886562347412,grad_norm: 0.999999203588637, iteration: 22181
loss: 1.0079991817474365,grad_norm: 0.9656696128130772, iteration: 22182
loss: 0.9908254742622375,grad_norm: 0.9999997488751223, iteration: 22183
loss: 1.0268080234527588,grad_norm: 0.9999993257654803, iteration: 22184
loss: 1.0233392715454102,grad_norm: 0.9999990580579106, iteration: 22185
loss: 1.049798607826233,grad_norm: 0.9999991243478001, iteration: 22186
loss: 0.9872236847877502,grad_norm: 0.9999990694645735, iteration: 22187
loss: 1.0352754592895508,grad_norm: 0.9999995175261488, iteration: 22188
loss: 1.0161244869232178,grad_norm: 0.9999990393141487, iteration: 22189
loss: 1.0246697664260864,grad_norm: 0.9856238191887084, iteration: 22190
loss: 1.0239213705062866,grad_norm: 0.9959719817858527, iteration: 22191
loss: 1.0185279846191406,grad_norm: 0.9999996982411361, iteration: 22192
loss: 1.017068862915039,grad_norm: 0.9999993635881071, iteration: 22193
loss: 1.028448462486267,grad_norm: 0.9999990666837377, iteration: 22194
loss: 0.9852340221405029,grad_norm: 0.9999991036639895, iteration: 22195
loss: 1.051043152809143,grad_norm: 0.9999995235490037, iteration: 22196
loss: 1.05128014087677,grad_norm: 0.9999992865910252, iteration: 22197
loss: 1.003472924232483,grad_norm: 0.9999990310283814, iteration: 22198
loss: 1.0139368772506714,grad_norm: 0.9999992004098663, iteration: 22199
loss: 1.0440850257873535,grad_norm: 0.999999724575674, iteration: 22200
loss: 1.0341155529022217,grad_norm: 0.9999992300834363, iteration: 22201
loss: 1.101242184638977,grad_norm: 0.9999996161020512, iteration: 22202
loss: 0.9853993654251099,grad_norm: 0.9481668875397076, iteration: 22203
loss: 1.0622774362564087,grad_norm: 0.999999366843889, iteration: 22204
loss: 1.0093350410461426,grad_norm: 0.9999991124760562, iteration: 22205
loss: 1.0069166421890259,grad_norm: 0.9999991057392653, iteration: 22206
loss: 1.0526431798934937,grad_norm: 0.9684191210350882, iteration: 22207
loss: 1.041264533996582,grad_norm: 0.999999245036935, iteration: 22208
loss: 1.0233867168426514,grad_norm: 0.9999991706034733, iteration: 22209
loss: 1.0482187271118164,grad_norm: 0.9999989678930751, iteration: 22210
loss: 1.0034005641937256,grad_norm: 0.9239510429452935, iteration: 22211
loss: 1.0221401453018188,grad_norm: 0.9999992702853637, iteration: 22212
loss: 0.9766766428947449,grad_norm: 0.9999994418978929, iteration: 22213
loss: 1.0579078197479248,grad_norm: 0.9999991942043124, iteration: 22214
loss: 1.1161209344863892,grad_norm: 0.9999994742669146, iteration: 22215
loss: 1.0120549201965332,grad_norm: 0.9999991250025166, iteration: 22216
loss: 1.04886794090271,grad_norm: 0.9999993412627486, iteration: 22217
loss: 1.0213603973388672,grad_norm: 0.9999989991655002, iteration: 22218
loss: 1.0029438734054565,grad_norm: 0.9999993833703232, iteration: 22219
loss: 1.0052580833435059,grad_norm: 0.9999991618490418, iteration: 22220
loss: 1.0098646879196167,grad_norm: 0.9999991476585709, iteration: 22221
loss: 1.0166748762130737,grad_norm: 0.9999990555293438, iteration: 22222
loss: 1.0407353639602661,grad_norm: 0.9999995996564325, iteration: 22223
loss: 0.9952676296234131,grad_norm: 0.9999991938952425, iteration: 22224
loss: 1.0202043056488037,grad_norm: 0.999999096340621, iteration: 22225
loss: 1.0015671253204346,grad_norm: 0.9268579211133533, iteration: 22226
loss: 1.030548334121704,grad_norm: 0.8357049111456765, iteration: 22227
loss: 1.044015884399414,grad_norm: 0.999999361012931, iteration: 22228
loss: 1.020650863647461,grad_norm: 0.9999995748129936, iteration: 22229
loss: 1.0348951816558838,grad_norm: 0.99999916353457, iteration: 22230
loss: 1.003597617149353,grad_norm: 0.9999992668225456, iteration: 22231
loss: 0.9882317185401917,grad_norm: 0.90495504264213, iteration: 22232
loss: 1.0068203210830688,grad_norm: 0.9999993186712934, iteration: 22233
loss: 1.0125858783721924,grad_norm: 0.9999990529679649, iteration: 22234
loss: 1.0146034955978394,grad_norm: 0.9999992621714414, iteration: 22235
loss: 1.0036988258361816,grad_norm: 0.9999993797959851, iteration: 22236
loss: 1.023829460144043,grad_norm: 0.9208208260456529, iteration: 22237
loss: 1.0221428871154785,grad_norm: 0.898265696341559, iteration: 22238
loss: 1.0681527853012085,grad_norm: 0.9999992878530899, iteration: 22239
loss: 1.0316805839538574,grad_norm: 0.9999990276097959, iteration: 22240
loss: 1.0615792274475098,grad_norm: 0.9999995734741459, iteration: 22241
loss: 0.9965923428535461,grad_norm: 0.9999995995531233, iteration: 22242
loss: 1.0105398893356323,grad_norm: 0.9787337968145673, iteration: 22243
loss: 1.0238778591156006,grad_norm: 0.9999991559197904, iteration: 22244
loss: 1.0311702489852905,grad_norm: 0.9999991238049808, iteration: 22245
loss: 1.0716701745986938,grad_norm: 0.9999993353346391, iteration: 22246
loss: 1.047370433807373,grad_norm: 0.9999997217591948, iteration: 22247
loss: 1.1048649549484253,grad_norm: 0.9999996593187338, iteration: 22248
loss: 1.09368097782135,grad_norm: 0.9999995881325693, iteration: 22249
loss: 1.0475038290023804,grad_norm: 0.993274823627913, iteration: 22250
loss: 0.9933276772499084,grad_norm: 0.9999991770717327, iteration: 22251
loss: 1.0206433534622192,grad_norm: 0.9999991659405411, iteration: 22252
loss: 1.0272361040115356,grad_norm: 0.999999236598504, iteration: 22253
loss: 1.0067561864852905,grad_norm: 0.9999989835500565, iteration: 22254
loss: 0.9899764060974121,grad_norm: 0.9999991424392399, iteration: 22255
loss: 1.1258260011672974,grad_norm: 0.9999994619676926, iteration: 22256
loss: 1.0892977714538574,grad_norm: 0.9999995193281268, iteration: 22257
loss: 0.9995593428611755,grad_norm: 0.9999994008480306, iteration: 22258
loss: 1.046156406402588,grad_norm: 0.9999993555925016, iteration: 22259
loss: 1.0542781352996826,grad_norm: 0.9999992125127809, iteration: 22260
loss: 0.9988033771514893,grad_norm: 0.9850379028200192, iteration: 22261
loss: 0.9960408806800842,grad_norm: 0.945259424202947, iteration: 22262
loss: 0.9840260148048401,grad_norm: 0.9999990547185558, iteration: 22263
loss: 1.0371099710464478,grad_norm: 0.9999998590733656, iteration: 22264
loss: 1.0091972351074219,grad_norm: 0.9999991956991022, iteration: 22265
loss: 1.050142765045166,grad_norm: 0.9999992554825894, iteration: 22266
loss: 1.005861759185791,grad_norm: 0.9999998155809235, iteration: 22267
loss: 0.9999902844429016,grad_norm: 0.9999994732487995, iteration: 22268
loss: 1.000700831413269,grad_norm: 0.9999990882563243, iteration: 22269
loss: 0.9502512216567993,grad_norm: 0.9999991213605127, iteration: 22270
loss: 1.0367271900177002,grad_norm: 0.9326061577940234, iteration: 22271
loss: 0.988834798336029,grad_norm: 0.99999913772552, iteration: 22272
loss: 1.0399045944213867,grad_norm: 0.9999997600398184, iteration: 22273
loss: 1.0456162691116333,grad_norm: 0.9999990302806144, iteration: 22274
loss: 1.0847963094711304,grad_norm: 0.9999999013540467, iteration: 22275
loss: 1.0596035718917847,grad_norm: 0.9266789936319676, iteration: 22276
loss: 1.0057367086410522,grad_norm: 0.9880382435855233, iteration: 22277
loss: 1.0718636512756348,grad_norm: 0.9999991459835538, iteration: 22278
loss: 1.0143747329711914,grad_norm: 0.9999991930159294, iteration: 22279
loss: 0.9995093941688538,grad_norm: 0.8883635555187441, iteration: 22280
loss: 1.0094317197799683,grad_norm: 0.999999398234257, iteration: 22281
loss: 1.0204342603683472,grad_norm: 0.8693085703675836, iteration: 22282
loss: 1.0372613668441772,grad_norm: 0.9999991510984189, iteration: 22283
loss: 1.0474046468734741,grad_norm: 0.999999065575989, iteration: 22284
loss: 1.0165367126464844,grad_norm: 0.8989331491676785, iteration: 22285
loss: 1.0233185291290283,grad_norm: 0.7833685138549313, iteration: 22286
loss: 1.003678321838379,grad_norm: 0.8844224349427989, iteration: 22287
loss: 1.01266348361969,grad_norm: 0.9999991382803791, iteration: 22288
loss: 1.0200910568237305,grad_norm: 0.9999998004279196, iteration: 22289
loss: 0.9975934624671936,grad_norm: 0.9999992720197204, iteration: 22290
loss: 1.0097026824951172,grad_norm: 0.9399112087106857, iteration: 22291
loss: 1.0214414596557617,grad_norm: 0.9999990725710313, iteration: 22292
loss: 1.088280200958252,grad_norm: 0.9999995208790069, iteration: 22293
loss: 1.047358751296997,grad_norm: 0.9999990140122866, iteration: 22294
loss: 0.9778554439544678,grad_norm: 0.9994226499895994, iteration: 22295
loss: 1.0072439908981323,grad_norm: 0.9216137753796975, iteration: 22296
loss: 1.044060230255127,grad_norm: 0.9190858885221229, iteration: 22297
loss: 1.0186108350753784,grad_norm: 0.8994847265346925, iteration: 22298
loss: 1.033945918083191,grad_norm: 0.999999002335905, iteration: 22299
loss: 1.1249659061431885,grad_norm: 0.9999992283492222, iteration: 22300
loss: 1.001686453819275,grad_norm: 0.9328574280404658, iteration: 22301
loss: 1.0174262523651123,grad_norm: 0.9999991618282744, iteration: 22302
loss: 1.0618160963058472,grad_norm: 0.9999993138437109, iteration: 22303
loss: 1.1416656970977783,grad_norm: 0.9999996309966949, iteration: 22304
loss: 0.9671379923820496,grad_norm: 0.9999992381982853, iteration: 22305
loss: 1.0379430055618286,grad_norm: 0.9895557548535553, iteration: 22306
loss: 0.9861558675765991,grad_norm: 0.9999990869123657, iteration: 22307
loss: 1.033698558807373,grad_norm: 0.9752020144476281, iteration: 22308
loss: 1.0286706686019897,grad_norm: 0.99999941534002, iteration: 22309
loss: 0.98414546251297,grad_norm: 0.9999995805566008, iteration: 22310
loss: 0.9703862071037292,grad_norm: 0.9999991324929864, iteration: 22311
loss: 0.9814997911453247,grad_norm: 0.8779420890870904, iteration: 22312
loss: 1.0863193273544312,grad_norm: 0.9999991866179184, iteration: 22313
loss: 1.0384035110473633,grad_norm: 0.999999197139379, iteration: 22314
loss: 0.9999462366104126,grad_norm: 0.9999991113997782, iteration: 22315
loss: 1.0204640626907349,grad_norm: 0.9999991695491378, iteration: 22316
loss: 1.021948218345642,grad_norm: 0.9999990605923627, iteration: 22317
loss: 1.0430288314819336,grad_norm: 0.9999995513721059, iteration: 22318
loss: 0.9687987565994263,grad_norm: 0.9999991158767824, iteration: 22319
loss: 1.0722311735153198,grad_norm: 0.999999348537839, iteration: 22320
loss: 1.0828006267547607,grad_norm: 0.9999996228087525, iteration: 22321
loss: 1.02749502658844,grad_norm: 0.9721475654504923, iteration: 22322
loss: 0.9921779632568359,grad_norm: 0.9689013471117346, iteration: 22323
loss: 1.0155372619628906,grad_norm: 0.9325918297932904, iteration: 22324
loss: 1.0217043161392212,grad_norm: 0.999999119496492, iteration: 22325
loss: 1.0241624116897583,grad_norm: 0.9999990712681648, iteration: 22326
loss: 0.9915502071380615,grad_norm: 0.9999996948716628, iteration: 22327
loss: 1.002996802330017,grad_norm: 0.9999992825662556, iteration: 22328
loss: 1.0212557315826416,grad_norm: 0.9091341703380484, iteration: 22329
loss: 0.9780006408691406,grad_norm: 0.999999225516754, iteration: 22330
loss: 1.0657033920288086,grad_norm: 0.9999991724867331, iteration: 22331
loss: 1.0168027877807617,grad_norm: 0.9999990383357209, iteration: 22332
loss: 1.057663917541504,grad_norm: 0.999999622943965, iteration: 22333
loss: 1.005361557006836,grad_norm: 0.999999196841845, iteration: 22334
loss: 1.0180352926254272,grad_norm: 0.9583681189076889, iteration: 22335
loss: 0.9842368364334106,grad_norm: 0.8548159960161041, iteration: 22336
loss: 1.0506778955459595,grad_norm: 0.9999990731294587, iteration: 22337
loss: 1.0206512212753296,grad_norm: 0.9999990963991706, iteration: 22338
loss: 1.0344818830490112,grad_norm: 0.8974129375135889, iteration: 22339
loss: 1.0158803462982178,grad_norm: 0.9999993042266163, iteration: 22340
loss: 0.9937683343887329,grad_norm: 0.9999991193565014, iteration: 22341
loss: 1.033784031867981,grad_norm: 0.9999992658953417, iteration: 22342
loss: 1.0349727869033813,grad_norm: 0.9999990014127292, iteration: 22343
loss: 1.0284054279327393,grad_norm: 0.9470251482849394, iteration: 22344
loss: 1.0373557806015015,grad_norm: 0.8611260446388522, iteration: 22345
loss: 1.0470480918884277,grad_norm: 0.9538425894533528, iteration: 22346
loss: 0.988736093044281,grad_norm: 0.9999990709296365, iteration: 22347
loss: 1.0083523988723755,grad_norm: 0.9666190152509674, iteration: 22348
loss: 1.0071274042129517,grad_norm: 0.9999991145610265, iteration: 22349
loss: 1.0262205600738525,grad_norm: 0.9999991293122603, iteration: 22350
loss: 1.0111218690872192,grad_norm: 0.9457960651296384, iteration: 22351
loss: 1.0215122699737549,grad_norm: 0.9999991297312619, iteration: 22352
loss: 1.0160671472549438,grad_norm: 0.9999991247544321, iteration: 22353
loss: 0.9983348250389099,grad_norm: 0.9999992602301033, iteration: 22354
loss: 0.9942858219146729,grad_norm: 0.9999994534099867, iteration: 22355
loss: 1.0232776403427124,grad_norm: 0.9999991719737961, iteration: 22356
loss: 1.0110865831375122,grad_norm: 0.99999901884593, iteration: 22357
loss: 0.9729340672492981,grad_norm: 0.9999992121445146, iteration: 22358
loss: 1.0209227800369263,grad_norm: 0.9999989646826017, iteration: 22359
loss: 1.0311238765716553,grad_norm: 0.9999990561714275, iteration: 22360
loss: 1.021175503730774,grad_norm: 0.9999992119554726, iteration: 22361
loss: 1.0450080633163452,grad_norm: 0.9999993566651338, iteration: 22362
loss: 1.0289607048034668,grad_norm: 0.9824330974527771, iteration: 22363
loss: 0.9947940707206726,grad_norm: 0.8474982110809393, iteration: 22364
loss: 0.983905553817749,grad_norm: 0.9131372777801918, iteration: 22365
loss: 1.0121104717254639,grad_norm: 0.9628087962129429, iteration: 22366
loss: 1.0269057750701904,grad_norm: 0.9999997295043382, iteration: 22367
loss: 1.0348304510116577,grad_norm: 0.9999991407133375, iteration: 22368
loss: 1.0218112468719482,grad_norm: 0.9999990164589802, iteration: 22369
loss: 1.0221056938171387,grad_norm: 0.833031902832961, iteration: 22370
loss: 1.0195235013961792,grad_norm: 0.9999990262920494, iteration: 22371
loss: 1.0685263872146606,grad_norm: 0.9999994418019147, iteration: 22372
loss: 1.0142360925674438,grad_norm: 0.9999990963525971, iteration: 22373
loss: 1.0728473663330078,grad_norm: 0.9999992067553811, iteration: 22374
loss: 1.099180817604065,grad_norm: 0.9999999377917855, iteration: 22375
loss: 1.0350639820098877,grad_norm: 0.9999991952426157, iteration: 22376
loss: 0.9962385296821594,grad_norm: 0.9964416055171037, iteration: 22377
loss: 1.0233619213104248,grad_norm: 0.9999992986357493, iteration: 22378
loss: 0.9957398772239685,grad_norm: 0.9679490645278409, iteration: 22379
loss: 1.0098639726638794,grad_norm: 0.9007428151647244, iteration: 22380
loss: 0.9739691019058228,grad_norm: 0.9999991981310422, iteration: 22381
loss: 1.0339637994766235,grad_norm: 0.9999993862325515, iteration: 22382
loss: 1.0072882175445557,grad_norm: 0.9999991170772566, iteration: 22383
loss: 1.0435618162155151,grad_norm: 0.9999993511118654, iteration: 22384
loss: 0.965490460395813,grad_norm: 0.9999990959746216, iteration: 22385
loss: 1.0415698289871216,grad_norm: 0.8499740654886847, iteration: 22386
loss: 1.0306557416915894,grad_norm: 0.9686479136530892, iteration: 22387
loss: 0.9734024405479431,grad_norm: 0.999999139080962, iteration: 22388
loss: 1.059414267539978,grad_norm: 0.9999991770239569, iteration: 22389
loss: 1.0233571529388428,grad_norm: 0.9999995169597201, iteration: 22390
loss: 1.0296305418014526,grad_norm: 0.9999990014397647, iteration: 22391
loss: 1.0324863195419312,grad_norm: 0.9580312132733948, iteration: 22392
loss: 0.9983437657356262,grad_norm: 0.9999990480111247, iteration: 22393
loss: 1.0276424884796143,grad_norm: 0.9999991902343577, iteration: 22394
loss: 1.0302884578704834,grad_norm: 0.9999993323879046, iteration: 22395
loss: 0.9713143706321716,grad_norm: 0.9999992237494737, iteration: 22396
loss: 1.0094916820526123,grad_norm: 0.9999994325406821, iteration: 22397
loss: 1.014717936515808,grad_norm: 0.9812548842579535, iteration: 22398
loss: 1.0107284784317017,grad_norm: 0.9370037524170028, iteration: 22399
loss: 0.9776132702827454,grad_norm: 0.9999990365022235, iteration: 22400
loss: 1.0388602018356323,grad_norm: 0.9999991768517185, iteration: 22401
loss: 1.0430763959884644,grad_norm: 0.999999155395973, iteration: 22402
loss: 1.0192201137542725,grad_norm: 0.999999227269113, iteration: 22403
loss: 1.0101943016052246,grad_norm: 0.9999992032900928, iteration: 22404
loss: 1.0096042156219482,grad_norm: 0.9999993451097283, iteration: 22405
loss: 1.0192008018493652,grad_norm: 0.9181786552812661, iteration: 22406
loss: 1.0055500268936157,grad_norm: 0.9767901934246854, iteration: 22407
loss: 1.0186363458633423,grad_norm: 0.9999992781314967, iteration: 22408
loss: 0.9703649282455444,grad_norm: 0.9999997567160734, iteration: 22409
loss: 1.0284534692764282,grad_norm: 0.9999991859539005, iteration: 22410
loss: 1.0180758237838745,grad_norm: 0.9999992051428019, iteration: 22411
loss: 0.9820277094841003,grad_norm: 0.9999990052722576, iteration: 22412
loss: 1.0544663667678833,grad_norm: 0.9999994718993374, iteration: 22413
loss: 1.0396257638931274,grad_norm: 0.8614947246040386, iteration: 22414
loss: 1.0181784629821777,grad_norm: 0.9999991560698388, iteration: 22415
loss: 0.9800506830215454,grad_norm: 0.999999210527863, iteration: 22416
loss: 1.0078797340393066,grad_norm: 0.9999992622823484, iteration: 22417
loss: 0.9608286619186401,grad_norm: 0.8740115360861481, iteration: 22418
loss: 1.0035725831985474,grad_norm: 0.8940407691848427, iteration: 22419
loss: 1.0253514051437378,grad_norm: 0.9999993643178305, iteration: 22420
loss: 1.0142775774002075,grad_norm: 0.9849722603243972, iteration: 22421
loss: 1.0077424049377441,grad_norm: 0.9999991725741474, iteration: 22422
loss: 1.0531326532363892,grad_norm: 0.9999990313840125, iteration: 22423
loss: 1.038266658782959,grad_norm: 0.9999992919926525, iteration: 22424
loss: 1.0690548419952393,grad_norm: 0.9999990171050738, iteration: 22425
loss: 1.002346158027649,grad_norm: 0.9999991339333666, iteration: 22426
loss: 1.0565357208251953,grad_norm: 0.9999990645193597, iteration: 22427
loss: 1.0191397666931152,grad_norm: 0.9999995756462715, iteration: 22428
loss: 0.9988121390342712,grad_norm: 0.9999992019913952, iteration: 22429
loss: 1.0309572219848633,grad_norm: 0.9999990313789681, iteration: 22430
loss: 0.9771803617477417,grad_norm: 0.999999191238582, iteration: 22431
loss: 1.009832739830017,grad_norm: 0.9999990702998468, iteration: 22432
loss: 1.0107975006103516,grad_norm: 0.9999996752586018, iteration: 22433
loss: 1.005380630493164,grad_norm: 0.9999992803070328, iteration: 22434
loss: 1.0823103189468384,grad_norm: 0.9999992758404642, iteration: 22435
loss: 0.992755115032196,grad_norm: 0.8894191324027936, iteration: 22436
loss: 1.0188900232315063,grad_norm: 0.8780701139838094, iteration: 22437
loss: 1.002950668334961,grad_norm: 0.9999991388134476, iteration: 22438
loss: 1.0022557973861694,grad_norm: 0.9198808327293387, iteration: 22439
loss: 1.0118277072906494,grad_norm: 0.9999991396494499, iteration: 22440
loss: 1.0301399230957031,grad_norm: 0.9999991497730563, iteration: 22441
loss: 1.039610743522644,grad_norm: 0.9999990423102424, iteration: 22442
loss: 1.0268223285675049,grad_norm: 0.9999994833185598, iteration: 22443
loss: 0.9865503907203674,grad_norm: 0.9999990840467039, iteration: 22444
loss: 0.9996291995048523,grad_norm: 0.9999991757587444, iteration: 22445
loss: 0.976105809211731,grad_norm: 0.9999990944458905, iteration: 22446
loss: 1.0072238445281982,grad_norm: 0.966326123099279, iteration: 22447
loss: 0.9985547065734863,grad_norm: 0.9999991310667208, iteration: 22448
loss: 0.9791331887245178,grad_norm: 0.999999066316799, iteration: 22449
loss: 0.9855330586433411,grad_norm: 0.9999991412317882, iteration: 22450
loss: 1.0026350021362305,grad_norm: 0.9104964384259309, iteration: 22451
loss: 1.0411226749420166,grad_norm: 0.8912856517526638, iteration: 22452
loss: 1.0523755550384521,grad_norm: 0.9999991256842703, iteration: 22453
loss: 1.0037678480148315,grad_norm: 0.9999990598417575, iteration: 22454
loss: 0.9848813414573669,grad_norm: 0.999999580986363, iteration: 22455
loss: 1.0234569311141968,grad_norm: 0.9999992172885688, iteration: 22456
loss: 0.9721913933753967,grad_norm: 0.9999990531438803, iteration: 22457
loss: 0.9616508483886719,grad_norm: 0.9999993313520336, iteration: 22458
loss: 1.0091949701309204,grad_norm: 0.9771503771786773, iteration: 22459
loss: 0.9816640615463257,grad_norm: 0.9999990285201646, iteration: 22460
loss: 0.973292350769043,grad_norm: 0.9802697498812218, iteration: 22461
loss: 1.0057123899459839,grad_norm: 0.9999996648070384, iteration: 22462
loss: 1.0042465925216675,grad_norm: 0.9999992020826661, iteration: 22463
loss: 1.0295886993408203,grad_norm: 0.796657973094366, iteration: 22464
loss: 0.9891953468322754,grad_norm: 0.9999994601767016, iteration: 22465
loss: 0.9822131991386414,grad_norm: 0.9999992215192663, iteration: 22466
loss: 1.0357540845870972,grad_norm: 0.999999097679507, iteration: 22467
loss: 1.0025763511657715,grad_norm: 0.8950927828380193, iteration: 22468
loss: 1.083345890045166,grad_norm: 0.9999991025860796, iteration: 22469
loss: 1.0278855562210083,grad_norm: 0.9999993505947923, iteration: 22470
loss: 0.9810220003128052,grad_norm: 0.891358229845644, iteration: 22471
loss: 1.0454134941101074,grad_norm: 0.999999155207694, iteration: 22472
loss: 0.9499432444572449,grad_norm: 0.9225789981769248, iteration: 22473
loss: 0.9955986142158508,grad_norm: 0.9999991556003002, iteration: 22474
loss: 1.0078275203704834,grad_norm: 0.9719957406182563, iteration: 22475
loss: 1.033738613128662,grad_norm: 0.9999990678247452, iteration: 22476
loss: 0.9953410029411316,grad_norm: 0.9999990286545521, iteration: 22477
loss: 0.9822699427604675,grad_norm: 0.9999991796482702, iteration: 22478
loss: 1.0090231895446777,grad_norm: 0.9999992582725975, iteration: 22479
loss: 1.005440354347229,grad_norm: 0.9999994051944286, iteration: 22480
loss: 1.0508676767349243,grad_norm: 0.9999991987666138, iteration: 22481
loss: 1.0131704807281494,grad_norm: 0.9999990737174207, iteration: 22482
loss: 1.019470453262329,grad_norm: 0.9999992329468871, iteration: 22483
loss: 1.03278386592865,grad_norm: 0.999999468870007, iteration: 22484
loss: 1.0806643962860107,grad_norm: 0.9999999137032617, iteration: 22485
loss: 1.0467755794525146,grad_norm: 0.9999991128585557, iteration: 22486
loss: 1.0436618328094482,grad_norm: 0.999999608351085, iteration: 22487
loss: 0.9761354327201843,grad_norm: 0.9999991407853166, iteration: 22488
loss: 0.99839848279953,grad_norm: 0.9999990489545307, iteration: 22489
loss: 1.0008496046066284,grad_norm: 0.9360583308401963, iteration: 22490
loss: 0.974378764629364,grad_norm: 0.9999991049647482, iteration: 22491
loss: 1.0284249782562256,grad_norm: 0.9999998164768559, iteration: 22492
loss: 1.0113945007324219,grad_norm: 0.9705938812447098, iteration: 22493
loss: 1.0236467123031616,grad_norm: 0.9999998755105285, iteration: 22494
loss: 1.033242106437683,grad_norm: 0.999999146300914, iteration: 22495
loss: 1.0125863552093506,grad_norm: 0.9999991233646418, iteration: 22496
loss: 1.0211842060089111,grad_norm: 0.9999991759425565, iteration: 22497
loss: 1.0141992568969727,grad_norm: 0.9999991079574121, iteration: 22498
loss: 1.0138531923294067,grad_norm: 0.9999990295069242, iteration: 22499
loss: 1.0215569734573364,grad_norm: 0.9985347669510302, iteration: 22500
loss: 1.0341602563858032,grad_norm: 0.999999092871888, iteration: 22501
loss: 0.9888682961463928,grad_norm: 0.999999126272855, iteration: 22502
loss: 1.0612974166870117,grad_norm: 0.9999994778431406, iteration: 22503
loss: 1.0618524551391602,grad_norm: 0.999999402187122, iteration: 22504
loss: 1.0247551202774048,grad_norm: 0.9999995556351992, iteration: 22505
loss: 1.008414626121521,grad_norm: 0.9999993537856425, iteration: 22506
loss: 1.0375580787658691,grad_norm: 0.9999991000296964, iteration: 22507
loss: 1.030859112739563,grad_norm: 0.9999995086628771, iteration: 22508
loss: 0.9951536655426025,grad_norm: 0.9347613077086913, iteration: 22509
loss: 1.012149691581726,grad_norm: 0.9999992393353466, iteration: 22510
loss: 1.0013586282730103,grad_norm: 0.9999991947598568, iteration: 22511
loss: 0.9812317490577698,grad_norm: 0.9999989617722298, iteration: 22512
loss: 1.028306007385254,grad_norm: 0.9999991631345994, iteration: 22513
loss: 0.9810776114463806,grad_norm: 0.9999990454763573, iteration: 22514
loss: 0.9749878644943237,grad_norm: 0.9999990418433136, iteration: 22515
loss: 1.0596967935562134,grad_norm: 0.9999993554382064, iteration: 22516
loss: 1.0345760583877563,grad_norm: 0.9999993729889407, iteration: 22517
loss: 1.040382981300354,grad_norm: 0.9999998634103316, iteration: 22518
loss: 0.9830259680747986,grad_norm: 0.9999992427313977, iteration: 22519
loss: 1.029914379119873,grad_norm: 0.9999992502290896, iteration: 22520
loss: 0.9951532483100891,grad_norm: 0.9773198620084069, iteration: 22521
loss: 0.9346229434013367,grad_norm: 0.9999991592773864, iteration: 22522
loss: 1.07578706741333,grad_norm: 0.9999998018046565, iteration: 22523
loss: 1.0033613443374634,grad_norm: 0.9999994228540167, iteration: 22524
loss: 1.0682138204574585,grad_norm: 0.999999561027969, iteration: 22525
loss: 1.0163111686706543,grad_norm: 0.9999992024017562, iteration: 22526
loss: 0.9850339889526367,grad_norm: 0.999999223003464, iteration: 22527
loss: 1.0210685729980469,grad_norm: 0.9999992076362629, iteration: 22528
loss: 1.0449787378311157,grad_norm: 0.9999992327465708, iteration: 22529
loss: 1.016271948814392,grad_norm: 0.9999993288758504, iteration: 22530
loss: 1.0094894170761108,grad_norm: 0.9999990951955217, iteration: 22531
loss: 0.9830132722854614,grad_norm: 0.999999104141759, iteration: 22532
loss: 1.0191657543182373,grad_norm: 0.9999993164630333, iteration: 22533
loss: 1.0142087936401367,grad_norm: 0.8448036851423404, iteration: 22534
loss: 1.1126625537872314,grad_norm: 0.9999993504880748, iteration: 22535
loss: 1.0920459032058716,grad_norm: 0.9999998963002573, iteration: 22536
loss: 1.008296251296997,grad_norm: 0.9999995296091163, iteration: 22537
loss: 0.9709486961364746,grad_norm: 0.9999997496233674, iteration: 22538
loss: 1.0344491004943848,grad_norm: 0.9999990390202522, iteration: 22539
loss: 1.0413198471069336,grad_norm: 0.9303403837007165, iteration: 22540
loss: 1.0092930793762207,grad_norm: 0.9999991370054757, iteration: 22541
loss: 1.0156153440475464,grad_norm: 0.9999995851977808, iteration: 22542
loss: 1.0230289697647095,grad_norm: 0.9999992868512627, iteration: 22543
loss: 1.0111939907073975,grad_norm: 0.9405914432614951, iteration: 22544
loss: 1.004501461982727,grad_norm: 0.9999995183428336, iteration: 22545
loss: 1.0337878465652466,grad_norm: 0.9999991847426978, iteration: 22546
loss: 1.043160319328308,grad_norm: 0.9999998793940478, iteration: 22547
loss: 1.0230120420455933,grad_norm: 0.9999999620450659, iteration: 22548
loss: 1.0251903533935547,grad_norm: 0.9999995946939457, iteration: 22549
loss: 1.0444825887680054,grad_norm: 0.9579943446649274, iteration: 22550
loss: 1.0743921995162964,grad_norm: 0.9999996819942976, iteration: 22551
loss: 1.0436627864837646,grad_norm: 0.9999990289899885, iteration: 22552
loss: 1.006879448890686,grad_norm: 0.9999990376462436, iteration: 22553
loss: 0.9719514846801758,grad_norm: 0.9999992608988016, iteration: 22554
loss: 1.077178955078125,grad_norm: 0.9999996344426715, iteration: 22555
loss: 0.9952641725540161,grad_norm: 0.9999996452907738, iteration: 22556
loss: 1.035677194595337,grad_norm: 0.9999992608614409, iteration: 22557
loss: 0.9831488132476807,grad_norm: 0.9999993466436167, iteration: 22558
loss: 1.1041325330734253,grad_norm: 0.9999993266671577, iteration: 22559
loss: 1.0055078268051147,grad_norm: 0.9999995352884297, iteration: 22560
loss: 1.0226600170135498,grad_norm: 0.9999992851541978, iteration: 22561
loss: 1.0591785907745361,grad_norm: 0.9999997275522992, iteration: 22562
loss: 1.0846691131591797,grad_norm: 0.9999995123638656, iteration: 22563
loss: 1.0345255136489868,grad_norm: 0.9999994268812166, iteration: 22564
loss: 0.9994927048683167,grad_norm: 0.999999266643836, iteration: 22565
loss: 1.0197844505310059,grad_norm: 0.9999989480536852, iteration: 22566
loss: 1.0235626697540283,grad_norm: 0.9773804698978173, iteration: 22567
loss: 1.0202115774154663,grad_norm: 0.9435466473794787, iteration: 22568
loss: 1.050879955291748,grad_norm: 0.9999993306697061, iteration: 22569
loss: 1.0509682893753052,grad_norm: 0.9999995702826147, iteration: 22570
loss: 1.044273018836975,grad_norm: 0.9829448360033393, iteration: 22571
loss: 0.9863294959068298,grad_norm: 0.9999993836633164, iteration: 22572
loss: 1.042671799659729,grad_norm: 0.9999992028132674, iteration: 22573
loss: 1.0136644840240479,grad_norm: 0.8582140961543318, iteration: 22574
loss: 1.0707632303237915,grad_norm: 0.9999997151559059, iteration: 22575
loss: 0.9963176250457764,grad_norm: 0.9917424665547757, iteration: 22576
loss: 1.010728359222412,grad_norm: 0.9999992157525476, iteration: 22577
loss: 1.0803041458129883,grad_norm: 0.9999998915912007, iteration: 22578
loss: 0.9891554117202759,grad_norm: 0.9999990544492212, iteration: 22579
loss: 1.0140879154205322,grad_norm: 0.9999993858505755, iteration: 22580
loss: 1.0635383129119873,grad_norm: 0.9999996448783194, iteration: 22581
loss: 1.060239553451538,grad_norm: 0.9999994139728045, iteration: 22582
loss: 1.0120254755020142,grad_norm: 0.8562637539196203, iteration: 22583
loss: 1.0431323051452637,grad_norm: 0.9999990936950889, iteration: 22584
loss: 1.0254360437393188,grad_norm: 0.9999995783290541, iteration: 22585
loss: 1.0766704082489014,grad_norm: 0.9248628471015024, iteration: 22586
loss: 1.059428334236145,grad_norm: 0.9999998096351974, iteration: 22587
loss: 0.9895440936088562,grad_norm: 0.9999994902472819, iteration: 22588
loss: 1.078382134437561,grad_norm: 0.9999996447592103, iteration: 22589
loss: 1.0043498277664185,grad_norm: 0.9255798274754489, iteration: 22590
loss: 1.0039854049682617,grad_norm: 0.9999991000392007, iteration: 22591
loss: 1.0469392538070679,grad_norm: 0.8891017846967114, iteration: 22592
loss: 1.0037816762924194,grad_norm: 0.9457980668164016, iteration: 22593
loss: 1.0247280597686768,grad_norm: 0.9999991504326141, iteration: 22594
loss: 1.0130292177200317,grad_norm: 0.9999991789786308, iteration: 22595
loss: 1.070164442062378,grad_norm: 0.9999996081141102, iteration: 22596
loss: 1.0220719575881958,grad_norm: 0.8496126829834768, iteration: 22597
loss: 1.0340007543563843,grad_norm: 0.999999100862014, iteration: 22598
loss: 0.9796080589294434,grad_norm: 0.9999991036919248, iteration: 22599
loss: 0.9902408719062805,grad_norm: 0.9866124158109563, iteration: 22600
loss: 1.0335155725479126,grad_norm: 0.9999995017264558, iteration: 22601
loss: 0.9813323616981506,grad_norm: 0.9999991485307758, iteration: 22602
loss: 1.0440020561218262,grad_norm: 0.9999990981309306, iteration: 22603
loss: 0.9892212748527527,grad_norm: 0.999999160340745, iteration: 22604
loss: 1.0325125455856323,grad_norm: 0.9999997909053694, iteration: 22605
loss: 1.0256184339523315,grad_norm: 0.9999991019541291, iteration: 22606
loss: 1.0079532861709595,grad_norm: 0.9999991025424066, iteration: 22607
loss: 0.952981173992157,grad_norm: 0.9999991463086787, iteration: 22608
loss: 0.9920282363891602,grad_norm: 0.9999991695409582, iteration: 22609
loss: 0.9797609448432922,grad_norm: 0.9999992394602145, iteration: 22610
loss: 1.0669392347335815,grad_norm: 0.9999994858110619, iteration: 22611
loss: 1.0666015148162842,grad_norm: 0.9999993710402729, iteration: 22612
loss: 0.9839226603507996,grad_norm: 0.9999992582763702, iteration: 22613
loss: 1.0431805849075317,grad_norm: 0.9999992792435863, iteration: 22614
loss: 0.9994727969169617,grad_norm: 0.9999991871153702, iteration: 22615
loss: 1.0291856527328491,grad_norm: 0.9999991329913119, iteration: 22616
loss: 0.9899183511734009,grad_norm: 0.9999991492756454, iteration: 22617
loss: 1.0291664600372314,grad_norm: 0.9999992079908883, iteration: 22618
loss: 0.9817593097686768,grad_norm: 0.9504834933430004, iteration: 22619
loss: 1.0833748579025269,grad_norm: 0.9999995424334999, iteration: 22620
loss: 0.9575914144515991,grad_norm: 0.9999991385726624, iteration: 22621
loss: 1.0320576429367065,grad_norm: 0.9999993574209974, iteration: 22622
loss: 1.0156376361846924,grad_norm: 0.9716118750722357, iteration: 22623
loss: 0.9726555943489075,grad_norm: 0.978688274191431, iteration: 22624
loss: 1.0519121885299683,grad_norm: 0.9999997469199853, iteration: 22625
loss: 1.0913361310958862,grad_norm: 0.999999704968124, iteration: 22626
loss: 0.9939024448394775,grad_norm: 0.9999991586307669, iteration: 22627
loss: 1.00421142578125,grad_norm: 0.999999361512196, iteration: 22628
loss: 1.1340951919555664,grad_norm: 0.999999668569402, iteration: 22629
loss: 1.0357601642608643,grad_norm: 0.9999996722223953, iteration: 22630
loss: 1.0822999477386475,grad_norm: 0.9999994811803143, iteration: 22631
loss: 1.2712458372116089,grad_norm: 0.9999999414090891, iteration: 22632
loss: 1.0583903789520264,grad_norm: 0.9999995458462492, iteration: 22633
loss: 0.9990174174308777,grad_norm: 0.9999997594833396, iteration: 22634
loss: 1.062317132949829,grad_norm: 0.9999997872455066, iteration: 22635
loss: 1.0212119817733765,grad_norm: 0.9999992303879612, iteration: 22636
loss: 0.9991169571876526,grad_norm: 0.9999995210151567, iteration: 22637
loss: 1.0142194032669067,grad_norm: 0.9999993015264417, iteration: 22638
loss: 0.9617070555686951,grad_norm: 0.8693347843595491, iteration: 22639
loss: 1.0420856475830078,grad_norm: 0.9999989792147461, iteration: 22640
loss: 1.028106451034546,grad_norm: 0.8991473450096189, iteration: 22641
loss: 1.0151602029800415,grad_norm: 0.9661756476905098, iteration: 22642
loss: 0.9670248627662659,grad_norm: 0.9999991074265273, iteration: 22643
loss: 1.0087840557098389,grad_norm: 0.9999996167307856, iteration: 22644
loss: 1.0467756986618042,grad_norm: 0.999999218313809, iteration: 22645
loss: 1.0131831169128418,grad_norm: 0.999999390269017, iteration: 22646
loss: 1.0090389251708984,grad_norm: 0.9999991182966522, iteration: 22647
loss: 1.012677550315857,grad_norm: 0.9999993539875479, iteration: 22648
loss: 0.9750292301177979,grad_norm: 0.9936498326672765, iteration: 22649
loss: 1.057193398475647,grad_norm: 0.9999991247036547, iteration: 22650
loss: 0.9737343788146973,grad_norm: 0.8939835351363941, iteration: 22651
loss: 1.0566415786743164,grad_norm: 0.9999994644675951, iteration: 22652
loss: 1.0125670433044434,grad_norm: 0.9999997352335441, iteration: 22653
loss: 1.0410465002059937,grad_norm: 0.9999991733568111, iteration: 22654
loss: 1.0022169351577759,grad_norm: 0.9999996640067557, iteration: 22655
loss: 0.9880990386009216,grad_norm: 0.9999993999740356, iteration: 22656
loss: 0.995169997215271,grad_norm: 0.9999990254008433, iteration: 22657
loss: 1.0279065370559692,grad_norm: 0.9999994742744687, iteration: 22658
loss: 1.0125352144241333,grad_norm: 0.9999993638310689, iteration: 22659
loss: 1.0647891759872437,grad_norm: 0.9999997911111244, iteration: 22660
loss: 1.1028167009353638,grad_norm: 0.9999996863923245, iteration: 22661
loss: 1.0213100910186768,grad_norm: 0.9999996037618898, iteration: 22662
loss: 0.9782517552375793,grad_norm: 0.9999994538298761, iteration: 22663
loss: 1.0202833414077759,grad_norm: 0.9999995438381787, iteration: 22664
loss: 1.0339365005493164,grad_norm: 0.9999991187936861, iteration: 22665
loss: 0.9990437626838684,grad_norm: 0.8661972984218468, iteration: 22666
loss: 0.9977254271507263,grad_norm: 0.9999990314956082, iteration: 22667
loss: 1.0067739486694336,grad_norm: 0.9751153554340399, iteration: 22668
loss: 1.0226662158966064,grad_norm: 0.9999991918879454, iteration: 22669
loss: 1.0325192213058472,grad_norm: 0.974720558103524, iteration: 22670
loss: 0.9688417911529541,grad_norm: 0.9999991320703024, iteration: 22671
loss: 1.0159282684326172,grad_norm: 0.9999996456632027, iteration: 22672
loss: 1.0106993913650513,grad_norm: 0.9999992085389677, iteration: 22673
loss: 0.9958966970443726,grad_norm: 0.999999056824651, iteration: 22674
loss: 1.022382378578186,grad_norm: 0.9970068518562119, iteration: 22675
loss: 0.9754095673561096,grad_norm: 0.9999991687188511, iteration: 22676
loss: 1.0254656076431274,grad_norm: 0.9778232955975102, iteration: 22677
loss: 1.0267771482467651,grad_norm: 0.9999996422145563, iteration: 22678
loss: 0.9921015501022339,grad_norm: 0.9999990766585856, iteration: 22679
loss: 1.0691635608673096,grad_norm: 0.999999410326775, iteration: 22680
loss: 0.9759491086006165,grad_norm: 0.9383814289756116, iteration: 22681
loss: 0.9532338976860046,grad_norm: 0.9364287076413093, iteration: 22682
loss: 1.0054552555084229,grad_norm: 0.999999635345978, iteration: 22683
loss: 1.0177977085113525,grad_norm: 0.9999996124993813, iteration: 22684
loss: 0.9972202181816101,grad_norm: 0.999999159599157, iteration: 22685
loss: 0.9544629454612732,grad_norm: 0.9999993337278101, iteration: 22686
loss: 0.9710841774940491,grad_norm: 0.9999991776443148, iteration: 22687
loss: 1.0143961906433105,grad_norm: 0.9999990964007857, iteration: 22688
loss: 1.0869336128234863,grad_norm: 0.9999992462911492, iteration: 22689
loss: 1.0126222372055054,grad_norm: 0.9999993665260118, iteration: 22690
loss: 1.0119158029556274,grad_norm: 0.9999991886407971, iteration: 22691
loss: 1.0524238348007202,grad_norm: 0.9999996596087893, iteration: 22692
loss: 1.0074719190597534,grad_norm: 0.9999993371160525, iteration: 22693
loss: 1.033154010772705,grad_norm: 0.9677485519137384, iteration: 22694
loss: 1.0851646661758423,grad_norm: 0.9999996932484791, iteration: 22695
loss: 1.0418472290039062,grad_norm: 0.9999991380068536, iteration: 22696
loss: 1.0228184461593628,grad_norm: 0.9613010668676867, iteration: 22697
loss: 0.9939265251159668,grad_norm: 0.999999103964759, iteration: 22698
loss: 0.9893491864204407,grad_norm: 0.9999992626481328, iteration: 22699
loss: 1.0261718034744263,grad_norm: 0.9999993019329202, iteration: 22700
loss: 0.9990576505661011,grad_norm: 0.9999990670169098, iteration: 22701
loss: 1.0037752389907837,grad_norm: 0.9999991666538782, iteration: 22702
loss: 1.0306293964385986,grad_norm: 0.9999989993224967, iteration: 22703
loss: 1.0420258045196533,grad_norm: 0.9999993228307836, iteration: 22704
loss: 0.9779729247093201,grad_norm: 0.9739603963366062, iteration: 22705
loss: 1.0161701440811157,grad_norm: 0.999999017297156, iteration: 22706
loss: 0.9834393858909607,grad_norm: 0.9999989956363989, iteration: 22707
loss: 0.9997965097427368,grad_norm: 0.9999993704748122, iteration: 22708
loss: 1.0003224611282349,grad_norm: 0.9141240721112632, iteration: 22709
loss: 0.9982301592826843,grad_norm: 0.9673881865217855, iteration: 22710
loss: 0.9921743273735046,grad_norm: 0.9999991425691193, iteration: 22711
loss: 1.0695090293884277,grad_norm: 0.9999996866624923, iteration: 22712
loss: 1.0808144807815552,grad_norm: 0.9999993666572603, iteration: 22713
loss: 1.035677194595337,grad_norm: 0.999999603637808, iteration: 22714
loss: 1.0073813199996948,grad_norm: 0.9999991208496078, iteration: 22715
loss: 1.0224038362503052,grad_norm: 0.9999998064824086, iteration: 22716
loss: 1.0023893117904663,grad_norm: 0.9950473677653741, iteration: 22717
loss: 1.1522048711776733,grad_norm: 0.9999996934999535, iteration: 22718
loss: 1.0106595754623413,grad_norm: 0.9185441643646047, iteration: 22719
loss: 0.9932309985160828,grad_norm: 0.9742337911403002, iteration: 22720
loss: 0.9874534606933594,grad_norm: 0.9999991048168138, iteration: 22721
loss: 1.0123403072357178,grad_norm: 0.9999997178150114, iteration: 22722
loss: 0.9783027172088623,grad_norm: 0.9863496055792655, iteration: 22723
loss: 0.9727164506912231,grad_norm: 0.9999992509402756, iteration: 22724
loss: 0.9707264304161072,grad_norm: 0.9999992537836127, iteration: 22725
loss: 1.0805919170379639,grad_norm: 0.999999565265995, iteration: 22726
loss: 1.0497251749038696,grad_norm: 0.9999997997421897, iteration: 22727
loss: 0.9558525085449219,grad_norm: 0.9828068310356205, iteration: 22728
loss: 1.0220900774002075,grad_norm: 0.9999996081584063, iteration: 22729
loss: 1.0203001499176025,grad_norm: 0.9999991829904957, iteration: 22730
loss: 1.0378888845443726,grad_norm: 0.8940812533405919, iteration: 22731
loss: 0.991991400718689,grad_norm: 0.9999989470189042, iteration: 22732
loss: 1.0688430070877075,grad_norm: 0.9999998513368517, iteration: 22733
loss: 1.0311172008514404,grad_norm: 0.9999992665483474, iteration: 22734
loss: 1.0183519124984741,grad_norm: 0.9999994428854323, iteration: 22735
loss: 1.0472577810287476,grad_norm: 0.9999993899786488, iteration: 22736
loss: 1.0007054805755615,grad_norm: 0.9999995051399767, iteration: 22737
loss: 1.030576229095459,grad_norm: 0.9999992592599792, iteration: 22738
loss: 1.077944040298462,grad_norm: 0.9999997138629168, iteration: 22739
loss: 1.030894160270691,grad_norm: 0.9999992624557282, iteration: 22740
loss: 1.0212318897247314,grad_norm: 0.9999990861517105, iteration: 22741
loss: 1.0068553686141968,grad_norm: 0.9026703665545026, iteration: 22742
loss: 0.9989264607429504,grad_norm: 0.9115385609927967, iteration: 22743
loss: 1.0450546741485596,grad_norm: 0.9999992686132281, iteration: 22744
loss: 1.0218133926391602,grad_norm: 0.9999995651927085, iteration: 22745
loss: 1.03346848487854,grad_norm: 0.9999994335634566, iteration: 22746
loss: 0.9698656797409058,grad_norm: 0.9799901686489061, iteration: 22747
loss: 1.0319892168045044,grad_norm: 0.9999993508911859, iteration: 22748
loss: 1.0164737701416016,grad_norm: 0.9999991488496656, iteration: 22749
loss: 0.9969903826713562,grad_norm: 0.9999995220743172, iteration: 22750
loss: 1.076735019683838,grad_norm: 0.9999998186753308, iteration: 22751
loss: 1.0006989240646362,grad_norm: 0.9965140048078363, iteration: 22752
loss: 1.026741623878479,grad_norm: 0.9999992618893367, iteration: 22753
loss: 1.0129119157791138,grad_norm: 0.9999992031491608, iteration: 22754
loss: 1.0294263362884521,grad_norm: 0.9999992045544596, iteration: 22755
loss: 1.0178464651107788,grad_norm: 0.9999997312477923, iteration: 22756
loss: 1.0197269916534424,grad_norm: 0.9999993211244632, iteration: 22757
loss: 1.0629630088806152,grad_norm: 0.9999995270631321, iteration: 22758
loss: 1.0015910863876343,grad_norm: 0.9329147427437349, iteration: 22759
loss: 1.0236648321151733,grad_norm: 0.9999993382929049, iteration: 22760
loss: 0.9810033440589905,grad_norm: 0.9999992820024861, iteration: 22761
loss: 1.0397083759307861,grad_norm: 0.999999431908552, iteration: 22762
loss: 0.987342357635498,grad_norm: 0.9999991571041029, iteration: 22763
loss: 1.0144468545913696,grad_norm: 0.9999992590921444, iteration: 22764
loss: 0.9794107675552368,grad_norm: 0.9999990706547258, iteration: 22765
loss: 1.0702316761016846,grad_norm: 0.9999996853638272, iteration: 22766
loss: 1.0240916013717651,grad_norm: 0.9999994672002805, iteration: 22767
loss: 1.0376861095428467,grad_norm: 0.9999991926571038, iteration: 22768
loss: 0.9837307929992676,grad_norm: 0.9999990887440675, iteration: 22769
loss: 1.125042200088501,grad_norm: 0.9999997894885388, iteration: 22770
loss: 1.0219229459762573,grad_norm: 0.9999992394979523, iteration: 22771
loss: 1.0172131061553955,grad_norm: 0.9999992235059368, iteration: 22772
loss: 1.0239454507827759,grad_norm: 0.9999994135353965, iteration: 22773
loss: 1.0220357179641724,grad_norm: 0.9210078524162886, iteration: 22774
loss: 1.0427576303482056,grad_norm: 0.9999993377981302, iteration: 22775
loss: 1.030358910560608,grad_norm: 0.9999991101199396, iteration: 22776
loss: 1.0312511920928955,grad_norm: 0.9999990475341113, iteration: 22777
loss: 1.0774649381637573,grad_norm: 0.9999998833759314, iteration: 22778
loss: 1.0031687021255493,grad_norm: 0.9999996815455255, iteration: 22779
loss: 0.9923426508903503,grad_norm: 0.9999991773228831, iteration: 22780
loss: 0.9984019994735718,grad_norm: 0.9999990627694193, iteration: 22781
loss: 1.0352274179458618,grad_norm: 0.9999992952317266, iteration: 22782
loss: 1.001021146774292,grad_norm: 0.99999909444141, iteration: 22783
loss: 1.0633388757705688,grad_norm: 0.9999997145968036, iteration: 22784
loss: 1.0700020790100098,grad_norm: 0.9999990744095716, iteration: 22785
loss: 1.0877137184143066,grad_norm: 0.9999994484207635, iteration: 22786
loss: 1.1004692316055298,grad_norm: 0.9999996126544349, iteration: 22787
loss: 1.1289293766021729,grad_norm: 0.9999993151853529, iteration: 22788
loss: 1.0568690299987793,grad_norm: 0.9999990409672049, iteration: 22789
loss: 1.0407521724700928,grad_norm: 0.9999997193246651, iteration: 22790
loss: 1.043992280960083,grad_norm: 0.9999992355209578, iteration: 22791
loss: 1.1030503511428833,grad_norm: 0.9999991077339682, iteration: 22792
loss: 1.0292644500732422,grad_norm: 0.9999991640538046, iteration: 22793
loss: 1.0180550813674927,grad_norm: 0.9999990841308461, iteration: 22794
loss: 1.0222957134246826,grad_norm: 0.9999991149845592, iteration: 22795
loss: 1.0614712238311768,grad_norm: 0.9999990907439387, iteration: 22796
loss: 1.0296214818954468,grad_norm: 0.9999991609263837, iteration: 22797
loss: 0.9827772378921509,grad_norm: 0.8549509773718843, iteration: 22798
loss: 0.992226243019104,grad_norm: 0.9885532460495375, iteration: 22799
loss: 1.0351042747497559,grad_norm: 0.9999996952396929, iteration: 22800
loss: 1.0201777219772339,grad_norm: 0.9999992809766624, iteration: 22801
loss: 1.0431960821151733,grad_norm: 0.9999990464619525, iteration: 22802
loss: 0.9804278016090393,grad_norm: 0.9999991089397484, iteration: 22803
loss: 1.0028280019760132,grad_norm: 0.9999991856928185, iteration: 22804
loss: 1.0713070631027222,grad_norm: 0.9999996282551974, iteration: 22805
loss: 1.1170437335968018,grad_norm: 0.9999998791246524, iteration: 22806
loss: 1.0337603092193604,grad_norm: 0.9999991564080223, iteration: 22807
loss: 1.1193933486938477,grad_norm: 0.9999997508060064, iteration: 22808
loss: 0.9991331696510315,grad_norm: 0.9999990176949717, iteration: 22809
loss: 1.0101534128189087,grad_norm: 0.9999990909046143, iteration: 22810
loss: 0.9936327338218689,grad_norm: 0.9999994316005618, iteration: 22811
loss: 1.007166862487793,grad_norm: 0.9999990862883266, iteration: 22812
loss: 0.9908395409584045,grad_norm: 0.9999993489253804, iteration: 22813
loss: 1.0094590187072754,grad_norm: 0.9128321626264505, iteration: 22814
loss: 1.1902129650115967,grad_norm: 0.9999997212845981, iteration: 22815
loss: 0.9882965087890625,grad_norm: 0.9999990011968517, iteration: 22816
loss: 1.0677366256713867,grad_norm: 0.9999995326453539, iteration: 22817
loss: 1.1004053354263306,grad_norm: 0.9999998933460509, iteration: 22818
loss: 0.9624698758125305,grad_norm: 0.9999991532184739, iteration: 22819
loss: 1.0449788570404053,grad_norm: 0.999999759491011, iteration: 22820
loss: 1.053624153137207,grad_norm: 0.9999996208636973, iteration: 22821
loss: 1.0517431497573853,grad_norm: 0.9999992607305488, iteration: 22822
loss: 1.0535367727279663,grad_norm: 0.9999995045395002, iteration: 22823
loss: 1.0070699453353882,grad_norm: 0.999999227383635, iteration: 22824
loss: 1.0332876443862915,grad_norm: 0.9092256651709321, iteration: 22825
loss: 0.9987697005271912,grad_norm: 0.8829912637865336, iteration: 22826
loss: 1.0336570739746094,grad_norm: 0.9999992153188413, iteration: 22827
loss: 0.9691842198371887,grad_norm: 0.9999991065838666, iteration: 22828
loss: 1.0549758672714233,grad_norm: 0.9999991775118234, iteration: 22829
loss: 1.0092071294784546,grad_norm: 0.9999990417498217, iteration: 22830
loss: 0.9864771962165833,grad_norm: 0.9999994960775408, iteration: 22831
loss: 1.0264204740524292,grad_norm: 0.9999991672208234, iteration: 22832
loss: 1.041121006011963,grad_norm: 0.9999990778396037, iteration: 22833
loss: 1.1259409189224243,grad_norm: 0.9999993465991668, iteration: 22834
loss: 1.0097475051879883,grad_norm: 0.9999993734093824, iteration: 22835
loss: 0.9964521527290344,grad_norm: 0.9999991530760192, iteration: 22836
loss: 1.0159058570861816,grad_norm: 0.9999991583133911, iteration: 22837
loss: 1.0886098146438599,grad_norm: 0.9999996739166872, iteration: 22838
loss: 1.0094157457351685,grad_norm: 0.9999991448185527, iteration: 22839
loss: 0.9893006086349487,grad_norm: 0.9999993675430784, iteration: 22840
loss: 0.9771048426628113,grad_norm: 0.9999995446785868, iteration: 22841
loss: 1.0358102321624756,grad_norm: 0.9999989590351782, iteration: 22842
loss: 1.0056023597717285,grad_norm: 0.9999993705149156, iteration: 22843
loss: 1.086655616760254,grad_norm: 0.9999993794535061, iteration: 22844
loss: 1.037617564201355,grad_norm: 0.999999258707279, iteration: 22845
loss: 1.0121248960494995,grad_norm: 0.9822451798189079, iteration: 22846
loss: 0.9877145886421204,grad_norm: 0.9999992646391412, iteration: 22847
loss: 1.0319362878799438,grad_norm: 0.9999993574079517, iteration: 22848
loss: 1.0212898254394531,grad_norm: 0.9999990709234657, iteration: 22849
loss: 1.0422090291976929,grad_norm: 0.999999461870143, iteration: 22850
loss: 0.9883520007133484,grad_norm: 0.9999992426622204, iteration: 22851
loss: 1.0071181058883667,grad_norm: 0.9942221604503916, iteration: 22852
loss: 1.0378607511520386,grad_norm: 0.9999991697885593, iteration: 22853
loss: 1.0069530010223389,grad_norm: 0.9852241476780963, iteration: 22854
loss: 1.0744503736495972,grad_norm: 0.9999997201774931, iteration: 22855
loss: 1.0126503705978394,grad_norm: 0.9999990540856339, iteration: 22856
loss: 1.0174469947814941,grad_norm: 0.9999991824839178, iteration: 22857
loss: 1.0125597715377808,grad_norm: 0.9993468412885952, iteration: 22858
loss: 1.0267459154129028,grad_norm: 0.9199690377022562, iteration: 22859
loss: 1.0126368999481201,grad_norm: 0.9999990620819245, iteration: 22860
loss: 1.107512354850769,grad_norm: 0.9999999143540969, iteration: 22861
loss: 1.0087922811508179,grad_norm: 0.9999995731310345, iteration: 22862
loss: 1.0187686681747437,grad_norm: 0.9276889754852206, iteration: 22863
loss: 0.9995096325874329,grad_norm: 0.9999993259301299, iteration: 22864
loss: 1.0133041143417358,grad_norm: 0.9999992006483158, iteration: 22865
loss: 1.0203222036361694,grad_norm: 0.9046164520248073, iteration: 22866
loss: 1.0251249074935913,grad_norm: 0.9999990919458215, iteration: 22867
loss: 1.0374674797058105,grad_norm: 0.9999992846906657, iteration: 22868
loss: 1.0103881359100342,grad_norm: 0.8917645935992228, iteration: 22869
loss: 0.9778792262077332,grad_norm: 0.9999994284604385, iteration: 22870
loss: 1.0540415048599243,grad_norm: 0.9999996123485896, iteration: 22871
loss: 1.026591420173645,grad_norm: 0.9999992793920891, iteration: 22872
loss: 1.0134779214859009,grad_norm: 0.7968487526321044, iteration: 22873
loss: 1.061612606048584,grad_norm: 0.9999996127343732, iteration: 22874
loss: 1.0140286684036255,grad_norm: 0.9999993282652636, iteration: 22875
loss: 0.9930164217948914,grad_norm: 0.9327431492007815, iteration: 22876
loss: 0.9972471594810486,grad_norm: 0.9117555113534839, iteration: 22877
loss: 1.020401120185852,grad_norm: 0.999999232854567, iteration: 22878
loss: 1.031072974205017,grad_norm: 0.9999993449542952, iteration: 22879
loss: 1.0130391120910645,grad_norm: 0.9999991140968381, iteration: 22880
loss: 1.024059534072876,grad_norm: 0.999999078168991, iteration: 22881
loss: 1.0263972282409668,grad_norm: 0.99999913057447, iteration: 22882
loss: 1.023258090019226,grad_norm: 0.9999991938042776, iteration: 22883
loss: 1.0220065116882324,grad_norm: 0.9999995300204138, iteration: 22884
loss: 1.0118051767349243,grad_norm: 0.9999996636051617, iteration: 22885
loss: 1.0085923671722412,grad_norm: 0.9999990590969546, iteration: 22886
loss: 1.0256638526916504,grad_norm: 0.9999994863388644, iteration: 22887
loss: 1.001412272453308,grad_norm: 0.9999997910362862, iteration: 22888
loss: 1.033968210220337,grad_norm: 0.999999097396314, iteration: 22889
loss: 1.0198971033096313,grad_norm: 0.9087004348788392, iteration: 22890
loss: 1.0240105390548706,grad_norm: 0.9999993535321556, iteration: 22891
loss: 1.0256801843643188,grad_norm: 0.9999992485691818, iteration: 22892
loss: 0.9238995909690857,grad_norm: 0.9999991341743236, iteration: 22893
loss: 1.0032434463500977,grad_norm: 0.937978615661102, iteration: 22894
loss: 1.0321052074432373,grad_norm: 0.9487872779220238, iteration: 22895
loss: 1.0504052639007568,grad_norm: 0.9999991287868883, iteration: 22896
loss: 1.0235270261764526,grad_norm: 0.9999993359537585, iteration: 22897
loss: 1.0113481283187866,grad_norm: 0.9999991376877436, iteration: 22898
loss: 1.0125131607055664,grad_norm: 0.9999993613954016, iteration: 22899
loss: 1.0175535678863525,grad_norm: 0.9999994374966007, iteration: 22900
loss: 1.011004090309143,grad_norm: 0.9999996285595313, iteration: 22901
loss: 1.0299339294433594,grad_norm: 0.9999993028680557, iteration: 22902
loss: 1.0004277229309082,grad_norm: 0.9222191255880242, iteration: 22903
loss: 1.0084209442138672,grad_norm: 0.9432092819162303, iteration: 22904
loss: 1.0076850652694702,grad_norm: 0.9999990808244624, iteration: 22905
loss: 1.033024787902832,grad_norm: 0.9999994339676269, iteration: 22906
loss: 1.0004945993423462,grad_norm: 0.9999996733687188, iteration: 22907
loss: 0.9821107387542725,grad_norm: 0.9999990292529798, iteration: 22908
loss: 0.9833167195320129,grad_norm: 0.9562331802703122, iteration: 22909
loss: 0.9975161552429199,grad_norm: 0.9999991409659734, iteration: 22910
loss: 0.9814639687538147,grad_norm: 0.9999995599044049, iteration: 22911
loss: 0.9964479804039001,grad_norm: 0.9999990337674111, iteration: 22912
loss: 1.0116126537322998,grad_norm: 0.9999991697084019, iteration: 22913
loss: 1.0304877758026123,grad_norm: 0.999999723720136, iteration: 22914
loss: 1.0679792165756226,grad_norm: 0.9999995130482034, iteration: 22915
loss: 0.9830831289291382,grad_norm: 0.9398939540056506, iteration: 22916
loss: 1.043519139289856,grad_norm: 0.9999991521626892, iteration: 22917
loss: 1.005571961402893,grad_norm: 0.9874856876936637, iteration: 22918
loss: 1.001940131187439,grad_norm: 0.9999992576272962, iteration: 22919
loss: 1.0196998119354248,grad_norm: 0.9999996507592878, iteration: 22920
loss: 1.088295340538025,grad_norm: 0.9999997029228364, iteration: 22921
loss: 0.9771536588668823,grad_norm: 0.8978507129141656, iteration: 22922
loss: 1.0121339559555054,grad_norm: 0.9061448079364534, iteration: 22923
loss: 1.0163302421569824,grad_norm: 0.9999991292555842, iteration: 22924
loss: 1.0695422887802124,grad_norm: 0.9999991820828088, iteration: 22925
loss: 1.0126879215240479,grad_norm: 0.9999993378745052, iteration: 22926
loss: 1.0295896530151367,grad_norm: 0.9999991117828001, iteration: 22927
loss: 0.9949508905410767,grad_norm: 0.9999990438792978, iteration: 22928
loss: 1.0028856992721558,grad_norm: 0.9999991633401277, iteration: 22929
loss: 0.9817048907279968,grad_norm: 0.9999991609702272, iteration: 22930
loss: 1.0315160751342773,grad_norm: 0.9844111797009105, iteration: 22931
loss: 0.9769997596740723,grad_norm: 0.9999991211270781, iteration: 22932
loss: 1.0904368162155151,grad_norm: 0.9999993797978386, iteration: 22933
loss: 1.0217067003250122,grad_norm: 0.9999996181720858, iteration: 22934
loss: 1.0618208646774292,grad_norm: 0.999999228066842, iteration: 22935
loss: 0.9932845234870911,grad_norm: 0.9999992425624525, iteration: 22936
loss: 0.9653254151344299,grad_norm: 0.9999996538332137, iteration: 22937
loss: 0.9622313380241394,grad_norm: 0.9999991309260261, iteration: 22938
loss: 0.9378676414489746,grad_norm: 0.9999992465776498, iteration: 22939
loss: 1.0271408557891846,grad_norm: 0.9999999555099887, iteration: 22940
loss: 0.9570883512496948,grad_norm: 0.9999996921827418, iteration: 22941
loss: 1.0776716470718384,grad_norm: 0.999999688128867, iteration: 22942
loss: 0.9525690674781799,grad_norm: 0.9999991592262563, iteration: 22943
loss: 1.0225595235824585,grad_norm: 0.9999991113610722, iteration: 22944
loss: 1.024833083152771,grad_norm: 0.9776754842065936, iteration: 22945
loss: 1.010513186454773,grad_norm: 0.8679145204136167, iteration: 22946
loss: 1.0506458282470703,grad_norm: 0.972515868791156, iteration: 22947
loss: 1.0171607732772827,grad_norm: 0.9655773314059048, iteration: 22948
loss: 0.985057532787323,grad_norm: 0.9999994196588522, iteration: 22949
loss: 1.0101755857467651,grad_norm: 0.7768657471329304, iteration: 22950
loss: 0.9961090087890625,grad_norm: 0.9999991587664687, iteration: 22951
loss: 1.00093674659729,grad_norm: 0.9999990814164263, iteration: 22952
loss: 0.9861750602722168,grad_norm: 0.9999995542793653, iteration: 22953
loss: 1.017085313796997,grad_norm: 0.9999992661997753, iteration: 22954
loss: 1.0533808469772339,grad_norm: 0.9999999029505724, iteration: 22955
loss: 1.0168566703796387,grad_norm: 0.9999991267097225, iteration: 22956
loss: 1.0811657905578613,grad_norm: 0.9999997016238443, iteration: 22957
loss: 1.023875117301941,grad_norm: 0.8553164083349621, iteration: 22958
loss: 1.0195707082748413,grad_norm: 0.999999258278451, iteration: 22959
loss: 0.9917280673980713,grad_norm: 0.9999993061730091, iteration: 22960
loss: 0.9963842630386353,grad_norm: 0.9999990264403046, iteration: 22961
loss: 1.003396987915039,grad_norm: 0.9999995005885428, iteration: 22962
loss: 1.071711778640747,grad_norm: 0.9999990682218132, iteration: 22963
loss: 0.9999195337295532,grad_norm: 0.9999990345852919, iteration: 22964
loss: 1.0154155492782593,grad_norm: 0.9844112294196593, iteration: 22965
loss: 1.055080533027649,grad_norm: 0.9999997165255783, iteration: 22966
loss: 1.0278258323669434,grad_norm: 0.9999993006697065, iteration: 22967
loss: 0.9569690227508545,grad_norm: 0.9551995470075162, iteration: 22968
loss: 1.0212830305099487,grad_norm: 0.9999991289719385, iteration: 22969
loss: 0.9968457221984863,grad_norm: 0.9999993310177185, iteration: 22970
loss: 1.1679272651672363,grad_norm: 0.999999919321502, iteration: 22971
loss: 0.9839751124382019,grad_norm: 0.9045349899881081, iteration: 22972
loss: 0.9936147332191467,grad_norm: 0.9999992559065132, iteration: 22973
loss: 0.9950789213180542,grad_norm: 0.9999994208905026, iteration: 22974
loss: 1.0825062990188599,grad_norm: 0.9999996124016941, iteration: 22975
loss: 1.0015257596969604,grad_norm: 0.9999992309022879, iteration: 22976
loss: 1.009409785270691,grad_norm: 0.999999246247379, iteration: 22977
loss: 0.9873766899108887,grad_norm: 0.9999992599281633, iteration: 22978
loss: 1.035028100013733,grad_norm: 0.9999992384824745, iteration: 22979
loss: 1.0361346006393433,grad_norm: 0.9999990154924282, iteration: 22980
loss: 1.035423994064331,grad_norm: 0.999999357829499, iteration: 22981
loss: 1.01905357837677,grad_norm: 0.999999340087744, iteration: 22982
loss: 1.0558953285217285,grad_norm: 0.9999997277367898, iteration: 22983
loss: 1.0086071491241455,grad_norm: 0.9999991253470273, iteration: 22984
loss: 1.0172581672668457,grad_norm: 0.9999998496641985, iteration: 22985
loss: 0.9660612344741821,grad_norm: 0.9925452528951089, iteration: 22986
loss: 1.0081568956375122,grad_norm: 0.9496812527579387, iteration: 22987
loss: 1.0163016319274902,grad_norm: 0.9999989898048828, iteration: 22988
loss: 1.0378731489181519,grad_norm: 0.9999991832617897, iteration: 22989
loss: 1.0315033197402954,grad_norm: 0.9887304293137645, iteration: 22990
loss: 1.0311775207519531,grad_norm: 0.9999992244447324, iteration: 22991
loss: 1.0175361633300781,grad_norm: 0.9389613177775399, iteration: 22992
loss: 1.0029529333114624,grad_norm: 0.9999994396316096, iteration: 22993
loss: 0.9965746998786926,grad_norm: 0.9999993334432281, iteration: 22994
loss: 1.0000476837158203,grad_norm: 0.999999425855824, iteration: 22995
loss: 1.0239626169204712,grad_norm: 0.9999990960400816, iteration: 22996
loss: 0.9948345422744751,grad_norm: 0.9999990759062412, iteration: 22997
loss: 1.0161621570587158,grad_norm: 0.8059242518189283, iteration: 22998
loss: 1.0277467966079712,grad_norm: 0.9999989987507032, iteration: 22999
loss: 0.9970544576644897,grad_norm: 0.9999995679530371, iteration: 23000
loss: 1.0312809944152832,grad_norm: 0.9999993902027765, iteration: 23001
loss: 1.0022157430648804,grad_norm: 0.9351689854688479, iteration: 23002
loss: 1.0387535095214844,grad_norm: 0.8978999395629865, iteration: 23003
loss: 0.9882135987281799,grad_norm: 0.9884131465689371, iteration: 23004
loss: 1.0516268014907837,grad_norm: 0.9999990734837402, iteration: 23005
loss: 0.9774362444877625,grad_norm: 0.999999158447778, iteration: 23006
loss: 1.0644631385803223,grad_norm: 0.9999993346325143, iteration: 23007
loss: 1.0211199522018433,grad_norm: 0.9359851764292914, iteration: 23008
loss: 1.0113109350204468,grad_norm: 0.9999990358930932, iteration: 23009
loss: 1.0499176979064941,grad_norm: 0.9999991118440387, iteration: 23010
loss: 1.0444363355636597,grad_norm: 0.9999990684297134, iteration: 23011
loss: 1.0360718965530396,grad_norm: 0.842150194183858, iteration: 23012
loss: 1.0200104713439941,grad_norm: 0.9999991776337892, iteration: 23013
loss: 1.0636563301086426,grad_norm: 0.9999995853255261, iteration: 23014
loss: 1.0170480012893677,grad_norm: 0.9999991851409099, iteration: 23015
loss: 1.009609341621399,grad_norm: 0.9256298366481988, iteration: 23016
loss: 1.0277118682861328,grad_norm: 0.8625797737815067, iteration: 23017
loss: 1.0329352617263794,grad_norm: 0.9999990574815587, iteration: 23018
loss: 1.0116684436798096,grad_norm: 0.9999990882472598, iteration: 23019
loss: 1.0330023765563965,grad_norm: 0.9999990671664034, iteration: 23020
loss: 0.982420027256012,grad_norm: 0.9999991499703147, iteration: 23021
loss: 1.0082495212554932,grad_norm: 0.9999991800803814, iteration: 23022
loss: 1.0089166164398193,grad_norm: 0.9999991874790611, iteration: 23023
loss: 1.0317761898040771,grad_norm: 0.9961276875320786, iteration: 23024
loss: 1.0228209495544434,grad_norm: 0.9999996450201908, iteration: 23025
loss: 1.0241025686264038,grad_norm: 0.9896261455612826, iteration: 23026
loss: 1.0109591484069824,grad_norm: 0.9999991678544807, iteration: 23027
loss: 1.0270566940307617,grad_norm: 0.9999991686556846, iteration: 23028
loss: 1.0225170850753784,grad_norm: 0.8610057186697708, iteration: 23029
loss: 1.0173249244689941,grad_norm: 0.9414907132503669, iteration: 23030
loss: 0.9833469986915588,grad_norm: 0.9999998252914403, iteration: 23031
loss: 1.0416995286941528,grad_norm: 0.9999993158138969, iteration: 23032
loss: 1.0169662237167358,grad_norm: 0.915473048019962, iteration: 23033
loss: 1.0104514360427856,grad_norm: 0.999998969956527, iteration: 23034
loss: 1.0457018613815308,grad_norm: 0.9999991377894489, iteration: 23035
loss: 1.043328046798706,grad_norm: 0.9999991095834335, iteration: 23036
loss: 1.0186729431152344,grad_norm: 0.9999992807649272, iteration: 23037
loss: 1.0189940929412842,grad_norm: 0.9999991528686593, iteration: 23038
loss: 0.9992640018463135,grad_norm: 0.9999991088241482, iteration: 23039
loss: 0.9614292979240417,grad_norm: 0.9999991544245983, iteration: 23040
loss: 0.9933671355247498,grad_norm: 0.9999993223191811, iteration: 23041
loss: 1.0009965896606445,grad_norm: 0.9999990402324616, iteration: 23042
loss: 0.9921913146972656,grad_norm: 0.9493767448322216, iteration: 23043
loss: 1.000030755996704,grad_norm: 0.9999990729925575, iteration: 23044
loss: 1.00539231300354,grad_norm: 0.9999991227358703, iteration: 23045
loss: 1.0332711935043335,grad_norm: 0.9999991285095744, iteration: 23046
loss: 0.9719797372817993,grad_norm: 0.9999991300837596, iteration: 23047
loss: 0.9962928295135498,grad_norm: 0.9999992239061943, iteration: 23048
loss: 0.9972279071807861,grad_norm: 0.999508348698844, iteration: 23049
loss: 1.0274200439453125,grad_norm: 0.9711734777180954, iteration: 23050
loss: 1.0362993478775024,grad_norm: 0.9850384646333562, iteration: 23051
loss: 1.018234372138977,grad_norm: 0.9999991354240584, iteration: 23052
loss: 0.9911730885505676,grad_norm: 0.9999993090368592, iteration: 23053
loss: 1.0365132093429565,grad_norm: 0.9999990897533858, iteration: 23054
loss: 1.0141264200210571,grad_norm: 0.9999992698682643, iteration: 23055
loss: 1.0748580694198608,grad_norm: 0.9999991482089619, iteration: 23056
loss: 1.0905689001083374,grad_norm: 0.9999992416576393, iteration: 23057
loss: 1.0251870155334473,grad_norm: 0.9188826227434397, iteration: 23058
loss: 0.975837230682373,grad_norm: 0.959250334086226, iteration: 23059
loss: 1.0292872190475464,grad_norm: 0.9731920757396977, iteration: 23060
loss: 0.9905883073806763,grad_norm: 0.8767849686038685, iteration: 23061
loss: 0.9892674684524536,grad_norm: 0.9999990550879002, iteration: 23062
loss: 0.9856204390525818,grad_norm: 0.9999994453014273, iteration: 23063
loss: 1.0190104246139526,grad_norm: 0.9961900043271984, iteration: 23064
loss: 0.9977270364761353,grad_norm: 0.9024745405225689, iteration: 23065
loss: 1.0555825233459473,grad_norm: 0.9494532444673702, iteration: 23066
loss: 0.9748600125312805,grad_norm: 0.9999990799280954, iteration: 23067
loss: 1.0398956537246704,grad_norm: 0.9999991148309333, iteration: 23068
loss: 0.9835541248321533,grad_norm: 0.9999991092939439, iteration: 23069
loss: 1.0170716047286987,grad_norm: 0.9999991200043857, iteration: 23070
loss: 1.0157735347747803,grad_norm: 0.9999992325430779, iteration: 23071
loss: 0.9963752031326294,grad_norm: 0.9999990321621397, iteration: 23072
loss: 0.9699224829673767,grad_norm: 0.9999991128321933, iteration: 23073
loss: 1.0600202083587646,grad_norm: 0.9999992539532275, iteration: 23074
loss: 1.0251020193099976,grad_norm: 0.994206054159022, iteration: 23075
loss: 1.0141847133636475,grad_norm: 0.9173962825155396, iteration: 23076
loss: 1.034671664237976,grad_norm: 0.9999991987291763, iteration: 23077
loss: 1.0321249961853027,grad_norm: 0.9716218144492053, iteration: 23078
loss: 1.0016151666641235,grad_norm: 0.9515258062778839, iteration: 23079
loss: 0.9942352771759033,grad_norm: 0.9999990933096957, iteration: 23080
loss: 1.001896619796753,grad_norm: 0.9991746654452688, iteration: 23081
loss: 1.0036098957061768,grad_norm: 0.9999992268406788, iteration: 23082
loss: 1.0068131685256958,grad_norm: 0.9999995347780701, iteration: 23083
loss: 1.009465217590332,grad_norm: 0.999999390021535, iteration: 23084
loss: 0.9937575459480286,grad_norm: 0.995932340208607, iteration: 23085
loss: 1.0186879634857178,grad_norm: 0.9591282310397504, iteration: 23086
loss: 0.9942249655723572,grad_norm: 0.9764444787844534, iteration: 23087
loss: 1.0159820318222046,grad_norm: 0.9999991207802975, iteration: 23088
loss: 1.0116931200027466,grad_norm: 0.9999992034486194, iteration: 23089
loss: 0.9971639513969421,grad_norm: 0.9999991308330798, iteration: 23090
loss: 0.9399710893630981,grad_norm: 0.9999991502153257, iteration: 23091
loss: 1.0164107084274292,grad_norm: 0.9999991017907951, iteration: 23092
loss: 0.9838236570358276,grad_norm: 0.9999991314493882, iteration: 23093
loss: 1.0101772546768188,grad_norm: 0.9999992191490672, iteration: 23094
loss: 0.9731226563453674,grad_norm: 0.934373757920751, iteration: 23095
loss: 1.0006214380264282,grad_norm: 0.9999991115003867, iteration: 23096
loss: 1.0379014015197754,grad_norm: 0.9999990799769166, iteration: 23097
loss: 0.9876795411109924,grad_norm: 0.9470048221493859, iteration: 23098
loss: 0.9777323603630066,grad_norm: 0.9204686506711509, iteration: 23099
loss: 0.9900031685829163,grad_norm: 0.8572258699019158, iteration: 23100
loss: 0.9666574001312256,grad_norm: 0.9999991091241822, iteration: 23101
loss: 1.0416358709335327,grad_norm: 0.9999992540979122, iteration: 23102
loss: 0.9676305651664734,grad_norm: 0.9377474317394315, iteration: 23103
loss: 1.0985760688781738,grad_norm: 0.9999996186954409, iteration: 23104
loss: 0.9642947912216187,grad_norm: 0.8970036606497268, iteration: 23105
loss: 1.0300378799438477,grad_norm: 0.9999990709365039, iteration: 23106
loss: 0.9701798558235168,grad_norm: 0.8986984387449624, iteration: 23107
loss: 1.0056952238082886,grad_norm: 0.8768903220823293, iteration: 23108
loss: 1.0114790201187134,grad_norm: 0.9718090913316837, iteration: 23109
loss: 1.0029399394989014,grad_norm: 0.9999990429936828, iteration: 23110
loss: 1.0241740942001343,grad_norm: 0.9999994707721664, iteration: 23111
loss: 1.0166780948638916,grad_norm: 0.9999991764674848, iteration: 23112
loss: 0.9966141581535339,grad_norm: 0.9833641591148319, iteration: 23113
loss: 1.000038504600525,grad_norm: 0.999999208638109, iteration: 23114
loss: 1.020661473274231,grad_norm: 0.9999990208331333, iteration: 23115
loss: 1.114880084991455,grad_norm: 0.9999994286062839, iteration: 23116
loss: 0.9744971990585327,grad_norm: 0.9999996975365836, iteration: 23117
loss: 1.0169428586959839,grad_norm: 0.999999208284432, iteration: 23118
loss: 0.9543387293815613,grad_norm: 0.9999991471649604, iteration: 23119
loss: 1.0456072092056274,grad_norm: 0.9643318819406991, iteration: 23120
loss: 0.9878042340278625,grad_norm: 0.9999989839747548, iteration: 23121
loss: 1.0231107473373413,grad_norm: 0.9416546734327615, iteration: 23122
loss: 1.030824899673462,grad_norm: 0.99999925227619, iteration: 23123
loss: 1.0488128662109375,grad_norm: 0.9999991819933569, iteration: 23124
loss: 1.0396722555160522,grad_norm: 0.9999991798941581, iteration: 23125
loss: 0.9961464405059814,grad_norm: 0.9999992648514113, iteration: 23126
loss: 1.0186405181884766,grad_norm: 0.9999994999447215, iteration: 23127
loss: 1.024873971939087,grad_norm: 0.9999995312082931, iteration: 23128
loss: 1.0437850952148438,grad_norm: 0.9999997025867892, iteration: 23129
loss: 1.0079689025878906,grad_norm: 0.9999992160844352, iteration: 23130
loss: 0.9947498440742493,grad_norm: 0.9999989897526629, iteration: 23131
loss: 1.0642859935760498,grad_norm: 0.9999993772111011, iteration: 23132
loss: 1.0669682025909424,grad_norm: 0.99999951966122, iteration: 23133
loss: 0.9942838549613953,grad_norm: 0.999999043792485, iteration: 23134
loss: 1.0562130212783813,grad_norm: 0.9999995588528466, iteration: 23135
loss: 1.0663434267044067,grad_norm: 0.9999997616984089, iteration: 23136
loss: 1.0029963254928589,grad_norm: 0.9999996756776954, iteration: 23137
loss: 1.0433385372161865,grad_norm: 0.9999990934216318, iteration: 23138
loss: 1.0093942880630493,grad_norm: 0.9845891449606279, iteration: 23139
loss: 0.9755454659461975,grad_norm: 0.9999992196156559, iteration: 23140
loss: 1.0253455638885498,grad_norm: 0.9999991971788673, iteration: 23141
loss: 1.0292764902114868,grad_norm: 0.9999993321812043, iteration: 23142
loss: 0.9758749008178711,grad_norm: 0.9999990603961193, iteration: 23143
loss: 1.048429012298584,grad_norm: 0.9999994014794713, iteration: 23144
loss: 1.054297924041748,grad_norm: 0.9999996949275123, iteration: 23145
loss: 1.0610796213150024,grad_norm: 0.9999997991438961, iteration: 23146
loss: 0.963596761226654,grad_norm: 0.8348780585447316, iteration: 23147
loss: 1.0418692827224731,grad_norm: 0.999999523759406, iteration: 23148
loss: 0.9909236431121826,grad_norm: 0.9999990423422855, iteration: 23149
loss: 1.0147573947906494,grad_norm: 0.9999991194613476, iteration: 23150
loss: 1.0590890645980835,grad_norm: 0.999999140528783, iteration: 23151
loss: 1.0154554843902588,grad_norm: 0.9999993728360473, iteration: 23152
loss: 0.9816754460334778,grad_norm: 0.9999992217566461, iteration: 23153
loss: 1.0258946418762207,grad_norm: 0.9675678661172755, iteration: 23154
loss: 1.0259995460510254,grad_norm: 0.9999991806775504, iteration: 23155
loss: 1.0184671878814697,grad_norm: 0.9999992641027848, iteration: 23156
loss: 0.9878627061843872,grad_norm: 0.9953266407952146, iteration: 23157
loss: 0.9876818656921387,grad_norm: 0.9999992281130075, iteration: 23158
loss: 1.0104988813400269,grad_norm: 0.9999992738546526, iteration: 23159
loss: 1.068989872932434,grad_norm: 0.9999998558993587, iteration: 23160
loss: 1.017053484916687,grad_norm: 0.9999990481399701, iteration: 23161
loss: 0.9885563254356384,grad_norm: 0.9999992122214278, iteration: 23162
loss: 0.9770408272743225,grad_norm: 0.9260266881190444, iteration: 23163
loss: 0.9970906376838684,grad_norm: 0.9766155072655378, iteration: 23164
loss: 1.0275132656097412,grad_norm: 0.9571028288098642, iteration: 23165
loss: 0.9752511382102966,grad_norm: 0.999999688277758, iteration: 23166
loss: 0.9733603000640869,grad_norm: 0.9999991140299468, iteration: 23167
loss: 0.9899650812149048,grad_norm: 0.999999026897374, iteration: 23168
loss: 1.045055866241455,grad_norm: 0.9999991424769168, iteration: 23169
loss: 1.031801462173462,grad_norm: 0.9833283065908185, iteration: 23170
loss: 1.0351096391677856,grad_norm: 0.8073793996319097, iteration: 23171
loss: 0.9823734164237976,grad_norm: 0.9999991002760137, iteration: 23172
loss: 1.0106252431869507,grad_norm: 0.9710104953138315, iteration: 23173
loss: 0.9915182590484619,grad_norm: 0.9999991685299974, iteration: 23174
loss: 1.010751724243164,grad_norm: 0.9574377765126593, iteration: 23175
loss: 1.007019281387329,grad_norm: 0.9999993544236689, iteration: 23176
loss: 1.1137077808380127,grad_norm: 0.9999997150835895, iteration: 23177
loss: 1.0130798816680908,grad_norm: 0.9999990493143345, iteration: 23178
loss: 0.9927191138267517,grad_norm: 0.9096074445112472, iteration: 23179
loss: 0.9706920385360718,grad_norm: 0.9756798179296359, iteration: 23180
loss: 1.0611531734466553,grad_norm: 0.9999998444942948, iteration: 23181
loss: 1.0363490581512451,grad_norm: 0.9999995616047673, iteration: 23182
loss: 0.9806926846504211,grad_norm: 0.999999163321564, iteration: 23183
loss: 1.0263382196426392,grad_norm: 0.999999463968754, iteration: 23184
loss: 1.0123382806777954,grad_norm: 0.9999991876145865, iteration: 23185
loss: 1.0676615238189697,grad_norm: 0.9999994351454992, iteration: 23186
loss: 1.0177291631698608,grad_norm: 0.8946252519504057, iteration: 23187
loss: 0.9999966025352478,grad_norm: 0.9999991152050706, iteration: 23188
loss: 1.0295697450637817,grad_norm: 0.9999999025508046, iteration: 23189
loss: 1.0411759614944458,grad_norm: 0.9218532336982286, iteration: 23190
loss: 0.9871033430099487,grad_norm: 0.9999992710215253, iteration: 23191
loss: 1.0305728912353516,grad_norm: 0.9999995667380074, iteration: 23192
loss: 1.0538417100906372,grad_norm: 0.9999995838294139, iteration: 23193
loss: 0.9973186254501343,grad_norm: 0.9519131589332148, iteration: 23194
loss: 1.0359208583831787,grad_norm: 0.9999990079241257, iteration: 23195
loss: 1.0137853622436523,grad_norm: 0.8790953695456428, iteration: 23196
loss: 0.9948862791061401,grad_norm: 0.9200931233811208, iteration: 23197
loss: 1.0403460264205933,grad_norm: 0.999999499863003, iteration: 23198
loss: 1.0347919464111328,grad_norm: 0.9877530017545435, iteration: 23199
loss: 1.0370259284973145,grad_norm: 0.9451114377040661, iteration: 23200
loss: 1.0065968036651611,grad_norm: 0.804121142537367, iteration: 23201
loss: 1.0128498077392578,grad_norm: 0.9999991883992466, iteration: 23202
loss: 1.0128309726715088,grad_norm: 0.9999990625366948, iteration: 23203
loss: 0.9765623211860657,grad_norm: 0.9132781850371902, iteration: 23204
loss: 1.0156968832015991,grad_norm: 0.963552110760387, iteration: 23205
loss: 0.9979649186134338,grad_norm: 0.9999990835409631, iteration: 23206
loss: 1.0096310377120972,grad_norm: 0.999999431301189, iteration: 23207
loss: 1.0100457668304443,grad_norm: 0.9121965531114363, iteration: 23208
loss: 1.0376555919647217,grad_norm: 0.9999993566930465, iteration: 23209
loss: 0.9806745052337646,grad_norm: 0.9459657635562569, iteration: 23210
loss: 1.0402637720108032,grad_norm: 0.9999994173601908, iteration: 23211
loss: 1.0218911170959473,grad_norm: 0.9682393074987456, iteration: 23212
loss: 1.0015349388122559,grad_norm: 0.9999990309522366, iteration: 23213
loss: 1.0286965370178223,grad_norm: 0.9999991599286395, iteration: 23214
loss: 1.0375666618347168,grad_norm: 0.9999991665489466, iteration: 23215
loss: 1.0346570014953613,grad_norm: 0.9999991541289543, iteration: 23216
loss: 1.0620993375778198,grad_norm: 0.9999992995184391, iteration: 23217
loss: 1.0193899869918823,grad_norm: 0.8348309651943496, iteration: 23218
loss: 0.9905632138252258,grad_norm: 0.9999991294461094, iteration: 23219
loss: 0.9938933849334717,grad_norm: 0.8962165687532463, iteration: 23220
loss: 0.9954099059104919,grad_norm: 0.9999992498183728, iteration: 23221
loss: 1.0293018817901611,grad_norm: 0.9999991885739342, iteration: 23222
loss: 1.0020455121994019,grad_norm: 0.9999990548584702, iteration: 23223
loss: 1.0198522806167603,grad_norm: 0.8997310896087984, iteration: 23224
loss: 1.015415072441101,grad_norm: 0.9999993883635443, iteration: 23225
loss: 1.0064356327056885,grad_norm: 0.9999993220816862, iteration: 23226
loss: 1.0057036876678467,grad_norm: 0.999999442171633, iteration: 23227
loss: 1.038457989692688,grad_norm: 0.9999993817115342, iteration: 23228
loss: 0.9883198738098145,grad_norm: 0.9999994926292974, iteration: 23229
loss: 1.2234967947006226,grad_norm: 0.9999996114215878, iteration: 23230
loss: 0.9932923316955566,grad_norm: 0.9999994350337778, iteration: 23231
loss: 1.0466049909591675,grad_norm: 0.9999992830728616, iteration: 23232
loss: 1.0184556245803833,grad_norm: 0.9999991400738883, iteration: 23233
loss: 1.0073707103729248,grad_norm: 0.999999085315671, iteration: 23234
loss: 0.9738898277282715,grad_norm: 0.9999992123401179, iteration: 23235
loss: 1.0156373977661133,grad_norm: 0.9999990630582672, iteration: 23236
loss: 1.0354526042938232,grad_norm: 0.9999998808214497, iteration: 23237
loss: 1.0534098148345947,grad_norm: 0.9999998294216712, iteration: 23238
loss: 1.0314158201217651,grad_norm: 0.9999991606060796, iteration: 23239
loss: 0.9908136129379272,grad_norm: 0.890588712059487, iteration: 23240
loss: 1.0208334922790527,grad_norm: 0.9999991734781571, iteration: 23241
loss: 0.9828674793243408,grad_norm: 0.9999993106221282, iteration: 23242
loss: 1.0229361057281494,grad_norm: 0.9999991283307257, iteration: 23243
loss: 1.0314242839813232,grad_norm: 0.9999993987258665, iteration: 23244
loss: 0.9791724681854248,grad_norm: 0.9999992986941971, iteration: 23245
loss: 0.959855854511261,grad_norm: 0.9999989797438602, iteration: 23246
loss: 1.0069118738174438,grad_norm: 0.9999992636155004, iteration: 23247
loss: 1.1009881496429443,grad_norm: 0.9999996498693295, iteration: 23248
loss: 1.1090565919876099,grad_norm: 0.9999995117163992, iteration: 23249
loss: 1.0511387586593628,grad_norm: 0.9999990812634203, iteration: 23250
loss: 1.0538829565048218,grad_norm: 0.9999994564743191, iteration: 23251
loss: 1.063828945159912,grad_norm: 0.999998983314471, iteration: 23252
loss: 1.0536359548568726,grad_norm: 0.9632289503077552, iteration: 23253
loss: 1.0204421281814575,grad_norm: 0.9999991547601637, iteration: 23254
loss: 1.0311943292617798,grad_norm: 0.9999994934465248, iteration: 23255
loss: 0.9738004803657532,grad_norm: 0.960569205023684, iteration: 23256
loss: 1.0308678150177002,grad_norm: 0.9999991614650376, iteration: 23257
loss: 0.9907738566398621,grad_norm: 0.9999991266148279, iteration: 23258
loss: 1.0954935550689697,grad_norm: 0.9999997072307085, iteration: 23259
loss: 1.0546958446502686,grad_norm: 0.9999993489988717, iteration: 23260
loss: 1.0177252292633057,grad_norm: 0.9999992868587471, iteration: 23261
loss: 1.0308003425598145,grad_norm: 0.9873302021837812, iteration: 23262
loss: 0.9648982286453247,grad_norm: 0.9999991429005937, iteration: 23263
loss: 1.0038024187088013,grad_norm: 0.9999990859318043, iteration: 23264
loss: 1.0372772216796875,grad_norm: 0.9999991308642359, iteration: 23265
loss: 1.0586296319961548,grad_norm: 0.9999997955361231, iteration: 23266
loss: 1.0242984294891357,grad_norm: 0.8624625811809367, iteration: 23267
loss: 0.993232786655426,grad_norm: 0.8513647244346809, iteration: 23268
loss: 1.0113762617111206,grad_norm: 0.9999990369444866, iteration: 23269
loss: 1.0343060493469238,grad_norm: 0.9999996242782142, iteration: 23270
loss: 0.9985694885253906,grad_norm: 0.9999992693403363, iteration: 23271
loss: 1.0986111164093018,grad_norm: 0.9999997367537784, iteration: 23272
loss: 1.0003747940063477,grad_norm: 0.9513357461125906, iteration: 23273
loss: 1.0295723676681519,grad_norm: 0.9401309409581338, iteration: 23274
loss: 1.0009357929229736,grad_norm: 0.9242798994223542, iteration: 23275
loss: 0.9704764485359192,grad_norm: 0.9999991659444036, iteration: 23276
loss: 1.0333222150802612,grad_norm: 0.9999990805178436, iteration: 23277
loss: 1.0413240194320679,grad_norm: 0.9999991420605431, iteration: 23278
loss: 1.0543816089630127,grad_norm: 0.9999994568715738, iteration: 23279
loss: 1.007672667503357,grad_norm: 0.999999719767834, iteration: 23280
loss: 0.9732244610786438,grad_norm: 0.9511561318913017, iteration: 23281
loss: 1.0326542854309082,grad_norm: 0.9999999766336208, iteration: 23282
loss: 1.0228824615478516,grad_norm: 0.9999989758609421, iteration: 23283
loss: 1.0235689878463745,grad_norm: 0.9999990335939891, iteration: 23284
loss: 0.9877615571022034,grad_norm: 0.9999991114498595, iteration: 23285
loss: 1.0213847160339355,grad_norm: 0.9999992153432096, iteration: 23286
loss: 1.031386137008667,grad_norm: 0.9100952756627328, iteration: 23287
loss: 1.0028518438339233,grad_norm: 0.9906567860186865, iteration: 23288
loss: 1.037171721458435,grad_norm: 0.9999991421219541, iteration: 23289
loss: 0.9481006264686584,grad_norm: 0.9999992006535793, iteration: 23290
loss: 1.0506105422973633,grad_norm: 0.9999992007418986, iteration: 23291
loss: 0.936552882194519,grad_norm: 0.999999127670668, iteration: 23292
loss: 1.0605031251907349,grad_norm: 0.9999994550389721, iteration: 23293
loss: 0.9616687893867493,grad_norm: 0.9071487732769443, iteration: 23294
loss: 0.9778662323951721,grad_norm: 0.9999993078699476, iteration: 23295
loss: 1.0573642253875732,grad_norm: 0.9999995624604978, iteration: 23296
loss: 1.0158460140228271,grad_norm: 0.9999989456397446, iteration: 23297
loss: 1.032148003578186,grad_norm: 0.9364967368076335, iteration: 23298
loss: 1.0153346061706543,grad_norm: 0.999999322361369, iteration: 23299
loss: 1.0592758655548096,grad_norm: 0.9999993862701794, iteration: 23300
loss: 1.007189393043518,grad_norm: 0.9999993278956804, iteration: 23301
loss: 1.0251706838607788,grad_norm: 0.9999991282706593, iteration: 23302
loss: 1.0525964498519897,grad_norm: 0.999999614632927, iteration: 23303
loss: 1.0580109357833862,grad_norm: 0.9999997232690894, iteration: 23304
loss: 0.9978690147399902,grad_norm: 0.9999994127864072, iteration: 23305
loss: 1.0646958351135254,grad_norm: 0.9999997021184899, iteration: 23306
loss: 0.9968618750572205,grad_norm: 0.9870150947011379, iteration: 23307
loss: 1.0621118545532227,grad_norm: 0.9999992034059635, iteration: 23308
loss: 1.0140656232833862,grad_norm: 0.9999991728240363, iteration: 23309
loss: 1.0369402170181274,grad_norm: 0.9999993200094762, iteration: 23310
loss: 0.981592059135437,grad_norm: 0.9999993506091917, iteration: 23311
loss: 0.9757499098777771,grad_norm: 0.9999993388353827, iteration: 23312
loss: 1.0147150754928589,grad_norm: 0.9999994215071576, iteration: 23313
loss: 1.0543718338012695,grad_norm: 0.9999996716373156, iteration: 23314
loss: 0.9885187149047852,grad_norm: 0.999999370336452, iteration: 23315
loss: 1.021165132522583,grad_norm: 0.9999996502093428, iteration: 23316
loss: 0.9334839582443237,grad_norm: 0.9999996932610317, iteration: 23317
loss: 0.9877384901046753,grad_norm: 0.9999991341685138, iteration: 23318
loss: 0.99881511926651,grad_norm: 0.9999992175345783, iteration: 23319
loss: 1.0304265022277832,grad_norm: 0.9999989711328707, iteration: 23320
loss: 0.9927477240562439,grad_norm: 0.9999994267209548, iteration: 23321
loss: 1.0175288915634155,grad_norm: 0.9999993337673472, iteration: 23322
loss: 1.00706148147583,grad_norm: 0.999999299608199, iteration: 23323
loss: 0.9907392263412476,grad_norm: 0.9102787625646815, iteration: 23324
loss: 1.0244998931884766,grad_norm: 0.9999991141622577, iteration: 23325
loss: 1.0112762451171875,grad_norm: 0.9999990935832667, iteration: 23326
loss: 1.0614885091781616,grad_norm: 0.9999990839612155, iteration: 23327
loss: 1.0017768144607544,grad_norm: 0.9999994072902464, iteration: 23328
loss: 1.056363582611084,grad_norm: 0.9999996697946965, iteration: 23329
loss: 0.9790549278259277,grad_norm: 0.991211311031676, iteration: 23330
loss: 1.0164904594421387,grad_norm: 0.9170561474395389, iteration: 23331
loss: 1.0193856954574585,grad_norm: 0.9999992250292331, iteration: 23332
loss: 0.9910510778427124,grad_norm: 0.9999992580223948, iteration: 23333
loss: 1.0011378526687622,grad_norm: 0.9999992581895172, iteration: 23334
loss: 1.0261660814285278,grad_norm: 0.9999991705093499, iteration: 23335
loss: 1.0675543546676636,grad_norm: 0.9999996491667152, iteration: 23336
loss: 1.0259532928466797,grad_norm: 0.9999996125923415, iteration: 23337
loss: 1.0218088626861572,grad_norm: 0.9999991558573403, iteration: 23338
loss: 1.012550950050354,grad_norm: 0.9999991259089283, iteration: 23339
loss: 1.0586357116699219,grad_norm: 0.9999998838870071, iteration: 23340
loss: 1.0016298294067383,grad_norm: 0.9999991793889783, iteration: 23341
loss: 1.0047146081924438,grad_norm: 0.8889129374497617, iteration: 23342
loss: 0.9918931722640991,grad_norm: 0.9999990068417919, iteration: 23343
loss: 1.0224303007125854,grad_norm: 0.9999992944300077, iteration: 23344
loss: 1.076883316040039,grad_norm: 0.9999997871322625, iteration: 23345
loss: 1.0432686805725098,grad_norm: 0.9999991760233451, iteration: 23346
loss: 0.9785710573196411,grad_norm: 0.8636752883685762, iteration: 23347
loss: 0.9901911616325378,grad_norm: 0.9999992832320526, iteration: 23348
loss: 0.9862943291664124,grad_norm: 0.91556187312311, iteration: 23349
loss: 0.9782801866531372,grad_norm: 0.999999098276202, iteration: 23350
loss: 0.9775072336196899,grad_norm: 0.9643922780420106, iteration: 23351
loss: 1.067570686340332,grad_norm: 0.9999994357209684, iteration: 23352
loss: 0.9734258055686951,grad_norm: 0.9999990985444446, iteration: 23353
loss: 1.005612850189209,grad_norm: 0.9920996360568454, iteration: 23354
loss: 0.9971243143081665,grad_norm: 0.9396171210417613, iteration: 23355
loss: 1.0842797756195068,grad_norm: 0.9999995976556302, iteration: 23356
loss: 0.9981429576873779,grad_norm: 0.9999993477452225, iteration: 23357
loss: 1.0490890741348267,grad_norm: 0.9999997345027033, iteration: 23358
loss: 1.003905177116394,grad_norm: 0.9312268429052909, iteration: 23359
loss: 1.010922908782959,grad_norm: 0.999999281641129, iteration: 23360
loss: 0.9809587001800537,grad_norm: 0.9999990408857977, iteration: 23361
loss: 1.026761770248413,grad_norm: 0.999999605871225, iteration: 23362
loss: 0.9825050234794617,grad_norm: 0.9999993055159467, iteration: 23363
loss: 0.984933078289032,grad_norm: 0.99999899682216, iteration: 23364
loss: 1.005828619003296,grad_norm: 0.999999125053804, iteration: 23365
loss: 0.9897350668907166,grad_norm: 0.9999995279067239, iteration: 23366
loss: 1.1089164018630981,grad_norm: 0.9999997503230892, iteration: 23367
loss: 1.0071487426757812,grad_norm: 0.9999990980853389, iteration: 23368
loss: 0.9571051001548767,grad_norm: 0.9999991718387802, iteration: 23369
loss: 1.0160363912582397,grad_norm: 0.9999997841385084, iteration: 23370
loss: 0.9878296256065369,grad_norm: 0.9999997306234079, iteration: 23371
loss: 1.0277559757232666,grad_norm: 0.9999993668517693, iteration: 23372
loss: 1.0327144861221313,grad_norm: 0.9999990586909473, iteration: 23373
loss: 1.025459885597229,grad_norm: 0.88551965619933, iteration: 23374
loss: 1.0145399570465088,grad_norm: 0.9999991949052284, iteration: 23375
loss: 0.9994269609451294,grad_norm: 0.9999991022721321, iteration: 23376
loss: 1.0514202117919922,grad_norm: 0.9999991883864113, iteration: 23377
loss: 1.0490626096725464,grad_norm: 0.99999954835438, iteration: 23378
loss: 1.066043496131897,grad_norm: 0.9999997198446797, iteration: 23379
loss: 1.0536712408065796,grad_norm: 0.9999995813552118, iteration: 23380
loss: 1.0061699151992798,grad_norm: 0.9999989916735083, iteration: 23381
loss: 1.061448335647583,grad_norm: 0.999999522913481, iteration: 23382
loss: 1.010535478591919,grad_norm: 0.9999991688123191, iteration: 23383
loss: 0.9779167771339417,grad_norm: 0.9999991481171933, iteration: 23384
loss: 1.021298885345459,grad_norm: 0.9728553632046331, iteration: 23385
loss: 0.9761103391647339,grad_norm: 0.9999991027503402, iteration: 23386
loss: 1.0910786390304565,grad_norm: 0.9999996178796887, iteration: 23387
loss: 1.041279911994934,grad_norm: 0.999999627666284, iteration: 23388
loss: 0.9882002472877502,grad_norm: 0.8832130922033029, iteration: 23389
loss: 1.0249155759811401,grad_norm: 0.9999990470864449, iteration: 23390
loss: 1.0550018548965454,grad_norm: 0.9999998738921116, iteration: 23391
loss: 1.0404472351074219,grad_norm: 0.9999990395641458, iteration: 23392
loss: 1.0098017454147339,grad_norm: 0.9833103607463253, iteration: 23393
loss: 1.050608515739441,grad_norm: 0.9851292706834396, iteration: 23394
loss: 0.9705243110656738,grad_norm: 0.9999992210510485, iteration: 23395
loss: 1.0361292362213135,grad_norm: 0.9999990988999609, iteration: 23396
loss: 1.00327730178833,grad_norm: 0.9999991690733535, iteration: 23397
loss: 1.0600478649139404,grad_norm: 0.9999997856738833, iteration: 23398
loss: 1.0879042148590088,grad_norm: 0.9999995493081211, iteration: 23399
loss: 0.9884639382362366,grad_norm: 0.9772650675682863, iteration: 23400
loss: 1.003231167793274,grad_norm: 0.9630314588910497, iteration: 23401
loss: 1.0388433933258057,grad_norm: 0.9999994183779115, iteration: 23402
loss: 1.026034951210022,grad_norm: 0.9999992303295264, iteration: 23403
loss: 1.065159797668457,grad_norm: 0.9999999342577965, iteration: 23404
loss: 1.0127615928649902,grad_norm: 0.9999991050530931, iteration: 23405
loss: 1.0460824966430664,grad_norm: 0.9549291275937094, iteration: 23406
loss: 1.0388739109039307,grad_norm: 0.8158383026281827, iteration: 23407
loss: 1.0232017040252686,grad_norm: 0.9999992855209069, iteration: 23408
loss: 0.9787217378616333,grad_norm: 0.9999991112635251, iteration: 23409
loss: 1.0040513277053833,grad_norm: 0.9279770037417425, iteration: 23410
loss: 1.0226460695266724,grad_norm: 0.9727672588960505, iteration: 23411
loss: 1.0260206460952759,grad_norm: 0.9550603358286374, iteration: 23412
loss: 1.0532104969024658,grad_norm: 0.9999991715570975, iteration: 23413
loss: 1.039371132850647,grad_norm: 0.9999995746268214, iteration: 23414
loss: 1.02800452709198,grad_norm: 0.9999993082413645, iteration: 23415
loss: 0.9945585131645203,grad_norm: 0.9999991099200004, iteration: 23416
loss: 1.024065613746643,grad_norm: 0.8316096900755683, iteration: 23417
loss: 1.0176582336425781,grad_norm: 0.9999991517823509, iteration: 23418
loss: 0.9830159544944763,grad_norm: 0.9651718281781249, iteration: 23419
loss: 1.0939457416534424,grad_norm: 0.9999998342078524, iteration: 23420
loss: 0.9910711646080017,grad_norm: 0.9999992358374458, iteration: 23421
loss: 1.0035269260406494,grad_norm: 0.9999990679646118, iteration: 23422
loss: 1.0179178714752197,grad_norm: 0.9999992576069715, iteration: 23423
loss: 1.023560881614685,grad_norm: 0.9999996470490778, iteration: 23424
loss: 1.016422152519226,grad_norm: 0.8661188535752896, iteration: 23425
loss: 1.0446239709854126,grad_norm: 0.9999994660556287, iteration: 23426
loss: 1.0725390911102295,grad_norm: 0.9999994244634043, iteration: 23427
loss: 1.0503487586975098,grad_norm: 0.9999994712044598, iteration: 23428
loss: 0.9759145975112915,grad_norm: 0.9637120782407796, iteration: 23429
loss: 1.0554101467132568,grad_norm: 0.9999990861640953, iteration: 23430
loss: 1.0041143894195557,grad_norm: 0.9334734566243339, iteration: 23431
loss: 1.0274605751037598,grad_norm: 0.9095216653274161, iteration: 23432
loss: 0.986521303653717,grad_norm: 0.9999990981244241, iteration: 23433
loss: 0.9913097023963928,grad_norm: 0.9999989973858915, iteration: 23434
loss: 1.0200153589248657,grad_norm: 0.9737900078151831, iteration: 23435
loss: 0.9873818159103394,grad_norm: 0.9185892074754649, iteration: 23436
loss: 0.9908342361450195,grad_norm: 0.9533341740150105, iteration: 23437
loss: 1.0039068460464478,grad_norm: 0.9999990699887661, iteration: 23438
loss: 1.04140305519104,grad_norm: 0.9999997011542854, iteration: 23439
loss: 0.9846627712249756,grad_norm: 0.9999990835545088, iteration: 23440
loss: 1.0172988176345825,grad_norm: 0.9752925586141576, iteration: 23441
loss: 0.9904934167861938,grad_norm: 0.9999991340685617, iteration: 23442
loss: 0.9609276652336121,grad_norm: 0.9999992933773055, iteration: 23443
loss: 1.003383755683899,grad_norm: 0.9868512326781751, iteration: 23444
loss: 1.007379412651062,grad_norm: 0.9621109841687003, iteration: 23445
loss: 0.9911959171295166,grad_norm: 0.9999989923848749, iteration: 23446
loss: 0.9748077392578125,grad_norm: 0.9653170118396358, iteration: 23447
loss: 1.0225003957748413,grad_norm: 0.9999994646068624, iteration: 23448
loss: 1.0254335403442383,grad_norm: 0.9394550606526608, iteration: 23449
loss: 1.013959527015686,grad_norm: 0.9999991850522234, iteration: 23450
loss: 1.0084022283554077,grad_norm: 0.9999994719512213, iteration: 23451
loss: 1.0173147916793823,grad_norm: 0.9024972872584436, iteration: 23452
loss: 1.033982276916504,grad_norm: 0.9999991519383741, iteration: 23453
loss: 1.027103304862976,grad_norm: 0.9999993680509054, iteration: 23454
loss: 1.0302153825759888,grad_norm: 0.9999991926375832, iteration: 23455
loss: 1.0213196277618408,grad_norm: 0.9999993909130361, iteration: 23456
loss: 1.0769984722137451,grad_norm: 0.9999992734437477, iteration: 23457
loss: 1.0003920793533325,grad_norm: 0.9388616572459467, iteration: 23458
loss: 1.1462409496307373,grad_norm: 0.9999994162597835, iteration: 23459
loss: 1.040341854095459,grad_norm: 0.9999990956659774, iteration: 23460
loss: 1.0125017166137695,grad_norm: 0.9999994213520135, iteration: 23461
loss: 0.9879360795021057,grad_norm: 0.9266895750064085, iteration: 23462
loss: 1.0370454788208008,grad_norm: 0.9999990638792295, iteration: 23463
loss: 1.0046659708023071,grad_norm: 0.9999992752555116, iteration: 23464
loss: 1.068801999092102,grad_norm: 0.9999998420315053, iteration: 23465
loss: 1.042701005935669,grad_norm: 0.9999996969936831, iteration: 23466
loss: 0.9955593347549438,grad_norm: 0.9999990712458607, iteration: 23467
loss: 1.0293179750442505,grad_norm: 0.999999089555606, iteration: 23468
loss: 1.065143346786499,grad_norm: 0.9999998537897414, iteration: 23469
loss: 0.9955692887306213,grad_norm: 0.9786240492273364, iteration: 23470
loss: 0.9998065233230591,grad_norm: 0.9999993331698871, iteration: 23471
loss: 1.0444692373275757,grad_norm: 0.999999291350194, iteration: 23472
loss: 1.032583236694336,grad_norm: 0.9999993817148888, iteration: 23473
loss: 1.019875407218933,grad_norm: 0.9999991697061109, iteration: 23474
loss: 1.0044782161712646,grad_norm: 0.9999992578934018, iteration: 23475
loss: 1.0111901760101318,grad_norm: 0.9936115160176439, iteration: 23476
loss: 1.0229583978652954,grad_norm: 0.9999989955922608, iteration: 23477
loss: 0.9841411709785461,grad_norm: 0.9999991663340221, iteration: 23478
loss: 0.9649644494056702,grad_norm: 0.9999991632670704, iteration: 23479
loss: 1.0462043285369873,grad_norm: 0.9999994034883161, iteration: 23480
loss: 1.00863516330719,grad_norm: 0.9999991416106843, iteration: 23481
loss: 0.9868854880332947,grad_norm: 0.9364272660288766, iteration: 23482
loss: 1.0236682891845703,grad_norm: 0.9999993725475762, iteration: 23483
loss: 1.0912219285964966,grad_norm: 0.9999998553736438, iteration: 23484
loss: 1.0269715785980225,grad_norm: 0.9999991702017457, iteration: 23485
loss: 1.039974331855774,grad_norm: 0.9999997710127921, iteration: 23486
loss: 1.0238525867462158,grad_norm: 0.8371358614491906, iteration: 23487
loss: 0.9800386428833008,grad_norm: 0.8877579368384427, iteration: 23488
loss: 0.9974945187568665,grad_norm: 0.9999997396966728, iteration: 23489
loss: 0.998760998249054,grad_norm: 0.8450643063253197, iteration: 23490
loss: 1.0029696226119995,grad_norm: 0.8732701748381624, iteration: 23491
loss: 1.0125970840454102,grad_norm: 0.9999991598041857, iteration: 23492
loss: 1.001331090927124,grad_norm: 0.9999992208085974, iteration: 23493
loss: 1.0237925052642822,grad_norm: 0.9957240313911209, iteration: 23494
loss: 1.0118025541305542,grad_norm: 0.9999992176775044, iteration: 23495
loss: 1.1031877994537354,grad_norm: 0.9999991421203357, iteration: 23496
loss: 1.0168616771697998,grad_norm: 0.9999992941847949, iteration: 23497
loss: 0.9917905926704407,grad_norm: 0.9292794287497104, iteration: 23498
loss: 1.016628384590149,grad_norm: 0.9999993739055215, iteration: 23499
loss: 1.0302730798721313,grad_norm: 0.9999992159770751, iteration: 23500
loss: 1.0103535652160645,grad_norm: 0.9999992846867064, iteration: 23501
loss: 0.952406644821167,grad_norm: 0.9999991382530174, iteration: 23502
loss: 1.0338990688323975,grad_norm: 0.9999990795978209, iteration: 23503
loss: 0.9876244068145752,grad_norm: 0.9829113637072127, iteration: 23504
loss: 1.020020842552185,grad_norm: 0.999999137279482, iteration: 23505
loss: 0.9303010106086731,grad_norm: 0.8791669390172973, iteration: 23506
loss: 1.0016995668411255,grad_norm: 0.9999991419035817, iteration: 23507
loss: 0.9930651187896729,grad_norm: 0.9999991566541062, iteration: 23508
loss: 0.9518914818763733,grad_norm: 0.9999992869392447, iteration: 23509
loss: 1.0492033958435059,grad_norm: 0.9999993982383404, iteration: 23510
loss: 1.0356910228729248,grad_norm: 0.999999265034719, iteration: 23511
loss: 1.0211906433105469,grad_norm: 0.9999992254216294, iteration: 23512
loss: 1.0506765842437744,grad_norm: 0.9999998374740815, iteration: 23513
loss: 0.9703744649887085,grad_norm: 0.9999990838150077, iteration: 23514
loss: 1.0234375,grad_norm: 0.978986381557646, iteration: 23515
loss: 0.9998589158058167,grad_norm: 0.9999991140434241, iteration: 23516
loss: 1.027490258216858,grad_norm: 0.9999991402569048, iteration: 23517
loss: 1.0016865730285645,grad_norm: 0.999999135325797, iteration: 23518
loss: 0.9922026991844177,grad_norm: 0.9035194843603248, iteration: 23519
loss: 0.9894013404846191,grad_norm: 0.9999991185399599, iteration: 23520
loss: 1.0106312036514282,grad_norm: 0.8702715515276293, iteration: 23521
loss: 1.0731773376464844,grad_norm: 0.9999996028552365, iteration: 23522
loss: 1.004807710647583,grad_norm: 0.9999990052384204, iteration: 23523
loss: 1.00809645652771,grad_norm: 0.9999992719875052, iteration: 23524
loss: 1.0360209941864014,grad_norm: 0.9999990603249315, iteration: 23525
loss: 1.0045294761657715,grad_norm: 0.9999990892688585, iteration: 23526
loss: 0.9909677505493164,grad_norm: 0.999999071704936, iteration: 23527
loss: 1.1185709238052368,grad_norm: 0.9999999432837938, iteration: 23528
loss: 1.0307477712631226,grad_norm: 0.9760566389654824, iteration: 23529
loss: 1.0086885690689087,grad_norm: 0.999999044167866, iteration: 23530
loss: 1.0275076627731323,grad_norm: 0.9999991126030203, iteration: 23531
loss: 1.0103743076324463,grad_norm: 0.9999990779332407, iteration: 23532
loss: 0.9542565941810608,grad_norm: 0.999999184366366, iteration: 23533
loss: 0.9949890375137329,grad_norm: 0.9248861588847572, iteration: 23534
loss: 1.0522266626358032,grad_norm: 0.9999994784837842, iteration: 23535
loss: 1.0116102695465088,grad_norm: 0.9999992527112813, iteration: 23536
loss: 1.0395188331604004,grad_norm: 0.9999990792877147, iteration: 23537
loss: 1.049410343170166,grad_norm: 0.9999991591411092, iteration: 23538
loss: 1.025986671447754,grad_norm: 0.9999991033210591, iteration: 23539
loss: 1.0289101600646973,grad_norm: 0.9999991714020702, iteration: 23540
loss: 1.017633318901062,grad_norm: 0.999999142195257, iteration: 23541
loss: 1.033307433128357,grad_norm: 0.9854957516446071, iteration: 23542
loss: 1.0252466201782227,grad_norm: 0.9999989822630848, iteration: 23543
loss: 1.0128895044326782,grad_norm: 0.9999996481740693, iteration: 23544
loss: 1.0410559177398682,grad_norm: 0.9999990204512968, iteration: 23545
loss: 0.9884696006774902,grad_norm: 0.8724588583922828, iteration: 23546
loss: 0.9992309808731079,grad_norm: 0.9999991243120525, iteration: 23547
loss: 1.015371561050415,grad_norm: 0.9999991463758612, iteration: 23548
loss: 1.0745313167572021,grad_norm: 0.999999684814771, iteration: 23549
loss: 1.043750524520874,grad_norm: 0.9999995379866009, iteration: 23550
loss: 1.017205834388733,grad_norm: 0.9009686333413255, iteration: 23551
loss: 0.9968481659889221,grad_norm: 0.9999996539539876, iteration: 23552
loss: 1.0490561723709106,grad_norm: 0.947455298629246, iteration: 23553
loss: 1.0208361148834229,grad_norm: 0.9937183446130087, iteration: 23554
loss: 1.0471147298812866,grad_norm: 0.999999754710113, iteration: 23555
loss: 1.029287576675415,grad_norm: 0.922643616266724, iteration: 23556
loss: 1.0470138788223267,grad_norm: 0.9645535277831867, iteration: 23557
loss: 1.0736242532730103,grad_norm: 0.9999991679666639, iteration: 23558
loss: 0.9712831377983093,grad_norm: 0.9999995031154477, iteration: 23559
loss: 0.9808805584907532,grad_norm: 0.9999990243043364, iteration: 23560
loss: 1.0404276847839355,grad_norm: 0.9999993907428699, iteration: 23561
loss: 1.0070347785949707,grad_norm: 0.9717380980996163, iteration: 23562
loss: 1.0207058191299438,grad_norm: 0.8709857131604659, iteration: 23563
loss: 1.0006510019302368,grad_norm: 0.9999992574220398, iteration: 23564
loss: 1.0151242017745972,grad_norm: 0.9999994771246874, iteration: 23565
loss: 1.0334250926971436,grad_norm: 0.9999992479510779, iteration: 23566
loss: 1.06460440158844,grad_norm: 0.9999991426120572, iteration: 23567
loss: 1.0122474431991577,grad_norm: 0.9999991353595274, iteration: 23568
loss: 1.0449366569519043,grad_norm: 0.9999992950086305, iteration: 23569
loss: 1.0210171937942505,grad_norm: 0.9999992689829375, iteration: 23570
loss: 1.0414766073226929,grad_norm: 0.9999992765660217, iteration: 23571
loss: 1.0231364965438843,grad_norm: 0.9309533189883215, iteration: 23572
loss: 0.9782148599624634,grad_norm: 0.9195162733650243, iteration: 23573
loss: 0.9849333167076111,grad_norm: 0.999999214566185, iteration: 23574
loss: 1.0121283531188965,grad_norm: 0.9999993227946741, iteration: 23575
loss: 1.01444411277771,grad_norm: 0.9999992352387287, iteration: 23576
loss: 1.0248355865478516,grad_norm: 0.9999992871657889, iteration: 23577
loss: 1.0432950258255005,grad_norm: 0.9999994438965475, iteration: 23578
loss: 0.9900839924812317,grad_norm: 0.814935080772616, iteration: 23579
loss: 1.0464611053466797,grad_norm: 0.9999991657116928, iteration: 23580
loss: 1.0367780923843384,grad_norm: 0.9949427817625003, iteration: 23581
loss: 0.9959862232208252,grad_norm: 0.9118673309361854, iteration: 23582
loss: 0.9797493815422058,grad_norm: 0.9999991704494824, iteration: 23583
loss: 1.0287480354309082,grad_norm: 0.9164266858406843, iteration: 23584
loss: 1.0193177461624146,grad_norm: 0.8936612617356918, iteration: 23585
loss: 1.0327507257461548,grad_norm: 0.9999990643926708, iteration: 23586
loss: 1.0085937976837158,grad_norm: 0.9999991997091993, iteration: 23587
loss: 1.026124119758606,grad_norm: 0.8653520761490541, iteration: 23588
loss: 0.9690555930137634,grad_norm: 0.9890548247293172, iteration: 23589
loss: 1.0249398946762085,grad_norm: 0.9999994327054643, iteration: 23590
loss: 0.9926819205284119,grad_norm: 0.9999991742460357, iteration: 23591
loss: 1.0234061479568481,grad_norm: 0.9999992959034225, iteration: 23592
loss: 1.013572335243225,grad_norm: 0.9196137735755979, iteration: 23593
loss: 1.017581582069397,grad_norm: 0.9999992553140803, iteration: 23594
loss: 1.0043638944625854,grad_norm: 0.9693542207116161, iteration: 23595
loss: 0.9986679553985596,grad_norm: 0.9999991396184031, iteration: 23596
loss: 1.0051281452178955,grad_norm: 0.9091633953795424, iteration: 23597
loss: 0.9897396564483643,grad_norm: 0.98427465047143, iteration: 23598
loss: 1.0221052169799805,grad_norm: 0.9999991659271475, iteration: 23599
loss: 0.9778565764427185,grad_norm: 0.9999992308718532, iteration: 23600
loss: 1.0731899738311768,grad_norm: 0.9999993067811199, iteration: 23601
loss: 1.025204062461853,grad_norm: 0.9485179837468896, iteration: 23602
loss: 1.0335718393325806,grad_norm: 0.999999073165587, iteration: 23603
loss: 1.052962303161621,grad_norm: 0.9999998615863842, iteration: 23604
loss: 1.0683047771453857,grad_norm: 0.9999991266786024, iteration: 23605
loss: 0.9651209115982056,grad_norm: 0.999999397636191, iteration: 23606
loss: 0.9877786636352539,grad_norm: 0.9999989902075527, iteration: 23607
loss: 0.9965893626213074,grad_norm: 0.9129480075142993, iteration: 23608
loss: 1.0218093395233154,grad_norm: 0.8844382874448316, iteration: 23609
loss: 1.0582671165466309,grad_norm: 0.9392246072461985, iteration: 23610
loss: 1.007625699043274,grad_norm: 0.9831293754753812, iteration: 23611
loss: 1.0084792375564575,grad_norm: 0.9230129392748764, iteration: 23612
loss: 1.0070878267288208,grad_norm: 0.9999991703534495, iteration: 23613
loss: 1.0144774913787842,grad_norm: 0.9999990574057552, iteration: 23614
loss: 1.0152069330215454,grad_norm: 0.9999991061107825, iteration: 23615
loss: 1.0377473831176758,grad_norm: 0.9999994301875352, iteration: 23616
loss: 1.0268458127975464,grad_norm: 0.9999990919586442, iteration: 23617
loss: 0.9965909719467163,grad_norm: 0.9999993078049846, iteration: 23618
loss: 1.0410743951797485,grad_norm: 0.9999990008840213, iteration: 23619
loss: 1.0264078378677368,grad_norm: 0.9999992124151482, iteration: 23620
loss: 1.0455467700958252,grad_norm: 0.9999999118646196, iteration: 23621
loss: 1.0791504383087158,grad_norm: 0.9999996916228032, iteration: 23622
loss: 0.9853516221046448,grad_norm: 0.9236161066888796, iteration: 23623
loss: 1.0221234560012817,grad_norm: 0.9953199219141768, iteration: 23624
loss: 1.0180124044418335,grad_norm: 0.9999990550702569, iteration: 23625
loss: 0.9888308048248291,grad_norm: 0.9999992029447472, iteration: 23626
loss: 1.0861754417419434,grad_norm: 0.9999997542128485, iteration: 23627
loss: 1.030389428138733,grad_norm: 0.9999994047776006, iteration: 23628
loss: 1.0281906127929688,grad_norm: 0.9999992902096257, iteration: 23629
loss: 0.9822489023208618,grad_norm: 0.9999991008839371, iteration: 23630
loss: 1.0371932983398438,grad_norm: 0.9999991072980717, iteration: 23631
loss: 1.0413392782211304,grad_norm: 0.9172281787416391, iteration: 23632
loss: 1.0418884754180908,grad_norm: 0.9999992535542095, iteration: 23633
loss: 0.9749807119369507,grad_norm: 0.9935068120162635, iteration: 23634
loss: 1.0420794486999512,grad_norm: 0.9999995198227932, iteration: 23635
loss: 1.0052576065063477,grad_norm: 0.999999310774957, iteration: 23636
loss: 1.0264514684677124,grad_norm: 0.9999990572023536, iteration: 23637
loss: 1.0584218502044678,grad_norm: 0.9999994212641184, iteration: 23638
loss: 0.9981476068496704,grad_norm: 0.9564635211064253, iteration: 23639
loss: 1.041613221168518,grad_norm: 0.9999992472482423, iteration: 23640
loss: 1.0574463605880737,grad_norm: 0.9999994150094438, iteration: 23641
loss: 1.0287578105926514,grad_norm: 0.8662453917844228, iteration: 23642
loss: 1.0053943395614624,grad_norm: 0.9999992841384892, iteration: 23643
loss: 1.0238392353057861,grad_norm: 0.9999991505595909, iteration: 23644
loss: 1.011547327041626,grad_norm: 0.9999995991559038, iteration: 23645
loss: 1.0151540040969849,grad_norm: 0.9999994135908951, iteration: 23646
loss: 1.004542589187622,grad_norm: 0.9999990181960396, iteration: 23647
loss: 1.0066485404968262,grad_norm: 0.9910827990484636, iteration: 23648
loss: 1.0500091314315796,grad_norm: 0.9999997563656609, iteration: 23649
loss: 1.0373482704162598,grad_norm: 0.9999991179191783, iteration: 23650
loss: 1.0189428329467773,grad_norm: 0.9827216775358791, iteration: 23651
loss: 1.0392627716064453,grad_norm: 0.9999993853117455, iteration: 23652
loss: 1.040519118309021,grad_norm: 0.99999959101631, iteration: 23653
loss: 1.0156657695770264,grad_norm: 0.9452697878142052, iteration: 23654
loss: 1.0028231143951416,grad_norm: 0.9999990652786604, iteration: 23655
loss: 1.1085944175720215,grad_norm: 0.9999992984891716, iteration: 23656
loss: 0.981299102306366,grad_norm: 0.9999990996857684, iteration: 23657
loss: 1.0267807245254517,grad_norm: 0.8961626328462667, iteration: 23658
loss: 1.014135718345642,grad_norm: 0.9999991230800317, iteration: 23659
loss: 1.0315427780151367,grad_norm: 0.9999993081632848, iteration: 23660
loss: 1.0908547639846802,grad_norm: 0.999999899073248, iteration: 23661
loss: 0.9978001713752747,grad_norm: 0.9999992197694189, iteration: 23662
loss: 1.0634831190109253,grad_norm: 0.9999994006037579, iteration: 23663
loss: 1.0190943479537964,grad_norm: 0.9999989962392746, iteration: 23664
loss: 1.0276777744293213,grad_norm: 0.9999990835087678, iteration: 23665
loss: 1.0024702548980713,grad_norm: 0.9999993439515296, iteration: 23666
loss: 1.043379783630371,grad_norm: 0.9999993918073503, iteration: 23667
loss: 0.9583298563957214,grad_norm: 0.9909995994505146, iteration: 23668
loss: 1.003995656967163,grad_norm: 0.9999989865154988, iteration: 23669
loss: 0.9895135164260864,grad_norm: 0.9719491225475266, iteration: 23670
loss: 1.026983380317688,grad_norm: 0.9999991743167627, iteration: 23671
loss: 0.9593178629875183,grad_norm: 0.9883156106683432, iteration: 23672
loss: 1.015113353729248,grad_norm: 0.9999992953762219, iteration: 23673
loss: 1.0308915376663208,grad_norm: 0.9999991273634181, iteration: 23674
loss: 0.9873209595680237,grad_norm: 0.9474370388767459, iteration: 23675
loss: 1.0284236669540405,grad_norm: 0.9999998963185679, iteration: 23676
loss: 0.9992932081222534,grad_norm: 0.9999994575884394, iteration: 23677
loss: 1.0116785764694214,grad_norm: 0.999999384632275, iteration: 23678
loss: 1.0302554368972778,grad_norm: 0.9999990790301396, iteration: 23679
loss: 0.9898425936698914,grad_norm: 0.9999990836343144, iteration: 23680
loss: 1.0477062463760376,grad_norm: 0.9999996268157078, iteration: 23681
loss: 1.0108126401901245,grad_norm: 0.9999998294474467, iteration: 23682
loss: 0.9793910980224609,grad_norm: 0.9171272617862049, iteration: 23683
loss: 0.9995424747467041,grad_norm: 0.9868932421675645, iteration: 23684
loss: 1.0101267099380493,grad_norm: 0.9999990809524127, iteration: 23685
loss: 1.0172362327575684,grad_norm: 0.9760659071347617, iteration: 23686
loss: 1.0913461446762085,grad_norm: 0.9999997816536422, iteration: 23687
loss: 1.0402748584747314,grad_norm: 0.9999990809744538, iteration: 23688
loss: 1.0000474452972412,grad_norm: 0.999999682308667, iteration: 23689
loss: 1.0052080154418945,grad_norm: 0.9999994840912327, iteration: 23690
loss: 1.033649206161499,grad_norm: 0.9999992456470979, iteration: 23691
loss: 0.9825639128684998,grad_norm: 0.9999997329130128, iteration: 23692
loss: 1.033198356628418,grad_norm: 0.9999993490986979, iteration: 23693
loss: 1.0088927745819092,grad_norm: 0.932482013856987, iteration: 23694
loss: 1.0416918992996216,grad_norm: 0.9999991985331141, iteration: 23695
loss: 1.0356009006500244,grad_norm: 0.9999992080084831, iteration: 23696
loss: 1.054763913154602,grad_norm: 0.8758080068719506, iteration: 23697
loss: 1.011838436126709,grad_norm: 0.9999991688746618, iteration: 23698
loss: 1.0019032955169678,grad_norm: 0.9999997841135434, iteration: 23699
loss: 1.0656086206436157,grad_norm: 0.911767600237807, iteration: 23700
loss: 1.0462316274642944,grad_norm: 0.9999992312446794, iteration: 23701
loss: 0.9882583618164062,grad_norm: 0.9999991248156747, iteration: 23702
loss: 0.9918087124824524,grad_norm: 0.929937378229567, iteration: 23703
loss: 0.9765562415122986,grad_norm: 0.9999990702800822, iteration: 23704
loss: 1.0467365980148315,grad_norm: 0.993937708898929, iteration: 23705
loss: 0.980904757976532,grad_norm: 0.8761398948451151, iteration: 23706
loss: 1.0359505414962769,grad_norm: 0.9999991516338023, iteration: 23707
loss: 1.0176445245742798,grad_norm: 0.999999206137676, iteration: 23708
loss: 0.9956904053688049,grad_norm: 0.9999991286306749, iteration: 23709
loss: 1.0816922187805176,grad_norm: 0.9999992439361528, iteration: 23710
loss: 1.0116790533065796,grad_norm: 0.9999997189830055, iteration: 23711
loss: 1.0087275505065918,grad_norm: 0.9999993787797262, iteration: 23712
loss: 1.011083960533142,grad_norm: 0.9999991252539283, iteration: 23713
loss: 0.9993513226509094,grad_norm: 0.9999997020918003, iteration: 23714
loss: 1.0247796773910522,grad_norm: 0.9999991494494533, iteration: 23715
loss: 1.015541911125183,grad_norm: 0.9999990683646001, iteration: 23716
loss: 1.047001600265503,grad_norm: 0.9999997053403854, iteration: 23717
loss: 1.015625238418579,grad_norm: 0.9999992397508792, iteration: 23718
loss: 0.9869242906570435,grad_norm: 0.9999993596558218, iteration: 23719
loss: 1.1196610927581787,grad_norm: 0.9999995750043547, iteration: 23720
loss: 1.0008183717727661,grad_norm: 0.9999994061339011, iteration: 23721
loss: 1.0116071701049805,grad_norm: 0.9665018846341729, iteration: 23722
loss: 1.0246332883834839,grad_norm: 0.8658155398963161, iteration: 23723
loss: 0.9750365018844604,grad_norm: 0.9999990499164774, iteration: 23724
loss: 1.0492650270462036,grad_norm: 0.925015102230135, iteration: 23725
loss: 1.0642529726028442,grad_norm: 0.9999990693357016, iteration: 23726
loss: 1.0028687715530396,grad_norm: 0.9999993794909596, iteration: 23727
loss: 0.9662891030311584,grad_norm: 0.9999990808564531, iteration: 23728
loss: 1.0091346502304077,grad_norm: 0.999999066184819, iteration: 23729
loss: 1.0746320486068726,grad_norm: 0.9999996259830035, iteration: 23730
loss: 1.0780037641525269,grad_norm: 0.9999991984710384, iteration: 23731
loss: 0.9965835809707642,grad_norm: 0.9844284168055765, iteration: 23732
loss: 0.9889687299728394,grad_norm: 0.9999993436316909, iteration: 23733
loss: 1.0185445547103882,grad_norm: 0.9631795592372918, iteration: 23734
loss: 1.0366921424865723,grad_norm: 0.9999990169439549, iteration: 23735
loss: 1.0308729410171509,grad_norm: 0.8937107865951242, iteration: 23736
loss: 1.0583592653274536,grad_norm: 0.9999997237569546, iteration: 23737
loss: 1.0252100229263306,grad_norm: 0.9999996927293462, iteration: 23738
loss: 1.0345405340194702,grad_norm: 0.9462742477280622, iteration: 23739
loss: 1.0267231464385986,grad_norm: 0.9999991360489007, iteration: 23740
loss: 0.9900826215744019,grad_norm: 0.9999990527819466, iteration: 23741
loss: 1.0040346384048462,grad_norm: 0.982120596404683, iteration: 23742
loss: 1.0104771852493286,grad_norm: 0.9999992495087354, iteration: 23743
loss: 1.0209296941757202,grad_norm: 0.9999989638782606, iteration: 23744
loss: 0.9843448400497437,grad_norm: 0.999999541112829, iteration: 23745
loss: 1.014114499092102,grad_norm: 0.9999991429604512, iteration: 23746
loss: 0.9799217581748962,grad_norm: 0.9999991081834692, iteration: 23747
loss: 0.9978267550468445,grad_norm: 0.8747305065270016, iteration: 23748
loss: 1.0399088859558105,grad_norm: 0.9349157205280487, iteration: 23749
loss: 1.08208429813385,grad_norm: 0.999999622883336, iteration: 23750
loss: 1.0281778573989868,grad_norm: 0.884599639119753, iteration: 23751
loss: 1.0057873725891113,grad_norm: 0.9999994754514887, iteration: 23752
loss: 1.0415760278701782,grad_norm: 0.9999992048888728, iteration: 23753
loss: 1.0429459810256958,grad_norm: 0.9999990418212114, iteration: 23754
loss: 0.9644830822944641,grad_norm: 0.9340271355034245, iteration: 23755
loss: 1.0273857116699219,grad_norm: 0.9370045117795135, iteration: 23756
loss: 0.9964972138404846,grad_norm: 0.9999996098248459, iteration: 23757
loss: 0.9969590306282043,grad_norm: 0.9999991909402536, iteration: 23758
loss: 1.0404144525527954,grad_norm: 0.9999992148835635, iteration: 23759
loss: 1.0141189098358154,grad_norm: 0.9182691727873987, iteration: 23760
loss: 0.9701103568077087,grad_norm: 0.9999993273372934, iteration: 23761
loss: 1.0278613567352295,grad_norm: 0.9999992517412696, iteration: 23762
loss: 0.9918424487113953,grad_norm: 0.9999990873756865, iteration: 23763
loss: 0.9873584508895874,grad_norm: 0.9999990748923927, iteration: 23764
loss: 1.017045021057129,grad_norm: 0.8812057556354327, iteration: 23765
loss: 1.0031594038009644,grad_norm: 0.9999990513452804, iteration: 23766
loss: 1.00026535987854,grad_norm: 0.9105198431994991, iteration: 23767
loss: 1.0046242475509644,grad_norm: 0.9999991062326095, iteration: 23768
loss: 1.0326074361801147,grad_norm: 0.9999994817792208, iteration: 23769
loss: 1.0161769390106201,grad_norm: 0.9999992053759972, iteration: 23770
loss: 1.001927375793457,grad_norm: 0.9197023811168586, iteration: 23771
loss: 1.0335098505020142,grad_norm: 0.9999992492666744, iteration: 23772
loss: 1.0219610929489136,grad_norm: 0.9999990914390204, iteration: 23773
loss: 1.020378589630127,grad_norm: 0.999999733342316, iteration: 23774
loss: 1.0351858139038086,grad_norm: 0.9999995691440551, iteration: 23775
loss: 1.0290367603302002,grad_norm: 0.9029045333860257, iteration: 23776
loss: 1.0770736932754517,grad_norm: 0.9999994580307923, iteration: 23777
loss: 0.97190260887146,grad_norm: 0.9609987815443427, iteration: 23778
loss: 1.0283923149108887,grad_norm: 0.8999840280782596, iteration: 23779
loss: 1.0445842742919922,grad_norm: 0.9999998481222038, iteration: 23780
loss: 0.9924981594085693,grad_norm: 0.9979166803348096, iteration: 23781
loss: 0.985701322555542,grad_norm: 0.999999042424877, iteration: 23782
loss: 1.0959762334823608,grad_norm: 0.9999992308314215, iteration: 23783
loss: 1.003735065460205,grad_norm: 0.999999166113195, iteration: 23784
loss: 1.03263521194458,grad_norm: 0.9999995387908933, iteration: 23785
loss: 1.0069314241409302,grad_norm: 0.9999994194134909, iteration: 23786
loss: 1.0688095092773438,grad_norm: 0.9999991928872215, iteration: 23787
loss: 0.9878547787666321,grad_norm: 0.9999989752796518, iteration: 23788
loss: 1.0293906927108765,grad_norm: 0.999999267496699, iteration: 23789
loss: 1.0939174890518188,grad_norm: 0.999999822985361, iteration: 23790
loss: 1.0144275426864624,grad_norm: 0.9999991256340742, iteration: 23791
loss: 1.0310689210891724,grad_norm: 0.8524378913200406, iteration: 23792
loss: 1.033684492111206,grad_norm: 0.9999993054829435, iteration: 23793
loss: 0.9838595986366272,grad_norm: 0.9999992099765014, iteration: 23794
loss: 0.9899232387542725,grad_norm: 0.8999750907165565, iteration: 23795
loss: 1.041256070137024,grad_norm: 0.9999996977267159, iteration: 23796
loss: 1.108169436454773,grad_norm: 0.9999999230778254, iteration: 23797
loss: 1.0231398344039917,grad_norm: 0.9999992163392798, iteration: 23798
loss: 1.324828028678894,grad_norm: 0.9999998102827473, iteration: 23799
loss: 1.0262069702148438,grad_norm: 0.9999992051097268, iteration: 23800
loss: 1.0520105361938477,grad_norm: 0.9822800188257729, iteration: 23801
loss: 1.0007431507110596,grad_norm: 0.9999993471412165, iteration: 23802
loss: 1.051599383354187,grad_norm: 0.9999992885568395, iteration: 23803
loss: 1.0344682931900024,grad_norm: 0.9999991921044075, iteration: 23804
loss: 1.0089961290359497,grad_norm: 0.9999991972008644, iteration: 23805
loss: 0.9807183146476746,grad_norm: 0.9999992337016282, iteration: 23806
loss: 1.0316184759140015,grad_norm: 0.999999163923966, iteration: 23807
loss: 1.0174862146377563,grad_norm: 0.9999994607917342, iteration: 23808
loss: 0.9699122309684753,grad_norm: 0.9999991610777419, iteration: 23809
loss: 1.040723443031311,grad_norm: 0.9999992795090126, iteration: 23810
loss: 1.0091993808746338,grad_norm: 0.9832690689228051, iteration: 23811
loss: 1.0302751064300537,grad_norm: 0.9999990634256978, iteration: 23812
loss: 1.0232172012329102,grad_norm: 0.9095423943126514, iteration: 23813
loss: 1.00425386428833,grad_norm: 0.9999991064086088, iteration: 23814
loss: 1.004257082939148,grad_norm: 0.9999991371596146, iteration: 23815
loss: 0.9961093664169312,grad_norm: 0.9559353701826223, iteration: 23816
loss: 0.9909014105796814,grad_norm: 0.9999990909678286, iteration: 23817
loss: 1.0026955604553223,grad_norm: 0.9199728300902034, iteration: 23818
loss: 1.0048052072525024,grad_norm: 0.8509680695559022, iteration: 23819
loss: 1.009473443031311,grad_norm: 0.9999994044034684, iteration: 23820
loss: 1.0102174282073975,grad_norm: 0.9999990741431879, iteration: 23821
loss: 0.99421626329422,grad_norm: 0.973889448511021, iteration: 23822
loss: 0.9688603281974792,grad_norm: 0.9999996762353773, iteration: 23823
loss: 1.057222843170166,grad_norm: 0.9999991409174466, iteration: 23824
loss: 0.9912049770355225,grad_norm: 0.909816119604865, iteration: 23825
loss: 1.001475214958191,grad_norm: 0.9999992154422005, iteration: 23826
loss: 0.9962340593338013,grad_norm: 0.9999992265267574, iteration: 23827
loss: 1.0272045135498047,grad_norm: 0.9165843654525794, iteration: 23828
loss: 1.0218281745910645,grad_norm: 0.9999993589657626, iteration: 23829
loss: 1.015920639038086,grad_norm: 0.9999992198335331, iteration: 23830
loss: 0.9897165894508362,grad_norm: 0.9172019531953637, iteration: 23831
loss: 1.0050724744796753,grad_norm: 0.9999991269922379, iteration: 23832
loss: 1.1245733499526978,grad_norm: 0.9999996435449501, iteration: 23833
loss: 1.023058295249939,grad_norm: 0.9999990840823872, iteration: 23834
loss: 1.03702712059021,grad_norm: 0.9402749224160369, iteration: 23835
loss: 1.027745008468628,grad_norm: 0.9999993654206382, iteration: 23836
loss: 0.9744131565093994,grad_norm: 0.9999999526525223, iteration: 23837
loss: 1.03499174118042,grad_norm: 0.904589175282735, iteration: 23838
loss: 1.0339456796646118,grad_norm: 0.999999155806433, iteration: 23839
loss: 0.975686252117157,grad_norm: 0.9999992953894292, iteration: 23840
loss: 1.0214110612869263,grad_norm: 0.9999991566581284, iteration: 23841
loss: 1.0246366262435913,grad_norm: 0.9999989886933184, iteration: 23842
loss: 1.0237555503845215,grad_norm: 0.9999990624784836, iteration: 23843
loss: 1.0260347127914429,grad_norm: 0.9199495580840101, iteration: 23844
loss: 1.060683250427246,grad_norm: 0.9999993940775925, iteration: 23845
loss: 1.0621188879013062,grad_norm: 0.9999995342207225, iteration: 23846
loss: 0.9900761842727661,grad_norm: 0.9999991775525001, iteration: 23847
loss: 1.1063095331192017,grad_norm: 0.9999997803070998, iteration: 23848
loss: 1.0309871435165405,grad_norm: 0.9999997099246831, iteration: 23849
loss: 1.0198460817337036,grad_norm: 0.9999991991249068, iteration: 23850
loss: 1.0162423849105835,grad_norm: 0.8845884098848044, iteration: 23851
loss: 0.9824452996253967,grad_norm: 0.9999990130257512, iteration: 23852
loss: 1.0020307302474976,grad_norm: 0.9209151694283721, iteration: 23853
loss: 0.9943305850028992,grad_norm: 0.9575144209277617, iteration: 23854
loss: 1.0115453004837036,grad_norm: 0.9999993509834639, iteration: 23855
loss: 1.0319520235061646,grad_norm: 0.9999994389615028, iteration: 23856
loss: 1.0070487260818481,grad_norm: 0.8959840265045056, iteration: 23857
loss: 0.9991520047187805,grad_norm: 0.9501647295169706, iteration: 23858
loss: 1.0145046710968018,grad_norm: 0.9999990569625121, iteration: 23859
loss: 1.0427418947219849,grad_norm: 0.9999991397757252, iteration: 23860
loss: 1.022025465965271,grad_norm: 0.9999991769733152, iteration: 23861
loss: 1.0212335586547852,grad_norm: 0.9342480397559864, iteration: 23862
loss: 1.0067211389541626,grad_norm: 0.8438953702541907, iteration: 23863
loss: 1.015493392944336,grad_norm: 0.8597158623798472, iteration: 23864
loss: 0.9889903664588928,grad_norm: 0.9999994163153143, iteration: 23865
loss: 1.044906735420227,grad_norm: 0.9999994244647532, iteration: 23866
loss: 1.021530032157898,grad_norm: 0.9999992443030836, iteration: 23867
loss: 1.012848138809204,grad_norm: 0.9999991539775304, iteration: 23868
loss: 1.0039361715316772,grad_norm: 0.999999495673371, iteration: 23869
loss: 1.0051559209823608,grad_norm: 0.9999991841217621, iteration: 23870
loss: 1.0058153867721558,grad_norm: 0.9999992568110379, iteration: 23871
loss: 1.0260733366012573,grad_norm: 0.9999992076865527, iteration: 23872
loss: 0.9622618556022644,grad_norm: 0.9443267777617352, iteration: 23873
loss: 1.0858221054077148,grad_norm: 0.9999992722039107, iteration: 23874
loss: 0.9829022288322449,grad_norm: 0.9999990798752829, iteration: 23875
loss: 0.991428017616272,grad_norm: 0.9999991641937847, iteration: 23876
loss: 1.0350868701934814,grad_norm: 0.9999990450721249, iteration: 23877
loss: 1.0406240224838257,grad_norm: 0.9994087790658253, iteration: 23878
loss: 0.98848557472229,grad_norm: 0.9999989902888408, iteration: 23879
loss: 1.0124342441558838,grad_norm: 0.999999122585605, iteration: 23880
loss: 1.0443546772003174,grad_norm: 0.9999992562509465, iteration: 23881
loss: 1.05014169216156,grad_norm: 0.9999998593462829, iteration: 23882
loss: 1.03507399559021,grad_norm: 0.9999990801091785, iteration: 23883
loss: 1.0036505460739136,grad_norm: 0.9174564373222918, iteration: 23884
loss: 1.0206180810928345,grad_norm: 0.872085538394766, iteration: 23885
loss: 1.0700256824493408,grad_norm: 0.8989477676905144, iteration: 23886
loss: 1.0148980617523193,grad_norm: 0.9999989829628765, iteration: 23887
loss: 1.0125187635421753,grad_norm: 0.8912693080914926, iteration: 23888
loss: 0.9911948442459106,grad_norm: 0.9999991607832929, iteration: 23889
loss: 1.020826816558838,grad_norm: 0.9999992035257067, iteration: 23890
loss: 0.9976462125778198,grad_norm: 0.9999991079961353, iteration: 23891
loss: 1.0134732723236084,grad_norm: 0.9999990306510848, iteration: 23892
loss: 1.0210597515106201,grad_norm: 0.9999990611081341, iteration: 23893
loss: 0.9945589900016785,grad_norm: 0.9999990935658476, iteration: 23894
loss: 1.0636671781539917,grad_norm: 0.8785653279507825, iteration: 23895
loss: 1.0031490325927734,grad_norm: 0.9999993607177692, iteration: 23896
loss: 1.0248786211013794,grad_norm: 0.9890055179249456, iteration: 23897
loss: 1.0023446083068848,grad_norm: 0.9999991711622483, iteration: 23898
loss: 1.039542555809021,grad_norm: 0.9986138135034596, iteration: 23899
loss: 0.9915971159934998,grad_norm: 0.9999991750282744, iteration: 23900
loss: 0.9944002032279968,grad_norm: 0.9999993228433889, iteration: 23901
loss: 1.055299997329712,grad_norm: 0.9165506660905817, iteration: 23902
loss: 1.0385916233062744,grad_norm: 0.9999991609009102, iteration: 23903
loss: 1.0020742416381836,grad_norm: 0.9736995375835247, iteration: 23904
loss: 0.9676724672317505,grad_norm: 0.9999992553465189, iteration: 23905
loss: 0.9977860450744629,grad_norm: 0.9399950329908711, iteration: 23906
loss: 1.0551255941390991,grad_norm: 0.9999991028691121, iteration: 23907
loss: 0.972419023513794,grad_norm: 0.999999183515927, iteration: 23908
loss: 0.9903346300125122,grad_norm: 0.9999990121968333, iteration: 23909
loss: 1.0103871822357178,grad_norm: 0.9999994667615935, iteration: 23910
loss: 1.0172611474990845,grad_norm: 0.9999998342058636, iteration: 23911
loss: 1.0339902639389038,grad_norm: 0.9999990912760548, iteration: 23912
loss: 1.031801700592041,grad_norm: 0.9392467470353117, iteration: 23913
loss: 1.00021493434906,grad_norm: 0.9999991125709081, iteration: 23914
loss: 1.0227895975112915,grad_norm: 0.9999991224289122, iteration: 23915
loss: 1.040759563446045,grad_norm: 0.9999990444896945, iteration: 23916
loss: 0.9999760985374451,grad_norm: 0.9999990401609299, iteration: 23917
loss: 1.0192373991012573,grad_norm: 0.983053492606596, iteration: 23918
loss: 1.0496872663497925,grad_norm: 0.9999998011841956, iteration: 23919
loss: 1.0359774827957153,grad_norm: 0.9411243824729477, iteration: 23920
loss: 1.0615525245666504,grad_norm: 0.9999994627879574, iteration: 23921
loss: 1.0143249034881592,grad_norm: 0.9999990319906806, iteration: 23922
loss: 1.0199558734893799,grad_norm: 0.9999993064281447, iteration: 23923
loss: 1.0134689807891846,grad_norm: 0.99999929416388, iteration: 23924
loss: 0.9995337724685669,grad_norm: 0.8993372051740924, iteration: 23925
loss: 0.978803277015686,grad_norm: 0.9999991497663621, iteration: 23926
loss: 0.9633050560951233,grad_norm: 0.9999992047146633, iteration: 23927
loss: 1.0171526670455933,grad_norm: 0.9999990898280509, iteration: 23928
loss: 0.9959298372268677,grad_norm: 0.9999993822633237, iteration: 23929
loss: 1.0224155187606812,grad_norm: 0.9999995186743941, iteration: 23930
loss: 1.0998259782791138,grad_norm: 0.9999991944619264, iteration: 23931
loss: 1.0406310558319092,grad_norm: 0.9350664116267928, iteration: 23932
loss: 1.019416093826294,grad_norm: 0.9114430128776753, iteration: 23933
loss: 1.028761625289917,grad_norm: 0.9999992960518764, iteration: 23934
loss: 0.9916211366653442,grad_norm: 0.9647343377869948, iteration: 23935
loss: 1.0252225399017334,grad_norm: 0.9999990534974518, iteration: 23936
loss: 1.0238587856292725,grad_norm: 0.9999992607595913, iteration: 23937
loss: 1.0386669635772705,grad_norm: 0.9999995206735965, iteration: 23938
loss: 1.0023921728134155,grad_norm: 0.9999991122578469, iteration: 23939
loss: 1.0325615406036377,grad_norm: 0.9999999276214605, iteration: 23940
loss: 1.0369490385055542,grad_norm: 0.9999991038452465, iteration: 23941
loss: 1.010259985923767,grad_norm: 0.9999994559391844, iteration: 23942
loss: 1.0043672323226929,grad_norm: 0.9638798966924794, iteration: 23943
loss: 0.9986181259155273,grad_norm: 0.9999990398721567, iteration: 23944
loss: 1.0166794061660767,grad_norm: 0.9999990092360757, iteration: 23945
loss: 1.0032958984375,grad_norm: 0.9999992265303417, iteration: 23946
loss: 0.9994948506355286,grad_norm: 0.9187536808535065, iteration: 23947
loss: 1.0015734434127808,grad_norm: 0.9999991238689862, iteration: 23948
loss: 1.0213061571121216,grad_norm: 0.9999993343847596, iteration: 23949
loss: 1.0336096286773682,grad_norm: 0.9999992210669558, iteration: 23950
loss: 0.9856650233268738,grad_norm: 0.9876753412140815, iteration: 23951
loss: 0.9939588308334351,grad_norm: 0.9999991044583569, iteration: 23952
loss: 1.0142700672149658,grad_norm: 0.8843272682062668, iteration: 23953
loss: 1.013085961341858,grad_norm: 0.9999990932152077, iteration: 23954
loss: 0.9743398427963257,grad_norm: 0.991757385310567, iteration: 23955
loss: 1.0837044715881348,grad_norm: 0.9999999042061113, iteration: 23956
loss: 0.9490752816200256,grad_norm: 0.9999990635566143, iteration: 23957
loss: 1.0306307077407837,grad_norm: 0.9999992415925064, iteration: 23958
loss: 1.076515793800354,grad_norm: 0.9999992088618096, iteration: 23959
loss: 1.022327184677124,grad_norm: 0.9962634231637582, iteration: 23960
loss: 1.0035122632980347,grad_norm: 0.9926073409189767, iteration: 23961
loss: 1.0038120746612549,grad_norm: 0.9999992125777022, iteration: 23962
loss: 1.007843017578125,grad_norm: 0.9999990672810142, iteration: 23963
loss: 0.9742937088012695,grad_norm: 0.9999990308466179, iteration: 23964
loss: 0.9760712385177612,grad_norm: 0.9999991736870245, iteration: 23965
loss: 1.0511654615402222,grad_norm: 0.999999274103114, iteration: 23966
loss: 1.010595679283142,grad_norm: 0.9999990050771363, iteration: 23967
loss: 1.0308763980865479,grad_norm: 0.9999996936621822, iteration: 23968
loss: 1.0183887481689453,grad_norm: 0.9999991201996997, iteration: 23969
loss: 1.0222785472869873,grad_norm: 0.9999990224066448, iteration: 23970
loss: 1.047855019569397,grad_norm: 0.9999995433053808, iteration: 23971
loss: 1.0098109245300293,grad_norm: 0.9864167838803773, iteration: 23972
loss: 1.005378246307373,grad_norm: 0.8587381727152833, iteration: 23973
loss: 1.0583240985870361,grad_norm: 0.999999924176517, iteration: 23974
loss: 1.0433166027069092,grad_norm: 0.9999992923906613, iteration: 23975
loss: 1.0003232955932617,grad_norm: 0.9999990662630591, iteration: 23976
loss: 1.0454113483428955,grad_norm: 0.9999990558502886, iteration: 23977
loss: 1.0181993246078491,grad_norm: 0.8713389084471616, iteration: 23978
loss: 1.0043015480041504,grad_norm: 0.9989872477799691, iteration: 23979
loss: 1.0055421590805054,grad_norm: 0.9999990098493006, iteration: 23980
loss: 1.0012834072113037,grad_norm: 0.9999993043049654, iteration: 23981
loss: 1.1051161289215088,grad_norm: 0.999999404735524, iteration: 23982
loss: 1.0000667572021484,grad_norm: 0.9475810698518167, iteration: 23983
loss: 1.0139795541763306,grad_norm: 0.9999993423703556, iteration: 23984
loss: 1.0409985780715942,grad_norm: 0.9999994481487473, iteration: 23985
loss: 1.0002822875976562,grad_norm: 0.9999989873350167, iteration: 23986
loss: 1.0024135112762451,grad_norm: 0.9601196078019506, iteration: 23987
loss: 0.9928542375564575,grad_norm: 0.911268748935276, iteration: 23988
loss: 1.0612882375717163,grad_norm: 0.999999600694515, iteration: 23989
loss: 1.0352983474731445,grad_norm: 0.9999991292787815, iteration: 23990
loss: 0.9965901970863342,grad_norm: 0.8401694548931427, iteration: 23991
loss: 1.010146975517273,grad_norm: 0.9999993590386574, iteration: 23992
loss: 1.0140916109085083,grad_norm: 0.9999991145460663, iteration: 23993
loss: 1.0517466068267822,grad_norm: 0.999999500191929, iteration: 23994
loss: 1.0112930536270142,grad_norm: 0.8113095205788481, iteration: 23995
loss: 1.0266598463058472,grad_norm: 0.9999990283979835, iteration: 23996
loss: 1.0392889976501465,grad_norm: 0.9892442494325054, iteration: 23997
loss: 1.0329930782318115,grad_norm: 0.9999993053143491, iteration: 23998
loss: 0.9753260612487793,grad_norm: 0.9999991085294399, iteration: 23999
loss: 1.0215448141098022,grad_norm: 0.9999992754428793, iteration: 24000
loss: 1.019710659980774,grad_norm: 0.9999992724713612, iteration: 24001
loss: 1.0113317966461182,grad_norm: 0.9372536169815394, iteration: 24002
loss: 0.9882872104644775,grad_norm: 0.9700848193887796, iteration: 24003
loss: 1.0557284355163574,grad_norm: 0.9999997226073011, iteration: 24004
loss: 1.0709768533706665,grad_norm: 0.9999991774673176, iteration: 24005
loss: 0.9979476928710938,grad_norm: 0.9999992645515934, iteration: 24006
loss: 1.0070037841796875,grad_norm: 0.9999990723492381, iteration: 24007
loss: 1.0167455673217773,grad_norm: 0.9999992072465281, iteration: 24008
loss: 0.9904083013534546,grad_norm: 0.9999994249052946, iteration: 24009
loss: 1.0135279893875122,grad_norm: 0.9981194569415739, iteration: 24010
loss: 1.0492446422576904,grad_norm: 0.9999994181871971, iteration: 24011
loss: 1.0372813940048218,grad_norm: 0.9507237948014797, iteration: 24012
loss: 1.0020179748535156,grad_norm: 0.9999994035748496, iteration: 24013
loss: 0.9796759486198425,grad_norm: 0.9999990668220541, iteration: 24014
loss: 0.982506513595581,grad_norm: 0.9999996287887587, iteration: 24015
loss: 1.02005136013031,grad_norm: 0.9440305761053129, iteration: 24016
loss: 1.0364584922790527,grad_norm: 0.9999991775868009, iteration: 24017
loss: 1.005432367324829,grad_norm: 0.994525022684325, iteration: 24018
loss: 0.9837357997894287,grad_norm: 0.999999146099069, iteration: 24019
loss: 1.0567936897277832,grad_norm: 0.9999995349673289, iteration: 24020
loss: 1.0084093809127808,grad_norm: 0.9999992526424634, iteration: 24021
loss: 0.9911412596702576,grad_norm: 0.9100585278105824, iteration: 24022
loss: 1.0338352918624878,grad_norm: 0.9999991602627105, iteration: 24023
loss: 1.0493422746658325,grad_norm: 0.9999995267658385, iteration: 24024
loss: 1.0291550159454346,grad_norm: 0.9999996381018312, iteration: 24025
loss: 1.0006239414215088,grad_norm: 0.99999906283352, iteration: 24026
loss: 1.0284819602966309,grad_norm: 0.9999990751436685, iteration: 24027
loss: 1.0167206525802612,grad_norm: 0.999999087047717, iteration: 24028
loss: 1.041828989982605,grad_norm: 0.9999993750894407, iteration: 24029
loss: 0.9539097547531128,grad_norm: 0.999999117491806, iteration: 24030
loss: 1.0699195861816406,grad_norm: 0.9999994547645158, iteration: 24031
loss: 1.1132694482803345,grad_norm: 0.9999994896012739, iteration: 24032
loss: 1.0273481607437134,grad_norm: 0.9999992482996573, iteration: 24033
loss: 1.0161858797073364,grad_norm: 0.9999993764113422, iteration: 24034
loss: 1.0126591920852661,grad_norm: 0.999999166048938, iteration: 24035
loss: 0.9953026175498962,grad_norm: 0.9999992720788049, iteration: 24036
loss: 1.0027631521224976,grad_norm: 0.9999992058205834, iteration: 24037
loss: 1.0046824216842651,grad_norm: 0.9999991058036455, iteration: 24038
loss: 1.0381566286087036,grad_norm: 0.9354137876618503, iteration: 24039
loss: 1.1146732568740845,grad_norm: 0.9999991021439407, iteration: 24040
loss: 1.0807194709777832,grad_norm: 0.9999998299656311, iteration: 24041
loss: 1.0688672065734863,grad_norm: 0.9999990441875415, iteration: 24042
loss: 0.961464524269104,grad_norm: 0.9999992684043276, iteration: 24043
loss: 1.0184333324432373,grad_norm: 0.9999991796598877, iteration: 24044
loss: 1.0088038444519043,grad_norm: 0.9999993691044161, iteration: 24045
loss: 0.9534497857093811,grad_norm: 0.9999991273971846, iteration: 24046
loss: 1.0016181468963623,grad_norm: 0.9014468147466593, iteration: 24047
loss: 1.0686627626419067,grad_norm: 0.8529755232377759, iteration: 24048
loss: 1.0300657749176025,grad_norm: 0.9999995299990102, iteration: 24049
loss: 1.0271247625350952,grad_norm: 0.9999993401683147, iteration: 24050
loss: 1.021402359008789,grad_norm: 0.9999994210679223, iteration: 24051
loss: 0.983121931552887,grad_norm: 0.9627452596641649, iteration: 24052
loss: 0.9559342265129089,grad_norm: 0.9999991442135043, iteration: 24053
loss: 1.0845774412155151,grad_norm: 0.9999995572065526, iteration: 24054
loss: 1.0277595520019531,grad_norm: 0.9999991521145429, iteration: 24055
loss: 1.0370415449142456,grad_norm: 0.9999990726529805, iteration: 24056
loss: 1.0021774768829346,grad_norm: 0.9999993512335636, iteration: 24057
loss: 1.016465663909912,grad_norm: 0.9999992693411034, iteration: 24058
loss: 1.0536370277404785,grad_norm: 0.9999994562778929, iteration: 24059
loss: 1.0408918857574463,grad_norm: 0.9355331156865303, iteration: 24060
loss: 1.0071289539337158,grad_norm: 0.9999994892082325, iteration: 24061
loss: 1.01389479637146,grad_norm: 0.9999989711860798, iteration: 24062
loss: 0.984715461730957,grad_norm: 0.999999264422474, iteration: 24063
loss: 1.043982744216919,grad_norm: 0.9999992798304947, iteration: 24064
loss: 1.0349533557891846,grad_norm: 0.9085421638915835, iteration: 24065
loss: 1.0391993522644043,grad_norm: 0.9999992430881501, iteration: 24066
loss: 1.0548185110092163,grad_norm: 0.9999993319315799, iteration: 24067
loss: 1.0061545372009277,grad_norm: 0.9999991698302434, iteration: 24068
loss: 1.0201207399368286,grad_norm: 0.9455665686333986, iteration: 24069
loss: 0.9951940178871155,grad_norm: 0.9999990697621604, iteration: 24070
loss: 1.0354851484298706,grad_norm: 0.9999990925822377, iteration: 24071
loss: 1.0071837902069092,grad_norm: 0.999999266676372, iteration: 24072
loss: 0.9851029515266418,grad_norm: 0.9999990488921131, iteration: 24073
loss: 1.0041418075561523,grad_norm: 0.9999993088870516, iteration: 24074
loss: 0.9741717576980591,grad_norm: 0.999999066920658, iteration: 24075
loss: 1.009398341178894,grad_norm: 0.9155949069232344, iteration: 24076
loss: 0.9713610410690308,grad_norm: 0.9999992091658233, iteration: 24077
loss: 1.0701216459274292,grad_norm: 0.9999996744531517, iteration: 24078
loss: 1.0163254737854004,grad_norm: 0.9999994009597124, iteration: 24079
loss: 1.0158213376998901,grad_norm: 0.9999993322475995, iteration: 24080
loss: 1.0316637754440308,grad_norm: 0.9999993657477321, iteration: 24081
loss: 1.023738145828247,grad_norm: 0.9999992583399703, iteration: 24082
loss: 0.9819306135177612,grad_norm: 0.9999989805998094, iteration: 24083
loss: 1.084573745727539,grad_norm: 0.9999998759909161, iteration: 24084
loss: 1.040663480758667,grad_norm: 0.9999990506463077, iteration: 24085
loss: 1.0194109678268433,grad_norm: 0.8614210611467741, iteration: 24086
loss: 1.0240811109542847,grad_norm: 0.9999992265466439, iteration: 24087
loss: 0.9655589461326599,grad_norm: 0.8963563389050113, iteration: 24088
loss: 0.9896174669265747,grad_norm: 0.9902687547645905, iteration: 24089
loss: 1.0043118000030518,grad_norm: 0.9999993953326367, iteration: 24090
loss: 1.0138657093048096,grad_norm: 0.9999990867819245, iteration: 24091
loss: 1.0093557834625244,grad_norm: 0.9363276146819508, iteration: 24092
loss: 0.9966742396354675,grad_norm: 0.8966633778497289, iteration: 24093
loss: 1.0123628377914429,grad_norm: 0.9999992969281268, iteration: 24094
loss: 1.0188294649124146,grad_norm: 0.9999991143082442, iteration: 24095
loss: 1.0286322832107544,grad_norm: 0.9999991748063958, iteration: 24096
loss: 1.0497815608978271,grad_norm: 0.9999989875578594, iteration: 24097
loss: 0.9987201690673828,grad_norm: 0.9716468679924717, iteration: 24098
loss: 0.9980751276016235,grad_norm: 0.9317077769945058, iteration: 24099
loss: 1.0285401344299316,grad_norm: 0.9999996649303527, iteration: 24100
loss: 0.9708319902420044,grad_norm: 0.9502550708300043, iteration: 24101
loss: 1.0184218883514404,grad_norm: 0.9999992268252142, iteration: 24102
loss: 1.0078520774841309,grad_norm: 0.9999991834492956, iteration: 24103
loss: 0.9687645435333252,grad_norm: 0.9999991376888194, iteration: 24104
loss: 0.9790247678756714,grad_norm: 0.9250200390306098, iteration: 24105
loss: 0.9833759665489197,grad_norm: 0.9246140297236297, iteration: 24106
loss: 1.0258768796920776,grad_norm: 0.9970491065161128, iteration: 24107
loss: 1.0357575416564941,grad_norm: 0.8797837029695742, iteration: 24108
loss: 1.0067923069000244,grad_norm: 0.9976068702233624, iteration: 24109
loss: 0.9845424890518188,grad_norm: 0.9625306621902188, iteration: 24110
loss: 0.961243748664856,grad_norm: 0.9999990233536847, iteration: 24111
loss: 1.0246785879135132,grad_norm: 0.9698545635970524, iteration: 24112
loss: 0.9933237433433533,grad_norm: 0.9999991622391647, iteration: 24113
loss: 1.0443614721298218,grad_norm: 0.9999993262321866, iteration: 24114
loss: 0.9862673878669739,grad_norm: 0.9780283794156337, iteration: 24115
loss: 1.008591651916504,grad_norm: 0.9999992124369793, iteration: 24116
loss: 1.0296021699905396,grad_norm: 0.9999992296467345, iteration: 24117
loss: 0.999293863773346,grad_norm: 0.9796705918323226, iteration: 24118
loss: 1.0118337869644165,grad_norm: 0.9999992882677086, iteration: 24119
loss: 1.0086451768875122,grad_norm: 0.9999990489899665, iteration: 24120
loss: 1.0105515718460083,grad_norm: 0.999999150783447, iteration: 24121
loss: 1.0195095539093018,grad_norm: 0.9999991623758385, iteration: 24122
loss: 1.0306801795959473,grad_norm: 0.9999990460918158, iteration: 24123
loss: 1.0698827505111694,grad_norm: 0.9810810679575588, iteration: 24124
loss: 0.9996879696846008,grad_norm: 0.966276525309179, iteration: 24125
loss: 1.0315518379211426,grad_norm: 0.9999993533820384, iteration: 24126
loss: 0.9958329796791077,grad_norm: 0.9999990276665527, iteration: 24127
loss: 1.0051398277282715,grad_norm: 0.9999992340878983, iteration: 24128
loss: 0.9437199831008911,grad_norm: 0.8728524960735322, iteration: 24129
loss: 1.0056291818618774,grad_norm: 0.999999037129824, iteration: 24130
loss: 1.005292296409607,grad_norm: 0.9999991517026021, iteration: 24131
loss: 0.9881998896598816,grad_norm: 0.9151711478429396, iteration: 24132
loss: 0.9606437087059021,grad_norm: 0.9999991520042553, iteration: 24133
loss: 1.0353528261184692,grad_norm: 0.9773338029843809, iteration: 24134
loss: 0.9881988763809204,grad_norm: 0.9999991069220975, iteration: 24135
loss: 0.9783004522323608,grad_norm: 0.9999990930269577, iteration: 24136
loss: 1.000040054321289,grad_norm: 0.8807073206584256, iteration: 24137
loss: 1.0052883625030518,grad_norm: 0.8387764635815133, iteration: 24138
loss: 1.017095685005188,grad_norm: 0.967537140853804, iteration: 24139
loss: 1.0920528173446655,grad_norm: 0.9999999268055815, iteration: 24140
loss: 1.0923079252243042,grad_norm: 0.9999996205363985, iteration: 24141
loss: 1.0301268100738525,grad_norm: 0.9999991439373499, iteration: 24142
loss: 0.9826562404632568,grad_norm: 0.9999991790968132, iteration: 24143
loss: 1.0338959693908691,grad_norm: 0.9999992251106816, iteration: 24144
loss: 1.0187195539474487,grad_norm: 0.9962202596462197, iteration: 24145
loss: 0.9869301915168762,grad_norm: 0.8901048134795632, iteration: 24146
loss: 0.9991855621337891,grad_norm: 0.9497735374102279, iteration: 24147
loss: 1.0017075538635254,grad_norm: 0.9999991782067628, iteration: 24148
loss: 0.9966161847114563,grad_norm: 0.999999263730883, iteration: 24149
loss: 1.0579886436462402,grad_norm: 0.999999246352192, iteration: 24150
loss: 1.0142985582351685,grad_norm: 0.9145792168882164, iteration: 24151
loss: 1.0645627975463867,grad_norm: 0.9999993953163014, iteration: 24152
loss: 0.9687387943267822,grad_norm: 0.9985154158346167, iteration: 24153
loss: 1.0079636573791504,grad_norm: 0.9733979758952435, iteration: 24154
loss: 1.001274585723877,grad_norm: 0.8946199707002959, iteration: 24155
loss: 1.031409502029419,grad_norm: 0.9999992517522793, iteration: 24156
loss: 1.1045432090759277,grad_norm: 0.9999991925581733, iteration: 24157
loss: 1.0037360191345215,grad_norm: 0.9999992990226748, iteration: 24158
loss: 1.0145018100738525,grad_norm: 0.9884675757338139, iteration: 24159
loss: 1.0636059045791626,grad_norm: 0.9999996043465725, iteration: 24160
loss: 1.1368329524993896,grad_norm: 0.99999977562127, iteration: 24161
loss: 1.031166911125183,grad_norm: 0.9999998080069382, iteration: 24162
loss: 1.0220742225646973,grad_norm: 0.884613497343122, iteration: 24163
loss: 1.0539934635162354,grad_norm: 0.9999997260305813, iteration: 24164
loss: 1.0312292575836182,grad_norm: 0.99999911865219, iteration: 24165
loss: 1.0144267082214355,grad_norm: 0.9999990334094847, iteration: 24166
loss: 1.0052226781845093,grad_norm: 0.9879926967060014, iteration: 24167
loss: 1.0223636627197266,grad_norm: 0.999999085885541, iteration: 24168
loss: 1.0138466358184814,grad_norm: 0.9670340377525796, iteration: 24169
loss: 1.0141645669937134,grad_norm: 0.999998997711147, iteration: 24170
loss: 0.9840942025184631,grad_norm: 0.9999990543266781, iteration: 24171
loss: 1.0183578729629517,grad_norm: 0.9999992640759822, iteration: 24172
loss: 1.1003750562667847,grad_norm: 0.9999993113484134, iteration: 24173
loss: 1.0393152236938477,grad_norm: 0.9999995823018358, iteration: 24174
loss: 1.0439971685409546,grad_norm: 0.9999992542184665, iteration: 24175
loss: 1.0105479955673218,grad_norm: 0.9827357777691522, iteration: 24176
loss: 1.022831678390503,grad_norm: 0.9999992756825171, iteration: 24177
loss: 0.984808087348938,grad_norm: 0.9999991679122665, iteration: 24178
loss: 1.027916669845581,grad_norm: 0.9999994458221747, iteration: 24179
loss: 0.9949088096618652,grad_norm: 0.9999991002557155, iteration: 24180
loss: 1.0152065753936768,grad_norm: 0.9507411273795912, iteration: 24181
loss: 0.9801298975944519,grad_norm: 0.9999991164867433, iteration: 24182
loss: 1.0188255310058594,grad_norm: 0.9999990765907473, iteration: 24183
loss: 1.0099295377731323,grad_norm: 0.9999992387573411, iteration: 24184
loss: 1.0328731536865234,grad_norm: 0.999999044939888, iteration: 24185
loss: 1.0152791738510132,grad_norm: 0.9999990763433769, iteration: 24186
loss: 1.0350027084350586,grad_norm: 0.999999082064527, iteration: 24187
loss: 1.005074143409729,grad_norm: 0.9999995842799726, iteration: 24188
loss: 1.0650166273117065,grad_norm: 0.9999994834662739, iteration: 24189
loss: 0.998088002204895,grad_norm: 0.9937498927198439, iteration: 24190
loss: 0.9963343143463135,grad_norm: 0.9373272096901496, iteration: 24191
loss: 1.0618765354156494,grad_norm: 0.9999992244019948, iteration: 24192
loss: 1.000778317451477,grad_norm: 0.9999990812512273, iteration: 24193
loss: 0.9996288418769836,grad_norm: 0.9999991629797756, iteration: 24194
loss: 0.9823039174079895,grad_norm: 0.8645121706873017, iteration: 24195
loss: 1.0193389654159546,grad_norm: 0.9572752488816261, iteration: 24196
loss: 0.9757749438285828,grad_norm: 0.9043845211950342, iteration: 24197
loss: 0.987890899181366,grad_norm: 0.9408017546193072, iteration: 24198
loss: 1.0069949626922607,grad_norm: 0.8909344565734145, iteration: 24199
loss: 1.0294477939605713,grad_norm: 0.9999990240048496, iteration: 24200
loss: 1.0013960599899292,grad_norm: 0.9643898337548864, iteration: 24201
loss: 1.0135498046875,grad_norm: 0.9999993634571562, iteration: 24202
loss: 0.990321934223175,grad_norm: 0.9990212510067147, iteration: 24203
loss: 1.0618184804916382,grad_norm: 0.9999995491948149, iteration: 24204
loss: 0.9711157083511353,grad_norm: 0.9348279184756703, iteration: 24205
loss: 0.9788299798965454,grad_norm: 0.9999990802465352, iteration: 24206
loss: 0.9896528720855713,grad_norm: 0.8701119748318353, iteration: 24207
loss: 1.0048025846481323,grad_norm: 0.9436493550492584, iteration: 24208
loss: 1.0159785747528076,grad_norm: 0.9999996512297639, iteration: 24209
loss: 0.9849263429641724,grad_norm: 0.9999991447631781, iteration: 24210
loss: 1.0183175802230835,grad_norm: 0.8676155783620152, iteration: 24211
loss: 1.063157320022583,grad_norm: 0.9999996028838847, iteration: 24212
loss: 0.9913235306739807,grad_norm: 0.977286673216696, iteration: 24213
loss: 1.0361450910568237,grad_norm: 0.9440238739620997, iteration: 24214
loss: 1.0084768533706665,grad_norm: 0.9999990686013427, iteration: 24215
loss: 1.0226166248321533,grad_norm: 0.9590629018699656, iteration: 24216
loss: 1.0618722438812256,grad_norm: 0.999999546448305, iteration: 24217
loss: 0.9907137155532837,grad_norm: 0.9999991951772863, iteration: 24218
loss: 1.0369514226913452,grad_norm: 0.9999992646628344, iteration: 24219
loss: 1.013621211051941,grad_norm: 0.999999150432556, iteration: 24220
loss: 0.9951236844062805,grad_norm: 0.9999992081482401, iteration: 24221
loss: 1.0092943906784058,grad_norm: 0.9999991022371378, iteration: 24222
loss: 1.033382534980774,grad_norm: 0.9621435280712425, iteration: 24223
loss: 1.000167965888977,grad_norm: 0.9999992973676188, iteration: 24224
loss: 1.02570378780365,grad_norm: 0.9999994815012824, iteration: 24225
loss: 0.9949454069137573,grad_norm: 0.9763646160514642, iteration: 24226
loss: 1.0038679838180542,grad_norm: 0.9999990942114068, iteration: 24227
loss: 1.026235580444336,grad_norm: 0.9999990915776762, iteration: 24228
loss: 1.0338757038116455,grad_norm: 0.9999990623182997, iteration: 24229
loss: 1.0094116926193237,grad_norm: 0.9999994091983839, iteration: 24230
loss: 1.0237979888916016,grad_norm: 0.9429126770835836, iteration: 24231
loss: 1.0065557956695557,grad_norm: 0.9999995362806304, iteration: 24232
loss: 1.0216435194015503,grad_norm: 0.9999990929528972, iteration: 24233
loss: 0.998806357383728,grad_norm: 0.9999996407078529, iteration: 24234
loss: 0.9942604303359985,grad_norm: 0.9999991484903971, iteration: 24235
loss: 0.9934375286102295,grad_norm: 0.9999990597577361, iteration: 24236
loss: 1.0134098529815674,grad_norm: 0.9999990890327729, iteration: 24237
loss: 0.9732778072357178,grad_norm: 0.8996766043488615, iteration: 24238
loss: 0.9725554585456848,grad_norm: 0.9999991336807234, iteration: 24239
loss: 1.0078593492507935,grad_norm: 0.9999990913247909, iteration: 24240
loss: 0.9994164705276489,grad_norm: 0.9596651320221784, iteration: 24241
loss: 1.0031158924102783,grad_norm: 0.927152711910282, iteration: 24242
loss: 0.9999710321426392,grad_norm: 0.9999991184971666, iteration: 24243
loss: 0.9909152388572693,grad_norm: 0.9664794989997628, iteration: 24244
loss: 1.0100103616714478,grad_norm: 0.9999991760358041, iteration: 24245
loss: 1.0661264657974243,grad_norm: 0.9999993151871557, iteration: 24246
loss: 1.0066016912460327,grad_norm: 0.9999992162735825, iteration: 24247
loss: 0.9981507062911987,grad_norm: 0.7584101706728827, iteration: 24248
loss: 1.014937162399292,grad_norm: 0.98583235520353, iteration: 24249
loss: 0.9989524483680725,grad_norm: 0.999999036743994, iteration: 24250
loss: 1.023868441581726,grad_norm: 0.9310681643564029, iteration: 24251
loss: 0.9846974015235901,grad_norm: 0.8227043644645645, iteration: 24252
loss: 0.9938763380050659,grad_norm: 0.9490589778637488, iteration: 24253
loss: 1.0377355813980103,grad_norm: 0.9622074523271791, iteration: 24254
loss: 1.015581727027893,grad_norm: 0.9999991361581252, iteration: 24255
loss: 1.0206893682479858,grad_norm: 0.9999992510155721, iteration: 24256
loss: 1.0414434671401978,grad_norm: 0.9999990270808682, iteration: 24257
loss: 0.9986775517463684,grad_norm: 0.9999991639239788, iteration: 24258
loss: 1.017690658569336,grad_norm: 0.9999991865118316, iteration: 24259
loss: 1.0243440866470337,grad_norm: 0.9163283283571731, iteration: 24260
loss: 1.0155742168426514,grad_norm: 0.9999998152345587, iteration: 24261
loss: 0.9947550892829895,grad_norm: 0.8496512692260233, iteration: 24262
loss: 1.0142896175384521,grad_norm: 0.9223661498008043, iteration: 24263
loss: 1.0015456676483154,grad_norm: 0.9572751826150487, iteration: 24264
loss: 1.0471687316894531,grad_norm: 0.9999997001881882, iteration: 24265
loss: 1.054882526397705,grad_norm: 0.9999995261731128, iteration: 24266
loss: 1.0054724216461182,grad_norm: 0.9999990200922293, iteration: 24267
loss: 1.037008285522461,grad_norm: 0.999999358701382, iteration: 24268
loss: 0.9911482334136963,grad_norm: 0.9999995118154082, iteration: 24269
loss: 0.9785943031311035,grad_norm: 0.9999991647652204, iteration: 24270
loss: 0.9912298321723938,grad_norm: 0.9339901288737056, iteration: 24271
loss: 1.0360677242279053,grad_norm: 0.9303756328317505, iteration: 24272
loss: 1.0295709371566772,grad_norm: 0.9999992162169773, iteration: 24273
loss: 0.9786818027496338,grad_norm: 0.9999991446040709, iteration: 24274
loss: 1.0180047750473022,grad_norm: 0.9999991156953508, iteration: 24275
loss: 0.966350257396698,grad_norm: 0.999999534138966, iteration: 24276
loss: 0.9924571514129639,grad_norm: 0.9999991416141938, iteration: 24277
loss: 1.0209308862686157,grad_norm: 0.999999092624833, iteration: 24278
loss: 0.9967613220214844,grad_norm: 0.9999997410730663, iteration: 24279
loss: 1.0033445358276367,grad_norm: 0.9999990939086808, iteration: 24280
loss: 1.015343189239502,grad_norm: 0.9999991032532324, iteration: 24281
loss: 1.031297206878662,grad_norm: 0.999999141244488, iteration: 24282
loss: 1.0250011682510376,grad_norm: 0.98396310683796, iteration: 24283
loss: 1.021022915840149,grad_norm: 0.9999991711398559, iteration: 24284
loss: 1.0146950483322144,grad_norm: 0.9999991986690421, iteration: 24285
loss: 1.0309401750564575,grad_norm: 0.999999670401215, iteration: 24286
loss: 1.0117838382720947,grad_norm: 0.9999991601702167, iteration: 24287
loss: 1.0785235166549683,grad_norm: 0.9999995159030833, iteration: 24288
loss: 0.9983371496200562,grad_norm: 0.9999991743066428, iteration: 24289
loss: 1.0339815616607666,grad_norm: 0.9999995481929597, iteration: 24290
loss: 1.0208722352981567,grad_norm: 0.9999990686250613, iteration: 24291
loss: 1.004266619682312,grad_norm: 0.9999992153080243, iteration: 24292
loss: 1.0382355451583862,grad_norm: 0.9765758631683755, iteration: 24293
loss: 1.0284122228622437,grad_norm: 0.9999992083809082, iteration: 24294
loss: 1.0224004983901978,grad_norm: 0.9999995202423908, iteration: 24295
loss: 0.9840164184570312,grad_norm: 0.9967945107126813, iteration: 24296
loss: 0.9837745428085327,grad_norm: 0.9999990743674632, iteration: 24297
loss: 0.9900351166725159,grad_norm: 0.9999991335295195, iteration: 24298
loss: 1.0343552827835083,grad_norm: 0.9999997965433404, iteration: 24299
loss: 1.007469654083252,grad_norm: 0.9999992017189927, iteration: 24300
loss: 1.0243281126022339,grad_norm: 0.9999996836850723, iteration: 24301
loss: 1.0517109632492065,grad_norm: 0.9999995224074487, iteration: 24302
loss: 1.0739731788635254,grad_norm: 0.999999623213088, iteration: 24303
loss: 1.016804575920105,grad_norm: 0.9999990390427612, iteration: 24304
loss: 1.045686960220337,grad_norm: 0.9999993163800649, iteration: 24305
loss: 1.027875304222107,grad_norm: 0.9999992526381251, iteration: 24306
loss: 1.040492296218872,grad_norm: 0.9999991279209834, iteration: 24307
loss: 1.0581051111221313,grad_norm: 0.9999995950183405, iteration: 24308
loss: 0.9967840909957886,grad_norm: 0.9999990927094452, iteration: 24309
loss: 1.066787838935852,grad_norm: 0.9999991600352877, iteration: 24310
loss: 1.022647500038147,grad_norm: 0.999999262892182, iteration: 24311
loss: 1.0694537162780762,grad_norm: 0.9999996041265141, iteration: 24312
loss: 1.0336964130401611,grad_norm: 0.999999061116374, iteration: 24313
loss: 1.0010526180267334,grad_norm: 0.9999990663431411, iteration: 24314
loss: 1.0714197158813477,grad_norm: 0.9999994648340161, iteration: 24315
loss: 1.005738615989685,grad_norm: 0.9577060170857845, iteration: 24316
loss: 1.013553261756897,grad_norm: 0.9999996500710747, iteration: 24317
loss: 0.9910478591918945,grad_norm: 0.9999990317316442, iteration: 24318
loss: 1.0532597303390503,grad_norm: 0.9999992768469524, iteration: 24319
loss: 0.9434468150138855,grad_norm: 0.9999992408024241, iteration: 24320
loss: 1.0272785425186157,grad_norm: 0.9999992836052298, iteration: 24321
loss: 1.0180633068084717,grad_norm: 0.9999993706865338, iteration: 24322
loss: 1.0390623807907104,grad_norm: 0.9999995125249672, iteration: 24323
loss: 1.042554259300232,grad_norm: 0.9999993938860441, iteration: 24324
loss: 1.0203725099563599,grad_norm: 0.9999991209209517, iteration: 24325
loss: 0.9942988753318787,grad_norm: 0.9999995460437755, iteration: 24326
loss: 1.0227937698364258,grad_norm: 0.9999993847836821, iteration: 24327
loss: 1.000422477722168,grad_norm: 0.9402621153778389, iteration: 24328
loss: 0.9553571939468384,grad_norm: 0.9999994367145444, iteration: 24329
loss: 1.0079532861709595,grad_norm: 0.9268615120480147, iteration: 24330
loss: 1.0721486806869507,grad_norm: 0.9999995126901268, iteration: 24331
loss: 1.019193172454834,grad_norm: 0.9787735567520117, iteration: 24332
loss: 0.9729264974594116,grad_norm: 0.9999991744964875, iteration: 24333
loss: 1.0269629955291748,grad_norm: 0.999999182931957, iteration: 24334
loss: 1.0146450996398926,grad_norm: 0.9999992075232794, iteration: 24335
loss: 0.9983969926834106,grad_norm: 0.9999989979889374, iteration: 24336
loss: 1.0043792724609375,grad_norm: 0.999998999920006, iteration: 24337
loss: 1.0465983152389526,grad_norm: 0.9999992281469232, iteration: 24338
loss: 1.069909691810608,grad_norm: 0.9999995996554522, iteration: 24339
loss: 1.0257856845855713,grad_norm: 0.9043586305918049, iteration: 24340
loss: 1.0659716129302979,grad_norm: 0.9999993908242906, iteration: 24341
loss: 0.9904210567474365,grad_norm: 0.9999991021084317, iteration: 24342
loss: 1.006454348564148,grad_norm: 0.9999991852105035, iteration: 24343
loss: 0.9696497917175293,grad_norm: 0.999999082567947, iteration: 24344
loss: 1.0120941400527954,grad_norm: 0.9999995419337006, iteration: 24345
loss: 1.0138472318649292,grad_norm: 0.9999989990491936, iteration: 24346
loss: 1.0086147785186768,grad_norm: 0.9999992104089916, iteration: 24347
loss: 0.9743193984031677,grad_norm: 0.9999992694309334, iteration: 24348
loss: 1.036652684211731,grad_norm: 0.999999103859245, iteration: 24349
loss: 1.013250708580017,grad_norm: 0.9999991364956273, iteration: 24350
loss: 0.976530909538269,grad_norm: 0.9999992337440249, iteration: 24351
loss: 1.0265191793441772,grad_norm: 0.9999991218988407, iteration: 24352
loss: 0.9933750629425049,grad_norm: 0.9999992253291604, iteration: 24353
loss: 1.0155752897262573,grad_norm: 0.999999348055177, iteration: 24354
loss: 1.0473825931549072,grad_norm: 0.9999994482719835, iteration: 24355
loss: 1.0513978004455566,grad_norm: 0.9999993118778581, iteration: 24356
loss: 1.0155092477798462,grad_norm: 0.9999990762218862, iteration: 24357
loss: 0.9953302145004272,grad_norm: 0.9999995004668597, iteration: 24358
loss: 0.9720749258995056,grad_norm: 0.8949025335916978, iteration: 24359
loss: 1.141973614692688,grad_norm: 0.9999997294166558, iteration: 24360
loss: 1.028805136680603,grad_norm: 0.9999990908645164, iteration: 24361
loss: 1.022898554801941,grad_norm: 0.9999991144348257, iteration: 24362
loss: 1.0167522430419922,grad_norm: 0.9999992722598685, iteration: 24363
loss: 0.9907351732254028,grad_norm: 0.999998991224981, iteration: 24364
loss: 1.0204193592071533,grad_norm: 0.958770283277807, iteration: 24365
loss: 1.0244007110595703,grad_norm: 0.9999991113288369, iteration: 24366
loss: 0.9692599773406982,grad_norm: 0.9999992158939267, iteration: 24367
loss: 0.9795474410057068,grad_norm: 0.9999992975861214, iteration: 24368
loss: 1.0278334617614746,grad_norm: 0.9999991102230966, iteration: 24369
loss: 0.9713006019592285,grad_norm: 0.9999990919338138, iteration: 24370
loss: 1.055814504623413,grad_norm: 0.9999993923113636, iteration: 24371
loss: 1.0563396215438843,grad_norm: 0.9999997432792676, iteration: 24372
loss: 1.036298155784607,grad_norm: 0.9999991192439812, iteration: 24373
loss: 1.0384756326675415,grad_norm: 0.9999992262134423, iteration: 24374
loss: 1.0102438926696777,grad_norm: 0.8983835792183511, iteration: 24375
loss: 0.9696853160858154,grad_norm: 0.999999301103044, iteration: 24376
loss: 1.0285167694091797,grad_norm: 0.9786758765070036, iteration: 24377
loss: 1.0084365606307983,grad_norm: 0.9999992306486806, iteration: 24378
loss: 1.0397617816925049,grad_norm: 0.9999995707474784, iteration: 24379
loss: 1.0929547548294067,grad_norm: 0.9999995843120852, iteration: 24380
loss: 0.97968989610672,grad_norm: 0.9999993142275019, iteration: 24381
loss: 0.9941303730010986,grad_norm: 0.9999991092587002, iteration: 24382
loss: 0.9859282374382019,grad_norm: 0.9999991611761923, iteration: 24383
loss: 1.0581598281860352,grad_norm: 0.9999993958606506, iteration: 24384
loss: 0.9933722019195557,grad_norm: 0.8953140139843054, iteration: 24385
loss: 1.024522066116333,grad_norm: 0.9999990958320565, iteration: 24386
loss: 1.0052416324615479,grad_norm: 0.9999989859261391, iteration: 24387
loss: 1.0160043239593506,grad_norm: 0.9303137375864012, iteration: 24388
loss: 1.0308067798614502,grad_norm: 0.9999992143170281, iteration: 24389
loss: 1.1351509094238281,grad_norm: 0.9999998631073057, iteration: 24390
loss: 1.0825848579406738,grad_norm: 0.9999991308573405, iteration: 24391
loss: 1.0075652599334717,grad_norm: 0.9999995457828356, iteration: 24392
loss: 0.9961851239204407,grad_norm: 0.9999992535591664, iteration: 24393
loss: 1.0446012020111084,grad_norm: 0.9999992490671109, iteration: 24394
loss: 1.0990544557571411,grad_norm: 0.9999995890096436, iteration: 24395
loss: 1.0534340143203735,grad_norm: 0.9999990814855615, iteration: 24396
loss: 1.0028554201126099,grad_norm: 0.9999990701210374, iteration: 24397
loss: 1.2296068668365479,grad_norm: 0.9999996633794517, iteration: 24398
loss: 1.0052653551101685,grad_norm: 0.9999990240444059, iteration: 24399
loss: 1.0232231616973877,grad_norm: 0.9999990816788598, iteration: 24400
loss: 1.0431751012802124,grad_norm: 0.9999991613079608, iteration: 24401
loss: 1.0330543518066406,grad_norm: 0.9999992215563346, iteration: 24402
loss: 1.0852597951889038,grad_norm: 0.9999996649853761, iteration: 24403
loss: 1.037458896636963,grad_norm: 0.9999994819665751, iteration: 24404
loss: 1.035225749015808,grad_norm: 0.9999994899790938, iteration: 24405
loss: 1.0403109788894653,grad_norm: 0.9999994106765175, iteration: 24406
loss: 1.0279369354248047,grad_norm: 0.9999994647426882, iteration: 24407
loss: 1.0267153978347778,grad_norm: 0.9999991653311681, iteration: 24408
loss: 1.0207940340042114,grad_norm: 0.9999991482741012, iteration: 24409
loss: 1.0203207731246948,grad_norm: 0.9999991892315206, iteration: 24410
loss: 1.0300540924072266,grad_norm: 0.9999991257384231, iteration: 24411
loss: 1.0288456678390503,grad_norm: 0.9999993228363661, iteration: 24412
loss: 0.9551683068275452,grad_norm: 0.9583890055579765, iteration: 24413
loss: 1.0512994527816772,grad_norm: 0.9999993875719856, iteration: 24414
loss: 1.0441508293151855,grad_norm: 0.9999998383737775, iteration: 24415
loss: 1.0264800786972046,grad_norm: 0.9681724986633773, iteration: 24416
loss: 1.0307892560958862,grad_norm: 0.9999995610938641, iteration: 24417
loss: 1.0354282855987549,grad_norm: 0.9999996830492303, iteration: 24418
loss: 1.0220484733581543,grad_norm: 0.9999992855723868, iteration: 24419
loss: 1.066146969795227,grad_norm: 0.9999992809683427, iteration: 24420
loss: 1.0116534233093262,grad_norm: 0.9999996777632177, iteration: 24421
loss: 1.0795921087265015,grad_norm: 0.9999995385645897, iteration: 24422
loss: 1.030631184577942,grad_norm: 0.9999992774081333, iteration: 24423
loss: 1.0215506553649902,grad_norm: 0.9999998173490018, iteration: 24424
loss: 1.090896487236023,grad_norm: 0.9999995400982846, iteration: 24425
loss: 0.9998650550842285,grad_norm: 0.999999425802875, iteration: 24426
loss: 1.0480217933654785,grad_norm: 0.9999993591159934, iteration: 24427
loss: 1.0581790208816528,grad_norm: 0.9999993785259627, iteration: 24428
loss: 0.9896898865699768,grad_norm: 0.9999991199446504, iteration: 24429
loss: 1.0239630937576294,grad_norm: 0.9999990517076668, iteration: 24430
loss: 0.9801390767097473,grad_norm: 0.9999991624888165, iteration: 24431
loss: 1.0345673561096191,grad_norm: 0.9999991275036811, iteration: 24432
loss: 1.011194109916687,grad_norm: 0.9999991109209366, iteration: 24433
loss: 0.9919791221618652,grad_norm: 0.9999991558746534, iteration: 24434
loss: 1.0445759296417236,grad_norm: 0.9999993266303984, iteration: 24435
loss: 1.0033483505249023,grad_norm: 0.999999169124429, iteration: 24436
loss: 0.983124852180481,grad_norm: 0.906815018456998, iteration: 24437
loss: 1.0009318590164185,grad_norm: 0.9999997381455458, iteration: 24438
loss: 1.011775016784668,grad_norm: 0.9999992197982778, iteration: 24439
loss: 0.9891511797904968,grad_norm: 0.9999994768766495, iteration: 24440
loss: 1.0323991775512695,grad_norm: 0.9013738391586429, iteration: 24441
loss: 1.0208191871643066,grad_norm: 0.9999991065326345, iteration: 24442
loss: 1.032813549041748,grad_norm: 0.8574015979451012, iteration: 24443
loss: 1.0161727666854858,grad_norm: 0.9999991163657388, iteration: 24444
loss: 1.0059142112731934,grad_norm: 0.9999990701197503, iteration: 24445
loss: 0.9911845326423645,grad_norm: 0.9999994655200748, iteration: 24446
loss: 0.9771142601966858,grad_norm: 0.9999992425146303, iteration: 24447
loss: 1.0434861183166504,grad_norm: 0.9999991708999654, iteration: 24448
loss: 1.041854739189148,grad_norm: 0.9709391150048295, iteration: 24449
loss: 0.9724469184875488,grad_norm: 0.9999990207227692, iteration: 24450
loss: 1.0168362855911255,grad_norm: 0.9994530920636377, iteration: 24451
loss: 1.042696475982666,grad_norm: 0.9999991521373319, iteration: 24452
loss: 0.9996157288551331,grad_norm: 0.9999992923821766, iteration: 24453
loss: 1.000276803970337,grad_norm: 0.9999990981963984, iteration: 24454
loss: 0.993306577205658,grad_norm: 0.9999991955482974, iteration: 24455
loss: 1.0880141258239746,grad_norm: 0.9999994740567347, iteration: 24456
loss: 1.0278875827789307,grad_norm: 0.9999990834667537, iteration: 24457
loss: 1.0154223442077637,grad_norm: 0.9999990878729186, iteration: 24458
loss: 1.0668636560440063,grad_norm: 0.9999990947991125, iteration: 24459
loss: 1.0067148208618164,grad_norm: 0.9875804422366654, iteration: 24460
loss: 0.9709743857383728,grad_norm: 0.9999992923945448, iteration: 24461
loss: 0.9815548658370972,grad_norm: 0.9999990616389997, iteration: 24462
loss: 1.01493239402771,grad_norm: 0.9999993291448785, iteration: 24463
loss: 1.010169267654419,grad_norm: 0.9999998293104908, iteration: 24464
loss: 0.9908930063247681,grad_norm: 0.9999992970920851, iteration: 24465
loss: 1.0279076099395752,grad_norm: 0.9999991614654593, iteration: 24466
loss: 1.0282065868377686,grad_norm: 0.9999992527042824, iteration: 24467
loss: 1.05442214012146,grad_norm: 0.9999991233588695, iteration: 24468
loss: 1.04275643825531,grad_norm: 0.9999993233420241, iteration: 24469
loss: 1.0177820920944214,grad_norm: 0.9378006997695009, iteration: 24470
loss: 1.0183497667312622,grad_norm: 0.9999991170191983, iteration: 24471
loss: 0.9849840998649597,grad_norm: 0.9999992565714388, iteration: 24472
loss: 1.0925811529159546,grad_norm: 0.9999994753336731, iteration: 24473
loss: 0.9859648942947388,grad_norm: 0.9625703121618119, iteration: 24474
loss: 1.0934330224990845,grad_norm: 0.9999996220846686, iteration: 24475
loss: 0.9957573413848877,grad_norm: 0.9999990402785968, iteration: 24476
loss: 0.9992085099220276,grad_norm: 0.9737969861905401, iteration: 24477
loss: 1.0182660818099976,grad_norm: 0.763350780466916, iteration: 24478
loss: 1.0112521648406982,grad_norm: 0.9999994518070213, iteration: 24479
loss: 0.994327187538147,grad_norm: 0.9759229721751286, iteration: 24480
loss: 1.0000513792037964,grad_norm: 0.9999990633319147, iteration: 24481
loss: 0.9971425533294678,grad_norm: 0.9953470520077687, iteration: 24482
loss: 1.0028467178344727,grad_norm: 0.9512680427228261, iteration: 24483
loss: 1.0293867588043213,grad_norm: 0.9999991627882958, iteration: 24484
loss: 0.995320737361908,grad_norm: 0.8676431689508949, iteration: 24485
loss: 0.998171865940094,grad_norm: 0.9999990467936786, iteration: 24486
loss: 1.0512840747833252,grad_norm: 0.9112512401846825, iteration: 24487
loss: 1.0250169038772583,grad_norm: 0.999999046023092, iteration: 24488
loss: 1.0101544857025146,grad_norm: 0.999999082529973, iteration: 24489
loss: 1.023016333580017,grad_norm: 0.9999994610860592, iteration: 24490
loss: 1.043152928352356,grad_norm: 0.8120004291176509, iteration: 24491
loss: 0.9946377277374268,grad_norm: 0.930342673494418, iteration: 24492
loss: 1.0021281242370605,grad_norm: 0.9999997423197512, iteration: 24493
loss: 0.9745054841041565,grad_norm: 0.8993408042127122, iteration: 24494
loss: 1.0282337665557861,grad_norm: 0.999999449678956, iteration: 24495
loss: 1.0161242485046387,grad_norm: 0.9999994407017112, iteration: 24496
loss: 1.0261938571929932,grad_norm: 0.9999995198526739, iteration: 24497
loss: 1.0213148593902588,grad_norm: 0.9389464781451957, iteration: 24498
loss: 1.036558985710144,grad_norm: 0.9904020532568679, iteration: 24499
loss: 0.9789161682128906,grad_norm: 0.9999989758307539, iteration: 24500
loss: 0.9968935251235962,grad_norm: 0.9999994766586991, iteration: 24501
loss: 0.9811038374900818,grad_norm: 0.9712578795893978, iteration: 24502
loss: 1.046939492225647,grad_norm: 0.9999992079276727, iteration: 24503
loss: 0.9854612946510315,grad_norm: 0.9999990999140187, iteration: 24504
loss: 1.0154614448547363,grad_norm: 0.9999995480298629, iteration: 24505
loss: 0.9984432458877563,grad_norm: 0.9999990834014931, iteration: 24506
loss: 0.9835866093635559,grad_norm: 0.9459429111418349, iteration: 24507
loss: 0.9855571985244751,grad_norm: 0.8618200060173865, iteration: 24508
loss: 0.9977032542228699,grad_norm: 0.9071827788235794, iteration: 24509
loss: 1.0038329362869263,grad_norm: 0.9999991200632162, iteration: 24510
loss: 1.0235971212387085,grad_norm: 0.9999991342143804, iteration: 24511
loss: 1.0810526609420776,grad_norm: 0.999999565718205, iteration: 24512
loss: 0.9572180509567261,grad_norm: 0.9999992951723145, iteration: 24513
loss: 1.0528054237365723,grad_norm: 0.9999991689305978, iteration: 24514
loss: 1.0264670848846436,grad_norm: 0.9999991336759864, iteration: 24515
loss: 1.0312200784683228,grad_norm: 0.9999992931339479, iteration: 24516
loss: 1.062929391860962,grad_norm: 0.9999998845711995, iteration: 24517
loss: 0.9944300055503845,grad_norm: 0.9999996091010859, iteration: 24518
loss: 1.025619626045227,grad_norm: 0.999999107157805, iteration: 24519
loss: 0.9694753885269165,grad_norm: 0.9999991024955547, iteration: 24520
loss: 1.0207620859146118,grad_norm: 0.9999992806681474, iteration: 24521
loss: 1.049829363822937,grad_norm: 0.9999992253992206, iteration: 24522
loss: 1.050818920135498,grad_norm: 0.97570442893866, iteration: 24523
loss: 1.0435034036636353,grad_norm: 0.9999990534316011, iteration: 24524
loss: 1.0326058864593506,grad_norm: 0.9647321090720263, iteration: 24525
loss: 1.028337836265564,grad_norm: 0.999999486704322, iteration: 24526
loss: 1.0104213953018188,grad_norm: 0.8838780560219767, iteration: 24527
loss: 1.0392216444015503,grad_norm: 0.9978700860354263, iteration: 24528
loss: 1.0714497566223145,grad_norm: 0.9999991396133716, iteration: 24529
loss: 0.9996996521949768,grad_norm: 0.993660770002962, iteration: 24530
loss: 1.0053520202636719,grad_norm: 0.9999995953027527, iteration: 24531
loss: 1.0157362222671509,grad_norm: 0.999999075237058, iteration: 24532
loss: 0.9909420609474182,grad_norm: 0.958240598633469, iteration: 24533
loss: 0.9986209273338318,grad_norm: 0.9999992463828051, iteration: 24534
loss: 0.9941704273223877,grad_norm: 0.9999992697660476, iteration: 24535
loss: 1.0411187410354614,grad_norm: 0.9999995357339663, iteration: 24536
loss: 1.0579184293746948,grad_norm: 0.9999996140790602, iteration: 24537
loss: 0.9889154434204102,grad_norm: 0.9999991539067686, iteration: 24538
loss: 1.0281318426132202,grad_norm: 0.9999992877640539, iteration: 24539
loss: 1.0090460777282715,grad_norm: 0.9999992244258396, iteration: 24540
loss: 0.9643754363059998,grad_norm: 0.9330919735931069, iteration: 24541
loss: 1.023437261581421,grad_norm: 0.8647401080296581, iteration: 24542
loss: 1.0168622732162476,grad_norm: 0.8908445377384974, iteration: 24543
loss: 0.9886909127235413,grad_norm: 0.9999990031475269, iteration: 24544
loss: 1.000303030014038,grad_norm: 0.9999991455886198, iteration: 24545
loss: 1.0615073442459106,grad_norm: 0.999999236560741, iteration: 24546
loss: 1.002261757850647,grad_norm: 0.9999989981891096, iteration: 24547
loss: 1.0294785499572754,grad_norm: 0.8855156394097841, iteration: 24548
loss: 1.031558871269226,grad_norm: 0.9268883168724963, iteration: 24549
loss: 0.9908563494682312,grad_norm: 0.9999993023398654, iteration: 24550
loss: 1.0024070739746094,grad_norm: 0.8544057315653055, iteration: 24551
loss: 1.0451867580413818,grad_norm: 0.9999996572868506, iteration: 24552
loss: 1.0244158506393433,grad_norm: 0.9999991312603658, iteration: 24553
loss: 1.0642662048339844,grad_norm: 0.9999991916692677, iteration: 24554
loss: 1.0350028276443481,grad_norm: 0.9999991759291742, iteration: 24555
loss: 1.0420689582824707,grad_norm: 0.9999990694714416, iteration: 24556
loss: 0.9844883680343628,grad_norm: 0.899273598020446, iteration: 24557
loss: 1.0496164560317993,grad_norm: 0.9999991236373685, iteration: 24558
loss: 0.9909965991973877,grad_norm: 0.9217210989695739, iteration: 24559
loss: 1.1029105186462402,grad_norm: 0.9999990993081028, iteration: 24560
loss: 0.9957224130630493,grad_norm: 0.9999990372386116, iteration: 24561
loss: 1.0237948894500732,grad_norm: 0.9999990473422847, iteration: 24562
loss: 0.979110062122345,grad_norm: 0.987093653210421, iteration: 24563
loss: 1.0371336936950684,grad_norm: 0.9999990804682498, iteration: 24564
loss: 1.030207872390747,grad_norm: 0.9999991474905635, iteration: 24565
loss: 0.9912879467010498,grad_norm: 0.9842108545063875, iteration: 24566
loss: 0.9778268337249756,grad_norm: 0.9383274559619119, iteration: 24567
loss: 1.020902156829834,grad_norm: 0.9999991734596874, iteration: 24568
loss: 1.0112472772598267,grad_norm: 0.9999992046605763, iteration: 24569
loss: 1.047479271888733,grad_norm: 0.9999990515100351, iteration: 24570
loss: 0.9936015605926514,grad_norm: 0.9654934533799868, iteration: 24571
loss: 1.0006258487701416,grad_norm: 0.9999989928281098, iteration: 24572
loss: 1.0314956903457642,grad_norm: 0.9999990562100846, iteration: 24573
loss: 0.9940381646156311,grad_norm: 0.999999173884415, iteration: 24574
loss: 1.0414817333221436,grad_norm: 0.999999092439402, iteration: 24575
loss: 0.9917737245559692,grad_norm: 0.9565096721953521, iteration: 24576
loss: 0.9951385259628296,grad_norm: 0.9999991082869747, iteration: 24577
loss: 0.9784300923347473,grad_norm: 0.9999991126008613, iteration: 24578
loss: 0.9991240501403809,grad_norm: 0.9170979965530344, iteration: 24579
loss: 0.9976493716239929,grad_norm: 0.94618328163383, iteration: 24580
loss: 1.030550479888916,grad_norm: 0.9999992032622106, iteration: 24581
loss: 0.9930360317230225,grad_norm: 0.9117435090736464, iteration: 24582
loss: 1.0160621404647827,grad_norm: 0.9183832759677836, iteration: 24583
loss: 1.0027220249176025,grad_norm: 0.8932625254399761, iteration: 24584
loss: 0.9918908476829529,grad_norm: 0.9999991155406331, iteration: 24585
loss: 0.9893842339515686,grad_norm: 0.840814983634033, iteration: 24586
loss: 1.0157105922698975,grad_norm: 0.9999992316664402, iteration: 24587
loss: 0.972649872303009,grad_norm: 0.9999990557029876, iteration: 24588
loss: 1.0007963180541992,grad_norm: 0.9999991157236365, iteration: 24589
loss: 1.006679654121399,grad_norm: 0.9079603778848613, iteration: 24590
loss: 1.0023103952407837,grad_norm: 0.8743912115708443, iteration: 24591
loss: 0.9674521684646606,grad_norm: 0.9999993618740156, iteration: 24592
loss: 1.0306720733642578,grad_norm: 0.9709168741270927, iteration: 24593
loss: 0.9918106198310852,grad_norm: 0.9617954210328951, iteration: 24594
loss: 1.0422133207321167,grad_norm: 0.9881973224922151, iteration: 24595
loss: 1.0582529306411743,grad_norm: 0.953290877779734, iteration: 24596
loss: 0.994281530380249,grad_norm: 0.9357164699072074, iteration: 24597
loss: 1.0463430881500244,grad_norm: 0.9999990546149449, iteration: 24598
loss: 1.0223302841186523,grad_norm: 0.9999994662206199, iteration: 24599
loss: 1.0522798299789429,grad_norm: 0.9999992387327261, iteration: 24600
loss: 1.0374687910079956,grad_norm: 0.9999991979691467, iteration: 24601
loss: 1.0227100849151611,grad_norm: 0.9265437397161084, iteration: 24602
loss: 0.9670069217681885,grad_norm: 0.8679748478433649, iteration: 24603
loss: 1.0320391654968262,grad_norm: 0.9999991820681943, iteration: 24604
loss: 0.9887257814407349,grad_norm: 0.9999992227614611, iteration: 24605
loss: 1.0079048871994019,grad_norm: 0.9250634353075284, iteration: 24606
loss: 1.0003187656402588,grad_norm: 0.9999990469472803, iteration: 24607
loss: 1.0013824701309204,grad_norm: 0.9999991370291204, iteration: 24608
loss: 0.9934098720550537,grad_norm: 0.9999995414559765, iteration: 24609
loss: 0.975375771522522,grad_norm: 0.9892790578524419, iteration: 24610
loss: 1.0627689361572266,grad_norm: 0.9999995353525585, iteration: 24611
loss: 1.0034730434417725,grad_norm: 0.9999992308524729, iteration: 24612
loss: 1.0182043313980103,grad_norm: 0.9999991446053713, iteration: 24613
loss: 1.0215957164764404,grad_norm: 0.967771984567402, iteration: 24614
loss: 1.0334392786026,grad_norm: 0.9650390038092526, iteration: 24615
loss: 1.0025688409805298,grad_norm: 0.9999989930539424, iteration: 24616
loss: 1.0272791385650635,grad_norm: 0.9999992977439568, iteration: 24617
loss: 1.0115398168563843,grad_norm: 0.9999990995316014, iteration: 24618
loss: 0.96878582239151,grad_norm: 0.999999031807565, iteration: 24619
loss: 0.9818124771118164,grad_norm: 0.9971574778521767, iteration: 24620
loss: 0.9625676274299622,grad_norm: 0.9999989885819716, iteration: 24621
loss: 0.984713613986969,grad_norm: 0.9766041243245965, iteration: 24622
loss: 0.9989061951637268,grad_norm: 0.9544577076794133, iteration: 24623
loss: 1.037108302116394,grad_norm: 0.9999991276283662, iteration: 24624
loss: 0.9911115169525146,grad_norm: 0.9694654223272077, iteration: 24625
loss: 1.001288890838623,grad_norm: 0.9999990300248034, iteration: 24626
loss: 1.0238490104675293,grad_norm: 0.9999992756574958, iteration: 24627
loss: 1.0487216711044312,grad_norm: 0.9999994463787422, iteration: 24628
loss: 1.0240029096603394,grad_norm: 0.9999991565651146, iteration: 24629
loss: 0.9658201336860657,grad_norm: 0.999607016191328, iteration: 24630
loss: 0.9883970022201538,grad_norm: 0.9200777099225028, iteration: 24631
loss: 1.0129035711288452,grad_norm: 0.9219299386020237, iteration: 24632
loss: 1.044162631034851,grad_norm: 0.9043363099131185, iteration: 24633
loss: 1.033858299255371,grad_norm: 0.9852679307705681, iteration: 24634
loss: 0.9780207276344299,grad_norm: 0.9999993758990738, iteration: 24635
loss: 0.9804223775863647,grad_norm: 0.981030104440247, iteration: 24636
loss: 1.0067099332809448,grad_norm: 0.999999388084895, iteration: 24637
loss: 1.0548549890518188,grad_norm: 0.9999991186761401, iteration: 24638
loss: 1.035093903541565,grad_norm: 0.9999997302444666, iteration: 24639
loss: 0.9653407335281372,grad_norm: 0.999999064572046, iteration: 24640
loss: 1.0502489805221558,grad_norm: 0.9999990823822901, iteration: 24641
loss: 1.037796974182129,grad_norm: 0.9999993223793125, iteration: 24642
loss: 1.0351347923278809,grad_norm: 0.999999460465838, iteration: 24643
loss: 0.9560518264770508,grad_norm: 0.9999991620318326, iteration: 24644
loss: 0.9861412644386292,grad_norm: 0.9859765179615172, iteration: 24645
loss: 1.0537468194961548,grad_norm: 0.999999681790367, iteration: 24646
loss: 0.9885802268981934,grad_norm: 0.99999903311363, iteration: 24647
loss: 1.0118818283081055,grad_norm: 0.9734494798775093, iteration: 24648
loss: 0.9818856120109558,grad_norm: 0.9270859678483356, iteration: 24649
loss: 1.016411304473877,grad_norm: 0.9172824405897526, iteration: 24650
loss: 0.9970058798789978,grad_norm: 0.9900816166198685, iteration: 24651
loss: 1.0241800546646118,grad_norm: 0.9312267120324024, iteration: 24652
loss: 1.0108990669250488,grad_norm: 0.9725519216647918, iteration: 24653
loss: 1.035412311553955,grad_norm: 0.9999992172558837, iteration: 24654
loss: 1.0278081893920898,grad_norm: 0.9999990445104933, iteration: 24655
loss: 0.9827629327774048,grad_norm: 0.9999992288391215, iteration: 24656
loss: 1.009639859199524,grad_norm: 0.999999104797519, iteration: 24657
loss: 0.9795562028884888,grad_norm: 0.9999991391958579, iteration: 24658
loss: 1.0051982402801514,grad_norm: 0.9999991788643178, iteration: 24659
loss: 1.0586481094360352,grad_norm: 0.9999997614151986, iteration: 24660
loss: 0.9955012202262878,grad_norm: 0.8816364866010947, iteration: 24661
loss: 0.9478184580802917,grad_norm: 0.954249526433483, iteration: 24662
loss: 0.992027223110199,grad_norm: 0.9931686618895125, iteration: 24663
loss: 1.095157265663147,grad_norm: 0.9999996196240754, iteration: 24664
loss: 1.014944076538086,grad_norm: 0.99999981035123, iteration: 24665
loss: 1.0483123064041138,grad_norm: 0.9572809764291355, iteration: 24666
loss: 1.0339174270629883,grad_norm: 0.9999994161245169, iteration: 24667
loss: 0.9966320395469666,grad_norm: 0.9999989908888706, iteration: 24668
loss: 1.031982660293579,grad_norm: 0.9999991703610994, iteration: 24669
loss: 1.0016562938690186,grad_norm: 0.9003970384527268, iteration: 24670
loss: 1.0069118738174438,grad_norm: 0.9999997155445558, iteration: 24671
loss: 1.0554109811782837,grad_norm: 0.999999481234798, iteration: 24672
loss: 1.0226380825042725,grad_norm: 0.9460611749154886, iteration: 24673
loss: 0.9673873782157898,grad_norm: 0.9999990451076092, iteration: 24674
loss: 1.0177618265151978,grad_norm: 0.9999989662683878, iteration: 24675
loss: 0.9965294003486633,grad_norm: 0.8779023732323719, iteration: 24676
loss: 1.0135293006896973,grad_norm: 0.99999912388071, iteration: 24677
loss: 1.0135595798492432,grad_norm: 0.9999991049260485, iteration: 24678
loss: 0.9758001565933228,grad_norm: 0.999999229474949, iteration: 24679
loss: 1.0813080072402954,grad_norm: 0.999999302516209, iteration: 24680
loss: 0.9661706686019897,grad_norm: 0.9785851652499734, iteration: 24681
loss: 0.98805171251297,grad_norm: 0.9999993872627241, iteration: 24682
loss: 1.0188699960708618,grad_norm: 0.9999994116492121, iteration: 24683
loss: 0.9645819664001465,grad_norm: 0.9999990618183776, iteration: 24684
loss: 1.023573637008667,grad_norm: 0.8262220461411363, iteration: 24685
loss: 1.0332014560699463,grad_norm: 0.9999990763796293, iteration: 24686
loss: 1.030411958694458,grad_norm: 0.9999994476835441, iteration: 24687
loss: 0.9932606220245361,grad_norm: 0.9999990903545044, iteration: 24688
loss: 0.9835965037345886,grad_norm: 0.9640678025482096, iteration: 24689
loss: 0.9984913468360901,grad_norm: 0.9999993429985162, iteration: 24690
loss: 1.0368213653564453,grad_norm: 0.9999991346650117, iteration: 24691
loss: 1.092218041419983,grad_norm: 0.9999993664068489, iteration: 24692
loss: 0.9683945775032043,grad_norm: 0.8901766448713835, iteration: 24693
loss: 1.0307875871658325,grad_norm: 0.9999992800032987, iteration: 24694
loss: 1.0075280666351318,grad_norm: 0.9999990381873016, iteration: 24695
loss: 1.0367661714553833,grad_norm: 0.9999994963323774, iteration: 24696
loss: 1.015855312347412,grad_norm: 0.9999990633987923, iteration: 24697
loss: 1.0063797235488892,grad_norm: 0.9999994421694699, iteration: 24698
loss: 0.976059079170227,grad_norm: 0.9284954249064008, iteration: 24699
loss: 1.0284489393234253,grad_norm: 0.9999996652303949, iteration: 24700
loss: 1.0137276649475098,grad_norm: 0.9999996837300562, iteration: 24701
loss: 1.067975401878357,grad_norm: 0.9999993710001255, iteration: 24702
loss: 1.025881290435791,grad_norm: 0.9999992356145349, iteration: 24703
loss: 0.9924873113632202,grad_norm: 0.9806150083263757, iteration: 24704
loss: 1.0025303363800049,grad_norm: 0.9843102604163235, iteration: 24705
loss: 1.0052701234817505,grad_norm: 0.9999992169850919, iteration: 24706
loss: 1.0191174745559692,grad_norm: 0.8462014725828294, iteration: 24707
loss: 1.0250484943389893,grad_norm: 0.9999990982844729, iteration: 24708
loss: 1.0187716484069824,grad_norm: 0.9973662675922175, iteration: 24709
loss: 1.0141081809997559,grad_norm: 0.9999989935876388, iteration: 24710
loss: 0.9969621896743774,grad_norm: 0.9999989670224794, iteration: 24711
loss: 0.962307870388031,grad_norm: 0.9999989441888694, iteration: 24712
loss: 1.015132188796997,grad_norm: 0.923831681987576, iteration: 24713
loss: 0.9989367723464966,grad_norm: 0.9999996375931943, iteration: 24714
loss: 0.9885998964309692,grad_norm: 0.9381407514499607, iteration: 24715
loss: 1.0488933324813843,grad_norm: 0.9999992320179601, iteration: 24716
loss: 0.978482723236084,grad_norm: 0.9999994036590808, iteration: 24717
loss: 1.0313605070114136,grad_norm: 0.8305365290323817, iteration: 24718
loss: 0.9857271313667297,grad_norm: 0.9243772817336718, iteration: 24719
loss: 0.9910655617713928,grad_norm: 0.9999992563218715, iteration: 24720
loss: 1.0396286249160767,grad_norm: 0.9999996844960404, iteration: 24721
loss: 1.044769048690796,grad_norm: 0.9214136365767083, iteration: 24722
loss: 0.9841427206993103,grad_norm: 0.9999990627290762, iteration: 24723
loss: 1.017317533493042,grad_norm: 0.9999991253974084, iteration: 24724
loss: 1.0703070163726807,grad_norm: 0.9999992935253642, iteration: 24725
loss: 1.052059292793274,grad_norm: 0.9999996291425273, iteration: 24726
loss: 1.0212440490722656,grad_norm: 0.9048662574932455, iteration: 24727
loss: 1.0308866500854492,grad_norm: 0.9999991975046922, iteration: 24728
loss: 1.0420968532562256,grad_norm: 0.9999990406427942, iteration: 24729
loss: 1.0047365427017212,grad_norm: 0.9999989859364885, iteration: 24730
loss: 1.04891037940979,grad_norm: 0.9999993151896049, iteration: 24731
loss: 1.0199774503707886,grad_norm: 0.9999990890701629, iteration: 24732
loss: 1.0365370512008667,grad_norm: 0.9816573689516745, iteration: 24733
loss: 1.022144079208374,grad_norm: 0.9999996465835338, iteration: 24734
loss: 1.0546927452087402,grad_norm: 0.9166852769836388, iteration: 24735
loss: 1.0213398933410645,grad_norm: 0.9999991850489429, iteration: 24736
loss: 1.0249959230422974,grad_norm: 0.9999991920322219, iteration: 24737
loss: 1.0171809196472168,grad_norm: 0.9999991586027144, iteration: 24738
loss: 1.0077202320098877,grad_norm: 0.8887989829788937, iteration: 24739
loss: 1.020280361175537,grad_norm: 0.9999990612817728, iteration: 24740
loss: 1.023895263671875,grad_norm: 0.9999992475227067, iteration: 24741
loss: 1.0917423963546753,grad_norm: 0.9999993673840374, iteration: 24742
loss: 0.9988180994987488,grad_norm: 0.8809114370870014, iteration: 24743
loss: 0.9939391016960144,grad_norm: 0.9999993011885384, iteration: 24744
loss: 1.0208605527877808,grad_norm: 0.9999991176173451, iteration: 24745
loss: 1.0346519947052002,grad_norm: 0.9999991413870037, iteration: 24746
loss: 1.0247968435287476,grad_norm: 0.8744949861677048, iteration: 24747
loss: 1.0047261714935303,grad_norm: 0.9999990063285877, iteration: 24748
loss: 1.1149883270263672,grad_norm: 0.9999993553037796, iteration: 24749
loss: 0.9962638020515442,grad_norm: 0.9999992201545421, iteration: 24750
loss: 1.0402703285217285,grad_norm: 0.9999994155708821, iteration: 24751
loss: 1.0127935409545898,grad_norm: 0.9999992287034541, iteration: 24752
loss: 1.017087697982788,grad_norm: 0.9999991419304808, iteration: 24753
loss: 1.0605708360671997,grad_norm: 0.9999992769011976, iteration: 24754
loss: 1.013105034828186,grad_norm: 0.9999996323706627, iteration: 24755
loss: 1.014504075050354,grad_norm: 0.9999991119839765, iteration: 24756
loss: 1.0310842990875244,grad_norm: 0.9999992000529973, iteration: 24757
loss: 1.0259206295013428,grad_norm: 0.9417267965988414, iteration: 24758
loss: 1.0382583141326904,grad_norm: 0.999999287870304, iteration: 24759
loss: 0.9866917729377747,grad_norm: 0.999999157284119, iteration: 24760
loss: 1.0171213150024414,grad_norm: 0.8337705725684312, iteration: 24761
loss: 1.0527293682098389,grad_norm: 0.829029521927353, iteration: 24762
loss: 0.9691992402076721,grad_norm: 0.9999990225435326, iteration: 24763
loss: 0.9981817603111267,grad_norm: 0.9999991220758914, iteration: 24764
loss: 0.9905307292938232,grad_norm: 0.9999993435242572, iteration: 24765
loss: 1.0249930620193481,grad_norm: 0.9989650229837657, iteration: 24766
loss: 1.0020456314086914,grad_norm: 0.9999991115453385, iteration: 24767
loss: 0.998100221157074,grad_norm: 0.9653256081089215, iteration: 24768
loss: 0.9893648028373718,grad_norm: 0.9999992544320582, iteration: 24769
loss: 1.0229790210723877,grad_norm: 0.9999994121747596, iteration: 24770
loss: 1.0428788661956787,grad_norm: 0.9999992103642131, iteration: 24771
loss: 1.0091849565505981,grad_norm: 0.999999079828246, iteration: 24772
loss: 0.9930709600448608,grad_norm: 0.7459169602442789, iteration: 24773
loss: 0.9723721742630005,grad_norm: 0.9524291624278269, iteration: 24774
loss: 1.0138412714004517,grad_norm: 0.9248815058914484, iteration: 24775
loss: 1.0191901922225952,grad_norm: 0.9999991645606018, iteration: 24776
loss: 1.0368820428848267,grad_norm: 0.9999996668099137, iteration: 24777
loss: 1.0401699542999268,grad_norm: 0.9999990740609442, iteration: 24778
loss: 1.0225192308425903,grad_norm: 0.8975602496883845, iteration: 24779
loss: 1.011906385421753,grad_norm: 0.9527850273091533, iteration: 24780
loss: 0.9916141629219055,grad_norm: 0.9911566076775081, iteration: 24781
loss: 1.039689302444458,grad_norm: 0.9999993106777277, iteration: 24782
loss: 1.0152833461761475,grad_norm: 0.9614388756069133, iteration: 24783
loss: 1.0014278888702393,grad_norm: 0.999999337363901, iteration: 24784
loss: 1.0097332000732422,grad_norm: 0.9999992644571569, iteration: 24785
loss: 1.0897572040557861,grad_norm: 0.99999950185245, iteration: 24786
loss: 0.9921992421150208,grad_norm: 0.999999136272791, iteration: 24787
loss: 1.0475751161575317,grad_norm: 0.9999992920378967, iteration: 24788
loss: 1.0160026550292969,grad_norm: 0.999999208618311, iteration: 24789
loss: 1.0431387424468994,grad_norm: 0.9999995708020668, iteration: 24790
loss: 0.9820016622543335,grad_norm: 0.999999152003119, iteration: 24791
loss: 0.9946283102035522,grad_norm: 0.8623813257718103, iteration: 24792
loss: 0.9446877241134644,grad_norm: 0.9376551581090167, iteration: 24793
loss: 1.0028542280197144,grad_norm: 0.9367418409765802, iteration: 24794
loss: 1.0128369331359863,grad_norm: 0.8450839932178172, iteration: 24795
loss: 1.0084489583969116,grad_norm: 0.9132436875802341, iteration: 24796
loss: 1.0651962757110596,grad_norm: 0.9999999384843127, iteration: 24797
loss: 0.971871018409729,grad_norm: 0.8848690440915729, iteration: 24798
loss: 1.0005238056182861,grad_norm: 0.9999992763503801, iteration: 24799
loss: 1.0046324729919434,grad_norm: 0.9999991618139354, iteration: 24800
loss: 1.0351241827011108,grad_norm: 0.9999991309151888, iteration: 24801
loss: 0.9967415928840637,grad_norm: 0.9999993177790357, iteration: 24802
loss: 1.0034795999526978,grad_norm: 0.9999990838238907, iteration: 24803
loss: 0.9860382676124573,grad_norm: 0.9999990126043412, iteration: 24804
loss: 0.9981951117515564,grad_norm: 0.9933746814191587, iteration: 24805
loss: 0.9866558909416199,grad_norm: 0.9999992400059696, iteration: 24806
loss: 1.0092676877975464,grad_norm: 0.88306059282941, iteration: 24807
loss: 0.9725363254547119,grad_norm: 0.8357178021261131, iteration: 24808
loss: 1.0389456748962402,grad_norm: 0.9999996192328968, iteration: 24809
loss: 1.0223188400268555,grad_norm: 0.9999990119364814, iteration: 24810
loss: 0.9881651401519775,grad_norm: 0.9999990591130434, iteration: 24811
loss: 1.0170791149139404,grad_norm: 0.9999992718497974, iteration: 24812
loss: 0.9953736662864685,grad_norm: 0.9454290843147513, iteration: 24813
loss: 1.0457290410995483,grad_norm: 0.9999997816638873, iteration: 24814
loss: 1.0283352136611938,grad_norm: 0.9846579992745887, iteration: 24815
loss: 1.0268670320510864,grad_norm: 0.9999990667821892, iteration: 24816
loss: 0.9689531922340393,grad_norm: 0.9999992308861583, iteration: 24817
loss: 1.0069876909255981,grad_norm: 0.9999991182907342, iteration: 24818
loss: 1.0060943365097046,grad_norm: 0.9999993161562742, iteration: 24819
loss: 1.0179811716079712,grad_norm: 0.9999992074276387, iteration: 24820
loss: 0.9798077344894409,grad_norm: 0.9391790981762685, iteration: 24821
loss: 1.0533660650253296,grad_norm: 0.999999463198374, iteration: 24822
loss: 1.0429637432098389,grad_norm: 0.9936112043012141, iteration: 24823
loss: 0.9955211877822876,grad_norm: 0.9483730173898598, iteration: 24824
loss: 0.9572082161903381,grad_norm: 0.9999991744058307, iteration: 24825
loss: 1.0540722608566284,grad_norm: 0.9999992223646175, iteration: 24826
loss: 1.025266408920288,grad_norm: 0.9999991870590272, iteration: 24827
loss: 1.009313702583313,grad_norm: 0.9999990867658524, iteration: 24828
loss: 1.0056132078170776,grad_norm: 0.9999990340416951, iteration: 24829
loss: 0.9975491762161255,grad_norm: 0.9921452649756167, iteration: 24830
loss: 0.9915869832038879,grad_norm: 0.9999991833796715, iteration: 24831
loss: 1.0153611898422241,grad_norm: 0.999999079133159, iteration: 24832
loss: 0.9560016393661499,grad_norm: 0.9999990611244174, iteration: 24833
loss: 0.9941995143890381,grad_norm: 0.9413675259840469, iteration: 24834
loss: 1.009214162826538,grad_norm: 0.8414295798340165, iteration: 24835
loss: 1.0037473440170288,grad_norm: 0.99999922215406, iteration: 24836
loss: 1.0551546812057495,grad_norm: 0.9476879267514556, iteration: 24837
loss: 1.0656371116638184,grad_norm: 0.8917913017576716, iteration: 24838
loss: 1.0207417011260986,grad_norm: 0.9999990038068604, iteration: 24839
loss: 0.9911884069442749,grad_norm: 0.9999990342470814, iteration: 24840
loss: 1.0080591440200806,grad_norm: 0.9999991228840516, iteration: 24841
loss: 1.0406298637390137,grad_norm: 0.9388238387123151, iteration: 24842
loss: 1.0105124711990356,grad_norm: 0.8594482094778446, iteration: 24843
loss: 1.038835883140564,grad_norm: 0.9661623196770488, iteration: 24844
loss: 1.029245376586914,grad_norm: 0.9999992025423885, iteration: 24845
loss: 0.9586248397827148,grad_norm: 0.9095668113514991, iteration: 24846
loss: 1.0212788581848145,grad_norm: 0.9999991433410733, iteration: 24847
loss: 1.0143945217132568,grad_norm: 0.9999996168532146, iteration: 24848
loss: 1.043563723564148,grad_norm: 0.9999991019154968, iteration: 24849
loss: 1.0013155937194824,grad_norm: 0.9415909597109134, iteration: 24850
loss: 1.0050034523010254,grad_norm: 0.9999990926362214, iteration: 24851
loss: 1.0288337469100952,grad_norm: 0.8452035786520663, iteration: 24852
loss: 1.0181759595870972,grad_norm: 0.9050948895921481, iteration: 24853
loss: 0.9969446659088135,grad_norm: 0.9999993705750314, iteration: 24854
loss: 1.0324130058288574,grad_norm: 0.9999996234690102, iteration: 24855
loss: 1.033112645149231,grad_norm: 0.9999998616575566, iteration: 24856
loss: 1.0134570598602295,grad_norm: 0.9842092977866979, iteration: 24857
loss: 1.0013352632522583,grad_norm: 0.9961575366745422, iteration: 24858
loss: 1.0016032457351685,grad_norm: 0.9999991198504109, iteration: 24859
loss: 0.9753815531730652,grad_norm: 0.8931863921557261, iteration: 24860
loss: 1.0400267839431763,grad_norm: 0.9238662657605011, iteration: 24861
loss: 0.9688481092453003,grad_norm: 0.9999991291854983, iteration: 24862
loss: 0.9965064525604248,grad_norm: 0.8892799369609765, iteration: 24863
loss: 1.025994896888733,grad_norm: 0.9999996409983083, iteration: 24864
loss: 1.0329784154891968,grad_norm: 0.9999991192003244, iteration: 24865
loss: 1.016912817955017,grad_norm: 0.9999993352731682, iteration: 24866
loss: 1.040177345275879,grad_norm: 0.9877370357744556, iteration: 24867
loss: 0.9599653482437134,grad_norm: 0.9999990375042412, iteration: 24868
loss: 1.020167589187622,grad_norm: 0.9999993014091085, iteration: 24869
loss: 1.010615348815918,grad_norm: 0.9999990392458125, iteration: 24870
loss: 1.0354204177856445,grad_norm: 0.999999612605724, iteration: 24871
loss: 0.999360978603363,grad_norm: 0.9999991939025131, iteration: 24872
loss: 1.1191486120224,grad_norm: 0.9999997937313265, iteration: 24873
loss: 1.0396522283554077,grad_norm: 0.9999998160091678, iteration: 24874
loss: 1.005476713180542,grad_norm: 0.981786897555047, iteration: 24875
loss: 1.0722450017929077,grad_norm: 0.9999994302404338, iteration: 24876
loss: 1.0240126848220825,grad_norm: 0.958123651983878, iteration: 24877
loss: 0.9892576336860657,grad_norm: 0.9120715527546808, iteration: 24878
loss: 1.0184766054153442,grad_norm: 0.9999995627044111, iteration: 24879
loss: 1.0176959037780762,grad_norm: 0.9999991832370024, iteration: 24880
loss: 0.9789947271347046,grad_norm: 0.9767215490951793, iteration: 24881
loss: 0.9846993088722229,grad_norm: 0.9999990780804737, iteration: 24882
loss: 0.9982374310493469,grad_norm: 0.9999991070083311, iteration: 24883
loss: 0.9811227917671204,grad_norm: 0.9822872235865051, iteration: 24884
loss: 1.1007746458053589,grad_norm: 0.9999994040977971, iteration: 24885
loss: 1.0059423446655273,grad_norm: 0.9150471430799773, iteration: 24886
loss: 1.0095237493515015,grad_norm: 0.9999990653040889, iteration: 24887
loss: 1.029374122619629,grad_norm: 0.9999991205015516, iteration: 24888
loss: 1.0697424411773682,grad_norm: 0.9999996274146494, iteration: 24889
loss: 0.9949518442153931,grad_norm: 0.9999991365646711, iteration: 24890
loss: 1.0413200855255127,grad_norm: 0.9999992424799319, iteration: 24891
loss: 1.0119385719299316,grad_norm: 0.9999991466799028, iteration: 24892
loss: 1.008478045463562,grad_norm: 0.9999992525008888, iteration: 24893
loss: 1.0075825452804565,grad_norm: 0.9254831626809402, iteration: 24894
loss: 1.0228947401046753,grad_norm: 0.9999991215811977, iteration: 24895
loss: 1.089516282081604,grad_norm: 0.9999995835374134, iteration: 24896
loss: 1.0044856071472168,grad_norm: 0.9588208311791799, iteration: 24897
loss: 1.0257967710494995,grad_norm: 0.9999994067263447, iteration: 24898
loss: 1.0073330402374268,grad_norm: 0.9999992041658965, iteration: 24899
loss: 1.0313050746917725,grad_norm: 0.9999990448288314, iteration: 24900
loss: 1.0541688203811646,grad_norm: 0.9999991855751927, iteration: 24901
loss: 1.0097565650939941,grad_norm: 0.9999992384823304, iteration: 24902
loss: 1.0059523582458496,grad_norm: 0.9927764823168103, iteration: 24903
loss: 0.9982264041900635,grad_norm: 0.949134391632187, iteration: 24904
loss: 1.001350998878479,grad_norm: 0.8349280445212802, iteration: 24905
loss: 1.0431394577026367,grad_norm: 0.9999991112424882, iteration: 24906
loss: 1.0317281484603882,grad_norm: 0.9781175219265241, iteration: 24907
loss: 1.0623137950897217,grad_norm: 0.999999172939696, iteration: 24908
loss: 0.9803311228752136,grad_norm: 0.99957236465504, iteration: 24909
loss: 1.0456631183624268,grad_norm: 0.9999993325349775, iteration: 24910
loss: 1.049578309059143,grad_norm: 0.9999995423555987, iteration: 24911
loss: 1.0018398761749268,grad_norm: 0.9106991127059719, iteration: 24912
loss: 1.0221905708312988,grad_norm: 0.935184664241211, iteration: 24913
loss: 0.9931932091712952,grad_norm: 0.853289152198521, iteration: 24914
loss: 1.111641526222229,grad_norm: 0.9999998782246633, iteration: 24915
loss: 0.9973947405815125,grad_norm: 0.9149111592694877, iteration: 24916
loss: 1.0021262168884277,grad_norm: 0.9766839395236989, iteration: 24917
loss: 1.0325783491134644,grad_norm: 0.9999990870614632, iteration: 24918
loss: 1.0232107639312744,grad_norm: 0.9999992122664529, iteration: 24919
loss: 1.004643201828003,grad_norm: 0.9999992595494764, iteration: 24920
loss: 1.014620065689087,grad_norm: 0.9999991146725977, iteration: 24921
loss: 0.9865959286689758,grad_norm: 0.8996827013173385, iteration: 24922
loss: 1.0068650245666504,grad_norm: 0.99999936489919, iteration: 24923
loss: 1.0272319316864014,grad_norm: 0.999999136574739, iteration: 24924
loss: 0.9967473745346069,grad_norm: 0.9999991138443464, iteration: 24925
loss: 0.9938750863075256,grad_norm: 0.9921819639073891, iteration: 24926
loss: 1.0700678825378418,grad_norm: 0.9999997926421051, iteration: 24927
loss: 1.0168883800506592,grad_norm: 0.9999990033651374, iteration: 24928
loss: 0.9786482453346252,grad_norm: 0.9999991257589228, iteration: 24929
loss: 1.1089468002319336,grad_norm: 0.999999858937158, iteration: 24930
loss: 1.0360164642333984,grad_norm: 0.9999990313570027, iteration: 24931
loss: 1.019600510597229,grad_norm: 0.9873958244642886, iteration: 24932
loss: 1.0344144105911255,grad_norm: 0.9999991516165585, iteration: 24933
loss: 0.9834668040275574,grad_norm: 0.9999995321834863, iteration: 24934
loss: 1.002931833267212,grad_norm: 0.9999994661867829, iteration: 24935
loss: 1.0044211149215698,grad_norm: 0.9999991476617247, iteration: 24936
loss: 1.0429590940475464,grad_norm: 0.9999991154306231, iteration: 24937
loss: 1.0122789144515991,grad_norm: 0.9968157233088668, iteration: 24938
loss: 1.0385105609893799,grad_norm: 0.9999994740902846, iteration: 24939
loss: 1.0590745210647583,grad_norm: 0.9999992689440523, iteration: 24940
loss: 1.0624324083328247,grad_norm: 0.9999994339273607, iteration: 24941
loss: 0.968103289604187,grad_norm: 0.9462049840269643, iteration: 24942
loss: 1.0049054622650146,grad_norm: 0.9999992115775119, iteration: 24943
loss: 1.0811768770217896,grad_norm: 0.9999995407571779, iteration: 24944
loss: 1.016074299812317,grad_norm: 0.9136179600807045, iteration: 24945
loss: 1.0205061435699463,grad_norm: 0.999999685669899, iteration: 24946
loss: 1.027695894241333,grad_norm: 0.9999992014398146, iteration: 24947
loss: 1.0106979608535767,grad_norm: 0.99999939297865, iteration: 24948
loss: 1.0348373651504517,grad_norm: 0.9999992220981306, iteration: 24949
loss: 0.9912994503974915,grad_norm: 0.9068496346430532, iteration: 24950
loss: 1.0255995988845825,grad_norm: 0.9999995486826352, iteration: 24951
loss: 1.02656888961792,grad_norm: 0.9999991245447754, iteration: 24952
loss: 1.012733817100525,grad_norm: 0.9999992780653525, iteration: 24953
loss: 1.0430841445922852,grad_norm: 0.9999991899640173, iteration: 24954
loss: 1.0181527137756348,grad_norm: 0.9771468470603585, iteration: 24955
loss: 1.0638850927352905,grad_norm: 0.9999992229781746, iteration: 24956
loss: 0.998119056224823,grad_norm: 0.8875924293128654, iteration: 24957
loss: 1.0108333826065063,grad_norm: 0.9028779008880072, iteration: 24958
loss: 0.9866620302200317,grad_norm: 0.7990870744337619, iteration: 24959
loss: 1.0261882543563843,grad_norm: 0.9999994659327415, iteration: 24960
loss: 1.1324480772018433,grad_norm: 0.9999997888132597, iteration: 24961
loss: 1.0273871421813965,grad_norm: 0.9999993497920444, iteration: 24962
loss: 1.0142488479614258,grad_norm: 0.833181702490168, iteration: 24963
loss: 1.0308635234832764,grad_norm: 0.9999993611859684, iteration: 24964
loss: 0.9873135089874268,grad_norm: 0.9999990984334252, iteration: 24965
loss: 1.0317472219467163,grad_norm: 0.999999514974506, iteration: 24966
loss: 1.0086894035339355,grad_norm: 0.9999990820195019, iteration: 24967
loss: 0.9537441730499268,grad_norm: 0.9999994099016591, iteration: 24968
loss: 1.0059304237365723,grad_norm: 0.8671762460802453, iteration: 24969
loss: 1.0068665742874146,grad_norm: 0.9999991492079102, iteration: 24970
loss: 1.0395667552947998,grad_norm: 0.9999992311047999, iteration: 24971
loss: 0.980440080165863,grad_norm: 0.9999994737369691, iteration: 24972
loss: 1.0018047094345093,grad_norm: 0.905662555913687, iteration: 24973
loss: 1.01755952835083,grad_norm: 0.9999993194984756, iteration: 24974
loss: 0.9815465807914734,grad_norm: 0.9999990563535248, iteration: 24975
loss: 1.022716999053955,grad_norm: 0.9999990666054912, iteration: 24976
loss: 1.0052766799926758,grad_norm: 0.9763071729674586, iteration: 24977
loss: 1.0632563829421997,grad_norm: 0.9999998707180306, iteration: 24978
loss: 1.0537537336349487,grad_norm: 0.9999996941467248, iteration: 24979
loss: 0.9977148175239563,grad_norm: 0.9193426381495104, iteration: 24980
loss: 1.049099087715149,grad_norm: 0.9999993065473367, iteration: 24981
loss: 1.0314034223556519,grad_norm: 0.9999991525002726, iteration: 24982
loss: 1.0038701295852661,grad_norm: 0.9999990539993655, iteration: 24983
loss: 1.0146188735961914,grad_norm: 0.9999992792682287, iteration: 24984
loss: 0.9862201809883118,grad_norm: 0.9999990775375124, iteration: 24985
loss: 1.0012892484664917,grad_norm: 0.9999990112465846, iteration: 24986
loss: 1.0216422080993652,grad_norm: 0.9810199586640359, iteration: 24987
loss: 1.0465686321258545,grad_norm: 0.9999993427999008, iteration: 24988
loss: 1.0414067506790161,grad_norm: 0.9999991975798178, iteration: 24989
loss: 1.0028263330459595,grad_norm: 0.9999991863886487, iteration: 24990
loss: 1.0326513051986694,grad_norm: 0.9999992060638326, iteration: 24991
loss: 1.006626844406128,grad_norm: 0.9999993198681069, iteration: 24992
loss: 1.0557396411895752,grad_norm: 0.9370878234894435, iteration: 24993
loss: 1.0425697565078735,grad_norm: 0.9999992662256775, iteration: 24994
loss: 0.9660168290138245,grad_norm: 0.9877960827323158, iteration: 24995
loss: 1.001273512840271,grad_norm: 0.9999991828292956, iteration: 24996
loss: 1.0025869607925415,grad_norm: 0.9999992739615913, iteration: 24997
loss: 0.9952154159545898,grad_norm: 0.9221268486813674, iteration: 24998
loss: 0.9888168573379517,grad_norm: 0.9999992268232942, iteration: 24999
loss: 1.0252453088760376,grad_norm: 0.9324681934634697, iteration: 25000
loss: 1.0266810655593872,grad_norm: 0.9999993093663423, iteration: 25001
loss: 1.0244203805923462,grad_norm: 0.9866382999727242, iteration: 25002
loss: 1.0752052068710327,grad_norm: 0.9999993125087915, iteration: 25003
loss: 1.044369101524353,grad_norm: 0.9775840887291726, iteration: 25004
loss: 1.0280091762542725,grad_norm: 0.9999992257419659, iteration: 25005
loss: 1.009910225868225,grad_norm: 0.9999991745898785, iteration: 25006
loss: 1.0592174530029297,grad_norm: 0.999999830912348, iteration: 25007
loss: 1.013366937637329,grad_norm: 0.9999991909565699, iteration: 25008
loss: 1.0081102848052979,grad_norm: 0.999999194270792, iteration: 25009
loss: 1.0422130823135376,grad_norm: 0.9999992832670472, iteration: 25010
loss: 1.0313414335250854,grad_norm: 0.99999950544967, iteration: 25011
loss: 1.005462646484375,grad_norm: 0.9999990832085032, iteration: 25012
loss: 1.0329104661941528,grad_norm: 0.9205176095590326, iteration: 25013
loss: 0.9752887487411499,grad_norm: 0.999999068936191, iteration: 25014
loss: 1.042143702507019,grad_norm: 0.9999992308977235, iteration: 25015
loss: 0.9839071035385132,grad_norm: 0.9999991030968965, iteration: 25016
loss: 0.9619923830032349,grad_norm: 0.9999990906619906, iteration: 25017
loss: 1.0251673460006714,grad_norm: 0.9999672466865173, iteration: 25018
loss: 1.0422974824905396,grad_norm: 0.9244859871903005, iteration: 25019
loss: 1.0204945802688599,grad_norm: 0.8732881137848648, iteration: 25020
loss: 1.0718554258346558,grad_norm: 0.9042836592733334, iteration: 25021
loss: 1.0297355651855469,grad_norm: 0.9999990143045956, iteration: 25022
loss: 1.0289499759674072,grad_norm: 0.9999993959223776, iteration: 25023
loss: 1.1064969301223755,grad_norm: 0.999999824100315, iteration: 25024
loss: 1.0058618783950806,grad_norm: 0.9999991262475109, iteration: 25025
loss: 0.9863587021827698,grad_norm: 0.9999996701207414, iteration: 25026
loss: 1.0038870573043823,grad_norm: 0.9999991260869667, iteration: 25027
loss: 0.9798082113265991,grad_norm: 0.9495360581607153, iteration: 25028
loss: 1.0260392427444458,grad_norm: 0.8580651932493315, iteration: 25029
loss: 0.9749892950057983,grad_norm: 0.9999994247913043, iteration: 25030
loss: 1.0296094417572021,grad_norm: 0.9999993476586538, iteration: 25031
loss: 1.0111960172653198,grad_norm: 0.9615523320179363, iteration: 25032
loss: 0.9709376096725464,grad_norm: 0.9999992298590138, iteration: 25033
loss: 0.9734172821044922,grad_norm: 0.9999992868795894, iteration: 25034
loss: 1.0251966714859009,grad_norm: 0.9660000396360832, iteration: 25035
loss: 1.0241862535476685,grad_norm: 0.9999991343566252, iteration: 25036
loss: 1.021805763244629,grad_norm: 0.9135811641307512, iteration: 25037
loss: 1.0350420475006104,grad_norm: 0.9134321886981328, iteration: 25038
loss: 1.0472368001937866,grad_norm: 0.9999992884711459, iteration: 25039
loss: 1.0502312183380127,grad_norm: 0.9999991812200594, iteration: 25040
loss: 1.0299416780471802,grad_norm: 0.9999992418947458, iteration: 25041
loss: 0.9720554947853088,grad_norm: 0.9837985106569509, iteration: 25042
loss: 1.1066482067108154,grad_norm: 0.999999190777694, iteration: 25043
loss: 1.0367776155471802,grad_norm: 0.9999991792801686, iteration: 25044
loss: 1.026023268699646,grad_norm: 0.9248228910722848, iteration: 25045
loss: 1.0568779706954956,grad_norm: 0.9999996217660986, iteration: 25046
loss: 1.0044102668762207,grad_norm: 0.9948252383848876, iteration: 25047
loss: 1.0335335731506348,grad_norm: 0.8452649032965703, iteration: 25048
loss: 0.9832435250282288,grad_norm: 0.9999991300203477, iteration: 25049
loss: 1.0263378620147705,grad_norm: 0.9999992875210452, iteration: 25050
loss: 1.0163177251815796,grad_norm: 0.9999994512336111, iteration: 25051
loss: 1.0182631015777588,grad_norm: 0.9999995288089044, iteration: 25052
loss: 0.9866817593574524,grad_norm: 0.9577189222842237, iteration: 25053
loss: 0.9825904965400696,grad_norm: 0.999998999957235, iteration: 25054
loss: 1.0242985486984253,grad_norm: 0.853451645498356, iteration: 25055
loss: 0.9793401956558228,grad_norm: 0.9999992148403187, iteration: 25056
loss: 0.9819650053977966,grad_norm: 0.8970673523993267, iteration: 25057
loss: 1.0036152601242065,grad_norm: 0.9999991652066764, iteration: 25058
loss: 1.0048832893371582,grad_norm: 0.9999990897986537, iteration: 25059
loss: 1.0303763151168823,grad_norm: 0.9999993462029785, iteration: 25060
loss: 0.9956472516059875,grad_norm: 0.9999991372931162, iteration: 25061
loss: 1.0194536447525024,grad_norm: 0.9999991630629614, iteration: 25062
loss: 1.0416247844696045,grad_norm: 0.9999994802794686, iteration: 25063
loss: 0.995703935623169,grad_norm: 0.9998991624471797, iteration: 25064
loss: 0.9949837923049927,grad_norm: 0.9919749738659835, iteration: 25065
loss: 1.0324400663375854,grad_norm: 0.9999991964007167, iteration: 25066
loss: 1.0594638586044312,grad_norm: 0.9999991442950601, iteration: 25067
loss: 1.0103529691696167,grad_norm: 0.9999993287245956, iteration: 25068
loss: 1.049134373664856,grad_norm: 0.999999276318344, iteration: 25069
loss: 1.0815280675888062,grad_norm: 0.9999994109922999, iteration: 25070
loss: 1.0227174758911133,grad_norm: 0.9999992853026045, iteration: 25071
loss: 1.0290247201919556,grad_norm: 0.9999992574360764, iteration: 25072
loss: 1.011678695678711,grad_norm: 0.995242210559607, iteration: 25073
loss: 0.9960300922393799,grad_norm: 0.8431933263327983, iteration: 25074
loss: 0.9912935495376587,grad_norm: 0.9999991588818298, iteration: 25075
loss: 1.0513015985488892,grad_norm: 0.9999997685108639, iteration: 25076
loss: 0.9979889392852783,grad_norm: 0.9923222887712856, iteration: 25077
loss: 0.9594931602478027,grad_norm: 0.9999992454699302, iteration: 25078
loss: 1.0222976207733154,grad_norm: 0.9999990541205002, iteration: 25079
loss: 0.9965099096298218,grad_norm: 0.9999990870930379, iteration: 25080
loss: 1.0376977920532227,grad_norm: 0.999999562876018, iteration: 25081
loss: 1.005685806274414,grad_norm: 0.9999991949998651, iteration: 25082
loss: 0.9779530167579651,grad_norm: 0.9545387309158021, iteration: 25083
loss: 1.0111926794052124,grad_norm: 0.9999990054990424, iteration: 25084
loss: 1.0372793674468994,grad_norm: 0.9999995776594139, iteration: 25085
loss: 1.0760772228240967,grad_norm: 0.9999991277856921, iteration: 25086
loss: 0.9777591824531555,grad_norm: 0.9999992872133525, iteration: 25087
loss: 1.0172022581100464,grad_norm: 0.9999992686551862, iteration: 25088
loss: 0.9780810475349426,grad_norm: 0.9999991582625477, iteration: 25089
loss: 1.0296926498413086,grad_norm: 0.9999990919404236, iteration: 25090
loss: 0.9900189638137817,grad_norm: 0.9999991371067746, iteration: 25091
loss: 1.0165801048278809,grad_norm: 0.9999991462366795, iteration: 25092
loss: 0.9789154529571533,grad_norm: 0.9999992086819495, iteration: 25093
loss: 1.0683143138885498,grad_norm: 0.9999997654736497, iteration: 25094
loss: 0.9997244477272034,grad_norm: 0.9956642059320595, iteration: 25095
loss: 1.02305269241333,grad_norm: 0.9999989934199779, iteration: 25096
loss: 1.053214192390442,grad_norm: 0.9999991458753524, iteration: 25097
loss: 1.0254487991333008,grad_norm: 0.9999990839041079, iteration: 25098
loss: 1.0205633640289307,grad_norm: 0.9999992309993135, iteration: 25099
loss: 1.0186282396316528,grad_norm: 0.9999991866978803, iteration: 25100
loss: 1.021146297454834,grad_norm: 0.9717570303392564, iteration: 25101
loss: 1.0128592252731323,grad_norm: 0.9999992396987663, iteration: 25102
loss: 1.0116238594055176,grad_norm: 0.9999991277097557, iteration: 25103
loss: 1.021939992904663,grad_norm: 0.9999990885747491, iteration: 25104
loss: 0.9971446394920349,grad_norm: 0.9999991187643927, iteration: 25105
loss: 1.0620148181915283,grad_norm: 0.9999993486775266, iteration: 25106
loss: 0.9913151264190674,grad_norm: 0.9999996008876435, iteration: 25107
loss: 1.022031545639038,grad_norm: 0.9999995563746095, iteration: 25108
loss: 0.9547885060310364,grad_norm: 0.9999993206684494, iteration: 25109
loss: 0.993365466594696,grad_norm: 0.9999997245230676, iteration: 25110
loss: 0.9951924085617065,grad_norm: 0.9999992420305709, iteration: 25111
loss: 0.9961457848548889,grad_norm: 0.9496266889926211, iteration: 25112
loss: 0.9904427528381348,grad_norm: 0.9999991341499161, iteration: 25113
loss: 1.0757211446762085,grad_norm: 0.9999995732349536, iteration: 25114
loss: 1.0179768800735474,grad_norm: 0.9157148583233263, iteration: 25115
loss: 1.0652434825897217,grad_norm: 0.9999994555618007, iteration: 25116
loss: 0.9861587285995483,grad_norm: 0.99999910894874, iteration: 25117
loss: 1.033604383468628,grad_norm: 0.9999996822486849, iteration: 25118
loss: 1.0185964107513428,grad_norm: 0.7824952807781251, iteration: 25119
loss: 1.018266201019287,grad_norm: 0.9999993823469664, iteration: 25120
loss: 0.9959856271743774,grad_norm: 0.9999991758844889, iteration: 25121
loss: 1.0368350744247437,grad_norm: 0.9999992755098217, iteration: 25122
loss: 0.9824638962745667,grad_norm: 0.9784915505681072, iteration: 25123
loss: 1.0560102462768555,grad_norm: 0.8650887042047968, iteration: 25124
loss: 1.0338884592056274,grad_norm: 0.9440550993837507, iteration: 25125
loss: 0.9994127154350281,grad_norm: 0.9999993599585455, iteration: 25126
loss: 1.032753825187683,grad_norm: 0.9999994929698, iteration: 25127
loss: 0.9822924137115479,grad_norm: 0.998286551933983, iteration: 25128
loss: 1.0053497552871704,grad_norm: 0.9933492079737828, iteration: 25129
loss: 1.003043293952942,grad_norm: 0.9999997150547608, iteration: 25130
loss: 0.9840908646583557,grad_norm: 0.9999995755220893, iteration: 25131
loss: 0.992396891117096,grad_norm: 0.9999992557887235, iteration: 25132
loss: 0.9746320247650146,grad_norm: 0.9999991212888674, iteration: 25133
loss: 1.0670275688171387,grad_norm: 0.999999275477722, iteration: 25134
loss: 1.039486289024353,grad_norm: 0.9962623370322132, iteration: 25135
loss: 1.0102988481521606,grad_norm: 0.9999993452991269, iteration: 25136
loss: 0.9939409494400024,grad_norm: 0.999999557035706, iteration: 25137
loss: 0.9639586210250854,grad_norm: 0.9999992710392956, iteration: 25138
loss: 1.0083810091018677,grad_norm: 0.9675978861141861, iteration: 25139
loss: 1.0201495885849,grad_norm: 0.962894394752141, iteration: 25140
loss: 0.9931100606918335,grad_norm: 0.9999993052084654, iteration: 25141
loss: 1.0505625009536743,grad_norm: 0.9999997903460475, iteration: 25142
loss: 1.0335755348205566,grad_norm: 0.9999990818243218, iteration: 25143
loss: 0.985106885433197,grad_norm: 0.9999991347072027, iteration: 25144
loss: 1.0353870391845703,grad_norm: 0.9930737911309302, iteration: 25145
loss: 1.0006035566329956,grad_norm: 0.8568784999602941, iteration: 25146
loss: 1.0863248109817505,grad_norm: 0.9999995876750211, iteration: 25147
loss: 1.024505376815796,grad_norm: 0.9999990341041286, iteration: 25148
loss: 1.0356947183609009,grad_norm: 0.9999990058490548, iteration: 25149
loss: 1.107120156288147,grad_norm: 0.9999994610502301, iteration: 25150
loss: 0.9984380602836609,grad_norm: 0.9999990703334017, iteration: 25151
loss: 0.9835478663444519,grad_norm: 0.9999991934525887, iteration: 25152
loss: 0.9845070242881775,grad_norm: 0.9065319520509427, iteration: 25153
loss: 1.0167739391326904,grad_norm: 0.9965005074155957, iteration: 25154
loss: 1.0308911800384521,grad_norm: 0.9999991690317811, iteration: 25155
loss: 1.0491036176681519,grad_norm: 0.9646636733717722, iteration: 25156
loss: 1.0159730911254883,grad_norm: 0.9328080136164649, iteration: 25157
loss: 1.0107783079147339,grad_norm: 0.999999462985694, iteration: 25158
loss: 1.0338529348373413,grad_norm: 0.9999994697879937, iteration: 25159
loss: 1.0058506727218628,grad_norm: 0.952347216555111, iteration: 25160
loss: 1.0863618850708008,grad_norm: 0.9999991167256919, iteration: 25161
loss: 1.0507848262786865,grad_norm: 0.999999448101207, iteration: 25162
loss: 1.0079785585403442,grad_norm: 0.999999692108671, iteration: 25163
loss: 1.035698413848877,grad_norm: 0.9004973671208117, iteration: 25164
loss: 1.044146180152893,grad_norm: 0.9999992885829001, iteration: 25165
loss: 1.055971384048462,grad_norm: 0.9999996462692662, iteration: 25166
loss: 1.0126595497131348,grad_norm: 0.9999990374979677, iteration: 25167
loss: 0.9876731634140015,grad_norm: 0.9999990526533834, iteration: 25168
loss: 1.050432801246643,grad_norm: 0.9999991341065572, iteration: 25169
loss: 0.9640937447547913,grad_norm: 0.9999991308744114, iteration: 25170
loss: 0.9807591438293457,grad_norm: 0.9999990207915144, iteration: 25171
loss: 1.0055432319641113,grad_norm: 0.9999989991807592, iteration: 25172
loss: 1.013425350189209,grad_norm: 0.9999996297059933, iteration: 25173
loss: 0.9969620108604431,grad_norm: 0.8699444980365735, iteration: 25174
loss: 0.9890637993812561,grad_norm: 0.9823868839050667, iteration: 25175
loss: 1.0062880516052246,grad_norm: 0.9410573410853363, iteration: 25176
loss: 1.0424857139587402,grad_norm: 0.9999991408426466, iteration: 25177
loss: 1.0816740989685059,grad_norm: 0.999999749940205, iteration: 25178
loss: 1.029153823852539,grad_norm: 0.8163658084866388, iteration: 25179
loss: 1.0135023593902588,grad_norm: 0.9999991936797717, iteration: 25180
loss: 0.9781561493873596,grad_norm: 0.9999990909547828, iteration: 25181
loss: 1.0000778436660767,grad_norm: 0.9999993313900474, iteration: 25182
loss: 1.0167605876922607,grad_norm: 0.9598463227949667, iteration: 25183
loss: 1.0235707759857178,grad_norm: 0.9999991992911733, iteration: 25184
loss: 1.0205647945404053,grad_norm: 0.9999994199629135, iteration: 25185
loss: 0.999707043170929,grad_norm: 0.9888437313453632, iteration: 25186
loss: 1.0053967237472534,grad_norm: 0.8380100549658747, iteration: 25187
loss: 0.9890902638435364,grad_norm: 0.9999991892570549, iteration: 25188
loss: 1.009503960609436,grad_norm: 0.9010194298845028, iteration: 25189
loss: 1.053148865699768,grad_norm: 0.9999990886358036, iteration: 25190
loss: 0.9938589334487915,grad_norm: 0.9999991442787196, iteration: 25191
loss: 1.0000900030136108,grad_norm: 0.9999989814795076, iteration: 25192
loss: 1.0128819942474365,grad_norm: 0.9510308093640801, iteration: 25193
loss: 0.999663233757019,grad_norm: 0.999999114958232, iteration: 25194
loss: 0.9782980680465698,grad_norm: 0.8119512297579244, iteration: 25195
loss: 1.1280590295791626,grad_norm: 0.9999997399407987, iteration: 25196
loss: 1.0003567934036255,grad_norm: 0.9999991568235672, iteration: 25197
loss: 0.9920498728752136,grad_norm: 0.999999309386015, iteration: 25198
loss: 0.9795018434524536,grad_norm: 0.9230254683730464, iteration: 25199
loss: 0.9936720132827759,grad_norm: 0.9714382091402101, iteration: 25200
loss: 1.0074065923690796,grad_norm: 0.9999994003956335, iteration: 25201
loss: 0.9936724305152893,grad_norm: 0.9999999078418357, iteration: 25202
loss: 1.0258020162582397,grad_norm: 0.9999992056862899, iteration: 25203
loss: 1.0137957334518433,grad_norm: 0.9999990419809088, iteration: 25204
loss: 0.9655577540397644,grad_norm: 0.9648033026099081, iteration: 25205
loss: 1.0148003101348877,grad_norm: 0.9999991932057599, iteration: 25206
loss: 0.9907342791557312,grad_norm: 0.9511607442226458, iteration: 25207
loss: 1.04345703125,grad_norm: 0.9999995778380141, iteration: 25208
loss: 1.0148941278457642,grad_norm: 0.8170454493699891, iteration: 25209
loss: 1.0487229824066162,grad_norm: 0.9999994479470904, iteration: 25210
loss: 1.0104355812072754,grad_norm: 0.9999992643496426, iteration: 25211
loss: 0.9951386451721191,grad_norm: 0.9999992729021284, iteration: 25212
loss: 1.0580461025238037,grad_norm: 0.9999989546197764, iteration: 25213
loss: 1.0282158851623535,grad_norm: 0.9999991715899552, iteration: 25214
loss: 1.0003609657287598,grad_norm: 0.9999990743128508, iteration: 25215
loss: 1.022647738456726,grad_norm: 0.9999994882649664, iteration: 25216
loss: 0.9888044595718384,grad_norm: 0.9999991846972465, iteration: 25217
loss: 1.0288212299346924,grad_norm: 0.9999992763558788, iteration: 25218
loss: 1.0100370645523071,grad_norm: 0.9999992887989361, iteration: 25219
loss: 1.0133140087127686,grad_norm: 0.8908811560022755, iteration: 25220
loss: 1.0328006744384766,grad_norm: 0.9999991391630434, iteration: 25221
loss: 0.9804105758666992,grad_norm: 0.9999993315881006, iteration: 25222
loss: 0.9800643920898438,grad_norm: 0.9999991708341259, iteration: 25223
loss: 0.9897955656051636,grad_norm: 0.8824405450067921, iteration: 25224
loss: 1.0168960094451904,grad_norm: 0.9999990377840422, iteration: 25225
loss: 1.0254136323928833,grad_norm: 0.9999992155841647, iteration: 25226
loss: 1.0081443786621094,grad_norm: 0.9619214498939966, iteration: 25227
loss: 0.9945198893547058,grad_norm: 0.9999990504783579, iteration: 25228
loss: 1.0228931903839111,grad_norm: 0.9022491299332309, iteration: 25229
loss: 0.9642626047134399,grad_norm: 0.9999990495913779, iteration: 25230
loss: 0.9907149076461792,grad_norm: 0.999999473913231, iteration: 25231
loss: 1.0150713920593262,grad_norm: 0.9831887444783292, iteration: 25232
loss: 1.043221116065979,grad_norm: 0.9999996106410735, iteration: 25233
loss: 1.0234194993972778,grad_norm: 0.9629701081063491, iteration: 25234
loss: 1.0455563068389893,grad_norm: 0.9999993825347512, iteration: 25235
loss: 1.0008678436279297,grad_norm: 0.9999989995665549, iteration: 25236
loss: 1.0460331439971924,grad_norm: 0.8137628587286787, iteration: 25237
loss: 1.004469633102417,grad_norm: 0.9999991424036283, iteration: 25238
loss: 1.023373007774353,grad_norm: 0.9999990969305581, iteration: 25239
loss: 1.0421555042266846,grad_norm: 0.9999991141906098, iteration: 25240
loss: 1.060265302658081,grad_norm: 0.9446544985944959, iteration: 25241
loss: 1.0064772367477417,grad_norm: 0.999999296272867, iteration: 25242
loss: 0.9967073202133179,grad_norm: 0.9537686681268075, iteration: 25243
loss: 1.0180262327194214,grad_norm: 0.9999990033550745, iteration: 25244
loss: 1.016198992729187,grad_norm: 0.9999995800612337, iteration: 25245
loss: 1.0119800567626953,grad_norm: 0.9999992710086373, iteration: 25246
loss: 1.0290660858154297,grad_norm: 0.8153774697748756, iteration: 25247
loss: 1.0161831378936768,grad_norm: 0.9999993814319328, iteration: 25248
loss: 0.9879536628723145,grad_norm: 0.9510597912620465, iteration: 25249
loss: 1.0150384902954102,grad_norm: 0.9999991322913235, iteration: 25250
loss: 1.0322235822677612,grad_norm: 0.9999989975406972, iteration: 25251
loss: 0.9953300952911377,grad_norm: 0.9966521762887329, iteration: 25252
loss: 1.0353212356567383,grad_norm: 0.9999995047239104, iteration: 25253
loss: 1.0516774654388428,grad_norm: 0.9854386402691369, iteration: 25254
loss: 1.0362179279327393,grad_norm: 0.9999990565443377, iteration: 25255
loss: 0.9995328187942505,grad_norm: 0.9999991078060327, iteration: 25256
loss: 1.000220775604248,grad_norm: 0.9999990643192215, iteration: 25257
loss: 1.00730562210083,grad_norm: 0.9999990548240633, iteration: 25258
loss: 0.9984250068664551,grad_norm: 0.9641792452938371, iteration: 25259
loss: 1.0108634233474731,grad_norm: 0.9644582375755039, iteration: 25260
loss: 0.989142894744873,grad_norm: 0.7805414641922409, iteration: 25261
loss: 0.9990456700325012,grad_norm: 0.9999992103333866, iteration: 25262
loss: 1.0283070802688599,grad_norm: 0.9005148170262068, iteration: 25263
loss: 1.0326961278915405,grad_norm: 0.9999995635245025, iteration: 25264
loss: 0.9862341284751892,grad_norm: 0.9999990441846436, iteration: 25265
loss: 1.036539912223816,grad_norm: 0.9999997508674318, iteration: 25266
loss: 1.0069466829299927,grad_norm: 0.9999993564063874, iteration: 25267
loss: 1.0216869115829468,grad_norm: 0.9999991100909374, iteration: 25268
loss: 1.0725866556167603,grad_norm: 0.9999990892212959, iteration: 25269
loss: 1.0769615173339844,grad_norm: 0.9999996679704227, iteration: 25270
loss: 1.018804907798767,grad_norm: 0.9999992307542623, iteration: 25271
loss: 0.9640052914619446,grad_norm: 0.999999187580912, iteration: 25272
loss: 1.048051357269287,grad_norm: 0.985805265401326, iteration: 25273
loss: 1.0308222770690918,grad_norm: 0.8855400400926847, iteration: 25274
loss: 1.0192545652389526,grad_norm: 0.9999999015665813, iteration: 25275
loss: 1.0183671712875366,grad_norm: 0.9383433124378794, iteration: 25276
loss: 1.0121997594833374,grad_norm: 0.9999991032063051, iteration: 25277
loss: 0.9652765393257141,grad_norm: 0.9999992297232814, iteration: 25278
loss: 0.9796375632286072,grad_norm: 0.9999992126687245, iteration: 25279
loss: 0.9945698380470276,grad_norm: 0.9999992305107682, iteration: 25280
loss: 0.9647030830383301,grad_norm: 0.9612550043062308, iteration: 25281
loss: 0.9951752424240112,grad_norm: 0.9999991668364256, iteration: 25282
loss: 1.0255316495895386,grad_norm: 0.8568965268324265, iteration: 25283
loss: 1.0181491374969482,grad_norm: 0.9999991344188198, iteration: 25284
loss: 1.0132379531860352,grad_norm: 0.9234738480524224, iteration: 25285
loss: 1.101430892944336,grad_norm: 0.9999996246975724, iteration: 25286
loss: 0.9653658270835876,grad_norm: 0.9999991389856067, iteration: 25287
loss: 1.0118101835250854,grad_norm: 0.9999990388475609, iteration: 25288
loss: 1.0392740964889526,grad_norm: 0.9999997536702789, iteration: 25289
loss: 1.0456463098526,grad_norm: 0.9999997620587168, iteration: 25290
loss: 0.9963468313217163,grad_norm: 0.9999992360405286, iteration: 25291
loss: 0.9975902438163757,grad_norm: 0.980159089836216, iteration: 25292
loss: 1.055586814880371,grad_norm: 0.9999993771284874, iteration: 25293
loss: 1.0604876279830933,grad_norm: 0.9999990769149787, iteration: 25294
loss: 0.9727956652641296,grad_norm: 0.9999992704924702, iteration: 25295
loss: 0.9843235015869141,grad_norm: 0.9999995869125791, iteration: 25296
loss: 1.1942050457000732,grad_norm: 0.9999998609453404, iteration: 25297
loss: 1.0384037494659424,grad_norm: 0.9999994443283569, iteration: 25298
loss: 1.0354368686676025,grad_norm: 0.9999997353955911, iteration: 25299
loss: 1.0136100053787231,grad_norm: 0.9686800558216101, iteration: 25300
loss: 0.9787764549255371,grad_norm: 0.999999167226404, iteration: 25301
loss: 1.0365029573440552,grad_norm: 0.9999993412532882, iteration: 25302
loss: 0.9939457774162292,grad_norm: 0.9999991223332764, iteration: 25303
loss: 1.0265614986419678,grad_norm: 0.9999992848553844, iteration: 25304
loss: 0.9951033592224121,grad_norm: 0.9999990256399601, iteration: 25305
loss: 1.0113052129745483,grad_norm: 0.8981775514206524, iteration: 25306
loss: 0.9952707290649414,grad_norm: 0.9999992613994957, iteration: 25307
loss: 0.9946273565292358,grad_norm: 0.9999990852119682, iteration: 25308
loss: 1.0264759063720703,grad_norm: 0.9999996937611748, iteration: 25309
loss: 1.030879020690918,grad_norm: 0.9999990432995285, iteration: 25310
loss: 1.0555588006973267,grad_norm: 0.9999992702447162, iteration: 25311
loss: 1.0208427906036377,grad_norm: 0.99999922609185, iteration: 25312
loss: 0.9971074461936951,grad_norm: 0.937078684243081, iteration: 25313
loss: 1.0152437686920166,grad_norm: 0.9999990683398099, iteration: 25314
loss: 0.9987012147903442,grad_norm: 0.9999992963498661, iteration: 25315
loss: 1.0648877620697021,grad_norm: 0.9999996754058167, iteration: 25316
loss: 0.9496840238571167,grad_norm: 0.9999990953827208, iteration: 25317
loss: 1.1025419235229492,grad_norm: 0.9999996807781328, iteration: 25318
loss: 1.0244524478912354,grad_norm: 0.8324962368207381, iteration: 25319
loss: 1.0317835807800293,grad_norm: 0.9999995958975008, iteration: 25320
loss: 1.0614687204360962,grad_norm: 0.9999995061079173, iteration: 25321
loss: 0.978981614112854,grad_norm: 0.9291880743669647, iteration: 25322
loss: 1.0551965236663818,grad_norm: 0.9999991413688157, iteration: 25323
loss: 0.9866088032722473,grad_norm: 0.9999992905898101, iteration: 25324
loss: 0.9728467464447021,grad_norm: 0.9999991472434395, iteration: 25325
loss: 1.0130678415298462,grad_norm: 0.9288961417008587, iteration: 25326
loss: 1.147149682044983,grad_norm: 0.999999622302665, iteration: 25327
loss: 1.0214555263519287,grad_norm: 0.8229658999955358, iteration: 25328
loss: 1.0284178256988525,grad_norm: 0.9999992449618423, iteration: 25329
loss: 1.0019222497940063,grad_norm: 0.9748922174835533, iteration: 25330
loss: 1.0288670063018799,grad_norm: 0.999999630467599, iteration: 25331
loss: 1.0132458209991455,grad_norm: 0.9999992281635225, iteration: 25332
loss: 1.0402061939239502,grad_norm: 0.999999527510406, iteration: 25333
loss: 1.023064374923706,grad_norm: 0.999999184744733, iteration: 25334
loss: 1.0055545568466187,grad_norm: 0.999998999402905, iteration: 25335
loss: 1.0000725984573364,grad_norm: 0.9999991686972736, iteration: 25336
loss: 1.0241016149520874,grad_norm: 0.9999993478429835, iteration: 25337
loss: 1.116269826889038,grad_norm: 0.9999990444872296, iteration: 25338
loss: 0.9850245118141174,grad_norm: 0.9999991701556277, iteration: 25339
loss: 1.019731044769287,grad_norm: 0.9999992546636123, iteration: 25340
loss: 1.0576579570770264,grad_norm: 0.8829597675153843, iteration: 25341
loss: 1.1285346746444702,grad_norm: 0.9999995108164593, iteration: 25342
loss: 1.0313814878463745,grad_norm: 0.9999995649661091, iteration: 25343
loss: 0.9640804529190063,grad_norm: 0.8887713640577027, iteration: 25344
loss: 1.0906883478164673,grad_norm: 0.9999994886920918, iteration: 25345
loss: 1.0587011575698853,grad_norm: 0.9999996108769451, iteration: 25346
loss: 1.0027498006820679,grad_norm: 0.9716672482050016, iteration: 25347
loss: 0.9715986847877502,grad_norm: 0.9999990301501699, iteration: 25348
loss: 1.0288797616958618,grad_norm: 0.9999992842855937, iteration: 25349
loss: 1.0006239414215088,grad_norm: 0.9999989433631341, iteration: 25350
loss: 1.02352774143219,grad_norm: 0.9794554253672567, iteration: 25351
loss: 1.009372591972351,grad_norm: 0.9999991086300155, iteration: 25352
loss: 1.0221599340438843,grad_norm: 0.9443294610003643, iteration: 25353
loss: 0.9619259238243103,grad_norm: 0.8986557527417918, iteration: 25354
loss: 0.9972505569458008,grad_norm: 0.9182959529383459, iteration: 25355
loss: 1.0210087299346924,grad_norm: 0.9999991083521018, iteration: 25356
loss: 1.0089629888534546,grad_norm: 0.999999773964994, iteration: 25357
loss: 1.055814266204834,grad_norm: 0.9999995156440542, iteration: 25358
loss: 1.0325002670288086,grad_norm: 0.9999991642989912, iteration: 25359
loss: 1.0254899263381958,grad_norm: 0.9685742005925043, iteration: 25360
loss: 1.0414587259292603,grad_norm: 0.99999921971772, iteration: 25361
loss: 1.008235216140747,grad_norm: 0.9999991280156024, iteration: 25362
loss: 1.0493056774139404,grad_norm: 0.9783921294175683, iteration: 25363
loss: 1.0221043825149536,grad_norm: 0.9999998839765706, iteration: 25364
loss: 1.0297471284866333,grad_norm: 0.9999994644122632, iteration: 25365
loss: 1.0051000118255615,grad_norm: 0.9946517443266079, iteration: 25366
loss: 0.9952450394630432,grad_norm: 0.9999991013108477, iteration: 25367
loss: 1.0251972675323486,grad_norm: 0.9999991484052442, iteration: 25368
loss: 1.019701600074768,grad_norm: 0.9999996515105252, iteration: 25369
loss: 1.011986255645752,grad_norm: 0.94412148491495, iteration: 25370
loss: 1.0504344701766968,grad_norm: 0.9999996964297321, iteration: 25371
loss: 0.9983745217323303,grad_norm: 0.9999992355146623, iteration: 25372
loss: 1.0023860931396484,grad_norm: 0.9999995712348814, iteration: 25373
loss: 1.0272345542907715,grad_norm: 0.9715809071026119, iteration: 25374
loss: 0.9976799488067627,grad_norm: 0.9999992645272034, iteration: 25375
loss: 1.0057703256607056,grad_norm: 0.9999992818640111, iteration: 25376
loss: 1.0127270221710205,grad_norm: 0.8859842868196792, iteration: 25377
loss: 0.984063982963562,grad_norm: 0.9999989580510205, iteration: 25378
loss: 1.0162286758422852,grad_norm: 0.9999991919440635, iteration: 25379
loss: 1.018441081047058,grad_norm: 0.9999994564549957, iteration: 25380
loss: 0.9936848282814026,grad_norm: 0.9246653981917249, iteration: 25381
loss: 0.9907463192939758,grad_norm: 0.9999992626090907, iteration: 25382
loss: 1.0125678777694702,grad_norm: 0.9580058081095016, iteration: 25383
loss: 1.012497067451477,grad_norm: 0.9999990341118151, iteration: 25384
loss: 0.9834284782409668,grad_norm: 0.950111880641394, iteration: 25385
loss: 1.0454143285751343,grad_norm: 0.9108554675612847, iteration: 25386
loss: 0.9721924066543579,grad_norm: 0.9999989830804973, iteration: 25387
loss: 0.9989093542098999,grad_norm: 0.9999991243630783, iteration: 25388
loss: 1.0188770294189453,grad_norm: 0.9999990773328183, iteration: 25389
loss: 1.0293488502502441,grad_norm: 0.999999094545466, iteration: 25390
loss: 1.0176886320114136,grad_norm: 0.9999990758540513, iteration: 25391
loss: 0.9991423487663269,grad_norm: 0.9999991672630245, iteration: 25392
loss: 1.0750761032104492,grad_norm: 0.999999530836317, iteration: 25393
loss: 1.0335257053375244,grad_norm: 0.9070519440293492, iteration: 25394
loss: 0.9910308122634888,grad_norm: 0.9578900870617196, iteration: 25395
loss: 1.0693061351776123,grad_norm: 0.9999993003785396, iteration: 25396
loss: 1.0070306062698364,grad_norm: 0.9999991609630345, iteration: 25397
loss: 0.9669899344444275,grad_norm: 0.9999992060403167, iteration: 25398
loss: 1.0383456945419312,grad_norm: 0.9999991848939163, iteration: 25399
loss: 1.0283797979354858,grad_norm: 0.9891317819855876, iteration: 25400
loss: 1.0255882740020752,grad_norm: 0.9999991994462468, iteration: 25401
loss: 1.0280191898345947,grad_norm: 0.9999991461729528, iteration: 25402
loss: 0.9698024988174438,grad_norm: 0.9999991872762801, iteration: 25403
loss: 0.9700146317481995,grad_norm: 0.9481420933567551, iteration: 25404
loss: 0.9896978735923767,grad_norm: 0.9217486072538118, iteration: 25405
loss: 1.0668400526046753,grad_norm: 0.9999996317783887, iteration: 25406
loss: 1.0222257375717163,grad_norm: 0.9999991875040712, iteration: 25407
loss: 1.0208301544189453,grad_norm: 0.9999992986568219, iteration: 25408
loss: 0.9724645018577576,grad_norm: 0.9999990095355585, iteration: 25409
loss: 0.958018958568573,grad_norm: 0.8892543795404706, iteration: 25410
loss: 1.0789109468460083,grad_norm: 0.9999999583857833, iteration: 25411
loss: 1.0622442960739136,grad_norm: 0.9999993968563824, iteration: 25412
loss: 1.0293300151824951,grad_norm: 0.999998997387863, iteration: 25413
loss: 0.9993345737457275,grad_norm: 0.9999992386388865, iteration: 25414
loss: 1.0365915298461914,grad_norm: 0.9999997754650286, iteration: 25415
loss: 1.0278278589248657,grad_norm: 0.9999993681948427, iteration: 25416
loss: 1.0190588235855103,grad_norm: 0.986020534961617, iteration: 25417
loss: 1.0041154623031616,grad_norm: 0.999999146813186, iteration: 25418
loss: 0.9891088604927063,grad_norm: 0.9717350693335178, iteration: 25419
loss: 1.0453838109970093,grad_norm: 0.9817883312668502, iteration: 25420
loss: 1.0141897201538086,grad_norm: 0.9999996673259782, iteration: 25421
loss: 0.9904333353042603,grad_norm: 0.9999992742052977, iteration: 25422
loss: 0.9860758781433105,grad_norm: 0.8511728426011136, iteration: 25423
loss: 0.9973921775817871,grad_norm: 0.9735759507350853, iteration: 25424
loss: 1.013651967048645,grad_norm: 0.9999991349536049, iteration: 25425
loss: 1.010833978652954,grad_norm: 0.9999991646856056, iteration: 25426
loss: 1.068120002746582,grad_norm: 0.9999991883743061, iteration: 25427
loss: 1.0502265691757202,grad_norm: 0.9999995538112528, iteration: 25428
loss: 1.010040521621704,grad_norm: 0.9999992227472482, iteration: 25429
loss: 1.019861102104187,grad_norm: 0.9999989920093516, iteration: 25430
loss: 1.0007177591323853,grad_norm: 0.8774222338165495, iteration: 25431
loss: 0.9682682156562805,grad_norm: 0.9999991996370718, iteration: 25432
loss: 0.9821950793266296,grad_norm: 0.9292824943770933, iteration: 25433
loss: 1.0393867492675781,grad_norm: 0.9520577068588328, iteration: 25434
loss: 1.0092967748641968,grad_norm: 0.9999990223600268, iteration: 25435
loss: 1.0047543048858643,grad_norm: 0.8519539218599707, iteration: 25436
loss: 1.1092853546142578,grad_norm: 0.9999993201391862, iteration: 25437
loss: 1.005296230316162,grad_norm: 0.999999060207922, iteration: 25438
loss: 1.0155205726623535,grad_norm: 0.9499922151884949, iteration: 25439
loss: 1.0336971282958984,grad_norm: 0.999999023873659, iteration: 25440
loss: 0.973852813243866,grad_norm: 0.9999991967010298, iteration: 25441
loss: 1.013830542564392,grad_norm: 0.999999248134766, iteration: 25442
loss: 1.0732557773590088,grad_norm: 0.9999993623309033, iteration: 25443
loss: 1.053154468536377,grad_norm: 0.9999993429804566, iteration: 25444
loss: 1.025465488433838,grad_norm: 0.9999990663877948, iteration: 25445
loss: 1.0051460266113281,grad_norm: 0.9999993122860934, iteration: 25446
loss: 0.9967779517173767,grad_norm: 0.8464011953841332, iteration: 25447
loss: 1.0077606439590454,grad_norm: 0.9999991467937615, iteration: 25448
loss: 1.0491310358047485,grad_norm: 0.9999991378826865, iteration: 25449
loss: 1.0203814506530762,grad_norm: 0.8887625476154732, iteration: 25450
loss: 1.0413788557052612,grad_norm: 0.9999992246228671, iteration: 25451
loss: 1.021714448928833,grad_norm: 0.9999995545366535, iteration: 25452
loss: 0.9907429218292236,grad_norm: 0.9240760624215412, iteration: 25453
loss: 1.0262951850891113,grad_norm: 0.9999999603615461, iteration: 25454
loss: 1.0141812562942505,grad_norm: 0.9999991387835241, iteration: 25455
loss: 0.9981365203857422,grad_norm: 0.9553319316862522, iteration: 25456
loss: 1.0308319330215454,grad_norm: 0.9999991841363067, iteration: 25457
loss: 1.008941650390625,grad_norm: 0.9999994150143421, iteration: 25458
loss: 1.0220792293548584,grad_norm: 0.9999993932773033, iteration: 25459
loss: 0.9871410131454468,grad_norm: 0.9318300376115329, iteration: 25460
loss: 1.0489367246627808,grad_norm: 0.9999992240183369, iteration: 25461
loss: 0.9862999320030212,grad_norm: 0.9258765925249741, iteration: 25462
loss: 1.0013755559921265,grad_norm: 0.999999481894987, iteration: 25463
loss: 0.9828440546989441,grad_norm: 0.9130436906883321, iteration: 25464
loss: 1.0268921852111816,grad_norm: 0.9999991567273534, iteration: 25465
loss: 1.0338917970657349,grad_norm: 0.9999992616650925, iteration: 25466
loss: 1.049249291419983,grad_norm: 0.9909083998553626, iteration: 25467
loss: 1.0334149599075317,grad_norm: 0.9999992490991988, iteration: 25468
loss: 1.020604133605957,grad_norm: 0.914030493300092, iteration: 25469
loss: 1.0229554176330566,grad_norm: 0.8987460942421003, iteration: 25470
loss: 1.0265077352523804,grad_norm: 0.9999991195237735, iteration: 25471
loss: 0.9764666557312012,grad_norm: 0.9999991785374712, iteration: 25472
loss: 1.0009145736694336,grad_norm: 0.9475327125300255, iteration: 25473
loss: 1.043189287185669,grad_norm: 0.9498137204838266, iteration: 25474
loss: 0.9840931296348572,grad_norm: 0.9594818531186994, iteration: 25475
loss: 1.0134259462356567,grad_norm: 0.9999991564160762, iteration: 25476
loss: 1.0042471885681152,grad_norm: 0.9999991087193344, iteration: 25477
loss: 1.0109138488769531,grad_norm: 0.9999991697568604, iteration: 25478
loss: 1.0306185483932495,grad_norm: 0.999999668582745, iteration: 25479
loss: 1.053836464881897,grad_norm: 0.9999990925886969, iteration: 25480
loss: 1.01295804977417,grad_norm: 0.9670388788394302, iteration: 25481
loss: 1.0013554096221924,grad_norm: 0.9999990552964343, iteration: 25482
loss: 1.0005348920822144,grad_norm: 0.9343505592897592, iteration: 25483
loss: 1.076916217803955,grad_norm: 0.9999994569977709, iteration: 25484
loss: 1.0180474519729614,grad_norm: 0.8767500824709679, iteration: 25485
loss: 1.0531493425369263,grad_norm: 0.9999991423761262, iteration: 25486
loss: 0.9734181761741638,grad_norm: 0.9999993181716257, iteration: 25487
loss: 0.9885795712471008,grad_norm: 0.9999990611350185, iteration: 25488
loss: 1.041521668434143,grad_norm: 0.97298294334093, iteration: 25489
loss: 1.017347812652588,grad_norm: 0.9999991042225068, iteration: 25490
loss: 1.0452032089233398,grad_norm: 0.9999997620606783, iteration: 25491
loss: 1.049889326095581,grad_norm: 0.9999992838642711, iteration: 25492
loss: 1.0028774738311768,grad_norm: 0.9999991075349143, iteration: 25493
loss: 1.0034245252609253,grad_norm: 0.8571962351164006, iteration: 25494
loss: 1.031561255455017,grad_norm: 0.9999991785018373, iteration: 25495
loss: 1.0956954956054688,grad_norm: 0.9999994244906562, iteration: 25496
loss: 0.9886331558227539,grad_norm: 0.8956799546096866, iteration: 25497
loss: 1.026832103729248,grad_norm: 0.9462911319351407, iteration: 25498
loss: 1.0224852561950684,grad_norm: 0.9999991180564975, iteration: 25499
loss: 1.0138949155807495,grad_norm: 0.9999990731068029, iteration: 25500
loss: 0.961777925491333,grad_norm: 0.9999991513020897, iteration: 25501
loss: 1.072586178779602,grad_norm: 0.999999399675879, iteration: 25502
loss: 1.0928459167480469,grad_norm: 0.9999991173147967, iteration: 25503
loss: 1.0225204229354858,grad_norm: 0.9999992737530392, iteration: 25504
loss: 1.0023441314697266,grad_norm: 0.999999064844007, iteration: 25505
loss: 1.084603190422058,grad_norm: 0.999999368481853, iteration: 25506
loss: 1.0193471908569336,grad_norm: 0.9999995037911755, iteration: 25507
loss: 0.9559218287467957,grad_norm: 0.9999992485625162, iteration: 25508
loss: 1.02394700050354,grad_norm: 0.9999991639403932, iteration: 25509
loss: 1.0464359521865845,grad_norm: 0.9999990799515072, iteration: 25510
loss: 1.0147502422332764,grad_norm: 0.9879058978917419, iteration: 25511
loss: 1.0758585929870605,grad_norm: 0.9999996167500095, iteration: 25512
loss: 1.0094029903411865,grad_norm: 0.9999990057044634, iteration: 25513
loss: 1.0330424308776855,grad_norm: 0.9040136548809486, iteration: 25514
loss: 0.9879098534584045,grad_norm: 0.9999996220975186, iteration: 25515
loss: 1.0487539768218994,grad_norm: 0.9999994179053294, iteration: 25516
loss: 1.0106183290481567,grad_norm: 0.9999991420263409, iteration: 25517
loss: 1.0155788660049438,grad_norm: 0.9999989566930637, iteration: 25518
loss: 1.019486665725708,grad_norm: 0.9999991118417869, iteration: 25519
loss: 1.0350595712661743,grad_norm: 0.9999991822926317, iteration: 25520
loss: 1.0010414123535156,grad_norm: 0.9999991333912072, iteration: 25521
loss: 1.0204771757125854,grad_norm: 0.9999993141261716, iteration: 25522
loss: 0.9688073992729187,grad_norm: 0.8981148375275115, iteration: 25523
loss: 1.011581540107727,grad_norm: 0.9999994983805477, iteration: 25524
loss: 1.0166234970092773,grad_norm: 0.9999991214540113, iteration: 25525
loss: 1.1322535276412964,grad_norm: 0.9999996659485312, iteration: 25526
loss: 0.9817629456520081,grad_norm: 0.9999991485695015, iteration: 25527
loss: 1.0279648303985596,grad_norm: 0.9999992075388572, iteration: 25528
loss: 0.9936695694923401,grad_norm: 0.9869785571711293, iteration: 25529
loss: 0.9689109921455383,grad_norm: 0.9999992325461279, iteration: 25530
loss: 1.052014946937561,grad_norm: 0.9999994422579037, iteration: 25531
loss: 1.0630356073379517,grad_norm: 0.9999994728548006, iteration: 25532
loss: 1.008646011352539,grad_norm: 0.999999102174335, iteration: 25533
loss: 1.0445095300674438,grad_norm: 0.9999993010183084, iteration: 25534
loss: 1.0536699295043945,grad_norm: 0.9999999086556595, iteration: 25535
loss: 1.0607683658599854,grad_norm: 0.999999246019265, iteration: 25536
loss: 1.0360713005065918,grad_norm: 0.9999991029519564, iteration: 25537
loss: 1.0056859254837036,grad_norm: 0.9999990577739718, iteration: 25538
loss: 1.0321458578109741,grad_norm: 0.9265686952265815, iteration: 25539
loss: 1.192396640777588,grad_norm: 0.9999998684433695, iteration: 25540
loss: 1.0510646104812622,grad_norm: 0.99999980903512, iteration: 25541
loss: 1.0240192413330078,grad_norm: 0.9999991689091868, iteration: 25542
loss: 1.0026166439056396,grad_norm: 0.9685316422547795, iteration: 25543
loss: 1.0518816709518433,grad_norm: 0.9999995797133064, iteration: 25544
loss: 0.951575756072998,grad_norm: 0.9328228322991354, iteration: 25545
loss: 1.0047576427459717,grad_norm: 0.9999991897787506, iteration: 25546
loss: 1.0433796644210815,grad_norm: 0.999999101658457, iteration: 25547
loss: 1.0264276266098022,grad_norm: 0.9999995076089299, iteration: 25548
loss: 1.0257676839828491,grad_norm: 0.9999991454980702, iteration: 25549
loss: 1.0354852676391602,grad_norm: 0.9999991631675866, iteration: 25550
loss: 1.015461802482605,grad_norm: 0.9999994118051239, iteration: 25551
loss: 1.0409142971038818,grad_norm: 0.9999991773945368, iteration: 25552
loss: 1.0422394275665283,grad_norm: 0.9999995085474865, iteration: 25553
loss: 1.046213150024414,grad_norm: 0.9999990240853618, iteration: 25554
loss: 1.022589921951294,grad_norm: 0.9999992718590056, iteration: 25555
loss: 1.0160177946090698,grad_norm: 0.9999992332754761, iteration: 25556
loss: 0.9689891934394836,grad_norm: 0.8659957725211959, iteration: 25557
loss: 1.03230619430542,grad_norm: 0.9999994045076012, iteration: 25558
loss: 1.0160322189331055,grad_norm: 0.9999996177302266, iteration: 25559
loss: 0.9666794538497925,grad_norm: 0.9999991486862648, iteration: 25560
loss: 0.9689064621925354,grad_norm: 0.9999991493978381, iteration: 25561
loss: 0.9713773131370544,grad_norm: 0.999999234166982, iteration: 25562
loss: 1.0730226039886475,grad_norm: 0.9999990628631369, iteration: 25563
loss: 0.9837546348571777,grad_norm: 0.8493056989487954, iteration: 25564
loss: 1.1014257669448853,grad_norm: 0.999999894803584, iteration: 25565
loss: 1.0534130334854126,grad_norm: 0.9999992612626053, iteration: 25566
loss: 1.031496286392212,grad_norm: 0.9444026744812343, iteration: 25567
loss: 1.0583183765411377,grad_norm: 0.9999991852865705, iteration: 25568
loss: 0.9870737195014954,grad_norm: 0.9999992706335901, iteration: 25569
loss: 1.01028311252594,grad_norm: 0.9999991442475291, iteration: 25570
loss: 0.9772695899009705,grad_norm: 0.9999989605306497, iteration: 25571
loss: 1.0263992547988892,grad_norm: 0.8537587833113024, iteration: 25572
loss: 1.008514642715454,grad_norm: 0.9999999512438889, iteration: 25573
loss: 0.9878412485122681,grad_norm: 0.9077008518495578, iteration: 25574
loss: 0.9869530200958252,grad_norm: 0.9999992153532892, iteration: 25575
loss: 1.0083051919937134,grad_norm: 0.9999991239679064, iteration: 25576
loss: 1.0419868230819702,grad_norm: 0.9060626845124725, iteration: 25577
loss: 1.0162708759307861,grad_norm: 0.939229235365627, iteration: 25578
loss: 1.0656801462173462,grad_norm: 0.9999998617890417, iteration: 25579
loss: 1.0363513231277466,grad_norm: 0.9999995997759185, iteration: 25580
loss: 1.0350048542022705,grad_norm: 0.9999993523962244, iteration: 25581
loss: 1.0188348293304443,grad_norm: 0.9999991756839658, iteration: 25582
loss: 1.0924286842346191,grad_norm: 0.9999998202246926, iteration: 25583
loss: 1.0195356607437134,grad_norm: 0.9999990351779823, iteration: 25584
loss: 1.0162097215652466,grad_norm: 0.9319398946494462, iteration: 25585
loss: 1.0695823431015015,grad_norm: 0.9999996444482779, iteration: 25586
loss: 1.0132339000701904,grad_norm: 0.9999991963740057, iteration: 25587
loss: 0.9867568612098694,grad_norm: 0.9999991102679542, iteration: 25588
loss: 1.014496922492981,grad_norm: 0.9999991280076127, iteration: 25589
loss: 1.0166451930999756,grad_norm: 0.9999992590801401, iteration: 25590
loss: 1.0337649583816528,grad_norm: 0.9999991772324358, iteration: 25591
loss: 1.0282713174819946,grad_norm: 0.9999990961324124, iteration: 25592
loss: 0.9811778664588928,grad_norm: 0.9999990377815886, iteration: 25593
loss: 1.0044609308242798,grad_norm: 0.9999991790845445, iteration: 25594
loss: 1.1769905090332031,grad_norm: 0.999999172704528, iteration: 25595
loss: 1.2058409452438354,grad_norm: 0.9999994898720491, iteration: 25596
loss: 1.119073748588562,grad_norm: 1.0000000106972873, iteration: 25597
loss: 1.0361486673355103,grad_norm: 0.9073177104615125, iteration: 25598
loss: 0.9797268509864807,grad_norm: 0.9999989924054069, iteration: 25599
loss: 1.0045770406723022,grad_norm: 0.9488569985054743, iteration: 25600
loss: 1.026346206665039,grad_norm: 0.9999992110440534, iteration: 25601
loss: 1.0163519382476807,grad_norm: 0.9078153126997484, iteration: 25602
loss: 0.9976981282234192,grad_norm: 0.999999244762474, iteration: 25603
loss: 1.0024884939193726,grad_norm: 0.9999991810660143, iteration: 25604
loss: 1.0037798881530762,grad_norm: 0.9999990069085944, iteration: 25605
loss: 1.016032099723816,grad_norm: 0.9399052415789142, iteration: 25606
loss: 1.0025643110275269,grad_norm: 0.9999993109974074, iteration: 25607
loss: 1.0221142768859863,grad_norm: 0.9999996946684271, iteration: 25608
loss: 1.055166482925415,grad_norm: 0.9999992564987478, iteration: 25609
loss: 1.0092941522598267,grad_norm: 0.8165446495008188, iteration: 25610
loss: 1.05439293384552,grad_norm: 0.9999993830936492, iteration: 25611
loss: 0.9729725122451782,grad_norm: 0.9999991281801826, iteration: 25612
loss: 0.9927205443382263,grad_norm: 0.9999991312633904, iteration: 25613
loss: 1.0154991149902344,grad_norm: 0.999999310062318, iteration: 25614
loss: 1.0300465822219849,grad_norm: 0.9577206592796613, iteration: 25615
loss: 1.0447880029678345,grad_norm: 0.9999992591137227, iteration: 25616
loss: 1.1017183065414429,grad_norm: 0.9999995702405824, iteration: 25617
loss: 0.9834814667701721,grad_norm: 0.9999991206574532, iteration: 25618
loss: 1.030210018157959,grad_norm: 0.9999994082563143, iteration: 25619
loss: 1.0029053688049316,grad_norm: 0.999999082836845, iteration: 25620
loss: 1.0332635641098022,grad_norm: 0.9999991249352913, iteration: 25621
loss: 1.027851939201355,grad_norm: 0.9999990562734344, iteration: 25622
loss: 1.0260159969329834,grad_norm: 0.9999996826903647, iteration: 25623
loss: 1.046028733253479,grad_norm: 0.9999994939019303, iteration: 25624
loss: 1.0516235828399658,grad_norm: 0.9999991777128766, iteration: 25625
loss: 1.0207860469818115,grad_norm: 0.9633645898848715, iteration: 25626
loss: 1.072834849357605,grad_norm: 0.9999994386088756, iteration: 25627
loss: 0.9889930486679077,grad_norm: 0.8185269694042012, iteration: 25628
loss: 0.993270218372345,grad_norm: 0.9999995887507986, iteration: 25629
loss: 1.0252190828323364,grad_norm: 0.9999992045962894, iteration: 25630
loss: 1.129622459411621,grad_norm: 0.9999997821582471, iteration: 25631
loss: 1.0016177892684937,grad_norm: 0.9433941451475065, iteration: 25632
loss: 1.0141030550003052,grad_norm: 0.9647186109729982, iteration: 25633
loss: 1.05573570728302,grad_norm: 0.9999991702670429, iteration: 25634
loss: 1.0390318632125854,grad_norm: 0.999998994677558, iteration: 25635
loss: 1.0351208448410034,grad_norm: 0.8536743969538632, iteration: 25636
loss: 1.0207152366638184,grad_norm: 0.9999995198217287, iteration: 25637
loss: 1.015074610710144,grad_norm: 0.9999991367491378, iteration: 25638
loss: 1.0071821212768555,grad_norm: 0.9379050671464849, iteration: 25639
loss: 0.9930505156517029,grad_norm: 0.9999991580533605, iteration: 25640
loss: 0.9983770251274109,grad_norm: 0.8855751600899907, iteration: 25641
loss: 0.9760012626647949,grad_norm: 0.9469205022802267, iteration: 25642
loss: 1.0188958644866943,grad_norm: 0.9999991495878549, iteration: 25643
loss: 1.0324476957321167,grad_norm: 0.9661057581712595, iteration: 25644
loss: 1.0235179662704468,grad_norm: 0.9999991314796572, iteration: 25645
loss: 1.040594458580017,grad_norm: 0.8699224620908331, iteration: 25646
loss: 1.0111569166183472,grad_norm: 0.9999991887802696, iteration: 25647
loss: 1.0207128524780273,grad_norm: 0.9999990765638278, iteration: 25648
loss: 1.005686640739441,grad_norm: 0.9999991637656025, iteration: 25649
loss: 1.0782185792922974,grad_norm: 0.9999998038393272, iteration: 25650
loss: 0.9553327560424805,grad_norm: 0.9999992578407421, iteration: 25651
loss: 1.039554476737976,grad_norm: 0.9776013738112185, iteration: 25652
loss: 1.0282224416732788,grad_norm: 0.9999992067297493, iteration: 25653
loss: 1.0878514051437378,grad_norm: 0.9999991936375592, iteration: 25654
loss: 1.0436629056930542,grad_norm: 0.9999997986673519, iteration: 25655
loss: 0.9944981932640076,grad_norm: 0.9431217241346441, iteration: 25656
loss: 1.0776492357254028,grad_norm: 0.9999999061764865, iteration: 25657
loss: 1.0323795080184937,grad_norm: 0.9999991985671016, iteration: 25658
loss: 1.0329513549804688,grad_norm: 0.9999990622755353, iteration: 25659
loss: 1.007737398147583,grad_norm: 0.9999990516836299, iteration: 25660
loss: 0.9757475256919861,grad_norm: 0.9999990883803194, iteration: 25661
loss: 1.0186123847961426,grad_norm: 0.999999110961067, iteration: 25662
loss: 1.0384087562561035,grad_norm: 0.9999992641969838, iteration: 25663
loss: 0.9694584012031555,grad_norm: 0.9999991046993635, iteration: 25664
loss: 0.9941603541374207,grad_norm: 0.9631855593848672, iteration: 25665
loss: 1.0155143737792969,grad_norm: 0.8935384658190509, iteration: 25666
loss: 1.0173507928848267,grad_norm: 0.999999289977555, iteration: 25667
loss: 0.9733746647834778,grad_norm: 0.9999993396085254, iteration: 25668
loss: 0.9936942458152771,grad_norm: 0.9999990527082706, iteration: 25669
loss: 1.003316044807434,grad_norm: 0.9999994252127721, iteration: 25670
loss: 1.0453566312789917,grad_norm: 0.9999992217904328, iteration: 25671
loss: 1.0606317520141602,grad_norm: 0.9999992598086069, iteration: 25672
loss: 1.0076524019241333,grad_norm: 0.999999086036822, iteration: 25673
loss: 0.9724108576774597,grad_norm: 0.9953502661679979, iteration: 25674
loss: 0.9850451350212097,grad_norm: 0.8763578406677532, iteration: 25675
loss: 1.0042883157730103,grad_norm: 0.9999990997480471, iteration: 25676
loss: 1.0462381839752197,grad_norm: 0.8579638462376687, iteration: 25677
loss: 0.9767979383468628,grad_norm: 0.9999990266292078, iteration: 25678
loss: 1.0381134748458862,grad_norm: 0.9999991226127204, iteration: 25679
loss: 1.0065300464630127,grad_norm: 0.999998911720976, iteration: 25680
loss: 1.0165092945098877,grad_norm: 0.9231129128082278, iteration: 25681
loss: 0.9883509278297424,grad_norm: 0.9274457714369436, iteration: 25682
loss: 1.0064165592193604,grad_norm: 0.9999990668437398, iteration: 25683
loss: 1.0588569641113281,grad_norm: 0.9999997505745707, iteration: 25684
loss: 1.0157551765441895,grad_norm: 0.9470776241437765, iteration: 25685
loss: 0.9861087203025818,grad_norm: 0.9999992105974389, iteration: 25686
loss: 0.9990124106407166,grad_norm: 0.9456187993165551, iteration: 25687
loss: 0.9964799880981445,grad_norm: 0.8664236915405423, iteration: 25688
loss: 1.0289182662963867,grad_norm: 0.9999990767600346, iteration: 25689
loss: 1.0550737380981445,grad_norm: 0.9999998140352003, iteration: 25690
loss: 0.9884796142578125,grad_norm: 0.9665304228308225, iteration: 25691
loss: 1.0175175666809082,grad_norm: 0.9999994414272216, iteration: 25692
loss: 1.0229392051696777,grad_norm: 0.9999991090476353, iteration: 25693
loss: 1.0122976303100586,grad_norm: 0.927220254705972, iteration: 25694
loss: 1.020944595336914,grad_norm: 0.9999995904773894, iteration: 25695
loss: 0.9894833564758301,grad_norm: 0.9464941642169535, iteration: 25696
loss: 1.0104540586471558,grad_norm: 0.9999991040713724, iteration: 25697
loss: 0.9855248928070068,grad_norm: 0.8131786544114412, iteration: 25698
loss: 1.0031747817993164,grad_norm: 0.9078973173460412, iteration: 25699
loss: 0.9761127233505249,grad_norm: 0.933458786100947, iteration: 25700
loss: 1.0649974346160889,grad_norm: 0.9999997810350636, iteration: 25701
loss: 1.0069736242294312,grad_norm: 0.8294966122197751, iteration: 25702
loss: 0.9970545172691345,grad_norm: 0.8569967707905652, iteration: 25703
loss: 0.9824966192245483,grad_norm: 0.8567300955767897, iteration: 25704
loss: 0.9980432391166687,grad_norm: 0.9999998950602842, iteration: 25705
loss: 1.0278799533843994,grad_norm: 0.9999993997152942, iteration: 25706
loss: 1.0522631406784058,grad_norm: 0.999999051885215, iteration: 25707
loss: 1.0290144681930542,grad_norm: 0.8797349633539353, iteration: 25708
loss: 1.0826194286346436,grad_norm: 0.9999994073168327, iteration: 25709
loss: 1.0237900018692017,grad_norm: 0.9999990999408145, iteration: 25710
loss: 1.0286647081375122,grad_norm: 0.9542247754068452, iteration: 25711
loss: 0.963152289390564,grad_norm: 0.9432727174069414, iteration: 25712
loss: 0.9845033288002014,grad_norm: 0.9942322532535011, iteration: 25713
loss: 1.0900990962982178,grad_norm: 0.999999374328583, iteration: 25714
loss: 1.0511153936386108,grad_norm: 0.9999997971676423, iteration: 25715
loss: 1.0138353109359741,grad_norm: 0.9999992563634676, iteration: 25716
loss: 0.9635066986083984,grad_norm: 0.9999990930463792, iteration: 25717
loss: 1.028356909751892,grad_norm: 0.9999990820256212, iteration: 25718
loss: 1.0025144815444946,grad_norm: 0.9998285462487833, iteration: 25719
loss: 0.9871951937675476,grad_norm: 0.9999991747036024, iteration: 25720
loss: 1.04120671749115,grad_norm: 0.9999992948551693, iteration: 25721
loss: 1.0687713623046875,grad_norm: 0.999999711479403, iteration: 25722
loss: 1.0414676666259766,grad_norm: 0.8972158582202275, iteration: 25723
loss: 1.01557457447052,grad_norm: 0.9017881107147978, iteration: 25724
loss: 1.0076979398727417,grad_norm: 0.9999989278364041, iteration: 25725
loss: 1.0603832006454468,grad_norm: 0.8719595516520139, iteration: 25726
loss: 1.0053294897079468,grad_norm: 0.9999995808555654, iteration: 25727
loss: 1.0015287399291992,grad_norm: 0.9999991871598843, iteration: 25728
loss: 1.0106430053710938,grad_norm: 0.999998970948376, iteration: 25729
loss: 1.0210365056991577,grad_norm: 0.9999992762789514, iteration: 25730
loss: 0.998382031917572,grad_norm: 0.9999992722392678, iteration: 25731
loss: 0.9941996932029724,grad_norm: 0.9671255431214395, iteration: 25732
loss: 1.0191274881362915,grad_norm: 0.9173304827302883, iteration: 25733
loss: 1.0142143964767456,grad_norm: 0.9844454775181682, iteration: 25734
loss: 1.0481932163238525,grad_norm: 0.9999993258548099, iteration: 25735
loss: 1.1070317029953003,grad_norm: 0.9999995201476268, iteration: 25736
loss: 1.0254441499710083,grad_norm: 0.999999012730101, iteration: 25737
loss: 0.9856234788894653,grad_norm: 0.8948959126230658, iteration: 25738
loss: 1.0363898277282715,grad_norm: 0.9999995949235849, iteration: 25739
loss: 1.0159095525741577,grad_norm: 0.9999991990970383, iteration: 25740
loss: 0.9978849291801453,grad_norm: 0.9999990140095264, iteration: 25741
loss: 1.0164148807525635,grad_norm: 0.8616616651268558, iteration: 25742
loss: 0.9898455142974854,grad_norm: 0.9161923840806225, iteration: 25743
loss: 1.0062553882598877,grad_norm: 0.9999991267866328, iteration: 25744
loss: 1.0441887378692627,grad_norm: 0.9999999422075543, iteration: 25745
loss: 0.9962223768234253,grad_norm: 0.9999991910642807, iteration: 25746
loss: 0.9993627667427063,grad_norm: 0.9999990783075882, iteration: 25747
loss: 1.005524754524231,grad_norm: 0.9999992891462497, iteration: 25748
loss: 1.028691291809082,grad_norm: 0.9999991542850551, iteration: 25749
loss: 1.0640043020248413,grad_norm: 0.9999991473496141, iteration: 25750
loss: 1.0440078973770142,grad_norm: 0.9999996146486911, iteration: 25751
loss: 1.0036317110061646,grad_norm: 0.9999990039390811, iteration: 25752
loss: 0.9987919330596924,grad_norm: 0.9742065687666464, iteration: 25753
loss: 1.0150930881500244,grad_norm: 0.99999924323508, iteration: 25754
loss: 1.0235331058502197,grad_norm: 0.9999990831306247, iteration: 25755
loss: 1.0019700527191162,grad_norm: 0.925830739833205, iteration: 25756
loss: 0.9887603521347046,grad_norm: 0.9999990479876262, iteration: 25757
loss: 1.0445934534072876,grad_norm: 0.9999993711274043, iteration: 25758
loss: 1.032713770866394,grad_norm: 0.9999990316796622, iteration: 25759
loss: 1.0250630378723145,grad_norm: 0.9999990349200671, iteration: 25760
loss: 1.0749582052230835,grad_norm: 0.9999991801307384, iteration: 25761
loss: 1.002018928527832,grad_norm: 0.9999990154279887, iteration: 25762
loss: 1.0093083381652832,grad_norm: 0.9999997053535777, iteration: 25763
loss: 1.1213796138763428,grad_norm: 0.9999991386151995, iteration: 25764
loss: 1.0296896696090698,grad_norm: 0.9999994328272546, iteration: 25765
loss: 1.0652920007705688,grad_norm: 0.99999961776322, iteration: 25766
loss: 0.9962900280952454,grad_norm: 0.999999654783278, iteration: 25767
loss: 1.0102038383483887,grad_norm: 0.9999991802071563, iteration: 25768
loss: 1.03506338596344,grad_norm: 0.9999993217136491, iteration: 25769
loss: 1.0306079387664795,grad_norm: 0.999999681358049, iteration: 25770
loss: 1.04464590549469,grad_norm: 0.9999993526983746, iteration: 25771
loss: 0.9873073101043701,grad_norm: 0.8642481938193628, iteration: 25772
loss: 1.0094037055969238,grad_norm: 0.9999992492112372, iteration: 25773
loss: 1.0145643949508667,grad_norm: 0.9999991874054859, iteration: 25774
loss: 1.0508477687835693,grad_norm: 0.9999995716416972, iteration: 25775
loss: 1.0643311738967896,grad_norm: 0.9999990714859833, iteration: 25776
loss: 0.9947143793106079,grad_norm: 0.9999991317445706, iteration: 25777
loss: 1.0444989204406738,grad_norm: 0.9999991168600536, iteration: 25778
loss: 1.0158600807189941,grad_norm: 0.9999998194878575, iteration: 25779
loss: 1.0721983909606934,grad_norm: 0.9999995650281437, iteration: 25780
loss: 1.0203626155853271,grad_norm: 0.9999991746724024, iteration: 25781
loss: 0.9894174933433533,grad_norm: 0.9999990871103243, iteration: 25782
loss: 1.0170360803604126,grad_norm: 0.9751556477289156, iteration: 25783
loss: 1.022667646408081,grad_norm: 0.9999992777572309, iteration: 25784
loss: 1.0099016427993774,grad_norm: 0.9999994755889993, iteration: 25785
loss: 1.0477148294448853,grad_norm: 0.9999993230421463, iteration: 25786
loss: 0.9928858280181885,grad_norm: 0.9999996503543461, iteration: 25787
loss: 1.0127413272857666,grad_norm: 0.9999991723470357, iteration: 25788
loss: 1.0117241144180298,grad_norm: 0.999999415176204, iteration: 25789
loss: 1.0795533657073975,grad_norm: 0.9999999284723838, iteration: 25790
loss: 1.0370243787765503,grad_norm: 0.9999993747327017, iteration: 25791
loss: 1.0270018577575684,grad_norm: 0.9999996840710818, iteration: 25792
loss: 1.020163893699646,grad_norm: 0.9999991734446035, iteration: 25793
loss: 1.0092463493347168,grad_norm: 0.9999994530387819, iteration: 25794
loss: 1.010311484336853,grad_norm: 0.9999992179416504, iteration: 25795
loss: 1.0816055536270142,grad_norm: 0.9999998077308142, iteration: 25796
loss: 1.0138187408447266,grad_norm: 0.9999994751003788, iteration: 25797
loss: 1.0517951250076294,grad_norm: 0.9999994940993127, iteration: 25798
loss: 0.9813850522041321,grad_norm: 0.9733311162354211, iteration: 25799
loss: 1.0119822025299072,grad_norm: 0.8299308230885414, iteration: 25800
loss: 1.0765236616134644,grad_norm: 0.9999997429010956, iteration: 25801
loss: 1.1660244464874268,grad_norm: 0.9999998546730019, iteration: 25802
loss: 1.033661961555481,grad_norm: 0.9999991002403633, iteration: 25803
loss: 1.0206531286239624,grad_norm: 0.9999995039168662, iteration: 25804
loss: 1.036697506904602,grad_norm: 0.9999993846658637, iteration: 25805
loss: 1.0845742225646973,grad_norm: 0.9999991511224016, iteration: 25806
loss: 1.1327019929885864,grad_norm: 0.9999997742600858, iteration: 25807
loss: 1.025547742843628,grad_norm: 0.9999997311574123, iteration: 25808
loss: 1.012433648109436,grad_norm: 0.9999992955709737, iteration: 25809
loss: 0.9994663000106812,grad_norm: 0.9999995254110854, iteration: 25810
loss: 0.9791816473007202,grad_norm: 0.9906862764015787, iteration: 25811
loss: 0.9962446689605713,grad_norm: 0.9999994230855533, iteration: 25812
loss: 0.967637836933136,grad_norm: 0.9999991333132299, iteration: 25813
loss: 1.0273091793060303,grad_norm: 0.9461664081160001, iteration: 25814
loss: 1.0901285409927368,grad_norm: 0.9999999761713947, iteration: 25815
loss: 1.007799506187439,grad_norm: 0.9140363429659731, iteration: 25816
loss: 0.9909995794296265,grad_norm: 0.9999990372883654, iteration: 25817
loss: 1.0017377138137817,grad_norm: 0.999999218998477, iteration: 25818
loss: 1.0328655242919922,grad_norm: 0.9999991699923707, iteration: 25819
loss: 1.0222156047821045,grad_norm: 0.9999990786466226, iteration: 25820
loss: 1.0245110988616943,grad_norm: 0.9999995655004847, iteration: 25821
loss: 1.0148576498031616,grad_norm: 0.9952164453188198, iteration: 25822
loss: 0.9808281660079956,grad_norm: 0.9999993663139112, iteration: 25823
loss: 0.9700891971588135,grad_norm: 0.9936130527152808, iteration: 25824
loss: 0.9755223393440247,grad_norm: 0.9999990760719129, iteration: 25825
loss: 1.0334737300872803,grad_norm: 0.8617017756167763, iteration: 25826
loss: 1.0624074935913086,grad_norm: 0.99999982651293, iteration: 25827
loss: 0.9854134321212769,grad_norm: 0.9540529400725578, iteration: 25828
loss: 1.025888442993164,grad_norm: 0.9999991901531354, iteration: 25829
loss: 1.0303717851638794,grad_norm: 0.9999996558995354, iteration: 25830
loss: 1.0131332874298096,grad_norm: 0.9358390710972675, iteration: 25831
loss: 1.0077848434448242,grad_norm: 0.9817139587361926, iteration: 25832
loss: 1.0434256792068481,grad_norm: 0.999999329429317, iteration: 25833
loss: 1.0412185192108154,grad_norm: 0.9999990899071811, iteration: 25834
loss: 0.9756150245666504,grad_norm: 0.9351746256955387, iteration: 25835
loss: 1.0435758829116821,grad_norm: 0.9999994463097307, iteration: 25836
loss: 0.9879838824272156,grad_norm: 0.9999999109849415, iteration: 25837
loss: 1.0235037803649902,grad_norm: 0.9091152243606899, iteration: 25838
loss: 1.0579123497009277,grad_norm: 0.9999992017139465, iteration: 25839
loss: 0.9993563890457153,grad_norm: 0.8568765279256847, iteration: 25840
loss: 1.0145726203918457,grad_norm: 0.9999994441025722, iteration: 25841
loss: 1.008791208267212,grad_norm: 0.9999990924888225, iteration: 25842
loss: 1.0221350193023682,grad_norm: 0.9999991570275245, iteration: 25843
loss: 1.0061007738113403,grad_norm: 0.9386311123937315, iteration: 25844
loss: 1.026656985282898,grad_norm: 0.9999989656253169, iteration: 25845
loss: 1.0225359201431274,grad_norm: 0.9999990869524933, iteration: 25846
loss: 1.064827799797058,grad_norm: 0.999999234517092, iteration: 25847
loss: 1.0101418495178223,grad_norm: 0.9999992941916129, iteration: 25848
loss: 1.0115870237350464,grad_norm: 0.9999991905874668, iteration: 25849
loss: 0.9962798357009888,grad_norm: 0.9999989696812432, iteration: 25850
loss: 1.0514593124389648,grad_norm: 0.9999991376582191, iteration: 25851
loss: 1.0315518379211426,grad_norm: 0.9999996255375139, iteration: 25852
loss: 1.078446865081787,grad_norm: 0.9999995022578997, iteration: 25853
loss: 1.024205207824707,grad_norm: 0.7805849623413922, iteration: 25854
loss: 1.0156060457229614,grad_norm: 0.9999995181090687, iteration: 25855
loss: 0.9774721264839172,grad_norm: 0.9999992011009973, iteration: 25856
loss: 0.9683984518051147,grad_norm: 0.999999172552523, iteration: 25857
loss: 1.0330106019973755,grad_norm: 0.9999997209918687, iteration: 25858
loss: 0.9776702523231506,grad_norm: 0.9999991848141015, iteration: 25859
loss: 1.034746527671814,grad_norm: 0.999999229645243, iteration: 25860
loss: 1.1650179624557495,grad_norm: 0.9999997881445253, iteration: 25861
loss: 1.010764718055725,grad_norm: 0.8826180462093292, iteration: 25862
loss: 0.9975690245628357,grad_norm: 0.9999992348286054, iteration: 25863
loss: 0.9943496584892273,grad_norm: 0.9999998112514507, iteration: 25864
loss: 0.9989964365959167,grad_norm: 0.8992634051960784, iteration: 25865
loss: 0.9648961424827576,grad_norm: 0.9999989037874663, iteration: 25866
loss: 1.058337926864624,grad_norm: 0.9999991520566335, iteration: 25867
loss: 0.983466625213623,grad_norm: 0.9050903403510279, iteration: 25868
loss: 0.9981178045272827,grad_norm: 0.9368120371456504, iteration: 25869
loss: 1.0411708354949951,grad_norm: 0.9999990775975109, iteration: 25870
loss: 1.0314503908157349,grad_norm: 0.9999993195435292, iteration: 25871
loss: 1.0489059686660767,grad_norm: 0.9999998267551683, iteration: 25872
loss: 0.9981300830841064,grad_norm: 0.8825373258069049, iteration: 25873
loss: 0.993642270565033,grad_norm: 0.9111000828192243, iteration: 25874
loss: 1.0034875869750977,grad_norm: 0.9999990421651173, iteration: 25875
loss: 1.004676103591919,grad_norm: 0.9730453255010932, iteration: 25876
loss: 1.014827013015747,grad_norm: 0.9256101153160764, iteration: 25877
loss: 0.9844416975975037,grad_norm: 0.8715448288079436, iteration: 25878
loss: 1.0705249309539795,grad_norm: 0.9999996088223306, iteration: 25879
loss: 1.0045229196548462,grad_norm: 0.9999993300854798, iteration: 25880
loss: 1.0027024745941162,grad_norm: 0.9999991620735156, iteration: 25881
loss: 1.028046727180481,grad_norm: 0.9943542647369269, iteration: 25882
loss: 0.9672473669052124,grad_norm: 0.8178240554365936, iteration: 25883
loss: 1.0028427839279175,grad_norm: 0.8957947330166423, iteration: 25884
loss: 1.0203094482421875,grad_norm: 0.9999992888679957, iteration: 25885
loss: 1.0367188453674316,grad_norm: 0.9640451573491923, iteration: 25886
loss: 1.0086826086044312,grad_norm: 0.9999995950496959, iteration: 25887
loss: 0.973540723323822,grad_norm: 0.9603157481798005, iteration: 25888
loss: 0.9508923292160034,grad_norm: 0.9889236725844088, iteration: 25889
loss: 1.0180182456970215,grad_norm: 0.9830894281530602, iteration: 25890
loss: 1.0181028842926025,grad_norm: 0.9999992102639406, iteration: 25891
loss: 0.99791020154953,grad_norm: 0.9999993555829604, iteration: 25892
loss: 1.0999623537063599,grad_norm: 0.9999991767455452, iteration: 25893
loss: 1.0279377698898315,grad_norm: 0.9999990309090361, iteration: 25894
loss: 1.0646984577178955,grad_norm: 0.9999998111340657, iteration: 25895
loss: 1.0674211978912354,grad_norm: 0.9999991227856119, iteration: 25896
loss: 1.0069085359573364,grad_norm: 0.9999991900485852, iteration: 25897
loss: 1.0360369682312012,grad_norm: 0.999999683434852, iteration: 25898
loss: 1.0214923620224,grad_norm: 0.9999994404563228, iteration: 25899
loss: 1.0053887367248535,grad_norm: 0.8808968960290852, iteration: 25900
loss: 1.0371882915496826,grad_norm: 0.9999993016617668, iteration: 25901
loss: 0.9820918440818787,grad_norm: 0.7371677427356845, iteration: 25902
loss: 1.0213029384613037,grad_norm: 0.9999990580555586, iteration: 25903
loss: 1.0459468364715576,grad_norm: 0.9689319950571396, iteration: 25904
loss: 1.0591070652008057,grad_norm: 0.9999993551903422, iteration: 25905
loss: 0.976392924785614,grad_norm: 0.999998956338461, iteration: 25906
loss: 1.0128657817840576,grad_norm: 0.9999991388076382, iteration: 25907
loss: 1.0171029567718506,grad_norm: 0.7888572216184884, iteration: 25908
loss: 1.0327956676483154,grad_norm: 0.9999994840606425, iteration: 25909
loss: 1.0844985246658325,grad_norm: 0.9999996387638194, iteration: 25910
loss: 0.965078592300415,grad_norm: 0.999999263876941, iteration: 25911
loss: 0.9901336431503296,grad_norm: 0.9999990887544674, iteration: 25912
loss: 0.9772360920906067,grad_norm: 0.9999991373906479, iteration: 25913
loss: 0.9696305394172668,grad_norm: 0.9920064686318464, iteration: 25914
loss: 1.0064870119094849,grad_norm: 0.9999994422058907, iteration: 25915
loss: 1.0049532651901245,grad_norm: 0.9999989340039402, iteration: 25916
loss: 0.9961240887641907,grad_norm: 0.9999992126233276, iteration: 25917
loss: 1.0036463737487793,grad_norm: 0.999999095311083, iteration: 25918
loss: 1.0124118328094482,grad_norm: 0.9999991029391334, iteration: 25919
loss: 1.0038506984710693,grad_norm: 0.9999992500367951, iteration: 25920
loss: 1.0418353080749512,grad_norm: 0.9999992980679004, iteration: 25921
loss: 1.0367976427078247,grad_norm: 0.9999993739285615, iteration: 25922
loss: 1.0369958877563477,grad_norm: 0.9999993641646829, iteration: 25923
loss: 1.0367103815078735,grad_norm: 0.9140679060982673, iteration: 25924
loss: 1.0171139240264893,grad_norm: 0.9999989931960231, iteration: 25925
loss: 1.0421321392059326,grad_norm: 0.9999993975603796, iteration: 25926
loss: 1.0219100713729858,grad_norm: 0.9999991390074704, iteration: 25927
loss: 0.9754675030708313,grad_norm: 0.9999991326865307, iteration: 25928
loss: 0.9572420716285706,grad_norm: 0.9999991223692454, iteration: 25929
loss: 1.0488481521606445,grad_norm: 0.9999991850312268, iteration: 25930
loss: 0.9989776015281677,grad_norm: 0.9999991354861473, iteration: 25931
loss: 0.9910597801208496,grad_norm: 0.9999991365286027, iteration: 25932
loss: 1.0717668533325195,grad_norm: 0.9999991202602674, iteration: 25933
loss: 1.0014755725860596,grad_norm: 0.9999992523394766, iteration: 25934
loss: 0.995511531829834,grad_norm: 0.9999992262219516, iteration: 25935
loss: 1.061943531036377,grad_norm: 0.9999991776128233, iteration: 25936
loss: 0.9928986430168152,grad_norm: 0.9327648768647362, iteration: 25937
loss: 0.9872443675994873,grad_norm: 0.999999498158111, iteration: 25938
loss: 0.9874868988990784,grad_norm: 0.9999992403052826, iteration: 25939
loss: 1.0473480224609375,grad_norm: 0.9999992443946565, iteration: 25940
loss: 0.9945517778396606,grad_norm: 0.8421376299930846, iteration: 25941
loss: 0.949800431728363,grad_norm: 0.9999992843855402, iteration: 25942
loss: 0.9659181833267212,grad_norm: 0.999999087940329, iteration: 25943
loss: 1.0151082277297974,grad_norm: 0.9999990575432243, iteration: 25944
loss: 1.0358392000198364,grad_norm: 0.9999991415955344, iteration: 25945
loss: 1.046501874923706,grad_norm: 0.9999990841677177, iteration: 25946
loss: 0.9759230613708496,grad_norm: 0.9999990600665356, iteration: 25947
loss: 0.9828343987464905,grad_norm: 0.9872829619710649, iteration: 25948
loss: 0.9627699851989746,grad_norm: 0.833159423045873, iteration: 25949
loss: 0.990138053894043,grad_norm: 0.9999991122367177, iteration: 25950
loss: 0.9958005547523499,grad_norm: 0.9999992434175556, iteration: 25951
loss: 1.0256518125534058,grad_norm: 0.9731971735280227, iteration: 25952
loss: 1.0046665668487549,grad_norm: 0.9999993379254314, iteration: 25953
loss: 0.9949144124984741,grad_norm: 0.9479136036856739, iteration: 25954
loss: 1.0438404083251953,grad_norm: 0.9999990800016837, iteration: 25955
loss: 1.0174561738967896,grad_norm: 0.8128248098123685, iteration: 25956
loss: 1.0305670499801636,grad_norm: 0.9999991021401264, iteration: 25957
loss: 1.0660121440887451,grad_norm: 0.9999991947458476, iteration: 25958
loss: 0.9962120652198792,grad_norm: 0.9999994154101699, iteration: 25959
loss: 1.001707911491394,grad_norm: 0.7903900686806458, iteration: 25960
loss: 1.013939380645752,grad_norm: 0.9999994276985404, iteration: 25961
loss: 0.9809769988059998,grad_norm: 0.9999991842031762, iteration: 25962
loss: 0.9958162307739258,grad_norm: 0.9999994462383922, iteration: 25963
loss: 1.0105394124984741,grad_norm: 0.9894423657655238, iteration: 25964
loss: 1.0147923231124878,grad_norm: 0.9999991665006557, iteration: 25965
loss: 1.052385687828064,grad_norm: 0.9999990828668333, iteration: 25966
loss: 1.0275626182556152,grad_norm: 0.8640948599758252, iteration: 25967
loss: 0.9905182123184204,grad_norm: 0.9505171998250183, iteration: 25968
loss: 1.001670241355896,grad_norm: 0.9999998147343744, iteration: 25969
loss: 1.022510051727295,grad_norm: 0.9999992328477181, iteration: 25970
loss: 1.0016522407531738,grad_norm: 0.9999993934480591, iteration: 25971
loss: 1.0489381551742554,grad_norm: 0.9988836667253032, iteration: 25972
loss: 1.015295147895813,grad_norm: 0.9999991663919191, iteration: 25973
loss: 0.9918903708457947,grad_norm: 0.9611787091460542, iteration: 25974
loss: 1.0048770904541016,grad_norm: 0.9999990914844481, iteration: 25975
loss: 1.0099117755889893,grad_norm: 0.8141666226361082, iteration: 25976
loss: 1.030625581741333,grad_norm: 0.9999989953915076, iteration: 25977
loss: 0.9934021234512329,grad_norm: 0.8127931572571323, iteration: 25978
loss: 0.9797843098640442,grad_norm: 0.9999990888440732, iteration: 25979
loss: 0.9888233542442322,grad_norm: 0.9999991479596995, iteration: 25980
loss: 1.0111995935440063,grad_norm: 0.999999046055543, iteration: 25981
loss: 0.9926126003265381,grad_norm: 0.9503821190712924, iteration: 25982
loss: 1.035857081413269,grad_norm: 0.9999998470615427, iteration: 25983
loss: 1.0523531436920166,grad_norm: 0.999999228728788, iteration: 25984
loss: 0.9677326679229736,grad_norm: 0.999999378920542, iteration: 25985
loss: 1.0239921808242798,grad_norm: 0.8625158092940486, iteration: 25986
loss: 0.9959427118301392,grad_norm: 0.9999991601246276, iteration: 25987
loss: 0.9924387335777283,grad_norm: 0.9999990539782302, iteration: 25988
loss: 1.0359702110290527,grad_norm: 0.9939880895114012, iteration: 25989
loss: 1.0581988096237183,grad_norm: 0.9999996178597652, iteration: 25990
loss: 1.0192888975143433,grad_norm: 0.9139493231323663, iteration: 25991
loss: 0.9888602495193481,grad_norm: 0.9999990015072409, iteration: 25992
loss: 1.0003787279129028,grad_norm: 0.9999991448579895, iteration: 25993
loss: 0.9875147938728333,grad_norm: 0.9999989656650928, iteration: 25994
loss: 1.0399729013442993,grad_norm: 0.9999996504474232, iteration: 25995
loss: 1.0146161317825317,grad_norm: 0.9212595498107536, iteration: 25996
loss: 1.0403497219085693,grad_norm: 0.9999990739506106, iteration: 25997
loss: 0.9993676543235779,grad_norm: 0.8908417424035715, iteration: 25998
loss: 0.9986130595207214,grad_norm: 0.9999993691367093, iteration: 25999
loss: 1.0022151470184326,grad_norm: 0.8278909625707997, iteration: 26000
loss: 1.0149120092391968,grad_norm: 0.9999991357658367, iteration: 26001
loss: 0.9651451706886292,grad_norm: 0.9123742200809042, iteration: 26002
loss: 0.9965804219245911,grad_norm: 0.9933779930399003, iteration: 26003
loss: 0.9789220094680786,grad_norm: 0.8833213596454004, iteration: 26004
loss: 0.9954891204833984,grad_norm: 0.9402571052709667, iteration: 26005
loss: 1.0184775590896606,grad_norm: 0.9999991759104239, iteration: 26006
loss: 1.0416349172592163,grad_norm: 0.999999303043129, iteration: 26007
loss: 1.0054391622543335,grad_norm: 0.9999998675600588, iteration: 26008
loss: 1.0165094137191772,grad_norm: 0.9999991788811761, iteration: 26009
loss: 1.0591988563537598,grad_norm: 0.9999991992844253, iteration: 26010
loss: 1.007693886756897,grad_norm: 0.9999992728994448, iteration: 26011
loss: 1.0207263231277466,grad_norm: 0.9999994818878081, iteration: 26012
loss: 1.0209863185882568,grad_norm: 0.99999950626585, iteration: 26013
loss: 0.9750198721885681,grad_norm: 0.9279989470107063, iteration: 26014
loss: 0.996816873550415,grad_norm: 0.9999989857410928, iteration: 26015
loss: 1.0509319305419922,grad_norm: 0.9999996465883552, iteration: 26016
loss: 1.0125305652618408,grad_norm: 0.9999990602432942, iteration: 26017
loss: 1.0137183666229248,grad_norm: 0.9480934593064331, iteration: 26018
loss: 1.005827784538269,grad_norm: 0.9999990949373769, iteration: 26019
loss: 1.0253677368164062,grad_norm: 0.9999991514198703, iteration: 26020
loss: 0.9798346757888794,grad_norm: 0.9999992420612536, iteration: 26021
loss: 1.0047112703323364,grad_norm: 0.9210642748992256, iteration: 26022
loss: 1.05142343044281,grad_norm: 0.9999993012527242, iteration: 26023
loss: 0.96722412109375,grad_norm: 0.9386790376821831, iteration: 26024
loss: 1.071638822555542,grad_norm: 0.9999995578900973, iteration: 26025
loss: 0.9969258904457092,grad_norm: 0.9999990553411462, iteration: 26026
loss: 1.0193780660629272,grad_norm: 0.9268018928702944, iteration: 26027
loss: 1.0536742210388184,grad_norm: 0.9999990463707165, iteration: 26028
loss: 1.0387932062149048,grad_norm: 0.9999994381686494, iteration: 26029
loss: 1.0404666662216187,grad_norm: 0.9999992652906441, iteration: 26030
loss: 0.9819896221160889,grad_norm: 0.9698265199266899, iteration: 26031
loss: 1.017072081565857,grad_norm: 0.9926488492656772, iteration: 26032
loss: 1.0542925596237183,grad_norm: 0.9999990549941896, iteration: 26033
loss: 0.9946867227554321,grad_norm: 0.9999990416363236, iteration: 26034
loss: 1.0276769399642944,grad_norm: 0.9999998482184458, iteration: 26035
loss: 0.9815625548362732,grad_norm: 0.9999990206101315, iteration: 26036
loss: 1.0029693841934204,grad_norm: 0.9999990762496493, iteration: 26037
loss: 1.0059541463851929,grad_norm: 0.9521020102029869, iteration: 26038
loss: 0.9974566102027893,grad_norm: 0.9857658860775032, iteration: 26039
loss: 0.9930714964866638,grad_norm: 0.9999990475346278, iteration: 26040
loss: 1.0000081062316895,grad_norm: 0.9999994811905808, iteration: 26041
loss: 1.0300673246383667,grad_norm: 0.8902185999477327, iteration: 26042
loss: 1.0188627243041992,grad_norm: 0.9999991688318499, iteration: 26043
loss: 1.0368348360061646,grad_norm: 0.9999991485693764, iteration: 26044
loss: 1.012632966041565,grad_norm: 0.9999991019656981, iteration: 26045
loss: 1.0577155351638794,grad_norm: 0.9999995523653881, iteration: 26046
loss: 1.0179378986358643,grad_norm: 0.9999993595546273, iteration: 26047
loss: 0.9958144426345825,grad_norm: 0.9999991847498704, iteration: 26048
loss: 0.9991504549980164,grad_norm: 0.9999991166783773, iteration: 26049
loss: 1.0118345022201538,grad_norm: 0.9550948559395149, iteration: 26050
loss: 1.024855613708496,grad_norm: 0.9999993023923855, iteration: 26051
loss: 1.0051100254058838,grad_norm: 0.9195684921163654, iteration: 26052
loss: 1.0571669340133667,grad_norm: 0.8446524322852489, iteration: 26053
loss: 1.0082638263702393,grad_norm: 0.999999205958796, iteration: 26054
loss: 1.0098285675048828,grad_norm: 0.8284288797349096, iteration: 26055
loss: 1.0241352319717407,grad_norm: 0.9999995966695033, iteration: 26056
loss: 1.0018118619918823,grad_norm: 0.8866633414191553, iteration: 26057
loss: 0.9952964186668396,grad_norm: 0.9999990694577071, iteration: 26058
loss: 1.0029376745224,grad_norm: 0.9637609035656866, iteration: 26059
loss: 1.0421836376190186,grad_norm: 0.9999992456681527, iteration: 26060
loss: 1.0201233625411987,grad_norm: 0.9984060956027512, iteration: 26061
loss: 1.0594000816345215,grad_norm: 0.999999026528016, iteration: 26062
loss: 1.0092182159423828,grad_norm: 0.9999992008004382, iteration: 26063
loss: 1.0052616596221924,grad_norm: 0.9999990189674014, iteration: 26064
loss: 1.0382351875305176,grad_norm: 0.9951035012702787, iteration: 26065
loss: 1.021928071975708,grad_norm: 0.9999991765087946, iteration: 26066
loss: 1.0140981674194336,grad_norm: 0.9999992450313249, iteration: 26067
loss: 1.012567400932312,grad_norm: 0.9999990961746001, iteration: 26068
loss: 1.0288264751434326,grad_norm: 0.9999991559559139, iteration: 26069
loss: 0.9948990345001221,grad_norm: 0.9999990982967537, iteration: 26070
loss: 1.0163549184799194,grad_norm: 0.9538164952079379, iteration: 26071
loss: 1.0396112203598022,grad_norm: 0.9999993918866554, iteration: 26072
loss: 1.0464926958084106,grad_norm: 0.9999997726652955, iteration: 26073
loss: 1.0431208610534668,grad_norm: 0.9999997408870775, iteration: 26074
loss: 1.003868579864502,grad_norm: 0.9999991414825576, iteration: 26075
loss: 1.0216199159622192,grad_norm: 0.9999991149342522, iteration: 26076
loss: 0.9861937165260315,grad_norm: 0.9999991089822721, iteration: 26077
loss: 0.992249608039856,grad_norm: 0.9999993096378849, iteration: 26078
loss: 0.9799238443374634,grad_norm: 0.9999989621036524, iteration: 26079
loss: 1.008905053138733,grad_norm: 0.8506479645658518, iteration: 26080
loss: 0.9643837213516235,grad_norm: 0.8886451740429453, iteration: 26081
loss: 0.9780447483062744,grad_norm: 0.9150174263183894, iteration: 26082
loss: 1.0076279640197754,grad_norm: 0.9999990644348071, iteration: 26083
loss: 1.009735107421875,grad_norm: 0.999999076097744, iteration: 26084
loss: 1.012850284576416,grad_norm: 0.9999992442955853, iteration: 26085
loss: 1.0178085565567017,grad_norm: 0.9999990908139754, iteration: 26086
loss: 1.0269956588745117,grad_norm: 0.9194933445415888, iteration: 26087
loss: 0.9530107975006104,grad_norm: 0.9561792314859467, iteration: 26088
loss: 1.0407140254974365,grad_norm: 0.999999485025718, iteration: 26089
loss: 0.9605432152748108,grad_norm: 0.9999992271495389, iteration: 26090
loss: 1.0523710250854492,grad_norm: 0.9999991141720354, iteration: 26091
loss: 1.0316828489303589,grad_norm: 0.8870841830260131, iteration: 26092
loss: 0.9760851263999939,grad_norm: 0.9999990356444799, iteration: 26093
loss: 1.0192078351974487,grad_norm: 0.999999382693326, iteration: 26094
loss: 1.0190478563308716,grad_norm: 0.824468600437212, iteration: 26095
loss: 0.9918231964111328,grad_norm: 0.782814071334712, iteration: 26096
loss: 1.0324921607971191,grad_norm: 0.9999998226545035, iteration: 26097
loss: 1.0296376943588257,grad_norm: 0.9999990928665922, iteration: 26098
loss: 0.9944744110107422,grad_norm: 0.9999990853802555, iteration: 26099
loss: 1.0207277536392212,grad_norm: 0.999999254223098, iteration: 26100
loss: 1.0184704065322876,grad_norm: 0.9999992715869371, iteration: 26101
loss: 1.0092847347259521,grad_norm: 0.9999996595698127, iteration: 26102
loss: 1.0759214162826538,grad_norm: 0.9999998376884893, iteration: 26103
loss: 1.0163562297821045,grad_norm: 0.9807632789523789, iteration: 26104
loss: 0.9832333922386169,grad_norm: 0.9506527656416126, iteration: 26105
loss: 0.9856980443000793,grad_norm: 0.9440452268248074, iteration: 26106
loss: 1.0141065120697021,grad_norm: 0.9999990572080196, iteration: 26107
loss: 1.0555189847946167,grad_norm: 0.99584050522792, iteration: 26108
loss: 0.9814521670341492,grad_norm: 0.9999994180368629, iteration: 26109
loss: 1.023318886756897,grad_norm: 0.9999993962648394, iteration: 26110
loss: 1.0558525323867798,grad_norm: 0.9999994951174768, iteration: 26111
loss: 1.023216962814331,grad_norm: 0.9999990799655487, iteration: 26112
loss: 0.9947764873504639,grad_norm: 0.9754066492908798, iteration: 26113
loss: 1.0190991163253784,grad_norm: 0.9999991750964898, iteration: 26114
loss: 1.0735657215118408,grad_norm: 0.9999990731512243, iteration: 26115
loss: 1.0168395042419434,grad_norm: 0.9999989612954696, iteration: 26116
loss: 1.0291657447814941,grad_norm: 0.9999993358573576, iteration: 26117
loss: 1.0225450992584229,grad_norm: 0.9453262264477644, iteration: 26118
loss: 1.0253404378890991,grad_norm: 0.9271674506780619, iteration: 26119
loss: 1.0360249280929565,grad_norm: 0.9999998918019414, iteration: 26120
loss: 1.022929310798645,grad_norm: 0.999999193420776, iteration: 26121
loss: 1.0419193506240845,grad_norm: 0.9999997274356193, iteration: 26122
loss: 1.0707340240478516,grad_norm: 0.9999995462382127, iteration: 26123
loss: 1.0118412971496582,grad_norm: 0.9999994230941427, iteration: 26124
loss: 0.9690777063369751,grad_norm: 0.9999992813209366, iteration: 26125
loss: 1.0232833623886108,grad_norm: 0.9999992592325205, iteration: 26126
loss: 1.0369096994400024,grad_norm: 0.9999993073230393, iteration: 26127
loss: 0.9919908046722412,grad_norm: 0.9760282826347381, iteration: 26128
loss: 0.995988130569458,grad_norm: 0.9999994352384232, iteration: 26129
loss: 1.0799288749694824,grad_norm: 0.9999994517451554, iteration: 26130
loss: 1.030220866203308,grad_norm: 0.9999989623694582, iteration: 26131
loss: 1.023878574371338,grad_norm: 0.999999486795006, iteration: 26132
loss: 1.0040104389190674,grad_norm: 0.999999129794805, iteration: 26133
loss: 1.0327345132827759,grad_norm: 0.9999998037864598, iteration: 26134
loss: 1.0556247234344482,grad_norm: 0.999999309058779, iteration: 26135
loss: 1.0097148418426514,grad_norm: 0.9999993366799305, iteration: 26136
loss: 0.9963721036911011,grad_norm: 0.9999991824046706, iteration: 26137
loss: 0.9964326024055481,grad_norm: 0.9904470616971627, iteration: 26138
loss: 1.0088878870010376,grad_norm: 0.9999991742023794, iteration: 26139
loss: 1.0091021060943604,grad_norm: 0.9999990678136695, iteration: 26140
loss: 1.0134289264678955,grad_norm: 0.9162621124312263, iteration: 26141
loss: 0.9881306290626526,grad_norm: 0.9999992495858409, iteration: 26142
loss: 1.0429946184158325,grad_norm: 0.9999994537348075, iteration: 26143
loss: 1.0311459302902222,grad_norm: 0.999999214250943, iteration: 26144
loss: 1.0464798212051392,grad_norm: 0.9496892175352369, iteration: 26145
loss: 1.015489935874939,grad_norm: 0.8612353043464548, iteration: 26146
loss: 1.0396798849105835,grad_norm: 0.9670042362215101, iteration: 26147
loss: 1.022588849067688,grad_norm: 0.9999991850419583, iteration: 26148
loss: 0.9872621297836304,grad_norm: 0.9959967100161289, iteration: 26149
loss: 0.9680017232894897,grad_norm: 0.8145038466128549, iteration: 26150
loss: 1.0248066186904907,grad_norm: 0.9999991840552424, iteration: 26151
loss: 1.017409086227417,grad_norm: 0.9071274635783926, iteration: 26152
loss: 0.9956429600715637,grad_norm: 0.9851849166146427, iteration: 26153
loss: 1.0235646963119507,grad_norm: 0.9999992818923098, iteration: 26154
loss: 1.0007877349853516,grad_norm: 0.99999917012753, iteration: 26155
loss: 1.016999363899231,grad_norm: 0.9573517978585476, iteration: 26156
loss: 0.9987738728523254,grad_norm: 0.9999991372563347, iteration: 26157
loss: 1.0240334272384644,grad_norm: 0.9999992777454947, iteration: 26158
loss: 1.0157955884933472,grad_norm: 0.926180138891159, iteration: 26159
loss: 0.9717698097229004,grad_norm: 0.9190321716328237, iteration: 26160
loss: 0.9967848062515259,grad_norm: 0.9999994184270729, iteration: 26161
loss: 0.9821277856826782,grad_norm: 0.9262369778184597, iteration: 26162
loss: 1.0143598318099976,grad_norm: 0.9999991213723988, iteration: 26163
loss: 1.0291037559509277,grad_norm: 0.9464434790227475, iteration: 26164
loss: 1.002395749092102,grad_norm: 0.9999993218449027, iteration: 26165
loss: 0.9787538051605225,grad_norm: 0.999999071423365, iteration: 26166
loss: 1.0171887874603271,grad_norm: 0.9999990120232034, iteration: 26167
loss: 1.0195467472076416,grad_norm: 0.9999991098971511, iteration: 26168
loss: 1.0282998085021973,grad_norm: 0.8836566414647099, iteration: 26169
loss: 1.0374531745910645,grad_norm: 0.9413603300426862, iteration: 26170
loss: 1.0010961294174194,grad_norm: 0.9999991630080223, iteration: 26171
loss: 1.06352961063385,grad_norm: 0.9999996067877731, iteration: 26172
loss: 1.0128896236419678,grad_norm: 0.9999992123556057, iteration: 26173
loss: 1.004267930984497,grad_norm: 0.9887618956325528, iteration: 26174
loss: 1.0271782875061035,grad_norm: 0.9999992289500713, iteration: 26175
loss: 1.044426679611206,grad_norm: 0.8524149546129244, iteration: 26176
loss: 1.024368405342102,grad_norm: 0.9707911323600594, iteration: 26177
loss: 0.9843562841415405,grad_norm: 0.9865846788055692, iteration: 26178
loss: 1.0772731304168701,grad_norm: 0.9999992073894045, iteration: 26179
loss: 1.0170531272888184,grad_norm: 0.9369803519868087, iteration: 26180
loss: 1.0199673175811768,grad_norm: 0.9999990786066496, iteration: 26181
loss: 1.0497323274612427,grad_norm: 0.9999997207790317, iteration: 26182
loss: 1.1043003797531128,grad_norm: 0.99999988450728, iteration: 26183
loss: 1.0065213441848755,grad_norm: 0.9999995477436027, iteration: 26184
loss: 0.9965722560882568,grad_norm: 0.9999992182903429, iteration: 26185
loss: 1.0102907419204712,grad_norm: 0.9999989936848452, iteration: 26186
loss: 1.0288100242614746,grad_norm: 0.8356029586337217, iteration: 26187
loss: 1.030576467514038,grad_norm: 0.9999991915063842, iteration: 26188
loss: 0.9835318326950073,grad_norm: 0.8931778461839449, iteration: 26189
loss: 0.9996505975723267,grad_norm: 0.9999993702498494, iteration: 26190
loss: 0.9865787029266357,grad_norm: 0.999999028992532, iteration: 26191
loss: 1.0186097621917725,grad_norm: 0.9999991531894953, iteration: 26192
loss: 1.042992115020752,grad_norm: 0.9995939618573662, iteration: 26193
loss: 1.0050486326217651,grad_norm: 0.8626568800806316, iteration: 26194
loss: 0.9947444796562195,grad_norm: 0.9999989841705123, iteration: 26195
loss: 1.0267069339752197,grad_norm: 0.9999991685151959, iteration: 26196
loss: 1.0080634355545044,grad_norm: 0.9999990190586671, iteration: 26197
loss: 1.0041545629501343,grad_norm: 0.9999993861077267, iteration: 26198
loss: 1.0077804327011108,grad_norm: 0.9356404965951429, iteration: 26199
loss: 0.9904558658599854,grad_norm: 0.8842908092253114, iteration: 26200
loss: 1.0162684917449951,grad_norm: 0.9957942750720696, iteration: 26201
loss: 1.0873191356658936,grad_norm: 0.9999997074800028, iteration: 26202
loss: 0.9880238771438599,grad_norm: 0.9999991081562346, iteration: 26203
loss: 0.9821484088897705,grad_norm: 0.9999991720484178, iteration: 26204
loss: 0.964915931224823,grad_norm: 0.9999991834175274, iteration: 26205
loss: 1.0132081508636475,grad_norm: 0.9999993046432925, iteration: 26206
loss: 1.0360593795776367,grad_norm: 0.9999992723513523, iteration: 26207
loss: 1.108034372329712,grad_norm: 0.9999995841367747, iteration: 26208
loss: 0.9879193305969238,grad_norm: 0.9671870293468269, iteration: 26209
loss: 1.03475022315979,grad_norm: 0.9999996462484398, iteration: 26210
loss: 1.00733482837677,grad_norm: 0.9711505645249349, iteration: 26211
loss: 1.023240327835083,grad_norm: 0.9999991712885345, iteration: 26212
loss: 1.0439494848251343,grad_norm: 0.9620915729740088, iteration: 26213
loss: 0.9759840965270996,grad_norm: 0.9999991505706546, iteration: 26214
loss: 1.0074162483215332,grad_norm: 0.9999992639392395, iteration: 26215
loss: 1.0360373258590698,grad_norm: 0.8707078772857378, iteration: 26216
loss: 0.9934759140014648,grad_norm: 0.9897133792344469, iteration: 26217
loss: 1.0198463201522827,grad_norm: 0.9999989937109607, iteration: 26218
loss: 1.035395622253418,grad_norm: 0.9999992667249188, iteration: 26219
loss: 1.0399609804153442,grad_norm: 0.9999990391964629, iteration: 26220
loss: 1.0414000749588013,grad_norm: 0.9999992966696244, iteration: 26221
loss: 1.001421332359314,grad_norm: 0.9781987243618415, iteration: 26222
loss: 1.0465593338012695,grad_norm: 0.9999993078739917, iteration: 26223
loss: 0.9938480257987976,grad_norm: 0.9999991726925453, iteration: 26224
loss: 1.0399020910263062,grad_norm: 0.9999996425137895, iteration: 26225
loss: 1.0339555740356445,grad_norm: 0.9999990953330271, iteration: 26226
loss: 1.0157830715179443,grad_norm: 0.9999993306989023, iteration: 26227
loss: 1.0412520170211792,grad_norm: 0.9999995172796832, iteration: 26228
loss: 1.0542906522750854,grad_norm: 0.9999994604735052, iteration: 26229
loss: 1.0219248533248901,grad_norm: 0.9464047620789954, iteration: 26230
loss: 0.9923933744430542,grad_norm: 0.9999991317431208, iteration: 26231
loss: 0.9874010682106018,grad_norm: 0.999999386119587, iteration: 26232
loss: 0.9881563186645508,grad_norm: 0.8779423760044415, iteration: 26233
loss: 1.016958236694336,grad_norm: 0.999999121306774, iteration: 26234
loss: 1.055342197418213,grad_norm: 0.999999531038844, iteration: 26235
loss: 1.048632264137268,grad_norm: 0.9999990513084971, iteration: 26236
loss: 0.9953929781913757,grad_norm: 0.9999990607052444, iteration: 26237
loss: 1.0200217962265015,grad_norm: 0.9999993728657078, iteration: 26238
loss: 1.0002285242080688,grad_norm: 0.9999990707259461, iteration: 26239
loss: 0.9948878884315491,grad_norm: 0.9999992021242616, iteration: 26240
loss: 1.0425381660461426,grad_norm: 0.9999990656996443, iteration: 26241
loss: 1.008313775062561,grad_norm: 0.8739383167993828, iteration: 26242
loss: 1.0554255247116089,grad_norm: 0.9999995746430238, iteration: 26243
loss: 1.0540745258331299,grad_norm: 0.9999995279056635, iteration: 26244
loss: 1.0142278671264648,grad_norm: 0.999999322946994, iteration: 26245
loss: 0.9487723708152771,grad_norm: 0.9999991485706855, iteration: 26246
loss: 1.0235533714294434,grad_norm: 0.838625204542917, iteration: 26247
loss: 0.9848203063011169,grad_norm: 0.9507787786546388, iteration: 26248
loss: 0.9781928658485413,grad_norm: 0.9999990644805792, iteration: 26249
loss: 1.005661129951477,grad_norm: 0.9999992029190388, iteration: 26250
loss: 1.017371654510498,grad_norm: 0.9999993431994256, iteration: 26251
loss: 1.0183768272399902,grad_norm: 0.9999998067912398, iteration: 26252
loss: 0.9792421460151672,grad_norm: 0.9999991252633711, iteration: 26253
loss: 1.03626549243927,grad_norm: 0.9999990424512459, iteration: 26254
loss: 0.9928092956542969,grad_norm: 0.9999992200358343, iteration: 26255
loss: 0.9953452944755554,grad_norm: 0.9898615571336516, iteration: 26256
loss: 1.006927728652954,grad_norm: 0.9999991651665996, iteration: 26257
loss: 1.0368198156356812,grad_norm: 0.9520010818940834, iteration: 26258
loss: 0.9564331769943237,grad_norm: 0.907150679632175, iteration: 26259
loss: 1.004832148551941,grad_norm: 0.9999993274477212, iteration: 26260
loss: 0.9921504855155945,grad_norm: 0.9999990706950488, iteration: 26261
loss: 1.0328433513641357,grad_norm: 0.9999997892528946, iteration: 26262
loss: 1.0329086780548096,grad_norm: 0.9411103159227382, iteration: 26263
loss: 1.027043342590332,grad_norm: 0.9999995137531745, iteration: 26264
loss: 1.028516173362732,grad_norm: 0.9999990968843209, iteration: 26265
loss: 1.0150355100631714,grad_norm: 0.8392745865940632, iteration: 26266
loss: 1.010959506034851,grad_norm: 0.9115499878880534, iteration: 26267
loss: 1.0275894403457642,grad_norm: 0.9999991078035628, iteration: 26268
loss: 1.0308427810668945,grad_norm: 0.9999991844769006, iteration: 26269
loss: 1.015433430671692,grad_norm: 0.9999991528057977, iteration: 26270
loss: 0.9931909441947937,grad_norm: 0.8646760699893692, iteration: 26271
loss: 1.0017586946487427,grad_norm: 0.9999994948343125, iteration: 26272
loss: 1.022922158241272,grad_norm: 0.9999992352410617, iteration: 26273
loss: 1.0405824184417725,grad_norm: 0.9999995918859734, iteration: 26274
loss: 1.0098475217819214,grad_norm: 0.9999990904597837, iteration: 26275
loss: 1.0282957553863525,grad_norm: 0.9999992560207718, iteration: 26276
loss: 1.0536105632781982,grad_norm: 0.9999992211310692, iteration: 26277
loss: 1.0096204280853271,grad_norm: 0.9450570019755492, iteration: 26278
loss: 1.0187028646469116,grad_norm: 0.9999992646343229, iteration: 26279
loss: 1.0310814380645752,grad_norm: 0.9999995241752112, iteration: 26280
loss: 1.0247782468795776,grad_norm: 0.8893212451872686, iteration: 26281
loss: 0.9945231080055237,grad_norm: 0.9327986699662086, iteration: 26282
loss: 1.0431897640228271,grad_norm: 0.9999991756540334, iteration: 26283
loss: 0.9857626557350159,grad_norm: 0.7915396339850094, iteration: 26284
loss: 1.0527819395065308,grad_norm: 0.9697965262640765, iteration: 26285
loss: 0.9886220693588257,grad_norm: 0.9999990387280374, iteration: 26286
loss: 0.9970948100090027,grad_norm: 0.9999993455167011, iteration: 26287
loss: 1.0147792100906372,grad_norm: 0.9999992856823631, iteration: 26288
loss: 1.0043011903762817,grad_norm: 0.9999995512065867, iteration: 26289
loss: 0.983561635017395,grad_norm: 0.9999992122052346, iteration: 26290
loss: 1.050408959388733,grad_norm: 0.9999990998762895, iteration: 26291
loss: 1.004732608795166,grad_norm: 0.9999991035449988, iteration: 26292
loss: 0.9953267574310303,grad_norm: 0.9999991700991153, iteration: 26293
loss: 1.0207785367965698,grad_norm: 0.9702818662377444, iteration: 26294
loss: 1.0195884704589844,grad_norm: 0.9750615792943712, iteration: 26295
loss: 1.0088728666305542,grad_norm: 0.999999101426239, iteration: 26296
loss: 1.0174640417099,grad_norm: 0.999999437054307, iteration: 26297
loss: 0.9881381988525391,grad_norm: 0.9999992497938299, iteration: 26298
loss: 1.0270676612854004,grad_norm: 0.9999995174108967, iteration: 26299
loss: 0.9736285209655762,grad_norm: 0.999999137633169, iteration: 26300
loss: 1.0284926891326904,grad_norm: 0.999999099125124, iteration: 26301
loss: 1.0211055278778076,grad_norm: 0.9999991255829057, iteration: 26302
loss: 1.0232007503509521,grad_norm: 0.9999994117165828, iteration: 26303
loss: 1.0288243293762207,grad_norm: 0.9979814226963507, iteration: 26304
loss: 1.0410380363464355,grad_norm: 0.9999995218845183, iteration: 26305
loss: 0.9629754424095154,grad_norm: 0.9999990949213938, iteration: 26306
loss: 1.0348215103149414,grad_norm: 0.9999993740326745, iteration: 26307
loss: 1.055015206336975,grad_norm: 0.992224733609097, iteration: 26308
loss: 1.0119366645812988,grad_norm: 0.9809659823096354, iteration: 26309
loss: 1.0094904899597168,grad_norm: 0.9999998128979517, iteration: 26310
loss: 1.0194247961044312,grad_norm: 0.9999994086535334, iteration: 26311
loss: 1.0143338441848755,grad_norm: 0.9999990106090682, iteration: 26312
loss: 0.969913125038147,grad_norm: 0.9999993113366595, iteration: 26313
loss: 1.0085291862487793,grad_norm: 0.9999996955353823, iteration: 26314
loss: 0.9561076760292053,grad_norm: 0.9672069397465779, iteration: 26315
loss: 1.0168753862380981,grad_norm: 0.9999991466156346, iteration: 26316
loss: 1.007003903388977,grad_norm: 0.9999991394234096, iteration: 26317
loss: 1.0235446691513062,grad_norm: 0.9999995635685629, iteration: 26318
loss: 0.9938620328903198,grad_norm: 0.9999989981618949, iteration: 26319
loss: 0.9925076365470886,grad_norm: 0.9999991414767937, iteration: 26320
loss: 1.0046584606170654,grad_norm: 0.9999991504715271, iteration: 26321
loss: 1.0084775686264038,grad_norm: 0.9999994370747469, iteration: 26322
loss: 0.9812967777252197,grad_norm: 0.9999991813156358, iteration: 26323
loss: 0.9861024022102356,grad_norm: 0.957886625458437, iteration: 26324
loss: 0.9818994402885437,grad_norm: 0.9212226431204624, iteration: 26325
loss: 1.0162928104400635,grad_norm: 0.9999991408509531, iteration: 26326
loss: 1.0352046489715576,grad_norm: 0.9999990270760878, iteration: 26327
loss: 1.0472793579101562,grad_norm: 0.9999992615142472, iteration: 26328
loss: 1.0175838470458984,grad_norm: 0.9071976151992239, iteration: 26329
loss: 0.9656875729560852,grad_norm: 0.9928723426604469, iteration: 26330
loss: 1.0161128044128418,grad_norm: 0.9999991193882133, iteration: 26331
loss: 1.0102310180664062,grad_norm: 0.8995072224582319, iteration: 26332
loss: 1.0176256895065308,grad_norm: 0.9999992619858186, iteration: 26333
loss: 1.0090323686599731,grad_norm: 0.9179903751012046, iteration: 26334
loss: 0.9914062023162842,grad_norm: 0.9999989637918473, iteration: 26335
loss: 1.000769019126892,grad_norm: 0.9999990555179934, iteration: 26336
loss: 0.9855014085769653,grad_norm: 0.8883561845850454, iteration: 26337
loss: 1.03517746925354,grad_norm: 0.999999198798072, iteration: 26338
loss: 1.008939504623413,grad_norm: 0.999999153980176, iteration: 26339
loss: 0.9648786783218384,grad_norm: 0.9813838882641484, iteration: 26340
loss: 1.0112255811691284,grad_norm: 0.9999992880646044, iteration: 26341
loss: 1.0001496076583862,grad_norm: 0.8981074502932838, iteration: 26342
loss: 0.9692600965499878,grad_norm: 0.9999992655845872, iteration: 26343
loss: 0.9861094951629639,grad_norm: 0.9999994661682888, iteration: 26344
loss: 1.0039583444595337,grad_norm: 0.9999991699294442, iteration: 26345
loss: 0.9847787618637085,grad_norm: 0.9001690709854927, iteration: 26346
loss: 1.0126893520355225,grad_norm: 0.9999992368963544, iteration: 26347
loss: 1.001562476158142,grad_norm: 0.9999991801398002, iteration: 26348
loss: 1.0072219371795654,grad_norm: 0.8211020615279103, iteration: 26349
loss: 0.9928489327430725,grad_norm: 0.99999906730231, iteration: 26350
loss: 1.0156041383743286,grad_norm: 0.8626441127598155, iteration: 26351
loss: 1.0674635171890259,grad_norm: 0.9999998781123519, iteration: 26352
loss: 1.0048401355743408,grad_norm: 0.9999991225303865, iteration: 26353
loss: 1.023046851158142,grad_norm: 0.924051854271058, iteration: 26354
loss: 1.0377808809280396,grad_norm: 0.9524345939317607, iteration: 26355
loss: 1.016386866569519,grad_norm: 0.9999993339504804, iteration: 26356
loss: 1.0621291399002075,grad_norm: 0.9999996472217381, iteration: 26357
loss: 0.9976838231086731,grad_norm: 0.9637732976028565, iteration: 26358
loss: 1.087986946105957,grad_norm: 0.999999804736577, iteration: 26359
loss: 1.0111063718795776,grad_norm: 0.9280240336683359, iteration: 26360
loss: 1.0081676244735718,grad_norm: 0.9999992664122802, iteration: 26361
loss: 1.0088231563568115,grad_norm: 0.9367436755359336, iteration: 26362
loss: 0.9811193943023682,grad_norm: 0.9999993426896657, iteration: 26363
loss: 1.0353537797927856,grad_norm: 0.9999991662682416, iteration: 26364
loss: 0.9865493774414062,grad_norm: 0.9888681028218809, iteration: 26365
loss: 0.999742329120636,grad_norm: 0.9999995150080787, iteration: 26366
loss: 0.9779747128486633,grad_norm: 0.9999990293485703, iteration: 26367
loss: 1.0303404331207275,grad_norm: 0.9999991789569259, iteration: 26368
loss: 1.0160688161849976,grad_norm: 0.9726234876105088, iteration: 26369
loss: 1.0430080890655518,grad_norm: 0.8226011503551632, iteration: 26370
loss: 1.0440118312835693,grad_norm: 0.9999991054797589, iteration: 26371
loss: 1.0166610479354858,grad_norm: 0.9999990959938309, iteration: 26372
loss: 0.9922613501548767,grad_norm: 0.8741194116384707, iteration: 26373
loss: 1.029687762260437,grad_norm: 0.96551098105375, iteration: 26374
loss: 1.0182368755340576,grad_norm: 0.9391561721553474, iteration: 26375
loss: 1.0177316665649414,grad_norm: 0.9702978492762947, iteration: 26376
loss: 1.009141206741333,grad_norm: 0.9999995559624806, iteration: 26377
loss: 1.0150774717330933,grad_norm: 0.9999991849061656, iteration: 26378
loss: 0.9909915924072266,grad_norm: 0.988385698710007, iteration: 26379
loss: 1.0116060972213745,grad_norm: 0.9102745988259354, iteration: 26380
loss: 1.0171245336532593,grad_norm: 0.8683246958558084, iteration: 26381
loss: 1.0038981437683105,grad_norm: 0.9999990009690236, iteration: 26382
loss: 1.006475806236267,grad_norm: 0.8924373844558826, iteration: 26383
loss: 0.9742023348808289,grad_norm: 0.9539505612085674, iteration: 26384
loss: 1.0239437818527222,grad_norm: 0.9999991403693997, iteration: 26385
loss: 1.1241495609283447,grad_norm: 0.9999998103705524, iteration: 26386
loss: 0.9775786399841309,grad_norm: 0.9999991492399214, iteration: 26387
loss: 1.1022082567214966,grad_norm: 0.9999995025601329, iteration: 26388
loss: 1.0189704895019531,grad_norm: 0.9999991288567517, iteration: 26389
loss: 1.0131593942642212,grad_norm: 0.9567001441281942, iteration: 26390
loss: 0.9907278418540955,grad_norm: 0.8949046381696278, iteration: 26391
loss: 0.993720293045044,grad_norm: 0.9999992139066238, iteration: 26392
loss: 0.9871367812156677,grad_norm: 0.9930661628398325, iteration: 26393
loss: 0.9858759045600891,grad_norm: 0.9970416149309338, iteration: 26394
loss: 1.0411043167114258,grad_norm: 0.9999991470068149, iteration: 26395
loss: 1.0400274991989136,grad_norm: 0.9387597153646198, iteration: 26396
loss: 1.0305427312850952,grad_norm: 0.9176177574440024, iteration: 26397
loss: 0.9737082123756409,grad_norm: 0.9652798191478492, iteration: 26398
loss: 1.0502384901046753,grad_norm: 0.9395668566393073, iteration: 26399
loss: 1.016996145248413,grad_norm: 0.997912709449204, iteration: 26400
loss: 1.0179146528244019,grad_norm: 0.9999990559318429, iteration: 26401
loss: 1.0022023916244507,grad_norm: 0.9424476207895632, iteration: 26402
loss: 1.0246727466583252,grad_norm: 0.8841521393759615, iteration: 26403
loss: 0.9911774396896362,grad_norm: 0.9356769310795985, iteration: 26404
loss: 0.9885664582252502,grad_norm: 0.9999990768185735, iteration: 26405
loss: 1.0581411123275757,grad_norm: 0.9999991265168627, iteration: 26406
loss: 1.0017801523208618,grad_norm: 0.9999991682252468, iteration: 26407
loss: 1.0252732038497925,grad_norm: 0.9999991421477993, iteration: 26408
loss: 1.0697064399719238,grad_norm: 0.999999030460322, iteration: 26409
loss: 1.005075454711914,grad_norm: 0.9999994333956078, iteration: 26410
loss: 1.0459710359573364,grad_norm: 0.9999991706116059, iteration: 26411
loss: 1.0937857627868652,grad_norm: 0.9999997104502623, iteration: 26412
loss: 0.9991154074668884,grad_norm: 0.9999990878426989, iteration: 26413
loss: 1.0280377864837646,grad_norm: 0.9729054914948919, iteration: 26414
loss: 1.0661965608596802,grad_norm: 0.9999996556960848, iteration: 26415
loss: 1.0024012327194214,grad_norm: 0.9999992097626808, iteration: 26416
loss: 1.0008926391601562,grad_norm: 0.9999990957069916, iteration: 26417
loss: 1.0393298864364624,grad_norm: 0.9999991601034562, iteration: 26418
loss: 0.9740749597549438,grad_norm: 0.9999991044883824, iteration: 26419
loss: 0.97104811668396,grad_norm: 0.8661567127912579, iteration: 26420
loss: 0.9806041121482849,grad_norm: 0.9999991542435758, iteration: 26421
loss: 1.0014978647232056,grad_norm: 0.9999991777154049, iteration: 26422
loss: 1.0258069038391113,grad_norm: 0.9592979879461172, iteration: 26423
loss: 0.9992148876190186,grad_norm: 0.9999992067922575, iteration: 26424
loss: 1.0458589792251587,grad_norm: 0.9999998214244101, iteration: 26425
loss: 0.9914366602897644,grad_norm: 0.9999992229290448, iteration: 26426
loss: 0.9941003322601318,grad_norm: 0.9999990555724484, iteration: 26427
loss: 0.98089200258255,grad_norm: 0.9999992008802538, iteration: 26428
loss: 1.00461745262146,grad_norm: 0.9722012323303844, iteration: 26429
loss: 1.0118988752365112,grad_norm: 0.9430759388603027, iteration: 26430
loss: 0.9947358965873718,grad_norm: 0.8278484062498414, iteration: 26431
loss: 1.0038032531738281,grad_norm: 0.9999991452951935, iteration: 26432
loss: 1.0073028802871704,grad_norm: 0.9999992121066279, iteration: 26433
loss: 0.9789491891860962,grad_norm: 0.999998986397907, iteration: 26434
loss: 0.979749321937561,grad_norm: 0.9999992753631707, iteration: 26435
loss: 0.9934973120689392,grad_norm: 0.9056838991652477, iteration: 26436
loss: 1.024429440498352,grad_norm: 0.999999062663535, iteration: 26437
loss: 1.0331907272338867,grad_norm: 0.9582557122141213, iteration: 26438
loss: 1.0352250337600708,grad_norm: 0.9999990363690667, iteration: 26439
loss: 1.0202566385269165,grad_norm: 0.9999991150743521, iteration: 26440
loss: 0.968239426612854,grad_norm: 0.9833676871385318, iteration: 26441
loss: 1.0018116235733032,grad_norm: 0.9423661483848332, iteration: 26442
loss: 1.0393645763397217,grad_norm: 0.9899694313963181, iteration: 26443
loss: 1.0177054405212402,grad_norm: 0.9999990151480435, iteration: 26444
loss: 1.0374728441238403,grad_norm: 0.8281345070348674, iteration: 26445
loss: 1.0197330713272095,grad_norm: 0.8204092508841154, iteration: 26446
loss: 1.0150330066680908,grad_norm: 0.8952000263973562, iteration: 26447
loss: 0.9964688420295715,grad_norm: 0.9370985556546374, iteration: 26448
loss: 1.0081783533096313,grad_norm: 0.8409453691798298, iteration: 26449
loss: 1.042791724205017,grad_norm: 0.9999991276683415, iteration: 26450
loss: 0.9786330461502075,grad_norm: 0.9999992136797987, iteration: 26451
loss: 1.0056698322296143,grad_norm: 0.999999250814949, iteration: 26452
loss: 0.973567008972168,grad_norm: 0.9999989958569202, iteration: 26453
loss: 1.0252238512039185,grad_norm: 0.9999991086657993, iteration: 26454
loss: 1.0103018283843994,grad_norm: 0.8206287648424663, iteration: 26455
loss: 1.0060570240020752,grad_norm: 0.9999993325021351, iteration: 26456
loss: 1.0369867086410522,grad_norm: 0.9999991689306794, iteration: 26457
loss: 0.9826109409332275,grad_norm: 0.9999993525459492, iteration: 26458
loss: 1.0267804861068726,grad_norm: 0.9999997256545471, iteration: 26459
loss: 0.9687302112579346,grad_norm: 0.9999990954919462, iteration: 26460
loss: 1.0306577682495117,grad_norm: 0.9999991730615079, iteration: 26461
loss: 1.001440167427063,grad_norm: 0.9464610582282783, iteration: 26462
loss: 1.011306881904602,grad_norm: 0.9275573869108734, iteration: 26463
loss: 1.0197052955627441,grad_norm: 0.9999996361358165, iteration: 26464
loss: 1.0263593196868896,grad_norm: 0.999999768934503, iteration: 26465
loss: 0.9826501607894897,grad_norm: 0.9836640486052822, iteration: 26466
loss: 1.0195502042770386,grad_norm: 0.9999995728559431, iteration: 26467
loss: 0.9964483380317688,grad_norm: 0.9999991818978731, iteration: 26468
loss: 1.0108904838562012,grad_norm: 0.9999990089813768, iteration: 26469
loss: 0.9677286148071289,grad_norm: 0.9999990546814066, iteration: 26470
loss: 0.9719184041023254,grad_norm: 0.8608206447763002, iteration: 26471
loss: 1.0321089029312134,grad_norm: 0.9999991011250218, iteration: 26472
loss: 1.023995041847229,grad_norm: 0.9999991169557584, iteration: 26473
loss: 1.0700247287750244,grad_norm: 0.9999990954561153, iteration: 26474
loss: 1.020392894744873,grad_norm: 0.7990983114755502, iteration: 26475
loss: 1.016746997833252,grad_norm: 0.904104291352161, iteration: 26476
loss: 1.0474194288253784,grad_norm: 0.9999994969225756, iteration: 26477
loss: 1.013379454612732,grad_norm: 0.9999990554613255, iteration: 26478
loss: 1.0084736347198486,grad_norm: 0.9077864114088025, iteration: 26479
loss: 1.00140380859375,grad_norm: 0.9999992960089531, iteration: 26480
loss: 1.0551131963729858,grad_norm: 0.9999994890446677, iteration: 26481
loss: 1.0196141004562378,grad_norm: 0.9999993356821311, iteration: 26482
loss: 1.0526303052902222,grad_norm: 0.999999141105221, iteration: 26483
loss: 1.0190125703811646,grad_norm: 0.9999994156033992, iteration: 26484
loss: 1.034554123878479,grad_norm: 0.9999992510176623, iteration: 26485
loss: 1.0281628370285034,grad_norm: 0.9408153724485755, iteration: 26486
loss: 0.9915119409561157,grad_norm: 0.9999993198142422, iteration: 26487
loss: 0.9850047826766968,grad_norm: 0.9209332780444459, iteration: 26488
loss: 1.00577712059021,grad_norm: 0.7947458856344041, iteration: 26489
loss: 1.028204321861267,grad_norm: 0.999999246091022, iteration: 26490
loss: 1.0447665452957153,grad_norm: 0.8240404728841212, iteration: 26491
loss: 0.9965752959251404,grad_norm: 0.9999991533543315, iteration: 26492
loss: 1.0115392208099365,grad_norm: 0.9999991670933243, iteration: 26493
loss: 1.0889025926589966,grad_norm: 0.99999969102479, iteration: 26494
loss: 0.9891260266304016,grad_norm: 0.9789060365129185, iteration: 26495
loss: 1.010980248451233,grad_norm: 0.9999992108360959, iteration: 26496
loss: 1.0309923887252808,grad_norm: 0.8683338315854691, iteration: 26497
loss: 1.008762001991272,grad_norm: 0.9999991786415497, iteration: 26498
loss: 1.0042434930801392,grad_norm: 0.9692507557980452, iteration: 26499
loss: 1.007032871246338,grad_norm: 0.999999403096573, iteration: 26500
loss: 0.9909188151359558,grad_norm: 0.9102583394201772, iteration: 26501
loss: 1.0205272436141968,grad_norm: 0.9999991145418421, iteration: 26502
loss: 1.0386055707931519,grad_norm: 0.9999997405544743, iteration: 26503
loss: 1.0268312692642212,grad_norm: 0.9999989907423747, iteration: 26504
loss: 0.995965301990509,grad_norm: 0.9369116704709138, iteration: 26505
loss: 1.0093021392822266,grad_norm: 0.9999989810838678, iteration: 26506
loss: 1.0244171619415283,grad_norm: 0.9265802424874345, iteration: 26507
loss: 0.9818847179412842,grad_norm: 0.9755375652000733, iteration: 26508
loss: 1.004509449005127,grad_norm: 0.9999990686421434, iteration: 26509
loss: 1.0317827463150024,grad_norm: 0.9999991098385399, iteration: 26510
loss: 1.0239607095718384,grad_norm: 0.9999991528132797, iteration: 26511
loss: 0.9983363747596741,grad_norm: 0.7879375631670891, iteration: 26512
loss: 1.0263419151306152,grad_norm: 0.9226100373222494, iteration: 26513
loss: 1.0213334560394287,grad_norm: 0.9135156366118996, iteration: 26514
loss: 1.0230718851089478,grad_norm: 0.9060015875729718, iteration: 26515
loss: 1.0304065942764282,grad_norm: 0.9999991171713645, iteration: 26516
loss: 1.0475527048110962,grad_norm: 0.9999993033009447, iteration: 26517
loss: 1.0226690769195557,grad_norm: 0.9999991706141409, iteration: 26518
loss: 1.0297434329986572,grad_norm: 0.8798191255930176, iteration: 26519
loss: 1.0176947116851807,grad_norm: 0.9962551031619503, iteration: 26520
loss: 1.040848970413208,grad_norm: 0.9999991368980978, iteration: 26521
loss: 0.9874874353408813,grad_norm: 0.9999992601668597, iteration: 26522
loss: 1.0155506134033203,grad_norm: 0.9999994357896589, iteration: 26523
loss: 0.9990710616111755,grad_norm: 0.9524199695374967, iteration: 26524
loss: 1.0221502780914307,grad_norm: 0.9257695352316112, iteration: 26525
loss: 1.0122390985488892,grad_norm: 0.9999992524186394, iteration: 26526
loss: 0.9723110795021057,grad_norm: 0.9999991343844075, iteration: 26527
loss: 0.9741791486740112,grad_norm: 0.999999593461416, iteration: 26528
loss: 1.039252758026123,grad_norm: 0.9999990032744038, iteration: 26529
loss: 1.006935477256775,grad_norm: 0.999999173290381, iteration: 26530
loss: 0.9719041585922241,grad_norm: 0.9650899001965226, iteration: 26531
loss: 0.9533067345619202,grad_norm: 0.9836568928732289, iteration: 26532
loss: 0.9893298745155334,grad_norm: 0.9906069101429703, iteration: 26533
loss: 0.9902266263961792,grad_norm: 0.9999992171673974, iteration: 26534
loss: 0.9963778257369995,grad_norm: 0.9999992356219339, iteration: 26535
loss: 1.1179801225662231,grad_norm: 0.9999997901004875, iteration: 26536
loss: 1.0200941562652588,grad_norm: 0.999999529083531, iteration: 26537
loss: 1.0854039192199707,grad_norm: 0.9999994083746849, iteration: 26538
loss: 1.045754075050354,grad_norm: 0.9999997146192409, iteration: 26539
loss: 0.9808762073516846,grad_norm: 0.940855807270834, iteration: 26540
loss: 1.039036512374878,grad_norm: 0.9413192590275108, iteration: 26541
loss: 1.0377293825149536,grad_norm: 0.9654759300516889, iteration: 26542
loss: 1.0184754133224487,grad_norm: 0.9999991810903887, iteration: 26543
loss: 1.0047098398208618,grad_norm: 0.9999992178295108, iteration: 26544
loss: 1.0006047487258911,grad_norm: 0.9999990611661749, iteration: 26545
loss: 1.0113394260406494,grad_norm: 0.9999991100606294, iteration: 26546
loss: 1.0551130771636963,grad_norm: 0.9999988994384691, iteration: 26547
loss: 1.0567430257797241,grad_norm: 0.9999994403990397, iteration: 26548
loss: 1.011145830154419,grad_norm: 0.9999991921354842, iteration: 26549
loss: 1.0671747922897339,grad_norm: 0.999999069245497, iteration: 26550
loss: 0.9961008429527283,grad_norm: 0.9999995062925411, iteration: 26551
loss: 1.0006060600280762,grad_norm: 0.962138759462965, iteration: 26552
loss: 1.0066230297088623,grad_norm: 0.9148561133915889, iteration: 26553
loss: 1.0487455129623413,grad_norm: 0.9999993553627344, iteration: 26554
loss: 1.0407851934432983,grad_norm: 0.9999993060605388, iteration: 26555
loss: 1.0252685546875,grad_norm: 0.9999992836007031, iteration: 26556
loss: 0.9927986264228821,grad_norm: 0.999999138938297, iteration: 26557
loss: 1.0468511581420898,grad_norm: 0.9999990737808419, iteration: 26558
loss: 0.9995813965797424,grad_norm: 0.7451809382891056, iteration: 26559
loss: 0.9591382145881653,grad_norm: 0.9999991804358742, iteration: 26560
loss: 1.0089823007583618,grad_norm: 0.8024790812721654, iteration: 26561
loss: 1.0119929313659668,grad_norm: 0.999999074144393, iteration: 26562
loss: 1.0361173152923584,grad_norm: 0.9999991350334709, iteration: 26563
loss: 0.9966779351234436,grad_norm: 0.9999990865222612, iteration: 26564
loss: 0.969082772731781,grad_norm: 0.7850977501038389, iteration: 26565
loss: 1.0190051794052124,grad_norm: 0.999999260970606, iteration: 26566
loss: 1.0248206853866577,grad_norm: 0.90911048853095, iteration: 26567
loss: 1.0601140260696411,grad_norm: 0.9999992466205528, iteration: 26568
loss: 1.0092582702636719,grad_norm: 0.9999991332052506, iteration: 26569
loss: 0.9910565614700317,grad_norm: 0.9999990699462572, iteration: 26570
loss: 1.0267030000686646,grad_norm: 0.9999990813892968, iteration: 26571
loss: 1.0429085493087769,grad_norm: 0.9620202325230105, iteration: 26572
loss: 1.04212486743927,grad_norm: 0.9999990840601134, iteration: 26573
loss: 1.0000063180923462,grad_norm: 0.9999991022953936, iteration: 26574
loss: 0.9952406287193298,grad_norm: 0.9670533098927387, iteration: 26575
loss: 1.0515501499176025,grad_norm: 0.9999993991306672, iteration: 26576
loss: 1.0463008880615234,grad_norm: 0.868955399590941, iteration: 26577
loss: 1.0517879724502563,grad_norm: 0.9999991453691627, iteration: 26578
loss: 1.0688644647598267,grad_norm: 0.9999996441447708, iteration: 26579
loss: 1.0038821697235107,grad_norm: 0.9923410089538757, iteration: 26580
loss: 1.0587588548660278,grad_norm: 0.9999993571843477, iteration: 26581
loss: 0.9921768307685852,grad_norm: 0.8662846752132003, iteration: 26582
loss: 1.0345653295516968,grad_norm: 0.9999990741109492, iteration: 26583
loss: 1.0015307664871216,grad_norm: 0.8701914578424411, iteration: 26584
loss: 1.0342081785202026,grad_norm: 0.9862168194232677, iteration: 26585
loss: 1.0111750364303589,grad_norm: 0.9443165952965792, iteration: 26586
loss: 1.008232831954956,grad_norm: 0.9944316777366707, iteration: 26587
loss: 0.9906682372093201,grad_norm: 0.880310135419182, iteration: 26588
loss: 1.002038598060608,grad_norm: 0.999999222918025, iteration: 26589
loss: 0.9971199035644531,grad_norm: 0.9999990066168865, iteration: 26590
loss: 0.9713280200958252,grad_norm: 0.9907706112915925, iteration: 26591
loss: 1.0026776790618896,grad_norm: 0.9999992265426356, iteration: 26592
loss: 0.9813521504402161,grad_norm: 0.9999990552394334, iteration: 26593
loss: 1.0333563089370728,grad_norm: 0.9999992348238403, iteration: 26594
loss: 0.9898653030395508,grad_norm: 0.9999990639365169, iteration: 26595
loss: 1.013501524925232,grad_norm: 0.9185567922201382, iteration: 26596
loss: 1.006312608718872,grad_norm: 0.936339028084256, iteration: 26597
loss: 1.0037976503372192,grad_norm: 0.8588104483101187, iteration: 26598
loss: 0.9830325245857239,grad_norm: 0.9999992603229174, iteration: 26599
loss: 1.056938886642456,grad_norm: 0.9999993746551019, iteration: 26600
loss: 1.1504580974578857,grad_norm: 0.9999996703605508, iteration: 26601
loss: 1.0009100437164307,grad_norm: 0.9788450584947225, iteration: 26602
loss: 1.0608266592025757,grad_norm: 0.9999990816974382, iteration: 26603
loss: 1.0081071853637695,grad_norm: 0.999999523687412, iteration: 26604
loss: 0.9838555455207825,grad_norm: 0.9999991723264848, iteration: 26605
loss: 0.985795259475708,grad_norm: 0.88857506551808, iteration: 26606
loss: 1.0080375671386719,grad_norm: 0.8505853277461617, iteration: 26607
loss: 1.0425654649734497,grad_norm: 0.9999994527141329, iteration: 26608
loss: 1.0333905220031738,grad_norm: 0.9272973105743889, iteration: 26609
loss: 1.0437644720077515,grad_norm: 0.9999996811891935, iteration: 26610
loss: 1.0169609785079956,grad_norm: 0.982334718481081, iteration: 26611
loss: 1.0117613077163696,grad_norm: 0.965227889461154, iteration: 26612
loss: 1.006176233291626,grad_norm: 0.9367775506165245, iteration: 26613
loss: 0.9991384744644165,grad_norm: 0.989850773848387, iteration: 26614
loss: 1.0236042737960815,grad_norm: 0.8382093875296576, iteration: 26615
loss: 0.9915545582771301,grad_norm: 0.97559234385642, iteration: 26616
loss: 1.0303055047988892,grad_norm: 0.9999994964499588, iteration: 26617
loss: 1.0122970342636108,grad_norm: 0.8698035125207472, iteration: 26618
loss: 0.9929733276367188,grad_norm: 0.9999993204655008, iteration: 26619
loss: 1.0246292352676392,grad_norm: 0.9999990858401197, iteration: 26620
loss: 1.013472557067871,grad_norm: 0.9217862498297006, iteration: 26621
loss: 0.9690732955932617,grad_norm: 0.9218196597633513, iteration: 26622
loss: 0.9695737957954407,grad_norm: 0.9999991342519295, iteration: 26623
loss: 1.0174338817596436,grad_norm: 0.9323079559465626, iteration: 26624
loss: 1.0302199125289917,grad_norm: 0.9999990593449385, iteration: 26625
loss: 1.0106021165847778,grad_norm: 0.9999991374363634, iteration: 26626
loss: 1.0458381175994873,grad_norm: 0.9999991278837308, iteration: 26627
loss: 0.9682554602622986,grad_norm: 0.999039713855083, iteration: 26628
loss: 0.9642378091812134,grad_norm: 0.8981441560412491, iteration: 26629
loss: 1.0070130825042725,grad_norm: 0.9999992022750579, iteration: 26630
loss: 0.992416262626648,grad_norm: 0.9999993900533355, iteration: 26631
loss: 1.0278831720352173,grad_norm: 0.9999991504977017, iteration: 26632
loss: 0.9858633875846863,grad_norm: 0.9999989792659801, iteration: 26633
loss: 0.9876800179481506,grad_norm: 0.9352285194590376, iteration: 26634
loss: 1.008017897605896,grad_norm: 0.9314004621005393, iteration: 26635
loss: 1.0113608837127686,grad_norm: 0.948623496996178, iteration: 26636
loss: 0.9979972839355469,grad_norm: 0.9182156272814418, iteration: 26637
loss: 1.0142529010772705,grad_norm: 0.9999992802734626, iteration: 26638
loss: 1.01683509349823,grad_norm: 0.9999992467956607, iteration: 26639
loss: 1.0066182613372803,grad_norm: 0.8862775794864469, iteration: 26640
loss: 1.0435969829559326,grad_norm: 0.9999992337674399, iteration: 26641
loss: 0.9963963627815247,grad_norm: 0.900056673103376, iteration: 26642
loss: 1.0302788019180298,grad_norm: 0.9999989999468062, iteration: 26643
loss: 1.0020891427993774,grad_norm: 0.9999990328373218, iteration: 26644
loss: 1.0140464305877686,grad_norm: 0.9523700549963253, iteration: 26645
loss: 0.9723150730133057,grad_norm: 0.9999991148381785, iteration: 26646
loss: 0.9953030347824097,grad_norm: 0.8755048590794272, iteration: 26647
loss: 1.0140068531036377,grad_norm: 0.9999994597070165, iteration: 26648
loss: 1.0013996362686157,grad_norm: 0.9999997836820388, iteration: 26649
loss: 0.9807700514793396,grad_norm: 0.9999992108615887, iteration: 26650
loss: 1.0083680152893066,grad_norm: 0.9999991607475155, iteration: 26651
loss: 1.0362144708633423,grad_norm: 0.9999994625078014, iteration: 26652
loss: 1.0079686641693115,grad_norm: 0.9999990283988706, iteration: 26653
loss: 1.030097484588623,grad_norm: 0.9999991438922812, iteration: 26654
loss: 0.9816732406616211,grad_norm: 0.9999998220446249, iteration: 26655
loss: 0.9808027744293213,grad_norm: 0.9041395917279607, iteration: 26656
loss: 1.0211520195007324,grad_norm: 0.9999990745252264, iteration: 26657
loss: 0.9936307668685913,grad_norm: 0.8972861495191401, iteration: 26658
loss: 0.9866893887519836,grad_norm: 0.9746668891984923, iteration: 26659
loss: 0.9964589476585388,grad_norm: 0.9999991336899577, iteration: 26660
loss: 1.027240514755249,grad_norm: 0.8562411059308138, iteration: 26661
loss: 1.0179120302200317,grad_norm: 0.9999992800749262, iteration: 26662
loss: 1.0150716304779053,grad_norm: 0.9999999400311511, iteration: 26663
loss: 0.9997342228889465,grad_norm: 0.9999992681902761, iteration: 26664
loss: 1.0186433792114258,grad_norm: 0.962818647239601, iteration: 26665
loss: 0.9673312902450562,grad_norm: 0.9999991303920169, iteration: 26666
loss: 1.030367136001587,grad_norm: 0.9999990571344951, iteration: 26667
loss: 0.9879314303398132,grad_norm: 0.912995833058196, iteration: 26668
loss: 1.0440983772277832,grad_norm: 0.9999990663323373, iteration: 26669
loss: 1.0162773132324219,grad_norm: 0.9999994021467384, iteration: 26670
loss: 1.020605444908142,grad_norm: 0.9020796243407205, iteration: 26671
loss: 1.018409252166748,grad_norm: 0.8309854009639771, iteration: 26672
loss: 1.009421706199646,grad_norm: 0.9999992203179973, iteration: 26673
loss: 1.0254426002502441,grad_norm: 0.9999991670756756, iteration: 26674
loss: 1.0320875644683838,grad_norm: 0.9204757224997241, iteration: 26675
loss: 0.9985216856002808,grad_norm: 0.8108181985597656, iteration: 26676
loss: 1.0169094800949097,grad_norm: 0.9999992155069458, iteration: 26677
loss: 0.9927776455879211,grad_norm: 0.9999991880398619, iteration: 26678
loss: 1.0892212390899658,grad_norm: 0.9999993063876657, iteration: 26679
loss: 0.9965046048164368,grad_norm: 0.999999185998088, iteration: 26680
loss: 1.006341814994812,grad_norm: 0.9205268296411628, iteration: 26681
loss: 0.9620374441146851,grad_norm: 0.9999991324500555, iteration: 26682
loss: 1.0058002471923828,grad_norm: 0.9999991286931478, iteration: 26683
loss: 1.0122874975204468,grad_norm: 0.886898106917235, iteration: 26684
loss: 1.0432615280151367,grad_norm: 0.9999992192416791, iteration: 26685
loss: 1.0404623746871948,grad_norm: 0.9999990479264423, iteration: 26686
loss: 0.9758559465408325,grad_norm: 0.9999991008932364, iteration: 26687
loss: 1.0378471612930298,grad_norm: 0.9999999547422596, iteration: 26688
loss: 1.0276328325271606,grad_norm: 0.9209634390742191, iteration: 26689
loss: 1.020431399345398,grad_norm: 0.9999991014364782, iteration: 26690
loss: 1.0091063976287842,grad_norm: 0.999999254530297, iteration: 26691
loss: 1.0320998430252075,grad_norm: 0.9999991898253325, iteration: 26692
loss: 0.9966258406639099,grad_norm: 0.9999992134795651, iteration: 26693
loss: 0.9814227223396301,grad_norm: 0.9076761274691213, iteration: 26694
loss: 0.9514994025230408,grad_norm: 0.8751457843590126, iteration: 26695
loss: 1.0862383842468262,grad_norm: 0.9999992354283362, iteration: 26696
loss: 1.0586745738983154,grad_norm: 0.9999991337391156, iteration: 26697
loss: 0.9709208607673645,grad_norm: 0.8886675788342107, iteration: 26698
loss: 0.9776906967163086,grad_norm: 0.9999990642253204, iteration: 26699
loss: 1.0214736461639404,grad_norm: 0.8632496959178267, iteration: 26700
loss: 1.0333847999572754,grad_norm: 0.9999992043664979, iteration: 26701
loss: 1.014814019203186,grad_norm: 0.9582708315738921, iteration: 26702
loss: 0.9733158946037292,grad_norm: 0.9138231813354868, iteration: 26703
loss: 0.9893863201141357,grad_norm: 0.9122863922808306, iteration: 26704
loss: 1.0238850116729736,grad_norm: 0.9992701220505509, iteration: 26705
loss: 1.0273557901382446,grad_norm: 0.9999991828831376, iteration: 26706
loss: 0.9956983327865601,grad_norm: 0.9999990914481166, iteration: 26707
loss: 0.9802572131156921,grad_norm: 0.9510683270261396, iteration: 26708
loss: 1.0113191604614258,grad_norm: 0.7994960153591034, iteration: 26709
loss: 1.0891754627227783,grad_norm: 0.9999991797592044, iteration: 26710
loss: 1.014035940170288,grad_norm: 0.9576426004458327, iteration: 26711
loss: 0.9541365504264832,grad_norm: 0.966456192660048, iteration: 26712
loss: 1.0090351104736328,grad_norm: 0.9772855471743337, iteration: 26713
loss: 1.0153720378875732,grad_norm: 0.9206457811889983, iteration: 26714
loss: 1.0067466497421265,grad_norm: 0.9999990574242548, iteration: 26715
loss: 1.007523775100708,grad_norm: 0.8487770180380458, iteration: 26716
loss: 1.0082311630249023,grad_norm: 0.9999991567943669, iteration: 26717
loss: 1.0214309692382812,grad_norm: 0.9290290017337932, iteration: 26718
loss: 1.0104646682739258,grad_norm: 0.8924866046967848, iteration: 26719
loss: 1.0519676208496094,grad_norm: 0.9999995434275057, iteration: 26720
loss: 0.9643944501876831,grad_norm: 0.8558006477966725, iteration: 26721
loss: 0.9910897612571716,grad_norm: 0.999999017170488, iteration: 26722
loss: 1.0297373533248901,grad_norm: 0.9775654615409963, iteration: 26723
loss: 1.0438708066940308,grad_norm: 0.9999992744993341, iteration: 26724
loss: 0.9658969044685364,grad_norm: 0.9999990345207133, iteration: 26725
loss: 1.0402575731277466,grad_norm: 0.968224605072456, iteration: 26726
loss: 1.0339832305908203,grad_norm: 0.999999171231591, iteration: 26727
loss: 1.018489122390747,grad_norm: 0.9999990387790819, iteration: 26728
loss: 1.0218629837036133,grad_norm: 0.8570200678005492, iteration: 26729
loss: 1.0189343690872192,grad_norm: 0.999999004877751, iteration: 26730
loss: 0.9958236813545227,grad_norm: 0.9999990204819517, iteration: 26731
loss: 0.9837316274642944,grad_norm: 0.9999991574380984, iteration: 26732
loss: 1.0080939531326294,grad_norm: 0.9999992200689714, iteration: 26733
loss: 1.0153322219848633,grad_norm: 0.9999990413294727, iteration: 26734
loss: 1.0521293878555298,grad_norm: 0.999999029723986, iteration: 26735
loss: 1.0058091878890991,grad_norm: 0.9025746061968967, iteration: 26736
loss: 1.011228084564209,grad_norm: 0.9999991301641338, iteration: 26737
loss: 0.976613461971283,grad_norm: 0.9894722440745207, iteration: 26738
loss: 0.9942085146903992,grad_norm: 0.9999990909627229, iteration: 26739
loss: 1.0302681922912598,grad_norm: 0.9999993662945007, iteration: 26740
loss: 0.9876206517219543,grad_norm: 0.999999032330375, iteration: 26741
loss: 1.0166174173355103,grad_norm: 0.9999990401022218, iteration: 26742
loss: 0.9780756235122681,grad_norm: 0.9999991006947149, iteration: 26743
loss: 1.0365184545516968,grad_norm: 0.9312323493638939, iteration: 26744
loss: 0.9830460548400879,grad_norm: 0.999999414066841, iteration: 26745
loss: 1.0491390228271484,grad_norm: 0.9999991792639371, iteration: 26746
loss: 1.0193580389022827,grad_norm: 0.9999990926483147, iteration: 26747
loss: 0.9993375539779663,grad_norm: 0.9999989863989782, iteration: 26748
loss: 1.0002772808074951,grad_norm: 0.8900468285334026, iteration: 26749
loss: 1.0300596952438354,grad_norm: 0.8962990178322441, iteration: 26750
loss: 0.9720630645751953,grad_norm: 0.8029604897037766, iteration: 26751
loss: 1.0211970806121826,grad_norm: 0.999999089299317, iteration: 26752
loss: 0.9641252756118774,grad_norm: 0.9999991713885328, iteration: 26753
loss: 1.0440303087234497,grad_norm: 0.9999996935804076, iteration: 26754
loss: 1.0759496688842773,grad_norm: 0.9999994100024081, iteration: 26755
loss: 1.0190210342407227,grad_norm: 0.9999988802216774, iteration: 26756
loss: 0.9817498326301575,grad_norm: 0.9999993025303822, iteration: 26757
loss: 1.0229299068450928,grad_norm: 0.896566565495061, iteration: 26758
loss: 0.9959825873374939,grad_norm: 0.9999990354794154, iteration: 26759
loss: 0.9745986461639404,grad_norm: 0.9999990206190075, iteration: 26760
loss: 1.003828525543213,grad_norm: 0.9999990987874002, iteration: 26761
loss: 0.9684057831764221,grad_norm: 0.94865724116051, iteration: 26762
loss: 0.989719569683075,grad_norm: 0.8695781971009049, iteration: 26763
loss: 1.006708025932312,grad_norm: 0.9999990032058844, iteration: 26764
loss: 1.0174635648727417,grad_norm: 0.9999995937339775, iteration: 26765
loss: 1.0490401983261108,grad_norm: 0.9999999140743603, iteration: 26766
loss: 0.9963440895080566,grad_norm: 0.9999994290000597, iteration: 26767
loss: 1.0266087055206299,grad_norm: 0.9999993047165073, iteration: 26768
loss: 0.9846087098121643,grad_norm: 0.9999989621738409, iteration: 26769
loss: 1.033880352973938,grad_norm: 0.9545008814408154, iteration: 26770
loss: 1.0220845937728882,grad_norm: 0.9999991362501621, iteration: 26771
loss: 0.9764769673347473,grad_norm: 0.9999998141694179, iteration: 26772
loss: 1.086213231086731,grad_norm: 0.9999995827975506, iteration: 26773
loss: 1.0145183801651,grad_norm: 0.9999989756755905, iteration: 26774
loss: 1.0007437467575073,grad_norm: 0.9897200924903019, iteration: 26775
loss: 1.0317422151565552,grad_norm: 0.9282034084002685, iteration: 26776
loss: 1.0127193927764893,grad_norm: 0.9970331553319526, iteration: 26777
loss: 1.019139051437378,grad_norm: 0.9999990743982962, iteration: 26778
loss: 0.9877492189407349,grad_norm: 0.8980531688341497, iteration: 26779
loss: 1.0854772329330444,grad_norm: 0.9999992558268781, iteration: 26780
loss: 1.0828691720962524,grad_norm: 0.9999995873685924, iteration: 26781
loss: 1.000666618347168,grad_norm: 0.9999993134910202, iteration: 26782
loss: 1.0247979164123535,grad_norm: 0.9201745223895447, iteration: 26783
loss: 0.9859622120857239,grad_norm: 0.9999995337404787, iteration: 26784
loss: 1.1377789974212646,grad_norm: 0.9999996623772618, iteration: 26785
loss: 1.0221484899520874,grad_norm: 0.9999992636957378, iteration: 26786
loss: 1.0673550367355347,grad_norm: 0.9999995004712345, iteration: 26787
loss: 0.9465377330780029,grad_norm: 0.9723590062421331, iteration: 26788
loss: 1.2072412967681885,grad_norm: 0.9999996833888783, iteration: 26789
loss: 1.0544100999832153,grad_norm: 0.9927304607030409, iteration: 26790
loss: 1.1392972469329834,grad_norm: 0.9999996970809591, iteration: 26791
loss: 1.0210567712783813,grad_norm: 0.9999993212336319, iteration: 26792
loss: 1.1663638353347778,grad_norm: 0.9999998825002038, iteration: 26793
loss: 0.9867006540298462,grad_norm: 0.999999234252473, iteration: 26794
loss: 1.0303113460540771,grad_norm: 0.9999994495326288, iteration: 26795
loss: 1.015038251876831,grad_norm: 0.9999989850177744, iteration: 26796
loss: 1.0320907831192017,grad_norm: 0.9999990478390325, iteration: 26797
loss: 1.0447062253952026,grad_norm: 0.961973036108446, iteration: 26798
loss: 1.038153886795044,grad_norm: 0.8241548249083146, iteration: 26799
loss: 0.9929969310760498,grad_norm: 0.9999991147779024, iteration: 26800
loss: 1.0004266500473022,grad_norm: 0.9999991746938538, iteration: 26801
loss: 1.0194101333618164,grad_norm: 0.9999996843549924, iteration: 26802
loss: 1.0151770114898682,grad_norm: 0.999999089854165, iteration: 26803
loss: 0.9868603944778442,grad_norm: 0.999999085703942, iteration: 26804
loss: 1.0523179769515991,grad_norm: 0.9999996916880357, iteration: 26805
loss: 1.0308486223220825,grad_norm: 0.9999994317038331, iteration: 26806
loss: 1.0087882280349731,grad_norm: 0.9999996191096536, iteration: 26807
loss: 0.9831394553184509,grad_norm: 0.9027544791537816, iteration: 26808
loss: 1.0217761993408203,grad_norm: 0.9723092297162063, iteration: 26809
loss: 1.0844495296478271,grad_norm: 0.9999993692294258, iteration: 26810
loss: 1.0168503522872925,grad_norm: 0.9999991567228356, iteration: 26811
loss: 1.0073020458221436,grad_norm: 0.9252991657732328, iteration: 26812
loss: 0.9988993406295776,grad_norm: 0.9968792358088969, iteration: 26813
loss: 1.0783579349517822,grad_norm: 0.9999994303820714, iteration: 26814
loss: 1.0052225589752197,grad_norm: 0.9256934767346626, iteration: 26815
loss: 0.9842113852500916,grad_norm: 0.9999991674645332, iteration: 26816
loss: 1.0559923648834229,grad_norm: 0.9999997710077778, iteration: 26817
loss: 1.0127060413360596,grad_norm: 0.9999991041893553, iteration: 26818
loss: 1.0084325075149536,grad_norm: 0.9312344090270737, iteration: 26819
loss: 1.214248538017273,grad_norm: 0.9999995674974032, iteration: 26820
loss: 0.9895551204681396,grad_norm: 0.9999990578640758, iteration: 26821
loss: 0.9981058835983276,grad_norm: 0.9999990700675522, iteration: 26822
loss: 0.9734417200088501,grad_norm: 0.9999990885059221, iteration: 26823
loss: 1.0437268018722534,grad_norm: 0.9999991807302483, iteration: 26824
loss: 0.9638459086418152,grad_norm: 0.9904707553527038, iteration: 26825
loss: 1.0025858879089355,grad_norm: 0.9999995817838478, iteration: 26826
loss: 1.0237863063812256,grad_norm: 0.9815936490742229, iteration: 26827
loss: 1.012571096420288,grad_norm: 0.8725821320729713, iteration: 26828
loss: 1.01282799243927,grad_norm: 0.9999994316828702, iteration: 26829
loss: 1.0233325958251953,grad_norm: 0.910822064436916, iteration: 26830
loss: 0.9821587800979614,grad_norm: 0.9999990998622562, iteration: 26831
loss: 0.97822105884552,grad_norm: 0.9881182162781592, iteration: 26832
loss: 1.0283100605010986,grad_norm: 0.8558093710483773, iteration: 26833
loss: 1.019303560256958,grad_norm: 0.999999113089116, iteration: 26834
loss: 1.0182782411575317,grad_norm: 0.9999991515133907, iteration: 26835
loss: 1.0389533042907715,grad_norm: 0.9999993143121849, iteration: 26836
loss: 1.0307915210723877,grad_norm: 0.9999992297608191, iteration: 26837
loss: 1.0533820390701294,grad_norm: 0.9999996755517675, iteration: 26838
loss: 1.0316070318222046,grad_norm: 0.9999998763799551, iteration: 26839
loss: 0.9811595678329468,grad_norm: 0.9999990924531055, iteration: 26840
loss: 0.9895737767219543,grad_norm: 0.9865373830197013, iteration: 26841
loss: 1.0277245044708252,grad_norm: 0.9999993544824285, iteration: 26842
loss: 1.0003924369812012,grad_norm: 0.9999990596901934, iteration: 26843
loss: 0.9672840237617493,grad_norm: 0.9999992322352389, iteration: 26844
loss: 1.014438509941101,grad_norm: 0.9999993517639991, iteration: 26845
loss: 1.032352089881897,grad_norm: 0.9999994529307396, iteration: 26846
loss: 1.0752331018447876,grad_norm: 0.9999997944461814, iteration: 26847
loss: 1.0275894403457642,grad_norm: 0.8857336553573344, iteration: 26848
loss: 1.0095770359039307,grad_norm: 0.9977481769104141, iteration: 26849
loss: 0.9830653667449951,grad_norm: 0.9999992297817691, iteration: 26850
loss: 0.9658196568489075,grad_norm: 0.9315573428171458, iteration: 26851
loss: 0.9615243673324585,grad_norm: 0.9340890840821136, iteration: 26852
loss: 1.0264534950256348,grad_norm: 0.8014929689683218, iteration: 26853
loss: 1.0387427806854248,grad_norm: 0.9333021943176477, iteration: 26854
loss: 1.0452759265899658,grad_norm: 0.9999993284681916, iteration: 26855
loss: 0.9865967035293579,grad_norm: 0.9999991820223002, iteration: 26856
loss: 0.9431291818618774,grad_norm: 0.9680236892762335, iteration: 26857
loss: 1.0464205741882324,grad_norm: 0.9999989932226331, iteration: 26858
loss: 1.0209344625473022,grad_norm: 0.9999992511302985, iteration: 26859
loss: 0.995855987071991,grad_norm: 0.9999995564370067, iteration: 26860
loss: 1.0203604698181152,grad_norm: 0.9999991374511118, iteration: 26861
loss: 1.0353455543518066,grad_norm: 0.9999996483346512, iteration: 26862
loss: 0.9557471871376038,grad_norm: 0.993818329614174, iteration: 26863
loss: 0.9853971600532532,grad_norm: 0.9999990706377098, iteration: 26864
loss: 0.995279848575592,grad_norm: 0.9999992882374061, iteration: 26865
loss: 1.0601000785827637,grad_norm: 0.9999990422424214, iteration: 26866
loss: 0.9791396856307983,grad_norm: 0.9799833797933196, iteration: 26867
loss: 1.0383285284042358,grad_norm: 0.9999991630649511, iteration: 26868
loss: 1.0603852272033691,grad_norm: 0.9909584332272076, iteration: 26869
loss: 0.9883816838264465,grad_norm: 0.9999998965367953, iteration: 26870
loss: 1.0466744899749756,grad_norm: 0.9999996691760685, iteration: 26871
loss: 0.9934037327766418,grad_norm: 0.9999992637264626, iteration: 26872
loss: 1.0081666707992554,grad_norm: 0.9896720296630412, iteration: 26873
loss: 0.9649009704589844,grad_norm: 0.9999991749097144, iteration: 26874
loss: 0.9648142457008362,grad_norm: 0.9266367044914902, iteration: 26875
loss: 1.0585544109344482,grad_norm: 0.999999269728897, iteration: 26876
loss: 1.0534011125564575,grad_norm: 0.9999993578256066, iteration: 26877
loss: 0.978527843952179,grad_norm: 0.9999991943947724, iteration: 26878
loss: 1.0362628698349,grad_norm: 0.9999994443563883, iteration: 26879
loss: 1.0427829027175903,grad_norm: 0.9999994736362441, iteration: 26880
loss: 1.0385215282440186,grad_norm: 0.9999999152133656, iteration: 26881
loss: 1.0221720933914185,grad_norm: 0.999999267367611, iteration: 26882
loss: 1.0203732252120972,grad_norm: 0.9999989647492997, iteration: 26883
loss: 0.9644761681556702,grad_norm: 0.9999991247704809, iteration: 26884
loss: 1.0417311191558838,grad_norm: 0.9999993655449773, iteration: 26885
loss: 0.9741376042366028,grad_norm: 0.9488937719297682, iteration: 26886
loss: 0.9507929086685181,grad_norm: 0.857416753783512, iteration: 26887
loss: 1.0878093242645264,grad_norm: 0.9999997410531406, iteration: 26888
loss: 0.9927794933319092,grad_norm: 0.9999991810098388, iteration: 26889
loss: 1.0015687942504883,grad_norm: 0.999999423213658, iteration: 26890
loss: 1.0546954870224,grad_norm: 0.999999183977656, iteration: 26891
loss: 1.0389564037322998,grad_norm: 0.9646074887316471, iteration: 26892
loss: 1.0382078886032104,grad_norm: 0.9622990219352675, iteration: 26893
loss: 0.984037458896637,grad_norm: 0.9999991306599968, iteration: 26894
loss: 1.024055004119873,grad_norm: 0.999758315816374, iteration: 26895
loss: 0.9811580181121826,grad_norm: 0.9999990693331064, iteration: 26896
loss: 1.010326623916626,grad_norm: 0.9999992685377144, iteration: 26897
loss: 1.0062826871871948,grad_norm: 0.9748426541360556, iteration: 26898
loss: 1.0508272647857666,grad_norm: 0.9999997619560212, iteration: 26899
loss: 0.97007155418396,grad_norm: 0.9999990411847411, iteration: 26900
loss: 1.0145378112792969,grad_norm: 0.8834375921691231, iteration: 26901
loss: 1.0122138261795044,grad_norm: 0.8313143259694381, iteration: 26902
loss: 0.9932658672332764,grad_norm: 0.9999990937245826, iteration: 26903
loss: 1.010015606880188,grad_norm: 0.9999991629042327, iteration: 26904
loss: 0.9924934506416321,grad_norm: 0.9999992148216096, iteration: 26905
loss: 1.0345932245254517,grad_norm: 0.99999977488469, iteration: 26906
loss: 1.0805859565734863,grad_norm: 0.9999996377958402, iteration: 26907
loss: 0.9912927746772766,grad_norm: 0.9837210102501254, iteration: 26908
loss: 1.0918984413146973,grad_norm: 0.9999994082084125, iteration: 26909
loss: 0.9782518148422241,grad_norm: 0.9999990910202204, iteration: 26910
loss: 1.0062774419784546,grad_norm: 0.9708318426207068, iteration: 26911
loss: 1.0536354780197144,grad_norm: 0.9999995514608044, iteration: 26912
loss: 1.0170210599899292,grad_norm: 0.9999991100986395, iteration: 26913
loss: 1.0182452201843262,grad_norm: 0.9999991409903713, iteration: 26914
loss: 0.997035801410675,grad_norm: 0.9963473330409054, iteration: 26915
loss: 0.9688196778297424,grad_norm: 0.9999993598902672, iteration: 26916
loss: 1.0028599500656128,grad_norm: 0.9432427199732603, iteration: 26917
loss: 0.9516963958740234,grad_norm: 0.7912518565475423, iteration: 26918
loss: 0.9966705441474915,grad_norm: 0.9499205995376765, iteration: 26919
loss: 1.0609705448150635,grad_norm: 0.9999990164956852, iteration: 26920
loss: 1.0126447677612305,grad_norm: 0.9999992259963975, iteration: 26921
loss: 1.0451087951660156,grad_norm: 0.9999991057539068, iteration: 26922
loss: 0.9463746547698975,grad_norm: 0.9999991915173447, iteration: 26923
loss: 0.9749197959899902,grad_norm: 0.9999993252058709, iteration: 26924
loss: 1.0548900365829468,grad_norm: 0.9999996935440648, iteration: 26925
loss: 1.0334393978118896,grad_norm: 0.99116144908489, iteration: 26926
loss: 1.0265586376190186,grad_norm: 0.9999991421965495, iteration: 26927
loss: 1.0528165102005005,grad_norm: 0.9999992367499881, iteration: 26928
loss: 0.9761037826538086,grad_norm: 0.8685125101007696, iteration: 26929
loss: 0.9974334239959717,grad_norm: 0.8360627363592299, iteration: 26930
loss: 0.9883893728256226,grad_norm: 0.9999996470713459, iteration: 26931
loss: 1.0517911911010742,grad_norm: 0.9906144629664342, iteration: 26932
loss: 1.0491927862167358,grad_norm: 0.9999990645438099, iteration: 26933
loss: 1.0248605012893677,grad_norm: 0.9999996904356027, iteration: 26934
loss: 1.008821725845337,grad_norm: 0.9999992487473498, iteration: 26935
loss: 1.013735055923462,grad_norm: 0.9539601915147111, iteration: 26936
loss: 1.0350929498672485,grad_norm: 0.9309122759814408, iteration: 26937
loss: 1.0132406949996948,grad_norm: 0.999999178914455, iteration: 26938
loss: 1.0046100616455078,grad_norm: 0.960300119754444, iteration: 26939
loss: 1.0108883380889893,grad_norm: 0.8870576394368167, iteration: 26940
loss: 1.008671522140503,grad_norm: 0.9581435843761911, iteration: 26941
loss: 1.00382661819458,grad_norm: 0.978216982431412, iteration: 26942
loss: 1.0372506380081177,grad_norm: 0.9999998244508769, iteration: 26943
loss: 1.0641252994537354,grad_norm: 0.9999995209399752, iteration: 26944
loss: 1.0067378282546997,grad_norm: 0.9307664222678781, iteration: 26945
loss: 1.0401239395141602,grad_norm: 0.9999993613232236, iteration: 26946
loss: 1.0142511129379272,grad_norm: 0.999999084921654, iteration: 26947
loss: 0.9985501170158386,grad_norm: 0.9999997606179302, iteration: 26948
loss: 1.0126042366027832,grad_norm: 0.9999989619835411, iteration: 26949
loss: 0.9750049114227295,grad_norm: 0.999999266121926, iteration: 26950
loss: 0.9820325374603271,grad_norm: 0.9999992058756801, iteration: 26951
loss: 0.9711219072341919,grad_norm: 0.9268779352000002, iteration: 26952
loss: 1.000115156173706,grad_norm: 0.8153633888943291, iteration: 26953
loss: 1.0123813152313232,grad_norm: 0.9999991906597094, iteration: 26954
loss: 1.01947820186615,grad_norm: 0.9464678191222108, iteration: 26955
loss: 1.0555039644241333,grad_norm: 0.9999992321933344, iteration: 26956
loss: 0.9989408254623413,grad_norm: 0.9999990932431041, iteration: 26957
loss: 0.993049144744873,grad_norm: 0.9999992799497357, iteration: 26958
loss: 1.0600641965866089,grad_norm: 0.9999993306473243, iteration: 26959
loss: 1.0302188396453857,grad_norm: 0.8874501470417036, iteration: 26960
loss: 0.9531195163726807,grad_norm: 0.9765913504869784, iteration: 26961
loss: 1.0048439502716064,grad_norm: 0.9999990921700678, iteration: 26962
loss: 0.9770382046699524,grad_norm: 0.9999994936808952, iteration: 26963
loss: 1.0027111768722534,grad_norm: 0.9999991314340331, iteration: 26964
loss: 0.9716975688934326,grad_norm: 0.9999992305652172, iteration: 26965
loss: 0.9621968269348145,grad_norm: 0.9999991764505177, iteration: 26966
loss: 1.0280174016952515,grad_norm: 0.9999991258156335, iteration: 26967
loss: 1.0113729238510132,grad_norm: 0.9999991776671686, iteration: 26968
loss: 1.0266468524932861,grad_norm: 0.9999991527558503, iteration: 26969
loss: 1.0203580856323242,grad_norm: 0.9999990867364709, iteration: 26970
loss: 0.9838855266571045,grad_norm: 0.9999992083224337, iteration: 26971
loss: 1.0417989492416382,grad_norm: 0.853485908994267, iteration: 26972
loss: 1.0156958103179932,grad_norm: 0.9184716991794566, iteration: 26973
loss: 0.9826216697692871,grad_norm: 0.9999990181582712, iteration: 26974
loss: 1.0537021160125732,grad_norm: 0.9999993174553852, iteration: 26975
loss: 1.0025277137756348,grad_norm: 0.9999993312315709, iteration: 26976
loss: 1.0037665367126465,grad_norm: 0.9999992794723765, iteration: 26977
loss: 0.9688247442245483,grad_norm: 0.9999992636240365, iteration: 26978
loss: 0.9680348634719849,grad_norm: 0.9999992111358823, iteration: 26979
loss: 0.989293098449707,grad_norm: 0.9999990992970516, iteration: 26980
loss: 1.0182764530181885,grad_norm: 0.9999992344457772, iteration: 26981
loss: 1.0317049026489258,grad_norm: 0.9157337081382906, iteration: 26982
loss: 0.9712632298469543,grad_norm: 0.999999125640979, iteration: 26983
loss: 1.0552724599838257,grad_norm: 0.8735255556085079, iteration: 26984
loss: 1.0401787757873535,grad_norm: 0.9999991568415106, iteration: 26985
loss: 1.006826400756836,grad_norm: 0.9999992090714817, iteration: 26986
loss: 1.039214849472046,grad_norm: 0.8787307367193063, iteration: 26987
loss: 1.0259897708892822,grad_norm: 0.85028517338282, iteration: 26988
loss: 1.0165408849716187,grad_norm: 0.9588827944550649, iteration: 26989
loss: 1.049368143081665,grad_norm: 0.8965068346554713, iteration: 26990
loss: 1.0116140842437744,grad_norm: 0.9999991699620718, iteration: 26991
loss: 1.0107629299163818,grad_norm: 0.8886696901717045, iteration: 26992
loss: 1.004384994506836,grad_norm: 0.9273891077005488, iteration: 26993
loss: 1.123380422592163,grad_norm: 0.9999993169649581, iteration: 26994
loss: 1.0313984155654907,grad_norm: 0.9999991587418294, iteration: 26995
loss: 1.0139962434768677,grad_norm: 0.9380287764781629, iteration: 26996
loss: 1.0355727672576904,grad_norm: 0.9999998818077128, iteration: 26997
loss: 1.047766089439392,grad_norm: 0.9999997877747852, iteration: 26998
loss: 0.9795938730239868,grad_norm: 0.8825414194692462, iteration: 26999
loss: 1.0069366693496704,grad_norm: 0.9999990435570042, iteration: 27000
loss: 1.0105313062667847,grad_norm: 0.9999991066389816, iteration: 27001
loss: 0.9888909459114075,grad_norm: 0.9999996499331311, iteration: 27002
loss: 0.948762059211731,grad_norm: 0.9999990362689944, iteration: 27003
loss: 1.0009396076202393,grad_norm: 0.9228231621510108, iteration: 27004
loss: 0.9778192639350891,grad_norm: 0.9999992534333634, iteration: 27005
loss: 1.0428458452224731,grad_norm: 0.9999996620289014, iteration: 27006
loss: 1.038982629776001,grad_norm: 0.9999994423713603, iteration: 27007
loss: 0.9889816045761108,grad_norm: 0.9999992000141725, iteration: 27008
loss: 1.0326480865478516,grad_norm: 0.9395271671858578, iteration: 27009
loss: 1.0482087135314941,grad_norm: 0.8808857430911708, iteration: 27010
loss: 1.0270726680755615,grad_norm: 0.8938938705322588, iteration: 27011
loss: 1.0506199598312378,grad_norm: 0.989374476736612, iteration: 27012
loss: 1.0205086469650269,grad_norm: 0.937166597775059, iteration: 27013
loss: 0.9978970289230347,grad_norm: 0.9999990251839735, iteration: 27014
loss: 0.9697400331497192,grad_norm: 0.9521028159226537, iteration: 27015
loss: 0.9768178462982178,grad_norm: 0.8691550016645414, iteration: 27016
loss: 1.0101439952850342,grad_norm: 0.918089729399998, iteration: 27017
loss: 1.050054907798767,grad_norm: 0.9999990991962233, iteration: 27018
loss: 1.0110520124435425,grad_norm: 0.9999991024149903, iteration: 27019
loss: 1.0237139463424683,grad_norm: 0.8805178171413659, iteration: 27020
loss: 1.023305058479309,grad_norm: 0.9999993363229125, iteration: 27021
loss: 0.9790080189704895,grad_norm: 0.9529543149049132, iteration: 27022
loss: 1.020309567451477,grad_norm: 0.9999991462980801, iteration: 27023
loss: 1.0728332996368408,grad_norm: 0.9999993989380791, iteration: 27024
loss: 0.9963685274124146,grad_norm: 0.9999992618681637, iteration: 27025
loss: 1.0301119089126587,grad_norm: 0.9999998765755808, iteration: 27026
loss: 1.043982744216919,grad_norm: 0.999999045632105, iteration: 27027
loss: 1.0020906925201416,grad_norm: 0.9999991723351488, iteration: 27028
loss: 1.012900471687317,grad_norm: 0.9999990807347313, iteration: 27029
loss: 1.0191154479980469,grad_norm: 0.8628752391706599, iteration: 27030
loss: 1.0375645160675049,grad_norm: 0.9999994574005339, iteration: 27031
loss: 0.9915894865989685,grad_norm: 0.999999213023138, iteration: 27032
loss: 1.0362646579742432,grad_norm: 0.9999994469108495, iteration: 27033
loss: 1.0188932418823242,grad_norm: 0.9081820314902539, iteration: 27034
loss: 1.0037283897399902,grad_norm: 0.9999993013408794, iteration: 27035
loss: 0.9715471267700195,grad_norm: 0.9999993918487735, iteration: 27036
loss: 1.0164103507995605,grad_norm: 0.9999992388629526, iteration: 27037
loss: 0.9945932626724243,grad_norm: 0.8899097761165619, iteration: 27038
loss: 0.9802893996238708,grad_norm: 0.9999992409891679, iteration: 27039
loss: 1.017757534980774,grad_norm: 0.999999298295903, iteration: 27040
loss: 1.0306895971298218,grad_norm: 0.9999990679336417, iteration: 27041
loss: 0.9903718829154968,grad_norm: 0.9999992039976736, iteration: 27042
loss: 0.9735937118530273,grad_norm: 0.9999989877315327, iteration: 27043
loss: 1.0584408044815063,grad_norm: 0.9999992251329437, iteration: 27044
loss: 1.0191481113433838,grad_norm: 0.9999993670470806, iteration: 27045
loss: 1.0194453001022339,grad_norm: 0.9348236459393993, iteration: 27046
loss: 1.043386459350586,grad_norm: 0.892143945131811, iteration: 27047
loss: 1.1042767763137817,grad_norm: 0.9999994469968102, iteration: 27048
loss: 0.9708755612373352,grad_norm: 0.9999994191840769, iteration: 27049
loss: 1.038469910621643,grad_norm: 0.9999992808387735, iteration: 27050
loss: 1.0239686965942383,grad_norm: 0.9999991439526627, iteration: 27051
loss: 1.0349857807159424,grad_norm: 0.999999086035728, iteration: 27052
loss: 1.0651555061340332,grad_norm: 0.989935219195665, iteration: 27053
loss: 1.0406049489974976,grad_norm: 0.9520791936693392, iteration: 27054
loss: 1.0458570718765259,grad_norm: 0.9999997295463696, iteration: 27055
loss: 1.021009087562561,grad_norm: 0.9999989964368754, iteration: 27056
loss: 0.9750276803970337,grad_norm: 0.9266292451800753, iteration: 27057
loss: 0.9905927777290344,grad_norm: 0.9999993772615798, iteration: 27058
loss: 1.0486356019973755,grad_norm: 0.9999998233686503, iteration: 27059
loss: 0.9979652166366577,grad_norm: 0.9330440773553704, iteration: 27060
loss: 0.9739938974380493,grad_norm: 0.9999990198224319, iteration: 27061
loss: 1.0623960494995117,grad_norm: 0.9999993252619832, iteration: 27062
loss: 1.0864757299423218,grad_norm: 0.9999994303890247, iteration: 27063
loss: 1.0160514116287231,grad_norm: 0.995164002906029, iteration: 27064
loss: 0.9991786479949951,grad_norm: 0.9242218270191681, iteration: 27065
loss: 0.9910237789154053,grad_norm: 0.764663576548072, iteration: 27066
loss: 1.0302811861038208,grad_norm: 0.8882157289779518, iteration: 27067
loss: 0.9697892069816589,grad_norm: 0.9600690352517023, iteration: 27068
loss: 0.9539925456047058,grad_norm: 0.9266560307895606, iteration: 27069
loss: 1.0126553773880005,grad_norm: 0.9503075492323074, iteration: 27070
loss: 1.0120748281478882,grad_norm: 0.9999991161689445, iteration: 27071
loss: 1.0567994117736816,grad_norm: 0.9999997263983216, iteration: 27072
loss: 1.024679183959961,grad_norm: 0.9999994660260354, iteration: 27073
loss: 0.9867680072784424,grad_norm: 0.999999448793119, iteration: 27074
loss: 1.025038242340088,grad_norm: 0.9999991040782253, iteration: 27075
loss: 1.0354946851730347,grad_norm: 0.9999992431986184, iteration: 27076
loss: 1.0395312309265137,grad_norm: 0.992180736079153, iteration: 27077
loss: 1.0066879987716675,grad_norm: 0.9929527354955799, iteration: 27078
loss: 0.9720431566238403,grad_norm: 0.9999991225681408, iteration: 27079
loss: 1.010982871055603,grad_norm: 0.9999991846059076, iteration: 27080
loss: 1.0108444690704346,grad_norm: 0.9999993741840832, iteration: 27081
loss: 1.0698258876800537,grad_norm: 0.9999994242975081, iteration: 27082
loss: 1.0239368677139282,grad_norm: 0.9999991489629118, iteration: 27083
loss: 1.063572883605957,grad_norm: 0.9999992758122355, iteration: 27084
loss: 1.0558698177337646,grad_norm: 0.9999993497332939, iteration: 27085
loss: 1.001006007194519,grad_norm: 0.9034909668879236, iteration: 27086
loss: 1.0282739400863647,grad_norm: 0.9999996679879912, iteration: 27087
loss: 1.0321069955825806,grad_norm: 0.9999993074147605, iteration: 27088
loss: 0.9782333970069885,grad_norm: 0.9999999064762165, iteration: 27089
loss: 1.0123614072799683,grad_norm: 0.9999990186660437, iteration: 27090
loss: 1.0038546323776245,grad_norm: 0.9999992189432186, iteration: 27091
loss: 1.0353593826293945,grad_norm: 0.9999995097090237, iteration: 27092
loss: 0.9707550406455994,grad_norm: 0.9822528749365782, iteration: 27093
loss: 1.0265998840332031,grad_norm: 0.9999991240674103, iteration: 27094
loss: 0.9834524393081665,grad_norm: 0.9999991472254621, iteration: 27095
loss: 0.99675053358078,grad_norm: 0.9999990263934524, iteration: 27096
loss: 1.0283269882202148,grad_norm: 0.999999253668988, iteration: 27097
loss: 0.9908809661865234,grad_norm: 0.9500218133749185, iteration: 27098
loss: 1.0193368196487427,grad_norm: 0.9999995080028569, iteration: 27099
loss: 1.0171258449554443,grad_norm: 0.9999998820086295, iteration: 27100
loss: 1.0536848306655884,grad_norm: 0.9999993122114653, iteration: 27101
loss: 1.0144224166870117,grad_norm: 0.9048846767402101, iteration: 27102
loss: 1.015700101852417,grad_norm: 0.9999998522366799, iteration: 27103
loss: 0.9971507787704468,grad_norm: 0.8157020417089091, iteration: 27104
loss: 1.033261775970459,grad_norm: 0.999999271032295, iteration: 27105
loss: 0.9934220910072327,grad_norm: 0.8078008255752611, iteration: 27106
loss: 0.9796248078346252,grad_norm: 0.9161207676487648, iteration: 27107
loss: 1.052027940750122,grad_norm: 0.9999994162202936, iteration: 27108
loss: 1.0233222246170044,grad_norm: 0.9999991824498682, iteration: 27109
loss: 1.016465187072754,grad_norm: 0.9550903002140148, iteration: 27110
loss: 1.0028082132339478,grad_norm: 0.999999064222443, iteration: 27111
loss: 0.9748271107673645,grad_norm: 0.9540451494805046, iteration: 27112
loss: 1.0125586986541748,grad_norm: 0.9844367592252595, iteration: 27113
loss: 0.9960311651229858,grad_norm: 0.9999990626283219, iteration: 27114
loss: 0.9510498046875,grad_norm: 0.9999991538747821, iteration: 27115
loss: 1.0470447540283203,grad_norm: 0.9718276060326513, iteration: 27116
loss: 1.1240262985229492,grad_norm: 0.9999995151610077, iteration: 27117
loss: 0.9899214506149292,grad_norm: 0.9999992989992178, iteration: 27118
loss: 0.976880669593811,grad_norm: 0.9999990916211794, iteration: 27119
loss: 1.027561902999878,grad_norm: 0.9999992358288458, iteration: 27120
loss: 0.9949662089347839,grad_norm: 0.9959810276867211, iteration: 27121
loss: 1.0170567035675049,grad_norm: 0.9907553580034034, iteration: 27122
loss: 0.961761474609375,grad_norm: 0.852099998020567, iteration: 27123
loss: 1.0365259647369385,grad_norm: 0.9999992673453936, iteration: 27124
loss: 1.0736523866653442,grad_norm: 0.9999994988758718, iteration: 27125
loss: 1.0512834787368774,grad_norm: 0.9999990761774846, iteration: 27126
loss: 1.0279477834701538,grad_norm: 0.87163916478023, iteration: 27127
loss: 0.9999528527259827,grad_norm: 0.9999992015281489, iteration: 27128
loss: 1.0492554903030396,grad_norm: 0.9763253928293039, iteration: 27129
loss: 1.0427697896957397,grad_norm: 0.9999992322999757, iteration: 27130
loss: 1.0149723291397095,grad_norm: 0.9999991992772562, iteration: 27131
loss: 1.0145306587219238,grad_norm: 0.9902455759840212, iteration: 27132
loss: 0.9603883028030396,grad_norm: 0.9999991904575919, iteration: 27133
loss: 1.0403106212615967,grad_norm: 0.9999989936976149, iteration: 27134
loss: 1.053169846534729,grad_norm: 0.9999993944100527, iteration: 27135
loss: 0.9759669303894043,grad_norm: 0.9905334554383327, iteration: 27136
loss: 0.9901360273361206,grad_norm: 0.9857416578862429, iteration: 27137
loss: 0.9658417701721191,grad_norm: 0.9999990880585354, iteration: 27138
loss: 0.9862774610519409,grad_norm: 0.865927205248906, iteration: 27139
loss: 1.0364083051681519,grad_norm: 0.9999992101607489, iteration: 27140
loss: 0.9965261816978455,grad_norm: 0.9999989260212166, iteration: 27141
loss: 1.013240098953247,grad_norm: 0.9401806358089873, iteration: 27142
loss: 0.9749351739883423,grad_norm: 0.9938668617494801, iteration: 27143
loss: 1.002164602279663,grad_norm: 0.9999992498905154, iteration: 27144
loss: 1.0968084335327148,grad_norm: 0.9999996417252207, iteration: 27145
loss: 1.1029313802719116,grad_norm: 0.9999992746610907, iteration: 27146
loss: 1.0592231750488281,grad_norm: 0.9999998078241331, iteration: 27147
loss: 0.9912758469581604,grad_norm: 0.9385468639864679, iteration: 27148
loss: 1.016835331916809,grad_norm: 0.9809707280331309, iteration: 27149
loss: 0.986795961856842,grad_norm: 0.9407669836010054, iteration: 27150
loss: 1.0176750421524048,grad_norm: 0.9896526009536775, iteration: 27151
loss: 1.021599292755127,grad_norm: 0.8778660685610649, iteration: 27152
loss: 1.0067888498306274,grad_norm: 0.9999993668902319, iteration: 27153
loss: 0.9808152914047241,grad_norm: 0.8257548892534693, iteration: 27154
loss: 1.035156011581421,grad_norm: 0.9265551331584619, iteration: 27155
loss: 1.0037875175476074,grad_norm: 0.9999994144449766, iteration: 27156
loss: 1.0021189451217651,grad_norm: 0.9999992423415248, iteration: 27157
loss: 0.9812917709350586,grad_norm: 0.9999993111016998, iteration: 27158
loss: 0.9753724932670593,grad_norm: 0.9999991408299381, iteration: 27159
loss: 0.9679716229438782,grad_norm: 0.9999990693305256, iteration: 27160
loss: 0.9806007146835327,grad_norm: 0.9999991478677219, iteration: 27161
loss: 0.9782007932662964,grad_norm: 0.9560616795509245, iteration: 27162
loss: 1.0483825206756592,grad_norm: 0.9999992269840925, iteration: 27163
loss: 1.0212795734405518,grad_norm: 0.982536426505653, iteration: 27164
loss: 1.038719654083252,grad_norm: 0.9254110807935926, iteration: 27165
loss: 1.015012502670288,grad_norm: 0.9999992582208783, iteration: 27166
loss: 1.0491610765457153,grad_norm: 0.9999997491675805, iteration: 27167
loss: 0.9766610264778137,grad_norm: 0.9999992604682933, iteration: 27168
loss: 1.0121837854385376,grad_norm: 0.9999992110822777, iteration: 27169
loss: 1.039762020111084,grad_norm: 0.9639042446724844, iteration: 27170
loss: 1.007704734802246,grad_norm: 0.9776779503685731, iteration: 27171
loss: 0.9816566705703735,grad_norm: 0.9708280510789843, iteration: 27172
loss: 1.1802570819854736,grad_norm: 0.9999998877128771, iteration: 27173
loss: 1.0214874744415283,grad_norm: 0.9999991543591432, iteration: 27174
loss: 1.0325279235839844,grad_norm: 0.9999990594523628, iteration: 27175
loss: 0.9730398654937744,grad_norm: 0.9028915568142097, iteration: 27176
loss: 1.0424973964691162,grad_norm: 0.9999991066480712, iteration: 27177
loss: 1.0370326042175293,grad_norm: 0.9999998929837906, iteration: 27178
loss: 0.9670463800430298,grad_norm: 0.9369841956180074, iteration: 27179
loss: 0.9857503175735474,grad_norm: 0.9999998573703551, iteration: 27180
loss: 1.0043822526931763,grad_norm: 0.9999990201672814, iteration: 27181
loss: 1.0112117528915405,grad_norm: 0.9289473502840002, iteration: 27182
loss: 1.019972562789917,grad_norm: 0.999999728752025, iteration: 27183
loss: 1.0460107326507568,grad_norm: 0.9999995740253567, iteration: 27184
loss: 1.1274781227111816,grad_norm: 0.999999859970369, iteration: 27185
loss: 1.028640866279602,grad_norm: 0.9999992245655803, iteration: 27186
loss: 0.9638601541519165,grad_norm: 0.9999996539187829, iteration: 27187
loss: 1.027259349822998,grad_norm: 0.9999993774269316, iteration: 27188
loss: 1.0100938081741333,grad_norm: 0.9999989931099227, iteration: 27189
loss: 1.0385117530822754,grad_norm: 0.9999995137105135, iteration: 27190
loss: 1.0328137874603271,grad_norm: 0.9999990525785732, iteration: 27191
loss: 1.022581934928894,grad_norm: 0.9999992854097446, iteration: 27192
loss: 0.9801274538040161,grad_norm: 0.9999991455248473, iteration: 27193
loss: 1.0082045793533325,grad_norm: 0.9999992324091956, iteration: 27194
loss: 1.0473577976226807,grad_norm: 0.9999994275809911, iteration: 27195
loss: 0.9946941137313843,grad_norm: 0.9999991046739174, iteration: 27196
loss: 1.0316730737686157,grad_norm: 0.9999992401809719, iteration: 27197
loss: 1.0144044160842896,grad_norm: 0.9901263980329861, iteration: 27198
loss: 1.0324318408966064,grad_norm: 0.9999996737682121, iteration: 27199
loss: 0.9863328337669373,grad_norm: 0.9478295964453056, iteration: 27200
loss: 1.0087521076202393,grad_norm: 0.9999992367166566, iteration: 27201
loss: 1.0170592069625854,grad_norm: 0.999999118871549, iteration: 27202
loss: 1.0247507095336914,grad_norm: 0.9999990891729074, iteration: 27203
loss: 1.0005524158477783,grad_norm: 0.9976212517432108, iteration: 27204
loss: 1.0029540061950684,grad_norm: 0.9847632173121399, iteration: 27205
loss: 0.979741632938385,grad_norm: 0.9999990401649699, iteration: 27206
loss: 1.0554444789886475,grad_norm: 0.9999996945417181, iteration: 27207
loss: 1.016616940498352,grad_norm: 0.9999997281093094, iteration: 27208
loss: 1.0070000886917114,grad_norm: 0.9999993776783431, iteration: 27209
loss: 1.0210984945297241,grad_norm: 0.999999006418537, iteration: 27210
loss: 0.9687525033950806,grad_norm: 0.9999990783640044, iteration: 27211
loss: 1.015280842781067,grad_norm: 0.9999990875460347, iteration: 27212
loss: 1.0134302377700806,grad_norm: 0.9999996644348165, iteration: 27213
loss: 1.0413731336593628,grad_norm: 0.9999990111585735, iteration: 27214
loss: 1.0078281164169312,grad_norm: 0.999999076639257, iteration: 27215
loss: 1.0123134851455688,grad_norm: 0.9679408337930558, iteration: 27216
loss: 1.006536602973938,grad_norm: 0.9319296532747781, iteration: 27217
loss: 1.000238299369812,grad_norm: 0.9999990431864898, iteration: 27218
loss: 1.0138875246047974,grad_norm: 0.906048109069019, iteration: 27219
loss: 1.0090837478637695,grad_norm: 0.9999991110057768, iteration: 27220
loss: 1.044364094734192,grad_norm: 0.9999989751418344, iteration: 27221
loss: 1.0388811826705933,grad_norm: 0.9999994006669644, iteration: 27222
loss: 1.098800539970398,grad_norm: 0.9999998723239713, iteration: 27223
loss: 1.014103651046753,grad_norm: 0.9999995013869064, iteration: 27224
loss: 1.0179694890975952,grad_norm: 0.9999991189160639, iteration: 27225
loss: 1.0329731702804565,grad_norm: 0.9013399722612455, iteration: 27226
loss: 1.0497289896011353,grad_norm: 0.9999995737240966, iteration: 27227
loss: 1.0103198289871216,grad_norm: 0.9999992807091809, iteration: 27228
loss: 1.0548326969146729,grad_norm: 0.9999995045049076, iteration: 27229
loss: 1.0209065675735474,grad_norm: 0.9214602940659622, iteration: 27230
loss: 1.0567501783370972,grad_norm: 0.9999993735682268, iteration: 27231
loss: 0.9950201511383057,grad_norm: 0.9198919940433221, iteration: 27232
loss: 1.0176782608032227,grad_norm: 0.9302234067295089, iteration: 27233
loss: 1.0385282039642334,grad_norm: 0.9999992128347058, iteration: 27234
loss: 1.0265417098999023,grad_norm: 0.9999996863622918, iteration: 27235
loss: 1.000526785850525,grad_norm: 0.9999989467913419, iteration: 27236
loss: 1.0776926279067993,grad_norm: 0.999999478548295, iteration: 27237
loss: 0.9857887625694275,grad_norm: 0.9999991678426698, iteration: 27238
loss: 1.0598548650741577,grad_norm: 0.9999991496811212, iteration: 27239
loss: 1.0322620868682861,grad_norm: 0.9951808027043795, iteration: 27240
loss: 0.9916070699691772,grad_norm: 0.8167998488262815, iteration: 27241
loss: 1.0753620862960815,grad_norm: 0.9999995657900426, iteration: 27242
loss: 1.0582019090652466,grad_norm: 0.9999996314353311, iteration: 27243
loss: 1.0488691329956055,grad_norm: 0.9999992891281072, iteration: 27244
loss: 1.0052168369293213,grad_norm: 0.9767209343859096, iteration: 27245
loss: 0.9907752871513367,grad_norm: 0.9999992772780275, iteration: 27246
loss: 0.9919483065605164,grad_norm: 0.9999992804058573, iteration: 27247
loss: 1.0552256107330322,grad_norm: 0.9999995414557497, iteration: 27248
loss: 1.0433303117752075,grad_norm: 0.999999352704803, iteration: 27249
loss: 1.0119560956954956,grad_norm: 0.9999996190019352, iteration: 27250
loss: 1.0341243743896484,grad_norm: 0.9999995490122052, iteration: 27251
loss: 0.995134174823761,grad_norm: 0.9999990955346876, iteration: 27252
loss: 1.0131853818893433,grad_norm: 0.9999992469706411, iteration: 27253
loss: 1.0181033611297607,grad_norm: 0.9999996918586145, iteration: 27254
loss: 1.0514626502990723,grad_norm: 0.999999541137394, iteration: 27255
loss: 0.9963146448135376,grad_norm: 0.9999994085287016, iteration: 27256
loss: 0.9913633465766907,grad_norm: 0.9999991244918506, iteration: 27257
loss: 1.026902675628662,grad_norm: 0.999999645944775, iteration: 27258
loss: 0.9844987392425537,grad_norm: 0.9601694877726352, iteration: 27259
loss: 0.9960013628005981,grad_norm: 0.9999991904871535, iteration: 27260
loss: 0.9776359796524048,grad_norm: 0.9999991862053498, iteration: 27261
loss: 0.9755768179893494,grad_norm: 0.8037494213885358, iteration: 27262
loss: 1.0294296741485596,grad_norm: 0.9999991229366177, iteration: 27263
loss: 1.0430570840835571,grad_norm: 0.999999520991537, iteration: 27264
loss: 1.0574162006378174,grad_norm: 0.9999991009447856, iteration: 27265
loss: 1.0198936462402344,grad_norm: 0.9999996749352341, iteration: 27266
loss: 1.0611082315444946,grad_norm: 0.9999990790479064, iteration: 27267
loss: 1.0267937183380127,grad_norm: 0.9999992590576628, iteration: 27268
loss: 0.9558094143867493,grad_norm: 0.999999178676727, iteration: 27269
loss: 1.0311353206634521,grad_norm: 0.9999992549308211, iteration: 27270
loss: 1.0682291984558105,grad_norm: 0.9999994696451837, iteration: 27271
loss: 0.9871137142181396,grad_norm: 0.9999993584385324, iteration: 27272
loss: 1.0079185962677002,grad_norm: 0.9999991865803324, iteration: 27273
loss: 1.0277785062789917,grad_norm: 0.9959500739189862, iteration: 27274
loss: 1.0098336935043335,grad_norm: 0.999999240277837, iteration: 27275
loss: 1.0517749786376953,grad_norm: 0.9999993777012102, iteration: 27276
loss: 1.1154826879501343,grad_norm: 0.9999992070947323, iteration: 27277
loss: 1.0143916606903076,grad_norm: 0.9999990650245398, iteration: 27278
loss: 1.0032579898834229,grad_norm: 0.9999992794577258, iteration: 27279
loss: 0.9836254715919495,grad_norm: 0.9070403078587981, iteration: 27280
loss: 0.9817422032356262,grad_norm: 0.8574403626510779, iteration: 27281
loss: 0.957065224647522,grad_norm: 0.9901829756514027, iteration: 27282
loss: 1.0261263847351074,grad_norm: 0.9820807190877974, iteration: 27283
loss: 1.0556235313415527,grad_norm: 0.9868904652559688, iteration: 27284
loss: 1.0111584663391113,grad_norm: 0.8931826615435093, iteration: 27285
loss: 1.0249567031860352,grad_norm: 0.8998592128963694, iteration: 27286
loss: 1.0327914953231812,grad_norm: 0.9999992790846802, iteration: 27287
loss: 0.9921117424964905,grad_norm: 0.9999991058080254, iteration: 27288
loss: 1.0262460708618164,grad_norm: 0.999999494352437, iteration: 27289
loss: 1.0049439668655396,grad_norm: 0.7857186163867018, iteration: 27290
loss: 0.9903808832168579,grad_norm: 0.9999991181180471, iteration: 27291
loss: 1.0325828790664673,grad_norm: 0.9415128146029228, iteration: 27292
loss: 1.0580692291259766,grad_norm: 0.9999990259705043, iteration: 27293
loss: 1.0117206573486328,grad_norm: 0.9517191166476243, iteration: 27294
loss: 0.9889601469039917,grad_norm: 0.9999992567970316, iteration: 27295
loss: 1.0073679685592651,grad_norm: 0.9999991726515065, iteration: 27296
loss: 1.01771879196167,grad_norm: 0.9999991396875183, iteration: 27297
loss: 1.006792664527893,grad_norm: 0.9999996522301001, iteration: 27298
loss: 1.042325735092163,grad_norm: 0.9158657549384193, iteration: 27299
loss: 1.0240424871444702,grad_norm: 0.9931120496108429, iteration: 27300
loss: 1.0162255764007568,grad_norm: 0.9999991662296501, iteration: 27301
loss: 1.0071951150894165,grad_norm: 0.8073870848006791, iteration: 27302
loss: 0.9742726683616638,grad_norm: 0.9999992058488897, iteration: 27303
loss: 1.0116664171218872,grad_norm: 0.8090620339123503, iteration: 27304
loss: 0.9992953538894653,grad_norm: 0.9308192839092196, iteration: 27305
loss: 1.033666968345642,grad_norm: 0.8651196046276582, iteration: 27306
loss: 1.0153298377990723,grad_norm: 0.9999992533399958, iteration: 27307
loss: 1.0027118921279907,grad_norm: 0.9999992391736126, iteration: 27308
loss: 0.9909206628799438,grad_norm: 0.9999991113118241, iteration: 27309
loss: 0.972257673740387,grad_norm: 0.9999990093170529, iteration: 27310
loss: 1.0228724479675293,grad_norm: 0.9999997514018824, iteration: 27311
loss: 0.9938808679580688,grad_norm: 0.9999990967005117, iteration: 27312
loss: 0.9835328459739685,grad_norm: 0.9616867460222654, iteration: 27313
loss: 1.0367577075958252,grad_norm: 0.9999991505560875, iteration: 27314
loss: 1.0021439790725708,grad_norm: 0.99999906238711, iteration: 27315
loss: 1.0271843671798706,grad_norm: 0.9999990559849139, iteration: 27316
loss: 1.018196702003479,grad_norm: 0.9999992914577912, iteration: 27317
loss: 1.037804126739502,grad_norm: 0.9999994767228775, iteration: 27318
loss: 0.9956962466239929,grad_norm: 0.9718714958899832, iteration: 27319
loss: 1.0277564525604248,grad_norm: 0.9495783834535974, iteration: 27320
loss: 1.0035775899887085,grad_norm: 0.9999994737500133, iteration: 27321
loss: 1.0214033126831055,grad_norm: 0.9999991918910832, iteration: 27322
loss: 1.0142356157302856,grad_norm: 0.9999991440636965, iteration: 27323
loss: 0.9754738211631775,grad_norm: 0.9999991171090252, iteration: 27324
loss: 0.9768443703651428,grad_norm: 0.9999993197944422, iteration: 27325
loss: 0.986686110496521,grad_norm: 0.9932490756288596, iteration: 27326
loss: 1.0254533290863037,grad_norm: 0.8909733148077542, iteration: 27327
loss: 1.047339677810669,grad_norm: 0.9499148217893666, iteration: 27328
loss: 1.0325021743774414,grad_norm: 0.9754162240239673, iteration: 27329
loss: 1.0380953550338745,grad_norm: 0.8697522355976418, iteration: 27330
loss: 1.071833610534668,grad_norm: 0.836541779856428, iteration: 27331
loss: 1.0338119268417358,grad_norm: 0.9657692936775181, iteration: 27332
loss: 1.0402424335479736,grad_norm: 0.960911935758895, iteration: 27333
loss: 0.9886711239814758,grad_norm: 0.9999992567274497, iteration: 27334
loss: 0.9944201707839966,grad_norm: 0.9871630335275025, iteration: 27335
loss: 1.010387659072876,grad_norm: 0.9999991134999573, iteration: 27336
loss: 0.9984097480773926,grad_norm: 0.9542976402168467, iteration: 27337
loss: 1.0489879846572876,grad_norm: 0.9999991443474199, iteration: 27338
loss: 1.009451985359192,grad_norm: 0.9770876224994065, iteration: 27339
loss: 0.9914930462837219,grad_norm: 0.983169007045929, iteration: 27340
loss: 1.0383743047714233,grad_norm: 0.9848454640686463, iteration: 27341
loss: 1.011926293373108,grad_norm: 0.9999991704794691, iteration: 27342
loss: 1.0048471689224243,grad_norm: 0.8910593078528599, iteration: 27343
loss: 0.9823525547981262,grad_norm: 0.8983315300222329, iteration: 27344
loss: 0.9951463341712952,grad_norm: 0.9999990718019319, iteration: 27345
loss: 1.0161570310592651,grad_norm: 0.8202458026920171, iteration: 27346
loss: 0.9575015902519226,grad_norm: 0.9580310297900991, iteration: 27347
loss: 1.0311760902404785,grad_norm: 0.9999990969141564, iteration: 27348
loss: 1.0211824178695679,grad_norm: 0.9999992195616839, iteration: 27349
loss: 1.0198041200637817,grad_norm: 0.999999173873619, iteration: 27350
loss: 0.9922832250595093,grad_norm: 0.8946093919374818, iteration: 27351
loss: 1.010131597518921,grad_norm: 0.9999991343554441, iteration: 27352
loss: 0.9825979471206665,grad_norm: 0.9999991569791679, iteration: 27353
loss: 1.0251821279525757,grad_norm: 0.9999992884080647, iteration: 27354
loss: 1.0423862934112549,grad_norm: 0.9999997134048252, iteration: 27355
loss: 1.0231190919876099,grad_norm: 0.769526927697151, iteration: 27356
loss: 0.974822461605072,grad_norm: 0.8572063025938864, iteration: 27357
loss: 0.9380096793174744,grad_norm: 0.9999991946628074, iteration: 27358
loss: 1.0127143859863281,grad_norm: 0.9819557384549581, iteration: 27359
loss: 1.0620075464248657,grad_norm: 0.9999997294500635, iteration: 27360
loss: 1.0171701908111572,grad_norm: 0.9999990117491141, iteration: 27361
loss: 0.9867112636566162,grad_norm: 0.9194512070737489, iteration: 27362
loss: 0.9802072048187256,grad_norm: 0.9999991743132681, iteration: 27363
loss: 0.986629843711853,grad_norm: 0.9662334349303524, iteration: 27364
loss: 0.9948104619979858,grad_norm: 0.9999990350466175, iteration: 27365
loss: 0.9923977851867676,grad_norm: 0.9999991068015442, iteration: 27366
loss: 1.0832290649414062,grad_norm: 0.9999995121109425, iteration: 27367
loss: 0.9833201766014099,grad_norm: 0.9926105315106791, iteration: 27368
loss: 1.018528699874878,grad_norm: 0.9999990155692551, iteration: 27369
loss: 1.0416232347488403,grad_norm: 0.9005536225371236, iteration: 27370
loss: 1.0300970077514648,grad_norm: 0.9999993566613974, iteration: 27371
loss: 1.000365972518921,grad_norm: 0.9999989594678066, iteration: 27372
loss: 0.9895433783531189,grad_norm: 0.9999997077178612, iteration: 27373
loss: 1.0317002534866333,grad_norm: 0.9999991912770173, iteration: 27374
loss: 1.0079903602600098,grad_norm: 0.9999990993458863, iteration: 27375
loss: 1.0034942626953125,grad_norm: 0.9854697164276846, iteration: 27376
loss: 1.0543642044067383,grad_norm: 0.8436351026618065, iteration: 27377
loss: 1.0167405605316162,grad_norm: 0.9030442512180239, iteration: 27378
loss: 0.9811113476753235,grad_norm: 0.9967620161551948, iteration: 27379
loss: 1.053717851638794,grad_norm: 0.9999994143936859, iteration: 27380
loss: 0.9675454497337341,grad_norm: 0.8470985113772084, iteration: 27381
loss: 1.0477325916290283,grad_norm: 0.9999992626225775, iteration: 27382
loss: 1.0255192518234253,grad_norm: 0.9287098893114831, iteration: 27383
loss: 0.9970161318778992,grad_norm: 0.9118127344692324, iteration: 27384
loss: 1.0075225830078125,grad_norm: 0.9999996591498439, iteration: 27385
loss: 1.0917710065841675,grad_norm: 0.9999992136087068, iteration: 27386
loss: 1.0259182453155518,grad_norm: 0.9999993847361874, iteration: 27387
loss: 0.9990560412406921,grad_norm: 0.9999990827381194, iteration: 27388
loss: 1.0424262285232544,grad_norm: 0.9999997347797245, iteration: 27389
loss: 0.9834012389183044,grad_norm: 0.8878285052662318, iteration: 27390
loss: 1.0255017280578613,grad_norm: 0.9992185653747646, iteration: 27391
loss: 1.0367839336395264,grad_norm: 0.831737207890614, iteration: 27392
loss: 1.0226680040359497,grad_norm: 0.8790102533743755, iteration: 27393
loss: 1.0420844554901123,grad_norm: 0.9999990615520493, iteration: 27394
loss: 0.9969790577888489,grad_norm: 0.8744720445123642, iteration: 27395
loss: 1.043135166168213,grad_norm: 0.9999990512346205, iteration: 27396
loss: 1.0091785192489624,grad_norm: 0.826539377868218, iteration: 27397
loss: 1.0038669109344482,grad_norm: 0.9999992373299278, iteration: 27398
loss: 1.0335144996643066,grad_norm: 0.9999992432814752, iteration: 27399
loss: 1.0066605806350708,grad_norm: 0.9939135323705993, iteration: 27400
loss: 1.0408200025558472,grad_norm: 0.9999998911051264, iteration: 27401
loss: 1.0212771892547607,grad_norm: 0.9807346207206981, iteration: 27402
loss: 1.0347834825515747,grad_norm: 0.999998998508829, iteration: 27403
loss: 1.009730577468872,grad_norm: 0.9619340050483428, iteration: 27404
loss: 1.0625431537628174,grad_norm: 0.9999992398423557, iteration: 27405
loss: 0.9945229887962341,grad_norm: 0.9999991813300162, iteration: 27406
loss: 0.9813461899757385,grad_norm: 0.9318167906616618, iteration: 27407
loss: 1.0107897520065308,grad_norm: 0.9999991592891798, iteration: 27408
loss: 1.0107297897338867,grad_norm: 0.9999991725344397, iteration: 27409
loss: 0.9903106689453125,grad_norm: 0.9309047466371023, iteration: 27410
loss: 1.0233632326126099,grad_norm: 0.9859374670158881, iteration: 27411
loss: 1.0682885646820068,grad_norm: 0.9999996242357647, iteration: 27412
loss: 1.0180933475494385,grad_norm: 0.8611051272938866, iteration: 27413
loss: 1.0324758291244507,grad_norm: 0.9999990825699799, iteration: 27414
loss: 1.0139217376708984,grad_norm: 0.9768540160299016, iteration: 27415
loss: 1.0153014659881592,grad_norm: 0.9999992310791784, iteration: 27416
loss: 0.9890881180763245,grad_norm: 0.9999989931027772, iteration: 27417
loss: 1.0139068365097046,grad_norm: 0.9999991473460081, iteration: 27418
loss: 1.080291509628296,grad_norm: 0.9999998436949039, iteration: 27419
loss: 1.0219371318817139,grad_norm: 0.9999991187125262, iteration: 27420
loss: 0.9880706667900085,grad_norm: 0.9238381140737446, iteration: 27421
loss: 0.986913800239563,grad_norm: 0.9999990693178576, iteration: 27422
loss: 1.0161176919937134,grad_norm: 0.9999990602803198, iteration: 27423
loss: 1.0577166080474854,grad_norm: 0.9999993316387918, iteration: 27424
loss: 1.1041005849838257,grad_norm: 0.9999999243823317, iteration: 27425
loss: 1.002326488494873,grad_norm: 0.9999991219645014, iteration: 27426
loss: 1.0393894910812378,grad_norm: 0.9999992249509714, iteration: 27427
loss: 1.0909281969070435,grad_norm: 0.9999989722800917, iteration: 27428
loss: 1.03208589553833,grad_norm: 0.9999997075283131, iteration: 27429
loss: 1.0286041498184204,grad_norm: 0.9999991366207615, iteration: 27430
loss: 1.0028592348098755,grad_norm: 0.999999799352822, iteration: 27431
loss: 1.0326544046401978,grad_norm: 0.9999997924107589, iteration: 27432
loss: 1.0014694929122925,grad_norm: 0.856662577639919, iteration: 27433
loss: 1.0058640241622925,grad_norm: 0.9999991330910154, iteration: 27434
loss: 0.9815266132354736,grad_norm: 0.9999991407018917, iteration: 27435
loss: 1.0498846769332886,grad_norm: 0.9999991595468553, iteration: 27436
loss: 1.0321145057678223,grad_norm: 0.9372849977475305, iteration: 27437
loss: 1.0572251081466675,grad_norm: 0.9999997137998092, iteration: 27438
loss: 1.0067983865737915,grad_norm: 0.9593015408024045, iteration: 27439
loss: 1.0311484336853027,grad_norm: 0.9714386531379333, iteration: 27440
loss: 1.0071916580200195,grad_norm: 0.9999991624730703, iteration: 27441
loss: 1.0162709951400757,grad_norm: 0.9999990371116368, iteration: 27442
loss: 1.0031044483184814,grad_norm: 0.9999990025263389, iteration: 27443
loss: 0.9897258281707764,grad_norm: 0.8675561639694652, iteration: 27444
loss: 1.0167615413665771,grad_norm: 0.9999994229696263, iteration: 27445
loss: 1.033467173576355,grad_norm: 0.9999995126149505, iteration: 27446
loss: 1.0171785354614258,grad_norm: 0.9999997092588104, iteration: 27447
loss: 1.0682989358901978,grad_norm: 0.9999998359278267, iteration: 27448
loss: 1.082788348197937,grad_norm: 0.999999650431734, iteration: 27449
loss: 1.019005537033081,grad_norm: 0.9999990946942907, iteration: 27450
loss: 1.07138991355896,grad_norm: 0.9999993435901369, iteration: 27451
loss: 1.0106257200241089,grad_norm: 0.9999993361870472, iteration: 27452
loss: 1.0253891944885254,grad_norm: 0.9999991058055845, iteration: 27453
loss: 1.0042935609817505,grad_norm: 0.9288041868184763, iteration: 27454
loss: 0.9892613887786865,grad_norm: 0.9999991184964588, iteration: 27455
loss: 1.038118600845337,grad_norm: 0.99999911770193, iteration: 27456
loss: 1.0333638191223145,grad_norm: 0.9999996998678614, iteration: 27457
loss: 1.0718599557876587,grad_norm: 0.999999807212224, iteration: 27458
loss: 1.0046215057373047,grad_norm: 0.9999990985699868, iteration: 27459
loss: 1.00754976272583,grad_norm: 0.9999997496978584, iteration: 27460
loss: 1.016876459121704,grad_norm: 0.881798717821478, iteration: 27461
loss: 1.0227411985397339,grad_norm: 0.9468304301186152, iteration: 27462
loss: 1.0587375164031982,grad_norm: 0.999999177348501, iteration: 27463
loss: 1.002251386642456,grad_norm: 0.9999997085863482, iteration: 27464
loss: 1.0183817148208618,grad_norm: 0.9999990945863667, iteration: 27465
loss: 1.0209988355636597,grad_norm: 0.9577153696880075, iteration: 27466
loss: 1.0248348712921143,grad_norm: 0.9999990844689732, iteration: 27467
loss: 1.0118972063064575,grad_norm: 0.9231453337078028, iteration: 27468
loss: 1.03120756149292,grad_norm: 0.9999990009217298, iteration: 27469
loss: 1.0074838399887085,grad_norm: 0.9999992753107698, iteration: 27470
loss: 0.9930800199508667,grad_norm: 0.9999993136525702, iteration: 27471
loss: 0.9948061108589172,grad_norm: 0.9999989907977118, iteration: 27472
loss: 0.995422899723053,grad_norm: 0.9999991459325397, iteration: 27473
loss: 1.0389540195465088,grad_norm: 0.9999991828841237, iteration: 27474
loss: 1.0411949157714844,grad_norm: 0.9999998914515971, iteration: 27475
loss: 1.0681082010269165,grad_norm: 0.9999997496264955, iteration: 27476
loss: 1.045453429222107,grad_norm: 0.9999995856491082, iteration: 27477
loss: 0.9704364538192749,grad_norm: 0.9999990298196736, iteration: 27478
loss: 1.0377922058105469,grad_norm: 0.9999990746865246, iteration: 27479
loss: 0.9752330780029297,grad_norm: 0.9999995153310623, iteration: 27480
loss: 1.0101985931396484,grad_norm: 0.9999991417992966, iteration: 27481
loss: 0.9997409582138062,grad_norm: 0.9999991602991941, iteration: 27482
loss: 1.0103296041488647,grad_norm: 0.8905370430271803, iteration: 27483
loss: 0.9597630500793457,grad_norm: 0.9999991203831055, iteration: 27484
loss: 1.041213870048523,grad_norm: 0.9999995198360596, iteration: 27485
loss: 1.038682460784912,grad_norm: 0.9999991122263371, iteration: 27486
loss: 1.0248831510543823,grad_norm: 0.999999280345605, iteration: 27487
loss: 0.9782810211181641,grad_norm: 0.9416337751739879, iteration: 27488
loss: 0.9937570691108704,grad_norm: 0.9999990738941151, iteration: 27489
loss: 1.0017322301864624,grad_norm: 0.9094522500029922, iteration: 27490
loss: 0.9984824061393738,grad_norm: 0.9999994380094309, iteration: 27491
loss: 1.013333797454834,grad_norm: 0.999999939709143, iteration: 27492
loss: 0.9870297908782959,grad_norm: 0.9999990083830521, iteration: 27493
loss: 1.0084673166275024,grad_norm: 0.9999993654189517, iteration: 27494
loss: 0.9493120312690735,grad_norm: 0.9999993959978557, iteration: 27495
loss: 1.0064634084701538,grad_norm: 0.9214323502491286, iteration: 27496
loss: 1.0203856229782104,grad_norm: 0.9999991088837803, iteration: 27497
loss: 1.0228623151779175,grad_norm: 0.9999993773364684, iteration: 27498
loss: 1.0223512649536133,grad_norm: 0.9726718847724516, iteration: 27499
loss: 1.0163336992263794,grad_norm: 0.9061993175058137, iteration: 27500
loss: 1.0120259523391724,grad_norm: 0.9999994099122868, iteration: 27501
loss: 1.0544078350067139,grad_norm: 0.9999994470736194, iteration: 27502
loss: 0.9669076204299927,grad_norm: 0.9999990056961006, iteration: 27503
loss: 1.0616506338119507,grad_norm: 0.9999992846839656, iteration: 27504
loss: 1.0015177726745605,grad_norm: 0.9999990572280825, iteration: 27505
loss: 1.0137492418289185,grad_norm: 0.9999994040285797, iteration: 27506
loss: 0.9900251626968384,grad_norm: 0.999999095316358, iteration: 27507
loss: 0.9953455328941345,grad_norm: 0.9999991407874472, iteration: 27508
loss: 0.9813425540924072,grad_norm: 0.9906891319132094, iteration: 27509
loss: 1.0101655721664429,grad_norm: 0.9999991455655205, iteration: 27510
loss: 1.0030075311660767,grad_norm: 0.999998969569346, iteration: 27511
loss: 0.9872758388519287,grad_norm: 0.999999182425703, iteration: 27512
loss: 1.0230907201766968,grad_norm: 0.9999990240421843, iteration: 27513
loss: 1.0300413370132446,grad_norm: 0.999999771476295, iteration: 27514
loss: 1.0191415548324585,grad_norm: 0.9999992526274054, iteration: 27515
loss: 1.1234077215194702,grad_norm: 0.9999998601461112, iteration: 27516
loss: 1.0362435579299927,grad_norm: 0.9999993928963298, iteration: 27517
loss: 1.0301761627197266,grad_norm: 0.9999993490516105, iteration: 27518
loss: 1.094682216644287,grad_norm: 0.9999992114785193, iteration: 27519
loss: 1.005914330482483,grad_norm: 0.8033719578060343, iteration: 27520
loss: 1.038973331451416,grad_norm: 0.9999994641705269, iteration: 27521
loss: 1.028430700302124,grad_norm: 0.9942620731821314, iteration: 27522
loss: 0.9781585931777954,grad_norm: 0.9422208765231741, iteration: 27523
loss: 0.9709948897361755,grad_norm: 0.8869586391978899, iteration: 27524
loss: 1.0208882093429565,grad_norm: 0.9999991766423404, iteration: 27525
loss: 1.0016714334487915,grad_norm: 0.9999993458086847, iteration: 27526
loss: 1.0130163431167603,grad_norm: 0.9559993957213805, iteration: 27527
loss: 0.9839069247245789,grad_norm: 0.999999505039902, iteration: 27528
loss: 1.0173441171646118,grad_norm: 0.9817332317751593, iteration: 27529
loss: 0.999616801738739,grad_norm: 0.9698044842487299, iteration: 27530
loss: 1.053002953529358,grad_norm: 0.7690349403354921, iteration: 27531
loss: 1.0152662992477417,grad_norm: 0.9999989951338659, iteration: 27532
loss: 1.0116826295852661,grad_norm: 0.9999991829170543, iteration: 27533
loss: 0.9790086150169373,grad_norm: 0.9295631439133444, iteration: 27534
loss: 1.0121731758117676,grad_norm: 0.9517466171754251, iteration: 27535
loss: 1.031333565711975,grad_norm: 0.9999996320484529, iteration: 27536
loss: 1.040083408355713,grad_norm: 0.9999993868842483, iteration: 27537
loss: 1.0212063789367676,grad_norm: 0.9999992666965688, iteration: 27538
loss: 1.0262211561203003,grad_norm: 0.9999993392037124, iteration: 27539
loss: 1.04630708694458,grad_norm: 0.9288096099788608, iteration: 27540
loss: 1.0191984176635742,grad_norm: 0.9999991262506792, iteration: 27541
loss: 1.0645685195922852,grad_norm: 0.9999997613275468, iteration: 27542
loss: 1.0328599214553833,grad_norm: 0.9999992412666681, iteration: 27543
loss: 0.9707668423652649,grad_norm: 0.9999990937680622, iteration: 27544
loss: 1.0015974044799805,grad_norm: 0.9171695588057082, iteration: 27545
loss: 1.028159499168396,grad_norm: 0.7694168652495392, iteration: 27546
loss: 1.0203896760940552,grad_norm: 0.9999995128679303, iteration: 27547
loss: 0.9886757731437683,grad_norm: 0.9999991431014077, iteration: 27548
loss: 1.0188544988632202,grad_norm: 0.9574469747167674, iteration: 27549
loss: 1.0053285360336304,grad_norm: 0.8550551121655418, iteration: 27550
loss: 0.9846484661102295,grad_norm: 0.9999991582226583, iteration: 27551
loss: 0.9920392632484436,grad_norm: 0.9899499092785391, iteration: 27552
loss: 1.0114295482635498,grad_norm: 0.9999992299297341, iteration: 27553
loss: 1.034348487854004,grad_norm: 0.9425421313426333, iteration: 27554
loss: 1.018843412399292,grad_norm: 0.8276145120714473, iteration: 27555
loss: 1.0325076580047607,grad_norm: 0.9999990104018481, iteration: 27556
loss: 1.0097994804382324,grad_norm: 0.9999989726333601, iteration: 27557
loss: 1.0477558374404907,grad_norm: 0.9999995137070818, iteration: 27558
loss: 0.9907875657081604,grad_norm: 0.8504983454311908, iteration: 27559
loss: 1.0221130847930908,grad_norm: 0.9999989824339631, iteration: 27560
loss: 1.0512906312942505,grad_norm: 0.999999613234925, iteration: 27561
loss: 1.000303030014038,grad_norm: 0.8198097493756477, iteration: 27562
loss: 0.9964192509651184,grad_norm: 0.9999992267111046, iteration: 27563
loss: 1.027718424797058,grad_norm: 0.870568710825495, iteration: 27564
loss: 1.0080726146697998,grad_norm: 0.9999991054031355, iteration: 27565
loss: 1.0330661535263062,grad_norm: 0.8088896448285002, iteration: 27566
loss: 0.9972112774848938,grad_norm: 0.9999989346343675, iteration: 27567
loss: 0.9844287037849426,grad_norm: 0.9999994388843598, iteration: 27568
loss: 0.9905699491500854,grad_norm: 0.9999993743463864, iteration: 27569
loss: 1.0310131311416626,grad_norm: 0.9999992483759219, iteration: 27570
loss: 1.0662628412246704,grad_norm: 0.9999995913823718, iteration: 27571
loss: 0.9970154762268066,grad_norm: 0.9101221694689878, iteration: 27572
loss: 0.9929794669151306,grad_norm: 0.9999992380502863, iteration: 27573
loss: 1.0567262172698975,grad_norm: 0.9999995959666068, iteration: 27574
loss: 1.0179047584533691,grad_norm: 0.9999995888626381, iteration: 27575
loss: 1.044312596321106,grad_norm: 0.9601701620876797, iteration: 27576
loss: 1.0275413990020752,grad_norm: 0.999999484891179, iteration: 27577
loss: 1.0215781927108765,grad_norm: 0.9999991864901631, iteration: 27578
loss: 1.036490797996521,grad_norm: 0.999999173720907, iteration: 27579
loss: 0.9814267158508301,grad_norm: 0.9999991171396071, iteration: 27580
loss: 1.017635464668274,grad_norm: 0.997054092855331, iteration: 27581
loss: 0.969664454460144,grad_norm: 0.9999992199711073, iteration: 27582
loss: 1.0492459535598755,grad_norm: 0.99999935117032, iteration: 27583
loss: 1.0414302349090576,grad_norm: 0.999999587849165, iteration: 27584
loss: 1.03434419631958,grad_norm: 0.9999996510664693, iteration: 27585
loss: 1.0121790170669556,grad_norm: 0.9999994958210203, iteration: 27586
loss: 1.0317747592926025,grad_norm: 0.9999990248534812, iteration: 27587
loss: 1.0402297973632812,grad_norm: 0.8107599623818658, iteration: 27588
loss: 0.9939970970153809,grad_norm: 0.999999075566346, iteration: 27589
loss: 1.025022029876709,grad_norm: 0.9999994306174513, iteration: 27590
loss: 0.99986332654953,grad_norm: 0.9999990975054462, iteration: 27591
loss: 1.065791130065918,grad_norm: 0.9999994467785069, iteration: 27592
loss: 0.9843059182167053,grad_norm: 0.9820658584091407, iteration: 27593
loss: 0.9844459295272827,grad_norm: 0.9149843122118295, iteration: 27594
loss: 1.0263015031814575,grad_norm: 0.9999995860553362, iteration: 27595
loss: 1.023882508277893,grad_norm: 0.999999344386616, iteration: 27596
loss: 1.123197317123413,grad_norm: 0.9999991429829179, iteration: 27597
loss: 0.9722192883491516,grad_norm: 0.9999992632883119, iteration: 27598
loss: 1.0063966512680054,grad_norm: 0.9999989886996227, iteration: 27599
loss: 1.0476254224777222,grad_norm: 0.9999996420808113, iteration: 27600
loss: 0.9797425270080566,grad_norm: 0.999999086275378, iteration: 27601
loss: 1.115305781364441,grad_norm: 0.9999993528934311, iteration: 27602
loss: 1.00938081741333,grad_norm: 0.9999991880081878, iteration: 27603
loss: 1.0250370502471924,grad_norm: 0.9999992389641575, iteration: 27604
loss: 1.0192409753799438,grad_norm: 0.9999991575757792, iteration: 27605
loss: 1.0234977006912231,grad_norm: 0.9999995195836083, iteration: 27606
loss: 1.0021283626556396,grad_norm: 0.9999993045726632, iteration: 27607
loss: 0.9790585041046143,grad_norm: 0.999999108947227, iteration: 27608
loss: 0.9980732798576355,grad_norm: 0.9387065165381722, iteration: 27609
loss: 1.0303500890731812,grad_norm: 0.9999991318740905, iteration: 27610
loss: 1.1824278831481934,grad_norm: 0.9999997814485413, iteration: 27611
loss: 1.0095412731170654,grad_norm: 0.9999995459557759, iteration: 27612
loss: 1.036516785621643,grad_norm: 0.9999989893339077, iteration: 27613
loss: 1.1154435873031616,grad_norm: 0.999999761051028, iteration: 27614
loss: 1.0052850246429443,grad_norm: 0.9999991065288328, iteration: 27615
loss: 1.0863399505615234,grad_norm: 0.9999994799638661, iteration: 27616
loss: 0.9964256286621094,grad_norm: 0.9999989538831212, iteration: 27617
loss: 1.1854184865951538,grad_norm: 0.9999998005414379, iteration: 27618
loss: 1.0274877548217773,grad_norm: 0.9999992356572138, iteration: 27619
loss: 1.0284284353256226,grad_norm: 0.9999994895519854, iteration: 27620
loss: 0.9616997241973877,grad_norm: 0.9999993038257567, iteration: 27621
loss: 1.0110751390457153,grad_norm: 0.9999992020412561, iteration: 27622
loss: 0.9800217747688293,grad_norm: 0.9999993733720247, iteration: 27623
loss: 0.9781474471092224,grad_norm: 0.9195963653599006, iteration: 27624
loss: 1.011573076248169,grad_norm: 0.9999994633661995, iteration: 27625
loss: 0.9911263585090637,grad_norm: 0.8474942103892212, iteration: 27626
loss: 1.0743095874786377,grad_norm: 0.9999998157694134, iteration: 27627
loss: 1.0065139532089233,grad_norm: 0.999999285312005, iteration: 27628
loss: 1.0034730434417725,grad_norm: 0.999999374132135, iteration: 27629
loss: 1.100441336631775,grad_norm: 0.9999996375510839, iteration: 27630
loss: 1.0544971227645874,grad_norm: 0.9762370154038058, iteration: 27631
loss: 0.9885368943214417,grad_norm: 0.9999990828943395, iteration: 27632
loss: 1.0305474996566772,grad_norm: 0.9999991720244695, iteration: 27633
loss: 0.9609813690185547,grad_norm: 0.9999991469152929, iteration: 27634
loss: 1.0028001070022583,grad_norm: 0.9999990565729034, iteration: 27635
loss: 0.9890232086181641,grad_norm: 0.9887749252721449, iteration: 27636
loss: 1.1402100324630737,grad_norm: 0.9999998242941868, iteration: 27637
loss: 1.0376267433166504,grad_norm: 0.9999991621420521, iteration: 27638
loss: 1.0014303922653198,grad_norm: 0.9999990305355567, iteration: 27639
loss: 1.0115299224853516,grad_norm: 0.9216989946405506, iteration: 27640
loss: 1.0251774787902832,grad_norm: 0.9999991461399368, iteration: 27641
loss: 1.0481077432632446,grad_norm: 0.9999993812117687, iteration: 27642
loss: 1.0072221755981445,grad_norm: 0.9999990612249717, iteration: 27643
loss: 0.9407548904418945,grad_norm: 0.9999992742292008, iteration: 27644
loss: 1.057592511177063,grad_norm: 0.999999714281594, iteration: 27645
loss: 0.979141891002655,grad_norm: 0.9999991818467302, iteration: 27646
loss: 1.0112048387527466,grad_norm: 0.999998982741409, iteration: 27647
loss: 0.9902817010879517,grad_norm: 0.9999996512337417, iteration: 27648
loss: 1.030677318572998,grad_norm: 0.9999990969086352, iteration: 27649
loss: 0.9361981749534607,grad_norm: 0.824726362076557, iteration: 27650
loss: 1.0322341918945312,grad_norm: 0.936790804023388, iteration: 27651
loss: 1.0090858936309814,grad_norm: 0.9999993527673594, iteration: 27652
loss: 1.0452195405960083,grad_norm: 0.9999989763891567, iteration: 27653
loss: 1.0245001316070557,grad_norm: 0.8932008814083456, iteration: 27654
loss: 0.9785813689231873,grad_norm: 0.9999991345532387, iteration: 27655
loss: 1.0171468257904053,grad_norm: 0.999999159092696, iteration: 27656
loss: 1.0553663969039917,grad_norm: 0.999999229199583, iteration: 27657
loss: 0.9701629281044006,grad_norm: 0.9143159538618798, iteration: 27658
loss: 0.997868001461029,grad_norm: 0.9398874833434829, iteration: 27659
loss: 1.0234711170196533,grad_norm: 0.9999996634457059, iteration: 27660
loss: 1.0673763751983643,grad_norm: 0.9999996102704813, iteration: 27661
loss: 0.995311975479126,grad_norm: 0.8871128478907062, iteration: 27662
loss: 1.0175561904907227,grad_norm: 0.9130646117470765, iteration: 27663
loss: 1.0127063989639282,grad_norm: 0.9999991562228489, iteration: 27664
loss: 1.0376230478286743,grad_norm: 0.8823746617226823, iteration: 27665
loss: 0.9660949110984802,grad_norm: 0.994466915799135, iteration: 27666
loss: 1.0050932168960571,grad_norm: 0.999999075087965, iteration: 27667
loss: 1.0395796298980713,grad_norm: 0.9999994785125546, iteration: 27668
loss: 1.0064438581466675,grad_norm: 0.9999993894148476, iteration: 27669
loss: 0.9797691106796265,grad_norm: 0.9999996637802584, iteration: 27670
loss: 1.0212706327438354,grad_norm: 0.8669675508694341, iteration: 27671
loss: 1.025804877281189,grad_norm: 0.9999990665920381, iteration: 27672
loss: 0.9872987270355225,grad_norm: 0.9077273396384379, iteration: 27673
loss: 1.0592371225357056,grad_norm: 0.9999994376732928, iteration: 27674
loss: 1.0173906087875366,grad_norm: 0.9999991800951201, iteration: 27675
loss: 0.9832571148872375,grad_norm: 0.9999992002178248, iteration: 27676
loss: 1.0149847269058228,grad_norm: 0.9999993335426736, iteration: 27677
loss: 0.9959297776222229,grad_norm: 0.9999991036587013, iteration: 27678
loss: 1.0273574590682983,grad_norm: 0.9999997178823483, iteration: 27679
loss: 1.0471243858337402,grad_norm: 0.9999991165062857, iteration: 27680
loss: 0.9911507964134216,grad_norm: 0.9999992775117195, iteration: 27681
loss: 1.0522853136062622,grad_norm: 0.9999991976570816, iteration: 27682
loss: 0.984355628490448,grad_norm: 0.9999991246826413, iteration: 27683
loss: 0.9825415015220642,grad_norm: 0.9999992303223205, iteration: 27684
loss: 1.026593565940857,grad_norm: 0.9215576806231796, iteration: 27685
loss: 1.0036556720733643,grad_norm: 0.9999992475144283, iteration: 27686
loss: 1.011620044708252,grad_norm: 0.9999990239488317, iteration: 27687
loss: 1.010740041732788,grad_norm: 0.9999995190703794, iteration: 27688
loss: 1.0515410900115967,grad_norm: 0.9503823298069469, iteration: 27689
loss: 1.0033247470855713,grad_norm: 0.9586463044074962, iteration: 27690
loss: 1.0269954204559326,grad_norm: 0.9505266895899669, iteration: 27691
loss: 1.0077459812164307,grad_norm: 0.9999990600004572, iteration: 27692
loss: 1.0028407573699951,grad_norm: 0.9999992493575797, iteration: 27693
loss: 1.0151890516281128,grad_norm: 0.9999998878048746, iteration: 27694
loss: 1.1259522438049316,grad_norm: 0.9999994959408054, iteration: 27695
loss: 0.990727424621582,grad_norm: 0.999999511134417, iteration: 27696
loss: 1.043575406074524,grad_norm: 0.9999997588437314, iteration: 27697
loss: 1.0668225288391113,grad_norm: 0.9999998388225484, iteration: 27698
loss: 1.0264253616333008,grad_norm: 0.9999996049912422, iteration: 27699
loss: 0.9798169732093811,grad_norm: 0.9999991237397027, iteration: 27700
loss: 1.0007178783416748,grad_norm: 0.8868290923623301, iteration: 27701
loss: 1.0368181467056274,grad_norm: 0.9999992877664785, iteration: 27702
loss: 1.0538339614868164,grad_norm: 0.9999993794053341, iteration: 27703
loss: 1.019871473312378,grad_norm: 0.9290061165911034, iteration: 27704
loss: 1.0280345678329468,grad_norm: 0.9947575430232863, iteration: 27705
loss: 1.0289883613586426,grad_norm: 0.9999991247973501, iteration: 27706
loss: 1.0401471853256226,grad_norm: 0.9999989733915012, iteration: 27707
loss: 0.9978628754615784,grad_norm: 0.9779137099097484, iteration: 27708
loss: 1.0344042778015137,grad_norm: 0.9156867744945382, iteration: 27709
loss: 1.0088517665863037,grad_norm: 0.9999990188893257, iteration: 27710
loss: 1.0101089477539062,grad_norm: 0.9999995218986009, iteration: 27711
loss: 1.0537115335464478,grad_norm: 0.9999995980269935, iteration: 27712
loss: 0.9968147873878479,grad_norm: 0.9092563803246018, iteration: 27713
loss: 1.0622930526733398,grad_norm: 0.9999990564727435, iteration: 27714
loss: 0.9714434742927551,grad_norm: 0.9999991205829499, iteration: 27715
loss: 1.0010671615600586,grad_norm: 0.959644919478322, iteration: 27716
loss: 1.0184392929077148,grad_norm: 0.9999992768152227, iteration: 27717
loss: 1.0718427896499634,grad_norm: 0.9999991977989611, iteration: 27718
loss: 0.9797741770744324,grad_norm: 0.9525310380474893, iteration: 27719
loss: 0.9967522025108337,grad_norm: 0.9756223142416766, iteration: 27720
loss: 1.0578572750091553,grad_norm: 0.9999999686942379, iteration: 27721
loss: 1.030191421508789,grad_norm: 0.9999992997052941, iteration: 27722
loss: 1.030231237411499,grad_norm: 0.9655050826197239, iteration: 27723
loss: 1.058397650718689,grad_norm: 0.8717826034923121, iteration: 27724
loss: 1.0136570930480957,grad_norm: 0.9999992855336032, iteration: 27725
loss: 1.0009281635284424,grad_norm: 0.9611223546987243, iteration: 27726
loss: 0.9918348789215088,grad_norm: 0.9660701464325998, iteration: 27727
loss: 1.055505633354187,grad_norm: 0.9999992471539191, iteration: 27728
loss: 1.0100812911987305,grad_norm: 0.9999992053054774, iteration: 27729
loss: 1.0658619403839111,grad_norm: 0.9999994414680208, iteration: 27730
loss: 1.0066214799880981,grad_norm: 0.9999992470018308, iteration: 27731
loss: 1.0417683124542236,grad_norm: 0.9999990438446462, iteration: 27732
loss: 1.0045490264892578,grad_norm: 0.9999995706314028, iteration: 27733
loss: 1.0116878747940063,grad_norm: 0.999999166098011, iteration: 27734
loss: 1.0219438076019287,grad_norm: 0.9999995018013959, iteration: 27735
loss: 1.012868881225586,grad_norm: 0.9999991385458084, iteration: 27736
loss: 0.9943040013313293,grad_norm: 0.9999991536085899, iteration: 27737
loss: 0.988257110118866,grad_norm: 0.9999991618508075, iteration: 27738
loss: 1.0590457916259766,grad_norm: 0.9999995541463762, iteration: 27739
loss: 1.0228487253189087,grad_norm: 0.8586541873831175, iteration: 27740
loss: 1.0135478973388672,grad_norm: 0.9999990250468798, iteration: 27741
loss: 1.0032463073730469,grad_norm: 0.9999991425292917, iteration: 27742
loss: 0.9986703395843506,grad_norm: 0.9819530350461092, iteration: 27743
loss: 1.0922598838806152,grad_norm: 0.9999993841515056, iteration: 27744
loss: 1.047455906867981,grad_norm: 0.9999994298953301, iteration: 27745
loss: 1.0186442136764526,grad_norm: 0.9999990743512677, iteration: 27746
loss: 0.9983565211296082,grad_norm: 0.9999992639288073, iteration: 27747
loss: 0.9824544191360474,grad_norm: 0.8762343454911798, iteration: 27748
loss: 1.0039135217666626,grad_norm: 0.9999994009458111, iteration: 27749
loss: 1.0111333131790161,grad_norm: 0.9999998883952254, iteration: 27750
loss: 0.9773637056350708,grad_norm: 0.999999225309355, iteration: 27751
loss: 0.9967210292816162,grad_norm: 0.9999991851887222, iteration: 27752
loss: 0.9738607406616211,grad_norm: 0.794259472603383, iteration: 27753
loss: 0.9593585133552551,grad_norm: 0.9999991987232594, iteration: 27754
loss: 1.0819969177246094,grad_norm: 0.9999995527462738, iteration: 27755
loss: 1.0187560319900513,grad_norm: 0.9999988999737794, iteration: 27756
loss: 1.011059284210205,grad_norm: 0.9999990960376455, iteration: 27757
loss: 1.050319790840149,grad_norm: 0.9343061800060599, iteration: 27758
loss: 1.078456997871399,grad_norm: 0.9999992863851322, iteration: 27759
loss: 1.0463367700576782,grad_norm: 0.9999991215582807, iteration: 27760
loss: 1.0412933826446533,grad_norm: 0.9999990827828679, iteration: 27761
loss: 1.0033644437789917,grad_norm: 0.9537252469322428, iteration: 27762
loss: 1.0742985010147095,grad_norm: 0.9999995392497835, iteration: 27763
loss: 1.0423007011413574,grad_norm: 0.8811129485039143, iteration: 27764
loss: 1.0433727502822876,grad_norm: 0.9999991598909127, iteration: 27765
loss: 1.0554468631744385,grad_norm: 0.9999995668980725, iteration: 27766
loss: 1.0000908374786377,grad_norm: 0.9999994302004483, iteration: 27767
loss: 1.00308096408844,grad_norm: 0.9905084674899866, iteration: 27768
loss: 1.0065782070159912,grad_norm: 0.9999992499181136, iteration: 27769
loss: 1.0141499042510986,grad_norm: 0.9999990532289759, iteration: 27770
loss: 0.997088611125946,grad_norm: 0.999999151326933, iteration: 27771
loss: 0.9709781408309937,grad_norm: 0.8668860941980382, iteration: 27772
loss: 1.0020474195480347,grad_norm: 0.9795272223044019, iteration: 27773
loss: 1.007249355316162,grad_norm: 0.9999990768098379, iteration: 27774
loss: 1.0255801677703857,grad_norm: 0.9999990818467381, iteration: 27775
loss: 1.004116177558899,grad_norm: 0.9999994594206016, iteration: 27776
loss: 0.9964892268180847,grad_norm: 0.9816799581679421, iteration: 27777
loss: 1.035245418548584,grad_norm: 0.9741989921271967, iteration: 27778
loss: 0.9631333947181702,grad_norm: 0.9999992969149856, iteration: 27779
loss: 0.9924649000167847,grad_norm: 0.9999990783882324, iteration: 27780
loss: 1.018894910812378,grad_norm: 0.8646509133312544, iteration: 27781
loss: 1.0410008430480957,grad_norm: 0.9999993810614253, iteration: 27782
loss: 1.028290033340454,grad_norm: 0.9999991793715353, iteration: 27783
loss: 1.0329936742782593,grad_norm: 0.999999339534833, iteration: 27784
loss: 1.013322353363037,grad_norm: 0.9999992568097835, iteration: 27785
loss: 0.9974498152732849,grad_norm: 0.9999994053174526, iteration: 27786
loss: 0.9879230260848999,grad_norm: 0.983732838535494, iteration: 27787
loss: 0.988129198551178,grad_norm: 0.9999993210228746, iteration: 27788
loss: 1.0457561016082764,grad_norm: 0.9587531194652699, iteration: 27789
loss: 1.0170949697494507,grad_norm: 0.9135711579621341, iteration: 27790
loss: 1.0369148254394531,grad_norm: 0.9999991052047413, iteration: 27791
loss: 1.078035593032837,grad_norm: 0.999999178827051, iteration: 27792
loss: 1.197768211364746,grad_norm: 0.9999998451653656, iteration: 27793
loss: 1.0310845375061035,grad_norm: 0.9999992364117192, iteration: 27794
loss: 1.0443015098571777,grad_norm: 0.9999997996633122, iteration: 27795
loss: 1.015004277229309,grad_norm: 0.9999993546977648, iteration: 27796
loss: 1.001965880393982,grad_norm: 0.8299771889145461, iteration: 27797
loss: 1.007765769958496,grad_norm: 0.9999989540077366, iteration: 27798
loss: 1.0264534950256348,grad_norm: 0.9999997134297165, iteration: 27799
loss: 1.0255138874053955,grad_norm: 0.9999992097069631, iteration: 27800
loss: 1.0380244255065918,grad_norm: 0.8638053641004141, iteration: 27801
loss: 1.0616713762283325,grad_norm: 0.999999614629515, iteration: 27802
loss: 1.0280743837356567,grad_norm: 0.9999992306237839, iteration: 27803
loss: 1.0275204181671143,grad_norm: 0.7760459903566728, iteration: 27804
loss: 1.084460973739624,grad_norm: 0.9999993921574749, iteration: 27805
loss: 0.9997370839118958,grad_norm: 0.9836542507559927, iteration: 27806
loss: 1.0463600158691406,grad_norm: 0.9999992554566941, iteration: 27807
loss: 0.9873555898666382,grad_norm: 0.9999995106941233, iteration: 27808
loss: 1.0152779817581177,grad_norm: 0.9999990312713798, iteration: 27809
loss: 0.953930675983429,grad_norm: 0.9755089981904781, iteration: 27810
loss: 0.9963819980621338,grad_norm: 0.9425766259944749, iteration: 27811
loss: 1.0167757272720337,grad_norm: 0.9195798165661169, iteration: 27812
loss: 1.0339643955230713,grad_norm: 0.9999995899541128, iteration: 27813
loss: 1.0360628366470337,grad_norm: 0.9291696416763165, iteration: 27814
loss: 0.9696512222290039,grad_norm: 0.8238080463238295, iteration: 27815
loss: 1.0601752996444702,grad_norm: 0.9999996902092178, iteration: 27816
loss: 1.0077553987503052,grad_norm: 0.999999256707038, iteration: 27817
loss: 1.0037260055541992,grad_norm: 0.9999991888200518, iteration: 27818
loss: 0.9714885950088501,grad_norm: 0.9999990758753722, iteration: 27819
loss: 0.9645837545394897,grad_norm: 0.9999993819261952, iteration: 27820
loss: 0.9978558421134949,grad_norm: 0.9612468640130077, iteration: 27821
loss: 1.01560640335083,grad_norm: 0.9999996369239703, iteration: 27822
loss: 1.0805834531784058,grad_norm: 0.9999998935421888, iteration: 27823
loss: 1.0240422487258911,grad_norm: 0.99999928594842, iteration: 27824
loss: 1.005206823348999,grad_norm: 0.9170969871963717, iteration: 27825
loss: 1.1856330633163452,grad_norm: 0.99999989333706, iteration: 27826
loss: 1.009462833404541,grad_norm: 0.9999997265571361, iteration: 27827
loss: 1.0509364604949951,grad_norm: 0.9999996058107965, iteration: 27828
loss: 0.954256534576416,grad_norm: 0.9999992134412161, iteration: 27829
loss: 0.9903701543807983,grad_norm: 0.9045736058347685, iteration: 27830
loss: 0.9762182235717773,grad_norm: 0.9519342101311326, iteration: 27831
loss: 0.9847322702407837,grad_norm: 0.9999991599249357, iteration: 27832
loss: 1.052562952041626,grad_norm: 0.9999997156220958, iteration: 27833
loss: 1.0325779914855957,grad_norm: 0.999999022312059, iteration: 27834
loss: 1.0074002742767334,grad_norm: 0.9999990970951685, iteration: 27835
loss: 0.9542565941810608,grad_norm: 0.999999034089487, iteration: 27836
loss: 1.0581430196762085,grad_norm: 0.9999991398291187, iteration: 27837
loss: 1.111918330192566,grad_norm: 0.9999996375846685, iteration: 27838
loss: 1.0391457080841064,grad_norm: 0.9999998878659726, iteration: 27839
loss: 1.1281555891036987,grad_norm: 0.9999992360913555, iteration: 27840
loss: 0.9919577240943909,grad_norm: 0.9999990710282329, iteration: 27841
loss: 1.100332498550415,grad_norm: 0.9999998522258889, iteration: 27842
loss: 0.9974753856658936,grad_norm: 0.9999991589068307, iteration: 27843
loss: 1.0364179611206055,grad_norm: 0.9999995135272711, iteration: 27844
loss: 1.043107032775879,grad_norm: 0.9519717637759748, iteration: 27845
loss: 1.0168551206588745,grad_norm: 0.9999992978408504, iteration: 27846
loss: 0.9855343103408813,grad_norm: 0.9999993349527585, iteration: 27847
loss: 1.0114364624023438,grad_norm: 0.9999989724065576, iteration: 27848
loss: 1.0291883945465088,grad_norm: 0.9999990637319106, iteration: 27849
loss: 1.0464715957641602,grad_norm: 0.999999076683161, iteration: 27850
loss: 1.0037803649902344,grad_norm: 0.9999991729488076, iteration: 27851
loss: 0.9634085893630981,grad_norm: 0.9999994932746801, iteration: 27852
loss: 1.0073109865188599,grad_norm: 0.9413949473303396, iteration: 27853
loss: 0.9956552982330322,grad_norm: 0.9999990814354844, iteration: 27854
loss: 1.0325251817703247,grad_norm: 0.9999998020824095, iteration: 27855
loss: 1.016536831855774,grad_norm: 0.9999996335467806, iteration: 27856
loss: 1.0171630382537842,grad_norm: 0.9848350230036894, iteration: 27857
loss: 1.0019422769546509,grad_norm: 0.9999991586046182, iteration: 27858
loss: 1.0094693899154663,grad_norm: 0.999999276260558, iteration: 27859
loss: 1.025834560394287,grad_norm: 0.8439812569335575, iteration: 27860
loss: 1.0124496221542358,grad_norm: 0.9999998910805706, iteration: 27861
loss: 1.0415034294128418,grad_norm: 0.9689274301866284, iteration: 27862
loss: 1.0226212739944458,grad_norm: 0.9999994345236751, iteration: 27863
loss: 1.136612057685852,grad_norm: 0.9999992485240938, iteration: 27864
loss: 1.0324095487594604,grad_norm: 0.999999062081373, iteration: 27865
loss: 0.9793880581855774,grad_norm: 0.9775831297291038, iteration: 27866
loss: 1.026947259902954,grad_norm: 0.9999992905549314, iteration: 27867
loss: 1.0160701274871826,grad_norm: 0.9999991795843292, iteration: 27868
loss: 1.1102076768875122,grad_norm: 0.9999997945830831, iteration: 27869
loss: 1.0015796422958374,grad_norm: 0.9999990476228403, iteration: 27870
loss: 1.0045506954193115,grad_norm: 0.9999990773364092, iteration: 27871
loss: 1.016605019569397,grad_norm: 0.9999990982309375, iteration: 27872
loss: 1.0271835327148438,grad_norm: 0.9999991069140842, iteration: 27873
loss: 1.0046007633209229,grad_norm: 0.9999992766015476, iteration: 27874
loss: 0.9765039682388306,grad_norm: 0.999999123380999, iteration: 27875
loss: 0.9917169213294983,grad_norm: 0.8989924322115816, iteration: 27876
loss: 1.0070066452026367,grad_norm: 0.9999991506664242, iteration: 27877
loss: 0.9928057193756104,grad_norm: 0.9999991195205588, iteration: 27878
loss: 0.9898306727409363,grad_norm: 0.9999992748440678, iteration: 27879
loss: 1.0768226385116577,grad_norm: 0.9999993548041028, iteration: 27880
loss: 1.010757565498352,grad_norm: 0.9999989473500165, iteration: 27881
loss: 1.011871576309204,grad_norm: 0.9999991527284593, iteration: 27882
loss: 1.1007497310638428,grad_norm: 0.9999991967483138, iteration: 27883
loss: 1.0204392671585083,grad_norm: 0.9999992955561404, iteration: 27884
loss: 1.0153539180755615,grad_norm: 0.9836799404512212, iteration: 27885
loss: 1.0165479183197021,grad_norm: 0.9191964110290239, iteration: 27886
loss: 1.0897436141967773,grad_norm: 0.9999993650098901, iteration: 27887
loss: 1.0022178888320923,grad_norm: 0.9999992109781402, iteration: 27888
loss: 0.9242349863052368,grad_norm: 0.9439209026036318, iteration: 27889
loss: 1.0151476860046387,grad_norm: 0.9999991831867356, iteration: 27890
loss: 1.0218982696533203,grad_norm: 0.9235537189285263, iteration: 27891
loss: 0.9924134016036987,grad_norm: 0.9999994141172842, iteration: 27892
loss: 1.0299782752990723,grad_norm: 0.9999992895611631, iteration: 27893
loss: 0.9872097969055176,grad_norm: 0.9999992225038624, iteration: 27894
loss: 0.98870849609375,grad_norm: 0.9999990392742285, iteration: 27895
loss: 0.9951537847518921,grad_norm: 0.9999996513942224, iteration: 27896
loss: 1.022545576095581,grad_norm: 0.9999992111067489, iteration: 27897
loss: 1.032902479171753,grad_norm: 0.8802326748637901, iteration: 27898
loss: 1.024174451828003,grad_norm: 0.9999990723793298, iteration: 27899
loss: 1.0635627508163452,grad_norm: 0.982130453337362, iteration: 27900
loss: 0.993489682674408,grad_norm: 0.8723986579565247, iteration: 27901
loss: 1.0255047082901,grad_norm: 0.99999963357161, iteration: 27902
loss: 0.9727923274040222,grad_norm: 0.8423809494162949, iteration: 27903
loss: 1.0041327476501465,grad_norm: 0.9936060920239596, iteration: 27904
loss: 1.0481520891189575,grad_norm: 0.9999991955555708, iteration: 27905
loss: 1.081379771232605,grad_norm: 0.975067651073296, iteration: 27906
loss: 1.0390700101852417,grad_norm: 0.9999990993504443, iteration: 27907
loss: 0.9615640044212341,grad_norm: 0.8640954666829191, iteration: 27908
loss: 0.9960914254188538,grad_norm: 0.9999992012750729, iteration: 27909
loss: 1.04416823387146,grad_norm: 0.9250793481435681, iteration: 27910
loss: 1.0364954471588135,grad_norm: 0.9999994598083073, iteration: 27911
loss: 1.0334476232528687,grad_norm: 0.999999775513541, iteration: 27912
loss: 1.0500760078430176,grad_norm: 0.994353569825282, iteration: 27913
loss: 1.0289888381958008,grad_norm: 0.8119087843611484, iteration: 27914
loss: 1.0188654661178589,grad_norm: 0.9999991177668083, iteration: 27915
loss: 1.0200049877166748,grad_norm: 0.9999991580947515, iteration: 27916
loss: 0.9826152920722961,grad_norm: 0.7849598925832723, iteration: 27917
loss: 1.0722637176513672,grad_norm: 0.9384444304669047, iteration: 27918
loss: 1.0111935138702393,grad_norm: 0.9616654196424845, iteration: 27919
loss: 1.023991346359253,grad_norm: 0.9716320760434597, iteration: 27920
loss: 1.030394196510315,grad_norm: 0.9999994268295981, iteration: 27921
loss: 1.0364474058151245,grad_norm: 0.999999342146973, iteration: 27922
loss: 1.0319552421569824,grad_norm: 0.9999991411061361, iteration: 27923
loss: 1.0015063285827637,grad_norm: 0.9999991317611665, iteration: 27924
loss: 1.0160374641418457,grad_norm: 0.9999990296805708, iteration: 27925
loss: 1.0043374300003052,grad_norm: 0.8867606884761241, iteration: 27926
loss: 1.0481897592544556,grad_norm: 0.9999992878072039, iteration: 27927
loss: 1.0175387859344482,grad_norm: 0.9999990975743441, iteration: 27928
loss: 0.9978881478309631,grad_norm: 0.9331180019613384, iteration: 27929
loss: 1.0326688289642334,grad_norm: 0.9999991457766602, iteration: 27930
loss: 1.0295004844665527,grad_norm: 0.9999995888058346, iteration: 27931
loss: 1.0334678888320923,grad_norm: 0.9999993032780787, iteration: 27932
loss: 1.0356563329696655,grad_norm: 0.9258690144536442, iteration: 27933
loss: 1.0323374271392822,grad_norm: 0.9999993266310435, iteration: 27934
loss: 0.9855215549468994,grad_norm: 0.9575903400097926, iteration: 27935
loss: 1.0283281803131104,grad_norm: 0.8817051628988751, iteration: 27936
loss: 1.059300184249878,grad_norm: 0.9080350887426272, iteration: 27937
loss: 1.0363613367080688,grad_norm: 0.9999992495208145, iteration: 27938
loss: 1.0022855997085571,grad_norm: 0.9999993012641525, iteration: 27939
loss: 1.044684648513794,grad_norm: 0.9367776409132704, iteration: 27940
loss: 1.0171422958374023,grad_norm: 0.9999993353554665, iteration: 27941
loss: 1.005248785018921,grad_norm: 0.9999992053817024, iteration: 27942
loss: 1.0080288648605347,grad_norm: 0.9654520695583012, iteration: 27943
loss: 0.9806747436523438,grad_norm: 0.9999992980943857, iteration: 27944
loss: 1.0213247537612915,grad_norm: 0.9678442017937837, iteration: 27945
loss: 1.0410237312316895,grad_norm: 0.9852762523706118, iteration: 27946
loss: 1.042400598526001,grad_norm: 0.9999995678941093, iteration: 27947
loss: 1.0241042375564575,grad_norm: 0.9999990849955526, iteration: 27948
loss: 0.9880852699279785,grad_norm: 0.9884550594522984, iteration: 27949
loss: 1.0096023082733154,grad_norm: 0.943019777217799, iteration: 27950
loss: 0.9940326809883118,grad_norm: 0.9999990631389076, iteration: 27951
loss: 0.9604365825653076,grad_norm: 0.9999989561460635, iteration: 27952
loss: 1.0379796028137207,grad_norm: 0.9724419648623461, iteration: 27953
loss: 1.0414897203445435,grad_norm: 0.8711078387337191, iteration: 27954
loss: 0.9500183463096619,grad_norm: 0.9999990563185148, iteration: 27955
loss: 1.027979850769043,grad_norm: 0.9999992687328189, iteration: 27956
loss: 1.0093796253204346,grad_norm: 0.9999996720848687, iteration: 27957
loss: 1.0578075647354126,grad_norm: 0.9999992405508875, iteration: 27958
loss: 1.0461750030517578,grad_norm: 0.9999993373156681, iteration: 27959
loss: 1.010627031326294,grad_norm: 0.9401885265754782, iteration: 27960
loss: 0.9662661552429199,grad_norm: 0.8691140667758329, iteration: 27961
loss: 1.0346190929412842,grad_norm: 0.9999990053740101, iteration: 27962
loss: 1.0723159313201904,grad_norm: 0.9999993668789517, iteration: 27963
loss: 1.020085096359253,grad_norm: 0.9279350477668971, iteration: 27964
loss: 0.9965978860855103,grad_norm: 0.8962849529700075, iteration: 27965
loss: 1.0829777717590332,grad_norm: 0.9999994455752738, iteration: 27966
loss: 0.9932646155357361,grad_norm: 0.9999991353910626, iteration: 27967
loss: 1.0307743549346924,grad_norm: 0.9578144754239947, iteration: 27968
loss: 1.0169178247451782,grad_norm: 0.9999990354247371, iteration: 27969
loss: 0.9805181622505188,grad_norm: 0.99664712730012, iteration: 27970
loss: 1.0849874019622803,grad_norm: 0.9999997240019147, iteration: 27971
loss: 1.0043367147445679,grad_norm: 0.7631061675325038, iteration: 27972
loss: 1.0052475929260254,grad_norm: 0.9999990670513718, iteration: 27973
loss: 1.0172157287597656,grad_norm: 0.9999991705966825, iteration: 27974
loss: 1.0089664459228516,grad_norm: 0.8711020144271534, iteration: 27975
loss: 0.9990590214729309,grad_norm: 0.983299846502259, iteration: 27976
loss: 1.025654673576355,grad_norm: 0.9999991178846194, iteration: 27977
loss: 1.010750651359558,grad_norm: 0.9999996841406381, iteration: 27978
loss: 0.952790379524231,grad_norm: 0.9041805833033111, iteration: 27979
loss: 1.0606611967086792,grad_norm: 0.999999662255117, iteration: 27980
loss: 1.0651044845581055,grad_norm: 0.9999999062588223, iteration: 27981
loss: 0.9921737909317017,grad_norm: 0.9999991366872738, iteration: 27982
loss: 0.9765984416007996,grad_norm: 0.999999023671427, iteration: 27983
loss: 1.002597451210022,grad_norm: 0.7515745006393524, iteration: 27984
loss: 1.0272141695022583,grad_norm: 0.9796411932627886, iteration: 27985
loss: 1.0723170042037964,grad_norm: 0.9999997664376064, iteration: 27986
loss: 1.0019878149032593,grad_norm: 0.9999990694271628, iteration: 27987
loss: 1.1250680685043335,grad_norm: 0.999999499993258, iteration: 27988
loss: 0.984492301940918,grad_norm: 0.857430215902172, iteration: 27989
loss: 1.009298324584961,grad_norm: 0.999999556386764, iteration: 27990
loss: 1.0185757875442505,grad_norm: 0.9999991945554806, iteration: 27991
loss: 1.0253870487213135,grad_norm: 0.9999990165964486, iteration: 27992
loss: 1.026012659072876,grad_norm: 0.9999993154840884, iteration: 27993
loss: 1.003354549407959,grad_norm: 0.9999991165643566, iteration: 27994
loss: 0.9860562682151794,grad_norm: 0.8080122948527237, iteration: 27995
loss: 1.0264310836791992,grad_norm: 0.999999209226575, iteration: 27996
loss: 0.9798710346221924,grad_norm: 0.9982396241362442, iteration: 27997
loss: 1.0440905094146729,grad_norm: 0.9999990397960714, iteration: 27998
loss: 0.995881974697113,grad_norm: 0.9999991787183165, iteration: 27999
loss: 1.0288656949996948,grad_norm: 0.9999995797802936, iteration: 28000
loss: 1.0385119915008545,grad_norm: 0.9891423401644458, iteration: 28001
loss: 0.999070942401886,grad_norm: 0.9999993881371125, iteration: 28002
loss: 1.023715615272522,grad_norm: 0.9999990979046279, iteration: 28003
loss: 0.9603980779647827,grad_norm: 0.923012307732076, iteration: 28004
loss: 0.9824959635734558,grad_norm: 0.9999991253684694, iteration: 28005
loss: 1.0077134370803833,grad_norm: 0.9999993984207397, iteration: 28006
loss: 0.983847439289093,grad_norm: 0.814731126621978, iteration: 28007
loss: 0.9933867454528809,grad_norm: 0.9999990770119511, iteration: 28008
loss: 1.0303950309753418,grad_norm: 0.9999991772105052, iteration: 28009
loss: 1.038663387298584,grad_norm: 0.9999991291349553, iteration: 28010
loss: 1.004282832145691,grad_norm: 0.9903974028604945, iteration: 28011
loss: 1.009426236152649,grad_norm: 0.9375350416108502, iteration: 28012
loss: 1.0354633331298828,grad_norm: 0.9410669745645539, iteration: 28013
loss: 0.9871940612792969,grad_norm: 0.8915447171899333, iteration: 28014
loss: 1.0097389221191406,grad_norm: 0.9999992626216418, iteration: 28015
loss: 1.0580922365188599,grad_norm: 0.9999992289076428, iteration: 28016
loss: 0.9676142930984497,grad_norm: 0.9999990186063029, iteration: 28017
loss: 1.025154709815979,grad_norm: 0.9999992745472579, iteration: 28018
loss: 0.9978236556053162,grad_norm: 0.9999991951386837, iteration: 28019
loss: 1.0475389957427979,grad_norm: 0.8094485094085133, iteration: 28020
loss: 1.0111613273620605,grad_norm: 0.9999991206017402, iteration: 28021
loss: 0.9824509620666504,grad_norm: 0.9836978101793304, iteration: 28022
loss: 0.9702348113059998,grad_norm: 0.981974383745335, iteration: 28023
loss: 1.0360345840454102,grad_norm: 0.9577027630429666, iteration: 28024
loss: 0.9982695579528809,grad_norm: 0.9689753103635238, iteration: 28025
loss: 1.0030080080032349,grad_norm: 0.9819311957323223, iteration: 28026
loss: 1.0107306241989136,grad_norm: 0.9999990907839373, iteration: 28027
loss: 1.0370123386383057,grad_norm: 0.9259196344091215, iteration: 28028
loss: 1.0014641284942627,grad_norm: 0.8905946379319604, iteration: 28029
loss: 1.003985047340393,grad_norm: 0.9999994742229108, iteration: 28030
loss: 0.9805951118469238,grad_norm: 0.9076376041741737, iteration: 28031
loss: 0.9907727241516113,grad_norm: 0.9999991801833125, iteration: 28032
loss: 1.0489681959152222,grad_norm: 0.9677020883012093, iteration: 28033
loss: 0.9531334042549133,grad_norm: 0.9999992374144759, iteration: 28034
loss: 0.9953455328941345,grad_norm: 0.9999992307080884, iteration: 28035
loss: 1.0423489809036255,grad_norm: 0.9999994295343921, iteration: 28036
loss: 1.014540195465088,grad_norm: 0.9858398293152206, iteration: 28037
loss: 1.0512522459030151,grad_norm: 0.9999991150920577, iteration: 28038
loss: 0.9978659152984619,grad_norm: 0.9999992520034795, iteration: 28039
loss: 0.9936431050300598,grad_norm: 0.99999902692627, iteration: 28040
loss: 1.0115536451339722,grad_norm: 0.9999994523930713, iteration: 28041
loss: 1.0043631792068481,grad_norm: 0.9611390468196185, iteration: 28042
loss: 0.9976049065589905,grad_norm: 0.9999991204488133, iteration: 28043
loss: 1.047311782836914,grad_norm: 0.9999991119430366, iteration: 28044
loss: 1.057196855545044,grad_norm: 0.999999010997359, iteration: 28045
loss: 0.9754738211631775,grad_norm: 0.9999991713485487, iteration: 28046
loss: 0.9782441258430481,grad_norm: 0.9304284866352893, iteration: 28047
loss: 0.9571744799613953,grad_norm: 0.9999991116150524, iteration: 28048
loss: 0.999075174331665,grad_norm: 0.9826487593591643, iteration: 28049
loss: 0.987031877040863,grad_norm: 0.9999989995632242, iteration: 28050
loss: 1.009954810142517,grad_norm: 0.9868183755128841, iteration: 28051
loss: 0.9956145882606506,grad_norm: 0.99999902173887, iteration: 28052
loss: 1.0535207986831665,grad_norm: 0.9999998080395214, iteration: 28053
loss: 1.0441020727157593,grad_norm: 0.999999135676557, iteration: 28054
loss: 1.0228654146194458,grad_norm: 0.9044037477178749, iteration: 28055
loss: 1.0709837675094604,grad_norm: 0.9999995354240698, iteration: 28056
loss: 1.0776196718215942,grad_norm: 0.9999993755078865, iteration: 28057
loss: 1.0467543601989746,grad_norm: 0.9595233220002928, iteration: 28058
loss: 1.0466639995574951,grad_norm: 0.999998996095723, iteration: 28059
loss: 1.0297307968139648,grad_norm: 0.9999995617743351, iteration: 28060
loss: 0.9471505880355835,grad_norm: 0.9999991262404794, iteration: 28061
loss: 0.996947169303894,grad_norm: 0.9999994772763077, iteration: 28062
loss: 1.0586234331130981,grad_norm: 0.9052170310392909, iteration: 28063
loss: 1.0083229541778564,grad_norm: 0.9999996071528657, iteration: 28064
loss: 1.0236656665802002,grad_norm: 0.9586487208884563, iteration: 28065
loss: 0.9794308543205261,grad_norm: 0.9999995025524164, iteration: 28066
loss: 1.008296251296997,grad_norm: 0.900290527655762, iteration: 28067
loss: 1.0008336305618286,grad_norm: 0.9999991594463136, iteration: 28068
loss: 0.9644939303398132,grad_norm: 0.9999990900916447, iteration: 28069
loss: 1.0447189807891846,grad_norm: 0.9999991321022932, iteration: 28070
loss: 1.0315310955047607,grad_norm: 0.9999992124258628, iteration: 28071
loss: 1.0260370969772339,grad_norm: 0.9999991445019595, iteration: 28072
loss: 1.0942076444625854,grad_norm: 0.9999998849890671, iteration: 28073
loss: 1.0230218172073364,grad_norm: 0.9524876031714484, iteration: 28074
loss: 1.0005805492401123,grad_norm: 0.9999992980997933, iteration: 28075
loss: 1.0271830558776855,grad_norm: 0.900767153869107, iteration: 28076
loss: 1.0146235227584839,grad_norm: 0.9210008662664202, iteration: 28077
loss: 1.0425647497177124,grad_norm: 0.9999993527395117, iteration: 28078
loss: 0.9910687208175659,grad_norm: 0.9999991386688828, iteration: 28079
loss: 1.0178090333938599,grad_norm: 0.9999990286336256, iteration: 28080
loss: 1.0317044258117676,grad_norm: 0.9999991036356367, iteration: 28081
loss: 0.9815224409103394,grad_norm: 0.922734025069898, iteration: 28082
loss: 1.000854730606079,grad_norm: 0.826600967973377, iteration: 28083
loss: 1.0202051401138306,grad_norm: 0.9999990483641701, iteration: 28084
loss: 1.0300192832946777,grad_norm: 0.9922715242194982, iteration: 28085
loss: 0.9893736243247986,grad_norm: 0.9327653963625641, iteration: 28086
loss: 1.0259073972702026,grad_norm: 0.9705792927066405, iteration: 28087
loss: 1.1279231309890747,grad_norm: 0.9999996945001984, iteration: 28088
loss: 1.0307544469833374,grad_norm: 0.8924668524992733, iteration: 28089
loss: 1.002703070640564,grad_norm: 0.9999991887615611, iteration: 28090
loss: 0.9673689007759094,grad_norm: 0.9999991079277373, iteration: 28091
loss: 0.9896456599235535,grad_norm: 0.9999991818979272, iteration: 28092
loss: 1.0334243774414062,grad_norm: 0.9999994759763455, iteration: 28093
loss: 1.0182596445083618,grad_norm: 0.9999989890819847, iteration: 28094
loss: 0.9848130345344543,grad_norm: 0.9999993911264581, iteration: 28095
loss: 1.02435302734375,grad_norm: 0.9999991216566148, iteration: 28096
loss: 0.9848605394363403,grad_norm: 0.9999993789436291, iteration: 28097
loss: 1.0198452472686768,grad_norm: 0.9999997204268153, iteration: 28098
loss: 1.0091441869735718,grad_norm: 0.9999990221040096, iteration: 28099
loss: 1.0768094062805176,grad_norm: 0.9999993851226502, iteration: 28100
loss: 0.9609290361404419,grad_norm: 0.999998997057994, iteration: 28101
loss: 1.0039682388305664,grad_norm: 0.905002258744638, iteration: 28102
loss: 1.0267302989959717,grad_norm: 0.9999991081754006, iteration: 28103
loss: 1.0080329179763794,grad_norm: 0.9999994748097594, iteration: 28104
loss: 0.9826699495315552,grad_norm: 0.9999991134394727, iteration: 28105
loss: 1.0365846157073975,grad_norm: 0.9613644146912941, iteration: 28106
loss: 1.0114450454711914,grad_norm: 0.9154921126557444, iteration: 28107
loss: 1.0312366485595703,grad_norm: 0.9999992013416089, iteration: 28108
loss: 0.9973121285438538,grad_norm: 0.999999123295089, iteration: 28109
loss: 1.0222647190093994,grad_norm: 0.9329699886458861, iteration: 28110
loss: 1.0193496942520142,grad_norm: 0.9999992588558845, iteration: 28111
loss: 1.0292531251907349,grad_norm: 0.9999991307926176, iteration: 28112
loss: 1.0412899255752563,grad_norm: 0.999999668888811, iteration: 28113
loss: 1.0402803421020508,grad_norm: 0.9999994233212564, iteration: 28114
loss: 0.987011194229126,grad_norm: 0.9999989833845011, iteration: 28115
loss: 1.0323662757873535,grad_norm: 0.9999994151715402, iteration: 28116
loss: 1.0070457458496094,grad_norm: 0.9999990322116382, iteration: 28117
loss: 1.0153725147247314,grad_norm: 0.9999990613286716, iteration: 28118
loss: 1.073703646659851,grad_norm: 0.9999993324924075, iteration: 28119
loss: 1.013978362083435,grad_norm: 0.9999989930100269, iteration: 28120
loss: 1.0148752927780151,grad_norm: 0.8959582160470229, iteration: 28121
loss: 1.0247135162353516,grad_norm: 0.9999992314739118, iteration: 28122
loss: 0.9886447787284851,grad_norm: 0.9978240882838292, iteration: 28123
loss: 0.9770907163619995,grad_norm: 0.9999991102238679, iteration: 28124
loss: 1.06474769115448,grad_norm: 0.9999995396215257, iteration: 28125
loss: 1.0403473377227783,grad_norm: 0.9999991347407897, iteration: 28126
loss: 1.0327012538909912,grad_norm: 0.9999991821241648, iteration: 28127
loss: 1.0331733226776123,grad_norm: 0.9719804449475157, iteration: 28128
loss: 1.0080921649932861,grad_norm: 0.9645497547654482, iteration: 28129
loss: 1.023542046546936,grad_norm: 0.999998956690558, iteration: 28130
loss: 1.0386472940444946,grad_norm: 0.9999993453146777, iteration: 28131
loss: 1.0257049798965454,grad_norm: 0.8173886318451233, iteration: 28132
loss: 1.032367467880249,grad_norm: 0.8766643594363964, iteration: 28133
loss: 0.9756820797920227,grad_norm: 0.9999990206662163, iteration: 28134
loss: 1.0264649391174316,grad_norm: 0.9573222961232599, iteration: 28135
loss: 0.9956950545310974,grad_norm: 0.9999990602522411, iteration: 28136
loss: 0.9800757169723511,grad_norm: 0.9999991489363681, iteration: 28137
loss: 0.9984937310218811,grad_norm: 0.999999429085428, iteration: 28138
loss: 0.9702451229095459,grad_norm: 0.9711757285737703, iteration: 28139
loss: 1.007192611694336,grad_norm: 0.8757190051019279, iteration: 28140
loss: 1.028800368309021,grad_norm: 0.9828549187440008, iteration: 28141
loss: 1.006556510925293,grad_norm: 0.8518831202623703, iteration: 28142
loss: 1.0139881372451782,grad_norm: 0.9999992565757951, iteration: 28143
loss: 0.9965384602546692,grad_norm: 0.9277484167078153, iteration: 28144
loss: 0.9887816309928894,grad_norm: 0.9999991419451428, iteration: 28145
loss: 1.0298945903778076,grad_norm: 0.9999998254356177, iteration: 28146
loss: 0.9732255935668945,grad_norm: 0.904709327223677, iteration: 28147
loss: 1.0618740320205688,grad_norm: 0.9999992242968915, iteration: 28148
loss: 1.0298566818237305,grad_norm: 0.9999992089410713, iteration: 28149
loss: 0.9534509778022766,grad_norm: 0.9999990286475083, iteration: 28150
loss: 1.0034713745117188,grad_norm: 0.9453748637253658, iteration: 28151
loss: 1.041171669960022,grad_norm: 0.9999992070854453, iteration: 28152
loss: 0.9669941067695618,grad_norm: 0.8978872065922483, iteration: 28153
loss: 1.0022332668304443,grad_norm: 0.9999993826135369, iteration: 28154
loss: 0.9982787370681763,grad_norm: 0.9465981932137597, iteration: 28155
loss: 1.0515209436416626,grad_norm: 0.9999994551205202, iteration: 28156
loss: 1.0256903171539307,grad_norm: 0.9999993579226693, iteration: 28157
loss: 1.0080643892288208,grad_norm: 0.8158597870154068, iteration: 28158
loss: 0.9798828363418579,grad_norm: 0.9945681908490802, iteration: 28159
loss: 1.0052406787872314,grad_norm: 0.9999991292659314, iteration: 28160
loss: 1.0244207382202148,grad_norm: 0.9999990178334456, iteration: 28161
loss: 1.0197685956954956,grad_norm: 0.999999007860456, iteration: 28162
loss: 1.0459301471710205,grad_norm: 0.9999990478922048, iteration: 28163
loss: 1.0049781799316406,grad_norm: 0.9999990896375821, iteration: 28164
loss: 0.9966869354248047,grad_norm: 0.9999991940679013, iteration: 28165
loss: 0.9967124462127686,grad_norm: 0.9619849537559384, iteration: 28166
loss: 0.9816240072250366,grad_norm: 0.8612610369755129, iteration: 28167
loss: 1.0151557922363281,grad_norm: 0.9999992361273539, iteration: 28168
loss: 1.0862313508987427,grad_norm: 0.9999993002530781, iteration: 28169
loss: 0.9674168825149536,grad_norm: 0.9081953294013876, iteration: 28170
loss: 1.0274333953857422,grad_norm: 0.9433517747729369, iteration: 28171
loss: 1.0347630977630615,grad_norm: 0.9999991767363403, iteration: 28172
loss: 0.9691897034645081,grad_norm: 0.9999989993859467, iteration: 28173
loss: 1.0064697265625,grad_norm: 0.9936838648948075, iteration: 28174
loss: 1.0297753810882568,grad_norm: 0.9999993600160642, iteration: 28175
loss: 1.0328317880630493,grad_norm: 0.9642763139583856, iteration: 28176
loss: 1.0108131170272827,grad_norm: 0.9999990749783936, iteration: 28177
loss: 1.0525453090667725,grad_norm: 0.9999996925107374, iteration: 28178
loss: 1.04257071018219,grad_norm: 0.9999990840987175, iteration: 28179
loss: 0.9801085591316223,grad_norm: 0.9999992416924031, iteration: 28180
loss: 0.9966856241226196,grad_norm: 0.8767251626929047, iteration: 28181
loss: 1.0020701885223389,grad_norm: 0.9601213941641956, iteration: 28182
loss: 1.01018226146698,grad_norm: 0.9999990183980646, iteration: 28183
loss: 1.0303432941436768,grad_norm: 0.9999990736276435, iteration: 28184
loss: 0.9692814946174622,grad_norm: 0.8429861588897508, iteration: 28185
loss: 1.0041781663894653,grad_norm: 0.9999991962406309, iteration: 28186
loss: 1.0182552337646484,grad_norm: 0.9999994864741463, iteration: 28187
loss: 1.0295116901397705,grad_norm: 0.831091766834054, iteration: 28188
loss: 1.0366581678390503,grad_norm: 0.9999990651247944, iteration: 28189
loss: 1.0099656581878662,grad_norm: 0.7976479613849669, iteration: 28190
loss: 1.0086612701416016,grad_norm: 0.9999992414141131, iteration: 28191
loss: 1.06134831905365,grad_norm: 0.9999993577876641, iteration: 28192
loss: 1.0093551874160767,grad_norm: 0.999999099195763, iteration: 28193
loss: 1.0022201538085938,grad_norm: 0.8912203687978361, iteration: 28194
loss: 0.9955474734306335,grad_norm: 0.9999991358994599, iteration: 28195
loss: 1.0018585920333862,grad_norm: 0.9999990474128044, iteration: 28196
loss: 1.0117162466049194,grad_norm: 0.9752763195521686, iteration: 28197
loss: 1.0486239194869995,grad_norm: 0.9999990496737391, iteration: 28198
loss: 0.9759940505027771,grad_norm: 0.8919564624795995, iteration: 28199
loss: 0.9968488216400146,grad_norm: 0.9999991215226495, iteration: 28200
loss: 1.0096454620361328,grad_norm: 0.9193576957368869, iteration: 28201
loss: 1.0253818035125732,grad_norm: 0.9999998011326174, iteration: 28202
loss: 1.0118145942687988,grad_norm: 0.999999586969287, iteration: 28203
loss: 1.0158357620239258,grad_norm: 0.9327744945260353, iteration: 28204
loss: 1.0272608995437622,grad_norm: 0.8220576156930949, iteration: 28205
loss: 0.966559112071991,grad_norm: 0.9999997305323175, iteration: 28206
loss: 1.0074076652526855,grad_norm: 0.9999990346043879, iteration: 28207
loss: 1.0389389991760254,grad_norm: 0.9999993621767869, iteration: 28208
loss: 0.9933441877365112,grad_norm: 0.9999990290033851, iteration: 28209
loss: 1.0094796419143677,grad_norm: 0.9999993489745149, iteration: 28210
loss: 1.0208430290222168,grad_norm: 0.9999994504505691, iteration: 28211
loss: 1.0131455659866333,grad_norm: 0.9641469495028134, iteration: 28212
loss: 0.9875805377960205,grad_norm: 0.9273121438534482, iteration: 28213
loss: 0.9910866618156433,grad_norm: 0.9999999749747103, iteration: 28214
loss: 0.9788302183151245,grad_norm: 0.9999993042931821, iteration: 28215
loss: 0.972978413105011,grad_norm: 0.9999994056534661, iteration: 28216
loss: 1.0131312608718872,grad_norm: 0.9999990311208847, iteration: 28217
loss: 1.0271657705307007,grad_norm: 0.9371268742681833, iteration: 28218
loss: 1.0247454643249512,grad_norm: 0.9999991476926166, iteration: 28219
loss: 0.9720261693000793,grad_norm: 0.9999995281179374, iteration: 28220
loss: 1.0227473974227905,grad_norm: 0.9999991280109948, iteration: 28221
loss: 1.0073922872543335,grad_norm: 0.9999993013580956, iteration: 28222
loss: 1.017045259475708,grad_norm: 0.9999991399843321, iteration: 28223
loss: 1.0609546899795532,grad_norm: 0.9999992335276431, iteration: 28224
loss: 1.0010026693344116,grad_norm: 0.9778743840620269, iteration: 28225
loss: 1.0381786823272705,grad_norm: 0.9999996143276514, iteration: 28226
loss: 1.0370548963546753,grad_norm: 0.9999992757039775, iteration: 28227
loss: 1.0641101598739624,grad_norm: 0.9999997448857026, iteration: 28228
loss: 1.0019307136535645,grad_norm: 0.9999990482386846, iteration: 28229
loss: 1.0451709032058716,grad_norm: 0.9999990758881402, iteration: 28230
loss: 1.0270905494689941,grad_norm: 0.9999993258375215, iteration: 28231
loss: 1.028771996498108,grad_norm: 0.9999997937224159, iteration: 28232
loss: 0.9865785837173462,grad_norm: 0.9999999090372046, iteration: 28233
loss: 0.9875761866569519,grad_norm: 0.9196247961821766, iteration: 28234
loss: 0.9733142256736755,grad_norm: 0.9999991397196623, iteration: 28235
loss: 0.9852651953697205,grad_norm: 0.9999994170590144, iteration: 28236
loss: 1.011216640472412,grad_norm: 0.8606094228942526, iteration: 28237
loss: 0.9993872046470642,grad_norm: 0.9999992967017632, iteration: 28238
loss: 1.0191736221313477,grad_norm: 0.9999991276350346, iteration: 28239
loss: 1.021459937095642,grad_norm: 0.9999991617033237, iteration: 28240
loss: 1.0371297597885132,grad_norm: 0.9919973113021388, iteration: 28241
loss: 1.0294193029403687,grad_norm: 0.999999516121699, iteration: 28242
loss: 1.0672537088394165,grad_norm: 0.999999844718171, iteration: 28243
loss: 0.9729132652282715,grad_norm: 0.9766788665365587, iteration: 28244
loss: 1.0007522106170654,grad_norm: 0.9961056321563101, iteration: 28245
loss: 1.0432804822921753,grad_norm: 0.999999104500008, iteration: 28246
loss: 1.0095059871673584,grad_norm: 0.9999989681160175, iteration: 28247
loss: 1.0463875532150269,grad_norm: 0.9999992087824048, iteration: 28248
loss: 1.1164019107818604,grad_norm: 0.9999995392537867, iteration: 28249
loss: 1.0076406002044678,grad_norm: 0.9999990069229377, iteration: 28250
loss: 0.9656236171722412,grad_norm: 0.9999990219384421, iteration: 28251
loss: 1.0372674465179443,grad_norm: 0.9999990036343727, iteration: 28252
loss: 1.0221601724624634,grad_norm: 0.879800997347991, iteration: 28253
loss: 1.0019930601119995,grad_norm: 0.999999227551048, iteration: 28254
loss: 1.0604889392852783,grad_norm: 0.9999997196110697, iteration: 28255
loss: 1.0099760293960571,grad_norm: 0.9732735709858541, iteration: 28256
loss: 0.9774994850158691,grad_norm: 0.9999990939181627, iteration: 28257
loss: 0.9963706731796265,grad_norm: 0.8992908163827814, iteration: 28258
loss: 0.9896542429924011,grad_norm: 0.9953749730592136, iteration: 28259
loss: 0.9845333099365234,grad_norm: 0.9999993776717172, iteration: 28260
loss: 1.0271210670471191,grad_norm: 0.9999997010132551, iteration: 28261
loss: 1.0295560359954834,grad_norm: 0.9999992296485387, iteration: 28262
loss: 1.0091462135314941,grad_norm: 0.8853853218302975, iteration: 28263
loss: 1.0009037256240845,grad_norm: 0.9999991907970247, iteration: 28264
loss: 1.0133473873138428,grad_norm: 0.8858035696486591, iteration: 28265
loss: 1.0024439096450806,grad_norm: 0.9626684536365577, iteration: 28266
loss: 1.0288002490997314,grad_norm: 0.9999995342428701, iteration: 28267
loss: 1.009364128112793,grad_norm: 0.9999994016820645, iteration: 28268
loss: 1.0424795150756836,grad_norm: 0.9999993351645743, iteration: 28269
loss: 0.9914180636405945,grad_norm: 0.887009363934326, iteration: 28270
loss: 0.9849783778190613,grad_norm: 0.9999992236731937, iteration: 28271
loss: 1.0365040302276611,grad_norm: 0.8847137136397598, iteration: 28272
loss: 0.9974495768547058,grad_norm: 0.9999993498190731, iteration: 28273
loss: 1.0282829999923706,grad_norm: 0.9583105236960532, iteration: 28274
loss: 0.9970610737800598,grad_norm: 0.9908014818438887, iteration: 28275
loss: 1.0405714511871338,grad_norm: 0.9999993232516146, iteration: 28276
loss: 0.9841560125350952,grad_norm: 0.9999991527081226, iteration: 28277
loss: 1.0488450527191162,grad_norm: 0.9999994694875215, iteration: 28278
loss: 1.0657451152801514,grad_norm: 0.9999991525460132, iteration: 28279
loss: 1.065744161605835,grad_norm: 0.9999999973886246, iteration: 28280
loss: 0.9933695793151855,grad_norm: 0.961599491291219, iteration: 28281
loss: 1.0065498352050781,grad_norm: 0.9999993678580126, iteration: 28282
loss: 0.9822606444358826,grad_norm: 0.9999990323927638, iteration: 28283
loss: 0.9758518934249878,grad_norm: 0.9999990533076702, iteration: 28284
loss: 1.0328688621520996,grad_norm: 0.9999997118680831, iteration: 28285
loss: 1.0073715448379517,grad_norm: 0.9999989781235495, iteration: 28286
loss: 0.9875727891921997,grad_norm: 0.9999990134089741, iteration: 28287
loss: 0.9907476902008057,grad_norm: 0.9999991378757922, iteration: 28288
loss: 0.9993669986724854,grad_norm: 0.999999170976251, iteration: 28289
loss: 1.0143623352050781,grad_norm: 0.8818705034408232, iteration: 28290
loss: 1.014136552810669,grad_norm: 0.9403958562009928, iteration: 28291
loss: 0.9999064207077026,grad_norm: 0.9999991263340531, iteration: 28292
loss: 0.993879497051239,grad_norm: 0.9999992531813642, iteration: 28293
loss: 1.0572705268859863,grad_norm: 0.9999990622880011, iteration: 28294
loss: 1.0284987688064575,grad_norm: 0.964916154793677, iteration: 28295
loss: 0.9773896336555481,grad_norm: 0.9999990393817817, iteration: 28296
loss: 1.022506594657898,grad_norm: 0.9016827999996677, iteration: 28297
loss: 1.0090723037719727,grad_norm: 0.9999994909764651, iteration: 28298
loss: 0.9792695641517639,grad_norm: 0.9999992771758941, iteration: 28299
loss: 1.0222485065460205,grad_norm: 0.9999993998966888, iteration: 28300
loss: 1.0078991651535034,grad_norm: 0.8706759627930007, iteration: 28301
loss: 1.0166066884994507,grad_norm: 0.9999994513527581, iteration: 28302
loss: 1.0096409320831299,grad_norm: 0.9788966763878454, iteration: 28303
loss: 1.0134581327438354,grad_norm: 0.9699610463999652, iteration: 28304
loss: 1.0161492824554443,grad_norm: 0.9999991301466308, iteration: 28305
loss: 0.9757047891616821,grad_norm: 0.9625903238025825, iteration: 28306
loss: 0.9871723651885986,grad_norm: 0.9153090512878419, iteration: 28307
loss: 1.0007585287094116,grad_norm: 0.9999990842937873, iteration: 28308
loss: 0.9985519647598267,grad_norm: 0.9999990323352564, iteration: 28309
loss: 1.0409554243087769,grad_norm: 0.9797828985311856, iteration: 28310
loss: 1.042609691619873,grad_norm: 0.9888663360019505, iteration: 28311
loss: 1.0239020586013794,grad_norm: 0.9999994939307493, iteration: 28312
loss: 1.020212173461914,grad_norm: 0.9999996657438062, iteration: 28313
loss: 0.9748189449310303,grad_norm: 0.9907885663454922, iteration: 28314
loss: 1.0059007406234741,grad_norm: 0.999999220901113, iteration: 28315
loss: 1.0518476963043213,grad_norm: 0.9999994997713071, iteration: 28316
loss: 1.0328136682510376,grad_norm: 0.9999997919990381, iteration: 28317
loss: 1.026626706123352,grad_norm: 0.911361215182043, iteration: 28318
loss: 1.081782341003418,grad_norm: 0.9999997533475041, iteration: 28319
loss: 0.9911778569221497,grad_norm: 0.9999991543943103, iteration: 28320
loss: 0.9949594140052795,grad_norm: 0.9999992310940948, iteration: 28321
loss: 0.9939197301864624,grad_norm: 0.9949685780554646, iteration: 28322
loss: 0.9889737367630005,grad_norm: 0.9999991085679902, iteration: 28323
loss: 1.0037431716918945,grad_norm: 0.9711455603083462, iteration: 28324
loss: 0.9832793474197388,grad_norm: 0.8556937860604928, iteration: 28325
loss: 1.0349860191345215,grad_norm: 0.9999992705644349, iteration: 28326
loss: 0.9764207601547241,grad_norm: 0.9875500895529454, iteration: 28327
loss: 1.0531259775161743,grad_norm: 0.9999994407494528, iteration: 28328
loss: 1.0840915441513062,grad_norm: 0.9999998615631451, iteration: 28329
loss: 1.0092588663101196,grad_norm: 0.999998923775865, iteration: 28330
loss: 1.0037740468978882,grad_norm: 0.9968770257031134, iteration: 28331
loss: 1.0272780656814575,grad_norm: 0.9999990481378209, iteration: 28332
loss: 1.0050853490829468,grad_norm: 0.9999990480122555, iteration: 28333
loss: 1.0513163805007935,grad_norm: 0.9999991332623643, iteration: 28334
loss: 1.036790370941162,grad_norm: 0.9999992033164082, iteration: 28335
loss: 1.0244801044464111,grad_norm: 0.9999991507840853, iteration: 28336
loss: 1.0110719203948975,grad_norm: 0.9999992393226342, iteration: 28337
loss: 1.0063567161560059,grad_norm: 0.9882966554450625, iteration: 28338
loss: 1.027423620223999,grad_norm: 0.9705557618515104, iteration: 28339
loss: 1.020344614982605,grad_norm: 0.9016537169862167, iteration: 28340
loss: 1.042940616607666,grad_norm: 0.9999991986587705, iteration: 28341
loss: 1.030164122581482,grad_norm: 0.9990775687933763, iteration: 28342
loss: 1.0477268695831299,grad_norm: 0.9999994414858495, iteration: 28343
loss: 1.0142319202423096,grad_norm: 0.9999990795283362, iteration: 28344
loss: 1.0058505535125732,grad_norm: 0.9999990775625344, iteration: 28345
loss: 0.9995546936988831,grad_norm: 0.9777630439553887, iteration: 28346
loss: 0.9746516346931458,grad_norm: 0.9246595057534406, iteration: 28347
loss: 1.0468308925628662,grad_norm: 0.9999991001449033, iteration: 28348
loss: 1.012652039527893,grad_norm: 0.9999989935456954, iteration: 28349
loss: 1.0198101997375488,grad_norm: 0.9999990220880273, iteration: 28350
loss: 1.0399972200393677,grad_norm: 0.9999991706107028, iteration: 28351
loss: 1.0402348041534424,grad_norm: 0.9999992335135178, iteration: 28352
loss: 0.9892085194587708,grad_norm: 0.9536545327156737, iteration: 28353
loss: 1.0213862657546997,grad_norm: 0.9999991248041794, iteration: 28354
loss: 1.0480324029922485,grad_norm: 0.9999991460505885, iteration: 28355
loss: 1.0209516286849976,grad_norm: 0.8576847649334358, iteration: 28356
loss: 1.0212024450302124,grad_norm: 0.9999993584342298, iteration: 28357
loss: 1.0063316822052002,grad_norm: 0.9384799622243182, iteration: 28358
loss: 1.0192005634307861,grad_norm: 0.9155070151934261, iteration: 28359
loss: 1.0198498964309692,grad_norm: 0.9818185221080596, iteration: 28360
loss: 1.0289089679718018,grad_norm: 0.8113795349293947, iteration: 28361
loss: 1.0209124088287354,grad_norm: 0.8123565858727994, iteration: 28362
loss: 1.018479824066162,grad_norm: 0.999999547009074, iteration: 28363
loss: 0.9745428562164307,grad_norm: 0.9338381584827393, iteration: 28364
loss: 1.0777019262313843,grad_norm: 0.9999991492152348, iteration: 28365
loss: 1.0260751247406006,grad_norm: 0.9999993894321848, iteration: 28366
loss: 0.9930547475814819,grad_norm: 0.999999152831062, iteration: 28367
loss: 1.020806908607483,grad_norm: 0.9239928014982616, iteration: 28368
loss: 1.0085636377334595,grad_norm: 0.9905942120688807, iteration: 28369
loss: 0.9845308661460876,grad_norm: 0.9999990738156466, iteration: 28370
loss: 1.0104833841323853,grad_norm: 0.9596434122886894, iteration: 28371
loss: 0.9923893809318542,grad_norm: 0.99999899321052, iteration: 28372
loss: 0.9865550994873047,grad_norm: 0.9999994579050938, iteration: 28373
loss: 1.0389959812164307,grad_norm: 0.9999994898314383, iteration: 28374
loss: 0.9846071004867554,grad_norm: 0.8703615601747624, iteration: 28375
loss: 1.0407705307006836,grad_norm: 0.9960062099005491, iteration: 28376
loss: 1.0224268436431885,grad_norm: 0.9999989441962611, iteration: 28377
loss: 1.0534355640411377,grad_norm: 0.9999997899947889, iteration: 28378
loss: 1.0431125164031982,grad_norm: 0.8571251377577678, iteration: 28379
loss: 1.0412098169326782,grad_norm: 0.9999992787276359, iteration: 28380
loss: 1.0092120170593262,grad_norm: 0.9999992338980035, iteration: 28381
loss: 0.9899102449417114,grad_norm: 0.9999991570482115, iteration: 28382
loss: 1.0451598167419434,grad_norm: 0.9999996359998119, iteration: 28383
loss: 1.0459015369415283,grad_norm: 0.9999993848675632, iteration: 28384
loss: 1.0028266906738281,grad_norm: 0.9999992566787532, iteration: 28385
loss: 0.9972207546234131,grad_norm: 0.9999994005864342, iteration: 28386
loss: 1.034941554069519,grad_norm: 0.9999991184545174, iteration: 28387
loss: 0.9865298867225647,grad_norm: 0.9999993363406117, iteration: 28388
loss: 0.9984328746795654,grad_norm: 0.9999991019979249, iteration: 28389
loss: 1.0370049476623535,grad_norm: 0.9999995124547598, iteration: 28390
loss: 1.0172359943389893,grad_norm: 0.9999994674032338, iteration: 28391
loss: 1.0061737298965454,grad_norm: 0.9999991174338009, iteration: 28392
loss: 0.988711416721344,grad_norm: 0.9999992651719427, iteration: 28393
loss: 1.0286046266555786,grad_norm: 0.9999991991588106, iteration: 28394
loss: 1.0262142419815063,grad_norm: 0.9678914162040516, iteration: 28395
loss: 0.9841610193252563,grad_norm: 0.9999991893320069, iteration: 28396
loss: 1.0373774766921997,grad_norm: 0.9999994790507701, iteration: 28397
loss: 1.0408742427825928,grad_norm: 0.9950318243895345, iteration: 28398
loss: 1.0734615325927734,grad_norm: 0.999999577938678, iteration: 28399
loss: 1.0253448486328125,grad_norm: 0.9999993527960929, iteration: 28400
loss: 1.0690361261367798,grad_norm: 0.9999993364140033, iteration: 28401
loss: 1.1381127834320068,grad_norm: 0.9999999315239338, iteration: 28402
loss: 0.9871253967285156,grad_norm: 0.999999275868536, iteration: 28403
loss: 1.0200504064559937,grad_norm: 0.9999990567132764, iteration: 28404
loss: 1.0557475090026855,grad_norm: 0.9999992780658802, iteration: 28405
loss: 1.0186519622802734,grad_norm: 0.9294679995506768, iteration: 28406
loss: 1.0377086400985718,grad_norm: 0.9999995817750912, iteration: 28407
loss: 1.0323143005371094,grad_norm: 0.959891238924858, iteration: 28408
loss: 0.9927422404289246,grad_norm: 0.9999993371023875, iteration: 28409
loss: 0.9836567640304565,grad_norm: 0.9999993334556981, iteration: 28410
loss: 1.013301968574524,grad_norm: 0.8519515771443906, iteration: 28411
loss: 1.1237019300460815,grad_norm: 0.999999051716311, iteration: 28412
loss: 1.0388524532318115,grad_norm: 0.9999990723881771, iteration: 28413
loss: 1.049855351448059,grad_norm: 0.9999993762274252, iteration: 28414
loss: 1.0042290687561035,grad_norm: 0.9999989997491759, iteration: 28415
loss: 1.001671314239502,grad_norm: 0.893381523677611, iteration: 28416
loss: 0.9983593821525574,grad_norm: 0.9999992365196102, iteration: 28417
loss: 1.055044412612915,grad_norm: 0.9999991052407776, iteration: 28418
loss: 1.0292960405349731,grad_norm: 0.9999992335246858, iteration: 28419
loss: 1.002726674079895,grad_norm: 0.8924683892140831, iteration: 28420
loss: 1.107932448387146,grad_norm: 0.9999991573829669, iteration: 28421
loss: 1.0358903408050537,grad_norm: 0.9999989733260516, iteration: 28422
loss: 1.0217969417572021,grad_norm: 0.9075313570188589, iteration: 28423
loss: 1.0073966979980469,grad_norm: 0.8707576313128185, iteration: 28424
loss: 1.012192726135254,grad_norm: 0.9788225037883855, iteration: 28425
loss: 1.066287875175476,grad_norm: 0.9999993711950376, iteration: 28426
loss: 1.0098612308502197,grad_norm: 0.892720848759542, iteration: 28427
loss: 1.0306540727615356,grad_norm: 0.9749334395444018, iteration: 28428
loss: 0.9871968030929565,grad_norm: 0.9489123649611286, iteration: 28429
loss: 0.9697771072387695,grad_norm: 0.8448764963762339, iteration: 28430
loss: 0.9943432807922363,grad_norm: 0.9999990929698344, iteration: 28431
loss: 0.9974240064620972,grad_norm: 0.9999992018453053, iteration: 28432
loss: 1.0207809209823608,grad_norm: 0.9999996332145649, iteration: 28433
loss: 0.9514504075050354,grad_norm: 0.9999990578778037, iteration: 28434
loss: 0.9839680790901184,grad_norm: 0.9999990201690819, iteration: 28435
loss: 1.0314996242523193,grad_norm: 0.9999993465349435, iteration: 28436
loss: 1.017675757408142,grad_norm: 0.9999992740298407, iteration: 28437
loss: 0.975249707698822,grad_norm: 0.8966572605005153, iteration: 28438
loss: 0.9994263052940369,grad_norm: 0.9027398518992898, iteration: 28439
loss: 1.008225917816162,grad_norm: 0.9747413820089457, iteration: 28440
loss: 0.9793438911437988,grad_norm: 0.999999242362356, iteration: 28441
loss: 0.9983076453208923,grad_norm: 0.9999997402912851, iteration: 28442
loss: 0.9822431206703186,grad_norm: 0.9999990629474217, iteration: 28443
loss: 1.1096304655075073,grad_norm: 0.9999994313068672, iteration: 28444
loss: 1.0785537958145142,grad_norm: 0.9999994850678207, iteration: 28445
loss: 1.0235432386398315,grad_norm: 0.9332127304922728, iteration: 28446
loss: 1.0082899332046509,grad_norm: 0.9999993108389639, iteration: 28447
loss: 0.9801705479621887,grad_norm: 0.9230628913245422, iteration: 28448
loss: 0.9954941272735596,grad_norm: 0.9999991074555962, iteration: 28449
loss: 0.977384090423584,grad_norm: 0.8682152662100977, iteration: 28450
loss: 0.9932218790054321,grad_norm: 0.9565771053077937, iteration: 28451
loss: 0.9818755388259888,grad_norm: 0.9999990059794476, iteration: 28452
loss: 1.006555438041687,grad_norm: 0.9210480601902044, iteration: 28453
loss: 0.9975598454475403,grad_norm: 0.9999989996311385, iteration: 28454
loss: 0.9833751320838928,grad_norm: 0.8377465324643685, iteration: 28455
loss: 1.0183079242706299,grad_norm: 0.9999993874866047, iteration: 28456
loss: 1.0396685600280762,grad_norm: 0.9999991545529155, iteration: 28457
loss: 0.9716692566871643,grad_norm: 0.9999990718246731, iteration: 28458
loss: 1.0034786462783813,grad_norm: 0.9266356378671224, iteration: 28459
loss: 0.9708619713783264,grad_norm: 0.8892776763445608, iteration: 28460
loss: 1.0159002542495728,grad_norm: 0.9999995334408879, iteration: 28461
loss: 0.9679597616195679,grad_norm: 0.9999991375154136, iteration: 28462
loss: 1.040210247039795,grad_norm: 0.9999993689797386, iteration: 28463
loss: 1.0467759370803833,grad_norm: 0.9918841919740624, iteration: 28464
loss: 1.1498892307281494,grad_norm: 0.9999998918719981, iteration: 28465
loss: 1.0079352855682373,grad_norm: 0.999999010780267, iteration: 28466
loss: 1.030430555343628,grad_norm: 0.9880359689233909, iteration: 28467
loss: 1.0230278968811035,grad_norm: 0.9999994587943775, iteration: 28468
loss: 0.9759021997451782,grad_norm: 0.9999990573144794, iteration: 28469
loss: 1.0260950326919556,grad_norm: 0.8948401907209503, iteration: 28470
loss: 1.0170060396194458,grad_norm: 0.9999992662628778, iteration: 28471
loss: 0.9739614725112915,grad_norm: 0.9528080478387089, iteration: 28472
loss: 0.9585200548171997,grad_norm: 0.9999989939515318, iteration: 28473
loss: 1.049975872039795,grad_norm: 0.925313704872612, iteration: 28474
loss: 1.0139732360839844,grad_norm: 0.9999992276955724, iteration: 28475
loss: 0.9795935750007629,grad_norm: 0.9186126194890388, iteration: 28476
loss: 1.0224542617797852,grad_norm: 0.9999989752270654, iteration: 28477
loss: 1.015504240989685,grad_norm: 0.9999991433596246, iteration: 28478
loss: 1.040292739868164,grad_norm: 0.9999990955620897, iteration: 28479
loss: 0.966225802898407,grad_norm: 0.999998955281624, iteration: 28480
loss: 1.021641731262207,grad_norm: 0.9185570654865475, iteration: 28481
loss: 1.010319709777832,grad_norm: 0.9999991590942214, iteration: 28482
loss: 1.022247552871704,grad_norm: 0.9958187189874872, iteration: 28483
loss: 1.0069712400436401,grad_norm: 0.9703712149966328, iteration: 28484
loss: 1.00167977809906,grad_norm: 0.9861646611848971, iteration: 28485
loss: 1.0582653284072876,grad_norm: 0.9999997002207646, iteration: 28486
loss: 1.020797610282898,grad_norm: 0.9754028060122365, iteration: 28487
loss: 1.020980715751648,grad_norm: 0.7544538477329712, iteration: 28488
loss: 0.9805194139480591,grad_norm: 0.9999990585275936, iteration: 28489
loss: 1.0032707452774048,grad_norm: 0.7906932686057805, iteration: 28490
loss: 0.9999189972877502,grad_norm: 0.8793394230756743, iteration: 28491
loss: 1.0461169481277466,grad_norm: 0.9999995059560151, iteration: 28492
loss: 1.001197338104248,grad_norm: 0.9999991504671102, iteration: 28493
loss: 1.0212656259536743,grad_norm: 0.9999992038783473, iteration: 28494
loss: 0.9990168809890747,grad_norm: 0.9999992275401363, iteration: 28495
loss: 1.0015122890472412,grad_norm: 0.9999989965418598, iteration: 28496
loss: 1.0178899765014648,grad_norm: 0.9999999337321945, iteration: 28497
loss: 0.9993669390678406,grad_norm: 0.9999991089430126, iteration: 28498
loss: 1.0054212808609009,grad_norm: 0.9660998829318189, iteration: 28499
loss: 1.030442714691162,grad_norm: 0.9995428354654281, iteration: 28500
loss: 1.0388576984405518,grad_norm: 0.9841707525188255, iteration: 28501
loss: 1.0156567096710205,grad_norm: 0.9999992285492347, iteration: 28502
loss: 1.0958319902420044,grad_norm: 0.9999995642119678, iteration: 28503
loss: 0.9979979395866394,grad_norm: 0.9901103267539315, iteration: 28504
loss: 1.0384840965270996,grad_norm: 0.9826472808062667, iteration: 28505
loss: 1.0757333040237427,grad_norm: 0.9999992797947462, iteration: 28506
loss: 1.0376579761505127,grad_norm: 0.9999993337269955, iteration: 28507
loss: 1.003805160522461,grad_norm: 0.9999990596714721, iteration: 28508
loss: 0.9717559218406677,grad_norm: 0.999999770008193, iteration: 28509
loss: 0.9909199476242065,grad_norm: 0.9999990246725774, iteration: 28510
loss: 1.0205811262130737,grad_norm: 0.9999993664797765, iteration: 28511
loss: 1.0072956085205078,grad_norm: 0.9999997673955697, iteration: 28512
loss: 1.0026648044586182,grad_norm: 0.7825456818634298, iteration: 28513
loss: 1.004164457321167,grad_norm: 0.8931626804352234, iteration: 28514
loss: 1.022534966468811,grad_norm: 0.9999994184161294, iteration: 28515
loss: 1.022798776626587,grad_norm: 0.9999992096053631, iteration: 28516
loss: 1.0106321573257446,grad_norm: 0.9999990967591625, iteration: 28517
loss: 0.9937362670898438,grad_norm: 0.9999993105482243, iteration: 28518
loss: 1.0033570528030396,grad_norm: 0.9999991240050159, iteration: 28519
loss: 1.0201326608657837,grad_norm: 0.999999320133555, iteration: 28520
loss: 1.0308401584625244,grad_norm: 0.9999990583220267, iteration: 28521
loss: 1.0477358102798462,grad_norm: 0.999999479775123, iteration: 28522
loss: 1.0334548950195312,grad_norm: 0.9999994479767168, iteration: 28523
loss: 1.023676872253418,grad_norm: 0.9999991727475301, iteration: 28524
loss: 1.030094027519226,grad_norm: 0.999999116166928, iteration: 28525
loss: 0.9916896820068359,grad_norm: 0.8896670919981298, iteration: 28526
loss: 0.9644650816917419,grad_norm: 0.9999991007601531, iteration: 28527
loss: 0.9955255389213562,grad_norm: 0.8804711420468715, iteration: 28528
loss: 0.9890532493591309,grad_norm: 0.9999990170432393, iteration: 28529
loss: 1.0324347019195557,grad_norm: 0.9247097055534655, iteration: 28530
loss: 1.0109918117523193,grad_norm: 0.9999995118909454, iteration: 28531
loss: 1.0155105590820312,grad_norm: 0.9999997904296917, iteration: 28532
loss: 1.039043664932251,grad_norm: 0.9999991058494416, iteration: 28533
loss: 0.9999149441719055,grad_norm: 0.9259136098898013, iteration: 28534
loss: 1.0059449672698975,grad_norm: 0.9999991203565123, iteration: 28535
loss: 0.9879385828971863,grad_norm: 0.999999119846069, iteration: 28536
loss: 0.9760706424713135,grad_norm: 0.9999991681562033, iteration: 28537
loss: 1.0049115419387817,grad_norm: 0.9999990378433081, iteration: 28538
loss: 0.9851356148719788,grad_norm: 0.9011586272506298, iteration: 28539
loss: 0.9741109013557434,grad_norm: 0.7954077318927134, iteration: 28540
loss: 1.0013428926467896,grad_norm: 0.8973999111021034, iteration: 28541
loss: 1.0128357410430908,grad_norm: 0.9999991481995634, iteration: 28542
loss: 0.9900562167167664,grad_norm: 0.8195434168518739, iteration: 28543
loss: 1.0287238359451294,grad_norm: 0.9762916037663986, iteration: 28544
loss: 1.0574142932891846,grad_norm: 0.9999995089990067, iteration: 28545
loss: 0.9816544651985168,grad_norm: 0.9999991107293116, iteration: 28546
loss: 1.044434905052185,grad_norm: 0.9999993959170602, iteration: 28547
loss: 1.0112229585647583,grad_norm: 0.9109314615778232, iteration: 28548
loss: 0.9985001087188721,grad_norm: 0.9999991523037782, iteration: 28549
loss: 0.9648129940032959,grad_norm: 0.9999990211118311, iteration: 28550
loss: 1.0291789770126343,grad_norm: 0.9999994842511252, iteration: 28551
loss: 1.026881217956543,grad_norm: 0.9999990719693345, iteration: 28552
loss: 0.9892582297325134,grad_norm: 0.9999991398955574, iteration: 28553
loss: 1.0120768547058105,grad_norm: 0.9818570232316287, iteration: 28554
loss: 0.9810271859169006,grad_norm: 0.9514609904210614, iteration: 28555
loss: 1.0382375717163086,grad_norm: 0.9999991917779448, iteration: 28556
loss: 0.9713866114616394,grad_norm: 0.9999992657160506, iteration: 28557
loss: 1.0314480066299438,grad_norm: 0.99999918383075, iteration: 28558
loss: 1.0480252504348755,grad_norm: 0.9999990651728495, iteration: 28559
loss: 1.0264627933502197,grad_norm: 0.9545553051386089, iteration: 28560
loss: 1.026715636253357,grad_norm: 0.871177867475631, iteration: 28561
loss: 0.9942255020141602,grad_norm: 0.8997800702458453, iteration: 28562
loss: 1.0290560722351074,grad_norm: 0.9987932957302077, iteration: 28563
loss: 1.0515152215957642,grad_norm: 0.999999534737314, iteration: 28564
loss: 0.9990725517272949,grad_norm: 0.9999991526999366, iteration: 28565
loss: 1.0443447828292847,grad_norm: 0.9999990760518702, iteration: 28566
loss: 1.0145325660705566,grad_norm: 0.9999995376032829, iteration: 28567
loss: 0.9894506931304932,grad_norm: 0.9037730855722069, iteration: 28568
loss: 0.9829016327857971,grad_norm: 0.9474817185473637, iteration: 28569
loss: 0.9793306589126587,grad_norm: 0.9587161829037653, iteration: 28570
loss: 1.0198336839675903,grad_norm: 0.9999994711949622, iteration: 28571
loss: 1.019155502319336,grad_norm: 0.9205715769949061, iteration: 28572
loss: 0.9741045236587524,grad_norm: 0.9999992037585601, iteration: 28573
loss: 1.0483758449554443,grad_norm: 0.9999996267000361, iteration: 28574
loss: 1.0223674774169922,grad_norm: 0.9999995291498592, iteration: 28575
loss: 0.9717403650283813,grad_norm: 0.9999990410517734, iteration: 28576
loss: 0.9810486435890198,grad_norm: 0.9999991366176215, iteration: 28577
loss: 0.9779766201972961,grad_norm: 0.8613564396202278, iteration: 28578
loss: 0.991986870765686,grad_norm: 0.8415317625375199, iteration: 28579
loss: 1.0315881967544556,grad_norm: 0.9982771238589847, iteration: 28580
loss: 1.0944485664367676,grad_norm: 0.9999997990083961, iteration: 28581
loss: 1.056227684020996,grad_norm: 0.9758840574146334, iteration: 28582
loss: 1.0027363300323486,grad_norm: 0.999999042138095, iteration: 28583
loss: 1.0361031293869019,grad_norm: 0.9999997699410044, iteration: 28584
loss: 1.0068401098251343,grad_norm: 0.9999991302514633, iteration: 28585
loss: 1.0754255056381226,grad_norm: 0.9999995546814188, iteration: 28586
loss: 0.9750761389732361,grad_norm: 0.9999993547021927, iteration: 28587
loss: 1.0288546085357666,grad_norm: 0.999999175533942, iteration: 28588
loss: 1.0164752006530762,grad_norm: 0.999999142290799, iteration: 28589
loss: 1.0322662591934204,grad_norm: 0.9999995274781889, iteration: 28590
loss: 1.0506551265716553,grad_norm: 0.9999989833750731, iteration: 28591
loss: 0.9974262118339539,grad_norm: 0.9999999006025506, iteration: 28592
loss: 1.0101704597473145,grad_norm: 0.9999990979348417, iteration: 28593
loss: 1.014552354812622,grad_norm: 0.9999990700884119, iteration: 28594
loss: 1.0636719465255737,grad_norm: 0.9999992503368512, iteration: 28595
loss: 1.0056520700454712,grad_norm: 0.9697169222371661, iteration: 28596
loss: 0.9647884368896484,grad_norm: 0.9999989567573248, iteration: 28597
loss: 1.0344440937042236,grad_norm: 0.9999991229425352, iteration: 28598
loss: 0.9879416227340698,grad_norm: 0.9999992859715001, iteration: 28599
loss: 0.9975946545600891,grad_norm: 0.9999992922830896, iteration: 28600
loss: 1.0219584703445435,grad_norm: 0.9999992430798387, iteration: 28601
loss: 1.0038772821426392,grad_norm: 0.9999991599429883, iteration: 28602
loss: 0.9480878710746765,grad_norm: 0.9999991642761817, iteration: 28603
loss: 1.0195389986038208,grad_norm: 0.9999990890699392, iteration: 28604
loss: 1.1957974433898926,grad_norm: 0.999999682811994, iteration: 28605
loss: 1.0374090671539307,grad_norm: 0.9321286475497543, iteration: 28606
loss: 1.0075627565383911,grad_norm: 0.8661023253372152, iteration: 28607
loss: 1.0232864618301392,grad_norm: 0.9999992651822639, iteration: 28608
loss: 1.022812008857727,grad_norm: 0.9999997601564915, iteration: 28609
loss: 1.0682954788208008,grad_norm: 0.999999854880926, iteration: 28610
loss: 1.0527740716934204,grad_norm: 0.9999997903646386, iteration: 28611
loss: 0.976294219493866,grad_norm: 0.9999992324034761, iteration: 28612
loss: 0.9873706698417664,grad_norm: 0.8676796020601862, iteration: 28613
loss: 1.0032860040664673,grad_norm: 0.9609129027261707, iteration: 28614
loss: 0.974722683429718,grad_norm: 0.9501524311915648, iteration: 28615
loss: 1.0253535509109497,grad_norm: 0.9999998480725002, iteration: 28616
loss: 0.9774715900421143,grad_norm: 0.9999991405735571, iteration: 28617
loss: 0.9871559739112854,grad_norm: 0.8990385079424889, iteration: 28618
loss: 1.0194729566574097,grad_norm: 0.9586472715757081, iteration: 28619
loss: 1.003806471824646,grad_norm: 0.8998693280418656, iteration: 28620
loss: 0.9986588358879089,grad_norm: 0.8589578594890347, iteration: 28621
loss: 1.0275888442993164,grad_norm: 0.9999990912060494, iteration: 28622
loss: 1.0244572162628174,grad_norm: 0.999999133532453, iteration: 28623
loss: 1.0002180337905884,grad_norm: 0.9999993184768252, iteration: 28624
loss: 1.0042424201965332,grad_norm: 0.9636045371670331, iteration: 28625
loss: 1.024149775505066,grad_norm: 0.9742906355699308, iteration: 28626
loss: 1.02800452709198,grad_norm: 0.9999991787062913, iteration: 28627
loss: 1.01921808719635,grad_norm: 0.9968532678492652, iteration: 28628
loss: 1.0573370456695557,grad_norm: 0.9999998256569367, iteration: 28629
loss: 1.0072938203811646,grad_norm: 0.9999993071891077, iteration: 28630
loss: 1.0248390436172485,grad_norm: 0.999999335101287, iteration: 28631
loss: 1.1143115758895874,grad_norm: 0.9999998917022015, iteration: 28632
loss: 1.0305132865905762,grad_norm: 0.9999993817138824, iteration: 28633
loss: 1.0057450532913208,grad_norm: 0.8932731693632731, iteration: 28634
loss: 1.0301283597946167,grad_norm: 0.9999993750061705, iteration: 28635
loss: 1.0395182371139526,grad_norm: 0.9999993633388725, iteration: 28636
loss: 1.0032823085784912,grad_norm: 0.9677459932783378, iteration: 28637
loss: 1.030809998512268,grad_norm: 0.9999991010276237, iteration: 28638
loss: 0.9848926067352295,grad_norm: 0.9166881730565063, iteration: 28639
loss: 1.0776407718658447,grad_norm: 0.9999996817057454, iteration: 28640
loss: 0.9877253770828247,grad_norm: 0.9999990024191047, iteration: 28641
loss: 1.0320444107055664,grad_norm: 0.8560969354382573, iteration: 28642
loss: 0.9987145066261292,grad_norm: 0.9999992629017318, iteration: 28643
loss: 1.0514826774597168,grad_norm: 0.9999993286794954, iteration: 28644
loss: 1.012339472770691,grad_norm: 0.9999992797807679, iteration: 28645
loss: 1.003531575202942,grad_norm: 0.8672600216595521, iteration: 28646
loss: 1.0362086296081543,grad_norm: 0.9999992072883713, iteration: 28647
loss: 1.0360443592071533,grad_norm: 0.9471395554669438, iteration: 28648
loss: 0.9904499053955078,grad_norm: 0.9999993865790296, iteration: 28649
loss: 1.0119959115982056,grad_norm: 0.8238890865083924, iteration: 28650
loss: 0.9610673785209656,grad_norm: 0.9999990725900466, iteration: 28651
loss: 1.0539295673370361,grad_norm: 0.9999998628736788, iteration: 28652
loss: 1.0689979791641235,grad_norm: 0.9999996039136296, iteration: 28653
loss: 1.0297143459320068,grad_norm: 0.9250827342533806, iteration: 28654
loss: 0.9829514622688293,grad_norm: 0.9999994966962309, iteration: 28655
loss: 0.9767017364501953,grad_norm: 0.9999991468623363, iteration: 28656
loss: 1.0906968116760254,grad_norm: 0.999999407255115, iteration: 28657
loss: 0.9770020246505737,grad_norm: 0.9753294526213122, iteration: 28658
loss: 1.0037611722946167,grad_norm: 0.9999992602091073, iteration: 28659
loss: 1.0206968784332275,grad_norm: 0.99999905076658, iteration: 28660
loss: 1.0362409353256226,grad_norm: 0.9999997379810655, iteration: 28661
loss: 1.080509901046753,grad_norm: 0.9999996727927974, iteration: 28662
loss: 0.963089644908905,grad_norm: 0.8718796260016932, iteration: 28663
loss: 0.9703114032745361,grad_norm: 0.950652232396492, iteration: 28664
loss: 0.990301251411438,grad_norm: 0.9999991548070303, iteration: 28665
loss: 1.0598517656326294,grad_norm: 0.9999995741491076, iteration: 28666
loss: 1.029988408088684,grad_norm: 0.9999991801919341, iteration: 28667
loss: 1.0817917585372925,grad_norm: 0.9999990512876087, iteration: 28668
loss: 1.0336265563964844,grad_norm: 0.9999996876365128, iteration: 28669
loss: 1.0455594062805176,grad_norm: 0.9999993713966447, iteration: 28670
loss: 0.9860578775405884,grad_norm: 0.9999992734778342, iteration: 28671
loss: 1.075571060180664,grad_norm: 0.999999525632292, iteration: 28672
loss: 1.0289310216903687,grad_norm: 0.9999990219077627, iteration: 28673
loss: 1.0235722064971924,grad_norm: 0.9999992079758453, iteration: 28674
loss: 1.0321921110153198,grad_norm: 0.9344012808309476, iteration: 28675
loss: 0.9927691221237183,grad_norm: 0.9950797433768052, iteration: 28676
loss: 0.9810242652893066,grad_norm: 0.9999997506759277, iteration: 28677
loss: 1.0380140542984009,grad_norm: 0.999999450674406, iteration: 28678
loss: 1.0308198928833008,grad_norm: 0.9204532745723297, iteration: 28679
loss: 0.9830899834632874,grad_norm: 0.9818947460342143, iteration: 28680
loss: 1.0055617094039917,grad_norm: 0.9621659605955221, iteration: 28681
loss: 0.9642227292060852,grad_norm: 0.9999990863226543, iteration: 28682
loss: 1.033013939857483,grad_norm: 0.9999991346694823, iteration: 28683
loss: 0.9631159901618958,grad_norm: 0.9999993946914137, iteration: 28684
loss: 1.025887131690979,grad_norm: 0.9026160833683015, iteration: 28685
loss: 1.0270359516143799,grad_norm: 0.9999990517767019, iteration: 28686
loss: 1.002092719078064,grad_norm: 0.9999991544311643, iteration: 28687
loss: 1.0130107402801514,grad_norm: 0.9567208520119961, iteration: 28688
loss: 0.9860524535179138,grad_norm: 0.9999991689942132, iteration: 28689
loss: 1.0200042724609375,grad_norm: 0.8740656999781353, iteration: 28690
loss: 1.0114631652832031,grad_norm: 0.9999992137724264, iteration: 28691
loss: 1.0527575016021729,grad_norm: 0.9999996526437299, iteration: 28692
loss: 1.019010305404663,grad_norm: 0.9999990246275026, iteration: 28693
loss: 1.0623302459716797,grad_norm: 0.9999991547141573, iteration: 28694
loss: 0.9993059039115906,grad_norm: 0.9999992372591017, iteration: 28695
loss: 1.0296825170516968,grad_norm: 0.9999991541858468, iteration: 28696
loss: 1.0355557203292847,grad_norm: 0.9999990889690791, iteration: 28697
loss: 1.0031611919403076,grad_norm: 0.9999990774256873, iteration: 28698
loss: 1.0146582126617432,grad_norm: 0.9999990730424291, iteration: 28699
loss: 1.0102554559707642,grad_norm: 0.7817627247955902, iteration: 28700
loss: 0.9893615245819092,grad_norm: 0.9518044252861724, iteration: 28701
loss: 1.0080928802490234,grad_norm: 0.9999992192711312, iteration: 28702
loss: 1.0484575033187866,grad_norm: 0.9999994472733286, iteration: 28703
loss: 1.0016173124313354,grad_norm: 0.8810979575722558, iteration: 28704
loss: 1.025901198387146,grad_norm: 0.89405854234747, iteration: 28705
loss: 0.9797451496124268,grad_norm: 0.8515003646019441, iteration: 28706
loss: 1.0021783113479614,grad_norm: 0.8218467895566828, iteration: 28707
loss: 1.0180119276046753,grad_norm: 0.9999992001210113, iteration: 28708
loss: 0.986901044845581,grad_norm: 0.999999145199763, iteration: 28709
loss: 0.989231526851654,grad_norm: 0.9269748443697583, iteration: 28710
loss: 1.0426757335662842,grad_norm: 0.9999990913844187, iteration: 28711
loss: 1.0123168230056763,grad_norm: 0.8253176401006861, iteration: 28712
loss: 1.0310364961624146,grad_norm: 0.9965498991860221, iteration: 28713
loss: 1.0329484939575195,grad_norm: 0.9999994375200953, iteration: 28714
loss: 1.0072449445724487,grad_norm: 0.9285226900596049, iteration: 28715
loss: 1.0049045085906982,grad_norm: 0.8866665418062947, iteration: 28716
loss: 1.0456770658493042,grad_norm: 0.9999990795449513, iteration: 28717
loss: 1.0111377239227295,grad_norm: 0.9999989801585433, iteration: 28718
loss: 0.9633690714836121,grad_norm: 0.9999990962620277, iteration: 28719
loss: 1.0099390745162964,grad_norm: 0.9999999057753348, iteration: 28720
loss: 0.9873841404914856,grad_norm: 0.9429064790773243, iteration: 28721
loss: 1.0079777240753174,grad_norm: 0.9999994634874029, iteration: 28722
loss: 1.0237523317337036,grad_norm: 0.9416493717166425, iteration: 28723
loss: 0.981665313243866,grad_norm: 0.9999991923191196, iteration: 28724
loss: 1.044541358947754,grad_norm: 0.9999990986831714, iteration: 28725
loss: 0.9812353849411011,grad_norm: 0.8174664109448784, iteration: 28726
loss: 1.0222337245941162,grad_norm: 0.9999990839403109, iteration: 28727
loss: 0.985346257686615,grad_norm: 0.9999990678826401, iteration: 28728
loss: 0.991371214389801,grad_norm: 0.9999993733599208, iteration: 28729
loss: 1.0300241708755493,grad_norm: 0.9999990588765786, iteration: 28730
loss: 0.9637932777404785,grad_norm: 0.9999991617505206, iteration: 28731
loss: 1.0143382549285889,grad_norm: 0.9999991040282521, iteration: 28732
loss: 1.0114048719406128,grad_norm: 0.9999991326770048, iteration: 28733
loss: 0.9799039959907532,grad_norm: 0.995950702781026, iteration: 28734
loss: 0.987409770488739,grad_norm: 0.9999991973085136, iteration: 28735
loss: 0.9957046508789062,grad_norm: 0.9999991806098538, iteration: 28736
loss: 1.0117822885513306,grad_norm: 0.9939533210782799, iteration: 28737
loss: 1.023154377937317,grad_norm: 0.9999993565448494, iteration: 28738
loss: 1.0426139831542969,grad_norm: 0.9999994731041142, iteration: 28739
loss: 0.9694614410400391,grad_norm: 0.8837573863690551, iteration: 28740
loss: 1.0392827987670898,grad_norm: 0.9999992733567714, iteration: 28741
loss: 0.984551727771759,grad_norm: 0.9999992388207718, iteration: 28742
loss: 1.0996004343032837,grad_norm: 0.9999998677115873, iteration: 28743
loss: 1.0310981273651123,grad_norm: 0.9698464327616093, iteration: 28744
loss: 1.0419083833694458,grad_norm: 0.9999991390848483, iteration: 28745
loss: 1.0447572469711304,grad_norm: 0.9999989601043484, iteration: 28746
loss: 1.039103627204895,grad_norm: 0.9492474841259615, iteration: 28747
loss: 0.9819239377975464,grad_norm: 0.9999992167625875, iteration: 28748
loss: 0.9956759214401245,grad_norm: 0.9067198409567625, iteration: 28749
loss: 1.1109496355056763,grad_norm: 0.9999999348434937, iteration: 28750
loss: 1.016462802886963,grad_norm: 0.9999990289485342, iteration: 28751
loss: 1.0172157287597656,grad_norm: 0.9999991532330641, iteration: 28752
loss: 1.0090819597244263,grad_norm: 0.9708809325142793, iteration: 28753
loss: 0.9927992820739746,grad_norm: 0.9855960828812866, iteration: 28754
loss: 1.0131806135177612,grad_norm: 0.999999117273455, iteration: 28755
loss: 0.9996179342269897,grad_norm: 0.9999991942444003, iteration: 28756
loss: 1.0899789333343506,grad_norm: 0.9999994148476679, iteration: 28757
loss: 0.9678043127059937,grad_norm: 0.8721362462172637, iteration: 28758
loss: 0.9764541387557983,grad_norm: 0.9999992016575491, iteration: 28759
loss: 1.0650793313980103,grad_norm: 0.9999996240266652, iteration: 28760
loss: 0.9835469126701355,grad_norm: 0.9999995534405787, iteration: 28761
loss: 1.0046037435531616,grad_norm: 0.8390869560657268, iteration: 28762
loss: 0.9604812860488892,grad_norm: 0.9999992665958842, iteration: 28763
loss: 1.0077564716339111,grad_norm: 0.8987323795570521, iteration: 28764
loss: 1.0246928930282593,grad_norm: 0.9999991980622259, iteration: 28765
loss: 1.040555715560913,grad_norm: 0.9864469516798879, iteration: 28766
loss: 1.0310752391815186,grad_norm: 0.9999991742711021, iteration: 28767
loss: 0.9836964011192322,grad_norm: 0.9999990964117992, iteration: 28768
loss: 1.0166574716567993,grad_norm: 0.9999991752884025, iteration: 28769
loss: 0.9981302618980408,grad_norm: 0.9999995098714005, iteration: 28770
loss: 0.9973188042640686,grad_norm: 0.8681545977118549, iteration: 28771
loss: 1.036702275276184,grad_norm: 0.9999992701292164, iteration: 28772
loss: 0.9947905540466309,grad_norm: 0.9999990239575592, iteration: 28773
loss: 0.9872074127197266,grad_norm: 0.8920781914413398, iteration: 28774
loss: 1.01322603225708,grad_norm: 0.9808069024609328, iteration: 28775
loss: 1.136305570602417,grad_norm: 0.9999992765293115, iteration: 28776
loss: 1.0381547212600708,grad_norm: 0.9999991095365814, iteration: 28777
loss: 0.9775336980819702,grad_norm: 0.9999992545134035, iteration: 28778
loss: 1.0528061389923096,grad_norm: 0.999999961687746, iteration: 28779
loss: 1.0046616792678833,grad_norm: 0.914327682777605, iteration: 28780
loss: 1.0171352624893188,grad_norm: 0.9999990049647844, iteration: 28781
loss: 1.010817527770996,grad_norm: 0.982531032287678, iteration: 28782
loss: 1.0404136180877686,grad_norm: 0.7862708225225519, iteration: 28783
loss: 1.0179742574691772,grad_norm: 0.9999990820065707, iteration: 28784
loss: 0.9999520778656006,grad_norm: 0.9466477323773116, iteration: 28785
loss: 0.9811344742774963,grad_norm: 0.7993599560429219, iteration: 28786
loss: 1.21175217628479,grad_norm: 0.9999997388974722, iteration: 28787
loss: 1.0118696689605713,grad_norm: 0.9685928710689887, iteration: 28788
loss: 1.0635102987289429,grad_norm: 0.9055667142274639, iteration: 28789
loss: 1.0034312009811401,grad_norm: 0.9999992773370325, iteration: 28790
loss: 0.9983603954315186,grad_norm: 0.8479435814490773, iteration: 28791
loss: 1.0117582082748413,grad_norm: 0.9999992192194175, iteration: 28792
loss: 1.0133123397827148,grad_norm: 0.7708250533358292, iteration: 28793
loss: 0.9968044757843018,grad_norm: 0.999999211321053, iteration: 28794
loss: 1.022767186164856,grad_norm: 0.8503298702198994, iteration: 28795
loss: 1.0015872716903687,grad_norm: 0.8694027227726563, iteration: 28796
loss: 1.015893578529358,grad_norm: 0.9999990825851459, iteration: 28797
loss: 0.997890293598175,grad_norm: 0.9207639855229195, iteration: 28798
loss: 1.0060498714447021,grad_norm: 0.9999991762530357, iteration: 28799
loss: 1.051546573638916,grad_norm: 0.9999990093194527, iteration: 28800
loss: 1.0857728719711304,grad_norm: 0.9999995370485937, iteration: 28801
loss: 0.9717263579368591,grad_norm: 0.9667037596700725, iteration: 28802
loss: 0.9994468092918396,grad_norm: 0.8983088519520439, iteration: 28803
loss: 0.9978105425834656,grad_norm: 0.9739570309942991, iteration: 28804
loss: 1.1205220222473145,grad_norm: 0.9999993679987828, iteration: 28805
loss: 0.992497980594635,grad_norm: 0.8973152754617302, iteration: 28806
loss: 0.9671630859375,grad_norm: 0.9100115328324976, iteration: 28807
loss: 1.0845246315002441,grad_norm: 0.9999992397136669, iteration: 28808
loss: 1.0107039213180542,grad_norm: 0.9999993029761967, iteration: 28809
loss: 1.0474603176116943,grad_norm: 0.999999095910147, iteration: 28810
loss: 1.0401182174682617,grad_norm: 0.999999580553845, iteration: 28811
loss: 0.9825794696807861,grad_norm: 0.9600140625570148, iteration: 28812
loss: 1.0386579036712646,grad_norm: 0.9999992069338006, iteration: 28813
loss: 0.9955257177352905,grad_norm: 0.9999991097374373, iteration: 28814
loss: 1.006431221961975,grad_norm: 0.999999242469303, iteration: 28815
loss: 1.0297737121582031,grad_norm: 0.9999994447142949, iteration: 28816
loss: 1.006705403327942,grad_norm: 0.9417415989615286, iteration: 28817
loss: 0.9987615346908569,grad_norm: 0.9999992975219144, iteration: 28818
loss: 1.0619041919708252,grad_norm: 0.9999997675538723, iteration: 28819
loss: 1.0186017751693726,grad_norm: 0.999999375012483, iteration: 28820
loss: 0.9698580503463745,grad_norm: 0.9999990732208732, iteration: 28821
loss: 1.0135396718978882,grad_norm: 0.9752829813957649, iteration: 28822
loss: 1.0099852085113525,grad_norm: 0.8453665618115426, iteration: 28823
loss: 1.0268410444259644,grad_norm: 0.9598731298776643, iteration: 28824
loss: 1.0036542415618896,grad_norm: 0.7960879114828717, iteration: 28825
loss: 1.0488054752349854,grad_norm: 0.9465751259113488, iteration: 28826
loss: 0.981200635433197,grad_norm: 0.9998957889516286, iteration: 28827
loss: 1.0302751064300537,grad_norm: 0.8412974854375262, iteration: 28828
loss: 0.9907082915306091,grad_norm: 0.9649077299842127, iteration: 28829
loss: 1.0053943395614624,grad_norm: 0.9999991619688721, iteration: 28830
loss: 1.0145152807235718,grad_norm: 0.999999083630862, iteration: 28831
loss: 1.0020489692687988,grad_norm: 0.8474607156113012, iteration: 28832
loss: 1.0246751308441162,grad_norm: 0.9740305290125392, iteration: 28833
loss: 1.0152394771575928,grad_norm: 0.9999993445955976, iteration: 28834
loss: 0.9528152346611023,grad_norm: 0.999999100650753, iteration: 28835
loss: 1.003995656967163,grad_norm: 0.9999991181799418, iteration: 28836
loss: 1.0290546417236328,grad_norm: 0.9574578121926174, iteration: 28837
loss: 1.03079092502594,grad_norm: 0.9999993037004603, iteration: 28838
loss: 1.0386022329330444,grad_norm: 0.804861371422119, iteration: 28839
loss: 1.0116033554077148,grad_norm: 0.9999990479362187, iteration: 28840
loss: 1.035496711730957,grad_norm: 0.9230134628384975, iteration: 28841
loss: 1.027137041091919,grad_norm: 0.9999996074021601, iteration: 28842
loss: 0.9821410179138184,grad_norm: 0.9194885445125485, iteration: 28843
loss: 1.135903000831604,grad_norm: 0.9999995755054595, iteration: 28844
loss: 0.9960001111030579,grad_norm: 0.9698103434392888, iteration: 28845
loss: 0.9919939041137695,grad_norm: 0.9999991166668392, iteration: 28846
loss: 1.0115283727645874,grad_norm: 0.9999991162075007, iteration: 28847
loss: 0.9913692474365234,grad_norm: 0.8041754689736437, iteration: 28848
loss: 0.9811221957206726,grad_norm: 0.999999051458732, iteration: 28849
loss: 0.9869169592857361,grad_norm: 0.847873401802209, iteration: 28850
loss: 1.0467240810394287,grad_norm: 0.9999992563172551, iteration: 28851
loss: 1.0030310153961182,grad_norm: 0.9942333663487241, iteration: 28852
loss: 1.0117149353027344,grad_norm: 0.9999990533928266, iteration: 28853
loss: 1.0287281274795532,grad_norm: 0.9999993360662909, iteration: 28854
loss: 1.007497787475586,grad_norm: 0.9050920947599176, iteration: 28855
loss: 1.0240947008132935,grad_norm: 0.9999990643479526, iteration: 28856
loss: 0.9835087656974792,grad_norm: 0.9999994377454772, iteration: 28857
loss: 1.0082212686538696,grad_norm: 0.8688008552647377, iteration: 28858
loss: 0.9931011199951172,grad_norm: 0.9527401054450644, iteration: 28859
loss: 0.9913638830184937,grad_norm: 0.9999992640050173, iteration: 28860
loss: 1.0656750202178955,grad_norm: 0.9999991893605695, iteration: 28861
loss: 0.9824584722518921,grad_norm: 0.9999991220317551, iteration: 28862
loss: 1.0324851274490356,grad_norm: 0.9687443727759939, iteration: 28863
loss: 0.993100643157959,grad_norm: 0.8423294400033962, iteration: 28864
loss: 1.0136032104492188,grad_norm: 0.9999994351866252, iteration: 28865
loss: 0.9543117880821228,grad_norm: 0.838261469834827, iteration: 28866
loss: 1.0022468566894531,grad_norm: 0.9999990661388551, iteration: 28867
loss: 1.0514954328536987,grad_norm: 0.9999997148079298, iteration: 28868
loss: 1.0090067386627197,grad_norm: 0.9999992586455542, iteration: 28869
loss: 1.0423521995544434,grad_norm: 0.9999993025073516, iteration: 28870
loss: 1.0478363037109375,grad_norm: 0.9999992256485751, iteration: 28871
loss: 1.0022715330123901,grad_norm: 0.9999992088719879, iteration: 28872
loss: 0.9680190682411194,grad_norm: 0.8792242369905104, iteration: 28873
loss: 1.0122159719467163,grad_norm: 0.9999989547086083, iteration: 28874
loss: 1.0204004049301147,grad_norm: 0.9912485131005607, iteration: 28875
loss: 1.0152555704116821,grad_norm: 0.8527825044625094, iteration: 28876
loss: 1.0324841737747192,grad_norm: 0.9999991011070867, iteration: 28877
loss: 1.018057942390442,grad_norm: 0.9889444980082861, iteration: 28878
loss: 1.0475564002990723,grad_norm: 0.9999991755896345, iteration: 28879
loss: 1.035570502281189,grad_norm: 0.9999992341954242, iteration: 28880
loss: 0.9876396656036377,grad_norm: 0.9999991444341595, iteration: 28881
loss: 0.9767950773239136,grad_norm: 0.9999990482381387, iteration: 28882
loss: 0.9997333288192749,grad_norm: 0.8851932353459689, iteration: 28883
loss: 0.9761112332344055,grad_norm: 0.9441633036012487, iteration: 28884
loss: 1.05771803855896,grad_norm: 0.9710108582414897, iteration: 28885
loss: 1.0547846555709839,grad_norm: 0.9999990321999217, iteration: 28886
loss: 1.0054938793182373,grad_norm: 0.9999994278121314, iteration: 28887
loss: 0.9812408685684204,grad_norm: 0.9348568646470712, iteration: 28888
loss: 0.9995511770248413,grad_norm: 0.999999387478698, iteration: 28889
loss: 1.006902813911438,grad_norm: 0.99999924397341, iteration: 28890
loss: 0.9789132475852966,grad_norm: 0.9509280017337096, iteration: 28891
loss: 0.9779676198959351,grad_norm: 0.8946944012561383, iteration: 28892
loss: 1.036514163017273,grad_norm: 0.9999991612339063, iteration: 28893
loss: 1.039819359779358,grad_norm: 0.9999992054668464, iteration: 28894
loss: 0.9637595415115356,grad_norm: 0.9999992006703605, iteration: 28895
loss: 1.0135819911956787,grad_norm: 0.9999990510064521, iteration: 28896
loss: 1.018856406211853,grad_norm: 0.9999992271613691, iteration: 28897
loss: 0.9708049297332764,grad_norm: 0.9999996683422743, iteration: 28898
loss: 0.9986402988433838,grad_norm: 0.8700862309014659, iteration: 28899
loss: 0.9803627729415894,grad_norm: 0.999910891018785, iteration: 28900
loss: 0.9981417059898376,grad_norm: 0.9237242085635051, iteration: 28901
loss: 1.0042214393615723,grad_norm: 0.9970023127699359, iteration: 28902
loss: 1.0096170902252197,grad_norm: 0.9559276426227077, iteration: 28903
loss: 1.032392144203186,grad_norm: 0.999999723954406, iteration: 28904
loss: 1.0269855260849,grad_norm: 0.9680754888475906, iteration: 28905
loss: 1.0401098728179932,grad_norm: 0.964563931152114, iteration: 28906
loss: 1.0507621765136719,grad_norm: 0.9999994984249491, iteration: 28907
loss: 0.9932482242584229,grad_norm: 0.7922443699343599, iteration: 28908
loss: 1.0514644384384155,grad_norm: 0.9999994310590801, iteration: 28909
loss: 1.0160518884658813,grad_norm: 0.8514385149836183, iteration: 28910
loss: 1.012437343597412,grad_norm: 0.9999991542796461, iteration: 28911
loss: 1.0315595865249634,grad_norm: 0.9999995981574902, iteration: 28912
loss: 1.076719045639038,grad_norm: 0.9999993240603104, iteration: 28913
loss: 1.0173689126968384,grad_norm: 0.8902615823699808, iteration: 28914
loss: 0.9945839047431946,grad_norm: 0.999999714202312, iteration: 28915
loss: 1.0206456184387207,grad_norm: 0.9999991799311074, iteration: 28916
loss: 1.0466666221618652,grad_norm: 0.9999992248837957, iteration: 28917
loss: 1.0476009845733643,grad_norm: 0.9249278680508709, iteration: 28918
loss: 1.0477502346038818,grad_norm: 0.9999991453106183, iteration: 28919
loss: 0.9934861660003662,grad_norm: 0.9460362890365928, iteration: 28920
loss: 1.0355693101882935,grad_norm: 0.999999524397223, iteration: 28921
loss: 1.0169687271118164,grad_norm: 0.945932607106869, iteration: 28922
loss: 1.010536789894104,grad_norm: 0.9999992640834788, iteration: 28923
loss: 1.037731409072876,grad_norm: 0.9164273432759767, iteration: 28924
loss: 1.0148524045944214,grad_norm: 0.9999996308430951, iteration: 28925
loss: 0.995269238948822,grad_norm: 0.9999991181361018, iteration: 28926
loss: 0.9844554662704468,grad_norm: 0.9622059299860085, iteration: 28927
loss: 1.0091402530670166,grad_norm: 0.9157617031948397, iteration: 28928
loss: 1.0634769201278687,grad_norm: 0.999999381618018, iteration: 28929
loss: 1.03355073928833,grad_norm: 0.8815708455944312, iteration: 28930
loss: 1.0052284002304077,grad_norm: 0.9740422811956517, iteration: 28931
loss: 1.058434247970581,grad_norm: 0.9999996657733877, iteration: 28932
loss: 1.0637236833572388,grad_norm: 0.9999991717592642, iteration: 28933
loss: 1.0222036838531494,grad_norm: 0.9999995370816284, iteration: 28934
loss: 0.9922894835472107,grad_norm: 0.8806217799786281, iteration: 28935
loss: 1.0473756790161133,grad_norm: 0.9999995792728897, iteration: 28936
loss: 0.9896861910820007,grad_norm: 0.9999993864659954, iteration: 28937
loss: 1.0040820837020874,grad_norm: 0.8999775879094551, iteration: 28938
loss: 1.0984992980957031,grad_norm: 0.9999998988479056, iteration: 28939
loss: 0.9958268404006958,grad_norm: 0.9904434866281641, iteration: 28940
loss: 1.0110273361206055,grad_norm: 0.9118991707658501, iteration: 28941
loss: 1.0064619779586792,grad_norm: 0.9999989009228698, iteration: 28942
loss: 1.0504049062728882,grad_norm: 0.999999175723894, iteration: 28943
loss: 1.0212676525115967,grad_norm: 0.9999991875922206, iteration: 28944
loss: 0.9913883209228516,grad_norm: 0.9999993806441729, iteration: 28945
loss: 1.057431697845459,grad_norm: 0.999999398531385, iteration: 28946
loss: 0.9992154240608215,grad_norm: 0.9999990760563332, iteration: 28947
loss: 1.0203863382339478,grad_norm: 0.9999989362444116, iteration: 28948
loss: 1.0245174169540405,grad_norm: 0.9999991121179986, iteration: 28949
loss: 0.9886798858642578,grad_norm: 0.99999904225826, iteration: 28950
loss: 1.0882991552352905,grad_norm: 0.9770236695280974, iteration: 28951
loss: 1.0138262510299683,grad_norm: 0.9999991644800177, iteration: 28952
loss: 1.0319856405258179,grad_norm: 0.9028298688503149, iteration: 28953
loss: 1.0746270418167114,grad_norm: 0.9999998070168162, iteration: 28954
loss: 1.0143909454345703,grad_norm: 0.999999054913057, iteration: 28955
loss: 1.021999478340149,grad_norm: 0.9378128427574958, iteration: 28956
loss: 1.0235873460769653,grad_norm: 0.9239317241230822, iteration: 28957
loss: 1.0204908847808838,grad_norm: 0.9999995097417387, iteration: 28958
loss: 1.0300395488739014,grad_norm: 0.9999999224442244, iteration: 28959
loss: 1.0387731790542603,grad_norm: 0.9999998821570495, iteration: 28960
loss: 1.011365532875061,grad_norm: 0.8847827315857844, iteration: 28961
loss: 0.9923860430717468,grad_norm: 0.9556923485114097, iteration: 28962
loss: 0.9976065754890442,grad_norm: 0.9999991592032565, iteration: 28963
loss: 1.0373893976211548,grad_norm: 0.9999998052995647, iteration: 28964
loss: 1.0382165908813477,grad_norm: 0.9999994540404638, iteration: 28965
loss: 1.0202245712280273,grad_norm: 0.9999998502649996, iteration: 28966
loss: 1.0113041400909424,grad_norm: 0.999999207572956, iteration: 28967
loss: 0.9777764678001404,grad_norm: 0.999999153347881, iteration: 28968
loss: 1.0413519144058228,grad_norm: 0.9999995547480943, iteration: 28969
loss: 1.0062631368637085,grad_norm: 0.84624310504265, iteration: 28970
loss: 1.0548958778381348,grad_norm: 0.9999991530399776, iteration: 28971
loss: 1.0222748517990112,grad_norm: 0.9999989428410049, iteration: 28972
loss: 0.9798064231872559,grad_norm: 0.9999989585472406, iteration: 28973
loss: 0.9992212653160095,grad_norm: 0.9579163218075343, iteration: 28974
loss: 0.9786398410797119,grad_norm: 0.9999991510847662, iteration: 28975
loss: 1.0045313835144043,grad_norm: 0.9999996333536898, iteration: 28976
loss: 0.9927793741226196,grad_norm: 0.983823187111527, iteration: 28977
loss: 1.1705058813095093,grad_norm: 0.9999998011728486, iteration: 28978
loss: 0.9947279095649719,grad_norm: 0.9999991898801232, iteration: 28979
loss: 0.996852695941925,grad_norm: 0.9720873980731453, iteration: 28980
loss: 1.0344690084457397,grad_norm: 0.8067278221713806, iteration: 28981
loss: 0.9872840046882629,grad_norm: 0.9999993517592796, iteration: 28982
loss: 0.977942943572998,grad_norm: 0.9999992062760968, iteration: 28983
loss: 1.032662272453308,grad_norm: 0.9999990661171432, iteration: 28984
loss: 1.0412390232086182,grad_norm: 0.9184048812650144, iteration: 28985
loss: 1.0343190431594849,grad_norm: 0.9999989870193046, iteration: 28986
loss: 1.0027791261672974,grad_norm: 0.9999991496188168, iteration: 28987
loss: 1.0044736862182617,grad_norm: 0.9999991493715807, iteration: 28988
loss: 1.0009636878967285,grad_norm: 0.9999992700888136, iteration: 28989
loss: 0.9986445307731628,grad_norm: 0.8814728876226436, iteration: 28990
loss: 1.0128026008605957,grad_norm: 0.999999372437, iteration: 28991
loss: 0.9616885781288147,grad_norm: 0.907996658370332, iteration: 28992
loss: 1.0061924457550049,grad_norm: 0.8336162350532431, iteration: 28993
loss: 0.9691599011421204,grad_norm: 0.9999991726203121, iteration: 28994
loss: 1.0453547239303589,grad_norm: 0.9999994206106411, iteration: 28995
loss: 1.006564736366272,grad_norm: 0.9604566533147618, iteration: 28996
loss: 1.031624674797058,grad_norm: 0.8772933493653997, iteration: 28997
loss: 1.0107438564300537,grad_norm: 0.9192361416024635, iteration: 28998
loss: 0.9831143617630005,grad_norm: 0.934345463478113, iteration: 28999
loss: 0.9960059523582458,grad_norm: 0.9999991533614196, iteration: 29000
loss: 0.9874939918518066,grad_norm: 0.9999992136126544, iteration: 29001
loss: 0.9838576912879944,grad_norm: 0.9834074751913737, iteration: 29002
loss: 0.9743340015411377,grad_norm: 0.9359403141733194, iteration: 29003
loss: 0.9765798449516296,grad_norm: 0.9999990588634542, iteration: 29004
loss: 1.1176104545593262,grad_norm: 0.9999990057568163, iteration: 29005
loss: 0.9860560297966003,grad_norm: 0.9999996064898758, iteration: 29006
loss: 0.9449251890182495,grad_norm: 0.999999204523209, iteration: 29007
loss: 1.0304769277572632,grad_norm: 0.999999389623831, iteration: 29008
loss: 1.0196627378463745,grad_norm: 0.9135289921553904, iteration: 29009
loss: 0.9825981259346008,grad_norm: 0.9880891045363404, iteration: 29010
loss: 1.061169981956482,grad_norm: 0.9999991796467597, iteration: 29011
loss: 0.9885519742965698,grad_norm: 0.9999991438487434, iteration: 29012
loss: 0.9568460583686829,grad_norm: 0.9999991390303947, iteration: 29013
loss: 1.0875213146209717,grad_norm: 0.9999998566606433, iteration: 29014
loss: 0.9957990050315857,grad_norm: 0.8400346662272266, iteration: 29015
loss: 0.997935950756073,grad_norm: 0.7651510223061855, iteration: 29016
loss: 0.9905821084976196,grad_norm: 0.9999992487094136, iteration: 29017
loss: 1.037386178970337,grad_norm: 0.9285882145904443, iteration: 29018
loss: 1.0484713315963745,grad_norm: 0.9999997672603111, iteration: 29019
loss: 1.0709558725357056,grad_norm: 0.9999993084271894, iteration: 29020
loss: 0.9875656962394714,grad_norm: 0.9999991138363475, iteration: 29021
loss: 1.1528373956680298,grad_norm: 0.999999820813544, iteration: 29022
loss: 0.9763733148574829,grad_norm: 0.9085960135600676, iteration: 29023
loss: 0.9995311498641968,grad_norm: 0.9999993414223388, iteration: 29024
loss: 1.0685341358184814,grad_norm: 0.9999994379348824, iteration: 29025
loss: 1.03009831905365,grad_norm: 0.9999997495346074, iteration: 29026
loss: 1.0431559085845947,grad_norm: 0.9999991930636374, iteration: 29027
loss: 0.9835686683654785,grad_norm: 0.9169588339014374, iteration: 29028
loss: 1.0219794511795044,grad_norm: 0.9999990804417498, iteration: 29029
loss: 1.0306406021118164,grad_norm: 0.9427900306553254, iteration: 29030
loss: 1.0281163454055786,grad_norm: 0.8859841360532854, iteration: 29031
loss: 0.9935636520385742,grad_norm: 0.8078647717823505, iteration: 29032
loss: 1.3993228673934937,grad_norm: 0.9999997951445406, iteration: 29033
loss: 1.0048143863677979,grad_norm: 0.8804452953793171, iteration: 29034
loss: 1.0387452840805054,grad_norm: 0.9999995834956302, iteration: 29035
loss: 0.9892598986625671,grad_norm: 0.9405152329192681, iteration: 29036
loss: 1.0877878665924072,grad_norm: 1.0000000468334387, iteration: 29037
loss: 1.0359032154083252,grad_norm: 0.8935412199703997, iteration: 29038
loss: 1.066605567932129,grad_norm: 0.9999995075808424, iteration: 29039
loss: 1.0178645849227905,grad_norm: 0.999999562564942, iteration: 29040
loss: 0.9811084866523743,grad_norm: 0.9999989731038165, iteration: 29041
loss: 0.9582375288009644,grad_norm: 0.9999990645957869, iteration: 29042
loss: 1.090380311012268,grad_norm: 0.9999997657223751, iteration: 29043
loss: 0.9869871139526367,grad_norm: 0.8243944121051684, iteration: 29044
loss: 1.025452733039856,grad_norm: 0.999999481194336, iteration: 29045
loss: 1.024571180343628,grad_norm: 0.9522594597348277, iteration: 29046
loss: 1.0152888298034668,grad_norm: 0.9999992156690491, iteration: 29047
loss: 1.0047603845596313,grad_norm: 0.9480537661364793, iteration: 29048
loss: 1.0436125993728638,grad_norm: 0.9999991776252893, iteration: 29049
loss: 1.0172673463821411,grad_norm: 0.919384892878807, iteration: 29050
loss: 1.02876877784729,grad_norm: 0.9999998255616362, iteration: 29051
loss: 1.0334035158157349,grad_norm: 0.9999999115948394, iteration: 29052
loss: 1.043421983718872,grad_norm: 0.999999166017823, iteration: 29053
loss: 0.9879004955291748,grad_norm: 0.9999992222010359, iteration: 29054
loss: 0.9982908368110657,grad_norm: 0.7989914409536117, iteration: 29055
loss: 0.9809548258781433,grad_norm: 0.9999992468083442, iteration: 29056
loss: 0.9620713591575623,grad_norm: 0.999999046934738, iteration: 29057
loss: 1.0478142499923706,grad_norm: 0.9999999031910973, iteration: 29058
loss: 1.0212751626968384,grad_norm: 0.9999989987569311, iteration: 29059
loss: 0.9853695034980774,grad_norm: 0.9999991438746167, iteration: 29060
loss: 1.0368164777755737,grad_norm: 0.9671907662111969, iteration: 29061
loss: 1.0254781246185303,grad_norm: 0.9999994301647697, iteration: 29062
loss: 1.0227655172348022,grad_norm: 0.9999991662123349, iteration: 29063
loss: 1.0073789358139038,grad_norm: 0.9999992045391141, iteration: 29064
loss: 1.0056432485580444,grad_norm: 0.9999989409181699, iteration: 29065
loss: 1.0032950639724731,grad_norm: 0.9611164014752033, iteration: 29066
loss: 1.0340934991836548,grad_norm: 0.999999018070202, iteration: 29067
loss: 1.0890556573867798,grad_norm: 0.9186739734811397, iteration: 29068
loss: 1.0277419090270996,grad_norm: 0.9999990825561121, iteration: 29069
loss: 1.1027010679244995,grad_norm: 0.9999997431246049, iteration: 29070
loss: 1.0129822492599487,grad_norm: 0.9999990572974735, iteration: 29071
loss: 1.0555331707000732,grad_norm: 0.9999991345364219, iteration: 29072
loss: 1.0326181650161743,grad_norm: 0.999999570118287, iteration: 29073
loss: 1.0042543411254883,grad_norm: 0.9999992230285901, iteration: 29074
loss: 1.1056175231933594,grad_norm: 0.9999995517993067, iteration: 29075
loss: 1.0027282238006592,grad_norm: 0.9999993663128852, iteration: 29076
loss: 1.0057307481765747,grad_norm: 0.8734713090488346, iteration: 29077
loss: 0.9966707825660706,grad_norm: 0.9538689350746067, iteration: 29078
loss: 1.045331358909607,grad_norm: 0.9999990309801455, iteration: 29079
loss: 0.9804031252861023,grad_norm: 0.9999991413061731, iteration: 29080
loss: 1.0139809846878052,grad_norm: 0.9999991187379905, iteration: 29081
loss: 0.9880899786949158,grad_norm: 0.9999992184205122, iteration: 29082
loss: 1.008933424949646,grad_norm: 0.9999995844526101, iteration: 29083
loss: 0.9746077060699463,grad_norm: 0.9283169495303093, iteration: 29084
loss: 1.0027170181274414,grad_norm: 0.9999990776991663, iteration: 29085
loss: 1.0136593580245972,grad_norm: 0.9740333540110162, iteration: 29086
loss: 1.0389117002487183,grad_norm: 0.9999992918180909, iteration: 29087
loss: 1.0304646492004395,grad_norm: 0.9710423157861375, iteration: 29088
loss: 1.02360999584198,grad_norm: 0.9999993345055571, iteration: 29089
loss: 0.9986886978149414,grad_norm: 0.9999991284189457, iteration: 29090
loss: 1.012574553489685,grad_norm: 0.9357473015655572, iteration: 29091
loss: 0.9688292145729065,grad_norm: 0.9999991420730815, iteration: 29092
loss: 1.0306065082550049,grad_norm: 0.9999991110951922, iteration: 29093
loss: 1.091245412826538,grad_norm: 0.9999997143930301, iteration: 29094
loss: 0.9557201862335205,grad_norm: 0.9846649448854744, iteration: 29095
loss: 1.0485979318618774,grad_norm: 0.9999993751747466, iteration: 29096
loss: 0.9819486737251282,grad_norm: 0.9312098505449226, iteration: 29097
loss: 1.0076704025268555,grad_norm: 0.9999991588929916, iteration: 29098
loss: 0.9805244207382202,grad_norm: 0.9146825893318291, iteration: 29099
loss: 1.0174061059951782,grad_norm: 0.9999991790705175, iteration: 29100
loss: 1.0380465984344482,grad_norm: 0.9999996595450616, iteration: 29101
loss: 1.168701410293579,grad_norm: 0.999999745968001, iteration: 29102
loss: 1.019302487373352,grad_norm: 0.9999990875717717, iteration: 29103
loss: 1.0300010442733765,grad_norm: 0.9999997129301855, iteration: 29104
loss: 0.9801898002624512,grad_norm: 0.9719543314800149, iteration: 29105
loss: 1.0179990530014038,grad_norm: 0.9305578283130891, iteration: 29106
loss: 0.9982063174247742,grad_norm: 0.8528032441045962, iteration: 29107
loss: 0.9672312140464783,grad_norm: 0.9999990753531692, iteration: 29108
loss: 1.0074363946914673,grad_norm: 0.964026986365247, iteration: 29109
loss: 1.0485107898712158,grad_norm: 0.9999994637152925, iteration: 29110
loss: 1.0085668563842773,grad_norm: 0.9999996049723104, iteration: 29111
loss: 1.0221576690673828,grad_norm: 0.9999994505748572, iteration: 29112
loss: 1.0194475650787354,grad_norm: 0.9823911464952189, iteration: 29113
loss: 0.9473360776901245,grad_norm: 0.9999991165064577, iteration: 29114
loss: 1.0236380100250244,grad_norm: 0.9999996961582694, iteration: 29115
loss: 1.0559338331222534,grad_norm: 0.9999993167659998, iteration: 29116
loss: 1.0296584367752075,grad_norm: 0.9999991576852031, iteration: 29117
loss: 0.9958488345146179,grad_norm: 0.8247380280553744, iteration: 29118
loss: 1.0456568002700806,grad_norm: 0.9999995891715099, iteration: 29119
loss: 0.9602993726730347,grad_norm: 0.9999990684159048, iteration: 29120
loss: 1.0430594682693481,grad_norm: 0.9999996292510849, iteration: 29121
loss: 1.0356296300888062,grad_norm: 0.9999992506278549, iteration: 29122
loss: 1.0074920654296875,grad_norm: 0.9999991356309658, iteration: 29123
loss: 0.9796954393386841,grad_norm: 0.9359789816544013, iteration: 29124
loss: 1.0195403099060059,grad_norm: 0.9999994154597535, iteration: 29125
loss: 1.0661594867706299,grad_norm: 0.9999995631788938, iteration: 29126
loss: 0.9564030170440674,grad_norm: 0.999999221575272, iteration: 29127
loss: 1.0450104475021362,grad_norm: 0.9999991811351416, iteration: 29128
loss: 1.0540660619735718,grad_norm: 0.9999992487919127, iteration: 29129
loss: 1.0384650230407715,grad_norm: 0.9999998692353571, iteration: 29130
loss: 0.9826464056968689,grad_norm: 0.9999990918046877, iteration: 29131
loss: 0.9984778761863708,grad_norm: 0.9011918740001766, iteration: 29132
loss: 0.9917148351669312,grad_norm: 0.9999990763754425, iteration: 29133
loss: 1.0584146976470947,grad_norm: 0.9999996126912102, iteration: 29134
loss: 1.0246213674545288,grad_norm: 0.9999992519633383, iteration: 29135
loss: 0.9919627904891968,grad_norm: 0.9637415451437037, iteration: 29136
loss: 0.9688486456871033,grad_norm: 0.9999990716147663, iteration: 29137
loss: 1.033494234085083,grad_norm: 0.9450458727363175, iteration: 29138
loss: 1.0100603103637695,grad_norm: 0.944103533381588, iteration: 29139
loss: 1.0699001550674438,grad_norm: 0.9999995899373698, iteration: 29140
loss: 0.996433675289154,grad_norm: 0.9999996033415748, iteration: 29141
loss: 1.0287057161331177,grad_norm: 0.9999994527168804, iteration: 29142
loss: 1.0469062328338623,grad_norm: 0.9126037695500954, iteration: 29143
loss: 1.035483479499817,grad_norm: 0.8868955193134114, iteration: 29144
loss: 1.0294065475463867,grad_norm: 0.9999990417768451, iteration: 29145
loss: 1.0254732370376587,grad_norm: 0.9004602604669374, iteration: 29146
loss: 1.017169713973999,grad_norm: 0.9999992169719892, iteration: 29147
loss: 1.016721248626709,grad_norm: 0.9999990524403375, iteration: 29148
loss: 1.0205034017562866,grad_norm: 0.9009390067116659, iteration: 29149
loss: 1.0350154638290405,grad_norm: 0.9999990569056042, iteration: 29150
loss: 0.9572665691375732,grad_norm: 0.8448376233311813, iteration: 29151
loss: 1.0209990739822388,grad_norm: 0.9491247904474596, iteration: 29152
loss: 1.0051875114440918,grad_norm: 0.999999761107903, iteration: 29153
loss: 1.0217324495315552,grad_norm: 0.9999994479217296, iteration: 29154
loss: 0.9941790699958801,grad_norm: 0.9999990568509629, iteration: 29155
loss: 1.0587618350982666,grad_norm: 0.9999992117828902, iteration: 29156
loss: 1.0203132629394531,grad_norm: 0.9546403760184216, iteration: 29157
loss: 0.9819163680076599,grad_norm: 0.9527789633188063, iteration: 29158
loss: 1.0249731540679932,grad_norm: 0.9999995905586665, iteration: 29159
loss: 0.9830062985420227,grad_norm: 0.911914855882695, iteration: 29160
loss: 1.0359143018722534,grad_norm: 0.9999993651212102, iteration: 29161
loss: 0.983589231967926,grad_norm: 0.9999995972885624, iteration: 29162
loss: 1.0273088216781616,grad_norm: 0.9999990698354019, iteration: 29163
loss: 1.011803150177002,grad_norm: 0.9999991163697056, iteration: 29164
loss: 0.9898144006729126,grad_norm: 0.9999990613764855, iteration: 29165
loss: 0.9915295839309692,grad_norm: 0.9210943223765071, iteration: 29166
loss: 1.0094789266586304,grad_norm: 0.999999090381422, iteration: 29167
loss: 1.0059211254119873,grad_norm: 0.9999990349939004, iteration: 29168
loss: 1.0837630033493042,grad_norm: 0.9999998716427722, iteration: 29169
loss: 1.0142203569412231,grad_norm: 0.9937866474103452, iteration: 29170
loss: 1.0166796445846558,grad_norm: 0.8246333599034048, iteration: 29171
loss: 0.9947029948234558,grad_norm: 0.999999081375822, iteration: 29172
loss: 0.994841456413269,grad_norm: 0.8380946582831608, iteration: 29173
loss: 1.0496838092803955,grad_norm: 0.9999996673051805, iteration: 29174
loss: 1.010499119758606,grad_norm: 0.9999991929844375, iteration: 29175
loss: 1.0083060264587402,grad_norm: 0.9999994709350397, iteration: 29176
loss: 1.0028740167617798,grad_norm: 0.9999993568387776, iteration: 29177
loss: 1.0088353157043457,grad_norm: 0.9528269475999912, iteration: 29178
loss: 1.048479676246643,grad_norm: 0.8405094736445378, iteration: 29179
loss: 1.0669410228729248,grad_norm: 0.9999995528373375, iteration: 29180
loss: 0.9871794581413269,grad_norm: 0.9999996266997561, iteration: 29181
loss: 1.0089845657348633,grad_norm: 0.9941955355977022, iteration: 29182
loss: 1.0521718263626099,grad_norm: 0.9999999309368115, iteration: 29183
loss: 0.975653350353241,grad_norm: 0.9999990108481572, iteration: 29184
loss: 0.9705535173416138,grad_norm: 0.8273500167201794, iteration: 29185
loss: 1.0282108783721924,grad_norm: 0.9999993447042773, iteration: 29186
loss: 1.0743674039840698,grad_norm: 0.9999992439749804, iteration: 29187
loss: 1.0896719694137573,grad_norm: 0.9999993645548061, iteration: 29188
loss: 1.0270588397979736,grad_norm: 0.8490628017283498, iteration: 29189
loss: 1.0788235664367676,grad_norm: 0.9999993536667051, iteration: 29190
loss: 1.025098204612732,grad_norm: 0.9999990468117451, iteration: 29191
loss: 1.0025346279144287,grad_norm: 0.9999990926002827, iteration: 29192
loss: 1.0465291738510132,grad_norm: 0.9999991131684641, iteration: 29193
loss: 1.0147708654403687,grad_norm: 0.9999993846621771, iteration: 29194
loss: 0.988746702671051,grad_norm: 0.9752989417135505, iteration: 29195
loss: 1.0265604257583618,grad_norm: 0.9895881311911395, iteration: 29196
loss: 1.050149917602539,grad_norm: 0.991408008438818, iteration: 29197
loss: 1.0309983491897583,grad_norm: 0.9076280407910383, iteration: 29198
loss: 1.0179061889648438,grad_norm: 0.9999992854520442, iteration: 29199
loss: 0.9859536290168762,grad_norm: 0.820229914851382, iteration: 29200
loss: 0.9991181492805481,grad_norm: 0.999999084282747, iteration: 29201
loss: 1.0607677698135376,grad_norm: 0.9999993622951108, iteration: 29202
loss: 0.9953399896621704,grad_norm: 0.9999992354501558, iteration: 29203
loss: 1.0376646518707275,grad_norm: 0.9999996130628015, iteration: 29204
loss: 0.9838014245033264,grad_norm: 0.9999991204820788, iteration: 29205
loss: 1.0060479640960693,grad_norm: 0.9999992498761145, iteration: 29206
loss: 1.0514177083969116,grad_norm: 0.9999998500381753, iteration: 29207
loss: 0.9917103052139282,grad_norm: 0.999999162838192, iteration: 29208
loss: 1.1780798435211182,grad_norm: 0.9999996524869306, iteration: 29209
loss: 1.0855467319488525,grad_norm: 0.9798531891514685, iteration: 29210
loss: 1.094823956489563,grad_norm: 0.9999992171973597, iteration: 29211
loss: 0.9863401055335999,grad_norm: 0.8104115984958915, iteration: 29212
loss: 1.0459829568862915,grad_norm: 0.999999940583975, iteration: 29213
loss: 0.994875967502594,grad_norm: 0.9583775147104296, iteration: 29214
loss: 1.0633854866027832,grad_norm: 0.9398230909710793, iteration: 29215
loss: 1.0200905799865723,grad_norm: 0.9999991181579269, iteration: 29216
loss: 1.0370596647262573,grad_norm: 0.999999312474714, iteration: 29217
loss: 1.0159952640533447,grad_norm: 0.9999991811700039, iteration: 29218
loss: 1.0031743049621582,grad_norm: 0.9160769468118802, iteration: 29219
loss: 1.0912164449691772,grad_norm: 0.9999990179350599, iteration: 29220
loss: 1.0109827518463135,grad_norm: 0.9999992378467654, iteration: 29221
loss: 1.033537745475769,grad_norm: 0.9999990006135345, iteration: 29222
loss: 1.003951072692871,grad_norm: 0.9999991666138346, iteration: 29223
loss: 1.0223281383514404,grad_norm: 0.8561535575018216, iteration: 29224
loss: 1.0069279670715332,grad_norm: 0.9999990744397041, iteration: 29225
loss: 1.0401827096939087,grad_norm: 0.9999991884616451, iteration: 29226
loss: 1.0069500207901,grad_norm: 0.9999990895004356, iteration: 29227
loss: 1.0827714204788208,grad_norm: 0.9999997616647543, iteration: 29228
loss: 1.0094095468521118,grad_norm: 0.9999991207172598, iteration: 29229
loss: 0.9756362438201904,grad_norm: 0.9003314568918569, iteration: 29230
loss: 1.072654366493225,grad_norm: 0.9999991487857337, iteration: 29231
loss: 1.023833155632019,grad_norm: 0.9999991304602013, iteration: 29232
loss: 1.016254186630249,grad_norm: 0.9651725797338522, iteration: 29233
loss: 1.0047452449798584,grad_norm: 0.8400959088077397, iteration: 29234
loss: 1.014175295829773,grad_norm: 0.9644136582808839, iteration: 29235
loss: 1.023667573928833,grad_norm: 0.9999990908160782, iteration: 29236
loss: 1.0025625228881836,grad_norm: 0.9422488617146263, iteration: 29237
loss: 1.0619282722473145,grad_norm: 0.9999995177264798, iteration: 29238
loss: 0.9954221844673157,grad_norm: 0.9999993352450919, iteration: 29239
loss: 1.0245928764343262,grad_norm: 0.999999069247426, iteration: 29240
loss: 1.0149550437927246,grad_norm: 0.9999989800723585, iteration: 29241
loss: 0.9962288737297058,grad_norm: 0.9999993035096248, iteration: 29242
loss: 0.9733534455299377,grad_norm: 0.9564007018554831, iteration: 29243
loss: 1.0051344633102417,grad_norm: 0.9999991283630264, iteration: 29244
loss: 0.9923386573791504,grad_norm: 0.8727511307157836, iteration: 29245
loss: 1.0191471576690674,grad_norm: 0.9999991483673588, iteration: 29246
loss: 1.022564172744751,grad_norm: 0.9999999209764059, iteration: 29247
loss: 0.9837531447410583,grad_norm: 0.9999989777153961, iteration: 29248
loss: 1.0225087404251099,grad_norm: 0.9999996082516295, iteration: 29249
loss: 0.9876975417137146,grad_norm: 0.9281659511339816, iteration: 29250
loss: 0.9869750738143921,grad_norm: 0.9335302055377882, iteration: 29251
loss: 0.9577962756156921,grad_norm: 0.9800028521535614, iteration: 29252
loss: 1.0133830308914185,grad_norm: 0.8922180935918222, iteration: 29253
loss: 1.0586820840835571,grad_norm: 0.9999991029729814, iteration: 29254
loss: 1.0160541534423828,grad_norm: 0.8913755890806194, iteration: 29255
loss: 1.0557032823562622,grad_norm: 0.9999992357411417, iteration: 29256
loss: 1.001326560974121,grad_norm: 0.9999990415495218, iteration: 29257
loss: 1.017349123954773,grad_norm: 0.9999992965148085, iteration: 29258
loss: 1.0275416374206543,grad_norm: 0.9999993707252525, iteration: 29259
loss: 1.0134772062301636,grad_norm: 0.9999991143166048, iteration: 29260
loss: 1.0203346014022827,grad_norm: 0.740805299604167, iteration: 29261
loss: 1.0239596366882324,grad_norm: 0.7731939019757644, iteration: 29262
loss: 0.980679988861084,grad_norm: 0.9999991259451209, iteration: 29263
loss: 1.0100769996643066,grad_norm: 0.9999990983185288, iteration: 29264
loss: 1.0382683277130127,grad_norm: 0.9999991369089185, iteration: 29265
loss: 0.977799117565155,grad_norm: 0.9690118835626995, iteration: 29266
loss: 1.0100773572921753,grad_norm: 0.9999991704267589, iteration: 29267
loss: 1.0336483716964722,grad_norm: 0.9999990869924341, iteration: 29268
loss: 1.0348795652389526,grad_norm: 0.9999991003408952, iteration: 29269
loss: 1.0301504135131836,grad_norm: 0.9999995267090226, iteration: 29270
loss: 1.0372467041015625,grad_norm: 0.999998963137202, iteration: 29271
loss: 1.003267526626587,grad_norm: 0.9999991833187167, iteration: 29272
loss: 1.0541828870773315,grad_norm: 0.9999994216222974, iteration: 29273
loss: 1.021081805229187,grad_norm: 0.9537086482958397, iteration: 29274
loss: 1.010556936264038,grad_norm: 0.999999062812752, iteration: 29275
loss: 1.0107901096343994,grad_norm: 0.8367537273798273, iteration: 29276
loss: 1.0287657976150513,grad_norm: 0.9999990291816055, iteration: 29277
loss: 0.9672302007675171,grad_norm: 0.874643605777427, iteration: 29278
loss: 1.0387808084487915,grad_norm: 0.9999995774321612, iteration: 29279
loss: 1.0142923593521118,grad_norm: 0.9999990487714279, iteration: 29280
loss: 1.018687129020691,grad_norm: 0.9323339055651838, iteration: 29281
loss: 1.0075479745864868,grad_norm: 0.9999996292939956, iteration: 29282
loss: 0.9724839329719543,grad_norm: 0.9999990392949659, iteration: 29283
loss: 0.9962683320045471,grad_norm: 0.9476252409332753, iteration: 29284
loss: 1.045253872871399,grad_norm: 0.9999990687540093, iteration: 29285
loss: 1.036515474319458,grad_norm: 0.9164742846974778, iteration: 29286
loss: 1.0180838108062744,grad_norm: 0.9999993531342726, iteration: 29287
loss: 0.9984403848648071,grad_norm: 0.9560270246823817, iteration: 29288
loss: 1.0151656866073608,grad_norm: 0.8964501179179415, iteration: 29289
loss: 0.9995118975639343,grad_norm: 0.999999236886765, iteration: 29290
loss: 1.0131351947784424,grad_norm: 0.9999990706975981, iteration: 29291
loss: 1.0115807056427002,grad_norm: 0.778955224374176, iteration: 29292
loss: 1.0398889780044556,grad_norm: 0.9752141955399277, iteration: 29293
loss: 1.0094395875930786,grad_norm: 0.9999991935924757, iteration: 29294
loss: 0.9857499003410339,grad_norm: 0.8999939964269367, iteration: 29295
loss: 1.0328017473220825,grad_norm: 0.9999990715378516, iteration: 29296
loss: 0.9901503920555115,grad_norm: 0.9999992572830323, iteration: 29297
loss: 0.9844886660575867,grad_norm: 0.8974722618999831, iteration: 29298
loss: 1.0114730596542358,grad_norm: 0.9999989758806153, iteration: 29299
loss: 1.038337230682373,grad_norm: 0.9999992558480205, iteration: 29300
loss: 1.0289102792739868,grad_norm: 0.9939456277083585, iteration: 29301
loss: 0.9921378493309021,grad_norm: 0.9999995168141814, iteration: 29302
loss: 1.0293618440628052,grad_norm: 0.999999229083797, iteration: 29303
loss: 0.9882094264030457,grad_norm: 0.8936750587337731, iteration: 29304
loss: 1.0003108978271484,grad_norm: 0.9869303574740335, iteration: 29305
loss: 1.0401906967163086,grad_norm: 0.9999990664228482, iteration: 29306
loss: 1.0309016704559326,grad_norm: 0.9942419327633868, iteration: 29307
loss: 1.0065317153930664,grad_norm: 0.999999134481158, iteration: 29308
loss: 0.9804158806800842,grad_norm: 0.9999990366328724, iteration: 29309
loss: 1.001468539237976,grad_norm: 0.9999990658128936, iteration: 29310
loss: 1.0234267711639404,grad_norm: 0.962888220341378, iteration: 29311
loss: 1.0061225891113281,grad_norm: 0.9978675578092918, iteration: 29312
loss: 0.9830409288406372,grad_norm: 0.924183341656135, iteration: 29313
loss: 1.0628867149353027,grad_norm: 0.9999990271682883, iteration: 29314
loss: 1.232555866241455,grad_norm: 0.9999997129022808, iteration: 29315
loss: 1.0258996486663818,grad_norm: 0.9972365788334994, iteration: 29316
loss: 0.9641793966293335,grad_norm: 0.9999996182272239, iteration: 29317
loss: 0.9823439121246338,grad_norm: 0.9999991646042167, iteration: 29318
loss: 1.0305378437042236,grad_norm: 0.9999991900203672, iteration: 29319
loss: 0.9619626998901367,grad_norm: 0.999998978783342, iteration: 29320
loss: 1.0332355499267578,grad_norm: 0.9999992478692049, iteration: 29321
loss: 0.9865812659263611,grad_norm: 0.9944045122529261, iteration: 29322
loss: 1.017930030822754,grad_norm: 0.9999995802912471, iteration: 29323
loss: 1.0057363510131836,grad_norm: 0.9999992062463829, iteration: 29324
loss: 1.018021821975708,grad_norm: 0.9670240452959137, iteration: 29325
loss: 1.0003114938735962,grad_norm: 0.9877313546160883, iteration: 29326
loss: 1.0104784965515137,grad_norm: 0.9473839767173464, iteration: 29327
loss: 0.9897379875183105,grad_norm: 0.8957460314761552, iteration: 29328
loss: 0.9938321709632874,grad_norm: 0.9999995816987852, iteration: 29329
loss: 0.9803103804588318,grad_norm: 0.9999992505580472, iteration: 29330
loss: 1.035586953163147,grad_norm: 0.9999992060003998, iteration: 29331
loss: 1.0134490728378296,grad_norm: 0.965576622830995, iteration: 29332
loss: 1.0024707317352295,grad_norm: 0.9999993934521104, iteration: 29333
loss: 0.9769837260246277,grad_norm: 0.999999181126164, iteration: 29334
loss: 0.9783811569213867,grad_norm: 0.9999991494047566, iteration: 29335
loss: 1.115885615348816,grad_norm: 0.9808649419147542, iteration: 29336
loss: 1.0246601104736328,grad_norm: 0.9999990708267297, iteration: 29337
loss: 1.0018178224563599,grad_norm: 0.9999991334951928, iteration: 29338
loss: 1.0037816762924194,grad_norm: 0.9999992485274513, iteration: 29339
loss: 1.0513166189193726,grad_norm: 0.9999998193421042, iteration: 29340
loss: 0.9974668622016907,grad_norm: 0.9179370481935353, iteration: 29341
loss: 1.0391806364059448,grad_norm: 0.9749511096543951, iteration: 29342
loss: 1.0625345706939697,grad_norm: 0.9530674402761097, iteration: 29343
loss: 1.0353463888168335,grad_norm: 0.9999991269468144, iteration: 29344
loss: 1.0369415283203125,grad_norm: 0.9999992234823474, iteration: 29345
loss: 1.0414390563964844,grad_norm: 0.7872018187443456, iteration: 29346
loss: 1.040328860282898,grad_norm: 0.9999990991948791, iteration: 29347
loss: 0.9853689670562744,grad_norm: 0.9239793645962401, iteration: 29348
loss: 1.0082017183303833,grad_norm: 0.9999992929013333, iteration: 29349
loss: 1.041690468788147,grad_norm: 0.9999994439070022, iteration: 29350
loss: 1.0398579835891724,grad_norm: 0.8350445239537151, iteration: 29351
loss: 0.976478636264801,grad_norm: 0.9041789264886672, iteration: 29352
loss: 0.9828526973724365,grad_norm: 0.7980800595602479, iteration: 29353
loss: 0.977924108505249,grad_norm: 0.9999991136082603, iteration: 29354
loss: 1.080186128616333,grad_norm: 0.9999995092379996, iteration: 29355
loss: 1.0011565685272217,grad_norm: 0.8672291227574824, iteration: 29356
loss: 1.0144587755203247,grad_norm: 0.9999992891128746, iteration: 29357
loss: 1.0394785404205322,grad_norm: 0.9999992784824044, iteration: 29358
loss: 0.9910322427749634,grad_norm: 0.9329930489196306, iteration: 29359
loss: 0.9782475233078003,grad_norm: 0.8524055729115929, iteration: 29360
loss: 0.9890782237052917,grad_norm: 0.9999995766253308, iteration: 29361
loss: 1.02645742893219,grad_norm: 0.9999993135408684, iteration: 29362
loss: 0.9900991916656494,grad_norm: 0.999998954999449, iteration: 29363
loss: 1.050998330116272,grad_norm: 0.9751656799369994, iteration: 29364
loss: 0.9924571514129639,grad_norm: 0.8864339386601972, iteration: 29365
loss: 1.0198098421096802,grad_norm: 0.9999992445699036, iteration: 29366
loss: 0.9987128376960754,grad_norm: 0.8825821548052634, iteration: 29367
loss: 1.009090781211853,grad_norm: 0.9327683836917204, iteration: 29368
loss: 1.0092719793319702,grad_norm: 0.9675581059442389, iteration: 29369
loss: 0.9907985329627991,grad_norm: 0.9216354636862024, iteration: 29370
loss: 1.0260941982269287,grad_norm: 0.9999990218069065, iteration: 29371
loss: 1.0385805368423462,grad_norm: 0.9999992053200877, iteration: 29372
loss: 1.0252330303192139,grad_norm: 0.9999992608035965, iteration: 29373
loss: 0.987240195274353,grad_norm: 0.999999046359863, iteration: 29374
loss: 0.9763604402542114,grad_norm: 0.9441879014104435, iteration: 29375
loss: 1.0312930345535278,grad_norm: 0.9999996062316235, iteration: 29376
loss: 1.0476948022842407,grad_norm: 0.9999991587375695, iteration: 29377
loss: 0.9908496141433716,grad_norm: 0.9999991000357364, iteration: 29378
loss: 0.9848242402076721,grad_norm: 0.9999992013440253, iteration: 29379
loss: 1.0174143314361572,grad_norm: 0.9008326785141864, iteration: 29380
loss: 0.9955387711524963,grad_norm: 0.9999990071623525, iteration: 29381
loss: 1.0329142808914185,grad_norm: 0.9999991706944226, iteration: 29382
loss: 1.0964750051498413,grad_norm: 0.990312265738918, iteration: 29383
loss: 0.979933500289917,grad_norm: 0.8818042712987924, iteration: 29384
loss: 1.0106117725372314,grad_norm: 0.9408272253903167, iteration: 29385
loss: 0.9829258918762207,grad_norm: 0.9999991300219228, iteration: 29386
loss: 1.0081039667129517,grad_norm: 0.8555757554761472, iteration: 29387
loss: 0.9777749180793762,grad_norm: 0.9999993711706968, iteration: 29388
loss: 1.0151488780975342,grad_norm: 0.9999993613532185, iteration: 29389
loss: 1.012735366821289,grad_norm: 0.9999989479255086, iteration: 29390
loss: 0.9404013156890869,grad_norm: 0.9297672808887415, iteration: 29391
loss: 0.9837852120399475,grad_norm: 0.850677368162507, iteration: 29392
loss: 1.0034152269363403,grad_norm: 0.8130627243433788, iteration: 29393
loss: 1.0071368217468262,grad_norm: 0.9999990904102035, iteration: 29394
loss: 1.001441478729248,grad_norm: 0.9773096012990821, iteration: 29395
loss: 1.024186372756958,grad_norm: 0.9999994062780408, iteration: 29396
loss: 0.9974475502967834,grad_norm: 0.9999994356502856, iteration: 29397
loss: 1.0187958478927612,grad_norm: 0.8520415512412982, iteration: 29398
loss: 1.0200437307357788,grad_norm: 0.9999992508953621, iteration: 29399
loss: 0.9967994093894958,grad_norm: 0.9036132859316133, iteration: 29400
loss: 1.0083715915679932,grad_norm: 0.8879557479824689, iteration: 29401
loss: 1.0361239910125732,grad_norm: 0.887729888665363, iteration: 29402
loss: 0.9917272925376892,grad_norm: 0.9072017366129284, iteration: 29403
loss: 0.9723358154296875,grad_norm: 0.9990499340153075, iteration: 29404
loss: 1.0043014287948608,grad_norm: 0.9505086747565097, iteration: 29405
loss: 0.9977359771728516,grad_norm: 0.9596511182754269, iteration: 29406
loss: 1.0348469018936157,grad_norm: 0.9221223157082588, iteration: 29407
loss: 0.9761640429496765,grad_norm: 0.8864312664264332, iteration: 29408
loss: 0.9361473321914673,grad_norm: 0.9999990330280666, iteration: 29409
loss: 1.042555570602417,grad_norm: 0.999999217888491, iteration: 29410
loss: 1.0285602807998657,grad_norm: 0.9999991475442848, iteration: 29411
loss: 1.0312930345535278,grad_norm: 0.9882823747450286, iteration: 29412
loss: 1.0269578695297241,grad_norm: 0.9999991335669888, iteration: 29413
loss: 1.0264179706573486,grad_norm: 0.9951471357836102, iteration: 29414
loss: 1.0112468004226685,grad_norm: 0.8792500680974057, iteration: 29415
loss: 1.014350175857544,grad_norm: 0.9538045146866967, iteration: 29416
loss: 1.0211255550384521,grad_norm: 0.8944356095516409, iteration: 29417
loss: 0.9850943684577942,grad_norm: 0.9999991881664396, iteration: 29418
loss: 1.0311609506607056,grad_norm: 0.9366265361966212, iteration: 29419
loss: 1.03169846534729,grad_norm: 0.957086006215256, iteration: 29420
loss: 1.0186752080917358,grad_norm: 0.9615502815253857, iteration: 29421
loss: 1.0049349069595337,grad_norm: 0.9999989910200286, iteration: 29422
loss: 0.9582446217536926,grad_norm: 0.982684314216082, iteration: 29423
loss: 1.010921835899353,grad_norm: 0.9999991540596226, iteration: 29424
loss: 1.022436261177063,grad_norm: 0.9999991590225424, iteration: 29425
loss: 1.0163520574569702,grad_norm: 0.9679310809798388, iteration: 29426
loss: 0.9867573380470276,grad_norm: 0.9806568992175827, iteration: 29427
loss: 0.9764804840087891,grad_norm: 0.9999994205977886, iteration: 29428
loss: 1.017216444015503,grad_norm: 0.8908534399097829, iteration: 29429
loss: 1.0049223899841309,grad_norm: 0.9911038622578393, iteration: 29430
loss: 0.9919196367263794,grad_norm: 0.881736549719159, iteration: 29431
loss: 0.9591901302337646,grad_norm: 0.9999990745316055, iteration: 29432
loss: 1.0186866521835327,grad_norm: 0.9999997041543097, iteration: 29433
loss: 0.9948749542236328,grad_norm: 0.9999991862458542, iteration: 29434
loss: 1.0350010395050049,grad_norm: 0.9999989628116023, iteration: 29435
loss: 0.9802427291870117,grad_norm: 0.915675821373878, iteration: 29436
loss: 1.0265865325927734,grad_norm: 0.8825062042248354, iteration: 29437
loss: 1.0307506322860718,grad_norm: 0.9999990100286827, iteration: 29438
loss: 1.0265426635742188,grad_norm: 0.9999990825933824, iteration: 29439
loss: 1.0156145095825195,grad_norm: 0.9636241969606157, iteration: 29440
loss: 1.0018806457519531,grad_norm: 0.9340275796877524, iteration: 29441
loss: 1.0119487047195435,grad_norm: 0.8671993686071728, iteration: 29442
loss: 1.0409001111984253,grad_norm: 0.9999989747027538, iteration: 29443
loss: 0.9723514318466187,grad_norm: 0.9401625787162299, iteration: 29444
loss: 0.9575508236885071,grad_norm: 0.9238120516275036, iteration: 29445
loss: 1.012951374053955,grad_norm: 0.9742569839980316, iteration: 29446
loss: 1.0185853242874146,grad_norm: 0.9999992564220647, iteration: 29447
loss: 1.0533827543258667,grad_norm: 0.9999997524708596, iteration: 29448
loss: 0.9657993316650391,grad_norm: 0.9550030452782635, iteration: 29449
loss: 0.9973998069763184,grad_norm: 0.9626336179107815, iteration: 29450
loss: 1.0314995050430298,grad_norm: 0.8962336878809308, iteration: 29451
loss: 1.0405337810516357,grad_norm: 0.9999990610413968, iteration: 29452
loss: 1.0145188570022583,grad_norm: 0.8900593123985734, iteration: 29453
loss: 1.0089695453643799,grad_norm: 0.7933328868928542, iteration: 29454
loss: 0.9937454462051392,grad_norm: 0.9999994022748558, iteration: 29455
loss: 0.939711332321167,grad_norm: 0.9996613398932995, iteration: 29456
loss: 0.9857369661331177,grad_norm: 0.8843034586407802, iteration: 29457
loss: 1.00650954246521,grad_norm: 0.8104415901339012, iteration: 29458
loss: 0.9343566298484802,grad_norm: 0.8734571771837683, iteration: 29459
loss: 1.0050660371780396,grad_norm: 0.9999995590396823, iteration: 29460
loss: 1.0319807529449463,grad_norm: 0.9999992312723653, iteration: 29461
loss: 1.0194168090820312,grad_norm: 0.9374892152461659, iteration: 29462
loss: 1.0350291728973389,grad_norm: 0.9999995212356448, iteration: 29463
loss: 1.0226550102233887,grad_norm: 0.9999990568121586, iteration: 29464
loss: 1.018717646598816,grad_norm: 0.9798676589560826, iteration: 29465
loss: 1.0381499528884888,grad_norm: 0.9999993841102583, iteration: 29466
loss: 1.0168730020523071,grad_norm: 0.965473004389955, iteration: 29467
loss: 0.9705520272254944,grad_norm: 0.9236758408363661, iteration: 29468
loss: 1.060612440109253,grad_norm: 0.9999997640579111, iteration: 29469
loss: 1.0296438932418823,grad_norm: 0.9999991030315212, iteration: 29470
loss: 1.0552594661712646,grad_norm: 0.9999997824244997, iteration: 29471
loss: 1.0487686395645142,grad_norm: 0.9999993649843408, iteration: 29472
loss: 1.0128488540649414,grad_norm: 0.9511355553613251, iteration: 29473
loss: 1.0161534547805786,grad_norm: 0.9580127867769113, iteration: 29474
loss: 1.1156607866287231,grad_norm: 0.999999933377931, iteration: 29475
loss: 1.0263125896453857,grad_norm: 0.9022050749964795, iteration: 29476
loss: 1.0090032815933228,grad_norm: 0.9328305469456859, iteration: 29477
loss: 1.0307289361953735,grad_norm: 0.999999702534573, iteration: 29478
loss: 1.0451054573059082,grad_norm: 0.9999991818373085, iteration: 29479
loss: 0.9863854646682739,grad_norm: 0.9322399629003899, iteration: 29480
loss: 0.996673047542572,grad_norm: 0.9999996436003331, iteration: 29481
loss: 0.989870011806488,grad_norm: 0.9042154769539018, iteration: 29482
loss: 1.013081669807434,grad_norm: 0.9999992944906225, iteration: 29483
loss: 0.9812538623809814,grad_norm: 0.9999990040609785, iteration: 29484
loss: 1.0220650434494019,grad_norm: 0.999999104865417, iteration: 29485
loss: 1.008070468902588,grad_norm: 0.8137041844685174, iteration: 29486
loss: 1.0207645893096924,grad_norm: 0.9999993474561247, iteration: 29487
loss: 1.02688729763031,grad_norm: 0.9999993515930444, iteration: 29488
loss: 0.9849061965942383,grad_norm: 0.8991972296284024, iteration: 29489
loss: 1.0494518280029297,grad_norm: 0.9999997218736784, iteration: 29490
loss: 1.001469373703003,grad_norm: 0.9177105564403755, iteration: 29491
loss: 0.9909442663192749,grad_norm: 0.9492913091603522, iteration: 29492
loss: 1.0593794584274292,grad_norm: 0.9999997510146122, iteration: 29493
loss: 1.0272808074951172,grad_norm: 0.9999991269016891, iteration: 29494
loss: 0.9736212491989136,grad_norm: 0.9619824648533017, iteration: 29495
loss: 1.020308494567871,grad_norm: 0.9545079950313249, iteration: 29496
loss: 1.059470772743225,grad_norm: 0.892366348466021, iteration: 29497
loss: 0.9807013869285583,grad_norm: 0.9999991302102154, iteration: 29498
loss: 1.0178015232086182,grad_norm: 0.9999994097421475, iteration: 29499
loss: 0.997745931148529,grad_norm: 0.9999992428634638, iteration: 29500
loss: 1.0246351957321167,grad_norm: 0.9999989949384728, iteration: 29501
loss: 1.0365725755691528,grad_norm: 0.9999990766143362, iteration: 29502
loss: 1.1029545068740845,grad_norm: 0.9999998517973738, iteration: 29503
loss: 1.0183545351028442,grad_norm: 0.9999991578331654, iteration: 29504
loss: 1.054162621498108,grad_norm: 0.9999998052921724, iteration: 29505
loss: 0.9916403889656067,grad_norm: 0.9999991345070607, iteration: 29506
loss: 1.0647521018981934,grad_norm: 0.9999997866340917, iteration: 29507
loss: 0.9853097200393677,grad_norm: 0.9284806992006124, iteration: 29508
loss: 1.0049554109573364,grad_norm: 0.8797751121764212, iteration: 29509
loss: 0.9850419759750366,grad_norm: 0.963695580613147, iteration: 29510
loss: 0.9748372435569763,grad_norm: 0.9999989760698836, iteration: 29511
loss: 0.9978269338607788,grad_norm: 0.9686402882880878, iteration: 29512
loss: 0.9918821454048157,grad_norm: 0.8910330069938502, iteration: 29513
loss: 1.004976511001587,grad_norm: 0.963220650685001, iteration: 29514
loss: 1.0160176753997803,grad_norm: 0.9999991674711868, iteration: 29515
loss: 1.025349736213684,grad_norm: 0.9999994316103074, iteration: 29516
loss: 1.0250446796417236,grad_norm: 0.8952699086433626, iteration: 29517
loss: 1.0105384588241577,grad_norm: 0.9999994337686153, iteration: 29518
loss: 1.0406287908554077,grad_norm: 0.9999990817425938, iteration: 29519
loss: 1.0591107606887817,grad_norm: 0.9999991916981029, iteration: 29520
loss: 1.1856884956359863,grad_norm: 0.9999996222325166, iteration: 29521
loss: 1.0557515621185303,grad_norm: 0.9999997366701455, iteration: 29522
loss: 1.1968363523483276,grad_norm: 0.9999999516999344, iteration: 29523
loss: 1.0033855438232422,grad_norm: 0.999999815138412, iteration: 29524
loss: 1.0041000843048096,grad_norm: 0.9999990753060908, iteration: 29525
loss: 1.0175660848617554,grad_norm: 0.9128478580961674, iteration: 29526
loss: 1.0860235691070557,grad_norm: 0.8205826981093393, iteration: 29527
loss: 1.0009093284606934,grad_norm: 0.9999993860189625, iteration: 29528
loss: 1.1404308080673218,grad_norm: 1.0000000058196559, iteration: 29529
loss: 1.0352919101715088,grad_norm: 0.9999992296646733, iteration: 29530
loss: 1.0292197465896606,grad_norm: 0.9999995953989027, iteration: 29531
loss: 1.0144543647766113,grad_norm: 0.9999992457245436, iteration: 29532
loss: 0.9713013768196106,grad_norm: 0.9999989031722641, iteration: 29533
loss: 1.0475293397903442,grad_norm: 0.9478293730902442, iteration: 29534
loss: 1.0373271703720093,grad_norm: 0.9999994293136261, iteration: 29535
loss: 1.040447473526001,grad_norm: 0.9999991483399895, iteration: 29536
loss: 0.9926216006278992,grad_norm: 0.9930891010786194, iteration: 29537
loss: 1.0917490720748901,grad_norm: 0.9999993037817786, iteration: 29538
loss: 1.105180263519287,grad_norm: 0.999999148513622, iteration: 29539
loss: 1.0098903179168701,grad_norm: 0.8504971720522546, iteration: 29540
loss: 0.9891941547393799,grad_norm: 0.8970017638239132, iteration: 29541
loss: 1.0850138664245605,grad_norm: 0.9999998976148677, iteration: 29542
loss: 0.975929856300354,grad_norm: 0.8845744175900203, iteration: 29543
loss: 0.9769989848136902,grad_norm: 0.9480892890981514, iteration: 29544
loss: 1.0840195417404175,grad_norm: 0.9999996646964159, iteration: 29545
loss: 1.0375500917434692,grad_norm: 0.999999036288793, iteration: 29546
loss: 0.9556861519813538,grad_norm: 0.9381942437084976, iteration: 29547
loss: 1.092752456665039,grad_norm: 0.9999990539956192, iteration: 29548
loss: 1.0293811559677124,grad_norm: 0.9999998800520045, iteration: 29549
loss: 1.0899511575698853,grad_norm: 0.9999991918440436, iteration: 29550
loss: 1.090088129043579,grad_norm: 0.9999997599466955, iteration: 29551
loss: 1.0092799663543701,grad_norm: 0.9730674376146663, iteration: 29552
loss: 1.0266395807266235,grad_norm: 0.8366432968200596, iteration: 29553
loss: 0.9757801294326782,grad_norm: 0.9999991127596864, iteration: 29554
loss: 1.0374027490615845,grad_norm: 0.9999993348035947, iteration: 29555
loss: 1.000381588935852,grad_norm: 0.9999991455577902, iteration: 29556
loss: 1.0900609493255615,grad_norm: 0.9384023454787531, iteration: 29557
loss: 0.9991256594657898,grad_norm: 0.9527083373023223, iteration: 29558
loss: 1.060958981513977,grad_norm: 0.9999992975140178, iteration: 29559
loss: 1.0026158094406128,grad_norm: 0.9977850687631862, iteration: 29560
loss: 1.0365824699401855,grad_norm: 0.9999996196974305, iteration: 29561
loss: 1.0233014822006226,grad_norm: 0.9517949463811455, iteration: 29562
loss: 1.0015497207641602,grad_norm: 0.963306853769147, iteration: 29563
loss: 1.0026828050613403,grad_norm: 0.9999996623612007, iteration: 29564
loss: 1.0048680305480957,grad_norm: 0.9999991865519839, iteration: 29565
loss: 1.0134572982788086,grad_norm: 0.9999994474493823, iteration: 29566
loss: 0.9996147751808167,grad_norm: 0.8597225272896575, iteration: 29567
loss: 1.069291114807129,grad_norm: 0.9999992147748671, iteration: 29568
loss: 0.9923295974731445,grad_norm: 0.9999991369353575, iteration: 29569
loss: 1.0422381162643433,grad_norm: 0.9999992596935859, iteration: 29570
loss: 1.0653681755065918,grad_norm: 0.9999995051166904, iteration: 29571
loss: 1.0054761171340942,grad_norm: 0.9999989771721086, iteration: 29572
loss: 1.1477361917495728,grad_norm: 0.9999998730825597, iteration: 29573
loss: 1.0221349000930786,grad_norm: 0.9999996002130576, iteration: 29574
loss: 1.0057185888290405,grad_norm: 0.9999991112921407, iteration: 29575
loss: 0.9916223287582397,grad_norm: 0.9999993525773287, iteration: 29576
loss: 0.9874262809753418,grad_norm: 0.9250850651877354, iteration: 29577
loss: 1.0110602378845215,grad_norm: 0.9999991896760962, iteration: 29578
loss: 1.036874771118164,grad_norm: 0.9999991704910363, iteration: 29579
loss: 1.0113787651062012,grad_norm: 0.9456969649166101, iteration: 29580
loss: 1.0096300840377808,grad_norm: 0.9999990441610604, iteration: 29581
loss: 1.0202879905700684,grad_norm: 0.9999996343799877, iteration: 29582
loss: 1.0259335041046143,grad_norm: 0.988898513922161, iteration: 29583
loss: 0.9865146279335022,grad_norm: 0.9327340705515599, iteration: 29584
loss: 1.0025352239608765,grad_norm: 0.9803703766193509, iteration: 29585
loss: 1.0154905319213867,grad_norm: 0.9786852139549316, iteration: 29586
loss: 1.0216619968414307,grad_norm: 0.9999996329168298, iteration: 29587
loss: 0.9844772815704346,grad_norm: 0.9337315048377636, iteration: 29588
loss: 1.0027052164077759,grad_norm: 0.9999991281510522, iteration: 29589
loss: 1.0655866861343384,grad_norm: 0.9999996860494648, iteration: 29590
loss: 0.9855086803436279,grad_norm: 0.9999996380045538, iteration: 29591
loss: 1.0047274827957153,grad_norm: 0.9999991398539931, iteration: 29592
loss: 1.0019551515579224,grad_norm: 0.7955092459786791, iteration: 29593
loss: 0.9896345138549805,grad_norm: 0.8915112057840638, iteration: 29594
loss: 0.9582107663154602,grad_norm: 0.943620819131383, iteration: 29595
loss: 0.9913220405578613,grad_norm: 0.9999989918974859, iteration: 29596
loss: 1.0502797365188599,grad_norm: 0.9973856033119349, iteration: 29597
loss: 0.975502073764801,grad_norm: 0.9506893449006188, iteration: 29598
loss: 1.013453722000122,grad_norm: 0.9999992609550431, iteration: 29599
loss: 0.9919019937515259,grad_norm: 0.9200737473488902, iteration: 29600
loss: 1.0683554410934448,grad_norm: 0.9999999348858202, iteration: 29601
loss: 1.0550931692123413,grad_norm: 0.9999998115634402, iteration: 29602
loss: 1.1306973695755005,grad_norm: 0.9999991820041022, iteration: 29603
loss: 1.016981601715088,grad_norm: 0.9999990551123383, iteration: 29604
loss: 1.0412657260894775,grad_norm: 0.9999995815481397, iteration: 29605
loss: 1.0144834518432617,grad_norm: 0.9208313256726478, iteration: 29606
loss: 1.026749849319458,grad_norm: 0.9949289653876426, iteration: 29607
loss: 1.0269814729690552,grad_norm: 0.9461943062335733, iteration: 29608
loss: 1.0115752220153809,grad_norm: 0.8479085036068847, iteration: 29609
loss: 1.0081875324249268,grad_norm: 0.9725774076605878, iteration: 29610
loss: 1.0346966981887817,grad_norm: 0.9999992303536732, iteration: 29611
loss: 1.0033451318740845,grad_norm: 0.8978878345546233, iteration: 29612
loss: 1.0628834962844849,grad_norm: 0.9999992909890252, iteration: 29613
loss: 0.9568135738372803,grad_norm: 0.8946079326616184, iteration: 29614
loss: 1.0140739679336548,grad_norm: 0.8463182644485995, iteration: 29615
loss: 0.9853400588035583,grad_norm: 0.9563190201951053, iteration: 29616
loss: 1.0327348709106445,grad_norm: 0.9999997674850337, iteration: 29617
loss: 1.0128529071807861,grad_norm: 0.9999995690712653, iteration: 29618
loss: 1.0057109594345093,grad_norm: 0.9101268523618198, iteration: 29619
loss: 1.0374730825424194,grad_norm: 0.9691813537971231, iteration: 29620
loss: 1.0355561971664429,grad_norm: 0.9999995609494203, iteration: 29621
loss: 1.0373023748397827,grad_norm: 0.9999989513994318, iteration: 29622
loss: 1.0221459865570068,grad_norm: 0.9999997441092093, iteration: 29623
loss: 0.9567410945892334,grad_norm: 0.9999991825560427, iteration: 29624
loss: 1.009466528892517,grad_norm: 0.9475757564414181, iteration: 29625
loss: 1.0789897441864014,grad_norm: 0.9999996028730633, iteration: 29626
loss: 1.0266259908676147,grad_norm: 0.9999991338068938, iteration: 29627
loss: 0.9811258912086487,grad_norm: 0.9139691381083715, iteration: 29628
loss: 0.9854243397712708,grad_norm: 0.9999992564792586, iteration: 29629
loss: 1.006772756576538,grad_norm: 0.8528985203064722, iteration: 29630
loss: 0.9942759275436401,grad_norm: 0.9999991219289871, iteration: 29631
loss: 1.1698182821273804,grad_norm: 0.9999997577977825, iteration: 29632
loss: 1.0527547597885132,grad_norm: 0.9999990059784126, iteration: 29633
loss: 1.0183143615722656,grad_norm: 0.9999995367055371, iteration: 29634
loss: 1.005061149597168,grad_norm: 0.9185811956407934, iteration: 29635
loss: 0.9361240267753601,grad_norm: 0.956957768472096, iteration: 29636
loss: 1.0125445127487183,grad_norm: 0.9765316741142153, iteration: 29637
loss: 1.022141695022583,grad_norm: 0.9999991048465868, iteration: 29638
loss: 1.054748773574829,grad_norm: 0.9999994078420056, iteration: 29639
loss: 0.9495257139205933,grad_norm: 0.9999991066424405, iteration: 29640
loss: 1.0427685976028442,grad_norm: 0.9999992295906829, iteration: 29641
loss: 1.0159512758255005,grad_norm: 0.8964810959939162, iteration: 29642
loss: 0.9732748866081238,grad_norm: 0.7901401733610728, iteration: 29643
loss: 1.0245636701583862,grad_norm: 0.8709335948632211, iteration: 29644
loss: 1.0252747535705566,grad_norm: 0.9999992058317948, iteration: 29645
loss: 1.0412250757217407,grad_norm: 0.9999991942158243, iteration: 29646
loss: 1.0448349714279175,grad_norm: 0.9999998061228123, iteration: 29647
loss: 1.0316628217697144,grad_norm: 0.9999990700901985, iteration: 29648
loss: 0.9949853420257568,grad_norm: 0.9999990032977168, iteration: 29649
loss: 0.9993341565132141,grad_norm: 0.8389701497518866, iteration: 29650
loss: 0.9950034022331238,grad_norm: 0.9999993551839609, iteration: 29651
loss: 0.9805153608322144,grad_norm: 0.8346026621459053, iteration: 29652
loss: 0.9863153100013733,grad_norm: 0.9999999310570963, iteration: 29653
loss: 1.020329236984253,grad_norm: 0.9999993211181211, iteration: 29654
loss: 1.1480578184127808,grad_norm: 0.9999993235978901, iteration: 29655
loss: 1.0226151943206787,grad_norm: 0.9999991746477511, iteration: 29656
loss: 1.0282975435256958,grad_norm: 0.9999994954732225, iteration: 29657
loss: 0.9778663516044617,grad_norm: 0.7762947330719573, iteration: 29658
loss: 0.9896627068519592,grad_norm: 0.9999995428949542, iteration: 29659
loss: 1.0154670476913452,grad_norm: 0.9999991994817279, iteration: 29660
loss: 0.9659384489059448,grad_norm: 0.999999179069684, iteration: 29661
loss: 1.0011100769042969,grad_norm: 0.9609509454627381, iteration: 29662
loss: 0.9829042553901672,grad_norm: 0.8918241312565038, iteration: 29663
loss: 0.9847160577774048,grad_norm: 0.9999992104809753, iteration: 29664
loss: 1.047853708267212,grad_norm: 0.9999996619121736, iteration: 29665
loss: 1.0317083597183228,grad_norm: 0.999999239935022, iteration: 29666
loss: 1.0367950201034546,grad_norm: 0.9999996881484232, iteration: 29667
loss: 1.022308111190796,grad_norm: 0.9999993907384676, iteration: 29668
loss: 1.0220867395401,grad_norm: 0.9341975142059272, iteration: 29669
loss: 0.9978519082069397,grad_norm: 0.9999992942802369, iteration: 29670
loss: 0.9974377751350403,grad_norm: 0.9999990952417036, iteration: 29671
loss: 1.0437262058258057,grad_norm: 0.9684327155899985, iteration: 29672
loss: 1.0225504636764526,grad_norm: 0.999999449066981, iteration: 29673
loss: 1.0496567487716675,grad_norm: 0.9815333923879184, iteration: 29674
loss: 1.0698872804641724,grad_norm: 0.9999993015119412, iteration: 29675
loss: 1.0191106796264648,grad_norm: 0.8674132312680752, iteration: 29676
loss: 0.9921764731407166,grad_norm: 0.9150028394937799, iteration: 29677
loss: 1.0336410999298096,grad_norm: 0.8251471914263079, iteration: 29678
loss: 0.9997836947441101,grad_norm: 0.9999990355505891, iteration: 29679
loss: 0.9968114495277405,grad_norm: 0.999999392011276, iteration: 29680
loss: 1.0542176961898804,grad_norm: 0.9999995245195163, iteration: 29681
loss: 0.9966028928756714,grad_norm: 0.8846735568908727, iteration: 29682
loss: 1.0016906261444092,grad_norm: 0.9999990962115874, iteration: 29683
loss: 1.0095514059066772,grad_norm: 0.9999991774034279, iteration: 29684
loss: 0.989075779914856,grad_norm: 0.9999993474385125, iteration: 29685
loss: 1.2275512218475342,grad_norm: 0.9999993450498438, iteration: 29686
loss: 1.000267505645752,grad_norm: 0.7996740086699651, iteration: 29687
loss: 1.0024689435958862,grad_norm: 0.9999999072298552, iteration: 29688
loss: 1.0763044357299805,grad_norm: 0.9999993466872727, iteration: 29689
loss: 1.0051273107528687,grad_norm: 0.9999992779248145, iteration: 29690
loss: 1.025744915008545,grad_norm: 0.9667205287907901, iteration: 29691
loss: 1.0633718967437744,grad_norm: 0.9999995380456064, iteration: 29692
loss: 1.0512511730194092,grad_norm: 0.9999994552183862, iteration: 29693
loss: 0.9892241954803467,grad_norm: 0.9742050512131559, iteration: 29694
loss: 1.0054646730422974,grad_norm: 0.9742177475400403, iteration: 29695
loss: 1.0016145706176758,grad_norm: 0.9999998436758258, iteration: 29696
loss: 0.9794108271598816,grad_norm: 0.9999991966162122, iteration: 29697
loss: 1.0018696784973145,grad_norm: 0.8986375323580688, iteration: 29698
loss: 1.06080162525177,grad_norm: 0.9999996344103319, iteration: 29699
loss: 1.0458158254623413,grad_norm: 0.9999992914083415, iteration: 29700
loss: 1.030153512954712,grad_norm: 0.8322778212093959, iteration: 29701
loss: 1.0000149011611938,grad_norm: 0.9657894709367488, iteration: 29702
loss: 0.9955394864082336,grad_norm: 0.9999992223591548, iteration: 29703
loss: 1.0424084663391113,grad_norm: 0.9999996941345478, iteration: 29704
loss: 1.027273416519165,grad_norm: 0.9999991112824228, iteration: 29705
loss: 1.0161775350570679,grad_norm: 0.9999992233856675, iteration: 29706
loss: 1.034143328666687,grad_norm: 0.9999993545715755, iteration: 29707
loss: 1.0033395290374756,grad_norm: 0.8302430829647025, iteration: 29708
loss: 1.0353816747665405,grad_norm: 0.9999991772104408, iteration: 29709
loss: 0.9976253509521484,grad_norm: 0.9999989839202927, iteration: 29710
loss: 1.040755033493042,grad_norm: 0.9999991839176261, iteration: 29711
loss: 0.9876767992973328,grad_norm: 0.9999990721417605, iteration: 29712
loss: 1.0015467405319214,grad_norm: 0.9999991149604048, iteration: 29713
loss: 0.9707905054092407,grad_norm: 0.999999275981916, iteration: 29714
loss: 0.9983084201812744,grad_norm: 0.9766559923563404, iteration: 29715
loss: 0.9781100153923035,grad_norm: 0.9999991287365689, iteration: 29716
loss: 1.0333805084228516,grad_norm: 0.9999991685972285, iteration: 29717
loss: 0.9625144004821777,grad_norm: 0.9109310167021132, iteration: 29718
loss: 0.9832072257995605,grad_norm: 0.9999990793915354, iteration: 29719
loss: 0.9580669403076172,grad_norm: 0.9999990714656091, iteration: 29720
loss: 0.9752410650253296,grad_norm: 0.9999992070957381, iteration: 29721
loss: 1.04398775100708,grad_norm: 0.9999994393788088, iteration: 29722
loss: 1.0061721801757812,grad_norm: 0.9999991460506955, iteration: 29723
loss: 1.0337910652160645,grad_norm: 0.8308057671570869, iteration: 29724
loss: 1.0548746585845947,grad_norm: 0.9999992111331775, iteration: 29725
loss: 0.9840695858001709,grad_norm: 0.9999990884383657, iteration: 29726
loss: 1.0893375873565674,grad_norm: 0.9999992247104085, iteration: 29727
loss: 0.9899332523345947,grad_norm: 0.9021672773816841, iteration: 29728
loss: 0.9900391101837158,grad_norm: 0.9999992323030049, iteration: 29729
loss: 1.0330506563186646,grad_norm: 0.9445095684334485, iteration: 29730
loss: 1.014503836631775,grad_norm: 0.7363613339775579, iteration: 29731
loss: 0.973698079586029,grad_norm: 0.9836294866702602, iteration: 29732
loss: 1.0068933963775635,grad_norm: 0.9477751221917436, iteration: 29733
loss: 1.0124483108520508,grad_norm: 0.8976121919180581, iteration: 29734
loss: 1.0540575981140137,grad_norm: 0.956328778470038, iteration: 29735
loss: 1.0360984802246094,grad_norm: 0.9424520582419071, iteration: 29736
loss: 1.001038908958435,grad_norm: 0.9999990773833838, iteration: 29737
loss: 1.009334683418274,grad_norm: 0.9999989774134357, iteration: 29738
loss: 1.0580908060073853,grad_norm: 0.9999995066462875, iteration: 29739
loss: 1.0373990535736084,grad_norm: 0.9999992374396707, iteration: 29740
loss: 1.1192803382873535,grad_norm: 0.999999188587979, iteration: 29741
loss: 1.0068366527557373,grad_norm: 0.724536945212361, iteration: 29742
loss: 1.0295621156692505,grad_norm: 0.9999995666061756, iteration: 29743
loss: 1.0024356842041016,grad_norm: 0.8799568244189506, iteration: 29744
loss: 1.0307599306106567,grad_norm: 0.9876837204447229, iteration: 29745
loss: 1.0710842609405518,grad_norm: 0.9999996066670428, iteration: 29746
loss: 0.9983488321304321,grad_norm: 0.999999056895762, iteration: 29747
loss: 0.978285014629364,grad_norm: 0.89132270882712, iteration: 29748
loss: 1.0501455068588257,grad_norm: 0.9999997488283733, iteration: 29749
loss: 0.9877933859825134,grad_norm: 0.9999991334740304, iteration: 29750
loss: 0.990301251411438,grad_norm: 0.8339326245409784, iteration: 29751
loss: 1.0067178010940552,grad_norm: 0.9999992648515893, iteration: 29752
loss: 1.0127578973770142,grad_norm: 0.8763870185805107, iteration: 29753
loss: 1.0179023742675781,grad_norm: 0.9565667978277959, iteration: 29754
loss: 0.9966492652893066,grad_norm: 0.9999990544415195, iteration: 29755
loss: 1.0315518379211426,grad_norm: 0.94598932172774, iteration: 29756
loss: 1.0049219131469727,grad_norm: 0.981432954703857, iteration: 29757
loss: 1.0052802562713623,grad_norm: 0.9999991599667749, iteration: 29758
loss: 1.0034053325653076,grad_norm: 0.8644867962384395, iteration: 29759
loss: 1.0067461729049683,grad_norm: 0.8654174046214118, iteration: 29760
loss: 0.9765493273735046,grad_norm: 0.9352959712164725, iteration: 29761
loss: 0.9943663477897644,grad_norm: 0.9864035403813308, iteration: 29762
loss: 1.0115841627120972,grad_norm: 0.9312353565642952, iteration: 29763
loss: 0.9955706000328064,grad_norm: 0.9999990346096536, iteration: 29764
loss: 1.0020695924758911,grad_norm: 0.8812723217264349, iteration: 29765
loss: 0.9737474918365479,grad_norm: 0.976324097820207, iteration: 29766
loss: 1.0276838541030884,grad_norm: 0.9999990808571184, iteration: 29767
loss: 1.0409801006317139,grad_norm: 0.908447354054678, iteration: 29768
loss: 0.9685338735580444,grad_norm: 0.8909318123763299, iteration: 29769
loss: 1.0158196687698364,grad_norm: 0.9999991299261224, iteration: 29770
loss: 1.0161365270614624,grad_norm: 0.9114493789635253, iteration: 29771
loss: 0.9937114119529724,grad_norm: 0.9999992291473342, iteration: 29772
loss: 1.0006451606750488,grad_norm: 0.9999991246867159, iteration: 29773
loss: 1.022641897201538,grad_norm: 0.999999613244727, iteration: 29774
loss: 1.0082939863204956,grad_norm: 0.969306929195763, iteration: 29775
loss: 1.0115917921066284,grad_norm: 0.9999991783287279, iteration: 29776
loss: 0.9759556651115417,grad_norm: 0.9804158072793785, iteration: 29777
loss: 1.0233153104782104,grad_norm: 0.9338679463810614, iteration: 29778
loss: 1.050649881362915,grad_norm: 0.9999997500824929, iteration: 29779
loss: 0.991095781326294,grad_norm: 0.9999992522353077, iteration: 29780
loss: 1.097642421722412,grad_norm: 0.9999995247827159, iteration: 29781
loss: 1.0290096998214722,grad_norm: 0.9999992360183585, iteration: 29782
loss: 1.00503408908844,grad_norm: 0.8579740602535137, iteration: 29783
loss: 1.0173152685165405,grad_norm: 0.8731064943397993, iteration: 29784
loss: 1.044948935508728,grad_norm: 0.9999993574887756, iteration: 29785
loss: 1.0047025680541992,grad_norm: 0.8818594216358999, iteration: 29786
loss: 0.9801703691482544,grad_norm: 0.9999990031993164, iteration: 29787
loss: 1.033360481262207,grad_norm: 0.932831414402985, iteration: 29788
loss: 1.025522232055664,grad_norm: 0.9999990689100676, iteration: 29789
loss: 0.9988973736763,grad_norm: 0.9999990691229643, iteration: 29790
loss: 1.0354905128479004,grad_norm: 0.9999996597468574, iteration: 29791
loss: 1.002681016921997,grad_norm: 0.9999990885161065, iteration: 29792
loss: 1.0327507257461548,grad_norm: 0.9999994198507506, iteration: 29793
loss: 1.001402735710144,grad_norm: 0.9999990743997066, iteration: 29794
loss: 0.9879739880561829,grad_norm: 0.9999990026655391, iteration: 29795
loss: 1.0183640718460083,grad_norm: 0.9999990847716685, iteration: 29796
loss: 0.9767189621925354,grad_norm: 0.9603223597135077, iteration: 29797
loss: 1.0026057958602905,grad_norm: 0.9869726953189419, iteration: 29798
loss: 1.0375243425369263,grad_norm: 0.9999995149288551, iteration: 29799
loss: 1.0345017910003662,grad_norm: 0.9999992140031577, iteration: 29800
loss: 1.0251096487045288,grad_norm: 0.999999210467295, iteration: 29801
loss: 1.0167325735092163,grad_norm: 0.9612898473557602, iteration: 29802
loss: 1.0426005125045776,grad_norm: 0.9999990427844699, iteration: 29803
loss: 1.071183681488037,grad_norm: 0.9999993144312613, iteration: 29804
loss: 1.0286484956741333,grad_norm: 1.0000000321269087, iteration: 29805
loss: 0.9978076219558716,grad_norm: 0.7998053114060356, iteration: 29806
loss: 0.9945892095565796,grad_norm: 0.9353175598395456, iteration: 29807
loss: 1.0259844064712524,grad_norm: 0.9725027546779396, iteration: 29808
loss: 0.9922791123390198,grad_norm: 0.9999992129722128, iteration: 29809
loss: 0.9780870079994202,grad_norm: 0.8795495678979172, iteration: 29810
loss: 1.019117832183838,grad_norm: 0.9999989643271915, iteration: 29811
loss: 1.0004661083221436,grad_norm: 0.9999996701672937, iteration: 29812
loss: 1.0193318128585815,grad_norm: 0.9999997653539555, iteration: 29813
loss: 1.0199295282363892,grad_norm: 0.782163193025127, iteration: 29814
loss: 1.0355335474014282,grad_norm: 0.9999991603811121, iteration: 29815
loss: 1.0348201990127563,grad_norm: 0.9999991704454522, iteration: 29816
loss: 1.0134590864181519,grad_norm: 0.978501080436883, iteration: 29817
loss: 1.0216560363769531,grad_norm: 0.9999990621034642, iteration: 29818
loss: 0.9764862060546875,grad_norm: 0.962975486093783, iteration: 29819
loss: 1.0482358932495117,grad_norm: 0.999999210350571, iteration: 29820
loss: 0.9990757703781128,grad_norm: 0.8929257889133384, iteration: 29821
loss: 1.0326188802719116,grad_norm: 0.9384099108249266, iteration: 29822
loss: 1.031203269958496,grad_norm: 0.8759109688783373, iteration: 29823
loss: 0.9918311834335327,grad_norm: 0.993345103735017, iteration: 29824
loss: 0.962059497833252,grad_norm: 0.9579429987740419, iteration: 29825
loss: 1.054095983505249,grad_norm: 0.9999996467244167, iteration: 29826
loss: 1.0281665325164795,grad_norm: 0.9999990607914301, iteration: 29827
loss: 1.1078029870986938,grad_norm: 0.9999996652719549, iteration: 29828
loss: 1.0242325067520142,grad_norm: 0.9999991706272918, iteration: 29829
loss: 0.9872201085090637,grad_norm: 0.9999997223880245, iteration: 29830
loss: 0.9963094592094421,grad_norm: 0.9999993013436813, iteration: 29831
loss: 1.0148390531539917,grad_norm: 0.9822888131456844, iteration: 29832
loss: 1.0311529636383057,grad_norm: 0.9999991190784938, iteration: 29833
loss: 1.0095700025558472,grad_norm: 0.9627457456859878, iteration: 29834
loss: 1.003233790397644,grad_norm: 0.9881868677306147, iteration: 29835
loss: 0.9911999106407166,grad_norm: 0.9999991379435109, iteration: 29836
loss: 0.9780833721160889,grad_norm: 0.9999989905376856, iteration: 29837
loss: 0.9501827359199524,grad_norm: 0.9780280454236048, iteration: 29838
loss: 1.0169481039047241,grad_norm: 0.9999991727739417, iteration: 29839
loss: 1.0239800214767456,grad_norm: 0.8976686621316067, iteration: 29840
loss: 1.0237370729446411,grad_norm: 0.9999991772036882, iteration: 29841
loss: 1.0478817224502563,grad_norm: 0.9999992179165054, iteration: 29842
loss: 1.0421916246414185,grad_norm: 0.945399981341944, iteration: 29843
loss: 0.9995858073234558,grad_norm: 0.9999992521413238, iteration: 29844
loss: 1.014927625656128,grad_norm: 0.962802556625261, iteration: 29845
loss: 0.9889485836029053,grad_norm: 0.8749896018665365, iteration: 29846
loss: 1.0098875761032104,grad_norm: 0.8597566901470425, iteration: 29847
loss: 1.0013586282730103,grad_norm: 0.999999484624331, iteration: 29848
loss: 1.0342732667922974,grad_norm: 0.9999991140647356, iteration: 29849
loss: 1.0325273275375366,grad_norm: 0.9999993954430859, iteration: 29850
loss: 1.0570189952850342,grad_norm: 0.9999994604189575, iteration: 29851
loss: 1.048437237739563,grad_norm: 0.9999993606277215, iteration: 29852
loss: 0.993657112121582,grad_norm: 0.99999909398743, iteration: 29853
loss: 0.9944486021995544,grad_norm: 0.9694991035798157, iteration: 29854
loss: 1.000866174697876,grad_norm: 0.9999992519635204, iteration: 29855
loss: 1.0332839488983154,grad_norm: 0.9999990457521781, iteration: 29856
loss: 1.0509369373321533,grad_norm: 0.9312623095283886, iteration: 29857
loss: 0.9883903861045837,grad_norm: 0.9999991948528038, iteration: 29858
loss: 0.9955630302429199,grad_norm: 0.9999990719819065, iteration: 29859
loss: 1.0562677383422852,grad_norm: 0.9999993360077061, iteration: 29860
loss: 1.0080580711364746,grad_norm: 0.9999990421087961, iteration: 29861
loss: 1.001442790031433,grad_norm: 0.9999989049001354, iteration: 29862
loss: 1.0435479879379272,grad_norm: 0.8338921111299931, iteration: 29863
loss: 1.070993423461914,grad_norm: 0.999999511334397, iteration: 29864
loss: 0.9849472641944885,grad_norm: 0.9999994995014602, iteration: 29865
loss: 1.0581189393997192,grad_norm: 0.9999996257050159, iteration: 29866
loss: 1.0122255086898804,grad_norm: 0.9999997942443252, iteration: 29867
loss: 1.0461969375610352,grad_norm: 0.9999995928682937, iteration: 29868
loss: 1.0116760730743408,grad_norm: 0.9999993499706579, iteration: 29869
loss: 1.0115301609039307,grad_norm: 0.9846707957707633, iteration: 29870
loss: 1.0507750511169434,grad_norm: 0.9999999154438509, iteration: 29871
loss: 0.9937579035758972,grad_norm: 0.9999991003350621, iteration: 29872
loss: 1.0593570470809937,grad_norm: 0.9999997028629017, iteration: 29873
loss: 1.0699489116668701,grad_norm: 0.9999993646992801, iteration: 29874
loss: 1.0328350067138672,grad_norm: 0.9999990892598659, iteration: 29875
loss: 1.030105471611023,grad_norm: 0.99999927859026, iteration: 29876
loss: 1.0084317922592163,grad_norm: 0.9999990296764331, iteration: 29877
loss: 0.9931633472442627,grad_norm: 0.8729800182513799, iteration: 29878
loss: 1.0038065910339355,grad_norm: 0.9999992285331981, iteration: 29879
loss: 0.9965362548828125,grad_norm: 0.8851809283528127, iteration: 29880
loss: 1.0390757322311401,grad_norm: 0.9999994389774602, iteration: 29881
loss: 1.0421072244644165,grad_norm: 0.9999997850316995, iteration: 29882
loss: 1.0121647119522095,grad_norm: 0.9999990644770551, iteration: 29883
loss: 1.0076978206634521,grad_norm: 0.9170867468750962, iteration: 29884
loss: 0.9953398704528809,grad_norm: 0.9046767961446965, iteration: 29885
loss: 1.0313986539840698,grad_norm: 0.9999991103048308, iteration: 29886
loss: 0.9499866366386414,grad_norm: 0.9999991697625059, iteration: 29887
loss: 1.0735985040664673,grad_norm: 0.9999990821468716, iteration: 29888
loss: 1.040496587753296,grad_norm: 0.9999994831989049, iteration: 29889
loss: 1.0016939640045166,grad_norm: 0.9999992263808056, iteration: 29890
loss: 1.0568954944610596,grad_norm: 0.9999991273908606, iteration: 29891
loss: 1.1133641004562378,grad_norm: 0.9999994308789039, iteration: 29892
loss: 1.0144137144088745,grad_norm: 0.999999220226082, iteration: 29893
loss: 1.0396188497543335,grad_norm: 0.9690948695226825, iteration: 29894
loss: 0.974378228187561,grad_norm: 0.9999990954909805, iteration: 29895
loss: 0.97333824634552,grad_norm: 0.8619704689687425, iteration: 29896
loss: 0.9907169342041016,grad_norm: 0.953928523832113, iteration: 29897
loss: 0.9984193444252014,grad_norm: 0.999999091447576, iteration: 29898
loss: 0.9915016293525696,grad_norm: 0.8025352951792175, iteration: 29899
loss: 1.0228337049484253,grad_norm: 0.9999997015121336, iteration: 29900
loss: 1.0256128311157227,grad_norm: 0.9999989363943018, iteration: 29901
loss: 0.9922018647193909,grad_norm: 0.9157924595798091, iteration: 29902
loss: 1.0292787551879883,grad_norm: 0.9232213080023534, iteration: 29903
loss: 0.9889602065086365,grad_norm: 0.9404486542997378, iteration: 29904
loss: 1.218111276626587,grad_norm: 0.9999997651536721, iteration: 29905
loss: 0.996035099029541,grad_norm: 0.999999138397412, iteration: 29906
loss: 1.20094633102417,grad_norm: 0.9999997032641368, iteration: 29907
loss: 1.0208853483200073,grad_norm: 0.9999990691210141, iteration: 29908
loss: 0.9698380827903748,grad_norm: 0.8755615729736265, iteration: 29909
loss: 1.1053723096847534,grad_norm: 0.9999995565699644, iteration: 29910
loss: 1.0096726417541504,grad_norm: 0.8269652355374575, iteration: 29911
loss: 1.0431147813796997,grad_norm: 0.9999991612769659, iteration: 29912
loss: 1.0523650646209717,grad_norm: 0.9999997102415257, iteration: 29913
loss: 0.9816117286682129,grad_norm: 0.9999990572475392, iteration: 29914
loss: 1.0370373725891113,grad_norm: 0.9999991125631991, iteration: 29915
loss: 1.0366994142532349,grad_norm: 0.9999991809125626, iteration: 29916
loss: 1.0394244194030762,grad_norm: 0.9999994647654762, iteration: 29917
loss: 0.9721415638923645,grad_norm: 0.9999991487905461, iteration: 29918
loss: 1.0170118808746338,grad_norm: 0.999999070472598, iteration: 29919
loss: 1.0729539394378662,grad_norm: 0.99999993127577, iteration: 29920
loss: 1.1314737796783447,grad_norm: 0.9999995259636236, iteration: 29921
loss: 1.0180660486221313,grad_norm: 0.9999991200723457, iteration: 29922
loss: 0.9731156229972839,grad_norm: 0.9999991786421906, iteration: 29923
loss: 1.047160029411316,grad_norm: 0.9401230836076075, iteration: 29924
loss: 0.9844794273376465,grad_norm: 0.9687247426077625, iteration: 29925
loss: 0.9916766285896301,grad_norm: 0.9999997616918566, iteration: 29926
loss: 1.0659689903259277,grad_norm: 0.9999991536437082, iteration: 29927
loss: 1.017304539680481,grad_norm: 0.999999084071027, iteration: 29928
loss: 1.0948766469955444,grad_norm: 0.9999995277932942, iteration: 29929
loss: 1.014549970626831,grad_norm: 0.999999224464348, iteration: 29930
loss: 0.9778187870979309,grad_norm: 0.9999991458389419, iteration: 29931
loss: 1.0314254760742188,grad_norm: 0.8846443332225997, iteration: 29932
loss: 0.9895117282867432,grad_norm: 0.9999990957344695, iteration: 29933
loss: 1.023974895477295,grad_norm: 0.9999990541869961, iteration: 29934
loss: 1.051210641860962,grad_norm: 0.953325547669464, iteration: 29935
loss: 1.0361744165420532,grad_norm: 0.9999990495587435, iteration: 29936
loss: 1.0412284135818481,grad_norm: 0.9999992300469772, iteration: 29937
loss: 1.1052062511444092,grad_norm: 0.9999997983881939, iteration: 29938
loss: 1.017316222190857,grad_norm: 0.8989093581518219, iteration: 29939
loss: 0.9864924550056458,grad_norm: 0.9999993276690053, iteration: 29940
loss: 0.9704912304878235,grad_norm: 0.9999992265368901, iteration: 29941
loss: 0.9804152250289917,grad_norm: 0.9999991633686663, iteration: 29942
loss: 1.0269299745559692,grad_norm: 0.9203258415358986, iteration: 29943
loss: 1.0868891477584839,grad_norm: 0.9999992645809611, iteration: 29944
loss: 1.0570529699325562,grad_norm: 0.9999991775700323, iteration: 29945
loss: 1.0013453960418701,grad_norm: 0.9999990744666508, iteration: 29946
loss: 0.9826085567474365,grad_norm: 0.9775198285935699, iteration: 29947
loss: 0.9963893890380859,grad_norm: 0.9999996444752989, iteration: 29948
loss: 1.011208176612854,grad_norm: 0.999999104402848, iteration: 29949
loss: 0.9799573421478271,grad_norm: 0.9999991515538919, iteration: 29950
loss: 1.0168571472167969,grad_norm: 0.999999094172812, iteration: 29951
loss: 1.0055466890335083,grad_norm: 0.8687832888802923, iteration: 29952
loss: 0.9743151068687439,grad_norm: 0.9999993065758733, iteration: 29953
loss: 1.0415525436401367,grad_norm: 0.9999992312570174, iteration: 29954
loss: 1.0143970251083374,grad_norm: 0.9905367801293519, iteration: 29955
loss: 1.0321087837219238,grad_norm: 0.9870903419290498, iteration: 29956
loss: 1.0315438508987427,grad_norm: 0.9738579153188942, iteration: 29957
loss: 1.0015851259231567,grad_norm: 0.9340745793408254, iteration: 29958
loss: 1.024132251739502,grad_norm: 0.9999990418846717, iteration: 29959
loss: 0.9729923605918884,grad_norm: 0.9299185704423678, iteration: 29960
loss: 0.9738313555717468,grad_norm: 0.8685408838669146, iteration: 29961
loss: 1.0418721437454224,grad_norm: 0.9038053816717166, iteration: 29962
loss: 1.0090599060058594,grad_norm: 0.9999992163059268, iteration: 29963
loss: 0.9917762875556946,grad_norm: 0.9785953989625082, iteration: 29964
loss: 0.9922506213188171,grad_norm: 0.9196226007591448, iteration: 29965
loss: 1.0280357599258423,grad_norm: 0.9999991411490601, iteration: 29966
loss: 0.9966514706611633,grad_norm: 0.9999990563549701, iteration: 29967
loss: 1.026202917098999,grad_norm: 0.9999990583254904, iteration: 29968
loss: 1.0898256301879883,grad_norm: 0.9999992024195359, iteration: 29969
loss: 0.9883154630661011,grad_norm: 0.8502596748320398, iteration: 29970
loss: 1.0264699459075928,grad_norm: 0.8922720218498372, iteration: 29971
loss: 1.0407785177230835,grad_norm: 0.9999991575438001, iteration: 29972
loss: 1.0121347904205322,grad_norm: 0.9999991322817773, iteration: 29973
loss: 1.0090380907058716,grad_norm: 0.8389299493072147, iteration: 29974
loss: 0.9894699454307556,grad_norm: 0.9632215289834561, iteration: 29975
loss: 0.9816456437110901,grad_norm: 0.9999991652176153, iteration: 29976
loss: 1.0399144887924194,grad_norm: 0.9999989682479513, iteration: 29977
loss: 0.9829257130622864,grad_norm: 0.9765747367724111, iteration: 29978
loss: 0.991226851940155,grad_norm: 0.9999995024513758, iteration: 29979
loss: 1.0178967714309692,grad_norm: 0.9999993414756944, iteration: 29980
loss: 1.0269747972488403,grad_norm: 0.9999991904495603, iteration: 29981
loss: 0.9923300743103027,grad_norm: 0.9999989905443821, iteration: 29982
loss: 1.0160667896270752,grad_norm: 0.9999992929747527, iteration: 29983
loss: 0.9907937049865723,grad_norm: 0.9999991832725071, iteration: 29984
loss: 1.131904125213623,grad_norm: 0.9999990906222672, iteration: 29985
loss: 1.009939432144165,grad_norm: 0.8304315328286418, iteration: 29986
loss: 1.122649073600769,grad_norm: 0.9999998815044135, iteration: 29987
loss: 1.2166345119476318,grad_norm: 0.9999994871665973, iteration: 29988
loss: 0.9941499829292297,grad_norm: 0.9833839898202846, iteration: 29989
loss: 1.026412844657898,grad_norm: 0.9505854405419386, iteration: 29990
loss: 1.007351040840149,grad_norm: 0.875492467666395, iteration: 29991
loss: 1.0405254364013672,grad_norm: 1.0000000015681059, iteration: 29992
loss: 1.0124809741973877,grad_norm: 0.9402096840464674, iteration: 29993
loss: 0.9834195971488953,grad_norm: 0.9642076031058368, iteration: 29994
loss: 1.048421859741211,grad_norm: 0.9999993241468751, iteration: 29995
loss: 1.0859946012496948,grad_norm: 0.9999997763861667, iteration: 29996
loss: 1.028723120689392,grad_norm: 0.9999997370734156, iteration: 29997
loss: 1.0078010559082031,grad_norm: 0.8217018724269717, iteration: 29998
loss: 0.9809626340866089,grad_norm: 0.9532911108859302, iteration: 29999
loss: 0.9504597783088684,grad_norm: 0.989001105322766, iteration: 30000
Evaluating at step 30000
{'val': 0.9991247896105051, 'test': 2.7374209923485218}
loss: 1.046133041381836,grad_norm: 0.9999997044318439, iteration: 30001
loss: 1.0030466318130493,grad_norm: 0.9999990589126792, iteration: 30002
loss: 1.0047328472137451,grad_norm: 0.974687206630158, iteration: 30003
loss: 1.1086546182632446,grad_norm: 0.9999997459843694, iteration: 30004
loss: 1.021275520324707,grad_norm: 0.9999992891744138, iteration: 30005
loss: 0.9802923202514648,grad_norm: 0.9999992009123593, iteration: 30006
loss: 1.0110445022583008,grad_norm: 0.9999993533671394, iteration: 30007
loss: 0.9345685243606567,grad_norm: 0.7780783095554469, iteration: 30008
loss: 1.0046662092208862,grad_norm: 0.9999992846674013, iteration: 30009
loss: 1.0211570262908936,grad_norm: 0.9174291233302865, iteration: 30010
loss: 1.018644094467163,grad_norm: 0.9151176713802327, iteration: 30011
loss: 0.9938012957572937,grad_norm: 0.9999994596809358, iteration: 30012
loss: 1.0474798679351807,grad_norm: 0.9999994213956478, iteration: 30013
loss: 1.0534310340881348,grad_norm: 0.9999989957066033, iteration: 30014
loss: 1.0105805397033691,grad_norm: 0.9820426425472392, iteration: 30015
loss: 1.060367226600647,grad_norm: 0.9999991189240844, iteration: 30016
loss: 0.9921300411224365,grad_norm: 0.9999993002452001, iteration: 30017
loss: 0.9872494339942932,grad_norm: 0.999999508992658, iteration: 30018
loss: 1.0054514408111572,grad_norm: 0.9999992941050725, iteration: 30019
loss: 1.0345309972763062,grad_norm: 0.9999991498270628, iteration: 30020
loss: 1.0194019079208374,grad_norm: 0.9507337052967539, iteration: 30021
loss: 1.0783530473709106,grad_norm: 0.999999093764736, iteration: 30022
loss: 1.0027884244918823,grad_norm: 0.9999994866449026, iteration: 30023
loss: 0.9938824772834778,grad_norm: 0.9999993535625719, iteration: 30024
loss: 0.9874510169029236,grad_norm: 0.7747758579379787, iteration: 30025
loss: 1.004480004310608,grad_norm: 0.9999990339314266, iteration: 30026
loss: 1.0209213495254517,grad_norm: 0.9999992805045201, iteration: 30027
loss: 1.0133264064788818,grad_norm: 0.9999991637912, iteration: 30028
loss: 1.056203842163086,grad_norm: 0.9999994136366643, iteration: 30029
loss: 1.036579966545105,grad_norm: 0.9999996000950759, iteration: 30030
loss: 0.990431547164917,grad_norm: 0.9999994993372143, iteration: 30031
loss: 1.0221065282821655,grad_norm: 0.8568024199652169, iteration: 30032
loss: 0.9937068819999695,grad_norm: 0.9999991401740539, iteration: 30033
loss: 1.0009794235229492,grad_norm: 0.9494097372708049, iteration: 30034
loss: 1.0225529670715332,grad_norm: 0.9999993861282469, iteration: 30035
loss: 1.0820589065551758,grad_norm: 0.9999991700291911, iteration: 30036
loss: 1.034787893295288,grad_norm: 0.9426974389969579, iteration: 30037
loss: 0.9810324311256409,grad_norm: 0.9999990937786218, iteration: 30038
loss: 1.0311646461486816,grad_norm: 0.9999993492180298, iteration: 30039
loss: 1.0073713064193726,grad_norm: 0.999999051795416, iteration: 30040
loss: 1.022767424583435,grad_norm: 0.999999093217121, iteration: 30041
loss: 1.1221402883529663,grad_norm: 0.9999992748748291, iteration: 30042
loss: 1.0396363735198975,grad_norm: 0.9999990672925623, iteration: 30043
loss: 1.0269566774368286,grad_norm: 0.999999534724879, iteration: 30044
loss: 1.0198378562927246,grad_norm: 0.9674937433143506, iteration: 30045
loss: 0.995897114276886,grad_norm: 0.9698359248467737, iteration: 30046
loss: 1.0094420909881592,grad_norm: 0.8633427346617423, iteration: 30047
loss: 1.01436185836792,grad_norm: 0.9999992539321132, iteration: 30048
loss: 1.097809910774231,grad_norm: 0.9751057513226705, iteration: 30049
loss: 1.0045033693313599,grad_norm: 0.9305560525272443, iteration: 30050
loss: 0.9851998090744019,grad_norm: 0.8948664359809133, iteration: 30051
loss: 1.0002824068069458,grad_norm: 0.9051002131319076, iteration: 30052
loss: 1.0263376235961914,grad_norm: 0.9711076454328169, iteration: 30053
loss: 1.0915671586990356,grad_norm: 0.9999997274349136, iteration: 30054
loss: 0.9856358170509338,grad_norm: 0.9999994278480354, iteration: 30055
loss: 0.974415123462677,grad_norm: 0.9999991865408295, iteration: 30056
loss: 1.011847972869873,grad_norm: 0.9999990006653104, iteration: 30057
loss: 0.9869794249534607,grad_norm: 0.999999037336695, iteration: 30058
loss: 0.9900400042533875,grad_norm: 0.9999993581721704, iteration: 30059
loss: 1.0049318075180054,grad_norm: 0.9999994655459532, iteration: 30060
loss: 1.018162488937378,grad_norm: 0.9999990275805121, iteration: 30061
loss: 1.0324634313583374,grad_norm: 0.9999994065129256, iteration: 30062
loss: 1.0171452760696411,grad_norm: 0.9937911393242811, iteration: 30063
loss: 1.0396218299865723,grad_norm: 0.9999990989209872, iteration: 30064
loss: 1.0263886451721191,grad_norm: 0.9999991948427557, iteration: 30065
loss: 1.0133332014083862,grad_norm: 0.9114954519411445, iteration: 30066
loss: 1.0215368270874023,grad_norm: 0.9999994141201061, iteration: 30067
loss: 1.039970874786377,grad_norm: 0.9999990134800618, iteration: 30068
loss: 1.0042387247085571,grad_norm: 0.9999995186442604, iteration: 30069
loss: 1.016357660293579,grad_norm: 0.9999989753051434, iteration: 30070
loss: 1.0131902694702148,grad_norm: 0.9433025724670792, iteration: 30071
loss: 1.0187450647354126,grad_norm: 0.9695549656440839, iteration: 30072
loss: 1.0950483083724976,grad_norm: 0.9999995921154053, iteration: 30073
loss: 1.0675159692764282,grad_norm: 0.999999832885296, iteration: 30074
loss: 1.045382022857666,grad_norm: 0.9999994107506632, iteration: 30075
loss: 1.0541437864303589,grad_norm: 0.9718388334556858, iteration: 30076
loss: 0.9954698085784912,grad_norm: 0.8846076306098974, iteration: 30077
loss: 1.0357855558395386,grad_norm: 0.9999994615908381, iteration: 30078
loss: 1.036222219467163,grad_norm: 0.9797111874700468, iteration: 30079
loss: 1.0205354690551758,grad_norm: 0.999999712715364, iteration: 30080
loss: 0.998243510723114,grad_norm: 0.9999994586178915, iteration: 30081
loss: 1.0214489698410034,grad_norm: 0.9999992423921104, iteration: 30082
loss: 1.0375430583953857,grad_norm: 0.9999990471606275, iteration: 30083
loss: 1.0177979469299316,grad_norm: 0.9999990151537428, iteration: 30084
loss: 1.0542110204696655,grad_norm: 0.9999995341625245, iteration: 30085
loss: 1.0157872438430786,grad_norm: 0.9999992756838089, iteration: 30086
loss: 1.0025173425674438,grad_norm: 0.9999990661932735, iteration: 30087
loss: 1.0159138441085815,grad_norm: 0.9999991523614876, iteration: 30088
loss: 0.9910105466842651,grad_norm: 0.9999989491680974, iteration: 30089
loss: 1.0475908517837524,grad_norm: 0.9999994865920577, iteration: 30090
loss: 1.0151028633117676,grad_norm: 0.9595495476719454, iteration: 30091
loss: 0.9910830855369568,grad_norm: 0.9093815279524327, iteration: 30092
loss: 1.0560499429702759,grad_norm: 0.9999994749672656, iteration: 30093
loss: 0.9909800887107849,grad_norm: 0.9999994108823702, iteration: 30094
loss: 1.010176658630371,grad_norm: 0.999999114458419, iteration: 30095
loss: 0.9802758097648621,grad_norm: 0.9999991140940546, iteration: 30096
loss: 1.0159108638763428,grad_norm: 0.999999882401931, iteration: 30097
loss: 1.000276803970337,grad_norm: 0.9760377922947964, iteration: 30098
loss: 1.0018229484558105,grad_norm: 0.9519764984800768, iteration: 30099
loss: 0.9613727927207947,grad_norm: 0.9999991784797299, iteration: 30100
loss: 1.0266785621643066,grad_norm: 0.9999991414901935, iteration: 30101
loss: 1.0097287893295288,grad_norm: 0.9999990098499172, iteration: 30102
loss: 1.0303088426589966,grad_norm: 0.9999995424426753, iteration: 30103
loss: 1.0248650312423706,grad_norm: 0.9999993870815382, iteration: 30104
loss: 0.9980512857437134,grad_norm: 0.9999990232272296, iteration: 30105
loss: 1.0087047815322876,grad_norm: 0.9999990351205379, iteration: 30106
loss: 0.9731990098953247,grad_norm: 0.9999991237511, iteration: 30107
loss: 1.0142277479171753,grad_norm: 0.9412804077079503, iteration: 30108
loss: 1.0436235666275024,grad_norm: 0.9111614797007549, iteration: 30109
loss: 1.0273139476776123,grad_norm: 0.9558719945959088, iteration: 30110
loss: 1.0333389043807983,grad_norm: 0.9999991338731754, iteration: 30111
loss: 0.9973756670951843,grad_norm: 0.9999993454201724, iteration: 30112
loss: 1.0261965990066528,grad_norm: 0.9999991885754252, iteration: 30113
loss: 1.076841950416565,grad_norm: 0.9999995965583942, iteration: 30114
loss: 1.032589077949524,grad_norm: 0.9999992795481945, iteration: 30115
loss: 1.0039576292037964,grad_norm: 0.9895884605350931, iteration: 30116
loss: 1.0643976926803589,grad_norm: 0.9999996890485178, iteration: 30117
loss: 1.0027590990066528,grad_norm: 0.8191587589172169, iteration: 30118
loss: 1.0264382362365723,grad_norm: 0.9548853938554075, iteration: 30119
loss: 0.9890817403793335,grad_norm: 0.8814385042028123, iteration: 30120
loss: 1.01442551612854,grad_norm: 0.9999994310712824, iteration: 30121
loss: 1.0270745754241943,grad_norm: 0.9999992339697129, iteration: 30122
loss: 1.0349448919296265,grad_norm: 0.9999991321627751, iteration: 30123
loss: 1.058006763458252,grad_norm: 0.9999988932231448, iteration: 30124
loss: 1.0328645706176758,grad_norm: 0.8633933916915643, iteration: 30125
loss: 0.9797004461288452,grad_norm: 0.9999994353140884, iteration: 30126
loss: 1.0547698736190796,grad_norm: 0.9999992920680627, iteration: 30127
loss: 1.0076957941055298,grad_norm: 0.9999991146985626, iteration: 30128
loss: 0.9883993864059448,grad_norm: 0.9999990805559908, iteration: 30129
loss: 1.01280677318573,grad_norm: 0.9999994006224978, iteration: 30130
loss: 1.024863362312317,grad_norm: 0.9999996844939212, iteration: 30131
loss: 1.047507643699646,grad_norm: 0.9999989648786004, iteration: 30132
loss: 0.9810552000999451,grad_norm: 0.8783303033233757, iteration: 30133
loss: 1.021644949913025,grad_norm: 0.9279268495216003, iteration: 30134
loss: 0.9785518050193787,grad_norm: 0.9143400102734472, iteration: 30135
loss: 1.0120939016342163,grad_norm: 0.9999995404934116, iteration: 30136
loss: 1.045640230178833,grad_norm: 0.9999991416929179, iteration: 30137
loss: 0.9972841143608093,grad_norm: 0.906736052430446, iteration: 30138
loss: 0.9741324186325073,grad_norm: 0.9999993746893828, iteration: 30139
loss: 1.012474536895752,grad_norm: 0.894566851604309, iteration: 30140
loss: 1.0273404121398926,grad_norm: 0.9999990529648537, iteration: 30141
loss: 0.9886757135391235,grad_norm: 0.9999990378917726, iteration: 30142
loss: 0.9842099547386169,grad_norm: 0.9999994541903461, iteration: 30143
loss: 1.0391923189163208,grad_norm: 0.999999286199717, iteration: 30144
loss: 0.9960514903068542,grad_norm: 0.9999991276547788, iteration: 30145
loss: 0.9587967991828918,grad_norm: 0.9241824468810008, iteration: 30146
loss: 1.014123797416687,grad_norm: 0.999999101896761, iteration: 30147
loss: 1.0356394052505493,grad_norm: 0.9999993223140494, iteration: 30148
loss: 0.9770828485488892,grad_norm: 0.9999990731871153, iteration: 30149
loss: 1.0011111497879028,grad_norm: 0.9058045707039593, iteration: 30150
loss: 0.9896945357322693,grad_norm: 0.8749297424387691, iteration: 30151
loss: 1.0169789791107178,grad_norm: 0.9999993169761012, iteration: 30152
loss: 0.9760419130325317,grad_norm: 0.9999991611657011, iteration: 30153
loss: 1.0173077583312988,grad_norm: 0.9999992244179998, iteration: 30154
loss: 1.0002630949020386,grad_norm: 0.9999997960688924, iteration: 30155
loss: 0.9738832116127014,grad_norm: 0.9999992922816199, iteration: 30156
loss: 1.0318942070007324,grad_norm: 0.999999621987516, iteration: 30157
loss: 1.0164226293563843,grad_norm: 0.9511961703166402, iteration: 30158
loss: 1.1268893480300903,grad_norm: 0.9999992339124597, iteration: 30159
loss: 1.0423048734664917,grad_norm: 0.9999992596624392, iteration: 30160
loss: 0.9798945784568787,grad_norm: 0.8353791672542965, iteration: 30161
loss: 1.0268425941467285,grad_norm: 0.9999990815771553, iteration: 30162
loss: 1.0361974239349365,grad_norm: 0.9999998465725771, iteration: 30163
loss: 1.0153611898422241,grad_norm: 0.9999992833568494, iteration: 30164
loss: 0.9804802536964417,grad_norm: 0.9612493486363403, iteration: 30165
loss: 1.0521173477172852,grad_norm: 0.9999990342348372, iteration: 30166
loss: 1.01271653175354,grad_norm: 0.8872770320560605, iteration: 30167
loss: 0.9766904711723328,grad_norm: 0.9165052705030636, iteration: 30168
loss: 1.0211231708526611,grad_norm: 0.9999997131818691, iteration: 30169
loss: 1.0377062559127808,grad_norm: 0.999999092215086, iteration: 30170
loss: 0.9875271320343018,grad_norm: 0.9999990752804523, iteration: 30171
loss: 1.0090863704681396,grad_norm: 0.9999992137900587, iteration: 30172
loss: 1.0205929279327393,grad_norm: 0.9999992820539214, iteration: 30173
loss: 1.0230780839920044,grad_norm: 0.9999992384823787, iteration: 30174
loss: 1.0129190683364868,grad_norm: 0.9999992644059321, iteration: 30175
loss: 0.996871292591095,grad_norm: 0.847032555599787, iteration: 30176
loss: 1.0518171787261963,grad_norm: 0.9999994347968543, iteration: 30177
loss: 0.993330717086792,grad_norm: 0.9445233249842284, iteration: 30178
loss: 1.018643856048584,grad_norm: 0.9999992325869791, iteration: 30179
loss: 1.0083534717559814,grad_norm: 0.999999585618883, iteration: 30180
loss: 0.9816150069236755,grad_norm: 0.7699400804463724, iteration: 30181
loss: 1.0202100276947021,grad_norm: 0.9999991024709532, iteration: 30182
loss: 1.017987847328186,grad_norm: 0.8302503130086205, iteration: 30183
loss: 1.0165084600448608,grad_norm: 0.999999953822742, iteration: 30184
loss: 0.9784975051879883,grad_norm: 0.999999049322145, iteration: 30185
loss: 1.00663161277771,grad_norm: 0.9999990174498758, iteration: 30186
loss: 1.031651258468628,grad_norm: 0.9999989594982523, iteration: 30187
loss: 0.9751217365264893,grad_norm: 0.8628096828560855, iteration: 30188
loss: 1.0003080368041992,grad_norm: 0.8123641674913656, iteration: 30189
loss: 0.9895555973052979,grad_norm: 0.9142046435845697, iteration: 30190
loss: 0.9965439438819885,grad_norm: 0.9461021020246145, iteration: 30191
loss: 1.0364019870758057,grad_norm: 0.9924638653202827, iteration: 30192
loss: 1.001596212387085,grad_norm: 0.8894299430394349, iteration: 30193
loss: 0.9850492477416992,grad_norm: 0.9999991561278619, iteration: 30194
loss: 0.9809048175811768,grad_norm: 0.9501372132731866, iteration: 30195
loss: 0.9846670031547546,grad_norm: 0.9999992067173248, iteration: 30196
loss: 0.9931562542915344,grad_norm: 0.8045900914575065, iteration: 30197
loss: 0.9938443303108215,grad_norm: 0.9999993778949624, iteration: 30198
loss: 0.9983373284339905,grad_norm: 0.9285222391786194, iteration: 30199
loss: 1.010433316230774,grad_norm: 0.9766609590168299, iteration: 30200
loss: 1.0560420751571655,grad_norm: 0.9999990185316174, iteration: 30201
loss: 0.9786913394927979,grad_norm: 0.9999992551184799, iteration: 30202
loss: 0.9731622338294983,grad_norm: 0.9918699255075094, iteration: 30203
loss: 1.0415778160095215,grad_norm: 0.9999989808672941, iteration: 30204
loss: 0.9762749075889587,grad_norm: 0.9999995979057061, iteration: 30205
loss: 0.9583260416984558,grad_norm: 0.9455960263501499, iteration: 30206
loss: 1.029577374458313,grad_norm: 0.991086520035481, iteration: 30207
loss: 1.0336904525756836,grad_norm: 0.9999990116180965, iteration: 30208
loss: 1.0558489561080933,grad_norm: 0.9999993443036673, iteration: 30209
loss: 1.0343862771987915,grad_norm: 0.9999994552464877, iteration: 30210
loss: 1.0014756917953491,grad_norm: 0.8939717428860343, iteration: 30211
loss: 1.0947904586791992,grad_norm: 0.9999999419747148, iteration: 30212
loss: 1.0408433675765991,grad_norm: 0.9999998144434471, iteration: 30213
loss: 1.0942250490188599,grad_norm: 0.999999567000649, iteration: 30214
loss: 1.0028355121612549,grad_norm: 0.9220001814989972, iteration: 30215
loss: 0.9928764700889587,grad_norm: 0.9486994874214946, iteration: 30216
loss: 1.0343595743179321,grad_norm: 0.9843141070704494, iteration: 30217
loss: 0.9962190389633179,grad_norm: 0.8071497020597562, iteration: 30218
loss: 1.0048363208770752,grad_norm: 0.9999991290494515, iteration: 30219
loss: 1.0161021947860718,grad_norm: 0.8381973523692713, iteration: 30220
loss: 1.0013444423675537,grad_norm: 0.9550760634877251, iteration: 30221
loss: 1.0292054414749146,grad_norm: 0.9999990484033737, iteration: 30222
loss: 1.107445478439331,grad_norm: 0.9999992048773895, iteration: 30223
loss: 1.001319169998169,grad_norm: 0.999999088600816, iteration: 30224
loss: 0.9991281628608704,grad_norm: 0.9999998310723645, iteration: 30225
loss: 1.2020974159240723,grad_norm: 0.9999996574744359, iteration: 30226
loss: 1.2187083959579468,grad_norm: 0.9999995092956597, iteration: 30227
loss: 1.4101507663726807,grad_norm: 0.9999996701634147, iteration: 30228
loss: 1.3868024349212646,grad_norm: 0.9999998747600976, iteration: 30229
loss: 1.3169655799865723,grad_norm: 0.9999995383596846, iteration: 30230
loss: 1.5786594152450562,grad_norm: 0.9999996903215169, iteration: 30231
loss: 1.9425327777862549,grad_norm: 0.9999998407326974, iteration: 30232
loss: 1.8907253742218018,grad_norm: 0.9999998944667977, iteration: 30233
loss: 2.063850164413452,grad_norm: 0.9999998030941148, iteration: 30234
loss: 1.9562288522720337,grad_norm: 0.9999998386753068, iteration: 30235
loss: 1.5166910886764526,grad_norm: 0.999999619239665, iteration: 30236
loss: 2.1409707069396973,grad_norm: 0.9999997542424814, iteration: 30237
loss: 2.3340070247650146,grad_norm: 0.9999998436569523, iteration: 30238
loss: 2.099729061126709,grad_norm: 0.999999848373291, iteration: 30239
loss: 1.4370355606079102,grad_norm: 0.9999996687375635, iteration: 30240
loss: 1.5696163177490234,grad_norm: 0.9999997767670533, iteration: 30241
loss: 1.4335294961929321,grad_norm: 0.9999996876606696, iteration: 30242
loss: 1.5209417343139648,grad_norm: 0.9999997874026919, iteration: 30243
loss: 1.6369787454605103,grad_norm: 0.9999999443901717, iteration: 30244
loss: 1.2079811096191406,grad_norm: 0.9999993465434778, iteration: 30245
loss: 1.4198659658432007,grad_norm: 0.9999998598053509, iteration: 30246
loss: 1.1073613166809082,grad_norm: 0.9999999891881005, iteration: 30247
loss: 1.0597989559173584,grad_norm: 0.9999997059932848, iteration: 30248
loss: 1.1740946769714355,grad_norm: 0.9999997762678257, iteration: 30249
loss: 1.0659102201461792,grad_norm: 0.9999995480955741, iteration: 30250
loss: 1.0558112859725952,grad_norm: 0.9999998868791652, iteration: 30251
loss: 1.0598418712615967,grad_norm: 0.9999992856456381, iteration: 30252
loss: 1.0285884141921997,grad_norm: 0.9999993461526114, iteration: 30253
loss: 1.086207389831543,grad_norm: 0.9999989995176328, iteration: 30254
loss: 1.0488193035125732,grad_norm: 0.9999994216299464, iteration: 30255
loss: 1.0443785190582275,grad_norm: 0.9999996745792685, iteration: 30256
loss: 1.0167196989059448,grad_norm: 0.9999993450780961, iteration: 30257
loss: 1.1092463731765747,grad_norm: 0.9999993365682707, iteration: 30258
loss: 1.051679253578186,grad_norm: 0.999999460622331, iteration: 30259
loss: 1.007176160812378,grad_norm: 0.9999990131308055, iteration: 30260
loss: 1.0377423763275146,grad_norm: 0.9999995297926231, iteration: 30261
loss: 1.0148870944976807,grad_norm: 0.9999991327238589, iteration: 30262
loss: 1.0433225631713867,grad_norm: 0.9999991905285049, iteration: 30263
loss: 1.0697224140167236,grad_norm: 0.9999995561211499, iteration: 30264
loss: 1.0865793228149414,grad_norm: 0.9999997474937452, iteration: 30265
loss: 1.0148180723190308,grad_norm: 0.9999994170682245, iteration: 30266
loss: 1.02362859249115,grad_norm: 0.9999994349686856, iteration: 30267
loss: 0.9951333999633789,grad_norm: 0.9999989659500375, iteration: 30268
loss: 0.9977115392684937,grad_norm: 0.9999991778268608, iteration: 30269
loss: 0.9644089341163635,grad_norm: 0.9999993688009186, iteration: 30270
loss: 1.0190222263336182,grad_norm: 0.9648605347843895, iteration: 30271
loss: 1.004252552986145,grad_norm: 0.9999990301721399, iteration: 30272
loss: 1.2304459810256958,grad_norm: 0.9999992584018327, iteration: 30273
loss: 1.0186830759048462,grad_norm: 0.9999994597352341, iteration: 30274
loss: 0.9947524666786194,grad_norm: 0.8919215824581367, iteration: 30275
loss: 1.0668121576309204,grad_norm: 0.9999998023714429, iteration: 30276
loss: 1.0028877258300781,grad_norm: 0.9999990377201936, iteration: 30277
loss: 1.1039128303527832,grad_norm: 0.9999999040432983, iteration: 30278
loss: 1.006782054901123,grad_norm: 0.9815022560344743, iteration: 30279
loss: 1.0444567203521729,grad_norm: 0.9999997263001907, iteration: 30280
loss: 0.996704638004303,grad_norm: 0.9999992817067686, iteration: 30281
loss: 1.03822922706604,grad_norm: 0.9999996412775622, iteration: 30282
loss: 0.9943909049034119,grad_norm: 0.953556088223685, iteration: 30283
loss: 0.9895902276039124,grad_norm: 0.9999991227729718, iteration: 30284
loss: 1.0369069576263428,grad_norm: 0.9999992095578906, iteration: 30285
loss: 0.9855466485023499,grad_norm: 0.9911601320069394, iteration: 30286
loss: 1.0236694812774658,grad_norm: 0.9999994183489996, iteration: 30287
loss: 1.0120596885681152,grad_norm: 0.8749247248284613, iteration: 30288
loss: 1.1098179817199707,grad_norm: 0.9999998843165862, iteration: 30289
loss: 1.0860507488250732,grad_norm: 0.9999995580974081, iteration: 30290
loss: 1.079886794090271,grad_norm: 0.9999995133269486, iteration: 30291
loss: 1.0471190214157104,grad_norm: 0.9999993214521635, iteration: 30292
loss: 1.093902349472046,grad_norm: 0.9999999572622963, iteration: 30293
loss: 1.0474576950073242,grad_norm: 0.9999993903010704, iteration: 30294
loss: 1.0140626430511475,grad_norm: 0.9999990529408483, iteration: 30295
loss: 0.991936206817627,grad_norm: 0.9999994462377284, iteration: 30296
loss: 0.9842157363891602,grad_norm: 0.9999992162007995, iteration: 30297
loss: 1.0284440517425537,grad_norm: 0.9999995558660364, iteration: 30298
loss: 0.9679998755455017,grad_norm: 0.9999996763013874, iteration: 30299
loss: 1.0056344270706177,grad_norm: 0.9999992802756088, iteration: 30300
loss: 1.0298570394515991,grad_norm: 0.9999992178751825, iteration: 30301
loss: 1.1157662868499756,grad_norm: 0.9999994300197931, iteration: 30302
loss: 1.0153183937072754,grad_norm: 0.9999992625063295, iteration: 30303
loss: 0.9854609966278076,grad_norm: 0.9999990584179231, iteration: 30304
loss: 1.0389204025268555,grad_norm: 0.966209944873757, iteration: 30305
loss: 1.0249813795089722,grad_norm: 0.9999991860310107, iteration: 30306
loss: 0.9881332516670227,grad_norm: 0.9999990128046438, iteration: 30307
loss: 0.9864662289619446,grad_norm: 0.9999991832380185, iteration: 30308
loss: 0.9692136645317078,grad_norm: 0.8775582378356036, iteration: 30309
loss: 1.032234787940979,grad_norm: 0.8054532250443088, iteration: 30310
loss: 1.059038758277893,grad_norm: 0.9999995118773484, iteration: 30311
loss: 0.9985058903694153,grad_norm: 0.9999992101411856, iteration: 30312
loss: 0.9970759153366089,grad_norm: 0.9999996905184123, iteration: 30313
loss: 1.0161957740783691,grad_norm: 0.8452965746857475, iteration: 30314
loss: 1.0076156854629517,grad_norm: 0.9999991553768686, iteration: 30315
loss: 1.0017006397247314,grad_norm: 0.9560532118130841, iteration: 30316
loss: 1.0325415134429932,grad_norm: 0.9999991613940318, iteration: 30317
loss: 1.0384740829467773,grad_norm: 0.9816890610975822, iteration: 30318
loss: 1.0555495023727417,grad_norm: 0.9999993015684185, iteration: 30319
loss: 1.0058667659759521,grad_norm: 0.9379488190590876, iteration: 30320
loss: 1.084079384803772,grad_norm: 0.9999992744574344, iteration: 30321
loss: 1.0271871089935303,grad_norm: 0.9999997821620993, iteration: 30322
loss: 0.980827271938324,grad_norm: 0.9999992497695737, iteration: 30323
loss: 1.0584830045700073,grad_norm: 0.9999991250778897, iteration: 30324
loss: 1.0353360176086426,grad_norm: 0.9999995288113291, iteration: 30325
loss: 0.9996052384376526,grad_norm: 0.9999991211169893, iteration: 30326
loss: 1.0251389741897583,grad_norm: 0.9999991884892564, iteration: 30327
loss: 1.0296111106872559,grad_norm: 0.9999992096586714, iteration: 30328
loss: 1.0087529420852661,grad_norm: 0.9776623056084531, iteration: 30329
loss: 1.0308940410614014,grad_norm: 0.9999990735703145, iteration: 30330
loss: 1.0257066488265991,grad_norm: 0.9427048111961318, iteration: 30331
loss: 1.0521304607391357,grad_norm: 0.999999381882984, iteration: 30332
loss: 0.9819132089614868,grad_norm: 0.9999991222340037, iteration: 30333
loss: 1.0192562341690063,grad_norm: 0.9999991957361708, iteration: 30334
loss: 0.9846039414405823,grad_norm: 0.9999998816632992, iteration: 30335
loss: 0.9868052005767822,grad_norm: 0.8550908313207312, iteration: 30336
loss: 1.0396137237548828,grad_norm: 0.9999991545266206, iteration: 30337
loss: 0.970114529132843,grad_norm: 0.9778185932580106, iteration: 30338
loss: 1.0633114576339722,grad_norm: 0.9999991907685473, iteration: 30339
loss: 1.0284628868103027,grad_norm: 0.9999990969585681, iteration: 30340
loss: 1.0231047868728638,grad_norm: 0.9999992382870784, iteration: 30341
loss: 0.9890428781509399,grad_norm: 0.9999991987125882, iteration: 30342
loss: 0.9910099506378174,grad_norm: 0.8658829588052669, iteration: 30343
loss: 1.0106136798858643,grad_norm: 0.9999993192647544, iteration: 30344
loss: 1.0368590354919434,grad_norm: 0.9999994435213488, iteration: 30345
loss: 1.088938593864441,grad_norm: 0.9999990850633944, iteration: 30346
loss: 1.0103113651275635,grad_norm: 0.93724547710878, iteration: 30347
loss: 1.1158751249313354,grad_norm: 0.9999993894376713, iteration: 30348
loss: 1.004558801651001,grad_norm: 0.9999991659898346, iteration: 30349
loss: 0.974025547504425,grad_norm: 0.9999992766017649, iteration: 30350
loss: 1.0193336009979248,grad_norm: 0.9999992015378444, iteration: 30351
loss: 1.0192480087280273,grad_norm: 0.9999992527110988, iteration: 30352
loss: 1.0027185678482056,grad_norm: 0.9999991396627507, iteration: 30353
loss: 1.0252734422683716,grad_norm: 0.9999993065958704, iteration: 30354
loss: 0.9698978662490845,grad_norm: 0.9619507982588483, iteration: 30355
loss: 1.0509986877441406,grad_norm: 0.9999992331846206, iteration: 30356
loss: 1.0219638347625732,grad_norm: 0.9280336657122743, iteration: 30357
loss: 0.9970406889915466,grad_norm: 0.9999991817223922, iteration: 30358
loss: 0.9947177767753601,grad_norm: 0.9999998423550892, iteration: 30359
loss: 1.0611915588378906,grad_norm: 0.9999990237721068, iteration: 30360
loss: 1.003333330154419,grad_norm: 0.9999991389483839, iteration: 30361
loss: 1.0387239456176758,grad_norm: 0.9999994811372355, iteration: 30362
loss: 1.0083881616592407,grad_norm: 0.9267254325701698, iteration: 30363
loss: 0.9935314059257507,grad_norm: 0.8963733425542574, iteration: 30364
loss: 1.0793167352676392,grad_norm: 0.9999996046122295, iteration: 30365
loss: 1.0001298189163208,grad_norm: 0.999999344509334, iteration: 30366
loss: 0.9753075838088989,grad_norm: 0.9247038645956548, iteration: 30367
loss: 0.987732470035553,grad_norm: 0.961736597534556, iteration: 30368
loss: 0.993992030620575,grad_norm: 0.9999993054748281, iteration: 30369
loss: 0.9862480759620667,grad_norm: 0.9215175564904969, iteration: 30370
loss: 0.9850409626960754,grad_norm: 0.9223477564726814, iteration: 30371
loss: 1.0359926223754883,grad_norm: 0.9999995441385577, iteration: 30372
loss: 1.0095614194869995,grad_norm: 0.9753547400233068, iteration: 30373
loss: 1.0269041061401367,grad_norm: 0.9999998361436794, iteration: 30374
loss: 1.0318528413772583,grad_norm: 0.9999990707253147, iteration: 30375
loss: 0.9929232001304626,grad_norm: 0.9189550669178138, iteration: 30376
loss: 1.0173112154006958,grad_norm: 0.9482166392792716, iteration: 30377
loss: 1.0023367404937744,grad_norm: 0.999999253332161, iteration: 30378
loss: 1.0597060918807983,grad_norm: 0.9999993965823547, iteration: 30379
loss: 1.038724422454834,grad_norm: 0.9999991650936851, iteration: 30380
loss: 0.9760613441467285,grad_norm: 0.9904309651405343, iteration: 30381
loss: 1.0077894926071167,grad_norm: 0.9999990965666405, iteration: 30382
loss: 1.0216139554977417,grad_norm: 0.9187940331594288, iteration: 30383
loss: 0.9489070177078247,grad_norm: 0.9999993155841296, iteration: 30384
loss: 1.0144891738891602,grad_norm: 0.9999991278898489, iteration: 30385
loss: 0.9893852472305298,grad_norm: 0.9999989839782277, iteration: 30386
loss: 1.0010141134262085,grad_norm: 0.9999994216316996, iteration: 30387
loss: 0.9938327670097351,grad_norm: 0.9999991322745491, iteration: 30388
loss: 1.0090023279190063,grad_norm: 0.9999991570689388, iteration: 30389
loss: 1.026710867881775,grad_norm: 0.8535974773904026, iteration: 30390
loss: 1.0430704355239868,grad_norm: 0.9999990292606205, iteration: 30391
loss: 1.0188789367675781,grad_norm: 0.8987659314029219, iteration: 30392
loss: 1.0364489555358887,grad_norm: 0.9999992505601366, iteration: 30393
loss: 1.015168309211731,grad_norm: 0.7295024874796393, iteration: 30394
loss: 0.9674981832504272,grad_norm: 0.9612869382186107, iteration: 30395
loss: 1.0265953540802002,grad_norm: 0.9999991342002102, iteration: 30396
loss: 1.0165493488311768,grad_norm: 0.8957533438273699, iteration: 30397
loss: 1.00192391872406,grad_norm: 0.999999344895736, iteration: 30398
loss: 0.995829701423645,grad_norm: 0.9999991046505191, iteration: 30399
loss: 1.0248119831085205,grad_norm: 0.9999995343512122, iteration: 30400
loss: 0.9852023124694824,grad_norm: 0.9999989822119865, iteration: 30401
loss: 1.0116032361984253,grad_norm: 0.9999991541612528, iteration: 30402
loss: 0.9909308552742004,grad_norm: 0.8525705079146673, iteration: 30403
loss: 1.0511748790740967,grad_norm: 0.9999991518301791, iteration: 30404
loss: 1.0425729751586914,grad_norm: 0.9999994537320498, iteration: 30405
loss: 1.0727241039276123,grad_norm: 0.9999996461538153, iteration: 30406
loss: 1.0176200866699219,grad_norm: 0.8377417914659251, iteration: 30407
loss: 1.0018789768218994,grad_norm: 0.9999991272250776, iteration: 30408
loss: 0.990492582321167,grad_norm: 0.9999992533961519, iteration: 30409
loss: 1.0254603624343872,grad_norm: 0.9999994060694305, iteration: 30410
loss: 1.001204252243042,grad_norm: 0.7912427854134739, iteration: 30411
loss: 1.0053187608718872,grad_norm: 0.9933262806412138, iteration: 30412
loss: 1.0612174272537231,grad_norm: 0.9999990852268491, iteration: 30413
loss: 1.0603690147399902,grad_norm: 0.999999579589697, iteration: 30414
loss: 0.9924387335777283,grad_norm: 0.9999997495603851, iteration: 30415
loss: 0.9939789772033691,grad_norm: 0.9999992102522969, iteration: 30416
loss: 1.0260286331176758,grad_norm: 0.99999933213616, iteration: 30417
loss: 1.0174955129623413,grad_norm: 0.8876019401633629, iteration: 30418
loss: 1.0103695392608643,grad_norm: 0.9999990712101315, iteration: 30419
loss: 1.093000054359436,grad_norm: 0.9999997906695506, iteration: 30420
loss: 0.992633581161499,grad_norm: 0.9372889986740142, iteration: 30421
loss: 1.012165904045105,grad_norm: 0.9586750337499376, iteration: 30422
loss: 1.0042543411254883,grad_norm: 0.9178245635530118, iteration: 30423
loss: 0.9777624011039734,grad_norm: 0.8734958957534787, iteration: 30424
loss: 1.0770262479782104,grad_norm: 0.9999995794532957, iteration: 30425
loss: 1.0249217748641968,grad_norm: 0.8693647956690765, iteration: 30426
loss: 0.9786203503608704,grad_norm: 0.972169152698771, iteration: 30427
loss: 0.9853468537330627,grad_norm: 0.9999990403807457, iteration: 30428
loss: 0.9860734343528748,grad_norm: 0.8647652697603977, iteration: 30429
loss: 0.9579513072967529,grad_norm: 0.9923712818795096, iteration: 30430
loss: 0.9535487294197083,grad_norm: 0.9999991751957266, iteration: 30431
loss: 1.0718159675598145,grad_norm: 0.9999992379932288, iteration: 30432
loss: 1.0291928052902222,grad_norm: 0.9769651732104097, iteration: 30433
loss: 1.0168951749801636,grad_norm: 0.999999264404678, iteration: 30434
loss: 1.015432596206665,grad_norm: 0.8676738887693417, iteration: 30435
loss: 0.989841103553772,grad_norm: 0.9739947004552986, iteration: 30436
loss: 1.0568537712097168,grad_norm: 0.9195181507070816, iteration: 30437
loss: 1.0728082656860352,grad_norm: 0.946425875144121, iteration: 30438
loss: 1.0099149942398071,grad_norm: 0.8789835754996789, iteration: 30439
loss: 1.0202573537826538,grad_norm: 0.9999996470966517, iteration: 30440
loss: 0.9802268147468567,grad_norm: 0.9999991253216467, iteration: 30441
loss: 1.0423163175582886,grad_norm: 0.887139278651169, iteration: 30442
loss: 1.0268964767456055,grad_norm: 0.9999992629691473, iteration: 30443
loss: 1.0120354890823364,grad_norm: 0.9999992206451296, iteration: 30444
loss: 1.0110821723937988,grad_norm: 0.8646200390687956, iteration: 30445
loss: 0.9769142270088196,grad_norm: 0.9999992729373539, iteration: 30446
loss: 0.9956969022750854,grad_norm: 0.8742101448379681, iteration: 30447
loss: 0.9887509346008301,grad_norm: 0.9999990598005474, iteration: 30448
loss: 0.9951410889625549,grad_norm: 0.790865857727157, iteration: 30449
loss: 0.9622399806976318,grad_norm: 0.9999993756876211, iteration: 30450
loss: 1.0059643983840942,grad_norm: 0.999999600183517, iteration: 30451
loss: 1.0159060955047607,grad_norm: 0.788028176754929, iteration: 30452
loss: 1.0612748861312866,grad_norm: 0.9999992538901379, iteration: 30453
loss: 1.0122926235198975,grad_norm: 0.8751942836996313, iteration: 30454
loss: 1.0009080171585083,grad_norm: 0.9999991364745371, iteration: 30455
loss: 1.006101369857788,grad_norm: 0.999998915460234, iteration: 30456
loss: 1.016489028930664,grad_norm: 0.8595147089699444, iteration: 30457
loss: 1.0457442998886108,grad_norm: 0.9999993190302824, iteration: 30458
loss: 1.0122042894363403,grad_norm: 0.8698623015717398, iteration: 30459
loss: 1.0283684730529785,grad_norm: 0.9999991912925438, iteration: 30460
loss: 1.0214461088180542,grad_norm: 0.8766330013036099, iteration: 30461
loss: 0.9974713921546936,grad_norm: 0.9186756495615224, iteration: 30462
loss: 0.9755020141601562,grad_norm: 0.9495069328096356, iteration: 30463
loss: 1.026427149772644,grad_norm: 0.9999991248771064, iteration: 30464
loss: 0.9960916042327881,grad_norm: 0.9910269748930657, iteration: 30465
loss: 1.0023865699768066,grad_norm: 0.9999996214047617, iteration: 30466
loss: 1.0167855024337769,grad_norm: 0.926891547601506, iteration: 30467
loss: 1.0419700145721436,grad_norm: 0.9999990382323324, iteration: 30468
loss: 0.9916919469833374,grad_norm: 0.9233917178857651, iteration: 30469
loss: 1.0106145143508911,grad_norm: 0.8977405497322577, iteration: 30470
loss: 0.984595537185669,grad_norm: 0.9999995004413512, iteration: 30471
loss: 1.0114223957061768,grad_norm: 0.8615468861066263, iteration: 30472
loss: 1.0412348508834839,grad_norm: 0.8450213281187166, iteration: 30473
loss: 1.0056408643722534,grad_norm: 0.999999093774304, iteration: 30474
loss: 1.0022259950637817,grad_norm: 0.8665219736264804, iteration: 30475
loss: 1.0353858470916748,grad_norm: 0.8331558109082983, iteration: 30476
loss: 0.9607350826263428,grad_norm: 0.9999993478312701, iteration: 30477
loss: 1.028867483139038,grad_norm: 0.9999991693201705, iteration: 30478
loss: 1.0239688158035278,grad_norm: 0.9455636693538156, iteration: 30479
loss: 1.0255413055419922,grad_norm: 0.9999993834983839, iteration: 30480
loss: 1.0496244430541992,grad_norm: 0.9999992221640077, iteration: 30481
loss: 1.020241379737854,grad_norm: 0.9914746831682797, iteration: 30482
loss: 0.9927158951759338,grad_norm: 0.9552403628532219, iteration: 30483
loss: 1.011042594909668,grad_norm: 0.9856400303464062, iteration: 30484
loss: 0.9667424559593201,grad_norm: 0.9999991719268924, iteration: 30485
loss: 0.9841600060462952,grad_norm: 0.999999045877594, iteration: 30486
loss: 1.0049318075180054,grad_norm: 0.9999995245140259, iteration: 30487
loss: 0.9831410646438599,grad_norm: 0.8470196405758956, iteration: 30488
loss: 1.0272564888000488,grad_norm: 0.8723260589691284, iteration: 30489
loss: 1.061192274093628,grad_norm: 0.9999990680847803, iteration: 30490
loss: 1.0173383951187134,grad_norm: 0.8348605797964155, iteration: 30491
loss: 1.008558750152588,grad_norm: 0.9562587055260314, iteration: 30492
loss: 1.0131404399871826,grad_norm: 0.8916679803172572, iteration: 30493
loss: 0.998907744884491,grad_norm: 0.9999994826499999, iteration: 30494
loss: 0.9976350665092468,grad_norm: 0.9999994289937596, iteration: 30495
loss: 1.0098810195922852,grad_norm: 0.848159890095538, iteration: 30496
loss: 1.025368571281433,grad_norm: 0.9999989481272594, iteration: 30497
loss: 1.0150976181030273,grad_norm: 0.999999547521531, iteration: 30498
loss: 1.0188379287719727,grad_norm: 0.9442172457033686, iteration: 30499
loss: 0.9951968789100647,grad_norm: 0.9304033200005093, iteration: 30500
loss: 0.9725093841552734,grad_norm: 0.9999990931578137, iteration: 30501
loss: 0.9881743788719177,grad_norm: 0.8674498543626303, iteration: 30502
loss: 1.0243583917617798,grad_norm: 0.9999992630791738, iteration: 30503
loss: 1.02335786819458,grad_norm: 0.9488501694120725, iteration: 30504
loss: 0.99005126953125,grad_norm: 0.8940408333599235, iteration: 30505
loss: 1.0440691709518433,grad_norm: 0.9999991659271767, iteration: 30506
loss: 1.098076581954956,grad_norm: 0.9999997356694577, iteration: 30507
loss: 1.003970742225647,grad_norm: 0.9916306427263112, iteration: 30508
loss: 1.0346864461898804,grad_norm: 0.9999997374427724, iteration: 30509
loss: 1.0048255920410156,grad_norm: 0.9715908104492028, iteration: 30510
loss: 1.0141892433166504,grad_norm: 0.9999990835603894, iteration: 30511
loss: 1.0426013469696045,grad_norm: 0.9999990364066008, iteration: 30512
loss: 0.996755838394165,grad_norm: 0.930220956445878, iteration: 30513
loss: 1.0381282567977905,grad_norm: 0.9999991656170205, iteration: 30514
loss: 1.0277122259140015,grad_norm: 0.9361851085405768, iteration: 30515
loss: 0.9740420579910278,grad_norm: 0.9634548289180305, iteration: 30516
loss: 0.9785104990005493,grad_norm: 0.9999990005252174, iteration: 30517
loss: 1.0174182653427124,grad_norm: 0.9419940964897036, iteration: 30518
loss: 1.0008916854858398,grad_norm: 0.9029417410409396, iteration: 30519
loss: 1.0131787061691284,grad_norm: 0.9999991838289456, iteration: 30520
loss: 1.0068427324295044,grad_norm: 0.8803443821376189, iteration: 30521
loss: 1.0469064712524414,grad_norm: 1.000000047415037, iteration: 30522
loss: 1.0084151029586792,grad_norm: 0.9999990186717592, iteration: 30523
loss: 0.992978036403656,grad_norm: 0.9405057301924697, iteration: 30524
loss: 1.034536600112915,grad_norm: 0.9661977743046647, iteration: 30525
loss: 0.9731461405754089,grad_norm: 0.9507079764232105, iteration: 30526
loss: 0.9539827704429626,grad_norm: 0.9999992840883156, iteration: 30527
loss: 1.028969168663025,grad_norm: 0.988238730828214, iteration: 30528
loss: 1.0354630947113037,grad_norm: 0.9106620006848197, iteration: 30529
loss: 1.0480507612228394,grad_norm: 0.9719682552528911, iteration: 30530
loss: 1.0032474994659424,grad_norm: 0.7920854908707573, iteration: 30531
loss: 1.0060564279556274,grad_norm: 0.9999989530390482, iteration: 30532
loss: 1.0274245738983154,grad_norm: 0.9999993676816998, iteration: 30533
loss: 1.0336692333221436,grad_norm: 0.9669971538624235, iteration: 30534
loss: 1.0141404867172241,grad_norm: 0.8911634861250798, iteration: 30535
loss: 1.0049470663070679,grad_norm: 0.9606351691934799, iteration: 30536
loss: 0.9993933439254761,grad_norm: 0.988081014201206, iteration: 30537
loss: 1.047507643699646,grad_norm: 0.9999995452797106, iteration: 30538
loss: 0.9615693092346191,grad_norm: 0.8504216757078118, iteration: 30539
loss: 1.0221221446990967,grad_norm: 0.9999990545701044, iteration: 30540
loss: 1.025909423828125,grad_norm: 0.7944573079352953, iteration: 30541
loss: 1.0435062646865845,grad_norm: 0.9999993085778394, iteration: 30542
loss: 1.0434694290161133,grad_norm: 0.9999993775632982, iteration: 30543
loss: 1.0369768142700195,grad_norm: 0.9999990389264201, iteration: 30544
loss: 1.0007970333099365,grad_norm: 0.9999994274570566, iteration: 30545
loss: 0.9798206090927124,grad_norm: 0.9999992062116891, iteration: 30546
loss: 1.0254182815551758,grad_norm: 0.9999993010706639, iteration: 30547
loss: 0.9943815469741821,grad_norm: 0.9999990776539922, iteration: 30548
loss: 1.0598747730255127,grad_norm: 0.9999995630310973, iteration: 30549
loss: 0.991481363773346,grad_norm: 0.9999990025988538, iteration: 30550
loss: 0.9901949763298035,grad_norm: 0.8110494030118385, iteration: 30551
loss: 1.0282087326049805,grad_norm: 0.999998951157656, iteration: 30552
loss: 1.0486063957214355,grad_norm: 0.9999992731308038, iteration: 30553
loss: 1.0572030544281006,grad_norm: 0.9999992500378577, iteration: 30554
loss: 0.9904237985610962,grad_norm: 0.8457579643340118, iteration: 30555
loss: 0.9845702648162842,grad_norm: 0.9999991288679924, iteration: 30556
loss: 0.9679644703865051,grad_norm: 0.9882140608709055, iteration: 30557
loss: 1.0248210430145264,grad_norm: 0.9888625110174157, iteration: 30558
loss: 0.9996970295906067,grad_norm: 0.8559413299087492, iteration: 30559
loss: 1.0247308015823364,grad_norm: 0.9999992075904601, iteration: 30560
loss: 1.001692533493042,grad_norm: 0.9999991949692725, iteration: 30561
loss: 1.0188907384872437,grad_norm: 0.9999998824028676, iteration: 30562
loss: 1.017374038696289,grad_norm: 0.9999997162565599, iteration: 30563
loss: 1.0483561754226685,grad_norm: 0.9999996316840357, iteration: 30564
loss: 0.9850929379463196,grad_norm: 0.999999306797408, iteration: 30565
loss: 0.9866969585418701,grad_norm: 0.9255905541330949, iteration: 30566
loss: 1.0886090993881226,grad_norm: 0.9999993521361307, iteration: 30567
loss: 0.9866037368774414,grad_norm: 0.9884405771236663, iteration: 30568
loss: 1.0608714818954468,grad_norm: 0.9999990815353551, iteration: 30569
loss: 0.9778440594673157,grad_norm: 0.9999991490248797, iteration: 30570
loss: 0.9975996613502502,grad_norm: 0.8714108433615597, iteration: 30571
loss: 1.1688597202301025,grad_norm: 0.9999995508868765, iteration: 30572
loss: 0.9801730513572693,grad_norm: 0.9999992722656429, iteration: 30573
loss: 0.9975665807723999,grad_norm: 0.9999990887371081, iteration: 30574
loss: 0.947602391242981,grad_norm: 0.8996832010608152, iteration: 30575
loss: 1.0115910768508911,grad_norm: 0.9631709252711697, iteration: 30576
loss: 1.0226646661758423,grad_norm: 0.999999094653601, iteration: 30577
loss: 0.9965025186538696,grad_norm: 0.8112858978876324, iteration: 30578
loss: 1.0016684532165527,grad_norm: 0.9999993381279433, iteration: 30579
loss: 1.0001111030578613,grad_norm: 0.999999123492181, iteration: 30580
loss: 1.0374627113342285,grad_norm: 0.9999991900193455, iteration: 30581
loss: 0.9903624653816223,grad_norm: 0.9999991564797498, iteration: 30582
loss: 1.0462205410003662,grad_norm: 0.9999993799696829, iteration: 30583
loss: 1.0078552961349487,grad_norm: 0.8582351404388388, iteration: 30584
loss: 1.019217610359192,grad_norm: 0.999999692335804, iteration: 30585
loss: 0.9935743808746338,grad_norm: 0.9999990591723742, iteration: 30586
loss: 1.0239648818969727,grad_norm: 0.8191092080922384, iteration: 30587
loss: 1.0181522369384766,grad_norm: 0.9947094535628317, iteration: 30588
loss: 0.9921064972877502,grad_norm: 0.9868908866308946, iteration: 30589
loss: 1.0024570226669312,grad_norm: 0.9999991310105204, iteration: 30590
loss: 1.0179177522659302,grad_norm: 0.991977270451376, iteration: 30591
loss: 1.0327810049057007,grad_norm: 0.9999991967649648, iteration: 30592
loss: 0.9793571829795837,grad_norm: 0.857237224262681, iteration: 30593
loss: 1.0090678930282593,grad_norm: 0.9138233970116982, iteration: 30594
loss: 0.9907430410385132,grad_norm: 0.9739645238802904, iteration: 30595
loss: 1.030510663986206,grad_norm: 0.9999993883202595, iteration: 30596
loss: 1.012730360031128,grad_norm: 0.9999991055080468, iteration: 30597
loss: 0.9990361928939819,grad_norm: 0.7758786374744904, iteration: 30598
loss: 1.0116474628448486,grad_norm: 0.9999991002410588, iteration: 30599
loss: 0.9944228529930115,grad_norm: 0.9999989512723914, iteration: 30600
loss: 1.062754511833191,grad_norm: 0.9999991020187627, iteration: 30601
loss: 1.0077779293060303,grad_norm: 0.99999939270591, iteration: 30602
loss: 1.0245048999786377,grad_norm: 0.9999991872194812, iteration: 30603
loss: 0.9844098687171936,grad_norm: 0.9999991809783071, iteration: 30604
loss: 1.012582778930664,grad_norm: 0.9999991192985989, iteration: 30605
loss: 1.005150318145752,grad_norm: 0.9207483679702562, iteration: 30606
loss: 0.9985365867614746,grad_norm: 0.9999991251271425, iteration: 30607
loss: 0.9791063666343689,grad_norm: 0.9917597011817952, iteration: 30608
loss: 1.0084596872329712,grad_norm: 0.9999992119091112, iteration: 30609
loss: 0.9991922378540039,grad_norm: 0.7772258565489212, iteration: 30610
loss: 0.9833504557609558,grad_norm: 0.7768122070026204, iteration: 30611
loss: 0.9863511323928833,grad_norm: 0.9929051827935264, iteration: 30612
loss: 0.9615291357040405,grad_norm: 0.9999990196385624, iteration: 30613
loss: 1.0188713073730469,grad_norm: 0.9999991722150607, iteration: 30614
loss: 1.0119661092758179,grad_norm: 0.9795572278905789, iteration: 30615
loss: 1.0441468954086304,grad_norm: 0.9999991443830725, iteration: 30616
loss: 0.9902245402336121,grad_norm: 0.8731253108016297, iteration: 30617
loss: 0.980617880821228,grad_norm: 0.9443880408421416, iteration: 30618
loss: 1.0116279125213623,grad_norm: 0.9999989691655783, iteration: 30619
loss: 0.9717870950698853,grad_norm: 0.9999991010635284, iteration: 30620
loss: 1.0050137042999268,grad_norm: 0.999999110283984, iteration: 30621
loss: 0.9959757328033447,grad_norm: 0.999999044276191, iteration: 30622
loss: 0.9972935318946838,grad_norm: 0.790518049201919, iteration: 30623
loss: 1.0130343437194824,grad_norm: 0.9999991653230167, iteration: 30624
loss: 0.9784591197967529,grad_norm: 0.9999991794807835, iteration: 30625
loss: 1.0182996988296509,grad_norm: 0.9952585327419414, iteration: 30626
loss: 1.028695821762085,grad_norm: 0.9999998066935986, iteration: 30627
loss: 1.0160666704177856,grad_norm: 0.9776465609459818, iteration: 30628
loss: 0.9613136053085327,grad_norm: 0.9999990802070866, iteration: 30629
loss: 0.994816780090332,grad_norm: 0.8887869373153091, iteration: 30630
loss: 0.9878926277160645,grad_norm: 0.9438826408074915, iteration: 30631
loss: 0.985748291015625,grad_norm: 0.8917452906355601, iteration: 30632
loss: 1.025748372077942,grad_norm: 0.9999992010871478, iteration: 30633
loss: 0.9812741279602051,grad_norm: 0.9999992941345061, iteration: 30634
loss: 1.072601556777954,grad_norm: 0.9999996234572609, iteration: 30635
loss: 1.0234651565551758,grad_norm: 0.9999991332177496, iteration: 30636
loss: 1.0156649351119995,grad_norm: 0.9999991371476874, iteration: 30637
loss: 1.0079973936080933,grad_norm: 0.9020355490015067, iteration: 30638
loss: 1.0241492986679077,grad_norm: 0.9999992124333369, iteration: 30639
loss: 1.0350884199142456,grad_norm: 0.9999993385182602, iteration: 30640
loss: 1.0102607011795044,grad_norm: 0.839028662860668, iteration: 30641
loss: 1.013334035873413,grad_norm: 0.9999989374650393, iteration: 30642
loss: 1.0013482570648193,grad_norm: 0.947004119654385, iteration: 30643
loss: 1.0376720428466797,grad_norm: 0.9999993319440343, iteration: 30644
loss: 1.0457032918930054,grad_norm: 0.9532749079638045, iteration: 30645
loss: 1.0094271898269653,grad_norm: 0.9999992501166889, iteration: 30646
loss: 0.9948027729988098,grad_norm: 0.90990764990083, iteration: 30647
loss: 0.9961695075035095,grad_norm: 0.9274252167050052, iteration: 30648
loss: 1.0215920209884644,grad_norm: 0.9549208713169018, iteration: 30649
loss: 0.9832361340522766,grad_norm: 0.9863421637775911, iteration: 30650
loss: 1.014492392539978,grad_norm: 0.9999990050147093, iteration: 30651
loss: 0.9917808175086975,grad_norm: 0.9999990502270376, iteration: 30652
loss: 0.9996194243431091,grad_norm: 0.9999991236216504, iteration: 30653
loss: 1.0628408193588257,grad_norm: 0.999999821494438, iteration: 30654
loss: 1.0097346305847168,grad_norm: 0.9999991497781525, iteration: 30655
loss: 1.0315927267074585,grad_norm: 0.8982592383871079, iteration: 30656
loss: 1.0076600313186646,grad_norm: 0.9824258002363203, iteration: 30657
loss: 0.9774487614631653,grad_norm: 0.9883813789216425, iteration: 30658
loss: 1.0696138143539429,grad_norm: 0.9999998165154895, iteration: 30659
loss: 0.9982265830039978,grad_norm: 0.9999990863785945, iteration: 30660
loss: 1.0241984128952026,grad_norm: 0.9999993349573946, iteration: 30661
loss: 1.0463019609451294,grad_norm: 0.9999992889043272, iteration: 30662
loss: 1.0170707702636719,grad_norm: 0.8219921327077561, iteration: 30663
loss: 1.0681397914886475,grad_norm: 0.9999991018209865, iteration: 30664
loss: 0.9805538058280945,grad_norm: 0.910204495122155, iteration: 30665
loss: 1.0378223657608032,grad_norm: 0.8046518831836257, iteration: 30666
loss: 1.0017160177230835,grad_norm: 0.9999991030127754, iteration: 30667
loss: 1.0313258171081543,grad_norm: 0.9999990460832199, iteration: 30668
loss: 1.0092384815216064,grad_norm: 0.8660766480701453, iteration: 30669
loss: 1.0227587223052979,grad_norm: 0.8840935426212695, iteration: 30670
loss: 0.9821173548698425,grad_norm: 0.9999998395879375, iteration: 30671
loss: 1.0136356353759766,grad_norm: 0.9999991151096593, iteration: 30672
loss: 0.9892181158065796,grad_norm: 0.9286404730829997, iteration: 30673
loss: 0.9911476969718933,grad_norm: 0.8987366333729315, iteration: 30674
loss: 1.0340142250061035,grad_norm: 0.9999991246580117, iteration: 30675
loss: 0.9770830869674683,grad_norm: 0.9999991368794758, iteration: 30676
loss: 0.9767851829528809,grad_norm: 0.9999989759575285, iteration: 30677
loss: 0.9960898160934448,grad_norm: 0.8254785307717319, iteration: 30678
loss: 0.9818713068962097,grad_norm: 0.9020513912044928, iteration: 30679
loss: 1.0252400636672974,grad_norm: 0.9777249740166644, iteration: 30680
loss: 1.0735387802124023,grad_norm: 0.9999994390398772, iteration: 30681
loss: 1.0462101697921753,grad_norm: 0.885545616861311, iteration: 30682
loss: 0.9888991117477417,grad_norm: 0.9999997707202828, iteration: 30683
loss: 1.0056368112564087,grad_norm: 0.7513705869605471, iteration: 30684
loss: 1.0515092611312866,grad_norm: 0.9999993314576896, iteration: 30685
loss: 1.0408005714416504,grad_norm: 0.9999992079539368, iteration: 30686
loss: 0.9632058143615723,grad_norm: 0.9999994400676537, iteration: 30687
loss: 1.0743248462677002,grad_norm: 0.9999991112687127, iteration: 30688
loss: 1.0257021188735962,grad_norm: 0.9496777320052096, iteration: 30689
loss: 1.0240821838378906,grad_norm: 0.9999992410022003, iteration: 30690
loss: 1.0551586151123047,grad_norm: 0.9999991304835065, iteration: 30691
loss: 1.0642930269241333,grad_norm: 0.9667485649917997, iteration: 30692
loss: 1.024846076965332,grad_norm: 0.9999991024370166, iteration: 30693
loss: 1.0667154788970947,grad_norm: 0.9999989566571414, iteration: 30694
loss: 0.99141526222229,grad_norm: 0.9999997069951405, iteration: 30695
loss: 0.9991781711578369,grad_norm: 0.9931423739969124, iteration: 30696
loss: 1.0087471008300781,grad_norm: 0.9999992269497134, iteration: 30697
loss: 0.9862716197967529,grad_norm: 0.9999998947730099, iteration: 30698
loss: 1.037089228630066,grad_norm: 0.9999990896425123, iteration: 30699
loss: 1.0124199390411377,grad_norm: 0.9999990375823137, iteration: 30700
loss: 0.9844367504119873,grad_norm: 0.9999993979055558, iteration: 30701
loss: 1.0095477104187012,grad_norm: 0.9554289723643721, iteration: 30702
loss: 1.0305593013763428,grad_norm: 0.9665683779192803, iteration: 30703
loss: 0.9806174635887146,grad_norm: 0.8945443568865343, iteration: 30704
loss: 1.0102015733718872,grad_norm: 0.9494532877463494, iteration: 30705
loss: 1.0188417434692383,grad_norm: 0.9201059959023976, iteration: 30706
loss: 1.0279932022094727,grad_norm: 0.9471761187635207, iteration: 30707
loss: 1.0293461084365845,grad_norm: 0.9999992883019224, iteration: 30708
loss: 1.0077781677246094,grad_norm: 0.8099941534079736, iteration: 30709
loss: 0.96346515417099,grad_norm: 0.8675428471078759, iteration: 30710
loss: 1.0649386644363403,grad_norm: 0.9999996097021278, iteration: 30711
loss: 0.9758160710334778,grad_norm: 0.9444484832851059, iteration: 30712
loss: 1.090965747833252,grad_norm: 0.9999999889817532, iteration: 30713
loss: 0.9870445728302002,grad_norm: 0.8375987398705743, iteration: 30714
loss: 1.0136076211929321,grad_norm: 0.9999997393666952, iteration: 30715
loss: 1.0085194110870361,grad_norm: 0.9891685728548153, iteration: 30716
loss: 1.041829228401184,grad_norm: 0.999999222616672, iteration: 30717
loss: 0.9823244214057922,grad_norm: 0.8704268283332959, iteration: 30718
loss: 1.0203776359558105,grad_norm: 0.9999991494013479, iteration: 30719
loss: 1.0039077997207642,grad_norm: 0.9999991547637962, iteration: 30720
loss: 1.0146160125732422,grad_norm: 0.9863712700876895, iteration: 30721
loss: 1.0082117319107056,grad_norm: 0.8261595242249818, iteration: 30722
loss: 1.0094465017318726,grad_norm: 0.9999992934843833, iteration: 30723
loss: 1.0306614637374878,grad_norm: 0.9395766088522268, iteration: 30724
loss: 0.9939538836479187,grad_norm: 0.9999990311075055, iteration: 30725
loss: 1.0035783052444458,grad_norm: 0.9136730344009585, iteration: 30726
loss: 0.9959174990653992,grad_norm: 0.9528497805673275, iteration: 30727
loss: 0.9938424229621887,grad_norm: 0.8503241138744605, iteration: 30728
loss: 0.9799297451972961,grad_norm: 0.927596535586603, iteration: 30729
loss: 0.9779350161552429,grad_norm: 0.9999991456399064, iteration: 30730
loss: 0.9982529282569885,grad_norm: 0.8040647242601967, iteration: 30731
loss: 1.0501822233200073,grad_norm: 0.9999992456665674, iteration: 30732
loss: 1.0212451219558716,grad_norm: 0.9999990185545087, iteration: 30733
loss: 1.0204331874847412,grad_norm: 0.9999992098381114, iteration: 30734
loss: 1.0223135948181152,grad_norm: 0.9999992002796867, iteration: 30735
loss: 1.0139011144638062,grad_norm: 0.9543482586945241, iteration: 30736
loss: 1.017695426940918,grad_norm: 0.9999990558848493, iteration: 30737
loss: 1.0528301000595093,grad_norm: 0.9999998116034398, iteration: 30738
loss: 1.046280026435852,grad_norm: 0.9390179190208688, iteration: 30739
loss: 1.0023607015609741,grad_norm: 0.8346331573221377, iteration: 30740
loss: 1.040763258934021,grad_norm: 0.9102062681474667, iteration: 30741
loss: 0.993396520614624,grad_norm: 0.9999990708783203, iteration: 30742
loss: 1.0144327878952026,grad_norm: 0.9198696384422406, iteration: 30743
loss: 1.004299283027649,grad_norm: 0.999999540269492, iteration: 30744
loss: 1.0024514198303223,grad_norm: 0.9304771139711641, iteration: 30745
loss: 1.058533787727356,grad_norm: 0.9999995610676674, iteration: 30746
loss: 1.0007078647613525,grad_norm: 0.9375227751266519, iteration: 30747
loss: 1.0165178775787354,grad_norm: 0.9234121294371727, iteration: 30748
loss: 0.9962629675865173,grad_norm: 0.9950534982319795, iteration: 30749
loss: 1.004091739654541,grad_norm: 0.8955783549370168, iteration: 30750
loss: 0.9834495186805725,grad_norm: 0.8788984917353094, iteration: 30751
loss: 0.9823600649833679,grad_norm: 0.9999990902794762, iteration: 30752
loss: 1.0096614360809326,grad_norm: 0.9106604734135684, iteration: 30753
loss: 1.0169031620025635,grad_norm: 0.9999998814632268, iteration: 30754
loss: 1.0435802936553955,grad_norm: 0.9999997151794371, iteration: 30755
loss: 1.0466450452804565,grad_norm: 0.9999991104724248, iteration: 30756
loss: 1.026450753211975,grad_norm: 0.9999990965091701, iteration: 30757
loss: 1.0037891864776611,grad_norm: 0.9999989601849667, iteration: 30758
loss: 0.9983052015304565,grad_norm: 0.9999990420674526, iteration: 30759
loss: 0.9993302226066589,grad_norm: 0.888720992977947, iteration: 30760
loss: 1.019561529159546,grad_norm: 0.9999991066841498, iteration: 30761
loss: 1.001934289932251,grad_norm: 0.9999991340515254, iteration: 30762
loss: 1.032344102859497,grad_norm: 0.9999997943629094, iteration: 30763
loss: 0.98259037733078,grad_norm: 0.9999990876994852, iteration: 30764
loss: 1.0294585227966309,grad_norm: 0.9384431870959119, iteration: 30765
loss: 1.0354466438293457,grad_norm: 0.9999997799647703, iteration: 30766
loss: 0.9915612936019897,grad_norm: 0.9999991813571877, iteration: 30767
loss: 0.9998149871826172,grad_norm: 0.941694478872755, iteration: 30768
loss: 1.012614130973816,grad_norm: 0.9999990511710328, iteration: 30769
loss: 1.0182737112045288,grad_norm: 0.9999995150360901, iteration: 30770
loss: 0.9971221685409546,grad_norm: 0.9999996313950862, iteration: 30771
loss: 0.9788185358047485,grad_norm: 0.999999185085365, iteration: 30772
loss: 1.031699776649475,grad_norm: 0.9999990675229026, iteration: 30773
loss: 1.0118939876556396,grad_norm: 0.9999990258806598, iteration: 30774
loss: 0.9933047890663147,grad_norm: 0.9999992152557192, iteration: 30775
loss: 0.9704954624176025,grad_norm: 0.8141527678433358, iteration: 30776
loss: 1.0103858709335327,grad_norm: 0.9999992578189786, iteration: 30777
loss: 1.0012257099151611,grad_norm: 0.9999990910426985, iteration: 30778
loss: 1.0104187726974487,grad_norm: 0.9999993544666352, iteration: 30779
loss: 0.9532710313796997,grad_norm: 0.9999990548661626, iteration: 30780
loss: 1.022823452949524,grad_norm: 0.9999991483469207, iteration: 30781
loss: 1.0202914476394653,grad_norm: 0.9999991449817571, iteration: 30782
loss: 0.9843382239341736,grad_norm: 0.9999990946895054, iteration: 30783
loss: 1.0361382961273193,grad_norm: 0.9999989533882205, iteration: 30784
loss: 1.0295097827911377,grad_norm: 0.9579588234218249, iteration: 30785
loss: 1.021671175956726,grad_norm: 0.9999991232090719, iteration: 30786
loss: 0.9761685132980347,grad_norm: 0.9663618742355595, iteration: 30787
loss: 1.0028234720230103,grad_norm: 0.9035576368126209, iteration: 30788
loss: 1.0566226243972778,grad_norm: 0.9999995738248685, iteration: 30789
loss: 0.9801240563392639,grad_norm: 0.9999993584736092, iteration: 30790
loss: 0.9880663752555847,grad_norm: 0.9221015504554759, iteration: 30791
loss: 1.0510985851287842,grad_norm: 0.999999931120359, iteration: 30792
loss: 1.0161737203598022,grad_norm: 0.9578170842037339, iteration: 30793
loss: 1.0268172025680542,grad_norm: 0.9999994302359675, iteration: 30794
loss: 1.018470287322998,grad_norm: 0.999999412134328, iteration: 30795
loss: 0.9633358716964722,grad_norm: 0.9999990448653967, iteration: 30796
loss: 1.0743566751480103,grad_norm: 0.9999994377031047, iteration: 30797
loss: 1.0925873517990112,grad_norm: 0.9999996088406774, iteration: 30798
loss: 1.0157006978988647,grad_norm: 0.9999989978149746, iteration: 30799
loss: 1.0144091844558716,grad_norm: 0.9027769467259437, iteration: 30800
loss: 1.0141773223876953,grad_norm: 0.8315451332674989, iteration: 30801
loss: 1.0207394361495972,grad_norm: 0.9809952561966974, iteration: 30802
loss: 0.9869771599769592,grad_norm: 0.938040273264503, iteration: 30803
loss: 1.1171859502792358,grad_norm: 0.9999989978626608, iteration: 30804
loss: 1.0205483436584473,grad_norm: 0.9999992467682666, iteration: 30805
loss: 0.9771749973297119,grad_norm: 0.9999991582501799, iteration: 30806
loss: 0.9918974041938782,grad_norm: 0.9999998658208265, iteration: 30807
loss: 1.009331226348877,grad_norm: 0.9999994678793224, iteration: 30808
loss: 1.018161654472351,grad_norm: 0.8802265868669817, iteration: 30809
loss: 1.0126463174819946,grad_norm: 0.9999990524873806, iteration: 30810
loss: 0.9996749758720398,grad_norm: 0.9999990185027298, iteration: 30811
loss: 1.1261972188949585,grad_norm: 0.9999996727298062, iteration: 30812
loss: 1.0369205474853516,grad_norm: 0.9999992537300642, iteration: 30813
loss: 1.0206996202468872,grad_norm: 0.9999990456683118, iteration: 30814
loss: 0.9891437888145447,grad_norm: 0.9999991038573396, iteration: 30815
loss: 0.994870662689209,grad_norm: 0.9112673700295689, iteration: 30816
loss: 1.0203423500061035,grad_norm: 0.9540534931893428, iteration: 30817
loss: 0.9969262480735779,grad_norm: 0.8845607135294683, iteration: 30818
loss: 1.026516318321228,grad_norm: 0.9999992744893615, iteration: 30819
loss: 1.017391562461853,grad_norm: 0.9999990745181806, iteration: 30820
loss: 1.0087478160858154,grad_norm: 0.9999991490951418, iteration: 30821
loss: 1.0302170515060425,grad_norm: 0.9999990691797405, iteration: 30822
loss: 1.0008009672164917,grad_norm: 0.9999991054309242, iteration: 30823
loss: 1.0160294771194458,grad_norm: 0.7972982367372662, iteration: 30824
loss: 0.9997692108154297,grad_norm: 0.9999990454846498, iteration: 30825
loss: 1.0178053379058838,grad_norm: 0.9281432881067869, iteration: 30826
loss: 1.0019700527191162,grad_norm: 0.7816234099962852, iteration: 30827
loss: 1.0143687725067139,grad_norm: 0.9027306560567332, iteration: 30828
loss: 1.0011018514633179,grad_norm: 0.9999992115424176, iteration: 30829
loss: 1.0508852005004883,grad_norm: 0.9210039099814904, iteration: 30830
loss: 0.9696815013885498,grad_norm: 0.9999996238484689, iteration: 30831
loss: 1.0010923147201538,grad_norm: 0.9999992620637551, iteration: 30832
loss: 0.9894161820411682,grad_norm: 0.9461333129498584, iteration: 30833
loss: 0.96424800157547,grad_norm: 0.9999992111561044, iteration: 30834
loss: 1.0494985580444336,grad_norm: 0.9852090190518663, iteration: 30835
loss: 1.0355684757232666,grad_norm: 0.9999991303203032, iteration: 30836
loss: 1.149941325187683,grad_norm: 0.9999996817687039, iteration: 30837
loss: 1.047806739807129,grad_norm: 0.9999994728878194, iteration: 30838
loss: 1.0312647819519043,grad_norm: 0.999999206144639, iteration: 30839
loss: 1.0402292013168335,grad_norm: 0.9999991744395909, iteration: 30840
loss: 0.9947507381439209,grad_norm: 0.9999995677554119, iteration: 30841
loss: 1.0277128219604492,grad_norm: 0.8715672961440137, iteration: 30842
loss: 1.0014535188674927,grad_norm: 0.9999992072803349, iteration: 30843
loss: 1.005036473274231,grad_norm: 0.9999991458727459, iteration: 30844
loss: 0.9825429320335388,grad_norm: 0.9999990568949418, iteration: 30845
loss: 1.0227090120315552,grad_norm: 0.9468552131630363, iteration: 30846
loss: 1.0084187984466553,grad_norm: 0.9999993673412154, iteration: 30847
loss: 1.0099787712097168,grad_norm: 0.7874524175527501, iteration: 30848
loss: 0.9575081467628479,grad_norm: 0.9999992809545729, iteration: 30849
loss: 0.9546952247619629,grad_norm: 0.9999991761549079, iteration: 30850
loss: 0.9997097253799438,grad_norm: 0.9999994024475385, iteration: 30851
loss: 1.0980911254882812,grad_norm: 0.9999992591611665, iteration: 30852
loss: 1.0345958471298218,grad_norm: 0.9999992445464359, iteration: 30853
loss: 0.9990742206573486,grad_norm: 0.9999992474700262, iteration: 30854
loss: 1.0640405416488647,grad_norm: 0.9999993442152199, iteration: 30855
loss: 1.0130202770233154,grad_norm: 0.9999994827939455, iteration: 30856
loss: 0.9648236036300659,grad_norm: 0.9999991169790691, iteration: 30857
loss: 1.0183035135269165,grad_norm: 0.8892598411606211, iteration: 30858
loss: 1.0139288902282715,grad_norm: 0.9999991388924193, iteration: 30859
loss: 0.9725697040557861,grad_norm: 0.9641777303291431, iteration: 30860
loss: 1.0008912086486816,grad_norm: 0.9999989423072831, iteration: 30861
loss: 1.0520035028457642,grad_norm: 0.9999990875913577, iteration: 30862
loss: 1.0216429233551025,grad_norm: 0.7957998170116639, iteration: 30863
loss: 1.0068614482879639,grad_norm: 0.999999058444433, iteration: 30864
loss: 1.0155808925628662,grad_norm: 0.9999990876474352, iteration: 30865
loss: 1.004976749420166,grad_norm: 0.9170948027843634, iteration: 30866
loss: 1.050248622894287,grad_norm: 0.9401430459313386, iteration: 30867
loss: 1.0248695611953735,grad_norm: 0.9999991961990973, iteration: 30868
loss: 0.9922270178794861,grad_norm: 0.9842237209388005, iteration: 30869
loss: 1.0239156484603882,grad_norm: 0.9999990582671983, iteration: 30870
loss: 1.011432409286499,grad_norm: 0.9999992030279075, iteration: 30871
loss: 1.0188987255096436,grad_norm: 0.9999992726975807, iteration: 30872
loss: 0.95639568567276,grad_norm: 0.9326448991460401, iteration: 30873
loss: 1.009394645690918,grad_norm: 0.9999993015616246, iteration: 30874
loss: 1.0146610736846924,grad_norm: 0.8341653593415929, iteration: 30875
loss: 0.9600584506988525,grad_norm: 0.9999990631925688, iteration: 30876
loss: 1.0084810256958008,grad_norm: 0.9829763261213423, iteration: 30877
loss: 0.9877443313598633,grad_norm: 0.9999992168750892, iteration: 30878
loss: 1.0034674406051636,grad_norm: 0.9999989461135596, iteration: 30879
loss: 1.0415711402893066,grad_norm: 0.9999993994844281, iteration: 30880
loss: 1.0094432830810547,grad_norm: 0.9999992367840833, iteration: 30881
loss: 1.0042306184768677,grad_norm: 0.8408082048635607, iteration: 30882
loss: 0.9869743585586548,grad_norm: 0.9999991405482929, iteration: 30883
loss: 1.0100266933441162,grad_norm: 0.9451199718558485, iteration: 30884
loss: 1.0093894004821777,grad_norm: 0.9999991815367916, iteration: 30885
loss: 1.00101637840271,grad_norm: 0.8963734053399226, iteration: 30886
loss: 1.0148351192474365,grad_norm: 0.911461099522859, iteration: 30887
loss: 1.0157229900360107,grad_norm: 0.9233393807658468, iteration: 30888
loss: 1.0481733083724976,grad_norm: 0.9999992707663977, iteration: 30889
loss: 0.9789490103721619,grad_norm: 0.9365646518106197, iteration: 30890
loss: 1.0070507526397705,grad_norm: 0.7754392435358114, iteration: 30891
loss: 1.0186982154846191,grad_norm: 0.9999993444511122, iteration: 30892
loss: 0.9760075211524963,grad_norm: 0.9108224488889998, iteration: 30893
loss: 0.9774461984634399,grad_norm: 0.9999990331795897, iteration: 30894
loss: 0.9947749972343445,grad_norm: 0.9959083416589536, iteration: 30895
loss: 0.975562334060669,grad_norm: 0.9730086702829746, iteration: 30896
loss: 1.0332449674606323,grad_norm: 0.8473804836085926, iteration: 30897
loss: 1.0169813632965088,grad_norm: 0.9999992391321161, iteration: 30898
loss: 1.004124641418457,grad_norm: 0.9999994192062139, iteration: 30899
loss: 1.0072697401046753,grad_norm: 0.999999731405093, iteration: 30900
loss: 1.0487940311431885,grad_norm: 0.9999996637673323, iteration: 30901
loss: 0.9973320960998535,grad_norm: 0.9999992436099067, iteration: 30902
loss: 1.0038970708847046,grad_norm: 0.9776393294925917, iteration: 30903
loss: 1.0272856950759888,grad_norm: 0.9999990061374269, iteration: 30904
loss: 1.0029829740524292,grad_norm: 0.9794342945683198, iteration: 30905
loss: 1.0106079578399658,grad_norm: 0.9999991848695846, iteration: 30906
loss: 0.9740915894508362,grad_norm: 0.9999992095800915, iteration: 30907
loss: 0.9926534295082092,grad_norm: 0.9999993310741297, iteration: 30908
loss: 1.0064657926559448,grad_norm: 0.9216802478977885, iteration: 30909
loss: 1.0276819467544556,grad_norm: 0.9999996256086167, iteration: 30910
loss: 0.9883881211280823,grad_norm: 0.8199315166219427, iteration: 30911
loss: 1.0476337671279907,grad_norm: 0.9999992886501413, iteration: 30912
loss: 1.0204873085021973,grad_norm: 0.9999990216452086, iteration: 30913
loss: 0.9773229956626892,grad_norm: 0.9910698028292341, iteration: 30914
loss: 1.0619226694107056,grad_norm: 0.999999537194232, iteration: 30915
loss: 1.0048844814300537,grad_norm: 0.890521007103119, iteration: 30916
loss: 1.0227717161178589,grad_norm: 0.9999992171129504, iteration: 30917
loss: 1.0019598007202148,grad_norm: 0.9732113698643524, iteration: 30918
loss: 1.0176302194595337,grad_norm: 0.9087546121356372, iteration: 30919
loss: 0.9788320660591125,grad_norm: 0.9672401938699593, iteration: 30920
loss: 0.9950584769248962,grad_norm: 0.9999991654329404, iteration: 30921
loss: 1.0353127717971802,grad_norm: 0.9999989942802681, iteration: 30922
loss: 1.0328692197799683,grad_norm: 0.9038444527256103, iteration: 30923
loss: 0.9975282549858093,grad_norm: 0.9999990799561582, iteration: 30924
loss: 1.0180068016052246,grad_norm: 0.9999994417078091, iteration: 30925
loss: 1.0337352752685547,grad_norm: 0.929372223405007, iteration: 30926
loss: 1.0058047771453857,grad_norm: 0.8707720911030696, iteration: 30927
loss: 1.0253366231918335,grad_norm: 0.9999992560277327, iteration: 30928
loss: 0.9962107539176941,grad_norm: 0.9999992463954092, iteration: 30929
loss: 1.012608528137207,grad_norm: 0.8643645448676809, iteration: 30930
loss: 1.0220997333526611,grad_norm: 0.9414219603829544, iteration: 30931
loss: 1.108381748199463,grad_norm: 0.9999998417433823, iteration: 30932
loss: 0.9796149134635925,grad_norm: 0.9999990932830488, iteration: 30933
loss: 1.0165684223175049,grad_norm: 0.9999996325392284, iteration: 30934
loss: 0.9930947422981262,grad_norm: 0.8813486206953615, iteration: 30935
loss: 1.0037555694580078,grad_norm: 0.8983282926897269, iteration: 30936
loss: 1.0292538404464722,grad_norm: 0.9862432932036611, iteration: 30937
loss: 0.9934144616127014,grad_norm: 0.9999990686062589, iteration: 30938
loss: 1.033250093460083,grad_norm: 0.8929029594407641, iteration: 30939
loss: 1.0308500528335571,grad_norm: 0.9907531939093038, iteration: 30940
loss: 1.0178810358047485,grad_norm: 0.949058791954562, iteration: 30941
loss: 1.0026812553405762,grad_norm: 0.9999991299389974, iteration: 30942
loss: 1.0300060510635376,grad_norm: 0.9999994217586735, iteration: 30943
loss: 0.9965810775756836,grad_norm: 0.9999995458455448, iteration: 30944
loss: 1.014322280883789,grad_norm: 0.9507916786054005, iteration: 30945
loss: 1.002766489982605,grad_norm: 0.9999993615741068, iteration: 30946
loss: 0.9794560074806213,grad_norm: 0.9999991851590642, iteration: 30947
loss: 1.0145518779754639,grad_norm: 0.927825361451724, iteration: 30948
loss: 1.0049337148666382,grad_norm: 0.9756120706245021, iteration: 30949
loss: 1.0342419147491455,grad_norm: 0.9999991287334508, iteration: 30950
loss: 1.0048757791519165,grad_norm: 0.8657264669689544, iteration: 30951
loss: 1.019424319267273,grad_norm: 0.9999994487083171, iteration: 30952
loss: 0.9763004183769226,grad_norm: 0.9999990423759993, iteration: 30953
loss: 0.9905182123184204,grad_norm: 0.9999992111971443, iteration: 30954
loss: 1.044173002243042,grad_norm: 0.9999996108400565, iteration: 30955
loss: 1.0150870084762573,grad_norm: 0.9999991195759762, iteration: 30956
loss: 1.0274640321731567,grad_norm: 0.9186765665889662, iteration: 30957
loss: 1.0235017538070679,grad_norm: 0.999999194103315, iteration: 30958
loss: 1.0172938108444214,grad_norm: 0.8290984264337703, iteration: 30959
loss: 1.0390247106552124,grad_norm: 0.9999991536548557, iteration: 30960
loss: 1.0275425910949707,grad_norm: 0.9999992788081878, iteration: 30961
loss: 0.9971725940704346,grad_norm: 0.9999989625718405, iteration: 30962
loss: 0.9906250834465027,grad_norm: 0.8777455919221124, iteration: 30963
loss: 1.003373622894287,grad_norm: 0.9738377761406777, iteration: 30964
loss: 1.0123497247695923,grad_norm: 0.8877990848414973, iteration: 30965
loss: 1.0142112970352173,grad_norm: 0.9728000805338503, iteration: 30966
loss: 1.0412970781326294,grad_norm: 0.9846185821485122, iteration: 30967
loss: 0.9741986393928528,grad_norm: 0.9999991554069806, iteration: 30968
loss: 1.0062884092330933,grad_norm: 0.9568633119965128, iteration: 30969
loss: 1.0018573999404907,grad_norm: 0.9999991478131215, iteration: 30970
loss: 0.9833793044090271,grad_norm: 0.926336365761991, iteration: 30971
loss: 0.9898725152015686,grad_norm: 0.8978971080532794, iteration: 30972
loss: 1.0620747804641724,grad_norm: 0.9999991857778892, iteration: 30973
loss: 1.1058762073516846,grad_norm: 0.9999994194985122, iteration: 30974
loss: 1.0297592878341675,grad_norm: 0.9206620811621663, iteration: 30975
loss: 0.9931471943855286,grad_norm: 0.8773714414411352, iteration: 30976
loss: 0.989859402179718,grad_norm: 0.9477692346554993, iteration: 30977
loss: 1.0283221006393433,grad_norm: 0.8379292173608192, iteration: 30978
loss: 0.9776018857955933,grad_norm: 0.9999990349357295, iteration: 30979
loss: 1.0292576551437378,grad_norm: 0.9999990210035056, iteration: 30980
loss: 0.9879457950592041,grad_norm: 0.9085481870487702, iteration: 30981
loss: 1.0831061601638794,grad_norm: 0.9999991092791204, iteration: 30982
loss: 1.0466501712799072,grad_norm: 0.9999995517597485, iteration: 30983
loss: 1.0616689920425415,grad_norm: 0.9999998455461254, iteration: 30984
loss: 0.9890121817588806,grad_norm: 0.9513537171895682, iteration: 30985
loss: 1.023085117340088,grad_norm: 0.9999991725513347, iteration: 30986
loss: 0.9693727493286133,grad_norm: 0.9999996754059493, iteration: 30987
loss: 0.9855273962020874,grad_norm: 0.8928535792266815, iteration: 30988
loss: 0.9932350516319275,grad_norm: 0.9494049553071497, iteration: 30989
loss: 1.1163746118545532,grad_norm: 1.0000000166621519, iteration: 30990
loss: 1.0241085290908813,grad_norm: 0.9999990897830916, iteration: 30991
loss: 1.0065970420837402,grad_norm: 0.969292896539556, iteration: 30992
loss: 0.9771415591239929,grad_norm: 0.7477339679148131, iteration: 30993
loss: 1.0115042924880981,grad_norm: 0.996357217014698, iteration: 30994
loss: 1.0116686820983887,grad_norm: 0.999999563217009, iteration: 30995
loss: 1.103958010673523,grad_norm: 0.9999997032551116, iteration: 30996
loss: 1.0014559030532837,grad_norm: 0.9999990942032444, iteration: 30997
loss: 0.9767677783966064,grad_norm: 0.6945318980371981, iteration: 30998
loss: 1.0048891305923462,grad_norm: 0.9999990266470009, iteration: 30999
loss: 1.0689706802368164,grad_norm: 0.9999995997269054, iteration: 31000
loss: 0.9898489713668823,grad_norm: 0.999999118333582, iteration: 31001
loss: 1.031031847000122,grad_norm: 0.959911395811737, iteration: 31002
loss: 0.9840049743652344,grad_norm: 0.9064307185311816, iteration: 31003
loss: 1.0184242725372314,grad_norm: 0.8381405000188336, iteration: 31004
loss: 0.9852531552314758,grad_norm: 0.9999990871234182, iteration: 31005
loss: 0.9898833632469177,grad_norm: 0.9368156682870905, iteration: 31006
loss: 1.0000205039978027,grad_norm: 0.9444805209130394, iteration: 31007
loss: 1.010268211364746,grad_norm: 0.9416127067487395, iteration: 31008
loss: 1.0012969970703125,grad_norm: 0.9999991058731539, iteration: 31009
loss: 1.1718039512634277,grad_norm: 0.9999991342900495, iteration: 31010
loss: 1.0516669750213623,grad_norm: 0.9999995313644524, iteration: 31011
loss: 1.0271189212799072,grad_norm: 0.9068733543203497, iteration: 31012
loss: 1.0362932682037354,grad_norm: 0.9987674618056316, iteration: 31013
loss: 1.0440789461135864,grad_norm: 0.9999990563850387, iteration: 31014
loss: 1.0064414739608765,grad_norm: 0.9999991014906583, iteration: 31015
loss: 1.0340536832809448,grad_norm: 0.8895322507982196, iteration: 31016
loss: 1.0517476797103882,grad_norm: 0.9999994526950606, iteration: 31017
loss: 1.0281168222427368,grad_norm: 0.8454286154487407, iteration: 31018
loss: 1.0232720375061035,grad_norm: 0.999999726081979, iteration: 31019
loss: 1.042044758796692,grad_norm: 0.9952851942094899, iteration: 31020
loss: 0.9805161356925964,grad_norm: 0.9207189208149327, iteration: 31021
loss: 1.1491330862045288,grad_norm: 0.9999990910866738, iteration: 31022
loss: 1.022784948348999,grad_norm: 0.9999992885565965, iteration: 31023
loss: 1.0173569917678833,grad_norm: 0.9210180686650119, iteration: 31024
loss: 0.991814911365509,grad_norm: 0.9147744357925122, iteration: 31025
loss: 0.9832731485366821,grad_norm: 0.9999990720806746, iteration: 31026
loss: 1.1011195182800293,grad_norm: 0.9999992029154486, iteration: 31027
loss: 0.9847238063812256,grad_norm: 0.9999993139456858, iteration: 31028
loss: 1.0354079008102417,grad_norm: 0.9999990201553459, iteration: 31029
loss: 0.9894649982452393,grad_norm: 0.7940072175046541, iteration: 31030
loss: 0.999298095703125,grad_norm: 0.9999998168254689, iteration: 31031
loss: 1.029014229774475,grad_norm: 0.9376086379082706, iteration: 31032
loss: 1.0339676141738892,grad_norm: 0.9839035730388603, iteration: 31033
loss: 1.0410810708999634,grad_norm: 0.9999990372144596, iteration: 31034
loss: 1.0382773876190186,grad_norm: 0.9999992358900519, iteration: 31035
loss: 1.0191667079925537,grad_norm: 0.9197665547446564, iteration: 31036
loss: 0.9705803990364075,grad_norm: 0.9280709009142555, iteration: 31037
loss: 1.007828950881958,grad_norm: 0.999999206979856, iteration: 31038
loss: 0.9762207865715027,grad_norm: 0.9982640515868549, iteration: 31039
loss: 0.9748942852020264,grad_norm: 0.9999991030452945, iteration: 31040
loss: 1.0447702407836914,grad_norm: 0.9999995552508462, iteration: 31041
loss: 1.0283029079437256,grad_norm: 0.9999990544988883, iteration: 31042
loss: 0.9860276579856873,grad_norm: 0.9512872380254951, iteration: 31043
loss: 1.0355342626571655,grad_norm: 0.9999992735109734, iteration: 31044
loss: 1.025601863861084,grad_norm: 0.9999991782340427, iteration: 31045
loss: 1.0991507768630981,grad_norm: 0.9999992129376997, iteration: 31046
loss: 1.009139060974121,grad_norm: 0.9999991954130553, iteration: 31047
loss: 0.9547780156135559,grad_norm: 0.9999993171641417, iteration: 31048
loss: 0.9961938858032227,grad_norm: 0.999999132174904, iteration: 31049
loss: 0.9988856315612793,grad_norm: 0.9554074092465755, iteration: 31050
loss: 1.0058647394180298,grad_norm: 0.8994363172659037, iteration: 31051
loss: 1.007013201713562,grad_norm: 0.8576006858672491, iteration: 31052
loss: 0.9986827969551086,grad_norm: 0.8412684240201257, iteration: 31053
loss: 0.996540904045105,grad_norm: 0.9235825085468722, iteration: 31054
loss: 1.1007400751113892,grad_norm: 0.9999993346124109, iteration: 31055
loss: 0.9388349056243896,grad_norm: 0.9137850208525589, iteration: 31056
loss: 1.02229642868042,grad_norm: 0.9222575888120149, iteration: 31057
loss: 0.9929553270339966,grad_norm: 0.9999989595892185, iteration: 31058
loss: 0.9960994124412537,grad_norm: 0.9250776231135592, iteration: 31059
loss: 1.0086696147918701,grad_norm: 0.9739121253554545, iteration: 31060
loss: 1.0022306442260742,grad_norm: 0.99999904797193, iteration: 31061
loss: 1.0556951761245728,grad_norm: 0.9999996036642289, iteration: 31062
loss: 1.0215389728546143,grad_norm: 0.9999991414842712, iteration: 31063
loss: 1.0435359477996826,grad_norm: 0.9999995856345544, iteration: 31064
loss: 1.0036853551864624,grad_norm: 0.8527526190161011, iteration: 31065
loss: 1.0232905149459839,grad_norm: 0.9999991980681519, iteration: 31066
loss: 0.9977863430976868,grad_norm: 0.9999991476489805, iteration: 31067
loss: 0.9879333972930908,grad_norm: 0.8746748796079512, iteration: 31068
loss: 0.9606552124023438,grad_norm: 0.9999991341635007, iteration: 31069
loss: 0.9962583780288696,grad_norm: 0.9999991192748489, iteration: 31070
loss: 1.0955331325531006,grad_norm: 0.999999529872996, iteration: 31071
loss: 1.0468199253082275,grad_norm: 0.9457515227178057, iteration: 31072
loss: 1.0494611263275146,grad_norm: 0.9999992621649473, iteration: 31073
loss: 1.014793038368225,grad_norm: 0.9982102376411573, iteration: 31074
loss: 1.015872836112976,grad_norm: 0.9999989758451184, iteration: 31075
loss: 0.9955982565879822,grad_norm: 0.999999541925883, iteration: 31076
loss: 1.0321712493896484,grad_norm: 0.8720499662977758, iteration: 31077
loss: 0.9717203974723816,grad_norm: 0.9999838527491999, iteration: 31078
loss: 1.0074256658554077,grad_norm: 0.8576199684980439, iteration: 31079
loss: 1.0239437818527222,grad_norm: 0.9999991266520367, iteration: 31080
loss: 0.9825085997581482,grad_norm: 0.9117477520542581, iteration: 31081
loss: 1.032692313194275,grad_norm: 0.9999989448316804, iteration: 31082
loss: 0.9715763330459595,grad_norm: 0.9999989891267261, iteration: 31083
loss: 1.0070759057998657,grad_norm: 0.9999991472482748, iteration: 31084
loss: 0.9592265486717224,grad_norm: 0.972158981178231, iteration: 31085
loss: 1.0301814079284668,grad_norm: 0.9999991608566203, iteration: 31086
loss: 1.051510214805603,grad_norm: 0.9999990233701354, iteration: 31087
loss: 1.0088562965393066,grad_norm: 0.9999988963850966, iteration: 31088
loss: 1.0385531187057495,grad_norm: 0.9999994228332794, iteration: 31089
loss: 1.0197715759277344,grad_norm: 0.9999991606938518, iteration: 31090
loss: 0.9541221261024475,grad_norm: 0.916159183723511, iteration: 31091
loss: 1.0161575078964233,grad_norm: 0.9999995415978237, iteration: 31092
loss: 0.9989987015724182,grad_norm: 0.9638732106383635, iteration: 31093
loss: 1.033591389656067,grad_norm: 0.7517051287255312, iteration: 31094
loss: 0.9897657632827759,grad_norm: 0.9999994760103877, iteration: 31095
loss: 1.0410314798355103,grad_norm: 0.9347685406967844, iteration: 31096
loss: 0.9865711331367493,grad_norm: 0.9999992195587512, iteration: 31097
loss: 1.0426193475723267,grad_norm: 0.9195509084338527, iteration: 31098
loss: 1.0104063749313354,grad_norm: 0.999738670945519, iteration: 31099
loss: 1.0142830610275269,grad_norm: 0.9302106064755167, iteration: 31100
loss: 1.0602116584777832,grad_norm: 0.9999996967968574, iteration: 31101
loss: 0.9758822917938232,grad_norm: 0.8647964371009461, iteration: 31102
loss: 1.0140469074249268,grad_norm: 0.9999991407560681, iteration: 31103
loss: 1.052154541015625,grad_norm: 0.9285795152644053, iteration: 31104
loss: 1.0277533531188965,grad_norm: 0.9999990943267768, iteration: 31105
loss: 1.0014982223510742,grad_norm: 0.9999991174086427, iteration: 31106
loss: 1.0166300535202026,grad_norm: 0.9999992109632626, iteration: 31107
loss: 1.027991771697998,grad_norm: 0.9999992521257276, iteration: 31108
loss: 1.009068250656128,grad_norm: 0.8730550005982837, iteration: 31109
loss: 1.0334093570709229,grad_norm: 0.8971886610035728, iteration: 31110
loss: 0.995023250579834,grad_norm: 0.9999991525412577, iteration: 31111
loss: 0.9961133599281311,grad_norm: 0.9999991114664104, iteration: 31112
loss: 0.9804719686508179,grad_norm: 0.9999991921809704, iteration: 31113
loss: 1.027411699295044,grad_norm: 0.9999990998895737, iteration: 31114
loss: 0.9915242791175842,grad_norm: 0.9999992266711543, iteration: 31115
loss: 0.9615358710289001,grad_norm: 0.9999992138189445, iteration: 31116
loss: 1.0365407466888428,grad_norm: 0.9999994261319948, iteration: 31117
loss: 1.0465068817138672,grad_norm: 0.9999992776120098, iteration: 31118
loss: 1.0078142881393433,grad_norm: 0.9999991752604067, iteration: 31119
loss: 0.9879552721977234,grad_norm: 0.8606387443504505, iteration: 31120
loss: 0.9626998901367188,grad_norm: 0.9632211218814035, iteration: 31121
loss: 1.043291449546814,grad_norm: 0.9999989664140164, iteration: 31122
loss: 1.0086209774017334,grad_norm: 0.9999990860989832, iteration: 31123
loss: 0.9879587888717651,grad_norm: 0.807749674785427, iteration: 31124
loss: 0.9615466594696045,grad_norm: 0.9999992390893386, iteration: 31125
loss: 0.9951279163360596,grad_norm: 0.9999991508704417, iteration: 31126
loss: 1.029447317123413,grad_norm: 0.9999991990493964, iteration: 31127
loss: 0.9809849858283997,grad_norm: 0.8225594754286274, iteration: 31128
loss: 0.9954003095626831,grad_norm: 0.8517943898300542, iteration: 31129
loss: 0.9796847105026245,grad_norm: 0.9855908439726395, iteration: 31130
loss: 1.0009965896606445,grad_norm: 0.957698705881704, iteration: 31131
loss: 1.000177264213562,grad_norm: 0.999999456532308, iteration: 31132
loss: 1.0154962539672852,grad_norm: 0.9830658567453627, iteration: 31133
loss: 1.0061224699020386,grad_norm: 0.999999008636383, iteration: 31134
loss: 1.0028373003005981,grad_norm: 0.9515129493712743, iteration: 31135
loss: 1.0344572067260742,grad_norm: 0.999999072379906, iteration: 31136
loss: 1.0021042823791504,grad_norm: 0.9585788022013991, iteration: 31137
loss: 0.9856186509132385,grad_norm: 0.9999993096616011, iteration: 31138
loss: 0.9938363432884216,grad_norm: 0.9999991351845728, iteration: 31139
loss: 0.9698022603988647,grad_norm: 0.9999991213062494, iteration: 31140
loss: 0.9868918657302856,grad_norm: 0.9677334107806068, iteration: 31141
loss: 1.0096373558044434,grad_norm: 0.9999992132422891, iteration: 31142
loss: 0.9905219674110413,grad_norm: 0.9734178247384764, iteration: 31143
loss: 0.9653616547584534,grad_norm: 0.999999045585754, iteration: 31144
loss: 0.9937251210212708,grad_norm: 0.9999991768797097, iteration: 31145
loss: 1.0114734172821045,grad_norm: 0.999999034114433, iteration: 31146
loss: 1.0303690433502197,grad_norm: 0.9076329498409498, iteration: 31147
loss: 1.007354497909546,grad_norm: 0.9999990899058044, iteration: 31148
loss: 1.0447880029678345,grad_norm: 0.9999991062406214, iteration: 31149
loss: 1.0623432397842407,grad_norm: 0.8560872900952197, iteration: 31150
loss: 1.0010823011398315,grad_norm: 0.9729337828974421, iteration: 31151
loss: 1.0092825889587402,grad_norm: 0.9999989944842129, iteration: 31152
loss: 1.0340039730072021,grad_norm: 0.9999996052635729, iteration: 31153
loss: 0.9592444896697998,grad_norm: 0.9531467597915616, iteration: 31154
loss: 1.0121729373931885,grad_norm: 0.9350207098440562, iteration: 31155
loss: 1.0821818113327026,grad_norm: 0.999998984530382, iteration: 31156
loss: 1.0334612131118774,grad_norm: 0.9999994550487379, iteration: 31157
loss: 1.0295730829238892,grad_norm: 0.9999990451060401, iteration: 31158
loss: 0.979363203048706,grad_norm: 0.9999990431915574, iteration: 31159
loss: 1.0128884315490723,grad_norm: 0.8272537350842597, iteration: 31160
loss: 0.9576300978660583,grad_norm: 0.9320125467948703, iteration: 31161
loss: 0.9742434024810791,grad_norm: 0.8490408255506232, iteration: 31162
loss: 1.0204086303710938,grad_norm: 0.8655755312838465, iteration: 31163
loss: 0.9837623834609985,grad_norm: 0.984438416355876, iteration: 31164
loss: 1.0283479690551758,grad_norm: 0.99446415587637, iteration: 31165
loss: 0.9928379654884338,grad_norm: 0.999999117178161, iteration: 31166
loss: 0.9917473793029785,grad_norm: 0.999999169702456, iteration: 31167
loss: 0.9991890788078308,grad_norm: 0.9999993904197098, iteration: 31168
loss: 0.9746504426002502,grad_norm: 0.9999991785921432, iteration: 31169
loss: 0.9879839420318604,grad_norm: 0.9752739687165652, iteration: 31170
loss: 1.0200285911560059,grad_norm: 0.9999992705628381, iteration: 31171
loss: 1.0173015594482422,grad_norm: 0.9999990418650073, iteration: 31172
loss: 0.9997351765632629,grad_norm: 0.9999991760905629, iteration: 31173
loss: 0.9940035343170166,grad_norm: 0.9999993893475216, iteration: 31174
loss: 0.9995197653770447,grad_norm: 0.9993657904187617, iteration: 31175
loss: 0.9575930833816528,grad_norm: 0.977819201577829, iteration: 31176
loss: 1.01809561252594,grad_norm: 0.8609173181139932, iteration: 31177
loss: 1.076035499572754,grad_norm: 0.9999994517028438, iteration: 31178
loss: 1.0293476581573486,grad_norm: 0.9647042501051887, iteration: 31179
loss: 1.0151478052139282,grad_norm: 0.9111134685075547, iteration: 31180
loss: 1.0131781101226807,grad_norm: 0.8631107138584757, iteration: 31181
loss: 1.015511155128479,grad_norm: 0.9999994712901727, iteration: 31182
loss: 1.0217705965042114,grad_norm: 0.9999991307947945, iteration: 31183
loss: 1.018279790878296,grad_norm: 0.9999991299710081, iteration: 31184
loss: 1.0168167352676392,grad_norm: 0.916720029951688, iteration: 31185
loss: 0.9731636047363281,grad_norm: 0.9604551072780941, iteration: 31186
loss: 1.0072332620620728,grad_norm: 0.9999992123697529, iteration: 31187
loss: 0.9738975763320923,grad_norm: 0.9345179795472541, iteration: 31188
loss: 1.0389400720596313,grad_norm: 0.9097688247526855, iteration: 31189
loss: 1.0054173469543457,grad_norm: 0.999999098266984, iteration: 31190
loss: 0.9711219668388367,grad_norm: 0.9999991500673444, iteration: 31191
loss: 1.0027384757995605,grad_norm: 0.9711628827845777, iteration: 31192
loss: 1.041918158531189,grad_norm: 0.9999991950553764, iteration: 31193
loss: 1.0544363260269165,grad_norm: 0.9538882094605533, iteration: 31194
loss: 1.0588525533676147,grad_norm: 0.9879934205351132, iteration: 31195
loss: 0.966944694519043,grad_norm: 0.9999990766609631, iteration: 31196
loss: 1.0119006633758545,grad_norm: 0.8752153066171596, iteration: 31197
loss: 0.9895516037940979,grad_norm: 0.8105418740842932, iteration: 31198
loss: 1.0122894048690796,grad_norm: 0.9999991394276274, iteration: 31199
loss: 1.021569013595581,grad_norm: 0.9509473059153937, iteration: 31200
loss: 0.9817674160003662,grad_norm: 0.9999990492740062, iteration: 31201
loss: 0.9922335743904114,grad_norm: 0.9690236646033261, iteration: 31202
loss: 1.0383814573287964,grad_norm: 0.9999991350030135, iteration: 31203
loss: 0.9922782182693481,grad_norm: 0.9999991172345654, iteration: 31204
loss: 1.0407413244247437,grad_norm: 0.9999992018347602, iteration: 31205
loss: 1.0058352947235107,grad_norm: 0.9870625764354946, iteration: 31206
loss: 1.042037844657898,grad_norm: 0.9999995598750991, iteration: 31207
loss: 1.0172590017318726,grad_norm: 0.8801635660244287, iteration: 31208
loss: 1.0352767705917358,grad_norm: 0.9268579409359418, iteration: 31209
loss: 0.9978439211845398,grad_norm: 0.7400337309959111, iteration: 31210
loss: 1.0274823904037476,grad_norm: 0.9999995752490968, iteration: 31211
loss: 1.0168293714523315,grad_norm: 0.9999992976538705, iteration: 31212
loss: 0.9924211502075195,grad_norm: 0.9999990094183355, iteration: 31213
loss: 1.0042364597320557,grad_norm: 0.9999989529802943, iteration: 31214
loss: 1.0064750909805298,grad_norm: 0.8912079916690934, iteration: 31215
loss: 1.1008045673370361,grad_norm: 0.9999993818423001, iteration: 31216
loss: 1.0545316934585571,grad_norm: 0.9999990169567101, iteration: 31217
loss: 1.0417225360870361,grad_norm: 0.8467488480218499, iteration: 31218
loss: 0.9929875135421753,grad_norm: 0.9999990843908874, iteration: 31219
loss: 0.988555371761322,grad_norm: 0.9999990787822304, iteration: 31220
loss: 1.015235424041748,grad_norm: 0.987404697605276, iteration: 31221
loss: 0.9824845790863037,grad_norm: 0.9999990606276046, iteration: 31222
loss: 1.0013550519943237,grad_norm: 0.999999502261107, iteration: 31223
loss: 1.0095677375793457,grad_norm: 0.9901299871052575, iteration: 31224
loss: 1.03544282913208,grad_norm: 0.9999991691386471, iteration: 31225
loss: 0.9953073263168335,grad_norm: 0.8025417005492401, iteration: 31226
loss: 0.9907462000846863,grad_norm: 0.9294151358652376, iteration: 31227
loss: 1.0238494873046875,grad_norm: 0.9808108982620202, iteration: 31228
loss: 0.996966540813446,grad_norm: 0.9170303674759988, iteration: 31229
loss: 1.062665343284607,grad_norm: 0.9999993644120958, iteration: 31230
loss: 0.9730926752090454,grad_norm: 0.999998981093715, iteration: 31231
loss: 1.010557770729065,grad_norm: 0.9999991948761203, iteration: 31232
loss: 0.9732398390769958,grad_norm: 0.9999992092459268, iteration: 31233
loss: 0.9999266266822815,grad_norm: 0.9999990901521508, iteration: 31234
loss: 1.016720175743103,grad_norm: 0.9999991217389183, iteration: 31235
loss: 0.9836176037788391,grad_norm: 0.9999990989429373, iteration: 31236
loss: 1.0446010828018188,grad_norm: 0.9999990545116386, iteration: 31237
loss: 0.9921917915344238,grad_norm: 0.9999991221348853, iteration: 31238
loss: 0.9858315587043762,grad_norm: 0.9526692589482647, iteration: 31239
loss: 1.0235395431518555,grad_norm: 0.9999991193825656, iteration: 31240
loss: 1.0092909336090088,grad_norm: 0.9803828458194574, iteration: 31241
loss: 0.9779173135757446,grad_norm: 0.9999990933578851, iteration: 31242
loss: 1.0304256677627563,grad_norm: 0.9999991528222542, iteration: 31243
loss: 1.0334489345550537,grad_norm: 0.9972975077609769, iteration: 31244
loss: 0.9581190943717957,grad_norm: 0.9580346583338835, iteration: 31245
loss: 0.9883707165718079,grad_norm: 0.866027041443654, iteration: 31246
loss: 1.0164376497268677,grad_norm: 0.8909498728521594, iteration: 31247
loss: 0.9668805599212646,grad_norm: 0.99999955301349, iteration: 31248
loss: 1.0322421789169312,grad_norm: 0.999999121343206, iteration: 31249
loss: 0.9928751587867737,grad_norm: 0.9999991082735377, iteration: 31250
loss: 0.9915133714675903,grad_norm: 0.9999993536052869, iteration: 31251
loss: 1.007097840309143,grad_norm: 0.943868990285689, iteration: 31252
loss: 1.0105206966400146,grad_norm: 0.9999998657108167, iteration: 31253
loss: 1.0275557041168213,grad_norm: 0.9999991049237819, iteration: 31254
loss: 1.0016911029815674,grad_norm: 0.8574877326041863, iteration: 31255
loss: 0.9769255518913269,grad_norm: 0.9999990409130646, iteration: 31256
loss: 1.0120761394500732,grad_norm: 0.9999990614111408, iteration: 31257
loss: 1.0053341388702393,grad_norm: 0.9999990913137625, iteration: 31258
loss: 0.9740129113197327,grad_norm: 0.770138639392339, iteration: 31259
loss: 1.0222517251968384,grad_norm: 0.999999460556505, iteration: 31260
loss: 1.011259913444519,grad_norm: 0.8811912160626808, iteration: 31261
loss: 0.9729351997375488,grad_norm: 0.9999993189667647, iteration: 31262
loss: 1.050695538520813,grad_norm: 0.999999143030075, iteration: 31263
loss: 0.9783448576927185,grad_norm: 0.9999991315016916, iteration: 31264
loss: 1.0340758562088013,grad_norm: 0.9999997133369407, iteration: 31265
loss: 1.0407825708389282,grad_norm: 0.9999990506266828, iteration: 31266
loss: 1.0161092281341553,grad_norm: 0.999999113916071, iteration: 31267
loss: 1.0385833978652954,grad_norm: 0.9999991214944621, iteration: 31268
loss: 1.0262811183929443,grad_norm: 0.9999991666656132, iteration: 31269
loss: 0.9934144020080566,grad_norm: 0.9999992259861861, iteration: 31270
loss: 1.0540889501571655,grad_norm: 0.9999989961911783, iteration: 31271
loss: 0.9880658388137817,grad_norm: 0.9999993849045944, iteration: 31272
loss: 1.016610860824585,grad_norm: 0.9999990335638312, iteration: 31273
loss: 0.9943223595619202,grad_norm: 0.9999990163539502, iteration: 31274
loss: 1.0417464971542358,grad_norm: 0.9999991109280214, iteration: 31275
loss: 1.0098330974578857,grad_norm: 0.99999950607853, iteration: 31276
loss: 1.002711534500122,grad_norm: 0.9999997165197808, iteration: 31277
loss: 1.0159564018249512,grad_norm: 0.857255901340628, iteration: 31278
loss: 1.0295886993408203,grad_norm: 0.9999992038886435, iteration: 31279
loss: 1.027163028717041,grad_norm: 0.7682144016811887, iteration: 31280
loss: 1.0199217796325684,grad_norm: 0.8931074606508693, iteration: 31281
loss: 1.0113167762756348,grad_norm: 0.8681593600189658, iteration: 31282
loss: 1.0150614976882935,grad_norm: 0.9999994992604799, iteration: 31283
loss: 1.0077074766159058,grad_norm: 0.9999990811336466, iteration: 31284
loss: 0.9961768984794617,grad_norm: 0.9999991761999942, iteration: 31285
loss: 1.0378563404083252,grad_norm: 0.8760413932528479, iteration: 31286
loss: 0.9826861619949341,grad_norm: 0.999999193568379, iteration: 31287
loss: 1.0323233604431152,grad_norm: 0.9999994776100436, iteration: 31288
loss: 1.0254205465316772,grad_norm: 0.9999991009949817, iteration: 31289
loss: 0.9905909299850464,grad_norm: 0.9702371052180186, iteration: 31290
loss: 0.990812361240387,grad_norm: 0.9999991028863411, iteration: 31291
loss: 1.0255413055419922,grad_norm: 0.8276899284263658, iteration: 31292
loss: 0.9883309006690979,grad_norm: 0.999998980912342, iteration: 31293
loss: 1.0215901136398315,grad_norm: 0.8346493086938264, iteration: 31294
loss: 0.9864029884338379,grad_norm: 0.9122220346565362, iteration: 31295
loss: 1.0080080032348633,grad_norm: 0.9999993698731616, iteration: 31296
loss: 1.0839449167251587,grad_norm: 0.9999999493570211, iteration: 31297
loss: 1.0084450244903564,grad_norm: 0.9044896624191049, iteration: 31298
loss: 0.9735651612281799,grad_norm: 0.7711118072328119, iteration: 31299
loss: 0.9969130754470825,grad_norm: 0.9999991294089852, iteration: 31300
loss: 1.0154117345809937,grad_norm: 0.9999990722324572, iteration: 31301
loss: 1.0024352073669434,grad_norm: 0.9999990168022901, iteration: 31302
loss: 1.0401932001113892,grad_norm: 0.9101638294432458, iteration: 31303
loss: 0.9981579184532166,grad_norm: 0.9577118376234186, iteration: 31304
loss: 0.9974626898765564,grad_norm: 0.9999991018428597, iteration: 31305
loss: 1.0134491920471191,grad_norm: 0.8910620052665554, iteration: 31306
loss: 1.010536789894104,grad_norm: 0.9999992210223778, iteration: 31307
loss: 0.994173526763916,grad_norm: 0.9999994619908927, iteration: 31308
loss: 0.9865195751190186,grad_norm: 0.9999990526397203, iteration: 31309
loss: 1.029623031616211,grad_norm: 0.9583456145837348, iteration: 31310
loss: 0.9889360070228577,grad_norm: 0.8934184304700116, iteration: 31311
loss: 1.0694044828414917,grad_norm: 0.9999996881902191, iteration: 31312
loss: 0.9943118691444397,grad_norm: 0.9999990010905803, iteration: 31313
loss: 1.0255401134490967,grad_norm: 0.9494164594467523, iteration: 31314
loss: 1.0190576314926147,grad_norm: 0.9788449544543409, iteration: 31315
loss: 1.027022361755371,grad_norm: 0.9999992340041722, iteration: 31316
loss: 1.018448829650879,grad_norm: 0.8629241716344986, iteration: 31317
loss: 0.9746445417404175,grad_norm: 0.9564387096454263, iteration: 31318
loss: 0.987860918045044,grad_norm: 0.9264884719268366, iteration: 31319
loss: 0.9936965703964233,grad_norm: 0.9208868821587182, iteration: 31320
loss: 1.0116283893585205,grad_norm: 0.9999995031195077, iteration: 31321
loss: 1.0130316019058228,grad_norm: 0.8082329185753168, iteration: 31322
loss: 1.0407097339630127,grad_norm: 0.9679800012391597, iteration: 31323
loss: 1.0327943563461304,grad_norm: 0.9999996526311913, iteration: 31324
loss: 0.9912968873977661,grad_norm: 0.8262132151067234, iteration: 31325
loss: 1.0095170736312866,grad_norm: 0.9592151911833319, iteration: 31326
loss: 0.9918056130409241,grad_norm: 0.9999991202741252, iteration: 31327
loss: 1.0495043992996216,grad_norm: 0.9999995198149056, iteration: 31328
loss: 1.037390112876892,grad_norm: 0.9470750620430197, iteration: 31329
loss: 1.0136034488677979,grad_norm: 0.9073351665056051, iteration: 31330
loss: 1.0270740985870361,grad_norm: 0.9999994226592178, iteration: 31331
loss: 1.008058786392212,grad_norm: 0.9459342416364034, iteration: 31332
loss: 1.0089083909988403,grad_norm: 0.9999992679468874, iteration: 31333
loss: 0.9898604154586792,grad_norm: 0.9999991409583407, iteration: 31334
loss: 1.017379641532898,grad_norm: 0.965123180748439, iteration: 31335
loss: 1.0050147771835327,grad_norm: 0.8390845017461492, iteration: 31336
loss: 1.0016608238220215,grad_norm: 0.9480118810757567, iteration: 31337
loss: 1.0347756147384644,grad_norm: 0.9999992010557729, iteration: 31338
loss: 1.0102176666259766,grad_norm: 0.9999991312750206, iteration: 31339
loss: 1.01181161403656,grad_norm: 0.8495466340889918, iteration: 31340
loss: 0.9907000064849854,grad_norm: 0.9639150315674034, iteration: 31341
loss: 1.0113773345947266,grad_norm: 0.9999990192862547, iteration: 31342
loss: 1.009424090385437,grad_norm: 0.9999991150267427, iteration: 31343
loss: 0.9870031476020813,grad_norm: 0.997523599182689, iteration: 31344
loss: 1.023995041847229,grad_norm: 0.9999992951276345, iteration: 31345
loss: 1.020269751548767,grad_norm: 0.9999992162885544, iteration: 31346
loss: 0.9954014420509338,grad_norm: 0.9239610009729108, iteration: 31347
loss: 1.0275987386703491,grad_norm: 0.9289540448757233, iteration: 31348
loss: 1.032411813735962,grad_norm: 0.9999997076101886, iteration: 31349
loss: 1.0107988119125366,grad_norm: 0.9999999508801343, iteration: 31350
loss: 1.0120375156402588,grad_norm: 0.933591047812993, iteration: 31351
loss: 1.0100513696670532,grad_norm: 0.9999989375780383, iteration: 31352
loss: 1.027866005897522,grad_norm: 0.9999993183512654, iteration: 31353
loss: 0.951885998249054,grad_norm: 0.9999992995508534, iteration: 31354
loss: 1.0149554014205933,grad_norm: 0.8356739264612654, iteration: 31355
loss: 0.9602094888687134,grad_norm: 0.9999991453679368, iteration: 31356
loss: 0.9810212850570679,grad_norm: 0.9653407160118374, iteration: 31357
loss: 1.0105725526809692,grad_norm: 0.9999991921564315, iteration: 31358
loss: 0.9596782326698303,grad_norm: 0.9999991586621844, iteration: 31359
loss: 1.0086055994033813,grad_norm: 0.8368527086599797, iteration: 31360
loss: 0.9912084341049194,grad_norm: 0.939543763823324, iteration: 31361
loss: 1.026821255683899,grad_norm: 0.999999079682196, iteration: 31362
loss: 0.9996098279953003,grad_norm: 0.9365786240097282, iteration: 31363
loss: 1.0328534841537476,grad_norm: 0.9262089560996171, iteration: 31364
loss: 0.9823588132858276,grad_norm: 0.9999992443281073, iteration: 31365
loss: 1.0640689134597778,grad_norm: 0.9999991700622659, iteration: 31366
loss: 1.0043188333511353,grad_norm: 0.9584616309674469, iteration: 31367
loss: 0.9956247210502625,grad_norm: 0.9999993722541028, iteration: 31368
loss: 1.0101759433746338,grad_norm: 0.9090075856467728, iteration: 31369
loss: 1.0321969985961914,grad_norm: 0.842952553115059, iteration: 31370
loss: 1.019611120223999,grad_norm: 0.9999992100713482, iteration: 31371
loss: 1.015412449836731,grad_norm: 0.9999995526248863, iteration: 31372
loss: 0.999743640422821,grad_norm: 0.9405521110005592, iteration: 31373
loss: 0.9954676032066345,grad_norm: 0.9515491563381455, iteration: 31374
loss: 1.0748904943466187,grad_norm: 0.9999994166110989, iteration: 31375
loss: 1.0030394792556763,grad_norm: 0.9999992389437565, iteration: 31376
loss: 1.0080540180206299,grad_norm: 0.9820979735286687, iteration: 31377
loss: 1.0687708854675293,grad_norm: 0.9694565965953859, iteration: 31378
loss: 1.0220686197280884,grad_norm: 0.9999990160101442, iteration: 31379
loss: 0.9965420961380005,grad_norm: 0.9999996681954014, iteration: 31380
loss: 1.0447300672531128,grad_norm: 0.9999994824504018, iteration: 31381
loss: 1.0233521461486816,grad_norm: 0.9999999136701807, iteration: 31382
loss: 1.0234581232070923,grad_norm: 0.9999991097689931, iteration: 31383
loss: 1.0184617042541504,grad_norm: 0.9999994028749369, iteration: 31384
loss: 1.0092682838439941,grad_norm: 0.9999996908922473, iteration: 31385
loss: 1.0194214582443237,grad_norm: 0.7862694169699633, iteration: 31386
loss: 1.039400339126587,grad_norm: 0.9176325358059877, iteration: 31387
loss: 1.12577486038208,grad_norm: 0.9999997641127806, iteration: 31388
loss: 0.994422197341919,grad_norm: 0.9999990780760546, iteration: 31389
loss: 1.0939545631408691,grad_norm: 0.9999994319554814, iteration: 31390
loss: 1.0376023054122925,grad_norm: 0.9999992911849482, iteration: 31391
loss: 1.04298734664917,grad_norm: 0.9999990732176189, iteration: 31392
loss: 1.0163606405258179,grad_norm: 0.9999990400830892, iteration: 31393
loss: 1.0877374410629272,grad_norm: 0.9999996702105052, iteration: 31394
loss: 1.01193106174469,grad_norm: 0.9999989797255305, iteration: 31395
loss: 1.0227792263031006,grad_norm: 0.8210664664090285, iteration: 31396
loss: 1.0285958051681519,grad_norm: 0.8985230724356775, iteration: 31397
loss: 1.0108554363250732,grad_norm: 0.9999990766360416, iteration: 31398
loss: 0.9875351190567017,grad_norm: 0.9999992098069004, iteration: 31399
loss: 0.9869195222854614,grad_norm: 0.8771633139503375, iteration: 31400
loss: 1.0439833402633667,grad_norm: 0.9999993510782041, iteration: 31401
loss: 1.0085073709487915,grad_norm: 0.9999997017546811, iteration: 31402
loss: 1.0382273197174072,grad_norm: 0.9999995892771029, iteration: 31403
loss: 0.98440021276474,grad_norm: 0.9844069038504181, iteration: 31404
loss: 1.0075933933258057,grad_norm: 0.9021633364488244, iteration: 31405
loss: 0.9887203574180603,grad_norm: 0.99999934092486, iteration: 31406
loss: 1.0727958679199219,grad_norm: 0.9999994590312165, iteration: 31407
loss: 1.0052361488342285,grad_norm: 0.9999991441789181, iteration: 31408
loss: 0.9914959073066711,grad_norm: 0.979122241606048, iteration: 31409
loss: 0.9876259565353394,grad_norm: 0.9999991245039117, iteration: 31410
loss: 1.046369194984436,grad_norm: 0.9999994536925727, iteration: 31411
loss: 1.0043410062789917,grad_norm: 0.8296273727179884, iteration: 31412
loss: 0.9940345883369446,grad_norm: 0.9999991563195364, iteration: 31413
loss: 1.0408005714416504,grad_norm: 0.9999993346525482, iteration: 31414
loss: 1.0107277631759644,grad_norm: 0.9878928691145643, iteration: 31415
loss: 1.03476083278656,grad_norm: 0.9999992692717591, iteration: 31416
loss: 1.074747085571289,grad_norm: 0.9999998605697894, iteration: 31417
loss: 0.9847461581230164,grad_norm: 0.9999991545726548, iteration: 31418
loss: 0.9526621103286743,grad_norm: 0.9999990363559212, iteration: 31419
loss: 0.9776090383529663,grad_norm: 0.9999992425389782, iteration: 31420
loss: 1.107283353805542,grad_norm: 0.9999993710391043, iteration: 31421
loss: 1.0447187423706055,grad_norm: 0.9999991135111852, iteration: 31422
loss: 0.9909073710441589,grad_norm: 0.9999991020346386, iteration: 31423
loss: 0.9942824244499207,grad_norm: 0.9999990564943153, iteration: 31424
loss: 0.9967942833900452,grad_norm: 0.8963340205720317, iteration: 31425
loss: 1.025903344154358,grad_norm: 0.9999990679420989, iteration: 31426
loss: 1.015068531036377,grad_norm: 0.9881442262718845, iteration: 31427
loss: 1.0207217931747437,grad_norm: 0.9999991154586176, iteration: 31428
loss: 0.9987843632698059,grad_norm: 0.9999992876635289, iteration: 31429
loss: 0.9929776191711426,grad_norm: 0.9999990265159682, iteration: 31430
loss: 1.0454832315444946,grad_norm: 0.999999454660353, iteration: 31431
loss: 0.97829669713974,grad_norm: 0.999999173386101, iteration: 31432
loss: 1.0110418796539307,grad_norm: 0.9590097833052333, iteration: 31433
loss: 0.970092236995697,grad_norm: 0.9902550486100355, iteration: 31434
loss: 1.0488619804382324,grad_norm: 0.9999995143574102, iteration: 31435
loss: 1.0116722583770752,grad_norm: 0.9999993410858722, iteration: 31436
loss: 1.0064505338668823,grad_norm: 0.9999990988719166, iteration: 31437
loss: 0.99774569272995,grad_norm: 0.9348017308927574, iteration: 31438
loss: 1.0840976238250732,grad_norm: 0.9999993324146041, iteration: 31439
loss: 1.0486195087432861,grad_norm: 0.9999990535061721, iteration: 31440
loss: 1.0042626857757568,grad_norm: 0.81645709138318, iteration: 31441
loss: 1.0404102802276611,grad_norm: 0.920391316991906, iteration: 31442
loss: 1.2252849340438843,grad_norm: 0.9999996568641815, iteration: 31443
loss: 1.0459067821502686,grad_norm: 0.99999910163131, iteration: 31444
loss: 0.9554561376571655,grad_norm: 0.9999989762734602, iteration: 31445
loss: 1.032238245010376,grad_norm: 0.9150714700265372, iteration: 31446
loss: 1.032105803489685,grad_norm: 0.8842018660776809, iteration: 31447
loss: 1.0322961807250977,grad_norm: 0.9999990570444413, iteration: 31448
loss: 0.959367036819458,grad_norm: 0.9458538392525301, iteration: 31449
loss: 1.1638479232788086,grad_norm: 0.9999998554251295, iteration: 31450
loss: 1.022199034690857,grad_norm: 0.9999991453132814, iteration: 31451
loss: 0.9842661023139954,grad_norm: 0.9999990381677186, iteration: 31452
loss: 1.0222541093826294,grad_norm: 0.9999993681424694, iteration: 31453
loss: 1.0589993000030518,grad_norm: 0.9999998489855463, iteration: 31454
loss: 0.9984676837921143,grad_norm: 0.9057884877858841, iteration: 31455
loss: 0.981313169002533,grad_norm: 0.9999991141828961, iteration: 31456
loss: 1.0772600173950195,grad_norm: 0.9999996425884069, iteration: 31457
loss: 1.0164711475372314,grad_norm: 0.9999990222067422, iteration: 31458
loss: 1.0011320114135742,grad_norm: 0.9999989889016158, iteration: 31459
loss: 1.0190752744674683,grad_norm: 0.9078367382591251, iteration: 31460
loss: 1.0772640705108643,grad_norm: 0.9999992784280114, iteration: 31461
loss: 1.0652744770050049,grad_norm: 0.9999989691936524, iteration: 31462
loss: 0.9651361107826233,grad_norm: 0.9999992659710336, iteration: 31463
loss: 1.0245275497436523,grad_norm: 0.9999990995053635, iteration: 31464
loss: 0.9960997700691223,grad_norm: 0.9379180913755979, iteration: 31465
loss: 0.9907904267311096,grad_norm: 0.9014186042702202, iteration: 31466
loss: 0.9861671924591064,grad_norm: 0.9999990900472433, iteration: 31467
loss: 0.9837935566902161,grad_norm: 0.8262274626015116, iteration: 31468
loss: 1.019053339958191,grad_norm: 0.99999901763609, iteration: 31469
loss: 1.0897510051727295,grad_norm: 0.9999994539625103, iteration: 31470
loss: 1.038734793663025,grad_norm: 0.9881587655736536, iteration: 31471
loss: 1.0177996158599854,grad_norm: 0.9694720796736429, iteration: 31472
loss: 1.0351059436798096,grad_norm: 0.9999991099652858, iteration: 31473
loss: 1.0321376323699951,grad_norm: 0.9999990394691383, iteration: 31474
loss: 1.049272894859314,grad_norm: 0.9999993470671323, iteration: 31475
loss: 0.9614993333816528,grad_norm: 0.8887978778732398, iteration: 31476
loss: 1.0295530557632446,grad_norm: 0.892277059940721, iteration: 31477
loss: 0.9713857173919678,grad_norm: 0.9999991714990767, iteration: 31478
loss: 1.0390610694885254,grad_norm: 0.9999992326152422, iteration: 31479
loss: 1.0431677103042603,grad_norm: 0.9999990056584459, iteration: 31480
loss: 1.0283396244049072,grad_norm: 0.9999991194914556, iteration: 31481
loss: 1.0029062032699585,grad_norm: 0.9999992260622549, iteration: 31482
loss: 1.0126665830612183,grad_norm: 0.9999992968839282, iteration: 31483
loss: 1.049075722694397,grad_norm: 0.9999994099702902, iteration: 31484
loss: 1.0042808055877686,grad_norm: 0.9999993035183956, iteration: 31485
loss: 1.0214550495147705,grad_norm: 0.9999992255614407, iteration: 31486
loss: 0.9782028794288635,grad_norm: 0.9420923588188012, iteration: 31487
loss: 0.9942057728767395,grad_norm: 0.9999990219697742, iteration: 31488
loss: 0.9994910955429077,grad_norm: 0.8950864540802058, iteration: 31489
loss: 1.013866662979126,grad_norm: 0.9999993380555752, iteration: 31490
loss: 1.0559182167053223,grad_norm: 0.9231263521352174, iteration: 31491
loss: 1.048682451248169,grad_norm: 0.9999990323087948, iteration: 31492
loss: 0.963610827922821,grad_norm: 0.9536267585702871, iteration: 31493
loss: 1.0499895811080933,grad_norm: 0.9999991910728898, iteration: 31494
loss: 1.0222487449645996,grad_norm: 0.8628168196894822, iteration: 31495
loss: 1.0021674633026123,grad_norm: 0.9999990770265967, iteration: 31496
loss: 1.0099719762802124,grad_norm: 0.999999049564342, iteration: 31497
loss: 1.046305537223816,grad_norm: 0.9999995597229993, iteration: 31498
loss: 1.0196629762649536,grad_norm: 0.9999992732641446, iteration: 31499
loss: 1.0010377168655396,grad_norm: 0.854264192646106, iteration: 31500
loss: 1.015234351158142,grad_norm: 0.918782003941674, iteration: 31501
loss: 1.01747465133667,grad_norm: 0.9071011656238427, iteration: 31502
loss: 1.0170276165008545,grad_norm: 0.9999996072042932, iteration: 31503
loss: 1.027802586555481,grad_norm: 0.8507172066672021, iteration: 31504
loss: 0.9880953431129456,grad_norm: 0.8971795386994993, iteration: 31505
loss: 1.0242966413497925,grad_norm: 0.9999990215900219, iteration: 31506
loss: 1.0003132820129395,grad_norm: 0.960536071112722, iteration: 31507
loss: 1.0034294128417969,grad_norm: 0.9999992519541193, iteration: 31508
loss: 0.9907273054122925,grad_norm: 0.9999991567478299, iteration: 31509
loss: 1.0012216567993164,grad_norm: 0.8787264928787843, iteration: 31510
loss: 1.0074962377548218,grad_norm: 0.8433772309291375, iteration: 31511
loss: 1.0695189237594604,grad_norm: 0.9999996026730082, iteration: 31512
loss: 1.0344849824905396,grad_norm: 0.9999994014384219, iteration: 31513
loss: 1.028344750404358,grad_norm: 0.8806828737919461, iteration: 31514
loss: 1.0100524425506592,grad_norm: 0.9999990484234001, iteration: 31515
loss: 0.9924240708351135,grad_norm: 0.943203497631675, iteration: 31516
loss: 0.9785065650939941,grad_norm: 0.9991300299108538, iteration: 31517
loss: 1.0251790285110474,grad_norm: 0.9379927625310899, iteration: 31518
loss: 0.9858153462409973,grad_norm: 0.8351323844956826, iteration: 31519
loss: 1.0353312492370605,grad_norm: 0.9999992058631882, iteration: 31520
loss: 0.9906958937644958,grad_norm: 0.9870854613211961, iteration: 31521
loss: 1.0339418649673462,grad_norm: 0.9999992991478734, iteration: 31522
loss: 0.9926009774208069,grad_norm: 0.8635702896029506, iteration: 31523
loss: 0.979087233543396,grad_norm: 0.9476045700925877, iteration: 31524
loss: 1.0034643411636353,grad_norm: 0.8841484974064752, iteration: 31525
loss: 0.9937310814857483,grad_norm: 0.9999991115365865, iteration: 31526
loss: 1.0491756200790405,grad_norm: 0.9999994702238559, iteration: 31527
loss: 0.9850035309791565,grad_norm: 0.9007146249707159, iteration: 31528
loss: 1.0025631189346313,grad_norm: 0.9999991705978263, iteration: 31529
loss: 1.0050220489501953,grad_norm: 0.9999990825271318, iteration: 31530
loss: 0.9951042532920837,grad_norm: 0.8848776412251544, iteration: 31531
loss: 1.0209777355194092,grad_norm: 0.9999992558231863, iteration: 31532
loss: 1.0047616958618164,grad_norm: 0.9999993833922753, iteration: 31533
loss: 1.001672625541687,grad_norm: 0.9384282449414845, iteration: 31534
loss: 0.9913874268531799,grad_norm: 0.9999990854315495, iteration: 31535
loss: 0.9927741289138794,grad_norm: 0.9644894426662237, iteration: 31536
loss: 1.0003708600997925,grad_norm: 0.9573344037313953, iteration: 31537
loss: 1.0236845016479492,grad_norm: 0.9999991771923185, iteration: 31538
loss: 0.9735167622566223,grad_norm: 0.9999995344037935, iteration: 31539
loss: 0.9952900409698486,grad_norm: 0.9918610414890606, iteration: 31540
loss: 0.9955157041549683,grad_norm: 0.8871055594644033, iteration: 31541
loss: 1.0001707077026367,grad_norm: 0.9999990662491213, iteration: 31542
loss: 0.9664899706840515,grad_norm: 0.9527569683745325, iteration: 31543
loss: 1.0223238468170166,grad_norm: 0.9999991930925458, iteration: 31544
loss: 1.0379817485809326,grad_norm: 0.9999992611157571, iteration: 31545
loss: 1.004716396331787,grad_norm: 0.9999990936023868, iteration: 31546
loss: 1.0177671909332275,grad_norm: 0.8829748389095484, iteration: 31547
loss: 1.016209602355957,grad_norm: 0.9999990284303214, iteration: 31548
loss: 1.0221455097198486,grad_norm: 0.9999990809583625, iteration: 31549
loss: 0.9887875318527222,grad_norm: 0.8802847481889369, iteration: 31550
loss: 1.0136311054229736,grad_norm: 0.9999990186462225, iteration: 31551
loss: 0.9976677894592285,grad_norm: 0.9999992091153383, iteration: 31552
loss: 1.0545532703399658,grad_norm: 0.9999992850399003, iteration: 31553
loss: 0.9799991846084595,grad_norm: 0.9581472650154538, iteration: 31554
loss: 1.0511913299560547,grad_norm: 0.9999996604777047, iteration: 31555
loss: 1.0319572687149048,grad_norm: 0.9999990968934436, iteration: 31556
loss: 0.9824354648590088,grad_norm: 0.9999994848525756, iteration: 31557
loss: 1.009868860244751,grad_norm: 0.9999992509828194, iteration: 31558
loss: 0.9770481586456299,grad_norm: 0.9999991376185424, iteration: 31559
loss: 1.0122714042663574,grad_norm: 0.8093885489093394, iteration: 31560
loss: 0.9893330931663513,grad_norm: 0.9367497519924307, iteration: 31561
loss: 1.0380369424819946,grad_norm: 0.9999994418409276, iteration: 31562
loss: 0.9752412438392639,grad_norm: 0.9485473783316731, iteration: 31563
loss: 1.0587712526321411,grad_norm: 0.9999991064124871, iteration: 31564
loss: 0.96446293592453,grad_norm: 0.9999991103142454, iteration: 31565
loss: 1.0156186819076538,grad_norm: 0.9690895185765411, iteration: 31566
loss: 1.0841951370239258,grad_norm: 0.9999992537353711, iteration: 31567
loss: 1.037522792816162,grad_norm: 0.9999994025016957, iteration: 31568
loss: 0.9958717226982117,grad_norm: 0.9999990578194241, iteration: 31569
loss: 0.9977653622627258,grad_norm: 0.9999991549961886, iteration: 31570
loss: 0.9982503652572632,grad_norm: 0.98804696542191, iteration: 31571
loss: 1.0172673463821411,grad_norm: 0.9646210102340481, iteration: 31572
loss: 1.0383708477020264,grad_norm: 0.8869986422089605, iteration: 31573
loss: 1.022355079650879,grad_norm: 0.9895026625827028, iteration: 31574
loss: 0.9679629802703857,grad_norm: 0.9999991099539374, iteration: 31575
loss: 0.9800897240638733,grad_norm: 0.8948603598741729, iteration: 31576
loss: 1.0116914510726929,grad_norm: 0.8739985498541754, iteration: 31577
loss: 1.0734728574752808,grad_norm: 0.9999997294729193, iteration: 31578
loss: 1.0025917291641235,grad_norm: 0.9999995006958263, iteration: 31579
loss: 1.0681058168411255,grad_norm: 0.9999998925444105, iteration: 31580
loss: 1.0204262733459473,grad_norm: 0.9999991130487802, iteration: 31581
loss: 1.0016967058181763,grad_norm: 0.999999196787086, iteration: 31582
loss: 0.990820050239563,grad_norm: 0.9999991938348844, iteration: 31583
loss: 0.9956994652748108,grad_norm: 0.9999995448554104, iteration: 31584
loss: 0.9868083596229553,grad_norm: 0.9025458615248761, iteration: 31585
loss: 1.0665092468261719,grad_norm: 0.9999989392892782, iteration: 31586
loss: 1.0389330387115479,grad_norm: 0.9999995198497434, iteration: 31587
loss: 1.0185956954956055,grad_norm: 0.947644127625807, iteration: 31588
loss: 1.0220283269882202,grad_norm: 0.9999993075302097, iteration: 31589
loss: 0.9771448373794556,grad_norm: 0.9099048135647887, iteration: 31590
loss: 0.9773335456848145,grad_norm: 0.999998922098105, iteration: 31591
loss: 1.040528416633606,grad_norm: 0.9899936505331909, iteration: 31592
loss: 0.9931517243385315,grad_norm: 0.9999992256073393, iteration: 31593
loss: 1.0313193798065186,grad_norm: 0.9999990407609952, iteration: 31594
loss: 1.0152349472045898,grad_norm: 0.9999991991380445, iteration: 31595
loss: 1.006995439529419,grad_norm: 0.811709738263306, iteration: 31596
loss: 1.0587726831436157,grad_norm: 0.9999992034673535, iteration: 31597
loss: 1.053510069847107,grad_norm: 0.9999990736959139, iteration: 31598
loss: 1.065024733543396,grad_norm: 0.9999997950345986, iteration: 31599
loss: 1.052024483680725,grad_norm: 0.9999992212843849, iteration: 31600
loss: 1.027732253074646,grad_norm: 0.9999996126369244, iteration: 31601
loss: 1.0366837978363037,grad_norm: 0.999999291308949, iteration: 31602
loss: 1.0132946968078613,grad_norm: 0.8867270524971214, iteration: 31603
loss: 1.043453574180603,grad_norm: 0.9999991751612063, iteration: 31604
loss: 0.9645563364028931,grad_norm: 0.9999990119109129, iteration: 31605
loss: 1.0362855195999146,grad_norm: 0.9999991596737184, iteration: 31606
loss: 1.0221232175827026,grad_norm: 0.9999997115813156, iteration: 31607
loss: 1.0079138278961182,grad_norm: 0.989192244144967, iteration: 31608
loss: 0.9954754710197449,grad_norm: 0.9645619773532774, iteration: 31609
loss: 0.9810457229614258,grad_norm: 0.9999990741586011, iteration: 31610
loss: 1.0204002857208252,grad_norm: 0.9300008047752133, iteration: 31611
loss: 1.0220961570739746,grad_norm: 0.9999990618219717, iteration: 31612
loss: 1.006575345993042,grad_norm: 0.9942231905723032, iteration: 31613
loss: 1.0029957294464111,grad_norm: 0.9028412165263638, iteration: 31614
loss: 0.9989078640937805,grad_norm: 0.8770756168075607, iteration: 31615
loss: 1.0130062103271484,grad_norm: 0.9999990868545084, iteration: 31616
loss: 0.992923378944397,grad_norm: 0.9999990037570703, iteration: 31617
loss: 1.032036542892456,grad_norm: 0.9999991410264685, iteration: 31618
loss: 1.0175869464874268,grad_norm: 0.9999990977763048, iteration: 31619
loss: 0.9913778901100159,grad_norm: 0.9999992756076936, iteration: 31620
loss: 1.0259660482406616,grad_norm: 0.9999991990469075, iteration: 31621
loss: 0.9777373671531677,grad_norm: 0.9861087206939443, iteration: 31622
loss: 1.0069891214370728,grad_norm: 0.9541159239843054, iteration: 31623
loss: 1.011777400970459,grad_norm: 0.8740507737604734, iteration: 31624
loss: 0.9970584511756897,grad_norm: 0.9999992823105969, iteration: 31625
loss: 1.0023225545883179,grad_norm: 0.9913505274509432, iteration: 31626
loss: 1.0093337297439575,grad_norm: 0.9063997013024596, iteration: 31627
loss: 1.0079069137573242,grad_norm: 0.9999997435182051, iteration: 31628
loss: 1.0214831829071045,grad_norm: 0.9524070794784606, iteration: 31629
loss: 0.9953727126121521,grad_norm: 0.9999990831614696, iteration: 31630
loss: 0.9939142465591431,grad_norm: 0.9999989521065634, iteration: 31631
loss: 1.0003312826156616,grad_norm: 0.999999069059532, iteration: 31632
loss: 1.0214228630065918,grad_norm: 0.9999993448575327, iteration: 31633
loss: 1.0143319368362427,grad_norm: 0.9999989507879595, iteration: 31634
loss: 0.98087078332901,grad_norm: 0.9999997981107001, iteration: 31635
loss: 1.04786217212677,grad_norm: 0.999999259826079, iteration: 31636
loss: 1.0560437440872192,grad_norm: 0.9999993462121297, iteration: 31637
loss: 1.0155595541000366,grad_norm: 0.999999135795222, iteration: 31638
loss: 1.00849187374115,grad_norm: 0.9570008741627557, iteration: 31639
loss: 1.0444339513778687,grad_norm: 0.8603291708828974, iteration: 31640
loss: 1.0201308727264404,grad_norm: 0.9999990339636178, iteration: 31641
loss: 1.042266607284546,grad_norm: 0.9999992065215096, iteration: 31642
loss: 0.9942525625228882,grad_norm: 0.9999991505493107, iteration: 31643
loss: 1.0337247848510742,grad_norm: 0.9999991667449866, iteration: 31644
loss: 0.9931369423866272,grad_norm: 0.994968420868141, iteration: 31645
loss: 1.0049394369125366,grad_norm: 0.8716623569156966, iteration: 31646
loss: 1.0187581777572632,grad_norm: 0.8665806290833519, iteration: 31647
loss: 0.9847877025604248,grad_norm: 0.941963502715235, iteration: 31648
loss: 1.027138352394104,grad_norm: 0.9999991063143322, iteration: 31649
loss: 1.0051233768463135,grad_norm: 0.9999989802331745, iteration: 31650
loss: 0.9650087356567383,grad_norm: 0.9680250699210422, iteration: 31651
loss: 1.0153173208236694,grad_norm: 0.9687346165671646, iteration: 31652
loss: 1.0058220624923706,grad_norm: 0.7023766050146273, iteration: 31653
loss: 1.0706924200057983,grad_norm: 0.9999998524223486, iteration: 31654
loss: 0.977617621421814,grad_norm: 0.826982268337751, iteration: 31655
loss: 1.0051571130752563,grad_norm: 0.9492772128629678, iteration: 31656
loss: 0.9765447378158569,grad_norm: 0.9999990382147733, iteration: 31657
loss: 0.9524572491645813,grad_norm: 0.9999990693196344, iteration: 31658
loss: 0.9940924048423767,grad_norm: 0.8362331808626335, iteration: 31659
loss: 1.0173388719558716,grad_norm: 0.9999990013314294, iteration: 31660
loss: 1.002699851989746,grad_norm: 0.9999991125119554, iteration: 31661
loss: 1.0245307683944702,grad_norm: 0.9279383374949777, iteration: 31662
loss: 0.9684913158416748,grad_norm: 0.9415427467245313, iteration: 31663
loss: 1.0251322984695435,grad_norm: 0.8489021653674933, iteration: 31664
loss: 0.9960098266601562,grad_norm: 0.8650987460679237, iteration: 31665
loss: 0.9990586638450623,grad_norm: 0.9999991776286079, iteration: 31666
loss: 1.0058319568634033,grad_norm: 0.9808488676295211, iteration: 31667
loss: 0.991180956363678,grad_norm: 0.832244989802014, iteration: 31668
loss: 1.005886435508728,grad_norm: 0.9999989325462799, iteration: 31669
loss: 1.0054917335510254,grad_norm: 0.8631244718057022, iteration: 31670
loss: 1.0272945165634155,grad_norm: 0.9999992015724988, iteration: 31671
loss: 1.030918836593628,grad_norm: 0.8025544113023423, iteration: 31672
loss: 0.9602563977241516,grad_norm: 0.9962437734492268, iteration: 31673
loss: 0.9943830966949463,grad_norm: 0.9999990825496841, iteration: 31674
loss: 1.0121757984161377,grad_norm: 0.8591670278753324, iteration: 31675
loss: 1.0615020990371704,grad_norm: 0.9999991536684223, iteration: 31676
loss: 0.9664977788925171,grad_norm: 0.9274122635196594, iteration: 31677
loss: 1.0042425394058228,grad_norm: 0.9999990077607618, iteration: 31678
loss: 0.9869935512542725,grad_norm: 0.9999991439148072, iteration: 31679
loss: 1.0305612087249756,grad_norm: 0.9999991175252613, iteration: 31680
loss: 1.0283808708190918,grad_norm: 0.9999991416550523, iteration: 31681
loss: 1.0085625648498535,grad_norm: 0.9999990739633953, iteration: 31682
loss: 0.992487370967865,grad_norm: 0.9443162877583281, iteration: 31683
loss: 0.9990009069442749,grad_norm: 0.96332728844141, iteration: 31684
loss: 0.9878541231155396,grad_norm: 0.9999990500075439, iteration: 31685
loss: 1.0071146488189697,grad_norm: 0.999999253156799, iteration: 31686
loss: 0.9983062744140625,grad_norm: 0.9999991509831239, iteration: 31687
loss: 0.9741728901863098,grad_norm: 0.8207894105860235, iteration: 31688
loss: 1.0142267942428589,grad_norm: 0.9148126724133514, iteration: 31689
loss: 1.0247267484664917,grad_norm: 0.9602210749858102, iteration: 31690
loss: 1.0157970190048218,grad_norm: 0.9999991931601733, iteration: 31691
loss: 1.0151078701019287,grad_norm: 0.9999990654801869, iteration: 31692
loss: 0.9798699617385864,grad_norm: 0.999999138745678, iteration: 31693
loss: 0.9642764925956726,grad_norm: 0.9999990674209696, iteration: 31694
loss: 0.9794119000434875,grad_norm: 0.9999992232123791, iteration: 31695
loss: 0.9968248605728149,grad_norm: 0.9705328018190172, iteration: 31696
loss: 1.0019065141677856,grad_norm: 0.9999996648722829, iteration: 31697
loss: 1.0180617570877075,grad_norm: 0.8814887042396498, iteration: 31698
loss: 0.9964247941970825,grad_norm: 0.9999993647969578, iteration: 31699
loss: 0.9851801991462708,grad_norm: 0.980611035808417, iteration: 31700
loss: 1.0253397226333618,grad_norm: 0.9999992057631256, iteration: 31701
loss: 1.023343801498413,grad_norm: 0.9999990126539876, iteration: 31702
loss: 1.0112583637237549,grad_norm: 0.9844295473583897, iteration: 31703
loss: 0.9923363327980042,grad_norm: 0.9999990286608889, iteration: 31704
loss: 1.0187584161758423,grad_norm: 0.9999993304071852, iteration: 31705
loss: 1.019762396812439,grad_norm: 0.9368236649249185, iteration: 31706
loss: 0.9934741854667664,grad_norm: 0.9999997482286896, iteration: 31707
loss: 1.056363821029663,grad_norm: 0.9999998225617693, iteration: 31708
loss: 1.012861967086792,grad_norm: 0.9480011353178819, iteration: 31709
loss: 0.9992935657501221,grad_norm: 0.9411928799400486, iteration: 31710
loss: 0.99837327003479,grad_norm: 0.9697649909881728, iteration: 31711
loss: 1.0317472219467163,grad_norm: 0.9999991708863916, iteration: 31712
loss: 0.9884452223777771,grad_norm: 0.8511949506203556, iteration: 31713
loss: 1.0150012969970703,grad_norm: 0.9999990066481238, iteration: 31714
loss: 1.0948731899261475,grad_norm: 0.999999016197782, iteration: 31715
loss: 1.0711129903793335,grad_norm: 0.9999997661953404, iteration: 31716
loss: 1.0120279788970947,grad_norm: 0.9999994394574813, iteration: 31717
loss: 0.9799708724021912,grad_norm: 0.9774988306619143, iteration: 31718
loss: 0.9891557097434998,grad_norm: 0.9407478866291371, iteration: 31719
loss: 1.0204496383666992,grad_norm: 0.9758624144762663, iteration: 31720
loss: 1.0280845165252686,grad_norm: 0.9343338009411665, iteration: 31721
loss: 1.0087178945541382,grad_norm: 0.9999991397654702, iteration: 31722
loss: 1.026368260383606,grad_norm: 0.9999990871296136, iteration: 31723
loss: 0.9927469491958618,grad_norm: 0.8388435119027605, iteration: 31724
loss: 1.0076338052749634,grad_norm: 0.9082305057408745, iteration: 31725
loss: 1.0016365051269531,grad_norm: 0.9459807843670116, iteration: 31726
loss: 1.0184160470962524,grad_norm: 0.7819670968228818, iteration: 31727
loss: 1.0902272462844849,grad_norm: 0.9999993362600283, iteration: 31728
loss: 1.093897819519043,grad_norm: 0.999999320427023, iteration: 31729
loss: 1.0538958311080933,grad_norm: 0.9999990932543255, iteration: 31730
loss: 1.0002927780151367,grad_norm: 0.9999991310814964, iteration: 31731
loss: 0.9818822741508484,grad_norm: 0.8739334056359134, iteration: 31732
loss: 1.0778695344924927,grad_norm: 0.9999991832263623, iteration: 31733
loss: 1.008741855621338,grad_norm: 0.9070849192922488, iteration: 31734
loss: 1.1179497241973877,grad_norm: 0.8653955427209614, iteration: 31735
loss: 1.0103883743286133,grad_norm: 0.9999995187227586, iteration: 31736
loss: 0.9839117527008057,grad_norm: 0.9999994155517744, iteration: 31737
loss: 1.0188045501708984,grad_norm: 0.9999994590056086, iteration: 31738
loss: 1.0120558738708496,grad_norm: 0.9999990749890492, iteration: 31739
loss: 1.0058149099349976,grad_norm: 0.9999990245355598, iteration: 31740
loss: 1.0708320140838623,grad_norm: 0.9999991515618539, iteration: 31741
loss: 0.9876422882080078,grad_norm: 0.9999990066220469, iteration: 31742
loss: 0.9838822484016418,grad_norm: 0.8619743973227395, iteration: 31743
loss: 1.0410192012786865,grad_norm: 0.9999990887387324, iteration: 31744
loss: 1.0828039646148682,grad_norm: 0.9999998053058617, iteration: 31745
loss: 1.0380936861038208,grad_norm: 0.9999996066943286, iteration: 31746
loss: 1.0041358470916748,grad_norm: 0.9999992033260207, iteration: 31747
loss: 1.0718439817428589,grad_norm: 0.9999996089904529, iteration: 31748
loss: 1.004463791847229,grad_norm: 0.9999990844825656, iteration: 31749
loss: 0.990545928478241,grad_norm: 0.9999991705718553, iteration: 31750
loss: 0.9616014361381531,grad_norm: 0.9735373047055698, iteration: 31751
loss: 1.0593534708023071,grad_norm: 0.999999292512731, iteration: 31752
loss: 1.0025980472564697,grad_norm: 0.9999993388351818, iteration: 31753
loss: 1.0082927942276,grad_norm: 0.9999991586297035, iteration: 31754
loss: 1.0276060104370117,grad_norm: 0.9999993338852563, iteration: 31755
loss: 1.0434199571609497,grad_norm: 0.8932273435564575, iteration: 31756
loss: 0.9986104965209961,grad_norm: 0.941617413027759, iteration: 31757
loss: 1.0220249891281128,grad_norm: 0.9999991032633989, iteration: 31758
loss: 0.9966251850128174,grad_norm: 0.7933138368662772, iteration: 31759
loss: 1.077726125717163,grad_norm: 0.9999994451073291, iteration: 31760
loss: 1.0471546649932861,grad_norm: 0.9999994148628453, iteration: 31761
loss: 0.9990794062614441,grad_norm: 0.9696461051828531, iteration: 31762
loss: 1.0383590459823608,grad_norm: 0.9336395855191112, iteration: 31763
loss: 0.9893195033073425,grad_norm: 0.9999991469982209, iteration: 31764
loss: 1.0487234592437744,grad_norm: 0.9999992600791805, iteration: 31765
loss: 1.0391792058944702,grad_norm: 0.9999995710946212, iteration: 31766
loss: 1.0161234140396118,grad_norm: 0.9999991379322857, iteration: 31767
loss: 0.9856971502304077,grad_norm: 0.999999014474281, iteration: 31768
loss: 1.0420992374420166,grad_norm: 0.9999996225671584, iteration: 31769
loss: 0.9853600263595581,grad_norm: 0.9999990363511853, iteration: 31770
loss: 1.016514539718628,grad_norm: 0.9999994044600559, iteration: 31771
loss: 0.9959626197814941,grad_norm: 0.99999915993074, iteration: 31772
loss: 1.0217703580856323,grad_norm: 0.9999989465813568, iteration: 31773
loss: 1.1087327003479004,grad_norm: 0.9999998990265965, iteration: 31774
loss: 0.9946867227554321,grad_norm: 0.8523741617819337, iteration: 31775
loss: 0.9563358426094055,grad_norm: 0.9999990796824257, iteration: 31776
loss: 0.9652507901191711,grad_norm: 0.999366440527722, iteration: 31777
loss: 0.9968772530555725,grad_norm: 0.9999990198521558, iteration: 31778
loss: 0.9774987101554871,grad_norm: 0.994481440620047, iteration: 31779
loss: 1.0329558849334717,grad_norm: 0.9999995475195858, iteration: 31780
loss: 0.9735745787620544,grad_norm: 0.9952287157381889, iteration: 31781
loss: 1.0523806810379028,grad_norm: 0.9011982866326473, iteration: 31782
loss: 1.0033818483352661,grad_norm: 0.9999990919308624, iteration: 31783
loss: 0.9927515387535095,grad_norm: 0.899884315396709, iteration: 31784
loss: 1.0455292463302612,grad_norm: 0.9999996392456181, iteration: 31785
loss: 0.9866943359375,grad_norm: 0.9649068439480787, iteration: 31786
loss: 1.0463286638259888,grad_norm: 0.9632083514664642, iteration: 31787
loss: 1.0222777128219604,grad_norm: 0.9167529033181614, iteration: 31788
loss: 1.0191335678100586,grad_norm: 0.9999993313103274, iteration: 31789
loss: 1.0198290348052979,grad_norm: 0.9999990676652544, iteration: 31790
loss: 1.0445820093154907,grad_norm: 0.9999994128059383, iteration: 31791
loss: 0.9877651333808899,grad_norm: 0.9999991116790947, iteration: 31792
loss: 1.0044273138046265,grad_norm: 0.9877866942970815, iteration: 31793
loss: 1.0081242322921753,grad_norm: 0.9044271311648563, iteration: 31794
loss: 0.9702565670013428,grad_norm: 0.9999992623763458, iteration: 31795
loss: 0.9907935261726379,grad_norm: 0.9999990004715035, iteration: 31796
loss: 1.0498138666152954,grad_norm: 0.9999990509670793, iteration: 31797
loss: 1.023424506187439,grad_norm: 0.838294729125629, iteration: 31798
loss: 0.9897748231887817,grad_norm: 0.9999991726930307, iteration: 31799
loss: 1.0101739168167114,grad_norm: 0.926772059216345, iteration: 31800
loss: 1.0180696249008179,grad_norm: 0.9050276776209382, iteration: 31801
loss: 1.1065069437026978,grad_norm: 0.9999994380881454, iteration: 31802
loss: 1.0479984283447266,grad_norm: 0.9999993074810064, iteration: 31803
loss: 1.056294322013855,grad_norm: 0.9999998756177049, iteration: 31804
loss: 0.9955412745475769,grad_norm: 0.9617070634645573, iteration: 31805
loss: 0.9952883720397949,grad_norm: 0.9999991850942527, iteration: 31806
loss: 0.985974133014679,grad_norm: 0.9999990995210284, iteration: 31807
loss: 1.141730546951294,grad_norm: 0.9999993438127596, iteration: 31808
loss: 1.0178388357162476,grad_norm: 0.9999993838748317, iteration: 31809
loss: 1.0263036489486694,grad_norm: 0.9144974055412659, iteration: 31810
loss: 1.0098192691802979,grad_norm: 0.9999992659554313, iteration: 31811
loss: 1.0099138021469116,grad_norm: 0.9999992506670481, iteration: 31812
loss: 1.00349760055542,grad_norm: 0.9667057507973729, iteration: 31813
loss: 1.0259000062942505,grad_norm: 0.9999990484483187, iteration: 31814
loss: 0.9907588958740234,grad_norm: 0.8373230331859753, iteration: 31815
loss: 1.0460902452468872,grad_norm: 0.9999993307165852, iteration: 31816
loss: 1.0224851369857788,grad_norm: 0.9521281826992843, iteration: 31817
loss: 1.0500859022140503,grad_norm: 0.9999995112939118, iteration: 31818
loss: 0.9755468964576721,grad_norm: 0.8799462082132771, iteration: 31819
loss: 1.0288292169570923,grad_norm: 0.9808321990343369, iteration: 31820
loss: 1.036820650100708,grad_norm: 0.9999990467158669, iteration: 31821
loss: 0.9970096945762634,grad_norm: 0.9999991490186464, iteration: 31822
loss: 0.9824976921081543,grad_norm: 0.9775115367394781, iteration: 31823
loss: 1.074963092803955,grad_norm: 0.9999998697000301, iteration: 31824
loss: 0.9705859422683716,grad_norm: 0.9999990932028704, iteration: 31825
loss: 1.1586995124816895,grad_norm: 0.9999996731586827, iteration: 31826
loss: 1.2497297525405884,grad_norm: 0.9999996564364424, iteration: 31827
loss: 1.1564431190490723,grad_norm: 0.9999994689336373, iteration: 31828
loss: 1.8354638814926147,grad_norm: 0.9999999859793742, iteration: 31829
loss: 1.1612197160720825,grad_norm: 0.9999993210007687, iteration: 31830
loss: 1.032888650894165,grad_norm: 0.9999994342263211, iteration: 31831
loss: 1.0409059524536133,grad_norm: 0.9514968784289031, iteration: 31832
loss: 1.0202662944793701,grad_norm: 0.9999992051958849, iteration: 31833
loss: 0.9828436374664307,grad_norm: 0.9999992379864717, iteration: 31834
loss: 1.0005979537963867,grad_norm: 0.9999990600956953, iteration: 31835
loss: 1.0272341966629028,grad_norm: 0.9999993140513627, iteration: 31836
loss: 0.9866417050361633,grad_norm: 0.999999234799833, iteration: 31837
loss: 1.0158135890960693,grad_norm: 0.8121159884958071, iteration: 31838
loss: 1.012086272239685,grad_norm: 0.9394635048880178, iteration: 31839
loss: 0.9909542202949524,grad_norm: 0.8562137630744882, iteration: 31840
loss: 1.0049704313278198,grad_norm: 0.9748764797018081, iteration: 31841
loss: 1.0114630460739136,grad_norm: 0.8188978342360799, iteration: 31842
loss: 1.0247013568878174,grad_norm: 0.9999990655313025, iteration: 31843
loss: 0.9806993007659912,grad_norm: 0.9276777939756431, iteration: 31844
loss: 1.0120201110839844,grad_norm: 0.7972009822503896, iteration: 31845
loss: 0.9991660714149475,grad_norm: 0.8765902148110135, iteration: 31846
loss: 1.0448449850082397,grad_norm: 0.9999990612191194, iteration: 31847
loss: 0.9733965396881104,grad_norm: 0.9999990842954146, iteration: 31848
loss: 0.9870370626449585,grad_norm: 0.9712674926408489, iteration: 31849
loss: 1.0052590370178223,grad_norm: 0.9999991842873835, iteration: 31850
loss: 1.0244178771972656,grad_norm: 0.9999990559523763, iteration: 31851
loss: 0.9945697784423828,grad_norm: 0.9999991016396372, iteration: 31852
loss: 0.9728185534477234,grad_norm: 0.9999989911621486, iteration: 31853
loss: 0.994587779045105,grad_norm: 0.8735382764989474, iteration: 31854
loss: 1.069564700126648,grad_norm: 0.886052028060735, iteration: 31855
loss: 1.0586687326431274,grad_norm: 0.9999991571287007, iteration: 31856
loss: 1.038921594619751,grad_norm: 0.999999044992217, iteration: 31857
loss: 0.9941326379776001,grad_norm: 0.999999224954749, iteration: 31858
loss: 0.9993016719818115,grad_norm: 0.9999992865880024, iteration: 31859
loss: 1.0195568799972534,grad_norm: 0.9568942413501657, iteration: 31860
loss: 0.9864799380302429,grad_norm: 0.9999990151493888, iteration: 31861
loss: 1.0278325080871582,grad_norm: 0.8264387315420234, iteration: 31862
loss: 0.9894586205482483,grad_norm: 0.9613302697955984, iteration: 31863
loss: 1.0043355226516724,grad_norm: 0.9309337537208163, iteration: 31864
loss: 0.9873446822166443,grad_norm: 0.9999991723153477, iteration: 31865
loss: 1.019392728805542,grad_norm: 0.9999993275293408, iteration: 31866
loss: 1.001596212387085,grad_norm: 0.7838673500786024, iteration: 31867
loss: 1.010027289390564,grad_norm: 0.8985293825582125, iteration: 31868
loss: 0.9726901650428772,grad_norm: 0.9999991122170842, iteration: 31869
loss: 0.9947516918182373,grad_norm: 0.9999990037970827, iteration: 31870
loss: 1.0038535594940186,grad_norm: 0.9999990477167533, iteration: 31871
loss: 0.9926181435585022,grad_norm: 0.904189828987532, iteration: 31872
loss: 1.0205539464950562,grad_norm: 0.7722481398195001, iteration: 31873
loss: 0.9809848666191101,grad_norm: 0.8337840011992239, iteration: 31874
loss: 0.9389334917068481,grad_norm: 0.999999124690478, iteration: 31875
loss: 0.980038046836853,grad_norm: 0.8633881458706538, iteration: 31876
loss: 1.0150738954544067,grad_norm: 0.8990324194568412, iteration: 31877
loss: 1.0202689170837402,grad_norm: 0.8150311176099587, iteration: 31878
loss: 1.0272384881973267,grad_norm: 0.9999990429052621, iteration: 31879
loss: 1.1184360980987549,grad_norm: 0.9999994324737941, iteration: 31880
loss: 0.995629608631134,grad_norm: 0.811160333737718, iteration: 31881
loss: 0.9997994303703308,grad_norm: 0.9999991012541823, iteration: 31882
loss: 0.9612551927566528,grad_norm: 0.9999991179406337, iteration: 31883
loss: 0.9887945055961609,grad_norm: 0.9999989821690827, iteration: 31884
loss: 1.0098979473114014,grad_norm: 0.9999996822271505, iteration: 31885
loss: 1.010663628578186,grad_norm: 0.999999089446383, iteration: 31886
loss: 0.9689542651176453,grad_norm: 0.8562659107196214, iteration: 31887
loss: 1.0783512592315674,grad_norm: 0.9671885722943029, iteration: 31888
loss: 1.0437721014022827,grad_norm: 0.9946833841749598, iteration: 31889
loss: 0.9687565565109253,grad_norm: 0.9999992110475948, iteration: 31890
loss: 1.0122838020324707,grad_norm: 0.8826073103003349, iteration: 31891
loss: 0.9717960953712463,grad_norm: 0.9999990834787166, iteration: 31892
loss: 1.0108373165130615,grad_norm: 0.9262753263229235, iteration: 31893
loss: 1.0233243703842163,grad_norm: 0.9999990210714539, iteration: 31894
loss: 1.0165449380874634,grad_norm: 0.9458629436108258, iteration: 31895
loss: 1.0135101079940796,grad_norm: 0.9999991394314448, iteration: 31896
loss: 1.0035407543182373,grad_norm: 0.9624820901535702, iteration: 31897
loss: 1.0236417055130005,grad_norm: 0.9921378009741886, iteration: 31898
loss: 1.0153943300247192,grad_norm: 0.925634213687596, iteration: 31899
loss: 1.0285700559616089,grad_norm: 0.9735289894856715, iteration: 31900
loss: 0.9927859902381897,grad_norm: 0.9999996964896042, iteration: 31901
loss: 1.0075693130493164,grad_norm: 0.9999990564896132, iteration: 31902
loss: 0.9730948805809021,grad_norm: 0.9999992677163188, iteration: 31903
loss: 1.0389063358306885,grad_norm: 0.9999992590999015, iteration: 31904
loss: 0.9541663527488708,grad_norm: 0.988987250685181, iteration: 31905
loss: 0.9987834692001343,grad_norm: 0.8908595652575179, iteration: 31906
loss: 1.0583646297454834,grad_norm: 0.9999994197091429, iteration: 31907
loss: 1.0182126760482788,grad_norm: 0.8842171137436401, iteration: 31908
loss: 1.0025432109832764,grad_norm: 0.9999990988136871, iteration: 31909
loss: 1.0061537027359009,grad_norm: 0.9999991611720311, iteration: 31910
loss: 1.0230973958969116,grad_norm: 0.9999992630962464, iteration: 31911
loss: 0.9995627403259277,grad_norm: 0.8794305779066417, iteration: 31912
loss: 0.9829509854316711,grad_norm: 0.8233409624607203, iteration: 31913
loss: 1.0172253847122192,grad_norm: 0.9999996378457741, iteration: 31914
loss: 1.0889983177185059,grad_norm: 0.9999992638208676, iteration: 31915
loss: 0.9779747128486633,grad_norm: 0.779947109528949, iteration: 31916
loss: 1.0012086629867554,grad_norm: 0.9999990155677982, iteration: 31917
loss: 0.9933430552482605,grad_norm: 0.9276742849943477, iteration: 31918
loss: 1.0036283731460571,grad_norm: 0.9999995775576644, iteration: 31919
loss: 0.9933494925498962,grad_norm: 0.8336279031041612, iteration: 31920
loss: 1.0006436109542847,grad_norm: 0.9999990817790494, iteration: 31921
loss: 0.9996462464332581,grad_norm: 0.999999110991096, iteration: 31922
loss: 1.0022000074386597,grad_norm: 0.9999990293603837, iteration: 31923
loss: 1.012478232383728,grad_norm: 0.9762551545196471, iteration: 31924
loss: 0.9707452058792114,grad_norm: 0.7823957703465931, iteration: 31925
loss: 0.9738696217536926,grad_norm: 0.8244354350574254, iteration: 31926
loss: 1.02150559425354,grad_norm: 0.9932661201066911, iteration: 31927
loss: 1.0347541570663452,grad_norm: 0.9999991701139395, iteration: 31928
loss: 1.0314671993255615,grad_norm: 0.9999991918439618, iteration: 31929
loss: 1.0408201217651367,grad_norm: 0.9020271610253502, iteration: 31930
loss: 0.9942073822021484,grad_norm: 0.9464838309817987, iteration: 31931
loss: 1.036403775215149,grad_norm: 0.9999989984775303, iteration: 31932
loss: 1.000373363494873,grad_norm: 0.9999990802471059, iteration: 31933
loss: 1.0025180578231812,grad_norm: 0.9999990055901774, iteration: 31934
loss: 1.010035514831543,grad_norm: 0.8617726814170641, iteration: 31935
loss: 1.0132476091384888,grad_norm: 0.9999993739886092, iteration: 31936
loss: 1.0092096328735352,grad_norm: 0.9999990890112033, iteration: 31937
loss: 1.0159164667129517,grad_norm: 0.9999991868766065, iteration: 31938
loss: 0.9959869384765625,grad_norm: 0.9748482059176025, iteration: 31939
loss: 1.0133551359176636,grad_norm: 0.9679708980754049, iteration: 31940
loss: 1.0428982973098755,grad_norm: 0.9999992774766917, iteration: 31941
loss: 0.9963808655738831,grad_norm: 0.7941417860166492, iteration: 31942
loss: 0.9805410504341125,grad_norm: 0.9035687496092294, iteration: 31943
loss: 1.0049036741256714,grad_norm: 0.9354328174122014, iteration: 31944
loss: 1.0116795301437378,grad_norm: 0.999999002167536, iteration: 31945
loss: 1.0144575834274292,grad_norm: 0.9018938242126977, iteration: 31946
loss: 0.9865939617156982,grad_norm: 0.8595164848535894, iteration: 31947
loss: 1.0417762994766235,grad_norm: 0.8871984972964977, iteration: 31948
loss: 1.015859603881836,grad_norm: 0.9999992242539959, iteration: 31949
loss: 0.9857778549194336,grad_norm: 0.9999993098035275, iteration: 31950
loss: 1.0038480758666992,grad_norm: 0.9446284096464901, iteration: 31951
loss: 1.0043710470199585,grad_norm: 0.8897651382663898, iteration: 31952
loss: 1.026711106300354,grad_norm: 0.9999990868070737, iteration: 31953
loss: 1.0281730890274048,grad_norm: 0.9999993347099129, iteration: 31954
loss: 1.0191847085952759,grad_norm: 0.9999992565888023, iteration: 31955
loss: 1.0161889791488647,grad_norm: 0.9999999396477882, iteration: 31956
loss: 1.0000550746917725,grad_norm: 0.8614469487530098, iteration: 31957
loss: 0.9971979856491089,grad_norm: 0.9116640960018335, iteration: 31958
loss: 1.0236985683441162,grad_norm: 0.9726092193505314, iteration: 31959
loss: 1.017765998840332,grad_norm: 0.9999998080910234, iteration: 31960
loss: 1.0054982900619507,grad_norm: 0.9999990485700025, iteration: 31961
loss: 1.0019758939743042,grad_norm: 0.9999990972974816, iteration: 31962
loss: 0.9552841186523438,grad_norm: 0.9870119512248456, iteration: 31963
loss: 1.0433266162872314,grad_norm: 0.999999702997996, iteration: 31964
loss: 1.0187464952468872,grad_norm: 0.9999992001695259, iteration: 31965
loss: 1.010898470878601,grad_norm: 0.999999226547516, iteration: 31966
loss: 0.9905306696891785,grad_norm: 0.8344521712975919, iteration: 31967
loss: 1.0114518404006958,grad_norm: 0.9344369584034853, iteration: 31968
loss: 1.0194666385650635,grad_norm: 0.9999992008459793, iteration: 31969
loss: 1.0331159830093384,grad_norm: 0.9999995059501692, iteration: 31970
loss: 0.9890897870063782,grad_norm: 0.9028227783846499, iteration: 31971
loss: 0.9812453985214233,grad_norm: 0.999999196395798, iteration: 31972
loss: 1.053942084312439,grad_norm: 0.9999992943560158, iteration: 31973
loss: 0.9669172763824463,grad_norm: 0.9559201369103404, iteration: 31974
loss: 1.003024935722351,grad_norm: 0.9999989950065687, iteration: 31975
loss: 1.0095127820968628,grad_norm: 0.904010525597335, iteration: 31976
loss: 0.9606413841247559,grad_norm: 0.9385333489697425, iteration: 31977
loss: 1.0984408855438232,grad_norm: 0.9999999264899331, iteration: 31978
loss: 1.0294575691223145,grad_norm: 0.8700212502522238, iteration: 31979
loss: 1.0798609256744385,grad_norm: 0.9999990389228024, iteration: 31980
loss: 0.9954208135604858,grad_norm: 0.9131513429081329, iteration: 31981
loss: 1.0051326751708984,grad_norm: 0.8444947489001937, iteration: 31982
loss: 0.9921920895576477,grad_norm: 0.9999991816043701, iteration: 31983
loss: 0.9794910550117493,grad_norm: 0.9433887783418956, iteration: 31984
loss: 0.9491051435470581,grad_norm: 0.9999997752016677, iteration: 31985
loss: 1.0007245540618896,grad_norm: 0.9999991345026163, iteration: 31986
loss: 1.0206384658813477,grad_norm: 0.8386021437360065, iteration: 31987
loss: 1.0018872022628784,grad_norm: 0.9610925742064691, iteration: 31988
loss: 1.0150517225265503,grad_norm: 0.9694540918766851, iteration: 31989
loss: 1.0905849933624268,grad_norm: 0.9999997691498825, iteration: 31990
loss: 0.9673718810081482,grad_norm: 0.9999999726805134, iteration: 31991
loss: 1.029392123222351,grad_norm: 0.7406636111216754, iteration: 31992
loss: 1.024787425994873,grad_norm: 0.9162054294718373, iteration: 31993
loss: 1.0188509225845337,grad_norm: 0.9603301129683324, iteration: 31994
loss: 1.011023998260498,grad_norm: 0.8964205114873781, iteration: 31995
loss: 0.9973171949386597,grad_norm: 0.999999292418019, iteration: 31996
loss: 0.9989866614341736,grad_norm: 0.8376243875632791, iteration: 31997
loss: 0.9545440673828125,grad_norm: 0.9999990875187302, iteration: 31998
loss: 1.0504957437515259,grad_norm: 0.9999998523324006, iteration: 31999
loss: 1.011908769607544,grad_norm: 0.9877172713771194, iteration: 32000
loss: 1.011291265487671,grad_norm: 0.9999990578468844, iteration: 32001
loss: 1.0052156448364258,grad_norm: 0.863481802466153, iteration: 32002
loss: 1.1136525869369507,grad_norm: 0.9999994381118494, iteration: 32003
loss: 0.9728018045425415,grad_norm: 0.9999994416327201, iteration: 32004
loss: 1.0055861473083496,grad_norm: 0.8978334197339106, iteration: 32005
loss: 1.010509967803955,grad_norm: 0.9193797168036896, iteration: 32006
loss: 1.1013281345367432,grad_norm: 0.9999997357645841, iteration: 32007
loss: 0.9954984784126282,grad_norm: 0.9526379466401503, iteration: 32008
loss: 0.9816024899482727,grad_norm: 0.8740963403267797, iteration: 32009
loss: 1.0124648809432983,grad_norm: 0.9078415769846188, iteration: 32010
loss: 1.0904241800308228,grad_norm: 0.9999991968304838, iteration: 32011
loss: 1.0540738105773926,grad_norm: 0.9999995406522956, iteration: 32012
loss: 1.0385291576385498,grad_norm: 0.999999108534219, iteration: 32013
loss: 1.0666372776031494,grad_norm: 0.9999992839898386, iteration: 32014
loss: 0.9955881237983704,grad_norm: 0.8773808028236121, iteration: 32015
loss: 1.0175281763076782,grad_norm: 0.8915063407674207, iteration: 32016
loss: 1.0086287260055542,grad_norm: 0.9315635940784684, iteration: 32017
loss: 1.0243704319000244,grad_norm: 0.8749798743862054, iteration: 32018
loss: 1.0102769136428833,grad_norm: 0.9999990989631313, iteration: 32019
loss: 0.9977273344993591,grad_norm: 0.9034154775020226, iteration: 32020
loss: 1.0289862155914307,grad_norm: 0.9999997541435407, iteration: 32021
loss: 1.0006730556488037,grad_norm: 0.9760522726450062, iteration: 32022
loss: 1.0162221193313599,grad_norm: 0.9999991818500443, iteration: 32023
loss: 0.9821091890335083,grad_norm: 0.9999997706795271, iteration: 32024
loss: 0.9753240346908569,grad_norm: 0.9738709398610589, iteration: 32025
loss: 0.9995906949043274,grad_norm: 0.9999997321499409, iteration: 32026
loss: 1.0182055234909058,grad_norm: 0.9690732031945355, iteration: 32027
loss: 1.006555199623108,grad_norm: 0.9999990075345718, iteration: 32028
loss: 0.9972701668739319,grad_norm: 0.8435062110289397, iteration: 32029
loss: 0.9914918541908264,grad_norm: 0.9999990819808302, iteration: 32030
loss: 1.016703486442566,grad_norm: 0.9999993434559334, iteration: 32031
loss: 1.022513508796692,grad_norm: 0.8962296226446551, iteration: 32032
loss: 0.9890565276145935,grad_norm: 0.9379918705506869, iteration: 32033
loss: 1.0176396369934082,grad_norm: 0.9820016030659202, iteration: 32034
loss: 0.9863189458847046,grad_norm: 0.8624131732175495, iteration: 32035
loss: 1.0052591562271118,grad_norm: 0.99999909247794, iteration: 32036
loss: 1.0629794597625732,grad_norm: 0.9999999754822341, iteration: 32037
loss: 1.0824744701385498,grad_norm: 0.999999693432713, iteration: 32038
loss: 1.0118669271469116,grad_norm: 0.9374952913092555, iteration: 32039
loss: 1.0324381589889526,grad_norm: 0.8934573953800492, iteration: 32040
loss: 0.9964029788970947,grad_norm: 0.9126455331471733, iteration: 32041
loss: 0.9582744836807251,grad_norm: 0.8593346178288346, iteration: 32042
loss: 1.0142983198165894,grad_norm: 0.999999361054597, iteration: 32043
loss: 0.9979634881019592,grad_norm: 0.9999994934982279, iteration: 32044
loss: 0.9860857725143433,grad_norm: 0.9999991480182, iteration: 32045
loss: 0.9668700695037842,grad_norm: 0.9557493214089052, iteration: 32046
loss: 1.0156830549240112,grad_norm: 0.9829069728938645, iteration: 32047
loss: 0.9823629856109619,grad_norm: 0.999999047542667, iteration: 32048
loss: 0.973893404006958,grad_norm: 0.9999993143135824, iteration: 32049
loss: 1.0163071155548096,grad_norm: 0.9999997742064873, iteration: 32050
loss: 1.007846474647522,grad_norm: 0.9171285926013091, iteration: 32051
loss: 1.0080116987228394,grad_norm: 0.9494771694008683, iteration: 32052
loss: 0.9973659515380859,grad_norm: 0.9999996654603721, iteration: 32053
loss: 1.0176550149917603,grad_norm: 0.9999997442277386, iteration: 32054
loss: 1.0453438758850098,grad_norm: 0.9097818700313213, iteration: 32055
loss: 1.013153076171875,grad_norm: 0.9999996924745427, iteration: 32056
loss: 0.978033721446991,grad_norm: 0.9329857254986827, iteration: 32057
loss: 1.0274800062179565,grad_norm: 0.9999995912170092, iteration: 32058
loss: 1.0008260011672974,grad_norm: 0.8910039084065874, iteration: 32059
loss: 0.9909255504608154,grad_norm: 0.9765251878528196, iteration: 32060
loss: 0.9827840924263,grad_norm: 0.999999086646039, iteration: 32061
loss: 1.1006802320480347,grad_norm: 0.9999994454389403, iteration: 32062
loss: 0.9692880511283875,grad_norm: 0.9801587538304211, iteration: 32063
loss: 0.9720975160598755,grad_norm: 0.8969491322609316, iteration: 32064
loss: 0.9986243844032288,grad_norm: 0.8860676881199655, iteration: 32065
loss: 1.000383973121643,grad_norm: 0.9999992297181008, iteration: 32066
loss: 0.9821081757545471,grad_norm: 0.8583454050005676, iteration: 32067
loss: 1.025007724761963,grad_norm: 0.9340799496842439, iteration: 32068
loss: 1.007614254951477,grad_norm: 0.8888740060963153, iteration: 32069
loss: 0.984885573387146,grad_norm: 0.9999992911811619, iteration: 32070
loss: 1.0445135831832886,grad_norm: 0.9999991028636765, iteration: 32071
loss: 1.0228084325790405,grad_norm: 0.9999996349722848, iteration: 32072
loss: 0.9943933486938477,grad_norm: 0.9800921282949319, iteration: 32073
loss: 1.011570692062378,grad_norm: 0.8771581282224662, iteration: 32074
loss: 1.0995006561279297,grad_norm: 0.9999996320134633, iteration: 32075
loss: 1.0391206741333008,grad_norm: 0.999999745817001, iteration: 32076
loss: 1.0373680591583252,grad_norm: 0.9652422876642309, iteration: 32077
loss: 0.9979442358016968,grad_norm: 0.8801146171584898, iteration: 32078
loss: 1.0601129531860352,grad_norm: 0.9999996586531706, iteration: 32079
loss: 1.0589141845703125,grad_norm: 0.9999990356041353, iteration: 32080
loss: 1.018480658531189,grad_norm: 0.999999221835452, iteration: 32081
loss: 1.030308485031128,grad_norm: 0.8566599061237814, iteration: 32082
loss: 1.008551001548767,grad_norm: 0.9999991943337191, iteration: 32083
loss: 1.0260533094406128,grad_norm: 0.9999989544973203, iteration: 32084
loss: 1.009633183479309,grad_norm: 0.8170495454968392, iteration: 32085
loss: 1.0881396532058716,grad_norm: 0.9999997666733269, iteration: 32086
loss: 0.9987990856170654,grad_norm: 0.9057249846215074, iteration: 32087
loss: 1.0023870468139648,grad_norm: 0.9788978109683208, iteration: 32088
loss: 1.0047119855880737,grad_norm: 0.9999990403779866, iteration: 32089
loss: 0.9850437045097351,grad_norm: 0.8210646397758062, iteration: 32090
loss: 1.0378479957580566,grad_norm: 0.9999992953304797, iteration: 32091
loss: 1.0059094429016113,grad_norm: 0.9399280025006059, iteration: 32092
loss: 1.0001201629638672,grad_norm: 0.9999991325579313, iteration: 32093
loss: 1.009060263633728,grad_norm: 0.9999994775305243, iteration: 32094
loss: 1.176081657409668,grad_norm: 0.9999997612385559, iteration: 32095
loss: 1.0070475339889526,grad_norm: 0.9999990436823302, iteration: 32096
loss: 1.0288816690444946,grad_norm: 0.9999991635649781, iteration: 32097
loss: 0.9562777876853943,grad_norm: 0.9259789223774686, iteration: 32098
loss: 1.0361392498016357,grad_norm: 0.8053646595133502, iteration: 32099
loss: 1.0110461711883545,grad_norm: 0.8820636046346255, iteration: 32100
loss: 0.9835803508758545,grad_norm: 0.9999990480769055, iteration: 32101
loss: 1.0071947574615479,grad_norm: 0.9999990117383569, iteration: 32102
loss: 1.0450401306152344,grad_norm: 0.9999999520259355, iteration: 32103
loss: 1.008738398551941,grad_norm: 0.999999112256742, iteration: 32104
loss: 1.0103302001953125,grad_norm: 0.9999992320581986, iteration: 32105
loss: 1.0440394878387451,grad_norm: 0.9999993152036272, iteration: 32106
loss: 0.9967899918556213,grad_norm: 0.9657267603202822, iteration: 32107
loss: 1.0120431184768677,grad_norm: 0.9999993966423996, iteration: 32108
loss: 0.9788609147071838,grad_norm: 0.9999993304878304, iteration: 32109
loss: 1.0014721155166626,grad_norm: 0.9610695903837139, iteration: 32110
loss: 1.0190051794052124,grad_norm: 0.999999391017691, iteration: 32111
loss: 1.049593210220337,grad_norm: 0.9259227498945263, iteration: 32112
loss: 1.028475046157837,grad_norm: 0.8110836134054717, iteration: 32113
loss: 0.9788834452629089,grad_norm: 0.919330974422046, iteration: 32114
loss: 0.9847384095191956,grad_norm: 0.9999991992368527, iteration: 32115
loss: 0.9919707179069519,grad_norm: 0.9999992629406643, iteration: 32116
loss: 1.0655765533447266,grad_norm: 0.9999996988443561, iteration: 32117
loss: 1.0117672681808472,grad_norm: 0.9999991791087337, iteration: 32118
loss: 0.9768099784851074,grad_norm: 0.9992902463528309, iteration: 32119
loss: 1.018983244895935,grad_norm: 0.9999993154362505, iteration: 32120
loss: 1.0187783241271973,grad_norm: 0.9782380862265615, iteration: 32121
loss: 1.0286622047424316,grad_norm: 0.9769552644511105, iteration: 32122
loss: 0.9756892323493958,grad_norm: 0.8479551039433256, iteration: 32123
loss: 1.0215600728988647,grad_norm: 0.999999489120179, iteration: 32124
loss: 0.9941266179084778,grad_norm: 0.9999995559797897, iteration: 32125
loss: 1.019649624824524,grad_norm: 0.9999992771431371, iteration: 32126
loss: 1.025389313697815,grad_norm: 0.9193116935208709, iteration: 32127
loss: 1.0149329900741577,grad_norm: 0.9999989731484764, iteration: 32128
loss: 0.9897370934486389,grad_norm: 0.9731493216043321, iteration: 32129
loss: 1.0148200988769531,grad_norm: 0.9999994936444798, iteration: 32130
loss: 1.0081088542938232,grad_norm: 0.9999994286973661, iteration: 32131
loss: 0.9889318346977234,grad_norm: 0.9247689093287018, iteration: 32132
loss: 1.066060185432434,grad_norm: 0.9999995727348485, iteration: 32133
loss: 1.0056993961334229,grad_norm: 0.8802289886201455, iteration: 32134
loss: 1.183067798614502,grad_norm: 0.9999996093925869, iteration: 32135
loss: 1.0274200439453125,grad_norm: 0.8385168185081143, iteration: 32136
loss: 1.0644952058792114,grad_norm: 0.9999991681658627, iteration: 32137
loss: 1.0113272666931152,grad_norm: 0.8574333934874866, iteration: 32138
loss: 1.0171852111816406,grad_norm: 0.9999997825106077, iteration: 32139
loss: 1.0039583444595337,grad_norm: 0.9999991373128387, iteration: 32140
loss: 1.0040684938430786,grad_norm: 0.9925623392658243, iteration: 32141
loss: 0.9838099479675293,grad_norm: 0.9999989845563443, iteration: 32142
loss: 1.015840768814087,grad_norm: 0.9665030470434098, iteration: 32143
loss: 1.0488560199737549,grad_norm: 0.9999995771444709, iteration: 32144
loss: 0.9875785112380981,grad_norm: 0.9999995920870748, iteration: 32145
loss: 1.0458855628967285,grad_norm: 0.999999338320908, iteration: 32146
loss: 0.9869576096534729,grad_norm: 0.9999994946052811, iteration: 32147
loss: 1.0078130960464478,grad_norm: 0.9999995962606649, iteration: 32148
loss: 0.9971559047698975,grad_norm: 0.9322879747073283, iteration: 32149
loss: 1.0606015920639038,grad_norm: 0.9999990463851779, iteration: 32150
loss: 1.003003478050232,grad_norm: 0.9999991792044175, iteration: 32151
loss: 0.9876721501350403,grad_norm: 0.919460777728459, iteration: 32152
loss: 1.0000537633895874,grad_norm: 0.999999087609117, iteration: 32153
loss: 1.0352394580841064,grad_norm: 0.9999997657496024, iteration: 32154
loss: 1.0501477718353271,grad_norm: 0.9999994211856341, iteration: 32155
loss: 1.0218212604522705,grad_norm: 0.9999991621090559, iteration: 32156
loss: 0.9938228130340576,grad_norm: 0.9999990061336517, iteration: 32157
loss: 1.0070267915725708,grad_norm: 0.7707622884517159, iteration: 32158
loss: 1.0146749019622803,grad_norm: 0.9999991697713664, iteration: 32159
loss: 0.9761844873428345,grad_norm: 0.9999993437957732, iteration: 32160
loss: 1.0371090173721313,grad_norm: 0.9999996605702033, iteration: 32161
loss: 1.1064140796661377,grad_norm: 0.9999995501761978, iteration: 32162
loss: 0.9872523546218872,grad_norm: 0.9999989823757576, iteration: 32163
loss: 0.9829435348510742,grad_norm: 0.9999992847167215, iteration: 32164
loss: 1.0349366664886475,grad_norm: 0.997936831087505, iteration: 32165
loss: 1.0938808917999268,grad_norm: 0.9999993359829578, iteration: 32166
loss: 1.0194357633590698,grad_norm: 0.9703950634604754, iteration: 32167
loss: 1.1002109050750732,grad_norm: 0.9999991244887099, iteration: 32168
loss: 0.9830455780029297,grad_norm: 0.9999992855416006, iteration: 32169
loss: 1.0205001831054688,grad_norm: 0.9999991416031855, iteration: 32170
loss: 1.0564457178115845,grad_norm: 0.999999731917314, iteration: 32171
loss: 1.0097466707229614,grad_norm: 0.999999733135748, iteration: 32172
loss: 1.020838975906372,grad_norm: 0.9678461911567652, iteration: 32173
loss: 0.9934646487236023,grad_norm: 0.9999998296997475, iteration: 32174
loss: 1.0000439882278442,grad_norm: 0.9498701419039488, iteration: 32175
loss: 1.0152292251586914,grad_norm: 0.9388659193503384, iteration: 32176
loss: 0.9843319058418274,grad_norm: 0.9999991415417979, iteration: 32177
loss: 1.1827168464660645,grad_norm: 0.9999994805138608, iteration: 32178
loss: 0.9800935387611389,grad_norm: 0.9999992261259616, iteration: 32179
loss: 1.0468549728393555,grad_norm: 0.9999994578775536, iteration: 32180
loss: 0.9939783811569214,grad_norm: 0.9056426761767006, iteration: 32181
loss: 0.998904824256897,grad_norm: 0.9608737031180766, iteration: 32182
loss: 1.0055822134017944,grad_norm: 0.914290713746348, iteration: 32183
loss: 1.0003527402877808,grad_norm: 0.9999992836045094, iteration: 32184
loss: 0.9833957552909851,grad_norm: 0.9999990806314216, iteration: 32185
loss: 0.9720306396484375,grad_norm: 0.9999995016797044, iteration: 32186
loss: 0.9957440495491028,grad_norm: 0.7859201362679549, iteration: 32187
loss: 1.0741493701934814,grad_norm: 0.9999996520355373, iteration: 32188
loss: 1.1449013948440552,grad_norm: 0.9999994068091769, iteration: 32189
loss: 0.9898285865783691,grad_norm: 0.9999991941782744, iteration: 32190
loss: 0.9874116778373718,grad_norm: 0.9999991126824754, iteration: 32191
loss: 1.0590673685073853,grad_norm: 0.9999997354763147, iteration: 32192
loss: 1.0563308000564575,grad_norm: 0.999999360025124, iteration: 32193
loss: 1.018110752105713,grad_norm: 0.8215787837416003, iteration: 32194
loss: 1.0067086219787598,grad_norm: 0.9800792868364812, iteration: 32195
loss: 1.1098400354385376,grad_norm: 0.9999993371736934, iteration: 32196
loss: 1.0012446641921997,grad_norm: 0.9058892104586551, iteration: 32197
loss: 1.0162436962127686,grad_norm: 0.97776451125891, iteration: 32198
loss: 1.0037147998809814,grad_norm: 0.9999995571899212, iteration: 32199
loss: 1.0302345752716064,grad_norm: 0.9999997131966278, iteration: 32200
loss: 1.0189292430877686,grad_norm: 0.9999998509401065, iteration: 32201
loss: 1.0427570343017578,grad_norm: 0.9999995416909239, iteration: 32202
loss: 1.1153528690338135,grad_norm: 0.9999998686641791, iteration: 32203
loss: 1.0109601020812988,grad_norm: 0.9876406664473664, iteration: 32204
loss: 1.0904608964920044,grad_norm: 0.999999274013281, iteration: 32205
loss: 1.0047861337661743,grad_norm: 0.9999991531817514, iteration: 32206
loss: 1.0634713172912598,grad_norm: 0.9999989930800286, iteration: 32207
loss: 1.0022506713867188,grad_norm: 0.9999989679248421, iteration: 32208
loss: 1.0202229022979736,grad_norm: 0.9999990780564403, iteration: 32209
loss: 1.020041823387146,grad_norm: 0.9999990538054829, iteration: 32210
loss: 1.0126181840896606,grad_norm: 0.9999994087600844, iteration: 32211
loss: 1.028718113899231,grad_norm: 0.9999996304502295, iteration: 32212
loss: 1.0154892206192017,grad_norm: 0.96337606859387, iteration: 32213
loss: 1.0832445621490479,grad_norm: 0.9999992549096862, iteration: 32214
loss: 1.0073851346969604,grad_norm: 0.999999093985242, iteration: 32215
loss: 1.0081827640533447,grad_norm: 0.9999991807616155, iteration: 32216
loss: 0.9941948652267456,grad_norm: 0.9999991809757675, iteration: 32217
loss: 0.9956409931182861,grad_norm: 0.9999991615101437, iteration: 32218
loss: 0.9966771602630615,grad_norm: 0.9999990346765691, iteration: 32219
loss: 1.0403496026992798,grad_norm: 0.9999991421697223, iteration: 32220
loss: 0.9735035300254822,grad_norm: 0.9037767448300386, iteration: 32221
loss: 1.0784755945205688,grad_norm: 0.9999998229012376, iteration: 32222
loss: 1.0530645847320557,grad_norm: 0.9999995764704616, iteration: 32223
loss: 1.0368932485580444,grad_norm: 0.999999346968203, iteration: 32224
loss: 1.0103727579116821,grad_norm: 0.9999994521384832, iteration: 32225
loss: 1.0489683151245117,grad_norm: 0.9999991767682047, iteration: 32226
loss: 1.109594702720642,grad_norm: 0.9999996000103352, iteration: 32227
loss: 1.0265355110168457,grad_norm: 0.9999993867097058, iteration: 32228
loss: 0.9998674392700195,grad_norm: 0.9025149046977321, iteration: 32229
loss: 1.0067055225372314,grad_norm: 0.9999991998450717, iteration: 32230
loss: 0.9818286299705505,grad_norm: 0.8120125755176321, iteration: 32231
loss: 1.0196830034255981,grad_norm: 0.9999997778939859, iteration: 32232
loss: 1.002668857574463,grad_norm: 0.9999990752256634, iteration: 32233
loss: 1.0544999837875366,grad_norm: 0.9999994413705959, iteration: 32234
loss: 1.070119857788086,grad_norm: 0.9999994693426988, iteration: 32235
loss: 0.987711489200592,grad_norm: 0.9999990901885419, iteration: 32236
loss: 1.0711725950241089,grad_norm: 0.9999995354398619, iteration: 32237
loss: 0.9928207397460938,grad_norm: 0.999999755018816, iteration: 32238
loss: 1.0501642227172852,grad_norm: 0.9999992789651806, iteration: 32239
loss: 1.0397764444351196,grad_norm: 0.9999992740845066, iteration: 32240
loss: 1.1328344345092773,grad_norm: 0.9999999061632852, iteration: 32241
loss: 1.0426877737045288,grad_norm: 0.9999996161463511, iteration: 32242
loss: 1.0303699970245361,grad_norm: 0.9999992999738165, iteration: 32243
loss: 1.0166194438934326,grad_norm: 0.9777581833455894, iteration: 32244
loss: 1.0212650299072266,grad_norm: 0.9999992304101072, iteration: 32245
loss: 1.0265624523162842,grad_norm: 0.9999992323380286, iteration: 32246
loss: 1.0068281888961792,grad_norm: 0.9032484272691064, iteration: 32247
loss: 0.989633321762085,grad_norm: 0.8574000000971532, iteration: 32248
loss: 0.9890896081924438,grad_norm: 0.9860626504170132, iteration: 32249
loss: 1.033638596534729,grad_norm: 0.9999996085993195, iteration: 32250
loss: 1.0423643589019775,grad_norm: 0.999999220948854, iteration: 32251
loss: 1.0553232431411743,grad_norm: 0.9999992855691163, iteration: 32252
loss: 0.9884056448936462,grad_norm: 0.9806887569402425, iteration: 32253
loss: 0.9661614298820496,grad_norm: 0.9366377404086695, iteration: 32254
loss: 1.009139895439148,grad_norm: 0.9999992274162816, iteration: 32255
loss: 0.9794882535934448,grad_norm: 0.9706596955518878, iteration: 32256
loss: 1.0576527118682861,grad_norm: 0.9999993081536711, iteration: 32257
loss: 1.012444257736206,grad_norm: 0.9999989604360221, iteration: 32258
loss: 1.0309005975723267,grad_norm: 0.9999991071375125, iteration: 32259
loss: 0.983802318572998,grad_norm: 0.9999990962736028, iteration: 32260
loss: 1.0107084512710571,grad_norm: 0.9999990835723221, iteration: 32261
loss: 1.0067170858383179,grad_norm: 0.9999993193465043, iteration: 32262
loss: 1.0520988702774048,grad_norm: 0.9999996475245059, iteration: 32263
loss: 1.0335134267807007,grad_norm: 0.9999992087595768, iteration: 32264
loss: 1.026921033859253,grad_norm: 0.9999992525830251, iteration: 32265
loss: 1.155927062034607,grad_norm: 0.9999998972503389, iteration: 32266
loss: 1.0579789876937866,grad_norm: 0.9999997712852584, iteration: 32267
loss: 1.0500562191009521,grad_norm: 0.9999995180454825, iteration: 32268
loss: 0.9647810459136963,grad_norm: 0.9999994130645204, iteration: 32269
loss: 1.0405515432357788,grad_norm: 0.9999993775636686, iteration: 32270
loss: 1.022685170173645,grad_norm: 0.9258176859211863, iteration: 32271
loss: 1.0460772514343262,grad_norm: 0.9999991638515591, iteration: 32272
loss: 1.009243369102478,grad_norm: 0.9221804236681178, iteration: 32273
loss: 1.0153430700302124,grad_norm: 0.999999089383995, iteration: 32274
loss: 1.0140016078948975,grad_norm: 0.9999990383529836, iteration: 32275
loss: 0.9669765830039978,grad_norm: 0.9999990972313428, iteration: 32276
loss: 1.0059510469436646,grad_norm: 0.9999990425361992, iteration: 32277
loss: 0.9850160479545593,grad_norm: 0.9999990740165121, iteration: 32278
loss: 1.0681400299072266,grad_norm: 0.999999867776934, iteration: 32279
loss: 1.0268744230270386,grad_norm: 0.9055937441629871, iteration: 32280
loss: 0.9934139251708984,grad_norm: 0.9999989292494613, iteration: 32281
loss: 1.0244228839874268,grad_norm: 0.9999990965417277, iteration: 32282
loss: 1.0336028337478638,grad_norm: 0.9999991985090901, iteration: 32283
loss: 0.9969894289970398,grad_norm: 0.9999988758652175, iteration: 32284
loss: 0.9972203373908997,grad_norm: 0.999999112472294, iteration: 32285
loss: 1.0225194692611694,grad_norm: 0.9198880992830543, iteration: 32286
loss: 1.0573481321334839,grad_norm: 0.9999999022358256, iteration: 32287
loss: 1.008362889289856,grad_norm: 0.9562168594507248, iteration: 32288
loss: 1.0151292085647583,grad_norm: 0.9999993335776219, iteration: 32289
loss: 0.9824051260948181,grad_norm: 0.9277432002661423, iteration: 32290
loss: 1.0601650476455688,grad_norm: 0.999999202047486, iteration: 32291
loss: 0.9847539663314819,grad_norm: 0.9999992186870703, iteration: 32292
loss: 0.9933708906173706,grad_norm: 0.9999992934653337, iteration: 32293
loss: 0.9964777231216431,grad_norm: 0.999999148682213, iteration: 32294
loss: 0.9952908754348755,grad_norm: 0.9999990935965107, iteration: 32295
loss: 1.024724006652832,grad_norm: 0.8427253329419157, iteration: 32296
loss: 1.0193277597427368,grad_norm: 0.9999992126310088, iteration: 32297
loss: 1.0148369073867798,grad_norm: 0.9031947704753229, iteration: 32298
loss: 1.0165948867797852,grad_norm: 0.7176369649578822, iteration: 32299
loss: 0.9892602562904358,grad_norm: 0.9999992467010137, iteration: 32300
loss: 1.050417423248291,grad_norm: 0.9151156740451428, iteration: 32301
loss: 0.9781453013420105,grad_norm: 0.9448309582591387, iteration: 32302
loss: 1.0334992408752441,grad_norm: 0.9911129949307075, iteration: 32303
loss: 1.0289241075515747,grad_norm: 0.9999990390218165, iteration: 32304
loss: 1.021817684173584,grad_norm: 0.9999997803816344, iteration: 32305
loss: 0.974181592464447,grad_norm: 0.9999990922291564, iteration: 32306
loss: 0.9989141225814819,grad_norm: 0.817166654877575, iteration: 32307
loss: 0.9715096354484558,grad_norm: 0.9999990668110883, iteration: 32308
loss: 1.0392255783081055,grad_norm: 0.9999996608963828, iteration: 32309
loss: 1.0172134637832642,grad_norm: 0.9228847000750559, iteration: 32310
loss: 0.9877266883850098,grad_norm: 0.9999996826348209, iteration: 32311
loss: 1.1114684343338013,grad_norm: 0.9999998853202822, iteration: 32312
loss: 1.0009446144104004,grad_norm: 0.9999990734694242, iteration: 32313
loss: 1.0025873184204102,grad_norm: 0.9999990864042092, iteration: 32314
loss: 1.0205477476119995,grad_norm: 0.9999991073940964, iteration: 32315
loss: 0.9675628542900085,grad_norm: 0.9999992767072229, iteration: 32316
loss: 1.031851053237915,grad_norm: 0.9111539016993117, iteration: 32317
loss: 1.0175596475601196,grad_norm: 0.9999993939369582, iteration: 32318
loss: 1.0122894048690796,grad_norm: 0.9933886791875317, iteration: 32319
loss: 1.0148906707763672,grad_norm: 0.8435278087676221, iteration: 32320
loss: 1.0401616096496582,grad_norm: 0.9430264085999779, iteration: 32321
loss: 1.0330415964126587,grad_norm: 0.9528035473754255, iteration: 32322
loss: 0.9742358326911926,grad_norm: 0.9999991483820443, iteration: 32323
loss: 0.9939041137695312,grad_norm: 0.9766481735063605, iteration: 32324
loss: 1.0240432024002075,grad_norm: 0.9999992417059429, iteration: 32325
loss: 1.0109870433807373,grad_norm: 0.9999991069120397, iteration: 32326
loss: 1.0279688835144043,grad_norm: 0.9999993689188466, iteration: 32327
loss: 1.0406960248947144,grad_norm: 0.9999997630337683, iteration: 32328
loss: 1.0187374353408813,grad_norm: 0.9999993452581829, iteration: 32329
loss: 0.949183464050293,grad_norm: 0.8274988943249082, iteration: 32330
loss: 1.0263023376464844,grad_norm: 0.9429345675850938, iteration: 32331
loss: 1.0198416709899902,grad_norm: 0.9999992165343023, iteration: 32332
loss: 0.9823076128959656,grad_norm: 0.9594648796877827, iteration: 32333
loss: 1.0343559980392456,grad_norm: 0.9999992613124072, iteration: 32334
loss: 0.9829712510108948,grad_norm: 0.999999171675122, iteration: 32335
loss: 1.0269118547439575,grad_norm: 0.9221940384737434, iteration: 32336
loss: 0.9679794311523438,grad_norm: 0.9566766466482908, iteration: 32337
loss: 1.0156633853912354,grad_norm: 0.8761732247063007, iteration: 32338
loss: 0.9887852668762207,grad_norm: 0.8778565480168236, iteration: 32339
loss: 1.0344290733337402,grad_norm: 0.9901505852078376, iteration: 32340
loss: 0.9440110921859741,grad_norm: 0.9325522613981309, iteration: 32341
loss: 0.9914205074310303,grad_norm: 0.9999994275184246, iteration: 32342
loss: 1.0179312229156494,grad_norm: 0.9999991856944882, iteration: 32343
loss: 1.0274542570114136,grad_norm: 0.9999991024221332, iteration: 32344
loss: 1.050960898399353,grad_norm: 0.9999994790425916, iteration: 32345
loss: 1.0203843116760254,grad_norm: 0.9999993407283637, iteration: 32346
loss: 1.0283085107803345,grad_norm: 0.8914210215531246, iteration: 32347
loss: 1.0163205862045288,grad_norm: 0.9999998603209446, iteration: 32348
loss: 1.0143789052963257,grad_norm: 0.8037143023196334, iteration: 32349
loss: 1.0111356973648071,grad_norm: 0.9831715806763058, iteration: 32350
loss: 1.0544153451919556,grad_norm: 0.9999992515192914, iteration: 32351
loss: 1.041981816291809,grad_norm: 0.9999996320190203, iteration: 32352
loss: 1.0202912092208862,grad_norm: 0.9999991825265385, iteration: 32353
loss: 1.0273220539093018,grad_norm: 0.7920729969486979, iteration: 32354
loss: 0.994037926197052,grad_norm: 0.9999994828679764, iteration: 32355
loss: 1.0401337146759033,grad_norm: 0.9999992007604291, iteration: 32356
loss: 1.0492700338363647,grad_norm: 0.9999994141197276, iteration: 32357
loss: 1.0327610969543457,grad_norm: 0.9999990902958373, iteration: 32358
loss: 1.0056462287902832,grad_norm: 0.9999993784674628, iteration: 32359
loss: 1.002614140510559,grad_norm: 0.845411223761021, iteration: 32360
loss: 1.0092744827270508,grad_norm: 0.9999991081794409, iteration: 32361
loss: 1.0327630043029785,grad_norm: 0.999999486124519, iteration: 32362
loss: 0.9978412985801697,grad_norm: 0.8142169358875856, iteration: 32363
loss: 1.0275157690048218,grad_norm: 0.9794889356801474, iteration: 32364
loss: 1.0481221675872803,grad_norm: 0.9999995808464897, iteration: 32365
loss: 1.0341695547103882,grad_norm: 0.9999996452051516, iteration: 32366
loss: 1.033969521522522,grad_norm: 0.9999997309765556, iteration: 32367
loss: 0.9949536919593811,grad_norm: 0.9541867812487783, iteration: 32368
loss: 1.0068042278289795,grad_norm: 0.9817539122341031, iteration: 32369
loss: 1.0831109285354614,grad_norm: 0.999999566763158, iteration: 32370
loss: 1.0555763244628906,grad_norm: 0.9999994706676629, iteration: 32371
loss: 1.0300945043563843,grad_norm: 0.860364786444023, iteration: 32372
loss: 1.0224015712738037,grad_norm: 0.9999994892807245, iteration: 32373
loss: 1.063031554222107,grad_norm: 0.9999993867972934, iteration: 32374
loss: 1.0055694580078125,grad_norm: 0.9198653382286849, iteration: 32375
loss: 1.0191583633422852,grad_norm: 0.9999992046160233, iteration: 32376
loss: 1.059486985206604,grad_norm: 0.9999996760896857, iteration: 32377
loss: 1.0671111345291138,grad_norm: 0.9999991992994249, iteration: 32378
loss: 1.0182716846466064,grad_norm: 0.9999991380138731, iteration: 32379
loss: 1.0322800874710083,grad_norm: 0.9485936116633171, iteration: 32380
loss: 1.026626467704773,grad_norm: 0.895155060598288, iteration: 32381
loss: 1.0065892934799194,grad_norm: 0.9999992672968184, iteration: 32382
loss: 1.0043292045593262,grad_norm: 0.8438331704307773, iteration: 32383
loss: 1.072480320930481,grad_norm: 0.9999990352986681, iteration: 32384
loss: 1.040683627128601,grad_norm: 0.9999991679917176, iteration: 32385
loss: 1.0771652460098267,grad_norm: 0.9999993915121359, iteration: 32386
loss: 1.0119723081588745,grad_norm: 0.9999994053183954, iteration: 32387
loss: 1.0333980321884155,grad_norm: 0.9999993633417824, iteration: 32388
loss: 1.039652705192566,grad_norm: 0.9999991551267314, iteration: 32389
loss: 1.0098527669906616,grad_norm: 0.9999989871763464, iteration: 32390
loss: 1.0091770887374878,grad_norm: 0.9981905661388156, iteration: 32391
loss: 0.9526072144508362,grad_norm: 0.9999991391485544, iteration: 32392
loss: 1.0192023515701294,grad_norm: 0.9822630853605696, iteration: 32393
loss: 0.9955354332923889,grad_norm: 0.9902047114042409, iteration: 32394
loss: 1.0019307136535645,grad_norm: 0.9999990660296924, iteration: 32395
loss: 0.9964962601661682,grad_norm: 0.931065240071878, iteration: 32396
loss: 0.9976394176483154,grad_norm: 0.8411348912501792, iteration: 32397
loss: 1.0357266664505005,grad_norm: 0.989690368220577, iteration: 32398
loss: 1.0297608375549316,grad_norm: 0.9866917559697844, iteration: 32399
loss: 1.0220097303390503,grad_norm: 0.7938942750255823, iteration: 32400
loss: 1.0712205171585083,grad_norm: 0.9999990455979504, iteration: 32401
loss: 1.0030033588409424,grad_norm: 0.9619170292147639, iteration: 32402
loss: 1.0134459733963013,grad_norm: 0.9883437034177437, iteration: 32403
loss: 1.0400385856628418,grad_norm: 0.9819621896373276, iteration: 32404
loss: 1.0043257474899292,grad_norm: 0.8743080672274494, iteration: 32405
loss: 0.9648464918136597,grad_norm: 0.8575214141164137, iteration: 32406
loss: 0.9904569387435913,grad_norm: 0.9999991428609629, iteration: 32407
loss: 1.0221433639526367,grad_norm: 0.9065182712489287, iteration: 32408
loss: 1.0410091876983643,grad_norm: 0.9999996150220624, iteration: 32409
loss: 1.0195621252059937,grad_norm: 0.9999992762737704, iteration: 32410
loss: 1.0092384815216064,grad_norm: 0.9506492641435322, iteration: 32411
loss: 1.0124807357788086,grad_norm: 0.9148516116306542, iteration: 32412
loss: 0.9810758233070374,grad_norm: 0.8297334273819846, iteration: 32413
loss: 1.0383763313293457,grad_norm: 0.9999990219423743, iteration: 32414
loss: 1.028648853302002,grad_norm: 0.9911312963005717, iteration: 32415
loss: 0.9979341626167297,grad_norm: 0.9999991498552231, iteration: 32416
loss: 1.0227653980255127,grad_norm: 0.8046367688474776, iteration: 32417
loss: 1.0442931652069092,grad_norm: 0.9999992102999443, iteration: 32418
loss: 0.9710217118263245,grad_norm: 0.999999234220204, iteration: 32419
loss: 0.9900727272033691,grad_norm: 0.8877699937091121, iteration: 32420
loss: 1.010299563407898,grad_norm: 0.9999990396195794, iteration: 32421
loss: 0.9955630302429199,grad_norm: 0.9999991297909399, iteration: 32422
loss: 0.951155424118042,grad_norm: 0.9999990965615931, iteration: 32423
loss: 1.0167365074157715,grad_norm: 0.9419656178732349, iteration: 32424
loss: 1.0187071561813354,grad_norm: 0.971550887255935, iteration: 32425
loss: 1.01288902759552,grad_norm: 0.9999993739846464, iteration: 32426
loss: 0.9759792685508728,grad_norm: 0.9999991921065313, iteration: 32427
loss: 0.9947148561477661,grad_norm: 0.9237917900610412, iteration: 32428
loss: 1.0045932531356812,grad_norm: 0.9999990421421247, iteration: 32429
loss: 0.9944669008255005,grad_norm: 0.9999992866204879, iteration: 32430
loss: 1.0680924654006958,grad_norm: 0.9999991632512332, iteration: 32431
loss: 1.0639243125915527,grad_norm: 0.99999923545452, iteration: 32432
loss: 1.0095607042312622,grad_norm: 0.8810300957529226, iteration: 32433
loss: 1.010998010635376,grad_norm: 0.8232904745349552, iteration: 32434
loss: 1.0498558282852173,grad_norm: 0.9999998230753216, iteration: 32435
loss: 1.0391138792037964,grad_norm: 0.9317098504017018, iteration: 32436
loss: 1.0202370882034302,grad_norm: 0.9999991232517481, iteration: 32437
loss: 1.0018165111541748,grad_norm: 0.8892425369748277, iteration: 32438
loss: 1.0026485919952393,grad_norm: 0.9999990405783636, iteration: 32439
loss: 1.045270323753357,grad_norm: 0.875660868317402, iteration: 32440
loss: 1.003546118736267,grad_norm: 0.9999990344059101, iteration: 32441
loss: 1.0336390733718872,grad_norm: 0.9999991227017765, iteration: 32442
loss: 1.0265965461730957,grad_norm: 0.9999991786134893, iteration: 32443
loss: 1.0165743827819824,grad_norm: 0.9999990913644493, iteration: 32444
loss: 1.0151114463806152,grad_norm: 0.9008615442989393, iteration: 32445
loss: 0.9772400856018066,grad_norm: 0.9694326164539604, iteration: 32446
loss: 1.0214537382125854,grad_norm: 0.9910684268378033, iteration: 32447
loss: 1.00519597530365,grad_norm: 0.9999991977041902, iteration: 32448
loss: 0.9966023564338684,grad_norm: 0.9918784749560828, iteration: 32449
loss: 1.0382364988327026,grad_norm: 0.9999990422089254, iteration: 32450
loss: 0.9879090189933777,grad_norm: 0.8181730725014315, iteration: 32451
loss: 1.0035017728805542,grad_norm: 0.9172357877417899, iteration: 32452
loss: 1.0058151483535767,grad_norm: 0.9926127477452837, iteration: 32453
loss: 1.0292226076126099,grad_norm: 0.9999989132334633, iteration: 32454
loss: 0.9970463514328003,grad_norm: 0.8743484384352864, iteration: 32455
loss: 1.0093276500701904,grad_norm: 0.9999991232851034, iteration: 32456
loss: 1.0217814445495605,grad_norm: 0.9999992972764647, iteration: 32457
loss: 1.0098828077316284,grad_norm: 0.9999993452021169, iteration: 32458
loss: 1.0225588083267212,grad_norm: 0.9999990058638955, iteration: 32459
loss: 1.0191144943237305,grad_norm: 0.999999168343558, iteration: 32460
loss: 0.9975753426551819,grad_norm: 0.9670706216202363, iteration: 32461
loss: 1.0009651184082031,grad_norm: 0.9999992044537966, iteration: 32462
loss: 1.041900634765625,grad_norm: 0.9999990819727725, iteration: 32463
loss: 1.0017153024673462,grad_norm: 0.9723627168169742, iteration: 32464
loss: 0.9956867694854736,grad_norm: 0.9402993276605942, iteration: 32465
loss: 1.039258599281311,grad_norm: 0.9999998912477169, iteration: 32466
loss: 0.9899244904518127,grad_norm: 0.9999992778958509, iteration: 32467
loss: 1.013008713722229,grad_norm: 0.9999990555041103, iteration: 32468
loss: 1.019848346710205,grad_norm: 0.8925846466079577, iteration: 32469
loss: 1.0128892660140991,grad_norm: 0.9999990508977351, iteration: 32470
loss: 0.9896721243858337,grad_norm: 0.9999992176825945, iteration: 32471
loss: 0.9951793551445007,grad_norm: 0.9999989834995984, iteration: 32472
loss: 0.9935237169265747,grad_norm: 0.9999990955702397, iteration: 32473
loss: 0.983125627040863,grad_norm: 0.9317130761209432, iteration: 32474
loss: 1.0258886814117432,grad_norm: 0.951126382783842, iteration: 32475
loss: 1.0343585014343262,grad_norm: 0.9999994688671685, iteration: 32476
loss: 1.0387269258499146,grad_norm: 0.9999991077079404, iteration: 32477
loss: 0.9871602654457092,grad_norm: 0.8865452696085815, iteration: 32478
loss: 0.9998195171356201,grad_norm: 0.9420831279411058, iteration: 32479
loss: 1.0184565782546997,grad_norm: 0.8640502959183339, iteration: 32480
loss: 1.053713321685791,grad_norm: 0.9999997045259343, iteration: 32481
loss: 1.0100876092910767,grad_norm: 0.999999219383333, iteration: 32482
loss: 1.0552347898483276,grad_norm: 0.9999993135525601, iteration: 32483
loss: 0.9896384477615356,grad_norm: 0.9228590352190762, iteration: 32484
loss: 1.0489907264709473,grad_norm: 0.9999993172859247, iteration: 32485
loss: 0.9901493787765503,grad_norm: 0.9999989590511784, iteration: 32486
loss: 1.048015832901001,grad_norm: 0.9999990984984772, iteration: 32487
loss: 1.059541940689087,grad_norm: 0.9999992019314948, iteration: 32488
loss: 1.0003273487091064,grad_norm: 0.9786117036895096, iteration: 32489
loss: 0.9955628514289856,grad_norm: 0.8497037270901352, iteration: 32490
loss: 1.0104210376739502,grad_norm: 0.9999993472132065, iteration: 32491
loss: 0.9981086850166321,grad_norm: 0.9999990965167901, iteration: 32492
loss: 0.984017014503479,grad_norm: 0.7837475891963984, iteration: 32493
loss: 1.0160022974014282,grad_norm: 0.8492049068188386, iteration: 32494
loss: 0.9560624957084656,grad_norm: 0.9999991475766449, iteration: 32495
loss: 1.0488742589950562,grad_norm: 0.9999996987264584, iteration: 32496
loss: 0.9851217269897461,grad_norm: 0.9827106320550646, iteration: 32497
loss: 1.0196555852890015,grad_norm: 0.9999991584896353, iteration: 32498
loss: 1.070329189300537,grad_norm: 0.999999159022054, iteration: 32499
loss: 0.9820743799209595,grad_norm: 0.9999992100796048, iteration: 32500
loss: 1.0332674980163574,grad_norm: 0.8553580861899254, iteration: 32501
loss: 1.0078215599060059,grad_norm: 0.9999991719176604, iteration: 32502
loss: 1.0023053884506226,grad_norm: 0.74477314994387, iteration: 32503
loss: 0.9783074855804443,grad_norm: 0.9010327420051915, iteration: 32504
loss: 1.0255671739578247,grad_norm: 0.8151900354453951, iteration: 32505
loss: 1.014146327972412,grad_norm: 0.9240364123496688, iteration: 32506
loss: 1.0403136014938354,grad_norm: 0.9428076620775776, iteration: 32507
loss: 0.9976085424423218,grad_norm: 0.9356430669206057, iteration: 32508
loss: 0.9773960113525391,grad_norm: 0.999999071011259, iteration: 32509
loss: 0.9713773131370544,grad_norm: 0.9149265502980156, iteration: 32510
loss: 1.0481690168380737,grad_norm: 0.9999991377500239, iteration: 32511
loss: 0.9704203605651855,grad_norm: 0.9999992319418186, iteration: 32512
loss: 1.061724066734314,grad_norm: 0.9999993133679077, iteration: 32513
loss: 1.0025901794433594,grad_norm: 0.8941012152259369, iteration: 32514
loss: 1.0054700374603271,grad_norm: 0.949011618004371, iteration: 32515
loss: 1.004569411277771,grad_norm: 0.9999990115851835, iteration: 32516
loss: 0.9921388626098633,grad_norm: 0.9458946998228178, iteration: 32517
loss: 1.013841152191162,grad_norm: 0.9848330214516932, iteration: 32518
loss: 1.1925451755523682,grad_norm: 0.9999994841564241, iteration: 32519
loss: 0.9523510336875916,grad_norm: 0.9999990724374922, iteration: 32520
loss: 1.044384241104126,grad_norm: 0.9999993741371259, iteration: 32521
loss: 1.0251532793045044,grad_norm: 0.8913253953170177, iteration: 32522
loss: 1.007313847541809,grad_norm: 0.9999989601013457, iteration: 32523
loss: 1.0282633304595947,grad_norm: 0.984540318298454, iteration: 32524
loss: 1.032808780670166,grad_norm: 0.824451567702234, iteration: 32525
loss: 1.0195192098617554,grad_norm: 0.8830257587875532, iteration: 32526
loss: 1.017534852027893,grad_norm: 0.9999991033467092, iteration: 32527
loss: 1.00311279296875,grad_norm: 0.9365084668425331, iteration: 32528
loss: 1.0178896188735962,grad_norm: 0.999999142213237, iteration: 32529
loss: 1.0131378173828125,grad_norm: 0.9999992027063436, iteration: 32530
loss: 0.9744502902030945,grad_norm: 0.8213314587468326, iteration: 32531
loss: 1.0066438913345337,grad_norm: 0.9999990335637697, iteration: 32532
loss: 1.012766718864441,grad_norm: 0.8126590428804318, iteration: 32533
loss: 0.983536958694458,grad_norm: 0.9829202731568915, iteration: 32534
loss: 1.0096745491027832,grad_norm: 0.8827366004874035, iteration: 32535
loss: 1.0237294435501099,grad_norm: 0.9510686761000126, iteration: 32536
loss: 1.013988971710205,grad_norm: 0.9999989658026585, iteration: 32537
loss: 1.0066474676132202,grad_norm: 0.9999991641220239, iteration: 32538
loss: 1.0030474662780762,grad_norm: 0.9077012157313382, iteration: 32539
loss: 1.013201355934143,grad_norm: 0.9999991305164297, iteration: 32540
loss: 0.9684825539588928,grad_norm: 0.9230527617039148, iteration: 32541
loss: 1.0489189624786377,grad_norm: 0.9999995660066406, iteration: 32542
loss: 0.998938262462616,grad_norm: 0.9999990719976002, iteration: 32543
loss: 1.0228317975997925,grad_norm: 0.9498363096320235, iteration: 32544
loss: 1.0284825563430786,grad_norm: 0.862558896497813, iteration: 32545
loss: 0.9478101134300232,grad_norm: 0.9157566376356927, iteration: 32546
loss: 1.0297712087631226,grad_norm: 0.8301882998388026, iteration: 32547
loss: 1.0074890851974487,grad_norm: 0.7808558525153664, iteration: 32548
loss: 1.0124820470809937,grad_norm: 0.9999992807555624, iteration: 32549
loss: 1.025708794593811,grad_norm: 0.9925973306685544, iteration: 32550
loss: 1.0057220458984375,grad_norm: 0.9258628667661114, iteration: 32551
loss: 0.9882989525794983,grad_norm: 0.9753525918885363, iteration: 32552
loss: 0.998140811920166,grad_norm: 0.8483840190625638, iteration: 32553
loss: 0.9703244566917419,grad_norm: 0.9999994804051547, iteration: 32554
loss: 1.0099064111709595,grad_norm: 0.9642997449969994, iteration: 32555
loss: 1.02708899974823,grad_norm: 0.9999994398313116, iteration: 32556
loss: 0.9788861870765686,grad_norm: 0.9450948779008027, iteration: 32557
loss: 0.9893808960914612,grad_norm: 0.9999991014117698, iteration: 32558
loss: 0.9884843826293945,grad_norm: 0.8348980900274328, iteration: 32559
loss: 0.9946328997612,grad_norm: 0.9953129101637065, iteration: 32560
loss: 0.9839526414871216,grad_norm: 0.9146420562439919, iteration: 32561
loss: 1.0217481851577759,grad_norm: 0.9186135393926701, iteration: 32562
loss: 0.9882810711860657,grad_norm: 0.9911383804952616, iteration: 32563
loss: 1.030027985572815,grad_norm: 0.999999253430005, iteration: 32564
loss: 0.9988595247268677,grad_norm: 0.9121540058305015, iteration: 32565
loss: 1.005371332168579,grad_norm: 0.9126500347390939, iteration: 32566
loss: 1.0045347213745117,grad_norm: 0.9999990600716241, iteration: 32567
loss: 1.0365924835205078,grad_norm: 0.9999990426300209, iteration: 32568
loss: 0.9876840710639954,grad_norm: 0.9365270316924391, iteration: 32569
loss: 1.0234756469726562,grad_norm: 0.9999990857968376, iteration: 32570
loss: 1.0261036157608032,grad_norm: 0.8180532034306094, iteration: 32571
loss: 0.9833341240882874,grad_norm: 0.8189138219290129, iteration: 32572
loss: 1.0425496101379395,grad_norm: 0.846782837788393, iteration: 32573
loss: 1.0162261724472046,grad_norm: 0.8162307606998218, iteration: 32574
loss: 1.015592098236084,grad_norm: 0.999999245155648, iteration: 32575
loss: 0.9937699437141418,grad_norm: 0.9999993539298365, iteration: 32576
loss: 0.9861600995063782,grad_norm: 0.8463597635482009, iteration: 32577
loss: 0.9817121028900146,grad_norm: 0.9999991044242738, iteration: 32578
loss: 0.980066180229187,grad_norm: 0.8680877557358894, iteration: 32579
loss: 1.0185208320617676,grad_norm: 0.9999991744855535, iteration: 32580
loss: 0.9820343255996704,grad_norm: 0.9499871808205191, iteration: 32581
loss: 1.0176142454147339,grad_norm: 0.9952711740191751, iteration: 32582
loss: 0.9788349270820618,grad_norm: 0.9999992312220366, iteration: 32583
loss: 1.0034245252609253,grad_norm: 0.99999900757351, iteration: 32584
loss: 1.018917441368103,grad_norm: 0.9999991505957332, iteration: 32585
loss: 0.9978732466697693,grad_norm: 0.8973975082112805, iteration: 32586
loss: 1.0131481885910034,grad_norm: 0.9999991536264121, iteration: 32587
loss: 1.0167169570922852,grad_norm: 0.9999989919367297, iteration: 32588
loss: 1.0105174779891968,grad_norm: 0.897388422070391, iteration: 32589
loss: 1.0198734998703003,grad_norm: 0.9434976614389569, iteration: 32590
loss: 1.0563135147094727,grad_norm: 0.999999032182462, iteration: 32591
loss: 1.0248439311981201,grad_norm: 0.9254294474527426, iteration: 32592
loss: 1.0064791440963745,grad_norm: 0.9152818930832527, iteration: 32593
loss: 0.9976540207862854,grad_norm: 0.7705233098017337, iteration: 32594
loss: 0.99819016456604,grad_norm: 0.9999992634509632, iteration: 32595
loss: 0.9840443134307861,grad_norm: 0.8973870570084372, iteration: 32596
loss: 0.9876465797424316,grad_norm: 0.9999991857438137, iteration: 32597
loss: 1.0589113235473633,grad_norm: 1.0000000391395039, iteration: 32598
loss: 1.0054168701171875,grad_norm: 0.9999992440245333, iteration: 32599
loss: 0.9969208240509033,grad_norm: 0.9999990945829825, iteration: 32600
loss: 1.0101821422576904,grad_norm: 0.9400413427762809, iteration: 32601
loss: 0.992058277130127,grad_norm: 0.8960118706599913, iteration: 32602
loss: 1.0570673942565918,grad_norm: 0.8199453261577738, iteration: 32603
loss: 0.9980474710464478,grad_norm: 0.8238956737262149, iteration: 32604
loss: 1.0294712781906128,grad_norm: 0.8556351973423162, iteration: 32605
loss: 0.988801121711731,grad_norm: 0.9777534607738536, iteration: 32606
loss: 0.9956480860710144,grad_norm: 0.9999993879053051, iteration: 32607
loss: 1.0373425483703613,grad_norm: 0.9999995708121743, iteration: 32608
loss: 1.0085722208023071,grad_norm: 0.9999991805777161, iteration: 32609
loss: 1.0093716382980347,grad_norm: 0.999999042752083, iteration: 32610
loss: 0.9847940802574158,grad_norm: 0.9999990417375539, iteration: 32611
loss: 1.035326361656189,grad_norm: 0.8216862081137809, iteration: 32612
loss: 0.9964503645896912,grad_norm: 0.9999990194378632, iteration: 32613
loss: 1.0557680130004883,grad_norm: 0.9999990348247842, iteration: 32614
loss: 1.030258297920227,grad_norm: 0.9999991920618218, iteration: 32615
loss: 1.030240774154663,grad_norm: 0.9309774907880347, iteration: 32616
loss: 1.0280635356903076,grad_norm: 0.7955108830622155, iteration: 32617
loss: 1.0175879001617432,grad_norm: 0.8542380843804147, iteration: 32618
loss: 0.979584813117981,grad_norm: 0.9986946431600108, iteration: 32619
loss: 1.0150386095046997,grad_norm: 0.9999990997024344, iteration: 32620
loss: 0.9860658049583435,grad_norm: 0.999999300904579, iteration: 32621
loss: 1.0248098373413086,grad_norm: 0.9999989652078216, iteration: 32622
loss: 0.9990499019622803,grad_norm: 0.9151528909286336, iteration: 32623
loss: 1.0183244943618774,grad_norm: 0.9250786870740575, iteration: 32624
loss: 1.0397306680679321,grad_norm: 0.985395732125999, iteration: 32625
loss: 1.0256317853927612,grad_norm: 0.9774461493413076, iteration: 32626
loss: 1.0309730768203735,grad_norm: 0.9952885922251657, iteration: 32627
loss: 1.014993667602539,grad_norm: 0.9999992271832663, iteration: 32628
loss: 1.010230302810669,grad_norm: 0.785959048469025, iteration: 32629
loss: 1.0075873136520386,grad_norm: 0.8611472128423697, iteration: 32630
loss: 0.9985859990119934,grad_norm: 0.9556917645150013, iteration: 32631
loss: 1.0162765979766846,grad_norm: 0.99174366613275, iteration: 32632
loss: 0.9807737469673157,grad_norm: 0.9999991342286643, iteration: 32633
loss: 0.953289270401001,grad_norm: 0.9999992046417808, iteration: 32634
loss: 0.9780237674713135,grad_norm: 0.9999990684266595, iteration: 32635
loss: 1.0200766324996948,grad_norm: 0.9730856412144993, iteration: 32636
loss: 0.9598928689956665,grad_norm: 0.8906458394368433, iteration: 32637
loss: 1.0069125890731812,grad_norm: 0.9729717012770974, iteration: 32638
loss: 1.057295799255371,grad_norm: 0.9985180680402101, iteration: 32639
loss: 0.9700061082839966,grad_norm: 0.9999991389730079, iteration: 32640
loss: 1.0219508409500122,grad_norm: 0.9549319982399314, iteration: 32641
loss: 1.01207435131073,grad_norm: 0.8655203929666663, iteration: 32642
loss: 1.002867341041565,grad_norm: 0.8911409022440951, iteration: 32643
loss: 0.968940019607544,grad_norm: 0.9972946243550088, iteration: 32644
loss: 1.0411254167556763,grad_norm: 0.9999990186248872, iteration: 32645
loss: 0.9901289939880371,grad_norm: 0.9999990672347437, iteration: 32646
loss: 1.0204390287399292,grad_norm: 0.9999990354248012, iteration: 32647
loss: 1.0083396434783936,grad_norm: 0.8098067083465981, iteration: 32648
loss: 0.9661455750465393,grad_norm: 0.9999992272707916, iteration: 32649
loss: 1.048409342765808,grad_norm: 0.9999995644284772, iteration: 32650
loss: 0.9899309277534485,grad_norm: 0.8959454863595785, iteration: 32651
loss: 1.0160245895385742,grad_norm: 0.9999990080217586, iteration: 32652
loss: 0.997025191783905,grad_norm: 0.999999084951196, iteration: 32653
loss: 1.0254766941070557,grad_norm: 0.9999991078637337, iteration: 32654
loss: 0.9797379970550537,grad_norm: 0.9999991580371509, iteration: 32655
loss: 1.0409889221191406,grad_norm: 0.9645009624086035, iteration: 32656
loss: 1.0438989400863647,grad_norm: 0.9999992832397763, iteration: 32657
loss: 1.0138499736785889,grad_norm: 0.999998894747382, iteration: 32658
loss: 1.0442639589309692,grad_norm: 0.999998972406157, iteration: 32659
loss: 0.9879150986671448,grad_norm: 0.9346900400906224, iteration: 32660
loss: 1.0286586284637451,grad_norm: 0.999999191225218, iteration: 32661
loss: 0.9764930605888367,grad_norm: 0.9999992837291312, iteration: 32662
loss: 1.0195451974868774,grad_norm: 0.857309421314988, iteration: 32663
loss: 1.0154216289520264,grad_norm: 0.9999991925039993, iteration: 32664
loss: 0.9906564354896545,grad_norm: 0.9999993358248723, iteration: 32665
loss: 1.0204901695251465,grad_norm: 0.9999990299425927, iteration: 32666
loss: 0.9664663076400757,grad_norm: 0.9999989865523136, iteration: 32667
loss: 1.0125772953033447,grad_norm: 0.9999992951044344, iteration: 32668
loss: 1.017966866493225,grad_norm: 0.9999996686088636, iteration: 32669
loss: 0.9975354671478271,grad_norm: 0.9999990723510067, iteration: 32670
loss: 0.982278048992157,grad_norm: 0.9999995581283485, iteration: 32671
loss: 0.9705255627632141,grad_norm: 0.9561336535229583, iteration: 32672
loss: 0.999792218208313,grad_norm: 0.964755337768096, iteration: 32673
loss: 0.9685019254684448,grad_norm: 0.9999992044984225, iteration: 32674
loss: 0.9678944945335388,grad_norm: 0.8695911183561252, iteration: 32675
loss: 1.0255122184753418,grad_norm: 0.9405871560936347, iteration: 32676
loss: 1.0074245929718018,grad_norm: 0.9194050316147018, iteration: 32677
loss: 0.9711107015609741,grad_norm: 0.9999992131683411, iteration: 32678
loss: 0.9616016149520874,grad_norm: 0.9376570644250142, iteration: 32679
loss: 1.0064340829849243,grad_norm: 0.9999992477112841, iteration: 32680
loss: 0.9894081354141235,grad_norm: 0.9999995187586772, iteration: 32681
loss: 1.0204054117202759,grad_norm: 0.8934398956273453, iteration: 32682
loss: 0.9877060055732727,grad_norm: 0.818835181775299, iteration: 32683
loss: 0.9782625436782837,grad_norm: 0.9151110591015235, iteration: 32684
loss: 0.9999252557754517,grad_norm: 0.9456499003939366, iteration: 32685
loss: 0.9946084022521973,grad_norm: 0.976555139882062, iteration: 32686
loss: 1.0197169780731201,grad_norm: 0.9999991448033092, iteration: 32687
loss: 1.0086796283721924,grad_norm: 0.8390260760979835, iteration: 32688
loss: 1.0253618955612183,grad_norm: 0.9999994714618116, iteration: 32689
loss: 1.0032179355621338,grad_norm: 0.8553872461386787, iteration: 32690
loss: 0.981769859790802,grad_norm: 0.9999993042091507, iteration: 32691
loss: 1.0033775568008423,grad_norm: 0.9276665445689567, iteration: 32692
loss: 1.0067123174667358,grad_norm: 0.9999991451865325, iteration: 32693
loss: 1.0246613025665283,grad_norm: 0.9393203553564178, iteration: 32694
loss: 0.9900730848312378,grad_norm: 0.9202948327927519, iteration: 32695
loss: 0.9806211590766907,grad_norm: 0.9999989149706493, iteration: 32696
loss: 0.9883335828781128,grad_norm: 0.9999991311166477, iteration: 32697
loss: 0.991777241230011,grad_norm: 0.9999991704948846, iteration: 32698
loss: 1.0311726331710815,grad_norm: 0.8760073120294566, iteration: 32699
loss: 1.0332006216049194,grad_norm: 0.9999990987364028, iteration: 32700
loss: 1.0336004495620728,grad_norm: 0.9999995616153204, iteration: 32701
loss: 1.0371695756912231,grad_norm: 0.9982449270030131, iteration: 32702
loss: 1.010353684425354,grad_norm: 0.9592051984938409, iteration: 32703
loss: 1.0222562551498413,grad_norm: 0.9462151552274324, iteration: 32704
loss: 0.9942396879196167,grad_norm: 0.880636662345549, iteration: 32705
loss: 1.0274280309677124,grad_norm: 0.999999217257524, iteration: 32706
loss: 0.9759243130683899,grad_norm: 0.9712214088753529, iteration: 32707
loss: 1.011892557144165,grad_norm: 0.9999991009789646, iteration: 32708
loss: 1.061445713043213,grad_norm: 0.9999999132665821, iteration: 32709
loss: 0.9975992441177368,grad_norm: 0.8185310617463434, iteration: 32710
loss: 0.9967672228813171,grad_norm: 0.9999995336527161, iteration: 32711
loss: 0.9758052825927734,grad_norm: 0.9179745274281046, iteration: 32712
loss: 1.007770299911499,grad_norm: 0.9999991203411499, iteration: 32713
loss: 0.9784627556800842,grad_norm: 0.9147486005682849, iteration: 32714
loss: 1.046907663345337,grad_norm: 0.9999993763441031, iteration: 32715
loss: 1.0490391254425049,grad_norm: 0.9999998084206957, iteration: 32716
loss: 1.0427119731903076,grad_norm: 0.9999994320171363, iteration: 32717
loss: 1.0739084482192993,grad_norm: 0.9999996574040494, iteration: 32718
loss: 1.0183252096176147,grad_norm: 0.919854569041958, iteration: 32719
loss: 0.9853076338768005,grad_norm: 0.8730016239784126, iteration: 32720
loss: 1.016656756401062,grad_norm: 0.899695612614041, iteration: 32721
loss: 1.0234299898147583,grad_norm: 0.9999990687141899, iteration: 32722
loss: 1.1002602577209473,grad_norm: 0.9999992696589592, iteration: 32723
loss: 0.9983450770378113,grad_norm: 0.8717414421686933, iteration: 32724
loss: 1.0125333070755005,grad_norm: 0.7630562864657622, iteration: 32725
loss: 1.0128653049468994,grad_norm: 0.8834117922422188, iteration: 32726
loss: 1.0316492319107056,grad_norm: 0.9999998201708431, iteration: 32727
loss: 1.0004675388336182,grad_norm: 0.8531900191075867, iteration: 32728
loss: 1.0167248249053955,grad_norm: 0.9008834469122929, iteration: 32729
loss: 0.9953370690345764,grad_norm: 0.8672866118290405, iteration: 32730
loss: 0.9838598966598511,grad_norm: 0.96295386422361, iteration: 32731
loss: 1.07081139087677,grad_norm: 0.9999990198430513, iteration: 32732
loss: 0.9965325593948364,grad_norm: 0.9234788960255638, iteration: 32733
loss: 1.0260051488876343,grad_norm: 0.999999196569848, iteration: 32734
loss: 1.0210381746292114,grad_norm: 0.9999991644687323, iteration: 32735
loss: 0.9997653365135193,grad_norm: 0.9999991014103703, iteration: 32736
loss: 1.022915005683899,grad_norm: 0.9999997540758124, iteration: 32737
loss: 0.9969412088394165,grad_norm: 0.9999991698550418, iteration: 32738
loss: 1.0189592838287354,grad_norm: 0.9999991193208875, iteration: 32739
loss: 1.001267671585083,grad_norm: 0.9420962503427797, iteration: 32740
loss: 1.033454418182373,grad_norm: 0.9555949204221096, iteration: 32741
loss: 0.9708536267280579,grad_norm: 0.9999992210261212, iteration: 32742
loss: 0.9972479343414307,grad_norm: 0.8923851919810125, iteration: 32743
loss: 1.0167725086212158,grad_norm: 0.9999990671827508, iteration: 32744
loss: 0.9792686104774475,grad_norm: 0.9670009935406412, iteration: 32745
loss: 0.9886714816093445,grad_norm: 0.9999990275860736, iteration: 32746
loss: 0.9974102973937988,grad_norm: 0.9999992041754269, iteration: 32747
loss: 1.0615553855895996,grad_norm: 0.9999993504595468, iteration: 32748
loss: 0.9777815937995911,grad_norm: 0.9999991702537936, iteration: 32749
loss: 1.0119857788085938,grad_norm: 0.9346175532452847, iteration: 32750
loss: 0.9957414269447327,grad_norm: 0.9999991395942373, iteration: 32751
loss: 1.0139350891113281,grad_norm: 0.9999991857182611, iteration: 32752
loss: 1.0055238008499146,grad_norm: 0.9999992552416443, iteration: 32753
loss: 1.0913842916488647,grad_norm: 0.9999993308699162, iteration: 32754
loss: 1.0287812948226929,grad_norm: 0.9953314361803527, iteration: 32755
loss: 0.9854138493537903,grad_norm: 0.9999993002745001, iteration: 32756
loss: 0.9770468473434448,grad_norm: 0.9662010565218553, iteration: 32757
loss: 1.0131736993789673,grad_norm: 0.9999998191765886, iteration: 32758
loss: 1.0019984245300293,grad_norm: 0.9999991544570025, iteration: 32759
loss: 1.0099310874938965,grad_norm: 0.9489433948207237, iteration: 32760
loss: 0.9901261925697327,grad_norm: 0.9516981252610429, iteration: 32761
loss: 1.0022943019866943,grad_norm: 0.9999991867714131, iteration: 32762
loss: 0.9575942754745483,grad_norm: 0.9352435803089599, iteration: 32763
loss: 0.9667434096336365,grad_norm: 0.9560732599192233, iteration: 32764
loss: 1.0657483339309692,grad_norm: 0.9754553651236013, iteration: 32765
loss: 1.0290802717208862,grad_norm: 0.9999992929048088, iteration: 32766
loss: 1.0084056854248047,grad_norm: 0.9999996082916784, iteration: 32767
loss: 0.9858086705207825,grad_norm: 0.8443725820112307, iteration: 32768
loss: 1.045966386795044,grad_norm: 0.9999991338027895, iteration: 32769
loss: 1.037691354751587,grad_norm: 0.9999992312794572, iteration: 32770
loss: 1.010918140411377,grad_norm: 0.9999991296675343, iteration: 32771
loss: 1.0047487020492554,grad_norm: 0.9517959280955534, iteration: 32772
loss: 0.9855017066001892,grad_norm: 0.999999456408197, iteration: 32773
loss: 1.0132460594177246,grad_norm: 0.9999994642142775, iteration: 32774
loss: 1.0057801008224487,grad_norm: 0.9999992661074484, iteration: 32775
loss: 1.0117839574813843,grad_norm: 0.9999997774798344, iteration: 32776
loss: 0.9907894730567932,grad_norm: 0.999999244689177, iteration: 32777
loss: 1.0627446174621582,grad_norm: 0.8577313303401081, iteration: 32778
loss: 0.995613157749176,grad_norm: 0.999999196599299, iteration: 32779
loss: 0.9967670440673828,grad_norm: 0.878712486778742, iteration: 32780
loss: 0.9955090880393982,grad_norm: 0.999999353881475, iteration: 32781
loss: 1.0163418054580688,grad_norm: 0.9999994314135098, iteration: 32782
loss: 0.9973031878471375,grad_norm: 0.99999916660853, iteration: 32783
loss: 0.9756208658218384,grad_norm: 0.9575283516196714, iteration: 32784
loss: 1.040964126586914,grad_norm: 0.9999997147662233, iteration: 32785
loss: 0.9890408515930176,grad_norm: 0.9999991985072265, iteration: 32786
loss: 1.046186923980713,grad_norm: 0.9999991880975377, iteration: 32787
loss: 1.0726491212844849,grad_norm: 0.9999996567059897, iteration: 32788
loss: 1.0015119314193726,grad_norm: 0.9714795838254396, iteration: 32789
loss: 1.0288978815078735,grad_norm: 0.9999991075391185, iteration: 32790
loss: 0.9986646771430969,grad_norm: 0.8778105104943486, iteration: 32791
loss: 1.0381578207015991,grad_norm: 0.9082178073748829, iteration: 32792
loss: 1.0084129571914673,grad_norm: 0.9404597148463569, iteration: 32793
loss: 0.9651327133178711,grad_norm: 0.7787070370426482, iteration: 32794
loss: 0.9980748891830444,grad_norm: 0.999999405702501, iteration: 32795
loss: 0.9993734955787659,grad_norm: 0.7994493984936998, iteration: 32796
loss: 1.0250134468078613,grad_norm: 0.9999990615298728, iteration: 32797
loss: 1.0576790571212769,grad_norm: 0.9999996342689196, iteration: 32798
loss: 1.013763189315796,grad_norm: 0.9999991393811138, iteration: 32799
loss: 1.0265752077102661,grad_norm: 0.8511801610935246, iteration: 32800
loss: 1.0462762117385864,grad_norm: 0.9999995676703994, iteration: 32801
loss: 0.994728147983551,grad_norm: 0.8557139428896157, iteration: 32802
loss: 1.0411291122436523,grad_norm: 0.9622131141594433, iteration: 32803
loss: 1.010414481163025,grad_norm: 0.7715749161586568, iteration: 32804
loss: 1.0044695138931274,grad_norm: 0.9999990757713398, iteration: 32805
loss: 0.9946036338806152,grad_norm: 0.9767968474417477, iteration: 32806
loss: 0.9996698498725891,grad_norm: 0.9999991218441634, iteration: 32807
loss: 1.0023517608642578,grad_norm: 0.9688540054738619, iteration: 32808
loss: 1.0430538654327393,grad_norm: 0.9228947993822729, iteration: 32809
loss: 1.0229145288467407,grad_norm: 0.8816864047902325, iteration: 32810
loss: 1.1071447134017944,grad_norm: 0.9999995377814707, iteration: 32811
loss: 1.1048518419265747,grad_norm: 0.999999214242845, iteration: 32812
loss: 1.1093639135360718,grad_norm: 0.9999994198411235, iteration: 32813
loss: 1.1138635873794556,grad_norm: 0.9999998892670667, iteration: 32814
loss: 1.0925602912902832,grad_norm: 0.9313833486949045, iteration: 32815
loss: 0.9873207807540894,grad_norm: 0.9999990818555937, iteration: 32816
loss: 1.0278066396713257,grad_norm: 0.7628097125190924, iteration: 32817
loss: 1.0356122255325317,grad_norm: 0.9999992192264696, iteration: 32818
loss: 1.1079035997390747,grad_norm: 0.9999995522937203, iteration: 32819
loss: 1.1267651319503784,grad_norm: 0.9999997482115071, iteration: 32820
loss: 0.9942725896835327,grad_norm: 0.8871675328409961, iteration: 32821
loss: 1.0331668853759766,grad_norm: 0.9999991865967612, iteration: 32822
loss: 1.0141464471817017,grad_norm: 0.9999991324361539, iteration: 32823
loss: 1.0300465822219849,grad_norm: 0.986171545124645, iteration: 32824
loss: 1.118898630142212,grad_norm: 0.9999996240072465, iteration: 32825
loss: 1.0131542682647705,grad_norm: 0.9659583154355493, iteration: 32826
loss: 1.0335986614227295,grad_norm: 0.9999991388302212, iteration: 32827
loss: 1.023589015007019,grad_norm: 0.9999991362296927, iteration: 32828
loss: 1.0188058614730835,grad_norm: 0.9999998322825135, iteration: 32829
loss: 1.0499242544174194,grad_norm: 0.9999999304602181, iteration: 32830
loss: 0.9942863583564758,grad_norm: 0.9999989408176462, iteration: 32831
loss: 1.0009596347808838,grad_norm: 0.9045627797106478, iteration: 32832
loss: 1.0044636726379395,grad_norm: 0.8939174234942167, iteration: 32833
loss: 0.9562293887138367,grad_norm: 0.890674453147003, iteration: 32834
loss: 1.0718685388565063,grad_norm: 0.9999992508591562, iteration: 32835
loss: 1.0249526500701904,grad_norm: 0.999999430061214, iteration: 32836
loss: 0.9937146306037903,grad_norm: 0.9999990978714348, iteration: 32837
loss: 1.0315642356872559,grad_norm: 0.9999997624673435, iteration: 32838
loss: 0.9745722413063049,grad_norm: 0.9990560060681778, iteration: 32839
loss: 0.9921794533729553,grad_norm: 0.9999050636162098, iteration: 32840
loss: 1.0538017749786377,grad_norm: 0.9999994915766451, iteration: 32841
loss: 0.9984264373779297,grad_norm: 0.8802832259133194, iteration: 32842
loss: 1.0180726051330566,grad_norm: 0.7992721028116049, iteration: 32843
loss: 1.0454813241958618,grad_norm: 0.9999994900916341, iteration: 32844
loss: 0.9857344627380371,grad_norm: 0.7961300228404536, iteration: 32845
loss: 0.9929602742195129,grad_norm: 0.9999991378669317, iteration: 32846
loss: 1.0392647981643677,grad_norm: 0.9999996508130151, iteration: 32847
loss: 0.9910932183265686,grad_norm: 0.9999998288859355, iteration: 32848
loss: 1.0413591861724854,grad_norm: 0.9999995205847112, iteration: 32849
loss: 1.0365023612976074,grad_norm: 0.9567835272733384, iteration: 32850
loss: 1.0209710597991943,grad_norm: 0.9593524144277484, iteration: 32851
loss: 1.0735905170440674,grad_norm: 0.9999994181816911, iteration: 32852
loss: 1.0338600873947144,grad_norm: 0.9999991485154582, iteration: 32853
loss: 1.0346063375473022,grad_norm: 0.8331501947181618, iteration: 32854
loss: 0.9999946355819702,grad_norm: 0.9999990565571272, iteration: 32855
loss: 1.0066102743148804,grad_norm: 0.9999990862855721, iteration: 32856
loss: 0.9711524248123169,grad_norm: 0.9086535006481319, iteration: 32857
loss: 0.9985471367835999,grad_norm: 0.9999992754152277, iteration: 32858
loss: 1.0313953161239624,grad_norm: 0.9999995902334952, iteration: 32859
loss: 0.9932071566581726,grad_norm: 0.999999187565492, iteration: 32860
loss: 1.0022205114364624,grad_norm: 0.8839412058226335, iteration: 32861
loss: 1.0038602352142334,grad_norm: 0.8623693341271225, iteration: 32862
loss: 1.067322850227356,grad_norm: 0.9999994161888672, iteration: 32863
loss: 1.0266417264938354,grad_norm: 0.9999997228463637, iteration: 32864
loss: 0.9839774966239929,grad_norm: 0.9999998763351147, iteration: 32865
loss: 0.9772220253944397,grad_norm: 0.9655046147549827, iteration: 32866
loss: 0.9897797703742981,grad_norm: 0.8187764355569339, iteration: 32867
loss: 1.0139925479888916,grad_norm: 0.999999490427837, iteration: 32868
loss: 1.0440247058868408,grad_norm: 0.8458387475912563, iteration: 32869
loss: 1.0183744430541992,grad_norm: 0.999999206233074, iteration: 32870
loss: 1.0170081853866577,grad_norm: 0.966361391663396, iteration: 32871
loss: 1.019742727279663,grad_norm: 0.9263361272808466, iteration: 32872
loss: 1.019133448600769,grad_norm: 0.8338211065494836, iteration: 32873
loss: 1.0310330390930176,grad_norm: 0.9999991689589109, iteration: 32874
loss: 1.0041800737380981,grad_norm: 0.9999994053489611, iteration: 32875
loss: 1.0308071374893188,grad_norm: 0.9999993143179247, iteration: 32876
loss: 0.9898352026939392,grad_norm: 0.9140631288651632, iteration: 32877
loss: 1.0055989027023315,grad_norm: 0.9999991609319685, iteration: 32878
loss: 0.9949468374252319,grad_norm: 0.9999991109177772, iteration: 32879
loss: 1.0472450256347656,grad_norm: 0.9999995463291431, iteration: 32880
loss: 1.083179235458374,grad_norm: 0.9999994295726975, iteration: 32881
loss: 0.9653019905090332,grad_norm: 0.9999990994067512, iteration: 32882
loss: 1.0206091403961182,grad_norm: 0.9846132324611266, iteration: 32883
loss: 1.0673210620880127,grad_norm: 0.9999992498059643, iteration: 32884
loss: 1.0224148035049438,grad_norm: 0.9999990895903167, iteration: 32885
loss: 1.0195090770721436,grad_norm: 0.9999993911958065, iteration: 32886
loss: 1.12522554397583,grad_norm: 0.9999994686170885, iteration: 32887
loss: 1.0046346187591553,grad_norm: 0.9999992478885326, iteration: 32888
loss: 1.0001987218856812,grad_norm: 0.9999994768870664, iteration: 32889
loss: 1.0129427909851074,grad_norm: 0.8252512995667654, iteration: 32890
loss: 0.9993911981582642,grad_norm: 0.7502945944980315, iteration: 32891
loss: 1.038894772529602,grad_norm: 0.9999991573685244, iteration: 32892
loss: 0.9774112105369568,grad_norm: 0.9999990878859796, iteration: 32893
loss: 1.0463439226150513,grad_norm: 0.9999992198023592, iteration: 32894
loss: 1.0087671279907227,grad_norm: 0.9619538885613405, iteration: 32895
loss: 1.0203510522842407,grad_norm: 0.999999537222309, iteration: 32896
loss: 0.9765657186508179,grad_norm: 0.9378694045233354, iteration: 32897
loss: 1.0087401866912842,grad_norm: 0.9999990850721389, iteration: 32898
loss: 1.044745683670044,grad_norm: 0.9999991529537359, iteration: 32899
loss: 0.9854177236557007,grad_norm: 0.9640374415095359, iteration: 32900
loss: 1.0486153364181519,grad_norm: 0.9999992264989108, iteration: 32901
loss: 1.021026849746704,grad_norm: 0.9999997824701533, iteration: 32902
loss: 1.0185699462890625,grad_norm: 0.9999990150774576, iteration: 32903
loss: 1.0019906759262085,grad_norm: 0.9999991500049139, iteration: 32904
loss: 1.0318195819854736,grad_norm: 0.9999996385987768, iteration: 32905
loss: 1.0092177391052246,grad_norm: 0.8573516589905384, iteration: 32906
loss: 0.9994004964828491,grad_norm: 0.8870779381197595, iteration: 32907
loss: 0.9893379807472229,grad_norm: 0.999999317496551, iteration: 32908
loss: 1.0474337339401245,grad_norm: 0.999999686475636, iteration: 32909
loss: 1.0324138402938843,grad_norm: 0.9999994809718881, iteration: 32910
loss: 0.9575247764587402,grad_norm: 0.9921828212204926, iteration: 32911
loss: 1.0339727401733398,grad_norm: 0.999999163652524, iteration: 32912
loss: 1.0311901569366455,grad_norm: 0.9999994494969098, iteration: 32913
loss: 0.9889103770256042,grad_norm: 0.9028476453298255, iteration: 32914
loss: 0.991936445236206,grad_norm: 0.8257732977564638, iteration: 32915
loss: 1.005104660987854,grad_norm: 0.9973003516053381, iteration: 32916
loss: 1.0327706336975098,grad_norm: 0.9575363032624072, iteration: 32917
loss: 0.9970317482948303,grad_norm: 0.9830612358573972, iteration: 32918
loss: 1.0551563501358032,grad_norm: 0.9999991420761226, iteration: 32919
loss: 1.0450443029403687,grad_norm: 0.9999995576803561, iteration: 32920
loss: 0.996284544467926,grad_norm: 0.9140728426131832, iteration: 32921
loss: 1.0083069801330566,grad_norm: 0.9999991390208448, iteration: 32922
loss: 1.1445856094360352,grad_norm: 0.9999998212453605, iteration: 32923
loss: 1.0129436254501343,grad_norm: 0.999999089626896, iteration: 32924
loss: 0.9769237637519836,grad_norm: 0.7775376438443662, iteration: 32925
loss: 1.032270073890686,grad_norm: 0.966577482587145, iteration: 32926
loss: 0.9965936541557312,grad_norm: 0.999999405120018, iteration: 32927
loss: 0.9709623456001282,grad_norm: 0.9999993126510204, iteration: 32928
loss: 0.985183596611023,grad_norm: 0.9893647670641731, iteration: 32929
loss: 1.049955129623413,grad_norm: 0.9999993087766178, iteration: 32930
loss: 1.0200135707855225,grad_norm: 0.8632306400905004, iteration: 32931
loss: 1.0035561323165894,grad_norm: 0.9999991220091214, iteration: 32932
loss: 1.0235705375671387,grad_norm: 0.9999991069675729, iteration: 32933
loss: 1.0224531888961792,grad_norm: 0.9999993054886915, iteration: 32934
loss: 1.0140405893325806,grad_norm: 0.7952911744436066, iteration: 32935
loss: 1.0042439699172974,grad_norm: 0.9673406702641426, iteration: 32936
loss: 0.9854550957679749,grad_norm: 0.9999990677443489, iteration: 32937
loss: 1.088845133781433,grad_norm: 0.99999951195837, iteration: 32938
loss: 0.9943276643753052,grad_norm: 0.9999999533699887, iteration: 32939
loss: 0.9983497858047485,grad_norm: 0.9025625427821776, iteration: 32940
loss: 1.0011752843856812,grad_norm: 0.9999998167026918, iteration: 32941
loss: 1.0566716194152832,grad_norm: 0.9999993623641494, iteration: 32942
loss: 1.010077953338623,grad_norm: 0.9999991038575856, iteration: 32943
loss: 0.9606801271438599,grad_norm: 0.9999992029988822, iteration: 32944
loss: 1.0421234369277954,grad_norm: 0.999999192554295, iteration: 32945
loss: 1.004831075668335,grad_norm: 0.9999991351067798, iteration: 32946
loss: 1.0130997896194458,grad_norm: 0.9999991751682306, iteration: 32947
loss: 1.024990439414978,grad_norm: 0.9144755108274273, iteration: 32948
loss: 1.0097472667694092,grad_norm: 0.9999993149578636, iteration: 32949
loss: 1.0208970308303833,grad_norm: 0.9999994066451908, iteration: 32950
loss: 1.0303703546524048,grad_norm: 0.8541946675684906, iteration: 32951
loss: 1.013687014579773,grad_norm: 0.9999991765228057, iteration: 32952
loss: 0.9793815612792969,grad_norm: 0.9999991391667703, iteration: 32953
loss: 1.0123183727264404,grad_norm: 0.9999989770780158, iteration: 32954
loss: 1.020970344543457,grad_norm: 0.9374331065026498, iteration: 32955
loss: 0.9647488594055176,grad_norm: 0.9999992821610783, iteration: 32956
loss: 0.99958336353302,grad_norm: 0.9944706673534963, iteration: 32957
loss: 1.0083845853805542,grad_norm: 0.999999154952387, iteration: 32958
loss: 1.0251564979553223,grad_norm: 0.9999995934540643, iteration: 32959
loss: 1.0340797901153564,grad_norm: 0.9999992985659272, iteration: 32960
loss: 1.0245583057403564,grad_norm: 0.7345947295666059, iteration: 32961
loss: 0.9968641996383667,grad_norm: 0.9732338469510305, iteration: 32962
loss: 1.0099211931228638,grad_norm: 0.9999991763543142, iteration: 32963
loss: 1.0018212795257568,grad_norm: 0.9851940322686217, iteration: 32964
loss: 1.005590558052063,grad_norm: 0.9999998529559454, iteration: 32965
loss: 1.101845622062683,grad_norm: 0.9999995700244294, iteration: 32966
loss: 1.0284218788146973,grad_norm: 0.9999994926747812, iteration: 32967
loss: 0.9553287029266357,grad_norm: 0.8208728445202342, iteration: 32968
loss: 1.0121493339538574,grad_norm: 0.9999997011150548, iteration: 32969
loss: 1.0059082508087158,grad_norm: 0.9366420786177212, iteration: 32970
loss: 1.0615092515945435,grad_norm: 0.9999997537975561, iteration: 32971
loss: 1.0260096788406372,grad_norm: 0.9825281055339695, iteration: 32972
loss: 0.9792260527610779,grad_norm: 0.9396329337409371, iteration: 32973
loss: 1.0321645736694336,grad_norm: 0.9999993334919075, iteration: 32974
loss: 1.0258219242095947,grad_norm: 0.8933409684160344, iteration: 32975
loss: 1.0670561790466309,grad_norm: 0.9999994231450591, iteration: 32976
loss: 1.0397366285324097,grad_norm: 0.9999992196023016, iteration: 32977
loss: 1.0136303901672363,grad_norm: 0.9999994953384939, iteration: 32978
loss: 1.0160552263259888,grad_norm: 0.7887730229071217, iteration: 32979
loss: 0.9744879007339478,grad_norm: 0.9999991917001733, iteration: 32980
loss: 0.9637200236320496,grad_norm: 0.9833616491789706, iteration: 32981
loss: 1.0015268325805664,grad_norm: 0.9999995024522792, iteration: 32982
loss: 0.9785420298576355,grad_norm: 0.9999990668274841, iteration: 32983
loss: 1.0188204050064087,grad_norm: 0.9967440885752756, iteration: 32984
loss: 1.0286264419555664,grad_norm: 0.9999995709043626, iteration: 32985
loss: 1.0423274040222168,grad_norm: 0.9999994333442962, iteration: 32986
loss: 1.0489765405654907,grad_norm: 0.999999453158718, iteration: 32987
loss: 1.042055606842041,grad_norm: 0.9541440613174537, iteration: 32988
loss: 0.9870126843452454,grad_norm: 0.9886155484535828, iteration: 32989
loss: 0.9771050810813904,grad_norm: 0.865853517612834, iteration: 32990
loss: 1.0222018957138062,grad_norm: 0.9999989981601579, iteration: 32991
loss: 1.0195016860961914,grad_norm: 0.8611494039682699, iteration: 32992
loss: 0.9963571429252625,grad_norm: 0.9606864379085736, iteration: 32993
loss: 1.024865984916687,grad_norm: 0.9999990471791678, iteration: 32994
loss: 0.9973168969154358,grad_norm: 0.8655449004416895, iteration: 32995
loss: 1.0498336553573608,grad_norm: 0.9999997369546191, iteration: 32996
loss: 1.007904291152954,grad_norm: 0.9999992649964207, iteration: 32997
loss: 1.008497714996338,grad_norm: 0.9757347942500867, iteration: 32998
loss: 1.0671662092208862,grad_norm: 0.9999997637057735, iteration: 32999
loss: 0.9711373448371887,grad_norm: 0.9999989917632055, iteration: 33000
loss: 0.9887627959251404,grad_norm: 0.9582143640089866, iteration: 33001
loss: 1.0081331729888916,grad_norm: 0.9630731258605685, iteration: 33002
loss: 0.9964325428009033,grad_norm: 0.914810937739392, iteration: 33003
loss: 0.9971978068351746,grad_norm: 0.999999181675787, iteration: 33004
loss: 1.019425392150879,grad_norm: 0.8931614875120832, iteration: 33005
loss: 1.04533052444458,grad_norm: 0.9442562824444589, iteration: 33006
loss: 1.0718193054199219,grad_norm: 0.9999994083903617, iteration: 33007
loss: 1.0108706951141357,grad_norm: 0.9999999065231767, iteration: 33008
loss: 1.0048844814300537,grad_norm: 0.9999991117095076, iteration: 33009
loss: 1.0020238161087036,grad_norm: 0.9999990349407013, iteration: 33010
loss: 1.0223437547683716,grad_norm: 0.9830949202374825, iteration: 33011
loss: 1.0558568239212036,grad_norm: 0.9999995908246, iteration: 33012
loss: 1.0297365188598633,grad_norm: 0.9999992941641322, iteration: 33013
loss: 0.9729326963424683,grad_norm: 0.8929170907723281, iteration: 33014
loss: 0.9875531196594238,grad_norm: 0.9708717500854274, iteration: 33015
loss: 1.0157681703567505,grad_norm: 0.9999991224070338, iteration: 33016
loss: 1.006084680557251,grad_norm: 0.9999992124523099, iteration: 33017
loss: 1.0614763498306274,grad_norm: 0.9999992829903449, iteration: 33018
loss: 1.0062856674194336,grad_norm: 0.9724438208933099, iteration: 33019
loss: 0.983494758605957,grad_norm: 0.905335383829576, iteration: 33020
loss: 1.0205811262130737,grad_norm: 0.9885213883571284, iteration: 33021
loss: 1.0286672115325928,grad_norm: 0.9999993851616085, iteration: 33022
loss: 1.0910574197769165,grad_norm: 0.9999996015494899, iteration: 33023
loss: 0.984347403049469,grad_norm: 0.9999992384458115, iteration: 33024
loss: 1.0200568437576294,grad_norm: 0.9999990935375433, iteration: 33025
loss: 1.0107026100158691,grad_norm: 0.9999990422923367, iteration: 33026
loss: 1.0121990442276,grad_norm: 0.8722631949837538, iteration: 33027
loss: 1.1310664415359497,grad_norm: 0.9999999565491956, iteration: 33028
loss: 0.9906450510025024,grad_norm: 0.9999990246217716, iteration: 33029
loss: 1.0000771284103394,grad_norm: 0.9999990138047437, iteration: 33030
loss: 0.9968059062957764,grad_norm: 0.9999991263291702, iteration: 33031
loss: 1.0334835052490234,grad_norm: 0.8854341385676321, iteration: 33032
loss: 0.9970237016677856,grad_norm: 0.8685780022751847, iteration: 33033
loss: 0.9954199194908142,grad_norm: 0.782048654712328, iteration: 33034
loss: 0.9997882843017578,grad_norm: 0.848831372220721, iteration: 33035
loss: 1.0272351503372192,grad_norm: 0.9999999081895289, iteration: 33036
loss: 0.9983773231506348,grad_norm: 0.9999994703918224, iteration: 33037
loss: 1.0210998058319092,grad_norm: 0.9999994616699005, iteration: 33038
loss: 1.0182033777236938,grad_norm: 0.9999994991654825, iteration: 33039
loss: 1.0197551250457764,grad_norm: 0.9999993336093614, iteration: 33040
loss: 1.0153696537017822,grad_norm: 0.8175620929867105, iteration: 33041
loss: 1.018747329711914,grad_norm: 0.9455069179512361, iteration: 33042
loss: 1.0767008066177368,grad_norm: 0.9999995474727843, iteration: 33043
loss: 0.993462085723877,grad_norm: 0.8561999390068474, iteration: 33044
loss: 0.9917622804641724,grad_norm: 0.9999991220268549, iteration: 33045
loss: 1.0244204998016357,grad_norm: 0.9999997877880099, iteration: 33046
loss: 1.0112515687942505,grad_norm: 0.8103061058862414, iteration: 33047
loss: 0.9871270656585693,grad_norm: 0.9003379503703239, iteration: 33048
loss: 0.9856171011924744,grad_norm: 0.9289653572582122, iteration: 33049
loss: 1.0001847743988037,grad_norm: 0.9999993429107195, iteration: 33050
loss: 1.0110077857971191,grad_norm: 0.9012710181881316, iteration: 33051
loss: 1.020653486251831,grad_norm: 0.8498538422953027, iteration: 33052
loss: 0.9700920581817627,grad_norm: 0.9999999521157732, iteration: 33053
loss: 1.0278795957565308,grad_norm: 0.9999992979501293, iteration: 33054
loss: 1.017980933189392,grad_norm: 0.8814532680646436, iteration: 33055
loss: 1.0709577798843384,grad_norm: 0.9999998743376584, iteration: 33056
loss: 1.0166548490524292,grad_norm: 0.9669545380865003, iteration: 33057
loss: 0.9871296882629395,grad_norm: 0.9999991306061738, iteration: 33058
loss: 1.0187972784042358,grad_norm: 0.9999990040850767, iteration: 33059
loss: 1.0202791690826416,grad_norm: 0.9999991028513046, iteration: 33060
loss: 1.0174037218093872,grad_norm: 0.930685924662557, iteration: 33061
loss: 0.9583686590194702,grad_norm: 0.8601659605072004, iteration: 33062
loss: 0.9768925309181213,grad_norm: 0.9137004512254997, iteration: 33063
loss: 1.0309491157531738,grad_norm: 0.9999997987954381, iteration: 33064
loss: 1.0188075304031372,grad_norm: 0.9916570343354728, iteration: 33065
loss: 0.9927531480789185,grad_norm: 0.9999994124498728, iteration: 33066
loss: 1.0013302564620972,grad_norm: 0.999999175910837, iteration: 33067
loss: 1.0387299060821533,grad_norm: 0.9999994212068462, iteration: 33068
loss: 0.9757572412490845,grad_norm: 0.9999990053372173, iteration: 33069
loss: 0.9988477826118469,grad_norm: 0.9999991326154476, iteration: 33070
loss: 1.0428416728973389,grad_norm: 0.9999994600767605, iteration: 33071
loss: 1.02729070186615,grad_norm: 0.9491190770654474, iteration: 33072
loss: 1.0266687870025635,grad_norm: 0.9528560069055401, iteration: 33073
loss: 1.019243597984314,grad_norm: 0.9202311366171665, iteration: 33074
loss: 1.0236451625823975,grad_norm: 0.9999992057319896, iteration: 33075
loss: 1.0023491382598877,grad_norm: 0.9030594839501426, iteration: 33076
loss: 1.0513027906417847,grad_norm: 0.9999998792206465, iteration: 33077
loss: 1.0332759618759155,grad_norm: 0.9999995878671706, iteration: 33078
loss: 1.0051836967468262,grad_norm: 0.9999996046713775, iteration: 33079
loss: 0.9792599678039551,grad_norm: 0.9999993063135862, iteration: 33080
loss: 0.9989263415336609,grad_norm: 0.851467917318277, iteration: 33081
loss: 1.0654093027114868,grad_norm: 0.9999992967532185, iteration: 33082
loss: 1.0050169229507446,grad_norm: 0.9999991434142006, iteration: 33083
loss: 1.034362554550171,grad_norm: 0.9999991286087482, iteration: 33084
loss: 1.0133459568023682,grad_norm: 0.9999991342274799, iteration: 33085
loss: 0.9634984731674194,grad_norm: 0.9085285395220992, iteration: 33086
loss: 0.9966239333152771,grad_norm: 0.9999990875658593, iteration: 33087
loss: 1.0250301361083984,grad_norm: 0.9999991808107633, iteration: 33088
loss: 0.9880543947219849,grad_norm: 0.9999992505120684, iteration: 33089
loss: 1.012041687965393,grad_norm: 0.9359029341886026, iteration: 33090
loss: 1.0015597343444824,grad_norm: 0.9999990280033798, iteration: 33091
loss: 1.014603614807129,grad_norm: 0.9999991117275262, iteration: 33092
loss: 0.9756965041160583,grad_norm: 0.9084892503348139, iteration: 33093
loss: 1.0223307609558105,grad_norm: 0.9899359689989834, iteration: 33094
loss: 1.003325343132019,grad_norm: 0.9999990678225924, iteration: 33095
loss: 1.0614588260650635,grad_norm: 0.9999996373568593, iteration: 33096
loss: 1.034866213798523,grad_norm: 0.8156674614735141, iteration: 33097
loss: 0.9747191667556763,grad_norm: 0.951574084307494, iteration: 33098
loss: 1.0293924808502197,grad_norm: 0.9999999034835642, iteration: 33099
loss: 1.0192457437515259,grad_norm: 0.9999991338692883, iteration: 33100
loss: 1.043119192123413,grad_norm: 0.9999990617661316, iteration: 33101
loss: 1.0128793716430664,grad_norm: 0.9999997503997532, iteration: 33102
loss: 1.0357189178466797,grad_norm: 0.9999996116505211, iteration: 33103
loss: 1.0068607330322266,grad_norm: 0.9999996037331259, iteration: 33104
loss: 1.0190727710723877,grad_norm: 0.9999990596137495, iteration: 33105
loss: 0.9727580547332764,grad_norm: 0.9999989694470633, iteration: 33106
loss: 1.037616491317749,grad_norm: 0.9098646282612316, iteration: 33107
loss: 1.0050405263900757,grad_norm: 0.9521160775069734, iteration: 33108
loss: 0.9536216259002686,grad_norm: 0.8368415644525822, iteration: 33109
loss: 1.0806256532669067,grad_norm: 0.9999991481146768, iteration: 33110
loss: 1.0784761905670166,grad_norm: 0.9999998373486835, iteration: 33111
loss: 1.043014407157898,grad_norm: 0.9999994560321696, iteration: 33112
loss: 0.9738864898681641,grad_norm: 0.9999990465631317, iteration: 33113
loss: 1.0077821016311646,grad_norm: 0.9999997802751961, iteration: 33114
loss: 1.02103853225708,grad_norm: 0.9291940014264842, iteration: 33115
loss: 0.997283399105072,grad_norm: 0.9999990856162732, iteration: 33116
loss: 1.0420385599136353,grad_norm: 0.8326814004229786, iteration: 33117
loss: 0.9940541982650757,grad_norm: 0.9999994544294943, iteration: 33118
loss: 1.0125924348831177,grad_norm: 0.9349608917988618, iteration: 33119
loss: 1.0258151292800903,grad_norm: 0.9999995625596427, iteration: 33120
loss: 0.9726552367210388,grad_norm: 0.9415656858269547, iteration: 33121
loss: 1.0372271537780762,grad_norm: 0.9540441823089391, iteration: 33122
loss: 0.9838070869445801,grad_norm: 0.9999993964446096, iteration: 33123
loss: 0.9978904128074646,grad_norm: 0.9042946707340721, iteration: 33124
loss: 1.1463853120803833,grad_norm: 0.9999996093082517, iteration: 33125
loss: 1.0149356126785278,grad_norm: 0.9999990088178413, iteration: 33126
loss: 0.965910792350769,grad_norm: 0.9999989263346263, iteration: 33127
loss: 0.9980390071868896,grad_norm: 0.8178034800664001, iteration: 33128
loss: 1.050550103187561,grad_norm: 0.8497740786650134, iteration: 33129
loss: 1.0072245597839355,grad_norm: 0.8504986571574548, iteration: 33130
loss: 1.0729446411132812,grad_norm: 0.9999995883171026, iteration: 33131
loss: 1.0872384309768677,grad_norm: 0.99999917296534, iteration: 33132
loss: 1.0330603122711182,grad_norm: 0.8746122580410315, iteration: 33133
loss: 1.108253002166748,grad_norm: 0.9999996389469047, iteration: 33134
loss: 1.044059157371521,grad_norm: 0.8726562829174194, iteration: 33135
loss: 0.9732948541641235,grad_norm: 0.7891491093625365, iteration: 33136
loss: 0.9956453442573547,grad_norm: 0.9999994250691886, iteration: 33137
loss: 1.0278069972991943,grad_norm: 0.9999990557139663, iteration: 33138
loss: 0.98738694190979,grad_norm: 0.9999992493928125, iteration: 33139
loss: 1.0057477951049805,grad_norm: 0.9999993674434089, iteration: 33140
loss: 1.048464059829712,grad_norm: 0.9999994569052315, iteration: 33141
loss: 1.009541630744934,grad_norm: 0.9999993351444598, iteration: 33142
loss: 0.9920122623443604,grad_norm: 0.9426210598385026, iteration: 33143
loss: 1.0007407665252686,grad_norm: 0.9239538917977916, iteration: 33144
loss: 1.023714542388916,grad_norm: 0.9999994489659022, iteration: 33145
loss: 1.0036696195602417,grad_norm: 0.9999992805069421, iteration: 33146
loss: 0.99009108543396,grad_norm: 0.8259819080020663, iteration: 33147
loss: 1.0256808996200562,grad_norm: 0.9980829847833921, iteration: 33148
loss: 1.0545445680618286,grad_norm: 0.9999995922903459, iteration: 33149
loss: 0.9952297806739807,grad_norm: 0.9539204045828542, iteration: 33150
loss: 1.0830392837524414,grad_norm: 0.9999993460051554, iteration: 33151
loss: 1.0739631652832031,grad_norm: 0.9999994903495191, iteration: 33152
loss: 1.1070470809936523,grad_norm: 0.9999999639813956, iteration: 33153
loss: 1.0358916521072388,grad_norm: 0.9999990381903441, iteration: 33154
loss: 1.0733115673065186,grad_norm: 0.9999991602201361, iteration: 33155
loss: 0.9669774174690247,grad_norm: 0.9999995300284571, iteration: 33156
loss: 0.9760984182357788,grad_norm: 0.9999989417487618, iteration: 33157
loss: 1.0655736923217773,grad_norm: 0.9999991832898054, iteration: 33158
loss: 1.0205111503601074,grad_norm: 0.9999993616641611, iteration: 33159
loss: 1.0610548257827759,grad_norm: 0.9999993612485562, iteration: 33160
loss: 1.0478689670562744,grad_norm: 0.9999991853090193, iteration: 33161
loss: 0.9710829257965088,grad_norm: 0.9743863852449034, iteration: 33162
loss: 1.005050778388977,grad_norm: 0.8571972676448524, iteration: 33163
loss: 1.0163047313690186,grad_norm: 0.9999995167001653, iteration: 33164
loss: 1.0137101411819458,grad_norm: 0.7607843199894184, iteration: 33165
loss: 0.9743127822875977,grad_norm: 0.8500337794948184, iteration: 33166
loss: 1.1464751958847046,grad_norm: 0.9999997926774427, iteration: 33167
loss: 0.978721022605896,grad_norm: 0.9999991869582461, iteration: 33168
loss: 1.0346494913101196,grad_norm: 0.9999996029946238, iteration: 33169
loss: 1.1309401988983154,grad_norm: 0.9999993852119382, iteration: 33170
loss: 0.9865091443061829,grad_norm: 0.8813960200305326, iteration: 33171
loss: 1.0052659511566162,grad_norm: 0.9389767410507117, iteration: 33172
loss: 1.0115745067596436,grad_norm: 0.9999991010735537, iteration: 33173
loss: 1.0400843620300293,grad_norm: 0.9408465429714479, iteration: 33174
loss: 1.0244410037994385,grad_norm: 0.9999992727887207, iteration: 33175
loss: 1.0292481184005737,grad_norm: 0.9999993058180034, iteration: 33176
loss: 1.0389964580535889,grad_norm: 0.9644276970025891, iteration: 33177
loss: 1.0166233777999878,grad_norm: 0.999999355924824, iteration: 33178
loss: 1.0147464275360107,grad_norm: 0.9999991332463466, iteration: 33179
loss: 1.0118601322174072,grad_norm: 0.8379516113377954, iteration: 33180
loss: 1.005386233329773,grad_norm: 0.9999991193494181, iteration: 33181
loss: 0.9939583539962769,grad_norm: 0.9999991853389241, iteration: 33182
loss: 1.0234096050262451,grad_norm: 0.9999990762320415, iteration: 33183
loss: 1.0342448949813843,grad_norm: 0.9190579611496342, iteration: 33184
loss: 1.0113167762756348,grad_norm: 0.9999990736711335, iteration: 33185
loss: 1.018678069114685,grad_norm: 0.9220872871817795, iteration: 33186
loss: 1.0540814399719238,grad_norm: 0.9999996882873879, iteration: 33187
loss: 1.0096310377120972,grad_norm: 0.9999990827603528, iteration: 33188
loss: 1.028632640838623,grad_norm: 0.999999334647804, iteration: 33189
loss: 0.9814894199371338,grad_norm: 0.9655636531416945, iteration: 33190
loss: 1.0254671573638916,grad_norm: 0.9179380110757044, iteration: 33191
loss: 1.0394132137298584,grad_norm: 0.9010876674786301, iteration: 33192
loss: 0.9992219805717468,grad_norm: 0.9999992600168245, iteration: 33193
loss: 0.9933892488479614,grad_norm: 0.9999991640028398, iteration: 33194
loss: 1.0189611911773682,grad_norm: 0.9999993936567604, iteration: 33195
loss: 1.0355117321014404,grad_norm: 0.9461498933632476, iteration: 33196
loss: 1.0497305393218994,grad_norm: 0.9999991010050422, iteration: 33197
loss: 1.0366626977920532,grad_norm: 0.9738540297837647, iteration: 33198
loss: 1.0063340663909912,grad_norm: 0.9999992990168834, iteration: 33199
loss: 1.0358669757843018,grad_norm: 0.8330833930951667, iteration: 33200
loss: 0.9985418915748596,grad_norm: 0.8809143689475268, iteration: 33201
loss: 1.0054080486297607,grad_norm: 0.9999991279275499, iteration: 33202
loss: 1.0399221181869507,grad_norm: 0.9999994920939823, iteration: 33203
loss: 1.049880862236023,grad_norm: 0.9325230084954998, iteration: 33204
loss: 1.033257007598877,grad_norm: 0.9897404630278293, iteration: 33205
loss: 1.032167673110962,grad_norm: 0.8669315644993927, iteration: 33206
loss: 1.0270332098007202,grad_norm: 0.9999991787359472, iteration: 33207
loss: 1.0784261226654053,grad_norm: 0.9999994741691239, iteration: 33208
loss: 1.0278236865997314,grad_norm: 0.999999197874876, iteration: 33209
loss: 0.9959650635719299,grad_norm: 0.9999990267145468, iteration: 33210
loss: 0.9880167841911316,grad_norm: 0.9999991764658107, iteration: 33211
loss: 0.9768465757369995,grad_norm: 0.9999990571116932, iteration: 33212
loss: 1.023368000984192,grad_norm: 0.9999991260368354, iteration: 33213
loss: 0.9702169299125671,grad_norm: 0.9999990912210434, iteration: 33214
loss: 1.0340094566345215,grad_norm: 0.8407106417284609, iteration: 33215
loss: 0.9926793575286865,grad_norm: 0.9999992730422984, iteration: 33216
loss: 1.0138531923294067,grad_norm: 0.9999990195392128, iteration: 33217
loss: 1.0067278146743774,grad_norm: 0.9702925298120241, iteration: 33218
loss: 1.025728702545166,grad_norm: 0.9907835099744999, iteration: 33219
loss: 0.9800429344177246,grad_norm: 0.999999376787079, iteration: 33220
loss: 0.9967998266220093,grad_norm: 0.9231207737921682, iteration: 33221
loss: 1.0110540390014648,grad_norm: 0.8879321258534897, iteration: 33222
loss: 1.0424658060073853,grad_norm: 0.999999233273896, iteration: 33223
loss: 0.9969836473464966,grad_norm: 0.9122967675096187, iteration: 33224
loss: 1.0367553234100342,grad_norm: 0.999999167405249, iteration: 33225
loss: 1.027970552444458,grad_norm: 0.8775011577762978, iteration: 33226
loss: 1.0060126781463623,grad_norm: 0.9765950679483661, iteration: 33227
loss: 0.9569329023361206,grad_norm: 0.9999991507713412, iteration: 33228
loss: 1.0259487628936768,grad_norm: 0.9999990644061054, iteration: 33229
loss: 0.996198296546936,grad_norm: 0.9999992208301735, iteration: 33230
loss: 1.0210148096084595,grad_norm: 0.999999058728844, iteration: 33231
loss: 1.0001884698867798,grad_norm: 0.9944158355267037, iteration: 33232
loss: 0.9980548620223999,grad_norm: 0.9924010965579532, iteration: 33233
loss: 1.159697413444519,grad_norm: 0.9999995198348122, iteration: 33234
loss: 1.0161542892456055,grad_norm: 0.9999997843822416, iteration: 33235
loss: 1.0143022537231445,grad_norm: 0.8589288274129205, iteration: 33236
loss: 0.992487370967865,grad_norm: 0.9999991796377121, iteration: 33237
loss: 1.0367060899734497,grad_norm: 0.9673883611538634, iteration: 33238
loss: 0.9948569536209106,grad_norm: 0.9999990455617214, iteration: 33239
loss: 1.019629716873169,grad_norm: 0.9840192316047827, iteration: 33240
loss: 0.9883515238761902,grad_norm: 0.9249564937215541, iteration: 33241
loss: 0.982721745967865,grad_norm: 0.9810670456935932, iteration: 33242
loss: 0.9897792935371399,grad_norm: 0.9642529223979265, iteration: 33243
loss: 0.9971609115600586,grad_norm: 0.9152696144385035, iteration: 33244
loss: 0.9825812578201294,grad_norm: 0.9999990638170044, iteration: 33245
loss: 1.021630048751831,grad_norm: 0.9999991032941676, iteration: 33246
loss: 1.0267422199249268,grad_norm: 0.8393878806466022, iteration: 33247
loss: 0.97467041015625,grad_norm: 0.9867740713981206, iteration: 33248
loss: 1.0447373390197754,grad_norm: 0.9999993714857975, iteration: 33249
loss: 1.028192400932312,grad_norm: 0.9999991270563567, iteration: 33250
loss: 1.0520364046096802,grad_norm: 0.9999995533399901, iteration: 33251
loss: 0.9474815130233765,grad_norm: 0.9638631263376942, iteration: 33252
loss: 1.0277570486068726,grad_norm: 0.9999994415498464, iteration: 33253
loss: 0.9897183775901794,grad_norm: 0.8976280322482927, iteration: 33254
loss: 0.9753982424736023,grad_norm: 0.9468038143401485, iteration: 33255
loss: 1.0040414333343506,grad_norm: 0.7770275970779742, iteration: 33256
loss: 1.0617414712905884,grad_norm: 0.9999997034750531, iteration: 33257
loss: 0.9890493750572205,grad_norm: 0.9999989821909911, iteration: 33258
loss: 0.99901282787323,grad_norm: 0.9718713872948349, iteration: 33259
loss: 1.0020278692245483,grad_norm: 0.9999995036227751, iteration: 33260
loss: 0.9708542227745056,grad_norm: 0.9999991046939855, iteration: 33261
loss: 0.9889577031135559,grad_norm: 0.9999989721447285, iteration: 33262
loss: 1.042370080947876,grad_norm: 0.9999993400504571, iteration: 33263
loss: 1.0116573572158813,grad_norm: 0.9118182426372201, iteration: 33264
loss: 1.0272042751312256,grad_norm: 0.8727548008506049, iteration: 33265
loss: 1.0752923488616943,grad_norm: 0.999999575025332, iteration: 33266
loss: 1.0044403076171875,grad_norm: 0.9325521406086773, iteration: 33267
loss: 1.0182605981826782,grad_norm: 0.914506310182489, iteration: 33268
loss: 1.0295634269714355,grad_norm: 0.9999991293480405, iteration: 33269
loss: 1.049514889717102,grad_norm: 0.967598310699179, iteration: 33270
loss: 0.9935810565948486,grad_norm: 0.9999990533600651, iteration: 33271
loss: 0.9734414219856262,grad_norm: 0.9235613303155172, iteration: 33272
loss: 1.0088390111923218,grad_norm: 0.843615321657604, iteration: 33273
loss: 0.9815311431884766,grad_norm: 0.9999991055431554, iteration: 33274
loss: 0.9807004332542419,grad_norm: 0.9454594455273816, iteration: 33275
loss: 0.9975396990776062,grad_norm: 0.9046473052761193, iteration: 33276
loss: 1.0010360479354858,grad_norm: 0.8337812504106867, iteration: 33277
loss: 1.023734211921692,grad_norm: 0.9865474272370373, iteration: 33278
loss: 1.0365310907363892,grad_norm: 0.9999995421762149, iteration: 33279
loss: 1.0794191360473633,grad_norm: 0.9999996908331239, iteration: 33280
loss: 1.0163066387176514,grad_norm: 0.9999990728680374, iteration: 33281
loss: 1.0007894039154053,grad_norm: 0.9285379040924998, iteration: 33282
loss: 0.9920743107795715,grad_norm: 0.8744299468258225, iteration: 33283
loss: 0.989289402961731,grad_norm: 0.9366080406225762, iteration: 33284
loss: 1.0269380807876587,grad_norm: 0.9748353606683543, iteration: 33285
loss: 1.0195752382278442,grad_norm: 0.922083012755546, iteration: 33286
loss: 0.9827068448066711,grad_norm: 0.8605854676285414, iteration: 33287
loss: 1.0194348096847534,grad_norm: 0.9999989718185568, iteration: 33288
loss: 1.0271027088165283,grad_norm: 0.9999991447246894, iteration: 33289
loss: 1.0366487503051758,grad_norm: 0.9999991635168979, iteration: 33290
loss: 0.9876237511634827,grad_norm: 0.846877291944613, iteration: 33291
loss: 1.0348318815231323,grad_norm: 0.9804959062256164, iteration: 33292
loss: 1.0136618614196777,grad_norm: 0.9999990890978193, iteration: 33293
loss: 0.9813674688339233,grad_norm: 0.9513141174121568, iteration: 33294
loss: 0.9998374581336975,grad_norm: 0.9999992742611565, iteration: 33295
loss: 1.0134015083312988,grad_norm: 0.8571174366844589, iteration: 33296
loss: 0.9998604655265808,grad_norm: 0.9622998184278359, iteration: 33297
loss: 1.0462158918380737,grad_norm: 0.952416700374643, iteration: 33298
loss: 0.9875973463058472,grad_norm: 0.9999991023839678, iteration: 33299
loss: 1.0087257623672485,grad_norm: 0.9999991090810975, iteration: 33300
loss: 0.9557909369468689,grad_norm: 0.9534922221979696, iteration: 33301
loss: 1.0425775051116943,grad_norm: 0.999999407815678, iteration: 33302
loss: 0.9915422201156616,grad_norm: 0.953513048788771, iteration: 33303
loss: 1.004647970199585,grad_norm: 0.9604898621545713, iteration: 33304
loss: 1.0064674615859985,grad_norm: 0.8246478462582169, iteration: 33305
loss: 1.0137380361557007,grad_norm: 0.8500249428125957, iteration: 33306
loss: 1.016724705696106,grad_norm: 0.8247769845222918, iteration: 33307
loss: 1.003341794013977,grad_norm: 0.8310020004138231, iteration: 33308
loss: 1.000345230102539,grad_norm: 0.9999990428049509, iteration: 33309
loss: 0.9763810634613037,grad_norm: 0.9766078049282657, iteration: 33310
loss: 0.9829193949699402,grad_norm: 0.8961359788508741, iteration: 33311
loss: 1.019049882888794,grad_norm: 0.9999990597726583, iteration: 33312
loss: 0.9500194787979126,grad_norm: 0.9013126420948371, iteration: 33313
loss: 0.9810168743133545,grad_norm: 0.9999993554112101, iteration: 33314
loss: 1.0481373071670532,grad_norm: 0.9999992031522595, iteration: 33315
loss: 0.988499104976654,grad_norm: 0.8274022289900698, iteration: 33316
loss: 0.9654721617698669,grad_norm: 0.9999991679438226, iteration: 33317
loss: 0.9661361575126648,grad_norm: 0.8609403361943012, iteration: 33318
loss: 0.9667401909828186,grad_norm: 0.9828032185735532, iteration: 33319
loss: 1.0365757942199707,grad_norm: 0.9999992705095918, iteration: 33320
loss: 1.0070760250091553,grad_norm: 0.9459017846839196, iteration: 33321
loss: 1.0557217597961426,grad_norm: 0.9999991870595093, iteration: 33322
loss: 0.990166187286377,grad_norm: 0.9122348200411066, iteration: 33323
loss: 1.0231564044952393,grad_norm: 0.841207324296163, iteration: 33324
loss: 1.034578800201416,grad_norm: 0.9559894699987811, iteration: 33325
loss: 0.9993376135826111,grad_norm: 0.8756115144484337, iteration: 33326
loss: 1.0589773654937744,grad_norm: 0.8876549634768943, iteration: 33327
loss: 1.0004420280456543,grad_norm: 0.9999990771285823, iteration: 33328
loss: 0.9941425323486328,grad_norm: 0.9129506553114055, iteration: 33329
loss: 0.996889591217041,grad_norm: 0.8424035661258557, iteration: 33330
loss: 0.9672935605049133,grad_norm: 0.9387495748699228, iteration: 33331
loss: 1.0127724409103394,grad_norm: 0.8633486840722995, iteration: 33332
loss: 1.0105836391448975,grad_norm: 0.9999990980752086, iteration: 33333
loss: 0.9742016792297363,grad_norm: 0.7998440998213805, iteration: 33334
loss: 1.0409010648727417,grad_norm: 0.9249577656816552, iteration: 33335
loss: 1.015116572380066,grad_norm: 0.9039295904449668, iteration: 33336
loss: 1.020024061203003,grad_norm: 0.8068057223953403, iteration: 33337
loss: 0.998898446559906,grad_norm: 0.9178611053693175, iteration: 33338
loss: 0.9748520255088806,grad_norm: 0.9999991389778297, iteration: 33339
loss: 1.0186738967895508,grad_norm: 0.9999992287184762, iteration: 33340
loss: 1.0194072723388672,grad_norm: 0.9999992148009071, iteration: 33341
loss: 1.0240782499313354,grad_norm: 0.9999991386457366, iteration: 33342
loss: 1.0303947925567627,grad_norm: 0.9339668883345962, iteration: 33343
loss: 0.9795037508010864,grad_norm: 0.972929783418718, iteration: 33344
loss: 1.003339171409607,grad_norm: 0.9999990226825716, iteration: 33345
loss: 0.9919390678405762,grad_norm: 0.9999991334407932, iteration: 33346
loss: 0.9966629147529602,grad_norm: 0.8413389449143057, iteration: 33347
loss: 1.0244718790054321,grad_norm: 0.9999991868488729, iteration: 33348
loss: 1.0103617906570435,grad_norm: 0.9233405176719196, iteration: 33349
loss: 0.9796023964881897,grad_norm: 0.8344170454687879, iteration: 33350
loss: 0.9836322665214539,grad_norm: 0.9999991351417709, iteration: 33351
loss: 1.0360065698623657,grad_norm: 0.9999993625670264, iteration: 33352
loss: 1.0104105472564697,grad_norm: 0.8024382915241366, iteration: 33353
loss: 1.0230121612548828,grad_norm: 0.9999991231671491, iteration: 33354
loss: 1.0392881631851196,grad_norm: 0.999999920380619, iteration: 33355
loss: 1.0436463356018066,grad_norm: 0.9547682967766001, iteration: 33356
loss: 1.0000485181808472,grad_norm: 0.9233793592097032, iteration: 33357
loss: 1.034956932067871,grad_norm: 0.9999991411275867, iteration: 33358
loss: 1.0482454299926758,grad_norm: 0.9999991197240384, iteration: 33359
loss: 0.9983912110328674,grad_norm: 0.8886645898459745, iteration: 33360
loss: 1.0047422647476196,grad_norm: 0.9999989880591634, iteration: 33361
loss: 1.0349253416061401,grad_norm: 0.956899172123374, iteration: 33362
loss: 0.9874453544616699,grad_norm: 0.939727068180722, iteration: 33363
loss: 1.014400601387024,grad_norm: 0.999999076672913, iteration: 33364
loss: 1.0393033027648926,grad_norm: 0.999999267734639, iteration: 33365
loss: 0.9945384860038757,grad_norm: 0.9285095042368636, iteration: 33366
loss: 1.0161710977554321,grad_norm: 0.8057636320779369, iteration: 33367
loss: 1.0376728773117065,grad_norm: 0.9999995660882744, iteration: 33368
loss: 1.002098798751831,grad_norm: 0.9646749400557089, iteration: 33369
loss: 0.9919058680534363,grad_norm: 0.8389350184785863, iteration: 33370
loss: 0.9944551587104797,grad_norm: 0.999999161008525, iteration: 33371
loss: 1.0536766052246094,grad_norm: 0.9999997875297666, iteration: 33372
loss: 1.0175786018371582,grad_norm: 0.7868476761539768, iteration: 33373
loss: 1.015060544013977,grad_norm: 0.9999996094026985, iteration: 33374
loss: 0.9878607392311096,grad_norm: 0.9795861915684184, iteration: 33375
loss: 0.9647883176803589,grad_norm: 0.9999991029875881, iteration: 33376
loss: 0.9831646680831909,grad_norm: 0.8661157009848737, iteration: 33377
loss: 0.9888610243797302,grad_norm: 0.9999994219739564, iteration: 33378
loss: 1.0095316171646118,grad_norm: 0.9999992427694858, iteration: 33379
loss: 1.0036598443984985,grad_norm: 0.8553329819954462, iteration: 33380
loss: 0.9797536730766296,grad_norm: 0.9999990784041065, iteration: 33381
loss: 1.04658842086792,grad_norm: 0.9999993111389666, iteration: 33382
loss: 1.0781077146530151,grad_norm: 0.9999991942202434, iteration: 33383
loss: 0.9932636618614197,grad_norm: 0.9999991521476912, iteration: 33384
loss: 0.995765209197998,grad_norm: 0.9448262756121802, iteration: 33385
loss: 0.9981159567832947,grad_norm: 0.968296223678926, iteration: 33386
loss: 1.00868821144104,grad_norm: 0.9999991342757966, iteration: 33387
loss: 1.0001603364944458,grad_norm: 0.9999990250384808, iteration: 33388
loss: 1.029861330986023,grad_norm: 0.9999995548945305, iteration: 33389
loss: 0.9726125001907349,grad_norm: 0.9999989841215904, iteration: 33390
loss: 0.9409080147743225,grad_norm: 0.9999990594024704, iteration: 33391
loss: 0.969102144241333,grad_norm: 0.847512110210761, iteration: 33392
loss: 0.9920818209648132,grad_norm: 0.9999990839635662, iteration: 33393
loss: 0.9970679879188538,grad_norm: 0.9999991432234904, iteration: 33394
loss: 0.9951779246330261,grad_norm: 0.9999990650771928, iteration: 33395
loss: 1.0124562978744507,grad_norm: 0.9257018731970583, iteration: 33396
loss: 0.9626284241676331,grad_norm: 0.9705620901712875, iteration: 33397
loss: 1.0093042850494385,grad_norm: 0.9965088289246521, iteration: 33398
loss: 0.992718517780304,grad_norm: 0.9399568117989735, iteration: 33399
loss: 0.9923084378242493,grad_norm: 0.986140286893084, iteration: 33400
loss: 1.0842808485031128,grad_norm: 0.9999992596736451, iteration: 33401
loss: 1.1059825420379639,grad_norm: 0.9999999054238584, iteration: 33402
loss: 0.9939762949943542,grad_norm: 0.9977838522018093, iteration: 33403
loss: 1.0270798206329346,grad_norm: 0.9614670799704998, iteration: 33404
loss: 1.0015193223953247,grad_norm: 0.9300471869380722, iteration: 33405
loss: 1.0317847728729248,grad_norm: 0.9999991262497182, iteration: 33406
loss: 0.9463616609573364,grad_norm: 0.9999991255119453, iteration: 33407
loss: 0.993634819984436,grad_norm: 0.9999991234916609, iteration: 33408
loss: 1.0325307846069336,grad_norm: 0.9999995112982337, iteration: 33409
loss: 0.9806610345840454,grad_norm: 0.7843281929055648, iteration: 33410
loss: 1.007415771484375,grad_norm: 0.9820754531741173, iteration: 33411
loss: 0.9744698405265808,grad_norm: 0.9892266594677611, iteration: 33412
loss: 1.0053761005401611,grad_norm: 0.9999991677338586, iteration: 33413
loss: 1.0042167901992798,grad_norm: 0.9807319302454862, iteration: 33414
loss: 1.010949969291687,grad_norm: 0.9999994887472293, iteration: 33415
loss: 0.9832956790924072,grad_norm: 0.9257287826821413, iteration: 33416
loss: 1.0092365741729736,grad_norm: 0.9999991104245521, iteration: 33417
loss: 1.0423829555511475,grad_norm: 0.9999990691282165, iteration: 33418
loss: 1.0271477699279785,grad_norm: 0.9999991370342076, iteration: 33419
loss: 1.0122451782226562,grad_norm: 0.9999992977635861, iteration: 33420
loss: 1.0283700227737427,grad_norm: 0.9999991153258219, iteration: 33421
loss: 1.0021129846572876,grad_norm: 0.9999991155274205, iteration: 33422
loss: 1.0190017223358154,grad_norm: 0.9257469326805573, iteration: 33423
loss: 1.009253740310669,grad_norm: 0.9999990413790827, iteration: 33424
loss: 1.1207175254821777,grad_norm: 0.9999992222573036, iteration: 33425
loss: 1.0089850425720215,grad_norm: 0.9917417981361963, iteration: 33426
loss: 0.9828023314476013,grad_norm: 0.9999993981907929, iteration: 33427
loss: 1.0140053033828735,grad_norm: 0.9999992022512351, iteration: 33428
loss: 0.9884584546089172,grad_norm: 0.8676883521411957, iteration: 33429
loss: 0.9939870238304138,grad_norm: 0.999999324626932, iteration: 33430
loss: 1.0048490762710571,grad_norm: 0.9999990893263005, iteration: 33431
loss: 0.9383446574211121,grad_norm: 0.9783042875743575, iteration: 33432
loss: 0.9545226097106934,grad_norm: 0.9357814535685695, iteration: 33433
loss: 1.0365804433822632,grad_norm: 0.9225939515551725, iteration: 33434
loss: 1.0365667343139648,grad_norm: 0.8843986308346942, iteration: 33435
loss: 1.0153424739837646,grad_norm: 0.8732239477152972, iteration: 33436
loss: 1.029983639717102,grad_norm: 0.9464146944734529, iteration: 33437
loss: 1.0139132738113403,grad_norm: 0.9999990997872844, iteration: 33438
loss: 0.9811931252479553,grad_norm: 0.9823040247688988, iteration: 33439
loss: 1.024484634399414,grad_norm: 0.9999991375512911, iteration: 33440
loss: 1.0067744255065918,grad_norm: 0.9999997888092104, iteration: 33441
loss: 0.9804222583770752,grad_norm: 0.924482874966683, iteration: 33442
loss: 1.0381237268447876,grad_norm: 0.9999992300450995, iteration: 33443
loss: 1.0106658935546875,grad_norm: 0.9760806254090768, iteration: 33444
loss: 0.9888221025466919,grad_norm: 0.9032779526849443, iteration: 33445
loss: 1.064585566520691,grad_norm: 0.999999059159849, iteration: 33446
loss: 0.988109827041626,grad_norm: 0.9999991080021132, iteration: 33447
loss: 0.998028039932251,grad_norm: 0.9999990585495097, iteration: 33448
loss: 0.9855833053588867,grad_norm: 0.8654882541585449, iteration: 33449
loss: 1.0456775426864624,grad_norm: 0.999998974338678, iteration: 33450
loss: 1.0322757959365845,grad_norm: 0.9915582412650463, iteration: 33451
loss: 1.0118415355682373,grad_norm: 0.9999993496526254, iteration: 33452
loss: 1.0481654405593872,grad_norm: 0.9534915508966118, iteration: 33453
loss: 1.0340827703475952,grad_norm: 0.8479586842969867, iteration: 33454
loss: 1.0455467700958252,grad_norm: 0.9999993149286047, iteration: 33455
loss: 1.0229774713516235,grad_norm: 0.8293945481888437, iteration: 33456
loss: 1.0307350158691406,grad_norm: 0.7737720850966133, iteration: 33457
loss: 0.9980778694152832,grad_norm: 0.9999990240013739, iteration: 33458
loss: 0.9737123847007751,grad_norm: 0.7848206394359136, iteration: 33459
loss: 0.9718904495239258,grad_norm: 0.9999991164348365, iteration: 33460
loss: 0.9754834771156311,grad_norm: 0.9899632931679481, iteration: 33461
loss: 1.0147475004196167,grad_norm: 0.9787490124483299, iteration: 33462
loss: 1.0028786659240723,grad_norm: 0.7409692451587125, iteration: 33463
loss: 1.0192492008209229,grad_norm: 0.9999990723345975, iteration: 33464
loss: 1.0265098810195923,grad_norm: 0.9999997085624507, iteration: 33465
loss: 0.9994422793388367,grad_norm: 0.9999995499022991, iteration: 33466
loss: 1.020271897315979,grad_norm: 0.9999990861877198, iteration: 33467
loss: 1.0168490409851074,grad_norm: 0.9831343085117386, iteration: 33468
loss: 1.0276743173599243,grad_norm: 0.9999990721029746, iteration: 33469
loss: 1.0387579202651978,grad_norm: 0.9999996618233126, iteration: 33470
loss: 1.035980224609375,grad_norm: 0.9999992312959349, iteration: 33471
loss: 0.9710530638694763,grad_norm: 0.816835733042807, iteration: 33472
loss: 1.02729332447052,grad_norm: 0.9999992120975094, iteration: 33473
loss: 1.0457308292388916,grad_norm: 0.9453893881713098, iteration: 33474
loss: 1.0059374570846558,grad_norm: 0.8157553692072794, iteration: 33475
loss: 1.0078835487365723,grad_norm: 0.9999993381525134, iteration: 33476
loss: 0.9907744526863098,grad_norm: 0.9999992052190159, iteration: 33477
loss: 0.9924740791320801,grad_norm: 0.9999994570146657, iteration: 33478
loss: 1.1074798107147217,grad_norm: 0.9999995625046172, iteration: 33479
loss: 1.1111665964126587,grad_norm: 0.9999995356789136, iteration: 33480
loss: 1.0202317237854004,grad_norm: 0.9999991031524204, iteration: 33481
loss: 0.9760154485702515,grad_norm: 0.9999991607365908, iteration: 33482
loss: 0.9861981272697449,grad_norm: 0.9970988220106753, iteration: 33483
loss: 0.9819437861442566,grad_norm: 0.9087728060032596, iteration: 33484
loss: 0.9657436609268188,grad_norm: 0.9999993183571231, iteration: 33485
loss: 1.030913233757019,grad_norm: 0.8284262019155424, iteration: 33486
loss: 1.0166256427764893,grad_norm: 0.9999993852396337, iteration: 33487
loss: 0.9997714161872864,grad_norm: 0.9833943812593272, iteration: 33488
loss: 0.9884447455406189,grad_norm: 0.8484417752326425, iteration: 33489
loss: 1.0410702228546143,grad_norm: 0.8582379870869488, iteration: 33490
loss: 1.0075739622116089,grad_norm: 0.9999990985676747, iteration: 33491
loss: 1.0127874612808228,grad_norm: 0.9999990916762281, iteration: 33492
loss: 1.0870797634124756,grad_norm: 0.99999982974549, iteration: 33493
loss: 1.0136598348617554,grad_norm: 0.9999994515478062, iteration: 33494
loss: 1.00479257106781,grad_norm: 0.9999990563328158, iteration: 33495
loss: 1.0030808448791504,grad_norm: 0.9999990417081658, iteration: 33496
loss: 1.0408201217651367,grad_norm: 0.9999997672960522, iteration: 33497
loss: 0.9810025691986084,grad_norm: 0.9343383512503209, iteration: 33498
loss: 1.0466315746307373,grad_norm: 0.9999995063280932, iteration: 33499
loss: 0.9939737319946289,grad_norm: 0.9999992515284778, iteration: 33500
loss: 1.0096185207366943,grad_norm: 0.9999993024227392, iteration: 33501
loss: 1.1132019758224487,grad_norm: 0.9999997825784045, iteration: 33502
loss: 1.0400267839431763,grad_norm: 0.9999997813136321, iteration: 33503
loss: 1.021788477897644,grad_norm: 0.9999994730090349, iteration: 33504
loss: 0.9980466961860657,grad_norm: 0.9178337356309773, iteration: 33505
loss: 1.092342495918274,grad_norm: 0.9999998578005976, iteration: 33506
loss: 1.009762167930603,grad_norm: 0.9999993721619905, iteration: 33507
loss: 0.9477756023406982,grad_norm: 0.9008845089619274, iteration: 33508
loss: 1.033843755722046,grad_norm: 0.999999574438239, iteration: 33509
loss: 0.9912402033805847,grad_norm: 0.9999991923888519, iteration: 33510
loss: 1.0019142627716064,grad_norm: 0.8857932720803307, iteration: 33511
loss: 1.008962631225586,grad_norm: 0.9999991315528969, iteration: 33512
loss: 0.9955406188964844,grad_norm: 0.739455611733133, iteration: 33513
loss: 0.9810755848884583,grad_norm: 0.9999993698774073, iteration: 33514
loss: 1.020617127418518,grad_norm: 0.9999992117173345, iteration: 33515
loss: 1.0001262426376343,grad_norm: 0.9999991044814054, iteration: 33516
loss: 0.9933422803878784,grad_norm: 0.9999993693847331, iteration: 33517
loss: 1.065355658531189,grad_norm: 0.9999997752264204, iteration: 33518
loss: 1.0178202390670776,grad_norm: 0.8525859788387513, iteration: 33519
loss: 1.026883602142334,grad_norm: 0.879769005199112, iteration: 33520
loss: 1.0521891117095947,grad_norm: 0.9999994980812308, iteration: 33521
loss: 1.0779047012329102,grad_norm: 0.9999998083550785, iteration: 33522
loss: 0.9774346351623535,grad_norm: 0.974193404007436, iteration: 33523
loss: 1.0139464139938354,grad_norm: 0.9999993812788692, iteration: 33524
loss: 1.0261198282241821,grad_norm: 0.7750149862811425, iteration: 33525
loss: 1.029613733291626,grad_norm: 0.9999992805218931, iteration: 33526
loss: 1.0100423097610474,grad_norm: 0.865075453777055, iteration: 33527
loss: 1.0355031490325928,grad_norm: 0.9999996926913414, iteration: 33528
loss: 1.0518481731414795,grad_norm: 0.999999601064918, iteration: 33529
loss: 1.0386946201324463,grad_norm: 0.9999997033300251, iteration: 33530
loss: 1.0369563102722168,grad_norm: 0.999999231186013, iteration: 33531
loss: 0.9853317141532898,grad_norm: 0.9999990583395894, iteration: 33532
loss: 1.0148085355758667,grad_norm: 0.9999990743269106, iteration: 33533
loss: 1.027424693107605,grad_norm: 0.8994313041307594, iteration: 33534
loss: 1.041387677192688,grad_norm: 0.99999933092748, iteration: 33535
loss: 1.19984769821167,grad_norm: 0.9999994111973709, iteration: 33536
loss: 0.9972971677780151,grad_norm: 0.9999993824304926, iteration: 33537
loss: 1.012237548828125,grad_norm: 0.9999990300504641, iteration: 33538
loss: 1.0128806829452515,grad_norm: 0.9999995426496209, iteration: 33539
loss: 1.0260356664657593,grad_norm: 0.9999992323005512, iteration: 33540
loss: 1.0140700340270996,grad_norm: 0.9999990477947089, iteration: 33541
loss: 1.019964337348938,grad_norm: 0.9239994656198429, iteration: 33542
loss: 1.0386483669281006,grad_norm: 0.9999995835409513, iteration: 33543
loss: 1.0408719778060913,grad_norm: 0.9999993540474752, iteration: 33544
loss: 1.0065027475357056,grad_norm: 0.9999993477872198, iteration: 33545
loss: 1.1332241296768188,grad_norm: 0.9999999038248479, iteration: 33546
loss: 1.0209431648254395,grad_norm: 0.9742911518522539, iteration: 33547
loss: 1.020694613456726,grad_norm: 0.9999992607475454, iteration: 33548
loss: 1.0161112546920776,grad_norm: 0.7970041073857775, iteration: 33549
loss: 1.0516539812088013,grad_norm: 0.9999992190803713, iteration: 33550
loss: 1.041233777999878,grad_norm: 0.9999999415525989, iteration: 33551
loss: 0.9844056367874146,grad_norm: 0.9999989901171735, iteration: 33552
loss: 1.0226763486862183,grad_norm: 0.9999995412480626, iteration: 33553
loss: 1.0243815183639526,grad_norm: 0.9999991958912177, iteration: 33554
loss: 1.0168201923370361,grad_norm: 0.999999343782888, iteration: 33555
loss: 1.0466136932373047,grad_norm: 0.9310250579944117, iteration: 33556
loss: 0.9715114831924438,grad_norm: 0.9999996690603264, iteration: 33557
loss: 1.0189297199249268,grad_norm: 0.8488320705037633, iteration: 33558
loss: 0.9888231158256531,grad_norm: 0.7348944105569346, iteration: 33559
loss: 1.0979008674621582,grad_norm: 1.0000000043480437, iteration: 33560
loss: 1.0490907430648804,grad_norm: 0.9999994728300765, iteration: 33561
loss: 0.9702471494674683,grad_norm: 0.8495005364327772, iteration: 33562
loss: 1.0235711336135864,grad_norm: 0.9999994091302055, iteration: 33563
loss: 1.0048211812973022,grad_norm: 0.9999992063911258, iteration: 33564
loss: 0.9929088354110718,grad_norm: 0.9142691641860721, iteration: 33565
loss: 0.9999109506607056,grad_norm: 0.9740245299888396, iteration: 33566
loss: 0.9943954944610596,grad_norm: 0.9999993405318678, iteration: 33567
loss: 1.0549116134643555,grad_norm: 0.9999999121840389, iteration: 33568
loss: 1.0029797554016113,grad_norm: 0.9999991155264212, iteration: 33569
loss: 0.9916129112243652,grad_norm: 0.999999888984526, iteration: 33570
loss: 1.045163869857788,grad_norm: 0.999999724467376, iteration: 33571
loss: 0.9819555282592773,grad_norm: 0.9247935218844574, iteration: 33572
loss: 1.0039258003234863,grad_norm: 0.9853653499977907, iteration: 33573
loss: 1.0658079385757446,grad_norm: 0.9999992680195537, iteration: 33574
loss: 1.0348557233810425,grad_norm: 0.8474768880407907, iteration: 33575
loss: 0.990117073059082,grad_norm: 0.9999995143888603, iteration: 33576
loss: 1.028407096862793,grad_norm: 0.9999991702685407, iteration: 33577
loss: 0.9832056164741516,grad_norm: 0.9999995401389717, iteration: 33578
loss: 1.0556021928787231,grad_norm: 0.9999994094037862, iteration: 33579
loss: 1.0549296140670776,grad_norm: 0.9999996443828948, iteration: 33580
loss: 1.0221540927886963,grad_norm: 0.9999992208012376, iteration: 33581
loss: 0.9832809567451477,grad_norm: 0.9999991533422865, iteration: 33582
loss: 1.0325385332107544,grad_norm: 0.9752619682817822, iteration: 33583
loss: 0.9909235835075378,grad_norm: 0.8380374727900549, iteration: 33584
loss: 1.0153409242630005,grad_norm: 0.9999994730376783, iteration: 33585
loss: 1.0941897630691528,grad_norm: 0.9999999073339287, iteration: 33586
loss: 0.9965733885765076,grad_norm: 0.8781503343707381, iteration: 33587
loss: 0.9882246255874634,grad_norm: 0.9999995965152497, iteration: 33588
loss: 0.9994249939918518,grad_norm: 0.9999989453016139, iteration: 33589
loss: 0.9934646487236023,grad_norm: 0.8957587380723471, iteration: 33590
loss: 1.021397352218628,grad_norm: 0.9999991373469609, iteration: 33591
loss: 1.0190457105636597,grad_norm: 0.9999990111690756, iteration: 33592
loss: 1.0433955192565918,grad_norm: 0.9999995582437815, iteration: 33593
loss: 1.0308293104171753,grad_norm: 0.9999991569737667, iteration: 33594
loss: 1.093315601348877,grad_norm: 0.9999999017244717, iteration: 33595
loss: 0.9986076951026917,grad_norm: 0.9999990142378764, iteration: 33596
loss: 1.0080279111862183,grad_norm: 0.999999582216519, iteration: 33597
loss: 0.976236879825592,grad_norm: 0.9999991081916825, iteration: 33598
loss: 0.9835777282714844,grad_norm: 0.9541155640373502, iteration: 33599
loss: 0.9695980548858643,grad_norm: 0.9999992319476231, iteration: 33600
loss: 1.0195178985595703,grad_norm: 0.910528325543911, iteration: 33601
loss: 1.0246400833129883,grad_norm: 0.9089120260900464, iteration: 33602
loss: 1.0196006298065186,grad_norm: 0.9999992523380254, iteration: 33603
loss: 1.0081462860107422,grad_norm: 0.9999991753117712, iteration: 33604
loss: 1.032222032546997,grad_norm: 0.9200888349358441, iteration: 33605
loss: 1.0330212116241455,grad_norm: 0.99999901743382, iteration: 33606
loss: 0.953605055809021,grad_norm: 0.9999991506700933, iteration: 33607
loss: 1.0757235288619995,grad_norm: 0.999999753204245, iteration: 33608
loss: 1.0158319473266602,grad_norm: 0.9573228006544421, iteration: 33609
loss: 1.036902666091919,grad_norm: 0.9999990203465258, iteration: 33610
loss: 1.0418294668197632,grad_norm: 0.999999059586169, iteration: 33611
loss: 1.0215492248535156,grad_norm: 0.8637523683900625, iteration: 33612
loss: 1.0445504188537598,grad_norm: 0.9999996185762228, iteration: 33613
loss: 0.9917068481445312,grad_norm: 0.8820367127153411, iteration: 33614
loss: 0.9781510233879089,grad_norm: 0.999999028469923, iteration: 33615
loss: 1.0433562994003296,grad_norm: 0.9999993289103286, iteration: 33616
loss: 1.0421377420425415,grad_norm: 0.9999991478836656, iteration: 33617
loss: 1.0204440355300903,grad_norm: 0.9999991317928075, iteration: 33618
loss: 0.9906957149505615,grad_norm: 0.936797683551094, iteration: 33619
loss: 1.03342866897583,grad_norm: 0.9999993121298024, iteration: 33620
loss: 1.0141700506210327,grad_norm: 0.9134337852117196, iteration: 33621
loss: 1.0345393419265747,grad_norm: 0.9999995604172389, iteration: 33622
loss: 1.0617927312850952,grad_norm: 0.9999996105208113, iteration: 33623
loss: 0.9913502335548401,grad_norm: 0.9023718531958492, iteration: 33624
loss: 0.9958794713020325,grad_norm: 0.9999990127762413, iteration: 33625
loss: 0.9905230402946472,grad_norm: 0.9940310877320493, iteration: 33626
loss: 1.0062001943588257,grad_norm: 0.9178969564365648, iteration: 33627
loss: 1.014385461807251,grad_norm: 0.921455937713714, iteration: 33628
loss: 0.9801324605941772,grad_norm: 0.9106234463964069, iteration: 33629
loss: 0.9949647784233093,grad_norm: 0.9872302480291241, iteration: 33630
loss: 1.07152259349823,grad_norm: 0.999999408687065, iteration: 33631
loss: 0.9880417585372925,grad_norm: 0.978148901584688, iteration: 33632
loss: 1.0517371892929077,grad_norm: 0.9167945247676305, iteration: 33633
loss: 1.0421559810638428,grad_norm: 0.9999991850954411, iteration: 33634
loss: 1.0093960762023926,grad_norm: 0.9999990458092702, iteration: 33635
loss: 1.032728672027588,grad_norm: 0.9999992062336128, iteration: 33636
loss: 1.0657891035079956,grad_norm: 0.9999990854430391, iteration: 33637
loss: 1.020362377166748,grad_norm: 0.9999991942978341, iteration: 33638
loss: 0.9881275296211243,grad_norm: 0.9814960586400705, iteration: 33639
loss: 0.9952776432037354,grad_norm: 0.7948276089986976, iteration: 33640
loss: 1.0449281930923462,grad_norm: 0.9999993740641282, iteration: 33641
loss: 0.9507616758346558,grad_norm: 0.918453346488285, iteration: 33642
loss: 0.9899625778198242,grad_norm: 0.9999990758733143, iteration: 33643
loss: 1.0119142532348633,grad_norm: 0.900990945049845, iteration: 33644
loss: 1.0671942234039307,grad_norm: 0.9999992930099428, iteration: 33645
loss: 1.0184049606323242,grad_norm: 0.999999170714148, iteration: 33646
loss: 0.9776834845542908,grad_norm: 0.9999991593563704, iteration: 33647
loss: 1.023181676864624,grad_norm: 0.9999991049619346, iteration: 33648
loss: 0.9912766218185425,grad_norm: 0.9073958382174354, iteration: 33649
loss: 0.9750717878341675,grad_norm: 0.9999990525043926, iteration: 33650
loss: 1.0031330585479736,grad_norm: 0.8293454041817716, iteration: 33651
loss: 1.0202914476394653,grad_norm: 0.9999990687907001, iteration: 33652
loss: 1.0345088243484497,grad_norm: 0.9213823095829109, iteration: 33653
loss: 1.0624382495880127,grad_norm: 0.9999990922743082, iteration: 33654
loss: 1.0096732378005981,grad_norm: 0.8222376722005551, iteration: 33655
loss: 1.0208936929702759,grad_norm: 0.9999991772836013, iteration: 33656
loss: 0.9865807890892029,grad_norm: 0.9999991512026963, iteration: 33657
loss: 0.9681703448295593,grad_norm: 0.909396828479243, iteration: 33658
loss: 0.981019139289856,grad_norm: 0.9952699835091775, iteration: 33659
loss: 0.9738783240318298,grad_norm: 0.9999991613222747, iteration: 33660
loss: 0.9938647747039795,grad_norm: 0.9129223769940805, iteration: 33661
loss: 1.0290995836257935,grad_norm: 0.999999188878399, iteration: 33662
loss: 1.0367100238800049,grad_norm: 0.9999990473667398, iteration: 33663
loss: 1.006316900253296,grad_norm: 0.9999998317962677, iteration: 33664
loss: 1.004392385482788,grad_norm: 0.792508692403242, iteration: 33665
loss: 1.0067214965820312,grad_norm: 0.7655318844545285, iteration: 33666
loss: 1.0144418478012085,grad_norm: 0.9403448630829873, iteration: 33667
loss: 0.9651269912719727,grad_norm: 0.9189468271420357, iteration: 33668
loss: 0.9887545108795166,grad_norm: 0.9999992361720164, iteration: 33669
loss: 1.0474035739898682,grad_norm: 0.9999992413994698, iteration: 33670
loss: 0.9975783228874207,grad_norm: 0.9539020296476127, iteration: 33671
loss: 0.990481972694397,grad_norm: 0.9999991693931303, iteration: 33672
loss: 0.9612941741943359,grad_norm: 0.8855121136107555, iteration: 33673
loss: 1.0198694467544556,grad_norm: 0.999999344904261, iteration: 33674
loss: 1.0245490074157715,grad_norm: 0.9003052196920995, iteration: 33675
loss: 1.0009833574295044,grad_norm: 0.7310502356663313, iteration: 33676
loss: 0.9819840788841248,grad_norm: 0.9999991725935953, iteration: 33677
loss: 1.0721931457519531,grad_norm: 0.9999996590766949, iteration: 33678
loss: 1.0242904424667358,grad_norm: 0.9999991315893639, iteration: 33679
loss: 0.9952642917633057,grad_norm: 0.9999998939268626, iteration: 33680
loss: 1.0186347961425781,grad_norm: 0.9999998869135753, iteration: 33681
loss: 1.029678225517273,grad_norm: 0.8484076742318906, iteration: 33682
loss: 1.0063145160675049,grad_norm: 0.9727410363537865, iteration: 33683
loss: 1.0463720560073853,grad_norm: 0.9925876591742097, iteration: 33684
loss: 0.9746280908584595,grad_norm: 0.9999990775788583, iteration: 33685
loss: 0.9894607663154602,grad_norm: 0.9999995222624878, iteration: 33686
loss: 1.0045826435089111,grad_norm: 0.9999990952962374, iteration: 33687
loss: 1.0195481777191162,grad_norm: 0.8388319776550146, iteration: 33688
loss: 0.9740949273109436,grad_norm: 0.9999993819712694, iteration: 33689
loss: 1.0016562938690186,grad_norm: 0.9872766817001417, iteration: 33690
loss: 1.030012845993042,grad_norm: 0.9999992553278374, iteration: 33691
loss: 0.9937490820884705,grad_norm: 0.9334890837896721, iteration: 33692
loss: 1.023724913597107,grad_norm: 0.994702423451056, iteration: 33693
loss: 1.0112314224243164,grad_norm: 0.9727942068697841, iteration: 33694
loss: 0.9876620173454285,grad_norm: 0.9089775925013467, iteration: 33695
loss: 0.998525857925415,grad_norm: 0.9999999276584243, iteration: 33696
loss: 1.0146543979644775,grad_norm: 0.9632831966137282, iteration: 33697
loss: 1.057966947555542,grad_norm: 0.9654580990756392, iteration: 33698
loss: 1.0096042156219482,grad_norm: 0.9999990571780101, iteration: 33699
loss: 0.9932947158813477,grad_norm: 0.9843536367218488, iteration: 33700
loss: 1.0402404069900513,grad_norm: 0.9999992802817196, iteration: 33701
loss: 0.989006519317627,grad_norm: 0.8907169275478655, iteration: 33702
loss: 1.0355604887008667,grad_norm: 0.8482043553911995, iteration: 33703
loss: 1.110768437385559,grad_norm: 0.9999992286968151, iteration: 33704
loss: 1.01744544506073,grad_norm: 0.9999998005254682, iteration: 33705
loss: 0.9707053303718567,grad_norm: 0.9737217855085635, iteration: 33706
loss: 1.022375226020813,grad_norm: 0.9606749583061321, iteration: 33707
loss: 0.9742532968521118,grad_norm: 0.9999990313662747, iteration: 33708
loss: 0.978287398815155,grad_norm: 0.9999991969850722, iteration: 33709
loss: 1.0015652179718018,grad_norm: 0.9999996353312431, iteration: 33710
loss: 1.0294328927993774,grad_norm: 0.9344541448412013, iteration: 33711
loss: 1.0175551176071167,grad_norm: 0.910947674148425, iteration: 33712
loss: 0.9972955584526062,grad_norm: 0.8585603995834916, iteration: 33713
loss: 1.0144250392913818,grad_norm: 0.9999997954187795, iteration: 33714
loss: 1.1911884546279907,grad_norm: 0.9999994855728085, iteration: 33715
loss: 1.038750171661377,grad_norm: 0.9999992299338649, iteration: 33716
loss: 1.0317450761795044,grad_norm: 0.9762098136847511, iteration: 33717
loss: 1.053091049194336,grad_norm: 0.9999995738163492, iteration: 33718
loss: 0.9718796610832214,grad_norm: 0.9212105812603918, iteration: 33719
loss: 0.9803637862205505,grad_norm: 0.9740318144272145, iteration: 33720
loss: 1.0420485734939575,grad_norm: 1.000000008165248, iteration: 33721
loss: 0.9942697286605835,grad_norm: 0.8778531134896723, iteration: 33722
loss: 1.164052128791809,grad_norm: 0.9999994921536195, iteration: 33723
loss: 1.2203789949417114,grad_norm: 0.9999998549368101, iteration: 33724
loss: 1.0044194459915161,grad_norm: 0.9999991653443521, iteration: 33725
loss: 1.0849478244781494,grad_norm: 0.9999992918519696, iteration: 33726
loss: 1.3474704027175903,grad_norm: 0.9999992181592672, iteration: 33727
loss: 1.1040362119674683,grad_norm: 0.9999995577173076, iteration: 33728
loss: 0.955207109451294,grad_norm: 0.9999992412235423, iteration: 33729
loss: 1.0736724138259888,grad_norm: 0.9999996896788913, iteration: 33730
loss: 1.0686659812927246,grad_norm: 0.9999993060158433, iteration: 33731
loss: 0.9832597374916077,grad_norm: 0.9610732614874471, iteration: 33732
loss: 1.1291662454605103,grad_norm: 0.9999996598879587, iteration: 33733
loss: 1.0292938947677612,grad_norm: 0.9999990761097521, iteration: 33734
loss: 1.1166024208068848,grad_norm: 0.999999345939165, iteration: 33735
loss: 1.3187918663024902,grad_norm: 0.9999994098691221, iteration: 33736
loss: 1.2493705749511719,grad_norm: 0.9999995481173043, iteration: 33737
loss: 1.1230942010879517,grad_norm: 0.9999993571276617, iteration: 33738
loss: 1.0507866144180298,grad_norm: 0.9999990809311423, iteration: 33739
loss: 1.6003628969192505,grad_norm: 0.9999997347385344, iteration: 33740
loss: 1.0908623933792114,grad_norm: 0.9999990968967633, iteration: 33741
loss: 1.3293416500091553,grad_norm: 0.999999550412949, iteration: 33742
loss: 1.2987905740737915,grad_norm: 0.9999999012789957, iteration: 33743
loss: 1.038615107536316,grad_norm: 0.9999993187915369, iteration: 33744
loss: 1.370430827140808,grad_norm: 0.9999994454336294, iteration: 33745
loss: 1.388690710067749,grad_norm: 0.999999889653412, iteration: 33746
loss: 1.0755659341812134,grad_norm: 0.9999998579573635, iteration: 33747
loss: 1.0282329320907593,grad_norm: 0.9999993825886111, iteration: 33748
loss: 1.1934840679168701,grad_norm: 0.9999999602273407, iteration: 33749
loss: 1.0173523426055908,grad_norm: 0.999999513874724, iteration: 33750
loss: 1.069176197052002,grad_norm: 0.9999991016322739, iteration: 33751
loss: 1.2184048891067505,grad_norm: 0.9999999372028079, iteration: 33752
loss: 1.0220075845718384,grad_norm: 0.9999994687935672, iteration: 33753
loss: 1.2518938779830933,grad_norm: 0.999999682450784, iteration: 33754
loss: 1.2013561725616455,grad_norm: 0.9999993923760424, iteration: 33755
loss: 1.6469792127609253,grad_norm: 0.9999998776725766, iteration: 33756
loss: 1.4076789617538452,grad_norm: 0.9999997843062352, iteration: 33757
loss: 1.1162141561508179,grad_norm: 0.9999990829379369, iteration: 33758
loss: 1.2397645711898804,grad_norm: 0.9999997276262004, iteration: 33759
loss: 1.2924429178237915,grad_norm: 0.9999997145056367, iteration: 33760
loss: 1.5038224458694458,grad_norm: 1.000000017678294, iteration: 33761
loss: 1.34994375705719,grad_norm: 0.9999999710413983, iteration: 33762
loss: 1.193771243095398,grad_norm: 0.9999995400484516, iteration: 33763
loss: 1.4312382936477661,grad_norm: 0.9999995061478518, iteration: 33764
loss: 1.379652738571167,grad_norm: 0.9999998696206118, iteration: 33765
loss: 1.3209176063537598,grad_norm: 1.000000027306729, iteration: 33766
loss: 1.4451571702957153,grad_norm: 0.9999997168418833, iteration: 33767
loss: 1.338027000427246,grad_norm: 0.9999996682952175, iteration: 33768
loss: 1.2350008487701416,grad_norm: 0.9999996408736553, iteration: 33769
loss: 1.149752140045166,grad_norm: 0.9999995308724957, iteration: 33770
loss: 1.3308638334274292,grad_norm: 1.0000000040075143, iteration: 33771
loss: 1.3833465576171875,grad_norm: 0.9999998088708855, iteration: 33772
loss: 1.1349948644638062,grad_norm: 0.9999998082658138, iteration: 33773
loss: 1.4460150003433228,grad_norm: 0.999999705828407, iteration: 33774
loss: 1.4042249917984009,grad_norm: 0.9999994363910862, iteration: 33775
loss: 1.2495845556259155,grad_norm: 0.9999995964514943, iteration: 33776
loss: 1.2334617376327515,grad_norm: 0.9999995963542646, iteration: 33777
loss: 1.222215175628662,grad_norm: 0.9999996451494071, iteration: 33778
loss: 1.3310226202011108,grad_norm: 0.9999993429542925, iteration: 33779
loss: 1.0481618642807007,grad_norm: 0.9999993770943001, iteration: 33780
loss: 1.2634979486465454,grad_norm: 0.9999997287640395, iteration: 33781
loss: 1.1371076107025146,grad_norm: 0.9999995976656934, iteration: 33782
loss: 1.2180536985397339,grad_norm: 0.9999997264649856, iteration: 33783
loss: 1.1851507425308228,grad_norm: 0.999999501858451, iteration: 33784
loss: 1.1219924688339233,grad_norm: 0.9808884928034265, iteration: 33785
loss: 1.0377944707870483,grad_norm: 0.9999990775399787, iteration: 33786
loss: 1.139527678489685,grad_norm: 0.99999920797137, iteration: 33787
loss: 1.0292086601257324,grad_norm: 0.9893209293843348, iteration: 33788
loss: 1.0158807039260864,grad_norm: 0.9462359426133108, iteration: 33789
loss: 0.9752991795539856,grad_norm: 0.9999991355685042, iteration: 33790
loss: 1.0466364622116089,grad_norm: 0.9999998636733651, iteration: 33791
loss: 1.345043659210205,grad_norm: 0.9999997600643484, iteration: 33792
loss: 1.0413860082626343,grad_norm: 0.869991035293342, iteration: 33793
loss: 1.126694679260254,grad_norm: 0.9999993352618766, iteration: 33794
loss: 1.246488094329834,grad_norm: 0.999999378086306, iteration: 33795
loss: 1.0342214107513428,grad_norm: 0.9999996377584812, iteration: 33796
loss: 1.098192572593689,grad_norm: 0.999999310883558, iteration: 33797
loss: 1.0975315570831299,grad_norm: 0.9999997461479191, iteration: 33798
loss: 1.0068135261535645,grad_norm: 0.9999991877405106, iteration: 33799
loss: 1.1663678884506226,grad_norm: 0.9999997069065831, iteration: 33800
loss: 1.0530903339385986,grad_norm: 0.9999998845466375, iteration: 33801
loss: 1.0025886297225952,grad_norm: 0.9999997743877147, iteration: 33802
loss: 1.1736769676208496,grad_norm: 0.9999991942553739, iteration: 33803
loss: 1.0501502752304077,grad_norm: 0.9999992096902703, iteration: 33804
loss: 0.9541053771972656,grad_norm: 0.9999992604067108, iteration: 33805
loss: 1.0399730205535889,grad_norm: 0.99999891185374, iteration: 33806
loss: 0.9693095088005066,grad_norm: 0.9999990969267368, iteration: 33807
loss: 1.0110082626342773,grad_norm: 0.9999996043559609, iteration: 33808
loss: 1.0551866292953491,grad_norm: 0.9999990986898101, iteration: 33809
loss: 1.0705968141555786,grad_norm: 0.9999992795670666, iteration: 33810
loss: 1.1997804641723633,grad_norm: 0.9999998222594243, iteration: 33811
loss: 1.033974528312683,grad_norm: 0.9999993964345055, iteration: 33812
loss: 1.0697718858718872,grad_norm: 0.9999995257661254, iteration: 33813
loss: 1.0352277755737305,grad_norm: 0.9999992405509257, iteration: 33814
loss: 1.0565440654754639,grad_norm: 0.7950580434352518, iteration: 33815
loss: 1.0191946029663086,grad_norm: 0.9999992589804862, iteration: 33816
loss: 1.0877939462661743,grad_norm: 0.9999990809460417, iteration: 33817
loss: 1.0096793174743652,grad_norm: 0.8178970572065171, iteration: 33818
loss: 1.0671039819717407,grad_norm: 0.9999998097558115, iteration: 33819
loss: 1.2296279668807983,grad_norm: 0.9999992130263907, iteration: 33820
loss: 1.0879502296447754,grad_norm: 0.9999991653407115, iteration: 33821
loss: 1.0591243505477905,grad_norm: 0.9999993586233485, iteration: 33822
loss: 0.9824758172035217,grad_norm: 0.9999990434924977, iteration: 33823
loss: 1.0132592916488647,grad_norm: 0.9134514429249271, iteration: 33824
loss: 1.0549193620681763,grad_norm: 0.9999991737891855, iteration: 33825
loss: 0.9973793029785156,grad_norm: 0.9195656687449474, iteration: 33826
loss: 0.9959079623222351,grad_norm: 0.9405438391640949, iteration: 33827
loss: 1.0339654684066772,grad_norm: 0.9999991644137044, iteration: 33828
loss: 0.9946893453598022,grad_norm: 0.9999990068776834, iteration: 33829
loss: 1.1598554849624634,grad_norm: 0.9999996769933529, iteration: 33830
loss: 0.9894291758537292,grad_norm: 0.9999991548509501, iteration: 33831
loss: 1.0056582689285278,grad_norm: 0.9999990742892303, iteration: 33832
loss: 1.0150337219238281,grad_norm: 0.9999993699744159, iteration: 33833
loss: 1.0648694038391113,grad_norm: 0.9999994874172424, iteration: 33834
loss: 1.066454291343689,grad_norm: 0.9999992160289705, iteration: 33835
loss: 1.045164704322815,grad_norm: 0.9999992698210822, iteration: 33836
loss: 1.3005800247192383,grad_norm: 0.9999993567436035, iteration: 33837
loss: 1.029177188873291,grad_norm: 0.8157320644125056, iteration: 33838
loss: 1.026477336883545,grad_norm: 0.9999989999749501, iteration: 33839
loss: 1.0117475986480713,grad_norm: 0.9348013978658756, iteration: 33840
loss: 1.1163707971572876,grad_norm: 0.9999998215407545, iteration: 33841
loss: 1.0000545978546143,grad_norm: 0.8335363246332497, iteration: 33842
loss: 1.02094304561615,grad_norm: 0.9999993600745222, iteration: 33843
loss: 1.0879212617874146,grad_norm: 0.99999931206716, iteration: 33844
loss: 0.9871999621391296,grad_norm: 0.9999994961976774, iteration: 33845
loss: 1.0280994176864624,grad_norm: 0.9999998110270285, iteration: 33846
loss: 0.9801559448242188,grad_norm: 0.9151700075287238, iteration: 33847
loss: 0.995880126953125,grad_norm: 0.999999196147509, iteration: 33848
loss: 1.019868016242981,grad_norm: 0.9999990839772559, iteration: 33849
loss: 1.0599236488342285,grad_norm: 0.9999994746867964, iteration: 33850
loss: 1.028406023979187,grad_norm: 0.757788910600385, iteration: 33851
loss: 0.9849773645401001,grad_norm: 0.9999995161039353, iteration: 33852
loss: 1.0045855045318604,grad_norm: 0.9999991073538826, iteration: 33853
loss: 0.9867640137672424,grad_norm: 0.9812190460277356, iteration: 33854
loss: 0.9714695811271667,grad_norm: 0.9999990580531632, iteration: 33855
loss: 1.1220430135726929,grad_norm: 0.9999996844567796, iteration: 33856
loss: 1.0235058069229126,grad_norm: 0.9999990975877006, iteration: 33857
loss: 1.0460213422775269,grad_norm: 0.9999999483346824, iteration: 33858
loss: 1.0821632146835327,grad_norm: 0.9999996663955912, iteration: 33859
loss: 1.0265724658966064,grad_norm: 0.7636884564122571, iteration: 33860
loss: 1.089921236038208,grad_norm: 0.9627589036139225, iteration: 33861
loss: 1.045884132385254,grad_norm: 0.9999999772156369, iteration: 33862
loss: 1.004271388053894,grad_norm: 0.9999991042206415, iteration: 33863
loss: 1.0034518241882324,grad_norm: 0.9999991783767427, iteration: 33864
loss: 1.0353095531463623,grad_norm: 0.9999995868406033, iteration: 33865
loss: 1.0481863021850586,grad_norm: 0.9999989325865003, iteration: 33866
loss: 1.0199236869812012,grad_norm: 0.9999991695063667, iteration: 33867
loss: 1.0264859199523926,grad_norm: 0.9999994970604027, iteration: 33868
loss: 1.0439156293869019,grad_norm: 0.9999989755691422, iteration: 33869
loss: 1.019421100616455,grad_norm: 0.9999995055828264, iteration: 33870
loss: 1.0146113634109497,grad_norm: 0.8604820897766203, iteration: 33871
loss: 1.0397145748138428,grad_norm: 0.9212834285077006, iteration: 33872
loss: 1.056519865989685,grad_norm: 0.9999993483138513, iteration: 33873
loss: 1.0219122171401978,grad_norm: 0.9999990850325117, iteration: 33874
loss: 1.0176087617874146,grad_norm: 0.9999993516595134, iteration: 33875
loss: 1.0451269149780273,grad_norm: 0.9999995297597338, iteration: 33876
loss: 0.9989256262779236,grad_norm: 0.9999993639668943, iteration: 33877
loss: 0.9836207628250122,grad_norm: 0.9381990311235479, iteration: 33878
loss: 1.1489765644073486,grad_norm: 0.9999993981214387, iteration: 33879
loss: 1.0574589967727661,grad_norm: 0.9999993273701729, iteration: 33880
loss: 0.996475100517273,grad_norm: 0.8561093093896964, iteration: 33881
loss: 0.9660464525222778,grad_norm: 0.9999994080306909, iteration: 33882
loss: 1.0221331119537354,grad_norm: 0.881879473379841, iteration: 33883
loss: 0.953722357749939,grad_norm: 0.9999991092963239, iteration: 33884
loss: 1.0651063919067383,grad_norm: 0.9949843469720713, iteration: 33885
loss: 1.0222854614257812,grad_norm: 0.9292240663206967, iteration: 33886
loss: 1.0454046726226807,grad_norm: 0.9999990153104896, iteration: 33887
loss: 1.0315759181976318,grad_norm: 1.0000000162149643, iteration: 33888
loss: 1.0045055150985718,grad_norm: 0.9999990684942618, iteration: 33889
loss: 0.9757121801376343,grad_norm: 0.9999991880148973, iteration: 33890
loss: 0.9845691323280334,grad_norm: 0.9999992520432813, iteration: 33891
loss: 1.0852161645889282,grad_norm: 0.9999994127642132, iteration: 33892
loss: 1.010472059249878,grad_norm: 0.9931574738311127, iteration: 33893
loss: 1.0374757051467896,grad_norm: 0.9999990680291381, iteration: 33894
loss: 1.0936354398727417,grad_norm: 0.9999992464179358, iteration: 33895
loss: 0.9790151119232178,grad_norm: 0.9260336036580388, iteration: 33896
loss: 1.0106275081634521,grad_norm: 0.9999993659424178, iteration: 33897
loss: 0.9862133264541626,grad_norm: 0.7356614589281117, iteration: 33898
loss: 1.0163207054138184,grad_norm: 0.9782178914111974, iteration: 33899
loss: 1.0337668657302856,grad_norm: 0.9999993155677166, iteration: 33900
loss: 1.0259616374969482,grad_norm: 0.9320425418564595, iteration: 33901
loss: 1.0204709768295288,grad_norm: 0.8199023784437103, iteration: 33902
loss: 0.9989080429077148,grad_norm: 0.9999992968303548, iteration: 33903
loss: 1.011491298675537,grad_norm: 0.9999991774841691, iteration: 33904
loss: 0.9721538424491882,grad_norm: 0.9338240300251415, iteration: 33905
loss: 1.0165213346481323,grad_norm: 0.9286746273658104, iteration: 33906
loss: 1.0360831022262573,grad_norm: 0.9999993591771861, iteration: 33907
loss: 1.0267211198806763,grad_norm: 0.9464918016428694, iteration: 33908
loss: 0.9776976108551025,grad_norm: 0.9999991181257165, iteration: 33909
loss: 1.0416972637176514,grad_norm: 0.9951578135240913, iteration: 33910
loss: 0.9412174224853516,grad_norm: 0.9707092754441673, iteration: 33911
loss: 1.0274578332901,grad_norm: 0.984414204439545, iteration: 33912
loss: 1.0113093852996826,grad_norm: 0.9999992971105889, iteration: 33913
loss: 1.029563307762146,grad_norm: 0.9999989810753678, iteration: 33914
loss: 1.1416987180709839,grad_norm: 0.9999995941835257, iteration: 33915
loss: 0.9832994937896729,grad_norm: 0.9999991126335563, iteration: 33916
loss: 1.0047636032104492,grad_norm: 0.9999996368668728, iteration: 33917
loss: 1.0098681449890137,grad_norm: 0.8887046642078471, iteration: 33918
loss: 0.9771320223808289,grad_norm: 0.8159132331340369, iteration: 33919
loss: 1.0127195119857788,grad_norm: 0.946222728572367, iteration: 33920
loss: 1.0259424448013306,grad_norm: 0.9040046375475217, iteration: 33921
loss: 1.0907193422317505,grad_norm: 0.9999992562075194, iteration: 33922
loss: 1.069621205329895,grad_norm: 0.9999990998811495, iteration: 33923
loss: 1.1138609647750854,grad_norm: 0.9999995046890228, iteration: 33924
loss: 1.0012046098709106,grad_norm: 0.9994117450933155, iteration: 33925
loss: 1.0585824251174927,grad_norm: 0.9999998438127431, iteration: 33926
loss: 1.0780012607574463,grad_norm: 0.9999991282011883, iteration: 33927
loss: 1.0019335746765137,grad_norm: 0.8772342439391504, iteration: 33928
loss: 1.070146918296814,grad_norm: 0.9999993358258049, iteration: 33929
loss: 1.0049409866333008,grad_norm: 0.8803621755805116, iteration: 33930
loss: 1.021591305732727,grad_norm: 0.9999990199714592, iteration: 33931
loss: 0.960752546787262,grad_norm: 0.999999126164241, iteration: 33932
loss: 0.9721971750259399,grad_norm: 0.943035979208137, iteration: 33933
loss: 1.026362657546997,grad_norm: 0.9077857369567819, iteration: 33934
loss: 1.0011919736862183,grad_norm: 0.9999990510176867, iteration: 33935
loss: 1.0097663402557373,grad_norm: 0.999999234894543, iteration: 33936
loss: 1.0171774625778198,grad_norm: 0.9999997771317523, iteration: 33937
loss: 1.0493273735046387,grad_norm: 0.9999994422096858, iteration: 33938
loss: 1.0356520414352417,grad_norm: 0.9999991944751431, iteration: 33939
loss: 0.9638230204582214,grad_norm: 0.9999991520413434, iteration: 33940
loss: 1.0841008424758911,grad_norm: 0.9999997234524185, iteration: 33941
loss: 0.9854725003242493,grad_norm: 0.8936678335393056, iteration: 33942
loss: 1.0325231552124023,grad_norm: 0.9999992260913471, iteration: 33943
loss: 1.1101559400558472,grad_norm: 0.9999994764081337, iteration: 33944
loss: 1.0093631744384766,grad_norm: 0.9999992084966801, iteration: 33945
loss: 1.0272529125213623,grad_norm: 0.9999991154493185, iteration: 33946
loss: 1.0242843627929688,grad_norm: 0.9999996378893774, iteration: 33947
loss: 1.0101367235183716,grad_norm: 0.999999235670984, iteration: 33948
loss: 1.0609792470932007,grad_norm: 0.9999995060516073, iteration: 33949
loss: 0.9979113340377808,grad_norm: 0.9249478593604724, iteration: 33950
loss: 1.0299842357635498,grad_norm: 0.9999993264306294, iteration: 33951
loss: 1.0040345191955566,grad_norm: 0.9999990363406311, iteration: 33952
loss: 0.9682074189186096,grad_norm: 0.9999991152167593, iteration: 33953
loss: 1.0262151956558228,grad_norm: 0.9999996872406887, iteration: 33954
loss: 1.0195579528808594,grad_norm: 0.9999991036940192, iteration: 33955
loss: 1.011600375175476,grad_norm: 0.9999993490377251, iteration: 33956
loss: 1.0189727544784546,grad_norm: 0.999999099945958, iteration: 33957
loss: 1.0299251079559326,grad_norm: 0.9999996726363288, iteration: 33958
loss: 1.03072190284729,grad_norm: 0.9546970905533785, iteration: 33959
loss: 0.9707622528076172,grad_norm: 0.9999991641376943, iteration: 33960
loss: 1.0908803939819336,grad_norm: 0.9999993960153072, iteration: 33961
loss: 1.0077190399169922,grad_norm: 0.887585259721442, iteration: 33962
loss: 1.0242811441421509,grad_norm: 0.9999991011499454, iteration: 33963
loss: 1.0045912265777588,grad_norm: 0.8532874205158766, iteration: 33964
loss: 1.0043116807937622,grad_norm: 0.9999994705306892, iteration: 33965
loss: 1.0075697898864746,grad_norm: 0.9999990732290317, iteration: 33966
loss: 1.0591161251068115,grad_norm: 0.9940830732620907, iteration: 33967
loss: 1.0229058265686035,grad_norm: 0.9999992325244438, iteration: 33968
loss: 1.0556726455688477,grad_norm: 0.9999998424578181, iteration: 33969
loss: 1.0272971391677856,grad_norm: 0.8962451668659206, iteration: 33970
loss: 1.0365654230117798,grad_norm: 0.9727670162220984, iteration: 33971
loss: 1.0334185361862183,grad_norm: 0.9999991228060329, iteration: 33972
loss: 1.0013538599014282,grad_norm: 0.9999992004907656, iteration: 33973
loss: 0.964865505695343,grad_norm: 0.851277266179126, iteration: 33974
loss: 0.973596453666687,grad_norm: 0.9492112947591961, iteration: 33975
loss: 0.9970420598983765,grad_norm: 0.9999991544556391, iteration: 33976
loss: 1.01766037940979,grad_norm: 0.9399892505135616, iteration: 33977
loss: 1.0094319581985474,grad_norm: 0.9856649630353395, iteration: 33978
loss: 1.0183894634246826,grad_norm: 0.9999991114675543, iteration: 33979
loss: 1.0381240844726562,grad_norm: 0.9942890088152527, iteration: 33980
loss: 1.0212393999099731,grad_norm: 0.9999993156739791, iteration: 33981
loss: 1.0378469228744507,grad_norm: 0.9631157940451281, iteration: 33982
loss: 0.9839919209480286,grad_norm: 0.8557637592691043, iteration: 33983
loss: 0.9747437834739685,grad_norm: 0.9037933677120884, iteration: 33984
loss: 1.0036396980285645,grad_norm: 0.9454588094387432, iteration: 33985
loss: 0.9777095913887024,grad_norm: 0.9999992594351739, iteration: 33986
loss: 0.9922274947166443,grad_norm: 0.73779332530078, iteration: 33987
loss: 1.000443458557129,grad_norm: 0.9999989014927135, iteration: 33988
loss: 1.1228151321411133,grad_norm: 0.9639680237834035, iteration: 33989
loss: 1.0057748556137085,grad_norm: 0.999999832427721, iteration: 33990
loss: 1.0478302240371704,grad_norm: 0.9999990947645896, iteration: 33991
loss: 0.9900246262550354,grad_norm: 0.9999992802466844, iteration: 33992
loss: 1.0434784889221191,grad_norm: 0.9999995542644822, iteration: 33993
loss: 1.0026201009750366,grad_norm: 0.9999991205263515, iteration: 33994
loss: 1.0412683486938477,grad_norm: 0.9999993316399024, iteration: 33995
loss: 1.0356144905090332,grad_norm: 0.9979751072629426, iteration: 33996
loss: 1.0138458013534546,grad_norm: 0.9148704454795097, iteration: 33997
loss: 0.9782484769821167,grad_norm: 0.9999991464835604, iteration: 33998
loss: 1.0132218599319458,grad_norm: 0.9999990871989486, iteration: 33999
loss: 0.9845592379570007,grad_norm: 0.9619949925290227, iteration: 34000
loss: 1.0036531686782837,grad_norm: 0.9607638901922989, iteration: 34001
loss: 0.956386387348175,grad_norm: 0.9665471330907184, iteration: 34002
loss: 0.963099479675293,grad_norm: 0.8654813784211483, iteration: 34003
loss: 0.9884030818939209,grad_norm: 0.7857554599105578, iteration: 34004
loss: 1.0321069955825806,grad_norm: 0.9896433123298308, iteration: 34005
loss: 1.0551823377609253,grad_norm: 0.9999993359786423, iteration: 34006
loss: 1.0546562671661377,grad_norm: 0.9999991394042984, iteration: 34007
loss: 1.049635410308838,grad_norm: 0.9999996585640667, iteration: 34008
loss: 0.9942962527275085,grad_norm: 0.9999991091018078, iteration: 34009
loss: 1.0521138906478882,grad_norm: 0.9999994826713556, iteration: 34010
loss: 0.9853376746177673,grad_norm: 0.8717728669235113, iteration: 34011
loss: 0.973531186580658,grad_norm: 0.9310492694268455, iteration: 34012
loss: 1.0302813053131104,grad_norm: 0.9999996753299533, iteration: 34013
loss: 1.0162038803100586,grad_norm: 0.9999992028853025, iteration: 34014
loss: 1.0023103952407837,grad_norm: 0.999999087209054, iteration: 34015
loss: 1.0315498113632202,grad_norm: 0.9999992296002337, iteration: 34016
loss: 0.9901041984558105,grad_norm: 0.865766481143698, iteration: 34017
loss: 1.0544365644454956,grad_norm: 0.9741594022784535, iteration: 34018
loss: 1.080276608467102,grad_norm: 0.9999997493674475, iteration: 34019
loss: 1.0417861938476562,grad_norm: 0.999998977102407, iteration: 34020
loss: 1.082957148551941,grad_norm: 0.9999990819090346, iteration: 34021
loss: 1.0439609289169312,grad_norm: 0.9075947326672021, iteration: 34022
loss: 1.1012992858886719,grad_norm: 0.9999996839286659, iteration: 34023
loss: 0.9586615562438965,grad_norm: 0.9309492559054975, iteration: 34024
loss: 0.9939410090446472,grad_norm: 0.9520334952157347, iteration: 34025
loss: 0.9987688064575195,grad_norm: 0.9999992677953395, iteration: 34026
loss: 0.989100456237793,grad_norm: 0.9999995190641003, iteration: 34027
loss: 1.0972436666488647,grad_norm: 0.9999998435121151, iteration: 34028
loss: 0.9987229108810425,grad_norm: 0.9779907699264682, iteration: 34029
loss: 1.0559086799621582,grad_norm: 0.9999997932736211, iteration: 34030
loss: 0.9895943999290466,grad_norm: 0.8571035641558953, iteration: 34031
loss: 1.0139553546905518,grad_norm: 0.99999917963652, iteration: 34032
loss: 0.9882696270942688,grad_norm: 0.8947273336461142, iteration: 34033
loss: 1.0113555192947388,grad_norm: 0.9999991339710574, iteration: 34034
loss: 0.9584551453590393,grad_norm: 0.9233707119890484, iteration: 34035
loss: 1.018139362335205,grad_norm: 0.9999990381042215, iteration: 34036
loss: 0.9983272552490234,grad_norm: 0.9999992363373026, iteration: 34037
loss: 0.9967591762542725,grad_norm: 0.9373836517911261, iteration: 34038
loss: 1.0573911666870117,grad_norm: 0.9999999210635226, iteration: 34039
loss: 0.9989007711410522,grad_norm: 0.7780644233289806, iteration: 34040
loss: 1.0389269590377808,grad_norm: 0.999999129141003, iteration: 34041
loss: 0.9951203465461731,grad_norm: 0.9725696340600432, iteration: 34042
loss: 0.9883454442024231,grad_norm: 0.9999992612627401, iteration: 34043
loss: 1.025624394416809,grad_norm: 0.9071664704489579, iteration: 34044
loss: 1.0280206203460693,grad_norm: 0.9477095031136383, iteration: 34045
loss: 1.0799990892410278,grad_norm: 0.9999990180161741, iteration: 34046
loss: 0.9852468371391296,grad_norm: 0.852298003168879, iteration: 34047
loss: 0.9833448529243469,grad_norm: 0.9403404880819255, iteration: 34048
loss: 1.0402324199676514,grad_norm: 0.9999990326752984, iteration: 34049
loss: 1.0015368461608887,grad_norm: 0.9999990590309974, iteration: 34050
loss: 1.0057495832443237,grad_norm: 0.9966488218387677, iteration: 34051
loss: 0.9826402068138123,grad_norm: 0.8195446413071528, iteration: 34052
loss: 0.9998558759689331,grad_norm: 0.9999991630102607, iteration: 34053
loss: 1.0256164073944092,grad_norm: 0.9595783336383041, iteration: 34054
loss: 1.0799808502197266,grad_norm: 0.9999990220963043, iteration: 34055
loss: 1.0380760431289673,grad_norm: 0.9999990748130069, iteration: 34056
loss: 0.9595515131950378,grad_norm: 0.9933877812414988, iteration: 34057
loss: 0.9856859445571899,grad_norm: 0.9999992575157677, iteration: 34058
loss: 1.0484436750411987,grad_norm: 0.9999992582313859, iteration: 34059
loss: 1.0284485816955566,grad_norm: 0.8939985182569291, iteration: 34060
loss: 1.024021029472351,grad_norm: 0.9923103039676174, iteration: 34061
loss: 1.0255900621414185,grad_norm: 0.9244201149974595, iteration: 34062
loss: 1.0487159490585327,grad_norm: 0.9999991424385508, iteration: 34063
loss: 1.0171648263931274,grad_norm: 0.8692808510647606, iteration: 34064
loss: 1.0244370698928833,grad_norm: 0.9999993331067921, iteration: 34065
loss: 0.9862056970596313,grad_norm: 0.9092232067273545, iteration: 34066
loss: 1.0027397871017456,grad_norm: 0.9999991863245888, iteration: 34067
loss: 1.0171716213226318,grad_norm: 0.9999991408546559, iteration: 34068
loss: 1.0501853227615356,grad_norm: 0.9999990742966314, iteration: 34069
loss: 1.0057796239852905,grad_norm: 0.9888175400562997, iteration: 34070
loss: 1.0009222030639648,grad_norm: 0.9999992404447321, iteration: 34071
loss: 0.9824907183647156,grad_norm: 0.9825925623537453, iteration: 34072
loss: 1.0121461153030396,grad_norm: 0.9999990404270175, iteration: 34073
loss: 1.0042898654937744,grad_norm: 0.9999990703864696, iteration: 34074
loss: 1.0462697744369507,grad_norm: 0.999999562133815, iteration: 34075
loss: 1.0237932205200195,grad_norm: 0.9999991034549762, iteration: 34076
loss: 1.0457324981689453,grad_norm: 0.9999993581534206, iteration: 34077
loss: 1.0234135389328003,grad_norm: 0.9999990874008048, iteration: 34078
loss: 0.9858688712120056,grad_norm: 0.7292554636148907, iteration: 34079
loss: 1.0054692029953003,grad_norm: 0.9667895236762933, iteration: 34080
loss: 1.0018671751022339,grad_norm: 0.9262197321918468, iteration: 34081
loss: 1.0097332000732422,grad_norm: 0.8937874092211812, iteration: 34082
loss: 0.9981575608253479,grad_norm: 0.9999989766869298, iteration: 34083
loss: 1.0003106594085693,grad_norm: 0.9999992816027277, iteration: 34084
loss: 0.9903164505958557,grad_norm: 0.9158149881400137, iteration: 34085
loss: 1.0510003566741943,grad_norm: 0.9999990294552684, iteration: 34086
loss: 1.0117368698120117,grad_norm: 0.9999991025280875, iteration: 34087
loss: 0.989416778087616,grad_norm: 0.9910562952823245, iteration: 34088
loss: 1.0470839738845825,grad_norm: 0.9999997590006692, iteration: 34089
loss: 1.0188632011413574,grad_norm: 0.9690701113112673, iteration: 34090
loss: 1.1080509424209595,grad_norm: 0.999999065972989, iteration: 34091
loss: 0.9965887665748596,grad_norm: 0.8441725909532906, iteration: 34092
loss: 1.002751350402832,grad_norm: 0.9581568743457399, iteration: 34093
loss: 1.0323041677474976,grad_norm: 0.8693937313329455, iteration: 34094
loss: 0.9644249677658081,grad_norm: 0.9999991974728064, iteration: 34095
loss: 1.0084384679794312,grad_norm: 0.9004738322062922, iteration: 34096
loss: 0.9944051504135132,grad_norm: 0.9999992039356721, iteration: 34097
loss: 0.9926391839981079,grad_norm: 0.9214470858002916, iteration: 34098
loss: 0.9719196557998657,grad_norm: 0.8824626320774062, iteration: 34099
loss: 1.0097367763519287,grad_norm: 0.9755156612216856, iteration: 34100
loss: 0.9747549295425415,grad_norm: 0.9999990410211798, iteration: 34101
loss: 0.9994378685951233,grad_norm: 0.9999994810644036, iteration: 34102
loss: 0.9964450001716614,grad_norm: 0.8385097677208919, iteration: 34103
loss: 1.0654984712600708,grad_norm: 0.9999994164727409, iteration: 34104
loss: 0.9939953088760376,grad_norm: 0.9999993647415035, iteration: 34105
loss: 1.0420608520507812,grad_norm: 0.9999990580803207, iteration: 34106
loss: 0.9983418583869934,grad_norm: 0.9999989701883283, iteration: 34107
loss: 1.0054261684417725,grad_norm: 0.8747165198991733, iteration: 34108
loss: 0.9841161966323853,grad_norm: 0.9999990693887171, iteration: 34109
loss: 0.9932574033737183,grad_norm: 0.9999992028136659, iteration: 34110
loss: 1.0519630908966064,grad_norm: 0.9999996475810397, iteration: 34111
loss: 0.9950502514839172,grad_norm: 0.9955508000346694, iteration: 34112
loss: 1.0201811790466309,grad_norm: 0.8521232610426617, iteration: 34113
loss: 1.0528032779693604,grad_norm: 0.9999994436321631, iteration: 34114
loss: 1.0416088104248047,grad_norm: 0.999999161348344, iteration: 34115
loss: 0.9646018743515015,grad_norm: 0.9999990450228022, iteration: 34116
loss: 1.0245139598846436,grad_norm: 0.9999992359349806, iteration: 34117
loss: 1.0064027309417725,grad_norm: 0.999999440687684, iteration: 34118
loss: 1.042041301727295,grad_norm: 0.9999991167422759, iteration: 34119
loss: 0.9681339859962463,grad_norm: 0.8635117745181615, iteration: 34120
loss: 0.9988390803337097,grad_norm: 0.9999990340078131, iteration: 34121
loss: 1.0326157808303833,grad_norm: 0.8889313236976353, iteration: 34122
loss: 0.9900026321411133,grad_norm: 0.9316724338797107, iteration: 34123
loss: 0.9901880025863647,grad_norm: 0.90204158804854, iteration: 34124
loss: 1.0090131759643555,grad_norm: 0.9999989782992627, iteration: 34125
loss: 0.9755654335021973,grad_norm: 0.8399551076039087, iteration: 34126
loss: 1.0065650939941406,grad_norm: 0.9999994756534514, iteration: 34127
loss: 1.0100162029266357,grad_norm: 0.9999990454787003, iteration: 34128
loss: 1.0088987350463867,grad_norm: 0.9999991426023098, iteration: 34129
loss: 1.0796685218811035,grad_norm: 0.9999993795095315, iteration: 34130
loss: 0.9539485573768616,grad_norm: 0.9999992117916716, iteration: 34131
loss: 1.0272316932678223,grad_norm: 0.9160912214222778, iteration: 34132
loss: 1.0086135864257812,grad_norm: 0.9999992230970135, iteration: 34133
loss: 1.040919542312622,grad_norm: 0.9999991921473319, iteration: 34134
loss: 1.0605132579803467,grad_norm: 0.9999996166346441, iteration: 34135
loss: 0.9989760518074036,grad_norm: 0.9865921420955023, iteration: 34136
loss: 0.9984754323959351,grad_norm: 0.99999902474315, iteration: 34137
loss: 1.0593886375427246,grad_norm: 0.9999993565166422, iteration: 34138
loss: 0.9867849349975586,grad_norm: 0.9999993727527016, iteration: 34139
loss: 0.9881382584571838,grad_norm: 0.999999429951817, iteration: 34140
loss: 1.0437787771224976,grad_norm: 0.9999993181786387, iteration: 34141
loss: 0.9915614724159241,grad_norm: 0.9999998925261493, iteration: 34142
loss: 1.0437923669815063,grad_norm: 0.9999993662918611, iteration: 34143
loss: 1.056962490081787,grad_norm: 0.9999995661662849, iteration: 34144
loss: 1.0382591485977173,grad_norm: 0.9999996466731913, iteration: 34145
loss: 0.9567599892616272,grad_norm: 0.9999993092036439, iteration: 34146
loss: 1.0732777118682861,grad_norm: 0.9086595312209921, iteration: 34147
loss: 1.0010840892791748,grad_norm: 0.9999993061071286, iteration: 34148
loss: 1.0088247060775757,grad_norm: 0.9999995352938084, iteration: 34149
loss: 1.0135307312011719,grad_norm: 0.9999991946343079, iteration: 34150
loss: 1.0062271356582642,grad_norm: 0.9877877104020163, iteration: 34151
loss: 0.9634935259819031,grad_norm: 0.8586669686089823, iteration: 34152
loss: 1.029502272605896,grad_norm: 0.979473280646495, iteration: 34153
loss: 1.0122764110565186,grad_norm: 0.9999993323446646, iteration: 34154
loss: 0.9956027269363403,grad_norm: 0.9999990332855127, iteration: 34155
loss: 1.0314687490463257,grad_norm: 0.9322821798778753, iteration: 34156
loss: 1.0037463903427124,grad_norm: 0.9999991172042952, iteration: 34157
loss: 0.9876568913459778,grad_norm: 0.9999995576495729, iteration: 34158
loss: 0.9667215943336487,grad_norm: 0.9906209807096645, iteration: 34159
loss: 1.0331615209579468,grad_norm: 0.9999992530449217, iteration: 34160
loss: 1.0085396766662598,grad_norm: 0.9999990172757084, iteration: 34161
loss: 0.9376262426376343,grad_norm: 0.999999221492833, iteration: 34162
loss: 1.0257512331008911,grad_norm: 0.9897563608020522, iteration: 34163
loss: 1.0296388864517212,grad_norm: 0.9765187804426181, iteration: 34164
loss: 0.9711328744888306,grad_norm: 0.752396717872065, iteration: 34165
loss: 1.0353777408599854,grad_norm: 0.9740335839427974, iteration: 34166
loss: 0.9962114095687866,grad_norm: 0.8592199294503535, iteration: 34167
loss: 1.0008318424224854,grad_norm: 0.9631279657679228, iteration: 34168
loss: 1.0301307439804077,grad_norm: 0.9999994324320199, iteration: 34169
loss: 0.9739622473716736,grad_norm: 0.9999990048163756, iteration: 34170
loss: 1.071101427078247,grad_norm: 0.9668883527419903, iteration: 34171
loss: 0.9980204701423645,grad_norm: 0.8599001699285693, iteration: 34172
loss: 1.042962908744812,grad_norm: 0.9999992973894335, iteration: 34173
loss: 1.0275402069091797,grad_norm: 0.9719449091015041, iteration: 34174
loss: 0.9994553923606873,grad_norm: 0.9999990411215867, iteration: 34175
loss: 0.9809073209762573,grad_norm: 0.9999992279860981, iteration: 34176
loss: 0.9679446816444397,grad_norm: 0.859502934076276, iteration: 34177
loss: 0.9938051700592041,grad_norm: 0.9999991259609747, iteration: 34178
loss: 0.9983066320419312,grad_norm: 0.9999990507309026, iteration: 34179
loss: 0.9723838567733765,grad_norm: 0.9999990946526777, iteration: 34180
loss: 1.0303236246109009,grad_norm: 0.9999990543649859, iteration: 34181
loss: 0.9878419637680054,grad_norm: 0.927246992673988, iteration: 34182
loss: 1.1280397176742554,grad_norm: 0.9999994674514773, iteration: 34183
loss: 0.971824049949646,grad_norm: 0.9999991292532917, iteration: 34184
loss: 0.9984813332557678,grad_norm: 0.9999991544051426, iteration: 34185
loss: 1.0172927379608154,grad_norm: 0.9999992615333385, iteration: 34186
loss: 0.9875722527503967,grad_norm: 0.8697261130955829, iteration: 34187
loss: 1.0693553686141968,grad_norm: 0.9999997969035592, iteration: 34188
loss: 1.0174111127853394,grad_norm: 0.927600207511376, iteration: 34189
loss: 0.9492080807685852,grad_norm: 0.9999991598391719, iteration: 34190
loss: 1.0118931531906128,grad_norm: 0.9999991658541614, iteration: 34191
loss: 1.016121506690979,grad_norm: 0.9999991257589538, iteration: 34192
loss: 1.0696117877960205,grad_norm: 0.9999995556715835, iteration: 34193
loss: 1.0012013912200928,grad_norm: 0.9999991421699234, iteration: 34194
loss: 1.018836259841919,grad_norm: 0.9999992247502101, iteration: 34195
loss: 1.0470423698425293,grad_norm: 0.9999991940596665, iteration: 34196
loss: 1.0416932106018066,grad_norm: 0.936917427177989, iteration: 34197
loss: 1.0193567276000977,grad_norm: 0.9371288971754164, iteration: 34198
loss: 1.0006194114685059,grad_norm: 0.9947601904748086, iteration: 34199
loss: 1.009628415107727,grad_norm: 0.910062707708101, iteration: 34200
loss: 0.9736473560333252,grad_norm: 0.9926856285701652, iteration: 34201
loss: 1.0025001764297485,grad_norm: 0.9999992861356505, iteration: 34202
loss: 1.033321499824524,grad_norm: 0.9643530768109149, iteration: 34203
loss: 1.0140293836593628,grad_norm: 0.7850825390431485, iteration: 34204
loss: 0.9836001992225647,grad_norm: 0.9270981870625757, iteration: 34205
loss: 1.0131748914718628,grad_norm: 0.9999993578464176, iteration: 34206
loss: 0.9822104573249817,grad_norm: 0.9999990535541523, iteration: 34207
loss: 1.0585626363754272,grad_norm: 0.9915122230449614, iteration: 34208
loss: 0.9691575765609741,grad_norm: 0.976179346661774, iteration: 34209
loss: 1.003719449043274,grad_norm: 0.9272307178039835, iteration: 34210
loss: 1.0154191255569458,grad_norm: 0.9999991434528128, iteration: 34211
loss: 1.0569143295288086,grad_norm: 0.9999996668511976, iteration: 34212
loss: 1.0069934129714966,grad_norm: 0.9975363936146557, iteration: 34213
loss: 1.01050865650177,grad_norm: 0.9999527473236195, iteration: 34214
loss: 0.9470525979995728,grad_norm: 0.9136985456002786, iteration: 34215
loss: 0.9834834933280945,grad_norm: 0.9999992826310976, iteration: 34216
loss: 0.9658877849578857,grad_norm: 0.8845019516348808, iteration: 34217
loss: 0.9835612177848816,grad_norm: 0.9999991760025568, iteration: 34218
loss: 1.0335052013397217,grad_norm: 0.9999990828941121, iteration: 34219
loss: 0.9871291518211365,grad_norm: 0.8770965788441921, iteration: 34220
loss: 0.9829664826393127,grad_norm: 0.9999996261064743, iteration: 34221
loss: 1.0556739568710327,grad_norm: 0.9999993775717188, iteration: 34222
loss: 1.0272457599639893,grad_norm: 0.9999992081366237, iteration: 34223
loss: 0.9604718089103699,grad_norm: 0.9448124554855349, iteration: 34224
loss: 0.9994900226593018,grad_norm: 0.9999991813638419, iteration: 34225
loss: 1.0189614295959473,grad_norm: 0.9999990064089994, iteration: 34226
loss: 1.0874998569488525,grad_norm: 0.9999994549344126, iteration: 34227
loss: 1.0516809225082397,grad_norm: 0.9999993185302252, iteration: 34228
loss: 1.030706524848938,grad_norm: 0.8091219471346969, iteration: 34229
loss: 1.003334641456604,grad_norm: 0.9999991112641784, iteration: 34230
loss: 0.9909313917160034,grad_norm: 0.9600249252105105, iteration: 34231
loss: 1.0194571018218994,grad_norm: 0.9999991467683496, iteration: 34232
loss: 1.0271042585372925,grad_norm: 0.9999991502565344, iteration: 34233
loss: 0.962414562702179,grad_norm: 0.9468652231118253, iteration: 34234
loss: 0.993493378162384,grad_norm: 0.8360381401083872, iteration: 34235
loss: 0.9823740720748901,grad_norm: 0.9999991054100106, iteration: 34236
loss: 1.0967247486114502,grad_norm: 0.9865143480161626, iteration: 34237
loss: 0.9697970747947693,grad_norm: 0.8683351869152196, iteration: 34238
loss: 0.9926166534423828,grad_norm: 0.9172959191783305, iteration: 34239
loss: 1.048401117324829,grad_norm: 0.9999991356828724, iteration: 34240
loss: 0.9970747232437134,grad_norm: 0.9999992333906561, iteration: 34241
loss: 1.1285604238510132,grad_norm: 0.9999991020842163, iteration: 34242
loss: 0.9851751327514648,grad_norm: 0.9858607340439235, iteration: 34243
loss: 1.0486878156661987,grad_norm: 0.8992162099128401, iteration: 34244
loss: 1.0056654214859009,grad_norm: 0.9124896946702593, iteration: 34245
loss: 1.0231635570526123,grad_norm: 0.9999992169802294, iteration: 34246
loss: 1.0338610410690308,grad_norm: 0.8812833937053102, iteration: 34247
loss: 0.9834651350975037,grad_norm: 0.9757090147441738, iteration: 34248
loss: 1.021427869796753,grad_norm: 0.9999997509361223, iteration: 34249
loss: 1.0629119873046875,grad_norm: 0.9999990811373781, iteration: 34250
loss: 1.0087016820907593,grad_norm: 0.9717760949442347, iteration: 34251
loss: 1.044560194015503,grad_norm: 0.9999992705445547, iteration: 34252
loss: 1.0157771110534668,grad_norm: 0.9387090830047162, iteration: 34253
loss: 1.01890230178833,grad_norm: 0.9305539708970262, iteration: 34254
loss: 1.0458552837371826,grad_norm: 0.8740893024578603, iteration: 34255
loss: 1.0047004222869873,grad_norm: 0.999999246126372, iteration: 34256
loss: 1.0413310527801514,grad_norm: 0.904414393302712, iteration: 34257
loss: 1.0086164474487305,grad_norm: 0.8316093450679851, iteration: 34258
loss: 1.042681336402893,grad_norm: 0.9810363082632042, iteration: 34259
loss: 0.9908750653266907,grad_norm: 0.9999991454842543, iteration: 34260
loss: 1.0363739728927612,grad_norm: 0.9999992802832164, iteration: 34261
loss: 1.0211632251739502,grad_norm: 0.99999911480926, iteration: 34262
loss: 1.002812147140503,grad_norm: 0.9999992289588017, iteration: 34263
loss: 1.0448378324508667,grad_norm: 0.9999993796654774, iteration: 34264
loss: 1.0022372007369995,grad_norm: 0.8976417314332439, iteration: 34265
loss: 1.0953577756881714,grad_norm: 0.9999991574431223, iteration: 34266
loss: 0.9785915613174438,grad_norm: 0.9999991188880972, iteration: 34267
loss: 0.9974154233932495,grad_norm: 0.9999995654341199, iteration: 34268
loss: 1.0020134449005127,grad_norm: 0.9658934434866815, iteration: 34269
loss: 1.0259994268417358,grad_norm: 0.9999991582587552, iteration: 34270
loss: 1.027220606803894,grad_norm: 0.999999076478369, iteration: 34271
loss: 1.012344479560852,grad_norm: 0.9942570545837434, iteration: 34272
loss: 1.0265735387802124,grad_norm: 0.999999347921724, iteration: 34273
loss: 1.0168232917785645,grad_norm: 0.9947694219146032, iteration: 34274
loss: 1.0511971712112427,grad_norm: 0.9999995637949612, iteration: 34275
loss: 0.9995536804199219,grad_norm: 0.9268463053068552, iteration: 34276
loss: 0.9900328516960144,grad_norm: 0.8252749043671499, iteration: 34277
loss: 1.0541712045669556,grad_norm: 0.9999993789279137, iteration: 34278
loss: 0.9796249866485596,grad_norm: 0.9999990309191873, iteration: 34279
loss: 0.9789227247238159,grad_norm: 0.9006355184288825, iteration: 34280
loss: 0.9927727580070496,grad_norm: 0.9999991232467997, iteration: 34281
loss: 0.9968196749687195,grad_norm: 0.8076501534926259, iteration: 34282
loss: 0.995913028717041,grad_norm: 0.8610262952828788, iteration: 34283
loss: 1.020918369293213,grad_norm: 0.9999991675340307, iteration: 34284
loss: 1.031421422958374,grad_norm: 0.9999992628492362, iteration: 34285
loss: 0.985418975353241,grad_norm: 0.8281452263375717, iteration: 34286
loss: 1.0215831995010376,grad_norm: 0.9855224324317048, iteration: 34287
loss: 1.037467122077942,grad_norm: 0.9999993221903866, iteration: 34288
loss: 1.010798454284668,grad_norm: 0.8871276390575815, iteration: 34289
loss: 0.9773826003074646,grad_norm: 0.9999991254158614, iteration: 34290
loss: 1.0504361391067505,grad_norm: 0.9999990236436724, iteration: 34291
loss: 1.0156536102294922,grad_norm: 0.980701792843271, iteration: 34292
loss: 0.9683484435081482,grad_norm: 0.9999990899909784, iteration: 34293
loss: 0.9857893586158752,grad_norm: 0.9999991473660703, iteration: 34294
loss: 0.9975922703742981,grad_norm: 0.9841076632271575, iteration: 34295
loss: 1.0361772775650024,grad_norm: 0.9999997503804059, iteration: 34296
loss: 0.9651334881782532,grad_norm: 0.9999991912883517, iteration: 34297
loss: 0.993394136428833,grad_norm: 0.8932539311038183, iteration: 34298
loss: 1.0599185228347778,grad_norm: 0.9999992137846669, iteration: 34299
loss: 1.0032049417495728,grad_norm: 0.9999990037807392, iteration: 34300
loss: 1.0110269784927368,grad_norm: 0.9999993788843491, iteration: 34301
loss: 0.9887118339538574,grad_norm: 0.9999991227977241, iteration: 34302
loss: 1.0095781087875366,grad_norm: 0.8510982205946653, iteration: 34303
loss: 1.0148341655731201,grad_norm: 0.8874858510491493, iteration: 34304
loss: 1.0159623622894287,grad_norm: 0.9767471390984955, iteration: 34305
loss: 1.0361931324005127,grad_norm: 0.9822790900623353, iteration: 34306
loss: 1.004520297050476,grad_norm: 0.9999990337306068, iteration: 34307
loss: 1.0044329166412354,grad_norm: 0.9499859787326167, iteration: 34308
loss: 1.0167207717895508,grad_norm: 0.944617059074065, iteration: 34309
loss: 1.0429179668426514,grad_norm: 0.9999996304495372, iteration: 34310
loss: 0.998928964138031,grad_norm: 0.957548120628861, iteration: 34311
loss: 1.0579044818878174,grad_norm: 0.9999990913338216, iteration: 34312
loss: 0.9999969005584717,grad_norm: 0.9999991558336556, iteration: 34313
loss: 1.0159684419631958,grad_norm: 0.9999991372507891, iteration: 34314
loss: 1.0292105674743652,grad_norm: 0.9999990853994558, iteration: 34315
loss: 1.0400323867797852,grad_norm: 0.9945685171662997, iteration: 34316
loss: 0.9873397946357727,grad_norm: 0.9999990551587922, iteration: 34317
loss: 0.9816251397132874,grad_norm: 0.9568135742774494, iteration: 34318
loss: 1.0343730449676514,grad_norm: 0.9999994350179061, iteration: 34319
loss: 1.0019780397415161,grad_norm: 0.8611611195549054, iteration: 34320
loss: 0.980955183506012,grad_norm: 0.9073366096599268, iteration: 34321
loss: 1.0360956192016602,grad_norm: 0.8945987535934128, iteration: 34322
loss: 1.0409388542175293,grad_norm: 0.9117197748702837, iteration: 34323
loss: 1.0119292736053467,grad_norm: 0.9999991046750827, iteration: 34324
loss: 1.0124272108078003,grad_norm: 0.9999991054849939, iteration: 34325
loss: 1.000388503074646,grad_norm: 0.9999993739046643, iteration: 34326
loss: 0.9989436268806458,grad_norm: 0.985244834279123, iteration: 34327
loss: 1.0167559385299683,grad_norm: 0.9999990696778562, iteration: 34328
loss: 1.0097367763519287,grad_norm: 0.9558407409797418, iteration: 34329
loss: 0.9936795234680176,grad_norm: 0.8489902618118264, iteration: 34330
loss: 1.008532166481018,grad_norm: 0.9999990164098519, iteration: 34331
loss: 1.0173183679580688,grad_norm: 0.8242248818184159, iteration: 34332
loss: 1.0172913074493408,grad_norm: 0.9999989417257544, iteration: 34333
loss: 1.0481027364730835,grad_norm: 0.9999996986159396, iteration: 34334
loss: 1.0075818300247192,grad_norm: 0.9013025170866562, iteration: 34335
loss: 1.003497838973999,grad_norm: 0.9999990666348203, iteration: 34336
loss: 1.0029900074005127,grad_norm: 0.999999231647541, iteration: 34337
loss: 1.0362005233764648,grad_norm: 0.9393188276297004, iteration: 34338
loss: 1.0041066408157349,grad_norm: 0.9162992760145864, iteration: 34339
loss: 1.0323954820632935,grad_norm: 0.909675171016234, iteration: 34340
loss: 0.9909834861755371,grad_norm: 0.9999991620727718, iteration: 34341
loss: 1.0236096382141113,grad_norm: 0.9504946378437457, iteration: 34342
loss: 0.9794183373451233,grad_norm: 0.9999993365567366, iteration: 34343
loss: 1.0257899761199951,grad_norm: 0.9755376585143747, iteration: 34344
loss: 1.0274828672409058,grad_norm: 0.8144245269159525, iteration: 34345
loss: 0.9930921196937561,grad_norm: 0.8097062580696979, iteration: 34346
loss: 0.9930204153060913,grad_norm: 0.8959754313046849, iteration: 34347
loss: 1.073188066482544,grad_norm: 0.9999997159485651, iteration: 34348
loss: 0.9850711226463318,grad_norm: 0.9467116442685071, iteration: 34349
loss: 0.9851046800613403,grad_norm: 0.7976506655388559, iteration: 34350
loss: 1.0073931217193604,grad_norm: 0.9988551122851177, iteration: 34351
loss: 1.0369421243667603,grad_norm: 0.9999991817742445, iteration: 34352
loss: 1.0342615842819214,grad_norm: 0.9999991208924052, iteration: 34353
loss: 0.9986467957496643,grad_norm: 0.9899975550830138, iteration: 34354
loss: 1.0199239253997803,grad_norm: 0.9999991692304668, iteration: 34355
loss: 1.045593023300171,grad_norm: 0.9331304936568647, iteration: 34356
loss: 1.0273953676223755,grad_norm: 0.9999992867895082, iteration: 34357
loss: 1.0093356370925903,grad_norm: 0.9176484582514416, iteration: 34358
loss: 1.0375233888626099,grad_norm: 0.9999998125046069, iteration: 34359
loss: 1.0451722145080566,grad_norm: 0.9999992413682541, iteration: 34360
loss: 1.021318793296814,grad_norm: 0.9408040372123101, iteration: 34361
loss: 1.0174232721328735,grad_norm: 0.908564166437731, iteration: 34362
loss: 0.9411846399307251,grad_norm: 0.9644699621476905, iteration: 34363
loss: 1.0511491298675537,grad_norm: 0.8586001851181914, iteration: 34364
loss: 0.9655094146728516,grad_norm: 0.9999994185317458, iteration: 34365
loss: 1.0169777870178223,grad_norm: 0.9999990939372664, iteration: 34366
loss: 0.9928157329559326,grad_norm: 0.9822994893144866, iteration: 34367
loss: 1.0324593782424927,grad_norm: 0.9999991089019039, iteration: 34368
loss: 1.0521788597106934,grad_norm: 0.8562185366733355, iteration: 34369
loss: 0.9996283054351807,grad_norm: 0.9785778659365308, iteration: 34370
loss: 1.0118707418441772,grad_norm: 0.8943591927961988, iteration: 34371
loss: 1.0334486961364746,grad_norm: 0.9999992489295719, iteration: 34372
loss: 1.0071834325790405,grad_norm: 0.8814486213553197, iteration: 34373
loss: 1.0264335870742798,grad_norm: 0.9999992022139014, iteration: 34374
loss: 1.0139527320861816,grad_norm: 0.9999997922201147, iteration: 34375
loss: 1.013839602470398,grad_norm: 0.9999991562321358, iteration: 34376
loss: 0.980786144733429,grad_norm: 0.9282936802141447, iteration: 34377
loss: 0.985327959060669,grad_norm: 0.9999992772399457, iteration: 34378
loss: 0.9555153250694275,grad_norm: 0.999999162497753, iteration: 34379
loss: 0.9761404395103455,grad_norm: 0.9999990273908522, iteration: 34380
loss: 0.9957473874092102,grad_norm: 0.9690107696990545, iteration: 34381
loss: 0.9903642535209656,grad_norm: 0.9999991815114239, iteration: 34382
loss: 0.9974751472473145,grad_norm: 0.8321855859403345, iteration: 34383
loss: 1.0363383293151855,grad_norm: 0.9749872318962418, iteration: 34384
loss: 1.0152055025100708,grad_norm: 0.9338165721276988, iteration: 34385
loss: 0.9830854535102844,grad_norm: 0.9999992059803278, iteration: 34386
loss: 1.0131322145462036,grad_norm: 0.962706214703313, iteration: 34387
loss: 1.028458833694458,grad_norm: 0.9999997934144881, iteration: 34388
loss: 0.9982763528823853,grad_norm: 0.9999993659663217, iteration: 34389
loss: 1.0426455736160278,grad_norm: 0.8614922049187619, iteration: 34390
loss: 1.0190937519073486,grad_norm: 0.9040375228394607, iteration: 34391
loss: 0.991558313369751,grad_norm: 0.9177729465009156, iteration: 34392
loss: 1.1081562042236328,grad_norm: 0.9999996689945247, iteration: 34393
loss: 1.0044891834259033,grad_norm: 0.9144995660258286, iteration: 34394
loss: 0.9978341460227966,grad_norm: 0.8725994904256043, iteration: 34395
loss: 0.9974101781845093,grad_norm: 0.9402017768026577, iteration: 34396
loss: 1.0123004913330078,grad_norm: 0.7377684312915467, iteration: 34397
loss: 1.008662223815918,grad_norm: 0.807313948620527, iteration: 34398
loss: 0.9698176980018616,grad_norm: 0.9999990645589717, iteration: 34399
loss: 0.9856670498847961,grad_norm: 0.9066976174613335, iteration: 34400
loss: 0.968732476234436,grad_norm: 0.9222050539677873, iteration: 34401
loss: 0.9924418926239014,grad_norm: 0.7481741954002278, iteration: 34402
loss: 1.0098084211349487,grad_norm: 0.9569180128620837, iteration: 34403
loss: 0.9999834299087524,grad_norm: 0.959389244190866, iteration: 34404
loss: 0.9834447503089905,grad_norm: 0.9999993844989217, iteration: 34405
loss: 1.0181784629821777,grad_norm: 0.893917693174194, iteration: 34406
loss: 1.0492855310440063,grad_norm: 0.9019535971880297, iteration: 34407
loss: 1.009487509727478,grad_norm: 0.9542665000368253, iteration: 34408
loss: 0.9859402179718018,grad_norm: 0.9703484514827108, iteration: 34409
loss: 1.033585786819458,grad_norm: 0.9453253349923286, iteration: 34410
loss: 0.9866895079612732,grad_norm: 0.8746833113544069, iteration: 34411
loss: 0.9835373163223267,grad_norm: 0.99999914587583, iteration: 34412
loss: 1.0493875741958618,grad_norm: 0.9999991457098024, iteration: 34413
loss: 1.009891390800476,grad_norm: 0.8580600301698404, iteration: 34414
loss: 0.9774197340011597,grad_norm: 0.9999990465239558, iteration: 34415
loss: 1.0014877319335938,grad_norm: 0.9999995449667491, iteration: 34416
loss: 0.977234423160553,grad_norm: 0.9765411671645851, iteration: 34417
loss: 1.0146812200546265,grad_norm: 0.94562879143412, iteration: 34418
loss: 1.0243653059005737,grad_norm: 0.9999990575547354, iteration: 34419
loss: 1.0950807332992554,grad_norm: 0.999999901761077, iteration: 34420
loss: 1.0387862920761108,grad_norm: 0.9999996085885986, iteration: 34421
loss: 1.0710737705230713,grad_norm: 0.9999991937220082, iteration: 34422
loss: 0.9939061999320984,grad_norm: 0.9434596298024452, iteration: 34423
loss: 1.0088804960250854,grad_norm: 0.9999989922607034, iteration: 34424
loss: 1.0158469676971436,grad_norm: 0.9999991201699665, iteration: 34425
loss: 1.0446364879608154,grad_norm: 0.9999991869963807, iteration: 34426
loss: 0.9987831711769104,grad_norm: 0.9999991981130016, iteration: 34427
loss: 1.0125596523284912,grad_norm: 0.9999989966994296, iteration: 34428
loss: 1.0142203569412231,grad_norm: 0.9999995772597529, iteration: 34429
loss: 0.9904735088348389,grad_norm: 0.8662094025080838, iteration: 34430
loss: 1.0388871431350708,grad_norm: 0.9601448319441069, iteration: 34431
loss: 1.0245245695114136,grad_norm: 0.8768601673399631, iteration: 34432
loss: 1.0190812349319458,grad_norm: 0.8639055174513959, iteration: 34433
loss: 1.086059331893921,grad_norm: 0.9999993650469153, iteration: 34434
loss: 1.0141180753707886,grad_norm: 0.914599296455767, iteration: 34435
loss: 1.0400773286819458,grad_norm: 0.9999994105582414, iteration: 34436
loss: 1.0145469903945923,grad_norm: 0.999999105496394, iteration: 34437
loss: 1.0046570301055908,grad_norm: 0.8136611277303925, iteration: 34438
loss: 1.0237542390823364,grad_norm: 0.9999989599659636, iteration: 34439
loss: 1.0338534116744995,grad_norm: 0.993066064869416, iteration: 34440
loss: 1.0604490041732788,grad_norm: 0.9999997391494889, iteration: 34441
loss: 1.0240428447723389,grad_norm: 0.9999996315140957, iteration: 34442
loss: 1.0365303754806519,grad_norm: 0.9999994958896182, iteration: 34443
loss: 1.0609968900680542,grad_norm: 0.9999994108567221, iteration: 34444
loss: 0.977418839931488,grad_norm: 0.9452575882297142, iteration: 34445
loss: 1.0212093591690063,grad_norm: 0.8157294774333003, iteration: 34446
loss: 0.957754373550415,grad_norm: 0.8904145911677867, iteration: 34447
loss: 1.0156716108322144,grad_norm: 0.9032616620001411, iteration: 34448
loss: 1.0183314085006714,grad_norm: 0.9999990157286326, iteration: 34449
loss: 1.0205594301223755,grad_norm: 0.9999992191334883, iteration: 34450
loss: 0.9914746284484863,grad_norm: 0.9999991488988571, iteration: 34451
loss: 1.004369854927063,grad_norm: 0.8972819543058079, iteration: 34452
loss: 1.0710177421569824,grad_norm: 0.9999998190868065, iteration: 34453
loss: 1.0098172426223755,grad_norm: 0.9999990236970042, iteration: 34454
loss: 1.0156558752059937,grad_norm: 0.962558017164448, iteration: 34455
loss: 1.0485001802444458,grad_norm: 0.9994103043100355, iteration: 34456
loss: 0.9936497211456299,grad_norm: 0.9348933386540811, iteration: 34457
loss: 1.049851417541504,grad_norm: 0.9999997466423297, iteration: 34458
loss: 1.0857058763504028,grad_norm: 0.9999997392701706, iteration: 34459
loss: 0.9995510578155518,grad_norm: 0.999999176319791, iteration: 34460
loss: 1.006368637084961,grad_norm: 0.8713703504861653, iteration: 34461
loss: 0.9765406847000122,grad_norm: 0.999999178209457, iteration: 34462
loss: 0.9865128397941589,grad_norm: 0.9999989644530614, iteration: 34463
loss: 1.02009916305542,grad_norm: 0.7393927977497252, iteration: 34464
loss: 1.0205038785934448,grad_norm: 0.9999999793410531, iteration: 34465
loss: 1.0340055227279663,grad_norm: 0.999999394439455, iteration: 34466
loss: 1.010971188545227,grad_norm: 0.7408253222069444, iteration: 34467
loss: 0.9954829216003418,grad_norm: 0.9933573892321311, iteration: 34468
loss: 1.0916858911514282,grad_norm: 0.999999865394232, iteration: 34469
loss: 1.0033702850341797,grad_norm: 0.9316390531515957, iteration: 34470
loss: 0.9873545169830322,grad_norm: 0.8470619699161268, iteration: 34471
loss: 1.0055614709854126,grad_norm: 0.9847521455474394, iteration: 34472
loss: 0.9875286817550659,grad_norm: 0.9488770671356825, iteration: 34473
loss: 1.030375599861145,grad_norm: 0.9999996146902234, iteration: 34474
loss: 1.0035468339920044,grad_norm: 0.8239318234998229, iteration: 34475
loss: 0.965209424495697,grad_norm: 0.9999993011819339, iteration: 34476
loss: 1.004704475402832,grad_norm: 0.9785244138754994, iteration: 34477
loss: 1.0246179103851318,grad_norm: 0.9999991051296537, iteration: 34478
loss: 1.027911901473999,grad_norm: 0.9999992420112357, iteration: 34479
loss: 1.0307893753051758,grad_norm: 0.9999992890260023, iteration: 34480
loss: 1.0490200519561768,grad_norm: 0.9999994726017625, iteration: 34481
loss: 0.9891754388809204,grad_norm: 0.9120173612521377, iteration: 34482
loss: 0.991820216178894,grad_norm: 0.9999990201009676, iteration: 34483
loss: 1.0328749418258667,grad_norm: 0.8328008997135784, iteration: 34484
loss: 1.0157768726348877,grad_norm: 0.9677757098651637, iteration: 34485
loss: 0.9604731202125549,grad_norm: 0.9999991005869893, iteration: 34486
loss: 0.9914591908454895,grad_norm: 0.9109512397461997, iteration: 34487
loss: 1.039747953414917,grad_norm: 0.9999994324254612, iteration: 34488
loss: 0.9937950968742371,grad_norm: 0.9999990680173102, iteration: 34489
loss: 1.0095417499542236,grad_norm: 0.9999991042088665, iteration: 34490
loss: 1.016299843788147,grad_norm: 0.9999998069334497, iteration: 34491
loss: 1.014752984046936,grad_norm: 0.9999991192267919, iteration: 34492
loss: 1.0244535207748413,grad_norm: 0.9999992534093535, iteration: 34493
loss: 1.0199782848358154,grad_norm: 0.9104300627076314, iteration: 34494
loss: 1.033239483833313,grad_norm: 0.9560351316182297, iteration: 34495
loss: 1.0026227235794067,grad_norm: 0.9909913079224217, iteration: 34496
loss: 0.999019980430603,grad_norm: 0.923409659860703, iteration: 34497
loss: 0.995853841304779,grad_norm: 0.9999993313078418, iteration: 34498
loss: 1.009072184562683,grad_norm: 0.9999997203810085, iteration: 34499
loss: 1.0011266469955444,grad_norm: 0.9999993418875665, iteration: 34500
loss: 1.0237683057785034,grad_norm: 0.9061150552528859, iteration: 34501
loss: 0.9837650060653687,grad_norm: 0.9526866528874297, iteration: 34502
loss: 1.0069477558135986,grad_norm: 0.9045316864695765, iteration: 34503
loss: 1.0300710201263428,grad_norm: 0.9999991036426805, iteration: 34504
loss: 1.0193129777908325,grad_norm: 0.9458114158792078, iteration: 34505
loss: 1.0282078981399536,grad_norm: 0.8472493690431419, iteration: 34506
loss: 1.0141451358795166,grad_norm: 0.9999991477733551, iteration: 34507
loss: 1.0254359245300293,grad_norm: 0.8570597843935773, iteration: 34508
loss: 0.9886413216590881,grad_norm: 0.9907438430778231, iteration: 34509
loss: 0.9753444790840149,grad_norm: 0.9479578096326244, iteration: 34510
loss: 1.0669153928756714,grad_norm: 0.9999998321528262, iteration: 34511
loss: 0.9956492781639099,grad_norm: 0.9999994950905295, iteration: 34512
loss: 0.9798348546028137,grad_norm: 0.9903325677739023, iteration: 34513
loss: 1.068673849105835,grad_norm: 0.9999992299999942, iteration: 34514
loss: 1.0631980895996094,grad_norm: 0.9999992044943775, iteration: 34515
loss: 0.997555136680603,grad_norm: 0.9999998685076281, iteration: 34516
loss: 0.9977649450302124,grad_norm: 0.7563284294790914, iteration: 34517
loss: 1.0423890352249146,grad_norm: 0.9999992608631336, iteration: 34518
loss: 1.0405570268630981,grad_norm: 0.999999242116488, iteration: 34519
loss: 0.9809611439704895,grad_norm: 0.9999989579946699, iteration: 34520
loss: 0.9666086435317993,grad_norm: 0.9772722006432639, iteration: 34521
loss: 1.0334844589233398,grad_norm: 0.9999995031375326, iteration: 34522
loss: 1.0224517583847046,grad_norm: 0.7837063229322343, iteration: 34523
loss: 1.0467898845672607,grad_norm: 0.9999991536233751, iteration: 34524
loss: 0.9868234395980835,grad_norm: 0.9999991185612752, iteration: 34525
loss: 1.0021636486053467,grad_norm: 0.9153696404993481, iteration: 34526
loss: 0.9942315220832825,grad_norm: 0.8750280144453761, iteration: 34527
loss: 0.9861819744110107,grad_norm: 0.9885254628784117, iteration: 34528
loss: 1.008481502532959,grad_norm: 0.9999990284413136, iteration: 34529
loss: 1.024699330329895,grad_norm: 0.9999990826516761, iteration: 34530
loss: 0.9830005168914795,grad_norm: 0.9999991386773576, iteration: 34531
loss: 1.00802481174469,grad_norm: 0.9999990462605756, iteration: 34532
loss: 0.99372798204422,grad_norm: 0.9139265463944499, iteration: 34533
loss: 1.0025372505187988,grad_norm: 0.9999993873405613, iteration: 34534
loss: 0.9710649847984314,grad_norm: 0.7915596707488528, iteration: 34535
loss: 1.0505688190460205,grad_norm: 0.9999990473787714, iteration: 34536
loss: 1.017804741859436,grad_norm: 0.8939153948636411, iteration: 34537
loss: 1.0031719207763672,grad_norm: 0.903104768792699, iteration: 34538
loss: 1.0205368995666504,grad_norm: 0.99999907922188, iteration: 34539
loss: 1.0156641006469727,grad_norm: 0.7843063708067273, iteration: 34540
loss: 0.986608624458313,grad_norm: 0.9414942690455275, iteration: 34541
loss: 1.0018078088760376,grad_norm: 0.9269099511692663, iteration: 34542
loss: 1.038492202758789,grad_norm: 0.938663304287095, iteration: 34543
loss: 0.9795432090759277,grad_norm: 0.999999612979284, iteration: 34544
loss: 1.0283006429672241,grad_norm: 0.950499472406643, iteration: 34545
loss: 1.0244975090026855,grad_norm: 0.9999991602630883, iteration: 34546
loss: 0.9966022968292236,grad_norm: 0.9317654608521891, iteration: 34547
loss: 1.056886911392212,grad_norm: 0.9614145111448256, iteration: 34548
loss: 0.9988651871681213,grad_norm: 0.9999992355272439, iteration: 34549
loss: 1.0299057960510254,grad_norm: 0.9565363438017167, iteration: 34550
loss: 1.0244609117507935,grad_norm: 0.999999220743551, iteration: 34551
loss: 1.0238629579544067,grad_norm: 0.9999991804552194, iteration: 34552
loss: 1.0134317874908447,grad_norm: 0.9793761020110558, iteration: 34553
loss: 1.0077755451202393,grad_norm: 0.9999990751010582, iteration: 34554
loss: 1.0235387086868286,grad_norm: 0.9999990083568173, iteration: 34555
loss: 1.0008270740509033,grad_norm: 0.9055877092979623, iteration: 34556
loss: 1.0004805326461792,grad_norm: 0.9101580083450533, iteration: 34557
loss: 0.9940743446350098,grad_norm: 0.9307878015929228, iteration: 34558
loss: 1.0159226655960083,grad_norm: 0.9999991337740485, iteration: 34559
loss: 0.9994909167289734,grad_norm: 0.8587933379946028, iteration: 34560
loss: 1.0443097352981567,grad_norm: 0.9999992353979058, iteration: 34561
loss: 1.0016863346099854,grad_norm: 0.9297094538244292, iteration: 34562
loss: 1.0720409154891968,grad_norm: 0.9999996345861373, iteration: 34563
loss: 0.9763685464859009,grad_norm: 0.9999996991927806, iteration: 34564
loss: 0.9970600008964539,grad_norm: 0.9999990698734953, iteration: 34565
loss: 1.0094586610794067,grad_norm: 0.9999992870128999, iteration: 34566
loss: 1.0186021327972412,grad_norm: 0.8091718734783008, iteration: 34567
loss: 1.0022006034851074,grad_norm: 0.9999991670839629, iteration: 34568
loss: 1.000737190246582,grad_norm: 0.8761983001071828, iteration: 34569
loss: 0.9939298629760742,grad_norm: 0.9999991639503808, iteration: 34570
loss: 1.009088158607483,grad_norm: 0.8444648281621022, iteration: 34571
loss: 1.050402283668518,grad_norm: 0.9999992812931967, iteration: 34572
loss: 1.0346043109893799,grad_norm: 0.9057166187874227, iteration: 34573
loss: 1.040664553642273,grad_norm: 0.9999996180614513, iteration: 34574
loss: 1.0503226518630981,grad_norm: 0.9999999059808232, iteration: 34575
loss: 1.2407387495040894,grad_norm: 0.999999832567504, iteration: 34576
loss: 1.1551584005355835,grad_norm: 0.9999995227125796, iteration: 34577
loss: 0.9882357716560364,grad_norm: 0.7761107631659734, iteration: 34578
loss: 1.0283843278884888,grad_norm: 0.9999993193841974, iteration: 34579
loss: 1.084040641784668,grad_norm: 0.9999997018841543, iteration: 34580
loss: 0.9925710558891296,grad_norm: 0.9251432737660121, iteration: 34581
loss: 1.162955641746521,grad_norm: 0.9999998069503482, iteration: 34582
loss: 1.2659558057785034,grad_norm: 0.9999996081416919, iteration: 34583
loss: 0.9840337038040161,grad_norm: 0.7871370657742698, iteration: 34584
loss: 1.0913087129592896,grad_norm: 0.9999997479336489, iteration: 34585
loss: 1.0456955432891846,grad_norm: 0.9991427362734492, iteration: 34586
loss: 1.0627175569534302,grad_norm: 0.999999640315726, iteration: 34587
loss: 1.023191213607788,grad_norm: 0.9999994916578485, iteration: 34588
loss: 1.1426336765289307,grad_norm: 0.999999746143392, iteration: 34589
loss: 1.1091530323028564,grad_norm: 0.9999993007603301, iteration: 34590
loss: 1.017844319343567,grad_norm: 0.9999991344100029, iteration: 34591
loss: 0.9933900833129883,grad_norm: 0.999999840845181, iteration: 34592
loss: 1.0898925065994263,grad_norm: 0.999999626023887, iteration: 34593
loss: 1.1644327640533447,grad_norm: 0.9999997887767333, iteration: 34594
loss: 1.0152735710144043,grad_norm: 0.9999992877790106, iteration: 34595
loss: 1.0338258743286133,grad_norm: 0.9999990924055355, iteration: 34596
loss: 0.998019278049469,grad_norm: 0.9999993107707985, iteration: 34597
loss: 1.0656821727752686,grad_norm: 0.9623264299091298, iteration: 34598
loss: 1.2140032052993774,grad_norm: 0.9999996329848602, iteration: 34599
loss: 1.135493278503418,grad_norm: 0.999999081850398, iteration: 34600
loss: 1.2006735801696777,grad_norm: 0.9999999165166801, iteration: 34601
loss: 1.0715112686157227,grad_norm: 0.9999991140112161, iteration: 34602
loss: 1.039594292640686,grad_norm: 0.999999846737082, iteration: 34603
loss: 1.01813542842865,grad_norm: 0.9631542873620634, iteration: 34604
loss: 1.0390164852142334,grad_norm: 0.9999991819860488, iteration: 34605
loss: 1.0326833724975586,grad_norm: 0.999998993054755, iteration: 34606
loss: 1.0127631425857544,grad_norm: 0.889177935267958, iteration: 34607
loss: 1.0294240713119507,grad_norm: 0.9999990489999445, iteration: 34608
loss: 0.9856488704681396,grad_norm: 0.9500881599852314, iteration: 34609
loss: 1.0516424179077148,grad_norm: 0.9999992164708253, iteration: 34610
loss: 1.2069920301437378,grad_norm: 0.9999998987492211, iteration: 34611
loss: 1.0002906322479248,grad_norm: 0.9552109827464351, iteration: 34612
loss: 1.0366101264953613,grad_norm: 0.9999991956467702, iteration: 34613
loss: 0.971976101398468,grad_norm: 0.9201056589851119, iteration: 34614
loss: 1.038710117340088,grad_norm: 0.8622243289585685, iteration: 34615
loss: 1.0350507497787476,grad_norm: 0.9999997780801172, iteration: 34616
loss: 1.0370551347732544,grad_norm: 0.7984556882743126, iteration: 34617
loss: 1.00775945186615,grad_norm: 0.830564248621819, iteration: 34618
loss: 1.0251470804214478,grad_norm: 0.8080902746383326, iteration: 34619
loss: 1.053976058959961,grad_norm: 0.9999993488607589, iteration: 34620
loss: 0.9926581978797913,grad_norm: 0.8687884023002937, iteration: 34621
loss: 0.9879445433616638,grad_norm: 0.9450913390324492, iteration: 34622
loss: 1.0301812887191772,grad_norm: 0.9708665872490456, iteration: 34623
loss: 1.0201263427734375,grad_norm: 0.768018489804699, iteration: 34624
loss: 1.0245243310928345,grad_norm: 0.9999996629625595, iteration: 34625
loss: 1.0140472650527954,grad_norm: 0.9999990127222098, iteration: 34626
loss: 1.0515987873077393,grad_norm: 0.9999996243597928, iteration: 34627
loss: 0.9889707565307617,grad_norm: 0.8942465109550398, iteration: 34628
loss: 1.0499048233032227,grad_norm: 0.9582739209587338, iteration: 34629
loss: 1.0326871871948242,grad_norm: 0.9999991804751842, iteration: 34630
loss: 0.9841732978820801,grad_norm: 0.9428789707442623, iteration: 34631
loss: 1.008859395980835,grad_norm: 0.9569464141835755, iteration: 34632
loss: 1.0058566331863403,grad_norm: 0.9999992981719894, iteration: 34633
loss: 1.004090428352356,grad_norm: 0.9999993765891836, iteration: 34634
loss: 0.9955364465713501,grad_norm: 0.855076658313888, iteration: 34635
loss: 1.057260513305664,grad_norm: 0.9036491708055846, iteration: 34636
loss: 0.9956080913543701,grad_norm: 0.9504661191275302, iteration: 34637
loss: 1.0317798852920532,grad_norm: 0.9999991062725115, iteration: 34638
loss: 0.9986700415611267,grad_norm: 0.99999906335864, iteration: 34639
loss: 1.0775196552276611,grad_norm: 0.9999992061799884, iteration: 34640
loss: 1.0353695154190063,grad_norm: 0.9946806478679108, iteration: 34641
loss: 1.1067434549331665,grad_norm: 0.9999998877130856, iteration: 34642
loss: 1.0318353176116943,grad_norm: 0.980356633408955, iteration: 34643
loss: 0.973801851272583,grad_norm: 0.9999993613030209, iteration: 34644
loss: 0.9832183122634888,grad_norm: 0.9217153733409544, iteration: 34645
loss: 1.032058596611023,grad_norm: 0.9381695904957502, iteration: 34646
loss: 1.0277351140975952,grad_norm: 0.9433555132228553, iteration: 34647
loss: 0.977380633354187,grad_norm: 0.9999991798885514, iteration: 34648
loss: 1.0048178434371948,grad_norm: 0.9999995608304406, iteration: 34649
loss: 0.9945785403251648,grad_norm: 0.9055817806224865, iteration: 34650
loss: 0.9806233048439026,grad_norm: 0.9676972698325913, iteration: 34651
loss: 1.0071592330932617,grad_norm: 0.9514403976739692, iteration: 34652
loss: 0.9994468688964844,grad_norm: 0.9999990963914772, iteration: 34653
loss: 0.9864625334739685,grad_norm: 0.8442155892179302, iteration: 34654
loss: 1.0119001865386963,grad_norm: 0.820054492090576, iteration: 34655
loss: 0.9899160265922546,grad_norm: 0.9642973567975934, iteration: 34656
loss: 1.0008853673934937,grad_norm: 0.830532210799071, iteration: 34657
loss: 1.067527413368225,grad_norm: 0.9999990665238435, iteration: 34658
loss: 1.0075448751449585,grad_norm: 0.9999992035087581, iteration: 34659
loss: 0.9957122206687927,grad_norm: 0.9999991815940277, iteration: 34660
loss: 0.9934220910072327,grad_norm: 0.9263137097942964, iteration: 34661
loss: 1.0006983280181885,grad_norm: 0.878723298355596, iteration: 34662
loss: 1.1435916423797607,grad_norm: 0.9999995340582689, iteration: 34663
loss: 1.0212209224700928,grad_norm: 0.9999993636016058, iteration: 34664
loss: 1.0077309608459473,grad_norm: 0.9999995896905959, iteration: 34665
loss: 1.0065288543701172,grad_norm: 0.9999990603522918, iteration: 34666
loss: 0.9723532795906067,grad_norm: 0.9999992404903106, iteration: 34667
loss: 0.9739275574684143,grad_norm: 0.8698194901966921, iteration: 34668
loss: 1.0121594667434692,grad_norm: 0.9580796794958625, iteration: 34669
loss: 1.0271973609924316,grad_norm: 0.9650472820425134, iteration: 34670
loss: 1.0195454359054565,grad_norm: 0.9999992459176046, iteration: 34671
loss: 1.0439002513885498,grad_norm: 0.9999996594651526, iteration: 34672
loss: 0.991020679473877,grad_norm: 0.9999992844109213, iteration: 34673
loss: 0.9898472428321838,grad_norm: 0.9239535474328132, iteration: 34674
loss: 0.9901825785636902,grad_norm: 0.9999990479258932, iteration: 34675
loss: 1.0312533378601074,grad_norm: 0.8824710403791095, iteration: 34676
loss: 1.0121585130691528,grad_norm: 0.8703339657250382, iteration: 34677
loss: 1.0027709007263184,grad_norm: 0.9999991834060781, iteration: 34678
loss: 1.0037699937820435,grad_norm: 0.9999992327224518, iteration: 34679
loss: 1.0045106410980225,grad_norm: 0.9999991059471225, iteration: 34680
loss: 1.0176165103912354,grad_norm: 0.9999993297267765, iteration: 34681
loss: 1.0041353702545166,grad_norm: 0.9816235251515919, iteration: 34682
loss: 1.002161979675293,grad_norm: 0.9999989027683861, iteration: 34683
loss: 1.0359807014465332,grad_norm: 0.9999994540122417, iteration: 34684
loss: 1.0140278339385986,grad_norm: 0.8697940693863226, iteration: 34685
loss: 1.0341602563858032,grad_norm: 0.9389012551856158, iteration: 34686
loss: 0.9969130754470825,grad_norm: 0.7819667113865305, iteration: 34687
loss: 1.017845630645752,grad_norm: 0.9999992219096009, iteration: 34688
loss: 1.006725788116455,grad_norm: 0.7883443315301578, iteration: 34689
loss: 1.0132155418395996,grad_norm: 0.999999093263442, iteration: 34690
loss: 1.0123538970947266,grad_norm: 0.9585138541758936, iteration: 34691
loss: 1.0263715982437134,grad_norm: 0.9211561994962688, iteration: 34692
loss: 1.3121933937072754,grad_norm: 1.0000000468845787, iteration: 34693
loss: 1.0012176036834717,grad_norm: 0.9999992029537045, iteration: 34694
loss: 0.9895952939987183,grad_norm: 0.9999991240942259, iteration: 34695
loss: 0.9897078275680542,grad_norm: 0.9999991263218874, iteration: 34696
loss: 1.0549649000167847,grad_norm: 0.8432230605025639, iteration: 34697
loss: 1.0281095504760742,grad_norm: 0.9999989646126463, iteration: 34698
loss: 0.9870122671127319,grad_norm: 0.9849144141321337, iteration: 34699
loss: 1.0257189273834229,grad_norm: 0.9999996965636478, iteration: 34700
loss: 1.009447455406189,grad_norm: 0.9469126086569238, iteration: 34701
loss: 1.0286173820495605,grad_norm: 0.9999989958975323, iteration: 34702
loss: 1.0249477624893188,grad_norm: 0.9999991921074216, iteration: 34703
loss: 0.9857742190361023,grad_norm: 0.9486397238593387, iteration: 34704
loss: 1.0281680822372437,grad_norm: 0.9999990912883185, iteration: 34705
loss: 1.044978380203247,grad_norm: 0.99999945650445, iteration: 34706
loss: 1.0224610567092896,grad_norm: 0.9999991448859208, iteration: 34707
loss: 1.0133976936340332,grad_norm: 0.8876149770210067, iteration: 34708
loss: 1.0913647413253784,grad_norm: 0.9999996273207871, iteration: 34709
loss: 1.01851487159729,grad_norm: 0.999999630666451, iteration: 34710
loss: 0.9866724610328674,grad_norm: 0.8725247609751142, iteration: 34711
loss: 1.0307013988494873,grad_norm: 0.9999991512901357, iteration: 34712
loss: 1.0246645212173462,grad_norm: 0.9999991329556397, iteration: 34713
loss: 1.014833927154541,grad_norm: 0.805138088312097, iteration: 34714
loss: 1.058421015739441,grad_norm: 0.9999995939555257, iteration: 34715
loss: 1.0271846055984497,grad_norm: 0.9380323439604309, iteration: 34716
loss: 1.046329379081726,grad_norm: 0.9999996221896617, iteration: 34717
loss: 1.0344387292861938,grad_norm: 0.9999992223769547, iteration: 34718
loss: 1.0228363275527954,grad_norm: 0.9999991540399454, iteration: 34719
loss: 0.9768345355987549,grad_norm: 0.999999072158758, iteration: 34720
loss: 1.0412423610687256,grad_norm: 0.9999997156581164, iteration: 34721
loss: 1.0171444416046143,grad_norm: 0.9999992327918473, iteration: 34722
loss: 1.0209977626800537,grad_norm: 0.9999997530923845, iteration: 34723
loss: 0.98577880859375,grad_norm: 0.9251931041267599, iteration: 34724
loss: 1.02338445186615,grad_norm: 0.9999991031907745, iteration: 34725
loss: 0.9733609557151794,grad_norm: 0.9999989935036743, iteration: 34726
loss: 0.9801632165908813,grad_norm: 0.9999992107231305, iteration: 34727
loss: 0.9959671497344971,grad_norm: 0.8563942784249323, iteration: 34728
loss: 1.0302188396453857,grad_norm: 0.9999992905075948, iteration: 34729
loss: 1.0036457777023315,grad_norm: 0.9999994818422342, iteration: 34730
loss: 1.006920337677002,grad_norm: 0.9999996213934264, iteration: 34731
loss: 1.0576459169387817,grad_norm: 0.9999992600936195, iteration: 34732
loss: 1.0017331838607788,grad_norm: 0.938982356249929, iteration: 34733
loss: 1.019087791442871,grad_norm: 0.91043716569485, iteration: 34734
loss: 0.9988893866539001,grad_norm: 0.977746099169202, iteration: 34735
loss: 0.9660026431083679,grad_norm: 0.9040023466994344, iteration: 34736
loss: 1.0553083419799805,grad_norm: 0.9999997587893119, iteration: 34737
loss: 1.0153461694717407,grad_norm: 0.999999180732734, iteration: 34738
loss: 1.0183913707733154,grad_norm: 0.9999995376793183, iteration: 34739
loss: 1.0398650169372559,grad_norm: 0.9252068502932728, iteration: 34740
loss: 1.016664743423462,grad_norm: 0.9999992522082122, iteration: 34741
loss: 1.030378818511963,grad_norm: 0.9999995665431176, iteration: 34742
loss: 1.0231785774230957,grad_norm: 0.9999991150180721, iteration: 34743
loss: 1.0015090703964233,grad_norm: 0.9585354972880485, iteration: 34744
loss: 0.981705904006958,grad_norm: 0.9999990334112717, iteration: 34745
loss: 0.985863447189331,grad_norm: 0.9999991781930775, iteration: 34746
loss: 0.9602925181388855,grad_norm: 0.9359326982596416, iteration: 34747
loss: 1.005010962486267,grad_norm: 0.8883332571684207, iteration: 34748
loss: 1.0338408946990967,grad_norm: 0.9999989873610955, iteration: 34749
loss: 1.054478645324707,grad_norm: 0.9999991607838086, iteration: 34750
loss: 1.0034576654434204,grad_norm: 0.999999114336988, iteration: 34751
loss: 1.0080347061157227,grad_norm: 0.999999198475631, iteration: 34752
loss: 1.0025238990783691,grad_norm: 0.9999990797380932, iteration: 34753
loss: 1.0245805978775024,grad_norm: 0.999999250116237, iteration: 34754
loss: 1.0388782024383545,grad_norm: 0.8909326222855227, iteration: 34755
loss: 1.038364291191101,grad_norm: 0.9999993247761616, iteration: 34756
loss: 1.0250424146652222,grad_norm: 0.999999120383662, iteration: 34757
loss: 1.0275254249572754,grad_norm: 0.9982284890065406, iteration: 34758
loss: 1.0214828252792358,grad_norm: 0.9999993178641248, iteration: 34759
loss: 0.992650032043457,grad_norm: 0.8868686058759151, iteration: 34760
loss: 0.9986670613288879,grad_norm: 0.9999992118435104, iteration: 34761
loss: 1.0056893825531006,grad_norm: 0.9999990876560799, iteration: 34762
loss: 1.0403727293014526,grad_norm: 0.9999994927479989, iteration: 34763
loss: 1.044266939163208,grad_norm: 0.999999607470919, iteration: 34764
loss: 1.035091757774353,grad_norm: 0.8836736287566552, iteration: 34765
loss: 0.9855203628540039,grad_norm: 0.9999990717269043, iteration: 34766
loss: 1.03416907787323,grad_norm: 0.9027496767144061, iteration: 34767
loss: 0.989341676235199,grad_norm: 0.9182706319769522, iteration: 34768
loss: 0.9953457713127136,grad_norm: 0.7799320419965459, iteration: 34769
loss: 0.9817548394203186,grad_norm: 0.999999422466585, iteration: 34770
loss: 1.023846983909607,grad_norm: 0.9999991395590906, iteration: 34771
loss: 0.963685929775238,grad_norm: 0.8912391906774554, iteration: 34772
loss: 1.0007827281951904,grad_norm: 0.9999991725394058, iteration: 34773
loss: 0.9990186095237732,grad_norm: 0.8725377513267699, iteration: 34774
loss: 1.0028648376464844,grad_norm: 0.9999989542882631, iteration: 34775
loss: 0.9990096688270569,grad_norm: 0.9999992876542146, iteration: 34776
loss: 1.0107771158218384,grad_norm: 0.9999995273651979, iteration: 34777
loss: 1.044608473777771,grad_norm: 0.9999990261060823, iteration: 34778
loss: 1.0224931240081787,grad_norm: 0.9999996853518568, iteration: 34779
loss: 0.9794439673423767,grad_norm: 0.9520313933528147, iteration: 34780
loss: 1.0070933103561401,grad_norm: 0.9999990764031464, iteration: 34781
loss: 1.0065795183181763,grad_norm: 0.8140268516840603, iteration: 34782
loss: 1.0075381994247437,grad_norm: 0.9999993986655129, iteration: 34783
loss: 1.0147439241409302,grad_norm: 0.9999989947975588, iteration: 34784
loss: 1.0279463529586792,grad_norm: 0.9999999332477465, iteration: 34785
loss: 0.990605890750885,grad_norm: 0.999999207954844, iteration: 34786
loss: 1.0443826913833618,grad_norm: 0.9999996410261899, iteration: 34787
loss: 1.02387535572052,grad_norm: 0.8836828151405658, iteration: 34788
loss: 1.0059828758239746,grad_norm: 0.8590075266127902, iteration: 34789
loss: 1.0208795070648193,grad_norm: 0.9276647894645801, iteration: 34790
loss: 1.0241646766662598,grad_norm: 0.7864542055266777, iteration: 34791
loss: 1.0303394794464111,grad_norm: 0.9070705524573558, iteration: 34792
loss: 0.9976891875267029,grad_norm: 0.9389855624575196, iteration: 34793
loss: 1.0131702423095703,grad_norm: 0.9999990374240186, iteration: 34794
loss: 1.0567574501037598,grad_norm: 0.9999990823742748, iteration: 34795
loss: 0.9997177124023438,grad_norm: 0.8830764316200191, iteration: 34796
loss: 0.981080949306488,grad_norm: 0.9999993784455783, iteration: 34797
loss: 0.9840797185897827,grad_norm: 0.9670459586836684, iteration: 34798
loss: 1.051865577697754,grad_norm: 0.9999990840432692, iteration: 34799
loss: 1.0209838151931763,grad_norm: 0.9577608540931287, iteration: 34800
loss: 1.0200668573379517,grad_norm: 0.9999991506134813, iteration: 34801
loss: 1.0186597108840942,grad_norm: 0.9999994421456753, iteration: 34802
loss: 0.966529130935669,grad_norm: 0.9999990149986202, iteration: 34803
loss: 0.9905962944030762,grad_norm: 0.9488540559624432, iteration: 34804
loss: 1.0037205219268799,grad_norm: 0.9999991442043128, iteration: 34805
loss: 1.0175920724868774,grad_norm: 0.9999991883811512, iteration: 34806
loss: 1.0284751653671265,grad_norm: 0.9999994650479107, iteration: 34807
loss: 1.0196226835250854,grad_norm: 0.999999248820213, iteration: 34808
loss: 1.0992777347564697,grad_norm: 0.9999996740857585, iteration: 34809
loss: 1.0998210906982422,grad_norm: 0.9999996346759243, iteration: 34810
loss: 1.0067590475082397,grad_norm: 0.8741621274549166, iteration: 34811
loss: 1.0038137435913086,grad_norm: 0.9999991262090225, iteration: 34812
loss: 1.0230735540390015,grad_norm: 0.9899831816334831, iteration: 34813
loss: 1.0256130695343018,grad_norm: 0.99999946554704, iteration: 34814
loss: 0.9931782484054565,grad_norm: 0.7107621252990556, iteration: 34815
loss: 0.9879816174507141,grad_norm: 0.9028709654857393, iteration: 34816
loss: 0.9915485978126526,grad_norm: 0.9999991617017926, iteration: 34817
loss: 1.0043649673461914,grad_norm: 0.9999992076451887, iteration: 34818
loss: 0.9815600514411926,grad_norm: 0.8914708845991308, iteration: 34819
loss: 0.9767534732818604,grad_norm: 0.9038877201156509, iteration: 34820
loss: 1.033360481262207,grad_norm: 0.9999995386507583, iteration: 34821
loss: 0.9810647368431091,grad_norm: 0.99999923837972, iteration: 34822
loss: 1.0267285108566284,grad_norm: 0.999999116076367, iteration: 34823
loss: 1.0965784788131714,grad_norm: 0.9999993376306415, iteration: 34824
loss: 1.0998910665512085,grad_norm: 0.9999994333597317, iteration: 34825
loss: 1.0103808641433716,grad_norm: 0.8812056297998393, iteration: 34826
loss: 1.1307203769683838,grad_norm: 0.9999993931107434, iteration: 34827
loss: 0.9714788198471069,grad_norm: 0.9761655866935779, iteration: 34828
loss: 1.01136314868927,grad_norm: 0.9999995641243586, iteration: 34829
loss: 0.9750109314918518,grad_norm: 0.8875275851145654, iteration: 34830
loss: 0.9868353605270386,grad_norm: 0.9065000985083843, iteration: 34831
loss: 1.0978171825408936,grad_norm: 0.9999996645608165, iteration: 34832
loss: 1.0419776439666748,grad_norm: 0.999999632736613, iteration: 34833
loss: 1.0063453912734985,grad_norm: 0.9364384115031105, iteration: 34834
loss: 1.0056451559066772,grad_norm: 0.999999740692225, iteration: 34835
loss: 0.9755488634109497,grad_norm: 0.9999992257534049, iteration: 34836
loss: 0.9943304061889648,grad_norm: 0.9032536151915032, iteration: 34837
loss: 1.0062761306762695,grad_norm: 0.9999991513202551, iteration: 34838
loss: 1.0087391138076782,grad_norm: 0.9087308331430096, iteration: 34839
loss: 1.0160331726074219,grad_norm: 0.9999991655356694, iteration: 34840
loss: 0.9914857745170593,grad_norm: 0.8297072719509982, iteration: 34841
loss: 1.0247081518173218,grad_norm: 0.8465572095170872, iteration: 34842
loss: 1.015979528427124,grad_norm: 0.9999995136304355, iteration: 34843
loss: 0.9683655500411987,grad_norm: 0.7797677823618402, iteration: 34844
loss: 1.0074111223220825,grad_norm: 0.9567258219540045, iteration: 34845
loss: 1.0299931764602661,grad_norm: 0.9999992295458578, iteration: 34846
loss: 0.9959385991096497,grad_norm: 0.8285222411776286, iteration: 34847
loss: 1.024926781654358,grad_norm: 0.9999990626001612, iteration: 34848
loss: 1.0216925144195557,grad_norm: 0.9999992129940247, iteration: 34849
loss: 1.0271433591842651,grad_norm: 0.9999991759364762, iteration: 34850
loss: 1.027295470237732,grad_norm: 0.9999991994431393, iteration: 34851
loss: 1.0166809558868408,grad_norm: 0.9999991129157636, iteration: 34852
loss: 1.0078784227371216,grad_norm: 0.9999993285888161, iteration: 34853
loss: 1.0637162923812866,grad_norm: 0.9999995951527414, iteration: 34854
loss: 1.0286006927490234,grad_norm: 0.9999991227953611, iteration: 34855
loss: 0.9825247526168823,grad_norm: 0.9633234979863498, iteration: 34856
loss: 0.9752094149589539,grad_norm: 0.9999991123604477, iteration: 34857
loss: 1.0077974796295166,grad_norm: 0.9999991177291617, iteration: 34858
loss: 0.9866898655891418,grad_norm: 0.9584940715700103, iteration: 34859
loss: 0.9716763496398926,grad_norm: 0.9318719632871771, iteration: 34860
loss: 0.9935755729675293,grad_norm: 0.9999990152674839, iteration: 34861
loss: 0.9809156656265259,grad_norm: 0.9999991126730363, iteration: 34862
loss: 0.9871432781219482,grad_norm: 0.999999179752519, iteration: 34863
loss: 1.0002689361572266,grad_norm: 0.8630950009554745, iteration: 34864
loss: 0.9931545853614807,grad_norm: 0.8355754399635463, iteration: 34865
loss: 1.0623942613601685,grad_norm: 0.9999990379521766, iteration: 34866
loss: 1.0210460424423218,grad_norm: 0.9999990415454083, iteration: 34867
loss: 0.9947202801704407,grad_norm: 0.999999554465522, iteration: 34868
loss: 1.0189484357833862,grad_norm: 0.9999994290956999, iteration: 34869
loss: 0.9824686050415039,grad_norm: 0.757479591391411, iteration: 34870
loss: 1.0172330141067505,grad_norm: 0.9999992423059011, iteration: 34871
loss: 1.0004000663757324,grad_norm: 0.9658950258220352, iteration: 34872
loss: 1.014261245727539,grad_norm: 0.999999040466303, iteration: 34873
loss: 1.0149835348129272,grad_norm: 0.9492971809794627, iteration: 34874
loss: 1.012422800064087,grad_norm: 0.9999991070766004, iteration: 34875
loss: 1.0093756914138794,grad_norm: 0.9999994506457546, iteration: 34876
loss: 1.0530502796173096,grad_norm: 0.999999711058527, iteration: 34877
loss: 1.0158498287200928,grad_norm: 0.9999991803850492, iteration: 34878
loss: 0.9907802939414978,grad_norm: 0.9999994060601152, iteration: 34879
loss: 0.9659608006477356,grad_norm: 0.970312518927497, iteration: 34880
loss: 0.9859201908111572,grad_norm: 0.7976677930962495, iteration: 34881
loss: 0.9757918119430542,grad_norm: 0.9664338538223233, iteration: 34882
loss: 0.9843215942382812,grad_norm: 0.8635965981211116, iteration: 34883
loss: 0.9615179896354675,grad_norm: 0.9999991064485776, iteration: 34884
loss: 1.034043788909912,grad_norm: 0.9999991702878628, iteration: 34885
loss: 1.0408575534820557,grad_norm: 0.9283076713111893, iteration: 34886
loss: 1.0019837617874146,grad_norm: 0.9999995738258282, iteration: 34887
loss: 0.9918739795684814,grad_norm: 0.9767601181347666, iteration: 34888
loss: 0.9562894701957703,grad_norm: 0.9999992255525315, iteration: 34889
loss: 0.9879183173179626,grad_norm: 0.8535724745157001, iteration: 34890
loss: 1.0192533731460571,grad_norm: 0.8605891986925303, iteration: 34891
loss: 1.0179359912872314,grad_norm: 0.9999993248433522, iteration: 34892
loss: 1.0512793064117432,grad_norm: 0.9117451120017309, iteration: 34893
loss: 1.031545639038086,grad_norm: 0.9143433489267303, iteration: 34894
loss: 1.045668125152588,grad_norm: 0.9999990370885993, iteration: 34895
loss: 1.0326851606369019,grad_norm: 0.9304219675542204, iteration: 34896
loss: 1.0222886800765991,grad_norm: 0.9222017992116963, iteration: 34897
loss: 1.0295559167861938,grad_norm: 0.8577509856541099, iteration: 34898
loss: 0.9606885313987732,grad_norm: 0.9999990513672707, iteration: 34899
loss: 1.0451246500015259,grad_norm: 0.9999991529530847, iteration: 34900
loss: 0.9752404689788818,grad_norm: 0.8796754281045616, iteration: 34901
loss: 1.0034267902374268,grad_norm: 0.9546420294069593, iteration: 34902
loss: 0.9957901835441589,grad_norm: 0.9999991197465538, iteration: 34903
loss: 1.0348272323608398,grad_norm: 0.9999993252654291, iteration: 34904
loss: 1.0257021188735962,grad_norm: 0.937216647031879, iteration: 34905
loss: 0.9998088479042053,grad_norm: 0.9999991267513337, iteration: 34906
loss: 1.0109922885894775,grad_norm: 0.9700985394453822, iteration: 34907
loss: 0.9874879717826843,grad_norm: 0.886976122252442, iteration: 34908
loss: 1.0101255178451538,grad_norm: 0.7996244949231315, iteration: 34909
loss: 0.9936630129814148,grad_norm: 0.9999991153858081, iteration: 34910
loss: 1.015040636062622,grad_norm: 0.9277351305155256, iteration: 34911
loss: 0.9883834719657898,grad_norm: 0.9549200085336976, iteration: 34912
loss: 1.0222152471542358,grad_norm: 0.9137054364729706, iteration: 34913
loss: 1.019110083580017,grad_norm: 0.999999525528981, iteration: 34914
loss: 0.9994678497314453,grad_norm: 0.9688269638077832, iteration: 34915
loss: 1.016791582107544,grad_norm: 0.8472957836674073, iteration: 34916
loss: 0.9867106080055237,grad_norm: 0.9999996206094214, iteration: 34917
loss: 1.0561033487319946,grad_norm: 0.9999997630536513, iteration: 34918
loss: 1.0046600103378296,grad_norm: 0.9156759818888716, iteration: 34919
loss: 1.000349998474121,grad_norm: 0.9999991111859582, iteration: 34920
loss: 1.072965383529663,grad_norm: 0.9999993217707405, iteration: 34921
loss: 1.0112521648406982,grad_norm: 0.9939501588841735, iteration: 34922
loss: 1.0201672315597534,grad_norm: 0.9999990015681859, iteration: 34923
loss: 0.9908267259597778,grad_norm: 0.9568850480949761, iteration: 34924
loss: 1.0269290208816528,grad_norm: 0.9274653563863667, iteration: 34925
loss: 1.0055609941482544,grad_norm: 0.9981222246265915, iteration: 34926
loss: 1.0506646633148193,grad_norm: 0.9999992432269404, iteration: 34927
loss: 0.9993380308151245,grad_norm: 0.9999996256003897, iteration: 34928
loss: 0.9908506870269775,grad_norm: 0.9208613085986334, iteration: 34929
loss: 1.0017555952072144,grad_norm: 0.9040448580963181, iteration: 34930
loss: 1.0261768102645874,grad_norm: 0.9999996098530335, iteration: 34931
loss: 1.0210977792739868,grad_norm: 0.9999992826216152, iteration: 34932
loss: 1.0047446489334106,grad_norm: 0.9999992037213733, iteration: 34933
loss: 0.9901840090751648,grad_norm: 0.999999608142824, iteration: 34934
loss: 1.0184046030044556,grad_norm: 0.9999992131096253, iteration: 34935
loss: 1.0580694675445557,grad_norm: 0.9999996590489869, iteration: 34936
loss: 1.0289231538772583,grad_norm: 0.9017612006271458, iteration: 34937
loss: 0.9901082515716553,grad_norm: 0.9546794599561363, iteration: 34938
loss: 1.016158103942871,grad_norm: 0.9999991148139189, iteration: 34939
loss: 1.0187594890594482,grad_norm: 0.99999965942801, iteration: 34940
loss: 1.000494122505188,grad_norm: 0.9999990596498574, iteration: 34941
loss: 0.990885853767395,grad_norm: 0.8823964448045141, iteration: 34942
loss: 1.017586350440979,grad_norm: 0.9999992878030252, iteration: 34943
loss: 0.9753660559654236,grad_norm: 0.9442215165600067, iteration: 34944
loss: 1.0128918886184692,grad_norm: 0.9999994121913418, iteration: 34945
loss: 1.117793083190918,grad_norm: 0.9999996020855151, iteration: 34946
loss: 1.0285027027130127,grad_norm: 0.9150175798502912, iteration: 34947
loss: 0.9968807697296143,grad_norm: 0.9741154765168833, iteration: 34948
loss: 0.9859545826911926,grad_norm: 0.8162658609652563, iteration: 34949
loss: 0.9977244734764099,grad_norm: 0.9999992407933431, iteration: 34950
loss: 0.9995774626731873,grad_norm: 0.8242705545959577, iteration: 34951
loss: 1.0028992891311646,grad_norm: 0.9999989478473141, iteration: 34952
loss: 1.0332919359207153,grad_norm: 0.999999948035293, iteration: 34953
loss: 0.9984229207038879,grad_norm: 0.9831257972395213, iteration: 34954
loss: 0.9943931698799133,grad_norm: 0.9999993063779617, iteration: 34955
loss: 0.9949238896369934,grad_norm: 0.8690479063293842, iteration: 34956
loss: 0.9886819124221802,grad_norm: 0.9999992686418259, iteration: 34957
loss: 0.9602535963058472,grad_norm: 0.9999990712869776, iteration: 34958
loss: 0.9975787997245789,grad_norm: 0.99999914950716, iteration: 34959
loss: 0.980176568031311,grad_norm: 0.9999991939660484, iteration: 34960
loss: 0.9925665259361267,grad_norm: 0.9999994713408655, iteration: 34961
loss: 1.0074715614318848,grad_norm: 0.982931845746366, iteration: 34962
loss: 1.1656030416488647,grad_norm: 0.9999997835803281, iteration: 34963
loss: 1.045241355895996,grad_norm: 0.8994996460605365, iteration: 34964
loss: 0.9867674112319946,grad_norm: 0.9954283964713835, iteration: 34965
loss: 0.9736450910568237,grad_norm: 0.9999990640707858, iteration: 34966
loss: 1.0155383348464966,grad_norm: 0.7305303518990746, iteration: 34967
loss: 1.0884838104248047,grad_norm: 0.9999994374407497, iteration: 34968
loss: 1.0452667474746704,grad_norm: 0.9999995139588093, iteration: 34969
loss: 1.0194413661956787,grad_norm: 0.8261308602038328, iteration: 34970
loss: 0.9780375957489014,grad_norm: 0.9801637203351565, iteration: 34971
loss: 0.9653339385986328,grad_norm: 0.9999990897298734, iteration: 34972
loss: 1.0328426361083984,grad_norm: 0.9999990732828862, iteration: 34973
loss: 1.0323454141616821,grad_norm: 0.999999192639128, iteration: 34974
loss: 1.1073150634765625,grad_norm: 0.999999702644583, iteration: 34975
loss: 1.0114549398422241,grad_norm: 0.8816718966280944, iteration: 34976
loss: 1.0160367488861084,grad_norm: 0.9999991339077335, iteration: 34977
loss: 1.09253990650177,grad_norm: 0.999999793725512, iteration: 34978
loss: 1.0374717712402344,grad_norm: 0.9999991164339219, iteration: 34979
loss: 0.978325366973877,grad_norm: 0.7923537498890935, iteration: 34980
loss: 1.047576904296875,grad_norm: 0.9999991100681371, iteration: 34981
loss: 1.0116550922393799,grad_norm: 0.9999994437190699, iteration: 34982
loss: 1.0463427305221558,grad_norm: 0.9669249455813337, iteration: 34983
loss: 1.0203715562820435,grad_norm: 0.9877306062976147, iteration: 34984
loss: 1.0070372819900513,grad_norm: 0.9701408563299021, iteration: 34985
loss: 0.9836477637290955,grad_norm: 0.8943098105508167, iteration: 34986
loss: 1.0093183517456055,grad_norm: 0.9999991352326587, iteration: 34987
loss: 0.9760108590126038,grad_norm: 0.9999993528512644, iteration: 34988
loss: 1.0322213172912598,grad_norm: 0.9938210995694553, iteration: 34989
loss: 1.063543677330017,grad_norm: 0.9999998077045208, iteration: 34990
loss: 0.996219277381897,grad_norm: 0.9999994971846267, iteration: 34991
loss: 1.0376341342926025,grad_norm: 0.9042426735367521, iteration: 34992
loss: 1.0026127099990845,grad_norm: 0.9999990956746446, iteration: 34993
loss: 0.9816816449165344,grad_norm: 0.9883556690143431, iteration: 34994
loss: 1.0472511053085327,grad_norm: 0.999999901147119, iteration: 34995
loss: 1.0436410903930664,grad_norm: 0.9999994457620659, iteration: 34996
loss: 0.988633394241333,grad_norm: 0.9814743738887919, iteration: 34997
loss: 1.0149718523025513,grad_norm: 0.9999991898761194, iteration: 34998
loss: 1.013850450515747,grad_norm: 0.8002897192352157, iteration: 34999
loss: 0.9949256777763367,grad_norm: 0.999999067111928, iteration: 35000
loss: 1.0027467012405396,grad_norm: 0.8031320084813246, iteration: 35001
loss: 1.0230076313018799,grad_norm: 0.999999176083094, iteration: 35002
loss: 0.9998313784599304,grad_norm: 0.9999992217793514, iteration: 35003
loss: 1.0405869483947754,grad_norm: 0.9043599028570495, iteration: 35004
loss: 1.0345628261566162,grad_norm: 0.9999991164614815, iteration: 35005
loss: 1.0156480073928833,grad_norm: 0.9999997056093352, iteration: 35006
loss: 0.9979246258735657,grad_norm: 0.9834235658667609, iteration: 35007
loss: 1.0001521110534668,grad_norm: 0.9999991367902481, iteration: 35008
loss: 1.0159251689910889,grad_norm: 0.7886316301706942, iteration: 35009
loss: 1.057662844657898,grad_norm: 0.9999992216662942, iteration: 35010
loss: 1.048965573310852,grad_norm: 0.9508061374908452, iteration: 35011
loss: 0.9989257454872131,grad_norm: 0.8820346430220979, iteration: 35012
loss: 1.0115467309951782,grad_norm: 0.9999990953739494, iteration: 35013
loss: 1.072105884552002,grad_norm: 0.9999998337285394, iteration: 35014
loss: 1.0374170541763306,grad_norm: 0.999999784911213, iteration: 35015
loss: 1.0280232429504395,grad_norm: 0.9924152262480972, iteration: 35016
loss: 1.014257550239563,grad_norm: 0.9999991627512692, iteration: 35017
loss: 0.9991453886032104,grad_norm: 0.9999990671064322, iteration: 35018
loss: 1.0096567869186401,grad_norm: 0.9209648535067684, iteration: 35019
loss: 1.1183048486709595,grad_norm: 0.9999994254913087, iteration: 35020
loss: 0.9734970331192017,grad_norm: 0.9758875187585228, iteration: 35021
loss: 1.0614168643951416,grad_norm: 0.8875176501448337, iteration: 35022
loss: 1.001829981803894,grad_norm: 0.954666161214024, iteration: 35023
loss: 0.9878957867622375,grad_norm: 0.8902297382300066, iteration: 35024
loss: 1.0291876792907715,grad_norm: 0.8551006226415633, iteration: 35025
loss: 1.0147877931594849,grad_norm: 0.9999991460657766, iteration: 35026
loss: 1.017164945602417,grad_norm: 0.9999993920017182, iteration: 35027
loss: 0.9877318739891052,grad_norm: 0.8711753087141023, iteration: 35028
loss: 1.009839415550232,grad_norm: 0.9999991838272908, iteration: 35029
loss: 1.0322715044021606,grad_norm: 0.9999991375841589, iteration: 35030
loss: 1.1460545063018799,grad_norm: 0.999999546653332, iteration: 35031
loss: 1.0204384326934814,grad_norm: 0.9999992315285771, iteration: 35032
loss: 1.004233479499817,grad_norm: 0.8084303544793074, iteration: 35033
loss: 0.9643382430076599,grad_norm: 0.9999991147470134, iteration: 35034
loss: 0.9764448404312134,grad_norm: 0.9999991317018908, iteration: 35035
loss: 1.098706603050232,grad_norm: 0.9999992278624465, iteration: 35036
loss: 0.9849382638931274,grad_norm: 0.9999993086648977, iteration: 35037
loss: 1.0016065835952759,grad_norm: 0.9999991304875039, iteration: 35038
loss: 1.0089126825332642,grad_norm: 0.9999992855844239, iteration: 35039
loss: 0.9661160111427307,grad_norm: 0.9999991781463973, iteration: 35040
loss: 0.9899756908416748,grad_norm: 0.9513094656469958, iteration: 35041
loss: 1.0385956764221191,grad_norm: 0.914207525037989, iteration: 35042
loss: 1.0243157148361206,grad_norm: 0.888301040037427, iteration: 35043
loss: 0.9465452432632446,grad_norm: 0.8646108913405077, iteration: 35044
loss: 0.9898697137832642,grad_norm: 0.9610761415224537, iteration: 35045
loss: 1.0056754350662231,grad_norm: 0.9999996682673137, iteration: 35046
loss: 1.0452052354812622,grad_norm: 0.9999990165253961, iteration: 35047
loss: 1.0229265689849854,grad_norm: 0.9959897597023339, iteration: 35048
loss: 0.9940419793128967,grad_norm: 0.9378801401731574, iteration: 35049
loss: 1.0135977268218994,grad_norm: 0.999999126885818, iteration: 35050
loss: 1.0637710094451904,grad_norm: 0.9999995715446467, iteration: 35051
loss: 1.0372391939163208,grad_norm: 0.9648895400499469, iteration: 35052
loss: 1.055482268333435,grad_norm: 0.9999997812629312, iteration: 35053
loss: 1.0462124347686768,grad_norm: 0.8870263962750832, iteration: 35054
loss: 1.0201412439346313,grad_norm: 0.8610987257331714, iteration: 35055
loss: 0.9889751672744751,grad_norm: 0.8608666981696289, iteration: 35056
loss: 1.0257278680801392,grad_norm: 0.999999129618466, iteration: 35057
loss: 1.005818486213684,grad_norm: 0.9839154951899061, iteration: 35058
loss: 1.0311297178268433,grad_norm: 0.8696515571890104, iteration: 35059
loss: 0.9894365072250366,grad_norm: 0.9999991474590774, iteration: 35060
loss: 1.0216678380966187,grad_norm: 0.999999056983694, iteration: 35061
loss: 1.0121346712112427,grad_norm: 0.9554576571618251, iteration: 35062
loss: 1.0853503942489624,grad_norm: 0.9999995809214302, iteration: 35063
loss: 0.9998454451560974,grad_norm: 0.8750177835260332, iteration: 35064
loss: 1.0010799169540405,grad_norm: 0.9999992557955538, iteration: 35065
loss: 0.9892290830612183,grad_norm: 0.7522119370744125, iteration: 35066
loss: 1.0070009231567383,grad_norm: 0.8501961775899854, iteration: 35067
loss: 0.9824420213699341,grad_norm: 0.9999991820506072, iteration: 35068
loss: 1.0002700090408325,grad_norm: 0.9999992169811972, iteration: 35069
loss: 1.0474966764450073,grad_norm: 0.9999992611603019, iteration: 35070
loss: 1.0122342109680176,grad_norm: 0.9837044673817024, iteration: 35071
loss: 0.9968937635421753,grad_norm: 0.9323922142252992, iteration: 35072
loss: 1.0203834772109985,grad_norm: 0.7669373038910712, iteration: 35073
loss: 1.0152318477630615,grad_norm: 0.9999998411757942, iteration: 35074
loss: 1.0089000463485718,grad_norm: 0.9999991778036029, iteration: 35075
loss: 1.0105488300323486,grad_norm: 0.86206453849744, iteration: 35076
loss: 1.0272669792175293,grad_norm: 0.9023947789052645, iteration: 35077
loss: 0.9980848431587219,grad_norm: 0.9369894696652774, iteration: 35078
loss: 0.9764509201049805,grad_norm: 0.9041187082395986, iteration: 35079
loss: 1.0199999809265137,grad_norm: 0.8084541933706911, iteration: 35080
loss: 0.9816980361938477,grad_norm: 0.9190952456307785, iteration: 35081
loss: 0.9831348061561584,grad_norm: 0.9999990864990334, iteration: 35082
loss: 1.0114936828613281,grad_norm: 0.9086361633667634, iteration: 35083
loss: 1.0200496912002563,grad_norm: 0.9812080073090041, iteration: 35084
loss: 1.0151585340499878,grad_norm: 0.999999313407677, iteration: 35085
loss: 1.1345950365066528,grad_norm: 0.9999990746950744, iteration: 35086
loss: 1.0340371131896973,grad_norm: 0.999999217141362, iteration: 35087
loss: 1.0337638854980469,grad_norm: 0.9999990469034724, iteration: 35088
loss: 1.025179147720337,grad_norm: 0.9999992219411643, iteration: 35089
loss: 0.9910247921943665,grad_norm: 0.9545661249082973, iteration: 35090
loss: 0.9870458841323853,grad_norm: 0.9153243687714628, iteration: 35091
loss: 1.038948893547058,grad_norm: 0.9999991128184005, iteration: 35092
loss: 1.0209096670150757,grad_norm: 0.9684730802442306, iteration: 35093
loss: 1.0008684396743774,grad_norm: 0.9999992759506647, iteration: 35094
loss: 1.0156663656234741,grad_norm: 0.9999991554808239, iteration: 35095
loss: 1.0313475131988525,grad_norm: 0.9054731808118464, iteration: 35096
loss: 1.0861058235168457,grad_norm: 0.9999991575490004, iteration: 35097
loss: 0.9724632501602173,grad_norm: 0.9571629141973763, iteration: 35098
loss: 0.9971135258674622,grad_norm: 0.9142720141426948, iteration: 35099
loss: 1.0202184915542603,grad_norm: 0.9913722538199506, iteration: 35100
loss: 0.9979483485221863,grad_norm: 0.9887218903477425, iteration: 35101
loss: 1.0445722341537476,grad_norm: 0.9999992056254244, iteration: 35102
loss: 0.990875244140625,grad_norm: 0.9901423636934858, iteration: 35103
loss: 1.0428876876831055,grad_norm: 0.9999997156095362, iteration: 35104
loss: 0.9961364269256592,grad_norm: 0.9776229186702743, iteration: 35105
loss: 1.003741979598999,grad_norm: 0.9483325935465161, iteration: 35106
loss: 1.0123028755187988,grad_norm: 0.9450544117198977, iteration: 35107
loss: 0.9965366721153259,grad_norm: 0.9521448779981355, iteration: 35108
loss: 1.0213614702224731,grad_norm: 0.9999993909134107, iteration: 35109
loss: 0.9717990756034851,grad_norm: 0.9999990943529847, iteration: 35110
loss: 0.9952671527862549,grad_norm: 0.9999990823226547, iteration: 35111
loss: 0.9977612495422363,grad_norm: 0.9999991908790018, iteration: 35112
loss: 0.9956579804420471,grad_norm: 0.8891437200242466, iteration: 35113
loss: 0.9972427487373352,grad_norm: 0.9377248351497861, iteration: 35114
loss: 1.0058919191360474,grad_norm: 0.9333353826448233, iteration: 35115
loss: 0.9790599346160889,grad_norm: 0.9999990550030682, iteration: 35116
loss: 1.0094062089920044,grad_norm: 0.9999992316677031, iteration: 35117
loss: 1.0341228246688843,grad_norm: 0.944895829743788, iteration: 35118
loss: 0.9941993355751038,grad_norm: 0.793651508455432, iteration: 35119
loss: 0.9826993346214294,grad_norm: 0.9999990797103703, iteration: 35120
loss: 1.0208446979522705,grad_norm: 0.9999992354650478, iteration: 35121
loss: 1.0503891706466675,grad_norm: 0.9999992005557183, iteration: 35122
loss: 1.0309016704559326,grad_norm: 0.9249305306372413, iteration: 35123
loss: 0.976386547088623,grad_norm: 0.9999991088804834, iteration: 35124
loss: 1.045630693435669,grad_norm: 0.9999991737189694, iteration: 35125
loss: 1.0783718824386597,grad_norm: 0.9999994744532159, iteration: 35126
loss: 0.9996803402900696,grad_norm: 0.9999993899760133, iteration: 35127
loss: 0.9816993474960327,grad_norm: 0.9640536060965532, iteration: 35128
loss: 0.9949136972427368,grad_norm: 0.8313470647857988, iteration: 35129
loss: 0.959222137928009,grad_norm: 0.920110160318743, iteration: 35130
loss: 1.011207938194275,grad_norm: 0.7629847309208283, iteration: 35131
loss: 1.0462309122085571,grad_norm: 0.9999991424803595, iteration: 35132
loss: 0.9916353225708008,grad_norm: 0.9999990161130479, iteration: 35133
loss: 1.0213874578475952,grad_norm: 0.9349273967700873, iteration: 35134
loss: 1.0363044738769531,grad_norm: 0.9999991233633337, iteration: 35135
loss: 0.9715370535850525,grad_norm: 0.894885286354351, iteration: 35136
loss: 1.0354201793670654,grad_norm: 0.9597180571552912, iteration: 35137
loss: 0.9974585175514221,grad_norm: 0.9999990344043653, iteration: 35138
loss: 0.9990581274032593,grad_norm: 0.9999991772749143, iteration: 35139
loss: 1.01124906539917,grad_norm: 0.9365146913915092, iteration: 35140
loss: 1.04265558719635,grad_norm: 0.9999994071585594, iteration: 35141
loss: 0.9801964163780212,grad_norm: 0.966854090676436, iteration: 35142
loss: 0.9944348335266113,grad_norm: 0.99999962621248, iteration: 35143
loss: 1.0329254865646362,grad_norm: 0.9999991207851819, iteration: 35144
loss: 1.0187357664108276,grad_norm: 0.9140719510455566, iteration: 35145
loss: 1.0085222721099854,grad_norm: 0.999999240018901, iteration: 35146
loss: 1.0001531839370728,grad_norm: 0.8565955721036307, iteration: 35147
loss: 0.9791237115859985,grad_norm: 0.8820470876582415, iteration: 35148
loss: 1.0048377513885498,grad_norm: 0.9999991219667999, iteration: 35149
loss: 1.019961953163147,grad_norm: 0.8945497440986834, iteration: 35150
loss: 1.0257173776626587,grad_norm: 0.8025333070654155, iteration: 35151
loss: 0.9894826412200928,grad_norm: 0.9022121193221868, iteration: 35152
loss: 0.9893092513084412,grad_norm: 0.999999114820815, iteration: 35153
loss: 1.0168757438659668,grad_norm: 0.9999990435715278, iteration: 35154
loss: 0.9905966520309448,grad_norm: 0.9999990841331535, iteration: 35155
loss: 1.0096290111541748,grad_norm: 0.9999990410231441, iteration: 35156
loss: 1.039717435836792,grad_norm: 0.981588659510673, iteration: 35157
loss: 0.9917972087860107,grad_norm: 0.9999993091904702, iteration: 35158
loss: 1.0033836364746094,grad_norm: 0.9999994955538277, iteration: 35159
loss: 0.9864206910133362,grad_norm: 0.9999990816232633, iteration: 35160
loss: 0.9911088347434998,grad_norm: 0.7914318388388185, iteration: 35161
loss: 0.9890373349189758,grad_norm: 0.943517046785461, iteration: 35162
loss: 1.041192650794983,grad_norm: 0.9289471647508037, iteration: 35163
loss: 1.0403331518173218,grad_norm: 0.9999990749614238, iteration: 35164
loss: 0.9965224266052246,grad_norm: 0.999999228584378, iteration: 35165
loss: 0.9974963068962097,grad_norm: 0.7791677015377529, iteration: 35166
loss: 1.0322834253311157,grad_norm: 0.9999990974490442, iteration: 35167
loss: 1.0182843208312988,grad_norm: 0.9914729995634328, iteration: 35168
loss: 0.9966369867324829,grad_norm: 0.9435537509426015, iteration: 35169
loss: 0.9904667139053345,grad_norm: 0.9999991674786423, iteration: 35170
loss: 1.0250896215438843,grad_norm: 0.9999990597392628, iteration: 35171
loss: 1.0526691675186157,grad_norm: 0.9999995936837165, iteration: 35172
loss: 1.024021029472351,grad_norm: 0.8077477710829114, iteration: 35173
loss: 0.9654913544654846,grad_norm: 0.9999990956032022, iteration: 35174
loss: 0.9987101554870605,grad_norm: 0.9999993942984641, iteration: 35175
loss: 1.111361026763916,grad_norm: 0.9999997543756508, iteration: 35176
loss: 0.9928372502326965,grad_norm: 0.9999992180207005, iteration: 35177
loss: 1.025837779045105,grad_norm: 0.9999999909508331, iteration: 35178
loss: 1.0110459327697754,grad_norm: 0.9999992096855838, iteration: 35179
loss: 1.021039605140686,grad_norm: 0.9999990053449418, iteration: 35180
loss: 1.004685401916504,grad_norm: 0.999999830477362, iteration: 35181
loss: 1.0152901411056519,grad_norm: 0.9999991615000132, iteration: 35182
loss: 0.9667675495147705,grad_norm: 0.9999989674403131, iteration: 35183
loss: 1.005363941192627,grad_norm: 0.9999991637250084, iteration: 35184
loss: 1.0031317472457886,grad_norm: 0.9999995417575074, iteration: 35185
loss: 1.0000649690628052,grad_norm: 0.959952907245983, iteration: 35186
loss: 1.0417790412902832,grad_norm: 0.999999105264994, iteration: 35187
loss: 1.0519413948059082,grad_norm: 0.9999994513565102, iteration: 35188
loss: 0.9579641222953796,grad_norm: 0.9999993121280133, iteration: 35189
loss: 1.0311143398284912,grad_norm: 0.9867130955742336, iteration: 35190
loss: 0.9761936664581299,grad_norm: 0.9819594088104157, iteration: 35191
loss: 0.9849709868431091,grad_norm: 0.9454655560687839, iteration: 35192
loss: 0.9826975464820862,grad_norm: 0.9999990703603114, iteration: 35193
loss: 0.9940673112869263,grad_norm: 0.9999991193764056, iteration: 35194
loss: 1.0908201932907104,grad_norm: 0.9999991970966264, iteration: 35195
loss: 1.0085991621017456,grad_norm: 0.9999989044413945, iteration: 35196
loss: 0.9939047694206238,grad_norm: 0.9999990952506432, iteration: 35197
loss: 1.0524016618728638,grad_norm: 0.9999996320238285, iteration: 35198
loss: 1.0318586826324463,grad_norm: 0.9999993847318921, iteration: 35199
loss: 1.0161694288253784,grad_norm: 0.9999991413735991, iteration: 35200
loss: 0.9631146788597107,grad_norm: 0.9875707683535756, iteration: 35201
loss: 0.9443396329879761,grad_norm: 0.9999990320062379, iteration: 35202
loss: 1.0350021123886108,grad_norm: 0.9999997023465319, iteration: 35203
loss: 1.0047425031661987,grad_norm: 0.8943260653944578, iteration: 35204
loss: 1.0156583786010742,grad_norm: 0.8081101509997336, iteration: 35205
loss: 1.004870057106018,grad_norm: 0.8968254614298464, iteration: 35206
loss: 1.0064316987991333,grad_norm: 0.99999926949875, iteration: 35207
loss: 1.041954517364502,grad_norm: 0.9647453683295281, iteration: 35208
loss: 0.9752508997917175,grad_norm: 0.7990993025593149, iteration: 35209
loss: 1.0040751695632935,grad_norm: 0.9999997122778651, iteration: 35210
loss: 1.089416742324829,grad_norm: 0.9145648134310663, iteration: 35211
loss: 0.9974648356437683,grad_norm: 0.999999219101779, iteration: 35212
loss: 1.0175442695617676,grad_norm: 0.9999991703324536, iteration: 35213
loss: 0.9620481729507446,grad_norm: 0.960109505422766, iteration: 35214
loss: 1.0021754503250122,grad_norm: 0.999999091938303, iteration: 35215
loss: 1.0170347690582275,grad_norm: 0.9004377327376955, iteration: 35216
loss: 1.0653486251831055,grad_norm: 0.9978475146396155, iteration: 35217
loss: 1.0225275754928589,grad_norm: 0.9999992753607827, iteration: 35218
loss: 0.9764801859855652,grad_norm: 0.9999993091469493, iteration: 35219
loss: 1.0007802248001099,grad_norm: 0.9999990428148271, iteration: 35220
loss: 1.0097280740737915,grad_norm: 0.8716591971268727, iteration: 35221
loss: 0.994045615196228,grad_norm: 0.9999994717315837, iteration: 35222
loss: 1.055088996887207,grad_norm: 0.941101684168991, iteration: 35223
loss: 1.0684906244277954,grad_norm: 0.9999994586728096, iteration: 35224
loss: 1.0752345323562622,grad_norm: 0.999999834349576, iteration: 35225
loss: 1.0097732543945312,grad_norm: 0.8945861526921991, iteration: 35226
loss: 0.992250919342041,grad_norm: 0.8334256898839489, iteration: 35227
loss: 0.9705462455749512,grad_norm: 0.999999105985073, iteration: 35228
loss: 0.9983309507369995,grad_norm: 0.999999213113883, iteration: 35229
loss: 1.054360032081604,grad_norm: 0.9833080383718265, iteration: 35230
loss: 1.1023309230804443,grad_norm: 0.9999992164922349, iteration: 35231
loss: 1.0171784162521362,grad_norm: 0.9999991966916169, iteration: 35232
loss: 1.0714340209960938,grad_norm: 0.9999995913540922, iteration: 35233
loss: 1.0523838996887207,grad_norm: 0.9654562403660698, iteration: 35234
loss: 1.086363673210144,grad_norm: 0.9999992594609175, iteration: 35235
loss: 0.9758746027946472,grad_norm: 0.9999992861135977, iteration: 35236
loss: 1.0287574529647827,grad_norm: 0.9999993053925106, iteration: 35237
loss: 1.0155946016311646,grad_norm: 0.9567160563978101, iteration: 35238
loss: 1.0592851638793945,grad_norm: 0.9999993059451021, iteration: 35239
loss: 1.0282001495361328,grad_norm: 0.9491380770043523, iteration: 35240
loss: 1.0377897024154663,grad_norm: 0.9999989756469881, iteration: 35241
loss: 1.014428973197937,grad_norm: 0.900633057662927, iteration: 35242
loss: 0.9789552688598633,grad_norm: 0.917207940563672, iteration: 35243
loss: 1.008049488067627,grad_norm: 0.9999991054688989, iteration: 35244
loss: 1.0191770792007446,grad_norm: 0.999999138006316, iteration: 35245
loss: 1.074915885925293,grad_norm: 0.9999994684294536, iteration: 35246
loss: 1.0362412929534912,grad_norm: 0.999999115118151, iteration: 35247
loss: 0.9915023446083069,grad_norm: 0.999999046094159, iteration: 35248
loss: 1.01833176612854,grad_norm: 0.854171799826845, iteration: 35249
loss: 0.979650616645813,grad_norm: 0.9851819683279235, iteration: 35250
loss: 1.020870566368103,grad_norm: 0.977901897279243, iteration: 35251
loss: 1.0470283031463623,grad_norm: 0.9999991826326949, iteration: 35252
loss: 0.9809901714324951,grad_norm: 0.8499029323467645, iteration: 35253
loss: 1.019118070602417,grad_norm: 0.9593782326212341, iteration: 35254
loss: 0.9973214268684387,grad_norm: 0.7555704829069313, iteration: 35255
loss: 0.9976620078086853,grad_norm: 0.8790099701829766, iteration: 35256
loss: 0.9980433583259583,grad_norm: 0.9268307372821555, iteration: 35257
loss: 1.0115886926651,grad_norm: 0.9999989423710736, iteration: 35258
loss: 0.9927417039871216,grad_norm: 0.9999990665686761, iteration: 35259
loss: 1.0190404653549194,grad_norm: 0.8999274735700104, iteration: 35260
loss: 1.0133780241012573,grad_norm: 0.8367051735807859, iteration: 35261
loss: 1.0243127346038818,grad_norm: 0.9999992783351198, iteration: 35262
loss: 1.0691531896591187,grad_norm: 0.9999990458143011, iteration: 35263
loss: 0.9698627591133118,grad_norm: 0.7758372151019193, iteration: 35264
loss: 0.9997520446777344,grad_norm: 0.9999990922159814, iteration: 35265
loss: 1.0370889902114868,grad_norm: 0.9838509081937904, iteration: 35266
loss: 1.010102391242981,grad_norm: 0.8807178066731882, iteration: 35267
loss: 0.9895973801612854,grad_norm: 0.9185163164003218, iteration: 35268
loss: 1.01323664188385,grad_norm: 0.9999992708404581, iteration: 35269
loss: 0.9934072494506836,grad_norm: 0.9999990516784961, iteration: 35270
loss: 0.995972752571106,grad_norm: 0.7515445513270217, iteration: 35271
loss: 0.9770426154136658,grad_norm: 0.9999991999957669, iteration: 35272
loss: 0.9771870970726013,grad_norm: 0.9579996110561171, iteration: 35273
loss: 1.0031683444976807,grad_norm: 0.9999993336332716, iteration: 35274
loss: 1.008713722229004,grad_norm: 0.9965585213950252, iteration: 35275
loss: 0.9448555111885071,grad_norm: 0.9462956723225713, iteration: 35276
loss: 0.9741153120994568,grad_norm: 0.7965426852869384, iteration: 35277
loss: 1.0282155275344849,grad_norm: 0.9999994633603415, iteration: 35278
loss: 1.0263487100601196,grad_norm: 0.9999993066722834, iteration: 35279
loss: 1.0701782703399658,grad_norm: 0.9999996098518471, iteration: 35280
loss: 1.0194780826568604,grad_norm: 0.9999996114586231, iteration: 35281
loss: 0.9976381063461304,grad_norm: 0.9733259158759376, iteration: 35282
loss: 0.9660106301307678,grad_norm: 0.9999992165900936, iteration: 35283
loss: 1.0325992107391357,grad_norm: 0.9436513911172704, iteration: 35284
loss: 1.018085241317749,grad_norm: 0.9999995183756731, iteration: 35285
loss: 0.9695746898651123,grad_norm: 0.8864876951103875, iteration: 35286
loss: 0.9835017919540405,grad_norm: 0.99999951589492, iteration: 35287
loss: 1.00070321559906,grad_norm: 0.9886807739479231, iteration: 35288
loss: 1.0155616998672485,grad_norm: 0.9586246275537627, iteration: 35289
loss: 1.008190631866455,grad_norm: 0.8522841303908701, iteration: 35290
loss: 1.0121644735336304,grad_norm: 0.9999990925742103, iteration: 35291
loss: 1.0027979612350464,grad_norm: 0.9999994489401175, iteration: 35292
loss: 1.0070269107818604,grad_norm: 0.9836293500791836, iteration: 35293
loss: 0.9760108590126038,grad_norm: 0.9478083449602273, iteration: 35294
loss: 1.0374089479446411,grad_norm: 0.9999993129724005, iteration: 35295
loss: 0.9996205568313599,grad_norm: 0.9191081074966804, iteration: 35296
loss: 1.0387086868286133,grad_norm: 0.865429322668243, iteration: 35297
loss: 0.9952028393745422,grad_norm: 0.9999990823020288, iteration: 35298
loss: 1.0598716735839844,grad_norm: 0.879612609995249, iteration: 35299
loss: 1.0292810201644897,grad_norm: 0.999999348839194, iteration: 35300
loss: 1.0255662202835083,grad_norm: 0.7989980882452236, iteration: 35301
loss: 1.0134015083312988,grad_norm: 0.9925606440363735, iteration: 35302
loss: 1.0084309577941895,grad_norm: 0.821635030086867, iteration: 35303
loss: 0.9949681162834167,grad_norm: 0.9999992489762494, iteration: 35304
loss: 1.0120481252670288,grad_norm: 0.8754077254375076, iteration: 35305
loss: 1.0251729488372803,grad_norm: 0.9999991228106546, iteration: 35306
loss: 1.0198942422866821,grad_norm: 0.9999997548929785, iteration: 35307
loss: 1.0282167196273804,grad_norm: 0.9999994582557462, iteration: 35308
loss: 1.014270544052124,grad_norm: 0.9999990170138435, iteration: 35309
loss: 1.0067068338394165,grad_norm: 0.999999291152783, iteration: 35310
loss: 1.008196234703064,grad_norm: 0.9999990260275439, iteration: 35311
loss: 1.0156235694885254,grad_norm: 0.9535075189999906, iteration: 35312
loss: 1.0040616989135742,grad_norm: 0.9999990883161373, iteration: 35313
loss: 0.9765995740890503,grad_norm: 0.9999990158668315, iteration: 35314
loss: 0.9750180244445801,grad_norm: 0.9839429923137823, iteration: 35315
loss: 1.0037519931793213,grad_norm: 0.9999991743530802, iteration: 35316
loss: 0.9983348846435547,grad_norm: 0.9999992410085686, iteration: 35317
loss: 1.04020357131958,grad_norm: 0.8865393012511463, iteration: 35318
loss: 0.9890193343162537,grad_norm: 0.9999994864920626, iteration: 35319
loss: 1.040324091911316,grad_norm: 0.9935665666068804, iteration: 35320
loss: 1.0326122045516968,grad_norm: 0.999999364295132, iteration: 35321
loss: 0.9582762122154236,grad_norm: 0.9999992776504761, iteration: 35322
loss: 1.049134612083435,grad_norm: 0.999999381590672, iteration: 35323
loss: 1.0964967012405396,grad_norm: 0.8809355789985375, iteration: 35324
loss: 1.037021517753601,grad_norm: 0.9999998022860825, iteration: 35325
loss: 1.040135145187378,grad_norm: 0.9999996650144338, iteration: 35326
loss: 0.9915891289710999,grad_norm: 0.9999992169310712, iteration: 35327
loss: 1.0037399530410767,grad_norm: 0.9999990899886938, iteration: 35328
loss: 1.0194119215011597,grad_norm: 0.9999994761307394, iteration: 35329
loss: 1.0200916528701782,grad_norm: 0.9720824055387332, iteration: 35330
loss: 1.0652525424957275,grad_norm: 0.9999991116627391, iteration: 35331
loss: 0.9888418912887573,grad_norm: 0.8801283925587062, iteration: 35332
loss: 0.9972859025001526,grad_norm: 0.9999991187963969, iteration: 35333
loss: 0.98530113697052,grad_norm: 0.9999991755738323, iteration: 35334
loss: 0.9922919869422913,grad_norm: 0.999999177254091, iteration: 35335
loss: 1.0589430332183838,grad_norm: 0.9999990499188164, iteration: 35336
loss: 1.0069197416305542,grad_norm: 0.8448277215000213, iteration: 35337
loss: 1.005677580833435,grad_norm: 0.8517793307234093, iteration: 35338
loss: 1.0071406364440918,grad_norm: 0.9999992955348302, iteration: 35339
loss: 0.9717667102813721,grad_norm: 0.9115577593448642, iteration: 35340
loss: 0.9917961955070496,grad_norm: 0.9999991046990666, iteration: 35341
loss: 1.0191203355789185,grad_norm: 0.9239308253723579, iteration: 35342
loss: 1.0045779943466187,grad_norm: 0.9999994895838046, iteration: 35343
loss: 1.0087571144104004,grad_norm: 0.8988338372381578, iteration: 35344
loss: 1.0130484104156494,grad_norm: 0.9503831190846445, iteration: 35345
loss: 0.9824007153511047,grad_norm: 0.9999992304825074, iteration: 35346
loss: 0.9712198972702026,grad_norm: 0.9999991802547274, iteration: 35347
loss: 0.9782369136810303,grad_norm: 0.9999990818846587, iteration: 35348
loss: 1.0015578269958496,grad_norm: 0.769652942935161, iteration: 35349
loss: 1.0029655694961548,grad_norm: 0.9999990688311262, iteration: 35350
loss: 1.0071697235107422,grad_norm: 0.7876422646315034, iteration: 35351
loss: 1.0174839496612549,grad_norm: 0.9058560223336989, iteration: 35352
loss: 1.0018028020858765,grad_norm: 0.8220077943405592, iteration: 35353
loss: 0.9830015897750854,grad_norm: 0.9994099403037489, iteration: 35354
loss: 1.0029380321502686,grad_norm: 0.9892711497386297, iteration: 35355
loss: 1.0070408582687378,grad_norm: 0.9999992149467111, iteration: 35356
loss: 1.0654102563858032,grad_norm: 0.9619624819825732, iteration: 35357
loss: 1.009486436843872,grad_norm: 0.9999998295468268, iteration: 35358
loss: 0.9964357614517212,grad_norm: 0.9082294878962567, iteration: 35359
loss: 0.9950519800186157,grad_norm: 0.9999992117850397, iteration: 35360
loss: 1.0396915674209595,grad_norm: 0.999999746549026, iteration: 35361
loss: 1.004489541053772,grad_norm: 0.9999990929761199, iteration: 35362
loss: 0.9963111281394958,grad_norm: 0.9999993090941673, iteration: 35363
loss: 1.027256965637207,grad_norm: 0.9690435814147748, iteration: 35364
loss: 1.0915888547897339,grad_norm: 0.9999996150665226, iteration: 35365
loss: 1.0087860822677612,grad_norm: 0.9830198896606849, iteration: 35366
loss: 1.0263253450393677,grad_norm: 0.8979490575684765, iteration: 35367
loss: 0.9872618317604065,grad_norm: 0.898101453404763, iteration: 35368
loss: 1.0487086772918701,grad_norm: 0.9999991629157785, iteration: 35369
loss: 0.9951522946357727,grad_norm: 0.9253860542729551, iteration: 35370
loss: 1.0387052297592163,grad_norm: 0.9999991660717692, iteration: 35371
loss: 1.0365819931030273,grad_norm: 0.9619512555971795, iteration: 35372
loss: 1.0539876222610474,grad_norm: 0.9999993745071972, iteration: 35373
loss: 0.9524831175804138,grad_norm: 0.9437623304939687, iteration: 35374
loss: 1.0115524530410767,grad_norm: 0.9620154669341683, iteration: 35375
loss: 1.0109992027282715,grad_norm: 0.9999991815924868, iteration: 35376
loss: 1.0167310237884521,grad_norm: 0.9208859175094846, iteration: 35377
loss: 1.0352468490600586,grad_norm: 0.9999990992858167, iteration: 35378
loss: 0.9963770508766174,grad_norm: 0.9999989742759875, iteration: 35379
loss: 0.984355092048645,grad_norm: 0.9999991822351778, iteration: 35380
loss: 1.0000382661819458,grad_norm: 0.941424148420844, iteration: 35381
loss: 0.9859115481376648,grad_norm: 0.9999991000837977, iteration: 35382
loss: 0.9765700697898865,grad_norm: 0.9363500937612774, iteration: 35383
loss: 1.0114678144454956,grad_norm: 0.9111333020806214, iteration: 35384
loss: 0.9649181365966797,grad_norm: 0.9528170951745267, iteration: 35385
loss: 0.9858732223510742,grad_norm: 0.9952246453656054, iteration: 35386
loss: 1.0180678367614746,grad_norm: 0.99999924419923, iteration: 35387
loss: 1.0151455402374268,grad_norm: 0.9249930650988654, iteration: 35388
loss: 0.9872534871101379,grad_norm: 0.9116248470950703, iteration: 35389
loss: 1.0238933563232422,grad_norm: 0.9203093140121203, iteration: 35390
loss: 1.0111777782440186,grad_norm: 0.8209055345568185, iteration: 35391
loss: 0.9540555477142334,grad_norm: 0.9999991406861173, iteration: 35392
loss: 0.9635921120643616,grad_norm: 0.9999994154470967, iteration: 35393
loss: 1.0181238651275635,grad_norm: 0.9848465199269462, iteration: 35394
loss: 0.9847137331962585,grad_norm: 0.9777602834616426, iteration: 35395
loss: 1.0770729780197144,grad_norm: 0.9999992036453542, iteration: 35396
loss: 1.022682547569275,grad_norm: 0.9737934129337513, iteration: 35397
loss: 1.0206851959228516,grad_norm: 0.9999995303134254, iteration: 35398
loss: 0.9955870509147644,grad_norm: 0.8384796527482237, iteration: 35399
loss: 0.9960843324661255,grad_norm: 0.9581215572874934, iteration: 35400
loss: 1.0783674716949463,grad_norm: 0.9999995184823606, iteration: 35401
loss: 1.0471223592758179,grad_norm: 0.9999991412733976, iteration: 35402
loss: 1.0125797986984253,grad_norm: 0.901017483010677, iteration: 35403
loss: 1.0167427062988281,grad_norm: 0.9541070499780355, iteration: 35404
loss: 0.9615599513053894,grad_norm: 0.891966087487269, iteration: 35405
loss: 1.0061562061309814,grad_norm: 0.9999991515143823, iteration: 35406
loss: 1.0075222253799438,grad_norm: 0.8745923530736992, iteration: 35407
loss: 1.030641794204712,grad_norm: 0.9916960111505124, iteration: 35408
loss: 1.0028396844863892,grad_norm: 0.9508971618292011, iteration: 35409
loss: 1.0264503955841064,grad_norm: 0.999999178143286, iteration: 35410
loss: 1.1155974864959717,grad_norm: 0.9999997008920007, iteration: 35411
loss: 1.009010910987854,grad_norm: 0.7786216651145856, iteration: 35412
loss: 1.0506343841552734,grad_norm: 0.9999992535259671, iteration: 35413
loss: 0.989633321762085,grad_norm: 0.8558377564680889, iteration: 35414
loss: 0.992224395275116,grad_norm: 0.7609147227621035, iteration: 35415
loss: 0.9835205078125,grad_norm: 0.9860218028049158, iteration: 35416
loss: 0.9781422019004822,grad_norm: 0.9999990442153612, iteration: 35417
loss: 0.9994770884513855,grad_norm: 0.9792835266689528, iteration: 35418
loss: 1.0077062845230103,grad_norm: 0.9481927898837508, iteration: 35419
loss: 1.0131455659866333,grad_norm: 0.9813577410131774, iteration: 35420
loss: 1.0271389484405518,grad_norm: 0.9999998673061793, iteration: 35421
loss: 1.0093389749526978,grad_norm: 0.9277139863593086, iteration: 35422
loss: 1.0139775276184082,grad_norm: 0.9028506718571513, iteration: 35423
loss: 1.0497078895568848,grad_norm: 0.9999991136276744, iteration: 35424
loss: 1.034961462020874,grad_norm: 0.9999991811056655, iteration: 35425
loss: 1.0117809772491455,grad_norm: 0.9999993976652846, iteration: 35426
loss: 1.030510425567627,grad_norm: 0.999999538250596, iteration: 35427
loss: 1.018860936164856,grad_norm: 0.9420991580998083, iteration: 35428
loss: 0.9453273415565491,grad_norm: 0.9007368790842393, iteration: 35429
loss: 0.9741466045379639,grad_norm: 0.9999990143638903, iteration: 35430
loss: 0.9996868371963501,grad_norm: 0.9973574523784333, iteration: 35431
loss: 1.0066934823989868,grad_norm: 0.9995284581863941, iteration: 35432
loss: 1.0198252201080322,grad_norm: 0.9913489668841035, iteration: 35433
loss: 1.0400962829589844,grad_norm: 0.8905838186955806, iteration: 35434
loss: 1.0203813314437866,grad_norm: 0.9999990421203457, iteration: 35435
loss: 0.9758716225624084,grad_norm: 0.9982402226900593, iteration: 35436
loss: 1.037746548652649,grad_norm: 0.9999993459529646, iteration: 35437
loss: 0.994996964931488,grad_norm: 0.9999989033187956, iteration: 35438
loss: 1.0800079107284546,grad_norm: 0.9999996167256817, iteration: 35439
loss: 1.0022616386413574,grad_norm: 0.9999996391889231, iteration: 35440
loss: 0.9979072213172913,grad_norm: 0.820866416291005, iteration: 35441
loss: 0.9867144823074341,grad_norm: 0.9999991823460975, iteration: 35442
loss: 1.0040642023086548,grad_norm: 0.9999989475490526, iteration: 35443
loss: 0.9899082183837891,grad_norm: 0.9545317397709534, iteration: 35444
loss: 1.0226621627807617,grad_norm: 0.9999994153127418, iteration: 35445
loss: 0.9775359630584717,grad_norm: 0.9699881437389136, iteration: 35446
loss: 0.9983701109886169,grad_norm: 0.9833388438243658, iteration: 35447
loss: 0.989932119846344,grad_norm: 0.8175407374926795, iteration: 35448
loss: 0.9776480793952942,grad_norm: 0.9168633452712681, iteration: 35449
loss: 1.0315909385681152,grad_norm: 0.9999990842517535, iteration: 35450
loss: 0.9993242025375366,grad_norm: 0.8607543845285102, iteration: 35451
loss: 1.02082097530365,grad_norm: 0.9999991578477875, iteration: 35452
loss: 1.0192313194274902,grad_norm: 0.7903588186433483, iteration: 35453
loss: 1.008573055267334,grad_norm: 0.9999990487334383, iteration: 35454
loss: 0.9776888489723206,grad_norm: 0.8667181893311433, iteration: 35455
loss: 1.0316051244735718,grad_norm: 0.9999997785016445, iteration: 35456
loss: 1.0269747972488403,grad_norm: 0.9999990906440273, iteration: 35457
loss: 0.9710857272148132,grad_norm: 0.9657122115791, iteration: 35458
loss: 1.0176650285720825,grad_norm: 0.9999999328977566, iteration: 35459
loss: 1.0020809173583984,grad_norm: 0.9672281745455814, iteration: 35460
loss: 1.0189716815948486,grad_norm: 0.9806486635684485, iteration: 35461
loss: 0.9964110851287842,grad_norm: 0.9999991939237526, iteration: 35462
loss: 0.9766594767570496,grad_norm: 0.9999991499784397, iteration: 35463
loss: 1.0169832706451416,grad_norm: 0.8280728060749968, iteration: 35464
loss: 0.9857249855995178,grad_norm: 0.9089907878280313, iteration: 35465
loss: 1.020228385925293,grad_norm: 0.9999990424856429, iteration: 35466
loss: 1.01596200466156,grad_norm: 0.933474465547375, iteration: 35467
loss: 1.0639569759368896,grad_norm: 0.9999993351091654, iteration: 35468
loss: 1.015791416168213,grad_norm: 0.9465316834642845, iteration: 35469
loss: 0.9858991503715515,grad_norm: 0.8928709331027643, iteration: 35470
loss: 1.0355554819107056,grad_norm: 0.8035375509116001, iteration: 35471
loss: 0.9888024926185608,grad_norm: 0.9748764351874825, iteration: 35472
loss: 1.0084984302520752,grad_norm: 0.999999167120444, iteration: 35473
loss: 1.0320347547531128,grad_norm: 0.9999991143325064, iteration: 35474
loss: 1.0265834331512451,grad_norm: 0.9999991114895258, iteration: 35475
loss: 1.0405465364456177,grad_norm: 0.9999998374522835, iteration: 35476
loss: 1.0498826503753662,grad_norm: 0.8924656856550205, iteration: 35477
loss: 0.9993404746055603,grad_norm: 0.9743519954314535, iteration: 35478
loss: 1.069459319114685,grad_norm: 0.9431056766126465, iteration: 35479
loss: 0.9881142973899841,grad_norm: 0.8923712912385853, iteration: 35480
loss: 0.9969035387039185,grad_norm: 0.9999990672363603, iteration: 35481
loss: 1.0260791778564453,grad_norm: 0.9999996681197248, iteration: 35482
loss: 1.0403287410736084,grad_norm: 0.9999993628304521, iteration: 35483
loss: 1.0232322216033936,grad_norm: 0.9999995134694376, iteration: 35484
loss: 1.0322365760803223,grad_norm: 0.999999704551223, iteration: 35485
loss: 1.018642544746399,grad_norm: 0.9999990303604168, iteration: 35486
loss: 1.0309619903564453,grad_norm: 0.9999995469770963, iteration: 35487
loss: 0.9995855093002319,grad_norm: 0.999999185990342, iteration: 35488
loss: 1.1349828243255615,grad_norm: 0.9999997851800392, iteration: 35489
loss: 0.9840655326843262,grad_norm: 0.9999990517256097, iteration: 35490
loss: 1.0333484411239624,grad_norm: 0.9999990536118604, iteration: 35491
loss: 1.0285439491271973,grad_norm: 0.8555224256182258, iteration: 35492
loss: 1.021775722503662,grad_norm: 0.9999994725639089, iteration: 35493
loss: 0.992711067199707,grad_norm: 0.9694546458551713, iteration: 35494
loss: 1.0205212831497192,grad_norm: 0.9316353186748446, iteration: 35495
loss: 0.9807143211364746,grad_norm: 0.9999993988387403, iteration: 35496
loss: 0.982864260673523,grad_norm: 0.9999992982987113, iteration: 35497
loss: 1.0102908611297607,grad_norm: 0.9999989332615736, iteration: 35498
loss: 1.0105700492858887,grad_norm: 0.8205126827143869, iteration: 35499
loss: 1.0114541053771973,grad_norm: 0.999999137650288, iteration: 35500
loss: 1.0390535593032837,grad_norm: 0.9999997489391795, iteration: 35501
loss: 1.016520619392395,grad_norm: 0.9999994228132911, iteration: 35502
loss: 1.0381962060928345,grad_norm: 0.996085007233721, iteration: 35503
loss: 1.03828763961792,grad_norm: 0.9999992064882898, iteration: 35504
loss: 1.0482869148254395,grad_norm: 0.9999993539671755, iteration: 35505
loss: 1.0170283317565918,grad_norm: 0.9317394779159417, iteration: 35506
loss: 0.9765132069587708,grad_norm: 0.9999990048729214, iteration: 35507
loss: 0.980992317199707,grad_norm: 0.9999992803166263, iteration: 35508
loss: 1.0329471826553345,grad_norm: 0.9999994038109092, iteration: 35509
loss: 0.9857279062271118,grad_norm: 0.9010868412354243, iteration: 35510
loss: 1.0115506649017334,grad_norm: 0.9999998277870717, iteration: 35511
loss: 1.0062416791915894,grad_norm: 0.9999996171488629, iteration: 35512
loss: 1.0164330005645752,grad_norm: 0.9999992647334165, iteration: 35513
loss: 1.027448296546936,grad_norm: 0.999999569418126, iteration: 35514
loss: 1.0429754257202148,grad_norm: 0.999999430507869, iteration: 35515
loss: 0.9928043484687805,grad_norm: 0.9999994134015397, iteration: 35516
loss: 1.0162626504898071,grad_norm: 0.9024224598417611, iteration: 35517
loss: 1.034989595413208,grad_norm: 0.9999993302160557, iteration: 35518
loss: 1.0095933675765991,grad_norm: 0.8315911622353261, iteration: 35519
loss: 1.0203391313552856,grad_norm: 0.9999991114179303, iteration: 35520
loss: 1.024336576461792,grad_norm: 0.9999991375970033, iteration: 35521
loss: 0.9850736260414124,grad_norm: 0.999999220100804, iteration: 35522
loss: 1.0664687156677246,grad_norm: 0.9999993669384772, iteration: 35523
loss: 0.9702025055885315,grad_norm: 0.9999991873373255, iteration: 35524
loss: 1.091246247291565,grad_norm: 0.9999993657209632, iteration: 35525
loss: 0.9792383909225464,grad_norm: 0.9999991039108551, iteration: 35526
loss: 0.9728142619132996,grad_norm: 0.999999022642933, iteration: 35527
loss: 1.0074188709259033,grad_norm: 0.7590559763791658, iteration: 35528
loss: 1.0499801635742188,grad_norm: 0.9999995379267148, iteration: 35529
loss: 1.0274875164031982,grad_norm: 0.9999991739217613, iteration: 35530
loss: 1.0228215456008911,grad_norm: 0.9999992878793068, iteration: 35531
loss: 1.0613807439804077,grad_norm: 0.9999999038745474, iteration: 35532
loss: 0.9760310053825378,grad_norm: 0.9114689517174926, iteration: 35533
loss: 1.054732322692871,grad_norm: 0.9999992055682222, iteration: 35534
loss: 1.0252552032470703,grad_norm: 0.902830882283401, iteration: 35535
loss: 1.0505337715148926,grad_norm: 0.9999995710889354, iteration: 35536
loss: 1.043354868888855,grad_norm: 0.999999686691635, iteration: 35537
loss: 1.0314174890518188,grad_norm: 0.970964650254504, iteration: 35538
loss: 0.9837314486503601,grad_norm: 0.9284871313684951, iteration: 35539
loss: 1.0112981796264648,grad_norm: 0.9999991316202628, iteration: 35540
loss: 1.0480670928955078,grad_norm: 0.9999995593834506, iteration: 35541
loss: 1.052188515663147,grad_norm: 0.9999995888592628, iteration: 35542
loss: 1.034785509109497,grad_norm: 0.9748062959740497, iteration: 35543
loss: 1.0317869186401367,grad_norm: 0.9115754311141724, iteration: 35544
loss: 0.9963077306747437,grad_norm: 0.9999990625956102, iteration: 35545
loss: 1.0199464559555054,grad_norm: 0.9999991214584483, iteration: 35546
loss: 1.071260690689087,grad_norm: 0.9999993903929355, iteration: 35547
loss: 1.01199471950531,grad_norm: 0.794397077369971, iteration: 35548
loss: 0.9816179275512695,grad_norm: 0.9346992587820309, iteration: 35549
loss: 1.012671709060669,grad_norm: 0.9999989391496042, iteration: 35550
loss: 1.0185799598693848,grad_norm: 0.9382370516194292, iteration: 35551
loss: 0.983343243598938,grad_norm: 0.8683939958062493, iteration: 35552
loss: 1.0055686235427856,grad_norm: 0.9898552681688081, iteration: 35553
loss: 1.0165129899978638,grad_norm: 0.9389561380348435, iteration: 35554
loss: 0.9710517525672913,grad_norm: 0.9999990260121537, iteration: 35555
loss: 1.032646894454956,grad_norm: 0.9999992148527538, iteration: 35556
loss: 0.9750834703445435,grad_norm: 0.9999989377402185, iteration: 35557
loss: 1.0061308145523071,grad_norm: 0.9999990950437513, iteration: 35558
loss: 1.0044931173324585,grad_norm: 0.999999869866385, iteration: 35559
loss: 0.9833543300628662,grad_norm: 0.999998944281352, iteration: 35560
loss: 0.9589187502861023,grad_norm: 0.9160890878924952, iteration: 35561
loss: 1.0208271741867065,grad_norm: 0.9999993341230293, iteration: 35562
loss: 1.0307146310806274,grad_norm: 0.8244078175786855, iteration: 35563
loss: 0.9645658135414124,grad_norm: 0.9999990052748362, iteration: 35564
loss: 1.0626018047332764,grad_norm: 0.9999999226570219, iteration: 35565
loss: 1.0246244668960571,grad_norm: 0.8125156523180818, iteration: 35566
loss: 0.9947295784950256,grad_norm: 0.9999994297939019, iteration: 35567
loss: 1.0162005424499512,grad_norm: 0.9999990766122279, iteration: 35568
loss: 1.01205575466156,grad_norm: 0.8607997567278189, iteration: 35569
loss: 1.0014004707336426,grad_norm: 0.9443277321963636, iteration: 35570
loss: 1.026050090789795,grad_norm: 0.9527252329956414, iteration: 35571
loss: 1.0256624221801758,grad_norm: 0.9999992592780068, iteration: 35572
loss: 1.113427996635437,grad_norm: 0.9999998416010614, iteration: 35573
loss: 1.0017194747924805,grad_norm: 0.9082407153396743, iteration: 35574
loss: 1.0221513509750366,grad_norm: 0.9999991794907148, iteration: 35575
loss: 1.0246912240982056,grad_norm: 0.999999990375579, iteration: 35576
loss: 1.0413347482681274,grad_norm: 0.9999990318866824, iteration: 35577
loss: 0.9720731973648071,grad_norm: 0.8528392857650045, iteration: 35578
loss: 1.0039985179901123,grad_norm: 0.9999988821163874, iteration: 35579
loss: 1.0357179641723633,grad_norm: 0.9999991793509206, iteration: 35580
loss: 0.9951320290565491,grad_norm: 0.9999993062791973, iteration: 35581
loss: 1.0450427532196045,grad_norm: 0.9155510395974218, iteration: 35582
loss: 0.9995511770248413,grad_norm: 0.9999991279930592, iteration: 35583
loss: 1.0816287994384766,grad_norm: 0.9999998691138257, iteration: 35584
loss: 1.019193172454834,grad_norm: 0.9999996733529248, iteration: 35585
loss: 1.0118376016616821,grad_norm: 0.8423564419392952, iteration: 35586
loss: 1.0031160116195679,grad_norm: 0.9577875828943949, iteration: 35587
loss: 0.9671671986579895,grad_norm: 0.9999990180865116, iteration: 35588
loss: 1.0007601976394653,grad_norm: 0.9160174722788357, iteration: 35589
loss: 0.9942450523376465,grad_norm: 0.9999990948066366, iteration: 35590
loss: 1.005578637123108,grad_norm: 0.9999994068099967, iteration: 35591
loss: 1.0050592422485352,grad_norm: 0.9999993842845579, iteration: 35592
loss: 1.0283781290054321,grad_norm: 0.999999028081447, iteration: 35593
loss: 1.029262661933899,grad_norm: 0.9999991066769106, iteration: 35594
loss: 0.9913781881332397,grad_norm: 0.9999991339881877, iteration: 35595
loss: 1.0886085033416748,grad_norm: 0.9999992709242983, iteration: 35596
loss: 1.0106850862503052,grad_norm: 0.9999993001730528, iteration: 35597
loss: 1.0794663429260254,grad_norm: 0.9743825879843535, iteration: 35598
loss: 1.1568540334701538,grad_norm: 0.9999992088231636, iteration: 35599
loss: 1.0439406633377075,grad_norm: 0.9999993089832178, iteration: 35600
loss: 1.097382664680481,grad_norm: 0.9786160769856389, iteration: 35601
loss: 1.004004955291748,grad_norm: 0.9999991652144721, iteration: 35602
loss: 0.9963317513465881,grad_norm: 0.9999995060117463, iteration: 35603
loss: 1.0068928003311157,grad_norm: 0.8253199618154627, iteration: 35604
loss: 1.019001841545105,grad_norm: 0.9999990375189148, iteration: 35605
loss: 1.0165239572525024,grad_norm: 0.9999990344191929, iteration: 35606
loss: 0.9810693860054016,grad_norm: 0.9999989900974601, iteration: 35607
loss: 0.98603755235672,grad_norm: 0.8884686687436205, iteration: 35608
loss: 1.0626989603042603,grad_norm: 0.8961657107255309, iteration: 35609
loss: 1.0206658840179443,grad_norm: 0.9845433986016883, iteration: 35610
loss: 1.0199781656265259,grad_norm: 0.8696944796647894, iteration: 35611
loss: 1.0045133829116821,grad_norm: 0.9999992013628042, iteration: 35612
loss: 1.0067349672317505,grad_norm: 0.9999991080752618, iteration: 35613
loss: 1.0933884382247925,grad_norm: 0.9999990577717013, iteration: 35614
loss: 1.0451364517211914,grad_norm: 0.9999998794833337, iteration: 35615
loss: 1.0267106294631958,grad_norm: 0.9999991331865073, iteration: 35616
loss: 1.0205893516540527,grad_norm: 0.9999992562432537, iteration: 35617
loss: 1.0065377950668335,grad_norm: 0.9561562225573609, iteration: 35618
loss: 1.0566705465316772,grad_norm: 0.9999991699796057, iteration: 35619
loss: 0.9839144945144653,grad_norm: 0.9999990858700312, iteration: 35620
loss: 1.0110381841659546,grad_norm: 0.9999992735147379, iteration: 35621
loss: 0.997320294380188,grad_norm: 0.9999993348455342, iteration: 35622
loss: 0.9540663361549377,grad_norm: 0.9513569706113701, iteration: 35623
loss: 1.0051400661468506,grad_norm: 0.9631024553205694, iteration: 35624
loss: 0.9689204096794128,grad_norm: 0.8838514019474183, iteration: 35625
loss: 1.0361336469650269,grad_norm: 0.9999991703995621, iteration: 35626
loss: 0.9720031023025513,grad_norm: 0.9955696762273776, iteration: 35627
loss: 0.9733081459999084,grad_norm: 0.9999991516229109, iteration: 35628
loss: 1.0301164388656616,grad_norm: 0.9999996933346694, iteration: 35629
loss: 0.9769489765167236,grad_norm: 0.9507370956681277, iteration: 35630
loss: 1.0367660522460938,grad_norm: 0.947871178860206, iteration: 35631
loss: 1.009588599205017,grad_norm: 0.999999162761633, iteration: 35632
loss: 0.9958145022392273,grad_norm: 0.8907036511741477, iteration: 35633
loss: 1.0210645198822021,grad_norm: 0.9999990630999975, iteration: 35634
loss: 1.0037685632705688,grad_norm: 0.8928452608376325, iteration: 35635
loss: 1.0190178155899048,grad_norm: 0.9999994090428183, iteration: 35636
loss: 1.005945086479187,grad_norm: 0.9999990077603601, iteration: 35637
loss: 1.086028814315796,grad_norm: 0.9999995003123522, iteration: 35638
loss: 1.0231503248214722,grad_norm: 0.9239347325930561, iteration: 35639
loss: 1.164847731590271,grad_norm: 0.9999998827256255, iteration: 35640
loss: 1.0125566720962524,grad_norm: 0.999999092958668, iteration: 35641
loss: 1.0096498727798462,grad_norm: 0.9999990547537304, iteration: 35642
loss: 0.9961221814155579,grad_norm: 0.9999994436825818, iteration: 35643
loss: 1.002867579460144,grad_norm: 0.9999989680946301, iteration: 35644
loss: 1.0027544498443604,grad_norm: 0.999999225635093, iteration: 35645
loss: 1.0099490880966187,grad_norm: 0.9635367302623737, iteration: 35646
loss: 1.0312644243240356,grad_norm: 0.8067556909711692, iteration: 35647
loss: 1.048761248588562,grad_norm: 0.9999997494864616, iteration: 35648
loss: 0.9999567866325378,grad_norm: 0.9999991838960129, iteration: 35649
loss: 0.9668734669685364,grad_norm: 0.999999091599535, iteration: 35650
loss: 1.0092800855636597,grad_norm: 0.953092512941144, iteration: 35651
loss: 1.0395041704177856,grad_norm: 0.9999990121802129, iteration: 35652
loss: 1.017155647277832,grad_norm: 0.8683537459711428, iteration: 35653
loss: 1.0056127309799194,grad_norm: 0.8559733423115521, iteration: 35654
loss: 0.998075008392334,grad_norm: 0.9530240639885688, iteration: 35655
loss: 0.9714679718017578,grad_norm: 0.9466269525160605, iteration: 35656
loss: 1.0043821334838867,grad_norm: 0.9999989995175349, iteration: 35657
loss: 1.1490916013717651,grad_norm: 0.9999999052958998, iteration: 35658
loss: 0.9751259684562683,grad_norm: 0.9999991946908066, iteration: 35659
loss: 1.0268337726593018,grad_norm: 0.9999991942946135, iteration: 35660
loss: 0.975117027759552,grad_norm: 0.9999991217549141, iteration: 35661
loss: 0.9782366156578064,grad_norm: 0.999999015508456, iteration: 35662
loss: 1.0026687383651733,grad_norm: 0.9230325996834862, iteration: 35663
loss: 1.000504970550537,grad_norm: 0.8767066977785543, iteration: 35664
loss: 1.005376935005188,grad_norm: 0.9067629181591718, iteration: 35665
loss: 1.0253851413726807,grad_norm: 0.9569180573792823, iteration: 35666
loss: 1.015763759613037,grad_norm: 0.9890412349660856, iteration: 35667
loss: 1.019568681716919,grad_norm: 0.9999996635372858, iteration: 35668
loss: 0.9959374070167542,grad_norm: 0.8730494361534114, iteration: 35669
loss: 0.988094687461853,grad_norm: 0.9223846297292405, iteration: 35670
loss: 1.0521351099014282,grad_norm: 0.9999992707707588, iteration: 35671
loss: 0.9722927808761597,grad_norm: 0.9413288448963416, iteration: 35672
loss: 1.0258928537368774,grad_norm: 0.9999990715453868, iteration: 35673
loss: 0.9750846028327942,grad_norm: 0.9342600147639534, iteration: 35674
loss: 1.0650538206100464,grad_norm: 0.999999451363224, iteration: 35675
loss: 1.0088965892791748,grad_norm: 0.9999990922527965, iteration: 35676
loss: 0.9745896458625793,grad_norm: 0.9999991690716521, iteration: 35677
loss: 1.0482126474380493,grad_norm: 0.9999992122373967, iteration: 35678
loss: 0.9641790390014648,grad_norm: 0.9999990621443621, iteration: 35679
loss: 0.9961068630218506,grad_norm: 0.9999993088795093, iteration: 35680
loss: 1.028809666633606,grad_norm: 0.9491165988171756, iteration: 35681
loss: 0.9768789410591125,grad_norm: 0.9999991007960697, iteration: 35682
loss: 0.945177435874939,grad_norm: 0.9558708692929531, iteration: 35683
loss: 1.105269193649292,grad_norm: 0.9999998381069076, iteration: 35684
loss: 1.0128793716430664,grad_norm: 0.9999991404901722, iteration: 35685
loss: 1.0361833572387695,grad_norm: 0.9999997576545352, iteration: 35686
loss: 0.9738739728927612,grad_norm: 0.744005988536866, iteration: 35687
loss: 1.0752569437026978,grad_norm: 0.9999995311276567, iteration: 35688
loss: 1.012479543685913,grad_norm: 0.9999990421990601, iteration: 35689
loss: 1.0814194679260254,grad_norm: 0.9999993834390638, iteration: 35690
loss: 1.0270506143569946,grad_norm: 0.930806673376008, iteration: 35691
loss: 1.0113162994384766,grad_norm: 0.9999990915044294, iteration: 35692
loss: 1.0083770751953125,grad_norm: 0.9999991065470187, iteration: 35693
loss: 1.0360665321350098,grad_norm: 0.9999992087853918, iteration: 35694
loss: 1.0055040121078491,grad_norm: 0.9999991080914785, iteration: 35695
loss: 1.0042548179626465,grad_norm: 0.999999056916737, iteration: 35696
loss: 0.9693720936775208,grad_norm: 0.9999990015430241, iteration: 35697
loss: 1.0003842115402222,grad_norm: 0.9999990633730202, iteration: 35698
loss: 1.082614541053772,grad_norm: 0.9999992881779058, iteration: 35699
loss: 1.0009597539901733,grad_norm: 0.9999990519941256, iteration: 35700
loss: 1.009305477142334,grad_norm: 0.9999990474434459, iteration: 35701
loss: 1.0315419435501099,grad_norm: 0.9999993204703099, iteration: 35702
loss: 0.9967145323753357,grad_norm: 0.9999991745962541, iteration: 35703
loss: 0.9965725541114807,grad_norm: 0.9523181164311387, iteration: 35704
loss: 0.9966266751289368,grad_norm: 0.8599114683673227, iteration: 35705
loss: 1.0334407091140747,grad_norm: 0.9844361999853576, iteration: 35706
loss: 1.0189621448516846,grad_norm: 0.9999998565355763, iteration: 35707
loss: 1.0020227432250977,grad_norm: 0.8195862344664913, iteration: 35708
loss: 1.0137786865234375,grad_norm: 0.9999990518908528, iteration: 35709
loss: 1.0017683506011963,grad_norm: 0.9612421074722016, iteration: 35710
loss: 1.0079346895217896,grad_norm: 0.8871652307667866, iteration: 35711
loss: 1.0204523801803589,grad_norm: 0.9999991448448939, iteration: 35712
loss: 0.9956097602844238,grad_norm: 0.9999995876418605, iteration: 35713
loss: 1.0199427604675293,grad_norm: 0.9999999985442811, iteration: 35714
loss: 0.9801205396652222,grad_norm: 0.908037631173346, iteration: 35715
loss: 1.0010393857955933,grad_norm: 0.9956038238331981, iteration: 35716
loss: 0.989306628704071,grad_norm: 0.9238242060162991, iteration: 35717
loss: 1.0005528926849365,grad_norm: 0.9528396044878561, iteration: 35718
loss: 1.0263456106185913,grad_norm: 0.9999992499559188, iteration: 35719
loss: 0.9767603278160095,grad_norm: 0.835941485396716, iteration: 35720
loss: 1.01957368850708,grad_norm: 0.9670441421757845, iteration: 35721
loss: 1.0289833545684814,grad_norm: 0.9999990006417211, iteration: 35722
loss: 1.009299874305725,grad_norm: 0.9999991274182196, iteration: 35723
loss: 1.0959076881408691,grad_norm: 0.9999997111076294, iteration: 35724
loss: 1.0246639251708984,grad_norm: 0.9262592157054316, iteration: 35725
loss: 0.9885541796684265,grad_norm: 0.9053658277145885, iteration: 35726
loss: 1.0008790493011475,grad_norm: 0.9999991779885241, iteration: 35727
loss: 1.0137320756912231,grad_norm: 0.9999989969658674, iteration: 35728
loss: 0.9792578220367432,grad_norm: 0.8498298012629838, iteration: 35729
loss: 0.9706559777259827,grad_norm: 0.9999992727663255, iteration: 35730
loss: 0.9976814389228821,grad_norm: 0.8255729344918943, iteration: 35731
loss: 1.0061091184616089,grad_norm: 0.7708152904747233, iteration: 35732
loss: 1.0282198190689087,grad_norm: 0.9999999046845482, iteration: 35733
loss: 1.0253961086273193,grad_norm: 0.9999991404153713, iteration: 35734
loss: 0.9746365547180176,grad_norm: 0.8516633979662613, iteration: 35735
loss: 0.9893983602523804,grad_norm: 0.9999991299990438, iteration: 35736
loss: 1.030800461769104,grad_norm: 0.9999991552231594, iteration: 35737
loss: 1.0443482398986816,grad_norm: 0.9266875908789822, iteration: 35738
loss: 1.0469098091125488,grad_norm: 0.9999991728386041, iteration: 35739
loss: 0.9997727274894714,grad_norm: 0.9557425343192887, iteration: 35740
loss: 1.006515622138977,grad_norm: 0.8361918140103181, iteration: 35741
loss: 0.946343183517456,grad_norm: 0.9659262751741596, iteration: 35742
loss: 1.0607657432556152,grad_norm: 0.8564208143900734, iteration: 35743
loss: 1.0415064096450806,grad_norm: 0.9999996313619084, iteration: 35744
loss: 1.000163197517395,grad_norm: 0.9999991982938495, iteration: 35745
loss: 1.0291017293930054,grad_norm: 0.9999990854829549, iteration: 35746
loss: 1.0209448337554932,grad_norm: 0.9999995738824807, iteration: 35747
loss: 1.0203825235366821,grad_norm: 0.9999996458392819, iteration: 35748
loss: 1.0028764009475708,grad_norm: 0.9999993170771385, iteration: 35749
loss: 1.0765057802200317,grad_norm: 0.9999991755731744, iteration: 35750
loss: 1.018642783164978,grad_norm: 0.9999992243686868, iteration: 35751
loss: 0.9577907919883728,grad_norm: 0.9999991198105797, iteration: 35752
loss: 1.046802043914795,grad_norm: 0.9999992381812769, iteration: 35753
loss: 1.0434132814407349,grad_norm: 0.9999999402262094, iteration: 35754
loss: 1.0165977478027344,grad_norm: 0.99999930056411, iteration: 35755
loss: 0.9923010468482971,grad_norm: 0.9999991241226687, iteration: 35756
loss: 0.9933162331581116,grad_norm: 0.9999990791553879, iteration: 35757
loss: 1.0194191932678223,grad_norm: 0.9262902748882751, iteration: 35758
loss: 0.99903404712677,grad_norm: 0.9307809115787408, iteration: 35759
loss: 1.0316940546035767,grad_norm: 0.999999078651561, iteration: 35760
loss: 1.0344126224517822,grad_norm: 0.9999992910932469, iteration: 35761
loss: 1.2391083240509033,grad_norm: 0.9999994384364509, iteration: 35762
loss: 1.0340276956558228,grad_norm: 0.9999994193539158, iteration: 35763
loss: 0.983207106590271,grad_norm: 0.9159503484197709, iteration: 35764
loss: 1.0554348230361938,grad_norm: 0.9705582862976433, iteration: 35765
loss: 1.0024347305297852,grad_norm: 0.9999992430036642, iteration: 35766
loss: 1.0512686967849731,grad_norm: 0.9999998655845534, iteration: 35767
loss: 1.0202977657318115,grad_norm: 0.81971523789375, iteration: 35768
loss: 1.0420033931732178,grad_norm: 0.9999996107180262, iteration: 35769
loss: 0.9936696887016296,grad_norm: 0.9904585425603624, iteration: 35770
loss: 1.0075476169586182,grad_norm: 0.8905068016395177, iteration: 35771
loss: 1.027815818786621,grad_norm: 0.9999993636256191, iteration: 35772
loss: 0.9940477013587952,grad_norm: 0.9076769512018757, iteration: 35773
loss: 1.0059059858322144,grad_norm: 0.7732293795254056, iteration: 35774
loss: 1.0108909606933594,grad_norm: 0.9065794600177741, iteration: 35775
loss: 0.9757914543151855,grad_norm: 0.999999120148002, iteration: 35776
loss: 1.0140464305877686,grad_norm: 0.8272716987928243, iteration: 35777
loss: 0.9950838088989258,grad_norm: 0.9223305865658534, iteration: 35778
loss: 1.0296157598495483,grad_norm: 0.9999994465215775, iteration: 35779
loss: 1.0338579416275024,grad_norm: 0.9999995743797112, iteration: 35780
loss: 0.9867334961891174,grad_norm: 0.9999990268647955, iteration: 35781
loss: 1.032817006111145,grad_norm: 0.892913380989687, iteration: 35782
loss: 1.0009006261825562,grad_norm: 0.918823158093865, iteration: 35783
loss: 1.0026144981384277,grad_norm: 0.8248278662979007, iteration: 35784
loss: 1.0280280113220215,grad_norm: 0.9418601587482863, iteration: 35785
loss: 1.0088064670562744,grad_norm: 0.879131031983852, iteration: 35786
loss: 1.0112707614898682,grad_norm: 0.8485216843387506, iteration: 35787
loss: 1.000725507736206,grad_norm: 0.9310509677043537, iteration: 35788
loss: 0.9915593266487122,grad_norm: 0.9999991612675029, iteration: 35789
loss: 0.9926753640174866,grad_norm: 0.9999991385703352, iteration: 35790
loss: 1.0235029458999634,grad_norm: 0.9999994114871981, iteration: 35791
loss: 0.9866821765899658,grad_norm: 0.9999990613300049, iteration: 35792
loss: 0.9638307690620422,grad_norm: 0.8839948492255245, iteration: 35793
loss: 1.00846266746521,grad_norm: 0.7851916763408872, iteration: 35794
loss: 0.9746602773666382,grad_norm: 0.9999989563854759, iteration: 35795
loss: 0.9813966155052185,grad_norm: 0.9999993079417543, iteration: 35796
loss: 0.964781641960144,grad_norm: 0.9999997070451141, iteration: 35797
loss: 1.033855676651001,grad_norm: 0.9999993191883239, iteration: 35798
loss: 1.012246012687683,grad_norm: 0.7083616971923845, iteration: 35799
loss: 1.0735883712768555,grad_norm: 0.9999994264543672, iteration: 35800
loss: 1.0276377201080322,grad_norm: 0.9999990228780554, iteration: 35801
loss: 1.0142700672149658,grad_norm: 0.9999990683557661, iteration: 35802
loss: 1.0014902353286743,grad_norm: 0.9099149222933836, iteration: 35803
loss: 0.9852924346923828,grad_norm: 0.9999992689854016, iteration: 35804
loss: 0.9736833572387695,grad_norm: 0.9999992214359216, iteration: 35805
loss: 1.017283320426941,grad_norm: 0.8975223514099646, iteration: 35806
loss: 1.0068321228027344,grad_norm: 0.9343783679264305, iteration: 35807
loss: 0.99934983253479,grad_norm: 0.9054651225115962, iteration: 35808
loss: 1.0105512142181396,grad_norm: 0.9596001404861614, iteration: 35809
loss: 1.0043028593063354,grad_norm: 0.8584490578825883, iteration: 35810
loss: 0.9815609455108643,grad_norm: 0.9999990604936366, iteration: 35811
loss: 1.0267091989517212,grad_norm: 0.9999991264914492, iteration: 35812
loss: 1.0047155618667603,grad_norm: 0.9999993958865658, iteration: 35813
loss: 1.0602846145629883,grad_norm: 0.9999998040728059, iteration: 35814
loss: 1.0083153247833252,grad_norm: 0.9455310938271734, iteration: 35815
loss: 1.0673797130584717,grad_norm: 0.924453411630189, iteration: 35816
loss: 1.0219100713729858,grad_norm: 0.9636934190002734, iteration: 35817
loss: 0.9796690940856934,grad_norm: 0.9999992152410678, iteration: 35818
loss: 1.003990888595581,grad_norm: 0.999999122690554, iteration: 35819
loss: 1.0188227891921997,grad_norm: 0.9999995985827077, iteration: 35820
loss: 1.020630121231079,grad_norm: 0.9999992642433271, iteration: 35821
loss: 1.0303089618682861,grad_norm: 0.9999991924637497, iteration: 35822
loss: 1.0384728908538818,grad_norm: 0.9999992851896545, iteration: 35823
loss: 1.0381999015808105,grad_norm: 0.9999993822751473, iteration: 35824
loss: 1.0228387117385864,grad_norm: 0.9583896562835608, iteration: 35825
loss: 1.0260412693023682,grad_norm: 0.889695587239786, iteration: 35826
loss: 0.9772491455078125,grad_norm: 0.8993080015994891, iteration: 35827
loss: 0.9887438416481018,grad_norm: 0.9999996662675192, iteration: 35828
loss: 1.0044173002243042,grad_norm: 0.9999990908318775, iteration: 35829
loss: 1.0311975479125977,grad_norm: 0.9999992826198251, iteration: 35830
loss: 0.982982337474823,grad_norm: 0.8426058937251646, iteration: 35831
loss: 1.0292649269104004,grad_norm: 0.9999998874097715, iteration: 35832
loss: 1.0206868648529053,grad_norm: 0.9999994896258138, iteration: 35833
loss: 1.003244400024414,grad_norm: 0.9999992703651357, iteration: 35834
loss: 1.0148594379425049,grad_norm: 0.9853049277949203, iteration: 35835
loss: 0.9987396597862244,grad_norm: 0.9382860413094951, iteration: 35836
loss: 1.0637470483779907,grad_norm: 0.9999991453916488, iteration: 35837
loss: 1.015282154083252,grad_norm: 0.8579921904959348, iteration: 35838
loss: 1.0242283344268799,grad_norm: 0.9999993744582281, iteration: 35839
loss: 1.0089797973632812,grad_norm: 0.9999991880398135, iteration: 35840
loss: 1.0435690879821777,grad_norm: 0.999999326595561, iteration: 35841
loss: 1.0170598030090332,grad_norm: 0.999999407290333, iteration: 35842
loss: 0.9892215132713318,grad_norm: 0.999999671347661, iteration: 35843
loss: 1.0589613914489746,grad_norm: 0.9999993272060222, iteration: 35844
loss: 1.0469300746917725,grad_norm: 0.9999991365058768, iteration: 35845
loss: 1.0151150226593018,grad_norm: 0.9999991843562741, iteration: 35846
loss: 1.0282913446426392,grad_norm: 0.8730144496996258, iteration: 35847
loss: 0.9966927170753479,grad_norm: 0.7687212008383093, iteration: 35848
loss: 0.9839317798614502,grad_norm: 0.999999357237117, iteration: 35849
loss: 1.0038450956344604,grad_norm: 0.9944778384381747, iteration: 35850
loss: 1.0175138711929321,grad_norm: 0.9570599131700431, iteration: 35851
loss: 1.0072593688964844,grad_norm: 0.9999990803956447, iteration: 35852
loss: 0.9781621098518372,grad_norm: 0.9999990455414212, iteration: 35853
loss: 1.0200741291046143,grad_norm: 0.9999994156387931, iteration: 35854
loss: 0.9840168356895447,grad_norm: 0.9463316379351523, iteration: 35855
loss: 1.0405299663543701,grad_norm: 0.8024413651353116, iteration: 35856
loss: 0.9854459166526794,grad_norm: 0.9580542354219175, iteration: 35857
loss: 1.0272656679153442,grad_norm: 0.9999990888591127, iteration: 35858
loss: 0.971850574016571,grad_norm: 0.9999991029165999, iteration: 35859
loss: 1.0294538736343384,grad_norm: 0.9999990760968361, iteration: 35860
loss: 0.9755626916885376,grad_norm: 0.9999990370620546, iteration: 35861
loss: 1.0367459058761597,grad_norm: 0.999999285376077, iteration: 35862
loss: 0.980306088924408,grad_norm: 0.9460637561810635, iteration: 35863
loss: 1.0449320077896118,grad_norm: 0.9999995708590563, iteration: 35864
loss: 1.023449420928955,grad_norm: 0.9999991140666459, iteration: 35865
loss: 1.0516401529312134,grad_norm: 0.9999991381136872, iteration: 35866
loss: 1.0105003118515015,grad_norm: 0.9825689356757615, iteration: 35867
loss: 1.0141053199768066,grad_norm: 0.9200127796100593, iteration: 35868
loss: 1.0014417171478271,grad_norm: 0.9999990868873977, iteration: 35869
loss: 1.000831961631775,grad_norm: 0.9999990341178563, iteration: 35870
loss: 1.0596939325332642,grad_norm: 0.9999995657839581, iteration: 35871
loss: 1.009421467781067,grad_norm: 0.8675329007518208, iteration: 35872
loss: 1.0074899196624756,grad_norm: 0.8846513797172028, iteration: 35873
loss: 0.9736012816429138,grad_norm: 0.8872938857477363, iteration: 35874
loss: 0.988730788230896,grad_norm: 0.9104715928367628, iteration: 35875
loss: 1.0265915393829346,grad_norm: 0.9238446320487065, iteration: 35876
loss: 0.9862515330314636,grad_norm: 0.9473330450375195, iteration: 35877
loss: 1.0168566703796387,grad_norm: 0.9999989960083048, iteration: 35878
loss: 0.9897586107254028,grad_norm: 0.9999993946854994, iteration: 35879
loss: 0.9905681014060974,grad_norm: 0.9999994864851566, iteration: 35880
loss: 1.0413202047348022,grad_norm: 0.9564901552090169, iteration: 35881
loss: 0.997605562210083,grad_norm: 0.9830706543486513, iteration: 35882
loss: 1.0876928567886353,grad_norm: 0.9999991871609085, iteration: 35883
loss: 1.0329937934875488,grad_norm: 0.8760498948012754, iteration: 35884
loss: 1.0143035650253296,grad_norm: 0.9246066290681902, iteration: 35885
loss: 0.9265890121459961,grad_norm: 0.9999990396683929, iteration: 35886
loss: 0.9978508949279785,grad_norm: 0.8969767944520332, iteration: 35887
loss: 0.9881545305252075,grad_norm: 0.9999991201604312, iteration: 35888
loss: 1.0136290788650513,grad_norm: 0.9270048372094525, iteration: 35889
loss: 0.9825295805931091,grad_norm: 0.9999998001843885, iteration: 35890
loss: 1.0448954105377197,grad_norm: 0.8955122067736395, iteration: 35891
loss: 1.0086523294448853,grad_norm: 0.9999991597144742, iteration: 35892
loss: 1.0495647192001343,grad_norm: 0.8858866857922346, iteration: 35893
loss: 1.042120337486267,grad_norm: 0.9999990659774118, iteration: 35894
loss: 1.0126991271972656,grad_norm: 0.9061971994146467, iteration: 35895
loss: 1.0264129638671875,grad_norm: 0.9999990167099537, iteration: 35896
loss: 1.014864444732666,grad_norm: 0.9999993656542108, iteration: 35897
loss: 1.0068578720092773,grad_norm: 0.9999990831861187, iteration: 35898
loss: 1.0430477857589722,grad_norm: 0.999999135918547, iteration: 35899
loss: 0.9829110503196716,grad_norm: 0.9467007130345368, iteration: 35900
loss: 1.0426511764526367,grad_norm: 0.9999994635276358, iteration: 35901
loss: 0.985730767250061,grad_norm: 0.999999075458181, iteration: 35902
loss: 1.0011504888534546,grad_norm: 0.9999991327487121, iteration: 35903
loss: 1.0473957061767578,grad_norm: 0.9999992413589552, iteration: 35904
loss: 1.0324969291687012,grad_norm: 0.9999992266519315, iteration: 35905
loss: 1.0249634981155396,grad_norm: 0.9999994591082698, iteration: 35906
loss: 0.9700023531913757,grad_norm: 0.8730480469978745, iteration: 35907
loss: 0.987379252910614,grad_norm: 0.9527020296054701, iteration: 35908
loss: 0.9189417362213135,grad_norm: 0.8279588858081679, iteration: 35909
loss: 0.9990651607513428,grad_norm: 0.992984267645868, iteration: 35910
loss: 1.0107349157333374,grad_norm: 0.9119188182886967, iteration: 35911
loss: 1.0062378644943237,grad_norm: 0.9955807911045007, iteration: 35912
loss: 1.005562663078308,grad_norm: 0.8576864326071958, iteration: 35913
loss: 0.987545371055603,grad_norm: 0.8417447789877988, iteration: 35914
loss: 1.0436087846755981,grad_norm: 0.886679808453018, iteration: 35915
loss: 1.11636483669281,grad_norm: 0.999999757140384, iteration: 35916
loss: 0.9836354851722717,grad_norm: 0.8711859866469056, iteration: 35917
loss: 0.9980632662773132,grad_norm: 0.928208009913217, iteration: 35918
loss: 1.0216633081436157,grad_norm: 0.8735481113798421, iteration: 35919
loss: 1.0114433765411377,grad_norm: 0.9999991477075554, iteration: 35920
loss: 1.003639578819275,grad_norm: 0.9799520608580413, iteration: 35921
loss: 1.025214433670044,grad_norm: 0.8147158160159518, iteration: 35922
loss: 1.026581883430481,grad_norm: 0.9260067544597167, iteration: 35923
loss: 1.0156564712524414,grad_norm: 0.8278645057887537, iteration: 35924
loss: 0.9979913234710693,grad_norm: 0.9686842257578906, iteration: 35925
loss: 0.9565578103065491,grad_norm: 0.9999991131228102, iteration: 35926
loss: 1.0012600421905518,grad_norm: 0.971037592324143, iteration: 35927
loss: 1.0172370672225952,grad_norm: 0.9239107514706648, iteration: 35928
loss: 0.9874835014343262,grad_norm: 0.9398444679600807, iteration: 35929
loss: 1.0315638780593872,grad_norm: 0.9999990242892085, iteration: 35930
loss: 0.9305823445320129,grad_norm: 0.9999991077461532, iteration: 35931
loss: 0.9632322192192078,grad_norm: 0.9999991994873958, iteration: 35932
loss: 1.0100189447402954,grad_norm: 0.9999991935149067, iteration: 35933
loss: 1.0181729793548584,grad_norm: 0.8828046215230875, iteration: 35934
loss: 1.0216431617736816,grad_norm: 0.9999990461336933, iteration: 35935
loss: 0.9959427714347839,grad_norm: 0.9772141909602273, iteration: 35936
loss: 1.0069292783737183,grad_norm: 0.7800685133126403, iteration: 35937
loss: 1.0628663301467896,grad_norm: 0.9999998216306796, iteration: 35938
loss: 0.9814707636833191,grad_norm: 0.9999989233203084, iteration: 35939
loss: 1.0272375345230103,grad_norm: 0.9357275169985505, iteration: 35940
loss: 1.0848872661590576,grad_norm: 0.999999716411949, iteration: 35941
loss: 1.0392285585403442,grad_norm: 0.8814788128444909, iteration: 35942
loss: 1.057515263557434,grad_norm: 0.9999995204786101, iteration: 35943
loss: 1.0043652057647705,grad_norm: 0.8389476723112227, iteration: 35944
loss: 1.0026737451553345,grad_norm: 0.9999991558076344, iteration: 35945
loss: 1.0521891117095947,grad_norm: 0.9999991911332602, iteration: 35946
loss: 1.0302337408065796,grad_norm: 0.9999993445462382, iteration: 35947
loss: 0.9719046354293823,grad_norm: 0.9428023697065124, iteration: 35948
loss: 1.0283864736557007,grad_norm: 0.9999991865361819, iteration: 35949
loss: 0.9761882424354553,grad_norm: 0.999999186845761, iteration: 35950
loss: 1.0108058452606201,grad_norm: 0.9999995134816074, iteration: 35951
loss: 1.0226552486419678,grad_norm: 0.999999176164597, iteration: 35952
loss: 1.0336343050003052,grad_norm: 0.9999990956674204, iteration: 35953
loss: 0.9859569668769836,grad_norm: 0.9836331426594475, iteration: 35954
loss: 0.9872736930847168,grad_norm: 0.9999991569689592, iteration: 35955
loss: 1.0129648447036743,grad_norm: 0.949761198549953, iteration: 35956
loss: 1.1104809045791626,grad_norm: 0.9999993369289901, iteration: 35957
loss: 1.0382152795791626,grad_norm: 0.9999993065237445, iteration: 35958
loss: 0.9897704124450684,grad_norm: 0.7894234937287146, iteration: 35959
loss: 1.0278942584991455,grad_norm: 0.9999991968875901, iteration: 35960
loss: 1.0223716497421265,grad_norm: 0.9999993841718897, iteration: 35961
loss: 1.0971089601516724,grad_norm: 0.9999997091139985, iteration: 35962
loss: 1.3013502359390259,grad_norm: 0.9999997792375013, iteration: 35963
loss: 1.080918312072754,grad_norm: 0.9999992024937974, iteration: 35964
loss: 1.0041751861572266,grad_norm: 0.9999991111466662, iteration: 35965
loss: 1.0796138048171997,grad_norm: 0.9999993358183026, iteration: 35966
loss: 1.0772162675857544,grad_norm: 0.9999994763483654, iteration: 35967
loss: 1.1194130182266235,grad_norm: 0.999999814704487, iteration: 35968
loss: 1.0612620115280151,grad_norm: 0.9999990783933638, iteration: 35969
loss: 0.9838989973068237,grad_norm: 0.9999991327920794, iteration: 35970
loss: 0.9638134837150574,grad_norm: 0.9402880372648281, iteration: 35971
loss: 1.0476781129837036,grad_norm: 0.9999992434048537, iteration: 35972
loss: 1.0365809202194214,grad_norm: 0.9999996350215807, iteration: 35973
loss: 1.0281381607055664,grad_norm: 0.999999141617714, iteration: 35974
loss: 1.1428369283676147,grad_norm: 0.9999998898162832, iteration: 35975
loss: 1.0450176000595093,grad_norm: 0.9999993752550106, iteration: 35976
loss: 0.9791027903556824,grad_norm: 0.9999990890382942, iteration: 35977
loss: 1.0207715034484863,grad_norm: 0.9999991753255889, iteration: 35978
loss: 1.0844095945358276,grad_norm: 0.9999998504404155, iteration: 35979
loss: 0.9619734287261963,grad_norm: 0.9999991352936005, iteration: 35980
loss: 1.0399242639541626,grad_norm: 0.9999996416190172, iteration: 35981
loss: 1.0102709531784058,grad_norm: 0.9999996790790852, iteration: 35982
loss: 1.0230439901351929,grad_norm: 0.9999993399221968, iteration: 35983
loss: 0.9998076558113098,grad_norm: 0.858675846068105, iteration: 35984
loss: 1.0416330099105835,grad_norm: 0.9999996816189076, iteration: 35985
loss: 1.0091512203216553,grad_norm: 0.8986744948923221, iteration: 35986
loss: 1.0396406650543213,grad_norm: 0.9999991920325443, iteration: 35987
loss: 1.023407220840454,grad_norm: 0.9999993056854592, iteration: 35988
loss: 1.0673105716705322,grad_norm: 0.9999994010451754, iteration: 35989
loss: 1.0023823976516724,grad_norm: 0.7305521370756594, iteration: 35990
loss: 0.9893348813056946,grad_norm: 0.9999990193646099, iteration: 35991
loss: 1.027471899986267,grad_norm: 0.9536797849849776, iteration: 35992
loss: 1.0026954412460327,grad_norm: 0.8589134965108196, iteration: 35993
loss: 1.0211197137832642,grad_norm: 0.9391224396925362, iteration: 35994
loss: 1.0162681341171265,grad_norm: 0.9676189015104314, iteration: 35995
loss: 1.0283863544464111,grad_norm: 0.9999994527153778, iteration: 35996
loss: 0.9798926115036011,grad_norm: 0.9446557928271329, iteration: 35997
loss: 0.998145341873169,grad_norm: 0.9999992448556905, iteration: 35998
loss: 1.0359526872634888,grad_norm: 0.9999991156471596, iteration: 35999
loss: 1.067168116569519,grad_norm: 0.9999996653264515, iteration: 36000
loss: 0.9706918001174927,grad_norm: 0.9478787064849197, iteration: 36001
loss: 1.0286705493927002,grad_norm: 0.9999993156887744, iteration: 36002
loss: 1.0203689336776733,grad_norm: 0.9999993088423755, iteration: 36003
loss: 1.0286897420883179,grad_norm: 0.9964392312650323, iteration: 36004
loss: 0.9950395822525024,grad_norm: 0.9999990653714957, iteration: 36005
loss: 0.9792961478233337,grad_norm: 0.9176618600824642, iteration: 36006
loss: 0.9912043809890747,grad_norm: 0.8955271903509063, iteration: 36007
loss: 1.0062512159347534,grad_norm: 0.9986814453499032, iteration: 36008
loss: 1.0025179386138916,grad_norm: 0.9816746998292682, iteration: 36009
loss: 1.026095986366272,grad_norm: 0.88242409189804, iteration: 36010
loss: 0.9613388180732727,grad_norm: 0.8981031312486054, iteration: 36011
loss: 0.9873400926589966,grad_norm: 0.9999991304322334, iteration: 36012
loss: 1.000693678855896,grad_norm: 0.9574293252519341, iteration: 36013
loss: 0.9700728058815002,grad_norm: 0.9999991458149889, iteration: 36014
loss: 1.0089869499206543,grad_norm: 0.9999992881495412, iteration: 36015
loss: 1.0171480178833008,grad_norm: 0.999999481751199, iteration: 36016
loss: 0.9969342350959778,grad_norm: 0.9999992186085482, iteration: 36017
loss: 0.988760769367218,grad_norm: 0.9151301268674537, iteration: 36018
loss: 0.9663251042366028,grad_norm: 0.9043827705826148, iteration: 36019
loss: 1.0470889806747437,grad_norm: 0.9999993807908382, iteration: 36020
loss: 1.015369176864624,grad_norm: 0.9999992140300212, iteration: 36021
loss: 1.0029546022415161,grad_norm: 0.9999990561280704, iteration: 36022
loss: 0.9933748841285706,grad_norm: 0.9065006038176672, iteration: 36023
loss: 1.0516021251678467,grad_norm: 0.9999993760394279, iteration: 36024
loss: 1.0024651288986206,grad_norm: 0.9649515637287608, iteration: 36025
loss: 0.9996570348739624,grad_norm: 0.9999996723013556, iteration: 36026
loss: 0.9743068218231201,grad_norm: 0.8148708430424385, iteration: 36027
loss: 0.9772259593009949,grad_norm: 0.9999991403351253, iteration: 36028
loss: 0.9931363463401794,grad_norm: 0.9999993891737152, iteration: 36029
loss: 0.9698532223701477,grad_norm: 0.9999992326309612, iteration: 36030
loss: 1.0112175941467285,grad_norm: 0.9999991815171716, iteration: 36031
loss: 1.0021756887435913,grad_norm: 0.9654155259160098, iteration: 36032
loss: 1.0057016611099243,grad_norm: 0.958140545522007, iteration: 36033
loss: 1.0261213779449463,grad_norm: 0.9999991717330752, iteration: 36034
loss: 1.0881794691085815,grad_norm: 0.9999994220505598, iteration: 36035
loss: 1.036524772644043,grad_norm: 0.9999994963909917, iteration: 36036
loss: 0.9825053811073303,grad_norm: 0.9999994878039403, iteration: 36037
loss: 1.0137584209442139,grad_norm: 0.9669968013570863, iteration: 36038
loss: 1.0318289995193481,grad_norm: 0.9999993601754642, iteration: 36039
loss: 1.0004774332046509,grad_norm: 0.9999992171358065, iteration: 36040
loss: 1.017079472541809,grad_norm: 0.9999990653091962, iteration: 36041
loss: 0.995200514793396,grad_norm: 0.9999990961407558, iteration: 36042
loss: 0.9864704012870789,grad_norm: 0.9999994081029272, iteration: 36043
loss: 0.9908275008201599,grad_norm: 0.898987520616526, iteration: 36044
loss: 0.999783456325531,grad_norm: 0.9999992951807463, iteration: 36045
loss: 1.0173110961914062,grad_norm: 0.9999991080643826, iteration: 36046
loss: 1.0218634605407715,grad_norm: 0.9999990466206117, iteration: 36047
loss: 1.0542657375335693,grad_norm: 0.9999993934170254, iteration: 36048
loss: 1.0178221464157104,grad_norm: 0.999999173552644, iteration: 36049
loss: 0.9765923023223877,grad_norm: 0.7424786341670052, iteration: 36050
loss: 1.0032391548156738,grad_norm: 0.9999989181771001, iteration: 36051
loss: 1.0155099630355835,grad_norm: 0.9999990576266703, iteration: 36052
loss: 1.0132739543914795,grad_norm: 0.9999993948604066, iteration: 36053
loss: 1.0171780586242676,grad_norm: 0.9532052396206703, iteration: 36054
loss: 1.016187310218811,grad_norm: 0.9999994778077663, iteration: 36055
loss: 1.055733561515808,grad_norm: 0.9999997784433587, iteration: 36056
loss: 0.9774840474128723,grad_norm: 0.9999995925537389, iteration: 36057
loss: 1.0187761783599854,grad_norm: 0.9999996444284485, iteration: 36058
loss: 1.0080015659332275,grad_norm: 0.9999995898374423, iteration: 36059
loss: 0.9923071265220642,grad_norm: 0.9999990831534731, iteration: 36060
loss: 0.9926490187644958,grad_norm: 0.9610527485904828, iteration: 36061
loss: 0.9682841897010803,grad_norm: 0.9999995007141929, iteration: 36062
loss: 0.9775816202163696,grad_norm: 0.9999991787201722, iteration: 36063
loss: 0.9980370998382568,grad_norm: 0.9999996372066391, iteration: 36064
loss: 0.9566631317138672,grad_norm: 0.9481850784336721, iteration: 36065
loss: 0.9748718738555908,grad_norm: 0.8291656178513445, iteration: 36066
loss: 1.0074917078018188,grad_norm: 0.9999990674995798, iteration: 36067
loss: 0.9487894177436829,grad_norm: 0.9999994881121566, iteration: 36068
loss: 0.9881381988525391,grad_norm: 0.8881321363841354, iteration: 36069
loss: 1.0141023397445679,grad_norm: 0.9999992320295392, iteration: 36070
loss: 1.0309516191482544,grad_norm: 0.9999997924814383, iteration: 36071
loss: 0.966468095779419,grad_norm: 0.9999992587379521, iteration: 36072
loss: 1.0679347515106201,grad_norm: 0.9999990759335021, iteration: 36073
loss: 0.9921953678131104,grad_norm: 0.8642036264708686, iteration: 36074
loss: 1.0143539905548096,grad_norm: 0.999999691159696, iteration: 36075
loss: 1.0312607288360596,grad_norm: 0.9999992601971696, iteration: 36076
loss: 0.9807100892066956,grad_norm: 0.8533373063558751, iteration: 36077
loss: 1.0301220417022705,grad_norm: 0.9999996064228756, iteration: 36078
loss: 1.026236653327942,grad_norm: 0.999999250520986, iteration: 36079
loss: 1.1074018478393555,grad_norm: 0.9999999102264058, iteration: 36080
loss: 0.9910408854484558,grad_norm: 0.9503092599820585, iteration: 36081
loss: 1.061033010482788,grad_norm: 0.999999055908198, iteration: 36082
loss: 1.0350576639175415,grad_norm: 0.9999996076764095, iteration: 36083
loss: 0.999015212059021,grad_norm: 0.9945409566877376, iteration: 36084
loss: 1.0083266496658325,grad_norm: 0.9262058113944075, iteration: 36085
loss: 1.005241870880127,grad_norm: 0.9999994342065975, iteration: 36086
loss: 1.0470131635665894,grad_norm: 0.9999990533637421, iteration: 36087
loss: 0.9896520972251892,grad_norm: 0.8411981396845237, iteration: 36088
loss: 0.9930806756019592,grad_norm: 0.9259462010228184, iteration: 36089
loss: 0.9992561936378479,grad_norm: 0.9999990598939442, iteration: 36090
loss: 1.01266610622406,grad_norm: 0.9999991667813738, iteration: 36091
loss: 1.0481390953063965,grad_norm: 0.9999997119162831, iteration: 36092
loss: 1.0227395296096802,grad_norm: 0.9999993114541069, iteration: 36093
loss: 0.9768041968345642,grad_norm: 0.9999992033854218, iteration: 36094
loss: 1.0177342891693115,grad_norm: 0.7636095408924741, iteration: 36095
loss: 0.9772369861602783,grad_norm: 0.8986139075591363, iteration: 36096
loss: 1.0128068923950195,grad_norm: 0.9195352445964204, iteration: 36097
loss: 0.9918786883354187,grad_norm: 0.9999991617724181, iteration: 36098
loss: 1.0306031703948975,grad_norm: 0.999999130252003, iteration: 36099
loss: 1.0096168518066406,grad_norm: 0.9999991536915429, iteration: 36100
loss: 1.0261937379837036,grad_norm: 0.86049259231777, iteration: 36101
loss: 1.021574854850769,grad_norm: 0.9999991254133007, iteration: 36102
loss: 0.9990867376327515,grad_norm: 0.9999991600984931, iteration: 36103
loss: 1.0168507099151611,grad_norm: 0.9190928385658825, iteration: 36104
loss: 1.0107473134994507,grad_norm: 0.9999990045762196, iteration: 36105
loss: 1.0211067199707031,grad_norm: 0.9999991579397799, iteration: 36106
loss: 1.0287326574325562,grad_norm: 0.9999990819347919, iteration: 36107
loss: 1.022558331489563,grad_norm: 0.999999165811993, iteration: 36108
loss: 1.0507431030273438,grad_norm: 0.9999993467428944, iteration: 36109
loss: 0.9982922077178955,grad_norm: 0.999998991759203, iteration: 36110
loss: 1.0470166206359863,grad_norm: 0.9999990757454701, iteration: 36111
loss: 1.0023974180221558,grad_norm: 0.9561970368069334, iteration: 36112
loss: 1.0214025974273682,grad_norm: 0.8983816410140446, iteration: 36113
loss: 1.0221716165542603,grad_norm: 0.8658755257514136, iteration: 36114
loss: 1.0042438507080078,grad_norm: 0.9999997838473887, iteration: 36115
loss: 0.9678373336791992,grad_norm: 0.9999990539927016, iteration: 36116
loss: 0.9655346274375916,grad_norm: 0.9999989287967305, iteration: 36117
loss: 1.0298184156417847,grad_norm: 0.8735999090091556, iteration: 36118
loss: 0.9846453666687012,grad_norm: 0.9999991882325372, iteration: 36119
loss: 0.9597698450088501,grad_norm: 0.9999994084661015, iteration: 36120
loss: 1.0091406106948853,grad_norm: 0.99999904626026, iteration: 36121
loss: 1.0211122035980225,grad_norm: 0.9999990923521944, iteration: 36122
loss: 1.012933373451233,grad_norm: 0.9999995408652592, iteration: 36123
loss: 1.0025185346603394,grad_norm: 0.9083210474630796, iteration: 36124
loss: 0.9705117344856262,grad_norm: 0.8900580039098458, iteration: 36125
loss: 0.9840947389602661,grad_norm: 0.9999991441964732, iteration: 36126
loss: 1.0037750005722046,grad_norm: 0.8886554777136868, iteration: 36127
loss: 1.035470962524414,grad_norm: 0.9397375177257676, iteration: 36128
loss: 1.0269343852996826,grad_norm: 0.9432889610056203, iteration: 36129
loss: 0.9978145360946655,grad_norm: 0.9602240914316861, iteration: 36130
loss: 1.017117977142334,grad_norm: 0.9999992023469123, iteration: 36131
loss: 0.9960839748382568,grad_norm: 0.936775872515618, iteration: 36132
loss: 1.011887788772583,grad_norm: 0.9999990626353888, iteration: 36133
loss: 1.0102550983428955,grad_norm: 0.9999989647461909, iteration: 36134
loss: 1.0353728532791138,grad_norm: 0.9069089889102562, iteration: 36135
loss: 0.9908763766288757,grad_norm: 0.9999990909436367, iteration: 36136
loss: 1.042503833770752,grad_norm: 0.9277539908889159, iteration: 36137
loss: 1.0360783338546753,grad_norm: 0.9999993017276488, iteration: 36138
loss: 1.0125828981399536,grad_norm: 0.9999989880561067, iteration: 36139
loss: 1.0006111860275269,grad_norm: 0.9999991597689253, iteration: 36140
loss: 0.9936208128929138,grad_norm: 0.9999991090811498, iteration: 36141
loss: 0.9792823791503906,grad_norm: 0.9999990865095678, iteration: 36142
loss: 0.9775201678276062,grad_norm: 0.9343848430333318, iteration: 36143
loss: 0.9735170006752014,grad_norm: 0.7579850540773837, iteration: 36144
loss: 0.9947550892829895,grad_norm: 0.8225743071866835, iteration: 36145
loss: 0.9928539991378784,grad_norm: 0.9257133317273023, iteration: 36146
loss: 1.0241607427597046,grad_norm: 0.9510854750747276, iteration: 36147
loss: 1.0132482051849365,grad_norm: 0.8977749075909884, iteration: 36148
loss: 0.9947667717933655,grad_norm: 0.9883484921652456, iteration: 36149
loss: 1.0246577262878418,grad_norm: 0.9748978657855499, iteration: 36150
loss: 1.0306551456451416,grad_norm: 1.0000000482471056, iteration: 36151
loss: 1.0220696926116943,grad_norm: 0.9999990509886133, iteration: 36152
loss: 0.9779552817344666,grad_norm: 0.9999991328243049, iteration: 36153
loss: 1.037709355354309,grad_norm: 0.9999993083325687, iteration: 36154
loss: 0.9916963577270508,grad_norm: 0.9999992392977443, iteration: 36155
loss: 1.003290057182312,grad_norm: 0.9999992442638972, iteration: 36156
loss: 1.0063562393188477,grad_norm: 0.9999991960246596, iteration: 36157
loss: 1.0168213844299316,grad_norm: 0.880390591835911, iteration: 36158
loss: 1.0430048704147339,grad_norm: 0.8826817168229072, iteration: 36159
loss: 1.0394548177719116,grad_norm: 0.7931905986290391, iteration: 36160
loss: 1.0667933225631714,grad_norm: 0.9999995595900439, iteration: 36161
loss: 0.9988856315612793,grad_norm: 0.9080988988364685, iteration: 36162
loss: 0.9976451396942139,grad_norm: 0.9999991859855167, iteration: 36163
loss: 1.0157490968704224,grad_norm: 0.9999991211020074, iteration: 36164
loss: 1.018033742904663,grad_norm: 0.9999993835778037, iteration: 36165
loss: 1.0302866697311401,grad_norm: 0.9404545118216399, iteration: 36166
loss: 1.0410075187683105,grad_norm: 0.9999996491082677, iteration: 36167
loss: 1.011303186416626,grad_norm: 0.8869727870324335, iteration: 36168
loss: 0.9955474734306335,grad_norm: 0.7990038788375754, iteration: 36169
loss: 1.0425313711166382,grad_norm: 0.9999996392953809, iteration: 36170
loss: 1.0300248861312866,grad_norm: 0.9035126660263316, iteration: 36171
loss: 0.9972098469734192,grad_norm: 0.8634183925080833, iteration: 36172
loss: 0.9938938617706299,grad_norm: 0.9999989647083672, iteration: 36173
loss: 0.9971283078193665,grad_norm: 0.6424707493491822, iteration: 36174
loss: 0.9870518445968628,grad_norm: 0.9112425217820777, iteration: 36175
loss: 0.9747796058654785,grad_norm: 0.9999989620957007, iteration: 36176
loss: 1.0428482294082642,grad_norm: 0.9188790123018458, iteration: 36177
loss: 1.016316533088684,grad_norm: 0.9999990843807811, iteration: 36178
loss: 1.0539671182632446,grad_norm: 0.9999993902043431, iteration: 36179
loss: 1.00933039188385,grad_norm: 0.9373640612461785, iteration: 36180
loss: 1.0113500356674194,grad_norm: 0.9999992079663453, iteration: 36181
loss: 1.1214276552200317,grad_norm: 0.999999675457566, iteration: 36182
loss: 1.0393638610839844,grad_norm: 0.9999996443768052, iteration: 36183
loss: 1.0156667232513428,grad_norm: 0.9999991781098716, iteration: 36184
loss: 1.0102542638778687,grad_norm: 0.9999991140901633, iteration: 36185
loss: 1.0074619054794312,grad_norm: 0.9999990694233598, iteration: 36186
loss: 1.0179588794708252,grad_norm: 0.9757270647480064, iteration: 36187
loss: 1.0135942697525024,grad_norm: 0.8175400395387299, iteration: 36188
loss: 0.9897397756576538,grad_norm: 0.9999997262659334, iteration: 36189
loss: 1.0398850440979004,grad_norm: 0.9999992923524867, iteration: 36190
loss: 1.0235618352890015,grad_norm: 0.9999990996543143, iteration: 36191
loss: 0.9953386187553406,grad_norm: 0.8686594776969327, iteration: 36192
loss: 1.0262638330459595,grad_norm: 0.9999992766269526, iteration: 36193
loss: 1.0119719505310059,grad_norm: 0.9577031026541122, iteration: 36194
loss: 1.0857118368148804,grad_norm: 0.9999993085829506, iteration: 36195
loss: 0.9867324233055115,grad_norm: 0.9864283981080864, iteration: 36196
loss: 1.040082335472107,grad_norm: 0.979134360815333, iteration: 36197
loss: 1.0524486303329468,grad_norm: 0.9999994530065731, iteration: 36198
loss: 1.0322237014770508,grad_norm: 0.9999994136338006, iteration: 36199
loss: 0.9904798865318298,grad_norm: 0.8721136107961204, iteration: 36200
loss: 1.0937004089355469,grad_norm: 0.9999995772674627, iteration: 36201
loss: 1.0642212629318237,grad_norm: 0.9999992290653491, iteration: 36202
loss: 1.0626744031906128,grad_norm: 0.9999994275827413, iteration: 36203
loss: 0.9483224153518677,grad_norm: 0.9999991879356901, iteration: 36204
loss: 1.0334402322769165,grad_norm: 0.9999992660909018, iteration: 36205
loss: 1.0026065111160278,grad_norm: 0.9897067591325007, iteration: 36206
loss: 1.0760711431503296,grad_norm: 0.9999995620614714, iteration: 36207
loss: 1.0107896327972412,grad_norm: 0.999999742390787, iteration: 36208
loss: 0.9390392303466797,grad_norm: 0.9999991634331176, iteration: 36209
loss: 1.0290100574493408,grad_norm: 0.9999990805787634, iteration: 36210
loss: 1.0187420845031738,grad_norm: 0.9313300379675625, iteration: 36211
loss: 1.045367956161499,grad_norm: 0.9999992584882368, iteration: 36212
loss: 0.9841269850730896,grad_norm: 0.9999990780557978, iteration: 36213
loss: 1.0127204656600952,grad_norm: 0.9086174011014883, iteration: 36214
loss: 0.9785679578781128,grad_norm: 0.999999257906962, iteration: 36215
loss: 0.996626079082489,grad_norm: 0.9621903483950999, iteration: 36216
loss: 0.9912196397781372,grad_norm: 0.9999992353264983, iteration: 36217
loss: 1.0524160861968994,grad_norm: 0.999999348918879, iteration: 36218
loss: 0.9809436202049255,grad_norm: 0.9402132640474564, iteration: 36219
loss: 1.0094047784805298,grad_norm: 0.9999990124461589, iteration: 36220
loss: 1.0344423055648804,grad_norm: 0.999999162964921, iteration: 36221
loss: 1.1039543151855469,grad_norm: 0.9999994617783169, iteration: 36222
loss: 0.9606606364250183,grad_norm: 0.880464392374788, iteration: 36223
loss: 1.067897915840149,grad_norm: 0.9999994727374327, iteration: 36224
loss: 1.0268532037734985,grad_norm: 0.9999991161287255, iteration: 36225
loss: 1.0004334449768066,grad_norm: 0.9090736192319895, iteration: 36226
loss: 0.9984872937202454,grad_norm: 0.8951716080050055, iteration: 36227
loss: 1.004397988319397,grad_norm: 0.9623963809683838, iteration: 36228
loss: 1.0140796899795532,grad_norm: 0.9371642349461569, iteration: 36229
loss: 0.9981409311294556,grad_norm: 0.9999993816767907, iteration: 36230
loss: 1.0247567892074585,grad_norm: 0.9999995986313192, iteration: 36231
loss: 1.0417476892471313,grad_norm: 0.9999998164852215, iteration: 36232
loss: 1.0232127904891968,grad_norm: 0.9999991226227374, iteration: 36233
loss: 1.0432536602020264,grad_norm: 0.9999992901765552, iteration: 36234
loss: 1.0182219743728638,grad_norm: 0.8899597530581032, iteration: 36235
loss: 1.0014923810958862,grad_norm: 0.8995233903629404, iteration: 36236
loss: 1.0689529180526733,grad_norm: 0.9999992318000024, iteration: 36237
loss: 1.0298622846603394,grad_norm: 0.999999246686049, iteration: 36238
loss: 1.0152777433395386,grad_norm: 0.9999991951949585, iteration: 36239
loss: 1.0043224096298218,grad_norm: 0.9999991712407894, iteration: 36240
loss: 0.969957172870636,grad_norm: 0.9999990459908186, iteration: 36241
loss: 1.0347182750701904,grad_norm: 0.9999996835581094, iteration: 36242
loss: 1.0509023666381836,grad_norm: 0.9999991551350309, iteration: 36243
loss: 1.016062617301941,grad_norm: 0.9999991605768648, iteration: 36244
loss: 1.012797474861145,grad_norm: 0.9999990814101528, iteration: 36245
loss: 1.0143976211547852,grad_norm: 0.9999991750303884, iteration: 36246
loss: 1.0190765857696533,grad_norm: 0.9605191846990158, iteration: 36247
loss: 1.0206135511398315,grad_norm: 0.9999992196882944, iteration: 36248
loss: 1.016471266746521,grad_norm: 0.9097343849091165, iteration: 36249
loss: 1.0554598569869995,grad_norm: 0.9585946671686174, iteration: 36250
loss: 0.9957370758056641,grad_norm: 0.9105520819448462, iteration: 36251
loss: 1.0334854125976562,grad_norm: 0.9999994572381722, iteration: 36252
loss: 0.9672319293022156,grad_norm: 0.9999990812426055, iteration: 36253
loss: 1.0163798332214355,grad_norm: 0.999999016509288, iteration: 36254
loss: 1.359603762626648,grad_norm: 0.9999997274309698, iteration: 36255
loss: 0.9910420179367065,grad_norm: 0.9680863610396341, iteration: 36256
loss: 1.012732744216919,grad_norm: 0.9999993369727628, iteration: 36257
loss: 1.0482860803604126,grad_norm: 0.9999991567413642, iteration: 36258
loss: 1.0238397121429443,grad_norm: 0.9999992138030045, iteration: 36259
loss: 1.0393834114074707,grad_norm: 0.9999990806823004, iteration: 36260
loss: 0.9925668239593506,grad_norm: 0.9982957990595887, iteration: 36261
loss: 0.9999951720237732,grad_norm: 0.8368991317315608, iteration: 36262
loss: 1.0002208948135376,grad_norm: 0.9319989271685295, iteration: 36263
loss: 1.0193161964416504,grad_norm: 0.9999994047707177, iteration: 36264
loss: 1.0375394821166992,grad_norm: 0.9999989955095674, iteration: 36265
loss: 1.0031020641326904,grad_norm: 0.9858012131880262, iteration: 36266
loss: 1.0747785568237305,grad_norm: 0.999999498858366, iteration: 36267
loss: 1.0259830951690674,grad_norm: 0.8962873706424166, iteration: 36268
loss: 1.0112019777297974,grad_norm: 0.9999992958441785, iteration: 36269
loss: 1.0392208099365234,grad_norm: 0.9699110488173038, iteration: 36270
loss: 0.9955730438232422,grad_norm: 0.9629433417398325, iteration: 36271
loss: 1.0419970750808716,grad_norm: 0.9999996076859216, iteration: 36272
loss: 1.0195919275283813,grad_norm: 0.99999904369233, iteration: 36273
loss: 1.001686692237854,grad_norm: 0.999999101257142, iteration: 36274
loss: 1.034558892250061,grad_norm: 0.7730485914938376, iteration: 36275
loss: 1.025160551071167,grad_norm: 0.9999990665675761, iteration: 36276
loss: 1.026259183883667,grad_norm: 0.999999849853717, iteration: 36277
loss: 1.0209355354309082,grad_norm: 0.9999991674687063, iteration: 36278
loss: 1.0018231868743896,grad_norm: 0.7990434947385378, iteration: 36279
loss: 0.9766203165054321,grad_norm: 0.9999991466764648, iteration: 36280
loss: 0.9778593182563782,grad_norm: 0.9999991245258463, iteration: 36281
loss: 1.014799952507019,grad_norm: 0.9999990784438759, iteration: 36282
loss: 0.9683857560157776,grad_norm: 0.9999990186887201, iteration: 36283
loss: 1.0048719644546509,grad_norm: 0.7803921019456662, iteration: 36284
loss: 1.0378694534301758,grad_norm: 0.8666214626141644, iteration: 36285
loss: 0.9932252764701843,grad_norm: 0.9999991769609717, iteration: 36286
loss: 1.072600245475769,grad_norm: 0.9999999411133541, iteration: 36287
loss: 1.0155062675476074,grad_norm: 0.9999990495807101, iteration: 36288
loss: 1.0276668071746826,grad_norm: 0.9999989746241309, iteration: 36289
loss: 1.0037965774536133,grad_norm: 0.8842536723350556, iteration: 36290
loss: 1.0387232303619385,grad_norm: 0.9999992998337693, iteration: 36291
loss: 1.0098040103912354,grad_norm: 0.8830206244101856, iteration: 36292
loss: 0.9931333661079407,grad_norm: 0.9999991404179361, iteration: 36293
loss: 0.9985441565513611,grad_norm: 0.8462790882515773, iteration: 36294
loss: 1.0222376585006714,grad_norm: 0.8322158392487331, iteration: 36295
loss: 1.0379656553268433,grad_norm: 0.9999990411013925, iteration: 36296
loss: 0.997758686542511,grad_norm: 0.999999055552154, iteration: 36297
loss: 0.9952186942100525,grad_norm: 0.9252821823521757, iteration: 36298
loss: 0.9894570708274841,grad_norm: 0.9936013142686067, iteration: 36299
loss: 1.0710612535476685,grad_norm: 0.9999991688215765, iteration: 36300
loss: 1.0130504369735718,grad_norm: 0.9999992006381314, iteration: 36301
loss: 1.0375621318817139,grad_norm: 0.9999990939864599, iteration: 36302
loss: 1.0285152196884155,grad_norm: 0.9972901184003068, iteration: 36303
loss: 1.0263631343841553,grad_norm: 0.8151384832647828, iteration: 36304
loss: 1.0054107904434204,grad_norm: 0.9999994176559375, iteration: 36305
loss: 0.9959753155708313,grad_norm: 0.9999990654989876, iteration: 36306
loss: 0.9476071000099182,grad_norm: 0.9544329880405694, iteration: 36307
loss: 1.008017897605896,grad_norm: 0.8473639459030665, iteration: 36308
loss: 1.008648157119751,grad_norm: 0.9999990882192857, iteration: 36309
loss: 1.032028317451477,grad_norm: 0.9999992376115923, iteration: 36310
loss: 1.0091073513031006,grad_norm: 0.8650660423744121, iteration: 36311
loss: 1.0121281147003174,grad_norm: 0.9878261428855485, iteration: 36312
loss: 1.0374385118484497,grad_norm: 0.8316163028105531, iteration: 36313
loss: 1.056249737739563,grad_norm: 0.9999998548720384, iteration: 36314
loss: 1.0249834060668945,grad_norm: 0.9483593798437786, iteration: 36315
loss: 0.9504371285438538,grad_norm: 0.895391411154691, iteration: 36316
loss: 1.0333781242370605,grad_norm: 0.9999995809765118, iteration: 36317
loss: 1.0516126155853271,grad_norm: 0.9999991338122011, iteration: 36318
loss: 1.2079353332519531,grad_norm: 0.9999998044494713, iteration: 36319
loss: 0.990856945514679,grad_norm: 0.9324435782053331, iteration: 36320
loss: 1.0137442350387573,grad_norm: 0.9999991460136696, iteration: 36321
loss: 1.0185391902923584,grad_norm: 0.9999994457411546, iteration: 36322
loss: 1.0393251180648804,grad_norm: 0.999999327212262, iteration: 36323
loss: 1.0253872871398926,grad_norm: 0.9999990427435445, iteration: 36324
loss: 0.9895743727684021,grad_norm: 0.9999990158305893, iteration: 36325
loss: 0.9844954609870911,grad_norm: 0.7982971314315808, iteration: 36326
loss: 1.0379656553268433,grad_norm: 0.9999996485963092, iteration: 36327
loss: 1.0575071573257446,grad_norm: 0.8726101936491483, iteration: 36328
loss: 0.9970216155052185,grad_norm: 0.999999216643641, iteration: 36329
loss: 0.9896911382675171,grad_norm: 0.9279701490721821, iteration: 36330
loss: 0.9956262111663818,grad_norm: 0.9999992046198183, iteration: 36331
loss: 1.0169047117233276,grad_norm: 0.9999990398802308, iteration: 36332
loss: 1.003955602645874,grad_norm: 0.9752699829147047, iteration: 36333
loss: 1.007691502571106,grad_norm: 0.9819740547497315, iteration: 36334
loss: 0.9896261692047119,grad_norm: 0.9999991539697523, iteration: 36335
loss: 1.0048569440841675,grad_norm: 0.9811941460486232, iteration: 36336
loss: 1.0027754306793213,grad_norm: 0.9804977033681669, iteration: 36337
loss: 1.013834834098816,grad_norm: 0.9999991805560015, iteration: 36338
loss: 1.0023276805877686,grad_norm: 0.9999990701204386, iteration: 36339
loss: 0.9854987263679504,grad_norm: 0.8578556905475755, iteration: 36340
loss: 1.0263046026229858,grad_norm: 0.9608558286325511, iteration: 36341
loss: 1.0039957761764526,grad_norm: 0.9999997007923056, iteration: 36342
loss: 1.0879939794540405,grad_norm: 0.9747066962674888, iteration: 36343
loss: 0.9999992251396179,grad_norm: 0.9999991282458487, iteration: 36344
loss: 0.9812129735946655,grad_norm: 0.9999992070445144, iteration: 36345
loss: 0.9786385893821716,grad_norm: 0.999999264164625, iteration: 36346
loss: 0.9854171276092529,grad_norm: 0.9606697779651856, iteration: 36347
loss: 1.020689606666565,grad_norm: 0.9999991552487495, iteration: 36348
loss: 1.0399938821792603,grad_norm: 0.9999990301356699, iteration: 36349
loss: 1.0167536735534668,grad_norm: 0.9999990229165464, iteration: 36350
loss: 1.0186904668807983,grad_norm: 0.9999998698969493, iteration: 36351
loss: 1.0764834880828857,grad_norm: 0.9999993506267947, iteration: 36352
loss: 0.9923275709152222,grad_norm: 0.8691150731090282, iteration: 36353
loss: 1.0182151794433594,grad_norm: 0.9999997121849404, iteration: 36354
loss: 1.017301321029663,grad_norm: 0.9999991194841023, iteration: 36355
loss: 0.9873012900352478,grad_norm: 0.9999990924225955, iteration: 36356
loss: 1.0198596715927124,grad_norm: 0.8850038940711764, iteration: 36357
loss: 1.0051318407058716,grad_norm: 0.999999091022339, iteration: 36358
loss: 1.0213834047317505,grad_norm: 0.9999991967303361, iteration: 36359
loss: 0.9684659838676453,grad_norm: 0.9425425053329617, iteration: 36360
loss: 0.9744402766227722,grad_norm: 0.9061259221869563, iteration: 36361
loss: 1.092200517654419,grad_norm: 0.999999489024891, iteration: 36362
loss: 1.0267459154129028,grad_norm: 0.9999991823245671, iteration: 36363
loss: 1.0409541130065918,grad_norm: 0.8719805102567338, iteration: 36364
loss: 0.9795603156089783,grad_norm: 0.999999165396299, iteration: 36365
loss: 1.0104964971542358,grad_norm: 0.9761735586700132, iteration: 36366
loss: 0.9995931386947632,grad_norm: 0.851828658850687, iteration: 36367
loss: 1.015409231185913,grad_norm: 0.9739480000991155, iteration: 36368
loss: 1.036801815032959,grad_norm: 0.9999993029086611, iteration: 36369
loss: 1.0078011751174927,grad_norm: 0.8841822021152717, iteration: 36370
loss: 1.0347027778625488,grad_norm: 0.9356548768403767, iteration: 36371
loss: 1.0315346717834473,grad_norm: 0.9626555594762137, iteration: 36372
loss: 1.014451265335083,grad_norm: 0.9999993187680518, iteration: 36373
loss: 1.0089439153671265,grad_norm: 0.9999989923295998, iteration: 36374
loss: 0.9866386651992798,grad_norm: 0.9999998370498071, iteration: 36375
loss: 1.0091816186904907,grad_norm: 0.9999992360323194, iteration: 36376
loss: 1.0681675672531128,grad_norm: 0.9999992092512465, iteration: 36377
loss: 1.0136491060256958,grad_norm: 0.999999295101951, iteration: 36378
loss: 0.9781663417816162,grad_norm: 0.8922361238154835, iteration: 36379
loss: 1.040809988975525,grad_norm: 0.9877207892672166, iteration: 36380
loss: 1.0070704221725464,grad_norm: 0.99999910630392, iteration: 36381
loss: 0.9937407374382019,grad_norm: 0.911558152413945, iteration: 36382
loss: 1.0308756828308105,grad_norm: 0.9999992425223082, iteration: 36383
loss: 0.9638383984565735,grad_norm: 0.885722523579803, iteration: 36384
loss: 1.020532488822937,grad_norm: 0.9999991891705741, iteration: 36385
loss: 0.9830461740493774,grad_norm: 0.9355226399507989, iteration: 36386
loss: 1.043881893157959,grad_norm: 0.999999851889164, iteration: 36387
loss: 0.9975727200508118,grad_norm: 0.8859717581090326, iteration: 36388
loss: 1.0149641036987305,grad_norm: 0.8760213605080045, iteration: 36389
loss: 1.0168179273605347,grad_norm: 0.9999991191548925, iteration: 36390
loss: 1.0686874389648438,grad_norm: 0.9999993052510915, iteration: 36391
loss: 1.0210708379745483,grad_norm: 0.9278865451222531, iteration: 36392
loss: 1.0056122541427612,grad_norm: 0.8645068874756148, iteration: 36393
loss: 1.0100995302200317,grad_norm: 0.9999991049962543, iteration: 36394
loss: 1.0082647800445557,grad_norm: 0.9999991193417389, iteration: 36395
loss: 1.0220297574996948,grad_norm: 0.9999990888552275, iteration: 36396
loss: 1.0431116819381714,grad_norm: 0.9422647535476384, iteration: 36397
loss: 0.9992730021476746,grad_norm: 0.8384358267735671, iteration: 36398
loss: 1.0639410018920898,grad_norm: 0.9999997419974102, iteration: 36399
loss: 1.0062904357910156,grad_norm: 0.9443870550017053, iteration: 36400
loss: 0.9965077638626099,grad_norm: 0.8262809981195246, iteration: 36401
loss: 0.9580069780349731,grad_norm: 0.9860342463917624, iteration: 36402
loss: 1.0359818935394287,grad_norm: 0.9999992570916832, iteration: 36403
loss: 1.0051603317260742,grad_norm: 0.999999221220973, iteration: 36404
loss: 0.9866634607315063,grad_norm: 0.9999992211552724, iteration: 36405
loss: 0.999807596206665,grad_norm: 0.999999010463561, iteration: 36406
loss: 0.992637038230896,grad_norm: 0.9999990085001844, iteration: 36407
loss: 0.9841756224632263,grad_norm: 0.9999991831117642, iteration: 36408
loss: 1.0545639991760254,grad_norm: 0.9999992502811111, iteration: 36409
loss: 1.0301910638809204,grad_norm: 0.9098039367057709, iteration: 36410
loss: 1.0464515686035156,grad_norm: 0.9640015076666406, iteration: 36411
loss: 1.0133717060089111,grad_norm: 0.9999999859973692, iteration: 36412
loss: 1.0029690265655518,grad_norm: 0.8989786695657768, iteration: 36413
loss: 1.0264595746994019,grad_norm: 0.9999995666335953, iteration: 36414
loss: 0.9981663227081299,grad_norm: 0.9006687612645993, iteration: 36415
loss: 1.0336003303527832,grad_norm: 0.9999990067201388, iteration: 36416
loss: 1.001578450202942,grad_norm: 0.9999993512652324, iteration: 36417
loss: 1.0119667053222656,grad_norm: 0.8950936629313477, iteration: 36418
loss: 1.0289195775985718,grad_norm: 0.8594729951386204, iteration: 36419
loss: 1.0099514722824097,grad_norm: 0.8573580067622505, iteration: 36420
loss: 1.0485717058181763,grad_norm: 0.9537470663736539, iteration: 36421
loss: 1.0183058977127075,grad_norm: 0.9530037245603991, iteration: 36422
loss: 1.0302525758743286,grad_norm: 0.8278815875407413, iteration: 36423
loss: 0.9830671548843384,grad_norm: 0.9144905641381046, iteration: 36424
loss: 1.0038186311721802,grad_norm: 0.9640556802723862, iteration: 36425
loss: 1.0013542175292969,grad_norm: 0.9044994526539587, iteration: 36426
loss: 1.0518512725830078,grad_norm: 0.9999991401693323, iteration: 36427
loss: 1.0198508501052856,grad_norm: 0.9999994375052599, iteration: 36428
loss: 1.004495620727539,grad_norm: 0.9999990982468494, iteration: 36429
loss: 0.9892446398735046,grad_norm: 0.9946996895729744, iteration: 36430
loss: 1.0171762704849243,grad_norm: 0.852975259441976, iteration: 36431
loss: 1.0735746622085571,grad_norm: 0.9999989796456276, iteration: 36432
loss: 0.9844065308570862,grad_norm: 0.9512509996855931, iteration: 36433
loss: 0.9941417574882507,grad_norm: 0.9999991930482244, iteration: 36434
loss: 0.9999224543571472,grad_norm: 0.9336891994153069, iteration: 36435
loss: 1.0482031106948853,grad_norm: 0.7872962385037091, iteration: 36436
loss: 1.0031217336654663,grad_norm: 0.9999990704968795, iteration: 36437
loss: 0.9954768419265747,grad_norm: 0.9999990629561846, iteration: 36438
loss: 1.0200339555740356,grad_norm: 0.7904953100374613, iteration: 36439
loss: 1.0131192207336426,grad_norm: 0.9845750774245647, iteration: 36440
loss: 1.03347647190094,grad_norm: 0.8887064809114954, iteration: 36441
loss: 1.006146788597107,grad_norm: 0.9517094005222583, iteration: 36442
loss: 1.002755880355835,grad_norm: 0.8632005241769796, iteration: 36443
loss: 1.0455347299575806,grad_norm: 0.9432252073278844, iteration: 36444
loss: 1.0690360069274902,grad_norm: 0.9999996412180548, iteration: 36445
loss: 1.0582724809646606,grad_norm: 0.9999998191995373, iteration: 36446
loss: 1.0190478563308716,grad_norm: 0.7743830282593821, iteration: 36447
loss: 1.0100328922271729,grad_norm: 0.9647434774238838, iteration: 36448
loss: 1.0412461757659912,grad_norm: 0.8956079500286656, iteration: 36449
loss: 1.0271892547607422,grad_norm: 0.8702220229467376, iteration: 36450
loss: 0.9839527606964111,grad_norm: 0.9999991746585014, iteration: 36451
loss: 0.995693564414978,grad_norm: 0.9999991383134162, iteration: 36452
loss: 0.9816280603408813,grad_norm: 0.9999992811544516, iteration: 36453
loss: 0.985651969909668,grad_norm: 0.9202454490572558, iteration: 36454
loss: 1.0209858417510986,grad_norm: 0.9999991979602307, iteration: 36455
loss: 1.0419951677322388,grad_norm: 0.9999990004701144, iteration: 36456
loss: 1.016477346420288,grad_norm: 0.9999996943298243, iteration: 36457
loss: 1.0243263244628906,grad_norm: 0.9999999066669447, iteration: 36458
loss: 1.0605887174606323,grad_norm: 0.9999991035122375, iteration: 36459
loss: 0.9737147688865662,grad_norm: 0.9999993582345577, iteration: 36460
loss: 1.0002604722976685,grad_norm: 0.9194564887576836, iteration: 36461
loss: 1.0324467420578003,grad_norm: 0.999999817592499, iteration: 36462
loss: 0.9953567981719971,grad_norm: 0.9999992787603016, iteration: 36463
loss: 1.000329613685608,grad_norm: 0.9999991069670338, iteration: 36464
loss: 1.0142714977264404,grad_norm: 0.8607084660299561, iteration: 36465
loss: 1.0166215896606445,grad_norm: 0.8303555566997147, iteration: 36466
loss: 1.0077250003814697,grad_norm: 0.796561249250478, iteration: 36467
loss: 1.0192387104034424,grad_norm: 0.9999991646447759, iteration: 36468
loss: 1.0027259588241577,grad_norm: 0.8354259275046803, iteration: 36469
loss: 1.0626707077026367,grad_norm: 0.9999999554404009, iteration: 36470
loss: 1.0143247842788696,grad_norm: 0.9999991839061699, iteration: 36471
loss: 1.0063105821609497,grad_norm: 0.8456518912554527, iteration: 36472
loss: 1.0032471418380737,grad_norm: 0.9134454188621058, iteration: 36473
loss: 1.0081747770309448,grad_norm: 0.9999991451168788, iteration: 36474
loss: 1.0032175779342651,grad_norm: 0.9999992670906674, iteration: 36475
loss: 1.0038033723831177,grad_norm: 0.9999992024766379, iteration: 36476
loss: 1.0983763933181763,grad_norm: 0.9999999095756094, iteration: 36477
loss: 0.9680713415145874,grad_norm: 0.9622039611074492, iteration: 36478
loss: 1.0180259943008423,grad_norm: 0.9999990979638331, iteration: 36479
loss: 1.009749412536621,grad_norm: 0.9479766490980005, iteration: 36480
loss: 1.0294495820999146,grad_norm: 0.9999991301463911, iteration: 36481
loss: 1.0100023746490479,grad_norm: 0.999999330525199, iteration: 36482
loss: 1.0069472789764404,grad_norm: 0.9439479109630489, iteration: 36483
loss: 0.9984865188598633,grad_norm: 0.9999990052741728, iteration: 36484
loss: 1.0130119323730469,grad_norm: 0.9999993886776218, iteration: 36485
loss: 1.0570768117904663,grad_norm: 0.9999998021028362, iteration: 36486
loss: 0.9719619154930115,grad_norm: 0.9612520360962806, iteration: 36487
loss: 1.0221725702285767,grad_norm: 0.9999990217087739, iteration: 36488
loss: 0.990915834903717,grad_norm: 0.8863035613458287, iteration: 36489
loss: 1.0253357887268066,grad_norm: 0.9999994348046944, iteration: 36490
loss: 1.0167274475097656,grad_norm: 0.9610062158907493, iteration: 36491
loss: 1.1430916786193848,grad_norm: 0.9999995027119353, iteration: 36492
loss: 1.0119788646697998,grad_norm: 0.9360271163330178, iteration: 36493
loss: 1.0414782762527466,grad_norm: 0.9999992664060707, iteration: 36494
loss: 1.0063906908035278,grad_norm: 0.9999992881237152, iteration: 36495
loss: 1.0469175577163696,grad_norm: 0.9999991199303155, iteration: 36496
loss: 0.9642037153244019,grad_norm: 0.999999318769546, iteration: 36497
loss: 1.0043370723724365,grad_norm: 0.9820451689951768, iteration: 36498
loss: 1.0787980556488037,grad_norm: 0.9999995605406434, iteration: 36499
loss: 1.0159904956817627,grad_norm: 0.9999990527342212, iteration: 36500
loss: 1.0492916107177734,grad_norm: 0.9789976980042256, iteration: 36501
loss: 1.0009242296218872,grad_norm: 0.8706795813126957, iteration: 36502
loss: 0.9779313802719116,grad_norm: 0.9999990986027888, iteration: 36503
loss: 1.0004572868347168,grad_norm: 0.9999989863535917, iteration: 36504
loss: 0.9964920282363892,grad_norm: 0.9999992501766266, iteration: 36505
loss: 0.9920790791511536,grad_norm: 0.9784598950262364, iteration: 36506
loss: 0.9952314496040344,grad_norm: 0.9999992279247916, iteration: 36507
loss: 1.0031490325927734,grad_norm: 0.913818940128045, iteration: 36508
loss: 1.005454182624817,grad_norm: 0.9918766506917652, iteration: 36509
loss: 1.0159213542938232,grad_norm: 0.9999997527664147, iteration: 36510
loss: 1.0045119524002075,grad_norm: 0.9999994419413122, iteration: 36511
loss: 1.0211697816848755,grad_norm: 0.99999920746472, iteration: 36512
loss: 0.9927732348442078,grad_norm: 0.9578047573643477, iteration: 36513
loss: 1.0687271356582642,grad_norm: 0.9999997280404567, iteration: 36514
loss: 1.0664070844650269,grad_norm: 0.999999540858547, iteration: 36515
loss: 1.028656244277954,grad_norm: 0.9999990509077673, iteration: 36516
loss: 0.9764386415481567,grad_norm: 0.9871625082068051, iteration: 36517
loss: 1.0087820291519165,grad_norm: 0.9335239554449928, iteration: 36518
loss: 1.0271962881088257,grad_norm: 0.9999991609979465, iteration: 36519
loss: 0.9995979070663452,grad_norm: 0.9287629442007994, iteration: 36520
loss: 1.0644553899765015,grad_norm: 0.9999991846771888, iteration: 36521
loss: 1.0104979276657104,grad_norm: 0.9999996778756102, iteration: 36522
loss: 1.0340427160263062,grad_norm: 0.8141893913389957, iteration: 36523
loss: 0.9879583120346069,grad_norm: 0.9999992472851164, iteration: 36524
loss: 1.0343786478042603,grad_norm: 0.9999991035810801, iteration: 36525
loss: 0.9841289520263672,grad_norm: 0.9943783536636929, iteration: 36526
loss: 1.023460865020752,grad_norm: 0.9999995072637612, iteration: 36527
loss: 1.038018822669983,grad_norm: 0.9999997793910371, iteration: 36528
loss: 1.0232502222061157,grad_norm: 0.9999990547226206, iteration: 36529
loss: 0.9682456254959106,grad_norm: 0.9999998625930725, iteration: 36530
loss: 1.0305135250091553,grad_norm: 0.9421196262356112, iteration: 36531
loss: 1.1099997758865356,grad_norm: 0.9999991565566992, iteration: 36532
loss: 1.0340105295181274,grad_norm: 0.9999990978538865, iteration: 36533
loss: 1.014289140701294,grad_norm: 0.999999660063286, iteration: 36534
loss: 1.008354902267456,grad_norm: 0.9999996123773472, iteration: 36535
loss: 0.9962340593338013,grad_norm: 0.8617111399899827, iteration: 36536
loss: 1.025593638420105,grad_norm: 0.9999993842646772, iteration: 36537
loss: 0.9638682007789612,grad_norm: 0.9115943754675027, iteration: 36538
loss: 1.019105076789856,grad_norm: 0.9999990022776665, iteration: 36539
loss: 1.0844340324401855,grad_norm: 0.999999924808792, iteration: 36540
loss: 1.0435659885406494,grad_norm: 0.9999995437709743, iteration: 36541
loss: 1.0493851900100708,grad_norm: 0.9433760856097321, iteration: 36542
loss: 1.0465660095214844,grad_norm: 0.9999990255173372, iteration: 36543
loss: 0.9887794256210327,grad_norm: 0.9607369207086179, iteration: 36544
loss: 1.0383042097091675,grad_norm: 0.9375130406943476, iteration: 36545
loss: 0.9973469376564026,grad_norm: 0.9411792371958619, iteration: 36546
loss: 0.9876298308372498,grad_norm: 0.9999990907591771, iteration: 36547
loss: 1.0021363496780396,grad_norm: 0.8490259122077847, iteration: 36548
loss: 1.0336153507232666,grad_norm: 0.9130116461654129, iteration: 36549
loss: 1.0089036226272583,grad_norm: 0.9287147109410574, iteration: 36550
loss: 1.0306841135025024,grad_norm: 0.9999994881495652, iteration: 36551
loss: 1.0746510028839111,grad_norm: 0.9999993869607299, iteration: 36552
loss: 0.9821763038635254,grad_norm: 0.9999991897957504, iteration: 36553
loss: 1.0944260358810425,grad_norm: 0.9999997487417117, iteration: 36554
loss: 0.9731167554855347,grad_norm: 0.8070202797090094, iteration: 36555
loss: 0.9830067157745361,grad_norm: 0.9140876245522956, iteration: 36556
loss: 1.0355340242385864,grad_norm: 0.9999993036307363, iteration: 36557
loss: 0.9816451072692871,grad_norm: 0.9999991098491243, iteration: 36558
loss: 1.038861632347107,grad_norm: 0.9680538743448885, iteration: 36559
loss: 1.0437895059585571,grad_norm: 0.9999992920493967, iteration: 36560
loss: 1.0111894607543945,grad_norm: 0.8140790380069644, iteration: 36561
loss: 1.0226917266845703,grad_norm: 0.9999991435481979, iteration: 36562
loss: 1.0031447410583496,grad_norm: 0.9276634873042496, iteration: 36563
loss: 0.9771702885627747,grad_norm: 0.9082211774728665, iteration: 36564
loss: 1.0423604249954224,grad_norm: 0.9999997605972202, iteration: 36565
loss: 1.0258262157440186,grad_norm: 0.9088843396674137, iteration: 36566
loss: 1.0146751403808594,grad_norm: 0.8378472780476497, iteration: 36567
loss: 1.0364761352539062,grad_norm: 0.9999992939730479, iteration: 36568
loss: 1.006209135055542,grad_norm: 0.9999992478761858, iteration: 36569
loss: 0.9724461436271667,grad_norm: 0.9999990627849447, iteration: 36570
loss: 0.9766726493835449,grad_norm: 0.8379469413343107, iteration: 36571
loss: 1.015158772468567,grad_norm: 0.9999991121046682, iteration: 36572
loss: 1.0383943319320679,grad_norm: 0.9999993148702478, iteration: 36573
loss: 0.989196240901947,grad_norm: 0.9818885232512099, iteration: 36574
loss: 0.9910498857498169,grad_norm: 0.999998988085698, iteration: 36575
loss: 1.0230789184570312,grad_norm: 0.9999991251067325, iteration: 36576
loss: 0.98331618309021,grad_norm: 0.8509008139750317, iteration: 36577
loss: 0.9617668986320496,grad_norm: 0.9478192745487746, iteration: 36578
loss: 0.9962397813796997,grad_norm: 0.793671695447631, iteration: 36579
loss: 0.9656822681427002,grad_norm: 0.9999990373930734, iteration: 36580
loss: 0.9936048984527588,grad_norm: 0.9302889884007185, iteration: 36581
loss: 0.9998849630355835,grad_norm: 0.9927603103864266, iteration: 36582
loss: 1.0055382251739502,grad_norm: 0.9999991332119862, iteration: 36583
loss: 1.0064948797225952,grad_norm: 0.8836269666692428, iteration: 36584
loss: 1.0740363597869873,grad_norm: 0.999999653425515, iteration: 36585
loss: 0.9929039478302002,grad_norm: 0.9999990841052037, iteration: 36586
loss: 1.0230191946029663,grad_norm: 0.9999995927505686, iteration: 36587
loss: 1.0012885332107544,grad_norm: 0.9609642764033265, iteration: 36588
loss: 1.1204386949539185,grad_norm: 0.9999994178731568, iteration: 36589
loss: 0.9716554284095764,grad_norm: 0.8764830190647939, iteration: 36590
loss: 0.9797105193138123,grad_norm: 0.8449820474221991, iteration: 36591
loss: 0.998096227645874,grad_norm: 0.7860708443878913, iteration: 36592
loss: 1.028814435005188,grad_norm: 0.9999989637200417, iteration: 36593
loss: 0.996466338634491,grad_norm: 0.9999990645767327, iteration: 36594
loss: 1.0272730588912964,grad_norm: 0.9999993603872438, iteration: 36595
loss: 0.9843016862869263,grad_norm: 0.9999990748061547, iteration: 36596
loss: 0.9985145926475525,grad_norm: 0.907343807051756, iteration: 36597
loss: 0.9910039901733398,grad_norm: 0.999999820198791, iteration: 36598
loss: 0.998950719833374,grad_norm: 0.999999332210415, iteration: 36599
loss: 1.0010675191879272,grad_norm: 0.9999989459223856, iteration: 36600
loss: 1.0207887887954712,grad_norm: 0.9406117730838199, iteration: 36601
loss: 1.0271626710891724,grad_norm: 0.9550914848846984, iteration: 36602
loss: 1.057330846786499,grad_norm: 0.9999990452960076, iteration: 36603
loss: 1.0082550048828125,grad_norm: 0.9486412096053394, iteration: 36604
loss: 1.0031241178512573,grad_norm: 0.9283259704302259, iteration: 36605
loss: 0.9949870109558105,grad_norm: 0.9484912528856638, iteration: 36606
loss: 1.0264281034469604,grad_norm: 0.9999990316713523, iteration: 36607
loss: 1.0302097797393799,grad_norm: 0.9999994355166283, iteration: 36608
loss: 1.0197120904922485,grad_norm: 0.9999991171131158, iteration: 36609
loss: 1.0011745691299438,grad_norm: 0.8258971627873516, iteration: 36610
loss: 1.0049782991409302,grad_norm: 0.9999992835689684, iteration: 36611
loss: 0.9957148432731628,grad_norm: 0.9038127041068719, iteration: 36612
loss: 0.9447219967842102,grad_norm: 0.7949319205058027, iteration: 36613
loss: 0.994256854057312,grad_norm: 0.9999991032723862, iteration: 36614
loss: 1.0420315265655518,grad_norm: 0.9999993114771254, iteration: 36615
loss: 1.0628315210342407,grad_norm: 0.9999995868528907, iteration: 36616
loss: 1.0122970342636108,grad_norm: 0.7809517281251219, iteration: 36617
loss: 1.0018764734268188,grad_norm: 0.9999990927706223, iteration: 36618
loss: 1.027935266494751,grad_norm: 0.9999995886663381, iteration: 36619
loss: 1.0090649127960205,grad_norm: 0.9102113031806861, iteration: 36620
loss: 1.006691336631775,grad_norm: 0.9622364462778501, iteration: 36621
loss: 1.0096431970596313,grad_norm: 0.9524044300861011, iteration: 36622
loss: 1.0276654958724976,grad_norm: 0.999999717080073, iteration: 36623
loss: 1.0054131746292114,grad_norm: 0.9999996008284494, iteration: 36624
loss: 0.9797402024269104,grad_norm: 0.8587166809595118, iteration: 36625
loss: 1.0033358335494995,grad_norm: 0.8707473758903354, iteration: 36626
loss: 0.9517348408699036,grad_norm: 0.9987923432353591, iteration: 36627
loss: 1.0195943117141724,grad_norm: 0.9999991137923621, iteration: 36628
loss: 1.0043669939041138,grad_norm: 0.9999991492352271, iteration: 36629
loss: 1.023389220237732,grad_norm: 0.99999927586642, iteration: 36630
loss: 0.9981894493103027,grad_norm: 0.9738318067999301, iteration: 36631
loss: 0.9687954783439636,grad_norm: 0.9999990323487692, iteration: 36632
loss: 1.0327129364013672,grad_norm: 0.8726729164140892, iteration: 36633
loss: 0.9820570349693298,grad_norm: 0.9124908789397124, iteration: 36634
loss: 0.9964285492897034,grad_norm: 0.9999992306541609, iteration: 36635
loss: 1.0265158414840698,grad_norm: 0.9999993546705447, iteration: 36636
loss: 0.9921965003013611,grad_norm: 0.8631118172220159, iteration: 36637
loss: 1.0716967582702637,grad_norm: 0.9999998479823186, iteration: 36638
loss: 0.9956836104393005,grad_norm: 0.9999997421637865, iteration: 36639
loss: 1.014227271080017,grad_norm: 0.9174075630319671, iteration: 36640
loss: 1.0190305709838867,grad_norm: 0.8796135205882447, iteration: 36641
loss: 0.9940264821052551,grad_norm: 0.9351695040787885, iteration: 36642
loss: 0.9742903709411621,grad_norm: 0.9509239293927837, iteration: 36643
loss: 1.0038979053497314,grad_norm: 0.9149046031928605, iteration: 36644
loss: 1.0477474927902222,grad_norm: 0.9999995389774177, iteration: 36645
loss: 1.106141448020935,grad_norm: 0.9999993553849483, iteration: 36646
loss: 1.0244624614715576,grad_norm: 0.9999990845172874, iteration: 36647
loss: 0.970649242401123,grad_norm: 0.9999991238585967, iteration: 36648
loss: 1.0425658226013184,grad_norm: 0.9523668509267524, iteration: 36649
loss: 0.9676454663276672,grad_norm: 0.8982194142187603, iteration: 36650
loss: 1.0009647607803345,grad_norm: 0.9999990222729299, iteration: 36651
loss: 1.0163908004760742,grad_norm: 0.826382066070474, iteration: 36652
loss: 0.969416081905365,grad_norm: 0.9999991582310258, iteration: 36653
loss: 1.02493417263031,grad_norm: 0.9245706598878872, iteration: 36654
loss: 0.988335371017456,grad_norm: 0.9407401954355579, iteration: 36655
loss: 1.0295156240463257,grad_norm: 0.9999991222331184, iteration: 36656
loss: 0.9969477653503418,grad_norm: 0.9999992982324099, iteration: 36657
loss: 0.9957935810089111,grad_norm: 0.8142769993005081, iteration: 36658
loss: 0.9971787929534912,grad_norm: 0.9999990450659721, iteration: 36659
loss: 0.995265781879425,grad_norm: 0.8953224623909777, iteration: 36660
loss: 0.9752874374389648,grad_norm: 0.9999990755051688, iteration: 36661
loss: 0.9946243166923523,grad_norm: 0.8600702701219863, iteration: 36662
loss: 0.9735236167907715,grad_norm: 0.8948490175661372, iteration: 36663
loss: 1.0414807796478271,grad_norm: 0.9999994515432125, iteration: 36664
loss: 0.9785776734352112,grad_norm: 0.8167094417773049, iteration: 36665
loss: 0.9683001041412354,grad_norm: 0.981876327917579, iteration: 36666
loss: 0.9870953559875488,grad_norm: 0.9433058745628162, iteration: 36667
loss: 0.951033890247345,grad_norm: 0.8974420930125222, iteration: 36668
loss: 0.9994896054267883,grad_norm: 0.9999997424106395, iteration: 36669
loss: 1.038460373878479,grad_norm: 0.9641794589530253, iteration: 36670
loss: 1.0054670572280884,grad_norm: 0.9010261119757969, iteration: 36671
loss: 1.0314782857894897,grad_norm: 0.9709994641519063, iteration: 36672
loss: 1.0058200359344482,grad_norm: 0.9612849120771998, iteration: 36673
loss: 1.016188144683838,grad_norm: 0.9999990631813765, iteration: 36674
loss: 0.9716563820838928,grad_norm: 0.8042986086985762, iteration: 36675
loss: 1.006502628326416,grad_norm: 0.9999993968386716, iteration: 36676
loss: 0.9639884233474731,grad_norm: 0.9479551903803229, iteration: 36677
loss: 0.9859079718589783,grad_norm: 0.9608697096963078, iteration: 36678
loss: 1.0449533462524414,grad_norm: 0.9999991868204314, iteration: 36679
loss: 1.0721219778060913,grad_norm: 0.9255745605087449, iteration: 36680
loss: 0.977564811706543,grad_norm: 0.9999991514113521, iteration: 36681
loss: 1.0247033834457397,grad_norm: 0.9999990235951145, iteration: 36682
loss: 1.039291262626648,grad_norm: 0.9999998943854261, iteration: 36683
loss: 1.0097862482070923,grad_norm: 0.895644846127393, iteration: 36684
loss: 1.0652552843093872,grad_norm: 0.9999995400484037, iteration: 36685
loss: 0.985282301902771,grad_norm: 0.9457133486642947, iteration: 36686
loss: 1.0115633010864258,grad_norm: 0.9999989406244774, iteration: 36687
loss: 0.9851352572441101,grad_norm: 0.999998993252869, iteration: 36688
loss: 0.9969368577003479,grad_norm: 0.9999992336433108, iteration: 36689
loss: 1.019652247428894,grad_norm: 0.999999148451684, iteration: 36690
loss: 0.992566704750061,grad_norm: 0.9999990182580141, iteration: 36691
loss: 1.0474541187286377,grad_norm: 0.9999990698504494, iteration: 36692
loss: 0.9797949194908142,grad_norm: 0.8368128037530156, iteration: 36693
loss: 1.0102261304855347,grad_norm: 0.999999161342296, iteration: 36694
loss: 1.0549025535583496,grad_norm: 0.997227721509466, iteration: 36695
loss: 0.9827187061309814,grad_norm: 0.9999991828314742, iteration: 36696
loss: 1.010061264038086,grad_norm: 0.937357420493702, iteration: 36697
loss: 0.9608814716339111,grad_norm: 0.962867244268631, iteration: 36698
loss: 1.006393551826477,grad_norm: 0.9999991733530881, iteration: 36699
loss: 0.9713955521583557,grad_norm: 0.9190779183218295, iteration: 36700
loss: 0.9923415184020996,grad_norm: 0.9999991769740905, iteration: 36701
loss: 1.0131592750549316,grad_norm: 0.9999992423188273, iteration: 36702
loss: 1.004245400428772,grad_norm: 0.9999991952261436, iteration: 36703
loss: 1.1411997079849243,grad_norm: 0.9999999508121761, iteration: 36704
loss: 1.0168315172195435,grad_norm: 0.9362266200018352, iteration: 36705
loss: 1.0888172388076782,grad_norm: 0.999999700423682, iteration: 36706
loss: 1.362581729888916,grad_norm: 0.9999996723988815, iteration: 36707
loss: 1.3334237337112427,grad_norm: 0.999999749868436, iteration: 36708
loss: 1.3468061685562134,grad_norm: 0.99999963582053, iteration: 36709
loss: 1.1625497341156006,grad_norm: 1.0000000433539824, iteration: 36710
loss: 1.23674738407135,grad_norm: 0.9999999408448892, iteration: 36711
loss: 1.1374937295913696,grad_norm: 0.9999993862400629, iteration: 36712
loss: 1.0655587911605835,grad_norm: 0.9999993379891345, iteration: 36713
loss: 1.0974258184432983,grad_norm: 0.9999999829891313, iteration: 36714
loss: 1.1043806076049805,grad_norm: 0.9999998338497006, iteration: 36715
loss: 1.059687852859497,grad_norm: 0.9999995443703565, iteration: 36716
loss: 1.053849458694458,grad_norm: 0.9999993616493845, iteration: 36717
loss: 1.0025743246078491,grad_norm: 0.9999992026832086, iteration: 36718
loss: 1.0859405994415283,grad_norm: 0.999999739969344, iteration: 36719
loss: 1.0243662595748901,grad_norm: 0.9999991895847198, iteration: 36720
loss: 0.9991528987884521,grad_norm: 0.9999990947793563, iteration: 36721
loss: 1.0334162712097168,grad_norm: 0.9999995149964179, iteration: 36722
loss: 1.0082145929336548,grad_norm: 0.9999991284958499, iteration: 36723
loss: 1.0839786529541016,grad_norm: 0.9999992679406635, iteration: 36724
loss: 1.0340895652770996,grad_norm: 0.9999992123446891, iteration: 36725
loss: 1.0648317337036133,grad_norm: 0.9999993713063805, iteration: 36726
loss: 1.0975319147109985,grad_norm: 0.9999999047586976, iteration: 36727
loss: 0.9954478144645691,grad_norm: 0.8660767661854673, iteration: 36728
loss: 1.0167288780212402,grad_norm: 0.9612809304739695, iteration: 36729
loss: 1.162325143814087,grad_norm: 0.9999995456584952, iteration: 36730
loss: 1.1178215742111206,grad_norm: 0.9999993944038027, iteration: 36731
loss: 1.030564546585083,grad_norm: 0.9717345431748068, iteration: 36732
loss: 1.0324888229370117,grad_norm: 0.9999991113573786, iteration: 36733
loss: 1.0106985569000244,grad_norm: 0.9999991520591256, iteration: 36734
loss: 1.0738720893859863,grad_norm: 0.9999991310816138, iteration: 36735
loss: 0.9712632298469543,grad_norm: 0.9999990173510334, iteration: 36736
loss: 1.0108803510665894,grad_norm: 0.9999990721758013, iteration: 36737
loss: 1.0904219150543213,grad_norm: 0.8324239788824978, iteration: 36738
loss: 1.028574824333191,grad_norm: 0.9999999385740653, iteration: 36739
loss: 1.007926106452942,grad_norm: 0.9038724924327254, iteration: 36740
loss: 1.0935481786727905,grad_norm: 0.9999991197801591, iteration: 36741
loss: 1.0609432458877563,grad_norm: 0.9999995545348003, iteration: 36742
loss: 1.0402926206588745,grad_norm: 0.9999991602374321, iteration: 36743
loss: 0.9614671468734741,grad_norm: 0.9740905273303014, iteration: 36744
loss: 0.9813868999481201,grad_norm: 0.9149015091722718, iteration: 36745
loss: 1.0693814754486084,grad_norm: 0.9999996638327228, iteration: 36746
loss: 1.0325708389282227,grad_norm: 0.9978515938746303, iteration: 36747
loss: 0.9502328038215637,grad_norm: 0.9999990551664824, iteration: 36748
loss: 1.049635410308838,grad_norm: 0.9999995856505008, iteration: 36749
loss: 1.026784062385559,grad_norm: 0.9200447920500099, iteration: 36750
loss: 1.0195118188858032,grad_norm: 0.8019347932090781, iteration: 36751
loss: 1.0007400512695312,grad_norm: 0.9999990591322655, iteration: 36752
loss: 0.9980536699295044,grad_norm: 0.8744521764067638, iteration: 36753
loss: 0.9870667457580566,grad_norm: 0.812833522222695, iteration: 36754
loss: 1.038790225982666,grad_norm: 0.9701814352557874, iteration: 36755
loss: 0.9942846298217773,grad_norm: 0.8786366061456053, iteration: 36756
loss: 1.0431593656539917,grad_norm: 0.9999995121497075, iteration: 36757
loss: 1.014710545539856,grad_norm: 0.999999115883846, iteration: 36758
loss: 1.1209979057312012,grad_norm: 0.9999990819040184, iteration: 36759
loss: 1.0430777072906494,grad_norm: 0.9999992876304289, iteration: 36760
loss: 1.0138092041015625,grad_norm: 0.89586376028921, iteration: 36761
loss: 0.989027202129364,grad_norm: 0.9999990444447612, iteration: 36762
loss: 1.0249910354614258,grad_norm: 0.9999990542163377, iteration: 36763
loss: 1.0594009160995483,grad_norm: 0.9999996975365025, iteration: 36764
loss: 1.0045297145843506,grad_norm: 0.9999991870976697, iteration: 36765
loss: 0.9821479916572571,grad_norm: 0.9999995075445176, iteration: 36766
loss: 1.02352774143219,grad_norm: 0.9999993852747697, iteration: 36767
loss: 1.0437296628952026,grad_norm: 0.999999486075236, iteration: 36768
loss: 0.9985659122467041,grad_norm: 0.9839750283570249, iteration: 36769
loss: 1.0074660778045654,grad_norm: 0.9999990761483937, iteration: 36770
loss: 0.9910547137260437,grad_norm: 0.9999991833325534, iteration: 36771
loss: 1.0050355195999146,grad_norm: 0.8853297912509095, iteration: 36772
loss: 0.9765770435333252,grad_norm: 0.9366765954730789, iteration: 36773
loss: 0.9810622930526733,grad_norm: 0.9999992229515772, iteration: 36774
loss: 0.9641914963722229,grad_norm: 0.9966139923146757, iteration: 36775
loss: 1.0801764726638794,grad_norm: 0.9999994777046122, iteration: 36776
loss: 1.0462547540664673,grad_norm: 0.999999212673256, iteration: 36777
loss: 1.0410107374191284,grad_norm: 0.9815087291345447, iteration: 36778
loss: 0.9848104119300842,grad_norm: 0.9945218552828023, iteration: 36779
loss: 1.0307379961013794,grad_norm: 0.9999991509223988, iteration: 36780
loss: 1.191200613975525,grad_norm: 0.9999994252112631, iteration: 36781
loss: 1.0760318040847778,grad_norm: 0.9999991676319038, iteration: 36782
loss: 1.2973674535751343,grad_norm: 0.9999993350115651, iteration: 36783
loss: 0.9843885898590088,grad_norm: 0.9999991881430118, iteration: 36784
loss: 1.0735493898391724,grad_norm: 0.999999479106715, iteration: 36785
loss: 1.0484391450881958,grad_norm: 0.9999990174294303, iteration: 36786
loss: 0.987616777420044,grad_norm: 0.891348452964371, iteration: 36787
loss: 1.0138918161392212,grad_norm: 0.8851521977009147, iteration: 36788
loss: 1.0743650197982788,grad_norm: 0.9999992326188957, iteration: 36789
loss: 1.0959392786026,grad_norm: 0.9999996301489059, iteration: 36790
loss: 1.0467182397842407,grad_norm: 0.9999998514250893, iteration: 36791
loss: 1.0401984453201294,grad_norm: 0.9999991755312587, iteration: 36792
loss: 1.0481728315353394,grad_norm: 0.9999994970776422, iteration: 36793
loss: 1.004761815071106,grad_norm: 0.9999991291350987, iteration: 36794
loss: 1.1992546319961548,grad_norm: 0.9999998036177101, iteration: 36795
loss: 1.027441143989563,grad_norm: 0.9999998470705639, iteration: 36796
loss: 0.9941003322601318,grad_norm: 0.9999991108530306, iteration: 36797
loss: 1.0557096004486084,grad_norm: 0.9999993506947266, iteration: 36798
loss: 1.0015673637390137,grad_norm: 0.9849748694066743, iteration: 36799
loss: 0.9959679245948792,grad_norm: 0.8107182425484137, iteration: 36800
loss: 0.9873148798942566,grad_norm: 0.8432071654267902, iteration: 36801
loss: 0.9869961142539978,grad_norm: 0.9078139092929929, iteration: 36802
loss: 1.0076048374176025,grad_norm: 0.9844077761173979, iteration: 36803
loss: 1.0185463428497314,grad_norm: 0.8348970809058921, iteration: 36804
loss: 0.9767591953277588,grad_norm: 0.9999992053800224, iteration: 36805
loss: 0.9717857837677002,grad_norm: 0.9999991838822326, iteration: 36806
loss: 1.0449498891830444,grad_norm: 0.9999992593116013, iteration: 36807
loss: 0.9806998372077942,grad_norm: 0.8895171372034918, iteration: 36808
loss: 1.0318981409072876,grad_norm: 0.7978039288037105, iteration: 36809
loss: 1.010454773902893,grad_norm: 0.9999991327500912, iteration: 36810
loss: 1.0296350717544556,grad_norm: 0.9250415368078543, iteration: 36811
loss: 1.0286206007003784,grad_norm: 0.9999993256557372, iteration: 36812
loss: 0.9598628282546997,grad_norm: 0.8466292500238739, iteration: 36813
loss: 1.0228437185287476,grad_norm: 0.999999358208473, iteration: 36814
loss: 1.0614378452301025,grad_norm: 0.9999991905303994, iteration: 36815
loss: 1.0073764324188232,grad_norm: 0.9999991952536689, iteration: 36816
loss: 1.0249263048171997,grad_norm: 0.8199676537111753, iteration: 36817
loss: 1.1140286922454834,grad_norm: 0.9999998370555496, iteration: 36818
loss: 1.1088190078735352,grad_norm: 0.9999998427240424, iteration: 36819
loss: 1.0290770530700684,grad_norm: 0.9999991043827416, iteration: 36820
loss: 1.0276057720184326,grad_norm: 0.9999998508246852, iteration: 36821
loss: 1.0240200757980347,grad_norm: 0.8674267849080385, iteration: 36822
loss: 1.0302282571792603,grad_norm: 0.9999990914687809, iteration: 36823
loss: 0.9900588393211365,grad_norm: 0.9999991314903096, iteration: 36824
loss: 0.9685521125793457,grad_norm: 0.8259564477257583, iteration: 36825
loss: 1.0349397659301758,grad_norm: 0.9228040397026173, iteration: 36826
loss: 1.0108379125595093,grad_norm: 0.9999994357036723, iteration: 36827
loss: 1.0544641017913818,grad_norm: 0.9999997027171734, iteration: 36828
loss: 0.9950515031814575,grad_norm: 0.9694629620251519, iteration: 36829
loss: 1.0343507528305054,grad_norm: 0.9999996847719727, iteration: 36830
loss: 1.0156875848770142,grad_norm: 0.9999992183201349, iteration: 36831
loss: 1.0167628526687622,grad_norm: 0.9999994164536754, iteration: 36832
loss: 1.0339962244033813,grad_norm: 0.9763806185005626, iteration: 36833
loss: 1.02419912815094,grad_norm: 0.9999991580027259, iteration: 36834
loss: 0.9741474986076355,grad_norm: 0.8684841277423941, iteration: 36835
loss: 1.041393756866455,grad_norm: 0.9463725526162784, iteration: 36836
loss: 0.9896772503852844,grad_norm: 0.9041306493146953, iteration: 36837
loss: 1.0178520679473877,grad_norm: 0.9999989522297994, iteration: 36838
loss: 0.9727559089660645,grad_norm: 0.9999990880585645, iteration: 36839
loss: 1.012105107307434,grad_norm: 0.9999991143764719, iteration: 36840
loss: 0.9756861925125122,grad_norm: 0.8365665595551626, iteration: 36841
loss: 1.0029547214508057,grad_norm: 0.9042503632936082, iteration: 36842
loss: 0.9909284114837646,grad_norm: 0.9999990574441109, iteration: 36843
loss: 1.0727949142456055,grad_norm: 0.9999995633770917, iteration: 36844
loss: 1.0344865322113037,grad_norm: 0.9999991364240419, iteration: 36845
loss: 1.0493265390396118,grad_norm: 0.9999992390751546, iteration: 36846
loss: 0.9764137268066406,grad_norm: 0.8637871106482482, iteration: 36847
loss: 1.0972387790679932,grad_norm: 0.9999992121109333, iteration: 36848
loss: 1.1288354396820068,grad_norm: 0.9999998335679943, iteration: 36849
loss: 1.0275239944458008,grad_norm: 0.9704166586181556, iteration: 36850
loss: 1.0200549364089966,grad_norm: 0.953120055169714, iteration: 36851
loss: 1.0080605745315552,grad_norm: 0.8768974041482164, iteration: 36852
loss: 0.9662144184112549,grad_norm: 0.8427828902557459, iteration: 36853
loss: 1.2068079710006714,grad_norm: 0.9999998503450865, iteration: 36854
loss: 1.0047078132629395,grad_norm: 0.8859381050549597, iteration: 36855
loss: 1.02037513256073,grad_norm: 0.8143819430754836, iteration: 36856
loss: 0.9892948269844055,grad_norm: 0.9999991676575596, iteration: 36857
loss: 1.0009077787399292,grad_norm: 0.9330265536369675, iteration: 36858
loss: 1.0378097295761108,grad_norm: 0.8606439788076633, iteration: 36859
loss: 0.9834790825843811,grad_norm: 0.9999991456220434, iteration: 36860
loss: 0.9909912943840027,grad_norm: 0.9009674798056765, iteration: 36861
loss: 1.0182087421417236,grad_norm: 0.9855816609998165, iteration: 36862
loss: 0.9779652953147888,grad_norm: 0.9999991899947656, iteration: 36863
loss: 1.0488839149475098,grad_norm: 0.9784924947487923, iteration: 36864
loss: 0.9994629621505737,grad_norm: 0.9999991643201155, iteration: 36865
loss: 0.9822408556938171,grad_norm: 0.9999990533064536, iteration: 36866
loss: 0.9903635382652283,grad_norm: 0.914519565855477, iteration: 36867
loss: 0.984305202960968,grad_norm: 0.9180775669075998, iteration: 36868
loss: 0.9941163063049316,grad_norm: 0.966868782325681, iteration: 36869
loss: 0.9597455263137817,grad_norm: 0.9999990411686639, iteration: 36870
loss: 1.0360500812530518,grad_norm: 0.8962548586931063, iteration: 36871
loss: 0.9761610627174377,grad_norm: 0.9452542339120249, iteration: 36872
loss: 1.045073390007019,grad_norm: 0.9999997092815154, iteration: 36873
loss: 1.0081475973129272,grad_norm: 0.9999991565423186, iteration: 36874
loss: 0.998599648475647,grad_norm: 0.8958079732176666, iteration: 36875
loss: 1.0330753326416016,grad_norm: 0.9999991449524143, iteration: 36876
loss: 1.043777346611023,grad_norm: 0.8494023500435423, iteration: 36877
loss: 1.0010911226272583,grad_norm: 0.9242850005179114, iteration: 36878
loss: 1.0256338119506836,grad_norm: 0.8973362977438513, iteration: 36879
loss: 1.00556218624115,grad_norm: 0.9512879782990992, iteration: 36880
loss: 1.0540186166763306,grad_norm: 0.9999997775898786, iteration: 36881
loss: 1.1659224033355713,grad_norm: 0.9999991729566549, iteration: 36882
loss: 0.9752910137176514,grad_norm: 0.9999990555620583, iteration: 36883
loss: 1.1411595344543457,grad_norm: 0.999999821176863, iteration: 36884
loss: 1.0186119079589844,grad_norm: 0.9999990974091015, iteration: 36885
loss: 0.9539496302604675,grad_norm: 0.9999990720395971, iteration: 36886
loss: 0.9980599880218506,grad_norm: 0.9344426896523836, iteration: 36887
loss: 1.021201729774475,grad_norm: 0.999999849754879, iteration: 36888
loss: 1.0296677350997925,grad_norm: 0.951342566354237, iteration: 36889
loss: 0.9726762771606445,grad_norm: 0.9355167612888251, iteration: 36890
loss: 1.0290244817733765,grad_norm: 0.9142055040346202, iteration: 36891
loss: 1.0130373239517212,grad_norm: 0.9999995491619322, iteration: 36892
loss: 1.0047787427902222,grad_norm: 0.8003904341923107, iteration: 36893
loss: 1.0184375047683716,grad_norm: 0.9999993089758293, iteration: 36894
loss: 1.0088531970977783,grad_norm: 0.9999990382282052, iteration: 36895
loss: 1.0031899213790894,grad_norm: 0.8780365689131217, iteration: 36896
loss: 1.0019421577453613,grad_norm: 0.9999990565653925, iteration: 36897
loss: 1.0098960399627686,grad_norm: 0.9439081967960072, iteration: 36898
loss: 1.069180965423584,grad_norm: 0.9999992766670645, iteration: 36899
loss: 0.977776050567627,grad_norm: 0.999998937060322, iteration: 36900
loss: 1.0008432865142822,grad_norm: 0.868572295210209, iteration: 36901
loss: 1.0841093063354492,grad_norm: 0.9999996046139445, iteration: 36902
loss: 0.9940246343612671,grad_norm: 0.9263635173242748, iteration: 36903
loss: 1.0109318494796753,grad_norm: 0.9999989801349263, iteration: 36904
loss: 1.014541745185852,grad_norm: 0.9999991429764933, iteration: 36905
loss: 1.0660052299499512,grad_norm: 0.9999993396664156, iteration: 36906
loss: 1.0697301626205444,grad_norm: 0.9999991960453934, iteration: 36907
loss: 0.9848041534423828,grad_norm: 0.9260112984137596, iteration: 36908
loss: 1.0123302936553955,grad_norm: 0.9194711408440953, iteration: 36909
loss: 0.954201340675354,grad_norm: 0.859345932068574, iteration: 36910
loss: 1.0774461030960083,grad_norm: 0.9910069496407317, iteration: 36911
loss: 0.9815255999565125,grad_norm: 0.8610686045471111, iteration: 36912
loss: 1.0843102931976318,grad_norm: 0.9999992995848634, iteration: 36913
loss: 1.020526647567749,grad_norm: 0.999999242661459, iteration: 36914
loss: 1.0394307374954224,grad_norm: 0.9999995034854473, iteration: 36915
loss: 1.047553539276123,grad_norm: 0.9999991492732019, iteration: 36916
loss: 0.9731895923614502,grad_norm: 0.8635772180846502, iteration: 36917
loss: 0.9727405309677124,grad_norm: 0.8916771070517469, iteration: 36918
loss: 1.2119064331054688,grad_norm: 0.9999997535282029, iteration: 36919
loss: 1.018149495124817,grad_norm: 0.9999992714152992, iteration: 36920
loss: 1.0827933549880981,grad_norm: 0.9999998768956804, iteration: 36921
loss: 1.009393572807312,grad_norm: 0.9999992424231409, iteration: 36922
loss: 1.0228747129440308,grad_norm: 0.9958184179009554, iteration: 36923
loss: 1.0365054607391357,grad_norm: 0.9999993553130742, iteration: 36924
loss: 1.0339746475219727,grad_norm: 0.9999990042165201, iteration: 36925
loss: 0.9876649379730225,grad_norm: 0.7298059499829318, iteration: 36926
loss: 1.0891717672348022,grad_norm: 0.999999298383846, iteration: 36927
loss: 0.9873996376991272,grad_norm: 0.9265016008409622, iteration: 36928
loss: 1.019490361213684,grad_norm: 0.9860798007345988, iteration: 36929
loss: 1.1461485624313354,grad_norm: 0.9999996820031998, iteration: 36930
loss: 1.0113462209701538,grad_norm: 0.989697564792079, iteration: 36931
loss: 1.009089708328247,grad_norm: 0.9999990361551975, iteration: 36932
loss: 0.9910888671875,grad_norm: 0.985242964543672, iteration: 36933
loss: 0.9687528014183044,grad_norm: 0.855748523750126, iteration: 36934
loss: 1.023821234703064,grad_norm: 0.9999990825874182, iteration: 36935
loss: 0.9817333221435547,grad_norm: 0.8284799280031838, iteration: 36936
loss: 1.0132676362991333,grad_norm: 0.9347005452914148, iteration: 36937
loss: 0.9724384546279907,grad_norm: 0.8287010395562047, iteration: 36938
loss: 0.9563692808151245,grad_norm: 0.9999990707184309, iteration: 36939
loss: 1.005729079246521,grad_norm: 0.9999991168225639, iteration: 36940
loss: 0.9932920336723328,grad_norm: 0.8701149157854177, iteration: 36941
loss: 1.0024770498275757,grad_norm: 0.94663033651585, iteration: 36942
loss: 0.9655898213386536,grad_norm: 0.9999991312337008, iteration: 36943
loss: 1.0392801761627197,grad_norm: 0.9999990556438728, iteration: 36944
loss: 1.01967453956604,grad_norm: 0.9999998258065159, iteration: 36945
loss: 1.0010707378387451,grad_norm: 0.8944209459279886, iteration: 36946
loss: 0.9995786547660828,grad_norm: 0.999999701593538, iteration: 36947
loss: 0.9869654774665833,grad_norm: 0.9246022903062734, iteration: 36948
loss: 0.9855712056159973,grad_norm: 0.8777590719930538, iteration: 36949
loss: 1.059261679649353,grad_norm: 0.9999999632945534, iteration: 36950
loss: 0.9813569784164429,grad_norm: 0.9762508169258901, iteration: 36951
loss: 1.0204224586486816,grad_norm: 0.9554201897667696, iteration: 36952
loss: 1.0107380151748657,grad_norm: 0.8609295284254967, iteration: 36953
loss: 1.0103260278701782,grad_norm: 0.9710646577313681, iteration: 36954
loss: 0.9880802035331726,grad_norm: 0.941856193163722, iteration: 36955
loss: 1.020439624786377,grad_norm: 0.9676887042035712, iteration: 36956
loss: 1.019526481628418,grad_norm: 0.9183150811198304, iteration: 36957
loss: 1.0321855545043945,grad_norm: 0.999999149303397, iteration: 36958
loss: 0.9723567366600037,grad_norm: 0.9999992021760296, iteration: 36959
loss: 1.0485854148864746,grad_norm: 0.9999992405388954, iteration: 36960
loss: 0.9733015298843384,grad_norm: 0.9999990887053045, iteration: 36961
loss: 1.0774950981140137,grad_norm: 0.9999999078804835, iteration: 36962
loss: 1.0265591144561768,grad_norm: 0.9281340709479202, iteration: 36963
loss: 1.0422923564910889,grad_norm: 0.9999990374756229, iteration: 36964
loss: 0.9952006340026855,grad_norm: 0.8866120549585732, iteration: 36965
loss: 0.9879096746444702,grad_norm: 0.8488282365414318, iteration: 36966
loss: 1.0043137073516846,grad_norm: 0.9736768322104351, iteration: 36967
loss: 1.0623914003372192,grad_norm: 0.9999994798840787, iteration: 36968
loss: 0.9983881115913391,grad_norm: 0.9999991504175663, iteration: 36969
loss: 1.037689208984375,grad_norm: 0.9447076455245884, iteration: 36970
loss: 1.0001716613769531,grad_norm: 0.9878636270494796, iteration: 36971
loss: 1.0026980638504028,grad_norm: 0.9999989817715866, iteration: 36972
loss: 0.9958677887916565,grad_norm: 0.9175760228709, iteration: 36973
loss: 1.002442479133606,grad_norm: 0.9999990778713015, iteration: 36974
loss: 0.9844731092453003,grad_norm: 0.9955970108545922, iteration: 36975
loss: 0.9843752384185791,grad_norm: 0.8459456518686131, iteration: 36976
loss: 1.0354610681533813,grad_norm: 0.9999996719940535, iteration: 36977
loss: 0.9933595061302185,grad_norm: 0.999999371816304, iteration: 36978
loss: 1.0007178783416748,grad_norm: 0.9999992071589348, iteration: 36979
loss: 1.0075453519821167,grad_norm: 0.9681666700196875, iteration: 36980
loss: 0.9852098822593689,grad_norm: 0.9240756912456941, iteration: 36981
loss: 0.9944362640380859,grad_norm: 0.9174668311368361, iteration: 36982
loss: 1.0385410785675049,grad_norm: 0.9999991067127005, iteration: 36983
loss: 1.0005830526351929,grad_norm: 0.9999991450144855, iteration: 36984
loss: 1.0395257472991943,grad_norm: 0.9624329717148189, iteration: 36985
loss: 1.027728796005249,grad_norm: 0.9999997446836535, iteration: 36986
loss: 0.9934084415435791,grad_norm: 0.8859385932290755, iteration: 36987
loss: 0.9970657229423523,grad_norm: 0.9999991506177408, iteration: 36988
loss: 1.0049453973770142,grad_norm: 0.9391597290115904, iteration: 36989
loss: 1.0000247955322266,grad_norm: 0.9999992200230629, iteration: 36990
loss: 1.014469027519226,grad_norm: 0.9999990796554952, iteration: 36991
loss: 1.0244032144546509,grad_norm: 0.9999991350860212, iteration: 36992
loss: 0.993858277797699,grad_norm: 0.9999990629462687, iteration: 36993
loss: 1.001763105392456,grad_norm: 0.9999995715109189, iteration: 36994
loss: 0.9990572333335876,grad_norm: 0.9999992058458513, iteration: 36995
loss: 0.9910857081413269,grad_norm: 0.8393938574135303, iteration: 36996
loss: 0.9696648716926575,grad_norm: 0.9883617971302044, iteration: 36997
loss: 1.0604884624481201,grad_norm: 0.827878946288637, iteration: 36998
loss: 1.039188265800476,grad_norm: 0.9999991998615413, iteration: 36999
loss: 0.9900654554367065,grad_norm: 0.9999990259634972, iteration: 37000
loss: 1.0174874067306519,grad_norm: 0.7853665902857089, iteration: 37001
loss: 1.1149991750717163,grad_norm: 0.9999994973220415, iteration: 37002
loss: 1.0050652027130127,grad_norm: 0.9566157538303857, iteration: 37003
loss: 0.9938111901283264,grad_norm: 0.9999993066801435, iteration: 37004
loss: 1.0847208499908447,grad_norm: 0.999999759230551, iteration: 37005
loss: 0.974591076374054,grad_norm: 0.9550589751675997, iteration: 37006
loss: 0.9564570188522339,grad_norm: 0.8879458395087985, iteration: 37007
loss: 0.990318775177002,grad_norm: 0.8611711487002788, iteration: 37008
loss: 0.9905502796173096,grad_norm: 0.999999115130677, iteration: 37009
loss: 0.9580789804458618,grad_norm: 0.9999990613030113, iteration: 37010
loss: 1.0380100011825562,grad_norm: 0.9999996558714909, iteration: 37011
loss: 0.9967513084411621,grad_norm: 0.999999182621375, iteration: 37012
loss: 1.042858600616455,grad_norm: 0.9999993748342177, iteration: 37013
loss: 0.9835200905799866,grad_norm: 0.9999993527403427, iteration: 37014
loss: 0.996804416179657,grad_norm: 0.9835976685437847, iteration: 37015
loss: 0.9816427230834961,grad_norm: 0.9823597431493797, iteration: 37016
loss: 0.9716820120811462,grad_norm: 0.8970847666399532, iteration: 37017
loss: 1.0576833486557007,grad_norm: 0.9999991878670695, iteration: 37018
loss: 0.9816733002662659,grad_norm: 0.9999989995816866, iteration: 37019
loss: 1.0627923011779785,grad_norm: 0.9999992535756933, iteration: 37020
loss: 1.0286693572998047,grad_norm: 0.8992577908994654, iteration: 37021
loss: 0.9859625697135925,grad_norm: 0.858783855882832, iteration: 37022
loss: 1.029262661933899,grad_norm: 0.9999991913682734, iteration: 37023
loss: 1.0038650035858154,grad_norm: 0.9835208736350937, iteration: 37024
loss: 0.9983977675437927,grad_norm: 0.948436133258693, iteration: 37025
loss: 1.0367127656936646,grad_norm: 0.9202895634879853, iteration: 37026
loss: 1.0436428785324097,grad_norm: 0.9999998937542773, iteration: 37027
loss: 0.996816098690033,grad_norm: 0.9982663791866465, iteration: 37028
loss: 0.9878695607185364,grad_norm: 0.9999992931473893, iteration: 37029
loss: 1.0356029272079468,grad_norm: 0.9999991227455466, iteration: 37030
loss: 0.9885447025299072,grad_norm: 0.8859774720071713, iteration: 37031
loss: 1.0315390825271606,grad_norm: 0.9999991848710682, iteration: 37032
loss: 1.0318608283996582,grad_norm: 0.9999991299066505, iteration: 37033
loss: 0.9804732203483582,grad_norm: 0.9999993558980097, iteration: 37034
loss: 1.0091304779052734,grad_norm: 0.9999993139905682, iteration: 37035
loss: 0.9842914342880249,grad_norm: 0.9039497859069368, iteration: 37036
loss: 0.9702553749084473,grad_norm: 0.9999992019684666, iteration: 37037
loss: 0.9876349568367004,grad_norm: 0.9154091872993398, iteration: 37038
loss: 0.988538384437561,grad_norm: 0.9999995763747288, iteration: 37039
loss: 1.0200308561325073,grad_norm: 0.9999991052472573, iteration: 37040
loss: 1.025531530380249,grad_norm: 0.9999992479581238, iteration: 37041
loss: 1.0152016878128052,grad_norm: 0.9337897914213997, iteration: 37042
loss: 0.9892882704734802,grad_norm: 0.8921869612971352, iteration: 37043
loss: 1.0171997547149658,grad_norm: 0.9999990882458093, iteration: 37044
loss: 0.9876376986503601,grad_norm: 0.857030124199647, iteration: 37045
loss: 1.022828221321106,grad_norm: 0.880376901985694, iteration: 37046
loss: 0.9800369143486023,grad_norm: 0.9999991129347804, iteration: 37047
loss: 1.1647871732711792,grad_norm: 0.9999999094939939, iteration: 37048
loss: 0.9899731874465942,grad_norm: 0.9999990217063016, iteration: 37049
loss: 0.9965665340423584,grad_norm: 0.8422939731361052, iteration: 37050
loss: 1.0010157823562622,grad_norm: 0.9999991214846121, iteration: 37051
loss: 1.0363750457763672,grad_norm: 0.9999993841850227, iteration: 37052
loss: 1.0362129211425781,grad_norm: 0.9999997715118092, iteration: 37053
loss: 1.0263162851333618,grad_norm: 0.9999994188459438, iteration: 37054
loss: 1.0022004842758179,grad_norm: 0.9999991998287328, iteration: 37055
loss: 0.996411144733429,grad_norm: 0.9999991492625824, iteration: 37056
loss: 0.993384838104248,grad_norm: 0.8679256561689525, iteration: 37057
loss: 1.0219687223434448,grad_norm: 0.8601867175229212, iteration: 37058
loss: 1.0260738134384155,grad_norm: 0.9999993589005797, iteration: 37059
loss: 1.0251820087432861,grad_norm: 0.9999995971686739, iteration: 37060
loss: 0.9706365466117859,grad_norm: 0.9999991719888477, iteration: 37061
loss: 1.004524827003479,grad_norm: 0.9999992394516259, iteration: 37062
loss: 1.0691298246383667,grad_norm: 0.9999997449416317, iteration: 37063
loss: 1.022691011428833,grad_norm: 0.9999990722841938, iteration: 37064
loss: 0.9983811378479004,grad_norm: 0.9999995180728583, iteration: 37065
loss: 1.0113078355789185,grad_norm: 0.9666801390239704, iteration: 37066
loss: 0.9896324872970581,grad_norm: 0.9930602005306893, iteration: 37067
loss: 1.0383656024932861,grad_norm: 0.9204730976091304, iteration: 37068
loss: 1.0336103439331055,grad_norm: 0.9999990540070796, iteration: 37069
loss: 1.0146751403808594,grad_norm: 0.9600533776151544, iteration: 37070
loss: 1.0037561655044556,grad_norm: 0.9999992705368331, iteration: 37071
loss: 0.9810359477996826,grad_norm: 0.8913825008486059, iteration: 37072
loss: 0.9878506064414978,grad_norm: 0.9999992570731124, iteration: 37073
loss: 0.9817898869514465,grad_norm: 0.9748842035810107, iteration: 37074
loss: 1.016998052597046,grad_norm: 0.9679384916678178, iteration: 37075
loss: 1.0215598344802856,grad_norm: 0.8734198763464128, iteration: 37076
loss: 1.0456587076187134,grad_norm: 0.9600690038960815, iteration: 37077
loss: 1.034111738204956,grad_norm: 0.9999992165761502, iteration: 37078
loss: 1.09272038936615,grad_norm: 0.9999996592800697, iteration: 37079
loss: 0.9913619160652161,grad_norm: 0.9999991745884841, iteration: 37080
loss: 1.0145072937011719,grad_norm: 0.9148186977838829, iteration: 37081
loss: 1.0288070440292358,grad_norm: 0.9999992751130573, iteration: 37082
loss: 0.9791775941848755,grad_norm: 0.8983230170756014, iteration: 37083
loss: 1.0102044343948364,grad_norm: 0.8123069105301371, iteration: 37084
loss: 1.0386462211608887,grad_norm: 0.999999163078773, iteration: 37085
loss: 0.9991008639335632,grad_norm: 0.9999993588807986, iteration: 37086
loss: 0.9945975542068481,grad_norm: 0.9999992347607535, iteration: 37087
loss: 1.1143673658370972,grad_norm: 0.9999998208602145, iteration: 37088
loss: 1.0603163242340088,grad_norm: 0.9999998708260057, iteration: 37089
loss: 1.002023696899414,grad_norm: 0.9999994742971674, iteration: 37090
loss: 1.0440129041671753,grad_norm: 0.9999998670529139, iteration: 37091
loss: 0.9822754263877869,grad_norm: 0.9999991120181085, iteration: 37092
loss: 1.021480917930603,grad_norm: 0.9608504899804465, iteration: 37093
loss: 1.0168321132659912,grad_norm: 0.9958762559081169, iteration: 37094
loss: 1.0223660469055176,grad_norm: 0.9999990877338778, iteration: 37095
loss: 1.0115660429000854,grad_norm: 0.9168675488843842, iteration: 37096
loss: 1.0027530193328857,grad_norm: 0.9388732356242565, iteration: 37097
loss: 1.0054881572723389,grad_norm: 0.986028424449622, iteration: 37098
loss: 1.030146837234497,grad_norm: 0.8354034106681629, iteration: 37099
loss: 1.0185497999191284,grad_norm: 0.9312543243073925, iteration: 37100
loss: 0.9966937899589539,grad_norm: 0.9999991285654035, iteration: 37101
loss: 1.0112574100494385,grad_norm: 0.9999992402516472, iteration: 37102
loss: 1.02879798412323,grad_norm: 0.9999993612581002, iteration: 37103
loss: 1.0239436626434326,grad_norm: 0.8836655732730182, iteration: 37104
loss: 1.0545616149902344,grad_norm: 0.9999993507465686, iteration: 37105
loss: 0.9860754013061523,grad_norm: 0.8938884851372758, iteration: 37106
loss: 1.026536226272583,grad_norm: 0.9999998355074657, iteration: 37107
loss: 1.0094491243362427,grad_norm: 0.9193279493092916, iteration: 37108
loss: 1.0674020051956177,grad_norm: 0.9999992114786918, iteration: 37109
loss: 1.0342700481414795,grad_norm: 0.992160782847147, iteration: 37110
loss: 1.0139943361282349,grad_norm: 0.8355711863885598, iteration: 37111
loss: 1.02116060256958,grad_norm: 0.9999998830549962, iteration: 37112
loss: 0.9983826279640198,grad_norm: 0.8229793205919186, iteration: 37113
loss: 0.9948598146438599,grad_norm: 0.9999992460956243, iteration: 37114
loss: 1.0032182931900024,grad_norm: 0.9393397386933833, iteration: 37115
loss: 0.9804103374481201,grad_norm: 0.9999991212811842, iteration: 37116
loss: 0.9907693862915039,grad_norm: 0.8560328524388042, iteration: 37117
loss: 0.9756564497947693,grad_norm: 0.9999992149876209, iteration: 37118
loss: 1.0125683546066284,grad_norm: 0.9999990274633054, iteration: 37119
loss: 1.0251859426498413,grad_norm: 0.8601786539106219, iteration: 37120
loss: 0.9715850949287415,grad_norm: 0.8558610572806761, iteration: 37121
loss: 0.9967427253723145,grad_norm: 0.9999994006412048, iteration: 37122
loss: 1.047162413597107,grad_norm: 0.9999991961808011, iteration: 37123
loss: 1.073024868965149,grad_norm: 0.9999996128216655, iteration: 37124
loss: 0.9946518540382385,grad_norm: 0.9999990710830982, iteration: 37125
loss: 1.0257201194763184,grad_norm: 0.8591636661810074, iteration: 37126
loss: 0.9945368766784668,grad_norm: 0.9917526392970265, iteration: 37127
loss: 0.9897881150245667,grad_norm: 0.9999996874336903, iteration: 37128
loss: 1.0093241930007935,grad_norm: 0.914357457076637, iteration: 37129
loss: 1.0714094638824463,grad_norm: 0.9999991486965424, iteration: 37130
loss: 0.9958974719047546,grad_norm: 0.9478871476819617, iteration: 37131
loss: 1.0274256467819214,grad_norm: 0.9999992777459853, iteration: 37132
loss: 0.9564923048019409,grad_norm: 0.9999992014416593, iteration: 37133
loss: 1.0227818489074707,grad_norm: 0.9999990596199936, iteration: 37134
loss: 1.022398591041565,grad_norm: 0.9999993752614927, iteration: 37135
loss: 1.0188381671905518,grad_norm: 0.9999991322511385, iteration: 37136
loss: 1.035886287689209,grad_norm: 0.9999995844397833, iteration: 37137
loss: 0.9904435873031616,grad_norm: 0.9999989877422576, iteration: 37138
loss: 1.009344220161438,grad_norm: 0.9999990573208736, iteration: 37139
loss: 1.0557218790054321,grad_norm: 0.9999995271003635, iteration: 37140
loss: 1.0170375108718872,grad_norm: 0.834429090689204, iteration: 37141
loss: 0.991067111492157,grad_norm: 0.9121667728028072, iteration: 37142
loss: 1.0151516199111938,grad_norm: 0.999999174573952, iteration: 37143
loss: 0.991389274597168,grad_norm: 0.9424225390523399, iteration: 37144
loss: 1.0961008071899414,grad_norm: 0.999999758091319, iteration: 37145
loss: 0.9986374974250793,grad_norm: 0.9999995562098746, iteration: 37146
loss: 1.097504734992981,grad_norm: 0.999999863690557, iteration: 37147
loss: 1.0005128383636475,grad_norm: 0.9350609979110486, iteration: 37148
loss: 1.0200657844543457,grad_norm: 0.9111111962716009, iteration: 37149
loss: 1.0135325193405151,grad_norm: 0.9999993189207282, iteration: 37150
loss: 1.0074626207351685,grad_norm: 0.9999991031960614, iteration: 37151
loss: 1.0092825889587402,grad_norm: 0.9999990543382857, iteration: 37152
loss: 1.0407135486602783,grad_norm: 0.8155771510581514, iteration: 37153
loss: 1.0561785697937012,grad_norm: 0.9999994321099419, iteration: 37154
loss: 0.9975880980491638,grad_norm: 0.8614301979579713, iteration: 37155
loss: 1.031761884689331,grad_norm: 0.9999990823345012, iteration: 37156
loss: 0.9909401535987854,grad_norm: 0.9999992075042541, iteration: 37157
loss: 1.029083490371704,grad_norm: 0.9885269385564315, iteration: 37158
loss: 0.9941466450691223,grad_norm: 0.9999989717864751, iteration: 37159
loss: 0.9581314325332642,grad_norm: 0.9999990728670473, iteration: 37160
loss: 1.0052456855773926,grad_norm: 0.7985326846911884, iteration: 37161
loss: 0.9908980131149292,grad_norm: 0.8972260863597777, iteration: 37162
loss: 1.0010303258895874,grad_norm: 0.9247901646855505, iteration: 37163
loss: 1.0123459100723267,grad_norm: 0.8101899252113386, iteration: 37164
loss: 1.0067760944366455,grad_norm: 0.9999991382130556, iteration: 37165
loss: 0.9957603216171265,grad_norm: 0.8782992871431073, iteration: 37166
loss: 0.9847713708877563,grad_norm: 0.9999995009016788, iteration: 37167
loss: 0.9878028035163879,grad_norm: 0.9417440930642904, iteration: 37168
loss: 0.9886909127235413,grad_norm: 0.8905454365034373, iteration: 37169
loss: 1.0317461490631104,grad_norm: 0.9999995516557031, iteration: 37170
loss: 1.0559972524642944,grad_norm: 0.9999990528704621, iteration: 37171
loss: 1.046581506729126,grad_norm: 0.9999994407460339, iteration: 37172
loss: 1.0754038095474243,grad_norm: 0.9999996073712473, iteration: 37173
loss: 0.9778246283531189,grad_norm: 0.9999991989572757, iteration: 37174
loss: 1.02797269821167,grad_norm: 0.8594555954883438, iteration: 37175
loss: 1.0127314329147339,grad_norm: 0.9999993345677696, iteration: 37176
loss: 1.0335959196090698,grad_norm: 0.999999600762489, iteration: 37177
loss: 0.9684212803840637,grad_norm: 0.785833014147749, iteration: 37178
loss: 1.0656486749649048,grad_norm: 0.9999999287335865, iteration: 37179
loss: 0.9936904311180115,grad_norm: 0.999999411754501, iteration: 37180
loss: 0.9969280958175659,grad_norm: 0.9999993834774024, iteration: 37181
loss: 0.9669519066810608,grad_norm: 0.9999991056774159, iteration: 37182
loss: 0.9920811653137207,grad_norm: 0.9491276306436894, iteration: 37183
loss: 0.9828824996948242,grad_norm: 0.8969944688996805, iteration: 37184
loss: 1.0028570890426636,grad_norm: 0.9949841487293585, iteration: 37185
loss: 1.0870234966278076,grad_norm: 0.9999993287587454, iteration: 37186
loss: 0.9827086329460144,grad_norm: 0.9999991944903772, iteration: 37187
loss: 1.0174062252044678,grad_norm: 0.9583795672065988, iteration: 37188
loss: 1.0379561185836792,grad_norm: 0.9999998841451437, iteration: 37189
loss: 1.0345515012741089,grad_norm: 0.9999995795971538, iteration: 37190
loss: 0.9885509014129639,grad_norm: 0.9999991599603518, iteration: 37191
loss: 0.9727696776390076,grad_norm: 0.9111501819620347, iteration: 37192
loss: 1.0342949628829956,grad_norm: 0.9999995441596509, iteration: 37193
loss: 1.0157082080841064,grad_norm: 0.9999994407764349, iteration: 37194
loss: 1.0299879312515259,grad_norm: 0.9999992279562244, iteration: 37195
loss: 0.9923359751701355,grad_norm: 0.9999990633174012, iteration: 37196
loss: 1.0229476690292358,grad_norm: 0.9999990738473192, iteration: 37197
loss: 0.9973530769348145,grad_norm: 0.9999990743799398, iteration: 37198
loss: 1.0550100803375244,grad_norm: 0.999999127999357, iteration: 37199
loss: 1.0094355344772339,grad_norm: 0.999999171387222, iteration: 37200
loss: 1.0039716958999634,grad_norm: 0.8503142436594386, iteration: 37201
loss: 0.9986427426338196,grad_norm: 0.9999996851754784, iteration: 37202
loss: 0.986009955406189,grad_norm: 0.93358586191328, iteration: 37203
loss: 1.0118519067764282,grad_norm: 0.9983995049513397, iteration: 37204
loss: 0.9916218519210815,grad_norm: 0.9116096449818498, iteration: 37205
loss: 1.006950855255127,grad_norm: 0.8931958311281943, iteration: 37206
loss: 1.015365481376648,grad_norm: 0.9999992777331131, iteration: 37207
loss: 1.0098572969436646,grad_norm: 0.9139824681292078, iteration: 37208
loss: 1.0183229446411133,grad_norm: 0.92798403378905, iteration: 37209
loss: 1.062659740447998,grad_norm: 0.9999992374103002, iteration: 37210
loss: 1.0298086404800415,grad_norm: 0.9999991005690474, iteration: 37211
loss: 1.022504448890686,grad_norm: 0.8045837874716936, iteration: 37212
loss: 1.0153621435165405,grad_norm: 0.999999232561127, iteration: 37213
loss: 1.0092740058898926,grad_norm: 0.875777890412661, iteration: 37214
loss: 1.0279778242111206,grad_norm: 0.9999993616645464, iteration: 37215
loss: 0.9904912114143372,grad_norm: 0.9518454115848449, iteration: 37216
loss: 1.0210481882095337,grad_norm: 0.9741479736673901, iteration: 37217
loss: 1.0267400741577148,grad_norm: 0.9999992575894326, iteration: 37218
loss: 1.0131644010543823,grad_norm: 0.9999998254034397, iteration: 37219
loss: 0.9901784658432007,grad_norm: 0.9999990215251444, iteration: 37220
loss: 1.033035159111023,grad_norm: 0.9999994359522055, iteration: 37221
loss: 1.0133371353149414,grad_norm: 0.9631581476165725, iteration: 37222
loss: 1.0273903608322144,grad_norm: 0.942312396143268, iteration: 37223
loss: 1.005682349205017,grad_norm: 0.9005083107697452, iteration: 37224
loss: 1.0001835823059082,grad_norm: 0.9999991104065897, iteration: 37225
loss: 1.0050971508026123,grad_norm: 0.9999991295405395, iteration: 37226
loss: 0.9791946411132812,grad_norm: 0.999999350752365, iteration: 37227
loss: 1.0137966871261597,grad_norm: 0.9999992527820005, iteration: 37228
loss: 0.9467585682868958,grad_norm: 0.9724889001988285, iteration: 37229
loss: 1.0619099140167236,grad_norm: 0.9999993568379764, iteration: 37230
loss: 1.0666619539260864,grad_norm: 0.9999993188322497, iteration: 37231
loss: 1.004460096359253,grad_norm: 0.7849094763099079, iteration: 37232
loss: 0.9806841611862183,grad_norm: 0.973224786866032, iteration: 37233
loss: 0.9570323824882507,grad_norm: 0.9999991783882388, iteration: 37234
loss: 1.050235390663147,grad_norm: 0.9452716329215534, iteration: 37235
loss: 1.041245698928833,grad_norm: 0.8931040952938596, iteration: 37236
loss: 1.019659399986267,grad_norm: 0.9937351098703545, iteration: 37237
loss: 0.9873385429382324,grad_norm: 0.9999991355249065, iteration: 37238
loss: 1.0037479400634766,grad_norm: 0.9999991599313824, iteration: 37239
loss: 1.0288639068603516,grad_norm: 0.9999991288091039, iteration: 37240
loss: 0.9721750617027283,grad_norm: 0.9999991875326876, iteration: 37241
loss: 1.0370150804519653,grad_norm: 0.9999995896575252, iteration: 37242
loss: 1.0193120241165161,grad_norm: 0.9999993170827115, iteration: 37243
loss: 1.0294781923294067,grad_norm: 0.999999090811399, iteration: 37244
loss: 1.020670771598816,grad_norm: 0.9999990886502473, iteration: 37245
loss: 1.0030546188354492,grad_norm: 0.999999091818999, iteration: 37246
loss: 1.061800241470337,grad_norm: 0.9999994809130618, iteration: 37247
loss: 1.0458570718765259,grad_norm: 0.99999992544983, iteration: 37248
loss: 1.0136100053787231,grad_norm: 0.9999992219662877, iteration: 37249
loss: 0.9822443723678589,grad_norm: 0.9994692451604851, iteration: 37250
loss: 1.02179753780365,grad_norm: 0.8363435327611792, iteration: 37251
loss: 1.0065361261367798,grad_norm: 0.9485480718674454, iteration: 37252
loss: 1.0024555921554565,grad_norm: 0.989596337803876, iteration: 37253
loss: 0.9969059228897095,grad_norm: 0.9432723872275159, iteration: 37254
loss: 1.0091543197631836,grad_norm: 0.9310139884234456, iteration: 37255
loss: 0.9992987513542175,grad_norm: 0.9999994584374047, iteration: 37256
loss: 1.020337462425232,grad_norm: 0.9999991692228496, iteration: 37257
loss: 1.042116641998291,grad_norm: 0.8757355446312527, iteration: 37258
loss: 0.9917178750038147,grad_norm: 0.8756805355670118, iteration: 37259
loss: 1.0413618087768555,grad_norm: 0.923161042150945, iteration: 37260
loss: 1.0370301008224487,grad_norm: 0.918774655050836, iteration: 37261
loss: 1.0050476789474487,grad_norm: 0.9930505804238293, iteration: 37262
loss: 1.007688283920288,grad_norm: 0.9692522133509441, iteration: 37263
loss: 0.9875254034996033,grad_norm: 0.9999990968151911, iteration: 37264
loss: 1.0210697650909424,grad_norm: 0.9999992471858246, iteration: 37265
loss: 1.0212570428848267,grad_norm: 0.9999991284487888, iteration: 37266
loss: 1.0207120180130005,grad_norm: 0.9999993171610814, iteration: 37267
loss: 0.9634708762168884,grad_norm: 0.9284734471798934, iteration: 37268
loss: 1.0334150791168213,grad_norm: 0.9999994885248482, iteration: 37269
loss: 1.0401426553726196,grad_norm: 0.9999993962868649, iteration: 37270
loss: 1.0147346258163452,grad_norm: 0.9999990663190368, iteration: 37271
loss: 1.0024757385253906,grad_norm: 0.9999992177969502, iteration: 37272
loss: 0.9969483613967896,grad_norm: 0.9999990370995973, iteration: 37273
loss: 1.0269891023635864,grad_norm: 0.9361451903347942, iteration: 37274
loss: 0.9934021234512329,grad_norm: 0.8860907417975257, iteration: 37275
loss: 0.9923327565193176,grad_norm: 0.99999922136866, iteration: 37276
loss: 1.0570323467254639,grad_norm: 0.9999996903102837, iteration: 37277
loss: 1.0295597314834595,grad_norm: 0.9999991064987322, iteration: 37278
loss: 1.0272130966186523,grad_norm: 0.999999279753911, iteration: 37279
loss: 1.0202770233154297,grad_norm: 0.9999991517256915, iteration: 37280
loss: 1.0149644613265991,grad_norm: 0.7673939857574322, iteration: 37281
loss: 1.042960286140442,grad_norm: 0.9999992964377105, iteration: 37282
loss: 1.0394184589385986,grad_norm: 0.8541724159465188, iteration: 37283
loss: 1.0358757972717285,grad_norm: 0.8528406513228763, iteration: 37284
loss: 0.9941126108169556,grad_norm: 0.8903033729560599, iteration: 37285
loss: 0.9975793361663818,grad_norm: 0.9999997208151509, iteration: 37286
loss: 1.0029480457305908,grad_norm: 0.9999993297334716, iteration: 37287
loss: 0.96586012840271,grad_norm: 0.9999993310547131, iteration: 37288
loss: 0.9961200952529907,grad_norm: 0.999999198630209, iteration: 37289
loss: 0.9990758299827576,grad_norm: 0.9999993361056205, iteration: 37290
loss: 0.9573484063148499,grad_norm: 0.7853420030581731, iteration: 37291
loss: 0.978426992893219,grad_norm: 0.9999992847528113, iteration: 37292
loss: 1.0195902585983276,grad_norm: 0.9872816483280507, iteration: 37293
loss: 0.9770803451538086,grad_norm: 0.848712768131808, iteration: 37294
loss: 0.973167896270752,grad_norm: 0.9811382824002859, iteration: 37295
loss: 1.036442518234253,grad_norm: 0.9999993521182463, iteration: 37296
loss: 0.9879519939422607,grad_norm: 0.9601454767265255, iteration: 37297
loss: 1.0656894445419312,grad_norm: 0.9999994672875436, iteration: 37298
loss: 0.9693413376808167,grad_norm: 0.8385011725931989, iteration: 37299
loss: 1.0036656856536865,grad_norm: 0.8405271158041122, iteration: 37300
loss: 1.029140830039978,grad_norm: 0.9486748580598205, iteration: 37301
loss: 0.9912822842597961,grad_norm: 0.8619617019218146, iteration: 37302
loss: 1.0226590633392334,grad_norm: 0.9999990123530176, iteration: 37303
loss: 0.9891402125358582,grad_norm: 0.8404631354058215, iteration: 37304
loss: 1.025254249572754,grad_norm: 0.9999990614926078, iteration: 37305
loss: 1.032201886177063,grad_norm: 0.9999997406125609, iteration: 37306
loss: 0.9451342821121216,grad_norm: 0.8509895759808733, iteration: 37307
loss: 0.9897645115852356,grad_norm: 0.9253815594562792, iteration: 37308
loss: 1.0079265832901,grad_norm: 0.9595787042363635, iteration: 37309
loss: 0.9830880165100098,grad_norm: 0.9471157859740804, iteration: 37310
loss: 0.9898708462715149,grad_norm: 0.9999991696551445, iteration: 37311
loss: 1.0153251886367798,grad_norm: 1.0000000162991645, iteration: 37312
loss: 0.9777940511703491,grad_norm: 0.9999993175602065, iteration: 37313
loss: 1.0396602153778076,grad_norm: 0.9999992957447702, iteration: 37314
loss: 0.9624168872833252,grad_norm: 0.9411661973009751, iteration: 37315
loss: 1.0044896602630615,grad_norm: 0.9414600920322603, iteration: 37316
loss: 1.01365327835083,grad_norm: 0.8860428968534294, iteration: 37317
loss: 0.9998263120651245,grad_norm: 0.9497085173759943, iteration: 37318
loss: 1.0095094442367554,grad_norm: 0.999999100913502, iteration: 37319
loss: 0.9882524013519287,grad_norm: 0.99999933088725, iteration: 37320
loss: 1.1108930110931396,grad_norm: 0.9999996913893717, iteration: 37321
loss: 0.9736648201942444,grad_norm: 0.9999997567631204, iteration: 37322
loss: 0.9612415432929993,grad_norm: 0.9999993652106621, iteration: 37323
loss: 1.033162236213684,grad_norm: 0.9999996005238316, iteration: 37324
loss: 0.9941380023956299,grad_norm: 0.914963054678496, iteration: 37325
loss: 1.0322825908660889,grad_norm: 0.9737115150733806, iteration: 37326
loss: 1.013934850692749,grad_norm: 0.9999993332563218, iteration: 37327
loss: 1.0372086763381958,grad_norm: 0.8711330587939613, iteration: 37328
loss: 1.0298789739608765,grad_norm: 0.9999993353752128, iteration: 37329
loss: 1.0226715803146362,grad_norm: 0.999999775853822, iteration: 37330
loss: 1.01399564743042,grad_norm: 0.9753317213437342, iteration: 37331
loss: 0.9674626588821411,grad_norm: 0.7539480507423362, iteration: 37332
loss: 1.032528042793274,grad_norm: 0.999999682614848, iteration: 37333
loss: 1.0093257427215576,grad_norm: 0.8662869348036455, iteration: 37334
loss: 0.9860246181488037,grad_norm: 0.9999992031153504, iteration: 37335
loss: 1.0190221071243286,grad_norm: 0.9999990895321884, iteration: 37336
loss: 0.9802318811416626,grad_norm: 0.99999899931677, iteration: 37337
loss: 1.0219862461090088,grad_norm: 0.9999995956270034, iteration: 37338
loss: 1.0176615715026855,grad_norm: 0.9999992477778016, iteration: 37339
loss: 1.0308037996292114,grad_norm: 0.999999413216451, iteration: 37340
loss: 1.0113850831985474,grad_norm: 0.8112004217903959, iteration: 37341
loss: 1.0108181238174438,grad_norm: 0.8882455413693934, iteration: 37342
loss: 1.0016424655914307,grad_norm: 0.9999990504618105, iteration: 37343
loss: 1.0204896926879883,grad_norm: 0.9161467341760511, iteration: 37344
loss: 1.0359110832214355,grad_norm: 0.9869130567637219, iteration: 37345
loss: 1.0530000925064087,grad_norm: 0.9999991338206435, iteration: 37346
loss: 1.0135667324066162,grad_norm: 0.7698497798254177, iteration: 37347
loss: 0.9938198328018188,grad_norm: 0.9999990129572008, iteration: 37348
loss: 1.0149481296539307,grad_norm: 0.9588786511794797, iteration: 37349
loss: 1.0088725090026855,grad_norm: 0.8784110892001052, iteration: 37350
loss: 1.0163381099700928,grad_norm: 0.919358016112787, iteration: 37351
loss: 1.0019538402557373,grad_norm: 0.9999990985582532, iteration: 37352
loss: 1.0481849908828735,grad_norm: 0.9999992281171115, iteration: 37353
loss: 0.9895709753036499,grad_norm: 0.9086186956892662, iteration: 37354
loss: 1.009264588356018,grad_norm: 0.7984496287278169, iteration: 37355
loss: 1.0288580656051636,grad_norm: 0.8511292526690001, iteration: 37356
loss: 1.0165691375732422,grad_norm: 0.9813847969416605, iteration: 37357
loss: 1.0086876153945923,grad_norm: 0.9999990697157611, iteration: 37358
loss: 1.0211470127105713,grad_norm: 0.8903674112880513, iteration: 37359
loss: 0.9958192706108093,grad_norm: 0.9999993366937123, iteration: 37360
loss: 1.0202184915542603,grad_norm: 0.9999992257532081, iteration: 37361
loss: 1.0199649333953857,grad_norm: 0.9127964282612059, iteration: 37362
loss: 1.0676227807998657,grad_norm: 0.99999976439583, iteration: 37363
loss: 0.9967334866523743,grad_norm: 0.7887116996761615, iteration: 37364
loss: 1.0237420797348022,grad_norm: 0.9459063393646486, iteration: 37365
loss: 1.0187666416168213,grad_norm: 0.7805198142034256, iteration: 37366
loss: 1.0209304094314575,grad_norm: 0.9999991247622628, iteration: 37367
loss: 1.0004819631576538,grad_norm: 0.9075290178758508, iteration: 37368
loss: 0.9991117715835571,grad_norm: 0.9999991105585886, iteration: 37369
loss: 0.9981113076210022,grad_norm: 0.9401885370295323, iteration: 37370
loss: 1.0290138721466064,grad_norm: 0.9999990915795927, iteration: 37371
loss: 1.051317811012268,grad_norm: 0.8910464606448047, iteration: 37372
loss: 1.0118532180786133,grad_norm: 0.8353204563476145, iteration: 37373
loss: 0.9896065592765808,grad_norm: 0.8711572853662458, iteration: 37374
loss: 1.0159952640533447,grad_norm: 0.8654144014894748, iteration: 37375
loss: 0.994145929813385,grad_norm: 0.9999991134146726, iteration: 37376
loss: 0.9855944514274597,grad_norm: 0.9804273306069071, iteration: 37377
loss: 1.0550233125686646,grad_norm: 0.9879025513031787, iteration: 37378
loss: 0.9862360954284668,grad_norm: 0.9912689256506224, iteration: 37379
loss: 1.0118955373764038,grad_norm: 0.9999991037578742, iteration: 37380
loss: 0.9750412106513977,grad_norm: 0.9147307818321593, iteration: 37381
loss: 1.0033092498779297,grad_norm: 0.9999995937307706, iteration: 37382
loss: 1.032415509223938,grad_norm: 0.969900717066664, iteration: 37383
loss: 1.027674913406372,grad_norm: 0.9259120213189658, iteration: 37384
loss: 0.9916756749153137,grad_norm: 0.9999997151804785, iteration: 37385
loss: 0.9963976144790649,grad_norm: 0.9758158180191865, iteration: 37386
loss: 1.013694405555725,grad_norm: 0.844589774830758, iteration: 37387
loss: 0.9780770540237427,grad_norm: 0.9177749173526639, iteration: 37388
loss: 1.011564016342163,grad_norm: 0.9999999134389317, iteration: 37389
loss: 0.9825038313865662,grad_norm: 0.9999991423850931, iteration: 37390
loss: 1.021134614944458,grad_norm: 0.9999992394268366, iteration: 37391
loss: 1.0275769233703613,grad_norm: 0.9264702828609331, iteration: 37392
loss: 1.0424302816390991,grad_norm: 0.9999994298835613, iteration: 37393
loss: 1.0246628522872925,grad_norm: 0.999999094850297, iteration: 37394
loss: 1.0927261114120483,grad_norm: 0.9999992267122793, iteration: 37395
loss: 1.001092553138733,grad_norm: 0.9999991287035023, iteration: 37396
loss: 1.000844955444336,grad_norm: 0.9740708651545863, iteration: 37397
loss: 1.012324333190918,grad_norm: 0.9999991514512531, iteration: 37398
loss: 1.0004273653030396,grad_norm: 0.8557562381442883, iteration: 37399
loss: 1.012062907218933,grad_norm: 0.9366946743524942, iteration: 37400
loss: 1.0361628532409668,grad_norm: 0.9999991130198894, iteration: 37401
loss: 1.0626188516616821,grad_norm: 0.9999992833138984, iteration: 37402
loss: 1.0007981061935425,grad_norm: 0.8435308968490497, iteration: 37403
loss: 1.0174288749694824,grad_norm: 0.9999991790142121, iteration: 37404
loss: 1.0406872034072876,grad_norm: 0.9596346062630683, iteration: 37405
loss: 0.979853093624115,grad_norm: 0.8567377098057611, iteration: 37406
loss: 1.0126162767410278,grad_norm: 0.8090999254295284, iteration: 37407
loss: 0.9984831809997559,grad_norm: 0.8448847818703037, iteration: 37408
loss: 0.9963609576225281,grad_norm: 0.9999991224514063, iteration: 37409
loss: 1.034059762954712,grad_norm: 0.9999992415672496, iteration: 37410
loss: 0.9884672164916992,grad_norm: 0.9439642001069627, iteration: 37411
loss: 0.9429423809051514,grad_norm: 0.9999991129747884, iteration: 37412
loss: 1.058232069015503,grad_norm: 0.9999991055152199, iteration: 37413
loss: 1.1135644912719727,grad_norm: 0.9999997398646246, iteration: 37414
loss: 1.021788477897644,grad_norm: 0.9333202269391123, iteration: 37415
loss: 1.082571029663086,grad_norm: 0.9999996315147378, iteration: 37416
loss: 0.9845930337905884,grad_norm: 0.8355480319382066, iteration: 37417
loss: 1.017058253288269,grad_norm: 0.8846721975696443, iteration: 37418
loss: 1.0560702085494995,grad_norm: 0.9999997441932122, iteration: 37419
loss: 1.0287103652954102,grad_norm: 0.9999991299952518, iteration: 37420
loss: 1.0006232261657715,grad_norm: 0.9613636143809431, iteration: 37421
loss: 0.9781172871589661,grad_norm: 0.9999991306708766, iteration: 37422
loss: 1.0109635591506958,grad_norm: 0.9362192707265613, iteration: 37423
loss: 1.003193736076355,grad_norm: 0.9681148296979893, iteration: 37424
loss: 1.0158199071884155,grad_norm: 0.9999990112678594, iteration: 37425
loss: 0.9899961948394775,grad_norm: 0.999999173388159, iteration: 37426
loss: 0.9849721193313599,grad_norm: 0.8275831437068423, iteration: 37427
loss: 1.009844183921814,grad_norm: 0.9569515617937843, iteration: 37428
loss: 1.0226787328720093,grad_norm: 0.9207508697735183, iteration: 37429
loss: 0.993747353553772,grad_norm: 0.8444696517093285, iteration: 37430
loss: 1.0159151554107666,grad_norm: 0.9419529363825238, iteration: 37431
loss: 1.0257970094680786,grad_norm: 0.9999990332548903, iteration: 37432
loss: 0.9857040643692017,grad_norm: 0.9999990894683896, iteration: 37433
loss: 1.0121314525604248,grad_norm: 0.9999990688975336, iteration: 37434
loss: 0.9996304512023926,grad_norm: 0.896823546413431, iteration: 37435
loss: 1.0198776721954346,grad_norm: 0.999999138324204, iteration: 37436
loss: 1.0040309429168701,grad_norm: 0.999999161632071, iteration: 37437
loss: 1.041111946105957,grad_norm: 0.8374592833499418, iteration: 37438
loss: 1.0103837251663208,grad_norm: 0.9482781144280181, iteration: 37439
loss: 1.0119725465774536,grad_norm: 0.9999990885911954, iteration: 37440
loss: 0.9756033420562744,grad_norm: 0.9812461661319853, iteration: 37441
loss: 1.0130820274353027,grad_norm: 0.9999991037732734, iteration: 37442
loss: 1.0151371955871582,grad_norm: 0.9655546621155766, iteration: 37443
loss: 0.9803792238235474,grad_norm: 0.9999990411500284, iteration: 37444
loss: 1.0132073163986206,grad_norm: 0.8087893777211116, iteration: 37445
loss: 1.0055584907531738,grad_norm: 0.7900208133416281, iteration: 37446
loss: 1.0329668521881104,grad_norm: 0.9164785572437836, iteration: 37447
loss: 0.9946010708808899,grad_norm: 0.9999992593419673, iteration: 37448
loss: 0.9992276430130005,grad_norm: 0.9046366700547633, iteration: 37449
loss: 0.9922467470169067,grad_norm: 0.9144777343724358, iteration: 37450
loss: 1.022472620010376,grad_norm: 0.8833935875406898, iteration: 37451
loss: 1.0752359628677368,grad_norm: 0.9999991291796174, iteration: 37452
loss: 0.9974139928817749,grad_norm: 0.9551694314069749, iteration: 37453
loss: 1.0087664127349854,grad_norm: 0.9999992148719858, iteration: 37454
loss: 0.9942457675933838,grad_norm: 0.9999991708848508, iteration: 37455
loss: 1.0443177223205566,grad_norm: 0.999999829350989, iteration: 37456
loss: 0.9901245832443237,grad_norm: 0.8909990932733994, iteration: 37457
loss: 1.0059762001037598,grad_norm: 0.9350971821802295, iteration: 37458
loss: 0.9746937155723572,grad_norm: 0.9999991842306033, iteration: 37459
loss: 1.01943838596344,grad_norm: 0.8946195735684652, iteration: 37460
loss: 0.9820415377616882,grad_norm: 0.9349244076122444, iteration: 37461
loss: 0.9796005487442017,grad_norm: 0.9999990647483754, iteration: 37462
loss: 0.9733641147613525,grad_norm: 0.9049141670566239, iteration: 37463
loss: 0.997295618057251,grad_norm: 0.8999902518999262, iteration: 37464
loss: 0.9785275459289551,grad_norm: 0.9049841738800983, iteration: 37465
loss: 1.0115538835525513,grad_norm: 0.9138837395700773, iteration: 37466
loss: 1.0284560918807983,grad_norm: 0.8752661337944537, iteration: 37467
loss: 0.9888787269592285,grad_norm: 0.7995142283682157, iteration: 37468
loss: 1.0160292387008667,grad_norm: 0.9342661832559866, iteration: 37469
loss: 1.014035940170288,grad_norm: 0.9100054121599154, iteration: 37470
loss: 0.987519383430481,grad_norm: 0.9999991070094998, iteration: 37471
loss: 1.0001922845840454,grad_norm: 0.9999991186751459, iteration: 37472
loss: 0.9806106686592102,grad_norm: 0.9999992821218652, iteration: 37473
loss: 1.0169848203659058,grad_norm: 0.9219217656752093, iteration: 37474
loss: 1.0308934450149536,grad_norm: 0.9771408940906975, iteration: 37475
loss: 0.974614143371582,grad_norm: 0.973664880702522, iteration: 37476
loss: 1.006346583366394,grad_norm: 0.9849081652019323, iteration: 37477
loss: 1.0001933574676514,grad_norm: 0.7582372004376012, iteration: 37478
loss: 0.9989643692970276,grad_norm: 0.9603564952093465, iteration: 37479
loss: 1.0216870307922363,grad_norm: 0.9725398186192495, iteration: 37480
loss: 0.9927740693092346,grad_norm: 0.8973670486006079, iteration: 37481
loss: 0.9935613870620728,grad_norm: 0.9999991470400452, iteration: 37482
loss: 0.9946897625923157,grad_norm: 0.9999991676834294, iteration: 37483
loss: 1.0151255130767822,grad_norm: 0.9999996072372789, iteration: 37484
loss: 1.0406861305236816,grad_norm: 0.9999998074045283, iteration: 37485
loss: 1.002079725265503,grad_norm: 0.8252145380433482, iteration: 37486
loss: 1.0178756713867188,grad_norm: 0.8098575937376706, iteration: 37487
loss: 1.0380940437316895,grad_norm: 0.9999990712439728, iteration: 37488
loss: 1.0117489099502563,grad_norm: 0.9787254054283877, iteration: 37489
loss: 1.0174179077148438,grad_norm: 0.9999991127552773, iteration: 37490
loss: 0.9788376092910767,grad_norm: 0.9999990475407103, iteration: 37491
loss: 0.9786540269851685,grad_norm: 0.9479051652623917, iteration: 37492
loss: 0.9836221933364868,grad_norm: 0.901635435398136, iteration: 37493
loss: 1.0182642936706543,grad_norm: 0.9999991598683755, iteration: 37494
loss: 1.03396475315094,grad_norm: 0.9999994736448978, iteration: 37495
loss: 0.962433397769928,grad_norm: 0.9997128227396956, iteration: 37496
loss: 1.014295220375061,grad_norm: 0.9999992589908838, iteration: 37497
loss: 0.9994387030601501,grad_norm: 0.712311666275747, iteration: 37498
loss: 1.0007480382919312,grad_norm: 0.9999992254838626, iteration: 37499
loss: 0.9818584322929382,grad_norm: 0.9970061556400681, iteration: 37500
loss: 1.0013278722763062,grad_norm: 0.9999990987787133, iteration: 37501
loss: 0.9797309041023254,grad_norm: 0.9528613231451291, iteration: 37502
loss: 1.0261683464050293,grad_norm: 0.999999385011915, iteration: 37503
loss: 0.9996755719184875,grad_norm: 0.971296601798321, iteration: 37504
loss: 1.020766258239746,grad_norm: 0.8466983817408976, iteration: 37505
loss: 1.007153034210205,grad_norm: 0.9999995214522612, iteration: 37506
loss: 1.0271949768066406,grad_norm: 0.8568844824758481, iteration: 37507
loss: 1.0161371231079102,grad_norm: 0.9020264767573756, iteration: 37508
loss: 1.0185154676437378,grad_norm: 0.9024751911876449, iteration: 37509
loss: 0.9998583197593689,grad_norm: 0.9509010400681177, iteration: 37510
loss: 1.0589114427566528,grad_norm: 0.8999227381083792, iteration: 37511
loss: 0.9784736633300781,grad_norm: 0.8026920193953634, iteration: 37512
loss: 0.9978967905044556,grad_norm: 0.8733489821008721, iteration: 37513
loss: 1.0159465074539185,grad_norm: 0.8151666729874102, iteration: 37514
loss: 1.0213404893875122,grad_norm: 0.9999998812046649, iteration: 37515
loss: 1.0503978729248047,grad_norm: 0.9509805046798833, iteration: 37516
loss: 1.0252506732940674,grad_norm: 0.9999993009730791, iteration: 37517
loss: 1.0053668022155762,grad_norm: 0.8199098696887424, iteration: 37518
loss: 0.9785957932472229,grad_norm: 0.9733723315123058, iteration: 37519
loss: 0.9948510527610779,grad_norm: 0.9064463972357223, iteration: 37520
loss: 1.0042630434036255,grad_norm: 0.939823373522826, iteration: 37521
loss: 1.0274630784988403,grad_norm: 0.9999994369486284, iteration: 37522
loss: 1.0246933698654175,grad_norm: 0.9788462673074145, iteration: 37523
loss: 0.9951646327972412,grad_norm: 0.9090603797179927, iteration: 37524
loss: 0.993740439414978,grad_norm: 0.7406384469509167, iteration: 37525
loss: 1.037481427192688,grad_norm: 0.9999991365429336, iteration: 37526
loss: 1.0025572776794434,grad_norm: 0.8785924553111404, iteration: 37527
loss: 1.1382973194122314,grad_norm: 0.9999999461445878, iteration: 37528
loss: 0.9958778619766235,grad_norm: 0.9999992354464715, iteration: 37529
loss: 1.0254324674606323,grad_norm: 0.9999991358931626, iteration: 37530
loss: 0.991301417350769,grad_norm: 0.9098214070188426, iteration: 37531
loss: 1.0115824937820435,grad_norm: 0.9338238496609128, iteration: 37532
loss: 1.0381674766540527,grad_norm: 0.9999991034671938, iteration: 37533
loss: 1.026118516921997,grad_norm: 0.9962999114789908, iteration: 37534
loss: 1.0171607732772827,grad_norm: 0.8914354964468479, iteration: 37535
loss: 0.9904255867004395,grad_norm: 0.9999990835078053, iteration: 37536
loss: 0.9848037958145142,grad_norm: 0.9999989652563721, iteration: 37537
loss: 0.999916672706604,grad_norm: 0.9999990615608071, iteration: 37538
loss: 1.001866102218628,grad_norm: 0.7934879949251629, iteration: 37539
loss: 0.9787673950195312,grad_norm: 0.7986335318622116, iteration: 37540
loss: 0.9807402491569519,grad_norm: 0.9999992774332572, iteration: 37541
loss: 1.0105140209197998,grad_norm: 0.9268136862674912, iteration: 37542
loss: 1.0435711145401,grad_norm: 0.9999992038281803, iteration: 37543
loss: 1.032206654548645,grad_norm: 0.9999992659270355, iteration: 37544
loss: 1.0244637727737427,grad_norm: 0.9999993045931844, iteration: 37545
loss: 1.0303820371627808,grad_norm: 0.9536247026802012, iteration: 37546
loss: 1.051092505455017,grad_norm: 0.9420563259486716, iteration: 37547
loss: 1.0471330881118774,grad_norm: 0.9999990915281151, iteration: 37548
loss: 1.0423355102539062,grad_norm: 0.9999993766225419, iteration: 37549
loss: 0.9728489518165588,grad_norm: 0.9999993717033243, iteration: 37550
loss: 1.0248712301254272,grad_norm: 0.9225045221978995, iteration: 37551
loss: 0.985645055770874,grad_norm: 0.9738902136660772, iteration: 37552
loss: 0.9921890497207642,grad_norm: 0.9999991519200045, iteration: 37553
loss: 1.0496906042099,grad_norm: 0.9999998228509909, iteration: 37554
loss: 1.0275996923446655,grad_norm: 0.8879966698972148, iteration: 37555
loss: 0.9879697561264038,grad_norm: 0.8483436033147821, iteration: 37556
loss: 1.0538511276245117,grad_norm: 0.9999990544032616, iteration: 37557
loss: 1.0195214748382568,grad_norm: 0.9999995938213733, iteration: 37558
loss: 1.0440515279769897,grad_norm: 0.9999991538109314, iteration: 37559
loss: 1.0332555770874023,grad_norm: 0.9999991243854689, iteration: 37560
loss: 1.0224149227142334,grad_norm: 0.882652911243264, iteration: 37561
loss: 1.007265567779541,grad_norm: 0.9760011517667749, iteration: 37562
loss: 1.0276696681976318,grad_norm: 0.9213731829032525, iteration: 37563
loss: 1.0039358139038086,grad_norm: 0.9999990082111629, iteration: 37564
loss: 0.9942548871040344,grad_norm: 0.999999249190455, iteration: 37565
loss: 0.9866407513618469,grad_norm: 0.9977784443743236, iteration: 37566
loss: 1.0224441289901733,grad_norm: 0.871230797975751, iteration: 37567
loss: 1.02578866481781,grad_norm: 0.9999996101303871, iteration: 37568
loss: 1.0217558145523071,grad_norm: 0.8436806587163994, iteration: 37569
loss: 1.0135672092437744,grad_norm: 0.9178091267674618, iteration: 37570
loss: 0.9831375479698181,grad_norm: 0.9515335768286501, iteration: 37571
loss: 0.9950301051139832,grad_norm: 0.7402403066227021, iteration: 37572
loss: 0.9792089462280273,grad_norm: 0.9833066566068556, iteration: 37573
loss: 1.015690565109253,grad_norm: 0.9090811212378362, iteration: 37574
loss: 0.9987725615501404,grad_norm: 0.9626688463962098, iteration: 37575
loss: 1.1353071928024292,grad_norm: 0.999999141415965, iteration: 37576
loss: 0.9915420413017273,grad_norm: 0.9924446758624038, iteration: 37577
loss: 1.0449119806289673,grad_norm: 0.9999998163743493, iteration: 37578
loss: 1.0422401428222656,grad_norm: 0.9999991179303173, iteration: 37579
loss: 1.0374767780303955,grad_norm: 0.9999996175904778, iteration: 37580
loss: 0.9721370935440063,grad_norm: 0.9945555801694739, iteration: 37581
loss: 1.0407168865203857,grad_norm: 0.9999991165569353, iteration: 37582
loss: 0.9844870567321777,grad_norm: 0.9334331783070735, iteration: 37583
loss: 1.020910382270813,grad_norm: 0.8852687891563082, iteration: 37584
loss: 1.0190644264221191,grad_norm: 0.9999999073582245, iteration: 37585
loss: 0.9740476012229919,grad_norm: 0.9604838326319377, iteration: 37586
loss: 0.9898660778999329,grad_norm: 0.8831813616613958, iteration: 37587
loss: 1.0105429887771606,grad_norm: 0.8595761840135355, iteration: 37588
loss: 1.0239535570144653,grad_norm: 0.9999991164185587, iteration: 37589
loss: 1.009622573852539,grad_norm: 0.999999002903861, iteration: 37590
loss: 1.007885217666626,grad_norm: 0.9999990405060956, iteration: 37591
loss: 1.0197913646697998,grad_norm: 0.9999997705362272, iteration: 37592
loss: 0.9621304869651794,grad_norm: 0.9999993023595753, iteration: 37593
loss: 1.0989338159561157,grad_norm: 0.9999998551726844, iteration: 37594
loss: 1.0059956312179565,grad_norm: 0.8824696327597334, iteration: 37595
loss: 1.0429177284240723,grad_norm: 0.9999995046535815, iteration: 37596
loss: 1.0395069122314453,grad_norm: 0.999999064965591, iteration: 37597
loss: 1.0077977180480957,grad_norm: 0.9999989597843795, iteration: 37598
loss: 1.0078155994415283,grad_norm: 0.8361488031721881, iteration: 37599
loss: 1.0231680870056152,grad_norm: 0.9999992567627042, iteration: 37600
loss: 0.9612928032875061,grad_norm: 0.9643414922439479, iteration: 37601
loss: 0.9935703277587891,grad_norm: 0.9999990231192192, iteration: 37602
loss: 1.0255489349365234,grad_norm: 0.8623297046818587, iteration: 37603
loss: 1.0273315906524658,grad_norm: 0.8806981009132939, iteration: 37604
loss: 0.949614942073822,grad_norm: 0.9999995025180811, iteration: 37605
loss: 1.0551345348358154,grad_norm: 0.9453456894745673, iteration: 37606
loss: 1.0030555725097656,grad_norm: 0.9448365354373576, iteration: 37607
loss: 1.0444753170013428,grad_norm: 0.8397286728061178, iteration: 37608
loss: 1.055337905883789,grad_norm: 0.9999998628263611, iteration: 37609
loss: 0.9682653546333313,grad_norm: 0.9437562422989675, iteration: 37610
loss: 1.0075187683105469,grad_norm: 0.828400661646333, iteration: 37611
loss: 0.9767719507217407,grad_norm: 0.9441100898153449, iteration: 37612
loss: 1.0359023809432983,grad_norm: 0.8359403162850975, iteration: 37613
loss: 1.0060644149780273,grad_norm: 0.9999991001184727, iteration: 37614
loss: 1.0157828330993652,grad_norm: 0.999999287505743, iteration: 37615
loss: 1.004780650138855,grad_norm: 0.9999992928390145, iteration: 37616
loss: 1.0322785377502441,grad_norm: 0.9999997815861534, iteration: 37617
loss: 0.9675570130348206,grad_norm: 0.9064698765911599, iteration: 37618
loss: 1.0121300220489502,grad_norm: 0.9152308766145434, iteration: 37619
loss: 1.026214599609375,grad_norm: 0.8568000178366459, iteration: 37620
loss: 0.9634959697723389,grad_norm: 0.9355269067141092, iteration: 37621
loss: 1.0462605953216553,grad_norm: 0.9999994909850651, iteration: 37622
loss: 1.0144593715667725,grad_norm: 0.9999991807897513, iteration: 37623
loss: 1.1220749616622925,grad_norm: 0.9999994018812551, iteration: 37624
loss: 1.020932912826538,grad_norm: 0.999999394201071, iteration: 37625
loss: 1.091788411140442,grad_norm: 0.9999993325839233, iteration: 37626
loss: 1.043015718460083,grad_norm: 0.9999993599502512, iteration: 37627
loss: 1.0150642395019531,grad_norm: 0.9999989608305665, iteration: 37628
loss: 0.9837479591369629,grad_norm: 0.7530059040094608, iteration: 37629
loss: 0.9942153692245483,grad_norm: 0.9999998764736014, iteration: 37630
loss: 1.0135035514831543,grad_norm: 0.9366931233478241, iteration: 37631
loss: 1.060097575187683,grad_norm: 0.9999998819759844, iteration: 37632
loss: 1.0701985359191895,grad_norm: 0.9999991592240294, iteration: 37633
loss: 1.0415946245193481,grad_norm: 0.999999173534204, iteration: 37634
loss: 0.9814767241477966,grad_norm: 0.9999991713372366, iteration: 37635
loss: 0.9798736572265625,grad_norm: 0.9499188762592127, iteration: 37636
loss: 1.0042308568954468,grad_norm: 0.9147427480241265, iteration: 37637
loss: 1.0136151313781738,grad_norm: 0.8567540474736318, iteration: 37638
loss: 1.0059304237365723,grad_norm: 0.9999989327878833, iteration: 37639
loss: 0.9934885501861572,grad_norm: 0.9749138120043327, iteration: 37640
loss: 1.0293195247650146,grad_norm: 0.9999992520574271, iteration: 37641
loss: 1.0192404985427856,grad_norm: 0.9999990469492541, iteration: 37642
loss: 1.0304683446884155,grad_norm: 0.9497501697091933, iteration: 37643
loss: 1.0162442922592163,grad_norm: 0.9270875111397691, iteration: 37644
loss: 1.0053750276565552,grad_norm: 0.999999211385977, iteration: 37645
loss: 1.0075263977050781,grad_norm: 0.968876708543492, iteration: 37646
loss: 1.0269296169281006,grad_norm: 0.9999991328738805, iteration: 37647
loss: 1.0591869354248047,grad_norm: 0.9999997194573481, iteration: 37648
loss: 1.014957070350647,grad_norm: 0.9999991960136351, iteration: 37649
loss: 1.003284215927124,grad_norm: 0.9999992562134578, iteration: 37650
loss: 0.9794589877128601,grad_norm: 0.9127474746280985, iteration: 37651
loss: 0.999461829662323,grad_norm: 0.9999993198565538, iteration: 37652
loss: 0.9904616475105286,grad_norm: 0.8822871266375136, iteration: 37653
loss: 0.9990032911300659,grad_norm: 0.9089049164593834, iteration: 37654
loss: 0.984076976776123,grad_norm: 0.8585128531391323, iteration: 37655
loss: 1.0328646898269653,grad_norm: 0.9965563249038107, iteration: 37656
loss: 1.0020244121551514,grad_norm: 0.7908765632404566, iteration: 37657
loss: 0.9724130034446716,grad_norm: 0.9999995859036187, iteration: 37658
loss: 1.0340557098388672,grad_norm: 0.9999992273266433, iteration: 37659
loss: 1.0288671255111694,grad_norm: 0.9208297588087239, iteration: 37660
loss: 1.0270847082138062,grad_norm: 0.9983095952397278, iteration: 37661
loss: 1.0064231157302856,grad_norm: 0.9411103483498886, iteration: 37662
loss: 0.976799488067627,grad_norm: 0.9999990790952823, iteration: 37663
loss: 0.9978657960891724,grad_norm: 0.8125275406560195, iteration: 37664
loss: 1.0257484912872314,grad_norm: 0.9999991930565137, iteration: 37665
loss: 1.0375990867614746,grad_norm: 0.9999997704421847, iteration: 37666
loss: 0.9675359725952148,grad_norm: 0.9437010649824692, iteration: 37667
loss: 1.0168609619140625,grad_norm: 0.9999998478746329, iteration: 37668
loss: 0.9711440801620483,grad_norm: 0.999999275191342, iteration: 37669
loss: 0.9696165323257446,grad_norm: 0.9999991770771892, iteration: 37670
loss: 1.0032151937484741,grad_norm: 0.833824351770988, iteration: 37671
loss: 0.9885034561157227,grad_norm: 0.8658859733077422, iteration: 37672
loss: 1.008162260055542,grad_norm: 0.9999990284637477, iteration: 37673
loss: 0.9801511764526367,grad_norm: 0.9652307681598998, iteration: 37674
loss: 1.0323466062545776,grad_norm: 0.9999990452874946, iteration: 37675
loss: 0.9763945937156677,grad_norm: 0.883457327363014, iteration: 37676
loss: 0.9954401850700378,grad_norm: 0.9999991299287211, iteration: 37677
loss: 0.9637266397476196,grad_norm: 0.9999990924351788, iteration: 37678
loss: 1.058582067489624,grad_norm: 0.9999990209963373, iteration: 37679
loss: 0.9886161684989929,grad_norm: 0.9429404450142315, iteration: 37680
loss: 0.9921443462371826,grad_norm: 0.9999991125101144, iteration: 37681
loss: 1.0122885704040527,grad_norm: 0.9999992291261938, iteration: 37682
loss: 1.0307776927947998,grad_norm: 0.9999997616700931, iteration: 37683
loss: 1.006618618965149,grad_norm: 0.9999990517517996, iteration: 37684
loss: 1.0361078977584839,grad_norm: 0.9999991684531938, iteration: 37685
loss: 0.9910421371459961,grad_norm: 0.9999991306957419, iteration: 37686
loss: 1.024087905883789,grad_norm: 0.9999991813934626, iteration: 37687
loss: 0.9675604104995728,grad_norm: 0.9999991192590679, iteration: 37688
loss: 1.0701777935028076,grad_norm: 0.9999991145222806, iteration: 37689
loss: 1.002241611480713,grad_norm: 0.9496020820776165, iteration: 37690
loss: 0.9960326552391052,grad_norm: 0.8274363658492422, iteration: 37691
loss: 1.0249602794647217,grad_norm: 0.9452319927114383, iteration: 37692
loss: 0.9713678956031799,grad_norm: 0.9416351022400062, iteration: 37693
loss: 1.0305520296096802,grad_norm: 0.8419740246932784, iteration: 37694
loss: 1.112830638885498,grad_norm: 0.9999991871112982, iteration: 37695
loss: 0.980208694934845,grad_norm: 0.867246956553437, iteration: 37696
loss: 1.0085593461990356,grad_norm: 0.9097777837202564, iteration: 37697
loss: 1.0675415992736816,grad_norm: 0.9999989837364319, iteration: 37698
loss: 1.0253188610076904,grad_norm: 0.9999996780215396, iteration: 37699
loss: 1.0746654272079468,grad_norm: 0.9999999107360223, iteration: 37700
loss: 1.026821494102478,grad_norm: 0.8534268382990899, iteration: 37701
loss: 1.020190715789795,grad_norm: 0.9999995098943014, iteration: 37702
loss: 1.0201537609100342,grad_norm: 0.9879914287945838, iteration: 37703
loss: 1.0108070373535156,grad_norm: 0.9999993721714278, iteration: 37704
loss: 1.0204271078109741,grad_norm: 0.9999992585665889, iteration: 37705
loss: 1.0830268859863281,grad_norm: 0.9313579570499562, iteration: 37706
loss: 1.0133572816848755,grad_norm: 0.7874120765371886, iteration: 37707
loss: 1.0191258192062378,grad_norm: 0.9999990767050801, iteration: 37708
loss: 1.0166183710098267,grad_norm: 0.9318705979255117, iteration: 37709
loss: 1.007606029510498,grad_norm: 0.9476569051915317, iteration: 37710
loss: 1.0867109298706055,grad_norm: 0.9999992822088141, iteration: 37711
loss: 1.0799094438552856,grad_norm: 0.9999991872715397, iteration: 37712
loss: 0.9885066151618958,grad_norm: 0.9999990425315574, iteration: 37713
loss: 1.0955944061279297,grad_norm: 0.9999998034696853, iteration: 37714
loss: 0.9681318402290344,grad_norm: 0.9754817046091974, iteration: 37715
loss: 1.0309617519378662,grad_norm: 0.9355014515940732, iteration: 37716
loss: 0.9696998000144958,grad_norm: 0.9671945484947134, iteration: 37717
loss: 1.0113588571548462,grad_norm: 0.9999991157900667, iteration: 37718
loss: 1.0247939825057983,grad_norm: 0.999999080661828, iteration: 37719
loss: 1.0135153532028198,grad_norm: 0.9288565299732898, iteration: 37720
loss: 0.9899013042449951,grad_norm: 0.9660596515108498, iteration: 37721
loss: 1.0455009937286377,grad_norm: 0.9999991246117746, iteration: 37722
loss: 0.9978783130645752,grad_norm: 0.9216300956741441, iteration: 37723
loss: 1.0443949699401855,grad_norm: 0.956593809478941, iteration: 37724
loss: 1.0040332078933716,grad_norm: 0.9999990938984853, iteration: 37725
loss: 1.0119764804840088,grad_norm: 0.9474659245838568, iteration: 37726
loss: 1.0159350633621216,grad_norm: 0.9999996819890096, iteration: 37727
loss: 1.0031359195709229,grad_norm: 0.9167120630168835, iteration: 37728
loss: 1.0312073230743408,grad_norm: 0.942846665176683, iteration: 37729
loss: 0.9872976541519165,grad_norm: 0.9999994915070232, iteration: 37730
loss: 1.0053305625915527,grad_norm: 0.9204405404641383, iteration: 37731
loss: 1.0115861892700195,grad_norm: 0.9467709145366126, iteration: 37732
loss: 1.0491900444030762,grad_norm: 0.9999990898644223, iteration: 37733
loss: 1.0099804401397705,grad_norm: 0.9999991886473335, iteration: 37734
loss: 1.0448739528656006,grad_norm: 0.9629903438657009, iteration: 37735
loss: 1.039997935295105,grad_norm: 0.8944390977054569, iteration: 37736
loss: 1.047376275062561,grad_norm: 0.9999990147225648, iteration: 37737
loss: 0.9836316704750061,grad_norm: 0.9999991104520654, iteration: 37738
loss: 1.065536379814148,grad_norm: 0.9999993114686812, iteration: 37739
loss: 1.0254515409469604,grad_norm: 0.9103028557167119, iteration: 37740
loss: 1.0013420581817627,grad_norm: 0.9999991801015855, iteration: 37741
loss: 0.9949564337730408,grad_norm: 0.9999991497777079, iteration: 37742
loss: 1.014351487159729,grad_norm: 0.9054572168768386, iteration: 37743
loss: 0.9917070269584656,grad_norm: 0.9999992261133743, iteration: 37744
loss: 1.0334707498550415,grad_norm: 0.8561682545110004, iteration: 37745
loss: 1.023558497428894,grad_norm: 0.9999993361372795, iteration: 37746
loss: 1.0293004512786865,grad_norm: 0.9999990330167011, iteration: 37747
loss: 0.9912304282188416,grad_norm: 0.9465037456201804, iteration: 37748
loss: 0.979724109172821,grad_norm: 0.9411552635886272, iteration: 37749
loss: 1.0484682321548462,grad_norm: 0.8321273103493233, iteration: 37750
loss: 1.0089223384857178,grad_norm: 0.9108807599045949, iteration: 37751
loss: 1.015171766281128,grad_norm: 0.9999991850717702, iteration: 37752
loss: 0.9755657911300659,grad_norm: 0.9850398385924373, iteration: 37753
loss: 1.0755507946014404,grad_norm: 0.9999992945127192, iteration: 37754
loss: 1.0066726207733154,grad_norm: 0.9999992175697938, iteration: 37755
loss: 0.9597682952880859,grad_norm: 0.9999990865838777, iteration: 37756
loss: 1.0110759735107422,grad_norm: 0.9999993439943223, iteration: 37757
loss: 1.042221188545227,grad_norm: 0.9999990977012195, iteration: 37758
loss: 1.008423089981079,grad_norm: 0.9999990935371571, iteration: 37759
loss: 0.9891391396522522,grad_norm: 0.9883451996459034, iteration: 37760
loss: 1.0144736766815186,grad_norm: 0.9999992782456288, iteration: 37761
loss: 1.02566659450531,grad_norm: 0.9999992027790174, iteration: 37762
loss: 0.9967607855796814,grad_norm: 0.8081774734509815, iteration: 37763
loss: 1.016144037246704,grad_norm: 0.9999990733214728, iteration: 37764
loss: 0.982234001159668,grad_norm: 0.999999256644935, iteration: 37765
loss: 1.035791277885437,grad_norm: 0.9999990219220474, iteration: 37766
loss: 1.0369597673416138,grad_norm: 0.9999994880755262, iteration: 37767
loss: 1.0179147720336914,grad_norm: 0.9999997557966402, iteration: 37768
loss: 0.9904242753982544,grad_norm: 0.8432977143149374, iteration: 37769
loss: 0.9868190288543701,grad_norm: 0.8390668061581142, iteration: 37770
loss: 0.973419189453125,grad_norm: 0.9999992913654641, iteration: 37771
loss: 1.0224765539169312,grad_norm: 0.8594892325337228, iteration: 37772
loss: 1.027555227279663,grad_norm: 0.999999356121919, iteration: 37773
loss: 1.0009591579437256,grad_norm: 0.9999991398052215, iteration: 37774
loss: 1.0062909126281738,grad_norm: 0.9999990620709966, iteration: 37775
loss: 1.0079972743988037,grad_norm: 0.9999997814734104, iteration: 37776
loss: 1.056025505065918,grad_norm: 0.9999990461739874, iteration: 37777
loss: 0.9981655478477478,grad_norm: 0.9999994646228905, iteration: 37778
loss: 0.9736083149909973,grad_norm: 0.9999997434118746, iteration: 37779
loss: 1.0187342166900635,grad_norm: 0.9476631728260455, iteration: 37780
loss: 1.0312358140945435,grad_norm: 0.9999991177180287, iteration: 37781
loss: 1.0088340044021606,grad_norm: 0.7967826667537675, iteration: 37782
loss: 0.9585733413696289,grad_norm: 0.9999990935840744, iteration: 37783
loss: 0.9302030801773071,grad_norm: 0.9999991907726525, iteration: 37784
loss: 0.9974347949028015,grad_norm: 0.9999990667919517, iteration: 37785
loss: 1.039910912513733,grad_norm: 1.0000000899127088, iteration: 37786
loss: 0.9652615785598755,grad_norm: 0.7866661014364886, iteration: 37787
loss: 1.0297534465789795,grad_norm: 0.9999991212932808, iteration: 37788
loss: 1.0533759593963623,grad_norm: 0.9999995991955859, iteration: 37789
loss: 1.070704698562622,grad_norm: 0.9999993845001598, iteration: 37790
loss: 1.0330625772476196,grad_norm: 0.99999956292199, iteration: 37791
loss: 1.0267865657806396,grad_norm: 0.8438162034829724, iteration: 37792
loss: 1.033799171447754,grad_norm: 0.9999993132171501, iteration: 37793
loss: 1.0252066850662231,grad_norm: 0.9999990517530256, iteration: 37794
loss: 1.0128940343856812,grad_norm: 0.9999991559001622, iteration: 37795
loss: 0.9803661108016968,grad_norm: 0.9999991010855043, iteration: 37796
loss: 1.0166642665863037,grad_norm: 0.9606713452652588, iteration: 37797
loss: 0.9768522381782532,grad_norm: 0.9999989332188756, iteration: 37798
loss: 1.03231942653656,grad_norm: 0.9897352919555137, iteration: 37799
loss: 1.0139412879943848,grad_norm: 0.9999993316554898, iteration: 37800
loss: 1.029294729232788,grad_norm: 0.9999992019568801, iteration: 37801
loss: 1.008836269378662,grad_norm: 0.9411169514019345, iteration: 37802
loss: 1.004563808441162,grad_norm: 0.9695577842395526, iteration: 37803
loss: 1.0108444690704346,grad_norm: 0.8429247490327011, iteration: 37804
loss: 1.0250678062438965,grad_norm: 0.949161169798602, iteration: 37805
loss: 0.9917425513267517,grad_norm: 0.9661103500605046, iteration: 37806
loss: 1.0168774127960205,grad_norm: 0.8238394326579116, iteration: 37807
loss: 1.0096460580825806,grad_norm: 0.9328271234155411, iteration: 37808
loss: 1.0049103498458862,grad_norm: 0.9999990805477628, iteration: 37809
loss: 1.0077378749847412,grad_norm: 0.9999992011241873, iteration: 37810
loss: 1.0572649240493774,grad_norm: 0.9931873186527175, iteration: 37811
loss: 0.9687709212303162,grad_norm: 0.9916297723395932, iteration: 37812
loss: 0.9797021746635437,grad_norm: 0.9961955954428003, iteration: 37813
loss: 0.9994561672210693,grad_norm: 0.8685744740157874, iteration: 37814
loss: 0.9646607041358948,grad_norm: 0.9999992460419562, iteration: 37815
loss: 1.0347323417663574,grad_norm: 0.8519455324432905, iteration: 37816
loss: 1.0212435722351074,grad_norm: 0.9999992465873788, iteration: 37817
loss: 1.0307064056396484,grad_norm: 0.9999993605425509, iteration: 37818
loss: 1.0467830896377563,grad_norm: 0.9999996983562845, iteration: 37819
loss: 1.004860281944275,grad_norm: 0.9999992801311032, iteration: 37820
loss: 1.0853493213653564,grad_norm: 0.9999997102026145, iteration: 37821
loss: 0.9906877279281616,grad_norm: 0.9037762187466015, iteration: 37822
loss: 0.9987837076187134,grad_norm: 0.9999993084493394, iteration: 37823
loss: 1.0730818510055542,grad_norm: 0.9999996418240127, iteration: 37824
loss: 0.999582827091217,grad_norm: 0.8089563859875025, iteration: 37825
loss: 1.0492184162139893,grad_norm: 0.9999996613249298, iteration: 37826
loss: 0.9627504944801331,grad_norm: 0.9237533860439888, iteration: 37827
loss: 1.0591676235198975,grad_norm: 0.999999643846571, iteration: 37828
loss: 1.0034633874893188,grad_norm: 0.9999995194733772, iteration: 37829
loss: 1.0313819646835327,grad_norm: 0.9524008224211167, iteration: 37830
loss: 1.017591953277588,grad_norm: 0.999999464285494, iteration: 37831
loss: 1.002266764640808,grad_norm: 0.8993631320643679, iteration: 37832
loss: 1.0223368406295776,grad_norm: 0.9276134698736269, iteration: 37833
loss: 1.0189989805221558,grad_norm: 0.871531560089509, iteration: 37834
loss: 1.0551730394363403,grad_norm: 0.9999996770404843, iteration: 37835
loss: 1.014955997467041,grad_norm: 0.999999342835927, iteration: 37836
loss: 0.992572009563446,grad_norm: 0.9498378513811467, iteration: 37837
loss: 1.020065426826477,grad_norm: 0.9999990916293436, iteration: 37838
loss: 1.0368260145187378,grad_norm: 0.9597469187272407, iteration: 37839
loss: 1.0429832935333252,grad_norm: 0.9999991522685224, iteration: 37840
loss: 1.0199042558670044,grad_norm: 0.8141869468875671, iteration: 37841
loss: 0.9868453145027161,grad_norm: 0.9999989325223106, iteration: 37842
loss: 0.9990966320037842,grad_norm: 0.9999993324648627, iteration: 37843
loss: 1.0077122449874878,grad_norm: 0.8409399324043207, iteration: 37844
loss: 1.0318230390548706,grad_norm: 0.999999374637367, iteration: 37845
loss: 0.9908350110054016,grad_norm: 0.9742788583102386, iteration: 37846
loss: 1.011916160583496,grad_norm: 0.9999996807666216, iteration: 37847
loss: 1.004653811454773,grad_norm: 0.9732594447607233, iteration: 37848
loss: 1.0018104314804077,grad_norm: 0.9999992894946567, iteration: 37849
loss: 1.0055402517318726,grad_norm: 0.9999992126440738, iteration: 37850
loss: 0.9937637448310852,grad_norm: 0.9999990979877973, iteration: 37851
loss: 1.0896373987197876,grad_norm: 0.999999622088662, iteration: 37852
loss: 0.9894278049468994,grad_norm: 0.9999990509622708, iteration: 37853
loss: 1.0016307830810547,grad_norm: 0.837472154810573, iteration: 37854
loss: 1.0399062633514404,grad_norm: 0.9999994559909846, iteration: 37855
loss: 1.042063593864441,grad_norm: 0.9999995819423159, iteration: 37856
loss: 1.0413298606872559,grad_norm: 0.9581761553669857, iteration: 37857
loss: 1.0171104669570923,grad_norm: 0.9999996142524428, iteration: 37858
loss: 0.9845355153083801,grad_norm: 0.9999991698610448, iteration: 37859
loss: 1.0028817653656006,grad_norm: 0.9999990525855796, iteration: 37860
loss: 1.0013861656188965,grad_norm: 0.9999990414660436, iteration: 37861
loss: 1.0475034713745117,grad_norm: 0.9999991098020895, iteration: 37862
loss: 0.9812146425247192,grad_norm: 0.999999220852922, iteration: 37863
loss: 1.0475863218307495,grad_norm: 0.9999995586356214, iteration: 37864
loss: 1.0537643432617188,grad_norm: 0.9999995487104215, iteration: 37865
loss: 1.0215656757354736,grad_norm: 0.9999991389834052, iteration: 37866
loss: 1.0127664804458618,grad_norm: 0.9999994838260516, iteration: 37867
loss: 1.057009220123291,grad_norm: 0.999999712137874, iteration: 37868
loss: 1.0090688467025757,grad_norm: 0.9999996527363464, iteration: 37869
loss: 1.066886305809021,grad_norm: 0.9999991648358418, iteration: 37870
loss: 1.0631877183914185,grad_norm: 0.9999996651709763, iteration: 37871
loss: 1.0065670013427734,grad_norm: 0.9392475380736407, iteration: 37872
loss: 1.0591022968292236,grad_norm: 0.9999993532814866, iteration: 37873
loss: 0.9656513333320618,grad_norm: 0.8242989704360608, iteration: 37874
loss: 1.0157448053359985,grad_norm: 0.9999992777118689, iteration: 37875
loss: 1.0096049308776855,grad_norm: 0.9999994717130537, iteration: 37876
loss: 1.030746340751648,grad_norm: 0.8360385291445959, iteration: 37877
loss: 1.025899887084961,grad_norm: 0.9999990371865658, iteration: 37878
loss: 1.0174692869186401,grad_norm: 0.9267833846647361, iteration: 37879
loss: 0.9819259643554688,grad_norm: 0.9999991755503681, iteration: 37880
loss: 1.0083239078521729,grad_norm: 0.9999994478678691, iteration: 37881
loss: 1.0147004127502441,grad_norm: 0.8558532521321383, iteration: 37882
loss: 0.9986939430236816,grad_norm: 0.9999993232768499, iteration: 37883
loss: 1.0277892351150513,grad_norm: 0.8155412739758826, iteration: 37884
loss: 1.018622636795044,grad_norm: 0.8904858618002917, iteration: 37885
loss: 0.9968684911727905,grad_norm: 0.9999990824427791, iteration: 37886
loss: 1.0023740530014038,grad_norm: 0.7951155883711392, iteration: 37887
loss: 1.0042438507080078,grad_norm: 0.9999995341137279, iteration: 37888
loss: 0.9978402256965637,grad_norm: 0.9031539640147461, iteration: 37889
loss: 1.054349660873413,grad_norm: 0.9999993664130963, iteration: 37890
loss: 1.027800440788269,grad_norm: 0.7753920818067533, iteration: 37891
loss: 0.9846011996269226,grad_norm: 0.9999992323360617, iteration: 37892
loss: 1.054146647453308,grad_norm: 0.9664706897972837, iteration: 37893
loss: 1.0676530599594116,grad_norm: 0.9999990917172844, iteration: 37894
loss: 1.00569486618042,grad_norm: 0.9185570823511372, iteration: 37895
loss: 0.9920486211776733,grad_norm: 0.9999991620410957, iteration: 37896
loss: 1.2052127122879028,grad_norm: 0.9999992709738865, iteration: 37897
loss: 1.0005069971084595,grad_norm: 0.9247170394513167, iteration: 37898
loss: 0.9927502274513245,grad_norm: 0.8808181947970077, iteration: 37899
loss: 1.054269790649414,grad_norm: 0.9999995169177018, iteration: 37900
loss: 1.0061427354812622,grad_norm: 0.9999991751674707, iteration: 37901
loss: 1.0035394430160522,grad_norm: 0.9999991048971671, iteration: 37902
loss: 1.0350139141082764,grad_norm: 0.8424848269759192, iteration: 37903
loss: 1.0030659437179565,grad_norm: 0.9999990304193715, iteration: 37904
loss: 1.0222793817520142,grad_norm: 0.867301874030624, iteration: 37905
loss: 1.019553303718567,grad_norm: 0.9090300017813001, iteration: 37906
loss: 1.0103739500045776,grad_norm: 0.9999995376330764, iteration: 37907
loss: 1.0405211448669434,grad_norm: 0.9999991567878735, iteration: 37908
loss: 1.029969573020935,grad_norm: 0.8841968948667612, iteration: 37909
loss: 1.0110186338424683,grad_norm: 0.9999991371357531, iteration: 37910
loss: 1.0723018646240234,grad_norm: 0.9999995312859349, iteration: 37911
loss: 1.0188654661178589,grad_norm: 0.9571200100107221, iteration: 37912
loss: 0.9895313382148743,grad_norm: 0.9999996163623497, iteration: 37913
loss: 1.0212262868881226,grad_norm: 0.9999992220600938, iteration: 37914
loss: 1.003220796585083,grad_norm: 0.8656031213233389, iteration: 37915
loss: 1.063820719718933,grad_norm: 0.9999995802214128, iteration: 37916
loss: 1.0025931596755981,grad_norm: 0.9093215029628008, iteration: 37917
loss: 1.030456304550171,grad_norm: 0.9999994226634497, iteration: 37918
loss: 0.9927389025688171,grad_norm: 0.936467423644879, iteration: 37919
loss: 1.0096632242202759,grad_norm: 0.8098457371735067, iteration: 37920
loss: 0.9894938468933105,grad_norm: 0.999998998953837, iteration: 37921
loss: 1.0049645900726318,grad_norm: 0.9008502550069912, iteration: 37922
loss: 1.0016734600067139,grad_norm: 0.9999991494231292, iteration: 37923
loss: 0.9935680627822876,grad_norm: 0.9258327450224787, iteration: 37924
loss: 1.1948833465576172,grad_norm: 0.9999996119722889, iteration: 37925
loss: 1.0529091358184814,grad_norm: 0.9999993170976458, iteration: 37926
loss: 1.0440677404403687,grad_norm: 0.9999994400623007, iteration: 37927
loss: 1.0277034044265747,grad_norm: 0.9581394730865277, iteration: 37928
loss: 1.0432164669036865,grad_norm: 0.8385030170611552, iteration: 37929
loss: 1.0254387855529785,grad_norm: 0.9918060723993593, iteration: 37930
loss: 1.019737958908081,grad_norm: 0.9999991954523836, iteration: 37931
loss: 1.0053588151931763,grad_norm: 0.999999064057829, iteration: 37932
loss: 0.9562262892723083,grad_norm: 0.9999992946776108, iteration: 37933
loss: 0.9686676263809204,grad_norm: 0.9999990784093046, iteration: 37934
loss: 0.9692859053611755,grad_norm: 0.9999993108422267, iteration: 37935
loss: 0.9927870035171509,grad_norm: 0.9999991697159044, iteration: 37936
loss: 1.0040894746780396,grad_norm: 0.8865544159500974, iteration: 37937
loss: 1.0290448665618896,grad_norm: 0.9999992030162618, iteration: 37938
loss: 1.0238863229751587,grad_norm: 0.9999993433198744, iteration: 37939
loss: 0.9918712377548218,grad_norm: 0.7545574185194353, iteration: 37940
loss: 1.011541724205017,grad_norm: 0.9914549047843363, iteration: 37941
loss: 1.015736699104309,grad_norm: 0.9999992271966939, iteration: 37942
loss: 1.0973988771438599,grad_norm: 0.9999992153973306, iteration: 37943
loss: 1.0207141637802124,grad_norm: 0.9999990037011846, iteration: 37944
loss: 1.0317555665969849,grad_norm: 0.9576445755988005, iteration: 37945
loss: 0.9859641194343567,grad_norm: 0.8053899875584551, iteration: 37946
loss: 0.9535075426101685,grad_norm: 0.8076112908831471, iteration: 37947
loss: 1.0022501945495605,grad_norm: 0.8583068233820694, iteration: 37948
loss: 0.9879634976387024,grad_norm: 0.9999992885005797, iteration: 37949
loss: 1.0020713806152344,grad_norm: 0.9999990747482842, iteration: 37950
loss: 0.9723859429359436,grad_norm: 0.999999199933445, iteration: 37951
loss: 1.0147225856781006,grad_norm: 0.9999990843937558, iteration: 37952
loss: 1.0363185405731201,grad_norm: 0.9999992165647013, iteration: 37953
loss: 1.1709789037704468,grad_norm: 0.999999535994275, iteration: 37954
loss: 1.0399816036224365,grad_norm: 0.9999997087490331, iteration: 37955
loss: 1.0030359029769897,grad_norm: 0.8450319611983785, iteration: 37956
loss: 0.9900369644165039,grad_norm: 0.8918599671506491, iteration: 37957
loss: 1.039259672164917,grad_norm: 0.9424779379319237, iteration: 37958
loss: 1.018438458442688,grad_norm: 0.9159577482079274, iteration: 37959
loss: 1.0131644010543823,grad_norm: 0.9866280750003894, iteration: 37960
loss: 1.0037522315979004,grad_norm: 0.9999997432280053, iteration: 37961
loss: 0.9919029474258423,grad_norm: 0.9308137451796584, iteration: 37962
loss: 1.000121831893921,grad_norm: 0.9633346289481287, iteration: 37963
loss: 0.9919219017028809,grad_norm: 0.9999991033705224, iteration: 37964
loss: 1.0284996032714844,grad_norm: 0.9999990645139005, iteration: 37965
loss: 1.022634506225586,grad_norm: 0.9769348186926052, iteration: 37966
loss: 0.9463721513748169,grad_norm: 0.9999991607143767, iteration: 37967
loss: 1.0228923559188843,grad_norm: 0.8243605434932442, iteration: 37968
loss: 1.0163201093673706,grad_norm: 0.9999993216701567, iteration: 37969
loss: 1.016708493232727,grad_norm: 0.9135282328698663, iteration: 37970
loss: 1.0496087074279785,grad_norm: 0.9999991306587082, iteration: 37971
loss: 1.0158699750900269,grad_norm: 0.9999990272019602, iteration: 37972
loss: 0.9825701117515564,grad_norm: 0.9999992084648647, iteration: 37973
loss: 1.013124942779541,grad_norm: 0.9207661272589273, iteration: 37974
loss: 1.0019409656524658,grad_norm: 0.9999990237829178, iteration: 37975
loss: 1.0603724718093872,grad_norm: 0.9999996525924243, iteration: 37976
loss: 1.0436171293258667,grad_norm: 0.9999990240046404, iteration: 37977
loss: 0.9763851165771484,grad_norm: 0.9999990221132983, iteration: 37978
loss: 1.0425478219985962,grad_norm: 0.9999994685784626, iteration: 37979
loss: 1.0179258584976196,grad_norm: 0.9849747453340113, iteration: 37980
loss: 1.0100122690200806,grad_norm: 0.9999992642208373, iteration: 37981
loss: 1.0147968530654907,grad_norm: 0.9945944311710927, iteration: 37982
loss: 1.3158007860183716,grad_norm: 0.9999996530584718, iteration: 37983
loss: 0.9981610178947449,grad_norm: 0.9999990481491116, iteration: 37984
loss: 1.0467602014541626,grad_norm: 0.9655703268337413, iteration: 37985
loss: 0.9953716397285461,grad_norm: 0.9466513641032723, iteration: 37986
loss: 0.9914116263389587,grad_norm: 0.8384673782510701, iteration: 37987
loss: 1.0059401988983154,grad_norm: 0.9809536043295587, iteration: 37988
loss: 1.0276710987091064,grad_norm: 0.9999992786380373, iteration: 37989
loss: 0.9900688529014587,grad_norm: 0.8519779583409062, iteration: 37990
loss: 1.0326379537582397,grad_norm: 0.9742002430436724, iteration: 37991
loss: 1.0231596231460571,grad_norm: 0.9787632363200756, iteration: 37992
loss: 1.020902395248413,grad_norm: 0.9999994316483622, iteration: 37993
loss: 1.0417332649230957,grad_norm: 0.9999994777335515, iteration: 37994
loss: 1.023934245109558,grad_norm: 0.9148126554471033, iteration: 37995
loss: 1.1555969715118408,grad_norm: 0.9999998189773003, iteration: 37996
loss: 1.0308239459991455,grad_norm: 0.999999315819218, iteration: 37997
loss: 1.0503227710723877,grad_norm: 0.9999992786798868, iteration: 37998
loss: 1.0550978183746338,grad_norm: 0.9999997339832128, iteration: 37999
loss: 0.9893312454223633,grad_norm: 0.9030689878623829, iteration: 38000
loss: 1.0259497165679932,grad_norm: 0.9419404603773126, iteration: 38001
loss: 1.0099289417266846,grad_norm: 0.9568812287936811, iteration: 38002
loss: 1.0041561126708984,grad_norm: 0.9869065333493167, iteration: 38003
loss: 1.0185645818710327,grad_norm: 0.9000070628926488, iteration: 38004
loss: 1.0407822132110596,grad_norm: 0.9999993944498301, iteration: 38005
loss: 1.0532654523849487,grad_norm: 0.9999994108097098, iteration: 38006
loss: 1.0510610342025757,grad_norm: 0.9999990369645154, iteration: 38007
loss: 1.0024899244308472,grad_norm: 0.9999991231243823, iteration: 38008
loss: 1.0712827444076538,grad_norm: 0.9999990880703062, iteration: 38009
loss: 1.0457466840744019,grad_norm: 0.9370133258393307, iteration: 38010
loss: 1.0213582515716553,grad_norm: 0.9690839115511706, iteration: 38011
loss: 0.996394693851471,grad_norm: 0.8400433362976437, iteration: 38012
loss: 1.0814491510391235,grad_norm: 0.9999996897873046, iteration: 38013
loss: 1.0727379322052002,grad_norm: 0.9999996785850545, iteration: 38014
loss: 1.0170378684997559,grad_norm: 0.9999995755187187, iteration: 38015
loss: 1.0108401775360107,grad_norm: 0.999999136124009, iteration: 38016
loss: 1.0117501020431519,grad_norm: 0.9337715751932245, iteration: 38017
loss: 1.045005440711975,grad_norm: 0.9416233828544122, iteration: 38018
loss: 0.9886829853057861,grad_norm: 0.9545375434267616, iteration: 38019
loss: 0.9771870970726013,grad_norm: 0.9999989964726393, iteration: 38020
loss: 0.9864175319671631,grad_norm: 0.999999058599817, iteration: 38021
loss: 1.0004743337631226,grad_norm: 0.8465727873801961, iteration: 38022
loss: 1.0020047426223755,grad_norm: 0.8140003987647912, iteration: 38023
loss: 1.0084764957427979,grad_norm: 0.961823395637826, iteration: 38024
loss: 1.0165647268295288,grad_norm: 0.889278426283528, iteration: 38025
loss: 1.0434473752975464,grad_norm: 0.9445349679331884, iteration: 38026
loss: 1.0225826501846313,grad_norm: 0.999999128392585, iteration: 38027
loss: 0.989130437374115,grad_norm: 0.9730877320308844, iteration: 38028
loss: 1.0062223672866821,grad_norm: 0.8408986837430741, iteration: 38029
loss: 1.021615743637085,grad_norm: 0.9999991321452645, iteration: 38030
loss: 0.9975602030754089,grad_norm: 0.9999991086059419, iteration: 38031
loss: 0.9876307249069214,grad_norm: 0.9248975174115287, iteration: 38032
loss: 1.0055441856384277,grad_norm: 0.9426328907064966, iteration: 38033
loss: 0.9964883923530579,grad_norm: 0.9999989633639412, iteration: 38034
loss: 1.001111626625061,grad_norm: 0.9999993472358437, iteration: 38035
loss: 0.9799084663391113,grad_norm: 0.8752659601269079, iteration: 38036
loss: 0.9958648085594177,grad_norm: 0.9019757982061383, iteration: 38037
loss: 1.0179500579833984,grad_norm: 0.9999991927139389, iteration: 38038
loss: 1.1131024360656738,grad_norm: 0.9999991787969811, iteration: 38039
loss: 1.0741077661514282,grad_norm: 0.9999990594359522, iteration: 38040
loss: 1.055582046508789,grad_norm: 0.9999990072819891, iteration: 38041
loss: 1.0095689296722412,grad_norm: 0.99999922070786, iteration: 38042
loss: 0.9967550039291382,grad_norm: 0.9999991674132417, iteration: 38043
loss: 1.0431667566299438,grad_norm: 0.999999791297013, iteration: 38044
loss: 0.9927762746810913,grad_norm: 0.9561418115421001, iteration: 38045
loss: 0.9736194014549255,grad_norm: 0.9999990986668186, iteration: 38046
loss: 1.0135796070098877,grad_norm: 0.999999224456587, iteration: 38047
loss: 1.0293890237808228,grad_norm: 0.8263910545446581, iteration: 38048
loss: 0.9979278445243835,grad_norm: 0.8436548442490577, iteration: 38049
loss: 1.002992868423462,grad_norm: 0.9254968398815302, iteration: 38050
loss: 0.989529013633728,grad_norm: 0.999999137958067, iteration: 38051
loss: 1.051326870918274,grad_norm: 0.8637575702052611, iteration: 38052
loss: 1.0161404609680176,grad_norm: 0.9186098368833149, iteration: 38053
loss: 0.9933945536613464,grad_norm: 0.8520824894041866, iteration: 38054
loss: 1.0138700008392334,grad_norm: 0.9999992105645624, iteration: 38055
loss: 0.9840139746665955,grad_norm: 0.9999990487629322, iteration: 38056
loss: 1.0097622871398926,grad_norm: 0.9999991376868506, iteration: 38057
loss: 0.9901624321937561,grad_norm: 0.9801733947693265, iteration: 38058
loss: 1.0150424242019653,grad_norm: 0.8425404110707295, iteration: 38059
loss: 1.0113486051559448,grad_norm: 0.988140635892891, iteration: 38060
loss: 1.0022205114364624,grad_norm: 0.9999994054926493, iteration: 38061
loss: 0.9710294604301453,grad_norm: 0.9999990111409063, iteration: 38062
loss: 1.0006158351898193,grad_norm: 0.9999989516573237, iteration: 38063
loss: 1.0032856464385986,grad_norm: 0.8873290328091509, iteration: 38064
loss: 1.014440655708313,grad_norm: 0.9999996056217748, iteration: 38065
loss: 1.00117826461792,grad_norm: 0.9999990270455672, iteration: 38066
loss: 0.9760203957557678,grad_norm: 0.8211042845971567, iteration: 38067
loss: 1.0210448503494263,grad_norm: 0.9999991108272145, iteration: 38068
loss: 1.008528709411621,grad_norm: 0.8192861093133563, iteration: 38069
loss: 1.288018822669983,grad_norm: 0.9999994971579265, iteration: 38070
loss: 1.0158034563064575,grad_norm: 0.9999997496225366, iteration: 38071
loss: 1.1347986459732056,grad_norm: 0.999999425680332, iteration: 38072
loss: 0.9612101912498474,grad_norm: 0.8433439925650629, iteration: 38073
loss: 1.009529948234558,grad_norm: 0.7538841305274603, iteration: 38074
loss: 0.9909135699272156,grad_norm: 0.9999991018783057, iteration: 38075
loss: 0.9901151061058044,grad_norm: 0.7471077893368596, iteration: 38076
loss: 1.051157832145691,grad_norm: 0.9489489201696746, iteration: 38077
loss: 1.0336458683013916,grad_norm: 0.9999992913635887, iteration: 38078
loss: 1.026047945022583,grad_norm: 0.9999992714663749, iteration: 38079
loss: 1.0000969171524048,grad_norm: 0.8927613313534782, iteration: 38080
loss: 1.0600204467773438,grad_norm: 0.9999995661987036, iteration: 38081
loss: 0.9952192902565002,grad_norm: 0.9296444993988351, iteration: 38082
loss: 1.0115272998809814,grad_norm: 0.9999991907162303, iteration: 38083
loss: 0.9851861000061035,grad_norm: 0.9071039241338584, iteration: 38084
loss: 1.0224413871765137,grad_norm: 0.9999992379836286, iteration: 38085
loss: 0.9815332889556885,grad_norm: 0.9999991277169347, iteration: 38086
loss: 1.012610673904419,grad_norm: 0.9195496690976688, iteration: 38087
loss: 1.0592234134674072,grad_norm: 0.9999995563380109, iteration: 38088
loss: 1.069857120513916,grad_norm: 0.9999990638252376, iteration: 38089
loss: 1.074509859085083,grad_norm: 0.9473578471657887, iteration: 38090
loss: 1.036849021911621,grad_norm: 0.9999992104842509, iteration: 38091
loss: 0.975783109664917,grad_norm: 0.9223423695767324, iteration: 38092
loss: 1.0246158838272095,grad_norm: 0.8993452134319599, iteration: 38093
loss: 1.0543113946914673,grad_norm: 0.9999996597686526, iteration: 38094
loss: 0.9872693419456482,grad_norm: 0.9245402281981273, iteration: 38095
loss: 0.9834656119346619,grad_norm: 0.9999992420820452, iteration: 38096
loss: 0.9784393310546875,grad_norm: 0.9999990666022702, iteration: 38097
loss: 1.0667506456375122,grad_norm: 0.9999994710805751, iteration: 38098
loss: 0.9681863188743591,grad_norm: 0.999999241460905, iteration: 38099
loss: 1.0058540105819702,grad_norm: 0.9999991757040931, iteration: 38100
loss: 1.0316461324691772,grad_norm: 0.9999997907958516, iteration: 38101
loss: 1.0342448949813843,grad_norm: 0.9166436979122667, iteration: 38102
loss: 1.0362436771392822,grad_norm: 0.9999995673757194, iteration: 38103
loss: 1.0215564966201782,grad_norm: 0.8121157457611659, iteration: 38104
loss: 0.977992594242096,grad_norm: 0.877374631642503, iteration: 38105
loss: 0.9973458051681519,grad_norm: 0.9496891153576221, iteration: 38106
loss: 1.01896071434021,grad_norm: 0.9999993145738094, iteration: 38107
loss: 1.016674518585205,grad_norm: 0.9900447593769687, iteration: 38108
loss: 1.020632266998291,grad_norm: 0.9999991299046354, iteration: 38109
loss: 1.0226622819900513,grad_norm: 0.9999998903536228, iteration: 38110
loss: 1.0410860776901245,grad_norm: 0.9999993809615437, iteration: 38111
loss: 1.0553251504898071,grad_norm: 0.9999995957024362, iteration: 38112
loss: 0.9874857068061829,grad_norm: 0.999999526852271, iteration: 38113
loss: 0.9786940217018127,grad_norm: 0.9779364799556162, iteration: 38114
loss: 1.0271141529083252,grad_norm: 0.9527379173573471, iteration: 38115
loss: 0.9845823645591736,grad_norm: 0.8391734403442204, iteration: 38116
loss: 1.0516871213912964,grad_norm: 0.9842705806974186, iteration: 38117
loss: 1.0023514032363892,grad_norm: 0.9999990243325264, iteration: 38118
loss: 1.0546674728393555,grad_norm: 0.9999989951876695, iteration: 38119
loss: 1.062569499015808,grad_norm: 0.9999990845593482, iteration: 38120
loss: 1.0292786359786987,grad_norm: 0.9281792041152901, iteration: 38121
loss: 1.0295641422271729,grad_norm: 0.9131609974572831, iteration: 38122
loss: 1.0054974555969238,grad_norm: 0.9257153123911716, iteration: 38123
loss: 1.0117344856262207,grad_norm: 0.8857123194706003, iteration: 38124
loss: 0.9909863471984863,grad_norm: 0.8556661981205953, iteration: 38125
loss: 1.056695580482483,grad_norm: 0.781912629531891, iteration: 38126
loss: 1.0249422788619995,grad_norm: 0.8541462863413857, iteration: 38127
loss: 1.0215617418289185,grad_norm: 0.9999995738957955, iteration: 38128
loss: 1.0153193473815918,grad_norm: 0.9943104703275135, iteration: 38129
loss: 0.9747524857521057,grad_norm: 0.9999996141534007, iteration: 38130
loss: 0.9743802547454834,grad_norm: 0.9975442640987113, iteration: 38131
loss: 0.994784951210022,grad_norm: 0.9999990585494026, iteration: 38132
loss: 1.012519359588623,grad_norm: 0.91062092337758, iteration: 38133
loss: 1.010407567024231,grad_norm: 0.9999995365662633, iteration: 38134
loss: 1.013709545135498,grad_norm: 0.783919655814559, iteration: 38135
loss: 1.042165994644165,grad_norm: 0.9999991676960633, iteration: 38136
loss: 1.023945927619934,grad_norm: 0.9723725459971516, iteration: 38137
loss: 0.9748800992965698,grad_norm: 0.9569641558892147, iteration: 38138
loss: 1.0107297897338867,grad_norm: 0.9635963637587025, iteration: 38139
loss: 0.9934533834457397,grad_norm: 0.9999994531247152, iteration: 38140
loss: 1.0147908926010132,grad_norm: 0.9673264724348909, iteration: 38141
loss: 0.9835535883903503,grad_norm: 0.871537424262495, iteration: 38142
loss: 1.037197232246399,grad_norm: 0.7789080565471906, iteration: 38143
loss: 0.98674076795578,grad_norm: 0.8500085345305023, iteration: 38144
loss: 0.9984902739524841,grad_norm: 0.9875797799335594, iteration: 38145
loss: 0.999625563621521,grad_norm: 0.9260066080987143, iteration: 38146
loss: 0.959855854511261,grad_norm: 0.9018947757406569, iteration: 38147
loss: 1.0030008554458618,grad_norm: 0.875587553602272, iteration: 38148
loss: 1.0335646867752075,grad_norm: 0.8695777799456759, iteration: 38149
loss: 0.9938555955886841,grad_norm: 0.9999990849227229, iteration: 38150
loss: 1.0084704160690308,grad_norm: 0.9999990705477628, iteration: 38151
loss: 1.0199851989746094,grad_norm: 0.9999991099883931, iteration: 38152
loss: 0.9957369565963745,grad_norm: 0.9974773979539621, iteration: 38153
loss: 1.0084079504013062,grad_norm: 0.9031613911651749, iteration: 38154
loss: 1.0253093242645264,grad_norm: 0.999999339387359, iteration: 38155
loss: 0.9624946117401123,grad_norm: 0.9999992285168849, iteration: 38156
loss: 0.9818574786186218,grad_norm: 0.9999990897539609, iteration: 38157
loss: 1.0833112001419067,grad_norm: 0.9999995418696683, iteration: 38158
loss: 1.0349186658859253,grad_norm: 0.9999994746484023, iteration: 38159
loss: 1.0434671640396118,grad_norm: 0.9999996047373887, iteration: 38160
loss: 0.9880642294883728,grad_norm: 0.8815518886556458, iteration: 38161
loss: 1.0146557092666626,grad_norm: 0.9048765974574239, iteration: 38162
loss: 0.9779638051986694,grad_norm: 0.9999991650106405, iteration: 38163
loss: 1.0216275453567505,grad_norm: 0.9999990236292953, iteration: 38164
loss: 1.0264497995376587,grad_norm: 0.9999990511086652, iteration: 38165
loss: 1.0291876792907715,grad_norm: 0.9999992097930057, iteration: 38166
loss: 1.0144929885864258,grad_norm: 0.9999990680616563, iteration: 38167
loss: 1.0114868879318237,grad_norm: 0.9999991346589749, iteration: 38168
loss: 1.0039970874786377,grad_norm: 0.9999992007274346, iteration: 38169
loss: 0.9600445032119751,grad_norm: 0.9434461183335255, iteration: 38170
loss: 1.0107815265655518,grad_norm: 0.920027468639794, iteration: 38171
loss: 1.0470565557479858,grad_norm: 0.9999990138636554, iteration: 38172
loss: 1.0765734910964966,grad_norm: 0.9999996117960603, iteration: 38173
loss: 0.9710308909416199,grad_norm: 0.9275264903955215, iteration: 38174
loss: 1.0330498218536377,grad_norm: 0.8867314993559987, iteration: 38175
loss: 0.9886966347694397,grad_norm: 0.9699682974892275, iteration: 38176
loss: 0.9510673880577087,grad_norm: 0.9014057966003478, iteration: 38177
loss: 1.0309925079345703,grad_norm: 0.7518725379600514, iteration: 38178
loss: 0.9721242785453796,grad_norm: 0.9923669004821596, iteration: 38179
loss: 1.0223333835601807,grad_norm: 0.9999995966638975, iteration: 38180
loss: 0.9672984480857849,grad_norm: 0.9828069799888572, iteration: 38181
loss: 1.1009442806243896,grad_norm: 0.999999427993072, iteration: 38182
loss: 0.9858536124229431,grad_norm: 0.9996956389303703, iteration: 38183
loss: 0.9941238164901733,grad_norm: 0.9382023404654444, iteration: 38184
loss: 0.9910954833030701,grad_norm: 0.9113454720015831, iteration: 38185
loss: 1.0472867488861084,grad_norm: 0.9999992977109878, iteration: 38186
loss: 0.9846521019935608,grad_norm: 0.9999990615136591, iteration: 38187
loss: 1.0472583770751953,grad_norm: 0.9999991731938941, iteration: 38188
loss: 0.9863035678863525,grad_norm: 0.9642200154340343, iteration: 38189
loss: 0.9941734075546265,grad_norm: 0.9999992427734808, iteration: 38190
loss: 0.9958417415618896,grad_norm: 0.9737963023313613, iteration: 38191
loss: 1.0219417810440063,grad_norm: 0.9999999523121932, iteration: 38192
loss: 0.9883397817611694,grad_norm: 0.9999991357202017, iteration: 38193
loss: 1.009372353553772,grad_norm: 0.8024561394870608, iteration: 38194
loss: 0.9988774657249451,grad_norm: 0.9999989946005744, iteration: 38195
loss: 1.0144094228744507,grad_norm: 0.9999990239830592, iteration: 38196
loss: 1.0163220167160034,grad_norm: 0.9076528126033192, iteration: 38197
loss: 1.0183316469192505,grad_norm: 0.9999993166432897, iteration: 38198
loss: 0.9636095762252808,grad_norm: 0.9999990801920943, iteration: 38199
loss: 0.9737516045570374,grad_norm: 0.8857392585805284, iteration: 38200
loss: 1.0295881032943726,grad_norm: 0.9607545454496659, iteration: 38201
loss: 0.9954152703285217,grad_norm: 0.9999991466275461, iteration: 38202
loss: 1.0028843879699707,grad_norm: 0.8247924037503285, iteration: 38203
loss: 1.0074418783187866,grad_norm: 0.9999991343054292, iteration: 38204
loss: 1.0432393550872803,grad_norm: 0.9999996547720198, iteration: 38205
loss: 0.9613261222839355,grad_norm: 0.9394035055346414, iteration: 38206
loss: 1.036948800086975,grad_norm: 0.8134679494019376, iteration: 38207
loss: 0.9539852142333984,grad_norm: 0.8490776091061947, iteration: 38208
loss: 1.0123422145843506,grad_norm: 0.9999991922297856, iteration: 38209
loss: 1.012635350227356,grad_norm: 0.9999991027250997, iteration: 38210
loss: 1.0447943210601807,grad_norm: 0.9999996737225315, iteration: 38211
loss: 0.9818845987319946,grad_norm: 0.8421871904718866, iteration: 38212
loss: 1.0080764293670654,grad_norm: 0.9814820078581309, iteration: 38213
loss: 0.9791035056114197,grad_norm: 0.8674359405391452, iteration: 38214
loss: 1.038650631904602,grad_norm: 0.8603719273723279, iteration: 38215
loss: 0.9684813022613525,grad_norm: 0.8217497889883845, iteration: 38216
loss: 0.9834035038948059,grad_norm: 0.9619501841930282, iteration: 38217
loss: 1.0000652074813843,grad_norm: 0.7865626651492769, iteration: 38218
loss: 1.0500677824020386,grad_norm: 0.9999991705167214, iteration: 38219
loss: 1.0050429105758667,grad_norm: 0.9999991327556429, iteration: 38220
loss: 1.0108380317687988,grad_norm: 0.9999992084999862, iteration: 38221
loss: 1.005643367767334,grad_norm: 0.9999992730746974, iteration: 38222
loss: 1.0100975036621094,grad_norm: 0.9999990399225133, iteration: 38223
loss: 1.0026484727859497,grad_norm: 0.9999991331869437, iteration: 38224
loss: 1.002485752105713,grad_norm: 0.8500755181763492, iteration: 38225
loss: 1.043765664100647,grad_norm: 0.9999989578494555, iteration: 38226
loss: 0.9954108595848083,grad_norm: 0.7568862950012212, iteration: 38227
loss: 0.9803560376167297,grad_norm: 0.9635713245423536, iteration: 38228
loss: 0.9906772375106812,grad_norm: 0.9502759820806213, iteration: 38229
loss: 0.9816112518310547,grad_norm: 0.8931704493136389, iteration: 38230
loss: 1.025839924812317,grad_norm: 0.838148058471874, iteration: 38231
loss: 1.0189158916473389,grad_norm: 0.9642173276912543, iteration: 38232
loss: 1.0261096954345703,grad_norm: 0.9603584020649392, iteration: 38233
loss: 1.0167263746261597,grad_norm: 0.8424453305078118, iteration: 38234
loss: 1.0333327054977417,grad_norm: 0.9465695612561866, iteration: 38235
loss: 1.005207896232605,grad_norm: 0.8323893108783805, iteration: 38236
loss: 1.0364720821380615,grad_norm: 0.9694955910615446, iteration: 38237
loss: 1.0108400583267212,grad_norm: 0.960878157386166, iteration: 38238
loss: 0.9969671368598938,grad_norm: 0.8523831677306815, iteration: 38239
loss: 1.0165259838104248,grad_norm: 0.999998981065394, iteration: 38240
loss: 1.02829909324646,grad_norm: 0.931560465572099, iteration: 38241
loss: 1.0035406351089478,grad_norm: 0.721373586991212, iteration: 38242
loss: 1.000235676765442,grad_norm: 0.9999994315356552, iteration: 38243
loss: 1.0457724332809448,grad_norm: 0.9999990763707473, iteration: 38244
loss: 1.0253373384475708,grad_norm: 0.999999142801513, iteration: 38245
loss: 0.9659850001335144,grad_norm: 0.8979731004474493, iteration: 38246
loss: 1.0345494747161865,grad_norm: 0.8946487452280145, iteration: 38247
loss: 1.0231643915176392,grad_norm: 0.9965136591214445, iteration: 38248
loss: 1.0021235942840576,grad_norm: 0.999999153832215, iteration: 38249
loss: 1.0098270177841187,grad_norm: 0.9999990893119507, iteration: 38250
loss: 1.024349570274353,grad_norm: 0.9235687788779796, iteration: 38251
loss: 0.9980711936950684,grad_norm: 0.9498461644539165, iteration: 38252
loss: 1.00515878200531,grad_norm: 0.9999990871599944, iteration: 38253
loss: 1.009907603263855,grad_norm: 0.999999216913795, iteration: 38254
loss: 1.0097156763076782,grad_norm: 0.9999992642128005, iteration: 38255
loss: 1.0172253847122192,grad_norm: 0.8989034268725237, iteration: 38256
loss: 0.9950955510139465,grad_norm: 0.9999990461968111, iteration: 38257
loss: 1.0428855419158936,grad_norm: 0.9999995575516574, iteration: 38258
loss: 1.0049587488174438,grad_norm: 0.9234869661876935, iteration: 38259
loss: 1.0157508850097656,grad_norm: 0.9999991197244381, iteration: 38260
loss: 1.008817434310913,grad_norm: 0.9697323629286371, iteration: 38261
loss: 1.003620982170105,grad_norm: 0.9999991306982138, iteration: 38262
loss: 0.9903122186660767,grad_norm: 0.9999992296335316, iteration: 38263
loss: 0.9648654460906982,grad_norm: 0.9002320534188478, iteration: 38264
loss: 1.0115447044372559,grad_norm: 0.738026243623135, iteration: 38265
loss: 1.0307369232177734,grad_norm: 0.9999991264419051, iteration: 38266
loss: 0.9927429556846619,grad_norm: 0.8913015873299385, iteration: 38267
loss: 1.0190349817276,grad_norm: 0.9999997470020647, iteration: 38268
loss: 1.043624758720398,grad_norm: 0.9999990768241782, iteration: 38269
loss: 1.0037331581115723,grad_norm: 0.8265444808607213, iteration: 38270
loss: 1.0199657678604126,grad_norm: 0.8393404097488575, iteration: 38271
loss: 1.011939525604248,grad_norm: 0.8509473894216675, iteration: 38272
loss: 1.013370156288147,grad_norm: 0.9999991118929432, iteration: 38273
loss: 1.038554072380066,grad_norm: 0.9999992944841232, iteration: 38274
loss: 1.009513020515442,grad_norm: 0.844367611741026, iteration: 38275
loss: 1.0527198314666748,grad_norm: 0.9999991168246596, iteration: 38276
loss: 0.9942274689674377,grad_norm: 0.9876934358784433, iteration: 38277
loss: 1.0253562927246094,grad_norm: 0.8044136763491911, iteration: 38278
loss: 1.137561321258545,grad_norm: 0.9999997337837198, iteration: 38279
loss: 0.9901682734489441,grad_norm: 0.7880762921267108, iteration: 38280
loss: 1.0114933252334595,grad_norm: 0.999999194173642, iteration: 38281
loss: 1.0150614976882935,grad_norm: 0.877237053616447, iteration: 38282
loss: 1.0049827098846436,grad_norm: 0.9999991651900951, iteration: 38283
loss: 1.0527377128601074,grad_norm: 0.9222030688261506, iteration: 38284
loss: 1.0568243265151978,grad_norm: 0.9999990961386916, iteration: 38285
loss: 1.013823390007019,grad_norm: 0.9999990426326013, iteration: 38286
loss: 0.9824968576431274,grad_norm: 0.9999991274124189, iteration: 38287
loss: 1.0196871757507324,grad_norm: 0.7863099489589332, iteration: 38288
loss: 1.023577332496643,grad_norm: 0.9999998805193576, iteration: 38289
loss: 0.9582421183586121,grad_norm: 0.981453384760094, iteration: 38290
loss: 0.9934977889060974,grad_norm: 0.9999991068844772, iteration: 38291
loss: 0.9731395840644836,grad_norm: 0.9999991610003623, iteration: 38292
loss: 1.023362159729004,grad_norm: 0.8329536963326832, iteration: 38293
loss: 1.035510540008545,grad_norm: 0.9692917899668972, iteration: 38294
loss: 1.0335438251495361,grad_norm: 0.9999990603757902, iteration: 38295
loss: 1.0024957656860352,grad_norm: 0.9999991738775449, iteration: 38296
loss: 0.992049515247345,grad_norm: 0.9918957880894989, iteration: 38297
loss: 1.0276098251342773,grad_norm: 0.9999996428923356, iteration: 38298
loss: 1.0442005395889282,grad_norm: 0.9999992828422261, iteration: 38299
loss: 1.0379637479782104,grad_norm: 0.9999992909600254, iteration: 38300
loss: 0.9898388385772705,grad_norm: 0.837923383297512, iteration: 38301
loss: 1.0156519412994385,grad_norm: 0.98510298804928, iteration: 38302
loss: 1.0172827243804932,grad_norm: 0.9999996838150103, iteration: 38303
loss: 0.9839813113212585,grad_norm: 0.7289565645134457, iteration: 38304
loss: 1.0072675943374634,grad_norm: 0.9682660859595685, iteration: 38305
loss: 1.0134035348892212,grad_norm: 0.9371671153042364, iteration: 38306
loss: 1.0068079233169556,grad_norm: 0.9999993651949678, iteration: 38307
loss: 1.009099006652832,grad_norm: 0.76182140868313, iteration: 38308
loss: 0.9962805509567261,grad_norm: 0.9318041791983286, iteration: 38309
loss: 1.0083776712417603,grad_norm: 0.9999997317274417, iteration: 38310
loss: 1.0017162561416626,grad_norm: 0.7499646492980854, iteration: 38311
loss: 0.980759859085083,grad_norm: 0.9999993444917593, iteration: 38312
loss: 1.021891713142395,grad_norm: 0.9430517661426419, iteration: 38313
loss: 1.0068632364273071,grad_norm: 0.9317548212566663, iteration: 38314
loss: 1.029808759689331,grad_norm: 0.9999991716878586, iteration: 38315
loss: 1.0250661373138428,grad_norm: 0.9999996601515265, iteration: 38316
loss: 1.0259299278259277,grad_norm: 0.9484875007356185, iteration: 38317
loss: 0.9971494078636169,grad_norm: 0.972521672437347, iteration: 38318
loss: 1.0024281740188599,grad_norm: 0.9172879856330153, iteration: 38319
loss: 0.9993920922279358,grad_norm: 0.9999992602389567, iteration: 38320
loss: 1.0236005783081055,grad_norm: 0.999999095979923, iteration: 38321
loss: 1.0253130197525024,grad_norm: 0.9999990497518255, iteration: 38322
loss: 1.0079032182693481,grad_norm: 0.9840869504422316, iteration: 38323
loss: 1.0306466817855835,grad_norm: 0.9999994990490042, iteration: 38324
loss: 1.0103870630264282,grad_norm: 0.9999991221047958, iteration: 38325
loss: 1.0110092163085938,grad_norm: 0.999999097858095, iteration: 38326
loss: 0.9851818084716797,grad_norm: 0.7499044316288177, iteration: 38327
loss: 1.0466011762619019,grad_norm: 0.9064236211019273, iteration: 38328
loss: 0.974173367023468,grad_norm: 0.936590975787369, iteration: 38329
loss: 1.0054374933242798,grad_norm: 0.9999990501635595, iteration: 38330
loss: 1.0126113891601562,grad_norm: 0.999999110351453, iteration: 38331
loss: 0.9922035932540894,grad_norm: 0.8950857360188142, iteration: 38332
loss: 1.0047597885131836,grad_norm: 0.9221936252415542, iteration: 38333
loss: 0.9970778822898865,grad_norm: 0.992032065358005, iteration: 38334
loss: 0.979631781578064,grad_norm: 0.9999990512629375, iteration: 38335
loss: 1.0378849506378174,grad_norm: 0.9999993251169872, iteration: 38336
loss: 1.0224491357803345,grad_norm: 0.9999990086335052, iteration: 38337
loss: 0.9823488593101501,grad_norm: 0.9999990764976722, iteration: 38338
loss: 0.9425179362297058,grad_norm: 0.9999991158284851, iteration: 38339
loss: 0.9757670760154724,grad_norm: 0.9999991242014596, iteration: 38340
loss: 0.9687886238098145,grad_norm: 0.8013314411137131, iteration: 38341
loss: 1.0195159912109375,grad_norm: 0.8747096950938965, iteration: 38342
loss: 1.0173484086990356,grad_norm: 0.9267608459383345, iteration: 38343
loss: 0.9637672305107117,grad_norm: 0.99999911135495, iteration: 38344
loss: 1.0336507558822632,grad_norm: 0.9999993321619343, iteration: 38345
loss: 1.005510926246643,grad_norm: 0.9999991114948602, iteration: 38346
loss: 1.0360668897628784,grad_norm: 0.9853293421945178, iteration: 38347
loss: 0.9720736145973206,grad_norm: 0.8397953520489204, iteration: 38348
loss: 1.032036304473877,grad_norm: 0.8873527926153588, iteration: 38349
loss: 1.0589178800582886,grad_norm: 0.9999997269510331, iteration: 38350
loss: 1.011999249458313,grad_norm: 0.9999996655372998, iteration: 38351
loss: 1.073816180229187,grad_norm: 0.999999932160345, iteration: 38352
loss: 1.0065207481384277,grad_norm: 0.999999424242814, iteration: 38353
loss: 1.0684690475463867,grad_norm: 0.9999993760463082, iteration: 38354
loss: 0.9929020404815674,grad_norm: 0.99999933731713, iteration: 38355
loss: 0.9872713685035706,grad_norm: 0.9999994744825559, iteration: 38356
loss: 1.0558902025222778,grad_norm: 0.9999995960097721, iteration: 38357
loss: 1.3155368566513062,grad_norm: 0.9999997999856755, iteration: 38358
loss: 1.0379612445831299,grad_norm: 0.997565020206073, iteration: 38359
loss: 1.0030021667480469,grad_norm: 0.9399577863731999, iteration: 38360
loss: 0.9846293926239014,grad_norm: 0.9832677372473247, iteration: 38361
loss: 1.0049227476119995,grad_norm: 0.894339290328424, iteration: 38362
loss: 0.9778468608856201,grad_norm: 0.9999991619419121, iteration: 38363
loss: 1.0244206190109253,grad_norm: 0.9999997235717609, iteration: 38364
loss: 1.000878930091858,grad_norm: 0.9999990762704082, iteration: 38365
loss: 1.0849964618682861,grad_norm: 0.9999998134273786, iteration: 38366
loss: 0.9854433536529541,grad_norm: 0.8419862165619825, iteration: 38367
loss: 1.0096309185028076,grad_norm: 0.9999990647081357, iteration: 38368
loss: 1.0099916458129883,grad_norm: 0.9999990469791847, iteration: 38369
loss: 1.0776594877243042,grad_norm: 0.9999996018874485, iteration: 38370
loss: 1.0253515243530273,grad_norm: 0.9980982914582746, iteration: 38371
loss: 1.033320665359497,grad_norm: 0.9999992275144349, iteration: 38372
loss: 1.0257692337036133,grad_norm: 0.8569524808826868, iteration: 38373
loss: 0.9848756194114685,grad_norm: 0.9031200049896508, iteration: 38374
loss: 1.0715478658676147,grad_norm: 0.9999996606344934, iteration: 38375
loss: 1.0104795694351196,grad_norm: 0.93455982401457, iteration: 38376
loss: 1.0067520141601562,grad_norm: 0.9999990943695518, iteration: 38377
loss: 1.0127934217453003,grad_norm: 0.7920971872008202, iteration: 38378
loss: 1.0000578165054321,grad_norm: 0.9999993422050106, iteration: 38379
loss: 1.0167386531829834,grad_norm: 0.9999990525472013, iteration: 38380
loss: 1.0007603168487549,grad_norm: 0.9999990919010311, iteration: 38381
loss: 1.0101490020751953,grad_norm: 0.9999990739672332, iteration: 38382
loss: 1.0262514352798462,grad_norm: 0.9999990118021526, iteration: 38383
loss: 0.9855591654777527,grad_norm: 0.8529706003575808, iteration: 38384
loss: 1.0181372165679932,grad_norm: 0.8616533142720537, iteration: 38385
loss: 1.0723520517349243,grad_norm: 0.9999996161790015, iteration: 38386
loss: 0.9768140912055969,grad_norm: 0.9688712809813855, iteration: 38387
loss: 1.0732005834579468,grad_norm: 0.9999992231402363, iteration: 38388
loss: 0.9905444383621216,grad_norm: 0.9999992598200647, iteration: 38389
loss: 1.0670411586761475,grad_norm: 0.9999992338403074, iteration: 38390
loss: 0.9856621623039246,grad_norm: 0.9999989486420422, iteration: 38391
loss: 1.037244439125061,grad_norm: 0.9256825214644, iteration: 38392
loss: 1.0056841373443604,grad_norm: 0.999999144004481, iteration: 38393
loss: 0.981330156326294,grad_norm: 0.99999914412214, iteration: 38394
loss: 1.0100423097610474,grad_norm: 0.9442967534400019, iteration: 38395
loss: 0.9781774878501892,grad_norm: 0.9999990678969404, iteration: 38396
loss: 0.9859339594841003,grad_norm: 0.9939122378079387, iteration: 38397
loss: 1.0379599332809448,grad_norm: 0.99999964695621, iteration: 38398
loss: 0.9761055111885071,grad_norm: 0.9897860062528181, iteration: 38399
loss: 1.0674400329589844,grad_norm: 0.9999998237982503, iteration: 38400
loss: 1.0315254926681519,grad_norm: 0.999999283096135, iteration: 38401
loss: 1.0429288148880005,grad_norm: 0.9999996689665103, iteration: 38402
loss: 1.0297143459320068,grad_norm: 0.9271945142186011, iteration: 38403
loss: 0.9907746911048889,grad_norm: 0.9999990901251224, iteration: 38404
loss: 1.0292118787765503,grad_norm: 0.999999355749301, iteration: 38405
loss: 1.0668585300445557,grad_norm: 0.9999993544819549, iteration: 38406
loss: 1.0633010864257812,grad_norm: 0.9999998500394629, iteration: 38407
loss: 1.0577672719955444,grad_norm: 0.9999993551079654, iteration: 38408
loss: 1.002707839012146,grad_norm: 0.9999998006988527, iteration: 38409
loss: 0.9828658699989319,grad_norm: 0.9149275249087258, iteration: 38410
loss: 0.9792173504829407,grad_norm: 0.9780664528105469, iteration: 38411
loss: 0.9807273745536804,grad_norm: 0.9999992266267878, iteration: 38412
loss: 0.9965400099754333,grad_norm: 0.9988880734245902, iteration: 38413
loss: 0.9442359805107117,grad_norm: 0.9736226515085754, iteration: 38414
loss: 1.005280613899231,grad_norm: 0.9667767388002003, iteration: 38415
loss: 0.9741272330284119,grad_norm: 0.9999991479648984, iteration: 38416
loss: 0.9733103513717651,grad_norm: 0.9689224213353167, iteration: 38417
loss: 0.9714479446411133,grad_norm: 0.9787794294068838, iteration: 38418
loss: 1.0233843326568604,grad_norm: 0.9999991388409373, iteration: 38419
loss: 0.9719673991203308,grad_norm: 0.9999992889831167, iteration: 38420
loss: 1.0225434303283691,grad_norm: 0.9999989703911681, iteration: 38421
loss: 1.0271098613739014,grad_norm: 0.9999991017115856, iteration: 38422
loss: 0.9986169934272766,grad_norm: 0.9999991734199818, iteration: 38423
loss: 1.0020040273666382,grad_norm: 0.9999992795733667, iteration: 38424
loss: 1.071227788925171,grad_norm: 0.9999999573603405, iteration: 38425
loss: 0.970114529132843,grad_norm: 0.9329786100393821, iteration: 38426
loss: 1.0153244733810425,grad_norm: 0.9999991806351031, iteration: 38427
loss: 1.0343412160873413,grad_norm: 0.7999151265242659, iteration: 38428
loss: 1.0181037187576294,grad_norm: 0.7947700624198737, iteration: 38429
loss: 1.0261967182159424,grad_norm: 0.9225639934959873, iteration: 38430
loss: 1.0049277544021606,grad_norm: 0.9999992053971263, iteration: 38431
loss: 1.0292699337005615,grad_norm: 0.99999920273208, iteration: 38432
loss: 0.9924252033233643,grad_norm: 0.9999998332845743, iteration: 38433
loss: 1.0572173595428467,grad_norm: 0.9433829931371979, iteration: 38434
loss: 1.0415704250335693,grad_norm: 0.7630350658784542, iteration: 38435
loss: 0.9983363151550293,grad_norm: 0.9243037360884339, iteration: 38436
loss: 1.039589524269104,grad_norm: 0.9470656797806388, iteration: 38437
loss: 1.0237295627593994,grad_norm: 0.9999990882409556, iteration: 38438
loss: 1.0409399271011353,grad_norm: 0.9945757112166014, iteration: 38439
loss: 1.0752609968185425,grad_norm: 0.9999997766147178, iteration: 38440
loss: 0.9750566482543945,grad_norm: 0.9999991412263801, iteration: 38441
loss: 1.061006784439087,grad_norm: 0.9999995447304423, iteration: 38442
loss: 0.9596889615058899,grad_norm: 0.9575135072752893, iteration: 38443
loss: 1.0467486381530762,grad_norm: 0.9457143401374627, iteration: 38444
loss: 0.97119140625,grad_norm: 0.9999991478825407, iteration: 38445
loss: 0.9741232395172119,grad_norm: 0.9999991496558627, iteration: 38446
loss: 0.9951727390289307,grad_norm: 0.9889484243371014, iteration: 38447
loss: 1.0535478591918945,grad_norm: 0.9999994982771484, iteration: 38448
loss: 1.140849232673645,grad_norm: 0.9999997405653906, iteration: 38449
loss: 1.0120270252227783,grad_norm: 0.9999992421211792, iteration: 38450
loss: 0.982325553894043,grad_norm: 0.889829538040533, iteration: 38451
loss: 1.0509943962097168,grad_norm: 0.9999991347069382, iteration: 38452
loss: 0.9990989565849304,grad_norm: 0.9999994127738125, iteration: 38453
loss: 0.9979267120361328,grad_norm: 0.9598497892113792, iteration: 38454
loss: 1.000669002532959,grad_norm: 0.9307361962940688, iteration: 38455
loss: 1.0664336681365967,grad_norm: 0.9999993884318983, iteration: 38456
loss: 1.021742582321167,grad_norm: 0.824667208186624, iteration: 38457
loss: 1.0037375688552856,grad_norm: 0.9999993988975996, iteration: 38458
loss: 1.0376089811325073,grad_norm: 0.9999990664712549, iteration: 38459
loss: 0.958014726638794,grad_norm: 0.9452377157289513, iteration: 38460
loss: 1.0326634645462036,grad_norm: 0.9999991849066897, iteration: 38461
loss: 0.9970685839653015,grad_norm: 0.8824437503711535, iteration: 38462
loss: 0.990883469581604,grad_norm: 0.9999991177537954, iteration: 38463
loss: 1.0330320596694946,grad_norm: 0.9999995317509529, iteration: 38464
loss: 1.047550082206726,grad_norm: 0.9999998649448318, iteration: 38465
loss: 1.0675679445266724,grad_norm: 0.9999993413653768, iteration: 38466
loss: 1.0152873992919922,grad_norm: 0.999999045285245, iteration: 38467
loss: 1.0613460540771484,grad_norm: 0.9999994803582916, iteration: 38468
loss: 1.014664649963379,grad_norm: 0.9999990731599191, iteration: 38469
loss: 1.0639008283615112,grad_norm: 0.9999998015266004, iteration: 38470
loss: 1.0091477632522583,grad_norm: 0.9459975474026148, iteration: 38471
loss: 0.9954047799110413,grad_norm: 0.9999990879455358, iteration: 38472
loss: 0.9821907877922058,grad_norm: 0.9999990352331123, iteration: 38473
loss: 0.9788253307342529,grad_norm: 0.8945253980063212, iteration: 38474
loss: 0.9910643696784973,grad_norm: 0.7883616577357989, iteration: 38475
loss: 1.0193284749984741,grad_norm: 0.9999991710295434, iteration: 38476
loss: 0.9737838506698608,grad_norm: 0.9999992598836438, iteration: 38477
loss: 1.0333213806152344,grad_norm: 0.9999990028462997, iteration: 38478
loss: 0.9825882315635681,grad_norm: 0.9157165832476069, iteration: 38479
loss: 1.0135934352874756,grad_norm: 0.9999990321153485, iteration: 38480
loss: 1.0053378343582153,grad_norm: 0.8608674323693907, iteration: 38481
loss: 1.0414859056472778,grad_norm: 0.9999990994233329, iteration: 38482
loss: 1.047389268875122,grad_norm: 0.9999992665013153, iteration: 38483
loss: 1.0336545705795288,grad_norm: 0.999999226747019, iteration: 38484
loss: 1.0258736610412598,grad_norm: 0.9999991185038394, iteration: 38485
loss: 1.0115963220596313,grad_norm: 0.999999154823203, iteration: 38486
loss: 1.0336246490478516,grad_norm: 0.9999998497097246, iteration: 38487
loss: 0.9983250498771667,grad_norm: 0.9999995322626568, iteration: 38488
loss: 1.026705026626587,grad_norm: 0.9999991188420401, iteration: 38489
loss: 1.029259443283081,grad_norm: 0.9999990270022173, iteration: 38490
loss: 1.0313130617141724,grad_norm: 0.9999992793712341, iteration: 38491
loss: 1.02353036403656,grad_norm: 0.9580042979163674, iteration: 38492
loss: 0.970217227935791,grad_norm: 0.9999990766742777, iteration: 38493
loss: 1.0341989994049072,grad_norm: 0.9999994618754731, iteration: 38494
loss: 0.9963863492012024,grad_norm: 0.9999990958909311, iteration: 38495
loss: 1.0405842065811157,grad_norm: 0.9971578279640809, iteration: 38496
loss: 0.9928745627403259,grad_norm: 0.9999992828199572, iteration: 38497
loss: 1.0555037260055542,grad_norm: 0.9999993758572724, iteration: 38498
loss: 0.9981217384338379,grad_norm: 0.7395733427473896, iteration: 38499
loss: 1.0326740741729736,grad_norm: 0.9999993377201699, iteration: 38500
loss: 1.012026309967041,grad_norm: 0.7900448437366051, iteration: 38501
loss: 1.0775227546691895,grad_norm: 0.9999994040782126, iteration: 38502
loss: 1.0070619583129883,grad_norm: 0.9999991065214207, iteration: 38503
loss: 1.026310920715332,grad_norm: 0.9999992376316531, iteration: 38504
loss: 0.9895002245903015,grad_norm: 0.9049482821178687, iteration: 38505
loss: 1.0034723281860352,grad_norm: 0.9999991606773161, iteration: 38506
loss: 0.9807984828948975,grad_norm: 0.9999991624420189, iteration: 38507
loss: 1.0448634624481201,grad_norm: 0.9999994657558449, iteration: 38508
loss: 0.9735531210899353,grad_norm: 0.9999999734492312, iteration: 38509
loss: 1.071657657623291,grad_norm: 0.9999997481812216, iteration: 38510
loss: 1.0347994565963745,grad_norm: 0.999999577816445, iteration: 38511
loss: 1.0183781385421753,grad_norm: 0.9999998211256862, iteration: 38512
loss: 1.0479427576065063,grad_norm: 0.9999995996825471, iteration: 38513
loss: 1.0087203979492188,grad_norm: 0.9482616072329343, iteration: 38514
loss: 1.0395921468734741,grad_norm: 0.9999996873192737, iteration: 38515
loss: 0.9638018608093262,grad_norm: 0.8208048605515007, iteration: 38516
loss: 1.0302588939666748,grad_norm: 0.8155183407580938, iteration: 38517
loss: 1.0056716203689575,grad_norm: 0.9071758617433369, iteration: 38518
loss: 1.0704221725463867,grad_norm: 0.9999997171701336, iteration: 38519
loss: 1.0292171239852905,grad_norm: 0.9999991911764842, iteration: 38520
loss: 1.04347562789917,grad_norm: 0.999999124424415, iteration: 38521
loss: 0.985478401184082,grad_norm: 0.9999993545362361, iteration: 38522
loss: 1.0220556259155273,grad_norm: 0.9453188531071576, iteration: 38523
loss: 1.0056475400924683,grad_norm: 0.9999996083313599, iteration: 38524
loss: 0.9903618097305298,grad_norm: 0.9999998930335287, iteration: 38525
loss: 1.0586720705032349,grad_norm: 0.999999164136694, iteration: 38526
loss: 1.012823462486267,grad_norm: 0.9158894494120104, iteration: 38527
loss: 1.0231989622116089,grad_norm: 0.973727676900114, iteration: 38528
loss: 1.0435394048690796,grad_norm: 0.9999994158168917, iteration: 38529
loss: 1.0139364004135132,grad_norm: 0.897079775706564, iteration: 38530
loss: 0.9547878503799438,grad_norm: 0.9808876232420083, iteration: 38531
loss: 1.0475929975509644,grad_norm: 0.9999994626486791, iteration: 38532
loss: 1.0034176111221313,grad_norm: 0.9999991212221878, iteration: 38533
loss: 1.023794174194336,grad_norm: 0.9999991617798213, iteration: 38534
loss: 0.9867461919784546,grad_norm: 0.8486656695494085, iteration: 38535
loss: 1.0188961029052734,grad_norm: 0.9999991725324363, iteration: 38536
loss: 1.0418145656585693,grad_norm: 0.9999996410404056, iteration: 38537
loss: 1.0317952632904053,grad_norm: 0.9999990859386132, iteration: 38538
loss: 1.0278441905975342,grad_norm: 0.997929651314749, iteration: 38539
loss: 0.9989306926727295,grad_norm: 0.999999324092856, iteration: 38540
loss: 1.015375018119812,grad_norm: 0.7497875506075693, iteration: 38541
loss: 0.9713983535766602,grad_norm: 0.9449850121761785, iteration: 38542
loss: 1.0140568017959595,grad_norm: 0.9999994485751265, iteration: 38543
loss: 1.0253849029541016,grad_norm: 0.9283634014558259, iteration: 38544
loss: 1.0168793201446533,grad_norm: 0.9999993656439914, iteration: 38545
loss: 1.0386019945144653,grad_norm: 0.9454519582812825, iteration: 38546
loss: 1.0203909873962402,grad_norm: 0.9999996352415792, iteration: 38547
loss: 1.0507009029388428,grad_norm: 0.9999999317167869, iteration: 38548
loss: 1.0791895389556885,grad_norm: 0.999999225048071, iteration: 38549
loss: 1.0203896760940552,grad_norm: 0.9999996136232333, iteration: 38550
loss: 0.9780546426773071,grad_norm: 0.914106923669596, iteration: 38551
loss: 0.9786805510520935,grad_norm: 0.9999994546597099, iteration: 38552
loss: 0.9970378279685974,grad_norm: 0.999999130337067, iteration: 38553
loss: 0.9546266794204712,grad_norm: 0.9362536475530991, iteration: 38554
loss: 0.9986516237258911,grad_norm: 0.9999990344475552, iteration: 38555
loss: 0.9964028596878052,grad_norm: 0.9999991204490684, iteration: 38556
loss: 1.172209620475769,grad_norm: 0.999999630021876, iteration: 38557
loss: 1.0570104122161865,grad_norm: 0.9999990483117416, iteration: 38558
loss: 1.0618236064910889,grad_norm: 0.9999995888300902, iteration: 38559
loss: 1.0150998830795288,grad_norm: 0.9999991256606293, iteration: 38560
loss: 1.0751439332962036,grad_norm: 0.9999998970810716, iteration: 38561
loss: 1.0054190158843994,grad_norm: 0.8037700122867174, iteration: 38562
loss: 1.1047375202178955,grad_norm: 0.9999998425680825, iteration: 38563
loss: 1.0609376430511475,grad_norm: 0.9999991386221831, iteration: 38564
loss: 0.9915130138397217,grad_norm: 0.9999992890237724, iteration: 38565
loss: 0.9923718571662903,grad_norm: 0.999999271308039, iteration: 38566
loss: 1.0002304315567017,grad_norm: 0.9999993852543014, iteration: 38567
loss: 1.0155049562454224,grad_norm: 0.999999365875609, iteration: 38568
loss: 1.0084254741668701,grad_norm: 0.8886573775118544, iteration: 38569
loss: 1.0217525959014893,grad_norm: 0.903102545212418, iteration: 38570
loss: 0.9909286499023438,grad_norm: 0.9029751688931748, iteration: 38571
loss: 0.9732427000999451,grad_norm: 0.9999991563404831, iteration: 38572
loss: 1.004624366760254,grad_norm: 0.9999992506134211, iteration: 38573
loss: 0.9885315895080566,grad_norm: 0.9999991789677979, iteration: 38574
loss: 1.0275745391845703,grad_norm: 0.9061466219728017, iteration: 38575
loss: 1.0173614025115967,grad_norm: 0.8962648626207279, iteration: 38576
loss: 1.034598708152771,grad_norm: 0.9999990767918467, iteration: 38577
loss: 0.9880122542381287,grad_norm: 0.9893088085489217, iteration: 38578
loss: 1.0179201364517212,grad_norm: 0.9999993159085654, iteration: 38579
loss: 0.9703342318534851,grad_norm: 0.8347290528641681, iteration: 38580
loss: 1.1008368730545044,grad_norm: 0.9597901175215057, iteration: 38581
loss: 1.044916033744812,grad_norm: 0.9999997978890436, iteration: 38582
loss: 1.0937563180923462,grad_norm: 0.999999355829784, iteration: 38583
loss: 0.9686559438705444,grad_norm: 0.9702129354332563, iteration: 38584
loss: 0.9857510924339294,grad_norm: 0.9999990755638425, iteration: 38585
loss: 1.0981016159057617,grad_norm: 0.9999997279731279, iteration: 38586
loss: 1.1121338605880737,grad_norm: 0.9999999573955273, iteration: 38587
loss: 0.9791966676712036,grad_norm: 0.9118350858660187, iteration: 38588
loss: 1.013379454612732,grad_norm: 0.9999991161602902, iteration: 38589
loss: 0.974044144153595,grad_norm: 0.8936718878806504, iteration: 38590
loss: 1.0542495250701904,grad_norm: 0.999999640757361, iteration: 38591
loss: 1.0034087896347046,grad_norm: 0.9999995523123021, iteration: 38592
loss: 1.060030221939087,grad_norm: 0.9999997700084791, iteration: 38593
loss: 0.9682924151420593,grad_norm: 0.9999990893660715, iteration: 38594
loss: 1.0134685039520264,grad_norm: 0.9999991605583084, iteration: 38595
loss: 1.033754587173462,grad_norm: 0.9999992437609925, iteration: 38596
loss: 1.029952883720398,grad_norm: 0.9999992725470976, iteration: 38597
loss: 0.9779671430587769,grad_norm: 0.999998957157612, iteration: 38598
loss: 0.995228111743927,grad_norm: 0.9999994673633673, iteration: 38599
loss: 1.0237269401550293,grad_norm: 0.9213879059761718, iteration: 38600
loss: 0.9597088098526001,grad_norm: 0.9410674801133923, iteration: 38601
loss: 1.047398567199707,grad_norm: 0.9999996327405161, iteration: 38602
loss: 1.0183384418487549,grad_norm: 0.8357125740467196, iteration: 38603
loss: 1.0377895832061768,grad_norm: 0.9999993435520018, iteration: 38604
loss: 1.1332732439041138,grad_norm: 0.9999996826817898, iteration: 38605
loss: 1.0614029169082642,grad_norm: 0.9999990484301107, iteration: 38606
loss: 1.006386160850525,grad_norm: 0.9938618048584822, iteration: 38607
loss: 0.9847835898399353,grad_norm: 0.9999991624524309, iteration: 38608
loss: 1.0239222049713135,grad_norm: 0.9913495933438398, iteration: 38609
loss: 1.0291025638580322,grad_norm: 0.9999993795197815, iteration: 38610
loss: 1.0514265298843384,grad_norm: 0.7655268146887785, iteration: 38611
loss: 1.034155011177063,grad_norm: 0.9999994268222876, iteration: 38612
loss: 0.9891220331192017,grad_norm: 0.9673637894710627, iteration: 38613
loss: 1.017153263092041,grad_norm: 0.9493012767528104, iteration: 38614
loss: 0.9967003464698792,grad_norm: 0.9655624237005517, iteration: 38615
loss: 0.9974303245544434,grad_norm: 0.9895299613508493, iteration: 38616
loss: 0.9610984325408936,grad_norm: 0.9595856377916109, iteration: 38617
loss: 0.9676562547683716,grad_norm: 0.9999989869688073, iteration: 38618
loss: 1.1642746925354004,grad_norm: 1.000000092218772, iteration: 38619
loss: 0.9903388023376465,grad_norm: 0.9999993657855272, iteration: 38620
loss: 0.9987073540687561,grad_norm: 0.8913190360070165, iteration: 38621
loss: 1.042248010635376,grad_norm: 0.9598330926592118, iteration: 38622
loss: 0.9589254856109619,grad_norm: 0.9999991507333914, iteration: 38623
loss: 1.0308765172958374,grad_norm: 0.9767762252375249, iteration: 38624
loss: 1.018583059310913,grad_norm: 0.8776248849654564, iteration: 38625
loss: 0.9873504042625427,grad_norm: 0.8220681173477474, iteration: 38626
loss: 1.0078943967819214,grad_norm: 0.9999993638517344, iteration: 38627
loss: 1.0142940282821655,grad_norm: 0.9999991968302682, iteration: 38628
loss: 0.97104811668396,grad_norm: 0.9999992550490764, iteration: 38629
loss: 0.9925819635391235,grad_norm: 0.8764840584602738, iteration: 38630
loss: 0.970235288143158,grad_norm: 0.9999994468385943, iteration: 38631
loss: 1.0888445377349854,grad_norm: 0.9999993646328695, iteration: 38632
loss: 0.9934468865394592,grad_norm: 0.9999990702879382, iteration: 38633
loss: 1.0500422716140747,grad_norm: 0.9999992786905413, iteration: 38634
loss: 0.9878511428833008,grad_norm: 0.9999996967719661, iteration: 38635
loss: 0.9979909062385559,grad_norm: 0.9572311763262259, iteration: 38636
loss: 0.9978658556938171,grad_norm: 0.9924040922506187, iteration: 38637
loss: 0.9892590045928955,grad_norm: 0.7711604018241741, iteration: 38638
loss: 1.0200389623641968,grad_norm: 0.9074634508054459, iteration: 38639
loss: 1.004915714263916,grad_norm: 0.9865065808014716, iteration: 38640
loss: 0.9727467894554138,grad_norm: 0.9999992181293851, iteration: 38641
loss: 1.0404129028320312,grad_norm: 0.9999999699388737, iteration: 38642
loss: 1.027104139328003,grad_norm: 0.9978148536136615, iteration: 38643
loss: 1.018351674079895,grad_norm: 0.9848807012537747, iteration: 38644
loss: 1.0090982913970947,grad_norm: 0.9999992213386688, iteration: 38645
loss: 1.0855729579925537,grad_norm: 0.999999960619762, iteration: 38646
loss: 1.0368047952651978,grad_norm: 0.8978337875256189, iteration: 38647
loss: 1.0077182054519653,grad_norm: 0.9079392992539297, iteration: 38648
loss: 1.0004984140396118,grad_norm: 0.9999991995480347, iteration: 38649
loss: 1.0399174690246582,grad_norm: 0.9999997112494594, iteration: 38650
loss: 0.9984515905380249,grad_norm: 0.9999990051708081, iteration: 38651
loss: 1.0439962148666382,grad_norm: 0.9999996210498568, iteration: 38652
loss: 1.0193625688552856,grad_norm: 0.9999989343936126, iteration: 38653
loss: 0.9766401648521423,grad_norm: 0.9987794971278887, iteration: 38654
loss: 1.0120593309402466,grad_norm: 0.9700271515321262, iteration: 38655
loss: 1.0031946897506714,grad_norm: 0.9999991152989046, iteration: 38656
loss: 0.9715437889099121,grad_norm: 0.9735013111968526, iteration: 38657
loss: 0.9987221360206604,grad_norm: 0.9447584308946413, iteration: 38658
loss: 1.0149016380310059,grad_norm: 0.8322180575391981, iteration: 38659
loss: 1.2315043210983276,grad_norm: 0.9999996943380899, iteration: 38660
loss: 0.9870443344116211,grad_norm: 0.9343916827782613, iteration: 38661
loss: 1.0239425897598267,grad_norm: 0.97915019705462, iteration: 38662
loss: 1.0042195320129395,grad_norm: 0.8092619408885515, iteration: 38663
loss: 0.9954351782798767,grad_norm: 0.9205331975651501, iteration: 38664
loss: 0.9881995320320129,grad_norm: 0.89319318558091, iteration: 38665
loss: 0.9884378910064697,grad_norm: 0.9999991703639849, iteration: 38666
loss: 0.9863966107368469,grad_norm: 0.9999991590752626, iteration: 38667
loss: 1.0256174802780151,grad_norm: 0.9999992014451745, iteration: 38668
loss: 1.041408896446228,grad_norm: 0.9999992849456525, iteration: 38669
loss: 0.9717326760292053,grad_norm: 0.9309519457940285, iteration: 38670
loss: 0.9754831790924072,grad_norm: 0.9277477722207514, iteration: 38671
loss: 1.069437026977539,grad_norm: 0.9999994077808674, iteration: 38672
loss: 0.994414746761322,grad_norm: 0.9999990711935985, iteration: 38673
loss: 0.9843981862068176,grad_norm: 0.9999992063074459, iteration: 38674
loss: 1.0276801586151123,grad_norm: 0.8357078989216369, iteration: 38675
loss: 1.015464425086975,grad_norm: 0.7970225544992369, iteration: 38676
loss: 1.1766085624694824,grad_norm: 0.9999991470131556, iteration: 38677
loss: 1.0090011358261108,grad_norm: 0.9999991433577406, iteration: 38678
loss: 1.0337411165237427,grad_norm: 0.9999996485945537, iteration: 38679
loss: 1.007938027381897,grad_norm: 0.9344362964223073, iteration: 38680
loss: 1.016633152961731,grad_norm: 0.776170999945918, iteration: 38681
loss: 1.0858254432678223,grad_norm: 0.9999998677697421, iteration: 38682
loss: 1.0082013607025146,grad_norm: 0.9048128745085621, iteration: 38683
loss: 1.0040345191955566,grad_norm: 0.9999991029425775, iteration: 38684
loss: 0.9794330596923828,grad_norm: 0.9999991176872498, iteration: 38685
loss: 1.033293604850769,grad_norm: 0.9999992669224003, iteration: 38686
loss: 0.9801176190376282,grad_norm: 0.9999992881905954, iteration: 38687
loss: 1.0842232704162598,grad_norm: 0.9999997331228012, iteration: 38688
loss: 0.9775219559669495,grad_norm: 0.955320146954471, iteration: 38689
loss: 1.106424331665039,grad_norm: 0.9999990859193914, iteration: 38690
loss: 1.0034971237182617,grad_norm: 0.9999990103644133, iteration: 38691
loss: 0.9881054162979126,grad_norm: 0.9552891862090973, iteration: 38692
loss: 1.0986855030059814,grad_norm: 0.9999995547429641, iteration: 38693
loss: 1.0221350193023682,grad_norm: 0.9319205556649612, iteration: 38694
loss: 1.0061851739883423,grad_norm: 0.9999991103367282, iteration: 38695
loss: 1.0225898027420044,grad_norm: 0.9952921369694644, iteration: 38696
loss: 1.116479754447937,grad_norm: 0.999999712204792, iteration: 38697
loss: 1.0116512775421143,grad_norm: 0.9999991418031801, iteration: 38698
loss: 1.0704818964004517,grad_norm: 0.9999993931938286, iteration: 38699
loss: 1.0282355546951294,grad_norm: 0.9999992531575621, iteration: 38700
loss: 1.02119779586792,grad_norm: 0.9999990723759496, iteration: 38701
loss: 1.0254454612731934,grad_norm: 0.9999994610867833, iteration: 38702
loss: 1.0025527477264404,grad_norm: 0.9999992869525667, iteration: 38703
loss: 1.017496109008789,grad_norm: 0.7286959684550206, iteration: 38704
loss: 0.9961497783660889,grad_norm: 0.7798947061258887, iteration: 38705
loss: 1.0353741645812988,grad_norm: 0.9999990871681635, iteration: 38706
loss: 1.068236231803894,grad_norm: 0.9999992568724574, iteration: 38707
loss: 0.973788321018219,grad_norm: 0.9807070734053828, iteration: 38708
loss: 0.961093008518219,grad_norm: 0.9999990608625082, iteration: 38709
loss: 0.9932511448860168,grad_norm: 0.9999995743564374, iteration: 38710
loss: 1.0040558576583862,grad_norm: 0.8991976572388211, iteration: 38711
loss: 1.0744078159332275,grad_norm: 0.9999993336306506, iteration: 38712
loss: 1.0385279655456543,grad_norm: 0.9999995808718096, iteration: 38713
loss: 0.9973129034042358,grad_norm: 0.9431149688122591, iteration: 38714
loss: 1.0205847024917603,grad_norm: 0.9192638265854158, iteration: 38715
loss: 1.0823255777359009,grad_norm: 0.9999992001016615, iteration: 38716
loss: 1.0205225944519043,grad_norm: 0.9999996816662715, iteration: 38717
loss: 1.0654124021530151,grad_norm: 0.9999995951499783, iteration: 38718
loss: 1.029988169670105,grad_norm: 0.853482413492219, iteration: 38719
loss: 0.9571396708488464,grad_norm: 0.9217771259153973, iteration: 38720
loss: 1.0237959623336792,grad_norm: 0.9999996266091629, iteration: 38721
loss: 0.9852515459060669,grad_norm: 0.9999996322483387, iteration: 38722
loss: 1.0796918869018555,grad_norm: 0.9999996528248861, iteration: 38723
loss: 1.030164361000061,grad_norm: 0.9999989903736226, iteration: 38724
loss: 1.0430129766464233,grad_norm: 0.9999995541312509, iteration: 38725
loss: 0.9534244537353516,grad_norm: 0.9999990013175725, iteration: 38726
loss: 1.036132574081421,grad_norm: 0.999999659780584, iteration: 38727
loss: 1.029701828956604,grad_norm: 0.9999990207396297, iteration: 38728
loss: 0.9881367683410645,grad_norm: 0.9946720502256755, iteration: 38729
loss: 1.0185304880142212,grad_norm: 0.9999990957474144, iteration: 38730
loss: 0.9649898409843445,grad_norm: 0.8562734035210596, iteration: 38731
loss: 0.9923185110092163,grad_norm: 0.9169111161016221, iteration: 38732
loss: 1.0176265239715576,grad_norm: 0.9999991084692901, iteration: 38733
loss: 1.0057979822158813,grad_norm: 0.781886822775685, iteration: 38734
loss: 0.9829931855201721,grad_norm: 0.9999997022434468, iteration: 38735
loss: 0.9817166328430176,grad_norm: 0.9999992200624949, iteration: 38736
loss: 1.0247697830200195,grad_norm: 0.7980650266417099, iteration: 38737
loss: 1.010508418083191,grad_norm: 0.9752880983184435, iteration: 38738
loss: 1.0370676517486572,grad_norm: 0.9999991461096674, iteration: 38739
loss: 1.0462595224380493,grad_norm: 0.9999995565059094, iteration: 38740
loss: 0.9836992025375366,grad_norm: 0.9787299378121248, iteration: 38741
loss: 1.0674039125442505,grad_norm: 0.9999995925209424, iteration: 38742
loss: 1.0335490703582764,grad_norm: 0.9999994586532508, iteration: 38743
loss: 1.00286066532135,grad_norm: 0.8554856138720899, iteration: 38744
loss: 1.0004712343215942,grad_norm: 0.9999990414956187, iteration: 38745
loss: 0.9781822562217712,grad_norm: 0.9008448694911579, iteration: 38746
loss: 0.9914422035217285,grad_norm: 0.8148681040711473, iteration: 38747
loss: 1.0059140920639038,grad_norm: 0.9999990772648597, iteration: 38748
loss: 1.000415563583374,grad_norm: 0.8479782614919719, iteration: 38749
loss: 0.9994702935218811,grad_norm: 0.8067894985439291, iteration: 38750
loss: 0.9848324060440063,grad_norm: 0.8946711653233267, iteration: 38751
loss: 1.0204908847808838,grad_norm: 0.9999996578584984, iteration: 38752
loss: 1.098326325416565,grad_norm: 0.9999992188753426, iteration: 38753
loss: 0.9885349869728088,grad_norm: 0.9100032942667302, iteration: 38754
loss: 0.9911862015724182,grad_norm: 0.9999991862693471, iteration: 38755
loss: 1.0095139741897583,grad_norm: 0.999998996315862, iteration: 38756
loss: 0.9931476712226868,grad_norm: 0.9999993313056639, iteration: 38757
loss: 1.0602285861968994,grad_norm: 0.9999992975496224, iteration: 38758
loss: 1.0697475671768188,grad_norm: 0.9999993069895388, iteration: 38759
loss: 1.0171310901641846,grad_norm: 0.999999220467315, iteration: 38760
loss: 1.0259941816329956,grad_norm: 0.9999992971566687, iteration: 38761
loss: 1.0252327919006348,grad_norm: 0.9999990001620467, iteration: 38762
loss: 1.0284638404846191,grad_norm: 0.9999995848032753, iteration: 38763
loss: 0.9884546399116516,grad_norm: 0.9999997774440161, iteration: 38764
loss: 0.9785226583480835,grad_norm: 0.9999990983557181, iteration: 38765
loss: 1.0589210987091064,grad_norm: 0.9999993413308654, iteration: 38766
loss: 0.9618032574653625,grad_norm: 0.9999992751932798, iteration: 38767
loss: 1.010114073753357,grad_norm: 0.9999991171177419, iteration: 38768
loss: 0.989011287689209,grad_norm: 0.8436778706624166, iteration: 38769
loss: 0.9815473556518555,grad_norm: 0.9212815906594769, iteration: 38770
loss: 1.0097548961639404,grad_norm: 0.7667957986849142, iteration: 38771
loss: 1.0096755027770996,grad_norm: 0.999999055554586, iteration: 38772
loss: 0.9984527230262756,grad_norm: 0.9545548230607637, iteration: 38773
loss: 1.0274629592895508,grad_norm: 0.9999990253691244, iteration: 38774
loss: 1.0088437795639038,grad_norm: 0.9999991280832748, iteration: 38775
loss: 1.050887942314148,grad_norm: 0.9999990446546999, iteration: 38776
loss: 1.032397747039795,grad_norm: 0.9999995615460052, iteration: 38777
loss: 1.0065563917160034,grad_norm: 0.8333674129687003, iteration: 38778
loss: 0.9741849303245544,grad_norm: 0.9999992248281572, iteration: 38779
loss: 1.0815355777740479,grad_norm: 0.999999577975011, iteration: 38780
loss: 1.0427911281585693,grad_norm: 0.9523228462549043, iteration: 38781
loss: 1.0025644302368164,grad_norm: 0.849551332283029, iteration: 38782
loss: 1.0320775508880615,grad_norm: 0.999999690805519, iteration: 38783
loss: 0.9949303269386292,grad_norm: 0.9592512596496168, iteration: 38784
loss: 1.009135127067566,grad_norm: 0.9999995183427846, iteration: 38785
loss: 1.0258656740188599,grad_norm: 0.9368375087181906, iteration: 38786
loss: 1.022180199623108,grad_norm: 0.9897315969044403, iteration: 38787
loss: 1.055639386177063,grad_norm: 0.9999991795247353, iteration: 38788
loss: 1.0527280569076538,grad_norm: 0.9999991157532564, iteration: 38789
loss: 1.0200512409210205,grad_norm: 0.8584023777584926, iteration: 38790
loss: 1.0497257709503174,grad_norm: 0.9999995787752887, iteration: 38791
loss: 1.0124355554580688,grad_norm: 0.9999991517950698, iteration: 38792
loss: 1.0390641689300537,grad_norm: 0.9999997029670409, iteration: 38793
loss: 1.0372607707977295,grad_norm: 0.9999992680111284, iteration: 38794
loss: 1.0272966623306274,grad_norm: 0.9999996391040639, iteration: 38795
loss: 1.0260339975357056,grad_norm: 0.9771130713865549, iteration: 38796
loss: 0.9919497966766357,grad_norm: 0.999999141797955, iteration: 38797
loss: 0.9988782405853271,grad_norm: 0.9865899302701702, iteration: 38798
loss: 1.0167570114135742,grad_norm: 0.9999993854138186, iteration: 38799
loss: 1.0173423290252686,grad_norm: 0.9137105723338345, iteration: 38800
loss: 1.0344984531402588,grad_norm: 0.9999996397633344, iteration: 38801
loss: 1.0069856643676758,grad_norm: 0.9141158536045617, iteration: 38802
loss: 1.0102499723434448,grad_norm: 0.9999991068068909, iteration: 38803
loss: 1.0349959135055542,grad_norm: 0.9999993015365775, iteration: 38804
loss: 1.0334513187408447,grad_norm: 0.9999996391511733, iteration: 38805
loss: 1.056444525718689,grad_norm: 0.9999991949590017, iteration: 38806
loss: 1.0390535593032837,grad_norm: 0.9999991140077189, iteration: 38807
loss: 1.0062824487686157,grad_norm: 0.9999992210524876, iteration: 38808
loss: 1.0429363250732422,grad_norm: 0.8737824439684932, iteration: 38809
loss: 1.002191185951233,grad_norm: 0.9997901379635455, iteration: 38810
loss: 0.9975267648696899,grad_norm: 0.8111201333424043, iteration: 38811
loss: 0.9999975562095642,grad_norm: 0.9723659514544789, iteration: 38812
loss: 0.9909560680389404,grad_norm: 0.7747474809253437, iteration: 38813
loss: 1.0768736600875854,grad_norm: 0.9999997419710047, iteration: 38814
loss: 1.0884513854980469,grad_norm: 0.9999993367112484, iteration: 38815
loss: 1.0222195386886597,grad_norm: 0.9999993070859707, iteration: 38816
loss: 1.0349196195602417,grad_norm: 0.9999993180987063, iteration: 38817
loss: 1.0183416604995728,grad_norm: 0.9999991755985579, iteration: 38818
loss: 1.0397177934646606,grad_norm: 0.9999989336797958, iteration: 38819
loss: 1.0112472772598267,grad_norm: 0.9032770200419054, iteration: 38820
loss: 1.0374027490615845,grad_norm: 0.8186252748379516, iteration: 38821
loss: 0.9440202116966248,grad_norm: 0.9999990689357583, iteration: 38822
loss: 0.9901167154312134,grad_norm: 0.9293135932246605, iteration: 38823
loss: 1.0232484340667725,grad_norm: 0.999999781474568, iteration: 38824
loss: 1.072124719619751,grad_norm: 0.9999990077506619, iteration: 38825
loss: 0.981501579284668,grad_norm: 0.9999991296113805, iteration: 38826
loss: 1.0141410827636719,grad_norm: 0.9999993102735598, iteration: 38827
loss: 1.0114414691925049,grad_norm: 0.9388076718520263, iteration: 38828
loss: 1.0126477479934692,grad_norm: 0.9999992412535609, iteration: 38829
loss: 0.9734926223754883,grad_norm: 0.9376319755339259, iteration: 38830
loss: 0.9996145963668823,grad_norm: 0.8111852769876361, iteration: 38831
loss: 1.0013912916183472,grad_norm: 0.9999997536728525, iteration: 38832
loss: 1.0673662424087524,grad_norm: 0.9999995589014229, iteration: 38833
loss: 1.0184224843978882,grad_norm: 0.8501759858901362, iteration: 38834
loss: 1.0327568054199219,grad_norm: 0.9999992576671792, iteration: 38835
loss: 1.0509451627731323,grad_norm: 0.9999993746769266, iteration: 38836
loss: 1.0175285339355469,grad_norm: 0.9999998356076386, iteration: 38837
loss: 0.9918168783187866,grad_norm: 0.9999991209620903, iteration: 38838
loss: 1.0439988374710083,grad_norm: 0.9999995945038513, iteration: 38839
loss: 1.0312929153442383,grad_norm: 0.9999995320632874, iteration: 38840
loss: 0.9572044610977173,grad_norm: 0.8900950146120051, iteration: 38841
loss: 0.9699753522872925,grad_norm: 0.8698124553962566, iteration: 38842
loss: 1.0409858226776123,grad_norm: 0.8824470542140961, iteration: 38843
loss: 0.9891999959945679,grad_norm: 0.9404846874235272, iteration: 38844
loss: 1.008245825767517,grad_norm: 0.9268884683961568, iteration: 38845
loss: 1.0055334568023682,grad_norm: 0.9257211637074102, iteration: 38846
loss: 1.0118157863616943,grad_norm: 0.9999991810040445, iteration: 38847
loss: 1.00722336769104,grad_norm: 0.9255210475145196, iteration: 38848
loss: 1.0458347797393799,grad_norm: 0.9999990406381101, iteration: 38849
loss: 0.9683005809783936,grad_norm: 0.9999990492922708, iteration: 38850
loss: 1.0160274505615234,grad_norm: 0.895267869970142, iteration: 38851
loss: 1.0050777196884155,grad_norm: 0.8912891568441155, iteration: 38852
loss: 1.014057993888855,grad_norm: 0.9854727100580799, iteration: 38853
loss: 1.3733060359954834,grad_norm: 0.9999996724364896, iteration: 38854
loss: 0.9646995067596436,grad_norm: 0.9999990644112673, iteration: 38855
loss: 1.0222545862197876,grad_norm: 0.9999991399949308, iteration: 38856
loss: 0.9977624416351318,grad_norm: 0.8867932201971878, iteration: 38857
loss: 1.0322321653366089,grad_norm: 0.8886302407536926, iteration: 38858
loss: 1.0213536024093628,grad_norm: 0.9999995224319533, iteration: 38859
loss: 1.0144339799880981,grad_norm: 0.9999998243421001, iteration: 38860
loss: 0.9812162518501282,grad_norm: 0.9999997054476678, iteration: 38861
loss: 0.989563524723053,grad_norm: 0.7476110010019362, iteration: 38862
loss: 0.9920718669891357,grad_norm: 0.9999990255529012, iteration: 38863
loss: 1.0263077020645142,grad_norm: 0.9876811451892792, iteration: 38864
loss: 0.9529919028282166,grad_norm: 0.9455448884065081, iteration: 38865
loss: 1.076307773590088,grad_norm: 0.9999994318479348, iteration: 38866
loss: 1.0060516595840454,grad_norm: 0.9999990722068871, iteration: 38867
loss: 1.002282738685608,grad_norm: 0.937918515188071, iteration: 38868
loss: 0.9699356555938721,grad_norm: 0.9022562819041594, iteration: 38869
loss: 0.972659707069397,grad_norm: 0.9999991702035235, iteration: 38870
loss: 0.9694302082061768,grad_norm: 0.9420409426370507, iteration: 38871
loss: 1.0094139575958252,grad_norm: 0.998149863361829, iteration: 38872
loss: 1.0124807357788086,grad_norm: 0.9676473187564159, iteration: 38873
loss: 0.9907477498054504,grad_norm: 0.8822103897749644, iteration: 38874
loss: 0.9949931502342224,grad_norm: 0.9999992325325688, iteration: 38875
loss: 0.9719082117080688,grad_norm: 0.9157699502892535, iteration: 38876
loss: 0.9984617829322815,grad_norm: 0.9144329955054435, iteration: 38877
loss: 1.0316818952560425,grad_norm: 0.9503215720694413, iteration: 38878
loss: 1.0590503215789795,grad_norm: 0.9999996162651211, iteration: 38879
loss: 1.0166621208190918,grad_norm: 0.9999991578662186, iteration: 38880
loss: 1.020240068435669,grad_norm: 0.9605169951973384, iteration: 38881
loss: 0.9874396324157715,grad_norm: 0.8460671195217452, iteration: 38882
loss: 1.02009916305542,grad_norm: 0.999999636119974, iteration: 38883
loss: 1.0224519968032837,grad_norm: 0.9999990074173529, iteration: 38884
loss: 1.1012930870056152,grad_norm: 0.9999995764013138, iteration: 38885
loss: 0.9854617714881897,grad_norm: 0.9999991439213304, iteration: 38886
loss: 1.024566650390625,grad_norm: 0.9999991707162675, iteration: 38887
loss: 1.0606434345245361,grad_norm: 0.9393936572500957, iteration: 38888
loss: 1.0258103609085083,grad_norm: 0.8503395090764956, iteration: 38889
loss: 0.9980053305625916,grad_norm: 0.8639536945361992, iteration: 38890
loss: 1.0351765155792236,grad_norm: 0.9999990678690207, iteration: 38891
loss: 1.007988691329956,grad_norm: 0.9851147951895775, iteration: 38892
loss: 0.9876420497894287,grad_norm: 0.9999997813758735, iteration: 38893
loss: 0.9606384634971619,grad_norm: 0.9754906183220439, iteration: 38894
loss: 1.0315730571746826,grad_norm: 0.999999099175466, iteration: 38895
loss: 1.011964201927185,grad_norm: 0.9341948517181016, iteration: 38896
loss: 1.0322003364562988,grad_norm: 0.9999996357217071, iteration: 38897
loss: 0.963979184627533,grad_norm: 0.9999990948017553, iteration: 38898
loss: 0.976859450340271,grad_norm: 0.9999990530340789, iteration: 38899
loss: 1.037432074546814,grad_norm: 0.999999660865612, iteration: 38900
loss: 1.0218605995178223,grad_norm: 0.9537049173513926, iteration: 38901
loss: 1.0145018100738525,grad_norm: 0.9999998948295816, iteration: 38902
loss: 0.999092698097229,grad_norm: 0.9219353211706365, iteration: 38903
loss: 0.9892010688781738,grad_norm: 0.8700292231640377, iteration: 38904
loss: 0.9724814295768738,grad_norm: 0.826377712049361, iteration: 38905
loss: 0.9886655211448669,grad_norm: 0.9999992378590817, iteration: 38906
loss: 0.9925841093063354,grad_norm: 0.9999990463888017, iteration: 38907
loss: 1.0080000162124634,grad_norm: 0.9999991529958905, iteration: 38908
loss: 0.9912449717521667,grad_norm: 0.9496730810422617, iteration: 38909
loss: 1.0232038497924805,grad_norm: 0.8541366938306353, iteration: 38910
loss: 1.0029296875,grad_norm: 0.9030971217588584, iteration: 38911
loss: 1.0137035846710205,grad_norm: 0.9999992172351535, iteration: 38912
loss: 1.0472354888916016,grad_norm: 0.9999991795222882, iteration: 38913
loss: 1.0760142803192139,grad_norm: 0.894421925044513, iteration: 38914
loss: 1.028433918952942,grad_norm: 0.9999994476212505, iteration: 38915
loss: 1.0065839290618896,grad_norm: 0.9999991259163713, iteration: 38916
loss: 0.9666440486907959,grad_norm: 0.9050735161513511, iteration: 38917
loss: 1.041650414466858,grad_norm: 0.9999991177557032, iteration: 38918
loss: 1.0571080446243286,grad_norm: 0.9999996232918429, iteration: 38919
loss: 1.0009313821792603,grad_norm: 0.9999990319355267, iteration: 38920
loss: 0.9957591891288757,grad_norm: 0.9110665560474476, iteration: 38921
loss: 0.9807208180427551,grad_norm: 0.9999991934031612, iteration: 38922
loss: 1.0170576572418213,grad_norm: 0.9999994079268739, iteration: 38923
loss: 0.9976140856742859,grad_norm: 0.8878606784942366, iteration: 38924
loss: 0.9943932294845581,grad_norm: 0.8997169924736025, iteration: 38925
loss: 0.9976808428764343,grad_norm: 0.9999994367202945, iteration: 38926
loss: 0.9708470702171326,grad_norm: 0.9999991949169166, iteration: 38927
loss: 1.0027557611465454,grad_norm: 0.9999990143351943, iteration: 38928
loss: 0.9910567998886108,grad_norm: 0.8612905463086629, iteration: 38929
loss: 1.0197458267211914,grad_norm: 0.9999991024195848, iteration: 38930
loss: 1.0225024223327637,grad_norm: 0.6911431307235577, iteration: 38931
loss: 1.0022097826004028,grad_norm: 0.9999992168155926, iteration: 38932
loss: 0.9879288077354431,grad_norm: 0.8535408563472612, iteration: 38933
loss: 1.0188270807266235,grad_norm: 0.8091600747379859, iteration: 38934
loss: 0.9963418245315552,grad_norm: 0.9999989915466826, iteration: 38935
loss: 0.984189510345459,grad_norm: 0.9999990084048426, iteration: 38936
loss: 0.9953051805496216,grad_norm: 0.8531960402214176, iteration: 38937
loss: 1.034136414527893,grad_norm: 0.9999994646861422, iteration: 38938
loss: 1.0046719312667847,grad_norm: 0.9615615986408366, iteration: 38939
loss: 1.0469156503677368,grad_norm: 0.9999994270261093, iteration: 38940
loss: 1.0161200761795044,grad_norm: 0.9150717847350345, iteration: 38941
loss: 1.0338597297668457,grad_norm: 0.9999991112238834, iteration: 38942
loss: 0.9792281985282898,grad_norm: 0.9999999061278443, iteration: 38943
loss: 0.9697327613830566,grad_norm: 0.9999993747990313, iteration: 38944
loss: 0.9877588748931885,grad_norm: 0.8748965577600679, iteration: 38945
loss: 0.976451575756073,grad_norm: 0.9974365646426684, iteration: 38946
loss: 1.0257011651992798,grad_norm: 0.9707333673674456, iteration: 38947
loss: 1.068625569343567,grad_norm: 0.9999994847459436, iteration: 38948
loss: 0.9785875082015991,grad_norm: 0.9999990642188362, iteration: 38949
loss: 0.9998596906661987,grad_norm: 0.999999059074855, iteration: 38950
loss: 0.9725239276885986,grad_norm: 0.9423048724988853, iteration: 38951
loss: 1.008068561553955,grad_norm: 0.9759732501629629, iteration: 38952
loss: 0.970861554145813,grad_norm: 0.8887499906362907, iteration: 38953
loss: 0.9615407586097717,grad_norm: 0.9337226252524189, iteration: 38954
loss: 1.063629150390625,grad_norm: 0.9999996090478585, iteration: 38955
loss: 1.011258840560913,grad_norm: 0.9906361596094023, iteration: 38956
loss: 0.968434751033783,grad_norm: 0.8166814108679482, iteration: 38957
loss: 1.0375866889953613,grad_norm: 0.84589161214278, iteration: 38958
loss: 0.9888579249382019,grad_norm: 0.9999991774987954, iteration: 38959
loss: 1.0318957567214966,grad_norm: 0.9999998141083375, iteration: 38960
loss: 1.033950686454773,grad_norm: 0.9999991464456753, iteration: 38961
loss: 0.9721689820289612,grad_norm: 0.999999182887995, iteration: 38962
loss: 1.0336865186691284,grad_norm: 0.9743594946996917, iteration: 38963
loss: 1.0014313459396362,grad_norm: 0.9999990936881934, iteration: 38964
loss: 0.9996585249900818,grad_norm: 0.885539209107619, iteration: 38965
loss: 1.084306001663208,grad_norm: 0.9999996105244189, iteration: 38966
loss: 1.016843557357788,grad_norm: 0.8361874546616814, iteration: 38967
loss: 1.0313942432403564,grad_norm: 0.9999990652230245, iteration: 38968
loss: 0.999549925327301,grad_norm: 0.7924671247435274, iteration: 38969
loss: 1.0223450660705566,grad_norm: 0.9371739804207008, iteration: 38970
loss: 1.027579426765442,grad_norm: 0.9437600815983119, iteration: 38971
loss: 0.990063488483429,grad_norm: 0.8945545217317583, iteration: 38972
loss: 0.9956112504005432,grad_norm: 0.953273813507209, iteration: 38973
loss: 1.0104397535324097,grad_norm: 0.9999990235386998, iteration: 38974
loss: 0.9816029071807861,grad_norm: 0.9373391198584284, iteration: 38975
loss: 0.9554582238197327,grad_norm: 0.9650147285334348, iteration: 38976
loss: 0.9894987940788269,grad_norm: 0.9380094052237163, iteration: 38977
loss: 0.9696167707443237,grad_norm: 0.8884724035463942, iteration: 38978
loss: 1.0168370008468628,grad_norm: 0.9606911122129259, iteration: 38979
loss: 1.0501097440719604,grad_norm: 0.9216007771554205, iteration: 38980
loss: 0.9617729187011719,grad_norm: 0.9999991745270543, iteration: 38981
loss: 0.9901437163352966,grad_norm: 0.9999991030831659, iteration: 38982
loss: 1.0640231370925903,grad_norm: 0.9999991084953067, iteration: 38983
loss: 1.0135860443115234,grad_norm: 0.9574080316055296, iteration: 38984
loss: 0.986939013004303,grad_norm: 0.8662525562115355, iteration: 38985
loss: 1.043739914894104,grad_norm: 0.9999997036220283, iteration: 38986
loss: 1.0117703676223755,grad_norm: 0.9999992005424521, iteration: 38987
loss: 0.9828003644943237,grad_norm: 0.999999889971388, iteration: 38988
loss: 1.00885808467865,grad_norm: 0.9999992203165282, iteration: 38989
loss: 0.9888955354690552,grad_norm: 0.999999139661779, iteration: 38990
loss: 0.9986865520477295,grad_norm: 0.7756133051784002, iteration: 38991
loss: 1.047369360923767,grad_norm: 0.9999993878781689, iteration: 38992
loss: 1.0251047611236572,grad_norm: 0.9517873559230976, iteration: 38993
loss: 1.0308313369750977,grad_norm: 0.9999990323981078, iteration: 38994
loss: 1.0334712266921997,grad_norm: 0.9995613463771069, iteration: 38995
loss: 0.9783002734184265,grad_norm: 0.999999123635796, iteration: 38996
loss: 1.0035455226898193,grad_norm: 1.0000000711761174, iteration: 38997
loss: 0.9812345504760742,grad_norm: 0.9999989697629845, iteration: 38998
loss: 1.0284314155578613,grad_norm: 0.8160179798382934, iteration: 38999
loss: 0.9937782883644104,grad_norm: 0.8672562703155131, iteration: 39000
loss: 0.9770033955574036,grad_norm: 0.9999992218850358, iteration: 39001
loss: 1.0184636116027832,grad_norm: 0.8402000930973144, iteration: 39002
loss: 1.0327726602554321,grad_norm: 0.9161025158884117, iteration: 39003
loss: 1.0417022705078125,grad_norm: 0.9866610487146469, iteration: 39004
loss: 0.9641416668891907,grad_norm: 0.9005708962445118, iteration: 39005
loss: 0.9881696105003357,grad_norm: 0.9607390383563104, iteration: 39006
loss: 1.031115174293518,grad_norm: 0.8422594699121917, iteration: 39007
loss: 1.0188003778457642,grad_norm: 0.9999991178678593, iteration: 39008
loss: 1.0410362482070923,grad_norm: 0.9999995739780617, iteration: 39009
loss: 0.9825038313865662,grad_norm: 0.9174555764459172, iteration: 39010
loss: 1.0449618101119995,grad_norm: 0.9999996929843528, iteration: 39011
loss: 1.0084439516067505,grad_norm: 0.7414070318694856, iteration: 39012
loss: 1.0070219039916992,grad_norm: 0.8117661161485015, iteration: 39013
loss: 1.0293411016464233,grad_norm: 0.9999989776135942, iteration: 39014
loss: 1.020033836364746,grad_norm: 0.8224292058174465, iteration: 39015
loss: 1.00435471534729,grad_norm: 0.9989811600054549, iteration: 39016
loss: 1.0243982076644897,grad_norm: 0.8580153051608347, iteration: 39017
loss: 0.9955623149871826,grad_norm: 0.9748680464564643, iteration: 39018
loss: 1.0298100709915161,grad_norm: 0.9999996049103019, iteration: 39019
loss: 0.9766309857368469,grad_norm: 0.9999992060336746, iteration: 39020
loss: 0.9905738234519958,grad_norm: 0.925901839557491, iteration: 39021
loss: 0.9970225095748901,grad_norm: 0.9999991551912927, iteration: 39022
loss: 0.9939451217651367,grad_norm: 0.9422754139517091, iteration: 39023
loss: 0.9994781613349915,grad_norm: 0.8140833688921604, iteration: 39024
loss: 1.0537680387496948,grad_norm: 0.9999991233541623, iteration: 39025
loss: 1.0216310024261475,grad_norm: 0.9327112466771048, iteration: 39026
loss: 0.9675045609474182,grad_norm: 0.8339804246560683, iteration: 39027
loss: 0.9797159433364868,grad_norm: 0.8913967961510163, iteration: 39028
loss: 0.9890404939651489,grad_norm: 0.8568798468151209, iteration: 39029
loss: 1.0341423749923706,grad_norm: 0.9999991411002821, iteration: 39030
loss: 1.032033085823059,grad_norm: 0.7172745361876839, iteration: 39031
loss: 1.004259467124939,grad_norm: 0.9913755996808766, iteration: 39032
loss: 1.0516974925994873,grad_norm: 0.999999799409315, iteration: 39033
loss: 1.0431877374649048,grad_norm: 0.9999996600377982, iteration: 39034
loss: 1.0121104717254639,grad_norm: 0.931093194183357, iteration: 39035
loss: 1.0238991975784302,grad_norm: 0.9999996256439105, iteration: 39036
loss: 1.0033848285675049,grad_norm: 0.9486028387748388, iteration: 39037
loss: 1.249588131904602,grad_norm: 0.9999992194851509, iteration: 39038
loss: 1.0795247554779053,grad_norm: 0.9999998812778003, iteration: 39039
loss: 1.1279736757278442,grad_norm: 0.9999994089608001, iteration: 39040
loss: 1.1135786771774292,grad_norm: 0.999999779042235, iteration: 39041
loss: 1.051145076751709,grad_norm: 0.999999663533656, iteration: 39042
loss: 1.0086318254470825,grad_norm: 0.903050075939686, iteration: 39043
loss: 1.053580641746521,grad_norm: 0.9052198026637398, iteration: 39044
loss: 1.0602080821990967,grad_norm: 0.9999994111894144, iteration: 39045
loss: 1.0005275011062622,grad_norm: 0.9999992453784743, iteration: 39046
loss: 1.0855578184127808,grad_norm: 0.9999996267586794, iteration: 39047
loss: 1.0480738878250122,grad_norm: 0.9009963561982542, iteration: 39048
loss: 1.02132248878479,grad_norm: 0.9764193138965618, iteration: 39049
loss: 1.0462749004364014,grad_norm: 0.9999994266701562, iteration: 39050
loss: 1.0617705583572388,grad_norm: 0.9999994252356259, iteration: 39051
loss: 1.0371551513671875,grad_norm: 0.9999991448556923, iteration: 39052
loss: 1.0116838216781616,grad_norm: 0.9999991154916874, iteration: 39053
loss: 1.0380337238311768,grad_norm: 0.9999992990640868, iteration: 39054
loss: 0.9772588610649109,grad_norm: 0.9911759700454407, iteration: 39055
loss: 1.0341522693634033,grad_norm: 0.9999992062687837, iteration: 39056
loss: 1.0204309225082397,grad_norm: 0.999999324431445, iteration: 39057
loss: 1.0056278705596924,grad_norm: 0.9999990966250516, iteration: 39058
loss: 0.9749448299407959,grad_norm: 0.9999996285914129, iteration: 39059
loss: 0.9991592764854431,grad_norm: 0.7726691164801248, iteration: 39060
loss: 0.9853832721710205,grad_norm: 0.9682836423809641, iteration: 39061
loss: 1.0108104944229126,grad_norm: 0.9999990905066488, iteration: 39062
loss: 0.9929230213165283,grad_norm: 0.9768937612907174, iteration: 39063
loss: 1.000609040260315,grad_norm: 0.9999991400286947, iteration: 39064
loss: 1.0267326831817627,grad_norm: 0.9782037727803982, iteration: 39065
loss: 1.0069222450256348,grad_norm: 0.9999992462368043, iteration: 39066
loss: 1.0155519247055054,grad_norm: 0.8919384222117752, iteration: 39067
loss: 0.9893041849136353,grad_norm: 0.9999991895505349, iteration: 39068
loss: 1.0240365266799927,grad_norm: 0.999999109163215, iteration: 39069
loss: 1.0008753538131714,grad_norm: 0.9999991458098385, iteration: 39070
loss: 0.999607264995575,grad_norm: 0.7476919464441338, iteration: 39071
loss: 1.0129764080047607,grad_norm: 0.9747783882749547, iteration: 39072
loss: 1.0175288915634155,grad_norm: 0.9999991965908713, iteration: 39073
loss: 1.0123474597930908,grad_norm: 0.9999995356687696, iteration: 39074
loss: 0.9835960865020752,grad_norm: 0.8787701376241883, iteration: 39075
loss: 1.0397355556488037,grad_norm: 0.9552057564886998, iteration: 39076
loss: 1.0249791145324707,grad_norm: 0.999999317750001, iteration: 39077
loss: 1.0092161893844604,grad_norm: 0.913601007918414, iteration: 39078
loss: 1.0052378177642822,grad_norm: 0.9999991300154785, iteration: 39079
loss: 1.0011383295059204,grad_norm: 0.9363849053183834, iteration: 39080
loss: 1.0014002323150635,grad_norm: 0.9999990023324696, iteration: 39081
loss: 1.013895869255066,grad_norm: 0.9999990367012414, iteration: 39082
loss: 0.9481242299079895,grad_norm: 0.9806603381040413, iteration: 39083
loss: 1.0085233449935913,grad_norm: 0.9480709116411363, iteration: 39084
loss: 1.0315221548080444,grad_norm: 0.9999992176191242, iteration: 39085
loss: 1.0172797441482544,grad_norm: 0.8532540243414286, iteration: 39086
loss: 0.9813264608383179,grad_norm: 0.9999990035921541, iteration: 39087
loss: 1.0081506967544556,grad_norm: 0.7956859689301045, iteration: 39088
loss: 1.0130714178085327,grad_norm: 0.9999990621695585, iteration: 39089
loss: 0.9879765510559082,grad_norm: 0.8740953605363777, iteration: 39090
loss: 0.95475172996521,grad_norm: 0.9999994644582237, iteration: 39091
loss: 0.9570943117141724,grad_norm: 0.9999989975457951, iteration: 39092
loss: 1.0275553464889526,grad_norm: 0.9999995209788052, iteration: 39093
loss: 0.9793152809143066,grad_norm: 0.99999917229722, iteration: 39094
loss: 0.9807570576667786,grad_norm: 0.980026322899669, iteration: 39095
loss: 1.0460671186447144,grad_norm: 0.9999990684594464, iteration: 39096
loss: 0.9984060525894165,grad_norm: 0.7391510417889638, iteration: 39097
loss: 0.971477746963501,grad_norm: 0.9999992449317349, iteration: 39098
loss: 0.9679147005081177,grad_norm: 0.9384004530327021, iteration: 39099
loss: 1.0005593299865723,grad_norm: 0.9999991341172064, iteration: 39100
loss: 1.0307508707046509,grad_norm: 0.9999992363069142, iteration: 39101
loss: 0.9498195052146912,grad_norm: 0.8600461506494633, iteration: 39102
loss: 0.9725666046142578,grad_norm: 0.9999991427562059, iteration: 39103
loss: 0.987991988658905,grad_norm: 0.9441499656618644, iteration: 39104
loss: 1.0111308097839355,grad_norm: 0.9999994733472879, iteration: 39105
loss: 0.9770992398262024,grad_norm: 0.9999991245391076, iteration: 39106
loss: 0.9795609712600708,grad_norm: 0.9411981617732701, iteration: 39107
loss: 0.9909676909446716,grad_norm: 0.9809828520313237, iteration: 39108
loss: 0.9819439053535461,grad_norm: 0.9347352513202379, iteration: 39109
loss: 1.0387829542160034,grad_norm: 0.999999262980608, iteration: 39110
loss: 0.962196409702301,grad_norm: 0.8726963081279457, iteration: 39111
loss: 0.9763084053993225,grad_norm: 0.9514812151828119, iteration: 39112
loss: 1.0006974935531616,grad_norm: 0.9999990478750362, iteration: 39113
loss: 1.0180411338806152,grad_norm: 0.9418871981906606, iteration: 39114
loss: 1.0408707857131958,grad_norm: 0.9854028428629882, iteration: 39115
loss: 1.0449223518371582,grad_norm: 0.9081127118162686, iteration: 39116
loss: 0.998758852481842,grad_norm: 0.8813853697019026, iteration: 39117
loss: 1.022547960281372,grad_norm: 0.9999996619561438, iteration: 39118
loss: 1.0986369848251343,grad_norm: 0.9991425920601348, iteration: 39119
loss: 1.0159226655960083,grad_norm: 0.7721529850835451, iteration: 39120
loss: 1.0139576196670532,grad_norm: 0.9999990353187037, iteration: 39121
loss: 1.0008372068405151,grad_norm: 0.9999992544862533, iteration: 39122
loss: 0.988675594329834,grad_norm: 0.9999991869149596, iteration: 39123
loss: 1.0127511024475098,grad_norm: 0.9999990769336085, iteration: 39124
loss: 0.9924049377441406,grad_norm: 0.9999989527218195, iteration: 39125
loss: 1.0231497287750244,grad_norm: 0.9999995041476298, iteration: 39126
loss: 0.9850391149520874,grad_norm: 0.8656779823968017, iteration: 39127
loss: 0.9925169944763184,grad_norm: 0.9999992468117288, iteration: 39128
loss: 0.9900506138801575,grad_norm: 0.9129924651502958, iteration: 39129
loss: 1.052720069885254,grad_norm: 0.9999996489141948, iteration: 39130
loss: 1.0175895690917969,grad_norm: 0.9999994856393849, iteration: 39131
loss: 1.0230956077575684,grad_norm: 0.8980450865061216, iteration: 39132
loss: 1.0032868385314941,grad_norm: 0.9955189673925519, iteration: 39133
loss: 1.011427640914917,grad_norm: 0.8730818832772904, iteration: 39134
loss: 1.017107367515564,grad_norm: 0.9826204952789307, iteration: 39135
loss: 0.9958839416503906,grad_norm: 0.9506587162942816, iteration: 39136
loss: 1.00783371925354,grad_norm: 0.9999994612898909, iteration: 39137
loss: 1.0084391832351685,grad_norm: 0.9452737882191569, iteration: 39138
loss: 1.039333462715149,grad_norm: 0.9999993239552282, iteration: 39139
loss: 1.0187816619873047,grad_norm: 0.9999995633780675, iteration: 39140
loss: 0.9975398182868958,grad_norm: 0.9999991807338003, iteration: 39141
loss: 0.9927542209625244,grad_norm: 0.7744192946763675, iteration: 39142
loss: 0.9545001983642578,grad_norm: 0.9573551105076537, iteration: 39143
loss: 1.0072863101959229,grad_norm: 0.9999992793306416, iteration: 39144
loss: 1.0037659406661987,grad_norm: 0.6897323449898266, iteration: 39145
loss: 0.9969314336776733,grad_norm: 0.9999991449853909, iteration: 39146
loss: 1.0023084878921509,grad_norm: 0.9999990511908119, iteration: 39147
loss: 1.021346092224121,grad_norm: 0.9999996364914417, iteration: 39148
loss: 1.037514328956604,grad_norm: 0.9999994697893299, iteration: 39149
loss: 1.0129446983337402,grad_norm: 0.918322954309643, iteration: 39150
loss: 1.002971887588501,grad_norm: 0.999999862051182, iteration: 39151
loss: 0.9863920211791992,grad_norm: 0.9999991467011411, iteration: 39152
loss: 1.0060521364212036,grad_norm: 0.9999994759309796, iteration: 39153
loss: 1.0069186687469482,grad_norm: 0.9999996066691209, iteration: 39154
loss: 0.9992931485176086,grad_norm: 0.7443830172456668, iteration: 39155
loss: 1.064285397529602,grad_norm: 0.9999994878136843, iteration: 39156
loss: 0.9371352791786194,grad_norm: 0.9999990124420609, iteration: 39157
loss: 1.0035045146942139,grad_norm: 0.9790915076550664, iteration: 39158
loss: 1.0109360218048096,grad_norm: 0.9999990902826793, iteration: 39159
loss: 0.9973390102386475,grad_norm: 0.900422161095873, iteration: 39160
loss: 1.031410574913025,grad_norm: 0.9887696896418423, iteration: 39161
loss: 0.97606360912323,grad_norm: 0.9091581067225366, iteration: 39162
loss: 1.0335136651992798,grad_norm: 0.9999991421489786, iteration: 39163
loss: 1.038253664970398,grad_norm: 0.9999992272550815, iteration: 39164
loss: 0.9487209916114807,grad_norm: 0.920970774708453, iteration: 39165
loss: 0.997353196144104,grad_norm: 0.9606974109997669, iteration: 39166
loss: 1.0297609567642212,grad_norm: 0.9999991840860356, iteration: 39167
loss: 1.020034909248352,grad_norm: 0.9999993883372594, iteration: 39168
loss: 0.9807106256484985,grad_norm: 0.9899195800088807, iteration: 39169
loss: 1.0101085901260376,grad_norm: 0.8094937870030195, iteration: 39170
loss: 1.0040297508239746,grad_norm: 0.9999991230397781, iteration: 39171
loss: 1.0778778791427612,grad_norm: 0.9999996815729594, iteration: 39172
loss: 0.9807279706001282,grad_norm: 0.9669267568569734, iteration: 39173
loss: 0.9909043908119202,grad_norm: 0.866009649588429, iteration: 39174
loss: 1.0126672983169556,grad_norm: 0.8356820763129211, iteration: 39175
loss: 0.951093316078186,grad_norm: 0.9999992619397909, iteration: 39176
loss: 1.0264419317245483,grad_norm: 0.9999994442744146, iteration: 39177
loss: 1.0025813579559326,grad_norm: 0.9637951432118108, iteration: 39178
loss: 1.000109076499939,grad_norm: 0.9999993103394662, iteration: 39179
loss: 1.000877022743225,grad_norm: 0.9999992416589595, iteration: 39180
loss: 1.0122613906860352,grad_norm: 0.8490240565019571, iteration: 39181
loss: 0.9720388650894165,grad_norm: 0.9797508098168086, iteration: 39182
loss: 1.0585485696792603,grad_norm: 0.9999994192395218, iteration: 39183
loss: 0.9963847994804382,grad_norm: 0.9999990427495029, iteration: 39184
loss: 1.0105490684509277,grad_norm: 0.9999995038474258, iteration: 39185
loss: 1.068225622177124,grad_norm: 0.9999996479498479, iteration: 39186
loss: 1.0237126350402832,grad_norm: 0.999999097353804, iteration: 39187
loss: 1.0222392082214355,grad_norm: 0.7578782979232651, iteration: 39188
loss: 1.0320254564285278,grad_norm: 0.9999994571264557, iteration: 39189
loss: 0.9968865513801575,grad_norm: 0.9999990199689608, iteration: 39190
loss: 0.9722762703895569,grad_norm: 0.9999991752827515, iteration: 39191
loss: 1.039061188697815,grad_norm: 0.8500495472863476, iteration: 39192
loss: 1.0272494554519653,grad_norm: 0.9999991696413195, iteration: 39193
loss: 1.024573802947998,grad_norm: 0.8564517039803718, iteration: 39194
loss: 1.032109022140503,grad_norm: 0.9999994252024563, iteration: 39195
loss: 1.0058265924453735,grad_norm: 0.9999990821018704, iteration: 39196
loss: 0.9921628832817078,grad_norm: 0.8942566271102146, iteration: 39197
loss: 1.0352888107299805,grad_norm: 0.9999993692160768, iteration: 39198
loss: 0.9837114214897156,grad_norm: 0.9999996672658606, iteration: 39199
loss: 1.0190688371658325,grad_norm: 0.999999038535918, iteration: 39200
loss: 1.0204590559005737,grad_norm: 0.9051506070019837, iteration: 39201
loss: 1.0231733322143555,grad_norm: 0.807138517595372, iteration: 39202
loss: 1.035422921180725,grad_norm: 0.8752000508385215, iteration: 39203
loss: 1.0041606426239014,grad_norm: 0.999999319302098, iteration: 39204
loss: 1.0006872415542603,grad_norm: 0.9663682820648973, iteration: 39205
loss: 1.0052329301834106,grad_norm: 0.9161050164700018, iteration: 39206
loss: 1.0000680685043335,grad_norm: 0.8505980419801551, iteration: 39207
loss: 1.0923042297363281,grad_norm: 0.9999998640850172, iteration: 39208
loss: 1.0213127136230469,grad_norm: 0.9999990783534827, iteration: 39209
loss: 1.0015528202056885,grad_norm: 0.9726691415750814, iteration: 39210
loss: 1.008133888244629,grad_norm: 0.7630313435572551, iteration: 39211
loss: 0.9962925910949707,grad_norm: 0.8156680881092533, iteration: 39212
loss: 1.0293210744857788,grad_norm: 0.9999995791658826, iteration: 39213
loss: 1.0227909088134766,grad_norm: 0.9999994491548533, iteration: 39214
loss: 0.9889848232269287,grad_norm: 0.775010833918705, iteration: 39215
loss: 1.0229607820510864,grad_norm: 0.8782947459995237, iteration: 39216
loss: 1.0672858953475952,grad_norm: 0.9999991046824597, iteration: 39217
loss: 0.9907984137535095,grad_norm: 0.9999993058625783, iteration: 39218
loss: 1.0080251693725586,grad_norm: 0.8708452521605741, iteration: 39219
loss: 1.0139985084533691,grad_norm: 0.9706547848611127, iteration: 39220
loss: 1.0590128898620605,grad_norm: 0.999999588744161, iteration: 39221
loss: 0.9955296516418457,grad_norm: 0.9999991341168604, iteration: 39222
loss: 0.9790886044502258,grad_norm: 0.9999990476467894, iteration: 39223
loss: 0.9819751381874084,grad_norm: 0.9999992539515411, iteration: 39224
loss: 1.0112049579620361,grad_norm: 0.8617928561948194, iteration: 39225
loss: 1.0320581197738647,grad_norm: 0.9999991699005638, iteration: 39226
loss: 1.0113896131515503,grad_norm: 0.9999993130234813, iteration: 39227
loss: 0.9879525899887085,grad_norm: 0.900798475032605, iteration: 39228
loss: 0.974876880645752,grad_norm: 0.9999990185391004, iteration: 39229
loss: 1.018882155418396,grad_norm: 0.9999997589779703, iteration: 39230
loss: 1.0120714902877808,grad_norm: 0.8808956021935141, iteration: 39231
loss: 0.9940985441207886,grad_norm: 0.9999990901746103, iteration: 39232
loss: 1.0242241621017456,grad_norm: 0.9406839475134279, iteration: 39233
loss: 1.01363205909729,grad_norm: 0.9403245791491445, iteration: 39234
loss: 0.9728543758392334,grad_norm: 0.9999989693510264, iteration: 39235
loss: 0.9870370626449585,grad_norm: 0.9999992782607371, iteration: 39236
loss: 1.0568125247955322,grad_norm: 0.9999994985387024, iteration: 39237
loss: 1.048743486404419,grad_norm: 0.9999999223623642, iteration: 39238
loss: 1.0112766027450562,grad_norm: 0.9999990835132218, iteration: 39239
loss: 0.9960163831710815,grad_norm: 0.9999998399273089, iteration: 39240
loss: 1.0050021409988403,grad_norm: 0.99999911430731, iteration: 39241
loss: 1.0022404193878174,grad_norm: 0.9999992116479244, iteration: 39242
loss: 0.9832445979118347,grad_norm: 0.999999240681706, iteration: 39243
loss: 1.003782033920288,grad_norm: 0.9661208180529972, iteration: 39244
loss: 0.9765746593475342,grad_norm: 0.8032675740987562, iteration: 39245
loss: 0.9858814477920532,grad_norm: 0.8434413431731066, iteration: 39246
loss: 1.0295027494430542,grad_norm: 0.8548662651572165, iteration: 39247
loss: 0.9926732778549194,grad_norm: 0.8113464928943611, iteration: 39248
loss: 1.0241875648498535,grad_norm: 0.8735571239044803, iteration: 39249
loss: 1.0044748783111572,grad_norm: 0.9999989970135265, iteration: 39250
loss: 1.0137611627578735,grad_norm: 0.887157352356992, iteration: 39251
loss: 1.0134292840957642,grad_norm: 0.999999061067798, iteration: 39252
loss: 0.9770365357398987,grad_norm: 0.9519965404169176, iteration: 39253
loss: 0.9926840662956238,grad_norm: 0.8546375420025867, iteration: 39254
loss: 0.984994649887085,grad_norm: 0.8724664849809655, iteration: 39255
loss: 0.9775760173797607,grad_norm: 0.9266241019851563, iteration: 39256
loss: 1.04082190990448,grad_norm: 0.9999992806877408, iteration: 39257
loss: 0.9692230820655823,grad_norm: 0.9999993013037891, iteration: 39258
loss: 1.105643630027771,grad_norm: 0.999999623855316, iteration: 39259
loss: 1.0120017528533936,grad_norm: 0.7682805216196422, iteration: 39260
loss: 0.9729923009872437,grad_norm: 0.9999993209844859, iteration: 39261
loss: 1.0076252222061157,grad_norm: 0.999999432725643, iteration: 39262
loss: 1.0088006258010864,grad_norm: 0.9147203511178396, iteration: 39263
loss: 1.0560606718063354,grad_norm: 0.9999994662518432, iteration: 39264
loss: 0.9824991822242737,grad_norm: 0.9861709381442751, iteration: 39265
loss: 1.0091674327850342,grad_norm: 0.9681795709840306, iteration: 39266
loss: 0.9913126230239868,grad_norm: 0.8842237223044176, iteration: 39267
loss: 0.9561154842376709,grad_norm: 0.9363372272411793, iteration: 39268
loss: 1.009124994277954,grad_norm: 0.9989698729950566, iteration: 39269
loss: 1.0530765056610107,grad_norm: 0.9999990674876499, iteration: 39270
loss: 0.9920498132705688,grad_norm: 0.9999990978657703, iteration: 39271
loss: 1.0460392236709595,grad_norm: 0.9999991342108454, iteration: 39272
loss: 1.001022458076477,grad_norm: 0.8301936578115201, iteration: 39273
loss: 1.0084941387176514,grad_norm: 0.8319901373984103, iteration: 39274
loss: 1.0366911888122559,grad_norm: 0.999999031792723, iteration: 39275
loss: 1.0269526243209839,grad_norm: 0.9294453026963083, iteration: 39276
loss: 0.9922603368759155,grad_norm: 0.7596932465965216, iteration: 39277
loss: 0.9984576106071472,grad_norm: 0.9999992643622435, iteration: 39278
loss: 1.030177354812622,grad_norm: 0.9999992728345123, iteration: 39279
loss: 0.9894713759422302,grad_norm: 0.9699215909357972, iteration: 39280
loss: 1.013916015625,grad_norm: 0.9999989916484686, iteration: 39281
loss: 0.9689728617668152,grad_norm: 0.9999990385043936, iteration: 39282
loss: 0.9870197176933289,grad_norm: 0.8004546195678706, iteration: 39283
loss: 1.0100209712982178,grad_norm: 0.9999992423847051, iteration: 39284
loss: 1.0174680948257446,grad_norm: 0.8162313171757563, iteration: 39285
loss: 0.9826422333717346,grad_norm: 0.9999990986368053, iteration: 39286
loss: 1.0452206134796143,grad_norm: 0.99999925419697, iteration: 39287
loss: 0.9850579500198364,grad_norm: 0.9094653834030548, iteration: 39288
loss: 0.9577926993370056,grad_norm: 0.9851083034486444, iteration: 39289
loss: 1.011343240737915,grad_norm: 0.9752616937881621, iteration: 39290
loss: 0.9808029532432556,grad_norm: 0.9999992061882828, iteration: 39291
loss: 0.9611091017723083,grad_norm: 0.9999993311429131, iteration: 39292
loss: 0.9905419945716858,grad_norm: 0.9677928342644613, iteration: 39293
loss: 1.0127290487289429,grad_norm: 0.981397693764745, iteration: 39294
loss: 1.0046268701553345,grad_norm: 0.7909690090605292, iteration: 39295
loss: 0.9690033197402954,grad_norm: 0.999999075224733, iteration: 39296
loss: 0.9800440073013306,grad_norm: 0.8845138686523888, iteration: 39297
loss: 1.0018277168273926,grad_norm: 0.9673721748415525, iteration: 39298
loss: 1.022247076034546,grad_norm: 0.9999995122767247, iteration: 39299
loss: 1.0008430480957031,grad_norm: 0.9014548044036917, iteration: 39300
loss: 1.0651582479476929,grad_norm: 0.9999990241334734, iteration: 39301
loss: 0.9784942269325256,grad_norm: 0.8622251850794932, iteration: 39302
loss: 1.0316468477249146,grad_norm: 0.9999992153633759, iteration: 39303
loss: 1.0080299377441406,grad_norm: 0.9999995500451218, iteration: 39304
loss: 1.048952341079712,grad_norm: 0.7657033889211252, iteration: 39305
loss: 0.9853247404098511,grad_norm: 0.9999997384295285, iteration: 39306
loss: 0.9837438464164734,grad_norm: 0.9794097742267335, iteration: 39307
loss: 0.969817578792572,grad_norm: 0.9856985622205559, iteration: 39308
loss: 1.012289047241211,grad_norm: 0.9999990334174156, iteration: 39309
loss: 1.011641025543213,grad_norm: 0.8658715660332259, iteration: 39310
loss: 1.0059850215911865,grad_norm: 0.9695459351366975, iteration: 39311
loss: 0.9813200235366821,grad_norm: 0.9562761135014226, iteration: 39312
loss: 1.0278077125549316,grad_norm: 0.9999990152796664, iteration: 39313
loss: 1.029927372932434,grad_norm: 0.9999991945205736, iteration: 39314
loss: 0.9844008088111877,grad_norm: 0.9691854711802047, iteration: 39315
loss: 0.9961674213409424,grad_norm: 0.925728499470149, iteration: 39316
loss: 1.011376976966858,grad_norm: 0.9528490011748761, iteration: 39317
loss: 1.0052562952041626,grad_norm: 0.9999993441407665, iteration: 39318
loss: 1.0344785451889038,grad_norm: 0.8476234911610441, iteration: 39319
loss: 1.170940637588501,grad_norm: 0.9999999096075384, iteration: 39320
loss: 1.0096977949142456,grad_norm: 0.8946920781662442, iteration: 39321
loss: 1.0036925077438354,grad_norm: 0.9312202306423025, iteration: 39322
loss: 1.0037976503372192,grad_norm: 0.9999990968668782, iteration: 39323
loss: 1.1895893812179565,grad_norm: 0.999999398016428, iteration: 39324
loss: 1.650059461593628,grad_norm: 0.9999993787102173, iteration: 39325
loss: 1.006334900856018,grad_norm: 0.9477085511883939, iteration: 39326
loss: 1.3114056587219238,grad_norm: 0.9999996151347569, iteration: 39327
loss: 1.0104827880859375,grad_norm: 0.9999991122704847, iteration: 39328
loss: 1.0423892736434937,grad_norm: 0.9535633918114259, iteration: 39329
loss: 1.0455456972122192,grad_norm: 1.0000000386295531, iteration: 39330
loss: 1.153889536857605,grad_norm: 0.9999996099150665, iteration: 39331
loss: 1.2166337966918945,grad_norm: 0.9999992115006355, iteration: 39332
loss: 1.1811920404434204,grad_norm: 0.9999993002970111, iteration: 39333
loss: 1.0238994359970093,grad_norm: 0.9999994787833857, iteration: 39334
loss: 1.0805943012237549,grad_norm: 0.9999994108929031, iteration: 39335
loss: 1.0393143892288208,grad_norm: 0.9999992124314606, iteration: 39336
loss: 1.0620455741882324,grad_norm: 0.9999996292608637, iteration: 39337
loss: 1.0098460912704468,grad_norm: 0.9999992181862859, iteration: 39338
loss: 1.0177794694900513,grad_norm: 0.9174702831031742, iteration: 39339
loss: 1.0431723594665527,grad_norm: 0.9999990175906985, iteration: 39340
loss: 1.0470077991485596,grad_norm: 0.9999994502882311, iteration: 39341
loss: 0.9996911883354187,grad_norm: 0.9634675679119086, iteration: 39342
loss: 0.9978304505348206,grad_norm: 0.951753743590219, iteration: 39343
loss: 1.0130031108856201,grad_norm: 0.9726026182293513, iteration: 39344
loss: 1.0245376825332642,grad_norm: 0.9999996469705418, iteration: 39345
loss: 0.9991870522499084,grad_norm: 0.8963555682976373, iteration: 39346
loss: 1.0094797611236572,grad_norm: 0.7413975397916898, iteration: 39347
loss: 1.0362943410873413,grad_norm: 0.9967318350359703, iteration: 39348
loss: 0.98773592710495,grad_norm: 0.9400504440863993, iteration: 39349
loss: 1.030788540840149,grad_norm: 0.9999991619787075, iteration: 39350
loss: 1.0085381269454956,grad_norm: 0.8596346962426981, iteration: 39351
loss: 1.084319829940796,grad_norm: 0.9999996787544511, iteration: 39352
loss: 1.0390409231185913,grad_norm: 0.9999995944801287, iteration: 39353
loss: 0.9907522201538086,grad_norm: 0.9999991537871654, iteration: 39354
loss: 0.9884651899337769,grad_norm: 0.9999990404697124, iteration: 39355
loss: 1.0151132345199585,grad_norm: 0.9765498840115069, iteration: 39356
loss: 1.0054576396942139,grad_norm: 0.9999991529117813, iteration: 39357
loss: 1.045829176902771,grad_norm: 0.9999999071378913, iteration: 39358
loss: 0.9940400719642639,grad_norm: 0.9859421334392486, iteration: 39359
loss: 1.0824588537216187,grad_norm: 0.9604543965191188, iteration: 39360
loss: 1.0678088665008545,grad_norm: 0.9999992513130783, iteration: 39361
loss: 1.063638687133789,grad_norm: 0.9999992029572856, iteration: 39362
loss: 1.0271950960159302,grad_norm: 0.8455273005342773, iteration: 39363
loss: 1.0201146602630615,grad_norm: 0.9999990777507719, iteration: 39364
loss: 1.0084713697433472,grad_norm: 0.9999993218215946, iteration: 39365
loss: 0.9942675828933716,grad_norm: 0.8682772880912445, iteration: 39366
loss: 1.002553105354309,grad_norm: 0.8996551424089797, iteration: 39367
loss: 1.0504087209701538,grad_norm: 0.9999993557803857, iteration: 39368
loss: 1.0811924934387207,grad_norm: 0.9538356626359424, iteration: 39369
loss: 0.9876006245613098,grad_norm: 0.821350825070301, iteration: 39370
loss: 1.0143883228302002,grad_norm: 0.8471674079439209, iteration: 39371
loss: 1.0559501647949219,grad_norm: 0.8588397012018768, iteration: 39372
loss: 0.997032880783081,grad_norm: 0.9999991679775693, iteration: 39373
loss: 1.0619938373565674,grad_norm: 0.9999994653834637, iteration: 39374
loss: 0.9846327900886536,grad_norm: 0.9901819090708007, iteration: 39375
loss: 1.019411563873291,grad_norm: 0.9999991761545254, iteration: 39376
loss: 1.0048036575317383,grad_norm: 0.999999393551251, iteration: 39377
loss: 1.0001226663589478,grad_norm: 0.9999996736265582, iteration: 39378
loss: 1.0349820852279663,grad_norm: 0.9999992897905251, iteration: 39379
loss: 1.0185130834579468,grad_norm: 0.8850182077188337, iteration: 39380
loss: 1.0062124729156494,grad_norm: 0.9999990247013765, iteration: 39381
loss: 0.9960330724716187,grad_norm: 0.9999990742532867, iteration: 39382
loss: 1.0019253492355347,grad_norm: 0.9999990422193235, iteration: 39383
loss: 1.0285989046096802,grad_norm: 0.8515626259109951, iteration: 39384
loss: 0.9896416664123535,grad_norm: 0.8736276881248486, iteration: 39385
loss: 0.990691602230072,grad_norm: 0.9999991398565132, iteration: 39386
loss: 1.0783002376556396,grad_norm: 0.9999992456458764, iteration: 39387
loss: 0.9686514735221863,grad_norm: 0.9889992894431967, iteration: 39388
loss: 1.0089577436447144,grad_norm: 0.8695043594443718, iteration: 39389
loss: 0.9967162609100342,grad_norm: 0.8860109834552643, iteration: 39390
loss: 0.9790657758712769,grad_norm: 0.9999990827626348, iteration: 39391
loss: 0.9782657027244568,grad_norm: 0.9999990571334924, iteration: 39392
loss: 1.0445177555084229,grad_norm: 0.9999991517908493, iteration: 39393
loss: 1.0324976444244385,grad_norm: 0.9999994245328904, iteration: 39394
loss: 0.9495559334754944,grad_norm: 0.9309387945899228, iteration: 39395
loss: 1.0404767990112305,grad_norm: 0.9999990550812788, iteration: 39396
loss: 0.9936171770095825,grad_norm: 0.9999992864956654, iteration: 39397
loss: 1.0016645193099976,grad_norm: 0.9999990206689174, iteration: 39398
loss: 0.9719011783599854,grad_norm: 0.9999992910619929, iteration: 39399
loss: 0.9976086020469666,grad_norm: 0.9287608473010521, iteration: 39400
loss: 1.0360876321792603,grad_norm: 0.9999993993207996, iteration: 39401
loss: 0.9761261343955994,grad_norm: 0.7290474007586649, iteration: 39402
loss: 0.9998601675033569,grad_norm: 0.9999992321269102, iteration: 39403
loss: 0.9903519153594971,grad_norm: 0.9999995874728388, iteration: 39404
loss: 0.9912335872650146,grad_norm: 0.9477465933115506, iteration: 39405
loss: 0.9843649864196777,grad_norm: 0.9999992780163683, iteration: 39406
loss: 0.997288703918457,grad_norm: 0.9999989863930294, iteration: 39407
loss: 1.003769040107727,grad_norm: 0.9097965420781609, iteration: 39408
loss: 0.9987369179725647,grad_norm: 0.9337437877009569, iteration: 39409
loss: 1.0291939973831177,grad_norm: 0.999999181402433, iteration: 39410
loss: 0.9931700229644775,grad_norm: 0.8508655765474694, iteration: 39411
loss: 1.0081392526626587,grad_norm: 0.8387551532960708, iteration: 39412
loss: 0.9614593982696533,grad_norm: 0.9999990604740807, iteration: 39413
loss: 1.0033010244369507,grad_norm: 0.9999990507832873, iteration: 39414
loss: 0.987060010433197,grad_norm: 0.9999990750327932, iteration: 39415
loss: 1.0313609838485718,grad_norm: 0.8834363976698058, iteration: 39416
loss: 0.9949304461479187,grad_norm: 0.7735029259864218, iteration: 39417
loss: 0.9842402338981628,grad_norm: 0.9999990491496326, iteration: 39418
loss: 0.9799327254295349,grad_norm: 0.9999990742420487, iteration: 39419
loss: 1.026483416557312,grad_norm: 0.9999997899355455, iteration: 39420
loss: 1.0229099988937378,grad_norm: 0.9999990759249046, iteration: 39421
loss: 1.0551670789718628,grad_norm: 0.9999999075150547, iteration: 39422
loss: 1.0068273544311523,grad_norm: 0.8728259822147244, iteration: 39423
loss: 1.005495548248291,grad_norm: 0.9999991306155119, iteration: 39424
loss: 1.0902230739593506,grad_norm: 0.9999997018899563, iteration: 39425
loss: 0.9984691739082336,grad_norm: 0.8531514840424509, iteration: 39426
loss: 1.0097049474716187,grad_norm: 0.9431203414323315, iteration: 39427
loss: 1.0010193586349487,grad_norm: 0.9659876126076739, iteration: 39428
loss: 1.0545692443847656,grad_norm: 0.9914556498909163, iteration: 39429
loss: 0.9621206521987915,grad_norm: 0.9999995134087903, iteration: 39430
loss: 1.01298189163208,grad_norm: 0.9999990583724546, iteration: 39431
loss: 1.0014787912368774,grad_norm: 0.9623411715150958, iteration: 39432
loss: 0.9817711710929871,grad_norm: 0.9999990030803375, iteration: 39433
loss: 0.9965379238128662,grad_norm: 0.7654268647148784, iteration: 39434
loss: 1.0086709260940552,grad_norm: 0.9829931912830251, iteration: 39435
loss: 0.9810367822647095,grad_norm: 0.8330501279703945, iteration: 39436
loss: 1.0141860246658325,grad_norm: 0.798664027396793, iteration: 39437
loss: 0.9930985569953918,grad_norm: 0.9999991090903437, iteration: 39438
loss: 0.9797163605690002,grad_norm: 0.8354290056108503, iteration: 39439
loss: 0.9880370497703552,grad_norm: 0.9999990890659951, iteration: 39440
loss: 0.9963730573654175,grad_norm: 0.8653587095612909, iteration: 39441
loss: 1.000653624534607,grad_norm: 0.7692755736953332, iteration: 39442
loss: 1.0196932554244995,grad_norm: 0.8491976557657833, iteration: 39443
loss: 0.9842996001243591,grad_norm: 0.8481971480612585, iteration: 39444
loss: 1.0165951251983643,grad_norm: 0.9467048759400377, iteration: 39445
loss: 1.0240792036056519,grad_norm: 0.9915015004510528, iteration: 39446
loss: 1.0275413990020752,grad_norm: 0.8253528732331217, iteration: 39447
loss: 1.0263429880142212,grad_norm: 0.9999991466813549, iteration: 39448
loss: 0.9986038208007812,grad_norm: 0.9999991895684253, iteration: 39449
loss: 1.0037020444869995,grad_norm: 0.8289348806099461, iteration: 39450
loss: 1.0485036373138428,grad_norm: 0.9999996976427401, iteration: 39451
loss: 1.0398036241531372,grad_norm: 0.9029947769896272, iteration: 39452
loss: 0.9916172027587891,grad_norm: 0.9271983971769345, iteration: 39453
loss: 1.0062223672866821,grad_norm: 0.9999990838169339, iteration: 39454
loss: 1.0191128253936768,grad_norm: 0.9872721220404113, iteration: 39455
loss: 0.9701594114303589,grad_norm: 0.8994296059021089, iteration: 39456
loss: 1.0457884073257446,grad_norm: 0.9670251945797913, iteration: 39457
loss: 1.0180799961090088,grad_norm: 0.9999994584973494, iteration: 39458
loss: 1.0444865226745605,grad_norm: 0.9999996250836878, iteration: 39459
loss: 0.9851386547088623,grad_norm: 0.8039252439734246, iteration: 39460
loss: 0.9916408658027649,grad_norm: 0.9999990633289663, iteration: 39461
loss: 1.0825202465057373,grad_norm: 0.8214767084857709, iteration: 39462
loss: 1.017926573753357,grad_norm: 0.999999296393895, iteration: 39463
loss: 1.04148268699646,grad_norm: 0.9999991720163789, iteration: 39464
loss: 1.010505199432373,grad_norm: 0.8902934387104371, iteration: 39465
loss: 0.9901729226112366,grad_norm: 0.9999991259021325, iteration: 39466
loss: 1.0186302661895752,grad_norm: 0.9999991454115823, iteration: 39467
loss: 1.0064918994903564,grad_norm: 0.9999990955509187, iteration: 39468
loss: 1.0231155157089233,grad_norm: 0.95217582790698, iteration: 39469
loss: 0.9733692407608032,grad_norm: 0.9999992645874405, iteration: 39470
loss: 0.9609668254852295,grad_norm: 0.8937260137182693, iteration: 39471
loss: 1.0151671171188354,grad_norm: 0.8706542804737092, iteration: 39472
loss: 1.00347900390625,grad_norm: 0.9999995687446386, iteration: 39473
loss: 1.0126432180404663,grad_norm: 0.8695202808484568, iteration: 39474
loss: 1.0303963422775269,grad_norm: 0.9363861266170781, iteration: 39475
loss: 1.0491622686386108,grad_norm: 0.8701817477569315, iteration: 39476
loss: 1.0325268507003784,grad_norm: 0.8962390016776745, iteration: 39477
loss: 0.9967159032821655,grad_norm: 0.9194387674374994, iteration: 39478
loss: 1.00821053981781,grad_norm: 0.9851951163680668, iteration: 39479
loss: 1.001792311668396,grad_norm: 0.9117694082722146, iteration: 39480
loss: 0.9948642253875732,grad_norm: 0.7270701463375305, iteration: 39481
loss: 0.9837259650230408,grad_norm: 0.953060688237967, iteration: 39482
loss: 0.9965497255325317,grad_norm: 0.9999992102275503, iteration: 39483
loss: 1.0527675151824951,grad_norm: 0.9999993246560002, iteration: 39484
loss: 1.0050355195999146,grad_norm: 0.8525242431645447, iteration: 39485
loss: 0.9875008463859558,grad_norm: 0.8260588767057857, iteration: 39486
loss: 0.9908339977264404,grad_norm: 0.9999989979863331, iteration: 39487
loss: 1.0290707349777222,grad_norm: 0.9999990296886265, iteration: 39488
loss: 1.0237454175949097,grad_norm: 0.8326018446351554, iteration: 39489
loss: 1.0197877883911133,grad_norm: 0.9999992834338292, iteration: 39490
loss: 0.9950699210166931,grad_norm: 0.9313161743342566, iteration: 39491
loss: 1.053898572921753,grad_norm: 0.9999990619549629, iteration: 39492
loss: 0.9774078130722046,grad_norm: 0.8553822622505786, iteration: 39493
loss: 0.9860303401947021,grad_norm: 0.9930236981594046, iteration: 39494
loss: 0.9951671361923218,grad_norm: 0.9999991896497752, iteration: 39495
loss: 0.9650568962097168,grad_norm: 0.9999989728821685, iteration: 39496
loss: 1.0310566425323486,grad_norm: 0.9999989634325427, iteration: 39497
loss: 1.0120927095413208,grad_norm: 0.9498989679298532, iteration: 39498
loss: 1.1152666807174683,grad_norm: 0.9999991968415788, iteration: 39499
loss: 0.9723658561706543,grad_norm: 0.9999996662622117, iteration: 39500
loss: 0.994287371635437,grad_norm: 0.9440423775022719, iteration: 39501
loss: 1.0203114748001099,grad_norm: 0.8652618642833778, iteration: 39502
loss: 1.0124918222427368,grad_norm: 0.9999992257367094, iteration: 39503
loss: 1.0006537437438965,grad_norm: 0.9885215221486919, iteration: 39504
loss: 1.0079433917999268,grad_norm: 0.9426248156966212, iteration: 39505
loss: 0.9665088057518005,grad_norm: 0.9999990246739008, iteration: 39506
loss: 0.9775983095169067,grad_norm: 0.8593131792390082, iteration: 39507
loss: 1.002304196357727,grad_norm: 0.9903056460127975, iteration: 39508
loss: 0.9967669248580933,grad_norm: 0.9082414077382249, iteration: 39509
loss: 0.9776749014854431,grad_norm: 0.9765013213498, iteration: 39510
loss: 1.0051510334014893,grad_norm: 0.999999242661874, iteration: 39511
loss: 0.9769909977912903,grad_norm: 0.9508443133202454, iteration: 39512
loss: 0.9930380582809448,grad_norm: 0.9272057897106257, iteration: 39513
loss: 1.024592399597168,grad_norm: 0.8558213462852793, iteration: 39514
loss: 1.0174809694290161,grad_norm: 0.8294978925467317, iteration: 39515
loss: 0.9831792116165161,grad_norm: 0.9999995040319937, iteration: 39516
loss: 1.0041931867599487,grad_norm: 0.8722123086707, iteration: 39517
loss: 1.0112669467926025,grad_norm: 0.999999248425983, iteration: 39518
loss: 0.989910364151001,grad_norm: 0.7751551954891286, iteration: 39519
loss: 0.9737822413444519,grad_norm: 0.793201476688545, iteration: 39520
loss: 0.9977440237998962,grad_norm: 0.9156677288569907, iteration: 39521
loss: 1.0025192499160767,grad_norm: 0.8127026134945836, iteration: 39522
loss: 1.0099161863327026,grad_norm: 0.9666325308163695, iteration: 39523
loss: 0.9886478781700134,grad_norm: 0.912915411381205, iteration: 39524
loss: 0.9640328288078308,grad_norm: 0.9364148291446354, iteration: 39525
loss: 1.0218188762664795,grad_norm: 0.7290083671042777, iteration: 39526
loss: 1.0018292665481567,grad_norm: 0.9440965293556643, iteration: 39527
loss: 1.0039795637130737,grad_norm: 0.9144497124730434, iteration: 39528
loss: 1.0475752353668213,grad_norm: 0.9999991420049702, iteration: 39529
loss: 1.0181269645690918,grad_norm: 0.9999990066175384, iteration: 39530
loss: 1.011795163154602,grad_norm: 0.938846861256864, iteration: 39531
loss: 1.0074492692947388,grad_norm: 0.8373400476985041, iteration: 39532
loss: 0.9692045450210571,grad_norm: 0.9999992575581041, iteration: 39533
loss: 1.0140854120254517,grad_norm: 0.9766618170282545, iteration: 39534
loss: 0.9432139992713928,grad_norm: 0.9999992092207614, iteration: 39535
loss: 1.001118779182434,grad_norm: 0.8011737101978255, iteration: 39536
loss: 1.0228044986724854,grad_norm: 0.9469222597049997, iteration: 39537
loss: 1.0087382793426514,grad_norm: 0.999999065136256, iteration: 39538
loss: 1.034157156944275,grad_norm: 0.9633428088478702, iteration: 39539
loss: 1.0118331909179688,grad_norm: 0.8454511364460827, iteration: 39540
loss: 0.9789933562278748,grad_norm: 0.9636007554387844, iteration: 39541
loss: 1.0058850049972534,grad_norm: 0.8065546969220383, iteration: 39542
loss: 1.0085384845733643,grad_norm: 0.9477751258074728, iteration: 39543
loss: 1.0609281063079834,grad_norm: 0.9999992264339764, iteration: 39544
loss: 1.0103073120117188,grad_norm: 0.8324508519911854, iteration: 39545
loss: 1.0167326927185059,grad_norm: 0.8491575330987621, iteration: 39546
loss: 1.061057448387146,grad_norm: 0.9999990862617428, iteration: 39547
loss: 0.9618397355079651,grad_norm: 0.8286628490304334, iteration: 39548
loss: 1.0557849407196045,grad_norm: 0.9451643363916222, iteration: 39549
loss: 0.9779348969459534,grad_norm: 0.9379689664937757, iteration: 39550
loss: 0.9821170568466187,grad_norm: 0.8685475246677714, iteration: 39551
loss: 0.9962518215179443,grad_norm: 0.8017924777226222, iteration: 39552
loss: 1.0105464458465576,grad_norm: 0.8773092949104558, iteration: 39553
loss: 1.0135422945022583,grad_norm: 0.9999992929709638, iteration: 39554
loss: 0.95735764503479,grad_norm: 0.9911565453922707, iteration: 39555
loss: 1.0362790822982788,grad_norm: 0.9999992613248364, iteration: 39556
loss: 1.0169048309326172,grad_norm: 0.7634499273100471, iteration: 39557
loss: 0.9876394867897034,grad_norm: 0.9489686959086716, iteration: 39558
loss: 1.049731731414795,grad_norm: 0.9999991684617324, iteration: 39559
loss: 1.0161789655685425,grad_norm: 0.7978378139860731, iteration: 39560
loss: 0.9940676093101501,grad_norm: 0.9999992878261176, iteration: 39561
loss: 1.0562279224395752,grad_norm: 0.9999999089195131, iteration: 39562
loss: 1.0077645778656006,grad_norm: 0.7484713869340628, iteration: 39563
loss: 1.0254429578781128,grad_norm: 0.8773562092832113, iteration: 39564
loss: 0.9975319504737854,grad_norm: 0.9660519242092866, iteration: 39565
loss: 1.0327926874160767,grad_norm: 0.9819811911945497, iteration: 39566
loss: 0.9900819659233093,grad_norm: 0.9999989445832036, iteration: 39567
loss: 0.9959643483161926,grad_norm: 0.9487680913292041, iteration: 39568
loss: 1.0023099184036255,grad_norm: 0.8774748814782045, iteration: 39569
loss: 1.033608317375183,grad_norm: 0.9509735029302219, iteration: 39570
loss: 0.9919872283935547,grad_norm: 0.9449114457817697, iteration: 39571
loss: 0.9682566523551941,grad_norm: 0.9603126785120163, iteration: 39572
loss: 0.9954707622528076,grad_norm: 0.8729307756603109, iteration: 39573
loss: 0.9970571398735046,grad_norm: 0.9999991777679356, iteration: 39574
loss: 0.9710706472396851,grad_norm: 0.999999283489595, iteration: 39575
loss: 1.0192625522613525,grad_norm: 0.8485905803024298, iteration: 39576
loss: 0.9996924996376038,grad_norm: 0.8924500676915263, iteration: 39577
loss: 1.0316205024719238,grad_norm: 0.9730029080222719, iteration: 39578
loss: 1.0272701978683472,grad_norm: 0.8552959367562556, iteration: 39579
loss: 0.9711863994598389,grad_norm: 0.903369112630991, iteration: 39580
loss: 1.0184812545776367,grad_norm: 0.9999995729635749, iteration: 39581
loss: 0.9737325310707092,grad_norm: 0.8032060601759039, iteration: 39582
loss: 0.9810287356376648,grad_norm: 0.9082214209996881, iteration: 39583
loss: 0.9882723689079285,grad_norm: 0.9999991137103855, iteration: 39584
loss: 1.0315252542495728,grad_norm: 0.9999990941858249, iteration: 39585
loss: 1.0122122764587402,grad_norm: 0.8686899388454248, iteration: 39586
loss: 0.9992555975914001,grad_norm: 0.9351273318909813, iteration: 39587
loss: 1.0212539434432983,grad_norm: 0.9999992316280266, iteration: 39588
loss: 1.0533933639526367,grad_norm: 0.9555024378637462, iteration: 39589
loss: 1.0596647262573242,grad_norm: 0.9999991970620191, iteration: 39590
loss: 0.9673539996147156,grad_norm: 0.9057928894665193, iteration: 39591
loss: 1.063787579536438,grad_norm: 0.9999994664939174, iteration: 39592
loss: 1.0287879705429077,grad_norm: 0.9999990698592927, iteration: 39593
loss: 1.00358247756958,grad_norm: 0.9999995347168237, iteration: 39594
loss: 1.0772658586502075,grad_norm: 0.9999994252099695, iteration: 39595
loss: 1.0234507322311401,grad_norm: 0.9999991674439231, iteration: 39596
loss: 0.997735321521759,grad_norm: 0.9960919590355324, iteration: 39597
loss: 1.0075547695159912,grad_norm: 0.9999992356243089, iteration: 39598
loss: 0.9829245209693909,grad_norm: 0.973051051950426, iteration: 39599
loss: 1.0122874975204468,grad_norm: 0.9999994995764634, iteration: 39600
loss: 0.9750953316688538,grad_norm: 0.9605771420062278, iteration: 39601
loss: 1.0270427465438843,grad_norm: 0.8080912449998591, iteration: 39602
loss: 0.9741418361663818,grad_norm: 0.8614433132836075, iteration: 39603
loss: 0.9765270948410034,grad_norm: 0.99999915666735, iteration: 39604
loss: 1.013499140739441,grad_norm: 0.9999990841087586, iteration: 39605
loss: 1.0108065605163574,grad_norm: 0.9999991448481198, iteration: 39606
loss: 1.0151546001434326,grad_norm: 0.9999990585701755, iteration: 39607
loss: 0.9959926605224609,grad_norm: 0.9999990663602792, iteration: 39608
loss: 0.9773085713386536,grad_norm: 0.8355379182705487, iteration: 39609
loss: 0.9848719835281372,grad_norm: 0.9999990153307795, iteration: 39610
loss: 0.9955164790153503,grad_norm: 0.9593039385370837, iteration: 39611
loss: 0.9958505630493164,grad_norm: 0.9999990129573586, iteration: 39612
loss: 0.9990315437316895,grad_norm: 0.8932865217215034, iteration: 39613
loss: 1.0003454685211182,grad_norm: 0.9615539416823607, iteration: 39614
loss: 0.9820111393928528,grad_norm: 0.9036266432341659, iteration: 39615
loss: 1.0119333267211914,grad_norm: 0.8881903520109623, iteration: 39616
loss: 0.9914728403091431,grad_norm: 0.9274256459590849, iteration: 39617
loss: 1.0284839868545532,grad_norm: 0.9999991268018391, iteration: 39618
loss: 1.0054121017456055,grad_norm: 0.8605221281762686, iteration: 39619
loss: 1.0163211822509766,grad_norm: 0.9619575647534723, iteration: 39620
loss: 1.0143976211547852,grad_norm: 0.9999991666534204, iteration: 39621
loss: 1.010783314704895,grad_norm: 0.9498080656859249, iteration: 39622
loss: 1.011277198791504,grad_norm: 0.9615946141764058, iteration: 39623
loss: 1.0316030979156494,grad_norm: 0.9999991857119, iteration: 39624
loss: 1.0469954013824463,grad_norm: 0.788063359179988, iteration: 39625
loss: 0.9896469116210938,grad_norm: 0.9330316408326538, iteration: 39626
loss: 0.9844370484352112,grad_norm: 0.8699153660044329, iteration: 39627
loss: 1.0214768648147583,grad_norm: 0.8211438164576202, iteration: 39628
loss: 0.9616367816925049,grad_norm: 0.8667337374649852, iteration: 39629
loss: 1.0081541538238525,grad_norm: 0.8697447555501288, iteration: 39630
loss: 0.9890924096107483,grad_norm: 0.9481324956286797, iteration: 39631
loss: 1.0376543998718262,grad_norm: 0.7782919814759551, iteration: 39632
loss: 0.9813945889472961,grad_norm: 0.9176501915745694, iteration: 39633
loss: 0.9873071908950806,grad_norm: 0.8365738068563795, iteration: 39634
loss: 1.0114877223968506,grad_norm: 0.9999990322194178, iteration: 39635
loss: 1.0350456237792969,grad_norm: 0.9999992109058156, iteration: 39636
loss: 0.967307448387146,grad_norm: 0.8876498804222063, iteration: 39637
loss: 1.0143871307373047,grad_norm: 0.9999990188421966, iteration: 39638
loss: 0.9820976853370667,grad_norm: 0.9999990841282115, iteration: 39639
loss: 1.0173346996307373,grad_norm: 0.9852915038191093, iteration: 39640
loss: 1.0543646812438965,grad_norm: 0.7611092850951692, iteration: 39641
loss: 0.9801993370056152,grad_norm: 0.9999992176606487, iteration: 39642
loss: 1.0390223264694214,grad_norm: 0.9889848001135878, iteration: 39643
loss: 0.9692116379737854,grad_norm: 0.9076697454385257, iteration: 39644
loss: 1.0078985691070557,grad_norm: 0.9785726123212161, iteration: 39645
loss: 1.0138013362884521,grad_norm: 0.9673136790046853, iteration: 39646
loss: 1.0044256448745728,grad_norm: 0.8845671473252229, iteration: 39647
loss: 1.0124993324279785,grad_norm: 0.7878735343683736, iteration: 39648
loss: 0.9814017415046692,grad_norm: 0.999999208234981, iteration: 39649
loss: 1.0104018449783325,grad_norm: 0.9999990752457439, iteration: 39650
loss: 0.983426570892334,grad_norm: 0.9713078067141923, iteration: 39651
loss: 1.0535798072814941,grad_norm: 0.8801233765610479, iteration: 39652
loss: 1.0059138536453247,grad_norm: 0.79640664793554, iteration: 39653
loss: 0.9974561929702759,grad_norm: 0.9549532275152575, iteration: 39654
loss: 0.9934887886047363,grad_norm: 0.7706201269449788, iteration: 39655
loss: 1.0314806699752808,grad_norm: 0.9999991553117089, iteration: 39656
loss: 1.0050119161605835,grad_norm: 0.999998962244884, iteration: 39657
loss: 1.0461690425872803,grad_norm: 0.9999990033478396, iteration: 39658
loss: 1.0150988101959229,grad_norm: 0.9245555719693623, iteration: 39659
loss: 1.0001543760299683,grad_norm: 0.9341311511430944, iteration: 39660
loss: 0.996294379234314,grad_norm: 0.969102696207084, iteration: 39661
loss: 1.136199951171875,grad_norm: 1.000000016564418, iteration: 39662
loss: 1.0102112293243408,grad_norm: 0.9999991691530097, iteration: 39663
loss: 1.014357328414917,grad_norm: 0.9388285023502677, iteration: 39664
loss: 0.9639090895652771,grad_norm: 0.8387716265704666, iteration: 39665
loss: 0.9829745888710022,grad_norm: 0.9999992485493044, iteration: 39666
loss: 0.9691868424415588,grad_norm: 0.9261958053718391, iteration: 39667
loss: 1.0035970211029053,grad_norm: 0.8414968688136298, iteration: 39668
loss: 0.9998579025268555,grad_norm: 0.9402352623268164, iteration: 39669
loss: 1.0525236129760742,grad_norm: 0.999999021692354, iteration: 39670
loss: 1.0082275867462158,grad_norm: 0.915421648096329, iteration: 39671
loss: 1.0149134397506714,grad_norm: 0.9100861244982252, iteration: 39672
loss: 1.0234334468841553,grad_norm: 0.8429050097489235, iteration: 39673
loss: 1.026741862297058,grad_norm: 0.9658919675984647, iteration: 39674
loss: 1.017694115638733,grad_norm: 0.9805879864255644, iteration: 39675
loss: 1.0199930667877197,grad_norm: 0.8120682766148758, iteration: 39676
loss: 0.9894216656684875,grad_norm: 0.8896407657587991, iteration: 39677
loss: 1.0520201921463013,grad_norm: 0.9999991465823009, iteration: 39678
loss: 0.9652793407440186,grad_norm: 0.8844525000946041, iteration: 39679
loss: 1.0093814134597778,grad_norm: 0.9999992350902239, iteration: 39680
loss: 1.0105432271957397,grad_norm: 0.8905959042890615, iteration: 39681
loss: 1.0045549869537354,grad_norm: 0.9999998879924421, iteration: 39682
loss: 0.9960241317749023,grad_norm: 0.9272545590617473, iteration: 39683
loss: 0.9762074947357178,grad_norm: 0.9999992536119291, iteration: 39684
loss: 1.0131919384002686,grad_norm: 0.9999993842808381, iteration: 39685
loss: 0.9810954332351685,grad_norm: 0.9999992161708363, iteration: 39686
loss: 0.9841093420982361,grad_norm: 0.922000094916623, iteration: 39687
loss: 1.0111364126205444,grad_norm: 0.9999992389996992, iteration: 39688
loss: 1.0517916679382324,grad_norm: 0.9999992271290492, iteration: 39689
loss: 0.9893542528152466,grad_norm: 0.9472200249694775, iteration: 39690
loss: 1.0829229354858398,grad_norm: 0.9999994144059974, iteration: 39691
loss: 0.9999446868896484,grad_norm: 0.8323496457644999, iteration: 39692
loss: 1.0191476345062256,grad_norm: 0.8833740298538775, iteration: 39693
loss: 1.010847806930542,grad_norm: 0.8517009857890878, iteration: 39694
loss: 1.0236390829086304,grad_norm: 0.8792461760276308, iteration: 39695
loss: 1.0047800540924072,grad_norm: 0.9204457489289546, iteration: 39696
loss: 0.9957950711250305,grad_norm: 0.9952395525655576, iteration: 39697
loss: 1.0266231298446655,grad_norm: 0.9999992116472693, iteration: 39698
loss: 0.99853515625,grad_norm: 0.9999992503169103, iteration: 39699
loss: 0.9933670163154602,grad_norm: 0.999999107641301, iteration: 39700
loss: 0.9989767670631409,grad_norm: 0.9999990593589168, iteration: 39701
loss: 0.977033257484436,grad_norm: 0.9240156945107628, iteration: 39702
loss: 1.0111645460128784,grad_norm: 0.9999990121664442, iteration: 39703
loss: 0.9825266003608704,grad_norm: 0.9999989670993814, iteration: 39704
loss: 0.972838282585144,grad_norm: 0.9999990419912477, iteration: 39705
loss: 1.0289186239242554,grad_norm: 0.9486811482448027, iteration: 39706
loss: 0.9731951951980591,grad_norm: 0.9299887678679728, iteration: 39707
loss: 1.015174388885498,grad_norm: 0.8644165636311779, iteration: 39708
loss: 0.9979169964790344,grad_norm: 0.9725130880575692, iteration: 39709
loss: 1.0318409204483032,grad_norm: 0.9999990618384766, iteration: 39710
loss: 0.9969661235809326,grad_norm: 0.8578004864055597, iteration: 39711
loss: 1.0288691520690918,grad_norm: 0.9286745183594984, iteration: 39712
loss: 1.0151420831680298,grad_norm: 0.9999990883601118, iteration: 39713
loss: 1.0284538269042969,grad_norm: 0.76675020259346, iteration: 39714
loss: 1.0072296857833862,grad_norm: 0.9981756139675414, iteration: 39715
loss: 0.9858178496360779,grad_norm: 0.9999993243904937, iteration: 39716
loss: 1.0408247709274292,grad_norm: 0.999999122712839, iteration: 39717
loss: 1.052716851234436,grad_norm: 0.9076142075175239, iteration: 39718
loss: 1.0316119194030762,grad_norm: 0.9847254092565076, iteration: 39719
loss: 1.0324631929397583,grad_norm: 0.8548842843049367, iteration: 39720
loss: 0.9962894916534424,grad_norm: 0.9009191017062268, iteration: 39721
loss: 1.0316441059112549,grad_norm: 0.8752165555559946, iteration: 39722
loss: 0.9910587668418884,grad_norm: 0.9999991126347852, iteration: 39723
loss: 1.0108470916748047,grad_norm: 0.9999991784491401, iteration: 39724
loss: 1.0093059539794922,grad_norm: 0.9999990527407608, iteration: 39725
loss: 1.0355509519577026,grad_norm: 0.9999990159177427, iteration: 39726
loss: 1.0205048322677612,grad_norm: 0.83642020359785, iteration: 39727
loss: 1.0029785633087158,grad_norm: 0.7880663474592231, iteration: 39728
loss: 0.9667521119117737,grad_norm: 0.8894804410144178, iteration: 39729
loss: 1.0218796730041504,grad_norm: 0.8196854371602429, iteration: 39730
loss: 0.9806728363037109,grad_norm: 0.970068040191015, iteration: 39731
loss: 1.0007977485656738,grad_norm: 0.9999991116607285, iteration: 39732
loss: 1.028454303741455,grad_norm: 0.9999990137786401, iteration: 39733
loss: 0.999915599822998,grad_norm: 0.999999147443845, iteration: 39734
loss: 0.9508991241455078,grad_norm: 0.8666686280937117, iteration: 39735
loss: 1.0145058631896973,grad_norm: 0.9959955992506848, iteration: 39736
loss: 0.9945262670516968,grad_norm: 0.8050478809602017, iteration: 39737
loss: 1.005009412765503,grad_norm: 0.9999990347301768, iteration: 39738
loss: 1.0073819160461426,grad_norm: 0.8772330981349625, iteration: 39739
loss: 0.9764106869697571,grad_norm: 0.9999991363744474, iteration: 39740
loss: 1.0028135776519775,grad_norm: 0.9999991758073509, iteration: 39741
loss: 1.0134375095367432,grad_norm: 0.7790991709557427, iteration: 39742
loss: 1.0060691833496094,grad_norm: 0.9999993987610275, iteration: 39743
loss: 0.9527295231819153,grad_norm: 0.9191525658927958, iteration: 39744
loss: 1.035869836807251,grad_norm: 0.8964569640535212, iteration: 39745
loss: 1.0118942260742188,grad_norm: 0.8701122221797113, iteration: 39746
loss: 1.0312762260437012,grad_norm: 0.8373991735141911, iteration: 39747
loss: 1.0244872570037842,grad_norm: 0.9999991251176098, iteration: 39748
loss: 0.9794402122497559,grad_norm: 0.8180900710453348, iteration: 39749
loss: 0.9762253761291504,grad_norm: 0.898023303677457, iteration: 39750
loss: 1.0107390880584717,grad_norm: 0.8717574815775329, iteration: 39751
loss: 1.0524934530258179,grad_norm: 0.9115686438971895, iteration: 39752
loss: 0.991405725479126,grad_norm: 0.9999990460207395, iteration: 39753
loss: 1.0058398246765137,grad_norm: 0.8166264838018958, iteration: 39754
loss: 1.0007586479187012,grad_norm: 0.9593476404545159, iteration: 39755
loss: 1.0620057582855225,grad_norm: 0.9999993351417682, iteration: 39756
loss: 1.0818731784820557,grad_norm: 0.98336893035359, iteration: 39757
loss: 0.9679125547409058,grad_norm: 0.9089673603814588, iteration: 39758
loss: 1.0568116903305054,grad_norm: 0.9648952452858452, iteration: 39759
loss: 1.0060678720474243,grad_norm: 0.9999992990629823, iteration: 39760
loss: 1.0318564176559448,grad_norm: 0.8342627795952143, iteration: 39761
loss: 1.0024631023406982,grad_norm: 0.9999990097018387, iteration: 39762
loss: 1.0048495531082153,grad_norm: 0.8814981350106396, iteration: 39763
loss: 0.9878025650978088,grad_norm: 0.8293829047997539, iteration: 39764
loss: 1.0244032144546509,grad_norm: 0.8637970159614577, iteration: 39765
loss: 0.9869397878646851,grad_norm: 0.9999997074874711, iteration: 39766
loss: 1.0109834671020508,grad_norm: 0.9076757134015971, iteration: 39767
loss: 1.0096911191940308,grad_norm: 0.9855126961585988, iteration: 39768
loss: 1.0202139616012573,grad_norm: 0.9999991912858638, iteration: 39769
loss: 0.9864700436592102,grad_norm: 0.882941850954635, iteration: 39770
loss: 0.9882575273513794,grad_norm: 0.9562421298489691, iteration: 39771
loss: 1.024598240852356,grad_norm: 0.9202396574215141, iteration: 39772
loss: 1.0071488618850708,grad_norm: 0.9999995603961227, iteration: 39773
loss: 1.0092648267745972,grad_norm: 0.9924376735603251, iteration: 39774
loss: 1.0136311054229736,grad_norm: 0.9999996917196161, iteration: 39775
loss: 1.068429946899414,grad_norm: 0.9999991504474209, iteration: 39776
loss: 0.9977031946182251,grad_norm: 0.9999989908957555, iteration: 39777
loss: 0.9873312711715698,grad_norm: 0.7946436871536252, iteration: 39778
loss: 1.060869574546814,grad_norm: 0.9999992466553658, iteration: 39779
loss: 0.9802889227867126,grad_norm: 0.9528011661843349, iteration: 39780
loss: 0.9806471467018127,grad_norm: 0.942928426399249, iteration: 39781
loss: 1.038760781288147,grad_norm: 0.9181795700960597, iteration: 39782
loss: 0.9854989647865295,grad_norm: 0.9999990249745666, iteration: 39783
loss: 0.9996592402458191,grad_norm: 0.8006691901423088, iteration: 39784
loss: 0.9924448132514954,grad_norm: 0.9999996496333756, iteration: 39785
loss: 0.9648674726486206,grad_norm: 0.821924909477249, iteration: 39786
loss: 0.9863879680633545,grad_norm: 0.9999994354642836, iteration: 39787
loss: 0.9749414324760437,grad_norm: 0.8441380403223967, iteration: 39788
loss: 0.9974994659423828,grad_norm: 0.9825601592012204, iteration: 39789
loss: 0.9780948758125305,grad_norm: 0.8406673954425812, iteration: 39790
loss: 0.9727697968482971,grad_norm: 0.999999079545737, iteration: 39791
loss: 1.0011558532714844,grad_norm: 0.9205092725287222, iteration: 39792
loss: 1.0243016481399536,grad_norm: 0.9999991644303894, iteration: 39793
loss: 0.9942325353622437,grad_norm: 0.999999039666346, iteration: 39794
loss: 1.0203059911727905,grad_norm: 0.9999992591448462, iteration: 39795
loss: 1.0558314323425293,grad_norm: 0.9479132949951601, iteration: 39796
loss: 1.0035878419876099,grad_norm: 0.9999991021540613, iteration: 39797
loss: 0.9772160053253174,grad_norm: 0.8081817890498385, iteration: 39798
loss: 1.054370403289795,grad_norm: 0.9999992696631448, iteration: 39799
loss: 1.133899211883545,grad_norm: 0.9999995152764523, iteration: 39800
loss: 1.0088013410568237,grad_norm: 0.9999991672208882, iteration: 39801
loss: 0.9367910027503967,grad_norm: 0.9870165657811192, iteration: 39802
loss: 1.0163227319717407,grad_norm: 0.9999991737481283, iteration: 39803
loss: 0.9872129559516907,grad_norm: 0.8662032655950556, iteration: 39804
loss: 1.0111818313598633,grad_norm: 0.9999990480050481, iteration: 39805
loss: 1.0404659509658813,grad_norm: 0.8914020663907996, iteration: 39806
loss: 1.0180689096450806,grad_norm: 0.8255363776255146, iteration: 39807
loss: 0.9960547089576721,grad_norm: 0.9999990640276736, iteration: 39808
loss: 1.0042428970336914,grad_norm: 0.9158505071829848, iteration: 39809
loss: 1.0309501886367798,grad_norm: 0.9401011278175282, iteration: 39810
loss: 1.0113409757614136,grad_norm: 0.999999537116163, iteration: 39811
loss: 0.9900844097137451,grad_norm: 0.9316820559944055, iteration: 39812
loss: 1.014168620109558,grad_norm: 0.9999993750130239, iteration: 39813
loss: 0.9702344536781311,grad_norm: 0.9999989511754176, iteration: 39814
loss: 0.9840905666351318,grad_norm: 0.9196672702245651, iteration: 39815
loss: 1.014397144317627,grad_norm: 0.8221800758528581, iteration: 39816
loss: 1.0334097146987915,grad_norm: 0.9999995751068811, iteration: 39817
loss: 0.9885885715484619,grad_norm: 0.9109392834749936, iteration: 39818
loss: 1.0005873441696167,grad_norm: 0.999999238602767, iteration: 39819
loss: 0.9812220931053162,grad_norm: 0.9276071954031412, iteration: 39820
loss: 1.0305577516555786,grad_norm: 0.9369737024053314, iteration: 39821
loss: 0.9676602482795715,grad_norm: 0.9999990489801124, iteration: 39822
loss: 1.0282450914382935,grad_norm: 0.9999991164268183, iteration: 39823
loss: 1.0020054578781128,grad_norm: 0.8834583976439216, iteration: 39824
loss: 1.0015642642974854,grad_norm: 0.9107431673855185, iteration: 39825
loss: 1.0134469270706177,grad_norm: 0.9999992631519323, iteration: 39826
loss: 1.0123404264450073,grad_norm: 0.9999990139208847, iteration: 39827
loss: 1.0305427312850952,grad_norm: 0.9481097988541443, iteration: 39828
loss: 0.9977309107780457,grad_norm: 0.8761185870443197, iteration: 39829
loss: 0.9478027820587158,grad_norm: 0.9740222160324541, iteration: 39830
loss: 1.0378941297531128,grad_norm: 0.9999994147501347, iteration: 39831
loss: 1.0437767505645752,grad_norm: 0.9999993324897282, iteration: 39832
loss: 0.9669986367225647,grad_norm: 0.7613546578769377, iteration: 39833
loss: 0.9979009032249451,grad_norm: 0.9999991821987315, iteration: 39834
loss: 0.9819328784942627,grad_norm: 0.9075273802745014, iteration: 39835
loss: 0.9500444531440735,grad_norm: 0.7375465420675748, iteration: 39836
loss: 0.9977782964706421,grad_norm: 0.7946629756305293, iteration: 39837
loss: 1.0670361518859863,grad_norm: 0.9999993509830025, iteration: 39838
loss: 1.025538682937622,grad_norm: 0.9043428732870511, iteration: 39839
loss: 0.9877086281776428,grad_norm: 0.8029885286834868, iteration: 39840
loss: 1.0002250671386719,grad_norm: 0.9229436330313322, iteration: 39841
loss: 1.0144637823104858,grad_norm: 0.8965809599729265, iteration: 39842
loss: 0.974025309085846,grad_norm: 0.8610269112998608, iteration: 39843
loss: 1.0037909746170044,grad_norm: 0.9980065123624848, iteration: 39844
loss: 1.0022245645523071,grad_norm: 0.9999990294971263, iteration: 39845
loss: 0.9786217212677002,grad_norm: 0.9531175538662605, iteration: 39846
loss: 1.0318130254745483,grad_norm: 0.9409406302903112, iteration: 39847
loss: 1.031460165977478,grad_norm: 0.9999991638325972, iteration: 39848
loss: 0.9930136799812317,grad_norm: 0.95046567552656, iteration: 39849
loss: 0.9891284704208374,grad_norm: 0.8601736736881693, iteration: 39850
loss: 1.010359287261963,grad_norm: 0.8461693930242363, iteration: 39851
loss: 1.0246117115020752,grad_norm: 0.9999991987062724, iteration: 39852
loss: 1.0694791078567505,grad_norm: 0.9999991617310148, iteration: 39853
loss: 1.0403271913528442,grad_norm: 0.8381627668961017, iteration: 39854
loss: 0.9683045744895935,grad_norm: 0.9113134824358071, iteration: 39855
loss: 1.0062357187271118,grad_norm: 0.8005905862403123, iteration: 39856
loss: 1.0282411575317383,grad_norm: 0.8338380701336012, iteration: 39857
loss: 0.9953680634498596,grad_norm: 0.999999107002576, iteration: 39858
loss: 0.9741238355636597,grad_norm: 0.822503971256375, iteration: 39859
loss: 0.9992780089378357,grad_norm: 0.963150634894674, iteration: 39860
loss: 1.0459500551223755,grad_norm: 0.9999990460429913, iteration: 39861
loss: 1.0157527923583984,grad_norm: 0.999999154163307, iteration: 39862
loss: 1.0208579301834106,grad_norm: 0.9657667776366348, iteration: 39863
loss: 0.9965530633926392,grad_norm: 0.9999992795016647, iteration: 39864
loss: 0.985995352268219,grad_norm: 0.9877781939493425, iteration: 39865
loss: 0.9809331893920898,grad_norm: 0.8596440657159236, iteration: 39866
loss: 1.0140705108642578,grad_norm: 0.8791868995286364, iteration: 39867
loss: 1.07965886592865,grad_norm: 0.9999991760613727, iteration: 39868
loss: 1.0008726119995117,grad_norm: 0.8036376749720012, iteration: 39869
loss: 0.9964367151260376,grad_norm: 0.9999989959046734, iteration: 39870
loss: 0.9977184534072876,grad_norm: 0.9264318011297709, iteration: 39871
loss: 0.9610018730163574,grad_norm: 0.9999992379548776, iteration: 39872
loss: 0.9948164224624634,grad_norm: 0.999998894928002, iteration: 39873
loss: 0.9947590827941895,grad_norm: 0.9351380939529985, iteration: 39874
loss: 1.0309964418411255,grad_norm: 0.7867997692068143, iteration: 39875
loss: 0.9795764088630676,grad_norm: 0.9999991000460198, iteration: 39876
loss: 0.954858124256134,grad_norm: 0.9773076560326516, iteration: 39877
loss: 0.9563384056091309,grad_norm: 0.9999991672491914, iteration: 39878
loss: 1.002867579460144,grad_norm: 0.9999991462690644, iteration: 39879
loss: 1.019180178642273,grad_norm: 0.8625615688954476, iteration: 39880
loss: 0.9711556434631348,grad_norm: 0.9361889281957061, iteration: 39881
loss: 1.0075446367263794,grad_norm: 0.9353577679953405, iteration: 39882
loss: 0.992161750793457,grad_norm: 0.8247452803721961, iteration: 39883
loss: 0.9945672154426575,grad_norm: 0.9576275204179052, iteration: 39884
loss: 1.0292993783950806,grad_norm: 0.9896669260784067, iteration: 39885
loss: 0.9920818209648132,grad_norm: 0.8694642607351547, iteration: 39886
loss: 0.9513686895370483,grad_norm: 0.9464906126436392, iteration: 39887
loss: 1.001432180404663,grad_norm: 0.9165111090995597, iteration: 39888
loss: 1.0259979963302612,grad_norm: 0.9999998832108687, iteration: 39889
loss: 1.0238538980484009,grad_norm: 0.9167650133824016, iteration: 39890
loss: 1.0105969905853271,grad_norm: 0.765472834222052, iteration: 39891
loss: 1.0773502588272095,grad_norm: 0.9999997768622274, iteration: 39892
loss: 1.0004167556762695,grad_norm: 0.9479296498709584, iteration: 39893
loss: 0.9679587483406067,grad_norm: 0.8537214066974872, iteration: 39894
loss: 0.9898236989974976,grad_norm: 0.9999991454228815, iteration: 39895
loss: 1.005987286567688,grad_norm: 0.9999991977800499, iteration: 39896
loss: 1.007642388343811,grad_norm: 0.9999997033234068, iteration: 39897
loss: 1.0182565450668335,grad_norm: 0.8869521005144647, iteration: 39898
loss: 1.0416419506072998,grad_norm: 0.9999998216425702, iteration: 39899
loss: 0.9380558133125305,grad_norm: 0.9999991137464785, iteration: 39900
loss: 1.0508683919906616,grad_norm: 0.9321740979960055, iteration: 39901
loss: 0.9761861562728882,grad_norm: 0.8716397232546095, iteration: 39902
loss: 1.0104117393493652,grad_norm: 0.9210384489345771, iteration: 39903
loss: 1.0373313426971436,grad_norm: 0.9999991631677815, iteration: 39904
loss: 0.9964251518249512,grad_norm: 0.9999994920015907, iteration: 39905
loss: 1.003536581993103,grad_norm: 0.9999991421953079, iteration: 39906
loss: 1.0383398532867432,grad_norm: 0.9999996428067311, iteration: 39907
loss: 1.0067425966262817,grad_norm: 0.9073735007970879, iteration: 39908
loss: 0.9718292355537415,grad_norm: 0.9999997778704052, iteration: 39909
loss: 0.98480224609375,grad_norm: 0.9997112695332738, iteration: 39910
loss: 0.995030403137207,grad_norm: 0.9999992045884252, iteration: 39911
loss: 0.9829687476158142,grad_norm: 0.9142753301148905, iteration: 39912
loss: 0.9818491339683533,grad_norm: 0.9999990293068768, iteration: 39913
loss: 0.9910812973976135,grad_norm: 0.9999994811635183, iteration: 39914
loss: 1.131279706954956,grad_norm: 0.9999991339744381, iteration: 39915
loss: 0.9950723648071289,grad_norm: 0.9976024232458152, iteration: 39916
loss: 1.0412946939468384,grad_norm: 0.9999990442685386, iteration: 39917
loss: 0.9956056475639343,grad_norm: 0.8686139428869185, iteration: 39918
loss: 1.012178659439087,grad_norm: 0.9999991176344855, iteration: 39919
loss: 1.019237756729126,grad_norm: 0.8782227018860308, iteration: 39920
loss: 1.0027981996536255,grad_norm: 0.7631327405095317, iteration: 39921
loss: 0.965522289276123,grad_norm: 0.9226576914490442, iteration: 39922
loss: 1.0051475763320923,grad_norm: 0.9999990903991539, iteration: 39923
loss: 0.9870356321334839,grad_norm: 0.9999990949617074, iteration: 39924
loss: 1.035030722618103,grad_norm: 0.9999994036515832, iteration: 39925
loss: 1.1054558753967285,grad_norm: 0.9999996223542077, iteration: 39926
loss: 0.9801338911056519,grad_norm: 0.9036956948529141, iteration: 39927
loss: 1.0246038436889648,grad_norm: 0.8543440232344989, iteration: 39928
loss: 1.0149931907653809,grad_norm: 0.9999990908928347, iteration: 39929
loss: 1.0337340831756592,grad_norm: 0.9999993813708566, iteration: 39930
loss: 0.9798620939254761,grad_norm: 0.9999990835706073, iteration: 39931
loss: 1.0057326555252075,grad_norm: 0.8559817123994674, iteration: 39932
loss: 1.0124377012252808,grad_norm: 0.9999993014233937, iteration: 39933
loss: 0.9677823185920715,grad_norm: 0.858540961175294, iteration: 39934
loss: 1.0036425590515137,grad_norm: 0.8792417331246538, iteration: 39935
loss: 0.9992426037788391,grad_norm: 0.9999993042519802, iteration: 39936
loss: 1.0305516719818115,grad_norm: 0.9874114489346091, iteration: 39937
loss: 1.0045937299728394,grad_norm: 0.7785141018463128, iteration: 39938
loss: 1.027361512184143,grad_norm: 0.999999106666438, iteration: 39939
loss: 1.1140598058700562,grad_norm: 0.9999996326390467, iteration: 39940
loss: 1.0156971216201782,grad_norm: 0.9442216136493052, iteration: 39941
loss: 1.0303924083709717,grad_norm: 0.9901661494866787, iteration: 39942
loss: 1.0098904371261597,grad_norm: 0.9999990680495872, iteration: 39943
loss: 0.9892670512199402,grad_norm: 0.9999995198075284, iteration: 39944
loss: 1.092103362083435,grad_norm: 0.9999996663378012, iteration: 39945
loss: 1.0000461339950562,grad_norm: 0.8950709728879764, iteration: 39946
loss: 1.000878095626831,grad_norm: 0.8355543390019766, iteration: 39947
loss: 0.9798113107681274,grad_norm: 0.9507984575618367, iteration: 39948
loss: 0.9889325499534607,grad_norm: 0.9999992658450318, iteration: 39949
loss: 0.9804844260215759,grad_norm: 0.999999088112284, iteration: 39950
loss: 1.0328271389007568,grad_norm: 0.999999435695068, iteration: 39951
loss: 1.0418528318405151,grad_norm: 0.9999994200599688, iteration: 39952
loss: 0.9953141212463379,grad_norm: 0.9999993356845007, iteration: 39953
loss: 1.0111827850341797,grad_norm: 0.9999995551055598, iteration: 39954
loss: 0.9686484336853027,grad_norm: 0.8628909817795861, iteration: 39955
loss: 1.0441008806228638,grad_norm: 0.9999991063234264, iteration: 39956
loss: 1.0506839752197266,grad_norm: 0.9999998857558341, iteration: 39957
loss: 0.9933568835258484,grad_norm: 0.9999989993060072, iteration: 39958
loss: 1.001822590827942,grad_norm: 0.9588949936450137, iteration: 39959
loss: 1.0318398475646973,grad_norm: 0.9999994039437793, iteration: 39960
loss: 0.9984033703804016,grad_norm: 0.9974427045332813, iteration: 39961
loss: 1.0113871097564697,grad_norm: 0.9999994871247692, iteration: 39962
loss: 1.029235601425171,grad_norm: 0.9999991957791904, iteration: 39963
loss: 1.021382212638855,grad_norm: 0.9999990652054742, iteration: 39964
loss: 1.012821912765503,grad_norm: 0.9999995809224975, iteration: 39965
loss: 1.025008201599121,grad_norm: 0.9999994797841312, iteration: 39966
loss: 1.001496434211731,grad_norm: 0.9405481308744302, iteration: 39967
loss: 1.0036286115646362,grad_norm: 0.9317712192184652, iteration: 39968
loss: 1.0489298105239868,grad_norm: 0.9999995461916535, iteration: 39969
loss: 0.9745232462882996,grad_norm: 0.9999994881834484, iteration: 39970
loss: 1.0010591745376587,grad_norm: 0.9999991408742102, iteration: 39971
loss: 1.000449776649475,grad_norm: 0.9999993866324111, iteration: 39972
loss: 1.0327889919281006,grad_norm: 0.9999989594823459, iteration: 39973
loss: 0.9536609649658203,grad_norm: 0.9787097626486327, iteration: 39974
loss: 0.9853608012199402,grad_norm: 0.9307982264783079, iteration: 39975
loss: 1.0485711097717285,grad_norm: 0.9999992958774439, iteration: 39976
loss: 0.9704971313476562,grad_norm: 0.877575114425249, iteration: 39977
loss: 1.0649213790893555,grad_norm: 0.9999994997583208, iteration: 39978
loss: 0.9476394057273865,grad_norm: 0.9999992218818374, iteration: 39979
loss: 0.9558312296867371,grad_norm: 0.9458964082826001, iteration: 39980
loss: 1.0016579627990723,grad_norm: 0.9303812867495515, iteration: 39981
loss: 0.9893025755882263,grad_norm: 0.9478179045117471, iteration: 39982
loss: 1.0031332969665527,grad_norm: 0.9999992154075013, iteration: 39983
loss: 0.9954372048377991,grad_norm: 0.9999990911771723, iteration: 39984
loss: 0.9870588183403015,grad_norm: 0.9690109794557089, iteration: 39985
loss: 0.9789022207260132,grad_norm: 0.9999995850205312, iteration: 39986
loss: 1.0222280025482178,grad_norm: 0.9999992441613054, iteration: 39987
loss: 1.1146800518035889,grad_norm: 0.9999996124078447, iteration: 39988
loss: 1.0394281148910522,grad_norm: 0.8875373250529635, iteration: 39989
loss: 0.9968327879905701,grad_norm: 0.908496724648978, iteration: 39990
loss: 1.0015567541122437,grad_norm: 0.9933850085545073, iteration: 39991
loss: 1.0286275148391724,grad_norm: 0.9999993968931861, iteration: 39992
loss: 1.027321696281433,grad_norm: 0.8485595199662992, iteration: 39993
loss: 1.0396896600723267,grad_norm: 0.9999993559709012, iteration: 39994
loss: 0.9871824383735657,grad_norm: 0.999999118266868, iteration: 39995
loss: 0.9731621146202087,grad_norm: 0.9999991434945263, iteration: 39996
loss: 1.0343842506408691,grad_norm: 0.8460457940437164, iteration: 39997
loss: 1.0326826572418213,grad_norm: 0.9999993448292172, iteration: 39998
loss: 0.9992609620094299,grad_norm: 0.8009282017037536, iteration: 39999
loss: 1.1878316402435303,grad_norm: 0.9186475315939362, iteration: 40000
Evaluating at step 40000
{'val': 0.9995872136205435, 'test': 2.729022965263635}
loss: 1.0667372941970825,grad_norm: 0.999999302857306, iteration: 40001
loss: 1.049274206161499,grad_norm: 0.9999992450910022, iteration: 40002
loss: 1.0273857116699219,grad_norm: 0.9999992992575447, iteration: 40003
loss: 1.025245189666748,grad_norm: 0.99999913790908, iteration: 40004
loss: 1.0626723766326904,grad_norm: 0.9831640643923184, iteration: 40005
loss: 1.0191543102264404,grad_norm: 0.9999992441888981, iteration: 40006
loss: 1.000468134880066,grad_norm: 0.9178218456450689, iteration: 40007
loss: 1.016094446182251,grad_norm: 0.999999103586124, iteration: 40008
loss: 1.0650633573532104,grad_norm: 0.9928921716807185, iteration: 40009
loss: 1.0489367246627808,grad_norm: 0.9999996210675896, iteration: 40010
loss: 1.0074708461761475,grad_norm: 0.8378123625000199, iteration: 40011
loss: 1.0059863328933716,grad_norm: 0.9999991952195394, iteration: 40012
loss: 1.0202785730361938,grad_norm: 0.9010492562380639, iteration: 40013
loss: 0.9946876764297485,grad_norm: 0.8282954424951036, iteration: 40014
loss: 1.0611425638198853,grad_norm: 0.9999998132088265, iteration: 40015
loss: 1.1452875137329102,grad_norm: 0.9999996690955045, iteration: 40016
loss: 0.9888858795166016,grad_norm: 0.8888548701343887, iteration: 40017
loss: 0.9933050274848938,grad_norm: 0.999999403401355, iteration: 40018
loss: 1.0202934741973877,grad_norm: 0.9406924393418996, iteration: 40019
loss: 1.0040603876113892,grad_norm: 0.9999992081583305, iteration: 40020
loss: 0.9815940260887146,grad_norm: 0.9122088382470963, iteration: 40021
loss: 1.038339376449585,grad_norm: 0.9760081500180409, iteration: 40022
loss: 1.0149303674697876,grad_norm: 0.9481158321756177, iteration: 40023
loss: 1.0125452280044556,grad_norm: 0.9999992129151153, iteration: 40024
loss: 1.008285641670227,grad_norm: 0.9557921365335503, iteration: 40025
loss: 1.069405198097229,grad_norm: 0.8692162820833699, iteration: 40026
loss: 0.9970511198043823,grad_norm: 0.9647941375461143, iteration: 40027
loss: 1.0127047300338745,grad_norm: 0.999999162802491, iteration: 40028
loss: 1.001543402671814,grad_norm: 0.9647581560674463, iteration: 40029
loss: 1.0022937059402466,grad_norm: 0.9999991631434656, iteration: 40030
loss: 1.0139607191085815,grad_norm: 0.9999990176708098, iteration: 40031
loss: 1.0450056791305542,grad_norm: 0.9999991847836163, iteration: 40032
loss: 1.0233913660049438,grad_norm: 0.9999990911823685, iteration: 40033
loss: 1.0452359914779663,grad_norm: 0.9999994229907178, iteration: 40034
loss: 1.0416181087493896,grad_norm: 0.9877332644712817, iteration: 40035
loss: 1.0175029039382935,grad_norm: 0.9999991894053882, iteration: 40036
loss: 1.016434669494629,grad_norm: 0.9999991707216225, iteration: 40037
loss: 1.0213518142700195,grad_norm: 0.9999993491321908, iteration: 40038
loss: 1.0024912357330322,grad_norm: 0.9999991655577517, iteration: 40039
loss: 1.037455677986145,grad_norm: 0.9999991438843118, iteration: 40040
loss: 1.004900336265564,grad_norm: 0.9999992028613393, iteration: 40041
loss: 1.0274970531463623,grad_norm: 0.9625782184159057, iteration: 40042
loss: 1.022035837173462,grad_norm: 0.9999992977725531, iteration: 40043
loss: 1.031456470489502,grad_norm: 0.9999990095244861, iteration: 40044
loss: 0.9926884174346924,grad_norm: 0.9780163049814005, iteration: 40045
loss: 0.9834175109863281,grad_norm: 0.9999992285792414, iteration: 40046
loss: 1.009863257408142,grad_norm: 0.9999992474285887, iteration: 40047
loss: 0.9830018281936646,grad_norm: 0.8768338479503971, iteration: 40048
loss: 1.0084152221679688,grad_norm: 0.8130047176634794, iteration: 40049
loss: 1.0140568017959595,grad_norm: 0.9999995023358205, iteration: 40050
loss: 0.9744443893432617,grad_norm: 0.8616313042125382, iteration: 40051
loss: 1.0243967771530151,grad_norm: 0.9564531090197743, iteration: 40052
loss: 1.0246095657348633,grad_norm: 1.0000000252764916, iteration: 40053
loss: 0.9814879894256592,grad_norm: 0.9986536081239605, iteration: 40054
loss: 1.0053682327270508,grad_norm: 0.9159514126811215, iteration: 40055
loss: 1.0292938947677612,grad_norm: 0.999999038310454, iteration: 40056
loss: 1.0005922317504883,grad_norm: 0.9999992191398164, iteration: 40057
loss: 0.9981228709220886,grad_norm: 0.999999102426462, iteration: 40058
loss: 1.0140442848205566,grad_norm: 0.9445681286023601, iteration: 40059
loss: 1.0052094459533691,grad_norm: 0.9999994107114982, iteration: 40060
loss: 1.0288071632385254,grad_norm: 0.9909631791940506, iteration: 40061
loss: 0.9946913123130798,grad_norm: 0.9878696301440707, iteration: 40062
loss: 1.0387400388717651,grad_norm: 0.9999992871577852, iteration: 40063
loss: 0.987753689289093,grad_norm: 0.9999994316965339, iteration: 40064
loss: 0.9935208559036255,grad_norm: 0.8412296045966305, iteration: 40065
loss: 0.9804012179374695,grad_norm: 0.9999991408904103, iteration: 40066
loss: 1.046281337738037,grad_norm: 0.9999994748241399, iteration: 40067
loss: 1.0323269367218018,grad_norm: 0.9999998841629515, iteration: 40068
loss: 1.001517415046692,grad_norm: 0.999999111063519, iteration: 40069
loss: 1.0486191511154175,grad_norm: 0.9999992337663461, iteration: 40070
loss: 1.057080864906311,grad_norm: 0.9999995071364401, iteration: 40071
loss: 1.069602131843567,grad_norm: 0.9999995377797103, iteration: 40072
loss: 1.0785399675369263,grad_norm: 0.9492940998938988, iteration: 40073
loss: 1.032098412513733,grad_norm: 0.9999994551014751, iteration: 40074
loss: 1.0262887477874756,grad_norm: 0.9999991610224359, iteration: 40075
loss: 0.990779459476471,grad_norm: 0.9952635827460392, iteration: 40076
loss: 1.0624902248382568,grad_norm: 0.9999996734170671, iteration: 40077
loss: 1.0099505186080933,grad_norm: 0.9800050833364371, iteration: 40078
loss: 0.9979103207588196,grad_norm: 0.999999049675845, iteration: 40079
loss: 1.0604372024536133,grad_norm: 0.9999997850440311, iteration: 40080
loss: 1.000137448310852,grad_norm: 0.9063352576831478, iteration: 40081
loss: 0.9823579788208008,grad_norm: 0.999999851242822, iteration: 40082
loss: 1.0317963361740112,grad_norm: 0.9999994026491849, iteration: 40083
loss: 1.1174311637878418,grad_norm: 0.9999995450630375, iteration: 40084
loss: 1.003572940826416,grad_norm: 0.9999992851941615, iteration: 40085
loss: 1.1422632932662964,grad_norm: 0.9999992529281718, iteration: 40086
loss: 1.2384144067764282,grad_norm: 0.9999999362514079, iteration: 40087
loss: 0.9883589148521423,grad_norm: 0.9999998254406676, iteration: 40088
loss: 1.2044521570205688,grad_norm: 0.9999998508471114, iteration: 40089
loss: 1.0942109823226929,grad_norm: 0.9999999064936824, iteration: 40090
loss: 0.9937359690666199,grad_norm: 0.8056972145762236, iteration: 40091
loss: 1.015604019165039,grad_norm: 0.9440697456375556, iteration: 40092
loss: 0.9995443224906921,grad_norm: 0.9999995561180473, iteration: 40093
loss: 1.024092197418213,grad_norm: 0.9999990545003484, iteration: 40094
loss: 1.0143781900405884,grad_norm: 0.8588741367685828, iteration: 40095
loss: 1.0340291261672974,grad_norm: 0.9955436556328187, iteration: 40096
loss: 1.0064365863800049,grad_norm: 0.9999991166205816, iteration: 40097
loss: 1.0171988010406494,grad_norm: 0.9452517554401464, iteration: 40098
loss: 1.0415573120117188,grad_norm: 0.9999996095767534, iteration: 40099
loss: 1.0111292600631714,grad_norm: 0.905802992066031, iteration: 40100
loss: 1.0324711799621582,grad_norm: 0.8080936952625073, iteration: 40101
loss: 0.9861167669296265,grad_norm: 0.9999992662670368, iteration: 40102
loss: 1.065751552581787,grad_norm: 0.9870002362474213, iteration: 40103
loss: 1.1415492296218872,grad_norm: 0.9999999761647832, iteration: 40104
loss: 1.005489468574524,grad_norm: 0.9999990660725134, iteration: 40105
loss: 1.1482627391815186,grad_norm: 0.9999997045528427, iteration: 40106
loss: 1.2263555526733398,grad_norm: 0.9999994928472277, iteration: 40107
loss: 1.07343590259552,grad_norm: 0.999999080675192, iteration: 40108
loss: 1.191699743270874,grad_norm: 0.9999999189048335, iteration: 40109
loss: 1.1736193895339966,grad_norm: 0.9999998479337334, iteration: 40110
loss: 0.9946075081825256,grad_norm: 0.9999990883490134, iteration: 40111
loss: 1.1047238111495972,grad_norm: 0.9999993556156754, iteration: 40112
loss: 1.043128490447998,grad_norm: 0.9999998119959979, iteration: 40113
loss: 1.1541683673858643,grad_norm: 0.9999993899007078, iteration: 40114
loss: 1.2874568700790405,grad_norm: 1.0000000042579011, iteration: 40115
loss: 1.052315592765808,grad_norm: 0.999999769578274, iteration: 40116
loss: 1.036697506904602,grad_norm: 0.9999991199906455, iteration: 40117
loss: 0.9862919449806213,grad_norm: 0.9709094294712902, iteration: 40118
loss: 1.0391908884048462,grad_norm: 0.9999991754940447, iteration: 40119
loss: 1.0866785049438477,grad_norm: 0.9999994127122883, iteration: 40120
loss: 1.0233556032180786,grad_norm: 0.9999995080653729, iteration: 40121
loss: 1.0619927644729614,grad_norm: 0.9793492352288689, iteration: 40122
loss: 1.0222960710525513,grad_norm: 0.8886180928086244, iteration: 40123
loss: 1.1641594171524048,grad_norm: 0.9999997104844631, iteration: 40124
loss: 1.0641297101974487,grad_norm: 0.9999998655340109, iteration: 40125
loss: 1.0144752264022827,grad_norm: 0.9789846598603388, iteration: 40126
loss: 1.00303316116333,grad_norm: 0.9999991862997242, iteration: 40127
loss: 0.9801263809204102,grad_norm: 0.9729240096690762, iteration: 40128
loss: 1.0982874631881714,grad_norm: 0.9999992248289071, iteration: 40129
loss: 0.9862748980522156,grad_norm: 0.9999991242172036, iteration: 40130
loss: 1.1390982866287231,grad_norm: 0.999999868395894, iteration: 40131
loss: 1.2560584545135498,grad_norm: 0.999999593958813, iteration: 40132
loss: 1.0469419956207275,grad_norm: 0.9999993254504074, iteration: 40133
loss: 1.023874282836914,grad_norm: 0.9999998816066927, iteration: 40134
loss: 1.030256748199463,grad_norm: 0.9999999085129176, iteration: 40135
loss: 1.089924931526184,grad_norm: 0.9999991546080081, iteration: 40136
loss: 1.1054797172546387,grad_norm: 0.9999997497907108, iteration: 40137
loss: 1.1237653493881226,grad_norm: 0.9999995357472208, iteration: 40138
loss: 1.083656907081604,grad_norm: 0.9999994133907008, iteration: 40139
loss: 1.1394078731536865,grad_norm: 0.9999998403374893, iteration: 40140
loss: 0.9925479292869568,grad_norm: 0.8823677498291453, iteration: 40141
loss: 1.090990424156189,grad_norm: 0.9999997289271713, iteration: 40142
loss: 1.1999133825302124,grad_norm: 0.9999999333886945, iteration: 40143
loss: 1.085608959197998,grad_norm: 0.9999995666175617, iteration: 40144
loss: 1.0380802154541016,grad_norm: 0.9999994249472592, iteration: 40145
loss: 1.3391153812408447,grad_norm: 0.9999996781363439, iteration: 40146
loss: 1.0576738119125366,grad_norm: 0.9999992546990942, iteration: 40147
loss: 1.12311589717865,grad_norm: 0.9999996640903771, iteration: 40148
loss: 1.1728720664978027,grad_norm: 0.9999994983839582, iteration: 40149
loss: 1.083052635192871,grad_norm: 0.9999999442168322, iteration: 40150
loss: 1.030266523361206,grad_norm: 0.9999992470973752, iteration: 40151
loss: 1.0850982666015625,grad_norm: 0.9999998021801726, iteration: 40152
loss: 0.9816986322402954,grad_norm: 0.9999992941947646, iteration: 40153
loss: 1.0579118728637695,grad_norm: 0.9999993271501196, iteration: 40154
loss: 1.0997340679168701,grad_norm: 0.9999992794245679, iteration: 40155
loss: 1.0447192192077637,grad_norm: 0.9999994146155609, iteration: 40156
loss: 1.163674235343933,grad_norm: 0.9999991729932803, iteration: 40157
loss: 1.1134586334228516,grad_norm: 0.9999995041749189, iteration: 40158
loss: 1.0643709897994995,grad_norm: 0.9999997615696254, iteration: 40159
loss: 1.0371832847595215,grad_norm: 0.9999992476020382, iteration: 40160
loss: 1.0290629863739014,grad_norm: 0.9999989778389634, iteration: 40161
loss: 1.145606279373169,grad_norm: 0.9999991183054743, iteration: 40162
loss: 1.2345305681228638,grad_norm: 0.9999998518872021, iteration: 40163
loss: 1.1394271850585938,grad_norm: 0.9999994315311633, iteration: 40164
loss: 1.0791267156600952,grad_norm: 0.999999435157854, iteration: 40165
loss: 1.1412075757980347,grad_norm: 0.9999998624682539, iteration: 40166
loss: 1.1636959314346313,grad_norm: 0.9999996271587638, iteration: 40167
loss: 1.1063761711120605,grad_norm: 0.9999994930010692, iteration: 40168
loss: 1.1126031875610352,grad_norm: 0.9999996330570383, iteration: 40169
loss: 1.0127930641174316,grad_norm: 0.9999997146184766, iteration: 40170
loss: 1.166107177734375,grad_norm: 0.9999993995844213, iteration: 40171
loss: 1.0469083786010742,grad_norm: 0.999999080744055, iteration: 40172
loss: 0.9981791377067566,grad_norm: 0.8973126955326256, iteration: 40173
loss: 1.2435545921325684,grad_norm: 0.999999894290044, iteration: 40174
loss: 1.116591453552246,grad_norm: 0.9999993223972065, iteration: 40175
loss: 1.0044975280761719,grad_norm: 0.9999995766477515, iteration: 40176
loss: 1.128057837486267,grad_norm: 0.9999993846643184, iteration: 40177
loss: 1.1194456815719604,grad_norm: 1.0000000627606511, iteration: 40178
loss: 1.273558497428894,grad_norm: 0.9999992700807449, iteration: 40179
loss: 1.033279538154602,grad_norm: 0.9999993270565511, iteration: 40180
loss: 1.0129891633987427,grad_norm: 0.9999992423007508, iteration: 40181
loss: 1.022550106048584,grad_norm: 0.9999992983385351, iteration: 40182
loss: 1.1465566158294678,grad_norm: 0.9999996640280823, iteration: 40183
loss: 1.0244927406311035,grad_norm: 0.9999998748294819, iteration: 40184
loss: 1.1011013984680176,grad_norm: 0.9999996047504286, iteration: 40185
loss: 1.209972858428955,grad_norm: 0.9999993179880899, iteration: 40186
loss: 1.0552552938461304,grad_norm: 0.9999993385962795, iteration: 40187
loss: 1.0270239114761353,grad_norm: 0.8283988061241532, iteration: 40188
loss: 1.0047481060028076,grad_norm: 0.9239675067245015, iteration: 40189
loss: 0.9878851771354675,grad_norm: 0.9593841248676136, iteration: 40190
loss: 1.0620754957199097,grad_norm: 0.9999996694102837, iteration: 40191
loss: 1.0148004293441772,grad_norm: 0.9999995857005956, iteration: 40192
loss: 1.0020246505737305,grad_norm: 0.9999989939301782, iteration: 40193
loss: 1.0114187002182007,grad_norm: 0.9999990146098798, iteration: 40194
loss: 1.0718241930007935,grad_norm: 0.999999291948771, iteration: 40195
loss: 1.0376152992248535,grad_norm: 0.9999991245287999, iteration: 40196
loss: 1.2018778324127197,grad_norm: 0.9999993530134318, iteration: 40197
loss: 0.9979476928710938,grad_norm: 0.9726282624573609, iteration: 40198
loss: 1.1210975646972656,grad_norm: 0.9999991854453479, iteration: 40199
loss: 1.085580587387085,grad_norm: 0.9999998110716338, iteration: 40200
loss: 1.1013820171356201,grad_norm: 0.9999994324543884, iteration: 40201
loss: 1.0812417268753052,grad_norm: 0.9999994264625988, iteration: 40202
loss: 1.0077182054519653,grad_norm: 0.9999994136312008, iteration: 40203
loss: 1.0233913660049438,grad_norm: 0.9867273670254048, iteration: 40204
loss: 0.9790616035461426,grad_norm: 0.9379088963298782, iteration: 40205
loss: 1.0064722299575806,grad_norm: 0.9999991376768659, iteration: 40206
loss: 1.0902410745620728,grad_norm: 0.9999996287145445, iteration: 40207
loss: 0.9931933879852295,grad_norm: 0.9999991312958395, iteration: 40208
loss: 0.9824878573417664,grad_norm: 0.9638710353444996, iteration: 40209
loss: 1.0981594324111938,grad_norm: 0.9999993393464296, iteration: 40210
loss: 1.1822584867477417,grad_norm: 0.9999995326853413, iteration: 40211
loss: 1.0491466522216797,grad_norm: 0.9999992943560888, iteration: 40212
loss: 1.201472520828247,grad_norm: 0.9999995355554738, iteration: 40213
loss: 0.9903814196586609,grad_norm: 0.9999992148152488, iteration: 40214
loss: 0.9984702467918396,grad_norm: 0.9999998720987884, iteration: 40215
loss: 1.0226093530654907,grad_norm: 0.8239027909838409, iteration: 40216
loss: 1.0437628030776978,grad_norm: 0.9999994310295274, iteration: 40217
loss: 0.984533429145813,grad_norm: 0.9852070682423087, iteration: 40218
loss: 1.0007970333099365,grad_norm: 0.9999991341865008, iteration: 40219
loss: 1.0416457653045654,grad_norm: 0.9786714967989079, iteration: 40220
loss: 1.1705659627914429,grad_norm: 0.9999994280036973, iteration: 40221
loss: 1.000756025314331,grad_norm: 0.9999990850411935, iteration: 40222
loss: 0.9711374640464783,grad_norm: 0.9999994302837621, iteration: 40223
loss: 1.0433977842330933,grad_norm: 0.9999992348425777, iteration: 40224
loss: 1.0988311767578125,grad_norm: 0.9999993666637749, iteration: 40225
loss: 1.0006431341171265,grad_norm: 0.9063782387771697, iteration: 40226
loss: 1.0693901777267456,grad_norm: 0.9999991655952624, iteration: 40227
loss: 1.1283222436904907,grad_norm: 0.9999991349745625, iteration: 40228
loss: 1.2013291120529175,grad_norm: 0.9999997643402334, iteration: 40229
loss: 1.1531046628952026,grad_norm: 0.9999992426458865, iteration: 40230
loss: 1.0809316635131836,grad_norm: 0.9999995169480785, iteration: 40231
loss: 1.0136324167251587,grad_norm: 0.9999997486281803, iteration: 40232
loss: 1.0151242017745972,grad_norm: 0.9999990658663974, iteration: 40233
loss: 1.108910322189331,grad_norm: 0.9999996374800546, iteration: 40234
loss: 1.0108195543289185,grad_norm: 0.9999990206886985, iteration: 40235
loss: 1.064227819442749,grad_norm: 0.9999997186205931, iteration: 40236
loss: 1.0265483856201172,grad_norm: 0.9999994645236285, iteration: 40237
loss: 1.0404367446899414,grad_norm: 0.9999999089055468, iteration: 40238
loss: 1.1366655826568604,grad_norm: 0.9999993575540235, iteration: 40239
loss: 1.0993187427520752,grad_norm: 0.9999989645237366, iteration: 40240
loss: 1.124929666519165,grad_norm: 0.9999998192706524, iteration: 40241
loss: 1.2323447465896606,grad_norm: 0.9999999468078578, iteration: 40242
loss: 1.0946558713912964,grad_norm: 0.9999997931780388, iteration: 40243
loss: 0.9861357808113098,grad_norm: 0.8772947985322181, iteration: 40244
loss: 0.9844931960105896,grad_norm: 0.9352492212040917, iteration: 40245
loss: 1.03824782371521,grad_norm: 0.9999996670969109, iteration: 40246
loss: 1.0453416109085083,grad_norm: 0.9999995546197779, iteration: 40247
loss: 1.0957717895507812,grad_norm: 0.9999993379607245, iteration: 40248
loss: 1.0198854207992554,grad_norm: 0.9771672839661414, iteration: 40249
loss: 1.1386244297027588,grad_norm: 0.999999814727079, iteration: 40250
loss: 1.154597282409668,grad_norm: 0.999999708177658, iteration: 40251
loss: 1.0493659973144531,grad_norm: 0.9999993326409283, iteration: 40252
loss: 1.0783699750900269,grad_norm: 0.9999992762223743, iteration: 40253
loss: 1.0855838060379028,grad_norm: 0.9999997011941507, iteration: 40254
loss: 1.0625381469726562,grad_norm: 0.9999993506693167, iteration: 40255
loss: 1.196608304977417,grad_norm: 0.9999996447642973, iteration: 40256
loss: 1.1430550813674927,grad_norm: 0.9999996764143891, iteration: 40257
loss: 1.0592896938323975,grad_norm: 0.9999997849986106, iteration: 40258
loss: 1.1558815240859985,grad_norm: 0.9999996910705743, iteration: 40259
loss: 0.9953163862228394,grad_norm: 0.9999990630398221, iteration: 40260
loss: 1.036820650100708,grad_norm: 0.9999992057436133, iteration: 40261
loss: 1.020748257637024,grad_norm: 0.9999990638990998, iteration: 40262
loss: 1.0988948345184326,grad_norm: 0.9999995820126407, iteration: 40263
loss: 1.0315520763397217,grad_norm: 0.9999992120993383, iteration: 40264
loss: 1.2145575284957886,grad_norm: 0.9999996830854131, iteration: 40265
loss: 1.0313775539398193,grad_norm: 0.9999994752467066, iteration: 40266
loss: 1.062148094177246,grad_norm: 0.9999997062176186, iteration: 40267
loss: 1.0146796703338623,grad_norm: 0.8928071701344672, iteration: 40268
loss: 1.048659324645996,grad_norm: 0.9999997714560168, iteration: 40269
loss: 1.1113296747207642,grad_norm: 0.9999997758878904, iteration: 40270
loss: 1.038243293762207,grad_norm: 0.9999991230855554, iteration: 40271
loss: 1.0303301811218262,grad_norm: 0.999999429783608, iteration: 40272
loss: 1.0337965488433838,grad_norm: 0.9999990416654501, iteration: 40273
loss: 1.048600435256958,grad_norm: 0.9999993451943382, iteration: 40274
loss: 1.0934010744094849,grad_norm: 0.9999996479787302, iteration: 40275
loss: 1.0867371559143066,grad_norm: 0.9999993954404744, iteration: 40276
loss: 1.0838998556137085,grad_norm: 0.9999992559337783, iteration: 40277
loss: 1.0180468559265137,grad_norm: 0.9012813515110014, iteration: 40278
loss: 1.0731549263000488,grad_norm: 0.9999992172262955, iteration: 40279
loss: 1.1411052942276,grad_norm: 0.999999428702359, iteration: 40280
loss: 1.019771933555603,grad_norm: 0.9734296132474957, iteration: 40281
loss: 1.1000101566314697,grad_norm: 0.9999995834906937, iteration: 40282
loss: 1.024928331375122,grad_norm: 0.9999993703827145, iteration: 40283
loss: 1.0579607486724854,grad_norm: 0.9999991886831868, iteration: 40284
loss: 1.0230929851531982,grad_norm: 0.999999501636792, iteration: 40285
loss: 0.9997274875640869,grad_norm: 0.9999992940614991, iteration: 40286
loss: 0.9743342399597168,grad_norm: 0.8305340803277643, iteration: 40287
loss: 1.0385797023773193,grad_norm: 0.9999991237795349, iteration: 40288
loss: 1.0661921501159668,grad_norm: 0.9999991148878661, iteration: 40289
loss: 1.0401554107666016,grad_norm: 0.8799844909116791, iteration: 40290
loss: 0.9865341782569885,grad_norm: 0.9999990845306225, iteration: 40291
loss: 0.9870312213897705,grad_norm: 0.9999991361423634, iteration: 40292
loss: 1.030100703239441,grad_norm: 0.9999991499516966, iteration: 40293
loss: 1.042379379272461,grad_norm: 0.9999992205654066, iteration: 40294
loss: 1.018126130104065,grad_norm: 0.9999994052905247, iteration: 40295
loss: 1.161150336265564,grad_norm: 0.9999995877963407, iteration: 40296
loss: 1.0051440000534058,grad_norm: 0.9999993028785092, iteration: 40297
loss: 1.1692657470703125,grad_norm: 0.9999997756721446, iteration: 40298
loss: 1.026206374168396,grad_norm: 0.999999388461891, iteration: 40299
loss: 1.08762788772583,grad_norm: 0.999999139922675, iteration: 40300
loss: 1.1023478507995605,grad_norm: 0.9999997462003124, iteration: 40301
loss: 0.9762943387031555,grad_norm: 0.9999994902054021, iteration: 40302
loss: 1.1031746864318848,grad_norm: 0.9999995512353695, iteration: 40303
loss: 0.9645593762397766,grad_norm: 0.9999990689002322, iteration: 40304
loss: 1.0178583860397339,grad_norm: 0.9146649635579421, iteration: 40305
loss: 1.0287059545516968,grad_norm: 0.9999993642022312, iteration: 40306
loss: 1.0573674440383911,grad_norm: 0.9999992987472649, iteration: 40307
loss: 1.0074172019958496,grad_norm: 0.7865241958388655, iteration: 40308
loss: 1.0477392673492432,grad_norm: 0.9999991067196, iteration: 40309
loss: 1.0083314180374146,grad_norm: 0.9999991677960917, iteration: 40310
loss: 1.0370545387268066,grad_norm: 0.8261541755225196, iteration: 40311
loss: 1.0405749082565308,grad_norm: 0.9430007822963435, iteration: 40312
loss: 0.9729424118995667,grad_norm: 0.9999992006731563, iteration: 40313
loss: 1.1427638530731201,grad_norm: 0.9999993929672357, iteration: 40314
loss: 1.0421031713485718,grad_norm: 0.9999992268344677, iteration: 40315
loss: 1.059544324874878,grad_norm: 0.9999994918834335, iteration: 40316
loss: 1.1450966596603394,grad_norm: 0.9999997823485838, iteration: 40317
loss: 1.085229516029358,grad_norm: 0.9999992168957107, iteration: 40318
loss: 0.9874067902565002,grad_norm: 0.84863432877942, iteration: 40319
loss: 1.1115106344223022,grad_norm: 0.9999995393606407, iteration: 40320
loss: 1.1920074224472046,grad_norm: 0.9999996227833988, iteration: 40321
loss: 1.1777693033218384,grad_norm: 0.9999999066348719, iteration: 40322
loss: 1.3136651515960693,grad_norm: 0.9999994782190421, iteration: 40323
loss: 1.1414531469345093,grad_norm: 0.9999991731720372, iteration: 40324
loss: 1.5482627153396606,grad_norm: 0.9999999028634444, iteration: 40325
loss: 1.7168362140655518,grad_norm: 1.0000000518827283, iteration: 40326
loss: 1.3777127265930176,grad_norm: 0.9999998796042604, iteration: 40327
loss: 1.9282021522521973,grad_norm: 0.9999997770146959, iteration: 40328
loss: 1.710582971572876,grad_norm: 0.9999998609749932, iteration: 40329
loss: 1.7270258665084839,grad_norm: 0.9999999318663989, iteration: 40330
loss: 1.6528100967407227,grad_norm: 0.9999999831483981, iteration: 40331
loss: 1.7582610845565796,grad_norm: 0.9999996293226897, iteration: 40332
loss: 1.5823990106582642,grad_norm: 0.9999998204465299, iteration: 40333
loss: 1.6053670644760132,grad_norm: 0.999999810348807, iteration: 40334
loss: 1.4059399366378784,grad_norm: 0.9999997985204325, iteration: 40335
loss: 1.1923551559448242,grad_norm: 0.9999998650170692, iteration: 40336
loss: 1.3226406574249268,grad_norm: 0.999999576865904, iteration: 40337
loss: 1.3596901893615723,grad_norm: 0.9999996264180672, iteration: 40338
loss: 1.2058563232421875,grad_norm: 0.9999995039645868, iteration: 40339
loss: 1.33344566822052,grad_norm: 0.9999999479647254, iteration: 40340
loss: 1.020559310913086,grad_norm: 0.9999992814730418, iteration: 40341
loss: 1.0726869106292725,grad_norm: 0.9999991145223507, iteration: 40342
loss: 0.9827826619148254,grad_norm: 0.9999991315965087, iteration: 40343
loss: 1.0053741931915283,grad_norm: 0.9447559083320574, iteration: 40344
loss: 1.072365641593933,grad_norm: 0.9999994530022324, iteration: 40345
loss: 1.0347445011138916,grad_norm: 0.8660569034035199, iteration: 40346
loss: 1.2186245918273926,grad_norm: 0.9999997689367082, iteration: 40347
loss: 0.9688713550567627,grad_norm: 0.8963952550559895, iteration: 40348
loss: 1.068816065788269,grad_norm: 0.9999991394458503, iteration: 40349
loss: 1.013993263244629,grad_norm: 0.9092029414190246, iteration: 40350
loss: 1.1512326002120972,grad_norm: 0.9999996408554046, iteration: 40351
loss: 1.0537914037704468,grad_norm: 0.9999990432276002, iteration: 40352
loss: 1.0067979097366333,grad_norm: 0.9125377751706879, iteration: 40353
loss: 1.097041130065918,grad_norm: 0.9999997659061762, iteration: 40354
loss: 1.0641461610794067,grad_norm: 0.9999992551262327, iteration: 40355
loss: 1.1033148765563965,grad_norm: 0.9999996411987143, iteration: 40356
loss: 1.0737589597702026,grad_norm: 0.9999997345218602, iteration: 40357
loss: 1.0555437803268433,grad_norm: 0.9999991322754964, iteration: 40358
loss: 0.9800626039505005,grad_norm: 0.8709213647438458, iteration: 40359
loss: 1.0044418573379517,grad_norm: 0.9999997930118474, iteration: 40360
loss: 1.0079971551895142,grad_norm: 0.9999990493486774, iteration: 40361
loss: 1.0058702230453491,grad_norm: 0.9825280217102207, iteration: 40362
loss: 1.0067096948623657,grad_norm: 0.8258592722666531, iteration: 40363
loss: 1.0864602327346802,grad_norm: 0.9999998812063032, iteration: 40364
loss: 1.0336264371871948,grad_norm: 0.9999997420759642, iteration: 40365
loss: 1.078904390335083,grad_norm: 0.880262393741265, iteration: 40366
loss: 0.9909762144088745,grad_norm: 0.9585762935422028, iteration: 40367
loss: 1.0225664377212524,grad_norm: 0.9999993481031934, iteration: 40368
loss: 1.0330957174301147,grad_norm: 0.8225870644918964, iteration: 40369
loss: 1.048953652381897,grad_norm: 0.9999998394451411, iteration: 40370
loss: 0.9869807362556458,grad_norm: 0.8707264439097006, iteration: 40371
loss: 1.0196483135223389,grad_norm: 0.9999990771288906, iteration: 40372
loss: 0.9868664145469666,grad_norm: 0.9999991617619794, iteration: 40373
loss: 1.0269972085952759,grad_norm: 0.9012905913678658, iteration: 40374
loss: 1.0433518886566162,grad_norm: 0.930929467147172, iteration: 40375
loss: 1.0046789646148682,grad_norm: 0.9999991279243324, iteration: 40376
loss: 1.0286318063735962,grad_norm: 0.8890548900406383, iteration: 40377
loss: 1.1000893115997314,grad_norm: 0.9999994163024757, iteration: 40378
loss: 1.0285680294036865,grad_norm: 0.9999991280399156, iteration: 40379
loss: 1.0478012561798096,grad_norm: 0.9999991273269606, iteration: 40380
loss: 1.0070992708206177,grad_norm: 0.7483314258875344, iteration: 40381
loss: 1.042005181312561,grad_norm: 0.9999993518750753, iteration: 40382
loss: 0.9648945331573486,grad_norm: 0.9845043713455002, iteration: 40383
loss: 1.0167665481567383,grad_norm: 0.9676767581751076, iteration: 40384
loss: 1.0374850034713745,grad_norm: 0.9999991676586932, iteration: 40385
loss: 1.1011654138565063,grad_norm: 0.9999997832920149, iteration: 40386
loss: 1.0856196880340576,grad_norm: 0.9999995559380738, iteration: 40387
loss: 1.0211539268493652,grad_norm: 0.9552207683817466, iteration: 40388
loss: 1.0108444690704346,grad_norm: 0.9085566221447635, iteration: 40389
loss: 0.9758267402648926,grad_norm: 0.9999993225081469, iteration: 40390
loss: 0.9987229108810425,grad_norm: 0.9999990714217277, iteration: 40391
loss: 1.0121691226959229,grad_norm: 0.9999995635993444, iteration: 40392
loss: 0.9718559384346008,grad_norm: 0.9999989593982629, iteration: 40393
loss: 1.011966347694397,grad_norm: 0.9999989925036619, iteration: 40394
loss: 1.0577102899551392,grad_norm: 0.9999998138010393, iteration: 40395
loss: 0.9992484450340271,grad_norm: 0.9354118841847159, iteration: 40396
loss: 0.959116518497467,grad_norm: 0.7671186182848055, iteration: 40397
loss: 1.1713210344314575,grad_norm: 0.9999996101255477, iteration: 40398
loss: 1.0860151052474976,grad_norm: 0.9999998633137361, iteration: 40399
loss: 1.0714137554168701,grad_norm: 0.9841998235500594, iteration: 40400
loss: 1.043248176574707,grad_norm: 0.9999992739793956, iteration: 40401
loss: 0.9613090753555298,grad_norm: 0.9972013522760443, iteration: 40402
loss: 1.0547453165054321,grad_norm: 0.9999995183083765, iteration: 40403
loss: 1.0363965034484863,grad_norm: 0.9999991284109763, iteration: 40404
loss: 0.9613227248191833,grad_norm: 0.9999991272181297, iteration: 40405
loss: 1.1112267971038818,grad_norm: 0.9999998453991629, iteration: 40406
loss: 0.9991229772567749,grad_norm: 0.9999990546896501, iteration: 40407
loss: 1.0490713119506836,grad_norm: 0.9999992853704601, iteration: 40408
loss: 1.0849374532699585,grad_norm: 0.999999707743004, iteration: 40409
loss: 1.122018575668335,grad_norm: 0.9999993130968092, iteration: 40410
loss: 1.0074740648269653,grad_norm: 0.9999994944774785, iteration: 40411
loss: 1.0756107568740845,grad_norm: 0.9999992518748776, iteration: 40412
loss: 1.1111468076705933,grad_norm: 0.999999841124302, iteration: 40413
loss: 1.014086365699768,grad_norm: 0.8129196951440644, iteration: 40414
loss: 1.0268508195877075,grad_norm: 0.7470300318625983, iteration: 40415
loss: 1.09415864944458,grad_norm: 0.9999996834041578, iteration: 40416
loss: 1.0328792333602905,grad_norm: 0.9999990588541215, iteration: 40417
loss: 1.011594533920288,grad_norm: 0.9999990482251377, iteration: 40418
loss: 1.0600523948669434,grad_norm: 0.9999991921632951, iteration: 40419
loss: 0.9721357822418213,grad_norm: 0.881650278878999, iteration: 40420
loss: 0.997388482093811,grad_norm: 0.9999994960727916, iteration: 40421
loss: 1.0252814292907715,grad_norm: 0.9999990598458869, iteration: 40422
loss: 0.9958945512771606,grad_norm: 0.9999993177838844, iteration: 40423
loss: 0.9812906980514526,grad_norm: 0.9947437072338517, iteration: 40424
loss: 0.966905415058136,grad_norm: 0.9595284930494122, iteration: 40425
loss: 0.9943137168884277,grad_norm: 0.9526749914984444, iteration: 40426
loss: 0.9858952760696411,grad_norm: 0.8845161989421265, iteration: 40427
loss: 1.0424582958221436,grad_norm: 0.9999999229870816, iteration: 40428
loss: 1.008795142173767,grad_norm: 0.9999989832149877, iteration: 40429
loss: 1.0240519046783447,grad_norm: 0.9999991423894742, iteration: 40430
loss: 0.9677988886833191,grad_norm: 0.9986120722548087, iteration: 40431
loss: 1.0072154998779297,grad_norm: 0.9999989713258203, iteration: 40432
loss: 1.0122582912445068,grad_norm: 0.9999990554682341, iteration: 40433
loss: 0.9836634397506714,grad_norm: 0.9999990961713252, iteration: 40434
loss: 1.0156325101852417,grad_norm: 0.9329298898056478, iteration: 40435
loss: 1.1298190355300903,grad_norm: 0.9999998738969281, iteration: 40436
loss: 0.9954332709312439,grad_norm: 0.7872824389306594, iteration: 40437
loss: 1.1343492269515991,grad_norm: 0.9999996210111325, iteration: 40438
loss: 1.0318028926849365,grad_norm: 0.9396194995735426, iteration: 40439
loss: 1.022983431816101,grad_norm: 0.999999271401423, iteration: 40440
loss: 0.9894384145736694,grad_norm: 0.8298306325314575, iteration: 40441
loss: 0.9949344992637634,grad_norm: 0.9999998587217792, iteration: 40442
loss: 1.037367582321167,grad_norm: 0.9999990390126122, iteration: 40443
loss: 1.0844842195510864,grad_norm: 0.9999996116207356, iteration: 40444
loss: 1.0274256467819214,grad_norm: 0.999999958780562, iteration: 40445
loss: 0.9903590083122253,grad_norm: 0.9999990727647615, iteration: 40446
loss: 0.9878956079483032,grad_norm: 0.9999991158208426, iteration: 40447
loss: 1.0290441513061523,grad_norm: 0.9602240727472301, iteration: 40448
loss: 1.0423402786254883,grad_norm: 0.9999992342660952, iteration: 40449
loss: 1.0906294584274292,grad_norm: 0.9999996534870762, iteration: 40450
loss: 1.0848525762557983,grad_norm: 0.9999990819643277, iteration: 40451
loss: 1.0951662063598633,grad_norm: 0.9999997903530123, iteration: 40452
loss: 1.114176630973816,grad_norm: 0.9999996149179554, iteration: 40453
loss: 1.014507532119751,grad_norm: 0.9663729542669127, iteration: 40454
loss: 1.0275945663452148,grad_norm: 0.9999993137232804, iteration: 40455
loss: 1.181828498840332,grad_norm: 0.9999995662624159, iteration: 40456
loss: 0.983127236366272,grad_norm: 0.9999991269181946, iteration: 40457
loss: 1.0111231803894043,grad_norm: 0.9999991826024031, iteration: 40458
loss: 0.9717485308647156,grad_norm: 0.999999079393939, iteration: 40459
loss: 0.979243278503418,grad_norm: 0.9999990807052291, iteration: 40460
loss: 1.036304235458374,grad_norm: 0.999999815210635, iteration: 40461
loss: 0.9975308179855347,grad_norm: 0.8777876645502857, iteration: 40462
loss: 0.9983965754508972,grad_norm: 0.9957952839924258, iteration: 40463
loss: 0.9655240178108215,grad_norm: 0.9259055282956775, iteration: 40464
loss: 0.98138427734375,grad_norm: 0.8231742134582944, iteration: 40465
loss: 1.0075420141220093,grad_norm: 0.9999991183250304, iteration: 40466
loss: 1.0466996431350708,grad_norm: 0.9999998077802024, iteration: 40467
loss: 1.0071791410446167,grad_norm: 0.9999993809575977, iteration: 40468
loss: 0.9741576910018921,grad_norm: 0.9999999292773794, iteration: 40469
loss: 0.9786053895950317,grad_norm: 0.845188922498628, iteration: 40470
loss: 0.9899184703826904,grad_norm: 0.9999991774900271, iteration: 40471
loss: 0.9997971653938293,grad_norm: 0.9842864974919341, iteration: 40472
loss: 1.041354775428772,grad_norm: 0.8943698125152012, iteration: 40473
loss: 1.0592198371887207,grad_norm: 0.9999992637842199, iteration: 40474
loss: 1.003897786140442,grad_norm: 0.9722586607721302, iteration: 40475
loss: 0.9865995049476624,grad_norm: 0.9999992914724503, iteration: 40476
loss: 1.0254456996917725,grad_norm: 0.9999991348703697, iteration: 40477
loss: 0.9992830157279968,grad_norm: 0.9999991079281308, iteration: 40478
loss: 1.0634846687316895,grad_norm: 0.9999992707884221, iteration: 40479
loss: 1.0467959642410278,grad_norm: 0.9999993290794392, iteration: 40480
loss: 1.0233173370361328,grad_norm: 0.7223673081009245, iteration: 40481
loss: 0.9924628734588623,grad_norm: 0.801187226113261, iteration: 40482
loss: 1.210543155670166,grad_norm: 0.9999998691661165, iteration: 40483
loss: 0.9964772462844849,grad_norm: 0.9819996985471406, iteration: 40484
loss: 1.0080398321151733,grad_norm: 0.8162326899612043, iteration: 40485
loss: 1.0002385377883911,grad_norm: 0.7929795597004045, iteration: 40486
loss: 1.0586116313934326,grad_norm: 0.9999992015989175, iteration: 40487
loss: 1.0073421001434326,grad_norm: 0.9914121868880866, iteration: 40488
loss: 1.002274751663208,grad_norm: 0.9533789360504269, iteration: 40489
loss: 0.9624976515769958,grad_norm: 0.9370199674934415, iteration: 40490
loss: 1.0232206583023071,grad_norm: 0.9999992324423499, iteration: 40491
loss: 0.9852297306060791,grad_norm: 0.9999993305559226, iteration: 40492
loss: 1.01038658618927,grad_norm: 0.9999993893666006, iteration: 40493
loss: 1.0695089101791382,grad_norm: 0.9999992238907796, iteration: 40494
loss: 1.007996678352356,grad_norm: 0.9999990731129135, iteration: 40495
loss: 1.0616669654846191,grad_norm: 0.999999383285862, iteration: 40496
loss: 1.0479563474655151,grad_norm: 0.8445053163510546, iteration: 40497
loss: 1.0677000284194946,grad_norm: 0.999999694007478, iteration: 40498
loss: 0.9774727821350098,grad_norm: 0.9999991368738002, iteration: 40499
loss: 1.0605875253677368,grad_norm: 0.9999992766648746, iteration: 40500
loss: 0.9808605909347534,grad_norm: 0.9999994659534776, iteration: 40501
loss: 1.040017008781433,grad_norm: 0.9999993452440886, iteration: 40502
loss: 0.991714596748352,grad_norm: 0.9272159028039874, iteration: 40503
loss: 1.0117028951644897,grad_norm: 0.8599357933092046, iteration: 40504
loss: 1.1797209978103638,grad_norm: 0.9999998384716757, iteration: 40505
loss: 1.0701618194580078,grad_norm: 0.9999994585099191, iteration: 40506
loss: 1.0515755414962769,grad_norm: 0.982443224684523, iteration: 40507
loss: 1.023461937904358,grad_norm: 0.9542878321317458, iteration: 40508
loss: 1.0206507444381714,grad_norm: 0.8470668732602484, iteration: 40509
loss: 1.028805136680603,grad_norm: 0.9999998727814555, iteration: 40510
loss: 1.1214302778244019,grad_norm: 0.9999999288030158, iteration: 40511
loss: 1.0510296821594238,grad_norm: 0.8101698148504785, iteration: 40512
loss: 0.9896852374076843,grad_norm: 0.9286127901615664, iteration: 40513
loss: 1.0008342266082764,grad_norm: 0.7368216137072658, iteration: 40514
loss: 1.0072134733200073,grad_norm: 0.999999739073981, iteration: 40515
loss: 1.0074653625488281,grad_norm: 0.9661759959663687, iteration: 40516
loss: 1.0232927799224854,grad_norm: 0.9999996458557113, iteration: 40517
loss: 0.9613155722618103,grad_norm: 0.9999991025674122, iteration: 40518
loss: 1.0274306535720825,grad_norm: 0.9693695187503438, iteration: 40519
loss: 1.0660228729248047,grad_norm: 0.9999990553729708, iteration: 40520
loss: 1.0043917894363403,grad_norm: 0.8966768799292327, iteration: 40521
loss: 1.0222634077072144,grad_norm: 0.9999998694680639, iteration: 40522
loss: 1.0072803497314453,grad_norm: 0.9999990315621121, iteration: 40523
loss: 1.0217199325561523,grad_norm: 0.999999734783378, iteration: 40524
loss: 1.0201356410980225,grad_norm: 0.8794323041412924, iteration: 40525
loss: 1.025687336921692,grad_norm: 0.9708108737677964, iteration: 40526
loss: 0.9660412073135376,grad_norm: 0.9999991229681375, iteration: 40527
loss: 1.0124030113220215,grad_norm: 0.9999993413421949, iteration: 40528
loss: 1.0631940364837646,grad_norm: 0.9999992468459058, iteration: 40529
loss: 1.0050026178359985,grad_norm: 0.9999994942172103, iteration: 40530
loss: 1.0175045728683472,grad_norm: 0.9999993109050536, iteration: 40531
loss: 1.044224739074707,grad_norm: 0.9999993982985156, iteration: 40532
loss: 0.9917778372764587,grad_norm: 0.9999990758133994, iteration: 40533
loss: 1.0485827922821045,grad_norm: 0.9999996562825462, iteration: 40534
loss: 1.0852535963058472,grad_norm: 0.9999996684818877, iteration: 40535
loss: 1.0648319721221924,grad_norm: 0.9999993385548924, iteration: 40536
loss: 1.2184759378433228,grad_norm: 1.0000000662819668, iteration: 40537
loss: 1.016867756843567,grad_norm: 0.9999991653432156, iteration: 40538
loss: 1.0383400917053223,grad_norm: 0.9999992009918361, iteration: 40539
loss: 1.1479406356811523,grad_norm: 0.999999363544236, iteration: 40540
loss: 1.0179271697998047,grad_norm: 0.9999997350317079, iteration: 40541
loss: 1.1576299667358398,grad_norm: 0.9999999127908533, iteration: 40542
loss: 1.1294293403625488,grad_norm: 0.9999999000997121, iteration: 40543
loss: 0.9912832975387573,grad_norm: 0.8925290961860197, iteration: 40544
loss: 1.046216607093811,grad_norm: 0.9575805370626645, iteration: 40545
loss: 1.0925320386886597,grad_norm: 0.9999994174139349, iteration: 40546
loss: 1.0836483240127563,grad_norm: 0.9999993171501708, iteration: 40547
loss: 1.0578557252883911,grad_norm: 0.947883425420873, iteration: 40548
loss: 1.140670895576477,grad_norm: 0.9999999513100067, iteration: 40549
loss: 0.9897621870040894,grad_norm: 0.9655841945833374, iteration: 40550
loss: 1.0199007987976074,grad_norm: 0.855218415747771, iteration: 40551
loss: 1.080649495124817,grad_norm: 0.9999999892728051, iteration: 40552
loss: 1.0000183582305908,grad_norm: 0.9999992530586063, iteration: 40553
loss: 1.0443694591522217,grad_norm: 0.9999998244179465, iteration: 40554
loss: 1.2077351808547974,grad_norm: 0.9999997480853824, iteration: 40555
loss: 1.3467179536819458,grad_norm: 0.9999997646796956, iteration: 40556
loss: 1.0349762439727783,grad_norm: 0.9478795868207754, iteration: 40557
loss: 1.1202174425125122,grad_norm: 0.9999998810953566, iteration: 40558
loss: 1.1500943899154663,grad_norm: 0.9999995282414258, iteration: 40559
loss: 1.0568898916244507,grad_norm: 0.9999991833676299, iteration: 40560
loss: 1.0627996921539307,grad_norm: 0.9999998410527754, iteration: 40561
loss: 1.1204410791397095,grad_norm: 0.999999519432325, iteration: 40562
loss: 1.0022099018096924,grad_norm: 0.87463724000768, iteration: 40563
loss: 1.063626766204834,grad_norm: 0.9887777591291947, iteration: 40564
loss: 1.2364379167556763,grad_norm: 0.9999998388354486, iteration: 40565
loss: 1.2322877645492554,grad_norm: 0.9999999695434548, iteration: 40566
loss: 1.0598442554473877,grad_norm: 0.9999998331324531, iteration: 40567
loss: 1.023888111114502,grad_norm: 0.9382477433783477, iteration: 40568
loss: 1.0064442157745361,grad_norm: 0.8442522286505258, iteration: 40569
loss: 1.0501683950424194,grad_norm: 0.9999991982635877, iteration: 40570
loss: 1.0487816333770752,grad_norm: 0.9999990933682735, iteration: 40571
loss: 1.1168383359909058,grad_norm: 0.9999998663917489, iteration: 40572
loss: 1.032543659210205,grad_norm: 0.9999995107449862, iteration: 40573
loss: 1.0640519857406616,grad_norm: 0.9999993413693665, iteration: 40574
loss: 1.0696772336959839,grad_norm: 0.9999991882280013, iteration: 40575
loss: 1.0469906330108643,grad_norm: 0.9999998008729788, iteration: 40576
loss: 1.0362659692764282,grad_norm: 0.9999993603550277, iteration: 40577
loss: 1.087244987487793,grad_norm: 0.9999996967949566, iteration: 40578
loss: 1.092238426208496,grad_norm: 0.9999994454124449, iteration: 40579
loss: 1.109863519668579,grad_norm: 0.999999761391938, iteration: 40580
loss: 1.0186011791229248,grad_norm: 0.9999993543467366, iteration: 40581
loss: 1.0047649145126343,grad_norm: 0.9716469180231115, iteration: 40582
loss: 1.0226144790649414,grad_norm: 0.9999994214166856, iteration: 40583
loss: 1.0338504314422607,grad_norm: 0.9999990682890821, iteration: 40584
loss: 0.9719483852386475,grad_norm: 0.8817161512123236, iteration: 40585
loss: 1.0254883766174316,grad_norm: 0.9999995416253732, iteration: 40586
loss: 1.0424069166183472,grad_norm: 0.965606953952335, iteration: 40587
loss: 1.0544700622558594,grad_norm: 0.9999994879967943, iteration: 40588
loss: 1.02400541305542,grad_norm: 0.9999991367270965, iteration: 40589
loss: 1.0016627311706543,grad_norm: 0.9484755407782068, iteration: 40590
loss: 1.0239555835723877,grad_norm: 0.9999992008414988, iteration: 40591
loss: 1.005752682685852,grad_norm: 0.8414904762260303, iteration: 40592
loss: 1.058811902999878,grad_norm: 0.9999991944624939, iteration: 40593
loss: 1.0862699747085571,grad_norm: 0.9999992108236148, iteration: 40594
loss: 1.0269972085952759,grad_norm: 0.9739871868175148, iteration: 40595
loss: 0.9800147414207458,grad_norm: 0.869096831305905, iteration: 40596
loss: 1.0336458683013916,grad_norm: 0.9999992073487759, iteration: 40597
loss: 1.0688326358795166,grad_norm: 0.9999993609910194, iteration: 40598
loss: 1.0886930227279663,grad_norm: 0.9999997185934362, iteration: 40599
loss: 1.0111229419708252,grad_norm: 0.9999991218876254, iteration: 40600
loss: 1.016746163368225,grad_norm: 0.9999992783032003, iteration: 40601
loss: 0.9908010959625244,grad_norm: 0.9327456187477308, iteration: 40602
loss: 1.0098050832748413,grad_norm: 0.951265650289932, iteration: 40603
loss: 1.004128336906433,grad_norm: 0.9999989745743058, iteration: 40604
loss: 1.0190880298614502,grad_norm: 0.9999992376932586, iteration: 40605
loss: 0.9680785536766052,grad_norm: 0.9157936967007528, iteration: 40606
loss: 1.0608394145965576,grad_norm: 0.9999990178576449, iteration: 40607
loss: 1.0533956289291382,grad_norm: 0.9999993564499219, iteration: 40608
loss: 1.0349578857421875,grad_norm: 0.9867832772514827, iteration: 40609
loss: 1.0482897758483887,grad_norm: 0.9999989809892719, iteration: 40610
loss: 0.9705288410186768,grad_norm: 0.9858772858187783, iteration: 40611
loss: 1.0087403059005737,grad_norm: 0.8394006999468833, iteration: 40612
loss: 0.9918221831321716,grad_norm: 0.9999997798875886, iteration: 40613
loss: 1.037545919418335,grad_norm: 0.9999990908651946, iteration: 40614
loss: 1.0682214498519897,grad_norm: 0.9999991776125912, iteration: 40615
loss: 1.0213634967803955,grad_norm: 0.9943753268653477, iteration: 40616
loss: 1.0577707290649414,grad_norm: 0.9999990998341983, iteration: 40617
loss: 1.1000795364379883,grad_norm: 0.9999992287090181, iteration: 40618
loss: 1.0153366327285767,grad_norm: 0.9999991273873786, iteration: 40619
loss: 1.0934046506881714,grad_norm: 0.9999995961376146, iteration: 40620
loss: 0.9920943975448608,grad_norm: 0.9867411664671305, iteration: 40621
loss: 1.1145493984222412,grad_norm: 0.9999996999655286, iteration: 40622
loss: 1.008489727973938,grad_norm: 0.9999989812562223, iteration: 40623
loss: 1.0667564868927002,grad_norm: 0.9999995058407598, iteration: 40624
loss: 1.052830457687378,grad_norm: 0.9999992328077492, iteration: 40625
loss: 0.9641479849815369,grad_norm: 0.8880137259583211, iteration: 40626
loss: 1.0118895769119263,grad_norm: 0.9999991278160881, iteration: 40627
loss: 0.9977415204048157,grad_norm: 0.8764585749240409, iteration: 40628
loss: 1.0194272994995117,grad_norm: 0.9999995162383456, iteration: 40629
loss: 1.0208827257156372,grad_norm: 0.8590700735860071, iteration: 40630
loss: 1.0155258178710938,grad_norm: 0.9196403564182822, iteration: 40631
loss: 1.0626155138015747,grad_norm: 0.9573103282667763, iteration: 40632
loss: 0.9925255179405212,grad_norm: 0.8743813376788321, iteration: 40633
loss: 0.9980782270431519,grad_norm: 0.9554242155656308, iteration: 40634
loss: 1.0610082149505615,grad_norm: 0.9999993769711519, iteration: 40635
loss: 1.0133004188537598,grad_norm: 0.9342432162587256, iteration: 40636
loss: 1.0195380449295044,grad_norm: 0.9999996625189109, iteration: 40637
loss: 1.0312021970748901,grad_norm: 0.9999999274189653, iteration: 40638
loss: 1.0565414428710938,grad_norm: 0.9999992212928734, iteration: 40639
loss: 1.0485128164291382,grad_norm: 0.9999994597449489, iteration: 40640
loss: 0.9866259694099426,grad_norm: 0.9725990940946451, iteration: 40641
loss: 1.0349141359329224,grad_norm: 0.9999997928564408, iteration: 40642
loss: 1.035446047782898,grad_norm: 0.9999992496781669, iteration: 40643
loss: 1.092618465423584,grad_norm: 0.9999991889806814, iteration: 40644
loss: 1.0458946228027344,grad_norm: 0.9999990866387456, iteration: 40645
loss: 0.9819889068603516,grad_norm: 0.9999992110683082, iteration: 40646
loss: 1.0226356983184814,grad_norm: 0.9999991685847345, iteration: 40647
loss: 1.0949472188949585,grad_norm: 0.9999993741710789, iteration: 40648
loss: 1.02485990524292,grad_norm: 0.9999991577403481, iteration: 40649
loss: 1.032660961151123,grad_norm: 0.9999997302368211, iteration: 40650
loss: 1.182930588722229,grad_norm: 0.9999994532943027, iteration: 40651
loss: 1.0059974193572998,grad_norm: 0.8794646269022912, iteration: 40652
loss: 1.09650719165802,grad_norm: 0.9999993645624421, iteration: 40653
loss: 1.039002537727356,grad_norm: 0.9999991708314689, iteration: 40654
loss: 1.0929698944091797,grad_norm: 0.99999932646434, iteration: 40655
loss: 1.0054839849472046,grad_norm: 0.8810662425477603, iteration: 40656
loss: 1.0070923566818237,grad_norm: 0.7072868539887165, iteration: 40657
loss: 1.036162257194519,grad_norm: 0.9999991549826285, iteration: 40658
loss: 1.0111992359161377,grad_norm: 0.9999995476984147, iteration: 40659
loss: 1.0183663368225098,grad_norm: 0.8548822051881148, iteration: 40660
loss: 1.0232216119766235,grad_norm: 0.9999990758711527, iteration: 40661
loss: 1.1464885473251343,grad_norm: 0.9999994542834432, iteration: 40662
loss: 1.1059508323669434,grad_norm: 0.9999999311940231, iteration: 40663
loss: 1.0311685800552368,grad_norm: 0.9999998510857648, iteration: 40664
loss: 0.9932700991630554,grad_norm: 0.9999993199134884, iteration: 40665
loss: 1.0212780237197876,grad_norm: 0.9999994970389914, iteration: 40666
loss: 1.0523900985717773,grad_norm: 0.999999164915637, iteration: 40667
loss: 1.0544095039367676,grad_norm: 0.9999992189144425, iteration: 40668
loss: 1.0421215295791626,grad_norm: 0.9999995995522909, iteration: 40669
loss: 1.0330986976623535,grad_norm: 0.9999996687488039, iteration: 40670
loss: 1.1541128158569336,grad_norm: 0.9999995689286866, iteration: 40671
loss: 1.0630824565887451,grad_norm: 0.9999993832805925, iteration: 40672
loss: 1.0837043523788452,grad_norm: 0.9999997345028255, iteration: 40673
loss: 1.008675217628479,grad_norm: 0.9400739007061081, iteration: 40674
loss: 1.0078818798065186,grad_norm: 0.9999991016007588, iteration: 40675
loss: 0.9864615201950073,grad_norm: 0.9999991962327852, iteration: 40676
loss: 1.006499171257019,grad_norm: 0.8322355638462159, iteration: 40677
loss: 0.9867584109306335,grad_norm: 0.9999991115612555, iteration: 40678
loss: 1.0454305410385132,grad_norm: 0.999999471827138, iteration: 40679
loss: 1.0027105808258057,grad_norm: 0.999999317383825, iteration: 40680
loss: 1.0102421045303345,grad_norm: 0.9999999950275636, iteration: 40681
loss: 1.0471950769424438,grad_norm: 0.9999992148606922, iteration: 40682
loss: 1.0013231039047241,grad_norm: 0.9999991406658391, iteration: 40683
loss: 1.0251235961914062,grad_norm: 0.999999671281082, iteration: 40684
loss: 1.0655261278152466,grad_norm: 0.9999996047299392, iteration: 40685
loss: 1.0256069898605347,grad_norm: 0.9472777878791423, iteration: 40686
loss: 1.0227222442626953,grad_norm: 0.9999990904305767, iteration: 40687
loss: 0.9707241654396057,grad_norm: 0.9013923702942828, iteration: 40688
loss: 1.0051687955856323,grad_norm: 0.9546675181238666, iteration: 40689
loss: 1.0731061697006226,grad_norm: 0.999999061211779, iteration: 40690
loss: 1.0137804746627808,grad_norm: 0.9999991113673379, iteration: 40691
loss: 1.0227110385894775,grad_norm: 0.9377219763465959, iteration: 40692
loss: 0.9915943741798401,grad_norm: 0.954588694867092, iteration: 40693
loss: 1.1582183837890625,grad_norm: 0.9999999020484062, iteration: 40694
loss: 1.0042463541030884,grad_norm: 0.8529192805544117, iteration: 40695
loss: 1.0367119312286377,grad_norm: 0.8345915918548042, iteration: 40696
loss: 1.0376883745193481,grad_norm: 0.9999996137170521, iteration: 40697
loss: 1.0671172142028809,grad_norm: 0.9999997812948994, iteration: 40698
loss: 1.0636341571807861,grad_norm: 0.9999991615972119, iteration: 40699
loss: 0.9857866168022156,grad_norm: 0.991967860277889, iteration: 40700
loss: 0.9928960204124451,grad_norm: 0.9660258780635967, iteration: 40701
loss: 1.0903925895690918,grad_norm: 0.9999992027929107, iteration: 40702
loss: 1.033796787261963,grad_norm: 0.9999993056821195, iteration: 40703
loss: 0.9948880672454834,grad_norm: 0.9575626843776823, iteration: 40704
loss: 0.9927393198013306,grad_norm: 0.9999996045124316, iteration: 40705
loss: 0.9980427622795105,grad_norm: 0.7591116518802544, iteration: 40706
loss: 1.0764760971069336,grad_norm: 0.9999992330128136, iteration: 40707
loss: 1.0478627681732178,grad_norm: 0.9999998347616627, iteration: 40708
loss: 0.9914569854736328,grad_norm: 0.8771463422526677, iteration: 40709
loss: 0.9814754724502563,grad_norm: 0.9999994625051529, iteration: 40710
loss: 1.0126328468322754,grad_norm: 0.9349942664437306, iteration: 40711
loss: 0.9969286918640137,grad_norm: 0.8544234530392915, iteration: 40712
loss: 1.01700758934021,grad_norm: 0.8897311168014064, iteration: 40713
loss: 1.0158413648605347,grad_norm: 0.9999993639283017, iteration: 40714
loss: 1.084783911705017,grad_norm: 1.0000000757541485, iteration: 40715
loss: 1.0146671533584595,grad_norm: 0.9810021763253091, iteration: 40716
loss: 1.0103825330734253,grad_norm: 0.9999991579571679, iteration: 40717
loss: 1.026666522026062,grad_norm: 0.9366222094756252, iteration: 40718
loss: 0.9707664251327515,grad_norm: 0.9999990863182064, iteration: 40719
loss: 1.0538828372955322,grad_norm: 0.9999994879689497, iteration: 40720
loss: 0.9597444534301758,grad_norm: 0.8598607629258754, iteration: 40721
loss: 1.0051532983779907,grad_norm: 0.9999997215408812, iteration: 40722
loss: 1.1566404104232788,grad_norm: 0.9999999119891283, iteration: 40723
loss: 0.997687578201294,grad_norm: 0.895182875686201, iteration: 40724
loss: 1.0154235363006592,grad_norm: 0.959617532613753, iteration: 40725
loss: 0.9879744648933411,grad_norm: 0.9999994255630508, iteration: 40726
loss: 1.0161632299423218,grad_norm: 0.8053019710464209, iteration: 40727
loss: 1.0782885551452637,grad_norm: 0.9999994987145132, iteration: 40728
loss: 0.9680197238922119,grad_norm: 0.9999993278419087, iteration: 40729
loss: 1.0743852853775024,grad_norm: 0.9999998527579353, iteration: 40730
loss: 1.0195268392562866,grad_norm: 0.9127165513677256, iteration: 40731
loss: 1.0043079853057861,grad_norm: 0.8452911152390803, iteration: 40732
loss: 1.0219770669937134,grad_norm: 0.8396821393454016, iteration: 40733
loss: 1.0829081535339355,grad_norm: 0.9999996029084489, iteration: 40734
loss: 1.0098434686660767,grad_norm: 0.999999108341461, iteration: 40735
loss: 1.0489522218704224,grad_norm: 0.9999997889203615, iteration: 40736
loss: 1.1081980466842651,grad_norm: 0.9701565419392183, iteration: 40737
loss: 1.0819753408432007,grad_norm: 0.9999997674472183, iteration: 40738
loss: 1.0732380151748657,grad_norm: 0.99999905367267, iteration: 40739
loss: 1.0613698959350586,grad_norm: 0.9999998960708206, iteration: 40740
loss: 1.0142569541931152,grad_norm: 0.999999074729937, iteration: 40741
loss: 0.9968062043190002,grad_norm: 0.9999990436242912, iteration: 40742
loss: 1.0100983381271362,grad_norm: 0.9025882836414978, iteration: 40743
loss: 1.100455641746521,grad_norm: 0.9999991947624395, iteration: 40744
loss: 0.9865974187850952,grad_norm: 0.999999260363324, iteration: 40745
loss: 1.0297545194625854,grad_norm: 0.9999994718584463, iteration: 40746
loss: 0.9997106790542603,grad_norm: 0.9999994058212087, iteration: 40747
loss: 0.9942991733551025,grad_norm: 0.9999990578402781, iteration: 40748
loss: 1.0580825805664062,grad_norm: 0.9999990876081082, iteration: 40749
loss: 1.056403636932373,grad_norm: 0.999999767841404, iteration: 40750
loss: 0.9774051308631897,grad_norm: 0.959411988081616, iteration: 40751
loss: 0.9773568511009216,grad_norm: 0.9999991428407313, iteration: 40752
loss: 1.0413345098495483,grad_norm: 0.9999993784435711, iteration: 40753
loss: 1.0175838470458984,grad_norm: 0.9999998379904479, iteration: 40754
loss: 1.0422106981277466,grad_norm: 0.9633194879511172, iteration: 40755
loss: 1.0089306831359863,grad_norm: 0.8614053476785729, iteration: 40756
loss: 0.9958556890487671,grad_norm: 0.999999099060705, iteration: 40757
loss: 1.0119844675064087,grad_norm: 0.9999990940066558, iteration: 40758
loss: 1.0327152013778687,grad_norm: 0.9999991243043221, iteration: 40759
loss: 1.0543380975723267,grad_norm: 0.9999993925440678, iteration: 40760
loss: 1.0006382465362549,grad_norm: 0.8669443303692218, iteration: 40761
loss: 0.9669011235237122,grad_norm: 0.9177109516287896, iteration: 40762
loss: 1.0612694025039673,grad_norm: 0.9999995733439092, iteration: 40763
loss: 0.9861960411071777,grad_norm: 0.8930534894498715, iteration: 40764
loss: 1.0012520551681519,grad_norm: 0.9999992550520863, iteration: 40765
loss: 1.048872470855713,grad_norm: 0.9289170400835268, iteration: 40766
loss: 0.9983707666397095,grad_norm: 0.9220144186116321, iteration: 40767
loss: 1.027601957321167,grad_norm: 0.7274518505120253, iteration: 40768
loss: 0.9900667071342468,grad_norm: 0.9999991338756038, iteration: 40769
loss: 1.0400772094726562,grad_norm: 0.9999996164654151, iteration: 40770
loss: 1.0009032487869263,grad_norm: 0.9999992776961557, iteration: 40771
loss: 1.0399954319000244,grad_norm: 0.9605386149666625, iteration: 40772
loss: 1.0369333028793335,grad_norm: 0.9999997017940523, iteration: 40773
loss: 1.0365679264068604,grad_norm: 0.9999994838722531, iteration: 40774
loss: 1.0557769536972046,grad_norm: 0.9999998152352553, iteration: 40775
loss: 1.0482394695281982,grad_norm: 0.9999993598243431, iteration: 40776
loss: 1.0132317543029785,grad_norm: 0.8298916987421369, iteration: 40777
loss: 0.9405440092086792,grad_norm: 0.9999989667779221, iteration: 40778
loss: 1.020682454109192,grad_norm: 0.9999996530160424, iteration: 40779
loss: 1.0022884607315063,grad_norm: 0.8795080397221523, iteration: 40780
loss: 1.0308115482330322,grad_norm: 0.8954784673190671, iteration: 40781
loss: 0.9924566149711609,grad_norm: 0.8522678767272404, iteration: 40782
loss: 1.0101267099380493,grad_norm: 0.9999990752384871, iteration: 40783
loss: 1.0272572040557861,grad_norm: 0.8314507072923493, iteration: 40784
loss: 0.994081974029541,grad_norm: 0.8695213892554402, iteration: 40785
loss: 1.0237584114074707,grad_norm: 0.9999992759927289, iteration: 40786
loss: 1.0084342956542969,grad_norm: 0.9371335374453664, iteration: 40787
loss: 1.0044896602630615,grad_norm: 0.7679988284145278, iteration: 40788
loss: 1.0301162004470825,grad_norm: 0.9999992295600224, iteration: 40789
loss: 1.0278947353363037,grad_norm: 0.9074197408020863, iteration: 40790
loss: 0.9847884774208069,grad_norm: 0.9046385541639287, iteration: 40791
loss: 0.9894407987594604,grad_norm: 0.9999996025562994, iteration: 40792
loss: 1.0082955360412598,grad_norm: 0.9999993114911016, iteration: 40793
loss: 0.9788466095924377,grad_norm: 0.7455968890131484, iteration: 40794
loss: 0.9778901934623718,grad_norm: 0.99999900068808, iteration: 40795
loss: 0.9932321906089783,grad_norm: 0.9728144721656371, iteration: 40796
loss: 1.0124425888061523,grad_norm: 0.7767880975971378, iteration: 40797
loss: 0.9607194662094116,grad_norm: 0.8964943547292388, iteration: 40798
loss: 1.04500412940979,grad_norm: 0.8370334256846057, iteration: 40799
loss: 1.046678900718689,grad_norm: 0.9999991703588972, iteration: 40800
loss: 0.993999719619751,grad_norm: 0.999999424762665, iteration: 40801
loss: 1.0075938701629639,grad_norm: 0.9999994036438751, iteration: 40802
loss: 0.9923746585845947,grad_norm: 0.9999992808780049, iteration: 40803
loss: 0.9604763984680176,grad_norm: 0.999999191001714, iteration: 40804
loss: 1.0061336755752563,grad_norm: 0.904449988292456, iteration: 40805
loss: 1.0006508827209473,grad_norm: 0.9999990388865809, iteration: 40806
loss: 0.9758585691452026,grad_norm: 0.9999990307164404, iteration: 40807
loss: 0.9907410740852356,grad_norm: 0.9999991174735451, iteration: 40808
loss: 1.0092846155166626,grad_norm: 0.8788556206209769, iteration: 40809
loss: 1.0130469799041748,grad_norm: 0.9999991065440624, iteration: 40810
loss: 0.9422595500946045,grad_norm: 0.9313351060367286, iteration: 40811
loss: 1.035935640335083,grad_norm: 0.9999990658815423, iteration: 40812
loss: 1.0059798955917358,grad_norm: 0.9999991199554176, iteration: 40813
loss: 1.0252841711044312,grad_norm: 0.9391532749662902, iteration: 40814
loss: 1.0351659059524536,grad_norm: 0.9999992940766868, iteration: 40815
loss: 1.0093567371368408,grad_norm: 0.8937265811739078, iteration: 40816
loss: 1.0023488998413086,grad_norm: 0.9365109966516706, iteration: 40817
loss: 1.0081812143325806,grad_norm: 0.9999994735878291, iteration: 40818
loss: 0.9775387048721313,grad_norm: 0.8804107308636032, iteration: 40819
loss: 1.0104128122329712,grad_norm: 0.9999998810954204, iteration: 40820
loss: 1.0151219367980957,grad_norm: 0.942022788055953, iteration: 40821
loss: 0.973514199256897,grad_norm: 0.9810591697664443, iteration: 40822
loss: 1.0102558135986328,grad_norm: 0.9999991171674357, iteration: 40823
loss: 1.0037518739700317,grad_norm: 0.9999990851021371, iteration: 40824
loss: 1.0141713619232178,grad_norm: 0.9643304032395826, iteration: 40825
loss: 1.003930687904358,grad_norm: 0.8355190158357622, iteration: 40826
loss: 0.9770187139511108,grad_norm: 0.8985444364442787, iteration: 40827
loss: 0.9635750651359558,grad_norm: 0.9875823140962893, iteration: 40828
loss: 0.973314106464386,grad_norm: 0.9999989622558925, iteration: 40829
loss: 0.9681146144866943,grad_norm: 0.9999990307862577, iteration: 40830
loss: 0.9723628759384155,grad_norm: 0.9155952144908078, iteration: 40831
loss: 0.9570040106773376,grad_norm: 0.9392365064029363, iteration: 40832
loss: 1.0217856168746948,grad_norm: 0.8028426556223414, iteration: 40833
loss: 1.0208927392959595,grad_norm: 0.8908235447242878, iteration: 40834
loss: 1.1362719535827637,grad_norm: 0.9999997471618717, iteration: 40835
loss: 1.0189950466156006,grad_norm: 0.9668374896512942, iteration: 40836
loss: 1.0373787879943848,grad_norm: 0.9999990184761375, iteration: 40837
loss: 1.0661412477493286,grad_norm: 0.9999992035546279, iteration: 40838
loss: 1.0150628089904785,grad_norm: 0.928061729454314, iteration: 40839
loss: 0.9335816502571106,grad_norm: 0.8900926742867626, iteration: 40840
loss: 1.039146900177002,grad_norm: 0.8294352333391037, iteration: 40841
loss: 0.9978672862052917,grad_norm: 0.999999193915883, iteration: 40842
loss: 0.9392857551574707,grad_norm: 0.999999064067046, iteration: 40843
loss: 1.0434805154800415,grad_norm: 0.9999992397170807, iteration: 40844
loss: 1.013573169708252,grad_norm: 0.8966699566165096, iteration: 40845
loss: 1.0532163381576538,grad_norm: 1.0000000232014867, iteration: 40846
loss: 1.0130647420883179,grad_norm: 0.9999991297604238, iteration: 40847
loss: 1.0117182731628418,grad_norm: 0.7617299732443705, iteration: 40848
loss: 1.0325700044631958,grad_norm: 0.9999993848275092, iteration: 40849
loss: 0.9914669990539551,grad_norm: 0.8668024580910065, iteration: 40850
loss: 0.9986096620559692,grad_norm: 0.9143036014554982, iteration: 40851
loss: 1.0612597465515137,grad_norm: 0.9999997582783106, iteration: 40852
loss: 1.023972749710083,grad_norm: 0.7719129533713721, iteration: 40853
loss: 1.0200614929199219,grad_norm: 0.9999993422487129, iteration: 40854
loss: 1.0185872316360474,grad_norm: 0.9939051657926831, iteration: 40855
loss: 1.0262956619262695,grad_norm: 0.7138914142305727, iteration: 40856
loss: 1.0057568550109863,grad_norm: 0.9441017574776983, iteration: 40857
loss: 1.003235936164856,grad_norm: 0.8134493945728374, iteration: 40858
loss: 0.980830729007721,grad_norm: 0.9999990943948037, iteration: 40859
loss: 1.0201219320297241,grad_norm: 0.9999990614976969, iteration: 40860
loss: 1.0183615684509277,grad_norm: 0.8315628315537775, iteration: 40861
loss: 0.9706673622131348,grad_norm: 0.8290117202784515, iteration: 40862
loss: 1.0230529308319092,grad_norm: 0.9999991011323167, iteration: 40863
loss: 0.9764654040336609,grad_norm: 0.8300193363140377, iteration: 40864
loss: 1.006308913230896,grad_norm: 0.9441023341616134, iteration: 40865
loss: 1.0225903987884521,grad_norm: 0.9999990816273836, iteration: 40866
loss: 0.9704592823982239,grad_norm: 0.999999091530463, iteration: 40867
loss: 1.021549940109253,grad_norm: 0.9999991010835573, iteration: 40868
loss: 1.1462346315383911,grad_norm: 0.9999996191802986, iteration: 40869
loss: 1.0015634298324585,grad_norm: 0.9999992742846452, iteration: 40870
loss: 0.9782508611679077,grad_norm: 0.9766654421552752, iteration: 40871
loss: 1.0023994445800781,grad_norm: 0.9999992727181302, iteration: 40872
loss: 0.9782963395118713,grad_norm: 0.9850651230413047, iteration: 40873
loss: 1.0546321868896484,grad_norm: 0.983611211438779, iteration: 40874
loss: 1.0283218622207642,grad_norm: 0.8624837857408203, iteration: 40875
loss: 1.0246598720550537,grad_norm: 0.8804105123082829, iteration: 40876
loss: 1.0066922903060913,grad_norm: 0.734214295135156, iteration: 40877
loss: 0.9924759268760681,grad_norm: 0.9999999411997316, iteration: 40878
loss: 0.9885883927345276,grad_norm: 0.9113209181600211, iteration: 40879
loss: 1.021441102027893,grad_norm: 0.8438122475135953, iteration: 40880
loss: 1.06136155128479,grad_norm: 0.814815134015012, iteration: 40881
loss: 1.028772234916687,grad_norm: 0.9618662148830202, iteration: 40882
loss: 1.0151145458221436,grad_norm: 0.9999990792123525, iteration: 40883
loss: 0.9925389289855957,grad_norm: 0.9046171565224385, iteration: 40884
loss: 1.0184897184371948,grad_norm: 0.8692113501366727, iteration: 40885
loss: 0.9574332237243652,grad_norm: 0.8822917157350968, iteration: 40886
loss: 1.0412112474441528,grad_norm: 0.8234036700800411, iteration: 40887
loss: 1.0083295106887817,grad_norm: 0.9584847671339051, iteration: 40888
loss: 1.0410560369491577,grad_norm: 0.9497537105222208, iteration: 40889
loss: 1.0105901956558228,grad_norm: 0.9999993408476611, iteration: 40890
loss: 0.9677292704582214,grad_norm: 0.9455484898813246, iteration: 40891
loss: 1.0037062168121338,grad_norm: 0.9067461074847711, iteration: 40892
loss: 1.017074704170227,grad_norm: 0.7568711670734382, iteration: 40893
loss: 0.9828587174415588,grad_norm: 0.7796004733264865, iteration: 40894
loss: 1.022985816001892,grad_norm: 0.929637194473731, iteration: 40895
loss: 0.9719590544700623,grad_norm: 0.9999994960004194, iteration: 40896
loss: 0.9951092004776001,grad_norm: 0.917274585291504, iteration: 40897
loss: 1.0151609182357788,grad_norm: 0.9938702049838157, iteration: 40898
loss: 0.9980888366699219,grad_norm: 0.917146519458533, iteration: 40899
loss: 0.9978067278862,grad_norm: 0.8583034407900881, iteration: 40900
loss: 1.070137858390808,grad_norm: 0.9999993563788516, iteration: 40901
loss: 0.9870737791061401,grad_norm: 0.9999991767158891, iteration: 40902
loss: 0.9207911491394043,grad_norm: 0.9991091798401284, iteration: 40903
loss: 0.9565626978874207,grad_norm: 0.8656678416588824, iteration: 40904
loss: 0.9959248900413513,grad_norm: 0.99999910319918, iteration: 40905
loss: 0.9856014251708984,grad_norm: 0.9999989076205492, iteration: 40906
loss: 0.9978469014167786,grad_norm: 0.9999990937642901, iteration: 40907
loss: 1.0182039737701416,grad_norm: 0.999999116557245, iteration: 40908
loss: 1.0621674060821533,grad_norm: 0.9999998421854244, iteration: 40909
loss: 0.9621629118919373,grad_norm: 0.8590469621522633, iteration: 40910
loss: 1.0234836339950562,grad_norm: 0.9999990581239258, iteration: 40911
loss: 1.0117636919021606,grad_norm: 0.8699478061267251, iteration: 40912
loss: 0.9996966123580933,grad_norm: 0.9999991328634108, iteration: 40913
loss: 1.0011639595031738,grad_norm: 0.8949512091288614, iteration: 40914
loss: 1.0252904891967773,grad_norm: 0.9999993794084157, iteration: 40915
loss: 0.9779842495918274,grad_norm: 0.9999990393846762, iteration: 40916
loss: 1.0170016288757324,grad_norm: 0.9999990418518728, iteration: 40917
loss: 1.0261694192886353,grad_norm: 0.9999995175923065, iteration: 40918
loss: 0.9858303070068359,grad_norm: 0.9999990080598596, iteration: 40919
loss: 1.0186768770217896,grad_norm: 0.999999243466395, iteration: 40920
loss: 1.0251482725143433,grad_norm: 0.96408960540423, iteration: 40921
loss: 0.9441911578178406,grad_norm: 0.9745893949566806, iteration: 40922
loss: 1.0400303602218628,grad_norm: 0.9999993448946822, iteration: 40923
loss: 0.9852182269096375,grad_norm: 0.9314136033074109, iteration: 40924
loss: 1.006798505783081,grad_norm: 0.9999990787105099, iteration: 40925
loss: 1.0113153457641602,grad_norm: 0.9999992243330732, iteration: 40926
loss: 1.0211867094039917,grad_norm: 0.9999992122997352, iteration: 40927
loss: 1.0434319972991943,grad_norm: 0.9999996626771329, iteration: 40928
loss: 1.011702060699463,grad_norm: 0.9999992932699884, iteration: 40929
loss: 1.000449776649475,grad_norm: 0.9975457836270336, iteration: 40930
loss: 1.0669993162155151,grad_norm: 0.99999925459108, iteration: 40931
loss: 1.00933837890625,grad_norm: 0.8424007166435362, iteration: 40932
loss: 0.957001268863678,grad_norm: 0.9342447069336369, iteration: 40933
loss: 0.9823695421218872,grad_norm: 0.9067835083627941, iteration: 40934
loss: 0.9871742725372314,grad_norm: 0.9999991998482528, iteration: 40935
loss: 1.0133963823318481,grad_norm: 0.9133041162942583, iteration: 40936
loss: 1.0234252214431763,grad_norm: 0.9999994613592572, iteration: 40937
loss: 0.9797802567481995,grad_norm: 0.9999991511786478, iteration: 40938
loss: 1.0019567012786865,grad_norm: 0.8919593329963197, iteration: 40939
loss: 0.9917962551116943,grad_norm: 0.9999990965633284, iteration: 40940
loss: 1.005431056022644,grad_norm: 0.9972293806797072, iteration: 40941
loss: 1.0239105224609375,grad_norm: 0.8557903566281049, iteration: 40942
loss: 1.0244721174240112,grad_norm: 0.999999229530071, iteration: 40943
loss: 1.0301388502120972,grad_norm: 0.7875595817408332, iteration: 40944
loss: 0.9829550385475159,grad_norm: 0.9834029242963905, iteration: 40945
loss: 1.048222541809082,grad_norm: 0.7415979294920567, iteration: 40946
loss: 1.0170353651046753,grad_norm: 0.937165317653496, iteration: 40947
loss: 0.9837372899055481,grad_norm: 0.9294111543158213, iteration: 40948
loss: 0.9689962863922119,grad_norm: 0.911353432932484, iteration: 40949
loss: 1.0042293071746826,grad_norm: 0.8386883474196002, iteration: 40950
loss: 1.018219232559204,grad_norm: 0.9741389434470987, iteration: 40951
loss: 0.9713535308837891,grad_norm: 0.776241660765644, iteration: 40952
loss: 1.0249226093292236,grad_norm: 0.9999996935644552, iteration: 40953
loss: 1.0617821216583252,grad_norm: 0.9999993819671888, iteration: 40954
loss: 1.007016897201538,grad_norm: 0.8299414504495289, iteration: 40955
loss: 0.9526241421699524,grad_norm: 0.9880202871451411, iteration: 40956
loss: 0.9727928042411804,grad_norm: 0.9999994186202433, iteration: 40957
loss: 1.0121351480484009,grad_norm: 0.9999993031178901, iteration: 40958
loss: 1.0891834497451782,grad_norm: 0.9999997973861019, iteration: 40959
loss: 1.0450313091278076,grad_norm: 0.9942296932458011, iteration: 40960
loss: 1.0160843133926392,grad_norm: 0.9999992383089186, iteration: 40961
loss: 1.0028568506240845,grad_norm: 0.9999992735379747, iteration: 40962
loss: 0.9777487516403198,grad_norm: 0.8832232364569536, iteration: 40963
loss: 0.9792081117630005,grad_norm: 0.9999989639885875, iteration: 40964
loss: 1.0596778392791748,grad_norm: 0.9999991787596914, iteration: 40965
loss: 1.0078585147857666,grad_norm: 0.9999991435828374, iteration: 40966
loss: 1.0834071636199951,grad_norm: 0.9999998947376944, iteration: 40967
loss: 1.0301052331924438,grad_norm: 0.9999991436478604, iteration: 40968
loss: 1.030774474143982,grad_norm: 0.9903322838815629, iteration: 40969
loss: 1.0295037031173706,grad_norm: 0.9999994344301294, iteration: 40970
loss: 0.9729742407798767,grad_norm: 0.9999991299346254, iteration: 40971
loss: 0.9926198720932007,grad_norm: 0.9999998558322638, iteration: 40972
loss: 1.019376277923584,grad_norm: 0.9943682898256102, iteration: 40973
loss: 1.0261822938919067,grad_norm: 0.9452971697507377, iteration: 40974
loss: 1.007227897644043,grad_norm: 0.7852454280345705, iteration: 40975
loss: 0.9843422174453735,grad_norm: 0.9999992430589998, iteration: 40976
loss: 1.0385040044784546,grad_norm: 0.959958963882343, iteration: 40977
loss: 1.0096184015274048,grad_norm: 0.9999991625048691, iteration: 40978
loss: 1.0256834030151367,grad_norm: 0.9999993671244262, iteration: 40979
loss: 1.0139845609664917,grad_norm: 0.9999991028162013, iteration: 40980
loss: 0.9790425300598145,grad_norm: 0.9198352396041646, iteration: 40981
loss: 0.9642552733421326,grad_norm: 0.9999999089972549, iteration: 40982
loss: 1.026601791381836,grad_norm: 0.9713209290847202, iteration: 40983
loss: 1.0347638130187988,grad_norm: 0.9999993272726433, iteration: 40984
loss: 1.008267879486084,grad_norm: 0.8355544691662007, iteration: 40985
loss: 1.0037598609924316,grad_norm: 0.9499594811522917, iteration: 40986
loss: 1.0283631086349487,grad_norm: 0.9999992005824158, iteration: 40987
loss: 1.0713860988616943,grad_norm: 0.9999990579320965, iteration: 40988
loss: 0.9804943203926086,grad_norm: 0.9312985023421425, iteration: 40989
loss: 0.9769524335861206,grad_norm: 0.8158582954344756, iteration: 40990
loss: 1.0164108276367188,grad_norm: 0.9999990651073541, iteration: 40991
loss: 1.0367945432662964,grad_norm: 0.999999208147555, iteration: 40992
loss: 1.0197433233261108,grad_norm: 0.9593569188961641, iteration: 40993
loss: 0.985761821269989,grad_norm: 0.9999991513118396, iteration: 40994
loss: 1.0006855726242065,grad_norm: 0.9999996867671159, iteration: 40995
loss: 0.9852005839347839,grad_norm: 0.9324875407229791, iteration: 40996
loss: 1.0157026052474976,grad_norm: 0.9642252510064148, iteration: 40997
loss: 1.0017012357711792,grad_norm: 0.8276760548078801, iteration: 40998
loss: 1.0036344528198242,grad_norm: 0.9210995448830029, iteration: 40999
loss: 0.9930028319358826,grad_norm: 0.9999991200584523, iteration: 41000
loss: 1.0089393854141235,grad_norm: 0.9999992683086092, iteration: 41001
loss: 1.0467761754989624,grad_norm: 0.9612045992091156, iteration: 41002
loss: 1.4549697637557983,grad_norm: 0.9999998402870983, iteration: 41003
loss: 0.9812191128730774,grad_norm: 0.9999990042853064, iteration: 41004
loss: 1.0205806493759155,grad_norm: 0.831838825608182, iteration: 41005
loss: 0.9742405414581299,grad_norm: 0.9462568858289643, iteration: 41006
loss: 1.0106602907180786,grad_norm: 0.9007911289910315, iteration: 41007
loss: 1.0101524591445923,grad_norm: 0.8144522588886247, iteration: 41008
loss: 1.0026260614395142,grad_norm: 0.8971383046612601, iteration: 41009
loss: 1.00822114944458,grad_norm: 0.973962275303243, iteration: 41010
loss: 1.0182886123657227,grad_norm: 0.8066841325563314, iteration: 41011
loss: 1.0023759603500366,grad_norm: 0.9999990892888624, iteration: 41012
loss: 0.9820002317428589,grad_norm: 0.8149299176649525, iteration: 41013
loss: 0.9895613193511963,grad_norm: 0.9083115131302583, iteration: 41014
loss: 1.1023520231246948,grad_norm: 0.9999992401766485, iteration: 41015
loss: 1.0161149501800537,grad_norm: 0.999999009221699, iteration: 41016
loss: 1.021035075187683,grad_norm: 0.999999039815375, iteration: 41017
loss: 0.9811497330665588,grad_norm: 0.9995369583606867, iteration: 41018
loss: 0.9982305765151978,grad_norm: 0.882544998782385, iteration: 41019
loss: 1.006980538368225,grad_norm: 0.8456814706790429, iteration: 41020
loss: 0.9631673693656921,grad_norm: 0.9158749263537365, iteration: 41021
loss: 0.9662282466888428,grad_norm: 0.978617793566178, iteration: 41022
loss: 0.9954771995544434,grad_norm: 0.9999991480009849, iteration: 41023
loss: 1.0258036851882935,grad_norm: 0.9999992117542321, iteration: 41024
loss: 0.9796781539916992,grad_norm: 0.8608982853861112, iteration: 41025
loss: 1.0399980545043945,grad_norm: 0.9999990841105617, iteration: 41026
loss: 0.9934118986129761,grad_norm: 0.9287367328639822, iteration: 41027
loss: 1.0316804647445679,grad_norm: 0.9361087529685185, iteration: 41028
loss: 1.2863175868988037,grad_norm: 0.9999995890493437, iteration: 41029
loss: 0.9881449341773987,grad_norm: 0.9579902657375454, iteration: 41030
loss: 0.9958358407020569,grad_norm: 0.9999991506217585, iteration: 41031
loss: 1.2231824398040771,grad_norm: 0.9999998826993544, iteration: 41032
loss: 1.0015305280685425,grad_norm: 0.9424149091451801, iteration: 41033
loss: 1.094759464263916,grad_norm: 0.999999382326034, iteration: 41034
loss: 1.0042797327041626,grad_norm: 0.8912446121099766, iteration: 41035
loss: 1.0111565589904785,grad_norm: 0.9999991354438105, iteration: 41036
loss: 1.0053223371505737,grad_norm: 0.9999993052047319, iteration: 41037
loss: 0.999250054359436,grad_norm: 0.9888317036720846, iteration: 41038
loss: 1.1388274431228638,grad_norm: 0.9999993781331229, iteration: 41039
loss: 1.0355565547943115,grad_norm: 0.9999992429046781, iteration: 41040
loss: 0.9752699732780457,grad_norm: 0.7982643628014865, iteration: 41041
loss: 1.036376714706421,grad_norm: 0.866545230380136, iteration: 41042
loss: 1.0336625576019287,grad_norm: 0.999999222775028, iteration: 41043
loss: 0.9960098266601562,grad_norm: 0.9999989776557641, iteration: 41044
loss: 0.9733394384384155,grad_norm: 0.9999991355152698, iteration: 41045
loss: 1.0318995714187622,grad_norm: 0.9665030816654635, iteration: 41046
loss: 0.9712532758712769,grad_norm: 0.7934952448657252, iteration: 41047
loss: 1.0465686321258545,grad_norm: 0.9999992475553162, iteration: 41048
loss: 0.9958117604255676,grad_norm: 0.9999989310936166, iteration: 41049
loss: 1.0312073230743408,grad_norm: 0.9215074380341708, iteration: 41050
loss: 0.986476480960846,grad_norm: 0.9234384357425923, iteration: 41051
loss: 1.0396682024002075,grad_norm: 0.9999991606491698, iteration: 41052
loss: 1.0497801303863525,grad_norm: 0.9999995765084094, iteration: 41053
loss: 1.0013600587844849,grad_norm: 0.9709254814034667, iteration: 41054
loss: 1.0011988878250122,grad_norm: 0.9566117952818042, iteration: 41055
loss: 0.9493303298950195,grad_norm: 0.9999991928464522, iteration: 41056
loss: 0.9949067831039429,grad_norm: 0.7353771898314044, iteration: 41057
loss: 1.0993179082870483,grad_norm: 0.9999997093413584, iteration: 41058
loss: 0.9883973598480225,grad_norm: 0.8258901941374018, iteration: 41059
loss: 0.975803554058075,grad_norm: 0.9671146865059593, iteration: 41060
loss: 0.9640188813209534,grad_norm: 0.9999994966205336, iteration: 41061
loss: 0.9950997829437256,grad_norm: 0.9999992916722754, iteration: 41062
loss: 0.9770434498786926,grad_norm: 0.8637832122247657, iteration: 41063
loss: 1.009352684020996,grad_norm: 0.9999990093231684, iteration: 41064
loss: 1.0684889554977417,grad_norm: 0.8572957975900233, iteration: 41065
loss: 0.9825546145439148,grad_norm: 0.7644554989599573, iteration: 41066
loss: 0.9969832897186279,grad_norm: 0.8559452428829705, iteration: 41067
loss: 0.9706061482429504,grad_norm: 0.919281094199452, iteration: 41068
loss: 1.0864101648330688,grad_norm: 0.9999995850026361, iteration: 41069
loss: 1.0228639841079712,grad_norm: 0.8530835446074269, iteration: 41070
loss: 1.0187844038009644,grad_norm: 0.999999044421505, iteration: 41071
loss: 0.9969213604927063,grad_norm: 0.8334678751319528, iteration: 41072
loss: 0.9850344657897949,grad_norm: 0.9999994235127125, iteration: 41073
loss: 1.0044031143188477,grad_norm: 0.964991380164975, iteration: 41074
loss: 1.0377163887023926,grad_norm: 0.9999991610506643, iteration: 41075
loss: 1.0744783878326416,grad_norm: 0.9999998603081767, iteration: 41076
loss: 1.009818434715271,grad_norm: 0.9999996439503471, iteration: 41077
loss: 0.9793336391448975,grad_norm: 0.9124690300808029, iteration: 41078
loss: 1.021162748336792,grad_norm: 0.8573433396147226, iteration: 41079
loss: 0.971311092376709,grad_norm: 0.8302861259858015, iteration: 41080
loss: 1.0297247171401978,grad_norm: 0.9846548383454663, iteration: 41081
loss: 0.9847301840782166,grad_norm: 0.7955075472697439, iteration: 41082
loss: 1.0032497644424438,grad_norm: 0.9999991337625173, iteration: 41083
loss: 0.9862226247787476,grad_norm: 0.8242352308174499, iteration: 41084
loss: 1.0186148881912231,grad_norm: 0.8541685255726998, iteration: 41085
loss: 1.0482664108276367,grad_norm: 0.9999991966254334, iteration: 41086
loss: 1.056210994720459,grad_norm: 0.9073024292368739, iteration: 41087
loss: 1.0118141174316406,grad_norm: 0.9753133312853141, iteration: 41088
loss: 1.0148640871047974,grad_norm: 0.9999992099749353, iteration: 41089
loss: 0.9657512903213501,grad_norm: 0.8775428811695617, iteration: 41090
loss: 0.9884719848632812,grad_norm: 0.9999990670194269, iteration: 41091
loss: 0.9946619272232056,grad_norm: 0.8659248045644984, iteration: 41092
loss: 1.042070984840393,grad_norm: 0.9999998809749961, iteration: 41093
loss: 1.0541247129440308,grad_norm: 0.9999989895311798, iteration: 41094
loss: 1.0163912773132324,grad_norm: 0.9999992333023885, iteration: 41095
loss: 1.0756537914276123,grad_norm: 0.9999998336459086, iteration: 41096
loss: 0.9807426929473877,grad_norm: 0.9820066423679069, iteration: 41097
loss: 1.0077835321426392,grad_norm: 0.8844531750337234, iteration: 41098
loss: 1.0148842334747314,grad_norm: 0.9999990451550403, iteration: 41099
loss: 0.9852513670921326,grad_norm: 0.9999991565979067, iteration: 41100
loss: 0.9781067967414856,grad_norm: 0.8281505444317593, iteration: 41101
loss: 0.9907642006874084,grad_norm: 0.8174172505134147, iteration: 41102
loss: 0.980896532535553,grad_norm: 0.9999989005249336, iteration: 41103
loss: 1.1060069799423218,grad_norm: 0.9999997794768695, iteration: 41104
loss: 0.9665772318840027,grad_norm: 0.9999991276149283, iteration: 41105
loss: 0.9733937978744507,grad_norm: 0.9546093073688062, iteration: 41106
loss: 1.0434211492538452,grad_norm: 0.9999990043643127, iteration: 41107
loss: 0.9566761255264282,grad_norm: 0.9269898333450243, iteration: 41108
loss: 0.987797200679779,grad_norm: 0.9012586816819849, iteration: 41109
loss: 1.0472853183746338,grad_norm: 0.9999996655132845, iteration: 41110
loss: 1.0245190858840942,grad_norm: 0.9868147382085743, iteration: 41111
loss: 1.0160558223724365,grad_norm: 0.8391875510252832, iteration: 41112
loss: 1.0028622150421143,grad_norm: 0.9999992407684279, iteration: 41113
loss: 1.052364468574524,grad_norm: 0.9999992319433765, iteration: 41114
loss: 1.03377366065979,grad_norm: 0.7367488062646143, iteration: 41115
loss: 1.0237107276916504,grad_norm: 0.8977477267883741, iteration: 41116
loss: 0.9594866037368774,grad_norm: 0.9326485772110517, iteration: 41117
loss: 0.9665807485580444,grad_norm: 0.9999991107370944, iteration: 41118
loss: 1.0466002225875854,grad_norm: 0.7954974011309761, iteration: 41119
loss: 1.008687138557434,grad_norm: 0.9584696267447286, iteration: 41120
loss: 0.9761525392532349,grad_norm: 0.9999991579157271, iteration: 41121
loss: 1.0032527446746826,grad_norm: 0.8831280411975122, iteration: 41122
loss: 1.0281057357788086,grad_norm: 0.999999143930648, iteration: 41123
loss: 1.0075701475143433,grad_norm: 0.9065446941586094, iteration: 41124
loss: 1.0111632347106934,grad_norm: 0.9560166038403701, iteration: 41125
loss: 1.0260396003723145,grad_norm: 0.9999989518176192, iteration: 41126
loss: 0.9972700476646423,grad_norm: 0.8783486402255548, iteration: 41127
loss: 1.1660528182983398,grad_norm: 0.9999999300335167, iteration: 41128
loss: 1.037778377532959,grad_norm: 0.999999961788529, iteration: 41129
loss: 1.0418504476547241,grad_norm: 0.999999539789864, iteration: 41130
loss: 0.958104133605957,grad_norm: 0.999999194118554, iteration: 41131
loss: 1.0199202299118042,grad_norm: 0.9999991712817824, iteration: 41132
loss: 0.9934097528457642,grad_norm: 0.9999993621413912, iteration: 41133
loss: 0.9785358309745789,grad_norm: 0.8997911148733895, iteration: 41134
loss: 1.039259433746338,grad_norm: 0.999999506620509, iteration: 41135
loss: 0.9695241451263428,grad_norm: 0.8904295489504086, iteration: 41136
loss: 1.0014126300811768,grad_norm: 0.9150559449766574, iteration: 41137
loss: 1.000837802886963,grad_norm: 0.9324771583743615, iteration: 41138
loss: 1.0300922393798828,grad_norm: 0.9419454986767292, iteration: 41139
loss: 1.0063533782958984,grad_norm: 0.9999991413660365, iteration: 41140
loss: 0.9676772952079773,grad_norm: 0.8007674138829846, iteration: 41141
loss: 1.0187581777572632,grad_norm: 0.9323088289525843, iteration: 41142
loss: 1.0146809816360474,grad_norm: 0.9999992518192121, iteration: 41143
loss: 0.9895207285881042,grad_norm: 0.8365348129317668, iteration: 41144
loss: 1.0206756591796875,grad_norm: 0.9694351090837656, iteration: 41145
loss: 1.0668071508407593,grad_norm: 0.9830026001368023, iteration: 41146
loss: 1.0027521848678589,grad_norm: 0.9999990265047712, iteration: 41147
loss: 0.9490241408348083,grad_norm: 0.9999990392084277, iteration: 41148
loss: 1.0013339519500732,grad_norm: 0.8248317961201933, iteration: 41149
loss: 0.9708846807479858,grad_norm: 0.9999990755677481, iteration: 41150
loss: 1.0253095626831055,grad_norm: 0.9999990648273307, iteration: 41151
loss: 1.0113931894302368,grad_norm: 0.7806071280499295, iteration: 41152
loss: 1.0168120861053467,grad_norm: 0.9765129546399066, iteration: 41153
loss: 0.9652627110481262,grad_norm: 0.8495586024069187, iteration: 41154
loss: 1.023525595664978,grad_norm: 0.9136780787012201, iteration: 41155
loss: 1.0434610843658447,grad_norm: 0.9726177024851032, iteration: 41156
loss: 1.014604926109314,grad_norm: 0.9438038124711404, iteration: 41157
loss: 0.9921295046806335,grad_norm: 0.8618844799224801, iteration: 41158
loss: 1.008804440498352,grad_norm: 0.9999995934593671, iteration: 41159
loss: 0.9778512716293335,grad_norm: 0.9999990584652906, iteration: 41160
loss: 0.9873606562614441,grad_norm: 0.9999989698289571, iteration: 41161
loss: 1.005483865737915,grad_norm: 0.8029019981371476, iteration: 41162
loss: 0.9881037473678589,grad_norm: 0.8815887207063836, iteration: 41163
loss: 0.9966434836387634,grad_norm: 0.9603759796944774, iteration: 41164
loss: 1.0016634464263916,grad_norm: 0.9999990263648231, iteration: 41165
loss: 0.9617692828178406,grad_norm: 0.9999991258477792, iteration: 41166
loss: 0.9970505833625793,grad_norm: 0.8779548711267527, iteration: 41167
loss: 1.0278165340423584,grad_norm: 0.9973710893125652, iteration: 41168
loss: 1.0148303508758545,grad_norm: 0.936527892639017, iteration: 41169
loss: 1.016780972480774,grad_norm: 0.9925904092865968, iteration: 41170
loss: 0.987058162689209,grad_norm: 0.804090300461552, iteration: 41171
loss: 0.9554436206817627,grad_norm: 0.9946267076798769, iteration: 41172
loss: 1.036714792251587,grad_norm: 0.9999991717931327, iteration: 41173
loss: 0.9800025224685669,grad_norm: 0.9999990553553403, iteration: 41174
loss: 1.0378024578094482,grad_norm: 0.999999097333069, iteration: 41175
loss: 1.0265803337097168,grad_norm: 0.8635225828370667, iteration: 41176
loss: 0.9855664968490601,grad_norm: 0.942315698298143, iteration: 41177
loss: 1.0380014181137085,grad_norm: 0.8999170880353944, iteration: 41178
loss: 0.963159441947937,grad_norm: 0.7764657788400686, iteration: 41179
loss: 0.9926058650016785,grad_norm: 0.9999992213678933, iteration: 41180
loss: 1.007849097251892,grad_norm: 0.9999995056784471, iteration: 41181
loss: 1.0416587591171265,grad_norm: 0.9999991156528953, iteration: 41182
loss: 1.0213478803634644,grad_norm: 0.9524149119528315, iteration: 41183
loss: 1.0637083053588867,grad_norm: 0.9999994711379566, iteration: 41184
loss: 1.0256584882736206,grad_norm: 0.999999373542828, iteration: 41185
loss: 1.0098438262939453,grad_norm: 0.9999997031720949, iteration: 41186
loss: 1.0338181257247925,grad_norm: 0.999999424864765, iteration: 41187
loss: 1.022667407989502,grad_norm: 0.9999991103333711, iteration: 41188
loss: 0.9800629019737244,grad_norm: 0.8333296682361415, iteration: 41189
loss: 1.0281152725219727,grad_norm: 0.7630676296824606, iteration: 41190
loss: 1.0162954330444336,grad_norm: 0.9191050108598505, iteration: 41191
loss: 1.0090789794921875,grad_norm: 0.9463389712800468, iteration: 41192
loss: 0.9997153878211975,grad_norm: 0.9999989946504888, iteration: 41193
loss: 1.002655029296875,grad_norm: 0.9999992847967911, iteration: 41194
loss: 1.105054259300232,grad_norm: 0.9999991902123744, iteration: 41195
loss: 1.0462713241577148,grad_norm: 0.9999991695559965, iteration: 41196
loss: 0.9917416572570801,grad_norm: 0.8440522845819235, iteration: 41197
loss: 1.0103157758712769,grad_norm: 0.9999992659369863, iteration: 41198
loss: 1.0785467624664307,grad_norm: 0.999999491770805, iteration: 41199
loss: 0.9968973398208618,grad_norm: 0.9999990963399286, iteration: 41200
loss: 0.9803466796875,grad_norm: 0.9806344509676255, iteration: 41201
loss: 0.9977928996086121,grad_norm: 0.9919020692116707, iteration: 41202
loss: 1.1016422510147095,grad_norm: 0.9281494697364555, iteration: 41203
loss: 0.9966294169425964,grad_norm: 0.9999992486981563, iteration: 41204
loss: 1.0541077852249146,grad_norm: 0.9999992208876691, iteration: 41205
loss: 1.0025875568389893,grad_norm: 0.9999998308881424, iteration: 41206
loss: 0.9907193183898926,grad_norm: 0.9999990848814585, iteration: 41207
loss: 0.9707435965538025,grad_norm: 0.9837715782683807, iteration: 41208
loss: 1.012587070465088,grad_norm: 0.9999995017606113, iteration: 41209
loss: 1.0414694547653198,grad_norm: 0.9505672208865044, iteration: 41210
loss: 1.0024179220199585,grad_norm: 0.7999925826576793, iteration: 41211
loss: 1.0368012189865112,grad_norm: 0.999999658589121, iteration: 41212
loss: 0.9956469535827637,grad_norm: 0.8398376209454758, iteration: 41213
loss: 1.0179851055145264,grad_norm: 0.9999991275331364, iteration: 41214
loss: 0.9921247959136963,grad_norm: 0.8384458696837532, iteration: 41215
loss: 0.9908327460289001,grad_norm: 0.9999991416754358, iteration: 41216
loss: 1.0327162742614746,grad_norm: 0.9999997078332253, iteration: 41217
loss: 1.0153502225875854,grad_norm: 0.9999990984150584, iteration: 41218
loss: 1.019432544708252,grad_norm: 0.9999991030220267, iteration: 41219
loss: 0.9763091802597046,grad_norm: 0.9346248840082242, iteration: 41220
loss: 0.9770943522453308,grad_norm: 0.9999994185403992, iteration: 41221
loss: 0.9999710917472839,grad_norm: 0.9999992306304984, iteration: 41222
loss: 1.0348159074783325,grad_norm: 0.9317265428522354, iteration: 41223
loss: 0.9868451952934265,grad_norm: 0.9752158990507687, iteration: 41224
loss: 0.9952568411827087,grad_norm: 0.999999022006424, iteration: 41225
loss: 0.9964291453361511,grad_norm: 0.7807342952228609, iteration: 41226
loss: 1.009427785873413,grad_norm: 0.9999990366904999, iteration: 41227
loss: 1.0014846324920654,grad_norm: 0.9999995090647662, iteration: 41228
loss: 0.9896325469017029,grad_norm: 0.9999991359979379, iteration: 41229
loss: 0.9637922644615173,grad_norm: 0.9431340366176706, iteration: 41230
loss: 1.0555816888809204,grad_norm: 0.9999990785879005, iteration: 41231
loss: 1.0501809120178223,grad_norm: 0.9999991403938089, iteration: 41232
loss: 0.9162715077400208,grad_norm: 0.9770076453767931, iteration: 41233
loss: 1.0171401500701904,grad_norm: 0.9337215563218337, iteration: 41234
loss: 0.9902073740959167,grad_norm: 0.9999995740790527, iteration: 41235
loss: 0.9668057560920715,grad_norm: 0.8208867966036297, iteration: 41236
loss: 0.9850844740867615,grad_norm: 0.9696804468882279, iteration: 41237
loss: 1.025830626487732,grad_norm: 0.9999991197116992, iteration: 41238
loss: 1.0125383138656616,grad_norm: 0.9711950561990944, iteration: 41239
loss: 0.9956852197647095,grad_norm: 0.9600670425187504, iteration: 41240
loss: 0.9707672595977783,grad_norm: 0.9999991019687221, iteration: 41241
loss: 0.9719495177268982,grad_norm: 0.9487487425683073, iteration: 41242
loss: 0.9744200706481934,grad_norm: 0.9193533993420555, iteration: 41243
loss: 0.9723888039588928,grad_norm: 0.9999991045378372, iteration: 41244
loss: 1.0302656888961792,grad_norm: 0.9999992128887162, iteration: 41245
loss: 0.976240336894989,grad_norm: 0.850027233090585, iteration: 41246
loss: 1.010217308998108,grad_norm: 0.886555785825512, iteration: 41247
loss: 1.0809533596038818,grad_norm: 0.9999996132543767, iteration: 41248
loss: 1.0751765966415405,grad_norm: 0.9999994230013405, iteration: 41249
loss: 1.012323260307312,grad_norm: 0.9871421794641384, iteration: 41250
loss: 1.0828500986099243,grad_norm: 0.9999997885177606, iteration: 41251
loss: 1.014135718345642,grad_norm: 0.999999319322104, iteration: 41252
loss: 1.01240873336792,grad_norm: 0.8150633627000118, iteration: 41253
loss: 1.0091496706008911,grad_norm: 0.929882823512726, iteration: 41254
loss: 0.9827529191970825,grad_norm: 0.8745746101612752, iteration: 41255
loss: 1.0088976621627808,grad_norm: 0.92983330344709, iteration: 41256
loss: 1.0217994451522827,grad_norm: 0.8306880991398189, iteration: 41257
loss: 0.995445966720581,grad_norm: 0.9444757516571967, iteration: 41258
loss: 0.9735754728317261,grad_norm: 0.9999990011164606, iteration: 41259
loss: 1.0046216249465942,grad_norm: 0.9999990657023171, iteration: 41260
loss: 0.9910539388656616,grad_norm: 0.9999991369645896, iteration: 41261
loss: 1.1288481950759888,grad_norm: 0.99999946190312, iteration: 41262
loss: 1.0371499061584473,grad_norm: 0.8537731278214862, iteration: 41263
loss: 0.951610267162323,grad_norm: 0.9999992561281488, iteration: 41264
loss: 0.9988211393356323,grad_norm: 0.9727331535731571, iteration: 41265
loss: 1.0445888042449951,grad_norm: 0.9999991568972973, iteration: 41266
loss: 1.0238807201385498,grad_norm: 0.8669631865806798, iteration: 41267
loss: 1.0171394348144531,grad_norm: 0.9999991577876096, iteration: 41268
loss: 0.9995540380477905,grad_norm: 0.9999991727670968, iteration: 41269
loss: 0.9853816032409668,grad_norm: 0.9999991712856017, iteration: 41270
loss: 1.044335126876831,grad_norm: 0.999999011242219, iteration: 41271
loss: 1.0759605169296265,grad_norm: 0.9999999547410177, iteration: 41272
loss: 0.9973916411399841,grad_norm: 0.9466027209008497, iteration: 41273
loss: 1.0092533826828003,grad_norm: 0.998759438662281, iteration: 41274
loss: 1.0472912788391113,grad_norm: 0.8202033673373375, iteration: 41275
loss: 1.0153684616088867,grad_norm: 0.9999991972172776, iteration: 41276
loss: 0.980820894241333,grad_norm: 0.999999090291787, iteration: 41277
loss: 1.0281651020050049,grad_norm: 0.9999991784087253, iteration: 41278
loss: 1.0028802156448364,grad_norm: 0.9274690496532061, iteration: 41279
loss: 1.0101701021194458,grad_norm: 0.7972180070697283, iteration: 41280
loss: 1.030404806137085,grad_norm: 0.8907973482256284, iteration: 41281
loss: 1.0160305500030518,grad_norm: 0.8556216513399868, iteration: 41282
loss: 0.9998328685760498,grad_norm: 0.9466476705510555, iteration: 41283
loss: 1.0300352573394775,grad_norm: 0.9999995828835608, iteration: 41284
loss: 1.0434895753860474,grad_norm: 0.9999991274766702, iteration: 41285
loss: 1.0038998126983643,grad_norm: 0.8417999239925532, iteration: 41286
loss: 0.96918123960495,grad_norm: 0.9952236334782085, iteration: 41287
loss: 0.996417224407196,grad_norm: 0.8600594081235545, iteration: 41288
loss: 0.9999964833259583,grad_norm: 0.9999992586429479, iteration: 41289
loss: 1.0144344568252563,grad_norm: 0.9999991921707065, iteration: 41290
loss: 0.9842501878738403,grad_norm: 0.919910338397697, iteration: 41291
loss: 0.9990736246109009,grad_norm: 0.9999997974583995, iteration: 41292
loss: 1.0192371606826782,grad_norm: 0.999999087171735, iteration: 41293
loss: 0.9795630574226379,grad_norm: 0.9744152025233612, iteration: 41294
loss: 0.9948067665100098,grad_norm: 0.9999990681700557, iteration: 41295
loss: 1.0173181295394897,grad_norm: 0.9999998657680486, iteration: 41296
loss: 1.0031017065048218,grad_norm: 0.7789960992514027, iteration: 41297
loss: 0.9892584681510925,grad_norm: 0.9999991405744136, iteration: 41298
loss: 1.0018008947372437,grad_norm: 0.8184186695379945, iteration: 41299
loss: 1.0168750286102295,grad_norm: 0.9999993119054938, iteration: 41300
loss: 1.0229555368423462,grad_norm: 0.9999990663476762, iteration: 41301
loss: 0.9743430018424988,grad_norm: 0.9673813388783024, iteration: 41302
loss: 1.0276457071304321,grad_norm: 0.9863676219077407, iteration: 41303
loss: 1.0931384563446045,grad_norm: 0.9999998458327755, iteration: 41304
loss: 1.0236066579818726,grad_norm: 0.9999999094639772, iteration: 41305
loss: 1.007724404335022,grad_norm: 0.871542470852421, iteration: 41306
loss: 1.0405025482177734,grad_norm: 0.9999993091144515, iteration: 41307
loss: 1.0114666223526,grad_norm: 0.8614914058300971, iteration: 41308
loss: 1.0530999898910522,grad_norm: 0.9981747397183337, iteration: 41309
loss: 1.0212526321411133,grad_norm: 0.9832886410024552, iteration: 41310
loss: 1.254390835762024,grad_norm: 0.9999992667494387, iteration: 41311
loss: 1.0738548040390015,grad_norm: 0.9999997212321002, iteration: 41312
loss: 1.032405972480774,grad_norm: 0.999999163759887, iteration: 41313
loss: 0.9947274327278137,grad_norm: 0.9992246716374469, iteration: 41314
loss: 1.009502649307251,grad_norm: 0.9999992206153753, iteration: 41315
loss: 1.0328065156936646,grad_norm: 0.8294195725788047, iteration: 41316
loss: 1.000026822090149,grad_norm: 0.9999993568214394, iteration: 41317
loss: 0.9978602528572083,grad_norm: 0.9999992062308698, iteration: 41318
loss: 0.9877484440803528,grad_norm: 0.7920232507897542, iteration: 41319
loss: 0.9849123358726501,grad_norm: 0.9138424474059654, iteration: 41320
loss: 1.0115832090377808,grad_norm: 0.9999994103087223, iteration: 41321
loss: 1.0200467109680176,grad_norm: 0.9999996593329686, iteration: 41322
loss: 1.0175268650054932,grad_norm: 0.8457290462406949, iteration: 41323
loss: 1.0702344179153442,grad_norm: 0.9999994548929894, iteration: 41324
loss: 1.0167300701141357,grad_norm: 0.9999995724154863, iteration: 41325
loss: 1.0503878593444824,grad_norm: 0.9768904657984965, iteration: 41326
loss: 1.0720367431640625,grad_norm: 0.9999993770883311, iteration: 41327
loss: 1.1020509004592896,grad_norm: 0.9999990506490819, iteration: 41328
loss: 0.9929893612861633,grad_norm: 0.9259244454388013, iteration: 41329
loss: 0.9546317458152771,grad_norm: 0.918861697866269, iteration: 41330
loss: 1.0101813077926636,grad_norm: 0.9200795239530581, iteration: 41331
loss: 1.0468571186065674,grad_norm: 0.9610984311619025, iteration: 41332
loss: 1.0085209608078003,grad_norm: 0.9999989975667662, iteration: 41333
loss: 1.016101360321045,grad_norm: 0.9999994204807313, iteration: 41334
loss: 1.0164765119552612,grad_norm: 0.9551466105106594, iteration: 41335
loss: 0.9759104251861572,grad_norm: 0.7870159574119144, iteration: 41336
loss: 1.0223499536514282,grad_norm: 0.9999990598471182, iteration: 41337
loss: 1.0240525007247925,grad_norm: 0.9999991748307039, iteration: 41338
loss: 1.0033293962478638,grad_norm: 0.871393639486644, iteration: 41339
loss: 1.0880296230316162,grad_norm: 0.9999999111249593, iteration: 41340
loss: 1.040576696395874,grad_norm: 0.9999993433951334, iteration: 41341
loss: 0.9978417754173279,grad_norm: 0.9940624410598596, iteration: 41342
loss: 0.9744958877563477,grad_norm: 0.889629592637965, iteration: 41343
loss: 1.0102143287658691,grad_norm: 0.9578216201773788, iteration: 41344
loss: 0.9716683030128479,grad_norm: 0.8176647022860083, iteration: 41345
loss: 1.0439633131027222,grad_norm: 0.9999994619525793, iteration: 41346
loss: 0.9984426498413086,grad_norm: 0.9770664492626002, iteration: 41347
loss: 1.0095751285552979,grad_norm: 0.9999995043076125, iteration: 41348
loss: 1.057964563369751,grad_norm: 0.9999998337737432, iteration: 41349
loss: 1.0493861436843872,grad_norm: 0.999999683100516, iteration: 41350
loss: 1.046299934387207,grad_norm: 0.974782900192462, iteration: 41351
loss: 0.9774845838546753,grad_norm: 0.9999990243941896, iteration: 41352
loss: 0.9634788632392883,grad_norm: 0.8188558402225802, iteration: 41353
loss: 1.0141457319259644,grad_norm: 0.9500217093869011, iteration: 41354
loss: 1.0222899913787842,grad_norm: 0.9406964004065062, iteration: 41355
loss: 0.9956746101379395,grad_norm: 0.9999991032918079, iteration: 41356
loss: 1.009187936782837,grad_norm: 0.9882536187223501, iteration: 41357
loss: 1.0029536485671997,grad_norm: 0.9063409486945492, iteration: 41358
loss: 1.0075945854187012,grad_norm: 0.9999991787512962, iteration: 41359
loss: 1.0054510831832886,grad_norm: 0.9999992859870777, iteration: 41360
loss: 1.0437712669372559,grad_norm: 0.9471764102835429, iteration: 41361
loss: 1.0276448726654053,grad_norm: 0.9999991279798842, iteration: 41362
loss: 1.0134365558624268,grad_norm: 0.9999990978452147, iteration: 41363
loss: 1.018211007118225,grad_norm: 0.873013779106626, iteration: 41364
loss: 1.0319249629974365,grad_norm: 0.9999991216563386, iteration: 41365
loss: 1.002752423286438,grad_norm: 0.7718714919606973, iteration: 41366
loss: 0.9936104416847229,grad_norm: 0.9327007377042559, iteration: 41367
loss: 1.0136375427246094,grad_norm: 0.9040673094234357, iteration: 41368
loss: 1.0223580598831177,grad_norm: 0.934325522780742, iteration: 41369
loss: 1.0193190574645996,grad_norm: 0.979355321953091, iteration: 41370
loss: 0.9956648945808411,grad_norm: 0.8554237161821457, iteration: 41371
loss: 0.9845947623252869,grad_norm: 0.943090410554835, iteration: 41372
loss: 1.0709595680236816,grad_norm: 0.9999992670924169, iteration: 41373
loss: 1.0424269437789917,grad_norm: 0.7845082731267109, iteration: 41374
loss: 1.0636547803878784,grad_norm: 0.9999998016068828, iteration: 41375
loss: 1.0393645763397217,grad_norm: 0.8031863863941006, iteration: 41376
loss: 1.0467511415481567,grad_norm: 0.9999996324288681, iteration: 41377
loss: 1.0045446157455444,grad_norm: 0.9180136171313357, iteration: 41378
loss: 1.0131537914276123,grad_norm: 0.8448088166308123, iteration: 41379
loss: 1.0107581615447998,grad_norm: 0.999999651000037, iteration: 41380
loss: 1.0045994520187378,grad_norm: 0.9999994934661824, iteration: 41381
loss: 1.0369740724563599,grad_norm: 0.9999993238418036, iteration: 41382
loss: 1.0161210298538208,grad_norm: 0.9463980660689133, iteration: 41383
loss: 0.9631826281547546,grad_norm: 0.9999997263617885, iteration: 41384
loss: 1.0133607387542725,grad_norm: 0.8472238350533254, iteration: 41385
loss: 0.9920461773872375,grad_norm: 0.9999990072711495, iteration: 41386
loss: 0.9774091243743896,grad_norm: 0.9279084355797159, iteration: 41387
loss: 0.9826556444168091,grad_norm: 0.9999996775009157, iteration: 41388
loss: 1.000084400177002,grad_norm: 0.8361966115548245, iteration: 41389
loss: 1.0206172466278076,grad_norm: 0.9999990346680542, iteration: 41390
loss: 1.0289918184280396,grad_norm: 0.8785478431020755, iteration: 41391
loss: 1.002314805984497,grad_norm: 0.9999993637604752, iteration: 41392
loss: 0.9966000318527222,grad_norm: 0.9999992838418003, iteration: 41393
loss: 1.0116093158721924,grad_norm: 0.9568700741789864, iteration: 41394
loss: 0.9927646517753601,grad_norm: 0.9999994172731387, iteration: 41395
loss: 1.0106720924377441,grad_norm: 0.8168296159340339, iteration: 41396
loss: 1.0067239999771118,grad_norm: 0.9999991939407211, iteration: 41397
loss: 1.0300573110580444,grad_norm: 0.7988638538335148, iteration: 41398
loss: 1.0030425786972046,grad_norm: 0.9999989631227769, iteration: 41399
loss: 1.0149224996566772,grad_norm: 0.9620138880295236, iteration: 41400
loss: 1.0468063354492188,grad_norm: 0.9999995978582135, iteration: 41401
loss: 0.9927562475204468,grad_norm: 0.999999322095952, iteration: 41402
loss: 1.0057400465011597,grad_norm: 0.9760930005180775, iteration: 41403
loss: 0.9925107955932617,grad_norm: 0.8743569631385275, iteration: 41404
loss: 1.0264744758605957,grad_norm: 0.9999989741613523, iteration: 41405
loss: 0.9463663697242737,grad_norm: 0.8309001101153578, iteration: 41406
loss: 1.1570316553115845,grad_norm: 0.9999991926879361, iteration: 41407
loss: 1.016040563583374,grad_norm: 0.9999992931658704, iteration: 41408
loss: 1.0103645324707031,grad_norm: 0.9368092037474756, iteration: 41409
loss: 0.9907755851745605,grad_norm: 0.8218095320263821, iteration: 41410
loss: 1.0432324409484863,grad_norm: 0.9999999918117478, iteration: 41411
loss: 1.0586744546890259,grad_norm: 0.8863183900113518, iteration: 41412
loss: 1.0075722932815552,grad_norm: 0.908011254342231, iteration: 41413
loss: 1.0264590978622437,grad_norm: 0.9419401209057534, iteration: 41414
loss: 1.0120588541030884,grad_norm: 0.9244494869717833, iteration: 41415
loss: 1.0071146488189697,grad_norm: 0.9999990609220613, iteration: 41416
loss: 1.0687328577041626,grad_norm: 0.9999997175781804, iteration: 41417
loss: 1.0449200868606567,grad_norm: 0.9999991976605636, iteration: 41418
loss: 1.0309457778930664,grad_norm: 0.9719899576388191, iteration: 41419
loss: 1.0253783464431763,grad_norm: 0.9191194413985911, iteration: 41420
loss: 1.0043096542358398,grad_norm: 0.9999990289710476, iteration: 41421
loss: 0.9619236588478088,grad_norm: 0.9999994011252615, iteration: 41422
loss: 1.1099997758865356,grad_norm: 0.9999995194752883, iteration: 41423
loss: 0.990129292011261,grad_norm: 0.8779850398484191, iteration: 41424
loss: 1.0119632482528687,grad_norm: 0.9999990937907446, iteration: 41425
loss: 1.0278619527816772,grad_norm: 0.9999991078645915, iteration: 41426
loss: 0.9646466970443726,grad_norm: 0.9274061082787575, iteration: 41427
loss: 0.9678625464439392,grad_norm: 0.8819425698314468, iteration: 41428
loss: 1.0108675956726074,grad_norm: 0.8911924328182471, iteration: 41429
loss: 1.0688204765319824,grad_norm: 0.9999990785960419, iteration: 41430
loss: 1.019648790359497,grad_norm: 0.9999993708068605, iteration: 41431
loss: 1.0515618324279785,grad_norm: 0.9999997171251808, iteration: 41432
loss: 1.0660964250564575,grad_norm: 0.9999993763098585, iteration: 41433
loss: 0.9881467223167419,grad_norm: 0.8796526041309561, iteration: 41434
loss: 1.057119369506836,grad_norm: 0.9999999789157417, iteration: 41435
loss: 1.0034055709838867,grad_norm: 0.9999996734788028, iteration: 41436
loss: 0.9994836449623108,grad_norm: 0.9999990776191614, iteration: 41437
loss: 0.9854791760444641,grad_norm: 0.9596979604346305, iteration: 41438
loss: 1.0003782510757446,grad_norm: 0.8844309093080055, iteration: 41439
loss: 1.0251259803771973,grad_norm: 0.9999998576400748, iteration: 41440
loss: 1.0994035005569458,grad_norm: 0.999999212308098, iteration: 41441
loss: 1.0968552827835083,grad_norm: 1.0000000700252256, iteration: 41442
loss: 1.040650725364685,grad_norm: 0.9999992203202429, iteration: 41443
loss: 0.9721808433532715,grad_norm: 0.9458372152281684, iteration: 41444
loss: 1.000205159187317,grad_norm: 0.8643783791008052, iteration: 41445
loss: 1.004380702972412,grad_norm: 0.999999647343472, iteration: 41446
loss: 1.0190916061401367,grad_norm: 0.9551958816626213, iteration: 41447
loss: 1.0055246353149414,grad_norm: 0.9582948056926058, iteration: 41448
loss: 1.0479769706726074,grad_norm: 0.847153513700589, iteration: 41449
loss: 0.9760118722915649,grad_norm: 0.8955795882151228, iteration: 41450
loss: 1.03811514377594,grad_norm: 0.9923529117380033, iteration: 41451
loss: 1.0355839729309082,grad_norm: 0.9389372886622341, iteration: 41452
loss: 0.9984680414199829,grad_norm: 0.9999991369522241, iteration: 41453
loss: 1.0357240438461304,grad_norm: 0.9999992029932546, iteration: 41454
loss: 1.042730450630188,grad_norm: 0.9999995126080256, iteration: 41455
loss: 1.0315290689468384,grad_norm: 0.8695809668563851, iteration: 41456
loss: 1.0026346445083618,grad_norm: 0.9999990487165626, iteration: 41457
loss: 1.0061757564544678,grad_norm: 0.9999990597464086, iteration: 41458
loss: 0.9920287728309631,grad_norm: 0.7730658103193432, iteration: 41459
loss: 1.0182974338531494,grad_norm: 0.9999992018304045, iteration: 41460
loss: 0.9864510893821716,grad_norm: 0.8310192589611192, iteration: 41461
loss: 0.9916031956672668,grad_norm: 0.9999991621844949, iteration: 41462
loss: 0.9804238080978394,grad_norm: 0.8430229385186188, iteration: 41463
loss: 1.0760881900787354,grad_norm: 0.9999991874089349, iteration: 41464
loss: 1.0209143161773682,grad_norm: 0.9669226962838252, iteration: 41465
loss: 1.1325039863586426,grad_norm: 0.9999997226526769, iteration: 41466
loss: 1.0102126598358154,grad_norm: 0.9999990952856866, iteration: 41467
loss: 1.0816211700439453,grad_norm: 0.9999996024132233, iteration: 41468
loss: 1.1809425354003906,grad_norm: 0.9999993038519762, iteration: 41469
loss: 1.002220869064331,grad_norm: 0.9999991046721154, iteration: 41470
loss: 1.026368260383606,grad_norm: 0.932551305823083, iteration: 41471
loss: 1.0045353174209595,grad_norm: 0.9999989952573602, iteration: 41472
loss: 0.94932621717453,grad_norm: 0.9999991971799161, iteration: 41473
loss: 1.0384461879730225,grad_norm: 0.9999991270641714, iteration: 41474
loss: 1.0068227052688599,grad_norm: 0.8094982415886544, iteration: 41475
loss: 0.9822455644607544,grad_norm: 0.8699479491634591, iteration: 41476
loss: 1.027269721031189,grad_norm: 0.9999989164845082, iteration: 41477
loss: 1.0284384489059448,grad_norm: 0.9999990345814368, iteration: 41478
loss: 1.010036826133728,grad_norm: 0.9550852773908546, iteration: 41479
loss: 0.960626482963562,grad_norm: 0.8676266856533055, iteration: 41480
loss: 0.9569701552391052,grad_norm: 0.9999991411888911, iteration: 41481
loss: 1.0687175989151,grad_norm: 0.9999992055833711, iteration: 41482
loss: 0.9995554685592651,grad_norm: 0.9999993198688324, iteration: 41483
loss: 0.9869775772094727,grad_norm: 0.9999991258101273, iteration: 41484
loss: 1.0682973861694336,grad_norm: 0.9999998128152051, iteration: 41485
loss: 0.9788352251052856,grad_norm: 0.9999991515260022, iteration: 41486
loss: 0.9978713989257812,grad_norm: 0.9672128890625918, iteration: 41487
loss: 1.0606074333190918,grad_norm: 0.9999995174633921, iteration: 41488
loss: 1.0842173099517822,grad_norm: 0.9999993546407789, iteration: 41489
loss: 1.3086875677108765,grad_norm: 0.9999996731143156, iteration: 41490
loss: 1.0645241737365723,grad_norm: 0.9999993378591591, iteration: 41491
loss: 1.2656115293502808,grad_norm: 0.9999994003948433, iteration: 41492
loss: 0.9525552988052368,grad_norm: 0.916431909002808, iteration: 41493
loss: 0.9774559736251831,grad_norm: 0.9606378846258042, iteration: 41494
loss: 1.0180723667144775,grad_norm: 0.9999991478692535, iteration: 41495
loss: 0.9914476871490479,grad_norm: 0.9999993789855125, iteration: 41496
loss: 1.0081732273101807,grad_norm: 0.9999992198973998, iteration: 41497
loss: 0.9814106822013855,grad_norm: 0.999999464932568, iteration: 41498
loss: 1.0432921648025513,grad_norm: 0.9999998045129278, iteration: 41499
loss: 1.0240812301635742,grad_norm: 0.9999991314720886, iteration: 41500
loss: 1.0594168901443481,grad_norm: 0.9999993821858231, iteration: 41501
loss: 0.970649003982544,grad_norm: 0.9999990524825987, iteration: 41502
loss: 1.021282434463501,grad_norm: 0.9999990544119061, iteration: 41503
loss: 1.0780311822891235,grad_norm: 1.0000000059430905, iteration: 41504
loss: 1.0793222188949585,grad_norm: 0.9999992769521098, iteration: 41505
loss: 1.0826528072357178,grad_norm: 0.9255197164310658, iteration: 41506
loss: 1.0000721216201782,grad_norm: 0.9999990286643435, iteration: 41507
loss: 1.0395863056182861,grad_norm: 0.9999997638997721, iteration: 41508
loss: 0.9941917061805725,grad_norm: 0.9999990634362063, iteration: 41509
loss: 1.0117740631103516,grad_norm: 0.9999995046127647, iteration: 41510
loss: 1.0131722688674927,grad_norm: 0.9999993154058574, iteration: 41511
loss: 1.0428909063339233,grad_norm: 0.9999992827502492, iteration: 41512
loss: 1.0390769243240356,grad_norm: 0.9999996153978032, iteration: 41513
loss: 0.9661564826965332,grad_norm: 0.9309047722284403, iteration: 41514
loss: 0.9999217391014099,grad_norm: 0.9999992596069768, iteration: 41515
loss: 0.984710156917572,grad_norm: 0.9999990274902979, iteration: 41516
loss: 0.9873076677322388,grad_norm: 0.8320773880449971, iteration: 41517
loss: 0.9955385327339172,grad_norm: 0.9254341748085272, iteration: 41518
loss: 1.0047136545181274,grad_norm: 0.9933609050651996, iteration: 41519
loss: 1.0468636751174927,grad_norm: 0.999999216661728, iteration: 41520
loss: 0.960077702999115,grad_norm: 0.9999991713530931, iteration: 41521
loss: 1.054386019706726,grad_norm: 0.9999998224190179, iteration: 41522
loss: 0.9700878262519836,grad_norm: 0.8716193511393403, iteration: 41523
loss: 0.9893174767494202,grad_norm: 0.9999991712604643, iteration: 41524
loss: 1.0714250802993774,grad_norm: 0.9999992106298826, iteration: 41525
loss: 0.9640669822692871,grad_norm: 0.9999992891006134, iteration: 41526
loss: 1.059578537940979,grad_norm: 0.9999992633753454, iteration: 41527
loss: 0.9884976148605347,grad_norm: 0.9999992207717057, iteration: 41528
loss: 0.9814987778663635,grad_norm: 0.8652965214734812, iteration: 41529
loss: 1.2845127582550049,grad_norm: 0.9999999279329024, iteration: 41530
loss: 1.033987283706665,grad_norm: 0.9999991406773199, iteration: 41531
loss: 1.00446355342865,grad_norm: 0.9794690985728413, iteration: 41532
loss: 1.0155460834503174,grad_norm: 0.9999994529026767, iteration: 41533
loss: 1.0094062089920044,grad_norm: 0.9999991125684204, iteration: 41534
loss: 0.9770698547363281,grad_norm: 0.9934493278400739, iteration: 41535
loss: 1.00074303150177,grad_norm: 0.9202522711570695, iteration: 41536
loss: 1.0446686744689941,grad_norm: 0.9999998254731628, iteration: 41537
loss: 1.0499701499938965,grad_norm: 0.9999992668329872, iteration: 41538
loss: 1.032028079032898,grad_norm: 0.9241391469853169, iteration: 41539
loss: 0.9834861755371094,grad_norm: 0.9999990712791478, iteration: 41540
loss: 1.0104687213897705,grad_norm: 0.8412418501340089, iteration: 41541
loss: 0.9588851928710938,grad_norm: 0.9999991255928457, iteration: 41542
loss: 1.0219873189926147,grad_norm: 0.9999992053075347, iteration: 41543
loss: 1.0230910778045654,grad_norm: 0.9999990961715512, iteration: 41544
loss: 0.9850127100944519,grad_norm: 0.9702486890857205, iteration: 41545
loss: 1.0023143291473389,grad_norm: 0.9999991651468372, iteration: 41546
loss: 1.058740258216858,grad_norm: 0.9999994745648727, iteration: 41547
loss: 1.0060243606567383,grad_norm: 0.9027505563946476, iteration: 41548
loss: 1.0317813158035278,grad_norm: 0.9884141772774251, iteration: 41549
loss: 1.0654358863830566,grad_norm: 0.9999995861761263, iteration: 41550
loss: 1.0649735927581787,grad_norm: 0.999999612311066, iteration: 41551
loss: 0.9870625138282776,grad_norm: 0.9999991260832173, iteration: 41552
loss: 0.9798262119293213,grad_norm: 0.9775504840318228, iteration: 41553
loss: 1.0045884847640991,grad_norm: 0.9999995164687263, iteration: 41554
loss: 1.0050357580184937,grad_norm: 0.8218907077114835, iteration: 41555
loss: 0.982531726360321,grad_norm: 0.8537757210966854, iteration: 41556
loss: 1.0465824604034424,grad_norm: 0.9810715092681909, iteration: 41557
loss: 0.9583794474601746,grad_norm: 0.8955854915076915, iteration: 41558
loss: 0.9916234612464905,grad_norm: 0.9999989299353192, iteration: 41559
loss: 1.090520977973938,grad_norm: 0.9512184828517132, iteration: 41560
loss: 0.9714866280555725,grad_norm: 0.7867445038065485, iteration: 41561
loss: 1.0030243396759033,grad_norm: 0.9078256616563422, iteration: 41562
loss: 1.0151453018188477,grad_norm: 0.8110449007701405, iteration: 41563
loss: 1.0063133239746094,grad_norm: 0.9999992308582302, iteration: 41564
loss: 0.9906901717185974,grad_norm: 0.9897001959376811, iteration: 41565
loss: 0.9811522960662842,grad_norm: 0.9511854331505737, iteration: 41566
loss: 1.0640654563903809,grad_norm: 0.9999989654557473, iteration: 41567
loss: 1.018572211265564,grad_norm: 0.999999164420359, iteration: 41568
loss: 1.011487364768982,grad_norm: 0.8961586060536281, iteration: 41569
loss: 1.0172284841537476,grad_norm: 0.9999993337895919, iteration: 41570
loss: 1.023404836654663,grad_norm: 0.8507022586840589, iteration: 41571
loss: 1.027522325515747,grad_norm: 0.9999994151054177, iteration: 41572
loss: 1.0120612382888794,grad_norm: 0.9999989976470357, iteration: 41573
loss: 1.024018406867981,grad_norm: 0.9701763959609174, iteration: 41574
loss: 1.0201081037521362,grad_norm: 0.7902881627407771, iteration: 41575
loss: 0.982310950756073,grad_norm: 0.9999992748745025, iteration: 41576
loss: 1.0464197397232056,grad_norm: 0.9999994221801439, iteration: 41577
loss: 0.9890148043632507,grad_norm: 0.9912361820194376, iteration: 41578
loss: 1.0105388164520264,grad_norm: 0.9999991784652967, iteration: 41579
loss: 1.006173014640808,grad_norm: 0.8929654353047296, iteration: 41580
loss: 0.9673104882240295,grad_norm: 0.9025310799722135, iteration: 41581
loss: 1.0367408990859985,grad_norm: 0.9999992521142929, iteration: 41582
loss: 1.0756499767303467,grad_norm: 0.9999995174835943, iteration: 41583
loss: 1.0022836923599243,grad_norm: 0.9352244980298072, iteration: 41584
loss: 0.9664220213890076,grad_norm: 0.8898653118157486, iteration: 41585
loss: 1.000004768371582,grad_norm: 0.9635625497349345, iteration: 41586
loss: 1.0273878574371338,grad_norm: 0.9474845802113802, iteration: 41587
loss: 0.9948339462280273,grad_norm: 0.7332876604813541, iteration: 41588
loss: 1.0126255750656128,grad_norm: 0.9999991068078344, iteration: 41589
loss: 1.0037039518356323,grad_norm: 0.8178896977619339, iteration: 41590
loss: 0.9354386925697327,grad_norm: 0.8994231672832348, iteration: 41591
loss: 1.0184844732284546,grad_norm: 0.9999997269746016, iteration: 41592
loss: 1.0384738445281982,grad_norm: 0.9999989826122749, iteration: 41593
loss: 1.002267599105835,grad_norm: 0.992201262146722, iteration: 41594
loss: 1.0327519178390503,grad_norm: 0.9999996102777787, iteration: 41595
loss: 1.0480010509490967,grad_norm: 0.9828088651640371, iteration: 41596
loss: 1.0027475357055664,grad_norm: 0.9218320473199568, iteration: 41597
loss: 1.0665401220321655,grad_norm: 0.9999999526647424, iteration: 41598
loss: 0.9765504002571106,grad_norm: 0.9768352310695072, iteration: 41599
loss: 1.006376028060913,grad_norm: 0.8761335654935193, iteration: 41600
loss: 0.9852582812309265,grad_norm: 0.972957130566676, iteration: 41601
loss: 1.0197306871414185,grad_norm: 0.9999991938385256, iteration: 41602
loss: 1.0187478065490723,grad_norm: 0.9999992229633867, iteration: 41603
loss: 1.0032604932785034,grad_norm: 0.7888999690540788, iteration: 41604
loss: 0.9681324362754822,grad_norm: 0.9170966171733443, iteration: 41605
loss: 1.0356031656265259,grad_norm: 0.9999998198369996, iteration: 41606
loss: 1.0085386037826538,grad_norm: 0.9999989840252703, iteration: 41607
loss: 1.0046435594558716,grad_norm: 0.9460545930028738, iteration: 41608
loss: 0.9807560443878174,grad_norm: 0.9999990567264425, iteration: 41609
loss: 0.9821507334709167,grad_norm: 0.8648852962221818, iteration: 41610
loss: 1.041028380393982,grad_norm: 0.9999995019846691, iteration: 41611
loss: 1.0052852630615234,grad_norm: 0.9999991795588913, iteration: 41612
loss: 0.9924023151397705,grad_norm: 0.9925910497360991, iteration: 41613
loss: 1.0388864278793335,grad_norm: 0.9659418948903644, iteration: 41614
loss: 1.1242940425872803,grad_norm: 0.9999994664408479, iteration: 41615
loss: 1.0021847486495972,grad_norm: 0.9999992082007186, iteration: 41616
loss: 0.9621912837028503,grad_norm: 0.9065579954240529, iteration: 41617
loss: 1.0111041069030762,grad_norm: 0.8348162854235389, iteration: 41618
loss: 0.9951241612434387,grad_norm: 0.9999991191219797, iteration: 41619
loss: 1.0292481184005737,grad_norm: 0.9999990874831988, iteration: 41620
loss: 0.9778659343719482,grad_norm: 0.8707571786932513, iteration: 41621
loss: 1.0275465250015259,grad_norm: 0.936653765212606, iteration: 41622
loss: 1.0014256238937378,grad_norm: 0.9802929650705755, iteration: 41623
loss: 1.0413310527801514,grad_norm: 0.9999995441517175, iteration: 41624
loss: 1.0259901285171509,grad_norm: 0.7821831657534474, iteration: 41625
loss: 1.0208057165145874,grad_norm: 0.9165327514837719, iteration: 41626
loss: 0.9917725920677185,grad_norm: 0.9512353927687007, iteration: 41627
loss: 0.9898483157157898,grad_norm: 0.9999990422295059, iteration: 41628
loss: 1.021503210067749,grad_norm: 0.9106761157448507, iteration: 41629
loss: 1.0248825550079346,grad_norm: 0.9948113701920515, iteration: 41630
loss: 1.0351256132125854,grad_norm: 0.8965790067582505, iteration: 41631
loss: 1.0576599836349487,grad_norm: 0.7748107488728588, iteration: 41632
loss: 1.0534149408340454,grad_norm: 0.9256984356464252, iteration: 41633
loss: 0.9746310114860535,grad_norm: 0.9999993141864892, iteration: 41634
loss: 1.096267580986023,grad_norm: 0.9999999998434198, iteration: 41635
loss: 0.9809255003929138,grad_norm: 0.8663376503070023, iteration: 41636
loss: 0.9835138320922852,grad_norm: 0.999999018550764, iteration: 41637
loss: 1.002785325050354,grad_norm: 0.9999991144955561, iteration: 41638
loss: 1.0160173177719116,grad_norm: 0.8836063331839779, iteration: 41639
loss: 0.9754636883735657,grad_norm: 0.8910135821787172, iteration: 41640
loss: 1.0019631385803223,grad_norm: 0.8674828449934183, iteration: 41641
loss: 0.9880296587944031,grad_norm: 0.8989934108589617, iteration: 41642
loss: 1.066334843635559,grad_norm: 0.999999247851573, iteration: 41643
loss: 0.9927324652671814,grad_norm: 0.9999990732352015, iteration: 41644
loss: 0.9847565293312073,grad_norm: 0.9999992727505757, iteration: 41645
loss: 1.0055376291275024,grad_norm: 0.9999996196512285, iteration: 41646
loss: 0.9794489145278931,grad_norm: 0.9999990890881374, iteration: 41647
loss: 1.0278464555740356,grad_norm: 0.9799825735450597, iteration: 41648
loss: 1.0312635898590088,grad_norm: 0.7912772753598786, iteration: 41649
loss: 1.0674396753311157,grad_norm: 0.9999992117413428, iteration: 41650
loss: 1.0148701667785645,grad_norm: 0.9999991142317237, iteration: 41651
loss: 0.9615213871002197,grad_norm: 0.9695660477846412, iteration: 41652
loss: 1.034246563911438,grad_norm: 0.999999000722758, iteration: 41653
loss: 1.0072295665740967,grad_norm: 0.9999990756299632, iteration: 41654
loss: 1.0382378101348877,grad_norm: 0.9999991953647355, iteration: 41655
loss: 0.9985016584396362,grad_norm: 0.7360437827873222, iteration: 41656
loss: 0.9443880915641785,grad_norm: 0.9999991201648367, iteration: 41657
loss: 1.0435692071914673,grad_norm: 0.9999991313708333, iteration: 41658
loss: 0.9849814772605896,grad_norm: 0.8574938957475964, iteration: 41659
loss: 0.9755225777626038,grad_norm: 0.9999989896242265, iteration: 41660
loss: 0.9879814982414246,grad_norm: 0.7938165685555771, iteration: 41661
loss: 1.0173242092132568,grad_norm: 0.9880104172957208, iteration: 41662
loss: 1.0376225709915161,grad_norm: 0.9451829874172781, iteration: 41663
loss: 1.032808542251587,grad_norm: 0.9999994965921585, iteration: 41664
loss: 1.0363408327102661,grad_norm: 0.9999991631303696, iteration: 41665
loss: 1.068463683128357,grad_norm: 0.989897468748115, iteration: 41666
loss: 0.966700553894043,grad_norm: 0.9710305984180548, iteration: 41667
loss: 0.9804162979125977,grad_norm: 0.9461526685766404, iteration: 41668
loss: 1.0065516233444214,grad_norm: 0.9999990724631996, iteration: 41669
loss: 1.010026454925537,grad_norm: 0.7379126421640776, iteration: 41670
loss: 1.085653305053711,grad_norm: 0.9999997659566136, iteration: 41671
loss: 1.015843152999878,grad_norm: 0.9049792979322603, iteration: 41672
loss: 1.036828875541687,grad_norm: 0.9843323795711454, iteration: 41673
loss: 0.9945180416107178,grad_norm: 0.9591254389224925, iteration: 41674
loss: 1.028594732284546,grad_norm: 0.841856027513315, iteration: 41675
loss: 0.9787716865539551,grad_norm: 0.9204158366210611, iteration: 41676
loss: 1.0116857290267944,grad_norm: 0.9999997107853875, iteration: 41677
loss: 1.0248751640319824,grad_norm: 0.9394571599976912, iteration: 41678
loss: 1.0005139112472534,grad_norm: 0.9999990607104575, iteration: 41679
loss: 1.0153636932373047,grad_norm: 0.9816796013769015, iteration: 41680
loss: 1.0129166841506958,grad_norm: 0.9322247558183541, iteration: 41681
loss: 1.0062508583068848,grad_norm: 0.9657113428059244, iteration: 41682
loss: 1.0553873777389526,grad_norm: 0.9999991982565487, iteration: 41683
loss: 1.0499507188796997,grad_norm: 0.9999992352331106, iteration: 41684
loss: 1.0118998289108276,grad_norm: 0.9999998987671358, iteration: 41685
loss: 1.0293627977371216,grad_norm: 0.8008095342812591, iteration: 41686
loss: 1.0370382070541382,grad_norm: 0.9999992989321909, iteration: 41687
loss: 1.0365246534347534,grad_norm: 0.999998995773651, iteration: 41688
loss: 1.0051867961883545,grad_norm: 0.999999096536001, iteration: 41689
loss: 1.0056813955307007,grad_norm: 0.9976275545878189, iteration: 41690
loss: 1.0127695798873901,grad_norm: 0.9103935917884949, iteration: 41691
loss: 1.0020633935928345,grad_norm: 0.9999991534665116, iteration: 41692
loss: 0.9640244245529175,grad_norm: 0.9527777799894341, iteration: 41693
loss: 1.0059702396392822,grad_norm: 0.7583339213258717, iteration: 41694
loss: 1.01177179813385,grad_norm: 0.921357230930568, iteration: 41695
loss: 1.0608824491500854,grad_norm: 0.9999999694372627, iteration: 41696
loss: 1.0443588495254517,grad_norm: 0.9999994595997549, iteration: 41697
loss: 0.9941520094871521,grad_norm: 0.772850007695193, iteration: 41698
loss: 1.0618687868118286,grad_norm: 0.9999996261284184, iteration: 41699
loss: 0.9608290195465088,grad_norm: 0.8819001946927735, iteration: 41700
loss: 0.9780283570289612,grad_norm: 0.9740922725992344, iteration: 41701
loss: 0.9880895614624023,grad_norm: 0.8907478311344648, iteration: 41702
loss: 1.0067821741104126,grad_norm: 0.9999992830240261, iteration: 41703
loss: 0.9851573705673218,grad_norm: 0.9447145094273807, iteration: 41704
loss: 0.9963706731796265,grad_norm: 0.7425242236024678, iteration: 41705
loss: 0.9811524152755737,grad_norm: 0.984902183386925, iteration: 41706
loss: 1.0877984762191772,grad_norm: 0.9999994606773002, iteration: 41707
loss: 1.0363948345184326,grad_norm: 0.9999992143299975, iteration: 41708
loss: 1.043089747428894,grad_norm: 0.9999992483191935, iteration: 41709
loss: 0.9860445857048035,grad_norm: 0.9999990224320765, iteration: 41710
loss: 0.9955169558525085,grad_norm: 0.9999991978567974, iteration: 41711
loss: 1.030756950378418,grad_norm: 0.999999040350859, iteration: 41712
loss: 1.032434344291687,grad_norm: 0.9083403251050195, iteration: 41713
loss: 1.0221806764602661,grad_norm: 0.9864720612890652, iteration: 41714
loss: 1.0249017477035522,grad_norm: 0.9073140453375615, iteration: 41715
loss: 1.0768013000488281,grad_norm: 0.9999994269268191, iteration: 41716
loss: 1.013826608657837,grad_norm: 0.9999997655189824, iteration: 41717
loss: 1.084235429763794,grad_norm: 0.9641964005995906, iteration: 41718
loss: 1.0032529830932617,grad_norm: 0.9999991942696029, iteration: 41719
loss: 0.9940139651298523,grad_norm: 0.9450087712409675, iteration: 41720
loss: 1.014209508895874,grad_norm: 0.7887884743141625, iteration: 41721
loss: 1.022688388824463,grad_norm: 0.952471641833321, iteration: 41722
loss: 1.062748908996582,grad_norm: 0.999999433442081, iteration: 41723
loss: 1.0336244106292725,grad_norm: 0.9999993056712588, iteration: 41724
loss: 0.9847680926322937,grad_norm: 0.8093780532965966, iteration: 41725
loss: 1.0570820569992065,grad_norm: 0.9793434915564181, iteration: 41726
loss: 0.9628741145133972,grad_norm: 0.9778650894873545, iteration: 41727
loss: 0.9519690871238708,grad_norm: 0.9423889296472564, iteration: 41728
loss: 1.0233039855957031,grad_norm: 0.9409311636719817, iteration: 41729
loss: 1.0004727840423584,grad_norm: 0.8642811161875918, iteration: 41730
loss: 1.0354734659194946,grad_norm: 0.954710129907941, iteration: 41731
loss: 1.0229984521865845,grad_norm: 0.8764573988225898, iteration: 41732
loss: 1.0650194883346558,grad_norm: 0.9999998282195202, iteration: 41733
loss: 1.0215040445327759,grad_norm: 0.9429144795300444, iteration: 41734
loss: 1.0493718385696411,grad_norm: 0.9999994131959175, iteration: 41735
loss: 1.0138007402420044,grad_norm: 0.849705863278279, iteration: 41736
loss: 1.0258127450942993,grad_norm: 0.9999996881557583, iteration: 41737
loss: 0.9913092851638794,grad_norm: 0.8769159007397578, iteration: 41738
loss: 1.0022355318069458,grad_norm: 0.9999994252938224, iteration: 41739
loss: 1.022822380065918,grad_norm: 0.9999991580861625, iteration: 41740
loss: 1.0126702785491943,grad_norm: 0.8806914438970148, iteration: 41741
loss: 1.0015783309936523,grad_norm: 0.9252463805409024, iteration: 41742
loss: 1.0281862020492554,grad_norm: 0.9999999031736886, iteration: 41743
loss: 1.021264910697937,grad_norm: 0.9999997131090494, iteration: 41744
loss: 1.0000874996185303,grad_norm: 0.9045415744238657, iteration: 41745
loss: 0.9770091772079468,grad_norm: 0.9836941853067632, iteration: 41746
loss: 1.008908748626709,grad_norm: 0.8812417667757918, iteration: 41747
loss: 1.0409483909606934,grad_norm: 0.9999991355648541, iteration: 41748
loss: 1.0031154155731201,grad_norm: 0.9999990765555427, iteration: 41749
loss: 1.0117686986923218,grad_norm: 0.999999059425152, iteration: 41750
loss: 1.0712865591049194,grad_norm: 0.9999991120468882, iteration: 41751
loss: 1.0538249015808105,grad_norm: 0.8624993016814292, iteration: 41752
loss: 1.0136537551879883,grad_norm: 0.9019035646081038, iteration: 41753
loss: 1.0401815176010132,grad_norm: 0.9999994065183193, iteration: 41754
loss: 1.02937912940979,grad_norm: 0.7978265187519203, iteration: 41755
loss: 1.02482008934021,grad_norm: 0.939593083376087, iteration: 41756
loss: 1.0363128185272217,grad_norm: 0.9999994654623513, iteration: 41757
loss: 1.0787708759307861,grad_norm: 0.9999996044498698, iteration: 41758
loss: 0.9872860908508301,grad_norm: 0.7520008159883325, iteration: 41759
loss: 0.9921883940696716,grad_norm: 0.8285545337974464, iteration: 41760
loss: 0.9989942908287048,grad_norm: 0.9845117841168843, iteration: 41761
loss: 1.0036871433258057,grad_norm: 0.9653147675501834, iteration: 41762
loss: 1.036063313484192,grad_norm: 0.9999990195395593, iteration: 41763
loss: 0.9825225472450256,grad_norm: 0.9999992845220131, iteration: 41764
loss: 0.9685146808624268,grad_norm: 0.9999991176319316, iteration: 41765
loss: 1.0299224853515625,grad_norm: 0.8213598573507133, iteration: 41766
loss: 1.011078953742981,grad_norm: 0.9773335479921955, iteration: 41767
loss: 1.0223543643951416,grad_norm: 0.9999995646435456, iteration: 41768
loss: 1.0167104005813599,grad_norm: 0.9566663801207342, iteration: 41769
loss: 1.0184154510498047,grad_norm: 0.9509790156643982, iteration: 41770
loss: 1.0181021690368652,grad_norm: 0.9999993643953027, iteration: 41771
loss: 1.0210403203964233,grad_norm: 0.999999134398354, iteration: 41772
loss: 1.093430519104004,grad_norm: 0.9999991179231029, iteration: 41773
loss: 1.0197614431381226,grad_norm: 0.9157949449430813, iteration: 41774
loss: 1.0016306638717651,grad_norm: 0.9542793564560805, iteration: 41775
loss: 1.0383309125900269,grad_norm: 0.9463637593526337, iteration: 41776
loss: 1.0377143621444702,grad_norm: 0.9999997357796138, iteration: 41777
loss: 1.0039728879928589,grad_norm: 0.927871429803545, iteration: 41778
loss: 1.0261703729629517,grad_norm: 0.9000086071731955, iteration: 41779
loss: 1.005276083946228,grad_norm: 0.9214242720958133, iteration: 41780
loss: 1.0679889917373657,grad_norm: 0.9999996701843271, iteration: 41781
loss: 1.0142017602920532,grad_norm: 0.9893899324962953, iteration: 41782
loss: 0.9816038012504578,grad_norm: 0.8870422808645673, iteration: 41783
loss: 1.0019981861114502,grad_norm: 0.9111601208188854, iteration: 41784
loss: 1.0200973749160767,grad_norm: 0.9999990354228361, iteration: 41785
loss: 1.0315674543380737,grad_norm: 0.8651665910042224, iteration: 41786
loss: 0.9848564267158508,grad_norm: 0.9074325593888858, iteration: 41787
loss: 1.0033739805221558,grad_norm: 0.8376510502208934, iteration: 41788
loss: 1.0068111419677734,grad_norm: 0.8601473370952691, iteration: 41789
loss: 1.0401703119277954,grad_norm: 0.9677996215694972, iteration: 41790
loss: 0.9793069958686829,grad_norm: 0.9999990912100282, iteration: 41791
loss: 0.9740533828735352,grad_norm: 0.9999992386847718, iteration: 41792
loss: 0.9888036847114563,grad_norm: 0.8558030970010408, iteration: 41793
loss: 1.007746934890747,grad_norm: 0.9999990657982344, iteration: 41794
loss: 0.9959732294082642,grad_norm: 0.9999990917030828, iteration: 41795
loss: 1.0060820579528809,grad_norm: 0.9563328726754374, iteration: 41796
loss: 0.9956128001213074,grad_norm: 0.9999991709629003, iteration: 41797
loss: 0.9736590385437012,grad_norm: 0.9999991302527863, iteration: 41798
loss: 1.001865267753601,grad_norm: 0.9999991659345467, iteration: 41799
loss: 1.0026148557662964,grad_norm: 0.9999994643475233, iteration: 41800
loss: 0.9738715291023254,grad_norm: 0.9637699990534042, iteration: 41801
loss: 0.9646494388580322,grad_norm: 0.9493063094386625, iteration: 41802
loss: 1.0313763618469238,grad_norm: 0.9052392871197718, iteration: 41803
loss: 1.0265755653381348,grad_norm: 0.999999150720012, iteration: 41804
loss: 0.9874915480613708,grad_norm: 0.9999991630355937, iteration: 41805
loss: 1.0512497425079346,grad_norm: 0.9999989585299301, iteration: 41806
loss: 1.0205192565917969,grad_norm: 0.9999998990107597, iteration: 41807
loss: 0.9842993021011353,grad_norm: 0.9905613455015148, iteration: 41808
loss: 0.964324951171875,grad_norm: 0.9572825186075128, iteration: 41809
loss: 0.985816240310669,grad_norm: 0.9779392314982175, iteration: 41810
loss: 0.9847080111503601,grad_norm: 0.9561569171644281, iteration: 41811
loss: 0.9995861053466797,grad_norm: 0.8788341527307285, iteration: 41812
loss: 1.030383586883545,grad_norm: 0.9391773750546012, iteration: 41813
loss: 1.04660964012146,grad_norm: 0.9999992569875767, iteration: 41814
loss: 1.0009515285491943,grad_norm: 0.982329871490682, iteration: 41815
loss: 1.0701781511306763,grad_norm: 0.9999999579873513, iteration: 41816
loss: 1.0071148872375488,grad_norm: 0.9999991354020007, iteration: 41817
loss: 0.971893310546875,grad_norm: 0.8762138362285371, iteration: 41818
loss: 1.0411843061447144,grad_norm: 0.9999989728553629, iteration: 41819
loss: 0.9685958623886108,grad_norm: 0.9999990403738883, iteration: 41820
loss: 0.9953752160072327,grad_norm: 0.8851502525598867, iteration: 41821
loss: 0.998466968536377,grad_norm: 0.9196802417484404, iteration: 41822
loss: 1.0349851846694946,grad_norm: 0.9999999087823892, iteration: 41823
loss: 1.0061362981796265,grad_norm: 0.9999992679786034, iteration: 41824
loss: 1.029139757156372,grad_norm: 0.9999990131832299, iteration: 41825
loss: 1.0131360292434692,grad_norm: 0.9999990405021396, iteration: 41826
loss: 1.022618293762207,grad_norm: 0.9999990140498721, iteration: 41827
loss: 1.023363709449768,grad_norm: 0.9999991353087274, iteration: 41828
loss: 0.990814745426178,grad_norm: 0.9999990807016261, iteration: 41829
loss: 0.9542250633239746,grad_norm: 0.9662800902961832, iteration: 41830
loss: 1.0213170051574707,grad_norm: 0.8511680149233167, iteration: 41831
loss: 1.0363928079605103,grad_norm: 0.9570140668674234, iteration: 41832
loss: 1.0133984088897705,grad_norm: 0.7823808865679943, iteration: 41833
loss: 1.0284117460250854,grad_norm: 0.999999077591878, iteration: 41834
loss: 0.9856767058372498,grad_norm: 0.9999991075931531, iteration: 41835
loss: 1.046023964881897,grad_norm: 0.9999990812977013, iteration: 41836
loss: 0.9740668535232544,grad_norm: 0.9999990933263755, iteration: 41837
loss: 1.0265007019042969,grad_norm: 0.9563700586241236, iteration: 41838
loss: 1.0020034313201904,grad_norm: 0.9649661873051826, iteration: 41839
loss: 0.9644631743431091,grad_norm: 0.8979173073060377, iteration: 41840
loss: 1.0409541130065918,grad_norm: 0.9999990919185718, iteration: 41841
loss: 1.0496199131011963,grad_norm: 0.9999991841096042, iteration: 41842
loss: 1.0033994913101196,grad_norm: 0.9519606643986764, iteration: 41843
loss: 0.9966055154800415,grad_norm: 0.9955309019166538, iteration: 41844
loss: 1.015722632408142,grad_norm: 0.9874868312731369, iteration: 41845
loss: 0.9940348863601685,grad_norm: 0.9895362705602889, iteration: 41846
loss: 1.0202089548110962,grad_norm: 0.8599427812256314, iteration: 41847
loss: 0.9974552989006042,grad_norm: 0.8701609132513226, iteration: 41848
loss: 1.0147452354431152,grad_norm: 0.9999991086929682, iteration: 41849
loss: 0.9992751479148865,grad_norm: 0.8955447321779755, iteration: 41850
loss: 1.0399514436721802,grad_norm: 0.9999993572655808, iteration: 41851
loss: 0.9851197004318237,grad_norm: 0.9781286982355356, iteration: 41852
loss: 1.034956932067871,grad_norm: 0.9999997245492024, iteration: 41853
loss: 1.0109226703643799,grad_norm: 0.8527740429423665, iteration: 41854
loss: 1.0021716356277466,grad_norm: 0.9999991465051264, iteration: 41855
loss: 1.0174757242202759,grad_norm: 0.9262520757228403, iteration: 41856
loss: 1.0474718809127808,grad_norm: 0.9999998145523503, iteration: 41857
loss: 1.004947543144226,grad_norm: 0.9999994158969472, iteration: 41858
loss: 1.026727318763733,grad_norm: 0.9500611346509336, iteration: 41859
loss: 0.9887717366218567,grad_norm: 0.8954378220433167, iteration: 41860
loss: 1.0330272912979126,grad_norm: 0.9890000789564531, iteration: 41861
loss: 0.9867839217185974,grad_norm: 0.9999992150419096, iteration: 41862
loss: 0.9463300108909607,grad_norm: 0.8227936814769213, iteration: 41863
loss: 1.0111122131347656,grad_norm: 0.9999994741370926, iteration: 41864
loss: 1.0074896812438965,grad_norm: 0.8820570144486143, iteration: 41865
loss: 1.0271503925323486,grad_norm: 0.878205884945404, iteration: 41866
loss: 1.0087846517562866,grad_norm: 0.931180041972636, iteration: 41867
loss: 1.0293798446655273,grad_norm: 0.9255634194558869, iteration: 41868
loss: 0.986271858215332,grad_norm: 0.9583159428836973, iteration: 41869
loss: 0.9839153289794922,grad_norm: 0.9999991692390384, iteration: 41870
loss: 1.0264204740524292,grad_norm: 0.848821868649659, iteration: 41871
loss: 0.9594573974609375,grad_norm: 0.9999990852014774, iteration: 41872
loss: 1.0066654682159424,grad_norm: 0.9153194393657156, iteration: 41873
loss: 1.0310616493225098,grad_norm: 0.9999994416197875, iteration: 41874
loss: 1.0455269813537598,grad_norm: 0.9999995379544487, iteration: 41875
loss: 1.0019783973693848,grad_norm: 0.9867748804852647, iteration: 41876
loss: 0.9960806369781494,grad_norm: 0.9448079343352309, iteration: 41877
loss: 1.039584994316101,grad_norm: 0.9999991680199976, iteration: 41878
loss: 1.023927927017212,grad_norm: 0.9999993291951926, iteration: 41879
loss: 0.9842888712882996,grad_norm: 0.9999991351906125, iteration: 41880
loss: 1.0008931159973145,grad_norm: 0.8134457304136898, iteration: 41881
loss: 0.9976675510406494,grad_norm: 0.9348703832437553, iteration: 41882
loss: 0.9944083094596863,grad_norm: 0.9999992798997183, iteration: 41883
loss: 1.047155499458313,grad_norm: 0.9999991680795105, iteration: 41884
loss: 1.0267812013626099,grad_norm: 0.9999991711180491, iteration: 41885
loss: 1.0460479259490967,grad_norm: 0.9999993968496069, iteration: 41886
loss: 1.0189882516860962,grad_norm: 0.9637528913866267, iteration: 41887
loss: 0.9865334630012512,grad_norm: 0.8797300729095654, iteration: 41888
loss: 1.0001287460327148,grad_norm: 0.7974713319062178, iteration: 41889
loss: 1.0275574922561646,grad_norm: 0.9129799543713907, iteration: 41890
loss: 0.9886999130249023,grad_norm: 0.9999991616502459, iteration: 41891
loss: 0.9569149017333984,grad_norm: 0.8513735440319385, iteration: 41892
loss: 0.9825541973114014,grad_norm: 0.9999991058520911, iteration: 41893
loss: 1.0006630420684814,grad_norm: 0.8828558042311488, iteration: 41894
loss: 1.0086617469787598,grad_norm: 0.8576790631996362, iteration: 41895
loss: 1.0366047620773315,grad_norm: 0.9999993945212261, iteration: 41896
loss: 1.0144248008728027,grad_norm: 0.9999996090574251, iteration: 41897
loss: 1.001023530960083,grad_norm: 0.9561005739520922, iteration: 41898
loss: 0.9799277782440186,grad_norm: 0.9239768674230009, iteration: 41899
loss: 1.014847993850708,grad_norm: 0.9999988789549012, iteration: 41900
loss: 1.01054048538208,grad_norm: 0.9999994687449341, iteration: 41901
loss: 1.0759774446487427,grad_norm: 0.9999994673646926, iteration: 41902
loss: 1.0304198265075684,grad_norm: 0.9637278128405793, iteration: 41903
loss: 1.0097808837890625,grad_norm: 0.999999579309931, iteration: 41904
loss: 0.9643710851669312,grad_norm: 0.8588913989183427, iteration: 41905
loss: 1.0189571380615234,grad_norm: 0.9999990163232672, iteration: 41906
loss: 0.9906038045883179,grad_norm: 0.93893879279807, iteration: 41907
loss: 1.0050740242004395,grad_norm: 0.8426280893951361, iteration: 41908
loss: 1.0541882514953613,grad_norm: 0.999999355510055, iteration: 41909
loss: 1.0014597177505493,grad_norm: 0.8526130213972769, iteration: 41910
loss: 1.0474772453308105,grad_norm: 0.9999992163813142, iteration: 41911
loss: 1.0019856691360474,grad_norm: 0.9899670640218121, iteration: 41912
loss: 1.0629734992980957,grad_norm: 0.999999340993753, iteration: 41913
loss: 1.019622802734375,grad_norm: 0.9999994253781591, iteration: 41914
loss: 1.015735387802124,grad_norm: 0.9463339260766984, iteration: 41915
loss: 1.0084398984909058,grad_norm: 0.9999991274194325, iteration: 41916
loss: 1.002273678779602,grad_norm: 0.9999990867912548, iteration: 41917
loss: 0.9821839928627014,grad_norm: 0.7555669855326591, iteration: 41918
loss: 1.0233278274536133,grad_norm: 0.9999992387842277, iteration: 41919
loss: 1.004787802696228,grad_norm: 0.9999990237692526, iteration: 41920
loss: 0.9878664016723633,grad_norm: 0.9134278999421847, iteration: 41921
loss: 1.0131858587265015,grad_norm: 0.7884766645124476, iteration: 41922
loss: 1.009818196296692,grad_norm: 0.8471321865752836, iteration: 41923
loss: 1.0130774974822998,grad_norm: 0.8850423507267766, iteration: 41924
loss: 1.0105178356170654,grad_norm: 0.9999991648507534, iteration: 41925
loss: 0.9945751428604126,grad_norm: 0.9999991157141312, iteration: 41926
loss: 1.060105800628662,grad_norm: 0.869708559397776, iteration: 41927
loss: 1.0100513696670532,grad_norm: 0.858656636428875, iteration: 41928
loss: 1.0299360752105713,grad_norm: 0.8416079074769134, iteration: 41929
loss: 0.9949783086776733,grad_norm: 0.9999991321587518, iteration: 41930
loss: 0.9786652326583862,grad_norm: 0.9846025032523735, iteration: 41931
loss: 1.0099225044250488,grad_norm: 0.875905214417797, iteration: 41932
loss: 1.0155384540557861,grad_norm: 0.9999992600934999, iteration: 41933
loss: 0.9782742261886597,grad_norm: 0.9999991395332274, iteration: 41934
loss: 1.0000009536743164,grad_norm: 0.8937458164260161, iteration: 41935
loss: 0.960808515548706,grad_norm: 0.9022121878726296, iteration: 41936
loss: 1.066643476486206,grad_norm: 0.9999997578405301, iteration: 41937
loss: 1.0309638977050781,grad_norm: 0.9999996045331553, iteration: 41938
loss: 0.9772092700004578,grad_norm: 0.9999989175699706, iteration: 41939
loss: 1.0046831369400024,grad_norm: 0.8434926701937288, iteration: 41940
loss: 1.039030909538269,grad_norm: 0.999999493013101, iteration: 41941
loss: 0.9971968531608582,grad_norm: 0.9999997024896219, iteration: 41942
loss: 1.0002374649047852,grad_norm: 0.8662594896123523, iteration: 41943
loss: 1.0235828161239624,grad_norm: 0.8661306142623033, iteration: 41944
loss: 0.9788402318954468,grad_norm: 0.9328322493157089, iteration: 41945
loss: 1.0102005004882812,grad_norm: 0.9999993631638465, iteration: 41946
loss: 1.0296151638031006,grad_norm: 0.8939735022210263, iteration: 41947
loss: 0.9838049411773682,grad_norm: 0.99999919509367, iteration: 41948
loss: 0.9990280270576477,grad_norm: 0.8892776469278741, iteration: 41949
loss: 0.953663170337677,grad_norm: 0.9999991173681005, iteration: 41950
loss: 0.9941728115081787,grad_norm: 0.9999992430853767, iteration: 41951
loss: 1.0311274528503418,grad_norm: 0.9623672889938839, iteration: 41952
loss: 1.0376371145248413,grad_norm: 0.9999993082692811, iteration: 41953
loss: 0.9607471227645874,grad_norm: 0.9999992386474944, iteration: 41954
loss: 1.0746365785598755,grad_norm: 0.9210003900292744, iteration: 41955
loss: 1.0252546072006226,grad_norm: 0.809848958540208, iteration: 41956
loss: 1.0075796842575073,grad_norm: 0.99999901836395, iteration: 41957
loss: 0.9840576648712158,grad_norm: 0.9999991313542723, iteration: 41958
loss: 0.9717576503753662,grad_norm: 0.952847509963446, iteration: 41959
loss: 1.0128684043884277,grad_norm: 0.9999993841057955, iteration: 41960
loss: 1.0335607528686523,grad_norm: 0.9999999076029153, iteration: 41961
loss: 1.0248101949691772,grad_norm: 0.970107761535183, iteration: 41962
loss: 1.0223749876022339,grad_norm: 0.8969471140582489, iteration: 41963
loss: 0.9968722462654114,grad_norm: 0.9063530364493187, iteration: 41964
loss: 1.0133963823318481,grad_norm: 0.9874405392428602, iteration: 41965
loss: 1.0211695432662964,grad_norm: 0.8854486472396864, iteration: 41966
loss: 0.985360324382782,grad_norm: 0.999999169594651, iteration: 41967
loss: 0.9799165725708008,grad_norm: 0.9999990978000729, iteration: 41968
loss: 1.0029728412628174,grad_norm: 0.880998189002065, iteration: 41969
loss: 1.0012247562408447,grad_norm: 0.9999993622863897, iteration: 41970
loss: 1.0058977603912354,grad_norm: 0.9447343480575521, iteration: 41971
loss: 1.0748732089996338,grad_norm: 0.9999993151356, iteration: 41972
loss: 0.9895914196968079,grad_norm: 0.9356801686621257, iteration: 41973
loss: 1.0246883630752563,grad_norm: 0.887968690053494, iteration: 41974
loss: 1.0023794174194336,grad_norm: 0.9999990923058004, iteration: 41975
loss: 1.0092393159866333,grad_norm: 0.8735907394939775, iteration: 41976
loss: 0.9852692484855652,grad_norm: 0.9999990546078538, iteration: 41977
loss: 0.9425647258758545,grad_norm: 0.9458739401339663, iteration: 41978
loss: 1.0458130836486816,grad_norm: 0.9999993603421776, iteration: 41979
loss: 0.9674757719039917,grad_norm: 0.9926454791078989, iteration: 41980
loss: 0.996208667755127,grad_norm: 0.9881968931160527, iteration: 41981
loss: 0.9846200942993164,grad_norm: 0.9651621638802862, iteration: 41982
loss: 1.0414866209030151,grad_norm: 0.9999992879604828, iteration: 41983
loss: 1.0253673791885376,grad_norm: 0.9077694993597363, iteration: 41984
loss: 0.9924642443656921,grad_norm: 0.9344144461217815, iteration: 41985
loss: 1.000567078590393,grad_norm: 0.9999990389225847, iteration: 41986
loss: 1.0173444747924805,grad_norm: 0.9999998226140374, iteration: 41987
loss: 1.0240294933319092,grad_norm: 0.9999991884339162, iteration: 41988
loss: 0.9889446496963501,grad_norm: 0.9118684153404031, iteration: 41989
loss: 0.9859452843666077,grad_norm: 0.9999989852492109, iteration: 41990
loss: 0.9963590502738953,grad_norm: 0.9999991607093345, iteration: 41991
loss: 1.0107526779174805,grad_norm: 0.9352840311295281, iteration: 41992
loss: 1.0103667974472046,grad_norm: 0.9023192488927568, iteration: 41993
loss: 1.0112498998641968,grad_norm: 0.9632319010551634, iteration: 41994
loss: 1.0157636404037476,grad_norm: 0.8364199188986045, iteration: 41995
loss: 1.0121545791625977,grad_norm: 0.9999996014251462, iteration: 41996
loss: 1.016856074333191,grad_norm: 0.8709452504670414, iteration: 41997
loss: 1.0075830221176147,grad_norm: 0.9999990962732018, iteration: 41998
loss: 1.0265132188796997,grad_norm: 0.962496792042317, iteration: 41999
loss: 1.0290800333023071,grad_norm: 0.9653882500365488, iteration: 42000
loss: 1.0249762535095215,grad_norm: 0.999999364879509, iteration: 42001
loss: 0.9680954813957214,grad_norm: 0.9999989406169896, iteration: 42002
loss: 1.0191532373428345,grad_norm: 0.9999990769779387, iteration: 42003
loss: 1.0322868824005127,grad_norm: 0.9999997515876308, iteration: 42004
loss: 1.035833477973938,grad_norm: 0.9999991585269182, iteration: 42005
loss: 1.021657943725586,grad_norm: 0.9999991588572948, iteration: 42006
loss: 1.0556985139846802,grad_norm: 0.9999991265169275, iteration: 42007
loss: 1.0046014785766602,grad_norm: 0.9627219765357508, iteration: 42008
loss: 1.0380420684814453,grad_norm: 0.8011972014913746, iteration: 42009
loss: 1.0299683809280396,grad_norm: 0.9052133665742504, iteration: 42010
loss: 0.9954248666763306,grad_norm: 0.9999990633557655, iteration: 42011
loss: 1.021769642829895,grad_norm: 0.8530240267330079, iteration: 42012
loss: 0.9456735849380493,grad_norm: 0.8404434419645185, iteration: 42013
loss: 0.9652935862541199,grad_norm: 0.9999991799600987, iteration: 42014
loss: 0.9954210519790649,grad_norm: 0.928699111376296, iteration: 42015
loss: 1.0308432579040527,grad_norm: 0.9999991060650536, iteration: 42016
loss: 0.9581001996994019,grad_norm: 0.9448429591806797, iteration: 42017
loss: 0.9683523178100586,grad_norm: 0.8562699550640557, iteration: 42018
loss: 1.0354797840118408,grad_norm: 0.8974664273538272, iteration: 42019
loss: 1.0542811155319214,grad_norm: 0.9999990654102183, iteration: 42020
loss: 1.01787531375885,grad_norm: 0.9999989906120726, iteration: 42021
loss: 1.0343683958053589,grad_norm: 0.9999991161402237, iteration: 42022
loss: 0.9994333982467651,grad_norm: 0.9660503069130355, iteration: 42023
loss: 0.9701746106147766,grad_norm: 0.9999989312729504, iteration: 42024
loss: 0.9867097735404968,grad_norm: 0.9999991899822281, iteration: 42025
loss: 0.9951593279838562,grad_norm: 0.9555242932299095, iteration: 42026
loss: 1.0282045602798462,grad_norm: 0.9999990422926802, iteration: 42027
loss: 0.9927205443382263,grad_norm: 0.9903922789971622, iteration: 42028
loss: 0.9814367294311523,grad_norm: 0.810784027443431, iteration: 42029
loss: 1.00730562210083,grad_norm: 0.7666064505494052, iteration: 42030
loss: 1.0359044075012207,grad_norm: 0.9020247450397447, iteration: 42031
loss: 1.0199934244155884,grad_norm: 0.9211604951476452, iteration: 42032
loss: 0.9857698082923889,grad_norm: 0.8791592377915799, iteration: 42033
loss: 0.9864314198493958,grad_norm: 0.8659170105129971, iteration: 42034
loss: 0.9937408566474915,grad_norm: 0.9999991302774881, iteration: 42035
loss: 1.013034701347351,grad_norm: 0.9440921059804273, iteration: 42036
loss: 1.024208903312683,grad_norm: 0.9999991414326685, iteration: 42037
loss: 1.0200657844543457,grad_norm: 0.9542615182305093, iteration: 42038
loss: 1.0004769563674927,grad_norm: 0.8821012822176296, iteration: 42039
loss: 1.0012636184692383,grad_norm: 0.999999022621227, iteration: 42040
loss: 1.0299954414367676,grad_norm: 0.7896646078314272, iteration: 42041
loss: 1.0046690702438354,grad_norm: 0.9349043078592107, iteration: 42042
loss: 1.0386086702346802,grad_norm: 0.9999991432444237, iteration: 42043
loss: 1.0345879793167114,grad_norm: 0.9999996430940273, iteration: 42044
loss: 0.9580487608909607,grad_norm: 0.9999990311829279, iteration: 42045
loss: 1.0187541246414185,grad_norm: 0.9999990643560202, iteration: 42046
loss: 0.973311722278595,grad_norm: 0.9212680662016167, iteration: 42047
loss: 0.9893361330032349,grad_norm: 0.8926820225499823, iteration: 42048
loss: 1.010270357131958,grad_norm: 0.9999990911609968, iteration: 42049
loss: 0.9831928610801697,grad_norm: 0.9693155130656769, iteration: 42050
loss: 0.9864105582237244,grad_norm: 0.9980391397048506, iteration: 42051
loss: 1.0018506050109863,grad_norm: 0.9999989630529524, iteration: 42052
loss: 1.0012248754501343,grad_norm: 0.8685161821471152, iteration: 42053
loss: 1.0261598825454712,grad_norm: 0.8624754445217501, iteration: 42054
loss: 1.0077317953109741,grad_norm: 0.9034722895282675, iteration: 42055
loss: 1.0121121406555176,grad_norm: 0.9079551954732755, iteration: 42056
loss: 1.0528771877288818,grad_norm: 0.9999994302642493, iteration: 42057
loss: 1.0233488082885742,grad_norm: 0.8729360276123407, iteration: 42058
loss: 0.9883115887641907,grad_norm: 0.9313587779719041, iteration: 42059
loss: 1.0005760192871094,grad_norm: 0.961965525227356, iteration: 42060
loss: 1.0007414817810059,grad_norm: 0.8357001248784232, iteration: 42061
loss: 1.0071508884429932,grad_norm: 0.9326248696822813, iteration: 42062
loss: 1.0218602418899536,grad_norm: 0.9999992311214817, iteration: 42063
loss: 1.0142536163330078,grad_norm: 0.8633963047914732, iteration: 42064
loss: 0.9881924390792847,grad_norm: 0.9992465049322433, iteration: 42065
loss: 1.0115114450454712,grad_norm: 0.9999990165601399, iteration: 42066
loss: 1.0156214237213135,grad_norm: 0.853121895044623, iteration: 42067
loss: 1.0172570943832397,grad_norm: 0.9999991743635342, iteration: 42068
loss: 0.9462541341781616,grad_norm: 0.9999991635180693, iteration: 42069
loss: 0.9887776374816895,grad_norm: 0.9198227955762419, iteration: 42070
loss: 1.015356183052063,grad_norm: 0.9999990284427513, iteration: 42071
loss: 1.0254346132278442,grad_norm: 0.9999991354453087, iteration: 42072
loss: 1.0049608945846558,grad_norm: 0.9999990549242161, iteration: 42073
loss: 1.019029974937439,grad_norm: 0.8542276904001344, iteration: 42074
loss: 0.9877789616584778,grad_norm: 0.8519485662883571, iteration: 42075
loss: 0.9818745255470276,grad_norm: 0.9296265279851446, iteration: 42076
loss: 0.9939362406730652,grad_norm: 0.9509581638622275, iteration: 42077
loss: 1.0162631273269653,grad_norm: 0.9999990939028615, iteration: 42078
loss: 1.0160926580429077,grad_norm: 0.9370910186286839, iteration: 42079
loss: 0.9773516058921814,grad_norm: 0.9566444803323628, iteration: 42080
loss: 0.9701958894729614,grad_norm: 0.9999990975884651, iteration: 42081
loss: 1.043992042541504,grad_norm: 0.9465556392758198, iteration: 42082
loss: 0.995400607585907,grad_norm: 0.8948145545944843, iteration: 42083
loss: 0.9923456311225891,grad_norm: 0.7736893301008134, iteration: 42084
loss: 0.9900474548339844,grad_norm: 0.8828808938664994, iteration: 42085
loss: 1.0158417224884033,grad_norm: 0.9143317130922772, iteration: 42086
loss: 0.9687485098838806,grad_norm: 0.92203294995758, iteration: 42087
loss: 1.0283141136169434,grad_norm: 0.9999996430975571, iteration: 42088
loss: 1.0311154127120972,grad_norm: 0.8803625444280933, iteration: 42089
loss: 0.9931291341781616,grad_norm: 0.9547428762721282, iteration: 42090
loss: 0.9996064305305481,grad_norm: 0.9679698416592186, iteration: 42091
loss: 0.9911547303199768,grad_norm: 0.9999990335650475, iteration: 42092
loss: 1.004475712776184,grad_norm: 0.9999991676882548, iteration: 42093
loss: 0.986421525478363,grad_norm: 0.9817328660034282, iteration: 42094
loss: 1.016312837600708,grad_norm: 0.9999992094850232, iteration: 42095
loss: 1.002432107925415,grad_norm: 0.8755099080112335, iteration: 42096
loss: 1.0097532272338867,grad_norm: 0.8027927205554589, iteration: 42097
loss: 0.9874939918518066,grad_norm: 0.9259387009818546, iteration: 42098
loss: 1.059348464012146,grad_norm: 0.9524178022897065, iteration: 42099
loss: 0.9682247638702393,grad_norm: 0.8235017175017385, iteration: 42100
loss: 0.9770195484161377,grad_norm: 0.9999991742144406, iteration: 42101
loss: 1.0121365785598755,grad_norm: 0.8664511628572193, iteration: 42102
loss: 1.0149179697036743,grad_norm: 0.9513806405610042, iteration: 42103
loss: 1.011527419090271,grad_norm: 0.9145259229635027, iteration: 42104
loss: 1.0182617902755737,grad_norm: 0.8910916790553425, iteration: 42105
loss: 1.029209852218628,grad_norm: 0.9999995781755301, iteration: 42106
loss: 1.0446070432662964,grad_norm: 0.9999997097494397, iteration: 42107
loss: 0.9996373057365417,grad_norm: 0.8739183048498166, iteration: 42108
loss: 0.9404148459434509,grad_norm: 0.9479106620812111, iteration: 42109
loss: 1.0195972919464111,grad_norm: 0.8302689078570852, iteration: 42110
loss: 1.0136945247650146,grad_norm: 0.9289302858896783, iteration: 42111
loss: 0.9935020804405212,grad_norm: 0.9999991705604411, iteration: 42112
loss: 0.9837024211883545,grad_norm: 0.9999991065205756, iteration: 42113
loss: 1.0054512023925781,grad_norm: 0.9852189048115495, iteration: 42114
loss: 1.0411479473114014,grad_norm: 0.9999990943241565, iteration: 42115
loss: 0.9941553473472595,grad_norm: 0.9830617443709255, iteration: 42116
loss: 0.9907613396644592,grad_norm: 0.9237074591535384, iteration: 42117
loss: 1.0135186910629272,grad_norm: 0.8192726832897869, iteration: 42118
loss: 1.009386658668518,grad_norm: 0.9999991975158684, iteration: 42119
loss: 0.9811545014381409,grad_norm: 0.9999991723883573, iteration: 42120
loss: 0.9953773021697998,grad_norm: 0.9821765124596351, iteration: 42121
loss: 0.9818267822265625,grad_norm: 0.9351965469442254, iteration: 42122
loss: 0.9787523746490479,grad_norm: 0.841558401726012, iteration: 42123
loss: 1.020431399345398,grad_norm: 0.9302620318203757, iteration: 42124
loss: 1.0141404867172241,grad_norm: 0.9548575465739875, iteration: 42125
loss: 1.0325300693511963,grad_norm: 0.8909384224517926, iteration: 42126
loss: 1.0369656085968018,grad_norm: 0.9999992940273371, iteration: 42127
loss: 1.0371434688568115,grad_norm: 0.9999997282249179, iteration: 42128
loss: 1.0012108087539673,grad_norm: 0.8646803867759087, iteration: 42129
loss: 1.000797152519226,grad_norm: 0.9999990686223896, iteration: 42130
loss: 0.962498128414154,grad_norm: 0.9999992510193911, iteration: 42131
loss: 1.0656906366348267,grad_norm: 0.986316741277796, iteration: 42132
loss: 1.0287234783172607,grad_norm: 0.9826881502038093, iteration: 42133
loss: 0.9742659330368042,grad_norm: 0.99999911200305, iteration: 42134
loss: 0.9909917712211609,grad_norm: 0.9999989801757714, iteration: 42135
loss: 1.0072779655456543,grad_norm: 0.793655060882901, iteration: 42136
loss: 1.0057032108306885,grad_norm: 0.8539726321140328, iteration: 42137
loss: 1.016010046005249,grad_norm: 0.8298133589943174, iteration: 42138
loss: 1.0317301750183105,grad_norm: 0.9202097976991725, iteration: 42139
loss: 1.0008453130722046,grad_norm: 0.9578642309034614, iteration: 42140
loss: 0.9792574644088745,grad_norm: 0.9407716786278516, iteration: 42141
loss: 1.0058668851852417,grad_norm: 0.9036953307297586, iteration: 42142
loss: 1.0126763582229614,grad_norm: 0.9999989985762716, iteration: 42143
loss: 1.0306463241577148,grad_norm: 0.9999989771368057, iteration: 42144
loss: 1.0199955701828003,grad_norm: 0.9999990016591728, iteration: 42145
loss: 0.9984796643257141,grad_norm: 0.9109604586684663, iteration: 42146
loss: 1.0161099433898926,grad_norm: 0.9259320659714092, iteration: 42147
loss: 1.0173008441925049,grad_norm: 0.8836426924737537, iteration: 42148
loss: 0.9949305653572083,grad_norm: 0.9999992029310104, iteration: 42149
loss: 1.0332708358764648,grad_norm: 0.819277663969421, iteration: 42150
loss: 1.1226061582565308,grad_norm: 0.9999991827957425, iteration: 42151
loss: 0.9861827492713928,grad_norm: 0.8358713304322348, iteration: 42152
loss: 1.0099714994430542,grad_norm: 0.9999991921233479, iteration: 42153
loss: 1.007725715637207,grad_norm: 0.9164117314482463, iteration: 42154
loss: 1.0265798568725586,grad_norm: 0.9999992379276369, iteration: 42155
loss: 1.0070899724960327,grad_norm: 0.9999991345180613, iteration: 42156
loss: 1.0756763219833374,grad_norm: 0.9999995908334751, iteration: 42157
loss: 0.9742532968521118,grad_norm: 0.9999992080824286, iteration: 42158
loss: 1.0049012899398804,grad_norm: 0.9999990313006966, iteration: 42159
loss: 1.0479239225387573,grad_norm: 0.748158559901985, iteration: 42160
loss: 1.0255681276321411,grad_norm: 0.9999991925470882, iteration: 42161
loss: 1.0334858894348145,grad_norm: 0.9608771264105855, iteration: 42162
loss: 0.9932231307029724,grad_norm: 0.9999991342667215, iteration: 42163
loss: 0.9757781028747559,grad_norm: 0.8728923969991598, iteration: 42164
loss: 0.9673060774803162,grad_norm: 0.9713369014748776, iteration: 42165
loss: 0.9974179863929749,grad_norm: 0.9951690167164895, iteration: 42166
loss: 1.0211849212646484,grad_norm: 0.9568857265161528, iteration: 42167
loss: 1.0006428956985474,grad_norm: 0.7996232243871215, iteration: 42168
loss: 1.003543734550476,grad_norm: 0.9999991392223591, iteration: 42169
loss: 1.020736575126648,grad_norm: 0.9999989919040017, iteration: 42170
loss: 1.006562352180481,grad_norm: 0.9395707210087504, iteration: 42171
loss: 1.0060625076293945,grad_norm: 0.8871932684394919, iteration: 42172
loss: 0.9891767501831055,grad_norm: 0.9999991388254343, iteration: 42173
loss: 1.008310079574585,grad_norm: 0.9447857088380246, iteration: 42174
loss: 1.0321385860443115,grad_norm: 0.822470485526645, iteration: 42175
loss: 1.002680778503418,grad_norm: 0.8608656717376344, iteration: 42176
loss: 1.0312386751174927,grad_norm: 0.9999990415529333, iteration: 42177
loss: 1.0234829187393188,grad_norm: 0.9999990268226372, iteration: 42178
loss: 1.0403367280960083,grad_norm: 0.9613024538803195, iteration: 42179
loss: 1.0436815023422241,grad_norm: 0.9999999636197344, iteration: 42180
loss: 0.9690626263618469,grad_norm: 0.9543891332274853, iteration: 42181
loss: 0.9858415722846985,grad_norm: 0.9731628820440086, iteration: 42182
loss: 0.9888200759887695,grad_norm: 0.8844149568559261, iteration: 42183
loss: 1.0214555263519287,grad_norm: 0.7383299226952166, iteration: 42184
loss: 1.0514975786209106,grad_norm: 0.9378180074776203, iteration: 42185
loss: 1.000710368156433,grad_norm: 0.999999002783308, iteration: 42186
loss: 0.9876344203948975,grad_norm: 0.9898217298978591, iteration: 42187
loss: 1.038158893585205,grad_norm: 0.9999992522121939, iteration: 42188
loss: 0.996084451675415,grad_norm: 0.9999991315159709, iteration: 42189
loss: 1.0207704305648804,grad_norm: 0.7441007152070219, iteration: 42190
loss: 0.9683807492256165,grad_norm: 0.9999990891038313, iteration: 42191
loss: 0.9701022505760193,grad_norm: 0.949046219549719, iteration: 42192
loss: 1.0071333646774292,grad_norm: 0.9999991971482809, iteration: 42193
loss: 0.969774603843689,grad_norm: 0.9999991668891953, iteration: 42194
loss: 1.0146416425704956,grad_norm: 0.9999989304520964, iteration: 42195
loss: 1.00885009765625,grad_norm: 0.9999990856516394, iteration: 42196
loss: 0.9910634756088257,grad_norm: 0.9999990648801003, iteration: 42197
loss: 1.0259894132614136,grad_norm: 0.9999995477750035, iteration: 42198
loss: 1.028444766998291,grad_norm: 0.9692640421318204, iteration: 42199
loss: 0.9975090622901917,grad_norm: 0.9540543078591115, iteration: 42200
loss: 0.9754654765129089,grad_norm: 0.9654782517100914, iteration: 42201
loss: 1.0242729187011719,grad_norm: 0.8979563418703996, iteration: 42202
loss: 1.0093867778778076,grad_norm: 0.9468672822959073, iteration: 42203
loss: 1.0193350315093994,grad_norm: 0.9710728581661195, iteration: 42204
loss: 1.0074301958084106,grad_norm: 0.999999261647885, iteration: 42205
loss: 1.0502854585647583,grad_norm: 0.9999994311492562, iteration: 42206
loss: 1.0515263080596924,grad_norm: 0.9999994429871947, iteration: 42207
loss: 1.0205234289169312,grad_norm: 0.9227735568146712, iteration: 42208
loss: 0.9906471371650696,grad_norm: 0.9172957458296885, iteration: 42209
loss: 1.0400341749191284,grad_norm: 0.9999997503858545, iteration: 42210
loss: 0.9810409545898438,grad_norm: 0.9999991751742335, iteration: 42211
loss: 1.0278204679489136,grad_norm: 0.9120706203463342, iteration: 42212
loss: 0.99077308177948,grad_norm: 0.9022160081506962, iteration: 42213
loss: 0.9847559332847595,grad_norm: 0.9097338151042306, iteration: 42214
loss: 1.0150983333587646,grad_norm: 0.9999992551333696, iteration: 42215
loss: 1.0162616968154907,grad_norm: 0.8399660885957018, iteration: 42216
loss: 0.9956279397010803,grad_norm: 0.9999990792279625, iteration: 42217
loss: 0.9929294586181641,grad_norm: 0.999999225468141, iteration: 42218
loss: 1.0158056020736694,grad_norm: 0.9999993214973156, iteration: 42219
loss: 1.060355544090271,grad_norm: 0.9999996410738858, iteration: 42220
loss: 1.028869390487671,grad_norm: 0.9999991941375743, iteration: 42221
loss: 1.0155671834945679,grad_norm: 0.9999995127135134, iteration: 42222
loss: 1.054372787475586,grad_norm: 0.9999993933768383, iteration: 42223
loss: 1.006667971611023,grad_norm: 0.8489758977370329, iteration: 42224
loss: 0.9912364482879639,grad_norm: 0.9781847599568535, iteration: 42225
loss: 1.0061521530151367,grad_norm: 0.8846403144913528, iteration: 42226
loss: 1.004934310913086,grad_norm: 0.9110428335858126, iteration: 42227
loss: 0.9971803426742554,grad_norm: 0.9454167523359555, iteration: 42228
loss: 0.997915506362915,grad_norm: 0.9999992657701858, iteration: 42229
loss: 0.9980514645576477,grad_norm: 0.851777649727106, iteration: 42230
loss: 1.007876992225647,grad_norm: 0.9005762392339639, iteration: 42231
loss: 1.0149306058883667,grad_norm: 0.9777142971206715, iteration: 42232
loss: 1.0263856649398804,grad_norm: 0.9739202391671594, iteration: 42233
loss: 1.023348093032837,grad_norm: 0.9982932658108765, iteration: 42234
loss: 0.9819574952125549,grad_norm: 0.9999991156087066, iteration: 42235
loss: 1.016459345817566,grad_norm: 0.9999992859732519, iteration: 42236
loss: 0.9971746206283569,grad_norm: 0.9762376951212944, iteration: 42237
loss: 0.9683443307876587,grad_norm: 0.9999991479497686, iteration: 42238
loss: 1.0175265073776245,grad_norm: 0.7918607780290023, iteration: 42239
loss: 1.030144453048706,grad_norm: 0.9999994457172064, iteration: 42240
loss: 0.9918713569641113,grad_norm: 0.9999990698851993, iteration: 42241
loss: 1.0199381113052368,grad_norm: 0.9999996349488879, iteration: 42242
loss: 1.0078846216201782,grad_norm: 0.9304560584947152, iteration: 42243
loss: 0.9783012270927429,grad_norm: 0.9999990605923335, iteration: 42244
loss: 1.0097182989120483,grad_norm: 0.9999991111029921, iteration: 42245
loss: 1.0492033958435059,grad_norm: 0.9999991020880217, iteration: 42246
loss: 0.9898149371147156,grad_norm: 0.8159496706220944, iteration: 42247
loss: 0.9845091104507446,grad_norm: 0.9204009372420285, iteration: 42248
loss: 1.0185208320617676,grad_norm: 0.9999991714953775, iteration: 42249
loss: 0.991989254951477,grad_norm: 0.9999991212933176, iteration: 42250
loss: 1.0443413257598877,grad_norm: 0.9999992912930407, iteration: 42251
loss: 1.0146132707595825,grad_norm: 0.9999995110961937, iteration: 42252
loss: 0.955219566822052,grad_norm: 0.992589544372424, iteration: 42253
loss: 1.0119407176971436,grad_norm: 0.9999991246434777, iteration: 42254
loss: 1.025184988975525,grad_norm: 0.9106772990298052, iteration: 42255
loss: 1.0448758602142334,grad_norm: 0.9999990974471853, iteration: 42256
loss: 1.0417195558547974,grad_norm: 0.9999990873116763, iteration: 42257
loss: 0.9885400533676147,grad_norm: 0.9330469040596762, iteration: 42258
loss: 0.9893611073493958,grad_norm: 0.8222912066604251, iteration: 42259
loss: 1.0102477073669434,grad_norm: 0.9407351308460482, iteration: 42260
loss: 1.0103020668029785,grad_norm: 0.9999991501431471, iteration: 42261
loss: 0.9746997356414795,grad_norm: 0.9994193665282585, iteration: 42262
loss: 1.0260287523269653,grad_norm: 0.9295404057733958, iteration: 42263
loss: 1.0091615915298462,grad_norm: 0.8843271015505823, iteration: 42264
loss: 1.0021711587905884,grad_norm: 0.9999990267773973, iteration: 42265
loss: 0.9889712929725647,grad_norm: 0.9920220064929042, iteration: 42266
loss: 0.9769154787063599,grad_norm: 0.8869562093328798, iteration: 42267
loss: 1.0257303714752197,grad_norm: 0.9999990871714277, iteration: 42268
loss: 1.0183311700820923,grad_norm: 0.9814450150488667, iteration: 42269
loss: 0.9759756922721863,grad_norm: 0.9999989699440948, iteration: 42270
loss: 0.9753666520118713,grad_norm: 0.999999001534595, iteration: 42271
loss: 0.986131489276886,grad_norm: 0.850627097923455, iteration: 42272
loss: 0.9857304692268372,grad_norm: 0.9474009854478131, iteration: 42273
loss: 1.0521875619888306,grad_norm: 0.9159716177625711, iteration: 42274
loss: 0.9935071468353271,grad_norm: 0.8080821864944078, iteration: 42275
loss: 1.0390326976776123,grad_norm: 0.966311289599263, iteration: 42276
loss: 1.0046141147613525,grad_norm: 0.9999995694445756, iteration: 42277
loss: 0.998456597328186,grad_norm: 0.9866757477821424, iteration: 42278
loss: 1.0325102806091309,grad_norm: 0.9694198127870756, iteration: 42279
loss: 0.9823823571205139,grad_norm: 0.9999991234398022, iteration: 42280
loss: 1.0030449628829956,grad_norm: 0.8015814629131279, iteration: 42281
loss: 1.0103107690811157,grad_norm: 0.9152564798533296, iteration: 42282
loss: 1.0365146398544312,grad_norm: 0.8841216531513885, iteration: 42283
loss: 0.9635239839553833,grad_norm: 0.9999993401706916, iteration: 42284
loss: 0.9920202493667603,grad_norm: 0.8778662493619004, iteration: 42285
loss: 0.961733877658844,grad_norm: 0.999999101203226, iteration: 42286
loss: 0.9797722101211548,grad_norm: 0.9999991626179512, iteration: 42287
loss: 1.0386909246444702,grad_norm: 0.9999991534011154, iteration: 42288
loss: 1.0520925521850586,grad_norm: 0.9919304939190444, iteration: 42289
loss: 1.0052340030670166,grad_norm: 0.9999991546562864, iteration: 42290
loss: 0.9922192692756653,grad_norm: 0.9720340268874639, iteration: 42291
loss: 1.0423479080200195,grad_norm: 0.999999234126092, iteration: 42292
loss: 1.0215636491775513,grad_norm: 0.9999995829257006, iteration: 42293
loss: 1.024049997329712,grad_norm: 0.9999989849515082, iteration: 42294
loss: 0.9995166659355164,grad_norm: 0.9624994203140975, iteration: 42295
loss: 1.0105396509170532,grad_norm: 0.9999991139224473, iteration: 42296
loss: 1.0061144828796387,grad_norm: 0.9662001064632755, iteration: 42297
loss: 1.0952775478363037,grad_norm: 0.9999999025242696, iteration: 42298
loss: 0.9990942478179932,grad_norm: 0.8895812448824107, iteration: 42299
loss: 1.0556553602218628,grad_norm: 0.8664867432950205, iteration: 42300
loss: 1.0163605213165283,grad_norm: 0.8558812914113347, iteration: 42301
loss: 1.005133032798767,grad_norm: 0.8867272683549582, iteration: 42302
loss: 1.0046461820602417,grad_norm: 0.9999989948633934, iteration: 42303
loss: 1.01291823387146,grad_norm: 0.9999996580731659, iteration: 42304
loss: 0.9484936594963074,grad_norm: 0.9222263673954532, iteration: 42305
loss: 0.9947753548622131,grad_norm: 0.8806641273779173, iteration: 42306
loss: 0.9881111979484558,grad_norm: 0.9970737646388953, iteration: 42307
loss: 1.0402065515518188,grad_norm: 0.9999994249586951, iteration: 42308
loss: 1.0156874656677246,grad_norm: 0.9999995259351978, iteration: 42309
loss: 1.0532336235046387,grad_norm: 0.9053479179050896, iteration: 42310
loss: 0.979010283946991,grad_norm: 0.9477918204642972, iteration: 42311
loss: 1.0080105066299438,grad_norm: 0.9999990943557737, iteration: 42312
loss: 1.0027287006378174,grad_norm: 0.9999990051441076, iteration: 42313
loss: 0.9969619512557983,grad_norm: 0.9620870418031433, iteration: 42314
loss: 1.0586270093917847,grad_norm: 0.9999990856022827, iteration: 42315
loss: 1.0028091669082642,grad_norm: 0.8775362101706845, iteration: 42316
loss: 0.9543322920799255,grad_norm: 0.9960678659108796, iteration: 42317
loss: 0.9943155646324158,grad_norm: 0.9758527569938921, iteration: 42318
loss: 1.038522481918335,grad_norm: 0.9009877993483439, iteration: 42319
loss: 1.0107663869857788,grad_norm: 0.9044400265357226, iteration: 42320
loss: 1.0087237358093262,grad_norm: 0.7811460812808805, iteration: 42321
loss: 0.986596941947937,grad_norm: 0.973808902520815, iteration: 42322
loss: 1.0095607042312622,grad_norm: 0.866842853996594, iteration: 42323
loss: 1.0359344482421875,grad_norm: 0.8593016046715799, iteration: 42324
loss: 1.0072656869888306,grad_norm: 0.8157916328310508, iteration: 42325
loss: 0.9912967085838318,grad_norm: 0.8554916080151118, iteration: 42326
loss: 0.9701169729232788,grad_norm: 0.9168787316456571, iteration: 42327
loss: 1.0311737060546875,grad_norm: 0.9999993845428824, iteration: 42328
loss: 0.9918043613433838,grad_norm: 0.9999998354033511, iteration: 42329
loss: 1.0704630613327026,grad_norm: 0.9464420317587567, iteration: 42330
loss: 1.020246148109436,grad_norm: 0.9999991770417408, iteration: 42331
loss: 1.0079680681228638,grad_norm: 0.9428302562054044, iteration: 42332
loss: 0.9965522289276123,grad_norm: 0.9999995827110054, iteration: 42333
loss: 1.0024518966674805,grad_norm: 0.9999991103459951, iteration: 42334
loss: 1.1040464639663696,grad_norm: 0.9999998808459547, iteration: 42335
loss: 0.9949917793273926,grad_norm: 0.9930580934341703, iteration: 42336
loss: 1.032082200050354,grad_norm: 0.9999991964690492, iteration: 42337
loss: 0.968604564666748,grad_norm: 0.999999061096255, iteration: 42338
loss: 1.006065011024475,grad_norm: 0.7088334405588063, iteration: 42339
loss: 1.0251396894454956,grad_norm: 0.9999991159913116, iteration: 42340
loss: 1.0040397644042969,grad_norm: 0.9999991608288821, iteration: 42341
loss: 1.0063393115997314,grad_norm: 0.9999990306189065, iteration: 42342
loss: 1.0322394371032715,grad_norm: 0.9999992046376724, iteration: 42343
loss: 0.9805724620819092,grad_norm: 0.8813265542971238, iteration: 42344
loss: 1.0352895259857178,grad_norm: 0.9812449304643501, iteration: 42345
loss: 1.0081816911697388,grad_norm: 0.9999997120341478, iteration: 42346
loss: 1.0157461166381836,grad_norm: 0.9999992800123595, iteration: 42347
loss: 1.0283249616622925,grad_norm: 0.8340296526663395, iteration: 42348
loss: 1.0128259658813477,grad_norm: 0.7413435821700944, iteration: 42349
loss: 1.0222808122634888,grad_norm: 0.9999991219989397, iteration: 42350
loss: 0.9942578077316284,grad_norm: 0.8767915276548593, iteration: 42351
loss: 1.082537293434143,grad_norm: 0.999999727453742, iteration: 42352
loss: 0.9801961779594421,grad_norm: 0.9999992770020374, iteration: 42353
loss: 0.979720950126648,grad_norm: 0.9999989698461341, iteration: 42354
loss: 1.0282403230667114,grad_norm: 0.9999990810083546, iteration: 42355
loss: 0.9900805354118347,grad_norm: 0.9999991544205762, iteration: 42356
loss: 1.0020307302474976,grad_norm: 0.9083698351019724, iteration: 42357
loss: 0.9827648997306824,grad_norm: 0.9999989927222829, iteration: 42358
loss: 1.0155603885650635,grad_norm: 0.9999991258551616, iteration: 42359
loss: 0.9886011481285095,grad_norm: 0.8825404332899627, iteration: 42360
loss: 0.9792352914810181,grad_norm: 0.9999998346761151, iteration: 42361
loss: 0.9888865947723389,grad_norm: 0.8756625682718132, iteration: 42362
loss: 1.024296522140503,grad_norm: 0.7834653443231642, iteration: 42363
loss: 1.0093427896499634,grad_norm: 0.8794629252447538, iteration: 42364
loss: 0.9831556677818298,grad_norm: 0.956084510492213, iteration: 42365
loss: 1.002920389175415,grad_norm: 0.9999990911750948, iteration: 42366
loss: 0.9883787035942078,grad_norm: 0.9999992072973288, iteration: 42367
loss: 0.9896484613418579,grad_norm: 0.9352521701198553, iteration: 42368
loss: 1.0069434642791748,grad_norm: 0.999999591987766, iteration: 42369
loss: 1.0127592086791992,grad_norm: 0.9999992468197579, iteration: 42370
loss: 1.0156747102737427,grad_norm: 0.9349686204542177, iteration: 42371
loss: 1.0095024108886719,grad_norm: 0.9999991345981475, iteration: 42372
loss: 1.0906517505645752,grad_norm: 0.9999994518859581, iteration: 42373
loss: 1.0713732242584229,grad_norm: 0.9999995566970759, iteration: 42374
loss: 0.9800206422805786,grad_norm: 0.9999991671563538, iteration: 42375
loss: 1.0456736087799072,grad_norm: 0.9999999692646347, iteration: 42376
loss: 1.043739914894104,grad_norm: 0.8392914022883087, iteration: 42377
loss: 1.007789134979248,grad_norm: 0.7921535286657756, iteration: 42378
loss: 1.0484949350357056,grad_norm: 0.9999997162783884, iteration: 42379
loss: 1.030052900314331,grad_norm: 0.9470052851151579, iteration: 42380
loss: 1.000589370727539,grad_norm: 0.9999992174667772, iteration: 42381
loss: 1.0330387353897095,grad_norm: 0.9999994961867313, iteration: 42382
loss: 0.9523624181747437,grad_norm: 0.9999998382828166, iteration: 42383
loss: 0.9630984663963318,grad_norm: 0.9999990703190512, iteration: 42384
loss: 0.9530944228172302,grad_norm: 0.9463992738222926, iteration: 42385
loss: 1.0849875211715698,grad_norm: 0.9999994581112858, iteration: 42386
loss: 1.0168579816818237,grad_norm: 0.9999990776592318, iteration: 42387
loss: 1.0022382736206055,grad_norm: 0.874501032075187, iteration: 42388
loss: 1.0464953184127808,grad_norm: 0.9999990628351328, iteration: 42389
loss: 1.0104601383209229,grad_norm: 0.9999990872885042, iteration: 42390
loss: 1.0013569593429565,grad_norm: 0.9999992446028303, iteration: 42391
loss: 1.0354948043823242,grad_norm: 0.9859116008645988, iteration: 42392
loss: 1.0102406740188599,grad_norm: 0.8973986027625174, iteration: 42393
loss: 1.04471755027771,grad_norm: 0.9999991302090191, iteration: 42394
loss: 1.012528419494629,grad_norm: 0.9259576200780939, iteration: 42395
loss: 1.056756615638733,grad_norm: 0.9227034920750468, iteration: 42396
loss: 0.9836235046386719,grad_norm: 0.8445961991002783, iteration: 42397
loss: 1.030511498451233,grad_norm: 0.9999995227612931, iteration: 42398
loss: 1.0026466846466064,grad_norm: 0.999999056920818, iteration: 42399
loss: 1.05612313747406,grad_norm: 0.9999994704385219, iteration: 42400
loss: 1.0058869123458862,grad_norm: 0.8952934582996228, iteration: 42401
loss: 0.9845377802848816,grad_norm: 0.9703314749388718, iteration: 42402
loss: 1.0000227689743042,grad_norm: 0.8510080567360955, iteration: 42403
loss: 1.0038397312164307,grad_norm: 0.9999990576029537, iteration: 42404
loss: 1.0061789751052856,grad_norm: 0.9999992122665986, iteration: 42405
loss: 1.024729609489441,grad_norm: 0.9999996181885952, iteration: 42406
loss: 1.0378954410552979,grad_norm: 0.9649055794158753, iteration: 42407
loss: 1.0125513076782227,grad_norm: 0.8382744300607174, iteration: 42408
loss: 1.0559366941452026,grad_norm: 0.9999999720026391, iteration: 42409
loss: 1.0257322788238525,grad_norm: 0.9999993670781498, iteration: 42410
loss: 1.0738449096679688,grad_norm: 0.9999997686298917, iteration: 42411
loss: 1.0452852249145508,grad_norm: 0.9342256610198296, iteration: 42412
loss: 1.0059531927108765,grad_norm: 0.9999992591305549, iteration: 42413
loss: 1.035498023033142,grad_norm: 0.999999356423659, iteration: 42414
loss: 1.0070841312408447,grad_norm: 0.9999996063413342, iteration: 42415
loss: 0.9911947250366211,grad_norm: 0.8726008134256716, iteration: 42416
loss: 0.9987368583679199,grad_norm: 0.8866707406226975, iteration: 42417
loss: 1.0006345510482788,grad_norm: 0.9130378138717963, iteration: 42418
loss: 1.0809658765792847,grad_norm: 0.999999767833531, iteration: 42419
loss: 1.0053133964538574,grad_norm: 0.9302186333935246, iteration: 42420
loss: 1.0013020038604736,grad_norm: 0.9439755487624597, iteration: 42421
loss: 1.0098628997802734,grad_norm: 0.9999991464405179, iteration: 42422
loss: 0.9982891082763672,grad_norm: 0.8468694485499136, iteration: 42423
loss: 0.9563640356063843,grad_norm: 0.9325645266912023, iteration: 42424
loss: 1.0196692943572998,grad_norm: 0.8717011063169249, iteration: 42425
loss: 1.0114811658859253,grad_norm: 0.9099287874728034, iteration: 42426
loss: 0.9417371153831482,grad_norm: 0.9648670342100305, iteration: 42427
loss: 0.9994524717330933,grad_norm: 0.8653188493353369, iteration: 42428
loss: 1.0083059072494507,grad_norm: 0.999999246338907, iteration: 42429
loss: 0.9435786604881287,grad_norm: 0.9318786312777763, iteration: 42430
loss: 1.041293740272522,grad_norm: 0.9895523035194793, iteration: 42431
loss: 1.0164483785629272,grad_norm: 0.8424389135642633, iteration: 42432
loss: 1.0749765634536743,grad_norm: 0.9999994855803823, iteration: 42433
loss: 1.005822777748108,grad_norm: 0.9850454108421367, iteration: 42434
loss: 1.013791799545288,grad_norm: 0.9818573553249745, iteration: 42435
loss: 1.015321135520935,grad_norm: 0.9999994763145855, iteration: 42436
loss: 1.0139981508255005,grad_norm: 0.9999990044800384, iteration: 42437
loss: 0.9886593222618103,grad_norm: 0.8726960977125001, iteration: 42438
loss: 1.005776286125183,grad_norm: 0.8439838526738144, iteration: 42439
loss: 1.0081313848495483,grad_norm: 0.7656113887232048, iteration: 42440
loss: 1.057078242301941,grad_norm: 0.8017561580510508, iteration: 42441
loss: 0.9816662073135376,grad_norm: 0.8376774735470556, iteration: 42442
loss: 0.9858417510986328,grad_norm: 0.879175602484786, iteration: 42443
loss: 0.9589991569519043,grad_norm: 0.9999996279075296, iteration: 42444
loss: 1.0240784883499146,grad_norm: 0.8124200234597404, iteration: 42445
loss: 1.0058563947677612,grad_norm: 0.9856796356118193, iteration: 42446
loss: 1.0345507860183716,grad_norm: 0.9999993294200585, iteration: 42447
loss: 1.0127984285354614,grad_norm: 0.9999992555463678, iteration: 42448
loss: 0.9722440838813782,grad_norm: 0.8917830270351262, iteration: 42449
loss: 1.0196402072906494,grad_norm: 0.8415150836579289, iteration: 42450
loss: 0.9808282852172852,grad_norm: 0.9999991011133244, iteration: 42451
loss: 1.006270170211792,grad_norm: 0.9999996710625174, iteration: 42452
loss: 0.9439281225204468,grad_norm: 0.8659779900553077, iteration: 42453
loss: 0.9802729487419128,grad_norm: 0.9999991595575228, iteration: 42454
loss: 1.0413521528244019,grad_norm: 0.9894690408503708, iteration: 42455
loss: 0.9697770476341248,grad_norm: 0.9557917536894674, iteration: 42456
loss: 0.9713774919509888,grad_norm: 0.9727941436992809, iteration: 42457
loss: 0.9993550777435303,grad_norm: 0.8550010618328494, iteration: 42458
loss: 1.0621342658996582,grad_norm: 0.9999999130600301, iteration: 42459
loss: 1.0245712995529175,grad_norm: 0.9999992034925105, iteration: 42460
loss: 1.0165091753005981,grad_norm: 0.9999990947623049, iteration: 42461
loss: 0.9723641872406006,grad_norm: 0.7660153117858538, iteration: 42462
loss: 1.0688354969024658,grad_norm: 0.9999993426968953, iteration: 42463
loss: 1.0212852954864502,grad_norm: 0.9999990840504559, iteration: 42464
loss: 1.0362275838851929,grad_norm: 0.9999997229835995, iteration: 42465
loss: 1.0004642009735107,grad_norm: 0.9999990710522318, iteration: 42466
loss: 1.012880563735962,grad_norm: 0.999999119169486, iteration: 42467
loss: 0.9616327881813049,grad_norm: 0.999999041463625, iteration: 42468
loss: 1.0189411640167236,grad_norm: 0.9999991087737943, iteration: 42469
loss: 1.0757215023040771,grad_norm: 0.9999995871131895, iteration: 42470
loss: 1.2040647268295288,grad_norm: 0.9999997697215582, iteration: 42471
loss: 1.067245364189148,grad_norm: 0.9999996219175085, iteration: 42472
loss: 1.0002057552337646,grad_norm: 0.9999990949165702, iteration: 42473
loss: 0.9880164861679077,grad_norm: 0.99999921656776, iteration: 42474
loss: 0.9995459914207458,grad_norm: 0.9999991004616015, iteration: 42475
loss: 0.997198760509491,grad_norm: 0.9128286415227393, iteration: 42476
loss: 1.1153759956359863,grad_norm: 0.9999997637487434, iteration: 42477
loss: 1.0454472303390503,grad_norm: 0.9890042457330739, iteration: 42478
loss: 1.0138239860534668,grad_norm: 0.9999990042880096, iteration: 42479
loss: 1.0890604257583618,grad_norm: 0.9999995161170531, iteration: 42480
loss: 0.9893760085105896,grad_norm: 0.9999990154239368, iteration: 42481
loss: 1.022698163986206,grad_norm: 0.9999992840505958, iteration: 42482
loss: 1.022515058517456,grad_norm: 0.9999991858544723, iteration: 42483
loss: 1.000173807144165,grad_norm: 0.696185534051112, iteration: 42484
loss: 1.0790520906448364,grad_norm: 0.9999994031642558, iteration: 42485
loss: 1.0167516469955444,grad_norm: 0.896680684997091, iteration: 42486
loss: 1.044704556465149,grad_norm: 1.0000000263083508, iteration: 42487
loss: 1.0203431844711304,grad_norm: 0.9074239609446447, iteration: 42488
loss: 0.9470288157463074,grad_norm: 0.9574023248549316, iteration: 42489
loss: 1.1096724271774292,grad_norm: 0.9999992639985161, iteration: 42490
loss: 1.0011032819747925,grad_norm: 0.9999992903491072, iteration: 42491
loss: 1.0537062883377075,grad_norm: 0.9999992134997636, iteration: 42492
loss: 1.0336333513259888,grad_norm: 0.9999994842251653, iteration: 42493
loss: 1.0040218830108643,grad_norm: 0.8832141723424293, iteration: 42494
loss: 1.0147804021835327,grad_norm: 0.9999990533917664, iteration: 42495
loss: 1.0449621677398682,grad_norm: 0.9999997402735719, iteration: 42496
loss: 0.9604524970054626,grad_norm: 0.8793336738067349, iteration: 42497
loss: 1.0930845737457275,grad_norm: 0.9999995283513249, iteration: 42498
loss: 0.9894804358482361,grad_norm: 0.9863334690749826, iteration: 42499
loss: 1.0213357210159302,grad_norm: 0.9999993079803003, iteration: 42500
loss: 0.9959168434143066,grad_norm: 0.9999992017984862, iteration: 42501
loss: 0.9910638332366943,grad_norm: 0.938544474658211, iteration: 42502
loss: 1.179872989654541,grad_norm: 0.9999994932655981, iteration: 42503
loss: 0.9873204231262207,grad_norm: 0.9620777893111638, iteration: 42504
loss: 1.0238462686538696,grad_norm: 0.919881056467732, iteration: 42505
loss: 1.0034022331237793,grad_norm: 0.9999992974081102, iteration: 42506
loss: 1.0352011919021606,grad_norm: 0.8843609525806805, iteration: 42507
loss: 1.0296651124954224,grad_norm: 0.9119781609653576, iteration: 42508
loss: 0.9880771636962891,grad_norm: 0.8681416531238638, iteration: 42509
loss: 1.000152826309204,grad_norm: 0.9999992333588771, iteration: 42510
loss: 1.0096845626831055,grad_norm: 0.9261041986277972, iteration: 42511
loss: 1.0298806428909302,grad_norm: 0.9629925223700481, iteration: 42512
loss: 0.9658991098403931,grad_norm: 0.9999990714356126, iteration: 42513
loss: 1.0224372148513794,grad_norm: 0.8617448222939099, iteration: 42514
loss: 1.0408395528793335,grad_norm: 0.9999996118003871, iteration: 42515
loss: 1.0024205446243286,grad_norm: 0.8244093290682191, iteration: 42516
loss: 1.0223134756088257,grad_norm: 1.0000000636395947, iteration: 42517
loss: 0.99408358335495,grad_norm: 0.9999990987877905, iteration: 42518
loss: 0.9914174675941467,grad_norm: 0.8574696037616091, iteration: 42519
loss: 0.9941898584365845,grad_norm: 0.9463038117775283, iteration: 42520
loss: 1.0523877143859863,grad_norm: 0.8868469301212955, iteration: 42521
loss: 0.982888400554657,grad_norm: 0.8586646927362062, iteration: 42522
loss: 1.013447880744934,grad_norm: 0.9999991573473619, iteration: 42523
loss: 0.9702686667442322,grad_norm: 0.893348297758145, iteration: 42524
loss: 0.9989753365516663,grad_norm: 0.94661335813913, iteration: 42525
loss: 1.0059677362442017,grad_norm: 0.999999224556835, iteration: 42526
loss: 0.984921395778656,grad_norm: 0.9543755736721747, iteration: 42527
loss: 1.046850323677063,grad_norm: 0.9441020582162315, iteration: 42528
loss: 0.9914659261703491,grad_norm: 0.9999990373674903, iteration: 42529
loss: 1.0365370512008667,grad_norm: 0.9999994019909256, iteration: 42530
loss: 1.0285537242889404,grad_norm: 0.926261499095507, iteration: 42531
loss: 1.0453274250030518,grad_norm: 0.9999994324513757, iteration: 42532
loss: 0.9978041648864746,grad_norm: 0.9999993228180029, iteration: 42533
loss: 1.0255662202835083,grad_norm: 0.9653776943618613, iteration: 42534
loss: 1.0326482057571411,grad_norm: 0.9999991300959837, iteration: 42535
loss: 1.034203052520752,grad_norm: 0.9999994085007102, iteration: 42536
loss: 0.990332305431366,grad_norm: 0.7669290487266299, iteration: 42537
loss: 1.0437898635864258,grad_norm: 0.9584522239058343, iteration: 42538
loss: 0.997093915939331,grad_norm: 0.9273744087890763, iteration: 42539
loss: 1.0198020935058594,grad_norm: 0.9999990575601434, iteration: 42540
loss: 0.9714804887771606,grad_norm: 0.8729516235546801, iteration: 42541
loss: 0.9981322884559631,grad_norm: 0.8738404562086808, iteration: 42542
loss: 0.9757833480834961,grad_norm: 0.8760343386846494, iteration: 42543
loss: 1.0305887460708618,grad_norm: 0.9416684385863473, iteration: 42544
loss: 1.0009773969650269,grad_norm: 0.9050219055435633, iteration: 42545
loss: 1.0184571743011475,grad_norm: 0.9630777875255624, iteration: 42546
loss: 0.9814549088478088,grad_norm: 0.9999997334383849, iteration: 42547
loss: 1.0026668310165405,grad_norm: 0.9999990767229184, iteration: 42548
loss: 0.9528636932373047,grad_norm: 0.9999991794894494, iteration: 42549
loss: 1.1184099912643433,grad_norm: 0.9339771183733048, iteration: 42550
loss: 0.9680504202842712,grad_norm: 0.9599216429056133, iteration: 42551
loss: 1.006415843963623,grad_norm: 0.9424622892757017, iteration: 42552
loss: 0.9957822561264038,grad_norm: 0.9102262599348588, iteration: 42553
loss: 0.9605004191398621,grad_norm: 0.9733087626305322, iteration: 42554
loss: 0.9747014045715332,grad_norm: 0.938216751495208, iteration: 42555
loss: 0.9686968326568604,grad_norm: 0.9999991571683292, iteration: 42556
loss: 1.0021464824676514,grad_norm: 0.9999990501950572, iteration: 42557
loss: 1.0027737617492676,grad_norm: 0.9999993640502017, iteration: 42558
loss: 1.0515815019607544,grad_norm: 0.9999990404400902, iteration: 42559
loss: 0.9960897564888,grad_norm: 0.9615180628457857, iteration: 42560
loss: 1.0137965679168701,grad_norm: 0.9999991866542041, iteration: 42561
loss: 1.0135018825531006,grad_norm: 0.9999991282017682, iteration: 42562
loss: 0.9830854535102844,grad_norm: 0.9999991167221023, iteration: 42563
loss: 0.9692885279655457,grad_norm: 0.9999988654178196, iteration: 42564
loss: 1.0430750846862793,grad_norm: 0.999999514155829, iteration: 42565
loss: 0.9944579005241394,grad_norm: 0.9999989893712087, iteration: 42566
loss: 1.0375683307647705,grad_norm: 0.9999992208822653, iteration: 42567
loss: 0.9984489679336548,grad_norm: 0.9213107891506639, iteration: 42568
loss: 0.9850138425827026,grad_norm: 0.8123939776495833, iteration: 42569
loss: 1.0005865097045898,grad_norm: 0.8771456429932947, iteration: 42570
loss: 1.0194382667541504,grad_norm: 0.9366118239831505, iteration: 42571
loss: 0.9995692372322083,grad_norm: 0.9999993309175237, iteration: 42572
loss: 0.998620331287384,grad_norm: 0.9999991744397915, iteration: 42573
loss: 1.0792021751403809,grad_norm: 0.9999992705534325, iteration: 42574
loss: 0.9893162250518799,grad_norm: 0.9452339593331114, iteration: 42575
loss: 1.0011061429977417,grad_norm: 0.7695043659515161, iteration: 42576
loss: 0.9899860620498657,grad_norm: 0.9840315708400696, iteration: 42577
loss: 1.0468223094940186,grad_norm: 0.8872235267549496, iteration: 42578
loss: 1.0078074932098389,grad_norm: 0.9024402220585127, iteration: 42579
loss: 1.0234774351119995,grad_norm: 0.9651388841398281, iteration: 42580
loss: 1.0225857496261597,grad_norm: 0.973741399542416, iteration: 42581
loss: 0.9938746094703674,grad_norm: 0.7054135775078423, iteration: 42582
loss: 1.0253715515136719,grad_norm: 0.9999993894768128, iteration: 42583
loss: 1.0135692358016968,grad_norm: 0.9895152998952055, iteration: 42584
loss: 1.0644330978393555,grad_norm: 0.8916367518664634, iteration: 42585
loss: 0.9799105525016785,grad_norm: 0.999999237469686, iteration: 42586
loss: 0.9182618260383606,grad_norm: 0.9999991851047288, iteration: 42587
loss: 1.0134769678115845,grad_norm: 0.9698716912219488, iteration: 42588
loss: 1.0363991260528564,grad_norm: 0.8670318021667794, iteration: 42589
loss: 0.9805687069892883,grad_norm: 0.9496941682420327, iteration: 42590
loss: 0.98515784740448,grad_norm: 0.9999997132934216, iteration: 42591
loss: 1.007993221282959,grad_norm: 0.999999095159203, iteration: 42592
loss: 0.9821141958236694,grad_norm: 0.9433657051851521, iteration: 42593
loss: 0.960064709186554,grad_norm: 0.9988936007222073, iteration: 42594
loss: 1.0246167182922363,grad_norm: 0.9999993651726008, iteration: 42595
loss: 1.0120590925216675,grad_norm: 0.9999991110511555, iteration: 42596
loss: 1.0014503002166748,grad_norm: 0.9009162753167637, iteration: 42597
loss: 0.9646185040473938,grad_norm: 0.9591137891177137, iteration: 42598
loss: 1.0550453662872314,grad_norm: 0.9999991623226115, iteration: 42599
loss: 1.0206924676895142,grad_norm: 0.9937574580985892, iteration: 42600
loss: 0.9752259254455566,grad_norm: 0.9999991041647749, iteration: 42601
loss: 0.9867314696311951,grad_norm: 0.8526192026783993, iteration: 42602
loss: 1.0037144422531128,grad_norm: 0.8034199166193716, iteration: 42603
loss: 1.0087692737579346,grad_norm: 0.9999992101053747, iteration: 42604
loss: 1.0762442350387573,grad_norm: 0.9999990750319387, iteration: 42605
loss: 1.005078673362732,grad_norm: 0.9999993847377959, iteration: 42606
loss: 1.0843231678009033,grad_norm: 0.8901685263442816, iteration: 42607
loss: 1.0338994264602661,grad_norm: 0.9999991148721623, iteration: 42608
loss: 1.0897367000579834,grad_norm: 0.9999988828724606, iteration: 42609
loss: 1.0143516063690186,grad_norm: 0.9885550407711605, iteration: 42610
loss: 1.0591367483139038,grad_norm: 0.972140708460361, iteration: 42611
loss: 1.0417543649673462,grad_norm: 0.9999999078148893, iteration: 42612
loss: 1.0522379875183105,grad_norm: 0.9308758602162727, iteration: 42613
loss: 1.0355509519577026,grad_norm: 0.9999994480207943, iteration: 42614
loss: 0.9781665802001953,grad_norm: 0.8258780047153024, iteration: 42615
loss: 1.0196443796157837,grad_norm: 0.8824714476232974, iteration: 42616
loss: 1.0185836553573608,grad_norm: 0.999999069616547, iteration: 42617
loss: 0.9704998731613159,grad_norm: 0.8673723333964133, iteration: 42618
loss: 1.0667954683303833,grad_norm: 0.999999105916394, iteration: 42619
loss: 0.9879915714263916,grad_norm: 0.8849804242094702, iteration: 42620
loss: 1.0683536529541016,grad_norm: 0.9999990335872407, iteration: 42621
loss: 1.0830177068710327,grad_norm: 0.9999994366223137, iteration: 42622
loss: 1.1622779369354248,grad_norm: 0.9999998897558905, iteration: 42623
loss: 1.0704237222671509,grad_norm: 0.9999996525153849, iteration: 42624
loss: 1.0065202713012695,grad_norm: 0.9999989796381231, iteration: 42625
loss: 0.9893075823783875,grad_norm: 0.9999991415366908, iteration: 42626
loss: 1.081891417503357,grad_norm: 0.9999993396935635, iteration: 42627
loss: 1.0067955255508423,grad_norm: 0.9105367644012209, iteration: 42628
loss: 1.0719677209854126,grad_norm: 0.9999995496258214, iteration: 42629
loss: 1.007245659828186,grad_norm: 0.9970315550439525, iteration: 42630
loss: 1.049226999282837,grad_norm: 0.9348577805359427, iteration: 42631
loss: 1.0199060440063477,grad_norm: 0.9999992644202224, iteration: 42632
loss: 1.0315169095993042,grad_norm: 0.9999994285420561, iteration: 42633
loss: 0.9869468808174133,grad_norm: 0.8013933157899202, iteration: 42634
loss: 0.9850249290466309,grad_norm: 0.8725375775608039, iteration: 42635
loss: 0.9795151352882385,grad_norm: 0.9999990050930163, iteration: 42636
loss: 1.0002390146255493,grad_norm: 0.9999992711174608, iteration: 42637
loss: 1.024499773979187,grad_norm: 0.9271595848965619, iteration: 42638
loss: 1.0440417528152466,grad_norm: 0.9999992714773797, iteration: 42639
loss: 1.0037137269973755,grad_norm: 0.8369080612388836, iteration: 42640
loss: 1.0737178325653076,grad_norm: 0.9999993869170288, iteration: 42641
loss: 1.0289937257766724,grad_norm: 0.999999015930068, iteration: 42642
loss: 1.0099858045578003,grad_norm: 0.9008228931378509, iteration: 42643
loss: 0.9769359827041626,grad_norm: 0.9091927745000011, iteration: 42644
loss: 1.0361799001693726,grad_norm: 0.9999992310498925, iteration: 42645
loss: 0.9823248982429504,grad_norm: 0.9389769816962298, iteration: 42646
loss: 1.0393469333648682,grad_norm: 0.9999990518807875, iteration: 42647
loss: 1.0277397632598877,grad_norm: 0.9999993055480634, iteration: 42648
loss: 1.020356297492981,grad_norm: 0.9999991477975058, iteration: 42649
loss: 0.9809010028839111,grad_norm: 0.9999997709154853, iteration: 42650
loss: 0.989732563495636,grad_norm: 0.9999990397953221, iteration: 42651
loss: 0.9777289032936096,grad_norm: 0.9956184517363961, iteration: 42652
loss: 1.0021979808807373,grad_norm: 0.969428224856422, iteration: 42653
loss: 0.9865579605102539,grad_norm: 0.9999990970151846, iteration: 42654
loss: 0.9799620509147644,grad_norm: 0.7738163774391165, iteration: 42655
loss: 1.0179740190505981,grad_norm: 0.8675134109910161, iteration: 42656
loss: 0.9987869262695312,grad_norm: 0.9999992299710981, iteration: 42657
loss: 1.0334950685501099,grad_norm: 0.999999510319848, iteration: 42658
loss: 1.0140928030014038,grad_norm: 0.9068904726073521, iteration: 42659
loss: 0.9552825093269348,grad_norm: 0.9621025621057705, iteration: 42660
loss: 1.0009905099868774,grad_norm: 0.7991486392559802, iteration: 42661
loss: 1.0009876489639282,grad_norm: 0.9307302493353536, iteration: 42662
loss: 1.0115277767181396,grad_norm: 0.933692199765279, iteration: 42663
loss: 0.9873125553131104,grad_norm: 0.9999994358246176, iteration: 42664
loss: 1.0265706777572632,grad_norm: 0.7368884089868467, iteration: 42665
loss: 1.0112040042877197,grad_norm: 0.9320404052304193, iteration: 42666
loss: 0.9958393573760986,grad_norm: 0.9764462514056786, iteration: 42667
loss: 1.0390492677688599,grad_norm: 0.9999991635995569, iteration: 42668
loss: 0.9879192113876343,grad_norm: 0.8009229975838702, iteration: 42669
loss: 0.9910358786582947,grad_norm: 0.9778476752072863, iteration: 42670
loss: 1.0202313661575317,grad_norm: 0.9999994847669482, iteration: 42671
loss: 0.9963175058364868,grad_norm: 0.7492005663573292, iteration: 42672
loss: 1.0237681865692139,grad_norm: 0.9999989719384362, iteration: 42673
loss: 1.021929383277893,grad_norm: 0.9999992299152858, iteration: 42674
loss: 1.0144292116165161,grad_norm: 0.9999990403510121, iteration: 42675
loss: 1.024910569190979,grad_norm: 0.8904764845050692, iteration: 42676
loss: 1.050988793373108,grad_norm: 0.9999993290046569, iteration: 42677
loss: 1.018955111503601,grad_norm: 0.8041933401343658, iteration: 42678
loss: 1.0045859813690186,grad_norm: 0.9999991232239862, iteration: 42679
loss: 1.0319111347198486,grad_norm: 0.9999994433544622, iteration: 42680
loss: 0.9812539219856262,grad_norm: 0.9999991468738763, iteration: 42681
loss: 0.9916631579399109,grad_norm: 0.9522370370423926, iteration: 42682
loss: 0.9726466536521912,grad_norm: 0.9999991755693604, iteration: 42683
loss: 0.9941504001617432,grad_norm: 0.9729756071149837, iteration: 42684
loss: 0.9531697034835815,grad_norm: 0.8550465901962401, iteration: 42685
loss: 1.0131359100341797,grad_norm: 0.8639064414283275, iteration: 42686
loss: 1.0054593086242676,grad_norm: 0.9899371670284127, iteration: 42687
loss: 0.9939117431640625,grad_norm: 0.999999545292925, iteration: 42688
loss: 1.0337995290756226,grad_norm: 0.8124330006192003, iteration: 42689
loss: 0.994179368019104,grad_norm: 0.8714071319024624, iteration: 42690
loss: 1.037894606590271,grad_norm: 0.9701245352755247, iteration: 42691
loss: 1.0153133869171143,grad_norm: 0.9360476942700042, iteration: 42692
loss: 1.0149095058441162,grad_norm: 0.8173052171815596, iteration: 42693
loss: 1.0234276056289673,grad_norm: 0.9999994550460838, iteration: 42694
loss: 1.0146260261535645,grad_norm: 0.999586218379937, iteration: 42695
loss: 1.0197197198867798,grad_norm: 0.8534030506744742, iteration: 42696
loss: 0.9854789972305298,grad_norm: 0.9999990879682928, iteration: 42697
loss: 1.0639265775680542,grad_norm: 0.9999994477419448, iteration: 42698
loss: 1.0263816118240356,grad_norm: 0.8831081230143154, iteration: 42699
loss: 0.9651033282279968,grad_norm: 0.8746919849358948, iteration: 42700
loss: 1.0216490030288696,grad_norm: 0.9246845179611193, iteration: 42701
loss: 1.0196499824523926,grad_norm: 0.9999991301090928, iteration: 42702
loss: 1.0169678926467896,grad_norm: 0.7429635735729185, iteration: 42703
loss: 0.990200400352478,grad_norm: 0.9030232341131927, iteration: 42704
loss: 1.0309256315231323,grad_norm: 0.9999992891910977, iteration: 42705
loss: 0.9947118163108826,grad_norm: 0.9999992012620443, iteration: 42706
loss: 1.033225655555725,grad_norm: 0.8845601508227955, iteration: 42707
loss: 0.9527214169502258,grad_norm: 0.9204253631914047, iteration: 42708
loss: 0.9915140271186829,grad_norm: 0.9211527358472822, iteration: 42709
loss: 1.0630749464035034,grad_norm: 0.9999997168134545, iteration: 42710
loss: 0.9753684401512146,grad_norm: 0.9193864486979889, iteration: 42711
loss: 1.0084694623947144,grad_norm: 0.9999993360223981, iteration: 42712
loss: 1.0757015943527222,grad_norm: 0.952166302559508, iteration: 42713
loss: 0.9651163220405579,grad_norm: 0.8605097890280718, iteration: 42714
loss: 1.0295820236206055,grad_norm: 0.8420849046080627, iteration: 42715
loss: 0.9742103219032288,grad_norm: 0.6983467130600999, iteration: 42716
loss: 1.0297983884811401,grad_norm: 0.999999357026679, iteration: 42717
loss: 0.991288423538208,grad_norm: 0.797063560489204, iteration: 42718
loss: 1.0276012420654297,grad_norm: 0.9999991634889656, iteration: 42719
loss: 0.9980706572532654,grad_norm: 0.887234521740213, iteration: 42720
loss: 1.0039504766464233,grad_norm: 0.9530975299360913, iteration: 42721
loss: 1.008796215057373,grad_norm: 0.9126421847851043, iteration: 42722
loss: 0.9967294931411743,grad_norm: 0.9143997801446855, iteration: 42723
loss: 1.0492674112319946,grad_norm: 0.9999994769856483, iteration: 42724
loss: 1.004691481590271,grad_norm: 0.9999991320507026, iteration: 42725
loss: 1.0722241401672363,grad_norm: 0.9999992216833637, iteration: 42726
loss: 1.0330455303192139,grad_norm: 0.9947135381039331, iteration: 42727
loss: 0.9879304766654968,grad_norm: 0.9999990521477876, iteration: 42728
loss: 1.0215551853179932,grad_norm: 0.9999992899538606, iteration: 42729
loss: 0.9976213574409485,grad_norm: 0.8190491304902975, iteration: 42730
loss: 1.0327174663543701,grad_norm: 0.9156069218772335, iteration: 42731
loss: 1.0174765586853027,grad_norm: 0.9979532777551071, iteration: 42732
loss: 1.0342888832092285,grad_norm: 0.9438449036422221, iteration: 42733
loss: 1.0117430686950684,grad_norm: 0.9999990769450502, iteration: 42734
loss: 1.0247323513031006,grad_norm: 0.9999990774499958, iteration: 42735
loss: 1.007305383682251,grad_norm: 0.9999990314785964, iteration: 42736
loss: 0.9957548379898071,grad_norm: 0.8423151545214865, iteration: 42737
loss: 0.9813651442527771,grad_norm: 0.8360911304585267, iteration: 42738
loss: 0.9968250393867493,grad_norm: 0.8901734826517046, iteration: 42739
loss: 1.0113154649734497,grad_norm: 0.9999992796524598, iteration: 42740
loss: 1.0044199228286743,grad_norm: 0.9195564049857471, iteration: 42741
loss: 1.00783371925354,grad_norm: 0.9485797925883406, iteration: 42742
loss: 1.0099525451660156,grad_norm: 0.9999991766862346, iteration: 42743
loss: 1.0022344589233398,grad_norm: 0.9999992956273099, iteration: 42744
loss: 1.0637927055358887,grad_norm: 0.9999997026809103, iteration: 42745
loss: 1.007412314414978,grad_norm: 0.8994355606861312, iteration: 42746
loss: 1.017845630645752,grad_norm: 0.9142849474488163, iteration: 42747
loss: 1.0121434926986694,grad_norm: 0.8895383340512248, iteration: 42748
loss: 1.0431723594665527,grad_norm: 0.9999991564578008, iteration: 42749
loss: 0.9823909997940063,grad_norm: 0.9999992219064004, iteration: 42750
loss: 0.9933473467826843,grad_norm: 0.9999997719120592, iteration: 42751
loss: 0.985984206199646,grad_norm: 0.9999990905260215, iteration: 42752
loss: 1.0298112630844116,grad_norm: 0.9529799733461362, iteration: 42753
loss: 0.9996240735054016,grad_norm: 0.8998336563472854, iteration: 42754
loss: 1.0431808233261108,grad_norm: 0.9383754392025195, iteration: 42755
loss: 1.0119118690490723,grad_norm: 0.9965964857848509, iteration: 42756
loss: 0.9811044931411743,grad_norm: 0.8389973611636824, iteration: 42757
loss: 0.9878746271133423,grad_norm: 0.9437358096505148, iteration: 42758
loss: 1.0299595594406128,grad_norm: 0.9999994983551672, iteration: 42759
loss: 1.03452467918396,grad_norm: 0.9245205044718995, iteration: 42760
loss: 1.0235332250595093,grad_norm: 0.999999562332588, iteration: 42761
loss: 0.9730892181396484,grad_norm: 0.7765399723997197, iteration: 42762
loss: 1.0339604616165161,grad_norm: 0.9999992127148735, iteration: 42763
loss: 0.9605266451835632,grad_norm: 0.964890429679878, iteration: 42764
loss: 0.9581350088119507,grad_norm: 0.9016381388943568, iteration: 42765
loss: 1.004326581954956,grad_norm: 0.9999990127037783, iteration: 42766
loss: 0.9853191375732422,grad_norm: 0.9999991939532075, iteration: 42767
loss: 0.9921350479125977,grad_norm: 0.9999992016129827, iteration: 42768
loss: 1.035422682762146,grad_norm: 0.9899487574252981, iteration: 42769
loss: 0.9929160475730896,grad_norm: 0.9999991280293783, iteration: 42770
loss: 1.0323412418365479,grad_norm: 0.8442194522807246, iteration: 42771
loss: 0.9792442321777344,grad_norm: 0.8954534144446723, iteration: 42772
loss: 1.0625715255737305,grad_norm: 0.9999990096248181, iteration: 42773
loss: 1.0282407999038696,grad_norm: 0.8562316764744953, iteration: 42774
loss: 1.0508626699447632,grad_norm: 0.9999996267882201, iteration: 42775
loss: 0.9937276840209961,grad_norm: 0.9814955059689029, iteration: 42776
loss: 0.9554818868637085,grad_norm: 0.8693902932649821, iteration: 42777
loss: 1.0470911264419556,grad_norm: 0.9999996606418946, iteration: 42778
loss: 0.9865018725395203,grad_norm: 0.9523416347929308, iteration: 42779
loss: 1.010369062423706,grad_norm: 0.9999995951879137, iteration: 42780
loss: 1.1147032976150513,grad_norm: 0.9999992120424768, iteration: 42781
loss: 1.0972610712051392,grad_norm: 0.9999994327616517, iteration: 42782
loss: 1.182065486907959,grad_norm: 0.9999998751875822, iteration: 42783
loss: 1.0121015310287476,grad_norm: 0.9999991214032155, iteration: 42784
loss: 1.0220298767089844,grad_norm: 0.9999992879951204, iteration: 42785
loss: 1.0209887027740479,grad_norm: 0.9999991817137774, iteration: 42786
loss: 1.0301337242126465,grad_norm: 0.9998754579486518, iteration: 42787
loss: 1.020572543144226,grad_norm: 0.9358640315842237, iteration: 42788
loss: 1.040457010269165,grad_norm: 0.9033278697735532, iteration: 42789
loss: 1.0315258502960205,grad_norm: 0.9769646831944369, iteration: 42790
loss: 0.982177197933197,grad_norm: 0.9465911892072202, iteration: 42791
loss: 0.9952122569084167,grad_norm: 0.957623792978142, iteration: 42792
loss: 1.0015125274658203,grad_norm: 0.895497624259067, iteration: 42793
loss: 1.0511358976364136,grad_norm: 0.9999998145194062, iteration: 42794
loss: 0.993092954158783,grad_norm: 0.8843075633115279, iteration: 42795
loss: 1.0114420652389526,grad_norm: 0.7526403223213163, iteration: 42796
loss: 1.0127869844436646,grad_norm: 0.9999997440982704, iteration: 42797
loss: 1.0190415382385254,grad_norm: 0.9999993412635498, iteration: 42798
loss: 1.0763516426086426,grad_norm: 0.9999991184727653, iteration: 42799
loss: 0.9974943995475769,grad_norm: 0.9999992659109959, iteration: 42800
loss: 1.0474666357040405,grad_norm: 0.9999995421589547, iteration: 42801
loss: 1.015148401260376,grad_norm: 0.9999990471822598, iteration: 42802
loss: 1.0223537683486938,grad_norm: 0.8667442961542775, iteration: 42803
loss: 0.9901425838470459,grad_norm: 0.8332675769265967, iteration: 42804
loss: 0.9812639355659485,grad_norm: 0.9961016701686584, iteration: 42805
loss: 0.9991917014122009,grad_norm: 0.9999994253018759, iteration: 42806
loss: 0.9989827275276184,grad_norm: 0.9348898918114503, iteration: 42807
loss: 0.9830386638641357,grad_norm: 0.960159344356854, iteration: 42808
loss: 0.9733046889305115,grad_norm: 0.9999991765952185, iteration: 42809
loss: 1.0253041982650757,grad_norm: 0.9999991339694853, iteration: 42810
loss: 1.0100876092910767,grad_norm: 0.9999992127167256, iteration: 42811
loss: 1.0930722951889038,grad_norm: 0.9999995467731596, iteration: 42812
loss: 1.0036635398864746,grad_norm: 0.9999995051995654, iteration: 42813
loss: 1.0121058225631714,grad_norm: 0.9999993020272987, iteration: 42814
loss: 0.9807429909706116,grad_norm: 0.7577393410193889, iteration: 42815
loss: 0.9838218092918396,grad_norm: 0.9999991831143421, iteration: 42816
loss: 0.98687744140625,grad_norm: 0.9020193778857859, iteration: 42817
loss: 1.006973147392273,grad_norm: 0.9199643873698273, iteration: 42818
loss: 1.0271559953689575,grad_norm: 0.999999509213439, iteration: 42819
loss: 1.0868061780929565,grad_norm: 0.9999994819545653, iteration: 42820
loss: 1.0234313011169434,grad_norm: 0.9048045721910754, iteration: 42821
loss: 1.0371060371398926,grad_norm: 0.9999991443202595, iteration: 42822
loss: 1.073236346244812,grad_norm: 0.999999494718708, iteration: 42823
loss: 1.015993356704712,grad_norm: 0.9999991126359851, iteration: 42824
loss: 1.0069540739059448,grad_norm: 0.9999992435003205, iteration: 42825
loss: 0.969348669052124,grad_norm: 0.9984110274063493, iteration: 42826
loss: 1.0518571138381958,grad_norm: 0.999999539036911, iteration: 42827
loss: 0.9832829833030701,grad_norm: 0.914927858088934, iteration: 42828
loss: 1.1449276208877563,grad_norm: 0.9999997040054948, iteration: 42829
loss: 1.0043022632598877,grad_norm: 0.9076050523979889, iteration: 42830
loss: 0.9541462063789368,grad_norm: 0.9366550484830298, iteration: 42831
loss: 1.0313166379928589,grad_norm: 0.9999997369311523, iteration: 42832
loss: 1.0489448308944702,grad_norm: 0.9981618613447016, iteration: 42833
loss: 0.9867093563079834,grad_norm: 0.9124479247447311, iteration: 42834
loss: 0.9987229704856873,grad_norm: 0.8882689323534813, iteration: 42835
loss: 1.0756711959838867,grad_norm: 0.9999993054128344, iteration: 42836
loss: 1.018362283706665,grad_norm: 0.9754907828343464, iteration: 42837
loss: 1.0083776712417603,grad_norm: 0.9682527819045855, iteration: 42838
loss: 0.9803435206413269,grad_norm: 0.8776818388194846, iteration: 42839
loss: 0.9485185742378235,grad_norm: 0.9212980674357714, iteration: 42840
loss: 1.0610615015029907,grad_norm: 0.9999992959398115, iteration: 42841
loss: 1.019585132598877,grad_norm: 0.9999991159944287, iteration: 42842
loss: 1.0341298580169678,grad_norm: 0.9999999245960637, iteration: 42843
loss: 1.0270332098007202,grad_norm: 0.9999993928464034, iteration: 42844
loss: 1.0197858810424805,grad_norm: 0.999999235134015, iteration: 42845
loss: 1.0108367204666138,grad_norm: 0.999999699815267, iteration: 42846
loss: 1.0051720142364502,grad_norm: 0.9309147017574917, iteration: 42847
loss: 0.9782310724258423,grad_norm: 0.9999989790577736, iteration: 42848
loss: 1.0999536514282227,grad_norm: 0.8971345411140372, iteration: 42849
loss: 1.0424458980560303,grad_norm: 0.999999021398853, iteration: 42850
loss: 0.9968432784080505,grad_norm: 0.999999090892142, iteration: 42851
loss: 1.0310777425765991,grad_norm: 0.9118625077361415, iteration: 42852
loss: 1.083884358406067,grad_norm: 0.9999995956950231, iteration: 42853
loss: 1.003727674484253,grad_norm: 0.9999990758476069, iteration: 42854
loss: 0.9793212413787842,grad_norm: 0.9999991396768488, iteration: 42855
loss: 1.005767822265625,grad_norm: 0.9999990767441762, iteration: 42856
loss: 1.0535961389541626,grad_norm: 0.9999992977812457, iteration: 42857
loss: 1.0160884857177734,grad_norm: 0.9999996824473036, iteration: 42858
loss: 1.0148952007293701,grad_norm: 0.9999992155578645, iteration: 42859
loss: 0.9923557043075562,grad_norm: 0.8355926303611505, iteration: 42860
loss: 1.0275589227676392,grad_norm: 0.9212045988407527, iteration: 42861
loss: 1.00589120388031,grad_norm: 0.999999093613491, iteration: 42862
loss: 0.9451478719711304,grad_norm: 0.9999990140818288, iteration: 42863
loss: 0.9870873093605042,grad_norm: 0.8004727420479004, iteration: 42864
loss: 1.0250508785247803,grad_norm: 0.9999993849341227, iteration: 42865
loss: 1.0078461170196533,grad_norm: 0.883137088228739, iteration: 42866
loss: 0.9836037158966064,grad_norm: 0.8843198377283928, iteration: 42867
loss: 1.007941484451294,grad_norm: 0.9061368195650905, iteration: 42868
loss: 0.9938150644302368,grad_norm: 0.999999155224136, iteration: 42869
loss: 1.009184718132019,grad_norm: 0.9999993803725712, iteration: 42870
loss: 1.014775276184082,grad_norm: 0.9093543849397434, iteration: 42871
loss: 0.9859194159507751,grad_norm: 0.9999991021465902, iteration: 42872
loss: 1.0098435878753662,grad_norm: 0.9999991221873309, iteration: 42873
loss: 0.9935423135757446,grad_norm: 0.8807312850931718, iteration: 42874
loss: 1.0030498504638672,grad_norm: 0.999999307159465, iteration: 42875
loss: 0.9822929501533508,grad_norm: 0.971980621759069, iteration: 42876
loss: 0.9764347076416016,grad_norm: 0.9999995626876099, iteration: 42877
loss: 1.0014771223068237,grad_norm: 0.9560771155918928, iteration: 42878
loss: 0.9997249841690063,grad_norm: 0.946042279155845, iteration: 42879
loss: 1.0319130420684814,grad_norm: 0.9408555449132477, iteration: 42880
loss: 0.9868021011352539,grad_norm: 0.8737864599794773, iteration: 42881
loss: 1.0303456783294678,grad_norm: 0.8584780947935754, iteration: 42882
loss: 0.9956578016281128,grad_norm: 0.9999993947463832, iteration: 42883
loss: 0.9809538722038269,grad_norm: 0.8021446179987066, iteration: 42884
loss: 1.0114625692367554,grad_norm: 0.9999991731218616, iteration: 42885
loss: 1.0182957649230957,grad_norm: 0.9999994077042107, iteration: 42886
loss: 1.0274485349655151,grad_norm: 0.9999990784576854, iteration: 42887
loss: 1.0131875276565552,grad_norm: 0.9999990094356852, iteration: 42888
loss: 1.0437471866607666,grad_norm: 0.9999992203027068, iteration: 42889
loss: 1.012065052986145,grad_norm: 0.9999991278898904, iteration: 42890
loss: 1.0367470979690552,grad_norm: 0.999999036635821, iteration: 42891
loss: 1.0003619194030762,grad_norm: 0.8063738625783136, iteration: 42892
loss: 0.9548436403274536,grad_norm: 0.7879061503571136, iteration: 42893
loss: 1.0246801376342773,grad_norm: 0.9999990303782217, iteration: 42894
loss: 1.0129376649856567,grad_norm: 0.9747576291135606, iteration: 42895
loss: 1.0234986543655396,grad_norm: 0.9999990262735177, iteration: 42896
loss: 1.06116783618927,grad_norm: 0.9999993218371661, iteration: 42897
loss: 0.996194064617157,grad_norm: 0.7942435528438249, iteration: 42898
loss: 0.9933695793151855,grad_norm: 0.8897995194411806, iteration: 42899
loss: 1.0196589231491089,grad_norm: 0.9999990175170249, iteration: 42900
loss: 1.0613139867782593,grad_norm: 0.9999993846084598, iteration: 42901
loss: 1.0562713146209717,grad_norm: 0.999999712141062, iteration: 42902
loss: 1.0520020723342896,grad_norm: 0.9999993697187974, iteration: 42903
loss: 1.0143256187438965,grad_norm: 0.9949406398072853, iteration: 42904
loss: 1.0047084093093872,grad_norm: 0.9487915628411023, iteration: 42905
loss: 1.0003769397735596,grad_norm: 0.9571865762636866, iteration: 42906
loss: 1.0015138387680054,grad_norm: 0.9999994841872896, iteration: 42907
loss: 1.0290054082870483,grad_norm: 0.9999992661698205, iteration: 42908
loss: 0.9919286370277405,grad_norm: 0.9181529577594123, iteration: 42909
loss: 1.0031591653823853,grad_norm: 0.9966830147668266, iteration: 42910
loss: 1.0116029977798462,grad_norm: 0.9999989501630577, iteration: 42911
loss: 0.999630331993103,grad_norm: 0.999999095518805, iteration: 42912
loss: 1.0323033332824707,grad_norm: 0.9999995161620626, iteration: 42913
loss: 1.049768328666687,grad_norm: 0.960270215218131, iteration: 42914
loss: 1.0265662670135498,grad_norm: 0.9999999208582147, iteration: 42915
loss: 0.9919248819351196,grad_norm: 0.8739249566501397, iteration: 42916
loss: 1.0126675367355347,grad_norm: 0.9999991395578957, iteration: 42917
loss: 1.0068117380142212,grad_norm: 0.9811629802541688, iteration: 42918
loss: 0.9779692888259888,grad_norm: 0.9999991881373607, iteration: 42919
loss: 1.0202503204345703,grad_norm: 0.9644730889021476, iteration: 42920
loss: 1.0129765272140503,grad_norm: 0.9999992368554209, iteration: 42921
loss: 1.0535171031951904,grad_norm: 1.000000019685567, iteration: 42922
loss: 0.9897257685661316,grad_norm: 0.9835687704791397, iteration: 42923
loss: 1.050413966178894,grad_norm: 0.9999993157156063, iteration: 42924
loss: 0.9940686225891113,grad_norm: 0.8706261135716672, iteration: 42925
loss: 1.0413618087768555,grad_norm: 0.9999999358723408, iteration: 42926
loss: 1.0531353950500488,grad_norm: 0.9999992547662638, iteration: 42927
loss: 1.0284429788589478,grad_norm: 0.9999990212650928, iteration: 42928
loss: 1.0560832023620605,grad_norm: 0.9999993279853285, iteration: 42929
loss: 1.0571701526641846,grad_norm: 0.9999990693316588, iteration: 42930
loss: 1.0235669612884521,grad_norm: 0.9999996938144127, iteration: 42931
loss: 1.0165826082229614,grad_norm: 0.8076623275462481, iteration: 42932
loss: 1.013606309890747,grad_norm: 0.9999995299373138, iteration: 42933
loss: 1.037333369255066,grad_norm: 0.9999999871304502, iteration: 42934
loss: 1.0044881105422974,grad_norm: 0.9999990232966404, iteration: 42935
loss: 1.071584939956665,grad_norm: 0.9956752632626157, iteration: 42936
loss: 0.9961724281311035,grad_norm: 0.9999995935060707, iteration: 42937
loss: 1.0133219957351685,grad_norm: 0.9647478838722449, iteration: 42938
loss: 1.0339879989624023,grad_norm: 0.9999999126346777, iteration: 42939
loss: 1.315265417098999,grad_norm: 0.9999998797695032, iteration: 42940
loss: 1.0117881298065186,grad_norm: 0.9999991858249038, iteration: 42941
loss: 1.026788353919983,grad_norm: 0.9319098816290373, iteration: 42942
loss: 1.0301735401153564,grad_norm: 0.8287259115931651, iteration: 42943
loss: 1.0281590223312378,grad_norm: 0.9999993282830517, iteration: 42944
loss: 1.0615284442901611,grad_norm: 0.892486329458228, iteration: 42945
loss: 1.0040018558502197,grad_norm: 0.9999995201923533, iteration: 42946
loss: 1.0365523099899292,grad_norm: 0.9385029257500722, iteration: 42947
loss: 1.0218477249145508,grad_norm: 0.9854172492530802, iteration: 42948
loss: 1.0049668550491333,grad_norm: 0.7993399285470613, iteration: 42949
loss: 1.004980206489563,grad_norm: 0.9652446045881925, iteration: 42950
loss: 1.0129743814468384,grad_norm: 0.9258617919365394, iteration: 42951
loss: 0.9952585101127625,grad_norm: 0.7386494786734809, iteration: 42952
loss: 1.0080782175064087,grad_norm: 0.9999990474585346, iteration: 42953
loss: 0.9799574017524719,grad_norm: 0.9999991205202671, iteration: 42954
loss: 1.0111433267593384,grad_norm: 0.9290131578875065, iteration: 42955
loss: 1.0039571523666382,grad_norm: 0.8170374331156773, iteration: 42956
loss: 1.0124248266220093,grad_norm: 0.8084260586606555, iteration: 42957
loss: 1.0003665685653687,grad_norm: 0.999999181304302, iteration: 42958
loss: 1.0411626100540161,grad_norm: 0.8989785165680579, iteration: 42959
loss: 1.0193228721618652,grad_norm: 0.7971087700846304, iteration: 42960
loss: 1.0386444330215454,grad_norm: 0.9999994401856953, iteration: 42961
loss: 0.9436745047569275,grad_norm: 0.9672240557403352, iteration: 42962
loss: 1.0022826194763184,grad_norm: 0.9999991060129446, iteration: 42963
loss: 1.043425440788269,grad_norm: 0.9999990791839203, iteration: 42964
loss: 1.0042222738265991,grad_norm: 0.9999997024606998, iteration: 42965
loss: 0.9802526235580444,grad_norm: 0.9999990806604705, iteration: 42966
loss: 0.999281108379364,grad_norm: 0.99999895903197, iteration: 42967
loss: 1.0078701972961426,grad_norm: 0.9999992073189239, iteration: 42968
loss: 0.9816877245903015,grad_norm: 0.999999170053812, iteration: 42969
loss: 0.9883860945701599,grad_norm: 0.8241238310128267, iteration: 42970
loss: 0.9720736145973206,grad_norm: 0.9999990045419987, iteration: 42971
loss: 1.0460633039474487,grad_norm: 0.9999991123002885, iteration: 42972
loss: 1.0298476219177246,grad_norm: 0.8208611223614306, iteration: 42973
loss: 1.0317085981369019,grad_norm: 0.9999992247599726, iteration: 42974
loss: 0.9940388202667236,grad_norm: 0.9273663687332134, iteration: 42975
loss: 1.0044546127319336,grad_norm: 0.9999991108115899, iteration: 42976
loss: 1.0115114450454712,grad_norm: 0.8585843417928936, iteration: 42977
loss: 1.0441380739212036,grad_norm: 0.9999990125787914, iteration: 42978
loss: 1.0307350158691406,grad_norm: 0.9999990045329716, iteration: 42979
loss: 1.076684832572937,grad_norm: 0.999999455552283, iteration: 42980
loss: 1.0262370109558105,grad_norm: 0.9999993237020096, iteration: 42981
loss: 1.0480566024780273,grad_norm: 0.9999998759608854, iteration: 42982
loss: 1.0085105895996094,grad_norm: 0.9352002096118878, iteration: 42983
loss: 0.9944359064102173,grad_norm: 0.8340151288535075, iteration: 42984
loss: 1.0609749555587769,grad_norm: 0.9704243226010265, iteration: 42985
loss: 1.0102518796920776,grad_norm: 0.9521540433984172, iteration: 42986
loss: 1.0634394884109497,grad_norm: 0.9999998830603376, iteration: 42987
loss: 0.9782872796058655,grad_norm: 0.9999995620412202, iteration: 42988
loss: 1.145727515220642,grad_norm: 0.9999997267988134, iteration: 42989
loss: 1.0952026844024658,grad_norm: 0.9999993877259956, iteration: 42990
loss: 0.9860268831253052,grad_norm: 0.9999992981384823, iteration: 42991
loss: 1.0119282007217407,grad_norm: 0.9999994839908197, iteration: 42992
loss: 1.0189238786697388,grad_norm: 0.9999992067084633, iteration: 42993
loss: 0.9822672009468079,grad_norm: 0.9999991749469077, iteration: 42994
loss: 1.388893723487854,grad_norm: 0.9999995919722158, iteration: 42995
loss: 1.6560049057006836,grad_norm: 0.9999998784016179, iteration: 42996
loss: 0.9993724822998047,grad_norm: 0.9999994815372655, iteration: 42997
loss: 1.0537596940994263,grad_norm: 0.9999998749800487, iteration: 42998
loss: 1.0930376052856445,grad_norm: 0.9999996908410579, iteration: 42999
loss: 1.0078001022338867,grad_norm: 0.9552986354270214, iteration: 43000
loss: 1.1659985780715942,grad_norm: 0.9999995763902915, iteration: 43001
loss: 1.1525404453277588,grad_norm: 0.9999998274893054, iteration: 43002
loss: 1.0153250694274902,grad_norm: 0.961442081389631, iteration: 43003
loss: 1.0159094333648682,grad_norm: 0.999999114761568, iteration: 43004
loss: 1.0093501806259155,grad_norm: 0.999999361013462, iteration: 43005
loss: 1.0253398418426514,grad_norm: 0.9999994113051379, iteration: 43006
loss: 1.0359909534454346,grad_norm: 0.9999998143257208, iteration: 43007
loss: 1.014812707901001,grad_norm: 0.9999995256497982, iteration: 43008
loss: 1.0536446571350098,grad_norm: 0.9999997666291277, iteration: 43009
loss: 1.1212115287780762,grad_norm: 0.9999996131124876, iteration: 43010
loss: 1.3238383531570435,grad_norm: 0.9999997249569176, iteration: 43011
loss: 1.0418542623519897,grad_norm: 0.9999995044629374, iteration: 43012
loss: 1.008180022239685,grad_norm: 0.9999997990961709, iteration: 43013
loss: 0.9716758131980896,grad_norm: 0.9612434335924398, iteration: 43014
loss: 1.0070035457611084,grad_norm: 0.9999993004361201, iteration: 43015
loss: 1.0100181102752686,grad_norm: 0.9999993580689865, iteration: 43016
loss: 1.024710774421692,grad_norm: 0.9002354054905145, iteration: 43017
loss: 1.061057686805725,grad_norm: 0.8621692817449457, iteration: 43018
loss: 1.0106867551803589,grad_norm: 0.9999989443622945, iteration: 43019
loss: 1.0019563436508179,grad_norm: 0.9999991517842975, iteration: 43020
loss: 0.9796996116638184,grad_norm: 0.9999992219137295, iteration: 43021
loss: 1.133529543876648,grad_norm: 0.9999998945636407, iteration: 43022
loss: 1.0246851444244385,grad_norm: 0.9999991831908395, iteration: 43023
loss: 1.0112377405166626,grad_norm: 0.9999991240975754, iteration: 43024
loss: 0.9866568446159363,grad_norm: 0.9999994356066162, iteration: 43025
loss: 1.078018069267273,grad_norm: 0.9999993388120154, iteration: 43026
loss: 0.9631143808364868,grad_norm: 0.9999995676179758, iteration: 43027
loss: 1.0172646045684814,grad_norm: 0.9290455312354245, iteration: 43028
loss: 0.9996499419212341,grad_norm: 0.9185167672135637, iteration: 43029
loss: 1.0258073806762695,grad_norm: 0.9999995937900124, iteration: 43030
loss: 1.0414150953292847,grad_norm: 0.9999993940414206, iteration: 43031
loss: 1.0683213472366333,grad_norm: 0.9999993912573462, iteration: 43032
loss: 0.9561204314231873,grad_norm: 0.9766686390579026, iteration: 43033
loss: 1.0187091827392578,grad_norm: 0.9999990835377838, iteration: 43034
loss: 1.0798053741455078,grad_norm: 0.9999995571745162, iteration: 43035
loss: 1.0209190845489502,grad_norm: 0.9999991080804865, iteration: 43036
loss: 1.0157890319824219,grad_norm: 0.7877552534511548, iteration: 43037
loss: 1.0232183933258057,grad_norm: 0.9999993613226957, iteration: 43038
loss: 1.049410104751587,grad_norm: 0.999999925185122, iteration: 43039
loss: 1.0014057159423828,grad_norm: 0.9532428850916481, iteration: 43040
loss: 0.9854670166969299,grad_norm: 0.9999990920088899, iteration: 43041
loss: 1.0496025085449219,grad_norm: 0.9999997653081079, iteration: 43042
loss: 1.0064946413040161,grad_norm: 0.8893487961073817, iteration: 43043
loss: 1.014119029045105,grad_norm: 0.9736362227091562, iteration: 43044
loss: 0.9976155757904053,grad_norm: 0.915994728647454, iteration: 43045
loss: 1.063768982887268,grad_norm: 0.9999992671316397, iteration: 43046
loss: 1.0193742513656616,grad_norm: 0.9999993516409299, iteration: 43047
loss: 1.0651055574417114,grad_norm: 0.9999997262671343, iteration: 43048
loss: 1.0291469097137451,grad_norm: 0.9999990397673715, iteration: 43049
loss: 0.9894856810569763,grad_norm: 0.9999992192517051, iteration: 43050
loss: 0.9768874049186707,grad_norm: 0.9999990913690171, iteration: 43051
loss: 1.0584055185317993,grad_norm: 0.9999992389649262, iteration: 43052
loss: 1.0000638961791992,grad_norm: 0.9999994605150155, iteration: 43053
loss: 1.0179587602615356,grad_norm: 0.9999990357829472, iteration: 43054
loss: 0.9861939549446106,grad_norm: 0.8706822001784013, iteration: 43055
loss: 0.9993190765380859,grad_norm: 0.9999993786630117, iteration: 43056
loss: 0.982710063457489,grad_norm: 0.9678445090229607, iteration: 43057
loss: 1.0895272493362427,grad_norm: 0.9999996978433939, iteration: 43058
loss: 1.0303092002868652,grad_norm: 0.9999994101057439, iteration: 43059
loss: 1.0047677755355835,grad_norm: 0.9705381916387811, iteration: 43060
loss: 0.9914063811302185,grad_norm: 0.9999992806174617, iteration: 43061
loss: 0.9795371890068054,grad_norm: 0.8404856361558618, iteration: 43062
loss: 1.005646824836731,grad_norm: 0.8954478289731892, iteration: 43063
loss: 1.037410020828247,grad_norm: 0.9216785410859183, iteration: 43064
loss: 1.048368215560913,grad_norm: 0.9240090481675944, iteration: 43065
loss: 1.0280698537826538,grad_norm: 0.8731571933325868, iteration: 43066
loss: 1.01206636428833,grad_norm: 0.9382487850044513, iteration: 43067
loss: 1.0056530237197876,grad_norm: 0.9999994068186842, iteration: 43068
loss: 0.9677872657775879,grad_norm: 0.9999992377619317, iteration: 43069
loss: 1.0307984352111816,grad_norm: 0.9999990990322931, iteration: 43070
loss: 1.0221354961395264,grad_norm: 0.8280239122673356, iteration: 43071
loss: 0.9921664595603943,grad_norm: 0.9999991102482508, iteration: 43072
loss: 1.0446490049362183,grad_norm: 0.9999991109321177, iteration: 43073
loss: 0.9818196892738342,grad_norm: 0.9999990527937901, iteration: 43074
loss: 1.020496129989624,grad_norm: 0.9999990214756392, iteration: 43075
loss: 1.1075990200042725,grad_norm: 0.9999993156017558, iteration: 43076
loss: 1.0061007738113403,grad_norm: 0.9403013726844034, iteration: 43077
loss: 1.0002084970474243,grad_norm: 0.940437215018482, iteration: 43078
loss: 1.231231451034546,grad_norm: 0.9999993192045374, iteration: 43079
loss: 1.0621074438095093,grad_norm: 0.9999995292509126, iteration: 43080
loss: 0.9719619750976562,grad_norm: 0.96414975383597, iteration: 43081
loss: 0.9913881421089172,grad_norm: 0.9999991332820389, iteration: 43082
loss: 0.9906318187713623,grad_norm: 0.9999990316517917, iteration: 43083
loss: 0.9925516843795776,grad_norm: 0.9999991354476624, iteration: 43084
loss: 1.0454336404800415,grad_norm: 0.9999992392677741, iteration: 43085
loss: 1.0150213241577148,grad_norm: 0.9664599427571091, iteration: 43086
loss: 1.0117735862731934,grad_norm: 0.9999992370182327, iteration: 43087
loss: 1.0321139097213745,grad_norm: 0.9999991661738593, iteration: 43088
loss: 0.9871814250946045,grad_norm: 0.874152401150477, iteration: 43089
loss: 0.9826173782348633,grad_norm: 0.8331861224711035, iteration: 43090
loss: 0.975067675113678,grad_norm: 0.8969572071811993, iteration: 43091
loss: 1.0091257095336914,grad_norm: 0.9027845854949846, iteration: 43092
loss: 0.9848703742027283,grad_norm: 0.9364229811521416, iteration: 43093
loss: 1.001386046409607,grad_norm: 0.9241781276223732, iteration: 43094
loss: 0.9970596432685852,grad_norm: 0.8445105576510877, iteration: 43095
loss: 1.1064527034759521,grad_norm: 0.9999996531651737, iteration: 43096
loss: 1.0137757062911987,grad_norm: 0.9999997403292723, iteration: 43097
loss: 1.0180714130401611,grad_norm: 0.9999996325487985, iteration: 43098
loss: 1.0416181087493896,grad_norm: 0.9965790517204951, iteration: 43099
loss: 1.0032734870910645,grad_norm: 0.999999847468054, iteration: 43100
loss: 1.017514705657959,grad_norm: 0.9999996088718468, iteration: 43101
loss: 1.0234770774841309,grad_norm: 0.9999999680095412, iteration: 43102
loss: 1.0241018533706665,grad_norm: 0.7974464608802356, iteration: 43103
loss: 1.004679799079895,grad_norm: 0.9566161551938457, iteration: 43104
loss: 0.9906401634216309,grad_norm: 0.9999990870615406, iteration: 43105
loss: 0.9725202322006226,grad_norm: 0.9143304159339803, iteration: 43106
loss: 1.044524073600769,grad_norm: 0.9668422546141608, iteration: 43107
loss: 1.0266969203948975,grad_norm: 0.9999989518284722, iteration: 43108
loss: 0.9945857524871826,grad_norm: 0.862401466547181, iteration: 43109
loss: 1.0253238677978516,grad_norm: 0.8140467999368086, iteration: 43110
loss: 1.0078855752944946,grad_norm: 0.999999173379806, iteration: 43111
loss: 1.0262523889541626,grad_norm: 0.9999991927869334, iteration: 43112
loss: 0.9814369082450867,grad_norm: 0.9999990298013436, iteration: 43113
loss: 0.9841800928115845,grad_norm: 0.99999926284079, iteration: 43114
loss: 1.0096677541732788,grad_norm: 0.9999990925387336, iteration: 43115
loss: 1.0475348234176636,grad_norm: 0.9999991678523158, iteration: 43116
loss: 0.9786649346351624,grad_norm: 0.9999998381401901, iteration: 43117
loss: 0.9700235724449158,grad_norm: 0.9735966774931898, iteration: 43118
loss: 1.010445475578308,grad_norm: 0.9198564603612345, iteration: 43119
loss: 1.010637879371643,grad_norm: 0.9999991592704809, iteration: 43120
loss: 1.031539797782898,grad_norm: 0.9277630969955764, iteration: 43121
loss: 1.0073719024658203,grad_norm: 0.9125211650778335, iteration: 43122
loss: 0.9662940502166748,grad_norm: 0.9999991446483806, iteration: 43123
loss: 1.013447642326355,grad_norm: 0.9999993500702287, iteration: 43124
loss: 1.03518545627594,grad_norm: 0.9999990048281041, iteration: 43125
loss: 0.9766515493392944,grad_norm: 0.9999990659141784, iteration: 43126
loss: 1.0068973302841187,grad_norm: 0.8576507745221638, iteration: 43127
loss: 1.002954125404358,grad_norm: 0.9999996956646339, iteration: 43128
loss: 0.9913613200187683,grad_norm: 0.8780991545666965, iteration: 43129
loss: 1.015939474105835,grad_norm: 0.9024276223495128, iteration: 43130
loss: 1.0245511531829834,grad_norm: 0.854856861558562, iteration: 43131
loss: 0.9852444529533386,grad_norm: 0.9999990758657131, iteration: 43132
loss: 0.9926168918609619,grad_norm: 0.9999988904785032, iteration: 43133
loss: 0.9747449159622192,grad_norm: 0.9771479680205747, iteration: 43134
loss: 1.0149401426315308,grad_norm: 0.8456241169108902, iteration: 43135
loss: 1.083017110824585,grad_norm: 0.9750770820332612, iteration: 43136
loss: 1.006357192993164,grad_norm: 0.7802999642773023, iteration: 43137
loss: 1.0163638591766357,grad_norm: 0.8845976509747524, iteration: 43138
loss: 1.0001400709152222,grad_norm: 0.9999991604332955, iteration: 43139
loss: 1.0615708827972412,grad_norm: 0.9999992290731086, iteration: 43140
loss: 1.0306122303009033,grad_norm: 0.8387010917375932, iteration: 43141
loss: 0.9586737155914307,grad_norm: 0.8850611622835051, iteration: 43142
loss: 0.9867411851882935,grad_norm: 0.9999990707986518, iteration: 43143
loss: 0.9729381799697876,grad_norm: 0.9999991562807762, iteration: 43144
loss: 0.9992299675941467,grad_norm: 0.9999992244810867, iteration: 43145
loss: 1.0313305854797363,grad_norm: 0.9494198380471355, iteration: 43146
loss: 1.0023882389068604,grad_norm: 0.8608400864663988, iteration: 43147
loss: 1.0411750078201294,grad_norm: 0.9999995774124699, iteration: 43148
loss: 0.9924852252006531,grad_norm: 0.9999989678386355, iteration: 43149
loss: 0.9976051449775696,grad_norm: 0.9999993702225309, iteration: 43150
loss: 0.9641663432121277,grad_norm: 0.8565359410526315, iteration: 43151
loss: 1.0219030380249023,grad_norm: 0.9999991058701369, iteration: 43152
loss: 1.084558129310608,grad_norm: 0.9999996201080518, iteration: 43153
loss: 0.9779417514801025,grad_norm: 0.8980473245864274, iteration: 43154
loss: 0.9736220240592957,grad_norm: 0.8663302542301063, iteration: 43155
loss: 1.016861915588379,grad_norm: 0.9999992448139803, iteration: 43156
loss: 1.0614855289459229,grad_norm: 0.9999996639757894, iteration: 43157
loss: 0.9999187588691711,grad_norm: 0.9332314763391987, iteration: 43158
loss: 0.9900924563407898,grad_norm: 0.9999992165836298, iteration: 43159
loss: 1.0104942321777344,grad_norm: 0.9942784154688845, iteration: 43160
loss: 1.007081151008606,grad_norm: 0.87211921156542, iteration: 43161
loss: 1.0136191844940186,grad_norm: 0.9999991200449376, iteration: 43162
loss: 1.012055516242981,grad_norm: 0.7930000906388204, iteration: 43163
loss: 1.0132896900177002,grad_norm: 0.747958465574084, iteration: 43164
loss: 1.0741740465164185,grad_norm: 0.9999996155623854, iteration: 43165
loss: 0.9927646517753601,grad_norm: 0.8733153638761523, iteration: 43166
loss: 1.0051379203796387,grad_norm: 0.858241105881532, iteration: 43167
loss: 1.0130170583724976,grad_norm: 0.7325684321686227, iteration: 43168
loss: 1.0272384881973267,grad_norm: 0.9244831483436325, iteration: 43169
loss: 0.9881793260574341,grad_norm: 0.9999992589653605, iteration: 43170
loss: 1.024628758430481,grad_norm: 0.99999922985348, iteration: 43171
loss: 0.9799800515174866,grad_norm: 0.9999991413982174, iteration: 43172
loss: 1.0262396335601807,grad_norm: 0.9999999053765035, iteration: 43173
loss: 0.9668523073196411,grad_norm: 0.9347587319139333, iteration: 43174
loss: 0.9636004567146301,grad_norm: 0.7472923713054848, iteration: 43175
loss: 1.0305542945861816,grad_norm: 0.9999992157107533, iteration: 43176
loss: 1.0486336946487427,grad_norm: 0.9999996778244671, iteration: 43177
loss: 0.9917960166931152,grad_norm: 0.9999994043012137, iteration: 43178
loss: 1.0792829990386963,grad_norm: 0.999999207373449, iteration: 43179
loss: 1.1354217529296875,grad_norm: 0.9999998192427089, iteration: 43180
loss: 1.028778076171875,grad_norm: 0.8553960245572183, iteration: 43181
loss: 0.979907214641571,grad_norm: 0.7970329676975596, iteration: 43182
loss: 1.0355569124221802,grad_norm: 0.9978305369824814, iteration: 43183
loss: 0.9749062657356262,grad_norm: 0.9181463691981113, iteration: 43184
loss: 0.9870493412017822,grad_norm: 0.9999990841474388, iteration: 43185
loss: 0.9763861298561096,grad_norm: 0.9789298732851819, iteration: 43186
loss: 1.001152515411377,grad_norm: 0.7886828431184615, iteration: 43187
loss: 1.0550822019577026,grad_norm: 0.9292176810911967, iteration: 43188
loss: 0.9669666886329651,grad_norm: 0.999999446483458, iteration: 43189
loss: 1.0634585618972778,grad_norm: 0.999999805642306, iteration: 43190
loss: 0.9987533092498779,grad_norm: 0.9999992839506248, iteration: 43191
loss: 0.9735279679298401,grad_norm: 0.9241177509117783, iteration: 43192
loss: 1.1629595756530762,grad_norm: 0.9999999239978186, iteration: 43193
loss: 0.9908420443534851,grad_norm: 0.8739797633058469, iteration: 43194
loss: 0.9892810583114624,grad_norm: 0.9999992040933045, iteration: 43195
loss: 0.9838213920593262,grad_norm: 0.9999994662115684, iteration: 43196
loss: 1.0144360065460205,grad_norm: 0.7795425934294505, iteration: 43197
loss: 1.0906299352645874,grad_norm: 0.9999997581185085, iteration: 43198
loss: 1.055040955543518,grad_norm: 0.999999222881319, iteration: 43199
loss: 0.9941940307617188,grad_norm: 0.9999993450826217, iteration: 43200
loss: 1.21600341796875,grad_norm: 0.9999999776766115, iteration: 43201
loss: 1.134522557258606,grad_norm: 0.9999995967528376, iteration: 43202
loss: 1.0638153553009033,grad_norm: 0.999999615424462, iteration: 43203
loss: 0.9807469248771667,grad_norm: 0.8540568468174389, iteration: 43204
loss: 1.0102027654647827,grad_norm: 0.9999991328099068, iteration: 43205
loss: 1.0089917182922363,grad_norm: 0.9999993270393234, iteration: 43206
loss: 1.1221140623092651,grad_norm: 0.9999995896153724, iteration: 43207
loss: 1.0711429119110107,grad_norm: 0.9999999711793398, iteration: 43208
loss: 1.0346827507019043,grad_norm: 0.999999723701256, iteration: 43209
loss: 1.0499377250671387,grad_norm: 0.999999138352006, iteration: 43210
loss: 1.0355629920959473,grad_norm: 0.8432321866386792, iteration: 43211
loss: 0.9859540462493896,grad_norm: 0.9897415591506175, iteration: 43212
loss: 0.970043957233429,grad_norm: 0.9999991559883977, iteration: 43213
loss: 1.0138627290725708,grad_norm: 0.9999995356231393, iteration: 43214
loss: 1.026992678642273,grad_norm: 0.878291563780823, iteration: 43215
loss: 1.0841617584228516,grad_norm: 0.9914431765528667, iteration: 43216
loss: 1.0300432443618774,grad_norm: 0.993180067012141, iteration: 43217
loss: 1.005569577217102,grad_norm: 0.9093053452649648, iteration: 43218
loss: 1.0337783098220825,grad_norm: 0.8566143450534354, iteration: 43219
loss: 1.1071542501449585,grad_norm: 0.9999991938421721, iteration: 43220
loss: 1.0051050186157227,grad_norm: 0.9999991097880707, iteration: 43221
loss: 1.0345646142959595,grad_norm: 0.9498179588625766, iteration: 43222
loss: 1.0310441255569458,grad_norm: 0.999999128254069, iteration: 43223
loss: 0.9993259310722351,grad_norm: 0.9999991871912204, iteration: 43224
loss: 1.0301800966262817,grad_norm: 0.9999995162636035, iteration: 43225
loss: 0.9988012313842773,grad_norm: 0.8536467147887569, iteration: 43226
loss: 0.9972303509712219,grad_norm: 0.999168919021665, iteration: 43227
loss: 1.0154707431793213,grad_norm: 0.9612733510828982, iteration: 43228
loss: 1.1466021537780762,grad_norm: 0.9999996648015199, iteration: 43229
loss: 1.0438637733459473,grad_norm: 0.9999994218967049, iteration: 43230
loss: 1.0222196578979492,grad_norm: 0.9397937509434736, iteration: 43231
loss: 0.9538528323173523,grad_norm: 0.9132907055313536, iteration: 43232
loss: 1.0287415981292725,grad_norm: 0.8776382979394549, iteration: 43233
loss: 0.9888151288032532,grad_norm: 0.9999991377719422, iteration: 43234
loss: 0.9993712902069092,grad_norm: 0.9999992882513562, iteration: 43235
loss: 0.9901580810546875,grad_norm: 0.9999990914350207, iteration: 43236
loss: 1.0265613794326782,grad_norm: 0.992916629455372, iteration: 43237
loss: 1.005187749862671,grad_norm: 0.9325011892975603, iteration: 43238
loss: 1.0407592058181763,grad_norm: 0.8559567724610903, iteration: 43239
loss: 1.0496547222137451,grad_norm: 0.9489917521444281, iteration: 43240
loss: 1.0142486095428467,grad_norm: 0.8616109634459587, iteration: 43241
loss: 1.0095183849334717,grad_norm: 0.9999991549371003, iteration: 43242
loss: 1.0470553636550903,grad_norm: 0.9999991925513607, iteration: 43243
loss: 1.0174803733825684,grad_norm: 0.9999996127780374, iteration: 43244
loss: 1.0310715436935425,grad_norm: 0.9999995117719517, iteration: 43245
loss: 1.0125601291656494,grad_norm: 0.9338598475327902, iteration: 43246
loss: 1.0749543905258179,grad_norm: 0.9999995031228482, iteration: 43247
loss: 0.9920784831047058,grad_norm: 0.8637257624342362, iteration: 43248
loss: 0.962194561958313,grad_norm: 0.9999990757946373, iteration: 43249
loss: 1.0284874439239502,grad_norm: 0.9999994831610439, iteration: 43250
loss: 0.97593092918396,grad_norm: 0.921970976848236, iteration: 43251
loss: 0.9688361287117004,grad_norm: 0.9465607786322194, iteration: 43252
loss: 0.9913780689239502,grad_norm: 0.9381557147522435, iteration: 43253
loss: 0.9719573259353638,grad_norm: 0.9491304787131236, iteration: 43254
loss: 1.0338302850723267,grad_norm: 0.999999383702286, iteration: 43255
loss: 0.9986950755119324,grad_norm: 0.9481638361401011, iteration: 43256
loss: 1.0125254392623901,grad_norm: 0.9999993722238233, iteration: 43257
loss: 1.0433096885681152,grad_norm: 0.9999993556969657, iteration: 43258
loss: 1.0425591468811035,grad_norm: 0.9999991881955608, iteration: 43259
loss: 0.9981104135513306,grad_norm: 0.8585765966636488, iteration: 43260
loss: 1.0021131038665771,grad_norm: 0.9999990306922426, iteration: 43261
loss: 0.9867725968360901,grad_norm: 0.9999991597899094, iteration: 43262
loss: 1.0311546325683594,grad_norm: 0.9999992226963937, iteration: 43263
loss: 1.0419971942901611,grad_norm: 0.9999995127684771, iteration: 43264
loss: 1.0042191743850708,grad_norm: 0.9132757684570584, iteration: 43265
loss: 1.0802338123321533,grad_norm: 0.9999995925766054, iteration: 43266
loss: 1.0169800519943237,grad_norm: 0.9999991108177785, iteration: 43267
loss: 1.013486623764038,grad_norm: 0.9284812855978828, iteration: 43268
loss: 1.0118823051452637,grad_norm: 0.9999990500893233, iteration: 43269
loss: 1.0016188621520996,grad_norm: 0.9078998186254785, iteration: 43270
loss: 0.997802734375,grad_norm: 0.9999991199739678, iteration: 43271
loss: 0.9831658005714417,grad_norm: 0.904920355773197, iteration: 43272
loss: 1.035270094871521,grad_norm: 0.999999691911319, iteration: 43273
loss: 1.0171846151351929,grad_norm: 0.8032438642433449, iteration: 43274
loss: 0.997107982635498,grad_norm: 0.9999992101150006, iteration: 43275
loss: 0.987250804901123,grad_norm: 0.9405361172703539, iteration: 43276
loss: 0.9835073351860046,grad_norm: 0.9999991185025618, iteration: 43277
loss: 0.9941067695617676,grad_norm: 0.9386695848795802, iteration: 43278
loss: 0.9921640753746033,grad_norm: 0.9999993620181901, iteration: 43279
loss: 0.998443603515625,grad_norm: 0.9014921760856492, iteration: 43280
loss: 1.0248889923095703,grad_norm: 0.9999995824094536, iteration: 43281
loss: 1.0181989669799805,grad_norm: 0.9999990763050268, iteration: 43282
loss: 1.004945158958435,grad_norm: 0.9999992060151597, iteration: 43283
loss: 1.0153471231460571,grad_norm: 0.9999994840498387, iteration: 43284
loss: 1.030540943145752,grad_norm: 0.9999995771319413, iteration: 43285
loss: 0.9736545085906982,grad_norm: 0.9372553488316896, iteration: 43286
loss: 1.0120513439178467,grad_norm: 0.9867880015692182, iteration: 43287
loss: 1.0047028064727783,grad_norm: 0.8841952741066499, iteration: 43288
loss: 0.9856272339820862,grad_norm: 0.9180944173395504, iteration: 43289
loss: 1.0069423913955688,grad_norm: 0.999999261818246, iteration: 43290
loss: 1.0252279043197632,grad_norm: 0.9999992507140163, iteration: 43291
loss: 1.0379453897476196,grad_norm: 0.9999990045517667, iteration: 43292
loss: 1.017842173576355,grad_norm: 0.8033989407180594, iteration: 43293
loss: 1.007914423942566,grad_norm: 0.9828319481705269, iteration: 43294
loss: 1.0232094526290894,grad_norm: 0.9999990350275133, iteration: 43295
loss: 1.023916244506836,grad_norm: 0.861055533177209, iteration: 43296
loss: 0.9838809370994568,grad_norm: 0.9083431300145824, iteration: 43297
loss: 0.9942509531974792,grad_norm: 0.9999991723682629, iteration: 43298
loss: 0.9909413456916809,grad_norm: 0.8088271928077471, iteration: 43299
loss: 1.0120561122894287,grad_norm: 0.9999991300001457, iteration: 43300
loss: 0.9747142195701599,grad_norm: 0.8294703523086964, iteration: 43301
loss: 1.0201234817504883,grad_norm: 0.9802797530037354, iteration: 43302
loss: 1.0193921327590942,grad_norm: 0.9057391107156108, iteration: 43303
loss: 0.979249894618988,grad_norm: 0.9999991761121072, iteration: 43304
loss: 1.0228619575500488,grad_norm: 0.8315512987594136, iteration: 43305
loss: 0.991678774356842,grad_norm: 0.8610445636518501, iteration: 43306
loss: 0.9803889393806458,grad_norm: 0.786621999007957, iteration: 43307
loss: 1.012939691543579,grad_norm: 0.9999991914095085, iteration: 43308
loss: 1.0262070894241333,grad_norm: 0.9183907733325684, iteration: 43309
loss: 0.9906097650527954,grad_norm: 0.9999990935871503, iteration: 43310
loss: 1.0110359191894531,grad_norm: 0.9198058169803335, iteration: 43311
loss: 1.017288327217102,grad_norm: 0.9999992697298681, iteration: 43312
loss: 1.0415929555892944,grad_norm: 0.893101025262243, iteration: 43313
loss: 1.0089762210845947,grad_norm: 0.9877325853021728, iteration: 43314
loss: 1.0166704654693604,grad_norm: 0.8473993358589332, iteration: 43315
loss: 1.0077674388885498,grad_norm: 0.8100938516308942, iteration: 43316
loss: 0.9863433241844177,grad_norm: 0.9532575764159488, iteration: 43317
loss: 0.9986838102340698,grad_norm: 0.8788650283309672, iteration: 43318
loss: 1.003070592880249,grad_norm: 0.9224056614693118, iteration: 43319
loss: 1.0800306797027588,grad_norm: 0.9999992594874144, iteration: 43320
loss: 1.0091935396194458,grad_norm: 0.999999239830326, iteration: 43321
loss: 1.005416750907898,grad_norm: 0.9999994290087018, iteration: 43322
loss: 1.0057028532028198,grad_norm: 0.9581963101369947, iteration: 43323
loss: 1.0285271406173706,grad_norm: 0.9263091427121657, iteration: 43324
loss: 0.9959057569503784,grad_norm: 0.9285805999880948, iteration: 43325
loss: 0.9885234832763672,grad_norm: 0.769289716385892, iteration: 43326
loss: 0.9834509491920471,grad_norm: 0.9999990411084817, iteration: 43327
loss: 0.9995288848876953,grad_norm: 0.9778351955459066, iteration: 43328
loss: 1.0015867948532104,grad_norm: 0.9999993674382298, iteration: 43329
loss: 1.0089975595474243,grad_norm: 0.947061304701257, iteration: 43330
loss: 1.024803638458252,grad_norm: 0.8238468276131616, iteration: 43331
loss: 1.00505530834198,grad_norm: 0.7944929322972205, iteration: 43332
loss: 0.9912110567092896,grad_norm: 0.9999991275229031, iteration: 43333
loss: 1.023378610610962,grad_norm: 0.9415516024103315, iteration: 43334
loss: 1.0797497034072876,grad_norm: 0.9999995886564311, iteration: 43335
loss: 1.0242904424667358,grad_norm: 0.9999990678051316, iteration: 43336
loss: 1.017873764038086,grad_norm: 0.9141563607741635, iteration: 43337
loss: 1.0140050649642944,grad_norm: 0.9999995944066247, iteration: 43338
loss: 0.9547749757766724,grad_norm: 0.9234141948177925, iteration: 43339
loss: 0.9928708076477051,grad_norm: 0.8794514972431726, iteration: 43340
loss: 0.9923047423362732,grad_norm: 0.9824329682292112, iteration: 43341
loss: 1.0256075859069824,grad_norm: 0.9341328070694316, iteration: 43342
loss: 0.9605444073677063,grad_norm: 0.9781908931898188, iteration: 43343
loss: 0.9964799284934998,grad_norm: 0.999999044220736, iteration: 43344
loss: 1.0131837129592896,grad_norm: 0.9067101603715482, iteration: 43345
loss: 0.9954711198806763,grad_norm: 0.855160714032265, iteration: 43346
loss: 1.0148355960845947,grad_norm: 0.9748976569711677, iteration: 43347
loss: 1.0335655212402344,grad_norm: 0.9417267232924811, iteration: 43348
loss: 1.012044072151184,grad_norm: 0.9409928288730066, iteration: 43349
loss: 0.958926260471344,grad_norm: 0.8821721093274606, iteration: 43350
loss: 0.9997197985649109,grad_norm: 0.9999991623806053, iteration: 43351
loss: 1.01325523853302,grad_norm: 0.9999990558844943, iteration: 43352
loss: 0.9855213761329651,grad_norm: 0.9543631513895018, iteration: 43353
loss: 0.9878548979759216,grad_norm: 0.9732691456369482, iteration: 43354
loss: 0.9923315048217773,grad_norm: 0.9999990343448407, iteration: 43355
loss: 1.016227126121521,grad_norm: 0.9999991873805243, iteration: 43356
loss: 1.0075321197509766,grad_norm: 0.8817214677501918, iteration: 43357
loss: 0.9812683463096619,grad_norm: 0.8421446248260509, iteration: 43358
loss: 1.029604196548462,grad_norm: 0.9999991167979352, iteration: 43359
loss: 1.138695478439331,grad_norm: 0.966656560036458, iteration: 43360
loss: 0.9978291392326355,grad_norm: 0.9999991310387768, iteration: 43361
loss: 1.1031814813613892,grad_norm: 0.9999997258649066, iteration: 43362
loss: 1.0188108682632446,grad_norm: 0.999999263141579, iteration: 43363
loss: 1.2314811944961548,grad_norm: 0.9999996493669575, iteration: 43364
loss: 1.1996946334838867,grad_norm: 0.9999997214404109, iteration: 43365
loss: 1.048802137374878,grad_norm: 0.9999990105363625, iteration: 43366
loss: 1.1085882186889648,grad_norm: 0.9999990899586961, iteration: 43367
loss: 1.034545660018921,grad_norm: 0.8522030514283868, iteration: 43368
loss: 1.0306694507598877,grad_norm: 0.9999998910843478, iteration: 43369
loss: 0.9838455319404602,grad_norm: 0.9999991326635307, iteration: 43370
loss: 1.1710875034332275,grad_norm: 0.9999992673458474, iteration: 43371
loss: 1.0642026662826538,grad_norm: 0.9999997704045307, iteration: 43372
loss: 1.180308222770691,grad_norm: 0.9999994144768539, iteration: 43373
loss: 0.9797266721725464,grad_norm: 0.9999992818544038, iteration: 43374
loss: 0.9948517084121704,grad_norm: 0.9999990394531927, iteration: 43375
loss: 0.9997236728668213,grad_norm: 0.9999990792481457, iteration: 43376
loss: 0.9563351273536682,grad_norm: 0.9210233639829116, iteration: 43377
loss: 0.9902031421661377,grad_norm: 0.9999990383186174, iteration: 43378
loss: 1.0109857320785522,grad_norm: 0.9999991700024085, iteration: 43379
loss: 0.9841267466545105,grad_norm: 0.8334748163445458, iteration: 43380
loss: 1.025352954864502,grad_norm: 0.9999992565359073, iteration: 43381
loss: 1.0131834745407104,grad_norm: 0.9999991054453131, iteration: 43382
loss: 1.002984881401062,grad_norm: 0.9999998556914287, iteration: 43383
loss: 0.988078236579895,grad_norm: 0.8610568493402022, iteration: 43384
loss: 0.9906737804412842,grad_norm: 0.8692737427509029, iteration: 43385
loss: 0.9929659962654114,grad_norm: 0.9460864963959669, iteration: 43386
loss: 0.9933052659034729,grad_norm: 0.9999995289136101, iteration: 43387
loss: 0.9816094636917114,grad_norm: 0.999998997451666, iteration: 43388
loss: 0.9900963306427002,grad_norm: 0.9999994936887009, iteration: 43389
loss: 1.0940934419631958,grad_norm: 0.9999996677356524, iteration: 43390
loss: 0.9884931445121765,grad_norm: 0.9485054295843209, iteration: 43391
loss: 1.1263391971588135,grad_norm: 0.9999997267275301, iteration: 43392
loss: 1.1873661279678345,grad_norm: 0.9999996987350713, iteration: 43393
loss: 0.9962210655212402,grad_norm: 0.9999991358535071, iteration: 43394
loss: 0.9596741199493408,grad_norm: 0.9999990554708227, iteration: 43395
loss: 1.1046383380889893,grad_norm: 0.9999991735113162, iteration: 43396
loss: 1.1109250783920288,grad_norm: 0.9999993899949401, iteration: 43397
loss: 0.9387377500534058,grad_norm: 0.9246151961555175, iteration: 43398
loss: 0.9948962330818176,grad_norm: 0.9999990370783762, iteration: 43399
loss: 1.2173645496368408,grad_norm: 0.9999997377248782, iteration: 43400
loss: 1.2196177244186401,grad_norm: 0.999999651783994, iteration: 43401
loss: 1.0352858304977417,grad_norm: 0.9999993167702421, iteration: 43402
loss: 0.9900768399238586,grad_norm: 0.999999022991242, iteration: 43403
loss: 0.9962771534919739,grad_norm: 0.9999992447904388, iteration: 43404
loss: 1.227256417274475,grad_norm: 0.9999996487910496, iteration: 43405
loss: 1.0072380304336548,grad_norm: 0.9296241488567003, iteration: 43406
loss: 1.0878231525421143,grad_norm: 0.9999991434082062, iteration: 43407
loss: 1.0720189809799194,grad_norm: 0.9999991728041332, iteration: 43408
loss: 1.1100190877914429,grad_norm: 0.9999998517985096, iteration: 43409
loss: 1.0079686641693115,grad_norm: 0.9755332948021181, iteration: 43410
loss: 1.0473096370697021,grad_norm: 0.9999995743298176, iteration: 43411
loss: 0.9904015064239502,grad_norm: 0.9334910450647206, iteration: 43412
loss: 1.014815092086792,grad_norm: 0.9436685961286955, iteration: 43413
loss: 1.0637869834899902,grad_norm: 0.9999995748818692, iteration: 43414
loss: 1.0209065675735474,grad_norm: 0.8414907213794556, iteration: 43415
loss: 1.0766180753707886,grad_norm: 0.8747560895527133, iteration: 43416
loss: 1.0213972330093384,grad_norm: 0.9221667728145986, iteration: 43417
loss: 1.0880814790725708,grad_norm: 0.9999994914629166, iteration: 43418
loss: 0.9775521755218506,grad_norm: 0.9999991187372974, iteration: 43419
loss: 0.9913683533668518,grad_norm: 0.9999996619421111, iteration: 43420
loss: 0.9948335289955139,grad_norm: 0.9700586552433283, iteration: 43421
loss: 1.0506889820098877,grad_norm: 0.9999990340313243, iteration: 43422
loss: 1.0348566770553589,grad_norm: 0.9679565065029767, iteration: 43423
loss: 1.03842294216156,grad_norm: 0.9999995913896714, iteration: 43424
loss: 1.0168379545211792,grad_norm: 0.8797754010651642, iteration: 43425
loss: 0.9901612997055054,grad_norm: 0.8021709789872613, iteration: 43426
loss: 1.0093672275543213,grad_norm: 0.9999991358990737, iteration: 43427
loss: 1.0319733619689941,grad_norm: 0.9999993448111849, iteration: 43428
loss: 0.9930341839790344,grad_norm: 0.9999990603949347, iteration: 43429
loss: 0.9991530179977417,grad_norm: 0.9385448313228082, iteration: 43430
loss: 1.06303071975708,grad_norm: 0.9999992308484809, iteration: 43431
loss: 1.033597469329834,grad_norm: 0.9999993357175223, iteration: 43432
loss: 1.0366125106811523,grad_norm: 0.9999991478395679, iteration: 43433
loss: 1.016850471496582,grad_norm: 0.8301998551726079, iteration: 43434
loss: 1.0708459615707397,grad_norm: 0.9999997883659842, iteration: 43435
loss: 1.012954831123352,grad_norm: 0.9999997190276696, iteration: 43436
loss: 1.0295555591583252,grad_norm: 0.982679207055204, iteration: 43437
loss: 0.9849003553390503,grad_norm: 0.7625015073450073, iteration: 43438
loss: 1.0114309787750244,grad_norm: 0.9999995239828015, iteration: 43439
loss: 1.045697569847107,grad_norm: 0.8980264527734311, iteration: 43440
loss: 1.0300956964492798,grad_norm: 0.9999992154149222, iteration: 43441
loss: 1.0363876819610596,grad_norm: 0.9999990122282908, iteration: 43442
loss: 0.9860342741012573,grad_norm: 0.8536131779084006, iteration: 43443
loss: 1.0288896560668945,grad_norm: 0.9999993130523, iteration: 43444
loss: 1.0589780807495117,grad_norm: 0.9999993870925551, iteration: 43445
loss: 1.016906976699829,grad_norm: 0.9999993949788274, iteration: 43446
loss: 1.016609787940979,grad_norm: 0.9999990330320199, iteration: 43447
loss: 1.0651085376739502,grad_norm: 0.9999997480610582, iteration: 43448
loss: 1.0219242572784424,grad_norm: 0.904011110014781, iteration: 43449
loss: 1.0412615537643433,grad_norm: 0.9999991632674288, iteration: 43450
loss: 1.062793493270874,grad_norm: 0.9999994480364113, iteration: 43451
loss: 1.0116190910339355,grad_norm: 0.9999992396736895, iteration: 43452
loss: 0.9637998342514038,grad_norm: 0.8425481331955743, iteration: 43453
loss: 0.9903748631477356,grad_norm: 0.9999992581949766, iteration: 43454
loss: 1.0212637186050415,grad_norm: 0.9999992614190323, iteration: 43455
loss: 1.0089359283447266,grad_norm: 0.9999989726841049, iteration: 43456
loss: 1.0502369403839111,grad_norm: 0.9999996027241325, iteration: 43457
loss: 1.0879393815994263,grad_norm: 0.9999992293372445, iteration: 43458
loss: 1.0146373510360718,grad_norm: 0.9999990882756641, iteration: 43459
loss: 1.0046923160552979,grad_norm: 0.8633668942691961, iteration: 43460
loss: 1.0678216218948364,grad_norm: 0.9999996322028366, iteration: 43461
loss: 1.0233216285705566,grad_norm: 0.9999996367068443, iteration: 43462
loss: 1.0135940313339233,grad_norm: 0.9999991018086567, iteration: 43463
loss: 1.1507835388183594,grad_norm: 0.9999996412225041, iteration: 43464
loss: 0.9863972067832947,grad_norm: 0.9999990428414697, iteration: 43465
loss: 0.9929578304290771,grad_norm: 0.9115238003668852, iteration: 43466
loss: 0.9820324778556824,grad_norm: 0.8445878243519076, iteration: 43467
loss: 1.0840139389038086,grad_norm: 0.999999707074563, iteration: 43468
loss: 0.9820374846458435,grad_norm: 0.9999991598900906, iteration: 43469
loss: 1.0113707780838013,grad_norm: 0.9999995268136411, iteration: 43470
loss: 1.014230489730835,grad_norm: 0.763885397282682, iteration: 43471
loss: 1.0947211980819702,grad_norm: 0.9999993199993062, iteration: 43472
loss: 1.1112717390060425,grad_norm: 0.9999999122879093, iteration: 43473
loss: 1.0095112323760986,grad_norm: 0.8861008908622515, iteration: 43474
loss: 1.0645571947097778,grad_norm: 0.9999993825769232, iteration: 43475
loss: 0.9985852241516113,grad_norm: 0.8704397832384426, iteration: 43476
loss: 0.9853928685188293,grad_norm: 0.981792721089501, iteration: 43477
loss: 1.0198512077331543,grad_norm: 0.9999993221804327, iteration: 43478
loss: 1.0312360525131226,grad_norm: 0.8853183608165621, iteration: 43479
loss: 1.0227866172790527,grad_norm: 0.9999990780428598, iteration: 43480
loss: 1.0776560306549072,grad_norm: 0.9999992003496241, iteration: 43481
loss: 1.076330304145813,grad_norm: 0.9999991607073, iteration: 43482
loss: 1.0809521675109863,grad_norm: 0.9999997420991531, iteration: 43483
loss: 1.004240870475769,grad_norm: 0.9999992170197907, iteration: 43484
loss: 1.0227582454681396,grad_norm: 0.9999992739096164, iteration: 43485
loss: 0.9750747680664062,grad_norm: 0.9999991083901709, iteration: 43486
loss: 0.9831933379173279,grad_norm: 0.9344557404945203, iteration: 43487
loss: 0.9761338829994202,grad_norm: 0.9999991791956306, iteration: 43488
loss: 0.9822114109992981,grad_norm: 0.9999990321417723, iteration: 43489
loss: 0.9988996386528015,grad_norm: 0.837559904150296, iteration: 43490
loss: 1.0069177150726318,grad_norm: 0.9494237662537893, iteration: 43491
loss: 1.0620464086532593,grad_norm: 0.9999989379231394, iteration: 43492
loss: 1.1274323463439941,grad_norm: 0.9999995286336617, iteration: 43493
loss: 1.0817643404006958,grad_norm: 0.9999991123841826, iteration: 43494
loss: 0.9875363707542419,grad_norm: 0.9999997794093626, iteration: 43495
loss: 1.0053980350494385,grad_norm: 0.8881146410671041, iteration: 43496
loss: 1.0420348644256592,grad_norm: 0.9999993274220815, iteration: 43497
loss: 1.0541448593139648,grad_norm: 0.9999996153573082, iteration: 43498
loss: 1.0197993516921997,grad_norm: 0.9117558168737473, iteration: 43499
loss: 1.006095290184021,grad_norm: 0.9999995810639268, iteration: 43500
loss: 1.0442688465118408,grad_norm: 0.9999992284041918, iteration: 43501
loss: 1.0265480279922485,grad_norm: 0.9999991902938932, iteration: 43502
loss: 1.0264650583267212,grad_norm: 0.9999995720952048, iteration: 43503
loss: 1.0034384727478027,grad_norm: 0.9064788506134813, iteration: 43504
loss: 1.0156292915344238,grad_norm: 0.8925737438012002, iteration: 43505
loss: 1.0306954383850098,grad_norm: 0.9999990825226601, iteration: 43506
loss: 1.061687707901001,grad_norm: 0.9999994354778766, iteration: 43507
loss: 1.0095020532608032,grad_norm: 0.9999991043955162, iteration: 43508
loss: 1.0179400444030762,grad_norm: 0.9999990950192573, iteration: 43509
loss: 1.0171523094177246,grad_norm: 0.9999991122622434, iteration: 43510
loss: 0.9773768782615662,grad_norm: 0.9999992061174201, iteration: 43511
loss: 1.1154916286468506,grad_norm: 0.9999998310421513, iteration: 43512
loss: 1.0690827369689941,grad_norm: 0.9999990050690646, iteration: 43513
loss: 1.0290746688842773,grad_norm: 0.9999991966840333, iteration: 43514
loss: 0.9942748546600342,grad_norm: 0.9871485974349699, iteration: 43515
loss: 1.0155912637710571,grad_norm: 0.9999992078811475, iteration: 43516
loss: 0.9670537114143372,grad_norm: 0.999999152750757, iteration: 43517
loss: 0.9990285634994507,grad_norm: 0.999999281753099, iteration: 43518
loss: 1.0161346197128296,grad_norm: 0.85327273788778, iteration: 43519
loss: 1.1185873746871948,grad_norm: 0.9999999624825042, iteration: 43520
loss: 1.0264348983764648,grad_norm: 0.9999993345645366, iteration: 43521
loss: 1.0268131494522095,grad_norm: 0.999999247933209, iteration: 43522
loss: 0.9754196405410767,grad_norm: 0.999999105899725, iteration: 43523
loss: 1.0405216217041016,grad_norm: 0.9999993545742613, iteration: 43524
loss: 1.0029221773147583,grad_norm: 0.8642925750262835, iteration: 43525
loss: 0.9995163679122925,grad_norm: 0.9179031989441696, iteration: 43526
loss: 1.0389279127120972,grad_norm: 0.8510359413947485, iteration: 43527
loss: 1.0200107097625732,grad_norm: 0.9999990287446056, iteration: 43528
loss: 0.9893504977226257,grad_norm: 0.9999989072210715, iteration: 43529
loss: 0.9868292808532715,grad_norm: 0.9999997819407846, iteration: 43530
loss: 0.9853758215904236,grad_norm: 0.9999990167906981, iteration: 43531
loss: 0.9894503355026245,grad_norm: 0.9999989551757287, iteration: 43532
loss: 0.9789589643478394,grad_norm: 0.8954177075006868, iteration: 43533
loss: 1.1552369594573975,grad_norm: 0.9999996723145566, iteration: 43534
loss: 1.0345615148544312,grad_norm: 0.999999295675587, iteration: 43535
loss: 0.9820690155029297,grad_norm: 0.8944692306458469, iteration: 43536
loss: 1.1743669509887695,grad_norm: 0.9999998858190549, iteration: 43537
loss: 0.9971627593040466,grad_norm: 0.9999991527417963, iteration: 43538
loss: 0.9698473215103149,grad_norm: 0.9999991041570122, iteration: 43539
loss: 1.0327974557876587,grad_norm: 0.9999990479891803, iteration: 43540
loss: 1.0497688055038452,grad_norm: 0.9999990813868354, iteration: 43541
loss: 1.0679361820220947,grad_norm: 0.9999998646211115, iteration: 43542
loss: 1.034584403038025,grad_norm: 0.9999991132635951, iteration: 43543
loss: 1.0005695819854736,grad_norm: 0.9277365037316988, iteration: 43544
loss: 1.0174061059951782,grad_norm: 0.9999989845789782, iteration: 43545
loss: 1.0029351711273193,grad_norm: 0.9999991813814226, iteration: 43546
loss: 1.03715980052948,grad_norm: 0.9999999963139404, iteration: 43547
loss: 1.0864191055297852,grad_norm: 0.9999998250043424, iteration: 43548
loss: 1.045119047164917,grad_norm: 0.9999992502825676, iteration: 43549
loss: 1.0228747129440308,grad_norm: 0.999999261421425, iteration: 43550
loss: 1.076461911201477,grad_norm: 0.9999993062299081, iteration: 43551
loss: 0.9870020151138306,grad_norm: 0.9999990713586242, iteration: 43552
loss: 1.0121243000030518,grad_norm: 0.9999990973779482, iteration: 43553
loss: 1.0232243537902832,grad_norm: 0.7927820839308731, iteration: 43554
loss: 1.0384433269500732,grad_norm: 0.9999996198820911, iteration: 43555
loss: 0.981788158416748,grad_norm: 0.9483826490339003, iteration: 43556
loss: 0.9820398092269897,grad_norm: 0.9769495760598451, iteration: 43557
loss: 1.0015674829483032,grad_norm: 0.999999036052209, iteration: 43558
loss: 0.9525144100189209,grad_norm: 0.9999992072160773, iteration: 43559
loss: 1.062719702720642,grad_norm: 0.9700910970103468, iteration: 43560
loss: 1.0518654584884644,grad_norm: 0.9999998079732416, iteration: 43561
loss: 1.0060423612594604,grad_norm: 0.9999994204690822, iteration: 43562
loss: 0.9700917601585388,grad_norm: 0.9010109220225702, iteration: 43563
loss: 1.048915982246399,grad_norm: 0.9999999408296807, iteration: 43564
loss: 0.9680922031402588,grad_norm: 0.904773685995819, iteration: 43565
loss: 0.9947818517684937,grad_norm: 0.8555259311302518, iteration: 43566
loss: 1.0251362323760986,grad_norm: 0.9902613320564264, iteration: 43567
loss: 0.9902430772781372,grad_norm: 0.9999991535488723, iteration: 43568
loss: 0.9732823967933655,grad_norm: 0.8391395660008257, iteration: 43569
loss: 1.0246516466140747,grad_norm: 0.9999991992102513, iteration: 43570
loss: 0.9836090207099915,grad_norm: 0.9999990596041418, iteration: 43571
loss: 1.0254395008087158,grad_norm: 0.9496026981364454, iteration: 43572
loss: 1.0042575597763062,grad_norm: 0.9999993395953084, iteration: 43573
loss: 1.0625874996185303,grad_norm: 0.9999990669105019, iteration: 43574
loss: 1.0174458026885986,grad_norm: 0.8513572602557563, iteration: 43575
loss: 0.9947717189788818,grad_norm: 0.9999995330947324, iteration: 43576
loss: 1.0260257720947266,grad_norm: 0.8946623201326357, iteration: 43577
loss: 0.995107114315033,grad_norm: 0.9305722965039465, iteration: 43578
loss: 1.0345511436462402,grad_norm: 0.9999991084223482, iteration: 43579
loss: 0.9917052388191223,grad_norm: 0.8536233572657335, iteration: 43580
loss: 0.9956334233283997,grad_norm: 0.9999994579709475, iteration: 43581
loss: 1.0303531885147095,grad_norm: 0.8956056986064989, iteration: 43582
loss: 0.9996024966239929,grad_norm: 0.9999990202106985, iteration: 43583
loss: 1.02445650100708,grad_norm: 0.9999991493786646, iteration: 43584
loss: 1.0288363695144653,grad_norm: 0.8955598460854018, iteration: 43585
loss: 1.0282896757125854,grad_norm: 0.942981532153656, iteration: 43586
loss: 1.0316613912582397,grad_norm: 0.8080843315487506, iteration: 43587
loss: 1.0102883577346802,grad_norm: 0.9639052220341953, iteration: 43588
loss: 1.0005708932876587,grad_norm: 0.9999992398110313, iteration: 43589
loss: 1.0076137781143188,grad_norm: 0.8628507925442982, iteration: 43590
loss: 0.9930229783058167,grad_norm: 0.999999245498054, iteration: 43591
loss: 1.0762968063354492,grad_norm: 0.999999225716006, iteration: 43592
loss: 1.001283049583435,grad_norm: 0.9124504015522225, iteration: 43593
loss: 1.0159821510314941,grad_norm: 0.9999995961544716, iteration: 43594
loss: 1.0037773847579956,grad_norm: 0.9999998204666734, iteration: 43595
loss: 1.0116804838180542,grad_norm: 0.999999089819127, iteration: 43596
loss: 1.0140246152877808,grad_norm: 0.8532912084331487, iteration: 43597
loss: 1.0123224258422852,grad_norm: 0.7385696727407292, iteration: 43598
loss: 1.017521858215332,grad_norm: 0.9999994590175806, iteration: 43599
loss: 0.975777268409729,grad_norm: 0.8230150849865806, iteration: 43600
loss: 1.0104115009307861,grad_norm: 0.9821713606874517, iteration: 43601
loss: 1.0018435716629028,grad_norm: 0.9999991009457567, iteration: 43602
loss: 1.036248803138733,grad_norm: 0.9999991407171551, iteration: 43603
loss: 0.9785040020942688,grad_norm: 0.9973084298765216, iteration: 43604
loss: 0.9952483773231506,grad_norm: 0.8348098731065015, iteration: 43605
loss: 0.969710111618042,grad_norm: 0.8900409712631956, iteration: 43606
loss: 1.0116407871246338,grad_norm: 0.8372835272715592, iteration: 43607
loss: 1.0174988508224487,grad_norm: 0.9999995074230774, iteration: 43608
loss: 1.0131384134292603,grad_norm: 0.7799455999849668, iteration: 43609
loss: 1.0448819398880005,grad_norm: 0.9999991121067386, iteration: 43610
loss: 1.0619089603424072,grad_norm: 0.9999994517726105, iteration: 43611
loss: 1.000177025794983,grad_norm: 0.9999991992563881, iteration: 43612
loss: 1.0181348323822021,grad_norm: 0.9999990088793902, iteration: 43613
loss: 0.9927761554718018,grad_norm: 0.9721003215137769, iteration: 43614
loss: 1.0007919073104858,grad_norm: 0.9999991940919435, iteration: 43615
loss: 1.0076359510421753,grad_norm: 0.9667508957218971, iteration: 43616
loss: 0.9940728545188904,grad_norm: 0.9024930134912356, iteration: 43617
loss: 1.0077598094940186,grad_norm: 0.855025522310911, iteration: 43618
loss: 1.014711856842041,grad_norm: 0.8603647972690251, iteration: 43619
loss: 0.9854585528373718,grad_norm: 0.986067471694105, iteration: 43620
loss: 1.0109158754348755,grad_norm: 0.99999927140943, iteration: 43621
loss: 1.0181634426116943,grad_norm: 0.9240925707751579, iteration: 43622
loss: 0.9970981478691101,grad_norm: 0.9999990861315369, iteration: 43623
loss: 1.0165777206420898,grad_norm: 0.9999995583926624, iteration: 43624
loss: 0.9806804656982422,grad_norm: 0.7974950781281169, iteration: 43625
loss: 1.0212417840957642,grad_norm: 0.9146854729059263, iteration: 43626
loss: 0.9916769862174988,grad_norm: 0.9999990661854427, iteration: 43627
loss: 1.0213298797607422,grad_norm: 0.9999989750435483, iteration: 43628
loss: 0.9949718713760376,grad_norm: 0.9999990059992401, iteration: 43629
loss: 1.0422340631484985,grad_norm: 0.9999993008310986, iteration: 43630
loss: 0.9897608757019043,grad_norm: 0.9999990920024561, iteration: 43631
loss: 1.013451337814331,grad_norm: 0.8885850090561702, iteration: 43632
loss: 1.0124996900558472,grad_norm: 0.9614776314920627, iteration: 43633
loss: 1.0029709339141846,grad_norm: 0.9361110974611919, iteration: 43634
loss: 1.0271579027175903,grad_norm: 0.9999991067806406, iteration: 43635
loss: 0.9901746511459351,grad_norm: 0.7799374722608278, iteration: 43636
loss: 0.9971396327018738,grad_norm: 0.9010211173433689, iteration: 43637
loss: 0.9800118803977966,grad_norm: 0.9999990782198973, iteration: 43638
loss: 1.0112396478652954,grad_norm: 0.9999990471538023, iteration: 43639
loss: 0.9985416531562805,grad_norm: 0.8461295850874336, iteration: 43640
loss: 1.0002448558807373,grad_norm: 0.9999995342819872, iteration: 43641
loss: 1.0038628578186035,grad_norm: 0.9176906707524692, iteration: 43642
loss: 1.0329527854919434,grad_norm: 0.9999991268909819, iteration: 43643
loss: 1.0090179443359375,grad_norm: 0.9999991541598076, iteration: 43644
loss: 1.029515266418457,grad_norm: 0.9757117203277703, iteration: 43645
loss: 1.0170375108718872,grad_norm: 0.9999998279915318, iteration: 43646
loss: 0.9833568930625916,grad_norm: 0.9999991427681585, iteration: 43647
loss: 1.0177137851715088,grad_norm: 0.99999929219973, iteration: 43648
loss: 0.9912779331207275,grad_norm: 0.9383537240367131, iteration: 43649
loss: 1.000373125076294,grad_norm: 0.9999990555162203, iteration: 43650
loss: 0.9972110986709595,grad_norm: 0.8237846777865006, iteration: 43651
loss: 1.024192452430725,grad_norm: 0.9939253628891496, iteration: 43652
loss: 1.0023889541625977,grad_norm: 0.9999991144571364, iteration: 43653
loss: 1.0159658193588257,grad_norm: 0.7396356724205015, iteration: 43654
loss: 1.0315752029418945,grad_norm: 0.9703131756500126, iteration: 43655
loss: 1.013370394706726,grad_norm: 0.9869800780491373, iteration: 43656
loss: 0.9909378290176392,grad_norm: 0.9999990280598362, iteration: 43657
loss: 1.0096259117126465,grad_norm: 0.834516943860652, iteration: 43658
loss: 1.0219181776046753,grad_norm: 0.9999991494713444, iteration: 43659
loss: 0.9589923024177551,grad_norm: 0.9090876828704968, iteration: 43660
loss: 1.0017701387405396,grad_norm: 0.9999992120442955, iteration: 43661
loss: 1.003865361213684,grad_norm: 0.9276724210377364, iteration: 43662
loss: 0.99375319480896,grad_norm: 0.8747050866299194, iteration: 43663
loss: 1.01027512550354,grad_norm: 0.8028177797501428, iteration: 43664
loss: 0.9720467925071716,grad_norm: 0.9901323470241722, iteration: 43665
loss: 1.0138237476348877,grad_norm: 0.8518545032263088, iteration: 43666
loss: 0.9937840700149536,grad_norm: 0.9029263312733166, iteration: 43667
loss: 1.004428505897522,grad_norm: 0.9040964379891027, iteration: 43668
loss: 1.0168100595474243,grad_norm: 0.9999994853212303, iteration: 43669
loss: 1.0178592205047607,grad_norm: 0.8384085040307009, iteration: 43670
loss: 1.0247406959533691,grad_norm: 0.8750408621447286, iteration: 43671
loss: 1.0355024337768555,grad_norm: 0.9999992293446096, iteration: 43672
loss: 0.9819421768188477,grad_norm: 0.9062486781575609, iteration: 43673
loss: 1.0231927633285522,grad_norm: 0.9999992266196808, iteration: 43674
loss: 0.9849087595939636,grad_norm: 0.9072822082664119, iteration: 43675
loss: 1.050613522529602,grad_norm: 0.9999992219325964, iteration: 43676
loss: 1.0345921516418457,grad_norm: 0.999999849928014, iteration: 43677
loss: 1.0158884525299072,grad_norm: 0.9257735868048473, iteration: 43678
loss: 1.020041584968567,grad_norm: 0.9999992976161993, iteration: 43679
loss: 0.9647964835166931,grad_norm: 0.8546053876696319, iteration: 43680
loss: 0.9731763601303101,grad_norm: 0.8647150701935128, iteration: 43681
loss: 1.0303325653076172,grad_norm: 0.9277112285591954, iteration: 43682
loss: 1.00523841381073,grad_norm: 0.999998999737061, iteration: 43683
loss: 1.0091726779937744,grad_norm: 0.9999991147545877, iteration: 43684
loss: 1.0925308465957642,grad_norm: 0.9999993199051963, iteration: 43685
loss: 0.9676207900047302,grad_norm: 0.9414201726401195, iteration: 43686
loss: 1.0091513395309448,grad_norm: 0.819334475210999, iteration: 43687
loss: 0.9754445552825928,grad_norm: 0.7948033726789631, iteration: 43688
loss: 0.9944057464599609,grad_norm: 0.9778977160048712, iteration: 43689
loss: 1.0217825174331665,grad_norm: 0.9999991251774787, iteration: 43690
loss: 0.9314541816711426,grad_norm: 0.912832300745264, iteration: 43691
loss: 0.9921656847000122,grad_norm: 0.9999991123927746, iteration: 43692
loss: 0.9808288812637329,grad_norm: 0.8003993598927097, iteration: 43693
loss: 1.008271336555481,grad_norm: 0.9762622085873834, iteration: 43694
loss: 1.012168526649475,grad_norm: 0.9999992424087776, iteration: 43695
loss: 0.9952825903892517,grad_norm: 0.9999996370334916, iteration: 43696
loss: 1.0086408853530884,grad_norm: 0.8960742398171834, iteration: 43697
loss: 1.02622389793396,grad_norm: 0.999999068958366, iteration: 43698
loss: 1.0418232679367065,grad_norm: 0.9999990843771593, iteration: 43699
loss: 1.0550917387008667,grad_norm: 0.9999998595926277, iteration: 43700
loss: 0.9701544642448425,grad_norm: 0.9136368639572046, iteration: 43701
loss: 0.9857929944992065,grad_norm: 0.9622652100107677, iteration: 43702
loss: 0.9727360010147095,grad_norm: 0.8400511542872059, iteration: 43703
loss: 1.0014902353286743,grad_norm: 0.9999992176721408, iteration: 43704
loss: 1.0978635549545288,grad_norm: 0.9999990694654307, iteration: 43705
loss: 1.0402159690856934,grad_norm: 0.8936420204271747, iteration: 43706
loss: 0.9482465386390686,grad_norm: 0.9674175151191484, iteration: 43707
loss: 1.004725456237793,grad_norm: 0.9999990200765215, iteration: 43708
loss: 1.0058226585388184,grad_norm: 0.897041287145356, iteration: 43709
loss: 0.9853788614273071,grad_norm: 0.9999990230839355, iteration: 43710
loss: 0.975868284702301,grad_norm: 0.9333330247130195, iteration: 43711
loss: 0.9869189858436584,grad_norm: 0.9757941240666754, iteration: 43712
loss: 1.0061970949172974,grad_norm: 0.8124167699494937, iteration: 43713
loss: 1.0008455514907837,grad_norm: 0.9999993355856093, iteration: 43714
loss: 0.9805524945259094,grad_norm: 0.9999991455262721, iteration: 43715
loss: 1.0000360012054443,grad_norm: 0.9924520400679778, iteration: 43716
loss: 1.0377107858657837,grad_norm: 0.999999759517094, iteration: 43717
loss: 1.0219707489013672,grad_norm: 0.8846314621248634, iteration: 43718
loss: 0.9817891120910645,grad_norm: 0.9999992664004039, iteration: 43719
loss: 0.9930791258811951,grad_norm: 0.9999992298808597, iteration: 43720
loss: 0.9407724738121033,grad_norm: 0.8344115158039241, iteration: 43721
loss: 0.9503782391548157,grad_norm: 0.9999990973693801, iteration: 43722
loss: 1.0282737016677856,grad_norm: 0.9999998250073714, iteration: 43723
loss: 1.0213303565979004,grad_norm: 0.9058430138974097, iteration: 43724
loss: 0.9898535013198853,grad_norm: 0.9118509448335695, iteration: 43725
loss: 0.980079174041748,grad_norm: 0.9999991913773014, iteration: 43726
loss: 1.030354619026184,grad_norm: 0.9999991296786767, iteration: 43727
loss: 1.0332874059677124,grad_norm: 0.9999994446188298, iteration: 43728
loss: 0.9999245405197144,grad_norm: 0.9822295322728946, iteration: 43729
loss: 0.9806965589523315,grad_norm: 0.9999993117654604, iteration: 43730
loss: 1.0275691747665405,grad_norm: 0.9643188120768726, iteration: 43731
loss: 0.9985868334770203,grad_norm: 0.8014674680799909, iteration: 43732
loss: 1.0026488304138184,grad_norm: 0.920928800350063, iteration: 43733
loss: 1.0287353992462158,grad_norm: 0.7954129949843299, iteration: 43734
loss: 1.010756254196167,grad_norm: 0.9251287441914311, iteration: 43735
loss: 0.9898486733436584,grad_norm: 0.7162424772779659, iteration: 43736
loss: 1.010946273803711,grad_norm: 0.8195894447346166, iteration: 43737
loss: 0.9690128564834595,grad_norm: 0.8976705403629123, iteration: 43738
loss: 0.9913722276687622,grad_norm: 0.7678585653876113, iteration: 43739
loss: 1.0630099773406982,grad_norm: 0.9208495611807721, iteration: 43740
loss: 1.0281174182891846,grad_norm: 0.9540134519223711, iteration: 43741
loss: 1.0178542137145996,grad_norm: 0.9775351075772917, iteration: 43742
loss: 0.9732559323310852,grad_norm: 0.9999995027877978, iteration: 43743
loss: 1.045233130455017,grad_norm: 0.9999990980617361, iteration: 43744
loss: 0.985837459564209,grad_norm: 0.8991506393692571, iteration: 43745
loss: 1.0258108377456665,grad_norm: 0.8511183738986814, iteration: 43746
loss: 0.9863589406013489,grad_norm: 0.9193806603459019, iteration: 43747
loss: 1.0179641246795654,grad_norm: 0.8930643487647543, iteration: 43748
loss: 1.0191168785095215,grad_norm: 0.999999186736781, iteration: 43749
loss: 1.0367363691329956,grad_norm: 0.9999994499878253, iteration: 43750
loss: 1.0060616731643677,grad_norm: 0.9166674773230439, iteration: 43751
loss: 1.0069745779037476,grad_norm: 0.9999990868586662, iteration: 43752
loss: 0.9983645081520081,grad_norm: 0.7953660085055189, iteration: 43753
loss: 1.1912715435028076,grad_norm: 0.9999993962621352, iteration: 43754
loss: 1.0148288011550903,grad_norm: 0.9248059316287761, iteration: 43755
loss: 1.012650966644287,grad_norm: 0.8556905360399355, iteration: 43756
loss: 1.0008940696716309,grad_norm: 0.9999991077237228, iteration: 43757
loss: 1.0236107110977173,grad_norm: 0.9999997450718849, iteration: 43758
loss: 1.0038038492202759,grad_norm: 0.9999990433485542, iteration: 43759
loss: 0.9878117442131042,grad_norm: 0.9973728954154296, iteration: 43760
loss: 0.9846848249435425,grad_norm: 0.8913849880223247, iteration: 43761
loss: 0.9975267052650452,grad_norm: 0.8431906398837203, iteration: 43762
loss: 0.9909619092941284,grad_norm: 0.8208902617697569, iteration: 43763
loss: 1.0319877862930298,grad_norm: 0.9999993016421999, iteration: 43764
loss: 0.9806839823722839,grad_norm: 0.7117210343976283, iteration: 43765
loss: 0.9531763792037964,grad_norm: 0.9262810925929778, iteration: 43766
loss: 1.0302075147628784,grad_norm: 0.999999044445468, iteration: 43767
loss: 0.9899488687515259,grad_norm: 0.9999992489418446, iteration: 43768
loss: 1.072206974029541,grad_norm: 0.9999997148683972, iteration: 43769
loss: 1.0100114345550537,grad_norm: 0.834862721737634, iteration: 43770
loss: 0.9942062497138977,grad_norm: 0.9884746675111842, iteration: 43771
loss: 1.023068904876709,grad_norm: 0.9999991682590768, iteration: 43772
loss: 0.9906797409057617,grad_norm: 0.9167657884397414, iteration: 43773
loss: 1.0235791206359863,grad_norm: 0.8139461564578762, iteration: 43774
loss: 1.0318708419799805,grad_norm: 0.9999992872164425, iteration: 43775
loss: 0.9992759227752686,grad_norm: 0.937632775162098, iteration: 43776
loss: 0.9848562479019165,grad_norm: 0.8396490371730403, iteration: 43777
loss: 0.987430214881897,grad_norm: 0.9593163108227379, iteration: 43778
loss: 0.997258186340332,grad_norm: 0.9999988819065813, iteration: 43779
loss: 0.973876953125,grad_norm: 0.9999991617634687, iteration: 43780
loss: 1.0115442276000977,grad_norm: 0.9999997006512666, iteration: 43781
loss: 1.064206838607788,grad_norm: 0.9999994395874657, iteration: 43782
loss: 0.9572712182998657,grad_norm: 0.8346626662882098, iteration: 43783
loss: 1.0300174951553345,grad_norm: 0.9999992360547499, iteration: 43784
loss: 1.0508818626403809,grad_norm: 0.9999996280795043, iteration: 43785
loss: 0.9530523419380188,grad_norm: 0.8361712815341472, iteration: 43786
loss: 1.0496031045913696,grad_norm: 0.9999991467217838, iteration: 43787
loss: 1.0229454040527344,grad_norm: 0.999999399368411, iteration: 43788
loss: 0.9748200178146362,grad_norm: 0.9343080867902158, iteration: 43789
loss: 0.9838982224464417,grad_norm: 0.851834149381347, iteration: 43790
loss: 0.9946175217628479,grad_norm: 0.9583274174768259, iteration: 43791
loss: 1.0066601037979126,grad_norm: 0.9688946308405407, iteration: 43792
loss: 1.034356713294983,grad_norm: 0.9139835095525456, iteration: 43793
loss: 1.041254997253418,grad_norm: 0.9437472346651992, iteration: 43794
loss: 1.0342190265655518,grad_norm: 0.7484767060476082, iteration: 43795
loss: 1.0448055267333984,grad_norm: 0.9999991579808034, iteration: 43796
loss: 1.0060127973556519,grad_norm: 0.9999991627306175, iteration: 43797
loss: 1.0344876050949097,grad_norm: 0.9999992421273232, iteration: 43798
loss: 0.9973359704017639,grad_norm: 0.9999991306326638, iteration: 43799
loss: 1.0127224922180176,grad_norm: 0.7750092014560246, iteration: 43800
loss: 1.0279799699783325,grad_norm: 0.9999991447636106, iteration: 43801
loss: 1.0283414125442505,grad_norm: 0.94224789753034, iteration: 43802
loss: 0.9902216792106628,grad_norm: 0.9470126673375783, iteration: 43803
loss: 1.0183862447738647,grad_norm: 0.999999371079221, iteration: 43804
loss: 1.0040791034698486,grad_norm: 0.9967321274535033, iteration: 43805
loss: 1.0010924339294434,grad_norm: 0.9999989270212297, iteration: 43806
loss: 0.9928754568099976,grad_norm: 0.9210800404820619, iteration: 43807
loss: 1.0086321830749512,grad_norm: 0.9729688266848858, iteration: 43808
loss: 1.0413285493850708,grad_norm: 0.9999994215717143, iteration: 43809
loss: 1.0325556993484497,grad_norm: 0.999999380359391, iteration: 43810
loss: 1.0192499160766602,grad_norm: 0.9999990797481363, iteration: 43811
loss: 1.0299146175384521,grad_norm: 0.9987484302559949, iteration: 43812
loss: 1.0198276042938232,grad_norm: 0.9999990614518315, iteration: 43813
loss: 0.9569063782691956,grad_norm: 0.9999993113764075, iteration: 43814
loss: 0.9962021708488464,grad_norm: 0.9999990262936933, iteration: 43815
loss: 0.9732502698898315,grad_norm: 0.9999991716766046, iteration: 43816
loss: 1.009198546409607,grad_norm: 0.9999991904329582, iteration: 43817
loss: 0.9923126697540283,grad_norm: 0.9339137215031414, iteration: 43818
loss: 1.0274817943572998,grad_norm: 0.9999993706579469, iteration: 43819
loss: 1.016153335571289,grad_norm: 0.9999995695395412, iteration: 43820
loss: 1.0021196603775024,grad_norm: 0.9999991469021222, iteration: 43821
loss: 0.9863961338996887,grad_norm: 0.9999990941893251, iteration: 43822
loss: 1.0315279960632324,grad_norm: 0.8253104997614013, iteration: 43823
loss: 1.0086122751235962,grad_norm: 0.9469397931721667, iteration: 43824
loss: 0.9680047631263733,grad_norm: 0.9537283103751549, iteration: 43825
loss: 1.0289770364761353,grad_norm: 0.8632836840111573, iteration: 43826
loss: 0.9727596044540405,grad_norm: 0.9613826650592592, iteration: 43827
loss: 1.0072747468948364,grad_norm: 0.8554843253047837, iteration: 43828
loss: 0.9940924048423767,grad_norm: 0.9069084701592627, iteration: 43829
loss: 1.0061143636703491,grad_norm: 0.9999992502965286, iteration: 43830
loss: 1.0057138204574585,grad_norm: 0.8303247920812451, iteration: 43831
loss: 0.9839606881141663,grad_norm: 0.9839184569254946, iteration: 43832
loss: 0.9447378516197205,grad_norm: 0.9827167543104344, iteration: 43833
loss: 1.0312682390213013,grad_norm: 0.9999999135586842, iteration: 43834
loss: 0.9993793964385986,grad_norm: 0.9999992660013093, iteration: 43835
loss: 0.9701024293899536,grad_norm: 0.9999992791714768, iteration: 43836
loss: 1.0398017168045044,grad_norm: 0.9999991165237972, iteration: 43837
loss: 1.0303764343261719,grad_norm: 0.9538954985694517, iteration: 43838
loss: 1.108156442642212,grad_norm: 0.9999995991228386, iteration: 43839
loss: 1.0989409685134888,grad_norm: 0.8529118850223567, iteration: 43840
loss: 0.9893274307250977,grad_norm: 0.9169798392855004, iteration: 43841
loss: 0.9770753979682922,grad_norm: 0.999999206348227, iteration: 43842
loss: 0.9774657487869263,grad_norm: 0.9999992339080536, iteration: 43843
loss: 1.0201834440231323,grad_norm: 0.7062222614040015, iteration: 43844
loss: 0.9888341426849365,grad_norm: 0.9999990561627105, iteration: 43845
loss: 1.0135307312011719,grad_norm: 0.8658634863288741, iteration: 43846
loss: 1.2053890228271484,grad_norm: 0.9999998046591088, iteration: 43847
loss: 1.058140754699707,grad_norm: 0.9999997014149631, iteration: 43848
loss: 0.9703381061553955,grad_norm: 0.8757999658286415, iteration: 43849
loss: 0.9982680082321167,grad_norm: 0.9148780730945572, iteration: 43850
loss: 1.014580488204956,grad_norm: 0.9022259133707019, iteration: 43851
loss: 1.1077873706817627,grad_norm: 0.9999998795018498, iteration: 43852
loss: 0.9904839396476746,grad_norm: 0.9999990217183348, iteration: 43853
loss: 0.9810243248939514,grad_norm: 0.9999992383771167, iteration: 43854
loss: 0.9867596626281738,grad_norm: 0.9999991750804909, iteration: 43855
loss: 1.0072746276855469,grad_norm: 0.9284797172250909, iteration: 43856
loss: 1.0575147867202759,grad_norm: 0.9999992398321607, iteration: 43857
loss: 1.0894503593444824,grad_norm: 0.9999997527421318, iteration: 43858
loss: 1.0410761833190918,grad_norm: 0.9999997358825341, iteration: 43859
loss: 1.0794470310211182,grad_norm: 0.999999401515175, iteration: 43860
loss: 0.9987242221832275,grad_norm: 0.7903634406932539, iteration: 43861
loss: 0.9944437146186829,grad_norm: 0.9515461822234196, iteration: 43862
loss: 1.0149976015090942,grad_norm: 0.9999996012900719, iteration: 43863
loss: 1.0287096500396729,grad_norm: 0.9999990800916049, iteration: 43864
loss: 1.0099537372589111,grad_norm: 0.9207617873889766, iteration: 43865
loss: 1.0156214237213135,grad_norm: 0.9999990406499888, iteration: 43866
loss: 1.0729045867919922,grad_norm: 0.9999996071920594, iteration: 43867
loss: 1.0471117496490479,grad_norm: 0.906721536967621, iteration: 43868
loss: 1.0225446224212646,grad_norm: 0.9999991673518461, iteration: 43869
loss: 1.0267901420593262,grad_norm: 0.9999990469648156, iteration: 43870
loss: 0.9987176060676575,grad_norm: 0.999999135574401, iteration: 43871
loss: 1.0027942657470703,grad_norm: 0.8629113423966882, iteration: 43872
loss: 1.0735303163528442,grad_norm: 0.9999992252225851, iteration: 43873
loss: 1.008088231086731,grad_norm: 0.9999991358372498, iteration: 43874
loss: 0.9874623417854309,grad_norm: 0.9999991840029191, iteration: 43875
loss: 0.986635148525238,grad_norm: 0.9471152580201437, iteration: 43876
loss: 1.0207927227020264,grad_norm: 0.9999990839902781, iteration: 43877
loss: 0.9966866970062256,grad_norm: 0.9945710111252425, iteration: 43878
loss: 1.0153453350067139,grad_norm: 0.9937741524775785, iteration: 43879
loss: 1.0008281469345093,grad_norm: 0.9999991643656451, iteration: 43880
loss: 1.0996971130371094,grad_norm: 0.9999994261048736, iteration: 43881
loss: 1.013096570968628,grad_norm: 0.93032599596547, iteration: 43882
loss: 0.9878236651420593,grad_norm: 0.8989267949630322, iteration: 43883
loss: 0.9749757051467896,grad_norm: 0.8962368318530124, iteration: 43884
loss: 0.991028904914856,grad_norm: 0.9999992390447714, iteration: 43885
loss: 0.9774246215820312,grad_norm: 0.9999991959699674, iteration: 43886
loss: 0.9721570611000061,grad_norm: 0.8883813871705901, iteration: 43887
loss: 1.0453311204910278,grad_norm: 0.9999995370960316, iteration: 43888
loss: 0.9824770092964172,grad_norm: 0.8143448368992553, iteration: 43889
loss: 0.9877001643180847,grad_norm: 0.9999167350713796, iteration: 43890
loss: 1.019822120666504,grad_norm: 0.951759405033086, iteration: 43891
loss: 1.0055620670318604,grad_norm: 0.9999991402275813, iteration: 43892
loss: 1.015459418296814,grad_norm: 0.999999188245242, iteration: 43893
loss: 0.9789187908172607,grad_norm: 0.999998975218431, iteration: 43894
loss: 0.9672927856445312,grad_norm: 0.9999990226959526, iteration: 43895
loss: 0.9708629846572876,grad_norm: 0.9991633600102026, iteration: 43896
loss: 1.075803279876709,grad_norm: 0.9999991184707379, iteration: 43897
loss: 0.9737253189086914,grad_norm: 0.9098679381821573, iteration: 43898
loss: 1.0244510173797607,grad_norm: 0.9463055963457065, iteration: 43899
loss: 0.9516279101371765,grad_norm: 0.9134401019831062, iteration: 43900
loss: 0.9946867823600769,grad_norm: 0.9999989790799949, iteration: 43901
loss: 0.976185142993927,grad_norm: 0.9999991159826798, iteration: 43902
loss: 0.9975464940071106,grad_norm: 0.9999990449286356, iteration: 43903
loss: 0.9949914216995239,grad_norm: 0.9999991244529272, iteration: 43904
loss: 1.0157018899917603,grad_norm: 0.9444796983934803, iteration: 43905
loss: 1.0210254192352295,grad_norm: 0.9999996477411099, iteration: 43906
loss: 1.011573314666748,grad_norm: 0.9999991318725097, iteration: 43907
loss: 0.993870735168457,grad_norm: 0.8266943295353237, iteration: 43908
loss: 1.020695447921753,grad_norm: 0.9999990653556186, iteration: 43909
loss: 1.041804552078247,grad_norm: 0.9999990164985296, iteration: 43910
loss: 1.0004173517227173,grad_norm: 0.999999018196234, iteration: 43911
loss: 0.9928721189498901,grad_norm: 0.9357551459461682, iteration: 43912
loss: 0.9984914660453796,grad_norm: 0.9999990884707268, iteration: 43913
loss: 0.9879032373428345,grad_norm: 0.9023793451936293, iteration: 43914
loss: 1.0006928443908691,grad_norm: 0.7699713228664753, iteration: 43915
loss: 0.998624861240387,grad_norm: 0.9999990669173331, iteration: 43916
loss: 0.9988712072372437,grad_norm: 0.9999991175315734, iteration: 43917
loss: 1.067997932434082,grad_norm: 0.9999997549900725, iteration: 43918
loss: 1.024635910987854,grad_norm: 0.8204182777437915, iteration: 43919
loss: 1.0013145208358765,grad_norm: 0.9999990483199699, iteration: 43920
loss: 1.0370471477508545,grad_norm: 0.9999991093567777, iteration: 43921
loss: 1.011459469795227,grad_norm: 0.9999997390695878, iteration: 43922
loss: 1.053268313407898,grad_norm: 0.8968055166559639, iteration: 43923
loss: 0.9923531413078308,grad_norm: 0.9133013324085537, iteration: 43924
loss: 1.017276406288147,grad_norm: 0.8624233878926844, iteration: 43925
loss: 1.0168101787567139,grad_norm: 0.9838345428658668, iteration: 43926
loss: 1.0194016695022583,grad_norm: 0.9999994083285739, iteration: 43927
loss: 1.0244425535202026,grad_norm: 0.9999995038004946, iteration: 43928
loss: 1.026820421218872,grad_norm: 0.8589323067755219, iteration: 43929
loss: 1.0387940406799316,grad_norm: 0.9543165331843612, iteration: 43930
loss: 1.025097370147705,grad_norm: 0.8429929615715736, iteration: 43931
loss: 1.0094716548919678,grad_norm: 0.7769915781018347, iteration: 43932
loss: 1.0121464729309082,grad_norm: 0.7788168089839298, iteration: 43933
loss: 1.0453977584838867,grad_norm: 0.9475607252699082, iteration: 43934
loss: 0.9775734543800354,grad_norm: 0.9812233454741144, iteration: 43935
loss: 0.9844554662704468,grad_norm: 0.9999990230192275, iteration: 43936
loss: 0.9658796787261963,grad_norm: 0.9999991833086339, iteration: 43937
loss: 1.0107998847961426,grad_norm: 0.96596227416634, iteration: 43938
loss: 1.0140382051467896,grad_norm: 0.9999991344089855, iteration: 43939
loss: 0.9913222193717957,grad_norm: 0.8831603579274808, iteration: 43940
loss: 1.0167831182479858,grad_norm: 0.9999990880975281, iteration: 43941
loss: 1.0219850540161133,grad_norm: 0.8399000478084216, iteration: 43942
loss: 1.06787109375,grad_norm: 0.9999991827161404, iteration: 43943
loss: 1.0090116262435913,grad_norm: 0.999999135249129, iteration: 43944
loss: 0.9726964235305786,grad_norm: 0.9067266953505466, iteration: 43945
loss: 0.9996907114982605,grad_norm: 0.877267995223308, iteration: 43946
loss: 1.0298314094543457,grad_norm: 0.9999993057749915, iteration: 43947
loss: 0.9854577779769897,grad_norm: 0.7746775099093715, iteration: 43948
loss: 1.0390663146972656,grad_norm: 0.9314340231854138, iteration: 43949
loss: 0.9869155883789062,grad_norm: 0.9959950930748827, iteration: 43950
loss: 1.041664958000183,grad_norm: 0.9999991408358471, iteration: 43951
loss: 0.9863711595535278,grad_norm: 0.9999991792003099, iteration: 43952
loss: 0.9887610077857971,grad_norm: 0.9357644160732659, iteration: 43953
loss: 1.0044240951538086,grad_norm: 0.9973978686584839, iteration: 43954
loss: 1.0272268056869507,grad_norm: 0.9999991042851305, iteration: 43955
loss: 1.036594033241272,grad_norm: 0.8047438976440721, iteration: 43956
loss: 1.0287615060806274,grad_norm: 0.9501047038166606, iteration: 43957
loss: 1.030383586883545,grad_norm: 0.865725232707833, iteration: 43958
loss: 1.0089350938796997,grad_norm: 0.9237309487725088, iteration: 43959
loss: 1.022282600402832,grad_norm: 0.9999991715119418, iteration: 43960
loss: 0.980274498462677,grad_norm: 0.9999990625163889, iteration: 43961
loss: 1.0016491413116455,grad_norm: 0.999999235802615, iteration: 43962
loss: 0.9807107448577881,grad_norm: 0.9999991059395371, iteration: 43963
loss: 1.004300832748413,grad_norm: 0.9274138764607502, iteration: 43964
loss: 0.9915322661399841,grad_norm: 0.9999991153464829, iteration: 43965
loss: 1.004371166229248,grad_norm: 0.9999996357965615, iteration: 43966
loss: 1.0249485969543457,grad_norm: 0.9999991106186741, iteration: 43967
loss: 1.0177686214447021,grad_norm: 0.806265164184735, iteration: 43968
loss: 1.0053337812423706,grad_norm: 0.7031477648291028, iteration: 43969
loss: 1.0045593976974487,grad_norm: 0.7895895753622734, iteration: 43970
loss: 1.010046362876892,grad_norm: 0.8218896068996313, iteration: 43971
loss: 0.9967754483222961,grad_norm: 0.9549265444077095, iteration: 43972
loss: 1.0216139554977417,grad_norm: 0.9596675637736335, iteration: 43973
loss: 1.002226710319519,grad_norm: 0.9999992280001029, iteration: 43974
loss: 0.9939373135566711,grad_norm: 0.9815687203510857, iteration: 43975
loss: 0.9463637471199036,grad_norm: 0.9848571493095978, iteration: 43976
loss: 0.9799326658248901,grad_norm: 0.9255585070345559, iteration: 43977
loss: 0.9584009051322937,grad_norm: 0.8418780617527647, iteration: 43978
loss: 0.9707578420639038,grad_norm: 0.9961972389189648, iteration: 43979
loss: 1.0091972351074219,grad_norm: 0.8187474868006794, iteration: 43980
loss: 0.9864442348480225,grad_norm: 0.7624559120598783, iteration: 43981
loss: 0.9944465160369873,grad_norm: 0.937196450481456, iteration: 43982
loss: 1.0174763202667236,grad_norm: 0.9190998527966825, iteration: 43983
loss: 1.0170081853866577,grad_norm: 0.9130794506655194, iteration: 43984
loss: 1.1035922765731812,grad_norm: 0.9999997547542325, iteration: 43985
loss: 0.9875703454017639,grad_norm: 0.9224716696723161, iteration: 43986
loss: 1.018341302871704,grad_norm: 0.9999990474891931, iteration: 43987
loss: 1.012460708618164,grad_norm: 0.7942098413923535, iteration: 43988
loss: 0.9945949912071228,grad_norm: 0.9681942842972351, iteration: 43989
loss: 1.0015836954116821,grad_norm: 0.8638749255314839, iteration: 43990
loss: 0.9772785902023315,grad_norm: 0.9999992091139315, iteration: 43991
loss: 0.9943777918815613,grad_norm: 0.8654970462607174, iteration: 43992
loss: 0.9519103169441223,grad_norm: 0.9999990609695771, iteration: 43993
loss: 1.057379126548767,grad_norm: 0.8407024413796766, iteration: 43994
loss: 0.9970299601554871,grad_norm: 0.9999991518251745, iteration: 43995
loss: 0.985123336315155,grad_norm: 0.859129975169292, iteration: 43996
loss: 1.0324572324752808,grad_norm: 0.9999991525875885, iteration: 43997
loss: 0.980988085269928,grad_norm: 0.9139939557528598, iteration: 43998
loss: 0.9999259114265442,grad_norm: 0.9999997282923878, iteration: 43999
loss: 1.0004124641418457,grad_norm: 0.9804504107231969, iteration: 44000
loss: 0.9767252802848816,grad_norm: 0.9999990250468769, iteration: 44001
loss: 0.9971550703048706,grad_norm: 0.9461141625875034, iteration: 44002
loss: 1.0655553340911865,grad_norm: 0.9999995959397794, iteration: 44003
loss: 1.0342085361480713,grad_norm: 0.9999993012211355, iteration: 44004
loss: 0.9604871273040771,grad_norm: 0.9197398760821859, iteration: 44005
loss: 1.0272818803787231,grad_norm: 0.9999991152757147, iteration: 44006
loss: 1.0046395063400269,grad_norm: 0.9999996796674646, iteration: 44007
loss: 1.0289844274520874,grad_norm: 0.9999989706564063, iteration: 44008
loss: 0.9956997036933899,grad_norm: 0.9961087489163166, iteration: 44009
loss: 1.0000100135803223,grad_norm: 0.9999990410548608, iteration: 44010
loss: 0.9938156008720398,grad_norm: 0.8137994505641394, iteration: 44011
loss: 1.0002731084823608,grad_norm: 0.884262417794726, iteration: 44012
loss: 1.027066707611084,grad_norm: 0.9326658766833122, iteration: 44013
loss: 0.9836450815200806,grad_norm: 0.999999043813645, iteration: 44014
loss: 1.0402932167053223,grad_norm: 0.8123340390786494, iteration: 44015
loss: 1.007952332496643,grad_norm: 0.9999990340783909, iteration: 44016
loss: 0.9846532344818115,grad_norm: 0.956138114176315, iteration: 44017
loss: 1.0156846046447754,grad_norm: 0.9962282506717794, iteration: 44018
loss: 1.0318117141723633,grad_norm: 0.9999989960676237, iteration: 44019
loss: 1.0014556646347046,grad_norm: 0.9814474733040587, iteration: 44020
loss: 0.9997937083244324,grad_norm: 0.9999992850216936, iteration: 44021
loss: 1.0266754627227783,grad_norm: 0.9999996144383679, iteration: 44022
loss: 0.994413435459137,grad_norm: 0.9999992474065349, iteration: 44023
loss: 1.0665812492370605,grad_norm: 0.9999996395978941, iteration: 44024
loss: 0.9751944541931152,grad_norm: 0.9999990569357448, iteration: 44025
loss: 1.0070446729660034,grad_norm: 0.8017342892631776, iteration: 44026
loss: 0.9854733347892761,grad_norm: 0.9321643615231779, iteration: 44027
loss: 1.0102250576019287,grad_norm: 0.9787656552174603, iteration: 44028
loss: 0.9934344291687012,grad_norm: 0.9317573404989948, iteration: 44029
loss: 1.0022095441818237,grad_norm: 0.9999991562110175, iteration: 44030
loss: 1.0275719165802002,grad_norm: 0.9623962061290308, iteration: 44031
loss: 1.0032751560211182,grad_norm: 0.8645530539783564, iteration: 44032
loss: 0.981378972530365,grad_norm: 0.8771388346511992, iteration: 44033
loss: 0.9704286456108093,grad_norm: 0.9999990841764482, iteration: 44034
loss: 0.9964977502822876,grad_norm: 0.9999989884834966, iteration: 44035
loss: 1.0272462368011475,grad_norm: 0.9847367202943436, iteration: 44036
loss: 1.041546106338501,grad_norm: 0.9999997513694359, iteration: 44037
loss: 1.000980257987976,grad_norm: 0.9999991783827055, iteration: 44038
loss: 1.0011749267578125,grad_norm: 0.8119804489079753, iteration: 44039
loss: 1.0306434631347656,grad_norm: 0.8285547846778776, iteration: 44040
loss: 1.0293830633163452,grad_norm: 0.9999989662563227, iteration: 44041
loss: 1.007234811782837,grad_norm: 0.8943938141566103, iteration: 44042
loss: 1.010764241218567,grad_norm: 0.9999991938317625, iteration: 44043
loss: 0.9940695762634277,grad_norm: 0.9999992494570568, iteration: 44044
loss: 1.0047242641448975,grad_norm: 0.9900220636237111, iteration: 44045
loss: 1.0236048698425293,grad_norm: 0.9999995237520294, iteration: 44046
loss: 0.9929405450820923,grad_norm: 0.8894125902020812, iteration: 44047
loss: 0.9772542119026184,grad_norm: 0.8596877286916697, iteration: 44048
loss: 1.0148979425430298,grad_norm: 0.9999992359336103, iteration: 44049
loss: 1.0029867887496948,grad_norm: 0.9999993813110913, iteration: 44050
loss: 0.9869385361671448,grad_norm: 0.9609590620812468, iteration: 44051
loss: 0.9891045093536377,grad_norm: 0.9800518666445572, iteration: 44052
loss: 1.0285178422927856,grad_norm: 0.8895791310587234, iteration: 44053
loss: 0.9381916522979736,grad_norm: 0.999999103345223, iteration: 44054
loss: 0.955242395401001,grad_norm: 0.9999990681406109, iteration: 44055
loss: 1.0029189586639404,grad_norm: 0.9999989876040803, iteration: 44056
loss: 0.9766256213188171,grad_norm: 0.9903512437425697, iteration: 44057
loss: 0.9848296642303467,grad_norm: 0.9340353247487538, iteration: 44058
loss: 1.007964015007019,grad_norm: 0.7319029908415465, iteration: 44059
loss: 0.9849715828895569,grad_norm: 0.9126419142986695, iteration: 44060
loss: 0.993087112903595,grad_norm: 0.9999992504207269, iteration: 44061
loss: 1.0887458324432373,grad_norm: 0.9999995491503159, iteration: 44062
loss: 1.0162404775619507,grad_norm: 0.9999991143951024, iteration: 44063
loss: 1.0197685956954956,grad_norm: 0.9310603032875137, iteration: 44064
loss: 1.056912899017334,grad_norm: 0.8604911452662222, iteration: 44065
loss: 1.0479934215545654,grad_norm: 0.8489106436308723, iteration: 44066
loss: 0.9737902283668518,grad_norm: 0.9494239498248087, iteration: 44067
loss: 0.9770432710647583,grad_norm: 0.9999991743896002, iteration: 44068
loss: 0.955081582069397,grad_norm: 0.8757195645849299, iteration: 44069
loss: 1.0587117671966553,grad_norm: 0.9999993602575136, iteration: 44070
loss: 1.0424282550811768,grad_norm: 0.9999990582271464, iteration: 44071
loss: 1.015128493309021,grad_norm: 0.9999994411471809, iteration: 44072
loss: 1.0053647756576538,grad_norm: 0.9713663332312957, iteration: 44073
loss: 1.0142604112625122,grad_norm: 0.9999994116608625, iteration: 44074
loss: 1.0289565324783325,grad_norm: 0.9982673860992564, iteration: 44075
loss: 0.9995000958442688,grad_norm: 0.9999990135844591, iteration: 44076
loss: 1.0808848142623901,grad_norm: 0.9999990109874769, iteration: 44077
loss: 0.9858229756355286,grad_norm: 0.9715405751855324, iteration: 44078
loss: 1.013979434967041,grad_norm: 0.8846702484006264, iteration: 44079
loss: 0.9933891892433167,grad_norm: 0.8369277576215105, iteration: 44080
loss: 0.9975237846374512,grad_norm: 0.9999991776307876, iteration: 44081
loss: 1.0169328451156616,grad_norm: 0.9999992840939455, iteration: 44082
loss: 0.9994924664497375,grad_norm: 0.9999993127583838, iteration: 44083
loss: 0.9578230381011963,grad_norm: 0.8158973642882376, iteration: 44084
loss: 1.0192654132843018,grad_norm: 0.999998943271279, iteration: 44085
loss: 1.027733325958252,grad_norm: 0.9999995959338037, iteration: 44086
loss: 0.9904319047927856,grad_norm: 0.9627322589672627, iteration: 44087
loss: 1.0080026388168335,grad_norm: 0.9999993455796358, iteration: 44088
loss: 1.0023198127746582,grad_norm: 0.9603207593279673, iteration: 44089
loss: 1.0030622482299805,grad_norm: 0.9599473122006751, iteration: 44090
loss: 1.037996768951416,grad_norm: 0.8694815609204499, iteration: 44091
loss: 1.0119307041168213,grad_norm: 0.9303393989763578, iteration: 44092
loss: 1.0691044330596924,grad_norm: 0.9999993405008566, iteration: 44093
loss: 0.9969842433929443,grad_norm: 0.9643375127550753, iteration: 44094
loss: 0.9891245365142822,grad_norm: 0.9999991325152674, iteration: 44095
loss: 0.9697449803352356,grad_norm: 0.9659676500838917, iteration: 44096
loss: 1.085600733757019,grad_norm: 0.9825230024814329, iteration: 44097
loss: 1.0033037662506104,grad_norm: 0.9221634405836461, iteration: 44098
loss: 1.1304430961608887,grad_norm: 0.9999994874229925, iteration: 44099
loss: 1.0033221244812012,grad_norm: 0.988269725044774, iteration: 44100
loss: 0.9631999135017395,grad_norm: 0.7736797194359613, iteration: 44101
loss: 1.0224539041519165,grad_norm: 0.999999085858511, iteration: 44102
loss: 1.062338948249817,grad_norm: 0.9999996889643727, iteration: 44103
loss: 1.0004656314849854,grad_norm: 0.9959214781464654, iteration: 44104
loss: 1.0044567584991455,grad_norm: 0.8000834115413613, iteration: 44105
loss: 1.0591578483581543,grad_norm: 0.9999995611253731, iteration: 44106
loss: 1.0243632793426514,grad_norm: 0.7510855192615323, iteration: 44107
loss: 1.0011928081512451,grad_norm: 0.9999990245532177, iteration: 44108
loss: 1.0276578664779663,grad_norm: 0.9999996846501304, iteration: 44109
loss: 1.012708067893982,grad_norm: 0.9041688302629327, iteration: 44110
loss: 1.0351983308792114,grad_norm: 0.8680131004520335, iteration: 44111
loss: 1.0076581239700317,grad_norm: 0.9999991623325744, iteration: 44112
loss: 1.0136675834655762,grad_norm: 0.9999990580469703, iteration: 44113
loss: 1.0722426176071167,grad_norm: 0.9999997084130062, iteration: 44114
loss: 1.00255286693573,grad_norm: 0.8426626997904982, iteration: 44115
loss: 0.9923800230026245,grad_norm: 0.9999992895572865, iteration: 44116
loss: 0.96990567445755,grad_norm: 0.9999990925845723, iteration: 44117
loss: 1.0199146270751953,grad_norm: 0.9999991574005586, iteration: 44118
loss: 1.031975507736206,grad_norm: 0.9174393183009567, iteration: 44119
loss: 1.0062847137451172,grad_norm: 0.8367405490579223, iteration: 44120
loss: 0.9991350173950195,grad_norm: 0.963434928567045, iteration: 44121
loss: 1.0849008560180664,grad_norm: 0.9999997991064931, iteration: 44122
loss: 1.1306933164596558,grad_norm: 0.9999996610839731, iteration: 44123
loss: 1.0176979303359985,grad_norm: 0.755897042242465, iteration: 44124
loss: 1.0020328760147095,grad_norm: 0.8858138113516446, iteration: 44125
loss: 1.0293304920196533,grad_norm: 0.9999997034157893, iteration: 44126
loss: 1.1013641357421875,grad_norm: 0.9999992124725091, iteration: 44127
loss: 1.0247530937194824,grad_norm: 0.8829061341088025, iteration: 44128
loss: 1.013109803199768,grad_norm: 0.9999998939955518, iteration: 44129
loss: 1.0036178827285767,grad_norm: 0.9999993858396932, iteration: 44130
loss: 0.9975453615188599,grad_norm: 0.7719428196014567, iteration: 44131
loss: 1.012505054473877,grad_norm: 0.9266475889436568, iteration: 44132
loss: 1.0154956579208374,grad_norm: 0.9999992146661064, iteration: 44133
loss: 0.9943403601646423,grad_norm: 0.8835449349210367, iteration: 44134
loss: 1.0403947830200195,grad_norm: 0.9999993412870525, iteration: 44135
loss: 0.9691234230995178,grad_norm: 0.9317715904137747, iteration: 44136
loss: 1.0149503946304321,grad_norm: 0.8648332312386247, iteration: 44137
loss: 1.0262423753738403,grad_norm: 0.9999988912569489, iteration: 44138
loss: 1.0296574831008911,grad_norm: 0.9999994238477988, iteration: 44139
loss: 1.0050331354141235,grad_norm: 0.9667187817306562, iteration: 44140
loss: 1.0029850006103516,grad_norm: 0.9999989979384002, iteration: 44141
loss: 1.1011186838150024,grad_norm: 0.9999994808406671, iteration: 44142
loss: 0.9832043051719666,grad_norm: 0.9999990691503023, iteration: 44143
loss: 1.0459667444229126,grad_norm: 0.9502609501682401, iteration: 44144
loss: 1.0079305171966553,grad_norm: 0.9343061613963771, iteration: 44145
loss: 1.0173687934875488,grad_norm: 0.9999991580135585, iteration: 44146
loss: 1.0457104444503784,grad_norm: 0.9999997004306884, iteration: 44147
loss: 1.0162384510040283,grad_norm: 0.8015358750087892, iteration: 44148
loss: 1.0305026769638062,grad_norm: 0.8439203964058937, iteration: 44149
loss: 1.0970083475112915,grad_norm: 0.9999994517993587, iteration: 44150
loss: 0.9880064725875854,grad_norm: 0.9999991561435636, iteration: 44151
loss: 1.026702642440796,grad_norm: 0.9999992998343861, iteration: 44152
loss: 1.009260654449463,grad_norm: 0.9999991771680304, iteration: 44153
loss: 0.9860218167304993,grad_norm: 0.9234132193471565, iteration: 44154
loss: 1.0004247426986694,grad_norm: 0.9999990882711526, iteration: 44155
loss: 1.0125638246536255,grad_norm: 0.9999992890626261, iteration: 44156
loss: 1.03041410446167,grad_norm: 0.9999992661952555, iteration: 44157
loss: 1.0050241947174072,grad_norm: 0.9999994536656919, iteration: 44158
loss: 1.0270944833755493,grad_norm: 0.9999990690561342, iteration: 44159
loss: 0.9910986423492432,grad_norm: 0.8953817803310596, iteration: 44160
loss: 1.0095832347869873,grad_norm: 0.8313696424076384, iteration: 44161
loss: 0.9839432835578918,grad_norm: 0.999999211934528, iteration: 44162
loss: 1.005658507347107,grad_norm: 0.8682554364952131, iteration: 44163
loss: 1.021390676498413,grad_norm: 0.9999994890936462, iteration: 44164
loss: 1.030385971069336,grad_norm: 1.0000000267811473, iteration: 44165
loss: 1.042351245880127,grad_norm: 0.855363062618999, iteration: 44166
loss: 1.0249377489089966,grad_norm: 0.9999992585625523, iteration: 44167
loss: 0.9879394173622131,grad_norm: 0.9999991959719274, iteration: 44168
loss: 1.0201297998428345,grad_norm: 0.9240308076915105, iteration: 44169
loss: 1.0492852926254272,grad_norm: 0.9999996252701212, iteration: 44170
loss: 1.055345892906189,grad_norm: 0.9999995486161086, iteration: 44171
loss: 1.0225359201431274,grad_norm: 0.9999994391740118, iteration: 44172
loss: 1.0010722875595093,grad_norm: 0.977029403352998, iteration: 44173
loss: 1.0566505193710327,grad_norm: 0.9999994707614333, iteration: 44174
loss: 0.9966490864753723,grad_norm: 0.9999990796363027, iteration: 44175
loss: 1.0176812410354614,grad_norm: 0.8827098935880551, iteration: 44176
loss: 1.0016378164291382,grad_norm: 0.9999996060543206, iteration: 44177
loss: 0.9813516139984131,grad_norm: 0.9931855603356149, iteration: 44178
loss: 1.0272033214569092,grad_norm: 0.9999991446109332, iteration: 44179
loss: 0.9960528612136841,grad_norm: 0.9999992576949654, iteration: 44180
loss: 0.9857069849967957,grad_norm: 0.9999993359841953, iteration: 44181
loss: 0.9719367623329163,grad_norm: 0.9999992065456939, iteration: 44182
loss: 0.9992426037788391,grad_norm: 0.9002161835345035, iteration: 44183
loss: 1.0110336542129517,grad_norm: 0.9999993370214986, iteration: 44184
loss: 1.0126385688781738,grad_norm: 0.9999990407656567, iteration: 44185
loss: 0.9766010642051697,grad_norm: 0.9999991525809907, iteration: 44186
loss: 1.0407018661499023,grad_norm: 0.9999992922018929, iteration: 44187
loss: 1.0028642416000366,grad_norm: 0.8364513293779097, iteration: 44188
loss: 1.030747652053833,grad_norm: 0.9733403555275437, iteration: 44189
loss: 0.9980944395065308,grad_norm: 0.9999992703364753, iteration: 44190
loss: 0.9859815835952759,grad_norm: 0.9999991903388202, iteration: 44191
loss: 0.9999789595603943,grad_norm: 0.9936511377197855, iteration: 44192
loss: 0.9555138349533081,grad_norm: 0.9905827035456249, iteration: 44193
loss: 1.0093462467193604,grad_norm: 0.8315754658494812, iteration: 44194
loss: 1.0128735303878784,grad_norm: 0.9999991178376465, iteration: 44195
loss: 0.9773598909378052,grad_norm: 0.8354554309423147, iteration: 44196
loss: 0.967103898525238,grad_norm: 0.9999998417400972, iteration: 44197
loss: 0.9969927072525024,grad_norm: 0.9813225685581426, iteration: 44198
loss: 1.0534675121307373,grad_norm: 0.9999993209235584, iteration: 44199
loss: 1.00357985496521,grad_norm: 0.9090708772912539, iteration: 44200
loss: 1.0398378372192383,grad_norm: 0.8963094433723592, iteration: 44201
loss: 0.9598224759101868,grad_norm: 0.7788430982284115, iteration: 44202
loss: 1.0338462591171265,grad_norm: 0.9999992050337551, iteration: 44203
loss: 0.9859597086906433,grad_norm: 0.9260927218906079, iteration: 44204
loss: 1.0258620977401733,grad_norm: 0.9999990492013394, iteration: 44205
loss: 0.9997974634170532,grad_norm: 0.8900033452580444, iteration: 44206
loss: 0.9738126397132874,grad_norm: 0.9439765331265628, iteration: 44207
loss: 1.009980320930481,grad_norm: 0.9999996897031133, iteration: 44208
loss: 1.0625077486038208,grad_norm: 0.999999243461828, iteration: 44209
loss: 1.0308048725128174,grad_norm: 0.9731885969149767, iteration: 44210
loss: 1.0467332601547241,grad_norm: 0.8228979539210062, iteration: 44211
loss: 1.001631736755371,grad_norm: 0.9999990559544005, iteration: 44212
loss: 1.0013606548309326,grad_norm: 0.9999993133753371, iteration: 44213
loss: 1.060707449913025,grad_norm: 0.9999997227566286, iteration: 44214
loss: 1.00692880153656,grad_norm: 0.8558066828906785, iteration: 44215
loss: 0.9886754751205444,grad_norm: 0.9999995853196029, iteration: 44216
loss: 1.016073226928711,grad_norm: 0.9999991165523264, iteration: 44217
loss: 1.0195449590682983,grad_norm: 0.9303949296455389, iteration: 44218
loss: 0.965854823589325,grad_norm: 0.9108817782377789, iteration: 44219
loss: 1.0170737504959106,grad_norm: 0.9999990382113234, iteration: 44220
loss: 1.0391796827316284,grad_norm: 0.9341740691719558, iteration: 44221
loss: 1.0262787342071533,grad_norm: 0.9999994252574741, iteration: 44222
loss: 0.9555037021636963,grad_norm: 0.9999993233925645, iteration: 44223
loss: 1.0333079099655151,grad_norm: 0.9999998556117936, iteration: 44224
loss: 1.1176756620407104,grad_norm: 0.9999999019201739, iteration: 44225
loss: 0.9847348928451538,grad_norm: 0.8923905931550017, iteration: 44226
loss: 0.9918600916862488,grad_norm: 0.9999991754426797, iteration: 44227
loss: 1.0111353397369385,grad_norm: 0.9861088034769571, iteration: 44228
loss: 1.0116078853607178,grad_norm: 0.9999998210174349, iteration: 44229
loss: 1.02888822555542,grad_norm: 0.8464616968739354, iteration: 44230
loss: 0.9760069251060486,grad_norm: 0.7756390306340671, iteration: 44231
loss: 0.9649429321289062,grad_norm: 0.8917595743778942, iteration: 44232
loss: 1.044378399848938,grad_norm: 0.9999997198649562, iteration: 44233
loss: 1.0122411251068115,grad_norm: 0.9999989855393583, iteration: 44234
loss: 1.0055184364318848,grad_norm: 0.9999991056207138, iteration: 44235
loss: 1.0026664733886719,grad_norm: 0.7889041089977763, iteration: 44236
loss: 1.0450769662857056,grad_norm: 0.9999994673805678, iteration: 44237
loss: 0.9676728844642639,grad_norm: 0.8860939539014923, iteration: 44238
loss: 1.028087854385376,grad_norm: 0.9715798012655311, iteration: 44239
loss: 1.0068150758743286,grad_norm: 0.8735058796408467, iteration: 44240
loss: 0.9536041617393494,grad_norm: 0.9999992034368592, iteration: 44241
loss: 1.004813313484192,grad_norm: 0.9999991390359458, iteration: 44242
loss: 1.0400738716125488,grad_norm: 0.999999521197903, iteration: 44243
loss: 1.0415089130401611,grad_norm: 0.9739437922849933, iteration: 44244
loss: 0.9812669157981873,grad_norm: 0.8618576649020853, iteration: 44245
loss: 1.00813627243042,grad_norm: 0.9999998479895771, iteration: 44246
loss: 1.038257360458374,grad_norm: 0.9999991985391591, iteration: 44247
loss: 0.9829705357551575,grad_norm: 0.7358386195926013, iteration: 44248
loss: 1.0057958364486694,grad_norm: 0.9999992291590571, iteration: 44249
loss: 1.0595753192901611,grad_norm: 0.9999996264209082, iteration: 44250
loss: 1.003914713859558,grad_norm: 0.9999990648244751, iteration: 44251
loss: 0.9980480670928955,grad_norm: 0.8876634639488873, iteration: 44252
loss: 0.9694236516952515,grad_norm: 0.9101521591372228, iteration: 44253
loss: 0.9475065469741821,grad_norm: 0.9999992154024879, iteration: 44254
loss: 1.0094155073165894,grad_norm: 0.8555770457804774, iteration: 44255
loss: 1.0262174606323242,grad_norm: 0.9999990025852676, iteration: 44256
loss: 1.0288761854171753,grad_norm: 0.9999993014219359, iteration: 44257
loss: 1.00267493724823,grad_norm: 0.9255464846735444, iteration: 44258
loss: 1.00359308719635,grad_norm: 0.9999990021501578, iteration: 44259
loss: 1.0592082738876343,grad_norm: 0.999999383645622, iteration: 44260
loss: 1.0502020120620728,grad_norm: 0.9999991262616427, iteration: 44261
loss: 1.0202422142028809,grad_norm: 0.9999991228763001, iteration: 44262
loss: 1.0725167989730835,grad_norm: 0.9999992174228335, iteration: 44263
loss: 1.0571602582931519,grad_norm: 0.9999996980201589, iteration: 44264
loss: 0.9826555252075195,grad_norm: 0.9542887518074994, iteration: 44265
loss: 0.9932584762573242,grad_norm: 0.7968933919070852, iteration: 44266
loss: 0.9490286111831665,grad_norm: 0.9308393362291323, iteration: 44267
loss: 1.09520423412323,grad_norm: 0.9999999260014373, iteration: 44268
loss: 1.0361768007278442,grad_norm: 0.9999992544437775, iteration: 44269
loss: 1.075674057006836,grad_norm: 0.9999995426293288, iteration: 44270
loss: 1.0084102153778076,grad_norm: 0.8804893581278703, iteration: 44271
loss: 1.015275239944458,grad_norm: 0.999999308729523, iteration: 44272
loss: 0.9836680889129639,grad_norm: 0.9999992191133686, iteration: 44273
loss: 1.02159583568573,grad_norm: 0.9999994949096866, iteration: 44274
loss: 0.961581826210022,grad_norm: 0.8245639701330234, iteration: 44275
loss: 1.0238518714904785,grad_norm: 0.7106746867587813, iteration: 44276
loss: 1.0755019187927246,grad_norm: 0.9999991304885553, iteration: 44277
loss: 1.0191121101379395,grad_norm: 0.9834999150242669, iteration: 44278
loss: 0.9946901202201843,grad_norm: 0.9135601973269895, iteration: 44279
loss: 0.9771920442581177,grad_norm: 0.8477651484016718, iteration: 44280
loss: 0.9980349540710449,grad_norm: 0.9972636400885967, iteration: 44281
loss: 1.0083481073379517,grad_norm: 0.8036719880139337, iteration: 44282
loss: 0.94504314661026,grad_norm: 0.8086516152152707, iteration: 44283
loss: 0.9864389300346375,grad_norm: 0.9999989277695742, iteration: 44284
loss: 1.015578031539917,grad_norm: 0.8797359945538137, iteration: 44285
loss: 0.9996328949928284,grad_norm: 0.9999991590176632, iteration: 44286
loss: 1.010961890220642,grad_norm: 0.9464677884799862, iteration: 44287
loss: 1.0919163227081299,grad_norm: 0.9999999120876326, iteration: 44288
loss: 1.0054856538772583,grad_norm: 0.7018362341246585, iteration: 44289
loss: 1.0263432264328003,grad_norm: 0.9999995921720892, iteration: 44290
loss: 1.0246611833572388,grad_norm: 0.9999990342645556, iteration: 44291
loss: 1.0092394351959229,grad_norm: 0.9283571991285465, iteration: 44292
loss: 0.9883372783660889,grad_norm: 0.8783478574615976, iteration: 44293
loss: 1.0263943672180176,grad_norm: 0.9999991001048152, iteration: 44294
loss: 1.1025991439819336,grad_norm: 0.9999994012257135, iteration: 44295
loss: 0.9942787289619446,grad_norm: 0.9999991617261962, iteration: 44296
loss: 0.9868139624595642,grad_norm: 0.9294162986752506, iteration: 44297
loss: 1.028939127922058,grad_norm: 0.9999996968442846, iteration: 44298
loss: 0.9869731068611145,grad_norm: 0.8080921515767236, iteration: 44299
loss: 1.0106419324874878,grad_norm: 0.9999996639905981, iteration: 44300
loss: 1.0130596160888672,grad_norm: 0.9999991904366382, iteration: 44301
loss: 1.0143630504608154,grad_norm: 0.9924410772562057, iteration: 44302
loss: 1.004030704498291,grad_norm: 0.9999995260439244, iteration: 44303
loss: 1.0375771522521973,grad_norm: 0.8405346704628677, iteration: 44304
loss: 0.9879961013793945,grad_norm: 0.8963820926366647, iteration: 44305
loss: 1.0217251777648926,grad_norm: 0.9859861295802737, iteration: 44306
loss: 1.0161231756210327,grad_norm: 0.9761160428253359, iteration: 44307
loss: 0.9641876220703125,grad_norm: 0.9999990089269749, iteration: 44308
loss: 0.9951651692390442,grad_norm: 0.9516635366306513, iteration: 44309
loss: 1.0230470895767212,grad_norm: 0.9999992127973942, iteration: 44310
loss: 1.007779836654663,grad_norm: 0.9999989879744483, iteration: 44311
loss: 1.101753830909729,grad_norm: 0.9999993883917053, iteration: 44312
loss: 1.0389091968536377,grad_norm: 0.999999119320956, iteration: 44313
loss: 1.0225965976715088,grad_norm: 0.9446720457905293, iteration: 44314
loss: 1.0462666749954224,grad_norm: 0.9999992980354686, iteration: 44315
loss: 0.9857946634292603,grad_norm: 0.9999991546915483, iteration: 44316
loss: 1.0110301971435547,grad_norm: 0.9999991070276705, iteration: 44317
loss: 0.9853575229644775,grad_norm: 0.8964982758564207, iteration: 44318
loss: 1.0140327215194702,grad_norm: 0.8174499066784486, iteration: 44319
loss: 0.9885485768318176,grad_norm: 0.8850745468861292, iteration: 44320
loss: 0.9901408553123474,grad_norm: 0.9075775429031058, iteration: 44321
loss: 1.0428805351257324,grad_norm: 0.8641121969427146, iteration: 44322
loss: 0.9666889905929565,grad_norm: 0.9767144579447046, iteration: 44323
loss: 1.0221996307373047,grad_norm: 0.9999991137273568, iteration: 44324
loss: 0.9907753467559814,grad_norm: 0.9576462765936159, iteration: 44325
loss: 1.0307987928390503,grad_norm: 0.9999991135927425, iteration: 44326
loss: 0.9867782592773438,grad_norm: 0.8402823529976816, iteration: 44327
loss: 1.0573277473449707,grad_norm: 0.9999998185861154, iteration: 44328
loss: 1.0348902940750122,grad_norm: 0.9999994102286095, iteration: 44329
loss: 0.9893155694007874,grad_norm: 0.8081714766622243, iteration: 44330
loss: 0.9635818600654602,grad_norm: 0.981303523171169, iteration: 44331
loss: 1.0165153741836548,grad_norm: 0.9999991340140407, iteration: 44332
loss: 1.0170714855194092,grad_norm: 0.8708849788140023, iteration: 44333
loss: 1.0041019916534424,grad_norm: 0.9999994322485084, iteration: 44334
loss: 1.1122918128967285,grad_norm: 0.9999995770524416, iteration: 44335
loss: 1.0110440254211426,grad_norm: 0.9852711257778903, iteration: 44336
loss: 0.9827396273612976,grad_norm: 0.9999993831576806, iteration: 44337
loss: 0.9686371684074402,grad_norm: 0.9878745669796026, iteration: 44338
loss: 1.0135774612426758,grad_norm: 0.8650000647663622, iteration: 44339
loss: 1.0266691446304321,grad_norm: 0.8917943982286755, iteration: 44340
loss: 1.0430723428726196,grad_norm: 0.9350004844256706, iteration: 44341
loss: 0.9866721630096436,grad_norm: 0.9041510252317492, iteration: 44342
loss: 1.0018523931503296,grad_norm: 0.8403514693863045, iteration: 44343
loss: 1.0246738195419312,grad_norm: 0.9999990323109854, iteration: 44344
loss: 0.9794197678565979,grad_norm: 0.8590294190233586, iteration: 44345
loss: 0.9841676354408264,grad_norm: 0.937906548848959, iteration: 44346
loss: 0.9832586646080017,grad_norm: 0.8130335003293528, iteration: 44347
loss: 0.9939167499542236,grad_norm: 0.9999990509281769, iteration: 44348
loss: 1.0132405757904053,grad_norm: 0.9038438328742888, iteration: 44349
loss: 1.0052447319030762,grad_norm: 0.9999996186665708, iteration: 44350
loss: 0.9530767798423767,grad_norm: 0.8723805508720766, iteration: 44351
loss: 1.01869797706604,grad_norm: 0.9941704803217276, iteration: 44352
loss: 1.0149809122085571,grad_norm: 0.9999991151149142, iteration: 44353
loss: 0.948935329914093,grad_norm: 0.8896142146578901, iteration: 44354
loss: 1.0335237979888916,grad_norm: 0.9027195091084588, iteration: 44355
loss: 1.0799686908721924,grad_norm: 0.9999997703085096, iteration: 44356
loss: 0.9613114595413208,grad_norm: 0.9266749510066834, iteration: 44357
loss: 0.9996665120124817,grad_norm: 0.8979023573004018, iteration: 44358
loss: 0.9843112826347351,grad_norm: 0.9999991296954177, iteration: 44359
loss: 0.9700160026550293,grad_norm: 0.9649203093921231, iteration: 44360
loss: 1.01533043384552,grad_norm: 0.9269011157787416, iteration: 44361
loss: 0.9995246529579163,grad_norm: 0.9999991743197059, iteration: 44362
loss: 1.0211803913116455,grad_norm: 0.8206993388550139, iteration: 44363
loss: 0.9459037184715271,grad_norm: 0.9220675368009446, iteration: 44364
loss: 0.9992701411247253,grad_norm: 0.9999990432527601, iteration: 44365
loss: 0.9471706748008728,grad_norm: 0.9721612640288101, iteration: 44366
loss: 1.0361121892929077,grad_norm: 0.9899904147437203, iteration: 44367
loss: 1.015000581741333,grad_norm: 0.9999990774464513, iteration: 44368
loss: 1.0390832424163818,grad_norm: 0.9999991563169924, iteration: 44369
loss: 1.0685529708862305,grad_norm: 0.9999997489137755, iteration: 44370
loss: 1.0428560972213745,grad_norm: 0.8547592845935307, iteration: 44371
loss: 0.988254189491272,grad_norm: 0.8043050777366607, iteration: 44372
loss: 0.9909486174583435,grad_norm: 0.9150101764892516, iteration: 44373
loss: 1.1031521558761597,grad_norm: 0.9999998948495535, iteration: 44374
loss: 0.9532597064971924,grad_norm: 0.999999116922926, iteration: 44375
loss: 0.9935826659202576,grad_norm: 0.9416859101498496, iteration: 44376
loss: 0.9678107500076294,grad_norm: 0.8793538547014105, iteration: 44377
loss: 1.0195095539093018,grad_norm: 0.9999994731803014, iteration: 44378
loss: 1.0167535543441772,grad_norm: 0.8489824534877168, iteration: 44379
loss: 0.9809300899505615,grad_norm: 0.9200092513521259, iteration: 44380
loss: 1.019896149635315,grad_norm: 0.9999995736206201, iteration: 44381
loss: 0.9989577531814575,grad_norm: 0.9999993291531429, iteration: 44382
loss: 0.9619352221488953,grad_norm: 0.934655152391367, iteration: 44383
loss: 0.9876638054847717,grad_norm: 0.9244416357073775, iteration: 44384
loss: 0.9950445294380188,grad_norm: 0.7376599331413494, iteration: 44385
loss: 1.0531452894210815,grad_norm: 0.9999990704237681, iteration: 44386
loss: 1.0277233123779297,grad_norm: 0.9999997621080094, iteration: 44387
loss: 0.9980418682098389,grad_norm: 0.8247236494337398, iteration: 44388
loss: 0.9862154722213745,grad_norm: 0.9999996655976866, iteration: 44389
loss: 1.0748324394226074,grad_norm: 0.9999997516050098, iteration: 44390
loss: 1.0470404624938965,grad_norm: 0.8376647254250271, iteration: 44391
loss: 1.0358476638793945,grad_norm: 0.9999993686355628, iteration: 44392
loss: 1.0767896175384521,grad_norm: 0.9999996190121916, iteration: 44393
loss: 1.022637128829956,grad_norm: 0.999999643375606, iteration: 44394
loss: 1.0031856298446655,grad_norm: 0.8588731313294342, iteration: 44395
loss: 1.0059964656829834,grad_norm: 0.902340582604084, iteration: 44396
loss: 1.0237751007080078,grad_norm: 0.9999992094263168, iteration: 44397
loss: 0.9730852246284485,grad_norm: 0.9068281097318734, iteration: 44398
loss: 0.995856761932373,grad_norm: 0.9999991962687987, iteration: 44399
loss: 0.9876059889793396,grad_norm: 0.9999995356964886, iteration: 44400
loss: 1.1137746572494507,grad_norm: 0.9999994271412687, iteration: 44401
loss: 1.0146493911743164,grad_norm: 0.9999990738183437, iteration: 44402
loss: 1.0288480520248413,grad_norm: 0.9999997526029113, iteration: 44403
loss: 1.0053784847259521,grad_norm: 0.9793700319506305, iteration: 44404
loss: 1.114067554473877,grad_norm: 0.9999994857194353, iteration: 44405
loss: 0.9612205028533936,grad_norm: 0.999999112681484, iteration: 44406
loss: 0.9951545596122742,grad_norm: 0.999999140595713, iteration: 44407
loss: 1.0219757556915283,grad_norm: 0.9874570711456042, iteration: 44408
loss: 1.024398922920227,grad_norm: 0.987735080953123, iteration: 44409
loss: 0.9907119870185852,grad_norm: 0.9999991840038122, iteration: 44410
loss: 0.9744374752044678,grad_norm: 0.9999990216917188, iteration: 44411
loss: 1.0020183324813843,grad_norm: 0.9999990782553709, iteration: 44412
loss: 1.0355807542800903,grad_norm: 0.9999996070542799, iteration: 44413
loss: 1.4836759567260742,grad_norm: 0.9999997501663694, iteration: 44414
loss: 1.212214469909668,grad_norm: 0.9999998937715115, iteration: 44415
loss: 0.9704990983009338,grad_norm: 0.8222797648398968, iteration: 44416
loss: 1.1224322319030762,grad_norm: 0.9999990653335789, iteration: 44417
loss: 0.985318124294281,grad_norm: 0.9999992004406293, iteration: 44418
loss: 1.0205559730529785,grad_norm: 0.8924006024922475, iteration: 44419
loss: 0.9953624606132507,grad_norm: 0.9999993657401041, iteration: 44420
loss: 1.0205541849136353,grad_norm: 0.8224989742613444, iteration: 44421
loss: 0.9987084269523621,grad_norm: 0.7415190555479078, iteration: 44422
loss: 1.0230478048324585,grad_norm: 0.9999990795609575, iteration: 44423
loss: 0.9986671805381775,grad_norm: 0.9190583099990516, iteration: 44424
loss: 1.025073766708374,grad_norm: 0.9999993734213865, iteration: 44425
loss: 0.9989713430404663,grad_norm: 0.9999997326208482, iteration: 44426
loss: 1.0348761081695557,grad_norm: 0.9999998916422489, iteration: 44427
loss: 1.0220346450805664,grad_norm: 0.9392562851183852, iteration: 44428
loss: 0.9686895608901978,grad_norm: 0.9015276159654155, iteration: 44429
loss: 1.0082480907440186,grad_norm: 0.9216974798666726, iteration: 44430
loss: 1.0195894241333008,grad_norm: 0.9999993070769644, iteration: 44431
loss: 1.0630606412887573,grad_norm: 0.9999993361646613, iteration: 44432
loss: 1.031701922416687,grad_norm: 0.9999992089945932, iteration: 44433
loss: 1.0098237991333008,grad_norm: 0.9999994983075705, iteration: 44434
loss: 1.0162872076034546,grad_norm: 0.824985968105212, iteration: 44435
loss: 1.011556625366211,grad_norm: 0.8097751830986837, iteration: 44436
loss: 1.0247373580932617,grad_norm: 0.9999992454063864, iteration: 44437
loss: 0.985302209854126,grad_norm: 0.9999992064267794, iteration: 44438
loss: 1.016157627105713,grad_norm: 0.870415686547034, iteration: 44439
loss: 1.010076880455017,grad_norm: 0.9343048192884312, iteration: 44440
loss: 0.9931467175483704,grad_norm: 0.999999210581205, iteration: 44441
loss: 1.003740668296814,grad_norm: 0.9308952404415448, iteration: 44442
loss: 1.0447190999984741,grad_norm: 0.999999359069487, iteration: 44443
loss: 1.0161408185958862,grad_norm: 0.9999991522411225, iteration: 44444
loss: 1.0318046808242798,grad_norm: 0.9999994171731377, iteration: 44445
loss: 1.0157288312911987,grad_norm: 0.9959352537432307, iteration: 44446
loss: 0.988979160785675,grad_norm: 0.999999897977042, iteration: 44447
loss: 1.0276902914047241,grad_norm: 0.9999992388551585, iteration: 44448
loss: 0.9875552654266357,grad_norm: 0.8387995327451405, iteration: 44449
loss: 1.068447470664978,grad_norm: 0.9999995941361013, iteration: 44450
loss: 1.0202722549438477,grad_norm: 0.9858038245924592, iteration: 44451
loss: 0.9918314814567566,grad_norm: 0.9999996606349189, iteration: 44452
loss: 0.9938144683837891,grad_norm: 0.9999991335152898, iteration: 44453
loss: 1.0176564455032349,grad_norm: 0.999999279074425, iteration: 44454
loss: 1.026869535446167,grad_norm: 0.9999994249667051, iteration: 44455
loss: 1.0183571577072144,grad_norm: 0.8290364899566419, iteration: 44456
loss: 1.0379536151885986,grad_norm: 0.9688427992929224, iteration: 44457
loss: 1.0163508653640747,grad_norm: 0.9053622766107033, iteration: 44458
loss: 1.0789207220077515,grad_norm: 0.999999620166145, iteration: 44459
loss: 1.0382384061813354,grad_norm: 0.9999990112243244, iteration: 44460
loss: 1.0230001211166382,grad_norm: 0.9999989504435769, iteration: 44461
loss: 1.0186208486557007,grad_norm: 0.9999992516045595, iteration: 44462
loss: 1.0151270627975464,grad_norm: 0.9543100515174612, iteration: 44463
loss: 1.0029250383377075,grad_norm: 0.9426863663510479, iteration: 44464
loss: 1.1620830297470093,grad_norm: 0.99999918249087, iteration: 44465
loss: 1.0289764404296875,grad_norm: 0.9006000575326323, iteration: 44466
loss: 0.9653687477111816,grad_norm: 0.999999490629307, iteration: 44467
loss: 1.0370906591415405,grad_norm: 0.9451294840172264, iteration: 44468
loss: 1.0993787050247192,grad_norm: 0.9999999205206448, iteration: 44469
loss: 1.0003970861434937,grad_norm: 0.999999073100028, iteration: 44470
loss: 1.0224379301071167,grad_norm: 0.8755544053994316, iteration: 44471
loss: 1.049787163734436,grad_norm: 0.9999990803786907, iteration: 44472
loss: 0.97993403673172,grad_norm: 0.983348577205823, iteration: 44473
loss: 1.0204397439956665,grad_norm: 0.965875176618971, iteration: 44474
loss: 1.0377658605575562,grad_norm: 0.9999993668819397, iteration: 44475
loss: 1.0091310739517212,grad_norm: 0.829844348935914, iteration: 44476
loss: 1.0303354263305664,grad_norm: 0.9999990851547345, iteration: 44477
loss: 1.0310869216918945,grad_norm: 0.9999990679517726, iteration: 44478
loss: 1.023937702178955,grad_norm: 0.9999997978150386, iteration: 44479
loss: 1.0459309816360474,grad_norm: 0.9999990729999967, iteration: 44480
loss: 1.0106831789016724,grad_norm: 0.85009756617354, iteration: 44481
loss: 1.0226449966430664,grad_norm: 0.9999990328159077, iteration: 44482
loss: 1.002200722694397,grad_norm: 0.8655198123980936, iteration: 44483
loss: 1.0521184206008911,grad_norm: 0.9999990280910621, iteration: 44484
loss: 0.9845613241195679,grad_norm: 0.9107305513821502, iteration: 44485
loss: 0.9843528270721436,grad_norm: 0.9999993104113685, iteration: 44486
loss: 1.018480896949768,grad_norm: 0.999999172370856, iteration: 44487
loss: 1.032149076461792,grad_norm: 0.9999994133044443, iteration: 44488
loss: 0.9875378608703613,grad_norm: 0.780720995099694, iteration: 44489
loss: 1.001203179359436,grad_norm: 0.9999992968568452, iteration: 44490
loss: 1.0030546188354492,grad_norm: 0.9044999782579118, iteration: 44491
loss: 1.0234099626541138,grad_norm: 0.9999990665402338, iteration: 44492
loss: 0.993036150932312,grad_norm: 0.9999990890415652, iteration: 44493
loss: 0.9901657700538635,grad_norm: 0.9999992984385656, iteration: 44494
loss: 1.0121753215789795,grad_norm: 0.9131011739353123, iteration: 44495
loss: 1.0303422212600708,grad_norm: 0.9999990989168297, iteration: 44496
loss: 1.022318959236145,grad_norm: 0.8846417392086758, iteration: 44497
loss: 1.0403274297714233,grad_norm: 0.999999129619649, iteration: 44498
loss: 0.9658581018447876,grad_norm: 0.865226673864234, iteration: 44499
loss: 1.0127511024475098,grad_norm: 0.8068254597651888, iteration: 44500
loss: 0.989375114440918,grad_norm: 0.999999652142854, iteration: 44501
loss: 1.0250364542007446,grad_norm: 0.9999992628589646, iteration: 44502
loss: 1.0140881538391113,grad_norm: 0.8476434277507312, iteration: 44503
loss: 1.0524487495422363,grad_norm: 0.999999130130849, iteration: 44504
loss: 1.017988681793213,grad_norm: 0.9999990235609855, iteration: 44505
loss: 1.0505422353744507,grad_norm: 0.9999996510172254, iteration: 44506
loss: 1.0087355375289917,grad_norm: 0.9665319243208775, iteration: 44507
loss: 1.0232090950012207,grad_norm: 0.9999990866463273, iteration: 44508
loss: 1.0155324935913086,grad_norm: 0.975703753184155, iteration: 44509
loss: 1.012858271598816,grad_norm: 0.9999990838883287, iteration: 44510
loss: 1.0927348136901855,grad_norm: 0.9999991973322583, iteration: 44511
loss: 0.9979246854782104,grad_norm: 0.9703306556532841, iteration: 44512
loss: 1.013975739479065,grad_norm: 0.9999990838655571, iteration: 44513
loss: 1.013387680053711,grad_norm: 0.8671216650117495, iteration: 44514
loss: 1.0401690006256104,grad_norm: 0.9266575314118314, iteration: 44515
loss: 1.0004301071166992,grad_norm: 0.9308596692987777, iteration: 44516
loss: 1.012288212776184,grad_norm: 0.9999995184717065, iteration: 44517
loss: 1.0206760168075562,grad_norm: 0.9999991197446855, iteration: 44518
loss: 0.9951203465461731,grad_norm: 0.8187453160798914, iteration: 44519
loss: 1.0353449583053589,grad_norm: 0.9943607277689105, iteration: 44520
loss: 1.032728910446167,grad_norm: 0.9230914940162063, iteration: 44521
loss: 1.0058296918869019,grad_norm: 0.9999995487683296, iteration: 44522
loss: 1.017414927482605,grad_norm: 0.9007359936737842, iteration: 44523
loss: 1.0262644290924072,grad_norm: 0.9235718626566268, iteration: 44524
loss: 1.0337743759155273,grad_norm: 0.9999993101590036, iteration: 44525
loss: 1.0435832738876343,grad_norm: 0.9999993102613629, iteration: 44526
loss: 0.977122962474823,grad_norm: 0.999999072271041, iteration: 44527
loss: 0.992143988609314,grad_norm: 0.8681402697038473, iteration: 44528
loss: 0.9828306436538696,grad_norm: 0.756300055983145, iteration: 44529
loss: 1.0226751565933228,grad_norm: 0.9999995466985928, iteration: 44530
loss: 0.9974859356880188,grad_norm: 0.8129075710406486, iteration: 44531
loss: 1.0297267436981201,grad_norm: 0.9999993208817596, iteration: 44532
loss: 1.0450947284698486,grad_norm: 0.8977068382086977, iteration: 44533
loss: 1.0033766031265259,grad_norm: 0.9263554044180422, iteration: 44534
loss: 1.0277984142303467,grad_norm: 0.9999989769275935, iteration: 44535
loss: 1.0760701894760132,grad_norm: 0.9999995438346905, iteration: 44536
loss: 1.039281964302063,grad_norm: 0.9999992016702127, iteration: 44537
loss: 0.9882423877716064,grad_norm: 0.9859674729850805, iteration: 44538
loss: 0.9983859062194824,grad_norm: 0.9999991590068726, iteration: 44539
loss: 1.0174548625946045,grad_norm: 0.9999998411948133, iteration: 44540
loss: 0.98155677318573,grad_norm: 0.7568557629038418, iteration: 44541
loss: 1.0162657499313354,grad_norm: 0.8798968060242013, iteration: 44542
loss: 1.0167572498321533,grad_norm: 0.9999994021673956, iteration: 44543
loss: 0.9571922421455383,grad_norm: 0.832079275174564, iteration: 44544
loss: 1.0663777589797974,grad_norm: 0.999999693536902, iteration: 44545
loss: 1.0077623128890991,grad_norm: 0.9999991443995744, iteration: 44546
loss: 1.0004663467407227,grad_norm: 0.9999990741233963, iteration: 44547
loss: 0.9855408072471619,grad_norm: 0.9999992720321882, iteration: 44548
loss: 1.0067306756973267,grad_norm: 0.9600123493806179, iteration: 44549
loss: 1.0244349241256714,grad_norm: 0.9999991000711328, iteration: 44550
loss: 1.0027529001235962,grad_norm: 0.9999996957462942, iteration: 44551
loss: 1.0040779113769531,grad_norm: 0.8088499088773263, iteration: 44552
loss: 0.9857801795005798,grad_norm: 0.9551045065663953, iteration: 44553
loss: 0.9773248434066772,grad_norm: 0.9999990323944161, iteration: 44554
loss: 0.98267662525177,grad_norm: 0.9999994551416048, iteration: 44555
loss: 1.0060908794403076,grad_norm: 0.9078206867217964, iteration: 44556
loss: 1.0098931789398193,grad_norm: 0.9999992011070651, iteration: 44557
loss: 1.0248156785964966,grad_norm: 0.9617802906817904, iteration: 44558
loss: 1.0076388120651245,grad_norm: 0.9492646118599062, iteration: 44559
loss: 0.9479811787605286,grad_norm: 0.855657380636408, iteration: 44560
loss: 0.9886474013328552,grad_norm: 0.9999992168639461, iteration: 44561
loss: 1.0161738395690918,grad_norm: 0.9999995674500961, iteration: 44562
loss: 0.9749369025230408,grad_norm: 0.9659825009039767, iteration: 44563
loss: 0.9749801158905029,grad_norm: 0.8636486397132276, iteration: 44564
loss: 1.0105340480804443,grad_norm: 0.9999991820939483, iteration: 44565
loss: 0.9823472499847412,grad_norm: 0.8580198953430156, iteration: 44566
loss: 1.029388427734375,grad_norm: 0.8712290142387237, iteration: 44567
loss: 0.9712113738059998,grad_norm: 0.9356447871318277, iteration: 44568
loss: 1.0048913955688477,grad_norm: 0.9999992487403312, iteration: 44569
loss: 0.9862728118896484,grad_norm: 0.9999991061176544, iteration: 44570
loss: 1.0205775499343872,grad_norm: 0.9806910211488609, iteration: 44571
loss: 1.0018378496170044,grad_norm: 0.9999991847324726, iteration: 44572
loss: 1.0300753116607666,grad_norm: 0.9049762792941776, iteration: 44573
loss: 1.0123249292373657,grad_norm: 0.8465695059579461, iteration: 44574
loss: 1.0077608823776245,grad_norm: 0.9557550760435328, iteration: 44575
loss: 1.036930799484253,grad_norm: 0.9116616974929143, iteration: 44576
loss: 1.024055004119873,grad_norm: 0.9999992672004643, iteration: 44577
loss: 0.9915948510169983,grad_norm: 0.8561469800511364, iteration: 44578
loss: 0.9381119012832642,grad_norm: 0.934718086422091, iteration: 44579
loss: 0.9948400259017944,grad_norm: 0.9087447899193806, iteration: 44580
loss: 0.997988760471344,grad_norm: 0.8592814270344802, iteration: 44581
loss: 1.040706992149353,grad_norm: 0.9999994079985794, iteration: 44582
loss: 1.0175232887268066,grad_norm: 0.9999991580127867, iteration: 44583
loss: 0.9999589920043945,grad_norm: 0.9468304858093154, iteration: 44584
loss: 0.9605432152748108,grad_norm: 0.9206078447527446, iteration: 44585
loss: 1.051203727722168,grad_norm: 0.9999992684764475, iteration: 44586
loss: 1.0360544919967651,grad_norm: 0.9999991206007992, iteration: 44587
loss: 1.036651849746704,grad_norm: 0.9542384189728682, iteration: 44588
loss: 1.0049983263015747,grad_norm: 0.8112435834993957, iteration: 44589
loss: 1.0328174829483032,grad_norm: 0.9336592968451709, iteration: 44590
loss: 1.0138540267944336,grad_norm: 0.9999993662907354, iteration: 44591
loss: 1.0193408727645874,grad_norm: 0.8656367248837399, iteration: 44592
loss: 1.040238618850708,grad_norm: 0.9999993348396504, iteration: 44593
loss: 1.0128978490829468,grad_norm: 0.9999991631152608, iteration: 44594
loss: 1.0434812307357788,grad_norm: 0.9999991583056947, iteration: 44595
loss: 0.9628674983978271,grad_norm: 0.8527174925952665, iteration: 44596
loss: 1.012860655784607,grad_norm: 0.9999996241530598, iteration: 44597
loss: 0.9949732422828674,grad_norm: 0.9999990057768453, iteration: 44598
loss: 0.9872317910194397,grad_norm: 0.9999990146897726, iteration: 44599
loss: 0.9722194075584412,grad_norm: 0.9999995076025234, iteration: 44600
loss: 1.0407685041427612,grad_norm: 0.8575616372066233, iteration: 44601
loss: 0.9962844252586365,grad_norm: 0.9999994427520927, iteration: 44602
loss: 1.0352157354354858,grad_norm: 0.9999991931152934, iteration: 44603
loss: 0.9920310974121094,grad_norm: 0.9999994059024173, iteration: 44604
loss: 0.9999498128890991,grad_norm: 0.8865393293321867, iteration: 44605
loss: 1.0651555061340332,grad_norm: 0.9999993543942656, iteration: 44606
loss: 0.9902265071868896,grad_norm: 0.999999067615553, iteration: 44607
loss: 1.0712246894836426,grad_norm: 0.9999994213277509, iteration: 44608
loss: 0.992350697517395,grad_norm: 0.9999999832376355, iteration: 44609
loss: 1.010830283164978,grad_norm: 0.9999991673199657, iteration: 44610
loss: 1.0295562744140625,grad_norm: 0.9999990232662495, iteration: 44611
loss: 1.032310962677002,grad_norm: 0.9999997122809228, iteration: 44612
loss: 0.9990843534469604,grad_norm: 0.8042864639677002, iteration: 44613
loss: 1.0398199558258057,grad_norm: 0.9999990664815304, iteration: 44614
loss: 1.0232408046722412,grad_norm: 0.9999992558035355, iteration: 44615
loss: 1.0264405012130737,grad_norm: 0.9999996608960808, iteration: 44616
loss: 1.000930905342102,grad_norm: 0.9999989818512194, iteration: 44617
loss: 1.0122534036636353,grad_norm: 0.916572594060036, iteration: 44618
loss: 1.0073580741882324,grad_norm: 0.9785648529135396, iteration: 44619
loss: 1.0130711793899536,grad_norm: 0.9938778229028155, iteration: 44620
loss: 1.0375224351882935,grad_norm: 0.9999994868419589, iteration: 44621
loss: 1.0266835689544678,grad_norm: 0.9999992285513931, iteration: 44622
loss: 1.0202534198760986,grad_norm: 0.9929620545587753, iteration: 44623
loss: 1.0178143978118896,grad_norm: 0.9999989817772015, iteration: 44624
loss: 1.0082963705062866,grad_norm: 0.7407631624814243, iteration: 44625
loss: 0.9746573567390442,grad_norm: 0.9999991443096325, iteration: 44626
loss: 1.0157588720321655,grad_norm: 0.9999996885320291, iteration: 44627
loss: 1.0160449743270874,grad_norm: 0.999999299745942, iteration: 44628
loss: 1.0384175777435303,grad_norm: 0.8807477971427874, iteration: 44629
loss: 1.0193430185317993,grad_norm: 0.9999990191873266, iteration: 44630
loss: 0.9864408373832703,grad_norm: 0.893732350444985, iteration: 44631
loss: 0.9806991219520569,grad_norm: 0.9636481093535542, iteration: 44632
loss: 1.0171924829483032,grad_norm: 0.9999996434476249, iteration: 44633
loss: 1.0090057849884033,grad_norm: 0.8321457081909622, iteration: 44634
loss: 1.0011670589447021,grad_norm: 0.9999992733828795, iteration: 44635
loss: 1.0086685419082642,grad_norm: 0.9999994479110851, iteration: 44636
loss: 1.0381686687469482,grad_norm: 0.9999994505682845, iteration: 44637
loss: 1.0331249237060547,grad_norm: 0.9999990308705474, iteration: 44638
loss: 0.9847778081893921,grad_norm: 0.8847526721928266, iteration: 44639
loss: 1.0043936967849731,grad_norm: 0.9999991137464674, iteration: 44640
loss: 1.024001121520996,grad_norm: 0.9999992162964705, iteration: 44641
loss: 0.9989287853240967,grad_norm: 0.7608758603764507, iteration: 44642
loss: 1.01028311252594,grad_norm: 0.9444301234860477, iteration: 44643
loss: 0.9989209771156311,grad_norm: 0.9999989826643005, iteration: 44644
loss: 1.0274808406829834,grad_norm: 0.8464310399872884, iteration: 44645
loss: 1.00571870803833,grad_norm: 0.9999996965183932, iteration: 44646
loss: 1.0651284456253052,grad_norm: 0.9999995225094435, iteration: 44647
loss: 0.9993419051170349,grad_norm: 0.9999992376445933, iteration: 44648
loss: 1.0225841999053955,grad_norm: 0.9568465791676397, iteration: 44649
loss: 1.020728349685669,grad_norm: 0.9999990297869531, iteration: 44650
loss: 1.015063762664795,grad_norm: 0.9999994392363956, iteration: 44651
loss: 1.096704125404358,grad_norm: 0.8629062996257211, iteration: 44652
loss: 1.0020256042480469,grad_norm: 0.8906731096227707, iteration: 44653
loss: 1.0599305629730225,grad_norm: 0.9999998267720737, iteration: 44654
loss: 0.9948291778564453,grad_norm: 0.8798723713221243, iteration: 44655
loss: 1.018620252609253,grad_norm: 0.7881070610895418, iteration: 44656
loss: 0.9997566938400269,grad_norm: 0.9999998070806604, iteration: 44657
loss: 1.0220471620559692,grad_norm: 0.9999990042212611, iteration: 44658
loss: 0.9994962811470032,grad_norm: 0.9999991434781332, iteration: 44659
loss: 1.0422956943511963,grad_norm: 0.9999993123999897, iteration: 44660
loss: 1.0167174339294434,grad_norm: 0.8937285574263192, iteration: 44661
loss: 0.9935749769210815,grad_norm: 0.9021465311926404, iteration: 44662
loss: 1.0397491455078125,grad_norm: 0.9999995673528764, iteration: 44663
loss: 1.0175784826278687,grad_norm: 0.9506857232860876, iteration: 44664
loss: 0.9773725271224976,grad_norm: 0.9999990144983159, iteration: 44665
loss: 1.0000897645950317,grad_norm: 0.7729234484670505, iteration: 44666
loss: 0.9993427395820618,grad_norm: 0.9847029218169243, iteration: 44667
loss: 1.0287830829620361,grad_norm: 0.9999990345914621, iteration: 44668
loss: 1.08345627784729,grad_norm: 0.9999992234263887, iteration: 44669
loss: 1.0961953401565552,grad_norm: 0.9999995752659148, iteration: 44670
loss: 1.0101041793823242,grad_norm: 0.9999993115047888, iteration: 44671
loss: 0.9998655319213867,grad_norm: 0.7824502501631265, iteration: 44672
loss: 1.0369107723236084,grad_norm: 0.9999995660520863, iteration: 44673
loss: 1.0219241380691528,grad_norm: 0.9999992971955792, iteration: 44674
loss: 1.007468819618225,grad_norm: 0.9999990545553499, iteration: 44675
loss: 0.9816953539848328,grad_norm: 0.8708005507353968, iteration: 44676
loss: 1.0202349424362183,grad_norm: 0.9246901908634076, iteration: 44677
loss: 0.9977468252182007,grad_norm: 0.9999991632517633, iteration: 44678
loss: 1.0023109912872314,grad_norm: 0.999999037618395, iteration: 44679
loss: 1.0281926393508911,grad_norm: 0.8491773344979794, iteration: 44680
loss: 1.0778430700302124,grad_norm: 0.9999991737164295, iteration: 44681
loss: 0.9417656660079956,grad_norm: 0.995425096223512, iteration: 44682
loss: 0.9864257574081421,grad_norm: 0.8039874226171594, iteration: 44683
loss: 1.0232744216918945,grad_norm: 0.9999992705572534, iteration: 44684
loss: 1.0154985189437866,grad_norm: 0.9999991923644916, iteration: 44685
loss: 1.069928526878357,grad_norm: 0.9999999067350938, iteration: 44686
loss: 1.03347909450531,grad_norm: 0.8905715741391513, iteration: 44687
loss: 1.0238447189331055,grad_norm: 0.7533792322531613, iteration: 44688
loss: 1.016985535621643,grad_norm: 0.938564789292321, iteration: 44689
loss: 1.0002647638320923,grad_norm: 0.9551220205340738, iteration: 44690
loss: 0.9662148952484131,grad_norm: 0.9234700129017712, iteration: 44691
loss: 1.0480984449386597,grad_norm: 0.9999997760308817, iteration: 44692
loss: 1.0074653625488281,grad_norm: 0.7040120023512421, iteration: 44693
loss: 1.057963490486145,grad_norm: 0.885051487494847, iteration: 44694
loss: 1.0380593538284302,grad_norm: 0.9999990674341773, iteration: 44695
loss: 0.9947583079338074,grad_norm: 0.999999181189334, iteration: 44696
loss: 0.9524618983268738,grad_norm: 0.8730312658155298, iteration: 44697
loss: 1.0083861351013184,grad_norm: 0.9999996142606493, iteration: 44698
loss: 1.0199075937271118,grad_norm: 0.9999992692125937, iteration: 44699
loss: 1.0276650190353394,grad_norm: 0.9999996205059135, iteration: 44700
loss: 1.0360125303268433,grad_norm: 0.9999990535172665, iteration: 44701
loss: 1.0750807523727417,grad_norm: 0.9999991550649947, iteration: 44702
loss: 1.0080918073654175,grad_norm: 0.9999990758500298, iteration: 44703
loss: 1.0872840881347656,grad_norm: 0.9999998933291163, iteration: 44704
loss: 1.0425690412521362,grad_norm: 0.999999215165345, iteration: 44705
loss: 1.0124330520629883,grad_norm: 0.9999990060677184, iteration: 44706
loss: 1.0052387714385986,grad_norm: 0.9189077668188352, iteration: 44707
loss: 1.3244664669036865,grad_norm: 0.9999995472714073, iteration: 44708
loss: 0.9894004464149475,grad_norm: 0.9999995338461117, iteration: 44709
loss: 1.0004220008850098,grad_norm: 0.9999991844690248, iteration: 44710
loss: 1.0172173976898193,grad_norm: 0.730965424026692, iteration: 44711
loss: 1.0209250450134277,grad_norm: 0.9911702402319916, iteration: 44712
loss: 1.0461854934692383,grad_norm: 0.9999990445436678, iteration: 44713
loss: 1.0907217264175415,grad_norm: 0.9999993068838656, iteration: 44714
loss: 1.0724067687988281,grad_norm: 0.9999992594332422, iteration: 44715
loss: 0.9896316528320312,grad_norm: 0.999998895695769, iteration: 44716
loss: 1.0594245195388794,grad_norm: 0.9999997961974552, iteration: 44717
loss: 0.9942732453346252,grad_norm: 0.9917240153081258, iteration: 44718
loss: 0.9806908369064331,grad_norm: 0.8283868310106988, iteration: 44719
loss: 0.995104968547821,grad_norm: 0.9502508069762268, iteration: 44720
loss: 1.02845299243927,grad_norm: 0.9999990548615092, iteration: 44721
loss: 0.9892529249191284,grad_norm: 0.8664638749076033, iteration: 44722
loss: 1.014498233795166,grad_norm: 0.9999990056073004, iteration: 44723
loss: 1.0032371282577515,grad_norm: 0.8715163743217494, iteration: 44724
loss: 1.026448369026184,grad_norm: 0.99999929475947, iteration: 44725
loss: 1.04109525680542,grad_norm: 0.9999993594681635, iteration: 44726
loss: 1.0630782842636108,grad_norm: 0.9999998634075852, iteration: 44727
loss: 0.9872423410415649,grad_norm: 0.9999990876809142, iteration: 44728
loss: 1.0337488651275635,grad_norm: 0.9999990362059423, iteration: 44729
loss: 1.0303460359573364,grad_norm: 0.9999999640106991, iteration: 44730
loss: 1.048197865486145,grad_norm: 0.9999992218611707, iteration: 44731
loss: 1.0355151891708374,grad_norm: 0.9974421091089263, iteration: 44732
loss: 0.9924431443214417,grad_norm: 0.9405451174905481, iteration: 44733
loss: 1.0054441690444946,grad_norm: 0.8632737131082625, iteration: 44734
loss: 1.1036244630813599,grad_norm: 0.9999991244838358, iteration: 44735
loss: 1.0026096105575562,grad_norm: 0.9999993361043128, iteration: 44736
loss: 0.9895861148834229,grad_norm: 0.9999998296825405, iteration: 44737
loss: 1.0170707702636719,grad_norm: 0.9406214034555108, iteration: 44738
loss: 0.9883177280426025,grad_norm: 0.9492100583205744, iteration: 44739
loss: 1.0048080682754517,grad_norm: 0.8713471700062891, iteration: 44740
loss: 1.120157241821289,grad_norm: 0.999999770212125, iteration: 44741
loss: 1.0086016654968262,grad_norm: 0.934260851542611, iteration: 44742
loss: 0.998951256275177,grad_norm: 0.999999330129047, iteration: 44743
loss: 1.0911818742752075,grad_norm: 0.9999997674367429, iteration: 44744
loss: 1.0817797183990479,grad_norm: 0.9999994943658348, iteration: 44745
loss: 1.0407086610794067,grad_norm: 0.9795097535109906, iteration: 44746
loss: 1.0787286758422852,grad_norm: 0.9999993262144821, iteration: 44747
loss: 1.0059419870376587,grad_norm: 0.9854139843081631, iteration: 44748
loss: 1.145353078842163,grad_norm: 0.9999999464674943, iteration: 44749
loss: 0.9659258127212524,grad_norm: 0.9900973478686873, iteration: 44750
loss: 1.0090357065200806,grad_norm: 0.9999991866731963, iteration: 44751
loss: 1.0238219499588013,grad_norm: 0.8855381589391197, iteration: 44752
loss: 1.038427472114563,grad_norm: 0.9999992416814824, iteration: 44753
loss: 1.0081110000610352,grad_norm: 0.9999991905200132, iteration: 44754
loss: 1.0043600797653198,grad_norm: 0.9999991897707607, iteration: 44755
loss: 1.0731674432754517,grad_norm: 0.9999994854528444, iteration: 44756
loss: 0.9964596033096313,grad_norm: 0.9948353375319066, iteration: 44757
loss: 0.9568737149238586,grad_norm: 0.8231086996622223, iteration: 44758
loss: 1.0260263681411743,grad_norm: 0.9999991527013571, iteration: 44759
loss: 1.0343612432479858,grad_norm: 0.9999990380416679, iteration: 44760
loss: 1.0294547080993652,grad_norm: 0.999999895138891, iteration: 44761
loss: 1.0332783460617065,grad_norm: 0.9704863357142295, iteration: 44762
loss: 1.0107165575027466,grad_norm: 0.9999991797952273, iteration: 44763
loss: 1.0213226079940796,grad_norm: 0.942462986914614, iteration: 44764
loss: 0.9945273399353027,grad_norm: 0.9777847311221829, iteration: 44765
loss: 0.9814913272857666,grad_norm: 0.9999991744585093, iteration: 44766
loss: 1.0523382425308228,grad_norm: 0.9999992729442189, iteration: 44767
loss: 1.008530855178833,grad_norm: 0.9883384564742217, iteration: 44768
loss: 1.018241286277771,grad_norm: 0.9418183920107106, iteration: 44769
loss: 1.0144306421279907,grad_norm: 0.9999991798367148, iteration: 44770
loss: 1.0048472881317139,grad_norm: 0.9999989693443679, iteration: 44771
loss: 1.053328037261963,grad_norm: 0.9863794232653571, iteration: 44772
loss: 1.0516304969787598,grad_norm: 0.9999991712235204, iteration: 44773
loss: 1.0201704502105713,grad_norm: 0.9287074446634931, iteration: 44774
loss: 0.9996036291122437,grad_norm: 0.9999993721659943, iteration: 44775
loss: 1.1267706155776978,grad_norm: 0.9999998954985713, iteration: 44776
loss: 1.0599820613861084,grad_norm: 0.9999991836168773, iteration: 44777
loss: 1.0113130807876587,grad_norm: 0.9999990359532707, iteration: 44778
loss: 1.0251028537750244,grad_norm: 0.9658184111486061, iteration: 44779
loss: 0.9736002087593079,grad_norm: 0.8092709360950276, iteration: 44780
loss: 1.012651801109314,grad_norm: 0.9999993318174262, iteration: 44781
loss: 1.1547307968139648,grad_norm: 0.9999991710558517, iteration: 44782
loss: 1.110152244567871,grad_norm: 0.9999999332146552, iteration: 44783
loss: 1.0791199207305908,grad_norm: 0.999999243822494, iteration: 44784
loss: 0.9917792081832886,grad_norm: 0.8836665667393424, iteration: 44785
loss: 1.0190445184707642,grad_norm: 0.9999997791439738, iteration: 44786
loss: 0.9883958697319031,grad_norm: 0.8501996062307796, iteration: 44787
loss: 1.0054981708526611,grad_norm: 0.9999993327945831, iteration: 44788
loss: 0.9777683615684509,grad_norm: 0.7808506825546021, iteration: 44789
loss: 1.1511152982711792,grad_norm: 0.9999995577752634, iteration: 44790
loss: 1.1048600673675537,grad_norm: 0.9999997287120956, iteration: 44791
loss: 1.0897639989852905,grad_norm: 0.999999551085554, iteration: 44792
loss: 1.2629016637802124,grad_norm: 0.9999997057361526, iteration: 44793
loss: 1.1632815599441528,grad_norm: 0.9999997028710209, iteration: 44794
loss: 1.1526073217391968,grad_norm: 0.999999328045723, iteration: 44795
loss: 0.9896925091743469,grad_norm: 0.9999991511251619, iteration: 44796
loss: 1.022432565689087,grad_norm: 0.9897801809783429, iteration: 44797
loss: 1.012248158454895,grad_norm: 0.9999990249277604, iteration: 44798
loss: 0.992676317691803,grad_norm: 0.9836031593451412, iteration: 44799
loss: 1.0568903684616089,grad_norm: 0.9434701899405824, iteration: 44800
loss: 1.0050678253173828,grad_norm: 0.9109080305544995, iteration: 44801
loss: 1.017543911933899,grad_norm: 0.9999993260454227, iteration: 44802
loss: 1.0371177196502686,grad_norm: 0.9999996982939354, iteration: 44803
loss: 1.0317128896713257,grad_norm: 0.9999997233904288, iteration: 44804
loss: 1.0206897258758545,grad_norm: 0.9999990349067682, iteration: 44805
loss: 1.1747617721557617,grad_norm: 0.9999998540324034, iteration: 44806
loss: 1.0137135982513428,grad_norm: 0.8582832806146241, iteration: 44807
loss: 1.0461007356643677,grad_norm: 0.999999724655208, iteration: 44808
loss: 1.0544075965881348,grad_norm: 0.9999992814432473, iteration: 44809
loss: 1.0659418106079102,grad_norm: 0.9999993920082676, iteration: 44810
loss: 1.0216747522354126,grad_norm: 0.999999673189504, iteration: 44811
loss: 1.1423252820968628,grad_norm: 0.9999992541839754, iteration: 44812
loss: 1.0780969858169556,grad_norm: 0.9737521729414954, iteration: 44813
loss: 1.0187644958496094,grad_norm: 0.9999991014753722, iteration: 44814
loss: 1.022944450378418,grad_norm: 0.934038450403596, iteration: 44815
loss: 1.0940287113189697,grad_norm: 0.9999995381645391, iteration: 44816
loss: 1.0341256856918335,grad_norm: 0.9999995752829388, iteration: 44817
loss: 1.0234633684158325,grad_norm: 0.9999994378764571, iteration: 44818
loss: 1.0111721754074097,grad_norm: 0.9999993317731586, iteration: 44819
loss: 1.0173205137252808,grad_norm: 0.9999993296395598, iteration: 44820
loss: 0.9686933159828186,grad_norm: 0.9999991180509481, iteration: 44821
loss: 1.0449942350387573,grad_norm: 0.9999997223399116, iteration: 44822
loss: 1.0084803104400635,grad_norm: 0.9999993180212053, iteration: 44823
loss: 1.0005005598068237,grad_norm: 0.8872650406308648, iteration: 44824
loss: 1.047167181968689,grad_norm: 0.9999990577379007, iteration: 44825
loss: 1.0219600200653076,grad_norm: 0.9436203223004312, iteration: 44826
loss: 0.9678182601928711,grad_norm: 0.9999991478375732, iteration: 44827
loss: 1.0299803018569946,grad_norm: 0.9999999151840634, iteration: 44828
loss: 0.9988281726837158,grad_norm: 0.9990741668102053, iteration: 44829
loss: 0.9888662099838257,grad_norm: 0.9849725093668781, iteration: 44830
loss: 0.9967519044876099,grad_norm: 0.9999990262195292, iteration: 44831
loss: 1.007482886314392,grad_norm: 0.9719201504865301, iteration: 44832
loss: 1.0184918642044067,grad_norm: 0.9452225017191432, iteration: 44833
loss: 1.046733021736145,grad_norm: 0.9999995281915033, iteration: 44834
loss: 1.0189989805221558,grad_norm: 0.8642921589602378, iteration: 44835
loss: 1.07857346534729,grad_norm: 0.9999991499490718, iteration: 44836
loss: 1.03099524974823,grad_norm: 0.9999992313454924, iteration: 44837
loss: 1.0110883712768555,grad_norm: 0.9999990411737519, iteration: 44838
loss: 1.0285588502883911,grad_norm: 0.9999993874955917, iteration: 44839
loss: 1.0308727025985718,grad_norm: 0.9999993265217632, iteration: 44840
loss: 1.0506489276885986,grad_norm: 0.9999993999085757, iteration: 44841
loss: 0.9659867882728577,grad_norm: 0.9999991705478791, iteration: 44842
loss: 1.0099431276321411,grad_norm: 0.793489712943084, iteration: 44843
loss: 1.0323938131332397,grad_norm: 0.8111423553485615, iteration: 44844
loss: 1.0161398649215698,grad_norm: 0.999999170147291, iteration: 44845
loss: 1.0181186199188232,grad_norm: 0.9999991569463582, iteration: 44846
loss: 1.0068012475967407,grad_norm: 0.8387379747329722, iteration: 44847
loss: 0.97589111328125,grad_norm: 0.9999996605579824, iteration: 44848
loss: 1.0746376514434814,grad_norm: 0.9999997437550782, iteration: 44849
loss: 1.037400722503662,grad_norm: 0.9999992625419727, iteration: 44850
loss: 1.014059066772461,grad_norm: 0.9999990007325836, iteration: 44851
loss: 0.9672108292579651,grad_norm: 0.8598524499396806, iteration: 44852
loss: 1.0060040950775146,grad_norm: 0.9999990091714093, iteration: 44853
loss: 1.0068062543869019,grad_norm: 0.9999991409233024, iteration: 44854
loss: 0.9919093251228333,grad_norm: 0.9999990381356517, iteration: 44855
loss: 1.01664400100708,grad_norm: 0.9274700333576076, iteration: 44856
loss: 1.0274969339370728,grad_norm: 0.999999562525936, iteration: 44857
loss: 0.9604380130767822,grad_norm: 0.9999991222559506, iteration: 44858
loss: 0.9988691210746765,grad_norm: 0.9243526121503012, iteration: 44859
loss: 1.007890224456787,grad_norm: 0.9999996427923791, iteration: 44860
loss: 0.9829626679420471,grad_norm: 0.9999991137317843, iteration: 44861
loss: 1.0012398958206177,grad_norm: 0.9999990805365775, iteration: 44862
loss: 1.0268350839614868,grad_norm: 0.9032692667855564, iteration: 44863
loss: 1.0253989696502686,grad_norm: 0.9999990330499421, iteration: 44864
loss: 1.0355952978134155,grad_norm: 0.9999997326049065, iteration: 44865
loss: 1.0492637157440186,grad_norm: 0.793194105945689, iteration: 44866
loss: 1.005704402923584,grad_norm: 0.9999989563869924, iteration: 44867
loss: 1.007637619972229,grad_norm: 0.9999992666190506, iteration: 44868
loss: 1.0363643169403076,grad_norm: 0.9735985632875844, iteration: 44869
loss: 0.9934248924255371,grad_norm: 0.9648138890557227, iteration: 44870
loss: 1.0134257078170776,grad_norm: 0.9999991973666263, iteration: 44871
loss: 1.0100607872009277,grad_norm: 0.9217764191607335, iteration: 44872
loss: 1.0098152160644531,grad_norm: 0.9999990100381438, iteration: 44873
loss: 1.1602416038513184,grad_norm: 0.9999996910858298, iteration: 44874
loss: 1.0423296689987183,grad_norm: 0.9999999575444501, iteration: 44875
loss: 0.9796197414398193,grad_norm: 0.8133066928498603, iteration: 44876
loss: 1.072456955909729,grad_norm: 0.9999999046995174, iteration: 44877
loss: 1.0165934562683105,grad_norm: 0.9594662842894055, iteration: 44878
loss: 0.9991472959518433,grad_norm: 0.9996019934645521, iteration: 44879
loss: 1.0645315647125244,grad_norm: 0.9999997096978858, iteration: 44880
loss: 0.9976758360862732,grad_norm: 0.9891795330686503, iteration: 44881
loss: 1.014790415763855,grad_norm: 0.9149623863121172, iteration: 44882
loss: 1.0595142841339111,grad_norm: 0.9999994272776783, iteration: 44883
loss: 1.0158262252807617,grad_norm: 0.99999942182895, iteration: 44884
loss: 1.0184625387191772,grad_norm: 0.9999990172166788, iteration: 44885
loss: 1.0569536685943604,grad_norm: 0.9999996837367869, iteration: 44886
loss: 0.9847332239151001,grad_norm: 0.9999998216760476, iteration: 44887
loss: 0.9907613396644592,grad_norm: 0.8360985685056171, iteration: 44888
loss: 0.9810884594917297,grad_norm: 0.9718797891266829, iteration: 44889
loss: 0.9924584627151489,grad_norm: 0.9999990968579808, iteration: 44890
loss: 1.00576913356781,grad_norm: 0.9790874556017514, iteration: 44891
loss: 0.9975787401199341,grad_norm: 0.9761226688941461, iteration: 44892
loss: 0.9921218752861023,grad_norm: 0.9999991605936469, iteration: 44893
loss: 1.001235842704773,grad_norm: 0.8613353525273996, iteration: 44894
loss: 0.9838396906852722,grad_norm: 0.8707338053464873, iteration: 44895
loss: 1.0000135898590088,grad_norm: 0.9999993324958051, iteration: 44896
loss: 0.9989342093467712,grad_norm: 0.937144403505343, iteration: 44897
loss: 0.9850337505340576,grad_norm: 0.903911005324579, iteration: 44898
loss: 1.039717197418213,grad_norm: 0.9999992452974821, iteration: 44899
loss: 1.0206222534179688,grad_norm: 0.9078097661945069, iteration: 44900
loss: 0.9955727458000183,grad_norm: 0.8619166032430903, iteration: 44901
loss: 0.9792510271072388,grad_norm: 0.8971841203638018, iteration: 44902
loss: 1.0208477973937988,grad_norm: 0.9010417350009224, iteration: 44903
loss: 0.9724598526954651,grad_norm: 0.7199071219556942, iteration: 44904
loss: 1.0125399827957153,grad_norm: 0.999999870633914, iteration: 44905
loss: 0.9707780480384827,grad_norm: 0.9999996632433974, iteration: 44906
loss: 0.9996258020401001,grad_norm: 0.7771027072083073, iteration: 44907
loss: 0.9695671796798706,grad_norm: 0.9535963779279293, iteration: 44908
loss: 1.0070030689239502,grad_norm: 0.9777092330104749, iteration: 44909
loss: 1.0328253507614136,grad_norm: 0.920765328821257, iteration: 44910
loss: 0.9645983576774597,grad_norm: 0.9086875706384598, iteration: 44911
loss: 1.0317447185516357,grad_norm: 0.9999989975247383, iteration: 44912
loss: 1.017594814300537,grad_norm: 0.9134315618052329, iteration: 44913
loss: 0.9701095223426819,grad_norm: 0.8772300562879382, iteration: 44914
loss: 1.008906602859497,grad_norm: 0.9999991754846317, iteration: 44915
loss: 0.974943995475769,grad_norm: 0.7663706216495044, iteration: 44916
loss: 0.998741626739502,grad_norm: 0.8905873295891404, iteration: 44917
loss: 1.0029065608978271,grad_norm: 0.9999989426916517, iteration: 44918
loss: 0.9932960867881775,grad_norm: 0.8146290912530063, iteration: 44919
loss: 1.0971187353134155,grad_norm: 0.9999998317819819, iteration: 44920
loss: 1.0238298177719116,grad_norm: 0.9999994672848244, iteration: 44921
loss: 1.004311203956604,grad_norm: 0.8849597258469635, iteration: 44922
loss: 1.0185372829437256,grad_norm: 0.9999995515138188, iteration: 44923
loss: 1.0430516004562378,grad_norm: 0.862846596705768, iteration: 44924
loss: 1.0193687677383423,grad_norm: 0.9999990846542088, iteration: 44925
loss: 0.9866137504577637,grad_norm: 0.9999990463261231, iteration: 44926
loss: 1.0435384511947632,grad_norm: 0.9999991863708715, iteration: 44927
loss: 1.0110231637954712,grad_norm: 0.949292017533082, iteration: 44928
loss: 1.0480186939239502,grad_norm: 0.9999993888811424, iteration: 44929
loss: 1.0183149576187134,grad_norm: 0.8303679209260909, iteration: 44930
loss: 0.9797424674034119,grad_norm: 0.8459075292092777, iteration: 44931
loss: 1.1128158569335938,grad_norm: 0.9999992714906127, iteration: 44932
loss: 1.023098349571228,grad_norm: 0.9418877449202577, iteration: 44933
loss: 1.0071049928665161,grad_norm: 0.9999990037784323, iteration: 44934
loss: 0.995399534702301,grad_norm: 0.9999990366126671, iteration: 44935
loss: 1.05942964553833,grad_norm: 0.999999297788466, iteration: 44936
loss: 1.0228462219238281,grad_norm: 0.9999993141763472, iteration: 44937
loss: 1.0272808074951172,grad_norm: 0.910616422999591, iteration: 44938
loss: 0.9836410880088806,grad_norm: 0.8627772345468776, iteration: 44939
loss: 0.9468314051628113,grad_norm: 0.9443745394731126, iteration: 44940
loss: 1.0515339374542236,grad_norm: 0.9351564130015837, iteration: 44941
loss: 0.9804385304450989,grad_norm: 0.9999991218291291, iteration: 44942
loss: 1.0311403274536133,grad_norm: 0.9999990611945345, iteration: 44943
loss: 0.9877128005027771,grad_norm: 0.867434755344833, iteration: 44944
loss: 1.0062215328216553,grad_norm: 0.9999991499582046, iteration: 44945
loss: 1.016204595565796,grad_norm: 0.9999995580832994, iteration: 44946
loss: 1.0424628257751465,grad_norm: 0.9973147122998209, iteration: 44947
loss: 0.9832273721694946,grad_norm: 0.999999082326857, iteration: 44948
loss: 0.9848095178604126,grad_norm: 0.9949212309167249, iteration: 44949
loss: 1.0052855014801025,grad_norm: 0.8590077048551985, iteration: 44950
loss: 1.0019196271896362,grad_norm: 0.8151517610981308, iteration: 44951
loss: 1.030483365058899,grad_norm: 0.9999993312918337, iteration: 44952
loss: 1.0536725521087646,grad_norm: 0.9999993198912804, iteration: 44953
loss: 1.0373724699020386,grad_norm: 0.9999991135954011, iteration: 44954
loss: 1.0116668939590454,grad_norm: 0.9874997616314255, iteration: 44955
loss: 0.9850261807441711,grad_norm: 0.9652875650280794, iteration: 44956
loss: 0.9828622341156006,grad_norm: 0.9999990571940439, iteration: 44957
loss: 1.0318917036056519,grad_norm: 0.9999992999011307, iteration: 44958
loss: 0.9848288297653198,grad_norm: 0.9097787884149436, iteration: 44959
loss: 1.0231918096542358,grad_norm: 0.9999996792862151, iteration: 44960
loss: 0.9630592465400696,grad_norm: 0.9999993941418133, iteration: 44961
loss: 0.9939324259757996,grad_norm: 0.9999991296061207, iteration: 44962
loss: 0.998240053653717,grad_norm: 0.9814887752342472, iteration: 44963
loss: 1.0081777572631836,grad_norm: 0.851693122193477, iteration: 44964
loss: 1.067012906074524,grad_norm: 0.9999991438899571, iteration: 44965
loss: 1.098116159439087,grad_norm: 0.9999999395104573, iteration: 44966
loss: 0.9960578680038452,grad_norm: 0.9999990221190377, iteration: 44967
loss: 1.0018967390060425,grad_norm: 0.955313806007464, iteration: 44968
loss: 1.038211703300476,grad_norm: 0.9766514921028986, iteration: 44969
loss: 0.9991599321365356,grad_norm: 0.9999990174977199, iteration: 44970
loss: 1.0585129261016846,grad_norm: 0.9999999712414669, iteration: 44971
loss: 1.0161443948745728,grad_norm: 0.9020902078671373, iteration: 44972
loss: 1.0185294151306152,grad_norm: 0.9407692432566912, iteration: 44973
loss: 1.0263655185699463,grad_norm: 0.9999991228619365, iteration: 44974
loss: 1.01333487033844,grad_norm: 0.9999990224606946, iteration: 44975
loss: 0.9568630456924438,grad_norm: 0.8767813158131109, iteration: 44976
loss: 0.9845548868179321,grad_norm: 0.9999992004067174, iteration: 44977
loss: 1.0155500173568726,grad_norm: 0.9999997991575514, iteration: 44978
loss: 1.0868242979049683,grad_norm: 0.9999995801167862, iteration: 44979
loss: 0.9830427765846252,grad_norm: 0.9999992284992766, iteration: 44980
loss: 0.9854499697685242,grad_norm: 0.9999990794547333, iteration: 44981
loss: 0.9920706748962402,grad_norm: 0.8385646591635801, iteration: 44982
loss: 1.0410714149475098,grad_norm: 0.9999993458810253, iteration: 44983
loss: 1.181633710861206,grad_norm: 0.99999907842892, iteration: 44984
loss: 1.0012348890304565,grad_norm: 0.8313534680548043, iteration: 44985
loss: 0.9880937337875366,grad_norm: 0.8648406521090112, iteration: 44986
loss: 0.972210705280304,grad_norm: 0.863552788754154, iteration: 44987
loss: 1.0136173963546753,grad_norm: 0.9999993470501767, iteration: 44988
loss: 0.9652465581893921,grad_norm: 0.9123312245573678, iteration: 44989
loss: 1.000175952911377,grad_norm: 0.9999992589163217, iteration: 44990
loss: 0.9874477982521057,grad_norm: 0.999999098926704, iteration: 44991
loss: 1.0227824449539185,grad_norm: 0.9999993035887079, iteration: 44992
loss: 1.006869912147522,grad_norm: 0.8970519151888757, iteration: 44993
loss: 1.040409803390503,grad_norm: 0.9999989780360149, iteration: 44994
loss: 1.032823085784912,grad_norm: 0.8750080958256754, iteration: 44995
loss: 0.9600381255149841,grad_norm: 0.9503236007026156, iteration: 44996
loss: 1.0471093654632568,grad_norm: 0.9999992577546312, iteration: 44997
loss: 0.9790168404579163,grad_norm: 0.9310307472642978, iteration: 44998
loss: 0.9885750412940979,grad_norm: 0.9368327116197359, iteration: 44999
loss: 1.0638651847839355,grad_norm: 0.9451120185470804, iteration: 45000
loss: 1.027349591255188,grad_norm: 0.9999994143982582, iteration: 45001
loss: 0.9873488545417786,grad_norm: 0.9999992654642539, iteration: 45002
loss: 1.026826024055481,grad_norm: 0.9999989885228545, iteration: 45003
loss: 1.0128462314605713,grad_norm: 0.9999997575653423, iteration: 45004
loss: 1.012866497039795,grad_norm: 0.9458233890264462, iteration: 45005
loss: 0.9779905080795288,grad_norm: 0.8639165000287847, iteration: 45006
loss: 1.0029767751693726,grad_norm: 0.9433840038251289, iteration: 45007
loss: 0.9898874759674072,grad_norm: 0.7971131411936795, iteration: 45008
loss: 0.9840772747993469,grad_norm: 0.7315619300616099, iteration: 45009
loss: 1.1479507684707642,grad_norm: 0.9999995600347568, iteration: 45010
loss: 0.9950699806213379,grad_norm: 0.9999995103216681, iteration: 45011
loss: 1.029355764389038,grad_norm: 0.999999781524708, iteration: 45012
loss: 1.0117456912994385,grad_norm: 0.9999994492173916, iteration: 45013
loss: 1.0350966453552246,grad_norm: 0.9999992028922488, iteration: 45014
loss: 1.0811578035354614,grad_norm: 0.9999998627910432, iteration: 45015
loss: 1.041007161140442,grad_norm: 0.9999991966686657, iteration: 45016
loss: 0.9608603119850159,grad_norm: 0.9999991952790845, iteration: 45017
loss: 1.0141421556472778,grad_norm: 0.9999993418719659, iteration: 45018
loss: 0.9984288215637207,grad_norm: 0.9999990214030791, iteration: 45019
loss: 0.9928093552589417,grad_norm: 0.7472683245911839, iteration: 45020
loss: 0.981167197227478,grad_norm: 0.8616947416752202, iteration: 45021
loss: 1.0235803127288818,grad_norm: 0.999999677026304, iteration: 45022
loss: 1.0096346139907837,grad_norm: 0.9892366858988795, iteration: 45023
loss: 1.0181993246078491,grad_norm: 0.9052518932711752, iteration: 45024
loss: 1.0284039974212646,grad_norm: 0.9999991769724561, iteration: 45025
loss: 1.0286790132522583,grad_norm: 0.9999991296464823, iteration: 45026
loss: 1.0025665760040283,grad_norm: 0.9999990614632371, iteration: 45027
loss: 1.017676830291748,grad_norm: 0.9344258480128083, iteration: 45028
loss: 1.0711742639541626,grad_norm: 0.9999995187181145, iteration: 45029
loss: 0.9763535261154175,grad_norm: 0.9999991019787293, iteration: 45030
loss: 1.0949205160140991,grad_norm: 0.99999950786313, iteration: 45031
loss: 1.0052001476287842,grad_norm: 0.9792793589398077, iteration: 45032
loss: 0.9982631206512451,grad_norm: 0.9999998232461866, iteration: 45033
loss: 0.9931111931800842,grad_norm: 0.9023750934626491, iteration: 45034
loss: 0.982151210308075,grad_norm: 0.9503833964493233, iteration: 45035
loss: 0.992340087890625,grad_norm: 0.9999989920025961, iteration: 45036
loss: 0.9967322945594788,grad_norm: 0.9120325669406458, iteration: 45037
loss: 1.0145617723464966,grad_norm: 0.9999992218644161, iteration: 45038
loss: 0.9879564046859741,grad_norm: 0.9258065470335242, iteration: 45039
loss: 0.993988037109375,grad_norm: 0.999999213609757, iteration: 45040
loss: 1.0114859342575073,grad_norm: 0.9999991298681874, iteration: 45041
loss: 1.0726194381713867,grad_norm: 0.9999994299097545, iteration: 45042
loss: 1.010449767112732,grad_norm: 0.9999989910310538, iteration: 45043
loss: 1.0150048732757568,grad_norm: 0.9999991708707082, iteration: 45044
loss: 0.9870796799659729,grad_norm: 0.8853320187899235, iteration: 45045
loss: 1.0931000709533691,grad_norm: 0.9999993406285128, iteration: 45046
loss: 1.0042071342468262,grad_norm: 0.9999993322197168, iteration: 45047
loss: 1.0612263679504395,grad_norm: 0.9999992982982384, iteration: 45048
loss: 1.0113275051116943,grad_norm: 0.9972356102147287, iteration: 45049
loss: 1.1038919687271118,grad_norm: 0.9999998376825181, iteration: 45050
loss: 0.9902957677841187,grad_norm: 0.9869603247777123, iteration: 45051
loss: 1.0197677612304688,grad_norm: 0.9999990404530146, iteration: 45052
loss: 1.0066951513290405,grad_norm: 0.9999990435493963, iteration: 45053
loss: 0.9735820293426514,grad_norm: 0.8828505347097074, iteration: 45054
loss: 1.0694698095321655,grad_norm: 0.9999992846851287, iteration: 45055
loss: 1.0692768096923828,grad_norm: 0.9999995084055082, iteration: 45056
loss: 1.031362533569336,grad_norm: 0.99999907330921, iteration: 45057
loss: 0.9857704043388367,grad_norm: 0.9999990802325827, iteration: 45058
loss: 0.9752092957496643,grad_norm: 0.9884151763472443, iteration: 45059
loss: 0.9981773495674133,grad_norm: 0.9999991096784664, iteration: 45060
loss: 1.0127979516983032,grad_norm: 0.9999990528067048, iteration: 45061
loss: 1.0580394268035889,grad_norm: 0.9999991290430061, iteration: 45062
loss: 1.0824341773986816,grad_norm: 0.9999998074579187, iteration: 45063
loss: 1.0093846321105957,grad_norm: 0.9999992897858618, iteration: 45064
loss: 0.9977535009384155,grad_norm: 0.8851336755096918, iteration: 45065
loss: 1.0327391624450684,grad_norm: 0.8856561394486366, iteration: 45066
loss: 0.9943435192108154,grad_norm: 0.9017232466796181, iteration: 45067
loss: 1.0272893905639648,grad_norm: 0.9999992015357991, iteration: 45068
loss: 1.006360650062561,grad_norm: 0.9999991283578074, iteration: 45069
loss: 1.003903865814209,grad_norm: 0.8562989268095665, iteration: 45070
loss: 1.04463529586792,grad_norm: 0.9999993807230213, iteration: 45071
loss: 1.016827940940857,grad_norm: 0.9641567520152566, iteration: 45072
loss: 0.9958457946777344,grad_norm: 0.9999992546985846, iteration: 45073
loss: 0.98758465051651,grad_norm: 0.9999990133677649, iteration: 45074
loss: 1.0292848348617554,grad_norm: 0.8590451541920344, iteration: 45075
loss: 1.0270472764968872,grad_norm: 0.9999990140829504, iteration: 45076
loss: 0.9923555254936218,grad_norm: 0.8154525446999868, iteration: 45077
loss: 1.0036050081253052,grad_norm: 0.9999997023124914, iteration: 45078
loss: 0.989152193069458,grad_norm: 0.8705634591342244, iteration: 45079
loss: 1.009706735610962,grad_norm: 0.8917501605129488, iteration: 45080
loss: 0.9968288540840149,grad_norm: 0.9999990845609075, iteration: 45081
loss: 0.9919355511665344,grad_norm: 0.8754024649358868, iteration: 45082
loss: 1.0604302883148193,grad_norm: 0.999999429988707, iteration: 45083
loss: 1.006942868232727,grad_norm: 0.8016360901836177, iteration: 45084
loss: 1.0529180765151978,grad_norm: 0.9999999048032443, iteration: 45085
loss: 1.0044444799423218,grad_norm: 0.915000229872488, iteration: 45086
loss: 1.0600874423980713,grad_norm: 0.9999991066961349, iteration: 45087
loss: 1.0003231763839722,grad_norm: 0.9999995262765727, iteration: 45088
loss: 1.0334539413452148,grad_norm: 0.999999463404458, iteration: 45089
loss: 1.0058127641677856,grad_norm: 0.9179176113600938, iteration: 45090
loss: 0.9880832433700562,grad_norm: 0.9568253054809273, iteration: 45091
loss: 1.0152353048324585,grad_norm: 0.9254001422498492, iteration: 45092
loss: 1.061669111251831,grad_norm: 0.9999998675898457, iteration: 45093
loss: 1.0134594440460205,grad_norm: 0.966900881443402, iteration: 45094
loss: 0.9561082720756531,grad_norm: 0.8453587676793476, iteration: 45095
loss: 1.0325021743774414,grad_norm: 0.9999994676841751, iteration: 45096
loss: 1.0196144580841064,grad_norm: 0.9999990924566011, iteration: 45097
loss: 1.0403945446014404,grad_norm: 0.9999989719762798, iteration: 45098
loss: 0.9900236129760742,grad_norm: 0.9999991347898013, iteration: 45099
loss: 0.9914190173149109,grad_norm: 0.976717725746033, iteration: 45100
loss: 0.986492395401001,grad_norm: 0.9550360377573266, iteration: 45101
loss: 1.0035985708236694,grad_norm: 0.7691426237086091, iteration: 45102
loss: 1.0407642126083374,grad_norm: 0.8936583616958688, iteration: 45103
loss: 1.0161043405532837,grad_norm: 0.9999992654180225, iteration: 45104
loss: 1.070719838142395,grad_norm: 0.9999994293022249, iteration: 45105
loss: 1.0219377279281616,grad_norm: 0.9999990535292078, iteration: 45106
loss: 1.0395406484603882,grad_norm: 0.9999995917769331, iteration: 45107
loss: 1.004981279373169,grad_norm: 0.9999990403816675, iteration: 45108
loss: 1.0675023794174194,grad_norm: 0.9999991721988071, iteration: 45109
loss: 0.9789660573005676,grad_norm: 0.9770597879525817, iteration: 45110
loss: 1.0269182920455933,grad_norm: 0.9999993738971432, iteration: 45111
loss: 0.9964409470558167,grad_norm: 0.9999991520303676, iteration: 45112
loss: 1.0232809782028198,grad_norm: 0.8430650853733505, iteration: 45113
loss: 1.007662296295166,grad_norm: 0.9999989610331242, iteration: 45114
loss: 1.0314671993255615,grad_norm: 0.9434600343625028, iteration: 45115
loss: 0.9863079786300659,grad_norm: 0.9417253047079771, iteration: 45116
loss: 0.986993670463562,grad_norm: 0.8299566845607513, iteration: 45117
loss: 1.0881102085113525,grad_norm: 0.9999990779222699, iteration: 45118
loss: 1.0189558267593384,grad_norm: 0.9999992734242682, iteration: 45119
loss: 1.0033842325210571,grad_norm: 0.9999991843247606, iteration: 45120
loss: 1.004559874534607,grad_norm: 0.9976553710448528, iteration: 45121
loss: 1.029018759727478,grad_norm: 0.8039159516828847, iteration: 45122
loss: 1.0021206140518188,grad_norm: 0.9999996037631951, iteration: 45123
loss: 1.0307729244232178,grad_norm: 0.9999991195178786, iteration: 45124
loss: 1.0382089614868164,grad_norm: 0.9999991342153524, iteration: 45125
loss: 0.9777679443359375,grad_norm: 0.9999989997396249, iteration: 45126
loss: 1.0160752534866333,grad_norm: 0.9880791623828307, iteration: 45127
loss: 0.993843674659729,grad_norm: 0.7923887533448526, iteration: 45128
loss: 0.9844741225242615,grad_norm: 0.9999993623108713, iteration: 45129
loss: 1.0016332864761353,grad_norm: 0.93365366263613, iteration: 45130
loss: 1.0651012659072876,grad_norm: 0.999999206570147, iteration: 45131
loss: 1.0464824438095093,grad_norm: 0.8983670097056491, iteration: 45132
loss: 1.0118420124053955,grad_norm: 0.903371551600558, iteration: 45133
loss: 1.0054361820220947,grad_norm: 0.9573418420089703, iteration: 45134
loss: 1.0240017175674438,grad_norm: 0.9910124693570482, iteration: 45135
loss: 1.022312879562378,grad_norm: 0.9683058755809507, iteration: 45136
loss: 1.028422236442566,grad_norm: 0.9037034743263925, iteration: 45137
loss: 1.0044536590576172,grad_norm: 0.920166267805689, iteration: 45138
loss: 0.9653730392456055,grad_norm: 0.8559729892415842, iteration: 45139
loss: 1.0189292430877686,grad_norm: 0.9999991709371828, iteration: 45140
loss: 0.9912053942680359,grad_norm: 0.8069821849714629, iteration: 45141
loss: 0.9960329532623291,grad_norm: 0.9999991879632771, iteration: 45142
loss: 1.023636817932129,grad_norm: 0.9723402319699663, iteration: 45143
loss: 1.0165767669677734,grad_norm: 0.9658944987725064, iteration: 45144
loss: 1.0250707864761353,grad_norm: 0.9999992549647733, iteration: 45145
loss: 1.030680537223816,grad_norm: 0.9999990676793066, iteration: 45146
loss: 1.0213795900344849,grad_norm: 0.862787816002973, iteration: 45147
loss: 0.9685242176055908,grad_norm: 0.9193759343178282, iteration: 45148
loss: 1.0011683702468872,grad_norm: 0.9999992597804881, iteration: 45149
loss: 0.9789933562278748,grad_norm: 0.999998981024016, iteration: 45150
loss: 1.0187650918960571,grad_norm: 0.9999991421457669, iteration: 45151
loss: 0.9903016090393066,grad_norm: 0.9481467926091905, iteration: 45152
loss: 1.0000723600387573,grad_norm: 0.9391394303504255, iteration: 45153
loss: 0.975753128528595,grad_norm: 0.9315836653147721, iteration: 45154
loss: 1.0187941789627075,grad_norm: 0.9999997115285129, iteration: 45155
loss: 0.9617189168930054,grad_norm: 0.9999991032410271, iteration: 45156
loss: 0.9816036224365234,grad_norm: 0.999999073786277, iteration: 45157
loss: 0.9761216044425964,grad_norm: 0.8128119508927487, iteration: 45158
loss: 1.0500361919403076,grad_norm: 0.999999746078746, iteration: 45159
loss: 0.9926201701164246,grad_norm: 0.9652698380791107, iteration: 45160
loss: 1.0543590784072876,grad_norm: 0.9999995363248042, iteration: 45161
loss: 1.0346211194992065,grad_norm: 0.9999990014081994, iteration: 45162
loss: 1.0060887336730957,grad_norm: 0.9654664425041787, iteration: 45163
loss: 0.9970479011535645,grad_norm: 0.9999991635424583, iteration: 45164
loss: 1.010352611541748,grad_norm: 0.999999169147788, iteration: 45165
loss: 0.9899775385856628,grad_norm: 0.8266586961366417, iteration: 45166
loss: 1.0244868993759155,grad_norm: 0.9999991798873957, iteration: 45167
loss: 1.0304818153381348,grad_norm: 0.8628040279076138, iteration: 45168
loss: 0.9896740317344666,grad_norm: 0.7639597754617102, iteration: 45169
loss: 0.9771923422813416,grad_norm: 0.9197241967800431, iteration: 45170
loss: 0.9708215594291687,grad_norm: 0.9999991852884593, iteration: 45171
loss: 1.0200469493865967,grad_norm: 0.9999994177541132, iteration: 45172
loss: 1.0074093341827393,grad_norm: 0.9999991908866477, iteration: 45173
loss: 1.0142141580581665,grad_norm: 0.8930211993380909, iteration: 45174
loss: 1.0285522937774658,grad_norm: 0.9112681458284616, iteration: 45175
loss: 1.0048816204071045,grad_norm: 0.9999995025612785, iteration: 45176
loss: 0.938385546207428,grad_norm: 0.9898562115983041, iteration: 45177
loss: 1.0204367637634277,grad_norm: 0.999999015419293, iteration: 45178
loss: 1.0120655298233032,grad_norm: 0.999999450522746, iteration: 45179
loss: 1.0469684600830078,grad_norm: 0.9912488750125066, iteration: 45180
loss: 0.9733726978302002,grad_norm: 0.7783488000455385, iteration: 45181
loss: 1.048708200454712,grad_norm: 0.9999991551841294, iteration: 45182
loss: 0.9956005811691284,grad_norm: 0.9706395468548744, iteration: 45183
loss: 1.035556435585022,grad_norm: 0.9999996499112543, iteration: 45184
loss: 0.994064450263977,grad_norm: 0.9999992107862183, iteration: 45185
loss: 0.9927486181259155,grad_norm: 0.9537261580676809, iteration: 45186
loss: 0.9629042148590088,grad_norm: 0.8965632571914082, iteration: 45187
loss: 0.992447555065155,grad_norm: 0.8042815877019334, iteration: 45188
loss: 1.0356981754302979,grad_norm: 0.9999991560409068, iteration: 45189
loss: 0.9818330407142639,grad_norm: 0.9367092052742905, iteration: 45190
loss: 0.9919593930244446,grad_norm: 0.9285337637362466, iteration: 45191
loss: 1.015315294265747,grad_norm: 0.7620936487622646, iteration: 45192
loss: 1.032441258430481,grad_norm: 0.9999991337835106, iteration: 45193
loss: 1.0029712915420532,grad_norm: 0.8970446161561489, iteration: 45194
loss: 1.0809828042984009,grad_norm: 0.9999998194999884, iteration: 45195
loss: 0.9951489567756653,grad_norm: 0.9999992052082547, iteration: 45196
loss: 1.077823281288147,grad_norm: 0.9999994803905385, iteration: 45197
loss: 0.9941127300262451,grad_norm: 0.9205264532734526, iteration: 45198
loss: 0.97414231300354,grad_norm: 0.9999990378056489, iteration: 45199
loss: 1.0425703525543213,grad_norm: 0.8733835487421008, iteration: 45200
loss: 0.966575026512146,grad_norm: 0.9999989782014028, iteration: 45201
loss: 1.02249276638031,grad_norm: 0.9999992239008203, iteration: 45202
loss: 0.9661693572998047,grad_norm: 0.9047251907232818, iteration: 45203
loss: 0.9940522909164429,grad_norm: 0.9999991842922012, iteration: 45204
loss: 0.9937524199485779,grad_norm: 0.9670712858232079, iteration: 45205
loss: 1.0137840509414673,grad_norm: 0.9999994750765585, iteration: 45206
loss: 0.9640287160873413,grad_norm: 0.8987064795684472, iteration: 45207
loss: 0.9605023860931396,grad_norm: 0.9999991616704564, iteration: 45208
loss: 1.0375255346298218,grad_norm: 0.7862971085560752, iteration: 45209
loss: 1.0070950984954834,grad_norm: 0.9999997456898347, iteration: 45210
loss: 1.0962213277816772,grad_norm: 0.9999997345644064, iteration: 45211
loss: 1.2679164409637451,grad_norm: 0.9999998567421359, iteration: 45212
loss: 1.0597120523452759,grad_norm: 0.999999789373784, iteration: 45213
loss: 1.004035472869873,grad_norm: 0.9313415355905614, iteration: 45214
loss: 0.9711699485778809,grad_norm: 0.9435964447600824, iteration: 45215
loss: 1.0244276523590088,grad_norm: 0.9999992538129434, iteration: 45216
loss: 1.029355764389038,grad_norm: 0.9999992435718307, iteration: 45217
loss: 1.017502784729004,grad_norm: 0.9999991112673061, iteration: 45218
loss: 1.003653645515442,grad_norm: 0.9999997069066481, iteration: 45219
loss: 1.0636484622955322,grad_norm: 0.8604577256671345, iteration: 45220
loss: 1.0098824501037598,grad_norm: 0.8843014171126056, iteration: 45221
loss: 1.0499811172485352,grad_norm: 0.9999990515115927, iteration: 45222
loss: 1.0982738733291626,grad_norm: 0.9999996285853561, iteration: 45223
loss: 1.0355465412139893,grad_norm: 0.9999994182698487, iteration: 45224
loss: 0.988065779209137,grad_norm: 0.9999992959942454, iteration: 45225
loss: 0.9905189871788025,grad_norm: 0.9999990321725902, iteration: 45226
loss: 1.0143213272094727,grad_norm: 0.9999990260544828, iteration: 45227
loss: 0.9923408031463623,grad_norm: 0.9027202629344324, iteration: 45228
loss: 1.0204896926879883,grad_norm: 0.999999414125442, iteration: 45229
loss: 0.9725182056427002,grad_norm: 0.8972232318149758, iteration: 45230
loss: 0.9343095421791077,grad_norm: 0.9999990049859842, iteration: 45231
loss: 0.9648804664611816,grad_norm: 0.9999991188855503, iteration: 45232
loss: 1.0353633165359497,grad_norm: 0.9999994402861729, iteration: 45233
loss: 1.0098049640655518,grad_norm: 0.9602142233302284, iteration: 45234
loss: 1.0364724397659302,grad_norm: 0.9999991483253999, iteration: 45235
loss: 1.0040194988250732,grad_norm: 0.8233148152551353, iteration: 45236
loss: 1.0203009843826294,grad_norm: 0.9618241828488767, iteration: 45237
loss: 1.0141371488571167,grad_norm: 0.9999991039593946, iteration: 45238
loss: 1.0090394020080566,grad_norm: 0.9999997789612741, iteration: 45239
loss: 0.9836379885673523,grad_norm: 0.8809697491633052, iteration: 45240
loss: 1.022645354270935,grad_norm: 0.9999998305138197, iteration: 45241
loss: 1.0111355781555176,grad_norm: 0.907861148090608, iteration: 45242
loss: 0.9955053925514221,grad_norm: 0.9991137267464132, iteration: 45243
loss: 0.9706935882568359,grad_norm: 0.9999991252760118, iteration: 45244
loss: 1.0253732204437256,grad_norm: 0.9131676603462808, iteration: 45245
loss: 0.9839664101600647,grad_norm: 0.9822061692024961, iteration: 45246
loss: 0.963932991027832,grad_norm: 0.9618005357348774, iteration: 45247
loss: 1.0405248403549194,grad_norm: 0.7596014614884288, iteration: 45248
loss: 1.0547481775283813,grad_norm: 0.9999994366599445, iteration: 45249
loss: 1.0756640434265137,grad_norm: 0.9999992318069897, iteration: 45250
loss: 1.002626657485962,grad_norm: 0.9826009375065876, iteration: 45251
loss: 0.9968571066856384,grad_norm: 0.8679185931598578, iteration: 45252
loss: 1.1190487146377563,grad_norm: 0.9999996917373163, iteration: 45253
loss: 1.0078977346420288,grad_norm: 0.8305570467948078, iteration: 45254
loss: 1.0242761373519897,grad_norm: 0.8504681087476547, iteration: 45255
loss: 1.0105705261230469,grad_norm: 0.9970802740885978, iteration: 45256
loss: 0.9741479754447937,grad_norm: 0.9999992080699042, iteration: 45257
loss: 0.9784582853317261,grad_norm: 0.9971873347695325, iteration: 45258
loss: 1.0284096002578735,grad_norm: 0.9999998793220998, iteration: 45259
loss: 1.0047324895858765,grad_norm: 0.8897700017251065, iteration: 45260
loss: 0.98150235414505,grad_norm: 0.8164765895172804, iteration: 45261
loss: 0.969203531742096,grad_norm: 0.8613608416014095, iteration: 45262
loss: 1.0062575340270996,grad_norm: 0.9950818863851411, iteration: 45263
loss: 1.0958082675933838,grad_norm: 0.9999992351244379, iteration: 45264
loss: 1.0151270627975464,grad_norm: 0.9439226589165951, iteration: 45265
loss: 0.9788379073143005,grad_norm: 0.9916482683226499, iteration: 45266
loss: 1.0373563766479492,grad_norm: 0.9999990530442442, iteration: 45267
loss: 1.0223054885864258,grad_norm: 0.9999993661734622, iteration: 45268
loss: 1.0043030977249146,grad_norm: 0.9999990631229126, iteration: 45269
loss: 1.0082467794418335,grad_norm: 0.8098176517571452, iteration: 45270
loss: 1.0611709356307983,grad_norm: 0.9212364633005721, iteration: 45271
loss: 1.010605812072754,grad_norm: 0.9999997274854486, iteration: 45272
loss: 0.9657250642776489,grad_norm: 0.9690724373175285, iteration: 45273
loss: 1.0207902193069458,grad_norm: 0.9999992064836889, iteration: 45274
loss: 1.016133189201355,grad_norm: 0.9999989795662224, iteration: 45275
loss: 1.0331201553344727,grad_norm: 0.9999992129157153, iteration: 45276
loss: 1.1039767265319824,grad_norm: 0.9999990545954415, iteration: 45277
loss: 1.0275545120239258,grad_norm: 0.9999995721522018, iteration: 45278
loss: 1.0241037607192993,grad_norm: 0.9848139899587126, iteration: 45279
loss: 1.0245968103408813,grad_norm: 0.9999994250804808, iteration: 45280
loss: 1.0554004907608032,grad_norm: 0.9999994402252271, iteration: 45281
loss: 0.9921838641166687,grad_norm: 0.8643747265269334, iteration: 45282
loss: 0.9985485076904297,grad_norm: 0.9999990659304963, iteration: 45283
loss: 1.0049651861190796,grad_norm: 0.9999996024573118, iteration: 45284
loss: 0.9916624426841736,grad_norm: 0.8282828007956964, iteration: 45285
loss: 1.0033985376358032,grad_norm: 0.9999990317462746, iteration: 45286
loss: 1.0369480848312378,grad_norm: 0.91774432046724, iteration: 45287
loss: 1.0429136753082275,grad_norm: 0.9999991825361007, iteration: 45288
loss: 1.035057783126831,grad_norm: 0.7917450746202271, iteration: 45289
loss: 1.068683385848999,grad_norm: 0.9746508470970509, iteration: 45290
loss: 1.0936169624328613,grad_norm: 0.8127728825508285, iteration: 45291
loss: 1.0542105436325073,grad_norm: 0.9999992524640583, iteration: 45292
loss: 1.030455470085144,grad_norm: 0.8955070399953433, iteration: 45293
loss: 1.0324679613113403,grad_norm: 0.9873549438499412, iteration: 45294
loss: 1.0638203620910645,grad_norm: 0.999999124087947, iteration: 45295
loss: 0.9867051839828491,grad_norm: 0.9999998412563085, iteration: 45296
loss: 1.0457749366760254,grad_norm: 0.9999990526398554, iteration: 45297
loss: 0.9867681264877319,grad_norm: 0.9961777578062566, iteration: 45298
loss: 1.072037696838379,grad_norm: 0.999999653471185, iteration: 45299
loss: 1.0327365398406982,grad_norm: 0.9903086378460731, iteration: 45300
loss: 1.0806394815444946,grad_norm: 0.897104305629367, iteration: 45301
loss: 1.0133994817733765,grad_norm: 0.9999996973858915, iteration: 45302
loss: 1.0415449142456055,grad_norm: 0.959172196707595, iteration: 45303
loss: 1.0137075185775757,grad_norm: 0.8338618911825544, iteration: 45304
loss: 1.098441481590271,grad_norm: 0.8920144773899289, iteration: 45305
loss: 0.9830158948898315,grad_norm: 0.9615322157416657, iteration: 45306
loss: 1.0121454000473022,grad_norm: 0.974410373591805, iteration: 45307
loss: 1.0144344568252563,grad_norm: 0.9942739320461026, iteration: 45308
loss: 1.010648250579834,grad_norm: 0.9452632679295336, iteration: 45309
loss: 1.017993450164795,grad_norm: 0.8739469046102203, iteration: 45310
loss: 1.0453320741653442,grad_norm: 0.9487163857418642, iteration: 45311
loss: 1.0004321336746216,grad_norm: 0.9999991252238116, iteration: 45312
loss: 1.0157212018966675,grad_norm: 0.9479634343847926, iteration: 45313
loss: 1.0102916955947876,grad_norm: 0.9999989687818087, iteration: 45314
loss: 1.0072678327560425,grad_norm: 0.8661299366695846, iteration: 45315
loss: 1.0350193977355957,grad_norm: 0.9999996242967484, iteration: 45316
loss: 1.0466598272323608,grad_norm: 0.9495437198173297, iteration: 45317
loss: 0.9755697846412659,grad_norm: 0.9999990919359321, iteration: 45318
loss: 1.0762276649475098,grad_norm: 0.9205521982448472, iteration: 45319
loss: 0.975040853023529,grad_norm: 0.790620651562684, iteration: 45320
loss: 0.9791800379753113,grad_norm: 0.8299942560589264, iteration: 45321
loss: 0.9647000432014465,grad_norm: 0.9999990494319786, iteration: 45322
loss: 1.0540637969970703,grad_norm: 0.9999995270559909, iteration: 45323
loss: 1.0043948888778687,grad_norm: 0.999999132827416, iteration: 45324
loss: 1.0790603160858154,grad_norm: 0.9999995252344612, iteration: 45325
loss: 1.0335583686828613,grad_norm: 0.978921748559395, iteration: 45326
loss: 0.9860923886299133,grad_norm: 0.9999992823686382, iteration: 45327
loss: 1.0534988641738892,grad_norm: 0.9999990828761052, iteration: 45328
loss: 1.034469723701477,grad_norm: 0.9999997924814504, iteration: 45329
loss: 1.021263599395752,grad_norm: 0.9327421709388025, iteration: 45330
loss: 0.9880026578903198,grad_norm: 0.9205400860501586, iteration: 45331
loss: 1.0362893342971802,grad_norm: 0.9999991363154644, iteration: 45332
loss: 0.9746298789978027,grad_norm: 0.8147705840015634, iteration: 45333
loss: 1.0244472026824951,grad_norm: 0.9262581236920898, iteration: 45334
loss: 0.9825757741928101,grad_norm: 0.8065548638438289, iteration: 45335
loss: 1.029103398323059,grad_norm: 0.9999990592198528, iteration: 45336
loss: 0.9914716482162476,grad_norm: 0.9376956226862289, iteration: 45337
loss: 1.0086525678634644,grad_norm: 0.9999990163356295, iteration: 45338
loss: 0.9602141976356506,grad_norm: 0.939012607284132, iteration: 45339
loss: 0.9758845567703247,grad_norm: 0.9765247722205302, iteration: 45340
loss: 0.99737548828125,grad_norm: 0.8867062027643583, iteration: 45341
loss: 0.9948256015777588,grad_norm: 0.9999990501814119, iteration: 45342
loss: 1.0566537380218506,grad_norm: 0.9999993809619898, iteration: 45343
loss: 0.9840556979179382,grad_norm: 0.9999990936199874, iteration: 45344
loss: 1.080548644065857,grad_norm: 0.9999995615156496, iteration: 45345
loss: 1.0912784337997437,grad_norm: 0.9884353837559504, iteration: 45346
loss: 0.9630438089370728,grad_norm: 0.9999989254606197, iteration: 45347
loss: 0.9664976596832275,grad_norm: 0.8950315496527719, iteration: 45348
loss: 0.9753544330596924,grad_norm: 0.7550471552443165, iteration: 45349
loss: 1.025465726852417,grad_norm: 0.9999990611440074, iteration: 45350
loss: 0.9924543499946594,grad_norm: 0.9999989879034017, iteration: 45351
loss: 0.9793157577514648,grad_norm: 0.8695861946066769, iteration: 45352
loss: 1.0453673601150513,grad_norm: 0.999999318526222, iteration: 45353
loss: 1.0849072933197021,grad_norm: 0.9999995568804946, iteration: 45354
loss: 1.044832706451416,grad_norm: 0.9999992016286887, iteration: 45355
loss: 0.9950065016746521,grad_norm: 0.9999992034821232, iteration: 45356
loss: 1.0266982316970825,grad_norm: 0.9999990018026638, iteration: 45357
loss: 1.0314955711364746,grad_norm: 0.9170743916061288, iteration: 45358
loss: 0.9850346446037292,grad_norm: 0.9999991755026254, iteration: 45359
loss: 1.0135669708251953,grad_norm: 0.8720168819353116, iteration: 45360
loss: 1.0108072757720947,grad_norm: 0.9636117949210138, iteration: 45361
loss: 0.9902380704879761,grad_norm: 0.840539485665554, iteration: 45362
loss: 0.9951680302619934,grad_norm: 0.9999991112302821, iteration: 45363
loss: 1.068761944770813,grad_norm: 0.9999991194281552, iteration: 45364
loss: 1.0252426862716675,grad_norm: 0.9791658630076391, iteration: 45365
loss: 0.9852094054222107,grad_norm: 0.9999990832090069, iteration: 45366
loss: 0.9933834075927734,grad_norm: 0.9999991922001492, iteration: 45367
loss: 0.989436686038971,grad_norm: 0.938005176516088, iteration: 45368
loss: 0.9979541897773743,grad_norm: 0.9022018547844213, iteration: 45369
loss: 1.014585256576538,grad_norm: 0.9943665589207525, iteration: 45370
loss: 1.0015572309494019,grad_norm: 0.9999991687637407, iteration: 45371
loss: 1.0387922525405884,grad_norm: 0.9999991343807286, iteration: 45372
loss: 1.0156885385513306,grad_norm: 0.7178505586352827, iteration: 45373
loss: 1.0074117183685303,grad_norm: 0.9999996613386247, iteration: 45374
loss: 1.048761248588562,grad_norm: 0.9887590210989783, iteration: 45375
loss: 0.9926769137382507,grad_norm: 0.9999992801913012, iteration: 45376
loss: 1.0188716650009155,grad_norm: 0.9812421925433384, iteration: 45377
loss: 0.9443410634994507,grad_norm: 0.9999991448366163, iteration: 45378
loss: 0.9928795099258423,grad_norm: 0.8273894735906077, iteration: 45379
loss: 0.9827821850776672,grad_norm: 0.9999991817290287, iteration: 45380
loss: 1.0262277126312256,grad_norm: 0.9070096543180556, iteration: 45381
loss: 0.9734932780265808,grad_norm: 0.9121229375018874, iteration: 45382
loss: 1.0072615146636963,grad_norm: 0.999999090266277, iteration: 45383
loss: 0.9672365188598633,grad_norm: 0.8856643639204196, iteration: 45384
loss: 0.9149194359779358,grad_norm: 0.9167025909102228, iteration: 45385
loss: 1.0266247987747192,grad_norm: 0.9999991301413252, iteration: 45386
loss: 1.0242470502853394,grad_norm: 0.8569327215013285, iteration: 45387
loss: 1.027127742767334,grad_norm: 0.8344814579028237, iteration: 45388
loss: 1.0407718420028687,grad_norm: 0.7438533639868822, iteration: 45389
loss: 0.9874384999275208,grad_norm: 0.8953401307990005, iteration: 45390
loss: 0.95013827085495,grad_norm: 0.9599904724220935, iteration: 45391
loss: 0.9770278334617615,grad_norm: 0.940691397490948, iteration: 45392
loss: 1.0021674633026123,grad_norm: 0.9999991556151054, iteration: 45393
loss: 1.043062686920166,grad_norm: 0.9999991688961116, iteration: 45394
loss: 1.0122172832489014,grad_norm: 0.7160875837821603, iteration: 45395
loss: 0.9971472024917603,grad_norm: 0.9999992450386418, iteration: 45396
loss: 0.9905105829238892,grad_norm: 0.8858770768841736, iteration: 45397
loss: 0.9835783243179321,grad_norm: 0.9424710237618683, iteration: 45398
loss: 0.9583331346511841,grad_norm: 0.8893902919551685, iteration: 45399
loss: 0.9837206602096558,grad_norm: 0.9999990385538264, iteration: 45400
loss: 0.9503801465034485,grad_norm: 0.9999992068062689, iteration: 45401
loss: 1.0006059408187866,grad_norm: 0.982852031831806, iteration: 45402
loss: 1.032235026359558,grad_norm: 0.9181842913865944, iteration: 45403
loss: 1.0016285181045532,grad_norm: 0.9591082819704425, iteration: 45404
loss: 1.0173165798187256,grad_norm: 0.949768498876932, iteration: 45405
loss: 0.9884825348854065,grad_norm: 0.8247846221224626, iteration: 45406
loss: 1.032081127166748,grad_norm: 0.9999992060256273, iteration: 45407
loss: 1.0855475664138794,grad_norm: 0.9999997807583227, iteration: 45408
loss: 1.054506540298462,grad_norm: 0.9999993363087476, iteration: 45409
loss: 0.986262321472168,grad_norm: 0.9999999184690644, iteration: 45410
loss: 0.9938187599182129,grad_norm: 0.9349798190373289, iteration: 45411
loss: 1.0039560794830322,grad_norm: 0.8538011475666012, iteration: 45412
loss: 1.0611602067947388,grad_norm: 0.9999992598858257, iteration: 45413
loss: 0.9927042126655579,grad_norm: 0.9999990882780364, iteration: 45414
loss: 1.0106346607208252,grad_norm: 0.819662667922796, iteration: 45415
loss: 1.0137468576431274,grad_norm: 0.9500503862451113, iteration: 45416
loss: 0.9993355870246887,grad_norm: 0.9999991478924771, iteration: 45417
loss: 0.999322235584259,grad_norm: 0.9381347635554096, iteration: 45418
loss: 1.0511990785598755,grad_norm: 0.9999990963804574, iteration: 45419
loss: 1.0070315599441528,grad_norm: 0.9999992558283091, iteration: 45420
loss: 1.028823971748352,grad_norm: 0.9999997687357945, iteration: 45421
loss: 1.0084232091903687,grad_norm: 0.7972150320596781, iteration: 45422
loss: 1.0381847620010376,grad_norm: 0.9999992016876517, iteration: 45423
loss: 1.002914547920227,grad_norm: 0.9999991348505591, iteration: 45424
loss: 1.078465223312378,grad_norm: 0.999999775977572, iteration: 45425
loss: 0.9675564169883728,grad_norm: 0.9517366855748501, iteration: 45426
loss: 0.9946315884590149,grad_norm: 0.9343999802361774, iteration: 45427
loss: 1.0051519870758057,grad_norm: 0.9985592773442487, iteration: 45428
loss: 0.9915778636932373,grad_norm: 0.9999991231740861, iteration: 45429
loss: 1.028172492980957,grad_norm: 0.9686839801956869, iteration: 45430
loss: 1.0193588733673096,grad_norm: 0.9822490850846574, iteration: 45431
loss: 1.003768801689148,grad_norm: 0.7511123611436957, iteration: 45432
loss: 1.0489811897277832,grad_norm: 0.9999991976901034, iteration: 45433
loss: 1.0288563966751099,grad_norm: 0.867578133119873, iteration: 45434
loss: 1.0405484437942505,grad_norm: 0.9999992409637621, iteration: 45435
loss: 1.0010449886322021,grad_norm: 0.8907834751660786, iteration: 45436
loss: 1.0840928554534912,grad_norm: 0.9999991211714254, iteration: 45437
loss: 1.1778658628463745,grad_norm: 0.9999994080667747, iteration: 45438
loss: 1.045304536819458,grad_norm: 0.9999991880098628, iteration: 45439
loss: 0.9991399645805359,grad_norm: 0.9487689282580105, iteration: 45440
loss: 1.0152240991592407,grad_norm: 0.8633030950767946, iteration: 45441
loss: 1.0312128067016602,grad_norm: 0.9999990750666529, iteration: 45442
loss: 1.0097236633300781,grad_norm: 0.8820992125985905, iteration: 45443
loss: 1.0012726783752441,grad_norm: 0.7684318211062284, iteration: 45444
loss: 1.0256768465042114,grad_norm: 0.9999990849464991, iteration: 45445
loss: 1.1951649188995361,grad_norm: 0.9999993308957423, iteration: 45446
loss: 0.9709792733192444,grad_norm: 0.9130633335548745, iteration: 45447
loss: 0.9867959022521973,grad_norm: 0.9306816898500656, iteration: 45448
loss: 0.9995012283325195,grad_norm: 0.9517789509489714, iteration: 45449
loss: 0.9798449277877808,grad_norm: 0.9999990612107085, iteration: 45450
loss: 1.045688509941101,grad_norm: 0.9999993872386839, iteration: 45451
loss: 1.049092411994934,grad_norm: 0.9999990636928836, iteration: 45452
loss: 0.9972586631774902,grad_norm: 0.8747625191292936, iteration: 45453
loss: 1.002098798751831,grad_norm: 0.9999992194981903, iteration: 45454
loss: 0.9599766135215759,grad_norm: 0.948844443690974, iteration: 45455
loss: 1.0278689861297607,grad_norm: 0.9594138157561947, iteration: 45456
loss: 1.0784837007522583,grad_norm: 0.9999995822536384, iteration: 45457
loss: 1.034404993057251,grad_norm: 0.9999992538063218, iteration: 45458
loss: 1.069251298904419,grad_norm: 0.9999992092156119, iteration: 45459
loss: 1.0230209827423096,grad_norm: 0.9725914926971728, iteration: 45460
loss: 0.9803711771965027,grad_norm: 0.8420984573291964, iteration: 45461
loss: 1.0369821786880493,grad_norm: 0.997703483743876, iteration: 45462
loss: 0.989495038986206,grad_norm: 0.999999079389089, iteration: 45463
loss: 1.0453517436981201,grad_norm: 0.9999990564501271, iteration: 45464
loss: 0.9643048048019409,grad_norm: 0.8094105599126326, iteration: 45465
loss: 1.010148525238037,grad_norm: 0.9999994290022769, iteration: 45466
loss: 1.0498552322387695,grad_norm: 0.9999992613727262, iteration: 45467
loss: 1.0650596618652344,grad_norm: 0.7840102896055146, iteration: 45468
loss: 1.0043840408325195,grad_norm: 0.9378466285799162, iteration: 45469
loss: 0.9768310785293579,grad_norm: 0.8110170664734455, iteration: 45470
loss: 1.0370347499847412,grad_norm: 0.8200433949668887, iteration: 45471
loss: 1.0296270847320557,grad_norm: 0.9999993268485825, iteration: 45472
loss: 1.0040266513824463,grad_norm: 0.9999991217369378, iteration: 45473
loss: 1.0303316116333008,grad_norm: 0.9608050395167265, iteration: 45474
loss: 0.9955068230628967,grad_norm: 0.9963462346472688, iteration: 45475
loss: 1.0030802488327026,grad_norm: 0.9999991992279953, iteration: 45476
loss: 1.0285543203353882,grad_norm: 0.9999994882744503, iteration: 45477
loss: 0.9875638484954834,grad_norm: 0.8738943169818435, iteration: 45478
loss: 0.9734464287757874,grad_norm: 0.9999992000789469, iteration: 45479
loss: 1.0100185871124268,grad_norm: 0.9999992319034294, iteration: 45480
loss: 0.9866378903388977,grad_norm: 0.9827479956666779, iteration: 45481
loss: 1.0245099067687988,grad_norm: 0.9053533735525376, iteration: 45482
loss: 0.9773552417755127,grad_norm: 0.9999993979262871, iteration: 45483
loss: 1.0279653072357178,grad_norm: 0.9999992424305326, iteration: 45484
loss: 1.0362035036087036,grad_norm: 0.8983970071168105, iteration: 45485
loss: 0.9937902092933655,grad_norm: 0.9071380348867808, iteration: 45486
loss: 0.9924527406692505,grad_norm: 0.9999996857028689, iteration: 45487
loss: 1.0075780153274536,grad_norm: 0.9999990989081358, iteration: 45488
loss: 1.05344820022583,grad_norm: 0.9789226484468028, iteration: 45489
loss: 1.0412650108337402,grad_norm: 0.9999990845274186, iteration: 45490
loss: 1.0049570798873901,grad_norm: 0.97311522966614, iteration: 45491
loss: 0.9924519062042236,grad_norm: 0.8466318994078206, iteration: 45492
loss: 0.9939743876457214,grad_norm: 0.9999991990258295, iteration: 45493
loss: 1.0368494987487793,grad_norm: 0.9999991896229266, iteration: 45494
loss: 1.03483247756958,grad_norm: 0.9301723916310453, iteration: 45495
loss: 1.0073825120925903,grad_norm: 0.8025417495661095, iteration: 45496
loss: 0.9999008178710938,grad_norm: 0.8775943861766148, iteration: 45497
loss: 0.974744439125061,grad_norm: 0.9999990057258996, iteration: 45498
loss: 1.0012415647506714,grad_norm: 0.9376568900493134, iteration: 45499
loss: 1.0226243734359741,grad_norm: 0.9755356851440687, iteration: 45500
loss: 1.0059453248977661,grad_norm: 0.930410698408542, iteration: 45501
loss: 1.0007946491241455,grad_norm: 0.9999989736610455, iteration: 45502
loss: 1.0349560976028442,grad_norm: 0.9999998449874877, iteration: 45503
loss: 1.0222063064575195,grad_norm: 0.9288222777682776, iteration: 45504
loss: 0.9622716903686523,grad_norm: 0.749671556857221, iteration: 45505
loss: 1.0565816164016724,grad_norm: 0.9999996466713715, iteration: 45506
loss: 1.0223783254623413,grad_norm: 0.9999990360865487, iteration: 45507
loss: 1.0339761972427368,grad_norm: 0.9999993513060119, iteration: 45508
loss: 0.9856545329093933,grad_norm: 0.8992036084309958, iteration: 45509
loss: 1.045760989189148,grad_norm: 0.925197178978235, iteration: 45510
loss: 1.0168360471725464,grad_norm: 0.9999992545230513, iteration: 45511
loss: 1.0068100690841675,grad_norm: 0.9370885327196248, iteration: 45512
loss: 0.9788629412651062,grad_norm: 0.9999992485272972, iteration: 45513
loss: 1.0027517080307007,grad_norm: 0.9527936566839581, iteration: 45514
loss: 0.99021977186203,grad_norm: 0.9999990295476889, iteration: 45515
loss: 0.9872442483901978,grad_norm: 0.9999991735856789, iteration: 45516
loss: 1.0065094232559204,grad_norm: 0.9860510666959156, iteration: 45517
loss: 1.1338151693344116,grad_norm: 1.000000093050827, iteration: 45518
loss: 0.983525812625885,grad_norm: 0.7989318250106922, iteration: 45519
loss: 0.9720660448074341,grad_norm: 0.8751549716448677, iteration: 45520
loss: 1.0079452991485596,grad_norm: 0.9999990790665437, iteration: 45521
loss: 1.0378080606460571,grad_norm: 0.7802018396646332, iteration: 45522
loss: 1.00597083568573,grad_norm: 0.9999990463442335, iteration: 45523
loss: 1.0144771337509155,grad_norm: 0.8455102139336182, iteration: 45524
loss: 1.0644680261611938,grad_norm: 0.9004855386692897, iteration: 45525
loss: 1.0548820495605469,grad_norm: 0.9923135774055172, iteration: 45526
loss: 0.9833651185035706,grad_norm: 0.8841657477120758, iteration: 45527
loss: 1.0338457822799683,grad_norm: 0.8528351848106358, iteration: 45528
loss: 1.0205464363098145,grad_norm: 0.9211882934527403, iteration: 45529
loss: 0.97149658203125,grad_norm: 0.8416636686420382, iteration: 45530
loss: 1.005345344543457,grad_norm: 0.8115748202297876, iteration: 45531
loss: 1.0251408815383911,grad_norm: 0.9999990352294439, iteration: 45532
loss: 1.0135635137557983,grad_norm: 0.8162889901803602, iteration: 45533
loss: 0.9450520873069763,grad_norm: 0.9885948986000842, iteration: 45534
loss: 0.9887592792510986,grad_norm: 0.9999995514293712, iteration: 45535
loss: 1.0314362049102783,grad_norm: 0.8521388999982759, iteration: 45536
loss: 1.0282225608825684,grad_norm: 0.9385223848716574, iteration: 45537
loss: 0.9610603451728821,grad_norm: 0.8141422045235245, iteration: 45538
loss: 1.031883955001831,grad_norm: 0.9999990150033954, iteration: 45539
loss: 0.9731053709983826,grad_norm: 0.8414507068930507, iteration: 45540
loss: 0.978579044342041,grad_norm: 0.8920667122895668, iteration: 45541
loss: 1.0252962112426758,grad_norm: 0.9999991613932849, iteration: 45542
loss: 0.9722455739974976,grad_norm: 0.8507923387149932, iteration: 45543
loss: 1.0119022130966187,grad_norm: 0.9999991345136661, iteration: 45544
loss: 0.9984093308448792,grad_norm: 0.8006541418177817, iteration: 45545
loss: 1.0160435438156128,grad_norm: 0.8587334138787346, iteration: 45546
loss: 0.9856885075569153,grad_norm: 0.8951563495129423, iteration: 45547
loss: 0.992544949054718,grad_norm: 0.9459923013439687, iteration: 45548
loss: 1.0318262577056885,grad_norm: 0.9071317437622423, iteration: 45549
loss: 0.9654481410980225,grad_norm: 0.9028963411147454, iteration: 45550
loss: 1.0075592994689941,grad_norm: 0.9999990559205995, iteration: 45551
loss: 1.0525932312011719,grad_norm: 0.99999970491238, iteration: 45552
loss: 1.0245246887207031,grad_norm: 0.9999994925069801, iteration: 45553
loss: 1.0574398040771484,grad_norm: 0.9999991641544709, iteration: 45554
loss: 1.0252424478530884,grad_norm: 0.9999991936394548, iteration: 45555
loss: 1.0494111776351929,grad_norm: 0.9999991813601394, iteration: 45556
loss: 1.0208076238632202,grad_norm: 0.9999991478925838, iteration: 45557
loss: 0.9860380291938782,grad_norm: 0.8207030723906674, iteration: 45558
loss: 1.0177196264266968,grad_norm: 0.8321021074126295, iteration: 45559
loss: 0.9640023708343506,grad_norm: 0.8777824634182659, iteration: 45560
loss: 1.0528833866119385,grad_norm: 0.8781301001396059, iteration: 45561
loss: 1.0163956880569458,grad_norm: 0.9999992354766188, iteration: 45562
loss: 0.9600935578346252,grad_norm: 0.8327969544102986, iteration: 45563
loss: 1.006023645401001,grad_norm: 0.9107354783986863, iteration: 45564
loss: 0.9726637601852417,grad_norm: 0.9999990651477698, iteration: 45565
loss: 1.018937587738037,grad_norm: 0.9736857296436735, iteration: 45566
loss: 0.9918422102928162,grad_norm: 0.837782498791476, iteration: 45567
loss: 1.0088765621185303,grad_norm: 0.97300450921082, iteration: 45568
loss: 0.9619305729866028,grad_norm: 0.9999992021917938, iteration: 45569
loss: 1.0172529220581055,grad_norm: 0.9999992207835541, iteration: 45570
loss: 1.029567003250122,grad_norm: 0.9385675558103821, iteration: 45571
loss: 1.0003368854522705,grad_norm: 0.7647184024971744, iteration: 45572
loss: 0.9991605877876282,grad_norm: 0.9826789861917341, iteration: 45573
loss: 0.9345797300338745,grad_norm: 0.999999101474365, iteration: 45574
loss: 0.9986007213592529,grad_norm: 0.9428518355446053, iteration: 45575
loss: 1.0516563653945923,grad_norm: 0.9999995873620399, iteration: 45576
loss: 1.0086033344268799,grad_norm: 0.9237695185053441, iteration: 45577
loss: 1.0250519514083862,grad_norm: 0.92223608427823, iteration: 45578
loss: 1.0242652893066406,grad_norm: 0.9999991473543441, iteration: 45579
loss: 1.030577540397644,grad_norm: 0.9250491032565873, iteration: 45580
loss: 0.9675069451332092,grad_norm: 0.800391661494333, iteration: 45581
loss: 1.0174423456192017,grad_norm: 0.9434453791429337, iteration: 45582
loss: 1.0075492858886719,grad_norm: 0.9880836654664102, iteration: 45583
loss: 0.9917415380477905,grad_norm: 0.9228026398110484, iteration: 45584
loss: 1.0156434774398804,grad_norm: 0.9754492375186313, iteration: 45585
loss: 0.9715398550033569,grad_norm: 0.9444940826252641, iteration: 45586
loss: 1.0164967775344849,grad_norm: 0.8928567376701699, iteration: 45587
loss: 1.0802960395812988,grad_norm: 0.9999997328010137, iteration: 45588
loss: 0.9912087321281433,grad_norm: 0.7965001821402066, iteration: 45589
loss: 1.0124684572219849,grad_norm: 0.8273188652158466, iteration: 45590
loss: 0.9677513837814331,grad_norm: 0.9557663347115603, iteration: 45591
loss: 1.0348347425460815,grad_norm: 0.9999996164429067, iteration: 45592
loss: 0.9674868583679199,grad_norm: 0.8332633874838287, iteration: 45593
loss: 1.0000640153884888,grad_norm: 0.9999992174581402, iteration: 45594
loss: 1.0565418004989624,grad_norm: 0.9999995647320105, iteration: 45595
loss: 0.9995347261428833,grad_norm: 0.8770043687546247, iteration: 45596
loss: 1.0671087503433228,grad_norm: 0.999999177730919, iteration: 45597
loss: 0.9134960770606995,grad_norm: 0.9999991962911111, iteration: 45598
loss: 0.9967637062072754,grad_norm: 0.9766717412758704, iteration: 45599
loss: 0.997639000415802,grad_norm: 0.9999991456928669, iteration: 45600
loss: 1.0268828868865967,grad_norm: 0.9999997063725553, iteration: 45601
loss: 1.0417813062667847,grad_norm: 0.9999990233383809, iteration: 45602
loss: 1.017440915107727,grad_norm: 0.9999991395816819, iteration: 45603
loss: 1.014366626739502,grad_norm: 0.9205210715991012, iteration: 45604
loss: 0.9742165207862854,grad_norm: 0.9588206738910378, iteration: 45605
loss: 0.9710646867752075,grad_norm: 0.9312494506188587, iteration: 45606
loss: 0.9854056239128113,grad_norm: 0.9999990106834373, iteration: 45607
loss: 1.0182918310165405,grad_norm: 0.773736462272988, iteration: 45608
loss: 1.0389926433563232,grad_norm: 0.9999997598362862, iteration: 45609
loss: 1.001205325126648,grad_norm: 0.9999989491199057, iteration: 45610
loss: 1.0098501443862915,grad_norm: 0.9999995669893252, iteration: 45611
loss: 1.0278191566467285,grad_norm: 0.9999992563365983, iteration: 45612
loss: 0.959356963634491,grad_norm: 0.9904200979732255, iteration: 45613
loss: 0.9957992434501648,grad_norm: 0.9999993392495795, iteration: 45614
loss: 1.0181199312210083,grad_norm: 0.9899201050527713, iteration: 45615
loss: 0.975028932094574,grad_norm: 0.9690147266833071, iteration: 45616
loss: 1.0058788061141968,grad_norm: 0.8798249762836304, iteration: 45617
loss: 1.0100343227386475,grad_norm: 0.9999990678020151, iteration: 45618
loss: 1.0304204225540161,grad_norm: 0.8488296705629419, iteration: 45619
loss: 0.9816934466362,grad_norm: 0.6675296517006198, iteration: 45620
loss: 1.0437800884246826,grad_norm: 0.9186279689386855, iteration: 45621
loss: 0.9926013350486755,grad_norm: 0.9367312175366037, iteration: 45622
loss: 1.0128884315490723,grad_norm: 0.9063706937431572, iteration: 45623
loss: 0.9985449314117432,grad_norm: 0.9554674591936358, iteration: 45624
loss: 0.9703349471092224,grad_norm: 0.999999816614217, iteration: 45625
loss: 0.9661299586296082,grad_norm: 0.9128815208642895, iteration: 45626
loss: 1.0199317932128906,grad_norm: 0.9999990593993529, iteration: 45627
loss: 1.0108823776245117,grad_norm: 0.8703435605811887, iteration: 45628
loss: 1.0500134229660034,grad_norm: 0.9999991408927204, iteration: 45629
loss: 0.9662214517593384,grad_norm: 0.9999991077769024, iteration: 45630
loss: 1.0196033716201782,grad_norm: 0.9799483256408182, iteration: 45631
loss: 1.0241756439208984,grad_norm: 0.8431408737912989, iteration: 45632
loss: 1.0194495916366577,grad_norm: 0.8866601767780986, iteration: 45633
loss: 1.0193753242492676,grad_norm: 0.9999989438635885, iteration: 45634
loss: 1.0375173091888428,grad_norm: 0.8306058673892981, iteration: 45635
loss: 1.0189805030822754,grad_norm: 0.9999997581550965, iteration: 45636
loss: 0.98506098985672,grad_norm: 0.9999990473207516, iteration: 45637
loss: 1.0036612749099731,grad_norm: 0.9264229145856491, iteration: 45638
loss: 0.9930534958839417,grad_norm: 0.951327307518386, iteration: 45639
loss: 1.0232656002044678,grad_norm: 0.9999993263162634, iteration: 45640
loss: 1.034146785736084,grad_norm: 0.9802704860340574, iteration: 45641
loss: 0.9959860444068909,grad_norm: 0.9236002212330461, iteration: 45642
loss: 0.9910938739776611,grad_norm: 0.8960605966980552, iteration: 45643
loss: 0.9735986590385437,grad_norm: 0.886625057741602, iteration: 45644
loss: 1.037487506866455,grad_norm: 0.8794479979439855, iteration: 45645
loss: 1.0308517217636108,grad_norm: 0.9999990897604963, iteration: 45646
loss: 1.0301028490066528,grad_norm: 0.9999990806267468, iteration: 45647
loss: 0.9869105219841003,grad_norm: 0.9999991466668167, iteration: 45648
loss: 1.0031788349151611,grad_norm: 0.952748069961232, iteration: 45649
loss: 0.9982179403305054,grad_norm: 0.8081087204637012, iteration: 45650
loss: 1.028826117515564,grad_norm: 0.9999990394223329, iteration: 45651
loss: 1.0521063804626465,grad_norm: 0.9574741615774034, iteration: 45652
loss: 1.0150482654571533,grad_norm: 0.9159327718004734, iteration: 45653
loss: 1.0098795890808105,grad_norm: 0.954180462259713, iteration: 45654
loss: 1.0083855390548706,grad_norm: 0.9999994113222764, iteration: 45655
loss: 0.9741270542144775,grad_norm: 0.9962848046404532, iteration: 45656
loss: 1.0240366458892822,grad_norm: 0.9999990919374101, iteration: 45657
loss: 1.0085784196853638,grad_norm: 0.9999989160181317, iteration: 45658
loss: 1.0059934854507446,grad_norm: 0.8250238484567811, iteration: 45659
loss: 1.0386743545532227,grad_norm: 0.840006525840973, iteration: 45660
loss: 0.9866825342178345,grad_norm: 0.9999992600896823, iteration: 45661
loss: 0.9964784383773804,grad_norm: 0.9912400989842933, iteration: 45662
loss: 1.016302227973938,grad_norm: 0.8955817465766482, iteration: 45663
loss: 0.9782776236534119,grad_norm: 0.9390512281739091, iteration: 45664
loss: 1.0158666372299194,grad_norm: 0.9999992608097606, iteration: 45665
loss: 1.000510334968567,grad_norm: 0.9999994015664413, iteration: 45666
loss: 1.0405770540237427,grad_norm: 0.9999993753874535, iteration: 45667
loss: 0.9564600586891174,grad_norm: 0.9999990188077494, iteration: 45668
loss: 0.9798848032951355,grad_norm: 0.9311118435788587, iteration: 45669
loss: 1.0627058744430542,grad_norm: 0.9999993320892704, iteration: 45670
loss: 1.0048688650131226,grad_norm: 0.9272366585151496, iteration: 45671
loss: 0.9967004060745239,grad_norm: 0.9440664581700562, iteration: 45672
loss: 1.038954496383667,grad_norm: 0.9999992872449628, iteration: 45673
loss: 1.12220299243927,grad_norm: 0.9999997919438991, iteration: 45674
loss: 0.9649047255516052,grad_norm: 0.8719236789161448, iteration: 45675
loss: 0.9665570259094238,grad_norm: 0.929793814468461, iteration: 45676
loss: 0.9775717854499817,grad_norm: 0.9302167100215436, iteration: 45677
loss: 0.9473475217819214,grad_norm: 0.7948439755572793, iteration: 45678
loss: 1.0221495628356934,grad_norm: 0.9999991841365284, iteration: 45679
loss: 1.0037604570388794,grad_norm: 0.9076940754131123, iteration: 45680
loss: 1.0371545553207397,grad_norm: 0.9999990189118121, iteration: 45681
loss: 0.9778135418891907,grad_norm: 0.864067184960277, iteration: 45682
loss: 1.012135624885559,grad_norm: 0.9258063425992825, iteration: 45683
loss: 1.0163456201553345,grad_norm: 0.950751747147895, iteration: 45684
loss: 0.9754326343536377,grad_norm: 0.9999992225878662, iteration: 45685
loss: 0.994595468044281,grad_norm: 0.8873168690543133, iteration: 45686
loss: 1.0090134143829346,grad_norm: 0.9999991173292775, iteration: 45687
loss: 1.0012494325637817,grad_norm: 0.9999996495765072, iteration: 45688
loss: 1.0160683393478394,grad_norm: 0.8784609110764339, iteration: 45689
loss: 1.0362759828567505,grad_norm: 0.9999991135911048, iteration: 45690
loss: 0.9669790267944336,grad_norm: 0.9365306312771007, iteration: 45691
loss: 0.963630199432373,grad_norm: 0.885833928489885, iteration: 45692
loss: 1.0141205787658691,grad_norm: 0.9999990059121469, iteration: 45693
loss: 1.070216417312622,grad_norm: 0.9999995472182185, iteration: 45694
loss: 1.0111182928085327,grad_norm: 0.9115488292385262, iteration: 45695
loss: 1.0532201528549194,grad_norm: 0.9999998625854833, iteration: 45696
loss: 0.9700931310653687,grad_norm: 0.9999990150266941, iteration: 45697
loss: 1.002032995223999,grad_norm: 0.9999994427651356, iteration: 45698
loss: 0.9949713945388794,grad_norm: 0.8309792232883252, iteration: 45699
loss: 1.0496244430541992,grad_norm: 0.9999993243329236, iteration: 45700
loss: 1.0148650407791138,grad_norm: 0.9011907678400778, iteration: 45701
loss: 0.9962471127510071,grad_norm: 0.9999993477482185, iteration: 45702
loss: 1.011596918106079,grad_norm: 0.9999991206764028, iteration: 45703
loss: 0.9914485216140747,grad_norm: 0.9999990646691982, iteration: 45704
loss: 1.0080054998397827,grad_norm: 0.8569294793833458, iteration: 45705
loss: 1.0256860256195068,grad_norm: 0.7727802187335315, iteration: 45706
loss: 0.9984256029129028,grad_norm: 0.922505374738058, iteration: 45707
loss: 1.1122034788131714,grad_norm: 0.9999996000115468, iteration: 45708
loss: 0.9868613481521606,grad_norm: 0.78177834865414, iteration: 45709
loss: 1.009009838104248,grad_norm: 0.9999994865114314, iteration: 45710
loss: 0.9984768629074097,grad_norm: 0.9999990294569525, iteration: 45711
loss: 1.0085567235946655,grad_norm: 0.9999991246369723, iteration: 45712
loss: 1.027982473373413,grad_norm: 0.9933873057793645, iteration: 45713
loss: 0.9855360388755798,grad_norm: 0.9583846386702329, iteration: 45714
loss: 0.9728775024414062,grad_norm: 0.9999991905208959, iteration: 45715
loss: 1.027252197265625,grad_norm: 0.9999994264861842, iteration: 45716
loss: 0.9644294381141663,grad_norm: 0.820192931390839, iteration: 45717
loss: 1.035658836364746,grad_norm: 0.9999998129158328, iteration: 45718
loss: 0.9969258904457092,grad_norm: 0.86970722242244, iteration: 45719
loss: 1.079128623008728,grad_norm: 0.9999996050998619, iteration: 45720
loss: 1.0044647455215454,grad_norm: 0.8018771835019762, iteration: 45721
loss: 0.9936360716819763,grad_norm: 0.9999993410954318, iteration: 45722
loss: 1.025519609451294,grad_norm: 0.9999990333491743, iteration: 45723
loss: 0.9803436398506165,grad_norm: 0.9999990773738884, iteration: 45724
loss: 1.005749225616455,grad_norm: 0.9999997272758712, iteration: 45725
loss: 1.0151950120925903,grad_norm: 0.9999992613347185, iteration: 45726
loss: 1.0449734926223755,grad_norm: 0.9999991904305461, iteration: 45727
loss: 0.9862247705459595,grad_norm: 0.9999991456254257, iteration: 45728
loss: 1.002524971961975,grad_norm: 0.9999990396281315, iteration: 45729
loss: 1.071269154548645,grad_norm: 0.9999995812966658, iteration: 45730
loss: 1.038421869277954,grad_norm: 0.813151751290223, iteration: 45731
loss: 0.9697858095169067,grad_norm: 0.9999991926887438, iteration: 45732
loss: 0.9773072600364685,grad_norm: 0.9654135522811548, iteration: 45733
loss: 0.9634329080581665,grad_norm: 0.9751201073305216, iteration: 45734
loss: 0.99949049949646,grad_norm: 0.9999991832351132, iteration: 45735
loss: 1.0246458053588867,grad_norm: 0.8560487700947291, iteration: 45736
loss: 1.0007623434066772,grad_norm: 0.9999994358748495, iteration: 45737
loss: 1.0073232650756836,grad_norm: 0.9291176035089216, iteration: 45738
loss: 1.0449467897415161,grad_norm: 0.9968013106963789, iteration: 45739
loss: 1.020426869392395,grad_norm: 0.9999995896097551, iteration: 45740
loss: 0.9956954717636108,grad_norm: 0.9999993343216131, iteration: 45741
loss: 0.97605299949646,grad_norm: 0.9565136415066595, iteration: 45742
loss: 0.9939851760864258,grad_norm: 0.9762157632819233, iteration: 45743
loss: 1.0007513761520386,grad_norm: 0.8365808049970096, iteration: 45744
loss: 1.099700927734375,grad_norm: 0.9999996306805264, iteration: 45745
loss: 0.9921918511390686,grad_norm: 0.8871960885188696, iteration: 45746
loss: 1.005895733833313,grad_norm: 0.9999993332592985, iteration: 45747
loss: 1.0494284629821777,grad_norm: 0.9999990910355402, iteration: 45748
loss: 0.9852878451347351,grad_norm: 0.9157763394875439, iteration: 45749
loss: 1.0207021236419678,grad_norm: 0.9061631616988142, iteration: 45750
loss: 1.0066328048706055,grad_norm: 0.9999995990401379, iteration: 45751
loss: 1.1121100187301636,grad_norm: 0.9999991004580282, iteration: 45752
loss: 1.0325299501419067,grad_norm: 0.9999991603616173, iteration: 45753
loss: 1.0509586334228516,grad_norm: 0.9999993690727831, iteration: 45754
loss: 1.0252832174301147,grad_norm: 0.9999992344221875, iteration: 45755
loss: 0.9844814538955688,grad_norm: 0.9999995906294169, iteration: 45756
loss: 1.021865963935852,grad_norm: 0.8035961699980391, iteration: 45757
loss: 1.0338901281356812,grad_norm: 0.9999994434937648, iteration: 45758
loss: 1.0015416145324707,grad_norm: 0.9106213159273828, iteration: 45759
loss: 0.9670438170433044,grad_norm: 0.9999991721174505, iteration: 45760
loss: 0.9959400296211243,grad_norm: 0.9226907148518947, iteration: 45761
loss: 0.9620066285133362,grad_norm: 0.9030975921651854, iteration: 45762
loss: 0.9958710670471191,grad_norm: 0.9999991352082437, iteration: 45763
loss: 1.0273345708847046,grad_norm: 0.9999990784497745, iteration: 45764
loss: 1.006883144378662,grad_norm: 0.9526296598998992, iteration: 45765
loss: 0.956634521484375,grad_norm: 0.8842551726115425, iteration: 45766
loss: 0.9915665984153748,grad_norm: 0.9227567986997965, iteration: 45767
loss: 1.0548685789108276,grad_norm: 0.9999996268645762, iteration: 45768
loss: 1.068145990371704,grad_norm: 0.999999059719562, iteration: 45769
loss: 1.0250076055526733,grad_norm: 0.913016387396363, iteration: 45770
loss: 1.0196393728256226,grad_norm: 0.9999990212240748, iteration: 45771
loss: 1.0040781497955322,grad_norm: 0.8655929657082572, iteration: 45772
loss: 1.1046377420425415,grad_norm: 0.9999993658987009, iteration: 45773
loss: 0.9848484992980957,grad_norm: 0.9999991207872954, iteration: 45774
loss: 1.0024440288543701,grad_norm: 0.9037849788054731, iteration: 45775
loss: 0.9827630519866943,grad_norm: 0.9256982884446522, iteration: 45776
loss: 1.0307033061981201,grad_norm: 0.9844714567336238, iteration: 45777
loss: 1.0079392194747925,grad_norm: 0.9619943007119126, iteration: 45778
loss: 1.0615649223327637,grad_norm: 0.999999434415487, iteration: 45779
loss: 0.9742256999015808,grad_norm: 0.8705524209578608, iteration: 45780
loss: 1.0032328367233276,grad_norm: 0.9392952868501376, iteration: 45781
loss: 1.055668592453003,grad_norm: 0.9999993587489213, iteration: 45782
loss: 1.0079880952835083,grad_norm: 0.9999991861274654, iteration: 45783
loss: 0.9960445761680603,grad_norm: 0.8539928610951873, iteration: 45784
loss: 1.009893536567688,grad_norm: 0.9999992117752358, iteration: 45785
loss: 1.022383689880371,grad_norm: 0.9563318179076664, iteration: 45786
loss: 1.0350747108459473,grad_norm: 0.9999991195198306, iteration: 45787
loss: 0.9917559623718262,grad_norm: 0.9273810323106938, iteration: 45788
loss: 1.1365633010864258,grad_norm: 0.9999993623381292, iteration: 45789
loss: 1.0095609426498413,grad_norm: 0.9999996649256652, iteration: 45790
loss: 1.0217151641845703,grad_norm: 0.8587372441842611, iteration: 45791
loss: 0.9767415523529053,grad_norm: 0.9937348747962504, iteration: 45792
loss: 0.9894649386405945,grad_norm: 0.8163225007156606, iteration: 45793
loss: 0.9825707077980042,grad_norm: 0.964960462669313, iteration: 45794
loss: 1.0014779567718506,grad_norm: 0.9999989955639692, iteration: 45795
loss: 0.9851416349411011,grad_norm: 0.8916267829456705, iteration: 45796
loss: 0.9780722856521606,grad_norm: 0.9999992454594566, iteration: 45797
loss: 0.9855726361274719,grad_norm: 0.9944033226052401, iteration: 45798
loss: 1.0294630527496338,grad_norm: 0.999999042096122, iteration: 45799
loss: 1.0116522312164307,grad_norm: 0.9883564214839209, iteration: 45800
loss: 1.0111994743347168,grad_norm: 0.9376065414561116, iteration: 45801
loss: 1.0265460014343262,grad_norm: 0.9999989559977707, iteration: 45802
loss: 1.0553523302078247,grad_norm: 0.9999995293853401, iteration: 45803
loss: 1.008732557296753,grad_norm: 0.7954213583378192, iteration: 45804
loss: 1.0151450634002686,grad_norm: 0.9999991541931403, iteration: 45805
loss: 0.9906048774719238,grad_norm: 0.8614336017143012, iteration: 45806
loss: 0.9791337847709656,grad_norm: 0.999999008487183, iteration: 45807
loss: 1.0228486061096191,grad_norm: 0.9999989860970179, iteration: 45808
loss: 0.9845978617668152,grad_norm: 0.9999995092101864, iteration: 45809
loss: 1.0428476333618164,grad_norm: 0.9999994888788786, iteration: 45810
loss: 1.1236053705215454,grad_norm: 0.9999992108717025, iteration: 45811
loss: 1.362609624862671,grad_norm: 0.9999993867990202, iteration: 45812
loss: 1.0329315662384033,grad_norm: 0.8538493221631427, iteration: 45813
loss: 1.0606001615524292,grad_norm: 0.9516758661983714, iteration: 45814
loss: 1.2272785902023315,grad_norm: 0.9999992019802536, iteration: 45815
loss: 0.9844142198562622,grad_norm: 0.8319144152570334, iteration: 45816
loss: 1.032435417175293,grad_norm: 0.9872557908234452, iteration: 45817
loss: 0.9916837215423584,grad_norm: 0.9264311902325401, iteration: 45818
loss: 1.1302790641784668,grad_norm: 0.9999994642393887, iteration: 45819
loss: 1.0377002954483032,grad_norm: 0.9999993434071006, iteration: 45820
loss: 1.0011727809906006,grad_norm: 0.9999990881814758, iteration: 45821
loss: 0.9822558760643005,grad_norm: 0.9999990639400965, iteration: 45822
loss: 1.0262203216552734,grad_norm: 0.9318088163972599, iteration: 45823
loss: 1.033922791481018,grad_norm: 0.999999074040853, iteration: 45824
loss: 1.1335065364837646,grad_norm: 0.9999995285824773, iteration: 45825
loss: 1.0738706588745117,grad_norm: 0.9999991763082304, iteration: 45826
loss: 1.033655047416687,grad_norm: 0.8277893207329788, iteration: 45827
loss: 1.0635849237442017,grad_norm: 0.9999991981072781, iteration: 45828
loss: 1.0712391138076782,grad_norm: 0.9282411778500369, iteration: 45829
loss: 1.1938436031341553,grad_norm: 0.9999993673569523, iteration: 45830
loss: 1.0449891090393066,grad_norm: 0.9999991923436707, iteration: 45831
loss: 1.1442209482192993,grad_norm: 0.9999998303714716, iteration: 45832
loss: 1.0291082859039307,grad_norm: 0.9999991277134159, iteration: 45833
loss: 1.0223207473754883,grad_norm: 0.9999994067791548, iteration: 45834
loss: 1.0228954553604126,grad_norm: 0.9999990625647102, iteration: 45835
loss: 0.9937406182289124,grad_norm: 0.7577632473528677, iteration: 45836
loss: 0.9704796671867371,grad_norm: 0.9999991425849886, iteration: 45837
loss: 1.1660058498382568,grad_norm: 0.9999990588179313, iteration: 45838
loss: 1.0222272872924805,grad_norm: 0.9999990270927565, iteration: 45839
loss: 1.0791561603546143,grad_norm: 0.9999992133781177, iteration: 45840
loss: 1.0047498941421509,grad_norm: 0.9592406601162912, iteration: 45841
loss: 1.0341873168945312,grad_norm: 0.9999992162170658, iteration: 45842
loss: 1.325296401977539,grad_norm: 0.9999996720770459, iteration: 45843
loss: 1.1234201192855835,grad_norm: 0.9999990800774796, iteration: 45844
loss: 1.0311074256896973,grad_norm: 0.9421973152789739, iteration: 45845
loss: 1.1010873317718506,grad_norm: 0.9999991144812951, iteration: 45846
loss: 1.099909782409668,grad_norm: 0.999999174904086, iteration: 45847
loss: 1.1842299699783325,grad_norm: 0.9999995948907644, iteration: 45848
loss: 0.9945871233940125,grad_norm: 0.999999160422056, iteration: 45849
loss: 0.9789393544197083,grad_norm: 0.999999516030929, iteration: 45850
loss: 1.0522371530532837,grad_norm: 0.9580616104976016, iteration: 45851
loss: 1.0299689769744873,grad_norm: 0.9923988353134249, iteration: 45852
loss: 1.035689353942871,grad_norm: 0.9999993399441516, iteration: 45853
loss: 1.007128357887268,grad_norm: 0.8514689532311691, iteration: 45854
loss: 0.9918792843818665,grad_norm: 0.9397196498683849, iteration: 45855
loss: 1.0510315895080566,grad_norm: 0.9999991253982597, iteration: 45856
loss: 1.0565154552459717,grad_norm: 0.999999194128025, iteration: 45857
loss: 1.0324374437332153,grad_norm: 0.9999990723682339, iteration: 45858
loss: 0.9489884972572327,grad_norm: 0.9999991049872481, iteration: 45859
loss: 1.0399221181869507,grad_norm: 0.9999997624683847, iteration: 45860
loss: 1.0497143268585205,grad_norm: 0.9999991646065588, iteration: 45861
loss: 1.0236608982086182,grad_norm: 0.824997244624478, iteration: 45862
loss: 1.0088521242141724,grad_norm: 0.9999998373106509, iteration: 45863
loss: 1.008617639541626,grad_norm: 0.9999996919061868, iteration: 45864
loss: 0.9777818322181702,grad_norm: 0.9913140372871948, iteration: 45865
loss: 1.010595440864563,grad_norm: 0.9999995517527973, iteration: 45866
loss: 0.9780341982841492,grad_norm: 0.9999990587371175, iteration: 45867
loss: 0.9825073480606079,grad_norm: 0.9629836791558827, iteration: 45868
loss: 0.9937961101531982,grad_norm: 0.9965480134351465, iteration: 45869
loss: 0.9795368909835815,grad_norm: 0.9227783327645853, iteration: 45870
loss: 1.0003198385238647,grad_norm: 0.8427008099850024, iteration: 45871
loss: 1.0253887176513672,grad_norm: 0.9244508826044514, iteration: 45872
loss: 1.026975393295288,grad_norm: 0.8964117030859537, iteration: 45873
loss: 1.0104609727859497,grad_norm: 0.9551263021619415, iteration: 45874
loss: 1.0149046182632446,grad_norm: 0.9808012310400369, iteration: 45875
loss: 0.9537280797958374,grad_norm: 0.999999438665449, iteration: 45876
loss: 0.9992939233779907,grad_norm: 0.9103795272486074, iteration: 45877
loss: 0.9814891219139099,grad_norm: 0.9412464736722113, iteration: 45878
loss: 1.1024997234344482,grad_norm: 0.9999990591808614, iteration: 45879
loss: 0.9946185350418091,grad_norm: 0.9153196616331539, iteration: 45880
loss: 1.017747402191162,grad_norm: 0.7478411700741142, iteration: 45881
loss: 1.019968032836914,grad_norm: 0.8595963694997184, iteration: 45882
loss: 1.0072349309921265,grad_norm: 0.999999200266152, iteration: 45883
loss: 0.9712470173835754,grad_norm: 0.9999992321032503, iteration: 45884
loss: 1.0327026844024658,grad_norm: 0.9999991547409307, iteration: 45885
loss: 1.0731022357940674,grad_norm: 0.9999997380890246, iteration: 45886
loss: 0.9802649021148682,grad_norm: 0.9135753550613808, iteration: 45887
loss: 0.9716538190841675,grad_norm: 0.9999995575710201, iteration: 45888
loss: 1.0417789220809937,grad_norm: 0.9999993709784808, iteration: 45889
loss: 0.9935175776481628,grad_norm: 0.8951711543171765, iteration: 45890
loss: 1.0152642726898193,grad_norm: 0.8187543427773973, iteration: 45891
loss: 0.9925227165222168,grad_norm: 0.9999996791142324, iteration: 45892
loss: 1.00601327419281,grad_norm: 0.9999994050027347, iteration: 45893
loss: 1.1146060228347778,grad_norm: 0.9999992053965414, iteration: 45894
loss: 1.0491664409637451,grad_norm: 0.9999995128259129, iteration: 45895
loss: 1.0399209260940552,grad_norm: 0.9014233156884925, iteration: 45896
loss: 1.0148237943649292,grad_norm: 0.9138526259780795, iteration: 45897
loss: 0.9744405150413513,grad_norm: 0.9176560175058414, iteration: 45898
loss: 1.0084178447723389,grad_norm: 0.9999990358079002, iteration: 45899
loss: 1.0094610452651978,grad_norm: 0.9999989906637458, iteration: 45900
loss: 1.0063327550888062,grad_norm: 0.9238027010696275, iteration: 45901
loss: 0.9992296099662781,grad_norm: 0.9999992916042262, iteration: 45902
loss: 1.0126694440841675,grad_norm: 0.9999998440149651, iteration: 45903
loss: 0.9539743065834045,grad_norm: 0.905253612732752, iteration: 45904
loss: 0.9896095991134644,grad_norm: 0.9472536910113203, iteration: 45905
loss: 1.089478850364685,grad_norm: 0.9135531711036914, iteration: 45906
loss: 1.0040054321289062,grad_norm: 0.8756343000770651, iteration: 45907
loss: 1.0073109865188599,grad_norm: 0.986969868054328, iteration: 45908
loss: 0.9757259488105774,grad_norm: 0.9562935844219552, iteration: 45909
loss: 1.076128602027893,grad_norm: 0.9999990225315295, iteration: 45910
loss: 1.116469383239746,grad_norm: 0.8089949675826925, iteration: 45911
loss: 1.0211163759231567,grad_norm: 0.963884736633231, iteration: 45912
loss: 1.0166457891464233,grad_norm: 0.9999991122522467, iteration: 45913
loss: 1.0182921886444092,grad_norm: 0.9999993739185751, iteration: 45914
loss: 0.9833569526672363,grad_norm: 0.8356836629143648, iteration: 45915
loss: 1.104767918586731,grad_norm: 0.9999991120242312, iteration: 45916
loss: 1.0822052955627441,grad_norm: 0.9999990134421937, iteration: 45917
loss: 1.0172244310379028,grad_norm: 0.9999990355059876, iteration: 45918
loss: 1.061782717704773,grad_norm: 0.9999991203015424, iteration: 45919
loss: 1.062078833580017,grad_norm: 0.999999257960065, iteration: 45920
loss: 1.083427906036377,grad_norm: 0.9999991573731695, iteration: 45921
loss: 1.0129588842391968,grad_norm: 0.9821551800555527, iteration: 45922
loss: 1.0752979516983032,grad_norm: 0.9999992600793617, iteration: 45923
loss: 1.0799013376235962,grad_norm: 0.9999990569821166, iteration: 45924
loss: 0.9892061352729797,grad_norm: 0.9009271377251785, iteration: 45925
loss: 1.0160295963287354,grad_norm: 0.9999989861016534, iteration: 45926
loss: 0.9862399697303772,grad_norm: 0.7731979497260703, iteration: 45927
loss: 1.0779396295547485,grad_norm: 0.999999710054454, iteration: 45928
loss: 1.0257591009140015,grad_norm: 0.8771072859735703, iteration: 45929
loss: 1.0163785219192505,grad_norm: 0.9999993695516282, iteration: 45930
loss: 0.9877892136573792,grad_norm: 0.7696253289117239, iteration: 45931
loss: 0.9975472092628479,grad_norm: 0.9999990945559244, iteration: 45932
loss: 1.0249873399734497,grad_norm: 0.8969250061031865, iteration: 45933
loss: 0.9816223382949829,grad_norm: 0.9999992627757506, iteration: 45934
loss: 1.004326581954956,grad_norm: 0.908526709143578, iteration: 45935
loss: 1.0612499713897705,grad_norm: 0.9999997394935461, iteration: 45936
loss: 0.996728777885437,grad_norm: 0.9782648066041357, iteration: 45937
loss: 0.9664148688316345,grad_norm: 0.9999990788712265, iteration: 45938
loss: 0.9830600023269653,grad_norm: 0.964616527580996, iteration: 45939
loss: 0.9991469979286194,grad_norm: 0.9999990183835984, iteration: 45940
loss: 1.043738842010498,grad_norm: 0.9999996212372902, iteration: 45941
loss: 1.0302375555038452,grad_norm: 0.9999994320075651, iteration: 45942
loss: 0.9834340214729309,grad_norm: 0.853496498270115, iteration: 45943
loss: 1.000869631767273,grad_norm: 0.9241110603602044, iteration: 45944
loss: 1.0122603178024292,grad_norm: 0.8746757999968142, iteration: 45945
loss: 0.960736095905304,grad_norm: 0.9700119063057676, iteration: 45946
loss: 1.0170059204101562,grad_norm: 0.9647306596599041, iteration: 45947
loss: 1.0029882192611694,grad_norm: 0.9534879333363495, iteration: 45948
loss: 0.9972639679908752,grad_norm: 0.9337127388389567, iteration: 45949
loss: 1.0448118448257446,grad_norm: 0.9999993590934636, iteration: 45950
loss: 1.0119364261627197,grad_norm: 0.8437164713296231, iteration: 45951
loss: 1.0143488645553589,grad_norm: 0.9999990651555785, iteration: 45952
loss: 1.043670654296875,grad_norm: 0.9428502746022785, iteration: 45953
loss: 1.233173131942749,grad_norm: 0.9999992476858889, iteration: 45954
loss: 0.9764418005943298,grad_norm: 0.868779125108397, iteration: 45955
loss: 1.0001052618026733,grad_norm: 0.9558458897312303, iteration: 45956
loss: 1.7144978046417236,grad_norm: 0.9999996174771553, iteration: 45957
loss: 1.2978250980377197,grad_norm: 0.9999993032353298, iteration: 45958
loss: 1.3447233438491821,grad_norm: 0.999999541909158, iteration: 45959
loss: 1.0036993026733398,grad_norm: 0.9999992590512515, iteration: 45960
loss: 1.1401689052581787,grad_norm: 0.9999991961034741, iteration: 45961
loss: 1.0566571950912476,grad_norm: 0.9999991568448943, iteration: 45962
loss: 1.0856128931045532,grad_norm: 0.9999990808610102, iteration: 45963
loss: 1.5296200513839722,grad_norm: 0.9999995631157346, iteration: 45964
loss: 1.007921576499939,grad_norm: 0.8706613605506112, iteration: 45965
loss: 1.0658953189849854,grad_norm: 0.9999993780050074, iteration: 45966
loss: 1.2828773260116577,grad_norm: 0.9999994520538019, iteration: 45967
loss: 1.2381591796875,grad_norm: 0.9999995464368031, iteration: 45968
loss: 1.1093027591705322,grad_norm: 0.9999993595280561, iteration: 45969
loss: 1.1478546857833862,grad_norm: 1.0000000413145211, iteration: 45970
loss: 1.1183520555496216,grad_norm: 0.9999997128438044, iteration: 45971
loss: 1.0271236896514893,grad_norm: 0.9999992954369626, iteration: 45972
loss: 1.156699776649475,grad_norm: 0.9999993865588092, iteration: 45973
loss: 1.076925277709961,grad_norm: 0.9999991458455431, iteration: 45974
loss: 1.2275527715682983,grad_norm: 0.9999995263273799, iteration: 45975
loss: 0.9968683123588562,grad_norm: 0.9999993157420576, iteration: 45976
loss: 1.123894453048706,grad_norm: 0.9999993940392468, iteration: 45977
loss: 1.2271736860275269,grad_norm: 0.9999999858083541, iteration: 45978
loss: 1.0972660779953003,grad_norm: 0.999999566499421, iteration: 45979
loss: 1.0190798044204712,grad_norm: 0.9999998524716941, iteration: 45980
loss: 0.99521803855896,grad_norm: 0.999999464897771, iteration: 45981
loss: 1.1912554502487183,grad_norm: 0.9999993113271521, iteration: 45982
loss: 1.1018937826156616,grad_norm: 0.9999994934897458, iteration: 45983
loss: 1.0149540901184082,grad_norm: 0.9999995345786192, iteration: 45984
loss: 1.1436657905578613,grad_norm: 0.9999997190435794, iteration: 45985
loss: 1.2346049547195435,grad_norm: 0.9999999115807281, iteration: 45986
loss: 1.0862525701522827,grad_norm: 0.99999961446442, iteration: 45987
loss: 1.217724084854126,grad_norm: 0.9999997084901022, iteration: 45988
loss: 1.0544675588607788,grad_norm: 0.9999991240648893, iteration: 45989
loss: 1.0680238008499146,grad_norm: 0.9999991783181683, iteration: 45990
loss: 1.2931007146835327,grad_norm: 0.9999997926465055, iteration: 45991
loss: 1.2601977586746216,grad_norm: 0.9999994002456917, iteration: 45992
loss: 1.1704235076904297,grad_norm: 0.9999997584847593, iteration: 45993
loss: 1.0839717388153076,grad_norm: 0.9999998746754751, iteration: 45994
loss: 1.1663752794265747,grad_norm: 0.9999995060973014, iteration: 45995
loss: 1.2096515893936157,grad_norm: 0.9999998103399885, iteration: 45996
loss: 1.1395059823989868,grad_norm: 0.9999997771166025, iteration: 45997
loss: 1.1154201030731201,grad_norm: 0.999999205838761, iteration: 45998
loss: 1.079514980316162,grad_norm: 0.9999994310641096, iteration: 45999
loss: 1.042415976524353,grad_norm: 0.9735038402524943, iteration: 46000
loss: 1.1130701303482056,grad_norm: 0.9999994067749498, iteration: 46001
loss: 1.1017779111862183,grad_norm: 0.9999999585107144, iteration: 46002
loss: 1.1922024488449097,grad_norm: 0.9999994549249791, iteration: 46003
loss: 1.0952192544937134,grad_norm: 0.9999992496467126, iteration: 46004
loss: 1.0990267992019653,grad_norm: 0.9999997470815084, iteration: 46005
loss: 1.0447357892990112,grad_norm: 0.9999998348752451, iteration: 46006
loss: 1.0119147300720215,grad_norm: 0.8932955302174834, iteration: 46007
loss: 1.0892051458358765,grad_norm: 0.9999995780738388, iteration: 46008
loss: 0.9991291761398315,grad_norm: 0.9999990255165406, iteration: 46009
loss: 1.110020637512207,grad_norm: 0.9999999536870434, iteration: 46010
loss: 1.04035484790802,grad_norm: 0.999999239176895, iteration: 46011
loss: 1.019851565361023,grad_norm: 0.9999991093399334, iteration: 46012
loss: 1.0471088886260986,grad_norm: 0.9127861783807658, iteration: 46013
loss: 1.0814112424850464,grad_norm: 0.9999995012545352, iteration: 46014
loss: 1.040406584739685,grad_norm: 0.99999998671123, iteration: 46015
loss: 1.0388867855072021,grad_norm: 0.999999262903351, iteration: 46016
loss: 1.188941240310669,grad_norm: 0.9999998337498512, iteration: 46017
loss: 1.152689814567566,grad_norm: 1.0000000285557602, iteration: 46018
loss: 1.0366157293319702,grad_norm: 0.9999995227023076, iteration: 46019
loss: 1.061452865600586,grad_norm: 0.9999995137197552, iteration: 46020
loss: 1.1399877071380615,grad_norm: 0.999999075664664, iteration: 46021
loss: 1.274880290031433,grad_norm: 0.9999997846980837, iteration: 46022
loss: 0.9608853459358215,grad_norm: 0.9999990138190555, iteration: 46023
loss: 1.0403590202331543,grad_norm: 0.9121082048087864, iteration: 46024
loss: 1.1157300472259521,grad_norm: 0.9999998855050836, iteration: 46025
loss: 1.0834592580795288,grad_norm: 0.9999993736713157, iteration: 46026
loss: 1.0173983573913574,grad_norm: 0.8394664971426673, iteration: 46027
loss: 1.2197744846343994,grad_norm: 0.9999993854977135, iteration: 46028
loss: 1.3023310899734497,grad_norm: 0.9999993759973569, iteration: 46029
loss: 1.1008403301239014,grad_norm: 0.9999996007490882, iteration: 46030
loss: 1.0802841186523438,grad_norm: 0.9999997747664863, iteration: 46031
loss: 1.2650033235549927,grad_norm: 0.9999999142507299, iteration: 46032
loss: 1.0549516677856445,grad_norm: 0.9999994239928379, iteration: 46033
loss: 1.0325285196304321,grad_norm: 0.9999993897412812, iteration: 46034
loss: 1.100759744644165,grad_norm: 0.9999990784055356, iteration: 46035
loss: 1.1124601364135742,grad_norm: 0.9999997420260793, iteration: 46036
loss: 1.1558994054794312,grad_norm: 0.9999997739073159, iteration: 46037
loss: 1.0752909183502197,grad_norm: 0.9999993339021299, iteration: 46038
loss: 1.082484483718872,grad_norm: 0.9999994597404925, iteration: 46039
loss: 1.0796817541122437,grad_norm: 0.9999998654233149, iteration: 46040
loss: 1.0538519620895386,grad_norm: 0.844784825897083, iteration: 46041
loss: 1.0839368104934692,grad_norm: 0.9999996229667873, iteration: 46042
loss: 1.0822275876998901,grad_norm: 0.9999994176728626, iteration: 46043
loss: 1.0158671140670776,grad_norm: 0.9999996020539271, iteration: 46044
loss: 1.050951361656189,grad_norm: 0.9999998387977331, iteration: 46045
loss: 1.0250709056854248,grad_norm: 0.9999992627872776, iteration: 46046
loss: 1.016899824142456,grad_norm: 0.999999124161469, iteration: 46047
loss: 1.040860891342163,grad_norm: 0.9999990026266635, iteration: 46048
loss: 1.0068621635437012,grad_norm: 0.8610907747752721, iteration: 46049
loss: 1.0089521408081055,grad_norm: 0.9999990765153344, iteration: 46050
loss: 1.0003035068511963,grad_norm: 0.999999110000223, iteration: 46051
loss: 1.122910737991333,grad_norm: 0.9999994391504773, iteration: 46052
loss: 1.0458614826202393,grad_norm: 0.9999999011804237, iteration: 46053
loss: 1.0449244976043701,grad_norm: 0.9999991642057027, iteration: 46054
loss: 1.0424124002456665,grad_norm: 0.9999992884329458, iteration: 46055
loss: 1.0113089084625244,grad_norm: 0.9999993271019955, iteration: 46056
loss: 1.0702077150344849,grad_norm: 0.9999994807077207, iteration: 46057
loss: 1.0765553712844849,grad_norm: 0.9999995712147962, iteration: 46058
loss: 1.0611777305603027,grad_norm: 0.999999430617162, iteration: 46059
loss: 1.0267163515090942,grad_norm: 0.8100992383268544, iteration: 46060
loss: 1.0971553325653076,grad_norm: 0.9999992048041977, iteration: 46061
loss: 1.0835589170455933,grad_norm: 0.999999854334313, iteration: 46062
loss: 0.9941799640655518,grad_norm: 0.7934743491004494, iteration: 46063
loss: 1.0105069875717163,grad_norm: 0.9580462320519171, iteration: 46064
loss: 1.0298173427581787,grad_norm: 0.9999994469001977, iteration: 46065
loss: 1.020402193069458,grad_norm: 0.9999991278101962, iteration: 46066
loss: 0.9976530075073242,grad_norm: 0.9999999733528135, iteration: 46067
loss: 0.9997316598892212,grad_norm: 0.8630948959763356, iteration: 46068
loss: 0.9753233790397644,grad_norm: 0.9260792151708717, iteration: 46069
loss: 1.0166122913360596,grad_norm: 0.9999994266154639, iteration: 46070
loss: 1.0390920639038086,grad_norm: 0.9999992605932371, iteration: 46071
loss: 0.9596220850944519,grad_norm: 0.9999991624853971, iteration: 46072
loss: 1.2385692596435547,grad_norm: 0.999999861966822, iteration: 46073
loss: 0.9853082895278931,grad_norm: 0.908600929574695, iteration: 46074
loss: 1.055552363395691,grad_norm: 0.9999999655117422, iteration: 46075
loss: 1.0002528429031372,grad_norm: 0.8683540546526431, iteration: 46076
loss: 1.0266170501708984,grad_norm: 0.9999991612899718, iteration: 46077
loss: 0.9572120308876038,grad_norm: 0.9999991438621809, iteration: 46078
loss: 1.081998348236084,grad_norm: 0.9999996551790915, iteration: 46079
loss: 1.0828561782836914,grad_norm: 0.9999998419434264, iteration: 46080
loss: 1.0090553760528564,grad_norm: 0.9999991312159131, iteration: 46081
loss: 1.017320156097412,grad_norm: 0.8898425360994687, iteration: 46082
loss: 1.1036547422409058,grad_norm: 0.9999996699601694, iteration: 46083
loss: 1.103338360786438,grad_norm: 0.9999998345662032, iteration: 46084
loss: 0.9942320585250854,grad_norm: 0.8339026521888504, iteration: 46085
loss: 1.0282422304153442,grad_norm: 0.9999990596833707, iteration: 46086
loss: 1.0681096315383911,grad_norm: 0.9999994742113097, iteration: 46087
loss: 1.033621907234192,grad_norm: 0.9999994572086136, iteration: 46088
loss: 0.9861142635345459,grad_norm: 0.9402756013422559, iteration: 46089
loss: 1.1538002490997314,grad_norm: 0.9999999096137058, iteration: 46090
loss: 1.0136469602584839,grad_norm: 0.9999991968574437, iteration: 46091
loss: 1.1146329641342163,grad_norm: 0.999999688102279, iteration: 46092
loss: 1.0287097692489624,grad_norm: 0.9046081369778555, iteration: 46093
loss: 1.0250506401062012,grad_norm: 0.9999991237061985, iteration: 46094
loss: 0.9899636507034302,grad_norm: 0.9999993801871137, iteration: 46095
loss: 1.021134853363037,grad_norm: 0.9999995283625914, iteration: 46096
loss: 1.0161024332046509,grad_norm: 0.9999998720200008, iteration: 46097
loss: 0.987635612487793,grad_norm: 0.9219656247747055, iteration: 46098
loss: 1.008349061012268,grad_norm: 0.9011710236500093, iteration: 46099
loss: 1.0144412517547607,grad_norm: 0.9999995231016352, iteration: 46100
loss: 0.9719653129577637,grad_norm: 0.8428163975798753, iteration: 46101
loss: 1.1470634937286377,grad_norm: 0.9999994414409885, iteration: 46102
loss: 0.9916544556617737,grad_norm: 0.9999992096349404, iteration: 46103
loss: 1.0054396390914917,grad_norm: 0.7763877572438933, iteration: 46104
loss: 1.0235109329223633,grad_norm: 0.9999997372339787, iteration: 46105
loss: 0.994591474533081,grad_norm: 0.9179863490102974, iteration: 46106
loss: 0.9755029678344727,grad_norm: 0.8471707009435105, iteration: 46107
loss: 1.010802149772644,grad_norm: 0.9999996496030479, iteration: 46108
loss: 1.0180493593215942,grad_norm: 0.9999992512176212, iteration: 46109
loss: 1.0530672073364258,grad_norm: 0.9999993086396252, iteration: 46110
loss: 0.9967195987701416,grad_norm: 0.9854042141063881, iteration: 46111
loss: 1.079369068145752,grad_norm: 0.9999996019861436, iteration: 46112
loss: 0.9694979190826416,grad_norm: 0.9458291006727192, iteration: 46113
loss: 1.0190492868423462,grad_norm: 0.9999990952153199, iteration: 46114
loss: 1.1429704427719116,grad_norm: 0.9999998834722208, iteration: 46115
loss: 0.9668359160423279,grad_norm: 0.9999990234516938, iteration: 46116
loss: 0.9800957441329956,grad_norm: 0.9999991658576523, iteration: 46117
loss: 1.0498487949371338,grad_norm: 0.9999991746467454, iteration: 46118
loss: 1.036461591720581,grad_norm: 0.8942844634416353, iteration: 46119
loss: 1.014572024345398,grad_norm: 0.8868095755988034, iteration: 46120
loss: 1.017246127128601,grad_norm: 0.9999991000059211, iteration: 46121
loss: 1.0073784589767456,grad_norm: 0.999999135680204, iteration: 46122
loss: 0.9959335923194885,grad_norm: 0.999999148748379, iteration: 46123
loss: 1.0136562585830688,grad_norm: 0.7842107771272029, iteration: 46124
loss: 0.9716131091117859,grad_norm: 0.9996308211852654, iteration: 46125
loss: 0.9978476762771606,grad_norm: 0.9999992433050161, iteration: 46126
loss: 1.0174404382705688,grad_norm: 0.9977009599305718, iteration: 46127
loss: 1.0555049180984497,grad_norm: 0.9999991019163972, iteration: 46128
loss: 0.969464898109436,grad_norm: 0.9999990452909094, iteration: 46129
loss: 1.0019816160202026,grad_norm: 0.9999989607191874, iteration: 46130
loss: 1.0090364217758179,grad_norm: 0.9999994154123607, iteration: 46131
loss: 0.9955605268478394,grad_norm: 0.9999994220729228, iteration: 46132
loss: 1.0368469953536987,grad_norm: 0.9999995796676243, iteration: 46133
loss: 1.0411914587020874,grad_norm: 0.9374053937919292, iteration: 46134
loss: 1.0178406238555908,grad_norm: 0.9114911560135758, iteration: 46135
loss: 1.0429860353469849,grad_norm: 0.9999994385005603, iteration: 46136
loss: 1.034008502960205,grad_norm: 0.9999995245887523, iteration: 46137
loss: 1.0378402471542358,grad_norm: 0.8970261418822558, iteration: 46138
loss: 0.9641152024269104,grad_norm: 0.9999989804789063, iteration: 46139
loss: 0.9963517189025879,grad_norm: 0.8915379969176199, iteration: 46140
loss: 1.0115777254104614,grad_norm: 0.8181801185939427, iteration: 46141
loss: 1.0175447463989258,grad_norm: 0.9999992512015894, iteration: 46142
loss: 1.0288243293762207,grad_norm: 0.9999990563920065, iteration: 46143
loss: 0.9891124963760376,grad_norm: 0.9040216598008369, iteration: 46144
loss: 1.020573616027832,grad_norm: 0.9999996684104232, iteration: 46145
loss: 1.0029830932617188,grad_norm: 0.9999992138965136, iteration: 46146
loss: 1.0912402868270874,grad_norm: 0.9999993391279, iteration: 46147
loss: 1.0309722423553467,grad_norm: 0.9052428723524812, iteration: 46148
loss: 0.9918462038040161,grad_norm: 0.8143545142856934, iteration: 46149
loss: 0.9623677730560303,grad_norm: 0.9631863974265573, iteration: 46150
loss: 1.0397247076034546,grad_norm: 0.9999990783171735, iteration: 46151
loss: 0.9771954417228699,grad_norm: 0.9999991551582058, iteration: 46152
loss: 0.9824029803276062,grad_norm: 0.9203861680534984, iteration: 46153
loss: 0.9718531370162964,grad_norm: 0.9686743901034195, iteration: 46154
loss: 1.0158225297927856,grad_norm: 0.9213754360110576, iteration: 46155
loss: 0.9887896180152893,grad_norm: 0.9475752157835726, iteration: 46156
loss: 1.0251052379608154,grad_norm: 0.9688338259815394, iteration: 46157
loss: 0.9982874989509583,grad_norm: 0.9685056799380026, iteration: 46158
loss: 0.9996082782745361,grad_norm: 0.9999997039962814, iteration: 46159
loss: 1.0453236103057861,grad_norm: 0.9840343121066489, iteration: 46160
loss: 1.0104888677597046,grad_norm: 0.989684486876167, iteration: 46161
loss: 1.0087778568267822,grad_norm: 0.9453895675851047, iteration: 46162
loss: 0.9850059151649475,grad_norm: 0.9999994690136872, iteration: 46163
loss: 1.0230821371078491,grad_norm: 0.8886592175168768, iteration: 46164
loss: 1.0115214586257935,grad_norm: 0.999999301883557, iteration: 46165
loss: 1.004065990447998,grad_norm: 0.8425011339553867, iteration: 46166
loss: 1.027248501777649,grad_norm: 0.8687819697941843, iteration: 46167
loss: 1.0224635601043701,grad_norm: 0.9422226259897012, iteration: 46168
loss: 1.0078210830688477,grad_norm: 0.8294852144269225, iteration: 46169
loss: 1.0505479574203491,grad_norm: 0.9999988631309743, iteration: 46170
loss: 1.0131430625915527,grad_norm: 0.9032211681629013, iteration: 46171
loss: 0.9907453060150146,grad_norm: 0.9999993628984265, iteration: 46172
loss: 1.0024291276931763,grad_norm: 0.9999991586364511, iteration: 46173
loss: 0.9998018741607666,grad_norm: 0.9267140701532163, iteration: 46174
loss: 0.9768775105476379,grad_norm: 0.8813432179476465, iteration: 46175
loss: 0.9602383375167847,grad_norm: 0.9995163524213226, iteration: 46176
loss: 1.1298282146453857,grad_norm: 0.999999856076133, iteration: 46177
loss: 0.9818158745765686,grad_norm: 0.7709437788367043, iteration: 46178
loss: 0.9980000853538513,grad_norm: 0.9999989832943251, iteration: 46179
loss: 1.0308208465576172,grad_norm: 0.8401749979730293, iteration: 46180
loss: 1.0221667289733887,grad_norm: 0.7535686552284482, iteration: 46181
loss: 1.0045055150985718,grad_norm: 0.99999952815664, iteration: 46182
loss: 0.990790843963623,grad_norm: 0.8628145008515193, iteration: 46183
loss: 0.9898065328598022,grad_norm: 0.9999991879687912, iteration: 46184
loss: 0.976757824420929,grad_norm: 0.8405654951156972, iteration: 46185
loss: 0.9624242782592773,grad_norm: 0.9999993786679429, iteration: 46186
loss: 1.0091842412948608,grad_norm: 0.9368440661991004, iteration: 46187
loss: 0.9911701679229736,grad_norm: 0.9159128046040242, iteration: 46188
loss: 0.9976503849029541,grad_norm: 0.9999993884471089, iteration: 46189
loss: 0.9943180680274963,grad_norm: 0.8644954632746246, iteration: 46190
loss: 0.9963901042938232,grad_norm: 0.9238285122647418, iteration: 46191
loss: 0.9853457808494568,grad_norm: 0.9999989664143911, iteration: 46192
loss: 0.9766566157341003,grad_norm: 0.9999991450316402, iteration: 46193
loss: 1.1902214288711548,grad_norm: 0.9999998669293373, iteration: 46194
loss: 1.033572793006897,grad_norm: 0.9999991831171476, iteration: 46195
loss: 1.0106163024902344,grad_norm: 0.9631101772393778, iteration: 46196
loss: 1.052924394607544,grad_norm: 0.9414577385536075, iteration: 46197
loss: 1.1265685558319092,grad_norm: 0.9999992885616664, iteration: 46198
loss: 1.0586851835250854,grad_norm: 0.9999997706545879, iteration: 46199
loss: 1.157439112663269,grad_norm: 0.99999980565785, iteration: 46200
loss: 1.0345816612243652,grad_norm: 0.9902644870920879, iteration: 46201
loss: 1.0927447080612183,grad_norm: 0.9999996169267558, iteration: 46202
loss: 1.0950380563735962,grad_norm: 0.9999994401945602, iteration: 46203
loss: 1.1291656494140625,grad_norm: 0.9999996164812882, iteration: 46204
loss: 0.9818006753921509,grad_norm: 0.9999993281486576, iteration: 46205
loss: 1.0006757974624634,grad_norm: 0.9999992343748199, iteration: 46206
loss: 1.124571442604065,grad_norm: 0.9894558576698357, iteration: 46207
loss: 1.0287870168685913,grad_norm: 0.9999991204900441, iteration: 46208
loss: 1.017871618270874,grad_norm: 0.9999994835088957, iteration: 46209
loss: 1.0366219282150269,grad_norm: 0.9978583257351464, iteration: 46210
loss: 1.0314441919326782,grad_norm: 0.8965444959023168, iteration: 46211
loss: 1.005863904953003,grad_norm: 0.9999990746095605, iteration: 46212
loss: 1.0286303758621216,grad_norm: 0.9999993575572059, iteration: 46213
loss: 0.9916837215423584,grad_norm: 0.9370027491324115, iteration: 46214
loss: 1.0116798877716064,grad_norm: 0.9999989809840005, iteration: 46215
loss: 1.0507904291152954,grad_norm: 0.9999990779569035, iteration: 46216
loss: 1.0068937540054321,grad_norm: 0.8502434363305041, iteration: 46217
loss: 1.0094038248062134,grad_norm: 0.9999992937456194, iteration: 46218
loss: 1.110474944114685,grad_norm: 0.9999994272262014, iteration: 46219
loss: 1.0234142541885376,grad_norm: 0.9999990722574224, iteration: 46220
loss: 0.9958785772323608,grad_norm: 0.9468543351744686, iteration: 46221
loss: 1.1199924945831299,grad_norm: 0.9999991467653087, iteration: 46222
loss: 0.9961285591125488,grad_norm: 0.8057071016684634, iteration: 46223
loss: 1.0354267358779907,grad_norm: 0.9770431262001572, iteration: 46224
loss: 1.062056541442871,grad_norm: 0.9860076504213632, iteration: 46225
loss: 1.0360794067382812,grad_norm: 0.8811606061660203, iteration: 46226
loss: 1.0360679626464844,grad_norm: 0.9999991237364291, iteration: 46227
loss: 1.0149070024490356,grad_norm: 0.9731924294780763, iteration: 46228
loss: 0.9881068468093872,grad_norm: 0.8805498571934971, iteration: 46229
loss: 1.0176219940185547,grad_norm: 0.9138107441562341, iteration: 46230
loss: 0.9906572103500366,grad_norm: 0.9999991794136699, iteration: 46231
loss: 1.0163524150848389,grad_norm: 0.8454644640037282, iteration: 46232
loss: 1.0224218368530273,grad_norm: 0.9459513134651784, iteration: 46233
loss: 1.0205936431884766,grad_norm: 0.9036275512973436, iteration: 46234
loss: 1.0274609327316284,grad_norm: 0.8935489949032432, iteration: 46235
loss: 1.022521734237671,grad_norm: 0.8791453655536975, iteration: 46236
loss: 1.0044870376586914,grad_norm: 0.9999990387474329, iteration: 46237
loss: 0.9635640382766724,grad_norm: 0.9999990889209585, iteration: 46238
loss: 1.0381194353103638,grad_norm: 0.999999451042201, iteration: 46239
loss: 0.9997007846832275,grad_norm: 0.910720873273808, iteration: 46240
loss: 0.9880819916725159,grad_norm: 0.90810388164346, iteration: 46241
loss: 1.0032151937484741,grad_norm: 0.9848490415607143, iteration: 46242
loss: 0.996041476726532,grad_norm: 0.9597769264155477, iteration: 46243
loss: 0.9812774062156677,grad_norm: 0.9321838417613169, iteration: 46244
loss: 1.003983497619629,grad_norm: 0.920866477243429, iteration: 46245
loss: 0.9937352538108826,grad_norm: 0.9999993910133022, iteration: 46246
loss: 1.0083609819412231,grad_norm: 0.9999991546852132, iteration: 46247
loss: 1.0253331661224365,grad_norm: 0.9999993390798029, iteration: 46248
loss: 0.9978352189064026,grad_norm: 0.9695330142616677, iteration: 46249
loss: 1.0643442869186401,grad_norm: 0.9999990592155215, iteration: 46250
loss: 1.0122004747390747,grad_norm: 0.8745681085040719, iteration: 46251
loss: 1.026029109954834,grad_norm: 0.9999991123711526, iteration: 46252
loss: 1.0230222940444946,grad_norm: 0.9999990611576498, iteration: 46253
loss: 1.007110595703125,grad_norm: 0.9999992843880406, iteration: 46254
loss: 1.0135596990585327,grad_norm: 0.9999993057753025, iteration: 46255
loss: 1.029061198234558,grad_norm: 0.8508639993386153, iteration: 46256
loss: 0.9985411763191223,grad_norm: 0.9999992383474794, iteration: 46257
loss: 1.0174219608306885,grad_norm: 0.9999990484413017, iteration: 46258
loss: 0.9891027212142944,grad_norm: 0.9655767730615865, iteration: 46259
loss: 1.0211880207061768,grad_norm: 0.7528016328046524, iteration: 46260
loss: 1.020865797996521,grad_norm: 0.9999993167213599, iteration: 46261
loss: 1.0102286338806152,grad_norm: 0.7901602138409914, iteration: 46262
loss: 1.020452618598938,grad_norm: 0.9999990384164376, iteration: 46263
loss: 1.0340220928192139,grad_norm: 0.8681198938040752, iteration: 46264
loss: 1.0068955421447754,grad_norm: 0.9999991462939928, iteration: 46265
loss: 0.9970353841781616,grad_norm: 0.9999989940204557, iteration: 46266
loss: 0.9887240529060364,grad_norm: 0.876908836947751, iteration: 46267
loss: 0.9853742718696594,grad_norm: 0.9299476611796901, iteration: 46268
loss: 0.9777580499649048,grad_norm: 0.7405894591249889, iteration: 46269
loss: 0.9894241690635681,grad_norm: 0.882680950992188, iteration: 46270
loss: 1.0364947319030762,grad_norm: 0.9999992186452039, iteration: 46271
loss: 1.041051983833313,grad_norm: 0.9999990686254308, iteration: 46272
loss: 0.9951845407485962,grad_norm: 0.9112861122392757, iteration: 46273
loss: 0.9797346591949463,grad_norm: 0.9082111553892959, iteration: 46274
loss: 1.0169737339019775,grad_norm: 0.9213497526467178, iteration: 46275
loss: 0.9795891046524048,grad_norm: 0.8067324139275083, iteration: 46276
loss: 0.9785373210906982,grad_norm: 0.974107956236995, iteration: 46277
loss: 1.0333645343780518,grad_norm: 0.9534303306239, iteration: 46278
loss: 1.0320022106170654,grad_norm: 0.8797492535014607, iteration: 46279
loss: 1.0113447904586792,grad_norm: 0.9999994137490436, iteration: 46280
loss: 0.9834973216056824,grad_norm: 0.9273554304763082, iteration: 46281
loss: 0.9923145174980164,grad_norm: 0.9196288334113999, iteration: 46282
loss: 1.003968596458435,grad_norm: 0.7964034017813744, iteration: 46283
loss: 1.0124878883361816,grad_norm: 0.9309981653219365, iteration: 46284
loss: 1.0016652345657349,grad_norm: 0.8784004111456667, iteration: 46285
loss: 1.0031095743179321,grad_norm: 0.849301539451415, iteration: 46286
loss: 0.9949991703033447,grad_norm: 0.9999990959402943, iteration: 46287
loss: 1.0182570219039917,grad_norm: 0.9865477996041875, iteration: 46288
loss: 1.1146808862686157,grad_norm: 0.999999860438373, iteration: 46289
loss: 1.0086886882781982,grad_norm: 0.9999989951081146, iteration: 46290
loss: 1.0051804780960083,grad_norm: 0.9186278546345845, iteration: 46291
loss: 1.0040249824523926,grad_norm: 0.9999990753482002, iteration: 46292
loss: 0.9822791218757629,grad_norm: 0.9392164031426699, iteration: 46293
loss: 1.0160828828811646,grad_norm: 0.9166487622116463, iteration: 46294
loss: 1.0566478967666626,grad_norm: 0.9999998936938238, iteration: 46295
loss: 0.9706963300704956,grad_norm: 0.9999989446025883, iteration: 46296
loss: 0.9925521612167358,grad_norm: 0.99999908742689, iteration: 46297
loss: 0.974984884262085,grad_norm: 0.9999991776512297, iteration: 46298
loss: 1.0427693128585815,grad_norm: 0.9999996025811753, iteration: 46299
loss: 0.9959961771965027,grad_norm: 0.9999992747290827, iteration: 46300
loss: 0.9670130610466003,grad_norm: 0.9999993571139763, iteration: 46301
loss: 1.0130075216293335,grad_norm: 0.9999991648217857, iteration: 46302
loss: 0.9936005473136902,grad_norm: 0.9999996239954958, iteration: 46303
loss: 1.020567536354065,grad_norm: 0.9000272886624117, iteration: 46304
loss: 0.9870168566703796,grad_norm: 0.9999992302777182, iteration: 46305
loss: 1.0193264484405518,grad_norm: 0.8215273389696888, iteration: 46306
loss: 1.0839685201644897,grad_norm: 0.9999990907122684, iteration: 46307
loss: 1.0133016109466553,grad_norm: 0.8624514279668587, iteration: 46308
loss: 0.965072512626648,grad_norm: 0.949158272062622, iteration: 46309
loss: 1.0004310607910156,grad_norm: 0.9999990287899732, iteration: 46310
loss: 1.0109496116638184,grad_norm: 0.970023309139862, iteration: 46311
loss: 0.9745789766311646,grad_norm: 0.9926301117823361, iteration: 46312
loss: 0.9915691018104553,grad_norm: 0.8997006778704608, iteration: 46313
loss: 1.0179189443588257,grad_norm: 0.9009660987260395, iteration: 46314
loss: 0.9886012673377991,grad_norm: 0.9999995830082204, iteration: 46315
loss: 1.008766770362854,grad_norm: 0.9999996239887948, iteration: 46316
loss: 0.9892666339874268,grad_norm: 0.9568154888100883, iteration: 46317
loss: 1.2093461751937866,grad_norm: 0.9999996938253637, iteration: 46318
loss: 1.0256366729736328,grad_norm: 0.9999989198708441, iteration: 46319
loss: 1.0171756744384766,grad_norm: 0.9999992023844778, iteration: 46320
loss: 1.045041799545288,grad_norm: 0.848273202878287, iteration: 46321
loss: 1.091162919998169,grad_norm: 0.999999527362483, iteration: 46322
loss: 0.983572244644165,grad_norm: 0.9999996647824763, iteration: 46323
loss: 1.0287351608276367,grad_norm: 0.953919504718541, iteration: 46324
loss: 1.047932744026184,grad_norm: 0.8735006529307592, iteration: 46325
loss: 1.018326997756958,grad_norm: 0.906667824127458, iteration: 46326
loss: 0.9750502705574036,grad_norm: 0.9778403375918163, iteration: 46327
loss: 1.0069169998168945,grad_norm: 0.9526954610345456, iteration: 46328
loss: 0.9562750458717346,grad_norm: 0.9999992334902605, iteration: 46329
loss: 1.0403294563293457,grad_norm: 0.9999995550520466, iteration: 46330
loss: 1.0146291255950928,grad_norm: 0.8316834834163321, iteration: 46331
loss: 0.9833768010139465,grad_norm: 0.7507514528762056, iteration: 46332
loss: 1.0054630041122437,grad_norm: 0.9999991783324229, iteration: 46333
loss: 1.0363858938217163,grad_norm: 0.9999991057982874, iteration: 46334
loss: 1.0453126430511475,grad_norm: 0.9692985096693107, iteration: 46335
loss: 1.0124835968017578,grad_norm: 0.9999991936550127, iteration: 46336
loss: 1.1044347286224365,grad_norm: 0.9999995909496662, iteration: 46337
loss: 1.044467806816101,grad_norm: 0.9009033339213767, iteration: 46338
loss: 0.9936678409576416,grad_norm: 0.996296844992271, iteration: 46339
loss: 1.0002083778381348,grad_norm: 0.8974611814309482, iteration: 46340
loss: 1.0569052696228027,grad_norm: 0.9999996592645553, iteration: 46341
loss: 0.9861904382705688,grad_norm: 0.8528019697580881, iteration: 46342
loss: 1.0133094787597656,grad_norm: 0.7421276674653486, iteration: 46343
loss: 1.0084930658340454,grad_norm: 0.9846424529573563, iteration: 46344
loss: 0.9734869599342346,grad_norm: 0.9303006717922131, iteration: 46345
loss: 1.0236530303955078,grad_norm: 0.8263310554157496, iteration: 46346
loss: 0.9951021075248718,grad_norm: 0.9999990453960219, iteration: 46347
loss: 1.0220894813537598,grad_norm: 0.9264771889306209, iteration: 46348
loss: 1.0411651134490967,grad_norm: 0.9983223794947377, iteration: 46349
loss: 1.0024079084396362,grad_norm: 0.972969923696185, iteration: 46350
loss: 0.9788217544555664,grad_norm: 0.9999992151900943, iteration: 46351
loss: 1.132827877998352,grad_norm: 0.9999992278761731, iteration: 46352
loss: 1.0032737255096436,grad_norm: 0.9545527846889498, iteration: 46353
loss: 1.0005229711532593,grad_norm: 0.9999989854394999, iteration: 46354
loss: 0.9805229902267456,grad_norm: 0.9999990335635766, iteration: 46355
loss: 1.042245864868164,grad_norm: 0.9309902059622934, iteration: 46356
loss: 1.0282313823699951,grad_norm: 0.9999995914832963, iteration: 46357
loss: 1.0079249143600464,grad_norm: 0.9999991217292789, iteration: 46358
loss: 1.044211745262146,grad_norm: 0.9999994546882506, iteration: 46359
loss: 0.9605638384819031,grad_norm: 0.999999068297649, iteration: 46360
loss: 1.000402569770813,grad_norm: 0.9999991139613116, iteration: 46361
loss: 1.0895798206329346,grad_norm: 0.9999994468462406, iteration: 46362
loss: 1.053708791732788,grad_norm: 0.9999992863065007, iteration: 46363
loss: 0.9627143740653992,grad_norm: 0.9650536689103999, iteration: 46364
loss: 1.0339571237564087,grad_norm: 0.9653969152469791, iteration: 46365
loss: 1.0430855751037598,grad_norm: 0.9999991031708471, iteration: 46366
loss: 1.0212701559066772,grad_norm: 0.9271084031109241, iteration: 46367
loss: 1.0000349283218384,grad_norm: 0.947203738045885, iteration: 46368
loss: 1.033653974533081,grad_norm: 0.9999990441271318, iteration: 46369
loss: 0.9854166507720947,grad_norm: 0.7916477584040598, iteration: 46370
loss: 1.0014232397079468,grad_norm: 0.9446337019419824, iteration: 46371
loss: 0.9960812330245972,grad_norm: 0.9158340489944765, iteration: 46372
loss: 1.0062965154647827,grad_norm: 0.9999990229891651, iteration: 46373
loss: 0.9752393960952759,grad_norm: 0.9999991346118547, iteration: 46374
loss: 0.9709077477455139,grad_norm: 0.999999094485253, iteration: 46375
loss: 1.048707365989685,grad_norm: 0.7736795721661186, iteration: 46376
loss: 0.9766716957092285,grad_norm: 0.9773791436411688, iteration: 46377
loss: 0.9600040912628174,grad_norm: 0.999998985462522, iteration: 46378
loss: 1.0358046293258667,grad_norm: 0.9999998110809569, iteration: 46379
loss: 1.0288782119750977,grad_norm: 0.9999993171611743, iteration: 46380
loss: 1.0121275186538696,grad_norm: 0.8561023780457362, iteration: 46381
loss: 1.0062114000320435,grad_norm: 0.9999990772118112, iteration: 46382
loss: 1.0156060457229614,grad_norm: 0.9158492668512912, iteration: 46383
loss: 1.008453369140625,grad_norm: 0.8865294064728619, iteration: 46384
loss: 1.0040383338928223,grad_norm: 0.9640755543993978, iteration: 46385
loss: 1.0187727212905884,grad_norm: 0.9999991227389847, iteration: 46386
loss: 1.0294675827026367,grad_norm: 0.7643735848176344, iteration: 46387
loss: 1.1102228164672852,grad_norm: 0.9999992813683594, iteration: 46388
loss: 1.0146174430847168,grad_norm: 0.9999993502676354, iteration: 46389
loss: 1.0040384531021118,grad_norm: 0.999999757001517, iteration: 46390
loss: 0.9759847521781921,grad_norm: 0.8353484314379119, iteration: 46391
loss: 0.9672079086303711,grad_norm: 0.888195209688098, iteration: 46392
loss: 1.0251559019088745,grad_norm: 0.999999750264577, iteration: 46393
loss: 0.9895697236061096,grad_norm: 0.999999147626566, iteration: 46394
loss: 0.9775844216346741,grad_norm: 0.9583022800642854, iteration: 46395
loss: 0.9856852293014526,grad_norm: 0.999999028176624, iteration: 46396
loss: 1.0197041034698486,grad_norm: 0.9999998370539787, iteration: 46397
loss: 1.0399434566497803,grad_norm: 0.9999990322196649, iteration: 46398
loss: 1.0308643579483032,grad_norm: 0.8659824637908303, iteration: 46399
loss: 1.008906602859497,grad_norm: 0.9999990782603505, iteration: 46400
loss: 0.9969848990440369,grad_norm: 0.9648539789994572, iteration: 46401
loss: 1.0479744672775269,grad_norm: 0.9999992621885049, iteration: 46402
loss: 0.9833229780197144,grad_norm: 0.8978573366622093, iteration: 46403
loss: 0.9499932527542114,grad_norm: 0.9586636261868581, iteration: 46404
loss: 0.9835945963859558,grad_norm: 0.9159393620300655, iteration: 46405
loss: 0.9812917113304138,grad_norm: 0.972532317679222, iteration: 46406
loss: 1.0157157182693481,grad_norm: 0.7987534477127957, iteration: 46407
loss: 0.9918868541717529,grad_norm: 0.9999992142673034, iteration: 46408
loss: 1.0026146173477173,grad_norm: 0.9176939161473086, iteration: 46409
loss: 0.9992621541023254,grad_norm: 0.9999992402186881, iteration: 46410
loss: 0.964510977268219,grad_norm: 0.9469326007163659, iteration: 46411
loss: 0.9551697969436646,grad_norm: 0.9999994696373893, iteration: 46412
loss: 1.0703933238983154,grad_norm: 0.999999496414819, iteration: 46413
loss: 0.9877424836158752,grad_norm: 0.7796624243309377, iteration: 46414
loss: 0.9833545088768005,grad_norm: 0.8921468781415909, iteration: 46415
loss: 1.0216434001922607,grad_norm: 0.9999998408371066, iteration: 46416
loss: 1.0410689115524292,grad_norm: 0.9999991182890506, iteration: 46417
loss: 1.0122921466827393,grad_norm: 0.9999996767519872, iteration: 46418
loss: 1.0224796533584595,grad_norm: 0.876505605813593, iteration: 46419
loss: 1.0199187994003296,grad_norm: 0.8531229498958576, iteration: 46420
loss: 0.9793283939361572,grad_norm: 0.8023709548114369, iteration: 46421
loss: 0.9678539633750916,grad_norm: 0.9833750939603164, iteration: 46422
loss: 0.9951137900352478,grad_norm: 0.9999994151697167, iteration: 46423
loss: 1.0385907888412476,grad_norm: 0.9999990660484471, iteration: 46424
loss: 0.9863299131393433,grad_norm: 0.9999996483807434, iteration: 46425
loss: 1.0222129821777344,grad_norm: 0.9999996321154698, iteration: 46426
loss: 1.0300788879394531,grad_norm: 0.9999990938869426, iteration: 46427
loss: 0.9893816113471985,grad_norm: 0.9999992527321633, iteration: 46428
loss: 1.0003595352172852,grad_norm: 0.8490868471287484, iteration: 46429
loss: 0.9873670935630798,grad_norm: 0.9269727562147294, iteration: 46430
loss: 0.9575772285461426,grad_norm: 0.9150032670515266, iteration: 46431
loss: 1.0385984182357788,grad_norm: 0.9999991459712572, iteration: 46432
loss: 1.0379493236541748,grad_norm: 0.9999995774511256, iteration: 46433
loss: 1.0312755107879639,grad_norm: 0.7471036326152026, iteration: 46434
loss: 0.9743841290473938,grad_norm: 0.9081463249986815, iteration: 46435
loss: 0.9890851974487305,grad_norm: 0.9999991228930541, iteration: 46436
loss: 0.9742069244384766,grad_norm: 0.9789482701055907, iteration: 46437
loss: 1.0217746496200562,grad_norm: 0.9999994733422358, iteration: 46438
loss: 1.0193887948989868,grad_norm: 0.9999993132644257, iteration: 46439
loss: 1.0244652032852173,grad_norm: 0.9999992076385907, iteration: 46440
loss: 1.0213947296142578,grad_norm: 0.9999992663796561, iteration: 46441
loss: 0.982280433177948,grad_norm: 0.9999992510854203, iteration: 46442
loss: 1.0067460536956787,grad_norm: 0.816207611548255, iteration: 46443
loss: 0.9698931574821472,grad_norm: 0.9999990473434757, iteration: 46444
loss: 0.9544869661331177,grad_norm: 0.9999990233246148, iteration: 46445
loss: 0.9945864677429199,grad_norm: 0.9349248976670665, iteration: 46446
loss: 0.9762511849403381,grad_norm: 0.9999989127163108, iteration: 46447
loss: 0.9747549891471863,grad_norm: 0.8787946196406389, iteration: 46448
loss: 0.9889476895332336,grad_norm: 0.7690656291482061, iteration: 46449
loss: 0.9905310273170471,grad_norm: 0.999999118465482, iteration: 46450
loss: 1.09746515750885,grad_norm: 0.9999992616037793, iteration: 46451
loss: 0.9582808017730713,grad_norm: 0.9999993082457873, iteration: 46452
loss: 1.0265603065490723,grad_norm: 0.9053634302647293, iteration: 46453
loss: 0.9762552976608276,grad_norm: 0.9999992662737074, iteration: 46454
loss: 1.0023236274719238,grad_norm: 0.9999991280243691, iteration: 46455
loss: 0.9959958791732788,grad_norm: 0.9101944668023385, iteration: 46456
loss: 0.9977560043334961,grad_norm: 0.9999990071208378, iteration: 46457
loss: 1.0060830116271973,grad_norm: 0.9999992877243207, iteration: 46458
loss: 0.9984301328659058,grad_norm: 0.8982810966984682, iteration: 46459
loss: 1.024919033050537,grad_norm: 0.8465255990592664, iteration: 46460
loss: 1.0022673606872559,grad_norm: 0.9999990408496123, iteration: 46461
loss: 1.0113855600357056,grad_norm: 0.833716611347475, iteration: 46462
loss: 0.9907184839248657,grad_norm: 0.857154363053847, iteration: 46463
loss: 1.0377898216247559,grad_norm: 0.8928843528937965, iteration: 46464
loss: 1.0292986631393433,grad_norm: 0.9999996137847824, iteration: 46465
loss: 1.014257550239563,grad_norm: 0.8318776498634366, iteration: 46466
loss: 1.0257896184921265,grad_norm: 0.9999992645877455, iteration: 46467
loss: 1.014723300933838,grad_norm: 0.9872318243186728, iteration: 46468
loss: 1.004587173461914,grad_norm: 0.8850189634580544, iteration: 46469
loss: 1.0185542106628418,grad_norm: 0.8503447809530055, iteration: 46470
loss: 0.9942733645439148,grad_norm: 0.8649632435668867, iteration: 46471
loss: 1.132312536239624,grad_norm: 0.999999865534024, iteration: 46472
loss: 1.0094435214996338,grad_norm: 0.9999990688200053, iteration: 46473
loss: 1.0303878784179688,grad_norm: 0.9999992499883428, iteration: 46474
loss: 0.978491485118866,grad_norm: 0.8912098312596859, iteration: 46475
loss: 1.01025390625,grad_norm: 0.8794048129431677, iteration: 46476
loss: 1.0198925733566284,grad_norm: 0.9999996642778466, iteration: 46477
loss: 1.0158278942108154,grad_norm: 0.8152168991903617, iteration: 46478
loss: 1.0291790962219238,grad_norm: 0.9999991373448245, iteration: 46479
loss: 1.0349022150039673,grad_norm: 0.9999991595706729, iteration: 46480
loss: 0.9879327416419983,grad_norm: 0.9999992803500205, iteration: 46481
loss: 1.0290477275848389,grad_norm: 0.9999990036615559, iteration: 46482
loss: 1.0324064493179321,grad_norm: 0.9351633445852803, iteration: 46483
loss: 0.9771259427070618,grad_norm: 0.9999993606569982, iteration: 46484
loss: 1.018673300743103,grad_norm: 0.773322435432135, iteration: 46485
loss: 0.9536373019218445,grad_norm: 0.9999990173629487, iteration: 46486
loss: 1.0305525064468384,grad_norm: 0.9999990773252833, iteration: 46487
loss: 1.062854290008545,grad_norm: 0.9999994248071709, iteration: 46488
loss: 1.008407711982727,grad_norm: 0.9999991873833549, iteration: 46489
loss: 1.0129183530807495,grad_norm: 0.7846078473031647, iteration: 46490
loss: 1.013331413269043,grad_norm: 0.9909558806509796, iteration: 46491
loss: 0.9872720241546631,grad_norm: 0.9507800075260447, iteration: 46492
loss: 1.01079523563385,grad_norm: 0.910883285199125, iteration: 46493
loss: 0.9786266088485718,grad_norm: 0.9178307695503631, iteration: 46494
loss: 1.0134106874465942,grad_norm: 0.9999990142134346, iteration: 46495
loss: 0.98228919506073,grad_norm: 0.9462740105642977, iteration: 46496
loss: 1.034989833831787,grad_norm: 0.8679123146639964, iteration: 46497
loss: 0.9979528188705444,grad_norm: 0.9999991156311577, iteration: 46498
loss: 1.0455068349838257,grad_norm: 0.9999994293268498, iteration: 46499
loss: 1.0277782678604126,grad_norm: 0.9765092615498582, iteration: 46500
loss: 0.9795339107513428,grad_norm: 0.9999991309702495, iteration: 46501
loss: 0.9887953996658325,grad_norm: 0.9432039144513896, iteration: 46502
loss: 0.9962256550788879,grad_norm: 0.9999991857708326, iteration: 46503
loss: 0.9556277990341187,grad_norm: 0.8297103045348173, iteration: 46504
loss: 0.9816374778747559,grad_norm: 0.9403816593257436, iteration: 46505
loss: 1.0270601511001587,grad_norm: 0.9720032525795328, iteration: 46506
loss: 1.02802574634552,grad_norm: 0.9999994294474912, iteration: 46507
loss: 0.9791097044944763,grad_norm: 0.7207234138235411, iteration: 46508
loss: 0.9952541589736938,grad_norm: 0.8553961907932132, iteration: 46509
loss: 1.0269745588302612,grad_norm: 0.7949643619362361, iteration: 46510
loss: 0.9867447018623352,grad_norm: 0.9999993036480767, iteration: 46511
loss: 1.0202292203903198,grad_norm: 0.8525506669948324, iteration: 46512
loss: 0.9919322729110718,grad_norm: 0.9999992307744411, iteration: 46513
loss: 0.9648609161376953,grad_norm: 0.876114172415551, iteration: 46514
loss: 1.0157833099365234,grad_norm: 0.9606506415449393, iteration: 46515
loss: 0.9913753271102905,grad_norm: 0.9999992221753942, iteration: 46516
loss: 1.034067153930664,grad_norm: 0.9999993378002033, iteration: 46517
loss: 1.0019725561141968,grad_norm: 0.9995499621184096, iteration: 46518
loss: 0.9928989410400391,grad_norm: 0.8716049888531144, iteration: 46519
loss: 1.0457316637039185,grad_norm: 0.9999990640688051, iteration: 46520
loss: 0.9607356786727905,grad_norm: 0.9999990643608806, iteration: 46521
loss: 0.9755569100379944,grad_norm: 0.9166820363896444, iteration: 46522
loss: 1.0142260789871216,grad_norm: 0.8581531792326074, iteration: 46523
loss: 1.0101181268692017,grad_norm: 0.9999991685939256, iteration: 46524
loss: 1.0012295246124268,grad_norm: 0.8564307095494372, iteration: 46525
loss: 1.0178873538970947,grad_norm: 0.9999992079517103, iteration: 46526
loss: 0.9858028888702393,grad_norm: 0.8432698226669952, iteration: 46527
loss: 1.0170972347259521,grad_norm: 0.9999990278278261, iteration: 46528
loss: 0.9841139316558838,grad_norm: 0.8488190847589936, iteration: 46529
loss: 1.0170786380767822,grad_norm: 0.9999994450198202, iteration: 46530
loss: 1.0048129558563232,grad_norm: 0.9999992655127697, iteration: 46531
loss: 1.0425004959106445,grad_norm: 0.8662946688321248, iteration: 46532
loss: 0.9966511726379395,grad_norm: 0.9999990031264095, iteration: 46533
loss: 1.0021005868911743,grad_norm: 0.999999257064702, iteration: 46534
loss: 1.0476140975952148,grad_norm: 0.9938471111820476, iteration: 46535
loss: 1.0230309963226318,grad_norm: 0.9936509600354215, iteration: 46536
loss: 0.9922736287117004,grad_norm: 0.8921879940416002, iteration: 46537
loss: 0.9961948990821838,grad_norm: 0.9999990541959294, iteration: 46538
loss: 0.9664723873138428,grad_norm: 0.8553476342602222, iteration: 46539
loss: 0.9956247210502625,grad_norm: 0.9999990982053878, iteration: 46540
loss: 1.011717677116394,grad_norm: 0.85724849891646, iteration: 46541
loss: 1.0012627840042114,grad_norm: 0.9461646237614005, iteration: 46542
loss: 1.007109522819519,grad_norm: 0.8659834703887979, iteration: 46543
loss: 0.9596607685089111,grad_norm: 0.9629694469584134, iteration: 46544
loss: 1.0154168605804443,grad_norm: 0.9999991178708176, iteration: 46545
loss: 1.009898066520691,grad_norm: 0.9100538489529768, iteration: 46546
loss: 1.0150200128555298,grad_norm: 0.999999676679135, iteration: 46547
loss: 0.981343686580658,grad_norm: 0.9999989562596154, iteration: 46548
loss: 1.0079741477966309,grad_norm: 0.7671170203919926, iteration: 46549
loss: 1.0410140752792358,grad_norm: 0.9999996392479822, iteration: 46550
loss: 0.9946101307868958,grad_norm: 0.9999991221647151, iteration: 46551
loss: 0.9957627058029175,grad_norm: 0.850154910929293, iteration: 46552
loss: 1.0082870721817017,grad_norm: 0.840510243293371, iteration: 46553
loss: 1.0038336515426636,grad_norm: 0.9253796646778157, iteration: 46554
loss: 1.00527024269104,grad_norm: 0.8377058024656041, iteration: 46555
loss: 1.0612707138061523,grad_norm: 0.9999997698831409, iteration: 46556
loss: 0.9914782047271729,grad_norm: 0.8894717922869908, iteration: 46557
loss: 1.0458226203918457,grad_norm: 0.9999998242854515, iteration: 46558
loss: 0.9958822727203369,grad_norm: 0.8127001654675212, iteration: 46559
loss: 0.9920686483383179,grad_norm: 0.9999991931806765, iteration: 46560
loss: 1.0016160011291504,grad_norm: 0.7675434912030316, iteration: 46561
loss: 0.9981233477592468,grad_norm: 0.9999992633198733, iteration: 46562
loss: 1.0162838697433472,grad_norm: 0.9999990609762829, iteration: 46563
loss: 0.9899767637252808,grad_norm: 0.9999992483393506, iteration: 46564
loss: 0.9911465644836426,grad_norm: 0.8345759837994374, iteration: 46565
loss: 1.0223082304000854,grad_norm: 0.9999993427238646, iteration: 46566
loss: 1.1175178289413452,grad_norm: 0.9999994884841703, iteration: 46567
loss: 0.9860518574714661,grad_norm: 0.9999991960318576, iteration: 46568
loss: 0.9714398384094238,grad_norm: 0.9098765474330974, iteration: 46569
loss: 1.0217490196228027,grad_norm: 0.9999992036905869, iteration: 46570
loss: 1.0344711542129517,grad_norm: 0.880322902449144, iteration: 46571
loss: 0.9754701256752014,grad_norm: 0.8949706595545113, iteration: 46572
loss: 1.019972324371338,grad_norm: 0.8363977129189232, iteration: 46573
loss: 0.9804330468177795,grad_norm: 0.9096306850599916, iteration: 46574
loss: 0.9713028073310852,grad_norm: 0.7691386775835246, iteration: 46575
loss: 1.0075910091400146,grad_norm: 0.9999993715741712, iteration: 46576
loss: 0.9924035668373108,grad_norm: 0.9999990729296799, iteration: 46577
loss: 1.0183912515640259,grad_norm: 0.983870231750422, iteration: 46578
loss: 0.9997764229774475,grad_norm: 0.9052199538326079, iteration: 46579
loss: 0.9631533026695251,grad_norm: 0.9001815317747907, iteration: 46580
loss: 1.031955599784851,grad_norm: 0.9690230537012389, iteration: 46581
loss: 1.0496689081192017,grad_norm: 0.9999997402535434, iteration: 46582
loss: 1.0038663148880005,grad_norm: 0.9999991299860926, iteration: 46583
loss: 1.0020180940628052,grad_norm: 0.9148861995475973, iteration: 46584
loss: 0.9747875332832336,grad_norm: 0.9999991743854078, iteration: 46585
loss: 1.0352506637573242,grad_norm: 0.8457567106233161, iteration: 46586
loss: 0.9843341112136841,grad_norm: 0.8950769859835506, iteration: 46587
loss: 0.9700186848640442,grad_norm: 0.8486812928036238, iteration: 46588
loss: 1.0386747121810913,grad_norm: 0.9999993417370987, iteration: 46589
loss: 0.9613577723503113,grad_norm: 0.9292046752828798, iteration: 46590
loss: 1.0358099937438965,grad_norm: 0.9999997866780476, iteration: 46591
loss: 1.0031890869140625,grad_norm: 0.9999992146563355, iteration: 46592
loss: 0.9819127917289734,grad_norm: 0.9999992645573677, iteration: 46593
loss: 0.9980981945991516,grad_norm: 0.955436276742866, iteration: 46594
loss: 1.024323582649231,grad_norm: 0.9161118820605121, iteration: 46595
loss: 0.9952688813209534,grad_norm: 0.9999990786629369, iteration: 46596
loss: 0.980045735836029,grad_norm: 0.9999990544115178, iteration: 46597
loss: 0.9989721775054932,grad_norm: 0.8434467703747913, iteration: 46598
loss: 1.057000994682312,grad_norm: 0.9999990692440718, iteration: 46599
loss: 1.0041897296905518,grad_norm: 0.9999996045199621, iteration: 46600
loss: 0.9953065514564514,grad_norm: 0.9999990799392138, iteration: 46601
loss: 1.0171983242034912,grad_norm: 0.9723514911761315, iteration: 46602
loss: 0.9712895750999451,grad_norm: 0.8935624937339754, iteration: 46603
loss: 1.0516785383224487,grad_norm: 0.9926081803734875, iteration: 46604
loss: 0.9922711849212646,grad_norm: 0.8934069250664715, iteration: 46605
loss: 1.0174684524536133,grad_norm: 0.9649854131382261, iteration: 46606
loss: 1.1790719032287598,grad_norm: 0.9999997134196614, iteration: 46607
loss: 1.0340219736099243,grad_norm: 0.9999990505191075, iteration: 46608
loss: 0.9903711676597595,grad_norm: 0.9486310667068615, iteration: 46609
loss: 0.9754899144172668,grad_norm: 0.837569315233353, iteration: 46610
loss: 1.02383553981781,grad_norm: 0.999999168789058, iteration: 46611
loss: 0.9997776746749878,grad_norm: 0.9999991404326374, iteration: 46612
loss: 1.031522512435913,grad_norm: 0.9999998193810056, iteration: 46613
loss: 1.0486971139907837,grad_norm: 0.9999997904081679, iteration: 46614
loss: 0.9832964539527893,grad_norm: 0.9210323968128088, iteration: 46615
loss: 1.0029575824737549,grad_norm: 0.9484250703323016, iteration: 46616
loss: 1.0252681970596313,grad_norm: 0.9260636687069906, iteration: 46617
loss: 0.9748238325119019,grad_norm: 0.9945878139718194, iteration: 46618
loss: 1.0073604583740234,grad_norm: 0.9409369517540044, iteration: 46619
loss: 1.016119122505188,grad_norm: 0.9999990477715645, iteration: 46620
loss: 0.9908266067504883,grad_norm: 0.9094618444384865, iteration: 46621
loss: 0.9819385409355164,grad_norm: 0.820303659049993, iteration: 46622
loss: 1.035607099533081,grad_norm: 0.9888954728951579, iteration: 46623
loss: 0.9927433133125305,grad_norm: 0.9999988748455689, iteration: 46624
loss: 0.9970261454582214,grad_norm: 0.8892448036907886, iteration: 46625
loss: 1.0462241172790527,grad_norm: 0.9999996057909885, iteration: 46626
loss: 0.9917792677879333,grad_norm: 0.938283061563774, iteration: 46627
loss: 0.9821051955223083,grad_norm: 0.9481198216779579, iteration: 46628
loss: 1.0018435716629028,grad_norm: 0.9999992029693368, iteration: 46629
loss: 1.038649320602417,grad_norm: 0.978318368476931, iteration: 46630
loss: 1.0528879165649414,grad_norm: 0.9999991358806631, iteration: 46631
loss: 0.9954615235328674,grad_norm: 0.9361843858511993, iteration: 46632
loss: 0.964920163154602,grad_norm: 0.934854592387577, iteration: 46633
loss: 0.995678186416626,grad_norm: 0.8465502076899368, iteration: 46634
loss: 0.9874829053878784,grad_norm: 0.9491509206456025, iteration: 46635
loss: 1.0279368162155151,grad_norm: 0.845842501458317, iteration: 46636
loss: 1.0025463104248047,grad_norm: 0.9999990421510673, iteration: 46637
loss: 0.9637860059738159,grad_norm: 0.9024098499651273, iteration: 46638
loss: 0.9853070974349976,grad_norm: 0.8948859504542696, iteration: 46639
loss: 0.9888136982917786,grad_norm: 0.9999991744745841, iteration: 46640
loss: 1.00287663936615,grad_norm: 0.9433446128811508, iteration: 46641
loss: 0.9655916690826416,grad_norm: 0.999998906247088, iteration: 46642
loss: 1.0531071424484253,grad_norm: 0.837421806295839, iteration: 46643
loss: 0.9803555607795715,grad_norm: 0.8574159337914918, iteration: 46644
loss: 1.048631191253662,grad_norm: 0.9584206457899022, iteration: 46645
loss: 1.0025501251220703,grad_norm: 0.939728277421209, iteration: 46646
loss: 1.0474156141281128,grad_norm: 0.9999991570009981, iteration: 46647
loss: 1.0397838354110718,grad_norm: 0.9999993546526136, iteration: 46648
loss: 0.9664616584777832,grad_norm: 0.9141675733757134, iteration: 46649
loss: 1.0358198881149292,grad_norm: 0.9999991026530345, iteration: 46650
loss: 0.9390578866004944,grad_norm: 0.9999991399972655, iteration: 46651
loss: 0.9555670619010925,grad_norm: 0.999999107342188, iteration: 46652
loss: 0.9846705794334412,grad_norm: 0.8825628420302344, iteration: 46653
loss: 1.0028458833694458,grad_norm: 0.97677339406171, iteration: 46654
loss: 1.0107899904251099,grad_norm: 0.9113225325422204, iteration: 46655
loss: 1.0055919885635376,grad_norm: 0.8097942548854122, iteration: 46656
loss: 0.9815075993537903,grad_norm: 0.9999990077404481, iteration: 46657
loss: 0.9695360064506531,grad_norm: 0.9978658169316739, iteration: 46658
loss: 1.0016052722930908,grad_norm: 0.9190043890330342, iteration: 46659
loss: 1.0160913467407227,grad_norm: 0.9999990277308932, iteration: 46660
loss: 0.9675905108451843,grad_norm: 0.8787371781783312, iteration: 46661
loss: 1.0354609489440918,grad_norm: 0.7653952180177497, iteration: 46662
loss: 1.0078710317611694,grad_norm: 0.858442760356421, iteration: 46663
loss: 1.0218958854675293,grad_norm: 0.9999994801173092, iteration: 46664
loss: 0.9807825088500977,grad_norm: 0.9999990360469428, iteration: 46665
loss: 1.0068391561508179,grad_norm: 0.852354459058106, iteration: 46666
loss: 1.003921389579773,grad_norm: 0.9774196388766064, iteration: 46667
loss: 0.9755428433418274,grad_norm: 0.9332098611174068, iteration: 46668
loss: 1.0193731784820557,grad_norm: 0.9999991670566485, iteration: 46669
loss: 1.0432531833648682,grad_norm: 0.9999990806719814, iteration: 46670
loss: 1.0156749486923218,grad_norm: 0.8721174443715932, iteration: 46671
loss: 0.996979832649231,grad_norm: 0.8812021197942275, iteration: 46672
loss: 0.9856336116790771,grad_norm: 0.9425367397393586, iteration: 46673
loss: 1.00783109664917,grad_norm: 0.9134976154183126, iteration: 46674
loss: 1.0209518671035767,grad_norm: 0.8275403308548339, iteration: 46675
loss: 0.9579432606697083,grad_norm: 0.999999187267656, iteration: 46676
loss: 1.02423894405365,grad_norm: 0.999999179310497, iteration: 46677
loss: 1.0056097507476807,grad_norm: 0.915780832246682, iteration: 46678
loss: 1.0151406526565552,grad_norm: 0.9046802013144332, iteration: 46679
loss: 1.0001226663589478,grad_norm: 0.9999991053009545, iteration: 46680
loss: 0.9958056807518005,grad_norm: 0.9999992269573229, iteration: 46681
loss: 1.012590765953064,grad_norm: 0.8534808667055164, iteration: 46682
loss: 1.047083854675293,grad_norm: 0.9999990967406462, iteration: 46683
loss: 0.9521430730819702,grad_norm: 0.9318872228810288, iteration: 46684
loss: 1.0368387699127197,grad_norm: 0.9823836920491891, iteration: 46685
loss: 1.0109878778457642,grad_norm: 0.9999990814156057, iteration: 46686
loss: 1.036030888557434,grad_norm: 0.9666528954661154, iteration: 46687
loss: 0.9924876093864441,grad_norm: 0.97272867301479, iteration: 46688
loss: 1.0397512912750244,grad_norm: 0.9999992565212106, iteration: 46689
loss: 0.9883543848991394,grad_norm: 0.87772334372122, iteration: 46690
loss: 0.9894049167633057,grad_norm: 0.9999991894696888, iteration: 46691
loss: 1.0377124547958374,grad_norm: 0.9999993678388077, iteration: 46692
loss: 0.967563271522522,grad_norm: 0.9569148294788126, iteration: 46693
loss: 0.9974818825721741,grad_norm: 0.9027968053022404, iteration: 46694
loss: 1.0159014463424683,grad_norm: 0.7865658363979384, iteration: 46695
loss: 1.0012534856796265,grad_norm: 0.8156520311834027, iteration: 46696
loss: 1.0232735872268677,grad_norm: 0.9302008987856607, iteration: 46697
loss: 1.0392041206359863,grad_norm: 0.9999990907746952, iteration: 46698
loss: 1.0392658710479736,grad_norm: 0.9561011854758005, iteration: 46699
loss: 1.0281161069869995,grad_norm: 0.999999066387529, iteration: 46700
loss: 1.0328369140625,grad_norm: 0.8880844104153477, iteration: 46701
loss: 1.018797755241394,grad_norm: 0.999998962910417, iteration: 46702
loss: 1.0005797147750854,grad_norm: 0.9921368889181917, iteration: 46703
loss: 1.0261874198913574,grad_norm: 0.8735998841638147, iteration: 46704
loss: 0.9907529950141907,grad_norm: 0.9163275187060136, iteration: 46705
loss: 0.991006076335907,grad_norm: 0.8730949747853666, iteration: 46706
loss: 0.9864712953567505,grad_norm: 0.8721695686200832, iteration: 46707
loss: 1.01704740524292,grad_norm: 0.9958633294196125, iteration: 46708
loss: 1.026502251625061,grad_norm: 0.9999991244354828, iteration: 46709
loss: 0.9863126873970032,grad_norm: 0.9775940488784298, iteration: 46710
loss: 1.0223486423492432,grad_norm: 0.942730809541869, iteration: 46711
loss: 0.9914655089378357,grad_norm: 0.8675895632595297, iteration: 46712
loss: 0.9936503767967224,grad_norm: 0.8177337937528745, iteration: 46713
loss: 1.0309842824935913,grad_norm: 0.9999992913437384, iteration: 46714
loss: 1.0024844408035278,grad_norm: 0.9618629495385235, iteration: 46715
loss: 1.0404953956604004,grad_norm: 0.9999994475493699, iteration: 46716
loss: 1.0122970342636108,grad_norm: 0.9999989978308174, iteration: 46717
loss: 0.9936931729316711,grad_norm: 0.8949339140619491, iteration: 46718
loss: 0.9807859659194946,grad_norm: 0.8393302931325939, iteration: 46719
loss: 1.007567048072815,grad_norm: 0.9302544301500216, iteration: 46720
loss: 1.017061471939087,grad_norm: 0.9819915090775447, iteration: 46721
loss: 1.0157532691955566,grad_norm: 0.999999118167967, iteration: 46722
loss: 1.0131890773773193,grad_norm: 0.9145128921289408, iteration: 46723
loss: 0.9886378645896912,grad_norm: 0.9999990953916025, iteration: 46724
loss: 0.9669630527496338,grad_norm: 0.908889826745499, iteration: 46725
loss: 1.0217232704162598,grad_norm: 0.9999993752798113, iteration: 46726
loss: 0.9920466542243958,grad_norm: 0.9707274774551725, iteration: 46727
loss: 0.9981450438499451,grad_norm: 0.8459469529407874, iteration: 46728
loss: 0.987720251083374,grad_norm: 0.9999992744741946, iteration: 46729
loss: 1.040358543395996,grad_norm: 0.9803253229970614, iteration: 46730
loss: 0.9926960468292236,grad_norm: 0.999999119014797, iteration: 46731
loss: 1.0193839073181152,grad_norm: 0.9393987996960823, iteration: 46732
loss: 1.007468342781067,grad_norm: 0.9119154702977171, iteration: 46733
loss: 0.9966168999671936,grad_norm: 0.9261297342527992, iteration: 46734
loss: 1.0471224784851074,grad_norm: 0.9418263329711065, iteration: 46735
loss: 1.0408445596694946,grad_norm: 0.8979209424579028, iteration: 46736
loss: 0.9763113260269165,grad_norm: 0.7041686496479039, iteration: 46737
loss: 0.9723401665687561,grad_norm: 0.9999990654901215, iteration: 46738
loss: 1.0186614990234375,grad_norm: 0.7852578008702727, iteration: 46739
loss: 1.0121973752975464,grad_norm: 0.8462172807789711, iteration: 46740
loss: 1.0478578805923462,grad_norm: 0.9432634474311642, iteration: 46741
loss: 1.0108773708343506,grad_norm: 0.8010989008371203, iteration: 46742
loss: 0.9857486486434937,grad_norm: 0.9999990843543808, iteration: 46743
loss: 1.0056849718093872,grad_norm: 0.7912460981389877, iteration: 46744
loss: 1.0422873497009277,grad_norm: 0.8679236048588732, iteration: 46745
loss: 0.9959525465965271,grad_norm: 0.8857331085617923, iteration: 46746
loss: 1.014255166053772,grad_norm: 0.7941424074645129, iteration: 46747
loss: 1.0391179323196411,grad_norm: 0.9999991579533594, iteration: 46748
loss: 0.9765358567237854,grad_norm: 0.9746932573668926, iteration: 46749
loss: 1.0198276042938232,grad_norm: 0.9965498530944973, iteration: 46750
loss: 1.0011399984359741,grad_norm: 0.8379504023495534, iteration: 46751
loss: 1.0155901908874512,grad_norm: 0.9999991319772512, iteration: 46752
loss: 0.9973297119140625,grad_norm: 0.8729538141230897, iteration: 46753
loss: 1.0205579996109009,grad_norm: 0.9999992516714618, iteration: 46754
loss: 1.0165460109710693,grad_norm: 0.9999993554365484, iteration: 46755
loss: 1.0388120412826538,grad_norm: 0.9999994570211007, iteration: 46756
loss: 1.0464518070220947,grad_norm: 0.896007435675114, iteration: 46757
loss: 0.9621760845184326,grad_norm: 0.9999991200851733, iteration: 46758
loss: 1.0170605182647705,grad_norm: 0.9999990933617554, iteration: 46759
loss: 0.9833799004554749,grad_norm: 0.9657601403581196, iteration: 46760
loss: 0.9572873711585999,grad_norm: 0.8267997011940935, iteration: 46761
loss: 1.002192497253418,grad_norm: 0.9859280339335955, iteration: 46762
loss: 1.0231059789657593,grad_norm: 0.9999990614648996, iteration: 46763
loss: 0.9967643022537231,grad_norm: 0.8152571401022527, iteration: 46764
loss: 1.018734335899353,grad_norm: 0.9999989859093276, iteration: 46765
loss: 1.013534665107727,grad_norm: 0.8225882756251839, iteration: 46766
loss: 1.0045883655548096,grad_norm: 0.8747093785152116, iteration: 46767
loss: 0.984498918056488,grad_norm: 0.8739538505784917, iteration: 46768
loss: 1.0353951454162598,grad_norm: 0.8307937702874627, iteration: 46769
loss: 0.9740728139877319,grad_norm: 0.9719872640956003, iteration: 46770
loss: 1.0032743215560913,grad_norm: 0.9999991413144789, iteration: 46771
loss: 1.0299725532531738,grad_norm: 0.8469732051547885, iteration: 46772
loss: 1.0009181499481201,grad_norm: 0.9675773265454565, iteration: 46773
loss: 1.0473304986953735,grad_norm: 0.8624065488957173, iteration: 46774
loss: 0.9766697287559509,grad_norm: 0.9999989885161996, iteration: 46775
loss: 1.0209052562713623,grad_norm: 0.7847110727953062, iteration: 46776
loss: 1.002785325050354,grad_norm: 0.9999995027587845, iteration: 46777
loss: 0.9752197861671448,grad_norm: 0.8410940947499745, iteration: 46778
loss: 0.9915421605110168,grad_norm: 0.8294376090659435, iteration: 46779
loss: 1.0190174579620361,grad_norm: 0.884294007223182, iteration: 46780
loss: 1.024483561515808,grad_norm: 0.9556104653091273, iteration: 46781
loss: 0.9958093166351318,grad_norm: 0.982613517778249, iteration: 46782
loss: 1.0245753526687622,grad_norm: 0.9999993081885551, iteration: 46783
loss: 1.0242047309875488,grad_norm: 0.9999992208129366, iteration: 46784
loss: 1.019181728363037,grad_norm: 0.8836478444630627, iteration: 46785
loss: 1.0198498964309692,grad_norm: 0.9992170069731616, iteration: 46786
loss: 1.0307958126068115,grad_norm: 0.9999990051942153, iteration: 46787
loss: 1.0487620830535889,grad_norm: 0.9057395813992198, iteration: 46788
loss: 0.9936949610710144,grad_norm: 0.8107205001830425, iteration: 46789
loss: 1.004672884941101,grad_norm: 0.9433348053212288, iteration: 46790
loss: 1.051021695137024,grad_norm: 0.9999993294276212, iteration: 46791
loss: 1.0161736011505127,grad_norm: 0.9845803799695814, iteration: 46792
loss: 0.9730780720710754,grad_norm: 0.9999992126943146, iteration: 46793
loss: 0.9651058912277222,grad_norm: 0.9466210818107106, iteration: 46794
loss: 0.9495856761932373,grad_norm: 0.9615026233391832, iteration: 46795
loss: 1.0314912796020508,grad_norm: 0.9106974882561366, iteration: 46796
loss: 0.9794965982437134,grad_norm: 0.9999993645046932, iteration: 46797
loss: 0.9771307706832886,grad_norm: 0.8140994965390199, iteration: 46798
loss: 1.0071892738342285,grad_norm: 0.9999991071750481, iteration: 46799
loss: 1.0098166465759277,grad_norm: 0.9051288280269506, iteration: 46800
loss: 1.0886105298995972,grad_norm: 0.9999995353236246, iteration: 46801
loss: 1.0494565963745117,grad_norm: 0.9999991239257281, iteration: 46802
loss: 0.9791896939277649,grad_norm: 0.9910338660787691, iteration: 46803
loss: 1.0297197103500366,grad_norm: 0.8904855978349724, iteration: 46804
loss: 1.0166006088256836,grad_norm: 0.961282762022917, iteration: 46805
loss: 0.9891583323478699,grad_norm: 0.9683149918378309, iteration: 46806
loss: 0.9888683557510376,grad_norm: 0.9073055789569039, iteration: 46807
loss: 0.9885817766189575,grad_norm: 0.9999990916740232, iteration: 46808
loss: 1.0160493850708008,grad_norm: 0.9999989697653366, iteration: 46809
loss: 0.9932084083557129,grad_norm: 0.8084783436576791, iteration: 46810
loss: 0.9877858757972717,grad_norm: 0.9999990980871122, iteration: 46811
loss: 0.9808571338653564,grad_norm: 0.9999991562644293, iteration: 46812
loss: 1.0186185836791992,grad_norm: 0.9999991915129323, iteration: 46813
loss: 0.9757981896400452,grad_norm: 0.8512234728660972, iteration: 46814
loss: 0.98641037940979,grad_norm: 0.9840167531247965, iteration: 46815
loss: 1.0061784982681274,grad_norm: 0.999999004331316, iteration: 46816
loss: 1.0011957883834839,grad_norm: 0.9480115749642705, iteration: 46817
loss: 0.9993448257446289,grad_norm: 0.8397333975589741, iteration: 46818
loss: 1.015166163444519,grad_norm: 0.999999159948741, iteration: 46819
loss: 1.0048326253890991,grad_norm: 0.9999991267493253, iteration: 46820
loss: 1.0075100660324097,grad_norm: 0.9457941715136007, iteration: 46821
loss: 0.9811335802078247,grad_norm: 0.9999994150702404, iteration: 46822
loss: 1.0167837142944336,grad_norm: 0.886658351887933, iteration: 46823
loss: 0.9893354773521423,grad_norm: 0.9999990196559343, iteration: 46824
loss: 0.9958376884460449,grad_norm: 0.843883004802362, iteration: 46825
loss: 0.981045126914978,grad_norm: 0.7679044453353134, iteration: 46826
loss: 1.0193867683410645,grad_norm: 0.9999991708062684, iteration: 46827
loss: 1.0258504152297974,grad_norm: 0.9999991041067988, iteration: 46828
loss: 1.0041173696517944,grad_norm: 0.999998985517543, iteration: 46829
loss: 0.9957590699195862,grad_norm: 0.8800218800983441, iteration: 46830
loss: 1.0164259672164917,grad_norm: 0.9712816784290154, iteration: 46831
loss: 0.9748109579086304,grad_norm: 0.9999996140408626, iteration: 46832
loss: 1.0157780647277832,grad_norm: 0.96880799495032, iteration: 46833
loss: 0.9851891398429871,grad_norm: 0.9886182946028759, iteration: 46834
loss: 1.0514600276947021,grad_norm: 0.9999990858738055, iteration: 46835
loss: 1.038200855255127,grad_norm: 0.9999996846159885, iteration: 46836
loss: 1.0514603853225708,grad_norm: 0.9999999470504971, iteration: 46837
loss: 1.022936463356018,grad_norm: 0.9999998011501859, iteration: 46838
loss: 1.0045592784881592,grad_norm: 0.9999992910509065, iteration: 46839
loss: 1.0077039003372192,grad_norm: 0.8808871672269221, iteration: 46840
loss: 1.0131748914718628,grad_norm: 0.9948740976807887, iteration: 46841
loss: 1.0043225288391113,grad_norm: 0.9999990453147061, iteration: 46842
loss: 1.0373144149780273,grad_norm: 0.7793610093843549, iteration: 46843
loss: 0.9848151206970215,grad_norm: 0.9251805583485829, iteration: 46844
loss: 1.0215504169464111,grad_norm: 0.9868969445370198, iteration: 46845
loss: 0.9863576889038086,grad_norm: 0.9166070638056175, iteration: 46846
loss: 0.9972546696662903,grad_norm: 0.94909955797621, iteration: 46847
loss: 1.0236806869506836,grad_norm: 0.9806730409470529, iteration: 46848
loss: 0.9815583825111389,grad_norm: 0.8788410991677508, iteration: 46849
loss: 1.0506854057312012,grad_norm: 0.9343109275227038, iteration: 46850
loss: 0.9801998138427734,grad_norm: 0.9999991671739076, iteration: 46851
loss: 1.0381810665130615,grad_norm: 0.9227594743473772, iteration: 46852
loss: 1.0400222539901733,grad_norm: 0.9776053058188243, iteration: 46853
loss: 1.0300167798995972,grad_norm: 0.9723734921291871, iteration: 46854
loss: 1.0235490798950195,grad_norm: 0.7281595502654412, iteration: 46855
loss: 0.9889096021652222,grad_norm: 0.9999992223365578, iteration: 46856
loss: 1.036898136138916,grad_norm: 0.999999161075368, iteration: 46857
loss: 1.027545690536499,grad_norm: 0.9133004148200392, iteration: 46858
loss: 0.9892211556434631,grad_norm: 0.8468593967662492, iteration: 46859
loss: 0.9969713687896729,grad_norm: 0.9999991049601128, iteration: 46860
loss: 0.9946002960205078,grad_norm: 0.8725585312304404, iteration: 46861
loss: 1.0545958280563354,grad_norm: 0.844335431943657, iteration: 46862
loss: 1.0498417615890503,grad_norm: 0.9999990894621642, iteration: 46863
loss: 0.9790788888931274,grad_norm: 0.7620543113658367, iteration: 46864
loss: 1.0183528661727905,grad_norm: 0.9999991593406015, iteration: 46865
loss: 0.9846678972244263,grad_norm: 0.9770000516883981, iteration: 46866
loss: 0.9965852499008179,grad_norm: 0.8833159464270485, iteration: 46867
loss: 1.029272198677063,grad_norm: 0.9379549362295205, iteration: 46868
loss: 1.0259710550308228,grad_norm: 0.842277190646158, iteration: 46869
loss: 1.003182291984558,grad_norm: 0.9628660731539612, iteration: 46870
loss: 0.9958541989326477,grad_norm: 0.8172269870055201, iteration: 46871
loss: 1.0376255512237549,grad_norm: 0.9999998613978108, iteration: 46872
loss: 1.0076313018798828,grad_norm: 0.8945915154170615, iteration: 46873
loss: 0.9375371336936951,grad_norm: 0.9999992122188579, iteration: 46874
loss: 1.000842809677124,grad_norm: 0.836646461453963, iteration: 46875
loss: 0.98100745677948,grad_norm: 0.9999990208752307, iteration: 46876
loss: 0.9829950928688049,grad_norm: 0.832210282500253, iteration: 46877
loss: 1.0121054649353027,grad_norm: 0.8458884712345467, iteration: 46878
loss: 0.9984668493270874,grad_norm: 0.992406457522182, iteration: 46879
loss: 1.0795968770980835,grad_norm: 0.9999995629252201, iteration: 46880
loss: 0.9625155925750732,grad_norm: 0.9223270285470299, iteration: 46881
loss: 0.9993816018104553,grad_norm: 0.9214601800407934, iteration: 46882
loss: 0.9957409501075745,grad_norm: 0.9999991656550102, iteration: 46883
loss: 1.0048543214797974,grad_norm: 0.9999991360361709, iteration: 46884
loss: 1.0009329319000244,grad_norm: 0.9754539056677718, iteration: 46885
loss: 1.0112236738204956,grad_norm: 0.9770850255533011, iteration: 46886
loss: 1.0101349353790283,grad_norm: 0.9999991484302958, iteration: 46887
loss: 0.9883083701133728,grad_norm: 0.8744730020768693, iteration: 46888
loss: 0.9794572591781616,grad_norm: 0.9107129521171955, iteration: 46889
loss: 1.0523416996002197,grad_norm: 0.9811666039261924, iteration: 46890
loss: 1.0149023532867432,grad_norm: 0.9999990109847519, iteration: 46891
loss: 1.0156766176223755,grad_norm: 0.9999997845502381, iteration: 46892
loss: 1.0343090295791626,grad_norm: 0.9954181465455951, iteration: 46893
loss: 1.0319709777832031,grad_norm: 0.953990378610449, iteration: 46894
loss: 0.9741954803466797,grad_norm: 0.8194597110973565, iteration: 46895
loss: 0.9773173928260803,grad_norm: 0.8879775665096121, iteration: 46896
loss: 0.9584690928459167,grad_norm: 0.8564472790664368, iteration: 46897
loss: 1.029478669166565,grad_norm: 0.9949948600177702, iteration: 46898
loss: 0.9929864406585693,grad_norm: 0.9897052340074569, iteration: 46899
loss: 0.9831159114837646,grad_norm: 0.9999992437007984, iteration: 46900
loss: 0.9839270114898682,grad_norm: 0.9999989767096851, iteration: 46901
loss: 0.9789025783538818,grad_norm: 0.955925868286752, iteration: 46902
loss: 0.9848440289497375,grad_norm: 0.9999991256093598, iteration: 46903
loss: 1.015220284461975,grad_norm: 0.9544436610017057, iteration: 46904
loss: 0.9861891269683838,grad_norm: 0.970719748051494, iteration: 46905
loss: 0.9840715527534485,grad_norm: 0.8727561259187557, iteration: 46906
loss: 1.0075664520263672,grad_norm: 0.901188150508521, iteration: 46907
loss: 0.9855729937553406,grad_norm: 0.9999990988084807, iteration: 46908
loss: 0.9739776253700256,grad_norm: 0.8988524718916222, iteration: 46909
loss: 1.0132681131362915,grad_norm: 0.9537922139276389, iteration: 46910
loss: 0.958533525466919,grad_norm: 0.9999991101590876, iteration: 46911
loss: 1.0331453084945679,grad_norm: 0.9999995566073359, iteration: 46912
loss: 1.008880615234375,grad_norm: 0.861603421482454, iteration: 46913
loss: 1.063322901725769,grad_norm: 0.9999993749244923, iteration: 46914
loss: 1.0462753772735596,grad_norm: 0.9999996716856003, iteration: 46915
loss: 1.0175632238388062,grad_norm: 0.9999991334716644, iteration: 46916
loss: 0.9785741567611694,grad_norm: 0.9999990310424901, iteration: 46917
loss: 1.0798001289367676,grad_norm: 0.9999995753255637, iteration: 46918
loss: 0.9894261956214905,grad_norm: 0.9994505078651708, iteration: 46919
loss: 0.9959068298339844,grad_norm: 0.8969273623094074, iteration: 46920
loss: 0.997972309589386,grad_norm: 0.9485870212996093, iteration: 46921
loss: 0.9729377627372742,grad_norm: 0.972707315567184, iteration: 46922
loss: 0.9940749406814575,grad_norm: 0.873058085850854, iteration: 46923
loss: 0.9841033816337585,grad_norm: 0.9999991872760693, iteration: 46924
loss: 1.0257169008255005,grad_norm: 0.9999997918678762, iteration: 46925
loss: 1.0102410316467285,grad_norm: 0.9078666783197991, iteration: 46926
loss: 1.0455169677734375,grad_norm: 0.9999995925000472, iteration: 46927
loss: 0.9995879530906677,grad_norm: 0.8339038667984555, iteration: 46928
loss: 1.0072968006134033,grad_norm: 0.9161324164570489, iteration: 46929
loss: 0.9639295339584351,grad_norm: 0.8302616401546333, iteration: 46930
loss: 1.0192630290985107,grad_norm: 0.8841043246909652, iteration: 46931
loss: 1.0385282039642334,grad_norm: 0.9999989612562379, iteration: 46932
loss: 1.0100799798965454,grad_norm: 0.956969258521515, iteration: 46933
loss: 1.0281928777694702,grad_norm: 0.9999990299806695, iteration: 46934
loss: 0.9866252541542053,grad_norm: 0.8606557574650885, iteration: 46935
loss: 1.0032527446746826,grad_norm: 0.7904677525719855, iteration: 46936
loss: 1.023274302482605,grad_norm: 0.8481945647281717, iteration: 46937
loss: 1.011459469795227,grad_norm: 0.9878715468001964, iteration: 46938
loss: 0.9978058338165283,grad_norm: 0.9999996117623872, iteration: 46939
loss: 0.9883869886398315,grad_norm: 0.9044672267729761, iteration: 46940
loss: 0.9871984124183655,grad_norm: 0.9672109120316742, iteration: 46941
loss: 1.0195196866989136,grad_norm: 0.99999905662943, iteration: 46942
loss: 0.985816240310669,grad_norm: 0.8761388594842681, iteration: 46943
loss: 1.0719540119171143,grad_norm: 0.9999991320449992, iteration: 46944
loss: 0.9869530200958252,grad_norm: 0.9039080984910027, iteration: 46945
loss: 1.131492018699646,grad_norm: 0.9999997273829418, iteration: 46946
loss: 1.0161361694335938,grad_norm: 0.8391053359793252, iteration: 46947
loss: 1.00252366065979,grad_norm: 0.8872104385252412, iteration: 46948
loss: 1.0048565864562988,grad_norm: 0.8809691348160364, iteration: 46949
loss: 0.9877551198005676,grad_norm: 0.9780214326134149, iteration: 46950
loss: 1.0264240503311157,grad_norm: 0.9253424238463154, iteration: 46951
loss: 1.0095536708831787,grad_norm: 0.9999990818652086, iteration: 46952
loss: 1.0314825773239136,grad_norm: 0.9999991165858135, iteration: 46953
loss: 1.030985951423645,grad_norm: 0.9999992194054074, iteration: 46954
loss: 1.0237576961517334,grad_norm: 0.9002656173409721, iteration: 46955
loss: 1.0054904222488403,grad_norm: 0.8824248689023438, iteration: 46956
loss: 1.0500129461288452,grad_norm: 0.9999992414478558, iteration: 46957
loss: 0.9867857098579407,grad_norm: 0.9999990759579429, iteration: 46958
loss: 0.9701734185218811,grad_norm: 0.9999992475716849, iteration: 46959
loss: 1.0092803239822388,grad_norm: 0.9108325285964911, iteration: 46960
loss: 0.9868005514144897,grad_norm: 0.9267526955517241, iteration: 46961
loss: 1.0369497537612915,grad_norm: 0.8879029186653983, iteration: 46962
loss: 0.9823115468025208,grad_norm: 0.9149165191085395, iteration: 46963
loss: 1.0174676179885864,grad_norm: 0.9999990874268674, iteration: 46964
loss: 1.0211747884750366,grad_norm: 0.9999990161617544, iteration: 46965
loss: 1.0221019983291626,grad_norm: 0.8577850134150349, iteration: 46966
loss: 0.9992609620094299,grad_norm: 0.8704969007115946, iteration: 46967
loss: 1.039254069328308,grad_norm: 0.9999994251364549, iteration: 46968
loss: 0.9946972131729126,grad_norm: 0.8740203292450254, iteration: 46969
loss: 1.0007712841033936,grad_norm: 0.9327729475430492, iteration: 46970
loss: 1.0098158121109009,grad_norm: 0.9999991217479153, iteration: 46971
loss: 1.0111196041107178,grad_norm: 0.9575079558862202, iteration: 46972
loss: 0.9891580939292908,grad_norm: 0.9999995586388617, iteration: 46973
loss: 0.9885701537132263,grad_norm: 0.8662797092954307, iteration: 46974
loss: 1.0357848405838013,grad_norm: 0.9999992297140478, iteration: 46975
loss: 1.022403359413147,grad_norm: 0.8345460403625035, iteration: 46976
loss: 0.9854991436004639,grad_norm: 0.811218805533466, iteration: 46977
loss: 1.0318201780319214,grad_norm: 0.8183669468524094, iteration: 46978
loss: 0.9620335698127747,grad_norm: 0.9196020251119073, iteration: 46979
loss: 1.0080381631851196,grad_norm: 0.9626262007589121, iteration: 46980
loss: 0.9919878244400024,grad_norm: 0.9999992433665363, iteration: 46981
loss: 1.0169906616210938,grad_norm: 0.9739453163406003, iteration: 46982
loss: 1.031941294670105,grad_norm: 0.9615597752937374, iteration: 46983
loss: 1.0170445442199707,grad_norm: 0.8109918277408401, iteration: 46984
loss: 1.0089575052261353,grad_norm: 0.9999989724160036, iteration: 46985
loss: 1.0172358751296997,grad_norm: 0.9999990069723841, iteration: 46986
loss: 0.9974560737609863,grad_norm: 0.829055261760507, iteration: 46987
loss: 0.9887990355491638,grad_norm: 0.8761740958739879, iteration: 46988
loss: 1.0077263116836548,grad_norm: 0.8833899618923288, iteration: 46989
loss: 1.000542163848877,grad_norm: 0.9999989695351138, iteration: 46990
loss: 0.9655995965003967,grad_norm: 0.89736866161246, iteration: 46991
loss: 1.0182805061340332,grad_norm: 0.7696723725292589, iteration: 46992
loss: 1.0093119144439697,grad_norm: 0.9899549705106855, iteration: 46993
loss: 0.9932557940483093,grad_norm: 0.9999990288824671, iteration: 46994
loss: 1.015473484992981,grad_norm: 0.858281318361915, iteration: 46995
loss: 1.0246793031692505,grad_norm: 0.9999993314652288, iteration: 46996
loss: 1.0655301809310913,grad_norm: 0.9999994141220448, iteration: 46997
loss: 1.0025569200515747,grad_norm: 0.8313628162344064, iteration: 46998
loss: 0.9932491183280945,grad_norm: 0.8446212621454235, iteration: 46999
loss: 1.032370686531067,grad_norm: 0.9456916931519846, iteration: 47000
loss: 1.0114022493362427,grad_norm: 0.9608063198814132, iteration: 47001
loss: 1.0117141008377075,grad_norm: 0.9088553869532789, iteration: 47002
loss: 1.0254278182983398,grad_norm: 0.8682261209488024, iteration: 47003
loss: 0.9829835891723633,grad_norm: 0.7433550188503787, iteration: 47004
loss: 1.0039719343185425,grad_norm: 0.8377962599049122, iteration: 47005
loss: 1.0191600322723389,grad_norm: 0.8847057586584263, iteration: 47006
loss: 1.0547585487365723,grad_norm: 0.9999996047578865, iteration: 47007
loss: 0.9810054898262024,grad_norm: 0.8731943940043496, iteration: 47008
loss: 1.0131665468215942,grad_norm: 0.9999990833378158, iteration: 47009
loss: 0.9739122986793518,grad_norm: 0.8994463080153714, iteration: 47010
loss: 1.0042341947555542,grad_norm: 0.9039179098486265, iteration: 47011
loss: 0.9939910769462585,grad_norm: 0.8276424877763404, iteration: 47012
loss: 1.0025304555892944,grad_norm: 0.9999991171032983, iteration: 47013
loss: 1.0105890035629272,grad_norm: 0.8543015466166828, iteration: 47014
loss: 0.997668445110321,grad_norm: 0.9709313352795926, iteration: 47015
loss: 0.9931143522262573,grad_norm: 0.999999142243397, iteration: 47016
loss: 1.0103627443313599,grad_norm: 0.9928622572351016, iteration: 47017
loss: 1.033119797706604,grad_norm: 0.9999989947626586, iteration: 47018
loss: 0.9920079708099365,grad_norm: 0.882494296849497, iteration: 47019
loss: 1.0128204822540283,grad_norm: 0.8479806424689933, iteration: 47020
loss: 1.069878101348877,grad_norm: 0.9999995315719549, iteration: 47021
loss: 0.9952971935272217,grad_norm: 0.8824528588701536, iteration: 47022
loss: 1.0066770315170288,grad_norm: 0.9999989904015524, iteration: 47023
loss: 1.030155897140503,grad_norm: 0.9044334218246112, iteration: 47024
loss: 1.0001050233840942,grad_norm: 0.9393970354864641, iteration: 47025
loss: 1.0028122663497925,grad_norm: 0.9311688565341292, iteration: 47026
loss: 1.0317001342773438,grad_norm: 0.9999992931949732, iteration: 47027
loss: 1.0129554271697998,grad_norm: 0.8485035855877805, iteration: 47028
loss: 1.0021547079086304,grad_norm: 0.9999993010623889, iteration: 47029
loss: 1.013816237449646,grad_norm: 0.9835097040121578, iteration: 47030
loss: 1.00276780128479,grad_norm: 0.9087393547962264, iteration: 47031
loss: 0.9940564036369324,grad_norm: 0.9379742243309026, iteration: 47032
loss: 1.0055886507034302,grad_norm: 0.9821451410491552, iteration: 47033
loss: 1.0121417045593262,grad_norm: 0.9036473833316944, iteration: 47034
loss: 1.0050013065338135,grad_norm: 0.9667210733837727, iteration: 47035
loss: 0.9619176387786865,grad_norm: 0.9999990657310052, iteration: 47036
loss: 1.0495731830596924,grad_norm: 0.9250702548577995, iteration: 47037
loss: 1.0158799886703491,grad_norm: 0.9859110366456325, iteration: 47038
loss: 1.057183027267456,grad_norm: 0.9999992379826848, iteration: 47039
loss: 0.9786466360092163,grad_norm: 0.7996999425766725, iteration: 47040
loss: 0.9864561557769775,grad_norm: 0.9538861635037615, iteration: 47041
loss: 0.9755328893661499,grad_norm: 0.9302663529279181, iteration: 47042
loss: 0.9711076021194458,grad_norm: 0.862735769456938, iteration: 47043
loss: 0.99552983045578,grad_norm: 0.9999990628612987, iteration: 47044
loss: 1.0457963943481445,grad_norm: 0.9597317174001078, iteration: 47045
loss: 1.0129125118255615,grad_norm: 0.7422583758993414, iteration: 47046
loss: 1.0048669576644897,grad_norm: 0.9264494374248413, iteration: 47047
loss: 0.9725040197372437,grad_norm: 0.9999990687145538, iteration: 47048
loss: 0.9653275609016418,grad_norm: 0.9212320393888567, iteration: 47049
loss: 0.9746913313865662,grad_norm: 0.9588159685484334, iteration: 47050
loss: 1.0315208435058594,grad_norm: 0.9537160739962337, iteration: 47051
loss: 0.9803869128227234,grad_norm: 0.9999992079450838, iteration: 47052
loss: 1.0035078525543213,grad_norm: 0.9999996165815709, iteration: 47053
loss: 0.9855380058288574,grad_norm: 0.9248288044776992, iteration: 47054
loss: 1.0064886808395386,grad_norm: 0.9999993259799893, iteration: 47055
loss: 1.0057846307754517,grad_norm: 0.9999993423215985, iteration: 47056
loss: 1.014949083328247,grad_norm: 0.747932663278081, iteration: 47057
loss: 1.0147403478622437,grad_norm: 0.8826561740813573, iteration: 47058
loss: 0.9760485291481018,grad_norm: 0.999999348516385, iteration: 47059
loss: 0.9618882536888123,grad_norm: 0.8856346355619017, iteration: 47060
loss: 1.007399559020996,grad_norm: 0.9778607090995126, iteration: 47061
loss: 1.0241436958312988,grad_norm: 0.8742134735866199, iteration: 47062
loss: 0.9389446973800659,grad_norm: 0.9999990964820518, iteration: 47063
loss: 1.0369917154312134,grad_norm: 0.9369424423614412, iteration: 47064
loss: 0.9709652662277222,grad_norm: 0.9510518183851977, iteration: 47065
loss: 1.0138027667999268,grad_norm: 0.9127565877984744, iteration: 47066
loss: 1.0018799304962158,grad_norm: 0.9999993151064058, iteration: 47067
loss: 1.0258278846740723,grad_norm: 0.9202598243762427, iteration: 47068
loss: 1.0341171026229858,grad_norm: 0.9999990913858785, iteration: 47069
loss: 1.0324276685714722,grad_norm: 0.9999993279942887, iteration: 47070
loss: 1.012678861618042,grad_norm: 0.997874411586412, iteration: 47071
loss: 0.9908888339996338,grad_norm: 0.9999997423514962, iteration: 47072
loss: 1.0509649515151978,grad_norm: 0.9999990570710408, iteration: 47073
loss: 0.99896240234375,grad_norm: 0.9999992133926862, iteration: 47074
loss: 0.9974547624588013,grad_norm: 0.9285565854793699, iteration: 47075
loss: 1.0168790817260742,grad_norm: 0.8757171258106792, iteration: 47076
loss: 1.0244920253753662,grad_norm: 0.761886926712167, iteration: 47077
loss: 0.9765703678131104,grad_norm: 0.9379804550225116, iteration: 47078
loss: 0.9973146319389343,grad_norm: 0.8651079837775173, iteration: 47079
loss: 0.9815760850906372,grad_norm: 0.9289155365351393, iteration: 47080
loss: 1.0253418684005737,grad_norm: 0.999999594111625, iteration: 47081
loss: 1.0322253704071045,grad_norm: 0.8486248564117372, iteration: 47082
loss: 1.005675196647644,grad_norm: 0.999999636701896, iteration: 47083
loss: 0.9740151762962341,grad_norm: 0.950401582661425, iteration: 47084
loss: 0.9882022738456726,grad_norm: 0.964591951771938, iteration: 47085
loss: 0.9890785813331604,grad_norm: 0.9267564781966984, iteration: 47086
loss: 1.0146281719207764,grad_norm: 0.9488649139479437, iteration: 47087
loss: 1.0187393426895142,grad_norm: 0.9999989517193435, iteration: 47088
loss: 1.026475429534912,grad_norm: 0.8704775757312994, iteration: 47089
loss: 1.0371425151824951,grad_norm: 0.9999990669011629, iteration: 47090
loss: 1.0464065074920654,grad_norm: 0.9854531139853798, iteration: 47091
loss: 1.0158636569976807,grad_norm: 0.9146389546973736, iteration: 47092
loss: 1.0103514194488525,grad_norm: 0.999999085889071, iteration: 47093
loss: 0.9943064451217651,grad_norm: 0.8451691132175064, iteration: 47094
loss: 0.9973082542419434,grad_norm: 0.9241048042778048, iteration: 47095
loss: 1.0355006456375122,grad_norm: 0.9999991600083115, iteration: 47096
loss: 0.9784858822822571,grad_norm: 0.9752096072731437, iteration: 47097
loss: 1.024113416671753,grad_norm: 0.9999990767524211, iteration: 47098
loss: 1.0285662412643433,grad_norm: 0.9692514980760786, iteration: 47099
loss: 0.9678927659988403,grad_norm: 0.956339746880703, iteration: 47100
loss: 1.009100079536438,grad_norm: 0.9636867381127486, iteration: 47101
loss: 1.003051519393921,grad_norm: 0.9869698239953096, iteration: 47102
loss: 0.9944453239440918,grad_norm: 0.8080880213139979, iteration: 47103
loss: 1.0017290115356445,grad_norm: 0.6955271188270251, iteration: 47104
loss: 0.9971472024917603,grad_norm: 0.975793275617036, iteration: 47105
loss: 1.0029281377792358,grad_norm: 0.7879160899261628, iteration: 47106
loss: 1.0343436002731323,grad_norm: 0.9999992764046318, iteration: 47107
loss: 1.048961877822876,grad_norm: 0.9999997027677217, iteration: 47108
loss: 1.0204713344573975,grad_norm: 0.9999991212692934, iteration: 47109
loss: 0.9932806491851807,grad_norm: 0.9949577659080796, iteration: 47110
loss: 1.0027481317520142,grad_norm: 0.9230472135427037, iteration: 47111
loss: 0.9902827143669128,grad_norm: 0.9999991067573287, iteration: 47112
loss: 0.9530560970306396,grad_norm: 0.9962364403314874, iteration: 47113
loss: 1.046656847000122,grad_norm: 0.9999993116479322, iteration: 47114
loss: 0.9971336722373962,grad_norm: 0.9802278268147749, iteration: 47115
loss: 0.9773406982421875,grad_norm: 0.9999991771746825, iteration: 47116
loss: 1.0220389366149902,grad_norm: 0.8033826345139088, iteration: 47117
loss: 1.019901156425476,grad_norm: 0.8563324465216238, iteration: 47118
loss: 0.994729220867157,grad_norm: 0.8170806799925057, iteration: 47119
loss: 1.0082403421401978,grad_norm: 0.8784678247985733, iteration: 47120
loss: 0.9899531006813049,grad_norm: 0.934116492327629, iteration: 47121
loss: 1.004686951637268,grad_norm: 0.9922208545575809, iteration: 47122
loss: 0.9828781485557556,grad_norm: 0.8438976452241382, iteration: 47123
loss: 1.0134202241897583,grad_norm: 0.9999995265802727, iteration: 47124
loss: 0.9990673065185547,grad_norm: 0.8324211522310262, iteration: 47125
loss: 1.0050631761550903,grad_norm: 0.832137556872752, iteration: 47126
loss: 1.015455961227417,grad_norm: 0.9999990833027896, iteration: 47127
loss: 0.9845083355903625,grad_norm: 0.9008339275343218, iteration: 47128
loss: 0.9885371327400208,grad_norm: 0.8345719726955364, iteration: 47129
loss: 1.0111275911331177,grad_norm: 0.9191522606744653, iteration: 47130
loss: 0.9724130630493164,grad_norm: 0.9999995316772615, iteration: 47131
loss: 1.1222466230392456,grad_norm: 0.9999993216458491, iteration: 47132
loss: 1.0223684310913086,grad_norm: 0.9721364696810849, iteration: 47133
loss: 0.9914198517799377,grad_norm: 0.8232960847342089, iteration: 47134
loss: 0.9932132363319397,grad_norm: 0.9999991648257057, iteration: 47135
loss: 0.9821721315383911,grad_norm: 0.9999989784505533, iteration: 47136
loss: 0.9900506138801575,grad_norm: 0.8639065271297982, iteration: 47137
loss: 1.0906267166137695,grad_norm: 0.9465151222377922, iteration: 47138
loss: 1.0274872779846191,grad_norm: 0.9999997695404262, iteration: 47139
loss: 1.010010004043579,grad_norm: 0.9999993173204943, iteration: 47140
loss: 1.007294774055481,grad_norm: 0.8407235912347464, iteration: 47141
loss: 1.0618088245391846,grad_norm: 0.999999335574567, iteration: 47142
loss: 1.0618290901184082,grad_norm: 0.9583996400875882, iteration: 47143
loss: 1.0199589729309082,grad_norm: 0.81820203237802, iteration: 47144
loss: 1.0232148170471191,grad_norm: 0.9893484524270573, iteration: 47145
loss: 1.0082871913909912,grad_norm: 0.9999991894154656, iteration: 47146
loss: 1.0625038146972656,grad_norm: 0.8604730972582059, iteration: 47147
loss: 1.032041072845459,grad_norm: 0.9906391191862832, iteration: 47148
loss: 1.0182619094848633,grad_norm: 0.9999990343217812, iteration: 47149
loss: 1.0973702669143677,grad_norm: 0.9999993319108537, iteration: 47150
loss: 0.9761625528335571,grad_norm: 0.9512964084982755, iteration: 47151
loss: 1.0121649503707886,grad_norm: 0.9999990633754996, iteration: 47152
loss: 1.0352054834365845,grad_norm: 0.9999989528061805, iteration: 47153
loss: 1.0520747900009155,grad_norm: 0.9999990941325004, iteration: 47154
loss: 0.9871200919151306,grad_norm: 0.9945859256979486, iteration: 47155
loss: 0.9680320024490356,grad_norm: 0.985664233609873, iteration: 47156
loss: 1.0430028438568115,grad_norm: 0.9999990596177797, iteration: 47157
loss: 1.000471830368042,grad_norm: 0.8093041040021218, iteration: 47158
loss: 0.9669812321662903,grad_norm: 0.9999990720067903, iteration: 47159
loss: 1.0034927129745483,grad_norm: 0.8629416040770029, iteration: 47160
loss: 1.021348237991333,grad_norm: 0.726311784494512, iteration: 47161
loss: 1.0236644744873047,grad_norm: 0.9999990519016413, iteration: 47162
loss: 1.0150808095932007,grad_norm: 0.9999990960535825, iteration: 47163
loss: 1.026098370552063,grad_norm: 0.8920819969845228, iteration: 47164
loss: 0.9948957562446594,grad_norm: 0.9992624919456446, iteration: 47165
loss: 0.9869657158851624,grad_norm: 0.875086228939496, iteration: 47166
loss: 1.0239059925079346,grad_norm: 0.9999990675594703, iteration: 47167
loss: 1.0333446264266968,grad_norm: 0.9277222338247624, iteration: 47168
loss: 0.9811291694641113,grad_norm: 0.9999991095355905, iteration: 47169
loss: 0.9664451479911804,grad_norm: 0.918688825550276, iteration: 47170
loss: 0.9713799953460693,grad_norm: 0.9999990529949012, iteration: 47171
loss: 1.0251630544662476,grad_norm: 0.8812202389897902, iteration: 47172
loss: 1.0064074993133545,grad_norm: 0.9002100650700012, iteration: 47173
loss: 1.0356508493423462,grad_norm: 0.9999993114789534, iteration: 47174
loss: 0.989517867565155,grad_norm: 0.9999991779233558, iteration: 47175
loss: 1.02140474319458,grad_norm: 0.7053957744453422, iteration: 47176
loss: 1.0002150535583496,grad_norm: 0.9999992439398981, iteration: 47177
loss: 1.0259584188461304,grad_norm: 0.9999989523671732, iteration: 47178
loss: 1.0328993797302246,grad_norm: 0.9802941716706943, iteration: 47179
loss: 1.0065382719039917,grad_norm: 0.9999992228587171, iteration: 47180
loss: 1.0268123149871826,grad_norm: 0.9999991932942969, iteration: 47181
loss: 0.9548318386077881,grad_norm: 0.9999990121381137, iteration: 47182
loss: 1.0155730247497559,grad_norm: 0.7668648906674267, iteration: 47183
loss: 1.012662649154663,grad_norm: 0.9732419074600227, iteration: 47184
loss: 1.0273308753967285,grad_norm: 0.9999989850409681, iteration: 47185
loss: 1.0151448249816895,grad_norm: 0.9700580513327746, iteration: 47186
loss: 1.0544105768203735,grad_norm: 0.8513949234852805, iteration: 47187
loss: 0.9752109050750732,grad_norm: 0.9705291511899439, iteration: 47188
loss: 0.9857491850852966,grad_norm: 0.8872535445571057, iteration: 47189
loss: 1.0042630434036255,grad_norm: 0.9208904656232451, iteration: 47190
loss: 0.9960452914237976,grad_norm: 0.8839350916804077, iteration: 47191
loss: 1.0008741617202759,grad_norm: 0.8964774191815708, iteration: 47192
loss: 0.9749797582626343,grad_norm: 0.9499221043960635, iteration: 47193
loss: 0.9851025938987732,grad_norm: 0.9999990229027502, iteration: 47194
loss: 1.0126631259918213,grad_norm: 0.9999991746465199, iteration: 47195
loss: 0.9854106903076172,grad_norm: 0.9550266915624188, iteration: 47196
loss: 1.125135064125061,grad_norm: 0.9999999133104388, iteration: 47197
loss: 1.019756555557251,grad_norm: 0.9182989685842553, iteration: 47198
loss: 1.0235880613327026,grad_norm: 0.9707544537953015, iteration: 47199
loss: 1.0110242366790771,grad_norm: 0.8667279656242102, iteration: 47200
loss: 0.9779615998268127,grad_norm: 0.9999992259266601, iteration: 47201
loss: 0.9775587916374207,grad_norm: 0.999999081876389, iteration: 47202
loss: 0.9998103380203247,grad_norm: 0.830950536804128, iteration: 47203
loss: 1.0637576580047607,grad_norm: 0.9999992667771227, iteration: 47204
loss: 1.0500636100769043,grad_norm: 0.9999995888154649, iteration: 47205
loss: 1.0599727630615234,grad_norm: 0.99999920041486, iteration: 47206
loss: 1.0412302017211914,grad_norm: 0.9778177925904825, iteration: 47207
loss: 1.101448655128479,grad_norm: 0.9717173165021739, iteration: 47208
loss: 1.0407001972198486,grad_norm: 0.9999998602947515, iteration: 47209
loss: 1.0002357959747314,grad_norm: 0.9689524993937076, iteration: 47210
loss: 1.0045746564865112,grad_norm: 0.9000811674771173, iteration: 47211
loss: 0.9608813524246216,grad_norm: 0.9999991505542359, iteration: 47212
loss: 0.9880567789077759,grad_norm: 0.999999094584124, iteration: 47213
loss: 1.0105983018875122,grad_norm: 0.9999990721224099, iteration: 47214
loss: 0.9908173084259033,grad_norm: 0.9910295840269246, iteration: 47215
loss: 0.9743981957435608,grad_norm: 0.8897324358138514, iteration: 47216
loss: 1.011851191520691,grad_norm: 0.8965037414024016, iteration: 47217
loss: 1.0259439945220947,grad_norm: 0.8757746742190183, iteration: 47218
loss: 0.9983043074607849,grad_norm: 0.9999991991768004, iteration: 47219
loss: 1.0869871377944946,grad_norm: 0.9999996393452876, iteration: 47220
loss: 1.0226094722747803,grad_norm: 0.9576321391230175, iteration: 47221
loss: 1.0192800760269165,grad_norm: 0.9519572664116306, iteration: 47222
loss: 1.0159834623336792,grad_norm: 0.8099596825796322, iteration: 47223
loss: 1.0384336709976196,grad_norm: 0.9999990020409867, iteration: 47224
loss: 0.9809175729751587,grad_norm: 0.9999990979372775, iteration: 47225
loss: 0.9791538715362549,grad_norm: 0.9999991539553511, iteration: 47226
loss: 0.967642605304718,grad_norm: 0.999999186052652, iteration: 47227
loss: 1.0517020225524902,grad_norm: 0.999999499220989, iteration: 47228
loss: 1.0163283348083496,grad_norm: 0.8472908267705638, iteration: 47229
loss: 0.9856431484222412,grad_norm: 0.9999991184272287, iteration: 47230
loss: 1.0300567150115967,grad_norm: 0.7497756082742588, iteration: 47231
loss: 0.9992468953132629,grad_norm: 0.8932512869517983, iteration: 47232
loss: 0.9840095639228821,grad_norm: 0.7540913046861341, iteration: 47233
loss: 1.0216068029403687,grad_norm: 0.9406388471453804, iteration: 47234
loss: 1.0084375143051147,grad_norm: 0.8223322593385783, iteration: 47235
loss: 0.979239284992218,grad_norm: 0.9873973901815175, iteration: 47236
loss: 0.9920893311500549,grad_norm: 0.9999990773291741, iteration: 47237
loss: 1.0070825815200806,grad_norm: 0.9682127772934058, iteration: 47238
loss: 1.008976697921753,grad_norm: 0.8492588973935467, iteration: 47239
loss: 1.0802061557769775,grad_norm: 0.9999996773053356, iteration: 47240
loss: 0.9868797659873962,grad_norm: 0.8541836194847944, iteration: 47241
loss: 0.9996288418769836,grad_norm: 0.8679894903947412, iteration: 47242
loss: 1.0009795427322388,grad_norm: 0.8350448938909464, iteration: 47243
loss: 0.9869011640548706,grad_norm: 0.9935899218635025, iteration: 47244
loss: 1.0387006998062134,grad_norm: 0.9031661360039508, iteration: 47245
loss: 1.0368160009384155,grad_norm: 0.9999992482284099, iteration: 47246
loss: 1.0239899158477783,grad_norm: 0.9670178565316039, iteration: 47247
loss: 1.0296834707260132,grad_norm: 0.9217090918089392, iteration: 47248
loss: 0.9845324754714966,grad_norm: 0.9863568801203966, iteration: 47249
loss: 0.993160605430603,grad_norm: 0.9393186033082519, iteration: 47250
loss: 1.029313087463379,grad_norm: 0.9343682444017273, iteration: 47251
loss: 1.00066339969635,grad_norm: 0.999999172430584, iteration: 47252
loss: 1.0064371824264526,grad_norm: 0.8268909555065946, iteration: 47253
loss: 1.0091955661773682,grad_norm: 0.8201100369514762, iteration: 47254
loss: 0.9847584366798401,grad_norm: 0.9558624894962459, iteration: 47255
loss: 1.0032421350479126,grad_norm: 0.9409514616907987, iteration: 47256
loss: 1.0196220874786377,grad_norm: 0.9999991482324717, iteration: 47257
loss: 1.0458695888519287,grad_norm: 0.9999991000423502, iteration: 47258
loss: 1.0014963150024414,grad_norm: 0.9823721525293583, iteration: 47259
loss: 1.0035535097122192,grad_norm: 0.8997923901462216, iteration: 47260
loss: 1.0371267795562744,grad_norm: 0.8717237936372276, iteration: 47261
loss: 0.9980536699295044,grad_norm: 0.9999991673602528, iteration: 47262
loss: 0.9905183911323547,grad_norm: 0.9231687082741481, iteration: 47263
loss: 0.9759075045585632,grad_norm: 0.8303632200358178, iteration: 47264
loss: 1.0459264516830444,grad_norm: 0.9177586759559893, iteration: 47265
loss: 1.0314313173294067,grad_norm: 0.9338121335734861, iteration: 47266
loss: 1.068878173828125,grad_norm: 0.9999992525457396, iteration: 47267
loss: 1.0180383920669556,grad_norm: 0.9999992176477245, iteration: 47268
loss: 0.991725742816925,grad_norm: 0.999999456060556, iteration: 47269
loss: 1.00626802444458,grad_norm: 0.9999991571686152, iteration: 47270
loss: 0.99325031042099,grad_norm: 0.9056445590263563, iteration: 47271
loss: 0.9997657537460327,grad_norm: 0.830201513475981, iteration: 47272
loss: 1.0307220220565796,grad_norm: 0.957506121582103, iteration: 47273
loss: 1.0016733407974243,grad_norm: 0.9999989026093787, iteration: 47274
loss: 1.0235475301742554,grad_norm: 0.9999991238253529, iteration: 47275
loss: 0.9890022277832031,grad_norm: 0.8794308311951062, iteration: 47276
loss: 1.0119216442108154,grad_norm: 0.999999038980695, iteration: 47277
loss: 1.0152047872543335,grad_norm: 0.9416926866656616, iteration: 47278
loss: 1.0060398578643799,grad_norm: 0.9924856698764002, iteration: 47279
loss: 1.000686764717102,grad_norm: 0.9564088651896216, iteration: 47280
loss: 1.0098267793655396,grad_norm: 0.9744966356035691, iteration: 47281
loss: 1.0193947553634644,grad_norm: 0.8857150528356603, iteration: 47282
loss: 0.9884569048881531,grad_norm: 0.999999231264165, iteration: 47283
loss: 1.017516851425171,grad_norm: 0.9999991069947518, iteration: 47284
loss: 0.9793276190757751,grad_norm: 0.9967699509078907, iteration: 47285
loss: 0.9875928163528442,grad_norm: 0.8563584427475489, iteration: 47286
loss: 1.0186461210250854,grad_norm: 0.9999995164303243, iteration: 47287
loss: 1.0079703330993652,grad_norm: 0.8249490466259793, iteration: 47288
loss: 0.9627925753593445,grad_norm: 0.9385610380471244, iteration: 47289
loss: 1.0191974639892578,grad_norm: 0.9999993185067502, iteration: 47290
loss: 1.0154222249984741,grad_norm: 0.9999996058887853, iteration: 47291
loss: 0.9826655387878418,grad_norm: 0.99999916173962, iteration: 47292
loss: 1.028985857963562,grad_norm: 0.9397728231243391, iteration: 47293
loss: 0.9949842691421509,grad_norm: 0.9799669763619986, iteration: 47294
loss: 0.999502956867218,grad_norm: 0.7736141832289261, iteration: 47295
loss: 0.9972233772277832,grad_norm: 0.9999992961457906, iteration: 47296
loss: 1.009233832359314,grad_norm: 0.9639065275973526, iteration: 47297
loss: 1.0186494588851929,grad_norm: 0.999999062978498, iteration: 47298
loss: 1.0022616386413574,grad_norm: 0.8807633907950411, iteration: 47299
loss: 1.0055588483810425,grad_norm: 0.9999991754284964, iteration: 47300
loss: 1.0336370468139648,grad_norm: 0.9999992638843552, iteration: 47301
loss: 0.987525463104248,grad_norm: 0.9377988839091183, iteration: 47302
loss: 1.0047991275787354,grad_norm: 0.9554563381855052, iteration: 47303
loss: 1.0019176006317139,grad_norm: 0.882777928429954, iteration: 47304
loss: 1.0410988330841064,grad_norm: 0.999999204116265, iteration: 47305
loss: 0.9939247369766235,grad_norm: 0.999999163126797, iteration: 47306
loss: 1.0074981451034546,grad_norm: 0.8647512667257601, iteration: 47307
loss: 1.0343163013458252,grad_norm: 0.9999992320253704, iteration: 47308
loss: 1.0058634281158447,grad_norm: 0.9475911242768821, iteration: 47309
loss: 1.0033798217773438,grad_norm: 0.8850154225235068, iteration: 47310
loss: 1.00645112991333,grad_norm: 0.9575219326476392, iteration: 47311
loss: 1.0020897388458252,grad_norm: 0.9519069659841025, iteration: 47312
loss: 0.9797224998474121,grad_norm: 0.8496336973135121, iteration: 47313
loss: 1.0400007963180542,grad_norm: 0.9232187636518495, iteration: 47314
loss: 1.065675973892212,grad_norm: 0.9999994048987212, iteration: 47315
loss: 1.0488717555999756,grad_norm: 0.9999992658231105, iteration: 47316
loss: 0.9565332531929016,grad_norm: 0.941975638969354, iteration: 47317
loss: 1.0199023485183716,grad_norm: 0.8887777191755729, iteration: 47318
loss: 1.0374053716659546,grad_norm: 0.8279783224978464, iteration: 47319
loss: 1.009331464767456,grad_norm: 0.9999990317901323, iteration: 47320
loss: 0.9970631003379822,grad_norm: 0.9669496538874311, iteration: 47321
loss: 1.0120497941970825,grad_norm: 0.9999991734313167, iteration: 47322
loss: 1.003354549407959,grad_norm: 0.9564171715106184, iteration: 47323
loss: 1.0051045417785645,grad_norm: 0.8372892862019196, iteration: 47324
loss: 0.9737511277198792,grad_norm: 0.9843976257264421, iteration: 47325
loss: 1.0253796577453613,grad_norm: 0.9999989859552967, iteration: 47326
loss: 0.9910722374916077,grad_norm: 0.7604678100142968, iteration: 47327
loss: 1.029782772064209,grad_norm: 0.9510099156730367, iteration: 47328
loss: 0.9855048656463623,grad_norm: 0.8983601357141706, iteration: 47329
loss: 1.1352641582489014,grad_norm: 0.9999991154883089, iteration: 47330
loss: 0.9927537441253662,grad_norm: 0.8892359771481634, iteration: 47331
loss: 1.0702346563339233,grad_norm: 0.9999993086590465, iteration: 47332
loss: 1.0303187370300293,grad_norm: 0.9999991811904705, iteration: 47333
loss: 1.005491018295288,grad_norm: 0.9223289537290047, iteration: 47334
loss: 1.0071467161178589,grad_norm: 0.9999992628780432, iteration: 47335
loss: 0.9973043203353882,grad_norm: 0.9852572545523922, iteration: 47336
loss: 0.9773576259613037,grad_norm: 0.9225876788270485, iteration: 47337
loss: 1.0119328498840332,grad_norm: 0.9436614146541618, iteration: 47338
loss: 0.9987245798110962,grad_norm: 0.9999991041914364, iteration: 47339
loss: 1.0048882961273193,grad_norm: 0.8099157397436832, iteration: 47340
loss: 0.9793314933776855,grad_norm: 0.94044435818531, iteration: 47341
loss: 0.9724079370498657,grad_norm: 0.9999991809223809, iteration: 47342
loss: 0.9968755841255188,grad_norm: 0.9999994065782919, iteration: 47343
loss: 1.0200060606002808,grad_norm: 0.9686659550366068, iteration: 47344
loss: 1.0030893087387085,grad_norm: 0.905257475128968, iteration: 47345
loss: 1.0135822296142578,grad_norm: 0.9616881995904133, iteration: 47346
loss: 0.9687466025352478,grad_norm: 0.843904252961389, iteration: 47347
loss: 1.0318801403045654,grad_norm: 0.9999994070137266, iteration: 47348
loss: 0.9812530279159546,grad_norm: 0.9999991428249798, iteration: 47349
loss: 1.0100071430206299,grad_norm: 0.9170105494380123, iteration: 47350
loss: 1.0063557624816895,grad_norm: 0.9999999871585032, iteration: 47351
loss: 1.0216134786605835,grad_norm: 0.9999991721618755, iteration: 47352
loss: 0.9505999088287354,grad_norm: 0.9345667324448057, iteration: 47353
loss: 1.0015037059783936,grad_norm: 0.8979140002118282, iteration: 47354
loss: 1.007851004600525,grad_norm: 0.8792578012925581, iteration: 47355
loss: 1.0002528429031372,grad_norm: 0.8857912875057428, iteration: 47356
loss: 1.0203901529312134,grad_norm: 0.9955052284696901, iteration: 47357
loss: 1.0803965330123901,grad_norm: 0.999999303905186, iteration: 47358
loss: 0.9914379119873047,grad_norm: 0.8925528534171344, iteration: 47359
loss: 0.9923405647277832,grad_norm: 0.999999234858247, iteration: 47360
loss: 0.9738035798072815,grad_norm: 0.9096669893869412, iteration: 47361
loss: 1.0831502676010132,grad_norm: 0.999999099678583, iteration: 47362
loss: 1.010359525680542,grad_norm: 0.8987374663870443, iteration: 47363
loss: 0.9821684956550598,grad_norm: 0.9999996635772452, iteration: 47364
loss: 1.0283591747283936,grad_norm: 0.8362481618893803, iteration: 47365
loss: 0.9973418116569519,grad_norm: 0.9561029736528605, iteration: 47366
loss: 0.973724365234375,grad_norm: 0.8854375580115367, iteration: 47367
loss: 1.009992003440857,grad_norm: 0.9999991113922667, iteration: 47368
loss: 1.0293068885803223,grad_norm: 0.972681868539784, iteration: 47369
loss: 0.9671352505683899,grad_norm: 0.9221711987586705, iteration: 47370
loss: 1.139234185218811,grad_norm: 0.9999994290327723, iteration: 47371
loss: 1.0026495456695557,grad_norm: 0.9999990067505976, iteration: 47372
loss: 1.0768980979919434,grad_norm: 0.999999898521997, iteration: 47373
loss: 1.0079121589660645,grad_norm: 0.9999991284101695, iteration: 47374
loss: 1.0170018672943115,grad_norm: 0.9999997214635964, iteration: 47375
loss: 1.010910987854004,grad_norm: 0.9999995486199722, iteration: 47376
loss: 0.9840392470359802,grad_norm: 0.8590288661581447, iteration: 47377
loss: 1.0277820825576782,grad_norm: 0.9999999636009478, iteration: 47378
loss: 1.0596548318862915,grad_norm: 0.9999997488492791, iteration: 47379
loss: 0.9960052371025085,grad_norm: 0.8478069444107076, iteration: 47380
loss: 1.005730390548706,grad_norm: 0.9782409796828784, iteration: 47381
loss: 1.0195437669754028,grad_norm: 0.99999912916676, iteration: 47382
loss: 1.037070870399475,grad_norm: 0.9783473275114827, iteration: 47383
loss: 1.006143569946289,grad_norm: 0.9094158486732596, iteration: 47384
loss: 1.0033317804336548,grad_norm: 0.9999992684954582, iteration: 47385
loss: 1.0624414682388306,grad_norm: 0.9999992654172092, iteration: 47386
loss: 1.003442645072937,grad_norm: 0.9999996836232011, iteration: 47387
loss: 1.0100010633468628,grad_norm: 0.9999991368037382, iteration: 47388
loss: 1.0026895999908447,grad_norm: 0.7708768595869951, iteration: 47389
loss: 1.0149153470993042,grad_norm: 0.9999993266842353, iteration: 47390
loss: 1.009016752243042,grad_norm: 0.9502421597880019, iteration: 47391
loss: 1.0073410272598267,grad_norm: 0.9999998766185819, iteration: 47392
loss: 0.9754900336265564,grad_norm: 0.9444599605981978, iteration: 47393
loss: 0.9682512879371643,grad_norm: 0.9945992779417195, iteration: 47394
loss: 0.9669301509857178,grad_norm: 0.9999991676072321, iteration: 47395
loss: 1.0229605436325073,grad_norm: 0.9999992829206719, iteration: 47396
loss: 0.9981613159179688,grad_norm: 0.9999990907400038, iteration: 47397
loss: 1.2311705350875854,grad_norm: 0.9999997994799384, iteration: 47398
loss: 1.037333369255066,grad_norm: 0.9999996540938406, iteration: 47399
loss: 1.064027190208435,grad_norm: 0.999999052801285, iteration: 47400
loss: 1.0033068656921387,grad_norm: 0.7920154274128964, iteration: 47401
loss: 0.9736863374710083,grad_norm: 0.9043302155476747, iteration: 47402
loss: 1.0323611497879028,grad_norm: 0.9999990108431597, iteration: 47403
loss: 0.9835428595542908,grad_norm: 0.9522949352182296, iteration: 47404
loss: 0.988981306552887,grad_norm: 0.9999992954410369, iteration: 47405
loss: 0.9873561263084412,grad_norm: 0.9189691096999109, iteration: 47406
loss: 0.9956813454627991,grad_norm: 0.9999990876417959, iteration: 47407
loss: 0.9916387796401978,grad_norm: 0.9998276730787704, iteration: 47408
loss: 1.1047232151031494,grad_norm: 0.9999998873804484, iteration: 47409
loss: 1.045982003211975,grad_norm: 0.9999992633143581, iteration: 47410
loss: 1.0544242858886719,grad_norm: 0.9999998121295878, iteration: 47411
loss: 1.0174767971038818,grad_norm: 0.9999992066667197, iteration: 47412
loss: 0.994012176990509,grad_norm: 0.7675044743809989, iteration: 47413
loss: 1.1361000537872314,grad_norm: 0.9999997910409673, iteration: 47414
loss: 0.9920824766159058,grad_norm: 0.8579922942443919, iteration: 47415
loss: 1.0299395322799683,grad_norm: 0.9999994702598705, iteration: 47416
loss: 0.9853095412254333,grad_norm: 0.8898144222596372, iteration: 47417
loss: 0.9927181601524353,grad_norm: 0.9999992221865156, iteration: 47418
loss: 1.0141838788986206,grad_norm: 0.9999991786350076, iteration: 47419
loss: 0.9985997080802917,grad_norm: 0.9850004927677306, iteration: 47420
loss: 1.0200272798538208,grad_norm: 0.9999993846134699, iteration: 47421
loss: 0.9920107126235962,grad_norm: 0.9999990614185579, iteration: 47422
loss: 1.0529118776321411,grad_norm: 0.9431628840915595, iteration: 47423
loss: 0.9939671754837036,grad_norm: 0.9999991919616295, iteration: 47424
loss: 0.9857906699180603,grad_norm: 0.945482291888639, iteration: 47425
loss: 0.9831271767616272,grad_norm: 0.9648414892499896, iteration: 47426
loss: 1.0367822647094727,grad_norm: 0.9999992706127813, iteration: 47427
loss: 0.9907912611961365,grad_norm: 0.9999991525163522, iteration: 47428
loss: 1.0063120126724243,grad_norm: 0.9999991884516248, iteration: 47429
loss: 1.0917444229125977,grad_norm: 0.9999999651504067, iteration: 47430
loss: 1.0018181800842285,grad_norm: 0.8260910674533493, iteration: 47431
loss: 1.0627403259277344,grad_norm: 0.9999991107666337, iteration: 47432
loss: 1.060701847076416,grad_norm: 0.9999993716910619, iteration: 47433
loss: 1.0050228834152222,grad_norm: 0.999999057584176, iteration: 47434
loss: 0.9974817633628845,grad_norm: 0.9999991769738894, iteration: 47435
loss: 1.0561615228652954,grad_norm: 0.9999992373142785, iteration: 47436
loss: 0.9482744336128235,grad_norm: 0.9999991376976441, iteration: 47437
loss: 1.0153172016143799,grad_norm: 0.9999995814914961, iteration: 47438
loss: 1.0138847827911377,grad_norm: 0.9999991788798764, iteration: 47439
loss: 0.9995513558387756,grad_norm: 0.9999996742673325, iteration: 47440
loss: 1.0073459148406982,grad_norm: 0.9764376850770995, iteration: 47441
loss: 1.0332082509994507,grad_norm: 0.7564669354926451, iteration: 47442
loss: 1.029084324836731,grad_norm: 0.9999990886486296, iteration: 47443
loss: 0.9902756214141846,grad_norm: 0.9398061994430655, iteration: 47444
loss: 0.9613727927207947,grad_norm: 0.9999990283749487, iteration: 47445
loss: 1.0188645124435425,grad_norm: 0.9999990394954453, iteration: 47446
loss: 1.0044769048690796,grad_norm: 0.9999990406555138, iteration: 47447
loss: 0.9952512383460999,grad_norm: 0.9999994458017291, iteration: 47448
loss: 1.0079432725906372,grad_norm: 0.9612952189539252, iteration: 47449
loss: 1.016656517982483,grad_norm: 0.9412589288962433, iteration: 47450
loss: 1.0042338371276855,grad_norm: 0.9359959466868178, iteration: 47451
loss: 1.0277308225631714,grad_norm: 0.9999990558999603, iteration: 47452
loss: 1.0026687383651733,grad_norm: 0.9278442448247236, iteration: 47453
loss: 1.0297865867614746,grad_norm: 0.955414502911478, iteration: 47454
loss: 1.0195426940917969,grad_norm: 0.9999996835747317, iteration: 47455
loss: 0.9672955870628357,grad_norm: 0.9653753436318987, iteration: 47456
loss: 1.011898159980774,grad_norm: 0.9055000866032449, iteration: 47457
loss: 1.0420374870300293,grad_norm: 0.8918402431276796, iteration: 47458
loss: 0.9803552031517029,grad_norm: 0.8870089377018398, iteration: 47459
loss: 1.0859936475753784,grad_norm: 0.9999995999768783, iteration: 47460
loss: 1.0007213354110718,grad_norm: 0.9824026747450049, iteration: 47461
loss: 1.0000392198562622,grad_norm: 0.8397628372662579, iteration: 47462
loss: 1.0123801231384277,grad_norm: 0.917034543593061, iteration: 47463
loss: 0.987860381603241,grad_norm: 0.999999848271808, iteration: 47464
loss: 1.0962320566177368,grad_norm: 0.999999113555346, iteration: 47465
loss: 1.0041335821151733,grad_norm: 0.9999994710130233, iteration: 47466
loss: 1.0182985067367554,grad_norm: 0.9999992135600606, iteration: 47467
loss: 1.0080844163894653,grad_norm: 0.8711581925032608, iteration: 47468
loss: 1.028347373008728,grad_norm: 0.981909717503136, iteration: 47469
loss: 0.9997768402099609,grad_norm: 0.9849409729884652, iteration: 47470
loss: 1.026332974433899,grad_norm: 0.9999991844008059, iteration: 47471
loss: 1.103332757949829,grad_norm: 0.9999997451250914, iteration: 47472
loss: 0.9759997129440308,grad_norm: 0.8994866779007857, iteration: 47473
loss: 1.0559860467910767,grad_norm: 0.9999990991488545, iteration: 47474
loss: 1.0540738105773926,grad_norm: 0.9999994162695305, iteration: 47475
loss: 1.1361873149871826,grad_norm: 0.999999933756327, iteration: 47476
loss: 1.1917691230773926,grad_norm: 0.9999992576996757, iteration: 47477
loss: 1.038048505783081,grad_norm: 0.9999996811809291, iteration: 47478
loss: 1.0675967931747437,grad_norm: 0.9999991114152602, iteration: 47479
loss: 1.0326100587844849,grad_norm: 0.8353870933356748, iteration: 47480
loss: 1.0229891538619995,grad_norm: 0.9206974282860038, iteration: 47481
loss: 1.0525604486465454,grad_norm: 0.9553744683207074, iteration: 47482
loss: 1.0072122812271118,grad_norm: 0.9999991907105019, iteration: 47483
loss: 1.0053821802139282,grad_norm: 0.8980827395300017, iteration: 47484
loss: 1.0282886028289795,grad_norm: 0.7720408049689244, iteration: 47485
loss: 1.0324561595916748,grad_norm: 0.8605344079158607, iteration: 47486
loss: 1.0681304931640625,grad_norm: 0.924305295601379, iteration: 47487
loss: 1.0467978715896606,grad_norm: 0.8871804319838684, iteration: 47488
loss: 1.0204238891601562,grad_norm: 0.9661863802054187, iteration: 47489
loss: 1.0120606422424316,grad_norm: 0.890044061167268, iteration: 47490
loss: 1.021615982055664,grad_norm: 0.999999082592296, iteration: 47491
loss: 1.0150022506713867,grad_norm: 0.8432177674244462, iteration: 47492
loss: 1.0040524005889893,grad_norm: 0.7110805022980837, iteration: 47493
loss: 1.0188063383102417,grad_norm: 0.9999998977528064, iteration: 47494
loss: 1.0471608638763428,grad_norm: 0.9999993593684142, iteration: 47495
loss: 1.0253281593322754,grad_norm: 0.9999991567434096, iteration: 47496
loss: 1.008967638015747,grad_norm: 0.9772278053989593, iteration: 47497
loss: 0.9834002256393433,grad_norm: 0.8816987003868345, iteration: 47498
loss: 0.9788674712181091,grad_norm: 0.9144749833741622, iteration: 47499
loss: 1.0162304639816284,grad_norm: 0.9999992455002669, iteration: 47500
loss: 1.0122272968292236,grad_norm: 0.8733373205735516, iteration: 47501
loss: 1.027437686920166,grad_norm: 0.9999989906880343, iteration: 47502
loss: 1.000362515449524,grad_norm: 0.9999991350133323, iteration: 47503
loss: 0.9962844848632812,grad_norm: 0.9856808657210666, iteration: 47504
loss: 1.063662052154541,grad_norm: 0.9976931277830621, iteration: 47505
loss: 1.0002111196517944,grad_norm: 0.9999992103271214, iteration: 47506
loss: 1.04132878780365,grad_norm: 0.8915451540630482, iteration: 47507
loss: 0.9811241626739502,grad_norm: 0.9999990435586406, iteration: 47508
loss: 1.0652369260787964,grad_norm: 0.9999997103970932, iteration: 47509
loss: 0.9856945276260376,grad_norm: 0.9891540507826333, iteration: 47510
loss: 1.0016014575958252,grad_norm: 0.9999991137116253, iteration: 47511
loss: 0.9669488072395325,grad_norm: 0.9999990564257878, iteration: 47512
loss: 0.9762521982192993,grad_norm: 0.9927322219119289, iteration: 47513
loss: 0.98508220911026,grad_norm: 0.9999990642842137, iteration: 47514
loss: 1.0623031854629517,grad_norm: 0.9580331849220723, iteration: 47515
loss: 0.99472576379776,grad_norm: 0.999999299207755, iteration: 47516
loss: 1.011279821395874,grad_norm: 0.9999995398567579, iteration: 47517
loss: 1.0048787593841553,grad_norm: 0.8883822870477368, iteration: 47518
loss: 0.9968042373657227,grad_norm: 0.9033097691785756, iteration: 47519
loss: 0.9990248680114746,grad_norm: 0.9999996146821291, iteration: 47520
loss: 0.9875978827476501,grad_norm: 0.9999992711718964, iteration: 47521
loss: 1.0151125192642212,grad_norm: 0.804785336818814, iteration: 47522
loss: 0.9923904538154602,grad_norm: 0.9048150559066517, iteration: 47523
loss: 0.974907398223877,grad_norm: 0.9951416134677558, iteration: 47524
loss: 0.9876178503036499,grad_norm: 0.9095419342374866, iteration: 47525
loss: 1.0105406045913696,grad_norm: 0.9342439495905301, iteration: 47526
loss: 1.022800087928772,grad_norm: 0.9999991678160661, iteration: 47527
loss: 1.0125349760055542,grad_norm: 0.9999998156256295, iteration: 47528
loss: 0.982097864151001,grad_norm: 0.884742904986341, iteration: 47529
loss: 0.9883707165718079,grad_norm: 0.9638533051394608, iteration: 47530
loss: 1.0095142126083374,grad_norm: 0.759499606906534, iteration: 47531
loss: 1.0640286207199097,grad_norm: 0.9999999493964051, iteration: 47532
loss: 0.9829234480857849,grad_norm: 0.9999991286446965, iteration: 47533
loss: 0.9674975275993347,grad_norm: 0.9999990469687262, iteration: 47534
loss: 1.0505685806274414,grad_norm: 0.9999992815995257, iteration: 47535
loss: 1.0051004886627197,grad_norm: 0.9999995478126924, iteration: 47536
loss: 0.9919182658195496,grad_norm: 0.896765744414941, iteration: 47537
loss: 1.011153221130371,grad_norm: 0.9999992468978524, iteration: 47538
loss: 0.9988235235214233,grad_norm: 0.9454829445466151, iteration: 47539
loss: 0.9787985682487488,grad_norm: 0.9999995159139553, iteration: 47540
loss: 1.0017421245574951,grad_norm: 0.8732525794395857, iteration: 47541
loss: 1.0414918661117554,grad_norm: 0.9999998190339128, iteration: 47542
loss: 1.001941204071045,grad_norm: 0.9999993324134905, iteration: 47543
loss: 1.0339115858078003,grad_norm: 0.8738468055909726, iteration: 47544
loss: 0.9700311422348022,grad_norm: 0.7728586229641774, iteration: 47545
loss: 1.0372000932693481,grad_norm: 0.9999996020455256, iteration: 47546
loss: 0.9951220154762268,grad_norm: 0.9999990518497615, iteration: 47547
loss: 1.0119225978851318,grad_norm: 0.9999990190329204, iteration: 47548
loss: 1.0299054384231567,grad_norm: 0.9999989935278611, iteration: 47549
loss: 0.9816990494728088,grad_norm: 0.9210874226669946, iteration: 47550
loss: 0.9956656098365784,grad_norm: 0.9999992231392729, iteration: 47551
loss: 0.9866102933883667,grad_norm: 0.9999991484222168, iteration: 47552
loss: 0.93630450963974,grad_norm: 0.9772848286825759, iteration: 47553
loss: 1.0152099132537842,grad_norm: 0.7610890258739298, iteration: 47554
loss: 1.050323724746704,grad_norm: 0.9999997372992775, iteration: 47555
loss: 1.0508941411972046,grad_norm: 1.0000000281029553, iteration: 47556
loss: 0.9850742220878601,grad_norm: 0.7340727606174156, iteration: 47557
loss: 0.9610485434532166,grad_norm: 0.9534442795976171, iteration: 47558
loss: 0.9924280643463135,grad_norm: 0.8903526061969406, iteration: 47559
loss: 0.986262321472168,grad_norm: 0.9860309761252694, iteration: 47560
loss: 1.0100466012954712,grad_norm: 0.8555733384829309, iteration: 47561
loss: 0.9793484807014465,grad_norm: 0.9999998336486827, iteration: 47562
loss: 1.1027967929840088,grad_norm: 0.9999992812349519, iteration: 47563
loss: 1.0199472904205322,grad_norm: 0.9602462321116969, iteration: 47564
loss: 1.008524775505066,grad_norm: 0.9999996775882697, iteration: 47565
loss: 0.9795119166374207,grad_norm: 0.9999988543122996, iteration: 47566
loss: 0.9878206253051758,grad_norm: 0.9999989911697038, iteration: 47567
loss: 1.0192031860351562,grad_norm: 0.9703958324180169, iteration: 47568
loss: 0.9983252882957458,grad_norm: 0.9999991723886852, iteration: 47569
loss: 0.9924565553665161,grad_norm: 0.9999991139345372, iteration: 47570
loss: 1.0479027032852173,grad_norm: 0.999999341035224, iteration: 47571
loss: 1.0724130868911743,grad_norm: 0.999999159818502, iteration: 47572
loss: 0.9902627468109131,grad_norm: 0.9999990637253047, iteration: 47573
loss: 1.004353642463684,grad_norm: 0.8884770632059951, iteration: 47574
loss: 1.1221057176589966,grad_norm: 0.965937388439897, iteration: 47575
loss: 1.0425776243209839,grad_norm: 0.9999991581020152, iteration: 47576
loss: 1.0550001859664917,grad_norm: 0.9999997987395447, iteration: 47577
loss: 1.0392452478408813,grad_norm: 0.9999996801436415, iteration: 47578
loss: 1.0802797079086304,grad_norm: 0.9999998388667045, iteration: 47579
loss: 1.0340700149536133,grad_norm: 0.9999993348750381, iteration: 47580
loss: 1.0481677055358887,grad_norm: 0.999999467465551, iteration: 47581
loss: 1.0334511995315552,grad_norm: 0.9858791038057476, iteration: 47582
loss: 1.0319288969039917,grad_norm: 0.9999996218530489, iteration: 47583
loss: 1.0619901418685913,grad_norm: 0.9999993850760015, iteration: 47584
loss: 0.9947767853736877,grad_norm: 0.8553553779694295, iteration: 47585
loss: 1.005826711654663,grad_norm: 0.6899593474951904, iteration: 47586
loss: 0.9779011607170105,grad_norm: 0.99999924322065, iteration: 47587
loss: 1.0228490829467773,grad_norm: 0.9999990982137953, iteration: 47588
loss: 1.0013551712036133,grad_norm: 0.9999996179494635, iteration: 47589
loss: 0.9820632338523865,grad_norm: 0.8686605722849277, iteration: 47590
loss: 0.9682920575141907,grad_norm: 0.9128835396680874, iteration: 47591
loss: 0.9930831789970398,grad_norm: 0.999999169344733, iteration: 47592
loss: 1.001702904701233,grad_norm: 0.9849887173986902, iteration: 47593
loss: 1.0130776166915894,grad_norm: 0.9999992892777286, iteration: 47594
loss: 0.9798799753189087,grad_norm: 0.9999990426576197, iteration: 47595
loss: 1.0064133405685425,grad_norm: 0.9449978029470119, iteration: 47596
loss: 0.985424816608429,grad_norm: 0.9999990691233808, iteration: 47597
loss: 1.029177188873291,grad_norm: 0.9999994676204884, iteration: 47598
loss: 1.003127098083496,grad_norm: 0.977819528254548, iteration: 47599
loss: 1.0129972696304321,grad_norm: 0.9999991978583964, iteration: 47600
loss: 1.0549116134643555,grad_norm: 0.9999997339010839, iteration: 47601
loss: 1.02891206741333,grad_norm: 0.9999989959642067, iteration: 47602
loss: 0.9848564863204956,grad_norm: 0.8869289445249643, iteration: 47603
loss: 0.9983951449394226,grad_norm: 0.9072999059906176, iteration: 47604
loss: 1.0890491008758545,grad_norm: 0.9999993165301141, iteration: 47605
loss: 1.0649141073226929,grad_norm: 0.999999878142958, iteration: 47606
loss: 1.0393608808517456,grad_norm: 0.9999992929009686, iteration: 47607
loss: 1.0630041360855103,grad_norm: 0.9999996809073632, iteration: 47608
loss: 0.9854364395141602,grad_norm: 0.9999992622716269, iteration: 47609
loss: 1.064998984336853,grad_norm: 0.9999995359264496, iteration: 47610
loss: 1.0566437244415283,grad_norm: 0.9999992520740447, iteration: 47611
loss: 1.0492709875106812,grad_norm: 0.9515373804232045, iteration: 47612
loss: 1.0019769668579102,grad_norm: 0.9999990798343571, iteration: 47613
loss: 1.0036466121673584,grad_norm: 0.9999993003332729, iteration: 47614
loss: 0.9363523721694946,grad_norm: 0.9293314479074962, iteration: 47615
loss: 1.076048493385315,grad_norm: 0.9999996736554626, iteration: 47616
loss: 0.9870156645774841,grad_norm: 0.9818362418789849, iteration: 47617
loss: 1.0234053134918213,grad_norm: 0.9837115389137652, iteration: 47618
loss: 1.0544759035110474,grad_norm: 0.9999995546569832, iteration: 47619
loss: 1.021449089050293,grad_norm: 0.9999992814665807, iteration: 47620
loss: 1.0088006258010864,grad_norm: 0.9999990924438087, iteration: 47621
loss: 0.9824248552322388,grad_norm: 0.8916524117657231, iteration: 47622
loss: 1.0047770738601685,grad_norm: 0.8889333757674582, iteration: 47623
loss: 0.9846399426460266,grad_norm: 0.9999993919922906, iteration: 47624
loss: 1.006407380104065,grad_norm: 0.9362237248916639, iteration: 47625
loss: 1.0085557699203491,grad_norm: 0.9305977842720944, iteration: 47626
loss: 0.9879997968673706,grad_norm: 0.9999991815252363, iteration: 47627
loss: 1.0509439706802368,grad_norm: 0.9945525438202546, iteration: 47628
loss: 0.9822173118591309,grad_norm: 0.8806131303448154, iteration: 47629
loss: 0.9903956651687622,grad_norm: 0.9642271680782689, iteration: 47630
loss: 0.993497908115387,grad_norm: 0.9199985079396653, iteration: 47631
loss: 1.0244134664535522,grad_norm: 0.9999996143759576, iteration: 47632
loss: 1.0408493280410767,grad_norm: 0.9999994153986168, iteration: 47633
loss: 1.0067710876464844,grad_norm: 0.999999073297908, iteration: 47634
loss: 0.9928986430168152,grad_norm: 0.9195873628113698, iteration: 47635
loss: 1.032724380493164,grad_norm: 0.9291162012743465, iteration: 47636
loss: 0.9802405834197998,grad_norm: 0.9710320246526244, iteration: 47637
loss: 0.9604798555374146,grad_norm: 0.9999989792089521, iteration: 47638
loss: 1.0070403814315796,grad_norm: 0.8773035211400317, iteration: 47639
loss: 1.0018699169158936,grad_norm: 0.8064202917673966, iteration: 47640
loss: 1.0309072732925415,grad_norm: 0.9999992275064088, iteration: 47641
loss: 0.979868471622467,grad_norm: 0.9999990209189372, iteration: 47642
loss: 0.9869145750999451,grad_norm: 0.964405097177299, iteration: 47643
loss: 0.9874350428581238,grad_norm: 0.9513338825241419, iteration: 47644
loss: 1.019840955734253,grad_norm: 0.9999996476020753, iteration: 47645
loss: 1.0531316995620728,grad_norm: 0.9999994091526248, iteration: 47646
loss: 1.0511690378189087,grad_norm: 0.9999990603492539, iteration: 47647
loss: 0.9842422604560852,grad_norm: 0.9339588932128244, iteration: 47648
loss: 1.1001907587051392,grad_norm: 0.9999996472891441, iteration: 47649
loss: 1.0353407859802246,grad_norm: 0.9999991844955959, iteration: 47650
loss: 1.0107831954956055,grad_norm: 0.999999345662843, iteration: 47651
loss: 1.0441217422485352,grad_norm: 0.7750911870491355, iteration: 47652
loss: 0.9889454245567322,grad_norm: 0.9999991340337454, iteration: 47653
loss: 1.0163682699203491,grad_norm: 0.999999631171489, iteration: 47654
loss: 0.9981907606124878,grad_norm: 1.000000021361783, iteration: 47655
loss: 1.0182063579559326,grad_norm: 0.9999993583804698, iteration: 47656
loss: 0.9923405647277832,grad_norm: 0.9999991688103718, iteration: 47657
loss: 1.0523260831832886,grad_norm: 0.9999998214228514, iteration: 47658
loss: 0.9812579154968262,grad_norm: 0.8741656982033029, iteration: 47659
loss: 1.0028362274169922,grad_norm: 0.9999991212500627, iteration: 47660
loss: 1.1495473384857178,grad_norm: 0.9999995788571486, iteration: 47661
loss: 0.962668240070343,grad_norm: 0.9999992150507366, iteration: 47662
loss: 1.0211572647094727,grad_norm: 0.9032768921685874, iteration: 47663
loss: 0.9916248917579651,grad_norm: 0.7786838146035526, iteration: 47664
loss: 0.9685412049293518,grad_norm: 0.9027860916374041, iteration: 47665
loss: 1.036896824836731,grad_norm: 0.9999991237926865, iteration: 47666
loss: 1.129639983177185,grad_norm: 0.957677014941427, iteration: 47667
loss: 1.035876750946045,grad_norm: 0.9999993630047256, iteration: 47668
loss: 0.9676112532615662,grad_norm: 0.9869808422938415, iteration: 47669
loss: 0.9679325819015503,grad_norm: 0.9608470595568076, iteration: 47670
loss: 0.9929882287979126,grad_norm: 0.8377319219552971, iteration: 47671
loss: 1.0289040803909302,grad_norm: 0.9999991269936127, iteration: 47672
loss: 1.0061321258544922,grad_norm: 0.9999992161513731, iteration: 47673
loss: 1.0290467739105225,grad_norm: 0.9999991691454562, iteration: 47674
loss: 1.0088751316070557,grad_norm: 0.9999991684571099, iteration: 47675
loss: 0.9918783903121948,grad_norm: 0.9295886018604683, iteration: 47676
loss: 1.0456455945968628,grad_norm: 0.9999991620111494, iteration: 47677
loss: 0.996451735496521,grad_norm: 0.99999914102557, iteration: 47678
loss: 1.0346262454986572,grad_norm: 0.9999995575765865, iteration: 47679
loss: 1.024431824684143,grad_norm: 0.9999991101207233, iteration: 47680
loss: 0.9900250434875488,grad_norm: 0.9999989377114179, iteration: 47681
loss: 1.035513162612915,grad_norm: 0.9999991418929159, iteration: 47682
loss: 1.0508545637130737,grad_norm: 0.9999991237944537, iteration: 47683
loss: 0.9566419720649719,grad_norm: 0.9999990735123878, iteration: 47684
loss: 0.9921951293945312,grad_norm: 0.9671443621886622, iteration: 47685
loss: 1.0221894979476929,grad_norm: 0.9999990994336251, iteration: 47686
loss: 1.0696676969528198,grad_norm: 0.9999991641841635, iteration: 47687
loss: 1.0135135650634766,grad_norm: 0.8819396996824372, iteration: 47688
loss: 1.1087638139724731,grad_norm: 0.999999096607545, iteration: 47689
loss: 1.0144721269607544,grad_norm: 0.9999992714051364, iteration: 47690
loss: 1.0568840503692627,grad_norm: 0.9999995409094259, iteration: 47691
loss: 1.0188636779785156,grad_norm: 0.9999998967204198, iteration: 47692
loss: 0.981241762638092,grad_norm: 0.9999993679218161, iteration: 47693
loss: 1.072202205657959,grad_norm: 0.9999990445466569, iteration: 47694
loss: 1.0222547054290771,grad_norm: 0.9999993597120301, iteration: 47695
loss: 0.9977594017982483,grad_norm: 0.7865214827704167, iteration: 47696
loss: 1.049823522567749,grad_norm: 0.9999993158811207, iteration: 47697
loss: 1.003791332244873,grad_norm: 0.9978759596627378, iteration: 47698
loss: 1.003624439239502,grad_norm: 0.9999995863011351, iteration: 47699
loss: 0.9781184196472168,grad_norm: 0.9505299230073043, iteration: 47700
loss: 0.9944811463356018,grad_norm: 0.9610567150985084, iteration: 47701
loss: 1.0072135925292969,grad_norm: 0.9999992194817261, iteration: 47702
loss: 1.0603277683258057,grad_norm: 0.9999995283390942, iteration: 47703
loss: 1.0112767219543457,grad_norm: 0.9413440687472939, iteration: 47704
loss: 1.0321415662765503,grad_norm: 0.9999993020066461, iteration: 47705
loss: 1.0093647241592407,grad_norm: 0.9326146558147648, iteration: 47706
loss: 1.0102112293243408,grad_norm: 0.999999598279668, iteration: 47707
loss: 1.0068334341049194,grad_norm: 0.9999997164345072, iteration: 47708
loss: 1.006711721420288,grad_norm: 0.9999991553204314, iteration: 47709
loss: 0.9906765222549438,grad_norm: 0.9045583862419979, iteration: 47710
loss: 1.021630883216858,grad_norm: 0.9614355479593891, iteration: 47711
loss: 1.0099822282791138,grad_norm: 0.9999992944615581, iteration: 47712
loss: 1.0086828470230103,grad_norm: 0.9999991293341122, iteration: 47713
loss: 1.0718837976455688,grad_norm: 0.9999997717961802, iteration: 47714
loss: 1.0290902853012085,grad_norm: 0.9999996570019297, iteration: 47715
loss: 1.0153412818908691,grad_norm: 0.9999990533096665, iteration: 47716
loss: 0.9959299564361572,grad_norm: 0.8768320154651172, iteration: 47717
loss: 0.9828342795372009,grad_norm: 0.8467218592313818, iteration: 47718
loss: 1.0206469297409058,grad_norm: 0.999998945658896, iteration: 47719
loss: 1.0258179903030396,grad_norm: 0.8810417462676492, iteration: 47720
loss: 0.9697341322898865,grad_norm: 0.8197347429443738, iteration: 47721
loss: 0.9690351486206055,grad_norm: 0.9999993166502069, iteration: 47722
loss: 0.9777737855911255,grad_norm: 0.9999994303462564, iteration: 47723
loss: 1.0082777738571167,grad_norm: 0.9999990670365568, iteration: 47724
loss: 0.9813708066940308,grad_norm: 0.9999990395802203, iteration: 47725
loss: 0.9804467558860779,grad_norm: 0.999999077744217, iteration: 47726
loss: 0.9917048811912537,grad_norm: 0.9999991514503666, iteration: 47727
loss: 0.9846886992454529,grad_norm: 0.9999990998324922, iteration: 47728
loss: 1.014785647392273,grad_norm: 0.9218122449356725, iteration: 47729
loss: 1.0072225332260132,grad_norm: 0.9999994273585748, iteration: 47730
loss: 1.0309830904006958,grad_norm: 0.9999993269381595, iteration: 47731
loss: 0.9929668307304382,grad_norm: 0.9436092819590604, iteration: 47732
loss: 1.085252285003662,grad_norm: 0.9623083063205813, iteration: 47733
loss: 0.9637360572814941,grad_norm: 0.9063916139448356, iteration: 47734
loss: 1.0246992111206055,grad_norm: 0.9999992210036949, iteration: 47735
loss: 0.9923840761184692,grad_norm: 0.8941222848078592, iteration: 47736
loss: 0.9696069359779358,grad_norm: 0.999999091193469, iteration: 47737
loss: 0.9923973679542542,grad_norm: 0.8302785680798045, iteration: 47738
loss: 1.015433430671692,grad_norm: 0.8696302880887623, iteration: 47739
loss: 0.9801352024078369,grad_norm: 0.8908563487563145, iteration: 47740
loss: 1.0658466815948486,grad_norm: 0.9999992507804248, iteration: 47741
loss: 0.9880428314208984,grad_norm: 0.9999990401367517, iteration: 47742
loss: 1.0320725440979004,grad_norm: 0.9999991172793984, iteration: 47743
loss: 0.9566100239753723,grad_norm: 0.999999046989113, iteration: 47744
loss: 1.0320439338684082,grad_norm: 0.9999990780688988, iteration: 47745
loss: 1.0514931678771973,grad_norm: 0.9999998865734352, iteration: 47746
loss: 1.0389394760131836,grad_norm: 0.9999996722283823, iteration: 47747
loss: 1.0918916463851929,grad_norm: 1.0000000125641062, iteration: 47748
loss: 1.0115622282028198,grad_norm: 0.9205958784472872, iteration: 47749
loss: 1.0193263292312622,grad_norm: 0.9362493474617865, iteration: 47750
loss: 1.008665919303894,grad_norm: 0.8515092653991584, iteration: 47751
loss: 1.0105078220367432,grad_norm: 0.999999112383354, iteration: 47752
loss: 0.9894576072692871,grad_norm: 0.9999991706270329, iteration: 47753
loss: 1.0206164121627808,grad_norm: 0.9252419563431609, iteration: 47754
loss: 0.9698196053504944,grad_norm: 0.9227835958715344, iteration: 47755
loss: 0.9888784885406494,grad_norm: 0.9400328614106201, iteration: 47756
loss: 1.0312789678573608,grad_norm: 0.9251703089737339, iteration: 47757
loss: 0.9895937442779541,grad_norm: 0.9999990384955121, iteration: 47758
loss: 1.0509099960327148,grad_norm: 0.9999993056930309, iteration: 47759
loss: 1.0690348148345947,grad_norm: 0.999999131501773, iteration: 47760
loss: 1.0316425561904907,grad_norm: 0.9999995912880421, iteration: 47761
loss: 1.0197874307632446,grad_norm: 0.9999992382122468, iteration: 47762
loss: 1.0040974617004395,grad_norm: 0.8371499241734123, iteration: 47763
loss: 1.021547794342041,grad_norm: 0.783736447411662, iteration: 47764
loss: 0.9673694372177124,grad_norm: 0.9556827845589292, iteration: 47765
loss: 0.9948825240135193,grad_norm: 0.7690156157773359, iteration: 47766
loss: 0.9654811024665833,grad_norm: 0.9465356905567368, iteration: 47767
loss: 1.0246697664260864,grad_norm: 0.9999991829134192, iteration: 47768
loss: 1.0237010717391968,grad_norm: 0.9999992176654681, iteration: 47769
loss: 1.0420242547988892,grad_norm: 0.9999991347660864, iteration: 47770
loss: 1.0065827369689941,grad_norm: 0.794188736897913, iteration: 47771
loss: 1.2309321165084839,grad_norm: 0.9999992871143557, iteration: 47772
loss: 0.9908480644226074,grad_norm: 0.9999992556048434, iteration: 47773
loss: 0.9958731532096863,grad_norm: 0.999999328614538, iteration: 47774
loss: 1.0184543132781982,grad_norm: 0.9999991425766698, iteration: 47775
loss: 1.0332380533218384,grad_norm: 0.9665967073152959, iteration: 47776
loss: 1.0858432054519653,grad_norm: 0.849006207705167, iteration: 47777
loss: 0.9346485733985901,grad_norm: 0.9644351491616846, iteration: 47778
loss: 1.0092551708221436,grad_norm: 0.9999992067691458, iteration: 47779
loss: 1.0224899053573608,grad_norm: 0.8804085838107644, iteration: 47780
loss: 1.0117385387420654,grad_norm: 0.9999991620892247, iteration: 47781
loss: 1.0259140729904175,grad_norm: 0.8942356479665279, iteration: 47782
loss: 0.9892200827598572,grad_norm: 0.99999978944877, iteration: 47783
loss: 0.9782370924949646,grad_norm: 0.8431083469942473, iteration: 47784
loss: 1.01392662525177,grad_norm: 0.9999990172072505, iteration: 47785
loss: 0.9674391746520996,grad_norm: 0.9867559915591452, iteration: 47786
loss: 1.0083191394805908,grad_norm: 0.9999993842746626, iteration: 47787
loss: 1.0369138717651367,grad_norm: 0.9999992949245908, iteration: 47788
loss: 1.0055177211761475,grad_norm: 0.8776940008728608, iteration: 47789
loss: 1.0292145013809204,grad_norm: 0.9357963271925352, iteration: 47790
loss: 1.0312108993530273,grad_norm: 0.9999997336012355, iteration: 47791
loss: 1.020969271659851,grad_norm: 0.9999991773095296, iteration: 47792
loss: 1.054993987083435,grad_norm: 0.999999375149224, iteration: 47793
loss: 1.0031131505966187,grad_norm: 0.9661035951364481, iteration: 47794
loss: 0.9801691770553589,grad_norm: 0.8476582667076906, iteration: 47795
loss: 1.0146174430847168,grad_norm: 0.9843431754909012, iteration: 47796
loss: 0.9874662756919861,grad_norm: 0.9899749537365807, iteration: 47797
loss: 0.9509004354476929,grad_norm: 0.8531925474729786, iteration: 47798
loss: 0.9857997298240662,grad_norm: 0.8607589579980452, iteration: 47799
loss: 1.0072442293167114,grad_norm: 0.9999991327132007, iteration: 47800
loss: 1.0259815454483032,grad_norm: 0.9999993787668895, iteration: 47801
loss: 0.9886732697486877,grad_norm: 0.9336722203214294, iteration: 47802
loss: 1.0292714834213257,grad_norm: 0.9896484373575609, iteration: 47803
loss: 0.9871717095375061,grad_norm: 0.815912324776687, iteration: 47804
loss: 1.0557353496551514,grad_norm: 0.9140370473690387, iteration: 47805
loss: 1.0781323909759521,grad_norm: 0.9999998543407999, iteration: 47806
loss: 0.9873728156089783,grad_norm: 0.9999997340757518, iteration: 47807
loss: 1.0784814357757568,grad_norm: 0.9999992451269765, iteration: 47808
loss: 1.0304441452026367,grad_norm: 0.9999992664002545, iteration: 47809
loss: 0.9572297930717468,grad_norm: 0.9999990013777528, iteration: 47810
loss: 1.0171442031860352,grad_norm: 0.9804715587764267, iteration: 47811
loss: 1.035987138748169,grad_norm: 0.999999072579591, iteration: 47812
loss: 0.9761616587638855,grad_norm: 0.9999992373249468, iteration: 47813
loss: 0.9768671989440918,grad_norm: 0.999999182239625, iteration: 47814
loss: 1.0131595134735107,grad_norm: 0.9999992173821348, iteration: 47815
loss: 1.0265161991119385,grad_norm: 0.9999990734551882, iteration: 47816
loss: 1.0800992250442505,grad_norm: 0.9999995047443571, iteration: 47817
loss: 1.0018302202224731,grad_norm: 0.9999992575202816, iteration: 47818
loss: 0.9940438866615295,grad_norm: 0.8763290475803589, iteration: 47819
loss: 1.005826711654663,grad_norm: 0.9232274132051357, iteration: 47820
loss: 1.0921329259872437,grad_norm: 0.9448974381701937, iteration: 47821
loss: 1.0263739824295044,grad_norm: 0.99999920156998, iteration: 47822
loss: 1.0423493385314941,grad_norm: 0.9999993301412387, iteration: 47823
loss: 1.0842995643615723,grad_norm: 0.9999991332381716, iteration: 47824
loss: 1.0728232860565186,grad_norm: 0.9999994977435407, iteration: 47825
loss: 1.020493745803833,grad_norm: 0.9999991819203677, iteration: 47826
loss: 1.003114104270935,grad_norm: 0.9999990977594865, iteration: 47827
loss: 1.021628975868225,grad_norm: 0.9999992386428228, iteration: 47828
loss: 1.0364917516708374,grad_norm: 0.9999990899420368, iteration: 47829
loss: 1.004060983657837,grad_norm: 0.9999991631079297, iteration: 47830
loss: 0.9848553538322449,grad_norm: 0.8270448114511157, iteration: 47831
loss: 1.0174399614334106,grad_norm: 0.9999998638941361, iteration: 47832
loss: 1.046270728111267,grad_norm: 0.9999996221468866, iteration: 47833
loss: 1.0048761367797852,grad_norm: 0.8974900196532436, iteration: 47834
loss: 0.9935459494590759,grad_norm: 0.9999992028030921, iteration: 47835
loss: 0.9396625757217407,grad_norm: 0.9999992047060801, iteration: 47836
loss: 0.9786714315414429,grad_norm: 0.9999991526904607, iteration: 47837
loss: 1.0308315753936768,grad_norm: 0.9999991592501739, iteration: 47838
loss: 0.9796808362007141,grad_norm: 0.9999992312114304, iteration: 47839
loss: 1.0681260824203491,grad_norm: 0.9999993205888857, iteration: 47840
loss: 1.0333775281906128,grad_norm: 0.8619748551628157, iteration: 47841
loss: 1.0082405805587769,grad_norm: 0.9999991712075238, iteration: 47842
loss: 1.0047402381896973,grad_norm: 0.9823829734225088, iteration: 47843
loss: 1.0005007982254028,grad_norm: 0.9340774902977848, iteration: 47844
loss: 1.0020191669464111,grad_norm: 0.9999991708032873, iteration: 47845
loss: 0.992811918258667,grad_norm: 0.9999990733676493, iteration: 47846
loss: 1.040328860282898,grad_norm: 0.9999999002654133, iteration: 47847
loss: 1.0413463115692139,grad_norm: 0.9999990807678083, iteration: 47848
loss: 1.040805697441101,grad_norm: 0.9999991757890256, iteration: 47849
loss: 0.9908506870269775,grad_norm: 0.9027768736920296, iteration: 47850
loss: 1.004217505455017,grad_norm: 0.999999059744798, iteration: 47851
loss: 1.1153420209884644,grad_norm: 0.9359216479990639, iteration: 47852
loss: 1.0263770818710327,grad_norm: 0.9999991472920837, iteration: 47853
loss: 0.9966832995414734,grad_norm: 0.9968252740060529, iteration: 47854
loss: 1.108317494392395,grad_norm: 0.9999993469639927, iteration: 47855
loss: 1.0487807989120483,grad_norm: 0.9999990780746942, iteration: 47856
loss: 0.9720388054847717,grad_norm: 0.9860236846130566, iteration: 47857
loss: 1.0398441553115845,grad_norm: 0.964265945570917, iteration: 47858
loss: 1.0014749765396118,grad_norm: 0.9999991741414729, iteration: 47859
loss: 1.0126349925994873,grad_norm: 0.9999993029623242, iteration: 47860
loss: 0.9850999116897583,grad_norm: 0.8111689233076543, iteration: 47861
loss: 1.0320533514022827,grad_norm: 0.9999990873266942, iteration: 47862
loss: 1.054430603981018,grad_norm: 0.9999990768778602, iteration: 47863
loss: 1.0044487714767456,grad_norm: 0.9999990904981142, iteration: 47864
loss: 1.0508651733398438,grad_norm: 0.9999991769400554, iteration: 47865
loss: 1.1157854795455933,grad_norm: 0.9999992030262054, iteration: 47866
loss: 0.9412373304367065,grad_norm: 0.8755511232474252, iteration: 47867
loss: 1.010351300239563,grad_norm: 0.9488895990218197, iteration: 47868
loss: 0.9729422926902771,grad_norm: 0.8752044439455112, iteration: 47869
loss: 0.9777294397354126,grad_norm: 0.9425235706149389, iteration: 47870
loss: 0.9998488426208496,grad_norm: 0.8256642908511015, iteration: 47871
loss: 1.0916136503219604,grad_norm: 0.9999995301438301, iteration: 47872
loss: 0.99986732006073,grad_norm: 0.8763739856242035, iteration: 47873
loss: 1.0286390781402588,grad_norm: 0.9999995521523465, iteration: 47874
loss: 1.0733755826950073,grad_norm: 0.9999990917379872, iteration: 47875
loss: 0.9844116568565369,grad_norm: 0.9242972882642507, iteration: 47876
loss: 1.047363042831421,grad_norm: 0.999999352675409, iteration: 47877
loss: 0.9885015487670898,grad_norm: 0.8134480440671654, iteration: 47878
loss: 1.1048855781555176,grad_norm: 0.9999999558900305, iteration: 47879
loss: 1.061702847480774,grad_norm: 0.9999996734316018, iteration: 47880
loss: 1.0425664186477661,grad_norm: 0.999999288240019, iteration: 47881
loss: 1.0337896347045898,grad_norm: 0.9999991288163201, iteration: 47882
loss: 1.0382264852523804,grad_norm: 0.9999994253672648, iteration: 47883
loss: 0.9984034299850464,grad_norm: 0.9999992431118867, iteration: 47884
loss: 0.980260968208313,grad_norm: 0.9382547791114826, iteration: 47885
loss: 1.0298750400543213,grad_norm: 0.941290493871872, iteration: 47886
loss: 1.0252028703689575,grad_norm: 0.99999929906786, iteration: 47887
loss: 1.0766634941101074,grad_norm: 0.9999991411090009, iteration: 47888
loss: 1.0307353734970093,grad_norm: 0.9999995506544468, iteration: 47889
loss: 1.0636897087097168,grad_norm: 0.9999999053760534, iteration: 47890
loss: 0.9940925240516663,grad_norm: 0.9999990478649244, iteration: 47891
loss: 1.021897554397583,grad_norm: 0.9999997837950594, iteration: 47892
loss: 1.028806209564209,grad_norm: 0.9127619211140073, iteration: 47893
loss: 1.0260995626449585,grad_norm: 0.9999991515142965, iteration: 47894
loss: 1.0281550884246826,grad_norm: 0.9785469949616639, iteration: 47895
loss: 1.0347708463668823,grad_norm: 0.8937580540947621, iteration: 47896
loss: 1.0310691595077515,grad_norm: 0.8937886996553894, iteration: 47897
loss: 1.0245543718338013,grad_norm: 0.8728640349581727, iteration: 47898
loss: 1.0299898386001587,grad_norm: 0.9999991154907453, iteration: 47899
loss: 0.9959462285041809,grad_norm: 0.9999990085933366, iteration: 47900
loss: 1.010973334312439,grad_norm: 0.918236416969666, iteration: 47901
loss: 1.93532133102417,grad_norm: 0.9999998356294125, iteration: 47902
loss: 1.0551484823226929,grad_norm: 0.9999990383467002, iteration: 47903
loss: 1.2892403602600098,grad_norm: 0.9999998763473015, iteration: 47904
loss: 1.1241756677627563,grad_norm: 0.9840004044613208, iteration: 47905
loss: 1.0838863849639893,grad_norm: 0.9999992632064982, iteration: 47906
loss: 1.268064022064209,grad_norm: 0.9999992083733429, iteration: 47907
loss: 1.3232225179672241,grad_norm: 0.9999997511408432, iteration: 47908
loss: 1.024479866027832,grad_norm: 0.9999995323572707, iteration: 47909
loss: 1.1286262273788452,grad_norm: 0.9999992047578742, iteration: 47910
loss: 1.0055253505706787,grad_norm: 0.9999998485236453, iteration: 47911
loss: 1.0709396600723267,grad_norm: 0.9999993665829623, iteration: 47912
loss: 1.0389513969421387,grad_norm: 0.9999991771839499, iteration: 47913
loss: 1.0087296962738037,grad_norm: 0.8987293105087858, iteration: 47914
loss: 0.9737158417701721,grad_norm: 0.8410197110662178, iteration: 47915
loss: 1.0215214490890503,grad_norm: 0.9999992537073665, iteration: 47916
loss: 1.0945490598678589,grad_norm: 0.9999995623285615, iteration: 47917
loss: 1.0302772521972656,grad_norm: 0.9999996529998953, iteration: 47918
loss: 1.0867457389831543,grad_norm: 0.9999998678617644, iteration: 47919
loss: 1.0200554132461548,grad_norm: 0.9999991228337054, iteration: 47920
loss: 0.9905139803886414,grad_norm: 0.9999990900073346, iteration: 47921
loss: 1.0157437324523926,grad_norm: 0.9999990352659215, iteration: 47922
loss: 1.0135880708694458,grad_norm: 0.9999999171718194, iteration: 47923
loss: 1.243752121925354,grad_norm: 0.9999998337961272, iteration: 47924
loss: 1.0134847164154053,grad_norm: 0.9999990775037997, iteration: 47925
loss: 1.0103840827941895,grad_norm: 0.8924875661783941, iteration: 47926
loss: 1.0115503072738647,grad_norm: 0.9648649891429819, iteration: 47927
loss: 0.9797089695930481,grad_norm: 0.814176452023345, iteration: 47928
loss: 1.0815829038619995,grad_norm: 0.8397858474473064, iteration: 47929
loss: 1.0132521390914917,grad_norm: 0.9999998056609476, iteration: 47930
loss: 1.1669237613677979,grad_norm: 0.999999272476713, iteration: 47931
loss: 1.0867846012115479,grad_norm: 0.9999994523680904, iteration: 47932
loss: 1.2262543439865112,grad_norm: 0.9999990550495367, iteration: 47933
loss: 0.9973481297492981,grad_norm: 0.929029499305322, iteration: 47934
loss: 1.0655030012130737,grad_norm: 0.9999991720688877, iteration: 47935
loss: 1.2173911333084106,grad_norm: 0.9999994894678698, iteration: 47936
loss: 1.0094971656799316,grad_norm: 0.8341269750322545, iteration: 47937
loss: 1.0102087259292603,grad_norm: 0.956403830782266, iteration: 47938
loss: 0.951414942741394,grad_norm: 0.9999991592057876, iteration: 47939
loss: 1.0546846389770508,grad_norm: 0.9596951847950588, iteration: 47940
loss: 1.0469945669174194,grad_norm: 0.9999995618246549, iteration: 47941
loss: 1.0381762981414795,grad_norm: 0.9999996133071268, iteration: 47942
loss: 0.9998788237571716,grad_norm: 0.8994615567014137, iteration: 47943
loss: 0.9871727824211121,grad_norm: 0.9999992588668738, iteration: 47944
loss: 0.9863650798797607,grad_norm: 0.9129537635060824, iteration: 47945
loss: 1.0395522117614746,grad_norm: 0.9538353456603792, iteration: 47946
loss: 1.0480904579162598,grad_norm: 0.9323801766419008, iteration: 47947
loss: 0.9736940860748291,grad_norm: 0.7987745967644264, iteration: 47948
loss: 0.9905971884727478,grad_norm: 0.9900551729067996, iteration: 47949
loss: 1.0010260343551636,grad_norm: 0.9961671379658643, iteration: 47950
loss: 0.9762109518051147,grad_norm: 0.7623963447648023, iteration: 47951
loss: 1.0160008668899536,grad_norm: 0.9999989900629173, iteration: 47952
loss: 0.9854134917259216,grad_norm: 0.9999991293937561, iteration: 47953
loss: 1.0367634296417236,grad_norm: 0.7845371380344692, iteration: 47954
loss: 0.9995876550674438,grad_norm: 0.986975795370893, iteration: 47955
loss: 0.9982168674468994,grad_norm: 0.8818655247753161, iteration: 47956
loss: 1.017594814300537,grad_norm: 0.9999991354622481, iteration: 47957
loss: 0.9958181381225586,grad_norm: 0.9999992230375723, iteration: 47958
loss: 1.0653603076934814,grad_norm: 0.9999996591872421, iteration: 47959
loss: 1.0453399419784546,grad_norm: 0.9999999799110391, iteration: 47960
loss: 0.963586151599884,grad_norm: 0.9491310514413586, iteration: 47961
loss: 0.9954984188079834,grad_norm: 0.9298031457691849, iteration: 47962
loss: 0.9906442761421204,grad_norm: 0.9999990133420565, iteration: 47963
loss: 0.9866337776184082,grad_norm: 0.9999990678583002, iteration: 47964
loss: 0.9855501651763916,grad_norm: 0.999999352415737, iteration: 47965
loss: 1.0070639848709106,grad_norm: 0.9281103403159975, iteration: 47966
loss: 0.9991593360900879,grad_norm: 0.7834387267602113, iteration: 47967
loss: 0.9988237619400024,grad_norm: 0.8978407880450485, iteration: 47968
loss: 1.0235785245895386,grad_norm: 0.9999990847641641, iteration: 47969
loss: 1.0342997312545776,grad_norm: 0.9999990599385065, iteration: 47970
loss: 1.0211488008499146,grad_norm: 0.9497982587557838, iteration: 47971
loss: 1.0286964178085327,grad_norm: 0.9999989440006841, iteration: 47972
loss: 0.9697286486625671,grad_norm: 0.9869487773362895, iteration: 47973
loss: 0.9491398930549622,grad_norm: 0.8905190800193058, iteration: 47974
loss: 0.9851441979408264,grad_norm: 0.9688191573258847, iteration: 47975
loss: 1.0035107135772705,grad_norm: 0.9999991406473806, iteration: 47976
loss: 1.0765939950942993,grad_norm: 0.9040464509470851, iteration: 47977
loss: 1.0171648263931274,grad_norm: 0.99999951509762, iteration: 47978
loss: 1.0718964338302612,grad_norm: 0.999999421246835, iteration: 47979
loss: 1.0168503522872925,grad_norm: 0.9999991777546362, iteration: 47980
loss: 0.954741895198822,grad_norm: 0.9999991195123237, iteration: 47981
loss: 1.0144239664077759,grad_norm: 0.9004864846598661, iteration: 47982
loss: 0.9495145678520203,grad_norm: 0.9999989923586086, iteration: 47983
loss: 1.090934157371521,grad_norm: 0.9999996779812605, iteration: 47984
loss: 0.9788663387298584,grad_norm: 0.9675156836205481, iteration: 47985
loss: 0.9915907382965088,grad_norm: 0.9999993609902521, iteration: 47986
loss: 0.9602022171020508,grad_norm: 0.9999991401733053, iteration: 47987
loss: 0.9748069047927856,grad_norm: 0.9011727161559424, iteration: 47988
loss: 1.017141342163086,grad_norm: 0.9999992421560553, iteration: 47989
loss: 0.9750797748565674,grad_norm: 0.99999903369951, iteration: 47990
loss: 0.9547016024589539,grad_norm: 0.9462422400458613, iteration: 47991
loss: 0.9730464220046997,grad_norm: 0.8295321258661105, iteration: 47992
loss: 0.9882481694221497,grad_norm: 0.999999090457815, iteration: 47993
loss: 1.0212258100509644,grad_norm: 0.8855901982974016, iteration: 47994
loss: 1.038278341293335,grad_norm: 0.9999990926118616, iteration: 47995
loss: 0.9847416281700134,grad_norm: 0.9999989628831828, iteration: 47996
loss: 1.0196959972381592,grad_norm: 0.9999991431486582, iteration: 47997
loss: 0.9441528916358948,grad_norm: 0.7801613177508286, iteration: 47998
loss: 1.2763373851776123,grad_norm: 0.9999993805073603, iteration: 47999
loss: 0.976538896560669,grad_norm: 0.9999995190140996, iteration: 48000
loss: 1.0499281883239746,grad_norm: 0.9179701601851238, iteration: 48001
loss: 1.0078120231628418,grad_norm: 0.9999990214751605, iteration: 48002
loss: 1.0323094129562378,grad_norm: 0.9999995905971262, iteration: 48003
loss: 0.9937228560447693,grad_norm: 0.9999989921210767, iteration: 48004
loss: 1.0320450067520142,grad_norm: 0.7755527836058155, iteration: 48005
loss: 1.0089391469955444,grad_norm: 0.9999990794912224, iteration: 48006
loss: 1.0144723653793335,grad_norm: 0.878431729873477, iteration: 48007
loss: 1.02754807472229,grad_norm: 0.7620414658199144, iteration: 48008
loss: 1.0262454748153687,grad_norm: 0.9999991723352079, iteration: 48009
loss: 1.0241451263427734,grad_norm: 0.9999996399489317, iteration: 48010
loss: 1.1064661741256714,grad_norm: 0.9999996793907646, iteration: 48011
loss: 0.9724918007850647,grad_norm: 0.9999992460566607, iteration: 48012
loss: 1.0435909032821655,grad_norm: 0.9999991779529249, iteration: 48013
loss: 1.0018033981323242,grad_norm: 0.9999997606612889, iteration: 48014
loss: 1.0358840227127075,grad_norm: 0.9999989351047386, iteration: 48015
loss: 0.9902841448783875,grad_norm: 0.9597074563195106, iteration: 48016
loss: 1.035492181777954,grad_norm: 0.7630633322430327, iteration: 48017
loss: 0.985106885433197,grad_norm: 0.9999993986092304, iteration: 48018
loss: 1.0294535160064697,grad_norm: 0.9220325670664643, iteration: 48019
loss: 1.0498204231262207,grad_norm: 0.9999996701647375, iteration: 48020
loss: 1.0076197385787964,grad_norm: 0.9795390370597776, iteration: 48021
loss: 1.0174615383148193,grad_norm: 0.9999998826642769, iteration: 48022
loss: 0.9934266805648804,grad_norm: 0.9999989957681983, iteration: 48023
loss: 0.9979941844940186,grad_norm: 0.9999991138547351, iteration: 48024
loss: 1.038723349571228,grad_norm: 0.9999990520757209, iteration: 48025
loss: 0.9938039779663086,grad_norm: 0.9999991445638596, iteration: 48026
loss: 1.0923755168914795,grad_norm: 0.9999995245587932, iteration: 48027
loss: 1.1557682752609253,grad_norm: 0.9999995622009995, iteration: 48028
loss: 1.011518955230713,grad_norm: 0.9999993775346425, iteration: 48029
loss: 1.016515851020813,grad_norm: 0.9999992611898129, iteration: 48030
loss: 1.0752415657043457,grad_norm: 0.9999994652420787, iteration: 48031
loss: 1.0131875276565552,grad_norm: 0.9999991367076302, iteration: 48032
loss: 1.0033926963806152,grad_norm: 0.8662057116084151, iteration: 48033
loss: 1.0052988529205322,grad_norm: 0.996559909369556, iteration: 48034
loss: 0.9979535341262817,grad_norm: 0.9999989807292977, iteration: 48035
loss: 1.0796589851379395,grad_norm: 0.999999341460495, iteration: 48036
loss: 0.9739517569541931,grad_norm: 0.8824401941718679, iteration: 48037
loss: 0.9814141988754272,grad_norm: 0.9999991216472933, iteration: 48038
loss: 0.979534924030304,grad_norm: 0.9999990624258263, iteration: 48039
loss: 0.9634231925010681,grad_norm: 0.8404441994243937, iteration: 48040
loss: 0.97657310962677,grad_norm: 0.829216092875024, iteration: 48041
loss: 0.9530996680259705,grad_norm: 0.8695009760576479, iteration: 48042
loss: 0.9366945624351501,grad_norm: 0.7805901398687513, iteration: 48043
loss: 0.9766906499862671,grad_norm: 0.9999996885124204, iteration: 48044
loss: 1.0445430278778076,grad_norm: 0.9999990758921872, iteration: 48045
loss: 1.0456550121307373,grad_norm: 0.9999998877719672, iteration: 48046
loss: 1.0252048969268799,grad_norm: 0.7943362699177993, iteration: 48047
loss: 0.9867348074913025,grad_norm: 0.9999993997259631, iteration: 48048
loss: 1.0321216583251953,grad_norm: 0.9999998339217802, iteration: 48049
loss: 0.99756920337677,grad_norm: 0.9919085731183984, iteration: 48050
loss: 1.00395929813385,grad_norm: 0.9185580819740684, iteration: 48051
loss: 1.013491153717041,grad_norm: 0.9999992018116017, iteration: 48052
loss: 0.9843113422393799,grad_norm: 0.8575448271089141, iteration: 48053
loss: 1.0302730798721313,grad_norm: 0.9999991605003737, iteration: 48054
loss: 0.9936550855636597,grad_norm: 0.9999992198070208, iteration: 48055
loss: 1.0124555826187134,grad_norm: 0.8722717166961008, iteration: 48056
loss: 0.9919137358665466,grad_norm: 0.999999424441404, iteration: 48057
loss: 1.0248801708221436,grad_norm: 0.9083475195018881, iteration: 48058
loss: 1.0491480827331543,grad_norm: 0.9999998018294345, iteration: 48059
loss: 1.0048609972000122,grad_norm: 0.9999990047887798, iteration: 48060
loss: 0.9667330980300903,grad_norm: 0.8831061480268954, iteration: 48061
loss: 1.0459198951721191,grad_norm: 0.9999996982172179, iteration: 48062
loss: 1.0213128328323364,grad_norm: 0.9999990044233508, iteration: 48063
loss: 1.0613524913787842,grad_norm: 0.9999991119044844, iteration: 48064
loss: 1.0286531448364258,grad_norm: 0.9525958853056935, iteration: 48065
loss: 1.1144713163375854,grad_norm: 0.999998977805629, iteration: 48066
loss: 1.014432430267334,grad_norm: 0.9999992185761214, iteration: 48067
loss: 0.9855365753173828,grad_norm: 0.9343366935214683, iteration: 48068
loss: 0.9775602221488953,grad_norm: 0.9106872413518138, iteration: 48069
loss: 1.002126932144165,grad_norm: 0.9999992445372483, iteration: 48070
loss: 1.0130877494812012,grad_norm: 0.8289876859572217, iteration: 48071
loss: 0.9934769868850708,grad_norm: 0.9797083983930077, iteration: 48072
loss: 1.0057393312454224,grad_norm: 0.8205209548236366, iteration: 48073
loss: 0.9901518821716309,grad_norm: 0.9999990334011358, iteration: 48074
loss: 0.9713202118873596,grad_norm: 0.7776890998517793, iteration: 48075
loss: 0.9785563945770264,grad_norm: 0.9999990389595573, iteration: 48076
loss: 1.0118736028671265,grad_norm: 0.9999992046463837, iteration: 48077
loss: 0.98981773853302,grad_norm: 0.9665636971239359, iteration: 48078
loss: 1.016931414604187,grad_norm: 0.8834026305112674, iteration: 48079
loss: 1.0258644819259644,grad_norm: 0.8742599624988056, iteration: 48080
loss: 1.00822114944458,grad_norm: 0.9905403758346468, iteration: 48081
loss: 0.9572901129722595,grad_norm: 0.9999990996399607, iteration: 48082
loss: 1.0051236152648926,grad_norm: 0.7991860677993984, iteration: 48083
loss: 1.0256658792495728,grad_norm: 0.9768667848824901, iteration: 48084
loss: 0.9890108108520508,grad_norm: 0.9062606110835213, iteration: 48085
loss: 1.0381929874420166,grad_norm: 0.9999992939142641, iteration: 48086
loss: 1.0449156761169434,grad_norm: 0.9999994089741865, iteration: 48087
loss: 1.0139455795288086,grad_norm: 0.8504626355605015, iteration: 48088
loss: 0.9432817101478577,grad_norm: 0.8622538524148607, iteration: 48089
loss: 0.9950593113899231,grad_norm: 0.9999991147889805, iteration: 48090
loss: 0.9987421035766602,grad_norm: 0.9999993862999789, iteration: 48091
loss: 0.9654731154441833,grad_norm: 0.9999991394127236, iteration: 48092
loss: 0.9891065359115601,grad_norm: 0.9875050731024317, iteration: 48093
loss: 0.9890331625938416,grad_norm: 0.999999076578358, iteration: 48094
loss: 1.0246870517730713,grad_norm: 0.9999995328302372, iteration: 48095
loss: 1.0352672338485718,grad_norm: 0.9493310061529945, iteration: 48096
loss: 1.0363337993621826,grad_norm: 0.9999993300418918, iteration: 48097
loss: 0.9889948964118958,grad_norm: 0.9999992854151856, iteration: 48098
loss: 1.0613356828689575,grad_norm: 0.9999993231131444, iteration: 48099
loss: 1.1236639022827148,grad_norm: 0.9999997348598502, iteration: 48100
loss: 1.0130418539047241,grad_norm: 0.9999995082034088, iteration: 48101
loss: 1.0100979804992676,grad_norm: 0.9999996884613961, iteration: 48102
loss: 1.0308711528778076,grad_norm: 0.9787883813661411, iteration: 48103
loss: 0.9963341355323792,grad_norm: 0.8736474681927895, iteration: 48104
loss: 1.0349555015563965,grad_norm: 0.9999994003333218, iteration: 48105
loss: 1.0030823945999146,grad_norm: 0.9549468735015239, iteration: 48106
loss: 1.0068515539169312,grad_norm: 0.8315080598917239, iteration: 48107
loss: 0.9946761727333069,grad_norm: 0.8457424012275867, iteration: 48108
loss: 0.9912707805633545,grad_norm: 0.8986088840424103, iteration: 48109
loss: 0.9677901268005371,grad_norm: 0.9061798405653917, iteration: 48110
loss: 1.0012469291687012,grad_norm: 0.9999996317123016, iteration: 48111
loss: 1.0250623226165771,grad_norm: 0.9999991908397295, iteration: 48112
loss: 0.9874405860900879,grad_norm: 0.999999082171865, iteration: 48113
loss: 0.9747945070266724,grad_norm: 0.9628012921205262, iteration: 48114
loss: 1.0107061862945557,grad_norm: 0.9677019300694703, iteration: 48115
loss: 0.9857971668243408,grad_norm: 0.9251118271621412, iteration: 48116
loss: 1.005334734916687,grad_norm: 0.9999994425629173, iteration: 48117
loss: 1.0098260641098022,grad_norm: 0.9813006552064945, iteration: 48118
loss: 0.9990949034690857,grad_norm: 0.9999991847801547, iteration: 48119
loss: 0.975130558013916,grad_norm: 0.805197811488982, iteration: 48120
loss: 1.0129128694534302,grad_norm: 0.8858398903985956, iteration: 48121
loss: 1.0127307176589966,grad_norm: 0.9110605704776568, iteration: 48122
loss: 1.0499683618545532,grad_norm: 0.9999993312069917, iteration: 48123
loss: 1.0302826166152954,grad_norm: 0.9789218489921127, iteration: 48124
loss: 0.9768941402435303,grad_norm: 0.9999993090000378, iteration: 48125
loss: 0.9857645034790039,grad_norm: 0.8556419000648084, iteration: 48126
loss: 1.1384882926940918,grad_norm: 0.9999989828316921, iteration: 48127
loss: 1.060566782951355,grad_norm: 0.9999992408860598, iteration: 48128
loss: 1.0395781993865967,grad_norm: 0.9999999590178933, iteration: 48129
loss: 1.0067853927612305,grad_norm: 0.9999991491156635, iteration: 48130
loss: 1.0098779201507568,grad_norm: 0.908312995733856, iteration: 48131
loss: 1.0243518352508545,grad_norm: 0.9999990048691977, iteration: 48132
loss: 1.0185017585754395,grad_norm: 0.9999990649001487, iteration: 48133
loss: 1.0055458545684814,grad_norm: 0.9999990986627586, iteration: 48134
loss: 1.0100631713867188,grad_norm: 0.9999992895409091, iteration: 48135
loss: 1.0239564180374146,grad_norm: 0.8939595956477305, iteration: 48136
loss: 1.0235803127288818,grad_norm: 0.9898291175489128, iteration: 48137
loss: 1.056618332862854,grad_norm: 0.9999993896251206, iteration: 48138
loss: 1.0162538290023804,grad_norm: 0.9951697824268565, iteration: 48139
loss: 1.0330933332443237,grad_norm: 0.8483400813421691, iteration: 48140
loss: 1.0468833446502686,grad_norm: 0.9999991658086627, iteration: 48141
loss: 0.9951770901679993,grad_norm: 0.99999915221042, iteration: 48142
loss: 1.0203807353973389,grad_norm: 0.9979339409835972, iteration: 48143
loss: 1.1437404155731201,grad_norm: 0.9705906368044436, iteration: 48144
loss: 1.0218926668167114,grad_norm: 0.9999989893768506, iteration: 48145
loss: 1.0243148803710938,grad_norm: 0.8774592133690966, iteration: 48146
loss: 1.011646032333374,grad_norm: 0.9295279979632253, iteration: 48147
loss: 1.0496156215667725,grad_norm: 0.999999191456302, iteration: 48148
loss: 1.066558837890625,grad_norm: 0.9999990846020708, iteration: 48149
loss: 1.1323540210723877,grad_norm: 0.9999996442975291, iteration: 48150
loss: 0.9956513047218323,grad_norm: 0.8800756496160379, iteration: 48151
loss: 1.0244710445404053,grad_norm: 0.889847756395166, iteration: 48152
loss: 0.9766917824745178,grad_norm: 0.9999990655731679, iteration: 48153
loss: 1.0281486511230469,grad_norm: 0.9269300359252673, iteration: 48154
loss: 1.0291310548782349,grad_norm: 0.9051673264515963, iteration: 48155
loss: 1.0221238136291504,grad_norm: 0.9999991860858083, iteration: 48156
loss: 1.0047736167907715,grad_norm: 0.9999990424864424, iteration: 48157
loss: 1.0144468545913696,grad_norm: 0.9999992091379976, iteration: 48158
loss: 1.000666618347168,grad_norm: 0.8806913523152007, iteration: 48159
loss: 1.124742031097412,grad_norm: 0.9999998077936287, iteration: 48160
loss: 1.0088971853256226,grad_norm: 0.9999993652546512, iteration: 48161
loss: 1.0164905786514282,grad_norm: 0.9999999114740384, iteration: 48162
loss: 1.2348129749298096,grad_norm: 0.999999899140818, iteration: 48163
loss: 1.0526524782180786,grad_norm: 0.9999998701301369, iteration: 48164
loss: 1.014160394668579,grad_norm: 0.8279395106617052, iteration: 48165
loss: 1.0631765127182007,grad_norm: 0.9999992263287276, iteration: 48166
loss: 1.2128267288208008,grad_norm: 0.9999995229472017, iteration: 48167
loss: 1.0723044872283936,grad_norm: 0.9999991255482452, iteration: 48168
loss: 1.0103647708892822,grad_norm: 0.9999999352380102, iteration: 48169
loss: 0.9855661392211914,grad_norm: 0.8566963060699885, iteration: 48170
loss: 0.999752402305603,grad_norm: 0.8948176629446789, iteration: 48171
loss: 0.9959630370140076,grad_norm: 0.8873640669256202, iteration: 48172
loss: 0.9863101243972778,grad_norm: 0.9999991775127994, iteration: 48173
loss: 1.0069315433502197,grad_norm: 0.9999991328682621, iteration: 48174
loss: 1.033819317817688,grad_norm: 0.999999396448542, iteration: 48175
loss: 1.0089086294174194,grad_norm: 0.9999990036283336, iteration: 48176
loss: 1.0917670726776123,grad_norm: 0.9999994027624829, iteration: 48177
loss: 1.1719011068344116,grad_norm: 0.999999832322852, iteration: 48178
loss: 1.0199893712997437,grad_norm: 0.9999991564670929, iteration: 48179
loss: 0.9825643301010132,grad_norm: 0.9800424787051458, iteration: 48180
loss: 1.0031945705413818,grad_norm: 0.9367755865835441, iteration: 48181
loss: 1.0103610754013062,grad_norm: 0.9999990576514564, iteration: 48182
loss: 1.0662236213684082,grad_norm: 0.7698320857988132, iteration: 48183
loss: 0.9895833134651184,grad_norm: 0.9112496794910742, iteration: 48184
loss: 0.9744028449058533,grad_norm: 0.8679443842735961, iteration: 48185
loss: 1.0306614637374878,grad_norm: 0.999999573463424, iteration: 48186
loss: 1.1950874328613281,grad_norm: 0.9999992644286622, iteration: 48187
loss: 1.0069421529769897,grad_norm: 0.9999991736019496, iteration: 48188
loss: 1.1625126600265503,grad_norm: 0.99999924808487, iteration: 48189
loss: 1.178642749786377,grad_norm: 0.9999997715352986, iteration: 48190
loss: 1.0115106105804443,grad_norm: 0.9999991105487214, iteration: 48191
loss: 1.0738787651062012,grad_norm: 1.0000000211575386, iteration: 48192
loss: 1.102267861366272,grad_norm: 0.9999991917271518, iteration: 48193
loss: 1.0637987852096558,grad_norm: 0.9999989657213757, iteration: 48194
loss: 1.0388977527618408,grad_norm: 0.9999996347334268, iteration: 48195
loss: 1.1798021793365479,grad_norm: 0.9999994741301731, iteration: 48196
loss: 1.0271719694137573,grad_norm: 0.8653067263155975, iteration: 48197
loss: 1.1668790578842163,grad_norm: 0.999999748740135, iteration: 48198
loss: 0.9941124320030212,grad_norm: 0.9999992500498696, iteration: 48199
loss: 1.0653555393218994,grad_norm: 0.999999690893006, iteration: 48200
loss: 1.0126674175262451,grad_norm: 0.9999993600862138, iteration: 48201
loss: 1.022274136543274,grad_norm: 0.9999994469842277, iteration: 48202
loss: 1.1296780109405518,grad_norm: 0.9999997849653717, iteration: 48203
loss: 1.0531560182571411,grad_norm: 0.9999995645966384, iteration: 48204
loss: 1.147429347038269,grad_norm: 0.9999990819400479, iteration: 48205
loss: 1.1793649196624756,grad_norm: 0.9999997183054531, iteration: 48206
loss: 1.259804368019104,grad_norm: 0.9999994520169095, iteration: 48207
loss: 1.0689005851745605,grad_norm: 0.9999994720837078, iteration: 48208
loss: 1.2554296255111694,grad_norm: 0.9999992382050568, iteration: 48209
loss: 1.0880523920059204,grad_norm: 0.9999991894204188, iteration: 48210
loss: 1.0568917989730835,grad_norm: 0.9999995276885668, iteration: 48211
loss: 1.361662745475769,grad_norm: 0.9999999379561275, iteration: 48212
loss: 1.0613213777542114,grad_norm: 0.9999995044486056, iteration: 48213
loss: 1.1229848861694336,grad_norm: 0.9999994223369327, iteration: 48214
loss: 1.2812517881393433,grad_norm: 0.99999952397768, iteration: 48215
loss: 0.9979191422462463,grad_norm: 0.9935508266274687, iteration: 48216
loss: 1.049222469329834,grad_norm: 0.999999012091379, iteration: 48217
loss: 1.0179311037063599,grad_norm: 0.9999991783386907, iteration: 48218
loss: 1.120741605758667,grad_norm: 0.9999996256172742, iteration: 48219
loss: 1.0242544412612915,grad_norm: 0.8730729630800698, iteration: 48220
loss: 1.1902927160263062,grad_norm: 0.9999998098054074, iteration: 48221
loss: 1.0555596351623535,grad_norm: 0.9999995181588489, iteration: 48222
loss: 1.1055941581726074,grad_norm: 0.9999994654612653, iteration: 48223
loss: 1.2754547595977783,grad_norm: 1.0000000202231427, iteration: 48224
loss: 1.0604802370071411,grad_norm: 0.9999993394421293, iteration: 48225
loss: 1.0745769739151,grad_norm: 0.9999993038585546, iteration: 48226
loss: 1.0903445482254028,grad_norm: 0.999999838686358, iteration: 48227
loss: 1.219735860824585,grad_norm: 0.9999992740970057, iteration: 48228
loss: 0.98334801197052,grad_norm: 0.9517443732769656, iteration: 48229
loss: 1.013574481010437,grad_norm: 0.9999996247049333, iteration: 48230
loss: 1.0227265357971191,grad_norm: 0.9999989295924332, iteration: 48231
loss: 1.17410147190094,grad_norm: 0.9999998584924455, iteration: 48232
loss: 1.073432207107544,grad_norm: 0.9999990974965569, iteration: 48233
loss: 1.0736427307128906,grad_norm: 0.9999995892228455, iteration: 48234
loss: 1.092787265777588,grad_norm: 0.9999996069923777, iteration: 48235
loss: 1.0160385370254517,grad_norm: 0.9999991665945257, iteration: 48236
loss: 1.0129567384719849,grad_norm: 0.9421880336302872, iteration: 48237
loss: 1.0912607908248901,grad_norm: 0.9999996620924723, iteration: 48238
loss: 0.9970124959945679,grad_norm: 0.8487580917958447, iteration: 48239
loss: 1.139017939567566,grad_norm: 0.9999994165398514, iteration: 48240
loss: 0.9736884236335754,grad_norm: 0.9999993794734089, iteration: 48241
loss: 1.0364038944244385,grad_norm: 0.9999992625438986, iteration: 48242
loss: 0.9838525056838989,grad_norm: 0.9249673355894549, iteration: 48243
loss: 1.173951268196106,grad_norm: 0.9999992291361243, iteration: 48244
loss: 1.0847980976104736,grad_norm: 0.9999996865644539, iteration: 48245
loss: 1.1244455575942993,grad_norm: 0.9999997471513397, iteration: 48246
loss: 0.9808136224746704,grad_norm: 0.7947009943957927, iteration: 48247
loss: 1.0121110677719116,grad_norm: 0.9999993012622155, iteration: 48248
loss: 0.9473626017570496,grad_norm: 0.9999989820536244, iteration: 48249
loss: 1.0375280380249023,grad_norm: 0.9999992318465117, iteration: 48250
loss: 1.0346875190734863,grad_norm: 0.9999996936879311, iteration: 48251
loss: 1.054843544960022,grad_norm: 0.9999996863311807, iteration: 48252
loss: 1.0221953392028809,grad_norm: 0.9999992376207353, iteration: 48253
loss: 0.986950695514679,grad_norm: 0.9999991831708417, iteration: 48254
loss: 1.0277912616729736,grad_norm: 0.999999633219325, iteration: 48255
loss: 1.0177786350250244,grad_norm: 0.9999991077932022, iteration: 48256
loss: 1.0205533504486084,grad_norm: 0.999999855029689, iteration: 48257
loss: 1.1162184476852417,grad_norm: 0.9999993714160894, iteration: 48258
loss: 1.0862864255905151,grad_norm: 0.9999999530599365, iteration: 48259
loss: 1.0808439254760742,grad_norm: 0.9999994402724092, iteration: 48260
loss: 1.082962989807129,grad_norm: 0.9999992378179119, iteration: 48261
loss: 1.069778323173523,grad_norm: 0.9999993333006177, iteration: 48262
loss: 1.0535340309143066,grad_norm: 0.9999992702424695, iteration: 48263
loss: 1.0030535459518433,grad_norm: 0.833790250275405, iteration: 48264
loss: 1.0559476613998413,grad_norm: 0.9999990921098904, iteration: 48265
loss: 0.9822648763656616,grad_norm: 0.920025305883503, iteration: 48266
loss: 0.9975669980049133,grad_norm: 0.9999994547395604, iteration: 48267
loss: 1.008849024772644,grad_norm: 0.8846157083575542, iteration: 48268
loss: 1.0154608488082886,grad_norm: 0.9969173536635337, iteration: 48269
loss: 1.0193954706192017,grad_norm: 0.9907364300738286, iteration: 48270
loss: 0.9437500238418579,grad_norm: 0.8784053313545069, iteration: 48271
loss: 0.9797472953796387,grad_norm: 0.8883889432367783, iteration: 48272
loss: 0.9984881281852722,grad_norm: 0.9999993567862991, iteration: 48273
loss: 1.0501132011413574,grad_norm: 0.9999993212098683, iteration: 48274
loss: 0.972456693649292,grad_norm: 0.9999991189643335, iteration: 48275
loss: 0.9712561964988708,grad_norm: 0.999998875580814, iteration: 48276
loss: 0.9725854992866516,grad_norm: 0.9424921010889146, iteration: 48277
loss: 1.0224626064300537,grad_norm: 0.9999993512373412, iteration: 48278
loss: 0.9859004616737366,grad_norm: 0.9999997943631962, iteration: 48279
loss: 0.9942148327827454,grad_norm: 0.999998993028703, iteration: 48280
loss: 1.0149494409561157,grad_norm: 0.9999991965413942, iteration: 48281
loss: 0.9766941070556641,grad_norm: 0.8037637066888631, iteration: 48282
loss: 1.0086723566055298,grad_norm: 0.7899239842394601, iteration: 48283
loss: 1.0046088695526123,grad_norm: 0.9999992522195283, iteration: 48284
loss: 0.9701750874519348,grad_norm: 0.789378240433992, iteration: 48285
loss: 1.0526412725448608,grad_norm: 0.999999911897107, iteration: 48286
loss: 1.0611802339553833,grad_norm: 0.9237966756161159, iteration: 48287
loss: 1.0238031148910522,grad_norm: 0.9999993793719311, iteration: 48288
loss: 1.0029724836349487,grad_norm: 0.9999990175537762, iteration: 48289
loss: 0.9970633387565613,grad_norm: 0.9993075521707165, iteration: 48290
loss: 0.9726427793502808,grad_norm: 0.9999990901798651, iteration: 48291
loss: 1.0387895107269287,grad_norm: 0.9894699148387329, iteration: 48292
loss: 1.00770103931427,grad_norm: 0.8977755261824777, iteration: 48293
loss: 0.9671233892440796,grad_norm: 0.9999989621862977, iteration: 48294
loss: 0.9692457914352417,grad_norm: 0.9999991395036057, iteration: 48295
loss: 1.061825156211853,grad_norm: 0.9999999987445948, iteration: 48296
loss: 0.993441641330719,grad_norm: 0.9999990900149276, iteration: 48297
loss: 1.0381495952606201,grad_norm: 0.9204647736885233, iteration: 48298
loss: 0.994726300239563,grad_norm: 0.8801796295448635, iteration: 48299
loss: 1.0005179643630981,grad_norm: 0.999999119313221, iteration: 48300
loss: 0.9959855675697327,grad_norm: 0.9999991633244147, iteration: 48301
loss: 0.992967963218689,grad_norm: 0.9999990060653051, iteration: 48302
loss: 1.122922420501709,grad_norm: 0.9999999768998412, iteration: 48303
loss: 1.070642352104187,grad_norm: 0.9999993265895966, iteration: 48304
loss: 1.008226752281189,grad_norm: 0.9999992246046213, iteration: 48305
loss: 1.0492496490478516,grad_norm: 0.9999989790337859, iteration: 48306
loss: 1.0179163217544556,grad_norm: 0.9740990247920364, iteration: 48307
loss: 0.9678887128829956,grad_norm: 0.9198702892626262, iteration: 48308
loss: 0.9757224321365356,grad_norm: 0.794184255601392, iteration: 48309
loss: 1.0693798065185547,grad_norm: 0.9999993743769121, iteration: 48310
loss: 0.9952563047409058,grad_norm: 0.9999994690731756, iteration: 48311
loss: 1.0382198095321655,grad_norm: 0.8866592979919027, iteration: 48312
loss: 0.9571879506111145,grad_norm: 0.9442870627593072, iteration: 48313
loss: 0.9985160231590271,grad_norm: 0.9999997265188156, iteration: 48314
loss: 1.1087284088134766,grad_norm: 0.9999993736482521, iteration: 48315
loss: 0.9823070764541626,grad_norm: 0.9999991733697654, iteration: 48316
loss: 1.0203206539154053,grad_norm: 0.9999990462476606, iteration: 48317
loss: 1.0174007415771484,grad_norm: 1.000000015504157, iteration: 48318
loss: 1.0254698991775513,grad_norm: 0.9476918197223375, iteration: 48319
loss: 1.020048975944519,grad_norm: 0.7584026197372165, iteration: 48320
loss: 0.9780195355415344,grad_norm: 1.0000000203208865, iteration: 48321
loss: 0.9863560199737549,grad_norm: 0.9422423808367401, iteration: 48322
loss: 0.9740559458732605,grad_norm: 0.9061272776864268, iteration: 48323
loss: 0.9717163443565369,grad_norm: 0.8011422626714982, iteration: 48324
loss: 0.990236222743988,grad_norm: 0.9149562748750725, iteration: 48325
loss: 1.065224289894104,grad_norm: 0.9999994278218234, iteration: 48326
loss: 1.0306034088134766,grad_norm: 0.9864630554596288, iteration: 48327
loss: 1.0641838312149048,grad_norm: 0.9999997290605084, iteration: 48328
loss: 1.0060433149337769,grad_norm: 0.9999990893366731, iteration: 48329
loss: 0.9766340255737305,grad_norm: 0.8841609144124906, iteration: 48330
loss: 1.1218562126159668,grad_norm: 0.9999996194974166, iteration: 48331
loss: 1.0019780397415161,grad_norm: 0.9999994133669937, iteration: 48332
loss: 1.027627944946289,grad_norm: 0.9999989777729988, iteration: 48333
loss: 0.9334278106689453,grad_norm: 0.9999991332824408, iteration: 48334
loss: 0.9987546801567078,grad_norm: 0.9233984255983283, iteration: 48335
loss: 0.9899007081985474,grad_norm: 0.999999496487988, iteration: 48336
loss: 1.0022175312042236,grad_norm: 0.999999023417543, iteration: 48337
loss: 1.25120210647583,grad_norm: 0.9999993682078545, iteration: 48338
loss: 0.9756027460098267,grad_norm: 0.8919112205583754, iteration: 48339
loss: 0.9597395062446594,grad_norm: 0.9966898764102297, iteration: 48340
loss: 1.1477023363113403,grad_norm: 0.9999995821028453, iteration: 48341
loss: 1.1191045045852661,grad_norm: 0.9999996416463363, iteration: 48342
loss: 1.0226263999938965,grad_norm: 0.9999990529789482, iteration: 48343
loss: 0.9742178320884705,grad_norm: 0.9873056413231566, iteration: 48344
loss: 0.9825210571289062,grad_norm: 0.99999914950652, iteration: 48345
loss: 1.0399603843688965,grad_norm: 0.999999355441921, iteration: 48346
loss: 1.1041874885559082,grad_norm: 0.9999995741022839, iteration: 48347
loss: 1.008653163909912,grad_norm: 0.9999991633666526, iteration: 48348
loss: 1.0582623481750488,grad_norm: 0.9999990768514874, iteration: 48349
loss: 0.9527814984321594,grad_norm: 0.999999412490783, iteration: 48350
loss: 1.025942087173462,grad_norm: 0.9883049278511759, iteration: 48351
loss: 1.0105886459350586,grad_norm: 0.9999992434346544, iteration: 48352
loss: 0.9760855436325073,grad_norm: 0.9999992412974859, iteration: 48353
loss: 0.9962394833564758,grad_norm: 0.9976790367343001, iteration: 48354
loss: 1.0082247257232666,grad_norm: 0.9169816135763462, iteration: 48355
loss: 1.0164567232131958,grad_norm: 0.8393063818254834, iteration: 48356
loss: 1.0299886465072632,grad_norm: 0.9999991091912922, iteration: 48357
loss: 0.9809882640838623,grad_norm: 0.9999991956217171, iteration: 48358
loss: 1.020758032798767,grad_norm: 0.999999193588917, iteration: 48359
loss: 1.0132262706756592,grad_norm: 0.999999010163913, iteration: 48360
loss: 1.0437846183776855,grad_norm: 0.9999990570091234, iteration: 48361
loss: 1.0177626609802246,grad_norm: 0.892830907363417, iteration: 48362
loss: 0.9718298316001892,grad_norm: 0.9658206714060511, iteration: 48363
loss: 1.0097070932388306,grad_norm: 0.8808227613981153, iteration: 48364
loss: 0.9508293271064758,grad_norm: 0.9999993107836699, iteration: 48365
loss: 1.0046169757843018,grad_norm: 0.9468311253915774, iteration: 48366
loss: 1.034723162651062,grad_norm: 0.9999991919724094, iteration: 48367
loss: 0.9793208837509155,grad_norm: 0.7770956526848752, iteration: 48368
loss: 0.979173481464386,grad_norm: 0.9999995362825423, iteration: 48369
loss: 1.0125198364257812,grad_norm: 0.9978328255683754, iteration: 48370
loss: 1.0277434587478638,grad_norm: 0.9999991566724804, iteration: 48371
loss: 1.0049947500228882,grad_norm: 0.9999999459400905, iteration: 48372
loss: 1.0104402303695679,grad_norm: 0.99999929577971, iteration: 48373
loss: 1.0349092483520508,grad_norm: 0.9999993802790342, iteration: 48374
loss: 0.9925604462623596,grad_norm: 0.9999990244723027, iteration: 48375
loss: 1.0140875577926636,grad_norm: 0.9198148983836462, iteration: 48376
loss: 0.9932246208190918,grad_norm: 0.9553171299194438, iteration: 48377
loss: 1.0060086250305176,grad_norm: 0.9770684200736914, iteration: 48378
loss: 0.9924418330192566,grad_norm: 0.9999990011408394, iteration: 48379
loss: 1.024857759475708,grad_norm: 0.9999999465882721, iteration: 48380
loss: 1.013984203338623,grad_norm: 0.9999990589527201, iteration: 48381
loss: 0.9778764843940735,grad_norm: 0.9999991777971997, iteration: 48382
loss: 1.018431305885315,grad_norm: 0.9999997910699706, iteration: 48383
loss: 1.0026370286941528,grad_norm: 0.99999960128678, iteration: 48384
loss: 1.0116809606552124,grad_norm: 0.9135408292537149, iteration: 48385
loss: 1.0084102153778076,grad_norm: 0.9999992890116234, iteration: 48386
loss: 0.9912212491035461,grad_norm: 0.9999992353934287, iteration: 48387
loss: 1.075660228729248,grad_norm: 0.9999996223954123, iteration: 48388
loss: 0.9880837202072144,grad_norm: 0.7633553887456545, iteration: 48389
loss: 1.0193521976470947,grad_norm: 0.9999991021884043, iteration: 48390
loss: 0.9769007563591003,grad_norm: 0.9941641824223857, iteration: 48391
loss: 1.0081778764724731,grad_norm: 0.9555037726354874, iteration: 48392
loss: 1.0729522705078125,grad_norm: 0.9999997875044803, iteration: 48393
loss: 1.0199542045593262,grad_norm: 0.9678337574711823, iteration: 48394
loss: 1.0010443925857544,grad_norm: 0.9999991582653067, iteration: 48395
loss: 1.0265045166015625,grad_norm: 0.9999992301485399, iteration: 48396
loss: 1.0227643251419067,grad_norm: 0.9999994829416105, iteration: 48397
loss: 1.0208909511566162,grad_norm: 0.7987894113099129, iteration: 48398
loss: 1.000731348991394,grad_norm: 0.987060632754835, iteration: 48399
loss: 1.0485286712646484,grad_norm: 0.9999994722972376, iteration: 48400
loss: 1.0409847497940063,grad_norm: 0.9935495070341721, iteration: 48401
loss: 1.0230318307876587,grad_norm: 0.9027576718741057, iteration: 48402
loss: 0.9787461757659912,grad_norm: 0.8924586749010058, iteration: 48403
loss: 0.9993059039115906,grad_norm: 0.7541862701242503, iteration: 48404
loss: 0.9868666529655457,grad_norm: 0.8298739009107192, iteration: 48405
loss: 1.0446507930755615,grad_norm: 0.9999992216177168, iteration: 48406
loss: 1.0070117712020874,grad_norm: 0.9282181351255493, iteration: 48407
loss: 1.058470606803894,grad_norm: 0.9999992221380427, iteration: 48408
loss: 0.9897819757461548,grad_norm: 0.9999992634966461, iteration: 48409
loss: 1.0558958053588867,grad_norm: 0.9999994207516263, iteration: 48410
loss: 1.0081013441085815,grad_norm: 0.9041458893635118, iteration: 48411
loss: 1.0341269969940186,grad_norm: 0.9866176151661789, iteration: 48412
loss: 1.0673906803131104,grad_norm: 0.9999995001435659, iteration: 48413
loss: 0.9528481364250183,grad_norm: 0.9205969411552992, iteration: 48414
loss: 1.0290366411209106,grad_norm: 0.9999991762461715, iteration: 48415
loss: 1.020603060722351,grad_norm: 0.9999995293543088, iteration: 48416
loss: 1.0490671396255493,grad_norm: 0.999999423737105, iteration: 48417
loss: 0.9864716529846191,grad_norm: 0.9999992498458731, iteration: 48418
loss: 0.9761108756065369,grad_norm: 0.9999991050467072, iteration: 48419
loss: 1.0509023666381836,grad_norm: 0.999999322477528, iteration: 48420
loss: 1.0190353393554688,grad_norm: 0.9301562596283064, iteration: 48421
loss: 1.0124101638793945,grad_norm: 0.8148687438708012, iteration: 48422
loss: 1.003091812133789,grad_norm: 0.8659011084116599, iteration: 48423
loss: 1.1034842729568481,grad_norm: 0.9999994173996551, iteration: 48424
loss: 1.0958539247512817,grad_norm: 1.000000001972951, iteration: 48425
loss: 0.9939176440238953,grad_norm: 0.9999990703258175, iteration: 48426
loss: 1.0090694427490234,grad_norm: 0.9999997895849408, iteration: 48427
loss: 0.9984990358352661,grad_norm: 0.9951247165197771, iteration: 48428
loss: 1.0146409273147583,grad_norm: 0.9999994258247678, iteration: 48429
loss: 1.0058917999267578,grad_norm: 0.963562572387693, iteration: 48430
loss: 1.0205745697021484,grad_norm: 0.9999991345300533, iteration: 48431
loss: 0.989488959312439,grad_norm: 0.9257336174795653, iteration: 48432
loss: 0.9879047870635986,grad_norm: 0.9999990306818537, iteration: 48433
loss: 0.9871736764907837,grad_norm: 0.8587362572011666, iteration: 48434
loss: 0.9878625869750977,grad_norm: 0.9954533954468006, iteration: 48435
loss: 0.9468128085136414,grad_norm: 0.8508970938180855, iteration: 48436
loss: 1.0424081087112427,grad_norm: 0.9999993440410527, iteration: 48437
loss: 1.019004464149475,grad_norm: 0.845961159671306, iteration: 48438
loss: 1.0303410291671753,grad_norm: 0.9999991986564475, iteration: 48439
loss: 0.9993690252304077,grad_norm: 0.9875742996778156, iteration: 48440
loss: 0.9932877421379089,grad_norm: 0.9515443242008004, iteration: 48441
loss: 1.0203851461410522,grad_norm: 0.999999565823382, iteration: 48442
loss: 0.9987139701843262,grad_norm: 0.9999990594363234, iteration: 48443
loss: 1.0657780170440674,grad_norm: 0.999999367750328, iteration: 48444
loss: 1.0551403760910034,grad_norm: 0.9999993508396862, iteration: 48445
loss: 1.097676157951355,grad_norm: 0.9999999308395408, iteration: 48446
loss: 1.0468827486038208,grad_norm: 0.9999991353793247, iteration: 48447
loss: 1.0535486936569214,grad_norm: 0.9999996995996595, iteration: 48448
loss: 0.9706255197525024,grad_norm: 0.9654969039936504, iteration: 48449
loss: 0.9999165534973145,grad_norm: 0.999999084858714, iteration: 48450
loss: 0.9665252566337585,grad_norm: 0.8213819171251908, iteration: 48451
loss: 1.012360692024231,grad_norm: 0.8392335750611395, iteration: 48452
loss: 1.0034306049346924,grad_norm: 0.9999990645862435, iteration: 48453
loss: 0.9898375868797302,grad_norm: 0.9999994035278934, iteration: 48454
loss: 1.0356976985931396,grad_norm: 0.9917771674188683, iteration: 48455
loss: 1.0283702611923218,grad_norm: 0.9999997258901285, iteration: 48456
loss: 1.0940179824829102,grad_norm: 0.9999992433598255, iteration: 48457
loss: 0.978926420211792,grad_norm: 0.7962950647815243, iteration: 48458
loss: 1.027034044265747,grad_norm: 0.999999194281035, iteration: 48459
loss: 1.0275002717971802,grad_norm: 0.9999993078308103, iteration: 48460
loss: 0.9978790283203125,grad_norm: 0.8183811712200219, iteration: 48461
loss: 1.0097116231918335,grad_norm: 0.8430661801241028, iteration: 48462
loss: 1.0464227199554443,grad_norm: 0.9999993959164949, iteration: 48463
loss: 1.0178083181381226,grad_norm: 0.9999995634360892, iteration: 48464
loss: 1.0080718994140625,grad_norm: 0.9508958088409932, iteration: 48465
loss: 0.975959062576294,grad_norm: 0.8430557081457805, iteration: 48466
loss: 1.08185875415802,grad_norm: 0.9999993161008602, iteration: 48467
loss: 0.9975378513336182,grad_norm: 0.9999990530929266, iteration: 48468
loss: 0.987682580947876,grad_norm: 0.8710510880525714, iteration: 48469
loss: 0.9789374470710754,grad_norm: 0.999998944524582, iteration: 48470
loss: 1.012526273727417,grad_norm: 0.9999991919636582, iteration: 48471
loss: 0.9818967580795288,grad_norm: 0.9231249042453901, iteration: 48472
loss: 0.9860421419143677,grad_norm: 0.9999990737166585, iteration: 48473
loss: 0.9972465634346008,grad_norm: 0.9148781047257074, iteration: 48474
loss: 1.0451470613479614,grad_norm: 0.9999997644634985, iteration: 48475
loss: 0.9700174927711487,grad_norm: 0.999998880466558, iteration: 48476
loss: 1.0039583444595337,grad_norm: 0.9999991131631748, iteration: 48477
loss: 1.0122474431991577,grad_norm: 0.9035979536636549, iteration: 48478
loss: 1.06041419506073,grad_norm: 0.8816540856999745, iteration: 48479
loss: 1.003832459449768,grad_norm: 0.999999240955167, iteration: 48480
loss: 0.9829602241516113,grad_norm: 0.9736761923523054, iteration: 48481
loss: 1.0149558782577515,grad_norm: 0.9999993609424523, iteration: 48482
loss: 1.007624626159668,grad_norm: 0.7947744127405485, iteration: 48483
loss: 1.0118122100830078,grad_norm: 0.9999996248185232, iteration: 48484
loss: 0.9845673441886902,grad_norm: 0.861313283383711, iteration: 48485
loss: 0.987180233001709,grad_norm: 0.8985646057764716, iteration: 48486
loss: 0.9957849383354187,grad_norm: 0.9999990528503241, iteration: 48487
loss: 0.9956893920898438,grad_norm: 0.9310033762657032, iteration: 48488
loss: 0.9932888746261597,grad_norm: 0.7652671590592038, iteration: 48489
loss: 0.9847854971885681,grad_norm: 0.9055262370309352, iteration: 48490
loss: 1.0650358200073242,grad_norm: 0.9999992578013052, iteration: 48491
loss: 0.9915356636047363,grad_norm: 0.7712381476575073, iteration: 48492
loss: 1.0490037202835083,grad_norm: 0.9999995909571762, iteration: 48493
loss: 1.0300313234329224,grad_norm: 0.9999992409037508, iteration: 48494
loss: 1.057536005973816,grad_norm: 0.9375567597122796, iteration: 48495
loss: 1.006417155265808,grad_norm: 0.8320073944385467, iteration: 48496
loss: 1.004162311553955,grad_norm: 0.9999993760760261, iteration: 48497
loss: 1.0029613971710205,grad_norm: 0.8307354798821498, iteration: 48498
loss: 0.9715609550476074,grad_norm: 0.999999429857119, iteration: 48499
loss: 1.0392680168151855,grad_norm: 0.9999997167364398, iteration: 48500
loss: 1.0251322984695435,grad_norm: 0.9999989338281582, iteration: 48501
loss: 1.0187002420425415,grad_norm: 0.9999991920099714, iteration: 48502
loss: 1.0020173788070679,grad_norm: 0.9999991216995368, iteration: 48503
loss: 0.9582808613777161,grad_norm: 0.9999990203326735, iteration: 48504
loss: 1.0587754249572754,grad_norm: 0.9999994367735252, iteration: 48505
loss: 1.0142509937286377,grad_norm: 0.7996066848151617, iteration: 48506
loss: 0.9838850498199463,grad_norm: 0.9026627858124595, iteration: 48507
loss: 1.0564637184143066,grad_norm: 0.9201433914761459, iteration: 48508
loss: 1.0069916248321533,grad_norm: 0.9999995516378791, iteration: 48509
loss: 0.9872514605522156,grad_norm: 0.8627907538269416, iteration: 48510
loss: 1.0073884725570679,grad_norm: 0.9999993143970352, iteration: 48511
loss: 0.9994547367095947,grad_norm: 0.9999992291419495, iteration: 48512
loss: 0.997130274772644,grad_norm: 0.9749091684247312, iteration: 48513
loss: 0.993946373462677,grad_norm: 0.8078215876035574, iteration: 48514
loss: 1.0053670406341553,grad_norm: 0.9999995734405152, iteration: 48515
loss: 1.0214895009994507,grad_norm: 0.9589501025337887, iteration: 48516
loss: 0.9809005856513977,grad_norm: 0.9999991455794983, iteration: 48517
loss: 0.9439710974693298,grad_norm: 0.7351644743961752, iteration: 48518
loss: 1.0172010660171509,grad_norm: 0.9999990660398977, iteration: 48519
loss: 0.9776077270507812,grad_norm: 0.9999995402724842, iteration: 48520
loss: 0.9925905466079712,grad_norm: 0.8860148461655589, iteration: 48521
loss: 1.0102903842926025,grad_norm: 0.9999990256896885, iteration: 48522
loss: 1.0419466495513916,grad_norm: 0.9999993353872124, iteration: 48523
loss: 1.0645103454589844,grad_norm: 0.9999997496819403, iteration: 48524
loss: 0.9509742856025696,grad_norm: 0.999999116722745, iteration: 48525
loss: 0.9831522703170776,grad_norm: 0.9999991247884049, iteration: 48526
loss: 1.0234432220458984,grad_norm: 0.9999997402321111, iteration: 48527
loss: 0.9724494218826294,grad_norm: 0.8472554411977998, iteration: 48528
loss: 0.996680736541748,grad_norm: 0.7943171140111144, iteration: 48529
loss: 1.003256916999817,grad_norm: 0.9999989480522075, iteration: 48530
loss: 1.014913558959961,grad_norm: 0.9999990745245487, iteration: 48531
loss: 1.0172182321548462,grad_norm: 0.8300867420697627, iteration: 48532
loss: 1.020797848701477,grad_norm: 0.9243744534552606, iteration: 48533
loss: 1.0052837133407593,grad_norm: 0.9999990235051106, iteration: 48534
loss: 1.0314114093780518,grad_norm: 0.9999997001033927, iteration: 48535
loss: 1.0224201679229736,grad_norm: 0.9999991682685326, iteration: 48536
loss: 1.0272305011749268,grad_norm: 0.9999997614031817, iteration: 48537
loss: 1.022284984588623,grad_norm: 0.9999992229098381, iteration: 48538
loss: 1.065664529800415,grad_norm: 0.9999991349559021, iteration: 48539
loss: 1.0201952457427979,grad_norm: 0.9999991914270038, iteration: 48540
loss: 1.037007212638855,grad_norm: 0.9920276651870895, iteration: 48541
loss: 0.9929195642471313,grad_norm: 0.9024707197987215, iteration: 48542
loss: 1.0111194849014282,grad_norm: 0.9702798969445919, iteration: 48543
loss: 1.0460612773895264,grad_norm: 0.8049381589311598, iteration: 48544
loss: 1.0376615524291992,grad_norm: 0.9999992930205547, iteration: 48545
loss: 1.0731757879257202,grad_norm: 0.9064590264747888, iteration: 48546
loss: 0.9681012034416199,grad_norm: 0.9531703793030314, iteration: 48547
loss: 1.0321557521820068,grad_norm: 0.9999991393638686, iteration: 48548
loss: 1.084418535232544,grad_norm: 0.9999996002522885, iteration: 48549
loss: 1.0442448854446411,grad_norm: 0.9999994728276311, iteration: 48550
loss: 1.0552538633346558,grad_norm: 0.9999998498377121, iteration: 48551
loss: 1.0020970106124878,grad_norm: 0.7938432947506707, iteration: 48552
loss: 1.0207195281982422,grad_norm: 0.9999994096850462, iteration: 48553
loss: 1.0217158794403076,grad_norm: 0.9999992880588509, iteration: 48554
loss: 1.2039340734481812,grad_norm: 0.9999996951025236, iteration: 48555
loss: 1.2802990674972534,grad_norm: 0.9999993801502796, iteration: 48556
loss: 1.0938085317611694,grad_norm: 0.9999991816542941, iteration: 48557
loss: 1.0919294357299805,grad_norm: 0.9999994062316545, iteration: 48558
loss: 1.2194043397903442,grad_norm: 0.9999999065190714, iteration: 48559
loss: 1.142236590385437,grad_norm: 0.999999389408548, iteration: 48560
loss: 1.1556270122528076,grad_norm: 0.9999995538584595, iteration: 48561
loss: 1.102744221687317,grad_norm: 0.999999099604599, iteration: 48562
loss: 1.1523826122283936,grad_norm: 0.9999991967728936, iteration: 48563
loss: 1.2657376527786255,grad_norm: 0.9999996584946946, iteration: 48564
loss: 1.3337514400482178,grad_norm: 1.000000067412443, iteration: 48565
loss: 1.2916840314865112,grad_norm: 0.9999998583183433, iteration: 48566
loss: 1.246636986732483,grad_norm: 0.9999993686671557, iteration: 48567
loss: 1.283753514289856,grad_norm: 0.9999995510540651, iteration: 48568
loss: 1.3897054195404053,grad_norm: 0.999999776080807, iteration: 48569
loss: 1.360832929611206,grad_norm: 0.9999997362644171, iteration: 48570
loss: 1.2113730907440186,grad_norm: 0.9999994922547407, iteration: 48571
loss: 1.10194730758667,grad_norm: 0.9999999100489992, iteration: 48572
loss: 1.2731366157531738,grad_norm: 0.9999999315290666, iteration: 48573
loss: 1.401512861251831,grad_norm: 0.9999999640559529, iteration: 48574
loss: 1.2100906372070312,grad_norm: 0.9999999136498292, iteration: 48575
loss: 1.1511709690093994,grad_norm: 0.999999835117125, iteration: 48576
loss: 1.3768205642700195,grad_norm: 0.9999994780417362, iteration: 48577
loss: 1.238061785697937,grad_norm: 0.999999605887859, iteration: 48578
loss: 1.140892744064331,grad_norm: 1.0000000214803026, iteration: 48579
loss: 1.1786038875579834,grad_norm: 0.9999998664291956, iteration: 48580
loss: 1.1759424209594727,grad_norm: 0.9999998397204595, iteration: 48581
loss: 1.2107934951782227,grad_norm: 0.9999998780152839, iteration: 48582
loss: 1.2362699508666992,grad_norm: 0.999999479924852, iteration: 48583
loss: 1.1398310661315918,grad_norm: 0.9999995822963659, iteration: 48584
loss: 1.1795639991760254,grad_norm: 0.99999971223056, iteration: 48585
loss: 1.1760821342468262,grad_norm: 0.999999812684306, iteration: 48586
loss: 1.1648508310317993,grad_norm: 0.999999683291892, iteration: 48587
loss: 1.099291443824768,grad_norm: 0.9999996351355045, iteration: 48588
loss: 1.0873030424118042,grad_norm: 1.000000038599359, iteration: 48589
loss: 0.9915637373924255,grad_norm: 0.9576851299124516, iteration: 48590
loss: 1.1635513305664062,grad_norm: 0.9999996997677063, iteration: 48591
loss: 1.0237597227096558,grad_norm: 0.9999997961770495, iteration: 48592
loss: 1.04068922996521,grad_norm: 0.9448615061397663, iteration: 48593
loss: 1.1257632970809937,grad_norm: 0.9999991682770242, iteration: 48594
loss: 1.088512659072876,grad_norm: 0.999999879081726, iteration: 48595
loss: 1.0220354795455933,grad_norm: 0.9999993168758997, iteration: 48596
loss: 0.995155394077301,grad_norm: 0.9983325439832104, iteration: 48597
loss: 1.0019772052764893,grad_norm: 0.9999991097872246, iteration: 48598
loss: 1.0499173402786255,grad_norm: 0.9999992107449028, iteration: 48599
loss: 1.0893261432647705,grad_norm: 0.9999992452047103, iteration: 48600
loss: 1.0018913745880127,grad_norm: 0.9270375539358395, iteration: 48601
loss: 1.00742769241333,grad_norm: 0.9999993172563415, iteration: 48602
loss: 1.0524423122406006,grad_norm: 0.9713134460449936, iteration: 48603
loss: 0.9969569444656372,grad_norm: 0.8115290436072343, iteration: 48604
loss: 1.0726205110549927,grad_norm: 0.9999999293147814, iteration: 48605
loss: 1.0022072792053223,grad_norm: 0.9307570297360352, iteration: 48606
loss: 0.9948710203170776,grad_norm: 0.9999990527845977, iteration: 48607
loss: 1.0242414474487305,grad_norm: 0.8849233815158716, iteration: 48608
loss: 1.0281587839126587,grad_norm: 0.9999993287388398, iteration: 48609
loss: 1.129012107849121,grad_norm: 0.9999995600994414, iteration: 48610
loss: 1.1168276071548462,grad_norm: 0.949185580891321, iteration: 48611
loss: 0.9987373948097229,grad_norm: 0.9999991730860928, iteration: 48612
loss: 1.0102301836013794,grad_norm: 0.9999998056949909, iteration: 48613
loss: 1.0945383310317993,grad_norm: 0.9999993884780615, iteration: 48614
loss: 1.0684335231781006,grad_norm: 0.999999002461304, iteration: 48615
loss: 0.9962437748908997,grad_norm: 0.8305699038987495, iteration: 48616
loss: 1.0038844347000122,grad_norm: 0.9999993761556433, iteration: 48617
loss: 0.9661154747009277,grad_norm: 0.970987176253155, iteration: 48618
loss: 0.9699509739875793,grad_norm: 0.9999998919009468, iteration: 48619
loss: 0.996724545955658,grad_norm: 0.9509115943090513, iteration: 48620
loss: 0.9947429895401001,grad_norm: 0.9999991713790307, iteration: 48621
loss: 1.0100148916244507,grad_norm: 0.999999743046489, iteration: 48622
loss: 0.9663479328155518,grad_norm: 0.9888628383851227, iteration: 48623
loss: 1.0077661275863647,grad_norm: 0.9545894136209171, iteration: 48624
loss: 1.0718148946762085,grad_norm: 0.9999998313054316, iteration: 48625
loss: 1.0075212717056274,grad_norm: 0.9999992868240994, iteration: 48626
loss: 1.0376180410385132,grad_norm: 0.9181579669693778, iteration: 48627
loss: 1.025825023651123,grad_norm: 0.7617054758916587, iteration: 48628
loss: 1.0069282054901123,grad_norm: 0.8720301235365343, iteration: 48629
loss: 0.9843368530273438,grad_norm: 0.9456021438436726, iteration: 48630
loss: 1.0080292224884033,grad_norm: 0.8551241390812536, iteration: 48631
loss: 0.9859095811843872,grad_norm: 0.9278358818350796, iteration: 48632
loss: 1.0559136867523193,grad_norm: 0.9999991673867611, iteration: 48633
loss: 1.0231493711471558,grad_norm: 0.9999990722335476, iteration: 48634
loss: 0.9947076439857483,grad_norm: 0.9173123064722448, iteration: 48635
loss: 1.0396226644515991,grad_norm: 0.7735222751728988, iteration: 48636
loss: 1.0155056715011597,grad_norm: 0.9999996341723719, iteration: 48637
loss: 0.9978847503662109,grad_norm: 0.9999991992546643, iteration: 48638
loss: 1.0107046365737915,grad_norm: 0.999999089432453, iteration: 48639
loss: 0.9895223379135132,grad_norm: 0.8226035257119033, iteration: 48640
loss: 1.00205397605896,grad_norm: 0.8229937176387968, iteration: 48641
loss: 1.0560632944107056,grad_norm: 0.9999994946797445, iteration: 48642
loss: 1.0396891832351685,grad_norm: 0.9999990035486994, iteration: 48643
loss: 1.0474520921707153,grad_norm: 0.8309657850148929, iteration: 48644
loss: 1.0014519691467285,grad_norm: 0.9999993607070615, iteration: 48645
loss: 0.9970898628234863,grad_norm: 0.9999990484176884, iteration: 48646
loss: 1.0159776210784912,grad_norm: 0.8569642613340133, iteration: 48647
loss: 0.993095338344574,grad_norm: 0.992350537746772, iteration: 48648
loss: 1.0091273784637451,grad_norm: 0.9999990652194073, iteration: 48649
loss: 0.9800573587417603,grad_norm: 0.9999991260509594, iteration: 48650
loss: 1.0228098630905151,grad_norm: 0.8575378837410733, iteration: 48651
loss: 1.055177927017212,grad_norm: 0.9999990846056037, iteration: 48652
loss: 0.9969804883003235,grad_norm: 0.840959129633266, iteration: 48653
loss: 0.9826405644416809,grad_norm: 0.8123246033408285, iteration: 48654
loss: 1.0322935581207275,grad_norm: 0.9999991938887168, iteration: 48655
loss: 1.0184102058410645,grad_norm: 0.9999989963997429, iteration: 48656
loss: 1.0068771839141846,grad_norm: 0.9970156762683434, iteration: 48657
loss: 0.9801668524742126,grad_norm: 0.9999990948839174, iteration: 48658
loss: 0.9940713047981262,grad_norm: 0.9999990885717099, iteration: 48659
loss: 1.034529447555542,grad_norm: 0.9999997752205683, iteration: 48660
loss: 0.9900730848312378,grad_norm: 0.9857754861394292, iteration: 48661
loss: 1.005162000656128,grad_norm: 0.9999995459535924, iteration: 48662
loss: 1.0074830055236816,grad_norm: 0.8767528401937226, iteration: 48663
loss: 1.0098540782928467,grad_norm: 0.8903402960330535, iteration: 48664
loss: 0.9949537515640259,grad_norm: 0.6647662088364147, iteration: 48665
loss: 1.069383144378662,grad_norm: 0.999999803950002, iteration: 48666
loss: 1.0283994674682617,grad_norm: 0.9999991066929775, iteration: 48667
loss: 1.0159486532211304,grad_norm: 0.9526701109741164, iteration: 48668
loss: 1.0177416801452637,grad_norm: 0.956312884642865, iteration: 48669
loss: 1.0264779329299927,grad_norm: 0.9999991016286157, iteration: 48670
loss: 0.9987260699272156,grad_norm: 0.9631064714757859, iteration: 48671
loss: 1.0184603929519653,grad_norm: 0.9029415167143033, iteration: 48672
loss: 0.9662743806838989,grad_norm: 0.8588105388198941, iteration: 48673
loss: 1.001478910446167,grad_norm: 0.9958277000455135, iteration: 48674
loss: 0.9815307855606079,grad_norm: 0.9755802763891853, iteration: 48675
loss: 0.9990533590316772,grad_norm: 0.999998991103376, iteration: 48676
loss: 1.0059469938278198,grad_norm: 0.8555009173999055, iteration: 48677
loss: 1.0346970558166504,grad_norm: 0.9999990644279685, iteration: 48678
loss: 0.9766653180122375,grad_norm: 0.9411345429593331, iteration: 48679
loss: 1.0328103303909302,grad_norm: 0.999999676684522, iteration: 48680
loss: 1.0277738571166992,grad_norm: 0.9999990026143877, iteration: 48681
loss: 0.9837195873260498,grad_norm: 0.8595791568357075, iteration: 48682
loss: 1.0074695348739624,grad_norm: 0.9999991642539813, iteration: 48683
loss: 1.050837516784668,grad_norm: 0.9999992273392684, iteration: 48684
loss: 1.0096495151519775,grad_norm: 0.917552038416966, iteration: 48685
loss: 1.0194011926651,grad_norm: 0.9510450889894085, iteration: 48686
loss: 0.9905058145523071,grad_norm: 0.8660074815985754, iteration: 48687
loss: 1.0544874668121338,grad_norm: 0.9999995154515186, iteration: 48688
loss: 0.9799325466156006,grad_norm: 0.9367647720916004, iteration: 48689
loss: 1.0038976669311523,grad_norm: 0.8217643327252373, iteration: 48690
loss: 1.0055487155914307,grad_norm: 0.9999991790834991, iteration: 48691
loss: 1.0413099527359009,grad_norm: 0.9999993425005596, iteration: 48692
loss: 1.0544122457504272,grad_norm: 0.8756990085924988, iteration: 48693
loss: 0.9869192838668823,grad_norm: 0.9939082543314066, iteration: 48694
loss: 1.00385320186615,grad_norm: 0.9999990732899329, iteration: 48695
loss: 1.0559649467468262,grad_norm: 0.9999992134925464, iteration: 48696
loss: 1.0169094800949097,grad_norm: 0.9999990260732525, iteration: 48697
loss: 1.0193027257919312,grad_norm: 0.835799324416042, iteration: 48698
loss: 1.2705180644989014,grad_norm: 0.999999728450198, iteration: 48699
loss: 0.9871753454208374,grad_norm: 0.9763911691226453, iteration: 48700
loss: 1.052945852279663,grad_norm: 0.9712550452408772, iteration: 48701
loss: 0.9962383508682251,grad_norm: 0.8911334685634609, iteration: 48702
loss: 1.0082483291625977,grad_norm: 0.9604392034230052, iteration: 48703
loss: 0.9747462868690491,grad_norm: 0.9999992190542245, iteration: 48704
loss: 0.9805504679679871,grad_norm: 0.9999992420732481, iteration: 48705
loss: 1.0094107389450073,grad_norm: 0.8193354312913484, iteration: 48706
loss: 0.9787876009941101,grad_norm: 0.99999934755365, iteration: 48707
loss: 0.9637110829353333,grad_norm: 0.9662985427770234, iteration: 48708
loss: 1.0986888408660889,grad_norm: 0.9999997283499816, iteration: 48709
loss: 1.0124746561050415,grad_norm: 0.9309191539687915, iteration: 48710
loss: 1.0063247680664062,grad_norm: 0.9999994980892485, iteration: 48711
loss: 0.9679803252220154,grad_norm: 0.7564361051611397, iteration: 48712
loss: 1.0639910697937012,grad_norm: 0.999999283289047, iteration: 48713
loss: 1.2081596851348877,grad_norm: 0.9999997798102285, iteration: 48714
loss: 0.9775659441947937,grad_norm: 0.9999990965314773, iteration: 48715
loss: 1.1817984580993652,grad_norm: 0.9999993756333897, iteration: 48716
loss: 1.194931983947754,grad_norm: 0.9999992726113325, iteration: 48717
loss: 1.017981767654419,grad_norm: 0.9999992009511937, iteration: 48718
loss: 0.9970667362213135,grad_norm: 0.9634887477517899, iteration: 48719
loss: 1.209086537361145,grad_norm: 0.9999997941989152, iteration: 48720
loss: 0.9856616258621216,grad_norm: 0.9999991921040945, iteration: 48721
loss: 1.0153331756591797,grad_norm: 0.9473706846238736, iteration: 48722
loss: 1.0011630058288574,grad_norm: 0.9683900053629078, iteration: 48723
loss: 0.9565765261650085,grad_norm: 0.9436864420619604, iteration: 48724
loss: 1.0107760429382324,grad_norm: 0.8827868830231891, iteration: 48725
loss: 1.0413365364074707,grad_norm: 0.9999993166944471, iteration: 48726
loss: 1.0164966583251953,grad_norm: 0.7998020219104872, iteration: 48727
loss: 0.9881790280342102,grad_norm: 0.7235733478073915, iteration: 48728
loss: 1.0249285697937012,grad_norm: 0.9999992520901174, iteration: 48729
loss: 1.0494639873504639,grad_norm: 0.9999992653580647, iteration: 48730
loss: 1.0052533149719238,grad_norm: 0.9999994670999299, iteration: 48731
loss: 0.9986324310302734,grad_norm: 0.8026590770194086, iteration: 48732
loss: 1.0104321241378784,grad_norm: 0.9718605306658155, iteration: 48733
loss: 1.0070194005966187,grad_norm: 0.9103341405241603, iteration: 48734
loss: 1.005418062210083,grad_norm: 0.9999996978236351, iteration: 48735
loss: 1.0369418859481812,grad_norm: 0.9999989954452169, iteration: 48736
loss: 0.9651179313659668,grad_norm: 0.8810915968791999, iteration: 48737
loss: 1.0328333377838135,grad_norm: 0.9999992787326686, iteration: 48738
loss: 1.065016746520996,grad_norm: 0.9999992096371543, iteration: 48739
loss: 1.019884467124939,grad_norm: 0.8167852331506222, iteration: 48740
loss: 0.9779719114303589,grad_norm: 0.9999990537129811, iteration: 48741
loss: 1.0136700868606567,grad_norm: 0.9615997288669443, iteration: 48742
loss: 0.9741334319114685,grad_norm: 0.9735726065439847, iteration: 48743
loss: 1.01235032081604,grad_norm: 0.9999990273211353, iteration: 48744
loss: 1.0207995176315308,grad_norm: 0.9999995872898009, iteration: 48745
loss: 0.9876459240913391,grad_norm: 0.9985228558235197, iteration: 48746
loss: 1.0069921016693115,grad_norm: 0.8007189147376478, iteration: 48747
loss: 0.9913599491119385,grad_norm: 0.9422676601051775, iteration: 48748
loss: 0.999965488910675,grad_norm: 0.9999994231357209, iteration: 48749
loss: 1.0300520658493042,grad_norm: 0.8654165244353513, iteration: 48750
loss: 0.956779956817627,grad_norm: 0.9080965773246754, iteration: 48751
loss: 0.9841020107269287,grad_norm: 0.9999989785487168, iteration: 48752
loss: 0.9998261332511902,grad_norm: 0.9435976468763854, iteration: 48753
loss: 1.0004771947860718,grad_norm: 0.949129974401916, iteration: 48754
loss: 1.0079704523086548,grad_norm: 0.9025316039042591, iteration: 48755
loss: 1.062915563583374,grad_norm: 0.9999999622475604, iteration: 48756
loss: 0.9978245496749878,grad_norm: 0.9544139797171004, iteration: 48757
loss: 0.9640912413597107,grad_norm: 0.9931398034040493, iteration: 48758
loss: 1.0008982419967651,grad_norm: 0.9420073990663567, iteration: 48759
loss: 1.0278680324554443,grad_norm: 0.9999993914475629, iteration: 48760
loss: 1.0144072771072388,grad_norm: 0.8275204687241652, iteration: 48761
loss: 1.0011258125305176,grad_norm: 0.9999989718051576, iteration: 48762
loss: 1.0023980140686035,grad_norm: 0.9327430461231063, iteration: 48763
loss: 0.9922269582748413,grad_norm: 0.9999991587023488, iteration: 48764
loss: 0.9919588565826416,grad_norm: 0.9999991065941267, iteration: 48765
loss: 1.0186268091201782,grad_norm: 0.9999990769325362, iteration: 48766
loss: 1.012937307357788,grad_norm: 0.8607680221732106, iteration: 48767
loss: 1.02578866481781,grad_norm: 1.000000017109871, iteration: 48768
loss: 1.030219316482544,grad_norm: 0.9999991395735975, iteration: 48769
loss: 1.008012294769287,grad_norm: 0.999999097147244, iteration: 48770
loss: 0.9771935939788818,grad_norm: 0.9385885569376641, iteration: 48771
loss: 0.9990388751029968,grad_norm: 0.9999990639493564, iteration: 48772
loss: 0.9908583760261536,grad_norm: 0.9999992070885242, iteration: 48773
loss: 1.0100771188735962,grad_norm: 0.9999995962931174, iteration: 48774
loss: 1.0634230375289917,grad_norm: 0.9999995105965889, iteration: 48775
loss: 0.994201123714447,grad_norm: 0.9999992896283347, iteration: 48776
loss: 1.0138888359069824,grad_norm: 0.8845462587521954, iteration: 48777
loss: 0.9916194677352905,grad_norm: 0.9364216591278914, iteration: 48778
loss: 1.0342223644256592,grad_norm: 0.9999990729246683, iteration: 48779
loss: 1.0279226303100586,grad_norm: 0.819706292304215, iteration: 48780
loss: 1.0511053800582886,grad_norm: 0.9999996435644066, iteration: 48781
loss: 1.0166772603988647,grad_norm: 0.9999994350352364, iteration: 48782
loss: 1.0047972202301025,grad_norm: 0.9999991173915941, iteration: 48783
loss: 1.042191505432129,grad_norm: 0.99999923351176, iteration: 48784
loss: 1.0465115308761597,grad_norm: 0.9572895666610377, iteration: 48785
loss: 1.0130645036697388,grad_norm: 0.9999991373276834, iteration: 48786
loss: 1.0137476921081543,grad_norm: 0.9382669269747846, iteration: 48787
loss: 0.9828903675079346,grad_norm: 0.9999991516411875, iteration: 48788
loss: 0.9748223423957825,grad_norm: 0.8688116416519449, iteration: 48789
loss: 0.9878128170967102,grad_norm: 0.9318489757264002, iteration: 48790
loss: 1.0336271524429321,grad_norm: 0.8691904404920335, iteration: 48791
loss: 1.0275685787200928,grad_norm: 0.999999354594412, iteration: 48792
loss: 1.0000871419906616,grad_norm: 0.9575263100002184, iteration: 48793
loss: 1.0222920179367065,grad_norm: 0.9999994449160828, iteration: 48794
loss: 0.9849411249160767,grad_norm: 0.9999990981953158, iteration: 48795
loss: 1.0074577331542969,grad_norm: 0.9999989604160304, iteration: 48796
loss: 0.9769567251205444,grad_norm: 0.9999994667215997, iteration: 48797
loss: 1.0011736154556274,grad_norm: 0.9999992528832022, iteration: 48798
loss: 1.0095220804214478,grad_norm: 0.9999990853279691, iteration: 48799
loss: 0.9939085245132446,grad_norm: 0.8779776546848909, iteration: 48800
loss: 1.0115629434585571,grad_norm: 0.9172360658888987, iteration: 48801
loss: 0.9854053258895874,grad_norm: 0.9999990230064253, iteration: 48802
loss: 1.0259974002838135,grad_norm: 0.999999411445639, iteration: 48803
loss: 1.0170985460281372,grad_norm: 0.8560800846412953, iteration: 48804
loss: 1.005879521369934,grad_norm: 0.7573923862861925, iteration: 48805
loss: 1.037826657295227,grad_norm: 0.9837912873497779, iteration: 48806
loss: 1.0234906673431396,grad_norm: 0.9999995865519322, iteration: 48807
loss: 1.012884259223938,grad_norm: 0.9999990889685503, iteration: 48808
loss: 1.02936851978302,grad_norm: 0.999999148887197, iteration: 48809
loss: 1.01677405834198,grad_norm: 0.9231728744327222, iteration: 48810
loss: 1.0073095560073853,grad_norm: 0.8892214147975858, iteration: 48811
loss: 1.057562232017517,grad_norm: 0.9999993954730709, iteration: 48812
loss: 0.9870807528495789,grad_norm: 0.8623910678815593, iteration: 48813
loss: 0.9876582026481628,grad_norm: 0.7467886873066386, iteration: 48814
loss: 1.0109665393829346,grad_norm: 0.9501974576201945, iteration: 48815
loss: 1.0011132955551147,grad_norm: 0.9844632231895828, iteration: 48816
loss: 0.9939067363739014,grad_norm: 0.7504048139607478, iteration: 48817
loss: 1.049367904663086,grad_norm: 0.9999990507233046, iteration: 48818
loss: 1.0284048318862915,grad_norm: 0.9306649938319345, iteration: 48819
loss: 1.0087233781814575,grad_norm: 0.9999998009429794, iteration: 48820
loss: 0.9904350638389587,grad_norm: 0.9999992072638155, iteration: 48821
loss: 0.9808883666992188,grad_norm: 0.8687471950686542, iteration: 48822
loss: 1.0221651792526245,grad_norm: 0.999999272984242, iteration: 48823
loss: 0.9860559701919556,grad_norm: 0.9999992671100155, iteration: 48824
loss: 1.0375304222106934,grad_norm: 0.9999991794470581, iteration: 48825
loss: 1.0263460874557495,grad_norm: 0.999999567381577, iteration: 48826
loss: 1.0416264533996582,grad_norm: 0.9999991318022302, iteration: 48827
loss: 0.9846082329750061,grad_norm: 0.9999990685978354, iteration: 48828
loss: 1.0329678058624268,grad_norm: 0.9999994378382885, iteration: 48829
loss: 0.9859362244606018,grad_norm: 0.9273248645128913, iteration: 48830
loss: 1.0215976238250732,grad_norm: 0.9075539386210882, iteration: 48831
loss: 0.9998919367790222,grad_norm: 0.8276422722995026, iteration: 48832
loss: 1.0237082242965698,grad_norm: 0.8629401812231653, iteration: 48833
loss: 0.9579330682754517,grad_norm: 0.9999990604078801, iteration: 48834
loss: 1.0510692596435547,grad_norm: 0.9999992339993092, iteration: 48835
loss: 1.0199440717697144,grad_norm: 0.7506615247796926, iteration: 48836
loss: 1.0184123516082764,grad_norm: 0.8226925618402704, iteration: 48837
loss: 1.002210259437561,grad_norm: 0.9918923584506694, iteration: 48838
loss: 0.9898071885108948,grad_norm: 0.9999990609208006, iteration: 48839
loss: 1.0377322435379028,grad_norm: 0.9008923785598583, iteration: 48840
loss: 0.9834809303283691,grad_norm: 0.9999991866563297, iteration: 48841
loss: 1.0235753059387207,grad_norm: 0.735485134766809, iteration: 48842
loss: 1.045661211013794,grad_norm: 0.8511721501562274, iteration: 48843
loss: 1.0399839878082275,grad_norm: 0.9999996366749795, iteration: 48844
loss: 0.9946783185005188,grad_norm: 0.9999990596069668, iteration: 48845
loss: 1.00080406665802,grad_norm: 0.9999990842311652, iteration: 48846
loss: 1.0191820859909058,grad_norm: 0.8858201511092798, iteration: 48847
loss: 0.9980667233467102,grad_norm: 0.8570275707112558, iteration: 48848
loss: 0.9946381449699402,grad_norm: 0.9999995290030612, iteration: 48849
loss: 0.9999982118606567,grad_norm: 0.8407763147865008, iteration: 48850
loss: 1.0564039945602417,grad_norm: 0.9999993088374185, iteration: 48851
loss: 0.9880618453025818,grad_norm: 0.79798166062876, iteration: 48852
loss: 1.0323151350021362,grad_norm: 0.8953128713437479, iteration: 48853
loss: 0.9940608143806458,grad_norm: 0.999999012659236, iteration: 48854
loss: 1.009657859802246,grad_norm: 0.95969902465433, iteration: 48855
loss: 1.0268782377243042,grad_norm: 0.9999992593733101, iteration: 48856
loss: 1.0228657722473145,grad_norm: 0.8106898618106037, iteration: 48857
loss: 0.9970316886901855,grad_norm: 0.9999992404823962, iteration: 48858
loss: 1.0145511627197266,grad_norm: 0.9999998076415267, iteration: 48859
loss: 1.0331594944000244,grad_norm: 0.9999992513248587, iteration: 48860
loss: 1.0435429811477661,grad_norm: 0.9999991836519677, iteration: 48861
loss: 1.0137280225753784,grad_norm: 0.9684508257067478, iteration: 48862
loss: 0.9878233671188354,grad_norm: 0.9999995063925647, iteration: 48863
loss: 1.0343334674835205,grad_norm: 0.9999994286872357, iteration: 48864
loss: 0.9924982786178589,grad_norm: 0.999999079288511, iteration: 48865
loss: 0.9877080917358398,grad_norm: 0.9999991021777002, iteration: 48866
loss: 1.0224729776382446,grad_norm: 0.999999667558027, iteration: 48867
loss: 1.0115985870361328,grad_norm: 0.9999993580991868, iteration: 48868
loss: 1.0347262620925903,grad_norm: 0.9207282249454292, iteration: 48869
loss: 1.0261871814727783,grad_norm: 0.8826107581662402, iteration: 48870
loss: 1.0834072828292847,grad_norm: 0.999999861149207, iteration: 48871
loss: 1.0193915367126465,grad_norm: 0.9999990988991917, iteration: 48872
loss: 1.0317554473876953,grad_norm: 0.9999994008113958, iteration: 48873
loss: 1.0050711631774902,grad_norm: 0.7744425294723318, iteration: 48874
loss: 0.9880508780479431,grad_norm: 0.9603969531450303, iteration: 48875
loss: 1.062623381614685,grad_norm: 0.9499699792919833, iteration: 48876
loss: 1.0464831590652466,grad_norm: 0.9999994408588814, iteration: 48877
loss: 0.9813946485519409,grad_norm: 0.8980890054292405, iteration: 48878
loss: 0.9778921604156494,grad_norm: 0.8751935459542929, iteration: 48879
loss: 1.0001473426818848,grad_norm: 0.8749460259052355, iteration: 48880
loss: 1.018454909324646,grad_norm: 0.9162061978356173, iteration: 48881
loss: 1.0378268957138062,grad_norm: 0.9999992588974119, iteration: 48882
loss: 1.0126041173934937,grad_norm: 0.888110318448611, iteration: 48883
loss: 1.0235878229141235,grad_norm: 0.9999993429909396, iteration: 48884
loss: 1.039305567741394,grad_norm: 0.9999990853210821, iteration: 48885
loss: 1.0030994415283203,grad_norm: 0.9243092587583193, iteration: 48886
loss: 1.0212368965148926,grad_norm: 0.999999092185979, iteration: 48887
loss: 0.9591844081878662,grad_norm: 0.9906370063304681, iteration: 48888
loss: 0.9590173959732056,grad_norm: 0.9209322481510462, iteration: 48889
loss: 0.9666832089424133,grad_norm: 0.9270136608020134, iteration: 48890
loss: 0.9777427315711975,grad_norm: 0.9746286243248649, iteration: 48891
loss: 1.0297235250473022,grad_norm: 0.9032667501232373, iteration: 48892
loss: 0.9916289448738098,grad_norm: 0.9682515756054741, iteration: 48893
loss: 0.9630407691001892,grad_norm: 0.9324106340998544, iteration: 48894
loss: 0.9661992192268372,grad_norm: 0.7744308757655828, iteration: 48895
loss: 0.99448561668396,grad_norm: 0.9749442380670044, iteration: 48896
loss: 1.0190489292144775,grad_norm: 0.9999991762527555, iteration: 48897
loss: 0.9906349778175354,grad_norm: 0.7595519015065986, iteration: 48898
loss: 0.971305787563324,grad_norm: 0.9480959998439845, iteration: 48899
loss: 1.0231330394744873,grad_norm: 0.9999990693193943, iteration: 48900
loss: 1.0065641403198242,grad_norm: 0.973203650494524, iteration: 48901
loss: 1.0129051208496094,grad_norm: 0.9291855562429779, iteration: 48902
loss: 1.0255144834518433,grad_norm: 0.9999992447906592, iteration: 48903
loss: 1.0088974237442017,grad_norm: 0.8899485894645599, iteration: 48904
loss: 1.034192681312561,grad_norm: 0.9999992245104801, iteration: 48905
loss: 0.9799793362617493,grad_norm: 0.9685962732269519, iteration: 48906
loss: 1.017977237701416,grad_norm: 0.8844126372556536, iteration: 48907
loss: 1.027956247329712,grad_norm: 0.8905455290921362, iteration: 48908
loss: 1.1066272258758545,grad_norm: 0.876296213140454, iteration: 48909
loss: 1.0398348569869995,grad_norm: 0.9999994932375857, iteration: 48910
loss: 1.0498040914535522,grad_norm: 0.9840026933354813, iteration: 48911
loss: 1.0043532848358154,grad_norm: 0.9999996770428177, iteration: 48912
loss: 1.0127276182174683,grad_norm: 0.9999992620135215, iteration: 48913
loss: 0.975654125213623,grad_norm: 0.9999991759578258, iteration: 48914
loss: 1.006568193435669,grad_norm: 0.9999992118774689, iteration: 48915
loss: 0.9627916216850281,grad_norm: 0.9999991229000248, iteration: 48916
loss: 0.9739370346069336,grad_norm: 0.8288668101675908, iteration: 48917
loss: 1.0243909358978271,grad_norm: 0.9999991995618926, iteration: 48918
loss: 1.0112353563308716,grad_norm: 0.9999993506855582, iteration: 48919
loss: 0.9933447241783142,grad_norm: 0.9310751368796691, iteration: 48920
loss: 1.0134379863739014,grad_norm: 0.9844090998154896, iteration: 48921
loss: 1.0509767532348633,grad_norm: 0.9735005291092588, iteration: 48922
loss: 0.9918334484100342,grad_norm: 0.9634972885330213, iteration: 48923
loss: 0.9509312510490417,grad_norm: 0.9931313744059287, iteration: 48924
loss: 0.9880963563919067,grad_norm: 0.9091889362397344, iteration: 48925
loss: 1.0036183595657349,grad_norm: 0.9672576439731309, iteration: 48926
loss: 1.0507146120071411,grad_norm: 0.8489320787056651, iteration: 48927
loss: 1.0210946798324585,grad_norm: 0.9805959903149367, iteration: 48928
loss: 1.0074156522750854,grad_norm: 0.9999995731220341, iteration: 48929
loss: 1.0017664432525635,grad_norm: 0.9259399907086023, iteration: 48930
loss: 1.0380817651748657,grad_norm: 0.8995817300675875, iteration: 48931
loss: 1.0207509994506836,grad_norm: 0.9999998436561197, iteration: 48932
loss: 1.0198811292648315,grad_norm: 0.9711676993502979, iteration: 48933
loss: 1.0812588930130005,grad_norm: 0.9999998827900447, iteration: 48934
loss: 1.0043504238128662,grad_norm: 0.9427018903377747, iteration: 48935
loss: 0.9977303743362427,grad_norm: 0.9265971625932912, iteration: 48936
loss: 1.016103982925415,grad_norm: 0.8361992776872539, iteration: 48937
loss: 0.9797162413597107,grad_norm: 0.9250212090020392, iteration: 48938
loss: 1.0545954704284668,grad_norm: 0.9999990110206585, iteration: 48939
loss: 0.9846850633621216,grad_norm: 0.9758387379590772, iteration: 48940
loss: 1.028167486190796,grad_norm: 0.9721696440736636, iteration: 48941
loss: 0.9821218252182007,grad_norm: 0.9999991692321467, iteration: 48942
loss: 0.9879915118217468,grad_norm: 0.8020342493757207, iteration: 48943
loss: 1.0068297386169434,grad_norm: 0.9346119340703833, iteration: 48944
loss: 1.0018233060836792,grad_norm: 0.9330264929733462, iteration: 48945
loss: 1.0077322721481323,grad_norm: 0.9696678066508483, iteration: 48946
loss: 1.0682382583618164,grad_norm: 0.9999996107474064, iteration: 48947
loss: 0.9831876158714294,grad_norm: 0.9999990591391719, iteration: 48948
loss: 1.1505428552627563,grad_norm: 0.9999991009679632, iteration: 48949
loss: 0.9742109179496765,grad_norm: 0.84891751122065, iteration: 48950
loss: 0.9862826466560364,grad_norm: 0.9258954977788089, iteration: 48951
loss: 1.0278265476226807,grad_norm: 0.999999208567069, iteration: 48952
loss: 0.9979093670845032,grad_norm: 0.999999100316754, iteration: 48953
loss: 0.9947283267974854,grad_norm: 0.9999990276581464, iteration: 48954
loss: 0.9978932738304138,grad_norm: 0.798042409590383, iteration: 48955
loss: 0.9920640587806702,grad_norm: 0.9061349656370326, iteration: 48956
loss: 0.9906606078147888,grad_norm: 0.9428333403791914, iteration: 48957
loss: 1.0100535154342651,grad_norm: 0.887891545361801, iteration: 48958
loss: 1.0097453594207764,grad_norm: 0.8951239036659591, iteration: 48959
loss: 1.0064269304275513,grad_norm: 0.8532854051369436, iteration: 48960
loss: 1.011689305305481,grad_norm: 0.8915119299622414, iteration: 48961
loss: 0.971825897693634,grad_norm: 0.7952290656719428, iteration: 48962
loss: 1.017709493637085,grad_norm: 0.8242634941676941, iteration: 48963
loss: 1.0129563808441162,grad_norm: 0.8566683331479215, iteration: 48964
loss: 0.9696629047393799,grad_norm: 0.9459860189579332, iteration: 48965
loss: 1.012719988822937,grad_norm: 0.9999996776860925, iteration: 48966
loss: 1.013261079788208,grad_norm: 0.8679132178593723, iteration: 48967
loss: 0.9668554067611694,grad_norm: 0.9766235162476741, iteration: 48968
loss: 1.0165364742279053,grad_norm: 0.9999991281170244, iteration: 48969
loss: 0.993051290512085,grad_norm: 0.8710517296708108, iteration: 48970
loss: 1.0178810358047485,grad_norm: 0.9999990764291911, iteration: 48971
loss: 1.0290905237197876,grad_norm: 0.9162598038489426, iteration: 48972
loss: 0.9858682751655579,grad_norm: 0.999999103898421, iteration: 48973
loss: 0.9648154377937317,grad_norm: 0.9999992352673142, iteration: 48974
loss: 1.0080866813659668,grad_norm: 0.979811362655557, iteration: 48975
loss: 1.0476107597351074,grad_norm: 0.9075478129460857, iteration: 48976
loss: 0.9336143136024475,grad_norm: 0.999999230047786, iteration: 48977
loss: 0.9758190512657166,grad_norm: 0.9709255805274526, iteration: 48978
loss: 1.0977238416671753,grad_norm: 0.9999992130395469, iteration: 48979
loss: 0.9920191168785095,grad_norm: 0.9159150520799801, iteration: 48980
loss: 0.964373767375946,grad_norm: 0.9108813120531444, iteration: 48981
loss: 1.01144540309906,grad_norm: 0.9999992926427046, iteration: 48982
loss: 1.0117175579071045,grad_norm: 0.9632222029367926, iteration: 48983
loss: 1.0077455043792725,grad_norm: 0.945440144554383, iteration: 48984
loss: 1.0182008743286133,grad_norm: 0.9130893896953715, iteration: 48985
loss: 1.0381845235824585,grad_norm: 0.9229358071286576, iteration: 48986
loss: 1.021980881690979,grad_norm: 0.9217139385303001, iteration: 48987
loss: 0.9689674973487854,grad_norm: 0.9534179761728448, iteration: 48988
loss: 0.9924856424331665,grad_norm: 0.9373153706444488, iteration: 48989
loss: 1.0330171585083008,grad_norm: 0.9999991166561853, iteration: 48990
loss: 1.028240442276001,grad_norm: 0.8680374680153324, iteration: 48991
loss: 0.9682669639587402,grad_norm: 0.8341508104434572, iteration: 48992
loss: 1.026563048362732,grad_norm: 0.894806477712699, iteration: 48993
loss: 1.0020755529403687,grad_norm: 0.9999990615989257, iteration: 48994
loss: 0.9686196446418762,grad_norm: 0.9999993131291868, iteration: 48995
loss: 0.995492160320282,grad_norm: 0.7570520340944771, iteration: 48996
loss: 1.0214524269104004,grad_norm: 0.978134730402379, iteration: 48997
loss: 1.0298343896865845,grad_norm: 0.9693416932205833, iteration: 48998
loss: 0.9731295704841614,grad_norm: 0.9359638311631419, iteration: 48999
loss: 1.0515762567520142,grad_norm: 0.9999993050549748, iteration: 49000
loss: 0.9824121594429016,grad_norm: 0.9954921235527047, iteration: 49001
loss: 1.0071637630462646,grad_norm: 0.9680633619475693, iteration: 49002
loss: 1.0303677320480347,grad_norm: 0.9999998611934077, iteration: 49003
loss: 1.0014667510986328,grad_norm: 0.9091328395987877, iteration: 49004
loss: 1.0122663974761963,grad_norm: 0.8599718944347771, iteration: 49005
loss: 1.0107672214508057,grad_norm: 0.8890395195777985, iteration: 49006
loss: 0.964022159576416,grad_norm: 0.9469031326156503, iteration: 49007
loss: 1.0081957578659058,grad_norm: 0.7828556962675574, iteration: 49008
loss: 1.0059196949005127,grad_norm: 0.9031413168012064, iteration: 49009
loss: 1.114235758781433,grad_norm: 0.8163328497484582, iteration: 49010
loss: 0.9908356070518494,grad_norm: 0.9290819059270184, iteration: 49011
loss: 0.9891701340675354,grad_norm: 0.9999990675633441, iteration: 49012
loss: 1.033850908279419,grad_norm: 0.999998992900679, iteration: 49013
loss: 0.996965229511261,grad_norm: 0.934628823715121, iteration: 49014
loss: 0.9897527694702148,grad_norm: 0.8751074455925478, iteration: 49015
loss: 0.9835684895515442,grad_norm: 0.9889987044442993, iteration: 49016
loss: 0.9849567413330078,grad_norm: 0.8385047428064355, iteration: 49017
loss: 1.0115057229995728,grad_norm: 0.999999142437208, iteration: 49018
loss: 1.0020781755447388,grad_norm: 0.8654355525077992, iteration: 49019
loss: 1.0230571031570435,grad_norm: 0.9999992324536344, iteration: 49020
loss: 1.0240833759307861,grad_norm: 0.8692443596773451, iteration: 49021
loss: 1.0269873142242432,grad_norm: 0.9999991428552124, iteration: 49022
loss: 1.0225003957748413,grad_norm: 0.9999992282185097, iteration: 49023
loss: 1.053148865699768,grad_norm: 0.9999997405473693, iteration: 49024
loss: 1.0282022953033447,grad_norm: 0.9883747466930789, iteration: 49025
loss: 0.9866661429405212,grad_norm: 0.9999992350589286, iteration: 49026
loss: 0.9700016379356384,grad_norm: 0.955788164892965, iteration: 49027
loss: 0.972589373588562,grad_norm: 0.9341586918548773, iteration: 49028
loss: 1.022529125213623,grad_norm: 0.9629113638727551, iteration: 49029
loss: 0.9963609576225281,grad_norm: 0.8355214558131162, iteration: 49030
loss: 1.0497299432754517,grad_norm: 0.9937113312908094, iteration: 49031
loss: 1.0068342685699463,grad_norm: 0.9999990606848032, iteration: 49032
loss: 1.0390053987503052,grad_norm: 0.907882435065267, iteration: 49033
loss: 1.0583982467651367,grad_norm: 0.9999989544306312, iteration: 49034
loss: 1.0098692178726196,grad_norm: 0.9999993520225988, iteration: 49035
loss: 0.9838777780532837,grad_norm: 0.9960962282339473, iteration: 49036
loss: 0.9974597692489624,grad_norm: 0.9000486309509513, iteration: 49037
loss: 0.9802610278129578,grad_norm: 0.9999995128637655, iteration: 49038
loss: 1.0835014581680298,grad_norm: 0.9999991785503873, iteration: 49039
loss: 0.9720842838287354,grad_norm: 0.9969281566884074, iteration: 49040
loss: 0.9835798740386963,grad_norm: 0.9999990970969207, iteration: 49041
loss: 1.028225302696228,grad_norm: 0.8747557925348018, iteration: 49042
loss: 1.017386794090271,grad_norm: 0.9789380599995495, iteration: 49043
loss: 1.0038626194000244,grad_norm: 0.9999992484183226, iteration: 49044
loss: 0.9852437376976013,grad_norm: 0.940880130815135, iteration: 49045
loss: 1.0149980783462524,grad_norm: 0.9999994030003754, iteration: 49046
loss: 0.989938497543335,grad_norm: 0.752257438168241, iteration: 49047
loss: 1.002060055732727,grad_norm: 0.9055781675078741, iteration: 49048
loss: 0.9943205714225769,grad_norm: 0.9303654318937972, iteration: 49049
loss: 0.9845986366271973,grad_norm: 0.908335013062404, iteration: 49050
loss: 1.0094292163848877,grad_norm: 0.7996650496215657, iteration: 49051
loss: 1.0519999265670776,grad_norm: 0.9999997345711682, iteration: 49052
loss: 0.9601791501045227,grad_norm: 0.9888794783572487, iteration: 49053
loss: 0.9935383200645447,grad_norm: 0.9195842094308745, iteration: 49054
loss: 1.025144100189209,grad_norm: 0.9999996158016791, iteration: 49055
loss: 1.0019210577011108,grad_norm: 0.9999991293759174, iteration: 49056
loss: 1.0290508270263672,grad_norm: 0.9236305972658372, iteration: 49057
loss: 0.9614114165306091,grad_norm: 0.9999990787799801, iteration: 49058
loss: 0.9727786779403687,grad_norm: 0.9874423728968335, iteration: 49059
loss: 1.0064841508865356,grad_norm: 0.9248203199834991, iteration: 49060
loss: 1.0216230154037476,grad_norm: 0.9999994442471845, iteration: 49061
loss: 1.020513892173767,grad_norm: 0.9999990232634399, iteration: 49062
loss: 0.9915909171104431,grad_norm: 0.999999057783568, iteration: 49063
loss: 1.0088127851486206,grad_norm: 0.999999050422365, iteration: 49064
loss: 0.9994688034057617,grad_norm: 0.7936048293619867, iteration: 49065
loss: 0.9957414269447327,grad_norm: 0.9999991554634797, iteration: 49066
loss: 1.0173978805541992,grad_norm: 0.9450172760781849, iteration: 49067
loss: 1.024041771888733,grad_norm: 0.8410635398658775, iteration: 49068
loss: 1.0513309240341187,grad_norm: 0.9999990686798157, iteration: 49069
loss: 0.9893827438354492,grad_norm: 0.9999990839196095, iteration: 49070
loss: 1.0136783123016357,grad_norm: 0.9999992634292643, iteration: 49071
loss: 1.030123233795166,grad_norm: 0.9999991960856887, iteration: 49072
loss: 0.9882960319519043,grad_norm: 0.9999995580276118, iteration: 49073
loss: 1.0192937850952148,grad_norm: 0.9999991395406099, iteration: 49074
loss: 0.9974724650382996,grad_norm: 0.9999991785770141, iteration: 49075
loss: 0.9372953772544861,grad_norm: 0.9999990592947513, iteration: 49076
loss: 0.9985090494155884,grad_norm: 0.9999992935244126, iteration: 49077
loss: 0.977668821811676,grad_norm: 0.9999992442074686, iteration: 49078
loss: 1.0034003257751465,grad_norm: 0.9021255133185245, iteration: 49079
loss: 1.0367136001586914,grad_norm: 0.9280552290559472, iteration: 49080
loss: 1.034411072731018,grad_norm: 0.8743140352761436, iteration: 49081
loss: 0.995769202709198,grad_norm: 0.7998443466430202, iteration: 49082
loss: 1.0113691091537476,grad_norm: 0.999999181263896, iteration: 49083
loss: 1.0486317873001099,grad_norm: 0.9610440410579105, iteration: 49084
loss: 0.9942517280578613,grad_norm: 0.9285902517685621, iteration: 49085
loss: 0.9867956042289734,grad_norm: 0.9211440152566763, iteration: 49086
loss: 0.9623331427574158,grad_norm: 0.9999992063314819, iteration: 49087
loss: 0.9842961430549622,grad_norm: 0.8598561581228293, iteration: 49088
loss: 0.9897860884666443,grad_norm: 0.9130108159901231, iteration: 49089
loss: 1.006070852279663,grad_norm: 0.9999989848163618, iteration: 49090
loss: 1.0269970893859863,grad_norm: 0.8078001366062588, iteration: 49091
loss: 0.9942886829376221,grad_norm: 0.9264516139290657, iteration: 49092
loss: 0.9778767824172974,grad_norm: 0.9999990543974215, iteration: 49093
loss: 0.9881733059883118,grad_norm: 0.9999989835436945, iteration: 49094
loss: 1.0022655725479126,grad_norm: 0.8577891160493331, iteration: 49095
loss: 1.0371792316436768,grad_norm: 0.9999993818277229, iteration: 49096
loss: 0.9886613488197327,grad_norm: 0.8467407093857309, iteration: 49097
loss: 1.0529587268829346,grad_norm: 0.9999992726403281, iteration: 49098
loss: 1.0281367301940918,grad_norm: 0.9751907910242194, iteration: 49099
loss: 0.9606959819793701,grad_norm: 0.9999991733257758, iteration: 49100
loss: 1.013311743736267,grad_norm: 0.8717221982146072, iteration: 49101
loss: 1.0342857837677002,grad_norm: 0.8920092536539319, iteration: 49102
loss: 1.0063235759735107,grad_norm: 0.9536635551120155, iteration: 49103
loss: 0.9868406057357788,grad_norm: 0.9999992071611885, iteration: 49104
loss: 1.0066550970077515,grad_norm: 0.6788879934383594, iteration: 49105
loss: 0.9739428162574768,grad_norm: 0.8755537254135439, iteration: 49106
loss: 1.0091636180877686,grad_norm: 0.9999990028916963, iteration: 49107
loss: 1.0064363479614258,grad_norm: 0.9999993272576378, iteration: 49108
loss: 1.0126259326934814,grad_norm: 0.9999992278523632, iteration: 49109
loss: 1.0371222496032715,grad_norm: 0.9999991187467339, iteration: 49110
loss: 0.9870402216911316,grad_norm: 0.9999993020465981, iteration: 49111
loss: 1.0038652420043945,grad_norm: 0.9999990030869053, iteration: 49112
loss: 1.0099139213562012,grad_norm: 0.9999991736450099, iteration: 49113
loss: 1.017638087272644,grad_norm: 0.999999175837842, iteration: 49114
loss: 0.9652382135391235,grad_norm: 0.8307242309395417, iteration: 49115
loss: 1.4957377910614014,grad_norm: 0.999999976728303, iteration: 49116
loss: 1.0125713348388672,grad_norm: 0.999999164936401, iteration: 49117
loss: 1.001062273979187,grad_norm: 0.9999994354910697, iteration: 49118
loss: 1.0111716985702515,grad_norm: 0.9999992894083605, iteration: 49119
loss: 0.9892265796661377,grad_norm: 0.8840181364954015, iteration: 49120
loss: 1.026428461074829,grad_norm: 0.9999989940885482, iteration: 49121
loss: 1.0676701068878174,grad_norm: 0.999999555198317, iteration: 49122
loss: 0.9977937340736389,grad_norm: 0.9999989833916196, iteration: 49123
loss: 1.0063074827194214,grad_norm: 0.9999991457301792, iteration: 49124
loss: 1.0217137336730957,grad_norm: 0.9731374299843587, iteration: 49125
loss: 1.0218544006347656,grad_norm: 0.9999991223152664, iteration: 49126
loss: 1.0108546018600464,grad_norm: 0.9999995926568184, iteration: 49127
loss: 1.0692294836044312,grad_norm: 0.8704284344161208, iteration: 49128
loss: 1.007684588432312,grad_norm: 0.808174477123479, iteration: 49129
loss: 0.9699525833129883,grad_norm: 0.9783149798036324, iteration: 49130
loss: 0.9763397574424744,grad_norm: 0.9399156109646626, iteration: 49131
loss: 0.97690749168396,grad_norm: 0.7699975649947542, iteration: 49132
loss: 1.0537238121032715,grad_norm: 0.9999996871536132, iteration: 49133
loss: 1.1294972896575928,grad_norm: 0.9999996062782545, iteration: 49134
loss: 1.035264492034912,grad_norm: 0.9260405406035391, iteration: 49135
loss: 0.9978206753730774,grad_norm: 0.8388066242832983, iteration: 49136
loss: 1.0752028226852417,grad_norm: 0.9999990217608706, iteration: 49137
loss: 1.0810651779174805,grad_norm: 0.9999992773951364, iteration: 49138
loss: 1.0214447975158691,grad_norm: 0.9111095870497423, iteration: 49139
loss: 1.038291096687317,grad_norm: 0.8314044660992786, iteration: 49140
loss: 1.0551413297653198,grad_norm: 0.9999995996331866, iteration: 49141
loss: 1.0604307651519775,grad_norm: 0.9043824392331864, iteration: 49142
loss: 1.0006675720214844,grad_norm: 0.8924831380606022, iteration: 49143
loss: 1.0257396697998047,grad_norm: 0.9999989557830269, iteration: 49144
loss: 0.98427814245224,grad_norm: 0.8789450487572846, iteration: 49145
loss: 1.0332480669021606,grad_norm: 0.9999991779531593, iteration: 49146
loss: 1.040071725845337,grad_norm: 0.9999998234127825, iteration: 49147
loss: 1.0663255453109741,grad_norm: 0.9999991186674392, iteration: 49148
loss: 1.1301325559616089,grad_norm: 0.9999999312505639, iteration: 49149
loss: 0.9811350107192993,grad_norm: 0.9999991406783951, iteration: 49150
loss: 1.0092273950576782,grad_norm: 0.9999991710693359, iteration: 49151
loss: 0.9972975254058838,grad_norm: 0.7828593501515221, iteration: 49152
loss: 1.0148158073425293,grad_norm: 0.7851123007104968, iteration: 49153
loss: 1.0062615871429443,grad_norm: 0.9999992275503227, iteration: 49154
loss: 0.9961370229721069,grad_norm: 0.7887917448950402, iteration: 49155
loss: 1.0091770887374878,grad_norm: 0.82981406410377, iteration: 49156
loss: 0.9626364707946777,grad_norm: 0.9999991177414101, iteration: 49157
loss: 1.0005912780761719,grad_norm: 0.9632169778536501, iteration: 49158
loss: 1.0022245645523071,grad_norm: 0.9999991141575949, iteration: 49159
loss: 1.0264840126037598,grad_norm: 0.8737272059829834, iteration: 49160
loss: 1.0341627597808838,grad_norm: 0.9356076479714761, iteration: 49161
loss: 1.0310683250427246,grad_norm: 0.9999995222074669, iteration: 49162
loss: 1.0259759426116943,grad_norm: 0.9999995571546126, iteration: 49163
loss: 0.9901370406150818,grad_norm: 0.999999039843604, iteration: 49164
loss: 1.0558199882507324,grad_norm: 0.9999992033083989, iteration: 49165
loss: 0.9903642535209656,grad_norm: 0.8573578628902948, iteration: 49166
loss: 0.9675920009613037,grad_norm: 0.8137911804875271, iteration: 49167
loss: 1.0197169780731201,grad_norm: 0.8852481510990433, iteration: 49168
loss: 0.9867303967475891,grad_norm: 0.905511472151331, iteration: 49169
loss: 0.9899764060974121,grad_norm: 0.9999990742769986, iteration: 49170
loss: 1.0332287549972534,grad_norm: 0.9999996120061841, iteration: 49171
loss: 1.0007178783416748,grad_norm: 0.866328896038157, iteration: 49172
loss: 0.9850181937217712,grad_norm: 0.9999990883511737, iteration: 49173
loss: 1.0488195419311523,grad_norm: 0.999999686933782, iteration: 49174
loss: 0.9584697484970093,grad_norm: 0.9984685212527764, iteration: 49175
loss: 0.9912661910057068,grad_norm: 0.9425688834133567, iteration: 49176
loss: 1.0104707479476929,grad_norm: 0.9999991831085334, iteration: 49177
loss: 0.9834497570991516,grad_norm: 0.9999991633553547, iteration: 49178
loss: 1.0157721042633057,grad_norm: 0.8604906520235297, iteration: 49179
loss: 1.0064867734909058,grad_norm: 0.9999993175587134, iteration: 49180
loss: 0.9959556460380554,grad_norm: 0.9999989494827018, iteration: 49181
loss: 0.983578085899353,grad_norm: 0.9999991932465039, iteration: 49182
loss: 0.987148106098175,grad_norm: 0.9999992624943749, iteration: 49183
loss: 1.0865986347198486,grad_norm: 0.9999993622409837, iteration: 49184
loss: 0.9588049650192261,grad_norm: 0.9145587992434435, iteration: 49185
loss: 0.9744600653648376,grad_norm: 0.9999995126205987, iteration: 49186
loss: 0.9682633280754089,grad_norm: 0.867213638222592, iteration: 49187
loss: 1.0037472248077393,grad_norm: 0.99999970774094, iteration: 49188
loss: 1.0430595874786377,grad_norm: 0.9999994884248354, iteration: 49189
loss: 0.9866905808448792,grad_norm: 0.9999992753504194, iteration: 49190
loss: 1.0065600872039795,grad_norm: 0.9999991587231366, iteration: 49191
loss: 1.030166506767273,grad_norm: 0.999999107502661, iteration: 49192
loss: 0.9636279344558716,grad_norm: 0.9262162446591695, iteration: 49193
loss: 1.0732355117797852,grad_norm: 0.9999994768264792, iteration: 49194
loss: 0.992363452911377,grad_norm: 0.9999989286044735, iteration: 49195
loss: 1.0298277139663696,grad_norm: 0.9999991393906413, iteration: 49196
loss: 0.9961977005004883,grad_norm: 0.8864422729856621, iteration: 49197
loss: 1.0323532819747925,grad_norm: 0.9999992453282183, iteration: 49198
loss: 1.0025899410247803,grad_norm: 0.7943120837340849, iteration: 49199
loss: 0.9853785037994385,grad_norm: 0.8423860900261808, iteration: 49200
loss: 1.0324245691299438,grad_norm: 0.9999992376468538, iteration: 49201
loss: 1.0111242532730103,grad_norm: 0.9999990119991659, iteration: 49202
loss: 1.0346843004226685,grad_norm: 0.9999990086259108, iteration: 49203
loss: 1.0150372982025146,grad_norm: 0.999999092379154, iteration: 49204
loss: 1.0578631162643433,grad_norm: 0.9999989986912814, iteration: 49205
loss: 0.9982660412788391,grad_norm: 0.999998921244824, iteration: 49206
loss: 1.0206363201141357,grad_norm: 0.9999990709743679, iteration: 49207
loss: 1.0321165323257446,grad_norm: 0.7394004289118183, iteration: 49208
loss: 0.9741626381874084,grad_norm: 0.9999991790537848, iteration: 49209
loss: 0.9696065187454224,grad_norm: 0.9999991872736588, iteration: 49210
loss: 0.9821733236312866,grad_norm: 0.999998960199811, iteration: 49211
loss: 1.0103528499603271,grad_norm: 0.9547462308045301, iteration: 49212
loss: 1.0562504529953003,grad_norm: 0.999999905888171, iteration: 49213
loss: 1.0453252792358398,grad_norm: 0.9999992683199984, iteration: 49214
loss: 1.1525611877441406,grad_norm: 0.9999992552649141, iteration: 49215
loss: 0.9890578389167786,grad_norm: 0.9999995236995861, iteration: 49216
loss: 1.0134930610656738,grad_norm: 0.7595503282679209, iteration: 49217
loss: 1.0183736085891724,grad_norm: 0.9999990934208586, iteration: 49218
loss: 1.0252593755722046,grad_norm: 0.9999993033922978, iteration: 49219
loss: 0.9885098338127136,grad_norm: 0.999999070667769, iteration: 49220
loss: 1.043516993522644,grad_norm: 0.9999993065537539, iteration: 49221
loss: 1.0276691913604736,grad_norm: 0.9396644170162635, iteration: 49222
loss: 1.0136176347732544,grad_norm: 0.9999990996151865, iteration: 49223
loss: 1.036558747291565,grad_norm: 0.9788382290742103, iteration: 49224
loss: 1.0021929740905762,grad_norm: 0.9999991667535746, iteration: 49225
loss: 1.0089387893676758,grad_norm: 0.9999992448055679, iteration: 49226
loss: 0.9878660440444946,grad_norm: 0.9999997551722482, iteration: 49227
loss: 1.0548882484436035,grad_norm: 0.7950232149351153, iteration: 49228
loss: 0.9760661125183105,grad_norm: 0.8943643249631452, iteration: 49229
loss: 1.0204764604568481,grad_norm: 0.8493789986550546, iteration: 49230
loss: 0.9778132438659668,grad_norm: 0.9487842900287332, iteration: 49231
loss: 1.0537794828414917,grad_norm: 0.9999995627490472, iteration: 49232
loss: 1.0210553407669067,grad_norm: 0.9999990561608831, iteration: 49233
loss: 1.0359946489334106,grad_norm: 0.9999991440094239, iteration: 49234
loss: 0.986981213092804,grad_norm: 0.9999990679887608, iteration: 49235
loss: 1.0006746053695679,grad_norm: 0.8794332792757352, iteration: 49236
loss: 0.9886229634284973,grad_norm: 0.935759603059553, iteration: 49237
loss: 0.992855429649353,grad_norm: 0.9999991641053065, iteration: 49238
loss: 0.9753412008285522,grad_norm: 0.9017370833519501, iteration: 49239
loss: 1.0031803846359253,grad_norm: 0.8761817056884954, iteration: 49240
loss: 0.9971502423286438,grad_norm: 0.9676473923674773, iteration: 49241
loss: 0.9652302861213684,grad_norm: 0.9948099735856016, iteration: 49242
loss: 1.0285453796386719,grad_norm: 0.9999991892182017, iteration: 49243
loss: 0.9656802415847778,grad_norm: 0.9532703186279923, iteration: 49244
loss: 0.9873405694961548,grad_norm: 0.9999997109308418, iteration: 49245
loss: 1.0447100400924683,grad_norm: 0.9999993828266005, iteration: 49246
loss: 1.0069702863693237,grad_norm: 0.9999991955038346, iteration: 49247
loss: 1.0471343994140625,grad_norm: 0.9999992019905396, iteration: 49248
loss: 1.0519886016845703,grad_norm: 0.9999992390120249, iteration: 49249
loss: 1.0293102264404297,grad_norm: 0.999998994592296, iteration: 49250
loss: 1.0715458393096924,grad_norm: 0.9999996098326496, iteration: 49251
loss: 1.0783565044403076,grad_norm: 0.9999995403698475, iteration: 49252
loss: 0.9988654255867004,grad_norm: 0.8064896017164299, iteration: 49253
loss: 0.978534996509552,grad_norm: 0.999999045851149, iteration: 49254
loss: 0.9749919176101685,grad_norm: 0.9983479829182235, iteration: 49255
loss: 0.9934797883033752,grad_norm: 0.955764907580474, iteration: 49256
loss: 1.0473636388778687,grad_norm: 0.9999990266073955, iteration: 49257
loss: 0.9804254174232483,grad_norm: 0.9058573553124374, iteration: 49258
loss: 0.9887256622314453,grad_norm: 0.9999993213876687, iteration: 49259
loss: 0.9960863590240479,grad_norm: 0.9999992597084207, iteration: 49260
loss: 1.0013220310211182,grad_norm: 0.8781212292684671, iteration: 49261
loss: 1.0239957571029663,grad_norm: 0.896466651598955, iteration: 49262
loss: 0.9849784970283508,grad_norm: 0.9999989995542216, iteration: 49263
loss: 1.0916188955307007,grad_norm: 0.9999992022719372, iteration: 49264
loss: 1.038690447807312,grad_norm: 0.8903658937450458, iteration: 49265
loss: 0.9953101277351379,grad_norm: 0.9738060044894813, iteration: 49266
loss: 1.0193978548049927,grad_norm: 0.8807292337807332, iteration: 49267
loss: 0.9927722811698914,grad_norm: 0.9662466748767681, iteration: 49268
loss: 1.0120090246200562,grad_norm: 0.9999991656742957, iteration: 49269
loss: 1.0422943830490112,grad_norm: 0.9999995619436451, iteration: 49270
loss: 1.006576657295227,grad_norm: 0.9999993266227419, iteration: 49271
loss: 0.9939249753952026,grad_norm: 0.8823474106705911, iteration: 49272
loss: 1.01710844039917,grad_norm: 0.8300428261043435, iteration: 49273
loss: 1.0030678510665894,grad_norm: 0.9935265456072001, iteration: 49274
loss: 1.0043878555297852,grad_norm: 0.8816366463509291, iteration: 49275
loss: 1.0297942161560059,grad_norm: 0.8490925418397292, iteration: 49276
loss: 1.0242817401885986,grad_norm: 0.9999991111667252, iteration: 49277
loss: 0.9737719893455505,grad_norm: 0.9999996728587076, iteration: 49278
loss: 1.008376121520996,grad_norm: 0.9999996045949612, iteration: 49279
loss: 1.0410499572753906,grad_norm: 0.9885083091048219, iteration: 49280
loss: 0.9863007664680481,grad_norm: 0.999999020546891, iteration: 49281
loss: 0.9916142225265503,grad_norm: 0.8029500195479193, iteration: 49282
loss: 0.9875043630599976,grad_norm: 0.9233572897277158, iteration: 49283
loss: 1.0059130191802979,grad_norm: 0.9999989736736314, iteration: 49284
loss: 1.0150551795959473,grad_norm: 0.9272358077809426, iteration: 49285
loss: 1.0371288061141968,grad_norm: 0.8214377702534057, iteration: 49286
loss: 1.027877688407898,grad_norm: 0.899575339502755, iteration: 49287
loss: 1.032165765762329,grad_norm: 0.9999992922809904, iteration: 49288
loss: 0.9945228695869446,grad_norm: 0.8208738608110303, iteration: 49289
loss: 1.1404772996902466,grad_norm: 0.999999550071508, iteration: 49290
loss: 0.9915074110031128,grad_norm: 0.9999992014253172, iteration: 49291
loss: 1.0434871912002563,grad_norm: 1.0000000900902064, iteration: 49292
loss: 1.0157426595687866,grad_norm: 0.8800220942750975, iteration: 49293
loss: 0.993008017539978,grad_norm: 0.9950099129289235, iteration: 49294
loss: 0.9657758474349976,grad_norm: 0.9999990688544035, iteration: 49295
loss: 1.0014735460281372,grad_norm: 0.9999991768148656, iteration: 49296
loss: 1.004007339477539,grad_norm: 0.8666228253595935, iteration: 49297
loss: 1.1169850826263428,grad_norm: 0.9999990872007871, iteration: 49298
loss: 0.9812752604484558,grad_norm: 0.9191137006343612, iteration: 49299
loss: 0.9693987965583801,grad_norm: 0.9696211725148394, iteration: 49300
loss: 1.0537608861923218,grad_norm: 0.9425581093083034, iteration: 49301
loss: 0.9957565665245056,grad_norm: 0.9999993207829039, iteration: 49302
loss: 0.9940779805183411,grad_norm: 0.9763211461595471, iteration: 49303
loss: 0.9965099096298218,grad_norm: 0.9999993767478068, iteration: 49304
loss: 1.100401520729065,grad_norm: 0.9999997270037921, iteration: 49305
loss: 0.959857165813446,grad_norm: 0.9999991870083951, iteration: 49306
loss: 0.9987982511520386,grad_norm: 0.8945975140671415, iteration: 49307
loss: 0.9682034254074097,grad_norm: 0.8223067012399122, iteration: 49308
loss: 1.0066163539886475,grad_norm: 0.9999992498641838, iteration: 49309
loss: 0.9970038533210754,grad_norm: 0.8896253962416091, iteration: 49310
loss: 0.9581090807914734,grad_norm: 0.8497810835285612, iteration: 49311
loss: 1.0707305669784546,grad_norm: 0.9999999826157882, iteration: 49312
loss: 1.113097071647644,grad_norm: 0.9999993333429071, iteration: 49313
loss: 0.9846234917640686,grad_norm: 0.9004919007645533, iteration: 49314
loss: 1.0673143863677979,grad_norm: 0.9999996595209659, iteration: 49315
loss: 0.9980261921882629,grad_norm: 0.9304131600073099, iteration: 49316
loss: 1.0221943855285645,grad_norm: 0.8835148065232313, iteration: 49317
loss: 0.990668773651123,grad_norm: 0.8584614904107497, iteration: 49318
loss: 0.9911519885063171,grad_norm: 0.9028618125545974, iteration: 49319
loss: 1.0351057052612305,grad_norm: 0.9999995336764445, iteration: 49320
loss: 1.0066789388656616,grad_norm: 0.9999994903389277, iteration: 49321
loss: 1.0031583309173584,grad_norm: 0.9999992290719198, iteration: 49322
loss: 0.9801574349403381,grad_norm: 0.9534768135549929, iteration: 49323
loss: 1.0077935457229614,grad_norm: 0.9427799556543672, iteration: 49324
loss: 1.0181776285171509,grad_norm: 0.9999998066162548, iteration: 49325
loss: 1.0062785148620605,grad_norm: 0.9999997184358239, iteration: 49326
loss: 1.0260626077651978,grad_norm: 0.9690506062429599, iteration: 49327
loss: 1.0139074325561523,grad_norm: 0.9999990067035187, iteration: 49328
loss: 1.031467080116272,grad_norm: 0.9999990972595256, iteration: 49329
loss: 1.016470193862915,grad_norm: 0.9999990683816359, iteration: 49330
loss: 1.0106585025787354,grad_norm: 0.7611055319704947, iteration: 49331
loss: 1.033679485321045,grad_norm: 0.9999999283047406, iteration: 49332
loss: 1.0378611087799072,grad_norm: 0.9328449293334264, iteration: 49333
loss: 1.0349291563034058,grad_norm: 0.9775201665109298, iteration: 49334
loss: 1.0027530193328857,grad_norm: 0.8794015715769188, iteration: 49335
loss: 1.0003557205200195,grad_norm: 0.9999991184368381, iteration: 49336
loss: 1.0114192962646484,grad_norm: 0.9999990144643068, iteration: 49337
loss: 1.0033527612686157,grad_norm: 0.8752296728014074, iteration: 49338
loss: 1.0156711339950562,grad_norm: 0.9999990974134363, iteration: 49339
loss: 0.9951502084732056,grad_norm: 0.9999990982841012, iteration: 49340
loss: 1.0655403137207031,grad_norm: 0.999999113882452, iteration: 49341
loss: 1.0229562520980835,grad_norm: 0.918122664397046, iteration: 49342
loss: 0.9953852891921997,grad_norm: 0.9999990563588758, iteration: 49343
loss: 1.000636339187622,grad_norm: 0.9999996491642499, iteration: 49344
loss: 0.992024302482605,grad_norm: 0.9999991727925038, iteration: 49345
loss: 0.9699515700340271,grad_norm: 0.8920651935656174, iteration: 49346
loss: 0.9918693900108337,grad_norm: 0.9385023056724364, iteration: 49347
loss: 0.972815752029419,grad_norm: 0.8445140124834922, iteration: 49348
loss: 1.0245826244354248,grad_norm: 0.9999992581430301, iteration: 49349
loss: 0.9930623769760132,grad_norm: 0.9638634537538519, iteration: 49350
loss: 0.9733012318611145,grad_norm: 0.9969909243431595, iteration: 49351
loss: 1.0008034706115723,grad_norm: 0.8524725951686956, iteration: 49352
loss: 0.9544017314910889,grad_norm: 0.9999992615031236, iteration: 49353
loss: 1.028113842010498,grad_norm: 0.8400095514874194, iteration: 49354
loss: 1.0125318765640259,grad_norm: 0.9895252066867047, iteration: 49355
loss: 1.0271539688110352,grad_norm: 0.9999991287843597, iteration: 49356
loss: 0.9973095655441284,grad_norm: 0.9999991170398198, iteration: 49357
loss: 0.9855561256408691,grad_norm: 0.914575160814565, iteration: 49358
loss: 0.9888190031051636,grad_norm: 0.9193588379990448, iteration: 49359
loss: 1.0155147314071655,grad_norm: 0.7258193647701956, iteration: 49360
loss: 0.9886830449104309,grad_norm: 0.8659466224845719, iteration: 49361
loss: 1.011679768562317,grad_norm: 0.9999990641666561, iteration: 49362
loss: 1.0084850788116455,grad_norm: 0.8772517700192255, iteration: 49363
loss: 1.1001248359680176,grad_norm: 0.9999991934013671, iteration: 49364
loss: 1.0201380252838135,grad_norm: 0.97593217444965, iteration: 49365
loss: 1.0135390758514404,grad_norm: 0.9999990509411454, iteration: 49366
loss: 1.0991896390914917,grad_norm: 0.9999996266665031, iteration: 49367
loss: 0.995832085609436,grad_norm: 0.9999994718121648, iteration: 49368
loss: 0.9691655039787292,grad_norm: 0.9847653892666379, iteration: 49369
loss: 1.0002316236495972,grad_norm: 0.999999142687205, iteration: 49370
loss: 1.0631049871444702,grad_norm: 0.9999994268778932, iteration: 49371
loss: 0.9802607893943787,grad_norm: 0.9081976108536087, iteration: 49372
loss: 1.0056220293045044,grad_norm: 0.9129503092511632, iteration: 49373
loss: 0.9871070384979248,grad_norm: 0.9999991654032699, iteration: 49374
loss: 1.0867501497268677,grad_norm: 0.9999992476798995, iteration: 49375
loss: 1.007962703704834,grad_norm: 0.9999995165697765, iteration: 49376
loss: 1.00033700466156,grad_norm: 0.9999990902293844, iteration: 49377
loss: 1.0118995904922485,grad_norm: 0.9448783084785417, iteration: 49378
loss: 1.0439821481704712,grad_norm: 0.8372966076674486, iteration: 49379
loss: 1.0454840660095215,grad_norm: 0.9318670962402971, iteration: 49380
loss: 1.0014925003051758,grad_norm: 0.9999990613093449, iteration: 49381
loss: 0.9904837608337402,grad_norm: 0.9479123210898902, iteration: 49382
loss: 1.011743426322937,grad_norm: 0.9999990940813904, iteration: 49383
loss: 1.082400918006897,grad_norm: 0.9670580952417407, iteration: 49384
loss: 0.9874606728553772,grad_norm: 0.9473800855534771, iteration: 49385
loss: 1.0168637037277222,grad_norm: 0.9999993278639554, iteration: 49386
loss: 0.9843059182167053,grad_norm: 0.9508575066862106, iteration: 49387
loss: 1.0480847358703613,grad_norm: 0.9999995343584032, iteration: 49388
loss: 1.0078884363174438,grad_norm: 0.9633772751743439, iteration: 49389
loss: 0.9943519830703735,grad_norm: 0.9999990938481137, iteration: 49390
loss: 1.01075279712677,grad_norm: 0.8294959799122748, iteration: 49391
loss: 0.9735193848609924,grad_norm: 0.9999991938907349, iteration: 49392
loss: 1.008363127708435,grad_norm: 0.9479062696228066, iteration: 49393
loss: 1.023196816444397,grad_norm: 0.8746128596645422, iteration: 49394
loss: 1.0168567895889282,grad_norm: 0.8448252552098845, iteration: 49395
loss: 1.0054521560668945,grad_norm: 0.9999992038543019, iteration: 49396
loss: 1.0132797956466675,grad_norm: 0.9999993505146304, iteration: 49397
loss: 0.993324339389801,grad_norm: 0.9999992968903645, iteration: 49398
loss: 0.9736947417259216,grad_norm: 0.9341875493440143, iteration: 49399
loss: 0.9856594204902649,grad_norm: 0.9998284064042599, iteration: 49400
loss: 1.0896580219268799,grad_norm: 0.9999992299742343, iteration: 49401
loss: 1.0039347410202026,grad_norm: 0.820945188554244, iteration: 49402
loss: 0.9941058158874512,grad_norm: 0.9999990338206507, iteration: 49403
loss: 1.001172423362732,grad_norm: 0.999999121420687, iteration: 49404
loss: 0.999692976474762,grad_norm: 0.9999995741989475, iteration: 49405
loss: 0.994295597076416,grad_norm: 0.9999991795345624, iteration: 49406
loss: 0.996928870677948,grad_norm: 0.8793865011096513, iteration: 49407
loss: 1.0094977617263794,grad_norm: 0.9999990534916309, iteration: 49408
loss: 0.995272159576416,grad_norm: 0.8935507633764678, iteration: 49409
loss: 1.0115965604782104,grad_norm: 0.9999991739326197, iteration: 49410
loss: 1.016783356666565,grad_norm: 0.9999991472220784, iteration: 49411
loss: 1.021598219871521,grad_norm: 0.9999994062006062, iteration: 49412
loss: 0.976700484752655,grad_norm: 0.9332766351505215, iteration: 49413
loss: 1.0475797653198242,grad_norm: 0.9999995955824231, iteration: 49414
loss: 0.9692764282226562,grad_norm: 0.9999991976443766, iteration: 49415
loss: 1.0169384479522705,grad_norm: 0.9962761195357387, iteration: 49416
loss: 0.9886260032653809,grad_norm: 0.9907675682690567, iteration: 49417
loss: 1.0222653150558472,grad_norm: 0.9999991131874365, iteration: 49418
loss: 1.0213998556137085,grad_norm: 0.8737789091096178, iteration: 49419
loss: 1.0028337240219116,grad_norm: 0.9999991009154542, iteration: 49420
loss: 1.024169921875,grad_norm: 0.9999995201559763, iteration: 49421
loss: 1.0932018756866455,grad_norm: 0.9999994835279294, iteration: 49422
loss: 0.9790704250335693,grad_norm: 0.959081922948634, iteration: 49423
loss: 0.981200098991394,grad_norm: 0.9999993107334391, iteration: 49424
loss: 1.0542198419570923,grad_norm: 0.837445026423972, iteration: 49425
loss: 0.9777834415435791,grad_norm: 0.8990021679999562, iteration: 49426
loss: 1.0332188606262207,grad_norm: 0.9525450719452508, iteration: 49427
loss: 0.9549989700317383,grad_norm: 0.9553218008047544, iteration: 49428
loss: 1.0382624864578247,grad_norm: 0.9999991642561153, iteration: 49429
loss: 1.0183340311050415,grad_norm: 0.9264056762596766, iteration: 49430
loss: 0.9807034134864807,grad_norm: 0.9102720615461012, iteration: 49431
loss: 0.9984182119369507,grad_norm: 0.7846015867786815, iteration: 49432
loss: 0.9863898754119873,grad_norm: 0.8936744568156972, iteration: 49433
loss: 1.0045992136001587,grad_norm: 0.9999994133305335, iteration: 49434
loss: 0.9973788261413574,grad_norm: 0.9999996912830145, iteration: 49435
loss: 1.015799880027771,grad_norm: 0.9020834303927262, iteration: 49436
loss: 1.021202802658081,grad_norm: 0.9677562829977242, iteration: 49437
loss: 0.9934577345848083,grad_norm: 0.8386533972713297, iteration: 49438
loss: 1.1064642667770386,grad_norm: 0.9999996822771868, iteration: 49439
loss: 0.9762062430381775,grad_norm: 0.996580344585586, iteration: 49440
loss: 0.9761060476303101,grad_norm: 0.9047813438684964, iteration: 49441
loss: 1.0072970390319824,grad_norm: 0.9999994597362389, iteration: 49442
loss: 0.9830880761146545,grad_norm: 0.9462484952561888, iteration: 49443
loss: 0.9758985638618469,grad_norm: 0.8848088494414716, iteration: 49444
loss: 0.9953315258026123,grad_norm: 0.9524973407139422, iteration: 49445
loss: 0.9837034344673157,grad_norm: 0.8576201041796279, iteration: 49446
loss: 1.0475094318389893,grad_norm: 0.9999998105285028, iteration: 49447
loss: 1.0032930374145508,grad_norm: 0.9497858776291718, iteration: 49448
loss: 1.0375794172286987,grad_norm: 0.9655856811223161, iteration: 49449
loss: 0.9836050271987915,grad_norm: 0.9323301806886052, iteration: 49450
loss: 1.033125638961792,grad_norm: 0.9999990721341765, iteration: 49451
loss: 0.9563000202178955,grad_norm: 0.9119495274846924, iteration: 49452
loss: 0.9913545846939087,grad_norm: 0.9999992691601897, iteration: 49453
loss: 0.9822515249252319,grad_norm: 0.9999990113140432, iteration: 49454
loss: 1.014232873916626,grad_norm: 0.999999082986387, iteration: 49455
loss: 0.9892992377281189,grad_norm: 0.8902359850399567, iteration: 49456
loss: 1.0122164487838745,grad_norm: 0.9999996155964915, iteration: 49457
loss: 0.9798059463500977,grad_norm: 0.9999993696148288, iteration: 49458
loss: 0.9857691526412964,grad_norm: 0.9999990609972235, iteration: 49459
loss: 0.9887287020683289,grad_norm: 0.9384121171867849, iteration: 49460
loss: 1.0155889987945557,grad_norm: 0.9807590791728885, iteration: 49461
loss: 0.9963677525520325,grad_norm: 0.9999992069752812, iteration: 49462
loss: 1.023632287979126,grad_norm: 0.9999995479631006, iteration: 49463
loss: 0.9803738594055176,grad_norm: 0.8483811574780771, iteration: 49464
loss: 1.0044373273849487,grad_norm: 0.9999991096934853, iteration: 49465
loss: 1.017026662826538,grad_norm: 0.8781519206609065, iteration: 49466
loss: 1.0100914239883423,grad_norm: 0.9999991048989549, iteration: 49467
loss: 1.0084307193756104,grad_norm: 0.8916966191024507, iteration: 49468
loss: 1.0057954788208008,grad_norm: 0.8419305622897474, iteration: 49469
loss: 1.0569297075271606,grad_norm: 0.9999997252990993, iteration: 49470
loss: 1.0496100187301636,grad_norm: 0.961124372254375, iteration: 49471
loss: 1.0329110622406006,grad_norm: 0.8497244936516255, iteration: 49472
loss: 1.0012112855911255,grad_norm: 0.862879933138491, iteration: 49473
loss: 1.017185926437378,grad_norm: 0.9999990296271455, iteration: 49474
loss: 1.0506895780563354,grad_norm: 0.9999995402200229, iteration: 49475
loss: 0.9993197917938232,grad_norm: 0.9585381081993809, iteration: 49476
loss: 0.9899181127548218,grad_norm: 0.9999991564999975, iteration: 49477
loss: 0.989328920841217,grad_norm: 0.9999990783559961, iteration: 49478
loss: 1.0233925580978394,grad_norm: 0.986731900047692, iteration: 49479
loss: 1.0258947610855103,grad_norm: 0.8292512298853194, iteration: 49480
loss: 1.007782220840454,grad_norm: 0.9999993201444184, iteration: 49481
loss: 1.0671558380126953,grad_norm: 0.9999998138058843, iteration: 49482
loss: 0.9875466227531433,grad_norm: 0.9999991508895457, iteration: 49483
loss: 1.0252032279968262,grad_norm: 0.9999990244944819, iteration: 49484
loss: 0.9597306847572327,grad_norm: 0.8624068914896292, iteration: 49485
loss: 1.0424705743789673,grad_norm: 0.9999991328466041, iteration: 49486
loss: 1.0032634735107422,grad_norm: 0.9624893777381722, iteration: 49487
loss: 1.013329267501831,grad_norm: 0.8623858511365138, iteration: 49488
loss: 1.0164313316345215,grad_norm: 0.9999997365088427, iteration: 49489
loss: 1.0028700828552246,grad_norm: 0.8260924266896982, iteration: 49490
loss: 0.9765235781669617,grad_norm: 0.9999990908782798, iteration: 49491
loss: 0.9938249588012695,grad_norm: 0.967152810101741, iteration: 49492
loss: 1.020901083946228,grad_norm: 0.9999990280950334, iteration: 49493
loss: 1.0322332382202148,grad_norm: 0.9999995147331019, iteration: 49494
loss: 0.9998202919960022,grad_norm: 0.9999991269908194, iteration: 49495
loss: 1.040587306022644,grad_norm: 0.9999998983321041, iteration: 49496
loss: 0.9916413426399231,grad_norm: 0.8970295144847354, iteration: 49497
loss: 1.012524127960205,grad_norm: 0.8749941937212994, iteration: 49498
loss: 0.9865473508834839,grad_norm: 0.999999184319375, iteration: 49499
loss: 1.0178871154785156,grad_norm: 0.8976855638050141, iteration: 49500
loss: 0.9592555165290833,grad_norm: 0.8350372356176703, iteration: 49501
loss: 1.0137368440628052,grad_norm: 0.9623418214931883, iteration: 49502
loss: 1.0011016130447388,grad_norm: 0.9906462374921196, iteration: 49503
loss: 1.0066897869110107,grad_norm: 0.9999992780551372, iteration: 49504
loss: 1.0430598258972168,grad_norm: 0.9253619737779826, iteration: 49505
loss: 1.0166852474212646,grad_norm: 0.9999991721645793, iteration: 49506
loss: 1.014952540397644,grad_norm: 0.853819618328594, iteration: 49507
loss: 1.0377107858657837,grad_norm: 0.999999816890434, iteration: 49508
loss: 1.0142461061477661,grad_norm: 0.9101553324091927, iteration: 49509
loss: 1.014456868171692,grad_norm: 0.8647051582956455, iteration: 49510
loss: 0.9854421019554138,grad_norm: 0.9294541580988261, iteration: 49511
loss: 1.0022132396697998,grad_norm: 0.9762149735361786, iteration: 49512
loss: 1.080999493598938,grad_norm: 0.9999990388496967, iteration: 49513
loss: 0.967719316482544,grad_norm: 0.9507863276023957, iteration: 49514
loss: 1.0165998935699463,grad_norm: 0.9999994646237936, iteration: 49515
loss: 1.0008559226989746,grad_norm: 0.7813320365776949, iteration: 49516
loss: 1.1288648843765259,grad_norm: 0.999999855533881, iteration: 49517
loss: 0.9762921333312988,grad_norm: 0.9367599715876385, iteration: 49518
loss: 1.0485081672668457,grad_norm: 0.9999992629325115, iteration: 49519
loss: 0.9655601978302002,grad_norm: 0.888463024621121, iteration: 49520
loss: 1.0058281421661377,grad_norm: 0.8585844053967091, iteration: 49521
loss: 1.049291968345642,grad_norm: 0.79941426808773, iteration: 49522
loss: 1.0037939548492432,grad_norm: 0.9999990855512513, iteration: 49523
loss: 0.982828676700592,grad_norm: 0.9698287753816522, iteration: 49524
loss: 0.9800935387611389,grad_norm: 0.8725091663539565, iteration: 49525
loss: 0.972448468208313,grad_norm: 0.8013394918634097, iteration: 49526
loss: 0.9752285480499268,grad_norm: 0.9822718539339517, iteration: 49527
loss: 0.9887324571609497,grad_norm: 0.8727007174551885, iteration: 49528
loss: 1.009050965309143,grad_norm: 0.9780883983040352, iteration: 49529
loss: 1.0054537057876587,grad_norm: 0.9999990152685414, iteration: 49530
loss: 1.0406688451766968,grad_norm: 0.9999995684483011, iteration: 49531
loss: 0.959952712059021,grad_norm: 0.8561000640003233, iteration: 49532
loss: 1.0004066228866577,grad_norm: 0.9714803009600529, iteration: 49533
loss: 0.9843913316726685,grad_norm: 0.9999991765978665, iteration: 49534
loss: 1.0192099809646606,grad_norm: 0.8584982700326406, iteration: 49535
loss: 1.0168588161468506,grad_norm: 0.8780487134052702, iteration: 49536
loss: 1.019489049911499,grad_norm: 0.9999990932195439, iteration: 49537
loss: 1.0008984804153442,grad_norm: 0.8733244933739088, iteration: 49538
loss: 1.0293723344802856,grad_norm: 0.8790518837829238, iteration: 49539
loss: 0.9949501156806946,grad_norm: 0.9999990371386974, iteration: 49540
loss: 0.9904448986053467,grad_norm: 0.9999993642874423, iteration: 49541
loss: 1.0164191722869873,grad_norm: 0.9820128570793732, iteration: 49542
loss: 0.9754330515861511,grad_norm: 0.8000232627385624, iteration: 49543
loss: 0.9978389143943787,grad_norm: 0.9116056287696715, iteration: 49544
loss: 0.992411196231842,grad_norm: 0.9150075282301278, iteration: 49545
loss: 0.9909014105796814,grad_norm: 0.9350231520955686, iteration: 49546
loss: 1.0560581684112549,grad_norm: 0.9973324239478323, iteration: 49547
loss: 1.0203555822372437,grad_norm: 0.8455310594398615, iteration: 49548
loss: 1.0007719993591309,grad_norm: 0.88628224786983, iteration: 49549
loss: 0.9398066401481628,grad_norm: 0.9999990898139984, iteration: 49550
loss: 1.0315115451812744,grad_norm: 0.9218891530166532, iteration: 49551
loss: 1.0285338163375854,grad_norm: 0.8939938475801462, iteration: 49552
loss: 0.9921362400054932,grad_norm: 0.9596875564751425, iteration: 49553
loss: 1.0189265012741089,grad_norm: 0.9999989428985642, iteration: 49554
loss: 1.018410325050354,grad_norm: 0.9116814624149516, iteration: 49555
loss: 0.984851062297821,grad_norm: 0.7682993039597587, iteration: 49556
loss: 1.0150071382522583,grad_norm: 0.867780424796853, iteration: 49557
loss: 0.9600856900215149,grad_norm: 0.8427955947901218, iteration: 49558
loss: 1.032537579536438,grad_norm: 0.9837618566244898, iteration: 49559
loss: 0.9768953919410706,grad_norm: 0.8796840180987991, iteration: 49560
loss: 0.9986565709114075,grad_norm: 0.9999989702253719, iteration: 49561
loss: 1.0180184841156006,grad_norm: 0.942116712267454, iteration: 49562
loss: 1.0134985446929932,grad_norm: 0.8485419096119472, iteration: 49563
loss: 0.9950963854789734,grad_norm: 0.8451388209431224, iteration: 49564
loss: 1.018127202987671,grad_norm: 0.9999990631566109, iteration: 49565
loss: 0.9980230331420898,grad_norm: 0.9999991617547322, iteration: 49566
loss: 0.9763789772987366,grad_norm: 0.8952354630897029, iteration: 49567
loss: 0.9711765050888062,grad_norm: 0.8901379353246207, iteration: 49568
loss: 1.0210654735565186,grad_norm: 0.9541600879181539, iteration: 49569
loss: 1.0291551351547241,grad_norm: 0.999999097335789, iteration: 49570
loss: 1.0068550109863281,grad_norm: 0.9087227209570685, iteration: 49571
loss: 1.0084837675094604,grad_norm: 0.8974788817845598, iteration: 49572
loss: 0.9372896552085876,grad_norm: 0.9999989886028282, iteration: 49573
loss: 0.99201500415802,grad_norm: 0.733006685729636, iteration: 49574
loss: 0.9867752194404602,grad_norm: 0.9999996002021245, iteration: 49575
loss: 0.9893372058868408,grad_norm: 0.8344767988011136, iteration: 49576
loss: 1.0404356718063354,grad_norm: 0.9999992642392156, iteration: 49577
loss: 1.0573420524597168,grad_norm: 0.9534178366679738, iteration: 49578
loss: 1.0149986743927002,grad_norm: 0.9538890156715717, iteration: 49579
loss: 1.0158251523971558,grad_norm: 0.9361452536608279, iteration: 49580
loss: 0.9982047080993652,grad_norm: 0.8015987070013371, iteration: 49581
loss: 1.003040075302124,grad_norm: 0.99999904020152, iteration: 49582
loss: 0.996886670589447,grad_norm: 0.8689478212082333, iteration: 49583
loss: 1.0196030139923096,grad_norm: 0.9999999613232862, iteration: 49584
loss: 0.9618882536888123,grad_norm: 0.999999021664648, iteration: 49585
loss: 0.9623463153839111,grad_norm: 0.9999991642377358, iteration: 49586
loss: 1.0351704359054565,grad_norm: 0.9194351632043236, iteration: 49587
loss: 0.9964500665664673,grad_norm: 0.9999992287555034, iteration: 49588
loss: 0.9975062608718872,grad_norm: 0.9999992201992375, iteration: 49589
loss: 0.9760128855705261,grad_norm: 0.8248491933580641, iteration: 49590
loss: 1.012844443321228,grad_norm: 0.8299803165888461, iteration: 49591
loss: 0.9823272228240967,grad_norm: 0.9999989870051498, iteration: 49592
loss: 0.9784227609634399,grad_norm: 0.8227705121001387, iteration: 49593
loss: 1.0185543298721313,grad_norm: 0.9999990930206888, iteration: 49594
loss: 0.9832665920257568,grad_norm: 0.900036372900061, iteration: 49595
loss: 1.021485686302185,grad_norm: 0.9963820896693979, iteration: 49596
loss: 1.0089482069015503,grad_norm: 0.8956910204419023, iteration: 49597
loss: 1.0439977645874023,grad_norm: 0.9165367815199231, iteration: 49598
loss: 1.0177838802337646,grad_norm: 0.9999990240315082, iteration: 49599
loss: 0.9980784058570862,grad_norm: 0.902715318885693, iteration: 49600
loss: 0.9819048643112183,grad_norm: 0.999999118367337, iteration: 49601
loss: 1.0937436819076538,grad_norm: 0.9999994993427467, iteration: 49602
loss: 1.0110834836959839,grad_norm: 0.994438233818189, iteration: 49603
loss: 1.0093464851379395,grad_norm: 0.9756469534644132, iteration: 49604
loss: 1.0079790353775024,grad_norm: 0.9334147257669152, iteration: 49605
loss: 0.9911224842071533,grad_norm: 0.9999995586196434, iteration: 49606
loss: 1.014389991760254,grad_norm: 0.9999993998173714, iteration: 49607
loss: 0.9719199538230896,grad_norm: 0.9728111489588549, iteration: 49608
loss: 0.9996443390846252,grad_norm: 0.9999991889418343, iteration: 49609
loss: 1.0391955375671387,grad_norm: 0.9581213287385503, iteration: 49610
loss: 1.0180364847183228,grad_norm: 0.9999990563187551, iteration: 49611
loss: 0.9925353527069092,grad_norm: 0.8361926339940029, iteration: 49612
loss: 0.9775550365447998,grad_norm: 0.9999991786809385, iteration: 49613
loss: 1.051746129989624,grad_norm: 0.9999993940131834, iteration: 49614
loss: 1.0154447555541992,grad_norm: 0.895337878518775, iteration: 49615
loss: 0.9597784280776978,grad_norm: 0.8956886789492363, iteration: 49616
loss: 1.0059599876403809,grad_norm: 0.9999991889776436, iteration: 49617
loss: 1.034377932548523,grad_norm: 0.9909893079459237, iteration: 49618
loss: 1.0247644186019897,grad_norm: 0.9060774805248587, iteration: 49619
loss: 1.0218225717544556,grad_norm: 0.999999756386685, iteration: 49620
loss: 0.9883416891098022,grad_norm: 0.9999991373724226, iteration: 49621
loss: 0.9758568406105042,grad_norm: 0.9999990855647249, iteration: 49622
loss: 1.021893858909607,grad_norm: 0.830385204219608, iteration: 49623
loss: 0.9728820323944092,grad_norm: 0.9272338899612921, iteration: 49624
loss: 1.0190848112106323,grad_norm: 0.9346154449853662, iteration: 49625
loss: 1.0038939714431763,grad_norm: 0.9999991186393056, iteration: 49626
loss: 1.0542840957641602,grad_norm: 0.9999993416882098, iteration: 49627
loss: 1.0345157384872437,grad_norm: 0.9999993919208489, iteration: 49628
loss: 1.0198428630828857,grad_norm: 0.9172315302581093, iteration: 49629
loss: 0.9916797876358032,grad_norm: 0.8073303135953592, iteration: 49630
loss: 0.9842492938041687,grad_norm: 0.9999990438006109, iteration: 49631
loss: 1.0416233539581299,grad_norm: 0.9999995995141662, iteration: 49632
loss: 0.9772825241088867,grad_norm: 0.8807498658914525, iteration: 49633
loss: 1.011449933052063,grad_norm: 0.9999990674077072, iteration: 49634
loss: 1.0142879486083984,grad_norm: 0.9199927867217159, iteration: 49635
loss: 1.0206952095031738,grad_norm: 0.9999993413114009, iteration: 49636
loss: 0.9981129765510559,grad_norm: 0.9999992187067127, iteration: 49637
loss: 1.0389423370361328,grad_norm: 0.9999994443659692, iteration: 49638
loss: 1.0091265439987183,grad_norm: 0.8622639313886852, iteration: 49639
loss: 0.984004020690918,grad_norm: 0.9999989300439655, iteration: 49640
loss: 1.0041433572769165,grad_norm: 0.7509863333440562, iteration: 49641
loss: 1.0426626205444336,grad_norm: 0.9999995081708876, iteration: 49642
loss: 0.9837323427200317,grad_norm: 0.8474308012500977, iteration: 49643
loss: 1.0308330059051514,grad_norm: 0.7816700522599149, iteration: 49644
loss: 0.9449185132980347,grad_norm: 0.7527418651213289, iteration: 49645
loss: 1.0144997835159302,grad_norm: 0.7979013873462631, iteration: 49646
loss: 1.028746485710144,grad_norm: 0.8475821001294853, iteration: 49647
loss: 1.021990180015564,grad_norm: 0.9999990363074864, iteration: 49648
loss: 1.029742956161499,grad_norm: 0.9029564905546184, iteration: 49649
loss: 1.0047845840454102,grad_norm: 0.926234845570239, iteration: 49650
loss: 1.0904542207717896,grad_norm: 0.9999995293465949, iteration: 49651
loss: 1.005116581916809,grad_norm: 0.9425436140270385, iteration: 49652
loss: 0.9874265789985657,grad_norm: 0.8863841319142319, iteration: 49653
loss: 1.0320430994033813,grad_norm: 0.9999991003602331, iteration: 49654
loss: 1.0500974655151367,grad_norm: 0.999999236536854, iteration: 49655
loss: 0.959883987903595,grad_norm: 0.7772840384665345, iteration: 49656
loss: 0.9971058368682861,grad_norm: 0.9915441895158267, iteration: 49657
loss: 0.9976833462715149,grad_norm: 0.7157006094282151, iteration: 49658
loss: 1.0052891969680786,grad_norm: 0.999999207827087, iteration: 49659
loss: 1.0176948308944702,grad_norm: 0.9999991647601065, iteration: 49660
loss: 1.003529667854309,grad_norm: 0.7230217544335227, iteration: 49661
loss: 1.013064980506897,grad_norm: 0.9999993339061786, iteration: 49662
loss: 1.0341590642929077,grad_norm: 0.9999991670580397, iteration: 49663
loss: 1.0169975757598877,grad_norm: 0.9999991285587041, iteration: 49664
loss: 1.0050990581512451,grad_norm: 0.9999998621130194, iteration: 49665
loss: 0.9919662475585938,grad_norm: 0.889594186874033, iteration: 49666
loss: 1.0383671522140503,grad_norm: 0.9998813567326158, iteration: 49667
loss: 1.0462901592254639,grad_norm: 0.9999993467337568, iteration: 49668
loss: 1.020281195640564,grad_norm: 0.9247473225293765, iteration: 49669
loss: 1.011845588684082,grad_norm: 0.999999173380959, iteration: 49670
loss: 0.9854974150657654,grad_norm: 0.7790498336194107, iteration: 49671
loss: 0.951561689376831,grad_norm: 0.8423825814656581, iteration: 49672
loss: 0.9832333326339722,grad_norm: 0.9035930406785695, iteration: 49673
loss: 1.0147876739501953,grad_norm: 0.8652299385143444, iteration: 49674
loss: 0.9727003574371338,grad_norm: 0.9713533496251646, iteration: 49675
loss: 1.0556740760803223,grad_norm: 0.7902116328590443, iteration: 49676
loss: 1.0024234056472778,grad_norm: 0.8593764389255732, iteration: 49677
loss: 1.0065299272537231,grad_norm: 0.9999991045467151, iteration: 49678
loss: 0.9870268106460571,grad_norm: 0.9492154691168093, iteration: 49679
loss: 1.016158103942871,grad_norm: 0.9233227573531809, iteration: 49680
loss: 0.9705297946929932,grad_norm: 0.9999990948823416, iteration: 49681
loss: 1.07713782787323,grad_norm: 0.9999993325523736, iteration: 49682
loss: 1.0043457746505737,grad_norm: 0.8852715429257808, iteration: 49683
loss: 1.0084519386291504,grad_norm: 0.9397698586798131, iteration: 49684
loss: 0.9693403840065002,grad_norm: 0.9506711074543632, iteration: 49685
loss: 0.9997185468673706,grad_norm: 0.9396851290323615, iteration: 49686
loss: 1.0345720052719116,grad_norm: 0.9604662856925819, iteration: 49687
loss: 1.0521693229675293,grad_norm: 0.9999994443368567, iteration: 49688
loss: 1.0055197477340698,grad_norm: 0.8407043090224562, iteration: 49689
loss: 0.9706442952156067,grad_norm: 0.8741894857709455, iteration: 49690
loss: 0.9806769490242004,grad_norm: 0.9533684781266261, iteration: 49691
loss: 0.998029351234436,grad_norm: 0.9778147399423773, iteration: 49692
loss: 1.0126806497573853,grad_norm: 0.9999994609493884, iteration: 49693
loss: 1.0111783742904663,grad_norm: 0.9931337750672161, iteration: 49694
loss: 1.0168471336364746,grad_norm: 0.9999997173480283, iteration: 49695
loss: 1.0004702806472778,grad_norm: 0.8073613634027098, iteration: 49696
loss: 0.9942784309387207,grad_norm: 0.8476216577046395, iteration: 49697
loss: 1.041693925857544,grad_norm: 0.9348512556004213, iteration: 49698
loss: 0.9844454526901245,grad_norm: 0.9544940445766797, iteration: 49699
loss: 0.9840179085731506,grad_norm: 0.9999991207988177, iteration: 49700
loss: 1.00205659866333,grad_norm: 0.92959540077454, iteration: 49701
loss: 0.9898820519447327,grad_norm: 0.999999167914403, iteration: 49702
loss: 0.9932544231414795,grad_norm: 0.9423844077465909, iteration: 49703
loss: 1.009892225265503,grad_norm: 0.8560820719162017, iteration: 49704
loss: 1.0425907373428345,grad_norm: 0.9999990833524179, iteration: 49705
loss: 0.995037317276001,grad_norm: 0.9999990388128288, iteration: 49706
loss: 0.9951401948928833,grad_norm: 0.9999992320080544, iteration: 49707
loss: 1.017790675163269,grad_norm: 0.9164619760784503, iteration: 49708
loss: 1.0539954900741577,grad_norm: 0.9999998738613405, iteration: 49709
loss: 1.094114899635315,grad_norm: 0.9999994989005522, iteration: 49710
loss: 1.0687549114227295,grad_norm: 0.9999994432879468, iteration: 49711
loss: 1.006435513496399,grad_norm: 0.9999992006402759, iteration: 49712
loss: 1.0563530921936035,grad_norm: 0.9999990881316084, iteration: 49713
loss: 1.0556432008743286,grad_norm: 0.9999996116364687, iteration: 49714
loss: 1.0064657926559448,grad_norm: 0.952383696490831, iteration: 49715
loss: 1.0987849235534668,grad_norm: 0.9999997454025605, iteration: 49716
loss: 1.0241118669509888,grad_norm: 0.9195818068029156, iteration: 49717
loss: 1.04951012134552,grad_norm: 0.9999991554983404, iteration: 49718
loss: 0.9866333603858948,grad_norm: 0.9999995012406308, iteration: 49719
loss: 0.9890339374542236,grad_norm: 0.9999998405770786, iteration: 49720
loss: 1.0233107805252075,grad_norm: 0.8546171813097826, iteration: 49721
loss: 0.9786151051521301,grad_norm: 0.967648040280399, iteration: 49722
loss: 1.0039466619491577,grad_norm: 0.9092741964776321, iteration: 49723
loss: 1.0072144269943237,grad_norm: 0.995694972666932, iteration: 49724
loss: 1.0189613103866577,grad_norm: 0.9999997570507176, iteration: 49725
loss: 1.0121653079986572,grad_norm: 0.9999993036042027, iteration: 49726
loss: 0.9930801391601562,grad_norm: 0.9999990994738395, iteration: 49727
loss: 0.9844631552696228,grad_norm: 0.9999992327224759, iteration: 49728
loss: 0.9936606884002686,grad_norm: 0.999999127806137, iteration: 49729
loss: 1.0008094310760498,grad_norm: 0.9999990411846646, iteration: 49730
loss: 0.9922647476196289,grad_norm: 0.906941334423082, iteration: 49731
loss: 0.9883765578269958,grad_norm: 0.92000070626664, iteration: 49732
loss: 1.012853741645813,grad_norm: 0.9048151734100077, iteration: 49733
loss: 1.050940752029419,grad_norm: 0.999999937985584, iteration: 49734
loss: 1.0384776592254639,grad_norm: 0.9999990246272664, iteration: 49735
loss: 1.0328402519226074,grad_norm: 0.9357453279560152, iteration: 49736
loss: 1.0561132431030273,grad_norm: 0.9999990599396765, iteration: 49737
loss: 0.9702935814857483,grad_norm: 0.9999990343599656, iteration: 49738
loss: 1.0171449184417725,grad_norm: 0.9999995121213442, iteration: 49739
loss: 1.000610589981079,grad_norm: 0.9999991507378134, iteration: 49740
loss: 1.0166126489639282,grad_norm: 0.999999118015543, iteration: 49741
loss: 0.9867146611213684,grad_norm: 0.9999990789024752, iteration: 49742
loss: 0.9942498207092285,grad_norm: 0.9234059462768642, iteration: 49743
loss: 1.0066148042678833,grad_norm: 0.9999996189794529, iteration: 49744
loss: 0.988402783870697,grad_norm: 0.990379708020347, iteration: 49745
loss: 1.1000949144363403,grad_norm: 0.9999998106037139, iteration: 49746
loss: 1.0421644449234009,grad_norm: 0.9999991164225539, iteration: 49747
loss: 0.9681389331817627,grad_norm: 0.8687088577490746, iteration: 49748
loss: 0.9931773543357849,grad_norm: 0.999999195738572, iteration: 49749
loss: 0.9693376421928406,grad_norm: 0.8043001137236129, iteration: 49750
loss: 1.0711443424224854,grad_norm: 0.999999150417421, iteration: 49751
loss: 0.9870823621749878,grad_norm: 0.8264218590162201, iteration: 49752
loss: 1.000601887702942,grad_norm: 0.8315296286807603, iteration: 49753
loss: 1.0104109048843384,grad_norm: 0.9999992409738423, iteration: 49754
loss: 1.0342658758163452,grad_norm: 0.9999990028156432, iteration: 49755
loss: 0.9909660220146179,grad_norm: 0.8949222598634673, iteration: 49756
loss: 0.9850234389305115,grad_norm: 0.9136616177253793, iteration: 49757
loss: 1.034128189086914,grad_norm: 0.9477442256582191, iteration: 49758
loss: 1.0206245183944702,grad_norm: 0.9999990161755402, iteration: 49759
loss: 1.0037099123001099,grad_norm: 0.9131395231895327, iteration: 49760
loss: 0.9913444519042969,grad_norm: 0.9999989044700379, iteration: 49761
loss: 0.9881139993667603,grad_norm: 0.999999167576976, iteration: 49762
loss: 1.0192562341690063,grad_norm: 0.9739646944569943, iteration: 49763
loss: 1.0234951972961426,grad_norm: 0.9999992825782087, iteration: 49764
loss: 1.003035545349121,grad_norm: 0.955471011861594, iteration: 49765
loss: 1.0257402658462524,grad_norm: 0.9999994036672907, iteration: 49766
loss: 0.9949556589126587,grad_norm: 0.969470084620132, iteration: 49767
loss: 1.0170574188232422,grad_norm: 0.9999994002248195, iteration: 49768
loss: 1.011162519454956,grad_norm: 0.8471850628137313, iteration: 49769
loss: 1.031443476676941,grad_norm: 0.9999992916321638, iteration: 49770
loss: 1.0314044952392578,grad_norm: 0.9999991918698483, iteration: 49771
loss: 0.994539201259613,grad_norm: 0.9294733289882272, iteration: 49772
loss: 1.0186294317245483,grad_norm: 0.9779148361578386, iteration: 49773
loss: 1.0368878841400146,grad_norm: 0.9999991155577421, iteration: 49774
loss: 1.0195763111114502,grad_norm: 0.9999994931755326, iteration: 49775
loss: 1.0126210451126099,grad_norm: 0.9999994856648062, iteration: 49776
loss: 1.04989492893219,grad_norm: 0.9999993011084276, iteration: 49777
loss: 1.0041896104812622,grad_norm: 0.9999991384715538, iteration: 49778
loss: 1.0065892934799194,grad_norm: 0.9343943934105111, iteration: 49779
loss: 1.0323143005371094,grad_norm: 0.8125729133112269, iteration: 49780
loss: 1.0167615413665771,grad_norm: 0.9237754048311875, iteration: 49781
loss: 1.0210245847702026,grad_norm: 0.7512857983182086, iteration: 49782
loss: 1.044052004814148,grad_norm: 0.8020021295644617, iteration: 49783
loss: 1.0290557146072388,grad_norm: 0.999999332044935, iteration: 49784
loss: 1.0137611627578735,grad_norm: 0.9999994620014092, iteration: 49785
loss: 1.0310441255569458,grad_norm: 0.9999990112701794, iteration: 49786
loss: 0.9790445566177368,grad_norm: 0.9999990691911524, iteration: 49787
loss: 1.0381076335906982,grad_norm: 0.999999291842623, iteration: 49788
loss: 1.0169823169708252,grad_norm: 0.9999996525652368, iteration: 49789
loss: 1.0089657306671143,grad_norm: 0.9999991275942638, iteration: 49790
loss: 1.0190492868423462,grad_norm: 0.9999996546647746, iteration: 49791
loss: 1.0694348812103271,grad_norm: 0.9999992338089633, iteration: 49792
loss: 1.1134192943572998,grad_norm: 0.9999995051445009, iteration: 49793
loss: 1.0302220582962036,grad_norm: 0.9114021611802507, iteration: 49794
loss: 0.9749175310134888,grad_norm: 0.9149018352500625, iteration: 49795
loss: 1.0356504917144775,grad_norm: 0.99999922350301, iteration: 49796
loss: 0.9969274401664734,grad_norm: 0.9383697766350231, iteration: 49797
loss: 1.0159072875976562,grad_norm: 0.8496370659304172, iteration: 49798
loss: 1.014894723892212,grad_norm: 0.9999992630980592, iteration: 49799
loss: 0.9751479029655457,grad_norm: 0.8324825169397356, iteration: 49800
loss: 1.0066322088241577,grad_norm: 0.9999999155165731, iteration: 49801
loss: 0.9878755211830139,grad_norm: 0.9999992817730439, iteration: 49802
loss: 1.0307537317276,grad_norm: 0.8170864810473694, iteration: 49803
loss: 0.9934948086738586,grad_norm: 0.8864207834355423, iteration: 49804
loss: 1.003754734992981,grad_norm: 0.9999992085198242, iteration: 49805
loss: 0.9847186207771301,grad_norm: 0.9999990538516521, iteration: 49806
loss: 1.050733208656311,grad_norm: 0.9999993360663547, iteration: 49807
loss: 1.0164928436279297,grad_norm: 0.9999989971468823, iteration: 49808
loss: 1.0029853582382202,grad_norm: 0.8694054028015876, iteration: 49809
loss: 1.0295610427856445,grad_norm: 0.9338233153793368, iteration: 49810
loss: 1.11006498336792,grad_norm: 0.9999996460645005, iteration: 49811
loss: 1.0069729089736938,grad_norm: 0.9999989633255808, iteration: 49812
loss: 1.041491150856018,grad_norm: 0.9999996473529172, iteration: 49813
loss: 1.0289229154586792,grad_norm: 0.9999991592606435, iteration: 49814
loss: 1.0070216655731201,grad_norm: 0.9999998414923857, iteration: 49815
loss: 0.9949889183044434,grad_norm: 0.8386604024248282, iteration: 49816
loss: 0.9924229383468628,grad_norm: 0.9269566837431027, iteration: 49817
loss: 1.0265387296676636,grad_norm: 0.9999991417713978, iteration: 49818
loss: 0.9911607503890991,grad_norm: 0.8096274451005, iteration: 49819
loss: 1.0076693296432495,grad_norm: 0.7433783155492687, iteration: 49820
loss: 1.0249186754226685,grad_norm: 0.870810363352303, iteration: 49821
loss: 1.002625823020935,grad_norm: 0.958716397955657, iteration: 49822
loss: 1.0429221391677856,grad_norm: 0.999999123176011, iteration: 49823
loss: 0.9993364810943604,grad_norm: 0.9999990817348537, iteration: 49824
loss: 1.048675775527954,grad_norm: 0.9999994947089895, iteration: 49825
loss: 1.0639066696166992,grad_norm: 0.9999997218386477, iteration: 49826
loss: 1.0649147033691406,grad_norm: 0.9999991525814032, iteration: 49827
loss: 1.0458077192306519,grad_norm: 0.9999992362603055, iteration: 49828
loss: 1.0509958267211914,grad_norm: 0.9999997563326722, iteration: 49829
loss: 1.1249749660491943,grad_norm: 0.9999992091341223, iteration: 49830
loss: 1.0136314630508423,grad_norm: 0.9999992856799544, iteration: 49831
loss: 1.0435395240783691,grad_norm: 0.9999996812570279, iteration: 49832
loss: 0.9846490025520325,grad_norm: 0.9792595446978141, iteration: 49833
loss: 1.0801597833633423,grad_norm: 0.9328237038111606, iteration: 49834
loss: 1.026259422302246,grad_norm: 0.9999992515113809, iteration: 49835
loss: 0.9943453669548035,grad_norm: 0.9999995047169844, iteration: 49836
loss: 1.0479522943496704,grad_norm: 0.9999995773443523, iteration: 49837
loss: 1.010702133178711,grad_norm: 0.8685033677711549, iteration: 49838
loss: 1.0204576253890991,grad_norm: 0.9999991788428827, iteration: 49839
loss: 1.0205434560775757,grad_norm: 0.9999995058448777, iteration: 49840
loss: 0.968194305896759,grad_norm: 0.9387608715343141, iteration: 49841
loss: 1.000401496887207,grad_norm: 0.9886259104378318, iteration: 49842
loss: 1.0096547603607178,grad_norm: 0.8657737656707251, iteration: 49843
loss: 1.0134748220443726,grad_norm: 0.9999992775387668, iteration: 49844
loss: 0.9971967935562134,grad_norm: 0.9999999347320391, iteration: 49845
loss: 1.026573896408081,grad_norm: 0.8332618609494621, iteration: 49846
loss: 1.0349656343460083,grad_norm: 0.9999996453389378, iteration: 49847
loss: 0.9824517369270325,grad_norm: 0.9999993783657437, iteration: 49848
loss: 1.0075193643569946,grad_norm: 0.9999991902779007, iteration: 49849
loss: 0.9864996671676636,grad_norm: 0.9999990980775944, iteration: 49850
loss: 1.1017577648162842,grad_norm: 0.9999999642232166, iteration: 49851
loss: 1.0055928230285645,grad_norm: 0.9999994312052474, iteration: 49852
loss: 1.0003516674041748,grad_norm: 0.999999920778653, iteration: 49853
loss: 1.0172195434570312,grad_norm: 0.9900782330475402, iteration: 49854
loss: 1.1042448282241821,grad_norm: 0.9999994384980422, iteration: 49855
loss: 1.0216249227523804,grad_norm: 0.9999997949491822, iteration: 49856
loss: 1.0082001686096191,grad_norm: 0.9999997644375465, iteration: 49857
loss: 0.9889327883720398,grad_norm: 0.9982324169967377, iteration: 49858
loss: 1.050646185874939,grad_norm: 0.9999992950788156, iteration: 49859
loss: 0.9780803918838501,grad_norm: 0.999999073384186, iteration: 49860
loss: 1.0266119241714478,grad_norm: 0.9300514648924283, iteration: 49861
loss: 1.0215340852737427,grad_norm: 0.9393631178778404, iteration: 49862
loss: 0.9948574900627136,grad_norm: 0.9999990891050402, iteration: 49863
loss: 1.032352328300476,grad_norm: 0.9999996622636466, iteration: 49864
loss: 1.0337485074996948,grad_norm: 0.9999993718152425, iteration: 49865
loss: 1.0012307167053223,grad_norm: 0.9999991959001857, iteration: 49866
loss: 1.060202717781067,grad_norm: 0.999999281340298, iteration: 49867
loss: 1.0068293809890747,grad_norm: 0.9038234782887042, iteration: 49868
loss: 1.0321695804595947,grad_norm: 0.9999992102736787, iteration: 49869
loss: 1.0611498355865479,grad_norm: 0.9999998607518603, iteration: 49870
loss: 1.0206536054611206,grad_norm: 0.9999995207292891, iteration: 49871
loss: 1.00136137008667,grad_norm: 0.8934846685848412, iteration: 49872
loss: 1.0765753984451294,grad_norm: 0.9999999337201588, iteration: 49873
loss: 1.0293679237365723,grad_norm: 0.9999991753276599, iteration: 49874
loss: 1.0389493703842163,grad_norm: 0.9999991256504331, iteration: 49875
loss: 1.0322433710098267,grad_norm: 0.9999994877119475, iteration: 49876
loss: 1.1055045127868652,grad_norm: 0.9999996823122411, iteration: 49877
loss: 1.0116573572158813,grad_norm: 0.9705780505083171, iteration: 49878
loss: 0.9730758666992188,grad_norm: 0.9999994140661316, iteration: 49879
loss: 1.020150899887085,grad_norm: 0.9999994087243166, iteration: 49880
loss: 0.9998229742050171,grad_norm: 0.8966232100969943, iteration: 49881
loss: 0.9910685420036316,grad_norm: 0.9060373129396398, iteration: 49882
loss: 1.0621840953826904,grad_norm: 0.9999997102650982, iteration: 49883
loss: 1.0850862264633179,grad_norm: 0.9999991536871276, iteration: 49884
loss: 1.00967276096344,grad_norm: 0.9999995187741284, iteration: 49885
loss: 1.0068600177764893,grad_norm: 0.9267378022604906, iteration: 49886
loss: 1.0156360864639282,grad_norm: 0.9952543494624293, iteration: 49887
loss: 1.0398739576339722,grad_norm: 0.9999995835074583, iteration: 49888
loss: 1.126959204673767,grad_norm: 0.99999953700314, iteration: 49889
loss: 1.0491962432861328,grad_norm: 0.9999995749599789, iteration: 49890
loss: 1.0238986015319824,grad_norm: 0.9999995354629697, iteration: 49891
loss: 0.9945814609527588,grad_norm: 0.9931143125557086, iteration: 49892
loss: 1.0379698276519775,grad_norm: 0.9999992889344415, iteration: 49893
loss: 0.9941049814224243,grad_norm: 0.9999994081429164, iteration: 49894
loss: 0.9735866785049438,grad_norm: 0.9999994866476791, iteration: 49895
loss: 1.0160945653915405,grad_norm: 0.9999990308165738, iteration: 49896
loss: 1.041515588760376,grad_norm: 0.9605587291519486, iteration: 49897
loss: 1.11687433719635,grad_norm: 0.9999998738655678, iteration: 49898
loss: 1.054639458656311,grad_norm: 0.9999993495384093, iteration: 49899
loss: 1.047295093536377,grad_norm: 0.9999993067076145, iteration: 49900
loss: 1.0563794374465942,grad_norm: 0.9999990730932738, iteration: 49901
loss: 0.9886398911476135,grad_norm: 0.954464140238801, iteration: 49902
loss: 1.0218191146850586,grad_norm: 0.9999993341058885, iteration: 49903
loss: 0.9927854537963867,grad_norm: 0.8647798584865186, iteration: 49904
loss: 1.0302866697311401,grad_norm: 0.9999995794444032, iteration: 49905
loss: 0.9778521656990051,grad_norm: 0.8938940748213224, iteration: 49906
loss: 1.0306025743484497,grad_norm: 0.9999990306824222, iteration: 49907
loss: 0.9782185554504395,grad_norm: 0.8675038585945953, iteration: 49908
loss: 1.0252909660339355,grad_norm: 0.9999992049657301, iteration: 49909
loss: 0.9878426790237427,grad_norm: 0.9999990475530658, iteration: 49910
loss: 1.0194551944732666,grad_norm: 0.999999412871916, iteration: 49911
loss: 1.0452128648757935,grad_norm: 0.9999998696293497, iteration: 49912
loss: 1.0440173149108887,grad_norm: 0.9999994431187196, iteration: 49913
loss: 1.0248465538024902,grad_norm: 0.9999990497657347, iteration: 49914
loss: 0.9732002019882202,grad_norm: 0.999999402866243, iteration: 49915
loss: 1.081007480621338,grad_norm: 0.9999999033562366, iteration: 49916
loss: 0.9957855939865112,grad_norm: 0.8721670376072875, iteration: 49917
loss: 1.027730941772461,grad_norm: 0.9999996153645507, iteration: 49918
loss: 1.0980396270751953,grad_norm: 0.9999992204454514, iteration: 49919
loss: 0.9962440729141235,grad_norm: 0.9999992084946193, iteration: 49920
loss: 1.0359821319580078,grad_norm: 0.9999990996146915, iteration: 49921
loss: 1.0727229118347168,grad_norm: 0.9999994519780004, iteration: 49922
loss: 1.0454999208450317,grad_norm: 0.9802598762586101, iteration: 49923
loss: 0.9947206377983093,grad_norm: 0.8454924098764234, iteration: 49924
loss: 1.0153443813323975,grad_norm: 0.9999996470936021, iteration: 49925
loss: 1.0602819919586182,grad_norm: 0.9999991947788937, iteration: 49926
loss: 1.0204708576202393,grad_norm: 0.9999992144192724, iteration: 49927
loss: 0.99382483959198,grad_norm: 0.9547229058670873, iteration: 49928
loss: 0.9813245534896851,grad_norm: 0.9999992252285546, iteration: 49929
loss: 1.0039016008377075,grad_norm: 0.9999995321020211, iteration: 49930
loss: 1.0082571506500244,grad_norm: 0.9999992395929991, iteration: 49931
loss: 1.0048447847366333,grad_norm: 0.9112157909749217, iteration: 49932
loss: 1.0351176261901855,grad_norm: 0.9137384967006017, iteration: 49933
loss: 1.0583101511001587,grad_norm: 0.9215457404017002, iteration: 49934
loss: 0.9631262421607971,grad_norm: 0.8931840773344409, iteration: 49935
loss: 1.1000933647155762,grad_norm: 0.9999998223973326, iteration: 49936
loss: 1.0275746583938599,grad_norm: 0.9823351208625216, iteration: 49937
loss: 1.06001615524292,grad_norm: 0.9999994527962851, iteration: 49938
loss: 1.020639419555664,grad_norm: 0.9999990765775368, iteration: 49939
loss: 1.011869192123413,grad_norm: 0.9999996631654402, iteration: 49940
loss: 1.033597707748413,grad_norm: 0.9999991693035067, iteration: 49941
loss: 1.0288105010986328,grad_norm: 0.914131181987509, iteration: 49942
loss: 1.0675781965255737,grad_norm: 0.999999284438022, iteration: 49943
loss: 0.9965985417366028,grad_norm: 0.9999992515694089, iteration: 49944
loss: 1.0948666334152222,grad_norm: 0.9999991409711092, iteration: 49945
loss: 1.0581084489822388,grad_norm: 0.9999995788463542, iteration: 49946
loss: 0.9990442991256714,grad_norm: 0.9999990426739493, iteration: 49947
loss: 1.0069936513900757,grad_norm: 0.9606427292110048, iteration: 49948
loss: 0.9859902858734131,grad_norm: 0.907327693651464, iteration: 49949
loss: 0.9817532896995544,grad_norm: 0.9133710429868122, iteration: 49950
loss: 0.9605545401573181,grad_norm: 0.9999990201639709, iteration: 49951
loss: 0.9907989501953125,grad_norm: 0.8171513195788069, iteration: 49952
loss: 1.0055632591247559,grad_norm: 0.784923200818205, iteration: 49953
loss: 1.1035839319229126,grad_norm: 0.9999997820232097, iteration: 49954
loss: 1.0205234289169312,grad_norm: 0.9782377677574724, iteration: 49955
loss: 1.0322800874710083,grad_norm: 0.9999991596167732, iteration: 49956
loss: 0.9929417967796326,grad_norm: 0.9999990085279699, iteration: 49957
loss: 1.1731994152069092,grad_norm: 0.9999997358431983, iteration: 49958
loss: 0.9993278980255127,grad_norm: 0.9999991464342747, iteration: 49959
loss: 1.010726809501648,grad_norm: 0.9999990552200315, iteration: 49960
loss: 1.0875204801559448,grad_norm: 0.9999998108678974, iteration: 49961
loss: 1.0081813335418701,grad_norm: 0.999999777632817, iteration: 49962
loss: 1.049921989440918,grad_norm: 0.9803274955232804, iteration: 49963
loss: 0.9963999390602112,grad_norm: 0.8034164167133562, iteration: 49964
loss: 1.0410410165786743,grad_norm: 0.9999990848865918, iteration: 49965
loss: 1.0337955951690674,grad_norm: 0.873170317411915, iteration: 49966
loss: 1.0202683210372925,grad_norm: 0.999999055908348, iteration: 49967
loss: 0.9920482039451599,grad_norm: 0.9199359862043333, iteration: 49968
loss: 1.00962233543396,grad_norm: 0.9530720481295947, iteration: 49969
loss: 1.0055203437805176,grad_norm: 0.9999991049487901, iteration: 49970
loss: 1.044562816619873,grad_norm: 0.9999998513187207, iteration: 49971
loss: 0.9744188189506531,grad_norm: 0.9438417287562242, iteration: 49972
loss: 0.9835001230239868,grad_norm: 0.7558263284439212, iteration: 49973
loss: 1.0775363445281982,grad_norm: 0.859920157773757, iteration: 49974
loss: 1.0388849973678589,grad_norm: 0.8705877307059747, iteration: 49975
loss: 1.048671007156372,grad_norm: 0.9999995963586055, iteration: 49976
loss: 1.0336881875991821,grad_norm: 0.7709330433706534, iteration: 49977
loss: 1.0122593641281128,grad_norm: 0.9491492028219956, iteration: 49978
loss: 1.0202146768569946,grad_norm: 0.917802887466112, iteration: 49979
loss: 0.9968670606613159,grad_norm: 0.8723384196636212, iteration: 49980
loss: 1.0299283266067505,grad_norm: 0.9999992265296592, iteration: 49981
loss: 1.031838059425354,grad_norm: 0.999999491707601, iteration: 49982
loss: 1.014051914215088,grad_norm: 0.9057504693789694, iteration: 49983
loss: 1.0135599374771118,grad_norm: 0.9999995471271443, iteration: 49984
loss: 1.015760064125061,grad_norm: 0.8750088217780773, iteration: 49985
loss: 0.9999529123306274,grad_norm: 0.903619729401462, iteration: 49986
loss: 1.0661500692367554,grad_norm: 0.7729183144834368, iteration: 49987
loss: 1.0004258155822754,grad_norm: 0.8837364066320257, iteration: 49988
loss: 0.9923138618469238,grad_norm: 0.9189074395915146, iteration: 49989
loss: 0.9911371469497681,grad_norm: 0.8492940023685852, iteration: 49990
loss: 0.9943873882293701,grad_norm: 0.926089864371462, iteration: 49991
loss: 0.9832021594047546,grad_norm: 0.8274067342398942, iteration: 49992
loss: 0.9995729923248291,grad_norm: 0.9999989986201306, iteration: 49993
loss: 1.0196341276168823,grad_norm: 0.8434314918356618, iteration: 49994
loss: 1.04917311668396,grad_norm: 0.8339170039240622, iteration: 49995
loss: 1.0062800645828247,grad_norm: 0.882947722946433, iteration: 49996
loss: 1.0106350183486938,grad_norm: 0.9033606049526232, iteration: 49997
loss: 1.1118100881576538,grad_norm: 0.9999998811189975, iteration: 49998
loss: 1.1334484815597534,grad_norm: 0.9999994436419806, iteration: 49999
loss: 0.9667595028877258,grad_norm: 0.9999990244768071, iteration: 50000
Evaluating at step 50000
{'val': 0.9976180791854858, 'test': 2.3466980061211142}
loss: 1.0548962354660034,grad_norm: 0.9999996595484233, iteration: 50001
loss: 1.0205326080322266,grad_norm: 0.9526577563250455, iteration: 50002
loss: 0.982710599899292,grad_norm: 0.999999110563173, iteration: 50003
loss: 1.0489627122879028,grad_norm: 0.9999999213987038, iteration: 50004
loss: 1.0020229816436768,grad_norm: 0.9999992390353635, iteration: 50005
loss: 1.0250333547592163,grad_norm: 0.8854065487981437, iteration: 50006
loss: 1.0113110542297363,grad_norm: 0.9999991264359747, iteration: 50007
loss: 1.0113258361816406,grad_norm: 0.8206877507779319, iteration: 50008
loss: 0.9818835258483887,grad_norm: 0.9999991402342987, iteration: 50009
loss: 0.9645524024963379,grad_norm: 0.9525391676187517, iteration: 50010
loss: 1.0702693462371826,grad_norm: 0.9999991501736211, iteration: 50011
loss: 1.0369681119918823,grad_norm: 0.9999994029639544, iteration: 50012
loss: 0.9798775911331177,grad_norm: 0.935798058582546, iteration: 50013
loss: 1.0291460752487183,grad_norm: 0.9999990061754935, iteration: 50014
loss: 0.9979115724563599,grad_norm: 0.8007759161033928, iteration: 50015
loss: 1.024268627166748,grad_norm: 0.99999900716921, iteration: 50016
loss: 1.0293797254562378,grad_norm: 0.9333920502656642, iteration: 50017
loss: 1.027675986289978,grad_norm: 0.9999991459609567, iteration: 50018
loss: 1.2729899883270264,grad_norm: 0.9999992060811049, iteration: 50019
loss: 0.9876298904418945,grad_norm: 0.999999612231273, iteration: 50020
loss: 1.0328749418258667,grad_norm: 0.9999991803384995, iteration: 50021
loss: 1.0250518321990967,grad_norm: 0.9838154693147134, iteration: 50022
loss: 1.0285964012145996,grad_norm: 0.9523403642180207, iteration: 50023
loss: 0.994736909866333,grad_norm: 0.9999991073250566, iteration: 50024
loss: 1.0166670083999634,grad_norm: 0.9999997601451712, iteration: 50025
loss: 1.0037236213684082,grad_norm: 0.9999994861429239, iteration: 50026
loss: 0.9978885650634766,grad_norm: 0.8959925946156125, iteration: 50027
loss: 1.0281387567520142,grad_norm: 0.7246004304837461, iteration: 50028
loss: 1.001918077468872,grad_norm: 0.8764763073622595, iteration: 50029
loss: 1.0917147397994995,grad_norm: 0.9999996230208961, iteration: 50030
loss: 0.9880525469779968,grad_norm: 0.9999990093672962, iteration: 50031
loss: 0.9917290210723877,grad_norm: 0.9926104409872076, iteration: 50032
loss: 1.0159813165664673,grad_norm: 0.9999991772432997, iteration: 50033
loss: 0.9770340919494629,grad_norm: 0.9240802488884057, iteration: 50034
loss: 1.03961980342865,grad_norm: 0.999999901189513, iteration: 50035
loss: 1.0187420845031738,grad_norm: 0.9999992132637666, iteration: 50036
loss: 1.0122530460357666,grad_norm: 0.9738264362622457, iteration: 50037
loss: 1.0340396165847778,grad_norm: 0.9999999757427229, iteration: 50038
loss: 1.0719398260116577,grad_norm: 0.9999995058160194, iteration: 50039
loss: 1.0093592405319214,grad_norm: 0.8054951303671782, iteration: 50040
loss: 1.021242380142212,grad_norm: 0.9284587755947262, iteration: 50041
loss: 1.0110467672348022,grad_norm: 0.9688895321098344, iteration: 50042
loss: 1.0103436708450317,grad_norm: 0.9999993375357771, iteration: 50043
loss: 1.1282497644424438,grad_norm: 0.9999997383350773, iteration: 50044
loss: 1.0498631000518799,grad_norm: 0.9999989972182979, iteration: 50045
loss: 1.0954453945159912,grad_norm: 1.0000000252397379, iteration: 50046
loss: 1.0069266557693481,grad_norm: 0.999999893666593, iteration: 50047
loss: 1.0613409280776978,grad_norm: 0.9999990893788663, iteration: 50048
loss: 1.0739132165908813,grad_norm: 0.9674668889408509, iteration: 50049
loss: 1.0542610883712769,grad_norm: 0.9386818117097533, iteration: 50050
loss: 1.0581878423690796,grad_norm: 0.9999992337471209, iteration: 50051
loss: 1.0206680297851562,grad_norm: 0.9569223572112767, iteration: 50052
loss: 0.9863408803939819,grad_norm: 0.8952183475975269, iteration: 50053
loss: 1.0919733047485352,grad_norm: 0.999999352641275, iteration: 50054
loss: 1.098892331123352,grad_norm: 0.9999996873260798, iteration: 50055
loss: 1.0315784215927124,grad_norm: 0.9999991270850083, iteration: 50056
loss: 1.0081979036331177,grad_norm: 0.9342112129507697, iteration: 50057
loss: 1.025591492652893,grad_norm: 0.8128253399911114, iteration: 50058
loss: 1.0213966369628906,grad_norm: 0.8710613028050012, iteration: 50059
loss: 1.0257115364074707,grad_norm: 0.8663311552191215, iteration: 50060
loss: 1.0756367444992065,grad_norm: 0.9999997861740993, iteration: 50061
loss: 1.1232759952545166,grad_norm: 0.9999998790147578, iteration: 50062
loss: 1.0438058376312256,grad_norm: 0.999999556211903, iteration: 50063
loss: 1.0582165718078613,grad_norm: 0.9999999113587419, iteration: 50064
loss: 1.0190513134002686,grad_norm: 0.9999995347921159, iteration: 50065
loss: 1.1125813722610474,grad_norm: 0.9999998716381066, iteration: 50066
loss: 1.0244864225387573,grad_norm: 0.9999990208609166, iteration: 50067
loss: 1.0066587924957275,grad_norm: 0.854074355473517, iteration: 50068
loss: 1.0503623485565186,grad_norm: 0.9999997327324301, iteration: 50069
loss: 1.046918511390686,grad_norm: 0.9999995147969608, iteration: 50070
loss: 0.984563410282135,grad_norm: 0.9685388927923961, iteration: 50071
loss: 1.029404640197754,grad_norm: 0.9999989417455462, iteration: 50072
loss: 1.0252324342727661,grad_norm: 0.9999991411197942, iteration: 50073
loss: 1.145172357559204,grad_norm: 0.9999995718147068, iteration: 50074
loss: 1.0444437265396118,grad_norm: 0.9146613818184194, iteration: 50075
loss: 1.0130707025527954,grad_norm: 0.8993532779199795, iteration: 50076
loss: 1.0008751153945923,grad_norm: 0.8730083007622603, iteration: 50077
loss: 1.0188333988189697,grad_norm: 0.9999997908779757, iteration: 50078
loss: 1.0325701236724854,grad_norm: 0.9427864713989762, iteration: 50079
loss: 0.9812760949134827,grad_norm: 0.9046199982363483, iteration: 50080
loss: 0.986003577709198,grad_norm: 0.9999991582315357, iteration: 50081
loss: 1.1186652183532715,grad_norm: 0.9999997779090193, iteration: 50082
loss: 1.0016846656799316,grad_norm: 0.9999994576972117, iteration: 50083
loss: 0.9723518490791321,grad_norm: 0.9999990818566943, iteration: 50084
loss: 1.027010440826416,grad_norm: 0.9999991453191036, iteration: 50085
loss: 1.0420751571655273,grad_norm: 0.9866442499316346, iteration: 50086
loss: 0.9821146130561829,grad_norm: 0.9230999457657573, iteration: 50087
loss: 1.0271215438842773,grad_norm: 0.7591558696141674, iteration: 50088
loss: 1.0002999305725098,grad_norm: 0.9999990405260687, iteration: 50089
loss: 1.0288485288619995,grad_norm: 0.9372591902345034, iteration: 50090
loss: 1.052606463432312,grad_norm: 0.7697406104910458, iteration: 50091
loss: 0.961406409740448,grad_norm: 0.9999992243545178, iteration: 50092
loss: 1.0085194110870361,grad_norm: 0.8389332192160031, iteration: 50093
loss: 0.9405314326286316,grad_norm: 0.8871053073286042, iteration: 50094
loss: 1.0025062561035156,grad_norm: 0.9999992653566304, iteration: 50095
loss: 1.0387558937072754,grad_norm: 0.9999991120591463, iteration: 50096
loss: 1.0328569412231445,grad_norm: 0.999999390724742, iteration: 50097
loss: 0.9978739619255066,grad_norm: 0.8423861065035844, iteration: 50098
loss: 0.9895176291465759,grad_norm: 0.9999994121182659, iteration: 50099
loss: 1.0312327146530151,grad_norm: 0.9367884928455709, iteration: 50100
loss: 1.0059831142425537,grad_norm: 0.9425825460570236, iteration: 50101
loss: 1.0190867185592651,grad_norm: 0.9167227200615297, iteration: 50102
loss: 0.9909935593605042,grad_norm: 0.9999989679858086, iteration: 50103
loss: 1.019174575805664,grad_norm: 0.9837318279506265, iteration: 50104
loss: 0.9822407960891724,grad_norm: 0.8513591305593049, iteration: 50105
loss: 1.000952959060669,grad_norm: 0.9999991204435169, iteration: 50106
loss: 1.1845481395721436,grad_norm: 0.9999992533934845, iteration: 50107
loss: 1.022847056388855,grad_norm: 0.840346024012687, iteration: 50108
loss: 1.0336610078811646,grad_norm: 0.8389583354057302, iteration: 50109
loss: 1.0073288679122925,grad_norm: 0.7920686723495836, iteration: 50110
loss: 0.9720476269721985,grad_norm: 0.9927579570732862, iteration: 50111
loss: 1.029606580734253,grad_norm: 0.8908377636367052, iteration: 50112
loss: 1.0436160564422607,grad_norm: 0.9999993614924708, iteration: 50113
loss: 0.9946684837341309,grad_norm: 0.9999990965475036, iteration: 50114
loss: 1.0138285160064697,grad_norm: 0.9734461928143603, iteration: 50115
loss: 0.9946451783180237,grad_norm: 0.9881941678430015, iteration: 50116
loss: 1.003819227218628,grad_norm: 0.8721860253203897, iteration: 50117
loss: 1.0280427932739258,grad_norm: 0.8657259442102037, iteration: 50118
loss: 1.030357837677002,grad_norm: 0.999999181586025, iteration: 50119
loss: 1.002609372138977,grad_norm: 0.8995888444890175, iteration: 50120
loss: 1.0267690420150757,grad_norm: 0.8861645420298511, iteration: 50121
loss: 1.0154582262039185,grad_norm: 0.8706993937732426, iteration: 50122
loss: 0.9863637685775757,grad_norm: 0.9467346959941453, iteration: 50123
loss: 0.994914710521698,grad_norm: 0.9073651304637343, iteration: 50124
loss: 1.0055874586105347,grad_norm: 0.7844130406615271, iteration: 50125
loss: 1.0251065492630005,grad_norm: 0.9999990420567926, iteration: 50126
loss: 0.9994630217552185,grad_norm: 0.8721720825530213, iteration: 50127
loss: 0.9687456488609314,grad_norm: 0.9999991680484716, iteration: 50128
loss: 1.0343736410140991,grad_norm: 0.9537863636713343, iteration: 50129
loss: 1.0289902687072754,grad_norm: 0.9999992257125554, iteration: 50130
loss: 1.0213184356689453,grad_norm: 0.999999017767163, iteration: 50131
loss: 1.0095572471618652,grad_norm: 0.9470526183800606, iteration: 50132
loss: 1.0512210130691528,grad_norm: 0.9999995820334017, iteration: 50133
loss: 1.0076048374176025,grad_norm: 0.9999994202921262, iteration: 50134
loss: 1.0104864835739136,grad_norm: 0.9999994085056817, iteration: 50135
loss: 1.0344518423080444,grad_norm: 0.8685357750671929, iteration: 50136
loss: 1.00969660282135,grad_norm: 0.8045451863764796, iteration: 50137
loss: 1.0806597471237183,grad_norm: 0.9999996748078842, iteration: 50138
loss: 1.0086846351623535,grad_norm: 0.8787196520659167, iteration: 50139
loss: 1.0083156824111938,grad_norm: 0.9999990487556546, iteration: 50140
loss: 1.0154622793197632,grad_norm: 0.9999991766142349, iteration: 50141
loss: 1.0225000381469727,grad_norm: 0.9999992931264248, iteration: 50142
loss: 1.0076428651809692,grad_norm: 0.8492442372725066, iteration: 50143
loss: 1.0131051540374756,grad_norm: 0.879961555433667, iteration: 50144
loss: 1.0006030797958374,grad_norm: 0.9205610231387563, iteration: 50145
loss: 0.9917156100273132,grad_norm: 0.9999994651209361, iteration: 50146
loss: 0.9713302254676819,grad_norm: 0.9999989237321613, iteration: 50147
loss: 1.030950903892517,grad_norm: 0.9999995362016032, iteration: 50148
loss: 0.9924255609512329,grad_norm: 0.9334375589722838, iteration: 50149
loss: 1.017680048942566,grad_norm: 0.9128113692203833, iteration: 50150
loss: 0.9933598041534424,grad_norm: 0.9999990568057331, iteration: 50151
loss: 1.053941011428833,grad_norm: 0.9999996030987367, iteration: 50152
loss: 1.0444996356964111,grad_norm: 0.999999694173372, iteration: 50153
loss: 0.9707404971122742,grad_norm: 0.9999990496891454, iteration: 50154
loss: 1.034584879875183,grad_norm: 0.9999990491416295, iteration: 50155
loss: 1.0239262580871582,grad_norm: 0.9999990439084221, iteration: 50156
loss: 0.9835818409919739,grad_norm: 0.9999990526322452, iteration: 50157
loss: 1.0264527797698975,grad_norm: 0.9047690876757506, iteration: 50158
loss: 1.0115790367126465,grad_norm: 0.9604163082968653, iteration: 50159
loss: 0.9948706030845642,grad_norm: 0.9999991280250423, iteration: 50160
loss: 0.999244213104248,grad_norm: 0.9799317415584629, iteration: 50161
loss: 1.0328984260559082,grad_norm: 0.9999989790554505, iteration: 50162
loss: 0.986010730266571,grad_norm: 0.8082544949816617, iteration: 50163
loss: 1.017520785331726,grad_norm: 0.9999991281765088, iteration: 50164
loss: 0.9782295823097229,grad_norm: 0.9452754040315193, iteration: 50165
loss: 1.0213472843170166,grad_norm: 0.9999992019076467, iteration: 50166
loss: 0.9662628769874573,grad_norm: 0.7864820016933057, iteration: 50167
loss: 1.0342501401901245,grad_norm: 0.8227289961880815, iteration: 50168
loss: 0.9872627854347229,grad_norm: 0.999999729029415, iteration: 50169
loss: 1.028653860092163,grad_norm: 0.940900275105839, iteration: 50170
loss: 1.0373159646987915,grad_norm: 0.8202568929373267, iteration: 50171
loss: 0.9889310002326965,grad_norm: 0.9999995224443954, iteration: 50172
loss: 0.966539204120636,grad_norm: 0.9622619768222546, iteration: 50173
loss: 1.0507680177688599,grad_norm: 0.9999991001014339, iteration: 50174
loss: 0.988563060760498,grad_norm: 0.9573829213201481, iteration: 50175
loss: 1.0079736709594727,grad_norm: 0.999999180714788, iteration: 50176
loss: 0.9844777584075928,grad_norm: 0.9666456375760631, iteration: 50177
loss: 1.004624843597412,grad_norm: 0.7883484488902209, iteration: 50178
loss: 0.9770331382751465,grad_norm: 0.97780066372896, iteration: 50179
loss: 1.0756275653839111,grad_norm: 0.999999769651059, iteration: 50180
loss: 1.0131298303604126,grad_norm: 0.9599070254286999, iteration: 50181
loss: 1.0122020244598389,grad_norm: 0.9999997977468842, iteration: 50182
loss: 1.0277377367019653,grad_norm: 0.9636599636169371, iteration: 50183
loss: 1.0252997875213623,grad_norm: 0.9999991051265944, iteration: 50184
loss: 0.9960428476333618,grad_norm: 0.8263467406376924, iteration: 50185
loss: 1.032702922821045,grad_norm: 0.9999992654573242, iteration: 50186
loss: 1.0179510116577148,grad_norm: 0.8695184189728663, iteration: 50187
loss: 0.9850913882255554,grad_norm: 0.9999993020133346, iteration: 50188
loss: 1.0485833883285522,grad_norm: 0.999999433975721, iteration: 50189
loss: 1.0054136514663696,grad_norm: 0.9603859356993564, iteration: 50190
loss: 1.02061128616333,grad_norm: 0.9999990277062349, iteration: 50191
loss: 1.0084662437438965,grad_norm: 0.9999994635691485, iteration: 50192
loss: 0.9955936074256897,grad_norm: 0.9207978485207222, iteration: 50193
loss: 0.979734480381012,grad_norm: 0.9737028233106152, iteration: 50194
loss: 1.0041524171829224,grad_norm: 0.8539938580579812, iteration: 50195
loss: 0.9521801471710205,grad_norm: 0.9999990787530497, iteration: 50196
loss: 1.0317057371139526,grad_norm: 0.9999995779877187, iteration: 50197
loss: 1.0088995695114136,grad_norm: 0.9476861812185116, iteration: 50198
loss: 1.0145360231399536,grad_norm: 0.9999990716080315, iteration: 50199
loss: 0.9788967370986938,grad_norm: 0.8652746783886344, iteration: 50200
loss: 1.0340256690979004,grad_norm: 0.9999995511362442, iteration: 50201
loss: 0.9850110411643982,grad_norm: 0.9089959530031901, iteration: 50202
loss: 0.9995880126953125,grad_norm: 0.908514176925047, iteration: 50203
loss: 0.9775592684745789,grad_norm: 0.9772183237315192, iteration: 50204
loss: 1.0888340473175049,grad_norm: 0.999999976179778, iteration: 50205
loss: 1.0009347200393677,grad_norm: 0.8478638682309003, iteration: 50206
loss: 1.033989429473877,grad_norm: 0.9999995884104345, iteration: 50207
loss: 1.0451161861419678,grad_norm: 0.809220004686367, iteration: 50208
loss: 1.0085352659225464,grad_norm: 0.9999990827893708, iteration: 50209
loss: 1.015665054321289,grad_norm: 0.9999991936881726, iteration: 50210
loss: 0.9752928018569946,grad_norm: 0.9150133997838337, iteration: 50211
loss: 1.0720915794372559,grad_norm: 0.9999994249321079, iteration: 50212
loss: 0.9818267822265625,grad_norm: 0.7761804293403723, iteration: 50213
loss: 0.9917333722114563,grad_norm: 0.9999991262418596, iteration: 50214
loss: 1.0140496492385864,grad_norm: 0.999999631272799, iteration: 50215
loss: 0.9959497451782227,grad_norm: 0.999999146106375, iteration: 50216
loss: 1.00682532787323,grad_norm: 0.9866648651271132, iteration: 50217
loss: 0.9923449158668518,grad_norm: 0.9999992417566133, iteration: 50218
loss: 0.9951303601264954,grad_norm: 0.8226784083668105, iteration: 50219
loss: 1.0199984312057495,grad_norm: 0.7967908109608653, iteration: 50220
loss: 1.0379785299301147,grad_norm: 0.9999996419514717, iteration: 50221
loss: 1.0717915296554565,grad_norm: 0.9999994492019196, iteration: 50222
loss: 1.015016794204712,grad_norm: 0.8434142878204163, iteration: 50223
loss: 1.0162054300308228,grad_norm: 0.8651062570445694, iteration: 50224
loss: 1.0662070512771606,grad_norm: 0.9999994880339976, iteration: 50225
loss: 1.0172616243362427,grad_norm: 0.899141966997511, iteration: 50226
loss: 1.0048121213912964,grad_norm: 0.9999992440405419, iteration: 50227
loss: 0.9921742677688599,grad_norm: 0.7255209032188995, iteration: 50228
loss: 1.0213220119476318,grad_norm: 0.9999989750651499, iteration: 50229
loss: 0.9935938715934753,grad_norm: 0.8742421431011033, iteration: 50230
loss: 1.0693892240524292,grad_norm: 0.9999994333455918, iteration: 50231
loss: 1.0266038179397583,grad_norm: 0.8712467904497957, iteration: 50232
loss: 0.9757815599441528,grad_norm: 0.9999991278330059, iteration: 50233
loss: 0.9940577149391174,grad_norm: 0.8198054736527962, iteration: 50234
loss: 1.0027363300323486,grad_norm: 0.9054558003245256, iteration: 50235
loss: 1.0528194904327393,grad_norm: 0.9963071140649322, iteration: 50236
loss: 0.9970105290412903,grad_norm: 0.9274098560246177, iteration: 50237
loss: 1.0581295490264893,grad_norm: 0.9999999024777908, iteration: 50238
loss: 1.0321317911148071,grad_norm: 0.9999991153372737, iteration: 50239
loss: 0.9948186874389648,grad_norm: 0.9909030842381107, iteration: 50240
loss: 1.0194894075393677,grad_norm: 0.9999991609814628, iteration: 50241
loss: 0.9816845655441284,grad_norm: 0.9891690931913241, iteration: 50242
loss: 1.019039511680603,grad_norm: 0.8351779588375102, iteration: 50243
loss: 1.0441150665283203,grad_norm: 0.9999991746345948, iteration: 50244
loss: 1.0369528532028198,grad_norm: 0.8739402446759967, iteration: 50245
loss: 1.0024412870407104,grad_norm: 0.9769848653664082, iteration: 50246
loss: 0.989648163318634,grad_norm: 0.9999989223693544, iteration: 50247
loss: 0.9713786244392395,grad_norm: 0.9999990946599303, iteration: 50248
loss: 1.0001012086868286,grad_norm: 0.9417454771589558, iteration: 50249
loss: 1.013263463973999,grad_norm: 0.9999990760236307, iteration: 50250
loss: 0.9676206707954407,grad_norm: 0.9391798665917295, iteration: 50251
loss: 0.9860938191413879,grad_norm: 0.9507098735830547, iteration: 50252
loss: 0.9976249933242798,grad_norm: 0.8051668532782424, iteration: 50253
loss: 0.9803175926208496,grad_norm: 0.9520920951079872, iteration: 50254
loss: 1.0032614469528198,grad_norm: 0.9999991431810071, iteration: 50255
loss: 0.9922585487365723,grad_norm: 0.9891568046492616, iteration: 50256
loss: 1.0112618207931519,grad_norm: 0.8227555340284272, iteration: 50257
loss: 1.0125066041946411,grad_norm: 0.9861402667048143, iteration: 50258
loss: 1.0168209075927734,grad_norm: 0.999999075078569, iteration: 50259
loss: 1.0017889738082886,grad_norm: 0.9999990472074773, iteration: 50260
loss: 1.0138860940933228,grad_norm: 0.8977683192685502, iteration: 50261
loss: 0.9924191236495972,grad_norm: 0.9650651880015236, iteration: 50262
loss: 1.1499629020690918,grad_norm: 0.9241458312794608, iteration: 50263
loss: 1.014175295829773,grad_norm: 0.7774267594693516, iteration: 50264
loss: 0.9783339500427246,grad_norm: 0.7831394250967023, iteration: 50265
loss: 1.0879616737365723,grad_norm: 0.9999999989185686, iteration: 50266
loss: 0.983936071395874,grad_norm: 0.999999212844866, iteration: 50267
loss: 1.0218628644943237,grad_norm: 0.9067215537636828, iteration: 50268
loss: 0.9615182280540466,grad_norm: 0.999999569082257, iteration: 50269
loss: 1.0211083889007568,grad_norm: 0.9829495681252101, iteration: 50270
loss: 1.029069423675537,grad_norm: 0.9307953406359736, iteration: 50271
loss: 1.0195448398590088,grad_norm: 0.999999183732636, iteration: 50272
loss: 0.9481026530265808,grad_norm: 0.912004704650892, iteration: 50273
loss: 1.0321885347366333,grad_norm: 0.9999997006767325, iteration: 50274
loss: 1.0112229585647583,grad_norm: 0.7731158427456862, iteration: 50275
loss: 0.9900414943695068,grad_norm: 0.9381766922844242, iteration: 50276
loss: 0.9822903275489807,grad_norm: 0.9361763193538458, iteration: 50277
loss: 1.017562747001648,grad_norm: 0.999999864646203, iteration: 50278
loss: 1.0325697660446167,grad_norm: 0.8574998250231519, iteration: 50279
loss: 1.1036872863769531,grad_norm: 0.9999996291651808, iteration: 50280
loss: 1.0501075983047485,grad_norm: 0.8510023386873269, iteration: 50281
loss: 0.980685830116272,grad_norm: 0.8000046567967133, iteration: 50282
loss: 0.9619142413139343,grad_norm: 0.881159968779504, iteration: 50283
loss: 1.0204575061798096,grad_norm: 0.9286615883669938, iteration: 50284
loss: 0.9768221974372864,grad_norm: 0.9999991953589044, iteration: 50285
loss: 0.9934694766998291,grad_norm: 0.999999026625576, iteration: 50286
loss: 1.0227628946304321,grad_norm: 0.9999995541588442, iteration: 50287
loss: 0.9958893656730652,grad_norm: 0.9999991425082989, iteration: 50288
loss: 1.0002775192260742,grad_norm: 0.9293080097270309, iteration: 50289
loss: 1.004193902015686,grad_norm: 0.7372570075065482, iteration: 50290
loss: 1.130299687385559,grad_norm: 0.999999736915255, iteration: 50291
loss: 1.0406434535980225,grad_norm: 0.9999995138809327, iteration: 50292
loss: 1.0038319826126099,grad_norm: 0.9999991217946235, iteration: 50293
loss: 0.9953696727752686,grad_norm: 0.9894373877714827, iteration: 50294
loss: 1.007472038269043,grad_norm: 0.8048284022844538, iteration: 50295
loss: 1.0051743984222412,grad_norm: 0.9999990893285093, iteration: 50296
loss: 1.015333890914917,grad_norm: 0.9148926601846556, iteration: 50297
loss: 1.0126301050186157,grad_norm: 0.9366066473107693, iteration: 50298
loss: 1.0204129219055176,grad_norm: 0.9229992091620642, iteration: 50299
loss: 1.030696153640747,grad_norm: 0.9999991422739501, iteration: 50300
loss: 1.0148910284042358,grad_norm: 0.9781309945700837, iteration: 50301
loss: 1.0328154563903809,grad_norm: 0.9999994393654965, iteration: 50302
loss: 0.9650528430938721,grad_norm: 0.9593856899244799, iteration: 50303
loss: 0.9739043712615967,grad_norm: 0.9999991450787256, iteration: 50304
loss: 0.9816728234291077,grad_norm: 0.833860479621058, iteration: 50305
loss: 0.9861614108085632,grad_norm: 0.9536306643719341, iteration: 50306
loss: 0.9712278842926025,grad_norm: 0.8814940775324915, iteration: 50307
loss: 0.9922191500663757,grad_norm: 0.9427453851225478, iteration: 50308
loss: 1.0335599184036255,grad_norm: 0.9999994462191194, iteration: 50309
loss: 1.017931580543518,grad_norm: 0.9509999894591189, iteration: 50310
loss: 1.0013359785079956,grad_norm: 0.9999990834817467, iteration: 50311
loss: 1.0107553005218506,grad_norm: 0.9999991912832938, iteration: 50312
loss: 0.9991888999938965,grad_norm: 0.9999991888064205, iteration: 50313
loss: 1.0136744976043701,grad_norm: 0.9999990591830366, iteration: 50314
loss: 1.0663881301879883,grad_norm: 0.9999990692053718, iteration: 50315
loss: 1.0173547267913818,grad_norm: 0.8786557240697891, iteration: 50316
loss: 0.9983689188957214,grad_norm: 0.8125633305068495, iteration: 50317
loss: 1.006321907043457,grad_norm: 0.902666724649572, iteration: 50318
loss: 1.0141769647598267,grad_norm: 0.9348479640958639, iteration: 50319
loss: 1.0366352796554565,grad_norm: 0.8348928881107688, iteration: 50320
loss: 0.9910928606987,grad_norm: 0.8545729661803386, iteration: 50321
loss: 0.9848026037216187,grad_norm: 0.999999202095804, iteration: 50322
loss: 0.9984489679336548,grad_norm: 0.9985004944992231, iteration: 50323
loss: 1.0798956155776978,grad_norm: 0.9999991350322283, iteration: 50324
loss: 1.0011688470840454,grad_norm: 0.9999993982013646, iteration: 50325
loss: 1.0758861303329468,grad_norm: 0.999999722108686, iteration: 50326
loss: 1.0773221254348755,grad_norm: 0.9999998069499254, iteration: 50327
loss: 1.0166599750518799,grad_norm: 0.867759036342878, iteration: 50328
loss: 0.9437265396118164,grad_norm: 0.9644676654079469, iteration: 50329
loss: 1.0064150094985962,grad_norm: 0.9999991435712111, iteration: 50330
loss: 1.002233862876892,grad_norm: 0.895386568102032, iteration: 50331
loss: 1.031048059463501,grad_norm: 0.8251478614551739, iteration: 50332
loss: 1.0360292196273804,grad_norm: 0.9999993516939246, iteration: 50333
loss: 0.9709928631782532,grad_norm: 0.9660413033465481, iteration: 50334
loss: 1.0109988451004028,grad_norm: 0.8308784984015759, iteration: 50335
loss: 1.0126703977584839,grad_norm: 0.9496644209020398, iteration: 50336
loss: 0.9761884808540344,grad_norm: 0.9890687082027111, iteration: 50337
loss: 0.9981436133384705,grad_norm: 0.8933934123107946, iteration: 50338
loss: 1.0456581115722656,grad_norm: 0.9999992687903251, iteration: 50339
loss: 1.0414528846740723,grad_norm: 0.9999992117881427, iteration: 50340
loss: 1.008721947669983,grad_norm: 0.8618663333185934, iteration: 50341
loss: 0.9905466437339783,grad_norm: 0.8295589385227352, iteration: 50342
loss: 0.9815104007720947,grad_norm: 0.8341298866857223, iteration: 50343
loss: 1.0340639352798462,grad_norm: 0.9372380471202949, iteration: 50344
loss: 1.0100290775299072,grad_norm: 0.9999994529665942, iteration: 50345
loss: 0.9829657673835754,grad_norm: 0.8136592701508413, iteration: 50346
loss: 1.0022974014282227,grad_norm: 0.9999991314999755, iteration: 50347
loss: 1.0026441812515259,grad_norm: 0.9999991414890068, iteration: 50348
loss: 1.0057344436645508,grad_norm: 0.9999991647215973, iteration: 50349
loss: 1.0299686193466187,grad_norm: 0.9999991681662279, iteration: 50350
loss: 1.0152360200881958,grad_norm: 0.9999989738914504, iteration: 50351
loss: 1.0021336078643799,grad_norm: 0.999999045892084, iteration: 50352
loss: 0.9967950582504272,grad_norm: 0.9999990820208711, iteration: 50353
loss: 1.0066622495651245,grad_norm: 0.746815399342355, iteration: 50354
loss: 1.005770206451416,grad_norm: 0.9088230700055313, iteration: 50355
loss: 1.0693384408950806,grad_norm: 0.9999989567861611, iteration: 50356
loss: 1.0317068099975586,grad_norm: 0.9135437521762295, iteration: 50357
loss: 1.0072884559631348,grad_norm: 0.9272212774057008, iteration: 50358
loss: 1.0019623041152954,grad_norm: 0.9999990736887725, iteration: 50359
loss: 0.9838141202926636,grad_norm: 0.999999296096371, iteration: 50360
loss: 1.0382039546966553,grad_norm: 0.9999990060510504, iteration: 50361
loss: 0.986792802810669,grad_norm: 0.9999991677120504, iteration: 50362
loss: 1.0168079137802124,grad_norm: 0.8156087196155845, iteration: 50363
loss: 1.0186790227890015,grad_norm: 0.943943968678723, iteration: 50364
loss: 0.9970029592514038,grad_norm: 0.8569655581567042, iteration: 50365
loss: 1.0197782516479492,grad_norm: 0.9163055044061721, iteration: 50366
loss: 0.9958020448684692,grad_norm: 0.8507176484069225, iteration: 50367
loss: 1.0049710273742676,grad_norm: 0.9999991164187326, iteration: 50368
loss: 1.0364100933074951,grad_norm: 0.9349608777107327, iteration: 50369
loss: 1.0338926315307617,grad_norm: 0.8399591606175103, iteration: 50370
loss: 1.059613585472107,grad_norm: 0.932552416887032, iteration: 50371
loss: 0.9637120366096497,grad_norm: 0.9434191840507747, iteration: 50372
loss: 0.9646227955818176,grad_norm: 0.8636553086015988, iteration: 50373
loss: 1.0009775161743164,grad_norm: 0.9999995838745565, iteration: 50374
loss: 1.0262861251831055,grad_norm: 0.8922125664339596, iteration: 50375
loss: 1.010614037513733,grad_norm: 0.9999993561515823, iteration: 50376
loss: 1.009277105331421,grad_norm: 0.8267231736514209, iteration: 50377
loss: 1.0067728757858276,grad_norm: 0.999999026724571, iteration: 50378
loss: 1.0137871503829956,grad_norm: 0.9639965669582717, iteration: 50379
loss: 1.0395243167877197,grad_norm: 0.782337053802817, iteration: 50380
loss: 0.9857491254806519,grad_norm: 0.923332595593738, iteration: 50381
loss: 1.0398465394973755,grad_norm: 0.9999989738148669, iteration: 50382
loss: 0.9673370122909546,grad_norm: 0.9999993979589432, iteration: 50383
loss: 0.967232346534729,grad_norm: 0.938792496186774, iteration: 50384
loss: 0.9745480418205261,grad_norm: 0.999998982391384, iteration: 50385
loss: 0.9882389903068542,grad_norm: 0.9024154508044325, iteration: 50386
loss: 0.9853976964950562,grad_norm: 0.8680226121562973, iteration: 50387
loss: 0.9765837788581848,grad_norm: 0.9999991463928402, iteration: 50388
loss: 1.0102405548095703,grad_norm: 0.8789603673373021, iteration: 50389
loss: 1.0294227600097656,grad_norm: 0.9623298013106139, iteration: 50390
loss: 1.0406886339187622,grad_norm: 0.9506278630440904, iteration: 50391
loss: 1.006338119506836,grad_norm: 0.9590045066687001, iteration: 50392
loss: 0.9960485100746155,grad_norm: 0.9394682265917674, iteration: 50393
loss: 1.020225167274475,grad_norm: 0.9999991351089151, iteration: 50394
loss: 1.1079747676849365,grad_norm: 0.9999996248896865, iteration: 50395
loss: 0.9911864399909973,grad_norm: 0.9063337951981784, iteration: 50396
loss: 1.0040968656539917,grad_norm: 0.9999990530977869, iteration: 50397
loss: 0.9744912385940552,grad_norm: 0.9065932365617227, iteration: 50398
loss: 0.9951866865158081,grad_norm: 0.8261455359424326, iteration: 50399
loss: 1.0258015394210815,grad_norm: 0.7807479659981766, iteration: 50400
loss: 0.978614330291748,grad_norm: 0.9281729542549576, iteration: 50401
loss: 1.0575859546661377,grad_norm: 0.999999453988577, iteration: 50402
loss: 1.026281714439392,grad_norm: 0.8572373715284854, iteration: 50403
loss: 1.079997181892395,grad_norm: 0.999999212356605, iteration: 50404
loss: 1.0138463973999023,grad_norm: 0.9694462542135931, iteration: 50405
loss: 1.0540868043899536,grad_norm: 0.9999997124516813, iteration: 50406
loss: 1.0362061262130737,grad_norm: 0.9999998722998681, iteration: 50407
loss: 1.001620888710022,grad_norm: 0.9999990240245128, iteration: 50408
loss: 1.0142922401428223,grad_norm: 0.8511003828434471, iteration: 50409
loss: 1.0291452407836914,grad_norm: 0.953241846522233, iteration: 50410
loss: 0.9899721145629883,grad_norm: 0.9999990649046382, iteration: 50411
loss: 0.9924084544181824,grad_norm: 0.9966117559785858, iteration: 50412
loss: 1.0188143253326416,grad_norm: 0.8903050065469028, iteration: 50413
loss: 0.9562312960624695,grad_norm: 0.9999994378261813, iteration: 50414
loss: 1.0169068574905396,grad_norm: 0.9292832345815467, iteration: 50415
loss: 1.007137656211853,grad_norm: 0.9999989380643893, iteration: 50416
loss: 0.9995916485786438,grad_norm: 0.9999992774957209, iteration: 50417
loss: 0.9934635162353516,grad_norm: 0.9999992442245181, iteration: 50418
loss: 0.9560540318489075,grad_norm: 0.9999993937868107, iteration: 50419
loss: 1.0072815418243408,grad_norm: 0.9104961985884803, iteration: 50420
loss: 1.007468819618225,grad_norm: 0.9999992532075856, iteration: 50421
loss: 0.9928452968597412,grad_norm: 0.9999992743321335, iteration: 50422
loss: 0.9889151453971863,grad_norm: 0.9999992723206873, iteration: 50423
loss: 1.0175248384475708,grad_norm: 0.9530223725847151, iteration: 50424
loss: 1.0018234252929688,grad_norm: 0.8988945936129679, iteration: 50425
loss: 1.002548098564148,grad_norm: 0.9999990685798359, iteration: 50426
loss: 1.0114006996154785,grad_norm: 0.8314371364989077, iteration: 50427
loss: 1.0491511821746826,grad_norm: 0.999999205188201, iteration: 50428
loss: 0.9962233901023865,grad_norm: 0.9999989476471682, iteration: 50429
loss: 1.0368125438690186,grad_norm: 0.9999998679453851, iteration: 50430
loss: 0.9926145672798157,grad_norm: 0.9988816074355262, iteration: 50431
loss: 1.0112581253051758,grad_norm: 0.9999992854426812, iteration: 50432
loss: 1.0271672010421753,grad_norm: 0.990879562360805, iteration: 50433
loss: 0.9728230237960815,grad_norm: 0.8674096618090487, iteration: 50434
loss: 1.0118132829666138,grad_norm: 0.9999996481885326, iteration: 50435
loss: 0.9931591749191284,grad_norm: 0.9812300183047233, iteration: 50436
loss: 1.0787665843963623,grad_norm: 0.9999996851904709, iteration: 50437
loss: 1.0264625549316406,grad_norm: 0.8681961924019767, iteration: 50438
loss: 1.0008838176727295,grad_norm: 0.9182775874774192, iteration: 50439
loss: 0.9752292037010193,grad_norm: 0.9999992473211822, iteration: 50440
loss: 0.9976014494895935,grad_norm: 0.9427540164708637, iteration: 50441
loss: 1.0132176876068115,grad_norm: 0.8548307415584924, iteration: 50442
loss: 0.991767406463623,grad_norm: 0.9338961151627397, iteration: 50443
loss: 1.0338701009750366,grad_norm: 0.9292598803918598, iteration: 50444
loss: 1.0237575769424438,grad_norm: 0.9999991994333618, iteration: 50445
loss: 0.962704598903656,grad_norm: 0.9999992717353768, iteration: 50446
loss: 1.0487744808197021,grad_norm: 0.9999992067659207, iteration: 50447
loss: 1.0455198287963867,grad_norm: 0.9906374656450426, iteration: 50448
loss: 1.0326414108276367,grad_norm: 0.8041608966889179, iteration: 50449
loss: 0.9567534923553467,grad_norm: 0.9999991064025333, iteration: 50450
loss: 1.0938364267349243,grad_norm: 0.9999991956964992, iteration: 50451
loss: 0.9968165159225464,grad_norm: 0.9805098572216343, iteration: 50452
loss: 0.9749249219894409,grad_norm: 0.9999993094592796, iteration: 50453
loss: 1.061875581741333,grad_norm: 0.9999992715619755, iteration: 50454
loss: 0.9892740845680237,grad_norm: 0.994382748491551, iteration: 50455
loss: 0.9932966232299805,grad_norm: 0.9999992397371544, iteration: 50456
loss: 1.03693687915802,grad_norm: 0.970613858829721, iteration: 50457
loss: 1.0096031427383423,grad_norm: 0.8872833708821546, iteration: 50458
loss: 0.986751914024353,grad_norm: 0.944575133745297, iteration: 50459
loss: 0.9946614503860474,grad_norm: 0.9983231518930176, iteration: 50460
loss: 1.028753638267517,grad_norm: 0.9999998500648126, iteration: 50461
loss: 0.9786177277565002,grad_norm: 0.8620164035110299, iteration: 50462
loss: 1.0049867630004883,grad_norm: 0.9999991704057384, iteration: 50463
loss: 1.0023895502090454,grad_norm: 0.8129670484500157, iteration: 50464
loss: 1.021347165107727,grad_norm: 0.9999994323539717, iteration: 50465
loss: 1.0091718435287476,grad_norm: 0.9999991862673924, iteration: 50466
loss: 0.9928630590438843,grad_norm: 0.9526267922626416, iteration: 50467
loss: 0.9730939269065857,grad_norm: 0.9695898442642628, iteration: 50468
loss: 1.0179225206375122,grad_norm: 0.9235514254043913, iteration: 50469
loss: 1.0271917581558228,grad_norm: 0.8940010605020746, iteration: 50470
loss: 1.0100754499435425,grad_norm: 0.9648560237036, iteration: 50471
loss: 1.0338670015335083,grad_norm: 0.9047195354246894, iteration: 50472
loss: 1.021485447883606,grad_norm: 0.9066776372400482, iteration: 50473
loss: 0.9873447418212891,grad_norm: 0.8687875789693327, iteration: 50474
loss: 0.9870803952217102,grad_norm: 0.9442119857240046, iteration: 50475
loss: 0.9915280342102051,grad_norm: 0.9999990524959671, iteration: 50476
loss: 1.0136431455612183,grad_norm: 0.7498502863878674, iteration: 50477
loss: 1.0247962474822998,grad_norm: 0.8080573957201893, iteration: 50478
loss: 1.0813188552856445,grad_norm: 0.9999990945674762, iteration: 50479
loss: 1.0093460083007812,grad_norm: 0.9999992729972302, iteration: 50480
loss: 0.9979662895202637,grad_norm: 0.9782748403865676, iteration: 50481
loss: 1.004571795463562,grad_norm: 0.9657428888890518, iteration: 50482
loss: 1.0023385286331177,grad_norm: 0.9701261995478105, iteration: 50483
loss: 1.0650889873504639,grad_norm: 0.9999998797150841, iteration: 50484
loss: 1.0072667598724365,grad_norm: 0.9999990729009613, iteration: 50485
loss: 1.0005279779434204,grad_norm: 0.9999991336810017, iteration: 50486
loss: 0.9925830364227295,grad_norm: 0.9999992886016074, iteration: 50487
loss: 1.0591927766799927,grad_norm: 0.9813746000237775, iteration: 50488
loss: 1.091423749923706,grad_norm: 0.9999994773910378, iteration: 50489
loss: 1.0000231266021729,grad_norm: 0.9039800610513795, iteration: 50490
loss: 1.0272738933563232,grad_norm: 0.9999997344383615, iteration: 50491
loss: 1.0330464839935303,grad_norm: 0.9999995367106004, iteration: 50492
loss: 1.0182368755340576,grad_norm: 0.9999995466698312, iteration: 50493
loss: 1.0425876379013062,grad_norm: 0.9999992274853551, iteration: 50494
loss: 0.9999488592147827,grad_norm: 0.9910407395682129, iteration: 50495
loss: 0.9906111359596252,grad_norm: 0.9999992015109962, iteration: 50496
loss: 0.9912633895874023,grad_norm: 0.8524816454615287, iteration: 50497
loss: 0.9935057163238525,grad_norm: 0.7967312668447755, iteration: 50498
loss: 1.0000383853912354,grad_norm: 0.9390786080057865, iteration: 50499
loss: 1.0061954259872437,grad_norm: 0.9999990803695801, iteration: 50500
loss: 0.9833623766899109,grad_norm: 0.9999992603288518, iteration: 50501
loss: 1.0021215677261353,grad_norm: 0.8757602384423332, iteration: 50502
loss: 0.9872597455978394,grad_norm: 0.8470280877431979, iteration: 50503
loss: 1.0139657258987427,grad_norm: 0.9999991976108922, iteration: 50504
loss: 0.9726096987724304,grad_norm: 0.9999991157904147, iteration: 50505
loss: 0.9638631343841553,grad_norm: 0.9747502476681567, iteration: 50506
loss: 1.001440167427063,grad_norm: 0.8977911497475759, iteration: 50507
loss: 1.0101659297943115,grad_norm: 0.9230454721206031, iteration: 50508
loss: 1.0214390754699707,grad_norm: 0.9850131001058661, iteration: 50509
loss: 1.027462124824524,grad_norm: 0.9224064251051778, iteration: 50510
loss: 0.9857768416404724,grad_norm: 0.9999990465204989, iteration: 50511
loss: 1.0283538103103638,grad_norm: 0.9999991533197577, iteration: 50512
loss: 1.0032254457473755,grad_norm: 0.9670068326301127, iteration: 50513
loss: 1.0451823472976685,grad_norm: 0.9999996653110154, iteration: 50514
loss: 1.0494568347930908,grad_norm: 0.999999082934672, iteration: 50515
loss: 0.992148756980896,grad_norm: 0.9390925377651802, iteration: 50516
loss: 0.9970800876617432,grad_norm: 0.9999992053421375, iteration: 50517
loss: 0.9720529317855835,grad_norm: 0.8779793546718212, iteration: 50518
loss: 1.0019534826278687,grad_norm: 0.8512051042985292, iteration: 50519
loss: 1.0642439126968384,grad_norm: 0.9999990636825273, iteration: 50520
loss: 1.0114883184432983,grad_norm: 0.8958288910139722, iteration: 50521
loss: 1.0539482831954956,grad_norm: 0.9278186070043125, iteration: 50522
loss: 0.9835972189903259,grad_norm: 0.8222110383329945, iteration: 50523
loss: 1.0489760637283325,grad_norm: 0.9999991531924581, iteration: 50524
loss: 1.013173222541809,grad_norm: 0.9999993909712862, iteration: 50525
loss: 1.0142443180084229,grad_norm: 0.99999919445207, iteration: 50526
loss: 1.0762664079666138,grad_norm: 0.9999996284102178, iteration: 50527
loss: 1.0195571184158325,grad_norm: 0.9999990338370692, iteration: 50528
loss: 1.008546233177185,grad_norm: 0.9350884783069784, iteration: 50529
loss: 0.9605180621147156,grad_norm: 0.7919408432097407, iteration: 50530
loss: 0.9823687076568604,grad_norm: 0.8733721483144983, iteration: 50531
loss: 0.9939492344856262,grad_norm: 0.9999992629836374, iteration: 50532
loss: 1.0120922327041626,grad_norm: 0.9999994601345524, iteration: 50533
loss: 1.0126255750656128,grad_norm: 0.8604931491525345, iteration: 50534
loss: 1.0507900714874268,grad_norm: 0.9999998815317577, iteration: 50535
loss: 1.002531886100769,grad_norm: 0.8004082441022199, iteration: 50536
loss: 1.0107802152633667,grad_norm: 0.9041143281703645, iteration: 50537
loss: 1.0113272666931152,grad_norm: 0.9561710010846803, iteration: 50538
loss: 0.963230550289154,grad_norm: 0.8651433755056724, iteration: 50539
loss: 1.0141489505767822,grad_norm: 0.7927891401591338, iteration: 50540
loss: 0.9987911581993103,grad_norm: 0.9999994168902923, iteration: 50541
loss: 1.0152747631072998,grad_norm: 0.855075088097448, iteration: 50542
loss: 1.0025547742843628,grad_norm: 0.9999990797553088, iteration: 50543
loss: 0.9917957782745361,grad_norm: 0.9999996915552034, iteration: 50544
loss: 0.9927235245704651,grad_norm: 0.9999993071008887, iteration: 50545
loss: 1.0149145126342773,grad_norm: 0.9076750141523203, iteration: 50546
loss: 1.0156419277191162,grad_norm: 0.9999991271785698, iteration: 50547
loss: 1.0586735010147095,grad_norm: 0.9999992382318175, iteration: 50548
loss: 1.008907437324524,grad_norm: 0.8397757550301244, iteration: 50549
loss: 0.9688798189163208,grad_norm: 0.9556742448050101, iteration: 50550
loss: 1.0571120977401733,grad_norm: 0.9999992378209849, iteration: 50551
loss: 0.9878374934196472,grad_norm: 0.9095753288740633, iteration: 50552
loss: 0.961520791053772,grad_norm: 0.9999990272895911, iteration: 50553
loss: 1.0445247888565063,grad_norm: 0.9999990867046957, iteration: 50554
loss: 0.9752225875854492,grad_norm: 0.9999992678719879, iteration: 50555
loss: 1.0613389015197754,grad_norm: 0.999999035802247, iteration: 50556
loss: 0.9883067011833191,grad_norm: 0.8385678356324511, iteration: 50557
loss: 1.0703016519546509,grad_norm: 0.9999991227433332, iteration: 50558
loss: 0.9853042364120483,grad_norm: 0.8833910484802451, iteration: 50559
loss: 1.0321035385131836,grad_norm: 0.945018848570205, iteration: 50560
loss: 0.9951032996177673,grad_norm: 0.9999991084779967, iteration: 50561
loss: 1.0097042322158813,grad_norm: 0.9687848163621134, iteration: 50562
loss: 1.0885941982269287,grad_norm: 0.9999999934947704, iteration: 50563
loss: 1.0185434818267822,grad_norm: 0.8015953416256589, iteration: 50564
loss: 0.9802499413490295,grad_norm: 0.9999995077262808, iteration: 50565
loss: 0.9954657554626465,grad_norm: 0.8124875007635536, iteration: 50566
loss: 0.9690507650375366,grad_norm: 0.9999991948236613, iteration: 50567
loss: 1.0063879489898682,grad_norm: 0.9999993840945228, iteration: 50568
loss: 0.9707958698272705,grad_norm: 0.9264257628986856, iteration: 50569
loss: 1.0130772590637207,grad_norm: 0.9999998475081119, iteration: 50570
loss: 1.0110836029052734,grad_norm: 0.9241170207904505, iteration: 50571
loss: 0.9914606809616089,grad_norm: 0.9665176632711865, iteration: 50572
loss: 1.006210207939148,grad_norm: 0.9747261221358314, iteration: 50573
loss: 0.9945927858352661,grad_norm: 0.9999991478086231, iteration: 50574
loss: 0.9978441596031189,grad_norm: 0.9999992260401693, iteration: 50575
loss: 1.0153706073760986,grad_norm: 0.9948967436669705, iteration: 50576
loss: 1.0184663534164429,grad_norm: 0.9110470685493643, iteration: 50577
loss: 1.0164636373519897,grad_norm: 0.9999991113994471, iteration: 50578
loss: 0.9925218224525452,grad_norm: 0.892448965143485, iteration: 50579
loss: 0.9987785816192627,grad_norm: 0.830396174415766, iteration: 50580
loss: 1.046038269996643,grad_norm: 0.9999992451855553, iteration: 50581
loss: 0.9700702428817749,grad_norm: 0.9411798445698646, iteration: 50582
loss: 1.014207363128662,grad_norm: 0.9703101565204098, iteration: 50583
loss: 1.0157248973846436,grad_norm: 0.9999992151667675, iteration: 50584
loss: 1.013155460357666,grad_norm: 0.9569615282395421, iteration: 50585
loss: 1.0168719291687012,grad_norm: 0.855127787316931, iteration: 50586
loss: 0.9925217032432556,grad_norm: 0.860597420096477, iteration: 50587
loss: 1.013168454170227,grad_norm: 0.9999990215749651, iteration: 50588
loss: 0.9904234409332275,grad_norm: 0.9999992363449498, iteration: 50589
loss: 1.02288818359375,grad_norm: 0.8645958042021414, iteration: 50590
loss: 0.9626256227493286,grad_norm: 0.9427134595117678, iteration: 50591
loss: 0.9897093772888184,grad_norm: 0.8489537618617766, iteration: 50592
loss: 1.0300716161727905,grad_norm: 0.999999250242882, iteration: 50593
loss: 1.0432664155960083,grad_norm: 0.9999998312159014, iteration: 50594
loss: 0.990046501159668,grad_norm: 0.8959499400494899, iteration: 50595
loss: 1.0154753923416138,grad_norm: 0.8496501255024984, iteration: 50596
loss: 1.0019623041152954,grad_norm: 0.9999994249476459, iteration: 50597
loss: 1.0412441492080688,grad_norm: 0.9999997013937133, iteration: 50598
loss: 1.0804450511932373,grad_norm: 0.9999996112769572, iteration: 50599
loss: 1.0066726207733154,grad_norm: 0.9999994104105807, iteration: 50600
loss: 1.008762240409851,grad_norm: 0.9999989933751122, iteration: 50601
loss: 1.0946335792541504,grad_norm: 0.9999998573357268, iteration: 50602
loss: 1.011258840560913,grad_norm: 0.9999991985680542, iteration: 50603
loss: 0.9910909533500671,grad_norm: 0.9350397170809637, iteration: 50604
loss: 1.0296502113342285,grad_norm: 0.9999993857056644, iteration: 50605
loss: 1.0295242071151733,grad_norm: 0.9721834769117174, iteration: 50606
loss: 1.0189573764801025,grad_norm: 0.8051992672195611, iteration: 50607
loss: 1.0364010334014893,grad_norm: 0.9999991073133652, iteration: 50608
loss: 1.0586096048355103,grad_norm: 0.9999996477027275, iteration: 50609
loss: 1.0043525695800781,grad_norm: 0.9534665999529728, iteration: 50610
loss: 1.0133082866668701,grad_norm: 0.9700705890033446, iteration: 50611
loss: 1.0789356231689453,grad_norm: 0.9999992128594934, iteration: 50612
loss: 1.0102958679199219,grad_norm: 0.9914310026020843, iteration: 50613
loss: 1.0293244123458862,grad_norm: 0.9569740876003696, iteration: 50614
loss: 1.0187519788742065,grad_norm: 0.823018355739356, iteration: 50615
loss: 0.9873827695846558,grad_norm: 0.9999989882998347, iteration: 50616
loss: 1.019364833831787,grad_norm: 0.8704364109369894, iteration: 50617
loss: 0.9975164532661438,grad_norm: 0.9179555435531267, iteration: 50618
loss: 1.073574423789978,grad_norm: 0.999999787486426, iteration: 50619
loss: 0.9634971022605896,grad_norm: 0.9999996875302819, iteration: 50620
loss: 1.0972602367401123,grad_norm: 0.9999998765297329, iteration: 50621
loss: 1.006734013557434,grad_norm: 0.9999991114573967, iteration: 50622
loss: 1.0308088064193726,grad_norm: 0.9999990965201884, iteration: 50623
loss: 1.0194060802459717,grad_norm: 0.9227722303518985, iteration: 50624
loss: 0.9807319045066833,grad_norm: 0.9178878089705691, iteration: 50625
loss: 1.0389533042907715,grad_norm: 0.9999997851792053, iteration: 50626
loss: 1.0182088613510132,grad_norm: 0.9999993411543243, iteration: 50627
loss: 0.9952021241188049,grad_norm: 0.9570214384738925, iteration: 50628
loss: 1.0009329319000244,grad_norm: 0.9434708529292845, iteration: 50629
loss: 0.9567734003067017,grad_norm: 0.939224222270999, iteration: 50630
loss: 1.0209805965423584,grad_norm: 0.9999991835418908, iteration: 50631
loss: 1.0054965019226074,grad_norm: 0.9999993265813372, iteration: 50632
loss: 0.9652027487754822,grad_norm: 0.9999990777806809, iteration: 50633
loss: 0.9973040223121643,grad_norm: 0.9999990562093353, iteration: 50634
loss: 1.0078198909759521,grad_norm: 0.8101361937637405, iteration: 50635
loss: 0.9839023351669312,grad_norm: 0.9999992948209065, iteration: 50636
loss: 0.9930107593536377,grad_norm: 0.9101656449565982, iteration: 50637
loss: 1.0145175457000732,grad_norm: 0.8348442526666653, iteration: 50638
loss: 1.0459240674972534,grad_norm: 0.9999993691906952, iteration: 50639
loss: 0.995690643787384,grad_norm: 0.9499232995130955, iteration: 50640
loss: 0.9915459156036377,grad_norm: 0.7880595059231064, iteration: 50641
loss: 1.1238658428192139,grad_norm: 0.9999991510471355, iteration: 50642
loss: 1.0142145156860352,grad_norm: 0.938020612975407, iteration: 50643
loss: 0.9944026470184326,grad_norm: 0.9351931727115464, iteration: 50644
loss: 1.0189937353134155,grad_norm: 0.9999997452412793, iteration: 50645
loss: 1.0258238315582275,grad_norm: 0.9999996199174458, iteration: 50646
loss: 0.9804733395576477,grad_norm: 0.8999482231304381, iteration: 50647
loss: 1.0542837381362915,grad_norm: 0.9060289586073899, iteration: 50648
loss: 0.9977239370346069,grad_norm: 0.9457429770594873, iteration: 50649
loss: 0.9874148368835449,grad_norm: 0.8719124383914432, iteration: 50650
loss: 0.9838575720787048,grad_norm: 0.9999993047645571, iteration: 50651
loss: 1.0088465213775635,grad_norm: 0.9999991609422441, iteration: 50652
loss: 1.0843926668167114,grad_norm: 0.9999990620380207, iteration: 50653
loss: 0.9688146114349365,grad_norm: 0.9999991521318905, iteration: 50654
loss: 0.9681216478347778,grad_norm: 0.9992078225576784, iteration: 50655
loss: 1.029638648033142,grad_norm: 0.9999992402469668, iteration: 50656
loss: 1.0608290433883667,grad_norm: 0.9999992460584278, iteration: 50657
loss: 1.0699552297592163,grad_norm: 0.9999999317527849, iteration: 50658
loss: 0.9695122241973877,grad_norm: 0.9999989642399347, iteration: 50659
loss: 1.0078805685043335,grad_norm: 0.9476015148365596, iteration: 50660
loss: 1.0001436471939087,grad_norm: 0.8682246058696217, iteration: 50661
loss: 1.0065876245498657,grad_norm: 0.8994315379578074, iteration: 50662
loss: 1.0175929069519043,grad_norm: 0.9999991101075519, iteration: 50663
loss: 1.016022801399231,grad_norm: 0.9999998701025211, iteration: 50664
loss: 1.0222039222717285,grad_norm: 0.9999995634526054, iteration: 50665
loss: 1.0087568759918213,grad_norm: 0.9999991317298298, iteration: 50666
loss: 1.0236670970916748,grad_norm: 0.9999990031560764, iteration: 50667
loss: 0.9605382084846497,grad_norm: 0.8487349032038902, iteration: 50668
loss: 0.9331710934638977,grad_norm: 0.9812902393665098, iteration: 50669
loss: 0.9618407487869263,grad_norm: 0.7943843448980563, iteration: 50670
loss: 0.9691957235336304,grad_norm: 0.9999992800994429, iteration: 50671
loss: 0.9945265650749207,grad_norm: 0.9588742895412857, iteration: 50672
loss: 0.9889112710952759,grad_norm: 0.9999991366059722, iteration: 50673
loss: 0.9922708868980408,grad_norm: 0.9999990921679804, iteration: 50674
loss: 0.993048906326294,grad_norm: 0.9881240317259236, iteration: 50675
loss: 1.002260684967041,grad_norm: 0.9999996578952467, iteration: 50676
loss: 1.0018929243087769,grad_norm: 0.9792917633956509, iteration: 50677
loss: 0.9915153980255127,grad_norm: 0.9667688471938028, iteration: 50678
loss: 0.9621112942695618,grad_norm: 0.8084671318073107, iteration: 50679
loss: 0.9862614274024963,grad_norm: 0.9216447699193807, iteration: 50680
loss: 1.01368248462677,grad_norm: 0.8351451599671765, iteration: 50681
loss: 0.9948512315750122,grad_norm: 0.9999995902360502, iteration: 50682
loss: 1.05221426486969,grad_norm: 0.9716287397323353, iteration: 50683
loss: 1.0221993923187256,grad_norm: 0.9999996942550957, iteration: 50684
loss: 1.0010236501693726,grad_norm: 0.8791497753697927, iteration: 50685
loss: 1.018921971321106,grad_norm: 0.9068242333525921, iteration: 50686
loss: 0.9898160696029663,grad_norm: 0.9999998591831141, iteration: 50687
loss: 1.0279698371887207,grad_norm: 0.9999994084849781, iteration: 50688
loss: 0.9544634222984314,grad_norm: 0.9999991936800469, iteration: 50689
loss: 0.9608652591705322,grad_norm: 0.9460792537050612, iteration: 50690
loss: 1.0616955757141113,grad_norm: 0.9999998548544979, iteration: 50691
loss: 1.0196031332015991,grad_norm: 0.9999992131226182, iteration: 50692
loss: 0.9469212293624878,grad_norm: 0.914034571981353, iteration: 50693
loss: 0.9761027097702026,grad_norm: 0.9297698719907671, iteration: 50694
loss: 1.0842196941375732,grad_norm: 0.9999990489671575, iteration: 50695
loss: 0.990882933139801,grad_norm: 0.888544434840965, iteration: 50696
loss: 1.0980372428894043,grad_norm: 0.9999997764171343, iteration: 50697
loss: 0.9922345280647278,grad_norm: 0.9999992066731492, iteration: 50698
loss: 1.0061590671539307,grad_norm: 0.9228077830649102, iteration: 50699
loss: 0.9776961803436279,grad_norm: 0.9999991325466497, iteration: 50700
loss: 1.0768578052520752,grad_norm: 0.9999990672948155, iteration: 50701
loss: 0.9861050844192505,grad_norm: 0.9999996386610581, iteration: 50702
loss: 1.0124822854995728,grad_norm: 0.865285277128844, iteration: 50703
loss: 1.02874755859375,grad_norm: 0.9999998597439914, iteration: 50704
loss: 1.009577751159668,grad_norm: 0.9999992204303539, iteration: 50705
loss: 1.0128949880599976,grad_norm: 0.9572813375780814, iteration: 50706
loss: 1.0378237962722778,grad_norm: 0.9999992127061513, iteration: 50707
loss: 0.9837982654571533,grad_norm: 0.9999991492093554, iteration: 50708
loss: 1.12288236618042,grad_norm: 0.9999992245095939, iteration: 50709
loss: 1.090566873550415,grad_norm: 0.9999998691279048, iteration: 50710
loss: 1.0449572801589966,grad_norm: 0.9999994233103896, iteration: 50711
loss: 1.0201144218444824,grad_norm: 0.7919542245793606, iteration: 50712
loss: 1.030014157295227,grad_norm: 0.9951972224052863, iteration: 50713
loss: 1.0065429210662842,grad_norm: 0.7927182423398477, iteration: 50714
loss: 1.0086476802825928,grad_norm: 0.9999992966796001, iteration: 50715
loss: 0.9698972105979919,grad_norm: 0.8935101207774052, iteration: 50716
loss: 0.9984408020973206,grad_norm: 0.9999991692041966, iteration: 50717
loss: 0.9961516857147217,grad_norm: 0.9414478461360221, iteration: 50718
loss: 1.0678359270095825,grad_norm: 0.9637880588935483, iteration: 50719
loss: 0.9872174263000488,grad_norm: 0.9724006352438334, iteration: 50720
loss: 1.0298268795013428,grad_norm: 0.7920078043326367, iteration: 50721
loss: 0.9849116206169128,grad_norm: 0.9999990468916663, iteration: 50722
loss: 0.9711509346961975,grad_norm: 0.8124937497083498, iteration: 50723
loss: 1.0111302137374878,grad_norm: 0.9999995502156471, iteration: 50724
loss: 1.0091102123260498,grad_norm: 0.999999119084742, iteration: 50725
loss: 1.0179271697998047,grad_norm: 0.8937377554072917, iteration: 50726
loss: 0.9980037808418274,grad_norm: 0.7549920134876223, iteration: 50727
loss: 1.000357747077942,grad_norm: 0.8355570313036613, iteration: 50728
loss: 1.00809645652771,grad_norm: 0.9999990173277906, iteration: 50729
loss: 1.006204605102539,grad_norm: 0.9074163985597464, iteration: 50730
loss: 0.9901682734489441,grad_norm: 0.9999990441697617, iteration: 50731
loss: 1.0138827562332153,grad_norm: 0.875038400198023, iteration: 50732
loss: 0.981101393699646,grad_norm: 0.9156808765740907, iteration: 50733
loss: 1.0537025928497314,grad_norm: 0.9999994495938662, iteration: 50734
loss: 0.980800211429596,grad_norm: 0.9317430097059003, iteration: 50735
loss: 1.013527274131775,grad_norm: 0.9999991769687117, iteration: 50736
loss: 0.9866644740104675,grad_norm: 0.9999990339313178, iteration: 50737
loss: 0.9986506104469299,grad_norm: 0.9999994030197022, iteration: 50738
loss: 1.0262970924377441,grad_norm: 0.8774745072873287, iteration: 50739
loss: 0.9932386875152588,grad_norm: 0.7175598074078618, iteration: 50740
loss: 0.9871365427970886,grad_norm: 0.9729278203623819, iteration: 50741
loss: 0.9621874094009399,grad_norm: 0.9999991892047291, iteration: 50742
loss: 0.9820953607559204,grad_norm: 0.9999992201873679, iteration: 50743
loss: 1.007164478302002,grad_norm: 0.7997985726224371, iteration: 50744
loss: 1.0191153287887573,grad_norm: 0.9999990927537393, iteration: 50745
loss: 1.0199440717697144,grad_norm: 0.9476944286053122, iteration: 50746
loss: 1.0417990684509277,grad_norm: 0.9999993915511809, iteration: 50747
loss: 1.0072238445281982,grad_norm: 0.9040895464734152, iteration: 50748
loss: 1.0115387439727783,grad_norm: 0.8705536593690326, iteration: 50749
loss: 0.9974381327629089,grad_norm: 0.99999906325931, iteration: 50750
loss: 1.0215272903442383,grad_norm: 0.9999990098046022, iteration: 50751
loss: 1.022162675857544,grad_norm: 0.9999991308120846, iteration: 50752
loss: 1.0471538305282593,grad_norm: 0.9999996254160299, iteration: 50753
loss: 1.0548385381698608,grad_norm: 0.9999998321542811, iteration: 50754
loss: 1.0099657773971558,grad_norm: 0.9785649838671525, iteration: 50755
loss: 0.9952728152275085,grad_norm: 0.9252204924047029, iteration: 50756
loss: 1.0030239820480347,grad_norm: 0.9131806585205186, iteration: 50757
loss: 1.0109556913375854,grad_norm: 0.9999991455506297, iteration: 50758
loss: 0.9542568922042847,grad_norm: 0.7059739389045815, iteration: 50759
loss: 0.9933927655220032,grad_norm: 0.9226763635243537, iteration: 50760
loss: 0.9732590913772583,grad_norm: 0.9999989798864266, iteration: 50761
loss: 1.0071258544921875,grad_norm: 0.9999992850940561, iteration: 50762
loss: 0.9444030523300171,grad_norm: 0.9999991074456439, iteration: 50763
loss: 1.0033575296401978,grad_norm: 0.9314774315312815, iteration: 50764
loss: 0.9957348108291626,grad_norm: 0.9879434315814049, iteration: 50765
loss: 1.0120207071304321,grad_norm: 0.9999994357626021, iteration: 50766
loss: 1.0368139743804932,grad_norm: 0.9999991668935654, iteration: 50767
loss: 0.9985970854759216,grad_norm: 0.8386087892422455, iteration: 50768
loss: 0.981870710849762,grad_norm: 0.9999997810702186, iteration: 50769
loss: 0.9882978200912476,grad_norm: 0.8045351945803464, iteration: 50770
loss: 1.0199419260025024,grad_norm: 0.9999993771387645, iteration: 50771
loss: 1.0091116428375244,grad_norm: 0.9092775769501423, iteration: 50772
loss: 0.9897488951683044,grad_norm: 0.8581963626412721, iteration: 50773
loss: 1.0431466102600098,grad_norm: 0.9999999282148778, iteration: 50774
loss: 1.0425622463226318,grad_norm: 0.893349613486098, iteration: 50775
loss: 0.9986551403999329,grad_norm: 0.9222923224268402, iteration: 50776
loss: 1.0225075483322144,grad_norm: 0.9999991332140086, iteration: 50777
loss: 1.0271857976913452,grad_norm: 0.8131547049566662, iteration: 50778
loss: 1.0084037780761719,grad_norm: 0.936598531850086, iteration: 50779
loss: 1.0350029468536377,grad_norm: 0.9850720958703597, iteration: 50780
loss: 1.006152868270874,grad_norm: 0.7994914311769317, iteration: 50781
loss: 0.9968795776367188,grad_norm: 0.999999318049964, iteration: 50782
loss: 1.0181697607040405,grad_norm: 0.9999992447652002, iteration: 50783
loss: 0.9842317700386047,grad_norm: 0.9999990434212125, iteration: 50784
loss: 0.982917070388794,grad_norm: 0.9831290303379117, iteration: 50785
loss: 1.0137158632278442,grad_norm: 0.825963559523414, iteration: 50786
loss: 1.0280684232711792,grad_norm: 0.9999994383014206, iteration: 50787
loss: 1.0159251689910889,grad_norm: 0.9999994190030095, iteration: 50788
loss: 0.9986269474029541,grad_norm: 0.9309607990820136, iteration: 50789
loss: 1.0196782350540161,grad_norm: 0.9946076371340136, iteration: 50790
loss: 1.0405445098876953,grad_norm: 0.8866737231154346, iteration: 50791
loss: 0.9882445931434631,grad_norm: 0.9999998294342253, iteration: 50792
loss: 0.9953503608703613,grad_norm: 0.9945209743847506, iteration: 50793
loss: 1.0204554796218872,grad_norm: 0.9855469475693159, iteration: 50794
loss: 0.9529080986976624,grad_norm: 0.9473769328521034, iteration: 50795
loss: 0.9892310500144958,grad_norm: 0.9999991352084301, iteration: 50796
loss: 0.9649672508239746,grad_norm: 0.9364109843456666, iteration: 50797
loss: 1.0152124166488647,grad_norm: 0.967391231886051, iteration: 50798
loss: 1.1038752794265747,grad_norm: 0.999999797770553, iteration: 50799
loss: 0.9525250792503357,grad_norm: 0.8436198587458966, iteration: 50800
loss: 0.9875277280807495,grad_norm: 0.9856318787885797, iteration: 50801
loss: 1.0354175567626953,grad_norm: 0.9999991079407086, iteration: 50802
loss: 1.0033091306686401,grad_norm: 0.9976497735599066, iteration: 50803
loss: 1.004184365272522,grad_norm: 0.9999992400095842, iteration: 50804
loss: 1.0235611200332642,grad_norm: 0.9999992515875171, iteration: 50805
loss: 1.020406723022461,grad_norm: 0.9172581042680821, iteration: 50806
loss: 1.0346198081970215,grad_norm: 0.9926325193845004, iteration: 50807
loss: 1.0016944408416748,grad_norm: 0.9999990258752927, iteration: 50808
loss: 1.0583971738815308,grad_norm: 0.9999995060408157, iteration: 50809
loss: 0.9957752227783203,grad_norm: 0.9999990384680242, iteration: 50810
loss: 0.9748455286026001,grad_norm: 0.88448207990653, iteration: 50811
loss: 1.027286410331726,grad_norm: 0.999999629951837, iteration: 50812
loss: 1.0168768167495728,grad_norm: 0.9040086226164084, iteration: 50813
loss: 1.0107316970825195,grad_norm: 0.8945511676440933, iteration: 50814
loss: 1.0309741497039795,grad_norm: 0.8843498733193401, iteration: 50815
loss: 1.0030652284622192,grad_norm: 0.8328326536467466, iteration: 50816
loss: 1.0272878408432007,grad_norm: 0.7718365357704431, iteration: 50817
loss: 1.0022683143615723,grad_norm: 0.9816838015545892, iteration: 50818
loss: 1.009028673171997,grad_norm: 0.9999993124942352, iteration: 50819
loss: 0.9930163621902466,grad_norm: 0.988632776687509, iteration: 50820
loss: 1.011404037475586,grad_norm: 0.9999989906967353, iteration: 50821
loss: 1.0028386116027832,grad_norm: 0.9354652235867926, iteration: 50822
loss: 1.0077005624771118,grad_norm: 0.8079257094278749, iteration: 50823
loss: 1.00884211063385,grad_norm: 0.8944197773367165, iteration: 50824
loss: 0.9774166941642761,grad_norm: 0.9999993933622926, iteration: 50825
loss: 1.0249574184417725,grad_norm: 0.9999992183212045, iteration: 50826
loss: 0.9813706874847412,grad_norm: 0.9038476296878988, iteration: 50827
loss: 0.963756263256073,grad_norm: 0.9999991886675197, iteration: 50828
loss: 1.0453271865844727,grad_norm: 0.9999990225686111, iteration: 50829
loss: 0.9703795313835144,grad_norm: 0.912707764578325, iteration: 50830
loss: 1.0117383003234863,grad_norm: 0.9999992134121388, iteration: 50831
loss: 0.9966508746147156,grad_norm: 0.8765454283750908, iteration: 50832
loss: 1.0109434127807617,grad_norm: 0.9731943512374348, iteration: 50833
loss: 1.019940733909607,grad_norm: 0.8391871689345413, iteration: 50834
loss: 0.9717982411384583,grad_norm: 0.9009014180456794, iteration: 50835
loss: 1.02523934841156,grad_norm: 0.7980220618623978, iteration: 50836
loss: 1.0066858530044556,grad_norm: 0.9163435395589762, iteration: 50837
loss: 1.0080009698867798,grad_norm: 0.9203902506102128, iteration: 50838
loss: 1.0314979553222656,grad_norm: 0.9999992894254538, iteration: 50839
loss: 1.0083627700805664,grad_norm: 0.999999115844532, iteration: 50840
loss: 0.9981670379638672,grad_norm: 0.9999991009560185, iteration: 50841
loss: 1.0125223398208618,grad_norm: 0.9999991938200242, iteration: 50842
loss: 0.9978308081626892,grad_norm: 0.9999990839949564, iteration: 50843
loss: 0.975046694278717,grad_norm: 0.9499315630804177, iteration: 50844
loss: 1.0125913619995117,grad_norm: 0.9455050370932436, iteration: 50845
loss: 1.0438263416290283,grad_norm: 0.999999358907299, iteration: 50846
loss: 0.9988536834716797,grad_norm: 0.9999990746578086, iteration: 50847
loss: 0.996166467666626,grad_norm: 0.8992225220089338, iteration: 50848
loss: 1.0215762853622437,grad_norm: 0.9999997559260133, iteration: 50849
loss: 1.0387753248214722,grad_norm: 0.9212031446121605, iteration: 50850
loss: 1.0323679447174072,grad_norm: 0.9999992642135521, iteration: 50851
loss: 1.0140905380249023,grad_norm: 0.9999989723232539, iteration: 50852
loss: 0.9957643151283264,grad_norm: 0.951085950302115, iteration: 50853
loss: 0.9807618260383606,grad_norm: 0.8893440353186571, iteration: 50854
loss: 1.0134960412979126,grad_norm: 0.9655160506085896, iteration: 50855
loss: 1.027757167816162,grad_norm: 0.9999993337197991, iteration: 50856
loss: 0.9655338525772095,grad_norm: 0.8447602309232702, iteration: 50857
loss: 1.0107316970825195,grad_norm: 0.95672408592733, iteration: 50858
loss: 0.9779733419418335,grad_norm: 0.8633442328283869, iteration: 50859
loss: 1.0403550863265991,grad_norm: 0.947513740630125, iteration: 50860
loss: 1.0348396301269531,grad_norm: 0.8017704625792352, iteration: 50861
loss: 0.9578289985656738,grad_norm: 0.898975198522859, iteration: 50862
loss: 1.03880774974823,grad_norm: 0.939928272945706, iteration: 50863
loss: 1.0022411346435547,grad_norm: 0.8167396838210181, iteration: 50864
loss: 1.0839577913284302,grad_norm: 0.9999989936031458, iteration: 50865
loss: 1.0660511255264282,grad_norm: 0.9999994532244871, iteration: 50866
loss: 0.9997117519378662,grad_norm: 0.9815458489807346, iteration: 50867
loss: 1.0536781549453735,grad_norm: 0.9999992304955124, iteration: 50868
loss: 0.9964238405227661,grad_norm: 0.9999990882299362, iteration: 50869
loss: 1.0864331722259521,grad_norm: 0.9999990565198786, iteration: 50870
loss: 1.0545395612716675,grad_norm: 0.9999991786212951, iteration: 50871
loss: 1.0013556480407715,grad_norm: 0.9999990439623588, iteration: 50872
loss: 0.9617652893066406,grad_norm: 0.928218911778567, iteration: 50873
loss: 1.0456324815750122,grad_norm: 0.9999994616584642, iteration: 50874
loss: 1.0027955770492554,grad_norm: 0.9999991423991622, iteration: 50875
loss: 1.0278058052062988,grad_norm: 0.9068153568325776, iteration: 50876
loss: 1.0045808553695679,grad_norm: 0.9999996934908526, iteration: 50877
loss: 0.9855974912643433,grad_norm: 0.999999168408368, iteration: 50878
loss: 1.0121413469314575,grad_norm: 0.9508969900742549, iteration: 50879
loss: 1.0109543800354004,grad_norm: 0.9999989620321416, iteration: 50880
loss: 1.0114798545837402,grad_norm: 0.7210714211837558, iteration: 50881
loss: 1.0129940509796143,grad_norm: 0.9376461110701692, iteration: 50882
loss: 1.038045048713684,grad_norm: 0.99999921538517, iteration: 50883
loss: 1.0181858539581299,grad_norm: 0.8034278708609667, iteration: 50884
loss: 1.0305637121200562,grad_norm: 0.9245434305472968, iteration: 50885
loss: 1.0172051191329956,grad_norm: 0.9154289239364246, iteration: 50886
loss: 1.0412741899490356,grad_norm: 0.976229155177644, iteration: 50887
loss: 1.0115432739257812,grad_norm: 0.8299576122560163, iteration: 50888
loss: 1.0050241947174072,grad_norm: 0.988292264631304, iteration: 50889
loss: 1.0348566770553589,grad_norm: 0.9999992445817474, iteration: 50890
loss: 1.040572166442871,grad_norm: 0.9728880599152017, iteration: 50891
loss: 1.0098203420639038,grad_norm: 0.8452756044642741, iteration: 50892
loss: 0.9887276291847229,grad_norm: 0.8669228520507747, iteration: 50893
loss: 1.023014783859253,grad_norm: 0.8948267655281635, iteration: 50894
loss: 1.1449787616729736,grad_norm: 0.9999998076163287, iteration: 50895
loss: 0.9643051028251648,grad_norm: 0.9999990997122614, iteration: 50896
loss: 1.0319324731826782,grad_norm: 0.9999993970202056, iteration: 50897
loss: 1.0427595376968384,grad_norm: 0.9528099969772956, iteration: 50898
loss: 0.9701582193374634,grad_norm: 0.999999049278092, iteration: 50899
loss: 1.014519453048706,grad_norm: 0.8812096269665461, iteration: 50900
loss: 0.9719745516777039,grad_norm: 0.999999165567306, iteration: 50901
loss: 0.9916818737983704,grad_norm: 0.8940109399672125, iteration: 50902
loss: 0.9992220401763916,grad_norm: 0.9286895312119187, iteration: 50903
loss: 0.983752965927124,grad_norm: 0.9003421913130034, iteration: 50904
loss: 1.157391905784607,grad_norm: 1.0000000079118758, iteration: 50905
loss: 0.9871892929077148,grad_norm: 0.9999990632679434, iteration: 50906
loss: 0.9831644892692566,grad_norm: 0.9398326084427232, iteration: 50907
loss: 1.0131096839904785,grad_norm: 0.9381262776407073, iteration: 50908
loss: 1.0021172761917114,grad_norm: 0.8859540282905273, iteration: 50909
loss: 0.9922358393669128,grad_norm: 0.9999989962331738, iteration: 50910
loss: 0.9991162419319153,grad_norm: 0.9999990147105587, iteration: 50911
loss: 0.994211733341217,grad_norm: 0.9999991315763768, iteration: 50912
loss: 1.0091222524642944,grad_norm: 0.99390920754068, iteration: 50913
loss: 0.9884626269340515,grad_norm: 0.8011696852528418, iteration: 50914
loss: 1.0127204656600952,grad_norm: 0.9635535543074518, iteration: 50915
loss: 0.9816929697990417,grad_norm: 0.9642481560037922, iteration: 50916
loss: 1.0186055898666382,grad_norm: 0.9791328535672861, iteration: 50917
loss: 1.0739424228668213,grad_norm: 0.9999996753353919, iteration: 50918
loss: 1.4210010766983032,grad_norm: 0.9999998631376956, iteration: 50919
loss: 1.0018364191055298,grad_norm: 0.9999989931417136, iteration: 50920
loss: 1.0460361242294312,grad_norm: 0.9999991286371475, iteration: 50921
loss: 0.9723368883132935,grad_norm: 0.8023421264734545, iteration: 50922
loss: 1.0100831985473633,grad_norm: 0.9902245044135694, iteration: 50923
loss: 1.025524377822876,grad_norm: 0.9685829754193988, iteration: 50924
loss: 1.0364545583724976,grad_norm: 0.9716486430511408, iteration: 50925
loss: 1.0427762269973755,grad_norm: 0.9999998193878277, iteration: 50926
loss: 1.11531400680542,grad_norm: 0.9999991484663012, iteration: 50927
loss: 1.0428425073623657,grad_norm: 0.9999995727209935, iteration: 50928
loss: 0.9812966585159302,grad_norm: 0.9999990679814823, iteration: 50929
loss: 1.2946679592132568,grad_norm: 0.9999998023299449, iteration: 50930
loss: 1.1195255517959595,grad_norm: 0.9999999318970441, iteration: 50931
loss: 1.0802565813064575,grad_norm: 0.9999991352095869, iteration: 50932
loss: 0.9613897204399109,grad_norm: 0.9222410633226512, iteration: 50933
loss: 1.0750317573547363,grad_norm: 0.9999995365178204, iteration: 50934
loss: 1.172088623046875,grad_norm: 0.9999998392114026, iteration: 50935
loss: 1.0061266422271729,grad_norm: 0.8747969438824789, iteration: 50936
loss: 1.0156186819076538,grad_norm: 0.9999993001574823, iteration: 50937
loss: 1.1491161584854126,grad_norm: 0.9999993258925717, iteration: 50938
loss: 1.0590626001358032,grad_norm: 0.9999992041681485, iteration: 50939
loss: 1.1714588403701782,grad_norm: 0.9999997365656406, iteration: 50940
loss: 1.0190250873565674,grad_norm: 0.9999996299128537, iteration: 50941
loss: 1.001832127571106,grad_norm: 0.9999997951259421, iteration: 50942
loss: 1.0377628803253174,grad_norm: 0.9999994827693265, iteration: 50943
loss: 1.014639973640442,grad_norm: 0.9999992325082336, iteration: 50944
loss: 1.3207058906555176,grad_norm: 0.9999993607488697, iteration: 50945
loss: 1.4115972518920898,grad_norm: 0.9999999038599123, iteration: 50946
loss: 1.4157925844192505,grad_norm: 0.9999999232837465, iteration: 50947
loss: 1.0688996315002441,grad_norm: 0.9999992504300942, iteration: 50948
loss: 1.1522319316864014,grad_norm: 0.9999996927717518, iteration: 50949
loss: 1.3357738256454468,grad_norm: 0.9999998991537845, iteration: 50950
loss: 1.1142735481262207,grad_norm: 0.9999997854285811, iteration: 50951
loss: 1.2671788930892944,grad_norm: 0.9999998786485241, iteration: 50952
loss: 1.1776020526885986,grad_norm: 0.9999997734801473, iteration: 50953
loss: 1.098239541053772,grad_norm: 0.9999993188654815, iteration: 50954
loss: 0.9910567402839661,grad_norm: 0.9360655433264622, iteration: 50955
loss: 1.1223996877670288,grad_norm: 0.9999996078320555, iteration: 50956
loss: 1.0096070766448975,grad_norm: 0.8374759986027148, iteration: 50957
loss: 1.1245458126068115,grad_norm: 0.9999997113085913, iteration: 50958
loss: 1.0603513717651367,grad_norm: 0.9999996929026701, iteration: 50959
loss: 1.0168249607086182,grad_norm: 0.9999990999311789, iteration: 50960
loss: 1.0326299667358398,grad_norm: 0.9999992486259508, iteration: 50961
loss: 1.0035501718521118,grad_norm: 0.8975987981844915, iteration: 50962
loss: 1.0076435804367065,grad_norm: 0.9999992969096249, iteration: 50963
loss: 1.1355572938919067,grad_norm: 0.9999998142642484, iteration: 50964
loss: 0.9972631335258484,grad_norm: 0.9999990476637902, iteration: 50965
loss: 1.1151609420776367,grad_norm: 0.9999995520258089, iteration: 50966
loss: 1.123549461364746,grad_norm: 0.999999596476942, iteration: 50967
loss: 1.1798651218414307,grad_norm: 0.9999994612184407, iteration: 50968
loss: 0.9859252572059631,grad_norm: 0.9861490517358094, iteration: 50969
loss: 1.0542954206466675,grad_norm: 0.9999998251980987, iteration: 50970
loss: 0.9937906265258789,grad_norm: 0.9812287748008012, iteration: 50971
loss: 1.0572819709777832,grad_norm: 0.9831807981979985, iteration: 50972
loss: 1.0891884565353394,grad_norm: 0.9999993568190866, iteration: 50973
loss: 1.022040843963623,grad_norm: 0.9999991353866255, iteration: 50974
loss: 1.117297887802124,grad_norm: 0.9999992498825924, iteration: 50975
loss: 1.0262151956558228,grad_norm: 0.9999993046818506, iteration: 50976
loss: 1.0882667303085327,grad_norm: 0.9999992486400091, iteration: 50977
loss: 1.107319951057434,grad_norm: 0.999999644399426, iteration: 50978
loss: 1.0395991802215576,grad_norm: 0.9616812954755677, iteration: 50979
loss: 1.0699807405471802,grad_norm: 0.9999992956286403, iteration: 50980
loss: 1.2396961450576782,grad_norm: 0.999999503091567, iteration: 50981
loss: 1.0008989572525024,grad_norm: 0.9999993402358048, iteration: 50982
loss: 1.0209908485412598,grad_norm: 0.9999997841830691, iteration: 50983
loss: 1.0885323286056519,grad_norm: 0.9999991671161217, iteration: 50984
loss: 1.0176706314086914,grad_norm: 0.894589237275414, iteration: 50985
loss: 1.1611812114715576,grad_norm: 0.9999996160582306, iteration: 50986
loss: 0.979236900806427,grad_norm: 0.9922720874792673, iteration: 50987
loss: 1.0274797677993774,grad_norm: 0.99999995262807, iteration: 50988
loss: 1.026520013809204,grad_norm: 0.9999993635972371, iteration: 50989
loss: 1.0101372003555298,grad_norm: 0.9999991014721704, iteration: 50990
loss: 1.0487793684005737,grad_norm: 0.9999997029567008, iteration: 50991
loss: 1.030577540397644,grad_norm: 0.9999996183115166, iteration: 50992
loss: 0.9991728067398071,grad_norm: 0.9999993163857696, iteration: 50993
loss: 1.0037882328033447,grad_norm: 0.9999991453245397, iteration: 50994
loss: 1.024173378944397,grad_norm: 0.9513679625668624, iteration: 50995
loss: 1.0149660110473633,grad_norm: 0.9999997211079853, iteration: 50996
loss: 1.10772705078125,grad_norm: 0.9999991431325268, iteration: 50997
loss: 1.00944185256958,grad_norm: 0.9383212108598754, iteration: 50998
loss: 1.0251045227050781,grad_norm: 0.999426420333743, iteration: 50999
loss: 1.0571621656417847,grad_norm: 0.9999992689595757, iteration: 51000
loss: 0.996830403804779,grad_norm: 0.8551226129611313, iteration: 51001
loss: 1.031867265701294,grad_norm: 0.9999995321064191, iteration: 51002
loss: 1.022534728050232,grad_norm: 0.9999991388335043, iteration: 51003
loss: 1.0181760787963867,grad_norm: 0.9101168925804729, iteration: 51004
loss: 1.1419235467910767,grad_norm: 0.9999997667948909, iteration: 51005
loss: 1.0544495582580566,grad_norm: 0.9999993968621435, iteration: 51006
loss: 0.9852138757705688,grad_norm: 0.9764857959976183, iteration: 51007
loss: 0.9976357221603394,grad_norm: 0.9999990292144959, iteration: 51008
loss: 1.032651662826538,grad_norm: 0.9999998979288205, iteration: 51009
loss: 0.9954701066017151,grad_norm: 0.7974771289688026, iteration: 51010
loss: 0.936352550983429,grad_norm: 0.9208337276110597, iteration: 51011
loss: 1.0277695655822754,grad_norm: 0.999999687225132, iteration: 51012
loss: 1.0241731405258179,grad_norm: 0.9999992640392524, iteration: 51013
loss: 1.040043592453003,grad_norm: 0.9999993160569486, iteration: 51014
loss: 1.015661597251892,grad_norm: 0.9194968106042961, iteration: 51015
loss: 1.0626121759414673,grad_norm: 0.9999991506832998, iteration: 51016
loss: 0.9807069301605225,grad_norm: 0.9731730910434753, iteration: 51017
loss: 1.0538692474365234,grad_norm: 0.9999996744100215, iteration: 51018
loss: 1.0178217887878418,grad_norm: 0.999999014871505, iteration: 51019
loss: 1.1096045970916748,grad_norm: 0.999999526324517, iteration: 51020
loss: 1.1910074949264526,grad_norm: 0.9999993730611685, iteration: 51021
loss: 1.0439653396606445,grad_norm: 0.8932049573161035, iteration: 51022
loss: 0.9911309480667114,grad_norm: 0.999999153136194, iteration: 51023
loss: 1.0357885360717773,grad_norm: 0.9999991790960009, iteration: 51024
loss: 0.9933346509933472,grad_norm: 0.9999996124620332, iteration: 51025
loss: 0.9830122590065002,grad_norm: 0.9829910632728363, iteration: 51026
loss: 1.0043809413909912,grad_norm: 0.9420218634866406, iteration: 51027
loss: 1.0032594203948975,grad_norm: 0.9999995869657392, iteration: 51028
loss: 1.016821026802063,grad_norm: 0.999999398616895, iteration: 51029
loss: 1.0045270919799805,grad_norm: 0.9488353848738496, iteration: 51030
loss: 0.9811453819274902,grad_norm: 0.9999999343474425, iteration: 51031
loss: 1.0218201875686646,grad_norm: 0.9999995476911117, iteration: 51032
loss: 0.9990172386169434,grad_norm: 0.8974181708347028, iteration: 51033
loss: 1.003272533416748,grad_norm: 0.9162056079030921, iteration: 51034
loss: 1.0112977027893066,grad_norm: 0.9999990367343077, iteration: 51035
loss: 1.0805108547210693,grad_norm: 0.9999996462910218, iteration: 51036
loss: 0.9862297177314758,grad_norm: 0.9694712250748634, iteration: 51037
loss: 1.0030648708343506,grad_norm: 0.9999992836140055, iteration: 51038
loss: 1.0029890537261963,grad_norm: 0.8438823065056265, iteration: 51039
loss: 1.0382834672927856,grad_norm: 0.9140994533060657, iteration: 51040
loss: 1.0721850395202637,grad_norm: 0.9999992268268013, iteration: 51041
loss: 1.0282469987869263,grad_norm: 0.8587329746770463, iteration: 51042
loss: 1.0027275085449219,grad_norm: 0.9912369578972058, iteration: 51043
loss: 1.0096211433410645,grad_norm: 0.9999990949843146, iteration: 51044
loss: 1.024158239364624,grad_norm: 0.89775141459835, iteration: 51045
loss: 0.9574865698814392,grad_norm: 0.9999993420978625, iteration: 51046
loss: 0.9764291644096375,grad_norm: 0.8908665254942931, iteration: 51047
loss: 0.9830384254455566,grad_norm: 0.9999990612688516, iteration: 51048
loss: 1.0302633047103882,grad_norm: 0.999999197223367, iteration: 51049
loss: 1.0358237028121948,grad_norm: 0.8431218005976668, iteration: 51050
loss: 1.0409274101257324,grad_norm: 0.9999991225678646, iteration: 51051
loss: 1.0373574495315552,grad_norm: 0.9982553989347696, iteration: 51052
loss: 1.0525654554367065,grad_norm: 0.9999990799613695, iteration: 51053
loss: 1.0153831243515015,grad_norm: 0.9999997741589977, iteration: 51054
loss: 0.9788671135902405,grad_norm: 0.8822716134903655, iteration: 51055
loss: 0.9894432425498962,grad_norm: 0.9940879473030158, iteration: 51056
loss: 1.0442594289779663,grad_norm: 0.9999991033350741, iteration: 51057
loss: 0.9888941645622253,grad_norm: 0.7503392311730334, iteration: 51058
loss: 1.0423933267593384,grad_norm: 0.9999989756287835, iteration: 51059
loss: 1.035204529762268,grad_norm: 0.9999991532907309, iteration: 51060
loss: 1.0212833881378174,grad_norm: 0.9999994111446228, iteration: 51061
loss: 1.0867362022399902,grad_norm: 0.9999999750265206, iteration: 51062
loss: 1.0363614559173584,grad_norm: 0.9999991891661032, iteration: 51063
loss: 0.9989120364189148,grad_norm: 0.889158539194533, iteration: 51064
loss: 1.0673909187316895,grad_norm: 0.9999996720366243, iteration: 51065
loss: 1.0282403230667114,grad_norm: 0.8066813643413612, iteration: 51066
loss: 1.0411983728408813,grad_norm: 0.9999995191766816, iteration: 51067
loss: 1.0895085334777832,grad_norm: 0.9999996548106305, iteration: 51068
loss: 1.0667704343795776,grad_norm: 0.9999998914833008, iteration: 51069
loss: 0.9831907153129578,grad_norm: 0.9999990725776758, iteration: 51070
loss: 1.0338534116744995,grad_norm: 0.9409582733712082, iteration: 51071
loss: 1.0209938287734985,grad_norm: 0.9999992108799028, iteration: 51072
loss: 1.0767697095870972,grad_norm: 0.9999997740106528, iteration: 51073
loss: 1.0201627016067505,grad_norm: 0.9999995651710553, iteration: 51074
loss: 1.0377615690231323,grad_norm: 0.9999996973401992, iteration: 51075
loss: 1.0702683925628662,grad_norm: 0.9999992156927033, iteration: 51076
loss: 1.0672576427459717,grad_norm: 0.9999996984496743, iteration: 51077
loss: 1.0329186916351318,grad_norm: 0.7934080329667205, iteration: 51078
loss: 0.9935466647148132,grad_norm: 0.9999994366699541, iteration: 51079
loss: 1.0443249940872192,grad_norm: 0.8559767552097067, iteration: 51080
loss: 1.0109148025512695,grad_norm: 0.9464617970938501, iteration: 51081
loss: 1.0293303728103638,grad_norm: 0.999999152770315, iteration: 51082
loss: 1.0159382820129395,grad_norm: 0.8931536864286898, iteration: 51083
loss: 0.9709222912788391,grad_norm: 0.9999991126822086, iteration: 51084
loss: 1.0019420385360718,grad_norm: 0.999999375669372, iteration: 51085
loss: 1.0177669525146484,grad_norm: 0.999999174640825, iteration: 51086
loss: 1.065476417541504,grad_norm: 0.9999998199804037, iteration: 51087
loss: 0.9603945016860962,grad_norm: 0.981828525670085, iteration: 51088
loss: 0.9927253127098083,grad_norm: 0.9999994584035126, iteration: 51089
loss: 1.113440990447998,grad_norm: 0.9999999003708747, iteration: 51090
loss: 1.006643533706665,grad_norm: 0.8672320017209155, iteration: 51091
loss: 1.1754393577575684,grad_norm: 1.0000000153445543, iteration: 51092
loss: 1.0181500911712646,grad_norm: 0.9999994786130086, iteration: 51093
loss: 0.992919385433197,grad_norm: 0.8714909636829425, iteration: 51094
loss: 0.9962794780731201,grad_norm: 0.8248458915577347, iteration: 51095
loss: 1.007609486579895,grad_norm: 0.9999990265086852, iteration: 51096
loss: 1.0440611839294434,grad_norm: 0.9999991243840113, iteration: 51097
loss: 0.9881616234779358,grad_norm: 0.9699219580894691, iteration: 51098
loss: 0.9769841432571411,grad_norm: 0.9999991869200429, iteration: 51099
loss: 1.0218133926391602,grad_norm: 0.8622488109186716, iteration: 51100
loss: 1.0401890277862549,grad_norm: 0.9979427304621552, iteration: 51101
loss: 1.0492870807647705,grad_norm: 0.9999994897034251, iteration: 51102
loss: 1.0081136226654053,grad_norm: 0.9999991252775302, iteration: 51103
loss: 1.0112406015396118,grad_norm: 0.9999999147081785, iteration: 51104
loss: 0.9970598816871643,grad_norm: 0.8343360660867637, iteration: 51105
loss: 0.9663359522819519,grad_norm: 0.9352980462245268, iteration: 51106
loss: 1.0211727619171143,grad_norm: 0.7603738151445726, iteration: 51107
loss: 0.9735180735588074,grad_norm: 0.9999990508688964, iteration: 51108
loss: 1.03097665309906,grad_norm: 0.9999991528697634, iteration: 51109
loss: 1.0852407217025757,grad_norm: 0.9240898034495565, iteration: 51110
loss: 0.9755223989486694,grad_norm: 0.9999990441808085, iteration: 51111
loss: 0.962935209274292,grad_norm: 0.9217289349447048, iteration: 51112
loss: 0.9922419190406799,grad_norm: 0.9033842110687765, iteration: 51113
loss: 1.040658712387085,grad_norm: 0.9999995159033351, iteration: 51114
loss: 0.9806206226348877,grad_norm: 0.7909880475482982, iteration: 51115
loss: 1.0285924673080444,grad_norm: 0.9663298502013671, iteration: 51116
loss: 0.9836349487304688,grad_norm: 0.9843211250556331, iteration: 51117
loss: 1.0425562858581543,grad_norm: 0.9443727345215776, iteration: 51118
loss: 0.9944362044334412,grad_norm: 0.9999996247017547, iteration: 51119
loss: 0.9772476553916931,grad_norm: 0.8556103208852849, iteration: 51120
loss: 1.0153450965881348,grad_norm: 0.9999994395285808, iteration: 51121
loss: 1.0606029033660889,grad_norm: 0.9999992301685845, iteration: 51122
loss: 1.0164886713027954,grad_norm: 0.9995016302596708, iteration: 51123
loss: 1.0085673332214355,grad_norm: 0.9999990723474123, iteration: 51124
loss: 1.069318413734436,grad_norm: 0.999999635726505, iteration: 51125
loss: 1.0268722772598267,grad_norm: 0.8722999827439017, iteration: 51126
loss: 1.0067542791366577,grad_norm: 0.961441256834755, iteration: 51127
loss: 1.0121554136276245,grad_norm: 0.9999996172586186, iteration: 51128
loss: 1.0173206329345703,grad_norm: 0.9999990609529982, iteration: 51129
loss: 1.0006957054138184,grad_norm: 0.9558563590737317, iteration: 51130
loss: 1.0268912315368652,grad_norm: 0.9999993220162274, iteration: 51131
loss: 1.135378360748291,grad_norm: 0.9999996680595971, iteration: 51132
loss: 1.0353596210479736,grad_norm: 0.7856985409004708, iteration: 51133
loss: 0.9689131379127502,grad_norm: 0.9999989973055027, iteration: 51134
loss: 1.018855333328247,grad_norm: 0.9999991970784755, iteration: 51135
loss: 1.0827178955078125,grad_norm: 0.99999987569693, iteration: 51136
loss: 1.0107526779174805,grad_norm: 0.9999992251500502, iteration: 51137
loss: 1.015543818473816,grad_norm: 0.9999992126177595, iteration: 51138
loss: 1.0172390937805176,grad_norm: 0.9999991398772022, iteration: 51139
loss: 1.0951236486434937,grad_norm: 0.9800871384819292, iteration: 51140
loss: 0.9835526943206787,grad_norm: 0.9572730150261921, iteration: 51141
loss: 0.9817289113998413,grad_norm: 0.9999990295808023, iteration: 51142
loss: 1.0275366306304932,grad_norm: 0.8233863793028364, iteration: 51143
loss: 1.0008556842803955,grad_norm: 0.9999993808536582, iteration: 51144
loss: 1.0006686449050903,grad_norm: 0.9669133413250315, iteration: 51145
loss: 1.0652177333831787,grad_norm: 0.9999994828116681, iteration: 51146
loss: 0.9769074320793152,grad_norm: 0.8806789187527787, iteration: 51147
loss: 1.0650533437728882,grad_norm: 0.9999993684233274, iteration: 51148
loss: 0.998266339302063,grad_norm: 0.9999991036375048, iteration: 51149
loss: 1.0123995542526245,grad_norm: 0.9999991803574826, iteration: 51150
loss: 1.0480629205703735,grad_norm: 0.7869902124751752, iteration: 51151
loss: 1.0407567024230957,grad_norm: 0.9999991117834377, iteration: 51152
loss: 1.0309560298919678,grad_norm: 0.9999654505565637, iteration: 51153
loss: 1.0276695489883423,grad_norm: 0.9999990227028457, iteration: 51154
loss: 1.0043516159057617,grad_norm: 0.9999991557215288, iteration: 51155
loss: 1.0328103303909302,grad_norm: 0.9999997247819088, iteration: 51156
loss: 0.9709597229957581,grad_norm: 0.9999989691618595, iteration: 51157
loss: 1.0167901515960693,grad_norm: 0.99999901415425, iteration: 51158
loss: 1.0384137630462646,grad_norm: 0.9589229683233861, iteration: 51159
loss: 0.9729365706443787,grad_norm: 0.9298451311321346, iteration: 51160
loss: 1.0220324993133545,grad_norm: 0.9999991212233182, iteration: 51161
loss: 1.0523760318756104,grad_norm: 0.9999995840526228, iteration: 51162
loss: 1.0376348495483398,grad_norm: 0.9612524476543499, iteration: 51163
loss: 1.02182936668396,grad_norm: 0.9246891558671994, iteration: 51164
loss: 0.9950768947601318,grad_norm: 0.9999998219781702, iteration: 51165
loss: 1.0059508085250854,grad_norm: 0.9493199726437539, iteration: 51166
loss: 1.0128318071365356,grad_norm: 0.9999993079311711, iteration: 51167
loss: 1.0198338031768799,grad_norm: 0.9999990979399682, iteration: 51168
loss: 1.0136384963989258,grad_norm: 0.8336620297359895, iteration: 51169
loss: 1.0087659358978271,grad_norm: 0.9999992177424554, iteration: 51170
loss: 1.0081992149353027,grad_norm: 0.8902732272142865, iteration: 51171
loss: 0.9836602210998535,grad_norm: 0.999999028366707, iteration: 51172
loss: 1.0404633283615112,grad_norm: 0.9999990443475697, iteration: 51173
loss: 0.9834206700325012,grad_norm: 0.9937098996749106, iteration: 51174
loss: 1.0254952907562256,grad_norm: 0.9999992506051599, iteration: 51175
loss: 1.0126322507858276,grad_norm: 0.9999993772969846, iteration: 51176
loss: 1.0525418519973755,grad_norm: 0.9999991516399658, iteration: 51177
loss: 1.0362863540649414,grad_norm: 0.9999991565491682, iteration: 51178
loss: 0.9849426746368408,grad_norm: 0.9999992992220367, iteration: 51179
loss: 1.0327531099319458,grad_norm: 0.9999993061498335, iteration: 51180
loss: 1.010992169380188,grad_norm: 0.940432185562984, iteration: 51181
loss: 1.0722007751464844,grad_norm: 0.9999993951373325, iteration: 51182
loss: 1.0737334489822388,grad_norm: 0.9999993885237854, iteration: 51183
loss: 1.1630531549453735,grad_norm: 0.9999999266806537, iteration: 51184
loss: 1.1249343156814575,grad_norm: 0.9999992899780168, iteration: 51185
loss: 1.0880664587020874,grad_norm: 0.9999996834412299, iteration: 51186
loss: 1.0322846174240112,grad_norm: 0.955963191666709, iteration: 51187
loss: 1.0750473737716675,grad_norm: 0.9999994763853242, iteration: 51188
loss: 1.0086389780044556,grad_norm: 0.9531770071836212, iteration: 51189
loss: 0.9921978712081909,grad_norm: 0.6987697864081474, iteration: 51190
loss: 1.0109082460403442,grad_norm: 0.9999994808393218, iteration: 51191
loss: 1.0009530782699585,grad_norm: 0.9999992039091138, iteration: 51192
loss: 1.0408525466918945,grad_norm: 0.9675189041440638, iteration: 51193
loss: 1.0163224935531616,grad_norm: 0.7898051932357492, iteration: 51194
loss: 1.0213181972503662,grad_norm: 0.8743513408944565, iteration: 51195
loss: 0.9939078092575073,grad_norm: 0.9999993643944967, iteration: 51196
loss: 1.0824296474456787,grad_norm: 0.9999997882438163, iteration: 51197
loss: 0.9755488634109497,grad_norm: 0.9440592766184531, iteration: 51198
loss: 1.0305904150009155,grad_norm: 0.9632133304828633, iteration: 51199
loss: 0.9928447604179382,grad_norm: 0.9451832555058438, iteration: 51200
loss: 0.9921826124191284,grad_norm: 0.9026101640151736, iteration: 51201
loss: 1.0528967380523682,grad_norm: 0.9999991513863904, iteration: 51202
loss: 0.9985949397087097,grad_norm: 0.9068539257220634, iteration: 51203
loss: 1.0095373392105103,grad_norm: 0.9999994899078307, iteration: 51204
loss: 1.021651268005371,grad_norm: 0.9491595947125787, iteration: 51205
loss: 1.0066125392913818,grad_norm: 0.9802121060754966, iteration: 51206
loss: 1.0171239376068115,grad_norm: 0.9999992048234505, iteration: 51207
loss: 1.0522536039352417,grad_norm: 0.9999996766860922, iteration: 51208
loss: 1.0432711839675903,grad_norm: 0.9112030827100593, iteration: 51209
loss: 1.0283137559890747,grad_norm: 0.9318497829648594, iteration: 51210
loss: 0.9848729372024536,grad_norm: 0.9999996089060684, iteration: 51211
loss: 1.0080300569534302,grad_norm: 0.9742359951274485, iteration: 51212
loss: 1.0023201704025269,grad_norm: 0.9999991036940971, iteration: 51213
loss: 0.9613254070281982,grad_norm: 0.9566350522819003, iteration: 51214
loss: 1.0150117874145508,grad_norm: 0.8867324380669158, iteration: 51215
loss: 0.9912344813346863,grad_norm: 0.9999991702884437, iteration: 51216
loss: 1.0102020502090454,grad_norm: 0.9999996021440096, iteration: 51217
loss: 0.9822534918785095,grad_norm: 0.9999989584866458, iteration: 51218
loss: 1.0073031187057495,grad_norm: 0.9942145763294046, iteration: 51219
loss: 1.0328153371810913,grad_norm: 0.99999987884217, iteration: 51220
loss: 1.005030870437622,grad_norm: 0.9999998793033702, iteration: 51221
loss: 1.0124411582946777,grad_norm: 0.9999990820039447, iteration: 51222
loss: 1.0298010110855103,grad_norm: 0.999999369687989, iteration: 51223
loss: 1.0032308101654053,grad_norm: 0.9199650329991238, iteration: 51224
loss: 1.0109577178955078,grad_norm: 0.8720214267269579, iteration: 51225
loss: 1.010002613067627,grad_norm: 0.8917573151964092, iteration: 51226
loss: 1.0210500955581665,grad_norm: 0.8728254051264264, iteration: 51227
loss: 1.1090507507324219,grad_norm: 0.999999485565564, iteration: 51228
loss: 1.0047510862350464,grad_norm: 0.999999879083075, iteration: 51229
loss: 1.0261180400848389,grad_norm: 0.9999996307529403, iteration: 51230
loss: 1.022645354270935,grad_norm: 0.9540772632024078, iteration: 51231
loss: 1.161623477935791,grad_norm: 0.999999958811873, iteration: 51232
loss: 0.9705702066421509,grad_norm: 0.9010363285790611, iteration: 51233
loss: 1.067870855331421,grad_norm: 0.9713370315020234, iteration: 51234
loss: 1.018196940422058,grad_norm: 0.9999994405167328, iteration: 51235
loss: 1.0057774782180786,grad_norm: 0.7986360999287432, iteration: 51236
loss: 0.9850819706916809,grad_norm: 0.9365390233699864, iteration: 51237
loss: 1.0139094591140747,grad_norm: 0.989684738485641, iteration: 51238
loss: 1.022578239440918,grad_norm: 0.9999992544867187, iteration: 51239
loss: 0.9870045781135559,grad_norm: 0.9999990910381612, iteration: 51240
loss: 1.1687865257263184,grad_norm: 0.9999993974155698, iteration: 51241
loss: 1.0493760108947754,grad_norm: 0.9999996425771339, iteration: 51242
loss: 1.0204213857650757,grad_norm: 0.9999992765853042, iteration: 51243
loss: 0.9789564609527588,grad_norm: 0.9633945023222887, iteration: 51244
loss: 0.99370276927948,grad_norm: 0.9999993181505207, iteration: 51245
loss: 1.0203137397766113,grad_norm: 0.9999997674436416, iteration: 51246
loss: 1.0199905633926392,grad_norm: 0.999999963096364, iteration: 51247
loss: 0.9768819212913513,grad_norm: 0.9848365702549572, iteration: 51248
loss: 1.042567491531372,grad_norm: 0.9999993811354925, iteration: 51249
loss: 1.015734314918518,grad_norm: 0.9999995481392284, iteration: 51250
loss: 1.008270025253296,grad_norm: 0.9999995958972829, iteration: 51251
loss: 1.0122992992401123,grad_norm: 0.9999993579095738, iteration: 51252
loss: 0.9708455204963684,grad_norm: 0.9999990575737105, iteration: 51253
loss: 1.022294044494629,grad_norm: 0.8527962854492537, iteration: 51254
loss: 0.9761577844619751,grad_norm: 0.9999993232630207, iteration: 51255
loss: 1.0956637859344482,grad_norm: 0.9999999150439999, iteration: 51256
loss: 1.0111987590789795,grad_norm: 0.9064984948365556, iteration: 51257
loss: 1.004761815071106,grad_norm: 0.999999306339169, iteration: 51258
loss: 1.0899955034255981,grad_norm: 0.9999994697299105, iteration: 51259
loss: 1.0553185939788818,grad_norm: 0.9999998891934122, iteration: 51260
loss: 1.034925103187561,grad_norm: 0.9999993013684603, iteration: 51261
loss: 0.9994045495986938,grad_norm: 0.9101608253716044, iteration: 51262
loss: 1.3839664459228516,grad_norm: 0.9999996048409633, iteration: 51263
loss: 1.5304515361785889,grad_norm: 0.9999999165195337, iteration: 51264
loss: 1.2813562154769897,grad_norm: 0.9999996859063988, iteration: 51265
loss: 1.348728060722351,grad_norm: 0.9999998529065804, iteration: 51266
loss: 1.2144160270690918,grad_norm: 0.9999993313724849, iteration: 51267
loss: 1.8978039026260376,grad_norm: 0.9999998230040742, iteration: 51268
loss: 1.3317863941192627,grad_norm: 0.999999865162317, iteration: 51269
loss: 1.0429587364196777,grad_norm: 0.999999328441998, iteration: 51270
loss: 1.352793574333191,grad_norm: 0.9999995439721454, iteration: 51271
loss: 1.2631930112838745,grad_norm: 0.9999998238953818, iteration: 51272
loss: 1.2326656579971313,grad_norm: 0.9999999946575122, iteration: 51273
loss: 1.0545941591262817,grad_norm: 0.9999995306129168, iteration: 51274
loss: 1.1345382928848267,grad_norm: 0.9999994397374851, iteration: 51275
loss: 1.0060608386993408,grad_norm: 0.9138738389093575, iteration: 51276
loss: 0.9988111257553101,grad_norm: 0.999999111757063, iteration: 51277
loss: 1.015591025352478,grad_norm: 0.999999433537093, iteration: 51278
loss: 1.315352439880371,grad_norm: 0.9999998555476531, iteration: 51279
loss: 1.011705994606018,grad_norm: 0.9876785394405171, iteration: 51280
loss: 1.1086204051971436,grad_norm: 0.9999998205277059, iteration: 51281
loss: 1.016605019569397,grad_norm: 0.999999639554478, iteration: 51282
loss: 1.0027811527252197,grad_norm: 0.999999098359461, iteration: 51283
loss: 1.0168358087539673,grad_norm: 0.8305989397943881, iteration: 51284
loss: 1.0522876977920532,grad_norm: 0.9999989785385671, iteration: 51285
loss: 1.0039805173873901,grad_norm: 0.9999993125720386, iteration: 51286
loss: 1.0388909578323364,grad_norm: 0.9999998233744356, iteration: 51287
loss: 1.0507338047027588,grad_norm: 0.9999991010931724, iteration: 51288
loss: 0.9879006743431091,grad_norm: 0.9619073733509164, iteration: 51289
loss: 1.0186086893081665,grad_norm: 0.9999990840691872, iteration: 51290
loss: 1.0271741151809692,grad_norm: 0.9999992246195412, iteration: 51291
loss: 1.0956934690475464,grad_norm: 0.9999999199117873, iteration: 51292
loss: 1.0454444885253906,grad_norm: 0.9999994563396265, iteration: 51293
loss: 1.0173395872116089,grad_norm: 0.9425688988009595, iteration: 51294
loss: 0.9979737997055054,grad_norm: 0.928189026586688, iteration: 51295
loss: 1.1224595308303833,grad_norm: 0.9999991830188578, iteration: 51296
loss: 1.0883409976959229,grad_norm: 0.9999994540424718, iteration: 51297
loss: 1.0142157077789307,grad_norm: 0.9127673946970826, iteration: 51298
loss: 1.0546144247055054,grad_norm: 0.9999990961249627, iteration: 51299
loss: 1.036720633506775,grad_norm: 0.9999992401997038, iteration: 51300
loss: 1.0102812051773071,grad_norm: 0.9999991503162287, iteration: 51301
loss: 1.0624865293502808,grad_norm: 0.9999998679950276, iteration: 51302
loss: 1.02216374874115,grad_norm: 0.9999997862896755, iteration: 51303
loss: 1.1001927852630615,grad_norm: 0.9999991028475119, iteration: 51304
loss: 0.9790161848068237,grad_norm: 0.9606922015149766, iteration: 51305
loss: 1.0423563718795776,grad_norm: 0.9999997921930488, iteration: 51306
loss: 1.0145105123519897,grad_norm: 0.9999994635633482, iteration: 51307
loss: 1.0293575525283813,grad_norm: 0.7891598073031757, iteration: 51308
loss: 1.006252646446228,grad_norm: 0.9999991210974353, iteration: 51309
loss: 1.0078312158584595,grad_norm: 0.9999997757161865, iteration: 51310
loss: 1.028461217880249,grad_norm: 0.9999990570095317, iteration: 51311
loss: 0.9947003126144409,grad_norm: 0.8573161856865127, iteration: 51312
loss: 1.007677674293518,grad_norm: 0.8976233613255287, iteration: 51313
loss: 1.0402582883834839,grad_norm: 1.0000000075012037, iteration: 51314
loss: 1.0443922281265259,grad_norm: 0.9999991330705261, iteration: 51315
loss: 0.9935429096221924,grad_norm: 0.9999990659324153, iteration: 51316
loss: 1.0118159055709839,grad_norm: 0.9999996263870099, iteration: 51317
loss: 1.165747046470642,grad_norm: 0.9593758500084577, iteration: 51318
loss: 1.005929708480835,grad_norm: 0.8634481032999963, iteration: 51319
loss: 1.227699875831604,grad_norm: 0.9999996182148297, iteration: 51320
loss: 1.0083798170089722,grad_norm: 0.8691053489449789, iteration: 51321
loss: 1.0367863178253174,grad_norm: 0.9999992931375374, iteration: 51322
loss: 0.9760531783103943,grad_norm: 0.8870956211620195, iteration: 51323
loss: 0.9910027980804443,grad_norm: 0.8344642013097012, iteration: 51324
loss: 1.0429185628890991,grad_norm: 0.9178743907081937, iteration: 51325
loss: 1.0230833292007446,grad_norm: 0.9999991689543553, iteration: 51326
loss: 1.0101559162139893,grad_norm: 0.9049808521144587, iteration: 51327
loss: 0.9758785963058472,grad_norm: 0.9825441021261255, iteration: 51328
loss: 1.0298399925231934,grad_norm: 0.9999989496181964, iteration: 51329
loss: 1.0733840465545654,grad_norm: 0.9999995780814515, iteration: 51330
loss: 0.9828422665596008,grad_norm: 0.9428257258830401, iteration: 51331
loss: 1.006447672843933,grad_norm: 0.9999991658632943, iteration: 51332
loss: 0.966809868812561,grad_norm: 0.999999894927192, iteration: 51333
loss: 1.0369141101837158,grad_norm: 0.9999990861986857, iteration: 51334
loss: 1.0812126398086548,grad_norm: 0.9999991564822714, iteration: 51335
loss: 0.9845820665359497,grad_norm: 0.9999990290736279, iteration: 51336
loss: 0.9907155632972717,grad_norm: 0.9999991215680943, iteration: 51337
loss: 1.023119330406189,grad_norm: 0.8860084506150716, iteration: 51338
loss: 1.0507934093475342,grad_norm: 0.999999538818261, iteration: 51339
loss: 1.062011957168579,grad_norm: 0.9999995034317176, iteration: 51340
loss: 1.0137180089950562,grad_norm: 0.9999993367146673, iteration: 51341
loss: 1.0858962535858154,grad_norm: 0.9999994821737745, iteration: 51342
loss: 0.9720981121063232,grad_norm: 0.8977950018475744, iteration: 51343
loss: 1.0294766426086426,grad_norm: 0.8285017208117769, iteration: 51344
loss: 1.062929630279541,grad_norm: 0.9999996970290487, iteration: 51345
loss: 0.9551731944084167,grad_norm: 0.9999991527436376, iteration: 51346
loss: 1.0132800340652466,grad_norm: 0.9999991249548668, iteration: 51347
loss: 1.0489619970321655,grad_norm: 0.9999991429617976, iteration: 51348
loss: 0.9784157872200012,grad_norm: 0.9999995443873565, iteration: 51349
loss: 1.0703850984573364,grad_norm: 0.9495168399816444, iteration: 51350
loss: 1.0321333408355713,grad_norm: 0.9384539059475046, iteration: 51351
loss: 1.0054080486297607,grad_norm: 0.9999991045544806, iteration: 51352
loss: 1.005895733833313,grad_norm: 0.9109271306475049, iteration: 51353
loss: 0.975324809551239,grad_norm: 0.9534371275304695, iteration: 51354
loss: 0.9745543003082275,grad_norm: 0.9999994294867675, iteration: 51355
loss: 1.015860915184021,grad_norm: 0.9900158887765401, iteration: 51356
loss: 0.9844178557395935,grad_norm: 0.8901517510715036, iteration: 51357
loss: 1.0291868448257446,grad_norm: 0.9999991972662438, iteration: 51358
loss: 0.9886847734451294,grad_norm: 0.9999990843206955, iteration: 51359
loss: 1.0174704790115356,grad_norm: 0.9999990728921889, iteration: 51360
loss: 1.0365043878555298,grad_norm: 0.9999991769848567, iteration: 51361
loss: 1.0797359943389893,grad_norm: 0.9999997518747926, iteration: 51362
loss: 1.0104848146438599,grad_norm: 0.923401493879873, iteration: 51363
loss: 1.0150748491287231,grad_norm: 0.9612236995238242, iteration: 51364
loss: 1.0017229318618774,grad_norm: 0.8875256565061663, iteration: 51365
loss: 0.9921740293502808,grad_norm: 0.86070221918085, iteration: 51366
loss: 0.9764484167098999,grad_norm: 0.8400800463032323, iteration: 51367
loss: 0.9698620438575745,grad_norm: 0.9849484835954461, iteration: 51368
loss: 1.005140781402588,grad_norm: 0.9147894513183632, iteration: 51369
loss: 0.9650383591651917,grad_norm: 0.8132313616962485, iteration: 51370
loss: 0.9893908500671387,grad_norm: 0.7838784691290095, iteration: 51371
loss: 1.021066427230835,grad_norm: 0.8437290497489446, iteration: 51372
loss: 1.0981014966964722,grad_norm: 0.9999992283721886, iteration: 51373
loss: 1.043861985206604,grad_norm: 0.9219606440421423, iteration: 51374
loss: 1.0376538038253784,grad_norm: 0.9999990766768057, iteration: 51375
loss: 1.038508653640747,grad_norm: 0.7824761783900998, iteration: 51376
loss: 1.007088541984558,grad_norm: 0.9999991400562729, iteration: 51377
loss: 0.94803786277771,grad_norm: 0.8970924077374314, iteration: 51378
loss: 0.9842851161956787,grad_norm: 0.9542361053532292, iteration: 51379
loss: 1.035939335823059,grad_norm: 0.9999998528107968, iteration: 51380
loss: 1.0229036808013916,grad_norm: 0.9999990478797263, iteration: 51381
loss: 1.0160387754440308,grad_norm: 0.8241056056886923, iteration: 51382
loss: 0.9944753646850586,grad_norm: 0.8935657409053227, iteration: 51383
loss: 1.0504151582717896,grad_norm: 0.994242929996722, iteration: 51384
loss: 1.0274559259414673,grad_norm: 0.9999990584146031, iteration: 51385
loss: 1.0146840810775757,grad_norm: 0.9999991084248158, iteration: 51386
loss: 0.9963551163673401,grad_norm: 0.9999992164062326, iteration: 51387
loss: 1.0128384828567505,grad_norm: 0.9999991936972369, iteration: 51388
loss: 0.9876305460929871,grad_norm: 0.9999993173671835, iteration: 51389
loss: 0.9636808633804321,grad_norm: 0.7910893500400675, iteration: 51390
loss: 1.020598292350769,grad_norm: 0.7946751368453503, iteration: 51391
loss: 0.9811416864395142,grad_norm: 0.8559646948371611, iteration: 51392
loss: 1.006913661956787,grad_norm: 0.916596985061223, iteration: 51393
loss: 1.007367491722107,grad_norm: 0.866681369916975, iteration: 51394
loss: 1.0221478939056396,grad_norm: 0.999999128886526, iteration: 51395
loss: 1.0205601453781128,grad_norm: 0.8709845240539005, iteration: 51396
loss: 0.9891858100891113,grad_norm: 0.9723978968674134, iteration: 51397
loss: 1.034537672996521,grad_norm: 0.8511833032155672, iteration: 51398
loss: 1.0502811670303345,grad_norm: 0.9999995679691338, iteration: 51399
loss: 0.9769831895828247,grad_norm: 0.9136092184625653, iteration: 51400
loss: 0.9819276928901672,grad_norm: 0.9999992049651885, iteration: 51401
loss: 1.0198224782943726,grad_norm: 0.9999997170548195, iteration: 51402
loss: 1.052428960800171,grad_norm: 0.999999141210237, iteration: 51403
loss: 1.04124915599823,grad_norm: 0.999999496138562, iteration: 51404
loss: 0.9729883670806885,grad_norm: 0.8691704832447186, iteration: 51405
loss: 0.9903802275657654,grad_norm: 0.9999990578128934, iteration: 51406
loss: 1.0290361642837524,grad_norm: 0.9999989875510701, iteration: 51407
loss: 0.9861432909965515,grad_norm: 0.8934113993121621, iteration: 51408
loss: 1.012384295463562,grad_norm: 0.9999992127534856, iteration: 51409
loss: 0.9995854496955872,grad_norm: 0.9693441923078502, iteration: 51410
loss: 1.0331400632858276,grad_norm: 0.9999994468679562, iteration: 51411
loss: 1.0215364694595337,grad_norm: 0.9642150510517942, iteration: 51412
loss: 0.9859079122543335,grad_norm: 0.9999992610251109, iteration: 51413
loss: 0.9532837271690369,grad_norm: 0.9264278648115705, iteration: 51414
loss: 0.9825363159179688,grad_norm: 0.9953970584181544, iteration: 51415
loss: 0.9866424202919006,grad_norm: 0.9999991546976447, iteration: 51416
loss: 0.9966520667076111,grad_norm: 0.9224127984660537, iteration: 51417
loss: 1.0177775621414185,grad_norm: 0.9999993100703674, iteration: 51418
loss: 1.0043747425079346,grad_norm: 0.8297231139377185, iteration: 51419
loss: 0.9762924909591675,grad_norm: 0.9190266481706005, iteration: 51420
loss: 0.9719433188438416,grad_norm: 0.9546914418843452, iteration: 51421
loss: 1.0137827396392822,grad_norm: 0.881460169169408, iteration: 51422
loss: 1.0505352020263672,grad_norm: 0.9117501234226044, iteration: 51423
loss: 1.0077368021011353,grad_norm: 0.9999991227137671, iteration: 51424
loss: 1.0108801126480103,grad_norm: 0.9999992663852769, iteration: 51425
loss: 1.0404466390609741,grad_norm: 0.9740432859171053, iteration: 51426
loss: 1.0166095495224,grad_norm: 0.9999993695749847, iteration: 51427
loss: 1.011317491531372,grad_norm: 0.9408356817220354, iteration: 51428
loss: 0.9884746670722961,grad_norm: 0.9335679227218752, iteration: 51429
loss: 1.0264993906021118,grad_norm: 0.9938973850904198, iteration: 51430
loss: 0.9930547475814819,grad_norm: 0.9434743955647088, iteration: 51431
loss: 1.0445449352264404,grad_norm: 0.999999378133803, iteration: 51432
loss: 1.0485377311706543,grad_norm: 0.9999996963976427, iteration: 51433
loss: 1.0586003065109253,grad_norm: 0.9999991696175151, iteration: 51434
loss: 1.030527114868164,grad_norm: 0.9999992484556286, iteration: 51435
loss: 1.0702722072601318,grad_norm: 0.9999999558420917, iteration: 51436
loss: 1.0020509958267212,grad_norm: 0.6888307251431764, iteration: 51437
loss: 0.9835125803947449,grad_norm: 0.8681611497497382, iteration: 51438
loss: 1.0189692974090576,grad_norm: 0.999999095252903, iteration: 51439
loss: 0.9954906105995178,grad_norm: 0.8093929712436428, iteration: 51440
loss: 0.9792722463607788,grad_norm: 0.9999990765942146, iteration: 51441
loss: 1.0104341506958008,grad_norm: 0.9999990470166744, iteration: 51442
loss: 0.9857537746429443,grad_norm: 0.9714713715786792, iteration: 51443
loss: 0.9635744690895081,grad_norm: 0.9672979371412366, iteration: 51444
loss: 0.9985269904136658,grad_norm: 0.8808482087630438, iteration: 51445
loss: 1.0035263299942017,grad_norm: 0.7285497399499736, iteration: 51446
loss: 0.9932341575622559,grad_norm: 0.8561545808688045, iteration: 51447
loss: 1.0243449211120605,grad_norm: 0.7826212484901354, iteration: 51448
loss: 1.004867434501648,grad_norm: 0.9999989582673612, iteration: 51449
loss: 0.9771521091461182,grad_norm: 0.8045455402318543, iteration: 51450
loss: 1.025209665298462,grad_norm: 0.9103040571460138, iteration: 51451
loss: 0.9970948696136475,grad_norm: 0.9780294264158705, iteration: 51452
loss: 0.9779390692710876,grad_norm: 0.9999989427883934, iteration: 51453
loss: 0.9912653565406799,grad_norm: 0.9999990752144107, iteration: 51454
loss: 1.0242701768875122,grad_norm: 0.9999991249924358, iteration: 51455
loss: 1.005663275718689,grad_norm: 0.9999990497476784, iteration: 51456
loss: 0.9840867519378662,grad_norm: 0.9034573331154531, iteration: 51457
loss: 1.0265780687332153,grad_norm: 0.9424601827349414, iteration: 51458
loss: 1.0364643335342407,grad_norm: 0.9342506586954893, iteration: 51459
loss: 0.9849676489830017,grad_norm: 0.9863958010991334, iteration: 51460
loss: 0.9925153851509094,grad_norm: 0.9999991709992353, iteration: 51461
loss: 0.9845119714736938,grad_norm: 0.9845233816232378, iteration: 51462
loss: 0.9975113272666931,grad_norm: 0.9277973879599548, iteration: 51463
loss: 0.9883935451507568,grad_norm: 0.9331232069888932, iteration: 51464
loss: 0.9976780414581299,grad_norm: 0.8901736103443082, iteration: 51465
loss: 1.0343189239501953,grad_norm: 0.999999376496472, iteration: 51466
loss: 0.9980566501617432,grad_norm: 0.9127792023386055, iteration: 51467
loss: 0.9683954119682312,grad_norm: 0.9999991914175604, iteration: 51468
loss: 1.047218918800354,grad_norm: 0.9999999159570178, iteration: 51469
loss: 1.0459965467453003,grad_norm: 0.9999989958524659, iteration: 51470
loss: 1.0429034233093262,grad_norm: 0.999999493082131, iteration: 51471
loss: 1.010032057762146,grad_norm: 0.9999999773863787, iteration: 51472
loss: 0.9696309566497803,grad_norm: 0.9999991051414974, iteration: 51473
loss: 1.0032846927642822,grad_norm: 0.9999990940764486, iteration: 51474
loss: 0.9577345848083496,grad_norm: 0.788970288450786, iteration: 51475
loss: 1.0256925821304321,grad_norm: 0.9687717871523477, iteration: 51476
loss: 0.9719727039337158,grad_norm: 0.903123702026978, iteration: 51477
loss: 1.016088843345642,grad_norm: 0.9999990734529649, iteration: 51478
loss: 1.0047390460968018,grad_norm: 0.9999990919521315, iteration: 51479
loss: 0.989083468914032,grad_norm: 0.6972802178276486, iteration: 51480
loss: 0.9715977907180786,grad_norm: 0.9999992088345202, iteration: 51481
loss: 0.9810730218887329,grad_norm: 0.9999990926533678, iteration: 51482
loss: 1.0447880029678345,grad_norm: 0.9999990960348613, iteration: 51483
loss: 0.9879533648490906,grad_norm: 0.9999990688080447, iteration: 51484
loss: 0.9866251945495605,grad_norm: 0.9999990477234735, iteration: 51485
loss: 1.0214682817459106,grad_norm: 0.8962672108176605, iteration: 51486
loss: 1.0247211456298828,grad_norm: 0.9999990548051578, iteration: 51487
loss: 0.9860241413116455,grad_norm: 0.9999990884454572, iteration: 51488
loss: 1.0127463340759277,grad_norm: 0.815464809336131, iteration: 51489
loss: 0.9733899831771851,grad_norm: 0.8448621540818916, iteration: 51490
loss: 1.0130831003189087,grad_norm: 0.9115322763090478, iteration: 51491
loss: 1.0018033981323242,grad_norm: 0.9702385009432746, iteration: 51492
loss: 1.0359035730361938,grad_norm: 0.9200050367191861, iteration: 51493
loss: 1.060150384902954,grad_norm: 0.9999992953652925, iteration: 51494
loss: 0.9810823202133179,grad_norm: 0.9296830434405704, iteration: 51495
loss: 1.182908535003662,grad_norm: 0.9999996907547654, iteration: 51496
loss: 1.00890052318573,grad_norm: 0.9456205337470488, iteration: 51497
loss: 1.0365639925003052,grad_norm: 0.8923526444185408, iteration: 51498
loss: 1.0802826881408691,grad_norm: 0.9999991444097261, iteration: 51499
loss: 1.0343661308288574,grad_norm: 0.8339729151253051, iteration: 51500
loss: 0.9740983247756958,grad_norm: 0.9999991760630108, iteration: 51501
loss: 1.0287102460861206,grad_norm: 0.9925884005413981, iteration: 51502
loss: 0.9888574481010437,grad_norm: 0.9893601690084629, iteration: 51503
loss: 1.0277599096298218,grad_norm: 0.941369520899522, iteration: 51504
loss: 1.0027779340744019,grad_norm: 0.9999990236247266, iteration: 51505
loss: 0.9904233813285828,grad_norm: 0.9999997556394823, iteration: 51506
loss: 1.0185831785202026,grad_norm: 0.7825812246305587, iteration: 51507
loss: 1.0058784484863281,grad_norm: 0.9999992443134965, iteration: 51508
loss: 0.9996598362922668,grad_norm: 0.9999992290090812, iteration: 51509
loss: 0.982100248336792,grad_norm: 0.9999989748165176, iteration: 51510
loss: 1.0062845945358276,grad_norm: 0.8743407012504574, iteration: 51511
loss: 0.9931915402412415,grad_norm: 0.9111932383305916, iteration: 51512
loss: 0.9687299132347107,grad_norm: 0.8107250191111992, iteration: 51513
loss: 0.9668397307395935,grad_norm: 0.9977384762468272, iteration: 51514
loss: 1.0078054666519165,grad_norm: 0.921593862451093, iteration: 51515
loss: 1.017317295074463,grad_norm: 0.836042130031516, iteration: 51516
loss: 1.00965416431427,grad_norm: 0.9999990303682782, iteration: 51517
loss: 1.00944983959198,grad_norm: 0.9287303014343367, iteration: 51518
loss: 1.0385401248931885,grad_norm: 0.9605789322141666, iteration: 51519
loss: 1.0373220443725586,grad_norm: 0.9485028911177316, iteration: 51520
loss: 1.0058069229125977,grad_norm: 0.9999991594579384, iteration: 51521
loss: 1.0114781856536865,grad_norm: 0.9999989691281125, iteration: 51522
loss: 1.0181608200073242,grad_norm: 0.9657644435081562, iteration: 51523
loss: 0.9964869618415833,grad_norm: 0.9999994016954669, iteration: 51524
loss: 1.0323944091796875,grad_norm: 0.9384471854695386, iteration: 51525
loss: 0.9917962551116943,grad_norm: 0.9999992236836436, iteration: 51526
loss: 0.9909438490867615,grad_norm: 0.8694675004004512, iteration: 51527
loss: 1.0370783805847168,grad_norm: 0.9190650311879193, iteration: 51528
loss: 1.0705384016036987,grad_norm: 0.9999996782187542, iteration: 51529
loss: 1.0335350036621094,grad_norm: 0.9999989469246051, iteration: 51530
loss: 0.9786267876625061,grad_norm: 0.9999996694166065, iteration: 51531
loss: 0.9974521398544312,grad_norm: 0.9243408145388731, iteration: 51532
loss: 0.9886941313743591,grad_norm: 0.8490204300984472, iteration: 51533
loss: 1.0207972526550293,grad_norm: 0.9608622098342475, iteration: 51534
loss: 0.9757224917411804,grad_norm: 0.9999991177557285, iteration: 51535
loss: 1.0029869079589844,grad_norm: 0.9957451138799683, iteration: 51536
loss: 0.9839234948158264,grad_norm: 0.7520246480879057, iteration: 51537
loss: 1.0039790868759155,grad_norm: 0.7851118986967675, iteration: 51538
loss: 1.006126880645752,grad_norm: 0.9707582768484533, iteration: 51539
loss: 1.0163030624389648,grad_norm: 0.8904556280856464, iteration: 51540
loss: 1.0072144269943237,grad_norm: 0.8521272174961165, iteration: 51541
loss: 1.0159778594970703,grad_norm: 0.9999992723270587, iteration: 51542
loss: 0.96108078956604,grad_norm: 0.9044818261408518, iteration: 51543
loss: 1.0320526361465454,grad_norm: 0.9999995955919594, iteration: 51544
loss: 0.9464471340179443,grad_norm: 0.8019617122269008, iteration: 51545
loss: 1.0503121614456177,grad_norm: 0.9963160604896357, iteration: 51546
loss: 1.0393809080123901,grad_norm: 0.9751962529503598, iteration: 51547
loss: 1.0179840326309204,grad_norm: 0.8936688763205077, iteration: 51548
loss: 1.0028948783874512,grad_norm: 0.9999990652218177, iteration: 51549
loss: 0.9905611276626587,grad_norm: 0.999999123707171, iteration: 51550
loss: 1.0262665748596191,grad_norm: 0.9999994998773379, iteration: 51551
loss: 1.0191773176193237,grad_norm: 0.9999990485157558, iteration: 51552
loss: 1.034173607826233,grad_norm: 0.8929031522887344, iteration: 51553
loss: 0.9937993884086609,grad_norm: 0.8925185422041326, iteration: 51554
loss: 1.041030764579773,grad_norm: 0.9999991433123472, iteration: 51555
loss: 0.973327100276947,grad_norm: 0.8130874535757842, iteration: 51556
loss: 1.0061949491500854,grad_norm: 0.9504265563717782, iteration: 51557
loss: 1.0225753784179688,grad_norm: 0.9999989826844664, iteration: 51558
loss: 0.9610785245895386,grad_norm: 0.9999990979512358, iteration: 51559
loss: 0.9621391892433167,grad_norm: 0.9999990806663891, iteration: 51560
loss: 0.9897732734680176,grad_norm: 0.9352250240943317, iteration: 51561
loss: 1.0087755918502808,grad_norm: 0.9999993596948744, iteration: 51562
loss: 1.0398598909378052,grad_norm: 0.9999991815388884, iteration: 51563
loss: 1.0240212678909302,grad_norm: 0.8355111021482503, iteration: 51564
loss: 0.9962351322174072,grad_norm: 0.8420264697511652, iteration: 51565
loss: 0.9493374228477478,grad_norm: 0.9987878329025509, iteration: 51566
loss: 1.0106191635131836,grad_norm: 0.999999165148139, iteration: 51567
loss: 0.9761907458305359,grad_norm: 0.9999992804513869, iteration: 51568
loss: 1.0235023498535156,grad_norm: 0.9911653843188807, iteration: 51569
loss: 1.0107512474060059,grad_norm: 0.9999994801830532, iteration: 51570
loss: 1.0591737031936646,grad_norm: 0.9999990620072572, iteration: 51571
loss: 0.9963371157646179,grad_norm: 0.8924083342546096, iteration: 51572
loss: 0.9843907356262207,grad_norm: 0.8037961715616653, iteration: 51573
loss: 1.0048329830169678,grad_norm: 0.9754922910112486, iteration: 51574
loss: 0.9893182516098022,grad_norm: 0.9187351822708038, iteration: 51575
loss: 1.0284385681152344,grad_norm: 0.9110135083020479, iteration: 51576
loss: 1.069490671157837,grad_norm: 0.9999995596595422, iteration: 51577
loss: 0.986151397228241,grad_norm: 0.9899364957428781, iteration: 51578
loss: 1.0677841901779175,grad_norm: 0.9999996078554589, iteration: 51579
loss: 0.9771654009819031,grad_norm: 0.9999992363359534, iteration: 51580
loss: 1.0109323263168335,grad_norm: 0.9083769008637077, iteration: 51581
loss: 1.0145726203918457,grad_norm: 0.9999991564133395, iteration: 51582
loss: 0.981325626373291,grad_norm: 0.8101036776874376, iteration: 51583
loss: 0.9897649884223938,grad_norm: 0.8517391095020184, iteration: 51584
loss: 0.9833304286003113,grad_norm: 0.9752146325719316, iteration: 51585
loss: 1.0117908716201782,grad_norm: 0.9999992685408512, iteration: 51586
loss: 1.04258131980896,grad_norm: 0.8595327640006247, iteration: 51587
loss: 0.983365535736084,grad_norm: 0.999999271591408, iteration: 51588
loss: 0.9981864094734192,grad_norm: 0.9999990399701071, iteration: 51589
loss: 1.013283371925354,grad_norm: 0.8128706890958834, iteration: 51590
loss: 1.023905873298645,grad_norm: 0.9999990878557484, iteration: 51591
loss: 0.9967149496078491,grad_norm: 0.9999991025673973, iteration: 51592
loss: 1.014196753501892,grad_norm: 0.9638567052662182, iteration: 51593
loss: 1.031895399093628,grad_norm: 0.9999993517164041, iteration: 51594
loss: 1.0066421031951904,grad_norm: 0.9999990991840738, iteration: 51595
loss: 0.9373915791511536,grad_norm: 0.8685335823561579, iteration: 51596
loss: 0.9860703945159912,grad_norm: 0.8427007956734169, iteration: 51597
loss: 0.9626094102859497,grad_norm: 0.9254303383759642, iteration: 51598
loss: 1.05580472946167,grad_norm: 0.9999994898843886, iteration: 51599
loss: 0.9906176924705505,grad_norm: 0.9543737967860687, iteration: 51600
loss: 0.9821576476097107,grad_norm: 0.9999995042657129, iteration: 51601
loss: 1.0113784074783325,grad_norm: 0.9999990571315288, iteration: 51602
loss: 0.9947906732559204,grad_norm: 0.9609847552412338, iteration: 51603
loss: 0.9926410913467407,grad_norm: 0.8117480749472787, iteration: 51604
loss: 1.034195065498352,grad_norm: 0.9999999394328648, iteration: 51605
loss: 1.0207399129867554,grad_norm: 0.8879810603344626, iteration: 51606
loss: 0.977904736995697,grad_norm: 0.9870806358358921, iteration: 51607
loss: 1.0220863819122314,grad_norm: 0.9687180206526632, iteration: 51608
loss: 1.0069228410720825,grad_norm: 0.9999990219141788, iteration: 51609
loss: 0.9843270778656006,grad_norm: 0.7393061109780256, iteration: 51610
loss: 1.0194735527038574,grad_norm: 0.9999990877989041, iteration: 51611
loss: 0.9937561750411987,grad_norm: 0.9999993092387992, iteration: 51612
loss: 0.9944999814033508,grad_norm: 0.7894927001553517, iteration: 51613
loss: 0.9899558424949646,grad_norm: 0.7730511236750687, iteration: 51614
loss: 0.995962381362915,grad_norm: 0.9722640497880619, iteration: 51615
loss: 1.0024627447128296,grad_norm: 0.9222849950671138, iteration: 51616
loss: 0.985378623008728,grad_norm: 0.9233741643339664, iteration: 51617
loss: 1.0981568098068237,grad_norm: 0.9999997435697535, iteration: 51618
loss: 0.9786933064460754,grad_norm: 0.9999991273471, iteration: 51619
loss: 1.0044909715652466,grad_norm: 0.9999991059158077, iteration: 51620
loss: 0.9581427574157715,grad_norm: 0.9999990030045972, iteration: 51621
loss: 0.9929124116897583,grad_norm: 0.9999991493563102, iteration: 51622
loss: 1.0782716274261475,grad_norm: 0.9999996234763987, iteration: 51623
loss: 1.017269492149353,grad_norm: 0.8896675494780443, iteration: 51624
loss: 1.0149122476577759,grad_norm: 0.9882156441534355, iteration: 51625
loss: 1.0218467712402344,grad_norm: 0.999999150016221, iteration: 51626
loss: 0.9818958640098572,grad_norm: 0.9999993807239956, iteration: 51627
loss: 1.0410493612289429,grad_norm: 0.9999991156696452, iteration: 51628
loss: 0.9861926436424255,grad_norm: 0.9999990277259039, iteration: 51629
loss: 0.9762607216835022,grad_norm: 0.9906796414305602, iteration: 51630
loss: 1.0385254621505737,grad_norm: 0.9024376745190373, iteration: 51631
loss: 0.9831947684288025,grad_norm: 0.9109257338657518, iteration: 51632
loss: 1.03799307346344,grad_norm: 0.9999990350647776, iteration: 51633
loss: 0.9837106466293335,grad_norm: 0.9829719347405454, iteration: 51634
loss: 1.0157108306884766,grad_norm: 0.7889045145356834, iteration: 51635
loss: 0.986766517162323,grad_norm: 0.9999989682249272, iteration: 51636
loss: 1.024871587753296,grad_norm: 0.9077094566837786, iteration: 51637
loss: 0.9973976612091064,grad_norm: 0.963964745806215, iteration: 51638
loss: 0.9565863609313965,grad_norm: 0.9999990750138624, iteration: 51639
loss: 1.0154627561569214,grad_norm: 0.8661346243895884, iteration: 51640
loss: 0.9880951046943665,grad_norm: 0.9457214785369051, iteration: 51641
loss: 0.9781818389892578,grad_norm: 0.9364268313995486, iteration: 51642
loss: 1.0559698343276978,grad_norm: 0.9999994846918305, iteration: 51643
loss: 1.0072181224822998,grad_norm: 0.9173731098861054, iteration: 51644
loss: 1.0758848190307617,grad_norm: 0.9999997838943181, iteration: 51645
loss: 0.9981971383094788,grad_norm: 0.9999990605324112, iteration: 51646
loss: 1.0160658359527588,grad_norm: 0.9177986943106873, iteration: 51647
loss: 1.023817777633667,grad_norm: 0.9846887541238116, iteration: 51648
loss: 1.0060867071151733,grad_norm: 0.8207120563841567, iteration: 51649
loss: 0.9981968998908997,grad_norm: 0.9999992374441422, iteration: 51650
loss: 1.0189157724380493,grad_norm: 0.9999991828245789, iteration: 51651
loss: 1.037086009979248,grad_norm: 0.999999168673584, iteration: 51652
loss: 1.0041539669036865,grad_norm: 0.8402359315879308, iteration: 51653
loss: 1.0266839265823364,grad_norm: 0.8786717893764036, iteration: 51654
loss: 0.983877956867218,grad_norm: 0.9999998117138992, iteration: 51655
loss: 1.0134093761444092,grad_norm: 0.8954678705582145, iteration: 51656
loss: 0.9906865358352661,grad_norm: 0.8110954313961188, iteration: 51657
loss: 1.0468860864639282,grad_norm: 0.9269205763548117, iteration: 51658
loss: 1.0153210163116455,grad_norm: 0.9999992327466444, iteration: 51659
loss: 1.0097389221191406,grad_norm: 0.9114969271412402, iteration: 51660
loss: 1.023720622062683,grad_norm: 0.9999989692666931, iteration: 51661
loss: 1.034252405166626,grad_norm: 0.9999992289174215, iteration: 51662
loss: 0.9894782900810242,grad_norm: 0.7964057096409908, iteration: 51663
loss: 0.985572874546051,grad_norm: 0.9999989683813713, iteration: 51664
loss: 1.030439853668213,grad_norm: 0.8403081965040683, iteration: 51665
loss: 1.0205179452896118,grad_norm: 0.9999990897370757, iteration: 51666
loss: 0.9935814738273621,grad_norm: 0.9999991758985327, iteration: 51667
loss: 1.0409510135650635,grad_norm: 0.9999996523564253, iteration: 51668
loss: 1.0049083232879639,grad_norm: 0.9061297993805912, iteration: 51669
loss: 0.9935439825057983,grad_norm: 0.7539518321753055, iteration: 51670
loss: 1.0079237222671509,grad_norm: 0.9679607055017282, iteration: 51671
loss: 1.0112066268920898,grad_norm: 0.8035655070799997, iteration: 51672
loss: 0.9803543090820312,grad_norm: 0.9020148020063286, iteration: 51673
loss: 1.0310858488082886,grad_norm: 0.9364437555241442, iteration: 51674
loss: 1.0129116773605347,grad_norm: 0.999999638118425, iteration: 51675
loss: 1.007850170135498,grad_norm: 0.9999990460627853, iteration: 51676
loss: 1.0627118349075317,grad_norm: 0.9999996645940376, iteration: 51677
loss: 0.9920855164527893,grad_norm: 0.9896168778885797, iteration: 51678
loss: 1.0426782369613647,grad_norm: 0.9785075041136105, iteration: 51679
loss: 1.041393756866455,grad_norm: 0.8537146833244281, iteration: 51680
loss: 1.0020313262939453,grad_norm: 0.8955849326446591, iteration: 51681
loss: 1.0069259405136108,grad_norm: 0.9999994178144606, iteration: 51682
loss: 1.0313392877578735,grad_norm: 0.9853586965932299, iteration: 51683
loss: 1.0142936706542969,grad_norm: 0.9999990426786551, iteration: 51684
loss: 1.035451889038086,grad_norm: 0.916043514663054, iteration: 51685
loss: 1.0656096935272217,grad_norm: 0.9999992795918687, iteration: 51686
loss: 1.002852201461792,grad_norm: 0.911222908705401, iteration: 51687
loss: 1.0163880586624146,grad_norm: 0.8639375523809757, iteration: 51688
loss: 0.9853806495666504,grad_norm: 0.9999991231618103, iteration: 51689
loss: 1.028054118156433,grad_norm: 0.9799444022962531, iteration: 51690
loss: 1.027551531791687,grad_norm: 0.9999993123986983, iteration: 51691
loss: 1.0203845500946045,grad_norm: 0.873072414320641, iteration: 51692
loss: 0.9891625642776489,grad_norm: 0.999999065993293, iteration: 51693
loss: 0.9926111102104187,grad_norm: 0.9999990501421242, iteration: 51694
loss: 1.0132898092269897,grad_norm: 0.9999994451370637, iteration: 51695
loss: 0.9908289313316345,grad_norm: 0.8550329617367499, iteration: 51696
loss: 0.9627501964569092,grad_norm: 0.9490910330616644, iteration: 51697
loss: 0.9986718893051147,grad_norm: 0.9169460167740691, iteration: 51698
loss: 1.0395808219909668,grad_norm: 0.9999990058642191, iteration: 51699
loss: 1.0360077619552612,grad_norm: 0.9999992291093592, iteration: 51700
loss: 0.9993025064468384,grad_norm: 0.9039197248955814, iteration: 51701
loss: 0.9854720830917358,grad_norm: 0.9999999187007722, iteration: 51702
loss: 0.9930816292762756,grad_norm: 0.999999118112703, iteration: 51703
loss: 1.0185431241989136,grad_norm: 0.7590252519383696, iteration: 51704
loss: 1.0181195735931396,grad_norm: 0.9999989530428653, iteration: 51705
loss: 1.005665898323059,grad_norm: 0.8588469494260951, iteration: 51706
loss: 0.986316978931427,grad_norm: 0.9999992438597537, iteration: 51707
loss: 0.9773548245429993,grad_norm: 0.9999991166192773, iteration: 51708
loss: 1.0213937759399414,grad_norm: 0.9999990351131174, iteration: 51709
loss: 0.9929508566856384,grad_norm: 0.9901871454844758, iteration: 51710
loss: 0.9924615025520325,grad_norm: 0.9999991984300515, iteration: 51711
loss: 1.0816965103149414,grad_norm: 0.9999993591662405, iteration: 51712
loss: 1.0945582389831543,grad_norm: 0.9999994611255334, iteration: 51713
loss: 1.0567314624786377,grad_norm: 0.8620608100141156, iteration: 51714
loss: 0.9707892537117004,grad_norm: 0.8569485042426138, iteration: 51715
loss: 1.003247857093811,grad_norm: 0.9588338286461069, iteration: 51716
loss: 1.0310122966766357,grad_norm: 0.8936447503376109, iteration: 51717
loss: 1.0119343996047974,grad_norm: 0.9999993866211822, iteration: 51718
loss: 0.9943640232086182,grad_norm: 0.999999141821788, iteration: 51719
loss: 0.9833520650863647,grad_norm: 0.7402949313195555, iteration: 51720
loss: 0.9692795276641846,grad_norm: 0.9229283607230546, iteration: 51721
loss: 1.0136221647262573,grad_norm: 0.8717556313376928, iteration: 51722
loss: 1.0081472396850586,grad_norm: 0.948267422525899, iteration: 51723
loss: 1.0493289232254028,grad_norm: 0.9999991057078396, iteration: 51724
loss: 1.0203006267547607,grad_norm: 0.9383490467468045, iteration: 51725
loss: 1.0404103994369507,grad_norm: 0.9999998402677955, iteration: 51726
loss: 1.008322834968567,grad_norm: 0.8937124965455668, iteration: 51727
loss: 1.0283198356628418,grad_norm: 0.9977372917992948, iteration: 51728
loss: 1.0332841873168945,grad_norm: 0.9040110840301885, iteration: 51729
loss: 1.0266380310058594,grad_norm: 0.9293599118418402, iteration: 51730
loss: 0.977493405342102,grad_norm: 0.9822352919924983, iteration: 51731
loss: 0.9908636808395386,grad_norm: 0.8977626279313118, iteration: 51732
loss: 1.024556279182434,grad_norm: 0.8877232461671027, iteration: 51733
loss: 0.9825523495674133,grad_norm: 0.756006809220217, iteration: 51734
loss: 1.016172170639038,grad_norm: 0.9999991389023786, iteration: 51735
loss: 1.0211853981018066,grad_norm: 0.9999992598138538, iteration: 51736
loss: 1.0246553421020508,grad_norm: 0.9999995614401206, iteration: 51737
loss: 1.007691502571106,grad_norm: 0.9999996089558983, iteration: 51738
loss: 1.0207338333129883,grad_norm: 0.9999995034349577, iteration: 51739
loss: 0.9830479621887207,grad_norm: 0.9999992372853088, iteration: 51740
loss: 0.9958848357200623,grad_norm: 0.8044179958325617, iteration: 51741
loss: 1.0126622915267944,grad_norm: 0.9324979350365478, iteration: 51742
loss: 0.9933725595474243,grad_norm: 0.9800982698414741, iteration: 51743
loss: 0.9982984662055969,grad_norm: 0.9309647203221957, iteration: 51744
loss: 1.0291467905044556,grad_norm: 0.8113542535182737, iteration: 51745
loss: 1.030009150505066,grad_norm: 0.9317761334664049, iteration: 51746
loss: 0.9892012476921082,grad_norm: 0.9444335138429365, iteration: 51747
loss: 0.9868828058242798,grad_norm: 0.9999994589218133, iteration: 51748
loss: 1.0090299844741821,grad_norm: 0.9892553283810459, iteration: 51749
loss: 1.0319973230361938,grad_norm: 0.9999991489523834, iteration: 51750
loss: 1.0027978420257568,grad_norm: 0.9450268035160143, iteration: 51751
loss: 1.0185247659683228,grad_norm: 0.7961269837410511, iteration: 51752
loss: 0.985083818435669,grad_norm: 0.9999994316394178, iteration: 51753
loss: 0.9795746803283691,grad_norm: 0.9999995356147328, iteration: 51754
loss: 1.0078710317611694,grad_norm: 0.8416206885524607, iteration: 51755
loss: 0.9521017074584961,grad_norm: 0.9165746258178556, iteration: 51756
loss: 0.9899678230285645,grad_norm: 0.8053623832342014, iteration: 51757
loss: 0.993412435054779,grad_norm: 0.9118387183578747, iteration: 51758
loss: 0.9580156803131104,grad_norm: 0.9508598311147559, iteration: 51759
loss: 1.0024806261062622,grad_norm: 0.8660221598459429, iteration: 51760
loss: 0.9841869473457336,grad_norm: 0.9999991029334104, iteration: 51761
loss: 0.9714560508728027,grad_norm: 0.9999992371403651, iteration: 51762
loss: 0.9949146509170532,grad_norm: 0.999999195283262, iteration: 51763
loss: 1.0122798681259155,grad_norm: 0.99999926535925, iteration: 51764
loss: 0.9898721575737,grad_norm: 0.7720978009362552, iteration: 51765
loss: 1.003387689590454,grad_norm: 0.9847886330139957, iteration: 51766
loss: 1.0031559467315674,grad_norm: 0.9517086933883275, iteration: 51767
loss: 1.011095404624939,grad_norm: 0.7878138916858339, iteration: 51768
loss: 0.9949746131896973,grad_norm: 0.8271126228030301, iteration: 51769
loss: 1.0328643321990967,grad_norm: 0.8036207682827704, iteration: 51770
loss: 0.9607961773872375,grad_norm: 0.9504061866552117, iteration: 51771
loss: 1.0424023866653442,grad_norm: 0.8671192597714634, iteration: 51772
loss: 1.0235586166381836,grad_norm: 0.9999989600563138, iteration: 51773
loss: 1.0072808265686035,grad_norm: 0.8799177071593899, iteration: 51774
loss: 0.9840531945228577,grad_norm: 0.9999991017247711, iteration: 51775
loss: 0.9907512664794922,grad_norm: 0.8784962309834257, iteration: 51776
loss: 1.0234774351119995,grad_norm: 0.9999998246165726, iteration: 51777
loss: 1.0795282125473022,grad_norm: 0.9999990638225841, iteration: 51778
loss: 1.042855978012085,grad_norm: 0.9999996737242103, iteration: 51779
loss: 1.0017813444137573,grad_norm: 0.8939862818506319, iteration: 51780
loss: 0.9774723052978516,grad_norm: 0.9678955268621696, iteration: 51781
loss: 1.0170329809188843,grad_norm: 0.9999993714330984, iteration: 51782
loss: 1.0158898830413818,grad_norm: 0.9999992012099614, iteration: 51783
loss: 0.9784491062164307,grad_norm: 0.9999990213783257, iteration: 51784
loss: 0.9920725226402283,grad_norm: 0.928783650697317, iteration: 51785
loss: 1.0132120847702026,grad_norm: 0.9999993995248105, iteration: 51786
loss: 1.0264599323272705,grad_norm: 0.9999991702241996, iteration: 51787
loss: 1.044669508934021,grad_norm: 0.9999994979368376, iteration: 51788
loss: 1.0357462167739868,grad_norm: 0.9999990337195684, iteration: 51789
loss: 0.9980583786964417,grad_norm: 0.8155064555415443, iteration: 51790
loss: 0.9786701798439026,grad_norm: 0.8315638913207601, iteration: 51791
loss: 1.031962275505066,grad_norm: 0.9890998538522822, iteration: 51792
loss: 1.0082958936691284,grad_norm: 0.9999992235815517, iteration: 51793
loss: 1.0087672472000122,grad_norm: 0.9194195016673958, iteration: 51794
loss: 0.999136745929718,grad_norm: 0.9043639719306181, iteration: 51795
loss: 0.992682695388794,grad_norm: 0.99999899386183, iteration: 51796
loss: 1.006635069847107,grad_norm: 0.9999992029869752, iteration: 51797
loss: 0.9935650825500488,grad_norm: 0.9999991535275834, iteration: 51798
loss: 1.0235892534255981,grad_norm: 0.9999990597375984, iteration: 51799
loss: 0.9849857687950134,grad_norm: 0.999998973011734, iteration: 51800
loss: 1.0221375226974487,grad_norm: 0.9822857802959474, iteration: 51801
loss: 0.9971879720687866,grad_norm: 0.9075795088463261, iteration: 51802
loss: 1.0150467157363892,grad_norm: 0.9099842849281932, iteration: 51803
loss: 1.0022730827331543,grad_norm: 0.8795715603436578, iteration: 51804
loss: 0.9331434369087219,grad_norm: 0.9999991224011018, iteration: 51805
loss: 1.063218355178833,grad_norm: 0.9999997997228242, iteration: 51806
loss: 1.00271737575531,grad_norm: 0.9247837889507401, iteration: 51807
loss: 0.9956446886062622,grad_norm: 0.836264823590136, iteration: 51808
loss: 0.9824063181877136,grad_norm: 0.9693500746463888, iteration: 51809
loss: 0.9909462332725525,grad_norm: 0.8876606555773583, iteration: 51810
loss: 0.990936279296875,grad_norm: 0.9999989631244391, iteration: 51811
loss: 1.0015970468521118,grad_norm: 0.8604882178003972, iteration: 51812
loss: 1.0365574359893799,grad_norm: 0.9108071210580456, iteration: 51813
loss: 1.0208755731582642,grad_norm: 0.9095928403161898, iteration: 51814
loss: 1.0243923664093018,grad_norm: 0.9572358322366953, iteration: 51815
loss: 0.9852484464645386,grad_norm: 0.9825195311078445, iteration: 51816
loss: 0.985663890838623,grad_norm: 0.82877014700346, iteration: 51817
loss: 0.9741228818893433,grad_norm: 0.9748159047747796, iteration: 51818
loss: 1.008684754371643,grad_norm: 0.9461086860193547, iteration: 51819
loss: 1.0018370151519775,grad_norm: 0.9737300411407668, iteration: 51820
loss: 0.9735565185546875,grad_norm: 0.9387749393625913, iteration: 51821
loss: 0.9806218147277832,grad_norm: 0.9999991989008667, iteration: 51822
loss: 0.9824365973472595,grad_norm: 0.9999993630287299, iteration: 51823
loss: 1.0423568487167358,grad_norm: 0.8538118514385598, iteration: 51824
loss: 1.0030406713485718,grad_norm: 0.9999990536169832, iteration: 51825
loss: 0.9837296605110168,grad_norm: 0.9999994205176742, iteration: 51826
loss: 0.9941592812538147,grad_norm: 0.9195873954150005, iteration: 51827
loss: 1.0047481060028076,grad_norm: 0.9292717183826225, iteration: 51828
loss: 1.0025838613510132,grad_norm: 0.8971499859432177, iteration: 51829
loss: 1.0311003923416138,grad_norm: 0.9999995390724448, iteration: 51830
loss: 1.0061644315719604,grad_norm: 0.8264363817284671, iteration: 51831
loss: 0.9820730090141296,grad_norm: 0.9464429463826287, iteration: 51832
loss: 0.9741008877754211,grad_norm: 0.9999990972208067, iteration: 51833
loss: 1.0117359161376953,grad_norm: 0.9999992271280331, iteration: 51834
loss: 0.9781495928764343,grad_norm: 0.9999991243277956, iteration: 51835
loss: 1.0388447046279907,grad_norm: 0.9999991645384071, iteration: 51836
loss: 0.9464024305343628,grad_norm: 0.9999991372560529, iteration: 51837
loss: 1.0219216346740723,grad_norm: 0.9999989970267074, iteration: 51838
loss: 0.994899570941925,grad_norm: 0.8141009926146816, iteration: 51839
loss: 0.9692818522453308,grad_norm: 0.8076732029046735, iteration: 51840
loss: 0.988427460193634,grad_norm: 0.8974598461655066, iteration: 51841
loss: 1.0045486688613892,grad_norm: 0.8684617965382895, iteration: 51842
loss: 1.0134785175323486,grad_norm: 0.8133661174672232, iteration: 51843
loss: 0.9793912172317505,grad_norm: 0.8656724665325151, iteration: 51844
loss: 1.007615566253662,grad_norm: 0.9994808704040411, iteration: 51845
loss: 1.0182445049285889,grad_norm: 0.8423310700999787, iteration: 51846
loss: 0.9988821744918823,grad_norm: 0.9999990754688906, iteration: 51847
loss: 0.9871129989624023,grad_norm: 0.9999991051716424, iteration: 51848
loss: 1.0003879070281982,grad_norm: 0.9999995334116228, iteration: 51849
loss: 1.0084031820297241,grad_norm: 0.8584710857660462, iteration: 51850
loss: 0.9909579753875732,grad_norm: 0.9999990912098621, iteration: 51851
loss: 1.0317559242248535,grad_norm: 0.9999992753888872, iteration: 51852
loss: 1.011412262916565,grad_norm: 0.999999136932331, iteration: 51853
loss: 0.9867454767227173,grad_norm: 0.9999990771407591, iteration: 51854
loss: 1.0128164291381836,grad_norm: 0.9449816666504854, iteration: 51855
loss: 1.0204623937606812,grad_norm: 0.9963789655934873, iteration: 51856
loss: 0.9738011956214905,grad_norm: 0.8686472983080903, iteration: 51857
loss: 0.9973113536834717,grad_norm: 0.9954401489027705, iteration: 51858
loss: 1.0269074440002441,grad_norm: 0.8893848264167891, iteration: 51859
loss: 0.9522437453269958,grad_norm: 0.8604524640665604, iteration: 51860
loss: 0.9806146025657654,grad_norm: 0.9927918825255212, iteration: 51861
loss: 0.9894677996635437,grad_norm: 0.9999991582355631, iteration: 51862
loss: 0.992127537727356,grad_norm: 0.929219019458793, iteration: 51863
loss: 1.0902619361877441,grad_norm: 0.9999991903672977, iteration: 51864
loss: 0.9628551006317139,grad_norm: 0.9999989942307683, iteration: 51865
loss: 0.9361583590507507,grad_norm: 0.8592832796515985, iteration: 51866
loss: 1.0906988382339478,grad_norm: 0.8260787659639908, iteration: 51867
loss: 1.0041158199310303,grad_norm: 0.950197216558504, iteration: 51868
loss: 0.9847045540809631,grad_norm: 0.8838614336640811, iteration: 51869
loss: 1.0071691274642944,grad_norm: 0.8918781318729618, iteration: 51870
loss: 0.9483178853988647,grad_norm: 0.9088444630952043, iteration: 51871
loss: 0.9873846173286438,grad_norm: 0.9360647397791861, iteration: 51872
loss: 1.0082945823669434,grad_norm: 0.9142777899837533, iteration: 51873
loss: 0.9883845448493958,grad_norm: 0.9504918186750212, iteration: 51874
loss: 1.0296229124069214,grad_norm: 0.9999991242587322, iteration: 51875
loss: 1.0353386402130127,grad_norm: 0.9886116136884767, iteration: 51876
loss: 0.9815847873687744,grad_norm: 0.9999991252456035, iteration: 51877
loss: 0.999950110912323,grad_norm: 0.9999993912607864, iteration: 51878
loss: 0.9863962531089783,grad_norm: 0.9139302833665344, iteration: 51879
loss: 1.0112601518630981,grad_norm: 0.9485033093959221, iteration: 51880
loss: 1.0204782485961914,grad_norm: 0.9999995517406594, iteration: 51881
loss: 0.9803370237350464,grad_norm: 0.9999991217866607, iteration: 51882
loss: 0.9849419593811035,grad_norm: 0.9008491800065775, iteration: 51883
loss: 1.0195987224578857,grad_norm: 0.9359882411797584, iteration: 51884
loss: 0.9761934280395508,grad_norm: 0.8655205841549154, iteration: 51885
loss: 1.0481846332550049,grad_norm: 0.9999991667097327, iteration: 51886
loss: 0.9784456491470337,grad_norm: 0.9999993221358034, iteration: 51887
loss: 1.0278267860412598,grad_norm: 0.8501700921202963, iteration: 51888
loss: 0.9884516000747681,grad_norm: 0.9891930421605211, iteration: 51889
loss: 0.9708871841430664,grad_norm: 0.9744258118491643, iteration: 51890
loss: 0.9758146405220032,grad_norm: 0.9999989991941578, iteration: 51891
loss: 1.0126640796661377,grad_norm: 0.9249837047382579, iteration: 51892
loss: 1.055293083190918,grad_norm: 0.9999996195750555, iteration: 51893
loss: 0.9658201336860657,grad_norm: 0.7601310329429398, iteration: 51894
loss: 0.9770577549934387,grad_norm: 0.9999994266973843, iteration: 51895
loss: 0.9716635942459106,grad_norm: 0.9999990083408361, iteration: 51896
loss: 1.0133540630340576,grad_norm: 0.9999989789490338, iteration: 51897
loss: 1.0029542446136475,grad_norm: 0.9187983095125571, iteration: 51898
loss: 0.9895285964012146,grad_norm: 0.9999989864021275, iteration: 51899
loss: 1.0024685859680176,grad_norm: 0.9999990669007355, iteration: 51900
loss: 0.9832643270492554,grad_norm: 0.9999990894981671, iteration: 51901
loss: 0.9705729484558105,grad_norm: 0.8470571035649155, iteration: 51902
loss: 0.9953321814537048,grad_norm: 0.9999992026822705, iteration: 51903
loss: 0.9973648190498352,grad_norm: 0.9999992538749957, iteration: 51904
loss: 1.0114500522613525,grad_norm: 0.9999992845932113, iteration: 51905
loss: 1.0366711616516113,grad_norm: 0.9696474652346806, iteration: 51906
loss: 1.019247055053711,grad_norm: 0.9887718031953832, iteration: 51907
loss: 1.0371196269989014,grad_norm: 0.8571164772664492, iteration: 51908
loss: 1.032347559928894,grad_norm: 0.9067920206080614, iteration: 51909
loss: 0.9489218592643738,grad_norm: 0.9999991167630881, iteration: 51910
loss: 1.0581116676330566,grad_norm: 0.9999992629657003, iteration: 51911
loss: 0.9780606627464294,grad_norm: 0.9732265405051477, iteration: 51912
loss: 1.0093467235565186,grad_norm: 0.8355446981787892, iteration: 51913
loss: 0.9897662401199341,grad_norm: 0.9999991246845202, iteration: 51914
loss: 0.9631292819976807,grad_norm: 0.9041917702991223, iteration: 51915
loss: 1.0243853330612183,grad_norm: 0.9999990118503372, iteration: 51916
loss: 1.0053443908691406,grad_norm: 0.9917048010137098, iteration: 51917
loss: 1.0137513875961304,grad_norm: 0.8697264957136313, iteration: 51918
loss: 1.0027498006820679,grad_norm: 0.9582068817347855, iteration: 51919
loss: 0.9767428040504456,grad_norm: 0.9999993065741013, iteration: 51920
loss: 0.9964064955711365,grad_norm: 0.8052691019637637, iteration: 51921
loss: 0.9774916172027588,grad_norm: 0.9475134086510502, iteration: 51922
loss: 0.9886791706085205,grad_norm: 0.9287031330378038, iteration: 51923
loss: 1.0047674179077148,grad_norm: 0.9232343216119339, iteration: 51924
loss: 0.976609468460083,grad_norm: 0.969625661477142, iteration: 51925
loss: 1.035933017730713,grad_norm: 0.9458165599056209, iteration: 51926
loss: 1.0109981298446655,grad_norm: 0.8458311591582119, iteration: 51927
loss: 1.0785869359970093,grad_norm: 0.999999269765187, iteration: 51928
loss: 0.9751673936843872,grad_norm: 0.9999991774537967, iteration: 51929
loss: 0.9788873791694641,grad_norm: 0.8579313470223718, iteration: 51930
loss: 1.0003387928009033,grad_norm: 0.8156540783247136, iteration: 51931
loss: 0.9835363030433655,grad_norm: 0.8319221618554106, iteration: 51932
loss: 1.0231397151947021,grad_norm: 0.7574255402438387, iteration: 51933
loss: 0.971441924571991,grad_norm: 0.9743280860108815, iteration: 51934
loss: 0.9809773564338684,grad_norm: 0.8072826241427102, iteration: 51935
loss: 0.9828206300735474,grad_norm: 0.9880955577867734, iteration: 51936
loss: 1.0023918151855469,grad_norm: 0.999998992070701, iteration: 51937
loss: 1.005568265914917,grad_norm: 0.9108990997564396, iteration: 51938
loss: 1.017282485961914,grad_norm: 0.8333267877322684, iteration: 51939
loss: 0.9879558682441711,grad_norm: 0.7098185613380052, iteration: 51940
loss: 1.0645860433578491,grad_norm: 0.9999997609736955, iteration: 51941
loss: 1.001083254814148,grad_norm: 0.9052933691283066, iteration: 51942
loss: 0.9571917057037354,grad_norm: 0.9562651608974064, iteration: 51943
loss: 1.0106061697006226,grad_norm: 0.9999993456229564, iteration: 51944
loss: 1.0059254169464111,grad_norm: 0.8554263759475969, iteration: 51945
loss: 1.0651196241378784,grad_norm: 0.9999992781384653, iteration: 51946
loss: 0.9876696467399597,grad_norm: 0.9999992353744604, iteration: 51947
loss: 1.0124491453170776,grad_norm: 0.9999990436948073, iteration: 51948
loss: 0.9754380583763123,grad_norm: 0.8913936446584234, iteration: 51949
loss: 0.9896298050880432,grad_norm: 0.9999990100819162, iteration: 51950
loss: 0.9807831048965454,grad_norm: 0.8028522937067988, iteration: 51951
loss: 0.9855766892433167,grad_norm: 0.940644799412963, iteration: 51952
loss: 1.0138086080551147,grad_norm: 0.8361697026371115, iteration: 51953
loss: 1.0484566688537598,grad_norm: 0.9999991524428955, iteration: 51954
loss: 0.9775038361549377,grad_norm: 0.9999993072202424, iteration: 51955
loss: 1.0099899768829346,grad_norm: 0.7662058273707003, iteration: 51956
loss: 0.9832868576049805,grad_norm: 0.9999991137226186, iteration: 51957
loss: 1.0281741619110107,grad_norm: 0.9999992590376235, iteration: 51958
loss: 1.0231937170028687,grad_norm: 0.9999990613353367, iteration: 51959
loss: 1.0308444499969482,grad_norm: 0.834330037233716, iteration: 51960
loss: 1.007508397102356,grad_norm: 0.8676930060150049, iteration: 51961
loss: 0.9879751801490784,grad_norm: 0.9999990775615156, iteration: 51962
loss: 1.0109961032867432,grad_norm: 0.8280156017331681, iteration: 51963
loss: 1.0233594179153442,grad_norm: 0.9999992017625677, iteration: 51964
loss: 0.9352456331253052,grad_norm: 0.9882235816065202, iteration: 51965
loss: 1.0111753940582275,grad_norm: 0.8735979049165078, iteration: 51966
loss: 1.0154142379760742,grad_norm: 0.9418876538502452, iteration: 51967
loss: 0.9913343191146851,grad_norm: 0.8188752008895136, iteration: 51968
loss: 1.0050714015960693,grad_norm: 0.9999989744530122, iteration: 51969
loss: 0.9945085048675537,grad_norm: 0.8576971205727153, iteration: 51970
loss: 1.0151945352554321,grad_norm: 0.9989062715971597, iteration: 51971
loss: 0.9831359386444092,grad_norm: 0.9999991965487937, iteration: 51972
loss: 1.001357913017273,grad_norm: 0.999999084688815, iteration: 51973
loss: 0.9795717597007751,grad_norm: 0.9678879481245678, iteration: 51974
loss: 0.9854745268821716,grad_norm: 0.931672124470183, iteration: 51975
loss: 1.0241025686264038,grad_norm: 0.9999991180019278, iteration: 51976
loss: 0.9807676076889038,grad_norm: 0.8860805549299731, iteration: 51977
loss: 0.9970868229866028,grad_norm: 0.946214899164038, iteration: 51978
loss: 0.9684848189353943,grad_norm: 0.8595182937045042, iteration: 51979
loss: 0.9989722967147827,grad_norm: 0.8862684267699462, iteration: 51980
loss: 1.0154523849487305,grad_norm: 0.9979926113278385, iteration: 51981
loss: 1.0275143384933472,grad_norm: 0.9999992263530564, iteration: 51982
loss: 1.0299594402313232,grad_norm: 0.9948753818269582, iteration: 51983
loss: 0.986924946308136,grad_norm: 0.8372063612897513, iteration: 51984
loss: 1.0195428133010864,grad_norm: 0.846736905950699, iteration: 51985
loss: 1.006534457206726,grad_norm: 0.999999411537528, iteration: 51986
loss: 1.0475679636001587,grad_norm: 0.999999465238153, iteration: 51987
loss: 1.011902928352356,grad_norm: 0.8401180048814326, iteration: 51988
loss: 1.008564829826355,grad_norm: 0.8514166945134597, iteration: 51989
loss: 1.0363796949386597,grad_norm: 0.8048056308480489, iteration: 51990
loss: 1.0136855840682983,grad_norm: 0.9820424852559487, iteration: 51991
loss: 1.0434008836746216,grad_norm: 0.8887910338998222, iteration: 51992
loss: 0.9913917183876038,grad_norm: 0.786278510457725, iteration: 51993
loss: 1.0131018161773682,grad_norm: 0.9999991632076679, iteration: 51994
loss: 0.9822695255279541,grad_norm: 0.9974865568003974, iteration: 51995
loss: 1.0067172050476074,grad_norm: 0.9999989506609133, iteration: 51996
loss: 0.9771336317062378,grad_norm: 0.9060500615074666, iteration: 51997
loss: 0.9994674324989319,grad_norm: 0.9057809704789712, iteration: 51998
loss: 1.0135444402694702,grad_norm: 0.8656368338987835, iteration: 51999
loss: 1.0262832641601562,grad_norm: 0.9836395586763431, iteration: 52000
loss: 1.0339072942733765,grad_norm: 0.9999995357946674, iteration: 52001
loss: 1.0248372554779053,grad_norm: 0.7539822697577141, iteration: 52002
loss: 1.0091718435287476,grad_norm: 0.9999992685258173, iteration: 52003
loss: 0.9849793314933777,grad_norm: 0.868805974577346, iteration: 52004
loss: 1.002629041671753,grad_norm: 0.7539023733574836, iteration: 52005
loss: 1.029393196105957,grad_norm: 0.7526966372762189, iteration: 52006
loss: 1.039771318435669,grad_norm: 0.9999991661525657, iteration: 52007
loss: 1.008885383605957,grad_norm: 0.9999995376311686, iteration: 52008
loss: 0.9819202423095703,grad_norm: 0.970468003399641, iteration: 52009
loss: 1.0337477922439575,grad_norm: 0.9999993512764616, iteration: 52010
loss: 1.0114928483963013,grad_norm: 0.9030145150706141, iteration: 52011
loss: 0.9952351450920105,grad_norm: 0.9421557846323452, iteration: 52012
loss: 0.9800913333892822,grad_norm: 0.9999990854072777, iteration: 52013
loss: 1.0395671129226685,grad_norm: 0.9999991493782248, iteration: 52014
loss: 1.0665580034255981,grad_norm: 0.9999994558095961, iteration: 52015
loss: 1.0408775806427002,grad_norm: 0.9999991099864047, iteration: 52016
loss: 1.0174604654312134,grad_norm: 0.9102442834056546, iteration: 52017
loss: 1.0324175357818604,grad_norm: 0.8979693348069917, iteration: 52018
loss: 1.0106858015060425,grad_norm: 0.9999989253557984, iteration: 52019
loss: 1.0357189178466797,grad_norm: 0.9999992334376795, iteration: 52020
loss: 1.0039769411087036,grad_norm: 0.9999990519720927, iteration: 52021
loss: 1.0827335119247437,grad_norm: 0.9999993626185577, iteration: 52022
loss: 1.0164573192596436,grad_norm: 0.7416659947492396, iteration: 52023
loss: 1.0065956115722656,grad_norm: 0.9869086442494495, iteration: 52024
loss: 0.9872502684593201,grad_norm: 0.9265125184014262, iteration: 52025
loss: 0.9957805275917053,grad_norm: 0.9406467256302149, iteration: 52026
loss: 0.9991348385810852,grad_norm: 0.9999991594700333, iteration: 52027
loss: 1.0303021669387817,grad_norm: 0.9873994347835899, iteration: 52028
loss: 0.9920012950897217,grad_norm: 0.9149320568429057, iteration: 52029
loss: 1.0556230545043945,grad_norm: 0.9999991143330258, iteration: 52030
loss: 1.014400839805603,grad_norm: 0.9999990842440282, iteration: 52031
loss: 1.0085173845291138,grad_norm: 0.8902024804826898, iteration: 52032
loss: 1.012278437614441,grad_norm: 0.9118210958956909, iteration: 52033
loss: 1.0018171072006226,grad_norm: 0.8629605998026919, iteration: 52034
loss: 1.0105246305465698,grad_norm: 0.9999989816295405, iteration: 52035
loss: 1.0532903671264648,grad_norm: 0.9203978433858959, iteration: 52036
loss: 0.9612874984741211,grad_norm: 0.9999991435274489, iteration: 52037
loss: 1.0083341598510742,grad_norm: 0.9523895648901298, iteration: 52038
loss: 1.001650333404541,grad_norm: 0.9999994145867657, iteration: 52039
loss: 1.0273051261901855,grad_norm: 0.977468140702074, iteration: 52040
loss: 0.9421062469482422,grad_norm: 0.948642133270677, iteration: 52041
loss: 0.9883378148078918,grad_norm: 0.9725033971180286, iteration: 52042
loss: 1.0018707513809204,grad_norm: 0.9999991052118828, iteration: 52043
loss: 1.0055400133132935,grad_norm: 0.9410274340102, iteration: 52044
loss: 0.9940128326416016,grad_norm: 0.9258092195761267, iteration: 52045
loss: 0.9832445383071899,grad_norm: 0.9999992004677156, iteration: 52046
loss: 0.9966117143630981,grad_norm: 0.9999992041469637, iteration: 52047
loss: 1.0117496252059937,grad_norm: 0.8853166496335491, iteration: 52048
loss: 1.035314917564392,grad_norm: 0.8982818770479659, iteration: 52049
loss: 1.022055983543396,grad_norm: 0.9443983147895522, iteration: 52050
loss: 1.0246657133102417,grad_norm: 0.9544795491587487, iteration: 52051
loss: 0.9719818234443665,grad_norm: 0.9770596381263483, iteration: 52052
loss: 1.0020431280136108,grad_norm: 0.8728882888957451, iteration: 52053
loss: 1.0065183639526367,grad_norm: 0.9999992249192868, iteration: 52054
loss: 0.9857790470123291,grad_norm: 0.8907311667040572, iteration: 52055
loss: 0.9964391589164734,grad_norm: 0.7970277956937459, iteration: 52056
loss: 0.9658619165420532,grad_norm: 0.793889192778973, iteration: 52057
loss: 0.9962568879127502,grad_norm: 0.9999991110191813, iteration: 52058
loss: 1.0155432224273682,grad_norm: 0.9999990269684917, iteration: 52059
loss: 0.9998276829719543,grad_norm: 0.8658961841407613, iteration: 52060
loss: 0.9752320051193237,grad_norm: 0.9597726572180953, iteration: 52061
loss: 0.9677978157997131,grad_norm: 0.8846321306429887, iteration: 52062
loss: 1.0350663661956787,grad_norm: 0.9999991256472609, iteration: 52063
loss: 0.9925689101219177,grad_norm: 0.8569783097856306, iteration: 52064
loss: 1.027597427368164,grad_norm: 0.9999989895350127, iteration: 52065
loss: 0.9807144403457642,grad_norm: 0.9999992216922375, iteration: 52066
loss: 1.0023726224899292,grad_norm: 0.8536179604506986, iteration: 52067
loss: 0.9596199989318848,grad_norm: 0.8184826923371058, iteration: 52068
loss: 1.0180679559707642,grad_norm: 0.8803498770804447, iteration: 52069
loss: 1.0015586614608765,grad_norm: 0.9558992752242753, iteration: 52070
loss: 0.9727071523666382,grad_norm: 0.8234047420416238, iteration: 52071
loss: 0.9806499481201172,grad_norm: 0.9999992022477497, iteration: 52072
loss: 1.0131807327270508,grad_norm: 0.884875561806051, iteration: 52073
loss: 0.9811467528343201,grad_norm: 0.8095792078182755, iteration: 52074
loss: 1.01837956905365,grad_norm: 0.9999990875405959, iteration: 52075
loss: 0.972358226776123,grad_norm: 0.99999902977682, iteration: 52076
loss: 1.0226240158081055,grad_norm: 0.999999024098767, iteration: 52077
loss: 0.9936773777008057,grad_norm: 0.9469340312189047, iteration: 52078
loss: 1.0034661293029785,grad_norm: 0.7318008858704838, iteration: 52079
loss: 0.9940736889839172,grad_norm: 0.999998940756849, iteration: 52080
loss: 1.0382375717163086,grad_norm: 0.8088142716725033, iteration: 52081
loss: 0.9743425250053406,grad_norm: 0.9999990923016776, iteration: 52082
loss: 1.0256545543670654,grad_norm: 0.7903325751699783, iteration: 52083
loss: 1.015219807624817,grad_norm: 0.8343395734176037, iteration: 52084
loss: 1.0121475458145142,grad_norm: 0.9048816038510268, iteration: 52085
loss: 1.020585298538208,grad_norm: 0.9550127272758671, iteration: 52086
loss: 1.0427002906799316,grad_norm: 0.9999992564279994, iteration: 52087
loss: 1.0132880210876465,grad_norm: 0.8703667915980869, iteration: 52088
loss: 1.0139086246490479,grad_norm: 0.993412401255443, iteration: 52089
loss: 0.987848162651062,grad_norm: 0.8722415733591217, iteration: 52090
loss: 1.0717945098876953,grad_norm: 0.8299448821318781, iteration: 52091
loss: 1.0012959241867065,grad_norm: 0.9132568635160755, iteration: 52092
loss: 0.9899129867553711,grad_norm: 0.999999232123513, iteration: 52093
loss: 0.9990387558937073,grad_norm: 0.9999992028685384, iteration: 52094
loss: 0.9926393628120422,grad_norm: 0.9012402471387502, iteration: 52095
loss: 0.9685019254684448,grad_norm: 0.7825897970940143, iteration: 52096
loss: 1.0365816354751587,grad_norm: 0.9999990222280962, iteration: 52097
loss: 1.0092463493347168,grad_norm: 0.9999991223002435, iteration: 52098
loss: 0.9642133116722107,grad_norm: 0.9973954860673929, iteration: 52099
loss: 0.9892178177833557,grad_norm: 0.8067617591189153, iteration: 52100
loss: 1.010514736175537,grad_norm: 0.9999995617594891, iteration: 52101
loss: 1.0111091136932373,grad_norm: 0.999999157240694, iteration: 52102
loss: 0.9525337815284729,grad_norm: 0.9999991444248296, iteration: 52103
loss: 1.012764811515808,grad_norm: 0.9663082962990699, iteration: 52104
loss: 1.017789602279663,grad_norm: 0.9999991464489104, iteration: 52105
loss: 1.0180166959762573,grad_norm: 0.9180673869625606, iteration: 52106
loss: 0.9866319894790649,grad_norm: 0.8895823546285675, iteration: 52107
loss: 1.0110410451889038,grad_norm: 0.9223489726424084, iteration: 52108
loss: 1.0305149555206299,grad_norm: 0.9999990335452237, iteration: 52109
loss: 1.0070922374725342,grad_norm: 0.9999990597810511, iteration: 52110
loss: 0.9530443549156189,grad_norm: 0.9529642137597596, iteration: 52111
loss: 0.9682726263999939,grad_norm: 0.9999989941107371, iteration: 52112
loss: 1.0182186365127563,grad_norm: 0.9695162891688198, iteration: 52113
loss: 0.9891154170036316,grad_norm: 0.9999991158943786, iteration: 52114
loss: 0.9978735446929932,grad_norm: 0.8510195620843237, iteration: 52115
loss: 1.025033712387085,grad_norm: 0.9999990903268211, iteration: 52116
loss: 0.9766535758972168,grad_norm: 0.8965795998822906, iteration: 52117
loss: 1.0206102132797241,grad_norm: 0.8097324638356188, iteration: 52118
loss: 0.9782018661499023,grad_norm: 0.823287718612664, iteration: 52119
loss: 1.0079669952392578,grad_norm: 0.7521132143479595, iteration: 52120
loss: 0.995737612247467,grad_norm: 0.9580546204565347, iteration: 52121
loss: 0.9976850152015686,grad_norm: 0.871143743297322, iteration: 52122
loss: 0.9865962862968445,grad_norm: 0.861020786351463, iteration: 52123
loss: 1.0748307704925537,grad_norm: 0.9999992883596264, iteration: 52124
loss: 1.017219066619873,grad_norm: 0.960167016252762, iteration: 52125
loss: 1.0128570795059204,grad_norm: 0.889715045472597, iteration: 52126
loss: 0.9937507510185242,grad_norm: 0.9999990638037278, iteration: 52127
loss: 0.9734387397766113,grad_norm: 0.8444404929981608, iteration: 52128
loss: 0.9845298528671265,grad_norm: 0.7627901292250175, iteration: 52129
loss: 1.0056002140045166,grad_norm: 0.9258881864124373, iteration: 52130
loss: 1.031579613685608,grad_norm: 0.9106911269754491, iteration: 52131
loss: 1.018706202507019,grad_norm: 0.9999992952538694, iteration: 52132
loss: 0.999066948890686,grad_norm: 0.9119680215611086, iteration: 52133
loss: 1.0157281160354614,grad_norm: 0.8788197646263302, iteration: 52134
loss: 0.9961950778961182,grad_norm: 0.7875294669931636, iteration: 52135
loss: 0.978844165802002,grad_norm: 0.8019630592570366, iteration: 52136
loss: 1.0228583812713623,grad_norm: 0.8247457997436741, iteration: 52137
loss: 0.9881327748298645,grad_norm: 0.999999613114209, iteration: 52138
loss: 0.9912707805633545,grad_norm: 0.8515131587078105, iteration: 52139
loss: 0.9716147184371948,grad_norm: 0.9225187618627221, iteration: 52140
loss: 0.99261075258255,grad_norm: 0.999999218540328, iteration: 52141
loss: 1.0198677778244019,grad_norm: 0.9565260358646593, iteration: 52142
loss: 0.9782660603523254,grad_norm: 0.964816406665597, iteration: 52143
loss: 1.0084693431854248,grad_norm: 0.9999990702900778, iteration: 52144
loss: 1.0225986242294312,grad_norm: 0.7706645820658031, iteration: 52145
loss: 1.0074588060379028,grad_norm: 0.9492503173716618, iteration: 52146
loss: 1.0130949020385742,grad_norm: 0.9887525673890885, iteration: 52147
loss: 0.9980278611183167,grad_norm: 0.849427168196794, iteration: 52148
loss: 1.011086106300354,grad_norm: 0.8928737338864847, iteration: 52149
loss: 1.0167009830474854,grad_norm: 0.9999990800089735, iteration: 52150
loss: 0.9978513121604919,grad_norm: 0.8374508439051525, iteration: 52151
loss: 1.031901478767395,grad_norm: 0.8541282983169074, iteration: 52152
loss: 1.0110324621200562,grad_norm: 0.7614265822431479, iteration: 52153
loss: 0.9821334481239319,grad_norm: 0.7577853307799157, iteration: 52154
loss: 0.974982500076294,grad_norm: 0.8663524438091891, iteration: 52155
loss: 0.9922794103622437,grad_norm: 0.9999991257627252, iteration: 52156
loss: 0.9807417392730713,grad_norm: 0.8889089511773641, iteration: 52157
loss: 1.0149147510528564,grad_norm: 0.8548955087994906, iteration: 52158
loss: 1.0182812213897705,grad_norm: 0.9388951152122248, iteration: 52159
loss: 1.0036903619766235,grad_norm: 0.9999990294627156, iteration: 52160
loss: 1.0019416809082031,grad_norm: 0.9999991648649316, iteration: 52161
loss: 1.024775505065918,grad_norm: 0.8834213303217389, iteration: 52162
loss: 1.0334311723709106,grad_norm: 0.9120192187554652, iteration: 52163
loss: 1.0451716184616089,grad_norm: 0.9999992298239642, iteration: 52164
loss: 1.0501137971878052,grad_norm: 0.9999994692665521, iteration: 52165
loss: 0.9800107479095459,grad_norm: 0.8794212065814595, iteration: 52166
loss: 1.0206223726272583,grad_norm: 0.8608296850565378, iteration: 52167
loss: 0.9959378242492676,grad_norm: 0.9947893147347305, iteration: 52168
loss: 0.98768550157547,grad_norm: 0.9999989762094151, iteration: 52169
loss: 0.9925611019134521,grad_norm: 0.8281209178405615, iteration: 52170
loss: 1.0166269540786743,grad_norm: 0.8792392306385697, iteration: 52171
loss: 0.9934903979301453,grad_norm: 0.9923707795319194, iteration: 52172
loss: 1.0343749523162842,grad_norm: 0.9999990951423214, iteration: 52173
loss: 1.0697712898254395,grad_norm: 0.9999991105794275, iteration: 52174
loss: 1.0046411752700806,grad_norm: 0.9999992887395639, iteration: 52175
loss: 0.9907405376434326,grad_norm: 0.947535003636234, iteration: 52176
loss: 1.0129674673080444,grad_norm: 0.8410526733566182, iteration: 52177
loss: 0.9910317659378052,grad_norm: 0.819769987276449, iteration: 52178
loss: 1.0088298320770264,grad_norm: 0.9999990710925348, iteration: 52179
loss: 1.0202959775924683,grad_norm: 0.9999990847698979, iteration: 52180
loss: 1.0078879594802856,grad_norm: 0.9365775772421504, iteration: 52181
loss: 1.002447247505188,grad_norm: 0.9643146260668477, iteration: 52182
loss: 0.989982545375824,grad_norm: 0.9432392391497447, iteration: 52183
loss: 1.0090278387069702,grad_norm: 0.8313825976867188, iteration: 52184
loss: 1.0120766162872314,grad_norm: 0.9356734579613801, iteration: 52185
loss: 0.9971133470535278,grad_norm: 0.8037397141495579, iteration: 52186
loss: 0.9890896677970886,grad_norm: 0.8893979019667732, iteration: 52187
loss: 1.0287139415740967,grad_norm: 0.9273070397169697, iteration: 52188
loss: 0.9750668406486511,grad_norm: 0.877498480140557, iteration: 52189
loss: 0.980988085269928,grad_norm: 0.9182744655333984, iteration: 52190
loss: 1.0039087533950806,grad_norm: 0.9171889589511977, iteration: 52191
loss: 1.018141269683838,grad_norm: 0.9599771006229953, iteration: 52192
loss: 1.0196666717529297,grad_norm: 0.9940882926735262, iteration: 52193
loss: 1.0651742219924927,grad_norm: 0.994481415211186, iteration: 52194
loss: 1.0071349143981934,grad_norm: 0.9367581317476207, iteration: 52195
loss: 0.9830561280250549,grad_norm: 0.9390094304266862, iteration: 52196
loss: 1.0124938488006592,grad_norm: 0.9999991359270842, iteration: 52197
loss: 1.0287227630615234,grad_norm: 0.9980508403944013, iteration: 52198
loss: 1.0279481410980225,grad_norm: 0.8742694101081564, iteration: 52199
loss: 1.0521103143692017,grad_norm: 0.9999989975439644, iteration: 52200
loss: 1.0388035774230957,grad_norm: 0.7901434652680086, iteration: 52201
loss: 0.9991121292114258,grad_norm: 0.8058687326170472, iteration: 52202
loss: 0.9885504841804504,grad_norm: 0.8016749496064167, iteration: 52203
loss: 1.0052087306976318,grad_norm: 0.7998300362125151, iteration: 52204
loss: 1.0116727352142334,grad_norm: 0.8965032917082099, iteration: 52205
loss: 1.018222689628601,grad_norm: 0.9252473435044077, iteration: 52206
loss: 1.0134646892547607,grad_norm: 0.9999992608818753, iteration: 52207
loss: 1.0120501518249512,grad_norm: 0.9999993580575962, iteration: 52208
loss: 1.020795226097107,grad_norm: 0.9949739756905273, iteration: 52209
loss: 1.0060628652572632,grad_norm: 0.8162645562050047, iteration: 52210
loss: 0.9991514682769775,grad_norm: 0.9999991603723204, iteration: 52211
loss: 0.9821183681488037,grad_norm: 0.9999990182422216, iteration: 52212
loss: 1.0200231075286865,grad_norm: 0.9016412241345593, iteration: 52213
loss: 1.034847617149353,grad_norm: 0.8988231394263644, iteration: 52214
loss: 0.9763859510421753,grad_norm: 0.9090657043175054, iteration: 52215
loss: 1.0559613704681396,grad_norm: 0.7690038169826339, iteration: 52216
loss: 1.0297489166259766,grad_norm: 0.9999991749709128, iteration: 52217
loss: 1.0249969959259033,grad_norm: 0.8237183346073911, iteration: 52218
loss: 1.0058974027633667,grad_norm: 0.9318701682695232, iteration: 52219
loss: 0.9947298169136047,grad_norm: 0.9999990055303005, iteration: 52220
loss: 0.9841849207878113,grad_norm: 0.9398605291751504, iteration: 52221
loss: 1.0005625486373901,grad_norm: 0.9215053744750443, iteration: 52222
loss: 0.9951660633087158,grad_norm: 0.9999991003373239, iteration: 52223
loss: 1.0067278146743774,grad_norm: 0.9999993143095239, iteration: 52224
loss: 0.9740896821022034,grad_norm: 0.8806142172471759, iteration: 52225
loss: 1.0513513088226318,grad_norm: 0.9138905873405782, iteration: 52226
loss: 1.0043524503707886,grad_norm: 0.9201414679643661, iteration: 52227
loss: 0.9868853092193604,grad_norm: 0.9999990514754287, iteration: 52228
loss: 0.9734780788421631,grad_norm: 0.9678245745133887, iteration: 52229
loss: 0.9949312806129456,grad_norm: 0.9999990210621477, iteration: 52230
loss: 1.007582187652588,grad_norm: 0.8341379155797896, iteration: 52231
loss: 0.997923731803894,grad_norm: 0.8262426442488976, iteration: 52232
loss: 1.0028581619262695,grad_norm: 0.8964192352339639, iteration: 52233
loss: 0.9872620701789856,grad_norm: 1.0000000770906465, iteration: 52234
loss: 0.9769616723060608,grad_norm: 0.8021729185340349, iteration: 52235
loss: 1.012862205505371,grad_norm: 0.999999912127499, iteration: 52236
loss: 1.0174822807312012,grad_norm: 0.9999990274346483, iteration: 52237
loss: 1.0044081211090088,grad_norm: 0.9326875096225893, iteration: 52238
loss: 0.9979758858680725,grad_norm: 0.9092694791645983, iteration: 52239
loss: 1.0213550329208374,grad_norm: 0.9999991279457727, iteration: 52240
loss: 0.9994099140167236,grad_norm: 0.9999991121789223, iteration: 52241
loss: 0.9627161026000977,grad_norm: 0.9999991887149222, iteration: 52242
loss: 0.9884114861488342,grad_norm: 0.9356260875018808, iteration: 52243
loss: 1.007223129272461,grad_norm: 0.8604309923042214, iteration: 52244
loss: 0.970870316028595,grad_norm: 0.9999990717485652, iteration: 52245
loss: 1.0160112380981445,grad_norm: 0.9999991365250527, iteration: 52246
loss: 0.9646345973014832,grad_norm: 0.889318669573766, iteration: 52247
loss: 1.0186285972595215,grad_norm: 0.9999990825624545, iteration: 52248
loss: 1.0024168491363525,grad_norm: 0.9999996149343783, iteration: 52249
loss: 0.9804375171661377,grad_norm: 0.9806336323561403, iteration: 52250
loss: 1.0334514379501343,grad_norm: 0.9999991211922837, iteration: 52251
loss: 1.0895793437957764,grad_norm: 0.8688517813964265, iteration: 52252
loss: 1.0086569786071777,grad_norm: 0.8321052776183259, iteration: 52253
loss: 0.972181499004364,grad_norm: 0.882229809891525, iteration: 52254
loss: 0.9763554334640503,grad_norm: 0.8187414076450993, iteration: 52255
loss: 1.0215400457382202,grad_norm: 0.9771434792662261, iteration: 52256
loss: 1.0028102397918701,grad_norm: 0.9518142500390241, iteration: 52257
loss: 1.0166232585906982,grad_norm: 0.9999991260380241, iteration: 52258
loss: 1.024013876914978,grad_norm: 0.9298313008845591, iteration: 52259
loss: 0.979376494884491,grad_norm: 0.9999995538716617, iteration: 52260
loss: 1.033206582069397,grad_norm: 0.9999991955557265, iteration: 52261
loss: 1.0436413288116455,grad_norm: 0.7998004000136975, iteration: 52262
loss: 1.0073342323303223,grad_norm: 0.8047004360180869, iteration: 52263
loss: 0.9998900294303894,grad_norm: 0.8403105243028042, iteration: 52264
loss: 1.0679394006729126,grad_norm: 0.9999992087011621, iteration: 52265
loss: 0.9656115770339966,grad_norm: 0.9999991759988442, iteration: 52266
loss: 1.0220407247543335,grad_norm: 0.8731385034336299, iteration: 52267
loss: 1.0465184450149536,grad_norm: 0.9951725398706326, iteration: 52268
loss: 1.0113006830215454,grad_norm: 0.999999852908338, iteration: 52269
loss: 1.01675546169281,grad_norm: 0.8825306717811556, iteration: 52270
loss: 1.0067225694656372,grad_norm: 0.8933352481068294, iteration: 52271
loss: 0.9786261916160583,grad_norm: 0.9332641181714438, iteration: 52272
loss: 1.0099318027496338,grad_norm: 0.999998983101969, iteration: 52273
loss: 1.0085194110870361,grad_norm: 0.9999991142542829, iteration: 52274
loss: 1.0250928401947021,grad_norm: 0.8331701294308761, iteration: 52275
loss: 1.0030872821807861,grad_norm: 0.9230994036050673, iteration: 52276
loss: 1.0114437341690063,grad_norm: 0.9999990442325944, iteration: 52277
loss: 1.0041732788085938,grad_norm: 0.8063331996892251, iteration: 52278
loss: 1.0034654140472412,grad_norm: 0.9922848659068809, iteration: 52279
loss: 1.044588565826416,grad_norm: 0.9999992354285134, iteration: 52280
loss: 1.0014804601669312,grad_norm: 0.8536054397634888, iteration: 52281
loss: 1.049299955368042,grad_norm: 0.999999867426285, iteration: 52282
loss: 1.0272146463394165,grad_norm: 0.8883674520222787, iteration: 52283
loss: 0.9765910506248474,grad_norm: 0.9999991441219992, iteration: 52284
loss: 0.9928176403045654,grad_norm: 0.9999991750317937, iteration: 52285
loss: 1.013643741607666,grad_norm: 0.9999990545518781, iteration: 52286
loss: 1.014499306678772,grad_norm: 0.9640133832059674, iteration: 52287
loss: 1.0353394746780396,grad_norm: 0.9871305777387573, iteration: 52288
loss: 0.9755785465240479,grad_norm: 0.9501925051986267, iteration: 52289
loss: 0.9593445658683777,grad_norm: 0.915284987564033, iteration: 52290
loss: 0.9961133003234863,grad_norm: 0.9389480089917527, iteration: 52291
loss: 0.9709488749504089,grad_norm: 0.9241508901395868, iteration: 52292
loss: 1.0391591787338257,grad_norm: 0.838829373506509, iteration: 52293
loss: 0.9718227386474609,grad_norm: 0.89203980560715, iteration: 52294
loss: 0.9505390524864197,grad_norm: 0.9749436571817381, iteration: 52295
loss: 0.9696180820465088,grad_norm: 0.9999989888760263, iteration: 52296
loss: 1.0093038082122803,grad_norm: 0.9791287046408503, iteration: 52297
loss: 0.9987199306488037,grad_norm: 0.9258683460924232, iteration: 52298
loss: 0.9879934191703796,grad_norm: 0.999999231891482, iteration: 52299
loss: 0.9983733892440796,grad_norm: 0.9999995771091262, iteration: 52300
loss: 1.0102125406265259,grad_norm: 0.9277196971747166, iteration: 52301
loss: 1.0098891258239746,grad_norm: 0.999999177569685, iteration: 52302
loss: 1.0271104574203491,grad_norm: 0.9875638959029297, iteration: 52303
loss: 1.0153090953826904,grad_norm: 0.7195650426367076, iteration: 52304
loss: 0.9666227698326111,grad_norm: 0.9892861112932383, iteration: 52305
loss: 1.0192068815231323,grad_norm: 0.9092459982641423, iteration: 52306
loss: 1.00022554397583,grad_norm: 0.898088383793636, iteration: 52307
loss: 0.999194860458374,grad_norm: 0.9126516637931646, iteration: 52308
loss: 0.9863524436950684,grad_norm: 0.9008716748313155, iteration: 52309
loss: 0.9805739521980286,grad_norm: 0.9999991653033058, iteration: 52310
loss: 1.0424985885620117,grad_norm: 0.9999995542829486, iteration: 52311
loss: 1.017418622970581,grad_norm: 1.000000023350068, iteration: 52312
loss: 1.0197513103485107,grad_norm: 0.9985966798644653, iteration: 52313
loss: 0.9755990505218506,grad_norm: 0.8523253140309766, iteration: 52314
loss: 1.0127825736999512,grad_norm: 0.9949922552848308, iteration: 52315
loss: 1.0276719331741333,grad_norm: 0.9999991679962668, iteration: 52316
loss: 1.0100163221359253,grad_norm: 0.9999991421259798, iteration: 52317
loss: 0.9806318879127502,grad_norm: 0.9531323047626785, iteration: 52318
loss: 1.0046600103378296,grad_norm: 0.9917064215017838, iteration: 52319
loss: 0.9837332367897034,grad_norm: 0.999999391515987, iteration: 52320
loss: 0.9977259039878845,grad_norm: 0.99999913373369, iteration: 52321
loss: 0.977863073348999,grad_norm: 0.999999020962262, iteration: 52322
loss: 0.979522705078125,grad_norm: 0.9876840384505422, iteration: 52323
loss: 0.9982249140739441,grad_norm: 0.9999991117472192, iteration: 52324
loss: 1.0422947406768799,grad_norm: 0.9432443360938179, iteration: 52325
loss: 0.9974629878997803,grad_norm: 0.9009338706062141, iteration: 52326
loss: 0.9750053882598877,grad_norm: 0.9515356107491358, iteration: 52327
loss: 1.0834542512893677,grad_norm: 0.9999994865671177, iteration: 52328
loss: 1.0409830808639526,grad_norm: 0.9999997825218178, iteration: 52329
loss: 1.0002399682998657,grad_norm: 0.9999999243667007, iteration: 52330
loss: 0.9830537438392639,grad_norm: 0.7683770134463397, iteration: 52331
loss: 1.0165055990219116,grad_norm: 0.9999993123192616, iteration: 52332
loss: 1.1194790601730347,grad_norm: 0.9999993114178354, iteration: 52333
loss: 1.0031641721725464,grad_norm: 0.8316836279663735, iteration: 52334
loss: 1.0180084705352783,grad_norm: 0.9999991118199677, iteration: 52335
loss: 1.0286928415298462,grad_norm: 0.999999535893332, iteration: 52336
loss: 1.0310550928115845,grad_norm: 0.7949651580198488, iteration: 52337
loss: 0.989571750164032,grad_norm: 0.9728942643639701, iteration: 52338
loss: 1.019049882888794,grad_norm: 0.9861217131507672, iteration: 52339
loss: 1.0321170091629028,grad_norm: 0.9999996867362492, iteration: 52340
loss: 1.0303388833999634,grad_norm: 0.9935754010268246, iteration: 52341
loss: 1.024117112159729,grad_norm: 0.8894291951358411, iteration: 52342
loss: 0.9946969151496887,grad_norm: 0.999999622497261, iteration: 52343
loss: 0.9934461116790771,grad_norm: 0.8737036439245232, iteration: 52344
loss: 0.9540817141532898,grad_norm: 0.999999156723845, iteration: 52345
loss: 1.0189635753631592,grad_norm: 0.9822513983765772, iteration: 52346
loss: 0.9976708889007568,grad_norm: 0.9353367163508605, iteration: 52347
loss: 1.0285460948944092,grad_norm: 0.9999991935456434, iteration: 52348
loss: 0.9791724681854248,grad_norm: 0.8661106633678206, iteration: 52349
loss: 0.9949190020561218,grad_norm: 0.9660604681635432, iteration: 52350
loss: 1.0458345413208008,grad_norm: 0.9999996211851063, iteration: 52351
loss: 1.0313860177993774,grad_norm: 0.872259212981107, iteration: 52352
loss: 1.1760085821151733,grad_norm: 0.9999992584209022, iteration: 52353
loss: 1.297194004058838,grad_norm: 0.9999996555989838, iteration: 52354
loss: 1.00038743019104,grad_norm: 0.9356076525512346, iteration: 52355
loss: 0.9925693869590759,grad_norm: 0.9999990676462183, iteration: 52356
loss: 1.044777274131775,grad_norm: 0.9999991717168921, iteration: 52357
loss: 0.9983643293380737,grad_norm: 0.9929878847457679, iteration: 52358
loss: 0.9811329245567322,grad_norm: 0.9999991263735246, iteration: 52359
loss: 0.993087112903595,grad_norm: 0.9999990360646435, iteration: 52360
loss: 1.29932701587677,grad_norm: 0.9999999476060285, iteration: 52361
loss: 1.0136878490447998,grad_norm: 0.980457305136488, iteration: 52362
loss: 0.9582789540290833,grad_norm: 0.9339173010291382, iteration: 52363
loss: 1.0221977233886719,grad_norm: 0.9884354774618992, iteration: 52364
loss: 1.0012140274047852,grad_norm: 0.9999992180767742, iteration: 52365
loss: 1.0220297574996948,grad_norm: 0.9999993140190517, iteration: 52366
loss: 0.9866558909416199,grad_norm: 0.9813577295665592, iteration: 52367
loss: 1.023331642150879,grad_norm: 0.9999994536542608, iteration: 52368
loss: 0.9815079569816589,grad_norm: 0.9999991767477879, iteration: 52369
loss: 0.9892789721488953,grad_norm: 0.8448088930567458, iteration: 52370
loss: 1.0023982524871826,grad_norm: 0.9999993349022487, iteration: 52371
loss: 1.0006591081619263,grad_norm: 0.9326573982524504, iteration: 52372
loss: 1.0166016817092896,grad_norm: 0.9414036578658158, iteration: 52373
loss: 1.0182907581329346,grad_norm: 0.8467715413211927, iteration: 52374
loss: 1.0699076652526855,grad_norm: 0.9999991193161106, iteration: 52375
loss: 1.0328105688095093,grad_norm: 0.9985726270490397, iteration: 52376
loss: 1.0025376081466675,grad_norm: 0.9445765222296666, iteration: 52377
loss: 0.9890027046203613,grad_norm: 0.9056730802738849, iteration: 52378
loss: 0.9840455651283264,grad_norm: 0.9999991963481184, iteration: 52379
loss: 1.041449785232544,grad_norm: 0.9999995906137933, iteration: 52380
loss: 0.9933473467826843,grad_norm: 0.9148422628458406, iteration: 52381
loss: 1.0160512924194336,grad_norm: 0.9999990809278887, iteration: 52382
loss: 1.0456401109695435,grad_norm: 0.9404362625455075, iteration: 52383
loss: 0.9889209866523743,grad_norm: 0.7573094464811713, iteration: 52384
loss: 1.0036430358886719,grad_norm: 0.9999991857332265, iteration: 52385
loss: 1.0131903886795044,grad_norm: 0.9444278007267307, iteration: 52386
loss: 1.005963921546936,grad_norm: 0.949343269821277, iteration: 52387
loss: 0.9728941917419434,grad_norm: 0.9999992993991331, iteration: 52388
loss: 0.9993804097175598,grad_norm: 0.9410424125088303, iteration: 52389
loss: 1.0187596082687378,grad_norm: 0.9300585135364404, iteration: 52390
loss: 1.0402079820632935,grad_norm: 0.9753369629136525, iteration: 52391
loss: 0.9888547658920288,grad_norm: 0.9999991996665988, iteration: 52392
loss: 1.0256279706954956,grad_norm: 0.999999569763909, iteration: 52393
loss: 0.987338125705719,grad_norm: 0.999999182980476, iteration: 52394
loss: 0.955022394657135,grad_norm: 0.8833883736913158, iteration: 52395
loss: 1.0009092092514038,grad_norm: 0.9999996917548885, iteration: 52396
loss: 0.9983969330787659,grad_norm: 0.9597267976005076, iteration: 52397
loss: 0.9837049245834351,grad_norm: 0.8323699689530587, iteration: 52398
loss: 0.9656426310539246,grad_norm: 0.8447212212743533, iteration: 52399
loss: 0.9876794219017029,grad_norm: 0.8630477640043427, iteration: 52400
loss: 0.9867745041847229,grad_norm: 0.9999991290729274, iteration: 52401
loss: 0.9758025407791138,grad_norm: 0.9116949098820226, iteration: 52402
loss: 1.0341906547546387,grad_norm: 0.9237129105522968, iteration: 52403
loss: 0.98097163438797,grad_norm: 0.97326345705593, iteration: 52404
loss: 1.1002525091171265,grad_norm: 0.9999992126733463, iteration: 52405
loss: 0.9965881705284119,grad_norm: 0.9999992042665593, iteration: 52406
loss: 0.964560866355896,grad_norm: 0.9999989565377242, iteration: 52407
loss: 0.9951481819152832,grad_norm: 0.9999997601350412, iteration: 52408
loss: 0.9901201725006104,grad_norm: 0.9999990868133584, iteration: 52409
loss: 1.0213593244552612,grad_norm: 0.9571360299648606, iteration: 52410
loss: 0.9946962594985962,grad_norm: 0.9999990266168813, iteration: 52411
loss: 1.0124151706695557,grad_norm: 0.8839307074531783, iteration: 52412
loss: 1.0111503601074219,grad_norm: 0.9999990253981461, iteration: 52413
loss: 1.0697962045669556,grad_norm: 0.9999995170155139, iteration: 52414
loss: 1.000077486038208,grad_norm: 0.9580094513127928, iteration: 52415
loss: 1.017032265663147,grad_norm: 0.8689313235370001, iteration: 52416
loss: 1.0214574337005615,grad_norm: 0.9999994919666056, iteration: 52417
loss: 1.0459539890289307,grad_norm: 0.9999990382503944, iteration: 52418
loss: 0.967660129070282,grad_norm: 0.9999991165230616, iteration: 52419
loss: 1.0340964794158936,grad_norm: 0.9227379963370227, iteration: 52420
loss: 1.0282446146011353,grad_norm: 0.9999992431515167, iteration: 52421
loss: 1.0233536958694458,grad_norm: 0.7749480702533469, iteration: 52422
loss: 0.9871357679367065,grad_norm: 0.9999991708467383, iteration: 52423
loss: 0.9780812859535217,grad_norm: 0.8618886919372603, iteration: 52424
loss: 0.9709349274635315,grad_norm: 0.9999992698848383, iteration: 52425
loss: 1.095304250717163,grad_norm: 0.9999990446801404, iteration: 52426
loss: 0.9962710738182068,grad_norm: 0.7844817049930377, iteration: 52427
loss: 0.9674317240715027,grad_norm: 0.9127800190967831, iteration: 52428
loss: 0.9692191481590271,grad_norm: 0.9376403810096486, iteration: 52429
loss: 1.0275920629501343,grad_norm: 0.9999990224710712, iteration: 52430
loss: 1.0630990266799927,grad_norm: 0.9999994580224894, iteration: 52431
loss: 1.020637035369873,grad_norm: 0.7726270391242758, iteration: 52432
loss: 1.0031192302703857,grad_norm: 0.9567285565324405, iteration: 52433
loss: 1.0044209957122803,grad_norm: 0.9464753933847838, iteration: 52434
loss: 0.9908728003501892,grad_norm: 0.9999991584359703, iteration: 52435
loss: 1.0028622150421143,grad_norm: 0.9999995675700307, iteration: 52436
loss: 1.0139060020446777,grad_norm: 0.9999992026746094, iteration: 52437
loss: 1.011237621307373,grad_norm: 0.9365628185794942, iteration: 52438
loss: 0.9611125588417053,grad_norm: 0.8021386583875812, iteration: 52439
loss: 1.009446620941162,grad_norm: 0.918725383038517, iteration: 52440
loss: 1.007857322692871,grad_norm: 0.8967607534675205, iteration: 52441
loss: 0.991981565952301,grad_norm: 0.999999248088203, iteration: 52442
loss: 0.9844262003898621,grad_norm: 0.8907313269935712, iteration: 52443
loss: 0.9944891929626465,grad_norm: 0.9999991585762023, iteration: 52444
loss: 0.9919322729110718,grad_norm: 0.8597538844589033, iteration: 52445
loss: 1.0231828689575195,grad_norm: 0.9604896983985654, iteration: 52446
loss: 1.0279542207717896,grad_norm: 0.8167734897657184, iteration: 52447
loss: 0.9637179970741272,grad_norm: 0.9999990686509348, iteration: 52448
loss: 1.0161844491958618,grad_norm: 0.7959220612873475, iteration: 52449
loss: 1.2335470914840698,grad_norm: 0.9999994722613622, iteration: 52450
loss: 1.0270864963531494,grad_norm: 0.9999989729432232, iteration: 52451
loss: 1.0048496723175049,grad_norm: 0.9583842126018308, iteration: 52452
loss: 0.9935813546180725,grad_norm: 0.999999060981524, iteration: 52453
loss: 0.9798079133033752,grad_norm: 0.970494347155185, iteration: 52454
loss: 1.015690803527832,grad_norm: 0.794357159239306, iteration: 52455
loss: 0.9813823699951172,grad_norm: 0.969396835963564, iteration: 52456
loss: 1.023689866065979,grad_norm: 0.8800438794377725, iteration: 52457
loss: 0.9543256163597107,grad_norm: 0.9999992250282633, iteration: 52458
loss: 1.0088506937026978,grad_norm: 0.9737964653835807, iteration: 52459
loss: 1.0175408124923706,grad_norm: 0.9361580285615311, iteration: 52460
loss: 1.0463082790374756,grad_norm: 0.9999994244994317, iteration: 52461
loss: 0.9980568885803223,grad_norm: 0.9999992748517732, iteration: 52462
loss: 0.9730667471885681,grad_norm: 0.8178035267965473, iteration: 52463
loss: 0.9847573041915894,grad_norm: 0.8114630980050522, iteration: 52464
loss: 1.0692898035049438,grad_norm: 0.9999997905346806, iteration: 52465
loss: 0.9938703775405884,grad_norm: 0.88322197196976, iteration: 52466
loss: 1.0186104774475098,grad_norm: 0.8191002340607556, iteration: 52467
loss: 0.9689393639564514,grad_norm: 0.9654927221961255, iteration: 52468
loss: 0.9880324006080627,grad_norm: 0.9999989899793912, iteration: 52469
loss: 1.0261261463165283,grad_norm: 0.9999990891193024, iteration: 52470
loss: 0.9817808270454407,grad_norm: 0.9999990896959036, iteration: 52471
loss: 1.0472071170806885,grad_norm: 0.9999997295220108, iteration: 52472
loss: 0.9930513501167297,grad_norm: 0.8969167567270342, iteration: 52473
loss: 0.9823758602142334,grad_norm: 0.8976536789920283, iteration: 52474
loss: 0.9756742119789124,grad_norm: 0.901168673860274, iteration: 52475
loss: 0.9722250699996948,grad_norm: 0.9508836002601728, iteration: 52476
loss: 1.0518876314163208,grad_norm: 0.9999989839556114, iteration: 52477
loss: 1.0036240816116333,grad_norm: 0.9834312858827735, iteration: 52478
loss: 1.0323278903961182,grad_norm: 0.9999995279469862, iteration: 52479
loss: 0.9877374768257141,grad_norm: 0.7823571204401514, iteration: 52480
loss: 1.0107808113098145,grad_norm: 0.9551509234860677, iteration: 52481
loss: 1.0242488384246826,grad_norm: 0.9829861147586644, iteration: 52482
loss: 1.0029175281524658,grad_norm: 0.999999004362878, iteration: 52483
loss: 1.035875678062439,grad_norm: 0.9999995067463917, iteration: 52484
loss: 1.0072404146194458,grad_norm: 0.952074989045328, iteration: 52485
loss: 0.9842507243156433,grad_norm: 0.9999991171548913, iteration: 52486
loss: 1.008592963218689,grad_norm: 0.9999991529752603, iteration: 52487
loss: 0.9755457043647766,grad_norm: 0.9465290698001113, iteration: 52488
loss: 0.9913504123687744,grad_norm: 0.9999990287452533, iteration: 52489
loss: 1.0056606531143188,grad_norm: 0.9852476701530145, iteration: 52490
loss: 1.021478533744812,grad_norm: 0.9259073157030101, iteration: 52491
loss: 0.996477484703064,grad_norm: 0.9350963393341843, iteration: 52492
loss: 1.0135220289230347,grad_norm: 0.9850270514556044, iteration: 52493
loss: 0.9867256879806519,grad_norm: 0.9999991121426166, iteration: 52494
loss: 1.0455207824707031,grad_norm: 0.9999998041591975, iteration: 52495
loss: 1.0159931182861328,grad_norm: 0.9417196935039583, iteration: 52496
loss: 0.9893715977668762,grad_norm: 0.9813612349315247, iteration: 52497
loss: 0.996035635471344,grad_norm: 0.9999994269441347, iteration: 52498
loss: 1.0022616386413574,grad_norm: 0.9646117797174232, iteration: 52499
loss: 0.992469310760498,grad_norm: 0.9999992239951219, iteration: 52500
loss: 1.018217921257019,grad_norm: 0.999999601686849, iteration: 52501
loss: 0.9959895014762878,grad_norm: 0.8825287550161676, iteration: 52502
loss: 1.0005079507827759,grad_norm: 0.9553344841333905, iteration: 52503
loss: 1.0510869026184082,grad_norm: 0.9999997875993838, iteration: 52504
loss: 0.9941116571426392,grad_norm: 0.8072160373515062, iteration: 52505
loss: 0.9956738352775574,grad_norm: 0.9999992761133368, iteration: 52506
loss: 0.9771962761878967,grad_norm: 0.9884669584036277, iteration: 52507
loss: 0.9998589158058167,grad_norm: 0.9191510210910562, iteration: 52508
loss: 1.005796194076538,grad_norm: 0.9999992804134531, iteration: 52509
loss: 0.9736737608909607,grad_norm: 0.9999993560947581, iteration: 52510
loss: 1.0123214721679688,grad_norm: 0.8469263275912656, iteration: 52511
loss: 1.0208287239074707,grad_norm: 0.7656391961297444, iteration: 52512
loss: 0.9984297156333923,grad_norm: 0.794022476400147, iteration: 52513
loss: 1.0418542623519897,grad_norm: 0.9999990725511689, iteration: 52514
loss: 0.9732747077941895,grad_norm: 0.8495009539368007, iteration: 52515
loss: 1.008129358291626,grad_norm: 0.9999993496265903, iteration: 52516
loss: 1.0016790628433228,grad_norm: 0.9999995983596135, iteration: 52517
loss: 1.0061343908309937,grad_norm: 0.9547006346266338, iteration: 52518
loss: 1.0346570014953613,grad_norm: 0.9999989791368536, iteration: 52519
loss: 0.9886485934257507,grad_norm: 0.9866778563445681, iteration: 52520
loss: 1.1467124223709106,grad_norm: 0.9999994279673516, iteration: 52521
loss: 1.0065351724624634,grad_norm: 0.9999993289454399, iteration: 52522
loss: 1.0667757987976074,grad_norm: 0.9999999413230869, iteration: 52523
loss: 0.9916419982910156,grad_norm: 0.8331022113598778, iteration: 52524
loss: 0.9768247008323669,grad_norm: 0.8904292501136225, iteration: 52525
loss: 1.0224465131759644,grad_norm: 0.9999991629419366, iteration: 52526
loss: 1.0505151748657227,grad_norm: 0.9999994537593917, iteration: 52527
loss: 1.0192146301269531,grad_norm: 0.8787544077956522, iteration: 52528
loss: 1.0017244815826416,grad_norm: 0.999999214823285, iteration: 52529
loss: 1.0364394187927246,grad_norm: 0.8833270383190623, iteration: 52530
loss: 1.0017194747924805,grad_norm: 0.9999992678325359, iteration: 52531
loss: 0.9897003173828125,grad_norm: 0.8408229447535466, iteration: 52532
loss: 1.0259487628936768,grad_norm: 0.9999996356594489, iteration: 52533
loss: 1.0486555099487305,grad_norm: 0.9999997482880941, iteration: 52534
loss: 0.9848960041999817,grad_norm: 0.8274671420061916, iteration: 52535
loss: 1.0188604593276978,grad_norm: 0.9999990519324391, iteration: 52536
loss: 0.9937731027603149,grad_norm: 0.9307307847539883, iteration: 52537
loss: 1.012732982635498,grad_norm: 0.9471513731833217, iteration: 52538
loss: 0.9991718530654907,grad_norm: 0.9176556156812126, iteration: 52539
loss: 1.0110889673233032,grad_norm: 0.6847718585728185, iteration: 52540
loss: 1.0174134969711304,grad_norm: 0.897394169160842, iteration: 52541
loss: 0.9746049642562866,grad_norm: 0.9999991591011664, iteration: 52542
loss: 0.9999999403953552,grad_norm: 0.9999996452358078, iteration: 52543
loss: 0.9836583733558655,grad_norm: 0.992234683774863, iteration: 52544
loss: 1.0510207414627075,grad_norm: 0.9999999026702558, iteration: 52545
loss: 1.087478756904602,grad_norm: 0.9999995639635461, iteration: 52546
loss: 1.0195640325546265,grad_norm: 0.8815012049460343, iteration: 52547
loss: 1.0276681184768677,grad_norm: 0.8897565188409748, iteration: 52548
loss: 1.0558998584747314,grad_norm: 0.8570195094722618, iteration: 52549
loss: 0.9955673217773438,grad_norm: 0.9999989846162343, iteration: 52550
loss: 1.0558608770370483,grad_norm: 0.9999995447981078, iteration: 52551
loss: 1.0666738748550415,grad_norm: 0.9999998134317106, iteration: 52552
loss: 1.0052812099456787,grad_norm: 0.8936496578588258, iteration: 52553
loss: 0.9977835416793823,grad_norm: 0.9999996439900374, iteration: 52554
loss: 0.9724910259246826,grad_norm: 0.9999989838121757, iteration: 52555
loss: 1.0332062244415283,grad_norm: 0.999163588198719, iteration: 52556
loss: 0.978393018245697,grad_norm: 0.9106082966486799, iteration: 52557
loss: 1.0262666940689087,grad_norm: 0.9728088287479377, iteration: 52558
loss: 1.0887736082077026,grad_norm: 0.9999991409524852, iteration: 52559
loss: 1.03730309009552,grad_norm: 0.9990294084034099, iteration: 52560
loss: 0.9962525963783264,grad_norm: 0.9809960894611356, iteration: 52561
loss: 0.9887731075286865,grad_norm: 0.8842277464095932, iteration: 52562
loss: 1.0003684759140015,grad_norm: 0.999999018194119, iteration: 52563
loss: 1.0061237812042236,grad_norm: 0.9999990731516027, iteration: 52564
loss: 1.0023202896118164,grad_norm: 0.999999338842475, iteration: 52565
loss: 1.0442157983779907,grad_norm: 0.9999995731837586, iteration: 52566
loss: 0.9935594201087952,grad_norm: 0.9341402821848425, iteration: 52567
loss: 1.0071053504943848,grad_norm: 0.9999990060192028, iteration: 52568
loss: 0.9695324897766113,grad_norm: 0.9999991620108624, iteration: 52569
loss: 0.9999532103538513,grad_norm: 0.9999997011766573, iteration: 52570
loss: 0.992529571056366,grad_norm: 0.8930472152131647, iteration: 52571
loss: 0.9842541813850403,grad_norm: 0.8246907477482659, iteration: 52572
loss: 1.0928139686584473,grad_norm: 0.9999990941554407, iteration: 52573
loss: 1.0450900793075562,grad_norm: 0.9999992695252338, iteration: 52574
loss: 0.9962130188941956,grad_norm: 0.9999993143273883, iteration: 52575
loss: 1.050377607345581,grad_norm: 0.9999991847563632, iteration: 52576
loss: 0.993968665599823,grad_norm: 0.7692164992941078, iteration: 52577
loss: 1.002773404121399,grad_norm: 0.9999996132936304, iteration: 52578
loss: 0.9793931841850281,grad_norm: 0.9853562818517896, iteration: 52579
loss: 0.9852989912033081,grad_norm: 0.999999060084729, iteration: 52580
loss: 0.9933212995529175,grad_norm: 0.9050776080143382, iteration: 52581
loss: 1.0067541599273682,grad_norm: 0.7778676004448555, iteration: 52582
loss: 1.0287365913391113,grad_norm: 0.8585281788864711, iteration: 52583
loss: 1.0282700061798096,grad_norm: 0.9999992173719101, iteration: 52584
loss: 0.9875022172927856,grad_norm: 0.8512335834157304, iteration: 52585
loss: 0.9910855293273926,grad_norm: 0.8248355509664373, iteration: 52586
loss: 1.0250773429870605,grad_norm: 0.9237747089226901, iteration: 52587
loss: 1.0216695070266724,grad_norm: 0.9280601765579689, iteration: 52588
loss: 1.0233681201934814,grad_norm: 0.7646628762453581, iteration: 52589
loss: 0.9935693144798279,grad_norm: 0.8640683305919541, iteration: 52590
loss: 1.0149614810943604,grad_norm: 0.9999989989077989, iteration: 52591
loss: 1.0478347539901733,grad_norm: 0.9957590752749378, iteration: 52592
loss: 1.0057247877120972,grad_norm: 0.9999997633923786, iteration: 52593
loss: 1.0258249044418335,grad_norm: 0.9999996325460115, iteration: 52594
loss: 1.0226707458496094,grad_norm: 0.8591724175073008, iteration: 52595
loss: 1.044814944267273,grad_norm: 0.9797717782746678, iteration: 52596
loss: 0.98465895652771,grad_norm: 0.9773190522757609, iteration: 52597
loss: 0.9674669504165649,grad_norm: 0.999999181217348, iteration: 52598
loss: 0.9890313148498535,grad_norm: 0.9624844156386804, iteration: 52599
loss: 1.0307763814926147,grad_norm: 0.9999990601866114, iteration: 52600
loss: 0.9820777177810669,grad_norm: 0.9923847521627198, iteration: 52601
loss: 1.0301995277404785,grad_norm: 0.9903075944236593, iteration: 52602
loss: 1.0358387231826782,grad_norm: 0.8869576709792127, iteration: 52603
loss: 1.0215363502502441,grad_norm: 0.962540982206602, iteration: 52604
loss: 0.9808523654937744,grad_norm: 0.7739047835368595, iteration: 52605
loss: 0.9923897981643677,grad_norm: 0.8719693488173744, iteration: 52606
loss: 0.9824546575546265,grad_norm: 0.9614426589817521, iteration: 52607
loss: 0.9938408136367798,grad_norm: 0.9999990520138454, iteration: 52608
loss: 0.9652531147003174,grad_norm: 0.999998984974432, iteration: 52609
loss: 1.0204583406448364,grad_norm: 0.9999993263554319, iteration: 52610
loss: 0.9729452729225159,grad_norm: 0.9943680060320137, iteration: 52611
loss: 0.9769319295883179,grad_norm: 0.9755870386965548, iteration: 52612
loss: 1.0305602550506592,grad_norm: 0.9999998260647674, iteration: 52613
loss: 1.1019967794418335,grad_norm: 0.9999998530477454, iteration: 52614
loss: 1.038104772567749,grad_norm: 0.9999994141471479, iteration: 52615
loss: 1.0382132530212402,grad_norm: 0.999999130818942, iteration: 52616
loss: 0.9819944500923157,grad_norm: 0.9999990497577442, iteration: 52617
loss: 1.004694938659668,grad_norm: 0.9171508405893748, iteration: 52618
loss: 0.9771257042884827,grad_norm: 0.9801915677767501, iteration: 52619
loss: 1.0155466794967651,grad_norm: 0.9999991187402937, iteration: 52620
loss: 1.0373637676239014,grad_norm: 0.8653374885588277, iteration: 52621
loss: 1.028546929359436,grad_norm: 0.9999997007242925, iteration: 52622
loss: 0.9972681999206543,grad_norm: 0.9999991777385706, iteration: 52623
loss: 1.0035637617111206,grad_norm: 0.9999991192517032, iteration: 52624
loss: 1.0124061107635498,grad_norm: 0.9229432992918905, iteration: 52625
loss: 0.9768429398536682,grad_norm: 0.9999990712563125, iteration: 52626
loss: 1.0040830373764038,grad_norm: 0.9242441602408021, iteration: 52627
loss: 0.9797412157058716,grad_norm: 0.9661981899349839, iteration: 52628
loss: 0.9859943389892578,grad_norm: 0.9712938305272376, iteration: 52629
loss: 1.0199886560440063,grad_norm: 0.7690759187889233, iteration: 52630
loss: 1.026740550994873,grad_norm: 0.8415321476974054, iteration: 52631
loss: 1.022688388824463,grad_norm: 0.9999991760060845, iteration: 52632
loss: 0.9994071125984192,grad_norm: 0.7789549060617166, iteration: 52633
loss: 1.0218029022216797,grad_norm: 0.9727724334687451, iteration: 52634
loss: 1.0270049571990967,grad_norm: 0.8220953302333652, iteration: 52635
loss: 1.0262210369110107,grad_norm: 0.9999993546260848, iteration: 52636
loss: 1.020730972290039,grad_norm: 0.7663256747289728, iteration: 52637
loss: 1.024383783340454,grad_norm: 0.9892888626393572, iteration: 52638
loss: 1.0752317905426025,grad_norm: 0.9999995129534639, iteration: 52639
loss: 1.0006263256072998,grad_norm: 0.8621395556377736, iteration: 52640
loss: 1.0042554140090942,grad_norm: 0.9999990308653396, iteration: 52641
loss: 1.0285285711288452,grad_norm: 0.9999991443393205, iteration: 52642
loss: 1.0002949237823486,grad_norm: 0.9999993795290251, iteration: 52643
loss: 1.0046508312225342,grad_norm: 0.8389493116446206, iteration: 52644
loss: 1.0320913791656494,grad_norm: 0.9999990048369375, iteration: 52645
loss: 1.0512398481369019,grad_norm: 0.9753743756225188, iteration: 52646
loss: 1.0174741744995117,grad_norm: 0.8255546217517922, iteration: 52647
loss: 0.9770035147666931,grad_norm: 0.8419958480478835, iteration: 52648
loss: 0.9795656204223633,grad_norm: 0.9999991026779309, iteration: 52649
loss: 1.0233408212661743,grad_norm: 0.8856135498786303, iteration: 52650
loss: 1.002310037612915,grad_norm: 0.999999816683463, iteration: 52651
loss: 1.0115090608596802,grad_norm: 0.9484958982680197, iteration: 52652
loss: 0.9824203252792358,grad_norm: 0.9211224503422875, iteration: 52653
loss: 0.9733531475067139,grad_norm: 0.9206032019196582, iteration: 52654
loss: 0.991950511932373,grad_norm: 0.9999995325722876, iteration: 52655
loss: 1.0952613353729248,grad_norm: 0.9999991411579571, iteration: 52656
loss: 0.9672408103942871,grad_norm: 0.837226424441613, iteration: 52657
loss: 1.0578289031982422,grad_norm: 0.9999998898916329, iteration: 52658
loss: 0.9698984622955322,grad_norm: 0.9883292424054957, iteration: 52659
loss: 0.9948127865791321,grad_norm: 0.9788596403349192, iteration: 52660
loss: 0.981277346611023,grad_norm: 0.9689661025240403, iteration: 52661
loss: 1.0556273460388184,grad_norm: 0.9999995921134394, iteration: 52662
loss: 1.0052268505096436,grad_norm: 0.999999161782573, iteration: 52663
loss: 0.9897387027740479,grad_norm: 0.9999991118994414, iteration: 52664
loss: 1.0323501825332642,grad_norm: 0.877541287795369, iteration: 52665
loss: 0.9785353541374207,grad_norm: 0.9999990927062761, iteration: 52666
loss: 0.9728848338127136,grad_norm: 0.9999990859511773, iteration: 52667
loss: 1.0080070495605469,grad_norm: 0.9999991698712936, iteration: 52668
loss: 0.9771230220794678,grad_norm: 0.9999996485327867, iteration: 52669
loss: 1.0518988370895386,grad_norm: 0.8630452422412842, iteration: 52670
loss: 0.984504222869873,grad_norm: 0.9999990856537412, iteration: 52671
loss: 0.9960054159164429,grad_norm: 0.957151666495863, iteration: 52672
loss: 1.3131964206695557,grad_norm: 0.9999999830706693, iteration: 52673
loss: 1.0040518045425415,grad_norm: 0.9999992504197496, iteration: 52674
loss: 0.9890813827514648,grad_norm: 0.9228948390346039, iteration: 52675
loss: 1.030704379081726,grad_norm: 0.9999995083830467, iteration: 52676
loss: 1.065903902053833,grad_norm: 0.9999995375656864, iteration: 52677
loss: 1.0224803686141968,grad_norm: 0.9999991824032746, iteration: 52678
loss: 1.0007485151290894,grad_norm: 0.9999994665928222, iteration: 52679
loss: 0.9985201954841614,grad_norm: 0.8491838535732457, iteration: 52680
loss: 0.9692386388778687,grad_norm: 0.9999991912561373, iteration: 52681
loss: 0.9756582379341125,grad_norm: 0.9768412549814778, iteration: 52682
loss: 1.0160189867019653,grad_norm: 0.9999993087691657, iteration: 52683
loss: 0.9874746203422546,grad_norm: 0.8632027866941954, iteration: 52684
loss: 0.9828184247016907,grad_norm: 0.8730860744787066, iteration: 52685
loss: 1.045723557472229,grad_norm: 0.9999992684347723, iteration: 52686
loss: 1.0454480648040771,grad_norm: 0.9999992399085417, iteration: 52687
loss: 1.0540931224822998,grad_norm: 0.9999995084721486, iteration: 52688
loss: 0.9730468392372131,grad_norm: 0.9218338821008126, iteration: 52689
loss: 0.997847318649292,grad_norm: 0.9999993539699542, iteration: 52690
loss: 1.0921672582626343,grad_norm: 0.8487413698134748, iteration: 52691
loss: 1.0076701641082764,grad_norm: 0.8120756706934678, iteration: 52692
loss: 0.982332706451416,grad_norm: 0.9116810134905889, iteration: 52693
loss: 1.043463110923767,grad_norm: 0.9772765914803605, iteration: 52694
loss: 1.0534920692443848,grad_norm: 0.9999994971345653, iteration: 52695
loss: 0.9689803719520569,grad_norm: 0.9999990998720297, iteration: 52696
loss: 1.072821021080017,grad_norm: 0.9999993849625444, iteration: 52697
loss: 1.0224609375,grad_norm: 0.9419503773324828, iteration: 52698
loss: 0.9737057089805603,grad_norm: 0.9356408501004222, iteration: 52699
loss: 1.0188062191009521,grad_norm: 0.9030056163727789, iteration: 52700
loss: 1.0078024864196777,grad_norm: 0.8836841642808899, iteration: 52701
loss: 0.9858614802360535,grad_norm: 0.9251893276392923, iteration: 52702
loss: 0.9961128830909729,grad_norm: 0.9323355015427509, iteration: 52703
loss: 1.0166975259780884,grad_norm: 0.9158812991567236, iteration: 52704
loss: 1.0374864339828491,grad_norm: 0.9999994654960505, iteration: 52705
loss: 1.0074809789657593,grad_norm: 0.999999484798521, iteration: 52706
loss: 1.0132331848144531,grad_norm: 0.7723435394090529, iteration: 52707
loss: 1.0203896760940552,grad_norm: 0.9999991827240055, iteration: 52708
loss: 1.025139570236206,grad_norm: 0.999999020866879, iteration: 52709
loss: 1.095381736755371,grad_norm: 0.999999763128544, iteration: 52710
loss: 1.0638115406036377,grad_norm: 0.9517250951610853, iteration: 52711
loss: 1.0134556293487549,grad_norm: 0.999999168655458, iteration: 52712
loss: 0.962780237197876,grad_norm: 0.9016775835987209, iteration: 52713
loss: 0.9695197939872742,grad_norm: 0.8990838409653371, iteration: 52714
loss: 1.0013505220413208,grad_norm: 0.9999991797667275, iteration: 52715
loss: 0.9648069143295288,grad_norm: 0.9999996456939403, iteration: 52716
loss: 1.0394246578216553,grad_norm: 0.9496732088413616, iteration: 52717
loss: 1.0949609279632568,grad_norm: 0.9999993431191334, iteration: 52718
loss: 0.9895351529121399,grad_norm: 0.8094427098489305, iteration: 52719
loss: 0.9434704184532166,grad_norm: 0.9233933477630959, iteration: 52720
loss: 0.9878332018852234,grad_norm: 0.7647008651724677, iteration: 52721
loss: 1.05148446559906,grad_norm: 0.9999991581563576, iteration: 52722
loss: 0.9664456248283386,grad_norm: 0.9693039882815989, iteration: 52723
loss: 0.997099757194519,grad_norm: 0.9999994570430338, iteration: 52724
loss: 1.031379222869873,grad_norm: 0.9999994078383068, iteration: 52725
loss: 1.005550742149353,grad_norm: 0.9330199723308488, iteration: 52726
loss: 1.01731538772583,grad_norm: 0.9549095360737068, iteration: 52727
loss: 0.9766971468925476,grad_norm: 0.8796045333843378, iteration: 52728
loss: 0.9859326481819153,grad_norm: 0.9171263659836137, iteration: 52729
loss: 1.0082553625106812,grad_norm: 0.9769478149743985, iteration: 52730
loss: 0.9987739324569702,grad_norm: 0.7956419251341931, iteration: 52731
loss: 1.1161028146743774,grad_norm: 0.9999992095219874, iteration: 52732
loss: 1.0386314392089844,grad_norm: 0.999999193058007, iteration: 52733
loss: 0.971177875995636,grad_norm: 0.999999238017661, iteration: 52734
loss: 0.9831374287605286,grad_norm: 0.7990199582316929, iteration: 52735
loss: 1.06020188331604,grad_norm: 0.9999996038683256, iteration: 52736
loss: 0.969044029712677,grad_norm: 0.9165297439628407, iteration: 52737
loss: 1.0041778087615967,grad_norm: 0.8791699854666355, iteration: 52738
loss: 1.019188642501831,grad_norm: 0.9999991209714061, iteration: 52739
loss: 0.9948779940605164,grad_norm: 0.9999991537951961, iteration: 52740
loss: 1.0215502977371216,grad_norm: 0.7654993816835554, iteration: 52741
loss: 1.0738353729248047,grad_norm: 0.9999995369883851, iteration: 52742
loss: 0.970493733882904,grad_norm: 0.8411536098965131, iteration: 52743
loss: 1.005986213684082,grad_norm: 0.9999996263695854, iteration: 52744
loss: 0.9970512986183167,grad_norm: 0.9999995074932045, iteration: 52745
loss: 1.0059924125671387,grad_norm: 0.9999996072709181, iteration: 52746
loss: 1.0037769079208374,grad_norm: 0.8115901301206827, iteration: 52747
loss: 0.9928681254386902,grad_norm: 0.999999096234859, iteration: 52748
loss: 1.0678154230117798,grad_norm: 0.9999994539251909, iteration: 52749
loss: 0.9717153906822205,grad_norm: 0.8176419339451089, iteration: 52750
loss: 1.042211651802063,grad_norm: 0.9999992004404576, iteration: 52751
loss: 0.9524734020233154,grad_norm: 0.9999991719067655, iteration: 52752
loss: 0.9953181147575378,grad_norm: 0.8910224769430835, iteration: 52753
loss: 1.1025547981262207,grad_norm: 0.9999993665673862, iteration: 52754
loss: 1.0716142654418945,grad_norm: 0.9999993900696179, iteration: 52755
loss: 0.9962684512138367,grad_norm: 0.8479417521991822, iteration: 52756
loss: 1.0265824794769287,grad_norm: 0.8158681961693411, iteration: 52757
loss: 1.054185152053833,grad_norm: 0.9999993641146628, iteration: 52758
loss: 1.0024574995040894,grad_norm: 0.9999991922217782, iteration: 52759
loss: 1.009290337562561,grad_norm: 0.9999997923383721, iteration: 52760
loss: 1.018893837928772,grad_norm: 0.906994286257552, iteration: 52761
loss: 1.0139265060424805,grad_norm: 0.8936459577377623, iteration: 52762
loss: 1.010071873664856,grad_norm: 0.8986932191458167, iteration: 52763
loss: 1.0619348287582397,grad_norm: 0.9999990496137979, iteration: 52764
loss: 1.064037561416626,grad_norm: 0.9999995030042899, iteration: 52765
loss: 0.9825159907341003,grad_norm: 0.7779787963131762, iteration: 52766
loss: 1.0052896738052368,grad_norm: 0.8478236464267869, iteration: 52767
loss: 1.0624076128005981,grad_norm: 0.9999996184148024, iteration: 52768
loss: 1.012403964996338,grad_norm: 0.9999991608327048, iteration: 52769
loss: 1.0552284717559814,grad_norm: 0.9999999565960255, iteration: 52770
loss: 1.0299125909805298,grad_norm: 0.9999992584747037, iteration: 52771
loss: 0.9901392459869385,grad_norm: 0.8714819519240665, iteration: 52772
loss: 1.0234930515289307,grad_norm: 0.99999903771263, iteration: 52773
loss: 1.0440012216567993,grad_norm: 0.9999991293416188, iteration: 52774
loss: 1.045066237449646,grad_norm: 0.9999998100784908, iteration: 52775
loss: 1.1495277881622314,grad_norm: 0.9999994285894177, iteration: 52776
loss: 1.1020110845565796,grad_norm: 0.9999993329029818, iteration: 52777
loss: 1.0006554126739502,grad_norm: 0.8940792333859018, iteration: 52778
loss: 0.9827229380607605,grad_norm: 0.9999993330234878, iteration: 52779
loss: 0.9971614480018616,grad_norm: 0.8310827368368469, iteration: 52780
loss: 1.03068208694458,grad_norm: 0.9999994029739945, iteration: 52781
loss: 1.0390124320983887,grad_norm: 0.9999992184946443, iteration: 52782
loss: 1.0871086120605469,grad_norm: 0.9999999341565662, iteration: 52783
loss: 1.0153740644454956,grad_norm: 0.9999990906942487, iteration: 52784
loss: 1.0036474466323853,grad_norm: 0.9999998292450036, iteration: 52785
loss: 1.0013905763626099,grad_norm: 0.8378694047246141, iteration: 52786
loss: 1.0079330205917358,grad_norm: 0.7425764685159288, iteration: 52787
loss: 0.9932945966720581,grad_norm: 0.7901762829377625, iteration: 52788
loss: 1.2264870405197144,grad_norm: 0.9999997695304772, iteration: 52789
loss: 1.008224368095398,grad_norm: 0.9653410614933895, iteration: 52790
loss: 1.0074976682662964,grad_norm: 0.9999991512469693, iteration: 52791
loss: 1.0046172142028809,grad_norm: 0.9999991762836047, iteration: 52792
loss: 1.0097167491912842,grad_norm: 0.9600468372638477, iteration: 52793
loss: 1.048229455947876,grad_norm: 0.9999991284584183, iteration: 52794
loss: 0.9674739837646484,grad_norm: 0.9999994144375426, iteration: 52795
loss: 0.9733529686927795,grad_norm: 0.9999991121342882, iteration: 52796
loss: 0.9942091107368469,grad_norm: 0.8507128453280784, iteration: 52797
loss: 1.0024992227554321,grad_norm: 0.9393517990510524, iteration: 52798
loss: 1.0891008377075195,grad_norm: 0.999999874491328, iteration: 52799
loss: 1.0648857355117798,grad_norm: 0.9999995396202542, iteration: 52800
loss: 1.010069727897644,grad_norm: 0.9999990227739797, iteration: 52801
loss: 1.019517421722412,grad_norm: 0.999999026809268, iteration: 52802
loss: 1.0263382196426392,grad_norm: 0.8384407232128089, iteration: 52803
loss: 1.0168551206588745,grad_norm: 0.9999991835456192, iteration: 52804
loss: 0.9880585074424744,grad_norm: 0.9864588169516362, iteration: 52805
loss: 1.059653401374817,grad_norm: 0.9999995549500663, iteration: 52806
loss: 0.999006450176239,grad_norm: 0.9613751399194899, iteration: 52807
loss: 0.9667568206787109,grad_norm: 0.9708114462644308, iteration: 52808
loss: 0.989130437374115,grad_norm: 0.8669378705833577, iteration: 52809
loss: 0.9860795736312866,grad_norm: 0.9586018456681034, iteration: 52810
loss: 1.0227943658828735,grad_norm: 0.8447025716211938, iteration: 52811
loss: 1.0074949264526367,grad_norm: 0.9999992265648118, iteration: 52812
loss: 1.0248329639434814,grad_norm: 0.9474075135530786, iteration: 52813
loss: 0.9494079351425171,grad_norm: 0.7671675282857592, iteration: 52814
loss: 1.0677515268325806,grad_norm: 0.9999998357805241, iteration: 52815
loss: 1.0230690240859985,grad_norm: 0.8824744755140034, iteration: 52816
loss: 1.0387707948684692,grad_norm: 0.9999997244559689, iteration: 52817
loss: 0.9898098707199097,grad_norm: 0.9247955591594247, iteration: 52818
loss: 0.9677390456199646,grad_norm: 0.9999991586678298, iteration: 52819
loss: 1.0177149772644043,grad_norm: 0.9999991741095061, iteration: 52820
loss: 1.0229911804199219,grad_norm: 0.7433628599834199, iteration: 52821
loss: 0.9972267746925354,grad_norm: 0.8485901783965222, iteration: 52822
loss: 1.002190113067627,grad_norm: 0.8623804282705652, iteration: 52823
loss: 1.0081806182861328,grad_norm: 0.9999990266530637, iteration: 52824
loss: 0.9603430032730103,grad_norm: 0.9999994680350262, iteration: 52825
loss: 1.0055413246154785,grad_norm: 0.8692793281165931, iteration: 52826
loss: 0.98515385389328,grad_norm: 0.9146348057434477, iteration: 52827
loss: 1.1238294839859009,grad_norm: 0.9999996225031064, iteration: 52828
loss: 1.024642825126648,grad_norm: 0.7975891037376546, iteration: 52829
loss: 1.0013247728347778,grad_norm: 0.9999991997033248, iteration: 52830
loss: 0.9865162372589111,grad_norm: 0.8679014809751024, iteration: 52831
loss: 0.9939233064651489,grad_norm: 0.9674874409067767, iteration: 52832
loss: 1.2523913383483887,grad_norm: 0.9999999791105365, iteration: 52833
loss: 1.0485525131225586,grad_norm: 0.9999996403481367, iteration: 52834
loss: 1.024074673652649,grad_norm: 0.7537621387433151, iteration: 52835
loss: 1.062771201133728,grad_norm: 0.9999992577507422, iteration: 52836
loss: 0.9882272481918335,grad_norm: 0.9129646589760324, iteration: 52837
loss: 1.0306551456451416,grad_norm: 0.9999995210616518, iteration: 52838
loss: 1.0074944496154785,grad_norm: 0.999999217452327, iteration: 52839
loss: 1.0113730430603027,grad_norm: 0.9399826407006914, iteration: 52840
loss: 1.0167479515075684,grad_norm: 0.9349345520617184, iteration: 52841
loss: 1.0283219814300537,grad_norm: 0.9999992462525796, iteration: 52842
loss: 1.0215187072753906,grad_norm: 0.9999992987118245, iteration: 52843
loss: 1.063016414642334,grad_norm: 0.9999992186407275, iteration: 52844
loss: 0.9716561436653137,grad_norm: 0.9999990251368062, iteration: 52845
loss: 1.0660547018051147,grad_norm: 0.9999991733196674, iteration: 52846
loss: 0.9904475808143616,grad_norm: 0.9999991677554806, iteration: 52847
loss: 0.9946568608283997,grad_norm: 0.8255251143835004, iteration: 52848
loss: 0.9648846983909607,grad_norm: 0.9999996068610381, iteration: 52849
loss: 1.0172065496444702,grad_norm: 0.9999991799939079, iteration: 52850
loss: 1.0066977739334106,grad_norm: 0.9999991430618951, iteration: 52851
loss: 0.9978640675544739,grad_norm: 0.9472651163028256, iteration: 52852
loss: 1.0511432886123657,grad_norm: 0.9999990511707788, iteration: 52853
loss: 0.9805309772491455,grad_norm: 0.9999992520324544, iteration: 52854
loss: 1.0219930410385132,grad_norm: 0.9775369643402466, iteration: 52855
loss: 1.0041310787200928,grad_norm: 0.9999990956862248, iteration: 52856
loss: 1.3836113214492798,grad_norm: 0.9999997546351752, iteration: 52857
loss: 0.9707997441291809,grad_norm: 0.999999146150319, iteration: 52858
loss: 1.0667961835861206,grad_norm: 0.9999993128368307, iteration: 52859
loss: 0.9998788237571716,grad_norm: 0.9999993324286824, iteration: 52860
loss: 0.9843807816505432,grad_norm: 0.9999992382856683, iteration: 52861
loss: 1.0283410549163818,grad_norm: 0.9999993494188747, iteration: 52862
loss: 1.1334402561187744,grad_norm: 0.9999997880595133, iteration: 52863
loss: 1.0157853364944458,grad_norm: 0.9999994492014922, iteration: 52864
loss: 0.9599207043647766,grad_norm: 0.8743814853557613, iteration: 52865
loss: 0.9719172120094299,grad_norm: 0.9640767368433859, iteration: 52866
loss: 1.1741743087768555,grad_norm: 0.9999999226680166, iteration: 52867
loss: 1.1593068838119507,grad_norm: 0.9999995766019837, iteration: 52868
loss: 1.0944490432739258,grad_norm: 0.9999991000768528, iteration: 52869
loss: 1.0976321697235107,grad_norm: 0.9999998731646924, iteration: 52870
loss: 1.037043809890747,grad_norm: 0.9999996588344553, iteration: 52871
loss: 1.010982871055603,grad_norm: 0.9614843690782509, iteration: 52872
loss: 1.0233372449874878,grad_norm: 0.9999995556641245, iteration: 52873
loss: 1.0295062065124512,grad_norm: 0.9999998798910609, iteration: 52874
loss: 1.1394846439361572,grad_norm: 0.999999046395653, iteration: 52875
loss: 1.253253698348999,grad_norm: 0.9999996480802326, iteration: 52876
loss: 1.0002765655517578,grad_norm: 0.8965950555693207, iteration: 52877
loss: 1.215608835220337,grad_norm: 0.9999995447192893, iteration: 52878
loss: 0.9923388957977295,grad_norm: 0.9095421080722494, iteration: 52879
loss: 1.3569809198379517,grad_norm: 0.9999997786054738, iteration: 52880
loss: 1.0665533542633057,grad_norm: 0.9999991695408991, iteration: 52881
loss: 0.9446741938591003,grad_norm: 0.9872563913426574, iteration: 52882
loss: 1.0138717889785767,grad_norm: 0.863965455685673, iteration: 52883
loss: 1.10642409324646,grad_norm: 0.9999993656507755, iteration: 52884
loss: 1.039959192276001,grad_norm: 0.9999996256065505, iteration: 52885
loss: 1.0390797853469849,grad_norm: 0.9999990082289486, iteration: 52886
loss: 1.4002569913864136,grad_norm: 0.9999996345565245, iteration: 52887
loss: 1.0712761878967285,grad_norm: 0.999999718821384, iteration: 52888
loss: 1.10382080078125,grad_norm: 0.9999997381875056, iteration: 52889
loss: 1.237600564956665,grad_norm: 0.9999993143418376, iteration: 52890
loss: 1.0155813694000244,grad_norm: 0.9999993711231852, iteration: 52891
loss: 1.085708498954773,grad_norm: 0.9999997520000288, iteration: 52892
loss: 1.0394906997680664,grad_norm: 0.9999992583030181, iteration: 52893
loss: 1.0320533514022827,grad_norm: 0.9840174551629862, iteration: 52894
loss: 1.0318443775177002,grad_norm: 0.9999993723258261, iteration: 52895
loss: 0.963373064994812,grad_norm: 0.9999991712004802, iteration: 52896
loss: 1.3481369018554688,grad_norm: 0.9999997636564405, iteration: 52897
loss: 1.0838429927825928,grad_norm: 0.9999994397845157, iteration: 52898
loss: 1.0226174592971802,grad_norm: 0.9999989887785462, iteration: 52899
loss: 1.0903797149658203,grad_norm: 0.999999345103049, iteration: 52900
loss: 1.1366368532180786,grad_norm: 0.999999541132061, iteration: 52901
loss: 1.0313953161239624,grad_norm: 0.9999990362719389, iteration: 52902
loss: 1.04901123046875,grad_norm: 0.9999996137635052, iteration: 52903
loss: 1.1373648643493652,grad_norm: 0.9999997463147781, iteration: 52904
loss: 1.0473244190216064,grad_norm: 0.9999998520140201, iteration: 52905
loss: 1.0592330694198608,grad_norm: 0.9416835910643896, iteration: 52906
loss: 1.0892008543014526,grad_norm: 0.9999997982787561, iteration: 52907
loss: 1.1578774452209473,grad_norm: 0.9999997858035088, iteration: 52908
loss: 1.003987193107605,grad_norm: 0.8870496873206739, iteration: 52909
loss: 1.119117021560669,grad_norm: 0.9999993904793751, iteration: 52910
loss: 1.0181653499603271,grad_norm: 0.9999992939598492, iteration: 52911
loss: 1.1129138469696045,grad_norm: 0.9999993919840761, iteration: 52912
loss: 1.0157521963119507,grad_norm: 0.8770475553442272, iteration: 52913
loss: 0.9880859851837158,grad_norm: 0.9999990931415271, iteration: 52914
loss: 1.0048683881759644,grad_norm: 0.8818296345601436, iteration: 52915
loss: 1.0292129516601562,grad_norm: 0.999999287555352, iteration: 52916
loss: 1.1248801946640015,grad_norm: 0.999999611654025, iteration: 52917
loss: 1.087355375289917,grad_norm: 0.9999994601082624, iteration: 52918
loss: 0.9877114295959473,grad_norm: 0.9999992347086105, iteration: 52919
loss: 1.0224746465682983,grad_norm: 0.9999991347609579, iteration: 52920
loss: 1.0127735137939453,grad_norm: 0.9999992132391786, iteration: 52921
loss: 1.0487664937973022,grad_norm: 0.9999994820735617, iteration: 52922
loss: 1.1697975397109985,grad_norm: 0.9999995580728928, iteration: 52923
loss: 1.0091300010681152,grad_norm: 0.9024724382091499, iteration: 52924
loss: 1.125533938407898,grad_norm: 0.9999998880306027, iteration: 52925
loss: 1.0200011730194092,grad_norm: 0.9999992032254741, iteration: 52926
loss: 1.0297892093658447,grad_norm: 0.9999994616098606, iteration: 52927
loss: 1.0495294332504272,grad_norm: 0.9999990757394236, iteration: 52928
loss: 1.0267444849014282,grad_norm: 0.9999995699517009, iteration: 52929
loss: 1.085900068283081,grad_norm: 0.99999969309265, iteration: 52930
loss: 1.0070788860321045,grad_norm: 0.999999073798111, iteration: 52931
loss: 0.9817696213722229,grad_norm: 0.8437248966877927, iteration: 52932
loss: 1.0107837915420532,grad_norm: 0.9999990574008746, iteration: 52933
loss: 1.0294004678726196,grad_norm: 0.9999998939520957, iteration: 52934
loss: 1.2003567218780518,grad_norm: 0.9999999621099541, iteration: 52935
loss: 1.0855427980422974,grad_norm: 0.9999996070703733, iteration: 52936
loss: 0.9959819316864014,grad_norm: 0.9999991805631908, iteration: 52937
loss: 1.0293079614639282,grad_norm: 0.9291970615401395, iteration: 52938
loss: 1.0160300731658936,grad_norm: 0.9999993836436385, iteration: 52939
loss: 1.0634195804595947,grad_norm: 0.9999994335287625, iteration: 52940
loss: 1.004311442375183,grad_norm: 0.984727305647722, iteration: 52941
loss: 1.034838080406189,grad_norm: 0.9999998028488907, iteration: 52942
loss: 0.9844825267791748,grad_norm: 0.9364175747229678, iteration: 52943
loss: 0.9735405445098877,grad_norm: 0.8738787498314277, iteration: 52944
loss: 1.0527724027633667,grad_norm: 0.999999221093929, iteration: 52945
loss: 1.0203099250793457,grad_norm: 0.86369001987398, iteration: 52946
loss: 1.001391887664795,grad_norm: 0.941851271606009, iteration: 52947
loss: 0.9959041476249695,grad_norm: 0.9999990833429219, iteration: 52948
loss: 1.0597078800201416,grad_norm: 0.9999992006742585, iteration: 52949
loss: 1.0581973791122437,grad_norm: 0.9999993211539108, iteration: 52950
loss: 1.0426198244094849,grad_norm: 0.9999992085690802, iteration: 52951
loss: 1.0095314979553223,grad_norm: 0.9660806873004627, iteration: 52952
loss: 0.9951207637786865,grad_norm: 0.9943320405783493, iteration: 52953
loss: 1.0104254484176636,grad_norm: 0.9999995263952781, iteration: 52954
loss: 0.9863585829734802,grad_norm: 0.962461439157995, iteration: 52955
loss: 1.0056872367858887,grad_norm: 0.9999990293891974, iteration: 52956
loss: 0.9942132234573364,grad_norm: 0.8405801827721553, iteration: 52957
loss: 1.007415533065796,grad_norm: 0.9999990559663003, iteration: 52958
loss: 1.0213520526885986,grad_norm: 0.9999994626386349, iteration: 52959
loss: 0.9946362376213074,grad_norm: 0.999999213135757, iteration: 52960
loss: 1.0497965812683105,grad_norm: 0.9999990791375802, iteration: 52961
loss: 0.9594393968582153,grad_norm: 0.8185667624318698, iteration: 52962
loss: 1.0223543643951416,grad_norm: 0.9999994873358399, iteration: 52963
loss: 0.9869515299797058,grad_norm: 0.7460519084402227, iteration: 52964
loss: 1.0042080879211426,grad_norm: 0.9999995393921178, iteration: 52965
loss: 0.9916113615036011,grad_norm: 0.9117486838182598, iteration: 52966
loss: 1.0481454133987427,grad_norm: 0.9999990791662041, iteration: 52967
loss: 0.9717925786972046,grad_norm: 0.8130622764992275, iteration: 52968
loss: 1.0910743474960327,grad_norm: 0.9999997762901774, iteration: 52969
loss: 1.005799412727356,grad_norm: 0.9999993393737842, iteration: 52970
loss: 0.9890027642250061,grad_norm: 0.8789491420561522, iteration: 52971
loss: 1.051779866218567,grad_norm: 0.9999991438484332, iteration: 52972
loss: 1.051222562789917,grad_norm: 0.9999998108759546, iteration: 52973
loss: 1.067596435546875,grad_norm: 0.9999994540323962, iteration: 52974
loss: 0.9797912836074829,grad_norm: 0.9999991713535311, iteration: 52975
loss: 0.9851472973823547,grad_norm: 0.9769794481255584, iteration: 52976
loss: 0.9711339473724365,grad_norm: 0.9999995163893731, iteration: 52977
loss: 1.0402812957763672,grad_norm: 0.9999991301037054, iteration: 52978
loss: 0.9368115663528442,grad_norm: 0.9759646320070517, iteration: 52979
loss: 1.0135751962661743,grad_norm: 0.8357440920988326, iteration: 52980
loss: 0.958745002746582,grad_norm: 0.8950968206250695, iteration: 52981
loss: 1.0596457719802856,grad_norm: 0.9999990136891191, iteration: 52982
loss: 0.9927483201026917,grad_norm: 0.8966422113970067, iteration: 52983
loss: 1.0425957441329956,grad_norm: 0.9032768237773398, iteration: 52984
loss: 1.174533724784851,grad_norm: 0.9999998861914988, iteration: 52985
loss: 1.0227160453796387,grad_norm: 0.8439847232630724, iteration: 52986
loss: 1.0011329650878906,grad_norm: 0.9730961340061247, iteration: 52987
loss: 1.0654972791671753,grad_norm: 0.9999992624086474, iteration: 52988
loss: 1.0055668354034424,grad_norm: 0.9468665823737418, iteration: 52989
loss: 0.98656165599823,grad_norm: 0.999999248165691, iteration: 52990
loss: 0.9977608323097229,grad_norm: 0.9682707867768675, iteration: 52991
loss: 0.998652458190918,grad_norm: 0.9999994213092024, iteration: 52992
loss: 1.1377826929092407,grad_norm: 0.9999998151518293, iteration: 52993
loss: 1.0317960977554321,grad_norm: 0.9999992657447037, iteration: 52994
loss: 1.0100390911102295,grad_norm: 0.8764083916463201, iteration: 52995
loss: 1.036533236503601,grad_norm: 0.999999472353162, iteration: 52996
loss: 1.0197986364364624,grad_norm: 0.9999991779681793, iteration: 52997
loss: 1.0221494436264038,grad_norm: 0.9718365789355484, iteration: 52998
loss: 1.0946199893951416,grad_norm: 0.9999993096661627, iteration: 52999
loss: 1.0287213325500488,grad_norm: 0.9999992660755453, iteration: 53000
loss: 1.0123960971832275,grad_norm: 0.9350949069783243, iteration: 53001
loss: 0.9843760132789612,grad_norm: 0.7821872137836686, iteration: 53002
loss: 1.1017652750015259,grad_norm: 0.9999998052622399, iteration: 53003
loss: 0.980841338634491,grad_norm: 0.9999989177815434, iteration: 53004
loss: 1.0935297012329102,grad_norm: 0.9999995355579178, iteration: 53005
loss: 1.013532042503357,grad_norm: 0.9059755120418447, iteration: 53006
loss: 1.098012924194336,grad_norm: 0.9999999258842986, iteration: 53007
loss: 1.2059361934661865,grad_norm: 0.9999994531587297, iteration: 53008
loss: 1.0066781044006348,grad_norm: 0.8117762507346551, iteration: 53009
loss: 1.0340734720230103,grad_norm: 0.9999991893993925, iteration: 53010
loss: 1.0675758123397827,grad_norm: 0.999999474195981, iteration: 53011
loss: 1.036974310874939,grad_norm: 0.9999997004569722, iteration: 53012
loss: 1.054864764213562,grad_norm: 0.9999991957268041, iteration: 53013
loss: 0.9975427985191345,grad_norm: 0.9999991146749195, iteration: 53014
loss: 1.0254005193710327,grad_norm: 0.9999992617396433, iteration: 53015
loss: 1.0226885080337524,grad_norm: 0.9999990889137188, iteration: 53016
loss: 1.0249367952346802,grad_norm: 0.9999993440350987, iteration: 53017
loss: 1.0684289932250977,grad_norm: 0.9999994605295486, iteration: 53018
loss: 0.9853593707084656,grad_norm: 0.8139156905811775, iteration: 53019
loss: 1.0122625827789307,grad_norm: 0.9999998261344608, iteration: 53020
loss: 0.9981527924537659,grad_norm: 0.8845401226395366, iteration: 53021
loss: 0.9943415522575378,grad_norm: 0.8915951954755604, iteration: 53022
loss: 0.9963367581367493,grad_norm: 0.9925449338185649, iteration: 53023
loss: 1.008341670036316,grad_norm: 0.9999993761757581, iteration: 53024
loss: 0.999219536781311,grad_norm: 0.9999994682663158, iteration: 53025
loss: 0.968701958656311,grad_norm: 0.9195871724386542, iteration: 53026
loss: 0.9938955307006836,grad_norm: 0.9226579612814623, iteration: 53027
loss: 1.0548038482666016,grad_norm: 0.9999996449843057, iteration: 53028
loss: 0.9775850176811218,grad_norm: 0.9999991959232485, iteration: 53029
loss: 0.9970397353172302,grad_norm: 0.9724632099855001, iteration: 53030
loss: 0.9996823668479919,grad_norm: 0.9999990923479318, iteration: 53031
loss: 1.238432765007019,grad_norm: 0.9999996556658376, iteration: 53032
loss: 0.985526442527771,grad_norm: 0.8759960087784587, iteration: 53033
loss: 0.9956125020980835,grad_norm: 0.8835228926791382, iteration: 53034
loss: 0.983957827091217,grad_norm: 0.9999990106736105, iteration: 53035
loss: 1.0134546756744385,grad_norm: 0.9999996065920931, iteration: 53036
loss: 0.9402373433113098,grad_norm: 0.9935115164195335, iteration: 53037
loss: 0.9962843060493469,grad_norm: 0.9999998059444803, iteration: 53038
loss: 1.191072940826416,grad_norm: 0.999999750165941, iteration: 53039
loss: 1.1536754369735718,grad_norm: 0.9999994803378166, iteration: 53040
loss: 0.969218909740448,grad_norm: 0.9693581373403137, iteration: 53041
loss: 1.0667859315872192,grad_norm: 0.9999991975239386, iteration: 53042
loss: 1.0186973810195923,grad_norm: 0.9846945623454126, iteration: 53043
loss: 0.9872535467147827,grad_norm: 0.9867020933052557, iteration: 53044
loss: 1.0399229526519775,grad_norm: 0.9999991678310451, iteration: 53045
loss: 0.9961233139038086,grad_norm: 0.9113975031378435, iteration: 53046
loss: 0.9784789085388184,grad_norm: 0.8348908911935308, iteration: 53047
loss: 1.014151692390442,grad_norm: 0.9999991689912507, iteration: 53048
loss: 0.9548380374908447,grad_norm: 0.9999991145617994, iteration: 53049
loss: 1.0243562459945679,grad_norm: 0.999999551870427, iteration: 53050
loss: 0.944385290145874,grad_norm: 0.9699842103130276, iteration: 53051
loss: 1.0254464149475098,grad_norm: 0.9999994668509273, iteration: 53052
loss: 1.030197262763977,grad_norm: 0.9999990548405131, iteration: 53053
loss: 1.0195651054382324,grad_norm: 0.9126974121325597, iteration: 53054
loss: 1.0439441204071045,grad_norm: 0.9999991374782079, iteration: 53055
loss: 0.9946250915527344,grad_norm: 0.9067512396475333, iteration: 53056
loss: 1.0022159814834595,grad_norm: 0.9999992557456717, iteration: 53057
loss: 0.9900380373001099,grad_norm: 0.8758443423233914, iteration: 53058
loss: 0.9955713748931885,grad_norm: 0.8876207722084243, iteration: 53059
loss: 1.1120539903640747,grad_norm: 0.9999995739393539, iteration: 53060
loss: 1.0072276592254639,grad_norm: 0.8779195192770238, iteration: 53061
loss: 1.049095869064331,grad_norm: 0.9999991112944971, iteration: 53062
loss: 1.014652967453003,grad_norm: 0.9999991453106569, iteration: 53063
loss: 1.0234955549240112,grad_norm: 0.9999989933063891, iteration: 53064
loss: 1.0040279626846313,grad_norm: 0.9554246011279932, iteration: 53065
loss: 1.1177988052368164,grad_norm: 0.9999996421823537, iteration: 53066
loss: 0.9828597903251648,grad_norm: 0.9999992161969714, iteration: 53067
loss: 1.0205341577529907,grad_norm: 0.9999990563743211, iteration: 53068
loss: 1.0314972400665283,grad_norm: 0.9999992315034112, iteration: 53069
loss: 1.0180864334106445,grad_norm: 0.9999991297194816, iteration: 53070
loss: 1.129337191581726,grad_norm: 0.9999998745765184, iteration: 53071
loss: 0.9905518889427185,grad_norm: 0.9999990165283532, iteration: 53072
loss: 1.0253406763076782,grad_norm: 0.9999991926927124, iteration: 53073
loss: 1.0443183183670044,grad_norm: 1.0000000576457315, iteration: 53074
loss: 1.0737278461456299,grad_norm: 0.999999488074684, iteration: 53075
loss: 1.0351406335830688,grad_norm: 0.9999993765242137, iteration: 53076
loss: 0.9539386630058289,grad_norm: 0.9453823722922652, iteration: 53077
loss: 0.9905847907066345,grad_norm: 0.9285185714148818, iteration: 53078
loss: 1.0107665061950684,grad_norm: 0.9999995853143244, iteration: 53079
loss: 0.9644624590873718,grad_norm: 0.8665716448186784, iteration: 53080
loss: 0.9890778660774231,grad_norm: 0.9979237430907532, iteration: 53081
loss: 1.0235189199447632,grad_norm: 0.8340335228929562, iteration: 53082
loss: 1.0620044469833374,grad_norm: 0.9999990107683399, iteration: 53083
loss: 1.075357437133789,grad_norm: 0.9999997022373001, iteration: 53084
loss: 1.016501784324646,grad_norm: 0.9524694370457885, iteration: 53085
loss: 1.191612720489502,grad_norm: 0.9999995773574626, iteration: 53086
loss: 1.0026432275772095,grad_norm: 0.8599764293026801, iteration: 53087
loss: 1.021851897239685,grad_norm: 0.9999992470745798, iteration: 53088
loss: 1.0236254930496216,grad_norm: 0.9999994570014717, iteration: 53089
loss: 0.9860898852348328,grad_norm: 0.9292641456318508, iteration: 53090
loss: 1.0728657245635986,grad_norm: 0.9999996530864268, iteration: 53091
loss: 1.0449039936065674,grad_norm: 0.9999994708383806, iteration: 53092
loss: 1.002288579940796,grad_norm: 0.921547794607795, iteration: 53093
loss: 1.016461968421936,grad_norm: 0.9999990847549731, iteration: 53094
loss: 0.9887773394584656,grad_norm: 0.9999992002434578, iteration: 53095
loss: 0.9464508295059204,grad_norm: 0.8833181527709, iteration: 53096
loss: 1.0933631658554077,grad_norm: 0.9999997828857644, iteration: 53097
loss: 1.0610640048980713,grad_norm: 0.9999991105719134, iteration: 53098
loss: 1.0609773397445679,grad_norm: 0.9999993272364621, iteration: 53099
loss: 0.9977922439575195,grad_norm: 0.9999991550461851, iteration: 53100
loss: 1.0766736268997192,grad_norm: 0.9143115861784451, iteration: 53101
loss: 1.029126763343811,grad_norm: 0.8412621193505765, iteration: 53102
loss: 1.0022464990615845,grad_norm: 0.9999994813004177, iteration: 53103
loss: 1.043766975402832,grad_norm: 0.999999496419457, iteration: 53104
loss: 1.023542046546936,grad_norm: 0.9999993215328755, iteration: 53105
loss: 1.050184726715088,grad_norm: 0.9314837362368007, iteration: 53106
loss: 0.9790390729904175,grad_norm: 0.999998994296098, iteration: 53107
loss: 1.0553549528121948,grad_norm: 0.999999429373343, iteration: 53108
loss: 1.0001535415649414,grad_norm: 0.9057707762444963, iteration: 53109
loss: 1.080875277519226,grad_norm: 0.9999997269783755, iteration: 53110
loss: 0.9799508452415466,grad_norm: 0.9086376950255491, iteration: 53111
loss: 1.0008578300476074,grad_norm: 0.9999996076048782, iteration: 53112
loss: 1.0035369396209717,grad_norm: 0.9339719752700492, iteration: 53113
loss: 1.1972076892852783,grad_norm: 0.9999991338314299, iteration: 53114
loss: 1.0551583766937256,grad_norm: 0.9999996782753569, iteration: 53115
loss: 1.0966432094573975,grad_norm: 0.8903494954327487, iteration: 53116
loss: 1.0156630277633667,grad_norm: 0.9999991830525383, iteration: 53117
loss: 1.0139131546020508,grad_norm: 0.9559378765957673, iteration: 53118
loss: 0.9782682061195374,grad_norm: 0.9999992164457339, iteration: 53119
loss: 1.0728181600570679,grad_norm: 0.9999998460806067, iteration: 53120
loss: 1.0150803327560425,grad_norm: 0.9999997529441221, iteration: 53121
loss: 0.9853891134262085,grad_norm: 0.8449701170073995, iteration: 53122
loss: 1.0217046737670898,grad_norm: 0.9825837573337979, iteration: 53123
loss: 1.0202128887176514,grad_norm: 0.9999990328208913, iteration: 53124
loss: 1.018792748451233,grad_norm: 0.989623641211987, iteration: 53125
loss: 0.991606593132019,grad_norm: 0.9262284998150345, iteration: 53126
loss: 1.024180293083191,grad_norm: 0.9999990797599146, iteration: 53127
loss: 1.0076642036437988,grad_norm: 0.9175691872201679, iteration: 53128
loss: 1.0477817058563232,grad_norm: 0.9999996318021401, iteration: 53129
loss: 1.0327571630477905,grad_norm: 0.8533291084168891, iteration: 53130
loss: 1.1585807800292969,grad_norm: 0.9999989646648875, iteration: 53131
loss: 0.9767070412635803,grad_norm: 0.9727503845802463, iteration: 53132
loss: 1.0439870357513428,grad_norm: 0.9999991044930365, iteration: 53133
loss: 1.0486948490142822,grad_norm: 0.9999992163991297, iteration: 53134
loss: 0.9947568774223328,grad_norm: 0.9196973016720199, iteration: 53135
loss: 1.0353699922561646,grad_norm: 0.9999996996050597, iteration: 53136
loss: 1.000529408454895,grad_norm: 0.9999998411421673, iteration: 53137
loss: 1.008823037147522,grad_norm: 0.779875257939823, iteration: 53138
loss: 1.047348976135254,grad_norm: 0.9999991001576533, iteration: 53139
loss: 0.9563243389129639,grad_norm: 0.8035024673569737, iteration: 53140
loss: 1.019197940826416,grad_norm: 0.9999998642492117, iteration: 53141
loss: 0.9812062382698059,grad_norm: 0.8319529098818286, iteration: 53142
loss: 1.032468557357788,grad_norm: 0.9999991955663182, iteration: 53143
loss: 1.0179179906845093,grad_norm: 0.8778242727100377, iteration: 53144
loss: 1.0253912210464478,grad_norm: 0.9363150551176029, iteration: 53145
loss: 1.0293179750442505,grad_norm: 0.9999997215370408, iteration: 53146
loss: 1.008270025253296,grad_norm: 0.9290918677195086, iteration: 53147
loss: 0.9820255637168884,grad_norm: 0.9999991121703902, iteration: 53148
loss: 0.9792929291725159,grad_norm: 0.8884910452433168, iteration: 53149
loss: 0.9844234585762024,grad_norm: 0.9999990941171814, iteration: 53150
loss: 1.0155432224273682,grad_norm: 0.9999999248598358, iteration: 53151
loss: 1.0868415832519531,grad_norm: 0.999999282209244, iteration: 53152
loss: 1.0454469919204712,grad_norm: 0.9999993423391087, iteration: 53153
loss: 0.9912000894546509,grad_norm: 0.9999991744982847, iteration: 53154
loss: 1.0091501474380493,grad_norm: 0.9999991628876773, iteration: 53155
loss: 1.01357901096344,grad_norm: 0.9312214742491531, iteration: 53156
loss: 0.99942946434021,grad_norm: 0.9999995310372285, iteration: 53157
loss: 1.0121996402740479,grad_norm: 0.9427439296763813, iteration: 53158
loss: 0.9957530498504639,grad_norm: 0.8678020422227903, iteration: 53159
loss: 1.1469552516937256,grad_norm: 0.9999999843989494, iteration: 53160
loss: 1.0424511432647705,grad_norm: 0.9640638701700509, iteration: 53161
loss: 0.9700143337249756,grad_norm: 0.962414835178328, iteration: 53162
loss: 1.0438421964645386,grad_norm: 0.999999270201084, iteration: 53163
loss: 1.082612156867981,grad_norm: 0.9999991998199907, iteration: 53164
loss: 1.0424563884735107,grad_norm: 0.9999993601941439, iteration: 53165
loss: 1.0179210901260376,grad_norm: 0.999999581985995, iteration: 53166
loss: 0.9982369542121887,grad_norm: 0.9999991228545587, iteration: 53167
loss: 1.0238780975341797,grad_norm: 0.9999992702778742, iteration: 53168
loss: 1.063442587852478,grad_norm: 0.9999993167168159, iteration: 53169
loss: 1.0125290155410767,grad_norm: 0.9166221349510263, iteration: 53170
loss: 1.0017201900482178,grad_norm: 0.9999989765390844, iteration: 53171
loss: 1.0247224569320679,grad_norm: 0.9999990441339744, iteration: 53172
loss: 1.0295473337173462,grad_norm: 0.999999626261722, iteration: 53173
loss: 1.0228283405303955,grad_norm: 0.9529894749964036, iteration: 53174
loss: 1.0477313995361328,grad_norm: 0.9999992825881301, iteration: 53175
loss: 1.081973910331726,grad_norm: 0.9307466343806866, iteration: 53176
loss: 1.0390031337738037,grad_norm: 0.9999996276202664, iteration: 53177
loss: 1.036177396774292,grad_norm: 0.9999992558388245, iteration: 53178
loss: 1.036389708518982,grad_norm: 0.9999991024829104, iteration: 53179
loss: 1.0306568145751953,grad_norm: 0.9999991474730638, iteration: 53180
loss: 0.9994952082633972,grad_norm: 0.9999993237207653, iteration: 53181
loss: 1.0371235609054565,grad_norm: 0.9999993037663223, iteration: 53182
loss: 1.0406432151794434,grad_norm: 0.9999990104778361, iteration: 53183
loss: 0.9949446320533752,grad_norm: 0.8268576916212015, iteration: 53184
loss: 1.0188546180725098,grad_norm: 0.9999990898336701, iteration: 53185
loss: 1.0341027975082397,grad_norm: 0.9999996105970631, iteration: 53186
loss: 0.9875810742378235,grad_norm: 0.9999993390213384, iteration: 53187
loss: 1.0419412851333618,grad_norm: 0.9999990400910348, iteration: 53188
loss: 0.9992457032203674,grad_norm: 0.9166976843173347, iteration: 53189
loss: 1.0179290771484375,grad_norm: 0.9999994071522975, iteration: 53190
loss: 0.9972426295280457,grad_norm: 0.950015822963461, iteration: 53191
loss: 1.028812289237976,grad_norm: 0.9999991829567642, iteration: 53192
loss: 1.027868628501892,grad_norm: 0.9999990970922219, iteration: 53193
loss: 0.9993718862533569,grad_norm: 0.8734615826570764, iteration: 53194
loss: 1.017754316329956,grad_norm: 0.9999993032978532, iteration: 53195
loss: 0.9826133847236633,grad_norm: 0.9999991386465907, iteration: 53196
loss: 1.0061957836151123,grad_norm: 0.968638766490863, iteration: 53197
loss: 1.0359750986099243,grad_norm: 0.9999996079936518, iteration: 53198
loss: 1.0022348165512085,grad_norm: 0.9999991395850083, iteration: 53199
loss: 0.9394194483757019,grad_norm: 0.8052663637109899, iteration: 53200
loss: 1.0346970558166504,grad_norm: 0.999999545449345, iteration: 53201
loss: 0.9962568283081055,grad_norm: 0.9999992179581111, iteration: 53202
loss: 0.9661946296691895,grad_norm: 0.9314976880898707, iteration: 53203
loss: 0.9979067444801331,grad_norm: 0.8880858159786325, iteration: 53204
loss: 1.0256856679916382,grad_norm: 0.9999998415821345, iteration: 53205
loss: 0.9763375520706177,grad_norm: 0.8188663542565927, iteration: 53206
loss: 0.9782631993293762,grad_norm: 0.9993041943022603, iteration: 53207
loss: 1.0166445970535278,grad_norm: 0.9999992711772804, iteration: 53208
loss: 0.9846398234367371,grad_norm: 0.9999994371506639, iteration: 53209
loss: 1.0363038778305054,grad_norm: 0.9999993639424039, iteration: 53210
loss: 1.0470662117004395,grad_norm: 0.9999998856162584, iteration: 53211
loss: 0.9901050329208374,grad_norm: 0.9576311659167968, iteration: 53212
loss: 0.9564188718795776,grad_norm: 0.958302652557582, iteration: 53213
loss: 1.001984715461731,grad_norm: 0.9615674723673949, iteration: 53214
loss: 1.0183179378509521,grad_norm: 0.9587044168533515, iteration: 53215
loss: 1.0009781122207642,grad_norm: 0.999999373716821, iteration: 53216
loss: 0.9802560806274414,grad_norm: 0.8521923898982346, iteration: 53217
loss: 0.9846939444541931,grad_norm: 0.8657879493124699, iteration: 53218
loss: 0.9835054874420166,grad_norm: 0.9999990443339415, iteration: 53219
loss: 1.0008463859558105,grad_norm: 0.9999995254645061, iteration: 53220
loss: 0.9912199378013611,grad_norm: 0.999999031121446, iteration: 53221
loss: 0.9967588186264038,grad_norm: 0.9999990783546115, iteration: 53222
loss: 0.9794113039970398,grad_norm: 0.9518337960066957, iteration: 53223
loss: 0.9897574782371521,grad_norm: 0.9787583238382213, iteration: 53224
loss: 0.9794413447380066,grad_norm: 0.8920165421266846, iteration: 53225
loss: 0.9942139983177185,grad_norm: 0.8276847022865501, iteration: 53226
loss: 0.9883854985237122,grad_norm: 0.8268037120406685, iteration: 53227
loss: 1.0261504650115967,grad_norm: 0.9999991486584352, iteration: 53228
loss: 0.9745039343833923,grad_norm: 0.9999991250413277, iteration: 53229
loss: 1.0973734855651855,grad_norm: 0.999999595030814, iteration: 53230
loss: 0.9901829957962036,grad_norm: 0.8319674104388947, iteration: 53231
loss: 1.069358468055725,grad_norm: 0.9999995135909986, iteration: 53232
loss: 0.998939037322998,grad_norm: 0.8736850874805082, iteration: 53233
loss: 0.953500509262085,grad_norm: 0.9765275533881931, iteration: 53234
loss: 1.0438950061798096,grad_norm: 0.8909632637248361, iteration: 53235
loss: 0.9226087331771851,grad_norm: 0.8463298112942403, iteration: 53236
loss: 1.0071884393692017,grad_norm: 0.930686270136317, iteration: 53237
loss: 1.0097156763076782,grad_norm: 0.9999990163370402, iteration: 53238
loss: 0.9988960027694702,grad_norm: 0.9888892571320382, iteration: 53239
loss: 1.1119509935379028,grad_norm: 1.000000011565462, iteration: 53240
loss: 1.0125703811645508,grad_norm: 0.7672600400834715, iteration: 53241
loss: 0.9862992167472839,grad_norm: 0.999999119525004, iteration: 53242
loss: 0.9911282658576965,grad_norm: 0.9999992808644924, iteration: 53243
loss: 0.9816181659698486,grad_norm: 0.841557085838994, iteration: 53244
loss: 0.9982383251190186,grad_norm: 0.9038863422223177, iteration: 53245
loss: 1.0635135173797607,grad_norm: 0.9999998019444326, iteration: 53246
loss: 0.9679829478263855,grad_norm: 0.9999991603654392, iteration: 53247
loss: 0.9926437735557556,grad_norm: 0.96222098597831, iteration: 53248
loss: 1.0042269229888916,grad_norm: 0.9060856232456976, iteration: 53249
loss: 0.9492200613021851,grad_norm: 0.837174398516933, iteration: 53250
loss: 1.099514126777649,grad_norm: 0.9999990922337668, iteration: 53251
loss: 0.9820898175239563,grad_norm: 0.9999991019927549, iteration: 53252
loss: 1.0225558280944824,grad_norm: 0.8482330851076154, iteration: 53253
loss: 0.9949891567230225,grad_norm: 0.9999990653850317, iteration: 53254
loss: 0.9658486843109131,grad_norm: 0.8919167605397, iteration: 53255
loss: 0.9986452460289001,grad_norm: 0.9029036476825056, iteration: 53256
loss: 0.976417064666748,grad_norm: 0.995386575339735, iteration: 53257
loss: 1.0021677017211914,grad_norm: 0.9538113897660934, iteration: 53258
loss: 1.0404253005981445,grad_norm: 0.9055250254833057, iteration: 53259
loss: 0.9582964181900024,grad_norm: 0.999999020497218, iteration: 53260
loss: 0.9493743181228638,grad_norm: 0.8871933031871588, iteration: 53261
loss: 0.9930068850517273,grad_norm: 0.8913445071744246, iteration: 53262
loss: 1.0100258588790894,grad_norm: 0.8574060835958078, iteration: 53263
loss: 1.0238244533538818,grad_norm: 0.9999991181696475, iteration: 53264
loss: 0.9830495715141296,grad_norm: 0.8845201437931476, iteration: 53265
loss: 1.290993571281433,grad_norm: 0.9999997170001684, iteration: 53266
loss: 0.9878355264663696,grad_norm: 0.9226493792279236, iteration: 53267
loss: 0.9701309204101562,grad_norm: 0.7819216726946907, iteration: 53268
loss: 0.9934770464897156,grad_norm: 0.9999993107220744, iteration: 53269
loss: 1.0100793838500977,grad_norm: 0.9999990770582526, iteration: 53270
loss: 1.0070757865905762,grad_norm: 0.9999989431437972, iteration: 53271
loss: 1.0136616230010986,grad_norm: 0.9457570452879802, iteration: 53272
loss: 1.269368290901184,grad_norm: 0.9999995949325723, iteration: 53273
loss: 1.0208438634872437,grad_norm: 0.8796313666762816, iteration: 53274
loss: 0.9894986748695374,grad_norm: 0.9999991356576455, iteration: 53275
loss: 1.0853463411331177,grad_norm: 0.9999993288693483, iteration: 53276
loss: 1.213158369064331,grad_norm: 0.9999992225265315, iteration: 53277
loss: 0.9839901924133301,grad_norm: 0.9744400525440103, iteration: 53278
loss: 0.9879814982414246,grad_norm: 0.9999990803149663, iteration: 53279
loss: 0.9871962070465088,grad_norm: 0.8198287507840224, iteration: 53280
loss: 1.133272409439087,grad_norm: 0.9999992519120892, iteration: 53281
loss: 1.1442762613296509,grad_norm: 0.9999993936627853, iteration: 53282
loss: 1.0838382244110107,grad_norm: 0.9365134014248414, iteration: 53283
loss: 1.0693047046661377,grad_norm: 0.9999991480244096, iteration: 53284
loss: 1.0778837203979492,grad_norm: 0.9999991877738337, iteration: 53285
loss: 1.384660005569458,grad_norm: 0.9999999391572323, iteration: 53286
loss: 1.0334445238113403,grad_norm: 0.8603519845752202, iteration: 53287
loss: 1.3111779689788818,grad_norm: 0.9999993391002744, iteration: 53288
loss: 1.118645191192627,grad_norm: 0.9999993346063352, iteration: 53289
loss: 1.0458256006240845,grad_norm: 0.9999994461185341, iteration: 53290
loss: 0.9752009510993958,grad_norm: 0.9403072421514984, iteration: 53291
loss: 1.1690990924835205,grad_norm: 0.9999992199812172, iteration: 53292
loss: 1.0551247596740723,grad_norm: 0.9999992633955908, iteration: 53293
loss: 1.1877323389053345,grad_norm: 0.9999993154831547, iteration: 53294
loss: 1.0305466651916504,grad_norm: 0.8065756910477101, iteration: 53295
loss: 1.056422472000122,grad_norm: 0.9999995364950824, iteration: 53296
loss: 1.015228271484375,grad_norm: 0.9369307898418879, iteration: 53297
loss: 1.3008534908294678,grad_norm: 0.9999997300212035, iteration: 53298
loss: 1.0116592645645142,grad_norm: 0.9663967765366572, iteration: 53299
loss: 1.0504511594772339,grad_norm: 0.9991102556969829, iteration: 53300
loss: 0.9851864576339722,grad_norm: 0.9999991077705519, iteration: 53301
loss: 1.0454928874969482,grad_norm: 0.9999994975206273, iteration: 53302
loss: 1.0293751955032349,grad_norm: 0.9999991068348252, iteration: 53303
loss: 0.9887087345123291,grad_norm: 0.9999990894525125, iteration: 53304
loss: 1.310610055923462,grad_norm: 0.9999996148336201, iteration: 53305
loss: 1.0557178258895874,grad_norm: 0.9506394630315802, iteration: 53306
loss: 1.0197303295135498,grad_norm: 0.9999998785588586, iteration: 53307
loss: 1.007423996925354,grad_norm: 0.9999992044520516, iteration: 53308
loss: 1.060237169265747,grad_norm: 0.9999994943795864, iteration: 53309
loss: 1.0163854360580444,grad_norm: 0.9999991114355069, iteration: 53310
loss: 1.0818326473236084,grad_norm: 0.9999991421899133, iteration: 53311
loss: 1.0790865421295166,grad_norm: 0.9999998147536482, iteration: 53312
loss: 1.017430067062378,grad_norm: 0.9999996136222772, iteration: 53313
loss: 1.2551189661026,grad_norm: 0.9999996616638267, iteration: 53314
loss: 1.003524899482727,grad_norm: 0.8782049516759494, iteration: 53315
loss: 0.9913979768753052,grad_norm: 0.9371435796136596, iteration: 53316
loss: 1.0009232759475708,grad_norm: 0.9999991876647177, iteration: 53317
loss: 1.1802719831466675,grad_norm: 0.9999994999427282, iteration: 53318
loss: 0.99285489320755,grad_norm: 0.9999991019472924, iteration: 53319
loss: 1.1182355880737305,grad_norm: 0.9999997043907182, iteration: 53320
loss: 1.2269535064697266,grad_norm: 0.9999993383274881, iteration: 53321
loss: 1.021707534790039,grad_norm: 0.9999990867341505, iteration: 53322
loss: 1.2201519012451172,grad_norm: 0.9999998815412366, iteration: 53323
loss: 1.0161323547363281,grad_norm: 0.9999998887423422, iteration: 53324
loss: 0.9991045594215393,grad_norm: 0.9547096510188496, iteration: 53325
loss: 1.2101452350616455,grad_norm: 0.9999995637618764, iteration: 53326
loss: 1.2050223350524902,grad_norm: 0.9999996904830428, iteration: 53327
loss: 0.9840105175971985,grad_norm: 0.9999992345317666, iteration: 53328
loss: 1.0615234375,grad_norm: 0.9999997676462374, iteration: 53329
loss: 1.0331507921218872,grad_norm: 0.9999997941161425, iteration: 53330
loss: 1.0922784805297852,grad_norm: 0.9999997522831039, iteration: 53331
loss: 1.0102626085281372,grad_norm: 0.999999090972799, iteration: 53332
loss: 1.0985134840011597,grad_norm: 0.9999997455268212, iteration: 53333
loss: 1.029160976409912,grad_norm: 0.8378550806910231, iteration: 53334
loss: 1.1543195247650146,grad_norm: 0.9999998687239551, iteration: 53335
loss: 1.0840978622436523,grad_norm: 1.0000000535887101, iteration: 53336
loss: 0.9981924295425415,grad_norm: 0.9999997212698155, iteration: 53337
loss: 1.058337688446045,grad_norm: 0.9999992644751634, iteration: 53338
loss: 1.1324082612991333,grad_norm: 0.9999999080826334, iteration: 53339
loss: 1.0948408842086792,grad_norm: 0.9026477251357552, iteration: 53340
loss: 1.055759310722351,grad_norm: 0.9999991939061104, iteration: 53341
loss: 1.092406153678894,grad_norm: 0.9999997284815572, iteration: 53342
loss: 1.0007374286651611,grad_norm: 0.9999995271097236, iteration: 53343
loss: 1.0307331085205078,grad_norm: 0.9350347254099289, iteration: 53344
loss: 1.0348528623580933,grad_norm: 0.9827067644325802, iteration: 53345
loss: 1.0336482524871826,grad_norm: 0.9483131120753104, iteration: 53346
loss: 1.0239650011062622,grad_norm: 0.999999281599891, iteration: 53347
loss: 1.0674384832382202,grad_norm: 0.9999990739410416, iteration: 53348
loss: 1.1719874143600464,grad_norm: 0.9999997420991467, iteration: 53349
loss: 1.1937127113342285,grad_norm: 0.9999992750205428, iteration: 53350
loss: 0.985297441482544,grad_norm: 0.9999992908102681, iteration: 53351
loss: 1.0283197164535522,grad_norm: 0.9999992853166508, iteration: 53352
loss: 1.188178300857544,grad_norm: 0.9999998473755947, iteration: 53353
loss: 1.0424151420593262,grad_norm: 0.9999991715450065, iteration: 53354
loss: 1.1005703210830688,grad_norm: 0.9999991950371658, iteration: 53355
loss: 1.2812103033065796,grad_norm: 0.9999999248288691, iteration: 53356
loss: 1.171599268913269,grad_norm: 0.9999996025806033, iteration: 53357
loss: 1.0169973373413086,grad_norm: 0.9999992290704423, iteration: 53358
loss: 1.163902759552002,grad_norm: 0.9999994604228962, iteration: 53359
loss: 1.0209121704101562,grad_norm: 0.9186478443922119, iteration: 53360
loss: 1.0740118026733398,grad_norm: 0.9999992275424591, iteration: 53361
loss: 1.0548385381698608,grad_norm: 0.9999994317321819, iteration: 53362
loss: 1.032339334487915,grad_norm: 0.9999990171893961, iteration: 53363
loss: 0.9687114357948303,grad_norm: 0.9999991562685986, iteration: 53364
loss: 1.0324194431304932,grad_norm: 0.7770176235675178, iteration: 53365
loss: 1.0197908878326416,grad_norm: 0.9999994962524624, iteration: 53366
loss: 1.0112090110778809,grad_norm: 0.9999998173455826, iteration: 53367
loss: 1.1134252548217773,grad_norm: 0.9999993297906674, iteration: 53368
loss: 0.9757691025733948,grad_norm: 0.9238773154781829, iteration: 53369
loss: 1.0228655338287354,grad_norm: 0.9999990955175073, iteration: 53370
loss: 1.0757358074188232,grad_norm: 0.999999606378308, iteration: 53371
loss: 1.0940862894058228,grad_norm: 0.9999997025856978, iteration: 53372
loss: 1.005822777748108,grad_norm: 0.9999990818421811, iteration: 53373
loss: 1.010949969291687,grad_norm: 0.9999991816168305, iteration: 53374
loss: 1.0076309442520142,grad_norm: 0.8656775784948094, iteration: 53375
loss: 1.0303212404251099,grad_norm: 0.9389777479411344, iteration: 53376
loss: 0.9952635765075684,grad_norm: 0.9999991314496225, iteration: 53377
loss: 1.044373869895935,grad_norm: 0.9999990332658477, iteration: 53378
loss: 1.0155695676803589,grad_norm: 0.945692901183816, iteration: 53379
loss: 1.051042079925537,grad_norm: 0.9810641400015868, iteration: 53380
loss: 1.0282413959503174,grad_norm: 0.999999221264226, iteration: 53381
loss: 1.0132988691329956,grad_norm: 0.9999993373321804, iteration: 53382
loss: 1.0212851762771606,grad_norm: 0.8215068741125345, iteration: 53383
loss: 1.0018659830093384,grad_norm: 0.8838270007336058, iteration: 53384
loss: 0.9727896451950073,grad_norm: 0.9999997456361154, iteration: 53385
loss: 1.048163890838623,grad_norm: 0.8771748629509115, iteration: 53386
loss: 0.9666843414306641,grad_norm: 0.8695135833711398, iteration: 53387
loss: 1.0306998491287231,grad_norm: 0.9999997496963677, iteration: 53388
loss: 0.9854038953781128,grad_norm: 0.9324149087597984, iteration: 53389
loss: 1.0530610084533691,grad_norm: 0.9764900263070324, iteration: 53390
loss: 1.0435138940811157,grad_norm: 0.9394251263957749, iteration: 53391
loss: 1.203874111175537,grad_norm: 0.9999995351946671, iteration: 53392
loss: 1.038196086883545,grad_norm: 0.9999990884002862, iteration: 53393
loss: 0.9853002429008484,grad_norm: 0.9284932122910726, iteration: 53394
loss: 1.0213922262191772,grad_norm: 0.9040514272375926, iteration: 53395
loss: 1.0343282222747803,grad_norm: 0.9999994291029796, iteration: 53396
loss: 0.9860544204711914,grad_norm: 0.9999991087108981, iteration: 53397
loss: 1.0133620500564575,grad_norm: 0.999999313253316, iteration: 53398
loss: 1.0151350498199463,grad_norm: 0.9999990562805237, iteration: 53399
loss: 0.967592716217041,grad_norm: 0.9999990603592877, iteration: 53400
loss: 1.0113247632980347,grad_norm: 0.6358010108230715, iteration: 53401
loss: 0.9973201751708984,grad_norm: 0.9999994119781086, iteration: 53402
loss: 1.1450592279434204,grad_norm: 0.9999996018757967, iteration: 53403
loss: 1.0272636413574219,grad_norm: 0.9999990806367538, iteration: 53404
loss: 1.0824668407440186,grad_norm: 0.9999997940402303, iteration: 53405
loss: 1.0207544565200806,grad_norm: 0.9999990156698817, iteration: 53406
loss: 1.0310372114181519,grad_norm: 0.9999993862739096, iteration: 53407
loss: 1.0028128623962402,grad_norm: 0.9380548897227314, iteration: 53408
loss: 1.014026165008545,grad_norm: 0.9236164777810818, iteration: 53409
loss: 1.0719969272613525,grad_norm: 0.9999998059517274, iteration: 53410
loss: 0.9943994283676147,grad_norm: 0.9999989741394297, iteration: 53411
loss: 1.067764163017273,grad_norm: 0.999999150546858, iteration: 53412
loss: 1.0363757610321045,grad_norm: 0.9999989939219807, iteration: 53413
loss: 0.997592568397522,grad_norm: 0.9999991408465577, iteration: 53414
loss: 1.0238202810287476,grad_norm: 0.9999992340183437, iteration: 53415
loss: 1.0046385526657104,grad_norm: 0.9452347534688256, iteration: 53416
loss: 1.0117415189743042,grad_norm: 0.999999291077643, iteration: 53417
loss: 0.9875403642654419,grad_norm: 0.9999990036537045, iteration: 53418
loss: 1.1025010347366333,grad_norm: 0.999999800297231, iteration: 53419
loss: 1.0068968534469604,grad_norm: 0.9999990806308414, iteration: 53420
loss: 1.0323988199234009,grad_norm: 0.9999996141506508, iteration: 53421
loss: 0.994218111038208,grad_norm: 0.9999992094402885, iteration: 53422
loss: 1.0213077068328857,grad_norm: 0.8799378156408318, iteration: 53423
loss: 1.0916423797607422,grad_norm: 0.9999994672691402, iteration: 53424
loss: 1.0944405794143677,grad_norm: 0.9999997198959324, iteration: 53425
loss: 0.9568169713020325,grad_norm: 0.9426318511065986, iteration: 53426
loss: 1.0136659145355225,grad_norm: 0.9999991946933289, iteration: 53427
loss: 1.0920202732086182,grad_norm: 0.9999993535180234, iteration: 53428
loss: 1.047156810760498,grad_norm: 0.9670331911326344, iteration: 53429
loss: 1.0330687761306763,grad_norm: 0.8243665719680752, iteration: 53430
loss: 1.0005824565887451,grad_norm: 0.9638986125127038, iteration: 53431
loss: 1.0541828870773315,grad_norm: 0.9999997204116801, iteration: 53432
loss: 1.0020825862884521,grad_norm: 0.999999339223915, iteration: 53433
loss: 1.0811901092529297,grad_norm: 0.999999051339287, iteration: 53434
loss: 1.0121723413467407,grad_norm: 0.9999991890867925, iteration: 53435
loss: 0.9519205093383789,grad_norm: 0.7698546990227093, iteration: 53436
loss: 1.035343885421753,grad_norm: 0.9999996541640533, iteration: 53437
loss: 1.142492651939392,grad_norm: 0.8236902723318411, iteration: 53438
loss: 1.054288625717163,grad_norm: 0.9999993301489039, iteration: 53439
loss: 1.0207960605621338,grad_norm: 0.9999991941533829, iteration: 53440
loss: 1.0214121341705322,grad_norm: 0.9999989347676089, iteration: 53441
loss: 0.9774284362792969,grad_norm: 0.9999990315603745, iteration: 53442
loss: 0.992916464805603,grad_norm: 0.9999994699447323, iteration: 53443
loss: 1.0217556953430176,grad_norm: 0.9099073964763355, iteration: 53444
loss: 1.0518367290496826,grad_norm: 0.8360240460812909, iteration: 53445
loss: 0.994337260723114,grad_norm: 0.8565019024732294, iteration: 53446
loss: 0.9598044753074646,grad_norm: 0.9999991829346321, iteration: 53447
loss: 0.97789466381073,grad_norm: 0.7766310569690237, iteration: 53448
loss: 0.9957826733589172,grad_norm: 0.9999991649432264, iteration: 53449
loss: 1.0034695863723755,grad_norm: 0.9999990617994929, iteration: 53450
loss: 0.9795487523078918,grad_norm: 0.8229555059685082, iteration: 53451
loss: 0.9920379519462585,grad_norm: 0.9240242805880735, iteration: 53452
loss: 0.9969866275787354,grad_norm: 0.946927693520361, iteration: 53453
loss: 0.993675947189331,grad_norm: 0.9999990076448856, iteration: 53454
loss: 1.054714560508728,grad_norm: 0.999999584162209, iteration: 53455
loss: 0.9558243155479431,grad_norm: 0.8308095458400998, iteration: 53456
loss: 1.023038387298584,grad_norm: 0.9999990300514181, iteration: 53457
loss: 1.045011043548584,grad_norm: 0.9667099882966949, iteration: 53458
loss: 1.0247291326522827,grad_norm: 0.9994911717400712, iteration: 53459
loss: 1.060323715209961,grad_norm: 0.8967122248480278, iteration: 53460
loss: 0.9807886481285095,grad_norm: 0.9999991395204034, iteration: 53461
loss: 0.9683720469474792,grad_norm: 0.9999993231426854, iteration: 53462
loss: 0.9932234287261963,grad_norm: 0.9999990373406081, iteration: 53463
loss: 1.0397355556488037,grad_norm: 0.9999991115018125, iteration: 53464
loss: 1.0012164115905762,grad_norm: 0.9962508966463881, iteration: 53465
loss: 0.9831788539886475,grad_norm: 0.9999994522287514, iteration: 53466
loss: 0.9936436414718628,grad_norm: 0.9638897507214305, iteration: 53467
loss: 0.9962208867073059,grad_norm: 0.9999991060075798, iteration: 53468
loss: 1.0124191045761108,grad_norm: 0.977014273014967, iteration: 53469
loss: 1.0250149965286255,grad_norm: 0.9999990619128026, iteration: 53470
loss: 1.0292654037475586,grad_norm: 0.9999991048695052, iteration: 53471
loss: 1.0273219347000122,grad_norm: 0.999999224365379, iteration: 53472
loss: 1.0930670499801636,grad_norm: 0.9999991606778788, iteration: 53473
loss: 1.1156343221664429,grad_norm: 0.9999995965888135, iteration: 53474
loss: 1.0291870832443237,grad_norm: 0.9565442908442131, iteration: 53475
loss: 1.0541701316833496,grad_norm: 0.9999992004608481, iteration: 53476
loss: 1.0332638025283813,grad_norm: 0.9999993223715122, iteration: 53477
loss: 1.0529855489730835,grad_norm: 0.9999992015154935, iteration: 53478
loss: 1.0359041690826416,grad_norm: 0.9455818863400416, iteration: 53479
loss: 1.0193371772766113,grad_norm: 0.8302841104718001, iteration: 53480
loss: 1.0013723373413086,grad_norm: 0.9999994826263985, iteration: 53481
loss: 0.9913138747215271,grad_norm: 0.8869950023748862, iteration: 53482
loss: 1.0202672481536865,grad_norm: 0.9788980703662343, iteration: 53483
loss: 0.9894633889198303,grad_norm: 0.7970131460684591, iteration: 53484
loss: 1.01808762550354,grad_norm: 0.9999994800781091, iteration: 53485
loss: 1.0314629077911377,grad_norm: 0.7951583089062007, iteration: 53486
loss: 1.0481514930725098,grad_norm: 0.9679606639347104, iteration: 53487
loss: 0.994871199131012,grad_norm: 0.9999999509431857, iteration: 53488
loss: 1.0168631076812744,grad_norm: 0.8819583605037316, iteration: 53489
loss: 1.0057857036590576,grad_norm: 0.9999991807383781, iteration: 53490
loss: 1.0283257961273193,grad_norm: 0.9999994605925717, iteration: 53491
loss: 1.0019224882125854,grad_norm: 0.9879731686288523, iteration: 53492
loss: 1.0068025588989258,grad_norm: 0.9999993099238459, iteration: 53493
loss: 1.0789966583251953,grad_norm: 0.9999997936303003, iteration: 53494
loss: 1.0664089918136597,grad_norm: 0.9999997927737166, iteration: 53495
loss: 0.9827616214752197,grad_norm: 0.865960015629109, iteration: 53496
loss: 0.9874179363250732,grad_norm: 0.9999992764800504, iteration: 53497
loss: 1.0098011493682861,grad_norm: 0.9999998426755555, iteration: 53498
loss: 0.9861324429512024,grad_norm: 0.9432067919883235, iteration: 53499
loss: 1.0263875722885132,grad_norm: 0.999999084372586, iteration: 53500
loss: 0.9998531341552734,grad_norm: 0.938325295476706, iteration: 53501
loss: 1.0067262649536133,grad_norm: 0.8679656525342457, iteration: 53502
loss: 0.9931074380874634,grad_norm: 0.7592919157773097, iteration: 53503
loss: 1.0172679424285889,grad_norm: 0.999999218467891, iteration: 53504
loss: 1.0442063808441162,grad_norm: 0.9999993585857282, iteration: 53505
loss: 0.9579266905784607,grad_norm: 0.9999992722006156, iteration: 53506
loss: 0.9909886121749878,grad_norm: 0.9414751194506126, iteration: 53507
loss: 0.9818341732025146,grad_norm: 0.8681522665851876, iteration: 53508
loss: 1.0157227516174316,grad_norm: 0.9999990124160552, iteration: 53509
loss: 1.1012756824493408,grad_norm: 0.9999992956445184, iteration: 53510
loss: 0.9928058385848999,grad_norm: 0.9914110078750591, iteration: 53511
loss: 0.9904594421386719,grad_norm: 0.9777171605226224, iteration: 53512
loss: 1.0034024715423584,grad_norm: 0.9999992253507605, iteration: 53513
loss: 1.0271183252334595,grad_norm: 0.9133227088270395, iteration: 53514
loss: 0.9798587560653687,grad_norm: 0.9999990964944513, iteration: 53515
loss: 1.1108638048171997,grad_norm: 0.9999991330604382, iteration: 53516
loss: 0.9790946841239929,grad_norm: 0.9999990357339757, iteration: 53517
loss: 1.0364032983779907,grad_norm: 0.9999996474753773, iteration: 53518
loss: 1.0273445844650269,grad_norm: 0.9408789026808406, iteration: 53519
loss: 1.03780198097229,grad_norm: 0.9999994283681798, iteration: 53520
loss: 1.0406965017318726,grad_norm: 0.9403092382757011, iteration: 53521
loss: 1.008767008781433,grad_norm: 0.9999991789725282, iteration: 53522
loss: 0.976709246635437,grad_norm: 0.9999991104860743, iteration: 53523
loss: 1.046320915222168,grad_norm: 0.9999990184538972, iteration: 53524
loss: 0.9868557453155518,grad_norm: 0.8740714277843507, iteration: 53525
loss: 1.0037933588027954,grad_norm: 0.9403914883262913, iteration: 53526
loss: 1.0971078872680664,grad_norm: 0.9999991239638754, iteration: 53527
loss: 1.2159947156906128,grad_norm: 0.9999993600539716, iteration: 53528
loss: 0.9711074233055115,grad_norm: 0.7962074737461675, iteration: 53529
loss: 0.9867936968803406,grad_norm: 0.9447384016319147, iteration: 53530
loss: 1.0486104488372803,grad_norm: 0.9181016128388237, iteration: 53531
loss: 0.9691359400749207,grad_norm: 0.9726866833356327, iteration: 53532
loss: 1.0175999402999878,grad_norm: 1.0000000160218552, iteration: 53533
loss: 1.0273616313934326,grad_norm: 0.9091633759759625, iteration: 53534
loss: 1.0179550647735596,grad_norm: 0.8694381162051906, iteration: 53535
loss: 1.1657673120498657,grad_norm: 0.9999993161150889, iteration: 53536
loss: 1.0527888536453247,grad_norm: 0.9999992042198016, iteration: 53537
loss: 1.0280745029449463,grad_norm: 0.9316803508706828, iteration: 53538
loss: 1.0694241523742676,grad_norm: 0.9999991268729648, iteration: 53539
loss: 1.0018972158432007,grad_norm: 0.9479809780058005, iteration: 53540
loss: 1.0373259782791138,grad_norm: 0.9999992831663583, iteration: 53541
loss: 1.0234920978546143,grad_norm: 0.9999991844761045, iteration: 53542
loss: 1.014585018157959,grad_norm: 0.9999990926844683, iteration: 53543
loss: 1.0097856521606445,grad_norm: 0.9023418848707653, iteration: 53544
loss: 0.9627150893211365,grad_norm: 0.9999991549605686, iteration: 53545
loss: 1.029167890548706,grad_norm: 0.8145775122621882, iteration: 53546
loss: 1.038779377937317,grad_norm: 0.9999997111848382, iteration: 53547
loss: 0.9935873746871948,grad_norm: 0.8540189653966123, iteration: 53548
loss: 0.9711698889732361,grad_norm: 0.8791480539362393, iteration: 53549
loss: 1.0557825565338135,grad_norm: 0.9559825403118449, iteration: 53550
loss: 1.0082652568817139,grad_norm: 0.9999991067501751, iteration: 53551
loss: 0.9895732402801514,grad_norm: 0.9871279933305799, iteration: 53552
loss: 0.9853460192680359,grad_norm: 0.7862048331287832, iteration: 53553
loss: 1.0536739826202393,grad_norm: 0.9999991489133987, iteration: 53554
loss: 0.9942219853401184,grad_norm: 0.9999994304553281, iteration: 53555
loss: 1.0088154077529907,grad_norm: 0.8454738014151132, iteration: 53556
loss: 1.040809988975525,grad_norm: 0.9999991266622976, iteration: 53557
loss: 1.0654762983322144,grad_norm: 0.9999993997952409, iteration: 53558
loss: 1.0109671354293823,grad_norm: 0.9952148352052458, iteration: 53559
loss: 1.0461198091506958,grad_norm: 0.9999993223521134, iteration: 53560
loss: 0.983546793460846,grad_norm: 0.9999990628652161, iteration: 53561
loss: 0.9793457388877869,grad_norm: 0.9999990409406576, iteration: 53562
loss: 1.0240118503570557,grad_norm: 0.8954096392628715, iteration: 53563
loss: 1.1246534585952759,grad_norm: 0.9999994227320389, iteration: 53564
loss: 1.0081782341003418,grad_norm: 0.8753541453514294, iteration: 53565
loss: 1.0120221376419067,grad_norm: 0.9417169556131433, iteration: 53566
loss: 0.9605847597122192,grad_norm: 0.9999990357064152, iteration: 53567
loss: 0.9645878076553345,grad_norm: 0.871721530633816, iteration: 53568
loss: 1.0272668600082397,grad_norm: 0.999999326012707, iteration: 53569
loss: 1.025817632675171,grad_norm: 0.9999991669904961, iteration: 53570
loss: 1.0363541841506958,grad_norm: 0.9999990965112493, iteration: 53571
loss: 1.043035626411438,grad_norm: 0.9999995878011364, iteration: 53572
loss: 1.0173912048339844,grad_norm: 0.9999990732607286, iteration: 53573
loss: 1.0669456720352173,grad_norm: 0.9999998816661528, iteration: 53574
loss: 1.0466511249542236,grad_norm: 0.9999989961995405, iteration: 53575
loss: 1.0147567987442017,grad_norm: 0.8514522895411503, iteration: 53576
loss: 0.9975274205207825,grad_norm: 0.9155391911184484, iteration: 53577
loss: 0.9883891940116882,grad_norm: 0.9999991084234205, iteration: 53578
loss: 1.014072299003601,grad_norm: 0.9999995627659629, iteration: 53579
loss: 1.0595628023147583,grad_norm: 0.9594865876604647, iteration: 53580
loss: 1.0890491008758545,grad_norm: 0.9704073367476551, iteration: 53581
loss: 1.0732508897781372,grad_norm: 0.9999991629764979, iteration: 53582
loss: 1.0584927797317505,grad_norm: 0.9999996159126519, iteration: 53583
loss: 0.9883086085319519,grad_norm: 0.8112740216868576, iteration: 53584
loss: 0.9804918169975281,grad_norm: 0.9999989284345949, iteration: 53585
loss: 1.1017372608184814,grad_norm: 0.9999990839334373, iteration: 53586
loss: 1.0382710695266724,grad_norm: 0.9999989680367923, iteration: 53587
loss: 1.0287818908691406,grad_norm: 0.999999087597744, iteration: 53588
loss: 1.043666124343872,grad_norm: 0.9373772311182892, iteration: 53589
loss: 1.031711220741272,grad_norm: 0.9208360758225085, iteration: 53590
loss: 1.0183122158050537,grad_norm: 0.7665934778879849, iteration: 53591
loss: 1.0114943981170654,grad_norm: 0.9999990494507098, iteration: 53592
loss: 0.9724594354629517,grad_norm: 0.8080537803559407, iteration: 53593
loss: 0.9622296690940857,grad_norm: 0.9999991030881784, iteration: 53594
loss: 0.986968457698822,grad_norm: 0.999999209963795, iteration: 53595
loss: 1.0137404203414917,grad_norm: 0.9999991790079846, iteration: 53596
loss: 1.0559260845184326,grad_norm: 0.9999995241734758, iteration: 53597
loss: 0.9635166525840759,grad_norm: 0.9584760904190359, iteration: 53598
loss: 0.9808312058448792,grad_norm: 0.9108094075703111, iteration: 53599
loss: 1.003233790397644,grad_norm: 0.8333156761690156, iteration: 53600
loss: 1.0734336376190186,grad_norm: 0.926190408548277, iteration: 53601
loss: 0.9763678312301636,grad_norm: 0.9999990233288862, iteration: 53602
loss: 1.0224989652633667,grad_norm: 0.999999180572023, iteration: 53603
loss: 1.0356045961380005,grad_norm: 0.8605525448706398, iteration: 53604
loss: 1.0438741445541382,grad_norm: 0.9999998283395218, iteration: 53605
loss: 1.016187071800232,grad_norm: 0.969803888752577, iteration: 53606
loss: 1.0155504941940308,grad_norm: 0.9999993743163484, iteration: 53607
loss: 1.0096986293792725,grad_norm: 0.9999992134523973, iteration: 53608
loss: 0.9832812547683716,grad_norm: 0.9999991612068715, iteration: 53609
loss: 0.9983569383621216,grad_norm: 0.9367339303167657, iteration: 53610
loss: 1.0236138105392456,grad_norm: 0.9473114763420963, iteration: 53611
loss: 1.059679627418518,grad_norm: 0.9999991372702546, iteration: 53612
loss: 0.9981737732887268,grad_norm: 0.9999991881484908, iteration: 53613
loss: 1.060897707939148,grad_norm: 0.9999996236087851, iteration: 53614
loss: 1.1492788791656494,grad_norm: 0.999999594998398, iteration: 53615
loss: 1.1300076246261597,grad_norm: 0.9999998306259253, iteration: 53616
loss: 1.132838487625122,grad_norm: 0.9999998544084077, iteration: 53617
loss: 1.0893480777740479,grad_norm: 0.9999996579876878, iteration: 53618
loss: 1.1292603015899658,grad_norm: 0.9999999297192352, iteration: 53619
loss: 1.0922874212265015,grad_norm: 0.999999230191835, iteration: 53620
loss: 1.0224969387054443,grad_norm: 0.9999998469259653, iteration: 53621
loss: 1.01108717918396,grad_norm: 0.9999997199298846, iteration: 53622
loss: 1.0196268558502197,grad_norm: 0.8935841640369153, iteration: 53623
loss: 0.9686111807823181,grad_norm: 0.8628099555821643, iteration: 53624
loss: 0.9802426099777222,grad_norm: 0.9479709481903338, iteration: 53625
loss: 1.1205350160598755,grad_norm: 0.9999997607829738, iteration: 53626
loss: 1.0181843042373657,grad_norm: 0.9999991080312803, iteration: 53627
loss: 0.9961863160133362,grad_norm: 0.9999991763091133, iteration: 53628
loss: 1.0132498741149902,grad_norm: 0.979392412868583, iteration: 53629
loss: 1.00363290309906,grad_norm: 0.9505479242058408, iteration: 53630
loss: 1.0052396059036255,grad_norm: 0.7750267720778004, iteration: 53631
loss: 1.0284305810928345,grad_norm: 0.8004102843343076, iteration: 53632
loss: 0.9681474566459656,grad_norm: 0.9320302159246863, iteration: 53633
loss: 1.059005856513977,grad_norm: 0.999999286935223, iteration: 53634
loss: 1.030037522315979,grad_norm: 0.9999992791955403, iteration: 53635
loss: 1.0643025636672974,grad_norm: 0.999999630182889, iteration: 53636
loss: 0.9897584915161133,grad_norm: 0.8866949039642115, iteration: 53637
loss: 1.014845609664917,grad_norm: 0.7862791267805473, iteration: 53638
loss: 0.9750478863716125,grad_norm: 0.9985022820994877, iteration: 53639
loss: 1.1179929971694946,grad_norm: 0.9999992387105174, iteration: 53640
loss: 1.1332803964614868,grad_norm: 0.9999996904973051, iteration: 53641
loss: 1.0136079788208008,grad_norm: 0.9999990458298809, iteration: 53642
loss: 1.0441689491271973,grad_norm: 0.9791089142874962, iteration: 53643
loss: 1.0385743379592896,grad_norm: 0.9627267828236831, iteration: 53644
loss: 1.0187726020812988,grad_norm: 0.9614145750436236, iteration: 53645
loss: 1.0133100748062134,grad_norm: 0.9999990858315927, iteration: 53646
loss: 1.015965461730957,grad_norm: 0.8617716853824586, iteration: 53647
loss: 1.0349950790405273,grad_norm: 0.9118269928649347, iteration: 53648
loss: 0.9960912466049194,grad_norm: 0.9999990927407258, iteration: 53649
loss: 1.070851445198059,grad_norm: 0.9682597892592923, iteration: 53650
loss: 1.005365014076233,grad_norm: 0.8504969880029485, iteration: 53651
loss: 1.0835139751434326,grad_norm: 0.9999989923123433, iteration: 53652
loss: 1.129334568977356,grad_norm: 0.9122835898875657, iteration: 53653
loss: 1.0488394498825073,grad_norm: 0.9999991531968145, iteration: 53654
loss: 1.040795087814331,grad_norm: 0.9999992111474357, iteration: 53655
loss: 1.0336658954620361,grad_norm: 0.9999995575442335, iteration: 53656
loss: 0.9751578569412231,grad_norm: 0.7944192400748327, iteration: 53657
loss: 1.0423566102981567,grad_norm: 0.9999993124897086, iteration: 53658
loss: 1.0139570236206055,grad_norm: 0.8708077488028406, iteration: 53659
loss: 1.0718673467636108,grad_norm: 0.9999991270554408, iteration: 53660
loss: 1.0027443170547485,grad_norm: 0.8236560339829466, iteration: 53661
loss: 1.026968240737915,grad_norm: 0.7644472774763239, iteration: 53662
loss: 1.0012707710266113,grad_norm: 0.9999994216545364, iteration: 53663
loss: 1.0028514862060547,grad_norm: 0.9999991165642452, iteration: 53664
loss: 0.9926098585128784,grad_norm: 0.9111720764412685, iteration: 53665
loss: 1.0568875074386597,grad_norm: 0.9999993714259877, iteration: 53666
loss: 1.0127716064453125,grad_norm: 0.9345572719332721, iteration: 53667
loss: 0.986598551273346,grad_norm: 0.9999992731311441, iteration: 53668
loss: 1.0472651720046997,grad_norm: 0.9568879571040516, iteration: 53669
loss: 1.0114173889160156,grad_norm: 0.9211942595296198, iteration: 53670
loss: 0.9851124286651611,grad_norm: 0.916353986822689, iteration: 53671
loss: 0.9856384992599487,grad_norm: 0.9398042326375534, iteration: 53672
loss: 1.0209596157073975,grad_norm: 0.8206014021795296, iteration: 53673
loss: 1.012712001800537,grad_norm: 0.9999998705345662, iteration: 53674
loss: 1.020493745803833,grad_norm: 0.9301359723925826, iteration: 53675
loss: 1.000760793685913,grad_norm: 0.9999991375762091, iteration: 53676
loss: 1.016895055770874,grad_norm: 0.9999996262843646, iteration: 53677
loss: 1.0025899410247803,grad_norm: 0.9467190478907614, iteration: 53678
loss: 0.9868214726448059,grad_norm: 0.9999990559467641, iteration: 53679
loss: 1.0037893056869507,grad_norm: 0.9035379870098599, iteration: 53680
loss: 1.0301311016082764,grad_norm: 0.9999998926122753, iteration: 53681
loss: 1.0156141519546509,grad_norm: 0.9999993207001926, iteration: 53682
loss: 1.0012723207473755,grad_norm: 0.9999996340099566, iteration: 53683
loss: 1.0238555669784546,grad_norm: 0.8669285554723519, iteration: 53684
loss: 0.9909713864326477,grad_norm: 0.9999989508785996, iteration: 53685
loss: 0.9583652019500732,grad_norm: 0.983701558956982, iteration: 53686
loss: 1.0379105806350708,grad_norm: 0.8583878268653374, iteration: 53687
loss: 0.9412843585014343,grad_norm: 0.9452524172281802, iteration: 53688
loss: 1.0168733596801758,grad_norm: 0.9999994444225704, iteration: 53689
loss: 1.006604790687561,grad_norm: 0.8284076177223862, iteration: 53690
loss: 1.0573655366897583,grad_norm: 0.9999991969421299, iteration: 53691
loss: 1.007683277130127,grad_norm: 0.953322317162476, iteration: 53692
loss: 1.027764916419983,grad_norm: 0.9999995259200902, iteration: 53693
loss: 1.0312696695327759,grad_norm: 0.9999992507169457, iteration: 53694
loss: 1.1097345352172852,grad_norm: 0.9837662171005235, iteration: 53695
loss: 1.0003844499588013,grad_norm: 0.9691036419535998, iteration: 53696
loss: 1.0458122491836548,grad_norm: 0.9999993441915125, iteration: 53697
loss: 1.050115942955017,grad_norm: 0.9999995084320796, iteration: 53698
loss: 1.0449354648590088,grad_norm: 0.9999997649430848, iteration: 53699
loss: 1.0704110860824585,grad_norm: 0.9999997349990306, iteration: 53700
loss: 1.0123043060302734,grad_norm: 0.8836151878511834, iteration: 53701
loss: 1.1534751653671265,grad_norm: 0.9999999429439755, iteration: 53702
loss: 1.0852937698364258,grad_norm: 0.9999993665188164, iteration: 53703
loss: 1.100199818611145,grad_norm: 0.9999990201968647, iteration: 53704
loss: 1.1853272914886475,grad_norm: 0.9999992075801004, iteration: 53705
loss: 1.0441184043884277,grad_norm: 0.9704579953569502, iteration: 53706
loss: 1.0681591033935547,grad_norm: 0.9999998238359715, iteration: 53707
loss: 1.0017290115356445,grad_norm: 0.9999993443552448, iteration: 53708
loss: 1.0655303001403809,grad_norm: 0.999999238496183, iteration: 53709
loss: 1.0393476486206055,grad_norm: 0.9579453566617925, iteration: 53710
loss: 1.0084348917007446,grad_norm: 0.9999992531195754, iteration: 53711
loss: 1.0466437339782715,grad_norm: 0.9999992749080571, iteration: 53712
loss: 1.0100141763687134,grad_norm: 0.9999991231013639, iteration: 53713
loss: 1.036296010017395,grad_norm: 0.832468024172923, iteration: 53714
loss: 1.0045486688613892,grad_norm: 0.985599871767138, iteration: 53715
loss: 0.9780744314193726,grad_norm: 0.9999992121567215, iteration: 53716
loss: 0.9951180815696716,grad_norm: 0.8816418084065076, iteration: 53717
loss: 1.0307458639144897,grad_norm: 0.9999992054587913, iteration: 53718
loss: 0.9492804408073425,grad_norm: 0.9383359484848521, iteration: 53719
loss: 0.9852502942085266,grad_norm: 0.9999992828298798, iteration: 53720
loss: 1.0754497051239014,grad_norm: 0.9999991035063573, iteration: 53721
loss: 1.028596043586731,grad_norm: 0.9999990907609979, iteration: 53722
loss: 1.0367101430892944,grad_norm: 0.8872328782286012, iteration: 53723
loss: 1.009026050567627,grad_norm: 0.9999996082330305, iteration: 53724
loss: 0.9977090954780579,grad_norm: 0.8467478544745024, iteration: 53725
loss: 1.04555082321167,grad_norm: 0.9999993587227727, iteration: 53726
loss: 1.0516347885131836,grad_norm: 0.9999989685538458, iteration: 53727
loss: 0.9974250197410583,grad_norm: 0.8739439277347781, iteration: 53728
loss: 0.9764516353607178,grad_norm: 0.8206087133988655, iteration: 53729
loss: 0.9898815751075745,grad_norm: 0.7680819733306932, iteration: 53730
loss: 0.9626328945159912,grad_norm: 0.8376256396194169, iteration: 53731
loss: 0.9584798812866211,grad_norm: 0.8679769396133022, iteration: 53732
loss: 1.014304518699646,grad_norm: 0.9999999405788353, iteration: 53733
loss: 0.9915291666984558,grad_norm: 0.9999990796585879, iteration: 53734
loss: 0.9745379686355591,grad_norm: 0.9728355620595474, iteration: 53735
loss: 1.0069968700408936,grad_norm: 0.9669121108283154, iteration: 53736
loss: 0.9816476106643677,grad_norm: 0.9999990600277899, iteration: 53737
loss: 1.0018869638442993,grad_norm: 0.99999923315754, iteration: 53738
loss: 1.0083699226379395,grad_norm: 0.9999993991489378, iteration: 53739
loss: 1.1111252307891846,grad_norm: 0.999999655389472, iteration: 53740
loss: 1.0421192646026611,grad_norm: 0.999999897333925, iteration: 53741
loss: 1.0131248235702515,grad_norm: 0.9999991633664819, iteration: 53742
loss: 1.03231942653656,grad_norm: 0.8813569534876421, iteration: 53743
loss: 1.0259469747543335,grad_norm: 0.9999990507388647, iteration: 53744
loss: 1.0216012001037598,grad_norm: 0.9007902867102727, iteration: 53745
loss: 1.0321698188781738,grad_norm: 0.9999995390503396, iteration: 53746
loss: 1.0614086389541626,grad_norm: 0.9272259519180965, iteration: 53747
loss: 0.9926875829696655,grad_norm: 0.9999992488093729, iteration: 53748
loss: 1.0210663080215454,grad_norm: 0.8862180972083183, iteration: 53749
loss: 0.9857136011123657,grad_norm: 0.9999992202927351, iteration: 53750
loss: 1.0387409925460815,grad_norm: 0.8886743858506545, iteration: 53751
loss: 1.0279592275619507,grad_norm: 0.9999997095177449, iteration: 53752
loss: 0.9802815318107605,grad_norm: 0.9934427235125851, iteration: 53753
loss: 0.9662482142448425,grad_norm: 0.9999993431521573, iteration: 53754
loss: 0.9858611822128296,grad_norm: 0.8940404213500912, iteration: 53755
loss: 1.0019639730453491,grad_norm: 0.9999990016267718, iteration: 53756
loss: 0.9989457726478577,grad_norm: 0.9089387133037461, iteration: 53757
loss: 0.9560635089874268,grad_norm: 0.9999990879772713, iteration: 53758
loss: 0.9950879216194153,grad_norm: 0.9999992927955271, iteration: 53759
loss: 1.021368384361267,grad_norm: 0.9999993539222526, iteration: 53760
loss: 1.0492265224456787,grad_norm: 0.9999991733724358, iteration: 53761
loss: 1.0175832509994507,grad_norm: 0.9999992181245421, iteration: 53762
loss: 1.0067273378372192,grad_norm: 0.9371529241671979, iteration: 53763
loss: 1.0361651182174683,grad_norm: 0.9702736484104818, iteration: 53764
loss: 1.0277377367019653,grad_norm: 0.9999993712030504, iteration: 53765
loss: 1.008367657661438,grad_norm: 0.9999991971571656, iteration: 53766
loss: 0.9910190105438232,grad_norm: 0.8554568631019507, iteration: 53767
loss: 1.0110036134719849,grad_norm: 0.9999994147333948, iteration: 53768
loss: 1.0130934715270996,grad_norm: 0.9650625034248628, iteration: 53769
loss: 1.0400733947753906,grad_norm: 0.9999994543562114, iteration: 53770
loss: 1.0147236585617065,grad_norm: 0.9999992126217916, iteration: 53771
loss: 1.0066522359848022,grad_norm: 0.9783509399881201, iteration: 53772
loss: 1.022670865058899,grad_norm: 0.9999997177701817, iteration: 53773
loss: 1.0453776121139526,grad_norm: 0.7892612247987554, iteration: 53774
loss: 0.9988993406295776,grad_norm: 0.8243298842251083, iteration: 53775
loss: 0.9804248213768005,grad_norm: 0.9230383776480754, iteration: 53776
loss: 1.0733282566070557,grad_norm: 0.9999988957282366, iteration: 53777
loss: 0.9902543425559998,grad_norm: 0.8741676987808699, iteration: 53778
loss: 1.0277996063232422,grad_norm: 0.9576078452532029, iteration: 53779
loss: 1.0052943229675293,grad_norm: 0.8600491150535234, iteration: 53780
loss: 1.004232406616211,grad_norm: 0.9999990067757349, iteration: 53781
loss: 1.016045331954956,grad_norm: 0.98237118490209, iteration: 53782
loss: 0.9974179267883301,grad_norm: 0.9855237464991405, iteration: 53783
loss: 0.989395797252655,grad_norm: 0.838751522643325, iteration: 53784
loss: 1.0319161415100098,grad_norm: 0.957452544279671, iteration: 53785
loss: 1.0077937841415405,grad_norm: 0.9804989873798199, iteration: 53786
loss: 0.9578350186347961,grad_norm: 0.909248268193673, iteration: 53787
loss: 1.100068211555481,grad_norm: 0.9999992169336176, iteration: 53788
loss: 0.9648807048797607,grad_norm: 0.9999994316807211, iteration: 53789
loss: 0.9582601189613342,grad_norm: 0.9999990304698971, iteration: 53790
loss: 0.9600594639778137,grad_norm: 0.9999991230402611, iteration: 53791
loss: 1.0106481313705444,grad_norm: 0.7499056887594897, iteration: 53792
loss: 0.9746317863464355,grad_norm: 0.8068894945644313, iteration: 53793
loss: 0.9986823797225952,grad_norm: 0.9885221000004667, iteration: 53794
loss: 0.9743223786354065,grad_norm: 0.8501178169674712, iteration: 53795
loss: 1.0111284255981445,grad_norm: 0.9459046932945626, iteration: 53796
loss: 1.0003942251205444,grad_norm: 0.9999990640825552, iteration: 53797
loss: 1.0396291017532349,grad_norm: 0.9999990578787356, iteration: 53798
loss: 0.9693776369094849,grad_norm: 0.8745902933625412, iteration: 53799
loss: 1.0204473733901978,grad_norm: 0.9999991377658561, iteration: 53800
loss: 1.116995096206665,grad_norm: 0.9999995438745696, iteration: 53801
loss: 0.9783124923706055,grad_norm: 0.9896160309127355, iteration: 53802
loss: 0.9598874449729919,grad_norm: 0.8318229870437974, iteration: 53803
loss: 0.9733735918998718,grad_norm: 0.9553029043323439, iteration: 53804
loss: 0.9914370775222778,grad_norm: 0.7528635271087375, iteration: 53805
loss: 1.003761887550354,grad_norm: 0.9640929476627864, iteration: 53806
loss: 1.0564919710159302,grad_norm: 0.9999992633925723, iteration: 53807
loss: 1.0297291278839111,grad_norm: 0.9564839274936842, iteration: 53808
loss: 1.1057409048080444,grad_norm: 0.9999991802955011, iteration: 53809
loss: 0.973724365234375,grad_norm: 0.8985339284148962, iteration: 53810
loss: 0.9748011231422424,grad_norm: 0.999999212316197, iteration: 53811
loss: 1.0173016786575317,grad_norm: 0.9990558470939609, iteration: 53812
loss: 1.015649676322937,grad_norm: 0.8926696705451678, iteration: 53813
loss: 0.9851595163345337,grad_norm: 0.9999992328820265, iteration: 53814
loss: 1.0060287714004517,grad_norm: 0.9999990128017179, iteration: 53815
loss: 1.0458340644836426,grad_norm: 0.9999992142065401, iteration: 53816
loss: 0.9889082312583923,grad_norm: 0.999999165463142, iteration: 53817
loss: 0.978114128112793,grad_norm: 0.9999992127280265, iteration: 53818
loss: 0.9952961802482605,grad_norm: 0.9505172845467399, iteration: 53819
loss: 1.0664154291152954,grad_norm: 0.9999998616369555, iteration: 53820
loss: 0.960965633392334,grad_norm: 0.9778377917353958, iteration: 53821
loss: 1.1060996055603027,grad_norm: 0.9999995693469577, iteration: 53822
loss: 1.005282998085022,grad_norm: 0.9291876693506733, iteration: 53823
loss: 1.0015126466751099,grad_norm: 0.9506858306075395, iteration: 53824
loss: 0.9373076558113098,grad_norm: 0.9366356596990221, iteration: 53825
loss: 0.9657456278800964,grad_norm: 0.8917282574602003, iteration: 53826
loss: 0.9850509762763977,grad_norm: 0.7518604360638099, iteration: 53827
loss: 1.0162261724472046,grad_norm: 0.8583927074119773, iteration: 53828
loss: 0.9773135185241699,grad_norm: 0.9999990748930478, iteration: 53829
loss: 1.041907548904419,grad_norm: 0.9999989843975935, iteration: 53830
loss: 1.0510526895523071,grad_norm: 0.9407964153201398, iteration: 53831
loss: 1.0691126585006714,grad_norm: 0.9999991395654183, iteration: 53832
loss: 1.0144150257110596,grad_norm: 0.9327025617340972, iteration: 53833
loss: 0.9885748624801636,grad_norm: 0.8530006981122662, iteration: 53834
loss: 1.016780972480774,grad_norm: 0.7697203374396429, iteration: 53835
loss: 0.989851713180542,grad_norm: 0.7627237595650423, iteration: 53836
loss: 0.9790694713592529,grad_norm: 0.9286861377532029, iteration: 53837
loss: 1.0129796266555786,grad_norm: 0.9999996975911575, iteration: 53838
loss: 1.0464569330215454,grad_norm: 0.9776336944331023, iteration: 53839
loss: 1.01297926902771,grad_norm: 0.9999991389459346, iteration: 53840
loss: 1.0369932651519775,grad_norm: 0.9999991305734207, iteration: 53841
loss: 1.038198471069336,grad_norm: 0.9999991974990737, iteration: 53842
loss: 1.0456031560897827,grad_norm: 0.9999997797717783, iteration: 53843
loss: 0.9923585653305054,grad_norm: 0.9999992260997864, iteration: 53844
loss: 0.9800940155982971,grad_norm: 0.9913872505800119, iteration: 53845
loss: 0.9810876846313477,grad_norm: 0.894657384872878, iteration: 53846
loss: 1.0045310258865356,grad_norm: 0.999999383494799, iteration: 53847
loss: 1.0090528726577759,grad_norm: 0.9999990621060147, iteration: 53848
loss: 1.0323914289474487,grad_norm: 0.999999222355693, iteration: 53849
loss: 1.0578049421310425,grad_norm: 0.999999295020558, iteration: 53850
loss: 1.0146127939224243,grad_norm: 0.7902293112175814, iteration: 53851
loss: 1.0199215412139893,grad_norm: 0.9999996230593581, iteration: 53852
loss: 1.0093154907226562,grad_norm: 0.8740018851482094, iteration: 53853
loss: 0.9846033453941345,grad_norm: 0.9063098403249135, iteration: 53854
loss: 1.0377687215805054,grad_norm: 0.9999991802494316, iteration: 53855
loss: 1.0150146484375,grad_norm: 0.9678678046026172, iteration: 53856
loss: 0.9953103065490723,grad_norm: 0.9999990546661578, iteration: 53857
loss: 1.0218887329101562,grad_norm: 0.9999991467881528, iteration: 53858
loss: 1.040289044380188,grad_norm: 0.8659408010166756, iteration: 53859
loss: 0.9974477887153625,grad_norm: 0.8423791953332094, iteration: 53860
loss: 1.0173441171646118,grad_norm: 0.9999991231193374, iteration: 53861
loss: 1.124079942703247,grad_norm: 0.9999995199292955, iteration: 53862
loss: 0.9867396354675293,grad_norm: 0.9999992614545496, iteration: 53863
loss: 0.9692847728729248,grad_norm: 0.8502091161296044, iteration: 53864
loss: 1.2851502895355225,grad_norm: 0.9999999152096806, iteration: 53865
loss: 1.01935875415802,grad_norm: 0.9210395316717495, iteration: 53866
loss: 0.9896935820579529,grad_norm: 0.8362390541006418, iteration: 53867
loss: 1.0098035335540771,grad_norm: 0.9999999490357255, iteration: 53868
loss: 1.0996705293655396,grad_norm: 0.9999998060145774, iteration: 53869
loss: 1.0469142198562622,grad_norm: 0.9999996843614343, iteration: 53870
loss: 1.050997018814087,grad_norm: 0.9999993707153952, iteration: 53871
loss: 0.9962431788444519,grad_norm: 0.9999991901275193, iteration: 53872
loss: 1.0722267627716064,grad_norm: 0.9999994203943775, iteration: 53873
loss: 1.0285985469818115,grad_norm: 0.9241017822478811, iteration: 53874
loss: 1.0064218044281006,grad_norm: 0.9332239925980133, iteration: 53875
loss: 1.0199859142303467,grad_norm: 0.9184710647163031, iteration: 53876
loss: 1.2107443809509277,grad_norm: 0.9999994283380004, iteration: 53877
loss: 1.0962296724319458,grad_norm: 0.9999996836204172, iteration: 53878
loss: 1.0499699115753174,grad_norm: 0.9999992131804993, iteration: 53879
loss: 1.0291869640350342,grad_norm: 0.8112833866967251, iteration: 53880
loss: 1.0859689712524414,grad_norm: 0.9999999203804322, iteration: 53881
loss: 1.0006401538848877,grad_norm: 0.8785843761227093, iteration: 53882
loss: 1.0150558948516846,grad_norm: 0.859930623952179, iteration: 53883
loss: 1.0198835134506226,grad_norm: 0.9999996098161313, iteration: 53884
loss: 1.0517559051513672,grad_norm: 0.9999989697807826, iteration: 53885
loss: 1.1758241653442383,grad_norm: 0.9999996666980374, iteration: 53886
loss: 1.0324662923812866,grad_norm: 0.9999997983179906, iteration: 53887
loss: 1.0699881315231323,grad_norm: 0.999999094250346, iteration: 53888
loss: 0.9784358739852905,grad_norm: 0.9999997405489949, iteration: 53889
loss: 1.0584415197372437,grad_norm: 0.9999992811120824, iteration: 53890
loss: 0.9894385933876038,grad_norm: 0.9999996356462311, iteration: 53891
loss: 1.0464144945144653,grad_norm: 0.9999998174187856, iteration: 53892
loss: 1.0369468927383423,grad_norm: 0.9999992354715413, iteration: 53893
loss: 0.9882650971412659,grad_norm: 0.9104048623551223, iteration: 53894
loss: 0.9391389489173889,grad_norm: 0.9254063261812231, iteration: 53895
loss: 1.0196478366851807,grad_norm: 0.9837927999998284, iteration: 53896
loss: 1.001177430152893,grad_norm: 0.9999999974442009, iteration: 53897
loss: 1.0362821817398071,grad_norm: 0.8252943068398315, iteration: 53898
loss: 0.996321439743042,grad_norm: 0.9793091827828011, iteration: 53899
loss: 1.0388182401657104,grad_norm: 0.999999810352591, iteration: 53900
loss: 1.0161010026931763,grad_norm: 0.7823700411951605, iteration: 53901
loss: 0.990464985370636,grad_norm: 0.9999993469603935, iteration: 53902
loss: 1.0396648645401,grad_norm: 0.9999992514367728, iteration: 53903
loss: 0.9884169697761536,grad_norm: 0.9928202185545506, iteration: 53904
loss: 1.0120493173599243,grad_norm: 0.7468931183827381, iteration: 53905
loss: 1.0245915651321411,grad_norm: 0.9999991436539957, iteration: 53906
loss: 1.0015863180160522,grad_norm: 0.982877607317098, iteration: 53907
loss: 0.9789924025535583,grad_norm: 0.8855886988099752, iteration: 53908
loss: 1.0399389266967773,grad_norm: 0.860990820301021, iteration: 53909
loss: 1.0337270498275757,grad_norm: 0.9999992205681217, iteration: 53910
loss: 1.0047121047973633,grad_norm: 0.7582181149084355, iteration: 53911
loss: 1.118712306022644,grad_norm: 0.9999996206631777, iteration: 53912
loss: 1.00485098361969,grad_norm: 0.9999991962732667, iteration: 53913
loss: 1.038761854171753,grad_norm: 0.8448997708001448, iteration: 53914
loss: 1.1620919704437256,grad_norm: 0.999999308112571, iteration: 53915
loss: 1.0513358116149902,grad_norm: 0.9999993219489334, iteration: 53916
loss: 0.9993577599525452,grad_norm: 0.9407667731827543, iteration: 53917
loss: 1.0039747953414917,grad_norm: 0.9999989767515878, iteration: 53918
loss: 1.0025955438613892,grad_norm: 0.9999991235916329, iteration: 53919
loss: 0.9880732893943787,grad_norm: 0.9508712925185407, iteration: 53920
loss: 0.961513102054596,grad_norm: 0.9761909296926027, iteration: 53921
loss: 1.0188212394714355,grad_norm: 0.9213495320169819, iteration: 53922
loss: 1.0044493675231934,grad_norm: 0.8194756532236304, iteration: 53923
loss: 0.9976562261581421,grad_norm: 0.8354587171606283, iteration: 53924
loss: 1.0205039978027344,grad_norm: 0.9999991509486691, iteration: 53925
loss: 0.9989848136901855,grad_norm: 0.9999989612020274, iteration: 53926
loss: 1.0320892333984375,grad_norm: 0.9999989038271332, iteration: 53927
loss: 0.9724204540252686,grad_norm: 0.8745336027458811, iteration: 53928
loss: 1.0064997673034668,grad_norm: 0.8057541185782353, iteration: 53929
loss: 0.992658257484436,grad_norm: 0.8408791044614816, iteration: 53930
loss: 1.0380505323410034,grad_norm: 0.9999990613919917, iteration: 53931
loss: 0.9974750280380249,grad_norm: 0.9999992271648684, iteration: 53932
loss: 1.0117675065994263,grad_norm: 0.9999991285448433, iteration: 53933
loss: 1.0123038291931152,grad_norm: 0.8762033225920005, iteration: 53934
loss: 1.018445372581482,grad_norm: 0.9999990707908044, iteration: 53935
loss: 1.0348896980285645,grad_norm: 0.9738234243240225, iteration: 53936
loss: 0.9864830374717712,grad_norm: 0.9999991840251112, iteration: 53937
loss: 0.9770810008049011,grad_norm: 0.9291672311253127, iteration: 53938
loss: 0.9711648225784302,grad_norm: 0.8791500291167761, iteration: 53939
loss: 0.964634358882904,grad_norm: 0.8347090616704727, iteration: 53940
loss: 1.0578114986419678,grad_norm: 0.99999998751112, iteration: 53941
loss: 0.9755848050117493,grad_norm: 0.9261904953344505, iteration: 53942
loss: 1.006905436515808,grad_norm: 0.9999991870775673, iteration: 53943
loss: 1.1018201112747192,grad_norm: 0.9806424262977194, iteration: 53944
loss: 0.9873766303062439,grad_norm: 0.8656516887609992, iteration: 53945
loss: 0.9840516448020935,grad_norm: 0.884998562506185, iteration: 53946
loss: 0.999701976776123,grad_norm: 0.977571170657579, iteration: 53947
loss: 1.004893183708191,grad_norm: 0.956374687913451, iteration: 53948
loss: 1.0313528776168823,grad_norm: 0.9999990765616231, iteration: 53949
loss: 1.0187255144119263,grad_norm: 0.9999993190359074, iteration: 53950
loss: 0.9726113080978394,grad_norm: 0.8433948054875747, iteration: 53951
loss: 1.0238629579544067,grad_norm: 0.8424572815945455, iteration: 53952
loss: 0.962268590927124,grad_norm: 0.793207823253381, iteration: 53953
loss: 0.9759383797645569,grad_norm: 0.9453697641276327, iteration: 53954
loss: 0.968933641910553,grad_norm: 0.9272958427927568, iteration: 53955
loss: 1.0129890441894531,grad_norm: 0.9124000131778197, iteration: 53956
loss: 0.9937311410903931,grad_norm: 0.9999992156598315, iteration: 53957
loss: 1.0036890506744385,grad_norm: 0.9999991364655292, iteration: 53958
loss: 1.0091572999954224,grad_norm: 0.9999990364686457, iteration: 53959
loss: 0.9763740301132202,grad_norm: 0.9860583822892941, iteration: 53960
loss: 1.0136383771896362,grad_norm: 0.9999991273050108, iteration: 53961
loss: 0.9732086062431335,grad_norm: 0.8311028621628084, iteration: 53962
loss: 1.0248074531555176,grad_norm: 0.9813547295449504, iteration: 53963
loss: 0.9779355525970459,grad_norm: 0.7645076302695277, iteration: 53964
loss: 1.1265149116516113,grad_norm: 0.9999990777573875, iteration: 53965
loss: 1.0378344058990479,grad_norm: 0.9999999859991927, iteration: 53966
loss: 0.9948402047157288,grad_norm: 0.818395973526094, iteration: 53967
loss: 1.0195732116699219,grad_norm: 0.9999991417676832, iteration: 53968
loss: 0.9743945002555847,grad_norm: 0.9744537015867921, iteration: 53969
loss: 1.0040805339813232,grad_norm: 0.9999992678977423, iteration: 53970
loss: 1.0517666339874268,grad_norm: 0.9999993370009166, iteration: 53971
loss: 0.9758638739585876,grad_norm: 0.9999991402475067, iteration: 53972
loss: 1.0914140939712524,grad_norm: 0.9029511844027232, iteration: 53973
loss: 1.02902352809906,grad_norm: 0.999999340794022, iteration: 53974
loss: 1.0002645254135132,grad_norm: 0.9228350946038457, iteration: 53975
loss: 1.0359283685684204,grad_norm: 0.9879458744306809, iteration: 53976
loss: 0.940136194229126,grad_norm: 0.9999992622492881, iteration: 53977
loss: 1.052913784980774,grad_norm: 0.9999991784915452, iteration: 53978
loss: 0.9786495566368103,grad_norm: 0.8360355438900857, iteration: 53979
loss: 1.0010608434677124,grad_norm: 0.9999990248625628, iteration: 53980
loss: 1.0065817832946777,grad_norm: 0.9999989591723458, iteration: 53981
loss: 0.9888826608657837,grad_norm: 0.9409759800082281, iteration: 53982
loss: 0.9956563115119934,grad_norm: 0.8320740504664826, iteration: 53983
loss: 1.0007127523422241,grad_norm: 0.9999989429165614, iteration: 53984
loss: 0.9753707647323608,grad_norm: 0.9999991293596739, iteration: 53985
loss: 0.9964788556098938,grad_norm: 0.9999991612985984, iteration: 53986
loss: 0.9839482307434082,grad_norm: 0.9999995679809172, iteration: 53987
loss: 0.9729487895965576,grad_norm: 0.9189457252919846, iteration: 53988
loss: 0.993395209312439,grad_norm: 0.7995137455495798, iteration: 53989
loss: 1.0001274347305298,grad_norm: 0.9093414464626042, iteration: 53990
loss: 1.0330696105957031,grad_norm: 0.9999990490007599, iteration: 53991
loss: 1.0170691013336182,grad_norm: 0.9092971384694783, iteration: 53992
loss: 1.0244803428649902,grad_norm: 0.9999989393567502, iteration: 53993
loss: 1.0246282815933228,grad_norm: 0.976910109056414, iteration: 53994
loss: 1.0272995233535767,grad_norm: 0.9884257260413315, iteration: 53995
loss: 1.0315485000610352,grad_norm: 0.9999995982227946, iteration: 53996
loss: 1.0480014085769653,grad_norm: 0.8857454230016055, iteration: 53997
loss: 1.0031654834747314,grad_norm: 0.8605074109526315, iteration: 53998
loss: 1.0123296976089478,grad_norm: 0.9999998653797944, iteration: 53999
loss: 1.0224064588546753,grad_norm: 0.9999996212377058, iteration: 54000
loss: 1.0029295682907104,grad_norm: 0.9520735695923341, iteration: 54001
loss: 1.0259556770324707,grad_norm: 0.9999991815409182, iteration: 54002
loss: 1.0000568628311157,grad_norm: 0.9999991288717417, iteration: 54003
loss: 0.9954433441162109,grad_norm: 0.9409833951662089, iteration: 54004
loss: 1.010790228843689,grad_norm: 0.9999997949711762, iteration: 54005
loss: 1.0325685739517212,grad_norm: 0.9932472214450484, iteration: 54006
loss: 1.008996844291687,grad_norm: 0.9999992249189038, iteration: 54007
loss: 1.0272412300109863,grad_norm: 0.9999995993415041, iteration: 54008
loss: 0.9982325434684753,grad_norm: 0.9502060897326945, iteration: 54009
loss: 1.0427464246749878,grad_norm: 0.9999991581646979, iteration: 54010
loss: 0.9873166084289551,grad_norm: 0.9543166717559795, iteration: 54011
loss: 1.0779303312301636,grad_norm: 0.9999995953961393, iteration: 54012
loss: 1.0129145383834839,grad_norm: 0.9999995406967404, iteration: 54013
loss: 0.9892863035202026,grad_norm: 0.8399990254713611, iteration: 54014
loss: 0.9995695948600769,grad_norm: 0.999999810358101, iteration: 54015
loss: 1.0337693691253662,grad_norm: 0.9614524322396079, iteration: 54016
loss: 1.0092179775238037,grad_norm: 0.9999995544597563, iteration: 54017
loss: 0.9871795177459717,grad_norm: 0.9773612842701515, iteration: 54018
loss: 1.0384827852249146,grad_norm: 0.9999998863215187, iteration: 54019
loss: 0.9938579797744751,grad_norm: 0.8488696084130062, iteration: 54020
loss: 0.9865565299987793,grad_norm: 0.9743737941958777, iteration: 54021
loss: 0.9873532056808472,grad_norm: 0.7557362140085615, iteration: 54022
loss: 0.9641262888908386,grad_norm: 0.9286015009647016, iteration: 54023
loss: 0.9761336445808411,grad_norm: 0.9239640304214783, iteration: 54024
loss: 1.0623832941055298,grad_norm: 0.9999998239987222, iteration: 54025
loss: 1.0132603645324707,grad_norm: 0.9999996742408905, iteration: 54026
loss: 0.9994000792503357,grad_norm: 0.8559841131878946, iteration: 54027
loss: 1.0093389749526978,grad_norm: 0.979186552229617, iteration: 54028
loss: 1.0368361473083496,grad_norm: 0.9999995725817132, iteration: 54029
loss: 1.0577760934829712,grad_norm: 0.9999994292143439, iteration: 54030
loss: 1.0126761198043823,grad_norm: 0.9999993838287305, iteration: 54031
loss: 1.0710854530334473,grad_norm: 0.9999994508326167, iteration: 54032
loss: 1.0023415088653564,grad_norm: 0.9999994884885678, iteration: 54033
loss: 0.9998082518577576,grad_norm: 0.9240248118276364, iteration: 54034
loss: 0.997489869594574,grad_norm: 0.9036643984683792, iteration: 54035
loss: 1.0083907842636108,grad_norm: 0.9999992762863622, iteration: 54036
loss: 1.0038808584213257,grad_norm: 0.9999990435360816, iteration: 54037
loss: 1.0144245624542236,grad_norm: 0.9999991408770361, iteration: 54038
loss: 1.1483603715896606,grad_norm: 0.9999991947678886, iteration: 54039
loss: 0.9944704174995422,grad_norm: 0.8100703868344987, iteration: 54040
loss: 1.0168360471725464,grad_norm: 0.8482475887253389, iteration: 54041
loss: 1.084154725074768,grad_norm: 0.9999998338103336, iteration: 54042
loss: 0.9915786981582642,grad_norm: 0.7447630131481359, iteration: 54043
loss: 1.0156961679458618,grad_norm: 0.848235661918088, iteration: 54044
loss: 1.0151125192642212,grad_norm: 0.8290882475156152, iteration: 54045
loss: 1.1704342365264893,grad_norm: 0.9999992565552461, iteration: 54046
loss: 1.0259066820144653,grad_norm: 0.9999991406058177, iteration: 54047
loss: 1.0534770488739014,grad_norm: 0.9999995905204381, iteration: 54048
loss: 1.0034911632537842,grad_norm: 0.9999990987236096, iteration: 54049
loss: 0.9839587807655334,grad_norm: 0.9817922329123346, iteration: 54050
loss: 1.0567786693572998,grad_norm: 0.9771262525159384, iteration: 54051
loss: 1.0074779987335205,grad_norm: 0.9865906089122092, iteration: 54052
loss: 1.0426537990570068,grad_norm: 0.9999993359618353, iteration: 54053
loss: 1.0083447694778442,grad_norm: 0.9999992354979731, iteration: 54054
loss: 1.1069166660308838,grad_norm: 0.9999993394837697, iteration: 54055
loss: 1.0513641834259033,grad_norm: 0.9999995385236633, iteration: 54056
loss: 1.0013493299484253,grad_norm: 0.9999991646731743, iteration: 54057
loss: 1.0731741189956665,grad_norm: 0.9256139642099733, iteration: 54058
loss: 1.0046404600143433,grad_norm: 0.9999989957888851, iteration: 54059
loss: 1.1017440557479858,grad_norm: 0.999999343696959, iteration: 54060
loss: 1.1717039346694946,grad_norm: 0.9999997689972553, iteration: 54061
loss: 0.996727705001831,grad_norm: 0.9999992286871912, iteration: 54062
loss: 1.0052783489227295,grad_norm: 0.9999990227560583, iteration: 54063
loss: 1.0030499696731567,grad_norm: 0.8585324377908017, iteration: 54064
loss: 0.9988875389099121,grad_norm: 0.9033864219116231, iteration: 54065
loss: 0.9842724204063416,grad_norm: 0.9999991444097466, iteration: 54066
loss: 1.0140210390090942,grad_norm: 0.9999990664423287, iteration: 54067
loss: 1.000841736793518,grad_norm: 0.9605030980007758, iteration: 54068
loss: 1.0298097133636475,grad_norm: 0.9999990464910834, iteration: 54069
loss: 1.0458909273147583,grad_norm: 0.9999991126006931, iteration: 54070
loss: 1.026214838027954,grad_norm: 0.9999991107760374, iteration: 54071
loss: 0.9895439743995667,grad_norm: 0.9999992621052556, iteration: 54072
loss: 1.0298634767532349,grad_norm: 0.8731584784479901, iteration: 54073
loss: 0.9971615672111511,grad_norm: 0.9999989801277013, iteration: 54074
loss: 1.0065267086029053,grad_norm: 0.9678490382563976, iteration: 54075
loss: 1.0148006677627563,grad_norm: 0.9999991416807285, iteration: 54076
loss: 1.0164966583251953,grad_norm: 0.9999993411024878, iteration: 54077
loss: 1.0114699602127075,grad_norm: 0.9999991737310167, iteration: 54078
loss: 1.114667296409607,grad_norm: 0.9999992399942713, iteration: 54079
loss: 1.0326493978500366,grad_norm: 0.9999991140135926, iteration: 54080
loss: 0.9942676424980164,grad_norm: 0.9999989996697557, iteration: 54081
loss: 1.0118577480316162,grad_norm: 0.8573318659383593, iteration: 54082
loss: 1.0216593742370605,grad_norm: 0.9999992191879831, iteration: 54083
loss: 1.0072356462478638,grad_norm: 0.7871188156912495, iteration: 54084
loss: 1.0155842304229736,grad_norm: 0.9999989832926501, iteration: 54085
loss: 1.006924033164978,grad_norm: 0.856972702425175, iteration: 54086
loss: 1.0337456464767456,grad_norm: 0.9790645716975893, iteration: 54087
loss: 1.0955222845077515,grad_norm: 0.9999997350337224, iteration: 54088
loss: 1.0269194841384888,grad_norm: 0.8022228225467796, iteration: 54089
loss: 1.03672456741333,grad_norm: 0.8491078354121883, iteration: 54090
loss: 0.970873236656189,grad_norm: 0.9375962364378877, iteration: 54091
loss: 0.9736884236335754,grad_norm: 0.9999991599298679, iteration: 54092
loss: 1.0236196517944336,grad_norm: 0.9999993506722579, iteration: 54093
loss: 1.0173985958099365,grad_norm: 0.9418841193820083, iteration: 54094
loss: 1.0314967632293701,grad_norm: 0.858204099307012, iteration: 54095
loss: 0.9852910041809082,grad_norm: 0.9324799386537188, iteration: 54096
loss: 1.0255556106567383,grad_norm: 0.999999365930963, iteration: 54097
loss: 0.9991232752799988,grad_norm: 0.9999989268067132, iteration: 54098
loss: 1.0085303783416748,grad_norm: 0.9411458730358402, iteration: 54099
loss: 0.9859353303909302,grad_norm: 0.9662425184441483, iteration: 54100
loss: 0.9828609228134155,grad_norm: 0.9103891156036494, iteration: 54101
loss: 0.9986410140991211,grad_norm: 0.9475714399694752, iteration: 54102
loss: 1.0122343301773071,grad_norm: 0.9999997680273626, iteration: 54103
loss: 1.0038069486618042,grad_norm: 0.9413646788961412, iteration: 54104
loss: 1.0155609846115112,grad_norm: 0.9002884431367845, iteration: 54105
loss: 0.9944421052932739,grad_norm: 0.9999991105987296, iteration: 54106
loss: 1.033677101135254,grad_norm: 0.9828992339008239, iteration: 54107
loss: 1.012813925743103,grad_norm: 0.9463849312415048, iteration: 54108
loss: 0.9933897256851196,grad_norm: 0.8530865750431766, iteration: 54109
loss: 1.007207989692688,grad_norm: 0.8078220002931813, iteration: 54110
loss: 1.0321600437164307,grad_norm: 0.9375974748998868, iteration: 54111
loss: 1.016127347946167,grad_norm: 0.9999992957100835, iteration: 54112
loss: 1.0115253925323486,grad_norm: 0.9999996889528274, iteration: 54113
loss: 0.9689700603485107,grad_norm: 0.7975557396501102, iteration: 54114
loss: 1.0283459424972534,grad_norm: 0.9999991224329458, iteration: 54115
loss: 0.9933431148529053,grad_norm: 0.8548753836455125, iteration: 54116
loss: 0.9998009204864502,grad_norm: 0.8980252598463246, iteration: 54117
loss: 1.0032976865768433,grad_norm: 0.9646604585073755, iteration: 54118
loss: 0.9846323728561401,grad_norm: 0.9086885065695646, iteration: 54119
loss: 1.041039228439331,grad_norm: 0.9999993633530755, iteration: 54120
loss: 0.9867372512817383,grad_norm: 0.9496427922614286, iteration: 54121
loss: 0.9916662573814392,grad_norm: 0.999999003204465, iteration: 54122
loss: 1.0086698532104492,grad_norm: 0.9999990054959791, iteration: 54123
loss: 1.001155138015747,grad_norm: 0.8884831028944138, iteration: 54124
loss: 1.027267575263977,grad_norm: 0.9841806287367831, iteration: 54125
loss: 1.0003883838653564,grad_norm: 0.83990332967983, iteration: 54126
loss: 1.0183452367782593,grad_norm: 0.8902739401672157, iteration: 54127
loss: 0.9887213110923767,grad_norm: 0.9676361992179476, iteration: 54128
loss: 1.0245373249053955,grad_norm: 0.9668491730479474, iteration: 54129
loss: 1.007043480873108,grad_norm: 0.9999996074766467, iteration: 54130
loss: 1.018415093421936,grad_norm: 0.9999992996487467, iteration: 54131
loss: 1.1122090816497803,grad_norm: 0.9999991542210416, iteration: 54132
loss: 1.0493251085281372,grad_norm: 0.9999992308023653, iteration: 54133
loss: 1.014680027961731,grad_norm: 0.9999998064584007, iteration: 54134
loss: 1.039039969444275,grad_norm: 0.9999991332613061, iteration: 54135
loss: 0.9953257441520691,grad_norm: 0.9027012051277875, iteration: 54136
loss: 1.0193519592285156,grad_norm: 0.8816859168201203, iteration: 54137
loss: 1.0626070499420166,grad_norm: 0.9999996815618154, iteration: 54138
loss: 1.0614145994186401,grad_norm: 0.9999990208629184, iteration: 54139
loss: 1.0014314651489258,grad_norm: 0.9999990511164156, iteration: 54140
loss: 1.013370394706726,grad_norm: 0.8337246861856801, iteration: 54141
loss: 1.0224871635437012,grad_norm: 0.966067752514127, iteration: 54142
loss: 1.009965419769287,grad_norm: 0.9570246672643059, iteration: 54143
loss: 0.9877138137817383,grad_norm: 0.8863349118817255, iteration: 54144
loss: 1.0038868188858032,grad_norm: 0.9683324733014431, iteration: 54145
loss: 1.0276819467544556,grad_norm: 0.9999993418498244, iteration: 54146
loss: 1.0159672498703003,grad_norm: 0.9999995601233396, iteration: 54147
loss: 1.033097505569458,grad_norm: 0.7678463349671417, iteration: 54148
loss: 1.002060890197754,grad_norm: 0.9999993085168881, iteration: 54149
loss: 0.9681059122085571,grad_norm: 0.8953691422804332, iteration: 54150
loss: 0.977073609828949,grad_norm: 0.9138267099333186, iteration: 54151
loss: 1.0252718925476074,grad_norm: 0.9999990846606707, iteration: 54152
loss: 0.9848635196685791,grad_norm: 0.8051011870190432, iteration: 54153
loss: 0.976778507232666,grad_norm: 0.965477254952166, iteration: 54154
loss: 1.0974977016448975,grad_norm: 0.9999996565166575, iteration: 54155
loss: 1.0303740501403809,grad_norm: 0.8392161127252924, iteration: 54156
loss: 0.9839361310005188,grad_norm: 0.7787498927887772, iteration: 54157
loss: 0.9982659220695496,grad_norm: 0.9626978821248826, iteration: 54158
loss: 1.0161226987838745,grad_norm: 0.9956779789352965, iteration: 54159
loss: 1.0222342014312744,grad_norm: 0.9999993049877872, iteration: 54160
loss: 1.0161986351013184,grad_norm: 0.9999991412224217, iteration: 54161
loss: 1.0124688148498535,grad_norm: 0.9999991149810075, iteration: 54162
loss: 0.9702849984169006,grad_norm: 0.9512812059360294, iteration: 54163
loss: 1.0061439275741577,grad_norm: 0.851791568099927, iteration: 54164
loss: 0.9808540940284729,grad_norm: 0.9710457909924601, iteration: 54165
loss: 0.9956337809562683,grad_norm: 0.8104433588073048, iteration: 54166
loss: 0.9992642402648926,grad_norm: 0.8375686913319742, iteration: 54167
loss: 1.0037332773208618,grad_norm: 0.9861652550357575, iteration: 54168
loss: 1.0036499500274658,grad_norm: 0.9567611570526923, iteration: 54169
loss: 1.0030298233032227,grad_norm: 0.9313087103760969, iteration: 54170
loss: 1.024590253829956,grad_norm: 0.9378915952475606, iteration: 54171
loss: 1.0269722938537598,grad_norm: 0.866498247948675, iteration: 54172
loss: 1.0104246139526367,grad_norm: 0.8493122302464374, iteration: 54173
loss: 1.0236413478851318,grad_norm: 0.9915516354271187, iteration: 54174
loss: 0.9586091041564941,grad_norm: 0.9129463310250714, iteration: 54175
loss: 1.0139806270599365,grad_norm: 0.9999995780067139, iteration: 54176
loss: 0.9907706379890442,grad_norm: 0.9999991376430518, iteration: 54177
loss: 0.9661799073219299,grad_norm: 0.999999066499734, iteration: 54178
loss: 0.9818805456161499,grad_norm: 0.970681983176747, iteration: 54179
loss: 1.0068475008010864,grad_norm: 0.999999009783779, iteration: 54180
loss: 1.0449734926223755,grad_norm: 0.9999994214719529, iteration: 54181
loss: 1.009234070777893,grad_norm: 0.9120471437293459, iteration: 54182
loss: 0.9860942959785461,grad_norm: 0.828285216932038, iteration: 54183
loss: 1.0783560276031494,grad_norm: 0.9999991664680716, iteration: 54184
loss: 1.0118069648742676,grad_norm: 0.9999999974173369, iteration: 54185
loss: 1.0126872062683105,grad_norm: 0.8384079165316364, iteration: 54186
loss: 1.0012421607971191,grad_norm: 0.8879424148545244, iteration: 54187
loss: 0.9526289105415344,grad_norm: 0.9999990994193224, iteration: 54188
loss: 1.1847807168960571,grad_norm: 0.9999997899439963, iteration: 54189
loss: 1.0170682668685913,grad_norm: 0.937710217693009, iteration: 54190
loss: 0.9902012348175049,grad_norm: 0.9999995003214874, iteration: 54191
loss: 1.0046137571334839,grad_norm: 0.8358115323186234, iteration: 54192
loss: 1.02018404006958,grad_norm: 0.8217230670577687, iteration: 54193
loss: 0.9689910411834717,grad_norm: 0.9999991302155322, iteration: 54194
loss: 0.979717493057251,grad_norm: 0.8607581570159277, iteration: 54195
loss: 1.0213488340377808,grad_norm: 0.9999992446216582, iteration: 54196
loss: 1.002989649772644,grad_norm: 0.9999996896201296, iteration: 54197
loss: 0.991197943687439,grad_norm: 0.9999990460263368, iteration: 54198
loss: 1.0125815868377686,grad_norm: 0.8515390081719539, iteration: 54199
loss: 1.0124783515930176,grad_norm: 0.9999991527806832, iteration: 54200
loss: 1.0120314359664917,grad_norm: 0.8828538284229335, iteration: 54201
loss: 0.9926638603210449,grad_norm: 0.9999991696012485, iteration: 54202
loss: 0.9875420331954956,grad_norm: 0.9999990246416594, iteration: 54203
loss: 0.9703468084335327,grad_norm: 0.9999995763981646, iteration: 54204
loss: 1.0170340538024902,grad_norm: 0.8644929534624671, iteration: 54205
loss: 1.0694509744644165,grad_norm: 0.9999993814680548, iteration: 54206
loss: 1.035414457321167,grad_norm: 0.9999997881653127, iteration: 54207
loss: 0.9723827242851257,grad_norm: 0.8196899566012591, iteration: 54208
loss: 0.9983910918235779,grad_norm: 0.920588618441032, iteration: 54209
loss: 0.9795377254486084,grad_norm: 0.9223129709364399, iteration: 54210
loss: 1.1249966621398926,grad_norm: 0.9999996974066934, iteration: 54211
loss: 0.9895676374435425,grad_norm: 0.8595304814633026, iteration: 54212
loss: 0.9932818412780762,grad_norm: 0.7032494886775601, iteration: 54213
loss: 1.0517644882202148,grad_norm: 0.9999998569817073, iteration: 54214
loss: 1.067282795906067,grad_norm: 0.9999992241376774, iteration: 54215
loss: 0.9944868683815002,grad_norm: 0.9791618239972129, iteration: 54216
loss: 0.9570850133895874,grad_norm: 0.8887268213071559, iteration: 54217
loss: 1.035406231880188,grad_norm: 0.8601704492678741, iteration: 54218
loss: 1.0350995063781738,grad_norm: 0.9741595548157964, iteration: 54219
loss: 1.041832447052002,grad_norm: 0.9999991928243244, iteration: 54220
loss: 1.0009859800338745,grad_norm: 0.883447260194998, iteration: 54221
loss: 1.0306957960128784,grad_norm: 0.9277314706168788, iteration: 54222
loss: 1.0361182689666748,grad_norm: 0.9835314711438488, iteration: 54223
loss: 1.0016931295394897,grad_norm: 0.9999992540868322, iteration: 54224
loss: 0.9971733689308167,grad_norm: 0.9183396505150848, iteration: 54225
loss: 0.9914671778678894,grad_norm: 0.9999991345079287, iteration: 54226
loss: 1.0046837329864502,grad_norm: 0.9305832185093919, iteration: 54227
loss: 0.9928985834121704,grad_norm: 0.9282105493104088, iteration: 54228
loss: 0.9840944409370422,grad_norm: 0.8483140793957521, iteration: 54229
loss: 1.0396051406860352,grad_norm: 0.8754186728556635, iteration: 54230
loss: 0.987315833568573,grad_norm: 0.9677079280008131, iteration: 54231
loss: 1.0352705717086792,grad_norm: 0.9091963689822931, iteration: 54232
loss: 1.0022392272949219,grad_norm: 0.8726378510874427, iteration: 54233
loss: 1.0453486442565918,grad_norm: 0.9124291753311764, iteration: 54234
loss: 1.0018724203109741,grad_norm: 0.9999988387404415, iteration: 54235
loss: 1.0081431865692139,grad_norm: 0.9999995007503649, iteration: 54236
loss: 1.007057547569275,grad_norm: 0.9999993488747362, iteration: 54237
loss: 0.9762126207351685,grad_norm: 0.8196036187581753, iteration: 54238
loss: 1.0036735534667969,grad_norm: 0.9999992685287346, iteration: 54239
loss: 0.9931791424751282,grad_norm: 0.9999991177561769, iteration: 54240
loss: 0.981498122215271,grad_norm: 0.8755722371191065, iteration: 54241
loss: 1.0160555839538574,grad_norm: 0.9999991176136753, iteration: 54242
loss: 0.9709833264350891,grad_norm: 0.9999991589978106, iteration: 54243
loss: 0.9529354572296143,grad_norm: 0.9999991377236729, iteration: 54244
loss: 0.9619120359420776,grad_norm: 0.9999991566198463, iteration: 54245
loss: 0.9768050909042358,grad_norm: 0.9056119764585006, iteration: 54246
loss: 1.0142799615859985,grad_norm: 0.9273843692947327, iteration: 54247
loss: 1.0013563632965088,grad_norm: 0.8133255608752343, iteration: 54248
loss: 1.1091785430908203,grad_norm: 0.9999994218686572, iteration: 54249
loss: 0.9769275784492493,grad_norm: 0.8134845106020093, iteration: 54250
loss: 1.0080362558364868,grad_norm: 0.9840965200355914, iteration: 54251
loss: 0.9829722046852112,grad_norm: 0.9708566368301526, iteration: 54252
loss: 1.05034339427948,grad_norm: 0.975867142776862, iteration: 54253
loss: 1.08443284034729,grad_norm: 0.9999990188995274, iteration: 54254
loss: 1.0456786155700684,grad_norm: 0.9999991400459037, iteration: 54255
loss: 0.9825614094734192,grad_norm: 0.9125668684977397, iteration: 54256
loss: 0.9729067087173462,grad_norm: 0.8119053221072169, iteration: 54257
loss: 0.9647513628005981,grad_norm: 0.8143429929052715, iteration: 54258
loss: 0.9777734875679016,grad_norm: 0.9113433301008554, iteration: 54259
loss: 1.0193254947662354,grad_norm: 0.9999990437864631, iteration: 54260
loss: 0.9871594905853271,grad_norm: 0.999999093254733, iteration: 54261
loss: 0.9461562037467957,grad_norm: 0.9798415740066129, iteration: 54262
loss: 0.9733519554138184,grad_norm: 0.999999146207834, iteration: 54263
loss: 0.9755586981773376,grad_norm: 0.9161377620427065, iteration: 54264
loss: 0.9849262833595276,grad_norm: 0.7342323169511134, iteration: 54265
loss: 1.0302891731262207,grad_norm: 0.9999997128807036, iteration: 54266
loss: 1.0162773132324219,grad_norm: 0.9999992489331133, iteration: 54267
loss: 0.9737263321876526,grad_norm: 0.9210138209560631, iteration: 54268
loss: 1.0180262327194214,grad_norm: 0.9999991524651282, iteration: 54269
loss: 1.0039937496185303,grad_norm: 0.9999989598719978, iteration: 54270
loss: 0.9965460896492004,grad_norm: 0.9999991394144994, iteration: 54271
loss: 1.0494354963302612,grad_norm: 0.9843146029152431, iteration: 54272
loss: 1.0130021572113037,grad_norm: 0.9999991179587603, iteration: 54273
loss: 1.0106868743896484,grad_norm: 0.8616190896760333, iteration: 54274
loss: 1.0217536687850952,grad_norm: 0.9999992291522161, iteration: 54275
loss: 1.0042074918746948,grad_norm: 0.9999993834635791, iteration: 54276
loss: 1.0207841396331787,grad_norm: 0.8812793266764944, iteration: 54277
loss: 1.025086522102356,grad_norm: 0.8514105029839945, iteration: 54278
loss: 1.0230848789215088,grad_norm: 0.999999040588627, iteration: 54279
loss: 1.0026614665985107,grad_norm: 0.6753817941930653, iteration: 54280
loss: 0.9725067019462585,grad_norm: 0.8633595936123074, iteration: 54281
loss: 1.0321518182754517,grad_norm: 0.9999991268156038, iteration: 54282
loss: 1.0082030296325684,grad_norm: 0.8758234303985625, iteration: 54283
loss: 1.0047814846038818,grad_norm: 0.9999990588463756, iteration: 54284
loss: 1.0312761068344116,grad_norm: 0.9818744809780949, iteration: 54285
loss: 0.988920271396637,grad_norm: 0.827233349945533, iteration: 54286
loss: 1.020815372467041,grad_norm: 0.9999992099131384, iteration: 54287
loss: 1.0080150365829468,grad_norm: 0.9999993091725482, iteration: 54288
loss: 1.1678142547607422,grad_norm: 1.000000060733556, iteration: 54289
loss: 0.9688330888748169,grad_norm: 0.9999992469468852, iteration: 54290
loss: 1.0543609857559204,grad_norm: 0.9999992803801413, iteration: 54291
loss: 0.9880394339561462,grad_norm: 0.9999993107421803, iteration: 54292
loss: 0.941070556640625,grad_norm: 0.9999989631822029, iteration: 54293
loss: 1.0248544216156006,grad_norm: 0.9999991816110037, iteration: 54294
loss: 1.0378562211990356,grad_norm: 0.999999819917732, iteration: 54295
loss: 0.9627634882926941,grad_norm: 0.9999991317338471, iteration: 54296
loss: 0.9648340940475464,grad_norm: 0.9199002352255244, iteration: 54297
loss: 0.9635917544364929,grad_norm: 0.9999990784556073, iteration: 54298
loss: 0.9704359173774719,grad_norm: 0.9999993597774187, iteration: 54299
loss: 1.0076566934585571,grad_norm: 0.9801311485455184, iteration: 54300
loss: 1.0292587280273438,grad_norm: 0.8506694793265898, iteration: 54301
loss: 1.0239046812057495,grad_norm: 0.9999991240642467, iteration: 54302
loss: 0.9607110619544983,grad_norm: 0.8554347497985437, iteration: 54303
loss: 1.0098989009857178,grad_norm: 0.9188509902498861, iteration: 54304
loss: 1.0146218538284302,grad_norm: 0.9999991328183349, iteration: 54305
loss: 1.0266530513763428,grad_norm: 0.9875297913557575, iteration: 54306
loss: 1.0625046491622925,grad_norm: 0.9999993874260986, iteration: 54307
loss: 1.032687783241272,grad_norm: 0.8343248302262674, iteration: 54308
loss: 0.9956338405609131,grad_norm: 0.9918543250106391, iteration: 54309
loss: 1.033578872680664,grad_norm: 0.9999990467831786, iteration: 54310
loss: 0.9770811200141907,grad_norm: 0.8051337685415281, iteration: 54311
loss: 1.0479830503463745,grad_norm: 0.9999991899236158, iteration: 54312
loss: 1.0006312131881714,grad_norm: 0.8973777905742276, iteration: 54313
loss: 1.0100107192993164,grad_norm: 0.9999990663885616, iteration: 54314
loss: 1.0137226581573486,grad_norm: 0.9999997368566749, iteration: 54315
loss: 1.009580373764038,grad_norm: 0.999999116118489, iteration: 54316
loss: 1.0123721361160278,grad_norm: 0.7864241334542019, iteration: 54317
loss: 1.017732858657837,grad_norm: 0.9999990980781006, iteration: 54318
loss: 1.0192232131958008,grad_norm: 0.9999993250033413, iteration: 54319
loss: 1.028448224067688,grad_norm: 0.8520774660270026, iteration: 54320
loss: 1.0657044649124146,grad_norm: 0.9999998999704571, iteration: 54321
loss: 1.042187213897705,grad_norm: 0.8667008761838566, iteration: 54322
loss: 0.9924444556236267,grad_norm: 0.99999929220685, iteration: 54323
loss: 0.9851266145706177,grad_norm: 0.999999444375902, iteration: 54324
loss: 0.9965205192565918,grad_norm: 0.6890149416747685, iteration: 54325
loss: 0.9881198406219482,grad_norm: 0.9999991016401063, iteration: 54326
loss: 1.036184310913086,grad_norm: 0.9999993873151585, iteration: 54327
loss: 0.9916876554489136,grad_norm: 0.9892618036174545, iteration: 54328
loss: 0.9713601469993591,grad_norm: 0.9932863754809657, iteration: 54329
loss: 1.086108684539795,grad_norm: 0.9999992548621418, iteration: 54330
loss: 1.0308531522750854,grad_norm: 0.9233454554060155, iteration: 54331
loss: 1.0809495449066162,grad_norm: 0.9999988473717815, iteration: 54332
loss: 0.9973311424255371,grad_norm: 0.8985285203116734, iteration: 54333
loss: 0.9847155809402466,grad_norm: 0.8853759265618129, iteration: 54334
loss: 0.9851936101913452,grad_norm: 0.9560191064078779, iteration: 54335
loss: 1.0086960792541504,grad_norm: 0.9999989954333213, iteration: 54336
loss: 1.0324324369430542,grad_norm: 0.9080161038411401, iteration: 54337
loss: 0.9647296071052551,grad_norm: 0.867208345338004, iteration: 54338
loss: 0.9991989731788635,grad_norm: 0.8142756652153054, iteration: 54339
loss: 1.0518838167190552,grad_norm: 0.9999991375196168, iteration: 54340
loss: 1.027919888496399,grad_norm: 0.9999993443569677, iteration: 54341
loss: 1.0062419176101685,grad_norm: 0.9999992090712668, iteration: 54342
loss: 1.0609893798828125,grad_norm: 0.9999992129019879, iteration: 54343
loss: 0.9865363240242004,grad_norm: 0.8873357603803027, iteration: 54344
loss: 1.0007123947143555,grad_norm: 0.9821076639632874, iteration: 54345
loss: 1.0206230878829956,grad_norm: 0.9999991185966554, iteration: 54346
loss: 1.0157948732376099,grad_norm: 0.8301833124852729, iteration: 54347
loss: 1.0056525468826294,grad_norm: 0.9867043526928577, iteration: 54348
loss: 1.08539879322052,grad_norm: 0.9999990630674258, iteration: 54349
loss: 0.9699370861053467,grad_norm: 0.8830369845660723, iteration: 54350
loss: 1.0330032110214233,grad_norm: 0.999999414511301, iteration: 54351
loss: 0.996380090713501,grad_norm: 0.9999993659942479, iteration: 54352
loss: 1.0015835762023926,grad_norm: 0.8972940951011454, iteration: 54353
loss: 1.0279967784881592,grad_norm: 0.9999996567885265, iteration: 54354
loss: 1.026007890701294,grad_norm: 0.8598733436113492, iteration: 54355
loss: 1.0291615724563599,grad_norm: 0.9999997280877561, iteration: 54356
loss: 1.0669667720794678,grad_norm: 0.9999996483056087, iteration: 54357
loss: 1.0215356349945068,grad_norm: 0.9999995676230329, iteration: 54358
loss: 1.0088759660720825,grad_norm: 0.9999990442539384, iteration: 54359
loss: 1.0427180528640747,grad_norm: 0.9999997136794812, iteration: 54360
loss: 0.9969086647033691,grad_norm: 0.9925675828877565, iteration: 54361
loss: 1.0076119899749756,grad_norm: 0.7766030775515146, iteration: 54362
loss: 0.9863805174827576,grad_norm: 0.9048061934856715, iteration: 54363
loss: 0.9847956299781799,grad_norm: 0.99999895409297, iteration: 54364
loss: 1.002016305923462,grad_norm: 0.933812926020981, iteration: 54365
loss: 1.0100219249725342,grad_norm: 0.7963618540903007, iteration: 54366
loss: 0.9605355858802795,grad_norm: 0.9999990250432311, iteration: 54367
loss: 0.9837245345115662,grad_norm: 0.9894878751504329, iteration: 54368
loss: 0.9469459056854248,grad_norm: 0.8762316725828249, iteration: 54369
loss: 1.0185246467590332,grad_norm: 0.9999991514875072, iteration: 54370
loss: 1.0138498544692993,grad_norm: 0.9999991893589609, iteration: 54371
loss: 1.0062988996505737,grad_norm: 0.8473296968087022, iteration: 54372
loss: 1.0525802373886108,grad_norm: 0.9999998928754225, iteration: 54373
loss: 0.9930529594421387,grad_norm: 0.9999992825790712, iteration: 54374
loss: 0.9700088500976562,grad_norm: 0.999999224223222, iteration: 54375
loss: 1.0175729990005493,grad_norm: 0.9999992800242261, iteration: 54376
loss: 0.9838083386421204,grad_norm: 0.937207060899971, iteration: 54377
loss: 0.9892407059669495,grad_norm: 0.8043470278438997, iteration: 54378
loss: 0.9958068132400513,grad_norm: 0.8319743256525854, iteration: 54379
loss: 1.0213284492492676,grad_norm: 0.99999911857525, iteration: 54380
loss: 1.0127451419830322,grad_norm: 0.9831129688629288, iteration: 54381
loss: 1.0188195705413818,grad_norm: 0.9970330831652763, iteration: 54382
loss: 0.9731717705726624,grad_norm: 0.856489938879362, iteration: 54383
loss: 0.972147524356842,grad_norm: 0.9999992939975848, iteration: 54384
loss: 1.010385513305664,grad_norm: 0.9999991145044922, iteration: 54385
loss: 0.9733688235282898,grad_norm: 0.999999090171823, iteration: 54386
loss: 0.9664666056632996,grad_norm: 0.9999990987448432, iteration: 54387
loss: 0.9503957033157349,grad_norm: 0.8491687954655626, iteration: 54388
loss: 0.9882851243019104,grad_norm: 0.9999990026336207, iteration: 54389
loss: 0.9931435585021973,grad_norm: 0.999999353230666, iteration: 54390
loss: 1.0140300989151,grad_norm: 0.8152883968642538, iteration: 54391
loss: 0.976854145526886,grad_norm: 0.8750022605583603, iteration: 54392
loss: 1.030884027481079,grad_norm: 0.9999991940249148, iteration: 54393
loss: 1.0424180030822754,grad_norm: 0.999999739062439, iteration: 54394
loss: 1.0441310405731201,grad_norm: 0.9891232687741376, iteration: 54395
loss: 0.9811210632324219,grad_norm: 0.999999251501952, iteration: 54396
loss: 1.0024441480636597,grad_norm: 0.8047439772344506, iteration: 54397
loss: 0.9870980978012085,grad_norm: 0.9999990550500609, iteration: 54398
loss: 1.0329889059066772,grad_norm: 0.9000695763320683, iteration: 54399
loss: 1.0219868421554565,grad_norm: 0.9603630948997948, iteration: 54400
loss: 0.9884371757507324,grad_norm: 0.8908522386951148, iteration: 54401
loss: 0.9978620409965515,grad_norm: 0.9999991894943956, iteration: 54402
loss: 1.0124263763427734,grad_norm: 0.9582198413632405, iteration: 54403
loss: 1.0083352327346802,grad_norm: 0.9999993936068725, iteration: 54404
loss: 0.9715288281440735,grad_norm: 0.8532260599356681, iteration: 54405
loss: 0.9800082445144653,grad_norm: 0.9999992661814563, iteration: 54406
loss: 1.0679537057876587,grad_norm: 0.9941464735674915, iteration: 54407
loss: 1.0112413167953491,grad_norm: 0.96990427385118, iteration: 54408
loss: 0.9904284477233887,grad_norm: 0.8728717944275787, iteration: 54409
loss: 1.0474227666854858,grad_norm: 0.8600635288163261, iteration: 54410
loss: 1.0027865171432495,grad_norm: 0.8623847724945994, iteration: 54411
loss: 1.0139963626861572,grad_norm: 0.9671286708056847, iteration: 54412
loss: 1.0386353731155396,grad_norm: 0.8697279651678659, iteration: 54413
loss: 0.9983181953430176,grad_norm: 0.8961115576788606, iteration: 54414
loss: 0.9972186088562012,grad_norm: 0.9939014122119865, iteration: 54415
loss: 0.9693277478218079,grad_norm: 0.9999993610012852, iteration: 54416
loss: 1.1915944814682007,grad_norm: 0.9999992376716133, iteration: 54417
loss: 1.0069301128387451,grad_norm: 0.9103570592846799, iteration: 54418
loss: 1.0117416381835938,grad_norm: 0.9999990754486855, iteration: 54419
loss: 1.0273187160491943,grad_norm: 0.9239967044646495, iteration: 54420
loss: 0.9742868542671204,grad_norm: 0.7527373123326339, iteration: 54421
loss: 1.022723913192749,grad_norm: 0.9999989512933909, iteration: 54422
loss: 1.0701638460159302,grad_norm: 0.999999857048703, iteration: 54423
loss: 1.0178025960922241,grad_norm: 0.861142642532582, iteration: 54424
loss: 1.0504461526870728,grad_norm: 0.9999993488050285, iteration: 54425
loss: 1.0089797973632812,grad_norm: 0.8954582239766555, iteration: 54426
loss: 0.9982213973999023,grad_norm: 0.9880534398680942, iteration: 54427
loss: 0.9967569708824158,grad_norm: 0.9999992965995596, iteration: 54428
loss: 1.0236856937408447,grad_norm: 0.9264733680856583, iteration: 54429
loss: 0.9806246161460876,grad_norm: 0.8794549656387568, iteration: 54430
loss: 1.0387181043624878,grad_norm: 0.8923066937860782, iteration: 54431
loss: 0.9978044629096985,grad_norm: 0.9225863330344043, iteration: 54432
loss: 1.0254697799682617,grad_norm: 0.6830685921500104, iteration: 54433
loss: 1.0331374406814575,grad_norm: 0.7670748543016735, iteration: 54434
loss: 0.9787821173667908,grad_norm: 0.8770301913868908, iteration: 54435
loss: 1.0376248359680176,grad_norm: 0.9659355404778434, iteration: 54436
loss: 0.9880805611610413,grad_norm: 0.9999993371440243, iteration: 54437
loss: 1.0277186632156372,grad_norm: 0.7693576089949518, iteration: 54438
loss: 0.9861211180686951,grad_norm: 0.999999332694128, iteration: 54439
loss: 0.9852327704429626,grad_norm: 0.8139206511681639, iteration: 54440
loss: 1.0252057313919067,grad_norm: 0.9267087404414829, iteration: 54441
loss: 1.0528486967086792,grad_norm: 0.999999191462897, iteration: 54442
loss: 1.0226820707321167,grad_norm: 0.9733802796679374, iteration: 54443
loss: 1.0207329988479614,grad_norm: 0.9999994581543177, iteration: 54444
loss: 0.9981616735458374,grad_norm: 0.9852423157665389, iteration: 54445
loss: 1.044867992401123,grad_norm: 0.9999991111390556, iteration: 54446
loss: 1.0131014585494995,grad_norm: 0.99999905741499, iteration: 54447
loss: 1.0234695672988892,grad_norm: 0.9999993948706554, iteration: 54448
loss: 1.0325238704681396,grad_norm: 0.9999992017034036, iteration: 54449
loss: 1.0037609338760376,grad_norm: 0.9999994649570657, iteration: 54450
loss: 1.0607575178146362,grad_norm: 1.000000006002923, iteration: 54451
loss: 1.0247135162353516,grad_norm: 0.9999992239342886, iteration: 54452
loss: 0.9876778721809387,grad_norm: 0.8170974495192697, iteration: 54453
loss: 1.0005725622177124,grad_norm: 0.8899210199146974, iteration: 54454
loss: 1.044572114944458,grad_norm: 0.9257836996733515, iteration: 54455
loss: 1.001875400543213,grad_norm: 0.8014194529872473, iteration: 54456
loss: 0.987787127494812,grad_norm: 0.9999991195281951, iteration: 54457
loss: 1.0104974508285522,grad_norm: 0.9999998866501068, iteration: 54458
loss: 1.0259674787521362,grad_norm: 0.7900454708814488, iteration: 54459
loss: 1.0007139444351196,grad_norm: 0.8453588614036907, iteration: 54460
loss: 0.9962257742881775,grad_norm: 0.9999989285737136, iteration: 54461
loss: 1.040513277053833,grad_norm: 0.8292213067120676, iteration: 54462
loss: 0.9727979898452759,grad_norm: 0.9999992829579771, iteration: 54463
loss: 1.0532015562057495,grad_norm: 0.9999995355308969, iteration: 54464
loss: 1.0271395444869995,grad_norm: 0.8146927075740334, iteration: 54465
loss: 1.0259443521499634,grad_norm: 0.9999992676176378, iteration: 54466
loss: 1.1725982427597046,grad_norm: 0.9999996865241972, iteration: 54467
loss: 0.9615395069122314,grad_norm: 0.9999990582654914, iteration: 54468
loss: 1.031919240951538,grad_norm: 0.9999993580237896, iteration: 54469
loss: 0.9933583736419678,grad_norm: 0.8952735675163824, iteration: 54470
loss: 1.0043772459030151,grad_norm: 0.8519204229959922, iteration: 54471
loss: 1.016239047050476,grad_norm: 0.7755257581592782, iteration: 54472
loss: 1.0271036624908447,grad_norm: 0.9999990258803471, iteration: 54473
loss: 1.0202035903930664,grad_norm: 0.9999994599498716, iteration: 54474
loss: 0.9835736155509949,grad_norm: 0.9999990983818404, iteration: 54475
loss: 1.0698446035385132,grad_norm: 0.999999572713242, iteration: 54476
loss: 1.07929527759552,grad_norm: 0.9999993785736261, iteration: 54477
loss: 1.0290697813034058,grad_norm: 0.9999990747570418, iteration: 54478
loss: 1.0092394351959229,grad_norm: 0.9999990424099758, iteration: 54479
loss: 1.0138014554977417,grad_norm: 0.8806768644453173, iteration: 54480
loss: 1.0068087577819824,grad_norm: 0.8578610968776056, iteration: 54481
loss: 0.9725391864776611,grad_norm: 0.8907825123980102, iteration: 54482
loss: 1.034098505973816,grad_norm: 0.7655369009745724, iteration: 54483
loss: 1.0013353824615479,grad_norm: 0.9999995179367618, iteration: 54484
loss: 0.9873570799827576,grad_norm: 0.9999990452858172, iteration: 54485
loss: 1.008371353149414,grad_norm: 0.9999997868329374, iteration: 54486
loss: 1.0039255619049072,grad_norm: 0.9999991696542684, iteration: 54487
loss: 1.0120309591293335,grad_norm: 0.971485037862805, iteration: 54488
loss: 0.9928379058837891,grad_norm: 0.9768274422423526, iteration: 54489
loss: 1.0160492658615112,grad_norm: 0.8141134041930799, iteration: 54490
loss: 0.9817900657653809,grad_norm: 0.855180462530682, iteration: 54491
loss: 0.9970642924308777,grad_norm: 0.7980478019980177, iteration: 54492
loss: 1.034016728401184,grad_norm: 0.9900676730092792, iteration: 54493
loss: 1.0082989931106567,grad_norm: 0.7709104589805618, iteration: 54494
loss: 1.0370315313339233,grad_norm: 0.791964183273588, iteration: 54495
loss: 0.9969809055328369,grad_norm: 0.9976603329548133, iteration: 54496
loss: 0.9665718674659729,grad_norm: 0.8437971994001967, iteration: 54497
loss: 1.0004512071609497,grad_norm: 0.9679952999610559, iteration: 54498
loss: 0.9895977973937988,grad_norm: 0.999999277650281, iteration: 54499
loss: 1.0033652782440186,grad_norm: 0.9999991957885214, iteration: 54500
loss: 1.0092992782592773,grad_norm: 0.9252700264503203, iteration: 54501
loss: 1.0161970853805542,grad_norm: 0.992131944733369, iteration: 54502
loss: 0.9729118943214417,grad_norm: 0.8796539831472494, iteration: 54503
loss: 1.0167609453201294,grad_norm: 0.9164728765912187, iteration: 54504
loss: 0.98557448387146,grad_norm: 0.8078115460907329, iteration: 54505
loss: 1.0557760000228882,grad_norm: 0.9999992127270169, iteration: 54506
loss: 0.9816354513168335,grad_norm: 0.9286756940614381, iteration: 54507
loss: 1.0257620811462402,grad_norm: 0.9995158265370971, iteration: 54508
loss: 1.0407766103744507,grad_norm: 0.9999996199268467, iteration: 54509
loss: 1.0343660116195679,grad_norm: 0.9999991322819505, iteration: 54510
loss: 0.9444423913955688,grad_norm: 0.913848732630935, iteration: 54511
loss: 1.086021900177002,grad_norm: 0.9999992003463785, iteration: 54512
loss: 1.040028691291809,grad_norm: 0.8927722388470497, iteration: 54513
loss: 1.0378926992416382,grad_norm: 0.7856171920987847, iteration: 54514
loss: 1.0288269519805908,grad_norm: 0.8214744253909528, iteration: 54515
loss: 1.0049512386322021,grad_norm: 0.9999992258639275, iteration: 54516
loss: 0.9943174719810486,grad_norm: 0.8631050532787964, iteration: 54517
loss: 1.0031243562698364,grad_norm: 0.9999990731134111, iteration: 54518
loss: 0.9901168942451477,grad_norm: 0.7633072284768747, iteration: 54519
loss: 1.00130295753479,grad_norm: 0.8937296542701393, iteration: 54520
loss: 1.0164481401443481,grad_norm: 0.9999990716239546, iteration: 54521
loss: 0.9912073612213135,grad_norm: 0.9999991180835383, iteration: 54522
loss: 0.9938777685165405,grad_norm: 0.9980347141513402, iteration: 54523
loss: 0.9900907874107361,grad_norm: 0.8588806806344061, iteration: 54524
loss: 1.0066378116607666,grad_norm: 0.9735843048705488, iteration: 54525
loss: 1.0013031959533691,grad_norm: 0.824504125725965, iteration: 54526
loss: 1.0027081966400146,grad_norm: 0.9908837795326274, iteration: 54527
loss: 1.0104458332061768,grad_norm: 0.9666154147241001, iteration: 54528
loss: 0.9881864190101624,grad_norm: 0.9578228127628956, iteration: 54529
loss: 1.0035170316696167,grad_norm: 0.8869716101283349, iteration: 54530
loss: 0.9837070107460022,grad_norm: 0.9999998574553282, iteration: 54531
loss: 0.9998618960380554,grad_norm: 0.9999997565768746, iteration: 54532
loss: 1.0126060247421265,grad_norm: 0.9484160152804488, iteration: 54533
loss: 0.9789444208145142,grad_norm: 0.9999990515567717, iteration: 54534
loss: 1.0141576528549194,grad_norm: 0.9999995217932554, iteration: 54535
loss: 0.9840337634086609,grad_norm: 0.9889082043019147, iteration: 54536
loss: 1.015724778175354,grad_norm: 0.9306780330011877, iteration: 54537
loss: 1.0120447874069214,grad_norm: 0.9999990825057448, iteration: 54538
loss: 1.0478085279464722,grad_norm: 0.8819962567672177, iteration: 54539
loss: 1.008549451828003,grad_norm: 0.9498478248780127, iteration: 54540
loss: 0.9940959215164185,grad_norm: 0.9999989961865987, iteration: 54541
loss: 1.0054683685302734,grad_norm: 0.8413286718302635, iteration: 54542
loss: 0.9819533824920654,grad_norm: 0.9668759128400974, iteration: 54543
loss: 1.0126633644104004,grad_norm: 0.9999992276887746, iteration: 54544
loss: 1.0082433223724365,grad_norm: 0.9251437839786081, iteration: 54545
loss: 1.0045230388641357,grad_norm: 0.9999990642243342, iteration: 54546
loss: 0.9842000007629395,grad_norm: 0.9141672091340926, iteration: 54547
loss: 1.030381679534912,grad_norm: 0.9999993710707501, iteration: 54548
loss: 0.964293897151947,grad_norm: 0.9705637323194478, iteration: 54549
loss: 1.041900396347046,grad_norm: 0.9101693373401228, iteration: 54550
loss: 1.020101547241211,grad_norm: 0.9999996712200935, iteration: 54551
loss: 1.0103586912155151,grad_norm: 0.9948467975020208, iteration: 54552
loss: 1.0334166288375854,grad_norm: 0.9131365665035354, iteration: 54553
loss: 0.9978775382041931,grad_norm: 0.9152193774468204, iteration: 54554
loss: 0.9856640696525574,grad_norm: 0.9017042892398139, iteration: 54555
loss: 0.9874824285507202,grad_norm: 0.8944750051785887, iteration: 54556
loss: 0.9604607224464417,grad_norm: 0.9999990685959134, iteration: 54557
loss: 1.0206619501113892,grad_norm: 0.8433800162904729, iteration: 54558
loss: 1.075758695602417,grad_norm: 0.9999991383968643, iteration: 54559
loss: 1.017476201057434,grad_norm: 0.8830284868066752, iteration: 54560
loss: 1.0044375658035278,grad_norm: 0.9999995153681492, iteration: 54561
loss: 1.027199387550354,grad_norm: 0.9999991474157625, iteration: 54562
loss: 0.9975984692573547,grad_norm: 0.8068379064736289, iteration: 54563
loss: 0.984628438949585,grad_norm: 0.9999991107086617, iteration: 54564
loss: 1.0143553018569946,grad_norm: 0.9232818346931876, iteration: 54565
loss: 0.9644889235496521,grad_norm: 0.999999075748769, iteration: 54566
loss: 0.9947574734687805,grad_norm: 0.999999201507464, iteration: 54567
loss: 1.0009212493896484,grad_norm: 0.9240516093736311, iteration: 54568
loss: 1.0189191102981567,grad_norm: 0.9999989485222244, iteration: 54569
loss: 0.9917669296264648,grad_norm: 0.9041947486215386, iteration: 54570
loss: 0.9897315502166748,grad_norm: 0.8477559724763283, iteration: 54571
loss: 0.9996603727340698,grad_norm: 0.8341665765861359, iteration: 54572
loss: 1.002701759338379,grad_norm: 0.8979621842863482, iteration: 54573
loss: 0.9986847639083862,grad_norm: 0.7898269466373403, iteration: 54574
loss: 1.0276644229888916,grad_norm: 0.9999989676204077, iteration: 54575
loss: 1.070284128189087,grad_norm: 0.9999996014741793, iteration: 54576
loss: 1.0405552387237549,grad_norm: 0.9999993934955743, iteration: 54577
loss: 1.0064362287521362,grad_norm: 0.7698382483391037, iteration: 54578
loss: 1.007995367050171,grad_norm: 0.9542207078773187, iteration: 54579
loss: 1.015060305595398,grad_norm: 0.8614130090310165, iteration: 54580
loss: 1.0116848945617676,grad_norm: 0.9524500936761567, iteration: 54581
loss: 0.9841468334197998,grad_norm: 0.9285841556721625, iteration: 54582
loss: 0.9936219453811646,grad_norm: 0.9999989471866862, iteration: 54583
loss: 0.9855426549911499,grad_norm: 0.9999991915325172, iteration: 54584
loss: 0.9866176843643188,grad_norm: 0.8560111821499305, iteration: 54585
loss: 1.017878532409668,grad_norm: 0.9003631631194161, iteration: 54586
loss: 0.989020824432373,grad_norm: 0.9098091831607055, iteration: 54587
loss: 1.0181719064712524,grad_norm: 0.9025125625923045, iteration: 54588
loss: 0.9902472496032715,grad_norm: 0.8297631069736302, iteration: 54589
loss: 1.0020318031311035,grad_norm: 0.9999991522616432, iteration: 54590
loss: 1.03815495967865,grad_norm: 0.9999995714950943, iteration: 54591
loss: 1.0118155479431152,grad_norm: 0.9999991447018176, iteration: 54592
loss: 0.9465762972831726,grad_norm: 0.9016474533483646, iteration: 54593
loss: 0.9767473340034485,grad_norm: 0.9043433390494784, iteration: 54594
loss: 0.9998539090156555,grad_norm: 0.9999990484775731, iteration: 54595
loss: 1.0037306547164917,grad_norm: 0.9999993276857669, iteration: 54596
loss: 0.9847076535224915,grad_norm: 0.9547058764593479, iteration: 54597
loss: 0.9851007461547852,grad_norm: 0.9999990699287321, iteration: 54598
loss: 1.0246747732162476,grad_norm: 0.7632823888572262, iteration: 54599
loss: 1.0068624019622803,grad_norm: 0.9161286623928926, iteration: 54600
loss: 0.9694843292236328,grad_norm: 0.9999992553472236, iteration: 54601
loss: 1.0123721361160278,grad_norm: 0.9821583119339788, iteration: 54602
loss: 0.977289617061615,grad_norm: 0.9999994006188506, iteration: 54603
loss: 1.0187517404556274,grad_norm: 0.8932920179395836, iteration: 54604
loss: 1.00186288356781,grad_norm: 0.9809181970060733, iteration: 54605
loss: 1.0073055028915405,grad_norm: 0.9340239437559822, iteration: 54606
loss: 1.000432014465332,grad_norm: 0.9999992179934971, iteration: 54607
loss: 0.9770970344543457,grad_norm: 0.8142845088830034, iteration: 54608
loss: 1.045479655265808,grad_norm: 0.9999990724223164, iteration: 54609
loss: 1.001510500907898,grad_norm: 0.9999994616662984, iteration: 54610
loss: 0.9973194003105164,grad_norm: 0.7796158167321801, iteration: 54611
loss: 0.972495973110199,grad_norm: 0.9999992677415748, iteration: 54612
loss: 1.0058566331863403,grad_norm: 0.9999999517510679, iteration: 54613
loss: 1.0090751647949219,grad_norm: 0.8774037111174915, iteration: 54614
loss: 0.9907236695289612,grad_norm: 0.7672345937222552, iteration: 54615
loss: 0.9932518601417542,grad_norm: 0.999999105459021, iteration: 54616
loss: 0.9984002709388733,grad_norm: 0.702593170258657, iteration: 54617
loss: 1.014534831047058,grad_norm: 0.834429891508957, iteration: 54618
loss: 1.0187424421310425,grad_norm: 0.9999996286039134, iteration: 54619
loss: 1.015255331993103,grad_norm: 0.9999991102027751, iteration: 54620
loss: 1.0341403484344482,grad_norm: 0.9322316875384882, iteration: 54621
loss: 0.9860790967941284,grad_norm: 0.9999990765064773, iteration: 54622
loss: 0.9673814177513123,grad_norm: 0.999999123653695, iteration: 54623
loss: 1.07415771484375,grad_norm: 0.9999991644749194, iteration: 54624
loss: 1.0410196781158447,grad_norm: 0.9999992882097269, iteration: 54625
loss: 1.0141384601593018,grad_norm: 0.9450513176122168, iteration: 54626
loss: 0.9970644116401672,grad_norm: 0.9999990152086, iteration: 54627
loss: 0.9867706298828125,grad_norm: 0.9344426513619944, iteration: 54628
loss: 1.0303754806518555,grad_norm: 0.9999990609521472, iteration: 54629
loss: 0.9858655333518982,grad_norm: 0.8880636335103976, iteration: 54630
loss: 1.0874873399734497,grad_norm: 0.999999187980953, iteration: 54631
loss: 1.0674970149993896,grad_norm: 0.9999994020087163, iteration: 54632
loss: 1.0039774179458618,grad_norm: 0.9241077997692547, iteration: 54633
loss: 1.0079761743545532,grad_norm: 0.8730127781084956, iteration: 54634
loss: 0.9897218346595764,grad_norm: 0.9999990521836009, iteration: 54635
loss: 1.0134607553482056,grad_norm: 0.9540056885526861, iteration: 54636
loss: 1.0164673328399658,grad_norm: 0.9337433132635102, iteration: 54637
loss: 1.0219169855117798,grad_norm: 0.7885867151522967, iteration: 54638
loss: 1.0334467887878418,grad_norm: 0.9940163044779133, iteration: 54639
loss: 1.0780751705169678,grad_norm: 0.999999313208005, iteration: 54640
loss: 0.9997766017913818,grad_norm: 0.9999989973107132, iteration: 54641
loss: 0.9907618165016174,grad_norm: 0.9805897123937762, iteration: 54642
loss: 1.0279738903045654,grad_norm: 0.9999995094623435, iteration: 54643
loss: 1.0275869369506836,grad_norm: 0.9015269714932809, iteration: 54644
loss: 1.0122456550598145,grad_norm: 0.9018498829398685, iteration: 54645
loss: 1.0099047422409058,grad_norm: 0.9111715114408905, iteration: 54646
loss: 0.9813849329948425,grad_norm: 0.9999991284518054, iteration: 54647
loss: 1.0668725967407227,grad_norm: 0.9999997145778703, iteration: 54648
loss: 1.012965202331543,grad_norm: 0.9999991032799979, iteration: 54649
loss: 1.0165642499923706,grad_norm: 0.9999990743175139, iteration: 54650
loss: 1.0356775522232056,grad_norm: 0.9999995981978861, iteration: 54651
loss: 0.9875029921531677,grad_norm: 0.9999991151149897, iteration: 54652
loss: 1.065780758857727,grad_norm: 0.9999997722425948, iteration: 54653
loss: 0.9625826478004456,grad_norm: 0.999999462319403, iteration: 54654
loss: 0.9793830513954163,grad_norm: 0.9999991159209912, iteration: 54655
loss: 1.0166430473327637,grad_norm: 0.9999996310916727, iteration: 54656
loss: 0.9389922022819519,grad_norm: 0.9999991503609256, iteration: 54657
loss: 1.0445600748062134,grad_norm: 0.9999997746685584, iteration: 54658
loss: 1.006134033203125,grad_norm: 0.914654820936108, iteration: 54659
loss: 1.1562458276748657,grad_norm: 0.9999996149494814, iteration: 54660
loss: 1.0071245431900024,grad_norm: 0.9999990226481953, iteration: 54661
loss: 1.071640968322754,grad_norm: 0.9999997353787028, iteration: 54662
loss: 0.9948068857192993,grad_norm: 0.9999990945457281, iteration: 54663
loss: 1.0236544609069824,grad_norm: 0.9999991852997648, iteration: 54664
loss: 0.9976067543029785,grad_norm: 0.9999991274121178, iteration: 54665
loss: 1.0238093137741089,grad_norm: 0.9999991689664678, iteration: 54666
loss: 1.061234951019287,grad_norm: 0.999999474999094, iteration: 54667
loss: 0.9928380846977234,grad_norm: 0.9170873040695832, iteration: 54668
loss: 1.0292562246322632,grad_norm: 0.8544843514616328, iteration: 54669
loss: 1.009423017501831,grad_norm: 0.9999992531093713, iteration: 54670
loss: 0.999463677406311,grad_norm: 0.9999992382936029, iteration: 54671
loss: 1.0000946521759033,grad_norm: 0.9186351315605475, iteration: 54672
loss: 0.9872540831565857,grad_norm: 0.9999990632512553, iteration: 54673
loss: 1.028650164604187,grad_norm: 0.9999992163699889, iteration: 54674
loss: 1.0416592359542847,grad_norm: 0.9999997142291166, iteration: 54675
loss: 0.9704082608222961,grad_norm: 0.9491856872302192, iteration: 54676
loss: 1.095504879951477,grad_norm: 0.9999996285698048, iteration: 54677
loss: 1.0289887189865112,grad_norm: 0.9999991555269567, iteration: 54678
loss: 1.0022202730178833,grad_norm: 0.9999991328683662, iteration: 54679
loss: 1.0003459453582764,grad_norm: 0.8882684574606678, iteration: 54680
loss: 1.0241470336914062,grad_norm: 0.885903503339089, iteration: 54681
loss: 1.018194317817688,grad_norm: 0.9039748098167343, iteration: 54682
loss: 1.0176904201507568,grad_norm: 0.9999992234533766, iteration: 54683
loss: 0.9936761260032654,grad_norm: 0.8683460758265437, iteration: 54684
loss: 0.9775019288063049,grad_norm: 0.8801419745642773, iteration: 54685
loss: 1.006021499633789,grad_norm: 0.9489178734290035, iteration: 54686
loss: 0.9992899298667908,grad_norm: 0.9437674736401737, iteration: 54687
loss: 1.0271542072296143,grad_norm: 0.9680176532330675, iteration: 54688
loss: 0.9883901476860046,grad_norm: 0.8698967245686442, iteration: 54689
loss: 0.986495852470398,grad_norm: 0.9999990387783324, iteration: 54690
loss: 1.0187606811523438,grad_norm: 0.848015420615201, iteration: 54691
loss: 1.0124948024749756,grad_norm: 0.8204333067356607, iteration: 54692
loss: 1.012763261795044,grad_norm: 0.8299810218934174, iteration: 54693
loss: 1.0049623250961304,grad_norm: 0.9455330051654954, iteration: 54694
loss: 0.9970499873161316,grad_norm: 0.9512946541804751, iteration: 54695
loss: 1.013155460357666,grad_norm: 0.9999993946759038, iteration: 54696
loss: 1.0236799716949463,grad_norm: 0.9453648970310712, iteration: 54697
loss: 1.04102623462677,grad_norm: 0.9169554146049427, iteration: 54698
loss: 1.0368123054504395,grad_norm: 0.8647175203307881, iteration: 54699
loss: 0.9780057072639465,grad_norm: 0.8908122624282746, iteration: 54700
loss: 1.0191686153411865,grad_norm: 0.9999991933973374, iteration: 54701
loss: 1.0126320123672485,grad_norm: 0.9023526355029716, iteration: 54702
loss: 1.0074273347854614,grad_norm: 0.8290390776148467, iteration: 54703
loss: 1.0064157247543335,grad_norm: 0.8931743491259093, iteration: 54704
loss: 1.0022910833358765,grad_norm: 0.7959929580927879, iteration: 54705
loss: 1.0032801628112793,grad_norm: 0.931763433808443, iteration: 54706
loss: 0.999004602432251,grad_norm: 0.9999990951718873, iteration: 54707
loss: 1.0181747674942017,grad_norm: 0.9233752957266607, iteration: 54708
loss: 0.9953662753105164,grad_norm: 0.79574956129315, iteration: 54709
loss: 0.9903726577758789,grad_norm: 0.9999990905447228, iteration: 54710
loss: 0.9813993573188782,grad_norm: 0.8389184170393401, iteration: 54711
loss: 0.9963237047195435,grad_norm: 0.8006339959129095, iteration: 54712
loss: 0.997557520866394,grad_norm: 0.8827259677422195, iteration: 54713
loss: 0.9632412195205688,grad_norm: 0.9999991134419159, iteration: 54714
loss: 1.002467393875122,grad_norm: 0.9114321654240001, iteration: 54715
loss: 1.0076595544815063,grad_norm: 0.9241401987728262, iteration: 54716
loss: 0.9997972249984741,grad_norm: 0.8897096323012661, iteration: 54717
loss: 0.9920897483825684,grad_norm: 0.8712990723466648, iteration: 54718
loss: 0.9898862242698669,grad_norm: 0.7471407418656961, iteration: 54719
loss: 0.9972138404846191,grad_norm: 0.9999991442125927, iteration: 54720
loss: 0.9745009541511536,grad_norm: 0.9503345219631544, iteration: 54721
loss: 0.9925474524497986,grad_norm: 0.9999989887737191, iteration: 54722
loss: 0.991963803768158,grad_norm: 0.8603784264987645, iteration: 54723
loss: 0.997290849685669,grad_norm: 0.9999993200532521, iteration: 54724
loss: 0.9817192554473877,grad_norm: 0.9098252304266371, iteration: 54725
loss: 0.9896594882011414,grad_norm: 0.999999119570985, iteration: 54726
loss: 1.0225549936294556,grad_norm: 0.9799672676449035, iteration: 54727
loss: 0.9999862313270569,grad_norm: 0.9999997831539429, iteration: 54728
loss: 0.9891891479492188,grad_norm: 0.8813986172329934, iteration: 54729
loss: 1.0025240182876587,grad_norm: 0.8600691513112778, iteration: 54730
loss: 0.9942323565483093,grad_norm: 0.8490471590793361, iteration: 54731
loss: 1.0231733322143555,grad_norm: 0.9999990609068986, iteration: 54732
loss: 0.9929893016815186,grad_norm: 0.9999990544848943, iteration: 54733
loss: 1.0114985704421997,grad_norm: 0.8547870157573362, iteration: 54734
loss: 1.034858226776123,grad_norm: 0.9999992838175902, iteration: 54735
loss: 1.0167638063430786,grad_norm: 0.999999029624573, iteration: 54736
loss: 0.9870496392250061,grad_norm: 0.9999990261641649, iteration: 54737
loss: 0.978736400604248,grad_norm: 0.9999991081819721, iteration: 54738
loss: 0.995959460735321,grad_norm: 0.9487522977120322, iteration: 54739
loss: 0.9498761296272278,grad_norm: 0.9999990951049588, iteration: 54740
loss: 1.016762375831604,grad_norm: 0.9999990630098844, iteration: 54741
loss: 1.0064496994018555,grad_norm: 0.8050754345161242, iteration: 54742
loss: 1.0152323246002197,grad_norm: 0.8359078564497584, iteration: 54743
loss: 1.06947922706604,grad_norm: 0.9864133523459251, iteration: 54744
loss: 1.0033328533172607,grad_norm: 0.9572080332933892, iteration: 54745
loss: 1.0039660930633545,grad_norm: 0.9077108186829929, iteration: 54746
loss: 0.9842186570167542,grad_norm: 0.9999992038389891, iteration: 54747
loss: 1.0438789129257202,grad_norm: 0.9999990369366574, iteration: 54748
loss: 0.9849561452865601,grad_norm: 0.8897380394006708, iteration: 54749
loss: 1.027121663093567,grad_norm: 0.9121936275571063, iteration: 54750
loss: 0.9745926856994629,grad_norm: 0.9999991259792901, iteration: 54751
loss: 1.0042095184326172,grad_norm: 0.9797688483007966, iteration: 54752
loss: 1.010993242263794,grad_norm: 0.9999990471391547, iteration: 54753
loss: 0.9916765093803406,grad_norm: 0.8505444185983635, iteration: 54754
loss: 1.0686843395233154,grad_norm: 0.9999996393800634, iteration: 54755
loss: 1.031623125076294,grad_norm: 0.9999991905283041, iteration: 54756
loss: 0.9946893453598022,grad_norm: 0.8659436349004747, iteration: 54757
loss: 0.9980692267417908,grad_norm: 0.8483531186016128, iteration: 54758
loss: 0.983344554901123,grad_norm: 0.9999993686295436, iteration: 54759
loss: 0.98264080286026,grad_norm: 0.9999990534894676, iteration: 54760
loss: 0.9904273152351379,grad_norm: 0.9125386878986127, iteration: 54761
loss: 0.9804354310035706,grad_norm: 0.9576391641327567, iteration: 54762
loss: 0.9566178321838379,grad_norm: 0.9726968939567577, iteration: 54763
loss: 0.9966347217559814,grad_norm: 0.9999991088559659, iteration: 54764
loss: 0.9946574568748474,grad_norm: 0.854837415080311, iteration: 54765
loss: 1.0135899782180786,grad_norm: 0.8414557376000502, iteration: 54766
loss: 1.0113736391067505,grad_norm: 0.9999990411159654, iteration: 54767
loss: 0.9966533184051514,grad_norm: 0.6854309410846631, iteration: 54768
loss: 0.9746460914611816,grad_norm: 0.8719258746824666, iteration: 54769
loss: 1.0011749267578125,grad_norm: 0.8791463713574225, iteration: 54770
loss: 1.04031503200531,grad_norm: 0.9999991271946629, iteration: 54771
loss: 1.0056376457214355,grad_norm: 0.8298205691993031, iteration: 54772
loss: 1.0251134634017944,grad_norm: 0.8689966101507328, iteration: 54773
loss: 1.0072438716888428,grad_norm: 0.9999990218768813, iteration: 54774
loss: 0.9522202610969543,grad_norm: 0.9565947304475818, iteration: 54775
loss: 1.0174822807312012,grad_norm: 0.9999996703999992, iteration: 54776
loss: 1.0145604610443115,grad_norm: 0.844221589066392, iteration: 54777
loss: 0.9834975004196167,grad_norm: 0.9999990431201166, iteration: 54778
loss: 1.0331785678863525,grad_norm: 0.9683965619582259, iteration: 54779
loss: 0.9827759861946106,grad_norm: 0.9999992088760944, iteration: 54780
loss: 1.0711299180984497,grad_norm: 0.9999992874132173, iteration: 54781
loss: 0.9850398302078247,grad_norm: 0.9673493144021249, iteration: 54782
loss: 1.009431004524231,grad_norm: 0.8453771479102, iteration: 54783
loss: 1.0090388059616089,grad_norm: 0.9999990383085314, iteration: 54784
loss: 1.0023634433746338,grad_norm: 0.9999991726458488, iteration: 54785
loss: 0.9970143437385559,grad_norm: 0.9308340455585885, iteration: 54786
loss: 0.9447709918022156,grad_norm: 0.9999991815190411, iteration: 54787
loss: 0.9806059002876282,grad_norm: 0.8405732798409223, iteration: 54788
loss: 1.0247870683670044,grad_norm: 0.7789932484059697, iteration: 54789
loss: 1.0157349109649658,grad_norm: 0.9847070208715775, iteration: 54790
loss: 1.014457106590271,grad_norm: 0.9999989295759932, iteration: 54791
loss: 0.98927903175354,grad_norm: 0.8287271488535033, iteration: 54792
loss: 0.9820895195007324,grad_norm: 0.8055590783276089, iteration: 54793
loss: 0.9562207460403442,grad_norm: 0.9688544131595892, iteration: 54794
loss: 1.0418826341629028,grad_norm: 0.9999991680878285, iteration: 54795
loss: 1.0434668064117432,grad_norm: 0.9999992982842277, iteration: 54796
loss: 1.063787817955017,grad_norm: 0.9999994828888469, iteration: 54797
loss: 0.9899874925613403,grad_norm: 0.9526210763164087, iteration: 54798
loss: 1.0963356494903564,grad_norm: 0.9999993293676637, iteration: 54799
loss: 0.9527798891067505,grad_norm: 0.8452071880965752, iteration: 54800
loss: 1.0277044773101807,grad_norm: 0.9999989616142593, iteration: 54801
loss: 1.00742769241333,grad_norm: 0.963994637158496, iteration: 54802
loss: 1.012999176979065,grad_norm: 0.9824136383593766, iteration: 54803
loss: 0.9920740723609924,grad_norm: 0.9189651037058447, iteration: 54804
loss: 1.0344723463058472,grad_norm: 0.8791980304057342, iteration: 54805
loss: 0.9912251234054565,grad_norm: 0.9999991404107926, iteration: 54806
loss: 0.9786387085914612,grad_norm: 0.8596370805261263, iteration: 54807
loss: 0.9991127848625183,grad_norm: 0.9933367381917244, iteration: 54808
loss: 1.0437796115875244,grad_norm: 0.8815218664865575, iteration: 54809
loss: 1.1093441247940063,grad_norm: 0.9999996307044583, iteration: 54810
loss: 0.9857630729675293,grad_norm: 0.8879706359390972, iteration: 54811
loss: 1.0167816877365112,grad_norm: 0.9573830280239218, iteration: 54812
loss: 1.0326038599014282,grad_norm: 0.9999996189530943, iteration: 54813
loss: 1.0006462335586548,grad_norm: 0.7994219916476715, iteration: 54814
loss: 0.9805173873901367,grad_norm: 0.997293617537361, iteration: 54815
loss: 0.9991741180419922,grad_norm: 0.9999992014449419, iteration: 54816
loss: 1.004529595375061,grad_norm: 0.854040417119854, iteration: 54817
loss: 1.0007442235946655,grad_norm: 0.8897723549141018, iteration: 54818
loss: 0.9957123398780823,grad_norm: 0.9999992108966163, iteration: 54819
loss: 0.9725871086120605,grad_norm: 0.999999099376801, iteration: 54820
loss: 1.0000499486923218,grad_norm: 0.870220294072104, iteration: 54821
loss: 1.022042989730835,grad_norm: 0.9999991089603576, iteration: 54822
loss: 0.983492910861969,grad_norm: 0.9999990844597965, iteration: 54823
loss: 0.9843177795410156,grad_norm: 0.9501745232855593, iteration: 54824
loss: 0.9741291403770447,grad_norm: 0.9834714977657226, iteration: 54825
loss: 0.9634455442428589,grad_norm: 0.9573649655126076, iteration: 54826
loss: 1.014697790145874,grad_norm: 0.9999994735845339, iteration: 54827
loss: 1.0335952043533325,grad_norm: 0.9999991762590719, iteration: 54828
loss: 1.0388514995574951,grad_norm: 0.9999990413177762, iteration: 54829
loss: 0.9659833908081055,grad_norm: 0.9999996013622502, iteration: 54830
loss: 1.0285141468048096,grad_norm: 0.9857327220719002, iteration: 54831
loss: 0.9898712635040283,grad_norm: 0.9999994296481122, iteration: 54832
loss: 1.0241693258285522,grad_norm: 0.9999990972966344, iteration: 54833
loss: 0.9510212540626526,grad_norm: 0.915293298968508, iteration: 54834
loss: 0.9578086137771606,grad_norm: 0.9222891511099028, iteration: 54835
loss: 1.0403839349746704,grad_norm: 0.9999991025510581, iteration: 54836
loss: 1.0213245153427124,grad_norm: 0.9999991473800277, iteration: 54837
loss: 0.9628896713256836,grad_norm: 0.9999989748530155, iteration: 54838
loss: 1.0425410270690918,grad_norm: 0.8974136255734807, iteration: 54839
loss: 1.0085670948028564,grad_norm: 0.999999196953491, iteration: 54840
loss: 1.03123140335083,grad_norm: 0.8568091954392723, iteration: 54841
loss: 0.9989205598831177,grad_norm: 0.8302144193627395, iteration: 54842
loss: 1.0316747426986694,grad_norm: 0.9066832052066346, iteration: 54843
loss: 1.009859323501587,grad_norm: 0.9999995879641198, iteration: 54844
loss: 0.9748115539550781,grad_norm: 0.9722831931466599, iteration: 54845
loss: 1.0029174089431763,grad_norm: 0.9999991401576447, iteration: 54846
loss: 1.0537418127059937,grad_norm: 0.9999997567527537, iteration: 54847
loss: 1.0479542016983032,grad_norm: 0.8589085452298915, iteration: 54848
loss: 1.0053991079330444,grad_norm: 0.9999996435366111, iteration: 54849
loss: 0.9902846217155457,grad_norm: 0.9448865211311587, iteration: 54850
loss: 1.0149437189102173,grad_norm: 0.9196726326275387, iteration: 54851
loss: 1.000128149986267,grad_norm: 0.9551956218915454, iteration: 54852
loss: 1.030697226524353,grad_norm: 0.7627690192488742, iteration: 54853
loss: 1.0159560441970825,grad_norm: 0.9999993113519875, iteration: 54854
loss: 0.9918834567070007,grad_norm: 0.8939641043166517, iteration: 54855
loss: 1.0376524925231934,grad_norm: 0.8797750104110073, iteration: 54856
loss: 1.0396645069122314,grad_norm: 0.9999999105604409, iteration: 54857
loss: 1.0100069046020508,grad_norm: 0.9636790083386875, iteration: 54858
loss: 1.0371346473693848,grad_norm: 0.9260798688829254, iteration: 54859
loss: 1.0026800632476807,grad_norm: 0.8319632900939802, iteration: 54860
loss: 1.0107277631759644,grad_norm: 0.9999993298575272, iteration: 54861
loss: 1.0437180995941162,grad_norm: 0.9999994657409125, iteration: 54862
loss: 0.9949348568916321,grad_norm: 0.9999990093966726, iteration: 54863
loss: 1.0006847381591797,grad_norm: 0.9999997562129553, iteration: 54864
loss: 1.010417103767395,grad_norm: 0.9999992751501748, iteration: 54865
loss: 1.0239030122756958,grad_norm: 0.9999989753732256, iteration: 54866
loss: 1.0518410205841064,grad_norm: 0.9999997772692913, iteration: 54867
loss: 0.9575749635696411,grad_norm: 0.9461337389949892, iteration: 54868
loss: 1.0332396030426025,grad_norm: 0.975900674023417, iteration: 54869
loss: 1.0003958940505981,grad_norm: 0.9989644872669466, iteration: 54870
loss: 0.9885060787200928,grad_norm: 0.9999990628000709, iteration: 54871
loss: 0.9834735989570618,grad_norm: 0.9657534176634633, iteration: 54872
loss: 1.0155091285705566,grad_norm: 0.9999995946232758, iteration: 54873
loss: 1.0309028625488281,grad_norm: 0.9576712830875791, iteration: 54874
loss: 0.9710137248039246,grad_norm: 0.9812388802216466, iteration: 54875
loss: 0.994253933429718,grad_norm: 0.9999991228401395, iteration: 54876
loss: 1.0318529605865479,grad_norm: 0.7854530861775948, iteration: 54877
loss: 0.9993352293968201,grad_norm: 0.96390846628285, iteration: 54878
loss: 1.0567927360534668,grad_norm: 0.9999997972898229, iteration: 54879
loss: 1.0377146005630493,grad_norm: 0.9999992454880987, iteration: 54880
loss: 1.033640742301941,grad_norm: 0.9999998636533317, iteration: 54881
loss: 0.9679036736488342,grad_norm: 0.9999992280654283, iteration: 54882
loss: 1.0928418636322021,grad_norm: 0.9999995717225593, iteration: 54883
loss: 1.0109710693359375,grad_norm: 0.9999993090691994, iteration: 54884
loss: 0.9843471646308899,grad_norm: 0.7728919299294781, iteration: 54885
loss: 1.0257105827331543,grad_norm: 0.9053826437364436, iteration: 54886
loss: 1.0186553001403809,grad_norm: 0.8444286733243286, iteration: 54887
loss: 1.046235203742981,grad_norm: 0.969868317012373, iteration: 54888
loss: 0.9613022804260254,grad_norm: 0.9114028194291579, iteration: 54889
loss: 1.0204429626464844,grad_norm: 0.9999990237518815, iteration: 54890
loss: 1.0909419059753418,grad_norm: 0.9999996010149989, iteration: 54891
loss: 1.0039474964141846,grad_norm: 0.8607266937909167, iteration: 54892
loss: 1.0058159828186035,grad_norm: 0.9260520299596006, iteration: 54893
loss: 1.0300285816192627,grad_norm: 0.9999991284225918, iteration: 54894
loss: 1.0134159326553345,grad_norm: 0.9999992295074553, iteration: 54895
loss: 1.0083184242248535,grad_norm: 0.9999992266083133, iteration: 54896
loss: 1.0346885919570923,grad_norm: 0.7932840323608217, iteration: 54897
loss: 0.9986923336982727,grad_norm: 0.9078849395978364, iteration: 54898
loss: 1.0120208263397217,grad_norm: 0.7641166069251346, iteration: 54899
loss: 0.9993709921836853,grad_norm: 0.8700484516208793, iteration: 54900
loss: 0.9785721898078918,grad_norm: 0.7827981841777308, iteration: 54901
loss: 0.9884637594223022,grad_norm: 0.9999989687952145, iteration: 54902
loss: 0.9963894486427307,grad_norm: 0.9999989723862748, iteration: 54903
loss: 1.0131350755691528,grad_norm: 0.855567666612895, iteration: 54904
loss: 0.9897508025169373,grad_norm: 0.8214415020639128, iteration: 54905
loss: 1.012225866317749,grad_norm: 0.8931653767292562, iteration: 54906
loss: 0.990081787109375,grad_norm: 0.7990676899425508, iteration: 54907
loss: 1.0132251977920532,grad_norm: 0.985727657942217, iteration: 54908
loss: 1.0660531520843506,grad_norm: 0.9999995978660338, iteration: 54909
loss: 0.9742310643196106,grad_norm: 0.9000574004831102, iteration: 54910
loss: 0.99360191822052,grad_norm: 0.9999992320017062, iteration: 54911
loss: 1.0200226306915283,grad_norm: 0.9999992686287776, iteration: 54912
loss: 1.0024992227554321,grad_norm: 0.8479010197980897, iteration: 54913
loss: 1.2129013538360596,grad_norm: 0.9999995630000641, iteration: 54914
loss: 1.0519688129425049,grad_norm: 0.9999993077394556, iteration: 54915
loss: 1.054848074913025,grad_norm: 0.9999998104476606, iteration: 54916
loss: 0.9759140610694885,grad_norm: 0.9302447852515789, iteration: 54917
loss: 0.9878169894218445,grad_norm: 0.9282266867408734, iteration: 54918
loss: 0.9877398014068604,grad_norm: 0.999999168247655, iteration: 54919
loss: 0.9700035452842712,grad_norm: 0.9999992197774207, iteration: 54920
loss: 1.0754940509796143,grad_norm: 0.9999989824468779, iteration: 54921
loss: 1.0482146739959717,grad_norm: 0.9999995378462839, iteration: 54922
loss: 1.0001112222671509,grad_norm: 0.9426241588112614, iteration: 54923
loss: 1.0246515274047852,grad_norm: 0.9999997895923193, iteration: 54924
loss: 0.9961547255516052,grad_norm: 0.9020498682112915, iteration: 54925
loss: 1.011422038078308,grad_norm: 0.9999991597870052, iteration: 54926
loss: 0.9944483637809753,grad_norm: 0.9772486481730621, iteration: 54927
loss: 1.0176608562469482,grad_norm: 0.9999990162895396, iteration: 54928
loss: 0.9962035417556763,grad_norm: 0.9050310119715312, iteration: 54929
loss: 1.016634464263916,grad_norm: 0.957556818236527, iteration: 54930
loss: 1.0123727321624756,grad_norm: 0.9810026017029766, iteration: 54931
loss: 0.9886072278022766,grad_norm: 0.9370902826840134, iteration: 54932
loss: 1.0284959077835083,grad_norm: 0.8806024402577216, iteration: 54933
loss: 0.9611000418663025,grad_norm: 0.9999989751784872, iteration: 54934
loss: 1.0995196104049683,grad_norm: 0.9999998522640223, iteration: 54935
loss: 0.9943261742591858,grad_norm: 0.9999993434752265, iteration: 54936
loss: 0.9928969144821167,grad_norm: 0.9999991001588886, iteration: 54937
loss: 0.9974757432937622,grad_norm: 0.8745544806403603, iteration: 54938
loss: 1.0284055471420288,grad_norm: 0.999999040474213, iteration: 54939
loss: 1.0199122428894043,grad_norm: 0.9789871477638185, iteration: 54940
loss: 1.0467690229415894,grad_norm: 0.9999996621785293, iteration: 54941
loss: 1.0183188915252686,grad_norm: 0.9999991578148699, iteration: 54942
loss: 0.9854132533073425,grad_norm: 0.9427715537796055, iteration: 54943
loss: 0.9995366930961609,grad_norm: 0.9999993260125394, iteration: 54944
loss: 0.9939016103744507,grad_norm: 0.8428186403229312, iteration: 54945
loss: 0.9792832136154175,grad_norm: 0.9999992192829624, iteration: 54946
loss: 1.0456453561782837,grad_norm: 0.8010099995458864, iteration: 54947
loss: 0.9928080439567566,grad_norm: 0.9999992311113507, iteration: 54948
loss: 1.0218071937561035,grad_norm: 0.9999995469530328, iteration: 54949
loss: 1.0491774082183838,grad_norm: 0.9999995053641688, iteration: 54950
loss: 1.0145806074142456,grad_norm: 0.8522326903884159, iteration: 54951
loss: 0.9789546728134155,grad_norm: 0.9999990911015243, iteration: 54952
loss: 0.9841175079345703,grad_norm: 0.8354448713173984, iteration: 54953
loss: 1.0536216497421265,grad_norm: 0.9063067700299109, iteration: 54954
loss: 0.9600436687469482,grad_norm: 0.9862524339588375, iteration: 54955
loss: 0.9734624624252319,grad_norm: 0.8087490463837375, iteration: 54956
loss: 1.0430470705032349,grad_norm: 0.9999991384278303, iteration: 54957
loss: 0.9668627977371216,grad_norm: 0.9999992451313918, iteration: 54958
loss: 1.0147185325622559,grad_norm: 0.9954853838417406, iteration: 54959
loss: 0.9984652400016785,grad_norm: 0.8226336735114553, iteration: 54960
loss: 1.020022988319397,grad_norm: 0.9711161849316073, iteration: 54961
loss: 1.03780198097229,grad_norm: 0.9767240336401363, iteration: 54962
loss: 1.0370433330535889,grad_norm: 0.9999990902265554, iteration: 54963
loss: 1.0034842491149902,grad_norm: 0.9999991303732717, iteration: 54964
loss: 1.0080382823944092,grad_norm: 0.9003431276093423, iteration: 54965
loss: 0.9914976954460144,grad_norm: 0.8828142344849621, iteration: 54966
loss: 1.0076042413711548,grad_norm: 0.8678441514736228, iteration: 54967
loss: 1.0196424722671509,grad_norm: 0.9991708696014624, iteration: 54968
loss: 0.9805008769035339,grad_norm: 0.8602660634020228, iteration: 54969
loss: 1.0141726732254028,grad_norm: 0.9999989981661446, iteration: 54970
loss: 1.0453283786773682,grad_norm: 0.9999991902106755, iteration: 54971
loss: 1.0305651426315308,grad_norm: 0.8846207487614525, iteration: 54972
loss: 1.0269802808761597,grad_norm: 0.8499352632385949, iteration: 54973
loss: 0.982241153717041,grad_norm: 0.9999995132245898, iteration: 54974
loss: 0.992303729057312,grad_norm: 0.9956536288975935, iteration: 54975
loss: 1.166520118713379,grad_norm: 1.0000001148078506, iteration: 54976
loss: 1.0077742338180542,grad_norm: 0.9999991666868181, iteration: 54977
loss: 1.0076196193695068,grad_norm: 0.8738901415731886, iteration: 54978
loss: 0.977536141872406,grad_norm: 0.9216229206000329, iteration: 54979
loss: 1.0152490139007568,grad_norm: 0.9999991150391142, iteration: 54980
loss: 1.0277597904205322,grad_norm: 0.9999996827896952, iteration: 54981
loss: 0.9885457158088684,grad_norm: 0.9148826487326923, iteration: 54982
loss: 1.0216084718704224,grad_norm: 0.9999991912199581, iteration: 54983
loss: 0.981190025806427,grad_norm: 0.9244282400406695, iteration: 54984
loss: 0.9828081727027893,grad_norm: 0.6750882184932658, iteration: 54985
loss: 1.0061906576156616,grad_norm: 0.8420773205123586, iteration: 54986
loss: 0.9807099103927612,grad_norm: 0.999999049937558, iteration: 54987
loss: 1.0302369594573975,grad_norm: 0.9439480484395208, iteration: 54988
loss: 1.0463835000991821,grad_norm: 0.9999998208172528, iteration: 54989
loss: 0.9601738452911377,grad_norm: 0.9999992451733959, iteration: 54990
loss: 1.0084468126296997,grad_norm: 0.8545897131811, iteration: 54991
loss: 1.0272151231765747,grad_norm: 0.9938063974476534, iteration: 54992
loss: 1.0000903606414795,grad_norm: 0.9999991344112038, iteration: 54993
loss: 1.0164427757263184,grad_norm: 0.9502261763562239, iteration: 54994
loss: 0.9787261486053467,grad_norm: 0.9776972994172771, iteration: 54995
loss: 1.021336555480957,grad_norm: 0.9396254443639065, iteration: 54996
loss: 0.9626896977424622,grad_norm: 0.8095195456609752, iteration: 54997
loss: 1.0096957683563232,grad_norm: 0.8966980730479785, iteration: 54998
loss: 1.005162239074707,grad_norm: 0.9585443240043954, iteration: 54999
loss: 1.0244181156158447,grad_norm: 0.9519644663628232, iteration: 55000
loss: 1.0065600872039795,grad_norm: 0.9679202829298, iteration: 55001
loss: 1.0028952360153198,grad_norm: 0.9732857500609535, iteration: 55002
loss: 1.022034764289856,grad_norm: 0.9999992613747544, iteration: 55003
loss: 1.0155515670776367,grad_norm: 0.9206031816704425, iteration: 55004
loss: 1.1558212041854858,grad_norm: 0.9999994440960588, iteration: 55005
loss: 0.9805880784988403,grad_norm: 0.8701773210744771, iteration: 55006
loss: 0.9493343830108643,grad_norm: 0.9999991252233656, iteration: 55007
loss: 1.0175163745880127,grad_norm: 0.8284248759306028, iteration: 55008
loss: 1.018563151359558,grad_norm: 0.9792775472429224, iteration: 55009
loss: 1.044572353363037,grad_norm: 0.9999990580153146, iteration: 55010
loss: 1.036921501159668,grad_norm: 0.9999991972383596, iteration: 55011
loss: 0.9653753638267517,grad_norm: 0.9405467778623189, iteration: 55012
loss: 0.9990777373313904,grad_norm: 0.849009723250516, iteration: 55013
loss: 1.0548045635223389,grad_norm: 0.865587950174978, iteration: 55014
loss: 1.0526424646377563,grad_norm: 0.9999993464982001, iteration: 55015
loss: 1.0094895362854004,grad_norm: 0.8167535095959914, iteration: 55016
loss: 1.09257173538208,grad_norm: 0.9999990299089615, iteration: 55017
loss: 0.9979409575462341,grad_norm: 0.845207928229283, iteration: 55018
loss: 0.983001708984375,grad_norm: 0.7503433937248773, iteration: 55019
loss: 0.967905580997467,grad_norm: 0.9999990701662961, iteration: 55020
loss: 0.9990540742874146,grad_norm: 0.8646278697213227, iteration: 55021
loss: 0.994985044002533,grad_norm: 0.9999991653445396, iteration: 55022
loss: 0.9990402460098267,grad_norm: 0.9999989719677692, iteration: 55023
loss: 1.0085768699645996,grad_norm: 0.999999395762357, iteration: 55024
loss: 0.9977591633796692,grad_norm: 0.8808641080077922, iteration: 55025
loss: 1.032149314880371,grad_norm: 0.9423585130026516, iteration: 55026
loss: 1.051541805267334,grad_norm: 0.9999995312602505, iteration: 55027
loss: 1.03958261013031,grad_norm: 0.9999993863104679, iteration: 55028
loss: 0.9741778373718262,grad_norm: 0.9793197246810224, iteration: 55029
loss: 0.9732260704040527,grad_norm: 0.9999989403554235, iteration: 55030
loss: 1.0156433582305908,grad_norm: 0.9590365121884022, iteration: 55031
loss: 1.0342967510223389,grad_norm: 0.9868033402877963, iteration: 55032
loss: 1.0140637159347534,grad_norm: 0.9999993452814083, iteration: 55033
loss: 0.9682530760765076,grad_norm: 0.8368986312881932, iteration: 55034
loss: 1.0424766540527344,grad_norm: 0.9719382002861569, iteration: 55035
loss: 0.9846662878990173,grad_norm: 0.9999990295618065, iteration: 55036
loss: 1.0741947889328003,grad_norm: 0.9999992469709665, iteration: 55037
loss: 1.024722933769226,grad_norm: 0.999999255597281, iteration: 55038
loss: 0.9957655072212219,grad_norm: 0.953636062171653, iteration: 55039
loss: 0.9999252557754517,grad_norm: 0.7844317968920135, iteration: 55040
loss: 1.0009766817092896,grad_norm: 0.8837177631145937, iteration: 55041
loss: 1.0056647062301636,grad_norm: 0.8404640387037227, iteration: 55042
loss: 0.9827607274055481,grad_norm: 0.91127283502563, iteration: 55043
loss: 0.9764338731765747,grad_norm: 0.9999990982548419, iteration: 55044
loss: 0.9995066523551941,grad_norm: 0.9795456487882174, iteration: 55045
loss: 1.022011160850525,grad_norm: 0.9999994034003298, iteration: 55046
loss: 0.9940574169158936,grad_norm: 0.9999993210369548, iteration: 55047
loss: 0.992098331451416,grad_norm: 0.9177478008513568, iteration: 55048
loss: 1.0040394067764282,grad_norm: 0.9999994209948229, iteration: 55049
loss: 1.0108922719955444,grad_norm: 0.9999990630643774, iteration: 55050
loss: 1.024775743484497,grad_norm: 0.7504424199053525, iteration: 55051
loss: 0.9581536054611206,grad_norm: 0.9999991122533747, iteration: 55052
loss: 0.9858715534210205,grad_norm: 0.8824602050099187, iteration: 55053
loss: 0.9880049824714661,grad_norm: 0.9818146081378186, iteration: 55054
loss: 1.0213919878005981,grad_norm: 0.999999074978405, iteration: 55055
loss: 0.9840960502624512,grad_norm: 0.7729689091878814, iteration: 55056
loss: 1.0233852863311768,grad_norm: 0.9798764467109682, iteration: 55057
loss: 1.0607513189315796,grad_norm: 0.9999991676232216, iteration: 55058
loss: 0.9756081700325012,grad_norm: 0.9925270094410487, iteration: 55059
loss: 1.028305172920227,grad_norm: 0.999998900780972, iteration: 55060
loss: 1.00246262550354,grad_norm: 0.8961081458918077, iteration: 55061
loss: 1.0060254335403442,grad_norm: 0.8991526913903659, iteration: 55062
loss: 1.0317893028259277,grad_norm: 0.8895017635827368, iteration: 55063
loss: 1.0402326583862305,grad_norm: 0.9999999259290371, iteration: 55064
loss: 1.0120292901992798,grad_norm: 0.8805344587312885, iteration: 55065
loss: 1.0081251859664917,grad_norm: 0.8126543542347542, iteration: 55066
loss: 1.0086534023284912,grad_norm: 0.954653657014154, iteration: 55067
loss: 0.974175751209259,grad_norm: 0.9999991361762428, iteration: 55068
loss: 1.016144037246704,grad_norm: 0.9974391082639086, iteration: 55069
loss: 1.0403286218643188,grad_norm: 0.9949936886481404, iteration: 55070
loss: 1.0121662616729736,grad_norm: 0.9766014227052533, iteration: 55071
loss: 1.081088662147522,grad_norm: 0.9999997577756592, iteration: 55072
loss: 1.0043307542800903,grad_norm: 0.9999992239537502, iteration: 55073
loss: 1.02385675907135,grad_norm: 0.9999992475537594, iteration: 55074
loss: 0.99676513671875,grad_norm: 0.7876248617695928, iteration: 55075
loss: 0.9852231740951538,grad_norm: 0.9999991009966624, iteration: 55076
loss: 1.0224378108978271,grad_norm: 0.9526251456797746, iteration: 55077
loss: 1.0112035274505615,grad_norm: 0.9522213040023636, iteration: 55078
loss: 0.9850937724113464,grad_norm: 0.8054823469160247, iteration: 55079
loss: 1.0334842205047607,grad_norm: 0.9999990933272315, iteration: 55080
loss: 0.9955596923828125,grad_norm: 0.7238917071714666, iteration: 55081
loss: 0.9857535362243652,grad_norm: 0.9315854528441309, iteration: 55082
loss: 0.9845721125602722,grad_norm: 0.8231459082854818, iteration: 55083
loss: 1.0163947343826294,grad_norm: 0.938821622751209, iteration: 55084
loss: 0.9916088581085205,grad_norm: 0.9518099936712915, iteration: 55085
loss: 0.9899207353591919,grad_norm: 0.7802546845530159, iteration: 55086
loss: 0.9575949907302856,grad_norm: 0.9999991640020075, iteration: 55087
loss: 1.017409086227417,grad_norm: 0.8299429186472489, iteration: 55088
loss: 1.077634572982788,grad_norm: 0.9999996676617638, iteration: 55089
loss: 1.021377682685852,grad_norm: 0.9142847761740089, iteration: 55090
loss: 1.008393406867981,grad_norm: 0.9800025004039843, iteration: 55091
loss: 1.012406826019287,grad_norm: 0.9999991453096202, iteration: 55092
loss: 1.002654790878296,grad_norm: 0.7761854500679074, iteration: 55093
loss: 1.016859769821167,grad_norm: 0.9999996038520039, iteration: 55094
loss: 0.9920002222061157,grad_norm: 0.9637562223048749, iteration: 55095
loss: 1.0104084014892578,grad_norm: 0.9999990854059713, iteration: 55096
loss: 1.009312629699707,grad_norm: 0.8533902496588647, iteration: 55097
loss: 1.0497716665267944,grad_norm: 0.9999999130926382, iteration: 55098
loss: 0.976958692073822,grad_norm: 0.9999991977062171, iteration: 55099
loss: 0.989815354347229,grad_norm: 0.9191290606390273, iteration: 55100
loss: 1.0140832662582397,grad_norm: 0.9330601083729393, iteration: 55101
loss: 1.0337446928024292,grad_norm: 0.9999990292935139, iteration: 55102
loss: 1.0111083984375,grad_norm: 0.9805631564095583, iteration: 55103
loss: 0.9827836751937866,grad_norm: 0.8575789433649742, iteration: 55104
loss: 1.021472454071045,grad_norm: 0.8695271193496467, iteration: 55105
loss: 1.012080192565918,grad_norm: 0.9999991453948133, iteration: 55106
loss: 1.0044453144073486,grad_norm: 0.9465468544958708, iteration: 55107
loss: 1.014477014541626,grad_norm: 0.8970101511201063, iteration: 55108
loss: 0.9800634980201721,grad_norm: 0.9999991521489702, iteration: 55109
loss: 1.0004547834396362,grad_norm: 0.9999992617371053, iteration: 55110
loss: 1.1180031299591064,grad_norm: 0.9999994259333388, iteration: 55111
loss: 1.0089117288589478,grad_norm: 0.9368222587918222, iteration: 55112
loss: 1.016558289527893,grad_norm: 0.9999993088626905, iteration: 55113
loss: 1.0435856580734253,grad_norm: 0.9999998959141518, iteration: 55114
loss: 0.9804545640945435,grad_norm: 0.816025168401079, iteration: 55115
loss: 1.0224449634552002,grad_norm: 0.9999990562119683, iteration: 55116
loss: 1.0093430280685425,grad_norm: 0.8564467105005051, iteration: 55117
loss: 0.9986915588378906,grad_norm: 0.8091314141928903, iteration: 55118
loss: 0.996639609336853,grad_norm: 0.9038087408311052, iteration: 55119
loss: 0.9768373370170593,grad_norm: 0.9999996835700437, iteration: 55120
loss: 1.0496619939804077,grad_norm: 0.9174691541505444, iteration: 55121
loss: 0.9701055884361267,grad_norm: 0.9999996728622752, iteration: 55122
loss: 1.0456243753433228,grad_norm: 0.9225377866762134, iteration: 55123
loss: 0.9947764277458191,grad_norm: 0.8234628139622078, iteration: 55124
loss: 0.9719748497009277,grad_norm: 0.8751880639366397, iteration: 55125
loss: 1.0489239692687988,grad_norm: 0.9999994084063824, iteration: 55126
loss: 1.0116550922393799,grad_norm: 0.8680544040078977, iteration: 55127
loss: 1.0195374488830566,grad_norm: 0.999999245022503, iteration: 55128
loss: 1.0248887538909912,grad_norm: 0.9568329566752322, iteration: 55129
loss: 1.0373961925506592,grad_norm: 0.9999992822228176, iteration: 55130
loss: 1.0255579948425293,grad_norm: 0.9999995480965858, iteration: 55131
loss: 1.0115180015563965,grad_norm: 0.9917047017983401, iteration: 55132
loss: 1.0296229124069214,grad_norm: 0.8532201584347477, iteration: 55133
loss: 1.0133570432662964,grad_norm: 0.9999991712008128, iteration: 55134
loss: 1.025931477546692,grad_norm: 0.999999635237547, iteration: 55135
loss: 1.0262600183486938,grad_norm: 0.9999993860502104, iteration: 55136
loss: 0.9804853200912476,grad_norm: 0.9999992333352588, iteration: 55137
loss: 1.0347821712493896,grad_norm: 0.9228892927853165, iteration: 55138
loss: 1.029955506324768,grad_norm: 0.9999992879056988, iteration: 55139
loss: 0.9621872901916504,grad_norm: 0.8630524556413216, iteration: 55140
loss: 1.0003668069839478,grad_norm: 0.9999991177559687, iteration: 55141
loss: 1.0164812803268433,grad_norm: 0.9999991065812921, iteration: 55142
loss: 1.0013583898544312,grad_norm: 0.8881607085046064, iteration: 55143
loss: 0.9906210899353027,grad_norm: 0.9039881277892879, iteration: 55144
loss: 0.9871790409088135,grad_norm: 0.9999991527137918, iteration: 55145
loss: 0.9842592477798462,grad_norm: 0.9999990374499091, iteration: 55146
loss: 1.0407183170318604,grad_norm: 0.999999422909796, iteration: 55147
loss: 0.9995947480201721,grad_norm: 0.9999992429241036, iteration: 55148
loss: 0.9850876927375793,grad_norm: 0.9999991638109671, iteration: 55149
loss: 0.9998190402984619,grad_norm: 0.8957464246316855, iteration: 55150
loss: 1.0449656248092651,grad_norm: 0.9999996327577398, iteration: 55151
loss: 1.0167675018310547,grad_norm: 0.9999995563699655, iteration: 55152
loss: 1.0257996320724487,grad_norm: 0.9999993847428348, iteration: 55153
loss: 1.0792194604873657,grad_norm: 0.9999998988174073, iteration: 55154
loss: 1.0235368013381958,grad_norm: 0.9999990579044536, iteration: 55155
loss: 1.0112682580947876,grad_norm: 0.9999989329770862, iteration: 55156
loss: 1.0233635902404785,grad_norm: 0.681755901815711, iteration: 55157
loss: 0.9614518880844116,grad_norm: 0.8437786738903508, iteration: 55158
loss: 0.9842019081115723,grad_norm: 0.9999990719024413, iteration: 55159
loss: 1.0291430950164795,grad_norm: 0.9999991371287342, iteration: 55160
loss: 1.0028468370437622,grad_norm: 0.9999990826356474, iteration: 55161
loss: 1.0107395648956299,grad_norm: 0.9999995171123415, iteration: 55162
loss: 0.9861270785331726,grad_norm: 0.9999996848121298, iteration: 55163
loss: 1.0305877923965454,grad_norm: 0.9999999423609226, iteration: 55164
loss: 0.9531044363975525,grad_norm: 0.9999990522043339, iteration: 55165
loss: 1.0290586948394775,grad_norm: 0.8330455903265442, iteration: 55166
loss: 0.9892547726631165,grad_norm: 0.9999991943945636, iteration: 55167
loss: 1.0235885381698608,grad_norm: 0.9803853887009686, iteration: 55168
loss: 1.1128777265548706,grad_norm: 0.999999823868679, iteration: 55169
loss: 1.1809803247451782,grad_norm: 0.999999889875299, iteration: 55170
loss: 1.0051637887954712,grad_norm: 0.9999989395349373, iteration: 55171
loss: 1.0081554651260376,grad_norm: 0.8748657223129523, iteration: 55172
loss: 0.9919523000717163,grad_norm: 0.931597985890059, iteration: 55173
loss: 0.990662693977356,grad_norm: 0.9414539641150266, iteration: 55174
loss: 1.0230482816696167,grad_norm: 0.9999992157339337, iteration: 55175
loss: 1.0020581483840942,grad_norm: 0.9999990160435509, iteration: 55176
loss: 1.0330474376678467,grad_norm: 0.9008591891178105, iteration: 55177
loss: 1.0312756299972534,grad_norm: 0.9189849107839051, iteration: 55178
loss: 1.0392893552780151,grad_norm: 0.9096112243506247, iteration: 55179
loss: 1.0160636901855469,grad_norm: 0.9999989831333346, iteration: 55180
loss: 1.0163403749465942,grad_norm: 0.9053649802743751, iteration: 55181
loss: 0.9650861620903015,grad_norm: 0.882419284326358, iteration: 55182
loss: 1.0022759437561035,grad_norm: 0.9999990090844141, iteration: 55183
loss: 1.0106351375579834,grad_norm: 0.9999996306940617, iteration: 55184
loss: 0.9867372512817383,grad_norm: 0.9235739709066679, iteration: 55185
loss: 1.020674705505371,grad_norm: 0.9317903365272826, iteration: 55186
loss: 1.0194075107574463,grad_norm: 0.9999991348288227, iteration: 55187
loss: 0.996156632900238,grad_norm: 0.9999996069467477, iteration: 55188
loss: 1.0045013427734375,grad_norm: 0.9025940727543132, iteration: 55189
loss: 1.0496571063995361,grad_norm: 0.9999990932590604, iteration: 55190
loss: 0.9748155474662781,grad_norm: 0.8643145488256894, iteration: 55191
loss: 1.019409418106079,grad_norm: 0.9999991444252909, iteration: 55192
loss: 0.9834162592887878,grad_norm: 0.9860559056879654, iteration: 55193
loss: 1.036785364151001,grad_norm: 0.9999994693029791, iteration: 55194
loss: 1.0021979808807373,grad_norm: 0.8461479988968819, iteration: 55195
loss: 1.0498946905136108,grad_norm: 0.9418816915441409, iteration: 55196
loss: 0.9863244295120239,grad_norm: 0.9999992676421813, iteration: 55197
loss: 1.013956069946289,grad_norm: 0.999999153799824, iteration: 55198
loss: 1.0123727321624756,grad_norm: 0.9138882277146119, iteration: 55199
loss: 1.0289280414581299,grad_norm: 0.9999991771899961, iteration: 55200
loss: 1.0103228092193604,grad_norm: 0.9890298985654793, iteration: 55201
loss: 0.9907435774803162,grad_norm: 0.8230133116434418, iteration: 55202
loss: 1.039017677307129,grad_norm: 0.999999842634869, iteration: 55203
loss: 0.9813860058784485,grad_norm: 0.8860345760400569, iteration: 55204
loss: 0.9933701157569885,grad_norm: 0.8759872084428828, iteration: 55205
loss: 0.9774364233016968,grad_norm: 0.9930007490238282, iteration: 55206
loss: 1.0469926595687866,grad_norm: 0.9999997566751031, iteration: 55207
loss: 1.0117133855819702,grad_norm: 0.9999999035717967, iteration: 55208
loss: 1.0256493091583252,grad_norm: 0.8336828250377992, iteration: 55209
loss: 1.0272642374038696,grad_norm: 0.9999990299810791, iteration: 55210
loss: 1.009686827659607,grad_norm: 0.941304213032795, iteration: 55211
loss: 1.0884612798690796,grad_norm: 0.9999993612037583, iteration: 55212
loss: 0.945428729057312,grad_norm: 0.9999991453758631, iteration: 55213
loss: 1.005873203277588,grad_norm: 0.9999992053193854, iteration: 55214
loss: 1.0191984176635742,grad_norm: 0.9219574547744215, iteration: 55215
loss: 0.9920842051506042,grad_norm: 0.999999231500003, iteration: 55216
loss: 1.024603009223938,grad_norm: 0.9859306349307815, iteration: 55217
loss: 1.0248836278915405,grad_norm: 0.999999279487836, iteration: 55218
loss: 0.9816509485244751,grad_norm: 0.9064491195535888, iteration: 55219
loss: 1.0279620885849,grad_norm: 0.9999994959810485, iteration: 55220
loss: 0.9925150275230408,grad_norm: 0.9216655967449734, iteration: 55221
loss: 1.0106384754180908,grad_norm: 0.999999368296234, iteration: 55222
loss: 1.0161089897155762,grad_norm: 0.9999993088214205, iteration: 55223
loss: 0.9941703081130981,grad_norm: 0.9222919116087284, iteration: 55224
loss: 1.0247153043746948,grad_norm: 0.9999991320738696, iteration: 55225
loss: 0.9943886995315552,grad_norm: 0.895210982225432, iteration: 55226
loss: 1.0432524681091309,grad_norm: 0.9999994229893773, iteration: 55227
loss: 0.9891231656074524,grad_norm: 0.932846450320469, iteration: 55228
loss: 1.0209932327270508,grad_norm: 0.9761719060701791, iteration: 55229
loss: 1.0042004585266113,grad_norm: 0.9999997063220841, iteration: 55230
loss: 1.027916431427002,grad_norm: 0.9999994762953656, iteration: 55231
loss: 0.9861761927604675,grad_norm: 0.9026727266688471, iteration: 55232
loss: 1.0350960493087769,grad_norm: 0.872382482768105, iteration: 55233
loss: 1.001210331916809,grad_norm: 0.866063945273593, iteration: 55234
loss: 0.9847038984298706,grad_norm: 0.9999994606290221, iteration: 55235
loss: 1.0154216289520264,grad_norm: 0.902202707952193, iteration: 55236
loss: 1.0100138187408447,grad_norm: 0.8974719215130738, iteration: 55237
loss: 1.016478419303894,grad_norm: 0.8956602935620368, iteration: 55238
loss: 0.9830241799354553,grad_norm: 0.9999993179607178, iteration: 55239
loss: 1.0097293853759766,grad_norm: 0.9999993656311735, iteration: 55240
loss: 1.057791829109192,grad_norm: 0.9999994871717748, iteration: 55241
loss: 0.9804794192314148,grad_norm: 0.9999991337811279, iteration: 55242
loss: 1.0229300260543823,grad_norm: 0.999999235272716, iteration: 55243
loss: 1.0026545524597168,grad_norm: 0.9999993432863825, iteration: 55244
loss: 1.0048060417175293,grad_norm: 0.7854890444488883, iteration: 55245
loss: 1.0124260187149048,grad_norm: 0.9999992167178797, iteration: 55246
loss: 1.0473581552505493,grad_norm: 0.9999995437274717, iteration: 55247
loss: 1.0101983547210693,grad_norm: 0.9999989667835959, iteration: 55248
loss: 1.0256340503692627,grad_norm: 0.9638552285090985, iteration: 55249
loss: 1.0097756385803223,grad_norm: 0.9999991847667635, iteration: 55250
loss: 0.9855412840843201,grad_norm: 0.9999991074847768, iteration: 55251
loss: 0.9748750329017639,grad_norm: 0.9999991998780022, iteration: 55252
loss: 0.9622099995613098,grad_norm: 0.9999997828100446, iteration: 55253
loss: 1.042121410369873,grad_norm: 0.9104199341557536, iteration: 55254
loss: 1.0752896070480347,grad_norm: 0.9999997465329339, iteration: 55255
loss: 1.0646287202835083,grad_norm: 0.9999996408231379, iteration: 55256
loss: 1.0184004306793213,grad_norm: 0.8574259675058503, iteration: 55257
loss: 1.0148472785949707,grad_norm: 0.9576359276745132, iteration: 55258
loss: 0.9809874296188354,grad_norm: 0.955325514362282, iteration: 55259
loss: 0.9652004837989807,grad_norm: 0.9999991567499096, iteration: 55260
loss: 1.0199828147888184,grad_norm: 0.9999995914182873, iteration: 55261
loss: 1.0034767389297485,grad_norm: 0.9128098227229072, iteration: 55262
loss: 0.9961679577827454,grad_norm: 0.8828166079380777, iteration: 55263
loss: 1.039663553237915,grad_norm: 0.8786063363277697, iteration: 55264
loss: 1.0130053758621216,grad_norm: 0.9999997258874994, iteration: 55265
loss: 1.0447391271591187,grad_norm: 0.9999993670414555, iteration: 55266
loss: 0.996241569519043,grad_norm: 0.9999993868465248, iteration: 55267
loss: 0.9826536178588867,grad_norm: 0.9999992147294013, iteration: 55268
loss: 1.0142102241516113,grad_norm: 0.999999333830624, iteration: 55269
loss: 0.9726860523223877,grad_norm: 0.9999992959692077, iteration: 55270
loss: 1.015589714050293,grad_norm: 0.9737456702478005, iteration: 55271
loss: 1.0044525861740112,grad_norm: 0.9446007029321346, iteration: 55272
loss: 0.9744129180908203,grad_norm: 0.9793948495787186, iteration: 55273
loss: 0.9961378574371338,grad_norm: 0.9999991067423103, iteration: 55274
loss: 0.9841927289962769,grad_norm: 0.9039272870443475, iteration: 55275
loss: 1.0118106603622437,grad_norm: 0.9999993508230071, iteration: 55276
loss: 1.1558326482772827,grad_norm: 0.9999996702048805, iteration: 55277
loss: 1.0382920503616333,grad_norm: 0.9999995868148608, iteration: 55278
loss: 0.9831121563911438,grad_norm: 0.7706362446764876, iteration: 55279
loss: 1.0296285152435303,grad_norm: 0.8756114963786473, iteration: 55280
loss: 1.0179063081741333,grad_norm: 0.9999992590051939, iteration: 55281
loss: 1.018531322479248,grad_norm: 0.9999991482090625, iteration: 55282
loss: 0.9761616587638855,grad_norm: 0.9999995036221984, iteration: 55283
loss: 1.006996989250183,grad_norm: 0.9683286975933596, iteration: 55284
loss: 1.3509963750839233,grad_norm: 0.9999998436514482, iteration: 55285
loss: 1.0141226053237915,grad_norm: 0.999999473220427, iteration: 55286
loss: 1.015129566192627,grad_norm: 0.8735100622077497, iteration: 55287
loss: 1.0217844247817993,grad_norm: 0.9866687882003197, iteration: 55288
loss: 0.9718717932701111,grad_norm: 0.941071976285784, iteration: 55289
loss: 1.0329021215438843,grad_norm: 0.9897393617605802, iteration: 55290
loss: 0.9772390127182007,grad_norm: 0.9171377268989386, iteration: 55291
loss: 0.9855695366859436,grad_norm: 0.8346270634940962, iteration: 55292
loss: 1.014042854309082,grad_norm: 0.9709854557497816, iteration: 55293
loss: 1.0056438446044922,grad_norm: 0.9999991457758591, iteration: 55294
loss: 1.1092056035995483,grad_norm: 0.9999992643081961, iteration: 55295
loss: 1.0273348093032837,grad_norm: 0.9999991021368004, iteration: 55296
loss: 0.9870433807373047,grad_norm: 0.9999992728496742, iteration: 55297
loss: 1.0014511346817017,grad_norm: 0.9769264539446662, iteration: 55298
loss: 1.0204437971115112,grad_norm: 0.8777679732934435, iteration: 55299
loss: 1.0168880224227905,grad_norm: 0.9999992108939084, iteration: 55300
loss: 1.0271440744400024,grad_norm: 0.9999994815411839, iteration: 55301
loss: 0.9518054127693176,grad_norm: 0.9999991866166738, iteration: 55302
loss: 0.9973434805870056,grad_norm: 0.9144582733880958, iteration: 55303
loss: 0.9948011636734009,grad_norm: 0.9999994690343946, iteration: 55304
loss: 1.041732907295227,grad_norm: 0.9999994513860894, iteration: 55305
loss: 1.0048218965530396,grad_norm: 0.779590451200847, iteration: 55306
loss: 0.993722677230835,grad_norm: 0.8774546520270297, iteration: 55307
loss: 0.996610164642334,grad_norm: 0.9999993201395173, iteration: 55308
loss: 0.9930726885795593,grad_norm: 0.8743911488736101, iteration: 55309
loss: 1.1272789239883423,grad_norm: 0.9999995594213897, iteration: 55310
loss: 1.0130397081375122,grad_norm: 0.8385540040822123, iteration: 55311
loss: 1.1394283771514893,grad_norm: 0.99999966864277, iteration: 55312
loss: 1.05557382106781,grad_norm: 0.9724749333339416, iteration: 55313
loss: 1.1523308753967285,grad_norm: 0.9999993498302395, iteration: 55314
loss: 1.0932365655899048,grad_norm: 0.9999993026231103, iteration: 55315
loss: 1.062668800354004,grad_norm: 0.9999995094213183, iteration: 55316
loss: 1.2295600175857544,grad_norm: 0.9999995068795698, iteration: 55317
loss: 1.114197015762329,grad_norm: 1.0000000226174603, iteration: 55318
loss: 1.2312099933624268,grad_norm: 0.9999999749539862, iteration: 55319
loss: 1.104682207107544,grad_norm: 0.999999622418991, iteration: 55320
loss: 1.0030418634414673,grad_norm: 0.7469917199566434, iteration: 55321
loss: 0.9908707737922668,grad_norm: 0.8023822137313865, iteration: 55322
loss: 0.9844732880592346,grad_norm: 0.8863370249915097, iteration: 55323
loss: 1.0055793523788452,grad_norm: 0.9999992240906415, iteration: 55324
loss: 1.0692824125289917,grad_norm: 0.9999990495385429, iteration: 55325
loss: 0.9993153214454651,grad_norm: 0.87743081770342, iteration: 55326
loss: 1.0495591163635254,grad_norm: 0.849491471861428, iteration: 55327
loss: 0.9642491936683655,grad_norm: 0.9224051360058467, iteration: 55328
loss: 0.9792670607566833,grad_norm: 0.8458337732185681, iteration: 55329
loss: 1.0473500490188599,grad_norm: 0.9805739234063601, iteration: 55330
loss: 1.012774109840393,grad_norm: 0.9999996487439303, iteration: 55331
loss: 0.9947971105575562,grad_norm: 0.8816916120145659, iteration: 55332
loss: 1.0045335292816162,grad_norm: 0.9580458768145711, iteration: 55333
loss: 0.9927053451538086,grad_norm: 0.8237664786864659, iteration: 55334
loss: 0.9686856865882874,grad_norm: 0.9999990617319191, iteration: 55335
loss: 1.0381089448928833,grad_norm: 0.9999990748819406, iteration: 55336
loss: 1.01796293258667,grad_norm: 0.9999992077258192, iteration: 55337
loss: 1.0034348964691162,grad_norm: 0.9999990045693425, iteration: 55338
loss: 1.0706828832626343,grad_norm: 0.999999125282028, iteration: 55339
loss: 1.0634729862213135,grad_norm: 0.99999970348808, iteration: 55340
loss: 1.031466007232666,grad_norm: 0.8402933400639866, iteration: 55341
loss: 1.006618618965149,grad_norm: 0.9999995902122158, iteration: 55342
loss: 1.020981788635254,grad_norm: 0.9999990827078995, iteration: 55343
loss: 1.028113603591919,grad_norm: 0.9099336840659441, iteration: 55344
loss: 1.0287474393844604,grad_norm: 0.999999636528646, iteration: 55345
loss: 0.969656765460968,grad_norm: 0.8999663763605814, iteration: 55346
loss: 1.002784013748169,grad_norm: 0.895616981635935, iteration: 55347
loss: 0.9905045628547668,grad_norm: 0.9999991459555873, iteration: 55348
loss: 0.9983696341514587,grad_norm: 0.8838467937564153, iteration: 55349
loss: 1.0095490217208862,grad_norm: 0.9208524201022832, iteration: 55350
loss: 1.0144644975662231,grad_norm: 0.9859540287146733, iteration: 55351
loss: 0.9811933636665344,grad_norm: 0.9999991632680505, iteration: 55352
loss: 0.9827392101287842,grad_norm: 0.7866397464416233, iteration: 55353
loss: 1.027180552482605,grad_norm: 0.9999999429360171, iteration: 55354
loss: 1.0329362154006958,grad_norm: 0.7958814016317701, iteration: 55355
loss: 1.0256835222244263,grad_norm: 0.9466317690467043, iteration: 55356
loss: 0.9886133670806885,grad_norm: 0.9528540074229338, iteration: 55357
loss: 0.9994791746139526,grad_norm: 0.9056100446310112, iteration: 55358
loss: 1.0181933641433716,grad_norm: 0.9999991106282415, iteration: 55359
loss: 1.029958963394165,grad_norm: 0.9724832781234185, iteration: 55360
loss: 1.0153197050094604,grad_norm: 0.9885796558366968, iteration: 55361
loss: 0.9955727458000183,grad_norm: 0.9999991324152094, iteration: 55362
loss: 1.003408432006836,grad_norm: 0.9419877617998985, iteration: 55363
loss: 0.998445451259613,grad_norm: 0.6778213998274171, iteration: 55364
loss: 1.0016274452209473,grad_norm: 0.9999991049219873, iteration: 55365
loss: 1.0044180154800415,grad_norm: 0.9999994879963756, iteration: 55366
loss: 1.0890411138534546,grad_norm: 0.9999992208942556, iteration: 55367
loss: 0.9854562282562256,grad_norm: 0.7952742799115985, iteration: 55368
loss: 1.1624493598937988,grad_norm: 0.9999997643579556, iteration: 55369
loss: 0.9963127374649048,grad_norm: 0.9999991469210407, iteration: 55370
loss: 1.0004976987838745,grad_norm: 0.7905979745569731, iteration: 55371
loss: 1.0237915515899658,grad_norm: 0.9999990952513329, iteration: 55372
loss: 1.0382258892059326,grad_norm: 0.9999992920905043, iteration: 55373
loss: 1.0237382650375366,grad_norm: 0.9575572328219824, iteration: 55374
loss: 1.0138137340545654,grad_norm: 0.9999991724862908, iteration: 55375
loss: 1.0251469612121582,grad_norm: 0.9950713695577064, iteration: 55376
loss: 0.9992920756340027,grad_norm: 0.9182232217032481, iteration: 55377
loss: 1.0640708208084106,grad_norm: 0.7914536747181583, iteration: 55378
loss: 1.0486462116241455,grad_norm: 0.9999999495312626, iteration: 55379
loss: 1.0057787895202637,grad_norm: 0.8868636628758353, iteration: 55380
loss: 1.0236068964004517,grad_norm: 0.9999996277791128, iteration: 55381
loss: 1.0181370973587036,grad_norm: 0.9999991660999716, iteration: 55382
loss: 1.0710902214050293,grad_norm: 0.999999587036703, iteration: 55383
loss: 1.010612964630127,grad_norm: 0.9999996248077972, iteration: 55384
loss: 1.061467170715332,grad_norm: 0.9999995750581911, iteration: 55385
loss: 0.9973941445350647,grad_norm: 0.8398804934673801, iteration: 55386
loss: 1.0521472692489624,grad_norm: 0.9999998676313043, iteration: 55387
loss: 1.0848431587219238,grad_norm: 0.999999880431735, iteration: 55388
loss: 0.9824042916297913,grad_norm: 0.9999998488485744, iteration: 55389
loss: 0.9981070160865784,grad_norm: 0.9737458152614251, iteration: 55390
loss: 0.9990237951278687,grad_norm: 0.9999992613145328, iteration: 55391
loss: 1.0978951454162598,grad_norm: 0.9999992815253544, iteration: 55392
loss: 1.0267504453659058,grad_norm: 0.9999992660480749, iteration: 55393
loss: 1.0028026103973389,grad_norm: 0.9300903455208583, iteration: 55394
loss: 1.0217571258544922,grad_norm: 0.999999555047627, iteration: 55395
loss: 1.0292805433273315,grad_norm: 0.9999991694430649, iteration: 55396
loss: 0.980889081954956,grad_norm: 0.943211718854743, iteration: 55397
loss: 1.0359553098678589,grad_norm: 0.9999992351745969, iteration: 55398
loss: 1.00778067111969,grad_norm: 0.8714707046047394, iteration: 55399
loss: 0.9653422236442566,grad_norm: 0.9927897084328243, iteration: 55400
loss: 1.0184745788574219,grad_norm: 0.9481869412856959, iteration: 55401
loss: 1.0051145553588867,grad_norm: 0.9567539942675104, iteration: 55402
loss: 1.0144928693771362,grad_norm: 0.8151385024682837, iteration: 55403
loss: 1.1058006286621094,grad_norm: 1.0000000204860922, iteration: 55404
loss: 0.9845348000526428,grad_norm: 0.9999991531456703, iteration: 55405
loss: 1.0065139532089233,grad_norm: 0.9999992273261237, iteration: 55406
loss: 1.0379867553710938,grad_norm: 0.9999995046268272, iteration: 55407
loss: 0.9812102317810059,grad_norm: 0.9999991152554966, iteration: 55408
loss: 0.970202624797821,grad_norm: 0.8125014549169707, iteration: 55409
loss: 1.026665449142456,grad_norm: 0.9999991881273376, iteration: 55410
loss: 1.10596764087677,grad_norm: 0.9999995783671767, iteration: 55411
loss: 1.0227922201156616,grad_norm: 0.9999991032153887, iteration: 55412
loss: 1.0436054468154907,grad_norm: 0.999999125808748, iteration: 55413
loss: 1.0183666944503784,grad_norm: 0.9999992512480711, iteration: 55414
loss: 0.9917517900466919,grad_norm: 0.9999994926150986, iteration: 55415
loss: 1.0698814392089844,grad_norm: 0.9999996077148121, iteration: 55416
loss: 1.0132333040237427,grad_norm: 0.9999990781508566, iteration: 55417
loss: 1.0007097721099854,grad_norm: 0.895005132114145, iteration: 55418
loss: 1.265082836151123,grad_norm: 0.9999993105502449, iteration: 55419
loss: 0.9783053994178772,grad_norm: 0.9999993494891777, iteration: 55420
loss: 1.09159517288208,grad_norm: 0.9999998061105992, iteration: 55421
loss: 1.017362356185913,grad_norm: 0.9999991436433914, iteration: 55422
loss: 1.2433158159255981,grad_norm: 0.9999997090981987, iteration: 55423
loss: 0.9993358850479126,grad_norm: 0.947004946954989, iteration: 55424
loss: 1.0164539813995361,grad_norm: 0.999998896782121, iteration: 55425
loss: 1.0259555578231812,grad_norm: 0.9999994253488557, iteration: 55426
loss: 1.0302850008010864,grad_norm: 0.9630395399309403, iteration: 55427
loss: 1.0195471048355103,grad_norm: 0.7492868788360243, iteration: 55428
loss: 1.083889126777649,grad_norm: 0.9999997880910767, iteration: 55429
loss: 1.2062947750091553,grad_norm: 0.9999997514107326, iteration: 55430
loss: 1.0007843971252441,grad_norm: 0.9999992607287319, iteration: 55431
loss: 0.9935111403465271,grad_norm: 0.9793301281710779, iteration: 55432
loss: 1.01762056350708,grad_norm: 0.8355482767899665, iteration: 55433
loss: 0.9960810542106628,grad_norm: 0.9999992395337339, iteration: 55434
loss: 1.0103213787078857,grad_norm: 0.9071120863044075, iteration: 55435
loss: 1.0026352405548096,grad_norm: 0.8226997366335485, iteration: 55436
loss: 1.0195772647857666,grad_norm: 0.9999993982662281, iteration: 55437
loss: 1.0302505493164062,grad_norm: 0.9999994410199927, iteration: 55438
loss: 1.0632082223892212,grad_norm: 0.9162145193306355, iteration: 55439
loss: 1.0086182355880737,grad_norm: 0.9999991717622513, iteration: 55440
loss: 1.0524011850357056,grad_norm: 0.9999989994389075, iteration: 55441
loss: 1.0413269996643066,grad_norm: 0.9447688248807192, iteration: 55442
loss: 1.0502046346664429,grad_norm: 0.8901690698115234, iteration: 55443
loss: 0.985870361328125,grad_norm: 0.966403104315648, iteration: 55444
loss: 0.9691529273986816,grad_norm: 0.9999991333706052, iteration: 55445
loss: 0.9963986873626709,grad_norm: 0.8492043437626542, iteration: 55446
loss: 1.032456874847412,grad_norm: 0.9266546373265799, iteration: 55447
loss: 0.9783990383148193,grad_norm: 0.7719111271953237, iteration: 55448
loss: 1.0310744047164917,grad_norm: 0.9204979097250096, iteration: 55449
loss: 1.064689040184021,grad_norm: 0.9999992734941604, iteration: 55450
loss: 0.9783294200897217,grad_norm: 0.9247893812721636, iteration: 55451
loss: 1.0081186294555664,grad_norm: 0.836580818162675, iteration: 55452
loss: 0.9944248795509338,grad_norm: 0.9999989961738884, iteration: 55453
loss: 1.0015480518341064,grad_norm: 0.9999995880314153, iteration: 55454
loss: 1.011992335319519,grad_norm: 0.9071771150758303, iteration: 55455
loss: 0.9845172762870789,grad_norm: 0.9287773845848704, iteration: 55456
loss: 0.982553243637085,grad_norm: 0.9646211977281924, iteration: 55457
loss: 1.019950270652771,grad_norm: 0.8922346589917802, iteration: 55458
loss: 1.0322132110595703,grad_norm: 0.9521387698802788, iteration: 55459
loss: 1.0135788917541504,grad_norm: 0.9999990560733756, iteration: 55460
loss: 1.006428599357605,grad_norm: 0.9877700706665963, iteration: 55461
loss: 1.0067802667617798,grad_norm: 0.9283877015001908, iteration: 55462
loss: 1.0561816692352295,grad_norm: 0.999999768167417, iteration: 55463
loss: 1.039427638053894,grad_norm: 0.8534720350276471, iteration: 55464
loss: 1.103602647781372,grad_norm: 0.9999995509835494, iteration: 55465
loss: 0.9836536645889282,grad_norm: 0.9999989781348279, iteration: 55466
loss: 0.9703590273857117,grad_norm: 0.9020552571733698, iteration: 55467
loss: 1.0089185237884521,grad_norm: 0.8143092956765767, iteration: 55468
loss: 1.0120515823364258,grad_norm: 0.9999999457787666, iteration: 55469
loss: 1.062232255935669,grad_norm: 0.9999993466357973, iteration: 55470
loss: 1.0012948513031006,grad_norm: 0.8715304088860005, iteration: 55471
loss: 1.0207544565200806,grad_norm: 0.99999922189262, iteration: 55472
loss: 1.0246621370315552,grad_norm: 0.9999990656715397, iteration: 55473
loss: 1.0513626337051392,grad_norm: 0.9999996795046036, iteration: 55474
loss: 1.0285961627960205,grad_norm: 0.9042696192480154, iteration: 55475
loss: 1.002013087272644,grad_norm: 0.9999994889992686, iteration: 55476
loss: 1.0038901567459106,grad_norm: 0.9999989450392421, iteration: 55477
loss: 1.019413709640503,grad_norm: 0.9203059644653832, iteration: 55478
loss: 1.020431637763977,grad_norm: 0.9897633611960365, iteration: 55479
loss: 1.0027796030044556,grad_norm: 0.9287709608901897, iteration: 55480
loss: 1.0168904066085815,grad_norm: 0.9999997530099294, iteration: 55481
loss: 1.0026936531066895,grad_norm: 0.9999991941754134, iteration: 55482
loss: 1.0388505458831787,grad_norm: 0.9999990900549057, iteration: 55483
loss: 1.0678632259368896,grad_norm: 0.9999994449298361, iteration: 55484
loss: 1.0194668769836426,grad_norm: 0.999999445236353, iteration: 55485
loss: 1.0216760635375977,grad_norm: 0.813731659388403, iteration: 55486
loss: 0.9995014667510986,grad_norm: 0.999999138036748, iteration: 55487
loss: 1.002795934677124,grad_norm: 0.90154344543691, iteration: 55488
loss: 0.9929127097129822,grad_norm: 0.9999991538010236, iteration: 55489
loss: 0.961541473865509,grad_norm: 0.977251954567383, iteration: 55490
loss: 0.9699103236198425,grad_norm: 0.9999990595678374, iteration: 55491
loss: 0.9973205924034119,grad_norm: 0.8257696488531248, iteration: 55492
loss: 0.9915170073509216,grad_norm: 0.9339250336851578, iteration: 55493
loss: 1.0106033086776733,grad_norm: 0.8769968943366909, iteration: 55494
loss: 1.025385856628418,grad_norm: 0.8444611849486563, iteration: 55495
loss: 0.9750396013259888,grad_norm: 0.999998991091211, iteration: 55496
loss: 1.0745255947113037,grad_norm: 0.9999994524707587, iteration: 55497
loss: 1.01776123046875,grad_norm: 0.9999991301769566, iteration: 55498
loss: 0.9752504229545593,grad_norm: 0.999999213407624, iteration: 55499
loss: 1.013368010520935,grad_norm: 0.9458623113381478, iteration: 55500
loss: 1.0332889556884766,grad_norm: 0.9999995429123061, iteration: 55501
loss: 1.000519037246704,grad_norm: 0.999999066665684, iteration: 55502
loss: 1.00237238407135,grad_norm: 0.9849585607134764, iteration: 55503
loss: 1.0448832511901855,grad_norm: 0.9999991584135316, iteration: 55504
loss: 1.015595555305481,grad_norm: 0.9999996516084236, iteration: 55505
loss: 1.0126289129257202,grad_norm: 0.8854415141727416, iteration: 55506
loss: 1.0387011766433716,grad_norm: 0.9999990935408009, iteration: 55507
loss: 0.9776936173439026,grad_norm: 0.9485519889557483, iteration: 55508
loss: 0.9882641434669495,grad_norm: 0.9999992175982957, iteration: 55509
loss: 0.9852050542831421,grad_norm: 0.9999997108621594, iteration: 55510
loss: 1.0022214651107788,grad_norm: 0.8327848798258017, iteration: 55511
loss: 1.0332566499710083,grad_norm: 0.9999995737120787, iteration: 55512
loss: 0.9841093420982361,grad_norm: 0.9779285797520069, iteration: 55513
loss: 0.9947792887687683,grad_norm: 0.9999990758211892, iteration: 55514
loss: 0.9751187562942505,grad_norm: 0.9999990911595502, iteration: 55515
loss: 1.0607571601867676,grad_norm: 0.8818678229755441, iteration: 55516
loss: 1.011428952217102,grad_norm: 0.9999994375893143, iteration: 55517
loss: 1.0135658979415894,grad_norm: 0.9015010124120041, iteration: 55518
loss: 1.0163776874542236,grad_norm: 0.7861736906696218, iteration: 55519
loss: 0.9685025215148926,grad_norm: 0.9764350319101566, iteration: 55520
loss: 1.0524928569793701,grad_norm: 0.8943252437535537, iteration: 55521
loss: 1.0138556957244873,grad_norm: 0.9259806180899504, iteration: 55522
loss: 0.9622560143470764,grad_norm: 0.8599623430154096, iteration: 55523
loss: 0.9766964316368103,grad_norm: 0.8100216258594293, iteration: 55524
loss: 0.9336949586868286,grad_norm: 0.9999990283210107, iteration: 55525
loss: 1.003355860710144,grad_norm: 0.7935254129259252, iteration: 55526
loss: 0.9962655305862427,grad_norm: 0.8575505771746273, iteration: 55527
loss: 0.9740622639656067,grad_norm: 0.9999990706022707, iteration: 55528
loss: 1.0244741439819336,grad_norm: 0.7443608744689281, iteration: 55529
loss: 1.0348831415176392,grad_norm: 0.9999994219405494, iteration: 55530
loss: 0.981937825679779,grad_norm: 0.9397409175135097, iteration: 55531
loss: 1.0229710340499878,grad_norm: 0.9290867131783472, iteration: 55532
loss: 1.0119346380233765,grad_norm: 0.9999992009320025, iteration: 55533
loss: 1.0546362400054932,grad_norm: 0.8538381424269263, iteration: 55534
loss: 1.0175788402557373,grad_norm: 0.9999991911158351, iteration: 55535
loss: 1.001947045326233,grad_norm: 0.9999991431287681, iteration: 55536
loss: 1.0012630224227905,grad_norm: 0.9201578524641904, iteration: 55537
loss: 1.0102697610855103,grad_norm: 0.9999991331611654, iteration: 55538
loss: 0.9792689681053162,grad_norm: 0.9999992868693276, iteration: 55539
loss: 0.966163158416748,grad_norm: 0.826230301566703, iteration: 55540
loss: 1.0103881359100342,grad_norm: 0.8925996691012666, iteration: 55541
loss: 0.9870939254760742,grad_norm: 0.8498691265705368, iteration: 55542
loss: 0.9854246973991394,grad_norm: 0.9999990505398262, iteration: 55543
loss: 0.9802558422088623,grad_norm: 0.999999073144652, iteration: 55544
loss: 0.9775111079216003,grad_norm: 0.878709731907419, iteration: 55545
loss: 1.00778329372406,grad_norm: 0.926732696444554, iteration: 55546
loss: 1.011526107788086,grad_norm: 0.8664745531901594, iteration: 55547
loss: 0.9941925406455994,grad_norm: 0.9999991151983514, iteration: 55548
loss: 0.9880267381668091,grad_norm: 0.8142652129854914, iteration: 55549
loss: 0.9779866337776184,grad_norm: 0.9999989721475661, iteration: 55550
loss: 0.9864532947540283,grad_norm: 0.8180394036951819, iteration: 55551
loss: 0.9835076928138733,grad_norm: 0.9800103216723722, iteration: 55552
loss: 1.0295729637145996,grad_norm: 0.9610971547292533, iteration: 55553
loss: 1.0321341753005981,grad_norm: 0.9999989970206267, iteration: 55554
loss: 1.0105177164077759,grad_norm: 0.9981865551606206, iteration: 55555
loss: 0.9991150498390198,grad_norm: 0.9999990185558535, iteration: 55556
loss: 1.0015056133270264,grad_norm: 0.9532600511280661, iteration: 55557
loss: 1.0257822275161743,grad_norm: 0.9999992836987157, iteration: 55558
loss: 1.018200397491455,grad_norm: 0.9328887896719518, iteration: 55559
loss: 0.9801729917526245,grad_norm: 0.8275104969546062, iteration: 55560
loss: 0.9863269329071045,grad_norm: 0.999999119652709, iteration: 55561
loss: 1.0007821321487427,grad_norm: 0.9999993438529099, iteration: 55562
loss: 1.0081440210342407,grad_norm: 0.9446193267842383, iteration: 55563
loss: 0.9554046392440796,grad_norm: 0.9892551191436548, iteration: 55564
loss: 0.9724280834197998,grad_norm: 0.790422604257902, iteration: 55565
loss: 0.9593330025672913,grad_norm: 0.8726428448101748, iteration: 55566
loss: 1.0644303560256958,grad_norm: 0.9493635225571954, iteration: 55567
loss: 0.9934097528457642,grad_norm: 0.9999991634909413, iteration: 55568
loss: 0.9858435392379761,grad_norm: 0.9311861157056499, iteration: 55569
loss: 0.9765019416809082,grad_norm: 0.9851928446236686, iteration: 55570
loss: 0.954823911190033,grad_norm: 0.8777125053867803, iteration: 55571
loss: 0.9995709657669067,grad_norm: 0.9999991230439985, iteration: 55572
loss: 0.9959523677825928,grad_norm: 0.9330638487565448, iteration: 55573
loss: 0.9920322299003601,grad_norm: 0.9999992324280366, iteration: 55574
loss: 1.006226897239685,grad_norm: 0.9999990787270152, iteration: 55575
loss: 1.0144305229187012,grad_norm: 0.8521944271672932, iteration: 55576
loss: 1.0365856885910034,grad_norm: 0.9206738783273468, iteration: 55577
loss: 0.9739158153533936,grad_norm: 0.9999989786258167, iteration: 55578
loss: 0.9862827062606812,grad_norm: 0.7704645824981081, iteration: 55579
loss: 0.9946739077568054,grad_norm: 0.9789568635449185, iteration: 55580
loss: 1.0059738159179688,grad_norm: 0.999998970414006, iteration: 55581
loss: 0.9536767601966858,grad_norm: 0.8971369255011297, iteration: 55582
loss: 0.9912281632423401,grad_norm: 0.9629547484749573, iteration: 55583
loss: 0.9877419471740723,grad_norm: 0.9264336305074562, iteration: 55584
loss: 0.9971544146537781,grad_norm: 0.85166498691954, iteration: 55585
loss: 1.003135323524475,grad_norm: 0.9999989964887386, iteration: 55586
loss: 1.0332773923873901,grad_norm: 0.9999992150650651, iteration: 55587
loss: 1.0101044178009033,grad_norm: 0.9999993662578717, iteration: 55588
loss: 1.1135445833206177,grad_norm: 0.9999996860462771, iteration: 55589
loss: 0.9932252764701843,grad_norm: 0.9725905410586276, iteration: 55590
loss: 1.002534031867981,grad_norm: 0.9273285529121367, iteration: 55591
loss: 1.079244613647461,grad_norm: 0.9999992719693125, iteration: 55592
loss: 1.0254212617874146,grad_norm: 0.9818400803251922, iteration: 55593
loss: 0.9889699816703796,grad_norm: 0.8175803848951306, iteration: 55594
loss: 1.0136277675628662,grad_norm: 0.9156390739236503, iteration: 55595
loss: 1.0147669315338135,grad_norm: 0.8428901774050259, iteration: 55596
loss: 1.0227024555206299,grad_norm: 0.9999991525660212, iteration: 55597
loss: 0.9976048469543457,grad_norm: 0.8693164235556751, iteration: 55598
loss: 0.9803367853164673,grad_norm: 0.9999992084056181, iteration: 55599
loss: 0.9960669279098511,grad_norm: 0.9999992692803223, iteration: 55600
loss: 1.0121526718139648,grad_norm: 0.9999993460499929, iteration: 55601
loss: 1.086408019065857,grad_norm: 0.9999990692172808, iteration: 55602
loss: 0.9475923776626587,grad_norm: 0.9274800328042239, iteration: 55603
loss: 1.000152587890625,grad_norm: 0.9386586909126593, iteration: 55604
loss: 0.9706276059150696,grad_norm: 0.9999990948757147, iteration: 55605
loss: 0.9992083311080933,grad_norm: 0.9338603950276478, iteration: 55606
loss: 1.0037356615066528,grad_norm: 0.8858565226998953, iteration: 55607
loss: 1.062003493309021,grad_norm: 0.9999993314057785, iteration: 55608
loss: 1.016601324081421,grad_norm: 0.9863476025629957, iteration: 55609
loss: 1.0293340682983398,grad_norm: 0.922503757094007, iteration: 55610
loss: 1.0144420862197876,grad_norm: 0.9999992818798745, iteration: 55611
loss: 1.021071434020996,grad_norm: 0.9335905074531096, iteration: 55612
loss: 0.9583754539489746,grad_norm: 0.964662355359381, iteration: 55613
loss: 1.0264602899551392,grad_norm: 0.9783139291193474, iteration: 55614
loss: 0.9661732912063599,grad_norm: 0.8576533827116738, iteration: 55615
loss: 1.036253571510315,grad_norm: 0.9999991827289132, iteration: 55616
loss: 1.0185621976852417,grad_norm: 0.999999477504712, iteration: 55617
loss: 1.0395911931991577,grad_norm: 0.8533119189029896, iteration: 55618
loss: 0.9911147356033325,grad_norm: 0.999999301671543, iteration: 55619
loss: 1.0340644121170044,grad_norm: 0.9219949359370099, iteration: 55620
loss: 1.053426742553711,grad_norm: 0.9999990747249411, iteration: 55621
loss: 0.9890711903572083,grad_norm: 0.8998978373048725, iteration: 55622
loss: 1.055555820465088,grad_norm: 0.9605829233235447, iteration: 55623
loss: 0.992163360118866,grad_norm: 0.8684701583506268, iteration: 55624
loss: 1.0529781579971313,grad_norm: 0.9999998134922604, iteration: 55625
loss: 1.1276627779006958,grad_norm: 0.999999378444282, iteration: 55626
loss: 0.9624633193016052,grad_norm: 0.9999990921794105, iteration: 55627
loss: 0.9922341108322144,grad_norm: 0.8407678639049913, iteration: 55628
loss: 1.0178526639938354,grad_norm: 0.9747749247831062, iteration: 55629
loss: 1.2417271137237549,grad_norm: 0.9999998647177066, iteration: 55630
loss: 1.0948342084884644,grad_norm: 0.9563486692599893, iteration: 55631
loss: 1.0190236568450928,grad_norm: 0.9999999080310852, iteration: 55632
loss: 1.17151939868927,grad_norm: 0.9999994638873778, iteration: 55633
loss: 1.3548250198364258,grad_norm: 0.9999997110165822, iteration: 55634
loss: 1.1930290460586548,grad_norm: 0.9999997782243709, iteration: 55635
loss: 1.0847656726837158,grad_norm: 0.9999994804826862, iteration: 55636
loss: 0.9889305830001831,grad_norm: 0.9720862513997408, iteration: 55637
loss: 0.9665418863296509,grad_norm: 0.8822388572940119, iteration: 55638
loss: 1.030030608177185,grad_norm: 0.9558536121560581, iteration: 55639
loss: 1.0089428424835205,grad_norm: 0.8813483332296803, iteration: 55640
loss: 1.1826163530349731,grad_norm: 0.9999991073733481, iteration: 55641
loss: 1.0154625177383423,grad_norm: 0.9999994426148482, iteration: 55642
loss: 1.1280659437179565,grad_norm: 0.999999521234816, iteration: 55643
loss: 1.0950610637664795,grad_norm: 0.9999991286659567, iteration: 55644
loss: 1.0005697011947632,grad_norm: 0.9475702208454843, iteration: 55645
loss: 0.9883327484130859,grad_norm: 0.9124856446798981, iteration: 55646
loss: 1.0253255367279053,grad_norm: 0.9999995534108385, iteration: 55647
loss: 1.0591591596603394,grad_norm: 0.9999998118994105, iteration: 55648
loss: 1.0767134428024292,grad_norm: 0.9999993465433069, iteration: 55649
loss: 1.0647637844085693,grad_norm: 0.9999990932183356, iteration: 55650
loss: 1.0060629844665527,grad_norm: 0.9162872291374674, iteration: 55651
loss: 1.0289384126663208,grad_norm: 0.9167010287924893, iteration: 55652
loss: 1.0256580114364624,grad_norm: 0.8369964225797467, iteration: 55653
loss: 1.0094729661941528,grad_norm: 0.8650828136962176, iteration: 55654
loss: 1.0786521434783936,grad_norm: 0.9999995441055187, iteration: 55655
loss: 1.0590966939926147,grad_norm: 0.9999995558393899, iteration: 55656
loss: 1.0605969429016113,grad_norm: 0.9999992423323408, iteration: 55657
loss: 1.1296764612197876,grad_norm: 0.9999996895646611, iteration: 55658
loss: 1.1366024017333984,grad_norm: 0.9999998243230368, iteration: 55659
loss: 1.0229192972183228,grad_norm: 0.9168807976402442, iteration: 55660
loss: 1.016854166984558,grad_norm: 0.8406277228269587, iteration: 55661
loss: 0.974210798740387,grad_norm: 0.9954284976374278, iteration: 55662
loss: 1.0429819822311401,grad_norm: 0.801051083004016, iteration: 55663
loss: 0.9964455962181091,grad_norm: 0.8682389939489908, iteration: 55664
loss: 1.007534146308899,grad_norm: 0.9999991765191587, iteration: 55665
loss: 1.0580378770828247,grad_norm: 0.9999993235988153, iteration: 55666
loss: 0.9990870356559753,grad_norm: 0.9999996106997515, iteration: 55667
loss: 0.9908729791641235,grad_norm: 0.9999992739385742, iteration: 55668
loss: 1.0028711557388306,grad_norm: 0.9300213469325225, iteration: 55669
loss: 0.9824620485305786,grad_norm: 0.8652221568867463, iteration: 55670
loss: 1.0091679096221924,grad_norm: 0.9999991468725932, iteration: 55671
loss: 1.0024470090866089,grad_norm: 0.896739934242415, iteration: 55672
loss: 0.9884920716285706,grad_norm: 0.9999992009141675, iteration: 55673
loss: 0.9956949353218079,grad_norm: 0.8447705426102983, iteration: 55674
loss: 1.041046380996704,grad_norm: 0.9065768422025636, iteration: 55675
loss: 0.9780696630477905,grad_norm: 0.9823874726654861, iteration: 55676
loss: 0.9855934977531433,grad_norm: 0.8382537401844598, iteration: 55677
loss: 0.9809433221817017,grad_norm: 0.9570448053009948, iteration: 55678
loss: 1.0157842636108398,grad_norm: 0.9153830684111183, iteration: 55679
loss: 1.0326008796691895,grad_norm: 0.9999991463840517, iteration: 55680
loss: 1.0799474716186523,grad_norm: 0.9673288568406674, iteration: 55681
loss: 1.1461856365203857,grad_norm: 0.999999433859582, iteration: 55682
loss: 1.0138033628463745,grad_norm: 0.9280614478187341, iteration: 55683
loss: 1.0305250883102417,grad_norm: 0.7666961837125723, iteration: 55684
loss: 0.9928383827209473,grad_norm: 0.9999993097507073, iteration: 55685
loss: 0.9745427370071411,grad_norm: 0.9999990634707948, iteration: 55686
loss: 1.0142349004745483,grad_norm: 0.9999991087182966, iteration: 55687
loss: 1.000529170036316,grad_norm: 0.9061199214617407, iteration: 55688
loss: 0.9983169436454773,grad_norm: 0.8686004544124689, iteration: 55689
loss: 0.9800882935523987,grad_norm: 0.9999991114801637, iteration: 55690
loss: 1.0058528184890747,grad_norm: 0.8112386517219172, iteration: 55691
loss: 0.9944741129875183,grad_norm: 0.801584494038123, iteration: 55692
loss: 1.011569619178772,grad_norm: 0.8557842715896555, iteration: 55693
loss: 0.9886124730110168,grad_norm: 0.9847090524847568, iteration: 55694
loss: 1.0042673349380493,grad_norm: 0.9999993796296701, iteration: 55695
loss: 1.0177390575408936,grad_norm: 0.9999990699399833, iteration: 55696
loss: 0.9909024834632874,grad_norm: 0.999999129149008, iteration: 55697
loss: 1.0006296634674072,grad_norm: 0.9999989626459874, iteration: 55698
loss: 0.9893549084663391,grad_norm: 0.9071223675805145, iteration: 55699
loss: 0.9920266270637512,grad_norm: 0.9999991715365318, iteration: 55700
loss: 1.0132414102554321,grad_norm: 0.7829873871000556, iteration: 55701
loss: 0.9941885471343994,grad_norm: 0.7192419874334389, iteration: 55702
loss: 1.0550576448440552,grad_norm: 0.9999993110707658, iteration: 55703
loss: 0.990686297416687,grad_norm: 0.8747940069402516, iteration: 55704
loss: 0.9973292946815491,grad_norm: 0.9999991481751879, iteration: 55705
loss: 1.0345563888549805,grad_norm: 0.9999990853766452, iteration: 55706
loss: 1.0669842958450317,grad_norm: 0.9845070282031632, iteration: 55707
loss: 1.0049256086349487,grad_norm: 0.8664404647851757, iteration: 55708
loss: 1.0255402326583862,grad_norm: 0.8725025504000743, iteration: 55709
loss: 0.9849748015403748,grad_norm: 0.8013550999884399, iteration: 55710
loss: 1.014256477355957,grad_norm: 0.8885795810869254, iteration: 55711
loss: 1.0438841581344604,grad_norm: 0.8895310174759965, iteration: 55712
loss: 1.0065101385116577,grad_norm: 0.8519398217655851, iteration: 55713
loss: 1.0347033739089966,grad_norm: 0.7472931951390468, iteration: 55714
loss: 1.0100265741348267,grad_norm: 0.9999993822222232, iteration: 55715
loss: 0.9735239148139954,grad_norm: 0.9999990981949979, iteration: 55716
loss: 1.0050092935562134,grad_norm: 0.7819938411906989, iteration: 55717
loss: 1.0207082033157349,grad_norm: 0.9999992066755657, iteration: 55718
loss: 0.9813979268074036,grad_norm: 0.9999990404445622, iteration: 55719
loss: 0.9852749705314636,grad_norm: 0.9999993077496879, iteration: 55720
loss: 1.0015827417373657,grad_norm: 0.9751716862307137, iteration: 55721
loss: 1.0063257217407227,grad_norm: 0.8489736301955928, iteration: 55722
loss: 0.9767873883247375,grad_norm: 0.7795756747609373, iteration: 55723
loss: 1.0152934789657593,grad_norm: 0.9711372186242594, iteration: 55724
loss: 0.9999743103981018,grad_norm: 0.999999431916609, iteration: 55725
loss: 1.0114837884902954,grad_norm: 0.9999989808374768, iteration: 55726
loss: 1.0485038757324219,grad_norm: 0.9441009543245286, iteration: 55727
loss: 0.9830970168113708,grad_norm: 0.9295309799332468, iteration: 55728
loss: 1.0332694053649902,grad_norm: 0.9393482391435218, iteration: 55729
loss: 0.993754506111145,grad_norm: 0.8203180516352636, iteration: 55730
loss: 1.003467082977295,grad_norm: 0.8987375053090463, iteration: 55731
loss: 1.0079509019851685,grad_norm: 0.9037761348702726, iteration: 55732
loss: 1.0263466835021973,grad_norm: 0.9999992757501373, iteration: 55733
loss: 0.9723407030105591,grad_norm: 0.9999999496245486, iteration: 55734
loss: 0.9944880604743958,grad_norm: 0.99999924643926, iteration: 55735
loss: 1.0077564716339111,grad_norm: 0.761487989361448, iteration: 55736
loss: 0.9937340617179871,grad_norm: 0.8958438772628609, iteration: 55737
loss: 0.9995567798614502,grad_norm: 0.9999991826453754, iteration: 55738
loss: 0.9912607669830322,grad_norm: 0.9579081068534855, iteration: 55739
loss: 1.002029299736023,grad_norm: 0.9304846722683491, iteration: 55740
loss: 1.08327317237854,grad_norm: 0.9489172977744547, iteration: 55741
loss: 1.0324443578720093,grad_norm: 0.9999992268788872, iteration: 55742
loss: 0.9468044638633728,grad_norm: 0.9016908412630064, iteration: 55743
loss: 1.008518934249878,grad_norm: 0.999999192063784, iteration: 55744
loss: 0.9654887914657593,grad_norm: 0.9999993389497429, iteration: 55745
loss: 1.0138105154037476,grad_norm: 0.7889225053777081, iteration: 55746
loss: 1.014542818069458,grad_norm: 0.992473575497365, iteration: 55747
loss: 1.0027586221694946,grad_norm: 0.9249239932530783, iteration: 55748
loss: 0.9712603092193604,grad_norm: 0.9999990198290439, iteration: 55749
loss: 0.9972977042198181,grad_norm: 0.9999992647054057, iteration: 55750
loss: 1.0161997079849243,grad_norm: 0.9907911575702516, iteration: 55751
loss: 0.9885843396186829,grad_norm: 0.8100750773547964, iteration: 55752
loss: 1.0277862548828125,grad_norm: 0.9999992558213183, iteration: 55753
loss: 1.012611985206604,grad_norm: 0.9091370719387194, iteration: 55754
loss: 1.020901083946228,grad_norm: 0.9999993073769772, iteration: 55755
loss: 0.9784895181655884,grad_norm: 0.9999990517921533, iteration: 55756
loss: 0.999783456325531,grad_norm: 0.8219181408262011, iteration: 55757
loss: 0.9755201935768127,grad_norm: 0.9999993562265169, iteration: 55758
loss: 0.9981621503829956,grad_norm: 0.9431458755032177, iteration: 55759
loss: 0.986556351184845,grad_norm: 0.9999992406616898, iteration: 55760
loss: 1.02564537525177,grad_norm: 0.999999024149765, iteration: 55761
loss: 0.9803489446640015,grad_norm: 0.9451872165668876, iteration: 55762
loss: 0.9761835932731628,grad_norm: 0.9999989823643396, iteration: 55763
loss: 0.9780716896057129,grad_norm: 0.8138446625442148, iteration: 55764
loss: 0.9905579090118408,grad_norm: 0.9356214664709981, iteration: 55765
loss: 1.0323923826217651,grad_norm: 0.9999992004271264, iteration: 55766
loss: 0.9782026410102844,grad_norm: 0.9850658022259803, iteration: 55767
loss: 1.0325430631637573,grad_norm: 0.9041307104565585, iteration: 55768
loss: 0.9841622710227966,grad_norm: 0.9999990612248506, iteration: 55769
loss: 1.0346415042877197,grad_norm: 0.9999998413754838, iteration: 55770
loss: 1.0205804109573364,grad_norm: 0.8259576776772122, iteration: 55771
loss: 1.009057641029358,grad_norm: 0.9999991647098552, iteration: 55772
loss: 0.9876993894577026,grad_norm: 0.9259664968778731, iteration: 55773
loss: 1.018341064453125,grad_norm: 0.9999990589187533, iteration: 55774
loss: 1.0486996173858643,grad_norm: 0.9999994124707028, iteration: 55775
loss: 1.028456211090088,grad_norm: 0.926438880426263, iteration: 55776
loss: 1.0378831624984741,grad_norm: 0.9999996375726617, iteration: 55777
loss: 1.0350126028060913,grad_norm: 0.999999585393188, iteration: 55778
loss: 1.068829894065857,grad_norm: 0.9999998923335248, iteration: 55779
loss: 0.9757335782051086,grad_norm: 0.9550986864044699, iteration: 55780
loss: 1.137434959411621,grad_norm: 0.9999992362759123, iteration: 55781
loss: 1.0388920307159424,grad_norm: 0.9999991659725677, iteration: 55782
loss: 1.0033609867095947,grad_norm: 0.9999992411814005, iteration: 55783
loss: 1.0481024980545044,grad_norm: 0.9863522208906033, iteration: 55784
loss: 1.0166815519332886,grad_norm: 0.9999994078830158, iteration: 55785
loss: 1.045366883277893,grad_norm: 0.7463111177253585, iteration: 55786
loss: 0.9944574236869812,grad_norm: 0.999999079926447, iteration: 55787
loss: 1.0286414623260498,grad_norm: 0.9806738849657418, iteration: 55788
loss: 1.040452480316162,grad_norm: 0.864965246943733, iteration: 55789
loss: 1.0324633121490479,grad_norm: 0.982814733372348, iteration: 55790
loss: 1.0023508071899414,grad_norm: 0.9999998491925622, iteration: 55791
loss: 1.0067799091339111,grad_norm: 0.8068744435430816, iteration: 55792
loss: 1.0383892059326172,grad_norm: 0.999999158610621, iteration: 55793
loss: 0.9955435991287231,grad_norm: 0.9999996018239267, iteration: 55794
loss: 1.0157467126846313,grad_norm: 0.9999992031029028, iteration: 55795
loss: 0.9971840381622314,grad_norm: 0.9032606188009628, iteration: 55796
loss: 0.9872274994850159,grad_norm: 0.9988545190130068, iteration: 55797
loss: 1.0114773511886597,grad_norm: 0.9999991542585905, iteration: 55798
loss: 1.0615397691726685,grad_norm: 0.9999992846287313, iteration: 55799
loss: 1.0288329124450684,grad_norm: 0.9091432279926842, iteration: 55800
loss: 1.0048021078109741,grad_norm: 0.9976759259955564, iteration: 55801
loss: 1.0012881755828857,grad_norm: 0.9999996855431168, iteration: 55802
loss: 0.9966524839401245,grad_norm: 0.9999992243869041, iteration: 55803
loss: 1.0300806760787964,grad_norm: 0.9999992557413718, iteration: 55804
loss: 0.975795328617096,grad_norm: 0.8497645311155826, iteration: 55805
loss: 0.990286111831665,grad_norm: 0.8502001787102553, iteration: 55806
loss: 1.0218358039855957,grad_norm: 0.9339436385143268, iteration: 55807
loss: 0.9864084720611572,grad_norm: 0.8393418206930476, iteration: 55808
loss: 1.0157313346862793,grad_norm: 0.9102887150450061, iteration: 55809
loss: 1.0106452703475952,grad_norm: 0.9911313129973248, iteration: 55810
loss: 0.9979987740516663,grad_norm: 0.9999991276092662, iteration: 55811
loss: 1.0194518566131592,grad_norm: 0.9999999112796946, iteration: 55812
loss: 1.02536141872406,grad_norm: 0.8743387434485522, iteration: 55813
loss: 0.9989778399467468,grad_norm: 0.8867081296367109, iteration: 55814
loss: 1.0074341297149658,grad_norm: 0.9999989993441943, iteration: 55815
loss: 1.0037815570831299,grad_norm: 0.8166132327882261, iteration: 55816
loss: 0.9889698624610901,grad_norm: 0.9999991264804293, iteration: 55817
loss: 1.005052924156189,grad_norm: 0.9999989981455072, iteration: 55818
loss: 0.9578297734260559,grad_norm: 0.9999991029674387, iteration: 55819
loss: 0.9993727803230286,grad_norm: 0.9074635882765653, iteration: 55820
loss: 1.0776681900024414,grad_norm: 0.9871427068121492, iteration: 55821
loss: 1.0376310348510742,grad_norm: 0.887024312113879, iteration: 55822
loss: 0.9696522355079651,grad_norm: 0.9652555152572397, iteration: 55823
loss: 0.9900821447372437,grad_norm: 0.9522366818479896, iteration: 55824
loss: 0.9735094308853149,grad_norm: 0.9999991865346144, iteration: 55825
loss: 1.0002498626708984,grad_norm: 0.9999990529988176, iteration: 55826
loss: 1.0178285837173462,grad_norm: 0.9419611252367417, iteration: 55827
loss: 0.9914776682853699,grad_norm: 0.99999906890899, iteration: 55828
loss: 1.0080366134643555,grad_norm: 0.8445463992835126, iteration: 55829
loss: 0.9985067248344421,grad_norm: 0.9999991065515795, iteration: 55830
loss: 1.0469005107879639,grad_norm: 0.9708515444438808, iteration: 55831
loss: 1.0217410326004028,grad_norm: 0.9999996123055123, iteration: 55832
loss: 0.9639104008674622,grad_norm: 0.9999990967089353, iteration: 55833
loss: 1.0370122194290161,grad_norm: 0.8034346988945994, iteration: 55834
loss: 1.0344390869140625,grad_norm: 0.9999990079765914, iteration: 55835
loss: 0.9926483035087585,grad_norm: 0.905353940702303, iteration: 55836
loss: 0.995161235332489,grad_norm: 0.9201997517839758, iteration: 55837
loss: 1.0478525161743164,grad_norm: 0.9999994935545913, iteration: 55838
loss: 0.9950066208839417,grad_norm: 0.999999074490339, iteration: 55839
loss: 1.0082341432571411,grad_norm: 0.9999991509728289, iteration: 55840
loss: 0.9670508503913879,grad_norm: 0.9766705518447407, iteration: 55841
loss: 0.9712162613868713,grad_norm: 0.8487877132582646, iteration: 55842
loss: 0.9706880450248718,grad_norm: 0.9999990433658345, iteration: 55843
loss: 0.9941338300704956,grad_norm: 0.9999992142828927, iteration: 55844
loss: 1.009821891784668,grad_norm: 0.7739166426095683, iteration: 55845
loss: 1.0179469585418701,grad_norm: 0.8715805006718034, iteration: 55846
loss: 1.0152475833892822,grad_norm: 0.8909212623404537, iteration: 55847
loss: 0.9937193393707275,grad_norm: 0.9999991407533668, iteration: 55848
loss: 1.0584548711776733,grad_norm: 0.9999991195405317, iteration: 55849
loss: 0.9878067374229431,grad_norm: 0.9353328507103469, iteration: 55850
loss: 1.0168627500534058,grad_norm: 0.9439241564224584, iteration: 55851
loss: 0.9509543776512146,grad_norm: 0.9999991190142257, iteration: 55852
loss: 1.0201419591903687,grad_norm: 0.8024695395230801, iteration: 55853
loss: 1.0056419372558594,grad_norm: 0.8672801260099022, iteration: 55854
loss: 1.0171971321105957,grad_norm: 0.8976343677514185, iteration: 55855
loss: 1.0208958387374878,grad_norm: 0.8720743430920224, iteration: 55856
loss: 1.0369800329208374,grad_norm: 0.9221525915985337, iteration: 55857
loss: 1.006518006324768,grad_norm: 0.9383306186942596, iteration: 55858
loss: 0.9577504396438599,grad_norm: 0.9091332469596953, iteration: 55859
loss: 1.072701334953308,grad_norm: 0.9999992052553468, iteration: 55860
loss: 0.9924896359443665,grad_norm: 0.7831086343494099, iteration: 55861
loss: 1.0053784847259521,grad_norm: 0.9109565298947426, iteration: 55862
loss: 1.036293625831604,grad_norm: 0.9999994241679029, iteration: 55863
loss: 0.9734207391738892,grad_norm: 0.9068139901005869, iteration: 55864
loss: 1.0080770254135132,grad_norm: 0.999999087168292, iteration: 55865
loss: 1.0597612857818604,grad_norm: 0.999999888203179, iteration: 55866
loss: 1.032360553741455,grad_norm: 0.999999153108724, iteration: 55867
loss: 1.0237525701522827,grad_norm: 0.8761757379793278, iteration: 55868
loss: 1.013850212097168,grad_norm: 0.8904610824986331, iteration: 55869
loss: 1.0070090293884277,grad_norm: 0.8941222137663843, iteration: 55870
loss: 0.9763725399971008,grad_norm: 0.7983557945269213, iteration: 55871
loss: 1.009952187538147,grad_norm: 0.9999994129157175, iteration: 55872
loss: 1.0834499597549438,grad_norm: 0.9608320389781378, iteration: 55873
loss: 1.0302690267562866,grad_norm: 0.8670866846033339, iteration: 55874
loss: 1.0352946519851685,grad_norm: 0.9999994074323921, iteration: 55875
loss: 1.0048271417617798,grad_norm: 0.8151200039945002, iteration: 55876
loss: 1.05246901512146,grad_norm: 0.9999999429986886, iteration: 55877
loss: 0.9813382625579834,grad_norm: 0.9999990133933915, iteration: 55878
loss: 1.0162845849990845,grad_norm: 0.8259730869365818, iteration: 55879
loss: 0.9924991726875305,grad_norm: 0.9999991917873048, iteration: 55880
loss: 0.9860819578170776,grad_norm: 0.8352394930359006, iteration: 55881
loss: 0.9758828282356262,grad_norm: 0.8224695201029494, iteration: 55882
loss: 1.0363738536834717,grad_norm: 0.9999992091310705, iteration: 55883
loss: 0.9726044535636902,grad_norm: 0.9999991598758418, iteration: 55884
loss: 0.9714402556419373,grad_norm: 0.9999992416393468, iteration: 55885
loss: 1.0889875888824463,grad_norm: 0.9999992592166316, iteration: 55886
loss: 0.9679334163665771,grad_norm: 0.9956671444181656, iteration: 55887
loss: 0.9579985737800598,grad_norm: 0.8665908176230421, iteration: 55888
loss: 1.0051474571228027,grad_norm: 0.9999993593184865, iteration: 55889
loss: 0.9674668908119202,grad_norm: 0.9016065874116902, iteration: 55890
loss: 1.0248045921325684,grad_norm: 0.9661805874667451, iteration: 55891
loss: 0.9955988526344299,grad_norm: 0.9189843510255454, iteration: 55892
loss: 1.1698962450027466,grad_norm: 0.9999995509017797, iteration: 55893
loss: 1.031007170677185,grad_norm: 0.9999994367715416, iteration: 55894
loss: 1.0216755867004395,grad_norm: 0.999999116022981, iteration: 55895
loss: 1.0111368894577026,grad_norm: 0.9999992051695274, iteration: 55896
loss: 1.0079433917999268,grad_norm: 0.8215613008194527, iteration: 55897
loss: 1.062568187713623,grad_norm: 0.9999996405042865, iteration: 55898
loss: 0.9810448884963989,grad_norm: 0.8603913753176222, iteration: 55899
loss: 0.9831116795539856,grad_norm: 0.8702569742913421, iteration: 55900
loss: 1.0821242332458496,grad_norm: 0.9999990662379453, iteration: 55901
loss: 1.0064352750778198,grad_norm: 0.9999991688649918, iteration: 55902
loss: 0.9996524453163147,grad_norm: 0.9999991213268862, iteration: 55903
loss: 1.0339499711990356,grad_norm: 0.8700773820729891, iteration: 55904
loss: 0.9948351383209229,grad_norm: 0.9368012325751522, iteration: 55905
loss: 1.0505660772323608,grad_norm: 0.9999991906038225, iteration: 55906
loss: 1.028536081314087,grad_norm: 0.9602968208350915, iteration: 55907
loss: 0.9686179757118225,grad_norm: 0.9467890918859816, iteration: 55908
loss: 1.0165224075317383,grad_norm: 0.9999992333810231, iteration: 55909
loss: 1.0376652479171753,grad_norm: 0.9509270400318257, iteration: 55910
loss: 1.0259884595870972,grad_norm: 0.999999159458128, iteration: 55911
loss: 1.047046422958374,grad_norm: 0.956117589711297, iteration: 55912
loss: 1.005068063735962,grad_norm: 0.9999990076816406, iteration: 55913
loss: 1.0300934314727783,grad_norm: 0.9298581832170173, iteration: 55914
loss: 1.0680142641067505,grad_norm: 0.9759947223598612, iteration: 55915
loss: 1.055745244026184,grad_norm: 0.9999998555250784, iteration: 55916
loss: 1.0295673608779907,grad_norm: 0.9999990726296969, iteration: 55917
loss: 0.9899793863296509,grad_norm: 0.9625470613317774, iteration: 55918
loss: 1.018132209777832,grad_norm: 0.8793506420259809, iteration: 55919
loss: 0.9591993689537048,grad_norm: 0.9380783046480806, iteration: 55920
loss: 1.0298079252243042,grad_norm: 0.9999990968016154, iteration: 55921
loss: 1.0035631656646729,grad_norm: 0.7661694630283109, iteration: 55922
loss: 1.0066578388214111,grad_norm: 0.9999991069548505, iteration: 55923
loss: 1.0291906595230103,grad_norm: 0.9999992898210011, iteration: 55924
loss: 1.023067593574524,grad_norm: 0.9999991433063998, iteration: 55925
loss: 0.9844810366630554,grad_norm: 0.999014882856683, iteration: 55926
loss: 1.0064340829849243,grad_norm: 0.999999058456258, iteration: 55927
loss: 0.9919068813323975,grad_norm: 0.9999989781804173, iteration: 55928
loss: 1.0018106698989868,grad_norm: 0.9999994043677548, iteration: 55929
loss: 0.969081699848175,grad_norm: 0.8571849832875179, iteration: 55930
loss: 1.030243158340454,grad_norm: 0.999999092986254, iteration: 55931
loss: 1.0088285207748413,grad_norm: 0.8371645421431881, iteration: 55932
loss: 1.010446548461914,grad_norm: 0.999999448548631, iteration: 55933
loss: 0.9542668461799622,grad_norm: 0.8973584956895002, iteration: 55934
loss: 1.0066739320755005,grad_norm: 0.7906191868279292, iteration: 55935
loss: 1.0285296440124512,grad_norm: 0.9999998095135048, iteration: 55936
loss: 0.9881166815757751,grad_norm: 0.999999637876595, iteration: 55937
loss: 0.9899672865867615,grad_norm: 0.999999088296988, iteration: 55938
loss: 0.9737828969955444,grad_norm: 0.9999990683883642, iteration: 55939
loss: 0.998959481716156,grad_norm: 0.8748496475727935, iteration: 55940
loss: 1.0098426342010498,grad_norm: 0.9999991387219512, iteration: 55941
loss: 0.9953532814979553,grad_norm: 0.9072284195359006, iteration: 55942
loss: 1.0345510244369507,grad_norm: 0.999999898936514, iteration: 55943
loss: 1.0233676433563232,grad_norm: 0.85024024578743, iteration: 55944
loss: 0.9779160618782043,grad_norm: 0.9431959983241606, iteration: 55945
loss: 0.9941132664680481,grad_norm: 0.9999992792899388, iteration: 55946
loss: 1.0124773979187012,grad_norm: 0.8512917453923143, iteration: 55947
loss: 1.027381181716919,grad_norm: 0.7767679575254263, iteration: 55948
loss: 1.0590978860855103,grad_norm: 0.9999993672934137, iteration: 55949
loss: 1.034934401512146,grad_norm: 0.9999998498414924, iteration: 55950
loss: 1.0101646184921265,grad_norm: 0.9133904724797373, iteration: 55951
loss: 1.0174640417099,grad_norm: 0.9999996871184369, iteration: 55952
loss: 1.0067123174667358,grad_norm: 0.999999153241945, iteration: 55953
loss: 0.988233745098114,grad_norm: 0.9999992315726793, iteration: 55954
loss: 1.0174193382263184,grad_norm: 0.8108849137515775, iteration: 55955
loss: 0.9885511994361877,grad_norm: 0.8209871208437045, iteration: 55956
loss: 1.0197126865386963,grad_norm: 0.9881982183015868, iteration: 55957
loss: 1.0030816793441772,grad_norm: 0.999999160600453, iteration: 55958
loss: 1.0065834522247314,grad_norm: 0.9952491218331244, iteration: 55959
loss: 0.9948952198028564,grad_norm: 0.999998963723887, iteration: 55960
loss: 1.0379917621612549,grad_norm: 0.9999993738194697, iteration: 55961
loss: 0.971993088722229,grad_norm: 0.9999991113564104, iteration: 55962
loss: 1.0553932189941406,grad_norm: 0.9999991250503476, iteration: 55963
loss: 0.9864752888679504,grad_norm: 0.8919771862765845, iteration: 55964
loss: 1.029756784439087,grad_norm: 0.999999381795908, iteration: 55965
loss: 0.9920370578765869,grad_norm: 0.9999996568823167, iteration: 55966
loss: 1.0353715419769287,grad_norm: 0.9999994661385598, iteration: 55967
loss: 1.0206620693206787,grad_norm: 0.9368322133609949, iteration: 55968
loss: 1.0387985706329346,grad_norm: 0.9459417707987132, iteration: 55969
loss: 1.0341846942901611,grad_norm: 0.999999285423, iteration: 55970
loss: 1.0199443101882935,grad_norm: 0.9999993698714116, iteration: 55971
loss: 1.0058261156082153,grad_norm: 0.9999995328103441, iteration: 55972
loss: 1.0007539987564087,grad_norm: 0.9999992328510251, iteration: 55973
loss: 1.007703185081482,grad_norm: 0.9999991758506389, iteration: 55974
loss: 1.0179399251937866,grad_norm: 0.9614695947595651, iteration: 55975
loss: 1.0025380849838257,grad_norm: 0.8622312191283629, iteration: 55976
loss: 1.009867787361145,grad_norm: 0.9999990978235047, iteration: 55977
loss: 1.021569848060608,grad_norm: 0.9999991781471571, iteration: 55978
loss: 1.0154905319213867,grad_norm: 0.790084187525271, iteration: 55979
loss: 1.0451735258102417,grad_norm: 0.9999991255872379, iteration: 55980
loss: 0.9678168296813965,grad_norm: 0.9449900170529704, iteration: 55981
loss: 1.025710940361023,grad_norm: 0.9999993772388566, iteration: 55982
loss: 0.987984836101532,grad_norm: 0.999999279667181, iteration: 55983
loss: 1.0429928302764893,grad_norm: 0.9999991327636494, iteration: 55984
loss: 0.960742175579071,grad_norm: 0.9540962414113668, iteration: 55985
loss: 1.0134479999542236,grad_norm: 0.9999993209151824, iteration: 55986
loss: 1.0242130756378174,grad_norm: 0.999999602206025, iteration: 55987
loss: 0.9867151975631714,grad_norm: 0.8137066349760278, iteration: 55988
loss: 1.017324686050415,grad_norm: 0.9999991451508374, iteration: 55989
loss: 1.003931999206543,grad_norm: 0.9422825728925587, iteration: 55990
loss: 1.0256898403167725,grad_norm: 0.9595627646465648, iteration: 55991
loss: 0.9680385589599609,grad_norm: 0.9345153497135017, iteration: 55992
loss: 0.9776614308357239,grad_norm: 0.9999994942257376, iteration: 55993
loss: 1.0100481510162354,grad_norm: 0.9999997790332256, iteration: 55994
loss: 0.9809421300888062,grad_norm: 0.9999992019130238, iteration: 55995
loss: 1.0008704662322998,grad_norm: 0.8130216999843553, iteration: 55996
loss: 1.0890114307403564,grad_norm: 0.9999997304142236, iteration: 55997
loss: 0.9987251162528992,grad_norm: 0.8451623987253603, iteration: 55998
loss: 1.0478330850601196,grad_norm: 0.999999458221869, iteration: 55999
loss: 0.9932308197021484,grad_norm: 0.9773954324718913, iteration: 56000
loss: 1.0389472246170044,grad_norm: 0.9999991082724597, iteration: 56001
loss: 1.041528344154358,grad_norm: 0.9910194852185433, iteration: 56002
loss: 0.9793232083320618,grad_norm: 0.8904031847040984, iteration: 56003
loss: 1.04551100730896,grad_norm: 0.9999998387164319, iteration: 56004
loss: 1.009294867515564,grad_norm: 0.9040935936525167, iteration: 56005
loss: 1.0014394521713257,grad_norm: 0.9999992784664343, iteration: 56006
loss: 0.9959030747413635,grad_norm: 0.9999994287627022, iteration: 56007
loss: 1.02156400680542,grad_norm: 0.9705256218193471, iteration: 56008
loss: 1.0406458377838135,grad_norm: 0.9139043259380019, iteration: 56009
loss: 1.1030524969100952,grad_norm: 0.9999996838355536, iteration: 56010
loss: 1.0141758918762207,grad_norm: 0.9225034778206248, iteration: 56011
loss: 1.1367000341415405,grad_norm: 0.9999993363218531, iteration: 56012
loss: 0.9786574840545654,grad_norm: 0.9999991380204446, iteration: 56013
loss: 1.029603362083435,grad_norm: 0.9999992109921828, iteration: 56014
loss: 1.0040990114212036,grad_norm: 0.9999990904055251, iteration: 56015
loss: 1.044691562652588,grad_norm: 0.9572877412880682, iteration: 56016
loss: 0.9689757227897644,grad_norm: 0.8113760320736833, iteration: 56017
loss: 1.0380419492721558,grad_norm: 0.9304367950553739, iteration: 56018
loss: 0.9612699151039124,grad_norm: 0.8923396662539111, iteration: 56019
loss: 1.0601861476898193,grad_norm: 0.9999997527157782, iteration: 56020
loss: 1.0861327648162842,grad_norm: 0.9999990823703631, iteration: 56021
loss: 0.9624249935150146,grad_norm: 0.9013084918918333, iteration: 56022
loss: 0.9865740537643433,grad_norm: 0.999999237006112, iteration: 56023
loss: 1.0180014371871948,grad_norm: 0.9999990901350716, iteration: 56024
loss: 1.0394246578216553,grad_norm: 0.9999994746179146, iteration: 56025
loss: 1.0093051195144653,grad_norm: 0.9999993324446796, iteration: 56026
loss: 1.0173602104187012,grad_norm: 0.9999989641904341, iteration: 56027
loss: 0.9875017404556274,grad_norm: 0.9999989616433895, iteration: 56028
loss: 1.040065884590149,grad_norm: 0.8399752064358039, iteration: 56029
loss: 0.9984627962112427,grad_norm: 0.9065637301768624, iteration: 56030
loss: 0.9950483441352844,grad_norm: 0.8614824627795776, iteration: 56031
loss: 1.0281195640563965,grad_norm: 0.9999997909339655, iteration: 56032
loss: 1.0498197078704834,grad_norm: 0.9999996363622035, iteration: 56033
loss: 1.0064735412597656,grad_norm: 0.8375762471625333, iteration: 56034
loss: 1.0376533269882202,grad_norm: 0.9999998887384476, iteration: 56035
loss: 1.007562518119812,grad_norm: 0.9428636182110367, iteration: 56036
loss: 0.9685112237930298,grad_norm: 0.9318270728980469, iteration: 56037
loss: 1.0967637300491333,grad_norm: 0.9999997252016721, iteration: 56038
loss: 1.020415186882019,grad_norm: 0.9366794697251637, iteration: 56039
loss: 1.0549900531768799,grad_norm: 0.999999407945465, iteration: 56040
loss: 0.9916026592254639,grad_norm: 0.9999992282607507, iteration: 56041
loss: 1.0095587968826294,grad_norm: 0.9801593967350101, iteration: 56042
loss: 1.0439599752426147,grad_norm: 0.8870004426658067, iteration: 56043
loss: 1.0432438850402832,grad_norm: 0.9489907009287348, iteration: 56044
loss: 1.032808542251587,grad_norm: 0.9999998728962831, iteration: 56045
loss: 0.9847580194473267,grad_norm: 0.9999992364287646, iteration: 56046
loss: 0.9757832884788513,grad_norm: 0.8371993262084104, iteration: 56047
loss: 0.9960161447525024,grad_norm: 0.9221382140759203, iteration: 56048
loss: 1.0247784852981567,grad_norm: 0.874436574548388, iteration: 56049
loss: 1.0603759288787842,grad_norm: 0.9999992715417955, iteration: 56050
loss: 1.0340983867645264,grad_norm: 0.9999992470791726, iteration: 56051
loss: 1.0208542346954346,grad_norm: 0.9999996550433331, iteration: 56052
loss: 1.0486701726913452,grad_norm: 0.9999991926933942, iteration: 56053
loss: 1.0464755296707153,grad_norm: 0.999999515686155, iteration: 56054
loss: 1.0515743494033813,grad_norm: 0.9999997604028263, iteration: 56055
loss: 1.0210931301116943,grad_norm: 0.8447356848945416, iteration: 56056
loss: 1.0440434217453003,grad_norm: 0.9999999662828417, iteration: 56057
loss: 1.113773226737976,grad_norm: 0.999999716876119, iteration: 56058
loss: 1.012539267539978,grad_norm: 0.7685228021367694, iteration: 56059
loss: 0.99644935131073,grad_norm: 0.9516164707920897, iteration: 56060
loss: 1.006974458694458,grad_norm: 0.9068725651868083, iteration: 56061
loss: 1.0324342250823975,grad_norm: 0.9060506175324472, iteration: 56062
loss: 1.1180734634399414,grad_norm: 0.9999998218442292, iteration: 56063
loss: 1.2742586135864258,grad_norm: 0.9999994957923114, iteration: 56064
loss: 1.026653528213501,grad_norm: 0.8017671272275048, iteration: 56065
loss: 1.205714464187622,grad_norm: 1.0000000165248903, iteration: 56066
loss: 1.218717098236084,grad_norm: 0.9999996917161944, iteration: 56067
loss: 1.0060688257217407,grad_norm: 0.8549162957700316, iteration: 56068
loss: 0.9809555411338806,grad_norm: 0.9566360152912213, iteration: 56069
loss: 1.0308843851089478,grad_norm: 0.9024392726233569, iteration: 56070
loss: 1.085342526435852,grad_norm: 0.9999993434015417, iteration: 56071
loss: 1.0317156314849854,grad_norm: 0.9238942924148028, iteration: 56072
loss: 1.0124796628952026,grad_norm: 0.9999990502333554, iteration: 56073
loss: 1.0006862878799438,grad_norm: 0.9999991159658408, iteration: 56074
loss: 1.0098426342010498,grad_norm: 0.9999992371489149, iteration: 56075
loss: 1.0092544555664062,grad_norm: 0.8858353871909547, iteration: 56076
loss: 1.046271800994873,grad_norm: 0.9999996865153395, iteration: 56077
loss: 1.0204452276229858,grad_norm: 0.9999994170766627, iteration: 56078
loss: 1.084477186203003,grad_norm: 0.9999993094072058, iteration: 56079
loss: 1.1399612426757812,grad_norm: 0.9999995675339436, iteration: 56080
loss: 1.0829038619995117,grad_norm: 0.9999996687281194, iteration: 56081
loss: 1.0123097896575928,grad_norm: 0.9999997074434663, iteration: 56082
loss: 0.9898110032081604,grad_norm: 0.9999997453009231, iteration: 56083
loss: 1.0240057706832886,grad_norm: 0.9999998970893668, iteration: 56084
loss: 1.0360422134399414,grad_norm: 0.9999998156742758, iteration: 56085
loss: 1.0136308670043945,grad_norm: 0.9999990268506054, iteration: 56086
loss: 1.0033056735992432,grad_norm: 0.9374844876939815, iteration: 56087
loss: 0.9918398857116699,grad_norm: 0.9999998546505253, iteration: 56088
loss: 1.0327383279800415,grad_norm: 0.9999991348364556, iteration: 56089
loss: 1.0997227430343628,grad_norm: 0.9999996602492056, iteration: 56090
loss: 1.0195659399032593,grad_norm: 0.8220577074799169, iteration: 56091
loss: 0.9915966391563416,grad_norm: 0.9814351658631114, iteration: 56092
loss: 1.0700109004974365,grad_norm: 0.9544036004214987, iteration: 56093
loss: 1.098081350326538,grad_norm: 0.9999991838227215, iteration: 56094
loss: 0.9956974983215332,grad_norm: 0.9999995337611575, iteration: 56095
loss: 1.0198757648468018,grad_norm: 0.9999992257031391, iteration: 56096
loss: 1.1166110038757324,grad_norm: 0.9999993461864239, iteration: 56097
loss: 1.0889358520507812,grad_norm: 0.9999989801526774, iteration: 56098
loss: 0.9705235958099365,grad_norm: 0.9999991429347533, iteration: 56099
loss: 1.0062958002090454,grad_norm: 0.9954093524821022, iteration: 56100
loss: 1.0057982206344604,grad_norm: 0.8886782231176549, iteration: 56101
loss: 0.9888360500335693,grad_norm: 0.9220660711395173, iteration: 56102
loss: 1.0085176229476929,grad_norm: 0.9999991052065277, iteration: 56103
loss: 1.0211944580078125,grad_norm: 0.8433236250685545, iteration: 56104
loss: 1.0279380083084106,grad_norm: 0.999999019506531, iteration: 56105
loss: 0.9776580333709717,grad_norm: 0.9999995197162279, iteration: 56106
loss: 0.9822664260864258,grad_norm: 0.9999992243891784, iteration: 56107
loss: 0.9958767294883728,grad_norm: 0.9999992478227805, iteration: 56108
loss: 1.0402343273162842,grad_norm: 0.9999990966187536, iteration: 56109
loss: 0.9919800758361816,grad_norm: 0.777452361946306, iteration: 56110
loss: 1.0106310844421387,grad_norm: 0.9134007767874169, iteration: 56111
loss: 1.0254720449447632,grad_norm: 0.8541813308504859, iteration: 56112
loss: 0.979637861251831,grad_norm: 0.7915979629832269, iteration: 56113
loss: 1.0446285009384155,grad_norm: 0.9999991358982031, iteration: 56114
loss: 1.0622478723526,grad_norm: 1.0000000081490714, iteration: 56115
loss: 1.0678998231887817,grad_norm: 0.9999993209646367, iteration: 56116
loss: 0.9626114368438721,grad_norm: 0.9999991739754971, iteration: 56117
loss: 1.0453145503997803,grad_norm: 0.9999989932276213, iteration: 56118
loss: 1.0322482585906982,grad_norm: 0.9217020106237633, iteration: 56119
loss: 1.0590959787368774,grad_norm: 0.9999991923117679, iteration: 56120
loss: 0.9945662617683411,grad_norm: 0.9999991128320073, iteration: 56121
loss: 1.1312202215194702,grad_norm: 0.9999993200331428, iteration: 56122
loss: 0.9860389232635498,grad_norm: 0.9999992516417266, iteration: 56123
loss: 1.0307449102401733,grad_norm: 0.9999993403658041, iteration: 56124
loss: 1.0479720830917358,grad_norm: 0.999999114211965, iteration: 56125
loss: 1.0107465982437134,grad_norm: 0.9999992125978076, iteration: 56126
loss: 0.9920145273208618,grad_norm: 0.9999991955250656, iteration: 56127
loss: 0.9894359707832336,grad_norm: 0.94742910761277, iteration: 56128
loss: 0.9690190553665161,grad_norm: 0.9999992974783067, iteration: 56129
loss: 1.0206576585769653,grad_norm: 0.9999999595244716, iteration: 56130
loss: 1.0180652141571045,grad_norm: 0.883393526389137, iteration: 56131
loss: 1.0098130702972412,grad_norm: 0.8978366293930737, iteration: 56132
loss: 1.025270938873291,grad_norm: 0.999999120900657, iteration: 56133
loss: 1.027910590171814,grad_norm: 0.9999992275903543, iteration: 56134
loss: 0.9982683062553406,grad_norm: 0.9355792689935774, iteration: 56135
loss: 0.9779587388038635,grad_norm: 0.999999483102171, iteration: 56136
loss: 1.042044758796692,grad_norm: 0.9999989516668001, iteration: 56137
loss: 1.0037636756896973,grad_norm: 0.9999991916157454, iteration: 56138
loss: 0.9587438702583313,grad_norm: 0.9696258206250814, iteration: 56139
loss: 0.998927891254425,grad_norm: 0.9999991156993825, iteration: 56140
loss: 0.9971821904182434,grad_norm: 0.9761526253340616, iteration: 56141
loss: 0.979418158531189,grad_norm: 0.859388100230949, iteration: 56142
loss: 0.9886844158172607,grad_norm: 0.9999991776603654, iteration: 56143
loss: 1.0270072221755981,grad_norm: 0.9999990680114385, iteration: 56144
loss: 1.0394777059555054,grad_norm: 0.8349450332061142, iteration: 56145
loss: 1.0421324968338013,grad_norm: 0.9999993194695208, iteration: 56146
loss: 1.0145046710968018,grad_norm: 0.9999991625966008, iteration: 56147
loss: 1.0139847993850708,grad_norm: 0.9107703929593262, iteration: 56148
loss: 1.0183085203170776,grad_norm: 0.9999992477101042, iteration: 56149
loss: 0.9670257568359375,grad_norm: 0.8349401756345608, iteration: 56150
loss: 0.9608626365661621,grad_norm: 0.9138761583449506, iteration: 56151
loss: 1.0014779567718506,grad_norm: 0.8753102983327468, iteration: 56152
loss: 1.019716739654541,grad_norm: 0.9999990893640567, iteration: 56153
loss: 1.0120301246643066,grad_norm: 0.999999161544385, iteration: 56154
loss: 0.9748316407203674,grad_norm: 0.9999991096972742, iteration: 56155
loss: 1.0014915466308594,grad_norm: 0.8788613841078174, iteration: 56156
loss: 0.9852859973907471,grad_norm: 0.9806061062994094, iteration: 56157
loss: 0.9858327507972717,grad_norm: 0.9711503669968276, iteration: 56158
loss: 0.9738897085189819,grad_norm: 0.9561815450054854, iteration: 56159
loss: 1.0266059637069702,grad_norm: 0.8668792776492479, iteration: 56160
loss: 1.0506865978240967,grad_norm: 0.9999991866113013, iteration: 56161
loss: 1.0176793336868286,grad_norm: 0.9354116167188047, iteration: 56162
loss: 1.0144931077957153,grad_norm: 0.9999992755938639, iteration: 56163
loss: 1.0229820013046265,grad_norm: 0.9999993640475845, iteration: 56164
loss: 1.103501796722412,grad_norm: 0.9999999052224341, iteration: 56165
loss: 1.0093516111373901,grad_norm: 0.96885892454605, iteration: 56166
loss: 0.991106390953064,grad_norm: 0.9999994230912319, iteration: 56167
loss: 1.085201621055603,grad_norm: 0.9999997617825186, iteration: 56168
loss: 1.0327932834625244,grad_norm: 0.9999991079700729, iteration: 56169
loss: 1.0527287721633911,grad_norm: 0.9999991326412246, iteration: 56170
loss: 0.9950427412986755,grad_norm: 0.999999256170134, iteration: 56171
loss: 1.0648131370544434,grad_norm: 0.9999997411415381, iteration: 56172
loss: 1.0280325412750244,grad_norm: 0.9621785953442056, iteration: 56173
loss: 1.1289855241775513,grad_norm: 0.9999996290094437, iteration: 56174
loss: 0.9576770067214966,grad_norm: 0.9999992392316928, iteration: 56175
loss: 0.9838212132453918,grad_norm: 0.9470520110206867, iteration: 56176
loss: 0.9800021648406982,grad_norm: 0.9415814980817642, iteration: 56177
loss: 0.9850357174873352,grad_norm: 0.7688562825835769, iteration: 56178
loss: 1.1007992029190063,grad_norm: 0.9999996517872914, iteration: 56179
loss: 1.0187581777572632,grad_norm: 0.99999905556848, iteration: 56180
loss: 1.0198651552200317,grad_norm: 0.9999990348869066, iteration: 56181
loss: 0.9853029847145081,grad_norm: 0.999999156840901, iteration: 56182
loss: 0.9839519262313843,grad_norm: 0.7382199901395086, iteration: 56183
loss: 1.0171101093292236,grad_norm: 0.9999997834024841, iteration: 56184
loss: 0.9987425208091736,grad_norm: 0.9230734349849808, iteration: 56185
loss: 1.1108852624893188,grad_norm: 0.9999995535318296, iteration: 56186
loss: 0.9949522018432617,grad_norm: 0.9999994856677884, iteration: 56187
loss: 1.007902979850769,grad_norm: 0.8620718596009934, iteration: 56188
loss: 1.0225262641906738,grad_norm: 0.9999996524043742, iteration: 56189
loss: 1.0703741312026978,grad_norm: 0.9999994130423163, iteration: 56190
loss: 1.0264644622802734,grad_norm: 0.9354379101667495, iteration: 56191
loss: 0.9884744882583618,grad_norm: 0.8116411301437565, iteration: 56192
loss: 0.9603049755096436,grad_norm: 0.8483668924926108, iteration: 56193
loss: 0.9784907698631287,grad_norm: 0.9580128029042342, iteration: 56194
loss: 1.0844500064849854,grad_norm: 0.9999997026058929, iteration: 56195
loss: 1.0333178043365479,grad_norm: 0.8828511869573593, iteration: 56196
loss: 1.0035995244979858,grad_norm: 0.8399825191467494, iteration: 56197
loss: 1.0918240547180176,grad_norm: 0.9999992098140209, iteration: 56198
loss: 0.9719630479812622,grad_norm: 0.9999996752184361, iteration: 56199
loss: 1.0646072626113892,grad_norm: 0.9999999224345144, iteration: 56200
loss: 1.1198643445968628,grad_norm: 0.9999998026558997, iteration: 56201
loss: 0.9858238101005554,grad_norm: 0.9960534803501525, iteration: 56202
loss: 0.9898746013641357,grad_norm: 0.9999989873070735, iteration: 56203
loss: 0.9991779923439026,grad_norm: 0.983542252667852, iteration: 56204
loss: 1.0745296478271484,grad_norm: 0.9999998774190704, iteration: 56205
loss: 0.9944477677345276,grad_norm: 0.989208994472233, iteration: 56206
loss: 1.0342122316360474,grad_norm: 0.9833548025696266, iteration: 56207
loss: 1.0106511116027832,grad_norm: 0.9999990425443062, iteration: 56208
loss: 0.9609194397926331,grad_norm: 0.8435279780476973, iteration: 56209
loss: 1.062893271446228,grad_norm: 0.9999993131092983, iteration: 56210
loss: 1.0172516107559204,grad_norm: 0.9999993714760508, iteration: 56211
loss: 1.0003973245620728,grad_norm: 0.9683486856142292, iteration: 56212
loss: 0.9694364070892334,grad_norm: 0.9999990707693003, iteration: 56213
loss: 1.0331580638885498,grad_norm: 0.9999991853446779, iteration: 56214
loss: 1.1123217344284058,grad_norm: 0.9999997610662871, iteration: 56215
loss: 1.0244032144546509,grad_norm: 0.9999996832003911, iteration: 56216
loss: 0.9702596664428711,grad_norm: 0.9227172409310355, iteration: 56217
loss: 0.9737584590911865,grad_norm: 0.9999991737142241, iteration: 56218
loss: 1.01143479347229,grad_norm: 0.9999993106420623, iteration: 56219
loss: 0.9993793368339539,grad_norm: 0.9999995381692535, iteration: 56220
loss: 0.9964807629585266,grad_norm: 0.8901525064505977, iteration: 56221
loss: 1.011017918586731,grad_norm: 0.9109065447025738, iteration: 56222
loss: 0.9906465411186218,grad_norm: 0.9291941688943255, iteration: 56223
loss: 0.9778054356575012,grad_norm: 0.9999990919973557, iteration: 56224
loss: 1.0045394897460938,grad_norm: 0.9349759593237303, iteration: 56225
loss: 1.0185548067092896,grad_norm: 0.9999991067370586, iteration: 56226
loss: 1.1091903448104858,grad_norm: 0.9999995264703778, iteration: 56227
loss: 1.0321025848388672,grad_norm: 0.99999920457825, iteration: 56228
loss: 1.1294455528259277,grad_norm: 0.9999992110655529, iteration: 56229
loss: 0.9732690453529358,grad_norm: 0.9999993324569618, iteration: 56230
loss: 1.017324686050415,grad_norm: 0.8601495959569386, iteration: 56231
loss: 1.0496492385864258,grad_norm: 0.9999992238694042, iteration: 56232
loss: 1.026382565498352,grad_norm: 0.9999996629942723, iteration: 56233
loss: 0.9725918769836426,grad_norm: 0.9999990805027079, iteration: 56234
loss: 1.0610045194625854,grad_norm: 0.9999999221881646, iteration: 56235
loss: 1.0422559976577759,grad_norm: 0.9759839013343395, iteration: 56236
loss: 1.0283621549606323,grad_norm: 0.9999993884930112, iteration: 56237
loss: 1.0212243795394897,grad_norm: 0.9999997429850574, iteration: 56238
loss: 1.007289171218872,grad_norm: 0.9999992816829879, iteration: 56239
loss: 1.001912236213684,grad_norm: 0.9040966995189158, iteration: 56240
loss: 0.9622406959533691,grad_norm: 0.9999990089746909, iteration: 56241
loss: 1.0489506721496582,grad_norm: 0.9999991185849632, iteration: 56242
loss: 0.9950510859489441,grad_norm: 0.9999991072404597, iteration: 56243
loss: 1.0583804845809937,grad_norm: 0.9999995025433955, iteration: 56244
loss: 0.9771687984466553,grad_norm: 0.8453659343740553, iteration: 56245
loss: 1.003259539604187,grad_norm: 0.8804749414550692, iteration: 56246
loss: 0.9551853537559509,grad_norm: 0.906782213487691, iteration: 56247
loss: 0.9836353063583374,grad_norm: 0.9510128199407183, iteration: 56248
loss: 1.01288640499115,grad_norm: 0.9322685105137595, iteration: 56249
loss: 1.1716214418411255,grad_norm: 0.9999996206330524, iteration: 56250
loss: 1.1235517263412476,grad_norm: 0.9999997556844697, iteration: 56251
loss: 1.0462616682052612,grad_norm: 0.9999991671465985, iteration: 56252
loss: 1.1167266368865967,grad_norm: 0.9999997280111254, iteration: 56253
loss: 1.0092545747756958,grad_norm: 0.9999998741440821, iteration: 56254
loss: 1.0341566801071167,grad_norm: 0.9207665428699577, iteration: 56255
loss: 1.0116020441055298,grad_norm: 0.9999990907700322, iteration: 56256
loss: 1.011172890663147,grad_norm: 0.9177139365454707, iteration: 56257
loss: 1.0626251697540283,grad_norm: 0.999999825698925, iteration: 56258
loss: 1.2289284467697144,grad_norm: 0.999999350645731, iteration: 56259
loss: 0.998803973197937,grad_norm: 0.8409831206101226, iteration: 56260
loss: 0.9809117913246155,grad_norm: 0.9093559038119663, iteration: 56261
loss: 1.0310355424880981,grad_norm: 0.9999991343093144, iteration: 56262
loss: 1.0070679187774658,grad_norm: 0.8394404941990571, iteration: 56263
loss: 1.0323306322097778,grad_norm: 0.862540355472193, iteration: 56264
loss: 0.9782172441482544,grad_norm: 0.9645172478276158, iteration: 56265
loss: 1.0442769527435303,grad_norm: 0.9999995086566538, iteration: 56266
loss: 1.0210518836975098,grad_norm: 0.9999997983904072, iteration: 56267
loss: 1.0166325569152832,grad_norm: 0.8119053593535397, iteration: 56268
loss: 1.1727440357208252,grad_norm: 0.9999998483217681, iteration: 56269
loss: 1.0419055223464966,grad_norm: 0.9855497805787875, iteration: 56270
loss: 1.0207637548446655,grad_norm: 0.9506725841836822, iteration: 56271
loss: 1.0137569904327393,grad_norm: 0.9147436519487474, iteration: 56272
loss: 0.9734600186347961,grad_norm: 0.9124012681597132, iteration: 56273
loss: 0.9987251162528992,grad_norm: 0.9999992312604086, iteration: 56274
loss: 1.1065562963485718,grad_norm: 0.9999997674278693, iteration: 56275
loss: 1.0211896896362305,grad_norm: 0.9999990888280776, iteration: 56276
loss: 1.180108904838562,grad_norm: 0.9999995841344389, iteration: 56277
loss: 1.1672414541244507,grad_norm: 0.9999997986765994, iteration: 56278
loss: 1.0731661319732666,grad_norm: 0.9999994773692817, iteration: 56279
loss: 1.1704148054122925,grad_norm: 0.9999996166821069, iteration: 56280
loss: 0.9999365210533142,grad_norm: 0.9999996824614743, iteration: 56281
loss: 1.0034111738204956,grad_norm: 0.9404691149503225, iteration: 56282
loss: 1.0371590852737427,grad_norm: 0.9999991387299942, iteration: 56283
loss: 1.0231308937072754,grad_norm: 0.9999995417774296, iteration: 56284
loss: 1.00034499168396,grad_norm: 0.9999991525129397, iteration: 56285
loss: 0.9979045987129211,grad_norm: 0.9999998307983136, iteration: 56286
loss: 0.9942553639411926,grad_norm: 0.9045860638546959, iteration: 56287
loss: 1.0008529424667358,grad_norm: 0.9999991173074921, iteration: 56288
loss: 0.9876322746276855,grad_norm: 0.8978413917692178, iteration: 56289
loss: 1.0033972263336182,grad_norm: 0.9999989531401751, iteration: 56290
loss: 1.1053016185760498,grad_norm: 0.9999994974623871, iteration: 56291
loss: 0.9738091826438904,grad_norm: 0.9999991257454013, iteration: 56292
loss: 1.0056922435760498,grad_norm: 0.9999994710601843, iteration: 56293
loss: 1.014530062675476,grad_norm: 0.9999995905329425, iteration: 56294
loss: 1.048783779144287,grad_norm: 0.9999992313634555, iteration: 56295
loss: 1.3379478454589844,grad_norm: 0.9999995720942295, iteration: 56296
loss: 1.1018403768539429,grad_norm: 0.9999996535747211, iteration: 56297
loss: 1.0251903533935547,grad_norm: 0.9999994747789641, iteration: 56298
loss: 0.9857354760169983,grad_norm: 0.9999993526697079, iteration: 56299
loss: 1.005519151687622,grad_norm: 0.9999991243892777, iteration: 56300
loss: 1.021568775177002,grad_norm: 0.9999991732913992, iteration: 56301
loss: 0.994190514087677,grad_norm: 0.9999991838834691, iteration: 56302
loss: 1.0167269706726074,grad_norm: 0.9999993461009236, iteration: 56303
loss: 1.0134228467941284,grad_norm: 0.9999991759195438, iteration: 56304
loss: 0.9341381192207336,grad_norm: 0.9999992409586913, iteration: 56305
loss: 0.9718236923217773,grad_norm: 0.9502847014387183, iteration: 56306
loss: 1.012290596961975,grad_norm: 0.9518398231579405, iteration: 56307
loss: 1.0277198553085327,grad_norm: 0.9999990772616902, iteration: 56308
loss: 0.9925442337989807,grad_norm: 0.8335373716526825, iteration: 56309
loss: 1.0028935670852661,grad_norm: 0.9999990533357676, iteration: 56310
loss: 0.9701573252677917,grad_norm: 0.9999991288883593, iteration: 56311
loss: 1.0191007852554321,grad_norm: 0.9999990349332342, iteration: 56312
loss: 0.9889300465583801,grad_norm: 0.9999992855361283, iteration: 56313
loss: 0.9931220412254333,grad_norm: 0.999999127053729, iteration: 56314
loss: 0.992952823638916,grad_norm: 0.9022771947869833, iteration: 56315
loss: 1.0544153451919556,grad_norm: 0.9999997905315064, iteration: 56316
loss: 0.9699919819831848,grad_norm: 0.8366901372118378, iteration: 56317
loss: 1.0042905807495117,grad_norm: 0.887034666318156, iteration: 56318
loss: 1.0411646366119385,grad_norm: 0.9999996678684837, iteration: 56319
loss: 1.0051395893096924,grad_norm: 0.8300125716013372, iteration: 56320
loss: 0.9981626868247986,grad_norm: 0.7823579780940711, iteration: 56321
loss: 1.0686511993408203,grad_norm: 0.9999996919781313, iteration: 56322
loss: 1.0049891471862793,grad_norm: 0.8472370910040707, iteration: 56323
loss: 1.0151677131652832,grad_norm: 0.9999994177587678, iteration: 56324
loss: 0.9882806539535522,grad_norm: 0.8988882152224221, iteration: 56325
loss: 1.0472385883331299,grad_norm: 0.9999993402439532, iteration: 56326
loss: 1.0364806652069092,grad_norm: 0.9999990557351999, iteration: 56327
loss: 0.9767504930496216,grad_norm: 0.8921675027912757, iteration: 56328
loss: 1.014593243598938,grad_norm: 0.862134812432323, iteration: 56329
loss: 1.0062792301177979,grad_norm: 0.9999992538890066, iteration: 56330
loss: 0.9961137771606445,grad_norm: 0.801269166202505, iteration: 56331
loss: 1.0190205574035645,grad_norm: 0.9999992348619503, iteration: 56332
loss: 1.0137335062026978,grad_norm: 0.867682807580669, iteration: 56333
loss: 0.99224853515625,grad_norm: 0.9755856459718315, iteration: 56334
loss: 0.9800255298614502,grad_norm: 0.8698460109735132, iteration: 56335
loss: 1.037643551826477,grad_norm: 0.9999995341822077, iteration: 56336
loss: 1.0453046560287476,grad_norm: 0.999999584240866, iteration: 56337
loss: 1.0486736297607422,grad_norm: 0.9999991909366786, iteration: 56338
loss: 1.022688627243042,grad_norm: 0.9999991816405832, iteration: 56339
loss: 1.0179120302200317,grad_norm: 0.9342818677059301, iteration: 56340
loss: 1.1237856149673462,grad_norm: 0.9999999062579746, iteration: 56341
loss: 1.0027809143066406,grad_norm: 0.9999990457250794, iteration: 56342
loss: 1.0110951662063599,grad_norm: 0.9999995984966569, iteration: 56343
loss: 1.0008405447006226,grad_norm: 0.9169981161304215, iteration: 56344
loss: 1.0403730869293213,grad_norm: 0.9999990707381432, iteration: 56345
loss: 1.048247218132019,grad_norm: 0.9999991352735073, iteration: 56346
loss: 1.1537679433822632,grad_norm: 0.9999995434467905, iteration: 56347
loss: 1.0062378644943237,grad_norm: 0.9999994129038465, iteration: 56348
loss: 1.0077465772628784,grad_norm: 0.9026928832066836, iteration: 56349
loss: 1.0404636859893799,grad_norm: 0.9999993220697008, iteration: 56350
loss: 1.020735502243042,grad_norm: 0.9999999172054872, iteration: 56351
loss: 1.0117908716201782,grad_norm: 0.9757420102117441, iteration: 56352
loss: 1.0358446836471558,grad_norm: 0.8618407433198084, iteration: 56353
loss: 1.0471848249435425,grad_norm: 0.9999998282576812, iteration: 56354
loss: 1.0125614404678345,grad_norm: 0.9999991612017934, iteration: 56355
loss: 1.0483721494674683,grad_norm: 0.9999999613220362, iteration: 56356
loss: 1.0002760887145996,grad_norm: 0.9542280686253785, iteration: 56357
loss: 0.9553402066230774,grad_norm: 0.9999989843683513, iteration: 56358
loss: 1.1059257984161377,grad_norm: 0.9705289901056426, iteration: 56359
loss: 1.0120826959609985,grad_norm: 0.9999990736534916, iteration: 56360
loss: 1.0159916877746582,grad_norm: 0.9320461056690843, iteration: 56361
loss: 1.0957618951797485,grad_norm: 0.9999993680383796, iteration: 56362
loss: 1.1034828424453735,grad_norm: 0.9999990413230465, iteration: 56363
loss: 1.0711787939071655,grad_norm: 0.9999995072079433, iteration: 56364
loss: 1.0498392581939697,grad_norm: 0.9999992528433006, iteration: 56365
loss: 0.9902463555335999,grad_norm: 0.9999991846088325, iteration: 56366
loss: 1.1705294847488403,grad_norm: 0.999999361666901, iteration: 56367
loss: 1.3912283182144165,grad_norm: 0.9999998653770754, iteration: 56368
loss: 1.4933464527130127,grad_norm: 0.9999995982753027, iteration: 56369
loss: 1.376600980758667,grad_norm: 0.9999998954507942, iteration: 56370
loss: 1.0537453889846802,grad_norm: 0.999999040382, iteration: 56371
loss: 1.1709673404693604,grad_norm: 0.9999991465955632, iteration: 56372
loss: 1.0742298364639282,grad_norm: 0.9999991950793875, iteration: 56373
loss: 1.1696852445602417,grad_norm: 0.9999993954958513, iteration: 56374
loss: 1.1185557842254639,grad_norm: 0.9999999344554606, iteration: 56375
loss: 1.1207284927368164,grad_norm: 0.9999994945996254, iteration: 56376
loss: 1.162007451057434,grad_norm: 0.999999483401791, iteration: 56377
loss: 1.3521606922149658,grad_norm: 0.9999998117387532, iteration: 56378
loss: 1.0484941005706787,grad_norm: 0.9999991523512708, iteration: 56379
loss: 1.2973566055297852,grad_norm: 0.999999780129503, iteration: 56380
loss: 1.0527207851409912,grad_norm: 0.9999994368908175, iteration: 56381
loss: 1.2680789232254028,grad_norm: 0.9999994375376865, iteration: 56382
loss: 1.0265229940414429,grad_norm: 0.9999991598562636, iteration: 56383
loss: 0.9825824499130249,grad_norm: 0.9999989710908135, iteration: 56384
loss: 1.0073078870773315,grad_norm: 0.9999991068443556, iteration: 56385
loss: 0.9781484603881836,grad_norm: 0.8500340574167679, iteration: 56386
loss: 1.0066297054290771,grad_norm: 0.9789679488256232, iteration: 56387
loss: 1.003739356994629,grad_norm: 0.9999992731978214, iteration: 56388
loss: 1.1227664947509766,grad_norm: 0.9999996767672741, iteration: 56389
loss: 1.0230549573898315,grad_norm: 0.9571620600011972, iteration: 56390
loss: 0.9929792881011963,grad_norm: 0.8366384636449239, iteration: 56391
loss: 1.0057541131973267,grad_norm: 0.9780951110048274, iteration: 56392
loss: 1.010308861732483,grad_norm: 0.9999991788811776, iteration: 56393
loss: 1.0403566360473633,grad_norm: 0.999999303570055, iteration: 56394
loss: 1.00479257106781,grad_norm: 0.9999993203272202, iteration: 56395
loss: 0.9796341061592102,grad_norm: 0.9999989975024849, iteration: 56396
loss: 1.1050430536270142,grad_norm: 0.9999991347639682, iteration: 56397
loss: 0.9920480847358704,grad_norm: 0.8694674735780901, iteration: 56398
loss: 0.9777539372444153,grad_norm: 0.9999992213691186, iteration: 56399
loss: 0.9780892133712769,grad_norm: 0.8744680917722345, iteration: 56400
loss: 0.9799644947052002,grad_norm: 0.9960119210311333, iteration: 56401
loss: 0.9577925205230713,grad_norm: 0.995542199035808, iteration: 56402
loss: 1.0181089639663696,grad_norm: 0.9071839303102595, iteration: 56403
loss: 1.0079506635665894,grad_norm: 0.9999994907532207, iteration: 56404
loss: 0.9815210700035095,grad_norm: 0.8884270253061887, iteration: 56405
loss: 1.0136979818344116,grad_norm: 0.9166366001539623, iteration: 56406
loss: 1.0016599893569946,grad_norm: 0.9999995054835511, iteration: 56407
loss: 0.9828174710273743,grad_norm: 0.9830996620147303, iteration: 56408
loss: 1.0120947360992432,grad_norm: 0.9999990514668291, iteration: 56409
loss: 0.9701634645462036,grad_norm: 0.8760064995448054, iteration: 56410
loss: 0.970963180065155,grad_norm: 0.8500414820216499, iteration: 56411
loss: 1.0058400630950928,grad_norm: 0.9404101828914364, iteration: 56412
loss: 0.9662050604820251,grad_norm: 0.9999992529115114, iteration: 56413
loss: 1.0057452917099,grad_norm: 0.8847594669399713, iteration: 56414
loss: 1.0020767450332642,grad_norm: 0.9292780072092215, iteration: 56415
loss: 0.9901131987571716,grad_norm: 0.835718459246354, iteration: 56416
loss: 1.02834153175354,grad_norm: 0.9999996888903552, iteration: 56417
loss: 1.0034250020980835,grad_norm: 0.9738720178510214, iteration: 56418
loss: 0.980542004108429,grad_norm: 0.9614157688351517, iteration: 56419
loss: 0.9741113781929016,grad_norm: 0.9400697250451171, iteration: 56420
loss: 1.0252524614334106,grad_norm: 0.9999990435092426, iteration: 56421
loss: 1.066296100616455,grad_norm: 0.9999993723364576, iteration: 56422
loss: 1.0536422729492188,grad_norm: 0.9999992899253157, iteration: 56423
loss: 1.009553074836731,grad_norm: 0.9884105156717576, iteration: 56424
loss: 1.008022665977478,grad_norm: 0.9740252073003463, iteration: 56425
loss: 0.9921497702598572,grad_norm: 0.9999992300808233, iteration: 56426
loss: 0.9966238141059875,grad_norm: 0.9999996924276575, iteration: 56427
loss: 1.036812663078308,grad_norm: 0.9999990553986885, iteration: 56428
loss: 1.0016758441925049,grad_norm: 0.9999994742141882, iteration: 56429
loss: 0.9857755303382874,grad_norm: 0.837312971370798, iteration: 56430
loss: 0.992489218711853,grad_norm: 0.9999990474439752, iteration: 56431
loss: 1.0358048677444458,grad_norm: 0.9999991013839772, iteration: 56432
loss: 1.010116457939148,grad_norm: 0.9999989342618971, iteration: 56433
loss: 0.9816697239875793,grad_norm: 0.8939290015588988, iteration: 56434
loss: 1.0165132284164429,grad_norm: 0.9983254924762155, iteration: 56435
loss: 0.9958926439285278,grad_norm: 0.999999228521568, iteration: 56436
loss: 0.9785028696060181,grad_norm: 0.8628476910111771, iteration: 56437
loss: 1.058327317237854,grad_norm: 0.9172178062785955, iteration: 56438
loss: 0.9899938106536865,grad_norm: 0.9358967961635325, iteration: 56439
loss: 0.9994989633560181,grad_norm: 0.9999990824539882, iteration: 56440
loss: 1.067243218421936,grad_norm: 0.9999998524276709, iteration: 56441
loss: 0.9891143441200256,grad_norm: 0.9999989961113638, iteration: 56442
loss: 0.9927982091903687,grad_norm: 0.9988071624133846, iteration: 56443
loss: 0.9598203897476196,grad_norm: 0.8542526846446704, iteration: 56444
loss: 1.0652841329574585,grad_norm: 0.9999993077804715, iteration: 56445
loss: 1.0193372964859009,grad_norm: 0.9001980022427992, iteration: 56446
loss: 1.0214036703109741,grad_norm: 0.8832613501467336, iteration: 56447
loss: 0.9952746629714966,grad_norm: 0.9999991902476368, iteration: 56448
loss: 0.9697017073631287,grad_norm: 0.9999993548861198, iteration: 56449
loss: 1.1811766624450684,grad_norm: 0.9999992949978292, iteration: 56450
loss: 1.0413228273391724,grad_norm: 1.0000000041527497, iteration: 56451
loss: 0.9896168112754822,grad_norm: 0.9999990144550791, iteration: 56452
loss: 0.9893782734870911,grad_norm: 0.9708287230816735, iteration: 56453
loss: 1.029359221458435,grad_norm: 0.9999991244724595, iteration: 56454
loss: 1.0132150650024414,grad_norm: 0.9999991630764956, iteration: 56455
loss: 0.9857048392295837,grad_norm: 0.8956571485830914, iteration: 56456
loss: 1.0043469667434692,grad_norm: 0.8169568557785816, iteration: 56457
loss: 1.0270850658416748,grad_norm: 0.8390042520343979, iteration: 56458
loss: 0.9865330457687378,grad_norm: 0.9518950164190592, iteration: 56459
loss: 1.0230408906936646,grad_norm: 0.8291888974398144, iteration: 56460
loss: 1.0697495937347412,grad_norm: 0.897374990018719, iteration: 56461
loss: 1.0287164449691772,grad_norm: 0.9498592113841918, iteration: 56462
loss: 0.9957765936851501,grad_norm: 0.9141317715585037, iteration: 56463
loss: 1.0479252338409424,grad_norm: 0.9999998689567132, iteration: 56464
loss: 1.0356085300445557,grad_norm: 0.9999992266905618, iteration: 56465
loss: 1.0830533504486084,grad_norm: 0.9999995229595101, iteration: 56466
loss: 0.9685591459274292,grad_norm: 0.9999991691467368, iteration: 56467
loss: 0.992909848690033,grad_norm: 0.9290321023579642, iteration: 56468
loss: 1.0986531972885132,grad_norm: 0.8620292496650374, iteration: 56469
loss: 0.9901943206787109,grad_norm: 0.8722823884175626, iteration: 56470
loss: 1.062272548675537,grad_norm: 0.9538671087863649, iteration: 56471
loss: 0.9889006018638611,grad_norm: 0.7926946182481155, iteration: 56472
loss: 1.1163272857666016,grad_norm: 0.9999996834337062, iteration: 56473
loss: 0.9977667331695557,grad_norm: 0.9999989772453474, iteration: 56474
loss: 0.9866071343421936,grad_norm: 0.9845104064269987, iteration: 56475
loss: 1.0107653141021729,grad_norm: 0.9999989877079336, iteration: 56476
loss: 1.0017951726913452,grad_norm: 0.9999991937640255, iteration: 56477
loss: 1.023559331893921,grad_norm: 0.880050101501299, iteration: 56478
loss: 1.0438587665557861,grad_norm: 0.9583844129614382, iteration: 56479
loss: 1.007037878036499,grad_norm: 0.8846619737966688, iteration: 56480
loss: 1.042893886566162,grad_norm: 0.9999991377010569, iteration: 56481
loss: 1.1546412706375122,grad_norm: 0.9999994164019329, iteration: 56482
loss: 1.1029964685440063,grad_norm: 0.9999994528099537, iteration: 56483
loss: 1.2462230920791626,grad_norm: 0.9999997055748077, iteration: 56484
loss: 1.102318525314331,grad_norm: 0.8978907702356623, iteration: 56485
loss: 1.0578813552856445,grad_norm: 0.8430859543566847, iteration: 56486
loss: 1.0582866668701172,grad_norm: 0.9838440261426655, iteration: 56487
loss: 2.008021116256714,grad_norm: 0.999999848658939, iteration: 56488
loss: 1.1940577030181885,grad_norm: 0.9999995259530112, iteration: 56489
loss: 1.5924131870269775,grad_norm: 0.9999998935467839, iteration: 56490
loss: 1.2278921604156494,grad_norm: 0.9999997775120262, iteration: 56491
loss: 1.6662204265594482,grad_norm: 0.9999997168955305, iteration: 56492
loss: 1.380514144897461,grad_norm: 0.99999993136406, iteration: 56493
loss: 1.4166966676712036,grad_norm: 0.9999999218800737, iteration: 56494
loss: 1.5485656261444092,grad_norm: 0.9999998542676971, iteration: 56495
loss: 1.2839783430099487,grad_norm: 0.9999994157971362, iteration: 56496
loss: 1.2959336042404175,grad_norm: 0.9999999747091654, iteration: 56497
loss: 1.5204294919967651,grad_norm: 0.9999999831756483, iteration: 56498
loss: 1.3032313585281372,grad_norm: 0.9999997748283457, iteration: 56499
loss: 1.6181339025497437,grad_norm: 0.999999730083872, iteration: 56500
loss: 1.3732258081436157,grad_norm: 0.999999875666475, iteration: 56501
loss: 1.4957456588745117,grad_norm: 0.9999998380715964, iteration: 56502
loss: 1.342443823814392,grad_norm: 0.9999996168575006, iteration: 56503
loss: 1.1614986658096313,grad_norm: 0.9999995034209432, iteration: 56504
loss: 1.6065210103988647,grad_norm: 0.9999999030839227, iteration: 56505
loss: 1.2923787832260132,grad_norm: 0.999999779772886, iteration: 56506
loss: 1.3719608783721924,grad_norm: 0.9999998861303315, iteration: 56507
loss: 1.4352701902389526,grad_norm: 0.9999995999132635, iteration: 56508
loss: 1.242737889289856,grad_norm: 0.9999993809824741, iteration: 56509
loss: 1.501932978630066,grad_norm: 0.9999993953111518, iteration: 56510
loss: 1.3673648834228516,grad_norm: 0.9999997070073146, iteration: 56511
loss: 1.2159144878387451,grad_norm: 0.9999997211984659, iteration: 56512
loss: 1.217782735824585,grad_norm: 0.9999995620308956, iteration: 56513
loss: 1.5081290006637573,grad_norm: 0.9999998869313129, iteration: 56514
loss: 1.2570916414260864,grad_norm: 0.9999995584703048, iteration: 56515
loss: 1.3119819164276123,grad_norm: 0.9999997440017153, iteration: 56516
loss: 1.166913628578186,grad_norm: 0.9999996214653981, iteration: 56517
loss: 1.2489114999771118,grad_norm: 0.9999997178889571, iteration: 56518
loss: 1.1469159126281738,grad_norm: 0.999999849076062, iteration: 56519
loss: 1.526572346687317,grad_norm: 0.9999996148710026, iteration: 56520
loss: 1.3505719900131226,grad_norm: 0.9999997531573881, iteration: 56521
loss: 1.3925111293792725,grad_norm: 0.9999998501058769, iteration: 56522
loss: 1.2859195470809937,grad_norm: 0.999999764791968, iteration: 56523
loss: 1.119597315788269,grad_norm: 0.9999999480862264, iteration: 56524
loss: 1.1381617784500122,grad_norm: 0.9999998977851149, iteration: 56525
loss: 1.1563771963119507,grad_norm: 0.9999999631862805, iteration: 56526
loss: 1.1563880443572998,grad_norm: 0.9999995406352762, iteration: 56527
loss: 1.6530025005340576,grad_norm: 0.9999996879440579, iteration: 56528
loss: 1.0055255889892578,grad_norm: 0.999999360873411, iteration: 56529
loss: 1.0996745824813843,grad_norm: 0.999999136032617, iteration: 56530
loss: 1.1304785013198853,grad_norm: 0.9999996422174874, iteration: 56531
loss: 1.210663914680481,grad_norm: 0.9999996606924262, iteration: 56532
loss: 1.167580008506775,grad_norm: 0.9999995533521442, iteration: 56533
loss: 1.0808980464935303,grad_norm: 0.9999994157389009, iteration: 56534
loss: 1.0552479028701782,grad_norm: 0.8856590015306435, iteration: 56535
loss: 1.216935396194458,grad_norm: 0.9999992745855939, iteration: 56536
loss: 1.1840574741363525,grad_norm: 0.9999998885696746, iteration: 56537
loss: 1.2128084897994995,grad_norm: 0.9999997285258321, iteration: 56538
loss: 1.1600309610366821,grad_norm: 0.9999996066571374, iteration: 56539
loss: 1.0638506412506104,grad_norm: 0.9999992809967926, iteration: 56540
loss: 1.2501869201660156,grad_norm: 0.9999995440333271, iteration: 56541
loss: 1.0616012811660767,grad_norm: 0.9999997299357845, iteration: 56542
loss: 1.1515010595321655,grad_norm: 0.9999993863565064, iteration: 56543
loss: 1.261813998222351,grad_norm: 0.9999991034326673, iteration: 56544
loss: 1.167451024055481,grad_norm: 0.9999995046030773, iteration: 56545
loss: 1.1469444036483765,grad_norm: 0.9999997096957316, iteration: 56546
loss: 1.0544085502624512,grad_norm: 0.9999989797926887, iteration: 56547
loss: 1.2593656778335571,grad_norm: 0.9999995255966075, iteration: 56548
loss: 1.3981633186340332,grad_norm: 0.9999995734570587, iteration: 56549
loss: 1.1548279523849487,grad_norm: 0.9999997220037995, iteration: 56550
loss: 1.1715772151947021,grad_norm: 0.9999998928112398, iteration: 56551
loss: 1.2422488927841187,grad_norm: 0.9999996009781195, iteration: 56552
loss: 1.0723903179168701,grad_norm: 0.9999992168055567, iteration: 56553
loss: 1.3641841411590576,grad_norm: 0.999999808285537, iteration: 56554
loss: 1.0647944211959839,grad_norm: 0.9999992288367937, iteration: 56555
loss: 1.2843714952468872,grad_norm: 0.999999708941284, iteration: 56556
loss: 1.2982962131500244,grad_norm: 0.9999999175642547, iteration: 56557
loss: 1.2319170236587524,grad_norm: 0.999999408612393, iteration: 56558
loss: 1.1263673305511475,grad_norm: 0.9999994295899006, iteration: 56559
loss: 1.0803613662719727,grad_norm: 0.9999991790940537, iteration: 56560
loss: 1.1084773540496826,grad_norm: 0.9999995558527209, iteration: 56561
loss: 1.1222219467163086,grad_norm: 0.9999993019274308, iteration: 56562
loss: 1.0724493265151978,grad_norm: 0.9999995232297634, iteration: 56563
loss: 1.2613815069198608,grad_norm: 0.9999994997944861, iteration: 56564
loss: 1.058608889579773,grad_norm: 0.897777050650149, iteration: 56565
loss: 1.1790047883987427,grad_norm: 0.9999995775325572, iteration: 56566
loss: 1.3253860473632812,grad_norm: 0.9999994795473759, iteration: 56567
loss: 1.0541203022003174,grad_norm: 0.9999996020236971, iteration: 56568
loss: 1.2059382200241089,grad_norm: 0.9999994779688542, iteration: 56569
loss: 1.0670825242996216,grad_norm: 0.9999996603516694, iteration: 56570
loss: 1.1209049224853516,grad_norm: 0.9999996051061881, iteration: 56571
loss: 1.2054139375686646,grad_norm: 0.9999996798170138, iteration: 56572
loss: 1.4267040491104126,grad_norm: 0.9999997195278997, iteration: 56573
loss: 1.0827206373214722,grad_norm: 0.9999995093154995, iteration: 56574
loss: 1.3235431909561157,grad_norm: 0.999999785630005, iteration: 56575
loss: 1.2993078231811523,grad_norm: 0.9999996970209303, iteration: 56576
loss: 1.3334912061691284,grad_norm: 0.9999996806391287, iteration: 56577
loss: 0.9963321685791016,grad_norm: 0.9999993611975909, iteration: 56578
loss: 1.33311927318573,grad_norm: 0.9999996954361188, iteration: 56579
loss: 1.4087399244308472,grad_norm: 0.9999995287294439, iteration: 56580
loss: 1.06727135181427,grad_norm: 0.9999996523465429, iteration: 56581
loss: 1.0550568103790283,grad_norm: 0.9999999391600929, iteration: 56582
loss: 1.231467366218567,grad_norm: 0.9999994472550868, iteration: 56583
loss: 1.2857316732406616,grad_norm: 0.9999998987862302, iteration: 56584
loss: 1.086176872253418,grad_norm: 0.9999997969567092, iteration: 56585
loss: 1.3103530406951904,grad_norm: 0.9999997956565525, iteration: 56586
loss: 1.3389829397201538,grad_norm: 0.999999853985315, iteration: 56587
loss: 1.4915217161178589,grad_norm: 0.9999997692195745, iteration: 56588
loss: 1.328371524810791,grad_norm: 0.9999997611443762, iteration: 56589
loss: 1.1987502574920654,grad_norm: 0.9999997723128949, iteration: 56590
loss: 1.2625820636749268,grad_norm: 0.9999995329987508, iteration: 56591
loss: 1.183329701423645,grad_norm: 0.9999995337265556, iteration: 56592
loss: 1.3873106241226196,grad_norm: 0.9999995801577711, iteration: 56593
loss: 1.3283661603927612,grad_norm: 0.9999998201194507, iteration: 56594
loss: 1.2846379280090332,grad_norm: 0.9999995008947472, iteration: 56595
loss: 1.277286171913147,grad_norm: 0.9999997222003516, iteration: 56596
loss: 1.198584794998169,grad_norm: 0.9999998323590611, iteration: 56597
loss: 1.586485743522644,grad_norm: 0.9999999094834239, iteration: 56598
loss: 1.491658091545105,grad_norm: 0.9999998598703455, iteration: 56599
loss: 1.1513177156448364,grad_norm: 0.9999995280167342, iteration: 56600
loss: 1.1286654472351074,grad_norm: 0.9999998148440626, iteration: 56601
loss: 1.083519458770752,grad_norm: 0.9999995329191508, iteration: 56602
loss: 1.1545281410217285,grad_norm: 0.9999992722533716, iteration: 56603
loss: 1.2211095094680786,grad_norm: 0.9999996597184044, iteration: 56604
loss: 1.1055554151535034,grad_norm: 0.9999992213415768, iteration: 56605
loss: 1.3363111019134521,grad_norm: 0.9999996624023875, iteration: 56606
loss: 1.17326819896698,grad_norm: 0.9999994301017633, iteration: 56607
loss: 1.3790342807769775,grad_norm: 0.999999704543338, iteration: 56608
loss: 1.225171446800232,grad_norm: 0.9999992844393725, iteration: 56609
loss: 1.3417880535125732,grad_norm: 0.999999845433292, iteration: 56610
loss: 1.192226767539978,grad_norm: 0.9999996180335534, iteration: 56611
loss: 1.1076334714889526,grad_norm: 0.9999995124440499, iteration: 56612
loss: 1.0858505964279175,grad_norm: 0.9999995053200808, iteration: 56613
loss: 1.156063437461853,grad_norm: 0.9999999183374325, iteration: 56614
loss: 1.0831303596496582,grad_norm: 0.9999996697534151, iteration: 56615
loss: 1.0466805696487427,grad_norm: 0.999999513050862, iteration: 56616
loss: 1.124666690826416,grad_norm: 0.9999993412160783, iteration: 56617
loss: 1.270875334739685,grad_norm: 0.9999998038247951, iteration: 56618
loss: 1.2314525842666626,grad_norm: 0.9999994023220615, iteration: 56619
loss: 1.0837615728378296,grad_norm: 0.9999995011509498, iteration: 56620
loss: 1.189650535583496,grad_norm: 0.9999996381118045, iteration: 56621
loss: 1.086194634437561,grad_norm: 0.9999995827911681, iteration: 56622
loss: 1.2111736536026,grad_norm: 0.9999998660358866, iteration: 56623
loss: 1.178285837173462,grad_norm: 0.9999999298431124, iteration: 56624
loss: 1.1113636493682861,grad_norm: 0.9999993013772593, iteration: 56625
loss: 1.2426172494888306,grad_norm: 0.9999997435622983, iteration: 56626
loss: 1.1730875968933105,grad_norm: 0.9999998145409056, iteration: 56627
loss: 1.1614248752593994,grad_norm: 0.9999999078735174, iteration: 56628
loss: 1.4335788488388062,grad_norm: 0.9999998320039758, iteration: 56629
loss: 1.2650232315063477,grad_norm: 0.9999996242617217, iteration: 56630
loss: 1.1917158365249634,grad_norm: 0.9999997913012831, iteration: 56631
loss: 1.2028883695602417,grad_norm: 0.9999995368693033, iteration: 56632
loss: 1.1965837478637695,grad_norm: 0.9999997137202252, iteration: 56633
loss: 1.1435147523880005,grad_norm: 0.9999994742944903, iteration: 56634
loss: 1.1514670848846436,grad_norm: 0.9999994450705322, iteration: 56635
loss: 1.008756399154663,grad_norm: 0.9999996631503401, iteration: 56636
loss: 1.3181726932525635,grad_norm: 0.9999995495712791, iteration: 56637
loss: 1.2266559600830078,grad_norm: 0.9999999053140646, iteration: 56638
loss: 1.1765810251235962,grad_norm: 0.9999993461107961, iteration: 56639
loss: 1.177569031715393,grad_norm: 0.9999997390186589, iteration: 56640
loss: 1.2538795471191406,grad_norm: 0.9999996613127645, iteration: 56641
loss: 1.1649760007858276,grad_norm: 0.9999999902179155, iteration: 56642
loss: 1.1381653547286987,grad_norm: 0.9999997872597557, iteration: 56643
loss: 1.1980012655258179,grad_norm: 0.999999973678323, iteration: 56644
loss: 1.4657609462738037,grad_norm: 0.9999999719717404, iteration: 56645
loss: 1.2585760354995728,grad_norm: 0.9999998206654062, iteration: 56646
loss: 1.2453737258911133,grad_norm: 0.9999998978962614, iteration: 56647
loss: 1.2750576734542847,grad_norm: 0.9999998371336023, iteration: 56648
loss: 1.287149429321289,grad_norm: 0.9999997552123391, iteration: 56649
loss: 1.1697026491165161,grad_norm: 0.9999995173134129, iteration: 56650
loss: 1.2377431392669678,grad_norm: 0.9999999326443355, iteration: 56651
loss: 1.1611360311508179,grad_norm: 0.9999998399155274, iteration: 56652
loss: 1.237449288368225,grad_norm: 0.99999994474964, iteration: 56653
loss: 1.1466282606124878,grad_norm: 0.9999998435372428, iteration: 56654
loss: 1.042037010192871,grad_norm: 0.9999997431381863, iteration: 56655
loss: 1.2371968030929565,grad_norm: 0.9999999669981084, iteration: 56656
loss: 1.165528655052185,grad_norm: 0.9999997498088217, iteration: 56657
loss: 1.2791450023651123,grad_norm: 0.9999997318495524, iteration: 56658
loss: 1.1958884000778198,grad_norm: 0.9999998777688375, iteration: 56659
loss: 1.33928644657135,grad_norm: 0.9999996133015049, iteration: 56660
loss: 1.1473287343978882,grad_norm: 0.9999993241830305, iteration: 56661
loss: 1.27130925655365,grad_norm: 0.9999997925258105, iteration: 56662
loss: 1.6241674423217773,grad_norm: 0.9999998389435854, iteration: 56663
loss: 1.1135448217391968,grad_norm: 0.9999999851127094, iteration: 56664
loss: 1.0606955289840698,grad_norm: 0.9999996846596172, iteration: 56665
loss: 1.4720159769058228,grad_norm: 0.9999998517903249, iteration: 56666
loss: 1.4745688438415527,grad_norm: 0.9999999282995433, iteration: 56667
loss: 1.7024648189544678,grad_norm: 0.9999998201271899, iteration: 56668
loss: 1.659751296043396,grad_norm: 0.9999995032818075, iteration: 56669
loss: 1.2231172323226929,grad_norm: 0.9999999523969979, iteration: 56670
loss: 1.4596632719039917,grad_norm: 1.0000000055526856, iteration: 56671
loss: 1.4781029224395752,grad_norm: 0.9999998397544402, iteration: 56672
loss: 1.5694044828414917,grad_norm: 1.0000000580289496, iteration: 56673
loss: 1.5021848678588867,grad_norm: 1.0000000059574368, iteration: 56674
loss: 1.2028664350509644,grad_norm: 0.999999730726638, iteration: 56675
loss: 1.5333151817321777,grad_norm: 0.9999999460479045, iteration: 56676
loss: 1.2497907876968384,grad_norm: 0.9999995022689908, iteration: 56677
loss: 1.197925329208374,grad_norm: 0.9999999946173579, iteration: 56678
loss: 1.2843239307403564,grad_norm: 0.9999998893042955, iteration: 56679
loss: 1.0863697528839111,grad_norm: 0.9999991955593195, iteration: 56680
loss: 1.2924792766571045,grad_norm: 1.000000002605697, iteration: 56681
loss: 1.2675023078918457,grad_norm: 0.9999998047157145, iteration: 56682
loss: 1.4200636148452759,grad_norm: 0.9999997531105185, iteration: 56683
loss: 1.0725539922714233,grad_norm: 0.9999993257009673, iteration: 56684
loss: 1.1312627792358398,grad_norm: 0.9999998888415608, iteration: 56685
loss: 1.3369446992874146,grad_norm: 0.9999997628989523, iteration: 56686
loss: 1.181231141090393,grad_norm: 0.9999996818512078, iteration: 56687
loss: 1.2410129308700562,grad_norm: 0.9999997100757666, iteration: 56688
loss: 1.1475048065185547,grad_norm: 0.999999240426822, iteration: 56689
loss: 1.204322338104248,grad_norm: 0.9999994947361784, iteration: 56690
loss: 1.1688376665115356,grad_norm: 0.9999998457969723, iteration: 56691
loss: 1.2499420642852783,grad_norm: 0.9999997982520489, iteration: 56692
loss: 1.0930233001708984,grad_norm: 0.9999991521177454, iteration: 56693
loss: 1.058764934539795,grad_norm: 0.9999992581368377, iteration: 56694
loss: 1.3195692300796509,grad_norm: 0.9999994379362266, iteration: 56695
loss: 1.1381409168243408,grad_norm: 0.9999996664574992, iteration: 56696
loss: 1.1860271692276,grad_norm: 0.9999996663788447, iteration: 56697
loss: 1.1954383850097656,grad_norm: 0.999999836800267, iteration: 56698
loss: 1.4571586847305298,grad_norm: 1.0000000075095432, iteration: 56699
loss: 1.136244773864746,grad_norm: 0.9999999377380391, iteration: 56700
loss: 1.069823980331421,grad_norm: 0.9999992510877487, iteration: 56701
loss: 1.0950144529342651,grad_norm: 0.9999995025941044, iteration: 56702
loss: 1.0370070934295654,grad_norm: 0.9999992640989334, iteration: 56703
loss: 1.1777311563491821,grad_norm: 0.9999994056183883, iteration: 56704
loss: 1.176805019378662,grad_norm: 0.9999993174685592, iteration: 56705
loss: 1.07991361618042,grad_norm: 0.999999051889203, iteration: 56706
loss: 1.0988025665283203,grad_norm: 0.999999553863352, iteration: 56707
loss: 1.0622023344039917,grad_norm: 0.9999994117142014, iteration: 56708
loss: 0.9943554997444153,grad_norm: 0.9999990794141314, iteration: 56709
loss: 1.019529104232788,grad_norm: 0.9999993906999364, iteration: 56710
loss: 1.04073965549469,grad_norm: 0.9999999490352257, iteration: 56711
loss: 1.063258409500122,grad_norm: 0.9999996557279365, iteration: 56712
loss: 1.1365864276885986,grad_norm: 0.999999723816048, iteration: 56713
loss: 1.0586349964141846,grad_norm: 0.9805089106690831, iteration: 56714
loss: 1.2692986726760864,grad_norm: 0.9999995776131442, iteration: 56715
loss: 1.0364559888839722,grad_norm: 0.9999995010303804, iteration: 56716
loss: 1.0883004665374756,grad_norm: 0.9515136439958188, iteration: 56717
loss: 1.0753557682037354,grad_norm: 0.9166824482845195, iteration: 56718
loss: 1.2110497951507568,grad_norm: 0.9999997901981288, iteration: 56719
loss: 1.0257636308670044,grad_norm: 0.9225577650935989, iteration: 56720
loss: 1.103880524635315,grad_norm: 0.9999991025023807, iteration: 56721
loss: 1.0730962753295898,grad_norm: 0.9999991676869369, iteration: 56722
loss: 1.016998529434204,grad_norm: 0.8823208027891761, iteration: 56723
loss: 1.1104236841201782,grad_norm: 0.9999992230535416, iteration: 56724
loss: 1.0292749404907227,grad_norm: 0.7598342854275163, iteration: 56725
loss: 1.0444118976593018,grad_norm: 0.9999989973049233, iteration: 56726
loss: 1.025876760482788,grad_norm: 0.999999240579253, iteration: 56727
loss: 1.1287583112716675,grad_norm: 0.9999994952367722, iteration: 56728
loss: 1.0653703212738037,grad_norm: 0.999999223018456, iteration: 56729
loss: 1.127946138381958,grad_norm: 0.9999997716806035, iteration: 56730
loss: 1.099432110786438,grad_norm: 1.0000000331590182, iteration: 56731
loss: 1.1151024103164673,grad_norm: 0.9999996829826019, iteration: 56732
loss: 0.9952504634857178,grad_norm: 0.9040005531257965, iteration: 56733
loss: 1.0035405158996582,grad_norm: 0.9999992296843918, iteration: 56734
loss: 1.0416409969329834,grad_norm: 0.9999995119346858, iteration: 56735
loss: 0.9775578379631042,grad_norm: 0.995699173891503, iteration: 56736
loss: 1.0390161275863647,grad_norm: 0.9999997427533858, iteration: 56737
loss: 1.1149961948394775,grad_norm: 0.9999996970412514, iteration: 56738
loss: 1.0494236946105957,grad_norm: 0.9532714120400249, iteration: 56739
loss: 1.0238397121429443,grad_norm: 0.8753272865839254, iteration: 56740
loss: 1.0284087657928467,grad_norm: 0.8106313048926231, iteration: 56741
loss: 1.0201666355133057,grad_norm: 0.9999991809530726, iteration: 56742
loss: 1.0122663974761963,grad_norm: 0.883926073872812, iteration: 56743
loss: 1.020663857460022,grad_norm: 0.999998989686352, iteration: 56744
loss: 0.9884931445121765,grad_norm: 0.999999261050021, iteration: 56745
loss: 0.9866624474525452,grad_norm: 0.940804399972665, iteration: 56746
loss: 0.9601367115974426,grad_norm: 0.9999990442938577, iteration: 56747
loss: 1.0058023929595947,grad_norm: 0.8881567716875605, iteration: 56748
loss: 1.0161163806915283,grad_norm: 0.963290173677624, iteration: 56749
loss: 1.0352251529693604,grad_norm: 0.9999998132503544, iteration: 56750
loss: 1.0530903339385986,grad_norm: 0.9999992300968021, iteration: 56751
loss: 1.0171343088150024,grad_norm: 0.8380755136417217, iteration: 56752
loss: 0.9977176785469055,grad_norm: 0.9999997713364815, iteration: 56753
loss: 1.014562964439392,grad_norm: 0.9663511367941706, iteration: 56754
loss: 1.001904845237732,grad_norm: 0.9999991229656698, iteration: 56755
loss: 1.0154098272323608,grad_norm: 0.9999999060023395, iteration: 56756
loss: 0.9953760504722595,grad_norm: 0.9469509593812461, iteration: 56757
loss: 1.0138627290725708,grad_norm: 0.9676709297117688, iteration: 56758
loss: 1.3288390636444092,grad_norm: 0.9999994743064734, iteration: 56759
loss: 1.0548092126846313,grad_norm: 0.9999992838801013, iteration: 56760
loss: 0.9739431738853455,grad_norm: 0.9999990013769892, iteration: 56761
loss: 1.0114948749542236,grad_norm: 0.9999993476879432, iteration: 56762
loss: 1.0470097064971924,grad_norm: 0.9999989859688727, iteration: 56763
loss: 1.093730092048645,grad_norm: 0.9999993252643502, iteration: 56764
loss: 0.9853914380073547,grad_norm: 0.9999989171209263, iteration: 56765
loss: 1.1166056394577026,grad_norm: 0.9999996378834871, iteration: 56766
loss: 1.1581430435180664,grad_norm: 0.9999993444006083, iteration: 56767
loss: 1.0589613914489746,grad_norm: 0.9999992497229311, iteration: 56768
loss: 1.0696965456008911,grad_norm: 0.999999595390671, iteration: 56769
loss: 1.0726429224014282,grad_norm: 0.9999998535795203, iteration: 56770
loss: 1.0295898914337158,grad_norm: 0.9999991583498803, iteration: 56771
loss: 0.9821733832359314,grad_norm: 0.9999990935740283, iteration: 56772
loss: 1.1785252094268799,grad_norm: 0.9999992282917658, iteration: 56773
loss: 1.0693761110305786,grad_norm: 0.9999991643435313, iteration: 56774
loss: 1.1598255634307861,grad_norm: 0.9999999603146569, iteration: 56775
loss: 1.000806450843811,grad_norm: 0.9999996531758885, iteration: 56776
loss: 0.9925742745399475,grad_norm: 0.7091397156618663, iteration: 56777
loss: 1.0129361152648926,grad_norm: 0.9477989212778963, iteration: 56778
loss: 0.9945620894432068,grad_norm: 0.9137537259780504, iteration: 56779
loss: 1.050811529159546,grad_norm: 0.9999989757765991, iteration: 56780
loss: 1.025375247001648,grad_norm: 0.9999991443226657, iteration: 56781
loss: 1.0551676750183105,grad_norm: 0.9999990913119301, iteration: 56782
loss: 1.0171699523925781,grad_norm: 0.9999991769338601, iteration: 56783
loss: 0.9902248382568359,grad_norm: 0.8918685358609774, iteration: 56784
loss: 0.9909760355949402,grad_norm: 0.9829514543731859, iteration: 56785
loss: 1.019115924835205,grad_norm: 0.9999993601627065, iteration: 56786
loss: 1.0232504606246948,grad_norm: 0.9468688156957572, iteration: 56787
loss: 1.0235395431518555,grad_norm: 0.9864126221580025, iteration: 56788
loss: 0.9958128333091736,grad_norm: 0.8600957697021983, iteration: 56789
loss: 0.9666630029678345,grad_norm: 0.891706404787036, iteration: 56790
loss: 1.026955008506775,grad_norm: 0.9202800693080645, iteration: 56791
loss: 0.9940407872200012,grad_norm: 0.8862400512092262, iteration: 56792
loss: 0.9834820032119751,grad_norm: 0.9724539899106142, iteration: 56793
loss: 0.9881523847579956,grad_norm: 0.9999995263348557, iteration: 56794
loss: 1.079598307609558,grad_norm: 0.9999996151631003, iteration: 56795
loss: 0.9896820187568665,grad_norm: 0.9974990896261386, iteration: 56796
loss: 1.0126690864562988,grad_norm: 0.999999573920949, iteration: 56797
loss: 0.9912394881248474,grad_norm: 0.9999992801155426, iteration: 56798
loss: 1.01827871799469,grad_norm: 0.9999999248006396, iteration: 56799
loss: 1.0081804990768433,grad_norm: 0.9999998524440852, iteration: 56800
loss: 1.0186331272125244,grad_norm: 0.9999990018187326, iteration: 56801
loss: 0.9751114845275879,grad_norm: 0.8594453740570032, iteration: 56802
loss: 1.168806552886963,grad_norm: 1.0000000492398682, iteration: 56803
loss: 0.9861963987350464,grad_norm: 0.9999989006227401, iteration: 56804
loss: 1.077620267868042,grad_norm: 0.9999996343395027, iteration: 56805
loss: 0.9943600296974182,grad_norm: 0.8208447403078626, iteration: 56806
loss: 1.069509744644165,grad_norm: 0.9999997753108651, iteration: 56807
loss: 0.9991232752799988,grad_norm: 0.8937394798189195, iteration: 56808
loss: 1.0578227043151855,grad_norm: 0.9999995195794045, iteration: 56809
loss: 1.0617854595184326,grad_norm: 0.9999995436380723, iteration: 56810
loss: 1.0323461294174194,grad_norm: 0.7240348752863935, iteration: 56811
loss: 1.0366401672363281,grad_norm: 0.9662386136738711, iteration: 56812
loss: 1.0359587669372559,grad_norm: 0.999999143922489, iteration: 56813
loss: 1.0261995792388916,grad_norm: 0.9999996607958616, iteration: 56814
loss: 0.9821378588676453,grad_norm: 0.9532635402701987, iteration: 56815
loss: 0.9945306181907654,grad_norm: 0.9795604928895963, iteration: 56816
loss: 1.0070663690567017,grad_norm: 0.8814944054157041, iteration: 56817
loss: 0.9760151505470276,grad_norm: 0.9999990146004574, iteration: 56818
loss: 0.9757099747657776,grad_norm: 0.86969971677856, iteration: 56819
loss: 1.08708918094635,grad_norm: 0.9999992554831217, iteration: 56820
loss: 0.9855631589889526,grad_norm: 0.8953157879039104, iteration: 56821
loss: 1.0481563806533813,grad_norm: 0.9999994633369085, iteration: 56822
loss: 1.0051708221435547,grad_norm: 0.899586097241253, iteration: 56823
loss: 0.9896187782287598,grad_norm: 0.9863003691608363, iteration: 56824
loss: 1.0940091609954834,grad_norm: 0.999999863620031, iteration: 56825
loss: 1.0173131227493286,grad_norm: 0.9999995204056217, iteration: 56826
loss: 1.0233033895492554,grad_norm: 0.7722085349552017, iteration: 56827
loss: 0.9900505542755127,grad_norm: 0.9151829263716645, iteration: 56828
loss: 0.9881569743156433,grad_norm: 0.91310740162895, iteration: 56829
loss: 1.0764964818954468,grad_norm: 0.9999998474230714, iteration: 56830
loss: 1.0265790224075317,grad_norm: 0.9839461371170609, iteration: 56831
loss: 1.0717343091964722,grad_norm: 0.9092311367385865, iteration: 56832
loss: 1.1533459424972534,grad_norm: 0.9999998646075698, iteration: 56833
loss: 0.9829718470573425,grad_norm: 0.8978847264312896, iteration: 56834
loss: 1.0132335424423218,grad_norm: 0.7674654262144363, iteration: 56835
loss: 1.0467084646224976,grad_norm: 0.9999997743316164, iteration: 56836
loss: 1.0145782232284546,grad_norm: 0.9999997558567878, iteration: 56837
loss: 1.008604884147644,grad_norm: 0.8069260579290579, iteration: 56838
loss: 1.0123084783554077,grad_norm: 0.9999993986175223, iteration: 56839
loss: 1.001022219657898,grad_norm: 0.9556197566931698, iteration: 56840
loss: 1.0455901622772217,grad_norm: 0.9293222253187254, iteration: 56841
loss: 1.0067083835601807,grad_norm: 0.8445219881123894, iteration: 56842
loss: 1.0053236484527588,grad_norm: 0.9999992731655326, iteration: 56843
loss: 1.0009323358535767,grad_norm: 0.9999991364594671, iteration: 56844
loss: 0.9817617535591125,grad_norm: 0.999999519624751, iteration: 56845
loss: 0.974521815776825,grad_norm: 0.9999991868241762, iteration: 56846
loss: 0.9819505214691162,grad_norm: 0.9646148663310272, iteration: 56847
loss: 1.0060118436813354,grad_norm: 0.9129522976426057, iteration: 56848
loss: 1.0267318487167358,grad_norm: 0.9999995397533977, iteration: 56849
loss: 1.0118144750595093,grad_norm: 0.9096265827519214, iteration: 56850
loss: 1.0478538274765015,grad_norm: 0.9617588721259877, iteration: 56851
loss: 0.9777941107749939,grad_norm: 0.8980328915541157, iteration: 56852
loss: 0.9939669370651245,grad_norm: 0.9999997971422544, iteration: 56853
loss: 1.0670454502105713,grad_norm: 0.9999999026738724, iteration: 56854
loss: 1.025879979133606,grad_norm: 0.9999990399193053, iteration: 56855
loss: 1.0102365016937256,grad_norm: 0.8926575932972699, iteration: 56856
loss: 1.1155815124511719,grad_norm: 0.9999991334625448, iteration: 56857
loss: 1.0691670179367065,grad_norm: 0.9999992212256796, iteration: 56858
loss: 1.0111488103866577,grad_norm: 0.9999991205197325, iteration: 56859
loss: 1.1126810312271118,grad_norm: 0.9999998429633088, iteration: 56860
loss: 0.9899494647979736,grad_norm: 0.9642621550546547, iteration: 56861
loss: 1.0606062412261963,grad_norm: 0.9999995160772944, iteration: 56862
loss: 1.013954997062683,grad_norm: 0.9999996740798718, iteration: 56863
loss: 1.0075790882110596,grad_norm: 0.8767518920207863, iteration: 56864
loss: 1.0341943502426147,grad_norm: 0.9999995493725639, iteration: 56865
loss: 1.0917420387268066,grad_norm: 0.999999804162664, iteration: 56866
loss: 1.0046768188476562,grad_norm: 0.9395118113187177, iteration: 56867
loss: 0.9926386475563049,grad_norm: 0.8392821963994088, iteration: 56868
loss: 1.0201137065887451,grad_norm: 0.9999990215826501, iteration: 56869
loss: 1.0651206970214844,grad_norm: 0.999999526983355, iteration: 56870
loss: 0.9624207019805908,grad_norm: 0.9999990283721464, iteration: 56871
loss: 0.9941725730895996,grad_norm: 0.9999991174769526, iteration: 56872
loss: 1.0211408138275146,grad_norm: 0.8748064741785949, iteration: 56873
loss: 1.0046254396438599,grad_norm: 0.8986463946287632, iteration: 56874
loss: 1.0105805397033691,grad_norm: 0.9005884779790428, iteration: 56875
loss: 1.051463007926941,grad_norm: 0.9999993557121594, iteration: 56876
loss: 1.048501968383789,grad_norm: 0.7621912133969823, iteration: 56877
loss: 1.0298351049423218,grad_norm: 0.9999994463050872, iteration: 56878
loss: 1.0041570663452148,grad_norm: 0.9999992221633053, iteration: 56879
loss: 0.9614289999008179,grad_norm: 0.9624988805871724, iteration: 56880
loss: 1.0151145458221436,grad_norm: 0.8250771884525363, iteration: 56881
loss: 0.9952439069747925,grad_norm: 0.9281740045050335, iteration: 56882
loss: 1.1897403001785278,grad_norm: 0.9999996896089263, iteration: 56883
loss: 1.0414609909057617,grad_norm: 0.7735950901867744, iteration: 56884
loss: 1.0327402353286743,grad_norm: 0.9868920991297275, iteration: 56885
loss: 1.0161261558532715,grad_norm: 0.9999994467388248, iteration: 56886
loss: 1.0074306726455688,grad_norm: 0.9999991813740112, iteration: 56887
loss: 0.9788949489593506,grad_norm: 0.9999996290914003, iteration: 56888
loss: 1.0125727653503418,grad_norm: 0.8724595429777658, iteration: 56889
loss: 1.0437402725219727,grad_norm: 0.9087561132593969, iteration: 56890
loss: 1.1606698036193848,grad_norm: 0.9999998733771073, iteration: 56891
loss: 0.9883732795715332,grad_norm: 0.9999999232060984, iteration: 56892
loss: 1.0341451168060303,grad_norm: 0.737281420680348, iteration: 56893
loss: 1.1558154821395874,grad_norm: 0.9999990596594805, iteration: 56894
loss: 1.00174081325531,grad_norm: 0.7303800023244236, iteration: 56895
loss: 1.0203545093536377,grad_norm: 0.9999991935279688, iteration: 56896
loss: 1.0021333694458008,grad_norm: 0.8521713788958584, iteration: 56897
loss: 1.2598117589950562,grad_norm: 0.9999996900424024, iteration: 56898
loss: 1.0173031091690063,grad_norm: 0.9602874248034816, iteration: 56899
loss: 0.9914717674255371,grad_norm: 0.9999994258490538, iteration: 56900
loss: 1.0486633777618408,grad_norm: 0.9999991400810097, iteration: 56901
loss: 1.0464973449707031,grad_norm: 0.9999994426329765, iteration: 56902
loss: 1.1031183004379272,grad_norm: 0.9999992619349498, iteration: 56903
loss: 0.9891980886459351,grad_norm: 0.9235737165838408, iteration: 56904
loss: 1.070216417312622,grad_norm: 0.9999990869082138, iteration: 56905
loss: 0.9363396167755127,grad_norm: 0.9405031088149985, iteration: 56906
loss: 1.0419418811798096,grad_norm: 0.9999991687279329, iteration: 56907
loss: 1.2580479383468628,grad_norm: 0.999999553207417, iteration: 56908
loss: 0.973010778427124,grad_norm: 0.9999991174891145, iteration: 56909
loss: 1.0616661310195923,grad_norm: 0.9999992925748619, iteration: 56910
loss: 0.9808403253555298,grad_norm: 0.910909621548571, iteration: 56911
loss: 1.0979739427566528,grad_norm: 0.9999994597039572, iteration: 56912
loss: 1.0527384281158447,grad_norm: 0.9999992643812332, iteration: 56913
loss: 1.127212405204773,grad_norm: 0.999999600937164, iteration: 56914
loss: 1.1762020587921143,grad_norm: 0.9999998401903913, iteration: 56915
loss: 0.9973552227020264,grad_norm: 0.9131202060352539, iteration: 56916
loss: 1.009608507156372,grad_norm: 0.8505536349310302, iteration: 56917
loss: 1.0543649196624756,grad_norm: 0.9999990256292239, iteration: 56918
loss: 1.1895098686218262,grad_norm: 0.9999998699754548, iteration: 56919
loss: 1.060031533241272,grad_norm: 0.9999992629581376, iteration: 56920
loss: 1.0214661359786987,grad_norm: 0.9999992166312592, iteration: 56921
loss: 0.9700432419776917,grad_norm: 0.9999990944522313, iteration: 56922
loss: 0.9931301474571228,grad_norm: 0.999999421923244, iteration: 56923
loss: 1.045417070388794,grad_norm: 0.9999995737717156, iteration: 56924
loss: 0.9916914105415344,grad_norm: 0.9999993802574882, iteration: 56925
loss: 1.0286438465118408,grad_norm: 0.9280108528910309, iteration: 56926
loss: 0.9865274429321289,grad_norm: 0.9999990974054468, iteration: 56927
loss: 0.972960889339447,grad_norm: 0.9999991742622741, iteration: 56928
loss: 0.997524619102478,grad_norm: 0.9345827331491104, iteration: 56929
loss: 0.9829627871513367,grad_norm: 0.999998918081793, iteration: 56930
loss: 1.0780586004257202,grad_norm: 0.9999997818719278, iteration: 56931
loss: 1.1666011810302734,grad_norm: 0.9999991206709364, iteration: 56932
loss: 1.1501845121383667,grad_norm: 0.9999995740994424, iteration: 56933
loss: 1.144944667816162,grad_norm: 0.999999485370267, iteration: 56934
loss: 1.0292247533798218,grad_norm: 0.8247570756968151, iteration: 56935
loss: 1.0550470352172852,grad_norm: 0.99999929668149, iteration: 56936
loss: 0.998160719871521,grad_norm: 0.9315313098273692, iteration: 56937
loss: 0.9810319542884827,grad_norm: 0.7845172267156068, iteration: 56938
loss: 1.0882843732833862,grad_norm: 0.9999994030401178, iteration: 56939
loss: 1.0690622329711914,grad_norm: 0.9999995705882945, iteration: 56940
loss: 1.0207819938659668,grad_norm: 0.8485054654058742, iteration: 56941
loss: 1.1878654956817627,grad_norm: 0.9999995536060906, iteration: 56942
loss: 1.203035593032837,grad_norm: 0.999999747418715, iteration: 56943
loss: 1.0717400312423706,grad_norm: 0.9999990984678183, iteration: 56944
loss: 1.0653210878372192,grad_norm: 0.999999464140606, iteration: 56945
loss: 1.1386921405792236,grad_norm: 1.000000061546108, iteration: 56946
loss: 0.9822098016738892,grad_norm: 0.9699689666993415, iteration: 56947
loss: 1.0781773328781128,grad_norm: 0.9676078047853591, iteration: 56948
loss: 1.115501880645752,grad_norm: 0.9999997979496311, iteration: 56949
loss: 0.9586077332496643,grad_norm: 0.9041100715029095, iteration: 56950
loss: 1.0047218799591064,grad_norm: 0.9897991179674914, iteration: 56951
loss: 1.0882219076156616,grad_norm: 0.9999993781286856, iteration: 56952
loss: 1.0655937194824219,grad_norm: 0.9999995023309173, iteration: 56953
loss: 1.132757544517517,grad_norm: 0.999999278430752, iteration: 56954
loss: 1.034258246421814,grad_norm: 0.8611001142046231, iteration: 56955
loss: 1.1761754751205444,grad_norm: 0.9999996164458806, iteration: 56956
loss: 1.0520703792572021,grad_norm: 0.999999278745373, iteration: 56957
loss: 1.0956928730010986,grad_norm: 0.9999996654395338, iteration: 56958
loss: 1.290065050125122,grad_norm: 0.9999998479153928, iteration: 56959
loss: 1.024077296257019,grad_norm: 0.9999997734717969, iteration: 56960
loss: 1.0402929782867432,grad_norm: 0.9999990621298026, iteration: 56961
loss: 1.1038784980773926,grad_norm: 0.9999999718695766, iteration: 56962
loss: 0.984965443611145,grad_norm: 0.7521611845989953, iteration: 56963
loss: 1.0301179885864258,grad_norm: 0.9683861614422777, iteration: 56964
loss: 1.0729519128799438,grad_norm: 0.9999998863924534, iteration: 56965
loss: 0.9783613681793213,grad_norm: 0.9999989533258883, iteration: 56966
loss: 1.0142186880111694,grad_norm: 0.8729832150911059, iteration: 56967
loss: 1.000239610671997,grad_norm: 0.9364528108173421, iteration: 56968
loss: 1.0562816858291626,grad_norm: 0.9816485168677039, iteration: 56969
loss: 1.0134146213531494,grad_norm: 0.9999995795493708, iteration: 56970
loss: 1.0201427936553955,grad_norm: 0.8392167635984898, iteration: 56971
loss: 1.0171672105789185,grad_norm: 0.8898826519459864, iteration: 56972
loss: 1.0168524980545044,grad_norm: 0.9999990883871761, iteration: 56973
loss: 1.0451806783676147,grad_norm: 0.9999993355907746, iteration: 56974
loss: 1.0020948648452759,grad_norm: 0.9999994600707222, iteration: 56975
loss: 1.0212827920913696,grad_norm: 0.9999991830245973, iteration: 56976
loss: 1.1196166276931763,grad_norm: 0.9999997599010867, iteration: 56977
loss: 1.0140620470046997,grad_norm: 0.9999996525354732, iteration: 56978
loss: 1.0679446458816528,grad_norm: 0.9999994750247528, iteration: 56979
loss: 0.9880405068397522,grad_norm: 0.8954282880334135, iteration: 56980
loss: 1.0789145231246948,grad_norm: 0.999999276368106, iteration: 56981
loss: 1.014310359954834,grad_norm: 0.9999996764460855, iteration: 56982
loss: 1.051820993423462,grad_norm: 0.9999989989560758, iteration: 56983
loss: 0.9973848462104797,grad_norm: 0.999998989759235, iteration: 56984
loss: 0.9895288348197937,grad_norm: 0.9999992140132626, iteration: 56985
loss: 1.0047930479049683,grad_norm: 0.797678768036142, iteration: 56986
loss: 1.014147162437439,grad_norm: 0.8570515813389994, iteration: 56987
loss: 1.0726455450057983,grad_norm: 0.999999333478696, iteration: 56988
loss: 1.0113697052001953,grad_norm: 0.903248006776864, iteration: 56989
loss: 1.0305049419403076,grad_norm: 0.7771689391373491, iteration: 56990
loss: 0.9950381517410278,grad_norm: 0.8882207232214823, iteration: 56991
loss: 1.028583288192749,grad_norm: 0.9467181532863292, iteration: 56992
loss: 1.0225070714950562,grad_norm: 0.9986845521830429, iteration: 56993
loss: 0.9864431023597717,grad_norm: 0.8493683136344604, iteration: 56994
loss: 1.0108739137649536,grad_norm: 0.9999991490439842, iteration: 56995
loss: 1.0058507919311523,grad_norm: 0.9461169264326076, iteration: 56996
loss: 0.9894790053367615,grad_norm: 0.9999994964290435, iteration: 56997
loss: 1.0281747579574585,grad_norm: 0.9724133055781677, iteration: 56998
loss: 1.009070873260498,grad_norm: 0.9943328439192544, iteration: 56999
loss: 1.003503680229187,grad_norm: 0.871353840833737, iteration: 57000
loss: 0.9867485761642456,grad_norm: 0.9778874301434417, iteration: 57001
loss: 1.026888132095337,grad_norm: 0.9999993863091927, iteration: 57002
loss: 0.9810383319854736,grad_norm: 0.9966473558415839, iteration: 57003
loss: 1.128895878791809,grad_norm: 0.9999998510327659, iteration: 57004
loss: 1.0119051933288574,grad_norm: 0.8622766425040962, iteration: 57005
loss: 1.0778142213821411,grad_norm: 0.9999991358795262, iteration: 57006
loss: 1.0431920289993286,grad_norm: 0.9999997511815858, iteration: 57007
loss: 0.9821109771728516,grad_norm: 0.87488307606134, iteration: 57008
loss: 1.0118463039398193,grad_norm: 0.9999992178458549, iteration: 57009
loss: 1.0310499668121338,grad_norm: 0.9999990224832497, iteration: 57010
loss: 0.9743831157684326,grad_norm: 0.9890694108471249, iteration: 57011
loss: 1.0035539865493774,grad_norm: 0.9823485014667258, iteration: 57012
loss: 0.9791818857192993,grad_norm: 0.9999991423042704, iteration: 57013
loss: 1.0150550603866577,grad_norm: 0.8675199843867273, iteration: 57014
loss: 1.1058423519134521,grad_norm: 0.9999992808836472, iteration: 57015
loss: 1.0248922109603882,grad_norm: 0.9013152877084856, iteration: 57016
loss: 1.0014363527297974,grad_norm: 0.9999991106332982, iteration: 57017
loss: 0.9903560876846313,grad_norm: 0.8849387125102509, iteration: 57018
loss: 1.0128800868988037,grad_norm: 0.8954836197039217, iteration: 57019
loss: 1.039519190788269,grad_norm: 0.9131652493244525, iteration: 57020
loss: 1.04055655002594,grad_norm: 0.9071236483004224, iteration: 57021
loss: 1.0018157958984375,grad_norm: 0.9144995305750843, iteration: 57022
loss: 1.0114456415176392,grad_norm: 0.9999996340335582, iteration: 57023
loss: 1.0315831899642944,grad_norm: 0.8397792029858692, iteration: 57024
loss: 0.9972449541091919,grad_norm: 0.8722310385174914, iteration: 57025
loss: 1.0020771026611328,grad_norm: 0.8248510625674648, iteration: 57026
loss: 1.0443686246871948,grad_norm: 0.9999991217465084, iteration: 57027
loss: 0.9806672930717468,grad_norm: 0.8445752358348867, iteration: 57028
loss: 1.031612753868103,grad_norm: 0.9417091562066855, iteration: 57029
loss: 0.9720778465270996,grad_norm: 0.9999990775391836, iteration: 57030
loss: 1.0332874059677124,grad_norm: 0.9050387247027891, iteration: 57031
loss: 1.0769875049591064,grad_norm: 0.9999990064113188, iteration: 57032
loss: 1.035115122795105,grad_norm: 0.9999992521340422, iteration: 57033
loss: 0.9989570379257202,grad_norm: 0.8750389010330542, iteration: 57034
loss: 1.0226150751113892,grad_norm: 0.9999990587253107, iteration: 57035
loss: 1.0021138191223145,grad_norm: 0.9999996643503636, iteration: 57036
loss: 1.0356144905090332,grad_norm: 0.9999995924191419, iteration: 57037
loss: 1.0376639366149902,grad_norm: 0.9999991662219136, iteration: 57038
loss: 0.9995190501213074,grad_norm: 0.9999992074644899, iteration: 57039
loss: 1.025241494178772,grad_norm: 0.9066298470945102, iteration: 57040
loss: 0.9950680732727051,grad_norm: 0.8354931200822459, iteration: 57041
loss: 1.027207851409912,grad_norm: 0.9597657001023001, iteration: 57042
loss: 0.9831726551055908,grad_norm: 0.9999991706762972, iteration: 57043
loss: 1.1958880424499512,grad_norm: 0.8994603563158033, iteration: 57044
loss: 0.9954102635383606,grad_norm: 0.8593262542699718, iteration: 57045
loss: 1.0617051124572754,grad_norm: 1.000000023732021, iteration: 57046
loss: 1.0690999031066895,grad_norm: 0.9999990692403029, iteration: 57047
loss: 0.9585053324699402,grad_norm: 0.9999995053737457, iteration: 57048
loss: 0.9687062501907349,grad_norm: 0.9599402790126932, iteration: 57049
loss: 0.99439537525177,grad_norm: 0.999999362078573, iteration: 57050
loss: 0.9789283871650696,grad_norm: 0.9999989375862731, iteration: 57051
loss: 0.9802698493003845,grad_norm: 0.8930628653963023, iteration: 57052
loss: 1.0431455373764038,grad_norm: 0.9999998486818673, iteration: 57053
loss: 0.994232714176178,grad_norm: 0.999999150643488, iteration: 57054
loss: 0.9913415908813477,grad_norm: 0.83053144277212, iteration: 57055
loss: 0.9979791045188904,grad_norm: 0.8016510664600119, iteration: 57056
loss: 1.0292301177978516,grad_norm: 0.9541933983048144, iteration: 57057
loss: 1.0165379047393799,grad_norm: 0.9777825598986852, iteration: 57058
loss: 0.9990103244781494,grad_norm: 0.8723258872311271, iteration: 57059
loss: 1.024404525756836,grad_norm: 0.9999992336621008, iteration: 57060
loss: 1.031235933303833,grad_norm: 0.9964124027062529, iteration: 57061
loss: 0.9869349598884583,grad_norm: 0.8847330045702693, iteration: 57062
loss: 1.0491902828216553,grad_norm: 0.999999341118382, iteration: 57063
loss: 1.0046687126159668,grad_norm: 0.8865419741364766, iteration: 57064
loss: 1.0022056102752686,grad_norm: 0.9999991288572809, iteration: 57065
loss: 1.0059540271759033,grad_norm: 0.9461466389466201, iteration: 57066
loss: 1.0572413206100464,grad_norm: 0.9140087215362852, iteration: 57067
loss: 1.0403344631195068,grad_norm: 0.8914403212548908, iteration: 57068
loss: 1.0025577545166016,grad_norm: 0.8540431451902958, iteration: 57069
loss: 1.014244794845581,grad_norm: 0.9121254927310135, iteration: 57070
loss: 0.9781873226165771,grad_norm: 0.8876948202099552, iteration: 57071
loss: 1.0005908012390137,grad_norm: 0.9999990970114061, iteration: 57072
loss: 1.0518841743469238,grad_norm: 0.999999486968573, iteration: 57073
loss: 1.0033910274505615,grad_norm: 0.7418744642218518, iteration: 57074
loss: 1.0167193412780762,grad_norm: 0.9999991138274951, iteration: 57075
loss: 1.016419768333435,grad_norm: 0.9509968401153187, iteration: 57076
loss: 1.0174580812454224,grad_norm: 0.8095455529896961, iteration: 57077
loss: 1.013745665550232,grad_norm: 0.8874392285834493, iteration: 57078
loss: 0.9864364266395569,grad_norm: 0.9999992926255311, iteration: 57079
loss: 0.9857698678970337,grad_norm: 0.8479554388206747, iteration: 57080
loss: 1.0036628246307373,grad_norm: 0.8032266035627276, iteration: 57081
loss: 1.0221052169799805,grad_norm: 0.8933387212255217, iteration: 57082
loss: 1.0035984516143799,grad_norm: 0.9999991447306517, iteration: 57083
loss: 1.00742769241333,grad_norm: 0.8356573719753304, iteration: 57084
loss: 0.9859405159950256,grad_norm: 0.9198742741465725, iteration: 57085
loss: 1.0384607315063477,grad_norm: 0.9999999011367041, iteration: 57086
loss: 0.9882467985153198,grad_norm: 0.9928932612537698, iteration: 57087
loss: 1.0435497760772705,grad_norm: 0.9999992633389556, iteration: 57088
loss: 1.092191457748413,grad_norm: 0.9999991922519625, iteration: 57089
loss: 1.0005366802215576,grad_norm: 0.8125494197182359, iteration: 57090
loss: 0.9985703825950623,grad_norm: 0.8937546721219523, iteration: 57091
loss: 0.9631884098052979,grad_norm: 0.8979147870248818, iteration: 57092
loss: 1.0138312578201294,grad_norm: 0.8377104478270361, iteration: 57093
loss: 1.0467946529388428,grad_norm: 0.9999991370481845, iteration: 57094
loss: 1.0114837884902954,grad_norm: 0.9999991613056501, iteration: 57095
loss: 1.0238369703292847,grad_norm: 0.9999994596336477, iteration: 57096
loss: 1.0416539907455444,grad_norm: 0.9999990807402225, iteration: 57097
loss: 1.0117547512054443,grad_norm: 0.999999225378257, iteration: 57098
loss: 1.0258495807647705,grad_norm: 0.9197903037636024, iteration: 57099
loss: 1.007965326309204,grad_norm: 0.9175076847523048, iteration: 57100
loss: 1.0126482248306274,grad_norm: 0.9999991822500414, iteration: 57101
loss: 1.000998616218567,grad_norm: 0.8259196453155971, iteration: 57102
loss: 1.014035940170288,grad_norm: 0.9999993564169289, iteration: 57103
loss: 0.9771173000335693,grad_norm: 0.9999990358820906, iteration: 57104
loss: 1.0238195657730103,grad_norm: 0.9743890661239044, iteration: 57105
loss: 1.0008095502853394,grad_norm: 0.9999991852522296, iteration: 57106
loss: 1.0145403146743774,grad_norm: 0.9877402707950921, iteration: 57107
loss: 1.0289382934570312,grad_norm: 0.999999054313732, iteration: 57108
loss: 1.0697609186172485,grad_norm: 0.9999994125953863, iteration: 57109
loss: 1.0208643674850464,grad_norm: 0.9661667668804718, iteration: 57110
loss: 1.024651050567627,grad_norm: 0.9999995328667797, iteration: 57111
loss: 1.0096242427825928,grad_norm: 0.7399994735566472, iteration: 57112
loss: 0.9942655563354492,grad_norm: 0.840509716372067, iteration: 57113
loss: 0.9872985482215881,grad_norm: 0.9175419860727438, iteration: 57114
loss: 1.002197504043579,grad_norm: 0.9999992593351898, iteration: 57115
loss: 0.977226972579956,grad_norm: 0.9303590439923397, iteration: 57116
loss: 0.9977102875709534,grad_norm: 0.9549148493623362, iteration: 57117
loss: 1.006984829902649,grad_norm: 0.9999990410537424, iteration: 57118
loss: 0.9999837279319763,grad_norm: 0.7406472762353865, iteration: 57119
loss: 1.004521369934082,grad_norm: 0.8944713148652647, iteration: 57120
loss: 1.0447490215301514,grad_norm: 0.9999994651383848, iteration: 57121
loss: 0.9977684617042542,grad_norm: 0.9619257488491828, iteration: 57122
loss: 0.9905993342399597,grad_norm: 0.8842274255516716, iteration: 57123
loss: 1.0327534675598145,grad_norm: 0.8933102899751124, iteration: 57124
loss: 0.9830524325370789,grad_norm: 0.9999990816459623, iteration: 57125
loss: 1.0562036037445068,grad_norm: 0.9999990583690516, iteration: 57126
loss: 1.0177059173583984,grad_norm: 0.8965075591681554, iteration: 57127
loss: 0.9974988698959351,grad_norm: 0.8037125932374641, iteration: 57128
loss: 1.0251914262771606,grad_norm: 0.9999991740734785, iteration: 57129
loss: 1.058371663093567,grad_norm: 0.9999996621604454, iteration: 57130
loss: 1.0480098724365234,grad_norm: 0.9999997138821333, iteration: 57131
loss: 0.9920592308044434,grad_norm: 0.8595233828114823, iteration: 57132
loss: 1.0274959802627563,grad_norm: 0.9999989929087122, iteration: 57133
loss: 1.0026774406433105,grad_norm: 0.9999990437106653, iteration: 57134
loss: 1.1047477722167969,grad_norm: 0.9999995888145697, iteration: 57135
loss: 0.9739115834236145,grad_norm: 0.959059156831326, iteration: 57136
loss: 0.9845691919326782,grad_norm: 0.9999991323519959, iteration: 57137
loss: 1.0146840810775757,grad_norm: 0.9133547582977957, iteration: 57138
loss: 0.986104428768158,grad_norm: 0.8485360882922939, iteration: 57139
loss: 0.9744675755500793,grad_norm: 0.9406029829599687, iteration: 57140
loss: 1.0417512655258179,grad_norm: 0.9603045636132208, iteration: 57141
loss: 1.0266380310058594,grad_norm: 0.927992799641596, iteration: 57142
loss: 1.0672622919082642,grad_norm: 0.9999991232963448, iteration: 57143
loss: 1.0082719326019287,grad_norm: 0.9999992212873877, iteration: 57144
loss: 0.9796409010887146,grad_norm: 0.9999998406610819, iteration: 57145
loss: 0.9803451299667358,grad_norm: 0.99999958731416, iteration: 57146
loss: 0.9700320959091187,grad_norm: 0.9644707898495887, iteration: 57147
loss: 0.9897374510765076,grad_norm: 0.7657444206090994, iteration: 57148
loss: 1.0320276021957397,grad_norm: 0.9999991864472206, iteration: 57149
loss: 0.9922469854354858,grad_norm: 0.8936620068735469, iteration: 57150
loss: 1.0146195888519287,grad_norm: 0.9295509747380025, iteration: 57151
loss: 1.049648642539978,grad_norm: 0.991423652593397, iteration: 57152
loss: 0.9728366732597351,grad_norm: 0.9999991021416075, iteration: 57153
loss: 0.9894682765007019,grad_norm: 0.945790669312799, iteration: 57154
loss: 1.0612895488739014,grad_norm: 0.9999997047364481, iteration: 57155
loss: 0.9895175695419312,grad_norm: 0.9183787036602357, iteration: 57156
loss: 1.0249062776565552,grad_norm: 0.9999990616520843, iteration: 57157
loss: 0.9881134033203125,grad_norm: 0.8751144235660537, iteration: 57158
loss: 1.0101007223129272,grad_norm: 0.999999087606357, iteration: 57159
loss: 1.0857250690460205,grad_norm: 0.999999615297201, iteration: 57160
loss: 0.9944761395454407,grad_norm: 0.8586274740261042, iteration: 57161
loss: 1.002790093421936,grad_norm: 0.9999992240743036, iteration: 57162
loss: 1.0238901376724243,grad_norm: 0.9999990394612505, iteration: 57163
loss: 0.9959047436714172,grad_norm: 0.8045876929787916, iteration: 57164
loss: 0.9947620630264282,grad_norm: 0.9167599428304971, iteration: 57165
loss: 1.0059622526168823,grad_norm: 0.9545584811390215, iteration: 57166
loss: 0.9933279156684875,grad_norm: 0.8839287756789552, iteration: 57167
loss: 0.9397910833358765,grad_norm: 0.9999991350205447, iteration: 57168
loss: 1.000231146812439,grad_norm: 0.865232374018013, iteration: 57169
loss: 0.9356940984725952,grad_norm: 0.9527663454364786, iteration: 57170
loss: 1.0203574895858765,grad_norm: 0.8393861863364507, iteration: 57171
loss: 0.9644480347633362,grad_norm: 0.9784473521699534, iteration: 57172
loss: 1.010349154472351,grad_norm: 0.9999991048264337, iteration: 57173
loss: 1.0301982164382935,grad_norm: 0.9999991060594782, iteration: 57174
loss: 1.0246965885162354,grad_norm: 0.9457325406864194, iteration: 57175
loss: 1.0156660079956055,grad_norm: 0.9999992081116119, iteration: 57176
loss: 0.9561455845832825,grad_norm: 0.9999992255274638, iteration: 57177
loss: 0.9725986123085022,grad_norm: 0.9999991510723871, iteration: 57178
loss: 1.021999716758728,grad_norm: 0.9406435354823502, iteration: 57179
loss: 1.0132490396499634,grad_norm: 0.9999991368446531, iteration: 57180
loss: 1.0746992826461792,grad_norm: 0.9999992147231888, iteration: 57181
loss: 0.9977318644523621,grad_norm: 0.857407807987905, iteration: 57182
loss: 1.0044015645980835,grad_norm: 0.9999991034997983, iteration: 57183
loss: 1.0558332204818726,grad_norm: 0.9999997757164679, iteration: 57184
loss: 1.0339361429214478,grad_norm: 0.984661736608011, iteration: 57185
loss: 1.0011568069458008,grad_norm: 0.9780920331270339, iteration: 57186
loss: 1.033684492111206,grad_norm: 0.9999995902469351, iteration: 57187
loss: 0.9708281755447388,grad_norm: 0.9999990577007365, iteration: 57188
loss: 1.0074450969696045,grad_norm: 0.8423072880378331, iteration: 57189
loss: 0.9580351710319519,grad_norm: 0.9999990276371251, iteration: 57190
loss: 1.0054357051849365,grad_norm: 0.8675480747943181, iteration: 57191
loss: 1.045615315437317,grad_norm: 0.8587432587004444, iteration: 57192
loss: 0.9779394865036011,grad_norm: 0.7883339500850016, iteration: 57193
loss: 0.9837212562561035,grad_norm: 0.8220784224798834, iteration: 57194
loss: 1.0291881561279297,grad_norm: 0.8875398330881756, iteration: 57195
loss: 1.0378365516662598,grad_norm: 0.906505908832805, iteration: 57196
loss: 1.0671756267547607,grad_norm: 0.9999991913079452, iteration: 57197
loss: 1.052473783493042,grad_norm: 0.8808196455352008, iteration: 57198
loss: 1.002185344696045,grad_norm: 0.9751371344262962, iteration: 57199
loss: 1.013975739479065,grad_norm: 0.8525729723093143, iteration: 57200
loss: 1.0353981256484985,grad_norm: 0.9423681218325077, iteration: 57201
loss: 1.0670984983444214,grad_norm: 0.9999998118233564, iteration: 57202
loss: 0.9719216227531433,grad_norm: 0.999999418366108, iteration: 57203
loss: 1.0237568616867065,grad_norm: 0.9421685175016371, iteration: 57204
loss: 1.0345277786254883,grad_norm: 0.7974196600828491, iteration: 57205
loss: 1.006919503211975,grad_norm: 0.9297429018263318, iteration: 57206
loss: 0.9957616329193115,grad_norm: 0.8777188626382937, iteration: 57207
loss: 1.0105235576629639,grad_norm: 0.8318941075366519, iteration: 57208
loss: 0.9755919575691223,grad_norm: 0.9709898430660361, iteration: 57209
loss: 1.1226856708526611,grad_norm: 0.9999997589378821, iteration: 57210
loss: 1.001944899559021,grad_norm: 0.8914989864723021, iteration: 57211
loss: 1.0008809566497803,grad_norm: 0.839815489945328, iteration: 57212
loss: 1.006780743598938,grad_norm: 0.9353839969429977, iteration: 57213
loss: 1.004166603088379,grad_norm: 0.9999991366612101, iteration: 57214
loss: 0.971896231174469,grad_norm: 0.9515091951299657, iteration: 57215
loss: 0.9906150102615356,grad_norm: 0.8676393071513018, iteration: 57216
loss: 1.0125923156738281,grad_norm: 0.9999992109505196, iteration: 57217
loss: 1.056458830833435,grad_norm: 0.9999992076037492, iteration: 57218
loss: 1.0079365968704224,grad_norm: 0.963884157277477, iteration: 57219
loss: 1.1773682832717896,grad_norm: 0.999999591605486, iteration: 57220
loss: 0.9673634767532349,grad_norm: 0.8313870367431883, iteration: 57221
loss: 1.0190738439559937,grad_norm: 0.850512617857842, iteration: 57222
loss: 1.0160859823226929,grad_norm: 0.9083656693642906, iteration: 57223
loss: 1.0242578983306885,grad_norm: 0.8855234826467316, iteration: 57224
loss: 1.0140106678009033,grad_norm: 0.9999990059299504, iteration: 57225
loss: 0.9865363836288452,grad_norm: 0.9666505086950963, iteration: 57226
loss: 0.9960408210754395,grad_norm: 0.9961855679901316, iteration: 57227
loss: 1.0309219360351562,grad_norm: 0.9999995168909553, iteration: 57228
loss: 1.0495350360870361,grad_norm: 0.9999994327516617, iteration: 57229
loss: 1.0019941329956055,grad_norm: 0.8781645838533506, iteration: 57230
loss: 0.9884048104286194,grad_norm: 0.8644793387762851, iteration: 57231
loss: 0.9762519598007202,grad_norm: 0.9952805554006051, iteration: 57232
loss: 0.9849579334259033,grad_norm: 0.9153225275871479, iteration: 57233
loss: 1.0017993450164795,grad_norm: 0.8899408986834655, iteration: 57234
loss: 0.9574272036552429,grad_norm: 0.8369602387345012, iteration: 57235
loss: 1.0053901672363281,grad_norm: 0.8816971025120831, iteration: 57236
loss: 0.9834509491920471,grad_norm: 0.9536917132313565, iteration: 57237
loss: 1.0594221353530884,grad_norm: 0.999999027223575, iteration: 57238
loss: 1.0170085430145264,grad_norm: 0.9999993508556583, iteration: 57239
loss: 0.993908166885376,grad_norm: 0.999999605236352, iteration: 57240
loss: 1.0003700256347656,grad_norm: 0.9821974878115198, iteration: 57241
loss: 0.9767269492149353,grad_norm: 0.8917534223652361, iteration: 57242
loss: 0.9570562839508057,grad_norm: 0.9576039634666959, iteration: 57243
loss: 1.0194907188415527,grad_norm: 0.9268555367245829, iteration: 57244
loss: 1.0205432176589966,grad_norm: 0.9999991559524191, iteration: 57245
loss: 1.000816822052002,grad_norm: 0.9489376747724937, iteration: 57246
loss: 0.954902708530426,grad_norm: 0.8317273232531883, iteration: 57247
loss: 1.0276790857315063,grad_norm: 0.9999992201113276, iteration: 57248
loss: 1.0077234506607056,grad_norm: 0.9999991256774307, iteration: 57249
loss: 1.0036026239395142,grad_norm: 0.95922907134459, iteration: 57250
loss: 0.9489808678627014,grad_norm: 0.9813618300472151, iteration: 57251
loss: 0.9663914442062378,grad_norm: 0.9319706350610419, iteration: 57252
loss: 1.0079245567321777,grad_norm: 0.9297801422376972, iteration: 57253
loss: 1.0028594732284546,grad_norm: 0.8070035463034325, iteration: 57254
loss: 1.0237399339675903,grad_norm: 0.9733862598305535, iteration: 57255
loss: 1.1461036205291748,grad_norm: 0.9999994184178577, iteration: 57256
loss: 1.0278470516204834,grad_norm: 0.9999991652561467, iteration: 57257
loss: 1.0204523801803589,grad_norm: 0.9999990191068386, iteration: 57258
loss: 1.0236626863479614,grad_norm: 0.9999995408932064, iteration: 57259
loss: 1.02409029006958,grad_norm: 0.8487191721627279, iteration: 57260
loss: 1.0035696029663086,grad_norm: 0.9648522641641707, iteration: 57261
loss: 1.062304139137268,grad_norm: 0.9646626192710135, iteration: 57262
loss: 1.0037956237792969,grad_norm: 0.9999993194113685, iteration: 57263
loss: 0.9768040180206299,grad_norm: 0.9999990488122584, iteration: 57264
loss: 1.023171067237854,grad_norm: 0.99999919307173, iteration: 57265
loss: 0.9987793564796448,grad_norm: 0.9999992128328875, iteration: 57266
loss: 0.9925108551979065,grad_norm: 0.9503980682494383, iteration: 57267
loss: 0.9946228265762329,grad_norm: 0.9889514664500816, iteration: 57268
loss: 1.0023804903030396,grad_norm: 0.9999989809499068, iteration: 57269
loss: 0.9596339464187622,grad_norm: 0.8759700438534703, iteration: 57270
loss: 0.9573334455490112,grad_norm: 0.9385704344700074, iteration: 57271
loss: 0.9967681765556335,grad_norm: 0.6869631586410195, iteration: 57272
loss: 1.0666273832321167,grad_norm: 0.9999991869137026, iteration: 57273
loss: 1.016335129737854,grad_norm: 0.9999994299468707, iteration: 57274
loss: 1.01259183883667,grad_norm: 0.8526657332771098, iteration: 57275
loss: 1.0060712099075317,grad_norm: 0.9999994653242823, iteration: 57276
loss: 1.0458775758743286,grad_norm: 0.9578297990838676, iteration: 57277
loss: 0.989406943321228,grad_norm: 0.9999990577528499, iteration: 57278
loss: 1.0012811422348022,grad_norm: 0.8571091696091223, iteration: 57279
loss: 0.9725316762924194,grad_norm: 0.99999953671569, iteration: 57280
loss: 0.9450761675834656,grad_norm: 0.9736807667592426, iteration: 57281
loss: 1.0326950550079346,grad_norm: 0.9365873758426658, iteration: 57282
loss: 1.0191160440444946,grad_norm: 0.9176880693903794, iteration: 57283
loss: 1.0385380983352661,grad_norm: 0.9596106611257867, iteration: 57284
loss: 1.011743426322937,grad_norm: 0.9999989958215473, iteration: 57285
loss: 1.0154438018798828,grad_norm: 0.9999991319830454, iteration: 57286
loss: 0.9943845272064209,grad_norm: 0.9999989741608648, iteration: 57287
loss: 1.018352746963501,grad_norm: 0.9999990480296971, iteration: 57288
loss: 0.9947713613510132,grad_norm: 0.9714962580450124, iteration: 57289
loss: 1.0122745037078857,grad_norm: 0.8899061816326285, iteration: 57290
loss: 1.0219759941101074,grad_norm: 0.9111751631449241, iteration: 57291
loss: 0.9945417046546936,grad_norm: 0.7764713881303796, iteration: 57292
loss: 0.960831344127655,grad_norm: 0.9999992109093857, iteration: 57293
loss: 1.0336434841156006,grad_norm: 0.9999993707214447, iteration: 57294
loss: 0.9589582681655884,grad_norm: 0.963494399862589, iteration: 57295
loss: 1.012154459953308,grad_norm: 0.9999990736970703, iteration: 57296
loss: 1.1202300786972046,grad_norm: 0.9999999534591043, iteration: 57297
loss: 0.9725824594497681,grad_norm: 0.9999991627392407, iteration: 57298
loss: 1.0192861557006836,grad_norm: 0.9999991646484594, iteration: 57299
loss: 0.9983429908752441,grad_norm: 0.99400811552735, iteration: 57300
loss: 1.092681884765625,grad_norm: 0.9999996306432358, iteration: 57301
loss: 1.0316598415374756,grad_norm: 0.9999993982816062, iteration: 57302
loss: 0.9893006086349487,grad_norm: 0.943242129049412, iteration: 57303
loss: 1.0740753412246704,grad_norm: 0.999999094833789, iteration: 57304
loss: 0.9973718523979187,grad_norm: 0.9999990919791013, iteration: 57305
loss: 1.0293431282043457,grad_norm: 0.7970217610654077, iteration: 57306
loss: 0.9856559038162231,grad_norm: 0.8522746242055842, iteration: 57307
loss: 1.0112501382827759,grad_norm: 0.9997145296379927, iteration: 57308
loss: 1.0147254467010498,grad_norm: 0.9999992285793144, iteration: 57309
loss: 1.038264274597168,grad_norm: 0.9999991501528158, iteration: 57310
loss: 1.0228605270385742,grad_norm: 0.9999992287651756, iteration: 57311
loss: 0.9618991017341614,grad_norm: 0.9999996345820931, iteration: 57312
loss: 0.9953508973121643,grad_norm: 0.9977014989616777, iteration: 57313
loss: 1.0394465923309326,grad_norm: 0.9999992575206932, iteration: 57314
loss: 1.000019907951355,grad_norm: 0.8724239297199383, iteration: 57315
loss: 0.9932664036750793,grad_norm: 0.9999991116574369, iteration: 57316
loss: 1.0294944047927856,grad_norm: 0.99999925200469, iteration: 57317
loss: 1.0564302206039429,grad_norm: 0.9999995076773367, iteration: 57318
loss: 0.9331393241882324,grad_norm: 0.9999991702516033, iteration: 57319
loss: 0.9983507990837097,grad_norm: 0.8831388905053453, iteration: 57320
loss: 1.0084785223007202,grad_norm: 0.9999993667637219, iteration: 57321
loss: 0.9954397678375244,grad_norm: 0.9999993332533824, iteration: 57322
loss: 1.0284532308578491,grad_norm: 0.9999992210294356, iteration: 57323
loss: 1.006615400314331,grad_norm: 0.9999991387347448, iteration: 57324
loss: 1.072916865348816,grad_norm: 0.9999993889567237, iteration: 57325
loss: 1.0059438943862915,grad_norm: 0.9999991521170258, iteration: 57326
loss: 1.0261913537979126,grad_norm: 0.8398396192820581, iteration: 57327
loss: 0.9858373403549194,grad_norm: 0.9433806054971974, iteration: 57328
loss: 1.028946042060852,grad_norm: 0.9999990102952921, iteration: 57329
loss: 1.0045015811920166,grad_norm: 0.8093575426572398, iteration: 57330
loss: 0.9748257994651794,grad_norm: 0.9655805356297582, iteration: 57331
loss: 0.9951009154319763,grad_norm: 0.8504402408268457, iteration: 57332
loss: 0.9994821548461914,grad_norm: 0.8482521441343579, iteration: 57333
loss: 0.9951696991920471,grad_norm: 0.932467269246412, iteration: 57334
loss: 0.9754042625427246,grad_norm: 0.9999994246557365, iteration: 57335
loss: 1.0002813339233398,grad_norm: 0.8572947013998202, iteration: 57336
loss: 0.9715061783790588,grad_norm: 0.9546638558559398, iteration: 57337
loss: 0.9580457806587219,grad_norm: 0.9999992283844195, iteration: 57338
loss: 0.9820559620857239,grad_norm: 0.9999991598847001, iteration: 57339
loss: 1.0257065296173096,grad_norm: 0.835438685577358, iteration: 57340
loss: 0.9902968406677246,grad_norm: 0.9999991860185976, iteration: 57341
loss: 1.036511778831482,grad_norm: 0.8150832013063191, iteration: 57342
loss: 0.944922149181366,grad_norm: 0.8970570766682284, iteration: 57343
loss: 1.0247756242752075,grad_norm: 0.9999991505018627, iteration: 57344
loss: 1.0263937711715698,grad_norm: 0.9999992254951374, iteration: 57345
loss: 0.9784154295921326,grad_norm: 0.9574647318658781, iteration: 57346
loss: 1.03757905960083,grad_norm: 0.8988804040055035, iteration: 57347
loss: 0.9921442866325378,grad_norm: 0.999999066219768, iteration: 57348
loss: 0.9737277626991272,grad_norm: 0.9992842511416928, iteration: 57349
loss: 0.9836989641189575,grad_norm: 0.9999991117417786, iteration: 57350
loss: 1.020846962928772,grad_norm: 0.9794641001102249, iteration: 57351
loss: 1.0471363067626953,grad_norm: 0.9999996916192153, iteration: 57352
loss: 1.0058629512786865,grad_norm: 0.9999991382908733, iteration: 57353
loss: 0.9721367955207825,grad_norm: 0.9999990377055828, iteration: 57354
loss: 1.022545337677002,grad_norm: 0.8444338274243186, iteration: 57355
loss: 1.0044595003128052,grad_norm: 0.8016558196237517, iteration: 57356
loss: 1.0075849294662476,grad_norm: 0.853525434766353, iteration: 57357
loss: 1.0255941152572632,grad_norm: 0.6959806623042658, iteration: 57358
loss: 1.1840063333511353,grad_norm: 0.9999997315796574, iteration: 57359
loss: 1.006358027458191,grad_norm: 0.999999058333342, iteration: 57360
loss: 0.9789232015609741,grad_norm: 0.8651026752877677, iteration: 57361
loss: 1.0155887603759766,grad_norm: 0.9104743526383651, iteration: 57362
loss: 1.0125161409378052,grad_norm: 0.9555705453343547, iteration: 57363
loss: 0.9954243302345276,grad_norm: 0.8715270883768043, iteration: 57364
loss: 0.9928250908851624,grad_norm: 0.8780260933192381, iteration: 57365
loss: 1.0044020414352417,grad_norm: 0.9999996387941255, iteration: 57366
loss: 1.0575662851333618,grad_norm: 0.9999992308862976, iteration: 57367
loss: 0.9750626683235168,grad_norm: 0.7602260895476365, iteration: 57368
loss: 1.041972279548645,grad_norm: 0.9935698303511405, iteration: 57369
loss: 1.038030982017517,grad_norm: 0.8763864615338959, iteration: 57370
loss: 1.0269087553024292,grad_norm: 0.9999991130208414, iteration: 57371
loss: 1.014308214187622,grad_norm: 0.9558711920931682, iteration: 57372
loss: 1.0051361322402954,grad_norm: 0.9013289174103349, iteration: 57373
loss: 0.9850756525993347,grad_norm: 0.9999989917879478, iteration: 57374
loss: 0.9717267155647278,grad_norm: 0.8715409203568828, iteration: 57375
loss: 0.9949144721031189,grad_norm: 0.8066705407662966, iteration: 57376
loss: 0.98590487241745,grad_norm: 0.9213929990200556, iteration: 57377
loss: 1.0621658563613892,grad_norm: 0.9999991695566949, iteration: 57378
loss: 0.9857165813446045,grad_norm: 0.9868781205228241, iteration: 57379
loss: 0.9939874410629272,grad_norm: 0.999999058980962, iteration: 57380
loss: 1.0312775373458862,grad_norm: 0.999999130561352, iteration: 57381
loss: 0.9581865668296814,grad_norm: 0.954689836887345, iteration: 57382
loss: 1.0536106824874878,grad_norm: 0.9999997330932324, iteration: 57383
loss: 1.0521972179412842,grad_norm: 0.999999359326654, iteration: 57384
loss: 0.9990012049674988,grad_norm: 0.9056098791858651, iteration: 57385
loss: 0.9972442388534546,grad_norm: 0.9843872299277806, iteration: 57386
loss: 0.9804114103317261,grad_norm: 0.9709958206989858, iteration: 57387
loss: 1.0052671432495117,grad_norm: 0.9999993499178822, iteration: 57388
loss: 0.9895811676979065,grad_norm: 0.9999991454325866, iteration: 57389
loss: 1.0345412492752075,grad_norm: 0.8952155876160257, iteration: 57390
loss: 1.0488828420639038,grad_norm: 0.9683585577224341, iteration: 57391
loss: 1.0162314176559448,grad_norm: 0.8523415697060089, iteration: 57392
loss: 0.9910550117492676,grad_norm: 0.944845874132205, iteration: 57393
loss: 0.9999046921730042,grad_norm: 0.876758877213305, iteration: 57394
loss: 0.9996482133865356,grad_norm: 0.999999106002321, iteration: 57395
loss: 0.9806647896766663,grad_norm: 0.712611676164591, iteration: 57396
loss: 1.0135358572006226,grad_norm: 0.9711839564629988, iteration: 57397
loss: 0.9742051362991333,grad_norm: 0.818251965753761, iteration: 57398
loss: 1.0996607542037964,grad_norm: 0.9999997079097699, iteration: 57399
loss: 1.2062653303146362,grad_norm: 0.9999997294215519, iteration: 57400
loss: 0.9607700705528259,grad_norm: 0.9999994866866956, iteration: 57401
loss: 0.9975812435150146,grad_norm: 0.8114106139436504, iteration: 57402
loss: 1.0173161029815674,grad_norm: 0.9093447025038657, iteration: 57403
loss: 1.0094985961914062,grad_norm: 0.9999993929590019, iteration: 57404
loss: 0.9931977987289429,grad_norm: 0.999999124887048, iteration: 57405
loss: 1.0537495613098145,grad_norm: 0.9999994005222816, iteration: 57406
loss: 1.0289026498794556,grad_norm: 0.9999995705915137, iteration: 57407
loss: 0.9936044812202454,grad_norm: 0.9999992125854704, iteration: 57408
loss: 1.0367825031280518,grad_norm: 0.8926625271151875, iteration: 57409
loss: 0.9875961542129517,grad_norm: 0.9511891724131376, iteration: 57410
loss: 0.9193688035011292,grad_norm: 0.9999991315979821, iteration: 57411
loss: 0.9901267290115356,grad_norm: 0.9999991710598137, iteration: 57412
loss: 1.1160824298858643,grad_norm: 0.999999701362955, iteration: 57413
loss: 0.9598454236984253,grad_norm: 0.9789289275316053, iteration: 57414
loss: 1.008722186088562,grad_norm: 0.9999991223958344, iteration: 57415
loss: 1.0695480108261108,grad_norm: 0.999999031153424, iteration: 57416
loss: 0.9573849439620972,grad_norm: 0.9704319853481403, iteration: 57417
loss: 0.9448288679122925,grad_norm: 0.9999989066632374, iteration: 57418
loss: 0.9832061529159546,grad_norm: 0.8881846348047813, iteration: 57419
loss: 1.0293283462524414,grad_norm: 0.9999991949586161, iteration: 57420
loss: 1.0331770181655884,grad_norm: 0.9999993866837549, iteration: 57421
loss: 1.046288251876831,grad_norm: 0.9782513534776505, iteration: 57422
loss: 1.0269852876663208,grad_norm: 0.9999993521590582, iteration: 57423
loss: 1.0314432382583618,grad_norm: 0.9999989959931284, iteration: 57424
loss: 1.1069163084030151,grad_norm: 0.9999997629539343, iteration: 57425
loss: 1.017736792564392,grad_norm: 0.9466497193377007, iteration: 57426
loss: 1.0166194438934326,grad_norm: 0.9999991190565828, iteration: 57427
loss: 1.0128772258758545,grad_norm: 0.7958070910906567, iteration: 57428
loss: 1.036787509918213,grad_norm: 0.9715502299015107, iteration: 57429
loss: 1.0136864185333252,grad_norm: 0.7842110985512726, iteration: 57430
loss: 1.0222954750061035,grad_norm: 0.8848547927207426, iteration: 57431
loss: 0.9524252414703369,grad_norm: 0.86419402509876, iteration: 57432
loss: 0.9674727916717529,grad_norm: 0.8589703209707932, iteration: 57433
loss: 1.0329654216766357,grad_norm: 0.8797730142256661, iteration: 57434
loss: 0.9856322407722473,grad_norm: 0.8845242670497786, iteration: 57435
loss: 1.0401681661605835,grad_norm: 0.9999999124315606, iteration: 57436
loss: 0.9968088269233704,grad_norm: 0.9999992281778347, iteration: 57437
loss: 1.013023018836975,grad_norm: 0.9999997785869323, iteration: 57438
loss: 0.9990590810775757,grad_norm: 0.9631553115787445, iteration: 57439
loss: 0.9408212900161743,grad_norm: 0.9752043066426281, iteration: 57440
loss: 1.009454607963562,grad_norm: 0.9405752959394892, iteration: 57441
loss: 1.0052051544189453,grad_norm: 0.764286608895414, iteration: 57442
loss: 1.0018643140792847,grad_norm: 0.999999575366791, iteration: 57443
loss: 1.1797211170196533,grad_norm: 0.9999992287176652, iteration: 57444
loss: 0.9980984330177307,grad_norm: 0.8630528900229761, iteration: 57445
loss: 1.020673155784607,grad_norm: 0.9944413475424513, iteration: 57446
loss: 1.0167278051376343,grad_norm: 0.9032512990224174, iteration: 57447
loss: 1.0087218284606934,grad_norm: 0.9084353065628736, iteration: 57448
loss: 0.9812304973602295,grad_norm: 0.9999991723700069, iteration: 57449
loss: 0.9834766983985901,grad_norm: 0.8446368974817705, iteration: 57450
loss: 1.0050114393234253,grad_norm: 0.9999991741672629, iteration: 57451
loss: 1.00045907497406,grad_norm: 0.8439788606713156, iteration: 57452
loss: 0.9925957918167114,grad_norm: 0.9999990210738482, iteration: 57453
loss: 1.0489470958709717,grad_norm: 0.9999992807699506, iteration: 57454
loss: 1.0143085718154907,grad_norm: 0.9999990227622793, iteration: 57455
loss: 1.0291399955749512,grad_norm: 0.8654447478417925, iteration: 57456
loss: 1.0342859029769897,grad_norm: 0.9999991112133231, iteration: 57457
loss: 1.0779383182525635,grad_norm: 0.9999995532006241, iteration: 57458
loss: 1.0472326278686523,grad_norm: 0.9999997072388176, iteration: 57459
loss: 1.03298020362854,grad_norm: 0.8991805279937187, iteration: 57460
loss: 1.0287445783615112,grad_norm: 0.999999246378503, iteration: 57461
loss: 1.0621142387390137,grad_norm: 0.9999991508546384, iteration: 57462
loss: 0.9802823662757874,grad_norm: 0.9999991234249089, iteration: 57463
loss: 0.9680267572402954,grad_norm: 0.8644084373948028, iteration: 57464
loss: 1.008220911026001,grad_norm: 0.8837091302983223, iteration: 57465
loss: 1.0017021894454956,grad_norm: 0.8329225232832672, iteration: 57466
loss: 1.0070701837539673,grad_norm: 0.8470197938217873, iteration: 57467
loss: 1.0073434114456177,grad_norm: 0.9999992345788159, iteration: 57468
loss: 1.0031062364578247,grad_norm: 0.8827914569452505, iteration: 57469
loss: 0.9967045187950134,grad_norm: 0.9999991290490214, iteration: 57470
loss: 1.1222625970840454,grad_norm: 0.9999990159893771, iteration: 57471
loss: 1.0033228397369385,grad_norm: 0.9999992366255592, iteration: 57472
loss: 1.047323226928711,grad_norm: 0.9794990956074338, iteration: 57473
loss: 0.9750615358352661,grad_norm: 0.9999990843130585, iteration: 57474
loss: 1.1001782417297363,grad_norm: 0.9999999415749822, iteration: 57475
loss: 1.0730632543563843,grad_norm: 0.9771254830810261, iteration: 57476
loss: 1.0514839887619019,grad_norm: 0.9999989417087825, iteration: 57477
loss: 1.002100944519043,grad_norm: 0.9999990828644653, iteration: 57478
loss: 0.999976396560669,grad_norm: 0.961596931420513, iteration: 57479
loss: 1.0103332996368408,grad_norm: 0.936114490325743, iteration: 57480
loss: 0.9978886842727661,grad_norm: 0.8386581023030585, iteration: 57481
loss: 1.0674540996551514,grad_norm: 0.960697237122519, iteration: 57482
loss: 1.01125168800354,grad_norm: 0.9275798570787479, iteration: 57483
loss: 1.0139411687850952,grad_norm: 0.9453279404884772, iteration: 57484
loss: 1.004136323928833,grad_norm: 0.9999992504731793, iteration: 57485
loss: 0.9975858330726624,grad_norm: 0.9999990177422444, iteration: 57486
loss: 1.0160905122756958,grad_norm: 0.9601809868591781, iteration: 57487
loss: 1.1144894361495972,grad_norm: 0.999999417827197, iteration: 57488
loss: 0.9832584261894226,grad_norm: 0.9999992740623266, iteration: 57489
loss: 1.0651086568832397,grad_norm: 0.9999992117027157, iteration: 57490
loss: 0.9833294153213501,grad_norm: 0.9351345912356379, iteration: 57491
loss: 1.0150684118270874,grad_norm: 0.9999990938889953, iteration: 57492
loss: 1.0187077522277832,grad_norm: 0.9999991092802833, iteration: 57493
loss: 1.0002729892730713,grad_norm: 0.8675340922940287, iteration: 57494
loss: 0.9905363321304321,grad_norm: 0.8199789945425204, iteration: 57495
loss: 1.0270909070968628,grad_norm: 0.9978899806755329, iteration: 57496
loss: 0.964249849319458,grad_norm: 0.82089873510789, iteration: 57497
loss: 1.0157451629638672,grad_norm: 0.9999990991734938, iteration: 57498
loss: 1.0952974557876587,grad_norm: 0.999999720209509, iteration: 57499
loss: 1.0301724672317505,grad_norm: 0.9999990778130248, iteration: 57500
loss: 1.0225852727890015,grad_norm: 0.8182479441269335, iteration: 57501
loss: 0.9945376515388489,grad_norm: 0.7849623445328986, iteration: 57502
loss: 1.0558700561523438,grad_norm: 0.9999993659162675, iteration: 57503
loss: 1.039732575416565,grad_norm: 0.9436220508364688, iteration: 57504
loss: 1.0178273916244507,grad_norm: 0.8187917000379648, iteration: 57505
loss: 0.9899868965148926,grad_norm: 0.827119092712394, iteration: 57506
loss: 0.9973037242889404,grad_norm: 0.8973909588123844, iteration: 57507
loss: 0.9668652415275574,grad_norm: 0.7979304797303592, iteration: 57508
loss: 1.0199238061904907,grad_norm: 0.9999994389156401, iteration: 57509
loss: 1.0068144798278809,grad_norm: 0.9999991085944571, iteration: 57510
loss: 1.001381754875183,grad_norm: 0.9296340164449509, iteration: 57511
loss: 1.0359904766082764,grad_norm: 0.8412147837344163, iteration: 57512
loss: 1.0097695589065552,grad_norm: 0.9999993431371408, iteration: 57513
loss: 1.0267066955566406,grad_norm: 0.9882856044247734, iteration: 57514
loss: 1.0345062017440796,grad_norm: 0.9999995384424639, iteration: 57515
loss: 0.9845510125160217,grad_norm: 0.8080653112591871, iteration: 57516
loss: 1.0040218830108643,grad_norm: 0.9999012467831484, iteration: 57517
loss: 0.9724789261817932,grad_norm: 0.9093624238466889, iteration: 57518
loss: 1.0025038719177246,grad_norm: 0.9999992558632274, iteration: 57519
loss: 1.0158733129501343,grad_norm: 0.8111257793952793, iteration: 57520
loss: 0.9639233946800232,grad_norm: 0.9192597820738238, iteration: 57521
loss: 0.9611119627952576,grad_norm: 0.9148068575177143, iteration: 57522
loss: 0.9838054776191711,grad_norm: 0.9999990245085779, iteration: 57523
loss: 1.0264652967453003,grad_norm: 0.8968932959258878, iteration: 57524
loss: 1.0607556104660034,grad_norm: 0.9999991915074581, iteration: 57525
loss: 1.0283939838409424,grad_norm: 0.7683889893979645, iteration: 57526
loss: 1.0290874242782593,grad_norm: 0.9999991864998792, iteration: 57527
loss: 1.0381850004196167,grad_norm: 0.9999999276750869, iteration: 57528
loss: 0.9990403056144714,grad_norm: 0.8725767974392443, iteration: 57529
loss: 0.9656378626823425,grad_norm: 0.9363605237770026, iteration: 57530
loss: 1.0015875101089478,grad_norm: 0.9999989652822944, iteration: 57531
loss: 0.9781901836395264,grad_norm: 0.999999425357574, iteration: 57532
loss: 0.995061457157135,grad_norm: 0.9993847873889554, iteration: 57533
loss: 1.0031911134719849,grad_norm: 0.9999990334665836, iteration: 57534
loss: 0.9985228776931763,grad_norm: 0.9999989706871055, iteration: 57535
loss: 1.0108377933502197,grad_norm: 0.9849345517897545, iteration: 57536
loss: 1.0330463647842407,grad_norm: 0.9409523675753522, iteration: 57537
loss: 1.0541740655899048,grad_norm: 0.9301393152595412, iteration: 57538
loss: 1.0147956609725952,grad_norm: 0.7455310556084479, iteration: 57539
loss: 0.9829704761505127,grad_norm: 0.8390968367191776, iteration: 57540
loss: 0.9930038452148438,grad_norm: 0.95318334914571, iteration: 57541
loss: 0.9874863624572754,grad_norm: 0.900190324810851, iteration: 57542
loss: 1.0053479671478271,grad_norm: 0.9999990555310166, iteration: 57543
loss: 0.9926807880401611,grad_norm: 0.9219324945836175, iteration: 57544
loss: 1.0099787712097168,grad_norm: 0.9999996647161619, iteration: 57545
loss: 0.9873979687690735,grad_norm: 0.9999991375897282, iteration: 57546
loss: 1.012445092201233,grad_norm: 0.9999994890958094, iteration: 57547
loss: 0.9945628046989441,grad_norm: 0.9999991501734317, iteration: 57548
loss: 1.0264636278152466,grad_norm: 0.9999990215852387, iteration: 57549
loss: 1.0048489570617676,grad_norm: 0.8536606362296596, iteration: 57550
loss: 0.9801117777824402,grad_norm: 0.9999995901105859, iteration: 57551
loss: 0.9624823331832886,grad_norm: 0.999999143425927, iteration: 57552
loss: 1.015944004058838,grad_norm: 0.9999992913131429, iteration: 57553
loss: 1.0171509981155396,grad_norm: 0.9999992729744299, iteration: 57554
loss: 1.0365108251571655,grad_norm: 0.9999998903921657, iteration: 57555
loss: 1.0229686498641968,grad_norm: 0.987351010903645, iteration: 57556
loss: 1.02242112159729,grad_norm: 0.9999992764241172, iteration: 57557
loss: 0.9818469285964966,grad_norm: 0.9999991016256415, iteration: 57558
loss: 1.190485954284668,grad_norm: 0.9999999398164661, iteration: 57559
loss: 1.0093425512313843,grad_norm: 0.9174638665523223, iteration: 57560
loss: 1.0361642837524414,grad_norm: 0.999999148337941, iteration: 57561
loss: 0.9889428019523621,grad_norm: 0.8390613351436782, iteration: 57562
loss: 1.044318437576294,grad_norm: 0.9999994744768008, iteration: 57563
loss: 1.0291310548782349,grad_norm: 0.9142235356030998, iteration: 57564
loss: 0.9662619233131409,grad_norm: 0.9999990352332979, iteration: 57565
loss: 1.0022515058517456,grad_norm: 0.9492842552105215, iteration: 57566
loss: 1.1002570390701294,grad_norm: 0.9999998915487838, iteration: 57567
loss: 0.9830904006958008,grad_norm: 0.8816278489609348, iteration: 57568
loss: 1.0105682611465454,grad_norm: 0.9999992623138327, iteration: 57569
loss: 1.0267140865325928,grad_norm: 0.9014752255521709, iteration: 57570
loss: 0.9819410443305969,grad_norm: 0.999999269502987, iteration: 57571
loss: 0.9750014543533325,grad_norm: 0.8358280374645634, iteration: 57572
loss: 1.0154787302017212,grad_norm: 0.8277623636856805, iteration: 57573
loss: 0.9990247488021851,grad_norm: 0.9157286316928714, iteration: 57574
loss: 0.9539420008659363,grad_norm: 0.9999989975676583, iteration: 57575
loss: 1.0138990879058838,grad_norm: 0.9999991267670542, iteration: 57576
loss: 0.9822860956192017,grad_norm: 0.9234087391680434, iteration: 57577
loss: 0.9761514663696289,grad_norm: 0.8047417406567202, iteration: 57578
loss: 0.9988383650779724,grad_norm: 0.841858631198395, iteration: 57579
loss: 0.9902802109718323,grad_norm: 0.999999252243804, iteration: 57580
loss: 1.015299916267395,grad_norm: 0.9999990903996643, iteration: 57581
loss: 0.9946293830871582,grad_norm: 0.8951128505413022, iteration: 57582
loss: 0.9842486381530762,grad_norm: 0.9295857962817246, iteration: 57583
loss: 1.003662347793579,grad_norm: 0.9999992732674243, iteration: 57584
loss: 1.0185908079147339,grad_norm: 0.9382932945359862, iteration: 57585
loss: 1.0077768564224243,grad_norm: 0.9999989830696429, iteration: 57586
loss: 1.0157908201217651,grad_norm: 0.9999997828399136, iteration: 57587
loss: 1.0214259624481201,grad_norm: 0.8382714254722073, iteration: 57588
loss: 1.0192674398422241,grad_norm: 0.7591433360393092, iteration: 57589
loss: 1.0318448543548584,grad_norm: 0.7813624405180915, iteration: 57590
loss: 1.1239054203033447,grad_norm: 0.999999932981855, iteration: 57591
loss: 1.0033286809921265,grad_norm: 0.9999990601299898, iteration: 57592
loss: 1.0087966918945312,grad_norm: 0.9999990958164641, iteration: 57593
loss: 0.9890566468238831,grad_norm: 0.8275056442208679, iteration: 57594
loss: 1.0687968730926514,grad_norm: 0.9999989736264089, iteration: 57595
loss: 1.0232229232788086,grad_norm: 0.8321897774417306, iteration: 57596
loss: 0.9955727458000183,grad_norm: 0.8868736486233597, iteration: 57597
loss: 1.026212453842163,grad_norm: 0.9999990152649526, iteration: 57598
loss: 1.0272202491760254,grad_norm: 0.8979029009152448, iteration: 57599
loss: 0.9843919277191162,grad_norm: 0.948705706935807, iteration: 57600
loss: 0.9473140239715576,grad_norm: 0.9339422726480113, iteration: 57601
loss: 1.007380723953247,grad_norm: 0.9398785981626789, iteration: 57602
loss: 1.028452754020691,grad_norm: 0.8773568119865751, iteration: 57603
loss: 1.0234013795852661,grad_norm: 0.9999996586292196, iteration: 57604
loss: 0.9989964365959167,grad_norm: 0.8431838174148651, iteration: 57605
loss: 1.0158334970474243,grad_norm: 0.8036903264035461, iteration: 57606
loss: 1.0202202796936035,grad_norm: 0.9999991660961818, iteration: 57607
loss: 1.01999032497406,grad_norm: 0.7912027347119185, iteration: 57608
loss: 1.0165737867355347,grad_norm: 0.9867281835278953, iteration: 57609
loss: 1.0390063524246216,grad_norm: 0.9999992268480639, iteration: 57610
loss: 0.9883458614349365,grad_norm: 0.9151704708718155, iteration: 57611
loss: 1.0899617671966553,grad_norm: 0.9999995726626374, iteration: 57612
loss: 0.9979556202888489,grad_norm: 0.9999990902357633, iteration: 57613
loss: 1.010536551475525,grad_norm: 0.9237336164795678, iteration: 57614
loss: 0.9905443787574768,grad_norm: 0.9999996295243743, iteration: 57615
loss: 0.9857273101806641,grad_norm: 0.9999990347444667, iteration: 57616
loss: 1.0152950286865234,grad_norm: 0.9999996818823031, iteration: 57617
loss: 0.9890110492706299,grad_norm: 0.9999990891280538, iteration: 57618
loss: 1.0091267824172974,grad_norm: 0.9999991118280418, iteration: 57619
loss: 1.0226916074752808,grad_norm: 0.9999996496239526, iteration: 57620
loss: 1.004679799079895,grad_norm: 0.8534477115386468, iteration: 57621
loss: 1.0282189846038818,grad_norm: 0.9999996807461645, iteration: 57622
loss: 1.0032835006713867,grad_norm: 0.9999993860317673, iteration: 57623
loss: 1.045931100845337,grad_norm: 0.9999991025471745, iteration: 57624
loss: 0.984889566898346,grad_norm: 0.8871264206604181, iteration: 57625
loss: 1.1223520040512085,grad_norm: 0.9999997285354428, iteration: 57626
loss: 0.9960886836051941,grad_norm: 0.8378617913405247, iteration: 57627
loss: 1.0206855535507202,grad_norm: 0.9999994654711537, iteration: 57628
loss: 0.9824707508087158,grad_norm: 0.8319625016746219, iteration: 57629
loss: 0.9879274368286133,grad_norm: 0.9999992432942031, iteration: 57630
loss: 1.007117509841919,grad_norm: 0.9685115285576782, iteration: 57631
loss: 1.0053633451461792,grad_norm: 0.7810971362456668, iteration: 57632
loss: 1.042501449584961,grad_norm: 0.9897119123147006, iteration: 57633
loss: 1.0119211673736572,grad_norm: 0.9999990809190229, iteration: 57634
loss: 0.9837666153907776,grad_norm: 0.9240272675913379, iteration: 57635
loss: 0.9978452920913696,grad_norm: 0.999999013655445, iteration: 57636
loss: 1.0296530723571777,grad_norm: 0.8434443460995341, iteration: 57637
loss: 0.9981670379638672,grad_norm: 0.9251971609793955, iteration: 57638
loss: 1.0101184844970703,grad_norm: 0.9999995539446005, iteration: 57639
loss: 1.0027821063995361,grad_norm: 0.8635049454219409, iteration: 57640
loss: 1.0375384092330933,grad_norm: 0.8535820992989882, iteration: 57641
loss: 1.03168523311615,grad_norm: 0.9999997337343464, iteration: 57642
loss: 1.0052998065948486,grad_norm: 0.9999991792184109, iteration: 57643
loss: 0.99943608045578,grad_norm: 0.97573548304916, iteration: 57644
loss: 1.006314754486084,grad_norm: 0.9999994115129167, iteration: 57645
loss: 1.0044232606887817,grad_norm: 0.9999992288392371, iteration: 57646
loss: 0.9945784211158752,grad_norm: 0.9999991554025975, iteration: 57647
loss: 1.0039352178573608,grad_norm: 0.9092849239439149, iteration: 57648
loss: 1.0821987390518188,grad_norm: 0.9999997063991006, iteration: 57649
loss: 0.985664963722229,grad_norm: 0.9999992392157697, iteration: 57650
loss: 1.0204249620437622,grad_norm: 0.9729650968164167, iteration: 57651
loss: 1.001185417175293,grad_norm: 0.9999991775744623, iteration: 57652
loss: 1.0108948945999146,grad_norm: 0.9999991318876733, iteration: 57653
loss: 1.0205916166305542,grad_norm: 0.9999990260611015, iteration: 57654
loss: 0.9888418912887573,grad_norm: 0.9098716781285706, iteration: 57655
loss: 1.022571086883545,grad_norm: 0.9999989246081257, iteration: 57656
loss: 0.9899309873580933,grad_norm: 0.8259132944567369, iteration: 57657
loss: 1.018597960472107,grad_norm: 0.9999996709854395, iteration: 57658
loss: 1.0200421810150146,grad_norm: 0.9999995625198641, iteration: 57659
loss: 1.0015467405319214,grad_norm: 0.8128268569720268, iteration: 57660
loss: 1.010080099105835,grad_norm: 0.8098172987350052, iteration: 57661
loss: 1.0651304721832275,grad_norm: 0.9999993425393598, iteration: 57662
loss: 1.011986494064331,grad_norm: 0.7817875719421201, iteration: 57663
loss: 0.9740031957626343,grad_norm: 0.9348694598892081, iteration: 57664
loss: 1.009689211845398,grad_norm: 0.7792870368871248, iteration: 57665
loss: 0.9810638427734375,grad_norm: 0.9999991880255538, iteration: 57666
loss: 1.0102710723876953,grad_norm: 0.725160400263978, iteration: 57667
loss: 1.2485909461975098,grad_norm: 0.9999991512190834, iteration: 57668
loss: 1.0521961450576782,grad_norm: 0.9999993997312361, iteration: 57669
loss: 0.9845668077468872,grad_norm: 0.8006708341475854, iteration: 57670
loss: 1.039128303527832,grad_norm: 0.9999997848577447, iteration: 57671
loss: 0.9923089742660522,grad_norm: 0.8865607772321997, iteration: 57672
loss: 0.9636180996894836,grad_norm: 0.9999990015504067, iteration: 57673
loss: 1.0156561136245728,grad_norm: 0.8554060993318392, iteration: 57674
loss: 1.0193730592727661,grad_norm: 0.9999998348557462, iteration: 57675
loss: 0.9431902170181274,grad_norm: 0.8657892655595983, iteration: 57676
loss: 1.0554351806640625,grad_norm: 0.9999993022642979, iteration: 57677
loss: 1.0029072761535645,grad_norm: 0.9999991069621411, iteration: 57678
loss: 1.0244920253753662,grad_norm: 0.9361530400455449, iteration: 57679
loss: 0.9746032357215881,grad_norm: 0.9999990923605984, iteration: 57680
loss: 1.031753659248352,grad_norm: 1.0000000126460922, iteration: 57681
loss: 1.2106215953826904,grad_norm: 0.9999998937836805, iteration: 57682
loss: 1.0135910511016846,grad_norm: 0.8449740985347542, iteration: 57683
loss: 1.0140981674194336,grad_norm: 0.9999991039656123, iteration: 57684
loss: 1.0303142070770264,grad_norm: 0.9999995071430368, iteration: 57685
loss: 1.0332190990447998,grad_norm: 0.9999991086437979, iteration: 57686
loss: 0.9919060468673706,grad_norm: 0.927455996779945, iteration: 57687
loss: 1.0192034244537354,grad_norm: 0.9999991414096809, iteration: 57688
loss: 1.0052387714385986,grad_norm: 0.9999992137677258, iteration: 57689
loss: 1.0019803047180176,grad_norm: 0.9999993129284485, iteration: 57690
loss: 1.0276728868484497,grad_norm: 0.9999992929420428, iteration: 57691
loss: 0.9762195348739624,grad_norm: 0.9999991139585712, iteration: 57692
loss: 0.9922101497650146,grad_norm: 0.8398436050190474, iteration: 57693
loss: 0.9820742011070251,grad_norm: 0.9999992482355106, iteration: 57694
loss: 1.0004905462265015,grad_norm: 0.9999989581871536, iteration: 57695
loss: 0.9917329549789429,grad_norm: 0.875196285932313, iteration: 57696
loss: 1.006803274154663,grad_norm: 0.9608511176887498, iteration: 57697
loss: 1.010667324066162,grad_norm: 0.9999992574921465, iteration: 57698
loss: 1.0234845876693726,grad_norm: 0.9999990903742946, iteration: 57699
loss: 1.0215853452682495,grad_norm: 0.9401461569476882, iteration: 57700
loss: 0.9973341226577759,grad_norm: 0.8920863873222697, iteration: 57701
loss: 0.9807182550430298,grad_norm: 0.9999990916326301, iteration: 57702
loss: 1.0254288911819458,grad_norm: 0.9915357618915606, iteration: 57703
loss: 0.9787482619285583,grad_norm: 0.9958861320024652, iteration: 57704
loss: 1.007503628730774,grad_norm: 0.9999991833985903, iteration: 57705
loss: 0.9887135028839111,grad_norm: 0.9999990610306106, iteration: 57706
loss: 0.9823771119117737,grad_norm: 0.91740157784164, iteration: 57707
loss: 0.9863284826278687,grad_norm: 0.8499474753418516, iteration: 57708
loss: 0.9846687912940979,grad_norm: 0.8692244571713525, iteration: 57709
loss: 1.0135165452957153,grad_norm: 0.8452016540671944, iteration: 57710
loss: 1.0112943649291992,grad_norm: 0.9999993927606583, iteration: 57711
loss: 1.0407171249389648,grad_norm: 0.9999989610597491, iteration: 57712
loss: 1.0191621780395508,grad_norm: 0.8347748965824172, iteration: 57713
loss: 0.9901387691497803,grad_norm: 0.9227356650097988, iteration: 57714
loss: 1.032776117324829,grad_norm: 0.964024357358238, iteration: 57715
loss: 1.0537681579589844,grad_norm: 0.9999998229935865, iteration: 57716
loss: 1.032169222831726,grad_norm: 0.9999990269094232, iteration: 57717
loss: 0.9864992499351501,grad_norm: 0.9999990760812124, iteration: 57718
loss: 1.0391372442245483,grad_norm: 0.9999995308585421, iteration: 57719
loss: 1.0344719886779785,grad_norm: 0.9999993345506392, iteration: 57720
loss: 1.0170025825500488,grad_norm: 0.9999990182100419, iteration: 57721
loss: 1.01131010055542,grad_norm: 0.9999990416527581, iteration: 57722
loss: 1.004797101020813,grad_norm: 0.8604378911530464, iteration: 57723
loss: 1.0185692310333252,grad_norm: 0.8916671998744707, iteration: 57724
loss: 1.019237756729126,grad_norm: 0.9999993829415085, iteration: 57725
loss: 1.095649003982544,grad_norm: 0.8246202648718095, iteration: 57726
loss: 0.9994821548461914,grad_norm: 0.7985513688830238, iteration: 57727
loss: 1.0182082653045654,grad_norm: 0.8198256743025071, iteration: 57728
loss: 0.9793910384178162,grad_norm: 0.9191917747123699, iteration: 57729
loss: 1.013433575630188,grad_norm: 0.9999993505152014, iteration: 57730
loss: 1.0479233264923096,grad_norm: 0.9999992470736361, iteration: 57731
loss: 0.9971556663513184,grad_norm: 0.9553914542087699, iteration: 57732
loss: 1.0186842679977417,grad_norm: 0.9002635229677353, iteration: 57733
loss: 1.003318190574646,grad_norm: 0.9529834210159901, iteration: 57734
loss: 1.0064250230789185,grad_norm: 0.9007117637262393, iteration: 57735
loss: 1.0333647727966309,grad_norm: 0.9666769092627062, iteration: 57736
loss: 1.0185710191726685,grad_norm: 0.9999990143968248, iteration: 57737
loss: 0.961784839630127,grad_norm: 0.8056336177041041, iteration: 57738
loss: 1.008480191230774,grad_norm: 0.9999991098321879, iteration: 57739
loss: 1.0152063369750977,grad_norm: 0.896948350527158, iteration: 57740
loss: 1.0206358432769775,grad_norm: 0.9999996695532157, iteration: 57741
loss: 1.015609860420227,grad_norm: 0.9999990201953917, iteration: 57742
loss: 1.004722237586975,grad_norm: 0.9099957745256314, iteration: 57743
loss: 1.046570062637329,grad_norm: 0.9999991976516258, iteration: 57744
loss: 1.0205241441726685,grad_norm: 0.9684116044181014, iteration: 57745
loss: 1.0384454727172852,grad_norm: 0.9669530011399138, iteration: 57746
loss: 1.1059913635253906,grad_norm: 0.9999994602880342, iteration: 57747
loss: 1.114936351776123,grad_norm: 0.999999584793686, iteration: 57748
loss: 1.1169867515563965,grad_norm: 0.9999990900122316, iteration: 57749
loss: 0.9910560250282288,grad_norm: 0.8665438695707092, iteration: 57750
loss: 1.0466474294662476,grad_norm: 0.999999991796016, iteration: 57751
loss: 1.1544448137283325,grad_norm: 0.9999996194355482, iteration: 57752
loss: 1.1732653379440308,grad_norm: 0.9999991293479994, iteration: 57753
loss: 0.9682429432868958,grad_norm: 0.9999992927425713, iteration: 57754
loss: 1.0227606296539307,grad_norm: 0.9999997196961303, iteration: 57755
loss: 1.062461495399475,grad_norm: 0.9999991082309115, iteration: 57756
loss: 0.9540380835533142,grad_norm: 0.999999146339003, iteration: 57757
loss: 1.0109127759933472,grad_norm: 0.775407113564258, iteration: 57758
loss: 1.0051522254943848,grad_norm: 0.9999995977162567, iteration: 57759
loss: 0.9848742485046387,grad_norm: 0.916573436814648, iteration: 57760
loss: 1.0496636629104614,grad_norm: 0.9999996310862727, iteration: 57761
loss: 1.0520957708358765,grad_norm: 0.9999999287268979, iteration: 57762
loss: 1.0119071006774902,grad_norm: 0.9999989416678802, iteration: 57763
loss: 1.0136224031448364,grad_norm: 0.9999992929711605, iteration: 57764
loss: 1.0847032070159912,grad_norm: 0.9999996771197905, iteration: 57765
loss: 1.0162972211837769,grad_norm: 0.9999991860263717, iteration: 57766
loss: 1.0298892259597778,grad_norm: 0.9999993597297269, iteration: 57767
loss: 1.04991614818573,grad_norm: 0.9999996241462911, iteration: 57768
loss: 0.9724667072296143,grad_norm: 0.9999990072879477, iteration: 57769
loss: 1.0228161811828613,grad_norm: 0.9665941480370007, iteration: 57770
loss: 1.040623664855957,grad_norm: 0.9999991193800146, iteration: 57771
loss: 1.0166125297546387,grad_norm: 0.9999993408719908, iteration: 57772
loss: 1.1053102016448975,grad_norm: 0.9999998349948056, iteration: 57773
loss: 1.0323189496994019,grad_norm: 0.9999991335003241, iteration: 57774
loss: 1.0198330879211426,grad_norm: 0.8598200654335418, iteration: 57775
loss: 1.032020092010498,grad_norm: 0.8202764267300866, iteration: 57776
loss: 0.9895028471946716,grad_norm: 0.9999989641433703, iteration: 57777
loss: 1.0126259326934814,grad_norm: 0.858113886163074, iteration: 57778
loss: 1.1968576908111572,grad_norm: 0.999999485245994, iteration: 57779
loss: 1.1181150674819946,grad_norm: 0.9999994488141386, iteration: 57780
loss: 0.9891730546951294,grad_norm: 0.999999111393396, iteration: 57781
loss: 1.0229583978652954,grad_norm: 0.999999496307729, iteration: 57782
loss: 1.0496418476104736,grad_norm: 0.9999992520691834, iteration: 57783
loss: 1.193803310394287,grad_norm: 0.9999996900486376, iteration: 57784
loss: 1.0377814769744873,grad_norm: 0.9999992587014545, iteration: 57785
loss: 1.0072417259216309,grad_norm: 0.9999994808809904, iteration: 57786
loss: 1.0188087224960327,grad_norm: 0.8359812020659952, iteration: 57787
loss: 1.0226938724517822,grad_norm: 0.9143447089087396, iteration: 57788
loss: 1.0159599781036377,grad_norm: 0.9999992534432717, iteration: 57789
loss: 1.1601709127426147,grad_norm: 0.99999925898698, iteration: 57790
loss: 1.0600346326828003,grad_norm: 0.9999997365959745, iteration: 57791
loss: 1.0926487445831299,grad_norm: 0.9999991794558646, iteration: 57792
loss: 1.1011321544647217,grad_norm: 0.9999994219368746, iteration: 57793
loss: 0.9673715829849243,grad_norm: 0.8402868659067685, iteration: 57794
loss: 1.018809199333191,grad_norm: 0.9999999058275091, iteration: 57795
loss: 1.0156022310256958,grad_norm: 0.9999996862584859, iteration: 57796
loss: 1.2419724464416504,grad_norm: 0.9999997016856821, iteration: 57797
loss: 0.9521304368972778,grad_norm: 0.9999990940383868, iteration: 57798
loss: 1.0517098903656006,grad_norm: 0.9999991141928873, iteration: 57799
loss: 1.064532995223999,grad_norm: 0.9999996382830418, iteration: 57800
loss: 0.9796697497367859,grad_norm: 0.9048343555636005, iteration: 57801
loss: 0.9909593462944031,grad_norm: 0.9247978968993491, iteration: 57802
loss: 0.9821692705154419,grad_norm: 0.9999991849323105, iteration: 57803
loss: 1.1213330030441284,grad_norm: 0.9999998087278098, iteration: 57804
loss: 1.058396339416504,grad_norm: 0.9999996702745155, iteration: 57805
loss: 1.1080424785614014,grad_norm: 0.9999997841979058, iteration: 57806
loss: 1.013786792755127,grad_norm: 0.9999998809550018, iteration: 57807
loss: 1.0008436441421509,grad_norm: 0.999999134941374, iteration: 57808
loss: 1.1227525472640991,grad_norm: 0.9999994167015561, iteration: 57809
loss: 1.0034043788909912,grad_norm: 0.999999022114423, iteration: 57810
loss: 1.0256904363632202,grad_norm: 0.9999994677523953, iteration: 57811
loss: 0.9621946811676025,grad_norm: 0.9999990997934901, iteration: 57812
loss: 1.0294536352157593,grad_norm: 0.8240766144061564, iteration: 57813
loss: 0.986853837966919,grad_norm: 0.9999991210122782, iteration: 57814
loss: 0.999457061290741,grad_norm: 0.9273680810028138, iteration: 57815
loss: 0.994925856590271,grad_norm: 0.9999992075356017, iteration: 57816
loss: 1.000422716140747,grad_norm: 0.9870833882133476, iteration: 57817
loss: 1.037340521812439,grad_norm: 0.9999997927183935, iteration: 57818
loss: 1.1097582578659058,grad_norm: 0.9999999165124257, iteration: 57819
loss: 0.9683097004890442,grad_norm: 0.9594501249688961, iteration: 57820
loss: 1.0474963188171387,grad_norm: 0.9999992765997932, iteration: 57821
loss: 1.0282173156738281,grad_norm: 0.9703930225768608, iteration: 57822
loss: 1.0321420431137085,grad_norm: 0.9999995688577165, iteration: 57823
loss: 1.0185084342956543,grad_norm: 0.7990463525371203, iteration: 57824
loss: 1.1415497064590454,grad_norm: 0.999999271897142, iteration: 57825
loss: 1.0016669034957886,grad_norm: 0.8981816673571491, iteration: 57826
loss: 1.1678348779678345,grad_norm: 0.9999999809768826, iteration: 57827
loss: 1.2566051483154297,grad_norm: 0.9999999575183708, iteration: 57828
loss: 1.0020978450775146,grad_norm: 0.9999991844938471, iteration: 57829
loss: 0.9957985877990723,grad_norm: 0.9806345765123751, iteration: 57830
loss: 1.0037214756011963,grad_norm: 0.981573479676712, iteration: 57831
loss: 1.031498670578003,grad_norm: 0.9999993311532909, iteration: 57832
loss: 1.0474841594696045,grad_norm: 0.9999992926839333, iteration: 57833
loss: 1.0157016515731812,grad_norm: 0.9999991111033811, iteration: 57834
loss: 1.070838212966919,grad_norm: 0.9999997945605765, iteration: 57835
loss: 0.9902046918869019,grad_norm: 0.8854754511895705, iteration: 57836
loss: 1.0869609117507935,grad_norm: 0.9999999390685667, iteration: 57837
loss: 0.9913251996040344,grad_norm: 0.9129206689362201, iteration: 57838
loss: 1.0736315250396729,grad_norm: 0.9999996728516873, iteration: 57839
loss: 1.0076429843902588,grad_norm: 0.9999992828566556, iteration: 57840
loss: 0.9835941791534424,grad_norm: 0.9762980000974327, iteration: 57841
loss: 1.0855553150177002,grad_norm: 0.9999995645508557, iteration: 57842
loss: 0.9755435585975647,grad_norm: 0.937943522538485, iteration: 57843
loss: 1.141552448272705,grad_norm: 0.9999998516541824, iteration: 57844
loss: 1.1020541191101074,grad_norm: 0.9999991493939676, iteration: 57845
loss: 0.9993910193443298,grad_norm: 0.9999996668164326, iteration: 57846
loss: 1.0893982648849487,grad_norm: 0.9999991133169172, iteration: 57847
loss: 1.147528052330017,grad_norm: 0.9999997281148929, iteration: 57848
loss: 0.9982022047042847,grad_norm: 0.9673556559982731, iteration: 57849
loss: 1.0034962892532349,grad_norm: 0.9999993199794243, iteration: 57850
loss: 1.034367322921753,grad_norm: 0.9999992979049248, iteration: 57851
loss: 1.101208209991455,grad_norm: 0.9999992579842304, iteration: 57852
loss: 1.1750128269195557,grad_norm: 0.9999999685358877, iteration: 57853
loss: 1.4642819166183472,grad_norm: 0.9999998262129668, iteration: 57854
loss: 1.0094722509384155,grad_norm: 0.999999282469074, iteration: 57855
loss: 1.0295820236206055,grad_norm: 0.999999743362458, iteration: 57856
loss: 1.0686701536178589,grad_norm: 0.9999993228358196, iteration: 57857
loss: 1.1696298122406006,grad_norm: 0.9999998972531841, iteration: 57858
loss: 1.1689211130142212,grad_norm: 0.999999498818818, iteration: 57859
loss: 1.0093380212783813,grad_norm: 0.9999991381173262, iteration: 57860
loss: 1.4444572925567627,grad_norm: 0.9999999847359254, iteration: 57861
loss: 1.6034680604934692,grad_norm: 0.9999998801800358, iteration: 57862
loss: 1.1866455078125,grad_norm: 0.9999996449112073, iteration: 57863
loss: 1.1128932237625122,grad_norm: 0.9999994374629652, iteration: 57864
loss: 1.1768052577972412,grad_norm: 0.9999997252971394, iteration: 57865
loss: 1.4418535232543945,grad_norm: 0.9999996161707859, iteration: 57866
loss: 1.4374792575836182,grad_norm: 0.99999989831249, iteration: 57867
loss: 1.2827317714691162,grad_norm: 0.9999997620249937, iteration: 57868
loss: 1.2583856582641602,grad_norm: 0.9999996199759564, iteration: 57869
loss: 1.3911291360855103,grad_norm: 0.9999998030748278, iteration: 57870
loss: 1.7287962436676025,grad_norm: 0.999999724691368, iteration: 57871
loss: 1.576572060585022,grad_norm: 0.999999762596441, iteration: 57872
loss: 1.6206845045089722,grad_norm: 0.9999999944831784, iteration: 57873
loss: 1.6411590576171875,grad_norm: 0.9999999652125605, iteration: 57874
loss: 1.6968919038772583,grad_norm: 1.000000138414646, iteration: 57875
loss: 1.5149480104446411,grad_norm: 0.9999998737098726, iteration: 57876
loss: 1.5846267938613892,grad_norm: 0.9999994529696775, iteration: 57877
loss: 1.1840324401855469,grad_norm: 0.9999996376745811, iteration: 57878
loss: 1.3497670888900757,grad_norm: 0.9999997203943287, iteration: 57879
loss: 1.6751618385314941,grad_norm: 0.9999999529446277, iteration: 57880
loss: 1.4156032800674438,grad_norm: 0.9999997252550449, iteration: 57881
loss: 1.2106285095214844,grad_norm: 0.9999998460537063, iteration: 57882
loss: 1.3300334215164185,grad_norm: 0.9999999259236153, iteration: 57883
loss: 1.434714436531067,grad_norm: 0.9999998768115762, iteration: 57884
loss: 1.2720032930374146,grad_norm: 0.9999997789149363, iteration: 57885
loss: 1.160101056098938,grad_norm: 0.9999995075382964, iteration: 57886
loss: 1.1372241973876953,grad_norm: 0.9999997136949036, iteration: 57887
loss: 1.2960697412490845,grad_norm: 0.9999998129028174, iteration: 57888
loss: 1.0660558938980103,grad_norm: 0.9999995087442443, iteration: 57889
loss: 1.2230898141860962,grad_norm: 0.9999996912049873, iteration: 57890
loss: 1.128190279006958,grad_norm: 0.99999972540827, iteration: 57891
loss: 1.1682332754135132,grad_norm: 0.9999995081231706, iteration: 57892
loss: 1.0904849767684937,grad_norm: 0.9999996791847959, iteration: 57893
loss: 1.2991540431976318,grad_norm: 0.9999994903572303, iteration: 57894
loss: 1.1119259595870972,grad_norm: 0.9999995296520875, iteration: 57895
loss: 1.0721869468688965,grad_norm: 0.9999995024443198, iteration: 57896
loss: 1.2028145790100098,grad_norm: 0.999999379682522, iteration: 57897
loss: 1.025997519493103,grad_norm: 0.9999991058852109, iteration: 57898
loss: 1.003361701965332,grad_norm: 0.9999993620044244, iteration: 57899
loss: 1.2147852182388306,grad_norm: 0.999999982372713, iteration: 57900
loss: 1.1667670011520386,grad_norm: 0.9999994524455484, iteration: 57901
loss: 1.2789180278778076,grad_norm: 0.9999996752410234, iteration: 57902
loss: 1.0822763442993164,grad_norm: 0.9999993510564936, iteration: 57903
loss: 1.2293720245361328,grad_norm: 0.9999998746428552, iteration: 57904
loss: 1.047608733177185,grad_norm: 0.9999994344867914, iteration: 57905
loss: 1.034179449081421,grad_norm: 0.9999992227866918, iteration: 57906
loss: 1.0746601819992065,grad_norm: 0.9999993653166156, iteration: 57907
loss: 1.0843360424041748,grad_norm: 0.9999991519239353, iteration: 57908
loss: 1.0182955265045166,grad_norm: 0.9999992437851803, iteration: 57909
loss: 1.0589367151260376,grad_norm: 0.9999995023164477, iteration: 57910
loss: 1.065314531326294,grad_norm: 0.999999277739909, iteration: 57911
loss: 1.094495177268982,grad_norm: 0.9999997981642306, iteration: 57912
loss: 1.160015344619751,grad_norm: 0.9999995301170365, iteration: 57913
loss: 1.0102667808532715,grad_norm: 0.9898305324128261, iteration: 57914
loss: 1.0336350202560425,grad_norm: 1.0000000801612303, iteration: 57915
loss: 1.0366721153259277,grad_norm: 0.9999991399934046, iteration: 57916
loss: 1.1469357013702393,grad_norm: 0.9999996857501947, iteration: 57917
loss: 1.099399447441101,grad_norm: 0.9999996356484788, iteration: 57918
loss: 1.0808804035186768,grad_norm: 0.9999990733213548, iteration: 57919
loss: 1.0964787006378174,grad_norm: 0.9999994175787051, iteration: 57920
loss: 1.1075598001480103,grad_norm: 0.999999863532068, iteration: 57921
loss: 1.1944448947906494,grad_norm: 0.9999996266503542, iteration: 57922
loss: 1.0254168510437012,grad_norm: 0.9999998634713541, iteration: 57923
loss: 1.0562067031860352,grad_norm: 0.9999993090685139, iteration: 57924
loss: 1.1481519937515259,grad_norm: 0.9999997759782477, iteration: 57925
loss: 1.2655267715454102,grad_norm: 0.9999995068030126, iteration: 57926
loss: 1.0923247337341309,grad_norm: 0.9999998622395747, iteration: 57927
loss: 1.2537314891815186,grad_norm: 0.9999997677769851, iteration: 57928
loss: 1.1548347473144531,grad_norm: 0.9999997633095645, iteration: 57929
loss: 1.0521035194396973,grad_norm: 0.9999996882174232, iteration: 57930
loss: 1.061244249343872,grad_norm: 0.999999237993914, iteration: 57931
loss: 1.1102573871612549,grad_norm: 0.9999992741962419, iteration: 57932
loss: 1.0818647146224976,grad_norm: 0.9999997574818607, iteration: 57933
loss: 1.0582953691482544,grad_norm: 0.9999994264759159, iteration: 57934
loss: 1.096315860748291,grad_norm: 0.9999994636084387, iteration: 57935
loss: 1.0973869562149048,grad_norm: 0.999999664559432, iteration: 57936
loss: 1.09859037399292,grad_norm: 0.9999993785341051, iteration: 57937
loss: 1.0771592855453491,grad_norm: 0.9999995218597477, iteration: 57938
loss: 1.1029822826385498,grad_norm: 0.9999996130734472, iteration: 57939
loss: 1.0304665565490723,grad_norm: 0.9814248583321253, iteration: 57940
loss: 1.0999808311462402,grad_norm: 0.9999993036738603, iteration: 57941
loss: 1.0632977485656738,grad_norm: 0.9999992037035859, iteration: 57942
loss: 1.1159782409667969,grad_norm: 0.9999995468617257, iteration: 57943
loss: 1.0553395748138428,grad_norm: 0.9827896065086194, iteration: 57944
loss: 1.179685354232788,grad_norm: 0.9999995261350496, iteration: 57945
loss: 1.0777968168258667,grad_norm: 0.9999992267643105, iteration: 57946
loss: 1.0724766254425049,grad_norm: 0.9999994112411601, iteration: 57947
loss: 1.176639199256897,grad_norm: 0.9999995242746168, iteration: 57948
loss: 1.0503658056259155,grad_norm: 0.9999991829871189, iteration: 57949
loss: 1.2289851903915405,grad_norm: 0.9999994246948671, iteration: 57950
loss: 1.068052053451538,grad_norm: 0.9999991446498933, iteration: 57951
loss: 1.1138558387756348,grad_norm: 0.9999995814054502, iteration: 57952
loss: 1.0320497751235962,grad_norm: 0.9999997351959761, iteration: 57953
loss: 1.0749553442001343,grad_norm: 0.9999991133994022, iteration: 57954
loss: 1.2261055707931519,grad_norm: 0.9999995013705535, iteration: 57955
loss: 1.216351866722107,grad_norm: 0.9999997639626176, iteration: 57956
loss: 1.0148077011108398,grad_norm: 0.99999948768188, iteration: 57957
loss: 1.1564509868621826,grad_norm: 0.9999997660243933, iteration: 57958
loss: 1.0769867897033691,grad_norm: 0.9999991694931538, iteration: 57959
loss: 1.0946892499923706,grad_norm: 0.9999994835770991, iteration: 57960
loss: 1.0953021049499512,grad_norm: 0.999999330056901, iteration: 57961
loss: 1.0493550300598145,grad_norm: 0.9999993419024896, iteration: 57962
loss: 1.033986210823059,grad_norm: 0.930034625757786, iteration: 57963
loss: 1.0019689798355103,grad_norm: 0.9999992221665881, iteration: 57964
loss: 1.1777358055114746,grad_norm: 0.9999998940681836, iteration: 57965
loss: 1.032027006149292,grad_norm: 0.9999993007997322, iteration: 57966
loss: 1.0887787342071533,grad_norm: 0.999999288861233, iteration: 57967
loss: 1.2990411520004272,grad_norm: 0.999999659353426, iteration: 57968
loss: 1.0940980911254883,grad_norm: 0.9999993509813917, iteration: 57969
loss: 1.0201411247253418,grad_norm: 0.9999993015591313, iteration: 57970
loss: 1.0671732425689697,grad_norm: 0.9999993235156485, iteration: 57971
loss: 1.019713282585144,grad_norm: 0.9999990955235516, iteration: 57972
loss: 1.0500973463058472,grad_norm: 0.9999990246087765, iteration: 57973
loss: 1.1123121976852417,grad_norm: 0.9999993122121643, iteration: 57974
loss: 1.0975145101547241,grad_norm: 0.9999994973907987, iteration: 57975
loss: 1.020349383354187,grad_norm: 0.9999992047328486, iteration: 57976
loss: 1.0677443742752075,grad_norm: 0.9522581846988075, iteration: 57977
loss: 1.1286813020706177,grad_norm: 0.9999994651509272, iteration: 57978
loss: 1.1563223600387573,grad_norm: 0.999999691067516, iteration: 57979
loss: 1.1316925287246704,grad_norm: 0.9999997519092576, iteration: 57980
loss: 1.33114755153656,grad_norm: 0.9999998574757907, iteration: 57981
loss: 1.0532914400100708,grad_norm: 0.9999990028387977, iteration: 57982
loss: 1.134797215461731,grad_norm: 0.9999994765714514, iteration: 57983
loss: 1.154624342918396,grad_norm: 0.9999994958333629, iteration: 57984
loss: 1.057680368423462,grad_norm: 0.9999992900291517, iteration: 57985
loss: 1.1014480590820312,grad_norm: 0.9999993580756649, iteration: 57986
loss: 1.1504384279251099,grad_norm: 0.9999997207619009, iteration: 57987
loss: 1.187785267829895,grad_norm: 0.9999995511579627, iteration: 57988
loss: 1.0331768989562988,grad_norm: 0.9999992521278211, iteration: 57989
loss: 1.113119125366211,grad_norm: 0.9999997027159644, iteration: 57990
loss: 1.1244938373565674,grad_norm: 0.9999994387216169, iteration: 57991
loss: 1.059249758720398,grad_norm: 0.9999994741774937, iteration: 57992
loss: 1.0827702283859253,grad_norm: 0.9999992248862565, iteration: 57993
loss: 1.1775470972061157,grad_norm: 0.99999943042279, iteration: 57994
loss: 1.164046049118042,grad_norm: 0.9999995163625764, iteration: 57995
loss: 1.051647663116455,grad_norm: 0.9999992512933852, iteration: 57996
loss: 1.1120827198028564,grad_norm: 0.9999997717097396, iteration: 57997
loss: 1.116112470626831,grad_norm: 0.9999995988857265, iteration: 57998
loss: 1.1872507333755493,grad_norm: 0.9999996686983286, iteration: 57999
loss: 1.0938814878463745,grad_norm: 0.9999992698038663, iteration: 58000
loss: 1.1636139154434204,grad_norm: 0.999999306820588, iteration: 58001
loss: 0.9718697667121887,grad_norm: 0.99999926249551, iteration: 58002
loss: 1.0705655813217163,grad_norm: 0.999999261846458, iteration: 58003
loss: 1.0498887300491333,grad_norm: 0.9999998505270191, iteration: 58004
loss: 1.018815279006958,grad_norm: 0.9999993098878976, iteration: 58005
loss: 1.1004828214645386,grad_norm: 0.9999992339641048, iteration: 58006
loss: 1.1812055110931396,grad_norm: 0.9999990403572323, iteration: 58007
loss: 1.1349499225616455,grad_norm: 0.9999993784704051, iteration: 58008
loss: 1.062308430671692,grad_norm: 0.9999991915475716, iteration: 58009
loss: 1.061781883239746,grad_norm: 0.9999990159966266, iteration: 58010
loss: 1.1232407093048096,grad_norm: 0.9999994103404312, iteration: 58011
loss: 1.0243638753890991,grad_norm: 0.9999991398034044, iteration: 58012
loss: 1.080632209777832,grad_norm: 0.9999992412135538, iteration: 58013
loss: 1.1667811870574951,grad_norm: 0.9999993949841649, iteration: 58014
loss: 1.064835786819458,grad_norm: 0.9999995473579782, iteration: 58015
loss: 1.0408991575241089,grad_norm: 0.999999536657036, iteration: 58016
loss: 1.0424551963806152,grad_norm: 0.9999992717223066, iteration: 58017
loss: 1.0494256019592285,grad_norm: 0.9999990696649413, iteration: 58018
loss: 1.0226725339889526,grad_norm: 0.9999996563503721, iteration: 58019
loss: 1.0686980485916138,grad_norm: 0.9999998951569099, iteration: 58020
loss: 1.0132086277008057,grad_norm: 0.9999991097254117, iteration: 58021
loss: 1.0125396251678467,grad_norm: 0.9999997351432209, iteration: 58022
loss: 1.029051423072815,grad_norm: 0.939721397748828, iteration: 58023
loss: 1.036048412322998,grad_norm: 0.8098778571875321, iteration: 58024
loss: 1.12211012840271,grad_norm: 0.9999993989658383, iteration: 58025
loss: 1.0526658296585083,grad_norm: 0.9999991098653457, iteration: 58026
loss: 1.0190823078155518,grad_norm: 0.9582826793393912, iteration: 58027
loss: 1.0368852615356445,grad_norm: 0.9999992077304765, iteration: 58028
loss: 1.0921502113342285,grad_norm: 0.9999994108093048, iteration: 58029
loss: 1.1426875591278076,grad_norm: 0.9999993525843928, iteration: 58030
loss: 1.063336730003357,grad_norm: 0.9999995479131784, iteration: 58031
loss: 1.2085920572280884,grad_norm: 0.9999993144328971, iteration: 58032
loss: 0.9911410808563232,grad_norm: 0.9819008567338546, iteration: 58033
loss: 1.1467528343200684,grad_norm: 0.9999990372801857, iteration: 58034
loss: 1.1285345554351807,grad_norm: 0.9999990896946706, iteration: 58035
loss: 1.0848093032836914,grad_norm: 0.9999994489099078, iteration: 58036
loss: 1.0731046199798584,grad_norm: 0.9999993171214978, iteration: 58037
loss: 1.1418366432189941,grad_norm: 0.9999995561422184, iteration: 58038
loss: 1.035508155822754,grad_norm: 0.9999998105779387, iteration: 58039
loss: 1.0479233264923096,grad_norm: 0.9795349412465809, iteration: 58040
loss: 1.0251187086105347,grad_norm: 0.9999993390284725, iteration: 58041
loss: 1.0116581916809082,grad_norm: 0.9999992015592938, iteration: 58042
loss: 1.0366793870925903,grad_norm: 0.9999994452602045, iteration: 58043
loss: 1.123692512512207,grad_norm: 0.9999996849040986, iteration: 58044
loss: 1.0409811735153198,grad_norm: 0.9999994317016468, iteration: 58045
loss: 1.1083753108978271,grad_norm: 0.9999996419069828, iteration: 58046
loss: 1.0864641666412354,grad_norm: 0.9999992837802743, iteration: 58047
loss: 1.0481836795806885,grad_norm: 0.9999991970668126, iteration: 58048
loss: 0.994077742099762,grad_norm: 0.9999991149453185, iteration: 58049
loss: 1.018110990524292,grad_norm: 0.9999991021378976, iteration: 58050
loss: 1.0452580451965332,grad_norm: 0.9999992793394121, iteration: 58051
loss: 1.0424983501434326,grad_norm: 0.9686059320130428, iteration: 58052
loss: 1.0442506074905396,grad_norm: 0.9999993988526409, iteration: 58053
loss: 1.017331838607788,grad_norm: 0.9999992685329431, iteration: 58054
loss: 1.043067455291748,grad_norm: 0.999999065882973, iteration: 58055
loss: 1.0186982154846191,grad_norm: 0.9999990914241131, iteration: 58056
loss: 1.0917797088623047,grad_norm: 0.9999993492960669, iteration: 58057
loss: 1.0325829982757568,grad_norm: 0.9999993375762105, iteration: 58058
loss: 1.0733891725540161,grad_norm: 0.9999996275851946, iteration: 58059
loss: 1.0184803009033203,grad_norm: 0.9765607233050594, iteration: 58060
loss: 1.1912747621536255,grad_norm: 0.9999995208986904, iteration: 58061
loss: 1.0226985216140747,grad_norm: 0.9999988586899734, iteration: 58062
loss: 1.112969160079956,grad_norm: 0.999999436767548, iteration: 58063
loss: 0.997188925743103,grad_norm: 0.9999991626629796, iteration: 58064
loss: 1.0148792266845703,grad_norm: 0.9123157927574588, iteration: 58065
loss: 1.0114598274230957,grad_norm: 0.9012159637350714, iteration: 58066
loss: 0.964598536491394,grad_norm: 0.974012155137423, iteration: 58067
loss: 1.1634411811828613,grad_norm: 0.999999925807825, iteration: 58068
loss: 1.0899968147277832,grad_norm: 0.9999995933693442, iteration: 58069
loss: 1.0574265718460083,grad_norm: 0.9999995176780108, iteration: 58070
loss: 1.1500396728515625,grad_norm: 0.9999998463563088, iteration: 58071
loss: 1.0126932859420776,grad_norm: 0.9999993095228725, iteration: 58072
loss: 1.109419822692871,grad_norm: 0.9999992425575175, iteration: 58073
loss: 1.0717421770095825,grad_norm: 0.9999990348277628, iteration: 58074
loss: 1.154127836227417,grad_norm: 0.9999999803916348, iteration: 58075
loss: 1.154314637184143,grad_norm: 0.9999992824506617, iteration: 58076
loss: 1.0700812339782715,grad_norm: 0.9999993637575578, iteration: 58077
loss: 0.9986827373504639,grad_norm: 0.9999991991384737, iteration: 58078
loss: 1.0864142179489136,grad_norm: 0.9999999768100925, iteration: 58079
loss: 0.9975633025169373,grad_norm: 0.9999998682712481, iteration: 58080
loss: 1.0418859720230103,grad_norm: 0.9999993904211307, iteration: 58081
loss: 1.0302270650863647,grad_norm: 0.9999992798280757, iteration: 58082
loss: 1.0548648834228516,grad_norm: 0.9999992227804392, iteration: 58083
loss: 1.184471845626831,grad_norm: 0.9999999607003837, iteration: 58084
loss: 1.0212512016296387,grad_norm: 0.99999981906005, iteration: 58085
loss: 0.9953027963638306,grad_norm: 0.7829322472481153, iteration: 58086
loss: 1.052071213722229,grad_norm: 0.9999996718724953, iteration: 58087
loss: 1.109442114830017,grad_norm: 0.9999997173616645, iteration: 58088
loss: 1.0057615041732788,grad_norm: 0.9999994729756837, iteration: 58089
loss: 1.1006826162338257,grad_norm: 0.9999997977309827, iteration: 58090
loss: 1.0228769779205322,grad_norm: 0.9795881392583373, iteration: 58091
loss: 0.9950336217880249,grad_norm: 0.9999989835189418, iteration: 58092
loss: 1.062554121017456,grad_norm: 0.9999998496489891, iteration: 58093
loss: 1.0783140659332275,grad_norm: 0.9999992564018013, iteration: 58094
loss: 1.1988791227340698,grad_norm: 0.9999997293387884, iteration: 58095
loss: 1.0495144128799438,grad_norm: 0.9999997670156773, iteration: 58096
loss: 1.314543604850769,grad_norm: 0.9999995528424872, iteration: 58097
loss: 1.0845824480056763,grad_norm: 0.9999991667532836, iteration: 58098
loss: 1.022962212562561,grad_norm: 0.9999990788445934, iteration: 58099
loss: 1.0291475057601929,grad_norm: 0.9999992505297679, iteration: 58100
loss: 1.0439610481262207,grad_norm: 0.9999995890161405, iteration: 58101
loss: 1.1025521755218506,grad_norm: 0.9999992827375445, iteration: 58102
loss: 1.0388484001159668,grad_norm: 0.9999992949996261, iteration: 58103
loss: 1.037118911743164,grad_norm: 0.9286400477718223, iteration: 58104
loss: 1.1546010971069336,grad_norm: 0.9999998626625768, iteration: 58105
loss: 1.1404688358306885,grad_norm: 0.999999567843988, iteration: 58106
loss: 1.185632586479187,grad_norm: 0.9999996158820222, iteration: 58107
loss: 1.0421898365020752,grad_norm: 0.9900984310884556, iteration: 58108
loss: 1.1331387758255005,grad_norm: 0.9999997740440426, iteration: 58109
loss: 1.0204846858978271,grad_norm: 0.9999991738833746, iteration: 58110
loss: 0.9928122758865356,grad_norm: 0.9001119180863685, iteration: 58111
loss: 1.0880714654922485,grad_norm: 0.9999996897741087, iteration: 58112
loss: 1.018662452697754,grad_norm: 0.9999992067671838, iteration: 58113
loss: 1.0560758113861084,grad_norm: 0.9999996611672606, iteration: 58114
loss: 1.059364676475525,grad_norm: 0.9827923253550078, iteration: 58115
loss: 1.1222045421600342,grad_norm: 0.9999992180323575, iteration: 58116
loss: 1.0221402645111084,grad_norm: 0.9862854949272013, iteration: 58117
loss: 1.1342387199401855,grad_norm: 0.9999995486366063, iteration: 58118
loss: 1.1318329572677612,grad_norm: 0.9999997989487143, iteration: 58119
loss: 1.0373599529266357,grad_norm: 0.9999992567714492, iteration: 58120
loss: 1.028579592704773,grad_norm: 0.9999992320464608, iteration: 58121
loss: 1.0063101053237915,grad_norm: 0.99999936415686, iteration: 58122
loss: 1.0750503540039062,grad_norm: 0.999999565978071, iteration: 58123
loss: 1.0958189964294434,grad_norm: 0.9999997272016329, iteration: 58124
loss: 1.0237311124801636,grad_norm: 0.8951186287503734, iteration: 58125
loss: 1.049037218093872,grad_norm: 0.999999274012582, iteration: 58126
loss: 1.076033353805542,grad_norm: 0.9999991887601851, iteration: 58127
loss: 1.033421516418457,grad_norm: 0.9999994080044124, iteration: 58128
loss: 1.041892409324646,grad_norm: 0.9999998182046622, iteration: 58129
loss: 0.9917465448379517,grad_norm: 0.8724200550746011, iteration: 58130
loss: 1.0240198373794556,grad_norm: 0.9999998360284512, iteration: 58131
loss: 1.1029841899871826,grad_norm: 0.9999997692524052, iteration: 58132
loss: 1.0802078247070312,grad_norm: 0.9999994065937268, iteration: 58133
loss: 1.021680474281311,grad_norm: 0.9999997759042776, iteration: 58134
loss: 0.9876692891120911,grad_norm: 0.9999998393814625, iteration: 58135
loss: 1.0161722898483276,grad_norm: 0.9772262569716699, iteration: 58136
loss: 1.0084880590438843,grad_norm: 0.9999994117811704, iteration: 58137
loss: 1.0403059720993042,grad_norm: 0.9999995234589294, iteration: 58138
loss: 1.0801712274551392,grad_norm: 0.9999995310875143, iteration: 58139
loss: 1.053242564201355,grad_norm: 0.9999995095171283, iteration: 58140
loss: 1.0042163133621216,grad_norm: 0.9999995605341199, iteration: 58141
loss: 1.0568007230758667,grad_norm: 0.9999996828375435, iteration: 58142
loss: 1.0296602249145508,grad_norm: 0.9741386813773255, iteration: 58143
loss: 1.0180209875106812,grad_norm: 0.9999991528202022, iteration: 58144
loss: 1.0321651697158813,grad_norm: 0.9999994734335234, iteration: 58145
loss: 1.015029788017273,grad_norm: 0.8752404873208567, iteration: 58146
loss: 1.0156306028366089,grad_norm: 0.9999997219715454, iteration: 58147
loss: 1.0846980810165405,grad_norm: 0.9373575546403102, iteration: 58148
loss: 0.9894228577613831,grad_norm: 0.9999998843724782, iteration: 58149
loss: 1.0410794019699097,grad_norm: 0.9999990782460633, iteration: 58150
loss: 1.0600636005401611,grad_norm: 0.9999999813999932, iteration: 58151
loss: 1.0154414176940918,grad_norm: 0.9999998501983243, iteration: 58152
loss: 0.9883979558944702,grad_norm: 0.9999993801640163, iteration: 58153
loss: 1.0271146297454834,grad_norm: 0.9999991564699283, iteration: 58154
loss: 1.044512152671814,grad_norm: 0.9999994130577773, iteration: 58155
loss: 0.981148898601532,grad_norm: 0.8880113334069727, iteration: 58156
loss: 1.0679548978805542,grad_norm: 0.9846118998354442, iteration: 58157
loss: 1.050775170326233,grad_norm: 0.9999993440838156, iteration: 58158
loss: 1.0401520729064941,grad_norm: 0.9999993867249934, iteration: 58159
loss: 0.9935804009437561,grad_norm: 0.9924050119396443, iteration: 58160
loss: 1.008904218673706,grad_norm: 0.9999990972753099, iteration: 58161
loss: 1.005805253982544,grad_norm: 0.9999997525479204, iteration: 58162
loss: 1.0274581909179688,grad_norm: 0.999999457688808, iteration: 58163
loss: 1.125205397605896,grad_norm: 0.9999993147139323, iteration: 58164
loss: 0.9901952147483826,grad_norm: 0.8157417918337644, iteration: 58165
loss: 0.9992730021476746,grad_norm: 0.9999990229735273, iteration: 58166
loss: 1.056330680847168,grad_norm: 0.8716554669332341, iteration: 58167
loss: 1.026853322982788,grad_norm: 0.9999990428378104, iteration: 58168
loss: 1.0399349927902222,grad_norm: 0.9999990411771694, iteration: 58169
loss: 1.0011199712753296,grad_norm: 0.9999993190552826, iteration: 58170
loss: 1.0247094631195068,grad_norm: 0.9162035307986469, iteration: 58171
loss: 0.986048698425293,grad_norm: 0.9690945554099516, iteration: 58172
loss: 1.0155550241470337,grad_norm: 0.9007303392915221, iteration: 58173
loss: 0.9760749340057373,grad_norm: 0.9754630730158566, iteration: 58174
loss: 1.0348155498504639,grad_norm: 0.999999256979034, iteration: 58175
loss: 0.996894896030426,grad_norm: 0.9239738738495153, iteration: 58176
loss: 1.008844256401062,grad_norm: 0.9999993616776995, iteration: 58177
loss: 1.0521336793899536,grad_norm: 0.9999996362163405, iteration: 58178
loss: 1.0172079801559448,grad_norm: 0.9879962971446, iteration: 58179
loss: 1.0238401889801025,grad_norm: 0.8295556183212203, iteration: 58180
loss: 1.030267357826233,grad_norm: 0.9999992704750758, iteration: 58181
loss: 0.9876682162284851,grad_norm: 0.8956882627820331, iteration: 58182
loss: 0.9873790740966797,grad_norm: 0.9999994576697948, iteration: 58183
loss: 1.0290964841842651,grad_norm: 0.9999990173535505, iteration: 58184
loss: 0.9941781163215637,grad_norm: 0.9398000419240474, iteration: 58185
loss: 1.0273032188415527,grad_norm: 0.9999991148338666, iteration: 58186
loss: 0.9504325985908508,grad_norm: 0.9221508089991177, iteration: 58187
loss: 1.0896517038345337,grad_norm: 0.9536448586090063, iteration: 58188
loss: 1.0377193689346313,grad_norm: 0.999999953064497, iteration: 58189
loss: 1.0304968357086182,grad_norm: 0.9999990780600684, iteration: 58190
loss: 0.9993996024131775,grad_norm: 0.999999990292942, iteration: 58191
loss: 0.9594661593437195,grad_norm: 0.9349312985204459, iteration: 58192
loss: 1.192569375038147,grad_norm: 0.9999999619753004, iteration: 58193
loss: 1.0481749773025513,grad_norm: 0.999999879117877, iteration: 58194
loss: 1.0154308080673218,grad_norm: 0.9999991611020458, iteration: 58195
loss: 1.0600017309188843,grad_norm: 0.9999992298242418, iteration: 58196
loss: 1.0517040491104126,grad_norm: 0.999998987225217, iteration: 58197
loss: 1.0496070384979248,grad_norm: 0.999999205421997, iteration: 58198
loss: 1.0652482509613037,grad_norm: 0.9223528339687255, iteration: 58199
loss: 1.1198768615722656,grad_norm: 0.9999994268098563, iteration: 58200
loss: 0.9719957113265991,grad_norm: 0.9092527842208233, iteration: 58201
loss: 1.1417322158813477,grad_norm: 0.9999996980181901, iteration: 58202
loss: 1.0115530490875244,grad_norm: 0.8716554724179737, iteration: 58203
loss: 0.9896653294563293,grad_norm: 0.8679559865854973, iteration: 58204
loss: 0.9954652190208435,grad_norm: 0.9458902575318944, iteration: 58205
loss: 0.9910029172897339,grad_norm: 0.9999990553929103, iteration: 58206
loss: 0.9822081327438354,grad_norm: 0.9999991281047964, iteration: 58207
loss: 0.9869541525840759,grad_norm: 0.8555565074421435, iteration: 58208
loss: 0.9552170634269714,grad_norm: 0.9999990339047008, iteration: 58209
loss: 1.0347604751586914,grad_norm: 0.9393852140011737, iteration: 58210
loss: 1.1152087450027466,grad_norm: 0.9999998161829975, iteration: 58211
loss: 1.0883755683898926,grad_norm: 0.9999993963163182, iteration: 58212
loss: 1.0553925037384033,grad_norm: 0.908535358650362, iteration: 58213
loss: 1.0871385335922241,grad_norm: 0.99999976714236, iteration: 58214
loss: 1.158905267715454,grad_norm: 0.9999998433560365, iteration: 58215
loss: 1.1697871685028076,grad_norm: 0.9999994235255573, iteration: 58216
loss: 1.0222194194793701,grad_norm: 0.9999990131944537, iteration: 58217
loss: 1.0462863445281982,grad_norm: 0.9999990363740188, iteration: 58218
loss: 0.9797640442848206,grad_norm: 0.9999992825411945, iteration: 58219
loss: 0.9995926022529602,grad_norm: 0.9827772172187604, iteration: 58220
loss: 0.9279977679252625,grad_norm: 0.9999989266342679, iteration: 58221
loss: 1.0529460906982422,grad_norm: 0.9765381959450355, iteration: 58222
loss: 1.1592071056365967,grad_norm: 0.9999996064382414, iteration: 58223
loss: 0.9611150026321411,grad_norm: 0.9999990809833683, iteration: 58224
loss: 0.9916746020317078,grad_norm: 0.8817316722412524, iteration: 58225
loss: 1.0095806121826172,grad_norm: 0.9999998189408814, iteration: 58226
loss: 1.0153101682662964,grad_norm: 0.9246524993782316, iteration: 58227
loss: 1.0552310943603516,grad_norm: 0.9999991014037467, iteration: 58228
loss: 1.0716139078140259,grad_norm: 0.9999991381076927, iteration: 58229
loss: 0.9778390526771545,grad_norm: 0.8835494994940621, iteration: 58230
loss: 1.0217931270599365,grad_norm: 0.999999259576673, iteration: 58231
loss: 1.0258702039718628,grad_norm: 0.9999999506435772, iteration: 58232
loss: 0.9934377670288086,grad_norm: 0.9999993523626455, iteration: 58233
loss: 1.0645884275436401,grad_norm: 0.9999992241448526, iteration: 58234
loss: 1.064595103263855,grad_norm: 0.9999995840253018, iteration: 58235
loss: 1.055914044380188,grad_norm: 0.9999990698291052, iteration: 58236
loss: 1.0078705549240112,grad_norm: 0.8206574585327885, iteration: 58237
loss: 1.0278308391571045,grad_norm: 0.8943261183648774, iteration: 58238
loss: 1.0408848524093628,grad_norm: 0.9999992903925901, iteration: 58239
loss: 1.0005587339401245,grad_norm: 0.9055292236696865, iteration: 58240
loss: 1.0093501806259155,grad_norm: 0.8690064571331161, iteration: 58241
loss: 1.0026780366897583,grad_norm: 0.9752214449026637, iteration: 58242
loss: 1.0989879369735718,grad_norm: 0.9999996510347903, iteration: 58243
loss: 1.0716521739959717,grad_norm: 0.999999262465026, iteration: 58244
loss: 0.9815590977668762,grad_norm: 0.999999110691737, iteration: 58245
loss: 1.061294436454773,grad_norm: 0.9999990777774244, iteration: 58246
loss: 1.0734654664993286,grad_norm: 0.9947941116124636, iteration: 58247
loss: 1.075368881225586,grad_norm: 0.9999993286563548, iteration: 58248
loss: 1.0644828081130981,grad_norm: 0.9999991509730436, iteration: 58249
loss: 1.006653904914856,grad_norm: 0.8866213690549768, iteration: 58250
loss: 1.1144747734069824,grad_norm: 0.9999999362701479, iteration: 58251
loss: 0.9995027780532837,grad_norm: 0.9999995089054318, iteration: 58252
loss: 0.999257504940033,grad_norm: 0.999999752698823, iteration: 58253
loss: 1.0006874799728394,grad_norm: 0.872912624754846, iteration: 58254
loss: 1.0198734998703003,grad_norm: 0.9999992867644939, iteration: 58255
loss: 1.0652989149093628,grad_norm: 0.9999990546470549, iteration: 58256
loss: 1.0487436056137085,grad_norm: 0.9999992612477135, iteration: 58257
loss: 1.1054925918579102,grad_norm: 0.9999998395334831, iteration: 58258
loss: 1.1700984239578247,grad_norm: 0.9999994470400968, iteration: 58259
loss: 1.031874418258667,grad_norm: 0.8593189985172136, iteration: 58260
loss: 1.094359278678894,grad_norm: 0.9999991198807804, iteration: 58261
loss: 0.9973613619804382,grad_norm: 0.9999377168392484, iteration: 58262
loss: 0.9999526143074036,grad_norm: 0.9999990978354935, iteration: 58263
loss: 0.9680320620536804,grad_norm: 0.9999992174997427, iteration: 58264
loss: 0.9789358973503113,grad_norm: 0.9925241471726037, iteration: 58265
loss: 0.9570389986038208,grad_norm: 0.999999169036349, iteration: 58266
loss: 1.0357552766799927,grad_norm: 0.9999992043854135, iteration: 58267
loss: 0.9808827638626099,grad_norm: 0.8466664446236974, iteration: 58268
loss: 1.0705338716506958,grad_norm: 0.9999994194508893, iteration: 58269
loss: 1.1189733743667603,grad_norm: 0.999999842051059, iteration: 58270
loss: 1.0879850387573242,grad_norm: 0.999999826946896, iteration: 58271
loss: 1.1503055095672607,grad_norm: 0.9999996720037788, iteration: 58272
loss: 1.0672290325164795,grad_norm: 0.9999994917939676, iteration: 58273
loss: 1.017471194267273,grad_norm: 0.9999997632939845, iteration: 58274
loss: 1.0039819478988647,grad_norm: 0.9296147820297307, iteration: 58275
loss: 1.0969274044036865,grad_norm: 0.9999999175208042, iteration: 58276
loss: 1.049201488494873,grad_norm: 0.9999999897063513, iteration: 58277
loss: 1.081906795501709,grad_norm: 0.9999993381595862, iteration: 58278
loss: 1.0323145389556885,grad_norm: 0.9999999129887708, iteration: 58279
loss: 1.0221120119094849,grad_norm: 0.9999995206887865, iteration: 58280
loss: 1.1601369380950928,grad_norm: 0.9999994516766906, iteration: 58281
loss: 1.0079281330108643,grad_norm: 0.9999991850400445, iteration: 58282
loss: 1.1029009819030762,grad_norm: 0.9999998860854632, iteration: 58283
loss: 1.0108444690704346,grad_norm: 0.96317756582999, iteration: 58284
loss: 1.0707135200500488,grad_norm: 0.9644862104749697, iteration: 58285
loss: 0.99082350730896,grad_norm: 0.9999991746554652, iteration: 58286
loss: 1.0219230651855469,grad_norm: 0.9999992977681837, iteration: 58287
loss: 0.9989762306213379,grad_norm: 0.7721111662975773, iteration: 58288
loss: 1.102855920791626,grad_norm: 0.9878384374130214, iteration: 58289
loss: 1.0408860445022583,grad_norm: 0.99999988832832, iteration: 58290
loss: 1.0255143642425537,grad_norm: 0.929683125210763, iteration: 58291
loss: 1.0112141370773315,grad_norm: 0.7945984230753143, iteration: 58292
loss: 1.0305397510528564,grad_norm: 0.9999995770378327, iteration: 58293
loss: 1.003594160079956,grad_norm: 0.9552317336473449, iteration: 58294
loss: 1.0509220361709595,grad_norm: 0.9999994315016271, iteration: 58295
loss: 1.0042922496795654,grad_norm: 0.999999046945252, iteration: 58296
loss: 1.0237468481063843,grad_norm: 0.9999991822656447, iteration: 58297
loss: 1.0071114301681519,grad_norm: 0.9999993532545662, iteration: 58298
loss: 1.0304005146026611,grad_norm: 0.9999992541842234, iteration: 58299
loss: 1.0820285081863403,grad_norm: 0.9999993998087212, iteration: 58300
loss: 1.007031798362732,grad_norm: 0.8196476931560873, iteration: 58301
loss: 1.0186630487442017,grad_norm: 0.9999991166868741, iteration: 58302
loss: 0.9721917510032654,grad_norm: 0.8962069000324302, iteration: 58303
loss: 0.990481972694397,grad_norm: 0.9999993818995548, iteration: 58304
loss: 1.006087303161621,grad_norm: 0.9999991967060494, iteration: 58305
loss: 1.0109976530075073,grad_norm: 0.9663434826616439, iteration: 58306
loss: 0.9924058318138123,grad_norm: 0.9249062832449861, iteration: 58307
loss: 1.004442811012268,grad_norm: 0.9999994370324549, iteration: 58308
loss: 1.0018984079360962,grad_norm: 0.9791179517827694, iteration: 58309
loss: 1.0786538124084473,grad_norm: 0.9999998561607716, iteration: 58310
loss: 0.9858245849609375,grad_norm: 0.9999989774128647, iteration: 58311
loss: 1.0398204326629639,grad_norm: 0.9999991714162684, iteration: 58312
loss: 1.019441843032837,grad_norm: 0.9999998893164669, iteration: 58313
loss: 0.9864402413368225,grad_norm: 0.999999289901992, iteration: 58314
loss: 1.070456624031067,grad_norm: 0.9999998618002423, iteration: 58315
loss: 1.1081583499908447,grad_norm: 0.9999998230849477, iteration: 58316
loss: 1.0125268697738647,grad_norm: 0.9397392360832346, iteration: 58317
loss: 1.064435601234436,grad_norm: 0.9999999039952433, iteration: 58318
loss: 1.0211700201034546,grad_norm: 0.8613098276745876, iteration: 58319
loss: 0.9810073375701904,grad_norm: 0.9999990387893926, iteration: 58320
loss: 0.9691832661628723,grad_norm: 0.9450459288073707, iteration: 58321
loss: 0.9846959114074707,grad_norm: 0.9999991011517886, iteration: 58322
loss: 1.0375313758850098,grad_norm: 0.9171791304391963, iteration: 58323
loss: 0.9392244219779968,grad_norm: 0.9999990789690242, iteration: 58324
loss: 1.0726078748703003,grad_norm: 0.9999999049564346, iteration: 58325
loss: 1.0775539875030518,grad_norm: 0.9999996221309769, iteration: 58326
loss: 1.1395556926727295,grad_norm: 0.9999991967029129, iteration: 58327
loss: 0.999660849571228,grad_norm: 0.9999991545035584, iteration: 58328
loss: 1.0023294687271118,grad_norm: 0.9999998891231032, iteration: 58329
loss: 1.011373519897461,grad_norm: 0.9999997544734575, iteration: 58330
loss: 1.0328967571258545,grad_norm: 0.9999990804828818, iteration: 58331
loss: 1.0062000751495361,grad_norm: 0.9999990480093089, iteration: 58332
loss: 1.007238507270813,grad_norm: 0.9999990147127704, iteration: 58333
loss: 1.0181182622909546,grad_norm: 0.9587354352914801, iteration: 58334
loss: 0.9785774350166321,grad_norm: 0.9654182878991701, iteration: 58335
loss: 1.0077811479568481,grad_norm: 0.9999991287087149, iteration: 58336
loss: 1.0733133554458618,grad_norm: 0.9999998673592125, iteration: 58337
loss: 1.0223697423934937,grad_norm: 0.9999994266235408, iteration: 58338
loss: 1.0036101341247559,grad_norm: 0.9559132897684842, iteration: 58339
loss: 0.9817695021629333,grad_norm: 0.7775503618290402, iteration: 58340
loss: 1.024681568145752,grad_norm: 0.9999991800810091, iteration: 58341
loss: 1.0046064853668213,grad_norm: 0.860892203277743, iteration: 58342
loss: 1.04297935962677,grad_norm: 0.9999991071035707, iteration: 58343
loss: 0.9734162092208862,grad_norm: 0.9999991237761846, iteration: 58344
loss: 0.9780306220054626,grad_norm: 0.9999992024186924, iteration: 58345
loss: 1.0431764125823975,grad_norm: 0.9999997683093603, iteration: 58346
loss: 1.0389693975448608,grad_norm: 0.9999991101298454, iteration: 58347
loss: 0.9940395951271057,grad_norm: 0.8875155624360953, iteration: 58348
loss: 1.0271238088607788,grad_norm: 0.9413241951723016, iteration: 58349
loss: 1.1047285795211792,grad_norm: 0.9999994356186395, iteration: 58350
loss: 0.9860407114028931,grad_norm: 0.9765430485853139, iteration: 58351
loss: 1.0342154502868652,grad_norm: 0.9751677181032531, iteration: 58352
loss: 1.0144689083099365,grad_norm: 0.9999989850098242, iteration: 58353
loss: 1.0043251514434814,grad_norm: 0.9999991529602394, iteration: 58354
loss: 1.0123783349990845,grad_norm: 0.9999992176735467, iteration: 58355
loss: 1.01234769821167,grad_norm: 0.8638334542891446, iteration: 58356
loss: 1.007722020149231,grad_norm: 0.9436721722040521, iteration: 58357
loss: 1.03206205368042,grad_norm: 0.8390450211812861, iteration: 58358
loss: 0.9958905577659607,grad_norm: 0.9475837739657244, iteration: 58359
loss: 0.9865242838859558,grad_norm: 0.870165900937344, iteration: 58360
loss: 1.0060229301452637,grad_norm: 0.9999991358993064, iteration: 58361
loss: 1.0159159898757935,grad_norm: 0.9999991955119737, iteration: 58362
loss: 1.044547438621521,grad_norm: 0.9680585495360993, iteration: 58363
loss: 1.0061159133911133,grad_norm: 0.9999991442683789, iteration: 58364
loss: 1.1381529569625854,grad_norm: 0.9999999353703389, iteration: 58365
loss: 0.990433394908905,grad_norm: 0.9999989928378907, iteration: 58366
loss: 1.0088815689086914,grad_norm: 0.9999996979134423, iteration: 58367
loss: 1.0178840160369873,grad_norm: 0.8794750791544782, iteration: 58368
loss: 0.9955825805664062,grad_norm: 0.8633147257763436, iteration: 58369
loss: 1.0113673210144043,grad_norm: 0.9999998527664329, iteration: 58370
loss: 0.9810673594474792,grad_norm: 0.997398679932146, iteration: 58371
loss: 1.033491849899292,grad_norm: 0.9999996220895065, iteration: 58372
loss: 1.0010430812835693,grad_norm: 0.8262425056844148, iteration: 58373
loss: 1.0041638612747192,grad_norm: 0.8208020305123036, iteration: 58374
loss: 1.0136244297027588,grad_norm: 0.9380153406972221, iteration: 58375
loss: 1.0286866426467896,grad_norm: 0.9851881392186078, iteration: 58376
loss: 1.0019001960754395,grad_norm: 0.9999991400669659, iteration: 58377
loss: 0.9954956769943237,grad_norm: 0.72963087231518, iteration: 58378
loss: 0.9777694940567017,grad_norm: 0.9133734466970409, iteration: 58379
loss: 0.9903832674026489,grad_norm: 0.9141472151475099, iteration: 58380
loss: 0.9903953671455383,grad_norm: 0.9999990350590924, iteration: 58381
loss: 1.0285654067993164,grad_norm: 0.9829861129555784, iteration: 58382
loss: 0.9795381426811218,grad_norm: 0.9999990794375645, iteration: 58383
loss: 0.9875611066818237,grad_norm: 0.9999992193820894, iteration: 58384
loss: 1.0269755125045776,grad_norm: 0.8027500246103654, iteration: 58385
loss: 0.9990787506103516,grad_norm: 0.8984725180570207, iteration: 58386
loss: 1.1023979187011719,grad_norm: 0.7619606940754926, iteration: 58387
loss: 1.0533853769302368,grad_norm: 0.9999992603551785, iteration: 58388
loss: 1.0090917348861694,grad_norm: 0.9999997378942259, iteration: 58389
loss: 1.0069009065628052,grad_norm: 0.9999994272435351, iteration: 58390
loss: 1.0496217012405396,grad_norm: 0.9911513616550515, iteration: 58391
loss: 0.9664455652236938,grad_norm: 0.9341507405194167, iteration: 58392
loss: 0.9884970784187317,grad_norm: 0.922290524415067, iteration: 58393
loss: 0.9610298275947571,grad_norm: 0.8185275976456904, iteration: 58394
loss: 1.0463041067123413,grad_norm: 0.9999991787127479, iteration: 58395
loss: 1.0403358936309814,grad_norm: 0.9999993672231952, iteration: 58396
loss: 1.072475552558899,grad_norm: 0.9999992367744011, iteration: 58397
loss: 1.0629643201828003,grad_norm: 0.9999990818211125, iteration: 58398
loss: 1.0110774040222168,grad_norm: 0.805635903104519, iteration: 58399
loss: 0.9990499019622803,grad_norm: 0.9999990612315837, iteration: 58400
loss: 0.9846764802932739,grad_norm: 0.9999995847155085, iteration: 58401
loss: 1.0795236825942993,grad_norm: 0.9999993548144487, iteration: 58402
loss: 0.9786520600318909,grad_norm: 0.9999989951957734, iteration: 58403
loss: 0.9937655329704285,grad_norm: 0.869144804431777, iteration: 58404
loss: 1.0421972274780273,grad_norm: 0.9999996510758477, iteration: 58405
loss: 0.9995334148406982,grad_norm: 0.9531379234071427, iteration: 58406
loss: 0.9964430928230286,grad_norm: 0.9661999985446241, iteration: 58407
loss: 1.0674952268600464,grad_norm: 0.9999992705792698, iteration: 58408
loss: 1.0739741325378418,grad_norm: 0.9999991766929706, iteration: 58409
loss: 1.0343272686004639,grad_norm: 0.8971230484970601, iteration: 58410
loss: 1.0430461168289185,grad_norm: 0.9999991941876984, iteration: 58411
loss: 0.9699971079826355,grad_norm: 0.8070284444428912, iteration: 58412
loss: 1.023823618888855,grad_norm: 0.9999990549301118, iteration: 58413
loss: 1.0847727060317993,grad_norm: 0.9999998010028363, iteration: 58414
loss: 1.0587228536605835,grad_norm: 0.9999991156175646, iteration: 58415
loss: 0.9908773899078369,grad_norm: 0.9202597560705149, iteration: 58416
loss: 0.9952071309089661,grad_norm: 0.9263617882888827, iteration: 58417
loss: 1.0287193059921265,grad_norm: 0.9999997636663798, iteration: 58418
loss: 0.9923349022865295,grad_norm: 0.8894545832670953, iteration: 58419
loss: 1.0551882982254028,grad_norm: 0.9757061453572334, iteration: 58420
loss: 1.0631288290023804,grad_norm: 0.9626908599783593, iteration: 58421
loss: 1.066664457321167,grad_norm: 0.9999997831560059, iteration: 58422
loss: 0.9813306927680969,grad_norm: 0.9367452128300267, iteration: 58423
loss: 1.00569486618042,grad_norm: 0.999999119254086, iteration: 58424
loss: 1.1019248962402344,grad_norm: 0.9999999507796148, iteration: 58425
loss: 0.9896804690361023,grad_norm: 0.9999991020379186, iteration: 58426
loss: 1.0210596323013306,grad_norm: 0.8425383694723648, iteration: 58427
loss: 0.961582362651825,grad_norm: 0.9999990293043676, iteration: 58428
loss: 1.0138061046600342,grad_norm: 0.9874330968197732, iteration: 58429
loss: 0.9781963229179382,grad_norm: 0.8924038515600442, iteration: 58430
loss: 0.9807522296905518,grad_norm: 0.999999097731427, iteration: 58431
loss: 1.0231409072875977,grad_norm: 0.9699838311063067, iteration: 58432
loss: 1.003858208656311,grad_norm: 0.9999990933426335, iteration: 58433
loss: 1.0236496925354004,grad_norm: 0.867303490897075, iteration: 58434
loss: 1.0164239406585693,grad_norm: 0.9999994288258862, iteration: 58435
loss: 0.9965664744377136,grad_norm: 0.8050929858670058, iteration: 58436
loss: 1.0104597806930542,grad_norm: 0.898491267258727, iteration: 58437
loss: 0.9970167279243469,grad_norm: 0.9999991896718998, iteration: 58438
loss: 0.9954756498336792,grad_norm: 0.8497882079243972, iteration: 58439
loss: 0.9909000992774963,grad_norm: 0.960879925130164, iteration: 58440
loss: 0.9996846318244934,grad_norm: 0.939040853442026, iteration: 58441
loss: 1.0601632595062256,grad_norm: 0.8728307380051027, iteration: 58442
loss: 1.0068244934082031,grad_norm: 0.7567921644166299, iteration: 58443
loss: 0.9946784377098083,grad_norm: 0.9524617503092943, iteration: 58444
loss: 1.0276541709899902,grad_norm: 0.8640083308383406, iteration: 58445
loss: 1.008292317390442,grad_norm: 0.958061150887378, iteration: 58446
loss: 0.9972347617149353,grad_norm: 0.9671107831996358, iteration: 58447
loss: 0.9839588403701782,grad_norm: 0.9840891225401158, iteration: 58448
loss: 1.1022993326187134,grad_norm: 0.9999999330394977, iteration: 58449
loss: 0.987337589263916,grad_norm: 0.9238991872227029, iteration: 58450
loss: 1.0312572717666626,grad_norm: 0.9999994039481785, iteration: 58451
loss: 1.0579637289047241,grad_norm: 0.9999997409444605, iteration: 58452
loss: 0.996375322341919,grad_norm: 0.9458063813683761, iteration: 58453
loss: 0.9997993111610413,grad_norm: 0.8739058327075946, iteration: 58454
loss: 1.0068145990371704,grad_norm: 0.8507148674878585, iteration: 58455
loss: 0.9747743010520935,grad_norm: 0.9597390413046589, iteration: 58456
loss: 1.0996782779693604,grad_norm: 0.999999527997013, iteration: 58457
loss: 1.0188435316085815,grad_norm: 0.9999991374705868, iteration: 58458
loss: 0.9863851070404053,grad_norm: 0.9999994314456143, iteration: 58459
loss: 0.990420401096344,grad_norm: 0.9999997055953559, iteration: 58460
loss: 0.997957170009613,grad_norm: 0.9999991015731671, iteration: 58461
loss: 1.0232702493667603,grad_norm: 0.9031878455479593, iteration: 58462
loss: 0.990349292755127,grad_norm: 0.8576558132814549, iteration: 58463
loss: 1.007666826248169,grad_norm: 0.9064944830636505, iteration: 58464
loss: 1.017014741897583,grad_norm: 0.9999991185187325, iteration: 58465
loss: 0.9657382369041443,grad_norm: 0.9999990326617239, iteration: 58466
loss: 0.9842607378959656,grad_norm: 0.9999989705126557, iteration: 58467
loss: 1.0371826887130737,grad_norm: 0.9999991337750684, iteration: 58468
loss: 0.9644608497619629,grad_norm: 0.9467916632880107, iteration: 58469
loss: 0.9843392968177795,grad_norm: 0.9538949605199375, iteration: 58470
loss: 0.9966385364532471,grad_norm: 0.9999991478541754, iteration: 58471
loss: 0.9906214475631714,grad_norm: 0.8895996544534474, iteration: 58472
loss: 1.027558445930481,grad_norm: 0.999999344765853, iteration: 58473
loss: 1.0079783201217651,grad_norm: 0.9999995785533558, iteration: 58474
loss: 1.0166023969650269,grad_norm: 0.9999996679543716, iteration: 58475
loss: 1.0109784603118896,grad_norm: 0.9999992849812225, iteration: 58476
loss: 1.048291802406311,grad_norm: 0.9999997812399519, iteration: 58477
loss: 1.0942459106445312,grad_norm: 0.9999992806384894, iteration: 58478
loss: 1.0502406358718872,grad_norm: 0.9999990972893598, iteration: 58479
loss: 0.9813867807388306,grad_norm: 0.9999990881423385, iteration: 58480
loss: 1.0242029428482056,grad_norm: 0.9254020958627885, iteration: 58481
loss: 1.0167795419692993,grad_norm: 0.8805073016649613, iteration: 58482
loss: 1.0375347137451172,grad_norm: 0.9999998860150422, iteration: 58483
loss: 0.9978951215744019,grad_norm: 0.9746699269612837, iteration: 58484
loss: 0.9915305376052856,grad_norm: 0.9999993581177189, iteration: 58485
loss: 1.0498472452163696,grad_norm: 0.9999993129123296, iteration: 58486
loss: 1.0183186531066895,grad_norm: 0.8987564289470139, iteration: 58487
loss: 1.0076100826263428,grad_norm: 0.9999992505730714, iteration: 58488
loss: 0.9829461574554443,grad_norm: 0.942769238671468, iteration: 58489
loss: 0.9836406707763672,grad_norm: 0.9999990840331793, iteration: 58490
loss: 1.0102311372756958,grad_norm: 0.9660767406303937, iteration: 58491
loss: 1.1068477630615234,grad_norm: 0.9999996666727379, iteration: 58492
loss: 1.0559875965118408,grad_norm: 0.9999991180919908, iteration: 58493
loss: 0.968622624874115,grad_norm: 0.9881799736316713, iteration: 58494
loss: 1.0287584066390991,grad_norm: 0.9999994667946346, iteration: 58495
loss: 1.000736951828003,grad_norm: 0.943396670061422, iteration: 58496
loss: 1.0449589490890503,grad_norm: 0.9477300112095465, iteration: 58497
loss: 0.9804499745368958,grad_norm: 0.9999993763440035, iteration: 58498
loss: 1.0686804056167603,grad_norm: 0.9999992488481478, iteration: 58499
loss: 0.9754932522773743,grad_norm: 0.8605705338660491, iteration: 58500
loss: 1.0064274072647095,grad_norm: 0.9999990304013312, iteration: 58501
loss: 0.998661458492279,grad_norm: 0.9999992412621143, iteration: 58502
loss: 1.0115591287612915,grad_norm: 0.9999993032577852, iteration: 58503
loss: 0.994378924369812,grad_norm: 0.9405119587534865, iteration: 58504
loss: 0.9920638799667358,grad_norm: 0.8642472458501417, iteration: 58505
loss: 1.0144915580749512,grad_norm: 0.9999996769923616, iteration: 58506
loss: 1.0806941986083984,grad_norm: 0.8359420116239016, iteration: 58507
loss: 1.0069588422775269,grad_norm: 0.9999993987122048, iteration: 58508
loss: 1.0532864332199097,grad_norm: 0.9999995401397235, iteration: 58509
loss: 0.9991680979728699,grad_norm: 0.9999990195247471, iteration: 58510
loss: 1.0528451204299927,grad_norm: 0.9999995563053462, iteration: 58511
loss: 1.0140986442565918,grad_norm: 0.9999991327213622, iteration: 58512
loss: 1.0178474187850952,grad_norm: 0.9999991968294798, iteration: 58513
loss: 1.080317497253418,grad_norm: 0.9999991288016817, iteration: 58514
loss: 1.03994882106781,grad_norm: 0.9999997940388775, iteration: 58515
loss: 0.9864792823791504,grad_norm: 0.9999995372569994, iteration: 58516
loss: 0.9973205924034119,grad_norm: 0.9999990728363909, iteration: 58517
loss: 0.9779725670814514,grad_norm: 0.9446322449914731, iteration: 58518
loss: 1.0523401498794556,grad_norm: 0.8988583779198813, iteration: 58519
loss: 1.0277141332626343,grad_norm: 0.9508967895434988, iteration: 58520
loss: 0.9807310700416565,grad_norm: 0.934891732576084, iteration: 58521
loss: 1.0457029342651367,grad_norm: 0.9999994678639691, iteration: 58522
loss: 1.0065120458602905,grad_norm: 0.7995605208750579, iteration: 58523
loss: 0.9877141118049622,grad_norm: 0.99999897357091, iteration: 58524
loss: 1.00059974193573,grad_norm: 0.9999991526550718, iteration: 58525
loss: 1.0274171829223633,grad_norm: 0.8552336828909735, iteration: 58526
loss: 1.0187265872955322,grad_norm: 0.8566234917635968, iteration: 58527
loss: 1.0266433954238892,grad_norm: 0.9999992663713707, iteration: 58528
loss: 1.0403615236282349,grad_norm: 0.9433840193446587, iteration: 58529
loss: 1.0061712265014648,grad_norm: 0.8749728815429478, iteration: 58530
loss: 0.9853971004486084,grad_norm: 0.8300093022354997, iteration: 58531
loss: 1.083540439605713,grad_norm: 0.9999991947382337, iteration: 58532
loss: 1.0170667171478271,grad_norm: 0.9999995798801756, iteration: 58533
loss: 0.9575597643852234,grad_norm: 0.9619238112469273, iteration: 58534
loss: 1.035328984260559,grad_norm: 0.9999991323252793, iteration: 58535
loss: 0.9540888071060181,grad_norm: 0.9999990572974763, iteration: 58536
loss: 1.0342482328414917,grad_norm: 0.9999993383773798, iteration: 58537
loss: 0.9906999468803406,grad_norm: 0.8971419972209961, iteration: 58538
loss: 1.0237021446228027,grad_norm: 0.9999999804655957, iteration: 58539
loss: 0.9878851175308228,grad_norm: 0.9999996036907418, iteration: 58540
loss: 0.9300438165664673,grad_norm: 0.9999991250183674, iteration: 58541
loss: 0.9923486709594727,grad_norm: 0.8115186685074552, iteration: 58542
loss: 1.0466591119766235,grad_norm: 0.9999999011767057, iteration: 58543
loss: 1.0295329093933105,grad_norm: 0.9999993554327279, iteration: 58544
loss: 1.0202583074569702,grad_norm: 0.9999994770787403, iteration: 58545
loss: 0.9796575307846069,grad_norm: 0.9999990985688247, iteration: 58546
loss: 1.0086954832077026,grad_norm: 0.9273188301674123, iteration: 58547
loss: 1.0001404285430908,grad_norm: 0.9897761353239768, iteration: 58548
loss: 0.9558178186416626,grad_norm: 0.8877729871344394, iteration: 58549
loss: 1.1125353574752808,grad_norm: 0.9999991196870592, iteration: 58550
loss: 1.0062490701675415,grad_norm: 0.999999215866827, iteration: 58551
loss: 0.9860270023345947,grad_norm: 0.9365195158358052, iteration: 58552
loss: 1.0020883083343506,grad_norm: 0.9436227039321292, iteration: 58553
loss: 0.9657741785049438,grad_norm: 0.9999992247345315, iteration: 58554
loss: 1.1925956010818481,grad_norm: 0.9999994835824384, iteration: 58555
loss: 1.0153918266296387,grad_norm: 0.9999994605901988, iteration: 58556
loss: 1.0069621801376343,grad_norm: 0.9999992150304425, iteration: 58557
loss: 1.0304832458496094,grad_norm: 0.843605404561497, iteration: 58558
loss: 1.0229744911193848,grad_norm: 0.8583169719539441, iteration: 58559
loss: 1.008135199546814,grad_norm: 0.9999992045048622, iteration: 58560
loss: 1.0222615003585815,grad_norm: 0.8620548130294118, iteration: 58561
loss: 0.9901919960975647,grad_norm: 0.877969831054954, iteration: 58562
loss: 1.0060185194015503,grad_norm: 0.9763281927769825, iteration: 58563
loss: 1.003395676612854,grad_norm: 0.9999994843389552, iteration: 58564
loss: 1.0061715841293335,grad_norm: 0.999998939861537, iteration: 58565
loss: 1.027978539466858,grad_norm: 0.9999992632048255, iteration: 58566
loss: 0.9839027523994446,grad_norm: 0.9216986193776976, iteration: 58567
loss: 1.0028725862503052,grad_norm: 0.9999991776299851, iteration: 58568
loss: 1.0395292043685913,grad_norm: 0.9999990923039065, iteration: 58569
loss: 0.9470402598381042,grad_norm: 0.8963588653495699, iteration: 58570
loss: 1.0091265439987183,grad_norm: 0.9946168747076006, iteration: 58571
loss: 1.0321139097213745,grad_norm: 0.9999996002160261, iteration: 58572
loss: 1.0288523435592651,grad_norm: 0.9999991716289472, iteration: 58573
loss: 1.0143779516220093,grad_norm: 0.9999991912616737, iteration: 58574
loss: 1.022497534751892,grad_norm: 0.8368818091353416, iteration: 58575
loss: 1.0169757604599,grad_norm: 0.999999329915169, iteration: 58576
loss: 0.9619216918945312,grad_norm: 0.9999993276608213, iteration: 58577
loss: 1.0804872512817383,grad_norm: 0.9542835116487788, iteration: 58578
loss: 1.0429478883743286,grad_norm: 0.9361837345913256, iteration: 58579
loss: 1.0434118509292603,grad_norm: 0.999999314062149, iteration: 58580
loss: 1.0140000581741333,grad_norm: 0.937430090315343, iteration: 58581
loss: 1.0265257358551025,grad_norm: 0.9203174256747501, iteration: 58582
loss: 1.02927827835083,grad_norm: 0.999999128854665, iteration: 58583
loss: 1.0044915676116943,grad_norm: 0.8334016483251857, iteration: 58584
loss: 1.0183911323547363,grad_norm: 0.9236719727601328, iteration: 58585
loss: 0.9893984794616699,grad_norm: 0.8966022472654824, iteration: 58586
loss: 0.9707303643226624,grad_norm: 0.9534185637661446, iteration: 58587
loss: 1.0170198678970337,grad_norm: 0.9999998879732325, iteration: 58588
loss: 1.0003385543823242,grad_norm: 0.9999990345850952, iteration: 58589
loss: 1.0207270383834839,grad_norm: 0.8889713988098707, iteration: 58590
loss: 1.0344346761703491,grad_norm: 0.9999993426996489, iteration: 58591
loss: 0.9974612593650818,grad_norm: 0.9618030503875564, iteration: 58592
loss: 1.013711929321289,grad_norm: 0.9999991848158793, iteration: 58593
loss: 1.048875331878662,grad_norm: 0.9999993656565733, iteration: 58594
loss: 1.026731014251709,grad_norm: 0.9999990133667007, iteration: 58595
loss: 0.9994779825210571,grad_norm: 0.9243687725196292, iteration: 58596
loss: 0.991424024105072,grad_norm: 0.9150750788338412, iteration: 58597
loss: 1.015326738357544,grad_norm: 0.7190448631149093, iteration: 58598
loss: 0.9914231896400452,grad_norm: 0.9999991008319136, iteration: 58599
loss: 0.997190535068512,grad_norm: 0.9999994079684147, iteration: 58600
loss: 1.0033209323883057,grad_norm: 0.773638342589972, iteration: 58601
loss: 0.9967058300971985,grad_norm: 0.7570653344041689, iteration: 58602
loss: 0.9899320006370544,grad_norm: 0.7742621287872196, iteration: 58603
loss: 1.0484918355941772,grad_norm: 0.999999308890091, iteration: 58604
loss: 0.992347002029419,grad_norm: 0.862966086176314, iteration: 58605
loss: 1.0338985919952393,grad_norm: 0.8736254213038485, iteration: 58606
loss: 0.9865036010742188,grad_norm: 0.7909454798478227, iteration: 58607
loss: 0.9712964296340942,grad_norm: 0.9187705754137503, iteration: 58608
loss: 1.0317931175231934,grad_norm: 0.8289683730104271, iteration: 58609
loss: 0.9782420992851257,grad_norm: 0.9528978561154572, iteration: 58610
loss: 1.0052478313446045,grad_norm: 0.9055297716900681, iteration: 58611
loss: 0.980992317199707,grad_norm: 0.8262059049707691, iteration: 58612
loss: 1.0509309768676758,grad_norm: 0.9999996808228059, iteration: 58613
loss: 0.992461621761322,grad_norm: 0.8550076363585782, iteration: 58614
loss: 1.0149692296981812,grad_norm: 0.8040798246669346, iteration: 58615
loss: 1.0043787956237793,grad_norm: 0.7913284956774741, iteration: 58616
loss: 0.9926484227180481,grad_norm: 0.9999996075132601, iteration: 58617
loss: 1.0388203859329224,grad_norm: 0.9999998419899494, iteration: 58618
loss: 1.009682536125183,grad_norm: 0.924842033226001, iteration: 58619
loss: 0.9804097414016724,grad_norm: 0.9999990269909567, iteration: 58620
loss: 0.95822674036026,grad_norm: 0.9110427084041676, iteration: 58621
loss: 0.9854773879051208,grad_norm: 0.9309612128704604, iteration: 58622
loss: 1.037351369857788,grad_norm: 0.9907699606235782, iteration: 58623
loss: 1.0198603868484497,grad_norm: 0.8192945864738288, iteration: 58624
loss: 1.001062273979187,grad_norm: 0.9999990243454804, iteration: 58625
loss: 1.0101701021194458,grad_norm: 0.9999992951239547, iteration: 58626
loss: 1.0162115097045898,grad_norm: 0.9999991224210902, iteration: 58627
loss: 0.9915527105331421,grad_norm: 0.9343077448292089, iteration: 58628
loss: 0.9961215257644653,grad_norm: 0.9421682715878205, iteration: 58629
loss: 1.0548135042190552,grad_norm: 0.9844942764348381, iteration: 58630
loss: 0.9615266919136047,grad_norm: 0.9999992200615035, iteration: 58631
loss: 1.0146065950393677,grad_norm: 0.9913674340640145, iteration: 58632
loss: 0.9718334078788757,grad_norm: 0.9999990246070845, iteration: 58633
loss: 0.9728217124938965,grad_norm: 0.688185855330188, iteration: 58634
loss: 1.0224266052246094,grad_norm: 0.9546688690826004, iteration: 58635
loss: 1.0302175283432007,grad_norm: 0.999999260951406, iteration: 58636
loss: 1.006847858428955,grad_norm: 0.8270859494757085, iteration: 58637
loss: 1.0460197925567627,grad_norm: 0.999999070488613, iteration: 58638
loss: 1.0652981996536255,grad_norm: 0.9826278467366177, iteration: 58639
loss: 1.0293059349060059,grad_norm: 0.8883142732641862, iteration: 58640
loss: 1.0326603651046753,grad_norm: 0.9016247173990788, iteration: 58641
loss: 0.9824469685554504,grad_norm: 0.992784910998872, iteration: 58642
loss: 1.0040513277053833,grad_norm: 0.9999990448253571, iteration: 58643
loss: 0.9944793581962585,grad_norm: 0.9315905565104232, iteration: 58644
loss: 1.032701849937439,grad_norm: 0.9971219087426716, iteration: 58645
loss: 1.013611912727356,grad_norm: 0.9118491722729087, iteration: 58646
loss: 1.0033918619155884,grad_norm: 0.9999994109253522, iteration: 58647
loss: 1.0195202827453613,grad_norm: 0.8592159508684379, iteration: 58648
loss: 0.9916602373123169,grad_norm: 0.8217978890019008, iteration: 58649
loss: 0.9738138914108276,grad_norm: 0.9723710727914882, iteration: 58650
loss: 0.9803996086120605,grad_norm: 0.9999990687440707, iteration: 58651
loss: 0.9790803790092468,grad_norm: 0.8890981988627189, iteration: 58652
loss: 1.0329116582870483,grad_norm: 0.9999998593820654, iteration: 58653
loss: 1.0460866689682007,grad_norm: 0.9999991094699269, iteration: 58654
loss: 0.9904919862747192,grad_norm: 0.823890908267032, iteration: 58655
loss: 1.012402892112732,grad_norm: 0.9290265707342201, iteration: 58656
loss: 1.0089685916900635,grad_norm: 0.9999992419938853, iteration: 58657
loss: 1.0344915390014648,grad_norm: 0.9999996589819141, iteration: 58658
loss: 1.0138100385665894,grad_norm: 0.9805228238869165, iteration: 58659
loss: 1.002217173576355,grad_norm: 0.9545448916897802, iteration: 58660
loss: 1.016700267791748,grad_norm: 0.9999991203465701, iteration: 58661
loss: 1.0443449020385742,grad_norm: 0.9999999596711514, iteration: 58662
loss: 0.993084728717804,grad_norm: 0.9999990072805395, iteration: 58663
loss: 1.0297365188598633,grad_norm: 0.7603048439639357, iteration: 58664
loss: 1.0377646684646606,grad_norm: 0.9999995531128014, iteration: 58665
loss: 1.011398434638977,grad_norm: 0.9999991779840308, iteration: 58666
loss: 1.0387992858886719,grad_norm: 0.9015991307169637, iteration: 58667
loss: 1.027999997138977,grad_norm: 0.9999991683177429, iteration: 58668
loss: 1.0286978483200073,grad_norm: 0.8795105181055642, iteration: 58669
loss: 0.9851661324501038,grad_norm: 0.8201201187303351, iteration: 58670
loss: 0.996746301651001,grad_norm: 0.9866433437827616, iteration: 58671
loss: 0.9498182535171509,grad_norm: 0.8863449921418973, iteration: 58672
loss: 1.033822774887085,grad_norm: 0.999999317853213, iteration: 58673
loss: 1.020236849784851,grad_norm: 0.9136759744146398, iteration: 58674
loss: 1.1336241960525513,grad_norm: 0.9999993771462564, iteration: 58675
loss: 1.0071734189987183,grad_norm: 0.8445381532320031, iteration: 58676
loss: 0.968222975730896,grad_norm: 0.7964837629321638, iteration: 58677
loss: 1.0992509126663208,grad_norm: 0.9999995724298414, iteration: 58678
loss: 0.98419588804245,grad_norm: 0.8718402964699667, iteration: 58679
loss: 0.9970770478248596,grad_norm: 0.9999992192418198, iteration: 58680
loss: 0.9957225918769836,grad_norm: 0.8076322514884412, iteration: 58681
loss: 1.0287574529647827,grad_norm: 0.8671285777884946, iteration: 58682
loss: 1.0059471130371094,grad_norm: 0.858265752271361, iteration: 58683
loss: 1.2421307563781738,grad_norm: 0.9999993918801718, iteration: 58684
loss: 0.9941840171813965,grad_norm: 0.9999991795730858, iteration: 58685
loss: 1.0042544603347778,grad_norm: 0.7226338055546937, iteration: 58686
loss: 0.9767752885818481,grad_norm: 0.9637927436260194, iteration: 58687
loss: 0.9919299483299255,grad_norm: 0.9353616894663334, iteration: 58688
loss: 0.9963480830192566,grad_norm: 0.8876320684771255, iteration: 58689
loss: 1.1136831045150757,grad_norm: 0.9999997334998554, iteration: 58690
loss: 1.0056575536727905,grad_norm: 0.8555016525248577, iteration: 58691
loss: 1.0002522468566895,grad_norm: 0.9565128677258464, iteration: 58692
loss: 0.9824997186660767,grad_norm: 0.9977277725233619, iteration: 58693
loss: 1.0013715028762817,grad_norm: 0.9999992483398917, iteration: 58694
loss: 1.0113673210144043,grad_norm: 0.9999992891558958, iteration: 58695
loss: 1.0054742097854614,grad_norm: 0.9726765713824814, iteration: 58696
loss: 1.004333257675171,grad_norm: 0.9904691600201633, iteration: 58697
loss: 0.9948311448097229,grad_norm: 0.8120706058212787, iteration: 58698
loss: 0.9919576048851013,grad_norm: 0.9999993191432086, iteration: 58699
loss: 0.9548383951187134,grad_norm: 0.9999991076392084, iteration: 58700
loss: 0.9777182936668396,grad_norm: 0.93099180645733, iteration: 58701
loss: 0.9749568700790405,grad_norm: 0.9999992917874835, iteration: 58702
loss: 1.0350899696350098,grad_norm: 0.999999000657706, iteration: 58703
loss: 1.0105698108673096,grad_norm: 0.8494815042158232, iteration: 58704
loss: 1.0939632654190063,grad_norm: 0.9723030188072438, iteration: 58705
loss: 1.0158030986785889,grad_norm: 0.828220650286305, iteration: 58706
loss: 1.021776795387268,grad_norm: 0.9999991245708859, iteration: 58707
loss: 1.0195415019989014,grad_norm: 0.9999996261388908, iteration: 58708
loss: 0.9831953644752502,grad_norm: 0.9259840680111769, iteration: 58709
loss: 1.032028079032898,grad_norm: 0.9999990960128099, iteration: 58710
loss: 0.9800182580947876,grad_norm: 0.9667832441644476, iteration: 58711
loss: 0.9794774651527405,grad_norm: 0.8535514060619355, iteration: 58712
loss: 1.0272760391235352,grad_norm: 0.9999991136658267, iteration: 58713
loss: 1.0282135009765625,grad_norm: 0.9999991224840895, iteration: 58714
loss: 0.9832676649093628,grad_norm: 0.8290893301513383, iteration: 58715
loss: 0.974943995475769,grad_norm: 0.9999992258714083, iteration: 58716
loss: 1.0015345811843872,grad_norm: 0.8231852081224333, iteration: 58717
loss: 0.9702243804931641,grad_norm: 0.9999991112218765, iteration: 58718
loss: 1.0243194103240967,grad_norm: 0.9999990840738769, iteration: 58719
loss: 1.0122510194778442,grad_norm: 0.9217395322468288, iteration: 58720
loss: 0.9938703775405884,grad_norm: 0.9999990294873381, iteration: 58721
loss: 0.9960016012191772,grad_norm: 0.8766501724217443, iteration: 58722
loss: 1.0380713939666748,grad_norm: 0.9999997898555821, iteration: 58723
loss: 1.0465489625930786,grad_norm: 0.9999993850951351, iteration: 58724
loss: 0.968747079372406,grad_norm: 0.8571047760155741, iteration: 58725
loss: 0.9822570085525513,grad_norm: 0.9999990802115616, iteration: 58726
loss: 1.0678191184997559,grad_norm: 0.9999991477685876, iteration: 58727
loss: 1.0422452688217163,grad_norm: 0.9999997675744914, iteration: 58728
loss: 0.9838829636573792,grad_norm: 0.8796806431046227, iteration: 58729
loss: 1.0380187034606934,grad_norm: 0.9999990861239697, iteration: 58730
loss: 1.0332419872283936,grad_norm: 0.882180988304929, iteration: 58731
loss: 0.9629613161087036,grad_norm: 0.9999989938391315, iteration: 58732
loss: 0.9960092306137085,grad_norm: 0.9763087787106041, iteration: 58733
loss: 1.0537099838256836,grad_norm: 0.9999997937833305, iteration: 58734
loss: 0.9959003925323486,grad_norm: 0.9999992311326644, iteration: 58735
loss: 1.0325350761413574,grad_norm: 0.9999995232346878, iteration: 58736
loss: 0.9774689078330994,grad_norm: 0.9886398735152303, iteration: 58737
loss: 1.0134116411209106,grad_norm: 0.8925414235755021, iteration: 58738
loss: 1.0148855447769165,grad_norm: 0.9999990942927849, iteration: 58739
loss: 0.9830746650695801,grad_norm: 0.9382092629058897, iteration: 58740
loss: 1.009968876838684,grad_norm: 0.8325831609065902, iteration: 58741
loss: 0.9919040203094482,grad_norm: 0.9950323360798902, iteration: 58742
loss: 0.9977307915687561,grad_norm: 0.9999990868895359, iteration: 58743
loss: 0.983634352684021,grad_norm: 0.9999991017785292, iteration: 58744
loss: 1.0320345163345337,grad_norm: 0.9034842084219623, iteration: 58745
loss: 1.0397154092788696,grad_norm: 0.9999997154382918, iteration: 58746
loss: 1.0095300674438477,grad_norm: 0.9999992188101018, iteration: 58747
loss: 1.0214173793792725,grad_norm: 0.7332483473113274, iteration: 58748
loss: 1.0669057369232178,grad_norm: 1.0000000180845707, iteration: 58749
loss: 0.9912319779396057,grad_norm: 0.9999991387613911, iteration: 58750
loss: 1.0594583749771118,grad_norm: 0.9999993390388298, iteration: 58751
loss: 1.0157058238983154,grad_norm: 0.8438822834031271, iteration: 58752
loss: 1.0685926675796509,grad_norm: 0.9999991055814312, iteration: 58753
loss: 1.0000276565551758,grad_norm: 0.9316463077737266, iteration: 58754
loss: 1.024009108543396,grad_norm: 0.9536880894752058, iteration: 58755
loss: 1.032407522201538,grad_norm: 0.9999997824070207, iteration: 58756
loss: 0.9806479811668396,grad_norm: 0.99999908225058, iteration: 58757
loss: 1.0064364671707153,grad_norm: 0.9999989523819337, iteration: 58758
loss: 0.9848721027374268,grad_norm: 0.9989279399098835, iteration: 58759
loss: 0.9985920190811157,grad_norm: 0.9673162039418647, iteration: 58760
loss: 0.962017834186554,grad_norm: 0.8057064532288779, iteration: 58761
loss: 0.9950565695762634,grad_norm: 0.9999990590235931, iteration: 58762
loss: 0.9948552846908569,grad_norm: 0.9665695405733538, iteration: 58763
loss: 0.9693313837051392,grad_norm: 0.967361812495238, iteration: 58764
loss: 0.9686176180839539,grad_norm: 0.7730346941568883, iteration: 58765
loss: 1.0207585096359253,grad_norm: 0.7990879587652489, iteration: 58766
loss: 1.1996153593063354,grad_norm: 0.9999999754507186, iteration: 58767
loss: 0.9684960246086121,grad_norm: 0.8904190456225456, iteration: 58768
loss: 0.9954476952552795,grad_norm: 0.9179490603930609, iteration: 58769
loss: 1.002360463142395,grad_norm: 0.8776735564232394, iteration: 58770
loss: 1.1551591157913208,grad_norm: 0.9999998403987553, iteration: 58771
loss: 0.9827693104743958,grad_norm: 0.9999989775463506, iteration: 58772
loss: 1.0525970458984375,grad_norm: 0.9999991309761492, iteration: 58773
loss: 0.9768257141113281,grad_norm: 0.8608773551190907, iteration: 58774
loss: 1.024841547012329,grad_norm: 0.9999998893666562, iteration: 58775
loss: 1.0475715398788452,grad_norm: 0.9999997381785556, iteration: 58776
loss: 1.036532998085022,grad_norm: 0.9999996552979791, iteration: 58777
loss: 1.0126324892044067,grad_norm: 0.8691840723259076, iteration: 58778
loss: 1.0393626689910889,grad_norm: 0.9999989709172571, iteration: 58779
loss: 1.0330454111099243,grad_norm: 0.8928348178118414, iteration: 58780
loss: 1.027255654335022,grad_norm: 0.8607254707479463, iteration: 58781
loss: 0.981060802936554,grad_norm: 0.9847884429963422, iteration: 58782
loss: 1.001753568649292,grad_norm: 0.9999991524064056, iteration: 58783
loss: 1.0083560943603516,grad_norm: 0.7175488465452416, iteration: 58784
loss: 0.9786936044692993,grad_norm: 0.9999989735467553, iteration: 58785
loss: 1.0201555490493774,grad_norm: 0.9526568402539953, iteration: 58786
loss: 1.0184556245803833,grad_norm: 0.9187235672359553, iteration: 58787
loss: 0.9680860042572021,grad_norm: 0.8439622561274006, iteration: 58788
loss: 0.9604324102401733,grad_norm: 0.8566812809676113, iteration: 58789
loss: 1.0345430374145508,grad_norm: 0.9999996146370602, iteration: 58790
loss: 1.0101414918899536,grad_norm: 0.999999176425918, iteration: 58791
loss: 0.9795569777488708,grad_norm: 0.8066348794279642, iteration: 58792
loss: 0.982983410358429,grad_norm: 0.9686911553428299, iteration: 58793
loss: 1.008863091468811,grad_norm: 0.9999990194773516, iteration: 58794
loss: 1.0422775745391846,grad_norm: 0.8169626511870619, iteration: 58795
loss: 1.0493614673614502,grad_norm: 0.999999279103448, iteration: 58796
loss: 0.9497638940811157,grad_norm: 0.9433734725776362, iteration: 58797
loss: 1.038498878479004,grad_norm: 0.9999995618906695, iteration: 58798
loss: 1.0104973316192627,grad_norm: 0.9901414072958001, iteration: 58799
loss: 0.998909592628479,grad_norm: 0.9352138903243559, iteration: 58800
loss: 1.0120683908462524,grad_norm: 0.9859284534023748, iteration: 58801
loss: 0.9885942935943604,grad_norm: 0.9999993714708467, iteration: 58802
loss: 1.0419929027557373,grad_norm: 0.9025491990501316, iteration: 58803
loss: 0.9885340332984924,grad_norm: 0.9061603693128532, iteration: 58804
loss: 0.9658127427101135,grad_norm: 0.9725322521681726, iteration: 58805
loss: 1.0059863328933716,grad_norm: 0.9999995260634176, iteration: 58806
loss: 1.0102088451385498,grad_norm: 0.8436785965611509, iteration: 58807
loss: 1.064031958580017,grad_norm: 0.9999998936920852, iteration: 58808
loss: 1.0438897609710693,grad_norm: 0.9860606095056913, iteration: 58809
loss: 1.0354573726654053,grad_norm: 0.9999991244178773, iteration: 58810
loss: 0.9997924566268921,grad_norm: 0.9234466520965557, iteration: 58811
loss: 0.9860550165176392,grad_norm: 0.9999991810503247, iteration: 58812
loss: 1.0079638957977295,grad_norm: 0.9167452777703852, iteration: 58813
loss: 0.9939375519752502,grad_norm: 0.8476285514921061, iteration: 58814
loss: 0.9862499237060547,grad_norm: 0.8340350854076164, iteration: 58815
loss: 0.9961573481559753,grad_norm: 0.7468369626842926, iteration: 58816
loss: 0.9885730147361755,grad_norm: 0.9512152795293725, iteration: 58817
loss: 0.9738370180130005,grad_norm: 0.771266503750663, iteration: 58818
loss: 1.004461646080017,grad_norm: 0.9677405195136254, iteration: 58819
loss: 1.027869462966919,grad_norm: 0.9878680150659164, iteration: 58820
loss: 1.027174472808838,grad_norm: 0.9999997436456415, iteration: 58821
loss: 0.9961745738983154,grad_norm: 0.9586497421066871, iteration: 58822
loss: 0.9617740511894226,grad_norm: 0.888956273064251, iteration: 58823
loss: 0.982948899269104,grad_norm: 0.8508419013358703, iteration: 58824
loss: 1.0139060020446777,grad_norm: 0.9999991769035383, iteration: 58825
loss: 1.0246140956878662,grad_norm: 0.9999990759439392, iteration: 58826
loss: 1.0196489095687866,grad_norm: 0.8998518607184126, iteration: 58827
loss: 1.0886636972427368,grad_norm: 0.9999995886794979, iteration: 58828
loss: 1.0085495710372925,grad_norm: 0.972269297949127, iteration: 58829
loss: 1.020137906074524,grad_norm: 0.9017126142444476, iteration: 58830
loss: 0.9812994599342346,grad_norm: 0.958685661572131, iteration: 58831
loss: 1.0431616306304932,grad_norm: 0.9999996707826961, iteration: 58832
loss: 1.0159648656845093,grad_norm: 0.9400592691670455, iteration: 58833
loss: 0.9960978627204895,grad_norm: 0.9878828860763765, iteration: 58834
loss: 0.9870178699493408,grad_norm: 0.9999991473900854, iteration: 58835
loss: 1.0169246196746826,grad_norm: 0.9947498916384473, iteration: 58836
loss: 0.9834006428718567,grad_norm: 0.978608485868474, iteration: 58837
loss: 1.0259556770324707,grad_norm: 0.8887362565170407, iteration: 58838
loss: 0.9843836426734924,grad_norm: 0.847476635519813, iteration: 58839
loss: 1.0121921300888062,grad_norm: 0.6423379856636215, iteration: 58840
loss: 1.0272191762924194,grad_norm: 0.9422407737243542, iteration: 58841
loss: 1.049795389175415,grad_norm: 0.9999995537440627, iteration: 58842
loss: 0.9959461092948914,grad_norm: 0.7641462116485486, iteration: 58843
loss: 1.035406231880188,grad_norm: 0.9999991630151561, iteration: 58844
loss: 1.001609444618225,grad_norm: 0.9933127321743316, iteration: 58845
loss: 1.0353825092315674,grad_norm: 0.9999996121614377, iteration: 58846
loss: 1.0021733045578003,grad_norm: 0.8987534953647962, iteration: 58847
loss: 0.9914734959602356,grad_norm: 0.9177816733852654, iteration: 58848
loss: 0.9923985600471497,grad_norm: 0.8647392686687242, iteration: 58849
loss: 0.9862424731254578,grad_norm: 0.8341299074318562, iteration: 58850
loss: 1.029403805732727,grad_norm: 0.9595744299518816, iteration: 58851
loss: 0.9665243625640869,grad_norm: 0.9355358854841227, iteration: 58852
loss: 0.9754841327667236,grad_norm: 0.9434021198312473, iteration: 58853
loss: 0.987945556640625,grad_norm: 0.7832702825831075, iteration: 58854
loss: 0.9748769402503967,grad_norm: 0.8879592388907195, iteration: 58855
loss: 0.9783337116241455,grad_norm: 0.9999991166479651, iteration: 58856
loss: 1.0058609247207642,grad_norm: 0.9416190904245683, iteration: 58857
loss: 1.0092172622680664,grad_norm: 0.9999992434101903, iteration: 58858
loss: 0.9685757160186768,grad_norm: 0.9270588603197231, iteration: 58859
loss: 1.0617761611938477,grad_norm: 0.9999995562186942, iteration: 58860
loss: 1.0367274284362793,grad_norm: 0.9999998199383006, iteration: 58861
loss: 0.9642310738563538,grad_norm: 0.9999990931834618, iteration: 58862
loss: 1.0293264389038086,grad_norm: 0.9999992484898741, iteration: 58863
loss: 0.9940531849861145,grad_norm: 0.9999992170594452, iteration: 58864
loss: 1.041643738746643,grad_norm: 0.9999996248590736, iteration: 58865
loss: 0.9891989827156067,grad_norm: 0.9594532255783658, iteration: 58866
loss: 1.04654061794281,grad_norm: 0.9999995489080439, iteration: 58867
loss: 0.9471456408500671,grad_norm: 0.845105773143115, iteration: 58868
loss: 1.0205152034759521,grad_norm: 0.9999994755175711, iteration: 58869
loss: 1.0656648874282837,grad_norm: 0.999999707542982, iteration: 58870
loss: 0.958048403263092,grad_norm: 0.9659511672517027, iteration: 58871
loss: 1.0270594358444214,grad_norm: 0.8259526122583172, iteration: 58872
loss: 1.0874370336532593,grad_norm: 0.9999991388549407, iteration: 58873
loss: 1.0431053638458252,grad_norm: 0.9999990978980914, iteration: 58874
loss: 1.0021916627883911,grad_norm: 0.9999988809470436, iteration: 58875
loss: 1.013015627861023,grad_norm: 0.750479278841883, iteration: 58876
loss: 0.9974250197410583,grad_norm: 0.9019100597426598, iteration: 58877
loss: 0.9868114590644836,grad_norm: 0.9999992678462327, iteration: 58878
loss: 1.0028308629989624,grad_norm: 0.9889731207912877, iteration: 58879
loss: 1.0761454105377197,grad_norm: 0.9999993777610983, iteration: 58880
loss: 1.1099117994308472,grad_norm: 0.9999997039642965, iteration: 58881
loss: 0.9962124228477478,grad_norm: 0.9999996818463639, iteration: 58882
loss: 1.008671760559082,grad_norm: 0.9999996110059977, iteration: 58883
loss: 1.081523060798645,grad_norm: 0.9999998503688197, iteration: 58884
loss: 0.9800618886947632,grad_norm: 0.8791963438712204, iteration: 58885
loss: 1.0220450162887573,grad_norm: 0.8591534352078479, iteration: 58886
loss: 1.002332329750061,grad_norm: 0.9407527591036352, iteration: 58887
loss: 1.0283176898956299,grad_norm: 0.999999803959624, iteration: 58888
loss: 1.0008776187896729,grad_norm: 0.9999991415368377, iteration: 58889
loss: 1.0497485399246216,grad_norm: 0.89525860179919, iteration: 58890
loss: 1.0176410675048828,grad_norm: 0.9999990326599238, iteration: 58891
loss: 0.9933048486709595,grad_norm: 0.9999991915690131, iteration: 58892
loss: 0.995143711566925,grad_norm: 0.9829321365893191, iteration: 58893
loss: 1.069642186164856,grad_norm: 0.9999991205583335, iteration: 58894
loss: 0.9791893362998962,grad_norm: 0.9053662455434314, iteration: 58895
loss: 1.0272300243377686,grad_norm: 0.9822097189360794, iteration: 58896
loss: 1.0704172849655151,grad_norm: 0.9999994390243239, iteration: 58897
loss: 0.9769744873046875,grad_norm: 0.8328424711342034, iteration: 58898
loss: 1.0354143381118774,grad_norm: 0.9999990966646432, iteration: 58899
loss: 1.0943843126296997,grad_norm: 0.9999993073485646, iteration: 58900
loss: 1.0083873271942139,grad_norm: 0.9999991454110635, iteration: 58901
loss: 0.9997867345809937,grad_norm: 0.940889573559843, iteration: 58902
loss: 0.9934005737304688,grad_norm: 0.9340305596626279, iteration: 58903
loss: 1.0239458084106445,grad_norm: 0.7487817354807493, iteration: 58904
loss: 0.998724102973938,grad_norm: 0.999999135563119, iteration: 58905
loss: 0.986797571182251,grad_norm: 0.9999993153676386, iteration: 58906
loss: 1.0891369581222534,grad_norm: 0.9999992466434493, iteration: 58907
loss: 0.9992218613624573,grad_norm: 0.9999994218258228, iteration: 58908
loss: 1.0391587018966675,grad_norm: 0.9999991439732641, iteration: 58909
loss: 1.2301782369613647,grad_norm: 0.9999991355804265, iteration: 58910
loss: 0.9776355028152466,grad_norm: 0.9374238140862324, iteration: 58911
loss: 0.9783884286880493,grad_norm: 0.8407372166373486, iteration: 58912
loss: 1.0336887836456299,grad_norm: 0.9952543405478238, iteration: 58913
loss: 0.9837849140167236,grad_norm: 0.9999990835555268, iteration: 58914
loss: 0.9988856315612793,grad_norm: 0.9999992904472833, iteration: 58915
loss: 0.9463664889335632,grad_norm: 0.8965494194629122, iteration: 58916
loss: 1.007776141166687,grad_norm: 0.9999991884153336, iteration: 58917
loss: 1.0346806049346924,grad_norm: 0.9999989816389986, iteration: 58918
loss: 0.9982513189315796,grad_norm: 0.8550591416234581, iteration: 58919
loss: 1.050459384918213,grad_norm: 0.9554019506365015, iteration: 58920
loss: 1.0304673910140991,grad_norm: 0.9999992822937621, iteration: 58921
loss: 1.1143856048583984,grad_norm: 0.9999994708935829, iteration: 58922
loss: 1.0131962299346924,grad_norm: 0.9999992007906447, iteration: 58923
loss: 1.1096282005310059,grad_norm: 0.9999996732087973, iteration: 58924
loss: 0.9959437847137451,grad_norm: 0.9205651408050898, iteration: 58925
loss: 1.2618298530578613,grad_norm: 0.9999995856097177, iteration: 58926
loss: 1.0118931531906128,grad_norm: 0.9999992646268522, iteration: 58927
loss: 1.1613746881484985,grad_norm: 0.9999998690498763, iteration: 58928
loss: 1.0811595916748047,grad_norm: 0.9814909119005515, iteration: 58929
loss: 0.9958133101463318,grad_norm: 0.9999994421231351, iteration: 58930
loss: 1.0240356922149658,grad_norm: 0.9185514273314449, iteration: 58931
loss: 0.9975646734237671,grad_norm: 0.9999991536773987, iteration: 58932
loss: 1.0142215490341187,grad_norm: 0.8945059534373637, iteration: 58933
loss: 1.0075193643569946,grad_norm: 0.9507655416320401, iteration: 58934
loss: 1.0158799886703491,grad_norm: 0.8848421044824768, iteration: 58935
loss: 0.999420702457428,grad_norm: 0.8613667422684842, iteration: 58936
loss: 1.0511077642440796,grad_norm: 0.9999992130281338, iteration: 58937
loss: 1.0273306369781494,grad_norm: 0.9155654453172865, iteration: 58938
loss: 1.0539242029190063,grad_norm: 0.8696635536202605, iteration: 58939
loss: 0.9923362135887146,grad_norm: 0.9122854009136714, iteration: 58940
loss: 0.9913920760154724,grad_norm: 0.8239041001563436, iteration: 58941
loss: 1.01526939868927,grad_norm: 0.9999999139225632, iteration: 58942
loss: 0.9845524430274963,grad_norm: 0.9999991607452888, iteration: 58943
loss: 1.026839017868042,grad_norm: 0.9999991629800653, iteration: 58944
loss: 0.9804483652114868,grad_norm: 0.9999992071136895, iteration: 58945
loss: 1.0097966194152832,grad_norm: 0.9999993138651667, iteration: 58946
loss: 1.0330884456634521,grad_norm: 0.9999991471738618, iteration: 58947
loss: 1.0007380247116089,grad_norm: 0.9999992102597175, iteration: 58948
loss: 0.9933831095695496,grad_norm: 0.9823817869190293, iteration: 58949
loss: 1.003516435623169,grad_norm: 0.9330332481449576, iteration: 58950
loss: 1.0210603475570679,grad_norm: 0.9999998836514429, iteration: 58951
loss: 0.9742230772972107,grad_norm: 0.8387382443134485, iteration: 58952
loss: 1.0598020553588867,grad_norm: 0.9999993205845608, iteration: 58953
loss: 1.0103375911712646,grad_norm: 0.9999996193432625, iteration: 58954
loss: 0.9739300012588501,grad_norm: 0.830935552806253, iteration: 58955
loss: 0.9801564812660217,grad_norm: 0.922570832485631, iteration: 58956
loss: 0.9927384853363037,grad_norm: 0.9999994176624715, iteration: 58957
loss: 0.9741795063018799,grad_norm: 0.9208518502337988, iteration: 58958
loss: 0.9852463603019714,grad_norm: 0.9596213559534483, iteration: 58959
loss: 1.0459190607070923,grad_norm: 0.9999991615542643, iteration: 58960
loss: 1.0175689458847046,grad_norm: 0.9999991866975437, iteration: 58961
loss: 1.0022813081741333,grad_norm: 0.9999991393628677, iteration: 58962
loss: 1.0159982442855835,grad_norm: 0.9369480891018335, iteration: 58963
loss: 0.9794962406158447,grad_norm: 0.9999996039010451, iteration: 58964
loss: 1.0064035654067993,grad_norm: 0.9403112726090295, iteration: 58965
loss: 1.0333507061004639,grad_norm: 0.9999992752672576, iteration: 58966
loss: 1.0483301877975464,grad_norm: 0.9999992583996318, iteration: 58967
loss: 1.1315909624099731,grad_norm: 0.9999993857129921, iteration: 58968
loss: 1.0291703939437866,grad_norm: 0.9999990875289003, iteration: 58969
loss: 1.0008615255355835,grad_norm: 0.9824326655976664, iteration: 58970
loss: 0.9865198731422424,grad_norm: 0.9999992680077348, iteration: 58971
loss: 1.0701442956924438,grad_norm: 0.9999993389349544, iteration: 58972
loss: 0.988380491733551,grad_norm: 0.9999992765349378, iteration: 58973
loss: 0.9925573468208313,grad_norm: 0.8930126586108993, iteration: 58974
loss: 0.9851694107055664,grad_norm: 0.9867710282997997, iteration: 58975
loss: 1.0635677576065063,grad_norm: 0.999999649789581, iteration: 58976
loss: 1.0038100481033325,grad_norm: 0.7512189577660711, iteration: 58977
loss: 1.013069748878479,grad_norm: 0.9680674056496043, iteration: 58978
loss: 0.9668431878089905,grad_norm: 0.958351134919715, iteration: 58979
loss: 1.0105525255203247,grad_norm: 0.9999998730338113, iteration: 58980
loss: 1.0398118495941162,grad_norm: 0.968596537113881, iteration: 58981
loss: 1.0004922151565552,grad_norm: 0.9999995777247613, iteration: 58982
loss: 1.013251781463623,grad_norm: 0.9999995692000648, iteration: 58983
loss: 0.9985930919647217,grad_norm: 0.9584891140294122, iteration: 58984
loss: 1.0275025367736816,grad_norm: 0.9200266854558229, iteration: 58985
loss: 1.0256246328353882,grad_norm: 0.9999991659866916, iteration: 58986
loss: 1.0247431993484497,grad_norm: 0.9929350162690942, iteration: 58987
loss: 0.9790012240409851,grad_norm: 0.9539629472945124, iteration: 58988
loss: 1.0095728635787964,grad_norm: 0.9999991472803069, iteration: 58989
loss: 0.9519952535629272,grad_norm: 0.997345303002557, iteration: 58990
loss: 0.9689854979515076,grad_norm: 0.8551895859713947, iteration: 58991
loss: 1.044145941734314,grad_norm: 0.9999991841103385, iteration: 58992
loss: 1.021651268005371,grad_norm: 0.9999991921808714, iteration: 58993
loss: 1.066856861114502,grad_norm: 0.9999991562812117, iteration: 58994
loss: 1.0687403678894043,grad_norm: 0.9999994007223455, iteration: 58995
loss: 1.0136579275131226,grad_norm: 0.9415385936243708, iteration: 58996
loss: 0.9726130962371826,grad_norm: 0.9999991125494486, iteration: 58997
loss: 1.018651008605957,grad_norm: 0.9999991179476976, iteration: 58998
loss: 0.9928317666053772,grad_norm: 0.9999997212666414, iteration: 58999
loss: 1.0184334516525269,grad_norm: 0.9063993297306717, iteration: 59000
loss: 1.0760630369186401,grad_norm: 0.9999995983648291, iteration: 59001
loss: 1.0065991878509521,grad_norm: 0.8323904245538154, iteration: 59002
loss: 1.0283150672912598,grad_norm: 0.9063227508132538, iteration: 59003
loss: 1.0278226137161255,grad_norm: 0.9308650633787543, iteration: 59004
loss: 0.9745892882347107,grad_norm: 0.9002386173904283, iteration: 59005
loss: 1.0270295143127441,grad_norm: 0.9999990304471148, iteration: 59006
loss: 0.9970610737800598,grad_norm: 0.9999995447188322, iteration: 59007
loss: 1.082638144493103,grad_norm: 0.9999996199589203, iteration: 59008
loss: 0.9826948642730713,grad_norm: 0.9257597687190279, iteration: 59009
loss: 0.9975398778915405,grad_norm: 0.9865240808518442, iteration: 59010
loss: 0.9827212691307068,grad_norm: 0.9999991337129084, iteration: 59011
loss: 1.056426763534546,grad_norm: 0.9999998165597143, iteration: 59012
loss: 1.0289859771728516,grad_norm: 0.9686401084993229, iteration: 59013
loss: 1.1166858673095703,grad_norm: 0.9999997499477205, iteration: 59014
loss: 1.0435048341751099,grad_norm: 0.8799556617821948, iteration: 59015
loss: 1.0065163373947144,grad_norm: 0.8411413813340645, iteration: 59016
loss: 1.022796869277954,grad_norm: 0.9999996448399187, iteration: 59017
loss: 1.0206701755523682,grad_norm: 0.8399427566174903, iteration: 59018
loss: 0.9640219211578369,grad_norm: 0.8861582962592364, iteration: 59019
loss: 1.031957745552063,grad_norm: 0.8097922957980562, iteration: 59020
loss: 1.1504279375076294,grad_norm: 0.9999993374018838, iteration: 59021
loss: 0.987430214881897,grad_norm: 0.851379107536144, iteration: 59022
loss: 1.0039843320846558,grad_norm: 0.9999991738098027, iteration: 59023
loss: 1.0084755420684814,grad_norm: 0.889081693428397, iteration: 59024
loss: 1.0124679803848267,grad_norm: 0.9999992236586349, iteration: 59025
loss: 1.0096850395202637,grad_norm: 0.9999991135218136, iteration: 59026
loss: 1.0085113048553467,grad_norm: 0.8149987915103275, iteration: 59027
loss: 1.0262924432754517,grad_norm: 0.9563415873808564, iteration: 59028
loss: 1.0092583894729614,grad_norm: 0.793013840179206, iteration: 59029
loss: 0.9975461363792419,grad_norm: 0.9999997997043651, iteration: 59030
loss: 1.0244406461715698,grad_norm: 0.9999993096599147, iteration: 59031
loss: 1.0021190643310547,grad_norm: 0.8913338337319024, iteration: 59032
loss: 1.0155893564224243,grad_norm: 0.9999991786713233, iteration: 59033
loss: 1.0128538608551025,grad_norm: 0.9999990846684749, iteration: 59034
loss: 1.10085928440094,grad_norm: 0.9999994830445593, iteration: 59035
loss: 1.087191104888916,grad_norm: 0.9999995701857441, iteration: 59036
loss: 0.9775322675704956,grad_norm: 0.9375261313053427, iteration: 59037
loss: 1.0784358978271484,grad_norm: 0.9999995799518602, iteration: 59038
loss: 1.0133213996887207,grad_norm: 0.999999036809715, iteration: 59039
loss: 1.0435707569122314,grad_norm: 0.9159333313192476, iteration: 59040
loss: 1.0076196193695068,grad_norm: 0.9999997312606934, iteration: 59041
loss: 0.9962372779846191,grad_norm: 0.9999991901826022, iteration: 59042
loss: 1.0066752433776855,grad_norm: 0.891035311324748, iteration: 59043
loss: 1.0253115892410278,grad_norm: 0.9776857211648856, iteration: 59044
loss: 1.0018188953399658,grad_norm: 0.999999239093246, iteration: 59045
loss: 0.986666738986969,grad_norm: 0.9577619322238818, iteration: 59046
loss: 0.9904806613922119,grad_norm: 0.7998862665631492, iteration: 59047
loss: 1.0067499876022339,grad_norm: 0.9569031400819255, iteration: 59048
loss: 1.062151551246643,grad_norm: 0.8359977837154868, iteration: 59049
loss: 1.0491993427276611,grad_norm: 0.9999993974445431, iteration: 59050
loss: 0.9893316626548767,grad_norm: 0.8777235794199885, iteration: 59051
loss: 1.0238968133926392,grad_norm: 0.9999994290953, iteration: 59052
loss: 0.9734296798706055,grad_norm: 0.9512843395477254, iteration: 59053
loss: 1.0902653932571411,grad_norm: 0.999999919791297, iteration: 59054
loss: 1.0587137937545776,grad_norm: 0.9999996149804654, iteration: 59055
loss: 1.0065538883209229,grad_norm: 0.8877488508972643, iteration: 59056
loss: 1.0169789791107178,grad_norm: 0.9999991262068266, iteration: 59057
loss: 1.0129480361938477,grad_norm: 0.7740427185983789, iteration: 59058
loss: 1.1365368366241455,grad_norm: 0.9999995189873402, iteration: 59059
loss: 0.9926589727401733,grad_norm: 0.9999992231110518, iteration: 59060
loss: 1.0418155193328857,grad_norm: 0.9999991517189183, iteration: 59061
loss: 0.9895371198654175,grad_norm: 0.9999991922767317, iteration: 59062
loss: 0.9862112998962402,grad_norm: 0.7968104579046127, iteration: 59063
loss: 1.052664875984192,grad_norm: 0.999999553518488, iteration: 59064
loss: 0.9879075288772583,grad_norm: 0.9999999620809894, iteration: 59065
loss: 0.9918497800827026,grad_norm: 0.8900614259627062, iteration: 59066
loss: 1.0666309595108032,grad_norm: 0.9999998931198062, iteration: 59067
loss: 1.0032492876052856,grad_norm: 0.8560624004141449, iteration: 59068
loss: 0.9789615869522095,grad_norm: 0.9206507235827899, iteration: 59069
loss: 0.9736424684524536,grad_norm: 0.884428048019371, iteration: 59070
loss: 1.0328422784805298,grad_norm: 0.8691584046901737, iteration: 59071
loss: 0.98203045129776,grad_norm: 0.9387112614435368, iteration: 59072
loss: 1.0156821012496948,grad_norm: 0.8523132731663999, iteration: 59073
loss: 0.9997172951698303,grad_norm: 0.999999139120882, iteration: 59074
loss: 0.9677703976631165,grad_norm: 0.833633498586105, iteration: 59075
loss: 0.9953893423080444,grad_norm: 0.9999995559837979, iteration: 59076
loss: 1.0096704959869385,grad_norm: 0.9999991353793541, iteration: 59077
loss: 1.0444225072860718,grad_norm: 0.9999991457898199, iteration: 59078
loss: 1.0055807828903198,grad_norm: 0.9541792380223503, iteration: 59079
loss: 1.0226786136627197,grad_norm: 0.8537125131247714, iteration: 59080
loss: 1.0065516233444214,grad_norm: 0.9999992193007421, iteration: 59081
loss: 1.0734082460403442,grad_norm: 0.9999996984235338, iteration: 59082
loss: 1.0143465995788574,grad_norm: 0.9999995702375872, iteration: 59083
loss: 1.0218675136566162,grad_norm: 0.9999992441478037, iteration: 59084
loss: 0.9914048314094543,grad_norm: 0.9537341533672918, iteration: 59085
loss: 1.0147786140441895,grad_norm: 0.9834914933053054, iteration: 59086
loss: 1.0262726545333862,grad_norm: 0.9999990491693739, iteration: 59087
loss: 1.0240974426269531,grad_norm: 0.9999992886279838, iteration: 59088
loss: 1.0115911960601807,grad_norm: 0.9999990871633689, iteration: 59089
loss: 1.0111814737319946,grad_norm: 0.9999992329549372, iteration: 59090
loss: 0.9953957200050354,grad_norm: 0.9999996098499032, iteration: 59091
loss: 1.0113558769226074,grad_norm: 0.9999998588579749, iteration: 59092
loss: 0.9946760535240173,grad_norm: 0.9092364072964052, iteration: 59093
loss: 0.9760562181472778,grad_norm: 0.9999990603821954, iteration: 59094
loss: 1.0272116661071777,grad_norm: 0.9999992743814831, iteration: 59095
loss: 1.035330057144165,grad_norm: 0.9999995219289827, iteration: 59096
loss: 0.9870936870574951,grad_norm: 0.9999992460881736, iteration: 59097
loss: 1.0592864751815796,grad_norm: 0.9999992949551625, iteration: 59098
loss: 0.9522402286529541,grad_norm: 0.9962956325696839, iteration: 59099
loss: 1.0259344577789307,grad_norm: 0.999999458678537, iteration: 59100
loss: 1.0235520601272583,grad_norm: 0.9532796823444761, iteration: 59101
loss: 0.9902217984199524,grad_norm: 0.9999993332119581, iteration: 59102
loss: 0.995888888835907,grad_norm: 0.9999993328247246, iteration: 59103
loss: 0.9976686835289001,grad_norm: 0.9999991641455713, iteration: 59104
loss: 1.0297377109527588,grad_norm: 0.999999614715616, iteration: 59105
loss: 1.0226240158081055,grad_norm: 0.9999995669080536, iteration: 59106
loss: 1.0112756490707397,grad_norm: 0.9065700309591934, iteration: 59107
loss: 1.0005346536636353,grad_norm: 0.9999989641762028, iteration: 59108
loss: 0.9849154949188232,grad_norm: 0.9999994346479147, iteration: 59109
loss: 0.9800196886062622,grad_norm: 0.9999990223756473, iteration: 59110
loss: 1.0163547992706299,grad_norm: 0.9765997012698039, iteration: 59111
loss: 0.972852885723114,grad_norm: 0.9999991275098405, iteration: 59112
loss: 0.9967889189720154,grad_norm: 0.9271799847871222, iteration: 59113
loss: 1.0381220579147339,grad_norm: 0.9999998137730952, iteration: 59114
loss: 1.0080888271331787,grad_norm: 0.9853905123881675, iteration: 59115
loss: 1.035137414932251,grad_norm: 0.99999973059153, iteration: 59116
loss: 1.0597434043884277,grad_norm: 0.9999990735707848, iteration: 59117
loss: 1.0165092945098877,grad_norm: 0.9999991523026867, iteration: 59118
loss: 1.0096752643585205,grad_norm: 0.7688684617623972, iteration: 59119
loss: 1.0565600395202637,grad_norm: 0.819637161269219, iteration: 59120
loss: 1.0238615274429321,grad_norm: 0.9999991024290837, iteration: 59121
loss: 1.0359193086624146,grad_norm: 0.9999993755921309, iteration: 59122
loss: 0.9923369288444519,grad_norm: 0.8912424356079726, iteration: 59123
loss: 1.012414574623108,grad_norm: 0.9753963179153857, iteration: 59124
loss: 0.9368918538093567,grad_norm: 0.9537050003704651, iteration: 59125
loss: 0.9899740219116211,grad_norm: 0.874016548168591, iteration: 59126
loss: 1.0006630420684814,grad_norm: 0.9999999681602524, iteration: 59127
loss: 0.9657930135726929,grad_norm: 0.9999996266419406, iteration: 59128
loss: 1.0239753723144531,grad_norm: 0.9516554871970374, iteration: 59129
loss: 0.9989548325538635,grad_norm: 0.9381084494247341, iteration: 59130
loss: 1.1147065162658691,grad_norm: 0.9999992233599616, iteration: 59131
loss: 1.003997802734375,grad_norm: 0.9999997753849359, iteration: 59132
loss: 1.007381558418274,grad_norm: 0.7149753179345542, iteration: 59133
loss: 0.9865491390228271,grad_norm: 0.9999991441238911, iteration: 59134
loss: 1.0567173957824707,grad_norm: 0.999999565531268, iteration: 59135
loss: 1.0067882537841797,grad_norm: 0.9210674521817738, iteration: 59136
loss: 1.017726182937622,grad_norm: 0.7540152739001369, iteration: 59137
loss: 0.9789523482322693,grad_norm: 0.999999080587968, iteration: 59138
loss: 1.0023428201675415,grad_norm: 0.9999990326386007, iteration: 59139
loss: 1.0163244009017944,grad_norm: 0.9999992477183219, iteration: 59140
loss: 0.9980666041374207,grad_norm: 0.9558662833196737, iteration: 59141
loss: 1.028212308883667,grad_norm: 0.9999992829405396, iteration: 59142
loss: 1.128764033317566,grad_norm: 0.9999992869902852, iteration: 59143
loss: 1.050614595413208,grad_norm: 0.929831047196219, iteration: 59144
loss: 1.0280784368515015,grad_norm: 0.8586091593171394, iteration: 59145
loss: 1.053621768951416,grad_norm: 0.9999991173688385, iteration: 59146
loss: 0.9761216640472412,grad_norm: 0.9999992159401325, iteration: 59147
loss: 0.9785497188568115,grad_norm: 0.9999990359688364, iteration: 59148
loss: 0.9945544004440308,grad_norm: 0.9999993519490723, iteration: 59149
loss: 1.0211752653121948,grad_norm: 0.9999991262538548, iteration: 59150
loss: 0.9714122414588928,grad_norm: 0.999999389962499, iteration: 59151
loss: 0.9951611161231995,grad_norm: 0.9059560220924204, iteration: 59152
loss: 1.025700330734253,grad_norm: 0.9999991810733441, iteration: 59153
loss: 0.9910932779312134,grad_norm: 0.999999167227367, iteration: 59154
loss: 0.992284893989563,grad_norm: 0.743951066590619, iteration: 59155
loss: 1.0256547927856445,grad_norm: 0.9999994052687843, iteration: 59156
loss: 0.9986883997917175,grad_norm: 0.9999990766935254, iteration: 59157
loss: 1.014880657196045,grad_norm: 0.9999993546626906, iteration: 59158
loss: 1.0257551670074463,grad_norm: 0.9999991511808151, iteration: 59159
loss: 0.998792827129364,grad_norm: 0.9999991485680122, iteration: 59160
loss: 1.002411127090454,grad_norm: 0.9999995385307691, iteration: 59161
loss: 1.040470838546753,grad_norm: 0.9999996022980263, iteration: 59162
loss: 0.9631056189537048,grad_norm: 0.9192770478321721, iteration: 59163
loss: 1.0008666515350342,grad_norm: 0.8605806291412614, iteration: 59164
loss: 0.9904155135154724,grad_norm: 0.9999991590944012, iteration: 59165
loss: 1.0255217552185059,grad_norm: 0.9999993567958869, iteration: 59166
loss: 0.9835001826286316,grad_norm: 0.9999991946526781, iteration: 59167
loss: 1.0128105878829956,grad_norm: 0.9999990525129782, iteration: 59168
loss: 0.9799000024795532,grad_norm: 0.9999991426062897, iteration: 59169
loss: 1.026511549949646,grad_norm: 0.9999998372800003, iteration: 59170
loss: 1.0140892267227173,grad_norm: 0.9999993139884669, iteration: 59171
loss: 1.0224814414978027,grad_norm: 0.9999995099558777, iteration: 59172
loss: 1.004271388053894,grad_norm: 0.899883082473731, iteration: 59173
loss: 0.9867619872093201,grad_norm: 0.999999214762183, iteration: 59174
loss: 0.9866284132003784,grad_norm: 0.9099096479972242, iteration: 59175
loss: 0.975433349609375,grad_norm: 0.9064352947510106, iteration: 59176
loss: 0.9834588170051575,grad_norm: 0.7204467600714609, iteration: 59177
loss: 1.0676063299179077,grad_norm: 0.9999997313243081, iteration: 59178
loss: 1.1148251295089722,grad_norm: 0.9999992904478199, iteration: 59179
loss: 1.024563193321228,grad_norm: 0.9006180855734094, iteration: 59180
loss: 0.9468681812286377,grad_norm: 0.9524053902147143, iteration: 59181
loss: 0.9336161613464355,grad_norm: 0.9999992621760135, iteration: 59182
loss: 0.982710599899292,grad_norm: 0.961753197367706, iteration: 59183
loss: 1.0035042762756348,grad_norm: 0.9999990840389501, iteration: 59184
loss: 0.9641883969306946,grad_norm: 0.9039438173645511, iteration: 59185
loss: 0.9797285199165344,grad_norm: 0.9999996313020089, iteration: 59186
loss: 1.018802523612976,grad_norm: 0.999999148392058, iteration: 59187
loss: 1.031324028968811,grad_norm: 0.9999994478535849, iteration: 59188
loss: 1.0127960443496704,grad_norm: 0.7825517861565494, iteration: 59189
loss: 1.025306224822998,grad_norm: 0.9174884891861925, iteration: 59190
loss: 1.024948000907898,grad_norm: 0.8161941839361928, iteration: 59191
loss: 0.9726178050041199,grad_norm: 0.8636056348617333, iteration: 59192
loss: 1.018039584159851,grad_norm: 0.9802597652034505, iteration: 59193
loss: 0.9984484910964966,grad_norm: 0.9999990843487844, iteration: 59194
loss: 0.9983919858932495,grad_norm: 0.9999996468620201, iteration: 59195
loss: 1.0065641403198242,grad_norm: 0.9999990967916422, iteration: 59196
loss: 0.98977130651474,grad_norm: 0.9248357856922942, iteration: 59197
loss: 1.0483287572860718,grad_norm: 0.9999992239600355, iteration: 59198
loss: 0.9750339984893799,grad_norm: 0.9999991095267016, iteration: 59199
loss: 0.9860495924949646,grad_norm: 0.9999990763646915, iteration: 59200
loss: 1.0023579597473145,grad_norm: 0.9999990974957662, iteration: 59201
loss: 0.9528049230575562,grad_norm: 0.7762959825061406, iteration: 59202
loss: 1.000140905380249,grad_norm: 0.9999992108233364, iteration: 59203
loss: 1.0279251337051392,grad_norm: 0.9966109892435567, iteration: 59204
loss: 1.0415980815887451,grad_norm: 0.9235293812250941, iteration: 59205
loss: 1.0217119455337524,grad_norm: 0.9999994617861693, iteration: 59206
loss: 1.0338926315307617,grad_norm: 0.9999991007939238, iteration: 59207
loss: 1.0332987308502197,grad_norm: 0.8240842942317373, iteration: 59208
loss: 0.9943916201591492,grad_norm: 0.8547350610440655, iteration: 59209
loss: 0.9832926392555237,grad_norm: 0.8600706608693706, iteration: 59210
loss: 1.0017131567001343,grad_norm: 0.9203237375702762, iteration: 59211
loss: 0.9908742904663086,grad_norm: 0.9999992534457396, iteration: 59212
loss: 0.9985611438751221,grad_norm: 0.9999992788668056, iteration: 59213
loss: 1.033660888671875,grad_norm: 0.9159758264161465, iteration: 59214
loss: 0.9856988191604614,grad_norm: 0.999999470793611, iteration: 59215
loss: 0.9871671199798584,grad_norm: 0.8984403498254129, iteration: 59216
loss: 1.0317219495773315,grad_norm: 0.9999998051800587, iteration: 59217
loss: 1.0217845439910889,grad_norm: 0.852598652041762, iteration: 59218
loss: 0.9741198420524597,grad_norm: 0.8626625612774866, iteration: 59219
loss: 1.027513027191162,grad_norm: 0.9999991246359471, iteration: 59220
loss: 0.9813706874847412,grad_norm: 0.9999993165866266, iteration: 59221
loss: 1.004744291305542,grad_norm: 0.999999046198416, iteration: 59222
loss: 0.978157103061676,grad_norm: 0.9260542245074451, iteration: 59223
loss: 1.006988763809204,grad_norm: 0.8486169126368518, iteration: 59224
loss: 0.9973039031028748,grad_norm: 0.9624984241135809, iteration: 59225
loss: 0.9573414325714111,grad_norm: 0.999999233866557, iteration: 59226
loss: 0.9872286319732666,grad_norm: 0.9588710000777303, iteration: 59227
loss: 1.0124911069869995,grad_norm: 0.9239403357861791, iteration: 59228
loss: 0.9623203277587891,grad_norm: 0.9999989128794431, iteration: 59229
loss: 1.0386748313903809,grad_norm: 0.9999998734174599, iteration: 59230
loss: 0.9900757074356079,grad_norm: 0.9431009677859759, iteration: 59231
loss: 1.0105434656143188,grad_norm: 0.9999994977058707, iteration: 59232
loss: 1.0012784004211426,grad_norm: 0.9829413417717587, iteration: 59233
loss: 1.0124926567077637,grad_norm: 0.9735140863080477, iteration: 59234
loss: 0.9849041700363159,grad_norm: 0.9999990741890263, iteration: 59235
loss: 0.9919113516807556,grad_norm: 0.8657058224174964, iteration: 59236
loss: 1.022524118423462,grad_norm: 0.9999991310315011, iteration: 59237
loss: 1.0161678791046143,grad_norm: 0.9999990834921892, iteration: 59238
loss: 1.002076268196106,grad_norm: 0.9999991678855421, iteration: 59239
loss: 0.9395045638084412,grad_norm: 0.9999993766144006, iteration: 59240
loss: 1.0644128322601318,grad_norm: 0.9999994651188703, iteration: 59241
loss: 1.064396619796753,grad_norm: 0.999999225243265, iteration: 59242
loss: 0.9866083264350891,grad_norm: 0.8928106052315722, iteration: 59243
loss: 0.968478262424469,grad_norm: 0.8392202572036952, iteration: 59244
loss: 0.9905269742012024,grad_norm: 0.8227977491380386, iteration: 59245
loss: 1.0006475448608398,grad_norm: 0.9999992103298727, iteration: 59246
loss: 1.0211952924728394,grad_norm: 0.9999991463376423, iteration: 59247
loss: 1.024341344833374,grad_norm: 0.974187218063153, iteration: 59248
loss: 1.0068162679672241,grad_norm: 0.9366276158467682, iteration: 59249
loss: 1.0041117668151855,grad_norm: 0.9999992678443167, iteration: 59250
loss: 0.9762040972709656,grad_norm: 0.8975466362034925, iteration: 59251
loss: 1.043479084968567,grad_norm: 0.9999991367686877, iteration: 59252
loss: 1.0077805519104004,grad_norm: 0.953162618904342, iteration: 59253
loss: 0.9784389734268188,grad_norm: 0.8165400255499388, iteration: 59254
loss: 0.9780516028404236,grad_norm: 0.8964618356534035, iteration: 59255
loss: 1.0098676681518555,grad_norm: 0.9999996013794554, iteration: 59256
loss: 0.9927268028259277,grad_norm: 0.9030961205903089, iteration: 59257
loss: 0.9801962971687317,grad_norm: 0.999999222450048, iteration: 59258
loss: 0.9950120449066162,grad_norm: 0.8301786625420563, iteration: 59259
loss: 1.0093320608139038,grad_norm: 0.9999991491734529, iteration: 59260
loss: 0.9843182563781738,grad_norm: 0.9429974794048469, iteration: 59261
loss: 1.0178154706954956,grad_norm: 0.9999990923527723, iteration: 59262
loss: 1.0288265943527222,grad_norm: 0.9999993175068839, iteration: 59263
loss: 0.9929642081260681,grad_norm: 0.9901570024729149, iteration: 59264
loss: 1.0424391031265259,grad_norm: 0.9999993488988151, iteration: 59265
loss: 1.0301270484924316,grad_norm: 0.9999996151713858, iteration: 59266
loss: 0.9648597836494446,grad_norm: 0.9758110040645446, iteration: 59267
loss: 1.0221929550170898,grad_norm: 0.999999098696015, iteration: 59268
loss: 1.0000771284103394,grad_norm: 0.9162526922339534, iteration: 59269
loss: 1.0450729131698608,grad_norm: 0.9999991011643807, iteration: 59270
loss: 1.0456181764602661,grad_norm: 0.9999990644407778, iteration: 59271
loss: 1.0121304988861084,grad_norm: 0.9999991213315347, iteration: 59272
loss: 0.9518585205078125,grad_norm: 0.9999990793106562, iteration: 59273
loss: 1.0103716850280762,grad_norm: 0.7580723696792437, iteration: 59274
loss: 1.0274162292480469,grad_norm: 0.9999991655637425, iteration: 59275
loss: 1.0287904739379883,grad_norm: 0.9946648926893119, iteration: 59276
loss: 1.049988865852356,grad_norm: 0.9999991211564466, iteration: 59277
loss: 1.0240302085876465,grad_norm: 0.8723324262131051, iteration: 59278
loss: 0.9787577986717224,grad_norm: 0.9753217650661312, iteration: 59279
loss: 1.0243825912475586,grad_norm: 0.9999990415654543, iteration: 59280
loss: 1.0964539051055908,grad_norm: 0.999999066039883, iteration: 59281
loss: 0.9813497066497803,grad_norm: 0.9808392037259984, iteration: 59282
loss: 1.0069888830184937,grad_norm: 0.7890769734679042, iteration: 59283
loss: 0.9927560687065125,grad_norm: 0.9999990126972702, iteration: 59284
loss: 1.010516881942749,grad_norm: 0.8054375846333065, iteration: 59285
loss: 1.04649817943573,grad_norm: 0.9290959941940229, iteration: 59286
loss: 1.0038182735443115,grad_norm: 0.8815205506618248, iteration: 59287
loss: 0.9912399649620056,grad_norm: 0.99999918916584, iteration: 59288
loss: 0.987693727016449,grad_norm: 0.9760779301995762, iteration: 59289
loss: 1.0431874990463257,grad_norm: 0.8705097909295404, iteration: 59290
loss: 0.9768088459968567,grad_norm: 0.9999991390978025, iteration: 59291
loss: 0.9916486144065857,grad_norm: 0.8804777468631612, iteration: 59292
loss: 1.0360301733016968,grad_norm: 0.7997087288366663, iteration: 59293
loss: 0.9838358759880066,grad_norm: 0.9999996395507494, iteration: 59294
loss: 1.0004513263702393,grad_norm: 0.971179189702108, iteration: 59295
loss: 1.0289305448532104,grad_norm: 0.9697475224724977, iteration: 59296
loss: 1.0271480083465576,grad_norm: 0.9999995023519346, iteration: 59297
loss: 1.016371250152588,grad_norm: 0.9999990772452579, iteration: 59298
loss: 1.0277307033538818,grad_norm: 0.8153698622873978, iteration: 59299
loss: 0.9938130974769592,grad_norm: 0.8639355125165438, iteration: 59300
loss: 0.9944628477096558,grad_norm: 0.9999990630645823, iteration: 59301
loss: 0.9745503067970276,grad_norm: 0.9999991218554227, iteration: 59302
loss: 1.0166511535644531,grad_norm: 0.9999991538074832, iteration: 59303
loss: 1.0359982252120972,grad_norm: 0.7776841795886339, iteration: 59304
loss: 1.025903582572937,grad_norm: 0.8747566513910305, iteration: 59305
loss: 1.0044559240341187,grad_norm: 0.8948641824549964, iteration: 59306
loss: 0.9869459271430969,grad_norm: 0.9999989848435753, iteration: 59307
loss: 1.0133261680603027,grad_norm: 0.8247458785301903, iteration: 59308
loss: 1.0025230646133423,grad_norm: 0.982140994342758, iteration: 59309
loss: 1.0229780673980713,grad_norm: 0.9999995092912518, iteration: 59310
loss: 1.002068042755127,grad_norm: 0.8760946269526914, iteration: 59311
loss: 0.9585660099983215,grad_norm: 0.8856996953276881, iteration: 59312
loss: 1.016367793083191,grad_norm: 0.9999993245310299, iteration: 59313
loss: 0.9779685735702515,grad_norm: 0.9999989588948034, iteration: 59314
loss: 1.0216577053070068,grad_norm: 0.9526652494389706, iteration: 59315
loss: 1.0300517082214355,grad_norm: 0.9999991762050215, iteration: 59316
loss: 0.9966149926185608,grad_norm: 0.9999991534669327, iteration: 59317
loss: 1.0222251415252686,grad_norm: 0.8563840289844823, iteration: 59318
loss: 1.0051515102386475,grad_norm: 0.8676485103719959, iteration: 59319
loss: 1.011200189590454,grad_norm: 0.8700847399505279, iteration: 59320
loss: 1.0122452974319458,grad_norm: 0.8926372165014614, iteration: 59321
loss: 0.9953250288963318,grad_norm: 0.8303647259552582, iteration: 59322
loss: 0.986886203289032,grad_norm: 0.999999110650008, iteration: 59323
loss: 1.043316125869751,grad_norm: 0.9999993041114302, iteration: 59324
loss: 1.008791446685791,grad_norm: 0.9352344767747606, iteration: 59325
loss: 1.0353562831878662,grad_norm: 0.9999993771794264, iteration: 59326
loss: 1.0133438110351562,grad_norm: 0.9999989718669055, iteration: 59327
loss: 1.0544419288635254,grad_norm: 0.9975467104787441, iteration: 59328
loss: 0.975948691368103,grad_norm: 0.8737759741423667, iteration: 59329
loss: 0.96150803565979,grad_norm: 0.9999991458027481, iteration: 59330
loss: 1.058077335357666,grad_norm: 0.9999993486292706, iteration: 59331
loss: 0.9741491675376892,grad_norm: 0.9999991486725587, iteration: 59332
loss: 1.0459442138671875,grad_norm: 0.9762810821549036, iteration: 59333
loss: 1.0049799680709839,grad_norm: 0.9999989397025799, iteration: 59334
loss: 1.0399619340896606,grad_norm: 0.9838845247963618, iteration: 59335
loss: 0.9889492988586426,grad_norm: 0.9999990846976482, iteration: 59336
loss: 1.0340808629989624,grad_norm: 0.807989351006542, iteration: 59337
loss: 1.0644549131393433,grad_norm: 0.9129011973090185, iteration: 59338
loss: 0.9984910488128662,grad_norm: 0.8732508436874183, iteration: 59339
loss: 0.9861794114112854,grad_norm: 0.9999991939586397, iteration: 59340
loss: 1.006120204925537,grad_norm: 0.9049407567735646, iteration: 59341
loss: 0.9953736662864685,grad_norm: 0.8301730599078745, iteration: 59342
loss: 0.9853236079216003,grad_norm: 0.8206497400117189, iteration: 59343
loss: 0.9826446175575256,grad_norm: 0.7576007324345919, iteration: 59344
loss: 1.0485265254974365,grad_norm: 0.9999998857971206, iteration: 59345
loss: 0.9789233803749084,grad_norm: 0.9485093989078406, iteration: 59346
loss: 0.9984497427940369,grad_norm: 0.886965637049819, iteration: 59347
loss: 1.0033419132232666,grad_norm: 0.8716776263221079, iteration: 59348
loss: 0.9945088624954224,grad_norm: 0.8450216281740652, iteration: 59349
loss: 1.012584924697876,grad_norm: 0.94561699778618, iteration: 59350
loss: 1.0174860954284668,grad_norm: 0.801762105909055, iteration: 59351
loss: 0.9978055357933044,grad_norm: 0.8738355271671662, iteration: 59352
loss: 0.9998201727867126,grad_norm: 0.7731048238969007, iteration: 59353
loss: 1.0128313302993774,grad_norm: 0.9942946908978229, iteration: 59354
loss: 0.9922811388969421,grad_norm: 0.9064615142893802, iteration: 59355
loss: 1.0410081148147583,grad_norm: 0.8656135541292014, iteration: 59356
loss: 0.9776217937469482,grad_norm: 0.7490214935633667, iteration: 59357
loss: 1.0085853338241577,grad_norm: 0.8969963936705915, iteration: 59358
loss: 1.0008989572525024,grad_norm: 0.8628732121520641, iteration: 59359
loss: 1.0302883386611938,grad_norm: 0.999998994851267, iteration: 59360
loss: 1.0270980596542358,grad_norm: 0.9999995300918547, iteration: 59361
loss: 0.9871877431869507,grad_norm: 0.9803732094203562, iteration: 59362
loss: 1.0155197381973267,grad_norm: 0.9871478335255821, iteration: 59363
loss: 1.0068258047103882,grad_norm: 0.8230442473896965, iteration: 59364
loss: 0.9815350770950317,grad_norm: 0.8417864747340064, iteration: 59365
loss: 1.0422683954238892,grad_norm: 0.9999990402075839, iteration: 59366
loss: 0.9612155556678772,grad_norm: 0.9627956283429481, iteration: 59367
loss: 1.0136327743530273,grad_norm: 0.8835296558646987, iteration: 59368
loss: 1.0810538530349731,grad_norm: 0.9999996160627069, iteration: 59369
loss: 1.003664493560791,grad_norm: 0.8376259631154874, iteration: 59370
loss: 1.0048333406448364,grad_norm: 0.8251218041414962, iteration: 59371
loss: 0.9921634793281555,grad_norm: 0.8099054938906249, iteration: 59372
loss: 0.9869519472122192,grad_norm: 0.9999990793461705, iteration: 59373
loss: 0.9984649419784546,grad_norm: 0.9570537090490173, iteration: 59374
loss: 1.0572237968444824,grad_norm: 0.9999996340760143, iteration: 59375
loss: 0.9959683418273926,grad_norm: 0.9999991903969935, iteration: 59376
loss: 1.0246002674102783,grad_norm: 0.9999991646228928, iteration: 59377
loss: 0.9907880425453186,grad_norm: 0.9373856134711639, iteration: 59378
loss: 1.0200235843658447,grad_norm: 0.8796880630212093, iteration: 59379
loss: 0.9684135317802429,grad_norm: 0.8265557441701247, iteration: 59380
loss: 1.048837423324585,grad_norm: 0.9999990544857859, iteration: 59381
loss: 1.0342352390289307,grad_norm: 0.8342123395195297, iteration: 59382
loss: 0.9858912825584412,grad_norm: 0.9047218237937404, iteration: 59383
loss: 1.0690618753433228,grad_norm: 0.9999995643010204, iteration: 59384
loss: 1.0244823694229126,grad_norm: 0.989871924177985, iteration: 59385
loss: 0.9984344840049744,grad_norm: 0.9556561474112538, iteration: 59386
loss: 1.014785647392273,grad_norm: 0.8013873959813615, iteration: 59387
loss: 0.9729421734809875,grad_norm: 0.9228255934730287, iteration: 59388
loss: 1.0333411693572998,grad_norm: 0.8353821543439142, iteration: 59389
loss: 0.9877227544784546,grad_norm: 0.9999991585364603, iteration: 59390
loss: 0.9997192025184631,grad_norm: 0.9565215311849379, iteration: 59391
loss: 0.9810046553611755,grad_norm: 0.8293243417078655, iteration: 59392
loss: 1.0220876932144165,grad_norm: 0.8839346925519306, iteration: 59393
loss: 0.9702568650245667,grad_norm: 0.8245972533594941, iteration: 59394
loss: 0.9964662790298462,grad_norm: 0.9412056164435311, iteration: 59395
loss: 1.0019639730453491,grad_norm: 0.9065693878127393, iteration: 59396
loss: 1.0234304666519165,grad_norm: 0.9999992358325822, iteration: 59397
loss: 1.033631682395935,grad_norm: 0.9999991700335737, iteration: 59398
loss: 1.0096590518951416,grad_norm: 0.9999989850783649, iteration: 59399
loss: 1.0045888423919678,grad_norm: 0.9999991140134443, iteration: 59400
loss: 0.9968839883804321,grad_norm: 0.9925621326794741, iteration: 59401
loss: 0.9896303415298462,grad_norm: 0.9730103722253208, iteration: 59402
loss: 1.0247468948364258,grad_norm: 0.9999990058669427, iteration: 59403
loss: 0.9602823853492737,grad_norm: 0.999999561478852, iteration: 59404
loss: 0.9948698282241821,grad_norm: 0.9999993498359173, iteration: 59405
loss: 1.019680142402649,grad_norm: 0.8667142164261494, iteration: 59406
loss: 1.052310824394226,grad_norm: 0.8052461507596326, iteration: 59407
loss: 0.962536096572876,grad_norm: 0.9999990842072164, iteration: 59408
loss: 0.9845379590988159,grad_norm: 0.9678645466210951, iteration: 59409
loss: 0.949605405330658,grad_norm: 0.8119985238320312, iteration: 59410
loss: 1.0426985025405884,grad_norm: 0.8533124866288609, iteration: 59411
loss: 1.0242969989776611,grad_norm: 0.8617048892257536, iteration: 59412
loss: 1.0058003664016724,grad_norm: 0.8276002807538871, iteration: 59413
loss: 0.9879134297370911,grad_norm: 0.9107230715630766, iteration: 59414
loss: 0.9960647225379944,grad_norm: 0.9121410084420573, iteration: 59415
loss: 0.9528705477714539,grad_norm: 0.9346247282088516, iteration: 59416
loss: 1.0740996599197388,grad_norm: 0.9999993393077931, iteration: 59417
loss: 0.997183620929718,grad_norm: 0.9156809156793547, iteration: 59418
loss: 1.038346767425537,grad_norm: 0.9999995772430423, iteration: 59419
loss: 1.024511694908142,grad_norm: 0.9999990488803017, iteration: 59420
loss: 1.0205729007720947,grad_norm: 0.8110181131797981, iteration: 59421
loss: 1.045181393623352,grad_norm: 0.999999260026048, iteration: 59422
loss: 0.9870267510414124,grad_norm: 0.9999991849014759, iteration: 59423
loss: 0.9841150641441345,grad_norm: 0.8653076168366084, iteration: 59424
loss: 1.0065062046051025,grad_norm: 0.9012637990591623, iteration: 59425
loss: 0.9964601397514343,grad_norm: 0.9999990243063739, iteration: 59426
loss: 0.9778274893760681,grad_norm: 0.9183420102535912, iteration: 59427
loss: 1.0086824893951416,grad_norm: 0.9479792328914125, iteration: 59428
loss: 1.0375550985336304,grad_norm: 0.9999992619142276, iteration: 59429
loss: 0.9941678643226624,grad_norm: 0.9267397965808668, iteration: 59430
loss: 1.030653953552246,grad_norm: 0.8710121207282842, iteration: 59431
loss: 0.9638550877571106,grad_norm: 0.9999990974609347, iteration: 59432
loss: 1.007365107536316,grad_norm: 0.8734457027332354, iteration: 59433
loss: 0.9349398612976074,grad_norm: 0.9999991183748643, iteration: 59434
loss: 1.0468591451644897,grad_norm: 0.9999991466614668, iteration: 59435
loss: 1.043757438659668,grad_norm: 0.7915569055916478, iteration: 59436
loss: 1.0248987674713135,grad_norm: 0.9190393301786997, iteration: 59437
loss: 1.0683329105377197,grad_norm: 0.9999997097912415, iteration: 59438
loss: 1.0114519596099854,grad_norm: 0.935975134654511, iteration: 59439
loss: 0.9857849478721619,grad_norm: 0.8607750601265227, iteration: 59440
loss: 1.0132198333740234,grad_norm: 0.9999990751596618, iteration: 59441
loss: 1.008988857269287,grad_norm: 0.8697710417368923, iteration: 59442
loss: 1.03744637966156,grad_norm: 0.9999992180766475, iteration: 59443
loss: 1.006819486618042,grad_norm: 0.9999991646451009, iteration: 59444
loss: 1.0351040363311768,grad_norm: 0.8605456699495805, iteration: 59445
loss: 1.02384352684021,grad_norm: 0.9999990906132142, iteration: 59446
loss: 0.985154390335083,grad_norm: 0.999999111133393, iteration: 59447
loss: 1.0205720663070679,grad_norm: 0.999999390780846, iteration: 59448
loss: 0.9829103946685791,grad_norm: 0.8839500123712365, iteration: 59449
loss: 0.9615580439567566,grad_norm: 0.8949284659483839, iteration: 59450
loss: 0.9892918467521667,grad_norm: 0.9197415697182525, iteration: 59451
loss: 1.023584246635437,grad_norm: 0.9999990140278525, iteration: 59452
loss: 1.051498293876648,grad_norm: 0.9999993583651021, iteration: 59453
loss: 0.9690961241722107,grad_norm: 0.9308629616094782, iteration: 59454
loss: 0.9816663861274719,grad_norm: 0.9217112747738517, iteration: 59455
loss: 1.007127046585083,grad_norm: 0.980863762789084, iteration: 59456
loss: 1.008955955505371,grad_norm: 0.8182147261520797, iteration: 59457
loss: 1.0190240144729614,grad_norm: 0.9999990665994294, iteration: 59458
loss: 1.0057837963104248,grad_norm: 0.8835873730375335, iteration: 59459
loss: 0.9797644019126892,grad_norm: 0.9999991426838838, iteration: 59460
loss: 1.0319819450378418,grad_norm: 0.9999991198312699, iteration: 59461
loss: 0.995691180229187,grad_norm: 0.9999990472186038, iteration: 59462
loss: 0.9925845861434937,grad_norm: 0.867757919555093, iteration: 59463
loss: 1.039644718170166,grad_norm: 0.8893352590912603, iteration: 59464
loss: 0.9970376491546631,grad_norm: 0.9999989593485202, iteration: 59465
loss: 0.993600070476532,grad_norm: 0.915983176219936, iteration: 59466
loss: 1.0022377967834473,grad_norm: 0.9999992295254688, iteration: 59467
loss: 0.9802244305610657,grad_norm: 0.8295627728271157, iteration: 59468
loss: 1.0414369106292725,grad_norm: 0.999999072134579, iteration: 59469
loss: 1.0075877904891968,grad_norm: 0.9923327466147096, iteration: 59470
loss: 1.0199332237243652,grad_norm: 0.7617490165611218, iteration: 59471
loss: 0.9938963055610657,grad_norm: 0.8728612869923312, iteration: 59472
loss: 1.0347883701324463,grad_norm: 0.9999994288467514, iteration: 59473
loss: 1.0097404718399048,grad_norm: 0.9999989881620739, iteration: 59474
loss: 1.1257199048995972,grad_norm: 0.9999990577664726, iteration: 59475
loss: 0.971070408821106,grad_norm: 0.9410336494390233, iteration: 59476
loss: 1.0172280073165894,grad_norm: 0.9305293125077561, iteration: 59477
loss: 0.9942389726638794,grad_norm: 0.933173583206792, iteration: 59478
loss: 1.008199691772461,grad_norm: 0.9999991631962675, iteration: 59479
loss: 1.0076861381530762,grad_norm: 0.9999992620478002, iteration: 59480
loss: 0.9922005534172058,grad_norm: 0.9724527560211843, iteration: 59481
loss: 0.9567073583602905,grad_norm: 0.9999989750108902, iteration: 59482
loss: 1.044497013092041,grad_norm: 0.999999333980333, iteration: 59483
loss: 1.0198131799697876,grad_norm: 0.9863718436129343, iteration: 59484
loss: 1.0026425123214722,grad_norm: 0.8952752579378496, iteration: 59485
loss: 1.0130921602249146,grad_norm: 0.9999990348685002, iteration: 59486
loss: 1.0021394491195679,grad_norm: 0.9380084933727293, iteration: 59487
loss: 1.0108137130737305,grad_norm: 0.9999989698955057, iteration: 59488
loss: 1.039818525314331,grad_norm: 0.9203096912134738, iteration: 59489
loss: 1.0255569219589233,grad_norm: 0.9999991212907314, iteration: 59490
loss: 1.0294289588928223,grad_norm: 0.9999991655755359, iteration: 59491
loss: 1.0125266313552856,grad_norm: 0.9999994670252816, iteration: 59492
loss: 0.9973328709602356,grad_norm: 0.999515170499189, iteration: 59493
loss: 1.002012014389038,grad_norm: 0.9855395962127608, iteration: 59494
loss: 1.000901460647583,grad_norm: 0.919929217145223, iteration: 59495
loss: 0.9815735220909119,grad_norm: 0.922346771227557, iteration: 59496
loss: 1.013655424118042,grad_norm: 0.9999991430240841, iteration: 59497
loss: 0.9733538031578064,grad_norm: 0.9143126649828748, iteration: 59498
loss: 0.9939285516738892,grad_norm: 0.9999991952242877, iteration: 59499
loss: 0.9953432083129883,grad_norm: 0.8619367628598875, iteration: 59500
loss: 0.9914994835853577,grad_norm: 0.9164443863339913, iteration: 59501
loss: 1.031062126159668,grad_norm: 0.9999991502669536, iteration: 59502
loss: 0.9794735908508301,grad_norm: 0.964969807726974, iteration: 59503
loss: 1.007667064666748,grad_norm: 0.9015239614705574, iteration: 59504
loss: 0.9693341255187988,grad_norm: 0.9999992181804193, iteration: 59505
loss: 1.012851595878601,grad_norm: 0.9999990451948603, iteration: 59506
loss: 0.9811814427375793,grad_norm: 0.999999072855257, iteration: 59507
loss: 1.026599407196045,grad_norm: 0.9999992657269212, iteration: 59508
loss: 1.011879324913025,grad_norm: 0.999999260603612, iteration: 59509
loss: 1.0235302448272705,grad_norm: 0.9999992135302036, iteration: 59510
loss: 0.9834383726119995,grad_norm: 0.9223078177531657, iteration: 59511
loss: 0.9979519248008728,grad_norm: 0.9179171182472731, iteration: 59512
loss: 1.0128166675567627,grad_norm: 0.8490275163886915, iteration: 59513
loss: 0.9977445006370544,grad_norm: 0.9999994325500723, iteration: 59514
loss: 0.9742937088012695,grad_norm: 0.9999990508512714, iteration: 59515
loss: 0.9903574585914612,grad_norm: 0.9532364963782142, iteration: 59516
loss: 0.9754911065101624,grad_norm: 0.9056257774438936, iteration: 59517
loss: 1.0161889791488647,grad_norm: 0.999998949630843, iteration: 59518
loss: 0.9835115075111389,grad_norm: 0.8401058149757935, iteration: 59519
loss: 0.9718043208122253,grad_norm: 0.9999991845430555, iteration: 59520
loss: 0.996882975101471,grad_norm: 0.9476191539375737, iteration: 59521
loss: 1.0020112991333008,grad_norm: 0.9757847258349682, iteration: 59522
loss: 1.0006990432739258,grad_norm: 0.9326296590802936, iteration: 59523
loss: 1.0104115009307861,grad_norm: 0.9681540334226398, iteration: 59524
loss: 0.9512337446212769,grad_norm: 0.8565984347995768, iteration: 59525
loss: 0.9867080450057983,grad_norm: 0.9999990632835638, iteration: 59526
loss: 1.006956934928894,grad_norm: 0.9999990476393517, iteration: 59527
loss: 1.0254490375518799,grad_norm: 0.9999992788443663, iteration: 59528
loss: 1.0076967477798462,grad_norm: 0.9606188584131711, iteration: 59529
loss: 0.9950030446052551,grad_norm: 0.8411253111597179, iteration: 59530
loss: 0.956604540348053,grad_norm: 0.9277907206815531, iteration: 59531
loss: 0.9904357194900513,grad_norm: 0.8863380447389319, iteration: 59532
loss: 0.9759566783905029,grad_norm: 0.8414857813836975, iteration: 59533
loss: 1.0016661882400513,grad_norm: 0.9999992093700137, iteration: 59534
loss: 0.9982773065567017,grad_norm: 0.9628140148448523, iteration: 59535
loss: 1.0125243663787842,grad_norm: 0.999999136909759, iteration: 59536
loss: 1.0012739896774292,grad_norm: 0.871414684592777, iteration: 59537
loss: 1.0585895776748657,grad_norm: 0.9995315638351069, iteration: 59538
loss: 0.9943152666091919,grad_norm: 0.9062804721973141, iteration: 59539
loss: 1.024262547492981,grad_norm: 0.9445106904177345, iteration: 59540
loss: 1.0052002668380737,grad_norm: 0.999999130872803, iteration: 59541
loss: 0.999584972858429,grad_norm: 0.8685701999020615, iteration: 59542
loss: 0.9903185963630676,grad_norm: 0.9999991188933511, iteration: 59543
loss: 1.0017699003219604,grad_norm: 0.9642787224482303, iteration: 59544
loss: 0.9532650113105774,grad_norm: 0.9171201798498844, iteration: 59545
loss: 1.0158599615097046,grad_norm: 0.9046400297560281, iteration: 59546
loss: 0.9922537207603455,grad_norm: 0.9019277867344064, iteration: 59547
loss: 1.0086297988891602,grad_norm: 0.9999990456742655, iteration: 59548
loss: 0.9898532629013062,grad_norm: 0.9779291799224071, iteration: 59549
loss: 0.9886982440948486,grad_norm: 0.922428941339736, iteration: 59550
loss: 1.0602868795394897,grad_norm: 0.9999991777560697, iteration: 59551
loss: 0.9843787550926208,grad_norm: 0.8694423716718228, iteration: 59552
loss: 1.0751529932022095,grad_norm: 0.9179811908030435, iteration: 59553
loss: 1.0103636980056763,grad_norm: 0.9978049320905583, iteration: 59554
loss: 0.9940148591995239,grad_norm: 0.9952457086344436, iteration: 59555
loss: 1.0039483308792114,grad_norm: 0.8891232176285466, iteration: 59556
loss: 1.0496755838394165,grad_norm: 0.9999995301539311, iteration: 59557
loss: 0.9921775460243225,grad_norm: 0.9643814065600367, iteration: 59558
loss: 1.022602915763855,grad_norm: 0.9999991331216445, iteration: 59559
loss: 1.0054042339324951,grad_norm: 0.9243279540737712, iteration: 59560
loss: 0.9848871231079102,grad_norm: 0.954842201461952, iteration: 59561
loss: 1.0196882486343384,grad_norm: 0.9329531940162451, iteration: 59562
loss: 1.0038710832595825,grad_norm: 0.937353376883428, iteration: 59563
loss: 1.0843088626861572,grad_norm: 0.877984910740542, iteration: 59564
loss: 1.119015097618103,grad_norm: 0.9999994859476484, iteration: 59565
loss: 0.9990664720535278,grad_norm: 0.8475573556156338, iteration: 59566
loss: 0.9734090566635132,grad_norm: 0.8884214843323175, iteration: 59567
loss: 1.0209039449691772,grad_norm: 0.8658234341812693, iteration: 59568
loss: 0.9562874436378479,grad_norm: 0.9195062335156018, iteration: 59569
loss: 0.9866078495979309,grad_norm: 0.9999991327041787, iteration: 59570
loss: 0.999360978603363,grad_norm: 0.999999212940785, iteration: 59571
loss: 1.0708810091018677,grad_norm: 0.9999993538079721, iteration: 59572
loss: 1.027766466140747,grad_norm: 0.9999990901102584, iteration: 59573
loss: 0.9785422682762146,grad_norm: 0.8506135277523986, iteration: 59574
loss: 0.9972408413887024,grad_norm: 0.9283227165797728, iteration: 59575
loss: 1.0139884948730469,grad_norm: 0.9999991764158525, iteration: 59576
loss: 0.9792759418487549,grad_norm: 0.8582732268863056, iteration: 59577
loss: 1.0312528610229492,grad_norm: 0.9487096954126852, iteration: 59578
loss: 1.0218472480773926,grad_norm: 0.9576760965682696, iteration: 59579
loss: 0.9861812591552734,grad_norm: 0.8669202176448223, iteration: 59580
loss: 1.0337167978286743,grad_norm: 0.98241275271565, iteration: 59581
loss: 1.000983715057373,grad_norm: 0.9102047650373248, iteration: 59582
loss: 0.9684114456176758,grad_norm: 0.8630702275529121, iteration: 59583
loss: 1.0601327419281006,grad_norm: 0.9999996951560389, iteration: 59584
loss: 0.9905505180358887,grad_norm: 0.8427164497487836, iteration: 59585
loss: 1.029401421546936,grad_norm: 0.9999996004206055, iteration: 59586
loss: 1.005261778831482,grad_norm: 0.9000578559591902, iteration: 59587
loss: 0.9907960295677185,grad_norm: 0.9452746519923602, iteration: 59588
loss: 1.0334076881408691,grad_norm: 0.9583752188325071, iteration: 59589
loss: 1.013397216796875,grad_norm: 0.9774775488075838, iteration: 59590
loss: 1.0182554721832275,grad_norm: 0.9589707374870265, iteration: 59591
loss: 0.9797454476356506,grad_norm: 0.7568210828358546, iteration: 59592
loss: 0.9537112712860107,grad_norm: 0.992001594211839, iteration: 59593
loss: 1.0141552686691284,grad_norm: 0.9999991930531832, iteration: 59594
loss: 1.0053514242172241,grad_norm: 0.8751208239635245, iteration: 59595
loss: 0.9970377683639526,grad_norm: 0.8242872531493334, iteration: 59596
loss: 1.0277444124221802,grad_norm: 0.9186053603024413, iteration: 59597
loss: 0.9946680068969727,grad_norm: 0.9999990890557765, iteration: 59598
loss: 1.0287667512893677,grad_norm: 0.9999992607483303, iteration: 59599
loss: 1.0071181058883667,grad_norm: 0.8330143830138032, iteration: 59600
loss: 0.9851806163787842,grad_norm: 0.9999992088046312, iteration: 59601
loss: 0.9698755741119385,grad_norm: 0.9747537990244255, iteration: 59602
loss: 1.0007492303848267,grad_norm: 0.86092788331892, iteration: 59603
loss: 1.0042403936386108,grad_norm: 0.9766998992180926, iteration: 59604
loss: 1.0268875360488892,grad_norm: 0.7591324401924184, iteration: 59605
loss: 0.9908871054649353,grad_norm: 0.9300447007830303, iteration: 59606
loss: 0.9762238264083862,grad_norm: 0.7833925356089415, iteration: 59607
loss: 1.0064024925231934,grad_norm: 0.9999991447101733, iteration: 59608
loss: 1.0023822784423828,grad_norm: 0.8678846082402518, iteration: 59609
loss: 0.9746671319007874,grad_norm: 0.9345955977892794, iteration: 59610
loss: 0.9717551469802856,grad_norm: 0.7472172647017363, iteration: 59611
loss: 1.0332473516464233,grad_norm: 0.8378865228639422, iteration: 59612
loss: 0.9826512336730957,grad_norm: 0.7713434583655633, iteration: 59613
loss: 1.0145010948181152,grad_norm: 0.7309668075161666, iteration: 59614
loss: 0.979547381401062,grad_norm: 0.8329490374352353, iteration: 59615
loss: 1.021352767944336,grad_norm: 0.8910599936287852, iteration: 59616
loss: 1.0233322381973267,grad_norm: 0.960705465223724, iteration: 59617
loss: 1.0000827312469482,grad_norm: 0.8850932255931638, iteration: 59618
loss: 0.9993852376937866,grad_norm: 0.9999990589419944, iteration: 59619
loss: 0.9969183802604675,grad_norm: 0.7959130343076458, iteration: 59620
loss: 0.9967349171638489,grad_norm: 0.9090149962614092, iteration: 59621
loss: 1.0377939939498901,grad_norm: 0.9999991384007035, iteration: 59622
loss: 1.0225857496261597,grad_norm: 0.9999998177913573, iteration: 59623
loss: 1.0128449201583862,grad_norm: 0.9999990830932594, iteration: 59624
loss: 0.9574851989746094,grad_norm: 0.7611375819221277, iteration: 59625
loss: 1.004887342453003,grad_norm: 0.9873940972759809, iteration: 59626
loss: 0.9574756026268005,grad_norm: 0.9716825297301106, iteration: 59627
loss: 0.9707145094871521,grad_norm: 0.9999992741074148, iteration: 59628
loss: 0.9616608023643494,grad_norm: 0.9753203856237084, iteration: 59629
loss: 1.1504188776016235,grad_norm: 0.9999993258040831, iteration: 59630
loss: 1.045222282409668,grad_norm: 0.9506177246439016, iteration: 59631
loss: 1.0034292936325073,grad_norm: 0.905036899767185, iteration: 59632
loss: 0.9492099285125732,grad_norm: 0.9502912416246106, iteration: 59633
loss: 1.039552927017212,grad_norm: 0.8961124694218062, iteration: 59634
loss: 0.9813269376754761,grad_norm: 0.9409067145032983, iteration: 59635
loss: 0.9734360575675964,grad_norm: 0.9271854153681389, iteration: 59636
loss: 0.991844892501831,grad_norm: 0.8893080839672082, iteration: 59637
loss: 0.9985309839248657,grad_norm: 0.8956140507714181, iteration: 59638
loss: 1.0153393745422363,grad_norm: 0.9527734794846219, iteration: 59639
loss: 1.0273147821426392,grad_norm: 0.9999997447379958, iteration: 59640
loss: 1.025675654411316,grad_norm: 0.8533850715069629, iteration: 59641
loss: 1.0054386854171753,grad_norm: 0.945192686059495, iteration: 59642
loss: 1.0024025440216064,grad_norm: 0.9999991274421848, iteration: 59643
loss: 1.0249968767166138,grad_norm: 0.9999990377627397, iteration: 59644
loss: 1.0116275548934937,grad_norm: 0.9999990226450234, iteration: 59645
loss: 1.0101124048233032,grad_norm: 0.9000840838559817, iteration: 59646
loss: 1.0294133424758911,grad_norm: 0.8778966506326099, iteration: 59647
loss: 0.9706469178199768,grad_norm: 0.9710726024484043, iteration: 59648
loss: 1.0017517805099487,grad_norm: 0.9999990872424493, iteration: 59649
loss: 1.019185185432434,grad_norm: 0.9120352324573775, iteration: 59650
loss: 0.9399679899215698,grad_norm: 0.9999991881143515, iteration: 59651
loss: 1.0430283546447754,grad_norm: 0.8961470604147357, iteration: 59652
loss: 1.0011169910430908,grad_norm: 0.960896299664089, iteration: 59653
loss: 0.976026177406311,grad_norm: 0.9074572695355635, iteration: 59654
loss: 1.0212806463241577,grad_norm: 0.9543164089992768, iteration: 59655
loss: 1.0132029056549072,grad_norm: 0.8675589159358097, iteration: 59656
loss: 1.0180866718292236,grad_norm: 0.9626483108234631, iteration: 59657
loss: 1.0299538373947144,grad_norm: 0.817018181829381, iteration: 59658
loss: 0.985982358455658,grad_norm: 0.922342473406722, iteration: 59659
loss: 1.0619432926177979,grad_norm: 0.9999997193945315, iteration: 59660
loss: 1.0236642360687256,grad_norm: 0.92282242994831, iteration: 59661
loss: 0.9967601895332336,grad_norm: 0.8984831444812343, iteration: 59662
loss: 0.9916566014289856,grad_norm: 0.9999992067748036, iteration: 59663
loss: 0.9955754280090332,grad_norm: 0.9972887210709335, iteration: 59664
loss: 1.0089406967163086,grad_norm: 0.8279378113725625, iteration: 59665
loss: 1.0226686000823975,grad_norm: 0.9520369486356195, iteration: 59666
loss: 1.0371525287628174,grad_norm: 0.9451310550797989, iteration: 59667
loss: 1.0105582475662231,grad_norm: 0.945655650256524, iteration: 59668
loss: 1.0123708248138428,grad_norm: 0.9999989820573908, iteration: 59669
loss: 0.9902225732803345,grad_norm: 0.9916731580595544, iteration: 59670
loss: 1.085932731628418,grad_norm: 0.9999995454903533, iteration: 59671
loss: 0.9895516037940979,grad_norm: 0.9619267689024877, iteration: 59672
loss: 0.997600257396698,grad_norm: 0.9244249907764739, iteration: 59673
loss: 0.9908374547958374,grad_norm: 0.9017127616806969, iteration: 59674
loss: 0.9960972666740417,grad_norm: 0.918444723659677, iteration: 59675
loss: 0.970641553401947,grad_norm: 0.9299979760525658, iteration: 59676
loss: 1.025190830230713,grad_norm: 0.9999991660582948, iteration: 59677
loss: 1.0003962516784668,grad_norm: 0.8127808484158903, iteration: 59678
loss: 0.9765709042549133,grad_norm: 0.882098840386888, iteration: 59679
loss: 0.9974671006202698,grad_norm: 0.9884195550393774, iteration: 59680
loss: 1.0165828466415405,grad_norm: 0.9999990801436418, iteration: 59681
loss: 1.0038572549819946,grad_norm: 0.9999990534332239, iteration: 59682
loss: 0.9694017767906189,grad_norm: 0.9999990166613436, iteration: 59683
loss: 0.9933895468711853,grad_norm: 0.9152935334036155, iteration: 59684
loss: 0.9529255628585815,grad_norm: 0.9999992343320238, iteration: 59685
loss: 0.9923927187919617,grad_norm: 0.8599088909224354, iteration: 59686
loss: 0.9494185447692871,grad_norm: 0.9999991138269524, iteration: 59687
loss: 1.0307813882827759,grad_norm: 0.9999990635781043, iteration: 59688
loss: 1.0292253494262695,grad_norm: 0.9310921458848124, iteration: 59689
loss: 1.032810926437378,grad_norm: 0.9999990760550483, iteration: 59690
loss: 0.9479418992996216,grad_norm: 0.99999896346326, iteration: 59691
loss: 0.9970211982727051,grad_norm: 0.9716352908508972, iteration: 59692
loss: 0.9698633551597595,grad_norm: 0.9999991866530537, iteration: 59693
loss: 0.9739891290664673,grad_norm: 0.8612378377851123, iteration: 59694
loss: 1.0428322553634644,grad_norm: 0.8652796468830485, iteration: 59695
loss: 1.0109606981277466,grad_norm: 0.9366896958385047, iteration: 59696
loss: 1.0723668336868286,grad_norm: 0.9999993149338872, iteration: 59697
loss: 0.989600658416748,grad_norm: 0.9999991250571555, iteration: 59698
loss: 1.0099292993545532,grad_norm: 0.9255383665637487, iteration: 59699
loss: 1.0102418661117554,grad_norm: 0.9999989735863902, iteration: 59700
loss: 1.0591648817062378,grad_norm: 0.8451375115645839, iteration: 59701
loss: 1.0036686658859253,grad_norm: 0.9296288116955937, iteration: 59702
loss: 0.9893541932106018,grad_norm: 0.999999527895019, iteration: 59703
loss: 0.9894028902053833,grad_norm: 0.8420438208734765, iteration: 59704
loss: 0.98944091796875,grad_norm: 0.7787949268474167, iteration: 59705
loss: 0.997630774974823,grad_norm: 0.9999991943112566, iteration: 59706
loss: 1.0065736770629883,grad_norm: 0.9999990979471113, iteration: 59707
loss: 0.9862419962882996,grad_norm: 0.9307541964719508, iteration: 59708
loss: 1.0059407949447632,grad_norm: 0.874449278376774, iteration: 59709
loss: 0.9892067909240723,grad_norm: 0.8773080451977686, iteration: 59710
loss: 0.9584004878997803,grad_norm: 0.9999994991367128, iteration: 59711
loss: 0.9774697422981262,grad_norm: 0.940418971710645, iteration: 59712
loss: 0.9935094714164734,grad_norm: 0.8721488118927606, iteration: 59713
loss: 1.016828179359436,grad_norm: 0.999999101075362, iteration: 59714
loss: 1.0139360427856445,grad_norm: 0.8190037503499348, iteration: 59715
loss: 1.0058343410491943,grad_norm: 0.7854279915423746, iteration: 59716
loss: 0.9338449835777283,grad_norm: 0.9281766778221218, iteration: 59717
loss: 0.9986793994903564,grad_norm: 0.999999545949772, iteration: 59718
loss: 0.9445353746414185,grad_norm: 0.9119657714067784, iteration: 59719
loss: 1.070631504058838,grad_norm: 0.9999997637916236, iteration: 59720
loss: 1.0024452209472656,grad_norm: 0.9999993748385885, iteration: 59721
loss: 1.0574432611465454,grad_norm: 0.9999994240179808, iteration: 59722
loss: 1.0356234312057495,grad_norm: 0.9999991708605259, iteration: 59723
loss: 0.9837055802345276,grad_norm: 0.9725610531127126, iteration: 59724
loss: 0.9764478206634521,grad_norm: 0.9561375106293057, iteration: 59725
loss: 1.0479702949523926,grad_norm: 0.9999992449117024, iteration: 59726
loss: 1.0290477275848389,grad_norm: 0.8354873455679891, iteration: 59727
loss: 0.9931972622871399,grad_norm: 0.931733402692428, iteration: 59728
loss: 0.9522486329078674,grad_norm: 0.9413175036275563, iteration: 59729
loss: 0.9974772930145264,grad_norm: 0.9605083369295067, iteration: 59730
loss: 1.0088977813720703,grad_norm: 0.999999301031702, iteration: 59731
loss: 0.9940739274024963,grad_norm: 0.7861846547574781, iteration: 59732
loss: 1.0389901399612427,grad_norm: 0.9999997455615137, iteration: 59733
loss: 1.0352195501327515,grad_norm: 0.8964531893416994, iteration: 59734
loss: 0.9826251268386841,grad_norm: 0.8193320539305156, iteration: 59735
loss: 0.9637516140937805,grad_norm: 0.999999170360115, iteration: 59736
loss: 1.0001306533813477,grad_norm: 0.8421162438934979, iteration: 59737
loss: 0.9570422172546387,grad_norm: 0.9928933110315469, iteration: 59738
loss: 0.9923888444900513,grad_norm: 0.9999991530765501, iteration: 59739
loss: 0.9991540908813477,grad_norm: 0.7505833964011952, iteration: 59740
loss: 0.9851216077804565,grad_norm: 0.7870266880350979, iteration: 59741
loss: 0.9885406494140625,grad_norm: 0.8495430986820981, iteration: 59742
loss: 0.9938941597938538,grad_norm: 0.9628499752116343, iteration: 59743
loss: 1.0377370119094849,grad_norm: 0.881949803334651, iteration: 59744
loss: 1.020298719406128,grad_norm: 0.9797570390721501, iteration: 59745
loss: 1.0196030139923096,grad_norm: 0.8982158533406125, iteration: 59746
loss: 1.0250933170318604,grad_norm: 0.9888519641847092, iteration: 59747
loss: 1.0412359237670898,grad_norm: 0.9999992771559023, iteration: 59748
loss: 0.9867523312568665,grad_norm: 0.9644712905673254, iteration: 59749
loss: 1.0489507913589478,grad_norm: 0.9364810155901371, iteration: 59750
loss: 0.9996451735496521,grad_norm: 0.9999990625440213, iteration: 59751
loss: 1.0236150026321411,grad_norm: 0.9999991075152366, iteration: 59752
loss: 1.0051676034927368,grad_norm: 0.7640423114970013, iteration: 59753
loss: 1.0147349834442139,grad_norm: 0.8671723356142632, iteration: 59754
loss: 1.0057896375656128,grad_norm: 0.9160353564065743, iteration: 59755
loss: 1.026045560836792,grad_norm: 0.999999100915791, iteration: 59756
loss: 1.0220534801483154,grad_norm: 0.8981182031357969, iteration: 59757
loss: 1.0474693775177002,grad_norm: 0.9999995475053941, iteration: 59758
loss: 1.0431698560714722,grad_norm: 0.8456515750625928, iteration: 59759
loss: 1.0153001546859741,grad_norm: 0.99999900196296, iteration: 59760
loss: 0.9970980286598206,grad_norm: 0.9336869092839194, iteration: 59761
loss: 1.046466588973999,grad_norm: 0.9999994002468456, iteration: 59762
loss: 0.9981331825256348,grad_norm: 0.999999189997063, iteration: 59763
loss: 1.0068104267120361,grad_norm: 0.8894497869318759, iteration: 59764
loss: 0.9970276951789856,grad_norm: 0.7999808937097983, iteration: 59765
loss: 1.0092926025390625,grad_norm: 0.7999264766070295, iteration: 59766
loss: 1.0011787414550781,grad_norm: 0.9999991952316055, iteration: 59767
loss: 1.0326659679412842,grad_norm: 0.9804378598793209, iteration: 59768
loss: 0.9776321053504944,grad_norm: 0.8025121037720021, iteration: 59769
loss: 0.9615822434425354,grad_norm: 0.9675293896818248, iteration: 59770
loss: 1.0316907167434692,grad_norm: 0.9999992309524234, iteration: 59771
loss: 0.9931773543357849,grad_norm: 0.9999990403459906, iteration: 59772
loss: 1.0004302263259888,grad_norm: 0.9787022989126881, iteration: 59773
loss: 0.9929932951927185,grad_norm: 0.9999991815860297, iteration: 59774
loss: 0.9997068643569946,grad_norm: 0.9999992282554115, iteration: 59775
loss: 1.0253756046295166,grad_norm: 0.9999992931491963, iteration: 59776
loss: 0.9839357733726501,grad_norm: 0.9120161507524575, iteration: 59777
loss: 1.014158844947815,grad_norm: 0.9891602164535376, iteration: 59778
loss: 1.000099778175354,grad_norm: 0.9050537048774574, iteration: 59779
loss: 1.0027748346328735,grad_norm: 0.9999990232904382, iteration: 59780
loss: 1.015500545501709,grad_norm: 0.8861843279726854, iteration: 59781
loss: 1.0128889083862305,grad_norm: 0.8862620817691695, iteration: 59782
loss: 0.9958255290985107,grad_norm: 0.9692944147713612, iteration: 59783
loss: 1.0514967441558838,grad_norm: 0.9731244523097552, iteration: 59784
loss: 1.0020939111709595,grad_norm: 0.9999990138339588, iteration: 59785
loss: 0.9867330193519592,grad_norm: 0.9740679261483451, iteration: 59786
loss: 1.0270310640335083,grad_norm: 0.9999990622170025, iteration: 59787
loss: 0.9755581617355347,grad_norm: 0.9999990214757601, iteration: 59788
loss: 1.0092929601669312,grad_norm: 0.9999993157946758, iteration: 59789
loss: 0.9605238437652588,grad_norm: 0.999999082037152, iteration: 59790
loss: 1.008874773979187,grad_norm: 0.999999320687774, iteration: 59791
loss: 0.9751266241073608,grad_norm: 0.9465417621958844, iteration: 59792
loss: 0.9997601509094238,grad_norm: 0.9999998836986517, iteration: 59793
loss: 1.0157818794250488,grad_norm: 0.8702790786704482, iteration: 59794
loss: 1.0227652788162231,grad_norm: 0.9999994578811483, iteration: 59795
loss: 1.0050321817398071,grad_norm: 0.9094819685168968, iteration: 59796
loss: 0.9893601536750793,grad_norm: 0.9999995671459088, iteration: 59797
loss: 0.9955574870109558,grad_norm: 0.9999990314864051, iteration: 59798
loss: 0.9931013584136963,grad_norm: 0.9999990018864086, iteration: 59799
loss: 1.037848949432373,grad_norm: 0.9811258755050954, iteration: 59800
loss: 0.9982401132583618,grad_norm: 0.9999993139385804, iteration: 59801
loss: 1.0450117588043213,grad_norm: 0.9999996317119036, iteration: 59802
loss: 1.000888466835022,grad_norm: 0.869711884261731, iteration: 59803
loss: 1.0087970495224,grad_norm: 0.9660438036130583, iteration: 59804
loss: 0.9758833050727844,grad_norm: 0.9999994519416004, iteration: 59805
loss: 0.9827598929405212,grad_norm: 0.8317786257928675, iteration: 59806
loss: 0.9769183397293091,grad_norm: 0.9423101585386496, iteration: 59807
loss: 1.005794644355774,grad_norm: 0.9999995177528761, iteration: 59808
loss: 1.0005956888198853,grad_norm: 0.9999991498224734, iteration: 59809
loss: 0.987697958946228,grad_norm: 0.8978954466861336, iteration: 59810
loss: 1.008461594581604,grad_norm: 0.9282649499698697, iteration: 59811
loss: 0.9911065101623535,grad_norm: 0.7993535373493527, iteration: 59812
loss: 1.0252491235733032,grad_norm: 0.849449125332779, iteration: 59813
loss: 1.0198347568511963,grad_norm: 0.999999058069242, iteration: 59814
loss: 1.0061832666397095,grad_norm: 0.9999990167500354, iteration: 59815
loss: 1.0117110013961792,grad_norm: 0.9999989936712368, iteration: 59816
loss: 0.9816149473190308,grad_norm: 0.9999993525079229, iteration: 59817
loss: 0.9900936484336853,grad_norm: 0.999999294097287, iteration: 59818
loss: 1.011125922203064,grad_norm: 0.9315858656673838, iteration: 59819
loss: 0.9825829267501831,grad_norm: 0.9999991296525143, iteration: 59820
loss: 1.0203450918197632,grad_norm: 0.999999172322342, iteration: 59821
loss: 0.9749536514282227,grad_norm: 0.999998980413927, iteration: 59822
loss: 1.0047358274459839,grad_norm: 0.8943807424884664, iteration: 59823
loss: 1.0572832822799683,grad_norm: 1.000000010634696, iteration: 59824
loss: 0.9668130874633789,grad_norm: 0.9426845858319266, iteration: 59825
loss: 1.0382779836654663,grad_norm: 0.999999443646406, iteration: 59826
loss: 0.9811744093894958,grad_norm: 0.9691151858441113, iteration: 59827
loss: 1.0244642496109009,grad_norm: 0.9096458643634934, iteration: 59828
loss: 1.0198134183883667,grad_norm: 0.9834018945829688, iteration: 59829
loss: 1.047544240951538,grad_norm: 0.9999990568779002, iteration: 59830
loss: 1.0034488439559937,grad_norm: 0.8656037976772281, iteration: 59831
loss: 1.0213128328323364,grad_norm: 0.8573261738278626, iteration: 59832
loss: 1.0294407606124878,grad_norm: 0.9999992693610746, iteration: 59833
loss: 0.9876500964164734,grad_norm: 0.9223619894542472, iteration: 59834
loss: 0.9848610162734985,grad_norm: 0.9999994583143709, iteration: 59835
loss: 1.0023977756500244,grad_norm: 0.9999995177104306, iteration: 59836
loss: 1.0269808769226074,grad_norm: 0.9781352610745633, iteration: 59837
loss: 0.9466248154640198,grad_norm: 0.9267823731133923, iteration: 59838
loss: 0.9543331265449524,grad_norm: 0.8731099761326863, iteration: 59839
loss: 0.9903604388237,grad_norm: 0.9999991011184027, iteration: 59840
loss: 1.024754285812378,grad_norm: 0.9703166004821548, iteration: 59841
loss: 0.981156051158905,grad_norm: 0.868041265255396, iteration: 59842
loss: 1.0267925262451172,grad_norm: 0.7912407676956192, iteration: 59843
loss: 1.0125006437301636,grad_norm: 0.9090890758382097, iteration: 59844
loss: 1.0200488567352295,grad_norm: 0.9999991426109159, iteration: 59845
loss: 1.0119737386703491,grad_norm: 0.822321136140575, iteration: 59846
loss: 1.0136957168579102,grad_norm: 0.9999989903960548, iteration: 59847
loss: 1.018079400062561,grad_norm: 0.8423510425130447, iteration: 59848
loss: 1.027967929840088,grad_norm: 0.8681386051841311, iteration: 59849
loss: 0.9790393710136414,grad_norm: 0.9305958949110356, iteration: 59850
loss: 0.9739418625831604,grad_norm: 0.9999991456298236, iteration: 59851
loss: 1.0141159296035767,grad_norm: 0.7519914895122061, iteration: 59852
loss: 0.9575580954551697,grad_norm: 0.9341778196792758, iteration: 59853
loss: 1.0096919536590576,grad_norm: 0.8220545776624859, iteration: 59854
loss: 0.9731445908546448,grad_norm: 0.8945883351725948, iteration: 59855
loss: 0.9923226237297058,grad_norm: 0.8836517255625934, iteration: 59856
loss: 1.0110923051834106,grad_norm: 0.9263541078341393, iteration: 59857
loss: 0.9926319718360901,grad_norm: 0.9853609296087275, iteration: 59858
loss: 0.9661476612091064,grad_norm: 0.9999992030087814, iteration: 59859
loss: 1.0026981830596924,grad_norm: 0.9880912269673618, iteration: 59860
loss: 1.02036452293396,grad_norm: 0.8434158754769977, iteration: 59861
loss: 0.9936271905899048,grad_norm: 0.867922927555519, iteration: 59862
loss: 1.0166600942611694,grad_norm: 0.9999989899586039, iteration: 59863
loss: 1.010373830795288,grad_norm: 0.9217632303923813, iteration: 59864
loss: 0.9967399835586548,grad_norm: 0.8740213074719366, iteration: 59865
loss: 1.0170011520385742,grad_norm: 0.9082233475378433, iteration: 59866
loss: 0.969882071018219,grad_norm: 0.9195680273797231, iteration: 59867
loss: 1.006974697113037,grad_norm: 0.8601740290263973, iteration: 59868
loss: 1.0363600254058838,grad_norm: 0.8591024412124978, iteration: 59869
loss: 1.0183619260787964,grad_norm: 0.9999990332289126, iteration: 59870
loss: 1.0173791646957397,grad_norm: 0.7958751352418338, iteration: 59871
loss: 0.9805657267570496,grad_norm: 0.9999991953298971, iteration: 59872
loss: 1.026606559753418,grad_norm: 0.9999990709258028, iteration: 59873
loss: 1.0336214303970337,grad_norm: 0.9999991159479418, iteration: 59874
loss: 0.9923186302185059,grad_norm: 0.8961759352723848, iteration: 59875
loss: 1.007694959640503,grad_norm: 0.9241956088946118, iteration: 59876
loss: 0.9823207855224609,grad_norm: 0.8930288845371883, iteration: 59877
loss: 1.01047945022583,grad_norm: 0.9904623453398753, iteration: 59878
loss: 0.9952059984207153,grad_norm: 0.8393492258853607, iteration: 59879
loss: 1.0425302982330322,grad_norm: 0.9999990042690424, iteration: 59880
loss: 1.010165810585022,grad_norm: 0.9999990419713486, iteration: 59881
loss: 1.042401909828186,grad_norm: 0.9999999305487765, iteration: 59882
loss: 1.0857317447662354,grad_norm: 0.9999995936268242, iteration: 59883
loss: 1.043749213218689,grad_norm: 0.987574343316694, iteration: 59884
loss: 0.9871960282325745,grad_norm: 0.9832681490032129, iteration: 59885
loss: 1.0044827461242676,grad_norm: 0.8140246899422527, iteration: 59886
loss: 1.0699303150177002,grad_norm: 0.9999997078116526, iteration: 59887
loss: 0.9922600984573364,grad_norm: 0.9073812081135607, iteration: 59888
loss: 1.0189865827560425,grad_norm: 0.8777876087000632, iteration: 59889
loss: 1.057937741279602,grad_norm: 0.9999992555084476, iteration: 59890
loss: 1.0175198316574097,grad_norm: 0.9999991375655575, iteration: 59891
loss: 1.0042409896850586,grad_norm: 0.8069743610580242, iteration: 59892
loss: 0.9833841323852539,grad_norm: 0.9422328070666486, iteration: 59893
loss: 1.0361919403076172,grad_norm: 0.9999995144965584, iteration: 59894
loss: 0.981009840965271,grad_norm: 0.8805145631238327, iteration: 59895
loss: 0.9980010390281677,grad_norm: 0.9999991345812753, iteration: 59896
loss: 1.0061382055282593,grad_norm: 0.7754781285400792, iteration: 59897
loss: 1.0384684801101685,grad_norm: 0.9999994248215851, iteration: 59898
loss: 1.0095525979995728,grad_norm: 0.9999989496238683, iteration: 59899
loss: 1.0127757787704468,grad_norm: 0.9999991534688929, iteration: 59900
loss: 0.9705613255500793,grad_norm: 0.9999990479493666, iteration: 59901
loss: 0.9857141971588135,grad_norm: 0.9999990755027817, iteration: 59902
loss: 1.0664690732955933,grad_norm: 0.9999994873517682, iteration: 59903
loss: 0.9939274191856384,grad_norm: 0.8323918736434012, iteration: 59904
loss: 0.9871610999107361,grad_norm: 0.919938105483543, iteration: 59905
loss: 1.007598876953125,grad_norm: 0.9999992606258049, iteration: 59906
loss: 1.0434783697128296,grad_norm: 0.9999995430169061, iteration: 59907
loss: 1.0185171365737915,grad_norm: 0.9999991903235089, iteration: 59908
loss: 0.9919822812080383,grad_norm: 0.8131691539729016, iteration: 59909
loss: 0.9722848534584045,grad_norm: 0.9016344931578169, iteration: 59910
loss: 1.0368084907531738,grad_norm: 0.9222453034397556, iteration: 59911
loss: 1.0331881046295166,grad_norm: 0.9572387071794417, iteration: 59912
loss: 0.9906997084617615,grad_norm: 0.8678630803435776, iteration: 59913
loss: 0.9452267289161682,grad_norm: 0.9266096630366915, iteration: 59914
loss: 0.9989633560180664,grad_norm: 0.789721076836208, iteration: 59915
loss: 0.995893120765686,grad_norm: 0.9847576759263091, iteration: 59916
loss: 1.000531792640686,grad_norm: 0.9814781917702862, iteration: 59917
loss: 1.0040425062179565,grad_norm: 0.9999991297662484, iteration: 59918
loss: 1.006925106048584,grad_norm: 0.850263480683059, iteration: 59919
loss: 0.9952241778373718,grad_norm: 0.9870735134414146, iteration: 59920
loss: 1.0427993535995483,grad_norm: 0.9999999288106518, iteration: 59921
loss: 0.9909878373146057,grad_norm: 0.999999001003757, iteration: 59922
loss: 0.9949063062667847,grad_norm: 0.9717694926441004, iteration: 59923
loss: 1.01365065574646,grad_norm: 0.999999011083348, iteration: 59924
loss: 0.9996619820594788,grad_norm: 0.828160432250217, iteration: 59925
loss: 0.972572922706604,grad_norm: 0.9999989796361534, iteration: 59926
loss: 1.0246384143829346,grad_norm: 0.9707289644591589, iteration: 59927
loss: 0.9777465462684631,grad_norm: 0.9805035119752762, iteration: 59928
loss: 0.9975078701972961,grad_norm: 0.8379695187040885, iteration: 59929
loss: 1.0149633884429932,grad_norm: 0.9999989973929431, iteration: 59930
loss: 0.9741127490997314,grad_norm: 0.9776783987091807, iteration: 59931
loss: 1.011157751083374,grad_norm: 0.9213563529070141, iteration: 59932
loss: 0.9835354685783386,grad_norm: 0.9169925019176084, iteration: 59933
loss: 1.0275115966796875,grad_norm: 0.9999990576937791, iteration: 59934
loss: 0.9856017231941223,grad_norm: 0.9778098501585262, iteration: 59935
loss: 1.044651985168457,grad_norm: 0.9999998331382028, iteration: 59936
loss: 1.0294229984283447,grad_norm: 0.9999992497385022, iteration: 59937
loss: 0.9897193312644958,grad_norm: 0.943756290759546, iteration: 59938
loss: 0.997355043888092,grad_norm: 0.9436645907345059, iteration: 59939
loss: 1.0110180377960205,grad_norm: 0.8079373418045147, iteration: 59940
loss: 1.0240628719329834,grad_norm: 0.9173713811467256, iteration: 59941
loss: 1.013547658920288,grad_norm: 0.9430551710451075, iteration: 59942
loss: 1.0098989009857178,grad_norm: 0.8653606052117585, iteration: 59943
loss: 1.0240203142166138,grad_norm: 0.8827291329177369, iteration: 59944
loss: 0.9976513385772705,grad_norm: 0.8453690950697051, iteration: 59945
loss: 0.9962887763977051,grad_norm: 0.9999991005184709, iteration: 59946
loss: 0.9837836623191833,grad_norm: 0.9999991600233475, iteration: 59947
loss: 1.0498573780059814,grad_norm: 0.8757644832695933, iteration: 59948
loss: 0.9912143349647522,grad_norm: 0.9562816748058768, iteration: 59949
loss: 0.9976456165313721,grad_norm: 0.9975471976970428, iteration: 59950
loss: 0.9899728894233704,grad_norm: 0.8804320238710669, iteration: 59951
loss: 0.9748221039772034,grad_norm: 0.9999991359159165, iteration: 59952
loss: 1.002450942993164,grad_norm: 0.8512143184904755, iteration: 59953
loss: 1.0234580039978027,grad_norm: 0.9999992425358293, iteration: 59954
loss: 0.9907535314559937,grad_norm: 0.9999990500195273, iteration: 59955
loss: 1.0384098291397095,grad_norm: 0.9999990936118579, iteration: 59956
loss: 0.9996777176856995,grad_norm: 0.8750550020384381, iteration: 59957
loss: 1.008763074874878,grad_norm: 0.8538333499739613, iteration: 59958
loss: 0.9861943125724792,grad_norm: 0.9089470254555977, iteration: 59959
loss: 0.9899243712425232,grad_norm: 0.8939640703498567, iteration: 59960
loss: 1.04436457157135,grad_norm: 0.9598537782903322, iteration: 59961
loss: 1.0093674659729004,grad_norm: 0.9886927879593732, iteration: 59962
loss: 1.0456714630126953,grad_norm: 0.9999995293942614, iteration: 59963
loss: 1.0597054958343506,grad_norm: 0.9999990637035185, iteration: 59964
loss: 1.014459252357483,grad_norm: 0.9129563269151943, iteration: 59965
loss: 0.9994173049926758,grad_norm: 0.972283424111524, iteration: 59966
loss: 0.9967700242996216,grad_norm: 0.992992017524791, iteration: 59967
loss: 1.0079373121261597,grad_norm: 0.9999991053788234, iteration: 59968
loss: 1.023431658744812,grad_norm: 0.9028171662206477, iteration: 59969
loss: 1.0199521780014038,grad_norm: 0.9999996055528589, iteration: 59970
loss: 0.9970136880874634,grad_norm: 0.9630035836015065, iteration: 59971
loss: 1.008285403251648,grad_norm: 0.9691145227216773, iteration: 59972
loss: 0.98486328125,grad_norm: 0.8152587790482899, iteration: 59973
loss: 0.9692704081535339,grad_norm: 0.9999995376194649, iteration: 59974
loss: 0.9641046524047852,grad_norm: 0.966210619897704, iteration: 59975
loss: 0.9981635212898254,grad_norm: 0.8559368287367844, iteration: 59976
loss: 0.9872154593467712,grad_norm: 0.8937292275353822, iteration: 59977
loss: 1.0623953342437744,grad_norm: 0.9999998777836931, iteration: 59978
loss: 1.023653268814087,grad_norm: 0.909989439786755, iteration: 59979
loss: 0.9854544997215271,grad_norm: 0.9999990076209313, iteration: 59980
loss: 1.055153489112854,grad_norm: 0.9999995658764611, iteration: 59981
loss: 0.9964191317558289,grad_norm: 0.9999989513416089, iteration: 59982
loss: 1.014840841293335,grad_norm: 0.8485612314501311, iteration: 59983
loss: 0.9444360136985779,grad_norm: 0.7931638495023694, iteration: 59984
loss: 0.982912540435791,grad_norm: 0.9999991946612519, iteration: 59985
loss: 0.9786744713783264,grad_norm: 0.8640665473159485, iteration: 59986
loss: 0.9860154390335083,grad_norm: 0.9170349937413595, iteration: 59987
loss: 1.0074830055236816,grad_norm: 0.874945525346336, iteration: 59988
loss: 1.0291260480880737,grad_norm: 0.8800882873737859, iteration: 59989
loss: 0.9588446617126465,grad_norm: 0.9762053464432351, iteration: 59990
loss: 1.0122169256210327,grad_norm: 0.999999126278249, iteration: 59991
loss: 1.0222892761230469,grad_norm: 0.970404245825843, iteration: 59992
loss: 1.0211931467056274,grad_norm: 0.797374078107377, iteration: 59993
loss: 1.0168426036834717,grad_norm: 0.8107596346882061, iteration: 59994
loss: 1.0385476350784302,grad_norm: 0.9999999721240819, iteration: 59995
loss: 0.9986798763275146,grad_norm: 0.9751495465558728, iteration: 59996
loss: 1.0090839862823486,grad_norm: 0.9999997059241892, iteration: 59997
loss: 1.0263365507125854,grad_norm: 0.9999993115464604, iteration: 59998
loss: 0.9922239780426025,grad_norm: 0.8328055691746585, iteration: 59999
loss: 1.0061231851577759,grad_norm: 0.8094625599314856, iteration: 60000
Evaluating at step 60000
{'val': 0.9972194582223892, 'test': 2.7173956583102292}
loss: 1.0193395614624023,grad_norm: 0.9999990373931448, iteration: 60001
loss: 1.0322014093399048,grad_norm: 0.9999990799288808, iteration: 60002
loss: 0.9670385122299194,grad_norm: 0.9445265860201296, iteration: 60003
loss: 0.9812709093093872,grad_norm: 0.9999991108911997, iteration: 60004
loss: 0.9756473302841187,grad_norm: 0.9537815643573666, iteration: 60005
loss: 1.0211198329925537,grad_norm: 0.8935060365380562, iteration: 60006
loss: 0.9968141317367554,grad_norm: 0.9999994609967144, iteration: 60007
loss: 1.032804250717163,grad_norm: 0.8385184021548825, iteration: 60008
loss: 0.9486272931098938,grad_norm: 0.9943248706889647, iteration: 60009
loss: 0.9926966428756714,grad_norm: 0.9514734688407496, iteration: 60010
loss: 1.045751690864563,grad_norm: 0.9132244025193774, iteration: 60011
loss: 1.0308361053466797,grad_norm: 0.9474960800814209, iteration: 60012
loss: 1.009438395500183,grad_norm: 0.9999995131797498, iteration: 60013
loss: 1.0031251907348633,grad_norm: 0.8347889715605324, iteration: 60014
loss: 1.0412912368774414,grad_norm: 0.9999998638222347, iteration: 60015
loss: 0.996487021446228,grad_norm: 0.9999991392407988, iteration: 60016
loss: 1.1165446043014526,grad_norm: 0.9999991689594724, iteration: 60017
loss: 1.0208035707473755,grad_norm: 0.9999991152849818, iteration: 60018
loss: 0.9870073199272156,grad_norm: 0.9999990277160269, iteration: 60019
loss: 0.9911431074142456,grad_norm: 0.9225797021921812, iteration: 60020
loss: 1.003773808479309,grad_norm: 0.9999992817587775, iteration: 60021
loss: 0.9585457444190979,grad_norm: 0.9999991157525977, iteration: 60022
loss: 1.0830862522125244,grad_norm: 0.9999996371251155, iteration: 60023
loss: 1.0334174633026123,grad_norm: 0.9999991556302698, iteration: 60024
loss: 1.050193190574646,grad_norm: 0.9999990949292406, iteration: 60025
loss: 1.0476646423339844,grad_norm: 0.9999997271657799, iteration: 60026
loss: 1.0117589235305786,grad_norm: 0.9803059346744872, iteration: 60027
loss: 1.0278103351593018,grad_norm: 0.9999991086033045, iteration: 60028
loss: 1.0080814361572266,grad_norm: 0.8503833994886592, iteration: 60029
loss: 0.9842594265937805,grad_norm: 0.9999991434381966, iteration: 60030
loss: 0.9986637234687805,grad_norm: 0.9999992627953312, iteration: 60031
loss: 0.9920472502708435,grad_norm: 0.9999990486441017, iteration: 60032
loss: 1.0078853368759155,grad_norm: 0.8893341647507758, iteration: 60033
loss: 1.002522587776184,grad_norm: 0.8909054119768508, iteration: 60034
loss: 1.0271039009094238,grad_norm: 0.9999991420566027, iteration: 60035
loss: 0.9899841547012329,grad_norm: 0.8067191118126904, iteration: 60036
loss: 1.0356496572494507,grad_norm: 0.7886333349450256, iteration: 60037
loss: 0.9810195565223694,grad_norm: 0.9999990153718498, iteration: 60038
loss: 1.0139700174331665,grad_norm: 0.8888971373742313, iteration: 60039
loss: 1.0247883796691895,grad_norm: 0.8239780975187004, iteration: 60040
loss: 0.9894160032272339,grad_norm: 0.999998985322789, iteration: 60041
loss: 0.9828024506568909,grad_norm: 0.926766900130895, iteration: 60042
loss: 1.0245623588562012,grad_norm: 0.9999993284532588, iteration: 60043
loss: 0.9366786479949951,grad_norm: 0.9999992200861864, iteration: 60044
loss: 1.0061935186386108,grad_norm: 0.9765126191991447, iteration: 60045
loss: 0.9947158694267273,grad_norm: 0.8138182936185604, iteration: 60046
loss: 0.97225421667099,grad_norm: 0.9999990379443149, iteration: 60047
loss: 1.0436424016952515,grad_norm: 0.9999996485679453, iteration: 60048
loss: 0.994804859161377,grad_norm: 0.8074358547550519, iteration: 60049
loss: 1.0584793090820312,grad_norm: 0.9999991479061651, iteration: 60050
loss: 1.0150412321090698,grad_norm: 0.8736414537122873, iteration: 60051
loss: 0.9879717826843262,grad_norm: 0.9207600422703145, iteration: 60052
loss: 1.0271432399749756,grad_norm: 0.9637046885356352, iteration: 60053
loss: 1.0089141130447388,grad_norm: 0.99999991269875, iteration: 60054
loss: 1.0042341947555542,grad_norm: 0.9999991848947034, iteration: 60055
loss: 1.00188148021698,grad_norm: 0.9999990675320484, iteration: 60056
loss: 0.9961249828338623,grad_norm: 0.999999160455071, iteration: 60057
loss: 0.9842906594276428,grad_norm: 0.9338476399288006, iteration: 60058
loss: 0.9889377355575562,grad_norm: 0.856232942418107, iteration: 60059
loss: 1.0301834344863892,grad_norm: 0.8811738676063269, iteration: 60060
loss: 0.995222806930542,grad_norm: 0.8626023505709869, iteration: 60061
loss: 1.0162241458892822,grad_norm: 0.9999994615188074, iteration: 60062
loss: 0.9717211723327637,grad_norm: 0.9613427971059205, iteration: 60063
loss: 1.016141653060913,grad_norm: 0.9254334100921132, iteration: 60064
loss: 0.9586664438247681,grad_norm: 0.9999999272827946, iteration: 60065
loss: 0.9806452393531799,grad_norm: 0.8350365730630004, iteration: 60066
loss: 0.948031485080719,grad_norm: 0.8341074555680101, iteration: 60067
loss: 0.9567636847496033,grad_norm: 0.9181295434105395, iteration: 60068
loss: 0.9568687677383423,grad_norm: 0.8508397357399321, iteration: 60069
loss: 1.0055515766143799,grad_norm: 0.9931568367119089, iteration: 60070
loss: 0.9571913480758667,grad_norm: 0.9999990957646407, iteration: 60071
loss: 0.9635661840438843,grad_norm: 0.8158761685542384, iteration: 60072
loss: 1.0083050727844238,grad_norm: 0.9914572164405244, iteration: 60073
loss: 1.0059561729431152,grad_norm: 0.8250061782899324, iteration: 60074
loss: 1.0145584344863892,grad_norm: 0.9493108995324473, iteration: 60075
loss: 1.0378702878952026,grad_norm: 0.9999998665051621, iteration: 60076
loss: 1.0413709878921509,grad_norm: 0.9999993362756325, iteration: 60077
loss: 0.9776427745819092,grad_norm: 0.9097145969647346, iteration: 60078
loss: 0.9708566069602966,grad_norm: 0.9999990531145985, iteration: 60079
loss: 0.9713287353515625,grad_norm: 0.9138781033699214, iteration: 60080
loss: 1.0063798427581787,grad_norm: 0.9831141340939563, iteration: 60081
loss: 0.9629471302032471,grad_norm: 0.7734556757065969, iteration: 60082
loss: 0.9964591264724731,grad_norm: 0.9067546420610989, iteration: 60083
loss: 1.0023698806762695,grad_norm: 0.8081371235880114, iteration: 60084
loss: 0.9824942350387573,grad_norm: 0.99261701482077, iteration: 60085
loss: 1.0042638778686523,grad_norm: 0.931541695492838, iteration: 60086
loss: 1.0284143686294556,grad_norm: 0.958389724557704, iteration: 60087
loss: 0.9907420873641968,grad_norm: 0.9735905880581527, iteration: 60088
loss: 0.9823663234710693,grad_norm: 0.8778817701544296, iteration: 60089
loss: 0.9918387532234192,grad_norm: 0.9999995748096266, iteration: 60090
loss: 1.033286452293396,grad_norm: 0.9999998738352291, iteration: 60091
loss: 0.9652612805366516,grad_norm: 0.9485654499605676, iteration: 60092
loss: 1.0341098308563232,grad_norm: 0.9999990874215087, iteration: 60093
loss: 1.0459238290786743,grad_norm: 0.9999992206353477, iteration: 60094
loss: 0.9828565120697021,grad_norm: 0.8768290852618003, iteration: 60095
loss: 0.9680021405220032,grad_norm: 0.95489487187591, iteration: 60096
loss: 0.9927829504013062,grad_norm: 0.7911123233067728, iteration: 60097
loss: 0.9919683933258057,grad_norm: 0.8215209437715874, iteration: 60098
loss: 1.0394891500473022,grad_norm: 0.9113215557375057, iteration: 60099
loss: 1.0750668048858643,grad_norm: 0.9999992495596517, iteration: 60100
loss: 1.0109843015670776,grad_norm: 0.9158996552764256, iteration: 60101
loss: 0.9932522773742676,grad_norm: 0.8391906759275659, iteration: 60102
loss: 0.9894428849220276,grad_norm: 0.9415789368691873, iteration: 60103
loss: 0.9926772117614746,grad_norm: 0.9186994799374061, iteration: 60104
loss: 1.025037169456482,grad_norm: 0.9000065673245619, iteration: 60105
loss: 1.0070148706436157,grad_norm: 0.8105598189118428, iteration: 60106
loss: 0.9845852255821228,grad_norm: 0.9022143074201174, iteration: 60107
loss: 0.9835222959518433,grad_norm: 0.9999991062214767, iteration: 60108
loss: 1.034527063369751,grad_norm: 0.8400023565269548, iteration: 60109
loss: 0.976872980594635,grad_norm: 0.9548914460710923, iteration: 60110
loss: 1.0466502904891968,grad_norm: 0.9999998611138652, iteration: 60111
loss: 1.0075938701629639,grad_norm: 0.9800668030196815, iteration: 60112
loss: 0.9857105612754822,grad_norm: 0.915555921936071, iteration: 60113
loss: 1.052805781364441,grad_norm: 0.9768959469315197, iteration: 60114
loss: 0.9771856665611267,grad_norm: 0.9999991592429686, iteration: 60115
loss: 0.9792904853820801,grad_norm: 0.9999991141773575, iteration: 60116
loss: 1.012274146080017,grad_norm: 0.9256063481977842, iteration: 60117
loss: 1.0194401741027832,grad_norm: 0.9999990610470308, iteration: 60118
loss: 1.0231491327285767,grad_norm: 0.9999993632206657, iteration: 60119
loss: 0.9739140868186951,grad_norm: 0.9979571514331036, iteration: 60120
loss: 1.0311459302902222,grad_norm: 0.9999997177882302, iteration: 60121
loss: 0.988064169883728,grad_norm: 0.8623035870945926, iteration: 60122
loss: 0.9806852340698242,grad_norm: 0.8965086598387415, iteration: 60123
loss: 0.9631075263023376,grad_norm: 0.8466286860337551, iteration: 60124
loss: 1.013878345489502,grad_norm: 0.9549326407974761, iteration: 60125
loss: 0.9875679612159729,grad_norm: 0.9999991257485433, iteration: 60126
loss: 0.9825208187103271,grad_norm: 0.8218005730007133, iteration: 60127
loss: 1.0274335145950317,grad_norm: 0.9999990934765088, iteration: 60128
loss: 1.029094934463501,grad_norm: 0.9888689847619275, iteration: 60129
loss: 1.0652164220809937,grad_norm: 0.9999995274928672, iteration: 60130
loss: 0.9878358244895935,grad_norm: 0.8957931052546916, iteration: 60131
loss: 0.9996531009674072,grad_norm: 0.9999992696112949, iteration: 60132
loss: 1.0094366073608398,grad_norm: 0.7354724562195136, iteration: 60133
loss: 1.046013593673706,grad_norm: 0.9197459074999046, iteration: 60134
loss: 1.0162017345428467,grad_norm: 0.9999991216713556, iteration: 60135
loss: 0.9882129430770874,grad_norm: 0.9631910723558051, iteration: 60136
loss: 1.011154055595398,grad_norm: 0.8973076104462014, iteration: 60137
loss: 0.9651613235473633,grad_norm: 0.8541964074631998, iteration: 60138
loss: 1.0013786554336548,grad_norm: 0.9999992454625641, iteration: 60139
loss: 1.039379358291626,grad_norm: 0.9433384642204855, iteration: 60140
loss: 1.0308480262756348,grad_norm: 0.9580702107339478, iteration: 60141
loss: 1.009994626045227,grad_norm: 0.9999993838107528, iteration: 60142
loss: 1.0004509687423706,grad_norm: 0.9999990766702065, iteration: 60143
loss: 0.9733638763427734,grad_norm: 0.9303169282537838, iteration: 60144
loss: 1.0279260873794556,grad_norm: 0.9999994001761546, iteration: 60145
loss: 0.9819344878196716,grad_norm: 0.9999998810977864, iteration: 60146
loss: 0.9637154340744019,grad_norm: 0.9600976913995173, iteration: 60147
loss: 0.9716686606407166,grad_norm: 0.9281486511596141, iteration: 60148
loss: 0.9833701848983765,grad_norm: 0.8221870642669338, iteration: 60149
loss: 0.9798595309257507,grad_norm: 0.999999141284579, iteration: 60150
loss: 0.956007182598114,grad_norm: 0.8238602344004197, iteration: 60151
loss: 1.0903875827789307,grad_norm: 0.9999993258975642, iteration: 60152
loss: 1.0190706253051758,grad_norm: 0.8039470228428166, iteration: 60153
loss: 1.0016580820083618,grad_norm: 0.9999992025439436, iteration: 60154
loss: 0.9974192976951599,grad_norm: 0.8477851094517154, iteration: 60155
loss: 1.019171118736267,grad_norm: 0.9999991281736286, iteration: 60156
loss: 0.9891862869262695,grad_norm: 0.8904022755464576, iteration: 60157
loss: 0.9992414712905884,grad_norm: 0.9194208677107267, iteration: 60158
loss: 0.985180675983429,grad_norm: 0.9409072676984634, iteration: 60159
loss: 0.9823274612426758,grad_norm: 0.8621322284893169, iteration: 60160
loss: 1.0233889818191528,grad_norm: 0.9999990917271007, iteration: 60161
loss: 1.035070776939392,grad_norm: 0.9364867421494136, iteration: 60162
loss: 1.0430257320404053,grad_norm: 0.9999997818407396, iteration: 60163
loss: 0.985203206539154,grad_norm: 0.828033470752168, iteration: 60164
loss: 1.0170769691467285,grad_norm: 0.9998952983117327, iteration: 60165
loss: 1.0113433599472046,grad_norm: 0.9233205699079552, iteration: 60166
loss: 0.9856025576591492,grad_norm: 0.9516193363159844, iteration: 60167
loss: 1.0074855089187622,grad_norm: 0.9400167747264834, iteration: 60168
loss: 1.0114096403121948,grad_norm: 0.9526541305515421, iteration: 60169
loss: 1.006388783454895,grad_norm: 0.9999993626035754, iteration: 60170
loss: 0.965898334980011,grad_norm: 0.8821848022502434, iteration: 60171
loss: 0.9977712631225586,grad_norm: 0.8202035571555646, iteration: 60172
loss: 0.9882955551147461,grad_norm: 0.9782807903859493, iteration: 60173
loss: 0.9989759922027588,grad_norm: 0.7943336813251975, iteration: 60174
loss: 1.0279195308685303,grad_norm: 0.8669440903362289, iteration: 60175
loss: 1.0141698122024536,grad_norm: 0.9739246477854326, iteration: 60176
loss: 0.9942613244056702,grad_norm: 0.999999674726333, iteration: 60177
loss: 0.987356424331665,grad_norm: 0.925570532970904, iteration: 60178
loss: 1.0113645792007446,grad_norm: 0.970508387274129, iteration: 60179
loss: 1.0335086584091187,grad_norm: 0.9721397006303963, iteration: 60180
loss: 1.0784026384353638,grad_norm: 0.99999976950176, iteration: 60181
loss: 1.0218710899353027,grad_norm: 0.8417640909560165, iteration: 60182
loss: 1.0099955797195435,grad_norm: 0.9999991484678122, iteration: 60183
loss: 0.9612783789634705,grad_norm: 0.9767723921146875, iteration: 60184
loss: 1.0228615999221802,grad_norm: 0.8555424096428125, iteration: 60185
loss: 1.005609393119812,grad_norm: 0.9407347919091539, iteration: 60186
loss: 0.9894019365310669,grad_norm: 0.872589246498426, iteration: 60187
loss: 0.9714547395706177,grad_norm: 0.9440390833484629, iteration: 60188
loss: 0.9755889773368835,grad_norm: 0.9999990234768912, iteration: 60189
loss: 1.0023268461227417,grad_norm: 0.9999998202515065, iteration: 60190
loss: 0.9929616451263428,grad_norm: 0.8993813742280216, iteration: 60191
loss: 0.9689617156982422,grad_norm: 0.9045074151641604, iteration: 60192
loss: 1.0119110345840454,grad_norm: 0.9316155278287951, iteration: 60193
loss: 1.0305733680725098,grad_norm: 0.9999991490722915, iteration: 60194
loss: 0.9901463985443115,grad_norm: 0.9574094148462284, iteration: 60195
loss: 1.009299635887146,grad_norm: 0.9807898116570619, iteration: 60196
loss: 1.0673925876617432,grad_norm: 0.9999995415357176, iteration: 60197
loss: 1.0447707176208496,grad_norm: 0.8346190186666979, iteration: 60198
loss: 1.058424949645996,grad_norm: 0.9999998636306919, iteration: 60199
loss: 0.9645155072212219,grad_norm: 0.83799781971289, iteration: 60200
loss: 0.9974915981292725,grad_norm: 0.7661329744779753, iteration: 60201
loss: 1.018146276473999,grad_norm: 0.787000382812559, iteration: 60202
loss: 0.9761285781860352,grad_norm: 0.8740557721359173, iteration: 60203
loss: 1.0383044481277466,grad_norm: 0.9192454965609868, iteration: 60204
loss: 1.041155457496643,grad_norm: 0.9999990629238118, iteration: 60205
loss: 0.9964431524276733,grad_norm: 0.9999991023151702, iteration: 60206
loss: 1.0476316213607788,grad_norm: 0.9999991579088314, iteration: 60207
loss: 1.0362474918365479,grad_norm: 0.9999991331678796, iteration: 60208
loss: 0.9632040858268738,grad_norm: 0.9648225170649891, iteration: 60209
loss: 0.9574486017227173,grad_norm: 0.8735162703954493, iteration: 60210
loss: 1.0188654661178589,grad_norm: 0.9554623951394127, iteration: 60211
loss: 0.9805017709732056,grad_norm: 0.9999992028560494, iteration: 60212
loss: 1.0025240182876587,grad_norm: 0.8798909723204326, iteration: 60213
loss: 1.0448075532913208,grad_norm: 0.9999989215678108, iteration: 60214
loss: 0.9752917885780334,grad_norm: 0.9710728755936708, iteration: 60215
loss: 1.0080453157424927,grad_norm: 0.9837062786723735, iteration: 60216
loss: 1.0003125667572021,grad_norm: 0.9999991935999937, iteration: 60217
loss: 1.0305964946746826,grad_norm: 0.9999991589852218, iteration: 60218
loss: 1.0027681589126587,grad_norm: 0.7669192647231703, iteration: 60219
loss: 0.9642320871353149,grad_norm: 0.7985191308657336, iteration: 60220
loss: 1.007323145866394,grad_norm: 0.9476600799497208, iteration: 60221
loss: 1.070087194442749,grad_norm: 0.9999996499528215, iteration: 60222
loss: 1.0262742042541504,grad_norm: 0.9024375422425323, iteration: 60223
loss: 1.1085816621780396,grad_norm: 0.9999998060791643, iteration: 60224
loss: 1.0087411403656006,grad_norm: 0.7861652106219779, iteration: 60225
loss: 0.9525141716003418,grad_norm: 0.9947134181569315, iteration: 60226
loss: 0.9989951848983765,grad_norm: 0.9257895233548359, iteration: 60227
loss: 1.0062801837921143,grad_norm: 0.7724442693313219, iteration: 60228
loss: 1.0042197704315186,grad_norm: 0.9544120149386683, iteration: 60229
loss: 1.008436918258667,grad_norm: 0.9030798536219627, iteration: 60230
loss: 1.0089772939682007,grad_norm: 0.997296834697667, iteration: 60231
loss: 0.9988476037979126,grad_norm: 0.9029416599098102, iteration: 60232
loss: 0.9835613965988159,grad_norm: 0.9999989960432208, iteration: 60233
loss: 0.9913442730903625,grad_norm: 0.8638478911850076, iteration: 60234
loss: 1.0165044069290161,grad_norm: 0.999999457264717, iteration: 60235
loss: 0.9596771597862244,grad_norm: 0.9999991027527593, iteration: 60236
loss: 0.9826170206069946,grad_norm: 0.8571637444417679, iteration: 60237
loss: 1.0229098796844482,grad_norm: 0.9374617103609273, iteration: 60238
loss: 1.0042589902877808,grad_norm: 0.999999063564337, iteration: 60239
loss: 0.9802605509757996,grad_norm: 0.9999992095516306, iteration: 60240
loss: 0.9821015000343323,grad_norm: 0.9999990258009926, iteration: 60241
loss: 0.9746537804603577,grad_norm: 0.8719077137497535, iteration: 60242
loss: 1.0367746353149414,grad_norm: 0.9449348554504684, iteration: 60243
loss: 0.9846382141113281,grad_norm: 0.9514127888964089, iteration: 60244
loss: 1.0157334804534912,grad_norm: 0.9491412446670101, iteration: 60245
loss: 1.0062979459762573,grad_norm: 0.9999991176085714, iteration: 60246
loss: 1.0327666997909546,grad_norm: 0.8714885209845029, iteration: 60247
loss: 1.0288628339767456,grad_norm: 0.9764622919908823, iteration: 60248
loss: 0.9836652278900146,grad_norm: 0.9999991279983579, iteration: 60249
loss: 1.0565603971481323,grad_norm: 0.9999998124114235, iteration: 60250
loss: 0.997722864151001,grad_norm: 0.9999995559049052, iteration: 60251
loss: 1.0111790895462036,grad_norm: 0.8317060673361915, iteration: 60252
loss: 0.9974520802497864,grad_norm: 0.9999991211448165, iteration: 60253
loss: 0.983251690864563,grad_norm: 0.8205398933140358, iteration: 60254
loss: 0.9910112023353577,grad_norm: 0.9999991885790356, iteration: 60255
loss: 1.029003620147705,grad_norm: 0.9999990730026433, iteration: 60256
loss: 0.996664822101593,grad_norm: 0.9999991133924245, iteration: 60257
loss: 0.9905381202697754,grad_norm: 0.8342785001045149, iteration: 60258
loss: 1.038676381111145,grad_norm: 0.922988282686154, iteration: 60259
loss: 1.0013507604599,grad_norm: 0.8843980817312908, iteration: 60260
loss: 1.0057804584503174,grad_norm: 0.9999992423003163, iteration: 60261
loss: 1.0279159545898438,grad_norm: 0.9999996585401926, iteration: 60262
loss: 1.0027916431427002,grad_norm: 0.9864659609124147, iteration: 60263
loss: 1.0285242795944214,grad_norm: 0.9184822587487622, iteration: 60264
loss: 0.9855096936225891,grad_norm: 0.8753576876062759, iteration: 60265
loss: 1.0006252527236938,grad_norm: 0.9999990429000553, iteration: 60266
loss: 1.0118893384933472,grad_norm: 0.8885596207016804, iteration: 60267
loss: 0.9927956461906433,grad_norm: 0.974168896731805, iteration: 60268
loss: 0.9702708721160889,grad_norm: 0.895018336053889, iteration: 60269
loss: 1.0028754472732544,grad_norm: 0.9999994575355625, iteration: 60270
loss: 0.9976187944412231,grad_norm: 0.9588101676492691, iteration: 60271
loss: 1.014225721359253,grad_norm: 0.9268086150473365, iteration: 60272
loss: 0.9850423336029053,grad_norm: 0.9771058846856111, iteration: 60273
loss: 0.9640725255012512,grad_norm: 0.9999989859811064, iteration: 60274
loss: 0.9960091710090637,grad_norm: 0.999999832438701, iteration: 60275
loss: 1.0238817930221558,grad_norm: 0.9972947567954178, iteration: 60276
loss: 1.017552137374878,grad_norm: 0.957758185024928, iteration: 60277
loss: 1.051186203956604,grad_norm: 0.9999992787060777, iteration: 60278
loss: 0.9571462869644165,grad_norm: 0.8227057845463157, iteration: 60279
loss: 1.0264252424240112,grad_norm: 0.9999991701977377, iteration: 60280
loss: 1.0182857513427734,grad_norm: 0.7976128364890596, iteration: 60281
loss: 1.0415902137756348,grad_norm: 0.783000250955741, iteration: 60282
loss: 1.0251755714416504,grad_norm: 0.999999132411984, iteration: 60283
loss: 0.9893215298652649,grad_norm: 0.9999991765285227, iteration: 60284
loss: 1.037419080734253,grad_norm: 0.9999990184342598, iteration: 60285
loss: 0.9696950316429138,grad_norm: 0.9999992537282086, iteration: 60286
loss: 1.0641847848892212,grad_norm: 0.9999991108243304, iteration: 60287
loss: 1.0257055759429932,grad_norm: 0.9999991461997736, iteration: 60288
loss: 0.9786572456359863,grad_norm: 0.9999993778963101, iteration: 60289
loss: 1.0051472187042236,grad_norm: 0.9999992973641919, iteration: 60290
loss: 0.9802199602127075,grad_norm: 0.8541736312045891, iteration: 60291
loss: 1.0038392543792725,grad_norm: 0.7465332366347244, iteration: 60292
loss: 1.000008225440979,grad_norm: 0.8660589547677613, iteration: 60293
loss: 1.0035151243209839,grad_norm: 0.9999991883314634, iteration: 60294
loss: 1.0437155961990356,grad_norm: 0.9999999054440624, iteration: 60295
loss: 1.0007126331329346,grad_norm: 0.9999990041643178, iteration: 60296
loss: 1.0069496631622314,grad_norm: 0.9687936124634743, iteration: 60297
loss: 1.0057990550994873,grad_norm: 0.9999989874573654, iteration: 60298
loss: 1.104699730873108,grad_norm: 0.9999991017935786, iteration: 60299
loss: 0.9616581201553345,grad_norm: 0.9694105593290139, iteration: 60300
loss: 1.005421757698059,grad_norm: 0.9999996410586108, iteration: 60301
loss: 0.9836583137512207,grad_norm: 0.9255770794277307, iteration: 60302
loss: 0.9942275285720825,grad_norm: 0.9783020462785406, iteration: 60303
loss: 0.986524224281311,grad_norm: 0.8706766064995974, iteration: 60304
loss: 1.0175762176513672,grad_norm: 0.8901826657978817, iteration: 60305
loss: 1.010140299797058,grad_norm: 0.871785218448444, iteration: 60306
loss: 1.016563057899475,grad_norm: 0.9711892146265968, iteration: 60307
loss: 0.9996561408042908,grad_norm: 0.8876135842944123, iteration: 60308
loss: 0.9673067331314087,grad_norm: 0.9999991003609796, iteration: 60309
loss: 0.9952368140220642,grad_norm: 0.9999994762007768, iteration: 60310
loss: 1.0253247022628784,grad_norm: 0.8782164316793261, iteration: 60311
loss: 0.9897975325584412,grad_norm: 0.9999992653258937, iteration: 60312
loss: 1.004144549369812,grad_norm: 0.999999091334974, iteration: 60313
loss: 0.9826202392578125,grad_norm: 0.8684364331550427, iteration: 60314
loss: 1.0126816034317017,grad_norm: 0.8972377431918561, iteration: 60315
loss: 1.0534448623657227,grad_norm: 0.9154939626692847, iteration: 60316
loss: 1.0151801109313965,grad_norm: 0.9999992221186259, iteration: 60317
loss: 1.0068718194961548,grad_norm: 0.9999990824064012, iteration: 60318
loss: 0.9911750555038452,grad_norm: 0.9999994721262668, iteration: 60319
loss: 0.9897736310958862,grad_norm: 0.8009465333279596, iteration: 60320
loss: 0.9641323685646057,grad_norm: 0.9144389811961573, iteration: 60321
loss: 1.0039079189300537,grad_norm: 0.9202698201564372, iteration: 60322
loss: 0.9877117872238159,grad_norm: 0.9999991293670549, iteration: 60323
loss: 0.9683805108070374,grad_norm: 0.8038816478887594, iteration: 60324
loss: 1.0003793239593506,grad_norm: 0.9999990464818689, iteration: 60325
loss: 1.1074265241622925,grad_norm: 0.9999994473835914, iteration: 60326
loss: 1.020218014717102,grad_norm: 0.9999991830452539, iteration: 60327
loss: 1.0392571687698364,grad_norm: 0.9999998514369959, iteration: 60328
loss: 1.0045950412750244,grad_norm: 0.9895972017254347, iteration: 60329
loss: 0.9638598561286926,grad_norm: 0.8995663900960726, iteration: 60330
loss: 0.9975438117980957,grad_norm: 0.9999993009988228, iteration: 60331
loss: 1.028977394104004,grad_norm: 0.852350371451713, iteration: 60332
loss: 1.0047638416290283,grad_norm: 0.9010463614275942, iteration: 60333
loss: 1.0485916137695312,grad_norm: 0.8741851907253967, iteration: 60334
loss: 0.989703893661499,grad_norm: 0.9999998814008372, iteration: 60335
loss: 1.003274917602539,grad_norm: 0.9999991924074229, iteration: 60336
loss: 0.9935852885246277,grad_norm: 0.8715851066446524, iteration: 60337
loss: 0.9893269538879395,grad_norm: 0.9999998357578944, iteration: 60338
loss: 1.0614982843399048,grad_norm: 0.9219162084500049, iteration: 60339
loss: 1.015403151512146,grad_norm: 0.9399858351491727, iteration: 60340
loss: 1.010225772857666,grad_norm: 0.953860898273759, iteration: 60341
loss: 1.0206819772720337,grad_norm: 0.9929949641905891, iteration: 60342
loss: 1.0264180898666382,grad_norm: 0.922155805686936, iteration: 60343
loss: 0.9877273440361023,grad_norm: 0.9816273819534223, iteration: 60344
loss: 0.984910249710083,grad_norm: 0.9428754066045196, iteration: 60345
loss: 1.0174193382263184,grad_norm: 0.9999996366015979, iteration: 60346
loss: 0.9611998200416565,grad_norm: 0.9084054979938361, iteration: 60347
loss: 1.0138345956802368,grad_norm: 0.9376706256662579, iteration: 60348
loss: 0.9721226096153259,grad_norm: 0.9999990798629614, iteration: 60349
loss: 1.0301419496536255,grad_norm: 0.8480815966737695, iteration: 60350
loss: 0.9999326467514038,grad_norm: 0.8706511791457078, iteration: 60351
loss: 1.0283153057098389,grad_norm: 0.9999993772265675, iteration: 60352
loss: 1.0242748260498047,grad_norm: 0.9999993305402612, iteration: 60353
loss: 1.042574405670166,grad_norm: 0.9999988995462679, iteration: 60354
loss: 1.0441068410873413,grad_norm: 0.9999999569853852, iteration: 60355
loss: 1.0046147108078003,grad_norm: 0.8310771899035027, iteration: 60356
loss: 1.0029685497283936,grad_norm: 0.8807076588377946, iteration: 60357
loss: 1.059383511543274,grad_norm: 0.9999994841585216, iteration: 60358
loss: 1.0487141609191895,grad_norm: 0.9999992109877862, iteration: 60359
loss: 1.025678277015686,grad_norm: 0.874409100412493, iteration: 60360
loss: 1.0047334432601929,grad_norm: 0.999137531580911, iteration: 60361
loss: 1.0131772756576538,grad_norm: 0.9990368645914, iteration: 60362
loss: 0.9989776015281677,grad_norm: 0.9115116768727057, iteration: 60363
loss: 1.025429606437683,grad_norm: 0.9906662077718252, iteration: 60364
loss: 1.029379963874817,grad_norm: 0.9999992075809606, iteration: 60365
loss: 1.08216392993927,grad_norm: 0.9999996978442235, iteration: 60366
loss: 1.0210644006729126,grad_norm: 0.999999105791366, iteration: 60367
loss: 1.0106390714645386,grad_norm: 0.811514580438561, iteration: 60368
loss: 1.0376157760620117,grad_norm: 0.9999991669514616, iteration: 60369
loss: 1.008637547492981,grad_norm: 0.9999993463189945, iteration: 60370
loss: 1.0168434381484985,grad_norm: 0.9400180223806638, iteration: 60371
loss: 0.9915527105331421,grad_norm: 0.8288297522226542, iteration: 60372
loss: 0.9988109469413757,grad_norm: 0.9301197866776267, iteration: 60373
loss: 0.9806085824966431,grad_norm: 0.9840346857120492, iteration: 60374
loss: 1.0204746723175049,grad_norm: 0.9999990343478085, iteration: 60375
loss: 1.0548601150512695,grad_norm: 0.9999998544996765, iteration: 60376
loss: 1.0131455659866333,grad_norm: 0.9190379908707541, iteration: 60377
loss: 1.0355761051177979,grad_norm: 0.9728810277769065, iteration: 60378
loss: 0.9862300157546997,grad_norm: 0.9999993290794303, iteration: 60379
loss: 1.008579134941101,grad_norm: 0.9999994751908121, iteration: 60380
loss: 1.0110924243927002,grad_norm: 0.8088339932863459, iteration: 60381
loss: 1.0009902715682983,grad_norm: 0.8971076789607331, iteration: 60382
loss: 1.0012725591659546,grad_norm: 0.8890469507854659, iteration: 60383
loss: 1.027669072151184,grad_norm: 0.8729695274932245, iteration: 60384
loss: 1.0275648832321167,grad_norm: 0.9999991625624883, iteration: 60385
loss: 1.0269367694854736,grad_norm: 0.8354838463883368, iteration: 60386
loss: 1.0086145401000977,grad_norm: 0.9999994964870296, iteration: 60387
loss: 0.981803297996521,grad_norm: 0.9706452143185562, iteration: 60388
loss: 0.9725825786590576,grad_norm: 0.7867318614088047, iteration: 60389
loss: 0.9849523901939392,grad_norm: 0.9999991371990004, iteration: 60390
loss: 1.0325322151184082,grad_norm: 0.830248098099368, iteration: 60391
loss: 0.9989129304885864,grad_norm: 0.9358925591814655, iteration: 60392
loss: 0.9858285784721375,grad_norm: 0.9999991371244712, iteration: 60393
loss: 1.0356569290161133,grad_norm: 0.9999996581851522, iteration: 60394
loss: 1.024807333946228,grad_norm: 0.9999990547252603, iteration: 60395
loss: 1.031317114830017,grad_norm: 0.999999902343101, iteration: 60396
loss: 0.9992254972457886,grad_norm: 0.839566838159696, iteration: 60397
loss: 1.0129677057266235,grad_norm: 0.8562592849905509, iteration: 60398
loss: 1.0393904447555542,grad_norm: 0.9999993014697212, iteration: 60399
loss: 1.01340651512146,grad_norm: 0.9999990330537101, iteration: 60400
loss: 0.9687583446502686,grad_norm: 0.9999992555911431, iteration: 60401
loss: 1.0218398571014404,grad_norm: 0.8637680154896119, iteration: 60402
loss: 0.9796069264411926,grad_norm: 0.9201997430570267, iteration: 60403
loss: 1.0040847063064575,grad_norm: 0.9999991480625814, iteration: 60404
loss: 0.9656910300254822,grad_norm: 0.9999990906401114, iteration: 60405
loss: 0.9972630739212036,grad_norm: 0.8124608894209009, iteration: 60406
loss: 0.9976687431335449,grad_norm: 0.9999990990909313, iteration: 60407
loss: 1.0220891237258911,grad_norm: 0.9507495814331056, iteration: 60408
loss: 1.0127893686294556,grad_norm: 0.9072628982622813, iteration: 60409
loss: 1.0066642761230469,grad_norm: 0.9999993206247602, iteration: 60410
loss: 0.9708916544914246,grad_norm: 0.9209525409451351, iteration: 60411
loss: 1.0099090337753296,grad_norm: 0.999999100086267, iteration: 60412
loss: 1.0087966918945312,grad_norm: 0.8598444435018757, iteration: 60413
loss: 1.009963870048523,grad_norm: 0.9999991364949922, iteration: 60414
loss: 1.0016016960144043,grad_norm: 0.9320706713508634, iteration: 60415
loss: 1.0085721015930176,grad_norm: 0.9999991204021221, iteration: 60416
loss: 0.9740519523620605,grad_norm: 0.8860852255375609, iteration: 60417
loss: 0.9661381244659424,grad_norm: 0.8775049752010837, iteration: 60418
loss: 1.0237412452697754,grad_norm: 0.9999997655170743, iteration: 60419
loss: 1.0006433725357056,grad_norm: 0.9999991066915388, iteration: 60420
loss: 0.9934213757514954,grad_norm: 0.8569938802123871, iteration: 60421
loss: 0.9949317574501038,grad_norm: 0.8819239464605181, iteration: 60422
loss: 0.9997584819793701,grad_norm: 0.9999991943705471, iteration: 60423
loss: 1.0203739404678345,grad_norm: 0.8612561461339389, iteration: 60424
loss: 0.995230495929718,grad_norm: 0.8434060813436806, iteration: 60425
loss: 1.0166677236557007,grad_norm: 0.7186633190068276, iteration: 60426
loss: 1.0098856687545776,grad_norm: 0.999999622289562, iteration: 60427
loss: 1.0388497114181519,grad_norm: 0.9999992405279184, iteration: 60428
loss: 1.0113688707351685,grad_norm: 0.9999988823598481, iteration: 60429
loss: 1.023046851158142,grad_norm: 0.8349227202002002, iteration: 60430
loss: 1.0027236938476562,grad_norm: 0.8922974467098669, iteration: 60431
loss: 0.9979528188705444,grad_norm: 0.8779996763058042, iteration: 60432
loss: 0.9986405968666077,grad_norm: 0.9254647499039661, iteration: 60433
loss: 1.0035393238067627,grad_norm: 0.9364533670454709, iteration: 60434
loss: 0.9965488910675049,grad_norm: 0.8289545970722885, iteration: 60435
loss: 1.0116289854049683,grad_norm: 0.9659439402775014, iteration: 60436
loss: 0.9992116093635559,grad_norm: 0.9140375617992763, iteration: 60437
loss: 1.0004920959472656,grad_norm: 0.9806362507831315, iteration: 60438
loss: 1.0061407089233398,grad_norm: 0.9932914016830526, iteration: 60439
loss: 0.9962435364723206,grad_norm: 0.9999991649662207, iteration: 60440
loss: 1.08909273147583,grad_norm: 0.9915041506255492, iteration: 60441
loss: 1.0081866979599,grad_norm: 0.9999993336382551, iteration: 60442
loss: 1.0080054998397827,grad_norm: 0.99999893840051, iteration: 60443
loss: 0.970545768737793,grad_norm: 0.9961767106136146, iteration: 60444
loss: 0.9620243310928345,grad_norm: 0.999999217294096, iteration: 60445
loss: 1.0515930652618408,grad_norm: 0.9999996757567214, iteration: 60446
loss: 0.9768073558807373,grad_norm: 0.9999995682301712, iteration: 60447
loss: 0.9801014065742493,grad_norm: 0.9501199910963686, iteration: 60448
loss: 0.9717092514038086,grad_norm: 0.9519716543296252, iteration: 60449
loss: 0.9434458017349243,grad_norm: 0.7699593291705169, iteration: 60450
loss: 0.993451714515686,grad_norm: 0.8451565807602965, iteration: 60451
loss: 0.995902955532074,grad_norm: 0.8924378503988586, iteration: 60452
loss: 1.0008188486099243,grad_norm: 0.9064818761587614, iteration: 60453
loss: 1.0142998695373535,grad_norm: 0.9999996848247203, iteration: 60454
loss: 0.9974928498268127,grad_norm: 0.9999989642500935, iteration: 60455
loss: 0.9875142574310303,grad_norm: 0.9999996840180426, iteration: 60456
loss: 0.9896267056465149,grad_norm: 0.9167645913543131, iteration: 60457
loss: 1.0221501588821411,grad_norm: 0.9119853160843113, iteration: 60458
loss: 1.029982566833496,grad_norm: 0.8539333330783013, iteration: 60459
loss: 0.9907761216163635,grad_norm: 0.9999992020529829, iteration: 60460
loss: 0.9887182116508484,grad_norm: 0.9428354830995971, iteration: 60461
loss: 1.025650978088379,grad_norm: 0.9999993389931898, iteration: 60462
loss: 0.9554315209388733,grad_norm: 0.9999990366450975, iteration: 60463
loss: 0.9984581470489502,grad_norm: 0.9551745478348244, iteration: 60464
loss: 1.026006817817688,grad_norm: 0.8079507920990564, iteration: 60465
loss: 1.021627426147461,grad_norm: 0.8658304707723841, iteration: 60466
loss: 1.0712809562683105,grad_norm: 0.9999995415384643, iteration: 60467
loss: 1.0202182531356812,grad_norm: 0.9400136046006319, iteration: 60468
loss: 0.9872282147407532,grad_norm: 0.9713273760229213, iteration: 60469
loss: 1.0269719362258911,grad_norm: 0.9999991116049959, iteration: 60470
loss: 0.9984428882598877,grad_norm: 0.9698845114533482, iteration: 60471
loss: 0.9987031817436218,grad_norm: 0.9621378449132524, iteration: 60472
loss: 1.0084439516067505,grad_norm: 0.8029575511364823, iteration: 60473
loss: 0.9925159215927124,grad_norm: 0.9930962360905791, iteration: 60474
loss: 1.0430784225463867,grad_norm: 0.8317662221661077, iteration: 60475
loss: 0.9940223097801208,grad_norm: 0.9924188405845044, iteration: 60476
loss: 1.0263113975524902,grad_norm: 0.9999997615561486, iteration: 60477
loss: 1.0037829875946045,grad_norm: 0.8715459432261866, iteration: 60478
loss: 0.9761025309562683,grad_norm: 0.8704163559116169, iteration: 60479
loss: 1.0074362754821777,grad_norm: 0.8947780754250481, iteration: 60480
loss: 1.0167754888534546,grad_norm: 0.9424801123552133, iteration: 60481
loss: 0.9933540225028992,grad_norm: 0.909626855355906, iteration: 60482
loss: 1.001552700996399,grad_norm: 0.9999991934751801, iteration: 60483
loss: 1.027361273765564,grad_norm: 0.8945965420545954, iteration: 60484
loss: 1.0081745386123657,grad_norm: 0.794574201560471, iteration: 60485
loss: 0.9918264150619507,grad_norm: 0.8520192635155603, iteration: 60486
loss: 0.9966153502464294,grad_norm: 0.9999990472929898, iteration: 60487
loss: 1.0377933979034424,grad_norm: 0.9136010364674046, iteration: 60488
loss: 1.0017777681350708,grad_norm: 0.8271059315287613, iteration: 60489
loss: 1.0260239839553833,grad_norm: 0.8706620540604626, iteration: 60490
loss: 0.9779344201087952,grad_norm: 0.9900633041179582, iteration: 60491
loss: 1.026246190071106,grad_norm: 0.9999998074503109, iteration: 60492
loss: 1.0300204753875732,grad_norm: 0.9999996876215888, iteration: 60493
loss: 0.994900107383728,grad_norm: 0.8036461823499734, iteration: 60494
loss: 0.9798914790153503,grad_norm: 0.9393226837855483, iteration: 60495
loss: 1.1445714235305786,grad_norm: 0.9999996719369763, iteration: 60496
loss: 1.0499780178070068,grad_norm: 0.9999992898352714, iteration: 60497
loss: 0.9935522675514221,grad_norm: 0.9843076196911292, iteration: 60498
loss: 0.9796909689903259,grad_norm: 0.9999991264296021, iteration: 60499
loss: 1.016672968864441,grad_norm: 0.9999992059494193, iteration: 60500
loss: 1.051798939704895,grad_norm: 0.9999998044175472, iteration: 60501
loss: 0.9975563883781433,grad_norm: 0.9999995183581342, iteration: 60502
loss: 1.0216060876846313,grad_norm: 0.7619547985157117, iteration: 60503
loss: 1.0047603845596313,grad_norm: 0.8268593233208431, iteration: 60504
loss: 1.0112559795379639,grad_norm: 0.9517548052196992, iteration: 60505
loss: 0.9383048415184021,grad_norm: 0.9999991600253831, iteration: 60506
loss: 1.0498415231704712,grad_norm: 0.9999999620137383, iteration: 60507
loss: 1.0033011436462402,grad_norm: 0.999999124440551, iteration: 60508
loss: 1.0085941553115845,grad_norm: 0.9751132783952858, iteration: 60509
loss: 1.0141990184783936,grad_norm: 0.96051495488374, iteration: 60510
loss: 0.9937540292739868,grad_norm: 0.7744972914146399, iteration: 60511
loss: 1.208403468132019,grad_norm: 0.9999995944305753, iteration: 60512
loss: 0.9849697351455688,grad_norm: 0.9612245673338782, iteration: 60513
loss: 1.0112725496292114,grad_norm: 0.8832004561180461, iteration: 60514
loss: 1.0463744401931763,grad_norm: 0.9999991008661802, iteration: 60515
loss: 0.9825549125671387,grad_norm: 0.8101751722435931, iteration: 60516
loss: 1.0978914499282837,grad_norm: 0.9999997350079323, iteration: 60517
loss: 0.9690641164779663,grad_norm: 0.9999990166652211, iteration: 60518
loss: 1.0883418321609497,grad_norm: 0.9999998735562157, iteration: 60519
loss: 1.0600584745407104,grad_norm: 0.9999994401214353, iteration: 60520
loss: 0.9670236110687256,grad_norm: 0.9999989635989248, iteration: 60521
loss: 0.9773056507110596,grad_norm: 0.9999990549638763, iteration: 60522
loss: 1.0131334066390991,grad_norm: 0.9999992708736201, iteration: 60523
loss: 0.9985191226005554,grad_norm: 0.9999989910097445, iteration: 60524
loss: 1.025524616241455,grad_norm: 0.9999997154009015, iteration: 60525
loss: 0.9525351524353027,grad_norm: 0.999999112486429, iteration: 60526
loss: 0.9744863510131836,grad_norm: 0.9854875929840246, iteration: 60527
loss: 1.029699683189392,grad_norm: 0.9999997484103483, iteration: 60528
loss: 1.032310128211975,grad_norm: 0.9999991567526896, iteration: 60529
loss: 1.0275988578796387,grad_norm: 0.9078953289627393, iteration: 60530
loss: 1.0123999118804932,grad_norm: 0.9999997159240048, iteration: 60531
loss: 1.026442050933838,grad_norm: 0.9999991923525364, iteration: 60532
loss: 1.0022799968719482,grad_norm: 0.8595795176095599, iteration: 60533
loss: 0.9923251867294312,grad_norm: 0.9999991346544796, iteration: 60534
loss: 0.9836752414703369,grad_norm: 0.9474872548227639, iteration: 60535
loss: 0.9839838743209839,grad_norm: 0.8299410868148313, iteration: 60536
loss: 1.002902626991272,grad_norm: 0.8545572882215093, iteration: 60537
loss: 1.0007644891738892,grad_norm: 0.9791826532512072, iteration: 60538
loss: 1.031784176826477,grad_norm: 0.9999995758828102, iteration: 60539
loss: 0.97714763879776,grad_norm: 0.8669475445831324, iteration: 60540
loss: 1.0122051239013672,grad_norm: 0.9999990965544435, iteration: 60541
loss: 1.0167180299758911,grad_norm: 0.9813964202973381, iteration: 60542
loss: 0.9739586710929871,grad_norm: 0.8245683877954556, iteration: 60543
loss: 0.9611797332763672,grad_norm: 0.9502451888867192, iteration: 60544
loss: 1.0084633827209473,grad_norm: 0.9999990529963412, iteration: 60545
loss: 0.987561821937561,grad_norm: 0.8519636254957174, iteration: 60546
loss: 0.9729253649711609,grad_norm: 0.9999991535989763, iteration: 60547
loss: 1.027687907218933,grad_norm: 0.8796555125551836, iteration: 60548
loss: 0.9850629568099976,grad_norm: 0.9999991353822942, iteration: 60549
loss: 0.9971688985824585,grad_norm: 0.9296368066649319, iteration: 60550
loss: 1.0060206651687622,grad_norm: 0.9999991644556652, iteration: 60551
loss: 1.0005598068237305,grad_norm: 0.83994761840351, iteration: 60552
loss: 1.0167707204818726,grad_norm: 0.9235778092628701, iteration: 60553
loss: 0.9892235994338989,grad_norm: 0.999999028464075, iteration: 60554
loss: 0.9953290224075317,grad_norm: 0.9999991810344419, iteration: 60555
loss: 1.0348607301712036,grad_norm: 0.8200801035116436, iteration: 60556
loss: 1.0547703504562378,grad_norm: 0.9999998521307945, iteration: 60557
loss: 0.995394229888916,grad_norm: 0.9312817930793088, iteration: 60558
loss: 1.0062670707702637,grad_norm: 0.9999991273407519, iteration: 60559
loss: 0.9821068644523621,grad_norm: 0.9522349755058578, iteration: 60560
loss: 1.016065001487732,grad_norm: 0.9362967575901493, iteration: 60561
loss: 1.0293091535568237,grad_norm: 0.9999991354752719, iteration: 60562
loss: 0.9848726987838745,grad_norm: 0.9999996252334973, iteration: 60563
loss: 0.9533818960189819,grad_norm: 0.93094128874118, iteration: 60564
loss: 1.0023062229156494,grad_norm: 0.9521959055988566, iteration: 60565
loss: 1.0068814754486084,grad_norm: 0.9999994461274389, iteration: 60566
loss: 0.9748216271400452,grad_norm: 0.8310266846823008, iteration: 60567
loss: 0.9984573125839233,grad_norm: 0.8072831810471869, iteration: 60568
loss: 0.9579910635948181,grad_norm: 0.9999996600478158, iteration: 60569
loss: 1.0273805856704712,grad_norm: 0.9999990991118917, iteration: 60570
loss: 1.0285038948059082,grad_norm: 0.8639423916516329, iteration: 60571
loss: 1.0360212326049805,grad_norm: 0.9999993575364172, iteration: 60572
loss: 0.9637718796730042,grad_norm: 0.8708745779908696, iteration: 60573
loss: 0.9828220009803772,grad_norm: 0.9999997615974151, iteration: 60574
loss: 0.9988421201705933,grad_norm: 0.9999992334766198, iteration: 60575
loss: 1.0310378074645996,grad_norm: 0.9708335584517547, iteration: 60576
loss: 0.9844773411750793,grad_norm: 0.8385599528960198, iteration: 60577
loss: 1.0358855724334717,grad_norm: 0.9999996115825194, iteration: 60578
loss: 1.0180463790893555,grad_norm: 0.9999992847866687, iteration: 60579
loss: 0.9991792440414429,grad_norm: 0.9391134519982992, iteration: 60580
loss: 0.9928947687149048,grad_norm: 0.9637967096260011, iteration: 60581
loss: 0.9833133220672607,grad_norm: 0.9424493880453949, iteration: 60582
loss: 1.0244536399841309,grad_norm: 0.9112918707091238, iteration: 60583
loss: 1.0569543838500977,grad_norm: 0.945056541038305, iteration: 60584
loss: 1.0218433141708374,grad_norm: 0.9999990893544431, iteration: 60585
loss: 1.001491904258728,grad_norm: 0.9012199076468236, iteration: 60586
loss: 0.9968162178993225,grad_norm: 0.9999998421805136, iteration: 60587
loss: 1.025925874710083,grad_norm: 0.9999994321820744, iteration: 60588
loss: 1.0467808246612549,grad_norm: 0.8725595017376581, iteration: 60589
loss: 1.0190049409866333,grad_norm: 0.7705211876546969, iteration: 60590
loss: 1.017295002937317,grad_norm: 0.9999991334715932, iteration: 60591
loss: 1.0324347019195557,grad_norm: 0.9999997590502868, iteration: 60592
loss: 1.0009082555770874,grad_norm: 0.9999995077032882, iteration: 60593
loss: 0.9805477857589722,grad_norm: 0.8534846580120393, iteration: 60594
loss: 1.002364993095398,grad_norm: 0.9409736623732013, iteration: 60595
loss: 0.9617472887039185,grad_norm: 0.8017087521999834, iteration: 60596
loss: 0.9837139844894409,grad_norm: 0.9821182926093306, iteration: 60597
loss: 1.015000820159912,grad_norm: 0.9999991459686183, iteration: 60598
loss: 1.0100345611572266,grad_norm: 0.9999991962949436, iteration: 60599
loss: 0.9911811351776123,grad_norm: 0.9300862285843913, iteration: 60600
loss: 1.0090398788452148,grad_norm: 0.9999991917113314, iteration: 60601
loss: 1.0016224384307861,grad_norm: 0.9999991157586423, iteration: 60602
loss: 1.05202054977417,grad_norm: 0.9999992891385079, iteration: 60603
loss: 1.0082470178604126,grad_norm: 0.9867504155361919, iteration: 60604
loss: 1.089784860610962,grad_norm: 0.9999992750514812, iteration: 60605
loss: 1.0001686811447144,grad_norm: 0.9999996829206482, iteration: 60606
loss: 1.012453317642212,grad_norm: 0.9999991310607782, iteration: 60607
loss: 1.0167001485824585,grad_norm: 0.9999998741956481, iteration: 60608
loss: 1.0315842628479004,grad_norm: 0.9038284117677463, iteration: 60609
loss: 0.9765600562095642,grad_norm: 0.9999994948046688, iteration: 60610
loss: 0.9833120107650757,grad_norm: 0.7819428976816472, iteration: 60611
loss: 0.9547047019004822,grad_norm: 0.9441211635711806, iteration: 60612
loss: 1.0266703367233276,grad_norm: 0.8367174562368153, iteration: 60613
loss: 0.9930041432380676,grad_norm: 0.9999991257980536, iteration: 60614
loss: 0.9832994937896729,grad_norm: 0.9999992287942383, iteration: 60615
loss: 1.0632070302963257,grad_norm: 0.9999996808874317, iteration: 60616
loss: 1.0305311679840088,grad_norm: 0.8377681432374371, iteration: 60617
loss: 1.0270954370498657,grad_norm: 0.7270691658174561, iteration: 60618
loss: 1.0428239107131958,grad_norm: 0.9117275382940939, iteration: 60619
loss: 1.0020802021026611,grad_norm: 0.9077474987109354, iteration: 60620
loss: 0.9809963703155518,grad_norm: 0.9817469387749912, iteration: 60621
loss: 1.000759243965149,grad_norm: 0.9999991197922258, iteration: 60622
loss: 1.048270583152771,grad_norm: 0.9999993688734297, iteration: 60623
loss: 1.0285189151763916,grad_norm: 0.8814134577594501, iteration: 60624
loss: 1.0089993476867676,grad_norm: 0.9999991344393996, iteration: 60625
loss: 1.0362623929977417,grad_norm: 0.9999994470849214, iteration: 60626
loss: 1.0107277631759644,grad_norm: 0.9999991182761743, iteration: 60627
loss: 0.9905532002449036,grad_norm: 0.8585519968133014, iteration: 60628
loss: 1.0488941669464111,grad_norm: 0.9168772810211434, iteration: 60629
loss: 1.0193443298339844,grad_norm: 0.887492529225158, iteration: 60630
loss: 0.9799823760986328,grad_norm: 0.893876339106003, iteration: 60631
loss: 1.0114130973815918,grad_norm: 0.947607735592461, iteration: 60632
loss: 0.9927616715431213,grad_norm: 0.9209557857682427, iteration: 60633
loss: 1.034401535987854,grad_norm: 0.9999991170762212, iteration: 60634
loss: 0.9836890697479248,grad_norm: 0.8992215464132728, iteration: 60635
loss: 1.0275455713272095,grad_norm: 0.999999360278804, iteration: 60636
loss: 1.0081664323806763,grad_norm: 0.8500306497503688, iteration: 60637
loss: 1.0526126623153687,grad_norm: 0.9999990978660367, iteration: 60638
loss: 1.0419830083847046,grad_norm: 0.8743355991430922, iteration: 60639
loss: 1.1565895080566406,grad_norm: 0.9999996476069707, iteration: 60640
loss: 1.0007057189941406,grad_norm: 0.9589666256750968, iteration: 60641
loss: 1.0046502351760864,grad_norm: 0.8331939057040596, iteration: 60642
loss: 1.0095263719558716,grad_norm: 0.9726143352829277, iteration: 60643
loss: 0.9935962557792664,grad_norm: 0.8828544216507945, iteration: 60644
loss: 1.0191044807434082,grad_norm: 0.9999992007291557, iteration: 60645
loss: 0.9982406497001648,grad_norm: 0.9999992885369693, iteration: 60646
loss: 1.021827220916748,grad_norm: 0.9999993392666587, iteration: 60647
loss: 1.0066215991973877,grad_norm: 0.9999991127485021, iteration: 60648
loss: 1.0076473951339722,grad_norm: 0.9999997064002146, iteration: 60649
loss: 0.9942827224731445,grad_norm: 0.9978370686688044, iteration: 60650
loss: 1.0067131519317627,grad_norm: 0.9999993107743903, iteration: 60651
loss: 1.0352696180343628,grad_norm: 0.9999990672720164, iteration: 60652
loss: 0.9883201718330383,grad_norm: 0.9167820401221957, iteration: 60653
loss: 0.9661167860031128,grad_norm: 0.8812462221737993, iteration: 60654
loss: 0.9597002267837524,grad_norm: 0.9999990312468473, iteration: 60655
loss: 1.0063880681991577,grad_norm: 0.9999992794517903, iteration: 60656
loss: 1.0071046352386475,grad_norm: 0.9999992271147071, iteration: 60657
loss: 0.9747403860092163,grad_norm: 0.9999990666355039, iteration: 60658
loss: 0.9848583936691284,grad_norm: 0.9742250813733726, iteration: 60659
loss: 1.037114143371582,grad_norm: 0.9999994096881857, iteration: 60660
loss: 1.0108102560043335,grad_norm: 0.9068939535491066, iteration: 60661
loss: 1.0180681943893433,grad_norm: 0.9999993749703423, iteration: 60662
loss: 1.009182333946228,grad_norm: 0.8769852652787508, iteration: 60663
loss: 0.9501851797103882,grad_norm: 0.8701280589182565, iteration: 60664
loss: 1.048689603805542,grad_norm: 0.999999137347675, iteration: 60665
loss: 0.9740725159645081,grad_norm: 0.8245195742076417, iteration: 60666
loss: 0.9993323683738708,grad_norm: 0.9999990344714637, iteration: 60667
loss: 0.977105975151062,grad_norm: 0.8986368857063387, iteration: 60668
loss: 1.0229461193084717,grad_norm: 0.9999991517884834, iteration: 60669
loss: 1.0139248371124268,grad_norm: 0.9999993689337143, iteration: 60670
loss: 1.0066285133361816,grad_norm: 0.9999991493870681, iteration: 60671
loss: 1.003092646598816,grad_norm: 0.8569488194763525, iteration: 60672
loss: 1.0317635536193848,grad_norm: 0.9576617190858044, iteration: 60673
loss: 0.9913917183876038,grad_norm: 0.9623984899273204, iteration: 60674
loss: 0.9884857535362244,grad_norm: 0.8950478939027375, iteration: 60675
loss: 1.0010998249053955,grad_norm: 0.9999990143340698, iteration: 60676
loss: 1.0270469188690186,grad_norm: 0.8067827831964409, iteration: 60677
loss: 1.0104109048843384,grad_norm: 0.9999989454152896, iteration: 60678
loss: 0.9887599349021912,grad_norm: 0.9999991784163007, iteration: 60679
loss: 1.0124945640563965,grad_norm: 0.9999990833318474, iteration: 60680
loss: 0.9614580869674683,grad_norm: 0.9999991091201347, iteration: 60681
loss: 1.044264316558838,grad_norm: 0.9999991759032488, iteration: 60682
loss: 1.0206294059753418,grad_norm: 0.9999991269968296, iteration: 60683
loss: 0.9594023823738098,grad_norm: 0.9999991178042092, iteration: 60684
loss: 1.0849220752716064,grad_norm: 0.9999993089630597, iteration: 60685
loss: 0.9799128770828247,grad_norm: 0.8857128772541544, iteration: 60686
loss: 0.9551540613174438,grad_norm: 0.9999991077006306, iteration: 60687
loss: 0.9806102514266968,grad_norm: 0.8554685536973627, iteration: 60688
loss: 1.00297212600708,grad_norm: 0.9089911598374296, iteration: 60689
loss: 0.9885591268539429,grad_norm: 0.8417364635241118, iteration: 60690
loss: 0.9931474328041077,grad_norm: 0.9888193987252563, iteration: 60691
loss: 1.0091798305511475,grad_norm: 0.7797768197205831, iteration: 60692
loss: 1.1037136316299438,grad_norm: 0.9999990625895504, iteration: 60693
loss: 1.0122191905975342,grad_norm: 0.9400914646443853, iteration: 60694
loss: 1.031468391418457,grad_norm: 0.983104616102454, iteration: 60695
loss: 1.0152583122253418,grad_norm: 0.9204179303650675, iteration: 60696
loss: 1.0229992866516113,grad_norm: 0.9999991457483377, iteration: 60697
loss: 0.9850476980209351,grad_norm: 0.9597458900408479, iteration: 60698
loss: 1.0220482349395752,grad_norm: 0.9999990594491917, iteration: 60699
loss: 1.0305733680725098,grad_norm: 0.9999995429050618, iteration: 60700
loss: 0.9886178970336914,grad_norm: 0.8907813041080216, iteration: 60701
loss: 0.984682559967041,grad_norm: 0.9475676567147163, iteration: 60702
loss: 0.9687504768371582,grad_norm: 0.8011816105109355, iteration: 60703
loss: 0.9630233645439148,grad_norm: 0.9572514949384231, iteration: 60704
loss: 1.0182522535324097,grad_norm: 0.7876711021906233, iteration: 60705
loss: 0.993415355682373,grad_norm: 0.9239192135100942, iteration: 60706
loss: 0.9933755993843079,grad_norm: 0.9999990379716379, iteration: 60707
loss: 1.0261754989624023,grad_norm: 0.9999996874451185, iteration: 60708
loss: 0.975838303565979,grad_norm: 0.9999991868858236, iteration: 60709
loss: 0.996396005153656,grad_norm: 0.8399107640874489, iteration: 60710
loss: 0.9891957640647888,grad_norm: 0.9391314526421618, iteration: 60711
loss: 0.9953449964523315,grad_norm: 0.9452172831866285, iteration: 60712
loss: 1.0064972639083862,grad_norm: 0.8327345171170363, iteration: 60713
loss: 1.0068198442459106,grad_norm: 0.9999992395058298, iteration: 60714
loss: 1.003071665763855,grad_norm: 0.9584842064747969, iteration: 60715
loss: 1.000388503074646,grad_norm: 0.9999991547855543, iteration: 60716
loss: 0.9835696220397949,grad_norm: 0.8287103440081447, iteration: 60717
loss: 0.9891331195831299,grad_norm: 0.9999996531352682, iteration: 60718
loss: 0.9763162732124329,grad_norm: 0.9999991182193769, iteration: 60719
loss: 0.9704972505569458,grad_norm: 0.9999991283068268, iteration: 60720
loss: 1.0198003053665161,grad_norm: 0.9999993512288078, iteration: 60721
loss: 1.0133614540100098,grad_norm: 0.9999994579431257, iteration: 60722
loss: 0.9790908098220825,grad_norm: 0.8396457234630571, iteration: 60723
loss: 0.9897457361221313,grad_norm: 0.9999990767643021, iteration: 60724
loss: 1.035081386566162,grad_norm: 0.8772509271526749, iteration: 60725
loss: 1.0029563903808594,grad_norm: 0.9077172102915815, iteration: 60726
loss: 1.0278117656707764,grad_norm: 0.9293528838003805, iteration: 60727
loss: 0.9680129885673523,grad_norm: 0.9091382384134784, iteration: 60728
loss: 1.0230454206466675,grad_norm: 0.9655681997889572, iteration: 60729
loss: 1.0220160484313965,grad_norm: 0.9999989728632613, iteration: 60730
loss: 1.0207369327545166,grad_norm: 0.9999990843438177, iteration: 60731
loss: 0.9652373194694519,grad_norm: 0.9999995086752304, iteration: 60732
loss: 0.9952259659767151,grad_norm: 0.9999993449303688, iteration: 60733
loss: 1.0195417404174805,grad_norm: 0.850509664440854, iteration: 60734
loss: 0.9996263980865479,grad_norm: 0.810439992348578, iteration: 60735
loss: 1.0287946462631226,grad_norm: 0.7975146526760734, iteration: 60736
loss: 0.9526189565658569,grad_norm: 0.9999990723767248, iteration: 60737
loss: 1.022381067276001,grad_norm: 0.9999996284957223, iteration: 60738
loss: 1.0105721950531006,grad_norm: 0.8964154611314296, iteration: 60739
loss: 1.0071688890457153,grad_norm: 0.8744409695540704, iteration: 60740
loss: 1.0361720323562622,grad_norm: 0.9511896477294457, iteration: 60741
loss: 1.0242869853973389,grad_norm: 0.9999989475315396, iteration: 60742
loss: 1.0254695415496826,grad_norm: 0.9999991881238012, iteration: 60743
loss: 0.9564714431762695,grad_norm: 0.986886889187737, iteration: 60744
loss: 1.0352113246917725,grad_norm: 0.9999990448157784, iteration: 60745
loss: 0.964270830154419,grad_norm: 0.9999992067598479, iteration: 60746
loss: 1.0245290994644165,grad_norm: 0.9999999020323729, iteration: 60747
loss: 1.2425307035446167,grad_norm: 0.9999997016609131, iteration: 60748
loss: 1.0082858800888062,grad_norm: 0.8555537301852456, iteration: 60749
loss: 1.0270973443984985,grad_norm: 0.9999998198293951, iteration: 60750
loss: 1.0111589431762695,grad_norm: 0.9999993071857126, iteration: 60751
loss: 1.0259087085723877,grad_norm: 0.9999992265151084, iteration: 60752
loss: 1.056667685508728,grad_norm: 0.9999998198795814, iteration: 60753
loss: 1.0317859649658203,grad_norm: 0.849410155073858, iteration: 60754
loss: 0.9798975586891174,grad_norm: 0.8842409210326617, iteration: 60755
loss: 1.0288550853729248,grad_norm: 0.839879537407947, iteration: 60756
loss: 1.0405023097991943,grad_norm: 0.9649291821430607, iteration: 60757
loss: 1.0208028554916382,grad_norm: 0.9999990505047058, iteration: 60758
loss: 0.9901067018508911,grad_norm: 0.9333575106728897, iteration: 60759
loss: 0.9906302094459534,grad_norm: 0.8208544939541179, iteration: 60760
loss: 1.0451551675796509,grad_norm: 0.9999994297923191, iteration: 60761
loss: 0.9959224462509155,grad_norm: 0.9630133054787503, iteration: 60762
loss: 0.9807049632072449,grad_norm: 0.9053799873757237, iteration: 60763
loss: 0.9898549914360046,grad_norm: 0.9999991154814657, iteration: 60764
loss: 0.9943477511405945,grad_norm: 0.9807335736415809, iteration: 60765
loss: 0.9902312755584717,grad_norm: 0.972049241007695, iteration: 60766
loss: 1.019572377204895,grad_norm: 0.9999997816344924, iteration: 60767
loss: 1.0269466638565063,grad_norm: 0.9999997332110546, iteration: 60768
loss: 1.0087474584579468,grad_norm: 0.8206025659083115, iteration: 60769
loss: 0.9912149906158447,grad_norm: 0.9999991851618719, iteration: 60770
loss: 1.0022600889205933,grad_norm: 0.9981512548994123, iteration: 60771
loss: 1.008123755455017,grad_norm: 0.780025555534121, iteration: 60772
loss: 0.9720684885978699,grad_norm: 0.9178508590225863, iteration: 60773
loss: 0.9735554456710815,grad_norm: 0.9999990926667832, iteration: 60774
loss: 1.001613736152649,grad_norm: 0.9999994691219111, iteration: 60775
loss: 0.9389165639877319,grad_norm: 0.9999991867295644, iteration: 60776
loss: 1.0347447395324707,grad_norm: 0.999999314293234, iteration: 60777
loss: 1.0052850246429443,grad_norm: 0.9119571775309055, iteration: 60778
loss: 0.98844313621521,grad_norm: 0.9999996258139267, iteration: 60779
loss: 1.0271828174591064,grad_norm: 0.9999992144437547, iteration: 60780
loss: 0.9926878809928894,grad_norm: 0.9999991098604017, iteration: 60781
loss: 0.989369809627533,grad_norm: 0.9759662019412171, iteration: 60782
loss: 1.0322836637496948,grad_norm: 0.999999175679704, iteration: 60783
loss: 1.0354053974151611,grad_norm: 0.999999632869641, iteration: 60784
loss: 0.9641655683517456,grad_norm: 0.9037301595362837, iteration: 60785
loss: 1.0321621894836426,grad_norm: 0.9999993610207215, iteration: 60786
loss: 0.9809899926185608,grad_norm: 0.886680900450462, iteration: 60787
loss: 0.9404176473617554,grad_norm: 0.9999990634511966, iteration: 60788
loss: 1.0125112533569336,grad_norm: 0.9254145188607275, iteration: 60789
loss: 0.9977691769599915,grad_norm: 0.7028338166523236, iteration: 60790
loss: 1.012546420097351,grad_norm: 0.9607168695977962, iteration: 60791
loss: 0.9560256600379944,grad_norm: 0.8421966126280666, iteration: 60792
loss: 0.9960290193557739,grad_norm: 0.9999993098402272, iteration: 60793
loss: 1.0193638801574707,grad_norm: 0.9882778434126991, iteration: 60794
loss: 1.0202499628067017,grad_norm: 0.9999995434469279, iteration: 60795
loss: 0.9912471175193787,grad_norm: 0.850181970166753, iteration: 60796
loss: 1.0261352062225342,grad_norm: 0.9999989440550983, iteration: 60797
loss: 1.0252068042755127,grad_norm: 0.9006506348803284, iteration: 60798
loss: 1.0308682918548584,grad_norm: 0.9999993269991885, iteration: 60799
loss: 1.0242528915405273,grad_norm: 0.9999994423471102, iteration: 60800
loss: 1.0496537685394287,grad_norm: 0.9999997900056036, iteration: 60801
loss: 1.1182427406311035,grad_norm: 0.9366101066183738, iteration: 60802
loss: 1.052690029144287,grad_norm: 0.8074757614493321, iteration: 60803
loss: 1.0093718767166138,grad_norm: 0.999999500941758, iteration: 60804
loss: 0.9662069082260132,grad_norm: 0.9999992987520923, iteration: 60805
loss: 1.0313851833343506,grad_norm: 0.9286468659098988, iteration: 60806
loss: 1.012681245803833,grad_norm: 0.9651861397333574, iteration: 60807
loss: 0.9929295182228088,grad_norm: 0.8717795664409369, iteration: 60808
loss: 0.9840648174285889,grad_norm: 0.8632366397113188, iteration: 60809
loss: 0.990126371383667,grad_norm: 0.9999992798903742, iteration: 60810
loss: 0.9301976561546326,grad_norm: 0.8431246139536847, iteration: 60811
loss: 1.0156033039093018,grad_norm: 0.9074733121232601, iteration: 60812
loss: 0.9972243905067444,grad_norm: 0.9635665246459113, iteration: 60813
loss: 0.9634021520614624,grad_norm: 0.9999991162646258, iteration: 60814
loss: 1.0917786359786987,grad_norm: 0.9999990515886544, iteration: 60815
loss: 0.9873396754264832,grad_norm: 0.8805766234023251, iteration: 60816
loss: 0.9945268630981445,grad_norm: 0.9999990093473743, iteration: 60817
loss: 1.0063470602035522,grad_norm: 0.9999990605045396, iteration: 60818
loss: 1.026174545288086,grad_norm: 0.8743107968112358, iteration: 60819
loss: 1.0285722017288208,grad_norm: 0.8155460092748394, iteration: 60820
loss: 0.9913743734359741,grad_norm: 0.99999919307052, iteration: 60821
loss: 1.021982192993164,grad_norm: 0.9999992005619593, iteration: 60822
loss: 1.0078166723251343,grad_norm: 0.9999991495651382, iteration: 60823
loss: 1.063331127166748,grad_norm: 0.9999999107173104, iteration: 60824
loss: 1.0326555967330933,grad_norm: 0.999999286168044, iteration: 60825
loss: 1.0234506130218506,grad_norm: 0.9129330863804904, iteration: 60826
loss: 1.0081466436386108,grad_norm: 0.991908109063722, iteration: 60827
loss: 1.0328623056411743,grad_norm: 0.8235889717147047, iteration: 60828
loss: 0.9752341508865356,grad_norm: 0.9999992470617713, iteration: 60829
loss: 1.0079432725906372,grad_norm: 0.7614015874705511, iteration: 60830
loss: 1.0156043767929077,grad_norm: 0.8077833999474996, iteration: 60831
loss: 1.0162101984024048,grad_norm: 0.9082835946243933, iteration: 60832
loss: 0.9921979308128357,grad_norm: 0.9999994066959028, iteration: 60833
loss: 1.0085512399673462,grad_norm: 0.9712239277514692, iteration: 60834
loss: 0.9785977005958557,grad_norm: 0.8150622106617639, iteration: 60835
loss: 1.006707787513733,grad_norm: 0.9245457322577662, iteration: 60836
loss: 0.9648938179016113,grad_norm: 0.9999990223378588, iteration: 60837
loss: 1.0038964748382568,grad_norm: 0.8742158049528089, iteration: 60838
loss: 0.987171471118927,grad_norm: 0.9166179564998286, iteration: 60839
loss: 0.983277440071106,grad_norm: 0.8864470592733467, iteration: 60840
loss: 1.0063953399658203,grad_norm: 0.9999989977900721, iteration: 60841
loss: 0.945271372795105,grad_norm: 0.9999991768625847, iteration: 60842
loss: 1.008712887763977,grad_norm: 0.9658515008335985, iteration: 60843
loss: 1.008730173110962,grad_norm: 0.8143219561808279, iteration: 60844
loss: 0.9773215651512146,grad_norm: 0.8421585542646773, iteration: 60845
loss: 0.9722091555595398,grad_norm: 0.9999991420316985, iteration: 60846
loss: 0.9742960929870605,grad_norm: 0.9999999299233544, iteration: 60847
loss: 0.9916714429855347,grad_norm: 0.9999992427544152, iteration: 60848
loss: 0.9890511631965637,grad_norm: 0.9187334405261359, iteration: 60849
loss: 1.0290583372116089,grad_norm: 0.8526212776970103, iteration: 60850
loss: 1.0182796716690063,grad_norm: 0.9449125584922689, iteration: 60851
loss: 0.978850245475769,grad_norm: 0.9999990512600124, iteration: 60852
loss: 1.0044111013412476,grad_norm: 0.9999990805339384, iteration: 60853
loss: 1.0264683961868286,grad_norm: 0.9037154180427012, iteration: 60854
loss: 1.0066986083984375,grad_norm: 0.9431032485652362, iteration: 60855
loss: 0.9688503742218018,grad_norm: 0.9973460648514177, iteration: 60856
loss: 0.9985549449920654,grad_norm: 0.8694211362252067, iteration: 60857
loss: 0.9937465190887451,grad_norm: 0.999999094383545, iteration: 60858
loss: 1.0031306743621826,grad_norm: 0.9101183736869985, iteration: 60859
loss: 1.0203800201416016,grad_norm: 0.7662702634367586, iteration: 60860
loss: 0.9824873805046082,grad_norm: 0.8859262633376639, iteration: 60861
loss: 0.97428959608078,grad_norm: 0.9479609349758558, iteration: 60862
loss: 0.9992582201957703,grad_norm: 0.7822501425007534, iteration: 60863
loss: 1.045820713043213,grad_norm: 0.9999992107599541, iteration: 60864
loss: 1.053399682044983,grad_norm: 0.999999491807676, iteration: 60865
loss: 1.0315948724746704,grad_norm: 0.9999998212165552, iteration: 60866
loss: 0.9885417819023132,grad_norm: 0.9999991402428925, iteration: 60867
loss: 0.9860908389091492,grad_norm: 0.9999991526094859, iteration: 60868
loss: 0.9993879199028015,grad_norm: 0.9999990842320456, iteration: 60869
loss: 1.037799596786499,grad_norm: 0.9999990653588403, iteration: 60870
loss: 1.0354008674621582,grad_norm: 0.871429689443489, iteration: 60871
loss: 0.9846051931381226,grad_norm: 0.8483856368055419, iteration: 60872
loss: 1.0268497467041016,grad_norm: 0.8796175514827902, iteration: 60873
loss: 1.0098885297775269,grad_norm: 0.9999991461327477, iteration: 60874
loss: 0.987834632396698,grad_norm: 0.8864216393023079, iteration: 60875
loss: 1.0003160238265991,grad_norm: 0.9999990877878406, iteration: 60876
loss: 1.0152570009231567,grad_norm: 0.9915359670141316, iteration: 60877
loss: 1.025610327720642,grad_norm: 0.9267367910614502, iteration: 60878
loss: 1.026923656463623,grad_norm: 0.9602595267835623, iteration: 60879
loss: 1.0096532106399536,grad_norm: 0.9999991361597229, iteration: 60880
loss: 1.0106265544891357,grad_norm: 0.9236258349729823, iteration: 60881
loss: 1.0088233947753906,grad_norm: 0.9999998522830148, iteration: 60882
loss: 1.0290751457214355,grad_norm: 0.9999989864503143, iteration: 60883
loss: 1.0071007013320923,grad_norm: 0.9999996944283398, iteration: 60884
loss: 1.047995924949646,grad_norm: 0.9999992924903236, iteration: 60885
loss: 1.0402271747589111,grad_norm: 0.9262059021632412, iteration: 60886
loss: 1.0174015760421753,grad_norm: 0.8687242111829846, iteration: 60887
loss: 1.0322304964065552,grad_norm: 0.9999990584995399, iteration: 60888
loss: 1.002052664756775,grad_norm: 0.9999990038584096, iteration: 60889
loss: 1.0012266635894775,grad_norm: 0.9017850319648398, iteration: 60890
loss: 1.0396467447280884,grad_norm: 0.9999997421190063, iteration: 60891
loss: 0.9863656163215637,grad_norm: 0.9860665748487979, iteration: 60892
loss: 1.0246683359146118,grad_norm: 0.8599149929956772, iteration: 60893
loss: 1.001793384552002,grad_norm: 0.9750386607573768, iteration: 60894
loss: 0.9793471097946167,grad_norm: 0.8490453785356067, iteration: 60895
loss: 1.0381101369857788,grad_norm: 0.9999999614367007, iteration: 60896
loss: 1.0210715532302856,grad_norm: 0.8700771551673031, iteration: 60897
loss: 0.9705669283866882,grad_norm: 0.8413619344219848, iteration: 60898
loss: 1.0776032209396362,grad_norm: 0.9999997283673935, iteration: 60899
loss: 1.042620062828064,grad_norm: 0.9999990362822789, iteration: 60900
loss: 0.997510552406311,grad_norm: 0.9999990757514079, iteration: 60901
loss: 1.033861756324768,grad_norm: 0.9999997245690261, iteration: 60902
loss: 0.9848137497901917,grad_norm: 0.9999990322613024, iteration: 60903
loss: 1.0081654787063599,grad_norm: 0.8713950064200555, iteration: 60904
loss: 0.9649167656898499,grad_norm: 0.9999991344724033, iteration: 60905
loss: 1.0434279441833496,grad_norm: 0.9999990892583692, iteration: 60906
loss: 1.0183275938034058,grad_norm: 0.806200929640515, iteration: 60907
loss: 0.9825615286827087,grad_norm: 0.8723883852003339, iteration: 60908
loss: 1.0152168273925781,grad_norm: 0.8838897868429016, iteration: 60909
loss: 1.0032061338424683,grad_norm: 0.914156893302557, iteration: 60910
loss: 1.0217363834381104,grad_norm: 0.9999993254307666, iteration: 60911
loss: 1.0111829042434692,grad_norm: 0.7805962108692064, iteration: 60912
loss: 1.0312203168869019,grad_norm: 0.9641253366991274, iteration: 60913
loss: 1.0019057989120483,grad_norm: 0.9092151332414539, iteration: 60914
loss: 0.9875478744506836,grad_norm: 0.7629425496345346, iteration: 60915
loss: 1.0062447786331177,grad_norm: 0.9999992395989153, iteration: 60916
loss: 0.9864528775215149,grad_norm: 0.829678077115166, iteration: 60917
loss: 0.991795003414154,grad_norm: 0.8632028301166201, iteration: 60918
loss: 0.9858756065368652,grad_norm: 0.999999780337732, iteration: 60919
loss: 0.9579009413719177,grad_norm: 0.9490444530730945, iteration: 60920
loss: 1.0022848844528198,grad_norm: 0.9353191075398057, iteration: 60921
loss: 1.0262113809585571,grad_norm: 0.8123065928810907, iteration: 60922
loss: 1.0026179552078247,grad_norm: 0.9999995952598698, iteration: 60923
loss: 1.0504677295684814,grad_norm: 0.9999999190188266, iteration: 60924
loss: 1.0109838247299194,grad_norm: 0.9819699670193519, iteration: 60925
loss: 1.020797610282898,grad_norm: 0.8281571241882547, iteration: 60926
loss: 1.044222354888916,grad_norm: 0.974362409624403, iteration: 60927
loss: 1.004448413848877,grad_norm: 0.9999991223828517, iteration: 60928
loss: 0.9939197301864624,grad_norm: 0.9999990615040134, iteration: 60929
loss: 1.0184344053268433,grad_norm: 0.9999991414141814, iteration: 60930
loss: 1.0226044654846191,grad_norm: 0.9999992473510845, iteration: 60931
loss: 0.9961009621620178,grad_norm: 0.9999989772211899, iteration: 60932
loss: 1.0003712177276611,grad_norm: 0.9806515857404883, iteration: 60933
loss: 0.9914103746414185,grad_norm: 0.9821350982575473, iteration: 60934
loss: 1.0084689855575562,grad_norm: 0.919639076667247, iteration: 60935
loss: 1.0242942571640015,grad_norm: 0.8532457803686497, iteration: 60936
loss: 1.0072256326675415,grad_norm: 0.9159235006743724, iteration: 60937
loss: 1.0365678071975708,grad_norm: 0.999999509996579, iteration: 60938
loss: 1.224134087562561,grad_norm: 0.9999997838987142, iteration: 60939
loss: 1.0204476118087769,grad_norm: 0.9274044191919572, iteration: 60940
loss: 0.9764569401741028,grad_norm: 0.9999994337093117, iteration: 60941
loss: 0.9716845154762268,grad_norm: 0.8730028001283886, iteration: 60942
loss: 0.9853919744491577,grad_norm: 0.9999990579065549, iteration: 60943
loss: 1.0181282758712769,grad_norm: 0.8927206893231386, iteration: 60944
loss: 1.0066770315170288,grad_norm: 0.9999997996351392, iteration: 60945
loss: 0.9903637170791626,grad_norm: 0.9221331969135975, iteration: 60946
loss: 1.0237528085708618,grad_norm: 0.9999991665061121, iteration: 60947
loss: 1.018579363822937,grad_norm: 0.9999990186293745, iteration: 60948
loss: 1.172853708267212,grad_norm: 0.9999993695255726, iteration: 60949
loss: 1.0047177076339722,grad_norm: 0.8907296858633973, iteration: 60950
loss: 0.9745001196861267,grad_norm: 0.9482078039162856, iteration: 60951
loss: 0.977558434009552,grad_norm: 0.9999993241459307, iteration: 60952
loss: 0.9528418183326721,grad_norm: 0.9999991385159995, iteration: 60953
loss: 1.0026743412017822,grad_norm: 0.861695465473887, iteration: 60954
loss: 1.0138546228408813,grad_norm: 0.9999998060036361, iteration: 60955
loss: 1.0376592874526978,grad_norm: 0.9999996343637372, iteration: 60956
loss: 1.1838933229446411,grad_norm: 0.9999993978916404, iteration: 60957
loss: 0.9784188270568848,grad_norm: 0.9159882495550804, iteration: 60958
loss: 0.9761075377464294,grad_norm: 0.9494699666598897, iteration: 60959
loss: 1.0113697052001953,grad_norm: 0.9999995260906329, iteration: 60960
loss: 1.0754848718643188,grad_norm: 0.9999996373485776, iteration: 60961
loss: 1.0242587327957153,grad_norm: 0.9999993032198788, iteration: 60962
loss: 0.9882785081863403,grad_norm: 0.9999991798580362, iteration: 60963
loss: 1.2570865154266357,grad_norm: 0.999999911626813, iteration: 60964
loss: 1.0287837982177734,grad_norm: 0.9999991291833767, iteration: 60965
loss: 1.0231696367263794,grad_norm: 0.9999995746009864, iteration: 60966
loss: 1.025032639503479,grad_norm: 0.999999024920276, iteration: 60967
loss: 1.2091023921966553,grad_norm: 0.999999373657709, iteration: 60968
loss: 1.0122311115264893,grad_norm: 0.9999999300817203, iteration: 60969
loss: 1.0811654329299927,grad_norm: 0.999999448547934, iteration: 60970
loss: 1.0074656009674072,grad_norm: 0.9999991815581424, iteration: 60971
loss: 1.057243824005127,grad_norm: 0.9999993667438026, iteration: 60972
loss: 1.019979476928711,grad_norm: 0.8141349220736598, iteration: 60973
loss: 1.0057400465011597,grad_norm: 0.9092588944074211, iteration: 60974
loss: 0.9989569783210754,grad_norm: 0.9999991365323116, iteration: 60975
loss: 0.9901434183120728,grad_norm: 0.9924095724241941, iteration: 60976
loss: 1.0460968017578125,grad_norm: 0.9999990355217365, iteration: 60977
loss: 0.9816038608551025,grad_norm: 0.9999991088779918, iteration: 60978
loss: 1.043169617652893,grad_norm: 0.930396076887675, iteration: 60979
loss: 1.015311360359192,grad_norm: 0.9999993665324101, iteration: 60980
loss: 1.0463420152664185,grad_norm: 0.826813107317685, iteration: 60981
loss: 1.0154743194580078,grad_norm: 0.9731026534237605, iteration: 60982
loss: 0.9940296411514282,grad_norm: 0.8992927242147739, iteration: 60983
loss: 1.0228865146636963,grad_norm: 0.824720960409985, iteration: 60984
loss: 1.1008111238479614,grad_norm: 0.9999994477109218, iteration: 60985
loss: 0.9880797863006592,grad_norm: 0.9999991759999081, iteration: 60986
loss: 1.0318225622177124,grad_norm: 0.9999993283424564, iteration: 60987
loss: 1.0197744369506836,grad_norm: 0.999999077961805, iteration: 60988
loss: 1.0552798509597778,grad_norm: 0.9999993018118273, iteration: 60989
loss: 0.976177453994751,grad_norm: 0.9999995501660792, iteration: 60990
loss: 0.9805861711502075,grad_norm: 0.9049665700676013, iteration: 60991
loss: 1.0171208381652832,grad_norm: 0.9999991846379684, iteration: 60992
loss: 1.0003211498260498,grad_norm: 0.9999997013207813, iteration: 60993
loss: 1.0040276050567627,grad_norm: 0.8993306872969942, iteration: 60994
loss: 1.0620144605636597,grad_norm: 0.9999992118626768, iteration: 60995
loss: 1.0422780513763428,grad_norm: 0.9999990993058422, iteration: 60996
loss: 0.9991002678871155,grad_norm: 0.9999991309695803, iteration: 60997
loss: 0.9976467490196228,grad_norm: 0.9999997761839585, iteration: 60998
loss: 0.988714337348938,grad_norm: 0.9999990905689461, iteration: 60999
loss: 0.9985105991363525,grad_norm: 0.9999992561434955, iteration: 61000
loss: 0.9863625168800354,grad_norm: 0.8243992777032029, iteration: 61001
loss: 0.9998884201049805,grad_norm: 0.8996212214558095, iteration: 61002
loss: 0.9973272681236267,grad_norm: 0.94557495215775, iteration: 61003
loss: 0.9989132881164551,grad_norm: 0.9415573499444303, iteration: 61004
loss: 0.9995442032814026,grad_norm: 0.9064921637352373, iteration: 61005
loss: 1.0480740070343018,grad_norm: 0.9151612146260815, iteration: 61006
loss: 1.0120906829833984,grad_norm: 0.9999991164594043, iteration: 61007
loss: 1.100866675376892,grad_norm: 0.9999994610883778, iteration: 61008
loss: 1.014079213142395,grad_norm: 0.9999997135397627, iteration: 61009
loss: 1.0300968885421753,grad_norm: 0.9999991453743992, iteration: 61010
loss: 1.0256181955337524,grad_norm: 0.9999999760271123, iteration: 61011
loss: 0.9758175611495972,grad_norm: 0.9432817477389507, iteration: 61012
loss: 1.1004877090454102,grad_norm: 0.9999998224190372, iteration: 61013
loss: 1.0171829462051392,grad_norm: 0.9446442863813086, iteration: 61014
loss: 1.0074130296707153,grad_norm: 0.9189156185904032, iteration: 61015
loss: 1.016437292098999,grad_norm: 0.9999991457116685, iteration: 61016
loss: 1.0043342113494873,grad_norm: 0.9999991517645689, iteration: 61017
loss: 1.0721746683120728,grad_norm: 0.9999996078721176, iteration: 61018
loss: 0.9896668195724487,grad_norm: 0.9999996265364026, iteration: 61019
loss: 1.0464893579483032,grad_norm: 0.9999992312859767, iteration: 61020
loss: 0.9703762531280518,grad_norm: 0.9999992008671846, iteration: 61021
loss: 0.9757310748100281,grad_norm: 0.9551284372763572, iteration: 61022
loss: 1.0305160284042358,grad_norm: 0.9576421105142082, iteration: 61023
loss: 1.0098094940185547,grad_norm: 0.8497722395730242, iteration: 61024
loss: 1.044858455657959,grad_norm: 0.9999994437839925, iteration: 61025
loss: 0.9984992742538452,grad_norm: 0.9999997145623089, iteration: 61026
loss: 1.048869013786316,grad_norm: 0.8985193834513445, iteration: 61027
loss: 1.013279914855957,grad_norm: 0.9999992658204119, iteration: 61028
loss: 0.9866284728050232,grad_norm: 0.8279975412145371, iteration: 61029
loss: 1.0098326206207275,grad_norm: 0.9344309480971131, iteration: 61030
loss: 0.9912011623382568,grad_norm: 0.7741025932959176, iteration: 61031
loss: 1.123551845550537,grad_norm: 0.999999529020074, iteration: 61032
loss: 0.9788767695426941,grad_norm: 0.9999993280956918, iteration: 61033
loss: 1.0458265542984009,grad_norm: 0.9999993660558087, iteration: 61034
loss: 0.972636878490448,grad_norm: 0.8902498158451485, iteration: 61035
loss: 1.1082919836044312,grad_norm: 0.9999992886113881, iteration: 61036
loss: 1.0499553680419922,grad_norm: 0.8693606277466431, iteration: 61037
loss: 1.0096443891525269,grad_norm: 0.9999991157949116, iteration: 61038
loss: 1.0108554363250732,grad_norm: 0.9999993331623355, iteration: 61039
loss: 1.045450210571289,grad_norm: 0.9999994920748021, iteration: 61040
loss: 1.2094860076904297,grad_norm: 0.9999993176963452, iteration: 61041
loss: 1.0066910982131958,grad_norm: 0.9982223714609502, iteration: 61042
loss: 1.0704662799835205,grad_norm: 0.9999993978665724, iteration: 61043
loss: 1.0318899154663086,grad_norm: 0.9999996087503616, iteration: 61044
loss: 1.008113980293274,grad_norm: 0.9855194112521594, iteration: 61045
loss: 1.1293766498565674,grad_norm: 0.9999990126373907, iteration: 61046
loss: 0.9992978572845459,grad_norm: 0.9108101299222384, iteration: 61047
loss: 0.9708675742149353,grad_norm: 0.9749514207475805, iteration: 61048
loss: 1.012719750404358,grad_norm: 0.9964478708372974, iteration: 61049
loss: 1.0749083757400513,grad_norm: 0.9999991411719229, iteration: 61050
loss: 1.0613563060760498,grad_norm: 0.9999994125128555, iteration: 61051
loss: 1.1454256772994995,grad_norm: 0.9999992220312985, iteration: 61052
loss: 1.0243027210235596,grad_norm: 1.0000000560783338, iteration: 61053
loss: 1.0622291564941406,grad_norm: 0.9999997472910742, iteration: 61054
loss: 1.1122840642929077,grad_norm: 0.9999995597545533, iteration: 61055
loss: 0.987936794757843,grad_norm: 0.9999993205124007, iteration: 61056
loss: 0.9884437918663025,grad_norm: 0.8400363491335012, iteration: 61057
loss: 1.0199471712112427,grad_norm: 0.833380816360118, iteration: 61058
loss: 1.1513094902038574,grad_norm: 0.999999204254568, iteration: 61059
loss: 0.9974189400672913,grad_norm: 0.9999991712082791, iteration: 61060
loss: 0.9985693097114563,grad_norm: 0.9999990672913787, iteration: 61061
loss: 1.0023460388183594,grad_norm: 0.9999991475163277, iteration: 61062
loss: 0.9602362513542175,grad_norm: 0.885855158653421, iteration: 61063
loss: 0.9672122597694397,grad_norm: 0.8472795111059294, iteration: 61064
loss: 1.0167216062545776,grad_norm: 0.999998996966296, iteration: 61065
loss: 1.1017751693725586,grad_norm: 0.9999994836211423, iteration: 61066
loss: 1.049288034439087,grad_norm: 0.9999998710322158, iteration: 61067
loss: 1.0175203084945679,grad_norm: 0.9999991366913816, iteration: 61068
loss: 0.993076503276825,grad_norm: 0.8392535435283738, iteration: 61069
loss: 0.9946585297584534,grad_norm: 0.9999991335033237, iteration: 61070
loss: 0.9705874919891357,grad_norm: 0.980457926508951, iteration: 61071
loss: 1.0535494089126587,grad_norm: 0.9999991409755827, iteration: 61072
loss: 1.0153475999832153,grad_norm: 0.9999995719678823, iteration: 61073
loss: 0.9752106070518494,grad_norm: 0.9999993771153682, iteration: 61074
loss: 1.035071611404419,grad_norm: 0.9338653849302982, iteration: 61075
loss: 1.372187614440918,grad_norm: 0.9999995637186813, iteration: 61076
loss: 1.6029282808303833,grad_norm: 0.9999999006306755, iteration: 61077
loss: 1.2062082290649414,grad_norm: 0.9999991772801046, iteration: 61078
loss: 1.0251154899597168,grad_norm: 0.9999990979308739, iteration: 61079
loss: 1.2790217399597168,grad_norm: 0.9999995247684601, iteration: 61080
loss: 1.1121078729629517,grad_norm: 0.983454717388395, iteration: 61081
loss: 1.3365764617919922,grad_norm: 0.9999993480285188, iteration: 61082
loss: 1.0509175062179565,grad_norm: 0.999999233898445, iteration: 61083
loss: 1.133386492729187,grad_norm: 0.999999827826161, iteration: 61084
loss: 1.1575242280960083,grad_norm: 0.9999992939750035, iteration: 61085
loss: 1.268938422203064,grad_norm: 0.999999362192317, iteration: 61086
loss: 1.360784649848938,grad_norm: 0.9999997295433424, iteration: 61087
loss: 1.048554539680481,grad_norm: 0.9999991826520072, iteration: 61088
loss: 1.2879620790481567,grad_norm: 0.9999994754124443, iteration: 61089
loss: 1.1924569606781006,grad_norm: 0.9999995774780283, iteration: 61090
loss: 1.3028239011764526,grad_norm: 0.9999997370121537, iteration: 61091
loss: 1.4538384675979614,grad_norm: 0.9999997634278678, iteration: 61092
loss: 1.301113486289978,grad_norm: 0.9999994945765448, iteration: 61093
loss: 1.3356666564941406,grad_norm: 0.9999996532402883, iteration: 61094
loss: 1.1314679384231567,grad_norm: 0.9999992304889795, iteration: 61095
loss: 1.2376021146774292,grad_norm: 0.999999232052399, iteration: 61096
loss: 1.2956750392913818,grad_norm: 0.9999995654851956, iteration: 61097
loss: 1.3092528581619263,grad_norm: 0.9999999004347543, iteration: 61098
loss: 1.1676406860351562,grad_norm: 0.9999993789669357, iteration: 61099
loss: 1.4265756607055664,grad_norm: 0.9999998043707227, iteration: 61100
loss: 1.3619625568389893,grad_norm: 0.9999994308887121, iteration: 61101
loss: 1.2446298599243164,grad_norm: 0.9999994990517935, iteration: 61102
loss: 1.2099817991256714,grad_norm: 0.9999993243166208, iteration: 61103
loss: 1.5295709371566772,grad_norm: 0.9999996789827706, iteration: 61104
loss: 1.732102394104004,grad_norm: 0.9999996613561805, iteration: 61105
loss: 1.3797836303710938,grad_norm: 0.9999995268617015, iteration: 61106
loss: 1.2329699993133545,grad_norm: 0.99999940623689, iteration: 61107
loss: 1.2323576211929321,grad_norm: 0.9999993148687495, iteration: 61108
loss: 1.1985465288162231,grad_norm: 0.9999994931594913, iteration: 61109
loss: 1.1480038166046143,grad_norm: 0.9999995258903515, iteration: 61110
loss: 1.4288604259490967,grad_norm: 0.9999999404923423, iteration: 61111
loss: 1.4426637887954712,grad_norm: 0.9999998578127793, iteration: 61112
loss: 1.4347577095031738,grad_norm: 0.9999999232154779, iteration: 61113
loss: 1.3146014213562012,grad_norm: 0.9999994142092917, iteration: 61114
loss: 1.6967997550964355,grad_norm: 0.9999996886542083, iteration: 61115
loss: 1.4070777893066406,grad_norm: 0.9999993293289254, iteration: 61116
loss: 1.6487398147583008,grad_norm: 0.9999995975286529, iteration: 61117
loss: 1.7694839239120483,grad_norm: 0.999999927791464, iteration: 61118
loss: 1.7102683782577515,grad_norm: 0.9999997438137698, iteration: 61119
loss: 1.8587799072265625,grad_norm: 0.9999998346705207, iteration: 61120
loss: 1.8577390909194946,grad_norm: 0.9999997450950054, iteration: 61121
loss: 1.3584955930709839,grad_norm: 0.9999999840167797, iteration: 61122
loss: 1.6109025478363037,grad_norm: 0.9999998566618312, iteration: 61123
loss: 1.6098756790161133,grad_norm: 0.9999995475224273, iteration: 61124
loss: 1.4104231595993042,grad_norm: 0.9999993157502397, iteration: 61125
loss: 1.4284411668777466,grad_norm: 0.9999993044589165, iteration: 61126
loss: 1.998182773590088,grad_norm: 0.9999996833999854, iteration: 61127
loss: 1.7537046670913696,grad_norm: 0.9999995597765496, iteration: 61128
loss: 1.6553951501846313,grad_norm: 0.9999995184033049, iteration: 61129
loss: 1.2991924285888672,grad_norm: 0.9999995530905463, iteration: 61130
loss: 1.3513685464859009,grad_norm: 0.9999995407385109, iteration: 61131
loss: 1.4618136882781982,grad_norm: 0.999999516431448, iteration: 61132
loss: 1.3498375415802002,grad_norm: 0.9999999961961701, iteration: 61133
loss: 1.5470166206359863,grad_norm: 1.0000000117562509, iteration: 61134
loss: 1.3911737203598022,grad_norm: 0.9999994580429207, iteration: 61135
loss: 1.334450602531433,grad_norm: 0.9999998572554305, iteration: 61136
loss: 1.3027291297912598,grad_norm: 0.9999995738309968, iteration: 61137
loss: 1.307573914527893,grad_norm: 0.9999997500298199, iteration: 61138
loss: 1.2485624551773071,grad_norm: 0.9999993710213798, iteration: 61139
loss: 1.2425651550292969,grad_norm: 0.9999998465584419, iteration: 61140
loss: 1.132683277130127,grad_norm: 0.9999991036940926, iteration: 61141
loss: 1.0987138748168945,grad_norm: 0.9999992632046403, iteration: 61142
loss: 1.2011659145355225,grad_norm: 0.9999998204362178, iteration: 61143
loss: 1.2522810697555542,grad_norm: 0.9999995891896364, iteration: 61144
loss: 1.1894235610961914,grad_norm: 0.9999994189394796, iteration: 61145
loss: 1.1674665212631226,grad_norm: 0.9999999347713616, iteration: 61146
loss: 1.0428060293197632,grad_norm: 0.9999998910944251, iteration: 61147
loss: 1.0724915266036987,grad_norm: 0.9999999882935979, iteration: 61148
loss: 0.9638602137565613,grad_norm: 0.9999991306047414, iteration: 61149
loss: 1.456202507019043,grad_norm: 0.9999994687040691, iteration: 61150
loss: 1.0966864824295044,grad_norm: 0.9999991855448821, iteration: 61151
loss: 1.1257658004760742,grad_norm: 0.9999998691371571, iteration: 61152
loss: 1.03316068649292,grad_norm: 0.999999363012082, iteration: 61153
loss: 1.2297158241271973,grad_norm: 0.9999998439275003, iteration: 61154
loss: 1.2131447792053223,grad_norm: 0.9999998679044635, iteration: 61155
loss: 1.0103731155395508,grad_norm: 0.8795911106068094, iteration: 61156
loss: 1.068941593170166,grad_norm: 1.0000000268968658, iteration: 61157
loss: 1.2657439708709717,grad_norm: 0.9999999166332297, iteration: 61158
loss: 1.0102903842926025,grad_norm: 0.9999991387760967, iteration: 61159
loss: 1.1224398612976074,grad_norm: 0.9999991311403015, iteration: 61160
loss: 1.0058258771896362,grad_norm: 0.9999991460085897, iteration: 61161
loss: 1.067421317100525,grad_norm: 0.999999628200518, iteration: 61162
loss: 1.0515092611312866,grad_norm: 0.9999995155319868, iteration: 61163
loss: 1.1378755569458008,grad_norm: 0.9999992138007071, iteration: 61164
loss: 1.0790127515792847,grad_norm: 0.9999997756598, iteration: 61165
loss: 1.0797065496444702,grad_norm: 0.9999991313025103, iteration: 61166
loss: 1.070331335067749,grad_norm: 0.9999991538207188, iteration: 61167
loss: 1.0843958854675293,grad_norm: 0.9999994448980184, iteration: 61168
loss: 1.3897945880889893,grad_norm: 0.9999999333040291, iteration: 61169
loss: 1.0933817625045776,grad_norm: 0.9999997182063333, iteration: 61170
loss: 1.0794154405593872,grad_norm: 0.9999995107931923, iteration: 61171
loss: 1.093960165977478,grad_norm: 0.9999998772195264, iteration: 61172
loss: 1.0059579610824585,grad_norm: 0.9999992134800465, iteration: 61173
loss: 1.0995250940322876,grad_norm: 0.9999992656333472, iteration: 61174
loss: 1.0276926755905151,grad_norm: 0.9999996528916614, iteration: 61175
loss: 1.2918699979782104,grad_norm: 0.9999998675085499, iteration: 61176
loss: 1.0461018085479736,grad_norm: 0.9999998873796501, iteration: 61177
loss: 1.149235725402832,grad_norm: 0.9999999912980829, iteration: 61178
loss: 1.070487141609192,grad_norm: 0.9999991153468482, iteration: 61179
loss: 1.1233773231506348,grad_norm: 0.9999992290452976, iteration: 61180
loss: 1.001760721206665,grad_norm: 0.8018127885007185, iteration: 61181
loss: 1.154526710510254,grad_norm: 0.9999998609143874, iteration: 61182
loss: 1.0064927339553833,grad_norm: 0.9731781582163762, iteration: 61183
loss: 1.1449494361877441,grad_norm: 0.9999997535612251, iteration: 61184
loss: 1.2062580585479736,grad_norm: 0.9999998324515849, iteration: 61185
loss: 1.0036145448684692,grad_norm: 0.8785818058241097, iteration: 61186
loss: 0.9854300022125244,grad_norm: 0.9066939618534648, iteration: 61187
loss: 1.1623934507369995,grad_norm: 0.9999991331810377, iteration: 61188
loss: 1.0777024030685425,grad_norm: 0.9999991306047333, iteration: 61189
loss: 1.1561247110366821,grad_norm: 0.9999993520311332, iteration: 61190
loss: 1.0631989240646362,grad_norm: 0.9999991009318437, iteration: 61191
loss: 0.997779905796051,grad_norm: 0.995866300483587, iteration: 61192
loss: 0.983934760093689,grad_norm: 0.825257227536881, iteration: 61193
loss: 1.057137131690979,grad_norm: 0.9999996549142607, iteration: 61194
loss: 1.1515852212905884,grad_norm: 0.9999995005159922, iteration: 61195
loss: 1.1734287738800049,grad_norm: 0.9999998829689726, iteration: 61196
loss: 1.125537633895874,grad_norm: 0.9999993371678845, iteration: 61197
loss: 1.0671048164367676,grad_norm: 0.9999993466480647, iteration: 61198
loss: 1.0172024965286255,grad_norm: 0.9999991512261762, iteration: 61199
loss: 1.1283341646194458,grad_norm: 0.9999999260117154, iteration: 61200
loss: 1.0167709589004517,grad_norm: 0.9999993952144922, iteration: 61201
loss: 1.0494767427444458,grad_norm: 0.9999998901189734, iteration: 61202
loss: 1.0144258737564087,grad_norm: 0.9999995679183684, iteration: 61203
loss: 1.0733999013900757,grad_norm: 0.9999993939811038, iteration: 61204
loss: 1.0131162405014038,grad_norm: 0.9999991878702632, iteration: 61205
loss: 1.081717848777771,grad_norm: 0.9999997033339839, iteration: 61206
loss: 1.0672527551651,grad_norm: 0.9999994690437125, iteration: 61207
loss: 0.9476020932197571,grad_norm: 0.9999997059230398, iteration: 61208
loss: 1.0811322927474976,grad_norm: 0.999999325440427, iteration: 61209
loss: 1.0427167415618896,grad_norm: 0.9999997470430851, iteration: 61210
loss: 1.0071227550506592,grad_norm: 0.9413714575599499, iteration: 61211
loss: 0.9812069535255432,grad_norm: 0.9006521552511388, iteration: 61212
loss: 1.0509600639343262,grad_norm: 1.0000000254036454, iteration: 61213
loss: 0.9969494938850403,grad_norm: 0.8724282730276803, iteration: 61214
loss: 1.0284814834594727,grad_norm: 0.9999990756739829, iteration: 61215
loss: 0.993003249168396,grad_norm: 0.9999991547038786, iteration: 61216
loss: 1.0870282649993896,grad_norm: 0.9999997705502226, iteration: 61217
loss: 1.0220839977264404,grad_norm: 0.9999992554913276, iteration: 61218
loss: 0.9933428764343262,grad_norm: 0.9943959204051551, iteration: 61219
loss: 1.0327502489089966,grad_norm: 0.9999993338237382, iteration: 61220
loss: 0.9805459380149841,grad_norm: 0.9028599128462866, iteration: 61221
loss: 0.9990977048873901,grad_norm: 0.9431610259346777, iteration: 61222
loss: 0.9591010808944702,grad_norm: 0.9621232460809696, iteration: 61223
loss: 1.0289360284805298,grad_norm: 0.8902245257821894, iteration: 61224
loss: 1.123642086982727,grad_norm: 0.9999992277674721, iteration: 61225
loss: 1.030513048171997,grad_norm: 0.9999996141250369, iteration: 61226
loss: 0.9773945212364197,grad_norm: 0.999999087235767, iteration: 61227
loss: 0.9853432178497314,grad_norm: 0.9811697419720266, iteration: 61228
loss: 1.0486372709274292,grad_norm: 0.9999995470773233, iteration: 61229
loss: 1.002947449684143,grad_norm: 0.9999991901684736, iteration: 61230
loss: 0.9789735674858093,grad_norm: 0.992082753023512, iteration: 61231
loss: 1.04091215133667,grad_norm: 0.9999993175918964, iteration: 61232
loss: 1.018654227256775,grad_norm: 0.9999998594108123, iteration: 61233
loss: 1.0906953811645508,grad_norm: 0.9999993149886195, iteration: 61234
loss: 1.0598214864730835,grad_norm: 0.977800535638723, iteration: 61235
loss: 0.9622023701667786,grad_norm: 0.8865718348652037, iteration: 61236
loss: 0.9972150921821594,grad_norm: 0.8890073535270856, iteration: 61237
loss: 1.0074206590652466,grad_norm: 0.9999991850263306, iteration: 61238
loss: 0.9695632457733154,grad_norm: 0.9198138122098019, iteration: 61239
loss: 1.0675925016403198,grad_norm: 0.9999994215402291, iteration: 61240
loss: 1.0412604808807373,grad_norm: 0.9999993577423986, iteration: 61241
loss: 1.0749857425689697,grad_norm: 0.9279307146236063, iteration: 61242
loss: 1.0647392272949219,grad_norm: 0.9999992986421256, iteration: 61243
loss: 0.9931788444519043,grad_norm: 0.8608916063404879, iteration: 61244
loss: 1.0533543825149536,grad_norm: 0.931821270953136, iteration: 61245
loss: 1.061729907989502,grad_norm: 0.999999497062504, iteration: 61246
loss: 1.017534852027893,grad_norm: 0.9067140348746605, iteration: 61247
loss: 1.0285370349884033,grad_norm: 0.9155162481646714, iteration: 61248
loss: 1.0291457176208496,grad_norm: 0.9999989931771069, iteration: 61249
loss: 1.0365040302276611,grad_norm: 0.9999993889473753, iteration: 61250
loss: 0.992011308670044,grad_norm: 0.9688597531077119, iteration: 61251
loss: 1.201680064201355,grad_norm: 1.0000000036432575, iteration: 61252
loss: 1.1655211448669434,grad_norm: 0.9999995747692529, iteration: 61253
loss: 1.0451875925064087,grad_norm: 0.9999998889042141, iteration: 61254
loss: 1.0125199556350708,grad_norm: 0.8187685138217359, iteration: 61255
loss: 1.0404505729675293,grad_norm: 0.9999999449940884, iteration: 61256
loss: 1.0603337287902832,grad_norm: 0.9999999192255193, iteration: 61257
loss: 1.0204347372055054,grad_norm: 0.9999993802814037, iteration: 61258
loss: 1.033823847770691,grad_norm: 0.9981856357222285, iteration: 61259
loss: 0.9996139407157898,grad_norm: 0.9166239962856655, iteration: 61260
loss: 0.9953745603561401,grad_norm: 0.9999996459990675, iteration: 61261
loss: 1.1054028272628784,grad_norm: 0.9999996901201208, iteration: 61262
loss: 1.0001299381256104,grad_norm: 0.9595096184307286, iteration: 61263
loss: 1.066247582435608,grad_norm: 0.9999993471571409, iteration: 61264
loss: 1.008411169052124,grad_norm: 0.9999994372859852, iteration: 61265
loss: 0.9817622900009155,grad_norm: 0.9999989533964364, iteration: 61266
loss: 1.0660122632980347,grad_norm: 0.9999996231323712, iteration: 61267
loss: 1.0749961137771606,grad_norm: 0.9999991487424238, iteration: 61268
loss: 1.1612845659255981,grad_norm: 0.9999996229697684, iteration: 61269
loss: 0.9795541763305664,grad_norm: 0.9999990742830293, iteration: 61270
loss: 1.0920881032943726,grad_norm: 0.9999993907846616, iteration: 61271
loss: 1.0079599618911743,grad_norm: 0.9484186028234342, iteration: 61272
loss: 1.080254077911377,grad_norm: 0.9999991233742924, iteration: 61273
loss: 1.010779619216919,grad_norm: 0.9999993750164875, iteration: 61274
loss: 0.9958770275115967,grad_norm: 0.9690283915516289, iteration: 61275
loss: 1.080153465270996,grad_norm: 0.9999998302965055, iteration: 61276
loss: 1.083775281906128,grad_norm: 0.9280201616513651, iteration: 61277
loss: 1.0241272449493408,grad_norm: 0.8450934621581159, iteration: 61278
loss: 0.9932109713554382,grad_norm: 0.9999998493355111, iteration: 61279
loss: 1.214708924293518,grad_norm: 0.9999994240443764, iteration: 61280
loss: 1.0660631656646729,grad_norm: 0.9999995869029122, iteration: 61281
loss: 1.003122329711914,grad_norm: 0.7708664324474663, iteration: 61282
loss: 1.0357365608215332,grad_norm: 0.7474412226916453, iteration: 61283
loss: 1.0583125352859497,grad_norm: 0.99999991586888, iteration: 61284
loss: 0.9756057858467102,grad_norm: 0.9999995669322911, iteration: 61285
loss: 1.0269802808761597,grad_norm: 0.9999991182144337, iteration: 61286
loss: 1.0056328773498535,grad_norm: 0.9999990520798361, iteration: 61287
loss: 1.089510202407837,grad_norm: 0.999999064145639, iteration: 61288
loss: 1.1429438591003418,grad_norm: 0.9999993850572939, iteration: 61289
loss: 0.9858355522155762,grad_norm: 0.9766796553419048, iteration: 61290
loss: 0.9999761581420898,grad_norm: 0.9999993095710596, iteration: 61291
loss: 1.0277713537216187,grad_norm: 0.99999986344096, iteration: 61292
loss: 1.0874768495559692,grad_norm: 0.9999994682248355, iteration: 61293
loss: 1.0759246349334717,grad_norm: 0.999999144825897, iteration: 61294
loss: 1.0383630990982056,grad_norm: 0.9999992737716752, iteration: 61295
loss: 1.1191455125808716,grad_norm: 0.9999992770046067, iteration: 61296
loss: 1.1091676950454712,grad_norm: 0.9643598607999164, iteration: 61297
loss: 1.0561827421188354,grad_norm: 0.9999992928984905, iteration: 61298
loss: 1.0685615539550781,grad_norm: 0.9999995478553306, iteration: 61299
loss: 1.4304581880569458,grad_norm: 0.9999996255751803, iteration: 61300
loss: 0.9932193756103516,grad_norm: 0.8341839352973601, iteration: 61301
loss: 1.208543300628662,grad_norm: 0.9999995230559926, iteration: 61302
loss: 1.2359992265701294,grad_norm: 0.9999994099377422, iteration: 61303
loss: 1.1592316627502441,grad_norm: 0.9999998127172308, iteration: 61304
loss: 1.0276224613189697,grad_norm: 0.9999990845097995, iteration: 61305
loss: 1.240838646888733,grad_norm: 0.9999996020003284, iteration: 61306
loss: 1.0133756399154663,grad_norm: 0.7740048811173381, iteration: 61307
loss: 1.0545331239700317,grad_norm: 0.9999995254344312, iteration: 61308
loss: 1.0196906328201294,grad_norm: 0.9999991875951544, iteration: 61309
loss: 0.9918031692504883,grad_norm: 0.9999991891706558, iteration: 61310
loss: 1.0523974895477295,grad_norm: 0.9999992268723157, iteration: 61311
loss: 1.0139473676681519,grad_norm: 0.9999996406026818, iteration: 61312
loss: 1.045803189277649,grad_norm: 0.9999997815962981, iteration: 61313
loss: 1.030229091644287,grad_norm: 0.9066339690757269, iteration: 61314
loss: 1.0589386224746704,grad_norm: 0.99999907649195, iteration: 61315
loss: 1.0087175369262695,grad_norm: 0.9171822021206402, iteration: 61316
loss: 1.0405895709991455,grad_norm: 0.9999992833820713, iteration: 61317
loss: 0.9962981939315796,grad_norm: 0.999998991873069, iteration: 61318
loss: 0.9985226988792419,grad_norm: 0.9529949878039919, iteration: 61319
loss: 1.0228376388549805,grad_norm: 0.9999993778043255, iteration: 61320
loss: 1.0000954866409302,grad_norm: 0.9999992659564315, iteration: 61321
loss: 1.0790245532989502,grad_norm: 0.9999991308798833, iteration: 61322
loss: 1.0348998308181763,grad_norm: 0.932698987164693, iteration: 61323
loss: 0.9824100732803345,grad_norm: 0.9999992668539351, iteration: 61324
loss: 1.050089955329895,grad_norm: 0.9999997268944849, iteration: 61325
loss: 1.026723027229309,grad_norm: 0.9999989612506512, iteration: 61326
loss: 1.0965973138809204,grad_norm: 0.9999997119860171, iteration: 61327
loss: 1.0255727767944336,grad_norm: 0.9999993077500168, iteration: 61328
loss: 0.9997819662094116,grad_norm: 0.999999342953801, iteration: 61329
loss: 1.0719753503799438,grad_norm: 0.9999997711051001, iteration: 61330
loss: 1.0519640445709229,grad_norm: 0.9999998154936736, iteration: 61331
loss: 1.0058761835098267,grad_norm: 0.9999991898984406, iteration: 61332
loss: 1.0305629968643188,grad_norm: 0.999999218414762, iteration: 61333
loss: 0.9958141446113586,grad_norm: 0.9149241290979141, iteration: 61334
loss: 1.0044933557510376,grad_norm: 0.9999990263923488, iteration: 61335
loss: 1.0113602876663208,grad_norm: 0.8407386669343974, iteration: 61336
loss: 0.9966054558753967,grad_norm: 0.9999992484937998, iteration: 61337
loss: 1.0208133459091187,grad_norm: 0.99999910025373, iteration: 61338
loss: 1.028924822807312,grad_norm: 0.9999992896554076, iteration: 61339
loss: 1.5116111040115356,grad_norm: 0.9999998234100925, iteration: 61340
loss: 0.9944651126861572,grad_norm: 0.9999991302024364, iteration: 61341
loss: 0.9925490021705627,grad_norm: 0.881728408627656, iteration: 61342
loss: 0.9925669431686401,grad_norm: 0.9854172686049497, iteration: 61343
loss: 1.0508091449737549,grad_norm: 0.9999994099495351, iteration: 61344
loss: 0.9833706021308899,grad_norm: 0.9999992860407124, iteration: 61345
loss: 0.9699598550796509,grad_norm: 0.9999992535892784, iteration: 61346
loss: 0.9947455525398254,grad_norm: 0.9616762979345537, iteration: 61347
loss: 0.9587533473968506,grad_norm: 0.9544625230399887, iteration: 61348
loss: 0.9844228029251099,grad_norm: 0.9999998123526984, iteration: 61349
loss: 0.9978131651878357,grad_norm: 0.99514000951509, iteration: 61350
loss: 0.957025945186615,grad_norm: 0.9670509937280996, iteration: 61351
loss: 1.0970337390899658,grad_norm: 0.9999998825680414, iteration: 61352
loss: 1.0433845520019531,grad_norm: 0.9999998289533207, iteration: 61353
loss: 1.0562455654144287,grad_norm: 0.9999990686712819, iteration: 61354
loss: 1.039668321609497,grad_norm: 0.9999992634912503, iteration: 61355
loss: 1.1136795282363892,grad_norm: 0.9999993910844168, iteration: 61356
loss: 1.0121673345565796,grad_norm: 0.999999332898468, iteration: 61357
loss: 1.056942343711853,grad_norm: 0.9221578503278728, iteration: 61358
loss: 1.0058786869049072,grad_norm: 0.999999079049656, iteration: 61359
loss: 0.9852287173271179,grad_norm: 0.9999990600830277, iteration: 61360
loss: 1.0311861038208008,grad_norm: 0.9999995517750421, iteration: 61361
loss: 0.9834870100021362,grad_norm: 0.8511332025305463, iteration: 61362
loss: 1.016208529472351,grad_norm: 0.9999997670767542, iteration: 61363
loss: 0.991025447845459,grad_norm: 0.999999201151139, iteration: 61364
loss: 1.0185096263885498,grad_norm: 0.8097826498663466, iteration: 61365
loss: 1.0150600671768188,grad_norm: 0.9371307937853217, iteration: 61366
loss: 1.0114480257034302,grad_norm: 0.9814506500311172, iteration: 61367
loss: 1.0149483680725098,grad_norm: 0.9999992624387302, iteration: 61368
loss: 1.0221604108810425,grad_norm: 0.998138586701507, iteration: 61369
loss: 0.9602228999137878,grad_norm: 0.9999990707170293, iteration: 61370
loss: 1.0485364198684692,grad_norm: 0.9651765887309117, iteration: 61371
loss: 1.0378375053405762,grad_norm: 0.999999606665706, iteration: 61372
loss: 1.0363389253616333,grad_norm: 0.9719577709484648, iteration: 61373
loss: 1.0001877546310425,grad_norm: 0.8223627212511616, iteration: 61374
loss: 1.0241715908050537,grad_norm: 0.8370518321678875, iteration: 61375
loss: 1.0591480731964111,grad_norm: 0.9361942000063737, iteration: 61376
loss: 0.9992250800132751,grad_norm: 0.8498076898869681, iteration: 61377
loss: 0.9866914749145508,grad_norm: 0.9999990042290126, iteration: 61378
loss: 0.9682541489601135,grad_norm: 0.9123414826501541, iteration: 61379
loss: 1.0120558738708496,grad_norm: 0.9999990908377704, iteration: 61380
loss: 1.0185753107070923,grad_norm: 0.9875783661562634, iteration: 61381
loss: 1.0218188762664795,grad_norm: 0.9999994467877357, iteration: 61382
loss: 1.0914310216903687,grad_norm: 0.9999997267925691, iteration: 61383
loss: 1.0055437088012695,grad_norm: 0.9579155154819281, iteration: 61384
loss: 1.0201761722564697,grad_norm: 0.9999992109657909, iteration: 61385
loss: 0.9808269739151001,grad_norm: 0.8679876118762756, iteration: 61386
loss: 0.9960917234420776,grad_norm: 0.856653474963369, iteration: 61387
loss: 1.0062425136566162,grad_norm: 0.9049144701789051, iteration: 61388
loss: 0.9567568898200989,grad_norm: 0.9465115034910544, iteration: 61389
loss: 1.0110101699829102,grad_norm: 0.9999996332474631, iteration: 61390
loss: 1.0086623430252075,grad_norm: 0.9999992027755829, iteration: 61391
loss: 1.0151708126068115,grad_norm: 0.9999990249373681, iteration: 61392
loss: 0.9929099082946777,grad_norm: 0.8383606785314953, iteration: 61393
loss: 1.118272066116333,grad_norm: 0.999999241514787, iteration: 61394
loss: 1.0503448247909546,grad_norm: 0.9999992955519447, iteration: 61395
loss: 1.0075408220291138,grad_norm: 0.9999994311182789, iteration: 61396
loss: 1.062025547027588,grad_norm: 0.9999993220757827, iteration: 61397
loss: 1.003566026687622,grad_norm: 0.9999990613396602, iteration: 61398
loss: 0.9895023703575134,grad_norm: 0.9999991601077884, iteration: 61399
loss: 1.0511035919189453,grad_norm: 0.9999994224488779, iteration: 61400
loss: 1.0175927877426147,grad_norm: 0.9999994532772507, iteration: 61401
loss: 1.0181304216384888,grad_norm: 0.99999911784934, iteration: 61402
loss: 1.043304443359375,grad_norm: 0.8387305839082484, iteration: 61403
loss: 1.026495099067688,grad_norm: 0.9999992402936434, iteration: 61404
loss: 1.0345356464385986,grad_norm: 0.9999990300828423, iteration: 61405
loss: 1.0107640027999878,grad_norm: 0.9999991152319299, iteration: 61406
loss: 0.9955365061759949,grad_norm: 0.9999990833821237, iteration: 61407
loss: 1.0317953824996948,grad_norm: 0.9999993474274581, iteration: 61408
loss: 0.9513097405433655,grad_norm: 0.9999992552616984, iteration: 61409
loss: 0.9851513504981995,grad_norm: 0.8638137065024081, iteration: 61410
loss: 0.9887747764587402,grad_norm: 0.9415511446795638, iteration: 61411
loss: 0.9894595742225647,grad_norm: 0.943298204482987, iteration: 61412
loss: 1.0532547235488892,grad_norm: 0.9999997249834429, iteration: 61413
loss: 1.0396825075149536,grad_norm: 0.9999994602283337, iteration: 61414
loss: 0.9986196756362915,grad_norm: 0.7927900670179755, iteration: 61415
loss: 1.0030159950256348,grad_norm: 0.9999990594064208, iteration: 61416
loss: 1.0166988372802734,grad_norm: 0.9636184447809262, iteration: 61417
loss: 1.0482701063156128,grad_norm: 0.8464490189090491, iteration: 61418
loss: 0.9973836541175842,grad_norm: 0.9999992381195412, iteration: 61419
loss: 0.982567548751831,grad_norm: 0.8054083182575857, iteration: 61420
loss: 1.0180087089538574,grad_norm: 0.99999896146672, iteration: 61421
loss: 0.9896346926689148,grad_norm: 0.9747845982741382, iteration: 61422
loss: 1.005583643913269,grad_norm: 0.9052486322408407, iteration: 61423
loss: 1.0254712104797363,grad_norm: 0.9999990995082958, iteration: 61424
loss: 1.0429624319076538,grad_norm: 0.8628967049396083, iteration: 61425
loss: 0.9984754323959351,grad_norm: 0.865645878774469, iteration: 61426
loss: 1.0333278179168701,grad_norm: 0.9999992269702832, iteration: 61427
loss: 0.9885525703430176,grad_norm: 0.9300993076060331, iteration: 61428
loss: 1.0069727897644043,grad_norm: 0.9999994429491396, iteration: 61429
loss: 0.996660053730011,grad_norm: 0.9999991956862185, iteration: 61430
loss: 0.9644537568092346,grad_norm: 0.9643885735119715, iteration: 61431
loss: 0.9907054901123047,grad_norm: 0.8798509590618097, iteration: 61432
loss: 1.0199288129806519,grad_norm: 0.7834460959132687, iteration: 61433
loss: 1.0443706512451172,grad_norm: 0.99999931467194, iteration: 61434
loss: 1.0186082124710083,grad_norm: 0.9999995394756742, iteration: 61435
loss: 1.022871494293213,grad_norm: 0.8456907241342705, iteration: 61436
loss: 1.0428260564804077,grad_norm: 0.9999990486857927, iteration: 61437
loss: 0.990156888961792,grad_norm: 0.9999991527766839, iteration: 61438
loss: 1.0046120882034302,grad_norm: 0.9999992737317271, iteration: 61439
loss: 0.9951509237289429,grad_norm: 0.9999998827920489, iteration: 61440
loss: 1.0216633081436157,grad_norm: 0.9016337539740286, iteration: 61441
loss: 0.9951788783073425,grad_norm: 0.8718749658926781, iteration: 61442
loss: 1.1707147359848022,grad_norm: 0.9999991555640474, iteration: 61443
loss: 1.0546752214431763,grad_norm: 0.9999994486658592, iteration: 61444
loss: 1.0270973443984985,grad_norm: 0.9999992681644368, iteration: 61445
loss: 1.0155929327011108,grad_norm: 0.9192216969389354, iteration: 61446
loss: 1.0030381679534912,grad_norm: 0.8622508862713787, iteration: 61447
loss: 1.0237531661987305,grad_norm: 0.90274183593481, iteration: 61448
loss: 0.9896323680877686,grad_norm: 0.8894524860324123, iteration: 61449
loss: 1.0041313171386719,grad_norm: 0.999999046878551, iteration: 61450
loss: 0.9974034428596497,grad_norm: 0.8552077154529447, iteration: 61451
loss: 1.0322819948196411,grad_norm: 0.9999993312501827, iteration: 61452
loss: 1.0275979042053223,grad_norm: 0.9321005497070849, iteration: 61453
loss: 1.0117318630218506,grad_norm: 0.8445594294985987, iteration: 61454
loss: 0.9700030088424683,grad_norm: 0.9484620432006327, iteration: 61455
loss: 1.0195754766464233,grad_norm: 0.8511231510884958, iteration: 61456
loss: 1.013380765914917,grad_norm: 0.8881827749227077, iteration: 61457
loss: 1.0069676637649536,grad_norm: 0.9999991124884738, iteration: 61458
loss: 1.023016095161438,grad_norm: 0.9999991897391711, iteration: 61459
loss: 1.0020880699157715,grad_norm: 0.8073788188295051, iteration: 61460
loss: 1.030738353729248,grad_norm: 0.870339649918416, iteration: 61461
loss: 1.0109786987304688,grad_norm: 0.9999990863515043, iteration: 61462
loss: 1.0068405866622925,grad_norm: 0.9999990602605288, iteration: 61463
loss: 1.039384365081787,grad_norm: 0.9999995658770424, iteration: 61464
loss: 1.038329005241394,grad_norm: 0.9999991603759971, iteration: 61465
loss: 0.9849300980567932,grad_norm: 0.9844448877044822, iteration: 61466
loss: 0.9956181645393372,grad_norm: 0.9258585867341796, iteration: 61467
loss: 0.9777072072029114,grad_norm: 0.9999991232578272, iteration: 61468
loss: 1.0058749914169312,grad_norm: 0.9999993127806166, iteration: 61469
loss: 0.9850460290908813,grad_norm: 0.8321169270589496, iteration: 61470
loss: 1.0515400171279907,grad_norm: 0.9560878821917328, iteration: 61471
loss: 1.048880696296692,grad_norm: 0.9999994984724072, iteration: 61472
loss: 1.0076602697372437,grad_norm: 0.9472372160885462, iteration: 61473
loss: 1.0243000984191895,grad_norm: 0.9562754592051754, iteration: 61474
loss: 1.0317490100860596,grad_norm: 0.9708360969249423, iteration: 61475
loss: 1.0075932741165161,grad_norm: 0.9170259474273328, iteration: 61476
loss: 1.0503413677215576,grad_norm: 0.9999989903551227, iteration: 61477
loss: 0.9978252649307251,grad_norm: 0.9999990243176845, iteration: 61478
loss: 1.0252279043197632,grad_norm: 0.9999994233495143, iteration: 61479
loss: 1.0292929410934448,grad_norm: 0.9999989810774437, iteration: 61480
loss: 0.9982459545135498,grad_norm: 0.9513738567357687, iteration: 61481
loss: 1.0160300731658936,grad_norm: 0.9667660625399884, iteration: 61482
loss: 0.9595844745635986,grad_norm: 0.8871261972350868, iteration: 61483
loss: 0.9875470995903015,grad_norm: 0.9999993441051309, iteration: 61484
loss: 1.0024235248565674,grad_norm: 0.8925767241853481, iteration: 61485
loss: 1.0131676197052002,grad_norm: 0.999999543367818, iteration: 61486
loss: 1.0340468883514404,grad_norm: 0.787565774597748, iteration: 61487
loss: 1.131700038909912,grad_norm: 0.9999998502099362, iteration: 61488
loss: 1.0272936820983887,grad_norm: 0.9999991798015841, iteration: 61489
loss: 0.9914001226425171,grad_norm: 0.8908276547985109, iteration: 61490
loss: 1.0104405879974365,grad_norm: 0.9999991211641182, iteration: 61491
loss: 1.024281620979309,grad_norm: 0.9564389659229372, iteration: 61492
loss: 1.0226072072982788,grad_norm: 0.9269002645255381, iteration: 61493
loss: 1.0094852447509766,grad_norm: 0.9999993727459701, iteration: 61494
loss: 1.0055450201034546,grad_norm: 0.9999989657942077, iteration: 61495
loss: 1.015573263168335,grad_norm: 0.9999991257456894, iteration: 61496
loss: 0.9774816036224365,grad_norm: 0.9412066370444544, iteration: 61497
loss: 1.1056939363479614,grad_norm: 0.9999993732172908, iteration: 61498
loss: 0.9878472685813904,grad_norm: 0.9479741359868823, iteration: 61499
loss: 1.005248785018921,grad_norm: 0.9834873968488719, iteration: 61500
loss: 0.9690421223640442,grad_norm: 0.8000391978717143, iteration: 61501
loss: 0.9947222471237183,grad_norm: 0.8168449829888443, iteration: 61502
loss: 1.0715110301971436,grad_norm: 0.9999997443379003, iteration: 61503
loss: 0.9996037483215332,grad_norm: 0.9067621243174191, iteration: 61504
loss: 0.9495453834533691,grad_norm: 0.9999992001358655, iteration: 61505
loss: 1.0043292045593262,grad_norm: 0.8197084014673199, iteration: 61506
loss: 0.9795244336128235,grad_norm: 0.7871911921044527, iteration: 61507
loss: 1.034987449645996,grad_norm: 0.999999163666506, iteration: 61508
loss: 1.044630527496338,grad_norm: 0.9999999038599744, iteration: 61509
loss: 1.0041717290878296,grad_norm: 0.809983962194822, iteration: 61510
loss: 1.0153577327728271,grad_norm: 0.99999924165273, iteration: 61511
loss: 0.9860976338386536,grad_norm: 0.9521345424229378, iteration: 61512
loss: 0.9754377603530884,grad_norm: 0.8682387108610529, iteration: 61513
loss: 1.0157890319824219,grad_norm: 0.9594219642780624, iteration: 61514
loss: 1.0500938892364502,grad_norm: 0.9999993812754886, iteration: 61515
loss: 1.0128228664398193,grad_norm: 0.9999992074794823, iteration: 61516
loss: 0.9854684472084045,grad_norm: 0.8338983021339789, iteration: 61517
loss: 1.0422106981277466,grad_norm: 0.9999992018724523, iteration: 61518
loss: 0.9904971122741699,grad_norm: 0.9999991689687228, iteration: 61519
loss: 1.0102516412734985,grad_norm: 0.9999993723874531, iteration: 61520
loss: 1.0402969121932983,grad_norm: 0.9999991478788351, iteration: 61521
loss: 1.0945155620574951,grad_norm: 0.9555944655620291, iteration: 61522
loss: 0.9822693467140198,grad_norm: 0.9999998531809281, iteration: 61523
loss: 1.0484987497329712,grad_norm: 0.999999283314392, iteration: 61524
loss: 1.0306540727615356,grad_norm: 0.893119056422716, iteration: 61525
loss: 0.9528072476387024,grad_norm: 0.7739442340443157, iteration: 61526
loss: 1.0414103269577026,grad_norm: 0.9999991011514482, iteration: 61527
loss: 1.0305416584014893,grad_norm: 0.8644195505748473, iteration: 61528
loss: 1.0112355947494507,grad_norm: 0.8711032540368618, iteration: 61529
loss: 1.0392996072769165,grad_norm: 0.9999995878800225, iteration: 61530
loss: 0.9899424314498901,grad_norm: 0.9999990647272765, iteration: 61531
loss: 1.0121904611587524,grad_norm: 0.9999990485051803, iteration: 61532
loss: 1.0230857133865356,grad_norm: 0.9705882518495691, iteration: 61533
loss: 0.991871178150177,grad_norm: 0.9999989807683806, iteration: 61534
loss: 1.0609158277511597,grad_norm: 0.9999994381581793, iteration: 61535
loss: 1.0037633180618286,grad_norm: 0.999999262939454, iteration: 61536
loss: 1.0141490697860718,grad_norm: 0.9999996859164442, iteration: 61537
loss: 0.9455710053443909,grad_norm: 0.9999991858792217, iteration: 61538
loss: 0.9873138070106506,grad_norm: 0.9999994305690141, iteration: 61539
loss: 1.040529489517212,grad_norm: 0.9999990994412348, iteration: 61540
loss: 1.0630710124969482,grad_norm: 0.9999991826016881, iteration: 61541
loss: 1.0393720865249634,grad_norm: 0.9999991831125495, iteration: 61542
loss: 1.0477155447006226,grad_norm: 0.8469521487884754, iteration: 61543
loss: 1.040714979171753,grad_norm: 0.9794602768328071, iteration: 61544
loss: 0.9755179286003113,grad_norm: 0.9547868476247563, iteration: 61545
loss: 1.0382168292999268,grad_norm: 0.9196479582424493, iteration: 61546
loss: 0.9891369938850403,grad_norm: 0.99999986219806, iteration: 61547
loss: 1.0378402471542358,grad_norm: 0.9999992214994935, iteration: 61548
loss: 0.9927943348884583,grad_norm: 0.8611735649249271, iteration: 61549
loss: 0.9565960168838501,grad_norm: 0.9852763815686779, iteration: 61550
loss: 0.9814000129699707,grad_norm: 0.894962887650988, iteration: 61551
loss: 0.9881488084793091,grad_norm: 0.9999992565862409, iteration: 61552
loss: 1.0305572748184204,grad_norm: 0.9999997352033653, iteration: 61553
loss: 1.1080601215362549,grad_norm: 0.9999990095472118, iteration: 61554
loss: 1.01149320602417,grad_norm: 0.9322954260558367, iteration: 61555
loss: 1.0387290716171265,grad_norm: 0.8750025848075604, iteration: 61556
loss: 1.0253534317016602,grad_norm: 0.9623206714832147, iteration: 61557
loss: 1.022741436958313,grad_norm: 0.8330501782261674, iteration: 61558
loss: 1.02041494846344,grad_norm: 0.9999995583125005, iteration: 61559
loss: 1.0431832075119019,grad_norm: 0.9999997074171736, iteration: 61560
loss: 1.0121493339538574,grad_norm: 0.9999993709164476, iteration: 61561
loss: 0.9924046993255615,grad_norm: 0.923525075029996, iteration: 61562
loss: 0.9921988844871521,grad_norm: 0.9999993772510313, iteration: 61563
loss: 0.9862420558929443,grad_norm: 0.9654672176225273, iteration: 61564
loss: 1.0074692964553833,grad_norm: 0.9999992168132223, iteration: 61565
loss: 1.0299420356750488,grad_norm: 0.7852212141974179, iteration: 61566
loss: 0.9879858493804932,grad_norm: 0.9563664011032152, iteration: 61567
loss: 0.9947115778923035,grad_norm: 0.9009684244068847, iteration: 61568
loss: 0.9887930750846863,grad_norm: 0.999999153909134, iteration: 61569
loss: 0.988413393497467,grad_norm: 0.8375891038294211, iteration: 61570
loss: 0.9908017516136169,grad_norm: 0.9999990594947274, iteration: 61571
loss: 1.1228605508804321,grad_norm: 0.9999998106252985, iteration: 61572
loss: 1.0690537691116333,grad_norm: 0.9999992511631974, iteration: 61573
loss: 0.9759845733642578,grad_norm: 0.9999991914527372, iteration: 61574
loss: 1.050498127937317,grad_norm: 0.922034699822853, iteration: 61575
loss: 1.0273802280426025,grad_norm: 0.99999981286686, iteration: 61576
loss: 1.0446125268936157,grad_norm: 0.9999994586519004, iteration: 61577
loss: 1.067153811454773,grad_norm: 0.9999998201242981, iteration: 61578
loss: 1.0038044452667236,grad_norm: 0.9999991071009455, iteration: 61579
loss: 0.9753165245056152,grad_norm: 0.8702749702831837, iteration: 61580
loss: 1.0021284818649292,grad_norm: 0.9999991343542315, iteration: 61581
loss: 1.009212851524353,grad_norm: 0.9329173355293897, iteration: 61582
loss: 1.035972237586975,grad_norm: 0.9110561758390109, iteration: 61583
loss: 1.0269137620925903,grad_norm: 0.9999990375810367, iteration: 61584
loss: 0.9566377997398376,grad_norm: 0.8467493878059402, iteration: 61585
loss: 1.0306724309921265,grad_norm: 0.9999991067847183, iteration: 61586
loss: 0.9780750274658203,grad_norm: 0.994051921166238, iteration: 61587
loss: 0.9898272156715393,grad_norm: 0.8857809976041716, iteration: 61588
loss: 1.1321717500686646,grad_norm: 0.9999995669770791, iteration: 61589
loss: 1.1488434076309204,grad_norm: 0.9999995120049702, iteration: 61590
loss: 1.0497163534164429,grad_norm: 0.9999994466558754, iteration: 61591
loss: 0.9810618162155151,grad_norm: 0.9999990694847726, iteration: 61592
loss: 1.0026593208312988,grad_norm: 0.9999997405212736, iteration: 61593
loss: 1.0013573169708252,grad_norm: 0.8181450089321591, iteration: 61594
loss: 0.9855329990386963,grad_norm: 0.999999145617468, iteration: 61595
loss: 0.9906697273254395,grad_norm: 0.9999989915378493, iteration: 61596
loss: 1.006478190422058,grad_norm: 0.8896069630532206, iteration: 61597
loss: 0.9839778542518616,grad_norm: 0.9999991516048816, iteration: 61598
loss: 0.9934871196746826,grad_norm: 0.9189479976045662, iteration: 61599
loss: 1.007561206817627,grad_norm: 0.9260686174141879, iteration: 61600
loss: 1.006842017173767,grad_norm: 0.9999991111957863, iteration: 61601
loss: 0.9884772896766663,grad_norm: 0.7782444496836111, iteration: 61602
loss: 1.0484389066696167,grad_norm: 0.99999984427634, iteration: 61603
loss: 1.0013645887374878,grad_norm: 0.9943762565668381, iteration: 61604
loss: 0.9836706519126892,grad_norm: 0.9999990283513266, iteration: 61605
loss: 1.033798098564148,grad_norm: 0.9999991353242154, iteration: 61606
loss: 0.9847085475921631,grad_norm: 0.9999991015980779, iteration: 61607
loss: 1.0088874101638794,grad_norm: 0.9999991165493133, iteration: 61608
loss: 1.0319178104400635,grad_norm: 0.923214946579107, iteration: 61609
loss: 1.0171531438827515,grad_norm: 0.9999998055548183, iteration: 61610
loss: 1.0270816087722778,grad_norm: 0.9303900723827246, iteration: 61611
loss: 1.0542089939117432,grad_norm: 0.9999996719635041, iteration: 61612
loss: 1.0115700960159302,grad_norm: 0.9593767632340156, iteration: 61613
loss: 1.0146400928497314,grad_norm: 0.9588442257091259, iteration: 61614
loss: 1.018039345741272,grad_norm: 0.8828379608434012, iteration: 61615
loss: 0.9674016237258911,grad_norm: 0.9896574179794091, iteration: 61616
loss: 1.1438311338424683,grad_norm: 0.9999998800371753, iteration: 61617
loss: 0.999168872833252,grad_norm: 0.8294451185618581, iteration: 61618
loss: 1.0345890522003174,grad_norm: 0.9504324270934336, iteration: 61619
loss: 0.9954734444618225,grad_norm: 0.9999997174387177, iteration: 61620
loss: 1.0419384241104126,grad_norm: 0.9999992731865252, iteration: 61621
loss: 1.0080112218856812,grad_norm: 0.9999994485014682, iteration: 61622
loss: 0.9907313585281372,grad_norm: 0.9711341218442661, iteration: 61623
loss: 1.009275197982788,grad_norm: 0.8821801597678445, iteration: 61624
loss: 1.0575830936431885,grad_norm: 0.9987820029158008, iteration: 61625
loss: 1.007057785987854,grad_norm: 0.9048909434299586, iteration: 61626
loss: 1.0327564477920532,grad_norm: 0.8099506735576564, iteration: 61627
loss: 0.9948132634162903,grad_norm: 0.9999991176580656, iteration: 61628
loss: 1.0428470373153687,grad_norm: 0.9217383685593541, iteration: 61629
loss: 1.0260765552520752,grad_norm: 0.9999991651462329, iteration: 61630
loss: 1.0124324560165405,grad_norm: 0.9999990839502665, iteration: 61631
loss: 0.968105673789978,grad_norm: 0.9377613224659607, iteration: 61632
loss: 1.0288695096969604,grad_norm: 0.9999991308119727, iteration: 61633
loss: 0.985821008682251,grad_norm: 0.8293297236048071, iteration: 61634
loss: 1.012120246887207,grad_norm: 0.9500127070849124, iteration: 61635
loss: 0.9831780791282654,grad_norm: 0.9083954929393933, iteration: 61636
loss: 0.9930679798126221,grad_norm: 0.8285121538996241, iteration: 61637
loss: 0.9957326054573059,grad_norm: 0.9584456754805766, iteration: 61638
loss: 0.9635036587715149,grad_norm: 0.9051111434136939, iteration: 61639
loss: 1.0027179718017578,grad_norm: 0.9143150713340753, iteration: 61640
loss: 1.0614229440689087,grad_norm: 0.999999338219627, iteration: 61641
loss: 1.0235530138015747,grad_norm: 0.8958158871740244, iteration: 61642
loss: 0.9914420247077942,grad_norm: 0.9999991996260676, iteration: 61643
loss: 1.0745596885681152,grad_norm: 0.9999995615763048, iteration: 61644
loss: 1.0102659463882446,grad_norm: 0.999999720002716, iteration: 61645
loss: 0.9781065583229065,grad_norm: 0.9999990177621781, iteration: 61646
loss: 1.0482338666915894,grad_norm: 0.9999999032856545, iteration: 61647
loss: 1.0086740255355835,grad_norm: 0.9156871495520684, iteration: 61648
loss: 0.9843790531158447,grad_norm: 0.8536905476250543, iteration: 61649
loss: 0.9996095299720764,grad_norm: 0.8834850197339651, iteration: 61650
loss: 1.017354130744934,grad_norm: 0.8737840734177044, iteration: 61651
loss: 1.0162278413772583,grad_norm: 0.9978927399128337, iteration: 61652
loss: 1.0124419927597046,grad_norm: 0.9999991856421366, iteration: 61653
loss: 1.0882790088653564,grad_norm: 0.9999993141594768, iteration: 61654
loss: 0.9908361434936523,grad_norm: 0.9999996104884992, iteration: 61655
loss: 0.9788923859596252,grad_norm: 0.9999992670586092, iteration: 61656
loss: 1.0246213674545288,grad_norm: 0.9999991873084922, iteration: 61657
loss: 0.9737524390220642,grad_norm: 0.8495517580329421, iteration: 61658
loss: 1.0400859117507935,grad_norm: 0.9999994416364916, iteration: 61659
loss: 0.9756233096122742,grad_norm: 0.8526283105187895, iteration: 61660
loss: 1.0070804357528687,grad_norm: 0.7566684567154248, iteration: 61661
loss: 0.9912286996841431,grad_norm: 0.9699850180226247, iteration: 61662
loss: 0.9984197020530701,grad_norm: 0.9999990943903776, iteration: 61663
loss: 1.017024278640747,grad_norm: 0.8583329726708081, iteration: 61664
loss: 1.009888768196106,grad_norm: 0.9999990969735951, iteration: 61665
loss: 1.032984972000122,grad_norm: 0.8145711473768855, iteration: 61666
loss: 0.9857951998710632,grad_norm: 0.9592113660373183, iteration: 61667
loss: 0.9728516936302185,grad_norm: 0.9880336422546879, iteration: 61668
loss: 0.9967215061187744,grad_norm: 0.9628998466961035, iteration: 61669
loss: 1.0120937824249268,grad_norm: 0.9999991696350484, iteration: 61670
loss: 1.0066603422164917,grad_norm: 0.9999989289469912, iteration: 61671
loss: 1.0661728382110596,grad_norm: 0.9999996855605413, iteration: 61672
loss: 1.0190393924713135,grad_norm: 0.9682384859163021, iteration: 61673
loss: 1.0447107553482056,grad_norm: 0.999999710155756, iteration: 61674
loss: 0.96478271484375,grad_norm: 0.9999990828076952, iteration: 61675
loss: 1.0199559926986694,grad_norm: 0.8763516815912837, iteration: 61676
loss: 0.9583128094673157,grad_norm: 0.9999991677480609, iteration: 61677
loss: 1.0795879364013672,grad_norm: 0.9999997420067314, iteration: 61678
loss: 1.0347658395767212,grad_norm: 0.9999994237752593, iteration: 61679
loss: 0.9987608194351196,grad_norm: 0.7179689809234876, iteration: 61680
loss: 1.1267530918121338,grad_norm: 0.9999993755565592, iteration: 61681
loss: 1.0055204629898071,grad_norm: 0.8400389293705083, iteration: 61682
loss: 1.0048094987869263,grad_norm: 0.9222279844514342, iteration: 61683
loss: 1.0453500747680664,grad_norm: 0.9999990295808951, iteration: 61684
loss: 0.9906934499740601,grad_norm: 0.8733991343277295, iteration: 61685
loss: 1.0210233926773071,grad_norm: 0.987753456400529, iteration: 61686
loss: 0.9752899408340454,grad_norm: 0.9999990677140896, iteration: 61687
loss: 0.968245267868042,grad_norm: 0.9999990690171784, iteration: 61688
loss: 1.0282952785491943,grad_norm: 0.8420443153918928, iteration: 61689
loss: 0.9977741241455078,grad_norm: 0.9999991577493981, iteration: 61690
loss: 1.0478076934814453,grad_norm: 0.9649650093127312, iteration: 61691
loss: 1.0235737562179565,grad_norm: 0.9999992324077173, iteration: 61692
loss: 1.0008963346481323,grad_norm: 0.9999990207268893, iteration: 61693
loss: 0.9945287704467773,grad_norm: 0.9999990629888655, iteration: 61694
loss: 0.9702075719833374,grad_norm: 0.9999992102008466, iteration: 61695
loss: 1.0108819007873535,grad_norm: 0.9999990453918755, iteration: 61696
loss: 1.03789484500885,grad_norm: 0.9999991242503966, iteration: 61697
loss: 0.9956871867179871,grad_norm: 0.9487924766119454, iteration: 61698
loss: 1.025810718536377,grad_norm: 0.9127505926146312, iteration: 61699
loss: 0.990472674369812,grad_norm: 0.9956988601786055, iteration: 61700
loss: 1.0037016868591309,grad_norm: 0.920052913100962, iteration: 61701
loss: 1.0063070058822632,grad_norm: 0.9999990605267365, iteration: 61702
loss: 1.086190104484558,grad_norm: 0.9999994518035512, iteration: 61703
loss: 1.0942957401275635,grad_norm: 0.9999993909988562, iteration: 61704
loss: 1.043959379196167,grad_norm: 0.9999992414364413, iteration: 61705
loss: 1.00398588180542,grad_norm: 0.9056453426884074, iteration: 61706
loss: 0.9783689379692078,grad_norm: 0.9999993640617372, iteration: 61707
loss: 1.0304425954818726,grad_norm: 0.9999991723033987, iteration: 61708
loss: 1.0553277730941772,grad_norm: 0.9999991183228522, iteration: 61709
loss: 0.9598125219345093,grad_norm: 0.983012687241546, iteration: 61710
loss: 1.0040932893753052,grad_norm: 0.9772758687467479, iteration: 61711
loss: 1.002612590789795,grad_norm: 0.8594141516710662, iteration: 61712
loss: 1.0074121952056885,grad_norm: 0.9782949789382003, iteration: 61713
loss: 1.0252766609191895,grad_norm: 0.9999998754407273, iteration: 61714
loss: 1.1855107545852661,grad_norm: 0.9999995147087142, iteration: 61715
loss: 1.0016170740127563,grad_norm: 0.970726694040009, iteration: 61716
loss: 1.0532441139221191,grad_norm: 0.9999995344530486, iteration: 61717
loss: 1.00431489944458,grad_norm: 0.9586131760040618, iteration: 61718
loss: 1.0712707042694092,grad_norm: 0.9999996483989824, iteration: 61719
loss: 1.018086552619934,grad_norm: 0.9999991149114303, iteration: 61720
loss: 0.9977102875709534,grad_norm: 0.9999992459063826, iteration: 61721
loss: 1.0107585191726685,grad_norm: 0.8525776052908296, iteration: 61722
loss: 1.1055569648742676,grad_norm: 0.9999992667914994, iteration: 61723
loss: 1.033748984336853,grad_norm: 0.999999140261729, iteration: 61724
loss: 1.0436261892318726,grad_norm: 0.9999990655439042, iteration: 61725
loss: 1.0057346820831299,grad_norm: 0.9999992084423797, iteration: 61726
loss: 0.9880009293556213,grad_norm: 0.9999991974582693, iteration: 61727
loss: 0.9943297505378723,grad_norm: 0.8862960803750377, iteration: 61728
loss: 0.9709493517875671,grad_norm: 0.9999990944597918, iteration: 61729
loss: 0.9961461424827576,grad_norm: 0.999999115916163, iteration: 61730
loss: 1.0234848260879517,grad_norm: 0.9668568839999548, iteration: 61731
loss: 1.0128446817398071,grad_norm: 0.9024490744805377, iteration: 61732
loss: 1.0288317203521729,grad_norm: 0.999999322254692, iteration: 61733
loss: 0.9879440665245056,grad_norm: 0.8940451953221922, iteration: 61734
loss: 1.0171167850494385,grad_norm: 0.9999992965886911, iteration: 61735
loss: 0.9996110200881958,grad_norm: 0.9625327874835502, iteration: 61736
loss: 0.9684239029884338,grad_norm: 0.9999991998571183, iteration: 61737
loss: 1.081498384475708,grad_norm: 0.9999995410266146, iteration: 61738
loss: 0.9966541528701782,grad_norm: 0.9583365543495498, iteration: 61739
loss: 1.0826294422149658,grad_norm: 0.9999996852843964, iteration: 61740
loss: 1.00969398021698,grad_norm: 0.9999991686659114, iteration: 61741
loss: 1.0272774696350098,grad_norm: 0.9830566841201086, iteration: 61742
loss: 0.9926146268844604,grad_norm: 0.956913435435173, iteration: 61743
loss: 1.0187456607818604,grad_norm: 0.9999991457501075, iteration: 61744
loss: 0.9858783483505249,grad_norm: 0.9999990864679256, iteration: 61745
loss: 0.9723666906356812,grad_norm: 0.9999991086836768, iteration: 61746
loss: 1.013702154159546,grad_norm: 0.9999991502662432, iteration: 61747
loss: 0.9969642758369446,grad_norm: 0.9999990610214314, iteration: 61748
loss: 1.0077921152114868,grad_norm: 0.999999604905214, iteration: 61749
loss: 1.0474708080291748,grad_norm: 0.9999993790620341, iteration: 61750
loss: 1.1562445163726807,grad_norm: 0.9999996562990305, iteration: 61751
loss: 1.0639917850494385,grad_norm: 0.9999993147946822, iteration: 61752
loss: 1.0402721166610718,grad_norm: 0.9999991223547969, iteration: 61753
loss: 1.035057544708252,grad_norm: 0.9999991784805997, iteration: 61754
loss: 1.005157470703125,grad_norm: 0.9999992182046591, iteration: 61755
loss: 0.9866183400154114,grad_norm: 0.9999990538607677, iteration: 61756
loss: 1.0172754526138306,grad_norm: 0.9588367217770871, iteration: 61757
loss: 1.0140454769134521,grad_norm: 0.9999998805922511, iteration: 61758
loss: 0.9722235798835754,grad_norm: 0.9999991336490562, iteration: 61759
loss: 0.965453028678894,grad_norm: 0.9753870493243072, iteration: 61760
loss: 0.9973830580711365,grad_norm: 0.7890009192393026, iteration: 61761
loss: 1.0753705501556396,grad_norm: 0.9999994691849777, iteration: 61762
loss: 1.00106680393219,grad_norm: 0.9999993983063414, iteration: 61763
loss: 1.0100717544555664,grad_norm: 0.9772940735692697, iteration: 61764
loss: 1.0278292894363403,grad_norm: 0.9243002588247152, iteration: 61765
loss: 0.9544243812561035,grad_norm: 0.99999931995745, iteration: 61766
loss: 1.0053101778030396,grad_norm: 0.9999991122222675, iteration: 61767
loss: 1.0106346607208252,grad_norm: 0.9872983926438874, iteration: 61768
loss: 1.0136300325393677,grad_norm: 0.9860903897713131, iteration: 61769
loss: 1.0258960723876953,grad_norm: 0.999999530803239, iteration: 61770
loss: 1.0139107704162598,grad_norm: 0.9733563465029661, iteration: 61771
loss: 0.993031919002533,grad_norm: 0.9999994466643756, iteration: 61772
loss: 1.0222457647323608,grad_norm: 0.9999992158339337, iteration: 61773
loss: 1.0053435564041138,grad_norm: 0.9999991590462332, iteration: 61774
loss: 1.0082266330718994,grad_norm: 0.8738597730559626, iteration: 61775
loss: 1.0195871591567993,grad_norm: 0.9999992000712336, iteration: 61776
loss: 1.0150309801101685,grad_norm: 0.9999989705728963, iteration: 61777
loss: 1.0361405611038208,grad_norm: 0.9999995220413921, iteration: 61778
loss: 1.0106340646743774,grad_norm: 0.999999012932124, iteration: 61779
loss: 1.027076244354248,grad_norm: 0.9999997615146137, iteration: 61780
loss: 1.0192559957504272,grad_norm: 0.9999990357709208, iteration: 61781
loss: 1.104551076889038,grad_norm: 0.9999996401953122, iteration: 61782
loss: 1.0426117181777954,grad_norm: 0.7902908127652193, iteration: 61783
loss: 0.9513768553733826,grad_norm: 0.868168912915266, iteration: 61784
loss: 0.9872571229934692,grad_norm: 0.8476260586381321, iteration: 61785
loss: 1.0202343463897705,grad_norm: 0.9429787756241813, iteration: 61786
loss: 1.0319843292236328,grad_norm: 0.9999994303517222, iteration: 61787
loss: 1.0591559410095215,grad_norm: 0.9999994943184853, iteration: 61788
loss: 0.9876911640167236,grad_norm: 0.999999126834705, iteration: 61789
loss: 1.0119099617004395,grad_norm: 0.9999991094040208, iteration: 61790
loss: 1.0597221851348877,grad_norm: 0.9907528901992533, iteration: 61791
loss: 1.0287803411483765,grad_norm: 0.999999273248656, iteration: 61792
loss: 1.0542619228363037,grad_norm: 0.8735556849514752, iteration: 61793
loss: 1.0297324657440186,grad_norm: 0.9999991677906429, iteration: 61794
loss: 1.0244721174240112,grad_norm: 0.8989415562419099, iteration: 61795
loss: 1.0221575498580933,grad_norm: 0.9999998820180511, iteration: 61796
loss: 1.0715428590774536,grad_norm: 0.9999992271733851, iteration: 61797
loss: 1.092278242111206,grad_norm: 0.985089382064648, iteration: 61798
loss: 1.048630714416504,grad_norm: 0.9999995278499267, iteration: 61799
loss: 1.0082148313522339,grad_norm: 0.9412424257413982, iteration: 61800
loss: 0.9931620359420776,grad_norm: 0.7539349156434876, iteration: 61801
loss: 0.954076886177063,grad_norm: 0.8822848061331042, iteration: 61802
loss: 1.0358846187591553,grad_norm: 0.9999999033455929, iteration: 61803
loss: 0.990582287311554,grad_norm: 0.9999992803306446, iteration: 61804
loss: 1.0639911890029907,grad_norm: 0.999999140476039, iteration: 61805
loss: 1.0254426002502441,grad_norm: 0.9679469272775784, iteration: 61806
loss: 0.9843735694885254,grad_norm: 0.9557751538532382, iteration: 61807
loss: 1.0135576725006104,grad_norm: 0.8732343374364758, iteration: 61808
loss: 1.0025206804275513,grad_norm: 0.8122476151610765, iteration: 61809
loss: 0.990333616733551,grad_norm: 0.8143359010750584, iteration: 61810
loss: 1.008410930633545,grad_norm: 0.9999991265535086, iteration: 61811
loss: 0.9761670231819153,grad_norm: 0.9140712422985056, iteration: 61812
loss: 1.0698038339614868,grad_norm: 0.9999994921482265, iteration: 61813
loss: 1.0274810791015625,grad_norm: 0.9999994962047384, iteration: 61814
loss: 1.0172650814056396,grad_norm: 0.9999989445509697, iteration: 61815
loss: 0.9580847024917603,grad_norm: 0.9999993696392687, iteration: 61816
loss: 1.0009373426437378,grad_norm: 0.9999998477564152, iteration: 61817
loss: 0.9754555225372314,grad_norm: 0.9999991246085801, iteration: 61818
loss: 1.0182167291641235,grad_norm: 0.8867676253830509, iteration: 61819
loss: 1.1115652322769165,grad_norm: 0.9999993938424194, iteration: 61820
loss: 1.0270591974258423,grad_norm: 0.768068245736629, iteration: 61821
loss: 0.9757206439971924,grad_norm: 0.8924184926224399, iteration: 61822
loss: 1.0490914583206177,grad_norm: 0.9999990007915418, iteration: 61823
loss: 0.9996927976608276,grad_norm: 0.8165750974748163, iteration: 61824
loss: 0.9685336351394653,grad_norm: 0.9849761866188051, iteration: 61825
loss: 0.9933341145515442,grad_norm: 0.9924877903378305, iteration: 61826
loss: 1.0886659622192383,grad_norm: 0.9849327702843398, iteration: 61827
loss: 1.0023354291915894,grad_norm: 0.9999990965787043, iteration: 61828
loss: 1.0468381643295288,grad_norm: 0.9999991400849093, iteration: 61829
loss: 1.006280779838562,grad_norm: 0.9926615013864021, iteration: 61830
loss: 1.0061030387878418,grad_norm: 0.8723994051696663, iteration: 61831
loss: 0.9962354302406311,grad_norm: 0.9999991791818568, iteration: 61832
loss: 0.989320695400238,grad_norm: 0.9156436722451549, iteration: 61833
loss: 1.0577389001846313,grad_norm: 0.999999282137374, iteration: 61834
loss: 1.0009000301361084,grad_norm: 0.8431551479675231, iteration: 61835
loss: 1.0349736213684082,grad_norm: 0.9999998323333453, iteration: 61836
loss: 1.0114383697509766,grad_norm: 0.871312934746515, iteration: 61837
loss: 1.0128391981124878,grad_norm: 0.9999991194114457, iteration: 61838
loss: 1.0761951208114624,grad_norm: 0.9999995050904398, iteration: 61839
loss: 1.010452151298523,grad_norm: 0.9977441432833537, iteration: 61840
loss: 1.0972340106964111,grad_norm: 1.0000000059801923, iteration: 61841
loss: 1.015485405921936,grad_norm: 0.9999998749875271, iteration: 61842
loss: 1.1315914392471313,grad_norm: 0.9999993901040565, iteration: 61843
loss: 1.1604092121124268,grad_norm: 1.0000000102153037, iteration: 61844
loss: 0.9805806875228882,grad_norm: 0.9999991815622266, iteration: 61845
loss: 1.0564234256744385,grad_norm: 0.9999998925984185, iteration: 61846
loss: 1.008806824684143,grad_norm: 0.9999994386466902, iteration: 61847
loss: 1.034306526184082,grad_norm: 0.9999995239110264, iteration: 61848
loss: 1.0130869150161743,grad_norm: 0.8522570120954318, iteration: 61849
loss: 1.048953890800476,grad_norm: 0.9999995272737079, iteration: 61850
loss: 1.2278424501419067,grad_norm: 0.9999995275300853, iteration: 61851
loss: 0.9687851071357727,grad_norm: 0.9999990786220687, iteration: 61852
loss: 1.0085822343826294,grad_norm: 0.9999992324490795, iteration: 61853
loss: 1.0152629613876343,grad_norm: 0.7908147198883705, iteration: 61854
loss: 1.0981489419937134,grad_norm: 0.9999997929921638, iteration: 61855
loss: 1.015839695930481,grad_norm: 0.9461461994578473, iteration: 61856
loss: 1.0181673765182495,grad_norm: 0.9078031947249315, iteration: 61857
loss: 0.9864721894264221,grad_norm: 0.9999992555448424, iteration: 61858
loss: 1.179290533065796,grad_norm: 0.9999994872609048, iteration: 61859
loss: 1.0108753442764282,grad_norm: 0.946892947934017, iteration: 61860
loss: 0.9977244734764099,grad_norm: 0.9999999882297124, iteration: 61861
loss: 0.9910823702812195,grad_norm: 0.9734189970445272, iteration: 61862
loss: 1.010287880897522,grad_norm: 0.9999990815899944, iteration: 61863
loss: 1.0145561695098877,grad_norm: 0.9999999808794392, iteration: 61864
loss: 1.03464674949646,grad_norm: 0.9999993006332619, iteration: 61865
loss: 1.1239738464355469,grad_norm: 0.9999995557601612, iteration: 61866
loss: 1.0394666194915771,grad_norm: 0.9999999544255675, iteration: 61867
loss: 1.096115231513977,grad_norm: 0.8870797162316731, iteration: 61868
loss: 1.0022860765457153,grad_norm: 0.9999990858243711, iteration: 61869
loss: 1.0371116399765015,grad_norm: 0.9999998276218804, iteration: 61870
loss: 1.0462398529052734,grad_norm: 0.999999203434825, iteration: 61871
loss: 1.049935221672058,grad_norm: 0.9999991641878964, iteration: 61872
loss: 1.1474748849868774,grad_norm: 0.999999455327896, iteration: 61873
loss: 1.1598286628723145,grad_norm: 0.9999998318124554, iteration: 61874
loss: 1.2345553636550903,grad_norm: 0.9999993711567506, iteration: 61875
loss: 1.0313721895217896,grad_norm: 0.999999262045916, iteration: 61876
loss: 0.977025032043457,grad_norm: 0.9810169877945726, iteration: 61877
loss: 1.086280107498169,grad_norm: 0.9999993190902848, iteration: 61878
loss: 0.9711273908615112,grad_norm: 0.8494350261324081, iteration: 61879
loss: 1.2846993207931519,grad_norm: 0.9999994513847882, iteration: 61880
loss: 1.1146831512451172,grad_norm: 0.9999995353675825, iteration: 61881
loss: 0.9793738126754761,grad_norm: 0.9999990805925781, iteration: 61882
loss: 1.1208401918411255,grad_norm: 0.999999167285514, iteration: 61883
loss: 1.0891526937484741,grad_norm: 0.9999991528396291, iteration: 61884
loss: 1.1910529136657715,grad_norm: 0.9999997494677073, iteration: 61885
loss: 1.0785939693450928,grad_norm: 0.931752046734487, iteration: 61886
loss: 1.03620183467865,grad_norm: 0.9999991248498193, iteration: 61887
loss: 1.0252854824066162,grad_norm: 0.9408802691977572, iteration: 61888
loss: 1.0166813135147095,grad_norm: 0.9269507372894218, iteration: 61889
loss: 0.9760003685951233,grad_norm: 0.9999991621841834, iteration: 61890
loss: 1.1702840328216553,grad_norm: 0.9999997000743489, iteration: 61891
loss: 1.0438504219055176,grad_norm: 0.9999992225485933, iteration: 61892
loss: 1.049813151359558,grad_norm: 0.9999990674599503, iteration: 61893
loss: 1.049254298210144,grad_norm: 0.9988251122054788, iteration: 61894
loss: 1.2773154973983765,grad_norm: 0.9999992883570799, iteration: 61895
loss: 1.022993564605713,grad_norm: 0.9999991241668191, iteration: 61896
loss: 1.0696245431900024,grad_norm: 0.9475730323859578, iteration: 61897
loss: 1.0226867198944092,grad_norm: 0.9999991737151149, iteration: 61898
loss: 1.127768874168396,grad_norm: 0.9999994323536824, iteration: 61899
loss: 1.0566682815551758,grad_norm: 0.9968952473235699, iteration: 61900
loss: 1.0956001281738281,grad_norm: 0.9999992561985943, iteration: 61901
loss: 1.0277435779571533,grad_norm: 0.9999991767624222, iteration: 61902
loss: 1.1586867570877075,grad_norm: 0.9366662966336404, iteration: 61903
loss: 1.0413848161697388,grad_norm: 0.9999990425249653, iteration: 61904
loss: 1.138798713684082,grad_norm: 0.9999992457809567, iteration: 61905
loss: 1.1710231304168701,grad_norm: 0.9999996496065451, iteration: 61906
loss: 1.1880038976669312,grad_norm: 0.9999995507821813, iteration: 61907
loss: 1.0566091537475586,grad_norm: 0.9999991191771194, iteration: 61908
loss: 0.9890260100364685,grad_norm: 0.9999993632819422, iteration: 61909
loss: 1.2350378036499023,grad_norm: 0.9999998720886879, iteration: 61910
loss: 1.0846641063690186,grad_norm: 0.9999991485945249, iteration: 61911
loss: 0.992300271987915,grad_norm: 0.9999993157647342, iteration: 61912
loss: 1.0980035066604614,grad_norm: 0.9999992825725712, iteration: 61913
loss: 1.00285005569458,grad_norm: 0.9999991988403116, iteration: 61914
loss: 1.1817984580993652,grad_norm: 0.9999992278690418, iteration: 61915
loss: 1.1446037292480469,grad_norm: 0.9999996074217927, iteration: 61916
loss: 0.9683326482772827,grad_norm: 0.9999991897357875, iteration: 61917
loss: 1.0100054740905762,grad_norm: 0.9999991175967067, iteration: 61918
loss: 0.9883021116256714,grad_norm: 0.9999991346007499, iteration: 61919
loss: 1.0367398262023926,grad_norm: 0.9999994447914556, iteration: 61920
loss: 1.0257338285446167,grad_norm: 0.9435873537980964, iteration: 61921
loss: 1.0124919414520264,grad_norm: 0.8214507314813582, iteration: 61922
loss: 1.0429096221923828,grad_norm: 0.9999991313814316, iteration: 61923
loss: 1.054075002670288,grad_norm: 0.9999992682350864, iteration: 61924
loss: 1.1072108745574951,grad_norm: 0.9999998531580601, iteration: 61925
loss: 1.0403517484664917,grad_norm: 0.9999991179214806, iteration: 61926
loss: 1.0604329109191895,grad_norm: 0.9999999251314993, iteration: 61927
loss: 0.988763689994812,grad_norm: 0.9999990173250998, iteration: 61928
loss: 1.024470329284668,grad_norm: 0.7942901901672245, iteration: 61929
loss: 1.030233383178711,grad_norm: 0.9486139822084941, iteration: 61930
loss: 1.012853741645813,grad_norm: 0.976953772098224, iteration: 61931
loss: 1.0672024488449097,grad_norm: 0.9999993079086599, iteration: 61932
loss: 0.9829990863800049,grad_norm: 0.9704878744755143, iteration: 61933
loss: 1.0435901880264282,grad_norm: 0.9999997490051279, iteration: 61934
loss: 1.021274209022522,grad_norm: 0.9999992505006403, iteration: 61935
loss: 0.9930548071861267,grad_norm: 0.9999992477651102, iteration: 61936
loss: 1.0362069606781006,grad_norm: 0.9999992059779323, iteration: 61937
loss: 0.9973285794258118,grad_norm: 0.9999999055158089, iteration: 61938
loss: 1.035037875175476,grad_norm: 0.796364228994369, iteration: 61939
loss: 1.1308176517486572,grad_norm: 0.9999997052070374, iteration: 61940
loss: 1.1333931684494019,grad_norm: 0.9999991895744348, iteration: 61941
loss: 1.0145012140274048,grad_norm: 0.9999995881043952, iteration: 61942
loss: 0.9953877329826355,grad_norm: 0.9999990278431251, iteration: 61943
loss: 1.0447536706924438,grad_norm: 0.9999998729067386, iteration: 61944
loss: 1.0482524633407593,grad_norm: 0.9999998410966525, iteration: 61945
loss: 1.0345497131347656,grad_norm: 0.937128314811037, iteration: 61946
loss: 1.0224943161010742,grad_norm: 0.999999522747097, iteration: 61947
loss: 1.101064682006836,grad_norm: 0.9999990504960578, iteration: 61948
loss: 1.0327214002609253,grad_norm: 0.9999990944413648, iteration: 61949
loss: 1.0274696350097656,grad_norm: 0.999999600569669, iteration: 61950
loss: 1.0247608423233032,grad_norm: 0.9999996426128965, iteration: 61951
loss: 1.0106918811798096,grad_norm: 0.9999990482342278, iteration: 61952
loss: 1.0133676528930664,grad_norm: 0.8839364629673772, iteration: 61953
loss: 1.099940299987793,grad_norm: 0.9999993478891862, iteration: 61954
loss: 1.0714476108551025,grad_norm: 0.9999998354119514, iteration: 61955
loss: 1.0094504356384277,grad_norm: 0.9285277046464425, iteration: 61956
loss: 1.0059707164764404,grad_norm: 0.977400516713275, iteration: 61957
loss: 0.9859908819198608,grad_norm: 0.9291529648160369, iteration: 61958
loss: 1.0170855522155762,grad_norm: 0.9999990361881747, iteration: 61959
loss: 1.0162705183029175,grad_norm: 0.9999989600003356, iteration: 61960
loss: 0.984270453453064,grad_norm: 0.9999990472159204, iteration: 61961
loss: 1.0090287923812866,grad_norm: 0.7933095487141009, iteration: 61962
loss: 1.040779948234558,grad_norm: 0.9999995238626761, iteration: 61963
loss: 0.998805582523346,grad_norm: 0.8325724155933295, iteration: 61964
loss: 0.9552048444747925,grad_norm: 0.9999992787361124, iteration: 61965
loss: 0.9826744198799133,grad_norm: 0.8792192043581573, iteration: 61966
loss: 1.0160949230194092,grad_norm: 0.9999992560526947, iteration: 61967
loss: 1.0125633478164673,grad_norm: 0.7820022750791489, iteration: 61968
loss: 1.0656100511550903,grad_norm: 0.9999995394374024, iteration: 61969
loss: 1.013049840927124,grad_norm: 0.9765563660118924, iteration: 61970
loss: 0.9997000098228455,grad_norm: 0.8627126733283202, iteration: 61971
loss: 1.028918981552124,grad_norm: 0.9999992859502509, iteration: 61972
loss: 1.004794716835022,grad_norm: 0.9999997339075148, iteration: 61973
loss: 1.0652450323104858,grad_norm: 0.9999994372874451, iteration: 61974
loss: 0.9988994598388672,grad_norm: 0.7984782894020027, iteration: 61975
loss: 0.9849465489387512,grad_norm: 0.9999990364007397, iteration: 61976
loss: 1.017946481704712,grad_norm: 0.7670100094577588, iteration: 61977
loss: 0.9356034398078918,grad_norm: 0.9368304342392895, iteration: 61978
loss: 1.0245343446731567,grad_norm: 0.9800882700496221, iteration: 61979
loss: 0.9854894876480103,grad_norm: 0.9999990299247351, iteration: 61980
loss: 1.0939475297927856,grad_norm: 0.9999995292475669, iteration: 61981
loss: 1.0360068082809448,grad_norm: 0.9999997335487424, iteration: 61982
loss: 1.0206862688064575,grad_norm: 0.9537843517709268, iteration: 61983
loss: 1.042816400527954,grad_norm: 0.9999992243691024, iteration: 61984
loss: 1.020137071609497,grad_norm: 0.9999990519602775, iteration: 61985
loss: 1.0175706148147583,grad_norm: 0.9999991382386205, iteration: 61986
loss: 1.014846920967102,grad_norm: 0.7358670059508768, iteration: 61987
loss: 1.011989951133728,grad_norm: 0.9999997426231734, iteration: 61988
loss: 0.9658045172691345,grad_norm: 0.9688677286478967, iteration: 61989
loss: 0.9790496826171875,grad_norm: 0.9999992736931265, iteration: 61990
loss: 1.0365842580795288,grad_norm: 0.9999995644690093, iteration: 61991
loss: 0.9942514896392822,grad_norm: 0.8699366683517415, iteration: 61992
loss: 0.9800332188606262,grad_norm: 0.8903583003628942, iteration: 61993
loss: 1.0343118906021118,grad_norm: 0.9999991253771119, iteration: 61994
loss: 1.0282591581344604,grad_norm: 0.886615395888869, iteration: 61995
loss: 1.0200412273406982,grad_norm: 0.999999383192785, iteration: 61996
loss: 1.0208618640899658,grad_norm: 0.8453715390585793, iteration: 61997
loss: 0.994818925857544,grad_norm: 0.9484245498771786, iteration: 61998
loss: 1.0055971145629883,grad_norm: 0.7919273811687979, iteration: 61999
loss: 1.002727746963501,grad_norm: 0.9999990458203116, iteration: 62000
loss: 1.0031825304031372,grad_norm: 0.9999991419064839, iteration: 62001
loss: 0.9962663054466248,grad_norm: 0.7247083229824954, iteration: 62002
loss: 1.0658851861953735,grad_norm: 0.9999991126921921, iteration: 62003
loss: 1.0535027980804443,grad_norm: 0.9435648213790411, iteration: 62004
loss: 0.9957630038261414,grad_norm: 0.9984589430303389, iteration: 62005
loss: 1.0177960395812988,grad_norm: 0.9999991169643575, iteration: 62006
loss: 0.9906638860702515,grad_norm: 0.999999130622567, iteration: 62007
loss: 0.9961521029472351,grad_norm: 0.9999991050449465, iteration: 62008
loss: 1.0222686529159546,grad_norm: 0.9999990914642304, iteration: 62009
loss: 1.024930715560913,grad_norm: 0.9605023730443712, iteration: 62010
loss: 1.03757643699646,grad_norm: 0.9999990675590293, iteration: 62011
loss: 1.0016261339187622,grad_norm: 0.9566575925255474, iteration: 62012
loss: 1.0149810314178467,grad_norm: 0.9999989900088677, iteration: 62013
loss: 1.0314536094665527,grad_norm: 0.9509779469684939, iteration: 62014
loss: 1.0126115083694458,grad_norm: 0.9999990534051728, iteration: 62015
loss: 1.0106840133666992,grad_norm: 0.9999992834743259, iteration: 62016
loss: 1.0111680030822754,grad_norm: 0.9999992612205018, iteration: 62017
loss: 0.9876500368118286,grad_norm: 0.9999992399739105, iteration: 62018
loss: 0.9831763505935669,grad_norm: 0.8784798566718349, iteration: 62019
loss: 0.9897541999816895,grad_norm: 0.7960369029067302, iteration: 62020
loss: 0.9981535077095032,grad_norm: 0.9428409535647662, iteration: 62021
loss: 0.9962401986122131,grad_norm: 0.9246528291386606, iteration: 62022
loss: 1.046273112297058,grad_norm: 0.9999988903242552, iteration: 62023
loss: 1.0754365921020508,grad_norm: 0.9999992285798172, iteration: 62024
loss: 1.0066566467285156,grad_norm: 0.9198950471869394, iteration: 62025
loss: 1.0323008298873901,grad_norm: 0.8719478801708214, iteration: 62026
loss: 0.9543120265007019,grad_norm: 0.9625743207778154, iteration: 62027
loss: 0.9808480739593506,grad_norm: 0.855090216072955, iteration: 62028
loss: 0.9754821062088013,grad_norm: 0.7918533995896119, iteration: 62029
loss: 1.0205574035644531,grad_norm: 0.9999991189630105, iteration: 62030
loss: 1.0266066789627075,grad_norm: 0.9431558672464253, iteration: 62031
loss: 0.9951248168945312,grad_norm: 0.7283890142971098, iteration: 62032
loss: 1.0125890970230103,grad_norm: 0.9784660362021912, iteration: 62033
loss: 0.998564600944519,grad_norm: 0.9999991367833604, iteration: 62034
loss: 0.9636332988739014,grad_norm: 0.7489983940493282, iteration: 62035
loss: 1.032382607460022,grad_norm: 0.9999991898490087, iteration: 62036
loss: 1.0166012048721313,grad_norm: 0.9999991516077151, iteration: 62037
loss: 1.0310693979263306,grad_norm: 0.893658173387535, iteration: 62038
loss: 1.0147826671600342,grad_norm: 0.8376196309342512, iteration: 62039
loss: 0.9813014268875122,grad_norm: 0.9654532514933925, iteration: 62040
loss: 1.0411473512649536,grad_norm: 0.916813057934487, iteration: 62041
loss: 1.0699228048324585,grad_norm: 0.9534346755408963, iteration: 62042
loss: 0.9766888618469238,grad_norm: 0.9999991086559207, iteration: 62043
loss: 0.9686533808708191,grad_norm: 0.9999992155429326, iteration: 62044
loss: 1.0320460796356201,grad_norm: 0.9217440112910138, iteration: 62045
loss: 1.0233756303787231,grad_norm: 0.8672136931346771, iteration: 62046
loss: 1.0117446184158325,grad_norm: 0.9999992081196927, iteration: 62047
loss: 1.0635950565338135,grad_norm: 0.9999998416716112, iteration: 62048
loss: 1.0100451707839966,grad_norm: 0.8646872860743812, iteration: 62049
loss: 1.0216844081878662,grad_norm: 0.9999990808561092, iteration: 62050
loss: 1.0602211952209473,grad_norm: 0.9999991240268045, iteration: 62051
loss: 0.9785588383674622,grad_norm: 0.8860850588558417, iteration: 62052
loss: 1.0107203722000122,grad_norm: 0.8969261169340248, iteration: 62053
loss: 1.0381933450698853,grad_norm: 0.9999992161350516, iteration: 62054
loss: 1.0155867338180542,grad_norm: 0.9999998756460865, iteration: 62055
loss: 1.013623833656311,grad_norm: 0.999999057626132, iteration: 62056
loss: 1.0059758424758911,grad_norm: 0.7728939418626547, iteration: 62057
loss: 1.0766527652740479,grad_norm: 0.8966551357131878, iteration: 62058
loss: 0.9920925498008728,grad_norm: 0.8884124417364635, iteration: 62059
loss: 1.019204020500183,grad_norm: 0.828321596205083, iteration: 62060
loss: 0.9970403909683228,grad_norm: 0.9046701040078713, iteration: 62061
loss: 0.9864974021911621,grad_norm: 0.9999991954245707, iteration: 62062
loss: 1.0011835098266602,grad_norm: 0.94946687325986, iteration: 62063
loss: 1.0069072246551514,grad_norm: 0.9999991833374554, iteration: 62064
loss: 1.0342836380004883,grad_norm: 0.9952059715720027, iteration: 62065
loss: 1.0089043378829956,grad_norm: 0.9043849732398391, iteration: 62066
loss: 0.9939606785774231,grad_norm: 0.8331884501224605, iteration: 62067
loss: 0.9800184965133667,grad_norm: 0.9401879356941838, iteration: 62068
loss: 1.0021660327911377,grad_norm: 0.8677796443704358, iteration: 62069
loss: 1.052291989326477,grad_norm: 0.8629286529624676, iteration: 62070
loss: 1.0313737392425537,grad_norm: 0.9370008242459967, iteration: 62071
loss: 1.0462058782577515,grad_norm: 0.999999796033788, iteration: 62072
loss: 1.0301058292388916,grad_norm: 0.9494436180474234, iteration: 62073
loss: 1.0210309028625488,grad_norm: 0.9999993361052287, iteration: 62074
loss: 1.0284922122955322,grad_norm: 0.9999990187739302, iteration: 62075
loss: 1.0004618167877197,grad_norm: 0.9218336091243177, iteration: 62076
loss: 1.0136688947677612,grad_norm: 0.8014147721952147, iteration: 62077
loss: 1.0687124729156494,grad_norm: 0.9999995311691083, iteration: 62078
loss: 1.0186634063720703,grad_norm: 0.999999101593923, iteration: 62079
loss: 0.9907258749008179,grad_norm: 0.9999992169540837, iteration: 62080
loss: 1.0037437677383423,grad_norm: 0.912037017573406, iteration: 62081
loss: 1.0314618349075317,grad_norm: 0.9999993037049215, iteration: 62082
loss: 0.9827578663825989,grad_norm: 0.8817265995542819, iteration: 62083
loss: 0.9720891714096069,grad_norm: 0.9423875467002322, iteration: 62084
loss: 1.0029760599136353,grad_norm: 0.9999993319159112, iteration: 62085
loss: 1.0101255178451538,grad_norm: 0.8094047763123968, iteration: 62086
loss: 1.0239697694778442,grad_norm: 0.8646518796548809, iteration: 62087
loss: 1.0272797346115112,grad_norm: 0.948781724408493, iteration: 62088
loss: 1.0032134056091309,grad_norm: 0.7873421246190544, iteration: 62089
loss: 0.9797754287719727,grad_norm: 0.8481955476337549, iteration: 62090
loss: 1.1079344749450684,grad_norm: 1.0000000286587354, iteration: 62091
loss: 0.9867305755615234,grad_norm: 0.9792723567400047, iteration: 62092
loss: 0.9742320775985718,grad_norm: 0.8851356822684618, iteration: 62093
loss: 0.9885017275810242,grad_norm: 0.8673155908916941, iteration: 62094
loss: 1.0271525382995605,grad_norm: 0.9999993250778502, iteration: 62095
loss: 0.9949411749839783,grad_norm: 0.9999989328373994, iteration: 62096
loss: 1.0173457860946655,grad_norm: 0.9999990022247953, iteration: 62097
loss: 1.0259038209915161,grad_norm: 0.9078284865507966, iteration: 62098
loss: 0.9947092533111572,grad_norm: 0.9999991570164041, iteration: 62099
loss: 0.9574153423309326,grad_norm: 0.7677870985397234, iteration: 62100
loss: 1.0371513366699219,grad_norm: 0.8275317575678818, iteration: 62101
loss: 1.0053565502166748,grad_norm: 0.7800024591265743, iteration: 62102
loss: 1.0281635522842407,grad_norm: 0.9999991316258584, iteration: 62103
loss: 1.0523487329483032,grad_norm: 0.9926272163530769, iteration: 62104
loss: 1.0207633972167969,grad_norm: 0.8930869296907638, iteration: 62105
loss: 0.9797939658164978,grad_norm: 0.9534012510261575, iteration: 62106
loss: 1.021291971206665,grad_norm: 0.9991084827374869, iteration: 62107
loss: 0.9720641374588013,grad_norm: 0.9197242851364298, iteration: 62108
loss: 0.9676657915115356,grad_norm: 0.9442908763105319, iteration: 62109
loss: 0.9866237640380859,grad_norm: 0.9999991661405515, iteration: 62110
loss: 1.0126839876174927,grad_norm: 0.7588486098993081, iteration: 62111
loss: 1.0197803974151611,grad_norm: 0.8803147928221556, iteration: 62112
loss: 1.0843992233276367,grad_norm: 0.9932724821080628, iteration: 62113
loss: 0.9917457103729248,grad_norm: 0.9484117164216517, iteration: 62114
loss: 0.9917120337486267,grad_norm: 0.7825149483655431, iteration: 62115
loss: 1.0280556678771973,grad_norm: 0.9999992018682738, iteration: 62116
loss: 1.0282458066940308,grad_norm: 0.9999995572368618, iteration: 62117
loss: 1.0523667335510254,grad_norm: 0.989194058978142, iteration: 62118
loss: 0.9995959401130676,grad_norm: 0.9999991216597541, iteration: 62119
loss: 1.0241426229476929,grad_norm: 0.9140763773656181, iteration: 62120
loss: 1.0238173007965088,grad_norm: 0.8563642450185593, iteration: 62121
loss: 0.9956275820732117,grad_norm: 0.9999991507057467, iteration: 62122
loss: 1.040496826171875,grad_norm: 0.9999992576991253, iteration: 62123
loss: 1.0132437944412231,grad_norm: 0.9838701233568633, iteration: 62124
loss: 1.0051188468933105,grad_norm: 0.9999991070855263, iteration: 62125
loss: 1.0875588655471802,grad_norm: 0.9999995362530598, iteration: 62126
loss: 1.0010340213775635,grad_norm: 0.9999990809561992, iteration: 62127
loss: 1.005271077156067,grad_norm: 0.999999388803624, iteration: 62128
loss: 0.9708547592163086,grad_norm: 0.9999989785520276, iteration: 62129
loss: 1.0174013376235962,grad_norm: 0.9999999082523081, iteration: 62130
loss: 0.9953222274780273,grad_norm: 0.9999995561153149, iteration: 62131
loss: 0.999127209186554,grad_norm: 0.8783993869489033, iteration: 62132
loss: 1.0098868608474731,grad_norm: 0.984886846448519, iteration: 62133
loss: 0.9834740161895752,grad_norm: 0.8891462148486716, iteration: 62134
loss: 1.053261637687683,grad_norm: 0.999999034246236, iteration: 62135
loss: 1.0094281435012817,grad_norm: 0.8888348007780178, iteration: 62136
loss: 1.0146325826644897,grad_norm: 0.8528230353551173, iteration: 62137
loss: 1.0207111835479736,grad_norm: 0.957837586699572, iteration: 62138
loss: 1.0346457958221436,grad_norm: 0.9381734756170256, iteration: 62139
loss: 1.0235613584518433,grad_norm: 0.999999268098921, iteration: 62140
loss: 1.035210371017456,grad_norm: 0.9309373157639405, iteration: 62141
loss: 1.0391392707824707,grad_norm: 0.9999992362705723, iteration: 62142
loss: 1.0121928453445435,grad_norm: 0.9185151606712176, iteration: 62143
loss: 1.0297664403915405,grad_norm: 0.9999992818819208, iteration: 62144
loss: 0.987944483757019,grad_norm: 0.8983420783609122, iteration: 62145
loss: 0.9724612832069397,grad_norm: 0.7942247834140885, iteration: 62146
loss: 0.9754192233085632,grad_norm: 0.8078060665967242, iteration: 62147
loss: 1.3698068857192993,grad_norm: 0.9999999210277547, iteration: 62148
loss: 1.0107817649841309,grad_norm: 0.8670148702620071, iteration: 62149
loss: 1.00757896900177,grad_norm: 0.862262392315021, iteration: 62150
loss: 1.0262137651443481,grad_norm: 0.999999349927149, iteration: 62151
loss: 1.0038799047470093,grad_norm: 0.8824628705977281, iteration: 62152
loss: 1.0173238515853882,grad_norm: 0.8880071638769338, iteration: 62153
loss: 0.98177170753479,grad_norm: 0.9653438409842037, iteration: 62154
loss: 0.9723919034004211,grad_norm: 0.9766312081939723, iteration: 62155
loss: 0.9642380475997925,grad_norm: 0.9485786708372062, iteration: 62156
loss: 1.0306975841522217,grad_norm: 0.9708896278851924, iteration: 62157
loss: 1.0029170513153076,grad_norm: 0.9999990626638441, iteration: 62158
loss: 0.9511780142784119,grad_norm: 0.9999991244976333, iteration: 62159
loss: 0.9892370104789734,grad_norm: 0.7960282580805522, iteration: 62160
loss: 0.9804865717887878,grad_norm: 0.8711102566269655, iteration: 62161
loss: 0.9721980094909668,grad_norm: 0.9999992281599621, iteration: 62162
loss: 0.9747739434242249,grad_norm: 0.7576963259902983, iteration: 62163
loss: 0.9629473686218262,grad_norm: 0.9999990414750044, iteration: 62164
loss: 0.9822407960891724,grad_norm: 0.8407366131066398, iteration: 62165
loss: 1.0039639472961426,grad_norm: 0.9999992769662305, iteration: 62166
loss: 1.0055545568466187,grad_norm: 0.8289399557941903, iteration: 62167
loss: 0.9997949004173279,grad_norm: 0.9999995176571416, iteration: 62168
loss: 1.1921433210372925,grad_norm: 0.9999992506922928, iteration: 62169
loss: 0.9634680151939392,grad_norm: 0.8536157318905846, iteration: 62170
loss: 0.9952940344810486,grad_norm: 0.8589304664935519, iteration: 62171
loss: 0.9923422932624817,grad_norm: 0.8972552877181583, iteration: 62172
loss: 1.046574354171753,grad_norm: 0.999999218228864, iteration: 62173
loss: 1.0756206512451172,grad_norm: 0.9999990888455933, iteration: 62174
loss: 1.0255273580551147,grad_norm: 0.9064992980278265, iteration: 62175
loss: 0.9808230400085449,grad_norm: 0.8876546982096849, iteration: 62176
loss: 0.9943344593048096,grad_norm: 0.9999990013543683, iteration: 62177
loss: 1.0185985565185547,grad_norm: 0.9999998819735326, iteration: 62178
loss: 0.9860191941261292,grad_norm: 0.9999992465787554, iteration: 62179
loss: 1.3597456216812134,grad_norm: 0.9999998686868943, iteration: 62180
loss: 1.0934075117111206,grad_norm: 0.9999995920214831, iteration: 62181
loss: 1.10603666305542,grad_norm: 0.9999995196102414, iteration: 62182
loss: 1.0292778015136719,grad_norm: 0.9999990714538691, iteration: 62183
loss: 0.9440492987632751,grad_norm: 0.8763829032770073, iteration: 62184
loss: 1.002920150756836,grad_norm: 0.8131699005126134, iteration: 62185
loss: 1.2694936990737915,grad_norm: 1.0000000243755829, iteration: 62186
loss: 1.1656638383865356,grad_norm: 0.9999993891794069, iteration: 62187
loss: 1.1077958345413208,grad_norm: 0.9999993238557061, iteration: 62188
loss: 1.2029435634613037,grad_norm: 0.9999994691771313, iteration: 62189
loss: 0.9779951572418213,grad_norm: 0.9999990600285216, iteration: 62190
loss: 0.9979105591773987,grad_norm: 0.8897659181361144, iteration: 62191
loss: 1.2505706548690796,grad_norm: 0.9999994222037072, iteration: 62192
loss: 1.019790530204773,grad_norm: 0.9999993782266647, iteration: 62193
loss: 1.0317730903625488,grad_norm: 0.9783426815140837, iteration: 62194
loss: 1.0333424806594849,grad_norm: 0.9999993802591503, iteration: 62195
loss: 1.0079975128173828,grad_norm: 0.9999994657003447, iteration: 62196
loss: 1.0201164484024048,grad_norm: 0.9999991952252716, iteration: 62197
loss: 0.9913524985313416,grad_norm: 0.9999991188256061, iteration: 62198
loss: 1.128979206085205,grad_norm: 0.9999990431158509, iteration: 62199
loss: 0.9728694558143616,grad_norm: 0.9999993877833582, iteration: 62200
loss: 1.0298715829849243,grad_norm: 0.9999993161602587, iteration: 62201
loss: 1.0648273229599,grad_norm: 0.9999998905865586, iteration: 62202
loss: 0.9997166395187378,grad_norm: 0.9999990801048145, iteration: 62203
loss: 1.0070011615753174,grad_norm: 0.8845355811699639, iteration: 62204
loss: 1.0415889024734497,grad_norm: 0.9999990583044569, iteration: 62205
loss: 1.0434218645095825,grad_norm: 0.9999992761327255, iteration: 62206
loss: 0.9805296063423157,grad_norm: 0.8974455447680283, iteration: 62207
loss: 1.0927019119262695,grad_norm: 0.9999996864059764, iteration: 62208
loss: 1.0622217655181885,grad_norm: 0.9999992657984752, iteration: 62209
loss: 1.0085221529006958,grad_norm: 0.9393902508834397, iteration: 62210
loss: 0.968548059463501,grad_norm: 0.7840409503746942, iteration: 62211
loss: 1.0095086097717285,grad_norm: 0.9999994366208761, iteration: 62212
loss: 1.013216257095337,grad_norm: 0.8386227928997299, iteration: 62213
loss: 0.992236316204071,grad_norm: 0.921098726537616, iteration: 62214
loss: 1.0168207883834839,grad_norm: 0.9999992605878499, iteration: 62215
loss: 0.9568309783935547,grad_norm: 0.8719708082380883, iteration: 62216
loss: 1.0054283142089844,grad_norm: 0.9502049583355606, iteration: 62217
loss: 1.0173919200897217,grad_norm: 0.9999991121860878, iteration: 62218
loss: 1.017586588859558,grad_norm: 0.9808414291412303, iteration: 62219
loss: 0.9813695549964905,grad_norm: 0.9999991091705221, iteration: 62220
loss: 1.0151413679122925,grad_norm: 0.8882475413645283, iteration: 62221
loss: 0.9849849939346313,grad_norm: 0.9999993634580493, iteration: 62222
loss: 0.975907027721405,grad_norm: 0.933117291445834, iteration: 62223
loss: 1.0159809589385986,grad_norm: 0.9999991023083165, iteration: 62224
loss: 1.0024778842926025,grad_norm: 0.8955912689210219, iteration: 62225
loss: 0.9962054491043091,grad_norm: 0.9999990045017799, iteration: 62226
loss: 1.002741813659668,grad_norm: 0.8195055289181831, iteration: 62227
loss: 0.9847542643547058,grad_norm: 0.9999990896785955, iteration: 62228
loss: 1.0311921834945679,grad_norm: 0.9930427934495308, iteration: 62229
loss: 1.061755657196045,grad_norm: 0.9999997442766735, iteration: 62230
loss: 1.0170187950134277,grad_norm: 0.9999993043257875, iteration: 62231
loss: 1.0560975074768066,grad_norm: 0.9999990440844698, iteration: 62232
loss: 0.9946138858795166,grad_norm: 0.9999989613354393, iteration: 62233
loss: 1.0103833675384521,grad_norm: 0.9999994675736243, iteration: 62234
loss: 1.0140362977981567,grad_norm: 0.8132572917218841, iteration: 62235
loss: 0.9871054887771606,grad_norm: 0.9403493879404521, iteration: 62236
loss: 0.9839140176773071,grad_norm: 0.8864201397396677, iteration: 62237
loss: 1.0178834199905396,grad_norm: 0.9999993918643153, iteration: 62238
loss: 0.9835413694381714,grad_norm: 0.8955218139101337, iteration: 62239
loss: 0.9855619072914124,grad_norm: 0.9446343336937806, iteration: 62240
loss: 1.0346734523773193,grad_norm: 0.9999992515275582, iteration: 62241
loss: 0.9871019721031189,grad_norm: 0.7852411981103771, iteration: 62242
loss: 1.0796633958816528,grad_norm: 0.9999998204929832, iteration: 62243
loss: 1.0262030363082886,grad_norm: 0.9999992396194095, iteration: 62244
loss: 0.9828678369522095,grad_norm: 0.8470480892886192, iteration: 62245
loss: 0.9949811100959778,grad_norm: 0.9999992592669676, iteration: 62246
loss: 1.0465013980865479,grad_norm: 0.9999994163461494, iteration: 62247
loss: 0.9803089499473572,grad_norm: 0.8003582642606273, iteration: 62248
loss: 0.9992577433586121,grad_norm: 0.6709801422770001, iteration: 62249
loss: 1.0337132215499878,grad_norm: 0.9999996303648221, iteration: 62250
loss: 1.0050843954086304,grad_norm: 0.9999991162432809, iteration: 62251
loss: 1.086655616760254,grad_norm: 0.9999991881443051, iteration: 62252
loss: 1.0070974826812744,grad_norm: 0.9382416785428757, iteration: 62253
loss: 1.0373201370239258,grad_norm: 0.9999992490398022, iteration: 62254
loss: 0.97068852186203,grad_norm: 0.9122765845917173, iteration: 62255
loss: 0.9738323092460632,grad_norm: 0.9695466993566845, iteration: 62256
loss: 1.0039703845977783,grad_norm: 0.937150403234136, iteration: 62257
loss: 0.9890912771224976,grad_norm: 0.9999991646224804, iteration: 62258
loss: 1.1058671474456787,grad_norm: 0.9999992977804307, iteration: 62259
loss: 1.0141757726669312,grad_norm: 0.9999991851420559, iteration: 62260
loss: 1.0188665390014648,grad_norm: 0.8158412589544033, iteration: 62261
loss: 0.9845471978187561,grad_norm: 0.9999995424329042, iteration: 62262
loss: 1.0692781209945679,grad_norm: 0.9999998197584796, iteration: 62263
loss: 1.0287142992019653,grad_norm: 0.9999989976935612, iteration: 62264
loss: 0.972419261932373,grad_norm: 0.8008885485900107, iteration: 62265
loss: 1.0056471824645996,grad_norm: 0.9026290801265017, iteration: 62266
loss: 0.9873760938644409,grad_norm: 0.9999990026558336, iteration: 62267
loss: 0.9755399823188782,grad_norm: 0.9597816076892038, iteration: 62268
loss: 0.9855571389198303,grad_norm: 0.8817733170800169, iteration: 62269
loss: 1.0042226314544678,grad_norm: 0.9898603919166437, iteration: 62270
loss: 1.0034762620925903,grad_norm: 0.9852284140431682, iteration: 62271
loss: 0.9569838047027588,grad_norm: 0.9999991559835186, iteration: 62272
loss: 1.0445740222930908,grad_norm: 0.999999099912843, iteration: 62273
loss: 1.0037580728530884,grad_norm: 0.8380661364656351, iteration: 62274
loss: 0.9743863344192505,grad_norm: 0.7692971666200277, iteration: 62275
loss: 1.0382088422775269,grad_norm: 0.783431199641622, iteration: 62276
loss: 0.9744871258735657,grad_norm: 0.9749206386700949, iteration: 62277
loss: 0.9928719401359558,grad_norm: 0.8734790234328305, iteration: 62278
loss: 0.9922152161598206,grad_norm: 0.9685668582253036, iteration: 62279
loss: 0.9892895221710205,grad_norm: 0.9999994336156641, iteration: 62280
loss: 1.0203043222427368,grad_norm: 0.9748057308069262, iteration: 62281
loss: 0.9783142805099487,grad_norm: 0.9999991666533166, iteration: 62282
loss: 1.0231966972351074,grad_norm: 0.9999994886439508, iteration: 62283
loss: 1.0346368551254272,grad_norm: 0.9999990617812795, iteration: 62284
loss: 0.9920580387115479,grad_norm: 0.8364373402648116, iteration: 62285
loss: 1.0427089929580688,grad_norm: 0.831066342937887, iteration: 62286
loss: 0.9619272351264954,grad_norm: 0.999999208657335, iteration: 62287
loss: 0.9983780980110168,grad_norm: 0.9999993944965018, iteration: 62288
loss: 1.0021334886550903,grad_norm: 0.8576647359471283, iteration: 62289
loss: 0.9934653043746948,grad_norm: 0.9927642667140306, iteration: 62290
loss: 1.0002044439315796,grad_norm: 0.963711752947054, iteration: 62291
loss: 0.9828664660453796,grad_norm: 0.7621495133157653, iteration: 62292
loss: 0.977906346321106,grad_norm: 0.9867255245751969, iteration: 62293
loss: 1.045884132385254,grad_norm: 0.9999992803349496, iteration: 62294
loss: 1.0449230670928955,grad_norm: 0.9999993880314374, iteration: 62295
loss: 1.019553303718567,grad_norm: 0.8252938607356747, iteration: 62296
loss: 1.0096954107284546,grad_norm: 0.9148366309625943, iteration: 62297
loss: 0.9554342031478882,grad_norm: 0.9999991909670036, iteration: 62298
loss: 1.0214775800704956,grad_norm: 0.9999991413540881, iteration: 62299
loss: 0.999652624130249,grad_norm: 0.9240543720756395, iteration: 62300
loss: 1.0098322629928589,grad_norm: 0.9883394142703229, iteration: 62301
loss: 0.9944862723350525,grad_norm: 0.9999991783953291, iteration: 62302
loss: 1.1138396263122559,grad_norm: 0.9038627593810942, iteration: 62303
loss: 1.0038431882858276,grad_norm: 0.9931712513306172, iteration: 62304
loss: 1.0641932487487793,grad_norm: 0.9999990930214671, iteration: 62305
loss: 1.0242513418197632,grad_norm: 0.9936169734144501, iteration: 62306
loss: 1.0360369682312012,grad_norm: 0.8114634712103143, iteration: 62307
loss: 0.9990273714065552,grad_norm: 0.9137259708032649, iteration: 62308
loss: 1.0357532501220703,grad_norm: 0.9999991305906607, iteration: 62309
loss: 1.0667760372161865,grad_norm: 0.9999998641794252, iteration: 62310
loss: 1.0280985832214355,grad_norm: 0.9360585650846376, iteration: 62311
loss: 1.0314483642578125,grad_norm: 0.9481098049837127, iteration: 62312
loss: 1.0109002590179443,grad_norm: 0.8479567551087502, iteration: 62313
loss: 0.9759129285812378,grad_norm: 0.7984235123634918, iteration: 62314
loss: 1.0385563373565674,grad_norm: 0.8807036741752574, iteration: 62315
loss: 0.9909312129020691,grad_norm: 0.9999991799054128, iteration: 62316
loss: 1.02664053440094,grad_norm: 0.9999996842923087, iteration: 62317
loss: 0.9703041315078735,grad_norm: 0.8346359604466794, iteration: 62318
loss: 1.0403488874435425,grad_norm: 0.8944404201101049, iteration: 62319
loss: 1.2292417287826538,grad_norm: 0.9999999817426031, iteration: 62320
loss: 1.0204784870147705,grad_norm: 0.9999990550873054, iteration: 62321
loss: 1.012740969657898,grad_norm: 0.9999991015527636, iteration: 62322
loss: 1.0230001211166382,grad_norm: 0.999999314720897, iteration: 62323
loss: 1.0288071632385254,grad_norm: 0.9249832056531999, iteration: 62324
loss: 0.9958963990211487,grad_norm: 0.9999991258180261, iteration: 62325
loss: 1.0003843307495117,grad_norm: 0.8762249405795905, iteration: 62326
loss: 1.0334234237670898,grad_norm: 0.9776149415805022, iteration: 62327
loss: 1.0185619592666626,grad_norm: 0.8576273365669711, iteration: 62328
loss: 1.0084818601608276,grad_norm: 0.9999991842549701, iteration: 62329
loss: 0.9717708826065063,grad_norm: 0.9696754094553345, iteration: 62330
loss: 0.9967361688613892,grad_norm: 0.7499499507408736, iteration: 62331
loss: 1.0644176006317139,grad_norm: 0.9999995011439583, iteration: 62332
loss: 1.0453940629959106,grad_norm: 0.9999995308502786, iteration: 62333
loss: 1.0121772289276123,grad_norm: 0.9999990152222965, iteration: 62334
loss: 1.005854845046997,grad_norm: 0.9272967879856249, iteration: 62335
loss: 0.9744089245796204,grad_norm: 0.9999991465814404, iteration: 62336
loss: 0.9910513758659363,grad_norm: 0.9999990020509099, iteration: 62337
loss: 0.9896258115768433,grad_norm: 0.9504488029185343, iteration: 62338
loss: 0.9726839065551758,grad_norm: 0.9706564485685865, iteration: 62339
loss: 0.9834854006767273,grad_norm: 0.9200029902447692, iteration: 62340
loss: 1.037050724029541,grad_norm: 0.9024479374648865, iteration: 62341
loss: 0.9891861081123352,grad_norm: 0.8483424897199237, iteration: 62342
loss: 1.0184215307235718,grad_norm: 0.9999998446119245, iteration: 62343
loss: 1.0543231964111328,grad_norm: 0.9494606623614938, iteration: 62344
loss: 0.9615022540092468,grad_norm: 0.8954186803358372, iteration: 62345
loss: 0.9372568726539612,grad_norm: 0.819139624052373, iteration: 62346
loss: 0.984618067741394,grad_norm: 0.9999989888040267, iteration: 62347
loss: 0.9710193276405334,grad_norm: 0.9999989985289465, iteration: 62348
loss: 0.9824930429458618,grad_norm: 0.9999991747550858, iteration: 62349
loss: 0.9895285367965698,grad_norm: 0.9268131394090684, iteration: 62350
loss: 1.0617674589157104,grad_norm: 0.9999991116334127, iteration: 62351
loss: 1.0062559843063354,grad_norm: 0.8424343097676443, iteration: 62352
loss: 0.99100261926651,grad_norm: 0.9999989933580188, iteration: 62353
loss: 1.017721176147461,grad_norm: 0.8783211683791349, iteration: 62354
loss: 1.008285403251648,grad_norm: 0.8390014255849345, iteration: 62355
loss: 1.0167434215545654,grad_norm: 0.9999996685859454, iteration: 62356
loss: 0.9983547925949097,grad_norm: 0.9613931325424131, iteration: 62357
loss: 0.9748291969299316,grad_norm: 0.8828871875556228, iteration: 62358
loss: 1.021898865699768,grad_norm: 0.9999992912090713, iteration: 62359
loss: 1.0331926345825195,grad_norm: 0.8466348694671375, iteration: 62360
loss: 1.0129653215408325,grad_norm: 0.9999990990214861, iteration: 62361
loss: 1.0306061506271362,grad_norm: 0.7895363290358316, iteration: 62362
loss: 0.9542784094810486,grad_norm: 0.9999990054688325, iteration: 62363
loss: 1.0101381540298462,grad_norm: 0.9999991075667468, iteration: 62364
loss: 1.0153568983078003,grad_norm: 0.986865449667634, iteration: 62365
loss: 1.0082082748413086,grad_norm: 0.999999085294712, iteration: 62366
loss: 0.9734111428260803,grad_norm: 0.9756461889408593, iteration: 62367
loss: 0.9884200096130371,grad_norm: 0.9820605667446553, iteration: 62368
loss: 1.0054398775100708,grad_norm: 0.8526469023396585, iteration: 62369
loss: 1.036837100982666,grad_norm: 0.9999999078062227, iteration: 62370
loss: 0.9881324172019958,grad_norm: 0.9999991065511743, iteration: 62371
loss: 0.9895130395889282,grad_norm: 0.8419530888872959, iteration: 62372
loss: 1.011412501335144,grad_norm: 0.8898778255903929, iteration: 62373
loss: 1.0043425559997559,grad_norm: 0.999999698029346, iteration: 62374
loss: 1.023254632949829,grad_norm: 0.8245008221408804, iteration: 62375
loss: 0.9836192727088928,grad_norm: 0.8897444046039514, iteration: 62376
loss: 1.0393010377883911,grad_norm: 0.9318590215648892, iteration: 62377
loss: 0.9750214219093323,grad_norm: 0.9999991463134221, iteration: 62378
loss: 1.0604519844055176,grad_norm: 0.999998980816347, iteration: 62379
loss: 1.0055283308029175,grad_norm: 0.9170126647542511, iteration: 62380
loss: 1.043880581855774,grad_norm: 0.8725437764224837, iteration: 62381
loss: 1.0158437490463257,grad_norm: 0.8817734643430106, iteration: 62382
loss: 0.9874096512794495,grad_norm: 0.9618776289085568, iteration: 62383
loss: 0.9992724061012268,grad_norm: 0.8590471470964367, iteration: 62384
loss: 0.9928903579711914,grad_norm: 0.9999990028513145, iteration: 62385
loss: 0.9779491424560547,grad_norm: 0.8489713266160653, iteration: 62386
loss: 1.0302603244781494,grad_norm: 0.9999993187711006, iteration: 62387
loss: 0.9984496831893921,grad_norm: 0.9999994831401029, iteration: 62388
loss: 1.0182735919952393,grad_norm: 0.8583773266329935, iteration: 62389
loss: 1.0369017124176025,grad_norm: 0.9416097146363018, iteration: 62390
loss: 0.9874061346054077,grad_norm: 0.8369144895959795, iteration: 62391
loss: 1.0177617073059082,grad_norm: 0.861241156161559, iteration: 62392
loss: 0.9865456223487854,grad_norm: 0.9375692948897676, iteration: 62393
loss: 0.9807166457176208,grad_norm: 0.9999990910401843, iteration: 62394
loss: 1.011958360671997,grad_norm: 0.999999308687257, iteration: 62395
loss: 1.031875491142273,grad_norm: 0.9970121007441529, iteration: 62396
loss: 1.1106078624725342,grad_norm: 0.9999995403692974, iteration: 62397
loss: 0.9953393340110779,grad_norm: 0.8893722497472332, iteration: 62398
loss: 0.9660137295722961,grad_norm: 0.9999992293734276, iteration: 62399
loss: 1.0255534648895264,grad_norm: 0.8299421142802685, iteration: 62400
loss: 0.9936524033546448,grad_norm: 0.999999548992276, iteration: 62401
loss: 1.0083565711975098,grad_norm: 0.8156288052119314, iteration: 62402
loss: 0.991575300693512,grad_norm: 0.9497047259451744, iteration: 62403
loss: 0.9738568663597107,grad_norm: 0.9597185572873319, iteration: 62404
loss: 1.0401417016983032,grad_norm: 0.9999993131308074, iteration: 62405
loss: 1.010828971862793,grad_norm: 0.8121062987132521, iteration: 62406
loss: 1.0051031112670898,grad_norm: 0.9483055507978354, iteration: 62407
loss: 0.9628714323043823,grad_norm: 0.8813018198507494, iteration: 62408
loss: 1.0199223756790161,grad_norm: 0.8128388882903828, iteration: 62409
loss: 0.9961521625518799,grad_norm: 0.9077630945299626, iteration: 62410
loss: 0.9970229268074036,grad_norm: 0.9999991813333977, iteration: 62411
loss: 1.008565902709961,grad_norm: 0.7966278848694632, iteration: 62412
loss: 1.0036295652389526,grad_norm: 0.9754934722946568, iteration: 62413
loss: 0.98985356092453,grad_norm: 0.7924742910690215, iteration: 62414
loss: 0.9762817025184631,grad_norm: 0.9999990671581972, iteration: 62415
loss: 1.049602746963501,grad_norm: 0.9999990936162646, iteration: 62416
loss: 0.9699275493621826,grad_norm: 0.7654670695535264, iteration: 62417
loss: 1.006418228149414,grad_norm: 0.9277862507358197, iteration: 62418
loss: 1.0097233057022095,grad_norm: 0.8871106532469627, iteration: 62419
loss: 0.981570303440094,grad_norm: 0.9509977269427395, iteration: 62420
loss: 1.0081393718719482,grad_norm: 0.9999990794948826, iteration: 62421
loss: 1.0099047422409058,grad_norm: 0.9267837706621361, iteration: 62422
loss: 0.9865451455116272,grad_norm: 0.9075063014225487, iteration: 62423
loss: 0.9790471792221069,grad_norm: 0.7880522198209405, iteration: 62424
loss: 1.0263768434524536,grad_norm: 0.999999947360602, iteration: 62425
loss: 1.0480259656906128,grad_norm: 0.9999991041950355, iteration: 62426
loss: 1.0048789978027344,grad_norm: 0.8926082521882506, iteration: 62427
loss: 0.9993203282356262,grad_norm: 0.910501740588974, iteration: 62428
loss: 0.9879392981529236,grad_norm: 0.999999132971854, iteration: 62429
loss: 1.0104025602340698,grad_norm: 0.8935926949537613, iteration: 62430
loss: 0.9794630408287048,grad_norm: 0.7977260780284022, iteration: 62431
loss: 0.9705914855003357,grad_norm: 0.9999992415252521, iteration: 62432
loss: 1.0176372528076172,grad_norm: 0.9362070594315935, iteration: 62433
loss: 0.9448236227035522,grad_norm: 0.9240376095010069, iteration: 62434
loss: 1.018836498260498,grad_norm: 0.9953255961591775, iteration: 62435
loss: 1.0096235275268555,grad_norm: 0.9773949071260779, iteration: 62436
loss: 1.0097455978393555,grad_norm: 0.9012818201878665, iteration: 62437
loss: 1.00731360912323,grad_norm: 0.8956747966765678, iteration: 62438
loss: 1.0274479389190674,grad_norm: 0.9960315833691199, iteration: 62439
loss: 1.0033519268035889,grad_norm: 0.7293401904462214, iteration: 62440
loss: 1.0017117261886597,grad_norm: 0.9588998595114155, iteration: 62441
loss: 1.045387625694275,grad_norm: 0.9999999151013381, iteration: 62442
loss: 1.0265817642211914,grad_norm: 0.9028365644656412, iteration: 62443
loss: 1.0391685962677002,grad_norm: 0.7772256842462341, iteration: 62444
loss: 0.9831714630126953,grad_norm: 0.9999993801973016, iteration: 62445
loss: 1.0799168348312378,grad_norm: 0.9199895897577628, iteration: 62446
loss: 1.057134747505188,grad_norm: 0.9954108332043059, iteration: 62447
loss: 0.9754098653793335,grad_norm: 0.9999991398383961, iteration: 62448
loss: 1.0320744514465332,grad_norm: 0.999999432005406, iteration: 62449
loss: 1.016626000404358,grad_norm: 0.9999990019757669, iteration: 62450
loss: 1.0199410915374756,grad_norm: 0.999999619352466, iteration: 62451
loss: 1.011583924293518,grad_norm: 0.9999997429193288, iteration: 62452
loss: 1.0131194591522217,grad_norm: 0.999999590964459, iteration: 62453
loss: 1.0151164531707764,grad_norm: 0.8905173666000374, iteration: 62454
loss: 1.0816060304641724,grad_norm: 0.9999999407877016, iteration: 62455
loss: 0.9747896790504456,grad_norm: 0.9999990189505165, iteration: 62456
loss: 0.9862738847732544,grad_norm: 0.9559949218833894, iteration: 62457
loss: 1.009436845779419,grad_norm: 0.9999990262307533, iteration: 62458
loss: 0.9913551211357117,grad_norm: 0.8693600865604016, iteration: 62459
loss: 1.0065104961395264,grad_norm: 0.8261508685397566, iteration: 62460
loss: 0.9959484338760376,grad_norm: 0.999999285711034, iteration: 62461
loss: 1.0732954740524292,grad_norm: 0.9999998310251285, iteration: 62462
loss: 1.1596580743789673,grad_norm: 0.9999993913755167, iteration: 62463
loss: 1.0041431188583374,grad_norm: 0.9999991151297043, iteration: 62464
loss: 1.0178338289260864,grad_norm: 0.9999990978369551, iteration: 62465
loss: 0.9794684052467346,grad_norm: 0.8690565789460156, iteration: 62466
loss: 0.98158860206604,grad_norm: 0.9999993057979792, iteration: 62467
loss: 1.0250205993652344,grad_norm: 0.7647126183868261, iteration: 62468
loss: 0.9514740705490112,grad_norm: 0.8269393969674147, iteration: 62469
loss: 1.0200480222702026,grad_norm: 0.9999995115579564, iteration: 62470
loss: 0.9867095947265625,grad_norm: 0.8659192832426956, iteration: 62471
loss: 1.0078679323196411,grad_norm: 0.7905995901596524, iteration: 62472
loss: 1.0644657611846924,grad_norm: 0.9999996490425572, iteration: 62473
loss: 1.014527678489685,grad_norm: 0.903968414417356, iteration: 62474
loss: 1.0303467512130737,grad_norm: 0.904756219860971, iteration: 62475
loss: 0.9882379174232483,grad_norm: 0.9793133362099598, iteration: 62476
loss: 1.0149861574172974,grad_norm: 0.9137602553030268, iteration: 62477
loss: 0.9606898427009583,grad_norm: 0.8760116183181089, iteration: 62478
loss: 0.9932175874710083,grad_norm: 0.8939485616611783, iteration: 62479
loss: 0.9922536611557007,grad_norm: 0.8365963156053317, iteration: 62480
loss: 1.0350346565246582,grad_norm: 0.7877681330565062, iteration: 62481
loss: 1.046165108680725,grad_norm: 0.9999993244829347, iteration: 62482
loss: 0.9884475469589233,grad_norm: 0.8620201979508142, iteration: 62483
loss: 1.0487788915634155,grad_norm: 0.999999755012862, iteration: 62484
loss: 0.9796872138977051,grad_norm: 0.9999991683202084, iteration: 62485
loss: 1.0534414052963257,grad_norm: 0.9999996241615808, iteration: 62486
loss: 1.0219740867614746,grad_norm: 0.9999993340280857, iteration: 62487
loss: 1.0228089094161987,grad_norm: 0.8862576777670605, iteration: 62488
loss: 1.0137182474136353,grad_norm: 0.9999996038176865, iteration: 62489
loss: 1.0208649635314941,grad_norm: 0.9999993118798144, iteration: 62490
loss: 1.0320438146591187,grad_norm: 0.9999995185793317, iteration: 62491
loss: 1.0276129245758057,grad_norm: 0.8644078700119113, iteration: 62492
loss: 1.0478005409240723,grad_norm: 0.7823585904395137, iteration: 62493
loss: 1.043716311454773,grad_norm: 0.9999991854103916, iteration: 62494
loss: 0.9741209745407104,grad_norm: 0.9054012844971879, iteration: 62495
loss: 1.0062984228134155,grad_norm: 0.9999990683524829, iteration: 62496
loss: 1.0110852718353271,grad_norm: 0.7965178303758914, iteration: 62497
loss: 0.9972398281097412,grad_norm: 0.9879244959613757, iteration: 62498
loss: 0.9759619832038879,grad_norm: 0.988647165989886, iteration: 62499
loss: 1.0910741090774536,grad_norm: 0.9999992657422803, iteration: 62500
loss: 1.0221821069717407,grad_norm: 0.9366677708744454, iteration: 62501
loss: 0.999764084815979,grad_norm: 0.9999989702005913, iteration: 62502
loss: 1.069771647453308,grad_norm: 0.9999999130163058, iteration: 62503
loss: 0.9818968176841736,grad_norm: 0.9999991807408551, iteration: 62504
loss: 1.0097973346710205,grad_norm: 0.9228474863724541, iteration: 62505
loss: 0.9790390133857727,grad_norm: 0.9999990784673698, iteration: 62506
loss: 1.0850074291229248,grad_norm: 0.9999997151344774, iteration: 62507
loss: 0.9853705763816833,grad_norm: 0.8329127884423313, iteration: 62508
loss: 0.9977737069129944,grad_norm: 0.9790148085286148, iteration: 62509
loss: 1.068971037864685,grad_norm: 0.9999996554704689, iteration: 62510
loss: 1.0175776481628418,grad_norm: 0.9999993681361888, iteration: 62511
loss: 0.9894533753395081,grad_norm: 0.9999993062974617, iteration: 62512
loss: 1.0059664249420166,grad_norm: 0.7654924541752677, iteration: 62513
loss: 1.046311378479004,grad_norm: 0.8332489333979115, iteration: 62514
loss: 1.0068142414093018,grad_norm: 0.8024922647480311, iteration: 62515
loss: 1.009688138961792,grad_norm: 0.9999995618327062, iteration: 62516
loss: 0.9931047558784485,grad_norm: 0.9027996207438213, iteration: 62517
loss: 0.9977812767028809,grad_norm: 0.999999453844807, iteration: 62518
loss: 0.9704787135124207,grad_norm: 0.8916579146070236, iteration: 62519
loss: 1.0110883712768555,grad_norm: 0.9591458799869901, iteration: 62520
loss: 1.0230798721313477,grad_norm: 0.824561205113302, iteration: 62521
loss: 1.0139760971069336,grad_norm: 0.8839050465320624, iteration: 62522
loss: 1.0387355089187622,grad_norm: 0.7972003679520858, iteration: 62523
loss: 1.0365263223648071,grad_norm: 0.9999994296193435, iteration: 62524
loss: 1.0251245498657227,grad_norm: 0.9999990657179225, iteration: 62525
loss: 0.9976626038551331,grad_norm: 0.9480228451317636, iteration: 62526
loss: 1.0301814079284668,grad_norm: 0.9791282745100548, iteration: 62527
loss: 1.0579692125320435,grad_norm: 0.9984003378167088, iteration: 62528
loss: 1.0799964666366577,grad_norm: 0.9999992657840835, iteration: 62529
loss: 0.9852688312530518,grad_norm: 0.9975358100604494, iteration: 62530
loss: 1.0156598091125488,grad_norm: 0.9299106079483183, iteration: 62531
loss: 1.0646424293518066,grad_norm: 0.9999996832474229, iteration: 62532
loss: 1.0090744495391846,grad_norm: 0.9999993132859458, iteration: 62533
loss: 1.0176374912261963,grad_norm: 0.9999993068099057, iteration: 62534
loss: 1.149234414100647,grad_norm: 0.9999990441958403, iteration: 62535
loss: 1.0364693403244019,grad_norm: 0.944422182803633, iteration: 62536
loss: 1.0046989917755127,grad_norm: 0.9999998662583549, iteration: 62537
loss: 1.0071660280227661,grad_norm: 0.9999993008108417, iteration: 62538
loss: 1.0231045484542847,grad_norm: 0.9131622551539244, iteration: 62539
loss: 1.0293545722961426,grad_norm: 0.9999994371129415, iteration: 62540
loss: 1.0053369998931885,grad_norm: 0.999999374385872, iteration: 62541
loss: 1.00788152217865,grad_norm: 0.9999992773670822, iteration: 62542
loss: 1.0550140142440796,grad_norm: 0.9999989839416233, iteration: 62543
loss: 0.989260733127594,grad_norm: 0.9999993609938804, iteration: 62544
loss: 1.0551245212554932,grad_norm: 0.9582018459993435, iteration: 62545
loss: 1.054181456565857,grad_norm: 0.9999992629572722, iteration: 62546
loss: 1.0111098289489746,grad_norm: 0.9999995357603275, iteration: 62547
loss: 1.0198500156402588,grad_norm: 0.9999991055600703, iteration: 62548
loss: 1.144715428352356,grad_norm: 0.999999955063615, iteration: 62549
loss: 1.0125051736831665,grad_norm: 1.0000000054726372, iteration: 62550
loss: 0.9668270349502563,grad_norm: 0.8673313672063252, iteration: 62551
loss: 0.9918970465660095,grad_norm: 0.9999990187972121, iteration: 62552
loss: 0.9968904256820679,grad_norm: 0.8320327287676808, iteration: 62553
loss: 1.0289593935012817,grad_norm: 0.9999994420667297, iteration: 62554
loss: 0.9955422282218933,grad_norm: 0.8714350483266727, iteration: 62555
loss: 1.024605393409729,grad_norm: 0.8995152672285095, iteration: 62556
loss: 1.0039587020874023,grad_norm: 0.9100336367702326, iteration: 62557
loss: 1.0115529298782349,grad_norm: 0.9999995515619482, iteration: 62558
loss: 0.9980913996696472,grad_norm: 0.9999992660399507, iteration: 62559
loss: 1.0162817239761353,grad_norm: 0.9031250046410151, iteration: 62560
loss: 1.0577081441879272,grad_norm: 0.8007159767350032, iteration: 62561
loss: 0.9889580011367798,grad_norm: 0.9999997149426898, iteration: 62562
loss: 1.0085119009017944,grad_norm: 0.9999997254693543, iteration: 62563
loss: 0.9779364466667175,grad_norm: 0.7731919515462957, iteration: 62564
loss: 1.0920138359069824,grad_norm: 0.9999992338261267, iteration: 62565
loss: 0.9974534511566162,grad_norm: 0.9607243921649805, iteration: 62566
loss: 0.9913477301597595,grad_norm: 0.9453810860662565, iteration: 62567
loss: 1.0640884637832642,grad_norm: 0.9999997206176896, iteration: 62568
loss: 1.0556477308273315,grad_norm: 0.9999993020097506, iteration: 62569
loss: 0.9897622466087341,grad_norm: 0.9999993808128157, iteration: 62570
loss: 1.0007081031799316,grad_norm: 0.802399127551807, iteration: 62571
loss: 1.0051106214523315,grad_norm: 0.9851731109776651, iteration: 62572
loss: 1.0075160264968872,grad_norm: 0.999999876961376, iteration: 62573
loss: 1.0133874416351318,grad_norm: 0.9999992137118638, iteration: 62574
loss: 1.0370055437088013,grad_norm: 0.9602321399811266, iteration: 62575
loss: 1.0009634494781494,grad_norm: 0.9366727014748638, iteration: 62576
loss: 1.0322692394256592,grad_norm: 0.9999990119104933, iteration: 62577
loss: 1.0234884023666382,grad_norm: 0.8300405494932763, iteration: 62578
loss: 1.028713583946228,grad_norm: 0.9784879535759688, iteration: 62579
loss: 1.0207802057266235,grad_norm: 0.9999997094222327, iteration: 62580
loss: 1.0123151540756226,grad_norm: 0.9999996220633163, iteration: 62581
loss: 1.0379548072814941,grad_norm: 0.9999992151996576, iteration: 62582
loss: 1.019817590713501,grad_norm: 0.9999991319791304, iteration: 62583
loss: 1.0126456022262573,grad_norm: 0.9303782276445501, iteration: 62584
loss: 1.0507643222808838,grad_norm: 0.9999994942765023, iteration: 62585
loss: 0.9981786608695984,grad_norm: 0.868177279801289, iteration: 62586
loss: 1.0310677289962769,grad_norm: 0.9999994077802716, iteration: 62587
loss: 1.0162845849990845,grad_norm: 0.9435932784014546, iteration: 62588
loss: 1.0063116550445557,grad_norm: 0.8804472311146055, iteration: 62589
loss: 1.0303419828414917,grad_norm: 0.9999993608364782, iteration: 62590
loss: 1.026062250137329,grad_norm: 0.9999992735967476, iteration: 62591
loss: 0.9897605776786804,grad_norm: 0.8836826906808269, iteration: 62592
loss: 0.9948059916496277,grad_norm: 0.8850034769544761, iteration: 62593
loss: 1.0092359781265259,grad_norm: 0.9999990139934896, iteration: 62594
loss: 1.0240548849105835,grad_norm: 0.9999994886923989, iteration: 62595
loss: 1.0044270753860474,grad_norm: 0.7330686800379288, iteration: 62596
loss: 0.9911476969718933,grad_norm: 0.9217325509288707, iteration: 62597
loss: 1.0005303621292114,grad_norm: 0.8882353501177935, iteration: 62598
loss: 1.0197455883026123,grad_norm: 0.9999996693830452, iteration: 62599
loss: 1.0399038791656494,grad_norm: 0.8617788916338709, iteration: 62600
loss: 1.1065700054168701,grad_norm: 0.9999993597998249, iteration: 62601
loss: 1.0870968103408813,grad_norm: 0.9999999345440441, iteration: 62602
loss: 0.989436149597168,grad_norm: 0.9775458107092597, iteration: 62603
loss: 1.0171077251434326,grad_norm: 0.8109803938546551, iteration: 62604
loss: 0.9888753890991211,grad_norm: 0.9999991248237868, iteration: 62605
loss: 0.9972621202468872,grad_norm: 0.9999991492354967, iteration: 62606
loss: 0.9928860068321228,grad_norm: 0.8732205237471441, iteration: 62607
loss: 0.9888429641723633,grad_norm: 0.820224491461636, iteration: 62608
loss: 1.0119572877883911,grad_norm: 0.9999992318585721, iteration: 62609
loss: 1.0292589664459229,grad_norm: 0.9062104453658203, iteration: 62610
loss: 0.998594343662262,grad_norm: 0.9999990294184405, iteration: 62611
loss: 1.0270600318908691,grad_norm: 0.9999991874332764, iteration: 62612
loss: 0.9896417260169983,grad_norm: 0.9999991253809185, iteration: 62613
loss: 0.9866442680358887,grad_norm: 0.9109633178028426, iteration: 62614
loss: 1.0357450246810913,grad_norm: 0.9049038567853547, iteration: 62615
loss: 0.9979696273803711,grad_norm: 0.9326313451040423, iteration: 62616
loss: 0.9901466965675354,grad_norm: 0.8937855113757652, iteration: 62617
loss: 0.9746990203857422,grad_norm: 0.9999993163566856, iteration: 62618
loss: 1.033646821975708,grad_norm: 0.999999253491118, iteration: 62619
loss: 1.0225260257720947,grad_norm: 0.9999998560000919, iteration: 62620
loss: 1.0083786249160767,grad_norm: 0.8512667721348551, iteration: 62621
loss: 0.979347288608551,grad_norm: 0.9420430798343917, iteration: 62622
loss: 0.9827383160591125,grad_norm: 0.9963912205854469, iteration: 62623
loss: 1.0074238777160645,grad_norm: 0.9999991330846317, iteration: 62624
loss: 0.9580413699150085,grad_norm: 0.9999990651569668, iteration: 62625
loss: 0.9972326755523682,grad_norm: 0.9999992537158658, iteration: 62626
loss: 0.9921790361404419,grad_norm: 0.8464658292467454, iteration: 62627
loss: 0.9966301918029785,grad_norm: 0.944953646832714, iteration: 62628
loss: 1.0440901517868042,grad_norm: 0.9999996658192044, iteration: 62629
loss: 1.0283715724945068,grad_norm: 0.9999991095873367, iteration: 62630
loss: 1.0069369077682495,grad_norm: 0.8448856250241453, iteration: 62631
loss: 1.006001353263855,grad_norm: 0.9776811639771661, iteration: 62632
loss: 1.0185471773147583,grad_norm: 0.8867798838005211, iteration: 62633
loss: 0.9732998013496399,grad_norm: 0.9275065653741855, iteration: 62634
loss: 1.0502017736434937,grad_norm: 0.999999120847581, iteration: 62635
loss: 1.0421212911605835,grad_norm: 0.9820832280410583, iteration: 62636
loss: 0.996768057346344,grad_norm: 0.999999810089315, iteration: 62637
loss: 1.0080386400222778,grad_norm: 0.9216207453439152, iteration: 62638
loss: 0.9718918800354004,grad_norm: 0.7702919972209877, iteration: 62639
loss: 0.9864840507507324,grad_norm: 0.9779676271418283, iteration: 62640
loss: 1.0008875131607056,grad_norm: 0.9999993405820433, iteration: 62641
loss: 1.0134350061416626,grad_norm: 0.9999989705898956, iteration: 62642
loss: 1.0717772245407104,grad_norm: 0.9999995908744287, iteration: 62643
loss: 0.9998466968536377,grad_norm: 0.8983757053404747, iteration: 62644
loss: 1.0140206813812256,grad_norm: 0.9565370785034739, iteration: 62645
loss: 0.9932199120521545,grad_norm: 0.9413128297066689, iteration: 62646
loss: 1.0629173517227173,grad_norm: 0.9999993846430418, iteration: 62647
loss: 1.0173654556274414,grad_norm: 0.9999999319013242, iteration: 62648
loss: 1.058976173400879,grad_norm: 0.9999995351827484, iteration: 62649
loss: 1.0203676223754883,grad_norm: 0.9636476928216731, iteration: 62650
loss: 1.0376583337783813,grad_norm: 0.9999991672305446, iteration: 62651
loss: 1.0735814571380615,grad_norm: 0.9999990414241904, iteration: 62652
loss: 1.022363543510437,grad_norm: 0.9999992625343783, iteration: 62653
loss: 1.0758881568908691,grad_norm: 0.9999991350504692, iteration: 62654
loss: 1.1006888151168823,grad_norm: 0.99999987574366, iteration: 62655
loss: 1.0210561752319336,grad_norm: 0.9999989872596223, iteration: 62656
loss: 1.004441499710083,grad_norm: 0.9999990795286309, iteration: 62657
loss: 0.9946190118789673,grad_norm: 0.9244977776749095, iteration: 62658
loss: 0.9916277527809143,grad_norm: 0.9213050659015819, iteration: 62659
loss: 0.9801074862480164,grad_norm: 0.8795681055797054, iteration: 62660
loss: 1.0570591688156128,grad_norm: 0.9999994762738319, iteration: 62661
loss: 1.0549540519714355,grad_norm: 0.9999996786597178, iteration: 62662
loss: 1.0469096899032593,grad_norm: 0.9999997703689519, iteration: 62663
loss: 1.313575029373169,grad_norm: 0.9999994078123419, iteration: 62664
loss: 1.1389905214309692,grad_norm: 0.9999995855033199, iteration: 62665
loss: 1.0259581804275513,grad_norm: 0.9999996483005363, iteration: 62666
loss: 1.1027355194091797,grad_norm: 0.9999996316549526, iteration: 62667
loss: 1.011410117149353,grad_norm: 0.9999992148481522, iteration: 62668
loss: 1.0208379030227661,grad_norm: 0.9999993648881196, iteration: 62669
loss: 1.1004130840301514,grad_norm: 0.999999736513286, iteration: 62670
loss: 1.062609314918518,grad_norm: 0.9788598444623237, iteration: 62671
loss: 1.053452968597412,grad_norm: 0.9999994664394588, iteration: 62672
loss: 1.1207304000854492,grad_norm: 0.9609320275829194, iteration: 62673
loss: 1.043123483657837,grad_norm: 0.9999996269481525, iteration: 62674
loss: 1.0124623775482178,grad_norm: 0.9999992850527254, iteration: 62675
loss: 1.0294885635375977,grad_norm: 0.9999994088794832, iteration: 62676
loss: 1.0246037244796753,grad_norm: 0.9999994483405891, iteration: 62677
loss: 1.0276174545288086,grad_norm: 0.9053272933062773, iteration: 62678
loss: 1.0200122594833374,grad_norm: 0.9999997346803561, iteration: 62679
loss: 1.100465178489685,grad_norm: 0.9999994545202703, iteration: 62680
loss: 1.0315055847167969,grad_norm: 0.9999993352834293, iteration: 62681
loss: 1.032997727394104,grad_norm: 0.9999990685078154, iteration: 62682
loss: 1.0292235612869263,grad_norm: 0.8416419311926674, iteration: 62683
loss: 1.0111145973205566,grad_norm: 0.8126743890385788, iteration: 62684
loss: 1.058367133140564,grad_norm: 0.9999992483794016, iteration: 62685
loss: 1.0635104179382324,grad_norm: 0.9999997442920544, iteration: 62686
loss: 1.0271351337432861,grad_norm: 0.9999999414032124, iteration: 62687
loss: 1.0481998920440674,grad_norm: 0.9999993089135906, iteration: 62688
loss: 1.0014781951904297,grad_norm: 0.8641687822627829, iteration: 62689
loss: 1.0241376161575317,grad_norm: 0.9999993511265939, iteration: 62690
loss: 1.0387077331542969,grad_norm: 0.9457730906095528, iteration: 62691
loss: 1.015071988105774,grad_norm: 0.9110994111895508, iteration: 62692
loss: 1.0018607378005981,grad_norm: 0.9600382065101319, iteration: 62693
loss: 0.9516907930374146,grad_norm: 0.8911001813752524, iteration: 62694
loss: 1.0253043174743652,grad_norm: 0.9999993633703621, iteration: 62695
loss: 1.1461507081985474,grad_norm: 0.9999997066238812, iteration: 62696
loss: 1.0115647315979004,grad_norm: 0.9999991685204495, iteration: 62697
loss: 0.9975451231002808,grad_norm: 0.7745474465126059, iteration: 62698
loss: 1.0099650621414185,grad_norm: 0.8740228901554954, iteration: 62699
loss: 1.0168977975845337,grad_norm: 0.9999989922371343, iteration: 62700
loss: 1.0645862817764282,grad_norm: 0.9999999422257418, iteration: 62701
loss: 1.039047122001648,grad_norm: 0.9999995533297868, iteration: 62702
loss: 1.0444743633270264,grad_norm: 0.9839560208899221, iteration: 62703
loss: 1.0316128730773926,grad_norm: 0.9999996336919423, iteration: 62704
loss: 1.009116768836975,grad_norm: 0.9999991658807765, iteration: 62705
loss: 1.0060516595840454,grad_norm: 0.8604666244649267, iteration: 62706
loss: 1.0288410186767578,grad_norm: 0.9999992554577551, iteration: 62707
loss: 0.9820734858512878,grad_norm: 0.7289932849252257, iteration: 62708
loss: 0.9967155456542969,grad_norm: 0.8516042037418703, iteration: 62709
loss: 0.9982267022132874,grad_norm: 0.9999993188227946, iteration: 62710
loss: 1.0482063293457031,grad_norm: 0.9999995303360885, iteration: 62711
loss: 1.0292479991912842,grad_norm: 0.9999992234085453, iteration: 62712
loss: 1.0235508680343628,grad_norm: 0.9999990783754391, iteration: 62713
loss: 1.0107512474060059,grad_norm: 0.9999991820892948, iteration: 62714
loss: 0.9869582056999207,grad_norm: 0.8739694862185915, iteration: 62715
loss: 1.002126932144165,grad_norm: 0.9674635992883657, iteration: 62716
loss: 1.0408753156661987,grad_norm: 0.959092764449555, iteration: 62717
loss: 1.1261509656906128,grad_norm: 0.9999997073809173, iteration: 62718
loss: 0.9712367653846741,grad_norm: 0.9999991440689089, iteration: 62719
loss: 1.0353261232376099,grad_norm: 0.9999996270317768, iteration: 62720
loss: 1.002880334854126,grad_norm: 0.9999992154519889, iteration: 62721
loss: 1.005063772201538,grad_norm: 0.8079245591116199, iteration: 62722
loss: 1.0094738006591797,grad_norm: 0.9825725761871865, iteration: 62723
loss: 1.0246580839157104,grad_norm: 0.9999993339574118, iteration: 62724
loss: 0.9958327412605286,grad_norm: 0.9999991802356043, iteration: 62725
loss: 0.9983338117599487,grad_norm: 0.907627970272597, iteration: 62726
loss: 1.1324063539505005,grad_norm: 0.9999995262476344, iteration: 62727
loss: 0.9895469546318054,grad_norm: 0.9081694153898172, iteration: 62728
loss: 1.0506691932678223,grad_norm: 0.9999998264704066, iteration: 62729
loss: 1.0094327926635742,grad_norm: 0.9346404038386223, iteration: 62730
loss: 1.0499845743179321,grad_norm: 0.8904979941527421, iteration: 62731
loss: 1.0316413640975952,grad_norm: 0.9999994474939968, iteration: 62732
loss: 1.008497953414917,grad_norm: 0.9124850331008709, iteration: 62733
loss: 1.02168869972229,grad_norm: 0.9999991744779655, iteration: 62734
loss: 1.0067380666732788,grad_norm: 0.8233648072863888, iteration: 62735
loss: 1.0224881172180176,grad_norm: 0.9999991578013082, iteration: 62736
loss: 1.01776921749115,grad_norm: 0.9999995696384841, iteration: 62737
loss: 0.9643553495407104,grad_norm: 0.8567742869309686, iteration: 62738
loss: 1.020982265472412,grad_norm: 0.9999993244256118, iteration: 62739
loss: 0.9882701635360718,grad_norm: 0.9618682619780917, iteration: 62740
loss: 1.0117545127868652,grad_norm: 0.93709568465175, iteration: 62741
loss: 1.0077909231185913,grad_norm: 0.9791343800605509, iteration: 62742
loss: 1.0092589855194092,grad_norm: 0.9999991957996431, iteration: 62743
loss: 1.0229229927062988,grad_norm: 0.9155804980414451, iteration: 62744
loss: 1.0169053077697754,grad_norm: 0.9211160764161065, iteration: 62745
loss: 1.0124281644821167,grad_norm: 0.9406398014874707, iteration: 62746
loss: 1.0238109827041626,grad_norm: 0.9999992609460993, iteration: 62747
loss: 1.0157264471054077,grad_norm: 0.9999991073246766, iteration: 62748
loss: 1.002868890762329,grad_norm: 0.9352271221585741, iteration: 62749
loss: 1.0165224075317383,grad_norm: 0.8744175082435276, iteration: 62750
loss: 1.0267256498336792,grad_norm: 0.9999993373225088, iteration: 62751
loss: 1.0139034986495972,grad_norm: 0.8331516727968495, iteration: 62752
loss: 0.9786529541015625,grad_norm: 0.8550437322372739, iteration: 62753
loss: 1.0240893363952637,grad_norm: 0.9265453224288531, iteration: 62754
loss: 0.9930462837219238,grad_norm: 0.9302493440777471, iteration: 62755
loss: 1.0181503295898438,grad_norm: 0.9643713112412372, iteration: 62756
loss: 0.9805808067321777,grad_norm: 0.9079176605741599, iteration: 62757
loss: 1.0107271671295166,grad_norm: 0.9999991453374724, iteration: 62758
loss: 1.0172017812728882,grad_norm: 0.9999991313380509, iteration: 62759
loss: 0.9597020149230957,grad_norm: 0.9551943152036627, iteration: 62760
loss: 1.0188930034637451,grad_norm: 0.798000774905158, iteration: 62761
loss: 1.014318823814392,grad_norm: 0.9999992868179765, iteration: 62762
loss: 1.0500603914260864,grad_norm: 0.7833442461561998, iteration: 62763
loss: 0.987984299659729,grad_norm: 0.9999994068375627, iteration: 62764
loss: 0.9776687622070312,grad_norm: 0.9999991050758609, iteration: 62765
loss: 0.9796689748764038,grad_norm: 0.9999990530130636, iteration: 62766
loss: 1.0815649032592773,grad_norm: 0.9999990756806199, iteration: 62767
loss: 1.0126763582229614,grad_norm: 0.8673355696459941, iteration: 62768
loss: 1.000545620918274,grad_norm: 0.9296887301314138, iteration: 62769
loss: 0.9894004464149475,grad_norm: 0.8880010596294193, iteration: 62770
loss: 0.9859453439712524,grad_norm: 0.9999990447009405, iteration: 62771
loss: 0.9858793020248413,grad_norm: 0.9999994793256609, iteration: 62772
loss: 0.9781311750411987,grad_norm: 0.9999989980028405, iteration: 62773
loss: 0.9969980120658875,grad_norm: 0.8548232058886865, iteration: 62774
loss: 1.0032494068145752,grad_norm: 0.9274205271467264, iteration: 62775
loss: 0.9847564697265625,grad_norm: 0.9285445162587016, iteration: 62776
loss: 0.9996957778930664,grad_norm: 0.7321627678762833, iteration: 62777
loss: 0.9922305941581726,grad_norm: 0.9999990394299393, iteration: 62778
loss: 0.9923648238182068,grad_norm: 0.9999990799212762, iteration: 62779
loss: 1.0030152797698975,grad_norm: 0.9999991955127869, iteration: 62780
loss: 1.032665729522705,grad_norm: 0.9691440230782402, iteration: 62781
loss: 0.9957665205001831,grad_norm: 0.9999995937260988, iteration: 62782
loss: 1.0188149213790894,grad_norm: 0.9999992587250145, iteration: 62783
loss: 0.9993065595626831,grad_norm: 0.9999990276025822, iteration: 62784
loss: 1.0080045461654663,grad_norm: 0.9174485339171216, iteration: 62785
loss: 1.0043556690216064,grad_norm: 0.8885589808052367, iteration: 62786
loss: 0.9848158955574036,grad_norm: 0.8310564199389533, iteration: 62787
loss: 1.0480859279632568,grad_norm: 0.8345962742864952, iteration: 62788
loss: 1.016185998916626,grad_norm: 0.8920932446419986, iteration: 62789
loss: 1.000320315361023,grad_norm: 0.965690871823524, iteration: 62790
loss: 0.9881203174591064,grad_norm: 0.8966080694490252, iteration: 62791
loss: 1.012841820716858,grad_norm: 0.9999997339142157, iteration: 62792
loss: 0.9819126129150391,grad_norm: 0.9999991324846007, iteration: 62793
loss: 1.0144615173339844,grad_norm: 0.9999991623111038, iteration: 62794
loss: 0.9816551208496094,grad_norm: 0.999999168496824, iteration: 62795
loss: 1.006119728088379,grad_norm: 0.999999066702249, iteration: 62796
loss: 0.9878138303756714,grad_norm: 0.9999991662536115, iteration: 62797
loss: 1.037346363067627,grad_norm: 0.9999990575041519, iteration: 62798
loss: 1.0052546262741089,grad_norm: 0.9359107906751212, iteration: 62799
loss: 1.0541375875473022,grad_norm: 0.9999999819528768, iteration: 62800
loss: 1.0305817127227783,grad_norm: 0.9847843946532395, iteration: 62801
loss: 1.0165185928344727,grad_norm: 0.9244873766202061, iteration: 62802
loss: 1.0222721099853516,grad_norm: 0.9952570584731478, iteration: 62803
loss: 1.0156006813049316,grad_norm: 0.9803490077761619, iteration: 62804
loss: 1.201608657836914,grad_norm: 0.9999991717991196, iteration: 62805
loss: 1.0580238103866577,grad_norm: 0.9711675292008923, iteration: 62806
loss: 1.0131863355636597,grad_norm: 0.974745205969432, iteration: 62807
loss: 1.0099642276763916,grad_norm: 0.9999988958418412, iteration: 62808
loss: 1.0142234563827515,grad_norm: 0.9999998806518993, iteration: 62809
loss: 0.9774001240730286,grad_norm: 0.9913317854998771, iteration: 62810
loss: 1.0033926963806152,grad_norm: 0.9618247316678172, iteration: 62811
loss: 1.0139974355697632,grad_norm: 0.9999991937292786, iteration: 62812
loss: 0.9830963015556335,grad_norm: 0.9999989923426914, iteration: 62813
loss: 0.9743979573249817,grad_norm: 0.9519158335947658, iteration: 62814
loss: 0.9998323321342468,grad_norm: 0.8431522427534637, iteration: 62815
loss: 1.010872483253479,grad_norm: 0.9496714104700158, iteration: 62816
loss: 0.9627828001976013,grad_norm: 0.999999091882032, iteration: 62817
loss: 1.0147854089736938,grad_norm: 0.9171540507648841, iteration: 62818
loss: 0.9871611595153809,grad_norm: 0.8546678574697625, iteration: 62819
loss: 0.9963637590408325,grad_norm: 0.9192212646850574, iteration: 62820
loss: 0.997805655002594,grad_norm: 0.9688971365250652, iteration: 62821
loss: 0.9805570244789124,grad_norm: 0.999999095040371, iteration: 62822
loss: 1.0451875925064087,grad_norm: 0.9999994255131431, iteration: 62823
loss: 1.024008870124817,grad_norm: 0.8469135520294303, iteration: 62824
loss: 0.9617875814437866,grad_norm: 0.9999992776048947, iteration: 62825
loss: 1.0277000665664673,grad_norm: 0.9999992329170404, iteration: 62826
loss: 1.0221307277679443,grad_norm: 0.934715373435056, iteration: 62827
loss: 1.0105551481246948,grad_norm: 0.9174575014118738, iteration: 62828
loss: 0.9836821556091309,grad_norm: 0.9999990747632889, iteration: 62829
loss: 1.0575538873672485,grad_norm: 0.981740840784657, iteration: 62830
loss: 1.0465327501296997,grad_norm: 0.9999994946373432, iteration: 62831
loss: 0.9808488488197327,grad_norm: 0.9168094925709022, iteration: 62832
loss: 0.99458909034729,grad_norm: 0.9160270267929477, iteration: 62833
loss: 0.9853034019470215,grad_norm: 0.9848587528433824, iteration: 62834
loss: 1.0865364074707031,grad_norm: 0.9999994043854307, iteration: 62835
loss: 0.9822351336479187,grad_norm: 0.8651967898890596, iteration: 62836
loss: 0.9943017363548279,grad_norm: 0.9999990460690618, iteration: 62837
loss: 1.0040898323059082,grad_norm: 0.9570387239916669, iteration: 62838
loss: 1.0140632390975952,grad_norm: 0.9087957963482285, iteration: 62839
loss: 1.0756407976150513,grad_norm: 0.9999992931586266, iteration: 62840
loss: 1.0230504274368286,grad_norm: 0.8908318892519675, iteration: 62841
loss: 0.9765900373458862,grad_norm: 0.8738121413873596, iteration: 62842
loss: 0.9956194758415222,grad_norm: 0.9305587407524, iteration: 62843
loss: 0.9692583680152893,grad_norm: 0.937435650214637, iteration: 62844
loss: 1.008664846420288,grad_norm: 0.8343254681190065, iteration: 62845
loss: 0.9504677057266235,grad_norm: 0.9505170097992367, iteration: 62846
loss: 1.017869234085083,grad_norm: 0.9303740156327144, iteration: 62847
loss: 0.9785812497138977,grad_norm: 0.9625259971298451, iteration: 62848
loss: 0.9882602095603943,grad_norm: 0.8672261962375143, iteration: 62849
loss: 1.0064208507537842,grad_norm: 0.8730411793455152, iteration: 62850
loss: 1.0153372287750244,grad_norm: 0.9871592006449318, iteration: 62851
loss: 0.9492455720901489,grad_norm: 0.9129032282111942, iteration: 62852
loss: 1.0134344100952148,grad_norm: 0.9855149779291837, iteration: 62853
loss: 0.9924611449241638,grad_norm: 0.8426053751992433, iteration: 62854
loss: 1.0007672309875488,grad_norm: 0.9324539614039017, iteration: 62855
loss: 0.9755662083625793,grad_norm: 0.9649212921101091, iteration: 62856
loss: 0.9930880665779114,grad_norm: 0.9999989684927747, iteration: 62857
loss: 1.0148651599884033,grad_norm: 0.7223440515937238, iteration: 62858
loss: 0.9982849359512329,grad_norm: 0.9960010288602945, iteration: 62859
loss: 1.0046360492706299,grad_norm: 0.9999989533558011, iteration: 62860
loss: 1.0167009830474854,grad_norm: 0.9999995648782364, iteration: 62861
loss: 1.049993634223938,grad_norm: 0.9999993928589871, iteration: 62862
loss: 1.015283465385437,grad_norm: 0.9890581690245301, iteration: 62863
loss: 0.9943036437034607,grad_norm: 0.9286794433566712, iteration: 62864
loss: 1.0222522020339966,grad_norm: 0.9999996701845285, iteration: 62865
loss: 0.9621598720550537,grad_norm: 0.9131928494420349, iteration: 62866
loss: 1.0417027473449707,grad_norm: 0.9999992242356616, iteration: 62867
loss: 0.9658197164535522,grad_norm: 0.7847088002772926, iteration: 62868
loss: 1.0879111289978027,grad_norm: 0.9999995981996429, iteration: 62869
loss: 1.015195608139038,grad_norm: 0.9628484295429023, iteration: 62870
loss: 1.0967124700546265,grad_norm: 0.9238182260349174, iteration: 62871
loss: 0.9986192584037781,grad_norm: 0.823650743759365, iteration: 62872
loss: 1.0325831174850464,grad_norm: 0.9913123521575999, iteration: 62873
loss: 1.0162310600280762,grad_norm: 0.9196345641395722, iteration: 62874
loss: 0.9687393307685852,grad_norm: 0.8919087231721475, iteration: 62875
loss: 0.9944114089012146,grad_norm: 0.9999989759984804, iteration: 62876
loss: 1.0127159357070923,grad_norm: 0.805342835891203, iteration: 62877
loss: 0.9856416583061218,grad_norm: 0.9999991015799434, iteration: 62878
loss: 0.9930525422096252,grad_norm: 0.918435742574672, iteration: 62879
loss: 1.0294091701507568,grad_norm: 0.9999996367310053, iteration: 62880
loss: 1.0051809549331665,grad_norm: 0.9999991083594114, iteration: 62881
loss: 0.9691275954246521,grad_norm: 0.9460879836317804, iteration: 62882
loss: 1.0090197324752808,grad_norm: 0.859563728411782, iteration: 62883
loss: 0.9948107004165649,grad_norm: 0.9999992379191914, iteration: 62884
loss: 0.9936041831970215,grad_norm: 0.9999993489321748, iteration: 62885
loss: 1.0748388767242432,grad_norm: 0.9803289183915577, iteration: 62886
loss: 1.0241838693618774,grad_norm: 0.9999997173403816, iteration: 62887
loss: 1.038025140762329,grad_norm: 0.9427819879304031, iteration: 62888
loss: 1.0141425132751465,grad_norm: 0.9999991420368686, iteration: 62889
loss: 1.0068060159683228,grad_norm: 0.9797295017529463, iteration: 62890
loss: 1.0642566680908203,grad_norm: 0.9999996185164803, iteration: 62891
loss: 0.9966055750846863,grad_norm: 0.8483406517790048, iteration: 62892
loss: 0.9962178468704224,grad_norm: 0.8322374414232312, iteration: 62893
loss: 1.022039771080017,grad_norm: 0.9729123677007532, iteration: 62894
loss: 1.029441237449646,grad_norm: 0.8437348603095638, iteration: 62895
loss: 1.0449885129928589,grad_norm: 0.9166656602682374, iteration: 62896
loss: 1.0875273942947388,grad_norm: 0.9999990038202159, iteration: 62897
loss: 1.0435148477554321,grad_norm: 0.83397572554938, iteration: 62898
loss: 1.041111707687378,grad_norm: 0.9999992637152254, iteration: 62899
loss: 1.0000360012054443,grad_norm: 0.9999995721089839, iteration: 62900
loss: 0.988254964351654,grad_norm: 0.9228968718386379, iteration: 62901
loss: 0.9912872314453125,grad_norm: 0.9999992133705783, iteration: 62902
loss: 0.9809460043907166,grad_norm: 0.9999990509748125, iteration: 62903
loss: 0.9862399697303772,grad_norm: 0.9907954424562212, iteration: 62904
loss: 0.993409276008606,grad_norm: 0.999999664724355, iteration: 62905
loss: 0.9816467761993408,grad_norm: 0.7744501320125969, iteration: 62906
loss: 0.9804543852806091,grad_norm: 0.9999992292969015, iteration: 62907
loss: 1.007724404335022,grad_norm: 0.9206477740953364, iteration: 62908
loss: 0.9901396036148071,grad_norm: 0.9593635181397464, iteration: 62909
loss: 0.974273681640625,grad_norm: 0.8544591763988775, iteration: 62910
loss: 1.0122923851013184,grad_norm: 0.9999995133248814, iteration: 62911
loss: 0.9933263063430786,grad_norm: 0.9999991540211653, iteration: 62912
loss: 1.032848596572876,grad_norm: 0.9999995619119364, iteration: 62913
loss: 1.009159803390503,grad_norm: 0.9841698479699467, iteration: 62914
loss: 0.969403088092804,grad_norm: 0.999999325259851, iteration: 62915
loss: 1.003614068031311,grad_norm: 0.9798143873187534, iteration: 62916
loss: 0.9668306708335876,grad_norm: 0.9999991874084653, iteration: 62917
loss: 0.9831202626228333,grad_norm: 0.9788364586476064, iteration: 62918
loss: 1.0082908868789673,grad_norm: 0.9999990144043192, iteration: 62919
loss: 1.0582913160324097,grad_norm: 0.9999996386291483, iteration: 62920
loss: 0.9910100102424622,grad_norm: 0.9950162119938214, iteration: 62921
loss: 1.0172014236450195,grad_norm: 0.999999358507702, iteration: 62922
loss: 0.9901161193847656,grad_norm: 0.9999990192749346, iteration: 62923
loss: 0.9762126803398132,grad_norm: 0.8597677884072383, iteration: 62924
loss: 0.9607199430465698,grad_norm: 0.9999989765270442, iteration: 62925
loss: 1.044875979423523,grad_norm: 0.9999991137954998, iteration: 62926
loss: 1.0565412044525146,grad_norm: 0.999999175120605, iteration: 62927
loss: 1.0131313800811768,grad_norm: 0.8478234266884186, iteration: 62928
loss: 1.023589849472046,grad_norm: 0.9769900402547015, iteration: 62929
loss: 1.0702123641967773,grad_norm: 0.8926276247691075, iteration: 62930
loss: 0.9874930381774902,grad_norm: 0.9581955199486435, iteration: 62931
loss: 1.027902603149414,grad_norm: 0.9757837170244039, iteration: 62932
loss: 0.9985612630844116,grad_norm: 0.9999991047118885, iteration: 62933
loss: 1.0003249645233154,grad_norm: 0.8594416685764923, iteration: 62934
loss: 1.0535764694213867,grad_norm: 0.9999990707493187, iteration: 62935
loss: 1.0018943548202515,grad_norm: 0.9999991635894869, iteration: 62936
loss: 0.9731537103652954,grad_norm: 0.9999990972235479, iteration: 62937
loss: 0.9939625263214111,grad_norm: 0.9349496893110217, iteration: 62938
loss: 1.0635666847229004,grad_norm: 0.9999990729248154, iteration: 62939
loss: 1.0476527214050293,grad_norm: 0.9999991914391442, iteration: 62940
loss: 1.036871075630188,grad_norm: 0.9999991059562736, iteration: 62941
loss: 0.9756519198417664,grad_norm: 0.9999991918536281, iteration: 62942
loss: 0.9862059950828552,grad_norm: 0.9104690903053464, iteration: 62943
loss: 0.9985334873199463,grad_norm: 0.9615813898714624, iteration: 62944
loss: 1.014866828918457,grad_norm: 0.9999990679121951, iteration: 62945
loss: 1.0159926414489746,grad_norm: 0.8738482416791061, iteration: 62946
loss: 0.9612482786178589,grad_norm: 0.8286061247674026, iteration: 62947
loss: 0.9948015809059143,grad_norm: 0.9128413870231639, iteration: 62948
loss: 1.0349470376968384,grad_norm: 0.9999993743973996, iteration: 62949
loss: 0.9650649428367615,grad_norm: 0.9250754780978878, iteration: 62950
loss: 1.0414586067199707,grad_norm: 0.9838310088217149, iteration: 62951
loss: 1.0242046117782593,grad_norm: 0.8451344031400858, iteration: 62952
loss: 1.0015075206756592,grad_norm: 0.8173919151716701, iteration: 62953
loss: 1.0079549551010132,grad_norm: 0.9818252407984157, iteration: 62954
loss: 1.0186793804168701,grad_norm: 0.8286705596431068, iteration: 62955
loss: 0.9734094142913818,grad_norm: 0.9999992136852178, iteration: 62956
loss: 1.0156750679016113,grad_norm: 0.9999991481680793, iteration: 62957
loss: 1.0046966075897217,grad_norm: 0.7929460724850983, iteration: 62958
loss: 1.0173802375793457,grad_norm: 0.9488746176699348, iteration: 62959
loss: 0.9991507530212402,grad_norm: 0.9999994807130903, iteration: 62960
loss: 1.0024278163909912,grad_norm: 0.8005721280429823, iteration: 62961
loss: 0.9943111538887024,grad_norm: 0.786635701239345, iteration: 62962
loss: 1.0366387367248535,grad_norm: 0.9999992347055868, iteration: 62963
loss: 0.9819319248199463,grad_norm: 0.849830999114833, iteration: 62964
loss: 0.9866684675216675,grad_norm: 0.926452141940275, iteration: 62965
loss: 0.9868653416633606,grad_norm: 0.9201404251558155, iteration: 62966
loss: 1.06223726272583,grad_norm: 0.999999931920082, iteration: 62967
loss: 0.987854540348053,grad_norm: 0.9999990462980919, iteration: 62968
loss: 0.9886147379875183,grad_norm: 0.9999989956631338, iteration: 62969
loss: 0.9965515732765198,grad_norm: 0.882520428252777, iteration: 62970
loss: 0.99333256483078,grad_norm: 0.830123187103777, iteration: 62971
loss: 1.0013039112091064,grad_norm: 0.9999988646740886, iteration: 62972
loss: 0.9930163025856018,grad_norm: 0.9427092563586029, iteration: 62973
loss: 0.9942035675048828,grad_norm: 0.7207801025182895, iteration: 62974
loss: 0.9992607235908508,grad_norm: 0.8948698547441114, iteration: 62975
loss: 1.0303313732147217,grad_norm: 0.9548610186692802, iteration: 62976
loss: 0.9918993711471558,grad_norm: 0.9682866662358677, iteration: 62977
loss: 0.9982135891914368,grad_norm: 0.9019762438918476, iteration: 62978
loss: 1.0039852857589722,grad_norm: 0.9999990885513047, iteration: 62979
loss: 1.0210598707199097,grad_norm: 0.8747057857324883, iteration: 62980
loss: 0.9756014943122864,grad_norm: 0.9815904859733603, iteration: 62981
loss: 0.9505236744880676,grad_norm: 0.9999990908397568, iteration: 62982
loss: 1.003388524055481,grad_norm: 0.9999997272712063, iteration: 62983
loss: 0.9747642278671265,grad_norm: 0.9512646319356375, iteration: 62984
loss: 1.0484592914581299,grad_norm: 0.8791695959978919, iteration: 62985
loss: 1.0300434827804565,grad_norm: 0.9999996565847522, iteration: 62986
loss: 0.985348641872406,grad_norm: 0.8688844626049144, iteration: 62987
loss: 1.1415507793426514,grad_norm: 0.9999998528712617, iteration: 62988
loss: 0.9899250268936157,grad_norm: 0.8285077981271408, iteration: 62989
loss: 1.0189272165298462,grad_norm: 0.9843067924345072, iteration: 62990
loss: 1.008975863456726,grad_norm: 0.9999991928340582, iteration: 62991
loss: 0.9617331027984619,grad_norm: 0.9999991944103103, iteration: 62992
loss: 0.9960128664970398,grad_norm: 0.9999990943224987, iteration: 62993
loss: 1.0100209712982178,grad_norm: 0.8574663232797648, iteration: 62994
loss: 0.9566769599914551,grad_norm: 0.8391560670267002, iteration: 62995
loss: 1.00808846950531,grad_norm: 0.9999992322131717, iteration: 62996
loss: 0.9763194918632507,grad_norm: 0.9999992880906985, iteration: 62997
loss: 1.0202670097351074,grad_norm: 0.9999994678519265, iteration: 62998
loss: 1.0275695323944092,grad_norm: 0.9999994521221474, iteration: 62999
loss: 1.0255751609802246,grad_norm: 0.9923566833193329, iteration: 63000
loss: 1.0094507932662964,grad_norm: 0.9999998055249059, iteration: 63001
loss: 0.9821268916130066,grad_norm: 0.8662540921027051, iteration: 63002
loss: 1.0118316411972046,grad_norm: 0.7736252850156577, iteration: 63003
loss: 1.0248808860778809,grad_norm: 0.8459404593359756, iteration: 63004
loss: 0.9976054430007935,grad_norm: 0.9999991018621851, iteration: 63005
loss: 1.0312269926071167,grad_norm: 0.9999998578054622, iteration: 63006
loss: 1.0259968042373657,grad_norm: 0.9999994588428378, iteration: 63007
loss: 1.0085890293121338,grad_norm: 0.8901831614050597, iteration: 63008
loss: 1.011206865310669,grad_norm: 0.7972224681237449, iteration: 63009
loss: 1.0542880296707153,grad_norm: 0.9999995498205143, iteration: 63010
loss: 1.0058051347732544,grad_norm: 0.9503439991963093, iteration: 63011
loss: 1.0132050514221191,grad_norm: 0.8981971851265987, iteration: 63012
loss: 1.0780577659606934,grad_norm: 0.9999991689061696, iteration: 63013
loss: 1.0452812910079956,grad_norm: 0.9999998324852981, iteration: 63014
loss: 1.0137742757797241,grad_norm: 0.789211557778647, iteration: 63015
loss: 0.9815118908882141,grad_norm: 0.7987926220156892, iteration: 63016
loss: 1.0431240797042847,grad_norm: 0.8239684632621798, iteration: 63017
loss: 0.9836997389793396,grad_norm: 0.9999989976583646, iteration: 63018
loss: 1.0368404388427734,grad_norm: 0.9999995896976629, iteration: 63019
loss: 0.9887377619743347,grad_norm: 0.9999992560168091, iteration: 63020
loss: 0.9906045794487,grad_norm: 0.9999996219284171, iteration: 63021
loss: 0.9702287316322327,grad_norm: 0.8727389187038653, iteration: 63022
loss: 1.0044409036636353,grad_norm: 0.908030368246335, iteration: 63023
loss: 0.981198251247406,grad_norm: 0.8697305269167809, iteration: 63024
loss: 1.0638158321380615,grad_norm: 0.999999871955987, iteration: 63025
loss: 1.0306514501571655,grad_norm: 0.9999996801711739, iteration: 63026
loss: 1.0041736364364624,grad_norm: 0.8918825478981609, iteration: 63027
loss: 1.0469610691070557,grad_norm: 0.9999999311039944, iteration: 63028
loss: 1.0181324481964111,grad_norm: 0.9999991974703899, iteration: 63029
loss: 1.0061781406402588,grad_norm: 0.8856072095666065, iteration: 63030
loss: 1.008195161819458,grad_norm: 0.9999994374871914, iteration: 63031
loss: 0.9613635540008545,grad_norm: 0.999999114551551, iteration: 63032
loss: 1.0347213745117188,grad_norm: 0.9999990980875412, iteration: 63033
loss: 1.0248233079910278,grad_norm: 0.9999998024719765, iteration: 63034
loss: 1.0003222227096558,grad_norm: 0.999999047595083, iteration: 63035
loss: 1.0309293270111084,grad_norm: 0.9999993885277376, iteration: 63036
loss: 1.0052955150604248,grad_norm: 0.9522123974903566, iteration: 63037
loss: 0.9827145934104919,grad_norm: 0.9364873296525523, iteration: 63038
loss: 1.0530369281768799,grad_norm: 0.9999994747899543, iteration: 63039
loss: 1.0686612129211426,grad_norm: 0.9999993831123491, iteration: 63040
loss: 0.993370771408081,grad_norm: 0.8413956784364421, iteration: 63041
loss: 1.0201841592788696,grad_norm: 0.9319128632300963, iteration: 63042
loss: 1.0207486152648926,grad_norm: 0.99999912087743, iteration: 63043
loss: 1.231726050376892,grad_norm: 0.9999996231443528, iteration: 63044
loss: 1.0080316066741943,grad_norm: 0.8201030416139334, iteration: 63045
loss: 1.0192766189575195,grad_norm: 0.9361121785200383, iteration: 63046
loss: 1.0040210485458374,grad_norm: 0.9999990108193415, iteration: 63047
loss: 1.0169274806976318,grad_norm: 0.9241240639123983, iteration: 63048
loss: 1.0805716514587402,grad_norm: 0.9999991952470828, iteration: 63049
loss: 1.0113378763198853,grad_norm: 0.9999990290856915, iteration: 63050
loss: 1.0210295915603638,grad_norm: 0.9999992565747615, iteration: 63051
loss: 0.9990363717079163,grad_norm: 0.91148176316996, iteration: 63052
loss: 1.059447169303894,grad_norm: 0.9641710212931224, iteration: 63053
loss: 0.9971065521240234,grad_norm: 0.9999990003788612, iteration: 63054
loss: 1.1195248365402222,grad_norm: 0.9999992993375313, iteration: 63055
loss: 1.0065093040466309,grad_norm: 0.9673356601191954, iteration: 63056
loss: 1.4087368249893188,grad_norm: 0.9999998850890802, iteration: 63057
loss: 1.0180368423461914,grad_norm: 0.9999991947886147, iteration: 63058
loss: 0.9852306246757507,grad_norm: 0.8861148072162656, iteration: 63059
loss: 1.0323766469955444,grad_norm: 0.9999995774548053, iteration: 63060
loss: 0.9929749965667725,grad_norm: 0.9999992790464187, iteration: 63061
loss: 1.033943772315979,grad_norm: 0.9653457094817447, iteration: 63062
loss: 0.9878585934638977,grad_norm: 0.9870183562763825, iteration: 63063
loss: 0.9791499376296997,grad_norm: 0.9999993027164178, iteration: 63064
loss: 0.9902845621109009,grad_norm: 0.8469173535375148, iteration: 63065
loss: 1.013865351676941,grad_norm: 0.7775271936735771, iteration: 63066
loss: 0.9891475439071655,grad_norm: 0.9160339837090945, iteration: 63067
loss: 0.9957237243652344,grad_norm: 0.8037013799151891, iteration: 63068
loss: 0.9779691100120544,grad_norm: 0.9999994869129128, iteration: 63069
loss: 0.9922640323638916,grad_norm: 0.9999993508190136, iteration: 63070
loss: 0.9937050938606262,grad_norm: 0.9999991430060999, iteration: 63071
loss: 1.0260496139526367,grad_norm: 0.9999994571714783, iteration: 63072
loss: 1.0508031845092773,grad_norm: 0.999999242319208, iteration: 63073
loss: 1.0121325254440308,grad_norm: 0.9999990014789215, iteration: 63074
loss: 1.0103181600570679,grad_norm: 0.9999990341645024, iteration: 63075
loss: 0.9972617030143738,grad_norm: 0.9999992120034089, iteration: 63076
loss: 0.98139888048172,grad_norm: 0.859897915353467, iteration: 63077
loss: 1.0167173147201538,grad_norm: 0.9999991175602768, iteration: 63078
loss: 1.0061182975769043,grad_norm: 0.9999994680441447, iteration: 63079
loss: 1.0151883363723755,grad_norm: 0.9999990780493602, iteration: 63080
loss: 1.031292200088501,grad_norm: 0.9999990848069321, iteration: 63081
loss: 1.0122922658920288,grad_norm: 0.9033094427672834, iteration: 63082
loss: 1.0178232192993164,grad_norm: 0.9329489323334583, iteration: 63083
loss: 1.035359501838684,grad_norm: 0.9834728986517622, iteration: 63084
loss: 1.0249934196472168,grad_norm: 0.9999992035121642, iteration: 63085
loss: 1.035979986190796,grad_norm: 0.9785766607214301, iteration: 63086
loss: 1.021096110343933,grad_norm: 0.8905695802673849, iteration: 63087
loss: 1.0290871858596802,grad_norm: 0.9401973292739041, iteration: 63088
loss: 1.0235207080841064,grad_norm: 0.9999992236222033, iteration: 63089
loss: 0.9676094055175781,grad_norm: 0.999999102167502, iteration: 63090
loss: 0.946376383304596,grad_norm: 0.9999991159576599, iteration: 63091
loss: 0.9673277735710144,grad_norm: 0.8320818889083861, iteration: 63092
loss: 0.9704458713531494,grad_norm: 0.9999989904909892, iteration: 63093
loss: 0.991565465927124,grad_norm: 0.9999993692270209, iteration: 63094
loss: 1.012578010559082,grad_norm: 0.7904395744210996, iteration: 63095
loss: 0.9788532853126526,grad_norm: 0.9999993288994065, iteration: 63096
loss: 1.0398666858673096,grad_norm: 0.9928696031940493, iteration: 63097
loss: 1.019457459449768,grad_norm: 0.9476594140988942, iteration: 63098
loss: 1.0145020484924316,grad_norm: 0.9999997580311063, iteration: 63099
loss: 1.0100913047790527,grad_norm: 0.804683250869801, iteration: 63100
loss: 0.9596232771873474,grad_norm: 0.999999099935149, iteration: 63101
loss: 0.9923154711723328,grad_norm: 0.9682477730903851, iteration: 63102
loss: 0.9671562314033508,grad_norm: 0.9655649719470681, iteration: 63103
loss: 1.0605356693267822,grad_norm: 0.9999994334152585, iteration: 63104
loss: 1.0469952821731567,grad_norm: 0.9766309485571031, iteration: 63105
loss: 0.9889475703239441,grad_norm: 0.9582504041120856, iteration: 63106
loss: 1.0454481840133667,grad_norm: 0.9999991847170717, iteration: 63107
loss: 1.1311086416244507,grad_norm: 0.9999992773557438, iteration: 63108
loss: 0.9598133563995361,grad_norm: 0.9082224846847091, iteration: 63109
loss: 1.0282021760940552,grad_norm: 0.8834746018697952, iteration: 63110
loss: 0.9721799492835999,grad_norm: 0.9999989913320404, iteration: 63111
loss: 1.036399245262146,grad_norm: 0.8961489813842662, iteration: 63112
loss: 0.9801392555236816,grad_norm: 0.9999990656869227, iteration: 63113
loss: 0.9863210320472717,grad_norm: 0.9598358682780617, iteration: 63114
loss: 1.0000953674316406,grad_norm: 0.9901419427717817, iteration: 63115
loss: 0.9867021441459656,grad_norm: 0.9431851214458772, iteration: 63116
loss: 0.965337872505188,grad_norm: 0.7586549719164016, iteration: 63117
loss: 1.0694069862365723,grad_norm: 0.9999999239463306, iteration: 63118
loss: 1.0075814723968506,grad_norm: 0.8166033060730722, iteration: 63119
loss: 1.2298338413238525,grad_norm: 0.9999994418055433, iteration: 63120
loss: 1.0169695615768433,grad_norm: 0.9999999529518422, iteration: 63121
loss: 1.0952484607696533,grad_norm: 0.9999993834938541, iteration: 63122
loss: 0.9989938139915466,grad_norm: 0.7482017653084686, iteration: 63123
loss: 1.0558942556381226,grad_norm: 0.9999997507726573, iteration: 63124
loss: 0.993078887462616,grad_norm: 0.9546672780995888, iteration: 63125
loss: 1.040461540222168,grad_norm: 0.9999992814198807, iteration: 63126
loss: 0.9641609191894531,grad_norm: 0.9999993662037475, iteration: 63127
loss: 0.9940640330314636,grad_norm: 0.9999990847506249, iteration: 63128
loss: 1.0063544511795044,grad_norm: 0.8114506179420018, iteration: 63129
loss: 1.0275682210922241,grad_norm: 0.9228762116497601, iteration: 63130
loss: 1.0149272680282593,grad_norm: 0.8403299597871967, iteration: 63131
loss: 0.9812328219413757,grad_norm: 0.9999992612557932, iteration: 63132
loss: 1.0807431936264038,grad_norm: 0.9999990596265657, iteration: 63133
loss: 0.980665385723114,grad_norm: 0.9442625631078925, iteration: 63134
loss: 1.0338164567947388,grad_norm: 0.9999991661891241, iteration: 63135
loss: 1.0108001232147217,grad_norm: 0.9999992046064667, iteration: 63136
loss: 0.9774180054664612,grad_norm: 0.999999121277462, iteration: 63137
loss: 0.9748419523239136,grad_norm: 0.9999992644545277, iteration: 63138
loss: 1.024464726448059,grad_norm: 0.9999989535236158, iteration: 63139
loss: 0.9760609865188599,grad_norm: 0.8418646635280225, iteration: 63140
loss: 0.9712643027305603,grad_norm: 0.9415041582410061, iteration: 63141
loss: 1.0086958408355713,grad_norm: 0.8282954294921073, iteration: 63142
loss: 1.0082972049713135,grad_norm: 0.9285249130011439, iteration: 63143
loss: 1.0115300416946411,grad_norm: 0.9458302885727634, iteration: 63144
loss: 0.9962904453277588,grad_norm: 0.9204167064576602, iteration: 63145
loss: 1.0089274644851685,grad_norm: 0.9999995667201367, iteration: 63146
loss: 0.977263867855072,grad_norm: 0.8130964917415635, iteration: 63147
loss: 0.9957407712936401,grad_norm: 0.9999991165722284, iteration: 63148
loss: 1.0178544521331787,grad_norm: 0.9999991763144637, iteration: 63149
loss: 1.057125210762024,grad_norm: 0.9999990140644089, iteration: 63150
loss: 1.095896601676941,grad_norm: 0.9999995195520968, iteration: 63151
loss: 1.019480586051941,grad_norm: 0.9999992013338744, iteration: 63152
loss: 1.0072994232177734,grad_norm: 0.8271575632570467, iteration: 63153
loss: 1.116304636001587,grad_norm: 0.9999998428132, iteration: 63154
loss: 1.0005130767822266,grad_norm: 0.999999044639484, iteration: 63155
loss: 0.9961450099945068,grad_norm: 0.9999993454526283, iteration: 63156
loss: 1.0058737993240356,grad_norm: 0.9999991999439046, iteration: 63157
loss: 0.9855968356132507,grad_norm: 0.9988514300922431, iteration: 63158
loss: 1.0379923582077026,grad_norm: 0.999999028700977, iteration: 63159
loss: 0.975628137588501,grad_norm: 0.8968940259613416, iteration: 63160
loss: 0.9984076023101807,grad_norm: 0.8753472781409269, iteration: 63161
loss: 1.0262748003005981,grad_norm: 0.9999991966991392, iteration: 63162
loss: 1.0300096273422241,grad_norm: 0.9999995098657556, iteration: 63163
loss: 0.9916313290596008,grad_norm: 0.9999989866283585, iteration: 63164
loss: 0.9941429495811462,grad_norm: 0.999999264312063, iteration: 63165
loss: 1.0032767057418823,grad_norm: 0.9759170468099387, iteration: 63166
loss: 1.0045781135559082,grad_norm: 0.8826911574905381, iteration: 63167
loss: 0.9977519512176514,grad_norm: 0.9999991368223725, iteration: 63168
loss: 1.0429774522781372,grad_norm: 0.8275381348214613, iteration: 63169
loss: 1.0342190265655518,grad_norm: 0.9999995048965938, iteration: 63170
loss: 0.9757198691368103,grad_norm: 0.9999990448440825, iteration: 63171
loss: 0.9832258224487305,grad_norm: 0.9999996579859376, iteration: 63172
loss: 0.9937090277671814,grad_norm: 0.999999109098514, iteration: 63173
loss: 0.980631947517395,grad_norm: 0.8436102795056918, iteration: 63174
loss: 1.055282473564148,grad_norm: 0.9999991714930283, iteration: 63175
loss: 0.9967976212501526,grad_norm: 0.9999992265966149, iteration: 63176
loss: 0.9745416045188904,grad_norm: 0.8839885487414523, iteration: 63177
loss: 0.9743289947509766,grad_norm: 0.9999992169050704, iteration: 63178
loss: 1.0059449672698975,grad_norm: 0.8798813779581177, iteration: 63179
loss: 0.9678527116775513,grad_norm: 0.8929740324436944, iteration: 63180
loss: 0.9907328486442566,grad_norm: 0.9999992026384129, iteration: 63181
loss: 0.9868779182434082,grad_norm: 0.9999991823902671, iteration: 63182
loss: 0.9822887778282166,grad_norm: 0.9016178283332226, iteration: 63183
loss: 1.0197969675064087,grad_norm: 0.9999990604696684, iteration: 63184
loss: 1.0706785917282104,grad_norm: 0.9999999146784951, iteration: 63185
loss: 1.0231268405914307,grad_norm: 0.8765521886367484, iteration: 63186
loss: 0.9908720850944519,grad_norm: 0.836475674292511, iteration: 63187
loss: 1.0205399990081787,grad_norm: 0.7804058505885401, iteration: 63188
loss: 1.0699138641357422,grad_norm: 0.9999991132329152, iteration: 63189
loss: 0.989632785320282,grad_norm: 0.9959158321806466, iteration: 63190
loss: 0.9874576330184937,grad_norm: 0.9864788588598236, iteration: 63191
loss: 0.9997609853744507,grad_norm: 0.9999993129130054, iteration: 63192
loss: 1.0175940990447998,grad_norm: 0.9665130887927457, iteration: 63193
loss: 0.9942319989204407,grad_norm: 0.999999191516333, iteration: 63194
loss: 0.9844653606414795,grad_norm: 0.9558270748401648, iteration: 63195
loss: 1.0299065113067627,grad_norm: 0.9999994741038183, iteration: 63196
loss: 1.031221628189087,grad_norm: 0.8469589417223, iteration: 63197
loss: 0.9675991535186768,grad_norm: 0.9961630013380449, iteration: 63198
loss: 1.0066026449203491,grad_norm: 0.9999990092163215, iteration: 63199
loss: 1.0648038387298584,grad_norm: 0.8966988724841165, iteration: 63200
loss: 1.0949450731277466,grad_norm: 0.9999990397171624, iteration: 63201
loss: 0.9873060584068298,grad_norm: 0.999999069364734, iteration: 63202
loss: 1.0016095638275146,grad_norm: 0.9999990632657797, iteration: 63203
loss: 1.0032763481140137,grad_norm: 0.9999992528022605, iteration: 63204
loss: 1.1157954931259155,grad_norm: 0.9999996136358671, iteration: 63205
loss: 1.0813624858856201,grad_norm: 0.9999994027825486, iteration: 63206
loss: 1.1104111671447754,grad_norm: 0.9999994570396757, iteration: 63207
loss: 1.0208652019500732,grad_norm: 0.8210314538546096, iteration: 63208
loss: 1.0342066287994385,grad_norm: 0.9999994021118872, iteration: 63209
loss: 1.0408546924591064,grad_norm: 0.8527765579287969, iteration: 63210
loss: 0.9869512915611267,grad_norm: 0.817056694889019, iteration: 63211
loss: 0.975956916809082,grad_norm: 0.8706436077625026, iteration: 63212
loss: 1.014267086982727,grad_norm: 0.9999990888462796, iteration: 63213
loss: 1.0440787076950073,grad_norm: 0.9999998368953764, iteration: 63214
loss: 1.12668776512146,grad_norm: 0.9999995213137419, iteration: 63215
loss: 1.0375921726226807,grad_norm: 0.9999994176735714, iteration: 63216
loss: 0.996931254863739,grad_norm: 0.9149728914778719, iteration: 63217
loss: 1.0460777282714844,grad_norm: 0.9999992287559126, iteration: 63218
loss: 1.0054433345794678,grad_norm: 0.999999129596816, iteration: 63219
loss: 0.9681528806686401,grad_norm: 0.9999990107478947, iteration: 63220
loss: 1.037412405014038,grad_norm: 0.9757176386836284, iteration: 63221
loss: 1.0274730920791626,grad_norm: 0.9854108798598239, iteration: 63222
loss: 1.0408400297164917,grad_norm: 0.9922850539747538, iteration: 63223
loss: 1.085468053817749,grad_norm: 0.9999995439141822, iteration: 63224
loss: 1.036599040031433,grad_norm: 0.9785899052620276, iteration: 63225
loss: 1.0280237197875977,grad_norm: 0.9999992096082477, iteration: 63226
loss: 1.0161433219909668,grad_norm: 0.9457815027316365, iteration: 63227
loss: 0.9844760298728943,grad_norm: 0.957302018026143, iteration: 63228
loss: 1.030179738998413,grad_norm: 0.9999995677798174, iteration: 63229
loss: 1.0087240934371948,grad_norm: 0.8880870678095829, iteration: 63230
loss: 0.9994220733642578,grad_norm: 0.9261589098846439, iteration: 63231
loss: 1.0280359983444214,grad_norm: 0.8591754662538117, iteration: 63232
loss: 0.9876644015312195,grad_norm: 0.9999990954066087, iteration: 63233
loss: 1.0214942693710327,grad_norm: 0.9999991263953094, iteration: 63234
loss: 1.1322696208953857,grad_norm: 0.9999994928144851, iteration: 63235
loss: 0.9763920307159424,grad_norm: 0.7709749327215186, iteration: 63236
loss: 1.0615127086639404,grad_norm: 0.9999998779236072, iteration: 63237
loss: 1.0400495529174805,grad_norm: 0.9999993207554961, iteration: 63238
loss: 1.0251938104629517,grad_norm: 0.9999992053937367, iteration: 63239
loss: 1.1037800312042236,grad_norm: 0.9999995503954504, iteration: 63240
loss: 1.0032826662063599,grad_norm: 0.9099812821375665, iteration: 63241
loss: 1.06904137134552,grad_norm: 0.9999999750146694, iteration: 63242
loss: 1.0393874645233154,grad_norm: 0.9999995289418699, iteration: 63243
loss: 1.073662281036377,grad_norm: 0.9999998577800634, iteration: 63244
loss: 1.0107107162475586,grad_norm: 0.8567912956754877, iteration: 63245
loss: 1.0124969482421875,grad_norm: 0.9295957895309807, iteration: 63246
loss: 1.0347659587860107,grad_norm: 0.9999992604809613, iteration: 63247
loss: 1.000807762145996,grad_norm: 0.9637148832572732, iteration: 63248
loss: 0.9841083288192749,grad_norm: 0.887441650193216, iteration: 63249
loss: 0.9964296221733093,grad_norm: 0.9999993780443502, iteration: 63250
loss: 0.9721132516860962,grad_norm: 0.9999994447606354, iteration: 63251
loss: 1.1355026960372925,grad_norm: 0.9999995923070901, iteration: 63252
loss: 0.977547824382782,grad_norm: 0.8315792479085135, iteration: 63253
loss: 1.005816102027893,grad_norm: 0.9999993307577899, iteration: 63254
loss: 1.005138874053955,grad_norm: 0.8054248011464931, iteration: 63255
loss: 0.9525348544120789,grad_norm: 0.9613792563555712, iteration: 63256
loss: 0.9939532279968262,grad_norm: 0.8528542312811379, iteration: 63257
loss: 0.9948423504829407,grad_norm: 0.8068791597190623, iteration: 63258
loss: 1.0094919204711914,grad_norm: 0.9353885211473617, iteration: 63259
loss: 0.9939352869987488,grad_norm: 0.9999992794738829, iteration: 63260
loss: 1.001477599143982,grad_norm: 0.9999992248418859, iteration: 63261
loss: 1.0010713338851929,grad_norm: 0.9999992070749878, iteration: 63262
loss: 1.0025522708892822,grad_norm: 0.9222782308510049, iteration: 63263
loss: 0.9652866125106812,grad_norm: 0.9999991198386484, iteration: 63264
loss: 1.058673620223999,grad_norm: 0.818871152347894, iteration: 63265
loss: 1.011892557144165,grad_norm: 0.9999992657295043, iteration: 63266
loss: 0.9986107349395752,grad_norm: 0.9368899350986842, iteration: 63267
loss: 0.9619077444076538,grad_norm: 0.7976270488234936, iteration: 63268
loss: 0.9881758093833923,grad_norm: 0.9173284134844643, iteration: 63269
loss: 1.019192099571228,grad_norm: 0.9999993921217165, iteration: 63270
loss: 0.9744325280189514,grad_norm: 0.9999994288677476, iteration: 63271
loss: 1.0308674573898315,grad_norm: 0.8860184100607672, iteration: 63272
loss: 1.0433429479599,grad_norm: 0.9999989917352785, iteration: 63273
loss: 1.0111775398254395,grad_norm: 0.7727076394502904, iteration: 63274
loss: 1.0200904607772827,grad_norm: 0.999999186240163, iteration: 63275
loss: 1.0219744443893433,grad_norm: 0.9999991094341377, iteration: 63276
loss: 1.0461333990097046,grad_norm: 0.999999331146021, iteration: 63277
loss: 1.004768967628479,grad_norm: 0.7956917706586547, iteration: 63278
loss: 1.0573341846466064,grad_norm: 0.992691868748722, iteration: 63279
loss: 0.9728440046310425,grad_norm: 0.7892966718047221, iteration: 63280
loss: 1.0688624382019043,grad_norm: 0.9999992621654363, iteration: 63281
loss: 0.9814923405647278,grad_norm: 0.9663334496774034, iteration: 63282
loss: 0.9738597273826599,grad_norm: 0.9263419133724108, iteration: 63283
loss: 0.9852361679077148,grad_norm: 0.971361871474035, iteration: 63284
loss: 1.007078766822815,grad_norm: 0.8102859941387285, iteration: 63285
loss: 1.083626627922058,grad_norm: 0.9999994011515084, iteration: 63286
loss: 1.0077276229858398,grad_norm: 0.7903396204025918, iteration: 63287
loss: 0.9907578229904175,grad_norm: 0.8540236711717288, iteration: 63288
loss: 0.9765036106109619,grad_norm: 0.875164841026692, iteration: 63289
loss: 1.0380101203918457,grad_norm: 0.8438070985522091, iteration: 63290
loss: 1.02251398563385,grad_norm: 0.9999991985574213, iteration: 63291
loss: 1.079579472541809,grad_norm: 0.999999630486065, iteration: 63292
loss: 0.9973958134651184,grad_norm: 0.9188443992018669, iteration: 63293
loss: 0.9838621020317078,grad_norm: 0.7666889348089386, iteration: 63294
loss: 1.0872730016708374,grad_norm: 0.955903129939881, iteration: 63295
loss: 1.057662844657898,grad_norm: 0.9999996969958699, iteration: 63296
loss: 0.9839443564414978,grad_norm: 0.9999990776693575, iteration: 63297
loss: 1.0111477375030518,grad_norm: 0.8803794147764231, iteration: 63298
loss: 1.0301218032836914,grad_norm: 0.998643924599971, iteration: 63299
loss: 0.998443067073822,grad_norm: 0.9999991108849523, iteration: 63300
loss: 1.2575660943984985,grad_norm: 0.9999996272438868, iteration: 63301
loss: 0.976763904094696,grad_norm: 0.9999989803291903, iteration: 63302
loss: 1.0079753398895264,grad_norm: 0.9992471420682526, iteration: 63303
loss: 1.072388768196106,grad_norm: 0.9999996628344674, iteration: 63304
loss: 0.9890420436859131,grad_norm: 0.999998998628167, iteration: 63305
loss: 1.0091630220413208,grad_norm: 0.8827840638768041, iteration: 63306
loss: 0.9613775014877319,grad_norm: 0.9999991682586431, iteration: 63307
loss: 1.0359292030334473,grad_norm: 0.9481849055895893, iteration: 63308
loss: 1.0241152048110962,grad_norm: 0.999999079623969, iteration: 63309
loss: 0.9871288537979126,grad_norm: 0.9519906698771755, iteration: 63310
loss: 1.0081695318222046,grad_norm: 0.9999992253735136, iteration: 63311
loss: 0.9903806447982788,grad_norm: 0.9999991913573925, iteration: 63312
loss: 0.9749881029129028,grad_norm: 0.8917552207267289, iteration: 63313
loss: 1.028784990310669,grad_norm: 0.8546359207410378, iteration: 63314
loss: 1.0013070106506348,grad_norm: 0.8726967399557347, iteration: 63315
loss: 0.9782414436340332,grad_norm: 0.9999990808389504, iteration: 63316
loss: 1.0117689371109009,grad_norm: 0.9999995420429465, iteration: 63317
loss: 1.0705749988555908,grad_norm: 0.9202079634456253, iteration: 63318
loss: 1.0097354650497437,grad_norm: 0.9592815021590462, iteration: 63319
loss: 1.0040398836135864,grad_norm: 0.9999990759162236, iteration: 63320
loss: 0.9704645276069641,grad_norm: 0.8741038584784924, iteration: 63321
loss: 1.0066349506378174,grad_norm: 0.9183070684838123, iteration: 63322
loss: 1.0365478992462158,grad_norm: 0.9899113960957667, iteration: 63323
loss: 0.9931905269622803,grad_norm: 0.9999990323734558, iteration: 63324
loss: 1.0274685621261597,grad_norm: 0.9999992606918949, iteration: 63325
loss: 1.0030579566955566,grad_norm: 0.8097371780592172, iteration: 63326
loss: 1.0181535482406616,grad_norm: 0.8272514155811764, iteration: 63327
loss: 1.0131068229675293,grad_norm: 0.8629718545609246, iteration: 63328
loss: 1.032759428024292,grad_norm: 0.8308572414223229, iteration: 63329
loss: 1.0073894262313843,grad_norm: 0.9069515383896999, iteration: 63330
loss: 0.9876011610031128,grad_norm: 0.8581194074860868, iteration: 63331
loss: 0.9631623029708862,grad_norm: 0.9526677585500601, iteration: 63332
loss: 1.0034252405166626,grad_norm: 0.9999991513326839, iteration: 63333
loss: 1.004095196723938,grad_norm: 0.9632358517183605, iteration: 63334
loss: 1.0250970125198364,grad_norm: 0.9434960583872266, iteration: 63335
loss: 0.9634392261505127,grad_norm: 0.9702354150434244, iteration: 63336
loss: 1.0018631219863892,grad_norm: 0.9999996378357148, iteration: 63337
loss: 1.0016791820526123,grad_norm: 0.8463110805125306, iteration: 63338
loss: 0.989974319934845,grad_norm: 0.9298529818906219, iteration: 63339
loss: 0.9800108075141907,grad_norm: 0.8469200245277314, iteration: 63340
loss: 1.0115185976028442,grad_norm: 0.7842209683074379, iteration: 63341
loss: 1.0105680227279663,grad_norm: 0.9999991827450243, iteration: 63342
loss: 0.9934657216072083,grad_norm: 0.8486232671269726, iteration: 63343
loss: 0.9705508351325989,grad_norm: 0.9249622683980905, iteration: 63344
loss: 0.987597644329071,grad_norm: 0.9371923672845279, iteration: 63345
loss: 1.0468204021453857,grad_norm: 0.9999990155793105, iteration: 63346
loss: 1.0272775888442993,grad_norm: 0.999999160462568, iteration: 63347
loss: 1.0009249448776245,grad_norm: 0.9340516529159733, iteration: 63348
loss: 0.9927501678466797,grad_norm: 0.9178534031904124, iteration: 63349
loss: 1.0174647569656372,grad_norm: 0.9999993394227924, iteration: 63350
loss: 1.030634880065918,grad_norm: 0.8410617943571769, iteration: 63351
loss: 0.989151656627655,grad_norm: 0.7934777975462525, iteration: 63352
loss: 0.999408483505249,grad_norm: 0.8878311135261042, iteration: 63353
loss: 0.9585466384887695,grad_norm: 0.9147610752294855, iteration: 63354
loss: 0.9961370229721069,grad_norm: 0.8711926299763743, iteration: 63355
loss: 1.0093454122543335,grad_norm: 0.7882022646601236, iteration: 63356
loss: 0.9737135171890259,grad_norm: 0.9780658999545695, iteration: 63357
loss: 0.9844735860824585,grad_norm: 0.980933074992499, iteration: 63358
loss: 1.0183334350585938,grad_norm: 0.8735704852982161, iteration: 63359
loss: 1.022875189781189,grad_norm: 0.9478594935994255, iteration: 63360
loss: 1.047965407371521,grad_norm: 0.9999993060922284, iteration: 63361
loss: 1.0112645626068115,grad_norm: 0.9999989248012686, iteration: 63362
loss: 1.0197170972824097,grad_norm: 0.998682238292591, iteration: 63363
loss: 0.9928652048110962,grad_norm: 0.8361124607413734, iteration: 63364
loss: 0.9779532551765442,grad_norm: 0.9999992658464473, iteration: 63365
loss: 1.0309765338897705,grad_norm: 0.8267062116605725, iteration: 63366
loss: 1.0153173208236694,grad_norm: 0.9564887417566129, iteration: 63367
loss: 0.970360279083252,grad_norm: 0.8824607312462526, iteration: 63368
loss: 1.0401307344436646,grad_norm: 0.9567908683631429, iteration: 63369
loss: 1.0223976373672485,grad_norm: 0.9357257525573257, iteration: 63370
loss: 0.9978125691413879,grad_norm: 0.9312646799040083, iteration: 63371
loss: 1.0300631523132324,grad_norm: 0.9599155833151487, iteration: 63372
loss: 1.0021851062774658,grad_norm: 0.9940416509206216, iteration: 63373
loss: 1.025130271911621,grad_norm: 0.7713445880749937, iteration: 63374
loss: 1.002691388130188,grad_norm: 0.7902015733785409, iteration: 63375
loss: 1.0457663536071777,grad_norm: 0.9999992503927362, iteration: 63376
loss: 0.9714357256889343,grad_norm: 0.9999990567032804, iteration: 63377
loss: 1.0071619749069214,grad_norm: 0.7981132178080419, iteration: 63378
loss: 1.007164478302002,grad_norm: 0.9626587565564124, iteration: 63379
loss: 0.9887908697128296,grad_norm: 0.8282001791808793, iteration: 63380
loss: 0.9903519153594971,grad_norm: 0.976604905771088, iteration: 63381
loss: 1.0035954713821411,grad_norm: 0.9151370247737971, iteration: 63382
loss: 1.0134689807891846,grad_norm: 0.9314626603171975, iteration: 63383
loss: 1.006015658378601,grad_norm: 0.9999991261445416, iteration: 63384
loss: 1.1454941034317017,grad_norm: 0.999999317339762, iteration: 63385
loss: 1.0064301490783691,grad_norm: 0.8139877166872618, iteration: 63386
loss: 1.0253920555114746,grad_norm: 0.9318034532209885, iteration: 63387
loss: 0.9985051155090332,grad_norm: 0.9999995967653224, iteration: 63388
loss: 1.024908185005188,grad_norm: 0.9999994476820444, iteration: 63389
loss: 0.9610311985015869,grad_norm: 0.9999992768400889, iteration: 63390
loss: 1.0305339097976685,grad_norm: 0.9486208831435549, iteration: 63391
loss: 0.983640193939209,grad_norm: 0.8746161849207809, iteration: 63392
loss: 1.0110551118850708,grad_norm: 0.999999146865549, iteration: 63393
loss: 1.0111018419265747,grad_norm: 0.9999989582758966, iteration: 63394
loss: 1.0052746534347534,grad_norm: 0.9999991628858136, iteration: 63395
loss: 0.988655149936676,grad_norm: 0.8296211878822765, iteration: 63396
loss: 0.9957190752029419,grad_norm: 0.9999990784365511, iteration: 63397
loss: 1.0200968980789185,grad_norm: 0.9999996166273921, iteration: 63398
loss: 0.9615141153335571,grad_norm: 0.9596756962795501, iteration: 63399
loss: 1.0214637517929077,grad_norm: 0.9999991790440373, iteration: 63400
loss: 1.016844630241394,grad_norm: 0.9999992875403939, iteration: 63401
loss: 1.0362094640731812,grad_norm: 0.9999991170801233, iteration: 63402
loss: 0.9861406087875366,grad_norm: 0.9999990201822555, iteration: 63403
loss: 1.0246294736862183,grad_norm: 0.999999194980974, iteration: 63404
loss: 0.993028998374939,grad_norm: 0.957389716177167, iteration: 63405
loss: 1.0006874799728394,grad_norm: 0.9783245349115091, iteration: 63406
loss: 1.000879168510437,grad_norm: 0.9555994340763465, iteration: 63407
loss: 0.9710673093795776,grad_norm: 0.9906843840152014, iteration: 63408
loss: 1.00789213180542,grad_norm: 0.9999991986397202, iteration: 63409
loss: 1.0101337432861328,grad_norm: 0.9110710448248942, iteration: 63410
loss: 1.018352746963501,grad_norm: 0.7599760689166852, iteration: 63411
loss: 0.9966720342636108,grad_norm: 0.932948857007365, iteration: 63412
loss: 0.9881062507629395,grad_norm: 0.9999991916465012, iteration: 63413
loss: 1.0649675130844116,grad_norm: 0.9999995504898956, iteration: 63414
loss: 0.9881588816642761,grad_norm: 0.817332882231302, iteration: 63415
loss: 0.9960654973983765,grad_norm: 0.8149909454516471, iteration: 63416
loss: 0.9827153086662292,grad_norm: 0.8095506717642899, iteration: 63417
loss: 0.9645273089408875,grad_norm: 0.9999991137382096, iteration: 63418
loss: 1.0047509670257568,grad_norm: 0.9999991083718973, iteration: 63419
loss: 1.0730903148651123,grad_norm: 0.9999991392806612, iteration: 63420
loss: 1.0058581829071045,grad_norm: 0.9451792998157987, iteration: 63421
loss: 1.0116822719573975,grad_norm: 0.933405487583386, iteration: 63422
loss: 1.0806572437286377,grad_norm: 0.8398729180908807, iteration: 63423
loss: 0.9900687336921692,grad_norm: 0.9999991493844004, iteration: 63424
loss: 0.9688712954521179,grad_norm: 0.8301331206443845, iteration: 63425
loss: 1.0413446426391602,grad_norm: 0.9999997696269264, iteration: 63426
loss: 1.047696828842163,grad_norm: 0.9999998325227027, iteration: 63427
loss: 0.9840124249458313,grad_norm: 0.9544348951396967, iteration: 63428
loss: 1.0114942789077759,grad_norm: 0.8314832380429247, iteration: 63429
loss: 0.9997222423553467,grad_norm: 0.9928309480999999, iteration: 63430
loss: 0.9943264722824097,grad_norm: 0.9999990002743421, iteration: 63431
loss: 0.9894443154335022,grad_norm: 0.7583466204831439, iteration: 63432
loss: 0.9813408851623535,grad_norm: 0.8431901617307254, iteration: 63433
loss: 0.9606513381004333,grad_norm: 0.9134889153801049, iteration: 63434
loss: 1.0088253021240234,grad_norm: 0.9999992859166229, iteration: 63435
loss: 1.013909101486206,grad_norm: 0.9999994585259265, iteration: 63436
loss: 1.045585036277771,grad_norm: 0.999999086540028, iteration: 63437
loss: 1.0103833675384521,grad_norm: 0.9999990873570386, iteration: 63438
loss: 1.0068645477294922,grad_norm: 0.9469130568123184, iteration: 63439
loss: 0.9692685008049011,grad_norm: 0.8480741415074622, iteration: 63440
loss: 1.0249049663543701,grad_norm: 0.9999989565879771, iteration: 63441
loss: 0.9878987669944763,grad_norm: 0.9865989728655666, iteration: 63442
loss: 0.9792659282684326,grad_norm: 0.8418211949771625, iteration: 63443
loss: 1.0239911079406738,grad_norm: 0.9296260148517819, iteration: 63444
loss: 1.0006519556045532,grad_norm: 0.9108602540852414, iteration: 63445
loss: 1.064038634300232,grad_norm: 0.999999072029321, iteration: 63446
loss: 0.9894057512283325,grad_norm: 0.9999998700714623, iteration: 63447
loss: 0.9555518627166748,grad_norm: 0.9999991997070906, iteration: 63448
loss: 0.9505534768104553,grad_norm: 0.9714240627396538, iteration: 63449
loss: 0.9921461939811707,grad_norm: 0.824062846536452, iteration: 63450
loss: 0.9960350394248962,grad_norm: 0.9292021955589529, iteration: 63451
loss: 1.0390338897705078,grad_norm: 0.999999843178739, iteration: 63452
loss: 1.0066334009170532,grad_norm: 0.9999989961995203, iteration: 63453
loss: 0.9666843414306641,grad_norm: 0.9262787448094719, iteration: 63454
loss: 1.0410927534103394,grad_norm: 0.9542203475288419, iteration: 63455
loss: 0.9982167482376099,grad_norm: 0.9999998095850821, iteration: 63456
loss: 1.0003833770751953,grad_norm: 0.8808740306905765, iteration: 63457
loss: 0.9931444525718689,grad_norm: 0.8381093803031641, iteration: 63458
loss: 0.9748169779777527,grad_norm: 0.9999991034809134, iteration: 63459
loss: 0.9970254302024841,grad_norm: 0.9999994224234581, iteration: 63460
loss: 1.0381739139556885,grad_norm: 0.9999992499231473, iteration: 63461
loss: 1.0115467309951782,grad_norm: 0.9647397474692656, iteration: 63462
loss: 0.9942663908004761,grad_norm: 0.9087172094442181, iteration: 63463
loss: 1.017542839050293,grad_norm: 0.9999991228232388, iteration: 63464
loss: 1.0113863945007324,grad_norm: 0.8548822715894697, iteration: 63465
loss: 0.9938254952430725,grad_norm: 0.9046572047154084, iteration: 63466
loss: 1.0610917806625366,grad_norm: 0.9999997639597925, iteration: 63467
loss: 1.0541434288024902,grad_norm: 0.9999994229677858, iteration: 63468
loss: 0.9835986495018005,grad_norm: 0.9999991585856822, iteration: 63469
loss: 0.9976269006729126,grad_norm: 0.9648317871389981, iteration: 63470
loss: 1.0084881782531738,grad_norm: 0.9999995941201347, iteration: 63471
loss: 1.032930612564087,grad_norm: 0.9999992989475525, iteration: 63472
loss: 1.0355480909347534,grad_norm: 1.0000000709342614, iteration: 63473
loss: 1.0526753664016724,grad_norm: 0.9264418768210141, iteration: 63474
loss: 1.1374726295471191,grad_norm: 0.9999990893712841, iteration: 63475
loss: 0.9912874102592468,grad_norm: 0.8873382437761989, iteration: 63476
loss: 0.9980257749557495,grad_norm: 0.894548533548568, iteration: 63477
loss: 1.0171679258346558,grad_norm: 0.8226518476192981, iteration: 63478
loss: 1.0681387186050415,grad_norm: 0.9999990518907687, iteration: 63479
loss: 1.016325831413269,grad_norm: 0.9999997985783134, iteration: 63480
loss: 1.0012447834014893,grad_norm: 0.9999990117755831, iteration: 63481
loss: 1.008475422859192,grad_norm: 0.9803356954626465, iteration: 63482
loss: 0.9850010275840759,grad_norm: 0.9999997705058796, iteration: 63483
loss: 0.9925267696380615,grad_norm: 0.9481829658661668, iteration: 63484
loss: 0.972474992275238,grad_norm: 0.8553543429805257, iteration: 63485
loss: 0.980027437210083,grad_norm: 0.9187586496128155, iteration: 63486
loss: 0.9893642663955688,grad_norm: 0.9905901405829982, iteration: 63487
loss: 1.0087285041809082,grad_norm: 0.9999991237836412, iteration: 63488
loss: 1.0039714574813843,grad_norm: 0.9617910903841409, iteration: 63489
loss: 0.9885225892066956,grad_norm: 0.9999994423616029, iteration: 63490
loss: 0.9730513691902161,grad_norm: 0.9615339234128587, iteration: 63491
loss: 1.0397652387619019,grad_norm: 0.9375371821978692, iteration: 63492
loss: 1.00911283493042,grad_norm: 0.9999991363330263, iteration: 63493
loss: 0.9972416758537292,grad_norm: 0.9512759691905126, iteration: 63494
loss: 1.0588222742080688,grad_norm: 0.999999134534899, iteration: 63495
loss: 1.0154520273208618,grad_norm: 0.9999998707462284, iteration: 63496
loss: 1.0024948120117188,grad_norm: 0.8232976770923709, iteration: 63497
loss: 1.023573875427246,grad_norm: 0.8892402203155709, iteration: 63498
loss: 1.035306453704834,grad_norm: 0.9999989831672575, iteration: 63499
loss: 0.9544475674629211,grad_norm: 0.9916094451835474, iteration: 63500
loss: 1.0100985765457153,grad_norm: 0.8275028254001556, iteration: 63501
loss: 0.9719113111495972,grad_norm: 0.9204392366092498, iteration: 63502
loss: 1.0398253202438354,grad_norm: 0.9999994823486695, iteration: 63503
loss: 1.0011930465698242,grad_norm: 0.8770987134392761, iteration: 63504
loss: 1.0429607629776,grad_norm: 0.9999990680664328, iteration: 63505
loss: 0.9930970072746277,grad_norm: 0.9999992109912591, iteration: 63506
loss: 0.9636640548706055,grad_norm: 0.8789931756287853, iteration: 63507
loss: 1.0388574600219727,grad_norm: 0.9562539643417494, iteration: 63508
loss: 0.9593151211738586,grad_norm: 0.9999992455250957, iteration: 63509
loss: 0.9681323170661926,grad_norm: 0.9999991203873607, iteration: 63510
loss: 1.044822335243225,grad_norm: 0.9999994862194586, iteration: 63511
loss: 0.9880131483078003,grad_norm: 0.8339595137193372, iteration: 63512
loss: 1.0222941637039185,grad_norm: 0.9999991505928325, iteration: 63513
loss: 0.98963463306427,grad_norm: 0.9646773670632542, iteration: 63514
loss: 1.0418709516525269,grad_norm: 0.9292714702877393, iteration: 63515
loss: 1.0307248830795288,grad_norm: 0.999999343295527, iteration: 63516
loss: 1.159278392791748,grad_norm: 0.99999931778999, iteration: 63517
loss: 1.0765178203582764,grad_norm: 0.9999997519068556, iteration: 63518
loss: 1.0070163011550903,grad_norm: 0.905935137934237, iteration: 63519
loss: 1.0056651830673218,grad_norm: 0.9999991003461782, iteration: 63520
loss: 1.089566707611084,grad_norm: 0.9424040513086618, iteration: 63521
loss: 0.9968358278274536,grad_norm: 0.9999992149426361, iteration: 63522
loss: 0.9626784324645996,grad_norm: 0.9999994069505304, iteration: 63523
loss: 0.9966427683830261,grad_norm: 0.9999990787550437, iteration: 63524
loss: 1.0006166696548462,grad_norm: 0.9999994913733691, iteration: 63525
loss: 1.0143901109695435,grad_norm: 0.9899016083881284, iteration: 63526
loss: 0.9795272350311279,grad_norm: 0.9999990663890869, iteration: 63527
loss: 1.027503252029419,grad_norm: 0.9732067205296081, iteration: 63528
loss: 1.0231295824050903,grad_norm: 0.9999995018971248, iteration: 63529
loss: 1.0271891355514526,grad_norm: 0.9123597906346094, iteration: 63530
loss: 1.0046534538269043,grad_norm: 0.983089998364637, iteration: 63531
loss: 1.0641371011734009,grad_norm: 0.9999992970977811, iteration: 63532
loss: 1.052164077758789,grad_norm: 0.9999993918487055, iteration: 63533
loss: 1.035058617591858,grad_norm: 0.999999710134314, iteration: 63534
loss: 1.0409742593765259,grad_norm: 0.9221941376342183, iteration: 63535
loss: 0.974567711353302,grad_norm: 0.9999992804628873, iteration: 63536
loss: 0.9655306935310364,grad_norm: 0.8140463371039172, iteration: 63537
loss: 1.0403320789337158,grad_norm: 0.9662100488896785, iteration: 63538
loss: 0.989490270614624,grad_norm: 0.8212590962901792, iteration: 63539
loss: 0.992153525352478,grad_norm: 0.9999991412665818, iteration: 63540
loss: 1.0291727781295776,grad_norm: 0.9999992584426758, iteration: 63541
loss: 1.0158506631851196,grad_norm: 0.999998970540597, iteration: 63542
loss: 1.0329617261886597,grad_norm: 0.8211615049341238, iteration: 63543
loss: 1.0157039165496826,grad_norm: 0.8825223288991828, iteration: 63544
loss: 1.0242063999176025,grad_norm: 0.8210437026445933, iteration: 63545
loss: 1.0037999153137207,grad_norm: 0.8651637179476694, iteration: 63546
loss: 0.9925656318664551,grad_norm: 0.9999992062909818, iteration: 63547
loss: 1.107045292854309,grad_norm: 0.9266227126084056, iteration: 63548
loss: 1.0204553604125977,grad_norm: 0.9999990945919739, iteration: 63549
loss: 1.0141701698303223,grad_norm: 0.9599648909877512, iteration: 63550
loss: 1.054025411605835,grad_norm: 0.9999996554247357, iteration: 63551
loss: 1.0044362545013428,grad_norm: 0.9122078147803436, iteration: 63552
loss: 0.9880695343017578,grad_norm: 0.8565372943494526, iteration: 63553
loss: 1.0282708406448364,grad_norm: 0.9999991303981282, iteration: 63554
loss: 1.0479917526245117,grad_norm: 0.9999996820654344, iteration: 63555
loss: 1.0304460525512695,grad_norm: 0.9392840764377588, iteration: 63556
loss: 1.032958984375,grad_norm: 0.9039153241471359, iteration: 63557
loss: 0.9990956783294678,grad_norm: 0.9999992262619523, iteration: 63558
loss: 1.0278557538986206,grad_norm: 0.9999998995170408, iteration: 63559
loss: 0.9973036050796509,grad_norm: 0.9999991322486222, iteration: 63560
loss: 1.0198826789855957,grad_norm: 0.9999989658544224, iteration: 63561
loss: 1.0267771482467651,grad_norm: 0.9584074549678512, iteration: 63562
loss: 0.9795679450035095,grad_norm: 0.8422857722527921, iteration: 63563
loss: 1.030876874923706,grad_norm: 0.9597746941274478, iteration: 63564
loss: 0.9955260157585144,grad_norm: 0.8882257324068937, iteration: 63565
loss: 1.0109635591506958,grad_norm: 0.9999992947836859, iteration: 63566
loss: 0.9962770938873291,grad_norm: 0.9999990718033732, iteration: 63567
loss: 0.9938072562217712,grad_norm: 0.8742759239583611, iteration: 63568
loss: 1.0460745096206665,grad_norm: 0.9999989921873739, iteration: 63569
loss: 0.9811232089996338,grad_norm: 0.7746108564297662, iteration: 63570
loss: 0.9973254799842834,grad_norm: 0.9999989732729961, iteration: 63571
loss: 0.9807462692260742,grad_norm: 0.8046827248309745, iteration: 63572
loss: 1.0317634344100952,grad_norm: 0.9999994608794832, iteration: 63573
loss: 1.0347105264663696,grad_norm: 0.9999995085165446, iteration: 63574
loss: 1.0117398500442505,grad_norm: 0.8920234307873613, iteration: 63575
loss: 0.9903053641319275,grad_norm: 0.9999991368879396, iteration: 63576
loss: 1.0219576358795166,grad_norm: 0.9999991778992131, iteration: 63577
loss: 1.0198110342025757,grad_norm: 0.999999646992233, iteration: 63578
loss: 1.003949761390686,grad_norm: 0.999999087336341, iteration: 63579
loss: 0.991454005241394,grad_norm: 0.8429653469729969, iteration: 63580
loss: 0.9742706418037415,grad_norm: 0.9999990216288318, iteration: 63581
loss: 1.0058794021606445,grad_norm: 0.945142760266309, iteration: 63582
loss: 0.9804035425186157,grad_norm: 0.99999915108746, iteration: 63583
loss: 0.9817477464675903,grad_norm: 0.8847809967065875, iteration: 63584
loss: 1.0178216695785522,grad_norm: 0.9999999077309856, iteration: 63585
loss: 0.9977599382400513,grad_norm: 0.9999991786343981, iteration: 63586
loss: 1.0059733390808105,grad_norm: 0.9999997363281554, iteration: 63587
loss: 1.21761953830719,grad_norm: 0.999999761909886, iteration: 63588
loss: 0.9436239004135132,grad_norm: 0.9999993054016639, iteration: 63589
loss: 0.9772184491157532,grad_norm: 0.9721792127428244, iteration: 63590
loss: 1.0156038999557495,grad_norm: 0.9999992033917204, iteration: 63591
loss: 1.0782893896102905,grad_norm: 0.9999991121785784, iteration: 63592
loss: 1.0311503410339355,grad_norm: 0.9999991901975915, iteration: 63593
loss: 1.023848295211792,grad_norm: 0.999999422362768, iteration: 63594
loss: 1.0054701566696167,grad_norm: 0.9999991729369285, iteration: 63595
loss: 1.1229362487792969,grad_norm: 0.9999997405019023, iteration: 63596
loss: 1.0167430639266968,grad_norm: 0.9999989348967153, iteration: 63597
loss: 1.011089563369751,grad_norm: 0.8232539464031835, iteration: 63598
loss: 1.0132325887680054,grad_norm: 0.8827598202357043, iteration: 63599
loss: 0.9698700904846191,grad_norm: 0.9999991341148092, iteration: 63600
loss: 1.0974159240722656,grad_norm: 0.9983191020470794, iteration: 63601
loss: 1.180468201637268,grad_norm: 0.9999990490043797, iteration: 63602
loss: 1.0164852142333984,grad_norm: 0.9532780021961568, iteration: 63603
loss: 0.9962417483329773,grad_norm: 0.9999992902772791, iteration: 63604
loss: 1.0017718076705933,grad_norm: 0.8501363313447123, iteration: 63605
loss: 0.9757465124130249,grad_norm: 0.8906036679596906, iteration: 63606
loss: 0.9964792132377625,grad_norm: 0.999999178229447, iteration: 63607
loss: 0.9941611289978027,grad_norm: 0.942984078659698, iteration: 63608
loss: 0.9861167073249817,grad_norm: 0.880125373945861, iteration: 63609
loss: 1.0337291955947876,grad_norm: 0.9999993331483968, iteration: 63610
loss: 0.9661697745323181,grad_norm: 0.9967337285874796, iteration: 63611
loss: 1.0109626054763794,grad_norm: 0.9999997659449378, iteration: 63612
loss: 0.963849663734436,grad_norm: 0.854944575974203, iteration: 63613
loss: 0.9992536902427673,grad_norm: 0.9795174025556153, iteration: 63614
loss: 1.0769639015197754,grad_norm: 0.8727379696018663, iteration: 63615
loss: 0.9555659294128418,grad_norm: 0.9999992082639177, iteration: 63616
loss: 1.0196071863174438,grad_norm: 0.9999990612623392, iteration: 63617
loss: 1.0493888854980469,grad_norm: 0.9180238335071413, iteration: 63618
loss: 1.0230629444122314,grad_norm: 0.9999999080136176, iteration: 63619
loss: 1.064241647720337,grad_norm: 0.9999994785501889, iteration: 63620
loss: 0.9956451654434204,grad_norm: 0.9999990404681806, iteration: 63621
loss: 1.0120059251785278,grad_norm: 0.9850265204501117, iteration: 63622
loss: 0.9703109264373779,grad_norm: 0.9999991501547215, iteration: 63623
loss: 0.9917580485343933,grad_norm: 0.8745597022499063, iteration: 63624
loss: 1.0495034456253052,grad_norm: 0.9999992376450331, iteration: 63625
loss: 1.0149857997894287,grad_norm: 0.9999989800955434, iteration: 63626
loss: 1.0035663843154907,grad_norm: 0.9145552077437668, iteration: 63627
loss: 1.0043169260025024,grad_norm: 0.8252881547209379, iteration: 63628
loss: 1.081321120262146,grad_norm: 0.999999440440114, iteration: 63629
loss: 1.0330209732055664,grad_norm: 0.9999996507567972, iteration: 63630
loss: 0.9799741506576538,grad_norm: 0.9000327775269226, iteration: 63631
loss: 0.9710066318511963,grad_norm: 0.8869511760475574, iteration: 63632
loss: 1.0241001844406128,grad_norm: 0.9999992813251358, iteration: 63633
loss: 1.0342931747436523,grad_norm: 0.9999993597319867, iteration: 63634
loss: 0.9800494909286499,grad_norm: 0.9999990888188501, iteration: 63635
loss: 0.995846688747406,grad_norm: 0.9412256918924558, iteration: 63636
loss: 0.9771215319633484,grad_norm: 0.955193795467027, iteration: 63637
loss: 1.0021344423294067,grad_norm: 0.9999993898462499, iteration: 63638
loss: 0.9927117824554443,grad_norm: 0.9474924902538009, iteration: 63639
loss: 1.0126245021820068,grad_norm: 0.8724533566224973, iteration: 63640
loss: 0.9886533617973328,grad_norm: 0.8460479372534698, iteration: 63641
loss: 0.9900768995285034,grad_norm: 0.9999991263494741, iteration: 63642
loss: 1.0125058889389038,grad_norm: 0.7472625966790952, iteration: 63643
loss: 1.0031968355178833,grad_norm: 0.9370821216491146, iteration: 63644
loss: 0.9531323909759521,grad_norm: 0.8961291838718353, iteration: 63645
loss: 1.0435541868209839,grad_norm: 0.9999991632642717, iteration: 63646
loss: 1.0119487047195435,grad_norm: 0.999999125148014, iteration: 63647
loss: 1.0182580947875977,grad_norm: 0.9999994614903617, iteration: 63648
loss: 1.0286248922348022,grad_norm: 0.9478632478749031, iteration: 63649
loss: 0.9847415089607239,grad_norm: 0.8153823985056242, iteration: 63650
loss: 1.0005900859832764,grad_norm: 0.9999781018758114, iteration: 63651
loss: 1.0148444175720215,grad_norm: 0.9999990871061543, iteration: 63652
loss: 0.984002411365509,grad_norm: 0.9470447074198425, iteration: 63653
loss: 0.9508828520774841,grad_norm: 0.8104558157377338, iteration: 63654
loss: 1.011970043182373,grad_norm: 0.9284574318473929, iteration: 63655
loss: 0.9963318109512329,grad_norm: 0.8980489560381122, iteration: 63656
loss: 0.9823219776153564,grad_norm: 0.9999992414194103, iteration: 63657
loss: 0.9907721281051636,grad_norm: 0.9999992866639944, iteration: 63658
loss: 1.004035234451294,grad_norm: 0.9288319880642891, iteration: 63659
loss: 1.024323582649231,grad_norm: 0.9999992090105542, iteration: 63660
loss: 1.0596929788589478,grad_norm: 0.999999506594064, iteration: 63661
loss: 1.0256428718566895,grad_norm: 0.9445690335103409, iteration: 63662
loss: 0.9706631898880005,grad_norm: 0.8254872851078823, iteration: 63663
loss: 0.9549718499183655,grad_norm: 0.9454045599695069, iteration: 63664
loss: 1.0205302238464355,grad_norm: 0.8406668903844308, iteration: 63665
loss: 0.9628558158874512,grad_norm: 0.9379002458431666, iteration: 63666
loss: 1.0113184452056885,grad_norm: 0.9999994824857021, iteration: 63667
loss: 1.0810976028442383,grad_norm: 0.9375736095659976, iteration: 63668
loss: 0.9800624251365662,grad_norm: 0.9206254562958616, iteration: 63669
loss: 0.9531536102294922,grad_norm: 0.9710816484538305, iteration: 63670
loss: 1.02957284450531,grad_norm: 0.8573201075183121, iteration: 63671
loss: 1.0400019884109497,grad_norm: 0.9999990646055054, iteration: 63672
loss: 1.0051075220108032,grad_norm: 0.9208548974965614, iteration: 63673
loss: 1.0047314167022705,grad_norm: 0.9999995690575055, iteration: 63674
loss: 1.003220796585083,grad_norm: 0.87137126159113, iteration: 63675
loss: 1.0209922790527344,grad_norm: 0.9999996721362963, iteration: 63676
loss: 0.9823151230812073,grad_norm: 0.999998994344619, iteration: 63677
loss: 1.0341789722442627,grad_norm: 0.9514420322136363, iteration: 63678
loss: 0.9779666662216187,grad_norm: 0.999999203638946, iteration: 63679
loss: 1.0099197626113892,grad_norm: 0.9999991052820388, iteration: 63680
loss: 1.0212517976760864,grad_norm: 0.9999993212304127, iteration: 63681
loss: 1.0388928651809692,grad_norm: 0.9999999102158728, iteration: 63682
loss: 0.9933611154556274,grad_norm: 0.8670388646965924, iteration: 63683
loss: 1.0339866876602173,grad_norm: 0.9999993328370154, iteration: 63684
loss: 1.0203588008880615,grad_norm: 0.9609503418489171, iteration: 63685
loss: 0.988041341304779,grad_norm: 0.9999993089381658, iteration: 63686
loss: 0.9789235591888428,grad_norm: 0.9458427093750748, iteration: 63687
loss: 1.0074063539505005,grad_norm: 0.9309870891996841, iteration: 63688
loss: 0.9855388402938843,grad_norm: 0.971751180155804, iteration: 63689
loss: 0.9672852158546448,grad_norm: 0.9202174278568739, iteration: 63690
loss: 0.9928449392318726,grad_norm: 0.960183978914303, iteration: 63691
loss: 0.9875495433807373,grad_norm: 0.9999991430623631, iteration: 63692
loss: 1.0231032371520996,grad_norm: 0.8747714787215691, iteration: 63693
loss: 1.0241131782531738,grad_norm: 0.9999991765342725, iteration: 63694
loss: 0.996707558631897,grad_norm: 0.8585365070152837, iteration: 63695
loss: 0.9951865077018738,grad_norm: 0.8704405132320132, iteration: 63696
loss: 1.0133925676345825,grad_norm: 0.9999991508775653, iteration: 63697
loss: 1.0048149824142456,grad_norm: 0.9442423895810016, iteration: 63698
loss: 1.041736364364624,grad_norm: 0.9999995639200225, iteration: 63699
loss: 1.030763864517212,grad_norm: 0.9999990898374768, iteration: 63700
loss: 1.057541847229004,grad_norm: 0.9999995449466245, iteration: 63701
loss: 0.9856053590774536,grad_norm: 0.9069077515272969, iteration: 63702
loss: 1.0188426971435547,grad_norm: 0.8277667455429899, iteration: 63703
loss: 1.0257381200790405,grad_norm: 0.884076895339445, iteration: 63704
loss: 0.9857339262962341,grad_norm: 0.9251560358907848, iteration: 63705
loss: 0.9865195751190186,grad_norm: 0.8521539127563267, iteration: 63706
loss: 1.016028881072998,grad_norm: 0.9999990230277127, iteration: 63707
loss: 1.001691222190857,grad_norm: 0.9956374827933531, iteration: 63708
loss: 1.038454532623291,grad_norm: 0.9610108055382031, iteration: 63709
loss: 0.9816405177116394,grad_norm: 0.9999991500015329, iteration: 63710
loss: 0.9950556755065918,grad_norm: 0.8674767828998055, iteration: 63711
loss: 0.9713162183761597,grad_norm: 0.9747163072776125, iteration: 63712
loss: 0.9930316209793091,grad_norm: 0.9929874181273655, iteration: 63713
loss: 1.0169624090194702,grad_norm: 0.9183217364361795, iteration: 63714
loss: 0.9974433779716492,grad_norm: 0.9063805421070792, iteration: 63715
loss: 0.9795083999633789,grad_norm: 0.9999991650787653, iteration: 63716
loss: 1.00837242603302,grad_norm: 0.9332331882597011, iteration: 63717
loss: 0.9860557317733765,grad_norm: 0.9999990494294336, iteration: 63718
loss: 0.9848044514656067,grad_norm: 0.9999999257337687, iteration: 63719
loss: 0.9622166156768799,grad_norm: 0.9999990648561271, iteration: 63720
loss: 0.9895925521850586,grad_norm: 0.999999280745919, iteration: 63721
loss: 1.0123530626296997,grad_norm: 0.8669673950744707, iteration: 63722
loss: 1.0153167247772217,grad_norm: 0.8878383225639161, iteration: 63723
loss: 0.9988769292831421,grad_norm: 0.9199759377465867, iteration: 63724
loss: 0.9802643656730652,grad_norm: 0.9999992583299911, iteration: 63725
loss: 1.039549708366394,grad_norm: 0.9999991276176641, iteration: 63726
loss: 0.9917656183242798,grad_norm: 0.9999990104751392, iteration: 63727
loss: 0.9747375249862671,grad_norm: 0.9999994765447033, iteration: 63728
loss: 0.9912279844284058,grad_norm: 0.9999991843703547, iteration: 63729
loss: 0.9992809891700745,grad_norm: 0.8459860001053632, iteration: 63730
loss: 1.0559043884277344,grad_norm: 0.9999991124433911, iteration: 63731
loss: 0.99857497215271,grad_norm: 0.9541467566935664, iteration: 63732
loss: 0.9921180605888367,grad_norm: 0.9999998652529344, iteration: 63733
loss: 0.9884337782859802,grad_norm: 0.7595716349263939, iteration: 63734
loss: 1.0267438888549805,grad_norm: 0.99999991941289, iteration: 63735
loss: 1.00359046459198,grad_norm: 0.8819257119674958, iteration: 63736
loss: 1.0094456672668457,grad_norm: 0.9999991986622814, iteration: 63737
loss: 1.0577913522720337,grad_norm: 0.9999991820166353, iteration: 63738
loss: 0.9894334673881531,grad_norm: 0.8526416649797305, iteration: 63739
loss: 1.003105640411377,grad_norm: 0.9379560211455538, iteration: 63740
loss: 1.1578847169876099,grad_norm: 0.8083203752192539, iteration: 63741
loss: 0.9952347278594971,grad_norm: 0.9973301950994687, iteration: 63742
loss: 0.9889660477638245,grad_norm: 0.999999118910391, iteration: 63743
loss: 1.0210063457489014,grad_norm: 0.999999225407324, iteration: 63744
loss: 0.9915306568145752,grad_norm: 0.9999992913298361, iteration: 63745
loss: 0.969682514667511,grad_norm: 0.9128241861220108, iteration: 63746
loss: 0.9897286295890808,grad_norm: 0.9555775853548203, iteration: 63747
loss: 1.0148065090179443,grad_norm: 0.9999991177255864, iteration: 63748
loss: 0.9764474034309387,grad_norm: 0.9210255476125423, iteration: 63749
loss: 1.0130053758621216,grad_norm: 0.8113347782209279, iteration: 63750
loss: 1.0326217412948608,grad_norm: 0.9249028604853018, iteration: 63751
loss: 1.075918197631836,grad_norm: 0.8403324623083555, iteration: 63752
loss: 1.0150457620620728,grad_norm: 0.9999991037633122, iteration: 63753
loss: 1.014798641204834,grad_norm: 0.9865291032426856, iteration: 63754
loss: 1.0012164115905762,grad_norm: 0.9999990844513358, iteration: 63755
loss: 1.0010424852371216,grad_norm: 0.8354050360067428, iteration: 63756
loss: 1.0214993953704834,grad_norm: 0.920009917920392, iteration: 63757
loss: 1.000069260597229,grad_norm: 0.928738442355281, iteration: 63758
loss: 0.9948749542236328,grad_norm: 0.9488052952562459, iteration: 63759
loss: 0.9685681462287903,grad_norm: 0.9320329851806937, iteration: 63760
loss: 1.0200293064117432,grad_norm: 0.9507022490501918, iteration: 63761
loss: 1.017088532447815,grad_norm: 0.999999145950937, iteration: 63762
loss: 0.999544620513916,grad_norm: 0.7626092885656914, iteration: 63763
loss: 0.9878482222557068,grad_norm: 0.7610698752052756, iteration: 63764
loss: 0.9487044811248779,grad_norm: 0.8705746004779538, iteration: 63765
loss: 1.0238127708435059,grad_norm: 0.9999995128720832, iteration: 63766
loss: 0.9789168834686279,grad_norm: 0.9401150450286322, iteration: 63767
loss: 1.0352561473846436,grad_norm: 0.9999994119608795, iteration: 63768
loss: 1.0091731548309326,grad_norm: 0.8448368234418482, iteration: 63769
loss: 0.9798172116279602,grad_norm: 0.8537070054282101, iteration: 63770
loss: 0.9895225763320923,grad_norm: 0.8492835838379664, iteration: 63771
loss: 1.0152266025543213,grad_norm: 0.9999991526745122, iteration: 63772
loss: 1.0723830461502075,grad_norm: 0.999999559586003, iteration: 63773
loss: 0.9943215250968933,grad_norm: 0.9016801273225953, iteration: 63774
loss: 0.9984939694404602,grad_norm: 0.9553614521912789, iteration: 63775
loss: 1.0021780729293823,grad_norm: 0.999999084187307, iteration: 63776
loss: 1.0526443719863892,grad_norm: 0.9357515898661602, iteration: 63777
loss: 0.9810724258422852,grad_norm: 0.9999989942572355, iteration: 63778
loss: 0.992432713508606,grad_norm: 0.8327700106086056, iteration: 63779
loss: 0.9711478352546692,grad_norm: 0.9118776986383125, iteration: 63780
loss: 1.0241092443466187,grad_norm: 0.9999994687205197, iteration: 63781
loss: 0.9955160021781921,grad_norm: 0.9094687392439063, iteration: 63782
loss: 1.0286222696304321,grad_norm: 0.999998991870574, iteration: 63783
loss: 1.0040738582611084,grad_norm: 0.9899871938170594, iteration: 63784
loss: 0.9691239595413208,grad_norm: 0.9272440843899137, iteration: 63785
loss: 0.9832951426506042,grad_norm: 0.8683004007731785, iteration: 63786
loss: 1.0455081462860107,grad_norm: 0.9999993302633443, iteration: 63787
loss: 1.0179803371429443,grad_norm: 0.9999994728661317, iteration: 63788
loss: 1.0066620111465454,grad_norm: 0.9406016559472757, iteration: 63789
loss: 0.9893359541893005,grad_norm: 0.9618779286571854, iteration: 63790
loss: 1.0905818939208984,grad_norm: 0.9999996074895491, iteration: 63791
loss: 0.9624351859092712,grad_norm: 0.8389694574723349, iteration: 63792
loss: 1.0506666898727417,grad_norm: 0.9999993891940648, iteration: 63793
loss: 0.9918827414512634,grad_norm: 0.9999997865611989, iteration: 63794
loss: 1.0160267353057861,grad_norm: 0.9038228540562171, iteration: 63795
loss: 1.0073899030685425,grad_norm: 0.9678401242663859, iteration: 63796
loss: 1.035935640335083,grad_norm: 0.9078011747226824, iteration: 63797
loss: 1.0344645977020264,grad_norm: 0.9999993757537029, iteration: 63798
loss: 1.0033303499221802,grad_norm: 0.9896155676642132, iteration: 63799
loss: 1.0061465501785278,grad_norm: 0.9999990781198059, iteration: 63800
loss: 0.9678490161895752,grad_norm: 0.7768132808776971, iteration: 63801
loss: 1.058911681175232,grad_norm: 0.9407029442301852, iteration: 63802
loss: 1.0128059387207031,grad_norm: 0.8875271084436781, iteration: 63803
loss: 1.01260507106781,grad_norm: 0.9390173899192108, iteration: 63804
loss: 1.0148478746414185,grad_norm: 0.8658873382125755, iteration: 63805
loss: 1.0166112184524536,grad_norm: 0.9999990082257639, iteration: 63806
loss: 1.0045851469039917,grad_norm: 0.8946560914168974, iteration: 63807
loss: 1.0040314197540283,grad_norm: 0.9587878435943418, iteration: 63808
loss: 1.0355055332183838,grad_norm: 0.999999465618973, iteration: 63809
loss: 1.0178879499435425,grad_norm: 0.7508283331443529, iteration: 63810
loss: 0.971403181552887,grad_norm: 0.9271458794919386, iteration: 63811
loss: 0.9654492735862732,grad_norm: 0.9999992897977682, iteration: 63812
loss: 0.9785237312316895,grad_norm: 0.8721614104907417, iteration: 63813
loss: 1.0253769159317017,grad_norm: 0.9999995225847055, iteration: 63814
loss: 0.9580182433128357,grad_norm: 0.9194922347286527, iteration: 63815
loss: 0.9723319411277771,grad_norm: 0.9999992930169364, iteration: 63816
loss: 0.9559771418571472,grad_norm: 0.8402275195782993, iteration: 63817
loss: 1.0340293645858765,grad_norm: 0.8733871828962565, iteration: 63818
loss: 0.9874939918518066,grad_norm: 0.8513987065668784, iteration: 63819
loss: 1.0187277793884277,grad_norm: 0.9999993128247313, iteration: 63820
loss: 0.9932084679603577,grad_norm: 0.9999992339786513, iteration: 63821
loss: 0.9957027435302734,grad_norm: 0.7937543421316037, iteration: 63822
loss: 0.9892725348472595,grad_norm: 0.8421823544165963, iteration: 63823
loss: 1.0046170949935913,grad_norm: 0.9745876277901493, iteration: 63824
loss: 1.0085930824279785,grad_norm: 0.8103924970976972, iteration: 63825
loss: 1.0283762216567993,grad_norm: 0.9999993389142148, iteration: 63826
loss: 0.9763197898864746,grad_norm: 0.8760210190939345, iteration: 63827
loss: 1.0035673379898071,grad_norm: 0.9350648226456758, iteration: 63828
loss: 1.0117566585540771,grad_norm: 0.8792117348336975, iteration: 63829
loss: 1.01284921169281,grad_norm: 0.9999993105677409, iteration: 63830
loss: 1.0681607723236084,grad_norm: 0.9999997348625048, iteration: 63831
loss: 0.9864743947982788,grad_norm: 0.874871977707685, iteration: 63832
loss: 1.0064634084701538,grad_norm: 0.9553828790706516, iteration: 63833
loss: 1.0189545154571533,grad_norm: 0.9999990317848407, iteration: 63834
loss: 1.008178949356079,grad_norm: 0.9577990821201373, iteration: 63835
loss: 0.9780181646347046,grad_norm: 0.9999991829367401, iteration: 63836
loss: 0.958820641040802,grad_norm: 0.9999991685500128, iteration: 63837
loss: 0.9753414988517761,grad_norm: 0.9999991761520877, iteration: 63838
loss: 1.0168086290359497,grad_norm: 0.9585731194766214, iteration: 63839
loss: 1.0573519468307495,grad_norm: 0.9999994031750996, iteration: 63840
loss: 1.0133801698684692,grad_norm: 0.962466594628545, iteration: 63841
loss: 1.0142275094985962,grad_norm: 0.909971483759165, iteration: 63842
loss: 1.0102945566177368,grad_norm: 0.9999991917329683, iteration: 63843
loss: 1.0945605039596558,grad_norm: 0.999999711767335, iteration: 63844
loss: 0.9996445775032043,grad_norm: 0.9541520104265694, iteration: 63845
loss: 0.9838802814483643,grad_norm: 0.9800471377065009, iteration: 63846
loss: 1.0588276386260986,grad_norm: 0.9999995444081623, iteration: 63847
loss: 1.0299654006958008,grad_norm: 0.9218687336877345, iteration: 63848
loss: 1.022108554840088,grad_norm: 0.9564240236628239, iteration: 63849
loss: 0.9965860843658447,grad_norm: 0.957675763092685, iteration: 63850
loss: 0.974987804889679,grad_norm: 0.9037192661063471, iteration: 63851
loss: 1.008509635925293,grad_norm: 0.8785536455040754, iteration: 63852
loss: 1.0155086517333984,grad_norm: 0.905606839134912, iteration: 63853
loss: 1.0016716718673706,grad_norm: 0.99999907185308, iteration: 63854
loss: 0.9890949726104736,grad_norm: 0.8324781599421098, iteration: 63855
loss: 0.9988580942153931,grad_norm: 0.770328916144215, iteration: 63856
loss: 1.0076020956039429,grad_norm: 0.9999995791686311, iteration: 63857
loss: 1.1222355365753174,grad_norm: 0.9999995979080631, iteration: 63858
loss: 0.9988908767700195,grad_norm: 0.9999991663360182, iteration: 63859
loss: 0.9819117188453674,grad_norm: 0.8001277648529076, iteration: 63860
loss: 1.0084398984909058,grad_norm: 0.8891805377237428, iteration: 63861
loss: 1.0667052268981934,grad_norm: 0.999999183133631, iteration: 63862
loss: 1.0041098594665527,grad_norm: 0.9377474771966237, iteration: 63863
loss: 0.9685942530632019,grad_norm: 0.9926765791517865, iteration: 63864
loss: 1.0335966348648071,grad_norm: 0.9999990849548963, iteration: 63865
loss: 0.9876854419708252,grad_norm: 0.9321490818929558, iteration: 63866
loss: 1.0137654542922974,grad_norm: 0.919657724587382, iteration: 63867
loss: 1.0296601057052612,grad_norm: 0.8997889591312339, iteration: 63868
loss: 1.0043131113052368,grad_norm: 0.9346386285503363, iteration: 63869
loss: 0.9852173328399658,grad_norm: 0.8786880789185378, iteration: 63870
loss: 1.080152153968811,grad_norm: 0.9999991410764483, iteration: 63871
loss: 1.0449548959732056,grad_norm: 0.9999991094801651, iteration: 63872
loss: 0.9740279912948608,grad_norm: 0.9999992202417791, iteration: 63873
loss: 0.961937665939331,grad_norm: 0.9713088057519227, iteration: 63874
loss: 0.9973480701446533,grad_norm: 0.9421865504067729, iteration: 63875
loss: 0.9990645051002502,grad_norm: 0.8922111065010055, iteration: 63876
loss: 0.981485903263092,grad_norm: 0.9999991601037947, iteration: 63877
loss: 1.009127140045166,grad_norm: 0.9999992926847978, iteration: 63878
loss: 1.0408650636672974,grad_norm: 0.9999997922122107, iteration: 63879
loss: 1.0417943000793457,grad_norm: 0.7808449292634483, iteration: 63880
loss: 0.9946947693824768,grad_norm: 0.9999991109513439, iteration: 63881
loss: 1.027463436126709,grad_norm: 0.9885568757783765, iteration: 63882
loss: 1.0508641004562378,grad_norm: 0.999999618810015, iteration: 63883
loss: 1.0227855443954468,grad_norm: 0.8622936140876207, iteration: 63884
loss: 0.9942048788070679,grad_norm: 0.9143398698770479, iteration: 63885
loss: 1.017215371131897,grad_norm: 0.9890574253612202, iteration: 63886
loss: 0.9969341158866882,grad_norm: 0.9999991092125303, iteration: 63887
loss: 1.022739052772522,grad_norm: 0.999999022796683, iteration: 63888
loss: 0.9991594552993774,grad_norm: 0.9999991562164608, iteration: 63889
loss: 1.0208028554916382,grad_norm: 0.9999992510419529, iteration: 63890
loss: 1.0062992572784424,grad_norm: 0.9999990199859743, iteration: 63891
loss: 1.0201464891433716,grad_norm: 0.9564172084758634, iteration: 63892
loss: 0.9850335121154785,grad_norm: 0.9145701951825991, iteration: 63893
loss: 1.1603127717971802,grad_norm: 0.999999819678982, iteration: 63894
loss: 0.993024468421936,grad_norm: 0.864382038942513, iteration: 63895
loss: 0.9783440232276917,grad_norm: 0.8485166501240995, iteration: 63896
loss: 1.0735764503479004,grad_norm: 0.9999993385624887, iteration: 63897
loss: 0.996738076210022,grad_norm: 0.9999994735981886, iteration: 63898
loss: 0.9931671023368835,grad_norm: 0.7915218374969539, iteration: 63899
loss: 1.0106703042984009,grad_norm: 0.8987231365599393, iteration: 63900
loss: 1.0088996887207031,grad_norm: 0.9999992457408989, iteration: 63901
loss: 1.0194114446640015,grad_norm: 0.8366211892677728, iteration: 63902
loss: 0.9959562420845032,grad_norm: 0.9999990463004268, iteration: 63903
loss: 1.0016294717788696,grad_norm: 0.9677502937940499, iteration: 63904
loss: 0.9792171120643616,grad_norm: 0.820445361358386, iteration: 63905
loss: 0.9925826787948608,grad_norm: 0.9556029163160965, iteration: 63906
loss: 1.0115247964859009,grad_norm: 0.9999992276926858, iteration: 63907
loss: 1.002131462097168,grad_norm: 0.8707958172274954, iteration: 63908
loss: 1.0103623867034912,grad_norm: 0.8015340383956808, iteration: 63909
loss: 1.004818081855774,grad_norm: 0.9718049322842677, iteration: 63910
loss: 0.9885198473930359,grad_norm: 0.9879604914659363, iteration: 63911
loss: 1.0211174488067627,grad_norm: 0.8186767425596869, iteration: 63912
loss: 1.005131483078003,grad_norm: 0.9288532840163872, iteration: 63913
loss: 1.0148191452026367,grad_norm: 0.9836647711288328, iteration: 63914
loss: 1.0338168144226074,grad_norm: 0.9725326956506539, iteration: 63915
loss: 1.0423099994659424,grad_norm: 0.9999992760403424, iteration: 63916
loss: 1.0382381677627563,grad_norm: 0.98089523214617, iteration: 63917
loss: 0.9845451712608337,grad_norm: 0.9999991520514371, iteration: 63918
loss: 0.9960697889328003,grad_norm: 0.9502871912812013, iteration: 63919
loss: 0.9962508082389832,grad_norm: 0.9069786846105405, iteration: 63920
loss: 1.0294954776763916,grad_norm: 0.9999993188974932, iteration: 63921
loss: 0.9839569330215454,grad_norm: 0.9999991752178169, iteration: 63922
loss: 0.9979490637779236,grad_norm: 0.8152702196380973, iteration: 63923
loss: 1.0404986143112183,grad_norm: 0.9048711031991722, iteration: 63924
loss: 1.0115307569503784,grad_norm: 0.999999440905344, iteration: 63925
loss: 0.9785516858100891,grad_norm: 0.8482891235054612, iteration: 63926
loss: 0.9745638370513916,grad_norm: 0.9999990874170386, iteration: 63927
loss: 0.9750204682350159,grad_norm: 0.8489378720860539, iteration: 63928
loss: 1.033992886543274,grad_norm: 0.9999991100767236, iteration: 63929
loss: 1.016333818435669,grad_norm: 0.9999990604407262, iteration: 63930
loss: 1.007746696472168,grad_norm: 0.9247789535346906, iteration: 63931
loss: 0.9941492080688477,grad_norm: 0.9264295483566326, iteration: 63932
loss: 1.0051813125610352,grad_norm: 0.9059859885371395, iteration: 63933
loss: 0.98603355884552,grad_norm: 0.8440680899815388, iteration: 63934
loss: 0.9859068989753723,grad_norm: 0.9559963620500544, iteration: 63935
loss: 1.0139691829681396,grad_norm: 0.9999990321296156, iteration: 63936
loss: 1.0186270475387573,grad_norm: 0.8443756266217981, iteration: 63937
loss: 1.0386370420455933,grad_norm: 0.9999990767482115, iteration: 63938
loss: 1.0051556825637817,grad_norm: 0.8843820926921223, iteration: 63939
loss: 1.0034006834030151,grad_norm: 0.9999990115058686, iteration: 63940
loss: 1.0213314294815063,grad_norm: 0.8721075708363144, iteration: 63941
loss: 1.0117073059082031,grad_norm: 0.9999990323984214, iteration: 63942
loss: 1.0097638368606567,grad_norm: 0.8347446932638931, iteration: 63943
loss: 1.017723560333252,grad_norm: 0.84201310695417, iteration: 63944
loss: 0.9610918760299683,grad_norm: 0.9999990808836131, iteration: 63945
loss: 1.015725016593933,grad_norm: 0.9582374607558056, iteration: 63946
loss: 1.0208436250686646,grad_norm: 0.79944455259203, iteration: 63947
loss: 1.0026966333389282,grad_norm: 0.893200845012828, iteration: 63948
loss: 1.0177624225616455,grad_norm: 0.9999988833825693, iteration: 63949
loss: 1.012174129486084,grad_norm: 0.9999991460958064, iteration: 63950
loss: 1.021251916885376,grad_norm: 0.9480916393238952, iteration: 63951
loss: 0.9890549182891846,grad_norm: 0.8416970085074514, iteration: 63952
loss: 0.9907746315002441,grad_norm: 0.9425575286834902, iteration: 63953
loss: 1.0190870761871338,grad_norm: 0.8999741351753571, iteration: 63954
loss: 0.9805433750152588,grad_norm: 0.919377493421791, iteration: 63955
loss: 0.9557495713233948,grad_norm: 0.9999992443175756, iteration: 63956
loss: 1.0448195934295654,grad_norm: 0.9160629721694553, iteration: 63957
loss: 1.0005872249603271,grad_norm: 0.9608183710834358, iteration: 63958
loss: 0.9876795411109924,grad_norm: 0.9212094278994634, iteration: 63959
loss: 1.0266990661621094,grad_norm: 0.9999996910964408, iteration: 63960
loss: 1.0042179822921753,grad_norm: 0.9058351548649376, iteration: 63961
loss: 1.0121995210647583,grad_norm: 0.8300865470639502, iteration: 63962
loss: 1.0250043869018555,grad_norm: 0.9999991806759422, iteration: 63963
loss: 1.0040760040283203,grad_norm: 0.9999991140609654, iteration: 63964
loss: 1.030008316040039,grad_norm: 0.9999990576099387, iteration: 63965
loss: 1.0547349452972412,grad_norm: 0.8900038468900433, iteration: 63966
loss: 1.0149500370025635,grad_norm: 0.8332550873998396, iteration: 63967
loss: 1.0088938474655151,grad_norm: 0.7788375708944412, iteration: 63968
loss: 1.020266056060791,grad_norm: 0.9999996774112843, iteration: 63969
loss: 1.023075819015503,grad_norm: 0.8160582902360566, iteration: 63970
loss: 1.0109938383102417,grad_norm: 0.9872633733694043, iteration: 63971
loss: 1.031750202178955,grad_norm: 0.9589814997493253, iteration: 63972
loss: 1.0077458620071411,grad_norm: 0.9319522124176317, iteration: 63973
loss: 1.052161455154419,grad_norm: 0.8638313552335987, iteration: 63974
loss: 1.0341978073120117,grad_norm: 0.9999995457060397, iteration: 63975
loss: 0.9802561402320862,grad_norm: 0.7476194196453269, iteration: 63976
loss: 0.9893126487731934,grad_norm: 0.9999992371784606, iteration: 63977
loss: 0.971703290939331,grad_norm: 0.9227200558281738, iteration: 63978
loss: 0.9866507649421692,grad_norm: 0.918329272002077, iteration: 63979
loss: 0.9902865290641785,grad_norm: 0.9999991463428467, iteration: 63980
loss: 1.0061663389205933,grad_norm: 0.7561369613487963, iteration: 63981
loss: 1.0439473390579224,grad_norm: 0.9999993862799165, iteration: 63982
loss: 0.9947360754013062,grad_norm: 0.9999994692834101, iteration: 63983
loss: 1.0010942220687866,grad_norm: 0.8578607655888776, iteration: 63984
loss: 0.9608854651451111,grad_norm: 0.87814677418728, iteration: 63985
loss: 1.0357091426849365,grad_norm: 0.9999995811132235, iteration: 63986
loss: 1.1091796159744263,grad_norm: 0.9999997019147504, iteration: 63987
loss: 1.002955436706543,grad_norm: 0.9074929904174284, iteration: 63988
loss: 1.0181576013565063,grad_norm: 0.9999991617653977, iteration: 63989
loss: 1.0021822452545166,grad_norm: 0.9899976266233808, iteration: 63990
loss: 1.0098336935043335,grad_norm: 0.999999040512954, iteration: 63991
loss: 1.0693048238754272,grad_norm: 0.9999991360322346, iteration: 63992
loss: 1.0046271085739136,grad_norm: 0.9416664192948906, iteration: 63993
loss: 1.0264443159103394,grad_norm: 0.9718046119824085, iteration: 63994
loss: 0.9730474948883057,grad_norm: 0.8212813417774338, iteration: 63995
loss: 0.994712233543396,grad_norm: 0.9999995653994843, iteration: 63996
loss: 1.0235280990600586,grad_norm: 0.9860675130859378, iteration: 63997
loss: 1.0124064683914185,grad_norm: 0.9999992172300499, iteration: 63998
loss: 0.9836617112159729,grad_norm: 0.9278826451353198, iteration: 63999
loss: 1.015658974647522,grad_norm: 0.8673615517623607, iteration: 64000
loss: 1.022416353225708,grad_norm: 0.9999991912054199, iteration: 64001
loss: 0.989479124546051,grad_norm: 0.9999991886026839, iteration: 64002
loss: 1.0236907005310059,grad_norm: 0.984147318206422, iteration: 64003
loss: 1.0204041004180908,grad_norm: 0.975851889077907, iteration: 64004
loss: 1.0002402067184448,grad_norm: 0.8542833362816196, iteration: 64005
loss: 1.0310776233673096,grad_norm: 0.9105090795115522, iteration: 64006
loss: 1.0214537382125854,grad_norm: 0.9999992081919113, iteration: 64007
loss: 0.9831981658935547,grad_norm: 0.9999991615953854, iteration: 64008
loss: 1.0078521966934204,grad_norm: 0.999999176800303, iteration: 64009
loss: 0.9985631704330444,grad_norm: 0.8110274745133049, iteration: 64010
loss: 0.9920052886009216,grad_norm: 0.9973492505885925, iteration: 64011
loss: 0.973228931427002,grad_norm: 0.9965285772026918, iteration: 64012
loss: 0.9644773602485657,grad_norm: 0.9705936387349602, iteration: 64013
loss: 0.9854132533073425,grad_norm: 0.7634839665557019, iteration: 64014
loss: 0.9860912561416626,grad_norm: 0.91198087682899, iteration: 64015
loss: 1.0607866048812866,grad_norm: 0.9999999594495942, iteration: 64016
loss: 1.0439887046813965,grad_norm: 0.9537495617444737, iteration: 64017
loss: 0.9957346320152283,grad_norm: 0.9999999598058735, iteration: 64018
loss: 1.0104650259017944,grad_norm: 0.9999990552845801, iteration: 64019
loss: 1.0165647268295288,grad_norm: 0.9999990903863486, iteration: 64020
loss: 1.0739582777023315,grad_norm: 0.9999991206497475, iteration: 64021
loss: 0.9993587732315063,grad_norm: 0.9738703524037118, iteration: 64022
loss: 1.0087636709213257,grad_norm: 0.8248374264899976, iteration: 64023
loss: 1.1063430309295654,grad_norm: 0.999999821251364, iteration: 64024
loss: 1.0120320320129395,grad_norm: 0.9999991360299353, iteration: 64025
loss: 1.0309842824935913,grad_norm: 0.9999991540606343, iteration: 64026
loss: 0.9633467793464661,grad_norm: 0.8599402675224216, iteration: 64027
loss: 0.9805186986923218,grad_norm: 0.9999991010182431, iteration: 64028
loss: 1.0107204914093018,grad_norm: 0.999999060109118, iteration: 64029
loss: 0.9616573452949524,grad_norm: 0.8537918152413819, iteration: 64030
loss: 1.034326195716858,grad_norm: 0.9674820002836648, iteration: 64031
loss: 0.9587811827659607,grad_norm: 0.9374680498587988, iteration: 64032
loss: 0.9877797365188599,grad_norm: 0.9007921395135123, iteration: 64033
loss: 1.0406957864761353,grad_norm: 0.9999992487425878, iteration: 64034
loss: 0.9850082397460938,grad_norm: 0.967284230497467, iteration: 64035
loss: 1.046156406402588,grad_norm: 0.9513551088070892, iteration: 64036
loss: 1.000125765800476,grad_norm: 0.9999991574732154, iteration: 64037
loss: 1.0071390867233276,grad_norm: 0.9880945369143069, iteration: 64038
loss: 0.9834675192832947,grad_norm: 0.9614135460646831, iteration: 64039
loss: 0.9974995851516724,grad_norm: 0.9575799672448121, iteration: 64040
loss: 1.0047491788864136,grad_norm: 0.8000101678951703, iteration: 64041
loss: 0.9974754452705383,grad_norm: 0.915667132513684, iteration: 64042
loss: 1.01256263256073,grad_norm: 0.9078057958260093, iteration: 64043
loss: 1.0169471502304077,grad_norm: 0.9537491108144668, iteration: 64044
loss: 0.9698115587234497,grad_norm: 0.8356626094505895, iteration: 64045
loss: 1.005325436592102,grad_norm: 0.8726021783730041, iteration: 64046
loss: 1.0510833263397217,grad_norm: 0.9999999747426204, iteration: 64047
loss: 1.0326547622680664,grad_norm: 0.9999992131181835, iteration: 64048
loss: 1.0039786100387573,grad_norm: 0.999999057481976, iteration: 64049
loss: 1.028709888458252,grad_norm: 0.8307086713181802, iteration: 64050
loss: 0.9362791180610657,grad_norm: 0.9999991006255856, iteration: 64051
loss: 0.9589386582374573,grad_norm: 0.9925048350046728, iteration: 64052
loss: 1.0325345993041992,grad_norm: 0.8723645702897096, iteration: 64053
loss: 0.9960049986839294,grad_norm: 0.9332980596732515, iteration: 64054
loss: 1.0087124109268188,grad_norm: 0.999998940380677, iteration: 64055
loss: 0.9843778610229492,grad_norm: 0.7745901975745783, iteration: 64056
loss: 1.0133023262023926,grad_norm: 0.9046794301587956, iteration: 64057
loss: 1.0311906337738037,grad_norm: 0.9999994833792157, iteration: 64058
loss: 0.9970138669013977,grad_norm: 0.9999991566152786, iteration: 64059
loss: 0.9896011352539062,grad_norm: 0.9999989521106964, iteration: 64060
loss: 1.0355229377746582,grad_norm: 0.999999166197722, iteration: 64061
loss: 0.9843353033065796,grad_norm: 0.8359998751729298, iteration: 64062
loss: 1.024914026260376,grad_norm: 0.9999999934628925, iteration: 64063
loss: 1.0242723226547241,grad_norm: 0.9193618790629465, iteration: 64064
loss: 0.9997538924217224,grad_norm: 0.7446918304141803, iteration: 64065
loss: 0.9963763952255249,grad_norm: 0.9196004024523408, iteration: 64066
loss: 1.0008817911148071,grad_norm: 0.858440040190993, iteration: 64067
loss: 1.0487557649612427,grad_norm: 0.9837516070919063, iteration: 64068
loss: 0.9844654202461243,grad_norm: 0.8375607746164536, iteration: 64069
loss: 1.015394687652588,grad_norm: 0.9999996431668879, iteration: 64070
loss: 0.9767438173294067,grad_norm: 0.9538753792165909, iteration: 64071
loss: 1.0252621173858643,grad_norm: 0.9999990657237696, iteration: 64072
loss: 1.0470154285430908,grad_norm: 0.9999990585606148, iteration: 64073
loss: 0.9732503890991211,grad_norm: 0.83950682350113, iteration: 64074
loss: 0.9726377725601196,grad_norm: 0.9433636040179063, iteration: 64075
loss: 0.9495255947113037,grad_norm: 0.8996539803920715, iteration: 64076
loss: 1.0342392921447754,grad_norm: 0.9999993157623912, iteration: 64077
loss: 1.0313788652420044,grad_norm: 0.8829165159445803, iteration: 64078
loss: 0.9893063306808472,grad_norm: 0.9999994781201247, iteration: 64079
loss: 1.035041332244873,grad_norm: 0.9999992401565152, iteration: 64080
loss: 1.0300655364990234,grad_norm: 0.9999998018667491, iteration: 64081
loss: 0.9881377816200256,grad_norm: 0.9999990675044972, iteration: 64082
loss: 1.0472105741500854,grad_norm: 0.9999999066031428, iteration: 64083
loss: 0.9939878582954407,grad_norm: 0.9999992380571848, iteration: 64084
loss: 1.0200849771499634,grad_norm: 0.9620868212099652, iteration: 64085
loss: 1.0026721954345703,grad_norm: 0.8824710954859691, iteration: 64086
loss: 1.0347059965133667,grad_norm: 0.9999995862957205, iteration: 64087
loss: 1.0133962631225586,grad_norm: 0.9999993788346495, iteration: 64088
loss: 0.98520427942276,grad_norm: 0.8621921660945723, iteration: 64089
loss: 1.0388176441192627,grad_norm: 0.7980231299019829, iteration: 64090
loss: 1.023943305015564,grad_norm: 0.9999999099331827, iteration: 64091
loss: 0.9824830293655396,grad_norm: 0.835027304667736, iteration: 64092
loss: 0.986475944519043,grad_norm: 0.8479028857580891, iteration: 64093
loss: 1.007925033569336,grad_norm: 0.9999990802942781, iteration: 64094
loss: 1.053223729133606,grad_norm: 0.9695908682290215, iteration: 64095
loss: 1.0271084308624268,grad_norm: 0.9999989808503396, iteration: 64096
loss: 0.9752222895622253,grad_norm: 0.8477532846195138, iteration: 64097
loss: 0.9792422652244568,grad_norm: 0.7798746267237727, iteration: 64098
loss: 1.0025936365127563,grad_norm: 0.9558651234285651, iteration: 64099
loss: 0.9938979744911194,grad_norm: 0.7486851267780303, iteration: 64100
loss: 1.0144598484039307,grad_norm: 0.9999992952758862, iteration: 64101
loss: 1.0341312885284424,grad_norm: 0.9999989718958381, iteration: 64102
loss: 1.030904769897461,grad_norm: 0.9178166383990898, iteration: 64103
loss: 0.9925709366798401,grad_norm: 0.9999993981175894, iteration: 64104
loss: 1.0166524648666382,grad_norm: 0.9999998566896457, iteration: 64105
loss: 1.0227144956588745,grad_norm: 0.8879349587962763, iteration: 64106
loss: 0.9766814708709717,grad_norm: 0.9432476965836005, iteration: 64107
loss: 1.016784429550171,grad_norm: 0.9717608053679688, iteration: 64108
loss: 1.0299842357635498,grad_norm: 0.9999993871989131, iteration: 64109
loss: 1.0021367073059082,grad_norm: 0.9999990074722405, iteration: 64110
loss: 0.9849812984466553,grad_norm: 0.870622024675372, iteration: 64111
loss: 1.0197545289993286,grad_norm: 0.9898999324727112, iteration: 64112
loss: 1.022841453552246,grad_norm: 0.9999991024720741, iteration: 64113
loss: 1.037469744682312,grad_norm: 0.9999994294254806, iteration: 64114
loss: 1.0005476474761963,grad_norm: 0.8631548590047615, iteration: 64115
loss: 1.0251655578613281,grad_norm: 0.9465336758797567, iteration: 64116
loss: 1.0233618021011353,grad_norm: 0.9806846979465313, iteration: 64117
loss: 1.0404181480407715,grad_norm: 0.9999994122564823, iteration: 64118
loss: 1.0049399137496948,grad_norm: 0.7688353019830104, iteration: 64119
loss: 1.007843017578125,grad_norm: 0.9999992714250765, iteration: 64120
loss: 0.9441832900047302,grad_norm: 0.9999992254668297, iteration: 64121
loss: 0.9918398857116699,grad_norm: 0.9601267049093266, iteration: 64122
loss: 0.9940721392631531,grad_norm: 0.9238860597249534, iteration: 64123
loss: 1.0374358892440796,grad_norm: 0.9999991185769255, iteration: 64124
loss: 0.9921546578407288,grad_norm: 0.952318633098121, iteration: 64125
loss: 0.9995983839035034,grad_norm: 0.8666265952716599, iteration: 64126
loss: 0.9970967173576355,grad_norm: 0.9152251887654389, iteration: 64127
loss: 1.0063432455062866,grad_norm: 0.801222278696176, iteration: 64128
loss: 1.0170872211456299,grad_norm: 0.9999992015669145, iteration: 64129
loss: 1.032831072807312,grad_norm: 0.9999990790181932, iteration: 64130
loss: 0.9786194562911987,grad_norm: 0.8918234359257992, iteration: 64131
loss: 0.9704987406730652,grad_norm: 0.9094400462210506, iteration: 64132
loss: 0.9920364618301392,grad_norm: 0.8295361967029334, iteration: 64133
loss: 0.9854596853256226,grad_norm: 0.8839652046830224, iteration: 64134
loss: 0.9821635484695435,grad_norm: 0.9999991026098733, iteration: 64135
loss: 1.0369877815246582,grad_norm: 0.948708943249979, iteration: 64136
loss: 1.0039116144180298,grad_norm: 0.9999994492118093, iteration: 64137
loss: 0.995807945728302,grad_norm: 0.8305518847488564, iteration: 64138
loss: 1.0144689083099365,grad_norm: 0.9505809461404616, iteration: 64139
loss: 1.039528727531433,grad_norm: 0.8521890917842591, iteration: 64140
loss: 0.9871326088905334,grad_norm: 0.999999753159377, iteration: 64141
loss: 0.9989280104637146,grad_norm: 0.9098389091940569, iteration: 64142
loss: 1.0062024593353271,grad_norm: 0.9999991835588001, iteration: 64143
loss: 1.0226383209228516,grad_norm: 0.9105573926569006, iteration: 64144
loss: 0.9952726364135742,grad_norm: 0.9999992553730291, iteration: 64145
loss: 1.0760388374328613,grad_norm: 0.9999991826734508, iteration: 64146
loss: 1.0104763507843018,grad_norm: 0.8475371584941517, iteration: 64147
loss: 0.9830580353736877,grad_norm: 0.9999994399444111, iteration: 64148
loss: 0.9814325571060181,grad_norm: 0.9999992464955817, iteration: 64149
loss: 1.1162898540496826,grad_norm: 0.9999995381053639, iteration: 64150
loss: 1.0030137300491333,grad_norm: 0.8691245150989889, iteration: 64151
loss: 1.0630815029144287,grad_norm: 0.9999994895928594, iteration: 64152
loss: 1.0156002044677734,grad_norm: 0.8159708153690847, iteration: 64153
loss: 0.9937736988067627,grad_norm: 0.8613688162108416, iteration: 64154
loss: 1.0068191289901733,grad_norm: 0.803783766937403, iteration: 64155
loss: 0.9693816900253296,grad_norm: 0.9999993227975075, iteration: 64156
loss: 0.9959936738014221,grad_norm: 0.9192873241303117, iteration: 64157
loss: 0.9718453288078308,grad_norm: 0.935520321880119, iteration: 64158
loss: 1.0115281343460083,grad_norm: 0.9421048587483567, iteration: 64159
loss: 1.0060292482376099,grad_norm: 0.9600073016184362, iteration: 64160
loss: 1.0231672525405884,grad_norm: 0.9999990444497506, iteration: 64161
loss: 1.006239414215088,grad_norm: 0.9999992402945368, iteration: 64162
loss: 1.025889277458191,grad_norm: 0.9999998915255397, iteration: 64163
loss: 0.9932779669761658,grad_norm: 0.8262004402786987, iteration: 64164
loss: 0.9936791062355042,grad_norm: 0.8283897526370474, iteration: 64165
loss: 0.9674616456031799,grad_norm: 0.8730132319770518, iteration: 64166
loss: 1.020959734916687,grad_norm: 0.9534089544636205, iteration: 64167
loss: 1.0030319690704346,grad_norm: 0.9171227479049092, iteration: 64168
loss: 0.989782452583313,grad_norm: 0.8718212844419011, iteration: 64169
loss: 1.0331926345825195,grad_norm: 0.9999990784272144, iteration: 64170
loss: 1.0382511615753174,grad_norm: 0.9999994996242512, iteration: 64171
loss: 1.0160832405090332,grad_norm: 0.8819519219366345, iteration: 64172
loss: 1.0095552206039429,grad_norm: 0.8015576710158222, iteration: 64173
loss: 0.9442840218544006,grad_norm: 0.9793384255274711, iteration: 64174
loss: 0.9664172530174255,grad_norm: 0.9999991968607153, iteration: 64175
loss: 1.0846223831176758,grad_norm: 0.9999992932366248, iteration: 64176
loss: 0.9760860800743103,grad_norm: 0.9796708405537643, iteration: 64177
loss: 1.014370083808899,grad_norm: 0.9999992752802509, iteration: 64178
loss: 1.0246027708053589,grad_norm: 0.9229336299961645, iteration: 64179
loss: 1.0152212381362915,grad_norm: 0.9999991515278083, iteration: 64180
loss: 1.0272506475448608,grad_norm: 0.7865534190187897, iteration: 64181
loss: 1.000412940979004,grad_norm: 0.9747123651466097, iteration: 64182
loss: 0.9984365701675415,grad_norm: 0.8101105442649287, iteration: 64183
loss: 1.0241758823394775,grad_norm: 0.8557717578237667, iteration: 64184
loss: 1.0909864902496338,grad_norm: 0.9999998102162994, iteration: 64185
loss: 0.9453479647636414,grad_norm: 0.999999037178999, iteration: 64186
loss: 0.9831596612930298,grad_norm: 0.8296321128315616, iteration: 64187
loss: 0.9897408485412598,grad_norm: 0.934273441633403, iteration: 64188
loss: 0.997239351272583,grad_norm: 0.8265009980657767, iteration: 64189
loss: 1.0543838739395142,grad_norm: 0.9999994756824265, iteration: 64190
loss: 1.002431869506836,grad_norm: 0.9999997462697402, iteration: 64191
loss: 1.0044499635696411,grad_norm: 0.9999998481881459, iteration: 64192
loss: 1.01493239402771,grad_norm: 0.9632974658425468, iteration: 64193
loss: 1.0251350402832031,grad_norm: 0.9999993232893306, iteration: 64194
loss: 0.9559522867202759,grad_norm: 0.9053291185087625, iteration: 64195
loss: 0.9966362118721008,grad_norm: 0.9999991585971665, iteration: 64196
loss: 0.9920971393585205,grad_norm: 0.9329310319871128, iteration: 64197
loss: 0.9766896367073059,grad_norm: 0.9999992164232846, iteration: 64198
loss: 1.0049998760223389,grad_norm: 0.9999990837867991, iteration: 64199
loss: 1.0282037258148193,grad_norm: 0.9999993719221116, iteration: 64200
loss: 1.0364545583724976,grad_norm: 0.9636718885247455, iteration: 64201
loss: 1.012855052947998,grad_norm: 0.9999991470174966, iteration: 64202
loss: 1.0141938924789429,grad_norm: 0.9999991121580455, iteration: 64203
loss: 0.9380310773849487,grad_norm: 0.9575777958953514, iteration: 64204
loss: 1.05703866481781,grad_norm: 0.9999994215787418, iteration: 64205
loss: 0.9945752620697021,grad_norm: 0.9635300058209417, iteration: 64206
loss: 0.998495876789093,grad_norm: 0.9999993111385174, iteration: 64207
loss: 1.0708560943603516,grad_norm: 0.9999998856959049, iteration: 64208
loss: 1.0042282342910767,grad_norm: 0.9999996134075909, iteration: 64209
loss: 1.0001068115234375,grad_norm: 0.9050579416496244, iteration: 64210
loss: 0.9966593384742737,grad_norm: 0.841225254536335, iteration: 64211
loss: 1.0957428216934204,grad_norm: 0.8380996075004113, iteration: 64212
loss: 0.9800511002540588,grad_norm: 0.8754341861537492, iteration: 64213
loss: 1.036700963973999,grad_norm: 0.9892513795148203, iteration: 64214
loss: 0.9971073269844055,grad_norm: 0.9573005298586696, iteration: 64215
loss: 1.0046794414520264,grad_norm: 0.9999990114765481, iteration: 64216
loss: 1.0338419675827026,grad_norm: 0.9999991849968537, iteration: 64217
loss: 0.9840633273124695,grad_norm: 0.9218607012704775, iteration: 64218
loss: 0.9843124151229858,grad_norm: 0.999999341014342, iteration: 64219
loss: 0.9730864763259888,grad_norm: 0.9999991664724411, iteration: 64220
loss: 1.0657813549041748,grad_norm: 0.7992021311843085, iteration: 64221
loss: 0.9673599004745483,grad_norm: 0.7742271455200785, iteration: 64222
loss: 0.9908137917518616,grad_norm: 0.8497458842639414, iteration: 64223
loss: 0.9783063530921936,grad_norm: 0.7742277122840395, iteration: 64224
loss: 1.0096927881240845,grad_norm: 0.8769444021420534, iteration: 64225
loss: 1.025308609008789,grad_norm: 0.9999992122399222, iteration: 64226
loss: 1.0095149278640747,grad_norm: 0.9999990980269836, iteration: 64227
loss: 1.044966459274292,grad_norm: 0.9999994652636481, iteration: 64228
loss: 1.00934636592865,grad_norm: 0.8054936907614052, iteration: 64229
loss: 1.0453542470932007,grad_norm: 0.999999281845699, iteration: 64230
loss: 1.0117452144622803,grad_norm: 0.9099047490656158, iteration: 64231
loss: 0.9898748993873596,grad_norm: 0.9999993265857386, iteration: 64232
loss: 0.9852659702301025,grad_norm: 0.9999990584381232, iteration: 64233
loss: 0.9734969139099121,grad_norm: 0.9999990352060282, iteration: 64234
loss: 1.0165231227874756,grad_norm: 0.9999990024180976, iteration: 64235
loss: 1.0210930109024048,grad_norm: 0.9999992074635827, iteration: 64236
loss: 1.0214334726333618,grad_norm: 0.9999990371175418, iteration: 64237
loss: 0.9988548755645752,grad_norm: 0.9999989716489565, iteration: 64238
loss: 0.97355717420578,grad_norm: 0.7959416333742997, iteration: 64239
loss: 1.0077663660049438,grad_norm: 0.9999990205654594, iteration: 64240
loss: 1.0977309942245483,grad_norm: 0.9999994245806613, iteration: 64241
loss: 0.9906463027000427,grad_norm: 0.8444833440128474, iteration: 64242
loss: 1.0101652145385742,grad_norm: 0.9999989596776147, iteration: 64243
loss: 0.9998087882995605,grad_norm: 0.9999990673112585, iteration: 64244
loss: 0.9670706987380981,grad_norm: 0.9487090625673995, iteration: 64245
loss: 1.0346232652664185,grad_norm: 0.9999998173745238, iteration: 64246
loss: 0.9757801294326782,grad_norm: 0.999999268206191, iteration: 64247
loss: 0.9931173324584961,grad_norm: 0.9999995610764391, iteration: 64248
loss: 1.0520750284194946,grad_norm: 0.9999995404029224, iteration: 64249
loss: 1.0328340530395508,grad_norm: 0.999999744985851, iteration: 64250
loss: 0.9943559765815735,grad_norm: 0.7839853980418079, iteration: 64251
loss: 1.0046573877334595,grad_norm: 0.9999991485828006, iteration: 64252
loss: 1.0155469179153442,grad_norm: 0.9999990879245334, iteration: 64253
loss: 1.003700613975525,grad_norm: 0.9999990712819654, iteration: 64254
loss: 0.9984527826309204,grad_norm: 0.8517378162441063, iteration: 64255
loss: 1.0017030239105225,grad_norm: 0.9999991306450816, iteration: 64256
loss: 1.005435585975647,grad_norm: 0.8902126995040918, iteration: 64257
loss: 1.0649529695510864,grad_norm: 0.9999993495616901, iteration: 64258
loss: 1.029434084892273,grad_norm: 0.9999995207810921, iteration: 64259
loss: 1.0497193336486816,grad_norm: 0.9999997834580648, iteration: 64260
loss: 1.0247225761413574,grad_norm: 0.9999993848662818, iteration: 64261
loss: 0.9928293228149414,grad_norm: 0.9999989222332072, iteration: 64262
loss: 0.963247537612915,grad_norm: 0.9999996637475039, iteration: 64263
loss: 1.0128581523895264,grad_norm: 0.9252869442837433, iteration: 64264
loss: 1.0119487047195435,grad_norm: 0.8488413327189088, iteration: 64265
loss: 1.0026215314865112,grad_norm: 0.8025726625590208, iteration: 64266
loss: 1.0804272890090942,grad_norm: 0.9999996691726355, iteration: 64267
loss: 1.0505836009979248,grad_norm: 0.9999990766619892, iteration: 64268
loss: 1.0392740964889526,grad_norm: 0.9582470758085744, iteration: 64269
loss: 0.9868693351745605,grad_norm: 0.8423100028887198, iteration: 64270
loss: 0.9685513377189636,grad_norm: 0.9999991065132225, iteration: 64271
loss: 1.0183963775634766,grad_norm: 0.8441316059538485, iteration: 64272
loss: 0.9973748922348022,grad_norm: 0.9999991807609993, iteration: 64273
loss: 1.0001968145370483,grad_norm: 0.9999992548116994, iteration: 64274
loss: 1.0460842847824097,grad_norm: 0.9036879765184646, iteration: 64275
loss: 1.0082234144210815,grad_norm: 0.9999990174121257, iteration: 64276
loss: 1.0213267803192139,grad_norm: 0.9999995370091008, iteration: 64277
loss: 1.0076258182525635,grad_norm: 0.9638843882094551, iteration: 64278
loss: 1.0352306365966797,grad_norm: 0.9804110274087979, iteration: 64279
loss: 1.0087107419967651,grad_norm: 0.9629282430905647, iteration: 64280
loss: 1.0158523321151733,grad_norm: 0.8153593617662672, iteration: 64281
loss: 1.0169798135757446,grad_norm: 0.9893234638615402, iteration: 64282
loss: 0.9784300923347473,grad_norm: 0.933809704047234, iteration: 64283
loss: 1.0187678337097168,grad_norm: 0.8667807279789451, iteration: 64284
loss: 1.0353370904922485,grad_norm: 0.9999991754435686, iteration: 64285
loss: 1.0532134771347046,grad_norm: 0.9520301978644736, iteration: 64286
loss: 1.0299359560012817,grad_norm: 0.9999991498612982, iteration: 64287
loss: 1.0089850425720215,grad_norm: 0.8953473754919717, iteration: 64288
loss: 1.0194299221038818,grad_norm: 0.9999998349560996, iteration: 64289
loss: 0.9795751571655273,grad_norm: 0.9999991176938351, iteration: 64290
loss: 1.0258549451828003,grad_norm: 0.9661356452016683, iteration: 64291
loss: 1.010840892791748,grad_norm: 0.9999993694304947, iteration: 64292
loss: 1.0149054527282715,grad_norm: 0.9999992213579618, iteration: 64293
loss: 1.0445735454559326,grad_norm: 0.9205808669206591, iteration: 64294
loss: 1.0122411251068115,grad_norm: 0.7959882648007822, iteration: 64295
loss: 1.000628113746643,grad_norm: 0.9999991444930489, iteration: 64296
loss: 1.0206656455993652,grad_norm: 0.793985711958323, iteration: 64297
loss: 1.018517017364502,grad_norm: 0.9545068903165628, iteration: 64298
loss: 1.0201865434646606,grad_norm: 0.999999321844182, iteration: 64299
loss: 1.0026987791061401,grad_norm: 0.9987111686371346, iteration: 64300
loss: 0.9921727776527405,grad_norm: 0.9999992601007693, iteration: 64301
loss: 0.9911080598831177,grad_norm: 0.9999991719918743, iteration: 64302
loss: 0.9855078458786011,grad_norm: 0.9999990946109208, iteration: 64303
loss: 1.017431616783142,grad_norm: 0.9999992861752937, iteration: 64304
loss: 1.0014503002166748,grad_norm: 0.799638726533155, iteration: 64305
loss: 1.022247076034546,grad_norm: 0.9999991386792085, iteration: 64306
loss: 1.0250986814498901,grad_norm: 0.9999991063211194, iteration: 64307
loss: 1.0144400596618652,grad_norm: 0.9466831686881455, iteration: 64308
loss: 1.0583033561706543,grad_norm: 0.9999997913661076, iteration: 64309
loss: 0.9827098846435547,grad_norm: 0.9663098850763165, iteration: 64310
loss: 0.9978301525115967,grad_norm: 0.8407919133746535, iteration: 64311
loss: 1.015720248222351,grad_norm: 0.9999989927109386, iteration: 64312
loss: 1.0162959098815918,grad_norm: 0.9011330683695936, iteration: 64313
loss: 0.9980472326278687,grad_norm: 0.7818164950534136, iteration: 64314
loss: 0.9824112057685852,grad_norm: 0.9834247314092733, iteration: 64315
loss: 1.0158517360687256,grad_norm: 0.9999994307330902, iteration: 64316
loss: 0.9996150732040405,grad_norm: 0.9999990745239862, iteration: 64317
loss: 1.0015616416931152,grad_norm: 0.9999992760204222, iteration: 64318
loss: 1.0364047288894653,grad_norm: 0.9755540852458924, iteration: 64319
loss: 0.9867381453514099,grad_norm: 0.9661634348230127, iteration: 64320
loss: 1.005997896194458,grad_norm: 0.9213571888667529, iteration: 64321
loss: 1.0077041387557983,grad_norm: 0.9999991101793142, iteration: 64322
loss: 1.013999581336975,grad_norm: 0.9206454282355576, iteration: 64323
loss: 1.0148451328277588,grad_norm: 0.9999991025920162, iteration: 64324
loss: 1.0616257190704346,grad_norm: 0.99999984879519, iteration: 64325
loss: 0.9686470627784729,grad_norm: 0.9887527668373672, iteration: 64326
loss: 1.0629324913024902,grad_norm: 0.9999990592554573, iteration: 64327
loss: 1.0787206888198853,grad_norm: 0.9999996969216822, iteration: 64328
loss: 1.0769277811050415,grad_norm: 0.9999999387394647, iteration: 64329
loss: 0.9986119866371155,grad_norm: 0.9999990573577876, iteration: 64330
loss: 0.9741454124450684,grad_norm: 0.9279101000759734, iteration: 64331
loss: 0.9962716102600098,grad_norm: 0.8995546931538586, iteration: 64332
loss: 1.0048377513885498,grad_norm: 0.999999906060345, iteration: 64333
loss: 1.0098977088928223,grad_norm: 0.9999990552623519, iteration: 64334
loss: 1.0296134948730469,grad_norm: 0.9999991537113656, iteration: 64335
loss: 1.025134801864624,grad_norm: 0.9999991168064551, iteration: 64336
loss: 0.9838151335716248,grad_norm: 0.830418703279319, iteration: 64337
loss: 0.9915261268615723,grad_norm: 0.9999998178361187, iteration: 64338
loss: 1.0197398662567139,grad_norm: 0.8765732499930725, iteration: 64339
loss: 1.0225435495376587,grad_norm: 0.833120383376063, iteration: 64340
loss: 1.100930094718933,grad_norm: 0.999999310669482, iteration: 64341
loss: 0.99944669008255,grad_norm: 0.9999991733630398, iteration: 64342
loss: 0.9997702240943909,grad_norm: 0.8453717421622443, iteration: 64343
loss: 1.0285028219223022,grad_norm: 0.907142735607696, iteration: 64344
loss: 1.032007098197937,grad_norm: 0.9999991548157483, iteration: 64345
loss: 1.0050179958343506,grad_norm: 0.9999997593810699, iteration: 64346
loss: 1.0249453783035278,grad_norm: 0.7925524196897369, iteration: 64347
loss: 0.9894517064094543,grad_norm: 0.9999991550749177, iteration: 64348
loss: 0.985161304473877,grad_norm: 0.9795375987852286, iteration: 64349
loss: 0.9780883193016052,grad_norm: 0.7812874596163484, iteration: 64350
loss: 1.0226861238479614,grad_norm: 0.9999991362235209, iteration: 64351
loss: 0.9665958285331726,grad_norm: 0.7518036363851789, iteration: 64352
loss: 1.0658378601074219,grad_norm: 0.9764232343063678, iteration: 64353
loss: 0.9909915924072266,grad_norm: 0.8392562830159557, iteration: 64354
loss: 0.9274843335151672,grad_norm: 0.9999989609278824, iteration: 64355
loss: 1.0356159210205078,grad_norm: 0.9999991244809765, iteration: 64356
loss: 0.9994826316833496,grad_norm: 0.8164148113976825, iteration: 64357
loss: 0.9920913577079773,grad_norm: 0.999999798962415, iteration: 64358
loss: 0.9984557628631592,grad_norm: 0.9859120577252609, iteration: 64359
loss: 0.9849467873573303,grad_norm: 0.9999993248558308, iteration: 64360
loss: 1.0069409608840942,grad_norm: 0.8148826704140633, iteration: 64361
loss: 1.001253366470337,grad_norm: 0.8431232754645256, iteration: 64362
loss: 0.9886896014213562,grad_norm: 0.9999990079498929, iteration: 64363
loss: 0.9933601021766663,grad_norm: 0.9622635652837273, iteration: 64364
loss: 1.0131142139434814,grad_norm: 0.9999997244859985, iteration: 64365
loss: 1.2183947563171387,grad_norm: 0.99999927609005, iteration: 64366
loss: 1.0072005987167358,grad_norm: 0.9999990636084876, iteration: 64367
loss: 1.014528751373291,grad_norm: 0.8828210512412147, iteration: 64368
loss: 1.005543828010559,grad_norm: 0.9999995144747493, iteration: 64369
loss: 0.9557803273200989,grad_norm: 0.9999991580594197, iteration: 64370
loss: 0.9885855317115784,grad_norm: 0.8285945822482106, iteration: 64371
loss: 0.9840248823165894,grad_norm: 0.9085225132993425, iteration: 64372
loss: 0.9967217445373535,grad_norm: 0.9365021550820435, iteration: 64373
loss: 0.9873546957969666,grad_norm: 0.8929400292541134, iteration: 64374
loss: 1.0091831684112549,grad_norm: 0.9999991909014784, iteration: 64375
loss: 0.9910886883735657,grad_norm: 0.8730889572029679, iteration: 64376
loss: 1.0603445768356323,grad_norm: 0.9999990414191694, iteration: 64377
loss: 0.989851176738739,grad_norm: 0.9680823008644076, iteration: 64378
loss: 1.0258935689926147,grad_norm: 0.9999992780883298, iteration: 64379
loss: 0.9876998662948608,grad_norm: 0.8386347419275826, iteration: 64380
loss: 1.0495797395706177,grad_norm: 0.9999993253266904, iteration: 64381
loss: 1.073861002922058,grad_norm: 0.9999998102175518, iteration: 64382
loss: 0.9950944781303406,grad_norm: 0.9417438978819883, iteration: 64383
loss: 0.9959748387336731,grad_norm: 0.9269394027366007, iteration: 64384
loss: 1.007869005203247,grad_norm: 0.99999927963357, iteration: 64385
loss: 1.0247976779937744,grad_norm: 0.9999991392283346, iteration: 64386
loss: 1.1177211999893188,grad_norm: 0.9999996934890465, iteration: 64387
loss: 1.107161521911621,grad_norm: 0.9999999054301055, iteration: 64388
loss: 1.0041542053222656,grad_norm: 0.9999994657747736, iteration: 64389
loss: 1.1060738563537598,grad_norm: 0.999999273496605, iteration: 64390
loss: 1.0157623291015625,grad_norm: 0.999999426950361, iteration: 64391
loss: 0.9806835055351257,grad_norm: 0.9305824477676009, iteration: 64392
loss: 1.0167628526687622,grad_norm: 0.8476494853752479, iteration: 64393
loss: 0.9639856815338135,grad_norm: 0.9418242725196168, iteration: 64394
loss: 1.0295827388763428,grad_norm: 0.999999020997543, iteration: 64395
loss: 1.0343738794326782,grad_norm: 0.9999991691267223, iteration: 64396
loss: 1.0126351118087769,grad_norm: 0.897202555058045, iteration: 64397
loss: 1.023268461227417,grad_norm: 0.9252832471799336, iteration: 64398
loss: 1.0345100164413452,grad_norm: 0.9999990945465074, iteration: 64399
loss: 0.9963731169700623,grad_norm: 0.976401864207148, iteration: 64400
loss: 0.9955515265464783,grad_norm: 0.9999990428165305, iteration: 64401
loss: 1.0017013549804688,grad_norm: 0.9552591530545593, iteration: 64402
loss: 0.9962563514709473,grad_norm: 0.9999992636034396, iteration: 64403
loss: 0.9651880860328674,grad_norm: 0.8774477850389527, iteration: 64404
loss: 1.0143485069274902,grad_norm: 0.9926074391823689, iteration: 64405
loss: 0.9922910332679749,grad_norm: 0.9999991183841537, iteration: 64406
loss: 1.0381447076797485,grad_norm: 0.8827704504969741, iteration: 64407
loss: 1.0105922222137451,grad_norm: 0.9999990882160934, iteration: 64408
loss: 0.9920234084129333,grad_norm: 0.7872528947350254, iteration: 64409
loss: 1.0137031078338623,grad_norm: 0.8647083365383686, iteration: 64410
loss: 1.0255337953567505,grad_norm: 0.9631700229977553, iteration: 64411
loss: 1.0017472505569458,grad_norm: 0.8569959092376529, iteration: 64412
loss: 0.9801179766654968,grad_norm: 0.9999991193830036, iteration: 64413
loss: 1.0273503065109253,grad_norm: 0.8900070845444534, iteration: 64414
loss: 1.0293793678283691,grad_norm: 0.787542366648979, iteration: 64415
loss: 1.0446044206619263,grad_norm: 0.9999991935190521, iteration: 64416
loss: 1.178152084350586,grad_norm: 0.9999996252521959, iteration: 64417
loss: 1.0250297784805298,grad_norm: 0.9999994417955618, iteration: 64418
loss: 1.0156387090682983,grad_norm: 0.999999033810897, iteration: 64419
loss: 1.0140503644943237,grad_norm: 0.7967006567326405, iteration: 64420
loss: 1.0085461139678955,grad_norm: 0.9999992042052244, iteration: 64421
loss: 1.039493441581726,grad_norm: 0.9999998241323234, iteration: 64422
loss: 0.9817240238189697,grad_norm: 0.7634380742818699, iteration: 64423
loss: 1.0222316980361938,grad_norm: 0.9999997946114382, iteration: 64424
loss: 0.9976264238357544,grad_norm: 0.9999991536694005, iteration: 64425
loss: 0.9812540411949158,grad_norm: 0.9999989428386641, iteration: 64426
loss: 0.9988707900047302,grad_norm: 0.8735846782850272, iteration: 64427
loss: 1.0186859369277954,grad_norm: 0.9999991140508917, iteration: 64428
loss: 1.0994961261749268,grad_norm: 0.9999992633066856, iteration: 64429
loss: 0.999360978603363,grad_norm: 0.9999992275000006, iteration: 64430
loss: 0.9935705661773682,grad_norm: 0.7825054512644357, iteration: 64431
loss: 0.9869672060012817,grad_norm: 0.8149869925009267, iteration: 64432
loss: 1.0148906707763672,grad_norm: 0.999999621854554, iteration: 64433
loss: 1.0028886795043945,grad_norm: 0.9676207838837858, iteration: 64434
loss: 1.0558003187179565,grad_norm: 0.9999995482353522, iteration: 64435
loss: 0.9875099658966064,grad_norm: 0.9257808979763793, iteration: 64436
loss: 0.9943839311599731,grad_norm: 0.9999993609607254, iteration: 64437
loss: 1.0406994819641113,grad_norm: 0.9999993247248762, iteration: 64438
loss: 0.9795591235160828,grad_norm: 0.9999991216710021, iteration: 64439
loss: 1.0170485973358154,grad_norm: 0.9999990857434169, iteration: 64440
loss: 1.0119425058364868,grad_norm: 0.9999989666984929, iteration: 64441
loss: 1.0331915616989136,grad_norm: 0.9127409778501159, iteration: 64442
loss: 1.0268241167068481,grad_norm: 0.9999990552156746, iteration: 64443
loss: 0.9834750890731812,grad_norm: 0.9999999297467668, iteration: 64444
loss: 0.9832946062088013,grad_norm: 0.9999992187518453, iteration: 64445
loss: 0.9930607080459595,grad_norm: 0.9339825225785596, iteration: 64446
loss: 1.0355112552642822,grad_norm: 0.9999989535776573, iteration: 64447
loss: 1.1158926486968994,grad_norm: 0.9064108417120271, iteration: 64448
loss: 1.0288598537445068,grad_norm: 0.9999994542639128, iteration: 64449
loss: 0.9916742444038391,grad_norm: 0.9506847222559827, iteration: 64450
loss: 1.040036678314209,grad_norm: 0.9999997150162535, iteration: 64451
loss: 1.0029940605163574,grad_norm: 0.9952762687251877, iteration: 64452
loss: 0.9706444144248962,grad_norm: 0.8759441999118592, iteration: 64453
loss: 1.0227878093719482,grad_norm: 0.9999998384412507, iteration: 64454
loss: 1.0499712228775024,grad_norm: 0.9999991141750554, iteration: 64455
loss: 0.9698217511177063,grad_norm: 0.9999996168135087, iteration: 64456
loss: 1.073040246963501,grad_norm: 0.9393687379617303, iteration: 64457
loss: 1.0084736347198486,grad_norm: 0.9752377711156581, iteration: 64458
loss: 0.9572463035583496,grad_norm: 0.9999991520701474, iteration: 64459
loss: 1.0677534341812134,grad_norm: 0.9999998395315958, iteration: 64460
loss: 1.0274169445037842,grad_norm: 0.9999994494115032, iteration: 64461
loss: 1.061201572418213,grad_norm: 0.9999993337239397, iteration: 64462
loss: 0.9985871911048889,grad_norm: 0.999999843627697, iteration: 64463
loss: 1.0102291107177734,grad_norm: 0.8706240508303484, iteration: 64464
loss: 1.004157304763794,grad_norm: 0.9999991689454143, iteration: 64465
loss: 1.0096940994262695,grad_norm: 0.9999991393489885, iteration: 64466
loss: 1.0131545066833496,grad_norm: 0.9064483081569137, iteration: 64467
loss: 1.001605749130249,grad_norm: 0.9938910115295858, iteration: 64468
loss: 0.982000470161438,grad_norm: 0.9716935176450834, iteration: 64469
loss: 1.076161503791809,grad_norm: 0.999999494478727, iteration: 64470
loss: 1.0150939226150513,grad_norm: 0.8729281319292782, iteration: 64471
loss: 1.0098998546600342,grad_norm: 0.9999991667436321, iteration: 64472
loss: 1.0451256036758423,grad_norm: 0.999999120207808, iteration: 64473
loss: 1.0323313474655151,grad_norm: 0.9999999196622537, iteration: 64474
loss: 1.0308388471603394,grad_norm: 0.9999993107607248, iteration: 64475
loss: 1.048911452293396,grad_norm: 0.9999994567461473, iteration: 64476
loss: 1.023610234260559,grad_norm: 0.9650475145457419, iteration: 64477
loss: 1.0140498876571655,grad_norm: 0.9474449506406277, iteration: 64478
loss: 0.9878216981887817,grad_norm: 0.9520240185118841, iteration: 64479
loss: 1.0411033630371094,grad_norm: 0.9999991966045592, iteration: 64480
loss: 1.0556339025497437,grad_norm: 0.9658582639108012, iteration: 64481
loss: 1.047167420387268,grad_norm: 0.9999993176486626, iteration: 64482
loss: 0.9869354367256165,grad_norm: 0.9999991364850727, iteration: 64483
loss: 0.9487730860710144,grad_norm: 0.8407153706164898, iteration: 64484
loss: 1.0317684412002563,grad_norm: 0.8008151232640983, iteration: 64485
loss: 1.0055772066116333,grad_norm: 0.9999991616250834, iteration: 64486
loss: 1.0287498235702515,grad_norm: 0.9999995276311922, iteration: 64487
loss: 1.1690630912780762,grad_norm: 0.9999998041362502, iteration: 64488
loss: 0.9951046109199524,grad_norm: 0.9220280875695548, iteration: 64489
loss: 1.0062283277511597,grad_norm: 0.8099062708663456, iteration: 64490
loss: 1.0413082838058472,grad_norm: 0.9999996564677566, iteration: 64491
loss: 1.0554243326187134,grad_norm: 0.9999993440294203, iteration: 64492
loss: 0.9924587607383728,grad_norm: 0.8652514895342194, iteration: 64493
loss: 0.9800925850868225,grad_norm: 0.9999993670557251, iteration: 64494
loss: 1.0146863460540771,grad_norm: 0.9999993171290305, iteration: 64495
loss: 0.9813363552093506,grad_norm: 0.8199019884946774, iteration: 64496
loss: 1.0975297689437866,grad_norm: 0.99999989473446, iteration: 64497
loss: 1.1137510538101196,grad_norm: 0.9999991516608906, iteration: 64498
loss: 1.0707881450653076,grad_norm: 0.9999999200717996, iteration: 64499
loss: 0.983697235584259,grad_norm: 0.9999992716354422, iteration: 64500
loss: 0.9829208254814148,grad_norm: 0.9999990740576548, iteration: 64501
loss: 1.0909764766693115,grad_norm: 0.9999991835902344, iteration: 64502
loss: 1.0091534852981567,grad_norm: 0.8719612042237599, iteration: 64503
loss: 0.9714240431785583,grad_norm: 0.9446289858849114, iteration: 64504
loss: 1.0489610433578491,grad_norm: 0.9999990773478126, iteration: 64505
loss: 1.0080825090408325,grad_norm: 0.9999991670301858, iteration: 64506
loss: 0.9783029556274414,grad_norm: 0.9407691390290247, iteration: 64507
loss: 0.9642347693443298,grad_norm: 0.8535369804663324, iteration: 64508
loss: 1.0319758653640747,grad_norm: 0.9999992014656255, iteration: 64509
loss: 0.9844402074813843,grad_norm: 0.9999993103958902, iteration: 64510
loss: 1.010480523109436,grad_norm: 0.9999991532903824, iteration: 64511
loss: 0.975152313709259,grad_norm: 0.928485932001554, iteration: 64512
loss: 0.9906623959541321,grad_norm: 0.9999991561717769, iteration: 64513
loss: 0.9768714308738708,grad_norm: 0.9999990051250099, iteration: 64514
loss: 0.9992890954017639,grad_norm: 0.9999990887414819, iteration: 64515
loss: 0.9910439848899841,grad_norm: 0.7829703964915898, iteration: 64516
loss: 1.0371257066726685,grad_norm: 0.9999994209100735, iteration: 64517
loss: 0.9857905507087708,grad_norm: 0.9999990245216521, iteration: 64518
loss: 1.0213069915771484,grad_norm: 0.9238971393917714, iteration: 64519
loss: 1.0016483068466187,grad_norm: 0.880911613865983, iteration: 64520
loss: 1.0123487710952759,grad_norm: 0.8386731337377709, iteration: 64521
loss: 1.0159633159637451,grad_norm: 0.9999991092926217, iteration: 64522
loss: 1.03728187084198,grad_norm: 0.8809803029248119, iteration: 64523
loss: 1.0184650421142578,grad_norm: 0.8478077550109869, iteration: 64524
loss: 1.0544400215148926,grad_norm: 0.9744980234127794, iteration: 64525
loss: 1.1763571500778198,grad_norm: 0.9999992215285493, iteration: 64526
loss: 1.0245566368103027,grad_norm: 0.90497045069217, iteration: 64527
loss: 1.0174225568771362,grad_norm: 0.8864849875910948, iteration: 64528
loss: 1.030531883239746,grad_norm: 0.9999990648039758, iteration: 64529
loss: 0.9903460741043091,grad_norm: 0.9980754351182111, iteration: 64530
loss: 1.0213261842727661,grad_norm: 0.9345325139928713, iteration: 64531
loss: 1.0535809993743896,grad_norm: 0.9999991795167403, iteration: 64532
loss: 1.071677565574646,grad_norm: 0.9999998901071259, iteration: 64533
loss: 1.018818974494934,grad_norm: 0.9999991646416136, iteration: 64534
loss: 1.0067968368530273,grad_norm: 0.9999991371587299, iteration: 64535
loss: 1.009311556816101,grad_norm: 0.9999991359298763, iteration: 64536
loss: 1.0948808193206787,grad_norm: 0.9999991363597696, iteration: 64537
loss: 1.001224160194397,grad_norm: 0.9637195234596917, iteration: 64538
loss: 1.0085712671279907,grad_norm: 0.8753947547026562, iteration: 64539
loss: 0.9819499850273132,grad_norm: 0.9916786879209677, iteration: 64540
loss: 1.028027892112732,grad_norm: 0.8733768801771601, iteration: 64541
loss: 0.9656438827514648,grad_norm: 0.8367621617604267, iteration: 64542
loss: 1.0290038585662842,grad_norm: 0.999999063911366, iteration: 64543
loss: 0.9916280508041382,grad_norm: 0.9339238690343151, iteration: 64544
loss: 1.009670376777649,grad_norm: 0.9999991968134645, iteration: 64545
loss: 0.9777693152427673,grad_norm: 0.999999045808683, iteration: 64546
loss: 0.965067982673645,grad_norm: 0.9936909217592814, iteration: 64547
loss: 0.9851789474487305,grad_norm: 0.9999992353127644, iteration: 64548
loss: 1.009549856185913,grad_norm: 0.999999111700841, iteration: 64549
loss: 0.9648544192314148,grad_norm: 0.8644122242206729, iteration: 64550
loss: 0.9908626079559326,grad_norm: 0.8827382175294441, iteration: 64551
loss: 1.0027883052825928,grad_norm: 0.8110992722519553, iteration: 64552
loss: 0.9883261322975159,grad_norm: 0.9999996979225031, iteration: 64553
loss: 1.0660367012023926,grad_norm: 0.9999999774964777, iteration: 64554
loss: 1.0226072072982788,grad_norm: 0.9999990744708792, iteration: 64555
loss: 1.0612661838531494,grad_norm: 0.8868614866238644, iteration: 64556
loss: 1.0176922082901,grad_norm: 0.9999992812015456, iteration: 64557
loss: 0.9865680932998657,grad_norm: 0.9999991447306904, iteration: 64558
loss: 0.978857159614563,grad_norm: 0.8882731005055031, iteration: 64559
loss: 0.9951673150062561,grad_norm: 0.9999992291612649, iteration: 64560
loss: 1.0726666450500488,grad_norm: 0.9326834372562229, iteration: 64561
loss: 1.0466434955596924,grad_norm: 0.984889545297251, iteration: 64562
loss: 1.0071885585784912,grad_norm: 0.9184954666085603, iteration: 64563
loss: 0.97707200050354,grad_norm: 0.9999990921054115, iteration: 64564
loss: 0.9535201787948608,grad_norm: 0.8660838891168339, iteration: 64565
loss: 1.0285632610321045,grad_norm: 0.7054900341087949, iteration: 64566
loss: 1.0733320713043213,grad_norm: 0.8373782030379331, iteration: 64567
loss: 1.0230317115783691,grad_norm: 0.8870146665854556, iteration: 64568
loss: 1.0781387090682983,grad_norm: 0.9999992907513289, iteration: 64569
loss: 1.1326667070388794,grad_norm: 0.9999991832440657, iteration: 64570
loss: 1.1824169158935547,grad_norm: 0.9999991327081799, iteration: 64571
loss: 1.0989693403244019,grad_norm: 0.9999995771924848, iteration: 64572
loss: 1.0441415309906006,grad_norm: 0.9999992054455338, iteration: 64573
loss: 1.0852110385894775,grad_norm: 0.9999997492101579, iteration: 64574
loss: 1.1233866214752197,grad_norm: 0.9805818355151357, iteration: 64575
loss: 0.9750568866729736,grad_norm: 0.9999991990825984, iteration: 64576
loss: 1.0384480953216553,grad_norm: 0.9414710962465347, iteration: 64577
loss: 1.0353050231933594,grad_norm: 0.9999989996924831, iteration: 64578
loss: 1.0205086469650269,grad_norm: 0.9999991757795555, iteration: 64579
loss: 0.9996963739395142,grad_norm: 0.8504077252588875, iteration: 64580
loss: 1.0111820697784424,grad_norm: 0.9371524508646722, iteration: 64581
loss: 0.9801509976387024,grad_norm: 0.9960673370167807, iteration: 64582
loss: 0.9711614847183228,grad_norm: 0.9999991952401018, iteration: 64583
loss: 0.9806579351425171,grad_norm: 0.866253598716803, iteration: 64584
loss: 1.0955424308776855,grad_norm: 0.9999992565217416, iteration: 64585
loss: 1.0283653736114502,grad_norm: 0.9995404498065493, iteration: 64586
loss: 1.0675169229507446,grad_norm: 0.9358749648392859, iteration: 64587
loss: 0.9812877178192139,grad_norm: 0.9999993767169837, iteration: 64588
loss: 1.0090930461883545,grad_norm: 0.9999988989936168, iteration: 64589
loss: 0.9863459467887878,grad_norm: 0.9763731501071624, iteration: 64590
loss: 0.9750530123710632,grad_norm: 0.9999991825880098, iteration: 64591
loss: 0.9895802736282349,grad_norm: 0.9999991165224277, iteration: 64592
loss: 1.0112133026123047,grad_norm: 0.9125846707483237, iteration: 64593
loss: 1.031134843826294,grad_norm: 0.918663647567046, iteration: 64594
loss: 0.9900182485580444,grad_norm: 0.999999821461403, iteration: 64595
loss: 0.9980196952819824,grad_norm: 0.9011732257760406, iteration: 64596
loss: 1.0078829526901245,grad_norm: 0.9999991020109593, iteration: 64597
loss: 1.0358494520187378,grad_norm: 0.9999989984359078, iteration: 64598
loss: 1.0961010456085205,grad_norm: 0.9999996705123597, iteration: 64599
loss: 1.0219907760620117,grad_norm: 0.8595017711807428, iteration: 64600
loss: 0.9889639616012573,grad_norm: 0.8392788679374966, iteration: 64601
loss: 0.9908173680305481,grad_norm: 0.8387024602542044, iteration: 64602
loss: 1.045131802558899,grad_norm: 0.9999995022251663, iteration: 64603
loss: 1.0258954763412476,grad_norm: 0.9999990275865234, iteration: 64604
loss: 1.0148584842681885,grad_norm: 0.8814122324016997, iteration: 64605
loss: 0.9998961091041565,grad_norm: 0.9999991809156342, iteration: 64606
loss: 0.9826723337173462,grad_norm: 0.909089157334868, iteration: 64607
loss: 1.0059771537780762,grad_norm: 0.8453779527212246, iteration: 64608
loss: 1.020241379737854,grad_norm: 0.9812060527579795, iteration: 64609
loss: 0.9795150756835938,grad_norm: 0.9999989310546856, iteration: 64610
loss: 1.0011378526687622,grad_norm: 0.9999991421482314, iteration: 64611
loss: 1.041149377822876,grad_norm: 0.999998969172954, iteration: 64612
loss: 0.9927090406417847,grad_norm: 0.999999153102952, iteration: 64613
loss: 1.0306105613708496,grad_norm: 0.9999993623672507, iteration: 64614
loss: 1.0072107315063477,grad_norm: 0.9315269421578063, iteration: 64615
loss: 0.9877970814704895,grad_norm: 0.8703388894341891, iteration: 64616
loss: 0.9590648412704468,grad_norm: 0.9999993381230623, iteration: 64617
loss: 1.019844889640808,grad_norm: 0.9653323892436301, iteration: 64618
loss: 0.9906237125396729,grad_norm: 0.9117640877340871, iteration: 64619
loss: 0.9916976094245911,grad_norm: 0.9999998568467363, iteration: 64620
loss: 0.9935325980186462,grad_norm: 0.9999998172875787, iteration: 64621
loss: 1.0111346244812012,grad_norm: 0.9377004171836384, iteration: 64622
loss: 1.0600264072418213,grad_norm: 0.9999990578799381, iteration: 64623
loss: 0.9814407825469971,grad_norm: 0.8641714195682418, iteration: 64624
loss: 1.03389310836792,grad_norm: 0.9501736412388867, iteration: 64625
loss: 0.9937874674797058,grad_norm: 0.9999992564263336, iteration: 64626
loss: 0.9541245698928833,grad_norm: 0.9999990677251551, iteration: 64627
loss: 1.0020960569381714,grad_norm: 0.9999992285443821, iteration: 64628
loss: 0.9958569407463074,grad_norm: 0.8437172308376817, iteration: 64629
loss: 1.0209914445877075,grad_norm: 0.8871160215731302, iteration: 64630
loss: 1.0421923398971558,grad_norm: 0.9999998249157559, iteration: 64631
loss: 1.0895960330963135,grad_norm: 0.9534300514479794, iteration: 64632
loss: 0.997165858745575,grad_norm: 0.9134528844575432, iteration: 64633
loss: 1.01881742477417,grad_norm: 0.9999991335820076, iteration: 64634
loss: 1.0140442848205566,grad_norm: 0.9378119805121841, iteration: 64635
loss: 0.9602129459381104,grad_norm: 0.7817156408056217, iteration: 64636
loss: 0.9902953505516052,grad_norm: 0.9999992352155009, iteration: 64637
loss: 0.9954591989517212,grad_norm: 0.9999995527272857, iteration: 64638
loss: 1.083227515220642,grad_norm: 0.9999989923333772, iteration: 64639
loss: 0.967263400554657,grad_norm: 0.9999990642918525, iteration: 64640
loss: 1.0010966062545776,grad_norm: 0.9999998407766106, iteration: 64641
loss: 1.5525015592575073,grad_norm: 0.9999998405371028, iteration: 64642
loss: 1.0075267553329468,grad_norm: 0.8553159281097197, iteration: 64643
loss: 0.9811424612998962,grad_norm: 0.7886142579158344, iteration: 64644
loss: 1.0043766498565674,grad_norm: 0.7634667041813533, iteration: 64645
loss: 0.9997788667678833,grad_norm: 0.9999990744804909, iteration: 64646
loss: 1.0069330930709839,grad_norm: 0.8222122945557688, iteration: 64647
loss: 1.0055733919143677,grad_norm: 0.8531816002724653, iteration: 64648
loss: 0.9731087684631348,grad_norm: 0.9499191315404122, iteration: 64649
loss: 0.9866178631782532,grad_norm: 0.9679846550352564, iteration: 64650
loss: 1.0203509330749512,grad_norm: 0.8445071108161477, iteration: 64651
loss: 1.0019268989562988,grad_norm: 0.9999989410472748, iteration: 64652
loss: 0.9850808382034302,grad_norm: 0.9999991454603583, iteration: 64653
loss: 1.0472031831741333,grad_norm: 0.999999565677444, iteration: 64654
loss: 1.0791794061660767,grad_norm: 0.8995027726383413, iteration: 64655
loss: 0.9954378008842468,grad_norm: 0.9999991419648366, iteration: 64656
loss: 0.9860748648643494,grad_norm: 0.9138032039489522, iteration: 64657
loss: 1.0460522174835205,grad_norm: 0.9664893706902198, iteration: 64658
loss: 0.9873631596565247,grad_norm: 0.9999990570294438, iteration: 64659
loss: 0.996870756149292,grad_norm: 0.9018138773634458, iteration: 64660
loss: 1.0503870248794556,grad_norm: 0.8500220368213881, iteration: 64661
loss: 1.023505449295044,grad_norm: 0.951118595934065, iteration: 64662
loss: 1.0085049867630005,grad_norm: 0.8407137803101543, iteration: 64663
loss: 0.9923616051673889,grad_norm: 0.9195940169948075, iteration: 64664
loss: 1.0136702060699463,grad_norm: 0.9999993173536716, iteration: 64665
loss: 0.9846742749214172,grad_norm: 0.9999992462717068, iteration: 64666
loss: 0.9642131924629211,grad_norm: 0.8854798635423891, iteration: 64667
loss: 1.0065449476242065,grad_norm: 0.999999066235318, iteration: 64668
loss: 1.090313196182251,grad_norm: 0.9999998596524637, iteration: 64669
loss: 0.9833884835243225,grad_norm: 0.879447916533595, iteration: 64670
loss: 0.9951793551445007,grad_norm: 0.9405669764448746, iteration: 64671
loss: 0.999941349029541,grad_norm: 0.9999989603851924, iteration: 64672
loss: 1.0134345293045044,grad_norm: 0.9173176441931079, iteration: 64673
loss: 1.097495198249817,grad_norm: 0.9964982014564217, iteration: 64674
loss: 0.9976493716239929,grad_norm: 0.9999998543844207, iteration: 64675
loss: 0.9908365607261658,grad_norm: 0.8281508669678719, iteration: 64676
loss: 1.0205650329589844,grad_norm: 0.9999991333211171, iteration: 64677
loss: 1.0225671529769897,grad_norm: 0.9999990092005804, iteration: 64678
loss: 1.0459039211273193,grad_norm: 0.8238430219595975, iteration: 64679
loss: 0.9807263612747192,grad_norm: 0.9049566666540773, iteration: 64680
loss: 0.949654757976532,grad_norm: 0.8651338924143677, iteration: 64681
loss: 0.9918299913406372,grad_norm: 0.9203591842919763, iteration: 64682
loss: 1.1048063039779663,grad_norm: 0.9999991950757245, iteration: 64683
loss: 1.0472897291183472,grad_norm: 0.8270517524689847, iteration: 64684
loss: 1.0040923357009888,grad_norm: 0.8455550204088075, iteration: 64685
loss: 0.9952791333198547,grad_norm: 0.7112467947091644, iteration: 64686
loss: 0.9951585531234741,grad_norm: 0.9999991445711995, iteration: 64687
loss: 1.007751226425171,grad_norm: 0.8867250531974648, iteration: 64688
loss: 1.0199756622314453,grad_norm: 0.9999994029656979, iteration: 64689
loss: 1.0013012886047363,grad_norm: 0.9807423065183818, iteration: 64690
loss: 0.9958512783050537,grad_norm: 0.9999991634622011, iteration: 64691
loss: 0.991362988948822,grad_norm: 0.9216307399076643, iteration: 64692
loss: 1.0275696516036987,grad_norm: 0.9999992371421816, iteration: 64693
loss: 1.0173550844192505,grad_norm: 0.9999992266552924, iteration: 64694
loss: 1.0107693672180176,grad_norm: 0.9999991231375328, iteration: 64695
loss: 0.9747828245162964,grad_norm: 0.9999990693690188, iteration: 64696
loss: 1.0000003576278687,grad_norm: 0.8744817071488661, iteration: 64697
loss: 1.0623656511306763,grad_norm: 0.9999991928090575, iteration: 64698
loss: 1.0201671123504639,grad_norm: 0.9999990335394849, iteration: 64699
loss: 0.9851770997047424,grad_norm: 0.8143322130880664, iteration: 64700
loss: 0.9767634272575378,grad_norm: 0.9999991311184814, iteration: 64701
loss: 0.9886366128921509,grad_norm: 0.7972522368944066, iteration: 64702
loss: 1.0396822690963745,grad_norm: 0.9999993078463153, iteration: 64703
loss: 1.020932912826538,grad_norm: 0.8943872228562332, iteration: 64704
loss: 1.043692946434021,grad_norm: 0.9999992078530517, iteration: 64705
loss: 1.0626726150512695,grad_norm: 0.9999993440283984, iteration: 64706
loss: 0.9749915599822998,grad_norm: 0.8589340378887825, iteration: 64707
loss: 0.9979276657104492,grad_norm: 0.9220068001109918, iteration: 64708
loss: 1.0357890129089355,grad_norm: 0.9999993979449138, iteration: 64709
loss: 1.0126852989196777,grad_norm: 0.9518268716247621, iteration: 64710
loss: 0.990094006061554,grad_norm: 0.9999990275902481, iteration: 64711
loss: 0.9746224880218506,grad_norm: 0.867882441585897, iteration: 64712
loss: 1.0259180068969727,grad_norm: 0.9999993286534675, iteration: 64713
loss: 1.0297067165374756,grad_norm: 0.9999995166080844, iteration: 64714
loss: 1.0089768171310425,grad_norm: 0.9999995589809973, iteration: 64715
loss: 0.998134970664978,grad_norm: 0.9852103524787836, iteration: 64716
loss: 1.02273690700531,grad_norm: 0.9999991232979921, iteration: 64717
loss: 1.0850855112075806,grad_norm: 0.9999999632723996, iteration: 64718
loss: 1.006514310836792,grad_norm: 0.9715121189200747, iteration: 64719
loss: 1.0791411399841309,grad_norm: 0.9999991751455661, iteration: 64720
loss: 1.0391724109649658,grad_norm: 0.8815534114824498, iteration: 64721
loss: 1.0020051002502441,grad_norm: 0.9999990409840941, iteration: 64722
loss: 0.9954912662506104,grad_norm: 0.8696340469715806, iteration: 64723
loss: 1.0046056509017944,grad_norm: 0.9999990951200595, iteration: 64724
loss: 0.9671129584312439,grad_norm: 0.9413017696003533, iteration: 64725
loss: 1.0396287441253662,grad_norm: 0.9714052217583947, iteration: 64726
loss: 1.107865333557129,grad_norm: 0.9999992099726849, iteration: 64727
loss: 1.0029557943344116,grad_norm: 0.9804093559400897, iteration: 64728
loss: 0.9751830101013184,grad_norm: 0.849431041959503, iteration: 64729
loss: 1.0784623622894287,grad_norm: 0.9999994019201696, iteration: 64730
loss: 0.974669873714447,grad_norm: 0.9386158378695615, iteration: 64731
loss: 1.0013408660888672,grad_norm: 0.9999993238715644, iteration: 64732
loss: 0.9984525442123413,grad_norm: 0.9888133669563399, iteration: 64733
loss: 1.26897394657135,grad_norm: 0.999999318866431, iteration: 64734
loss: 1.0410767793655396,grad_norm: 0.9999992850352956, iteration: 64735
loss: 1.0088903903961182,grad_norm: 0.8476886581429075, iteration: 64736
loss: 1.0769668817520142,grad_norm: 0.9999996834001614, iteration: 64737
loss: 1.0495349168777466,grad_norm: 0.9999991810506758, iteration: 64738
loss: 0.9842721819877625,grad_norm: 0.9999990831968663, iteration: 64739
loss: 1.0688563585281372,grad_norm: 0.9999993491221848, iteration: 64740
loss: 0.9611892700195312,grad_norm: 0.9999991033653102, iteration: 64741
loss: 1.008200764656067,grad_norm: 0.9362480912556771, iteration: 64742
loss: 1.0036611557006836,grad_norm: 0.9999991310650593, iteration: 64743
loss: 1.0020112991333008,grad_norm: 0.90711583309408, iteration: 64744
loss: 1.023941159248352,grad_norm: 0.8673826165055816, iteration: 64745
loss: 1.0295490026474,grad_norm: 0.8218556688498632, iteration: 64746
loss: 1.0104053020477295,grad_norm: 0.8612888539659938, iteration: 64747
loss: 1.0332142114639282,grad_norm: 0.9999994311833738, iteration: 64748
loss: 1.008008360862732,grad_norm: 0.9386033742804122, iteration: 64749
loss: 1.0322226285934448,grad_norm: 0.9999996552767538, iteration: 64750
loss: 1.0057657957077026,grad_norm: 0.9999992687095772, iteration: 64751
loss: 1.0389593839645386,grad_norm: 0.9999990608069932, iteration: 64752
loss: 1.0378648042678833,grad_norm: 0.999999260323619, iteration: 64753
loss: 0.9717047810554504,grad_norm: 0.9426265488900416, iteration: 64754
loss: 1.1017862558364868,grad_norm: 0.9999996498209839, iteration: 64755
loss: 0.9913867115974426,grad_norm: 0.9034218204165986, iteration: 64756
loss: 1.0263068675994873,grad_norm: 0.9999994468526809, iteration: 64757
loss: 0.9882073998451233,grad_norm: 0.9999992250124723, iteration: 64758
loss: 1.0370410680770874,grad_norm: 0.9999996922775404, iteration: 64759
loss: 1.0463311672210693,grad_norm: 0.9999995129699153, iteration: 64760
loss: 1.0104479789733887,grad_norm: 0.999999096737549, iteration: 64761
loss: 1.0071916580200195,grad_norm: 0.98964062279091, iteration: 64762
loss: 1.0033586025238037,grad_norm: 0.9288964373212151, iteration: 64763
loss: 0.987589418888092,grad_norm: 0.999999150028525, iteration: 64764
loss: 0.9880242347717285,grad_norm: 0.9999992931214768, iteration: 64765
loss: 1.0252070426940918,grad_norm: 0.999999027758354, iteration: 64766
loss: 1.0996031761169434,grad_norm: 0.9999994929496495, iteration: 64767
loss: 1.0271074771881104,grad_norm: 0.9999991055530999, iteration: 64768
loss: 1.0954015254974365,grad_norm: 0.9999995472997476, iteration: 64769
loss: 1.0168976783752441,grad_norm: 0.9999992432158901, iteration: 64770
loss: 1.027204990386963,grad_norm: 0.9999992345704725, iteration: 64771
loss: 1.052286148071289,grad_norm: 0.9999996911374142, iteration: 64772
loss: 1.1128807067871094,grad_norm: 0.9999992530426728, iteration: 64773
loss: 0.9716733694076538,grad_norm: 0.9999989828504554, iteration: 64774
loss: 1.0294350385665894,grad_norm: 0.9999991227705987, iteration: 64775
loss: 1.0524009466171265,grad_norm: 0.9900024696455124, iteration: 64776
loss: 1.1039137840270996,grad_norm: 0.9999995965373117, iteration: 64777
loss: 1.0172914266586304,grad_norm: 0.9899064498151369, iteration: 64778
loss: 1.014868140220642,grad_norm: 0.9674190504767297, iteration: 64779
loss: 1.0459458827972412,grad_norm: 0.9327395430974211, iteration: 64780
loss: 0.959690511226654,grad_norm: 0.851851548337221, iteration: 64781
loss: 1.008243441581726,grad_norm: 0.999999102466572, iteration: 64782
loss: 1.0143338441848755,grad_norm: 0.9999991091222548, iteration: 64783
loss: 0.9781933426856995,grad_norm: 0.999999213811228, iteration: 64784
loss: 0.9779099822044373,grad_norm: 0.9729831937980776, iteration: 64785
loss: 1.011904239654541,grad_norm: 0.7923441574144354, iteration: 64786
loss: 1.0487054586410522,grad_norm: 0.9999996996070476, iteration: 64787
loss: 1.0343281030654907,grad_norm: 0.999999909371382, iteration: 64788
loss: 0.984038233757019,grad_norm: 0.9999990399385106, iteration: 64789
loss: 0.9905945062637329,grad_norm: 0.999999144675091, iteration: 64790
loss: 1.0095710754394531,grad_norm: 0.9999993522484589, iteration: 64791
loss: 1.0128159523010254,grad_norm: 0.999999018732025, iteration: 64792
loss: 0.984432578086853,grad_norm: 0.8270370838056318, iteration: 64793
loss: 0.9885262250900269,grad_norm: 0.9425275463144863, iteration: 64794
loss: 1.002482295036316,grad_norm: 0.9980102718073205, iteration: 64795
loss: 1.0651047229766846,grad_norm: 0.9999999061209623, iteration: 64796
loss: 0.979469358921051,grad_norm: 0.999999087725121, iteration: 64797
loss: 1.0576680898666382,grad_norm: 0.8219252030810671, iteration: 64798
loss: 0.9859172105789185,grad_norm: 0.9999989051577131, iteration: 64799
loss: 0.9498537182807922,grad_norm: 0.9589920984074323, iteration: 64800
loss: 1.013167142868042,grad_norm: 0.8058450801552794, iteration: 64801
loss: 1.039681315422058,grad_norm: 0.9999993218277962, iteration: 64802
loss: 0.990397036075592,grad_norm: 0.9999990940162411, iteration: 64803
loss: 1.055261492729187,grad_norm: 0.9999991488112974, iteration: 64804
loss: 1.0094536542892456,grad_norm: 0.9416434854637832, iteration: 64805
loss: 1.0553877353668213,grad_norm: 0.9508451135727465, iteration: 64806
loss: 0.9999874234199524,grad_norm: 0.9193189855041581, iteration: 64807
loss: 1.0117475986480713,grad_norm: 0.7518526762659117, iteration: 64808
loss: 1.0047553777694702,grad_norm: 0.999999251529988, iteration: 64809
loss: 1.0130902528762817,grad_norm: 0.8634934603906684, iteration: 64810
loss: 0.9844703078269958,grad_norm: 0.9999994063156931, iteration: 64811
loss: 1.0133161544799805,grad_norm: 0.9999991160840802, iteration: 64812
loss: 1.0616387128829956,grad_norm: 0.9999992136097303, iteration: 64813
loss: 0.9763390421867371,grad_norm: 0.8177547366993315, iteration: 64814
loss: 1.014477014541626,grad_norm: 0.9999991360844511, iteration: 64815
loss: 0.9704580903053284,grad_norm: 0.9492900700537105, iteration: 64816
loss: 0.9422525763511658,grad_norm: 0.9388559066468558, iteration: 64817
loss: 0.9783512949943542,grad_norm: 0.8773983361838251, iteration: 64818
loss: 1.0085316896438599,grad_norm: 0.9999994012592098, iteration: 64819
loss: 1.0330864191055298,grad_norm: 0.9478214825893109, iteration: 64820
loss: 0.9787675738334656,grad_norm: 0.9080818346605595, iteration: 64821
loss: 1.0094329118728638,grad_norm: 0.9999992770439275, iteration: 64822
loss: 1.0462292432785034,grad_norm: 0.9142118779805475, iteration: 64823
loss: 1.0491303205490112,grad_norm: 0.9999995371260003, iteration: 64824
loss: 1.0319006443023682,grad_norm: 0.8435505044205136, iteration: 64825
loss: 1.0003464221954346,grad_norm: 0.846838389628255, iteration: 64826
loss: 0.9975025057792664,grad_norm: 0.8167175721103457, iteration: 64827
loss: 1.0110821723937988,grad_norm: 0.8817818002244185, iteration: 64828
loss: 1.0139080286026,grad_norm: 0.9999995763011154, iteration: 64829
loss: 0.9806328415870667,grad_norm: 0.9039423321018317, iteration: 64830
loss: 1.0535387992858887,grad_norm: 0.9999993938496178, iteration: 64831
loss: 1.0497610569000244,grad_norm: 0.8322995625021814, iteration: 64832
loss: 0.9989542365074158,grad_norm: 0.8938122031486382, iteration: 64833
loss: 1.006516933441162,grad_norm: 0.7934523398079057, iteration: 64834
loss: 1.0276899337768555,grad_norm: 0.9151994726957065, iteration: 64835
loss: 0.999518096446991,grad_norm: 0.9639823670214193, iteration: 64836
loss: 0.9990566372871399,grad_norm: 0.9327127554636058, iteration: 64837
loss: 0.9535940289497375,grad_norm: 0.9960110166756279, iteration: 64838
loss: 1.0386762619018555,grad_norm: 0.9999990070129826, iteration: 64839
loss: 1.027284026145935,grad_norm: 0.9999992300963237, iteration: 64840
loss: 1.0110887289047241,grad_norm: 0.999999178299022, iteration: 64841
loss: 1.120646595954895,grad_norm: 0.9999995564218525, iteration: 64842
loss: 1.0050891637802124,grad_norm: 0.9773179224288792, iteration: 64843
loss: 1.0163997411727905,grad_norm: 0.9999997726140902, iteration: 64844
loss: 1.0486459732055664,grad_norm: 0.9999989596815356, iteration: 64845
loss: 0.9764580726623535,grad_norm: 0.9234651809075026, iteration: 64846
loss: 1.0925617218017578,grad_norm: 0.9999991984505895, iteration: 64847
loss: 1.0071220397949219,grad_norm: 0.7943804330869751, iteration: 64848
loss: 1.009461522102356,grad_norm: 0.8129248134334831, iteration: 64849
loss: 0.9667829275131226,grad_norm: 0.9985589131179186, iteration: 64850
loss: 1.0389221906661987,grad_norm: 0.9999998493200847, iteration: 64851
loss: 1.020614504814148,grad_norm: 0.9999995023086667, iteration: 64852
loss: 0.9751663208007812,grad_norm: 0.9618477387355026, iteration: 64853
loss: 0.9789737462997437,grad_norm: 0.8662106329016707, iteration: 64854
loss: 1.0159333944320679,grad_norm: 0.9701793103216791, iteration: 64855
loss: 0.9850990176200867,grad_norm: 0.9999992963625856, iteration: 64856
loss: 1.0221805572509766,grad_norm: 0.9221776693562939, iteration: 64857
loss: 0.9947953224182129,grad_norm: 0.9999994132002646, iteration: 64858
loss: 0.9298356175422668,grad_norm: 0.8139754185823236, iteration: 64859
loss: 1.0143542289733887,grad_norm: 0.8708922871866744, iteration: 64860
loss: 0.9980833530426025,grad_norm: 0.9999991967507527, iteration: 64861
loss: 0.9758691191673279,grad_norm: 0.925788709412315, iteration: 64862
loss: 1.0074641704559326,grad_norm: 0.9969720719699169, iteration: 64863
loss: 1.0061448812484741,grad_norm: 0.8945931665757318, iteration: 64864
loss: 1.0166014432907104,grad_norm: 0.9999990747298507, iteration: 64865
loss: 0.9946965575218201,grad_norm: 0.8979602002891434, iteration: 64866
loss: 0.9955084323883057,grad_norm: 0.9172198848383947, iteration: 64867
loss: 0.9998695254325867,grad_norm: 0.817373118553905, iteration: 64868
loss: 1.0203739404678345,grad_norm: 0.9524699846619157, iteration: 64869
loss: 1.0068484544754028,grad_norm: 0.9298037760412827, iteration: 64870
loss: 1.0038199424743652,grad_norm: 0.9243941108364473, iteration: 64871
loss: 0.9916624426841736,grad_norm: 0.9999991081036732, iteration: 64872
loss: 1.0245729684829712,grad_norm: 0.9130820752246173, iteration: 64873
loss: 0.9929307699203491,grad_norm: 0.9999997822897585, iteration: 64874
loss: 1.0214701890945435,grad_norm: 0.8578680299987115, iteration: 64875
loss: 1.0410544872283936,grad_norm: 0.9999998823810218, iteration: 64876
loss: 1.0352134704589844,grad_norm: 0.8627175779050092, iteration: 64877
loss: 1.0136997699737549,grad_norm: 0.9999991700255588, iteration: 64878
loss: 0.9804076552391052,grad_norm: 0.9999990298319517, iteration: 64879
loss: 0.9992954730987549,grad_norm: 0.9999992774868215, iteration: 64880
loss: 1.007547378540039,grad_norm: 0.9999991083835855, iteration: 64881
loss: 0.9780242443084717,grad_norm: 0.8845924967179217, iteration: 64882
loss: 0.9944632053375244,grad_norm: 0.9388480259998602, iteration: 64883
loss: 0.99233478307724,grad_norm: 0.9107789708357644, iteration: 64884
loss: 1.0137680768966675,grad_norm: 0.8252039878518184, iteration: 64885
loss: 1.0019389390945435,grad_norm: 0.9999989310959502, iteration: 64886
loss: 1.0096086263656616,grad_norm: 0.9999991472970408, iteration: 64887
loss: 1.0010442733764648,grad_norm: 0.8887072160468154, iteration: 64888
loss: 1.0179208517074585,grad_norm: 0.9108908922984028, iteration: 64889
loss: 0.9722726941108704,grad_norm: 0.8553771526988324, iteration: 64890
loss: 0.9705686569213867,grad_norm: 0.9999991083570346, iteration: 64891
loss: 0.9926223158836365,grad_norm: 0.8254568296440704, iteration: 64892
loss: 0.9740459322929382,grad_norm: 0.9999990728922564, iteration: 64893
loss: 1.0051325559616089,grad_norm: 0.9999990976913429, iteration: 64894
loss: 1.010047197341919,grad_norm: 0.9999993491650232, iteration: 64895
loss: 1.0112804174423218,grad_norm: 0.9999992839925763, iteration: 64896
loss: 1.1416476964950562,grad_norm: 0.9999992269888971, iteration: 64897
loss: 0.9812456369400024,grad_norm: 0.9096579161156382, iteration: 64898
loss: 1.017388105392456,grad_norm: 0.8613794269710835, iteration: 64899
loss: 0.9541445970535278,grad_norm: 0.995467402887683, iteration: 64900
loss: 1.0152910947799683,grad_norm: 0.8687597379583838, iteration: 64901
loss: 1.0128908157348633,grad_norm: 0.9999991468430108, iteration: 64902
loss: 1.0108668804168701,grad_norm: 0.9515998210860144, iteration: 64903
loss: 1.018250584602356,grad_norm: 0.9688592152626873, iteration: 64904
loss: 1.0630362033843994,grad_norm: 0.9999993167391754, iteration: 64905
loss: 1.0381271839141846,grad_norm: 0.9999989597400105, iteration: 64906
loss: 1.0316179990768433,grad_norm: 0.9999990841578783, iteration: 64907
loss: 1.1308073997497559,grad_norm: 0.9999998512925855, iteration: 64908
loss: 1.1966067552566528,grad_norm: 0.9999997181144036, iteration: 64909
loss: 1.047095537185669,grad_norm: 0.9999992707218186, iteration: 64910
loss: 1.1048640012741089,grad_norm: 0.9999998069813857, iteration: 64911
loss: 1.0287245512008667,grad_norm: 0.9999990895576006, iteration: 64912
loss: 1.0037394762039185,grad_norm: 0.9448327360447105, iteration: 64913
loss: 0.9944807887077332,grad_norm: 0.8804793488806262, iteration: 64914
loss: 1.0315192937850952,grad_norm: 0.9999993434050088, iteration: 64915
loss: 1.0138275623321533,grad_norm: 0.8039763916667518, iteration: 64916
loss: 1.0187050104141235,grad_norm: 0.9282700821200311, iteration: 64917
loss: 1.0014652013778687,grad_norm: 0.9999995928354996, iteration: 64918
loss: 1.076196551322937,grad_norm: 0.9999994158518031, iteration: 64919
loss: 0.9690441489219666,grad_norm: 0.9999990893289222, iteration: 64920
loss: 1.0040277242660522,grad_norm: 0.9999991293664764, iteration: 64921
loss: 0.998802125453949,grad_norm: 0.943922458837965, iteration: 64922
loss: 0.9768255352973938,grad_norm: 0.9999990614322158, iteration: 64923
loss: 1.019336462020874,grad_norm: 0.9999989751907497, iteration: 64924
loss: 1.149960994720459,grad_norm: 0.9999996857827212, iteration: 64925
loss: 0.9984626173973083,grad_norm: 0.9999991380913946, iteration: 64926
loss: 1.013360619544983,grad_norm: 0.9688390186780818, iteration: 64927
loss: 1.0121769905090332,grad_norm: 0.999999156843517, iteration: 64928
loss: 1.0105637311935425,grad_norm: 0.8397270069947159, iteration: 64929
loss: 1.004116415977478,grad_norm: 0.8704970145145786, iteration: 64930
loss: 1.0183264017105103,grad_norm: 0.9999996891184082, iteration: 64931
loss: 0.9812631011009216,grad_norm: 0.9999994171781943, iteration: 64932
loss: 1.0665738582611084,grad_norm: 0.9999990827525564, iteration: 64933
loss: 1.0102031230926514,grad_norm: 0.9999990777490455, iteration: 64934
loss: 1.0029854774475098,grad_norm: 0.9206698063562422, iteration: 64935
loss: 1.0415356159210205,grad_norm: 0.9081943389215692, iteration: 64936
loss: 1.0046460628509521,grad_norm: 0.9756867253253508, iteration: 64937
loss: 1.0103086233139038,grad_norm: 0.9999991788139119, iteration: 64938
loss: 0.9954964518547058,grad_norm: 0.890434134087816, iteration: 64939
loss: 1.0003114938735962,grad_norm: 0.9861276059645756, iteration: 64940
loss: 0.9716898798942566,grad_norm: 0.9913167251461533, iteration: 64941
loss: 1.0279916524887085,grad_norm: 0.9749222599026534, iteration: 64942
loss: 1.0047944784164429,grad_norm: 0.86900975713128, iteration: 64943
loss: 0.9925257563591003,grad_norm: 0.999999037890721, iteration: 64944
loss: 1.005506992340088,grad_norm: 0.8975094732034383, iteration: 64945
loss: 1.0237853527069092,grad_norm: 0.9580621668060753, iteration: 64946
loss: 1.015129566192627,grad_norm: 0.9999992066367908, iteration: 64947
loss: 1.1672664880752563,grad_norm: 0.9999998325489375, iteration: 64948
loss: 0.9947213530540466,grad_norm: 0.8033511142322017, iteration: 64949
loss: 0.9842526316642761,grad_norm: 0.769967283292542, iteration: 64950
loss: 1.0164244174957275,grad_norm: 0.9999994987779096, iteration: 64951
loss: 1.005663514137268,grad_norm: 0.9999990971556679, iteration: 64952
loss: 1.0045655965805054,grad_norm: 0.9785961393016749, iteration: 64953
loss: 0.9953445792198181,grad_norm: 0.9956452848488372, iteration: 64954
loss: 1.0418283939361572,grad_norm: 0.9804807203133318, iteration: 64955
loss: 1.0054073333740234,grad_norm: 0.7763904873528453, iteration: 64956
loss: 0.9911931157112122,grad_norm: 0.8802061403387436, iteration: 64957
loss: 0.9682207703590393,grad_norm: 0.9999992034892322, iteration: 64958
loss: 0.9978356957435608,grad_norm: 0.8511534268844508, iteration: 64959
loss: 1.0063064098358154,grad_norm: 0.742395101146025, iteration: 64960
loss: 1.116817831993103,grad_norm: 0.9999999080158096, iteration: 64961
loss: 1.0043576955795288,grad_norm: 0.8717099541055179, iteration: 64962
loss: 1.0422383546829224,grad_norm: 0.9999998631492515, iteration: 64963
loss: 0.9975486993789673,grad_norm: 0.9999992104617235, iteration: 64964
loss: 0.984216034412384,grad_norm: 0.9999990919502613, iteration: 64965
loss: 1.0031449794769287,grad_norm: 0.9663613547526418, iteration: 64966
loss: 1.2049143314361572,grad_norm: 0.999999618223557, iteration: 64967
loss: 1.0014978647232056,grad_norm: 0.903987792580741, iteration: 64968
loss: 1.2579289674758911,grad_norm: 0.9999996350251533, iteration: 64969
loss: 1.0041208267211914,grad_norm: 0.9999994268338424, iteration: 64970
loss: 0.9964045882225037,grad_norm: 0.8514322466409142, iteration: 64971
loss: 1.057611107826233,grad_norm: 0.9999992310835989, iteration: 64972
loss: 1.3067491054534912,grad_norm: 0.9999999650128689, iteration: 64973
loss: 1.0392924547195435,grad_norm: 0.9999995758811784, iteration: 64974
loss: 1.330683708190918,grad_norm: 0.9999998726248961, iteration: 64975
loss: 1.0135390758514404,grad_norm: 0.9999992550236966, iteration: 64976
loss: 1.2300535440444946,grad_norm: 0.9999998934440192, iteration: 64977
loss: 1.086511492729187,grad_norm: 0.9999990827059877, iteration: 64978
loss: 0.9533321261405945,grad_norm: 0.9114487897590758, iteration: 64979
loss: 1.2616188526153564,grad_norm: 0.9999993851289962, iteration: 64980
loss: 1.2292330265045166,grad_norm: 0.9999990964603845, iteration: 64981
loss: 1.2182255983352661,grad_norm: 0.999999525297578, iteration: 64982
loss: 1.060396432876587,grad_norm: 0.9999989863210564, iteration: 64983
loss: 1.22350013256073,grad_norm: 0.9999997830139872, iteration: 64984
loss: 1.0869362354278564,grad_norm: 0.9999991351916337, iteration: 64985
loss: 0.9613650441169739,grad_norm: 0.9999990748179851, iteration: 64986
loss: 1.211725115776062,grad_norm: 0.9999991204964237, iteration: 64987
loss: 1.0785553455352783,grad_norm: 0.9999995869033417, iteration: 64988
loss: 1.0045522451400757,grad_norm: 0.9999994407067363, iteration: 64989
loss: 1.017298698425293,grad_norm: 0.9999990859135303, iteration: 64990
loss: 1.1334089040756226,grad_norm: 0.9999993252408381, iteration: 64991
loss: 1.283643126487732,grad_norm: 0.9999999096273829, iteration: 64992
loss: 1.092832326889038,grad_norm: 0.9999991689071378, iteration: 64993
loss: 1.1460981369018555,grad_norm: 0.9999997992771139, iteration: 64994
loss: 1.1225043535232544,grad_norm: 0.9999991559033267, iteration: 64995
loss: 1.0243690013885498,grad_norm: 0.9999991807231121, iteration: 64996
loss: 1.0209378004074097,grad_norm: 0.9999990859755927, iteration: 64997
loss: 1.0390545129776,grad_norm: 0.9999990956855014, iteration: 64998
loss: 1.0375698804855347,grad_norm: 0.9999990951761143, iteration: 64999
loss: 1.1529135704040527,grad_norm: 0.9999996128690021, iteration: 65000
loss: 1.0556241273880005,grad_norm: 0.9999991238024719, iteration: 65001
loss: 1.0097770690917969,grad_norm: 0.9999992570968976, iteration: 65002
loss: 1.045495629310608,grad_norm: 0.9528200832359339, iteration: 65003
loss: 1.0553722381591797,grad_norm: 0.9999994619649945, iteration: 65004
loss: 1.1386138200759888,grad_norm: 0.9999994745165734, iteration: 65005
loss: 1.1755045652389526,grad_norm: 0.9999996762409299, iteration: 65006
loss: 1.049737811088562,grad_norm: 0.8849373629612333, iteration: 65007
loss: 0.9703420400619507,grad_norm: 0.9288585425697395, iteration: 65008
loss: 1.1226803064346313,grad_norm: 0.999999467156547, iteration: 65009
loss: 1.0788943767547607,grad_norm: 0.9713485782423728, iteration: 65010
loss: 1.1044255495071411,grad_norm: 0.9999993045570713, iteration: 65011
loss: 1.022560954093933,grad_norm: 0.9999996673616085, iteration: 65012
loss: 1.0523890256881714,grad_norm: 0.9999996714728389, iteration: 65013
loss: 1.10950767993927,grad_norm: 0.9999993116055071, iteration: 65014
loss: 1.054478645324707,grad_norm: 0.9999991705218384, iteration: 65015
loss: 1.104811191558838,grad_norm: 0.9999994267265551, iteration: 65016
loss: 1.1617703437805176,grad_norm: 0.9999998926590392, iteration: 65017
loss: 1.0395749807357788,grad_norm: 0.9999991483530329, iteration: 65018
loss: 1.0229145288467407,grad_norm: 0.9999991940998763, iteration: 65019
loss: 1.1129707098007202,grad_norm: 0.9999999334949194, iteration: 65020
loss: 1.0727674961090088,grad_norm: 0.9999998173129115, iteration: 65021
loss: 0.9985767602920532,grad_norm: 0.9999992896133332, iteration: 65022
loss: 1.05440092086792,grad_norm: 0.9999999176413568, iteration: 65023
loss: 1.000535011291504,grad_norm: 0.9999991482603214, iteration: 65024
loss: 1.0532610416412354,grad_norm: 0.9999992560113392, iteration: 65025
loss: 1.263638973236084,grad_norm: 0.999999868483108, iteration: 65026
loss: 1.1150201559066772,grad_norm: 0.9999991413676302, iteration: 65027
loss: 1.0670804977416992,grad_norm: 0.9999993171365584, iteration: 65028
loss: 1.0535895824432373,grad_norm: 0.9999990326993378, iteration: 65029
loss: 1.0145292282104492,grad_norm: 0.9545390775360211, iteration: 65030
loss: 1.003255009651184,grad_norm: 0.9999997400105771, iteration: 65031
loss: 0.9665325880050659,grad_norm: 0.9999991271818378, iteration: 65032
loss: 1.0420421361923218,grad_norm: 0.8887661289515666, iteration: 65033
loss: 1.1283245086669922,grad_norm: 0.999999120760792, iteration: 65034
loss: 1.12039315700531,grad_norm: 0.9999995513581731, iteration: 65035
loss: 1.054579734802246,grad_norm: 0.9999990158536478, iteration: 65036
loss: 1.0501646995544434,grad_norm: 0.8452735518751118, iteration: 65037
loss: 0.9822731018066406,grad_norm: 0.9186642016047059, iteration: 65038
loss: 1.2062888145446777,grad_norm: 0.9999998485459068, iteration: 65039
loss: 1.0020257234573364,grad_norm: 0.9999990800610044, iteration: 65040
loss: 0.97078537940979,grad_norm: 0.9085782496947958, iteration: 65041
loss: 1.0080828666687012,grad_norm: 0.9999991524064666, iteration: 65042
loss: 1.0252330303192139,grad_norm: 0.9999990681583376, iteration: 65043
loss: 0.9885926842689514,grad_norm: 0.8416763571055764, iteration: 65044
loss: 1.1252155303955078,grad_norm: 0.9999999928405917, iteration: 65045
loss: 1.0067839622497559,grad_norm: 0.8422723734921481, iteration: 65046
loss: 0.9791625738143921,grad_norm: 0.8799833659501939, iteration: 65047
loss: 1.0807156562805176,grad_norm: 0.9923833623123, iteration: 65048
loss: 1.0083070993423462,grad_norm: 0.9999994073933093, iteration: 65049
loss: 1.1675975322723389,grad_norm: 0.9999992788720398, iteration: 65050
loss: 1.0228402614593506,grad_norm: 0.9999992163711158, iteration: 65051
loss: 1.0394906997680664,grad_norm: 0.9999992387509707, iteration: 65052
loss: 1.0684789419174194,grad_norm: 0.9999996532323133, iteration: 65053
loss: 0.9935122132301331,grad_norm: 0.9912306535769564, iteration: 65054
loss: 1.0046026706695557,grad_norm: 0.8810347816983418, iteration: 65055
loss: 1.0451946258544922,grad_norm: 0.9999994039888959, iteration: 65056
loss: 1.0122476816177368,grad_norm: 0.7924967923141577, iteration: 65057
loss: 1.0291748046875,grad_norm: 0.7159990973352304, iteration: 65058
loss: 0.9972066283226013,grad_norm: 0.9999992053251886, iteration: 65059
loss: 0.9980547428131104,grad_norm: 0.9999990551329543, iteration: 65060
loss: 0.9961894154548645,grad_norm: 0.8746083696449417, iteration: 65061
loss: 1.0710915327072144,grad_norm: 0.999999116090335, iteration: 65062
loss: 1.1659890413284302,grad_norm: 0.9999998938508559, iteration: 65063
loss: 1.007208228111267,grad_norm: 0.917186044232052, iteration: 65064
loss: 0.9962271451950073,grad_norm: 0.9999993348477639, iteration: 65065
loss: 1.0225788354873657,grad_norm: 0.999999131146233, iteration: 65066
loss: 1.0060960054397583,grad_norm: 0.8817149797575379, iteration: 65067
loss: 0.9656477570533752,grad_norm: 0.9999990904701623, iteration: 65068
loss: 1.0599108934402466,grad_norm: 0.9999993114632474, iteration: 65069
loss: 1.0124064683914185,grad_norm: 0.8256697889319743, iteration: 65070
loss: 1.1260801553726196,grad_norm: 0.9999992365086937, iteration: 65071
loss: 1.0530786514282227,grad_norm: 0.9999995833080207, iteration: 65072
loss: 1.0364470481872559,grad_norm: 0.9999990585340115, iteration: 65073
loss: 1.0346415042877197,grad_norm: 0.9999990866083621, iteration: 65074
loss: 1.003494381904602,grad_norm: 0.9999993342395185, iteration: 65075
loss: 1.0300447940826416,grad_norm: 0.9999993253598626, iteration: 65076
loss: 0.9966773390769958,grad_norm: 0.820063357325287, iteration: 65077
loss: 1.0238827466964722,grad_norm: 0.9999992141267511, iteration: 65078
loss: 1.2623012065887451,grad_norm: 0.9999998151100574, iteration: 65079
loss: 1.008611798286438,grad_norm: 0.9999994160601559, iteration: 65080
loss: 1.072760820388794,grad_norm: 0.9999992226855886, iteration: 65081
loss: 1.067887306213379,grad_norm: 0.9999998731114603, iteration: 65082
loss: 0.9800019860267639,grad_norm: 0.9548147089677718, iteration: 65083
loss: 0.9623526334762573,grad_norm: 0.9999990558063037, iteration: 65084
loss: 1.0338598489761353,grad_norm: 0.9999992120250951, iteration: 65085
loss: 1.1766375303268433,grad_norm: 0.9999992656759935, iteration: 65086
loss: 1.0110026597976685,grad_norm: 0.9999993234320177, iteration: 65087
loss: 1.1053239107131958,grad_norm: 0.9999991823867685, iteration: 65088
loss: 1.004683017730713,grad_norm: 0.9999991959898251, iteration: 65089
loss: 1.0965105295181274,grad_norm: 0.9999999561804734, iteration: 65090
loss: 1.0023771524429321,grad_norm: 0.9999990231270861, iteration: 65091
loss: 1.1242005825042725,grad_norm: 0.9744540386232188, iteration: 65092
loss: 1.063674807548523,grad_norm: 1.0000000126538848, iteration: 65093
loss: 1.0091005563735962,grad_norm: 0.9999995773399468, iteration: 65094
loss: 1.1246857643127441,grad_norm: 0.9999998696963468, iteration: 65095
loss: 1.1190969944000244,grad_norm: 0.9999998897535154, iteration: 65096
loss: 1.0687222480773926,grad_norm: 0.999999625709941, iteration: 65097
loss: 1.0188192129135132,grad_norm: 0.9999991022889566, iteration: 65098
loss: 1.1304774284362793,grad_norm: 0.9999995747134355, iteration: 65099
loss: 1.0091158151626587,grad_norm: 0.8786644694828285, iteration: 65100
loss: 0.9892176985740662,grad_norm: 0.912174626171401, iteration: 65101
loss: 1.0779199600219727,grad_norm: 0.9999993798944036, iteration: 65102
loss: 1.0099269151687622,grad_norm: 0.9999990532786651, iteration: 65103
loss: 0.9994689226150513,grad_norm: 0.9890050691002953, iteration: 65104
loss: 1.1141726970672607,grad_norm: 0.9999993830165084, iteration: 65105
loss: 0.9839770197868347,grad_norm: 0.9999994977444724, iteration: 65106
loss: 1.0152641534805298,grad_norm: 0.9999996687120688, iteration: 65107
loss: 1.0772439241409302,grad_norm: 0.9999990609922231, iteration: 65108
loss: 1.0183733701705933,grad_norm: 0.9999994340537581, iteration: 65109
loss: 0.9861567616462708,grad_norm: 0.7958685951211052, iteration: 65110
loss: 1.0233460664749146,grad_norm: 0.9999998880067856, iteration: 65111
loss: 1.0136929750442505,grad_norm: 0.9999991004561959, iteration: 65112
loss: 1.2061371803283691,grad_norm: 0.9999991964783888, iteration: 65113
loss: 1.1049354076385498,grad_norm: 0.9999999054391528, iteration: 65114
loss: 1.1081956624984741,grad_norm: 0.9999999245007812, iteration: 65115
loss: 1.161063313484192,grad_norm: 0.9999996118504696, iteration: 65116
loss: 1.0251221656799316,grad_norm: 0.9999993648260094, iteration: 65117
loss: 0.9788784384727478,grad_norm: 0.9999990496919708, iteration: 65118
loss: 1.049428105354309,grad_norm: 0.999999909925823, iteration: 65119
loss: 0.9776890873908997,grad_norm: 0.8541444027511517, iteration: 65120
loss: 1.0845491886138916,grad_norm: 0.9999996182243437, iteration: 65121
loss: 0.9735830426216125,grad_norm: 0.9999993348421082, iteration: 65122
loss: 1.0668772459030151,grad_norm: 0.9999993869813568, iteration: 65123
loss: 1.0698717832565308,grad_norm: 0.999999474833862, iteration: 65124
loss: 1.2227106094360352,grad_norm: 0.9999998483419067, iteration: 65125
loss: 1.019004464149475,grad_norm: 0.9999992409688001, iteration: 65126
loss: 1.10905921459198,grad_norm: 0.9999992666287368, iteration: 65127
loss: 1.010364294052124,grad_norm: 0.9999992326963963, iteration: 65128
loss: 1.1689512729644775,grad_norm: 0.9999995448986153, iteration: 65129
loss: 1.0467865467071533,grad_norm: 0.9999999716858915, iteration: 65130
loss: 1.063913106918335,grad_norm: 0.9999993286751826, iteration: 65131
loss: 1.0628869533538818,grad_norm: 0.999999060846401, iteration: 65132
loss: 1.0563273429870605,grad_norm: 0.9999997923635807, iteration: 65133
loss: 0.9935752153396606,grad_norm: 0.9999994719825293, iteration: 65134
loss: 1.0700407028198242,grad_norm: 0.999999641898611, iteration: 65135
loss: 1.1099205017089844,grad_norm: 0.9999995062760682, iteration: 65136
loss: 1.0638231039047241,grad_norm: 0.9999991572111449, iteration: 65137
loss: 0.959374189376831,grad_norm: 0.9999992332767584, iteration: 65138
loss: 1.122635841369629,grad_norm: 0.999999213274007, iteration: 65139
loss: 1.0951569080352783,grad_norm: 0.9832177206090028, iteration: 65140
loss: 1.0241388082504272,grad_norm: 0.999999401961216, iteration: 65141
loss: 1.5174483060836792,grad_norm: 0.9999995344022512, iteration: 65142
loss: 1.1542448997497559,grad_norm: 0.9999996309741469, iteration: 65143
loss: 1.0371490716934204,grad_norm: 0.9999995868516083, iteration: 65144
loss: 0.9407024383544922,grad_norm: 0.99999917060223, iteration: 65145
loss: 1.2307326793670654,grad_norm: 0.9999997089302813, iteration: 65146
loss: 1.0287342071533203,grad_norm: 0.999999702953706, iteration: 65147
loss: 1.0718597173690796,grad_norm: 0.999999435413947, iteration: 65148
loss: 1.09633469581604,grad_norm: 0.999999223171356, iteration: 65149
loss: 1.026877760887146,grad_norm: 0.9999998943120187, iteration: 65150
loss: 1.033786654472351,grad_norm: 0.9637485985271196, iteration: 65151
loss: 1.1798149347305298,grad_norm: 0.9999996943978795, iteration: 65152
loss: 1.096047282218933,grad_norm: 0.9999993530205947, iteration: 65153
loss: 1.1838881969451904,grad_norm: 0.9999992723324722, iteration: 65154
loss: 1.2753788232803345,grad_norm: 0.999999978656596, iteration: 65155
loss: 1.0948275327682495,grad_norm: 0.999999521313192, iteration: 65156
loss: 0.9963115453720093,grad_norm: 0.99999905303918, iteration: 65157
loss: 1.016624927520752,grad_norm: 0.9999995162208862, iteration: 65158
loss: 0.9686439037322998,grad_norm: 0.9999990326753421, iteration: 65159
loss: 1.0189108848571777,grad_norm: 0.9999993469381764, iteration: 65160
loss: 0.9788397550582886,grad_norm: 0.9489221972488199, iteration: 65161
loss: 1.0820670127868652,grad_norm: 0.9999994299262049, iteration: 65162
loss: 1.072187066078186,grad_norm: 0.9999994109967043, iteration: 65163
loss: 1.0869582891464233,grad_norm: 0.9999990190748225, iteration: 65164
loss: 0.9860700368881226,grad_norm: 0.8549303139302367, iteration: 65165
loss: 1.058106541633606,grad_norm: 0.9999996663306174, iteration: 65166
loss: 1.0077552795410156,grad_norm: 0.9999990175500925, iteration: 65167
loss: 0.9648622274398804,grad_norm: 0.9999993086866783, iteration: 65168
loss: 1.0076158046722412,grad_norm: 0.9045633159069928, iteration: 65169
loss: 1.0344022512435913,grad_norm: 0.9999993014795796, iteration: 65170
loss: 1.0202633142471313,grad_norm: 0.9662963666867642, iteration: 65171
loss: 0.9820002913475037,grad_norm: 0.8368235546520019, iteration: 65172
loss: 1.053332805633545,grad_norm: 0.9999998115410406, iteration: 65173
loss: 1.0777442455291748,grad_norm: 0.9999997618585982, iteration: 65174
loss: 1.011020302772522,grad_norm: 0.9497669518027535, iteration: 65175
loss: 1.0104210376739502,grad_norm: 0.9999993322450565, iteration: 65176
loss: 1.0847276449203491,grad_norm: 0.9999998658601863, iteration: 65177
loss: 1.0738638639450073,grad_norm: 0.9999992092143619, iteration: 65178
loss: 0.9928221702575684,grad_norm: 0.9359429705175953, iteration: 65179
loss: 0.9897136092185974,grad_norm: 0.9331492078506844, iteration: 65180
loss: 0.9561886191368103,grad_norm: 0.9999993333923031, iteration: 65181
loss: 1.037456750869751,grad_norm: 0.9999994056380843, iteration: 65182
loss: 1.00620436668396,grad_norm: 0.9999990991165049, iteration: 65183
loss: 1.054306149482727,grad_norm: 0.999999813444329, iteration: 65184
loss: 1.005763292312622,grad_norm: 0.9999990771073423, iteration: 65185
loss: 1.0144059658050537,grad_norm: 0.9999994582966966, iteration: 65186
loss: 1.0039458274841309,grad_norm: 0.9095537588767849, iteration: 65187
loss: 0.9948731064796448,grad_norm: 0.9999993931805476, iteration: 65188
loss: 0.9976267218589783,grad_norm: 0.9999994504616588, iteration: 65189
loss: 1.0070116519927979,grad_norm: 0.9430255981619079, iteration: 65190
loss: 1.0390821695327759,grad_norm: 0.7556917833944785, iteration: 65191
loss: 0.9920015335083008,grad_norm: 0.8638310384282822, iteration: 65192
loss: 1.018369197845459,grad_norm: 0.9999993928615191, iteration: 65193
loss: 1.0406408309936523,grad_norm: 0.9999991908553677, iteration: 65194
loss: 1.1041337251663208,grad_norm: 0.9999993747796563, iteration: 65195
loss: 0.9698216319084167,grad_norm: 0.9516775723430774, iteration: 65196
loss: 1.0684367418289185,grad_norm: 0.9999990428804767, iteration: 65197
loss: 1.2136985063552856,grad_norm: 0.9999998910529263, iteration: 65198
loss: 0.9954158663749695,grad_norm: 0.7249680530375001, iteration: 65199
loss: 1.0913901329040527,grad_norm: 1.0000000384702346, iteration: 65200
loss: 1.0198947191238403,grad_norm: 0.9999992928368753, iteration: 65201
loss: 1.05567467212677,grad_norm: 0.9999992667298369, iteration: 65202
loss: 1.051153302192688,grad_norm: 0.9358023665997421, iteration: 65203
loss: 1.0923210382461548,grad_norm: 0.9775543942709182, iteration: 65204
loss: 1.0417547225952148,grad_norm: 0.9841194941591905, iteration: 65205
loss: 1.099867820739746,grad_norm: 0.9999994577175074, iteration: 65206
loss: 1.0133686065673828,grad_norm: 0.999999099244984, iteration: 65207
loss: 0.9795718789100647,grad_norm: 0.9999990520092327, iteration: 65208
loss: 1.0070900917053223,grad_norm: 0.9999994301960868, iteration: 65209
loss: 1.0083236694335938,grad_norm: 0.9999999587449294, iteration: 65210
loss: 1.0056718587875366,grad_norm: 0.9999996045493947, iteration: 65211
loss: 1.0112286806106567,grad_norm: 0.9999992651361393, iteration: 65212
loss: 0.9423931241035461,grad_norm: 0.9999991998184266, iteration: 65213
loss: 1.112019419670105,grad_norm: 0.9999998012965167, iteration: 65214
loss: 0.9906448125839233,grad_norm: 0.8863257334379515, iteration: 65215
loss: 1.0178993940353394,grad_norm: 0.8876483904209594, iteration: 65216
loss: 0.9652073979377747,grad_norm: 0.8261308759654021, iteration: 65217
loss: 0.9934523105621338,grad_norm: 0.9999996372766143, iteration: 65218
loss: 1.0815356969833374,grad_norm: 0.9999993067678932, iteration: 65219
loss: 1.044471025466919,grad_norm: 0.9999992245746772, iteration: 65220
loss: 1.0513120889663696,grad_norm: 0.9999996164531315, iteration: 65221
loss: 1.2386360168457031,grad_norm: 0.9999998329340711, iteration: 65222
loss: 1.0430188179016113,grad_norm: 0.8388463251167592, iteration: 65223
loss: 1.0148199796676636,grad_norm: 0.9228170829821438, iteration: 65224
loss: 1.0086889266967773,grad_norm: 0.9999993517604959, iteration: 65225
loss: 0.9961344003677368,grad_norm: 0.791734409825512, iteration: 65226
loss: 1.046838402748108,grad_norm: 0.8326154279248623, iteration: 65227
loss: 0.9606500267982483,grad_norm: 0.8667478829697656, iteration: 65228
loss: 1.006868839263916,grad_norm: 0.9999991748217446, iteration: 65229
loss: 0.9901025891304016,grad_norm: 0.955506831988384, iteration: 65230
loss: 1.0342650413513184,grad_norm: 0.9999991600906664, iteration: 65231
loss: 0.9837223291397095,grad_norm: 0.9913836795538232, iteration: 65232
loss: 0.9663306474685669,grad_norm: 0.9999990624261681, iteration: 65233
loss: 0.9938518404960632,grad_norm: 0.9999992649562657, iteration: 65234
loss: 1.0107805728912354,grad_norm: 0.9999990166724653, iteration: 65235
loss: 1.0280349254608154,grad_norm: 0.9999991040821408, iteration: 65236
loss: 0.9864874482154846,grad_norm: 0.9240634804204287, iteration: 65237
loss: 1.026007056236267,grad_norm: 0.9999997087596738, iteration: 65238
loss: 1.0124293565750122,grad_norm: 0.9999992913194743, iteration: 65239
loss: 0.9920801520347595,grad_norm: 0.9999991772546432, iteration: 65240
loss: 1.0211498737335205,grad_norm: 0.9999992028090915, iteration: 65241
loss: 1.0247176885604858,grad_norm: 0.8502023407161662, iteration: 65242
loss: 1.0433403253555298,grad_norm: 0.9999995312141171, iteration: 65243
loss: 1.0098336935043335,grad_norm: 0.9469656755906541, iteration: 65244
loss: 1.0195876359939575,grad_norm: 0.9999990105288712, iteration: 65245
loss: 1.0429695844650269,grad_norm: 0.9999998782931473, iteration: 65246
loss: 0.9898048043251038,grad_norm: 0.9999991627377705, iteration: 65247
loss: 1.0303817987442017,grad_norm: 0.8181234625878321, iteration: 65248
loss: 1.0409181118011475,grad_norm: 0.9999991278843682, iteration: 65249
loss: 0.9939089417457581,grad_norm: 0.8740179797692385, iteration: 65250
loss: 1.0076383352279663,grad_norm: 0.942577028729182, iteration: 65251
loss: 0.9857257604598999,grad_norm: 0.9194621951393636, iteration: 65252
loss: 1.0337603092193604,grad_norm: 0.999999479900297, iteration: 65253
loss: 1.0273191928863525,grad_norm: 0.9368883864718359, iteration: 65254
loss: 0.9638288617134094,grad_norm: 0.9999991715104136, iteration: 65255
loss: 1.0061016082763672,grad_norm: 0.99999925557451, iteration: 65256
loss: 1.004135251045227,grad_norm: 0.9628783105718705, iteration: 65257
loss: 1.0227632522583008,grad_norm: 0.9654779151120596, iteration: 65258
loss: 1.029923677444458,grad_norm: 0.9315558390273433, iteration: 65259
loss: 0.9663534164428711,grad_norm: 0.9999995629179347, iteration: 65260
loss: 1.0324381589889526,grad_norm: 0.9999994854368779, iteration: 65261
loss: 0.9960273504257202,grad_norm: 0.82467972042653, iteration: 65262
loss: 1.0184041261672974,grad_norm: 0.9999989698045251, iteration: 65263
loss: 1.0014116764068604,grad_norm: 0.9999990840924678, iteration: 65264
loss: 1.0345643758773804,grad_norm: 0.9700942630498632, iteration: 65265
loss: 1.073734164237976,grad_norm: 0.99999972292727, iteration: 65266
loss: 1.0956467390060425,grad_norm: 0.9999992403634559, iteration: 65267
loss: 1.0151501893997192,grad_norm: 0.9999990626514892, iteration: 65268
loss: 1.0296448469161987,grad_norm: 0.9035229237438063, iteration: 65269
loss: 1.0277516841888428,grad_norm: 0.9999997075119849, iteration: 65270
loss: 0.9964397549629211,grad_norm: 0.9168277508809003, iteration: 65271
loss: 0.9993604421615601,grad_norm: 0.9286847222589196, iteration: 65272
loss: 0.9945419430732727,grad_norm: 0.9999990930835588, iteration: 65273
loss: 0.9878358840942383,grad_norm: 0.8769531549072683, iteration: 65274
loss: 1.0070610046386719,grad_norm: 0.9999997731850973, iteration: 65275
loss: 1.0078593492507935,grad_norm: 0.9999990156863724, iteration: 65276
loss: 1.0361377000808716,grad_norm: 0.8441857917070201, iteration: 65277
loss: 1.0000698566436768,grad_norm: 0.9211878017256911, iteration: 65278
loss: 1.0432010889053345,grad_norm: 0.9999994968227552, iteration: 65279
loss: 1.0481922626495361,grad_norm: 0.999999837317766, iteration: 65280
loss: 1.0797638893127441,grad_norm: 0.9999992468902709, iteration: 65281
loss: 1.0117180347442627,grad_norm: 0.9909919042612593, iteration: 65282
loss: 1.0283726453781128,grad_norm: 0.9918728573098388, iteration: 65283
loss: 0.9939481019973755,grad_norm: 0.8816550741639422, iteration: 65284
loss: 1.0887880325317383,grad_norm: 0.9999998872504202, iteration: 65285
loss: 1.1461150646209717,grad_norm: 0.9999992540412518, iteration: 65286
loss: 1.0458916425704956,grad_norm: 0.9999993363721651, iteration: 65287
loss: 0.9715209603309631,grad_norm: 0.9473664841757922, iteration: 65288
loss: 1.0770996809005737,grad_norm: 0.999999577910127, iteration: 65289
loss: 1.0181981325149536,grad_norm: 0.9316892623198372, iteration: 65290
loss: 1.020668625831604,grad_norm: 0.884093634015638, iteration: 65291
loss: 0.9689565896987915,grad_norm: 0.9745083262376106, iteration: 65292
loss: 1.007453441619873,grad_norm: 0.9999990476445536, iteration: 65293
loss: 1.026665210723877,grad_norm: 0.9999992395201176, iteration: 65294
loss: 0.9998626708984375,grad_norm: 0.909168731414567, iteration: 65295
loss: 1.0419203042984009,grad_norm: 0.9647879573829844, iteration: 65296
loss: 1.0124802589416504,grad_norm: 0.9999991474698452, iteration: 65297
loss: 1.2078384160995483,grad_norm: 0.9999997813272797, iteration: 65298
loss: 1.05381178855896,grad_norm: 0.9999995619654043, iteration: 65299
loss: 1.0071488618850708,grad_norm: 0.9999992386065406, iteration: 65300
loss: 1.031800627708435,grad_norm: 0.9999993617121474, iteration: 65301
loss: 0.9895434379577637,grad_norm: 0.931805466877233, iteration: 65302
loss: 0.9967862963676453,grad_norm: 0.8926567168451393, iteration: 65303
loss: 0.9955042600631714,grad_norm: 0.9999995999599117, iteration: 65304
loss: 1.0002137422561646,grad_norm: 0.9946921551071682, iteration: 65305
loss: 1.0100904703140259,grad_norm: 0.9999990604025715, iteration: 65306
loss: 1.0065405368804932,grad_norm: 0.8250879051849563, iteration: 65307
loss: 1.0189381837844849,grad_norm: 0.9025069848925296, iteration: 65308
loss: 0.9979246258735657,grad_norm: 0.9922859452480526, iteration: 65309
loss: 1.0169745683670044,grad_norm: 0.9491617212501822, iteration: 65310
loss: 1.0103684663772583,grad_norm: 0.9381880314039308, iteration: 65311
loss: 1.0771398544311523,grad_norm: 0.9999992158800355, iteration: 65312
loss: 0.9930883049964905,grad_norm: 0.8472555556215636, iteration: 65313
loss: 1.0030988454818726,grad_norm: 0.8531520112371549, iteration: 65314
loss: 1.120867371559143,grad_norm: 0.9999998037858119, iteration: 65315
loss: 1.0871702432632446,grad_norm: 0.9999989618195599, iteration: 65316
loss: 1.0377825498580933,grad_norm: 0.9999991461169028, iteration: 65317
loss: 1.0192779302597046,grad_norm: 0.9999990354085272, iteration: 65318
loss: 1.032565951347351,grad_norm: 0.999999291945106, iteration: 65319
loss: 1.0009713172912598,grad_norm: 0.9999998537610342, iteration: 65320
loss: 1.0012764930725098,grad_norm: 0.8067850499588085, iteration: 65321
loss: 1.0013563632965088,grad_norm: 0.9999995225764264, iteration: 65322
loss: 1.0063964128494263,grad_norm: 0.9507466715194749, iteration: 65323
loss: 1.0428115129470825,grad_norm: 0.9999994015092425, iteration: 65324
loss: 1.044255256652832,grad_norm: 0.8651751467673324, iteration: 65325
loss: 1.0336709022521973,grad_norm: 0.999999298913141, iteration: 65326
loss: 1.042006015777588,grad_norm: 0.9999990647795912, iteration: 65327
loss: 0.9701007604598999,grad_norm: 0.881746029687861, iteration: 65328
loss: 0.9837180972099304,grad_norm: 0.9016180792565849, iteration: 65329
loss: 1.0148874521255493,grad_norm: 0.8394683363820674, iteration: 65330
loss: 0.9892082214355469,grad_norm: 0.871702133187819, iteration: 65331
loss: 0.9884541034698486,grad_norm: 0.8940980193064688, iteration: 65332
loss: 0.9876707196235657,grad_norm: 0.9999993696307331, iteration: 65333
loss: 0.9865436553955078,grad_norm: 0.8854836139817257, iteration: 65334
loss: 1.0098413228988647,grad_norm: 0.9999992600429549, iteration: 65335
loss: 0.9770653247833252,grad_norm: 0.9461406501552857, iteration: 65336
loss: 1.0067002773284912,grad_norm: 0.9999997247476784, iteration: 65337
loss: 1.0246118307113647,grad_norm: 0.9999992559473151, iteration: 65338
loss: 0.9945780634880066,grad_norm: 0.9409772926061523, iteration: 65339
loss: 1.0207568407058716,grad_norm: 0.9999992299783207, iteration: 65340
loss: 1.0255640745162964,grad_norm: 0.992533664935775, iteration: 65341
loss: 0.9661260843276978,grad_norm: 0.9999992475178854, iteration: 65342
loss: 0.9656027555465698,grad_norm: 0.8400195362709268, iteration: 65343
loss: 0.9522404074668884,grad_norm: 0.9852633435666158, iteration: 65344
loss: 0.9964216351509094,grad_norm: 0.8686569437456547, iteration: 65345
loss: 0.9886890053749084,grad_norm: 0.999999328228618, iteration: 65346
loss: 0.9804522395133972,grad_norm: 0.7922439358174441, iteration: 65347
loss: 1.0418238639831543,grad_norm: 0.9999997558633834, iteration: 65348
loss: 0.9952422976493835,grad_norm: 0.9999995657742128, iteration: 65349
loss: 1.01992928981781,grad_norm: 0.8555794159228713, iteration: 65350
loss: 0.9922502636909485,grad_norm: 0.9576682691004798, iteration: 65351
loss: 1.0587416887283325,grad_norm: 0.9999991494651155, iteration: 65352
loss: 0.9815143942832947,grad_norm: 0.9999992412737121, iteration: 65353
loss: 0.9490296840667725,grad_norm: 0.8602070678255692, iteration: 65354
loss: 0.9958812594413757,grad_norm: 0.9999993505913898, iteration: 65355
loss: 0.9585757851600647,grad_norm: 0.999999699684602, iteration: 65356
loss: 1.1069103479385376,grad_norm: 0.9999996227993821, iteration: 65357
loss: 1.0342520475387573,grad_norm: 0.8683073174900705, iteration: 65358
loss: 1.0530328750610352,grad_norm: 0.9999991710311781, iteration: 65359
loss: 0.9971091151237488,grad_norm: 0.7831867445580316, iteration: 65360
loss: 0.9958223104476929,grad_norm: 0.9999995163041098, iteration: 65361
loss: 0.9964526295661926,grad_norm: 0.9999991438330961, iteration: 65362
loss: 1.015750765800476,grad_norm: 0.7799810483183759, iteration: 65363
loss: 1.0571695566177368,grad_norm: 0.9287575459933348, iteration: 65364
loss: 1.0596588850021362,grad_norm: 0.999999562059281, iteration: 65365
loss: 1.018094539642334,grad_norm: 0.9999995406030127, iteration: 65366
loss: 0.9728058576583862,grad_norm: 0.9362995107176045, iteration: 65367
loss: 1.0256808996200562,grad_norm: 0.9999993518828535, iteration: 65368
loss: 1.0157908201217651,grad_norm: 0.9999990189630679, iteration: 65369
loss: 0.9703726768493652,grad_norm: 0.9651275177817262, iteration: 65370
loss: 1.0092096328735352,grad_norm: 0.9999990750242698, iteration: 65371
loss: 1.0065208673477173,grad_norm: 0.823540790181887, iteration: 65372
loss: 1.1285165548324585,grad_norm: 0.9999991285414543, iteration: 65373
loss: 0.974729061126709,grad_norm: 0.9999990742948929, iteration: 65374
loss: 0.9941986203193665,grad_norm: 0.9999990830684987, iteration: 65375
loss: 1.0478403568267822,grad_norm: 0.9999997796594093, iteration: 65376
loss: 1.0569018125534058,grad_norm: 0.9999992062189257, iteration: 65377
loss: 0.9965386986732483,grad_norm: 0.9999991762452092, iteration: 65378
loss: 1.0209991931915283,grad_norm: 0.9469861611703136, iteration: 65379
loss: 0.9814547300338745,grad_norm: 0.9721505389991238, iteration: 65380
loss: 0.9957663416862488,grad_norm: 0.7670342576266247, iteration: 65381
loss: 1.1072015762329102,grad_norm: 0.9999996889779612, iteration: 65382
loss: 1.0311133861541748,grad_norm: 0.9999990893783712, iteration: 65383
loss: 1.0639349222183228,grad_norm: 0.9999993394757932, iteration: 65384
loss: 1.0346463918685913,grad_norm: 0.9999998318958885, iteration: 65385
loss: 1.0037463903427124,grad_norm: 0.9629882630393983, iteration: 65386
loss: 0.9878029227256775,grad_norm: 0.9269714885664265, iteration: 65387
loss: 0.9826250672340393,grad_norm: 0.9999993115791216, iteration: 65388
loss: 1.0305148363113403,grad_norm: 0.873769184575752, iteration: 65389
loss: 0.9744824767112732,grad_norm: 0.8267026069488834, iteration: 65390
loss: 1.0737712383270264,grad_norm: 0.9999991400515515, iteration: 65391
loss: 0.9973207712173462,grad_norm: 0.8702674256345271, iteration: 65392
loss: 0.9937682151794434,grad_norm: 0.95619200616427, iteration: 65393
loss: 1.0194483995437622,grad_norm: 0.8484970983045629, iteration: 65394
loss: 0.9855952858924866,grad_norm: 0.8148670394475673, iteration: 65395
loss: 1.0015525817871094,grad_norm: 0.99999918772621, iteration: 65396
loss: 1.0069454908370972,grad_norm: 0.9999991734320774, iteration: 65397
loss: 0.9815359115600586,grad_norm: 0.9999992072423619, iteration: 65398
loss: 1.0034854412078857,grad_norm: 0.9999990526486752, iteration: 65399
loss: 0.9881454706192017,grad_norm: 0.9999991935943845, iteration: 65400
loss: 1.0093415975570679,grad_norm: 0.9999991882739744, iteration: 65401
loss: 0.9906715154647827,grad_norm: 0.9999996284699321, iteration: 65402
loss: 1.0505234003067017,grad_norm: 0.999998954100008, iteration: 65403
loss: 1.0562187433242798,grad_norm: 0.9999992146657353, iteration: 65404
loss: 1.0053508281707764,grad_norm: 0.9999989934321679, iteration: 65405
loss: 0.9947144389152527,grad_norm: 0.9356903192404378, iteration: 65406
loss: 1.107833743095398,grad_norm: 0.9999989668464366, iteration: 65407
loss: 1.0136500597000122,grad_norm: 0.9999994815985154, iteration: 65408
loss: 0.9914186000823975,grad_norm: 0.9601039292344767, iteration: 65409
loss: 1.0219178199768066,grad_norm: 0.9613419001098098, iteration: 65410
loss: 1.0020685195922852,grad_norm: 0.8227588572355355, iteration: 65411
loss: 1.0365636348724365,grad_norm: 0.9999991541895471, iteration: 65412
loss: 1.0029786825180054,grad_norm: 0.9594766203683023, iteration: 65413
loss: 1.0522887706756592,grad_norm: 0.9999992188521797, iteration: 65414
loss: 1.075933575630188,grad_norm: 0.9999996596909387, iteration: 65415
loss: 1.0069280862808228,grad_norm: 0.8605628041781529, iteration: 65416
loss: 0.9966740608215332,grad_norm: 0.9162151064316696, iteration: 65417
loss: 1.009106993675232,grad_norm: 0.9666845780971605, iteration: 65418
loss: 0.9964125156402588,grad_norm: 0.8747390817800611, iteration: 65419
loss: 1.0757921934127808,grad_norm: 0.9100336070571983, iteration: 65420
loss: 0.9633616805076599,grad_norm: 0.9999991778499889, iteration: 65421
loss: 1.0674978494644165,grad_norm: 0.9999991082131378, iteration: 65422
loss: 1.0525710582733154,grad_norm: 0.9999996665043773, iteration: 65423
loss: 0.9897889494895935,grad_norm: 0.9940455447086778, iteration: 65424
loss: 1.0164635181427002,grad_norm: 0.9999991084998532, iteration: 65425
loss: 0.9736316800117493,grad_norm: 0.908611462631021, iteration: 65426
loss: 1.0020372867584229,grad_norm: 0.9999991076918779, iteration: 65427
loss: 1.0136723518371582,grad_norm: 0.9999992809767818, iteration: 65428
loss: 0.9809888601303101,grad_norm: 0.9999991058647736, iteration: 65429
loss: 1.0648932456970215,grad_norm: 0.9999999431088087, iteration: 65430
loss: 0.9775473475456238,grad_norm: 0.9999990585137256, iteration: 65431
loss: 1.0143835544586182,grad_norm: 0.9999990793515519, iteration: 65432
loss: 1.0079649686813354,grad_norm: 0.9999991035926672, iteration: 65433
loss: 1.1170867681503296,grad_norm: 0.9999990358701245, iteration: 65434
loss: 1.010398507118225,grad_norm: 0.9999998100863527, iteration: 65435
loss: 1.0201761722564697,grad_norm: 0.9727309257019162, iteration: 65436
loss: 1.0561882257461548,grad_norm: 0.999999171806813, iteration: 65437
loss: 1.0383172035217285,grad_norm: 0.9999990725341843, iteration: 65438
loss: 0.9780480265617371,grad_norm: 0.9999990219659689, iteration: 65439
loss: 1.038206696510315,grad_norm: 0.9999991552370242, iteration: 65440
loss: 1.0339361429214478,grad_norm: 0.9999990433812127, iteration: 65441
loss: 1.0174599885940552,grad_norm: 0.9999997849331168, iteration: 65442
loss: 1.000288486480713,grad_norm: 0.8670421052234447, iteration: 65443
loss: 1.3230006694793701,grad_norm: 0.9999998645647838, iteration: 65444
loss: 0.971720278263092,grad_norm: 0.9999991016294323, iteration: 65445
loss: 1.0414527654647827,grad_norm: 0.9999999148889113, iteration: 65446
loss: 1.2508971691131592,grad_norm: 0.9999990662541456, iteration: 65447
loss: 1.002703309059143,grad_norm: 0.7670317329098646, iteration: 65448
loss: 1.038601040840149,grad_norm: 0.9291014054073564, iteration: 65449
loss: 1.0068776607513428,grad_norm: 0.9999989720145613, iteration: 65450
loss: 1.0360430479049683,grad_norm: 0.9340201788801581, iteration: 65451
loss: 0.9760919809341431,grad_norm: 0.9321587421273968, iteration: 65452
loss: 0.971908450126648,grad_norm: 0.9630344298183913, iteration: 65453
loss: 1.0128068923950195,grad_norm: 0.9232130671589077, iteration: 65454
loss: 1.004210352897644,grad_norm: 0.8610658828454051, iteration: 65455
loss: 1.0321353673934937,grad_norm: 0.8076389305959897, iteration: 65456
loss: 1.001792311668396,grad_norm: 0.8525900703787076, iteration: 65457
loss: 1.0241409540176392,grad_norm: 0.999999230279699, iteration: 65458
loss: 0.9756924510002136,grad_norm: 0.9999990426880346, iteration: 65459
loss: 1.0090669393539429,grad_norm: 0.8722791753346801, iteration: 65460
loss: 1.017139196395874,grad_norm: 0.9328344289252469, iteration: 65461
loss: 0.9872196912765503,grad_norm: 0.9531112929148248, iteration: 65462
loss: 1.018205165863037,grad_norm: 0.9999993666864538, iteration: 65463
loss: 1.0083000659942627,grad_norm: 0.9692890234850973, iteration: 65464
loss: 0.9850876927375793,grad_norm: 0.8585279559471771, iteration: 65465
loss: 0.9968002438545227,grad_norm: 0.8612170418638299, iteration: 65466
loss: 0.9665186405181885,grad_norm: 0.9233993568188301, iteration: 65467
loss: 0.9951751828193665,grad_norm: 0.8918066070232181, iteration: 65468
loss: 0.9935760498046875,grad_norm: 0.964542839505221, iteration: 65469
loss: 1.0220471620559692,grad_norm: 0.9999990708402147, iteration: 65470
loss: 0.9705999493598938,grad_norm: 0.8015480810533262, iteration: 65471
loss: 1.0013117790222168,grad_norm: 0.999999127234632, iteration: 65472
loss: 0.9957363605499268,grad_norm: 0.973732360780105, iteration: 65473
loss: 0.9677693843841553,grad_norm: 0.9999991391942539, iteration: 65474
loss: 1.0149120092391968,grad_norm: 0.8139569365544573, iteration: 65475
loss: 0.9810296297073364,grad_norm: 0.9153429629403428, iteration: 65476
loss: 0.973617672920227,grad_norm: 0.9139674276359689, iteration: 65477
loss: 1.0057711601257324,grad_norm: 0.9604621074726541, iteration: 65478
loss: 0.9831855893135071,grad_norm: 0.9999991043948502, iteration: 65479
loss: 0.9505019187927246,grad_norm: 0.8472701458399159, iteration: 65480
loss: 0.9858031272888184,grad_norm: 0.890712436047598, iteration: 65481
loss: 1.0443745851516724,grad_norm: 0.9216830996044335, iteration: 65482
loss: 1.004827618598938,grad_norm: 0.9441588825547343, iteration: 65483
loss: 1.0208278894424438,grad_norm: 0.9850495798170907, iteration: 65484
loss: 0.9998125433921814,grad_norm: 0.926302755361992, iteration: 65485
loss: 0.9937172532081604,grad_norm: 0.8882448727769602, iteration: 65486
loss: 1.0147138833999634,grad_norm: 0.8413897652448403, iteration: 65487
loss: 1.08400297164917,grad_norm: 0.9999991664942692, iteration: 65488
loss: 0.9922367930412292,grad_norm: 0.9027369301090966, iteration: 65489
loss: 0.9855458736419678,grad_norm: 0.9999991550370158, iteration: 65490
loss: 0.9677707552909851,grad_norm: 0.9533682903650186, iteration: 65491
loss: 1.0577589273452759,grad_norm: 0.9999997708589273, iteration: 65492
loss: 1.015678882598877,grad_norm: 0.9999992214872065, iteration: 65493
loss: 1.0429672002792358,grad_norm: 0.9780126791598909, iteration: 65494
loss: 0.9770113825798035,grad_norm: 0.9999991486525039, iteration: 65495
loss: 1.0769953727722168,grad_norm: 0.9999993341042281, iteration: 65496
loss: 1.0405998229980469,grad_norm: 0.9319583596464616, iteration: 65497
loss: 1.0013285875320435,grad_norm: 0.9745261004081943, iteration: 65498
loss: 1.0299400091171265,grad_norm: 0.9752795380320598, iteration: 65499
loss: 0.969508171081543,grad_norm: 0.9999991969172869, iteration: 65500
loss: 0.995181143283844,grad_norm: 0.9774892311139713, iteration: 65501
loss: 1.0133215188980103,grad_norm: 0.9439442482554903, iteration: 65502
loss: 1.039271593093872,grad_norm: 0.877256003660661, iteration: 65503
loss: 1.0044851303100586,grad_norm: 0.8315601414232856, iteration: 65504
loss: 1.0411068201065063,grad_norm: 0.8494869267291669, iteration: 65505
loss: 1.0487606525421143,grad_norm: 0.999999153371237, iteration: 65506
loss: 0.996390163898468,grad_norm: 0.9999997632550817, iteration: 65507
loss: 1.0221149921417236,grad_norm: 0.9999990864051639, iteration: 65508
loss: 1.0246508121490479,grad_norm: 0.9481819786058276, iteration: 65509
loss: 1.0110403299331665,grad_norm: 0.9534935066568251, iteration: 65510
loss: 1.053418755531311,grad_norm: 0.9999990901992121, iteration: 65511
loss: 0.9790315628051758,grad_norm: 0.8123090103541656, iteration: 65512
loss: 1.0067214965820312,grad_norm: 0.9999989823621491, iteration: 65513
loss: 1.0105780363082886,grad_norm: 0.9340733458067584, iteration: 65514
loss: 0.9928479194641113,grad_norm: 0.9873169503653922, iteration: 65515
loss: 0.9757223725318909,grad_norm: 0.9999990529167795, iteration: 65516
loss: 1.0081373453140259,grad_norm: 0.9175799848882065, iteration: 65517
loss: 1.00909423828125,grad_norm: 0.8595309085067846, iteration: 65518
loss: 1.0516268014907837,grad_norm: 0.9309376661703809, iteration: 65519
loss: 1.002432107925415,grad_norm: 0.9999991232197977, iteration: 65520
loss: 1.0101853609085083,grad_norm: 0.9999993593926821, iteration: 65521
loss: 0.9925873279571533,grad_norm: 0.9999991711223262, iteration: 65522
loss: 1.0213276147842407,grad_norm: 0.8656211273536601, iteration: 65523
loss: 1.0105669498443604,grad_norm: 0.999999078354195, iteration: 65524
loss: 0.97683185338974,grad_norm: 0.8269619506835536, iteration: 65525
loss: 0.9653595685958862,grad_norm: 0.999999219525768, iteration: 65526
loss: 0.9814247488975525,grad_norm: 0.9999991473486213, iteration: 65527
loss: 0.9766064286231995,grad_norm: 0.9436278221496962, iteration: 65528
loss: 1.0138517618179321,grad_norm: 0.8989757107418135, iteration: 65529
loss: 0.993272602558136,grad_norm: 0.8363078540160975, iteration: 65530
loss: 1.1038693189620972,grad_norm: 0.9778049572357348, iteration: 65531
loss: 0.9826335906982422,grad_norm: 0.7700917311172165, iteration: 65532
loss: 1.1298189163208008,grad_norm: 0.9999995929867492, iteration: 65533
loss: 1.0008982419967651,grad_norm: 0.999999150043831, iteration: 65534
loss: 0.9952004551887512,grad_norm: 0.9999996656078561, iteration: 65535
loss: 1.0301921367645264,grad_norm: 0.9999998173046323, iteration: 65536
loss: 0.9687966704368591,grad_norm: 0.9265143831441905, iteration: 65537
loss: 0.9716629385948181,grad_norm: 0.9999989993500983, iteration: 65538
loss: 1.3552123308181763,grad_norm: 0.9999995197397851, iteration: 65539
loss: 1.0891436338424683,grad_norm: 0.8936543754920826, iteration: 65540
loss: 1.0165903568267822,grad_norm: 0.9284603714687798, iteration: 65541
loss: 1.0520412921905518,grad_norm: 0.9009029240735987, iteration: 65542
loss: 1.1230828762054443,grad_norm: 0.9999990063676022, iteration: 65543
loss: 1.0733267068862915,grad_norm: 0.9999990571395291, iteration: 65544
loss: 1.017541766166687,grad_norm: 0.8886223868926753, iteration: 65545
loss: 1.1060205698013306,grad_norm: 0.9999993872346162, iteration: 65546
loss: 1.0098518133163452,grad_norm: 0.9361150044951732, iteration: 65547
loss: 1.0296878814697266,grad_norm: 0.9999989453861081, iteration: 65548
loss: 1.0113874673843384,grad_norm: 0.9653029805828054, iteration: 65549
loss: 0.9808159470558167,grad_norm: 0.999999030016406, iteration: 65550
loss: 1.0266088247299194,grad_norm: 0.9999991051128485, iteration: 65551
loss: 1.0080245733261108,grad_norm: 0.947342487462998, iteration: 65552
loss: 1.0356605052947998,grad_norm: 0.8786762629755206, iteration: 65553
loss: 1.012523889541626,grad_norm: 0.9999990810213016, iteration: 65554
loss: 0.9735999703407288,grad_norm: 0.9999990408821958, iteration: 65555
loss: 1.033078908920288,grad_norm: 0.9999990861729856, iteration: 65556
loss: 0.9772887825965881,grad_norm: 0.928810557720634, iteration: 65557
loss: 1.0045768022537231,grad_norm: 0.819453284084817, iteration: 65558
loss: 0.999868631362915,grad_norm: 0.9742149951688344, iteration: 65559
loss: 1.018933892250061,grad_norm: 0.9045512531205406, iteration: 65560
loss: 1.0089582204818726,grad_norm: 0.9951287505979319, iteration: 65561
loss: 1.0114197731018066,grad_norm: 0.9704136481145664, iteration: 65562
loss: 1.0048270225524902,grad_norm: 0.9999992061175252, iteration: 65563
loss: 0.9804814457893372,grad_norm: 0.9724861219159263, iteration: 65564
loss: 0.9713307023048401,grad_norm: 0.9924421978455464, iteration: 65565
loss: 1.033456563949585,grad_norm: 0.9534339161211476, iteration: 65566
loss: 1.007171869277954,grad_norm: 0.9999990654965233, iteration: 65567
loss: 1.0027223825454712,grad_norm: 0.9916095273956195, iteration: 65568
loss: 0.9968370795249939,grad_norm: 0.8488009841505493, iteration: 65569
loss: 0.9557106494903564,grad_norm: 0.8735985220878534, iteration: 65570
loss: 1.037117838859558,grad_norm: 0.8891110207997979, iteration: 65571
loss: 0.976957380771637,grad_norm: 0.9999990857541066, iteration: 65572
loss: 1.0044729709625244,grad_norm: 0.8313064697959863, iteration: 65573
loss: 1.010115385055542,grad_norm: 0.9354383558035517, iteration: 65574
loss: 1.0654884576797485,grad_norm: 0.9999991485167563, iteration: 65575
loss: 0.9738358855247498,grad_norm: 0.9999990409556483, iteration: 65576
loss: 1.0188405513763428,grad_norm: 0.9999996063909617, iteration: 65577
loss: 1.0015296936035156,grad_norm: 0.9075806809779865, iteration: 65578
loss: 0.9993050694465637,grad_norm: 0.880491076926455, iteration: 65579
loss: 1.001069188117981,grad_norm: 0.9999992367994877, iteration: 65580
loss: 0.998924732208252,grad_norm: 0.9910925215441427, iteration: 65581
loss: 0.9951000213623047,grad_norm: 0.8586320750146792, iteration: 65582
loss: 0.981066882610321,grad_norm: 0.8980708428781985, iteration: 65583
loss: 0.9506886005401611,grad_norm: 0.9645745114794213, iteration: 65584
loss: 0.9601229429244995,grad_norm: 0.9130412244726391, iteration: 65585
loss: 0.9798825979232788,grad_norm: 0.8060134428348283, iteration: 65586
loss: 1.0118095874786377,grad_norm: 0.9999990791416391, iteration: 65587
loss: 1.0418195724487305,grad_norm: 0.9550359467291996, iteration: 65588
loss: 0.9784177541732788,grad_norm: 0.9999991564889102, iteration: 65589
loss: 1.0096805095672607,grad_norm: 0.9328937396088527, iteration: 65590
loss: 1.0273016691207886,grad_norm: 0.8358419907506333, iteration: 65591
loss: 1.1812812089920044,grad_norm: 0.9999997712489915, iteration: 65592
loss: 1.010303258895874,grad_norm: 0.9999996654348692, iteration: 65593
loss: 1.0034185647964478,grad_norm: 0.9447639676593851, iteration: 65594
loss: 0.9965306520462036,grad_norm: 0.8373660044112546, iteration: 65595
loss: 1.0220059156417847,grad_norm: 0.9999994314799732, iteration: 65596
loss: 1.0082433223724365,grad_norm: 0.8793250981886339, iteration: 65597
loss: 1.0405093431472778,grad_norm: 0.9999997386655533, iteration: 65598
loss: 0.9952646493911743,grad_norm: 0.8735437790657903, iteration: 65599
loss: 0.9755417108535767,grad_norm: 0.8219197603037766, iteration: 65600
loss: 0.9880451560020447,grad_norm: 0.9593435543681922, iteration: 65601
loss: 1.074473261833191,grad_norm: 0.8944086035440239, iteration: 65602
loss: 0.9873775839805603,grad_norm: 0.9393197444010385, iteration: 65603
loss: 0.9896838665008545,grad_norm: 0.9279618798648489, iteration: 65604
loss: 1.0246107578277588,grad_norm: 0.9999990951662733, iteration: 65605
loss: 0.9940921068191528,grad_norm: 0.8910067121619715, iteration: 65606
loss: 1.0210063457489014,grad_norm: 0.9399304927420076, iteration: 65607
loss: 1.0144233703613281,grad_norm: 0.9617238409797287, iteration: 65608
loss: 0.9990902543067932,grad_norm: 0.9434535561887629, iteration: 65609
loss: 0.9714404940605164,grad_norm: 0.9999993696864834, iteration: 65610
loss: 1.0361039638519287,grad_norm: 0.8562010336790672, iteration: 65611
loss: 1.038584589958191,grad_norm: 0.9274862601568261, iteration: 65612
loss: 1.0626952648162842,grad_norm: 0.9944851808082429, iteration: 65613
loss: 0.9969830513000488,grad_norm: 0.8288718953125193, iteration: 65614
loss: 1.0736628770828247,grad_norm: 0.908933145387139, iteration: 65615
loss: 1.047371745109558,grad_norm: 0.9999991198099483, iteration: 65616
loss: 0.9948806166648865,grad_norm: 0.8648340959658679, iteration: 65617
loss: 0.938832700252533,grad_norm: 0.9623763839870122, iteration: 65618
loss: 1.0073423385620117,grad_norm: 0.9305499232088679, iteration: 65619
loss: 0.9939019680023193,grad_norm: 0.9615879496458523, iteration: 65620
loss: 0.9760059714317322,grad_norm: 0.9118877149197402, iteration: 65621
loss: 0.9987260699272156,grad_norm: 0.9897189763433135, iteration: 65622
loss: 0.9735361933708191,grad_norm: 0.9552826646913837, iteration: 65623
loss: 0.9971777200698853,grad_norm: 0.999999164411455, iteration: 65624
loss: 1.0328357219696045,grad_norm: 0.8622907119978845, iteration: 65625
loss: 1.035569429397583,grad_norm: 0.9999991429372173, iteration: 65626
loss: 1.0504807233810425,grad_norm: 0.9999992069436081, iteration: 65627
loss: 1.0735224485397339,grad_norm: 0.9999991352187024, iteration: 65628
loss: 1.0143845081329346,grad_norm: 0.9999997594317279, iteration: 65629
loss: 0.9948141574859619,grad_norm: 0.9586942037992474, iteration: 65630
loss: 1.176664113998413,grad_norm: 0.9999997210916446, iteration: 65631
loss: 1.0005708932876587,grad_norm: 0.9999992192810881, iteration: 65632
loss: 0.9921395182609558,grad_norm: 0.9999993753325014, iteration: 65633
loss: 0.9916930198669434,grad_norm: 0.9939012338880372, iteration: 65634
loss: 0.9865453243255615,grad_norm: 0.8746138387901099, iteration: 65635
loss: 0.9818477630615234,grad_norm: 0.9999991190546031, iteration: 65636
loss: 1.1185919046401978,grad_norm: 0.9999990665521791, iteration: 65637
loss: 1.059175968170166,grad_norm: 0.9999995273973665, iteration: 65638
loss: 1.0371184349060059,grad_norm: 0.9632845205524199, iteration: 65639
loss: 1.0305663347244263,grad_norm: 0.9999995331486139, iteration: 65640
loss: 1.0541666746139526,grad_norm: 0.9999990791515371, iteration: 65641
loss: 1.1297014951705933,grad_norm: 0.999999142509386, iteration: 65642
loss: 1.2485136985778809,grad_norm: 0.9999995707975643, iteration: 65643
loss: 0.9767159223556519,grad_norm: 0.787188473589133, iteration: 65644
loss: 0.9948858022689819,grad_norm: 0.9446489198328336, iteration: 65645
loss: 1.0518789291381836,grad_norm: 0.992878817025371, iteration: 65646
loss: 1.2696776390075684,grad_norm: 0.9999992963469674, iteration: 65647
loss: 1.0976768732070923,grad_norm: 0.9999994325939526, iteration: 65648
loss: 1.073403000831604,grad_norm: 0.9999992686740284, iteration: 65649
loss: 1.0290672779083252,grad_norm: 0.9999995329566037, iteration: 65650
loss: 1.0817995071411133,grad_norm: 0.9999999515034113, iteration: 65651
loss: 0.9976637959480286,grad_norm: 0.9999992690777654, iteration: 65652
loss: 0.9973747134208679,grad_norm: 0.957328501110567, iteration: 65653
loss: 1.0001176595687866,grad_norm: 0.9999990624940112, iteration: 65654
loss: 1.0030039548873901,grad_norm: 0.9274373709723571, iteration: 65655
loss: 1.1031711101531982,grad_norm: 0.9999993261401322, iteration: 65656
loss: 1.0281327962875366,grad_norm: 0.9169470322010453, iteration: 65657
loss: 1.0570108890533447,grad_norm: 0.9999991822203476, iteration: 65658
loss: 1.0178534984588623,grad_norm: 0.999999088360163, iteration: 65659
loss: 0.9860934615135193,grad_norm: 0.8482503477984398, iteration: 65660
loss: 0.9962099194526672,grad_norm: 0.8624770724378139, iteration: 65661
loss: 1.0256410837173462,grad_norm: 0.9999995883584327, iteration: 65662
loss: 1.029920220375061,grad_norm: 0.9999993211615329, iteration: 65663
loss: 0.974056601524353,grad_norm: 0.9999992699322116, iteration: 65664
loss: 0.9898889064788818,grad_norm: 0.9999990602773005, iteration: 65665
loss: 0.9845017194747925,grad_norm: 0.999999212217698, iteration: 65666
loss: 1.0029752254486084,grad_norm: 0.9533311152437776, iteration: 65667
loss: 1.0038161277770996,grad_norm: 0.999999117459347, iteration: 65668
loss: 0.9951673746109009,grad_norm: 0.8368664959902836, iteration: 65669
loss: 1.015364646911621,grad_norm: 0.9999991348502988, iteration: 65670
loss: 1.0243583917617798,grad_norm: 0.9999994927057648, iteration: 65671
loss: 1.0605742931365967,grad_norm: 0.9999992601752351, iteration: 65672
loss: 1.0117039680480957,grad_norm: 0.999999056527131, iteration: 65673
loss: 1.0078924894332886,grad_norm: 0.9999993880242063, iteration: 65674
loss: 0.9682773351669312,grad_norm: 0.9999991839652117, iteration: 65675
loss: 1.0322554111480713,grad_norm: 0.9999991053149438, iteration: 65676
loss: 1.020498514175415,grad_norm: 0.8118437772632329, iteration: 65677
loss: 0.992784321308136,grad_norm: 0.8541655874316724, iteration: 65678
loss: 1.0424304008483887,grad_norm: 0.9999998694706201, iteration: 65679
loss: 0.9946277141571045,grad_norm: 0.9488504222590356, iteration: 65680
loss: 1.0142419338226318,grad_norm: 0.8377781808742603, iteration: 65681
loss: 1.0036648511886597,grad_norm: 0.9999994115081821, iteration: 65682
loss: 1.0611166954040527,grad_norm: 0.8274847965875137, iteration: 65683
loss: 0.9805370569229126,grad_norm: 0.9999990600631558, iteration: 65684
loss: 1.0068633556365967,grad_norm: 0.9999992946636592, iteration: 65685
loss: 1.0122967958450317,grad_norm: 0.9999994201319254, iteration: 65686
loss: 1.0186148881912231,grad_norm: 0.9288468617156558, iteration: 65687
loss: 1.0414495468139648,grad_norm: 0.9570266562324459, iteration: 65688
loss: 1.0205328464508057,grad_norm: 0.999999094113249, iteration: 65689
loss: 1.0016063451766968,grad_norm: 0.9676957961965648, iteration: 65690
loss: 0.9654650092124939,grad_norm: 0.9999990957953031, iteration: 65691
loss: 0.9669517874717712,grad_norm: 0.910170948135838, iteration: 65692
loss: 1.058907151222229,grad_norm: 0.9999995323096289, iteration: 65693
loss: 1.0044549703598022,grad_norm: 0.8053105992989879, iteration: 65694
loss: 0.9983541369438171,grad_norm: 0.9411609896818031, iteration: 65695
loss: 1.044279932975769,grad_norm: 0.9999998881193446, iteration: 65696
loss: 1.1555657386779785,grad_norm: 0.9999995218998564, iteration: 65697
loss: 0.9804163575172424,grad_norm: 0.9999991339309843, iteration: 65698
loss: 0.9830148816108704,grad_norm: 0.9999991106407693, iteration: 65699
loss: 1.0561374425888062,grad_norm: 0.9999999119439802, iteration: 65700
loss: 1.0655953884124756,grad_norm: 0.9999996368251455, iteration: 65701
loss: 1.122931957244873,grad_norm: 0.9999990477402926, iteration: 65702
loss: 0.9616982936859131,grad_norm: 0.9999991358577939, iteration: 65703
loss: 1.0004103183746338,grad_norm: 0.9782580284029541, iteration: 65704
loss: 1.0299426317214966,grad_norm: 0.9999995230820184, iteration: 65705
loss: 1.0084067583084106,grad_norm: 0.9999990560058077, iteration: 65706
loss: 1.0342540740966797,grad_norm: 0.9999997557506356, iteration: 65707
loss: 1.0087647438049316,grad_norm: 0.9999991219722267, iteration: 65708
loss: 1.0480865240097046,grad_norm: 0.9999992930441759, iteration: 65709
loss: 1.0007274150848389,grad_norm: 0.8830023120784875, iteration: 65710
loss: 0.9944517612457275,grad_norm: 0.9999996636155712, iteration: 65711
loss: 1.1644234657287598,grad_norm: 0.9999991795605462, iteration: 65712
loss: 0.9682071805000305,grad_norm: 0.9999990175759048, iteration: 65713
loss: 1.036190390586853,grad_norm: 0.999999161143862, iteration: 65714
loss: 1.0041799545288086,grad_norm: 0.999999171219044, iteration: 65715
loss: 1.0018482208251953,grad_norm: 0.8757427224294744, iteration: 65716
loss: 1.0811042785644531,grad_norm: 0.9999992123048302, iteration: 65717
loss: 0.9964518547058105,grad_norm: 0.9595053439892084, iteration: 65718
loss: 1.0059250593185425,grad_norm: 0.9999990513599928, iteration: 65719
loss: 1.0404560565948486,grad_norm: 0.9999994353964089, iteration: 65720
loss: 0.9960605502128601,grad_norm: 0.9332216928750028, iteration: 65721
loss: 1.0571300983428955,grad_norm: 0.9999990380528166, iteration: 65722
loss: 1.104322075843811,grad_norm: 1.0000000054966887, iteration: 65723
loss: 1.0659583806991577,grad_norm: 0.9999996000272429, iteration: 65724
loss: 1.030761957168579,grad_norm: 0.904271617293258, iteration: 65725
loss: 1.1327112913131714,grad_norm: 0.9999995283894048, iteration: 65726
loss: 1.01344895362854,grad_norm: 0.9877933545128905, iteration: 65727
loss: 1.0234137773513794,grad_norm: 0.9999997511098693, iteration: 65728
loss: 1.0260610580444336,grad_norm: 0.7733985334087022, iteration: 65729
loss: 1.025086522102356,grad_norm: 0.90992936831546, iteration: 65730
loss: 1.094340443611145,grad_norm: 0.9933557606364065, iteration: 65731
loss: 0.9988569021224976,grad_norm: 0.9509846436385422, iteration: 65732
loss: 1.059085726737976,grad_norm: 0.9999995492115749, iteration: 65733
loss: 0.9786583781242371,grad_norm: 0.9999989991000386, iteration: 65734
loss: 1.014578104019165,grad_norm: 0.891217816888263, iteration: 65735
loss: 1.0527193546295166,grad_norm: 0.9540380233747858, iteration: 65736
loss: 1.0160449743270874,grad_norm: 0.8383315108530718, iteration: 65737
loss: 0.9885907173156738,grad_norm: 0.9005347611151705, iteration: 65738
loss: 1.1208432912826538,grad_norm: 0.9999992723745066, iteration: 65739
loss: 1.045426845550537,grad_norm: 0.9999991944741922, iteration: 65740
loss: 1.0071585178375244,grad_norm: 0.9999995763466258, iteration: 65741
loss: 1.1612728834152222,grad_norm: 0.9999990381215177, iteration: 65742
loss: 0.9696756601333618,grad_norm: 0.8323906006954943, iteration: 65743
loss: 1.0241857767105103,grad_norm: 0.999999374951174, iteration: 65744
loss: 1.150844693183899,grad_norm: 0.9999998396352128, iteration: 65745
loss: 0.971474289894104,grad_norm: 0.9999993870508603, iteration: 65746
loss: 1.1340605020523071,grad_norm: 0.9999992929293894, iteration: 65747
loss: 1.0994236469268799,grad_norm: 0.9999998576671238, iteration: 65748
loss: 1.0643527507781982,grad_norm: 0.984296983986815, iteration: 65749
loss: 1.0457398891448975,grad_norm: 0.9104959824855577, iteration: 65750
loss: 0.9881742596626282,grad_norm: 0.8683314631020768, iteration: 65751
loss: 1.035488247871399,grad_norm: 0.9999995884433909, iteration: 65752
loss: 1.1319917440414429,grad_norm: 0.9999994073148238, iteration: 65753
loss: 1.1529840230941772,grad_norm: 0.9999991817922194, iteration: 65754
loss: 1.0336906909942627,grad_norm: 0.8136047183345557, iteration: 65755
loss: 1.0865100622177124,grad_norm: 0.9999993256405557, iteration: 65756
loss: 1.1454365253448486,grad_norm: 0.9999991966812354, iteration: 65757
loss: 0.9975448250770569,grad_norm: 0.9119713693418413, iteration: 65758
loss: 1.036887288093567,grad_norm: 0.8409913579191022, iteration: 65759
loss: 1.0167300701141357,grad_norm: 0.9242217814544228, iteration: 65760
loss: 1.0666725635528564,grad_norm: 0.9999992262933362, iteration: 65761
loss: 0.9794550538063049,grad_norm: 0.9999992443085843, iteration: 65762
loss: 1.1296765804290771,grad_norm: 0.9999992744531778, iteration: 65763
loss: 1.0255134105682373,grad_norm: 0.9481699639360099, iteration: 65764
loss: 1.2190947532653809,grad_norm: 0.9999997910678671, iteration: 65765
loss: 1.063989520072937,grad_norm: 0.952707086529709, iteration: 65766
loss: 1.0822619199752808,grad_norm: 0.9999993558795922, iteration: 65767
loss: 1.1301765441894531,grad_norm: 0.9999990892605023, iteration: 65768
loss: 1.1008743047714233,grad_norm: 0.8602163825070965, iteration: 65769
loss: 1.0256847143173218,grad_norm: 0.8598339488668236, iteration: 65770
loss: 0.9967555999755859,grad_norm: 0.9811667177191319, iteration: 65771
loss: 1.015775442123413,grad_norm: 0.8501453241611046, iteration: 65772
loss: 1.0099437236785889,grad_norm: 0.9999991864227158, iteration: 65773
loss: 1.04424250125885,grad_norm: 0.9999993052011161, iteration: 65774
loss: 1.0734773874282837,grad_norm: 0.9999992472693606, iteration: 65775
loss: 1.0463343858718872,grad_norm: 0.9999996962857807, iteration: 65776
loss: 0.9869820475578308,grad_norm: 0.989291846033447, iteration: 65777
loss: 1.046109676361084,grad_norm: 0.9999989510579063, iteration: 65778
loss: 1.0381865501403809,grad_norm: 0.9999989962713075, iteration: 65779
loss: 1.03148353099823,grad_norm: 0.999999842399641, iteration: 65780
loss: 1.087794542312622,grad_norm: 0.9999992418025223, iteration: 65781
loss: 1.0758056640625,grad_norm: 0.811983205515606, iteration: 65782
loss: 1.0344644784927368,grad_norm: 0.9999994885280857, iteration: 65783
loss: 1.016352891921997,grad_norm: 0.8796877206210509, iteration: 65784
loss: 1.0618267059326172,grad_norm: 0.9678827440478948, iteration: 65785
loss: 1.0686757564544678,grad_norm: 0.9999994375750775, iteration: 65786
loss: 0.9945651888847351,grad_norm: 0.8855177303159841, iteration: 65787
loss: 1.045681118965149,grad_norm: 0.9999990621713356, iteration: 65788
loss: 1.271181344985962,grad_norm: 0.9999990459705657, iteration: 65789
loss: 1.3726474046707153,grad_norm: 0.9999998837554196, iteration: 65790
loss: 1.0420747995376587,grad_norm: 0.9999991887752283, iteration: 65791
loss: 1.1389425992965698,grad_norm: 0.999999107605968, iteration: 65792
loss: 1.0691660642623901,grad_norm: 0.9919046878510205, iteration: 65793
loss: 1.0899578332901,grad_norm: 0.9999995682791295, iteration: 65794
loss: 1.1133040189743042,grad_norm: 0.9999994712903093, iteration: 65795
loss: 1.0400147438049316,grad_norm: 0.9999992557795575, iteration: 65796
loss: 1.047163486480713,grad_norm: 0.999999705174066, iteration: 65797
loss: 1.0559972524642944,grad_norm: 0.8894369399736219, iteration: 65798
loss: 1.016846776008606,grad_norm: 0.9999992879509999, iteration: 65799
loss: 0.9887405037879944,grad_norm: 0.9613716377657827, iteration: 65800
loss: 1.1469331979751587,grad_norm: 0.9999997573002135, iteration: 65801
loss: 1.139337420463562,grad_norm: 0.9999996471382414, iteration: 65802
loss: 1.0465739965438843,grad_norm: 0.9999995238948095, iteration: 65803
loss: 1.0338834524154663,grad_norm: 0.964067107354456, iteration: 65804
loss: 0.9688225984573364,grad_norm: 0.9999998311896188, iteration: 65805
loss: 1.0467815399169922,grad_norm: 0.9357263001733088, iteration: 65806
loss: 1.1583770513534546,grad_norm: 0.9999997221144047, iteration: 65807
loss: 1.014530897140503,grad_norm: 0.9729599869059323, iteration: 65808
loss: 1.0392730236053467,grad_norm: 0.876242708753395, iteration: 65809
loss: 1.0445702075958252,grad_norm: 0.9999989831031046, iteration: 65810
loss: 1.0480214357376099,grad_norm: 0.9135087508572678, iteration: 65811
loss: 1.0116020441055298,grad_norm: 0.8894858274253976, iteration: 65812
loss: 1.0174310207366943,grad_norm: 0.8652321853695044, iteration: 65813
loss: 1.2595117092132568,grad_norm: 0.9999998678348749, iteration: 65814
loss: 1.0107285976409912,grad_norm: 0.9595350104169927, iteration: 65815
loss: 1.0636202096939087,grad_norm: 0.9999995411856882, iteration: 65816
loss: 1.1640896797180176,grad_norm: 0.9999994302415081, iteration: 65817
loss: 1.021875023841858,grad_norm: 0.9999989827762477, iteration: 65818
loss: 1.0304392576217651,grad_norm: 0.9999996592167464, iteration: 65819
loss: 0.990815281867981,grad_norm: 0.9189710702904211, iteration: 65820
loss: 1.0233877897262573,grad_norm: 0.9999991872144989, iteration: 65821
loss: 1.0487678050994873,grad_norm: 0.9035817535241767, iteration: 65822
loss: 1.00571608543396,grad_norm: 0.8840647931906307, iteration: 65823
loss: 1.112890362739563,grad_norm: 0.9999995056879744, iteration: 65824
loss: 1.0029243230819702,grad_norm: 0.9999990337868835, iteration: 65825
loss: 1.0191689729690552,grad_norm: 0.99999931484823, iteration: 65826
loss: 1.0156131982803345,grad_norm: 0.999397411055732, iteration: 65827
loss: 1.009177327156067,grad_norm: 0.9999991359168726, iteration: 65828
loss: 1.0437239408493042,grad_norm: 0.9999995500185287, iteration: 65829
loss: 1.0000295639038086,grad_norm: 0.9999990028481274, iteration: 65830
loss: 1.1287596225738525,grad_norm: 0.9999989905619644, iteration: 65831
loss: 1.0985658168792725,grad_norm: 0.9999993809654957, iteration: 65832
loss: 1.0175970792770386,grad_norm: 0.9999990868577402, iteration: 65833
loss: 1.0396323204040527,grad_norm: 0.9999992833674763, iteration: 65834
loss: 0.9752072691917419,grad_norm: 0.910987897343967, iteration: 65835
loss: 1.0229483842849731,grad_norm: 0.8325650436814429, iteration: 65836
loss: 1.0274426937103271,grad_norm: 0.9999993196590428, iteration: 65837
loss: 0.9951100945472717,grad_norm: 0.8154777504771054, iteration: 65838
loss: 1.0229946374893188,grad_norm: 0.9480094129864333, iteration: 65839
loss: 1.0662981271743774,grad_norm: 0.958927128988774, iteration: 65840
loss: 1.0478616952896118,grad_norm: 0.9999990471930301, iteration: 65841
loss: 1.1231502294540405,grad_norm: 0.9999993355227862, iteration: 65842
loss: 1.0249979496002197,grad_norm: 0.9999991896404904, iteration: 65843
loss: 0.9747669100761414,grad_norm: 0.9999991463042063, iteration: 65844
loss: 0.9549296498298645,grad_norm: 0.8311915118688509, iteration: 65845
loss: 1.0217889547348022,grad_norm: 0.8146236386715574, iteration: 65846
loss: 1.0709151029586792,grad_norm: 0.9999997078016939, iteration: 65847
loss: 0.9953324794769287,grad_norm: 0.9999991391965546, iteration: 65848
loss: 1.0364375114440918,grad_norm: 0.9999988409220961, iteration: 65849
loss: 1.0654921531677246,grad_norm: 0.9999997878280635, iteration: 65850
loss: 1.0098344087600708,grad_norm: 0.835896130433778, iteration: 65851
loss: 1.0717644691467285,grad_norm: 0.999999864087689, iteration: 65852
loss: 1.0616564750671387,grad_norm: 0.9999998226224968, iteration: 65853
loss: 1.0169492959976196,grad_norm: 0.8239219276952447, iteration: 65854
loss: 1.01193368434906,grad_norm: 0.9593193528867268, iteration: 65855
loss: 0.9872166514396667,grad_norm: 0.763173742991312, iteration: 65856
loss: 0.9812625050544739,grad_norm: 0.9999990030121967, iteration: 65857
loss: 1.092864751815796,grad_norm: 0.9999992051752785, iteration: 65858
loss: 1.042032241821289,grad_norm: 0.8422586387548552, iteration: 65859
loss: 1.0040751695632935,grad_norm: 0.9999991228330742, iteration: 65860
loss: 0.9896093606948853,grad_norm: 0.9999991359069574, iteration: 65861
loss: 0.972663164138794,grad_norm: 0.8034726087383851, iteration: 65862
loss: 1.013641119003296,grad_norm: 0.9999994274015716, iteration: 65863
loss: 1.1932467222213745,grad_norm: 0.9999995882759785, iteration: 65864
loss: 1.116420030593872,grad_norm: 0.9373841816906046, iteration: 65865
loss: 1.21565580368042,grad_norm: 0.9999996971648084, iteration: 65866
loss: 1.0305880308151245,grad_norm: 0.8546754670150001, iteration: 65867
loss: 1.0273628234863281,grad_norm: 0.9999992062028713, iteration: 65868
loss: 1.001304030418396,grad_norm: 0.9999994706898135, iteration: 65869
loss: 1.0078045129776,grad_norm: 0.8850345559386853, iteration: 65870
loss: 0.9998549222946167,grad_norm: 0.9999994565726523, iteration: 65871
loss: 1.0158580541610718,grad_norm: 0.9098213523358722, iteration: 65872
loss: 1.0538616180419922,grad_norm: 0.9999992476192594, iteration: 65873
loss: 1.0677272081375122,grad_norm: 0.9999993415724668, iteration: 65874
loss: 1.1002143621444702,grad_norm: 0.9515504596363135, iteration: 65875
loss: 1.0696446895599365,grad_norm: 0.8536886952859235, iteration: 65876
loss: 0.9902347326278687,grad_norm: 0.9999998115633999, iteration: 65877
loss: 1.0610798597335815,grad_norm: 0.9999991816073303, iteration: 65878
loss: 1.0213197469711304,grad_norm: 0.9999990493093428, iteration: 65879
loss: 1.0462108850479126,grad_norm: 0.9999994891754501, iteration: 65880
loss: 0.9621025919914246,grad_norm: 0.8994842443312537, iteration: 65881
loss: 1.0494369268417358,grad_norm: 0.9135039022671481, iteration: 65882
loss: 1.0094406604766846,grad_norm: 0.9118702589338489, iteration: 65883
loss: 1.1166805028915405,grad_norm: 0.9999993116382132, iteration: 65884
loss: 1.0547335147857666,grad_norm: 0.9999993664674948, iteration: 65885
loss: 0.983579695224762,grad_norm: 0.8865356045486817, iteration: 65886
loss: 1.004213809967041,grad_norm: 0.9502465188469733, iteration: 65887
loss: 1.0153566598892212,grad_norm: 0.9999994470189735, iteration: 65888
loss: 1.0541938543319702,grad_norm: 0.9999990870097885, iteration: 65889
loss: 1.0653140544891357,grad_norm: 0.9999992004237376, iteration: 65890
loss: 0.9993271827697754,grad_norm: 0.8605216493972762, iteration: 65891
loss: 1.0272164344787598,grad_norm: 0.991504171685606, iteration: 65892
loss: 0.9828172326087952,grad_norm: 0.9035964278370594, iteration: 65893
loss: 1.0387940406799316,grad_norm: 0.999999821934953, iteration: 65894
loss: 1.0251290798187256,grad_norm: 0.9999990230840831, iteration: 65895
loss: 1.0536936521530151,grad_norm: 0.9576052642805271, iteration: 65896
loss: 1.0066250562667847,grad_norm: 0.9999994262412593, iteration: 65897
loss: 1.0030992031097412,grad_norm: 0.7159410569812937, iteration: 65898
loss: 1.050594687461853,grad_norm: 0.9999992108234489, iteration: 65899
loss: 1.0338705778121948,grad_norm: 0.9999991483928553, iteration: 65900
loss: 0.983647346496582,grad_norm: 0.9999989867700027, iteration: 65901
loss: 1.0678412914276123,grad_norm: 0.8827189837438556, iteration: 65902
loss: 0.9948875308036804,grad_norm: 0.9999990791602765, iteration: 65903
loss: 0.9986289143562317,grad_norm: 0.9999992546878557, iteration: 65904
loss: 1.0152751207351685,grad_norm: 0.9999992206772388, iteration: 65905
loss: 0.9875337481498718,grad_norm: 0.9778561765570286, iteration: 65906
loss: 0.9942231774330139,grad_norm: 0.8887499140510021, iteration: 65907
loss: 0.9485787749290466,grad_norm: 0.999999264889631, iteration: 65908
loss: 1.0254031419754028,grad_norm: 0.999999360309952, iteration: 65909
loss: 1.0026919841766357,grad_norm: 0.9999990490979079, iteration: 65910
loss: 1.0178622007369995,grad_norm: 0.9741280473423293, iteration: 65911
loss: 1.0605064630508423,grad_norm: 0.9309019329882777, iteration: 65912
loss: 1.0112664699554443,grad_norm: 0.9332642072961593, iteration: 65913
loss: 0.9675648808479309,grad_norm: 0.9590287998927056, iteration: 65914
loss: 1.0054724216461182,grad_norm: 0.9999990642909604, iteration: 65915
loss: 1.0316097736358643,grad_norm: 0.8986691420409753, iteration: 65916
loss: 1.0025603771209717,grad_norm: 0.942590769505238, iteration: 65917
loss: 1.0333404541015625,grad_norm: 0.8074996428812655, iteration: 65918
loss: 1.0458909273147583,grad_norm: 0.9999991798192567, iteration: 65919
loss: 1.018018126487732,grad_norm: 0.9999990241343678, iteration: 65920
loss: 1.004785418510437,grad_norm: 0.8507293660079818, iteration: 65921
loss: 1.0422279834747314,grad_norm: 0.9999996221786238, iteration: 65922
loss: 0.9747023582458496,grad_norm: 0.8617255821302382, iteration: 65923
loss: 0.9976545572280884,grad_norm: 0.9746782304413778, iteration: 65924
loss: 1.0997892618179321,grad_norm: 0.999999055278052, iteration: 65925
loss: 1.0160986185073853,grad_norm: 0.9999991516419086, iteration: 65926
loss: 1.0330332517623901,grad_norm: 0.7819749052731618, iteration: 65927
loss: 1.0232176780700684,grad_norm: 0.9079599391909334, iteration: 65928
loss: 1.2460025548934937,grad_norm: 0.999999316684694, iteration: 65929
loss: 1.0231915712356567,grad_norm: 0.9468857366501375, iteration: 65930
loss: 1.0096381902694702,grad_norm: 0.9310890959280278, iteration: 65931
loss: 0.9586568474769592,grad_norm: 0.9999990733366686, iteration: 65932
loss: 0.9693846702575684,grad_norm: 0.9999989518413726, iteration: 65933
loss: 1.0441198348999023,grad_norm: 0.9999992410703274, iteration: 65934
loss: 1.0512844324111938,grad_norm: 0.9999990622137902, iteration: 65935
loss: 1.002270221710205,grad_norm: 0.926930725107343, iteration: 65936
loss: 1.0255693197250366,grad_norm: 0.9999991913912485, iteration: 65937
loss: 1.031380295753479,grad_norm: 0.947229154404395, iteration: 65938
loss: 0.9841616153717041,grad_norm: 0.999999071721556, iteration: 65939
loss: 0.9916673302650452,grad_norm: 0.9999992015012955, iteration: 65940
loss: 1.0624313354492188,grad_norm: 0.9999991343988699, iteration: 65941
loss: 0.9976016283035278,grad_norm: 0.8650303605819737, iteration: 65942
loss: 1.0558332204818726,grad_norm: 0.9999991981833606, iteration: 65943
loss: 1.001149296760559,grad_norm: 0.8087462820911295, iteration: 65944
loss: 1.004502534866333,grad_norm: 0.8376655453202144, iteration: 65945
loss: 1.031651258468628,grad_norm: 0.9681077180637762, iteration: 65946
loss: 1.0068738460540771,grad_norm: 0.9312931507931503, iteration: 65947
loss: 1.0084662437438965,grad_norm: 0.8665327685586955, iteration: 65948
loss: 0.9542311429977417,grad_norm: 0.9681443913885607, iteration: 65949
loss: 1.026816964149475,grad_norm: 0.9999990531864712, iteration: 65950
loss: 0.9923978447914124,grad_norm: 0.92989472762929, iteration: 65951
loss: 1.0137361288070679,grad_norm: 0.9999992039151745, iteration: 65952
loss: 0.9936879277229309,grad_norm: 0.9104394368505988, iteration: 65953
loss: 1.0412273406982422,grad_norm: 0.999999158636958, iteration: 65954
loss: 0.989070475101471,grad_norm: 0.9999991509202429, iteration: 65955
loss: 1.0687164068222046,grad_norm: 0.9999992425437866, iteration: 65956
loss: 1.0422112941741943,grad_norm: 0.9999992660963155, iteration: 65957
loss: 1.0433070659637451,grad_norm: 0.9137531207690319, iteration: 65958
loss: 1.0754815340042114,grad_norm: 0.9999990609607743, iteration: 65959
loss: 1.0436739921569824,grad_norm: 0.8338631179506025, iteration: 65960
loss: 1.0029712915420532,grad_norm: 0.9926028159504989, iteration: 65961
loss: 1.026940941810608,grad_norm: 0.9999990824042271, iteration: 65962
loss: 1.037859320640564,grad_norm: 0.8496088452527503, iteration: 65963
loss: 1.0004501342773438,grad_norm: 0.8261321402369556, iteration: 65964
loss: 1.0400975942611694,grad_norm: 0.9902445480570776, iteration: 65965
loss: 0.9856772422790527,grad_norm: 0.8446043116631917, iteration: 65966
loss: 1.0370861291885376,grad_norm: 0.9999990544095173, iteration: 65967
loss: 0.9675251841545105,grad_norm: 0.9230172889044131, iteration: 65968
loss: 1.0184974670410156,grad_norm: 0.9999990563479053, iteration: 65969
loss: 1.0520422458648682,grad_norm: 0.9999998652353216, iteration: 65970
loss: 0.9985440373420715,grad_norm: 0.9999991833348548, iteration: 65971
loss: 1.0619460344314575,grad_norm: 0.9999992025559079, iteration: 65972
loss: 1.0510151386260986,grad_norm: 0.9411608053734787, iteration: 65973
loss: 0.9901093244552612,grad_norm: 0.9119777616854275, iteration: 65974
loss: 1.0463495254516602,grad_norm: 0.8881627451855265, iteration: 65975
loss: 0.9368453025817871,grad_norm: 0.9478938891852225, iteration: 65976
loss: 1.035576343536377,grad_norm: 0.9999993593858348, iteration: 65977
loss: 0.9996535181999207,grad_norm: 0.89654870911405, iteration: 65978
loss: 0.9721594452857971,grad_norm: 0.9203238526196265, iteration: 65979
loss: 1.023314118385315,grad_norm: 0.9999991650887556, iteration: 65980
loss: 0.9882187247276306,grad_norm: 0.9999992471984496, iteration: 65981
loss: 1.0240319967269897,grad_norm: 0.9024732761400216, iteration: 65982
loss: 1.0429208278656006,grad_norm: 0.9999993738720949, iteration: 65983
loss: 1.0125763416290283,grad_norm: 0.984881062094681, iteration: 65984
loss: 1.023640751838684,grad_norm: 0.9539539175235643, iteration: 65985
loss: 0.9662273526191711,grad_norm: 0.8751147394233316, iteration: 65986
loss: 1.0462726354599,grad_norm: 0.9999992831698507, iteration: 65987
loss: 1.0230371952056885,grad_norm: 0.9696507252674265, iteration: 65988
loss: 0.980871856212616,grad_norm: 0.9388969036850592, iteration: 65989
loss: 1.0596102476119995,grad_norm: 0.8771714954966718, iteration: 65990
loss: 1.021580696105957,grad_norm: 0.989687861772412, iteration: 65991
loss: 0.9888061881065369,grad_norm: 0.9999992727754804, iteration: 65992
loss: 1.034533143043518,grad_norm: 0.9999991020228529, iteration: 65993
loss: 0.9583829045295715,grad_norm: 0.9941605596580317, iteration: 65994
loss: 0.9935975074768066,grad_norm: 0.9196139844133338, iteration: 65995
loss: 0.9661619663238525,grad_norm: 0.9999993052286341, iteration: 65996
loss: 1.0384849309921265,grad_norm: 0.7826691082009997, iteration: 65997
loss: 1.0618878602981567,grad_norm: 0.93073860898999, iteration: 65998
loss: 1.0783082246780396,grad_norm: 0.9999990823951307, iteration: 65999
loss: 1.0636906623840332,grad_norm: 0.8775687123627129, iteration: 66000
loss: 1.0405539274215698,grad_norm: 0.9999998974225977, iteration: 66001
loss: 1.021451473236084,grad_norm: 0.9999990827665742, iteration: 66002
loss: 0.9993901252746582,grad_norm: 0.8483533073600537, iteration: 66003
loss: 0.9773604273796082,grad_norm: 0.7739625023256913, iteration: 66004
loss: 0.9717617630958557,grad_norm: 0.8725159983475048, iteration: 66005
loss: 1.0396264791488647,grad_norm: 0.9999996679074971, iteration: 66006
loss: 0.9767524003982544,grad_norm: 0.935979403930067, iteration: 66007
loss: 1.0582245588302612,grad_norm: 0.925295898414847, iteration: 66008
loss: 0.9999668002128601,grad_norm: 0.8546469504182754, iteration: 66009
loss: 1.017948031425476,grad_norm: 0.854895918129931, iteration: 66010
loss: 0.9816537499427795,grad_norm: 0.9456507877571807, iteration: 66011
loss: 0.9715207815170288,grad_norm: 0.9495116849443423, iteration: 66012
loss: 1.0196044445037842,grad_norm: 0.8987318172589934, iteration: 66013
loss: 1.0084004402160645,grad_norm: 0.9580243157973648, iteration: 66014
loss: 0.996320366859436,grad_norm: 0.9763151190590141, iteration: 66015
loss: 1.012554407119751,grad_norm: 0.9681785700240736, iteration: 66016
loss: 0.9903404712677002,grad_norm: 0.9999991582025124, iteration: 66017
loss: 1.0058506727218628,grad_norm: 0.9999992336127232, iteration: 66018
loss: 1.0338630676269531,grad_norm: 0.999999657305195, iteration: 66019
loss: 0.9955363869667053,grad_norm: 0.9988201487041453, iteration: 66020
loss: 0.9878570437431335,grad_norm: 0.8273335916673126, iteration: 66021
loss: 1.001368522644043,grad_norm: 0.9657317999640915, iteration: 66022
loss: 1.0055593252182007,grad_norm: 0.9528729462593282, iteration: 66023
loss: 1.0426563024520874,grad_norm: 0.9999989915573795, iteration: 66024
loss: 1.0104129314422607,grad_norm: 0.9308010515747152, iteration: 66025
loss: 1.1071416139602661,grad_norm: 0.9999997915757618, iteration: 66026
loss: 0.980546772480011,grad_norm: 0.9616639699778181, iteration: 66027
loss: 1.015812873840332,grad_norm: 0.9999992478464567, iteration: 66028
loss: 1.0997474193572998,grad_norm: 0.9999994204448488, iteration: 66029
loss: 1.055997610092163,grad_norm: 0.9999993088233115, iteration: 66030
loss: 1.0349171161651611,grad_norm: 0.8929101927761147, iteration: 66031
loss: 0.995688259601593,grad_norm: 0.9999991829879368, iteration: 66032
loss: 1.025174856185913,grad_norm: 0.9999995154915494, iteration: 66033
loss: 1.0356435775756836,grad_norm: 0.7502634972450813, iteration: 66034
loss: 1.027216911315918,grad_norm: 0.9999990994626939, iteration: 66035
loss: 1.039535403251648,grad_norm: 0.9999996232236301, iteration: 66036
loss: 1.024873971939087,grad_norm: 0.9416751630451348, iteration: 66037
loss: 0.9891270399093628,grad_norm: 0.9999992141676218, iteration: 66038
loss: 1.0003769397735596,grad_norm: 0.9999991896476199, iteration: 66039
loss: 1.0084547996520996,grad_norm: 0.9999993602178815, iteration: 66040
loss: 1.1033594608306885,grad_norm: 0.9999991777685088, iteration: 66041
loss: 1.0201408863067627,grad_norm: 0.9999989808412493, iteration: 66042
loss: 0.9875182509422302,grad_norm: 0.9088729815268569, iteration: 66043
loss: 1.0222586393356323,grad_norm: 0.9999991264150981, iteration: 66044
loss: 0.9933179616928101,grad_norm: 0.8129286068853347, iteration: 66045
loss: 1.0246981382369995,grad_norm: 0.9999991724477192, iteration: 66046
loss: 1.0222688913345337,grad_norm: 0.9999998665328809, iteration: 66047
loss: 0.9707995057106018,grad_norm: 0.9760313153136302, iteration: 66048
loss: 0.9882449507713318,grad_norm: 0.7935088329273238, iteration: 66049
loss: 1.035715103149414,grad_norm: 0.9999991530776018, iteration: 66050
loss: 1.0642372369766235,grad_norm: 0.9999989496330471, iteration: 66051
loss: 1.0132339000701904,grad_norm: 0.8611648574010023, iteration: 66052
loss: 1.0781621932983398,grad_norm: 0.9999995338020534, iteration: 66053
loss: 1.0412187576293945,grad_norm: 0.9999991388396547, iteration: 66054
loss: 1.032433271408081,grad_norm: 0.999999384324367, iteration: 66055
loss: 1.004625916481018,grad_norm: 0.9524047800065457, iteration: 66056
loss: 0.9825636148452759,grad_norm: 0.9104045157504211, iteration: 66057
loss: 1.0301451683044434,grad_norm: 0.999998979028267, iteration: 66058
loss: 1.035252332687378,grad_norm: 0.9999991988244589, iteration: 66059
loss: 1.0402415990829468,grad_norm: 0.9999994074657204, iteration: 66060
loss: 0.9840551018714905,grad_norm: 0.971120309014951, iteration: 66061
loss: 1.0122084617614746,grad_norm: 0.903778255350571, iteration: 66062
loss: 1.0179158449172974,grad_norm: 0.9999991323362567, iteration: 66063
loss: 1.0047996044158936,grad_norm: 0.9467044861505794, iteration: 66064
loss: 1.0268827676773071,grad_norm: 0.9999990362281593, iteration: 66065
loss: 1.0125188827514648,grad_norm: 0.9999994152178261, iteration: 66066
loss: 1.0073925256729126,grad_norm: 0.9999990531315086, iteration: 66067
loss: 1.0084813833236694,grad_norm: 0.9442876978346377, iteration: 66068
loss: 1.0238876342773438,grad_norm: 0.9999990637683807, iteration: 66069
loss: 1.031675934791565,grad_norm: 0.9999990943303677, iteration: 66070
loss: 1.0589158535003662,grad_norm: 0.9999992105093979, iteration: 66071
loss: 1.0026105642318726,grad_norm: 0.9999998562872154, iteration: 66072
loss: 0.9496136903762817,grad_norm: 0.9999990643177024, iteration: 66073
loss: 1.0341687202453613,grad_norm: 0.9999996724299518, iteration: 66074
loss: 1.0565440654754639,grad_norm: 0.9999992777863703, iteration: 66075
loss: 1.091286540031433,grad_norm: 0.9999998425686679, iteration: 66076
loss: 1.0118470191955566,grad_norm: 0.865127000347252, iteration: 66077
loss: 1.0258378982543945,grad_norm: 0.8808518363798027, iteration: 66078
loss: 1.042777180671692,grad_norm: 0.999999876216411, iteration: 66079
loss: 1.0456527471542358,grad_norm: 0.9999990171367529, iteration: 66080
loss: 1.0905424356460571,grad_norm: 0.9338795778859672, iteration: 66081
loss: 1.0416661500930786,grad_norm: 0.9999992420424091, iteration: 66082
loss: 1.0084999799728394,grad_norm: 0.9843317307629106, iteration: 66083
loss: 1.1070964336395264,grad_norm: 0.999999640080779, iteration: 66084
loss: 1.0844860076904297,grad_norm: 0.9999996462467077, iteration: 66085
loss: 1.0892094373703003,grad_norm: 0.9999992743241884, iteration: 66086
loss: 1.0494019985198975,grad_norm: 0.9999992837147598, iteration: 66087
loss: 0.9850157499313354,grad_norm: 0.9999991257502913, iteration: 66088
loss: 0.9697940945625305,grad_norm: 0.6809950484051894, iteration: 66089
loss: 1.0178560018539429,grad_norm: 0.9999991094287268, iteration: 66090
loss: 1.0163938999176025,grad_norm: 0.999999750132734, iteration: 66091
loss: 1.155275583267212,grad_norm: 0.9999992078987854, iteration: 66092
loss: 1.0503439903259277,grad_norm: 0.8267010826984197, iteration: 66093
loss: 0.9919759631156921,grad_norm: 0.9999990567708883, iteration: 66094
loss: 1.0398359298706055,grad_norm: 0.9999993633376657, iteration: 66095
loss: 0.9887948632240295,grad_norm: 0.9999997125285801, iteration: 66096
loss: 1.0857529640197754,grad_norm: 0.9999992289934075, iteration: 66097
loss: 1.1046441793441772,grad_norm: 0.9999997052456987, iteration: 66098
loss: 0.9980260133743286,grad_norm: 0.909822015518214, iteration: 66099
loss: 1.0271542072296143,grad_norm: 0.9999994046060529, iteration: 66100
loss: 0.9851214289665222,grad_norm: 0.9517074506287071, iteration: 66101
loss: 1.0348904132843018,grad_norm: 0.9999992737157966, iteration: 66102
loss: 1.0026772022247314,grad_norm: 0.9242834239621969, iteration: 66103
loss: 1.0587661266326904,grad_norm: 0.839257624923914, iteration: 66104
loss: 0.9851310849189758,grad_norm: 0.8880001689098208, iteration: 66105
loss: 1.0682196617126465,grad_norm: 0.9999995145656165, iteration: 66106
loss: 0.9884360432624817,grad_norm: 0.9999991282435935, iteration: 66107
loss: 1.0384926795959473,grad_norm: 0.9999991159841323, iteration: 66108
loss: 1.014204502105713,grad_norm: 0.9999997006552287, iteration: 66109
loss: 0.9926267862319946,grad_norm: 0.8299934666603775, iteration: 66110
loss: 0.9606232047080994,grad_norm: 0.9594408726140459, iteration: 66111
loss: 1.0300313234329224,grad_norm: 0.9999991301913006, iteration: 66112
loss: 0.9900702238082886,grad_norm: 0.987465091716641, iteration: 66113
loss: 0.9729948043823242,grad_norm: 0.9999990967792665, iteration: 66114
loss: 1.021505355834961,grad_norm: 0.9999992009508889, iteration: 66115
loss: 1.0209803581237793,grad_norm: 0.9999990025104056, iteration: 66116
loss: 1.0650056600570679,grad_norm: 0.9999998479130356, iteration: 66117
loss: 1.0791608095169067,grad_norm: 0.9983852457906015, iteration: 66118
loss: 0.9784665107727051,grad_norm: 0.9890176963996867, iteration: 66119
loss: 1.0655701160430908,grad_norm: 0.9999994554193016, iteration: 66120
loss: 0.9982019662857056,grad_norm: 0.9999995251368453, iteration: 66121
loss: 1.013261079788208,grad_norm: 0.7862176039531487, iteration: 66122
loss: 1.0476032495498657,grad_norm: 0.9999995688110397, iteration: 66123
loss: 0.9705992937088013,grad_norm: 0.8028511265391262, iteration: 66124
loss: 0.990598738193512,grad_norm: 0.9999992140803413, iteration: 66125
loss: 0.96918785572052,grad_norm: 0.9999990937456215, iteration: 66126
loss: 0.9946704506874084,grad_norm: 0.9999990719327346, iteration: 66127
loss: 1.0208559036254883,grad_norm: 0.9731440493533328, iteration: 66128
loss: 0.993596613407135,grad_norm: 0.9406191005205892, iteration: 66129
loss: 0.9870432019233704,grad_norm: 0.931681558276739, iteration: 66130
loss: 1.0184504985809326,grad_norm: 0.9999995336212698, iteration: 66131
loss: 1.0280734300613403,grad_norm: 0.9999991798130611, iteration: 66132
loss: 1.0024811029434204,grad_norm: 0.9999991629440563, iteration: 66133
loss: 0.9999188184738159,grad_norm: 0.9999993514361627, iteration: 66134
loss: 1.0018442869186401,grad_norm: 0.9797901853529438, iteration: 66135
loss: 1.0303221940994263,grad_norm: 0.8759873674230895, iteration: 66136
loss: 0.977353572845459,grad_norm: 0.9999994941120846, iteration: 66137
loss: 1.0437434911727905,grad_norm: 0.9116382271665469, iteration: 66138
loss: 0.9869093894958496,grad_norm: 0.999999079603435, iteration: 66139
loss: 1.0179535150527954,grad_norm: 0.9795342502624887, iteration: 66140
loss: 1.0597765445709229,grad_norm: 0.9999998940814067, iteration: 66141
loss: 1.010941505432129,grad_norm: 0.9737455539300455, iteration: 66142
loss: 1.0258160829544067,grad_norm: 0.9298395411453069, iteration: 66143
loss: 1.0133626461029053,grad_norm: 0.9999992830953977, iteration: 66144
loss: 1.05264151096344,grad_norm: 0.9999991806230472, iteration: 66145
loss: 1.0134872198104858,grad_norm: 0.9692653420105793, iteration: 66146
loss: 1.002266764640808,grad_norm: 0.9126107553906954, iteration: 66147
loss: 1.1203783750534058,grad_norm: 0.9999991901317605, iteration: 66148
loss: 1.0206068754196167,grad_norm: 0.9999996567996645, iteration: 66149
loss: 1.0190343856811523,grad_norm: 0.9138059492039656, iteration: 66150
loss: 1.0051735639572144,grad_norm: 0.8385961409623371, iteration: 66151
loss: 1.0097787380218506,grad_norm: 0.7587113469832822, iteration: 66152
loss: 1.0249032974243164,grad_norm: 0.9999996377253729, iteration: 66153
loss: 0.9976046681404114,grad_norm: 0.9999989914828258, iteration: 66154
loss: 1.1027822494506836,grad_norm: 0.9374987128061548, iteration: 66155
loss: 1.052822232246399,grad_norm: 0.7806102228681878, iteration: 66156
loss: 0.9992363452911377,grad_norm: 0.999999033062009, iteration: 66157
loss: 1.0234674215316772,grad_norm: 0.9999999613688499, iteration: 66158
loss: 0.9919849634170532,grad_norm: 0.975644356414155, iteration: 66159
loss: 1.0294981002807617,grad_norm: 0.9999991436279718, iteration: 66160
loss: 0.9917026162147522,grad_norm: 0.9318291916596111, iteration: 66161
loss: 0.9798411130905151,grad_norm: 0.9866204201431961, iteration: 66162
loss: 0.9955236911773682,grad_norm: 0.9999990480881007, iteration: 66163
loss: 1.0722633600234985,grad_norm: 0.9999999673837372, iteration: 66164
loss: 1.0615180730819702,grad_norm: 0.9991573333593619, iteration: 66165
loss: 1.0959663391113281,grad_norm: 0.9999995828539662, iteration: 66166
loss: 0.9487636089324951,grad_norm: 0.9426752847240495, iteration: 66167
loss: 0.9725459218025208,grad_norm: 0.8560940019928626, iteration: 66168
loss: 1.022300362586975,grad_norm: 0.8589637654484011, iteration: 66169
loss: 1.017193078994751,grad_norm: 0.9153740048668693, iteration: 66170
loss: 1.0822770595550537,grad_norm: 0.7835536035811002, iteration: 66171
loss: 1.0037660598754883,grad_norm: 0.8947855561940143, iteration: 66172
loss: 1.0297985076904297,grad_norm: 0.999999391179948, iteration: 66173
loss: 0.9960299730300903,grad_norm: 0.9999990705875202, iteration: 66174
loss: 1.0031704902648926,grad_norm: 0.9999989917323431, iteration: 66175
loss: 1.0594271421432495,grad_norm: 0.9999999161553422, iteration: 66176
loss: 0.9856982231140137,grad_norm: 0.8634998125524903, iteration: 66177
loss: 1.0468895435333252,grad_norm: 0.9999998759182107, iteration: 66178
loss: 1.0065566301345825,grad_norm: 0.9999990092451508, iteration: 66179
loss: 1.031856656074524,grad_norm: 0.8590455588280904, iteration: 66180
loss: 1.096408486366272,grad_norm: 0.9999998533375287, iteration: 66181
loss: 0.9713708162307739,grad_norm: 0.9999990638627233, iteration: 66182
loss: 0.988728404045105,grad_norm: 0.8743540736263613, iteration: 66183
loss: 1.0073589086532593,grad_norm: 0.9731362018903466, iteration: 66184
loss: 1.1441588401794434,grad_norm: 0.999999180462328, iteration: 66185
loss: 1.0006526708602905,grad_norm: 0.9224851092705533, iteration: 66186
loss: 1.0022403001785278,grad_norm: 0.999999218117821, iteration: 66187
loss: 1.238234281539917,grad_norm: 0.9999998081148953, iteration: 66188
loss: 1.0539963245391846,grad_norm: 0.9999993076382055, iteration: 66189
loss: 0.9921301007270813,grad_norm: 0.9999996187686473, iteration: 66190
loss: 1.035368800163269,grad_norm: 0.9999996764743122, iteration: 66191
loss: 0.9721289873123169,grad_norm: 0.8199409667710249, iteration: 66192
loss: 1.0062084197998047,grad_norm: 0.9999992815351222, iteration: 66193
loss: 0.9756277203559875,grad_norm: 0.8046087446114669, iteration: 66194
loss: 1.0295183658599854,grad_norm: 0.9999990987834065, iteration: 66195
loss: 0.9628332853317261,grad_norm: 0.999999816239198, iteration: 66196
loss: 0.9899774789810181,grad_norm: 0.7905844938526742, iteration: 66197
loss: 0.9518533945083618,grad_norm: 0.8399010775503074, iteration: 66198
loss: 0.9905099272727966,grad_norm: 0.8138775816532021, iteration: 66199
loss: 1.071245789527893,grad_norm: 0.9999997247543481, iteration: 66200
loss: 1.0077450275421143,grad_norm: 0.9770408254234083, iteration: 66201
loss: 1.0213671922683716,grad_norm: 0.9414010282167798, iteration: 66202
loss: 1.0081560611724854,grad_norm: 0.9999990691267463, iteration: 66203
loss: 1.018527626991272,grad_norm: 0.9999995802281071, iteration: 66204
loss: 0.9701452255249023,grad_norm: 0.9628885834296181, iteration: 66205
loss: 1.009621500968933,grad_norm: 0.8402905639564592, iteration: 66206
loss: 1.0203438997268677,grad_norm: 0.8518124660239462, iteration: 66207
loss: 0.9933159351348877,grad_norm: 0.8222859434281454, iteration: 66208
loss: 1.0164968967437744,grad_norm: 0.9999991487816399, iteration: 66209
loss: 0.9739522933959961,grad_norm: 0.9999991154204559, iteration: 66210
loss: 0.9806391596794128,grad_norm: 0.9999991121077657, iteration: 66211
loss: 1.0361006259918213,grad_norm: 0.9999990749421376, iteration: 66212
loss: 1.0254048109054565,grad_norm: 0.999999102833285, iteration: 66213
loss: 0.9944350123405457,grad_norm: 0.9999994802563116, iteration: 66214
loss: 1.042580485343933,grad_norm: 0.8964617697476308, iteration: 66215
loss: 1.0169967412948608,grad_norm: 0.9999992393962914, iteration: 66216
loss: 1.0130901336669922,grad_norm: 0.9999999616038489, iteration: 66217
loss: 0.9858832955360413,grad_norm: 0.8943604960019524, iteration: 66218
loss: 1.008190631866455,grad_norm: 0.8620082038060942, iteration: 66219
loss: 0.9931125044822693,grad_norm: 0.8143392377724019, iteration: 66220
loss: 0.993125855922699,grad_norm: 0.9424920430169454, iteration: 66221
loss: 0.9853266477584839,grad_norm: 0.9999991504313209, iteration: 66222
loss: 1.0057244300842285,grad_norm: 0.9235383477439724, iteration: 66223
loss: 0.9982810616493225,grad_norm: 0.8236397666225934, iteration: 66224
loss: 0.9864657521247864,grad_norm: 0.999999117213539, iteration: 66225
loss: 1.026597023010254,grad_norm: 0.8967192470794052, iteration: 66226
loss: 1.0005875825881958,grad_norm: 0.9290160103656391, iteration: 66227
loss: 1.030930519104004,grad_norm: 0.9999991194365867, iteration: 66228
loss: 1.024304747581482,grad_norm: 0.9999991327207568, iteration: 66229
loss: 1.0228513479232788,grad_norm: 0.9999993953326518, iteration: 66230
loss: 1.0233237743377686,grad_norm: 0.9115196785645734, iteration: 66231
loss: 0.9857523441314697,grad_norm: 0.7805746762295164, iteration: 66232
loss: 1.0375404357910156,grad_norm: 0.9999991537431142, iteration: 66233
loss: 1.0127112865447998,grad_norm: 0.8987302501012943, iteration: 66234
loss: 1.0188180208206177,grad_norm: 0.9664783566872817, iteration: 66235
loss: 0.987053394317627,grad_norm: 0.9278366469827559, iteration: 66236
loss: 0.9934202432632446,grad_norm: 0.8557422794059043, iteration: 66237
loss: 1.0169347524642944,grad_norm: 0.9573616152551971, iteration: 66238
loss: 1.0263088941574097,grad_norm: 0.9999996347124178, iteration: 66239
loss: 1.0427945852279663,grad_norm: 0.9999997778765757, iteration: 66240
loss: 1.0156465768814087,grad_norm: 0.9999991794490599, iteration: 66241
loss: 1.0028427839279175,grad_norm: 0.9999995983352958, iteration: 66242
loss: 1.0151793956756592,grad_norm: 0.9999991320742081, iteration: 66243
loss: 0.985340416431427,grad_norm: 0.9999990657049161, iteration: 66244
loss: 1.0075984001159668,grad_norm: 0.8895174120305797, iteration: 66245
loss: 1.021266222000122,grad_norm: 0.7774763519952768, iteration: 66246
loss: 0.9960201382637024,grad_norm: 0.9999991430102393, iteration: 66247
loss: 0.9908314943313599,grad_norm: 0.8651820484676978, iteration: 66248
loss: 1.0113999843597412,grad_norm: 0.8200892550033763, iteration: 66249
loss: 0.9910129308700562,grad_norm: 0.8823359106971025, iteration: 66250
loss: 1.0502870082855225,grad_norm: 0.9999998048791675, iteration: 66251
loss: 1.0115078687667847,grad_norm: 0.9999997835069919, iteration: 66252
loss: 1.026403546333313,grad_norm: 0.9685604467457469, iteration: 66253
loss: 0.9934700131416321,grad_norm: 0.9456813452350598, iteration: 66254
loss: 1.019688367843628,grad_norm: 0.9144686364333109, iteration: 66255
loss: 1.070173978805542,grad_norm: 0.9999997325817205, iteration: 66256
loss: 1.0364855527877808,grad_norm: 0.9999991759804525, iteration: 66257
loss: 0.9456695318222046,grad_norm: 0.9383551581711891, iteration: 66258
loss: 0.9997910857200623,grad_norm: 0.999999152188793, iteration: 66259
loss: 0.9860993027687073,grad_norm: 0.9547867275940096, iteration: 66260
loss: 1.0132031440734863,grad_norm: 0.999999272364566, iteration: 66261
loss: 0.9886786341667175,grad_norm: 0.999999373142578, iteration: 66262
loss: 0.9568670988082886,grad_norm: 0.9851461062001552, iteration: 66263
loss: 0.9821826815605164,grad_norm: 0.9715385002412993, iteration: 66264
loss: 1.0072640180587769,grad_norm: 0.8130613657984905, iteration: 66265
loss: 0.9666793942451477,grad_norm: 0.9999991728335643, iteration: 66266
loss: 0.9811432957649231,grad_norm: 0.9999989983679469, iteration: 66267
loss: 1.0758368968963623,grad_norm: 0.9999990420364054, iteration: 66268
loss: 1.0132975578308105,grad_norm: 0.9133580327813169, iteration: 66269
loss: 0.9890202879905701,grad_norm: 0.8993555115475226, iteration: 66270
loss: 1.020817756652832,grad_norm: 0.9999997131241942, iteration: 66271
loss: 1.0532081127166748,grad_norm: 0.9999992730763027, iteration: 66272
loss: 0.986202597618103,grad_norm: 0.9949813061002697, iteration: 66273
loss: 0.9824860095977783,grad_norm: 0.9058149748086606, iteration: 66274
loss: 1.0222270488739014,grad_norm: 0.7590561894571721, iteration: 66275
loss: 0.9987754821777344,grad_norm: 0.8703786219714361, iteration: 66276
loss: 1.0307435989379883,grad_norm: 0.909590466307736, iteration: 66277
loss: 0.9747695922851562,grad_norm: 0.8812220000868368, iteration: 66278
loss: 1.008112907409668,grad_norm: 0.9999995593244589, iteration: 66279
loss: 0.9673784971237183,grad_norm: 0.9482601821723746, iteration: 66280
loss: 1.0212891101837158,grad_norm: 0.9999992253607259, iteration: 66281
loss: 1.0157711505889893,grad_norm: 0.9999991987057931, iteration: 66282
loss: 1.139968991279602,grad_norm: 0.9999997707946826, iteration: 66283
loss: 1.0297850370407104,grad_norm: 0.9786450198843099, iteration: 66284
loss: 0.9733477234840393,grad_norm: 0.9999990084138185, iteration: 66285
loss: 0.9477283954620361,grad_norm: 0.8837305516194319, iteration: 66286
loss: 1.054500699043274,grad_norm: 0.9999997535431643, iteration: 66287
loss: 1.0352777242660522,grad_norm: 0.9999991695039917, iteration: 66288
loss: 1.0004724264144897,grad_norm: 0.9999991881019783, iteration: 66289
loss: 1.0080981254577637,grad_norm: 0.9999991259638991, iteration: 66290
loss: 1.0143170356750488,grad_norm: 0.9277638171555912, iteration: 66291
loss: 1.0301810503005981,grad_norm: 0.9999991056283714, iteration: 66292
loss: 0.9869667887687683,grad_norm: 0.8924417282059627, iteration: 66293
loss: 1.0068347454071045,grad_norm: 0.9573249741324571, iteration: 66294
loss: 1.0528277158737183,grad_norm: 0.9999991872333014, iteration: 66295
loss: 0.9871605038642883,grad_norm: 0.8376585908176257, iteration: 66296
loss: 1.0049997568130493,grad_norm: 0.8621808219093735, iteration: 66297
loss: 0.9967178106307983,grad_norm: 0.9999991087465667, iteration: 66298
loss: 0.9715201258659363,grad_norm: 0.922599012222252, iteration: 66299
loss: 1.0081048011779785,grad_norm: 0.9999994304146269, iteration: 66300
loss: 0.9940662980079651,grad_norm: 0.9934382870455649, iteration: 66301
loss: 1.0145753622055054,grad_norm: 0.9594241668547527, iteration: 66302
loss: 1.0465601682662964,grad_norm: 0.9999996693761374, iteration: 66303
loss: 1.0198822021484375,grad_norm: 0.999999895893644, iteration: 66304
loss: 1.0244925022125244,grad_norm: 0.822518659371675, iteration: 66305
loss: 0.9559000730514526,grad_norm: 0.9551613417489453, iteration: 66306
loss: 1.0101107358932495,grad_norm: 0.9206064367103326, iteration: 66307
loss: 0.9812677502632141,grad_norm: 0.999999109103732, iteration: 66308
loss: 0.9879616498947144,grad_norm: 0.8996849558467367, iteration: 66309
loss: 0.9853103756904602,grad_norm: 0.7518655085973412, iteration: 66310
loss: 1.0207184553146362,grad_norm: 0.999999130890877, iteration: 66311
loss: 0.9940676093101501,grad_norm: 0.999999629504041, iteration: 66312
loss: 0.9979321956634521,grad_norm: 0.9733308081981301, iteration: 66313
loss: 1.0767767429351807,grad_norm: 0.999999832257011, iteration: 66314
loss: 1.0106347799301147,grad_norm: 0.9999989932631409, iteration: 66315
loss: 1.026711344718933,grad_norm: 0.9741606196524317, iteration: 66316
loss: 0.9652309417724609,grad_norm: 0.9449782001903732, iteration: 66317
loss: 1.0228667259216309,grad_norm: 0.999999343838983, iteration: 66318
loss: 0.9936039447784424,grad_norm: 0.911231520218609, iteration: 66319
loss: 0.971085786819458,grad_norm: 0.8128081266598244, iteration: 66320
loss: 1.0500049591064453,grad_norm: 1.0000000130358246, iteration: 66321
loss: 1.0826551914215088,grad_norm: 0.99999908353528, iteration: 66322
loss: 1.0028208494186401,grad_norm: 0.9153011489251184, iteration: 66323
loss: 1.0512126684188843,grad_norm: 0.8712658069709863, iteration: 66324
loss: 1.0689455270767212,grad_norm: 0.9467718090744704, iteration: 66325
loss: 0.976479172706604,grad_norm: 0.8429617978844006, iteration: 66326
loss: 0.9832825064659119,grad_norm: 0.9698640507626494, iteration: 66327
loss: 0.9824796915054321,grad_norm: 0.808476164543579, iteration: 66328
loss: 1.0009410381317139,grad_norm: 0.979953502686236, iteration: 66329
loss: 0.9840878248214722,grad_norm: 0.8764934058995508, iteration: 66330
loss: 0.9757779836654663,grad_norm: 0.8660402906040493, iteration: 66331
loss: 1.0085902214050293,grad_norm: 0.7977452091077059, iteration: 66332
loss: 1.010724425315857,grad_norm: 0.970716697482008, iteration: 66333
loss: 0.9770560264587402,grad_norm: 0.8462650756653628, iteration: 66334
loss: 1.007717490196228,grad_norm: 0.9474174456604959, iteration: 66335
loss: 0.9636774063110352,grad_norm: 0.9999992001801996, iteration: 66336
loss: 1.0045312643051147,grad_norm: 0.8565429866267437, iteration: 66337
loss: 1.0037364959716797,grad_norm: 0.9237882627148624, iteration: 66338
loss: 1.028070330619812,grad_norm: 0.7724875689065794, iteration: 66339
loss: 1.0389398336410522,grad_norm: 0.9999997578473743, iteration: 66340
loss: 0.9873509407043457,grad_norm: 0.6977306743605196, iteration: 66341
loss: 0.9939753413200378,grad_norm: 0.9752507779975946, iteration: 66342
loss: 1.0088962316513062,grad_norm: 0.9999991993717177, iteration: 66343
loss: 1.0106933116912842,grad_norm: 0.8332186438039161, iteration: 66344
loss: 1.0484598875045776,grad_norm: 0.9999991653214052, iteration: 66345
loss: 0.9857519268989563,grad_norm: 0.9999995533407088, iteration: 66346
loss: 1.0319541692733765,grad_norm: 0.9999992401967601, iteration: 66347
loss: 0.9961022138595581,grad_norm: 0.8152870250392404, iteration: 66348
loss: 0.9621115922927856,grad_norm: 0.9283157095378712, iteration: 66349
loss: 1.183488368988037,grad_norm: 0.9999995049456325, iteration: 66350
loss: 0.9899565577507019,grad_norm: 0.9999992891525645, iteration: 66351
loss: 0.9936883449554443,grad_norm: 0.8779840704846088, iteration: 66352
loss: 1.0200719833374023,grad_norm: 0.9204504611207145, iteration: 66353
loss: 1.0250110626220703,grad_norm: 0.9010819485429594, iteration: 66354
loss: 1.026108980178833,grad_norm: 0.988332717064598, iteration: 66355
loss: 0.9815124273300171,grad_norm: 0.8735999734660532, iteration: 66356
loss: 1.0264661312103271,grad_norm: 0.9999996081926015, iteration: 66357
loss: 1.0087250471115112,grad_norm: 0.9999995477138961, iteration: 66358
loss: 1.0041451454162598,grad_norm: 0.9245318097705323, iteration: 66359
loss: 0.9732626080513,grad_norm: 0.9639333070134799, iteration: 66360
loss: 0.9875246286392212,grad_norm: 0.801912480083009, iteration: 66361
loss: 1.0312418937683105,grad_norm: 0.97899358322922, iteration: 66362
loss: 1.0097953081130981,grad_norm: 0.9919198319412114, iteration: 66363
loss: 1.0178700685501099,grad_norm: 0.9999992562604189, iteration: 66364
loss: 1.0088986158370972,grad_norm: 0.8985005849542572, iteration: 66365
loss: 0.9798166155815125,grad_norm: 0.7808620729882414, iteration: 66366
loss: 1.0141700506210327,grad_norm: 0.999999485182548, iteration: 66367
loss: 0.9938614964485168,grad_norm: 0.8614135629381211, iteration: 66368
loss: 1.025374412536621,grad_norm: 0.8642314032463313, iteration: 66369
loss: 1.002562165260315,grad_norm: 0.8766727318158141, iteration: 66370
loss: 0.9991737008094788,grad_norm: 0.9776634956643272, iteration: 66371
loss: 1.0303831100463867,grad_norm: 0.9999995588656027, iteration: 66372
loss: 1.0260636806488037,grad_norm: 0.9999998761964661, iteration: 66373
loss: 0.9867196679115295,grad_norm: 0.8981495693728777, iteration: 66374
loss: 1.046901822090149,grad_norm: 0.9999994663376467, iteration: 66375
loss: 0.9860255718231201,grad_norm: 0.9999992867717775, iteration: 66376
loss: 1.1541780233383179,grad_norm: 0.9999998879998443, iteration: 66377
loss: 1.0076881647109985,grad_norm: 0.7871569302549793, iteration: 66378
loss: 1.012314796447754,grad_norm: 0.9527944259658674, iteration: 66379
loss: 0.972771167755127,grad_norm: 0.9999990849622071, iteration: 66380
loss: 1.0201265811920166,grad_norm: 0.9330453773345346, iteration: 66381
loss: 0.9968813061714172,grad_norm: 0.9994323041947816, iteration: 66382
loss: 0.9710637331008911,grad_norm: 0.9999991268128441, iteration: 66383
loss: 0.9982578754425049,grad_norm: 0.995521944999877, iteration: 66384
loss: 1.0371618270874023,grad_norm: 0.999999573108402, iteration: 66385
loss: 0.9915294051170349,grad_norm: 0.9489528308920033, iteration: 66386
loss: 1.0415412187576294,grad_norm: 0.8091375960207279, iteration: 66387
loss: 1.0139203071594238,grad_norm: 0.8647996191645126, iteration: 66388
loss: 1.0012339353561401,grad_norm: 0.9999994479233184, iteration: 66389
loss: 1.0011910200119019,grad_norm: 0.9971673428087132, iteration: 66390
loss: 0.9833128452301025,grad_norm: 0.9795760964146126, iteration: 66391
loss: 1.0618102550506592,grad_norm: 0.9999992366681532, iteration: 66392
loss: 1.1012803316116333,grad_norm: 0.9999990877204235, iteration: 66393
loss: 1.019509196281433,grad_norm: 0.949790121021591, iteration: 66394
loss: 1.1324005126953125,grad_norm: 0.9999994837992852, iteration: 66395
loss: 1.061140537261963,grad_norm: 0.8595174241433552, iteration: 66396
loss: 1.0046201944351196,grad_norm: 0.817976492798484, iteration: 66397
loss: 0.9670587182044983,grad_norm: 0.9326387298040134, iteration: 66398
loss: 1.026383399963379,grad_norm: 0.9999995779298273, iteration: 66399
loss: 0.9509602785110474,grad_norm: 0.8822678572020978, iteration: 66400
loss: 1.0068750381469727,grad_norm: 0.8956995749719938, iteration: 66401
loss: 1.0299054384231567,grad_norm: 0.9999997029030896, iteration: 66402
loss: 0.989702582359314,grad_norm: 0.999999158007086, iteration: 66403
loss: 1.0492092370986938,grad_norm: 0.9999993445003897, iteration: 66404
loss: 0.9928590655326843,grad_norm: 0.9868755878897971, iteration: 66405
loss: 1.0131840705871582,grad_norm: 0.9763532311483927, iteration: 66406
loss: 1.0346914529800415,grad_norm: 0.9999994033318502, iteration: 66407
loss: 0.985491156578064,grad_norm: 0.9777708297540658, iteration: 66408
loss: 1.082202434539795,grad_norm: 0.9999993499928344, iteration: 66409
loss: 1.0200642347335815,grad_norm: 0.8144425906859813, iteration: 66410
loss: 1.0058867931365967,grad_norm: 0.9918824418158295, iteration: 66411
loss: 1.0581811666488647,grad_norm: 0.9999990829015037, iteration: 66412
loss: 1.036738634109497,grad_norm: 0.9999996688330488, iteration: 66413
loss: 1.0054491758346558,grad_norm: 0.904878061385719, iteration: 66414
loss: 0.9537181854248047,grad_norm: 0.9999989834711636, iteration: 66415
loss: 0.9627705216407776,grad_norm: 0.9783408444391818, iteration: 66416
loss: 0.9901133179664612,grad_norm: 0.9030858145625538, iteration: 66417
loss: 0.9972757697105408,grad_norm: 0.9999993841590011, iteration: 66418
loss: 1.0972427129745483,grad_norm: 0.9999991088986693, iteration: 66419
loss: 1.022330403327942,grad_norm: 0.9392300868362804, iteration: 66420
loss: 0.9768434762954712,grad_norm: 0.8666331918655763, iteration: 66421
loss: 1.0002456903457642,grad_norm: 0.8765413721220424, iteration: 66422
loss: 1.0049432516098022,grad_norm: 0.9065316724117087, iteration: 66423
loss: 1.0578869581222534,grad_norm: 0.98181138600782, iteration: 66424
loss: 1.02978515625,grad_norm: 0.9709850426061674, iteration: 66425
loss: 1.0116615295410156,grad_norm: 0.9999993232880496, iteration: 66426
loss: 0.9996639490127563,grad_norm: 0.9999993607069373, iteration: 66427
loss: 0.9503494501113892,grad_norm: 0.7891679079388398, iteration: 66428
loss: 1.0010780096054077,grad_norm: 0.9607188696190391, iteration: 66429
loss: 0.9876260757446289,grad_norm: 0.8814357604751617, iteration: 66430
loss: 0.9837459921836853,grad_norm: 0.9230325035496756, iteration: 66431
loss: 1.012566089630127,grad_norm: 0.9999994002872692, iteration: 66432
loss: 0.967430055141449,grad_norm: 0.9999991024326941, iteration: 66433
loss: 1.0585863590240479,grad_norm: 0.9999992657281914, iteration: 66434
loss: 0.9852800369262695,grad_norm: 0.916814818485804, iteration: 66435
loss: 0.9947571754455566,grad_norm: 0.8689470274638036, iteration: 66436
loss: 1.0245442390441895,grad_norm: 0.9999989890745165, iteration: 66437
loss: 0.994780421257019,grad_norm: 0.9999990954089913, iteration: 66438
loss: 1.014614224433899,grad_norm: 0.9352886932457192, iteration: 66439
loss: 1.0087567567825317,grad_norm: 0.7918591703584476, iteration: 66440
loss: 1.016793131828308,grad_norm: 0.7707471360271357, iteration: 66441
loss: 0.9855285882949829,grad_norm: 0.8772651002036531, iteration: 66442
loss: 0.9874915480613708,grad_norm: 0.8498529559594543, iteration: 66443
loss: 1.0092769861221313,grad_norm: 0.7913254897086535, iteration: 66444
loss: 0.9776801466941833,grad_norm: 0.9999992238304574, iteration: 66445
loss: 1.0397284030914307,grad_norm: 0.9999993645367011, iteration: 66446
loss: 1.0134692192077637,grad_norm: 0.9280932004354588, iteration: 66447
loss: 1.0327527523040771,grad_norm: 0.8747841743917074, iteration: 66448
loss: 1.0111310482025146,grad_norm: 0.999999347314506, iteration: 66449
loss: 1.0116795301437378,grad_norm: 0.9684112017140976, iteration: 66450
loss: 0.9874679446220398,grad_norm: 0.9092542987194223, iteration: 66451
loss: 0.9860754013061523,grad_norm: 0.9999996582573525, iteration: 66452
loss: 1.0210434198379517,grad_norm: 0.9242736834921111, iteration: 66453
loss: 1.0080211162567139,grad_norm: 0.8662832495145891, iteration: 66454
loss: 1.0180927515029907,grad_norm: 0.8474892699841026, iteration: 66455
loss: 1.0071998834609985,grad_norm: 0.9999990270899723, iteration: 66456
loss: 1.0218409299850464,grad_norm: 0.9999991527927882, iteration: 66457
loss: 0.9852218627929688,grad_norm: 0.9195542159321473, iteration: 66458
loss: 1.0089784860610962,grad_norm: 0.9999989963925802, iteration: 66459
loss: 1.0142788887023926,grad_norm: 0.9960878915120993, iteration: 66460
loss: 1.0536128282546997,grad_norm: 0.9695793104445667, iteration: 66461
loss: 1.06478750705719,grad_norm: 0.9999992521875297, iteration: 66462
loss: 0.9808158874511719,grad_norm: 0.8738300348056105, iteration: 66463
loss: 1.0131783485412598,grad_norm: 0.9999991677882544, iteration: 66464
loss: 0.9812535643577576,grad_norm: 0.83525731819681, iteration: 66465
loss: 0.9764777421951294,grad_norm: 0.9797813689974406, iteration: 66466
loss: 1.0128270387649536,grad_norm: 0.852544104751916, iteration: 66467
loss: 0.9307952523231506,grad_norm: 0.9999992679464953, iteration: 66468
loss: 0.9397444725036621,grad_norm: 0.999999092761358, iteration: 66469
loss: 0.9937570095062256,grad_norm: 0.804543368567441, iteration: 66470
loss: 0.9889004230499268,grad_norm: 0.8710952220983901, iteration: 66471
loss: 1.0268207788467407,grad_norm: 0.8906613954905748, iteration: 66472
loss: 1.0196157693862915,grad_norm: 0.9999991618091869, iteration: 66473
loss: 1.015022873878479,grad_norm: 0.9687640981001641, iteration: 66474
loss: 1.0043787956237793,grad_norm: 0.8811177233489257, iteration: 66475
loss: 1.0061322450637817,grad_norm: 0.9160098724404102, iteration: 66476
loss: 1.0361193418502808,grad_norm: 0.999999213055153, iteration: 66477
loss: 0.9920727014541626,grad_norm: 0.9999991288912492, iteration: 66478
loss: 1.0144444704055786,grad_norm: 0.972660601141062, iteration: 66479
loss: 1.012781023979187,grad_norm: 0.9999996182955436, iteration: 66480
loss: 0.9874151349067688,grad_norm: 0.999998989348878, iteration: 66481
loss: 0.9803174734115601,grad_norm: 0.9999991051727767, iteration: 66482
loss: 1.0075434446334839,grad_norm: 0.887069505376975, iteration: 66483
loss: 1.0138262510299683,grad_norm: 0.9999992063052695, iteration: 66484
loss: 1.0174179077148438,grad_norm: 0.7692139087746069, iteration: 66485
loss: 1.0006465911865234,grad_norm: 0.8930681120206323, iteration: 66486
loss: 1.0393645763397217,grad_norm: 0.8953310810914346, iteration: 66487
loss: 0.968143880367279,grad_norm: 0.9999991909940934, iteration: 66488
loss: 1.0430808067321777,grad_norm: 0.99999959761553, iteration: 66489
loss: 1.0045124292373657,grad_norm: 0.9136743138182336, iteration: 66490
loss: 0.9990513324737549,grad_norm: 0.8961119780465986, iteration: 66491
loss: 0.99263995885849,grad_norm: 0.9536565541622334, iteration: 66492
loss: 1.0096248388290405,grad_norm: 0.9999992038167602, iteration: 66493
loss: 0.9959595799446106,grad_norm: 0.9999993717544307, iteration: 66494
loss: 0.9953073263168335,grad_norm: 0.9999992595280891, iteration: 66495
loss: 1.0057464838027954,grad_norm: 0.8744872382066757, iteration: 66496
loss: 0.9617227911949158,grad_norm: 0.9999991003802402, iteration: 66497
loss: 1.0177024602890015,grad_norm: 0.9404464091705684, iteration: 66498
loss: 0.9561026096343994,grad_norm: 0.9135415289630363, iteration: 66499
loss: 0.9811601638793945,grad_norm: 0.9999991751203346, iteration: 66500
loss: 1.0076161623001099,grad_norm: 0.831312645025653, iteration: 66501
loss: 1.0297443866729736,grad_norm: 0.9139367887799553, iteration: 66502
loss: 1.0166505575180054,grad_norm: 0.8946204453197615, iteration: 66503
loss: 1.0164035558700562,grad_norm: 0.99999897417776, iteration: 66504
loss: 1.0140888690948486,grad_norm: 0.9080443284487072, iteration: 66505
loss: 1.0169581174850464,grad_norm: 0.8577016966267501, iteration: 66506
loss: 0.9577893614768982,grad_norm: 0.9691936276814964, iteration: 66507
loss: 1.016244888305664,grad_norm: 0.9999993570729518, iteration: 66508
loss: 0.9941144585609436,grad_norm: 0.9071692781693439, iteration: 66509
loss: 0.9815045595169067,grad_norm: 0.8790848512004871, iteration: 66510
loss: 0.995446503162384,grad_norm: 0.9946660002933224, iteration: 66511
loss: 0.9831147193908691,grad_norm: 0.9717962643891471, iteration: 66512
loss: 0.9799338579177856,grad_norm: 0.999999912428313, iteration: 66513
loss: 0.989233136177063,grad_norm: 0.8800439533929396, iteration: 66514
loss: 1.0100805759429932,grad_norm: 0.9999990490550006, iteration: 66515
loss: 0.9872957468032837,grad_norm: 0.8734757060174181, iteration: 66516
loss: 0.9833523631095886,grad_norm: 0.8272283813156145, iteration: 66517
loss: 1.0852168798446655,grad_norm: 0.9999994282676979, iteration: 66518
loss: 1.0084198713302612,grad_norm: 0.9999990954084648, iteration: 66519
loss: 0.9962961673736572,grad_norm: 0.9044702640046204, iteration: 66520
loss: 0.9773552417755127,grad_norm: 0.9307745638512355, iteration: 66521
loss: 0.9944177865982056,grad_norm: 0.8850345512015655, iteration: 66522
loss: 0.9892100095748901,grad_norm: 0.9158754834816815, iteration: 66523
loss: 1.0219907760620117,grad_norm: 0.8762746116965614, iteration: 66524
loss: 1.0146689414978027,grad_norm: 0.9877489925813427, iteration: 66525
loss: 0.9820655584335327,grad_norm: 0.9999991845732045, iteration: 66526
loss: 1.0021096467971802,grad_norm: 0.7271311074749662, iteration: 66527
loss: 1.010908842086792,grad_norm: 0.9999990826529973, iteration: 66528
loss: 1.1018452644348145,grad_norm: 0.999999217632666, iteration: 66529
loss: 1.0142948627471924,grad_norm: 0.9999991682320514, iteration: 66530
loss: 1.0128613710403442,grad_norm: 0.8929775908956815, iteration: 66531
loss: 1.0010972023010254,grad_norm: 0.9999990310323167, iteration: 66532
loss: 1.0284876823425293,grad_norm: 0.9999996502663805, iteration: 66533
loss: 1.001037836074829,grad_norm: 0.9999991863161396, iteration: 66534
loss: 1.0371037721633911,grad_norm: 0.9441098045265112, iteration: 66535
loss: 1.0212390422821045,grad_norm: 0.9064437938243746, iteration: 66536
loss: 0.9814766049385071,grad_norm: 0.893615675187479, iteration: 66537
loss: 0.9865570068359375,grad_norm: 0.8778588705558995, iteration: 66538
loss: 1.0252795219421387,grad_norm: 0.9999995128398746, iteration: 66539
loss: 0.9851136803627014,grad_norm: 0.8841783108996141, iteration: 66540
loss: 1.0190844535827637,grad_norm: 0.9999991380688045, iteration: 66541
loss: 0.969144344329834,grad_norm: 0.8841917726848538, iteration: 66542
loss: 1.0128673315048218,grad_norm: 0.9999992927257193, iteration: 66543
loss: 0.9698950052261353,grad_norm: 0.9999991036187506, iteration: 66544
loss: 0.9970605969429016,grad_norm: 0.9999990916813959, iteration: 66545
loss: 1.0119636058807373,grad_norm: 0.9999993139431089, iteration: 66546
loss: 0.9859889149665833,grad_norm: 0.8545248289585654, iteration: 66547
loss: 0.9966593980789185,grad_norm: 0.9634221358709717, iteration: 66548
loss: 0.9281461834907532,grad_norm: 0.9017338041818376, iteration: 66549
loss: 1.0033706426620483,grad_norm: 0.9472732463510561, iteration: 66550
loss: 1.0260850191116333,grad_norm: 0.9171246605785431, iteration: 66551
loss: 1.0066543817520142,grad_norm: 0.999999665717372, iteration: 66552
loss: 1.0032811164855957,grad_norm: 0.9533826655133549, iteration: 66553
loss: 1.0167192220687866,grad_norm: 0.9999992327944914, iteration: 66554
loss: 1.0145587921142578,grad_norm: 0.999999139336331, iteration: 66555
loss: 0.9911140203475952,grad_norm: 0.999999230383967, iteration: 66556
loss: 1.0473276376724243,grad_norm: 0.9999991789794498, iteration: 66557
loss: 1.084990382194519,grad_norm: 0.9999991084362214, iteration: 66558
loss: 1.098288655281067,grad_norm: 0.9999996925014718, iteration: 66559
loss: 1.0204883813858032,grad_norm: 0.8253289281933917, iteration: 66560
loss: 1.0907052755355835,grad_norm: 0.9999992758286048, iteration: 66561
loss: 0.999478816986084,grad_norm: 0.9972036463157564, iteration: 66562
loss: 1.004970908164978,grad_norm: 0.8607459725626865, iteration: 66563
loss: 1.027917742729187,grad_norm: 0.9999993128623763, iteration: 66564
loss: 1.0244042873382568,grad_norm: 0.9985835125086967, iteration: 66565
loss: 0.9977144002914429,grad_norm: 0.9422596743831532, iteration: 66566
loss: 0.9847319722175598,grad_norm: 0.964510547176892, iteration: 66567
loss: 0.986656665802002,grad_norm: 0.9515725498848969, iteration: 66568
loss: 0.9458162188529968,grad_norm: 0.9377124643396386, iteration: 66569
loss: 0.9957209229469299,grad_norm: 0.999999374257424, iteration: 66570
loss: 1.0391771793365479,grad_norm: 0.9999994225041016, iteration: 66571
loss: 1.0349533557891846,grad_norm: 0.9999990356616462, iteration: 66572
loss: 1.0114766359329224,grad_norm: 0.8953165043429411, iteration: 66573
loss: 0.9674438238143921,grad_norm: 0.9612197927796888, iteration: 66574
loss: 0.9980114102363586,grad_norm: 0.8788901277018346, iteration: 66575
loss: 0.9803244471549988,grad_norm: 0.9999990316642776, iteration: 66576
loss: 0.9977018237113953,grad_norm: 0.9509455016106416, iteration: 66577
loss: 1.0247151851654053,grad_norm: 0.9999990618204373, iteration: 66578
loss: 1.002347707748413,grad_norm: 0.7720372104101915, iteration: 66579
loss: 1.040341854095459,grad_norm: 0.9999996464741981, iteration: 66580
loss: 0.9893789887428284,grad_norm: 0.9336833123777447, iteration: 66581
loss: 1.0236130952835083,grad_norm: 0.9999992026523578, iteration: 66582
loss: 1.043931245803833,grad_norm: 0.8796966070421871, iteration: 66583
loss: 0.9824551939964294,grad_norm: 0.9999989702893779, iteration: 66584
loss: 1.0294711589813232,grad_norm: 0.9999996045970004, iteration: 66585
loss: 0.9698578715324402,grad_norm: 0.9013145161309306, iteration: 66586
loss: 0.9955469369888306,grad_norm: 0.8099105659893933, iteration: 66587
loss: 0.988445520401001,grad_norm: 0.9999995574806281, iteration: 66588
loss: 1.1014591455459595,grad_norm: 0.9999998633647236, iteration: 66589
loss: 0.9861629009246826,grad_norm: 0.758755937116759, iteration: 66590
loss: 0.9889081716537476,grad_norm: 0.9999992047916201, iteration: 66591
loss: 0.9760879874229431,grad_norm: 0.9014478393268058, iteration: 66592
loss: 1.0016990900039673,grad_norm: 0.9885486957182353, iteration: 66593
loss: 0.9936100244522095,grad_norm: 0.9999990052120301, iteration: 66594
loss: 0.9799846410751343,grad_norm: 0.9999989698031526, iteration: 66595
loss: 1.034397006034851,grad_norm: 0.9999991846840264, iteration: 66596
loss: 1.0609052181243896,grad_norm: 0.9999991468522554, iteration: 66597
loss: 0.9864915609359741,grad_norm: 0.8454702618280844, iteration: 66598
loss: 1.0163195133209229,grad_norm: 0.9999990081727824, iteration: 66599
loss: 0.9915565848350525,grad_norm: 0.999999165638682, iteration: 66600
loss: 0.9773927330970764,grad_norm: 0.999999286242585, iteration: 66601
loss: 1.0262882709503174,grad_norm: 0.8406424104544752, iteration: 66602
loss: 1.00119149684906,grad_norm: 0.9999991419651002, iteration: 66603
loss: 0.9787002205848694,grad_norm: 0.9167474050339857, iteration: 66604
loss: 1.033514142036438,grad_norm: 0.9999989505724904, iteration: 66605
loss: 0.9917826652526855,grad_norm: 0.8970482667667714, iteration: 66606
loss: 0.9932760000228882,grad_norm: 0.9999990030603871, iteration: 66607
loss: 0.9721807837486267,grad_norm: 0.9478812951663175, iteration: 66608
loss: 0.992170512676239,grad_norm: 0.9288020370805243, iteration: 66609
loss: 1.0511029958724976,grad_norm: 0.9999995595041941, iteration: 66610
loss: 1.0554509162902832,grad_norm: 0.9999997724135107, iteration: 66611
loss: 1.013899564743042,grad_norm: 0.8260463116714487, iteration: 66612
loss: 0.9325314164161682,grad_norm: 0.9999990076215058, iteration: 66613
loss: 0.9844778180122375,grad_norm: 0.949607398985345, iteration: 66614
loss: 1.0035204887390137,grad_norm: 0.9374338752966321, iteration: 66615
loss: 0.9998611807823181,grad_norm: 0.9999993791329216, iteration: 66616
loss: 1.0157513618469238,grad_norm: 0.9094350667255259, iteration: 66617
loss: 1.0192638635635376,grad_norm: 0.9999990481833877, iteration: 66618
loss: 1.0400447845458984,grad_norm: 0.9989943943793594, iteration: 66619
loss: 1.1095634698867798,grad_norm: 0.9999999010614923, iteration: 66620
loss: 1.0226207971572876,grad_norm: 0.9999992498397975, iteration: 66621
loss: 1.0412399768829346,grad_norm: 0.9999990365067275, iteration: 66622
loss: 1.045107126235962,grad_norm: 0.8249918007707525, iteration: 66623
loss: 1.0370157957077026,grad_norm: 0.9999991343069171, iteration: 66624
loss: 0.9808059930801392,grad_norm: 0.9999991826863662, iteration: 66625
loss: 1.0381721258163452,grad_norm: 0.9999993317130262, iteration: 66626
loss: 1.0206472873687744,grad_norm: 0.9206545118654941, iteration: 66627
loss: 0.9731519222259521,grad_norm: 0.9999993269934969, iteration: 66628
loss: 0.9882076382637024,grad_norm: 0.9999994318494244, iteration: 66629
loss: 1.0385876893997192,grad_norm: 0.9875680456424756, iteration: 66630
loss: 1.02574622631073,grad_norm: 0.9650114536800201, iteration: 66631
loss: 1.0411204099655151,grad_norm: 0.7524251664092693, iteration: 66632
loss: 1.0200071334838867,grad_norm: 0.9999993621952035, iteration: 66633
loss: 1.0185892581939697,grad_norm: 0.8983236168233679, iteration: 66634
loss: 1.0166646242141724,grad_norm: 0.8520083932153971, iteration: 66635
loss: 1.00960111618042,grad_norm: 0.9656999569784129, iteration: 66636
loss: 0.9957054257392883,grad_norm: 0.9999992153806753, iteration: 66637
loss: 1.1068168878555298,grad_norm: 0.9999992557549746, iteration: 66638
loss: 0.9653039574623108,grad_norm: 0.9999989157199481, iteration: 66639
loss: 1.0219162702560425,grad_norm: 0.9999999378882203, iteration: 66640
loss: 1.0741534233093262,grad_norm: 0.9491525245004958, iteration: 66641
loss: 0.9967028498649597,grad_norm: 0.8377695949047258, iteration: 66642
loss: 1.010421872138977,grad_norm: 0.9656969100156658, iteration: 66643
loss: 1.0065675973892212,grad_norm: 0.9236126345150137, iteration: 66644
loss: 1.052348256111145,grad_norm: 0.9999994465428655, iteration: 66645
loss: 1.180324673652649,grad_norm: 0.9999990777409326, iteration: 66646
loss: 1.0361204147338867,grad_norm: 0.9999994431058566, iteration: 66647
loss: 0.9826004505157471,grad_norm: 0.9757987150129747, iteration: 66648
loss: 0.9840863943099976,grad_norm: 0.8378587423425166, iteration: 66649
loss: 0.9987138509750366,grad_norm: 0.99999950654509, iteration: 66650
loss: 1.0042169094085693,grad_norm: 0.8918869550261983, iteration: 66651
loss: 1.0105117559432983,grad_norm: 0.7718680365770225, iteration: 66652
loss: 0.9911766052246094,grad_norm: 0.9999994063554909, iteration: 66653
loss: 0.9747375249862671,grad_norm: 0.7853493770058676, iteration: 66654
loss: 1.0639433860778809,grad_norm: 0.9999992569777327, iteration: 66655
loss: 1.0120218992233276,grad_norm: 0.9999991515747714, iteration: 66656
loss: 1.0304911136627197,grad_norm: 0.999999299158478, iteration: 66657
loss: 0.9900720119476318,grad_norm: 0.8183665738006961, iteration: 66658
loss: 1.0083940029144287,grad_norm: 0.9999993521728758, iteration: 66659
loss: 1.2008165121078491,grad_norm: 0.9999992048770064, iteration: 66660
loss: 0.9843641519546509,grad_norm: 0.9999991582918799, iteration: 66661
loss: 1.0012598037719727,grad_norm: 0.8653336885402882, iteration: 66662
loss: 1.118852138519287,grad_norm: 0.9999992436361017, iteration: 66663
loss: 1.0331337451934814,grad_norm: 0.9999996255497181, iteration: 66664
loss: 0.9897297620773315,grad_norm: 0.9205305205822292, iteration: 66665
loss: 0.9736476540565491,grad_norm: 0.9880047910719939, iteration: 66666
loss: 1.1565595865249634,grad_norm: 0.9999994899886929, iteration: 66667
loss: 1.0191363096237183,grad_norm: 0.9999995856638797, iteration: 66668
loss: 1.0098658800125122,grad_norm: 0.9795444783341011, iteration: 66669
loss: 1.0795742273330688,grad_norm: 0.9999998099338081, iteration: 66670
loss: 1.0059181451797485,grad_norm: 0.9999992450717223, iteration: 66671
loss: 0.9820532202720642,grad_norm: 0.8479478383409434, iteration: 66672
loss: 1.0287531614303589,grad_norm: 0.95759293242694, iteration: 66673
loss: 1.0037269592285156,grad_norm: 0.9556155292948383, iteration: 66674
loss: 1.0005429983139038,grad_norm: 0.8915370056879206, iteration: 66675
loss: 1.0335657596588135,grad_norm: 0.8661655185562722, iteration: 66676
loss: 1.0507901906967163,grad_norm: 0.9153081786841081, iteration: 66677
loss: 1.0534836053848267,grad_norm: 0.9999990567160022, iteration: 66678
loss: 1.076499581336975,grad_norm: 0.9999993047303989, iteration: 66679
loss: 1.0458697080612183,grad_norm: 0.9999998812000509, iteration: 66680
loss: 1.0306065082550049,grad_norm: 0.9999993887090731, iteration: 66681
loss: 1.000319242477417,grad_norm: 0.9969282559445187, iteration: 66682
loss: 1.0045548677444458,grad_norm: 0.9999995516523682, iteration: 66683
loss: 1.0307201147079468,grad_norm: 0.9999991510076301, iteration: 66684
loss: 0.9936670064926147,grad_norm: 0.9999993366529046, iteration: 66685
loss: 0.993769645690918,grad_norm: 0.8827136446160962, iteration: 66686
loss: 1.0565764904022217,grad_norm: 0.9999995669572088, iteration: 66687
loss: 1.0222198963165283,grad_norm: 0.9999991054889356, iteration: 66688
loss: 0.977522075176239,grad_norm: 0.8826819417302021, iteration: 66689
loss: 1.0047434568405151,grad_norm: 0.92302888089284, iteration: 66690
loss: 0.9829402565956116,grad_norm: 0.8597827095366226, iteration: 66691
loss: 0.9834563732147217,grad_norm: 0.7468518744011899, iteration: 66692
loss: 1.0049800872802734,grad_norm: 0.9544795396819445, iteration: 66693
loss: 1.0452167987823486,grad_norm: 0.9896722196962154, iteration: 66694
loss: 1.0204577445983887,grad_norm: 0.8078027791411881, iteration: 66695
loss: 1.02994704246521,grad_norm: 0.9999991431691704, iteration: 66696
loss: 1.048003077507019,grad_norm: 0.9441467750360447, iteration: 66697
loss: 0.9902900457382202,grad_norm: 0.9549190852269424, iteration: 66698
loss: 0.9661504030227661,grad_norm: 0.9564033160616762, iteration: 66699
loss: 1.0118014812469482,grad_norm: 0.9999991781373656, iteration: 66700
loss: 0.9936039447784424,grad_norm: 0.9395600127349057, iteration: 66701
loss: 0.9957578778266907,grad_norm: 0.999999183045126, iteration: 66702
loss: 1.0184327363967896,grad_norm: 0.999999524544719, iteration: 66703
loss: 1.0842266082763672,grad_norm: 0.9999998867117387, iteration: 66704
loss: 0.9975388646125793,grad_norm: 0.999999206752859, iteration: 66705
loss: 1.02869713306427,grad_norm: 0.9999990517465412, iteration: 66706
loss: 1.0074845552444458,grad_norm: 0.9999997940370733, iteration: 66707
loss: 1.0093733072280884,grad_norm: 0.8859148130274667, iteration: 66708
loss: 1.0124403238296509,grad_norm: 0.9999991041156667, iteration: 66709
loss: 0.9956063032150269,grad_norm: 0.9999993202020188, iteration: 66710
loss: 1.0061298608779907,grad_norm: 0.915939737930921, iteration: 66711
loss: 1.0011932849884033,grad_norm: 0.8737555294876018, iteration: 66712
loss: 1.0280641317367554,grad_norm: 0.8623324137087043, iteration: 66713
loss: 1.0221492052078247,grad_norm: 0.8030864285397591, iteration: 66714
loss: 1.0238356590270996,grad_norm: 0.9999992511598381, iteration: 66715
loss: 0.9856544137001038,grad_norm: 0.8144438450909949, iteration: 66716
loss: 1.0505328178405762,grad_norm: 0.9999992713053448, iteration: 66717
loss: 1.0083658695220947,grad_norm: 0.9999990073416147, iteration: 66718
loss: 1.0104236602783203,grad_norm: 0.9999994005600273, iteration: 66719
loss: 1.0101960897445679,grad_norm: 0.9999992650615925, iteration: 66720
loss: 0.9782047271728516,grad_norm: 0.9999991038116823, iteration: 66721
loss: 1.036179780960083,grad_norm: 0.9999990507864084, iteration: 66722
loss: 0.9621745347976685,grad_norm: 0.898751391250257, iteration: 66723
loss: 1.1459436416625977,grad_norm: 0.9999999758409172, iteration: 66724
loss: 0.980802059173584,grad_norm: 0.8673422576052359, iteration: 66725
loss: 0.9671024084091187,grad_norm: 0.9999990695392571, iteration: 66726
loss: 1.0363149642944336,grad_norm: 0.9052653319881941, iteration: 66727
loss: 1.0048377513885498,grad_norm: 0.9999989806016572, iteration: 66728
loss: 0.9970284104347229,grad_norm: 0.9999991180433137, iteration: 66729
loss: 1.016098976135254,grad_norm: 0.999999927285783, iteration: 66730
loss: 0.9711405038833618,grad_norm: 0.9999992546564591, iteration: 66731
loss: 1.0153433084487915,grad_norm: 0.8477793541441483, iteration: 66732
loss: 1.0228031873703003,grad_norm: 0.8963347477919785, iteration: 66733
loss: 0.9952499270439148,grad_norm: 0.97315078468686, iteration: 66734
loss: 1.0099629163742065,grad_norm: 0.8707351702887014, iteration: 66735
loss: 1.0045298337936401,grad_norm: 0.9719385995478591, iteration: 66736
loss: 0.9933084845542908,grad_norm: 0.8557737330439819, iteration: 66737
loss: 1.0080407857894897,grad_norm: 0.8193007271044525, iteration: 66738
loss: 1.022092580795288,grad_norm: 0.8402336130967751, iteration: 66739
loss: 1.0159306526184082,grad_norm: 0.9999993909407134, iteration: 66740
loss: 0.973703145980835,grad_norm: 0.9364506066549053, iteration: 66741
loss: 1.0495216846466064,grad_norm: 0.9229665902409958, iteration: 66742
loss: 1.0411832332611084,grad_norm: 0.8576495106687002, iteration: 66743
loss: 1.0159053802490234,grad_norm: 0.842053516460203, iteration: 66744
loss: 0.9978446364402771,grad_norm: 0.8755430267922234, iteration: 66745
loss: 0.97507244348526,grad_norm: 0.9510500461032794, iteration: 66746
loss: 1.0223243236541748,grad_norm: 0.9999991638156799, iteration: 66747
loss: 1.022321105003357,grad_norm: 0.9999991427435337, iteration: 66748
loss: 1.0169293880462646,grad_norm: 0.8911836681334198, iteration: 66749
loss: 1.0150514841079712,grad_norm: 0.8523995649220699, iteration: 66750
loss: 0.9964882731437683,grad_norm: 0.8572803042536457, iteration: 66751
loss: 0.9794683456420898,grad_norm: 0.9738286673247111, iteration: 66752
loss: 1.0217622518539429,grad_norm: 0.9219651244247711, iteration: 66753
loss: 0.989593505859375,grad_norm: 0.9294679070997802, iteration: 66754
loss: 1.0072219371795654,grad_norm: 0.9499378849096856, iteration: 66755
loss: 0.9652644395828247,grad_norm: 0.9999991445361093, iteration: 66756
loss: 0.9848119020462036,grad_norm: 0.9999990099672621, iteration: 66757
loss: 0.9778825640678406,grad_norm: 0.9999990936005397, iteration: 66758
loss: 1.0005306005477905,grad_norm: 0.9231553723549154, iteration: 66759
loss: 1.031535029411316,grad_norm: 0.9086569797196805, iteration: 66760
loss: 1.0065255165100098,grad_norm: 0.7645221567688484, iteration: 66761
loss: 1.0080476999282837,grad_norm: 0.9999991803204415, iteration: 66762
loss: 1.0045572519302368,grad_norm: 0.8615262734462512, iteration: 66763
loss: 1.0112786293029785,grad_norm: 0.8691019823023493, iteration: 66764
loss: 0.9965417385101318,grad_norm: 0.999999877207025, iteration: 66765
loss: 1.0258004665374756,grad_norm: 0.8396547506115243, iteration: 66766
loss: 1.0315366983413696,grad_norm: 0.9999990740217789, iteration: 66767
loss: 0.9786340594291687,grad_norm: 0.8132412469949959, iteration: 66768
loss: 0.9879642724990845,grad_norm: 0.7432736079103128, iteration: 66769
loss: 0.9612694382667542,grad_norm: 0.8957295490212186, iteration: 66770
loss: 0.9835981726646423,grad_norm: 0.981988573840952, iteration: 66771
loss: 0.9969661235809326,grad_norm: 0.9999989508799733, iteration: 66772
loss: 0.9985971450805664,grad_norm: 0.9999990627873917, iteration: 66773
loss: 1.0715569257736206,grad_norm: 0.8910877995854424, iteration: 66774
loss: 1.0166049003601074,grad_norm: 0.7960659838255212, iteration: 66775
loss: 1.036782145500183,grad_norm: 0.9218464780900515, iteration: 66776
loss: 1.002048134803772,grad_norm: 0.9242365910605895, iteration: 66777
loss: 0.9940225481987,grad_norm: 0.8967671754602968, iteration: 66778
loss: 1.001781940460205,grad_norm: 0.9999991270548992, iteration: 66779
loss: 0.9782756567001343,grad_norm: 0.8216746644016442, iteration: 66780
loss: 1.0017273426055908,grad_norm: 0.9923437600640509, iteration: 66781
loss: 1.0692098140716553,grad_norm: 0.9627552445200761, iteration: 66782
loss: 1.006666660308838,grad_norm: 0.8099925670145405, iteration: 66783
loss: 0.9883294701576233,grad_norm: 0.9850134502506139, iteration: 66784
loss: 1.153538465499878,grad_norm: 0.9999996313304229, iteration: 66785
loss: 1.0284183025360107,grad_norm: 0.99999952089992, iteration: 66786
loss: 0.9996163845062256,grad_norm: 0.9999997355566442, iteration: 66787
loss: 1.001346468925476,grad_norm: 0.7802065199229551, iteration: 66788
loss: 0.9709974527359009,grad_norm: 0.8869410199558917, iteration: 66789
loss: 1.0184632539749146,grad_norm: 0.9999993269146642, iteration: 66790
loss: 0.9704427123069763,grad_norm: 0.999999679067974, iteration: 66791
loss: 1.0740857124328613,grad_norm: 0.9856578365890566, iteration: 66792
loss: 0.9844018220901489,grad_norm: 0.9164213796782593, iteration: 66793
loss: 1.054486632347107,grad_norm: 0.9550535475153422, iteration: 66794
loss: 0.9775898456573486,grad_norm: 0.999999089436784, iteration: 66795
loss: 0.9854162931442261,grad_norm: 0.9599770636322323, iteration: 66796
loss: 1.0135302543640137,grad_norm: 0.892723316668434, iteration: 66797
loss: 0.9936153888702393,grad_norm: 0.9286621900055045, iteration: 66798
loss: 0.978585422039032,grad_norm: 0.9999993303459334, iteration: 66799
loss: 0.9610518217086792,grad_norm: 0.9423517529845736, iteration: 66800
loss: 0.9947035908699036,grad_norm: 0.9573856314197259, iteration: 66801
loss: 1.0289386510849,grad_norm: 0.881287193114086, iteration: 66802
loss: 0.9982244372367859,grad_norm: 0.8876828798275763, iteration: 66803
loss: 1.030003309249878,grad_norm: 0.9196131638627411, iteration: 66804
loss: 1.034972071647644,grad_norm: 0.9999995554921848, iteration: 66805
loss: 1.0815503597259521,grad_norm: 0.9999993870766806, iteration: 66806
loss: 0.9746893644332886,grad_norm: 0.9999990908808118, iteration: 66807
loss: 0.9779332280158997,grad_norm: 0.8293844977397068, iteration: 66808
loss: 1.0469114780426025,grad_norm: 0.9999994380288604, iteration: 66809
loss: 0.9831998944282532,grad_norm: 0.9885252950141329, iteration: 66810
loss: 1.0164422988891602,grad_norm: 0.8622692667377926, iteration: 66811
loss: 0.9896073341369629,grad_norm: 0.9999990545666658, iteration: 66812
loss: 1.0244288444519043,grad_norm: 0.9552267462006774, iteration: 66813
loss: 0.9928321242332458,grad_norm: 0.9312972900946038, iteration: 66814
loss: 1.0235170125961304,grad_norm: 0.999999248019368, iteration: 66815
loss: 1.0033702850341797,grad_norm: 0.8726873837777954, iteration: 66816
loss: 0.9950904250144958,grad_norm: 0.9226925770536003, iteration: 66817
loss: 1.0335856676101685,grad_norm: 0.9999992177407026, iteration: 66818
loss: 1.0187230110168457,grad_norm: 0.8754006064374662, iteration: 66819
loss: 0.9904931783676147,grad_norm: 0.9999992156778805, iteration: 66820
loss: 0.9595362544059753,grad_norm: 0.9464966684436268, iteration: 66821
loss: 1.0193580389022827,grad_norm: 0.8978866915590104, iteration: 66822
loss: 0.9790284037590027,grad_norm: 0.9999989907869367, iteration: 66823
loss: 0.9922279119491577,grad_norm: 0.9999990563995359, iteration: 66824
loss: 1.0299710035324097,grad_norm: 0.9481846909495165, iteration: 66825
loss: 0.9789520502090454,grad_norm: 0.94557827065257, iteration: 66826
loss: 1.0012344121932983,grad_norm: 0.9029111640694493, iteration: 66827
loss: 0.9825910329818726,grad_norm: 0.8776170035218339, iteration: 66828
loss: 0.9692728519439697,grad_norm: 0.8720982338580134, iteration: 66829
loss: 1.0787829160690308,grad_norm: 0.9999998454277486, iteration: 66830
loss: 0.9775031208992004,grad_norm: 0.7921878982284396, iteration: 66831
loss: 1.0378130674362183,grad_norm: 0.9999991415186645, iteration: 66832
loss: 1.043352723121643,grad_norm: 0.8638069448893692, iteration: 66833
loss: 1.0278247594833374,grad_norm: 0.9575364231509668, iteration: 66834
loss: 0.9856597185134888,grad_norm: 0.9238367380204439, iteration: 66835
loss: 1.0208826065063477,grad_norm: 0.9596116007625318, iteration: 66836
loss: 0.978696346282959,grad_norm: 0.9999992054328372, iteration: 66837
loss: 1.005849003791809,grad_norm: 0.891180706934342, iteration: 66838
loss: 1.0341049432754517,grad_norm: 0.9415747051365825, iteration: 66839
loss: 0.9978472590446472,grad_norm: 0.766525112272793, iteration: 66840
loss: 0.9973544478416443,grad_norm: 0.8023660675897063, iteration: 66841
loss: 0.9810235500335693,grad_norm: 0.9999992938929889, iteration: 66842
loss: 1.01340913772583,grad_norm: 0.9999995157413308, iteration: 66843
loss: 1.0171481370925903,grad_norm: 0.9999997567488005, iteration: 66844
loss: 1.1157617568969727,grad_norm: 0.9999993600646805, iteration: 66845
loss: 1.0193558931350708,grad_norm: 0.9999991220452833, iteration: 66846
loss: 1.001754879951477,grad_norm: 0.913263166259055, iteration: 66847
loss: 0.9671945571899414,grad_norm: 0.9393415798583393, iteration: 66848
loss: 0.9937373399734497,grad_norm: 0.8807670899249884, iteration: 66849
loss: 1.0246859788894653,grad_norm: 0.9999996956418667, iteration: 66850
loss: 1.030255675315857,grad_norm: 0.8118098714168, iteration: 66851
loss: 0.9975569248199463,grad_norm: 0.8349127548385843, iteration: 66852
loss: 1.0103240013122559,grad_norm: 0.8914048983527568, iteration: 66853
loss: 0.9741807579994202,grad_norm: 0.9542295615025731, iteration: 66854
loss: 0.9988203048706055,grad_norm: 0.7433485303966637, iteration: 66855
loss: 1.0591552257537842,grad_norm: 0.9999992442694768, iteration: 66856
loss: 1.011873483657837,grad_norm: 0.9123947289919526, iteration: 66857
loss: 0.9633532166481018,grad_norm: 0.9517938870700905, iteration: 66858
loss: 1.0031778812408447,grad_norm: 0.9999997312861161, iteration: 66859
loss: 0.9874150156974792,grad_norm: 0.8889202982093654, iteration: 66860
loss: 0.9793936610221863,grad_norm: 0.8831475968790305, iteration: 66861
loss: 1.0007071495056152,grad_norm: 0.9600143832042488, iteration: 66862
loss: 1.0543545484542847,grad_norm: 0.9999995015029871, iteration: 66863
loss: 0.9981256723403931,grad_norm: 0.9811699656064387, iteration: 66864
loss: 1.0074917078018188,grad_norm: 0.8399410218946032, iteration: 66865
loss: 1.0387827157974243,grad_norm: 0.999999028483033, iteration: 66866
loss: 1.0344840288162231,grad_norm: 0.9999990408473461, iteration: 66867
loss: 1.0928329229354858,grad_norm: 0.9999993967476022, iteration: 66868
loss: 0.9934780597686768,grad_norm: 0.9999990271838108, iteration: 66869
loss: 0.9916951060295105,grad_norm: 0.9639235444311244, iteration: 66870
loss: 1.0497568845748901,grad_norm: 0.9999990559609351, iteration: 66871
loss: 0.9818012118339539,grad_norm: 0.8416088684125859, iteration: 66872
loss: 0.9923198223114014,grad_norm: 0.9434610219947385, iteration: 66873
loss: 1.0507761240005493,grad_norm: 0.8982573850658101, iteration: 66874
loss: 0.9997274279594421,grad_norm: 0.9999989843243642, iteration: 66875
loss: 0.9931507706642151,grad_norm: 0.8341628513501401, iteration: 66876
loss: 0.9914044737815857,grad_norm: 0.8513906938557422, iteration: 66877
loss: 1.025709867477417,grad_norm: 0.9431719935792455, iteration: 66878
loss: 1.0293830633163452,grad_norm: 0.9999998231975704, iteration: 66879
loss: 1.0300371646881104,grad_norm: 0.9676712320751646, iteration: 66880
loss: 1.0213803052902222,grad_norm: 0.9999997123853133, iteration: 66881
loss: 1.031997561454773,grad_norm: 0.7380165309777386, iteration: 66882
loss: 1.0037304162979126,grad_norm: 0.8281609181053767, iteration: 66883
loss: 1.0490280389785767,grad_norm: 0.9861686678872906, iteration: 66884
loss: 1.0599782466888428,grad_norm: 0.9999994209323966, iteration: 66885
loss: 1.0008710622787476,grad_norm: 0.9999991869896967, iteration: 66886
loss: 0.9799007177352905,grad_norm: 0.9033039233504496, iteration: 66887
loss: 1.0132495164871216,grad_norm: 0.9522535889806487, iteration: 66888
loss: 1.1344858407974243,grad_norm: 0.9999997626760312, iteration: 66889
loss: 1.066373586654663,grad_norm: 0.9999989237665529, iteration: 66890
loss: 1.0170255899429321,grad_norm: 0.9999994790294933, iteration: 66891
loss: 1.014994740486145,grad_norm: 0.9999990276307786, iteration: 66892
loss: 1.0875132083892822,grad_norm: 0.892003140361393, iteration: 66893
loss: 1.0772169828414917,grad_norm: 0.9999991688066909, iteration: 66894
loss: 1.077901840209961,grad_norm: 0.9999996392373145, iteration: 66895
loss: 0.9841227531433105,grad_norm: 0.999999176932658, iteration: 66896
loss: 1.010491967201233,grad_norm: 0.9999998226045957, iteration: 66897
loss: 1.0732189416885376,grad_norm: 0.9999997287319577, iteration: 66898
loss: 1.0062655210494995,grad_norm: 0.9658009175050545, iteration: 66899
loss: 1.1443157196044922,grad_norm: 0.9999996024923778, iteration: 66900
loss: 1.0165455341339111,grad_norm: 0.9999991591341786, iteration: 66901
loss: 1.0518641471862793,grad_norm: 0.9999991458065631, iteration: 66902
loss: 1.0674477815628052,grad_norm: 0.9999990292494767, iteration: 66903
loss: 1.047157883644104,grad_norm: 0.9999991283618966, iteration: 66904
loss: 0.9732092022895813,grad_norm: 0.9396774109896221, iteration: 66905
loss: 1.0368051528930664,grad_norm: 0.9999992150835347, iteration: 66906
loss: 1.007767915725708,grad_norm: 0.9999992537669964, iteration: 66907
loss: 1.0484751462936401,grad_norm: 0.9952860267738307, iteration: 66908
loss: 1.0367919206619263,grad_norm: 0.9999991432860126, iteration: 66909
loss: 1.0046614408493042,grad_norm: 0.7797111904295686, iteration: 66910
loss: 1.0955039262771606,grad_norm: 0.9999991379864759, iteration: 66911
loss: 1.0847325325012207,grad_norm: 0.9999995846690343, iteration: 66912
loss: 1.0288879871368408,grad_norm: 0.9999993885029167, iteration: 66913
loss: 0.9989576935768127,grad_norm: 0.9477165614869694, iteration: 66914
loss: 1.192432165145874,grad_norm: 0.9999990415507259, iteration: 66915
loss: 1.015308141708374,grad_norm: 0.9565260566173975, iteration: 66916
loss: 1.014504313468933,grad_norm: 0.9999994576141515, iteration: 66917
loss: 1.0588493347167969,grad_norm: 0.9999992184989164, iteration: 66918
loss: 1.0211577415466309,grad_norm: 0.9762964393452251, iteration: 66919
loss: 1.0043277740478516,grad_norm: 0.9999992081425902, iteration: 66920
loss: 1.0281023979187012,grad_norm: 0.999999178489915, iteration: 66921
loss: 1.0358238220214844,grad_norm: 0.9999992315941603, iteration: 66922
loss: 1.098242163658142,grad_norm: 0.9999991405629955, iteration: 66923
loss: 1.0302252769470215,grad_norm: 0.9496827997544056, iteration: 66924
loss: 1.0449965000152588,grad_norm: 0.9133648178276278, iteration: 66925
loss: 1.2648330926895142,grad_norm: 0.9999992457252967, iteration: 66926
loss: 1.035193681716919,grad_norm: 0.9999991402660924, iteration: 66927
loss: 1.028475046157837,grad_norm: 0.9370858195814254, iteration: 66928
loss: 1.1536622047424316,grad_norm: 0.8541350894303577, iteration: 66929
loss: 0.9624894261360168,grad_norm: 0.8500472464004646, iteration: 66930
loss: 1.0373455286026,grad_norm: 0.9285415194892457, iteration: 66931
loss: 1.0371158123016357,grad_norm: 0.9326369385427361, iteration: 66932
loss: 0.9957212209701538,grad_norm: 0.8429595109721054, iteration: 66933
loss: 1.0158436298370361,grad_norm: 0.98944530244713, iteration: 66934
loss: 0.990208625793457,grad_norm: 0.9999996332786112, iteration: 66935
loss: 1.0241292715072632,grad_norm: 0.9999995597723208, iteration: 66936
loss: 1.081192970275879,grad_norm: 0.9999997790881061, iteration: 66937
loss: 0.9913833737373352,grad_norm: 0.8961426551950599, iteration: 66938
loss: 1.0206360816955566,grad_norm: 0.9999991952052438, iteration: 66939
loss: 1.0361436605453491,grad_norm: 0.9999990660671968, iteration: 66940
loss: 1.0144809484481812,grad_norm: 0.9999990335094476, iteration: 66941
loss: 1.0404788255691528,grad_norm: 0.9999991047126777, iteration: 66942
loss: 1.128219723701477,grad_norm: 0.9999994049101157, iteration: 66943
loss: 1.063588261604309,grad_norm: 0.9999995699792562, iteration: 66944
loss: 1.0607832670211792,grad_norm: 0.9372379149442991, iteration: 66945
loss: 1.0997370481491089,grad_norm: 0.9999993174653461, iteration: 66946
loss: 1.0353705883026123,grad_norm: 0.9848458435763248, iteration: 66947
loss: 1.036643624305725,grad_norm: 0.9999993199411413, iteration: 66948
loss: 0.9854933619499207,grad_norm: 0.8357778823844797, iteration: 66949
loss: 1.0243412256240845,grad_norm: 0.9999991060240501, iteration: 66950
loss: 1.0274019241333008,grad_norm: 0.8464735793897614, iteration: 66951
loss: 1.003052830696106,grad_norm: 0.9999990307349966, iteration: 66952
loss: 1.0511407852172852,grad_norm: 0.9322126871743939, iteration: 66953
loss: 1.113576054573059,grad_norm: 0.9999995134585712, iteration: 66954
loss: 1.0271618366241455,grad_norm: 0.9999991459396095, iteration: 66955
loss: 1.13128662109375,grad_norm: 0.9999994722879142, iteration: 66956
loss: 0.9719622731208801,grad_norm: 0.9950202773533284, iteration: 66957
loss: 1.0364474058151245,grad_norm: 0.9999991646124035, iteration: 66958
loss: 1.0715066194534302,grad_norm: 0.9388942619938063, iteration: 66959
loss: 1.1158742904663086,grad_norm: 0.9999996690444615, iteration: 66960
loss: 0.9915162920951843,grad_norm: 0.9060368639255613, iteration: 66961
loss: 1.1378859281539917,grad_norm: 0.9999993171032031, iteration: 66962
loss: 1.038198709487915,grad_norm: 0.999999109190866, iteration: 66963
loss: 0.9836711287498474,grad_norm: 0.9686428172781963, iteration: 66964
loss: 0.9798303842544556,grad_norm: 0.8813584468881075, iteration: 66965
loss: 1.0188062191009521,grad_norm: 0.75383836701725, iteration: 66966
loss: 1.035763144493103,grad_norm: 0.8784172329637017, iteration: 66967
loss: 1.0235910415649414,grad_norm: 0.9999999033674946, iteration: 66968
loss: 0.9994999170303345,grad_norm: 0.9999993457533186, iteration: 66969
loss: 1.048386573791504,grad_norm: 0.8211801180734575, iteration: 66970
loss: 1.0463663339614868,grad_norm: 0.9999992474442038, iteration: 66971
loss: 1.0108031034469604,grad_norm: 0.7826603240219183, iteration: 66972
loss: 1.0525301694869995,grad_norm: 0.9999999348248706, iteration: 66973
loss: 1.010629415512085,grad_norm: 0.9999989899312096, iteration: 66974
loss: 1.0617623329162598,grad_norm: 0.8536289377733546, iteration: 66975
loss: 1.0120283365249634,grad_norm: 0.9999997914496409, iteration: 66976
loss: 1.07744300365448,grad_norm: 0.999999690154978, iteration: 66977
loss: 0.9984854459762573,grad_norm: 0.9282802205770553, iteration: 66978
loss: 1.029734492301941,grad_norm: 0.9999991958291838, iteration: 66979
loss: 0.9908895492553711,grad_norm: 0.9999992327233661, iteration: 66980
loss: 1.0159293413162231,grad_norm: 0.9999991520995221, iteration: 66981
loss: 1.3460787534713745,grad_norm: 0.9999998560433845, iteration: 66982
loss: 1.0633423328399658,grad_norm: 0.9999992130437618, iteration: 66983
loss: 1.0556389093399048,grad_norm: 0.9854098480790984, iteration: 66984
loss: 1.061097264289856,grad_norm: 0.8897237926318383, iteration: 66985
loss: 1.2788137197494507,grad_norm: 0.9999998014065742, iteration: 66986
loss: 1.0077571868896484,grad_norm: 0.9848065381535275, iteration: 66987
loss: 1.0373791456222534,grad_norm: 0.9878039168668448, iteration: 66988
loss: 1.1540827751159668,grad_norm: 0.9999996766840353, iteration: 66989
loss: 1.0660861730575562,grad_norm: 0.9999998258375762, iteration: 66990
loss: 1.0999301671981812,grad_norm: 0.9999996635496654, iteration: 66991
loss: 1.0648382902145386,grad_norm: 0.9999992873095477, iteration: 66992
loss: 1.0154656171798706,grad_norm: 0.9578964116135088, iteration: 66993
loss: 1.0888090133666992,grad_norm: 0.9999990921575954, iteration: 66994
loss: 1.097132921218872,grad_norm: 0.9999990145835652, iteration: 66995
loss: 1.0273511409759521,grad_norm: 0.9999996233915335, iteration: 66996
loss: 1.0095688104629517,grad_norm: 0.9999991958418251, iteration: 66997
loss: 0.9868422746658325,grad_norm: 0.9999990882873627, iteration: 66998
loss: 0.9984638094902039,grad_norm: 0.9984968630733745, iteration: 66999
loss: 1.077298641204834,grad_norm: 0.9999997883177596, iteration: 67000
loss: 1.0187267065048218,grad_norm: 0.9537995451011158, iteration: 67001
loss: 1.0823537111282349,grad_norm: 0.9208636180648648, iteration: 67002
loss: 0.9951024651527405,grad_norm: 0.8114032842805465, iteration: 67003
loss: 1.0337730646133423,grad_norm: 0.9999990804080858, iteration: 67004
loss: 1.0606340169906616,grad_norm: 0.9999996240308398, iteration: 67005
loss: 1.1167476177215576,grad_norm: 0.9999994311089786, iteration: 67006
loss: 1.051667332649231,grad_norm: 0.9999999647280814, iteration: 67007
loss: 1.0051965713500977,grad_norm: 0.9999992581656826, iteration: 67008
loss: 0.988312840461731,grad_norm: 0.9999990113810617, iteration: 67009
loss: 1.1086184978485107,grad_norm: 0.9999991402783116, iteration: 67010
loss: 1.085286259651184,grad_norm: 0.999999314893425, iteration: 67011
loss: 1.1265939474105835,grad_norm: 1.0000000088030558, iteration: 67012
loss: 1.0551804304122925,grad_norm: 0.9999993331600086, iteration: 67013
loss: 1.006386399269104,grad_norm: 0.9644156279918635, iteration: 67014
loss: 1.040223240852356,grad_norm: 0.9999990324534568, iteration: 67015
loss: 1.1762596368789673,grad_norm: 0.9999991167324253, iteration: 67016
loss: 1.004342794418335,grad_norm: 0.999999247518904, iteration: 67017
loss: 0.9828707575798035,grad_norm: 0.9999989980559203, iteration: 67018
loss: 1.0185267925262451,grad_norm: 0.9999994417110565, iteration: 67019
loss: 1.0060712099075317,grad_norm: 0.7972550799974295, iteration: 67020
loss: 1.0029923915863037,grad_norm: 0.9999989914068378, iteration: 67021
loss: 1.128381609916687,grad_norm: 0.9999994969733169, iteration: 67022
loss: 1.0372997522354126,grad_norm: 0.9999990962055104, iteration: 67023
loss: 1.0264989137649536,grad_norm: 0.9999995960203857, iteration: 67024
loss: 1.0023064613342285,grad_norm: 0.9999993697980175, iteration: 67025
loss: 0.9994562864303589,grad_norm: 0.8755236540680371, iteration: 67026
loss: 0.988454282283783,grad_norm: 0.9043805468514111, iteration: 67027
loss: 1.0608642101287842,grad_norm: 0.9999996328746693, iteration: 67028
loss: 1.0117472410202026,grad_norm: 0.8972279958882565, iteration: 67029
loss: 1.008279800415039,grad_norm: 0.9411027134012866, iteration: 67030
loss: 1.001563549041748,grad_norm: 0.920502454848519, iteration: 67031
loss: 1.0679482221603394,grad_norm: 0.9999996932170988, iteration: 67032
loss: 1.0157874822616577,grad_norm: 0.8511747469129362, iteration: 67033
loss: 1.1937212944030762,grad_norm: 0.9999991577188645, iteration: 67034
loss: 1.0659903287887573,grad_norm: 0.999999257432672, iteration: 67035
loss: 0.9817648530006409,grad_norm: 0.8873675039351443, iteration: 67036
loss: 0.9817335605621338,grad_norm: 0.841653650982048, iteration: 67037
loss: 0.9530947804450989,grad_norm: 0.9999998123874965, iteration: 67038
loss: 1.0568695068359375,grad_norm: 0.9999992524851652, iteration: 67039
loss: 1.0190483331680298,grad_norm: 0.9999992406259689, iteration: 67040
loss: 1.0089362859725952,grad_norm: 0.935024599574925, iteration: 67041
loss: 1.0260746479034424,grad_norm: 0.9999993586115155, iteration: 67042
loss: 1.0088748931884766,grad_norm: 0.999998997378242, iteration: 67043
loss: 1.0167769193649292,grad_norm: 0.9209231361026421, iteration: 67044
loss: 0.9994534850120544,grad_norm: 0.9999952260687537, iteration: 67045
loss: 0.9895727038383484,grad_norm: 0.9999990507357139, iteration: 67046
loss: 1.0041089057922363,grad_norm: 0.9227946676354768, iteration: 67047
loss: 1.0006781816482544,grad_norm: 0.999999197977706, iteration: 67048
loss: 1.0551875829696655,grad_norm: 0.9999990399433422, iteration: 67049
loss: 1.026610016822815,grad_norm: 0.93769148289517, iteration: 67050
loss: 0.987786591053009,grad_norm: 0.9999990497296966, iteration: 67051
loss: 1.055069923400879,grad_norm: 0.9999993815205556, iteration: 67052
loss: 1.0377118587493896,grad_norm: 0.9932964145147494, iteration: 67053
loss: 1.0556042194366455,grad_norm: 0.9999992346515749, iteration: 67054
loss: 1.0842859745025635,grad_norm: 0.99999954101826, iteration: 67055
loss: 1.0210514068603516,grad_norm: 0.9999998008835439, iteration: 67056
loss: 0.9473705887794495,grad_norm: 0.8635161496163742, iteration: 67057
loss: 1.0229092836380005,grad_norm: 0.8335874113883548, iteration: 67058
loss: 0.981617271900177,grad_norm: 0.8181630000657459, iteration: 67059
loss: 0.9911636114120483,grad_norm: 0.9999991733357104, iteration: 67060
loss: 0.9740733504295349,grad_norm: 0.8554293925392925, iteration: 67061
loss: 0.9979390501976013,grad_norm: 0.9878268149662126, iteration: 67062
loss: 1.0192630290985107,grad_norm: 0.932223195623221, iteration: 67063
loss: 1.0267146825790405,grad_norm: 0.8627710038524624, iteration: 67064
loss: 1.0359137058258057,grad_norm: 0.9999998869458517, iteration: 67065
loss: 1.1231783628463745,grad_norm: 0.9999997330957475, iteration: 67066
loss: 1.010122299194336,grad_norm: 0.94304391496651, iteration: 67067
loss: 0.9592235088348389,grad_norm: 0.8039490624246336, iteration: 67068
loss: 1.00845468044281,grad_norm: 0.8378917157938105, iteration: 67069
loss: 1.0281586647033691,grad_norm: 0.9999991129083843, iteration: 67070
loss: 1.075055718421936,grad_norm: 0.9999998440518292, iteration: 67071
loss: 1.0081045627593994,grad_norm: 0.8758950357525569, iteration: 67072
loss: 0.9991976022720337,grad_norm: 0.8068715745391932, iteration: 67073
loss: 1.0272753238677979,grad_norm: 0.9999995477476002, iteration: 67074
loss: 0.9754501581192017,grad_norm: 0.9999989679033526, iteration: 67075
loss: 1.0792409181594849,grad_norm: 0.9999995382479087, iteration: 67076
loss: 1.0040922164916992,grad_norm: 0.9085784317827565, iteration: 67077
loss: 0.9954004287719727,grad_norm: 0.9802984405304269, iteration: 67078
loss: 1.0136677026748657,grad_norm: 0.8781032793512805, iteration: 67079
loss: 0.9960941076278687,grad_norm: 0.9108817576876472, iteration: 67080
loss: 1.048884630203247,grad_norm: 0.999999740500492, iteration: 67081
loss: 1.0029430389404297,grad_norm: 0.9999990723351981, iteration: 67082
loss: 1.0596182346343994,grad_norm: 0.84363606212078, iteration: 67083
loss: 1.0149626731872559,grad_norm: 0.9999992778867712, iteration: 67084
loss: 1.0025848150253296,grad_norm: 0.8713970559978806, iteration: 67085
loss: 1.0273313522338867,grad_norm: 0.9709616956265873, iteration: 67086
loss: 1.0076022148132324,grad_norm: 0.7307736046991302, iteration: 67087
loss: 1.0280009508132935,grad_norm: 0.9999991704553677, iteration: 67088
loss: 1.027685284614563,grad_norm: 0.9999992195750624, iteration: 67089
loss: 1.0225878953933716,grad_norm: 0.9548152638724866, iteration: 67090
loss: 1.0092995166778564,grad_norm: 0.9999992556672885, iteration: 67091
loss: 0.9913654327392578,grad_norm: 0.8398511966138001, iteration: 67092
loss: 1.003966212272644,grad_norm: 0.9643322861202193, iteration: 67093
loss: 1.032624363899231,grad_norm: 0.9999991763156877, iteration: 67094
loss: 1.0006322860717773,grad_norm: 0.9999993791405871, iteration: 67095
loss: 1.4714871644973755,grad_norm: 0.9999994618235606, iteration: 67096
loss: 1.0355554819107056,grad_norm: 0.8886999635601366, iteration: 67097
loss: 1.0302480459213257,grad_norm: 0.9999991027848271, iteration: 67098
loss: 1.030420184135437,grad_norm: 0.9999995078820788, iteration: 67099
loss: 1.018983244895935,grad_norm: 0.9999993627552002, iteration: 67100
loss: 1.0156095027923584,grad_norm: 0.9999998254874461, iteration: 67101
loss: 1.0154871940612793,grad_norm: 0.9594873170790917, iteration: 67102
loss: 0.9868698120117188,grad_norm: 0.839029788181405, iteration: 67103
loss: 0.9829437732696533,grad_norm: 0.9999991477996348, iteration: 67104
loss: 1.0289955139160156,grad_norm: 0.9999992269792575, iteration: 67105
loss: 1.0234909057617188,grad_norm: 0.9999991616216061, iteration: 67106
loss: 1.0077464580535889,grad_norm: 0.9999990955537508, iteration: 67107
loss: 0.9621203541755676,grad_norm: 0.9999991716260082, iteration: 67108
loss: 1.0028318166732788,grad_norm: 0.9999993649264752, iteration: 67109
loss: 0.9937673807144165,grad_norm: 0.9999990176885932, iteration: 67110
loss: 1.043154001235962,grad_norm: 0.9999997619855068, iteration: 67111
loss: 0.9778730869293213,grad_norm: 0.9999989657271026, iteration: 67112
loss: 0.9888936877250671,grad_norm: 0.9999993117823248, iteration: 67113
loss: 1.0718151330947876,grad_norm: 0.9999990638723377, iteration: 67114
loss: 1.0246320962905884,grad_norm: 0.9874894889967974, iteration: 67115
loss: 0.9974443912506104,grad_norm: 0.851071075886226, iteration: 67116
loss: 1.0354843139648438,grad_norm: 0.9999991072163545, iteration: 67117
loss: 1.009904146194458,grad_norm: 0.8492317344436405, iteration: 67118
loss: 1.0829713344573975,grad_norm: 0.9999990645065716, iteration: 67119
loss: 1.0455504655838013,grad_norm: 0.9999991215194372, iteration: 67120
loss: 1.0142433643341064,grad_norm: 0.9999992247211152, iteration: 67121
loss: 1.0936241149902344,grad_norm: 0.9999997123451311, iteration: 67122
loss: 1.0731499195098877,grad_norm: 0.9999999573525566, iteration: 67123
loss: 1.0062353610992432,grad_norm: 0.9999990950537749, iteration: 67124
loss: 1.0248668193817139,grad_norm: 0.9999992359905043, iteration: 67125
loss: 0.9981335401535034,grad_norm: 0.91950739813161, iteration: 67126
loss: 0.954615592956543,grad_norm: 0.999999049923838, iteration: 67127
loss: 1.0131641626358032,grad_norm: 0.8965237052768409, iteration: 67128
loss: 1.0180402994155884,grad_norm: 0.8925590006206262, iteration: 67129
loss: 0.9980367422103882,grad_norm: 0.9535971573402829, iteration: 67130
loss: 0.9851784706115723,grad_norm: 0.8675946861500938, iteration: 67131
loss: 1.0061168670654297,grad_norm: 0.9489367605408034, iteration: 67132
loss: 1.0173964500427246,grad_norm: 0.9070071899505019, iteration: 67133
loss: 1.036365032196045,grad_norm: 0.9999991846275545, iteration: 67134
loss: 0.9925360083580017,grad_norm: 0.884234688049363, iteration: 67135
loss: 1.0172407627105713,grad_norm: 0.9999995215097117, iteration: 67136
loss: 0.9970154762268066,grad_norm: 0.9050862250451823, iteration: 67137
loss: 1.007129192352295,grad_norm: 0.9726556194183714, iteration: 67138
loss: 0.9929376840591431,grad_norm: 0.9999989928506147, iteration: 67139
loss: 0.9722588658332825,grad_norm: 0.8576039925885425, iteration: 67140
loss: 0.9801504611968994,grad_norm: 0.9550645388496624, iteration: 67141
loss: 1.0157867670059204,grad_norm: 0.8736767960389593, iteration: 67142
loss: 1.0351264476776123,grad_norm: 0.9999994149493122, iteration: 67143
loss: 1.007369041442871,grad_norm: 0.7814777963251622, iteration: 67144
loss: 1.0337127447128296,grad_norm: 0.9999993708689648, iteration: 67145
loss: 1.0040265321731567,grad_norm: 0.9448120045216519, iteration: 67146
loss: 1.0085067749023438,grad_norm: 0.9344814370965094, iteration: 67147
loss: 1.0664522647857666,grad_norm: 0.9999992713408591, iteration: 67148
loss: 0.9771631956100464,grad_norm: 0.9041035327586068, iteration: 67149
loss: 1.0055738687515259,grad_norm: 0.9999997075245902, iteration: 67150
loss: 1.0171509981155396,grad_norm: 0.9796582935834899, iteration: 67151
loss: 1.0162992477416992,grad_norm: 0.9297101386299453, iteration: 67152
loss: 1.017248272895813,grad_norm: 0.9999996551290725, iteration: 67153
loss: 1.0434397459030151,grad_norm: 0.9999995756834548, iteration: 67154
loss: 1.0081554651260376,grad_norm: 0.8449453646904279, iteration: 67155
loss: 1.0233948230743408,grad_norm: 0.9999992105620493, iteration: 67156
loss: 1.008817434310913,grad_norm: 0.9999990139499048, iteration: 67157
loss: 0.9925823211669922,grad_norm: 0.9344088213232357, iteration: 67158
loss: 1.0144349336624146,grad_norm: 0.9999992615359039, iteration: 67159
loss: 0.983589231967926,grad_norm: 0.9999991365941266, iteration: 67160
loss: 0.9750695824623108,grad_norm: 0.9999988979331952, iteration: 67161
loss: 1.0392682552337646,grad_norm: 0.9999998579343091, iteration: 67162
loss: 0.9613670110702515,grad_norm: 0.9999990763498967, iteration: 67163
loss: 1.0245732069015503,grad_norm: 0.7639307029892735, iteration: 67164
loss: 0.9856978058815002,grad_norm: 0.9444005179611038, iteration: 67165
loss: 1.0027016401290894,grad_norm: 0.9999989809506663, iteration: 67166
loss: 1.1251646280288696,grad_norm: 0.9999994621845215, iteration: 67167
loss: 1.09661865234375,grad_norm: 0.9999998708474132, iteration: 67168
loss: 0.9832338690757751,grad_norm: 0.9269185645269193, iteration: 67169
loss: 1.0081177949905396,grad_norm: 0.9575055096022486, iteration: 67170
loss: 1.0793060064315796,grad_norm: 0.9001852656068998, iteration: 67171
loss: 0.9999484419822693,grad_norm: 0.8365648582993014, iteration: 67172
loss: 0.9807428121566772,grad_norm: 0.8643057532067083, iteration: 67173
loss: 0.9905359148979187,grad_norm: 0.8237892655121699, iteration: 67174
loss: 1.0498852729797363,grad_norm: 0.8301425744213525, iteration: 67175
loss: 1.0250420570373535,grad_norm: 0.9999999092091523, iteration: 67176
loss: 1.0972822904586792,grad_norm: 0.9999990737857812, iteration: 67177
loss: 1.0075916051864624,grad_norm: 0.9243764139383503, iteration: 67178
loss: 0.9943901896476746,grad_norm: 0.974341514460772, iteration: 67179
loss: 1.047717809677124,grad_norm: 0.8829852563110785, iteration: 67180
loss: 1.0034266710281372,grad_norm: 0.8739321785421997, iteration: 67181
loss: 1.0381190776824951,grad_norm: 0.9999990578951827, iteration: 67182
loss: 0.9914237260818481,grad_norm: 0.8855684618681546, iteration: 67183
loss: 1.0880147218704224,grad_norm: 0.9999996087448234, iteration: 67184
loss: 0.9988225698471069,grad_norm: 0.9999993001136194, iteration: 67185
loss: 1.0015753507614136,grad_norm: 0.7423478768532273, iteration: 67186
loss: 0.962005615234375,grad_norm: 0.9999989102418994, iteration: 67187
loss: 1.0231642723083496,grad_norm: 0.7480760886199863, iteration: 67188
loss: 1.0267949104309082,grad_norm: 0.833596144883488, iteration: 67189
loss: 0.9940069317817688,grad_norm: 0.9999990250687282, iteration: 67190
loss: 1.0054906606674194,grad_norm: 0.837269836527377, iteration: 67191
loss: 0.9816528558731079,grad_norm: 0.8585621892497477, iteration: 67192
loss: 0.9523882269859314,grad_norm: 0.9999991314823158, iteration: 67193
loss: 1.0844175815582275,grad_norm: 0.9789991312340484, iteration: 67194
loss: 0.9828552603721619,grad_norm: 0.9999992926818404, iteration: 67195
loss: 1.019539713859558,grad_norm: 0.9999991726051295, iteration: 67196
loss: 1.0259318351745605,grad_norm: 0.8351281782222411, iteration: 67197
loss: 0.9873508810997009,grad_norm: 0.930269360324287, iteration: 67198
loss: 0.9949663281440735,grad_norm: 0.9999992596582773, iteration: 67199
loss: 0.9810880422592163,grad_norm: 0.9265561701620469, iteration: 67200
loss: 1.0263901948928833,grad_norm: 0.9027742727613766, iteration: 67201
loss: 1.0059157609939575,grad_norm: 0.8719612153995185, iteration: 67202
loss: 1.0464377403259277,grad_norm: 0.999999149398935, iteration: 67203
loss: 0.9739187359809875,grad_norm: 0.8250992941668117, iteration: 67204
loss: 1.003907561302185,grad_norm: 0.99999912191656, iteration: 67205
loss: 1.0179729461669922,grad_norm: 0.8883292838063265, iteration: 67206
loss: 1.0175367593765259,grad_norm: 0.9687585363266188, iteration: 67207
loss: 1.01265287399292,grad_norm: 0.99999899324162, iteration: 67208
loss: 1.049763560295105,grad_norm: 0.9999992933590229, iteration: 67209
loss: 1.0385104417800903,grad_norm: 0.9999991693855609, iteration: 67210
loss: 0.955471932888031,grad_norm: 0.8982155321920796, iteration: 67211
loss: 1.0988997220993042,grad_norm: 0.9999995643906645, iteration: 67212
loss: 0.9851750135421753,grad_norm: 0.9683334187385595, iteration: 67213
loss: 1.1393650770187378,grad_norm: 0.9999997604397438, iteration: 67214
loss: 1.0568259954452515,grad_norm: 0.9999999707645671, iteration: 67215
loss: 0.9446023106575012,grad_norm: 0.9999990092883776, iteration: 67216
loss: 1.0245614051818848,grad_norm: 0.8582035917587929, iteration: 67217
loss: 1.0151294469833374,grad_norm: 0.9999993900259512, iteration: 67218
loss: 1.013920783996582,grad_norm: 0.9999992014031639, iteration: 67219
loss: 1.0280265808105469,grad_norm: 0.9748172561669096, iteration: 67220
loss: 1.022394061088562,grad_norm: 0.9395658352479856, iteration: 67221
loss: 1.0277984142303467,grad_norm: 0.9999990864120982, iteration: 67222
loss: 0.9810976982116699,grad_norm: 0.9197184361599843, iteration: 67223
loss: 0.9930300712585449,grad_norm: 0.9999991714945068, iteration: 67224
loss: 1.0078322887420654,grad_norm: 0.7571098726822609, iteration: 67225
loss: 1.0356274843215942,grad_norm: 0.9999992078102978, iteration: 67226
loss: 1.0114785432815552,grad_norm: 0.9902062590137729, iteration: 67227
loss: 0.9986541271209717,grad_norm: 0.999999250574463, iteration: 67228
loss: 0.9914742708206177,grad_norm: 0.9999991458562482, iteration: 67229
loss: 0.9917126297950745,grad_norm: 0.9999990851628289, iteration: 67230
loss: 1.0046850442886353,grad_norm: 0.7705891560591249, iteration: 67231
loss: 1.0077821016311646,grad_norm: 0.8250129920943754, iteration: 67232
loss: 1.017568588256836,grad_norm: 0.9999995961784799, iteration: 67233
loss: 0.9928479790687561,grad_norm: 0.8438268539413011, iteration: 67234
loss: 0.9531195163726807,grad_norm: 0.7920555628189921, iteration: 67235
loss: 1.0659754276275635,grad_norm: 0.9999997664081864, iteration: 67236
loss: 1.024893045425415,grad_norm: 0.8572942850474852, iteration: 67237
loss: 0.9973309636116028,grad_norm: 0.8623163034276652, iteration: 67238
loss: 0.9873527884483337,grad_norm: 0.9999994037711806, iteration: 67239
loss: 0.9752010107040405,grad_norm: 0.9999991422214258, iteration: 67240
loss: 0.9975227117538452,grad_norm: 0.9999989836683575, iteration: 67241
loss: 0.9923616051673889,grad_norm: 0.818534072495051, iteration: 67242
loss: 0.9802486896514893,grad_norm: 0.9999996229432905, iteration: 67243
loss: 1.0436397790908813,grad_norm: 0.9999991209969105, iteration: 67244
loss: 1.021411418914795,grad_norm: 0.9999991539372701, iteration: 67245
loss: 1.0386056900024414,grad_norm: 0.9999998568185896, iteration: 67246
loss: 1.0301653146743774,grad_norm: 0.999999235274221, iteration: 67247
loss: 1.0189393758773804,grad_norm: 0.9999990121106658, iteration: 67248
loss: 0.9975501894950867,grad_norm: 0.9309417008489935, iteration: 67249
loss: 1.0714244842529297,grad_norm: 0.9999996538877377, iteration: 67250
loss: 1.0175613164901733,grad_norm: 0.9637601646903062, iteration: 67251
loss: 1.005814790725708,grad_norm: 0.9999993470324309, iteration: 67252
loss: 1.0044620037078857,grad_norm: 0.9999991746411709, iteration: 67253
loss: 0.9748386740684509,grad_norm: 0.9999990638189675, iteration: 67254
loss: 0.9719531536102295,grad_norm: 0.9999991291213617, iteration: 67255
loss: 1.016466736793518,grad_norm: 0.9999997930619614, iteration: 67256
loss: 1.0050814151763916,grad_norm: 0.8868179266079916, iteration: 67257
loss: 0.9669873118400574,grad_norm: 0.9172471706677388, iteration: 67258
loss: 1.0295305252075195,grad_norm: 0.9999991798965451, iteration: 67259
loss: 0.9844567179679871,grad_norm: 0.999999038865198, iteration: 67260
loss: 0.9896115064620972,grad_norm: 0.9282156026962751, iteration: 67261
loss: 1.022426962852478,grad_norm: 0.9627206594781217, iteration: 67262
loss: 0.9693158864974976,grad_norm: 0.9999991706168874, iteration: 67263
loss: 1.0003299713134766,grad_norm: 0.8635001427165279, iteration: 67264
loss: 0.9820042848587036,grad_norm: 0.8964934692068403, iteration: 67265
loss: 1.0166620016098022,grad_norm: 0.9999991175619618, iteration: 67266
loss: 1.011001706123352,grad_norm: 0.9999990738384653, iteration: 67267
loss: 0.9954417943954468,grad_norm: 0.9999990668065882, iteration: 67268
loss: 1.0795656442642212,grad_norm: 0.8329571578638658, iteration: 67269
loss: 0.9943616986274719,grad_norm: 0.8813130230177113, iteration: 67270
loss: 1.0019651651382446,grad_norm: 0.8459051043319916, iteration: 67271
loss: 0.9960853457450867,grad_norm: 0.9999995643935207, iteration: 67272
loss: 0.995223879814148,grad_norm: 0.9999992010881491, iteration: 67273
loss: 0.9570241570472717,grad_norm: 0.9899052839002489, iteration: 67274
loss: 0.9603050947189331,grad_norm: 0.9999990748149482, iteration: 67275
loss: 0.9926282167434692,grad_norm: 0.9514828076132598, iteration: 67276
loss: 0.9803124070167542,grad_norm: 0.7760499472336888, iteration: 67277
loss: 0.9884656071662903,grad_norm: 0.8249207296381968, iteration: 67278
loss: 0.996231734752655,grad_norm: 0.9374259576413493, iteration: 67279
loss: 1.0401957035064697,grad_norm: 0.9999994805527995, iteration: 67280
loss: 0.9796199798583984,grad_norm: 0.9256942587022301, iteration: 67281
loss: 1.0557695627212524,grad_norm: 0.979666764953772, iteration: 67282
loss: 1.0103200674057007,grad_norm: 0.9225829922567889, iteration: 67283
loss: 1.008195161819458,grad_norm: 0.8254133964949462, iteration: 67284
loss: 1.0534772872924805,grad_norm: 0.9630911650323238, iteration: 67285
loss: 0.9876988530158997,grad_norm: 0.816718460358898, iteration: 67286
loss: 1.061578392982483,grad_norm: 0.9999992014392473, iteration: 67287
loss: 1.0196788311004639,grad_norm: 0.9091134097242946, iteration: 67288
loss: 0.9894775748252869,grad_norm: 0.9999990492413953, iteration: 67289
loss: 0.9975025653839111,grad_norm: 0.9463159607009154, iteration: 67290
loss: 0.9869560599327087,grad_norm: 0.999999344836256, iteration: 67291
loss: 0.9988606572151184,grad_norm: 0.8905941371795183, iteration: 67292
loss: 1.0147106647491455,grad_norm: 0.7997987905738522, iteration: 67293
loss: 0.9919519424438477,grad_norm: 0.895195419318786, iteration: 67294
loss: 0.9878136515617371,grad_norm: 0.9999997199851541, iteration: 67295
loss: 0.9573763012886047,grad_norm: 0.8634389522443918, iteration: 67296
loss: 1.018496036529541,grad_norm: 0.8822718223998403, iteration: 67297
loss: 1.0049986839294434,grad_norm: 0.894882031620201, iteration: 67298
loss: 1.0180543661117554,grad_norm: 0.9999991788441381, iteration: 67299
loss: 1.027167797088623,grad_norm: 0.9999990584185902, iteration: 67300
loss: 0.9814280867576599,grad_norm: 0.9522919565956219, iteration: 67301
loss: 0.9905959963798523,grad_norm: 0.7807910072174079, iteration: 67302
loss: 1.1012115478515625,grad_norm: 0.9999998228088982, iteration: 67303
loss: 0.9763272404670715,grad_norm: 0.9999991528839719, iteration: 67304
loss: 1.0002992153167725,grad_norm: 0.8945714127623807, iteration: 67305
loss: 1.0082361698150635,grad_norm: 0.999999127286674, iteration: 67306
loss: 1.0145984888076782,grad_norm: 0.9999994802591651, iteration: 67307
loss: 1.0063154697418213,grad_norm: 0.999999374780506, iteration: 67308
loss: 1.0088692903518677,grad_norm: 0.9999990313584431, iteration: 67309
loss: 1.0211106538772583,grad_norm: 0.9350569354615332, iteration: 67310
loss: 1.0078163146972656,grad_norm: 0.9999990406665837, iteration: 67311
loss: 0.97847580909729,grad_norm: 0.9999991323926009, iteration: 67312
loss: 1.0295302867889404,grad_norm: 0.9242833915406686, iteration: 67313
loss: 1.0410876274108887,grad_norm: 0.9766605429954763, iteration: 67314
loss: 1.1253446340560913,grad_norm: 0.9764272026072219, iteration: 67315
loss: 1.030760407447815,grad_norm: 0.9999991469833267, iteration: 67316
loss: 1.0243263244628906,grad_norm: 0.8300133753036201, iteration: 67317
loss: 0.9976889491081238,grad_norm: 0.8438020079165287, iteration: 67318
loss: 1.0338313579559326,grad_norm: 0.8625516715252857, iteration: 67319
loss: 1.0287938117980957,grad_norm: 0.9999991706356832, iteration: 67320
loss: 1.0018113851547241,grad_norm: 0.9999997840029795, iteration: 67321
loss: 1.0598795413970947,grad_norm: 0.9999995601677979, iteration: 67322
loss: 0.9574533700942993,grad_norm: 0.9999990543001723, iteration: 67323
loss: 1.0731425285339355,grad_norm: 0.929179369008764, iteration: 67324
loss: 0.9569743275642395,grad_norm: 0.8448949360771301, iteration: 67325
loss: 1.0585523843765259,grad_norm: 0.9999994819403502, iteration: 67326
loss: 0.9950785040855408,grad_norm: 0.8404072044982563, iteration: 67327
loss: 0.9800203442573547,grad_norm: 0.8861093284453734, iteration: 67328
loss: 0.9900825023651123,grad_norm: 0.9461901912815397, iteration: 67329
loss: 0.9689348936080933,grad_norm: 0.9539495316709655, iteration: 67330
loss: 1.0110197067260742,grad_norm: 0.8160042496844382, iteration: 67331
loss: 1.0071048736572266,grad_norm: 0.9010949000189888, iteration: 67332
loss: 1.003503441810608,grad_norm: 0.9999997657446578, iteration: 67333
loss: 0.972180187702179,grad_norm: 0.8723091979600428, iteration: 67334
loss: 1.0115368366241455,grad_norm: 0.9194610910560684, iteration: 67335
loss: 1.048059344291687,grad_norm: 0.9999992206046143, iteration: 67336
loss: 0.9987789392471313,grad_norm: 0.9131308803160946, iteration: 67337
loss: 0.9993261694908142,grad_norm: 0.9294415981262903, iteration: 67338
loss: 0.9918408393859863,grad_norm: 0.8799610975475195, iteration: 67339
loss: 0.9390823245048523,grad_norm: 0.9999993309595806, iteration: 67340
loss: 1.025253176689148,grad_norm: 0.9486463456752138, iteration: 67341
loss: 1.013927936553955,grad_norm: 0.8212417693794599, iteration: 67342
loss: 1.0415469408035278,grad_norm: 0.9999993721707351, iteration: 67343
loss: 1.0184552669525146,grad_norm: 0.9998514962738095, iteration: 67344
loss: 0.9835776090621948,grad_norm: 0.9400916986382467, iteration: 67345
loss: 1.1108406782150269,grad_norm: 0.9999998113563197, iteration: 67346
loss: 0.9960947632789612,grad_norm: 0.9999990552778044, iteration: 67347
loss: 1.0606507062911987,grad_norm: 0.9999990829239311, iteration: 67348
loss: 1.0257667303085327,grad_norm: 0.9999993732603794, iteration: 67349
loss: 1.0116190910339355,grad_norm: 0.999999276681153, iteration: 67350
loss: 1.0464917421340942,grad_norm: 0.9999997939203549, iteration: 67351
loss: 1.0010359287261963,grad_norm: 0.99999934700849, iteration: 67352
loss: 1.0350006818771362,grad_norm: 0.9999991580650824, iteration: 67353
loss: 1.003379225730896,grad_norm: 0.9999990469463143, iteration: 67354
loss: 0.9816285967826843,grad_norm: 0.9999991438969423, iteration: 67355
loss: 1.0143563747406006,grad_norm: 0.9861341592848812, iteration: 67356
loss: 1.0358457565307617,grad_norm: 0.9999996181560219, iteration: 67357
loss: 0.9951990246772766,grad_norm: 0.9271018827004595, iteration: 67358
loss: 1.0259993076324463,grad_norm: 0.9999994936024618, iteration: 67359
loss: 0.983302652835846,grad_norm: 0.8698704981779314, iteration: 67360
loss: 1.0286803245544434,grad_norm: 0.9999992121228636, iteration: 67361
loss: 1.0668071508407593,grad_norm: 0.9999994317105004, iteration: 67362
loss: 0.9803916811943054,grad_norm: 0.9999992649969971, iteration: 67363
loss: 0.9934702515602112,grad_norm: 0.999999099697117, iteration: 67364
loss: 0.9875604510307312,grad_norm: 0.9999989348740705, iteration: 67365
loss: 0.9606634974479675,grad_norm: 0.9191071063627995, iteration: 67366
loss: 0.9871399402618408,grad_norm: 0.9529748402341786, iteration: 67367
loss: 0.9966053366661072,grad_norm: 0.9999991774275864, iteration: 67368
loss: 1.0260058641433716,grad_norm: 0.9472046004672527, iteration: 67369
loss: 1.0488979816436768,grad_norm: 0.9999991052963391, iteration: 67370
loss: 1.0484404563903809,grad_norm: 0.8776984405494153, iteration: 67371
loss: 1.0066956281661987,grad_norm: 0.9362810417503149, iteration: 67372
loss: 1.0304276943206787,grad_norm: 0.9367161756073246, iteration: 67373
loss: 0.9775838255882263,grad_norm: 0.8576816146784363, iteration: 67374
loss: 0.9983003735542297,grad_norm: 0.8899861493700014, iteration: 67375
loss: 0.9783172607421875,grad_norm: 0.9999991695381882, iteration: 67376
loss: 0.9616193771362305,grad_norm: 0.9634657573177562, iteration: 67377
loss: 0.9867825508117676,grad_norm: 0.9839122284235692, iteration: 67378
loss: 0.9999524354934692,grad_norm: 0.9999991201426816, iteration: 67379
loss: 0.9944142699241638,grad_norm: 0.9999991254302507, iteration: 67380
loss: 0.9872274994850159,grad_norm: 0.9576039958955476, iteration: 67381
loss: 1.0168430805206299,grad_norm: 0.9999990220659789, iteration: 67382
loss: 1.0408799648284912,grad_norm: 0.9999991058179176, iteration: 67383
loss: 0.9793678522109985,grad_norm: 0.8139325589474017, iteration: 67384
loss: 1.0382673740386963,grad_norm: 0.99999925441012, iteration: 67385
loss: 1.0121169090270996,grad_norm: 0.9999990759761822, iteration: 67386
loss: 1.0258880853652954,grad_norm: 0.8789767034840542, iteration: 67387
loss: 1.114667296409607,grad_norm: 0.9999994242797707, iteration: 67388
loss: 1.015416145324707,grad_norm: 0.9999993642470519, iteration: 67389
loss: 1.0189732313156128,grad_norm: 0.9999991028950409, iteration: 67390
loss: 1.0173770189285278,grad_norm: 0.9999993883414972, iteration: 67391
loss: 1.0076080560684204,grad_norm: 0.813757436081045, iteration: 67392
loss: 1.0138096809387207,grad_norm: 0.9732001155717458, iteration: 67393
loss: 1.0733792781829834,grad_norm: 0.9999994934841505, iteration: 67394
loss: 0.9983535408973694,grad_norm: 0.9580333728662521, iteration: 67395
loss: 1.0062510967254639,grad_norm: 0.8201368528129666, iteration: 67396
loss: 1.0198078155517578,grad_norm: 0.9999990816513732, iteration: 67397
loss: 0.9563871026039124,grad_norm: 0.9718181674717451, iteration: 67398
loss: 1.1151052713394165,grad_norm: 0.9999991562791785, iteration: 67399
loss: 1.0542174577713013,grad_norm: 0.999999033955408, iteration: 67400
loss: 0.9760130047798157,grad_norm: 0.999999254703814, iteration: 67401
loss: 1.0191984176635742,grad_norm: 0.9999996386423845, iteration: 67402
loss: 1.0013593435287476,grad_norm: 0.9752629957015045, iteration: 67403
loss: 1.0388777256011963,grad_norm: 0.9999991534686248, iteration: 67404
loss: 1.0881092548370361,grad_norm: 0.9999991145117303, iteration: 67405
loss: 1.000410556793213,grad_norm: 0.9999992528033345, iteration: 67406
loss: 1.050009846687317,grad_norm: 0.9999997394881492, iteration: 67407
loss: 1.0140992403030396,grad_norm: 0.8786727396077187, iteration: 67408
loss: 1.1342569589614868,grad_norm: 0.9999992221153963, iteration: 67409
loss: 0.9914276003837585,grad_norm: 0.9874277263383653, iteration: 67410
loss: 1.0158934593200684,grad_norm: 0.9999999458956432, iteration: 67411
loss: 1.0102616548538208,grad_norm: 0.8465228393288434, iteration: 67412
loss: 1.145459532737732,grad_norm: 0.9999990556095256, iteration: 67413
loss: 0.9702990651130676,grad_norm: 0.9999990911134953, iteration: 67414
loss: 1.0929882526397705,grad_norm: 0.9999991839932345, iteration: 67415
loss: 0.9876630306243896,grad_norm: 0.8806874510047039, iteration: 67416
loss: 1.0194857120513916,grad_norm: 0.8110006115227254, iteration: 67417
loss: 1.049432635307312,grad_norm: 0.9999991733798254, iteration: 67418
loss: 1.0169423818588257,grad_norm: 0.9612410827452292, iteration: 67419
loss: 0.993053138256073,grad_norm: 0.9892251399294621, iteration: 67420
loss: 1.0240657329559326,grad_norm: 0.9999994935094018, iteration: 67421
loss: 1.0665968656539917,grad_norm: 0.9999994649092421, iteration: 67422
loss: 0.9829700589179993,grad_norm: 0.8711944831738405, iteration: 67423
loss: 1.0459955930709839,grad_norm: 0.999999363753648, iteration: 67424
loss: 0.9960967302322388,grad_norm: 0.8231969369606356, iteration: 67425
loss: 1.0398690700531006,grad_norm: 0.9999994492537309, iteration: 67426
loss: 0.9726236462593079,grad_norm: 0.9905711677001596, iteration: 67427
loss: 1.0232182741165161,grad_norm: 0.9999991422562229, iteration: 67428
loss: 0.9601513743400574,grad_norm: 0.9921169905474364, iteration: 67429
loss: 1.0218172073364258,grad_norm: 0.999999104198485, iteration: 67430
loss: 0.9682794809341431,grad_norm: 0.9999990641511116, iteration: 67431
loss: 1.019162893295288,grad_norm: 0.9999991670879416, iteration: 67432
loss: 1.0036680698394775,grad_norm: 0.8194119700220924, iteration: 67433
loss: 0.9838799238204956,grad_norm: 0.8979290777525785, iteration: 67434
loss: 0.9651415944099426,grad_norm: 0.8798802778946515, iteration: 67435
loss: 1.0293364524841309,grad_norm: 0.9999990765168844, iteration: 67436
loss: 1.0379222631454468,grad_norm: 0.9513272137820191, iteration: 67437
loss: 1.0257610082626343,grad_norm: 0.8553596354430689, iteration: 67438
loss: 0.9874316453933716,grad_norm: 0.9774665290848223, iteration: 67439
loss: 1.073107361793518,grad_norm: 0.9999992394164238, iteration: 67440
loss: 1.0046526193618774,grad_norm: 0.9400091930811161, iteration: 67441
loss: 1.0066838264465332,grad_norm: 0.8504681533290088, iteration: 67442
loss: 1.0385853052139282,grad_norm: 0.9468981262492245, iteration: 67443
loss: 0.9841840863227844,grad_norm: 0.9342234450590488, iteration: 67444
loss: 1.0021833181381226,grad_norm: 0.8877117045115022, iteration: 67445
loss: 0.9964951872825623,grad_norm: 0.9999993272726461, iteration: 67446
loss: 1.0170618295669556,grad_norm: 0.99999952036814, iteration: 67447
loss: 1.0021826028823853,grad_norm: 0.9999991617965328, iteration: 67448
loss: 0.9974682927131653,grad_norm: 0.806607265469198, iteration: 67449
loss: 1.082938551902771,grad_norm: 0.999999918069851, iteration: 67450
loss: 0.9658676385879517,grad_norm: 0.999999148953245, iteration: 67451
loss: 1.0192515850067139,grad_norm: 0.9999989368059887, iteration: 67452
loss: 1.0464695692062378,grad_norm: 0.8776871754861083, iteration: 67453
loss: 1.0413477420806885,grad_norm: 0.9999998492274705, iteration: 67454
loss: 1.02675199508667,grad_norm: 0.9999994190313035, iteration: 67455
loss: 1.0586025714874268,grad_norm: 0.9999993065334974, iteration: 67456
loss: 1.0625942945480347,grad_norm: 0.9999993657291908, iteration: 67457
loss: 0.9952002763748169,grad_norm: 0.91067601015931, iteration: 67458
loss: 0.9243037104606628,grad_norm: 0.9821637652666385, iteration: 67459
loss: 0.9936352968215942,grad_norm: 0.9999989493834626, iteration: 67460
loss: 1.0595511198043823,grad_norm: 0.9999994546289865, iteration: 67461
loss: 1.0458807945251465,grad_norm: 0.966745013538254, iteration: 67462
loss: 1.003836750984192,grad_norm: 0.9999993101010703, iteration: 67463
loss: 0.9878078699111938,grad_norm: 0.9999991074067016, iteration: 67464
loss: 1.0212066173553467,grad_norm: 0.999999153687074, iteration: 67465
loss: 1.0252841711044312,grad_norm: 0.9999992280385955, iteration: 67466
loss: 0.9912717938423157,grad_norm: 0.9704216858770158, iteration: 67467
loss: 0.9631353616714478,grad_norm: 0.7847981550859581, iteration: 67468
loss: 1.0697280168533325,grad_norm: 0.9460775842018372, iteration: 67469
loss: 1.006919503211975,grad_norm: 0.8315804610529064, iteration: 67470
loss: 0.9856273531913757,grad_norm: 0.999999566711, iteration: 67471
loss: 1.014554500579834,grad_norm: 0.9999992142317885, iteration: 67472
loss: 1.012885332107544,grad_norm: 0.8553472458532062, iteration: 67473
loss: 1.0373355150222778,grad_norm: 0.906428535738158, iteration: 67474
loss: 1.0662015676498413,grad_norm: 0.9999992534320103, iteration: 67475
loss: 1.0309500694274902,grad_norm: 0.9117838017028036, iteration: 67476
loss: 0.9961974620819092,grad_norm: 0.8298763186942882, iteration: 67477
loss: 1.0177098512649536,grad_norm: 0.992255048365562, iteration: 67478
loss: 0.9970006942749023,grad_norm: 0.9999991164622356, iteration: 67479
loss: 1.041011095046997,grad_norm: 0.9999996328032213, iteration: 67480
loss: 1.080935001373291,grad_norm: 0.9999995419414679, iteration: 67481
loss: 1.0471621751785278,grad_norm: 0.999999551036691, iteration: 67482
loss: 0.9929386973381042,grad_norm: 0.999999369655243, iteration: 67483
loss: 0.9831042885780334,grad_norm: 0.9999990318133836, iteration: 67484
loss: 0.9988128542900085,grad_norm: 0.7933348212572164, iteration: 67485
loss: 0.943822979927063,grad_norm: 0.8635809272532574, iteration: 67486
loss: 0.9616847038269043,grad_norm: 0.9999991896190752, iteration: 67487
loss: 1.0293705463409424,grad_norm: 0.9999998358533123, iteration: 67488
loss: 1.0268679857254028,grad_norm: 0.9999990449285597, iteration: 67489
loss: 0.954412043094635,grad_norm: 0.9999991693428112, iteration: 67490
loss: 0.9862386584281921,grad_norm: 0.9128162704166272, iteration: 67491
loss: 1.04059636592865,grad_norm: 0.8791622581588417, iteration: 67492
loss: 1.0091814994812012,grad_norm: 0.8653488178142655, iteration: 67493
loss: 1.0112353563308716,grad_norm: 0.999998984685159, iteration: 67494
loss: 1.0035115480422974,grad_norm: 0.9999991491395575, iteration: 67495
loss: 1.0260119438171387,grad_norm: 0.8951618541164607, iteration: 67496
loss: 1.0031684637069702,grad_norm: 0.8988277331493958, iteration: 67497
loss: 0.9806129336357117,grad_norm: 0.9999991962729845, iteration: 67498
loss: 0.9859777092933655,grad_norm: 0.99377039140309, iteration: 67499
loss: 1.0208301544189453,grad_norm: 0.9999990956668584, iteration: 67500
loss: 0.9618077278137207,grad_norm: 0.9999990969177223, iteration: 67501
loss: 1.0254968404769897,grad_norm: 0.8356083303760993, iteration: 67502
loss: 1.0039374828338623,grad_norm: 0.9124607144234447, iteration: 67503
loss: 0.9976180791854858,grad_norm: 0.9055792568265288, iteration: 67504
loss: 0.9866818189620972,grad_norm: 0.8700110759346111, iteration: 67505
loss: 0.9988959431648254,grad_norm: 0.9999991331382893, iteration: 67506
loss: 0.9754873514175415,grad_norm: 0.9900256624138731, iteration: 67507
loss: 0.9773647785186768,grad_norm: 0.999999239926169, iteration: 67508
loss: 1.0314470529556274,grad_norm: 0.9303489937795097, iteration: 67509
loss: 1.006890892982483,grad_norm: 0.9632218989819409, iteration: 67510
loss: 1.037021517753601,grad_norm: 0.9999997847648007, iteration: 67511
loss: 1.0254825353622437,grad_norm: 0.99999928151899, iteration: 67512
loss: 0.9844653606414795,grad_norm: 0.999999107483875, iteration: 67513
loss: 1.0044814348220825,grad_norm: 0.9999993603907233, iteration: 67514
loss: 1.0252140760421753,grad_norm: 0.9999992709454124, iteration: 67515
loss: 0.9951040148735046,grad_norm: 0.8226616045095824, iteration: 67516
loss: 0.9707664847373962,grad_norm: 0.9999990933612424, iteration: 67517
loss: 0.9843272566795349,grad_norm: 0.9368496727459136, iteration: 67518
loss: 0.9874033331871033,grad_norm: 0.9999990623497653, iteration: 67519
loss: 1.00770902633667,grad_norm: 0.9885730950017946, iteration: 67520
loss: 1.0371909141540527,grad_norm: 0.8513366454270012, iteration: 67521
loss: 1.0816333293914795,grad_norm: 0.8588761510685294, iteration: 67522
loss: 0.9817460179328918,grad_norm: 0.9451480409095547, iteration: 67523
loss: 0.9721969962120056,grad_norm: 0.9999990128234415, iteration: 67524
loss: 1.0069606304168701,grad_norm: 0.9999992299545553, iteration: 67525
loss: 1.0047667026519775,grad_norm: 0.8897192091393744, iteration: 67526
loss: 0.9853976368904114,grad_norm: 0.9633084064575579, iteration: 67527
loss: 0.9793504476547241,grad_norm: 0.9084062296044892, iteration: 67528
loss: 1.0382665395736694,grad_norm: 0.9999998878225218, iteration: 67529
loss: 1.0036611557006836,grad_norm: 0.8392404071039034, iteration: 67530
loss: 1.012539029121399,grad_norm: 0.9999992149826887, iteration: 67531
loss: 0.9573933482170105,grad_norm: 0.8855547493478102, iteration: 67532
loss: 1.0198973417282104,grad_norm: 0.999999198896132, iteration: 67533
loss: 0.9893826246261597,grad_norm: 0.9850619104592558, iteration: 67534
loss: 1.0086442232131958,grad_norm: 0.9596973924890795, iteration: 67535
loss: 1.0206029415130615,grad_norm: 0.9202481261893974, iteration: 67536
loss: 1.0128780603408813,grad_norm: 0.935205787736062, iteration: 67537
loss: 1.0326495170593262,grad_norm: 0.9258148420450373, iteration: 67538
loss: 1.013726830482483,grad_norm: 0.9167650439531542, iteration: 67539
loss: 1.0299662351608276,grad_norm: 0.9999992720591743, iteration: 67540
loss: 0.992779552936554,grad_norm: 0.7356450621778557, iteration: 67541
loss: 1.0381652116775513,grad_norm: 0.9999991114381628, iteration: 67542
loss: 0.9904228448867798,grad_norm: 0.9227258260690145, iteration: 67543
loss: 1.0329642295837402,grad_norm: 0.9999990762576709, iteration: 67544
loss: 1.0012445449829102,grad_norm: 0.9561657025721881, iteration: 67545
loss: 0.9732829928398132,grad_norm: 0.9999991790499745, iteration: 67546
loss: 0.9822799563407898,grad_norm: 0.879749150005657, iteration: 67547
loss: 0.996295690536499,grad_norm: 0.9999990384393387, iteration: 67548
loss: 1.0067273378372192,grad_norm: 0.9192306432622681, iteration: 67549
loss: 1.0206382274627686,grad_norm: 0.9999989742767459, iteration: 67550
loss: 1.056600570678711,grad_norm: 0.7898746652855096, iteration: 67551
loss: 1.0133016109466553,grad_norm: 0.8541313782786988, iteration: 67552
loss: 1.0153635740280151,grad_norm: 0.7793815136140906, iteration: 67553
loss: 1.012999415397644,grad_norm: 0.9999991916124894, iteration: 67554
loss: 1.0145009756088257,grad_norm: 0.8579329231665571, iteration: 67555
loss: 1.0162599086761475,grad_norm: 0.9999992165630895, iteration: 67556
loss: 1.008145809173584,grad_norm: 0.9738753152698778, iteration: 67557
loss: 0.9725673794746399,grad_norm: 0.9723356628365996, iteration: 67558
loss: 1.018168330192566,grad_norm: 0.9999990182282137, iteration: 67559
loss: 1.0312516689300537,grad_norm: 0.9912223833537347, iteration: 67560
loss: 1.0112076997756958,grad_norm: 0.8593217342138583, iteration: 67561
loss: 0.9934640526771545,grad_norm: 0.8556772431544175, iteration: 67562
loss: 0.9703822135925293,grad_norm: 0.9643406798985824, iteration: 67563
loss: 1.0536998510360718,grad_norm: 0.9999999531583992, iteration: 67564
loss: 1.0081912279129028,grad_norm: 0.9999992509421443, iteration: 67565
loss: 0.996375560760498,grad_norm: 0.8600466119915783, iteration: 67566
loss: 1.0045092105865479,grad_norm: 0.9999992023100107, iteration: 67567
loss: 0.9876940250396729,grad_norm: 0.9170149035313484, iteration: 67568
loss: 1.021262764930725,grad_norm: 0.9999992108621535, iteration: 67569
loss: 1.0027246475219727,grad_norm: 0.9815578222846175, iteration: 67570
loss: 1.0624606609344482,grad_norm: 0.999999930258508, iteration: 67571
loss: 0.9866743683815002,grad_norm: 0.9647823542731844, iteration: 67572
loss: 0.9698920249938965,grad_norm: 0.7668697689239343, iteration: 67573
loss: 1.0221526622772217,grad_norm: 0.9113418613510084, iteration: 67574
loss: 1.0087116956710815,grad_norm: 0.9971364985097226, iteration: 67575
loss: 0.9944215416908264,grad_norm: 0.9801681565941602, iteration: 67576
loss: 0.9860559701919556,grad_norm: 0.8809292648421169, iteration: 67577
loss: 1.019509196281433,grad_norm: 0.9333500663741304, iteration: 67578
loss: 1.0021544694900513,grad_norm: 0.9926738852845813, iteration: 67579
loss: 0.9575431942939758,grad_norm: 0.8263298238797333, iteration: 67580
loss: 0.9718536138534546,grad_norm: 0.999999222356805, iteration: 67581
loss: 1.0171743631362915,grad_norm: 0.9999996189890146, iteration: 67582
loss: 1.0026906728744507,grad_norm: 0.9999996686400449, iteration: 67583
loss: 0.9971668720245361,grad_norm: 0.8290792500201167, iteration: 67584
loss: 1.044373631477356,grad_norm: 0.9999991379067968, iteration: 67585
loss: 1.0067132711410522,grad_norm: 0.95189792183619, iteration: 67586
loss: 1.0340533256530762,grad_norm: 0.9999994718963987, iteration: 67587
loss: 0.9842709898948669,grad_norm: 0.9093006830156957, iteration: 67588
loss: 0.9974654912948608,grad_norm: 0.9999991265080983, iteration: 67589
loss: 0.983284592628479,grad_norm: 0.9131334177480382, iteration: 67590
loss: 1.0303936004638672,grad_norm: 0.9999991978698304, iteration: 67591
loss: 1.043545126914978,grad_norm: 0.9999992382745737, iteration: 67592
loss: 1.0067946910858154,grad_norm: 0.9999990347102787, iteration: 67593
loss: 0.9979574680328369,grad_norm: 0.9999997432197926, iteration: 67594
loss: 0.9943902492523193,grad_norm: 0.9618499258605059, iteration: 67595
loss: 1.042239785194397,grad_norm: 0.9999990085220211, iteration: 67596
loss: 0.9780194163322449,grad_norm: 0.8505344216773387, iteration: 67597
loss: 1.0201466083526611,grad_norm: 0.8479735610020608, iteration: 67598
loss: 0.9792193174362183,grad_norm: 0.8672601252077087, iteration: 67599
loss: 1.0168731212615967,grad_norm: 0.9999993904410468, iteration: 67600
loss: 1.0823673009872437,grad_norm: 0.9999993783159992, iteration: 67601
loss: 0.999821126461029,grad_norm: 0.9527096365662503, iteration: 67602
loss: 0.9892183542251587,grad_norm: 0.8180472949649882, iteration: 67603
loss: 0.9993636608123779,grad_norm: 0.963160668277008, iteration: 67604
loss: 1.0143113136291504,grad_norm: 0.9862559833641518, iteration: 67605
loss: 1.0191079378128052,grad_norm: 0.8609777835517971, iteration: 67606
loss: 1.0629103183746338,grad_norm: 0.9231778527411371, iteration: 67607
loss: 1.0266083478927612,grad_norm: 0.8517240954526759, iteration: 67608
loss: 1.0133920907974243,grad_norm: 0.8019084382389757, iteration: 67609
loss: 1.0292787551879883,grad_norm: 0.9443829270763224, iteration: 67610
loss: 1.0106099843978882,grad_norm: 0.9999994536954184, iteration: 67611
loss: 1.0929547548294067,grad_norm: 0.9999997945162932, iteration: 67612
loss: 0.9870303273200989,grad_norm: 0.9704414894547919, iteration: 67613
loss: 1.040549874305725,grad_norm: 0.9217776529729109, iteration: 67614
loss: 1.0055450201034546,grad_norm: 0.8887317280223448, iteration: 67615
loss: 1.0097273588180542,grad_norm: 0.8060752086515718, iteration: 67616
loss: 1.0231910943984985,grad_norm: 0.9944319680637648, iteration: 67617
loss: 0.9955171942710876,grad_norm: 0.9573889713657718, iteration: 67618
loss: 1.0099300146102905,grad_norm: 0.931127725668044, iteration: 67619
loss: 0.9975491762161255,grad_norm: 0.8421823575082588, iteration: 67620
loss: 0.977450966835022,grad_norm: 0.9494059956028701, iteration: 67621
loss: 1.0313540697097778,grad_norm: 0.9999991312123228, iteration: 67622
loss: 0.981621265411377,grad_norm: 0.9108839363527124, iteration: 67623
loss: 0.9849252104759216,grad_norm: 0.9999990405870851, iteration: 67624
loss: 0.9882543087005615,grad_norm: 0.7468706865778344, iteration: 67625
loss: 0.9904628992080688,grad_norm: 0.9999991313227046, iteration: 67626
loss: 0.9966518878936768,grad_norm: 0.9697529442106549, iteration: 67627
loss: 0.9890986084938049,grad_norm: 0.834286579531764, iteration: 67628
loss: 1.0214096307754517,grad_norm: 0.9372804083603806, iteration: 67629
loss: 0.9917569756507874,grad_norm: 0.9316008784457323, iteration: 67630
loss: 0.9789482951164246,grad_norm: 0.9999989574481961, iteration: 67631
loss: 0.9990750551223755,grad_norm: 0.9999992547722486, iteration: 67632
loss: 1.0126712322235107,grad_norm: 0.9999990903099439, iteration: 67633
loss: 0.9858936667442322,grad_norm: 0.927391036139922, iteration: 67634
loss: 1.042155146598816,grad_norm: 0.9999992689118916, iteration: 67635
loss: 0.992423951625824,grad_norm: 0.9999992781689695, iteration: 67636
loss: 0.9817125797271729,grad_norm: 0.7772761899272026, iteration: 67637
loss: 1.0032435655593872,grad_norm: 0.9960318306543015, iteration: 67638
loss: 1.0186676979064941,grad_norm: 0.9999990497359995, iteration: 67639
loss: 1.0119009017944336,grad_norm: 0.9999995951329355, iteration: 67640
loss: 1.079635500907898,grad_norm: 0.9999993978037308, iteration: 67641
loss: 0.9890591502189636,grad_norm: 0.9975646198949493, iteration: 67642
loss: 0.9931644201278687,grad_norm: 0.9551762750514078, iteration: 67643
loss: 1.023437738418579,grad_norm: 0.8750103294041645, iteration: 67644
loss: 0.9772839546203613,grad_norm: 0.9999991681455526, iteration: 67645
loss: 0.9920539259910583,grad_norm: 0.9999991519412804, iteration: 67646
loss: 0.9918915033340454,grad_norm: 0.9999991944340499, iteration: 67647
loss: 0.9831883907318115,grad_norm: 0.8405281362839397, iteration: 67648
loss: 0.9700471758842468,grad_norm: 0.9999994654397908, iteration: 67649
loss: 0.9901567101478577,grad_norm: 0.9552069870102315, iteration: 67650
loss: 1.064721703529358,grad_norm: 0.9999991315656028, iteration: 67651
loss: 1.050358533859253,grad_norm: 0.9999991631966787, iteration: 67652
loss: 0.9465942978858948,grad_norm: 0.8164755220726043, iteration: 67653
loss: 1.0249576568603516,grad_norm: 0.9999991041463213, iteration: 67654
loss: 0.9922757148742676,grad_norm: 0.9999991251826142, iteration: 67655
loss: 0.97065669298172,grad_norm: 0.8665535141792056, iteration: 67656
loss: 0.9825956225395203,grad_norm: 0.9999991893892842, iteration: 67657
loss: 1.0093317031860352,grad_norm: 0.9999991084680638, iteration: 67658
loss: 1.0162744522094727,grad_norm: 0.8936349708565304, iteration: 67659
loss: 0.9772073030471802,grad_norm: 0.989287664026865, iteration: 67660
loss: 0.9995554089546204,grad_norm: 0.95819077679321, iteration: 67661
loss: 0.9985599517822266,grad_norm: 0.9801995508344206, iteration: 67662
loss: 0.9956199526786804,grad_norm: 0.9184422359430046, iteration: 67663
loss: 1.0105587244033813,grad_norm: 0.8145972661563835, iteration: 67664
loss: 1.0357139110565186,grad_norm: 0.9344255870961591, iteration: 67665
loss: 0.9951155185699463,grad_norm: 0.9557825525833731, iteration: 67666
loss: 0.9721567034721375,grad_norm: 0.9284259405616226, iteration: 67667
loss: 0.9721114635467529,grad_norm: 0.9999989753240875, iteration: 67668
loss: 1.0442734956741333,grad_norm: 0.9999998377717219, iteration: 67669
loss: 1.0150516033172607,grad_norm: 0.9029158424229667, iteration: 67670
loss: 1.008247971534729,grad_norm: 0.924522370962756, iteration: 67671
loss: 0.9815406203269958,grad_norm: 0.9999991866639991, iteration: 67672
loss: 0.9491645693778992,grad_norm: 0.9629753331933942, iteration: 67673
loss: 0.9736526608467102,grad_norm: 0.8166705252449205, iteration: 67674
loss: 1.0525935888290405,grad_norm: 0.8603636723880597, iteration: 67675
loss: 1.0312126874923706,grad_norm: 0.896268452818935, iteration: 67676
loss: 1.0551992654800415,grad_norm: 0.9999995941561136, iteration: 67677
loss: 0.9311911463737488,grad_norm: 0.9895787960224737, iteration: 67678
loss: 0.9716011881828308,grad_norm: 0.8429843552863405, iteration: 67679
loss: 1.0030276775360107,grad_norm: 0.8840305850212209, iteration: 67680
loss: 1.0434199571609497,grad_norm: 0.9999996100423353, iteration: 67681
loss: 1.0053199529647827,grad_norm: 0.999999463009239, iteration: 67682
loss: 1.0091800689697266,grad_norm: 0.8968993341405455, iteration: 67683
loss: 0.9966428875923157,grad_norm: 0.8314751369848993, iteration: 67684
loss: 1.0171818733215332,grad_norm: 0.8861994179400338, iteration: 67685
loss: 1.0105702877044678,grad_norm: 0.7985347992752306, iteration: 67686
loss: 1.00132417678833,grad_norm: 0.9049391213810442, iteration: 67687
loss: 1.0098942518234253,grad_norm: 0.8822803259540413, iteration: 67688
loss: 0.99814373254776,grad_norm: 0.9823914235936951, iteration: 67689
loss: 1.0032174587249756,grad_norm: 0.9518603589625851, iteration: 67690
loss: 0.9438925981521606,grad_norm: 0.8140111303246336, iteration: 67691
loss: 1.0593630075454712,grad_norm: 0.9239658316577298, iteration: 67692
loss: 0.984515905380249,grad_norm: 0.8588896797282951, iteration: 67693
loss: 1.0190998315811157,grad_norm: 0.9090574360665931, iteration: 67694
loss: 0.9691822528839111,grad_norm: 0.9999993491157452, iteration: 67695
loss: 1.07929265499115,grad_norm: 0.999999614724735, iteration: 67696
loss: 0.9969522953033447,grad_norm: 0.9169099266216302, iteration: 67697
loss: 1.0052893161773682,grad_norm: 0.9085512010002409, iteration: 67698
loss: 0.9808319807052612,grad_norm: 0.9521535858941846, iteration: 67699
loss: 0.9995681643486023,grad_norm: 0.9182732088995468, iteration: 67700
loss: 0.9743754863739014,grad_norm: 0.9999993272546854, iteration: 67701
loss: 0.9720205068588257,grad_norm: 0.743136905110641, iteration: 67702
loss: 1.0165578126907349,grad_norm: 0.9409643815282065, iteration: 67703
loss: 0.9965904951095581,grad_norm: 0.9999990048869415, iteration: 67704
loss: 1.0826219320297241,grad_norm: 0.8292376671373437, iteration: 67705
loss: 0.9737109541893005,grad_norm: 0.999999103796536, iteration: 67706
loss: 1.0112977027893066,grad_norm: 0.9249028252262517, iteration: 67707
loss: 1.0520108938217163,grad_norm: 0.8759096428807361, iteration: 67708
loss: 1.0039640665054321,grad_norm: 0.9999995469718611, iteration: 67709
loss: 1.040911316871643,grad_norm: 0.999999529943161, iteration: 67710
loss: 1.0402106046676636,grad_norm: 0.8340660684784474, iteration: 67711
loss: 0.9284006953239441,grad_norm: 0.9094573850130302, iteration: 67712
loss: 1.0680058002471924,grad_norm: 0.9999990835544197, iteration: 67713
loss: 0.9780784845352173,grad_norm: 0.9206928002012024, iteration: 67714
loss: 0.9728960990905762,grad_norm: 0.8260624860163229, iteration: 67715
loss: 1.015749454498291,grad_norm: 0.9142665669654888, iteration: 67716
loss: 0.9851915240287781,grad_norm: 0.8156819487678743, iteration: 67717
loss: 1.0113762617111206,grad_norm: 0.8549165085140085, iteration: 67718
loss: 1.0255414247512817,grad_norm: 0.9999992839370898, iteration: 67719
loss: 1.0248667001724243,grad_norm: 0.9188146449143908, iteration: 67720
loss: 1.0241992473602295,grad_norm: 0.9088138826804122, iteration: 67721
loss: 1.0307377576828003,grad_norm: 0.8805336308907848, iteration: 67722
loss: 1.0263844728469849,grad_norm: 0.8755861574597176, iteration: 67723
loss: 1.0055614709854126,grad_norm: 0.8843242580550597, iteration: 67724
loss: 1.064103126525879,grad_norm: 0.9999999077283308, iteration: 67725
loss: 0.9919779896736145,grad_norm: 0.9999990474195823, iteration: 67726
loss: 0.9853343367576599,grad_norm: 0.766042275678968, iteration: 67727
loss: 0.9949578046798706,grad_norm: 0.9196406297218068, iteration: 67728
loss: 0.9931784868240356,grad_norm: 0.999999243026572, iteration: 67729
loss: 0.9867597222328186,grad_norm: 0.8688516662856184, iteration: 67730
loss: 0.9963041543960571,grad_norm: 0.9999991290881883, iteration: 67731
loss: 1.0225120782852173,grad_norm: 0.9999993051051719, iteration: 67732
loss: 0.9726272821426392,grad_norm: 0.9148960842525387, iteration: 67733
loss: 0.9955282211303711,grad_norm: 0.8346698453851025, iteration: 67734
loss: 1.018066167831421,grad_norm: 0.9843021836958961, iteration: 67735
loss: 1.0009421110153198,grad_norm: 0.8957315029893957, iteration: 67736
loss: 0.994094729423523,grad_norm: 0.9753442265685559, iteration: 67737
loss: 0.9892515540122986,grad_norm: 0.803825661112055, iteration: 67738
loss: 1.0069419145584106,grad_norm: 0.9838743906930202, iteration: 67739
loss: 1.0243124961853027,grad_norm: 0.9999995101139636, iteration: 67740
loss: 1.016940951347351,grad_norm: 0.8135214327717695, iteration: 67741
loss: 1.0046992301940918,grad_norm: 0.7715825905916881, iteration: 67742
loss: 1.0276919603347778,grad_norm: 0.9999989800635426, iteration: 67743
loss: 1.1260418891906738,grad_norm: 0.9999998277368285, iteration: 67744
loss: 1.0132806301116943,grad_norm: 0.9999996121435296, iteration: 67745
loss: 0.9643175005912781,grad_norm: 0.964743854618423, iteration: 67746
loss: 0.9866108298301697,grad_norm: 0.8534918996296009, iteration: 67747
loss: 1.022066593170166,grad_norm: 0.999998963858983, iteration: 67748
loss: 1.0237590074539185,grad_norm: 0.9076112310046897, iteration: 67749
loss: 0.9959262013435364,grad_norm: 0.9999990750266426, iteration: 67750
loss: 1.028403401374817,grad_norm: 0.9681269642332625, iteration: 67751
loss: 1.0158034563064575,grad_norm: 0.9999993333809596, iteration: 67752
loss: 1.0084282159805298,grad_norm: 0.9999992877941521, iteration: 67753
loss: 0.9965204000473022,grad_norm: 0.9999991474421631, iteration: 67754
loss: 1.0176715850830078,grad_norm: 0.8154199509454761, iteration: 67755
loss: 1.0200624465942383,grad_norm: 0.7895942308889089, iteration: 67756
loss: 1.1271523237228394,grad_norm: 0.9999992269264224, iteration: 67757
loss: 1.0021589994430542,grad_norm: 0.8315234419103891, iteration: 67758
loss: 1.0282151699066162,grad_norm: 0.9999993628757188, iteration: 67759
loss: 1.03653883934021,grad_norm: 0.9902522212306776, iteration: 67760
loss: 1.0304169654846191,grad_norm: 0.9999991525566653, iteration: 67761
loss: 0.9904003739356995,grad_norm: 0.9963447143266922, iteration: 67762
loss: 1.0250908136367798,grad_norm: 0.8802988567191559, iteration: 67763
loss: 0.9889683723449707,grad_norm: 0.8604812975900767, iteration: 67764
loss: 0.9955059885978699,grad_norm: 0.9631293369992057, iteration: 67765
loss: 1.0283899307250977,grad_norm: 0.7463537167318391, iteration: 67766
loss: 0.9809874892234802,grad_norm: 0.9999991341671336, iteration: 67767
loss: 0.9798811078071594,grad_norm: 0.8438785913587687, iteration: 67768
loss: 0.9794824123382568,grad_norm: 0.9648034064878152, iteration: 67769
loss: 1.046869158744812,grad_norm: 0.8519530887215935, iteration: 67770
loss: 1.0764837265014648,grad_norm: 0.9999993044650265, iteration: 67771
loss: 1.0866179466247559,grad_norm: 0.9608464784116412, iteration: 67772
loss: 0.9928299188613892,grad_norm: 0.9607628191522101, iteration: 67773
loss: 1.0304065942764282,grad_norm: 0.9999999391480043, iteration: 67774
loss: 0.9600385427474976,grad_norm: 0.9999992363440242, iteration: 67775
loss: 1.0069434642791748,grad_norm: 0.9999991084201413, iteration: 67776
loss: 0.9611433744430542,grad_norm: 0.8728342402112486, iteration: 67777
loss: 1.0834459066390991,grad_norm: 0.9999995541698757, iteration: 67778
loss: 0.9971956014633179,grad_norm: 0.9871910918267611, iteration: 67779
loss: 1.0126304626464844,grad_norm: 0.9999996141220744, iteration: 67780
loss: 1.0152620077133179,grad_norm: 0.9608581939484456, iteration: 67781
loss: 1.0323537588119507,grad_norm: 0.9999992897792019, iteration: 67782
loss: 1.021767497062683,grad_norm: 0.9762640990124273, iteration: 67783
loss: 1.0322635173797607,grad_norm: 0.9403940071568302, iteration: 67784
loss: 0.9798034429550171,grad_norm: 0.9999991090939433, iteration: 67785
loss: 1.0174020528793335,grad_norm: 0.9999993013942977, iteration: 67786
loss: 1.027366042137146,grad_norm: 0.9807867655988752, iteration: 67787
loss: 1.0039044618606567,grad_norm: 0.9672578543212209, iteration: 67788
loss: 1.004400610923767,grad_norm: 0.9999991603541507, iteration: 67789
loss: 1.018257975578308,grad_norm: 0.9999990330501741, iteration: 67790
loss: 0.9860048294067383,grad_norm: 0.8924992589044122, iteration: 67791
loss: 0.9836889505386353,grad_norm: 0.8112502384289768, iteration: 67792
loss: 0.9821364283561707,grad_norm: 0.9999990900079497, iteration: 67793
loss: 1.0479220151901245,grad_norm: 0.9999994205744765, iteration: 67794
loss: 1.1009464263916016,grad_norm: 0.8555224638332539, iteration: 67795
loss: 0.9984952807426453,grad_norm: 0.8067539362608834, iteration: 67796
loss: 0.9653285145759583,grad_norm: 0.9999991069981456, iteration: 67797
loss: 1.0306960344314575,grad_norm: 0.8207763231142577, iteration: 67798
loss: 0.9958916902542114,grad_norm: 0.9164076748966402, iteration: 67799
loss: 1.023891568183899,grad_norm: 0.9999992775553049, iteration: 67800
loss: 1.003289818763733,grad_norm: 0.9999992019714776, iteration: 67801
loss: 1.0157774686813354,grad_norm: 0.887054660626214, iteration: 67802
loss: 1.0396465063095093,grad_norm: 0.9999991206615855, iteration: 67803
loss: 0.9884065389633179,grad_norm: 0.9082286551061639, iteration: 67804
loss: 1.0221056938171387,grad_norm: 0.9999990926275396, iteration: 67805
loss: 1.1224185228347778,grad_norm: 0.9351967407414329, iteration: 67806
loss: 1.0223000049591064,grad_norm: 0.9999990092071113, iteration: 67807
loss: 1.0383501052856445,grad_norm: 0.9999992711298009, iteration: 67808
loss: 1.0073002576828003,grad_norm: 0.9390652581032781, iteration: 67809
loss: 0.9991604685783386,grad_norm: 0.8508406375056532, iteration: 67810
loss: 1.0028685331344604,grad_norm: 0.9773502382085799, iteration: 67811
loss: 1.0380864143371582,grad_norm: 0.9999996245223743, iteration: 67812
loss: 1.0298309326171875,grad_norm: 0.9999998443056808, iteration: 67813
loss: 1.006687879562378,grad_norm: 0.9999992464518217, iteration: 67814
loss: 0.9743671417236328,grad_norm: 0.9999991924585792, iteration: 67815
loss: 0.992014467716217,grad_norm: 0.9999992003171386, iteration: 67816
loss: 0.9987456798553467,grad_norm: 0.9313147930340782, iteration: 67817
loss: 1.01002037525177,grad_norm: 0.7584772250009812, iteration: 67818
loss: 0.9994341731071472,grad_norm: 0.9663886926713329, iteration: 67819
loss: 0.9839463829994202,grad_norm: 0.9014305977563524, iteration: 67820
loss: 1.047080636024475,grad_norm: 0.9777483921082059, iteration: 67821
loss: 0.9698173403739929,grad_norm: 0.9999991076646232, iteration: 67822
loss: 1.0536712408065796,grad_norm: 0.9999998596253532, iteration: 67823
loss: 1.0023397207260132,grad_norm: 0.9410502823064392, iteration: 67824
loss: 0.9920119643211365,grad_norm: 0.9999990220306937, iteration: 67825
loss: 1.03346848487854,grad_norm: 0.9999991267759186, iteration: 67826
loss: 0.9940134286880493,grad_norm: 0.9384856565934282, iteration: 67827
loss: 0.9965716004371643,grad_norm: 0.9588358573042283, iteration: 67828
loss: 1.0115593671798706,grad_norm: 0.8687955207064262, iteration: 67829
loss: 1.023895502090454,grad_norm: 0.9999991666201685, iteration: 67830
loss: 1.009562611579895,grad_norm: 0.9999991101012541, iteration: 67831
loss: 1.0106759071350098,grad_norm: 0.9999992438183052, iteration: 67832
loss: 0.9980988502502441,grad_norm: 0.9999990108143479, iteration: 67833
loss: 1.0176348686218262,grad_norm: 0.9820924836144039, iteration: 67834
loss: 1.0730769634246826,grad_norm: 0.9999995569483785, iteration: 67835
loss: 1.0060324668884277,grad_norm: 0.8375165899266966, iteration: 67836
loss: 1.0346564054489136,grad_norm: 0.9999992591013522, iteration: 67837
loss: 1.032774567604065,grad_norm: 0.9999998898236455, iteration: 67838
loss: 0.9640350341796875,grad_norm: 0.885563777780416, iteration: 67839
loss: 0.9504270553588867,grad_norm: 0.8393778070765964, iteration: 67840
loss: 1.0162442922592163,grad_norm: 0.9999993761755119, iteration: 67841
loss: 1.0100321769714355,grad_norm: 0.8894865253592339, iteration: 67842
loss: 0.9881682395935059,grad_norm: 0.999999069269237, iteration: 67843
loss: 1.0035268068313599,grad_norm: 0.8847517262218685, iteration: 67844
loss: 0.9918238520622253,grad_norm: 0.9253426589950045, iteration: 67845
loss: 1.0255835056304932,grad_norm: 0.9795591109748828, iteration: 67846
loss: 1.0446150302886963,grad_norm: 0.8904541331163407, iteration: 67847
loss: 0.9637360572814941,grad_norm: 0.9835356704113946, iteration: 67848
loss: 1.0204607248306274,grad_norm: 0.9999995301030358, iteration: 67849
loss: 1.0175797939300537,grad_norm: 0.9999991968578172, iteration: 67850
loss: 0.9978670477867126,grad_norm: 0.999999060947885, iteration: 67851
loss: 0.9922983050346375,grad_norm: 0.9403406163400945, iteration: 67852
loss: 0.9976218342781067,grad_norm: 0.9038087031522701, iteration: 67853
loss: 0.9951547384262085,grad_norm: 0.999999062473569, iteration: 67854
loss: 1.0334117412567139,grad_norm: 0.8660596457274986, iteration: 67855
loss: 0.9940770268440247,grad_norm: 0.9600506362947889, iteration: 67856
loss: 1.016319990158081,grad_norm: 0.9137536095824542, iteration: 67857
loss: 1.0207037925720215,grad_norm: 0.9999991177583482, iteration: 67858
loss: 1.0023128986358643,grad_norm: 0.9999991940888497, iteration: 67859
loss: 0.9976783394813538,grad_norm: 0.9999990686472251, iteration: 67860
loss: 0.9974283576011658,grad_norm: 0.8563790899392986, iteration: 67861
loss: 0.9923923015594482,grad_norm: 0.7644758198583301, iteration: 67862
loss: 0.9905978441238403,grad_norm: 0.9999995027296463, iteration: 67863
loss: 0.9991422891616821,grad_norm: 0.9999992363283758, iteration: 67864
loss: 1.0169975757598877,grad_norm: 0.9999990428439685, iteration: 67865
loss: 0.9718449711799622,grad_norm: 0.9231734232981337, iteration: 67866
loss: 1.0171808004379272,grad_norm: 0.9999996155205467, iteration: 67867
loss: 1.0094518661499023,grad_norm: 0.7075514918317178, iteration: 67868
loss: 0.9715763926506042,grad_norm: 0.9107306663428623, iteration: 67869
loss: 1.0155373811721802,grad_norm: 0.8858544808231869, iteration: 67870
loss: 1.0059527158737183,grad_norm: 0.9526057943872666, iteration: 67871
loss: 0.9895778298377991,grad_norm: 0.7825404315676022, iteration: 67872
loss: 0.9703497290611267,grad_norm: 0.9999991365861817, iteration: 67873
loss: 0.9959171414375305,grad_norm: 0.7243972174926978, iteration: 67874
loss: 0.9991123676300049,grad_norm: 0.9561725320603983, iteration: 67875
loss: 0.9822297096252441,grad_norm: 0.9999991398650796, iteration: 67876
loss: 1.0223414897918701,grad_norm: 0.8607781332671759, iteration: 67877
loss: 0.9977287650108337,grad_norm: 0.880557703949532, iteration: 67878
loss: 0.9890815019607544,grad_norm: 0.9999991939204831, iteration: 67879
loss: 1.0004751682281494,grad_norm: 0.8089904361586329, iteration: 67880
loss: 1.029714584350586,grad_norm: 0.9999990493256748, iteration: 67881
loss: 1.0252742767333984,grad_norm: 0.97992802185568, iteration: 67882
loss: 1.03264319896698,grad_norm: 0.9999991519861219, iteration: 67883
loss: 1.0059200525283813,grad_norm: 0.9999992304714724, iteration: 67884
loss: 1.0261396169662476,grad_norm: 0.9999990307302405, iteration: 67885
loss: 1.0098313093185425,grad_norm: 0.7770051325745231, iteration: 67886
loss: 0.996486246585846,grad_norm: 0.9999991029290453, iteration: 67887
loss: 1.0435669422149658,grad_norm: 0.8722214382191891, iteration: 67888
loss: 0.9991635084152222,grad_norm: 0.9999989267608976, iteration: 67889
loss: 1.0784026384353638,grad_norm: 0.999999249694383, iteration: 67890
loss: 0.9716153740882874,grad_norm: 0.8996574418040123, iteration: 67891
loss: 0.9960850477218628,grad_norm: 0.9999991312507915, iteration: 67892
loss: 0.9579356908798218,grad_norm: 0.8881887648248307, iteration: 67893
loss: 1.0060267448425293,grad_norm: 0.8701383232851558, iteration: 67894
loss: 1.0313397645950317,grad_norm: 0.9236002795046405, iteration: 67895
loss: 1.0089012384414673,grad_norm: 0.9999991292130759, iteration: 67896
loss: 1.0176063776016235,grad_norm: 0.9325257805356727, iteration: 67897
loss: 1.1657392978668213,grad_norm: 0.9999992617083376, iteration: 67898
loss: 0.9765263795852661,grad_norm: 0.9564866245462359, iteration: 67899
loss: 1.033467411994934,grad_norm: 0.999999871644579, iteration: 67900
loss: 0.9644497632980347,grad_norm: 0.8634951670438297, iteration: 67901
loss: 0.9850975275039673,grad_norm: 0.9999992218388206, iteration: 67902
loss: 0.9999813437461853,grad_norm: 0.9999990379448773, iteration: 67903
loss: 1.0334556102752686,grad_norm: 0.9064080861346441, iteration: 67904
loss: 0.9906294345855713,grad_norm: 0.9522400540393315, iteration: 67905
loss: 1.0779696702957153,grad_norm: 0.9999992315871847, iteration: 67906
loss: 1.025857925415039,grad_norm: 0.8819528220970634, iteration: 67907
loss: 1.032277226448059,grad_norm: 0.8660825488535067, iteration: 67908
loss: 1.033208966255188,grad_norm: 0.99999903136394, iteration: 67909
loss: 1.0529166460037231,grad_norm: 0.9999999023566628, iteration: 67910
loss: 1.0332837104797363,grad_norm: 0.9999991609731145, iteration: 67911
loss: 0.989939272403717,grad_norm: 0.9999996755259049, iteration: 67912
loss: 1.057045578956604,grad_norm: 0.9999998075861216, iteration: 67913
loss: 1.0061280727386475,grad_norm: 0.9091921675970887, iteration: 67914
loss: 1.0109508037567139,grad_norm: 0.9033365274495234, iteration: 67915
loss: 1.0351693630218506,grad_norm: 0.9999991934840208, iteration: 67916
loss: 0.9883358478546143,grad_norm: 0.9999991419225251, iteration: 67917
loss: 1.019370436668396,grad_norm: 0.8324135807028465, iteration: 67918
loss: 1.0039126873016357,grad_norm: 0.9460437094059835, iteration: 67919
loss: 0.9981378316879272,grad_norm: 0.940620879905022, iteration: 67920
loss: 1.0165116786956787,grad_norm: 0.9999993727999441, iteration: 67921
loss: 0.9640083312988281,grad_norm: 0.9927177124349963, iteration: 67922
loss: 1.0029033422470093,grad_norm: 0.9999992449533992, iteration: 67923
loss: 1.033188819885254,grad_norm: 0.9636535608132111, iteration: 67924
loss: 0.9797836542129517,grad_norm: 0.9513217496603644, iteration: 67925
loss: 1.010063648223877,grad_norm: 0.9829525346111694, iteration: 67926
loss: 0.9823586344718933,grad_norm: 0.876215274866598, iteration: 67927
loss: 0.9976392984390259,grad_norm: 0.9814041543374764, iteration: 67928
loss: 1.0133486986160278,grad_norm: 0.9999990593676072, iteration: 67929
loss: 1.0724945068359375,grad_norm: 0.9544513130512479, iteration: 67930
loss: 1.0305230617523193,grad_norm: 0.9999995841186754, iteration: 67931
loss: 0.9853092432022095,grad_norm: 0.7978813811829629, iteration: 67932
loss: 1.0051957368850708,grad_norm: 0.9999993774237563, iteration: 67933
loss: 0.9800684452056885,grad_norm: 0.9057237406584867, iteration: 67934
loss: 0.982260525226593,grad_norm: 0.935581259353703, iteration: 67935
loss: 1.0069701671600342,grad_norm: 0.9999990057050391, iteration: 67936
loss: 1.0041248798370361,grad_norm: 0.99999949114333, iteration: 67937
loss: 1.0136291980743408,grad_norm: 0.9999992251456411, iteration: 67938
loss: 1.0140835046768188,grad_norm: 0.9999990281682307, iteration: 67939
loss: 1.027846097946167,grad_norm: 0.8528676826730324, iteration: 67940
loss: 1.0745772123336792,grad_norm: 0.9999994070245622, iteration: 67941
loss: 1.0266941785812378,grad_norm: 0.8385600599837131, iteration: 67942
loss: 1.0298465490341187,grad_norm: 0.9999990683010227, iteration: 67943
loss: 1.009619116783142,grad_norm: 0.856340862544082, iteration: 67944
loss: 0.9995223879814148,grad_norm: 0.8879112861128418, iteration: 67945
loss: 0.9717919826507568,grad_norm: 0.9999991390302855, iteration: 67946
loss: 1.0446555614471436,grad_norm: 0.999999082423732, iteration: 67947
loss: 1.09539794921875,grad_norm: 0.9999997718555933, iteration: 67948
loss: 1.0368382930755615,grad_norm: 0.9999991456714118, iteration: 67949
loss: 0.9802765846252441,grad_norm: 0.8697298034338303, iteration: 67950
loss: 0.9796068072319031,grad_norm: 0.9517231434039978, iteration: 67951
loss: 0.9813709259033203,grad_norm: 0.9762749337466162, iteration: 67952
loss: 1.0160998106002808,grad_norm: 0.9999992213401152, iteration: 67953
loss: 0.9499626159667969,grad_norm: 0.8959031885503138, iteration: 67954
loss: 1.0068045854568481,grad_norm: 0.8076127934392588, iteration: 67955
loss: 1.0034857988357544,grad_norm: 0.9999991871795082, iteration: 67956
loss: 1.0551955699920654,grad_norm: 0.9841196992286277, iteration: 67957
loss: 1.0082275867462158,grad_norm: 0.9999990319884544, iteration: 67958
loss: 0.9929550886154175,grad_norm: 0.888562292488039, iteration: 67959
loss: 1.0104820728302002,grad_norm: 0.944917716011434, iteration: 67960
loss: 1.0483174324035645,grad_norm: 0.8686202604629528, iteration: 67961
loss: 1.0558290481567383,grad_norm: 0.9999994505176643, iteration: 67962
loss: 1.0205652713775635,grad_norm: 0.9999998102753253, iteration: 67963
loss: 1.0004225969314575,grad_norm: 0.9506299016030614, iteration: 67964
loss: 1.0360703468322754,grad_norm: 0.9999991146338484, iteration: 67965
loss: 1.0204017162322998,grad_norm: 0.9999998419286151, iteration: 67966
loss: 1.0400789976119995,grad_norm: 0.9999995340857963, iteration: 67967
loss: 1.010238766670227,grad_norm: 0.9102702658617637, iteration: 67968
loss: 0.9952608942985535,grad_norm: 0.9999990476401891, iteration: 67969
loss: 1.0126436948776245,grad_norm: 0.9999990326818176, iteration: 67970
loss: 1.001783847808838,grad_norm: 0.8993683273323265, iteration: 67971
loss: 0.9889845848083496,grad_norm: 0.9406002902645595, iteration: 67972
loss: 1.0012173652648926,grad_norm: 0.9979364562633809, iteration: 67973
loss: 1.1146472692489624,grad_norm: 0.9884408420512197, iteration: 67974
loss: 0.9703142642974854,grad_norm: 0.8893142701675472, iteration: 67975
loss: 1.01189386844635,grad_norm: 0.8237917031682902, iteration: 67976
loss: 0.9952438473701477,grad_norm: 0.7851529849732046, iteration: 67977
loss: 1.0073035955429077,grad_norm: 0.8940980954037783, iteration: 67978
loss: 1.0155783891677856,grad_norm: 0.8910515189481332, iteration: 67979
loss: 0.9734392166137695,grad_norm: 0.8697622638342596, iteration: 67980
loss: 1.0177558660507202,grad_norm: 0.9963374580055513, iteration: 67981
loss: 0.9651356339454651,grad_norm: 0.9718071996680916, iteration: 67982
loss: 0.9867036938667297,grad_norm: 0.960385716243657, iteration: 67983
loss: 0.9976551532745361,grad_norm: 0.9999991067970788, iteration: 67984
loss: 1.0009703636169434,grad_norm: 0.8807883331282125, iteration: 67985
loss: 1.032907247543335,grad_norm: 0.9999991404420906, iteration: 67986
loss: 0.9919100999832153,grad_norm: 0.9923339802441026, iteration: 67987
loss: 1.0189309120178223,grad_norm: 0.9828213948355737, iteration: 67988
loss: 1.0178409814834595,grad_norm: 0.9999991947089963, iteration: 67989
loss: 1.0427721738815308,grad_norm: 0.9999990084554462, iteration: 67990
loss: 0.9784688949584961,grad_norm: 0.9725865366266433, iteration: 67991
loss: 1.0194826126098633,grad_norm: 0.8300135390869889, iteration: 67992
loss: 0.9886307120323181,grad_norm: 0.8228069660112663, iteration: 67993
loss: 1.0295273065567017,grad_norm: 1.0000000980321058, iteration: 67994
loss: 0.9797928333282471,grad_norm: 0.999999375893078, iteration: 67995
loss: 1.0071145296096802,grad_norm: 0.9999991083732268, iteration: 67996
loss: 1.0232586860656738,grad_norm: 0.9999990837978294, iteration: 67997
loss: 1.002265453338623,grad_norm: 0.7968010778799496, iteration: 67998
loss: 1.1518268585205078,grad_norm: 0.9999993854794117, iteration: 67999
loss: 1.0007274150848389,grad_norm: 0.7553860548282881, iteration: 68000
loss: 0.9886111617088318,grad_norm: 0.9999992014381307, iteration: 68001
loss: 0.9675939083099365,grad_norm: 0.8526541938203922, iteration: 68002
loss: 1.0192450284957886,grad_norm: 0.8364137810691921, iteration: 68003
loss: 0.974709153175354,grad_norm: 0.8613930951342446, iteration: 68004
loss: 1.0192947387695312,grad_norm: 0.8671361592487968, iteration: 68005
loss: 0.9916272163391113,grad_norm: 0.9580761604112621, iteration: 68006
loss: 0.9855148792266846,grad_norm: 0.9208027688157785, iteration: 68007
loss: 1.009881854057312,grad_norm: 0.9999992090221548, iteration: 68008
loss: 1.0222054719924927,grad_norm: 0.9948528724964307, iteration: 68009
loss: 1.011401653289795,grad_norm: 0.9999993200631502, iteration: 68010
loss: 1.0551891326904297,grad_norm: 0.9999997020204063, iteration: 68011
loss: 0.9921998381614685,grad_norm: 0.9524640064694887, iteration: 68012
loss: 1.0107284784317017,grad_norm: 0.9999993044288177, iteration: 68013
loss: 0.9960559606552124,grad_norm: 0.9999992598540349, iteration: 68014
loss: 1.0227916240692139,grad_norm: 0.9703060232577674, iteration: 68015
loss: 0.9651814103126526,grad_norm: 0.9999990765599325, iteration: 68016
loss: 0.9614514708518982,grad_norm: 0.9999989770803185, iteration: 68017
loss: 1.0365948677062988,grad_norm: 0.9999990689852787, iteration: 68018
loss: 1.0203477144241333,grad_norm: 0.8259059152277844, iteration: 68019
loss: 1.0194647312164307,grad_norm: 0.9999999511308102, iteration: 68020
loss: 1.03136146068573,grad_norm: 0.762156142824262, iteration: 68021
loss: 0.9881852865219116,grad_norm: 0.8463705373479613, iteration: 68022
loss: 1.1105437278747559,grad_norm: 0.9999998918742263, iteration: 68023
loss: 0.9960205554962158,grad_norm: 0.9863973744064063, iteration: 68024
loss: 1.0536783933639526,grad_norm: 0.9600441294711322, iteration: 68025
loss: 1.0170100927352905,grad_norm: 0.9124956298617657, iteration: 68026
loss: 1.0876668691635132,grad_norm: 0.9999996363755368, iteration: 68027
loss: 0.9749336838722229,grad_norm: 0.9741022651293751, iteration: 68028
loss: 1.0130761861801147,grad_norm: 0.9999992369829126, iteration: 68029
loss: 1.0108938217163086,grad_norm: 0.9999990681224801, iteration: 68030
loss: 1.0305372476577759,grad_norm: 0.9784469871235428, iteration: 68031
loss: 0.9606080651283264,grad_norm: 0.9999990686320022, iteration: 68032
loss: 1.0042866468429565,grad_norm: 0.9999993428896284, iteration: 68033
loss: 0.9895997643470764,grad_norm: 0.9999991374609323, iteration: 68034
loss: 0.9905877113342285,grad_norm: 0.9695806047854894, iteration: 68035
loss: 1.0433624982833862,grad_norm: 0.8565335004388921, iteration: 68036
loss: 1.060652494430542,grad_norm: 0.9999995593299685, iteration: 68037
loss: 1.0238590240478516,grad_norm: 0.9877329164954326, iteration: 68038
loss: 0.9745094180107117,grad_norm: 0.9999993914239166, iteration: 68039
loss: 1.0046374797821045,grad_norm: 0.7461926523671405, iteration: 68040
loss: 0.9989606738090515,grad_norm: 0.9999990422807189, iteration: 68041
loss: 0.992300808429718,grad_norm: 0.9999991695458653, iteration: 68042
loss: 1.0971513986587524,grad_norm: 0.999999631742889, iteration: 68043
loss: 1.0013951063156128,grad_norm: 0.999999928328819, iteration: 68044
loss: 1.062233805656433,grad_norm: 0.9999994915252958, iteration: 68045
loss: 1.0176559686660767,grad_norm: 0.9999992438683197, iteration: 68046
loss: 0.9811719059944153,grad_norm: 0.9274333970427452, iteration: 68047
loss: 1.0443435907363892,grad_norm: 0.9999990268540082, iteration: 68048
loss: 0.9488371014595032,grad_norm: 0.83785169382279, iteration: 68049
loss: 0.9986834526062012,grad_norm: 0.9678653875889411, iteration: 68050
loss: 0.995326578617096,grad_norm: 0.9928719060513305, iteration: 68051
loss: 1.0186952352523804,grad_norm: 0.883762048145367, iteration: 68052
loss: 1.0161851644515991,grad_norm: 0.9999990619658063, iteration: 68053
loss: 1.016680121421814,grad_norm: 0.8658371273898248, iteration: 68054
loss: 0.9709102511405945,grad_norm: 0.8626891574996104, iteration: 68055
loss: 0.9590112566947937,grad_norm: 0.9999989866914996, iteration: 68056
loss: 1.0782197713851929,grad_norm: 0.9999991957533931, iteration: 68057
loss: 0.980182945728302,grad_norm: 0.9949762439993955, iteration: 68058
loss: 0.9796697497367859,grad_norm: 0.9904864951889812, iteration: 68059
loss: 1.019463062286377,grad_norm: 0.9325307159336644, iteration: 68060
loss: 1.0010367631912231,grad_norm: 0.9987401056090397, iteration: 68061
loss: 0.9795549511909485,grad_norm: 0.9999992281627736, iteration: 68062
loss: 0.9583655595779419,grad_norm: 0.9999993225722126, iteration: 68063
loss: 1.0070260763168335,grad_norm: 0.9081129659605993, iteration: 68064
loss: 1.0621753931045532,grad_norm: 0.9999995943805663, iteration: 68065
loss: 0.9997577667236328,grad_norm: 0.9999989569261016, iteration: 68066
loss: 0.9724456667900085,grad_norm: 0.8641582001518741, iteration: 68067
loss: 0.9752712845802307,grad_norm: 0.9615066125766724, iteration: 68068
loss: 0.9696387648582458,grad_norm: 0.7787411304438973, iteration: 68069
loss: 1.0363587141036987,grad_norm: 0.973706901900262, iteration: 68070
loss: 1.0353460311889648,grad_norm: 0.9999990544224087, iteration: 68071
loss: 0.9876258969306946,grad_norm: 0.8030281271392053, iteration: 68072
loss: 1.0256205797195435,grad_norm: 0.8188504367838667, iteration: 68073
loss: 0.9901130199432373,grad_norm: 0.9999991558924756, iteration: 68074
loss: 1.0362310409545898,grad_norm: 0.9999995976836127, iteration: 68075
loss: 0.985934317111969,grad_norm: 0.9999993386023023, iteration: 68076
loss: 0.9764944911003113,grad_norm: 0.999998952640252, iteration: 68077
loss: 0.9804104566574097,grad_norm: 0.9999991212715537, iteration: 68078
loss: 0.9979824423789978,grad_norm: 0.8603384376292265, iteration: 68079
loss: 1.0045828819274902,grad_norm: 0.8983077584307383, iteration: 68080
loss: 1.0282399654388428,grad_norm: 0.9999992384302101, iteration: 68081
loss: 1.0074082612991333,grad_norm: 0.9787097020383617, iteration: 68082
loss: 1.0210922956466675,grad_norm: 0.9893742246395776, iteration: 68083
loss: 1.0020630359649658,grad_norm: 0.9999994531208212, iteration: 68084
loss: 1.024261713027954,grad_norm: 0.9456258914502126, iteration: 68085
loss: 1.0501995086669922,grad_norm: 0.9999992308672113, iteration: 68086
loss: 1.0130988359451294,grad_norm: 0.8725501543227734, iteration: 68087
loss: 1.0167168378829956,grad_norm: 0.8903239789769364, iteration: 68088
loss: 1.0203313827514648,grad_norm: 0.8976439158701773, iteration: 68089
loss: 0.9778878092765808,grad_norm: 0.8693563592749627, iteration: 68090
loss: 1.0089219808578491,grad_norm: 0.8737265422283007, iteration: 68091
loss: 1.0065436363220215,grad_norm: 0.9647618723977957, iteration: 68092
loss: 1.0260618925094604,grad_norm: 0.9999992641043592, iteration: 68093
loss: 1.0154794454574585,grad_norm: 0.8684127677625046, iteration: 68094
loss: 1.0292032957077026,grad_norm: 0.915765703491856, iteration: 68095
loss: 1.0458842515945435,grad_norm: 0.9232992682322624, iteration: 68096
loss: 0.9991969466209412,grad_norm: 0.9590422592487353, iteration: 68097
loss: 0.9868529438972473,grad_norm: 0.9075922335427599, iteration: 68098
loss: 1.0226995944976807,grad_norm: 0.9999998116641128, iteration: 68099
loss: 1.0110194683074951,grad_norm: 0.9999991701001277, iteration: 68100
loss: 1.1168279647827148,grad_norm: 0.9999995579842994, iteration: 68101
loss: 0.9817510843276978,grad_norm: 0.9390699254269496, iteration: 68102
loss: 1.0209280252456665,grad_norm: 0.9999991655468736, iteration: 68103
loss: 0.9470424652099609,grad_norm: 0.9129900463640416, iteration: 68104
loss: 0.99163818359375,grad_norm: 0.9295283633916475, iteration: 68105
loss: 1.0487890243530273,grad_norm: 0.9999993315395937, iteration: 68106
loss: 1.0238018035888672,grad_norm: 0.8654434495695611, iteration: 68107
loss: 0.996116042137146,grad_norm: 0.847462593591432, iteration: 68108
loss: 0.9947754144668579,grad_norm: 0.9999989938948148, iteration: 68109
loss: 1.0024651288986206,grad_norm: 0.9822398132498155, iteration: 68110
loss: 1.0002013444900513,grad_norm: 0.9999990288180253, iteration: 68111
loss: 0.9489692449569702,grad_norm: 0.9212812715331284, iteration: 68112
loss: 1.0428147315979004,grad_norm: 0.9999998109745414, iteration: 68113
loss: 0.9862763285636902,grad_norm: 0.9181962308743389, iteration: 68114
loss: 0.9650230407714844,grad_norm: 0.8464988578549292, iteration: 68115
loss: 0.9952993392944336,grad_norm: 0.9280663702916884, iteration: 68116
loss: 1.0419892072677612,grad_norm: 0.9999994024905097, iteration: 68117
loss: 1.00509774684906,grad_norm: 0.8787467703747024, iteration: 68118
loss: 1.0021370649337769,grad_norm: 0.9999992762914169, iteration: 68119
loss: 0.9823042750358582,grad_norm: 0.9999992214677802, iteration: 68120
loss: 1.0625540018081665,grad_norm: 0.9999992940841026, iteration: 68121
loss: 1.0053776502609253,grad_norm: 0.9999991810057298, iteration: 68122
loss: 1.023552656173706,grad_norm: 0.9418724096557575, iteration: 68123
loss: 0.973275363445282,grad_norm: 0.9469313946580185, iteration: 68124
loss: 1.0153093338012695,grad_norm: 0.9130081160949397, iteration: 68125
loss: 1.0129293203353882,grad_norm: 0.9472637080448295, iteration: 68126
loss: 1.053554654121399,grad_norm: 0.9999994822467185, iteration: 68127
loss: 1.0100395679473877,grad_norm: 0.8698845255611332, iteration: 68128
loss: 0.9823915958404541,grad_norm: 0.999999094468125, iteration: 68129
loss: 1.032403588294983,grad_norm: 0.9699910497038164, iteration: 68130
loss: 1.0010323524475098,grad_norm: 0.9110668476188843, iteration: 68131
loss: 0.9589266777038574,grad_norm: 0.8903104111401831, iteration: 68132
loss: 1.0135700702667236,grad_norm: 0.9905872589935134, iteration: 68133
loss: 1.0005651712417603,grad_norm: 0.8755907033343564, iteration: 68134
loss: 1.0073195695877075,grad_norm: 0.999999696742007, iteration: 68135
loss: 1.007630705833435,grad_norm: 0.9999995078084363, iteration: 68136
loss: 1.0016323328018188,grad_norm: 0.9353300779619325, iteration: 68137
loss: 1.0245803594589233,grad_norm: 0.8699147549794051, iteration: 68138
loss: 1.0965837240219116,grad_norm: 0.9999991997727653, iteration: 68139
loss: 1.0241010189056396,grad_norm: 0.9518545744308767, iteration: 68140
loss: 1.0019744634628296,grad_norm: 0.9599555059014613, iteration: 68141
loss: 1.0657612085342407,grad_norm: 0.8917496566622143, iteration: 68142
loss: 1.0016995668411255,grad_norm: 0.9999991395687792, iteration: 68143
loss: 1.186657428741455,grad_norm: 0.9999991502147915, iteration: 68144
loss: 1.0419225692749023,grad_norm: 0.9999994412146406, iteration: 68145
loss: 1.0487313270568848,grad_norm: 0.8309776762563753, iteration: 68146
loss: 1.1514086723327637,grad_norm: 0.9999990511018476, iteration: 68147
loss: 0.9610913991928101,grad_norm: 0.8162950265180545, iteration: 68148
loss: 1.0423871278762817,grad_norm: 0.9999992659213, iteration: 68149
loss: 1.0629359483718872,grad_norm: 0.9999992622548751, iteration: 68150
loss: 1.001103401184082,grad_norm: 0.9411162218175588, iteration: 68151
loss: 0.9705277681350708,grad_norm: 0.9999992675135773, iteration: 68152
loss: 1.02064847946167,grad_norm: 0.9999993418632773, iteration: 68153
loss: 1.0900506973266602,grad_norm: 0.9999990563691796, iteration: 68154
loss: 1.0116682052612305,grad_norm: 0.9999990622437867, iteration: 68155
loss: 1.114513874053955,grad_norm: 0.9999997323895742, iteration: 68156
loss: 1.2603659629821777,grad_norm: 1.0000000061462053, iteration: 68157
loss: 1.0215398073196411,grad_norm: 0.999999269572258, iteration: 68158
loss: 1.132510781288147,grad_norm: 0.9999998592149423, iteration: 68159
loss: 1.0676987171173096,grad_norm: 0.9999990835114329, iteration: 68160
loss: 1.0856424570083618,grad_norm: 0.999999515069688, iteration: 68161
loss: 1.0288569927215576,grad_norm: 0.9999990162756155, iteration: 68162
loss: 1.0576426982879639,grad_norm: 0.999999099455775, iteration: 68163
loss: 0.9873428344726562,grad_norm: 0.9999992059132558, iteration: 68164
loss: 1.1287047863006592,grad_norm: 0.9999996899534787, iteration: 68165
loss: 1.0362277030944824,grad_norm: 0.9933035542304285, iteration: 68166
loss: 1.2502014636993408,grad_norm: 0.9999994808348938, iteration: 68167
loss: 1.007250189781189,grad_norm: 0.8767921248437331, iteration: 68168
loss: 1.0836591720581055,grad_norm: 0.9999999307155057, iteration: 68169
loss: 0.9894968867301941,grad_norm: 0.9394098624393804, iteration: 68170
loss: 1.0385310649871826,grad_norm: 0.9999990689397764, iteration: 68171
loss: 1.0612738132476807,grad_norm: 0.9999998997990922, iteration: 68172
loss: 1.1876628398895264,grad_norm: 0.9999998380880395, iteration: 68173
loss: 1.0624805688858032,grad_norm: 0.9999990989403242, iteration: 68174
loss: 1.1111990213394165,grad_norm: 0.9999993770904412, iteration: 68175
loss: 1.1475133895874023,grad_norm: 0.9999995261506667, iteration: 68176
loss: 1.0386229753494263,grad_norm: 0.9999992496884225, iteration: 68177
loss: 1.0639668703079224,grad_norm: 0.9999991546298228, iteration: 68178
loss: 1.2697113752365112,grad_norm: 0.9999996111375384, iteration: 68179
loss: 1.3126671314239502,grad_norm: 0.9999995002073315, iteration: 68180
loss: 1.1554343700408936,grad_norm: 0.9999995703491475, iteration: 68181
loss: 1.2102810144424438,grad_norm: 0.9999991066326965, iteration: 68182
loss: 1.0264043807983398,grad_norm: 0.9999994281963309, iteration: 68183
loss: 1.280017375946045,grad_norm: 0.999999274717623, iteration: 68184
loss: 1.2017476558685303,grad_norm: 0.9999992357078827, iteration: 68185
loss: 1.214493751525879,grad_norm: 0.9999992007399034, iteration: 68186
loss: 1.1548633575439453,grad_norm: 0.9999991260898488, iteration: 68187
loss: 1.0883666276931763,grad_norm: 0.9999991009654006, iteration: 68188
loss: 1.118381142616272,grad_norm: 0.9999994033886546, iteration: 68189
loss: 1.0990201234817505,grad_norm: 0.9999992878262609, iteration: 68190
loss: 1.194198727607727,grad_norm: 0.9999992793614241, iteration: 68191
loss: 0.992927074432373,grad_norm: 0.9999991504195052, iteration: 68192
loss: 1.3155639171600342,grad_norm: 0.999999329199371, iteration: 68193
loss: 1.1294114589691162,grad_norm: 0.9999993632564886, iteration: 68194
loss: 1.0886250734329224,grad_norm: 0.9999992729923854, iteration: 68195
loss: 0.9693312048912048,grad_norm: 0.9999990352330886, iteration: 68196
loss: 1.1569106578826904,grad_norm: 0.9999994132068054, iteration: 68197
loss: 1.1214115619659424,grad_norm: 0.9999991584832418, iteration: 68198
loss: 1.0124551057815552,grad_norm: 0.974611928876551, iteration: 68199
loss: 0.9972677230834961,grad_norm: 0.7831943523106987, iteration: 68200
loss: 1.1012405157089233,grad_norm: 0.9999998318054157, iteration: 68201
loss: 1.0311391353607178,grad_norm: 0.9999994606633738, iteration: 68202
loss: 1.0108495950698853,grad_norm: 0.8192335810727167, iteration: 68203
loss: 1.023095965385437,grad_norm: 0.833725879138307, iteration: 68204
loss: 1.0029077529907227,grad_norm: 0.8268294909085897, iteration: 68205
loss: 1.060776710510254,grad_norm: 0.9999996119831164, iteration: 68206
loss: 0.9910171031951904,grad_norm: 0.9999990727674145, iteration: 68207
loss: 1.0002408027648926,grad_norm: 0.9999992921610393, iteration: 68208
loss: 1.1446326971054077,grad_norm: 0.9999995377297158, iteration: 68209
loss: 0.9704449772834778,grad_norm: 0.9999991281637839, iteration: 68210
loss: 1.0473562479019165,grad_norm: 0.999999919673918, iteration: 68211
loss: 1.080478310585022,grad_norm: 0.9999994465385145, iteration: 68212
loss: 0.9878453612327576,grad_norm: 0.93821664957022, iteration: 68213
loss: 1.0736236572265625,grad_norm: 0.9999992498352133, iteration: 68214
loss: 0.9981449842453003,grad_norm: 0.8512024875799232, iteration: 68215
loss: 0.9984941482543945,grad_norm: 0.9999993438788896, iteration: 68216
loss: 0.9939479827880859,grad_norm: 0.9999990058825181, iteration: 68217
loss: 1.0422818660736084,grad_norm: 0.9999990886317787, iteration: 68218
loss: 1.043179988861084,grad_norm: 0.9999997996412568, iteration: 68219
loss: 0.9895652532577515,grad_norm: 0.9999991114419631, iteration: 68220
loss: 1.0450429916381836,grad_norm: 0.9999991494048438, iteration: 68221
loss: 1.0855454206466675,grad_norm: 0.9999992667386534, iteration: 68222
loss: 0.9979069828987122,grad_norm: 0.9999991591636074, iteration: 68223
loss: 1.0348254442214966,grad_norm: 0.9974423651646822, iteration: 68224
loss: 1.0052679777145386,grad_norm: 0.9999991200621782, iteration: 68225
loss: 0.9908115863800049,grad_norm: 0.9999991717569537, iteration: 68226
loss: 1.1012253761291504,grad_norm: 0.999999639171874, iteration: 68227
loss: 1.027989387512207,grad_norm: 0.9825576575796753, iteration: 68228
loss: 1.0601155757904053,grad_norm: 0.8280239776873627, iteration: 68229
loss: 0.9994240403175354,grad_norm: 0.9999994284004071, iteration: 68230
loss: 1.0248299837112427,grad_norm: 0.9735225268197703, iteration: 68231
loss: 1.0118935108184814,grad_norm: 0.8022279191308721, iteration: 68232
loss: 1.0497219562530518,grad_norm: 0.9999992388755323, iteration: 68233
loss: 0.9889666438102722,grad_norm: 0.9414955217048924, iteration: 68234
loss: 1.063891887664795,grad_norm: 0.7921679242898261, iteration: 68235
loss: 1.0322107076644897,grad_norm: 0.9999999136811658, iteration: 68236
loss: 0.9887182116508484,grad_norm: 0.9881487898349779, iteration: 68237
loss: 0.9847122430801392,grad_norm: 0.9388263258622535, iteration: 68238
loss: 0.9903082847595215,grad_norm: 0.8988149200635331, iteration: 68239
loss: 1.016787052154541,grad_norm: 0.9999992023835689, iteration: 68240
loss: 1.0218275785446167,grad_norm: 0.9999990084041551, iteration: 68241
loss: 1.032956600189209,grad_norm: 0.9999989792253083, iteration: 68242
loss: 0.9850612282752991,grad_norm: 0.9524713968136195, iteration: 68243
loss: 0.9912881851196289,grad_norm: 0.9391385827759494, iteration: 68244
loss: 0.9803889989852905,grad_norm: 0.8573402943699326, iteration: 68245
loss: 1.0023640394210815,grad_norm: 0.9176717339194622, iteration: 68246
loss: 1.019905686378479,grad_norm: 0.7647626171030988, iteration: 68247
loss: 1.0230932235717773,grad_norm: 0.9999991558996959, iteration: 68248
loss: 1.066478967666626,grad_norm: 0.9999997007080663, iteration: 68249
loss: 1.0329773426055908,grad_norm: 0.9586839633260436, iteration: 68250
loss: 0.9774353504180908,grad_norm: 0.960935946315315, iteration: 68251
loss: 0.9771507978439331,grad_norm: 0.9356337569533857, iteration: 68252
loss: 0.9699561595916748,grad_norm: 0.9448430594192743, iteration: 68253
loss: 0.9492876529693604,grad_norm: 0.8756299301826714, iteration: 68254
loss: 1.083377718925476,grad_norm: 0.8094166044672437, iteration: 68255
loss: 0.9994089007377625,grad_norm: 0.9593899286536751, iteration: 68256
loss: 1.0199779272079468,grad_norm: 0.9999991892719031, iteration: 68257
loss: 0.9935871362686157,grad_norm: 0.7635894227532083, iteration: 68258
loss: 1.0855467319488525,grad_norm: 0.9999998062783992, iteration: 68259
loss: 0.996135950088501,grad_norm: 0.9999992089352693, iteration: 68260
loss: 0.9735551476478577,grad_norm: 0.8860683482554587, iteration: 68261
loss: 1.0269569158554077,grad_norm: 0.8365041255743928, iteration: 68262
loss: 1.0104596614837646,grad_norm: 0.9999991823803014, iteration: 68263
loss: 1.003177523612976,grad_norm: 0.999999115759924, iteration: 68264
loss: 0.9725720286369324,grad_norm: 0.9999991063604582, iteration: 68265
loss: 1.0022025108337402,grad_norm: 0.8258540879524849, iteration: 68266
loss: 0.9594746232032776,grad_norm: 0.8415486150531187, iteration: 68267
loss: 1.0522457361221313,grad_norm: 0.9999998308635348, iteration: 68268
loss: 1.0391600131988525,grad_norm: 0.9999991156683625, iteration: 68269
loss: 0.9948998689651489,grad_norm: 0.9999991569875124, iteration: 68270
loss: 0.9607320427894592,grad_norm: 0.9999990478000383, iteration: 68271
loss: 1.009279489517212,grad_norm: 0.8315010904877913, iteration: 68272
loss: 1.023044466972351,grad_norm: 0.9999992339588233, iteration: 68273
loss: 1.0404558181762695,grad_norm: 0.9999999123903418, iteration: 68274
loss: 1.016414761543274,grad_norm: 0.999999025935011, iteration: 68275
loss: 1.0634310245513916,grad_norm: 0.9999991677328481, iteration: 68276
loss: 1.0169825553894043,grad_norm: 0.9976089459713811, iteration: 68277
loss: 1.0377987623214722,grad_norm: 0.8052779816755107, iteration: 68278
loss: 0.9683516621589661,grad_norm: 0.8430094169263646, iteration: 68279
loss: 0.9841064810752869,grad_norm: 0.8491631807654727, iteration: 68280
loss: 0.9726606607437134,grad_norm: 0.999999125720971, iteration: 68281
loss: 1.054042935371399,grad_norm: 0.9999992234715454, iteration: 68282
loss: 0.9645795226097107,grad_norm: 0.9999991134317238, iteration: 68283
loss: 1.048451542854309,grad_norm: 0.9999999006633293, iteration: 68284
loss: 1.0290027856826782,grad_norm: 0.8411071631043576, iteration: 68285
loss: 1.1692838668823242,grad_norm: 0.9999998141404559, iteration: 68286
loss: 0.99301677942276,grad_norm: 0.999999182304353, iteration: 68287
loss: 1.1133273839950562,grad_norm: 0.9999994620607897, iteration: 68288
loss: 0.9746934175491333,grad_norm: 0.8187712348952405, iteration: 68289
loss: 1.055935025215149,grad_norm: 0.974094256278166, iteration: 68290
loss: 0.9709409475326538,grad_norm: 0.8161441674807111, iteration: 68291
loss: 1.0109589099884033,grad_norm: 0.9444379321570087, iteration: 68292
loss: 1.0003345012664795,grad_norm: 0.9943120794848741, iteration: 68293
loss: 0.9873329997062683,grad_norm: 0.9160252524149584, iteration: 68294
loss: 1.0086910724639893,grad_norm: 0.9688462006055216, iteration: 68295
loss: 0.955707311630249,grad_norm: 0.9999991331002998, iteration: 68296
loss: 1.024322748184204,grad_norm: 0.9807233824130045, iteration: 68297
loss: 1.0811421871185303,grad_norm: 0.9999990920305271, iteration: 68298
loss: 1.0993291139602661,grad_norm: 0.9999995099660949, iteration: 68299
loss: 1.075331449508667,grad_norm: 0.9999991697717804, iteration: 68300
loss: 0.9654873013496399,grad_norm: 0.8577738771673002, iteration: 68301
loss: 1.0360674858093262,grad_norm: 0.9488185063701587, iteration: 68302
loss: 1.0062530040740967,grad_norm: 0.7974532335475606, iteration: 68303
loss: 0.987892210483551,grad_norm: 0.7359084332423803, iteration: 68304
loss: 1.0532418489456177,grad_norm: 0.9999991089243527, iteration: 68305
loss: 0.9840632677078247,grad_norm: 0.9999996570246168, iteration: 68306
loss: 0.9951017498970032,grad_norm: 0.9999992620042104, iteration: 68307
loss: 0.9940534234046936,grad_norm: 0.9314343488473359, iteration: 68308
loss: 1.049307942390442,grad_norm: 0.9999996688056464, iteration: 68309
loss: 1.1375449895858765,grad_norm: 0.9999998144870554, iteration: 68310
loss: 1.0566548109054565,grad_norm: 0.9999994594520007, iteration: 68311
loss: 0.934638261795044,grad_norm: 0.9999992146196053, iteration: 68312
loss: 1.0164538621902466,grad_norm: 0.9999996133904945, iteration: 68313
loss: 1.025408387184143,grad_norm: 0.9999990808217633, iteration: 68314
loss: 1.0890010595321655,grad_norm: 0.9999990247068773, iteration: 68315
loss: 1.003962516784668,grad_norm: 0.9999992463465025, iteration: 68316
loss: 1.0663763284683228,grad_norm: 0.9999994162710765, iteration: 68317
loss: 1.0630842447280884,grad_norm: 0.9276920744035285, iteration: 68318
loss: 1.013203740119934,grad_norm: 0.9137226449097567, iteration: 68319
loss: 0.9964556097984314,grad_norm: 0.908352349523208, iteration: 68320
loss: 0.9555533528327942,grad_norm: 0.9999991997950761, iteration: 68321
loss: 0.9963024854660034,grad_norm: 0.999998980800558, iteration: 68322
loss: 1.0020338296890259,grad_norm: 0.9999992100135381, iteration: 68323
loss: 1.0081743001937866,grad_norm: 0.8513532281460644, iteration: 68324
loss: 0.9545513987541199,grad_norm: 0.9999989863007925, iteration: 68325
loss: 1.0305474996566772,grad_norm: 0.9999990721242983, iteration: 68326
loss: 1.0146846771240234,grad_norm: 0.7974145342072309, iteration: 68327
loss: 1.0286310911178589,grad_norm: 0.9218164398995992, iteration: 68328
loss: 1.0250035524368286,grad_norm: 0.9999990880750663, iteration: 68329
loss: 0.9714234471321106,grad_norm: 0.9999991554979644, iteration: 68330
loss: 1.0104433298110962,grad_norm: 0.9999990178755611, iteration: 68331
loss: 0.9924668073654175,grad_norm: 0.9477525479031692, iteration: 68332
loss: 1.0341620445251465,grad_norm: 0.99999929298043, iteration: 68333
loss: 0.9545939564704895,grad_norm: 0.8087867291762049, iteration: 68334
loss: 0.9892616271972656,grad_norm: 0.8213664289913267, iteration: 68335
loss: 0.9946309328079224,grad_norm: 0.8937158087104276, iteration: 68336
loss: 0.9987426400184631,grad_norm: 0.9656631135729116, iteration: 68337
loss: 1.0324559211730957,grad_norm: 0.9999997447322417, iteration: 68338
loss: 1.0655606985092163,grad_norm: 0.9999992232239127, iteration: 68339
loss: 0.9740141034126282,grad_norm: 0.9369873110625013, iteration: 68340
loss: 0.9841387271881104,grad_norm: 0.9999990861038537, iteration: 68341
loss: 1.0939667224884033,grad_norm: 0.9999992153644981, iteration: 68342
loss: 1.0178567171096802,grad_norm: 0.9658666641381619, iteration: 68343
loss: 1.045045256614685,grad_norm: 0.9999992885533678, iteration: 68344
loss: 0.9799044728279114,grad_norm: 0.9999995177753292, iteration: 68345
loss: 1.0540920495986938,grad_norm: 0.9999996518535245, iteration: 68346
loss: 0.9649491310119629,grad_norm: 0.9212340303534904, iteration: 68347
loss: 1.0569862127304077,grad_norm: 0.9999990937676492, iteration: 68348
loss: 0.996512770652771,grad_norm: 0.9999995184464272, iteration: 68349
loss: 1.0316925048828125,grad_norm: 0.8595773255157038, iteration: 68350
loss: 0.9809073805809021,grad_norm: 0.9999995463793331, iteration: 68351
loss: 1.0359753370285034,grad_norm: 0.8355882976098692, iteration: 68352
loss: 1.004784107208252,grad_norm: 0.9144741641436694, iteration: 68353
loss: 1.062696933746338,grad_norm: 0.9999997205217661, iteration: 68354
loss: 1.015417456626892,grad_norm: 0.9057018363307792, iteration: 68355
loss: 1.0033636093139648,grad_norm: 0.9999989393259099, iteration: 68356
loss: 0.9835786819458008,grad_norm: 0.9999992180768775, iteration: 68357
loss: 1.0951074361801147,grad_norm: 0.9999996235116633, iteration: 68358
loss: 1.072545051574707,grad_norm: 0.9999991047617535, iteration: 68359
loss: 1.0368331670761108,grad_norm: 0.8657487009430398, iteration: 68360
loss: 0.9869364500045776,grad_norm: 0.9779683079237474, iteration: 68361
loss: 1.0977123975753784,grad_norm: 0.9999999593713538, iteration: 68362
loss: 0.9759795665740967,grad_norm: 0.9999996886020412, iteration: 68363
loss: 0.9982341527938843,grad_norm: 0.999998994806847, iteration: 68364
loss: 0.9770196080207825,grad_norm: 0.8015418048881977, iteration: 68365
loss: 1.085861325263977,grad_norm: 0.9859100990964422, iteration: 68366
loss: 0.9926276206970215,grad_norm: 0.9999989304396381, iteration: 68367
loss: 1.0252273082733154,grad_norm: 0.9543818080346349, iteration: 68368
loss: 0.9876950979232788,grad_norm: 0.9457466139073106, iteration: 68369
loss: 1.0506658554077148,grad_norm: 0.9136167778773673, iteration: 68370
loss: 0.9811678528785706,grad_norm: 0.9999989627358346, iteration: 68371
loss: 1.04801607131958,grad_norm: 0.9999991468808399, iteration: 68372
loss: 1.0318198204040527,grad_norm: 0.8815245581899344, iteration: 68373
loss: 0.9602182507514954,grad_norm: 0.8870441106635378, iteration: 68374
loss: 1.041171908378601,grad_norm: 0.999999166884182, iteration: 68375
loss: 0.9345755577087402,grad_norm: 0.9352146378935965, iteration: 68376
loss: 1.0643993616104126,grad_norm: 0.9999996807233125, iteration: 68377
loss: 1.0239505767822266,grad_norm: 0.9999993289362888, iteration: 68378
loss: 0.9988913536071777,grad_norm: 0.9999991160672409, iteration: 68379
loss: 1.023669719696045,grad_norm: 0.9999992633008465, iteration: 68380
loss: 1.0654029846191406,grad_norm: 0.9999998761242271, iteration: 68381
loss: 0.9931317567825317,grad_norm: 0.8469011190773629, iteration: 68382
loss: 1.0635321140289307,grad_norm: 0.9999992975871996, iteration: 68383
loss: 0.9960294365882874,grad_norm: 0.9999990130674493, iteration: 68384
loss: 1.0227584838867188,grad_norm: 0.9999990712090853, iteration: 68385
loss: 0.9921262860298157,grad_norm: 0.9999992111658031, iteration: 68386
loss: 1.0109928846359253,grad_norm: 0.9999995166977509, iteration: 68387
loss: 1.012522578239441,grad_norm: 0.9999992197142231, iteration: 68388
loss: 1.0415353775024414,grad_norm: 0.999999469883131, iteration: 68389
loss: 1.1180024147033691,grad_norm: 0.9999992246683772, iteration: 68390
loss: 1.036744236946106,grad_norm: 0.9999992067080679, iteration: 68391
loss: 1.0276075601577759,grad_norm: 0.8890161470594938, iteration: 68392
loss: 1.026639699935913,grad_norm: 0.999999051382445, iteration: 68393
loss: 0.9925175309181213,grad_norm: 0.9999990716783449, iteration: 68394
loss: 1.083541989326477,grad_norm: 0.9999998189953098, iteration: 68395
loss: 1.008671522140503,grad_norm: 0.9999995316274205, iteration: 68396
loss: 0.9919065833091736,grad_norm: 0.9999991190438124, iteration: 68397
loss: 1.0508637428283691,grad_norm: 0.9999992755073612, iteration: 68398
loss: 0.968275249004364,grad_norm: 0.9999990156072451, iteration: 68399
loss: 1.0413644313812256,grad_norm: 0.8364828979532697, iteration: 68400
loss: 1.0010265111923218,grad_norm: 0.8125954617735839, iteration: 68401
loss: 1.0211800336837769,grad_norm: 0.8986740529925031, iteration: 68402
loss: 1.00563645362854,grad_norm: 0.8983557836790492, iteration: 68403
loss: 0.988860011100769,grad_norm: 0.9902116010629651, iteration: 68404
loss: 1.0705602169036865,grad_norm: 0.999999368051644, iteration: 68405
loss: 1.00223708152771,grad_norm: 0.8399133647638249, iteration: 68406
loss: 1.0603009462356567,grad_norm: 0.9999991433287851, iteration: 68407
loss: 1.0042427778244019,grad_norm: 0.9999991308762868, iteration: 68408
loss: 1.044398307800293,grad_norm: 0.9999995733521969, iteration: 68409
loss: 0.9988453984260559,grad_norm: 0.9999991261590324, iteration: 68410
loss: 1.000344157218933,grad_norm: 0.8655634044881823, iteration: 68411
loss: 1.0805240869522095,grad_norm: 0.999999255715426, iteration: 68412
loss: 1.0107020139694214,grad_norm: 0.9999990688760684, iteration: 68413
loss: 1.0316405296325684,grad_norm: 0.9999992312235153, iteration: 68414
loss: 1.0523254871368408,grad_norm: 0.9035780412495585, iteration: 68415
loss: 0.9854652881622314,grad_norm: 0.9132886661822369, iteration: 68416
loss: 1.003010630607605,grad_norm: 0.8366547805968938, iteration: 68417
loss: 0.9839169383049011,grad_norm: 0.8635792009512862, iteration: 68418
loss: 1.0258197784423828,grad_norm: 0.9999991013713796, iteration: 68419
loss: 1.0893051624298096,grad_norm: 0.9815489462825965, iteration: 68420
loss: 0.9931371808052063,grad_norm: 0.8986532251231844, iteration: 68421
loss: 0.9930149912834167,grad_norm: 0.999999659382142, iteration: 68422
loss: 0.9533565044403076,grad_norm: 0.9021964663723179, iteration: 68423
loss: 1.0061863660812378,grad_norm: 0.999999117942469, iteration: 68424
loss: 1.0073515176773071,grad_norm: 0.999999106425562, iteration: 68425
loss: 1.0130411386489868,grad_norm: 0.9999991694456579, iteration: 68426
loss: 1.0487850904464722,grad_norm: 0.8680043103464534, iteration: 68427
loss: 1.0129196643829346,grad_norm: 0.9999990109101339, iteration: 68428
loss: 1.0637701749801636,grad_norm: 0.8837235738827403, iteration: 68429
loss: 0.9856334924697876,grad_norm: 0.8310194125892867, iteration: 68430
loss: 1.0200653076171875,grad_norm: 0.9494165351312568, iteration: 68431
loss: 1.0576497316360474,grad_norm: 0.9999990053913319, iteration: 68432
loss: 1.012069582939148,grad_norm: 0.999998970464096, iteration: 68433
loss: 1.072252631187439,grad_norm: 0.8599282190281555, iteration: 68434
loss: 1.0057297945022583,grad_norm: 0.9999991531475584, iteration: 68435
loss: 1.036628246307373,grad_norm: 0.9999991515677158, iteration: 68436
loss: 1.0790413618087769,grad_norm: 0.9999997330430414, iteration: 68437
loss: 0.9726929664611816,grad_norm: 0.9914908436994406, iteration: 68438
loss: 0.9979805946350098,grad_norm: 0.9999990887701379, iteration: 68439
loss: 0.974255383014679,grad_norm: 0.8265662984887182, iteration: 68440
loss: 1.0339542627334595,grad_norm: 0.9519350176755208, iteration: 68441
loss: 1.0537763833999634,grad_norm: 0.9999995945133939, iteration: 68442
loss: 1.0207834243774414,grad_norm: 0.9835656695596751, iteration: 68443
loss: 1.0075433254241943,grad_norm: 0.9999990937879464, iteration: 68444
loss: 1.0195735692977905,grad_norm: 0.8892051355576228, iteration: 68445
loss: 0.9850852489471436,grad_norm: 0.9794095209463062, iteration: 68446
loss: 1.0019056797027588,grad_norm: 0.9298718442148614, iteration: 68447
loss: 0.9804714918136597,grad_norm: 0.942332449580156, iteration: 68448
loss: 1.039341926574707,grad_norm: 0.9021652150736391, iteration: 68449
loss: 0.9849034547805786,grad_norm: 0.9346864572808092, iteration: 68450
loss: 1.0109901428222656,grad_norm: 0.999999113758727, iteration: 68451
loss: 1.0253024101257324,grad_norm: 0.9919584825952815, iteration: 68452
loss: 0.9563276171684265,grad_norm: 0.999999123229092, iteration: 68453
loss: 0.9889306426048279,grad_norm: 0.8517510321968975, iteration: 68454
loss: 1.0046536922454834,grad_norm: 0.8391951794617198, iteration: 68455
loss: 1.017687439918518,grad_norm: 0.9999991619666726, iteration: 68456
loss: 1.0448567867279053,grad_norm: 0.9999990552030987, iteration: 68457
loss: 0.9858090877532959,grad_norm: 0.9467680579176428, iteration: 68458
loss: 0.9743050336837769,grad_norm: 0.837763874545116, iteration: 68459
loss: 0.9840047359466553,grad_norm: 0.9999990335807901, iteration: 68460
loss: 1.0185292959213257,grad_norm: 0.7885623064477291, iteration: 68461
loss: 1.0132334232330322,grad_norm: 0.9216047969231822, iteration: 68462
loss: 1.0055134296417236,grad_norm: 0.971785461003196, iteration: 68463
loss: 0.978717565536499,grad_norm: 0.9999993945183491, iteration: 68464
loss: 1.0180885791778564,grad_norm: 0.9766579372309482, iteration: 68465
loss: 0.9826865792274475,grad_norm: 0.892803415655538, iteration: 68466
loss: 1.0123858451843262,grad_norm: 0.9999991059344394, iteration: 68467
loss: 0.9997614622116089,grad_norm: 0.8874864134827027, iteration: 68468
loss: 1.142370581626892,grad_norm: 0.9999992264936732, iteration: 68469
loss: 1.0077141523361206,grad_norm: 0.9489112104439665, iteration: 68470
loss: 1.0021733045578003,grad_norm: 0.9020683993085999, iteration: 68471
loss: 1.0073779821395874,grad_norm: 0.7925039827449029, iteration: 68472
loss: 1.0117591619491577,grad_norm: 0.9999990997551754, iteration: 68473
loss: 1.011470913887024,grad_norm: 0.9262061308224417, iteration: 68474
loss: 1.0664691925048828,grad_norm: 0.9999992571976487, iteration: 68475
loss: 0.9574445486068726,grad_norm: 0.9082993440562401, iteration: 68476
loss: 0.9739187359809875,grad_norm: 0.9999991049749345, iteration: 68477
loss: 0.9988662600517273,grad_norm: 0.8294024679871878, iteration: 68478
loss: 1.0192346572875977,grad_norm: 0.8756279735946557, iteration: 68479
loss: 1.0245434045791626,grad_norm: 0.9999990498371576, iteration: 68480
loss: 1.0034732818603516,grad_norm: 0.9999990553306837, iteration: 68481
loss: 1.013593316078186,grad_norm: 0.9787152168172952, iteration: 68482
loss: 1.032343864440918,grad_norm: 0.8863620260180121, iteration: 68483
loss: 1.0167094469070435,grad_norm: 0.9999990326927133, iteration: 68484
loss: 0.991007924079895,grad_norm: 0.9999990589679718, iteration: 68485
loss: 1.0775507688522339,grad_norm: 0.9999992278938636, iteration: 68486
loss: 1.113984227180481,grad_norm: 0.9999994586332103, iteration: 68487
loss: 1.0123475790023804,grad_norm: 0.999999028384637, iteration: 68488
loss: 1.0724430084228516,grad_norm: 0.8736230700344693, iteration: 68489
loss: 1.0446754693984985,grad_norm: 0.9618038474313297, iteration: 68490
loss: 1.0692111253738403,grad_norm: 0.9999996042227697, iteration: 68491
loss: 1.0043246746063232,grad_norm: 0.9999995641932086, iteration: 68492
loss: 1.01578950881958,grad_norm: 0.999999112287499, iteration: 68493
loss: 1.0210380554199219,grad_norm: 0.8903728371117543, iteration: 68494
loss: 1.0211548805236816,grad_norm: 0.9519678717259148, iteration: 68495
loss: 0.9788511991500854,grad_norm: 0.9865509380220194, iteration: 68496
loss: 1.0283161401748657,grad_norm: 0.9531739411002157, iteration: 68497
loss: 1.0172386169433594,grad_norm: 0.999999695685621, iteration: 68498
loss: 0.9904589653015137,grad_norm: 0.8536785040687667, iteration: 68499
loss: 1.030502438545227,grad_norm: 0.9999992234947773, iteration: 68500
loss: 1.031113624572754,grad_norm: 0.7596051529366423, iteration: 68501
loss: 1.0140727758407593,grad_norm: 0.9999990876768232, iteration: 68502
loss: 1.041838526725769,grad_norm: 0.9999996840317799, iteration: 68503
loss: 1.0492116212844849,grad_norm: 0.931033905570214, iteration: 68504
loss: 0.9864800572395325,grad_norm: 0.9999990374711963, iteration: 68505
loss: 0.9529812335968018,grad_norm: 0.9999990574562754, iteration: 68506
loss: 1.0070866346359253,grad_norm: 0.9999989428506733, iteration: 68507
loss: 0.9874446988105774,grad_norm: 0.9793980852490062, iteration: 68508
loss: 1.0046824216842651,grad_norm: 0.999999438440031, iteration: 68509
loss: 0.9656906127929688,grad_norm: 0.9999991177554503, iteration: 68510
loss: 1.0467420816421509,grad_norm: 0.7062877212796156, iteration: 68511
loss: 1.018742322921753,grad_norm: 0.9999990881527122, iteration: 68512
loss: 0.9876090884208679,grad_norm: 0.946292540495139, iteration: 68513
loss: 1.0089101791381836,grad_norm: 0.9541078764179372, iteration: 68514
loss: 0.9769155979156494,grad_norm: 0.9999990135502103, iteration: 68515
loss: 1.0043455362319946,grad_norm: 0.9405120585742593, iteration: 68516
loss: 0.9845521450042725,grad_norm: 0.8507355676227247, iteration: 68517
loss: 1.0431207418441772,grad_norm: 0.9999990756875947, iteration: 68518
loss: 0.9921203255653381,grad_norm: 0.8908432593483784, iteration: 68519
loss: 1.002549171447754,grad_norm: 0.999999053404454, iteration: 68520
loss: 1.0061042308807373,grad_norm: 0.9501644541246734, iteration: 68521
loss: 1.0019928216934204,grad_norm: 0.999999101669976, iteration: 68522
loss: 1.017370343208313,grad_norm: 0.9382483148590958, iteration: 68523
loss: 0.9986088871955872,grad_norm: 0.9442326249437951, iteration: 68524
loss: 0.9775773882865906,grad_norm: 0.9990830858382109, iteration: 68525
loss: 0.9866956472396851,grad_norm: 0.9999483714692613, iteration: 68526
loss: 0.9833076596260071,grad_norm: 0.9999992909981593, iteration: 68527
loss: 1.0236055850982666,grad_norm: 0.952170769648918, iteration: 68528
loss: 0.9958397150039673,grad_norm: 0.9999990994308551, iteration: 68529
loss: 1.009850025177002,grad_norm: 0.9064188097008553, iteration: 68530
loss: 0.9922893643379211,grad_norm: 0.9999991709901884, iteration: 68531
loss: 1.001018762588501,grad_norm: 0.8745743751489319, iteration: 68532
loss: 0.9938041567802429,grad_norm: 0.9467399068469664, iteration: 68533
loss: 0.9819083213806152,grad_norm: 0.9999992318183755, iteration: 68534
loss: 1.060660481452942,grad_norm: 0.9999997708728354, iteration: 68535
loss: 0.9509195685386658,grad_norm: 0.752705770102439, iteration: 68536
loss: 1.1214954853057861,grad_norm: 0.9999991995692135, iteration: 68537
loss: 0.9819297194480896,grad_norm: 0.8727169715605136, iteration: 68538
loss: 0.9917178750038147,grad_norm: 0.9343693535850068, iteration: 68539
loss: 1.00852632522583,grad_norm: 0.9210110948151162, iteration: 68540
loss: 1.0289095640182495,grad_norm: 0.9999995597368948, iteration: 68541
loss: 1.0720921754837036,grad_norm: 0.9999990070964303, iteration: 68542
loss: 1.1425141096115112,grad_norm: 0.9999992058826935, iteration: 68543
loss: 0.9460089802742004,grad_norm: 0.9361141380700704, iteration: 68544
loss: 1.0188579559326172,grad_norm: 0.9999992636040784, iteration: 68545
loss: 0.9587501287460327,grad_norm: 0.9999990276483612, iteration: 68546
loss: 0.9855175018310547,grad_norm: 0.7847944346068759, iteration: 68547
loss: 1.0856328010559082,grad_norm: 0.9999995153044322, iteration: 68548
loss: 1.014464259147644,grad_norm: 0.999999180846307, iteration: 68549
loss: 0.9949418902397156,grad_norm: 0.835547210211817, iteration: 68550
loss: 1.0242981910705566,grad_norm: 0.9391715862826029, iteration: 68551
loss: 1.174365520477295,grad_norm: 0.9999992846322262, iteration: 68552
loss: 1.0491291284561157,grad_norm: 0.9999990741253909, iteration: 68553
loss: 0.996866762638092,grad_norm: 0.8133914758646211, iteration: 68554
loss: 1.0065252780914307,grad_norm: 0.8983373135197471, iteration: 68555
loss: 1.0663169622421265,grad_norm: 0.9999990178595634, iteration: 68556
loss: 1.022429347038269,grad_norm: 0.9574014559368202, iteration: 68557
loss: 1.090957522392273,grad_norm: 0.9999993732958122, iteration: 68558
loss: 1.0160003900527954,grad_norm: 0.9999993825857263, iteration: 68559
loss: 1.0360199213027954,grad_norm: 0.9490858757585671, iteration: 68560
loss: 1.0773154497146606,grad_norm: 0.9491383513335818, iteration: 68561
loss: 1.0383827686309814,grad_norm: 0.9999991582709545, iteration: 68562
loss: 1.0364954471588135,grad_norm: 0.9999992381752918, iteration: 68563
loss: 1.0080485343933105,grad_norm: 0.9999996176547445, iteration: 68564
loss: 0.991283118724823,grad_norm: 0.9999991001196226, iteration: 68565
loss: 0.9990085363388062,grad_norm: 0.8335353532923021, iteration: 68566
loss: 1.043841004371643,grad_norm: 0.9999993351957641, iteration: 68567
loss: 0.977258026599884,grad_norm: 0.8984999203386774, iteration: 68568
loss: 1.0347750186920166,grad_norm: 0.8720923338037376, iteration: 68569
loss: 1.0207031965255737,grad_norm: 0.9999995129660482, iteration: 68570
loss: 1.0159629583358765,grad_norm: 0.8915732904876842, iteration: 68571
loss: 1.0720683336257935,grad_norm: 0.9112943162657879, iteration: 68572
loss: 0.9752426743507385,grad_norm: 0.9999989601625819, iteration: 68573
loss: 0.9943532347679138,grad_norm: 0.8208122312267706, iteration: 68574
loss: 1.0322139263153076,grad_norm: 0.9999996223687452, iteration: 68575
loss: 1.0209628343582153,grad_norm: 0.9805460049922238, iteration: 68576
loss: 0.9981890320777893,grad_norm: 0.9548269755139837, iteration: 68577
loss: 1.0110502243041992,grad_norm: 0.8914364951604419, iteration: 68578
loss: 0.953245222568512,grad_norm: 0.9999991963700237, iteration: 68579
loss: 1.0257601737976074,grad_norm: 0.9999994569592588, iteration: 68580
loss: 1.0323092937469482,grad_norm: 0.8598544577599732, iteration: 68581
loss: 0.9437954425811768,grad_norm: 0.9745129120225243, iteration: 68582
loss: 0.9820515513420105,grad_norm: 0.9999992290857433, iteration: 68583
loss: 1.0694247484207153,grad_norm: 0.9999994053117975, iteration: 68584
loss: 1.0166970491409302,grad_norm: 0.9999993737414191, iteration: 68585
loss: 0.9983722567558289,grad_norm: 0.9794937509563523, iteration: 68586
loss: 0.949237048625946,grad_norm: 0.999999607280823, iteration: 68587
loss: 1.029089331626892,grad_norm: 0.8949277739019768, iteration: 68588
loss: 1.0034055709838867,grad_norm: 0.9242021498157509, iteration: 68589
loss: 0.9979845881462097,grad_norm: 0.9999991525648763, iteration: 68590
loss: 0.9669662714004517,grad_norm: 0.8932657000442431, iteration: 68591
loss: 1.03016996383667,grad_norm: 0.988536712357573, iteration: 68592
loss: 0.9767448306083679,grad_norm: 0.8369245279580265, iteration: 68593
loss: 0.9649392366409302,grad_norm: 0.8914773417683827, iteration: 68594
loss: 0.9913188815116882,grad_norm: 0.9999989620821328, iteration: 68595
loss: 1.01639723777771,grad_norm: 0.9692556485322351, iteration: 68596
loss: 0.9850909113883972,grad_norm: 0.9999994800512588, iteration: 68597
loss: 1.013401985168457,grad_norm: 0.9999990919022648, iteration: 68598
loss: 1.009764552116394,grad_norm: 0.9170148134616015, iteration: 68599
loss: 1.0969241857528687,grad_norm: 0.9999991632764238, iteration: 68600
loss: 0.997383713722229,grad_norm: 0.9285680099574755, iteration: 68601
loss: 0.9983636140823364,grad_norm: 0.9316644011990838, iteration: 68602
loss: 0.9739396572113037,grad_norm: 0.8332209565595311, iteration: 68603
loss: 1.006554365158081,grad_norm: 0.9999991636537376, iteration: 68604
loss: 0.9780948162078857,grad_norm: 0.999999047390622, iteration: 68605
loss: 0.9907203316688538,grad_norm: 0.9999990978884751, iteration: 68606
loss: 1.0688015222549438,grad_norm: 0.999999235467928, iteration: 68607
loss: 1.0294759273529053,grad_norm: 0.9119488918658155, iteration: 68608
loss: 1.0304838418960571,grad_norm: 0.9999990389182792, iteration: 68609
loss: 1.059401512145996,grad_norm: 0.9999993741055984, iteration: 68610
loss: 1.0395983457565308,grad_norm: 0.9999991788382443, iteration: 68611
loss: 0.9855376482009888,grad_norm: 0.8741560981736949, iteration: 68612
loss: 1.0046244859695435,grad_norm: 0.8775555828133952, iteration: 68613
loss: 0.9933986067771912,grad_norm: 0.9640663621173449, iteration: 68614
loss: 1.004960060119629,grad_norm: 0.9999992108168199, iteration: 68615
loss: 1.0094776153564453,grad_norm: 0.9999991681700026, iteration: 68616
loss: 1.0134185552597046,grad_norm: 0.9875626518055769, iteration: 68617
loss: 0.9985934495925903,grad_norm: 0.9831037372421658, iteration: 68618
loss: 1.011737585067749,grad_norm: 0.9647199085688519, iteration: 68619
loss: 1.0033767223358154,grad_norm: 0.9999992268410988, iteration: 68620
loss: 0.9846997261047363,grad_norm: 0.9999993706142504, iteration: 68621
loss: 1.1154981851577759,grad_norm: 0.9999991568488953, iteration: 68622
loss: 0.9680857062339783,grad_norm: 0.8462036747038686, iteration: 68623
loss: 1.0053249597549438,grad_norm: 0.9028819446845714, iteration: 68624
loss: 1.0234172344207764,grad_norm: 0.9802049582925872, iteration: 68625
loss: 0.9764225482940674,grad_norm: 0.8970513569187445, iteration: 68626
loss: 1.003288745880127,grad_norm: 0.9999991892614285, iteration: 68627
loss: 1.0113918781280518,grad_norm: 0.9999991125971656, iteration: 68628
loss: 1.0339410305023193,grad_norm: 0.9999998683884463, iteration: 68629
loss: 1.0107203722000122,grad_norm: 0.8209569229090146, iteration: 68630
loss: 0.9814680814743042,grad_norm: 0.999999112603585, iteration: 68631
loss: 1.0051178932189941,grad_norm: 0.999999422681696, iteration: 68632
loss: 0.9794865250587463,grad_norm: 0.9210161333866725, iteration: 68633
loss: 0.979684591293335,grad_norm: 0.9580628091318603, iteration: 68634
loss: 1.0080679655075073,grad_norm: 0.9032375589329041, iteration: 68635
loss: 1.0020309686660767,grad_norm: 0.9613112003311394, iteration: 68636
loss: 1.0440714359283447,grad_norm: 0.9146282768143964, iteration: 68637
loss: 0.9740620851516724,grad_norm: 0.8875558938919879, iteration: 68638
loss: 0.9852178692817688,grad_norm: 0.9917566753155501, iteration: 68639
loss: 1.0088722705841064,grad_norm: 0.9999992973352442, iteration: 68640
loss: 0.9773269891738892,grad_norm: 0.9466838214562479, iteration: 68641
loss: 1.0154445171356201,grad_norm: 0.9999990119567445, iteration: 68642
loss: 0.9809695482254028,grad_norm: 0.9999991256531967, iteration: 68643
loss: 0.9658358693122864,grad_norm: 0.9999991762035682, iteration: 68644
loss: 1.0300596952438354,grad_norm: 0.8307300455674546, iteration: 68645
loss: 0.9696503281593323,grad_norm: 0.8498794638390024, iteration: 68646
loss: 0.9718639850616455,grad_norm: 0.9999992546800394, iteration: 68647
loss: 0.9599074721336365,grad_norm: 0.9423298991717962, iteration: 68648
loss: 0.9947710037231445,grad_norm: 0.9999990222392567, iteration: 68649
loss: 1.0159581899642944,grad_norm: 0.8890260103922597, iteration: 68650
loss: 1.04067862033844,grad_norm: 0.9999997224082762, iteration: 68651
loss: 1.0442125797271729,grad_norm: 0.9999996875356557, iteration: 68652
loss: 1.0397419929504395,grad_norm: 0.9999993147423856, iteration: 68653
loss: 1.0740281343460083,grad_norm: 0.9999993807797952, iteration: 68654
loss: 0.9745643138885498,grad_norm: 0.9999992299547734, iteration: 68655
loss: 1.0235719680786133,grad_norm: 0.9764398931737829, iteration: 68656
loss: 0.9805039167404175,grad_norm: 0.9999990928652489, iteration: 68657
loss: 1.006042718887329,grad_norm: 0.9797245500939807, iteration: 68658
loss: 1.0471947193145752,grad_norm: 0.9999991207041125, iteration: 68659
loss: 0.9532918334007263,grad_norm: 0.9999990979291495, iteration: 68660
loss: 0.9793480038642883,grad_norm: 0.8878220749186405, iteration: 68661
loss: 0.9960540533065796,grad_norm: 0.9296675699188857, iteration: 68662
loss: 1.0160514116287231,grad_norm: 0.9119409689954737, iteration: 68663
loss: 0.999476432800293,grad_norm: 0.999999134275761, iteration: 68664
loss: 1.003578543663025,grad_norm: 0.8481047311576764, iteration: 68665
loss: 1.0081182718276978,grad_norm: 0.9999993836713839, iteration: 68666
loss: 0.9776702523231506,grad_norm: 0.981500741331755, iteration: 68667
loss: 1.0487807989120483,grad_norm: 0.933374618637868, iteration: 68668
loss: 0.9894180297851562,grad_norm: 0.9999989766960388, iteration: 68669
loss: 1.0304213762283325,grad_norm: 0.9999992873336283, iteration: 68670
loss: 1.0113214254379272,grad_norm: 0.9167301787887503, iteration: 68671
loss: 0.9890551567077637,grad_norm: 0.7698610069208427, iteration: 68672
loss: 1.0446892976760864,grad_norm: 0.9999998418459081, iteration: 68673
loss: 1.0400326251983643,grad_norm: 0.999999116104207, iteration: 68674
loss: 0.9938330054283142,grad_norm: 0.9814943552943722, iteration: 68675
loss: 1.0398385524749756,grad_norm: 0.999999376750323, iteration: 68676
loss: 1.0306800603866577,grad_norm: 0.9999990213335285, iteration: 68677
loss: 0.9466505646705627,grad_norm: 0.8753468672613024, iteration: 68678
loss: 1.00039541721344,grad_norm: 0.9999990494182154, iteration: 68679
loss: 1.0751876831054688,grad_norm: 0.9999998817166025, iteration: 68680
loss: 1.0111597776412964,grad_norm: 0.9999991102287222, iteration: 68681
loss: 1.0266406536102295,grad_norm: 0.9735025679751934, iteration: 68682
loss: 0.984359085559845,grad_norm: 0.960935761423974, iteration: 68683
loss: 1.0295777320861816,grad_norm: 0.8118456095602731, iteration: 68684
loss: 1.0350830554962158,grad_norm: 0.9999990067305923, iteration: 68685
loss: 1.015313982963562,grad_norm: 0.9999994146070876, iteration: 68686
loss: 1.0142074823379517,grad_norm: 0.9144005051878338, iteration: 68687
loss: 1.0786408185958862,grad_norm: 0.9999991284788489, iteration: 68688
loss: 1.0113946199417114,grad_norm: 0.9069478806204024, iteration: 68689
loss: 1.0690538883209229,grad_norm: 0.986251238006274, iteration: 68690
loss: 0.9928480982780457,grad_norm: 0.9999989898812367, iteration: 68691
loss: 1.0300215482711792,grad_norm: 1.0000000194295178, iteration: 68692
loss: 0.9948602318763733,grad_norm: 0.9206798522422731, iteration: 68693
loss: 1.0233291387557983,grad_norm: 0.8587839897171499, iteration: 68694
loss: 1.0173529386520386,grad_norm: 0.9999991429280323, iteration: 68695
loss: 1.0306897163391113,grad_norm: 0.9999993612567085, iteration: 68696
loss: 1.0056201219558716,grad_norm: 0.9477710141319023, iteration: 68697
loss: 0.9955393075942993,grad_norm: 0.8393518627166566, iteration: 68698
loss: 1.0057531595230103,grad_norm: 0.9248127311235118, iteration: 68699
loss: 0.9757328033447266,grad_norm: 0.9620469378135812, iteration: 68700
loss: 0.9618203639984131,grad_norm: 0.9061807552747239, iteration: 68701
loss: 1.022796630859375,grad_norm: 0.9999996983193148, iteration: 68702
loss: 0.9853148460388184,grad_norm: 0.8941518705874427, iteration: 68703
loss: 0.9837504029273987,grad_norm: 0.920201289247346, iteration: 68704
loss: 1.0122448205947876,grad_norm: 0.773671907671579, iteration: 68705
loss: 1.0127010345458984,grad_norm: 0.9612018178354822, iteration: 68706
loss: 1.0255900621414185,grad_norm: 0.9999991895719885, iteration: 68707
loss: 0.9676735997200012,grad_norm: 0.9999992959089887, iteration: 68708
loss: 1.02367103099823,grad_norm: 0.9999994394172231, iteration: 68709
loss: 1.0086199045181274,grad_norm: 0.9999991631568983, iteration: 68710
loss: 0.9967584013938904,grad_norm: 0.910183922352017, iteration: 68711
loss: 1.0242353677749634,grad_norm: 0.9999991464923008, iteration: 68712
loss: 1.0866081714630127,grad_norm: 0.9999990685987371, iteration: 68713
loss: 1.024972677230835,grad_norm: 0.9999989485620308, iteration: 68714
loss: 1.031030297279358,grad_norm: 0.9999991274553975, iteration: 68715
loss: 1.0410833358764648,grad_norm: 0.9999996390699672, iteration: 68716
loss: 0.9541842341423035,grad_norm: 0.981447502216488, iteration: 68717
loss: 0.9823104739189148,grad_norm: 0.9449067973910684, iteration: 68718
loss: 1.0355348587036133,grad_norm: 0.9999990035590023, iteration: 68719
loss: 1.0519503355026245,grad_norm: 0.9999995313165941, iteration: 68720
loss: 1.0088928937911987,grad_norm: 0.8891651298172856, iteration: 68721
loss: 1.0078188180923462,grad_norm: 0.9999991206532068, iteration: 68722
loss: 0.977353572845459,grad_norm: 0.9999994839926474, iteration: 68723
loss: 0.9775727391242981,grad_norm: 0.9999991090874732, iteration: 68724
loss: 1.023003101348877,grad_norm: 0.802368683619019, iteration: 68725
loss: 0.9905155897140503,grad_norm: 0.9239780637071873, iteration: 68726
loss: 0.9757363796234131,grad_norm: 0.7986238021906947, iteration: 68727
loss: 1.038127064704895,grad_norm: 0.9999994128545634, iteration: 68728
loss: 1.0145204067230225,grad_norm: 0.801144598780182, iteration: 68729
loss: 1.0139045715332031,grad_norm: 0.8915155715534817, iteration: 68730
loss: 0.9674453735351562,grad_norm: 0.9999990775267839, iteration: 68731
loss: 1.0159977674484253,grad_norm: 0.999999072691494, iteration: 68732
loss: 1.0267632007598877,grad_norm: 0.9999997693287039, iteration: 68733
loss: 1.0275468826293945,grad_norm: 0.9999989732303503, iteration: 68734
loss: 1.0068732500076294,grad_norm: 0.8592820635839441, iteration: 68735
loss: 1.02687668800354,grad_norm: 0.9999990953250956, iteration: 68736
loss: 1.0151540040969849,grad_norm: 0.8770667473399063, iteration: 68737
loss: 1.0137494802474976,grad_norm: 0.9999992330969185, iteration: 68738
loss: 0.9790685772895813,grad_norm: 0.9261977536146044, iteration: 68739
loss: 1.0475472211837769,grad_norm: 0.9999990163784954, iteration: 68740
loss: 0.9695286750793457,grad_norm: 0.918944129664368, iteration: 68741
loss: 1.0240918397903442,grad_norm: 0.8034785198630467, iteration: 68742
loss: 1.027641773223877,grad_norm: 0.999999662329698, iteration: 68743
loss: 1.0011380910873413,grad_norm: 0.8800384399002921, iteration: 68744
loss: 0.9912364482879639,grad_norm: 0.9999995383026858, iteration: 68745
loss: 1.0499732494354248,grad_norm: 1.000000067533944, iteration: 68746
loss: 0.9851068258285522,grad_norm: 0.9591989169153664, iteration: 68747
loss: 0.9891495108604431,grad_norm: 0.8563728126703761, iteration: 68748
loss: 0.9972193241119385,grad_norm: 0.8955605702969405, iteration: 68749
loss: 0.9656956195831299,grad_norm: 0.9787162670261922, iteration: 68750
loss: 1.0139389038085938,grad_norm: 0.9999990835047554, iteration: 68751
loss: 1.0477675199508667,grad_norm: 0.9999992775069962, iteration: 68752
loss: 1.1583895683288574,grad_norm: 0.9999999215182603, iteration: 68753
loss: 1.0280908346176147,grad_norm: 0.999999111146997, iteration: 68754
loss: 1.0085728168487549,grad_norm: 0.9999995133063848, iteration: 68755
loss: 1.023691177368164,grad_norm: 0.9179578315414084, iteration: 68756
loss: 1.0372129678726196,grad_norm: 0.9665846821671643, iteration: 68757
loss: 0.9864025115966797,grad_norm: 0.9999990575918345, iteration: 68758
loss: 0.961113691329956,grad_norm: 0.9999996076292451, iteration: 68759
loss: 1.0051428079605103,grad_norm: 0.9271206857820293, iteration: 68760
loss: 1.0339484214782715,grad_norm: 0.8606585798290007, iteration: 68761
loss: 1.2418736219406128,grad_norm: 0.9999991807468348, iteration: 68762
loss: 1.0858758687973022,grad_norm: 0.9354814101917444, iteration: 68763
loss: 0.9957881569862366,grad_norm: 0.8772375366800159, iteration: 68764
loss: 1.0219427347183228,grad_norm: 0.9999994308838817, iteration: 68765
loss: 0.988031804561615,grad_norm: 0.9999989914423847, iteration: 68766
loss: 0.9884039759635925,grad_norm: 0.9058524764914844, iteration: 68767
loss: 1.148401141166687,grad_norm: 0.9999992333421444, iteration: 68768
loss: 1.053830623626709,grad_norm: 0.9999992160582977, iteration: 68769
loss: 1.0667357444763184,grad_norm: 0.9999990452864604, iteration: 68770
loss: 1.1854581832885742,grad_norm: 0.9999994481892875, iteration: 68771
loss: 1.0069161653518677,grad_norm: 0.9385837077190841, iteration: 68772
loss: 1.1222496032714844,grad_norm: 0.9373794926153659, iteration: 68773
loss: 1.0274205207824707,grad_norm: 0.9999991889100487, iteration: 68774
loss: 1.0063083171844482,grad_norm: 0.9084952441590127, iteration: 68775
loss: 1.0071773529052734,grad_norm: 0.8472196913942187, iteration: 68776
loss: 1.11091148853302,grad_norm: 0.9999993805726126, iteration: 68777
loss: 1.0184377431869507,grad_norm: 0.9139765474018171, iteration: 68778
loss: 1.1193395853042603,grad_norm: 0.9999990447640411, iteration: 68779
loss: 1.0481958389282227,grad_norm: 0.9999991319255731, iteration: 68780
loss: 1.1288838386535645,grad_norm: 0.9999992034558111, iteration: 68781
loss: 1.072776198387146,grad_norm: 0.85888486401295, iteration: 68782
loss: 1.0741281509399414,grad_norm: 0.999999390644692, iteration: 68783
loss: 0.9981285333633423,grad_norm: 0.9077415988533827, iteration: 68784
loss: 1.0482929944992065,grad_norm: 0.9999996351142666, iteration: 68785
loss: 1.1101263761520386,grad_norm: 0.9346792451605327, iteration: 68786
loss: 1.0458413362503052,grad_norm: 0.9999990730880578, iteration: 68787
loss: 1.0568666458129883,grad_norm: 0.9999998417053884, iteration: 68788
loss: 1.126118540763855,grad_norm: 0.9999998813310937, iteration: 68789
loss: 1.0944492816925049,grad_norm: 0.9999994473240305, iteration: 68790
loss: 1.1355592012405396,grad_norm: 0.999999344198144, iteration: 68791
loss: 0.9687644839286804,grad_norm: 0.9999990732454708, iteration: 68792
loss: 1.0797652006149292,grad_norm: 0.9999997377364819, iteration: 68793
loss: 1.0393328666687012,grad_norm: 0.9521828426117558, iteration: 68794
loss: 0.9892287254333496,grad_norm: 0.909947667506607, iteration: 68795
loss: 1.0108001232147217,grad_norm: 0.9999992747527295, iteration: 68796
loss: 1.0113428831100464,grad_norm: 0.9715817805251479, iteration: 68797
loss: 1.044294834136963,grad_norm: 0.999999164806226, iteration: 68798
loss: 0.9798189997673035,grad_norm: 0.7662008146561723, iteration: 68799
loss: 1.0463204383850098,grad_norm: 0.999999351590191, iteration: 68800
loss: 0.9904284477233887,grad_norm: 0.9679424359878701, iteration: 68801
loss: 1.0552486181259155,grad_norm: 0.8397277686836271, iteration: 68802
loss: 1.0758734941482544,grad_norm: 0.9999990352053315, iteration: 68803
loss: 1.074278712272644,grad_norm: 0.9999992962576831, iteration: 68804
loss: 1.0328539609909058,grad_norm: 0.9999992863758093, iteration: 68805
loss: 1.300737738609314,grad_norm: 0.9999996371291728, iteration: 68806
loss: 0.9973330497741699,grad_norm: 0.9420068088459186, iteration: 68807
loss: 1.0166068077087402,grad_norm: 0.9999991946903481, iteration: 68808
loss: 0.9588481187820435,grad_norm: 0.9999993015910389, iteration: 68809
loss: 0.9832028746604919,grad_norm: 0.9999991056197536, iteration: 68810
loss: 1.0429580211639404,grad_norm: 0.9650558556526015, iteration: 68811
loss: 1.0544941425323486,grad_norm: 0.9999993745591778, iteration: 68812
loss: 1.0059537887573242,grad_norm: 0.9323770445623706, iteration: 68813
loss: 1.0061537027359009,grad_norm: 0.9620297536295406, iteration: 68814
loss: 0.9933691620826721,grad_norm: 0.999999728029624, iteration: 68815
loss: 0.9910280108451843,grad_norm: 0.8693810308403503, iteration: 68816
loss: 1.0249321460723877,grad_norm: 0.9999991237490357, iteration: 68817
loss: 1.0112683773040771,grad_norm: 0.842321334119969, iteration: 68818
loss: 0.9907295107841492,grad_norm: 0.9873953297403066, iteration: 68819
loss: 1.0144203901290894,grad_norm: 0.9999991209461625, iteration: 68820
loss: 0.9998970031738281,grad_norm: 0.9006509919379547, iteration: 68821
loss: 1.0020724534988403,grad_norm: 0.9402300412460802, iteration: 68822
loss: 1.0084744691848755,grad_norm: 0.868657558574629, iteration: 68823
loss: 0.9794157147407532,grad_norm: 0.937857292890431, iteration: 68824
loss: 1.019543170928955,grad_norm: 0.9999990209842582, iteration: 68825
loss: 0.9893730878829956,grad_norm: 0.7295112743007443, iteration: 68826
loss: 0.9956612586975098,grad_norm: 0.9999990876733285, iteration: 68827
loss: 1.0593761205673218,grad_norm: 0.8967505393957611, iteration: 68828
loss: 0.9955175518989563,grad_norm: 0.8883472784393293, iteration: 68829
loss: 0.9988313913345337,grad_norm: 0.9869250748492874, iteration: 68830
loss: 0.989176332950592,grad_norm: 0.937048872011946, iteration: 68831
loss: 0.9989989399909973,grad_norm: 0.8943635427319049, iteration: 68832
loss: 1.0508642196655273,grad_norm: 0.9724749961472955, iteration: 68833
loss: 0.987934410572052,grad_norm: 0.7915401278817705, iteration: 68834
loss: 0.9835777282714844,grad_norm: 0.9999991618548649, iteration: 68835
loss: 1.0105153322219849,grad_norm: 0.9670666497234746, iteration: 68836
loss: 1.0219297409057617,grad_norm: 0.9999991036064924, iteration: 68837
loss: 1.0055885314941406,grad_norm: 0.9138825177002052, iteration: 68838
loss: 1.0617554187774658,grad_norm: 0.9999991649656569, iteration: 68839
loss: 1.0990447998046875,grad_norm: 0.9999991711269075, iteration: 68840
loss: 0.9838680624961853,grad_norm: 0.9999991300372042, iteration: 68841
loss: 1.0033985376358032,grad_norm: 0.989008543875591, iteration: 68842
loss: 0.9874821305274963,grad_norm: 0.769364571639191, iteration: 68843
loss: 1.0075982809066772,grad_norm: 0.9999990616354172, iteration: 68844
loss: 0.9978620409965515,grad_norm: 0.8402821659118206, iteration: 68845
loss: 1.0443742275238037,grad_norm: 0.9999992812160844, iteration: 68846
loss: 1.0103298425674438,grad_norm: 0.9999989737979621, iteration: 68847
loss: 0.9832813739776611,grad_norm: 0.9521427324953113, iteration: 68848
loss: 0.9870310425758362,grad_norm: 0.8758754041297808, iteration: 68849
loss: 1.056816816329956,grad_norm: 0.9999991406592617, iteration: 68850
loss: 1.008500337600708,grad_norm: 0.895383335337458, iteration: 68851
loss: 0.9963376522064209,grad_norm: 0.8988558282291471, iteration: 68852
loss: 0.9951176047325134,grad_norm: 0.9999992405536802, iteration: 68853
loss: 0.9933525323867798,grad_norm: 0.8985110022137892, iteration: 68854
loss: 1.042458176612854,grad_norm: 0.9999999877964382, iteration: 68855
loss: 1.0395121574401855,grad_norm: 0.9999992901271728, iteration: 68856
loss: 0.996953010559082,grad_norm: 0.8550517448634466, iteration: 68857
loss: 1.0330179929733276,grad_norm: 0.8875714242048068, iteration: 68858
loss: 0.9923597574234009,grad_norm: 0.9999990127211197, iteration: 68859
loss: 1.011746883392334,grad_norm: 0.9999989939567981, iteration: 68860
loss: 1.0229049921035767,grad_norm: 0.9999992478934416, iteration: 68861
loss: 0.9718892574310303,grad_norm: 0.9999990517231628, iteration: 68862
loss: 0.9996265172958374,grad_norm: 0.9999989504472865, iteration: 68863
loss: 1.0037320852279663,grad_norm: 0.9999990005763204, iteration: 68864
loss: 0.9851150512695312,grad_norm: 0.8194165008146093, iteration: 68865
loss: 1.0007853507995605,grad_norm: 0.9999991936585716, iteration: 68866
loss: 1.019645094871521,grad_norm: 0.8042058351572394, iteration: 68867
loss: 1.0165139436721802,grad_norm: 0.9999989616274582, iteration: 68868
loss: 0.9847915172576904,grad_norm: 0.9999995276836473, iteration: 68869
loss: 1.0147022008895874,grad_norm: 0.9919541118169223, iteration: 68870
loss: 1.0724711418151855,grad_norm: 0.9782097211122631, iteration: 68871
loss: 0.9832532405853271,grad_norm: 0.9369961239664751, iteration: 68872
loss: 0.989124059677124,grad_norm: 0.9671698708370189, iteration: 68873
loss: 0.9992000460624695,grad_norm: 0.9999998663414819, iteration: 68874
loss: 0.9767891764640808,grad_norm: 0.870835591686961, iteration: 68875
loss: 1.0110760927200317,grad_norm: 0.9671275329409948, iteration: 68876
loss: 1.0010942220687866,grad_norm: 0.8828469057290171, iteration: 68877
loss: 0.9923487305641174,grad_norm: 0.9999990043210013, iteration: 68878
loss: 1.0244498252868652,grad_norm: 0.9999990046983293, iteration: 68879
loss: 1.0020440816879272,grad_norm: 0.8200839537167923, iteration: 68880
loss: 1.0034620761871338,grad_norm: 0.9952680066672723, iteration: 68881
loss: 0.9986609220504761,grad_norm: 0.8342744406513907, iteration: 68882
loss: 0.9991992115974426,grad_norm: 0.9231675428338236, iteration: 68883
loss: 0.9636320471763611,grad_norm: 0.7998034468956357, iteration: 68884
loss: 0.9863920211791992,grad_norm: 0.8294742457532723, iteration: 68885
loss: 1.056769609451294,grad_norm: 0.9999996355861474, iteration: 68886
loss: 0.9861545562744141,grad_norm: 0.9942235725953927, iteration: 68887
loss: 1.0055161714553833,grad_norm: 0.9999996832939494, iteration: 68888
loss: 0.9966364502906799,grad_norm: 0.9188434877669855, iteration: 68889
loss: 1.007387399673462,grad_norm: 0.7679147300971215, iteration: 68890
loss: 1.0251195430755615,grad_norm: 0.8610314586013251, iteration: 68891
loss: 1.0031846761703491,grad_norm: 0.9999994226677246, iteration: 68892
loss: 1.0086315870285034,grad_norm: 0.855457107555058, iteration: 68893
loss: 0.9825209975242615,grad_norm: 0.8601244632743231, iteration: 68894
loss: 1.0530145168304443,grad_norm: 0.9999991092818437, iteration: 68895
loss: 1.019705057144165,grad_norm: 0.8902811116283306, iteration: 68896
loss: 0.9760474562644958,grad_norm: 0.9999990187523982, iteration: 68897
loss: 1.0486042499542236,grad_norm: 0.8885710580511667, iteration: 68898
loss: 1.0303003787994385,grad_norm: 0.9999993105816307, iteration: 68899
loss: 0.9622856378555298,grad_norm: 0.9999988402446257, iteration: 68900
loss: 1.0327292680740356,grad_norm: 0.9999991607967343, iteration: 68901
loss: 0.999320924282074,grad_norm: 0.999999068874842, iteration: 68902
loss: 1.0209214687347412,grad_norm: 0.9128158442687914, iteration: 68903
loss: 0.9678170084953308,grad_norm: 0.9875702008445728, iteration: 68904
loss: 1.0078835487365723,grad_norm: 0.9999990043148014, iteration: 68905
loss: 1.0122199058532715,grad_norm: 0.8074586945371098, iteration: 68906
loss: 0.9709678292274475,grad_norm: 0.9999991900149388, iteration: 68907
loss: 0.996874213218689,grad_norm: 0.9999989622379655, iteration: 68908
loss: 0.9763705730438232,grad_norm: 0.7711931344817744, iteration: 68909
loss: 1.0206001996994019,grad_norm: 0.9654629730885256, iteration: 68910
loss: 1.0143707990646362,grad_norm: 0.9999995686143047, iteration: 68911
loss: 1.015464186668396,grad_norm: 0.9999994647054362, iteration: 68912
loss: 0.9810497164726257,grad_norm: 0.8684313236249137, iteration: 68913
loss: 0.9980713129043579,grad_norm: 0.9206870863680203, iteration: 68914
loss: 1.0294843912124634,grad_norm: 0.9999997266857584, iteration: 68915
loss: 1.0016586780548096,grad_norm: 0.9573139976762073, iteration: 68916
loss: 0.9936935901641846,grad_norm: 0.9999991122604103, iteration: 68917
loss: 0.9899634122848511,grad_norm: 0.974581910964251, iteration: 68918
loss: 0.962639570236206,grad_norm: 0.9467678962585914, iteration: 68919
loss: 1.0483100414276123,grad_norm: 0.9999997297509893, iteration: 68920
loss: 1.0544534921646118,grad_norm: 0.9999991185056344, iteration: 68921
loss: 1.0228185653686523,grad_norm: 0.779218539652401, iteration: 68922
loss: 0.9878314137458801,grad_norm: 0.9999990285801186, iteration: 68923
loss: 1.02848219871521,grad_norm: 0.9453402345621734, iteration: 68924
loss: 0.9950534105300903,grad_norm: 0.9999991660533174, iteration: 68925
loss: 0.9778828024864197,grad_norm: 0.9458118319541243, iteration: 68926
loss: 1.0834941864013672,grad_norm: 1.0000000329097074, iteration: 68927
loss: 0.9998677372932434,grad_norm: 0.999200819500262, iteration: 68928
loss: 1.0144034624099731,grad_norm: 0.8687658894239632, iteration: 68929
loss: 0.9860355854034424,grad_norm: 0.9365826120271846, iteration: 68930
loss: 1.0176984071731567,grad_norm: 0.9999990271427888, iteration: 68931
loss: 1.0867129564285278,grad_norm: 0.9999991068348021, iteration: 68932
loss: 1.0340752601623535,grad_norm: 0.9999994174136241, iteration: 68933
loss: 1.0111159086227417,grad_norm: 0.9086306172312615, iteration: 68934
loss: 1.0341418981552124,grad_norm: 0.9201931108841342, iteration: 68935
loss: 1.0063616037368774,grad_norm: 0.9336347809005061, iteration: 68936
loss: 0.9781767129898071,grad_norm: 0.9999992034377128, iteration: 68937
loss: 1.0713844299316406,grad_norm: 0.999999020866488, iteration: 68938
loss: 1.0226919651031494,grad_norm: 0.9999991289463861, iteration: 68939
loss: 0.9795370697975159,grad_norm: 0.863656584240321, iteration: 68940
loss: 1.32395601272583,grad_norm: 0.9999990782583157, iteration: 68941
loss: 1.0577770471572876,grad_norm: 0.9999999155560201, iteration: 68942
loss: 0.9720897674560547,grad_norm: 0.8871239010295789, iteration: 68943
loss: 0.9885861873626709,grad_norm: 0.958050731900622, iteration: 68944
loss: 1.0080167055130005,grad_norm: 0.927513334324805, iteration: 68945
loss: 0.9974755644798279,grad_norm: 0.8214512660023349, iteration: 68946
loss: 1.0249158143997192,grad_norm: 0.8419302707206964, iteration: 68947
loss: 1.0336012840270996,grad_norm: 0.9999995335578991, iteration: 68948
loss: 1.039557695388794,grad_norm: 0.9999996076585428, iteration: 68949
loss: 1.0972509384155273,grad_norm: 0.9999994401888797, iteration: 68950
loss: 1.0921491384506226,grad_norm: 0.9999991758556366, iteration: 68951
loss: 1.0159671306610107,grad_norm: 0.9774432929303036, iteration: 68952
loss: 1.0671510696411133,grad_norm: 0.9999993313608712, iteration: 68953
loss: 0.9823152422904968,grad_norm: 0.9925990368389022, iteration: 68954
loss: 1.0012892484664917,grad_norm: 0.9577023381496499, iteration: 68955
loss: 1.0214923620224,grad_norm: 0.9879171848679174, iteration: 68956
loss: 0.9773818254470825,grad_norm: 0.8741655972465068, iteration: 68957
loss: 1.0229219198226929,grad_norm: 0.939889183594061, iteration: 68958
loss: 1.0416091680526733,grad_norm: 0.9999991919515487, iteration: 68959
loss: 1.007468581199646,grad_norm: 0.9999999133178964, iteration: 68960
loss: 1.0320615768432617,grad_norm: 0.999999804876119, iteration: 68961
loss: 0.9989808201789856,grad_norm: 0.8867569330798116, iteration: 68962
loss: 1.0083483457565308,grad_norm: 0.9999990961168661, iteration: 68963
loss: 0.9901885986328125,grad_norm: 0.827797807032803, iteration: 68964
loss: 1.0249077081680298,grad_norm: 0.9947786167674948, iteration: 68965
loss: 0.9914161562919617,grad_norm: 0.9262505639687928, iteration: 68966
loss: 1.090343713760376,grad_norm: 0.9999993459589857, iteration: 68967
loss: 1.0160799026489258,grad_norm: 0.7926535262865354, iteration: 68968
loss: 1.1228944063186646,grad_norm: 0.9999996508724285, iteration: 68969
loss: 1.0221093893051147,grad_norm: 0.899357858242669, iteration: 68970
loss: 0.9960300326347351,grad_norm: 0.9999997260470059, iteration: 68971
loss: 1.0247459411621094,grad_norm: 0.8324566421742361, iteration: 68972
loss: 0.9921029210090637,grad_norm: 0.9999990593413506, iteration: 68973
loss: 1.063991665840149,grad_norm: 0.9999991865702188, iteration: 68974
loss: 1.0257521867752075,grad_norm: 0.9999994667653818, iteration: 68975
loss: 1.0399587154388428,grad_norm: 0.8897827787265083, iteration: 68976
loss: 1.0461963415145874,grad_norm: 0.999999511744648, iteration: 68977
loss: 1.014216423034668,grad_norm: 0.9662812926444951, iteration: 68978
loss: 0.9998314380645752,grad_norm: 0.9999997916861931, iteration: 68979
loss: 0.9936211705207825,grad_norm: 0.965842970485464, iteration: 68980
loss: 1.0089106559753418,grad_norm: 0.8403720299633217, iteration: 68981
loss: 0.997186541557312,grad_norm: 0.915138872244226, iteration: 68982
loss: 0.968181848526001,grad_norm: 0.9999989770363024, iteration: 68983
loss: 0.9762327671051025,grad_norm: 0.7995109576033659, iteration: 68984
loss: 0.9838100671768188,grad_norm: 0.965611740133508, iteration: 68985
loss: 1.0156670808792114,grad_norm: 0.9999992757031796, iteration: 68986
loss: 1.0203330516815186,grad_norm: 0.9999989500891409, iteration: 68987
loss: 1.0069878101348877,grad_norm: 0.9999998014491296, iteration: 68988
loss: 1.0199445486068726,grad_norm: 0.9016337602525341, iteration: 68989
loss: 1.0132545232772827,grad_norm: 0.9519904145720827, iteration: 68990
loss: 1.0774037837982178,grad_norm: 0.9241834070935901, iteration: 68991
loss: 1.0684012174606323,grad_norm: 0.9999993397462162, iteration: 68992
loss: 1.0100423097610474,grad_norm: 0.999999124186411, iteration: 68993
loss: 1.0006868839263916,grad_norm: 0.9999991105272173, iteration: 68994
loss: 0.9918628931045532,grad_norm: 0.9565872372981846, iteration: 68995
loss: 1.0079231262207031,grad_norm: 0.7348156892891279, iteration: 68996
loss: 1.0079604387283325,grad_norm: 0.7622658784590324, iteration: 68997
loss: 1.0449657440185547,grad_norm: 0.9999998786369508, iteration: 68998
loss: 1.038375735282898,grad_norm: 0.9999992235398352, iteration: 68999
loss: 1.0396567583084106,grad_norm: 0.9999996185105002, iteration: 69000
loss: 1.039411187171936,grad_norm: 0.9999990002569462, iteration: 69001
loss: 1.0066912174224854,grad_norm: 0.9161516022500668, iteration: 69002
loss: 1.1600145101547241,grad_norm: 0.9999996865455075, iteration: 69003
loss: 1.0032382011413574,grad_norm: 0.754828799015648, iteration: 69004
loss: 1.0090328454971313,grad_norm: 0.9999991343003529, iteration: 69005
loss: 1.000532627105713,grad_norm: 0.971324416730074, iteration: 69006
loss: 0.9825637340545654,grad_norm: 0.8689032944790136, iteration: 69007
loss: 1.0149866342544556,grad_norm: 0.8988001700850465, iteration: 69008
loss: 1.0249418020248413,grad_norm: 0.9999991594005311, iteration: 69009
loss: 0.9876964092254639,grad_norm: 0.9999990651280565, iteration: 69010
loss: 0.991943895816803,grad_norm: 0.8172179008762724, iteration: 69011
loss: 1.0047019720077515,grad_norm: 0.7609637533843708, iteration: 69012
loss: 1.0468292236328125,grad_norm: 0.9999999294373609, iteration: 69013
loss: 0.9940879940986633,grad_norm: 0.9502381497761185, iteration: 69014
loss: 1.0515236854553223,grad_norm: 0.9588714126656567, iteration: 69015
loss: 1.0675476789474487,grad_norm: 0.9999991864231194, iteration: 69016
loss: 0.9724195599555969,grad_norm: 0.907482963010187, iteration: 69017
loss: 1.0292768478393555,grad_norm: 0.9999996805085861, iteration: 69018
loss: 1.0640124082565308,grad_norm: 0.9999990544315491, iteration: 69019
loss: 1.0807324647903442,grad_norm: 0.999999577022731, iteration: 69020
loss: 0.9645107984542847,grad_norm: 0.9999990483876042, iteration: 69021
loss: 0.9816002249717712,grad_norm: 0.851792116991648, iteration: 69022
loss: 1.082621693611145,grad_norm: 0.9999998908606039, iteration: 69023
loss: 1.0006804466247559,grad_norm: 0.9999992807739334, iteration: 69024
loss: 1.0098450183868408,grad_norm: 0.9297467691998075, iteration: 69025
loss: 1.0079342126846313,grad_norm: 0.9120927775921014, iteration: 69026
loss: 0.9764498472213745,grad_norm: 0.9999991561843413, iteration: 69027
loss: 0.9902134537696838,grad_norm: 0.9999998927440694, iteration: 69028
loss: 0.9768224954605103,grad_norm: 0.9999991427559143, iteration: 69029
loss: 1.0035086870193481,grad_norm: 0.996616353505733, iteration: 69030
loss: 0.9880709052085876,grad_norm: 0.8481265263503879, iteration: 69031
loss: 1.0050523281097412,grad_norm: 0.9999992851732797, iteration: 69032
loss: 1.0045266151428223,grad_norm: 0.9391194484296633, iteration: 69033
loss: 1.0281767845153809,grad_norm: 0.9638827131730443, iteration: 69034
loss: 0.956675112247467,grad_norm: 0.9117084694797143, iteration: 69035
loss: 0.998380184173584,grad_norm: 0.7623137709131979, iteration: 69036
loss: 1.0392260551452637,grad_norm: 0.9373664030066227, iteration: 69037
loss: 0.9930915236473083,grad_norm: 0.9157985884659372, iteration: 69038
loss: 1.0181363821029663,grad_norm: 0.9111550813562036, iteration: 69039
loss: 1.0241413116455078,grad_norm: 0.9999991715722907, iteration: 69040
loss: 1.3134479522705078,grad_norm: 0.9999996608483235, iteration: 69041
loss: 1.0115044116973877,grad_norm: 0.9999994679986631, iteration: 69042
loss: 1.0151513814926147,grad_norm: 0.9999995617386954, iteration: 69043
loss: 1.017985224723816,grad_norm: 0.9999991071882574, iteration: 69044
loss: 1.095855712890625,grad_norm: 0.9999994810085773, iteration: 69045
loss: 1.0214426517486572,grad_norm: 0.9999991591523517, iteration: 69046
loss: 1.1410174369812012,grad_norm: 0.9999997060555524, iteration: 69047
loss: 1.0074944496154785,grad_norm: 0.999999192101747, iteration: 69048
loss: 1.0848077535629272,grad_norm: 0.9416446292374554, iteration: 69049
loss: 1.067526936531067,grad_norm: 0.9999998502942362, iteration: 69050
loss: 1.0143225193023682,grad_norm: 0.8532185454632236, iteration: 69051
loss: 1.1738481521606445,grad_norm: 0.9621402525187334, iteration: 69052
loss: 1.0480177402496338,grad_norm: 0.9999995365665517, iteration: 69053
loss: 1.0738906860351562,grad_norm: 0.9999990538774213, iteration: 69054
loss: 1.0189985036849976,grad_norm: 0.9008435218297987, iteration: 69055
loss: 1.0144823789596558,grad_norm: 0.9999991750585303, iteration: 69056
loss: 1.082882046699524,grad_norm: 0.999999440163252, iteration: 69057
loss: 1.0531085729599,grad_norm: 0.9999996692006923, iteration: 69058
loss: 0.9821023344993591,grad_norm: 0.9999990991222022, iteration: 69059
loss: 1.0063406229019165,grad_norm: 0.9001593392023538, iteration: 69060
loss: 1.0213665962219238,grad_norm: 0.9672966419442863, iteration: 69061
loss: 1.0432744026184082,grad_norm: 0.9999996736408292, iteration: 69062
loss: 0.9973186254501343,grad_norm: 0.8817726783187104, iteration: 69063
loss: 1.0293474197387695,grad_norm: 0.9999992268199126, iteration: 69064
loss: 0.9810323715209961,grad_norm: 0.9999990766472306, iteration: 69065
loss: 0.9782939553260803,grad_norm: 0.9999992725238344, iteration: 69066
loss: 1.0036007165908813,grad_norm: 0.9527537603566444, iteration: 69067
loss: 1.016351580619812,grad_norm: 0.9999991695606842, iteration: 69068
loss: 0.9480195641517639,grad_norm: 0.9999991115826958, iteration: 69069
loss: 1.0463265180587769,grad_norm: 0.9999992320231917, iteration: 69070
loss: 1.0116167068481445,grad_norm: 0.999999335645366, iteration: 69071
loss: 0.9903257489204407,grad_norm: 0.900050663346594, iteration: 69072
loss: 1.023084044456482,grad_norm: 0.9999989886936569, iteration: 69073
loss: 0.9617063999176025,grad_norm: 0.9999992515160459, iteration: 69074
loss: 1.0083625316619873,grad_norm: 0.8113454844247638, iteration: 69075
loss: 0.983201265335083,grad_norm: 0.9999990338379403, iteration: 69076
loss: 1.0255309343338013,grad_norm: 0.9999991909049427, iteration: 69077
loss: 1.0085023641586304,grad_norm: 0.9947958080318081, iteration: 69078
loss: 0.9424884915351868,grad_norm: 0.8612651706585244, iteration: 69079
loss: 1.0282869338989258,grad_norm: 0.8822560517901178, iteration: 69080
loss: 1.0338977575302124,grad_norm: 0.9078298502697671, iteration: 69081
loss: 1.0068471431732178,grad_norm: 0.9783304855839616, iteration: 69082
loss: 0.9985672235488892,grad_norm: 0.9999991141714658, iteration: 69083
loss: 1.053030252456665,grad_norm: 0.9999991513751725, iteration: 69084
loss: 0.9711251258850098,grad_norm: 0.9999994753974497, iteration: 69085
loss: 1.0490704774856567,grad_norm: 0.9999991477671786, iteration: 69086
loss: 0.9689943194389343,grad_norm: 0.9999994851795733, iteration: 69087
loss: 0.9825079441070557,grad_norm: 0.999998967037838, iteration: 69088
loss: 1.0126584768295288,grad_norm: 0.9655144802447664, iteration: 69089
loss: 1.0102015733718872,grad_norm: 0.903537918746031, iteration: 69090
loss: 1.0132091045379639,grad_norm: 0.926799102824141, iteration: 69091
loss: 0.9849153161048889,grad_norm: 0.8460555036437559, iteration: 69092
loss: 1.089044451713562,grad_norm: 0.9999992161496041, iteration: 69093
loss: 1.0970520973205566,grad_norm: 0.9999993368607444, iteration: 69094
loss: 1.1077938079833984,grad_norm: 0.9999993392834509, iteration: 69095
loss: 0.9947518110275269,grad_norm: 0.9999991004983029, iteration: 69096
loss: 1.0441856384277344,grad_norm: 0.9999996835176186, iteration: 69097
loss: 0.9696760773658752,grad_norm: 0.92544956261359, iteration: 69098
loss: 0.9776561260223389,grad_norm: 0.9787361186165006, iteration: 69099
loss: 1.113641619682312,grad_norm: 0.9999996991699626, iteration: 69100
loss: 1.0029617547988892,grad_norm: 0.9557650326122158, iteration: 69101
loss: 1.019925594329834,grad_norm: 0.8671283140030124, iteration: 69102
loss: 0.9855583906173706,grad_norm: 0.9325019989812123, iteration: 69103
loss: 1.0227837562561035,grad_norm: 0.8914696413043418, iteration: 69104
loss: 1.0102934837341309,grad_norm: 0.9984904305740475, iteration: 69105
loss: 1.097501277923584,grad_norm: 0.9999996843524622, iteration: 69106
loss: 1.0702519416809082,grad_norm: 0.9999991196081094, iteration: 69107
loss: 1.0825245380401611,grad_norm: 0.9999991082512819, iteration: 69108
loss: 0.9995766282081604,grad_norm: 0.9535035958461816, iteration: 69109
loss: 1.0420178174972534,grad_norm: 0.9999991036408749, iteration: 69110
loss: 1.0691779851913452,grad_norm: 0.9999991296799983, iteration: 69111
loss: 1.0545570850372314,grad_norm: 0.9999992163205811, iteration: 69112
loss: 1.0272151231765747,grad_norm: 0.9015581297254031, iteration: 69113
loss: 1.0094828605651855,grad_norm: 0.9999994775955928, iteration: 69114
loss: 1.0431801080703735,grad_norm: 0.9999991709607425, iteration: 69115
loss: 1.097726821899414,grad_norm: 0.9999991269863896, iteration: 69116
loss: 1.0124045610427856,grad_norm: 0.8437234095891984, iteration: 69117
loss: 1.011831521987915,grad_norm: 0.9623102219161533, iteration: 69118
loss: 1.0028561353683472,grad_norm: 0.9086843193150376, iteration: 69119
loss: 1.005043625831604,grad_norm: 0.9999990445706769, iteration: 69120
loss: 1.0055370330810547,grad_norm: 0.9999997897234446, iteration: 69121
loss: 0.9979655146598816,grad_norm: 0.9960180802747137, iteration: 69122
loss: 0.9915944933891296,grad_norm: 0.8873115198112638, iteration: 69123
loss: 1.0629839897155762,grad_norm: 0.999999053726146, iteration: 69124
loss: 1.0577960014343262,grad_norm: 0.999999954311633, iteration: 69125
loss: 0.9698716998100281,grad_norm: 0.9999992363908043, iteration: 69126
loss: 1.0723422765731812,grad_norm: 0.8910827694905372, iteration: 69127
loss: 1.018505573272705,grad_norm: 0.7730207275517964, iteration: 69128
loss: 1.0106346607208252,grad_norm: 0.9999990950167044, iteration: 69129
loss: 1.0184290409088135,grad_norm: 0.7478817844527572, iteration: 69130
loss: 0.9511982202529907,grad_norm: 0.9342585092497743, iteration: 69131
loss: 1.0090537071228027,grad_norm: 0.932911831638799, iteration: 69132
loss: 0.9813143014907837,grad_norm: 0.7351070684936238, iteration: 69133
loss: 0.982147216796875,grad_norm: 0.8360130742228263, iteration: 69134
loss: 1.01930832862854,grad_norm: 0.9999991402536053, iteration: 69135
loss: 1.0165516138076782,grad_norm: 0.9999990744906674, iteration: 69136
loss: 1.0207228660583496,grad_norm: 0.9999991291159989, iteration: 69137
loss: 0.9876567125320435,grad_norm: 0.8175510455699933, iteration: 69138
loss: 0.9991527795791626,grad_norm: 0.9763869485177276, iteration: 69139
loss: 0.9900005459785461,grad_norm: 0.8022172947892413, iteration: 69140
loss: 1.072797417640686,grad_norm: 0.999999432321521, iteration: 69141
loss: 0.9963797926902771,grad_norm: 0.9154655977880443, iteration: 69142
loss: 1.0182842016220093,grad_norm: 0.9838402982547252, iteration: 69143
loss: 1.0035048723220825,grad_norm: 0.7700651312743281, iteration: 69144
loss: 1.0979748964309692,grad_norm: 0.9018568919139571, iteration: 69145
loss: 1.0250581502914429,grad_norm: 0.9999992542651619, iteration: 69146
loss: 1.0039136409759521,grad_norm: 0.7867431546585208, iteration: 69147
loss: 1.0144188404083252,grad_norm: 0.9999993547057555, iteration: 69148
loss: 1.0102907419204712,grad_norm: 0.9220903829226827, iteration: 69149
loss: 1.0299856662750244,grad_norm: 0.999999133111292, iteration: 69150
loss: 1.0271692276000977,grad_norm: 0.9999991520145166, iteration: 69151
loss: 0.9846310019493103,grad_norm: 0.9999990527768169, iteration: 69152
loss: 1.020333170890808,grad_norm: 0.999999120246135, iteration: 69153
loss: 0.9948427081108093,grad_norm: 0.9473340551572578, iteration: 69154
loss: 1.0571867227554321,grad_norm: 0.9999991519519725, iteration: 69155
loss: 0.9771064519882202,grad_norm: 0.801524035716392, iteration: 69156
loss: 1.001767873764038,grad_norm: 0.9999993022564998, iteration: 69157
loss: 1.0119636058807373,grad_norm: 0.9999996360157313, iteration: 69158
loss: 1.0220893621444702,grad_norm: 0.9999990784180917, iteration: 69159
loss: 0.9710381627082825,grad_norm: 0.9872263079419419, iteration: 69160
loss: 0.9650040864944458,grad_norm: 0.9999991399626764, iteration: 69161
loss: 0.989001989364624,grad_norm: 0.999999160599207, iteration: 69162
loss: 0.9986783862113953,grad_norm: 0.8661593963202593, iteration: 69163
loss: 1.0295765399932861,grad_norm: 0.9509545747442104, iteration: 69164
loss: 1.090883731842041,grad_norm: 0.8886332920645538, iteration: 69165
loss: 0.9838478565216064,grad_norm: 0.9327092965089367, iteration: 69166
loss: 0.9967983961105347,grad_norm: 0.8593422760374759, iteration: 69167
loss: 1.0184745788574219,grad_norm: 0.9999991339604354, iteration: 69168
loss: 0.9715316891670227,grad_norm: 0.8841130190512385, iteration: 69169
loss: 0.9706057906150818,grad_norm: 0.9068568113593353, iteration: 69170
loss: 1.0144405364990234,grad_norm: 0.9613517660202424, iteration: 69171
loss: 0.9763539433479309,grad_norm: 1.0000000208377975, iteration: 69172
loss: 1.0606473684310913,grad_norm: 0.7659481534126851, iteration: 69173
loss: 1.0483112335205078,grad_norm: 0.9408394073280151, iteration: 69174
loss: 1.023747205734253,grad_norm: 0.9999992255169791, iteration: 69175
loss: 1.010452389717102,grad_norm: 0.8758634774604704, iteration: 69176
loss: 1.0087518692016602,grad_norm: 0.7981310312578542, iteration: 69177
loss: 0.9705973267555237,grad_norm: 0.8918166196528973, iteration: 69178
loss: 1.0366756916046143,grad_norm: 0.9999996009617097, iteration: 69179
loss: 0.9991417527198792,grad_norm: 0.9999991760022889, iteration: 69180
loss: 1.0175020694732666,grad_norm: 0.999999145566649, iteration: 69181
loss: 0.9700756072998047,grad_norm: 0.7188046142642626, iteration: 69182
loss: 0.998371958732605,grad_norm: 0.8826790601028124, iteration: 69183
loss: 1.00991690158844,grad_norm: 0.8356104947540136, iteration: 69184
loss: 0.9713799357414246,grad_norm: 0.7975268113240088, iteration: 69185
loss: 0.9914900064468384,grad_norm: 0.8216831995628574, iteration: 69186
loss: 1.0345635414123535,grad_norm: 0.905496467695723, iteration: 69187
loss: 0.9632706642150879,grad_norm: 0.9999990861966557, iteration: 69188
loss: 0.9673339128494263,grad_norm: 0.9999991333430697, iteration: 69189
loss: 1.0161898136138916,grad_norm: 0.8710292104492253, iteration: 69190
loss: 1.0216047763824463,grad_norm: 0.9341995746247045, iteration: 69191
loss: 0.9944407939910889,grad_norm: 0.847573387123803, iteration: 69192
loss: 0.9829283356666565,grad_norm: 0.8542576334681106, iteration: 69193
loss: 0.985956072807312,grad_norm: 0.999999072027208, iteration: 69194
loss: 0.9928251504898071,grad_norm: 0.999999293440151, iteration: 69195
loss: 0.9904190301895142,grad_norm: 0.9999993375989638, iteration: 69196
loss: 0.9870873093605042,grad_norm: 0.9509144244204091, iteration: 69197
loss: 0.9632254838943481,grad_norm: 0.9459669291714938, iteration: 69198
loss: 0.978756844997406,grad_norm: 0.9665050119010012, iteration: 69199
loss: 1.0318101644515991,grad_norm: 0.8862531519221883, iteration: 69200
loss: 0.9854156970977783,grad_norm: 0.8989426684951515, iteration: 69201
loss: 1.0279000997543335,grad_norm: 0.9548272414339286, iteration: 69202
loss: 1.017821192741394,grad_norm: 0.8535368625921833, iteration: 69203
loss: 1.007370114326477,grad_norm: 0.9999991457692073, iteration: 69204
loss: 0.9872094392776489,grad_norm: 0.99999917834941, iteration: 69205
loss: 1.0078588724136353,grad_norm: 0.9892774338887562, iteration: 69206
loss: 1.0385820865631104,grad_norm: 0.8619570336206175, iteration: 69207
loss: 0.9901919960975647,grad_norm: 0.9223925291052396, iteration: 69208
loss: 1.0211924314498901,grad_norm: 0.7904392927244048, iteration: 69209
loss: 1.0081932544708252,grad_norm: 0.9999990753911119, iteration: 69210
loss: 0.9824427962303162,grad_norm: 0.9426654148128921, iteration: 69211
loss: 1.1030231714248657,grad_norm: 0.9999990725533374, iteration: 69212
loss: 0.9867877960205078,grad_norm: 0.9808525445072572, iteration: 69213
loss: 0.9900929927825928,grad_norm: 0.7893312169695068, iteration: 69214
loss: 1.0021460056304932,grad_norm: 0.9999992094627291, iteration: 69215
loss: 0.9969370365142822,grad_norm: 0.8118843655140051, iteration: 69216
loss: 1.0331438779830933,grad_norm: 0.9999990012595186, iteration: 69217
loss: 1.0235605239868164,grad_norm: 0.9999991946550628, iteration: 69218
loss: 0.995552122592926,grad_norm: 0.9999991029875323, iteration: 69219
loss: 1.0190784931182861,grad_norm: 0.9755052688689313, iteration: 69220
loss: 0.9946895837783813,grad_norm: 0.9999990263642881, iteration: 69221
loss: 1.0213091373443604,grad_norm: 0.9999991779580765, iteration: 69222
loss: 1.0107088088989258,grad_norm: 0.8533597451926822, iteration: 69223
loss: 1.0028932094573975,grad_norm: 0.7885147559684564, iteration: 69224
loss: 0.9979308843612671,grad_norm: 0.8869292555481136, iteration: 69225
loss: 0.9981892704963684,grad_norm: 0.7910385021504598, iteration: 69226
loss: 1.0395557880401611,grad_norm: 0.9999990312316833, iteration: 69227
loss: 1.0162612199783325,grad_norm: 0.9999997446813838, iteration: 69228
loss: 0.992636501789093,grad_norm: 0.8393501451499383, iteration: 69229
loss: 0.9706664681434631,grad_norm: 0.9999991182400759, iteration: 69230
loss: 1.0049978494644165,grad_norm: 0.9999992271065852, iteration: 69231
loss: 0.9799963235855103,grad_norm: 0.9999993411897085, iteration: 69232
loss: 1.1223278045654297,grad_norm: 0.9999992919362874, iteration: 69233
loss: 0.9937853217124939,grad_norm: 0.9766353791724381, iteration: 69234
loss: 0.9857336282730103,grad_norm: 0.9744751942382188, iteration: 69235
loss: 0.9883330464363098,grad_norm: 0.9999991373890716, iteration: 69236
loss: 1.063055157661438,grad_norm: 0.9452329083585292, iteration: 69237
loss: 0.9867551326751709,grad_norm: 0.9999990705101217, iteration: 69238
loss: 1.0146996974945068,grad_norm: 0.9091859493716329, iteration: 69239
loss: 0.9569941163063049,grad_norm: 0.9853822564224386, iteration: 69240
loss: 1.0022066831588745,grad_norm: 0.9999989907908119, iteration: 69241
loss: 1.0282419919967651,grad_norm: 0.8838197277979085, iteration: 69242
loss: 1.0743581056594849,grad_norm: 0.9999992350618208, iteration: 69243
loss: 0.9815336465835571,grad_norm: 0.9999991650595179, iteration: 69244
loss: 0.9972667098045349,grad_norm: 0.8139951101245221, iteration: 69245
loss: 0.9998794794082642,grad_norm: 0.9999991015924286, iteration: 69246
loss: 1.1256835460662842,grad_norm: 0.9999996802028546, iteration: 69247
loss: 1.0132564306259155,grad_norm: 0.9999991705028658, iteration: 69248
loss: 1.0344873666763306,grad_norm: 0.7620837814232814, iteration: 69249
loss: 1.0488053560256958,grad_norm: 0.9006750573905583, iteration: 69250
loss: 1.029726505279541,grad_norm: 0.9999991700047142, iteration: 69251
loss: 1.0190486907958984,grad_norm: 0.9999991084658629, iteration: 69252
loss: 1.0022528171539307,grad_norm: 0.7682579279065879, iteration: 69253
loss: 0.9969812631607056,grad_norm: 0.8501288282127613, iteration: 69254
loss: 1.1329728364944458,grad_norm: 0.9999998758597956, iteration: 69255
loss: 1.0735182762145996,grad_norm: 0.9999992451078987, iteration: 69256
loss: 1.0172817707061768,grad_norm: 0.9827958775353306, iteration: 69257
loss: 1.0129621028900146,grad_norm: 0.9944632322396163, iteration: 69258
loss: 1.0129238367080688,grad_norm: 0.9999990680598696, iteration: 69259
loss: 1.0394595861434937,grad_norm: 0.9999994023232536, iteration: 69260
loss: 0.9999793767929077,grad_norm: 0.9170728542154089, iteration: 69261
loss: 0.9956772923469543,grad_norm: 0.8850195155754501, iteration: 69262
loss: 1.0000218152999878,grad_norm: 0.8472747397834278, iteration: 69263
loss: 1.0557496547698975,grad_norm: 0.7783815491318385, iteration: 69264
loss: 0.9820558428764343,grad_norm: 0.974207638168012, iteration: 69265
loss: 1.0546516180038452,grad_norm: 0.909052414789426, iteration: 69266
loss: 1.0182479619979858,grad_norm: 0.9999991813693754, iteration: 69267
loss: 0.9709287881851196,grad_norm: 0.9999991457210947, iteration: 69268
loss: 1.019659399986267,grad_norm: 0.9996474793027688, iteration: 69269
loss: 0.9892323017120361,grad_norm: 0.8994859310664196, iteration: 69270
loss: 1.1336455345153809,grad_norm: 0.950063704534258, iteration: 69271
loss: 0.9875093698501587,grad_norm: 0.9763995130811721, iteration: 69272
loss: 1.0684707164764404,grad_norm: 0.9999993253535868, iteration: 69273
loss: 1.1229257583618164,grad_norm: 0.9999996407748962, iteration: 69274
loss: 1.0387873649597168,grad_norm: 0.9549052618761584, iteration: 69275
loss: 1.0126898288726807,grad_norm: 0.8511391523652312, iteration: 69276
loss: 0.9862068295478821,grad_norm: 0.9621975006717823, iteration: 69277
loss: 1.177836537361145,grad_norm: 0.9999990528126449, iteration: 69278
loss: 1.0007073879241943,grad_norm: 0.8136405156236476, iteration: 69279
loss: 1.0264222621917725,grad_norm: 0.9999992423234786, iteration: 69280
loss: 1.1104377508163452,grad_norm: 0.9999997306843652, iteration: 69281
loss: 1.0054243803024292,grad_norm: 0.8336880913715692, iteration: 69282
loss: 1.0046297311782837,grad_norm: 0.7010908155384246, iteration: 69283
loss: 1.025308609008789,grad_norm: 0.9999999300622759, iteration: 69284
loss: 0.9886972904205322,grad_norm: 0.9107840264992122, iteration: 69285
loss: 1.1365628242492676,grad_norm: 0.9999992608488849, iteration: 69286
loss: 1.1111762523651123,grad_norm: 0.9408047481486149, iteration: 69287
loss: 1.2215701341629028,grad_norm: 0.999999074645652, iteration: 69288
loss: 1.3850948810577393,grad_norm: 0.999999891927261, iteration: 69289
loss: 1.1926095485687256,grad_norm: 0.9999992422968926, iteration: 69290
loss: 1.1017056703567505,grad_norm: 0.9999993184766078, iteration: 69291
loss: 1.1525239944458008,grad_norm: 0.9999991368922772, iteration: 69292
loss: 1.0925554037094116,grad_norm: 0.9999990956118614, iteration: 69293
loss: 1.0048702955245972,grad_norm: 0.9999993616096208, iteration: 69294
loss: 1.3179537057876587,grad_norm: 0.9999992289853529, iteration: 69295
loss: 1.3256796598434448,grad_norm: 0.9999999032238368, iteration: 69296
loss: 1.2441813945770264,grad_norm: 0.9999993557523738, iteration: 69297
loss: 1.3427464962005615,grad_norm: 0.9999997422223867, iteration: 69298
loss: 1.1677875518798828,grad_norm: 0.9999997845691664, iteration: 69299
loss: 1.0465350151062012,grad_norm: 0.9999990616618952, iteration: 69300
loss: 1.0269533395767212,grad_norm: 0.9898120769947452, iteration: 69301
loss: 1.1120233535766602,grad_norm: 0.9999995757444929, iteration: 69302
loss: 1.4279608726501465,grad_norm: 0.9999998937926438, iteration: 69303
loss: 1.0940803289413452,grad_norm: 0.9999991263978891, iteration: 69304
loss: 1.0900118350982666,grad_norm: 0.9999993472321262, iteration: 69305
loss: 1.2595958709716797,grad_norm: 0.9999999804731454, iteration: 69306
loss: 1.0237727165222168,grad_norm: 0.9782462549772302, iteration: 69307
loss: 1.0318872928619385,grad_norm: 0.9999992661584032, iteration: 69308
loss: 0.9983529448509216,grad_norm: 0.9999994647762755, iteration: 69309
loss: 1.0260264873504639,grad_norm: 0.9999999995093247, iteration: 69310
loss: 0.9865713119506836,grad_norm: 0.918527508499606, iteration: 69311
loss: 1.204200029373169,grad_norm: 0.9999998579839506, iteration: 69312
loss: 0.9757125973701477,grad_norm: 0.9378776601087978, iteration: 69313
loss: 1.0836018323898315,grad_norm: 0.9999992169007578, iteration: 69314
loss: 1.018198013305664,grad_norm: 0.7864139937279631, iteration: 69315
loss: 1.0487010478973389,grad_norm: 0.7982195717875593, iteration: 69316
loss: 1.0841113328933716,grad_norm: 0.8897499135339411, iteration: 69317
loss: 1.161125898361206,grad_norm: 0.9999992357140953, iteration: 69318
loss: 0.9668466448783875,grad_norm: 0.9999991304289175, iteration: 69319
loss: 1.0241572856903076,grad_norm: 0.9999991461525323, iteration: 69320
loss: 1.3278330564498901,grad_norm: 0.9999997527134219, iteration: 69321
loss: 1.0253946781158447,grad_norm: 0.9999992893238236, iteration: 69322
loss: 0.9809336066246033,grad_norm: 0.9999991003014296, iteration: 69323
loss: 1.6074708700180054,grad_norm: 0.9999999137491618, iteration: 69324
loss: 1.2786829471588135,grad_norm: 0.9999998934385993, iteration: 69325
loss: 1.3361091613769531,grad_norm: 0.9999998246656653, iteration: 69326
loss: 1.4990172386169434,grad_norm: 0.9999993401081081, iteration: 69327
loss: 1.1405543088912964,grad_norm: 0.9999990718900719, iteration: 69328
loss: 1.2855175733566284,grad_norm: 0.9999992802164024, iteration: 69329
loss: 1.0001152753829956,grad_norm: 0.942099664366838, iteration: 69330
loss: 1.0998456478118896,grad_norm: 0.9999994996706679, iteration: 69331
loss: 1.0277185440063477,grad_norm: 0.9999991967094681, iteration: 69332
loss: 1.229756474494934,grad_norm: 0.9999997136752629, iteration: 69333
loss: 1.2331937551498413,grad_norm: 0.9999994236802292, iteration: 69334
loss: 1.093713402748108,grad_norm: 0.9999999507186034, iteration: 69335
loss: 1.0352014303207397,grad_norm: 0.999999146384065, iteration: 69336
loss: 1.4286375045776367,grad_norm: 0.9999995716323526, iteration: 69337
loss: 1.3281259536743164,grad_norm: 0.9999995769605852, iteration: 69338
loss: 1.110271692276001,grad_norm: 0.9999995950643841, iteration: 69339
loss: 1.3705898523330688,grad_norm: 0.9999993839844881, iteration: 69340
loss: 1.0734741687774658,grad_norm: 0.9381272223558592, iteration: 69341
loss: 1.727090835571289,grad_norm: 0.9999997207994971, iteration: 69342
loss: 1.1519325971603394,grad_norm: 0.9999998375141503, iteration: 69343
loss: 1.1390397548675537,grad_norm: 0.9999992424564699, iteration: 69344
loss: 1.1260465383529663,grad_norm: 0.9999997705661344, iteration: 69345
loss: 0.9856299161911011,grad_norm: 0.9999993777493407, iteration: 69346
loss: 1.3716379404067993,grad_norm: 0.9999993387673266, iteration: 69347
loss: 1.1983877420425415,grad_norm: 0.9999994703759121, iteration: 69348
loss: 1.1693410873413086,grad_norm: 0.9999992460791863, iteration: 69349
loss: 1.284531593322754,grad_norm: 0.9999994739186611, iteration: 69350
loss: 1.0918253660202026,grad_norm: 0.9999995226036617, iteration: 69351
loss: 1.0333315134048462,grad_norm: 0.9999992034249732, iteration: 69352
loss: 1.0275696516036987,grad_norm: 0.9999994422119756, iteration: 69353
loss: 1.0686174631118774,grad_norm: 0.9999991693891348, iteration: 69354
loss: 1.025597095489502,grad_norm: 0.8669073274731677, iteration: 69355
loss: 1.0804778337478638,grad_norm: 0.9999990947551015, iteration: 69356
loss: 1.1124976873397827,grad_norm: 0.9999997323215438, iteration: 69357
loss: 0.9998291730880737,grad_norm: 0.9999999981358625, iteration: 69358
loss: 0.9995393753051758,grad_norm: 0.9999991423001319, iteration: 69359
loss: 0.9942184686660767,grad_norm: 0.9999992082147003, iteration: 69360
loss: 1.0949575901031494,grad_norm: 0.8935799507740149, iteration: 69361
loss: 1.0484161376953125,grad_norm: 0.9999997952474573, iteration: 69362
loss: 1.0003043413162231,grad_norm: 0.9999991111395069, iteration: 69363
loss: 0.9832940101623535,grad_norm: 0.8017617906034114, iteration: 69364
loss: 1.0301306247711182,grad_norm: 0.9999995876863516, iteration: 69365
loss: 0.9890622496604919,grad_norm: 0.9311886642615931, iteration: 69366
loss: 0.9746350049972534,grad_norm: 0.9184614987178507, iteration: 69367
loss: 0.9997493624687195,grad_norm: 0.9999993093850851, iteration: 69368
loss: 1.0978622436523438,grad_norm: 0.9999997831682389, iteration: 69369
loss: 1.0825730562210083,grad_norm: 0.9999998838751131, iteration: 69370
loss: 0.9932602047920227,grad_norm: 0.9999991513747657, iteration: 69371
loss: 0.9944831728935242,grad_norm: 0.9721854569600983, iteration: 69372
loss: 0.9710030555725098,grad_norm: 0.9999991263181316, iteration: 69373
loss: 1.1148958206176758,grad_norm: 0.9999991262475842, iteration: 69374
loss: 0.9328292012214661,grad_norm: 0.9724724835858127, iteration: 69375
loss: 0.9678256511688232,grad_norm: 0.8787039356073602, iteration: 69376
loss: 1.0243254899978638,grad_norm: 0.9999990518704149, iteration: 69377
loss: 0.9874722361564636,grad_norm: 0.9621789769883768, iteration: 69378
loss: 1.0308854579925537,grad_norm: 0.9999994335025296, iteration: 69379
loss: 1.009534239768982,grad_norm: 0.9129191399966416, iteration: 69380
loss: 0.9666117429733276,grad_norm: 0.9325382011037443, iteration: 69381
loss: 0.9874361157417297,grad_norm: 0.9952292060683959, iteration: 69382
loss: 1.0044848918914795,grad_norm: 0.9999992941693742, iteration: 69383
loss: 1.0242929458618164,grad_norm: 0.793782733678175, iteration: 69384
loss: 1.131447434425354,grad_norm: 1.0000000407320588, iteration: 69385
loss: 1.0033020973205566,grad_norm: 0.9999991491669996, iteration: 69386
loss: 1.0430010557174683,grad_norm: 0.8833855317255892, iteration: 69387
loss: 1.0422320365905762,grad_norm: 0.9999993827811838, iteration: 69388
loss: 1.0095938444137573,grad_norm: 0.877500335478453, iteration: 69389
loss: 1.2067240476608276,grad_norm: 0.9999991200207813, iteration: 69390
loss: 0.9960150122642517,grad_norm: 0.8791980634142699, iteration: 69391
loss: 0.9721410274505615,grad_norm: 0.8649538455680896, iteration: 69392
loss: 1.014635443687439,grad_norm: 0.999999133850764, iteration: 69393
loss: 0.9987490773200989,grad_norm: 0.9825750152790965, iteration: 69394
loss: 1.135866403579712,grad_norm: 0.9999993651759451, iteration: 69395
loss: 1.026624083518982,grad_norm: 0.9726801362868528, iteration: 69396
loss: 1.0334657430648804,grad_norm: 0.9999992870673822, iteration: 69397
loss: 1.1125335693359375,grad_norm: 0.8942925264596097, iteration: 69398
loss: 0.9995403289794922,grad_norm: 0.9999997933623807, iteration: 69399
loss: 1.0786254405975342,grad_norm: 0.9999996538691744, iteration: 69400
loss: 1.0045578479766846,grad_norm: 0.999999134597601, iteration: 69401
loss: 1.2504079341888428,grad_norm: 0.9999999402764144, iteration: 69402
loss: 1.05014967918396,grad_norm: 0.9999995517864602, iteration: 69403
loss: 1.0066202878952026,grad_norm: 0.9999994341182374, iteration: 69404
loss: 1.0090042352676392,grad_norm: 0.9464844522373457, iteration: 69405
loss: 1.0323385000228882,grad_norm: 0.9999991379634432, iteration: 69406
loss: 1.0124025344848633,grad_norm: 0.8952190533075232, iteration: 69407
loss: 0.9894514679908752,grad_norm: 0.8693305514903746, iteration: 69408
loss: 1.184971809387207,grad_norm: 0.9999991751311483, iteration: 69409
loss: 1.2120819091796875,grad_norm: 0.9999998556646011, iteration: 69410
loss: 1.035110592842102,grad_norm: 0.9999997565901142, iteration: 69411
loss: 1.1537702083587646,grad_norm: 0.9999991193474577, iteration: 69412
loss: 1.1016697883605957,grad_norm: 0.9999996752755589, iteration: 69413
loss: 1.1389143466949463,grad_norm: 0.9999996481251203, iteration: 69414
loss: 1.0197758674621582,grad_norm: 0.9873341522932929, iteration: 69415
loss: 0.9741958975791931,grad_norm: 0.8690950536578137, iteration: 69416
loss: 1.0149471759796143,grad_norm: 0.9565928790591579, iteration: 69417
loss: 0.9954049587249756,grad_norm: 0.8713611606281529, iteration: 69418
loss: 0.9748616814613342,grad_norm: 0.8467795265816879, iteration: 69419
loss: 1.0077271461486816,grad_norm: 0.9999999638530256, iteration: 69420
loss: 1.0868312120437622,grad_norm: 0.8883470049242335, iteration: 69421
loss: 1.050120234489441,grad_norm: 0.8635817440611115, iteration: 69422
loss: 1.0201590061187744,grad_norm: 0.9999990833623931, iteration: 69423
loss: 0.9749470353126526,grad_norm: 0.8695201471256836, iteration: 69424
loss: 1.0250808000564575,grad_norm: 0.9999991513087704, iteration: 69425
loss: 1.0253095626831055,grad_norm: 0.9999991106525753, iteration: 69426
loss: 0.9460150599479675,grad_norm: 0.8793593172461133, iteration: 69427
loss: 1.0416817665100098,grad_norm: 0.9999993837992425, iteration: 69428
loss: 0.9935934543609619,grad_norm: 0.9999991635131025, iteration: 69429
loss: 0.9882841110229492,grad_norm: 0.9564763916745344, iteration: 69430
loss: 1.0351088047027588,grad_norm: 0.9999990194331169, iteration: 69431
loss: 1.0964475870132446,grad_norm: 0.9999994007659058, iteration: 69432
loss: 1.023246169090271,grad_norm: 0.9999991958100499, iteration: 69433
loss: 0.9442757964134216,grad_norm: 0.7618951853336027, iteration: 69434
loss: 1.0239002704620361,grad_norm: 0.8844627100781294, iteration: 69435
loss: 1.0238677263259888,grad_norm: 0.9999990655826378, iteration: 69436
loss: 1.0194811820983887,grad_norm: 0.9999990673493929, iteration: 69437
loss: 0.976097822189331,grad_norm: 0.9999995629230347, iteration: 69438
loss: 0.9797274470329285,grad_norm: 0.98812784091589, iteration: 69439
loss: 0.9697218537330627,grad_norm: 0.9850174753531896, iteration: 69440
loss: 1.0189355611801147,grad_norm: 0.9999991151955074, iteration: 69441
loss: 1.1264010667800903,grad_norm: 0.9999998747630009, iteration: 69442
loss: 1.0334359407424927,grad_norm: 0.9999991554354557, iteration: 69443
loss: 1.0135629177093506,grad_norm: 0.9925906762806098, iteration: 69444
loss: 1.0318963527679443,grad_norm: 0.9498458189104169, iteration: 69445
loss: 0.9667959809303284,grad_norm: 0.9999991639087762, iteration: 69446
loss: 1.017372488975525,grad_norm: 0.9999992178395041, iteration: 69447
loss: 1.075354814529419,grad_norm: 1.0000000015697414, iteration: 69448
loss: 1.0192986726760864,grad_norm: 0.8124267620825532, iteration: 69449
loss: 1.0238277912139893,grad_norm: 0.9999990230115984, iteration: 69450
loss: 1.1165237426757812,grad_norm: 0.999999452414546, iteration: 69451
loss: 1.0163617134094238,grad_norm: 0.9999995856848316, iteration: 69452
loss: 1.0117266178131104,grad_norm: 0.9999991820837143, iteration: 69453
loss: 1.2420506477355957,grad_norm: 0.9999991030829107, iteration: 69454
loss: 0.9993746876716614,grad_norm: 0.9999992313957052, iteration: 69455
loss: 1.038081169128418,grad_norm: 0.8746218210091475, iteration: 69456
loss: 0.9968364238739014,grad_norm: 0.9077096394118581, iteration: 69457
loss: 1.007851243019104,grad_norm: 0.8199495338854548, iteration: 69458
loss: 1.0108941793441772,grad_norm: 0.9999990058278805, iteration: 69459
loss: 0.98638916015625,grad_norm: 0.999999248868798, iteration: 69460
loss: 1.0116097927093506,grad_norm: 0.9202398899094093, iteration: 69461
loss: 1.099082112312317,grad_norm: 0.9999997016648576, iteration: 69462
loss: 1.0030678510665894,grad_norm: 0.9983458478690701, iteration: 69463
loss: 0.9876943230628967,grad_norm: 0.9999990838574976, iteration: 69464
loss: 1.0084619522094727,grad_norm: 0.8855234482100849, iteration: 69465
loss: 1.0084730386734009,grad_norm: 0.9300918717577985, iteration: 69466
loss: 0.9802006483078003,grad_norm: 0.8082159001052586, iteration: 69467
loss: 0.9861927032470703,grad_norm: 0.8991183309022825, iteration: 69468
loss: 0.9859620928764343,grad_norm: 0.9999989136779048, iteration: 69469
loss: 1.01865816116333,grad_norm: 0.8522303486339288, iteration: 69470
loss: 1.02176034450531,grad_norm: 0.9999992733998876, iteration: 69471
loss: 0.9722866415977478,grad_norm: 0.9999989420793074, iteration: 69472
loss: 1.2184104919433594,grad_norm: 0.9999989990084768, iteration: 69473
loss: 1.0886491537094116,grad_norm: 0.9999992311521985, iteration: 69474
loss: 1.2361669540405273,grad_norm: 0.9999999526734484, iteration: 69475
loss: 1.1660594940185547,grad_norm: 0.9999996630437463, iteration: 69476
loss: 1.1025387048721313,grad_norm: 0.9999995674107475, iteration: 69477
loss: 0.9886562824249268,grad_norm: 0.8916467291389738, iteration: 69478
loss: 1.0300371646881104,grad_norm: 0.999999485456735, iteration: 69479
loss: 1.1535791158676147,grad_norm: 1.0000000180559656, iteration: 69480
loss: 1.0799118280410767,grad_norm: 0.9999991501690885, iteration: 69481
loss: 1.0866731405258179,grad_norm: 0.9999993492760827, iteration: 69482
loss: 1.0750550031661987,grad_norm: 0.9999990688957955, iteration: 69483
loss: 1.1083941459655762,grad_norm: 0.9999991432907016, iteration: 69484
loss: 1.0622142553329468,grad_norm: 0.9999992503865067, iteration: 69485
loss: 1.0638576745986938,grad_norm: 0.9999995754099028, iteration: 69486
loss: 0.9583520889282227,grad_norm: 0.9999989785067351, iteration: 69487
loss: 0.999060332775116,grad_norm: 0.9999990384542982, iteration: 69488
loss: 1.1543493270874023,grad_norm: 0.9999992049577542, iteration: 69489
loss: 1.0135650634765625,grad_norm: 0.9999989390265553, iteration: 69490
loss: 0.9977290034294128,grad_norm: 0.7534485510321329, iteration: 69491
loss: 1.0778506994247437,grad_norm: 0.9999998296345102, iteration: 69492
loss: 0.9834775924682617,grad_norm: 0.999999174430281, iteration: 69493
loss: 1.101159930229187,grad_norm: 0.999999868887822, iteration: 69494
loss: 1.1866989135742188,grad_norm: 0.9999998591247615, iteration: 69495
loss: 1.0153924226760864,grad_norm: 0.999999020197205, iteration: 69496
loss: 1.106574296951294,grad_norm: 0.9999998388133919, iteration: 69497
loss: 1.147871732711792,grad_norm: 0.9999998903523366, iteration: 69498
loss: 1.0313847064971924,grad_norm: 0.9999994063273592, iteration: 69499
loss: 1.0397289991378784,grad_norm: 0.999999838745378, iteration: 69500
loss: 1.0182416439056396,grad_norm: 0.9999992968398412, iteration: 69501
loss: 1.1819875240325928,grad_norm: 0.9999996659128312, iteration: 69502
loss: 1.1026064157485962,grad_norm: 0.9999993105040157, iteration: 69503
loss: 1.0789861679077148,grad_norm: 0.908370273014144, iteration: 69504
loss: 1.0040432214736938,grad_norm: 0.9999997858473496, iteration: 69505
loss: 1.0507142543792725,grad_norm: 0.9999994828365999, iteration: 69506
loss: 0.9648253917694092,grad_norm: 0.8920441090945073, iteration: 69507
loss: 1.0373965501785278,grad_norm: 0.9999994692171764, iteration: 69508
loss: 1.0369616746902466,grad_norm: 0.8717876635342527, iteration: 69509
loss: 1.0124232769012451,grad_norm: 0.8368622395722303, iteration: 69510
loss: 0.9421078562736511,grad_norm: 0.9887259901505696, iteration: 69511
loss: 0.9639419317245483,grad_norm: 0.7507927113399767, iteration: 69512
loss: 0.9855667948722839,grad_norm: 0.9999991100088056, iteration: 69513
loss: 1.0182517766952515,grad_norm: 0.9999998166580331, iteration: 69514
loss: 1.0124481916427612,grad_norm: 0.9713118476159419, iteration: 69515
loss: 0.9755106568336487,grad_norm: 0.9999997272418609, iteration: 69516
loss: 1.06233811378479,grad_norm: 0.9115303758771712, iteration: 69517
loss: 0.9988145232200623,grad_norm: 0.9045677397466224, iteration: 69518
loss: 0.9569279551506042,grad_norm: 0.963540508637781, iteration: 69519
loss: 1.0099014043807983,grad_norm: 0.851680704114916, iteration: 69520
loss: 1.0953254699707031,grad_norm: 0.999999520061009, iteration: 69521
loss: 0.9634636640548706,grad_norm: 0.9307994295644222, iteration: 69522
loss: 1.0123779773712158,grad_norm: 0.9438681122657251, iteration: 69523
loss: 0.9802275896072388,grad_norm: 0.9999990844291093, iteration: 69524
loss: 1.2317047119140625,grad_norm: 0.9999990850783473, iteration: 69525
loss: 1.012100100517273,grad_norm: 0.9223282081953974, iteration: 69526
loss: 1.240667700767517,grad_norm: 0.999999831952165, iteration: 69527
loss: 1.0317972898483276,grad_norm: 0.9999992236581436, iteration: 69528
loss: 0.9832038283348083,grad_norm: 0.8814506193273008, iteration: 69529
loss: 0.9879007339477539,grad_norm: 0.9999992850371399, iteration: 69530
loss: 1.256424069404602,grad_norm: 0.9999995985112717, iteration: 69531
loss: 1.0353477001190186,grad_norm: 0.9999995434391745, iteration: 69532
loss: 1.023075819015503,grad_norm: 0.8717813604358466, iteration: 69533
loss: 1.0506457090377808,grad_norm: 0.9999996605479746, iteration: 69534
loss: 1.0059341192245483,grad_norm: 0.9517848971308966, iteration: 69535
loss: 1.2447606325149536,grad_norm: 0.9999999092314015, iteration: 69536
loss: 0.9862675666809082,grad_norm: 0.848509816072369, iteration: 69537
loss: 1.0214498043060303,grad_norm: 0.999999152739918, iteration: 69538
loss: 1.0922621488571167,grad_norm: 0.9999995863044865, iteration: 69539
loss: 0.9920756220817566,grad_norm: 0.9999998565473706, iteration: 69540
loss: 0.9981618523597717,grad_norm: 0.930968122200994, iteration: 69541
loss: 1.2440236806869507,grad_norm: 0.9999998207509595, iteration: 69542
loss: 1.1127514839172363,grad_norm: 0.9999995919060398, iteration: 69543
loss: 1.0371572971343994,grad_norm: 0.9847667141928592, iteration: 69544
loss: 1.268141269683838,grad_norm: 0.999999764829364, iteration: 69545
loss: 0.9614599347114563,grad_norm: 0.9999992285731388, iteration: 69546
loss: 1.0064886808395386,grad_norm: 0.9999992905621087, iteration: 69547
loss: 0.9788371324539185,grad_norm: 0.9999991841906725, iteration: 69548
loss: 1.2510380744934082,grad_norm: 0.9999996202524042, iteration: 69549
loss: 1.324642539024353,grad_norm: 0.9999993660273231, iteration: 69550
loss: 1.011880874633789,grad_norm: 0.9999995780652967, iteration: 69551
loss: 1.0265982151031494,grad_norm: 0.9999994077245901, iteration: 69552
loss: 1.1127119064331055,grad_norm: 0.9999994712418554, iteration: 69553
loss: 1.0830798149108887,grad_norm: 0.9999997431362686, iteration: 69554
loss: 1.053899884223938,grad_norm: 0.9999990862793287, iteration: 69555
loss: 1.2021180391311646,grad_norm: 0.9999996659792029, iteration: 69556
loss: 1.0865532159805298,grad_norm: 0.9999993438043612, iteration: 69557
loss: 1.0623339414596558,grad_norm: 0.9999994975593874, iteration: 69558
loss: 1.0192575454711914,grad_norm: 0.9999990390453919, iteration: 69559
loss: 1.2204712629318237,grad_norm: 0.9999996843622502, iteration: 69560
loss: 1.0829265117645264,grad_norm: 0.9999992547128176, iteration: 69561
loss: 0.9927039742469788,grad_norm: 0.9999993816797049, iteration: 69562
loss: 0.9874647259712219,grad_norm: 0.9999992837452714, iteration: 69563
loss: 0.9950715899467468,grad_norm: 0.945004735931151, iteration: 69564
loss: 1.0309293270111084,grad_norm: 0.9999998347641473, iteration: 69565
loss: 1.1026335954666138,grad_norm: 0.9999995024334194, iteration: 69566
loss: 1.0507919788360596,grad_norm: 0.9999995783007706, iteration: 69567
loss: 0.9742463231086731,grad_norm: 0.9999991070207673, iteration: 69568
loss: 1.0018558502197266,grad_norm: 0.996594614298223, iteration: 69569
loss: 0.996156632900238,grad_norm: 0.9999991129072473, iteration: 69570
loss: 1.137877345085144,grad_norm: 0.9999997365010854, iteration: 69571
loss: 1.0017282962799072,grad_norm: 0.9445903545110118, iteration: 69572
loss: 1.0320568084716797,grad_norm: 0.9999997360754316, iteration: 69573
loss: 0.9948086738586426,grad_norm: 0.9999991737385142, iteration: 69574
loss: 0.9943252801895142,grad_norm: 0.8976222732776434, iteration: 69575
loss: 0.9860280752182007,grad_norm: 0.8195017485633361, iteration: 69576
loss: 1.030224084854126,grad_norm: 0.9999994870188207, iteration: 69577
loss: 1.0287368297576904,grad_norm: 0.9999991510290107, iteration: 69578
loss: 0.9823635220527649,grad_norm: 0.9999991086326155, iteration: 69579
loss: 1.0298519134521484,grad_norm: 0.9999991770299897, iteration: 69580
loss: 1.004584550857544,grad_norm: 0.8756360890309557, iteration: 69581
loss: 1.0173242092132568,grad_norm: 0.9999991016861283, iteration: 69582
loss: 1.0285453796386719,grad_norm: 0.9309768346439008, iteration: 69583
loss: 1.0671402215957642,grad_norm: 0.9999991749939858, iteration: 69584
loss: 1.1242492198944092,grad_norm: 0.9999997678797911, iteration: 69585
loss: 1.0101186037063599,grad_norm: 0.999999149897, iteration: 69586
loss: 1.063735842704773,grad_norm: 0.9999998591857225, iteration: 69587
loss: 1.0345215797424316,grad_norm: 0.9616962960855321, iteration: 69588
loss: 0.9742856621742249,grad_norm: 0.9999995761635532, iteration: 69589
loss: 1.0014179944992065,grad_norm: 0.9999991643334751, iteration: 69590
loss: 0.9779949188232422,grad_norm: 0.897571369593098, iteration: 69591
loss: 0.984713077545166,grad_norm: 0.91229032933014, iteration: 69592
loss: 1.0177305936813354,grad_norm: 0.9842140717807186, iteration: 69593
loss: 1.0100971460342407,grad_norm: 0.999999137967642, iteration: 69594
loss: 1.0068297386169434,grad_norm: 0.8811855374952953, iteration: 69595
loss: 1.022107720375061,grad_norm: 0.9496536452215413, iteration: 69596
loss: 1.0194650888442993,grad_norm: 0.9999993551060139, iteration: 69597
loss: 0.9833928346633911,grad_norm: 0.999999139802989, iteration: 69598
loss: 0.9920193552970886,grad_norm: 0.7727396306555261, iteration: 69599
loss: 1.004162073135376,grad_norm: 0.9999995383787945, iteration: 69600
loss: 0.9903832077980042,grad_norm: 0.7280026066975589, iteration: 69601
loss: 1.0244208574295044,grad_norm: 0.9160872495024399, iteration: 69602
loss: 1.1119816303253174,grad_norm: 0.988581088978667, iteration: 69603
loss: 1.07489812374115,grad_norm: 0.9999998494466183, iteration: 69604
loss: 1.0151824951171875,grad_norm: 0.9309369622541279, iteration: 69605
loss: 0.9852766394615173,grad_norm: 0.9001648359537194, iteration: 69606
loss: 0.9973299503326416,grad_norm: 0.9999991777745881, iteration: 69607
loss: 1.024909496307373,grad_norm: 0.9999991771503505, iteration: 69608
loss: 1.0569230318069458,grad_norm: 0.999999648122663, iteration: 69609
loss: 1.0066531896591187,grad_norm: 0.9999992941076566, iteration: 69610
loss: 1.076980471611023,grad_norm: 0.9999998871761925, iteration: 69611
loss: 1.0110303163528442,grad_norm: 0.9999991739227321, iteration: 69612
loss: 0.9780923128128052,grad_norm: 0.8946736263027537, iteration: 69613
loss: 1.2850393056869507,grad_norm: 0.9999991327621511, iteration: 69614
loss: 1.0108351707458496,grad_norm: 0.944712674702068, iteration: 69615
loss: 1.0534461736679077,grad_norm: 0.9999989572814897, iteration: 69616
loss: 1.0665596723556519,grad_norm: 0.9999993464631167, iteration: 69617
loss: 1.038698434829712,grad_norm: 0.9999995780830401, iteration: 69618
loss: 1.0245928764343262,grad_norm: 0.9999997775429673, iteration: 69619
loss: 0.9928280711174011,grad_norm: 0.9443498669587058, iteration: 69620
loss: 1.0157221555709839,grad_norm: 0.9204525082093109, iteration: 69621
loss: 0.9987610578536987,grad_norm: 0.8234873903195056, iteration: 69622
loss: 1.000380039215088,grad_norm: 0.9739125627634345, iteration: 69623
loss: 1.1841883659362793,grad_norm: 0.9999993579656735, iteration: 69624
loss: 1.0653899908065796,grad_norm: 0.9999993660880373, iteration: 69625
loss: 1.059882640838623,grad_norm: 0.9999993614540797, iteration: 69626
loss: 1.045528531074524,grad_norm: 0.9999997912696843, iteration: 69627
loss: 1.1536329984664917,grad_norm: 0.9999994316405877, iteration: 69628
loss: 1.0285862684249878,grad_norm: 0.9999990319641217, iteration: 69629
loss: 1.0120275020599365,grad_norm: 0.9494230756517098, iteration: 69630
loss: 1.0339668989181519,grad_norm: 0.9999994365615035, iteration: 69631
loss: 1.1296865940093994,grad_norm: 0.999999212403126, iteration: 69632
loss: 1.0748227834701538,grad_norm: 0.9999996143268752, iteration: 69633
loss: 1.0315452814102173,grad_norm: 0.9999991614992602, iteration: 69634
loss: 1.0675863027572632,grad_norm: 0.9999993744950663, iteration: 69635
loss: 1.1299296617507935,grad_norm: 0.9999995715688531, iteration: 69636
loss: 1.038196086883545,grad_norm: 0.9999996879586515, iteration: 69637
loss: 1.031481146812439,grad_norm: 0.9432980896017922, iteration: 69638
loss: 1.152868628501892,grad_norm: 0.9999992398634243, iteration: 69639
loss: 1.0207523107528687,grad_norm: 0.9641921914459003, iteration: 69640
loss: 1.033350944519043,grad_norm: 0.9999992802760225, iteration: 69641
loss: 0.9696494936943054,grad_norm: 0.7138317012536775, iteration: 69642
loss: 0.9984967708587646,grad_norm: 0.8560931037129896, iteration: 69643
loss: 1.0001931190490723,grad_norm: 0.9999993202112997, iteration: 69644
loss: 1.0286965370178223,grad_norm: 0.9999990389012591, iteration: 69645
loss: 1.0864790678024292,grad_norm: 0.9474848415640924, iteration: 69646
loss: 0.987105131149292,grad_norm: 0.9999991812862336, iteration: 69647
loss: 1.1076891422271729,grad_norm: 0.9999996580415433, iteration: 69648
loss: 1.126007318496704,grad_norm: 0.9999993483921445, iteration: 69649
loss: 1.2451927661895752,grad_norm: 0.9999995184396661, iteration: 69650
loss: 1.015500545501709,grad_norm: 0.99999937271914, iteration: 69651
loss: 1.029363751411438,grad_norm: 0.8356673588430864, iteration: 69652
loss: 1.021077275276184,grad_norm: 0.999999164673727, iteration: 69653
loss: 1.0151987075805664,grad_norm: 0.7384628147878148, iteration: 69654
loss: 1.1630001068115234,grad_norm: 0.9999991778524352, iteration: 69655
loss: 0.9786495566368103,grad_norm: 0.8201864552034227, iteration: 69656
loss: 1.1906769275665283,grad_norm: 0.9091885861061556, iteration: 69657
loss: 0.9679036140441895,grad_norm: 0.9656936286048817, iteration: 69658
loss: 1.0756174325942993,grad_norm: 0.9770306440005697, iteration: 69659
loss: 1.0166126489639282,grad_norm: 0.9235256509113796, iteration: 69660
loss: 1.2113244533538818,grad_norm: 0.9999998970227508, iteration: 69661
loss: 1.0056456327438354,grad_norm: 0.9445062834436312, iteration: 69662
loss: 1.015699863433838,grad_norm: 0.9351595211922304, iteration: 69663
loss: 1.0754858255386353,grad_norm: 0.8176220635223814, iteration: 69664
loss: 1.1035250425338745,grad_norm: 0.9999999936324905, iteration: 69665
loss: 1.0973434448242188,grad_norm: 0.9999991398258459, iteration: 69666
loss: 1.0288621187210083,grad_norm: 0.9999999148822059, iteration: 69667
loss: 1.049825668334961,grad_norm: 0.9999992569039325, iteration: 69668
loss: 1.0163297653198242,grad_norm: 0.9999998191579015, iteration: 69669
loss: 1.1485340595245361,grad_norm: 0.9999992967727089, iteration: 69670
loss: 1.0498874187469482,grad_norm: 0.9999992536167305, iteration: 69671
loss: 1.1800819635391235,grad_norm: 0.9999992429391943, iteration: 69672
loss: 1.120521903038025,grad_norm: 1.0000000431306013, iteration: 69673
loss: 0.9974160194396973,grad_norm: 0.9664872212138891, iteration: 69674
loss: 1.2056926488876343,grad_norm: 0.9999994296392307, iteration: 69675
loss: 1.244836688041687,grad_norm: 0.9999996688487449, iteration: 69676
loss: 1.4555562734603882,grad_norm: 0.9999999520505827, iteration: 69677
loss: 1.3473515510559082,grad_norm: 0.9999999474163995, iteration: 69678
loss: 1.1081496477127075,grad_norm: 0.9221745599639724, iteration: 69679
loss: 1.5391346216201782,grad_norm: 0.9999998724952562, iteration: 69680
loss: 1.2245184183120728,grad_norm: 0.9999997926653996, iteration: 69681
loss: 1.9939035177230835,grad_norm: 1.0000000940455511, iteration: 69682
loss: 1.822624683380127,grad_norm: 0.9999998244570858, iteration: 69683
loss: 1.2912662029266357,grad_norm: 0.9999996086883878, iteration: 69684
loss: 1.294481635093689,grad_norm: 0.9999999949912171, iteration: 69685
loss: 1.3473047018051147,grad_norm: 0.9999998665829496, iteration: 69686
loss: 1.0778555870056152,grad_norm: 0.9999996406981232, iteration: 69687
loss: 1.1512295007705688,grad_norm: 0.9999999272848047, iteration: 69688
loss: 1.1987477540969849,grad_norm: 0.9999999291777845, iteration: 69689
loss: 1.2435064315795898,grad_norm: 0.999999881378227, iteration: 69690
loss: 1.3248039484024048,grad_norm: 0.9999998381648141, iteration: 69691
loss: 1.0737707614898682,grad_norm: 0.9999996255879796, iteration: 69692
loss: 1.2783375978469849,grad_norm: 0.9999997548779525, iteration: 69693
loss: 1.714375376701355,grad_norm: 0.9999998728779553, iteration: 69694
loss: 1.1510480642318726,grad_norm: 0.9999999568032242, iteration: 69695
loss: 1.2227247953414917,grad_norm: 0.9999998876570565, iteration: 69696
loss: 1.2897826433181763,grad_norm: 0.9999999313756424, iteration: 69697
loss: 1.4994291067123413,grad_norm: 0.9999998904743519, iteration: 69698
loss: 1.4338055849075317,grad_norm: 0.99999960934871, iteration: 69699
loss: 1.0816086530685425,grad_norm: 0.9999996184721204, iteration: 69700
loss: 1.3882821798324585,grad_norm: 0.9999996140273972, iteration: 69701
loss: 1.2531249523162842,grad_norm: 0.9999999400087628, iteration: 69702
loss: 1.2559559345245361,grad_norm: 0.9999999998048281, iteration: 69703
loss: 1.2543939352035522,grad_norm: 0.9999997453896053, iteration: 69704
loss: 1.1446914672851562,grad_norm: 0.9999997300793914, iteration: 69705
loss: 1.0180230140686035,grad_norm: 0.9999994850920325, iteration: 69706
loss: 1.3374067544937134,grad_norm: 0.9999998757850779, iteration: 69707
loss: 1.111743450164795,grad_norm: 0.9999996187949702, iteration: 69708
loss: 1.1135554313659668,grad_norm: 0.999999575414678, iteration: 69709
loss: 1.1963553428649902,grad_norm: 0.999999383239941, iteration: 69710
loss: 1.0787711143493652,grad_norm: 0.9999993068927978, iteration: 69711
loss: 1.1000407934188843,grad_norm: 0.9999995059503682, iteration: 69712
loss: 1.0907515287399292,grad_norm: 0.9999992303638701, iteration: 69713
loss: 1.109437108039856,grad_norm: 0.9999994026388728, iteration: 69714
loss: 1.1662304401397705,grad_norm: 0.9999989910155267, iteration: 69715
loss: 1.1743152141571045,grad_norm: 0.9999997759040579, iteration: 69716
loss: 1.0972387790679932,grad_norm: 0.9999994057483779, iteration: 69717
loss: 1.1949421167373657,grad_norm: 0.9999999639534427, iteration: 69718
loss: 1.1996911764144897,grad_norm: 0.999999955930233, iteration: 69719
loss: 1.3916605710983276,grad_norm: 0.9999993906878442, iteration: 69720
loss: 1.2316731214523315,grad_norm: 0.9999990904837979, iteration: 69721
loss: 1.6537784337997437,grad_norm: 0.9999997288712723, iteration: 69722
loss: 1.5379770994186401,grad_norm: 0.9999994826365133, iteration: 69723
loss: 1.1072221994400024,grad_norm: 0.9999997621096203, iteration: 69724
loss: 1.2822118997573853,grad_norm: 0.9999998492420533, iteration: 69725
loss: 1.3204463720321655,grad_norm: 0.999999839532423, iteration: 69726
loss: 1.261690378189087,grad_norm: 0.9999992732414998, iteration: 69727
loss: 1.576603651046753,grad_norm: 0.9999997888069743, iteration: 69728
loss: 1.4676158428192139,grad_norm: 0.9999998531959811, iteration: 69729
loss: 1.6698960065841675,grad_norm: 0.9999998061167867, iteration: 69730
loss: 1.6648188829421997,grad_norm: 0.9999998231507382, iteration: 69731
loss: 1.7537273168563843,grad_norm: 0.9999998061960927, iteration: 69732
loss: 2.164048433303833,grad_norm: 0.9999998445807451, iteration: 69733
loss: 2.0370795726776123,grad_norm: 0.9999998027341616, iteration: 69734
loss: 1.812751054763794,grad_norm: 0.9999996342431466, iteration: 69735
loss: 1.991267442703247,grad_norm: 0.9999998397248405, iteration: 69736
loss: 2.061411142349243,grad_norm: 0.9999997310967119, iteration: 69737
loss: 2.269059658050537,grad_norm: 0.9999999683142993, iteration: 69738
loss: 2.000347852706909,grad_norm: 0.9999996388345727, iteration: 69739
loss: 1.9372618198394775,grad_norm: 0.999999902659312, iteration: 69740
loss: 1.8106187582015991,grad_norm: 0.9999999245613858, iteration: 69741
loss: 1.8601065874099731,grad_norm: 0.9999999349234381, iteration: 69742
loss: 2.1124134063720703,grad_norm: 0.9999998411394833, iteration: 69743
loss: 2.009730577468872,grad_norm: 0.9999998329382161, iteration: 69744
loss: 2.0532031059265137,grad_norm: 0.999999976267547, iteration: 69745
loss: 1.8082185983657837,grad_norm: 0.9999998781745626, iteration: 69746
loss: 1.8279738426208496,grad_norm: 0.9999997213591719, iteration: 69747
loss: 1.8805443048477173,grad_norm: 0.9999998690308182, iteration: 69748
loss: 1.8596136569976807,grad_norm: 1.000000060451132, iteration: 69749
loss: 1.952764630317688,grad_norm: 0.9999995804062489, iteration: 69750
loss: 1.6401033401489258,grad_norm: 0.9999999207787109, iteration: 69751
loss: 1.6017873287200928,grad_norm: 0.9999998779054758, iteration: 69752
loss: 1.7206958532333374,grad_norm: 0.9999998626045231, iteration: 69753
loss: 1.7510607242584229,grad_norm: 0.9999998635769471, iteration: 69754
loss: 1.603408694267273,grad_norm: 0.9999998222327698, iteration: 69755
loss: 1.5563169717788696,grad_norm: 0.9999998017622527, iteration: 69756
loss: 1.9766242504119873,grad_norm: 0.9999996033904383, iteration: 69757
loss: 1.5949040651321411,grad_norm: 0.9999999284770263, iteration: 69758
loss: 1.5820597410202026,grad_norm: 0.9999998616435964, iteration: 69759
loss: 1.6973263025283813,grad_norm: 1.0000000120951014, iteration: 69760
loss: 1.6777944564819336,grad_norm: 0.9999999199564804, iteration: 69761
loss: 1.6352355480194092,grad_norm: 0.9999999804711942, iteration: 69762
loss: 1.9920309782028198,grad_norm: 0.9999998954864342, iteration: 69763
loss: 1.444380283355713,grad_norm: 0.9999998652989665, iteration: 69764
loss: 1.6559194326400757,grad_norm: 0.999999867140286, iteration: 69765
loss: 1.52989661693573,grad_norm: 0.9999998473047684, iteration: 69766
loss: 1.385680079460144,grad_norm: 0.9999998040059201, iteration: 69767
loss: 1.8270848989486694,grad_norm: 0.9999998913843056, iteration: 69768
loss: 1.285343050956726,grad_norm: 0.9999997668401248, iteration: 69769
loss: 1.4507896900177002,grad_norm: 0.9999999565490657, iteration: 69770
loss: 1.31625235080719,grad_norm: 0.9999996462390279, iteration: 69771
loss: 1.6576584577560425,grad_norm: 0.9999996155427429, iteration: 69772
loss: 1.4631069898605347,grad_norm: 0.9999996992922685, iteration: 69773
loss: 1.5006966590881348,grad_norm: 0.9999997905967374, iteration: 69774
loss: 1.3042746782302856,grad_norm: 0.9999998171475825, iteration: 69775
loss: 1.185036540031433,grad_norm: 0.9999999162448922, iteration: 69776
loss: 1.2639532089233398,grad_norm: 0.9999999601418925, iteration: 69777
loss: 1.382571816444397,grad_norm: 0.9999999855633905, iteration: 69778
loss: 1.3482624292373657,grad_norm: 0.9999998712010806, iteration: 69779
loss: 1.1411733627319336,grad_norm: 1.0000000067425285, iteration: 69780
loss: 1.42999267578125,grad_norm: 0.9999998208308428, iteration: 69781
loss: 1.1280373334884644,grad_norm: 0.9999994317224743, iteration: 69782
loss: 1.196907877922058,grad_norm: 0.9999998591235704, iteration: 69783
loss: 1.2558505535125732,grad_norm: 0.9999993774670305, iteration: 69784
loss: 1.1382298469543457,grad_norm: 0.9999999040587242, iteration: 69785
loss: 1.2150657176971436,grad_norm: 0.9999998695128863, iteration: 69786
loss: 1.283714771270752,grad_norm: 0.9999998846037443, iteration: 69787
loss: 1.2180743217468262,grad_norm: 0.9999997948740879, iteration: 69788
loss: 1.3269556760787964,grad_norm: 0.999999883624818, iteration: 69789
loss: 1.1946749687194824,grad_norm: 0.9999997077324255, iteration: 69790
loss: 1.3916059732437134,grad_norm: 0.9999998675441029, iteration: 69791
loss: 1.252953290939331,grad_norm: 0.9999998053890994, iteration: 69792
loss: 1.1115504503250122,grad_norm: 0.999999498869126, iteration: 69793
loss: 1.1062965393066406,grad_norm: 0.9999997358172864, iteration: 69794
loss: 1.1011979579925537,grad_norm: 0.999999018593683, iteration: 69795
loss: 1.08427894115448,grad_norm: 1.0000000256123085, iteration: 69796
loss: 1.127946138381958,grad_norm: 0.9999994146108164, iteration: 69797
loss: 1.0806986093521118,grad_norm: 0.9999991408408785, iteration: 69798
loss: 1.3161554336547852,grad_norm: 0.9999998009928172, iteration: 69799
loss: 1.1127328872680664,grad_norm: 0.9999996830589599, iteration: 69800
loss: 1.158461093902588,grad_norm: 0.9999992517890886, iteration: 69801
loss: 1.2419309616088867,grad_norm: 0.9999997629721855, iteration: 69802
loss: 1.1439414024353027,grad_norm: 0.9999998891892484, iteration: 69803
loss: 1.495006799697876,grad_norm: 0.9999997063964582, iteration: 69804
loss: 1.0228873491287231,grad_norm: 0.9999990943655854, iteration: 69805
loss: 1.2588106393814087,grad_norm: 0.9999996856308604, iteration: 69806
loss: 1.1064119338989258,grad_norm: 0.999999322930104, iteration: 69807
loss: 1.0841153860092163,grad_norm: 0.9999997132631715, iteration: 69808
loss: 1.0749515295028687,grad_norm: 0.9999996002854922, iteration: 69809
loss: 1.2724878787994385,grad_norm: 0.9999999233503296, iteration: 69810
loss: 1.118025541305542,grad_norm: 0.999999928122205, iteration: 69811
loss: 1.1123210191726685,grad_norm: 0.9999992324842109, iteration: 69812
loss: 1.2523410320281982,grad_norm: 0.9999996330236248, iteration: 69813
loss: 1.1698607206344604,grad_norm: 0.9999996685636552, iteration: 69814
loss: 1.2363307476043701,grad_norm: 0.9999999092749071, iteration: 69815
loss: 1.1452895402908325,grad_norm: 0.9999998662390821, iteration: 69816
loss: 1.262571930885315,grad_norm: 0.9999996749250137, iteration: 69817
loss: 1.2648227214813232,grad_norm: 0.999999950068013, iteration: 69818
loss: 1.5526942014694214,grad_norm: 0.9999997819907209, iteration: 69819
loss: 1.2925686836242676,grad_norm: 0.9999997435198341, iteration: 69820
loss: 1.2829952239990234,grad_norm: 0.9999998396384163, iteration: 69821
loss: 1.4189761877059937,grad_norm: 0.9999998877731952, iteration: 69822
loss: 1.2198362350463867,grad_norm: 0.9999999284370665, iteration: 69823
loss: 1.2012540102005005,grad_norm: 0.9999999101146196, iteration: 69824
loss: 1.1726887226104736,grad_norm: 0.9999995370588028, iteration: 69825
loss: 1.2847568988800049,grad_norm: 0.9999998780382426, iteration: 69826
loss: 1.3601282835006714,grad_norm: 0.9999998951357516, iteration: 69827
loss: 1.4094834327697754,grad_norm: 0.9999998243388529, iteration: 69828
loss: 1.51846182346344,grad_norm: 0.9999998958648724, iteration: 69829
loss: 1.4372084140777588,grad_norm: 0.9999999089887982, iteration: 69830
loss: 1.2152847051620483,grad_norm: 0.9999998688309375, iteration: 69831
loss: 1.1380308866500854,grad_norm: 0.99999988303355, iteration: 69832
loss: 1.2461614608764648,grad_norm: 0.9999999333558448, iteration: 69833
loss: 1.2041537761688232,grad_norm: 0.9999996579823481, iteration: 69834
loss: 1.1154741048812866,grad_norm: 0.9999998517036595, iteration: 69835
loss: 1.1289085149765015,grad_norm: 1.0000000227480166, iteration: 69836
loss: 1.1964439153671265,grad_norm: 0.9999995954883882, iteration: 69837
loss: 1.4300727844238281,grad_norm: 1.0000000189023188, iteration: 69838
loss: 1.1319963932037354,grad_norm: 0.9999994669652033, iteration: 69839
loss: 1.2607170343399048,grad_norm: 0.9999995927119086, iteration: 69840
loss: 1.0763806104660034,grad_norm: 0.9999990544909281, iteration: 69841
loss: 1.1438294649124146,grad_norm: 0.9999998664317342, iteration: 69842
loss: 1.2273951768875122,grad_norm: 1.0000000459375749, iteration: 69843
loss: 1.2107330560684204,grad_norm: 0.9999997762267795, iteration: 69844
loss: 1.0666850805282593,grad_norm: 0.9999993846233383, iteration: 69845
loss: 1.0485023260116577,grad_norm: 0.9999991332847235, iteration: 69846
loss: 1.0788023471832275,grad_norm: 0.9999996871729999, iteration: 69847
loss: 1.234805941581726,grad_norm: 0.9999991531833013, iteration: 69848
loss: 1.2367472648620605,grad_norm: 0.9999999507823961, iteration: 69849
loss: 1.2056983709335327,grad_norm: 0.9999997551297547, iteration: 69850
loss: 1.3719583749771118,grad_norm: 0.9999999031096716, iteration: 69851
loss: 1.0648472309112549,grad_norm: 0.9742808903142329, iteration: 69852
loss: 1.3339306116104126,grad_norm: 0.9999995567950329, iteration: 69853
loss: 1.524680495262146,grad_norm: 0.9999997367792798, iteration: 69854
loss: 1.3819894790649414,grad_norm: 0.999999809857908, iteration: 69855
loss: 1.2393592596054077,grad_norm: 0.9999997899888331, iteration: 69856
loss: 1.210099697113037,grad_norm: 0.9999992495553748, iteration: 69857
loss: 1.2291396856307983,grad_norm: 0.9999996100521268, iteration: 69858
loss: 1.2876206636428833,grad_norm: 0.9999995395123285, iteration: 69859
loss: 1.4250702857971191,grad_norm: 0.999999961966847, iteration: 69860
loss: 1.371402382850647,grad_norm: 1.0000000296096831, iteration: 69861
loss: 1.3815993070602417,grad_norm: 0.9999998749545914, iteration: 69862
loss: 1.1338772773742676,grad_norm: 0.9999997763002242, iteration: 69863
loss: 1.0992021560668945,grad_norm: 0.9999998055490334, iteration: 69864
loss: 1.1944327354431152,grad_norm: 0.9999998118366186, iteration: 69865
loss: 1.0182114839553833,grad_norm: 0.9999991341336146, iteration: 69866
loss: 1.05752432346344,grad_norm: 0.9999998525312276, iteration: 69867
loss: 1.3383780717849731,grad_norm: 0.9999998273942897, iteration: 69868
loss: 1.0730341672897339,grad_norm: 0.9999990679623857, iteration: 69869
loss: 1.07467520236969,grad_norm: 0.9999997509852796, iteration: 69870
loss: 1.1431318521499634,grad_norm: 0.9999996153402416, iteration: 69871
loss: 1.040621280670166,grad_norm: 0.9694773190754956, iteration: 69872
loss: 1.031221866607666,grad_norm: 0.8742825490686639, iteration: 69873
loss: 1.0202810764312744,grad_norm: 0.9999992168363778, iteration: 69874
loss: 1.0677794218063354,grad_norm: 0.9999998218575263, iteration: 69875
loss: 1.0429033041000366,grad_norm: 0.8339128309222746, iteration: 69876
loss: 1.158034086227417,grad_norm: 0.9999994656497571, iteration: 69877
loss: 1.0169825553894043,grad_norm: 0.9999991487321, iteration: 69878
loss: 1.041799545288086,grad_norm: 0.9999997000982291, iteration: 69879
loss: 1.2901703119277954,grad_norm: 0.9999998785111329, iteration: 69880
loss: 1.0020687580108643,grad_norm: 0.9999993344216146, iteration: 69881
loss: 0.9911436438560486,grad_norm: 0.9999992420377779, iteration: 69882
loss: 0.9886580109596252,grad_norm: 0.9882883821397537, iteration: 69883
loss: 1.0225465297698975,grad_norm: 0.9260667242460711, iteration: 69884
loss: 1.0741947889328003,grad_norm: 0.9999999039411595, iteration: 69885
loss: 1.2834374904632568,grad_norm: 0.9999998519690012, iteration: 69886
loss: 1.1201189756393433,grad_norm: 0.9999993611082183, iteration: 69887
loss: 1.0190601348876953,grad_norm: 0.9999994583035782, iteration: 69888
loss: 1.0902373790740967,grad_norm: 0.9999993363892586, iteration: 69889
loss: 1.0364609956741333,grad_norm: 0.999999110240318, iteration: 69890
loss: 1.0315653085708618,grad_norm: 0.9999992267035649, iteration: 69891
loss: 1.056956171989441,grad_norm: 0.999999721455685, iteration: 69892
loss: 1.0157551765441895,grad_norm: 0.9739973209799961, iteration: 69893
loss: 1.1115388870239258,grad_norm: 0.9999995516233916, iteration: 69894
loss: 1.0685449838638306,grad_norm: 0.9999994762953003, iteration: 69895
loss: 1.154943585395813,grad_norm: 0.9999998713996319, iteration: 69896
loss: 0.9948090314865112,grad_norm: 0.9999990338212075, iteration: 69897
loss: 1.0193123817443848,grad_norm: 1.0000000256769572, iteration: 69898
loss: 1.0461156368255615,grad_norm: 0.9268768352862298, iteration: 69899
loss: 1.1181058883666992,grad_norm: 0.9999999678635502, iteration: 69900
loss: 1.0108959674835205,grad_norm: 0.9999991331774201, iteration: 69901
loss: 1.020818829536438,grad_norm: 0.9999999502100179, iteration: 69902
loss: 0.9813786149024963,grad_norm: 0.9999993547075405, iteration: 69903
loss: 1.0751434564590454,grad_norm: 0.9999996320813939, iteration: 69904
loss: 0.9867172241210938,grad_norm: 0.9999991869242606, iteration: 69905
loss: 1.0168440341949463,grad_norm: 0.9999998214000397, iteration: 69906
loss: 1.0439993143081665,grad_norm: 0.9999992624077622, iteration: 69907
loss: 1.054612159729004,grad_norm: 1.0000000771419737, iteration: 69908
loss: 1.019790768623352,grad_norm: 0.999999259879652, iteration: 69909
loss: 1.0778874158859253,grad_norm: 0.999999332367622, iteration: 69910
loss: 0.981880784034729,grad_norm: 0.9072675545196797, iteration: 69911
loss: 1.1231218576431274,grad_norm: 0.9999997349814761, iteration: 69912
loss: 1.0871442556381226,grad_norm: 0.9999996709127343, iteration: 69913
loss: 1.091767430305481,grad_norm: 0.999999757076868, iteration: 69914
loss: 1.0388095378875732,grad_norm: 0.9094675953259952, iteration: 69915
loss: 1.299566626548767,grad_norm: 1.0000000335554606, iteration: 69916
loss: 1.0605818033218384,grad_norm: 0.999999826094006, iteration: 69917
loss: 1.1285016536712646,grad_norm: 1.0000000491657126, iteration: 69918
loss: 1.207970380783081,grad_norm: 0.9999998369860331, iteration: 69919
loss: 1.1228896379470825,grad_norm: 0.9999999103566336, iteration: 69920
loss: 1.180622935295105,grad_norm: 1.0000000182384323, iteration: 69921
loss: 1.087546944618225,grad_norm: 0.9999999694702223, iteration: 69922
loss: 1.0982205867767334,grad_norm: 0.9999998459658489, iteration: 69923
loss: 1.2748891115188599,grad_norm: 0.999999903534325, iteration: 69924
loss: 1.019771933555603,grad_norm: 0.9999999723363387, iteration: 69925
loss: 1.248218059539795,grad_norm: 0.9999998923326353, iteration: 69926
loss: 1.1193046569824219,grad_norm: 0.9999994954676812, iteration: 69927
loss: 1.1092551946640015,grad_norm: 1.0000000212824591, iteration: 69928
loss: 1.1020269393920898,grad_norm: 0.9999994597698354, iteration: 69929
loss: 1.0766956806182861,grad_norm: 0.999999312533917, iteration: 69930
loss: 1.0214179754257202,grad_norm: 0.9999990468501273, iteration: 69931
loss: 1.2848773002624512,grad_norm: 0.9999996170496708, iteration: 69932
loss: 1.1739702224731445,grad_norm: 0.9999999702718829, iteration: 69933
loss: 1.0444786548614502,grad_norm: 0.999999663398798, iteration: 69934
loss: 1.0345268249511719,grad_norm: 0.9999996595195211, iteration: 69935
loss: 1.1072829961776733,grad_norm: 0.9999991811293357, iteration: 69936
loss: 1.1231987476348877,grad_norm: 0.9999995715821965, iteration: 69937
loss: 0.9947593212127686,grad_norm: 0.9999990305354881, iteration: 69938
loss: 1.0465267896652222,grad_norm: 0.9999995112088784, iteration: 69939
loss: 1.0221834182739258,grad_norm: 0.9999993534769094, iteration: 69940
loss: 1.0335801839828491,grad_norm: 0.9999995715571213, iteration: 69941
loss: 1.4121235609054565,grad_norm: 1.0000000076698021, iteration: 69942
loss: 0.9786604642868042,grad_norm: 0.9999991051066266, iteration: 69943
loss: 0.955860435962677,grad_norm: 0.9999991033161828, iteration: 69944
loss: 1.0362621545791626,grad_norm: 0.9999994048015802, iteration: 69945
loss: 1.182486653327942,grad_norm: 0.9999994233202473, iteration: 69946
loss: 1.0458682775497437,grad_norm: 0.9999991050679161, iteration: 69947
loss: 1.094858169555664,grad_norm: 0.9999993126803053, iteration: 69948
loss: 1.0113288164138794,grad_norm: 0.9999998490899811, iteration: 69949
loss: 1.0975037813186646,grad_norm: 0.9999991261103278, iteration: 69950
loss: 1.1122856140136719,grad_norm: 0.9999993509295034, iteration: 69951
loss: 1.0479522943496704,grad_norm: 0.9999999619631641, iteration: 69952
loss: 1.0033258199691772,grad_norm: 0.9999996678509049, iteration: 69953
loss: 1.071884036064148,grad_norm: 0.9999993329346243, iteration: 69954
loss: 0.9782790541648865,grad_norm: 0.999999198506224, iteration: 69955
loss: 1.0367562770843506,grad_norm: 0.999999277068506, iteration: 69956
loss: 0.9759076237678528,grad_norm: 0.9999995840479856, iteration: 69957
loss: 1.0138051509857178,grad_norm: 1.000000018870221, iteration: 69958
loss: 1.1245592832565308,grad_norm: 0.9999994077172325, iteration: 69959
loss: 1.0870344638824463,grad_norm: 0.9999993634941472, iteration: 69960
loss: 1.0224599838256836,grad_norm: 0.9999991548307802, iteration: 69961
loss: 0.9695857167243958,grad_norm: 0.9999990306526557, iteration: 69962
loss: 1.006412386894226,grad_norm: 0.9999991517746637, iteration: 69963
loss: 1.0888078212738037,grad_norm: 0.9999992514354004, iteration: 69964
loss: 1.0343644618988037,grad_norm: 0.9999993600249649, iteration: 69965
loss: 1.056132435798645,grad_norm: 0.9999998282778244, iteration: 69966
loss: 0.984282374382019,grad_norm: 0.9999998128954273, iteration: 69967
loss: 1.059625267982483,grad_norm: 0.9999994890744885, iteration: 69968
loss: 1.0372815132141113,grad_norm: 0.9999992254092391, iteration: 69969
loss: 1.1333446502685547,grad_norm: 1.000000032728541, iteration: 69970
loss: 1.0000574588775635,grad_norm: 0.9171039656793751, iteration: 69971
loss: 1.0195597410202026,grad_norm: 0.8086884578971245, iteration: 69972
loss: 1.1548296213150024,grad_norm: 0.9999996322629913, iteration: 69973
loss: 1.0048223733901978,grad_norm: 0.9107626342430826, iteration: 69974
loss: 1.0190831422805786,grad_norm: 0.999999270132508, iteration: 69975
loss: 1.1175894737243652,grad_norm: 0.9999999449395256, iteration: 69976
loss: 1.0007832050323486,grad_norm: 0.9999997910517991, iteration: 69977
loss: 1.0555765628814697,grad_norm: 0.7928959530450875, iteration: 69978
loss: 1.0007745027542114,grad_norm: 0.9999991526589408, iteration: 69979
loss: 1.0910704135894775,grad_norm: 0.9999993569218567, iteration: 69980
loss: 0.96505206823349,grad_norm: 0.9999999128418844, iteration: 69981
loss: 0.9927440285682678,grad_norm: 0.928300699364711, iteration: 69982
loss: 1.0555684566497803,grad_norm: 0.889776644399863, iteration: 69983
loss: 1.0699392557144165,grad_norm: 0.9999989943803308, iteration: 69984
loss: 0.9743086695671082,grad_norm: 0.9999991382173404, iteration: 69985
loss: 1.1083303689956665,grad_norm: 0.9999992176053651, iteration: 69986
loss: 1.064074993133545,grad_norm: 0.9999992034357102, iteration: 69987
loss: 1.1913652420043945,grad_norm: 0.9999992558682039, iteration: 69988
loss: 1.2103276252746582,grad_norm: 0.9999991015542391, iteration: 69989
loss: 1.0496177673339844,grad_norm: 0.9999996667545896, iteration: 69990
loss: 1.0156657695770264,grad_norm: 0.999999330783887, iteration: 69991
loss: 1.0656778812408447,grad_norm: 0.9999998173981585, iteration: 69992
loss: 0.9775972366333008,grad_norm: 0.9347145879699111, iteration: 69993
loss: 1.0135728120803833,grad_norm: 0.9669388778689026, iteration: 69994
loss: 1.018325686454773,grad_norm: 0.9999991917062268, iteration: 69995
loss: 1.0585843324661255,grad_norm: 0.999999225835651, iteration: 69996
loss: 1.0376046895980835,grad_norm: 0.9999993106319942, iteration: 69997
loss: 1.0937187671661377,grad_norm: 0.9999994824501899, iteration: 69998
loss: 1.0301461219787598,grad_norm: 0.9999994474772508, iteration: 69999
loss: 1.0268797874450684,grad_norm: 0.999999046935784, iteration: 70000
Evaluating at step 70000
{'val': 1.0043858159333467, 'test': 2.224148019434164}
loss: 0.9997671842575073,grad_norm: 0.9999992016430738, iteration: 70001
loss: 1.0812605619430542,grad_norm: 0.9999991744938745, iteration: 70002
loss: 1.125265121459961,grad_norm: 0.9999995440749768, iteration: 70003
loss: 1.0472283363342285,grad_norm: 0.9999990435644053, iteration: 70004
loss: 1.0128854513168335,grad_norm: 0.9999989204591648, iteration: 70005
loss: 1.0178121328353882,grad_norm: 0.9999995793285987, iteration: 70006
loss: 1.0185906887054443,grad_norm: 0.9484795265688275, iteration: 70007
loss: 0.989524781703949,grad_norm: 0.989570240434492, iteration: 70008
loss: 1.016628623008728,grad_norm: 0.9999998381052297, iteration: 70009
loss: 1.038960337638855,grad_norm: 0.9999995107338147, iteration: 70010
loss: 1.0387319326400757,grad_norm: 0.9999990585940607, iteration: 70011
loss: 1.0501818656921387,grad_norm: 0.9999991956619569, iteration: 70012
loss: 1.1356005668640137,grad_norm: 0.999999388957573, iteration: 70013
loss: 1.0409530401229858,grad_norm: 0.9999996123851976, iteration: 70014
loss: 0.9647393822669983,grad_norm: 0.999999080034966, iteration: 70015
loss: 1.0967713594436646,grad_norm: 0.9999995041051213, iteration: 70016
loss: 0.9793853759765625,grad_norm: 0.8244197399588893, iteration: 70017
loss: 1.0365257263183594,grad_norm: 0.9999992957161351, iteration: 70018
loss: 1.054518222808838,grad_norm: 0.9999991632683491, iteration: 70019
loss: 1.0043084621429443,grad_norm: 0.9999992043494785, iteration: 70020
loss: 1.102695345878601,grad_norm: 0.9999990372662688, iteration: 70021
loss: 1.0010911226272583,grad_norm: 0.9999993278575153, iteration: 70022
loss: 1.2833858728408813,grad_norm: 0.9999996594988604, iteration: 70023
loss: 1.1907854080200195,grad_norm: 0.9999991961493967, iteration: 70024
loss: 0.9867663383483887,grad_norm: 0.9999993480258026, iteration: 70025
loss: 1.0253570079803467,grad_norm: 0.8976824461773225, iteration: 70026
loss: 1.0164672136306763,grad_norm: 0.858464832438363, iteration: 70027
loss: 1.0440940856933594,grad_norm: 0.9999994536900843, iteration: 70028
loss: 1.1942646503448486,grad_norm: 0.9999997647150223, iteration: 70029
loss: 1.0365768671035767,grad_norm: 0.9999990682257335, iteration: 70030
loss: 0.9791946411132812,grad_norm: 0.881796989107084, iteration: 70031
loss: 1.0224149227142334,grad_norm: 0.8968449521860737, iteration: 70032
loss: 1.1162534952163696,grad_norm: 0.9999999073329413, iteration: 70033
loss: 0.997954249382019,grad_norm: 0.9999991853458695, iteration: 70034
loss: 1.0019947290420532,grad_norm: 0.8646215020870407, iteration: 70035
loss: 1.0927201509475708,grad_norm: 0.9999993267207069, iteration: 70036
loss: 1.0408704280853271,grad_norm: 0.9080569571685158, iteration: 70037
loss: 1.0060824155807495,grad_norm: 0.9999999146211507, iteration: 70038
loss: 1.0285016298294067,grad_norm: 0.9999990181873232, iteration: 70039
loss: 1.1451716423034668,grad_norm: 0.9999999720758497, iteration: 70040
loss: 1.0017114877700806,grad_norm: 0.9999990660473579, iteration: 70041
loss: 1.0971633195877075,grad_norm: 0.999999577492623, iteration: 70042
loss: 1.0597515106201172,grad_norm: 0.9999998365526384, iteration: 70043
loss: 1.0130528211593628,grad_norm: 0.8068307819753777, iteration: 70044
loss: 1.0087474584579468,grad_norm: 0.9999991936360856, iteration: 70045
loss: 1.0004087686538696,grad_norm: 0.99999904644003, iteration: 70046
loss: 0.9893891215324402,grad_norm: 0.9814593142193873, iteration: 70047
loss: 1.008455753326416,grad_norm: 0.9848578174355782, iteration: 70048
loss: 1.0077491998672485,grad_norm: 0.9717531929780007, iteration: 70049
loss: 1.0230458974838257,grad_norm: 0.9999992699333927, iteration: 70050
loss: 1.0308094024658203,grad_norm: 0.9999994330194512, iteration: 70051
loss: 1.0392290353775024,grad_norm: 0.9999999707185575, iteration: 70052
loss: 0.9900612831115723,grad_norm: 0.7632960325387809, iteration: 70053
loss: 1.050496220588684,grad_norm: 0.8536887994816079, iteration: 70054
loss: 1.0118529796600342,grad_norm: 0.9999996688522185, iteration: 70055
loss: 1.0252453088760376,grad_norm: 0.9999994788523924, iteration: 70056
loss: 1.0418795347213745,grad_norm: 0.9999991590211924, iteration: 70057
loss: 1.002282738685608,grad_norm: 0.9999991823920875, iteration: 70058
loss: 1.0541503429412842,grad_norm: 0.9999993308398434, iteration: 70059
loss: 1.0217498540878296,grad_norm: 0.9999991143649344, iteration: 70060
loss: 1.031867265701294,grad_norm: 0.9999996046575658, iteration: 70061
loss: 1.0338467359542847,grad_norm: 0.999999333168038, iteration: 70062
loss: 1.0443090200424194,grad_norm: 0.9999992270558414, iteration: 70063
loss: 1.039864182472229,grad_norm: 0.9999995459746677, iteration: 70064
loss: 0.9888297319412231,grad_norm: 0.9953080803534456, iteration: 70065
loss: 1.0218194723129272,grad_norm: 0.9036564707675282, iteration: 70066
loss: 1.0395325422286987,grad_norm: 0.999999376241522, iteration: 70067
loss: 0.9755653142929077,grad_norm: 0.9907985986734965, iteration: 70068
loss: 0.9591420888900757,grad_norm: 0.9999996635006643, iteration: 70069
loss: 1.0059877634048462,grad_norm: 0.9033165263483788, iteration: 70070
loss: 1.0501924753189087,grad_norm: 0.9999994404297083, iteration: 70071
loss: 1.0087512731552124,grad_norm: 0.9067758849883304, iteration: 70072
loss: 0.969359278678894,grad_norm: 0.9999991542376211, iteration: 70073
loss: 1.045586347579956,grad_norm: 0.9999992960958778, iteration: 70074
loss: 1.0502982139587402,grad_norm: 0.9999998373911931, iteration: 70075
loss: 1.1469002962112427,grad_norm: 0.9999997510884885, iteration: 70076
loss: 1.059216022491455,grad_norm: 0.9999995302096834, iteration: 70077
loss: 1.0131217241287231,grad_norm: 0.8618138862259457, iteration: 70078
loss: 1.0001945495605469,grad_norm: 0.9230732834990508, iteration: 70079
loss: 1.086710810661316,grad_norm: 0.9999991022576479, iteration: 70080
loss: 1.1091318130493164,grad_norm: 0.9999994082865851, iteration: 70081
loss: 1.104536771774292,grad_norm: 0.8591672180697935, iteration: 70082
loss: 1.0390039682388306,grad_norm: 0.9999991000482245, iteration: 70083
loss: 1.031362771987915,grad_norm: 0.9999997090394342, iteration: 70084
loss: 1.0148553848266602,grad_norm: 0.9999994793860828, iteration: 70085
loss: 1.0467759370803833,grad_norm: 0.87196677488723, iteration: 70086
loss: 1.0141222476959229,grad_norm: 0.9999990649926919, iteration: 70087
loss: 0.979884147644043,grad_norm: 0.9702421594999346, iteration: 70088
loss: 0.9799466133117676,grad_norm: 0.9999990753682678, iteration: 70089
loss: 0.9944282174110413,grad_norm: 0.9999992783223, iteration: 70090
loss: 1.0265737771987915,grad_norm: 0.8488176588458826, iteration: 70091
loss: 1.0359761714935303,grad_norm: 0.8558719001951257, iteration: 70092
loss: 0.9548417925834656,grad_norm: 0.999999204108298, iteration: 70093
loss: 0.9898256063461304,grad_norm: 0.9999992002434419, iteration: 70094
loss: 1.2053050994873047,grad_norm: 0.9999997426923615, iteration: 70095
loss: 1.0231472253799438,grad_norm: 0.7778326719659457, iteration: 70096
loss: 0.9952740669250488,grad_norm: 0.8458033397702449, iteration: 70097
loss: 0.9982565641403198,grad_norm: 0.9862711956795304, iteration: 70098
loss: 1.0276472568511963,grad_norm: 0.9999997139507942, iteration: 70099
loss: 1.0247547626495361,grad_norm: 0.9999996656568295, iteration: 70100
loss: 1.0301607847213745,grad_norm: 0.9688213281388146, iteration: 70101
loss: 0.9844812154769897,grad_norm: 0.9968139165456581, iteration: 70102
loss: 1.0180624723434448,grad_norm: 0.7239194758763969, iteration: 70103
loss: 1.0106335878372192,grad_norm: 0.9676329141917264, iteration: 70104
loss: 1.074405550956726,grad_norm: 0.8641097138373897, iteration: 70105
loss: 1.0306422710418701,grad_norm: 0.9587196069509483, iteration: 70106
loss: 0.9981587529182434,grad_norm: 0.9999990101428506, iteration: 70107
loss: 1.0755107402801514,grad_norm: 0.999999760289452, iteration: 70108
loss: 1.0019711256027222,grad_norm: 0.9999991178077704, iteration: 70109
loss: 1.0230516195297241,grad_norm: 0.9999995543962386, iteration: 70110
loss: 0.991780161857605,grad_norm: 0.9071910880174643, iteration: 70111
loss: 1.0220282077789307,grad_norm: 0.9999994105467317, iteration: 70112
loss: 1.0137935876846313,grad_norm: 0.981620022841328, iteration: 70113
loss: 1.019287109375,grad_norm: 0.9561976593043449, iteration: 70114
loss: 0.9718302488327026,grad_norm: 0.9387706266738413, iteration: 70115
loss: 1.0281355381011963,grad_norm: 0.8407334876095726, iteration: 70116
loss: 1.0120054483413696,grad_norm: 0.9844565373426359, iteration: 70117
loss: 1.0142511129379272,grad_norm: 0.8918307485858852, iteration: 70118
loss: 1.0166223049163818,grad_norm: 0.8287826385830926, iteration: 70119
loss: 0.9955806136131287,grad_norm: 0.8545039861155426, iteration: 70120
loss: 1.0054796934127808,grad_norm: 0.9999992426793235, iteration: 70121
loss: 0.9968036413192749,grad_norm: 0.9708952236113383, iteration: 70122
loss: 1.0397461652755737,grad_norm: 0.9999993379426491, iteration: 70123
loss: 1.0348438024520874,grad_norm: 0.8961170516778755, iteration: 70124
loss: 1.110357642173767,grad_norm: 0.9379288694511796, iteration: 70125
loss: 1.0390633344650269,grad_norm: 0.999999190374211, iteration: 70126
loss: 1.009423017501831,grad_norm: 0.775283249437731, iteration: 70127
loss: 0.9758339524269104,grad_norm: 0.974997572908675, iteration: 70128
loss: 1.0159313678741455,grad_norm: 0.8599049275008944, iteration: 70129
loss: 1.0247076749801636,grad_norm: 0.8652614052184782, iteration: 70130
loss: 1.0485084056854248,grad_norm: 0.9999991629794635, iteration: 70131
loss: 1.0232157707214355,grad_norm: 0.9999990180489828, iteration: 70132
loss: 1.0318610668182373,grad_norm: 0.9066810740770468, iteration: 70133
loss: 0.9857497215270996,grad_norm: 0.9999993909627326, iteration: 70134
loss: 0.9600115418434143,grad_norm: 0.9999992102366627, iteration: 70135
loss: 0.9950537085533142,grad_norm: 0.999999145532303, iteration: 70136
loss: 0.9653736352920532,grad_norm: 0.9999990876320153, iteration: 70137
loss: 1.0912050008773804,grad_norm: 0.9881195306150966, iteration: 70138
loss: 0.9536105990409851,grad_norm: 0.9897029032352463, iteration: 70139
loss: 1.0136351585388184,grad_norm: 0.9364039807878749, iteration: 70140
loss: 1.0469120740890503,grad_norm: 0.9999995426771627, iteration: 70141
loss: 0.9849997758865356,grad_norm: 0.8752187225133433, iteration: 70142
loss: 1.009068250656128,grad_norm: 0.9999992252335821, iteration: 70143
loss: 0.9892157912254333,grad_norm: 0.8896399891341887, iteration: 70144
loss: 1.055427074432373,grad_norm: 0.9999993896690123, iteration: 70145
loss: 1.0456198453903198,grad_norm: 0.9999996395649986, iteration: 70146
loss: 1.0487685203552246,grad_norm: 0.9999993643234403, iteration: 70147
loss: 1.0111123323440552,grad_norm: 0.9999998088875612, iteration: 70148
loss: 0.9963984489440918,grad_norm: 0.9999992422895817, iteration: 70149
loss: 1.027923583984375,grad_norm: 0.9416351717132224, iteration: 70150
loss: 0.9995352029800415,grad_norm: 0.9999989358463833, iteration: 70151
loss: 1.0487021207809448,grad_norm: 0.9999991129142175, iteration: 70152
loss: 1.1227904558181763,grad_norm: 0.999999767745826, iteration: 70153
loss: 0.9759789109230042,grad_norm: 0.8683997926347238, iteration: 70154
loss: 1.0152956247329712,grad_norm: 0.9999998940461305, iteration: 70155
loss: 1.0198911428451538,grad_norm: 0.9999996327577987, iteration: 70156
loss: 1.0511635541915894,grad_norm: 0.8464867767781137, iteration: 70157
loss: 1.0554189682006836,grad_norm: 0.9999998865384923, iteration: 70158
loss: 0.9661084413528442,grad_norm: 0.7981536495934786, iteration: 70159
loss: 1.0056785345077515,grad_norm: 0.9999993719184487, iteration: 70160
loss: 0.9542378187179565,grad_norm: 0.8681809538230578, iteration: 70161
loss: 1.0087897777557373,grad_norm: 0.984329038719048, iteration: 70162
loss: 0.9914107322692871,grad_norm: 0.7901206180774759, iteration: 70163
loss: 1.041849970817566,grad_norm: 0.8687394405641614, iteration: 70164
loss: 0.9938765168190002,grad_norm: 0.9603430856055107, iteration: 70165
loss: 0.9519926905632019,grad_norm: 0.9545318437240171, iteration: 70166
loss: 1.0366947650909424,grad_norm: 0.9113063409443085, iteration: 70167
loss: 1.027181625366211,grad_norm: 0.9999991055045259, iteration: 70168
loss: 1.011232614517212,grad_norm: 0.949380485613871, iteration: 70169
loss: 0.9964613914489746,grad_norm: 0.8920728948454287, iteration: 70170
loss: 1.01990807056427,grad_norm: 0.9999994647393281, iteration: 70171
loss: 1.0343645811080933,grad_norm: 0.9181134912866111, iteration: 70172
loss: 1.0641220808029175,grad_norm: 0.9647348671030854, iteration: 70173
loss: 1.015415906906128,grad_norm: 0.999998990661736, iteration: 70174
loss: 1.026907205581665,grad_norm: 0.8763395058677396, iteration: 70175
loss: 1.0261893272399902,grad_norm: 0.9999993975456213, iteration: 70176
loss: 1.0469086170196533,grad_norm: 0.9999991551479359, iteration: 70177
loss: 0.994670033454895,grad_norm: 0.8899778590381519, iteration: 70178
loss: 1.008561372756958,grad_norm: 0.9999991932515595, iteration: 70179
loss: 1.0407744646072388,grad_norm: 0.9999989885777476, iteration: 70180
loss: 1.0012415647506714,grad_norm: 0.7814824703620129, iteration: 70181
loss: 1.0036002397537231,grad_norm: 0.8763445529102546, iteration: 70182
loss: 0.9834992289543152,grad_norm: 0.9228366643822106, iteration: 70183
loss: 0.967132031917572,grad_norm: 0.8033006161706275, iteration: 70184
loss: 0.9843980073928833,grad_norm: 0.959068849762064, iteration: 70185
loss: 1.0118553638458252,grad_norm: 0.9999993074700442, iteration: 70186
loss: 1.0407606363296509,grad_norm: 0.9999997913341682, iteration: 70187
loss: 0.9938108921051025,grad_norm: 0.8627978280691122, iteration: 70188
loss: 0.9899689555168152,grad_norm: 0.8657425793777839, iteration: 70189
loss: 0.9835652709007263,grad_norm: 0.8229098646336549, iteration: 70190
loss: 1.009616494178772,grad_norm: 0.9999990614345501, iteration: 70191
loss: 1.0220426321029663,grad_norm: 0.9999991215148432, iteration: 70192
loss: 1.0171045064926147,grad_norm: 0.8341357290137135, iteration: 70193
loss: 0.99786376953125,grad_norm: 0.976555640472892, iteration: 70194
loss: 0.9854909181594849,grad_norm: 0.9999990631972464, iteration: 70195
loss: 1.0358843803405762,grad_norm: 0.9999991105047205, iteration: 70196
loss: 0.9853247404098511,grad_norm: 0.940165517121314, iteration: 70197
loss: 1.0000416040420532,grad_norm: 0.9999991584598179, iteration: 70198
loss: 1.0230227708816528,grad_norm: 0.8688599053422346, iteration: 70199
loss: 0.9922122359275818,grad_norm: 0.9881723030888956, iteration: 70200
loss: 1.0039467811584473,grad_norm: 0.9037189011652479, iteration: 70201
loss: 1.0648953914642334,grad_norm: 0.9532592042330479, iteration: 70202
loss: 1.0671359300613403,grad_norm: 0.9999992479003049, iteration: 70203
loss: 1.2684012651443481,grad_norm: 0.9999997663896981, iteration: 70204
loss: 1.0389679670333862,grad_norm: 0.9999990584203219, iteration: 70205
loss: 0.9666590690612793,grad_norm: 0.900495054752203, iteration: 70206
loss: 1.0866694450378418,grad_norm: 0.9954286268477324, iteration: 70207
loss: 1.0062514543533325,grad_norm: 0.9999996259831468, iteration: 70208
loss: 1.0033671855926514,grad_norm: 0.9450465134909974, iteration: 70209
loss: 0.9934884309768677,grad_norm: 0.8743145875798872, iteration: 70210
loss: 1.001144289970398,grad_norm: 0.8487350205669202, iteration: 70211
loss: 1.0271481275558472,grad_norm: 0.9999992021766896, iteration: 70212
loss: 1.0203657150268555,grad_norm: 0.8400231805219619, iteration: 70213
loss: 0.9937160611152649,grad_norm: 0.9999990444092597, iteration: 70214
loss: 1.0152649879455566,grad_norm: 0.8090361572681896, iteration: 70215
loss: 0.984210729598999,grad_norm: 0.9999991529961809, iteration: 70216
loss: 1.0123317241668701,grad_norm: 0.999999456489077, iteration: 70217
loss: 1.0162090063095093,grad_norm: 0.9396843684457746, iteration: 70218
loss: 1.0109875202178955,grad_norm: 0.8958342504515093, iteration: 70219
loss: 1.1049714088439941,grad_norm: 0.9999991021253305, iteration: 70220
loss: 0.962780773639679,grad_norm: 0.7717522955261807, iteration: 70221
loss: 1.0052787065505981,grad_norm: 0.9999992883184569, iteration: 70222
loss: 1.0234367847442627,grad_norm: 0.9761544655569261, iteration: 70223
loss: 1.0443390607833862,grad_norm: 0.9975673740998942, iteration: 70224
loss: 1.0182396173477173,grad_norm: 0.9089386259693332, iteration: 70225
loss: 1.0861608982086182,grad_norm: 0.9999998151975925, iteration: 70226
loss: 1.0465975999832153,grad_norm: 0.9408607647809398, iteration: 70227
loss: 0.9954255819320679,grad_norm: 0.7442130237094503, iteration: 70228
loss: 1.0404082536697388,grad_norm: 0.8459881018630511, iteration: 70229
loss: 0.9817667007446289,grad_norm: 0.9647059346829249, iteration: 70230
loss: 1.039696455001831,grad_norm: 0.9999997338082239, iteration: 70231
loss: 1.0205726623535156,grad_norm: 0.9999990411015763, iteration: 70232
loss: 1.110788345336914,grad_norm: 0.999999344061409, iteration: 70233
loss: 1.0065584182739258,grad_norm: 0.8446826820419506, iteration: 70234
loss: 1.0262415409088135,grad_norm: 0.9999992613510887, iteration: 70235
loss: 1.0425469875335693,grad_norm: 0.8491882665614167, iteration: 70236
loss: 1.0557337999343872,grad_norm: 0.7740649037275128, iteration: 70237
loss: 1.0015515089035034,grad_norm: 0.8210121508323303, iteration: 70238
loss: 0.9684508442878723,grad_norm: 0.9630292174088713, iteration: 70239
loss: 0.9880502820014954,grad_norm: 0.9286938447983466, iteration: 70240
loss: 1.0007725954055786,grad_norm: 0.8850557049387647, iteration: 70241
loss: 1.0157465934753418,grad_norm: 0.9999990908674399, iteration: 70242
loss: 0.9930572509765625,grad_norm: 0.8975489756848745, iteration: 70243
loss: 1.0257484912872314,grad_norm: 0.7664623642292195, iteration: 70244
loss: 1.066053867340088,grad_norm: 0.9999991228180033, iteration: 70245
loss: 1.0095653533935547,grad_norm: 0.999999077283169, iteration: 70246
loss: 1.0082101821899414,grad_norm: 0.9999990824351455, iteration: 70247
loss: 1.0252115726470947,grad_norm: 0.9999992446880687, iteration: 70248
loss: 1.0100178718566895,grad_norm: 0.7877120901060994, iteration: 70249
loss: 0.9614803791046143,grad_norm: 0.9254940888028722, iteration: 70250
loss: 1.0346884727478027,grad_norm: 0.9999989756846237, iteration: 70251
loss: 0.995156466960907,grad_norm: 0.8951870678002859, iteration: 70252
loss: 1.1102163791656494,grad_norm: 0.9999992772418718, iteration: 70253
loss: 1.0316272974014282,grad_norm: 0.9999996434903013, iteration: 70254
loss: 0.982745885848999,grad_norm: 0.8766362324807384, iteration: 70255
loss: 1.0105818510055542,grad_norm: 0.9397695602364289, iteration: 70256
loss: 1.0090694427490234,grad_norm: 0.8566085009268531, iteration: 70257
loss: 1.0347670316696167,grad_norm: 0.9715751481801451, iteration: 70258
loss: 1.082882046699524,grad_norm: 0.9999996435765515, iteration: 70259
loss: 1.0428627729415894,grad_norm: 0.9999997001180445, iteration: 70260
loss: 1.008301854133606,grad_norm: 0.982523056834262, iteration: 70261
loss: 0.982310950756073,grad_norm: 0.7865064916758844, iteration: 70262
loss: 1.0110063552856445,grad_norm: 0.947397032283934, iteration: 70263
loss: 0.9996746182441711,grad_norm: 0.9999990236751927, iteration: 70264
loss: 1.0303499698638916,grad_norm: 0.9999998059559067, iteration: 70265
loss: 1.0134096145629883,grad_norm: 0.7979770548142703, iteration: 70266
loss: 1.0005125999450684,grad_norm: 0.8557509310833781, iteration: 70267
loss: 0.9942281246185303,grad_norm: 0.9999989234854123, iteration: 70268
loss: 1.0279706716537476,grad_norm: 0.9725523950050977, iteration: 70269
loss: 0.9708880186080933,grad_norm: 0.8994789069145874, iteration: 70270
loss: 1.0293292999267578,grad_norm: 0.9999991389063821, iteration: 70271
loss: 1.0396754741668701,grad_norm: 0.9999990255019043, iteration: 70272
loss: 1.036947250366211,grad_norm: 0.9999998814693147, iteration: 70273
loss: 1.0054540634155273,grad_norm: 0.9999991232664456, iteration: 70274
loss: 1.000427484512329,grad_norm: 0.9471988420174322, iteration: 70275
loss: 1.0589866638183594,grad_norm: 0.9999994622732256, iteration: 70276
loss: 1.0371465682983398,grad_norm: 0.9999990669463399, iteration: 70277
loss: 1.005712628364563,grad_norm: 0.9999996228367273, iteration: 70278
loss: 1.067939043045044,grad_norm: 1.0000000243455882, iteration: 70279
loss: 1.019837737083435,grad_norm: 0.9972716811821531, iteration: 70280
loss: 1.1061378717422485,grad_norm: 0.9999991270152809, iteration: 70281
loss: 1.1866207122802734,grad_norm: 0.9635761337324052, iteration: 70282
loss: 1.3207422494888306,grad_norm: 0.9999992177540578, iteration: 70283
loss: 1.1297478675842285,grad_norm: 0.9878461458260644, iteration: 70284
loss: 1.0609644651412964,grad_norm: 0.999999551969571, iteration: 70285
loss: 1.0246727466583252,grad_norm: 0.9999998645825156, iteration: 70286
loss: 1.034895896911621,grad_norm: 0.8943870831095094, iteration: 70287
loss: 0.9826880693435669,grad_norm: 0.8231949641066244, iteration: 70288
loss: 0.9926491975784302,grad_norm: 0.9031749480008592, iteration: 70289
loss: 1.0106366872787476,grad_norm: 0.829973976775234, iteration: 70290
loss: 1.0047693252563477,grad_norm: 0.9334681010018945, iteration: 70291
loss: 1.0173650979995728,grad_norm: 0.9018353561651942, iteration: 70292
loss: 0.9773932099342346,grad_norm: 0.9830584504652354, iteration: 70293
loss: 1.003374457359314,grad_norm: 0.9999996850685868, iteration: 70294
loss: 1.0292109251022339,grad_norm: 0.9609512379979293, iteration: 70295
loss: 0.9964428544044495,grad_norm: 0.9834542450893489, iteration: 70296
loss: 0.9978787302970886,grad_norm: 0.88642320131293, iteration: 70297
loss: 0.9915532469749451,grad_norm: 0.9999991220541145, iteration: 70298
loss: 1.026569128036499,grad_norm: 0.9804221969899094, iteration: 70299
loss: 1.00401771068573,grad_norm: 0.9645248215509009, iteration: 70300
loss: 0.9905771613121033,grad_norm: 0.9099365998708433, iteration: 70301
loss: 0.9952856302261353,grad_norm: 0.9470874734723597, iteration: 70302
loss: 1.0812177658081055,grad_norm: 0.9999999580888479, iteration: 70303
loss: 1.198785424232483,grad_norm: 0.9999998259263394, iteration: 70304
loss: 1.0205076932907104,grad_norm: 0.9999990161171708, iteration: 70305
loss: 1.0285366773605347,grad_norm: 0.9999994457300005, iteration: 70306
loss: 1.025816798210144,grad_norm: 0.792454406461274, iteration: 70307
loss: 1.0064109563827515,grad_norm: 0.8769938401029208, iteration: 70308
loss: 0.9936009049415588,grad_norm: 0.7866826039074493, iteration: 70309
loss: 1.0207980871200562,grad_norm: 0.9999991540262851, iteration: 70310
loss: 0.9582626223564148,grad_norm: 0.8431716310280927, iteration: 70311
loss: 1.020548701286316,grad_norm: 0.9999990483611908, iteration: 70312
loss: 0.9698972702026367,grad_norm: 0.9820790940122835, iteration: 70313
loss: 1.019404411315918,grad_norm: 0.9999990828482912, iteration: 70314
loss: 1.2869373559951782,grad_norm: 0.9999996909537479, iteration: 70315
loss: 1.0870333909988403,grad_norm: 0.9999994920183322, iteration: 70316
loss: 1.0102916955947876,grad_norm: 0.9999992813027333, iteration: 70317
loss: 1.0113645792007446,grad_norm: 0.9060925489575768, iteration: 70318
loss: 1.2395455837249756,grad_norm: 0.9999992928440605, iteration: 70319
loss: 0.9973377585411072,grad_norm: 0.9574575696393763, iteration: 70320
loss: 1.0057984590530396,grad_norm: 0.9707475757738123, iteration: 70321
loss: 0.9991790652275085,grad_norm: 0.9363449530164449, iteration: 70322
loss: 1.005172610282898,grad_norm: 0.8743896424481185, iteration: 70323
loss: 0.9949132800102234,grad_norm: 0.9999992024024743, iteration: 70324
loss: 1.0123882293701172,grad_norm: 0.9999990058151147, iteration: 70325
loss: 1.0749868154525757,grad_norm: 0.9999997822961896, iteration: 70326
loss: 0.9906362891197205,grad_norm: 0.999999212574764, iteration: 70327
loss: 1.07551109790802,grad_norm: 0.9999992553330281, iteration: 70328
loss: 1.001221776008606,grad_norm: 0.9535548165088218, iteration: 70329
loss: 1.0200767517089844,grad_norm: 0.9019269053270436, iteration: 70330
loss: 1.0327272415161133,grad_norm: 0.9886602151374752, iteration: 70331
loss: 1.0211371183395386,grad_norm: 0.8942852413643709, iteration: 70332
loss: 1.0072630643844604,grad_norm: 0.9999989575338812, iteration: 70333
loss: 1.0626875162124634,grad_norm: 0.9195517911869399, iteration: 70334
loss: 1.0425232648849487,grad_norm: 0.9266979776574332, iteration: 70335
loss: 1.1355079412460327,grad_norm: 0.9999994342434956, iteration: 70336
loss: 1.0084298849105835,grad_norm: 0.9999992720322323, iteration: 70337
loss: 1.0454696416854858,grad_norm: 0.9999992924243963, iteration: 70338
loss: 1.0087676048278809,grad_norm: 0.9999995037660105, iteration: 70339
loss: 1.0178383588790894,grad_norm: 0.999998978966919, iteration: 70340
loss: 1.0240939855575562,grad_norm: 0.8865337562414065, iteration: 70341
loss: 1.0159759521484375,grad_norm: 0.9999999521506215, iteration: 70342
loss: 0.9717569351196289,grad_norm: 0.9999997179083979, iteration: 70343
loss: 1.0065038204193115,grad_norm: 0.9999990405590264, iteration: 70344
loss: 0.9962848424911499,grad_norm: 0.9999993481966724, iteration: 70345
loss: 0.9865416884422302,grad_norm: 0.9460394143812679, iteration: 70346
loss: 0.9882650971412659,grad_norm: 0.9999994508761866, iteration: 70347
loss: 0.9854966402053833,grad_norm: 0.8190804441556075, iteration: 70348
loss: 1.0045788288116455,grad_norm: 1.0000000125524635, iteration: 70349
loss: 1.0255242586135864,grad_norm: 0.9999990738675408, iteration: 70350
loss: 1.0987335443496704,grad_norm: 0.9999991130567558, iteration: 70351
loss: 0.9815102815628052,grad_norm: 0.8639722334410679, iteration: 70352
loss: 1.0062836408615112,grad_norm: 0.9999992395288683, iteration: 70353
loss: 1.0055958032608032,grad_norm: 0.7138588106017268, iteration: 70354
loss: 1.0236464738845825,grad_norm: 0.9186316905052381, iteration: 70355
loss: 1.0153625011444092,grad_norm: 0.8402762961307267, iteration: 70356
loss: 1.0158658027648926,grad_norm: 0.9999990921695432, iteration: 70357
loss: 1.0324338674545288,grad_norm: 0.9491723347852555, iteration: 70358
loss: 1.0146642923355103,grad_norm: 0.9533154834140889, iteration: 70359
loss: 0.9732954502105713,grad_norm: 0.895379555021282, iteration: 70360
loss: 1.0122138261795044,grad_norm: 1.0000000082023488, iteration: 70361
loss: 0.9842890501022339,grad_norm: 0.999999020861211, iteration: 70362
loss: 0.9849662184715271,grad_norm: 0.9999991601406497, iteration: 70363
loss: 0.995061457157135,grad_norm: 0.9280843269445844, iteration: 70364
loss: 1.0197093486785889,grad_norm: 0.8914442368287389, iteration: 70365
loss: 1.0131103992462158,grad_norm: 0.9999991786505869, iteration: 70366
loss: 1.001832127571106,grad_norm: 0.8490378301632534, iteration: 70367
loss: 1.0925805568695068,grad_norm: 0.9999992110286792, iteration: 70368
loss: 1.0114176273345947,grad_norm: 0.9999992002310146, iteration: 70369
loss: 1.0425928831100464,grad_norm: 0.9999996153692864, iteration: 70370
loss: 1.0197886228561401,grad_norm: 0.9999992656755261, iteration: 70371
loss: 1.0190216302871704,grad_norm: 0.9852481995885809, iteration: 70372
loss: 1.0243703126907349,grad_norm: 0.9999993279969588, iteration: 70373
loss: 0.9975486993789673,grad_norm: 0.9172733633363845, iteration: 70374
loss: 0.989401638507843,grad_norm: 0.9999991917293822, iteration: 70375
loss: 1.001495361328125,grad_norm: 0.8189649124699494, iteration: 70376
loss: 0.9791653156280518,grad_norm: 0.9999990613526749, iteration: 70377
loss: 1.0053493976593018,grad_norm: 0.999999791747142, iteration: 70378
loss: 1.0253819227218628,grad_norm: 0.8798158488014718, iteration: 70379
loss: 0.9903491139411926,grad_norm: 0.9999989822382152, iteration: 70380
loss: 0.9840996265411377,grad_norm: 0.7522338804857897, iteration: 70381
loss: 0.997081458568573,grad_norm: 0.9550336364568879, iteration: 70382
loss: 1.0723981857299805,grad_norm: 0.9999992918035369, iteration: 70383
loss: 1.033140778541565,grad_norm: 0.9999991072912348, iteration: 70384
loss: 0.9959241151809692,grad_norm: 0.9999992014790591, iteration: 70385
loss: 1.01446533203125,grad_norm: 0.7565692734485665, iteration: 70386
loss: 0.9925817847251892,grad_norm: 0.8179694449762404, iteration: 70387
loss: 0.9913792014122009,grad_norm: 0.76565915107032, iteration: 70388
loss: 1.0264010429382324,grad_norm: 0.9999994446872398, iteration: 70389
loss: 1.0217196941375732,grad_norm: 0.7862269662811905, iteration: 70390
loss: 0.97078937292099,grad_norm: 0.9999992161449953, iteration: 70391
loss: 1.0305471420288086,grad_norm: 0.9819581165571489, iteration: 70392
loss: 1.0335921049118042,grad_norm: 0.9999991513243184, iteration: 70393
loss: 0.9906061291694641,grad_norm: 0.9999990499623518, iteration: 70394
loss: 1.0131165981292725,grad_norm: 0.9999993755029576, iteration: 70395
loss: 1.0192536115646362,grad_norm: 0.7822278128474345, iteration: 70396
loss: 1.0065438747406006,grad_norm: 0.9999993371074614, iteration: 70397
loss: 1.0593031644821167,grad_norm: 0.952566585067504, iteration: 70398
loss: 1.0120867490768433,grad_norm: 0.999999197254839, iteration: 70399
loss: 0.9869962930679321,grad_norm: 0.994711323998066, iteration: 70400
loss: 0.9941602349281311,grad_norm: 0.9999991461723657, iteration: 70401
loss: 1.0016483068466187,grad_norm: 0.9074211105115552, iteration: 70402
loss: 0.9848544597625732,grad_norm: 0.9999990907108129, iteration: 70403
loss: 1.035079836845398,grad_norm: 0.9434371186253069, iteration: 70404
loss: 1.0372018814086914,grad_norm: 0.9999992741887094, iteration: 70405
loss: 1.0383718013763428,grad_norm: 0.9999989676215225, iteration: 70406
loss: 0.9940316677093506,grad_norm: 0.907680018689974, iteration: 70407
loss: 0.9893836975097656,grad_norm: 0.9711095570754399, iteration: 70408
loss: 1.016438603401184,grad_norm: 0.9411889980840564, iteration: 70409
loss: 0.97368323802948,grad_norm: 0.9961371157447397, iteration: 70410
loss: 1.0072461366653442,grad_norm: 0.9588899708844947, iteration: 70411
loss: 0.9973286390304565,grad_norm: 0.9999990536109196, iteration: 70412
loss: 0.978438675403595,grad_norm: 0.7794729343541681, iteration: 70413
loss: 1.0227150917053223,grad_norm: 0.9999998153093297, iteration: 70414
loss: 0.9619125723838806,grad_norm: 0.9999989679519127, iteration: 70415
loss: 1.2206816673278809,grad_norm: 0.9999996536294609, iteration: 70416
loss: 1.037225365638733,grad_norm: 0.999999942244404, iteration: 70417
loss: 1.1133003234863281,grad_norm: 1.000000041489437, iteration: 70418
loss: 0.982564389705658,grad_norm: 0.996745955118197, iteration: 70419
loss: 1.0203145742416382,grad_norm: 0.9473042964435356, iteration: 70420
loss: 1.0191550254821777,grad_norm: 0.8022970956145621, iteration: 70421
loss: 1.072433590888977,grad_norm: 0.9999996635913405, iteration: 70422
loss: 0.9668365120887756,grad_norm: 0.9999991511125071, iteration: 70423
loss: 1.006721019744873,grad_norm: 0.9999990868249508, iteration: 70424
loss: 1.0199629068374634,grad_norm: 0.8214996302057784, iteration: 70425
loss: 1.0175570249557495,grad_norm: 0.9999990488123092, iteration: 70426
loss: 0.9968639612197876,grad_norm: 0.9999992380196651, iteration: 70427
loss: 1.0265427827835083,grad_norm: 0.8771326565826578, iteration: 70428
loss: 1.0278575420379639,grad_norm: 0.999999106823963, iteration: 70429
loss: 1.0111753940582275,grad_norm: 0.9209313740352998, iteration: 70430
loss: 1.0125595331192017,grad_norm: 0.9999995080351156, iteration: 70431
loss: 0.9685060381889343,grad_norm: 0.99999919371976, iteration: 70432
loss: 0.9791885614395142,grad_norm: 0.9240027472474869, iteration: 70433
loss: 0.955499529838562,grad_norm: 0.9581527565362941, iteration: 70434
loss: 0.9811664819717407,grad_norm: 0.999999110915717, iteration: 70435
loss: 0.992101788520813,grad_norm: 0.9999996213978264, iteration: 70436
loss: 1.036514163017273,grad_norm: 0.9999991507264689, iteration: 70437
loss: 1.0036215782165527,grad_norm: 0.907673216842276, iteration: 70438
loss: 0.9991015791893005,grad_norm: 0.9895826179933054, iteration: 70439
loss: 1.015984296798706,grad_norm: 0.9999991802492368, iteration: 70440
loss: 0.994121789932251,grad_norm: 0.8627950326773487, iteration: 70441
loss: 1.0326822996139526,grad_norm: 0.9930008376788719, iteration: 70442
loss: 1.0059174299240112,grad_norm: 0.9999991711649352, iteration: 70443
loss: 1.014907956123352,grad_norm: 0.8499798206749686, iteration: 70444
loss: 0.987969160079956,grad_norm: 0.7508491981953781, iteration: 70445
loss: 1.025421380996704,grad_norm: 0.9615812305830711, iteration: 70446
loss: 0.9777741432189941,grad_norm: 0.9440114552511191, iteration: 70447
loss: 1.0271145105361938,grad_norm: 0.9999992628043829, iteration: 70448
loss: 0.9901509284973145,grad_norm: 0.8257891859872534, iteration: 70449
loss: 1.0031514167785645,grad_norm: 0.914246575953353, iteration: 70450
loss: 0.9928175806999207,grad_norm: 0.7811748113324705, iteration: 70451
loss: 1.0440990924835205,grad_norm: 0.9999999836537826, iteration: 70452
loss: 1.01156485080719,grad_norm: 0.9302893764611503, iteration: 70453
loss: 1.0105758905410767,grad_norm: 0.8928255694691299, iteration: 70454
loss: 1.0011897087097168,grad_norm: 0.923955434453219, iteration: 70455
loss: 1.0066542625427246,grad_norm: 0.8777250316839403, iteration: 70456
loss: 0.9911066889762878,grad_norm: 0.9999990855767098, iteration: 70457
loss: 1.1244169473648071,grad_norm: 0.9999994086625484, iteration: 70458
loss: 0.9724511504173279,grad_norm: 0.9999990438521917, iteration: 70459
loss: 1.0457549095153809,grad_norm: 0.8846055113572492, iteration: 70460
loss: 1.0239161252975464,grad_norm: 0.999999043176625, iteration: 70461
loss: 1.0336202383041382,grad_norm: 0.9999992707924406, iteration: 70462
loss: 1.0038377046585083,grad_norm: 0.8175462666850611, iteration: 70463
loss: 1.0417711734771729,grad_norm: 0.9999992382656199, iteration: 70464
loss: 1.0017948150634766,grad_norm: 0.9698025312205717, iteration: 70465
loss: 1.013224482536316,grad_norm: 0.9999993245131398, iteration: 70466
loss: 1.005417823791504,grad_norm: 0.9999991371575417, iteration: 70467
loss: 1.0108485221862793,grad_norm: 0.9999994762566314, iteration: 70468
loss: 0.9760366678237915,grad_norm: 0.8574747017804994, iteration: 70469
loss: 0.9538207054138184,grad_norm: 0.9999991216913254, iteration: 70470
loss: 1.050704002380371,grad_norm: 0.9999995673012861, iteration: 70471
loss: 1.0420247316360474,grad_norm: 0.9156635129716905, iteration: 70472
loss: 0.9892337322235107,grad_norm: 0.8280592595358772, iteration: 70473
loss: 0.9777014851570129,grad_norm: 0.9999992078668574, iteration: 70474
loss: 1.0015922784805298,grad_norm: 0.9999990967449834, iteration: 70475
loss: 1.0097534656524658,grad_norm: 0.9999990600744343, iteration: 70476
loss: 1.0400121212005615,grad_norm: 0.9412030006771994, iteration: 70477
loss: 0.9893788695335388,grad_norm: 0.9999991250600041, iteration: 70478
loss: 0.9916242361068726,grad_norm: 0.8753700758821037, iteration: 70479
loss: 1.0214762687683105,grad_norm: 0.7667267920737585, iteration: 70480
loss: 0.9885907173156738,grad_norm: 0.830656026281771, iteration: 70481
loss: 1.002069354057312,grad_norm: 0.846089043289088, iteration: 70482
loss: 1.0158863067626953,grad_norm: 0.8572841628469429, iteration: 70483
loss: 0.9866511225700378,grad_norm: 0.8305518116211742, iteration: 70484
loss: 1.2368487119674683,grad_norm: 0.9999993412597579, iteration: 70485
loss: 1.0342220067977905,grad_norm: 0.8957081529013874, iteration: 70486
loss: 1.0192278623580933,grad_norm: 0.9999992506736383, iteration: 70487
loss: 0.9608725309371948,grad_norm: 0.8922562143105415, iteration: 70488
loss: 1.0289310216903687,grad_norm: 0.9999993107154721, iteration: 70489
loss: 0.9778569340705872,grad_norm: 0.999999196078486, iteration: 70490
loss: 1.08910071849823,grad_norm: 0.9246514335101252, iteration: 70491
loss: 0.9621540307998657,grad_norm: 0.9832240014022536, iteration: 70492
loss: 1.0851191282272339,grad_norm: 0.999999402279887, iteration: 70493
loss: 1.0137910842895508,grad_norm: 0.9999990909966667, iteration: 70494
loss: 0.9626116752624512,grad_norm: 0.8721781825795633, iteration: 70495
loss: 1.0430861711502075,grad_norm: 0.9999996928079705, iteration: 70496
loss: 1.0215277671813965,grad_norm: 0.8408035007695267, iteration: 70497
loss: 0.9745407104492188,grad_norm: 0.9999998546356023, iteration: 70498
loss: 1.0299406051635742,grad_norm: 0.9837252886178893, iteration: 70499
loss: 1.0544624328613281,grad_norm: 0.9999991610836844, iteration: 70500
loss: 1.0248417854309082,grad_norm: 0.8818716207994551, iteration: 70501
loss: 1.0025147199630737,grad_norm: 0.9531465247264999, iteration: 70502
loss: 0.9803770184516907,grad_norm: 0.863361267588892, iteration: 70503
loss: 1.0442203283309937,grad_norm: 0.9999992257290827, iteration: 70504
loss: 1.0506244897842407,grad_norm: 0.9940604057259672, iteration: 70505
loss: 0.9930748343467712,grad_norm: 0.8939076478029782, iteration: 70506
loss: 1.076418399810791,grad_norm: 0.9999992743280642, iteration: 70507
loss: 0.9984539151191711,grad_norm: 0.9743069128074987, iteration: 70508
loss: 0.9961159229278564,grad_norm: 0.9838996802334379, iteration: 70509
loss: 1.0458546876907349,grad_norm: 0.9999992936061834, iteration: 70510
loss: 1.0293819904327393,grad_norm: 0.9999996521964762, iteration: 70511
loss: 0.9674524068832397,grad_norm: 0.8857620982127427, iteration: 70512
loss: 1.1299210786819458,grad_norm: 0.9999993361248194, iteration: 70513
loss: 1.0147441625595093,grad_norm: 0.9882377263739452, iteration: 70514
loss: 1.0575120449066162,grad_norm: 0.9999995138896615, iteration: 70515
loss: 0.9938230514526367,grad_norm: 0.9999991069853158, iteration: 70516
loss: 0.9846315979957581,grad_norm: 0.9358868990844575, iteration: 70517
loss: 0.9928595423698425,grad_norm: 0.8253160489044574, iteration: 70518
loss: 1.026071310043335,grad_norm: 0.9999998810025688, iteration: 70519
loss: 1.0655597448349,grad_norm: 0.999999163577317, iteration: 70520
loss: 1.036753535270691,grad_norm: 0.999999526561783, iteration: 70521
loss: 0.9760869741439819,grad_norm: 0.9999992171963792, iteration: 70522
loss: 0.966964066028595,grad_norm: 0.8638960931568401, iteration: 70523
loss: 1.0210471153259277,grad_norm: 0.9999994348612257, iteration: 70524
loss: 1.0012441873550415,grad_norm: 0.9999990211344789, iteration: 70525
loss: 0.9954105019569397,grad_norm: 0.8852243368229992, iteration: 70526
loss: 0.9953416585922241,grad_norm: 0.9870203131370016, iteration: 70527
loss: 1.0782889127731323,grad_norm: 0.8579246756215863, iteration: 70528
loss: 1.0008660554885864,grad_norm: 0.824042914027958, iteration: 70529
loss: 0.981495201587677,grad_norm: 0.8777221983860872, iteration: 70530
loss: 1.061284065246582,grad_norm: 0.9999998545642653, iteration: 70531
loss: 0.9674441814422607,grad_norm: 0.9999990475622317, iteration: 70532
loss: 1.0311743021011353,grad_norm: 0.9743649230801661, iteration: 70533
loss: 0.9780682325363159,grad_norm: 0.9999990061155611, iteration: 70534
loss: 0.9932343363761902,grad_norm: 0.8646178022619988, iteration: 70535
loss: 1.0007437467575073,grad_norm: 0.8514604705835044, iteration: 70536
loss: 1.0055598020553589,grad_norm: 0.9999991260413892, iteration: 70537
loss: 1.0066553354263306,grad_norm: 0.8250166282464944, iteration: 70538
loss: 0.9678136110305786,grad_norm: 0.9999991098892165, iteration: 70539
loss: 1.0179306268692017,grad_norm: 0.9999991633964912, iteration: 70540
loss: 1.0183496475219727,grad_norm: 0.9999991538755432, iteration: 70541
loss: 0.9882534146308899,grad_norm: 0.9337626062933828, iteration: 70542
loss: 0.9816349744796753,grad_norm: 0.9999991782798167, iteration: 70543
loss: 1.0337885618209839,grad_norm: 0.9986980375271461, iteration: 70544
loss: 1.012830376625061,grad_norm: 0.9999991661968979, iteration: 70545
loss: 0.9698962569236755,grad_norm: 0.9999989653377115, iteration: 70546
loss: 1.011527180671692,grad_norm: 0.9307670668522496, iteration: 70547
loss: 0.9804445505142212,grad_norm: 0.9999998762460103, iteration: 70548
loss: 1.0001354217529297,grad_norm: 0.9765507150019079, iteration: 70549
loss: 1.011154294013977,grad_norm: 0.9999990605289801, iteration: 70550
loss: 1.044541835784912,grad_norm: 0.999999451979085, iteration: 70551
loss: 1.0517598390579224,grad_norm: 0.9148631588040753, iteration: 70552
loss: 1.0045078992843628,grad_norm: 0.8259741964710069, iteration: 70553
loss: 1.0460366010665894,grad_norm: 0.9999991336651938, iteration: 70554
loss: 0.9764326214790344,grad_norm: 0.8216532306245, iteration: 70555
loss: 1.030800223350525,grad_norm: 0.9295551277225902, iteration: 70556
loss: 1.0004481077194214,grad_norm: 0.7973980898963943, iteration: 70557
loss: 0.9842024445533752,grad_norm: 0.9999993613431255, iteration: 70558
loss: 0.9968305230140686,grad_norm: 0.9200469546939682, iteration: 70559
loss: 0.9927120208740234,grad_norm: 0.9083253175286905, iteration: 70560
loss: 0.9960008263587952,grad_norm: 0.9999998176479726, iteration: 70561
loss: 1.0129384994506836,grad_norm: 0.9999997864106556, iteration: 70562
loss: 0.9701487421989441,grad_norm: 0.861502818979594, iteration: 70563
loss: 0.9855017066001892,grad_norm: 0.9999993492810877, iteration: 70564
loss: 1.055840015411377,grad_norm: 0.9999998476181969, iteration: 70565
loss: 1.0033992528915405,grad_norm: 0.845678663685228, iteration: 70566
loss: 0.9910730123519897,grad_norm: 0.9999990841843437, iteration: 70567
loss: 0.990466296672821,grad_norm: 0.8857820853790612, iteration: 70568
loss: 0.958328366279602,grad_norm: 0.9999991329771926, iteration: 70569
loss: 1.1877368688583374,grad_norm: 0.9999990672492352, iteration: 70570
loss: 1.0507863759994507,grad_norm: 0.9999990953653295, iteration: 70571
loss: 1.0251367092132568,grad_norm: 0.9752394267310178, iteration: 70572
loss: 1.0666964054107666,grad_norm: 0.9999992343375355, iteration: 70573
loss: 0.9991908669471741,grad_norm: 0.9929103035054928, iteration: 70574
loss: 0.9717456698417664,grad_norm: 0.942930901723807, iteration: 70575
loss: 1.0786306858062744,grad_norm: 0.9999993581934369, iteration: 70576
loss: 1.106061577796936,grad_norm: 0.9999990477021546, iteration: 70577
loss: 1.0255476236343384,grad_norm: 0.9999994949798185, iteration: 70578
loss: 0.9909626841545105,grad_norm: 0.9999992461604159, iteration: 70579
loss: 1.0101556777954102,grad_norm: 0.8553751266339343, iteration: 70580
loss: 1.0815705060958862,grad_norm: 0.9999991002860182, iteration: 70581
loss: 0.9959461688995361,grad_norm: 0.7587306970774518, iteration: 70582
loss: 1.0720481872558594,grad_norm: 0.9999999374120646, iteration: 70583
loss: 1.0305911302566528,grad_norm: 0.9999992867451922, iteration: 70584
loss: 1.0811476707458496,grad_norm: 0.9999992317655604, iteration: 70585
loss: 1.0304032564163208,grad_norm: 0.9887277812843943, iteration: 70586
loss: 0.9964218735694885,grad_norm: 0.7716284353077905, iteration: 70587
loss: 1.0033414363861084,grad_norm: 0.973393127209027, iteration: 70588
loss: 1.0385411977767944,grad_norm: 0.9999998927231637, iteration: 70589
loss: 1.0311113595962524,grad_norm: 0.8895390993696687, iteration: 70590
loss: 1.1347826719284058,grad_norm: 0.9999990912259576, iteration: 70591
loss: 0.9921459555625916,grad_norm: 0.8909230785116581, iteration: 70592
loss: 1.0390093326568604,grad_norm: 0.9999990115759032, iteration: 70593
loss: 1.0260875225067139,grad_norm: 0.9999992889945876, iteration: 70594
loss: 1.0261949300765991,grad_norm: 0.999999266741792, iteration: 70595
loss: 0.9506568312644958,grad_norm: 0.9038042411382036, iteration: 70596
loss: 1.0110409259796143,grad_norm: 0.9551467453298367, iteration: 70597
loss: 1.0610800981521606,grad_norm: 0.9999998869916039, iteration: 70598
loss: 1.0038889646530151,grad_norm: 0.7691873328983838, iteration: 70599
loss: 1.0160212516784668,grad_norm: 0.89580734570491, iteration: 70600
loss: 1.0845223665237427,grad_norm: 0.9999992460816046, iteration: 70601
loss: 0.9945400357246399,grad_norm: 0.9265883446446697, iteration: 70602
loss: 0.9888888597488403,grad_norm: 0.9738065630192502, iteration: 70603
loss: 1.0051381587982178,grad_norm: 0.8679678402228815, iteration: 70604
loss: 0.9986116290092468,grad_norm: 0.8273393786123102, iteration: 70605
loss: 0.980450451374054,grad_norm: 0.8048385904982284, iteration: 70606
loss: 1.0143914222717285,grad_norm: 0.9999991136116803, iteration: 70607
loss: 1.0075045824050903,grad_norm: 0.9643199855723811, iteration: 70608
loss: 1.0388598442077637,grad_norm: 0.7431168999767506, iteration: 70609
loss: 1.0358505249023438,grad_norm: 0.9583172870771371, iteration: 70610
loss: 1.0288523435592651,grad_norm: 0.8542345857394307, iteration: 70611
loss: 0.9832278490066528,grad_norm: 0.9492818427334514, iteration: 70612
loss: 0.9794267416000366,grad_norm: 0.9999994995454891, iteration: 70613
loss: 1.212303638458252,grad_norm: 0.9999994537361923, iteration: 70614
loss: 1.0074125528335571,grad_norm: 0.9999989569786657, iteration: 70615
loss: 1.0106594562530518,grad_norm: 0.9881737062498684, iteration: 70616
loss: 0.9896247982978821,grad_norm: 0.9999995825752191, iteration: 70617
loss: 1.1363189220428467,grad_norm: 0.9922251492209774, iteration: 70618
loss: 0.9827786684036255,grad_norm: 0.7740001590855473, iteration: 70619
loss: 1.0195478200912476,grad_norm: 0.999999079212288, iteration: 70620
loss: 1.0151429176330566,grad_norm: 0.9630653981012572, iteration: 70621
loss: 1.035069465637207,grad_norm: 0.999999236598475, iteration: 70622
loss: 0.989535391330719,grad_norm: 0.6967837675073205, iteration: 70623
loss: 0.9990940093994141,grad_norm: 0.9471289535368426, iteration: 70624
loss: 1.0221023559570312,grad_norm: 0.933105731850175, iteration: 70625
loss: 1.021763801574707,grad_norm: 0.9293176135700538, iteration: 70626
loss: 0.9867181777954102,grad_norm: 0.9999991357266096, iteration: 70627
loss: 1.0162826776504517,grad_norm: 0.9252251256874279, iteration: 70628
loss: 1.0748393535614014,grad_norm: 0.986485595195728, iteration: 70629
loss: 0.9893709421157837,grad_norm: 0.9999994244641831, iteration: 70630
loss: 1.023277759552002,grad_norm: 0.9345833394745684, iteration: 70631
loss: 1.0140036344528198,grad_norm: 0.9999991872896103, iteration: 70632
loss: 1.011486530303955,grad_norm: 0.9999991058271203, iteration: 70633
loss: 0.9698513746261597,grad_norm: 0.9999990564617601, iteration: 70634
loss: 1.0111079216003418,grad_norm: 0.999999784222139, iteration: 70635
loss: 1.0383472442626953,grad_norm: 0.9974119248268258, iteration: 70636
loss: 1.0015439987182617,grad_norm: 0.7566114323856885, iteration: 70637
loss: 1.0706019401550293,grad_norm: 0.960118769562094, iteration: 70638
loss: 1.0620594024658203,grad_norm: 0.8385271976146517, iteration: 70639
loss: 0.9966952204704285,grad_norm: 0.9999990874315317, iteration: 70640
loss: 1.0411192178726196,grad_norm: 0.912100922634916, iteration: 70641
loss: 1.00786292552948,grad_norm: 0.9792763437599027, iteration: 70642
loss: 1.0124542713165283,grad_norm: 0.9237415970433306, iteration: 70643
loss: 0.9666635394096375,grad_norm: 0.9083376725697787, iteration: 70644
loss: 1.0418188571929932,grad_norm: 0.928866446443915, iteration: 70645
loss: 1.0329147577285767,grad_norm: 0.9999990983473215, iteration: 70646
loss: 1.0891854763031006,grad_norm: 0.9999996538433576, iteration: 70647
loss: 1.014607310295105,grad_norm: 0.8468908722933962, iteration: 70648
loss: 0.9985857605934143,grad_norm: 0.8046856513071672, iteration: 70649
loss: 1.0238898992538452,grad_norm: 0.999999177986414, iteration: 70650
loss: 1.023128628730774,grad_norm: 0.8334104121409406, iteration: 70651
loss: 1.0029693841934204,grad_norm: 0.8553417186061882, iteration: 70652
loss: 0.9732493758201599,grad_norm: 0.9999989808355938, iteration: 70653
loss: 0.9950449466705322,grad_norm: 0.9999990342545139, iteration: 70654
loss: 1.0228067636489868,grad_norm: 0.8547926062473123, iteration: 70655
loss: 0.9809824228286743,grad_norm: 0.9911535890388594, iteration: 70656
loss: 1.0374003648757935,grad_norm: 0.930414743065597, iteration: 70657
loss: 0.9964045882225037,grad_norm: 0.9999990471878217, iteration: 70658
loss: 0.9974634647369385,grad_norm: 0.8584206925093639, iteration: 70659
loss: 0.9887550473213196,grad_norm: 0.9418373169056101, iteration: 70660
loss: 0.9972789883613586,grad_norm: 0.9258180383991187, iteration: 70661
loss: 0.9995238184928894,grad_norm: 0.9650450387143956, iteration: 70662
loss: 1.0159181356430054,grad_norm: 0.8948540116952179, iteration: 70663
loss: 0.9614013433456421,grad_norm: 0.9999990959277181, iteration: 70664
loss: 1.047898769378662,grad_norm: 0.9999993574931051, iteration: 70665
loss: 1.1292529106140137,grad_norm: 0.9999996507652736, iteration: 70666
loss: 1.0016156435012817,grad_norm: 0.9240747846810229, iteration: 70667
loss: 1.0104279518127441,grad_norm: 0.9463312690117073, iteration: 70668
loss: 0.9615468978881836,grad_norm: 0.9529415710416034, iteration: 70669
loss: 0.9871067404747009,grad_norm: 0.9999990486102239, iteration: 70670
loss: 0.995108425617218,grad_norm: 0.8105857564142261, iteration: 70671
loss: 0.9892951250076294,grad_norm: 0.9999991157883231, iteration: 70672
loss: 1.0907273292541504,grad_norm: 0.8051154836233153, iteration: 70673
loss: 1.0023963451385498,grad_norm: 0.9999990343924765, iteration: 70674
loss: 0.9998517632484436,grad_norm: 0.9132818002912718, iteration: 70675
loss: 0.9753229022026062,grad_norm: 0.999999024138198, iteration: 70676
loss: 0.9615781903266907,grad_norm: 0.999999092898962, iteration: 70677
loss: 1.0226764678955078,grad_norm: 0.9999989790432144, iteration: 70678
loss: 1.0099403858184814,grad_norm: 0.7688527550734453, iteration: 70679
loss: 1.029941439628601,grad_norm: 0.999999080907985, iteration: 70680
loss: 1.0098931789398193,grad_norm: 0.9225455052971434, iteration: 70681
loss: 1.0101044178009033,grad_norm: 0.840807918581493, iteration: 70682
loss: 1.053081750869751,grad_norm: 0.9999999121477676, iteration: 70683
loss: 0.9846931099891663,grad_norm: 0.8020463807052969, iteration: 70684
loss: 0.99454265832901,grad_norm: 0.9236545153863144, iteration: 70685
loss: 1.0089056491851807,grad_norm: 0.7486566986235148, iteration: 70686
loss: 0.977741003036499,grad_norm: 0.8906838837499137, iteration: 70687
loss: 1.003369927406311,grad_norm: 0.9999990655043789, iteration: 70688
loss: 0.9967068433761597,grad_norm: 0.8737057242823757, iteration: 70689
loss: 1.0293827056884766,grad_norm: 0.8751645321068646, iteration: 70690
loss: 1.043568730354309,grad_norm: 0.9999996407822023, iteration: 70691
loss: 1.024465560913086,grad_norm: 0.9999991866969914, iteration: 70692
loss: 1.0050402879714966,grad_norm: 0.8506522195665688, iteration: 70693
loss: 0.998884916305542,grad_norm: 0.8766156275888702, iteration: 70694
loss: 0.9989099502563477,grad_norm: 0.8932537478254262, iteration: 70695
loss: 1.0149794816970825,grad_norm: 0.8660725503418032, iteration: 70696
loss: 1.0318809747695923,grad_norm: 0.924753369235174, iteration: 70697
loss: 0.9922996759414673,grad_norm: 0.989306403403321, iteration: 70698
loss: 1.017144799232483,grad_norm: 0.9532260671716041, iteration: 70699
loss: 1.0228290557861328,grad_norm: 0.8793384341257064, iteration: 70700
loss: 1.0042420625686646,grad_norm: 0.9999994334572093, iteration: 70701
loss: 0.9906747341156006,grad_norm: 0.9999990836776956, iteration: 70702
loss: 0.9763799905776978,grad_norm: 0.9135475981908541, iteration: 70703
loss: 0.9799075722694397,grad_norm: 0.8356637063609827, iteration: 70704
loss: 1.0826059579849243,grad_norm: 0.9999997180742612, iteration: 70705
loss: 1.015676736831665,grad_norm: 0.8319374587613488, iteration: 70706
loss: 0.993690550327301,grad_norm: 0.9547124237901409, iteration: 70707
loss: 0.9992920160293579,grad_norm: 0.999999426261227, iteration: 70708
loss: 1.063107967376709,grad_norm: 0.9999992849809477, iteration: 70709
loss: 1.1014821529388428,grad_norm: 0.9999993105032479, iteration: 70710
loss: 1.0135101079940796,grad_norm: 0.9999994044064077, iteration: 70711
loss: 1.074907660484314,grad_norm: 0.9999998151439616, iteration: 70712
loss: 1.0362298488616943,grad_norm: 0.9999992154177603, iteration: 70713
loss: 0.9974679946899414,grad_norm: 0.9043685035916439, iteration: 70714
loss: 1.0831329822540283,grad_norm: 0.9999990907275917, iteration: 70715
loss: 1.183518886566162,grad_norm: 0.9999996923699853, iteration: 70716
loss: 0.9947507977485657,grad_norm: 0.8024612761256881, iteration: 70717
loss: 0.989466667175293,grad_norm: 0.8717988546516809, iteration: 70718
loss: 0.9635338187217712,grad_norm: 0.7623559515958438, iteration: 70719
loss: 0.9606904983520508,grad_norm: 0.9999993289531678, iteration: 70720
loss: 1.0075255632400513,grad_norm: 0.9999991810785914, iteration: 70721
loss: 1.0933713912963867,grad_norm: 0.9999995768367822, iteration: 70722
loss: 1.0162110328674316,grad_norm: 0.9999998541358307, iteration: 70723
loss: 1.1565861701965332,grad_norm: 0.9999994208690507, iteration: 70724
loss: 1.0275086164474487,grad_norm: 0.9999990821795725, iteration: 70725
loss: 1.123376727104187,grad_norm: 0.9999994872388136, iteration: 70726
loss: 1.237968921661377,grad_norm: 0.9999994463498386, iteration: 70727
loss: 1.0208313465118408,grad_norm: 0.9999990730484345, iteration: 70728
loss: 1.0269871950149536,grad_norm: 0.923986541225545, iteration: 70729
loss: 1.0125117301940918,grad_norm: 0.922471826153085, iteration: 70730
loss: 1.0183616876602173,grad_norm: 0.9333979299201574, iteration: 70731
loss: 1.0310715436935425,grad_norm: 0.9999994227162919, iteration: 70732
loss: 1.0891281366348267,grad_norm: 0.9999999583709883, iteration: 70733
loss: 1.0495260953903198,grad_norm: 0.9999992453649256, iteration: 70734
loss: 1.0289593935012817,grad_norm: 0.9798565344522768, iteration: 70735
loss: 1.0010197162628174,grad_norm: 0.8816240010682427, iteration: 70736
loss: 1.0015910863876343,grad_norm: 0.9999991282258259, iteration: 70737
loss: 1.0464904308319092,grad_norm: 0.9999998304705712, iteration: 70738
loss: 1.0243268013000488,grad_norm: 0.9999991552359899, iteration: 70739
loss: 0.9841420650482178,grad_norm: 0.8518086646034551, iteration: 70740
loss: 1.0099791288375854,grad_norm: 0.8334437120368623, iteration: 70741
loss: 1.011998176574707,grad_norm: 0.9999995581467055, iteration: 70742
loss: 1.0684020519256592,grad_norm: 0.9275641970701217, iteration: 70743
loss: 0.996901273727417,grad_norm: 0.9999991950742584, iteration: 70744
loss: 1.57023286819458,grad_norm: 1.000000002367377, iteration: 70745
loss: 1.0173323154449463,grad_norm: 0.8537677981341741, iteration: 70746
loss: 1.0168269872665405,grad_norm: 0.9324445092580872, iteration: 70747
loss: 1.0240899324417114,grad_norm: 0.9999993714036991, iteration: 70748
loss: 1.0065031051635742,grad_norm: 0.7650535169440554, iteration: 70749
loss: 0.9933838844299316,grad_norm: 0.9999993672277226, iteration: 70750
loss: 1.0077708959579468,grad_norm: 0.9941008432573619, iteration: 70751
loss: 0.9646876454353333,grad_norm: 0.9999990289168141, iteration: 70752
loss: 1.0338191986083984,grad_norm: 0.9999991328430371, iteration: 70753
loss: 0.9802637696266174,grad_norm: 0.9999996288915252, iteration: 70754
loss: 1.0695674419403076,grad_norm: 0.9999997431591122, iteration: 70755
loss: 1.0317025184631348,grad_norm: 0.9999994621763253, iteration: 70756
loss: 1.0587968826293945,grad_norm: 0.9999994747520774, iteration: 70757
loss: 0.9879110455513,grad_norm: 0.999999083870757, iteration: 70758
loss: 0.9884941577911377,grad_norm: 0.975790884844083, iteration: 70759
loss: 1.110977053642273,grad_norm: 0.9999995113110691, iteration: 70760
loss: 1.0151023864746094,grad_norm: 0.9999996140729124, iteration: 70761
loss: 1.0754485130310059,grad_norm: 0.973863866794021, iteration: 70762
loss: 1.016872763633728,grad_norm: 0.9999993718039667, iteration: 70763
loss: 1.066922903060913,grad_norm: 0.9999992368424688, iteration: 70764
loss: 0.9895868301391602,grad_norm: 0.961843910632934, iteration: 70765
loss: 1.0262219905853271,grad_norm: 0.9189424292066777, iteration: 70766
loss: 1.004173755645752,grad_norm: 0.8495129989814179, iteration: 70767
loss: 1.039169430732727,grad_norm: 0.9999997810565162, iteration: 70768
loss: 1.0981863737106323,grad_norm: 0.9999995895115271, iteration: 70769
loss: 1.0673391819000244,grad_norm: 0.999999731724299, iteration: 70770
loss: 1.0320130586624146,grad_norm: 0.9999993929687415, iteration: 70771
loss: 1.148270845413208,grad_norm: 0.9999999048517846, iteration: 70772
loss: 1.093907356262207,grad_norm: 0.999999686301334, iteration: 70773
loss: 1.041456699371338,grad_norm: 0.9999998914188963, iteration: 70774
loss: 1.0165988206863403,grad_norm: 0.9999995799672675, iteration: 70775
loss: 1.003941297531128,grad_norm: 0.9999992266902759, iteration: 70776
loss: 1.046769142150879,grad_norm: 0.9999993442654187, iteration: 70777
loss: 0.9906601309776306,grad_norm: 0.99999930370202, iteration: 70778
loss: 1.0163538455963135,grad_norm: 0.9999992857764315, iteration: 70779
loss: 1.0492167472839355,grad_norm: 0.9999998897031013, iteration: 70780
loss: 1.0850244760513306,grad_norm: 0.999999736665234, iteration: 70781
loss: 1.032447099685669,grad_norm: 0.9234516649364305, iteration: 70782
loss: 0.9963635802268982,grad_norm: 0.9999995949666833, iteration: 70783
loss: 1.006299614906311,grad_norm: 0.9836624958919573, iteration: 70784
loss: 1.004898190498352,grad_norm: 0.9758338281956567, iteration: 70785
loss: 0.9778541922569275,grad_norm: 0.9999990514675419, iteration: 70786
loss: 1.007142424583435,grad_norm: 0.9999991479005991, iteration: 70787
loss: 1.056306004524231,grad_norm: 0.9999997976774182, iteration: 70788
loss: 0.9771045446395874,grad_norm: 0.9999992216028081, iteration: 70789
loss: 1.0759928226470947,grad_norm: 0.9355787348601412, iteration: 70790
loss: 0.9744684100151062,grad_norm: 0.8864963113535638, iteration: 70791
loss: 1.048843502998352,grad_norm: 0.9999993996330944, iteration: 70792
loss: 0.9963572025299072,grad_norm: 0.7815682860545633, iteration: 70793
loss: 1.010965347290039,grad_norm: 0.9999994179919979, iteration: 70794
loss: 1.053120732307434,grad_norm: 0.9999994557205304, iteration: 70795
loss: 0.9851458668708801,grad_norm: 0.9999996645972414, iteration: 70796
loss: 0.9925809502601624,grad_norm: 0.9316755227685436, iteration: 70797
loss: 1.0086536407470703,grad_norm: 0.8682598105662502, iteration: 70798
loss: 1.0130034685134888,grad_norm: 0.9999991598410382, iteration: 70799
loss: 0.9757402539253235,grad_norm: 0.9192702468332985, iteration: 70800
loss: 1.0217695236206055,grad_norm: 0.9999990457329576, iteration: 70801
loss: 0.9618802070617676,grad_norm: 0.7767269912377388, iteration: 70802
loss: 1.0137393474578857,grad_norm: 0.9999991206482537, iteration: 70803
loss: 0.9973646402359009,grad_norm: 0.9633155490798833, iteration: 70804
loss: 1.1530109643936157,grad_norm: 0.999999185638576, iteration: 70805
loss: 1.0104925632476807,grad_norm: 0.9999990990467472, iteration: 70806
loss: 1.0289446115493774,grad_norm: 0.9999993011569377, iteration: 70807
loss: 1.0355889797210693,grad_norm: 0.9999991347422507, iteration: 70808
loss: 1.0228698253631592,grad_norm: 0.9999991283748286, iteration: 70809
loss: 1.0707963705062866,grad_norm: 0.9620658404806307, iteration: 70810
loss: 1.0367552042007446,grad_norm: 0.9999997562133209, iteration: 70811
loss: 1.0082658529281616,grad_norm: 0.8414880515296465, iteration: 70812
loss: 0.9921455979347229,grad_norm: 0.999998986424757, iteration: 70813
loss: 1.0268023014068604,grad_norm: 0.9999991670435032, iteration: 70814
loss: 1.027076244354248,grad_norm: 0.8832775277698758, iteration: 70815
loss: 1.024840235710144,grad_norm: 0.7808847239409373, iteration: 70816
loss: 0.9560322165489197,grad_norm: 0.9999989816037028, iteration: 70817
loss: 1.0438615083694458,grad_norm: 0.9999991480140831, iteration: 70818
loss: 0.9772100448608398,grad_norm: 0.9999991645092658, iteration: 70819
loss: 1.0093470811843872,grad_norm: 0.9999994286584132, iteration: 70820
loss: 1.0165669918060303,grad_norm: 0.9999990751822185, iteration: 70821
loss: 0.9826868772506714,grad_norm: 0.9999990099698465, iteration: 70822
loss: 0.9961799383163452,grad_norm: 0.9989850666867188, iteration: 70823
loss: 1.0859876871109009,grad_norm: 0.9999994377636808, iteration: 70824
loss: 0.9922498464584351,grad_norm: 0.8785528363977257, iteration: 70825
loss: 1.0112946033477783,grad_norm: 0.9999991509402459, iteration: 70826
loss: 1.0053718090057373,grad_norm: 0.9293951936857696, iteration: 70827
loss: 0.9960984587669373,grad_norm: 0.9999992115611441, iteration: 70828
loss: 1.1175848245620728,grad_norm: 0.9999992467753704, iteration: 70829
loss: 1.2563457489013672,grad_norm: 0.9999991911309164, iteration: 70830
loss: 0.9806554913520813,grad_norm: 0.9482109448653577, iteration: 70831
loss: 0.9736055135726929,grad_norm: 0.9321991833047089, iteration: 70832
loss: 1.047646164894104,grad_norm: 0.9999992147376165, iteration: 70833
loss: 1.0165544748306274,grad_norm: 0.9999994110362608, iteration: 70834
loss: 1.0995711088180542,grad_norm: 0.999999625529246, iteration: 70835
loss: 1.2418385744094849,grad_norm: 0.999999439794417, iteration: 70836
loss: 0.9794251918792725,grad_norm: 0.9999991759098531, iteration: 70837
loss: 1.0733922719955444,grad_norm: 0.9999994813672963, iteration: 70838
loss: 1.0087981224060059,grad_norm: 0.9886136879731645, iteration: 70839
loss: 1.0436384677886963,grad_norm: 0.9999994804272496, iteration: 70840
loss: 1.0040379762649536,grad_norm: 0.8835909749847529, iteration: 70841
loss: 1.0018436908721924,grad_norm: 0.9999994000323851, iteration: 70842
loss: 0.9959366321563721,grad_norm: 0.9999990552075496, iteration: 70843
loss: 0.9602416753768921,grad_norm: 0.8373201167836669, iteration: 70844
loss: 1.029626727104187,grad_norm: 0.8031402789118641, iteration: 70845
loss: 1.001434326171875,grad_norm: 0.9999991009157825, iteration: 70846
loss: 0.979136049747467,grad_norm: 0.8868725332085167, iteration: 70847
loss: 1.0122138261795044,grad_norm: 0.9242016837319371, iteration: 70848
loss: 1.0076690912246704,grad_norm: 0.999999107521146, iteration: 70849
loss: 0.9548198580741882,grad_norm: 0.9765565922443692, iteration: 70850
loss: 0.9966645836830139,grad_norm: 0.9085282041268823, iteration: 70851
loss: 1.0232652425765991,grad_norm: 0.9999993667646458, iteration: 70852
loss: 1.102563500404358,grad_norm: 0.9999996909591937, iteration: 70853
loss: 1.0109480619430542,grad_norm: 0.9999990940174703, iteration: 70854
loss: 1.0174095630645752,grad_norm: 0.9999993395478243, iteration: 70855
loss: 1.0084681510925293,grad_norm: 0.9999991597054532, iteration: 70856
loss: 0.9957178831100464,grad_norm: 0.999999093083481, iteration: 70857
loss: 1.1547962427139282,grad_norm: 0.9999999945365361, iteration: 70858
loss: 1.0181019306182861,grad_norm: 0.9999992272999945, iteration: 70859
loss: 0.9856277108192444,grad_norm: 0.96123757289542, iteration: 70860
loss: 1.0988982915878296,grad_norm: 0.9999997290536504, iteration: 70861
loss: 1.017080307006836,grad_norm: 0.9999991319616027, iteration: 70862
loss: 1.0019760131835938,grad_norm: 0.8570020651979756, iteration: 70863
loss: 1.2247387170791626,grad_norm: 0.9999993260395786, iteration: 70864
loss: 1.0281593799591064,grad_norm: 0.7821482947614388, iteration: 70865
loss: 1.0244474411010742,grad_norm: 0.913439731521703, iteration: 70866
loss: 1.0006705522537231,grad_norm: 0.827251438241566, iteration: 70867
loss: 1.0240164995193481,grad_norm: 0.9999991941052782, iteration: 70868
loss: 1.072745680809021,grad_norm: 0.9999998124015442, iteration: 70869
loss: 1.0027740001678467,grad_norm: 0.9999990065682871, iteration: 70870
loss: 1.0757513046264648,grad_norm: 0.99999942322592, iteration: 70871
loss: 0.9855977892875671,grad_norm: 0.9317843047364777, iteration: 70872
loss: 0.9974534511566162,grad_norm: 0.8901592891469547, iteration: 70873
loss: 1.0471837520599365,grad_norm: 0.9999992097979352, iteration: 70874
loss: 1.0479624271392822,grad_norm: 0.9999990285871126, iteration: 70875
loss: 1.0211573839187622,grad_norm: 0.9999989741144103, iteration: 70876
loss: 1.013819694519043,grad_norm: 0.9999990214513125, iteration: 70877
loss: 0.9790055751800537,grad_norm: 0.9439417233172949, iteration: 70878
loss: 1.0189508199691772,grad_norm: 0.9842637999642493, iteration: 70879
loss: 1.0200220346450806,grad_norm: 0.999999117046281, iteration: 70880
loss: 0.9750765562057495,grad_norm: 0.9999991146566051, iteration: 70881
loss: 0.9647636413574219,grad_norm: 0.9497441308912451, iteration: 70882
loss: 0.9914267659187317,grad_norm: 0.8841147712913552, iteration: 70883
loss: 0.9987948536872864,grad_norm: 0.9333150879320873, iteration: 70884
loss: 1.0042860507965088,grad_norm: 0.9036809384371061, iteration: 70885
loss: 0.9815345406532288,grad_norm: 0.9999990609057389, iteration: 70886
loss: 1.012028455734253,grad_norm: 0.9774004519105061, iteration: 70887
loss: 1.0135586261749268,grad_norm: 0.9629694722417211, iteration: 70888
loss: 0.9840118885040283,grad_norm: 0.9999991688103395, iteration: 70889
loss: 0.9815467596054077,grad_norm: 0.9421991447681445, iteration: 70890
loss: 0.9481920003890991,grad_norm: 0.9479195265922833, iteration: 70891
loss: 1.0296061038970947,grad_norm: 0.8669243944457776, iteration: 70892
loss: 0.9998261332511902,grad_norm: 0.8848397902819786, iteration: 70893
loss: 1.0469988584518433,grad_norm: 0.9281792749904705, iteration: 70894
loss: 0.9990662336349487,grad_norm: 0.93852647114671, iteration: 70895
loss: 1.0877019166946411,grad_norm: 0.9999998494059784, iteration: 70896
loss: 1.0131394863128662,grad_norm: 0.9999992924123666, iteration: 70897
loss: 0.9754180312156677,grad_norm: 0.8340827129925261, iteration: 70898
loss: 1.0312225818634033,grad_norm: 0.9196258149348252, iteration: 70899
loss: 0.9801321625709534,grad_norm: 0.9999991685894555, iteration: 70900
loss: 1.0111486911773682,grad_norm: 0.9961283131655019, iteration: 70901
loss: 1.0610151290893555,grad_norm: 0.9999990871002087, iteration: 70902
loss: 1.0164276361465454,grad_norm: 0.8794104240296148, iteration: 70903
loss: 1.0365643501281738,grad_norm: 0.9999992344847713, iteration: 70904
loss: 1.0370498895645142,grad_norm: 0.9999995701693027, iteration: 70905
loss: 0.9895505309104919,grad_norm: 0.9982219507266167, iteration: 70906
loss: 1.0058904886245728,grad_norm: 0.8882829269350545, iteration: 70907
loss: 1.0153921842575073,grad_norm: 0.8600097018423257, iteration: 70908
loss: 0.9803242683410645,grad_norm: 0.9884741077053586, iteration: 70909
loss: 1.0158793926239014,grad_norm: 0.9999991283646968, iteration: 70910
loss: 1.0064873695373535,grad_norm: 0.9249389925904303, iteration: 70911
loss: 0.9849040508270264,grad_norm: 0.883337589521108, iteration: 70912
loss: 0.9879827499389648,grad_norm: 0.9535209177091387, iteration: 70913
loss: 0.95278000831604,grad_norm: 0.9999991459453437, iteration: 70914
loss: 0.9929004311561584,grad_norm: 0.8962009617029203, iteration: 70915
loss: 1.0819052457809448,grad_norm: 0.9003279751943409, iteration: 70916
loss: 1.071991205215454,grad_norm: 0.999999886387447, iteration: 70917
loss: 0.9926402568817139,grad_norm: 0.9502476627890482, iteration: 70918
loss: 0.979137659072876,grad_norm: 0.9999992742663979, iteration: 70919
loss: 1.0309077501296997,grad_norm: 0.8244598564146364, iteration: 70920
loss: 0.9866923689842224,grad_norm: 0.8450785442378522, iteration: 70921
loss: 1.1080315113067627,grad_norm: 0.999999423175732, iteration: 70922
loss: 1.045540690422058,grad_norm: 0.999999148103466, iteration: 70923
loss: 1.0631483793258667,grad_norm: 0.7785499769911367, iteration: 70924
loss: 0.9761956334114075,grad_norm: 0.9095744788586586, iteration: 70925
loss: 1.10208261013031,grad_norm: 0.9999990152752933, iteration: 70926
loss: 1.0260484218597412,grad_norm: 0.9786769859667473, iteration: 70927
loss: 1.004102110862732,grad_norm: 0.9627709229403673, iteration: 70928
loss: 1.0194981098175049,grad_norm: 0.9999990398726513, iteration: 70929
loss: 0.9519514441490173,grad_norm: 0.9999991665857808, iteration: 70930
loss: 0.979320764541626,grad_norm: 0.9999990991606181, iteration: 70931
loss: 1.016983151435852,grad_norm: 0.9999994591003345, iteration: 70932
loss: 1.0061041116714478,grad_norm: 0.9999991761177295, iteration: 70933
loss: 1.0481261014938354,grad_norm: 0.9999991903405845, iteration: 70934
loss: 1.0236600637435913,grad_norm: 0.9922468890847663, iteration: 70935
loss: 1.0022521018981934,grad_norm: 0.999999189500948, iteration: 70936
loss: 0.9764183759689331,grad_norm: 0.8988831072334097, iteration: 70937
loss: 0.9570420980453491,grad_norm: 0.9999992102812655, iteration: 70938
loss: 1.0223087072372437,grad_norm: 0.9999990542138278, iteration: 70939
loss: 1.038428544998169,grad_norm: 0.9999991754502403, iteration: 70940
loss: 1.0305358171463013,grad_norm: 0.9999990969211423, iteration: 70941
loss: 0.9986563324928284,grad_norm: 0.994034495027421, iteration: 70942
loss: 1.0128381252288818,grad_norm: 0.9703162362865322, iteration: 70943
loss: 0.9755474328994751,grad_norm: 0.9999991074286715, iteration: 70944
loss: 1.0057246685028076,grad_norm: 0.9314792633271335, iteration: 70945
loss: 0.9531216621398926,grad_norm: 0.9999992374712885, iteration: 70946
loss: 1.0015037059783936,grad_norm: 0.9402289166006819, iteration: 70947
loss: 1.0169544219970703,grad_norm: 0.9900073262125463, iteration: 70948
loss: 0.9974382519721985,grad_norm: 0.9886585733110823, iteration: 70949
loss: 0.9510185122489929,grad_norm: 0.8759267395540539, iteration: 70950
loss: 1.010143518447876,grad_norm: 0.8933534383285413, iteration: 70951
loss: 0.964827299118042,grad_norm: 0.9152632590868242, iteration: 70952
loss: 1.0181334018707275,grad_norm: 0.945267519199529, iteration: 70953
loss: 0.942644476890564,grad_norm: 0.926240195603478, iteration: 70954
loss: 1.0026437044143677,grad_norm: 0.9999991236643604, iteration: 70955
loss: 1.0517746210098267,grad_norm: 0.9452412278744051, iteration: 70956
loss: 1.0086941719055176,grad_norm: 0.9999990745537807, iteration: 70957
loss: 0.9973984360694885,grad_norm: 0.9999990297654492, iteration: 70958
loss: 0.9947519898414612,grad_norm: 0.9999994548227509, iteration: 70959
loss: 0.9891678094863892,grad_norm: 0.9196324009608431, iteration: 70960
loss: 0.9825027585029602,grad_norm: 0.8176601317056812, iteration: 70961
loss: 0.9721105098724365,grad_norm: 0.919156484471848, iteration: 70962
loss: 0.9704148173332214,grad_norm: 0.9999992879129378, iteration: 70963
loss: 0.9853063821792603,grad_norm: 0.9999995997057582, iteration: 70964
loss: 1.0289196968078613,grad_norm: 0.9999990555093708, iteration: 70965
loss: 0.9983022809028625,grad_norm: 0.9999990516896708, iteration: 70966
loss: 0.9856340885162354,grad_norm: 0.9999993974032357, iteration: 70967
loss: 0.9602969288825989,grad_norm: 0.9125693164990175, iteration: 70968
loss: 0.996859073638916,grad_norm: 0.999999338607955, iteration: 70969
loss: 1.0021576881408691,grad_norm: 0.8963201770377301, iteration: 70970
loss: 1.0332742929458618,grad_norm: 0.9223864708297296, iteration: 70971
loss: 0.9998727440834045,grad_norm: 0.9999994235981982, iteration: 70972
loss: 0.9903151392936707,grad_norm: 0.9999991013574855, iteration: 70973
loss: 0.9786730408668518,grad_norm: 0.9475391215004756, iteration: 70974
loss: 0.9919908046722412,grad_norm: 0.9999991549718034, iteration: 70975
loss: 1.013154149055481,grad_norm: 0.7490551720832266, iteration: 70976
loss: 1.029319167137146,grad_norm: 0.9999990655746608, iteration: 70977
loss: 0.9888573288917542,grad_norm: 0.9999992189448476, iteration: 70978
loss: 1.0117827653884888,grad_norm: 0.8696051416197443, iteration: 70979
loss: 1.0006576776504517,grad_norm: 0.8598432980838081, iteration: 70980
loss: 1.0140371322631836,grad_norm: 0.9999995620767905, iteration: 70981
loss: 0.9691190719604492,grad_norm: 0.8860776870234691, iteration: 70982
loss: 1.005735158920288,grad_norm: 0.9999992045495213, iteration: 70983
loss: 0.9801317453384399,grad_norm: 0.8801969096330339, iteration: 70984
loss: 1.0033049583435059,grad_norm: 0.9288540818317745, iteration: 70985
loss: 0.96238774061203,grad_norm: 0.9999989777233327, iteration: 70986
loss: 1.0171246528625488,grad_norm: 0.9999991920836722, iteration: 70987
loss: 1.0061240196228027,grad_norm: 0.9999990695093465, iteration: 70988
loss: 1.011209487915039,grad_norm: 0.9999992613707126, iteration: 70989
loss: 0.9653307199478149,grad_norm: 0.9762139965692912, iteration: 70990
loss: 1.0003935098648071,grad_norm: 0.9999993268744185, iteration: 70991
loss: 1.0124393701553345,grad_norm: 0.8476202435239724, iteration: 70992
loss: 0.9936999082565308,grad_norm: 0.9999990874615214, iteration: 70993
loss: 0.9450712203979492,grad_norm: 0.7846962441123826, iteration: 70994
loss: 1.0181748867034912,grad_norm: 0.9208069308866121, iteration: 70995
loss: 0.9989588260650635,grad_norm: 0.9999989910378075, iteration: 70996
loss: 0.9768901467323303,grad_norm: 0.8880409687756308, iteration: 70997
loss: 1.0106043815612793,grad_norm: 0.9449564187393972, iteration: 70998
loss: 1.0321093797683716,grad_norm: 0.9999992111486175, iteration: 70999
loss: 1.029464840888977,grad_norm: 0.9999990839182975, iteration: 71000
loss: 1.0121407508850098,grad_norm: 0.8012782416422326, iteration: 71001
loss: 1.0334705114364624,grad_norm: 0.9999995590030485, iteration: 71002
loss: 0.9952161312103271,grad_norm: 0.9999991717233586, iteration: 71003
loss: 1.0169978141784668,grad_norm: 0.9999993035672028, iteration: 71004
loss: 1.0151538848876953,grad_norm: 0.9999990894713409, iteration: 71005
loss: 1.0565005540847778,grad_norm: 0.9227535671731704, iteration: 71006
loss: 0.9782937169075012,grad_norm: 0.8520089441722085, iteration: 71007
loss: 0.996799647808075,grad_norm: 0.7733488902640142, iteration: 71008
loss: 1.0374290943145752,grad_norm: 0.9999990527993898, iteration: 71009
loss: 0.9961004257202148,grad_norm: 0.9180020645192383, iteration: 71010
loss: 1.0435303449630737,grad_norm: 0.9999994059455586, iteration: 71011
loss: 1.0285123586654663,grad_norm: 0.876857331081127, iteration: 71012
loss: 0.9938967227935791,grad_norm: 0.9608890014463781, iteration: 71013
loss: 1.0229735374450684,grad_norm: 0.999999087815071, iteration: 71014
loss: 1.010140299797058,grad_norm: 0.9999989998087798, iteration: 71015
loss: 0.9991233944892883,grad_norm: 0.9999997149067111, iteration: 71016
loss: 1.0014686584472656,grad_norm: 0.9572271642610451, iteration: 71017
loss: 0.9636882543563843,grad_norm: 0.9235227713911928, iteration: 71018
loss: 1.039433240890503,grad_norm: 0.999999713901998, iteration: 71019
loss: 1.048054814338684,grad_norm: 0.9999991809999998, iteration: 71020
loss: 1.0093412399291992,grad_norm: 0.9406690178087521, iteration: 71021
loss: 0.9958354234695435,grad_norm: 0.9999991046419778, iteration: 71022
loss: 1.0388675928115845,grad_norm: 0.7554071510846588, iteration: 71023
loss: 1.0206780433654785,grad_norm: 0.9739285355997374, iteration: 71024
loss: 0.9669791460037231,grad_norm: 0.935467711216771, iteration: 71025
loss: 0.9833038449287415,grad_norm: 0.883169105741468, iteration: 71026
loss: 1.0051794052124023,grad_norm: 0.9999993082695043, iteration: 71027
loss: 1.006166696548462,grad_norm: 0.9999995267104184, iteration: 71028
loss: 1.0071630477905273,grad_norm: 0.9503833098842435, iteration: 71029
loss: 0.9905270934104919,grad_norm: 0.8772622568318277, iteration: 71030
loss: 1.0057719945907593,grad_norm: 0.8693289598477703, iteration: 71031
loss: 0.9806483387947083,grad_norm: 0.9867920957155758, iteration: 71032
loss: 1.017526626586914,grad_norm: 0.7565090135827682, iteration: 71033
loss: 1.0018839836120605,grad_norm: 0.9999993148246795, iteration: 71034
loss: 0.9933593273162842,grad_norm: 0.9999990444763595, iteration: 71035
loss: 1.0395716428756714,grad_norm: 0.9999992027409879, iteration: 71036
loss: 1.0357321500778198,grad_norm: 0.9999991944841478, iteration: 71037
loss: 0.9860258102416992,grad_norm: 0.9574748594300261, iteration: 71038
loss: 1.004612684249878,grad_norm: 0.9432176628983192, iteration: 71039
loss: 1.0255460739135742,grad_norm: 0.9999998384271331, iteration: 71040
loss: 0.9695409536361694,grad_norm: 0.8540142317370718, iteration: 71041
loss: 0.9699188470840454,grad_norm: 0.9999992695247827, iteration: 71042
loss: 1.0359127521514893,grad_norm: 0.985457366391164, iteration: 71043
loss: 1.0240907669067383,grad_norm: 0.99999899757376, iteration: 71044
loss: 0.9979270100593567,grad_norm: 0.9291590363928571, iteration: 71045
loss: 1.0119935274124146,grad_norm: 0.9999993365819715, iteration: 71046
loss: 0.9752429127693176,grad_norm: 0.999999084649069, iteration: 71047
loss: 1.0055052042007446,grad_norm: 0.9527474811718561, iteration: 71048
loss: 1.0384267568588257,grad_norm: 0.9999997386546223, iteration: 71049
loss: 1.0141937732696533,grad_norm: 0.999998969371244, iteration: 71050
loss: 1.025133490562439,grad_norm: 0.9053990876682578, iteration: 71051
loss: 1.0204588174819946,grad_norm: 0.9999994333516964, iteration: 71052
loss: 1.0795468091964722,grad_norm: 0.9999999617692319, iteration: 71053
loss: 0.9780731201171875,grad_norm: 0.999999087195296, iteration: 71054
loss: 1.0286773443222046,grad_norm: 0.999999192761681, iteration: 71055
loss: 0.9957801103591919,grad_norm: 0.927488612704779, iteration: 71056
loss: 1.0369048118591309,grad_norm: 0.9893973147575436, iteration: 71057
loss: 0.9923732876777649,grad_norm: 0.9999993266436846, iteration: 71058
loss: 1.0640026330947876,grad_norm: 0.9999992688423754, iteration: 71059
loss: 1.027530550956726,grad_norm: 0.9940754155925108, iteration: 71060
loss: 0.9796144366264343,grad_norm: 0.9999991381100392, iteration: 71061
loss: 1.0084866285324097,grad_norm: 0.9241710632992642, iteration: 71062
loss: 1.0113087892532349,grad_norm: 0.939973177289897, iteration: 71063
loss: 1.0198789834976196,grad_norm: 0.9404879403986423, iteration: 71064
loss: 0.9796248078346252,grad_norm: 0.9461190537321215, iteration: 71065
loss: 0.9911652207374573,grad_norm: 0.9999991852615201, iteration: 71066
loss: 1.0182801485061646,grad_norm: 0.9999990589739548, iteration: 71067
loss: 1.0387150049209595,grad_norm: 0.9999991610790473, iteration: 71068
loss: 0.9627842307090759,grad_norm: 0.8624576223170736, iteration: 71069
loss: 1.0202099084854126,grad_norm: 0.8493414793965789, iteration: 71070
loss: 1.0136659145355225,grad_norm: 0.8534562951693095, iteration: 71071
loss: 1.0303009748458862,grad_norm: 0.9575736259917093, iteration: 71072
loss: 0.9865486025810242,grad_norm: 0.8035173732841426, iteration: 71073
loss: 1.0430580377578735,grad_norm: 0.9558460723416669, iteration: 71074
loss: 1.0211155414581299,grad_norm: 0.950528648598142, iteration: 71075
loss: 0.9762764573097229,grad_norm: 0.9999989928014287, iteration: 71076
loss: 1.004949688911438,grad_norm: 0.9999989661883553, iteration: 71077
loss: 1.0105713605880737,grad_norm: 0.9530551232625487, iteration: 71078
loss: 1.0179129838943481,grad_norm: 0.9901213476770446, iteration: 71079
loss: 1.035412073135376,grad_norm: 0.9999991881843022, iteration: 71080
loss: 1.0466084480285645,grad_norm: 0.9999992366967876, iteration: 71081
loss: 0.9815593361854553,grad_norm: 0.8939973555878818, iteration: 71082
loss: 0.9940634965896606,grad_norm: 0.9094826732472362, iteration: 71083
loss: 0.9874724745750427,grad_norm: 0.9306547920336623, iteration: 71084
loss: 0.975214958190918,grad_norm: 0.8161407724304536, iteration: 71085
loss: 1.0609146356582642,grad_norm: 0.8576499702703628, iteration: 71086
loss: 1.018741250038147,grad_norm: 0.9087636927615854, iteration: 71087
loss: 1.0048282146453857,grad_norm: 0.8579662328771759, iteration: 71088
loss: 1.1902674436569214,grad_norm: 0.9999991994166455, iteration: 71089
loss: 0.9846318960189819,grad_norm: 0.9174507612751998, iteration: 71090
loss: 1.0632437467575073,grad_norm: 0.9087977391818669, iteration: 71091
loss: 1.0016677379608154,grad_norm: 0.926118196717329, iteration: 71092
loss: 1.006129264831543,grad_norm: 0.9316292126708482, iteration: 71093
loss: 1.0562670230865479,grad_norm: 0.9999997725738435, iteration: 71094
loss: 1.0073970556259155,grad_norm: 0.9999990844853557, iteration: 71095
loss: 1.0466389656066895,grad_norm: 0.9999992717009365, iteration: 71096
loss: 1.0118881464004517,grad_norm: 0.9941905114610012, iteration: 71097
loss: 0.997601330280304,grad_norm: 0.999999195101243, iteration: 71098
loss: 1.0446466207504272,grad_norm: 0.9999991871797583, iteration: 71099
loss: 1.037738561630249,grad_norm: 0.8440739864743677, iteration: 71100
loss: 1.0439902544021606,grad_norm: 0.9999990946208903, iteration: 71101
loss: 0.9935446977615356,grad_norm: 0.9398017666023798, iteration: 71102
loss: 0.9917705655097961,grad_norm: 0.9529003251484932, iteration: 71103
loss: 1.0145505666732788,grad_norm: 0.8685444631899223, iteration: 71104
loss: 1.008385181427002,grad_norm: 0.8949538251599168, iteration: 71105
loss: 0.9848735332489014,grad_norm: 0.9240436289345064, iteration: 71106
loss: 1.0606484413146973,grad_norm: 0.9074830702160328, iteration: 71107
loss: 0.9849211573600769,grad_norm: 0.9999992511449642, iteration: 71108
loss: 0.9943416714668274,grad_norm: 0.9999990757328082, iteration: 71109
loss: 0.976694643497467,grad_norm: 0.9408630514526093, iteration: 71110
loss: 1.0338969230651855,grad_norm: 0.8551913731646701, iteration: 71111
loss: 0.9841932058334351,grad_norm: 0.9050700114330583, iteration: 71112
loss: 1.0115736722946167,grad_norm: 0.7823675131978765, iteration: 71113
loss: 1.0142141580581665,grad_norm: 0.9999998559948732, iteration: 71114
loss: 1.065982460975647,grad_norm: 0.9999992879473693, iteration: 71115
loss: 1.0071687698364258,grad_norm: 0.7992085270408275, iteration: 71116
loss: 1.0177167654037476,grad_norm: 0.9877001529646998, iteration: 71117
loss: 1.0174472332000732,grad_norm: 0.8892697782570346, iteration: 71118
loss: 1.0056461095809937,grad_norm: 0.9999996542207499, iteration: 71119
loss: 1.0298829078674316,grad_norm: 0.9999990361128852, iteration: 71120
loss: 1.0160458087921143,grad_norm: 0.9999992561371362, iteration: 71121
loss: 1.0087227821350098,grad_norm: 0.9637137057938728, iteration: 71122
loss: 0.9872938394546509,grad_norm: 0.95936030031875, iteration: 71123
loss: 1.033498764038086,grad_norm: 0.9999991558772859, iteration: 71124
loss: 0.99422687292099,grad_norm: 0.9999991233021942, iteration: 71125
loss: 0.9686070680618286,grad_norm: 0.999999404216561, iteration: 71126
loss: 0.9819111227989197,grad_norm: 0.9999991979878332, iteration: 71127
loss: 1.0181032419204712,grad_norm: 0.9999996304974526, iteration: 71128
loss: 0.9920309782028198,grad_norm: 0.9750055051414955, iteration: 71129
loss: 1.03533935546875,grad_norm: 0.922654262254232, iteration: 71130
loss: 1.0399608612060547,grad_norm: 0.9999996296308912, iteration: 71131
loss: 0.9896155595779419,grad_norm: 0.9346111323593983, iteration: 71132
loss: 1.0425257682800293,grad_norm: 0.999999539949902, iteration: 71133
loss: 1.0195873975753784,grad_norm: 0.9999990925077044, iteration: 71134
loss: 1.0248324871063232,grad_norm: 0.8061130066975721, iteration: 71135
loss: 0.9871650338172913,grad_norm: 0.8292119388574617, iteration: 71136
loss: 1.0389809608459473,grad_norm: 0.9999992186609928, iteration: 71137
loss: 1.0071437358856201,grad_norm: 0.9821616893167807, iteration: 71138
loss: 0.9959892630577087,grad_norm: 0.9999989348303441, iteration: 71139
loss: 0.9878756403923035,grad_norm: 0.9486793747716672, iteration: 71140
loss: 1.0178685188293457,grad_norm: 0.8510953891339321, iteration: 71141
loss: 1.0253294706344604,grad_norm: 0.8336214414017524, iteration: 71142
loss: 1.0078389644622803,grad_norm: 0.924879519019497, iteration: 71143
loss: 0.9983159899711609,grad_norm: 0.999999258909717, iteration: 71144
loss: 1.054462194442749,grad_norm: 0.9999991142682289, iteration: 71145
loss: 1.0066195726394653,grad_norm: 0.8296138500381344, iteration: 71146
loss: 1.0632463693618774,grad_norm: 0.9155609767727687, iteration: 71147
loss: 0.9956478476524353,grad_norm: 0.8458683500079274, iteration: 71148
loss: 0.9619855880737305,grad_norm: 0.848082061364607, iteration: 71149
loss: 0.9972481727600098,grad_norm: 0.999999143005781, iteration: 71150
loss: 1.0074620246887207,grad_norm: 0.9999990943355317, iteration: 71151
loss: 1.0034939050674438,grad_norm: 0.7978515327529105, iteration: 71152
loss: 0.9814741611480713,grad_norm: 0.880100271979713, iteration: 71153
loss: 0.9873098134994507,grad_norm: 0.9999991342785804, iteration: 71154
loss: 1.0057376623153687,grad_norm: 0.8373258553537417, iteration: 71155
loss: 0.9676684141159058,grad_norm: 0.9999990206132369, iteration: 71156
loss: 1.0161670446395874,grad_norm: 0.7896618273123658, iteration: 71157
loss: 1.014060139656067,grad_norm: 0.9999993979784108, iteration: 71158
loss: 1.0353800058364868,grad_norm: 0.9999999046281587, iteration: 71159
loss: 1.0026897192001343,grad_norm: 0.9166956468850375, iteration: 71160
loss: 1.0252891778945923,grad_norm: 0.9251273455937056, iteration: 71161
loss: 1.015749454498291,grad_norm: 0.9290839858832809, iteration: 71162
loss: 0.988650918006897,grad_norm: 0.9999992030286928, iteration: 71163
loss: 1.0056182146072388,grad_norm: 0.9999991018020329, iteration: 71164
loss: 1.0013225078582764,grad_norm: 0.9260605968538727, iteration: 71165
loss: 1.0247316360473633,grad_norm: 0.9999991728949509, iteration: 71166
loss: 1.0275284051895142,grad_norm: 0.8532322627249344, iteration: 71167
loss: 1.004805326461792,grad_norm: 0.9996013926512818, iteration: 71168
loss: 0.9753560423851013,grad_norm: 0.9999990637515747, iteration: 71169
loss: 1.0301088094711304,grad_norm: 0.9511720687048947, iteration: 71170
loss: 1.0062142610549927,grad_norm: 0.9395327224639138, iteration: 71171
loss: 0.9871098399162292,grad_norm: 0.9243492526471991, iteration: 71172
loss: 0.9790869355201721,grad_norm: 0.8399956310310815, iteration: 71173
loss: 1.037750244140625,grad_norm: 0.9999993203890184, iteration: 71174
loss: 1.0154619216918945,grad_norm: 0.8499449975382, iteration: 71175
loss: 1.0132986307144165,grad_norm: 0.9999997329970656, iteration: 71176
loss: 1.3378186225891113,grad_norm: 0.9999998946377042, iteration: 71177
loss: 1.05039381980896,grad_norm: 0.9999990181178448, iteration: 71178
loss: 1.0100862979888916,grad_norm: 0.9999991479759964, iteration: 71179
loss: 0.9988565444946289,grad_norm: 0.9260577838633425, iteration: 71180
loss: 0.9628652930259705,grad_norm: 0.9406005368850604, iteration: 71181
loss: 1.0281226634979248,grad_norm: 0.9730014768477149, iteration: 71182
loss: 1.0275137424468994,grad_norm: 0.9501689867159822, iteration: 71183
loss: 1.0382081270217896,grad_norm: 0.9999995597929255, iteration: 71184
loss: 0.9867540597915649,grad_norm: 0.9331941741208225, iteration: 71185
loss: 1.0231019258499146,grad_norm: 0.9999991399177577, iteration: 71186
loss: 1.0205984115600586,grad_norm: 0.8759660568792514, iteration: 71187
loss: 0.9937060475349426,grad_norm: 0.9591563349996742, iteration: 71188
loss: 1.0175731182098389,grad_norm: 0.9999990297915919, iteration: 71189
loss: 0.9975780844688416,grad_norm: 0.8763236234407654, iteration: 71190
loss: 1.016313910484314,grad_norm: 0.9999991436950822, iteration: 71191
loss: 0.9939886927604675,grad_norm: 0.999999032354829, iteration: 71192
loss: 1.0296469926834106,grad_norm: 0.9999989717297268, iteration: 71193
loss: 0.9888641834259033,grad_norm: 0.9999990579298993, iteration: 71194
loss: 1.0074585676193237,grad_norm: 0.9999997653952503, iteration: 71195
loss: 1.099085807800293,grad_norm: 0.9999997449196014, iteration: 71196
loss: 0.964060366153717,grad_norm: 0.9999990975245328, iteration: 71197
loss: 1.0493805408477783,grad_norm: 0.9438336474915122, iteration: 71198
loss: 0.9579140543937683,grad_norm: 0.9674789572357488, iteration: 71199
loss: 1.0154587030410767,grad_norm: 0.826806029354709, iteration: 71200
loss: 1.0073661804199219,grad_norm: 0.9952448725602824, iteration: 71201
loss: 0.9947469234466553,grad_norm: 0.8850460389185828, iteration: 71202
loss: 0.9998997449874878,grad_norm: 0.9999990487100909, iteration: 71203
loss: 1.0292723178863525,grad_norm: 0.9313803449952128, iteration: 71204
loss: 1.0104795694351196,grad_norm: 0.9896512766627881, iteration: 71205
loss: 1.0205153226852417,grad_norm: 0.9145035892072478, iteration: 71206
loss: 0.964357852935791,grad_norm: 0.7478563570276855, iteration: 71207
loss: 1.0115888118743896,grad_norm: 0.999999225301593, iteration: 71208
loss: 0.9522865414619446,grad_norm: 0.8867225504799147, iteration: 71209
loss: 1.0189489126205444,grad_norm: 0.956933294412683, iteration: 71210
loss: 1.0332543849945068,grad_norm: 0.8057137646515855, iteration: 71211
loss: 0.9931434988975525,grad_norm: 0.9540676361006174, iteration: 71212
loss: 0.9910975098609924,grad_norm: 0.9999992469954336, iteration: 71213
loss: 1.0006279945373535,grad_norm: 0.8538090350159738, iteration: 71214
loss: 0.9912019371986389,grad_norm: 0.7704782716246388, iteration: 71215
loss: 0.9915162324905396,grad_norm: 0.9999989732185001, iteration: 71216
loss: 1.0722161531448364,grad_norm: 0.9999990483298927, iteration: 71217
loss: 0.9973227381706238,grad_norm: 0.9807187650626658, iteration: 71218
loss: 1.004157304763794,grad_norm: 0.7684814288894224, iteration: 71219
loss: 0.9978610873222351,grad_norm: 0.8410579922695802, iteration: 71220
loss: 0.9712026715278625,grad_norm: 0.9356304806651612, iteration: 71221
loss: 1.0347797870635986,grad_norm: 0.9516211122727681, iteration: 71222
loss: 1.0001935958862305,grad_norm: 0.8709546643512042, iteration: 71223
loss: 0.9970175623893738,grad_norm: 0.8441468390158823, iteration: 71224
loss: 1.0380303859710693,grad_norm: 0.9999990536038997, iteration: 71225
loss: 0.9954635500907898,grad_norm: 0.8323215039251801, iteration: 71226
loss: 0.9730408191680908,grad_norm: 0.8388802738542412, iteration: 71227
loss: 1.0169010162353516,grad_norm: 0.9045865273808625, iteration: 71228
loss: 1.0107051134109497,grad_norm: 0.9999993828673535, iteration: 71229
loss: 1.0740410089492798,grad_norm: 0.9999992811212299, iteration: 71230
loss: 1.025472640991211,grad_norm: 0.9999992856571542, iteration: 71231
loss: 0.9557079672813416,grad_norm: 0.959603456693373, iteration: 71232
loss: 0.9853944182395935,grad_norm: 0.8034804934942301, iteration: 71233
loss: 1.0172652006149292,grad_norm: 0.9999997779165568, iteration: 71234
loss: 1.0128686428070068,grad_norm: 0.8442256792812539, iteration: 71235
loss: 1.0732953548431396,grad_norm: 0.9999998253316797, iteration: 71236
loss: 0.9927355647087097,grad_norm: 0.999999556916237, iteration: 71237
loss: 1.0400527715682983,grad_norm: 0.9999992656638517, iteration: 71238
loss: 0.9810362458229065,grad_norm: 0.9059604854591183, iteration: 71239
loss: 0.9676687121391296,grad_norm: 0.9378968518280202, iteration: 71240
loss: 1.0454071760177612,grad_norm: 0.9999998893873724, iteration: 71241
loss: 0.9765747785568237,grad_norm: 0.8112904349100699, iteration: 71242
loss: 1.0156946182250977,grad_norm: 0.8287178383813335, iteration: 71243
loss: 0.9987919926643372,grad_norm: 0.9999990014422863, iteration: 71244
loss: 0.9632648825645447,grad_norm: 0.9408801605322294, iteration: 71245
loss: 1.0031670331954956,grad_norm: 0.9239797870042606, iteration: 71246
loss: 0.991026759147644,grad_norm: 0.9904250510689624, iteration: 71247
loss: 1.0283499956130981,grad_norm: 0.9999997510878782, iteration: 71248
loss: 1.0083509683609009,grad_norm: 0.9999993855893603, iteration: 71249
loss: 1.019307255744934,grad_norm: 0.7736878207330702, iteration: 71250
loss: 0.9939590692520142,grad_norm: 0.8574882237968542, iteration: 71251
loss: 0.9909488558769226,grad_norm: 0.9306243364197501, iteration: 71252
loss: 1.0121175050735474,grad_norm: 0.9999992381360329, iteration: 71253
loss: 0.9954591989517212,grad_norm: 0.9737072710813327, iteration: 71254
loss: 1.0221366882324219,grad_norm: 0.9999991626074168, iteration: 71255
loss: 1.023240327835083,grad_norm: 0.9696306268377981, iteration: 71256
loss: 0.9931851029396057,grad_norm: 0.9644803424526984, iteration: 71257
loss: 1.0099135637283325,grad_norm: 0.8763551812606795, iteration: 71258
loss: 0.9897160530090332,grad_norm: 0.9537570451374787, iteration: 71259
loss: 1.0067625045776367,grad_norm: 0.9999992065516546, iteration: 71260
loss: 1.0324724912643433,grad_norm: 0.9999994090993065, iteration: 71261
loss: 0.9905956983566284,grad_norm: 0.999999544391964, iteration: 71262
loss: 1.0868474245071411,grad_norm: 0.9179806832702795, iteration: 71263
loss: 0.9923666715621948,grad_norm: 0.942694894485417, iteration: 71264
loss: 1.029194712638855,grad_norm: 0.9999995984376632, iteration: 71265
loss: 0.968860924243927,grad_norm: 0.982074296516963, iteration: 71266
loss: 0.986224353313446,grad_norm: 0.7837813921482274, iteration: 71267
loss: 1.014742136001587,grad_norm: 0.9999991057016655, iteration: 71268
loss: 1.0222513675689697,grad_norm: 0.9055702451068728, iteration: 71269
loss: 1.0024092197418213,grad_norm: 0.9999991613917361, iteration: 71270
loss: 1.044602632522583,grad_norm: 0.9999994928250141, iteration: 71271
loss: 0.9998422861099243,grad_norm: 0.9999989762209868, iteration: 71272
loss: 0.9918561577796936,grad_norm: 0.932200305236358, iteration: 71273
loss: 0.9999632239341736,grad_norm: 0.9999999222621427, iteration: 71274
loss: 1.0217417478561401,grad_norm: 0.861755910930908, iteration: 71275
loss: 1.0103346109390259,grad_norm: 0.9109944150351821, iteration: 71276
loss: 1.0254006385803223,grad_norm: 0.9999993759874666, iteration: 71277
loss: 0.9543734788894653,grad_norm: 0.9999998517050259, iteration: 71278
loss: 0.9772724509239197,grad_norm: 0.9321712540520755, iteration: 71279
loss: 1.0301567316055298,grad_norm: 0.9999991053503144, iteration: 71280
loss: 0.9988559484481812,grad_norm: 0.9999995962572856, iteration: 71281
loss: 1.015260100364685,grad_norm: 0.9999991438849738, iteration: 71282
loss: 1.0128322839736938,grad_norm: 0.9999989620325356, iteration: 71283
loss: 1.0032157897949219,grad_norm: 0.9999990685682698, iteration: 71284
loss: 0.9724926352500916,grad_norm: 0.9532835433424686, iteration: 71285
loss: 1.1121565103530884,grad_norm: 0.9999992546660225, iteration: 71286
loss: 1.0269956588745117,grad_norm: 0.9999998531887507, iteration: 71287
loss: 1.1012386083602905,grad_norm: 0.9999995698988613, iteration: 71288
loss: 0.96148282289505,grad_norm: 0.9725506021265445, iteration: 71289
loss: 1.0512222051620483,grad_norm: 0.9999992051323924, iteration: 71290
loss: 0.9912591576576233,grad_norm: 0.9999993342002212, iteration: 71291
loss: 1.0278457403182983,grad_norm: 0.9999990764987077, iteration: 71292
loss: 1.0805073976516724,grad_norm: 0.9999990824837736, iteration: 71293
loss: 0.9911323189735413,grad_norm: 0.8648401033491746, iteration: 71294
loss: 1.082417368888855,grad_norm: 0.99999952090359, iteration: 71295
loss: 1.1024577617645264,grad_norm: 0.9999993053354256, iteration: 71296
loss: 1.0362218618392944,grad_norm: 0.9999993640384955, iteration: 71297
loss: 0.9861729145050049,grad_norm: 0.887409250834065, iteration: 71298
loss: 1.1477131843566895,grad_norm: 0.9999994358876396, iteration: 71299
loss: 1.0178276300430298,grad_norm: 0.9906523291522278, iteration: 71300
loss: 1.327665090560913,grad_norm: 0.9999998841322443, iteration: 71301
loss: 0.9732726216316223,grad_norm: 0.9999991329943876, iteration: 71302
loss: 1.0082091093063354,grad_norm: 0.7719069515654872, iteration: 71303
loss: 1.0430772304534912,grad_norm: 0.927574534461806, iteration: 71304
loss: 1.167196273803711,grad_norm: 0.9999997536729291, iteration: 71305
loss: 1.0387382507324219,grad_norm: 0.9999997551423997, iteration: 71306
loss: 1.0247360467910767,grad_norm: 0.999999078872349, iteration: 71307
loss: 0.9648196697235107,grad_norm: 0.9999990232423982, iteration: 71308
loss: 1.0114314556121826,grad_norm: 0.9848334037362485, iteration: 71309
loss: 1.0283048152923584,grad_norm: 0.9999996193486383, iteration: 71310
loss: 1.271872639656067,grad_norm: 0.9999993171812893, iteration: 71311
loss: 0.9843748211860657,grad_norm: 0.9999989640197247, iteration: 71312
loss: 1.0546789169311523,grad_norm: 0.986702330796417, iteration: 71313
loss: 1.0620752573013306,grad_norm: 0.996370634887777, iteration: 71314
loss: 1.0133726596832275,grad_norm: 0.880314503401238, iteration: 71315
loss: 1.1218255758285522,grad_norm: 0.999999377107684, iteration: 71316
loss: 1.1780074834823608,grad_norm: 0.9999999591636095, iteration: 71317
loss: 1.0597891807556152,grad_norm: 0.9999991978872432, iteration: 71318
loss: 0.9957828521728516,grad_norm: 0.8044586762469378, iteration: 71319
loss: 0.989459216594696,grad_norm: 0.9164447600672166, iteration: 71320
loss: 1.0910184383392334,grad_norm: 0.9999992739328862, iteration: 71321
loss: 1.031020164489746,grad_norm: 0.9604611193355396, iteration: 71322
loss: 0.9704095125198364,grad_norm: 0.9999991144027457, iteration: 71323
loss: 1.0873292684555054,grad_norm: 0.9999999094884302, iteration: 71324
loss: 1.0589427947998047,grad_norm: 0.9999990930392129, iteration: 71325
loss: 1.020197868347168,grad_norm: 1.0000000332143832, iteration: 71326
loss: 1.0944976806640625,grad_norm: 0.9999995338402765, iteration: 71327
loss: 0.9878134727478027,grad_norm: 0.9288553914294524, iteration: 71328
loss: 1.0440434217453003,grad_norm: 0.8829712492810367, iteration: 71329
loss: 1.1448241472244263,grad_norm: 0.999999654944489, iteration: 71330
loss: 0.988900899887085,grad_norm: 0.8667559490999532, iteration: 71331
loss: 1.1262162923812866,grad_norm: 0.9832441608652378, iteration: 71332
loss: 1.0686286687850952,grad_norm: 0.9999995235643273, iteration: 71333
loss: 1.0078133344650269,grad_norm: 0.9999997733066176, iteration: 71334
loss: 1.0539524555206299,grad_norm: 0.9094073514869632, iteration: 71335
loss: 1.1148545742034912,grad_norm: 0.999999084658061, iteration: 71336
loss: 1.0337681770324707,grad_norm: 0.9999992157517262, iteration: 71337
loss: 1.0385761260986328,grad_norm: 0.9332872693645051, iteration: 71338
loss: 1.0513365268707275,grad_norm: 0.9999996168613362, iteration: 71339
loss: 0.9811800122261047,grad_norm: 0.8347620849726934, iteration: 71340
loss: 1.032169222831726,grad_norm: 0.9999990761028809, iteration: 71341
loss: 1.0586082935333252,grad_norm: 0.9999991662875041, iteration: 71342
loss: 1.0705819129943848,grad_norm: 0.9999991367397146, iteration: 71343
loss: 0.9610902667045593,grad_norm: 0.9031236831225332, iteration: 71344
loss: 1.0230827331542969,grad_norm: 0.9999992225817241, iteration: 71345
loss: 1.1691700220108032,grad_norm: 0.9999994717649681, iteration: 71346
loss: 0.983154296875,grad_norm: 0.9750626330187746, iteration: 71347
loss: 0.997209906578064,grad_norm: 0.9999996727450506, iteration: 71348
loss: 0.9662464261054993,grad_norm: 0.9019811176568064, iteration: 71349
loss: 1.0091474056243896,grad_norm: 0.999999015983594, iteration: 71350
loss: 1.0167136192321777,grad_norm: 0.9999700876385189, iteration: 71351
loss: 1.0005172491073608,grad_norm: 0.9696648628688418, iteration: 71352
loss: 0.991413414478302,grad_norm: 0.9696786436600393, iteration: 71353
loss: 1.0264512300491333,grad_norm: 0.89238868115034, iteration: 71354
loss: 1.03083336353302,grad_norm: 0.8945986605973757, iteration: 71355
loss: 1.0306994915008545,grad_norm: 0.984882164861382, iteration: 71356
loss: 0.9803981781005859,grad_norm: 0.9999991015751248, iteration: 71357
loss: 1.0650235414505005,grad_norm: 0.8411696306750043, iteration: 71358
loss: 0.9983919858932495,grad_norm: 0.9999990611054671, iteration: 71359
loss: 1.0275213718414307,grad_norm: 0.9572339948989974, iteration: 71360
loss: 1.1307761669158936,grad_norm: 0.9999994927947946, iteration: 71361
loss: 1.0355026721954346,grad_norm: 0.9132944623018067, iteration: 71362
loss: 0.9733821153640747,grad_norm: 0.9454701638790971, iteration: 71363
loss: 1.0202819108963013,grad_norm: 0.9999997137837485, iteration: 71364
loss: 1.0124865770339966,grad_norm: 0.8074464158581114, iteration: 71365
loss: 0.9920474886894226,grad_norm: 0.9999992629191521, iteration: 71366
loss: 1.0079634189605713,grad_norm: 0.9803964975986844, iteration: 71367
loss: 1.011273980140686,grad_norm: 0.9999992084302648, iteration: 71368
loss: 1.0413718223571777,grad_norm: 0.8178165063544519, iteration: 71369
loss: 0.971865177154541,grad_norm: 0.7651880792454138, iteration: 71370
loss: 1.1617431640625,grad_norm: 0.9999991917949289, iteration: 71371
loss: 1.0238245725631714,grad_norm: 0.8512247512551666, iteration: 71372
loss: 1.0239888429641724,grad_norm: 0.9040796985428381, iteration: 71373
loss: 0.9761694669723511,grad_norm: 0.9999996475059484, iteration: 71374
loss: 0.9975809454917908,grad_norm: 0.8790749521883561, iteration: 71375
loss: 1.007822036743164,grad_norm: 0.8899571934944667, iteration: 71376
loss: 1.001338243484497,grad_norm: 0.9901913654845559, iteration: 71377
loss: 1.029473066329956,grad_norm: 0.9999991877729666, iteration: 71378
loss: 0.9678143262863159,grad_norm: 0.9999991898244739, iteration: 71379
loss: 1.0387860536575317,grad_norm: 0.9965022789178434, iteration: 71380
loss: 1.0101872682571411,grad_norm: 0.9999990960513037, iteration: 71381
loss: 1.0214606523513794,grad_norm: 0.9999991028694553, iteration: 71382
loss: 1.0178251266479492,grad_norm: 0.9999992091788876, iteration: 71383
loss: 1.11342453956604,grad_norm: 0.9999991137616067, iteration: 71384
loss: 1.0056757926940918,grad_norm: 0.837894886526448, iteration: 71385
loss: 0.970956027507782,grad_norm: 0.9999991442813544, iteration: 71386
loss: 1.0462889671325684,grad_norm: 0.9999994108269638, iteration: 71387
loss: 1.053178310394287,grad_norm: 0.8787359645204751, iteration: 71388
loss: 1.0445806980133057,grad_norm: 0.9999994692885239, iteration: 71389
loss: 0.9977741241455078,grad_norm: 0.9055483993018775, iteration: 71390
loss: 1.0133074522018433,grad_norm: 0.9999991341399849, iteration: 71391
loss: 1.016086459159851,grad_norm: 0.9999992687268786, iteration: 71392
loss: 1.1337974071502686,grad_norm: 0.9999997732731556, iteration: 71393
loss: 1.0233111381530762,grad_norm: 0.9479034483632282, iteration: 71394
loss: 0.9964792728424072,grad_norm: 0.9999999482741013, iteration: 71395
loss: 1.0497187376022339,grad_norm: 0.853338092893885, iteration: 71396
loss: 0.9752591252326965,grad_norm: 0.9584527546946561, iteration: 71397
loss: 1.0246440172195435,grad_norm: 0.8337851339879253, iteration: 71398
loss: 1.013049840927124,grad_norm: 0.9247201177161475, iteration: 71399
loss: 1.0057799816131592,grad_norm: 0.912692944226973, iteration: 71400
loss: 1.0255061388015747,grad_norm: 0.9999990522335714, iteration: 71401
loss: 1.006177544593811,grad_norm: 0.954977419723614, iteration: 71402
loss: 0.980108916759491,grad_norm: 0.8081651346621969, iteration: 71403
loss: 1.0035526752471924,grad_norm: 0.9518808089647965, iteration: 71404
loss: 1.0046441555023193,grad_norm: 0.9999992864043425, iteration: 71405
loss: 1.0176547765731812,grad_norm: 0.8033273762951587, iteration: 71406
loss: 1.0088108777999878,grad_norm: 0.9999991125962744, iteration: 71407
loss: 1.0855929851531982,grad_norm: 0.9999997261653576, iteration: 71408
loss: 1.031139612197876,grad_norm: 0.9999991111313598, iteration: 71409
loss: 1.0432616472244263,grad_norm: 0.9426271924986225, iteration: 71410
loss: 1.0173943042755127,grad_norm: 0.8986708225249402, iteration: 71411
loss: 1.0406885147094727,grad_norm: 0.9024571759737042, iteration: 71412
loss: 1.0816982984542847,grad_norm: 0.9999997514489039, iteration: 71413
loss: 0.9678276777267456,grad_norm: 0.8434258748001511, iteration: 71414
loss: 0.9869377017021179,grad_norm: 0.9999990637085546, iteration: 71415
loss: 1.0434445142745972,grad_norm: 0.9999990564363508, iteration: 71416
loss: 1.04513418674469,grad_norm: 0.9999992125032267, iteration: 71417
loss: 0.9904016852378845,grad_norm: 0.8906930946900818, iteration: 71418
loss: 1.0557655096054077,grad_norm: 0.9999990368604407, iteration: 71419
loss: 1.0216690301895142,grad_norm: 0.8487169116800464, iteration: 71420
loss: 0.9947177171707153,grad_norm: 0.8497048869853914, iteration: 71421
loss: 0.9914298057556152,grad_norm: 0.8600839063463301, iteration: 71422
loss: 1.0127782821655273,grad_norm: 0.7338334377064037, iteration: 71423
loss: 1.0077273845672607,grad_norm: 0.9999991157385214, iteration: 71424
loss: 1.0179409980773926,grad_norm: 0.9999990907282167, iteration: 71425
loss: 0.9835968017578125,grad_norm: 0.9999992429543857, iteration: 71426
loss: 1.0046801567077637,grad_norm: 0.992549596933015, iteration: 71427
loss: 1.0089263916015625,grad_norm: 0.8153014691083027, iteration: 71428
loss: 1.1079505681991577,grad_norm: 0.9999991150927315, iteration: 71429
loss: 0.9675336480140686,grad_norm: 0.9999991120303291, iteration: 71430
loss: 0.9966343641281128,grad_norm: 0.7668715310823341, iteration: 71431
loss: 0.9921402335166931,grad_norm: 0.9699521597543965, iteration: 71432
loss: 0.9746707677841187,grad_norm: 0.8352030968971078, iteration: 71433
loss: 0.9830878376960754,grad_norm: 0.9280235924825757, iteration: 71434
loss: 1.056746244430542,grad_norm: 0.9999994334613546, iteration: 71435
loss: 0.9684599041938782,grad_norm: 0.999999597935712, iteration: 71436
loss: 1.0144517421722412,grad_norm: 0.9295809839703485, iteration: 71437
loss: 1.2840224504470825,grad_norm: 0.9999995612428879, iteration: 71438
loss: 1.0286707878112793,grad_norm: 0.9999991917601846, iteration: 71439
loss: 1.0100716352462769,grad_norm: 0.8010052867396407, iteration: 71440
loss: 1.003658652305603,grad_norm: 0.9793203983375617, iteration: 71441
loss: 1.0289697647094727,grad_norm: 0.8144079514112132, iteration: 71442
loss: 0.9880538582801819,grad_norm: 0.8547721118553878, iteration: 71443
loss: 1.0352542400360107,grad_norm: 0.9999989919762132, iteration: 71444
loss: 1.0730258226394653,grad_norm: 0.9999996621324074, iteration: 71445
loss: 0.9947831034660339,grad_norm: 0.9999991181797531, iteration: 71446
loss: 0.9791255593299866,grad_norm: 0.8597309832065863, iteration: 71447
loss: 0.9990192651748657,grad_norm: 0.9999993572113369, iteration: 71448
loss: 0.9728588461875916,grad_norm: 0.9487297014635273, iteration: 71449
loss: 1.0212126970291138,grad_norm: 0.920056723986721, iteration: 71450
loss: 1.0234806537628174,grad_norm: 0.8846143509768071, iteration: 71451
loss: 1.0156679153442383,grad_norm: 0.9107013569655885, iteration: 71452
loss: 1.0637606382369995,grad_norm: 0.8191172172829662, iteration: 71453
loss: 1.0213857889175415,grad_norm: 0.8347709912020628, iteration: 71454
loss: 1.0301936864852905,grad_norm: 0.9999992960776865, iteration: 71455
loss: 1.072825312614441,grad_norm: 0.9999991417322641, iteration: 71456
loss: 1.0398281812667847,grad_norm: 0.9999989800713499, iteration: 71457
loss: 1.069153070449829,grad_norm: 0.9999997753388913, iteration: 71458
loss: 1.0273044109344482,grad_norm: 0.9999991842142526, iteration: 71459
loss: 1.0350199937820435,grad_norm: 0.999999523232031, iteration: 71460
loss: 1.062843680381775,grad_norm: 0.9999991607826061, iteration: 71461
loss: 0.9933959245681763,grad_norm: 0.9999992475457709, iteration: 71462
loss: 0.9911195635795593,grad_norm: 0.9999995166370425, iteration: 71463
loss: 1.0307234525680542,grad_norm: 0.9999994271433051, iteration: 71464
loss: 1.0389426946640015,grad_norm: 0.9467669448938235, iteration: 71465
loss: 1.0510151386260986,grad_norm: 0.9999990899281183, iteration: 71466
loss: 1.022078037261963,grad_norm: 0.8760279470638419, iteration: 71467
loss: 1.2076791524887085,grad_norm: 0.9999998950117631, iteration: 71468
loss: 0.9594821333885193,grad_norm: 0.7529421889257544, iteration: 71469
loss: 1.0296392440795898,grad_norm: 0.9999993532017474, iteration: 71470
loss: 1.0260485410690308,grad_norm: 0.8583258460954166, iteration: 71471
loss: 0.9810932874679565,grad_norm: 0.9999992431023756, iteration: 71472
loss: 0.9966904520988464,grad_norm: 0.9999990025460159, iteration: 71473
loss: 0.9969270825386047,grad_norm: 0.9999991036078566, iteration: 71474
loss: 0.9917449355125427,grad_norm: 0.8497164426175132, iteration: 71475
loss: 0.984333872795105,grad_norm: 0.9615907042622946, iteration: 71476
loss: 1.0205602645874023,grad_norm: 0.9077794441508692, iteration: 71477
loss: 1.000885248184204,grad_norm: 0.9756424322861549, iteration: 71478
loss: 1.0045710802078247,grad_norm: 0.9999991625531108, iteration: 71479
loss: 1.001720666885376,grad_norm: 0.937291651849852, iteration: 71480
loss: 1.0099174976348877,grad_norm: 0.9999995191783081, iteration: 71481
loss: 1.0054982900619507,grad_norm: 0.9999996855232096, iteration: 71482
loss: 1.0476487874984741,grad_norm: 0.999999183648648, iteration: 71483
loss: 1.0012567043304443,grad_norm: 0.8442242313476932, iteration: 71484
loss: 1.0185599327087402,grad_norm: 0.9646901843495671, iteration: 71485
loss: 0.9626379609107971,grad_norm: 0.8738763161676133, iteration: 71486
loss: 1.041736125946045,grad_norm: 0.9999999813190225, iteration: 71487
loss: 1.0889360904693604,grad_norm: 0.999999934128681, iteration: 71488
loss: 0.9874444007873535,grad_norm: 0.999999113044782, iteration: 71489
loss: 1.034242868423462,grad_norm: 0.9999995453062119, iteration: 71490
loss: 1.0134614706039429,grad_norm: 0.9801252459148664, iteration: 71491
loss: 1.021841049194336,grad_norm: 0.9999995272865574, iteration: 71492
loss: 1.0533709526062012,grad_norm: 0.9999994787315126, iteration: 71493
loss: 1.0478249788284302,grad_norm: 0.9999992699489993, iteration: 71494
loss: 1.0072622299194336,grad_norm: 0.8254730040815322, iteration: 71495
loss: 1.0892860889434814,grad_norm: 0.999999145754042, iteration: 71496
loss: 1.0572075843811035,grad_norm: 0.9999993930006551, iteration: 71497
loss: 1.1056897640228271,grad_norm: 0.9905984789372919, iteration: 71498
loss: 1.0348377227783203,grad_norm: 0.9999990518524522, iteration: 71499
loss: 1.0007163286209106,grad_norm: 0.9338463277612887, iteration: 71500
loss: 0.9416619539260864,grad_norm: 0.9241646588873097, iteration: 71501
loss: 1.1148566007614136,grad_norm: 0.9999992743304588, iteration: 71502
loss: 0.9747072458267212,grad_norm: 0.8774424943590758, iteration: 71503
loss: 1.0186712741851807,grad_norm: 0.9999994849046533, iteration: 71504
loss: 1.0055632591247559,grad_norm: 0.9079211599736725, iteration: 71505
loss: 1.0216866731643677,grad_norm: 0.7978118889078434, iteration: 71506
loss: 1.0468428134918213,grad_norm: 0.9999992667253934, iteration: 71507
loss: 1.0423672199249268,grad_norm: 0.9673979576163216, iteration: 71508
loss: 0.9617733955383301,grad_norm: 0.99999915972628, iteration: 71509
loss: 1.0404384136199951,grad_norm: 0.9999991455164575, iteration: 71510
loss: 1.0081579685211182,grad_norm: 0.9342878412086776, iteration: 71511
loss: 1.0048261880874634,grad_norm: 0.9999995427037369, iteration: 71512
loss: 1.023244857788086,grad_norm: 0.8103701293858179, iteration: 71513
loss: 1.0023623704910278,grad_norm: 0.9474861142902724, iteration: 71514
loss: 1.095523476600647,grad_norm: 0.9999997489598736, iteration: 71515
loss: 0.9955535531044006,grad_norm: 0.8146331820143551, iteration: 71516
loss: 0.9677291512489319,grad_norm: 0.9999990983824357, iteration: 71517
loss: 1.033666729927063,grad_norm: 0.9999994343903619, iteration: 71518
loss: 1.0146832466125488,grad_norm: 0.99999908346395, iteration: 71519
loss: 1.0241237878799438,grad_norm: 0.9951143464695038, iteration: 71520
loss: 1.0446749925613403,grad_norm: 0.8584211897140592, iteration: 71521
loss: 0.980785071849823,grad_norm: 0.9999996052941377, iteration: 71522
loss: 0.9697858095169067,grad_norm: 0.785872594332078, iteration: 71523
loss: 0.9604721665382385,grad_norm: 0.9999991329897471, iteration: 71524
loss: 0.9929757714271545,grad_norm: 0.9392951912788798, iteration: 71525
loss: 0.998005747795105,grad_norm: 0.9190586482649491, iteration: 71526
loss: 1.0386736392974854,grad_norm: 0.9999990139046331, iteration: 71527
loss: 0.9459862112998962,grad_norm: 0.8488078134175973, iteration: 71528
loss: 1.0599861145019531,grad_norm: 0.9999994648468937, iteration: 71529
loss: 0.9602258801460266,grad_norm: 0.9726114145639886, iteration: 71530
loss: 0.992780327796936,grad_norm: 0.9500115042828083, iteration: 71531
loss: 1.0095914602279663,grad_norm: 0.9999991821893397, iteration: 71532
loss: 0.996110200881958,grad_norm: 0.8861941158641174, iteration: 71533
loss: 0.9756408333778381,grad_norm: 0.8802277891941487, iteration: 71534
loss: 1.045526146888733,grad_norm: 0.999999051859601, iteration: 71535
loss: 1.0010524988174438,grad_norm: 0.974756640028351, iteration: 71536
loss: 1.042535424232483,grad_norm: 0.958673063236296, iteration: 71537
loss: 1.0321274995803833,grad_norm: 0.8989559853269833, iteration: 71538
loss: 0.9768065214157104,grad_norm: 0.9999991225272473, iteration: 71539
loss: 0.9996772408485413,grad_norm: 0.9999992521504026, iteration: 71540
loss: 1.0145843029022217,grad_norm: 0.9829126956883573, iteration: 71541
loss: 1.0342448949813843,grad_norm: 0.9314629505972724, iteration: 71542
loss: 1.0619741678237915,grad_norm: 0.9999995940735873, iteration: 71543
loss: 0.9942020177841187,grad_norm: 0.9344469835189165, iteration: 71544
loss: 0.939157247543335,grad_norm: 0.9999989650120109, iteration: 71545
loss: 0.9515727758407593,grad_norm: 0.9999992650511204, iteration: 71546
loss: 1.009623408317566,grad_norm: 0.999999280161486, iteration: 71547
loss: 1.0113778114318848,grad_norm: 0.9999995869381906, iteration: 71548
loss: 1.0339915752410889,grad_norm: 0.9999990136836678, iteration: 71549
loss: 0.975811779499054,grad_norm: 0.9706823634917567, iteration: 71550
loss: 0.9997326731681824,grad_norm: 0.8921820488664528, iteration: 71551
loss: 1.0199286937713623,grad_norm: 0.9806328081611181, iteration: 71552
loss: 0.996764600276947,grad_norm: 0.9966242066687487, iteration: 71553
loss: 0.993135392665863,grad_norm: 0.9999989464667836, iteration: 71554
loss: 1.0156309604644775,grad_norm: 0.9999995706548203, iteration: 71555
loss: 0.9813399314880371,grad_norm: 0.9999991387802639, iteration: 71556
loss: 1.0040462017059326,grad_norm: 0.9294288644374418, iteration: 71557
loss: 1.0602326393127441,grad_norm: 0.9999994590588546, iteration: 71558
loss: 1.0046652555465698,grad_norm: 0.9619083543022544, iteration: 71559
loss: 1.0588550567626953,grad_norm: 0.9999996956145798, iteration: 71560
loss: 1.0046215057373047,grad_norm: 0.9999991709901563, iteration: 71561
loss: 1.000808596611023,grad_norm: 0.7875112037534973, iteration: 71562
loss: 0.998315155506134,grad_norm: 0.944635651961356, iteration: 71563
loss: 1.0058308839797974,grad_norm: 0.9999995687503859, iteration: 71564
loss: 1.0241988897323608,grad_norm: 0.9553993920219991, iteration: 71565
loss: 0.9569486379623413,grad_norm: 0.9999990292104655, iteration: 71566
loss: 1.0008004903793335,grad_norm: 0.8903938265802108, iteration: 71567
loss: 0.9873024821281433,grad_norm: 0.9547156885432962, iteration: 71568
loss: 0.9885702133178711,grad_norm: 0.8964797500078577, iteration: 71569
loss: 0.990043044090271,grad_norm: 0.9999990309033866, iteration: 71570
loss: 1.0046238899230957,grad_norm: 0.7570561607697859, iteration: 71571
loss: 1.0240527391433716,grad_norm: 0.9999998795549511, iteration: 71572
loss: 1.0026085376739502,grad_norm: 0.9999991898840092, iteration: 71573
loss: 1.1356143951416016,grad_norm: 0.9788302744734856, iteration: 71574
loss: 0.9994003176689148,grad_norm: 0.8793551691169295, iteration: 71575
loss: 1.062457799911499,grad_norm: 0.9999996873467192, iteration: 71576
loss: 0.982201337814331,grad_norm: 0.9999990181871599, iteration: 71577
loss: 1.0079859495162964,grad_norm: 0.8480832413890186, iteration: 71578
loss: 1.0228184461593628,grad_norm: 0.999999152427073, iteration: 71579
loss: 0.9890738725662231,grad_norm: 0.9449968000691669, iteration: 71580
loss: 0.9903040528297424,grad_norm: 0.8345608736251444, iteration: 71581
loss: 0.9903416633605957,grad_norm: 0.9751983562436352, iteration: 71582
loss: 1.0377137660980225,grad_norm: 0.9999996045408083, iteration: 71583
loss: 1.0132824182510376,grad_norm: 0.9999992966084167, iteration: 71584
loss: 0.9945861101150513,grad_norm: 0.9688108196828358, iteration: 71585
loss: 1.0367298126220703,grad_norm: 0.9999996061379276, iteration: 71586
loss: 0.9650971293449402,grad_norm: 0.999999112434904, iteration: 71587
loss: 0.9936143159866333,grad_norm: 0.9456074842702317, iteration: 71588
loss: 1.0102258920669556,grad_norm: 0.9602225290320946, iteration: 71589
loss: 1.0137509107589722,grad_norm: 0.9999994972794191, iteration: 71590
loss: 0.9949318170547485,grad_norm: 0.8995032239594842, iteration: 71591
loss: 0.998400092124939,grad_norm: 0.8152635413868654, iteration: 71592
loss: 1.0949372053146362,grad_norm: 0.9999990611320911, iteration: 71593
loss: 0.9635965824127197,grad_norm: 0.8780011477586502, iteration: 71594
loss: 1.0154458284378052,grad_norm: 0.9999992182127292, iteration: 71595
loss: 1.0199002027511597,grad_norm: 0.9379235574107334, iteration: 71596
loss: 0.9813703298568726,grad_norm: 0.9457585044856059, iteration: 71597
loss: 0.9887564778327942,grad_norm: 0.9999996316873583, iteration: 71598
loss: 1.1098803281784058,grad_norm: 0.999999218770808, iteration: 71599
loss: 1.087903380393982,grad_norm: 0.9999990816862424, iteration: 71600
loss: 0.9826540946960449,grad_norm: 0.9999992158155011, iteration: 71601
loss: 1.071617603302002,grad_norm: 0.9999996642163954, iteration: 71602
loss: 1.030841588973999,grad_norm: 0.9999990267735362, iteration: 71603
loss: 0.9896643161773682,grad_norm: 0.9961333769950096, iteration: 71604
loss: 1.0094295740127563,grad_norm: 0.9922278855791056, iteration: 71605
loss: 1.0139319896697998,grad_norm: 0.8823163543921572, iteration: 71606
loss: 1.0401735305786133,grad_norm: 0.9999991624018603, iteration: 71607
loss: 1.0148674249649048,grad_norm: 0.9999990860164184, iteration: 71608
loss: 1.0354607105255127,grad_norm: 0.999999155617601, iteration: 71609
loss: 0.9922608137130737,grad_norm: 0.9999991339137304, iteration: 71610
loss: 1.030462622642517,grad_norm: 0.9999997673087808, iteration: 71611
loss: 1.0167006254196167,grad_norm: 0.9588463977994351, iteration: 71612
loss: 0.9741117358207703,grad_norm: 0.999999158359502, iteration: 71613
loss: 0.9963554739952087,grad_norm: 0.9999994458912651, iteration: 71614
loss: 0.9900503754615784,grad_norm: 0.9660551326687803, iteration: 71615
loss: 1.0082017183303833,grad_norm: 0.8436603897888031, iteration: 71616
loss: 1.0928983688354492,grad_norm: 0.9999996612498925, iteration: 71617
loss: 0.9638787508010864,grad_norm: 0.9552651565983054, iteration: 71618
loss: 0.9878286719322205,grad_norm: 0.8726456126952203, iteration: 71619
loss: 1.0598074197769165,grad_norm: 0.9999993009029498, iteration: 71620
loss: 0.9727495908737183,grad_norm: 0.9999991863178276, iteration: 71621
loss: 0.9986591339111328,grad_norm: 0.88634762086254, iteration: 71622
loss: 1.0185924768447876,grad_norm: 0.8960326936043042, iteration: 71623
loss: 1.0191514492034912,grad_norm: 0.9999996942272776, iteration: 71624
loss: 0.9700978994369507,grad_norm: 0.9999990689270288, iteration: 71625
loss: 1.0420204401016235,grad_norm: 0.9999994535865294, iteration: 71626
loss: 0.9779519438743591,grad_norm: 0.9372048931520567, iteration: 71627
loss: 1.0053166151046753,grad_norm: 0.9999992934804869, iteration: 71628
loss: 1.014237642288208,grad_norm: 0.8481603377209852, iteration: 71629
loss: 0.9760113954544067,grad_norm: 0.9999991695608625, iteration: 71630
loss: 1.0067867040634155,grad_norm: 0.9042688953577284, iteration: 71631
loss: 1.04023015499115,grad_norm: 0.9999994369478356, iteration: 71632
loss: 0.9637638926506042,grad_norm: 0.9999990923942986, iteration: 71633
loss: 1.0465116500854492,grad_norm: 0.999999967867854, iteration: 71634
loss: 0.9983789324760437,grad_norm: 0.9355603389611813, iteration: 71635
loss: 1.0026257038116455,grad_norm: 0.9999992923884726, iteration: 71636
loss: 0.990898609161377,grad_norm: 0.8408031357772687, iteration: 71637
loss: 1.0086133480072021,grad_norm: 0.8987778258393456, iteration: 71638
loss: 0.9815871119499207,grad_norm: 0.999999087582967, iteration: 71639
loss: 0.9973995089530945,grad_norm: 0.999999925618246, iteration: 71640
loss: 0.9880112409591675,grad_norm: 0.9999993222020856, iteration: 71641
loss: 1.0012426376342773,grad_norm: 0.9067008635356796, iteration: 71642
loss: 0.9935970306396484,grad_norm: 0.9487864560024479, iteration: 71643
loss: 0.9958924055099487,grad_norm: 0.8370630075391438, iteration: 71644
loss: 1.018017292022705,grad_norm: 0.9999991416807548, iteration: 71645
loss: 0.9757707715034485,grad_norm: 0.9122924722358142, iteration: 71646
loss: 1.0047821998596191,grad_norm: 0.841733076397841, iteration: 71647
loss: 0.9861173629760742,grad_norm: 0.8717993760847507, iteration: 71648
loss: 0.9369186758995056,grad_norm: 0.9999991423874863, iteration: 71649
loss: 0.9589305520057678,grad_norm: 0.9609729088121339, iteration: 71650
loss: 1.01999032497406,grad_norm: 0.9999994820044423, iteration: 71651
loss: 1.037198781967163,grad_norm: 0.9999992757768461, iteration: 71652
loss: 1.0329995155334473,grad_norm: 0.9999992244874023, iteration: 71653
loss: 0.9831947684288025,grad_norm: 0.9999991055556459, iteration: 71654
loss: 0.9634824395179749,grad_norm: 0.9999991566356768, iteration: 71655
loss: 0.9703989624977112,grad_norm: 0.9479066151144858, iteration: 71656
loss: 0.998317301273346,grad_norm: 0.9768087094361048, iteration: 71657
loss: 0.9953373670578003,grad_norm: 0.7154432137603118, iteration: 71658
loss: 1.0354194641113281,grad_norm: 0.9999991316966182, iteration: 71659
loss: 0.9972549676895142,grad_norm: 0.9999991133248562, iteration: 71660
loss: 0.9599356055259705,grad_norm: 0.9999991889971581, iteration: 71661
loss: 1.003934383392334,grad_norm: 0.8375415978967704, iteration: 71662
loss: 0.9879971146583557,grad_norm: 0.9999993323643085, iteration: 71663
loss: 1.0416203737258911,grad_norm: 0.9999997195804975, iteration: 71664
loss: 0.9899830222129822,grad_norm: 0.8798926374814487, iteration: 71665
loss: 1.0206050872802734,grad_norm: 0.8804008481772508, iteration: 71666
loss: 0.9981086850166321,grad_norm: 0.8173090596619698, iteration: 71667
loss: 0.9989305734634399,grad_norm: 0.9999989745815552, iteration: 71668
loss: 0.9661091566085815,grad_norm: 0.9778055935838368, iteration: 71669
loss: 1.0029106140136719,grad_norm: 0.889678718517877, iteration: 71670
loss: 1.0343716144561768,grad_norm: 0.9999994089191324, iteration: 71671
loss: 0.9805036187171936,grad_norm: 0.9999991886736155, iteration: 71672
loss: 1.0590447187423706,grad_norm: 0.9999990046957302, iteration: 71673
loss: 0.9920763373374939,grad_norm: 0.9999995254671807, iteration: 71674
loss: 0.9886611104011536,grad_norm: 0.9255385491556186, iteration: 71675
loss: 0.980198323726654,grad_norm: 0.8736564174686425, iteration: 71676
loss: 0.9963648915290833,grad_norm: 0.9995997522830603, iteration: 71677
loss: 0.954078733921051,grad_norm: 0.9999992827116623, iteration: 71678
loss: 0.9915273785591125,grad_norm: 0.9999990383143792, iteration: 71679
loss: 1.0248980522155762,grad_norm: 0.998390324964388, iteration: 71680
loss: 1.0266262292861938,grad_norm: 0.9500747363820762, iteration: 71681
loss: 1.0176481008529663,grad_norm: 0.9999993311561745, iteration: 71682
loss: 0.9692904353141785,grad_norm: 0.8401635802108846, iteration: 71683
loss: 1.0240763425827026,grad_norm: 0.8032150373714799, iteration: 71684
loss: 1.0094540119171143,grad_norm: 0.9176616175795456, iteration: 71685
loss: 1.0102366209030151,grad_norm: 0.9999990680052625, iteration: 71686
loss: 1.0192930698394775,grad_norm: 0.8513779718388722, iteration: 71687
loss: 1.010800838470459,grad_norm: 0.9013790575866378, iteration: 71688
loss: 1.0024667978286743,grad_norm: 0.9999990633891686, iteration: 71689
loss: 1.0214035511016846,grad_norm: 0.9419005902709697, iteration: 71690
loss: 1.0162056684494019,grad_norm: 0.9999991577523925, iteration: 71691
loss: 1.0080362558364868,grad_norm: 0.9255670667247852, iteration: 71692
loss: 1.0445549488067627,grad_norm: 0.9395312155527992, iteration: 71693
loss: 1.036361813545227,grad_norm: 0.9006880773647913, iteration: 71694
loss: 1.0237575769424438,grad_norm: 0.952016315500083, iteration: 71695
loss: 0.9807770848274231,grad_norm: 0.8411863534629715, iteration: 71696
loss: 0.9947806596755981,grad_norm: 0.9665185237010927, iteration: 71697
loss: 0.9693049788475037,grad_norm: 0.8875436574081801, iteration: 71698
loss: 1.0841954946517944,grad_norm: 0.9999994544227262, iteration: 71699
loss: 1.0096477270126343,grad_norm: 0.9278081318932504, iteration: 71700
loss: 0.9645136594772339,grad_norm: 0.9999990219056795, iteration: 71701
loss: 0.9595640301704407,grad_norm: 0.9263232550528537, iteration: 71702
loss: 1.0402125120162964,grad_norm: 0.9999990601950663, iteration: 71703
loss: 0.9993392825126648,grad_norm: 0.8483258332352773, iteration: 71704
loss: 0.9756821393966675,grad_norm: 0.9999991894117176, iteration: 71705
loss: 0.9831175804138184,grad_norm: 0.8105304978868759, iteration: 71706
loss: 1.0071463584899902,grad_norm: 0.9842350200645456, iteration: 71707
loss: 1.0093131065368652,grad_norm: 0.9088206974388119, iteration: 71708
loss: 0.9829793572425842,grad_norm: 0.9999992692313291, iteration: 71709
loss: 0.9680320024490356,grad_norm: 0.9815998070408917, iteration: 71710
loss: 0.9987812638282776,grad_norm: 0.9999996660519281, iteration: 71711
loss: 0.9847185015678406,grad_norm: 0.9999991496648186, iteration: 71712
loss: 1.0271068811416626,grad_norm: 0.9999992347169285, iteration: 71713
loss: 1.0666985511779785,grad_norm: 0.9999994582824513, iteration: 71714
loss: 0.9860774278640747,grad_norm: 0.9999991510676244, iteration: 71715
loss: 0.9857379198074341,grad_norm: 0.8823448479963646, iteration: 71716
loss: 1.0360363721847534,grad_norm: 0.9999994129936965, iteration: 71717
loss: 0.9673454761505127,grad_norm: 0.9285802464373831, iteration: 71718
loss: 1.0449576377868652,grad_norm: 0.9999995128935083, iteration: 71719
loss: 0.9736180901527405,grad_norm: 0.9999992907542474, iteration: 71720
loss: 0.9885320067405701,grad_norm: 0.9999991676992108, iteration: 71721
loss: 1.0699889659881592,grad_norm: 0.9999992337160751, iteration: 71722
loss: 1.0312880277633667,grad_norm: 0.9993183591564283, iteration: 71723
loss: 1.0163418054580688,grad_norm: 0.9999991110550728, iteration: 71724
loss: 0.9717628955841064,grad_norm: 0.8554370809170918, iteration: 71725
loss: 1.0414862632751465,grad_norm: 0.9999993766655897, iteration: 71726
loss: 1.0279958248138428,grad_norm: 0.984036066224226, iteration: 71727
loss: 0.9715254902839661,grad_norm: 0.952340652999585, iteration: 71728
loss: 1.0404441356658936,grad_norm: 0.8332775925429518, iteration: 71729
loss: 1.0128084421157837,grad_norm: 0.9051545840195064, iteration: 71730
loss: 0.9974521398544312,grad_norm: 0.9494576285318491, iteration: 71731
loss: 0.9832502007484436,grad_norm: 0.7865076962770031, iteration: 71732
loss: 1.0376948118209839,grad_norm: 0.8545413797690582, iteration: 71733
loss: 1.033365249633789,grad_norm: 0.9999992570615271, iteration: 71734
loss: 1.0278030633926392,grad_norm: 0.8583146424968207, iteration: 71735
loss: 1.0162452459335327,grad_norm: 0.9999992059394055, iteration: 71736
loss: 0.9800352454185486,grad_norm: 0.9138973879026954, iteration: 71737
loss: 0.9979057312011719,grad_norm: 0.8958154046170043, iteration: 71738
loss: 1.0119259357452393,grad_norm: 0.9999990949263164, iteration: 71739
loss: 0.963972806930542,grad_norm: 0.9748245855126048, iteration: 71740
loss: 1.0046793222427368,grad_norm: 0.9777528046173812, iteration: 71741
loss: 0.9875152707099915,grad_norm: 0.9070752352387556, iteration: 71742
loss: 0.9940979480743408,grad_norm: 0.9504265752874109, iteration: 71743
loss: 0.9672823548316956,grad_norm: 0.9999991638935244, iteration: 71744
loss: 1.029159426689148,grad_norm: 0.9999991315959434, iteration: 71745
loss: 1.0137492418289185,grad_norm: 0.9999995663647328, iteration: 71746
loss: 0.9651739597320557,grad_norm: 0.9719323135755341, iteration: 71747
loss: 1.008108139038086,grad_norm: 0.9076694404378903, iteration: 71748
loss: 0.9896649122238159,grad_norm: 0.8763873818243848, iteration: 71749
loss: 1.088436245918274,grad_norm: 0.9999992407765104, iteration: 71750
loss: 0.9734188914299011,grad_norm: 0.9999996416301825, iteration: 71751
loss: 0.9952706694602966,grad_norm: 0.8611478769072339, iteration: 71752
loss: 1.023792028427124,grad_norm: 0.9999989609882795, iteration: 71753
loss: 1.0124342441558838,grad_norm: 0.869208933396643, iteration: 71754
loss: 0.9807152152061462,grad_norm: 0.9999996326424175, iteration: 71755
loss: 1.1546365022659302,grad_norm: 0.999999518137019, iteration: 71756
loss: 1.0097932815551758,grad_norm: 0.9999992246839449, iteration: 71757
loss: 1.0012706518173218,grad_norm: 0.9999991213836704, iteration: 71758
loss: 0.9670946002006531,grad_norm: 0.8249197963982866, iteration: 71759
loss: 0.9906516671180725,grad_norm: 0.9999989537810887, iteration: 71760
loss: 1.0923885107040405,grad_norm: 0.9999995095063473, iteration: 71761
loss: 0.9821392297744751,grad_norm: 0.919713730532392, iteration: 71762
loss: 1.0363783836364746,grad_norm: 0.9601881533584098, iteration: 71763
loss: 0.9979787468910217,grad_norm: 0.7610129441107412, iteration: 71764
loss: 1.071574330329895,grad_norm: 0.9999992544650553, iteration: 71765
loss: 1.0231908559799194,grad_norm: 0.9584693624522812, iteration: 71766
loss: 0.9999225735664368,grad_norm: 0.8082542486388149, iteration: 71767
loss: 0.9822054505348206,grad_norm: 0.9999991402495363, iteration: 71768
loss: 0.9512792229652405,grad_norm: 0.8812435258393551, iteration: 71769
loss: 0.9775276780128479,grad_norm: 0.9999994533319613, iteration: 71770
loss: 1.0000364780426025,grad_norm: 0.8022392549356977, iteration: 71771
loss: 1.0086551904678345,grad_norm: 0.8827156746458982, iteration: 71772
loss: 1.0492078065872192,grad_norm: 0.9999998568661331, iteration: 71773
loss: 0.9977154731750488,grad_norm: 0.9999992680067292, iteration: 71774
loss: 1.0060341358184814,grad_norm: 0.8662216509574862, iteration: 71775
loss: 1.0004541873931885,grad_norm: 0.9727171659035881, iteration: 71776
loss: 0.9823064208030701,grad_norm: 0.7906891649907551, iteration: 71777
loss: 1.010520339012146,grad_norm: 0.9364102757424032, iteration: 71778
loss: 0.968654453754425,grad_norm: 0.8664473635280855, iteration: 71779
loss: 1.0450905561447144,grad_norm: 0.9119133798093981, iteration: 71780
loss: 1.0164940357208252,grad_norm: 0.9749391519706618, iteration: 71781
loss: 0.9820506572723389,grad_norm: 0.9745494781673071, iteration: 71782
loss: 0.961921215057373,grad_norm: 0.9999990621480417, iteration: 71783
loss: 1.0136842727661133,grad_norm: 0.7845056144582708, iteration: 71784
loss: 1.1140460968017578,grad_norm: 0.9999993044983008, iteration: 71785
loss: 1.0153883695602417,grad_norm: 0.9999991075407962, iteration: 71786
loss: 1.0064665079116821,grad_norm: 0.999999067589526, iteration: 71787
loss: 1.0827207565307617,grad_norm: 0.999999545124011, iteration: 71788
loss: 1.033048152923584,grad_norm: 0.8586685812380851, iteration: 71789
loss: 1.0248370170593262,grad_norm: 0.8872498533041464, iteration: 71790
loss: 1.1627390384674072,grad_norm: 0.9999995468224294, iteration: 71791
loss: 0.998626708984375,grad_norm: 0.9511493585943439, iteration: 71792
loss: 0.9903107285499573,grad_norm: 0.8309581129769473, iteration: 71793
loss: 0.9980314373970032,grad_norm: 0.9999992287523444, iteration: 71794
loss: 1.001704216003418,grad_norm: 0.999998837126206, iteration: 71795
loss: 1.019346833229065,grad_norm: 0.7809899335309701, iteration: 71796
loss: 1.0042225122451782,grad_norm: 0.9589888690899476, iteration: 71797
loss: 1.0164092779159546,grad_norm: 0.7948396340860024, iteration: 71798
loss: 0.9891685247421265,grad_norm: 0.9999990300630447, iteration: 71799
loss: 1.0954209566116333,grad_norm: 0.9999992676696746, iteration: 71800
loss: 0.9835286736488342,grad_norm: 0.9999992249393793, iteration: 71801
loss: 1.036436915397644,grad_norm: 0.9999995702000662, iteration: 71802
loss: 0.9788050055503845,grad_norm: 0.9724394730679897, iteration: 71803
loss: 1.0213489532470703,grad_norm: 0.9999991174757532, iteration: 71804
loss: 1.0275537967681885,grad_norm: 0.9999992933198365, iteration: 71805
loss: 1.0933500528335571,grad_norm: 1.0000000965465414, iteration: 71806
loss: 1.016480565071106,grad_norm: 0.9999995588499022, iteration: 71807
loss: 0.9795199036598206,grad_norm: 0.9999992573021039, iteration: 71808
loss: 1.0093307495117188,grad_norm: 0.9999993773919135, iteration: 71809
loss: 1.0223296880722046,grad_norm: 0.9999991098727302, iteration: 71810
loss: 1.0012723207473755,grad_norm: 0.9619210281654476, iteration: 71811
loss: 1.007481575012207,grad_norm: 0.7518777487243632, iteration: 71812
loss: 1.0005671977996826,grad_norm: 0.9999990294823125, iteration: 71813
loss: 0.9895100593566895,grad_norm: 0.9999990476841943, iteration: 71814
loss: 0.9767348766326904,grad_norm: 0.8696683034868431, iteration: 71815
loss: 1.037618637084961,grad_norm: 0.9999989931794377, iteration: 71816
loss: 1.088233232498169,grad_norm: 0.9999995434392064, iteration: 71817
loss: 0.9696889519691467,grad_norm: 0.9869869262403235, iteration: 71818
loss: 1.033898949623108,grad_norm: 0.8885369098548922, iteration: 71819
loss: 0.9720423817634583,grad_norm: 0.8248027181033007, iteration: 71820
loss: 0.9855032563209534,grad_norm: 0.8903105276348079, iteration: 71821
loss: 1.0061309337615967,grad_norm: 0.8057123620212622, iteration: 71822
loss: 0.9914894700050354,grad_norm: 0.9999991825476416, iteration: 71823
loss: 1.0396510362625122,grad_norm: 0.9999991723654488, iteration: 71824
loss: 1.0045344829559326,grad_norm: 0.8255083050021051, iteration: 71825
loss: 0.9995130300521851,grad_norm: 0.9854757258074421, iteration: 71826
loss: 1.0094178915023804,grad_norm: 0.9422993476525395, iteration: 71827
loss: 1.0232635736465454,grad_norm: 0.9999991874605249, iteration: 71828
loss: 1.0608144998550415,grad_norm: 0.9999997795216193, iteration: 71829
loss: 1.0266386270523071,grad_norm: 0.9999990644067807, iteration: 71830
loss: 0.9867116808891296,grad_norm: 0.8633217816759078, iteration: 71831
loss: 1.041540503501892,grad_norm: 0.8885691320099678, iteration: 71832
loss: 0.9824225902557373,grad_norm: 0.8531313126498296, iteration: 71833
loss: 0.9930355548858643,grad_norm: 0.9999990876468586, iteration: 71834
loss: 1.039736032485962,grad_norm: 0.8976741130406019, iteration: 71835
loss: 0.9639769792556763,grad_norm: 0.9999991808638576, iteration: 71836
loss: 1.0045082569122314,grad_norm: 0.9727356503478485, iteration: 71837
loss: 1.0267739295959473,grad_norm: 0.8179457190835788, iteration: 71838
loss: 1.0353100299835205,grad_norm: 0.9999995846535523, iteration: 71839
loss: 1.0138901472091675,grad_norm: 0.8974735207363962, iteration: 71840
loss: 1.0722103118896484,grad_norm: 0.9999995601275535, iteration: 71841
loss: 1.0047900676727295,grad_norm: 0.9999998784307168, iteration: 71842
loss: 1.0267688035964966,grad_norm: 0.8139268973287713, iteration: 71843
loss: 0.9770358204841614,grad_norm: 0.9999993834755129, iteration: 71844
loss: 1.0141555070877075,grad_norm: 0.8582540513685865, iteration: 71845
loss: 1.0202972888946533,grad_norm: 0.9999992029666055, iteration: 71846
loss: 1.0163614749908447,grad_norm: 0.9119822939126678, iteration: 71847
loss: 1.0171858072280884,grad_norm: 0.8988806061637243, iteration: 71848
loss: 0.9814502596855164,grad_norm: 0.8971504372318737, iteration: 71849
loss: 1.0183994770050049,grad_norm: 0.999999324505682, iteration: 71850
loss: 1.0280171632766724,grad_norm: 0.9540035069384115, iteration: 71851
loss: 1.1705070734024048,grad_norm: 0.9999995080604193, iteration: 71852
loss: 0.9904253482818604,grad_norm: 0.8323830666097661, iteration: 71853
loss: 0.9914515018463135,grad_norm: 0.9999991477407222, iteration: 71854
loss: 0.9860494136810303,grad_norm: 0.9036666035026428, iteration: 71855
loss: 1.009245753288269,grad_norm: 0.8435278702802823, iteration: 71856
loss: 0.9679356217384338,grad_norm: 0.7875187306106948, iteration: 71857
loss: 0.9927605986595154,grad_norm: 0.7727546630738277, iteration: 71858
loss: 0.9865677952766418,grad_norm: 0.9999992481083063, iteration: 71859
loss: 1.020728349685669,grad_norm: 0.9999992824380157, iteration: 71860
loss: 0.9613186120986938,grad_norm: 0.9999991954372235, iteration: 71861
loss: 1.0263975858688354,grad_norm: 0.9328524557864876, iteration: 71862
loss: 1.0229634046554565,grad_norm: 0.9999992734416971, iteration: 71863
loss: 0.958849310874939,grad_norm: 0.9999991582284911, iteration: 71864
loss: 1.008080005645752,grad_norm: 0.8324167190863139, iteration: 71865
loss: 1.0417143106460571,grad_norm: 0.9999998758231989, iteration: 71866
loss: 0.9885855317115784,grad_norm: 0.9302221792756447, iteration: 71867
loss: 1.0191880464553833,grad_norm: 0.9363014420345345, iteration: 71868
loss: 0.9527982473373413,grad_norm: 0.9999990089056429, iteration: 71869
loss: 1.0050067901611328,grad_norm: 0.9999991865908369, iteration: 71870
loss: 1.0307501554489136,grad_norm: 0.8833472396924801, iteration: 71871
loss: 0.9625895619392395,grad_norm: 0.9640016658807667, iteration: 71872
loss: 1.0684353113174438,grad_norm: 0.9999990565266508, iteration: 71873
loss: 1.0517724752426147,grad_norm: 0.9070678710879253, iteration: 71874
loss: 1.0135936737060547,grad_norm: 0.8385715042523512, iteration: 71875
loss: 1.0478878021240234,grad_norm: 0.9999991065971271, iteration: 71876
loss: 0.963827908039093,grad_norm: 0.8908605555139509, iteration: 71877
loss: 1.0000134706497192,grad_norm: 0.8692961857073619, iteration: 71878
loss: 1.013620376586914,grad_norm: 0.9487952037282643, iteration: 71879
loss: 0.980696976184845,grad_norm: 0.9308357795493228, iteration: 71880
loss: 1.04340398311615,grad_norm: 0.980360574327699, iteration: 71881
loss: 0.9944966435432434,grad_norm: 0.8723422303460647, iteration: 71882
loss: 0.9808595180511475,grad_norm: 0.9999993655062226, iteration: 71883
loss: 0.9995982050895691,grad_norm: 0.8833934920848032, iteration: 71884
loss: 1.0708088874816895,grad_norm: 0.9999991630995783, iteration: 71885
loss: 1.0519697666168213,grad_norm: 0.9327177262046336, iteration: 71886
loss: 1.0139657258987427,grad_norm: 0.906697323547818, iteration: 71887
loss: 1.0197112560272217,grad_norm: 0.9223511606860231, iteration: 71888
loss: 1.0055508613586426,grad_norm: 0.8208368080997535, iteration: 71889
loss: 0.9700117707252502,grad_norm: 0.9938818406230894, iteration: 71890
loss: 1.0096313953399658,grad_norm: 0.9953597322454028, iteration: 71891
loss: 1.0371239185333252,grad_norm: 0.9999992136380017, iteration: 71892
loss: 0.9759957790374756,grad_norm: 0.998914356631991, iteration: 71893
loss: 0.9941077828407288,grad_norm: 0.799561437964861, iteration: 71894
loss: 1.0140659809112549,grad_norm: 0.9999992270838551, iteration: 71895
loss: 0.9875620603561401,grad_norm: 0.9914992687141256, iteration: 71896
loss: 0.9566489458084106,grad_norm: 0.9242847578801503, iteration: 71897
loss: 1.0699050426483154,grad_norm: 0.9999993766755197, iteration: 71898
loss: 0.9935563206672668,grad_norm: 0.9999991313562953, iteration: 71899
loss: 1.041578769683838,grad_norm: 0.9999995108986398, iteration: 71900
loss: 1.0184770822525024,grad_norm: 0.9288839031090984, iteration: 71901
loss: 1.0208553075790405,grad_norm: 0.9999990658112856, iteration: 71902
loss: 0.981779158115387,grad_norm: 0.8163975323478433, iteration: 71903
loss: 1.0210024118423462,grad_norm: 0.9999991311377134, iteration: 71904
loss: 0.9692656397819519,grad_norm: 0.9999992206132218, iteration: 71905
loss: 1.0037003755569458,grad_norm: 0.9999994627154145, iteration: 71906
loss: 1.0106672048568726,grad_norm: 0.9999991950933719, iteration: 71907
loss: 1.0374027490615845,grad_norm: 0.9999992506616303, iteration: 71908
loss: 0.9979819655418396,grad_norm: 0.9834488423689063, iteration: 71909
loss: 0.9814850091934204,grad_norm: 0.9999992056195427, iteration: 71910
loss: 0.9972432255744934,grad_norm: 0.9999992438296965, iteration: 71911
loss: 0.9670186042785645,grad_norm: 0.999999169237707, iteration: 71912
loss: 0.9836357235908508,grad_norm: 0.9999991079936212, iteration: 71913
loss: 1.0289911031723022,grad_norm: 0.9999995565900381, iteration: 71914
loss: 1.0173169374465942,grad_norm: 0.9717055011266469, iteration: 71915
loss: 0.9967141151428223,grad_norm: 0.8402413944469908, iteration: 71916
loss: 0.9974843859672546,grad_norm: 0.9999990704792837, iteration: 71917
loss: 1.050990343093872,grad_norm: 0.9999992707425348, iteration: 71918
loss: 0.9789192080497742,grad_norm: 0.9999989986340603, iteration: 71919
loss: 1.0027817487716675,grad_norm: 0.9999994245843066, iteration: 71920
loss: 1.0368133783340454,grad_norm: 0.9999991156320497, iteration: 71921
loss: 0.9907598495483398,grad_norm: 0.9876074464471051, iteration: 71922
loss: 0.9857876896858215,grad_norm: 0.8517945623487053, iteration: 71923
loss: 0.9679393172264099,grad_norm: 0.9999991727572491, iteration: 71924
loss: 0.9733190536499023,grad_norm: 0.8791231879247084, iteration: 71925
loss: 0.9975945353507996,grad_norm: 0.9999996406709114, iteration: 71926
loss: 1.0155881643295288,grad_norm: 0.9419691397948801, iteration: 71927
loss: 0.9750902652740479,grad_norm: 0.876291496163953, iteration: 71928
loss: 1.1781905889511108,grad_norm: 0.9999996183700688, iteration: 71929
loss: 0.9740126729011536,grad_norm: 0.8672635529615412, iteration: 71930
loss: 0.9484608769416809,grad_norm: 0.8311176607079783, iteration: 71931
loss: 1.0103925466537476,grad_norm: 0.8970836022740977, iteration: 71932
loss: 1.0045031309127808,grad_norm: 0.9588803801285548, iteration: 71933
loss: 0.9742639660835266,grad_norm: 0.9999991659067089, iteration: 71934
loss: 0.9919573068618774,grad_norm: 0.8517197759471293, iteration: 71935
loss: 1.0568323135375977,grad_norm: 0.8450123212971958, iteration: 71936
loss: 1.0620250701904297,grad_norm: 0.9999997847975751, iteration: 71937
loss: 1.0296430587768555,grad_norm: 0.9999990479217481, iteration: 71938
loss: 1.0065909624099731,grad_norm: 0.9999991613684704, iteration: 71939
loss: 0.9856460094451904,grad_norm: 0.999999483990513, iteration: 71940
loss: 1.0283807516098022,grad_norm: 0.9999990245899278, iteration: 71941
loss: 1.044792652130127,grad_norm: 0.9999991601073162, iteration: 71942
loss: 1.0224008560180664,grad_norm: 0.9999997944272004, iteration: 71943
loss: 0.9957805275917053,grad_norm: 0.9812159585202463, iteration: 71944
loss: 0.9850702881813049,grad_norm: 0.9225439910317598, iteration: 71945
loss: 1.0257105827331543,grad_norm: 0.9344178813047487, iteration: 71946
loss: 1.0243929624557495,grad_norm: 0.999999214753683, iteration: 71947
loss: 0.9894751310348511,grad_norm: 0.9915693065740542, iteration: 71948
loss: 1.0079501867294312,grad_norm: 0.9056672916571409, iteration: 71949
loss: 1.0167375802993774,grad_norm: 0.9999991210923131, iteration: 71950
loss: 1.0273281335830688,grad_norm: 0.9999990903134274, iteration: 71951
loss: 1.0178697109222412,grad_norm: 0.9999993097721427, iteration: 71952
loss: 1.0734074115753174,grad_norm: 0.9999993587484575, iteration: 71953
loss: 1.0184210538864136,grad_norm: 0.8562096291132704, iteration: 71954
loss: 1.0042227506637573,grad_norm: 0.9592275903953909, iteration: 71955
loss: 1.0218620300292969,grad_norm: 0.7402256925301091, iteration: 71956
loss: 1.0320346355438232,grad_norm: 0.8880186007988543, iteration: 71957
loss: 0.9853844046592712,grad_norm: 0.9999991230678784, iteration: 71958
loss: 1.0181620121002197,grad_norm: 0.9999992811971185, iteration: 71959
loss: 1.0374983549118042,grad_norm: 0.9853650023655195, iteration: 71960
loss: 1.002294659614563,grad_norm: 0.9851561598926043, iteration: 71961
loss: 0.9866793751716614,grad_norm: 0.9904541605793933, iteration: 71962
loss: 0.9898322224617004,grad_norm: 0.9999991521136098, iteration: 71963
loss: 0.9989722371101379,grad_norm: 0.9447245758253874, iteration: 71964
loss: 1.007440209388733,grad_norm: 0.8385168460857844, iteration: 71965
loss: 1.0051867961883545,grad_norm: 0.8042364580697258, iteration: 71966
loss: 0.9949283599853516,grad_norm: 0.9320370163314932, iteration: 71967
loss: 0.9782124161720276,grad_norm: 0.9999991880018176, iteration: 71968
loss: 0.99765545129776,grad_norm: 0.9999993587123696, iteration: 71969
loss: 0.9894969463348389,grad_norm: 0.938074967202575, iteration: 71970
loss: 1.0032029151916504,grad_norm: 0.9027067349740342, iteration: 71971
loss: 0.9797847867012024,grad_norm: 0.8872595262951287, iteration: 71972
loss: 1.0180598497390747,grad_norm: 0.7639174720836533, iteration: 71973
loss: 1.073488712310791,grad_norm: 0.9999998470531413, iteration: 71974
loss: 1.0272992849349976,grad_norm: 0.9598238557783954, iteration: 71975
loss: 1.005407452583313,grad_norm: 0.9511489971996326, iteration: 71976
loss: 0.9972608685493469,grad_norm: 0.9703126154707882, iteration: 71977
loss: 1.000929832458496,grad_norm: 0.9999993188444805, iteration: 71978
loss: 1.0213356018066406,grad_norm: 0.9999991966751369, iteration: 71979
loss: 0.9760836362838745,grad_norm: 0.9076804051127587, iteration: 71980
loss: 1.0141621828079224,grad_norm: 0.9173039614646236, iteration: 71981
loss: 1.0122162103652954,grad_norm: 0.9999997021211262, iteration: 71982
loss: 1.0115355253219604,grad_norm: 0.9999992782897951, iteration: 71983
loss: 0.9603910446166992,grad_norm: 0.9112872529193217, iteration: 71984
loss: 0.9926374554634094,grad_norm: 0.9742221930765781, iteration: 71985
loss: 1.0440359115600586,grad_norm: 0.9999993488920381, iteration: 71986
loss: 0.9924678802490234,grad_norm: 0.9999990890700349, iteration: 71987
loss: 1.054248332977295,grad_norm: 0.9999992071644215, iteration: 71988
loss: 0.9537498950958252,grad_norm: 0.8927738835703356, iteration: 71989
loss: 1.0070109367370605,grad_norm: 0.9999999067979932, iteration: 71990
loss: 1.0131546258926392,grad_norm: 0.9569905928392534, iteration: 71991
loss: 0.9880335330963135,grad_norm: 0.999999117932743, iteration: 71992
loss: 0.9989705681800842,grad_norm: 0.7628547258750906, iteration: 71993
loss: 1.0485105514526367,grad_norm: 0.9999991614038841, iteration: 71994
loss: 1.068372368812561,grad_norm: 0.9620914682878317, iteration: 71995
loss: 1.0129058361053467,grad_norm: 0.8762332548405558, iteration: 71996
loss: 1.2029350996017456,grad_norm: 0.999999838112642, iteration: 71997
loss: 1.0311418771743774,grad_norm: 0.7403109149056595, iteration: 71998
loss: 0.9930445551872253,grad_norm: 0.9999998282628916, iteration: 71999
loss: 0.9837540984153748,grad_norm: 0.9999993336453236, iteration: 72000
loss: 0.9785200953483582,grad_norm: 0.9632112661908688, iteration: 72001
loss: 0.9867929220199585,grad_norm: 0.9999991533014678, iteration: 72002
loss: 0.9907727241516113,grad_norm: 0.8076090945616435, iteration: 72003
loss: 1.0048186779022217,grad_norm: 0.9999992288174242, iteration: 72004
loss: 0.9804964065551758,grad_norm: 0.9260266013215843, iteration: 72005
loss: 1.0285056829452515,grad_norm: 0.9999989877422507, iteration: 72006
loss: 0.9867950081825256,grad_norm: 0.9375519422477744, iteration: 72007
loss: 1.016640543937683,grad_norm: 0.9639020048839099, iteration: 72008
loss: 1.0116651058197021,grad_norm: 0.9360378564297889, iteration: 72009
loss: 0.99828040599823,grad_norm: 0.9999989601294599, iteration: 72010
loss: 1.1056667566299438,grad_norm: 0.9999993520220095, iteration: 72011
loss: 1.0089069604873657,grad_norm: 0.9999989463345812, iteration: 72012
loss: 0.9892880320549011,grad_norm: 0.9999999041609727, iteration: 72013
loss: 0.9698860049247742,grad_norm: 0.9999999634419604, iteration: 72014
loss: 0.9846036434173584,grad_norm: 0.9787719023731086, iteration: 72015
loss: 1.02118718624115,grad_norm: 0.9999997704772935, iteration: 72016
loss: 1.006361961364746,grad_norm: 0.7995991059947161, iteration: 72017
loss: 0.9833191633224487,grad_norm: 0.735439493268764, iteration: 72018
loss: 1.006738543510437,grad_norm: 0.899055982523362, iteration: 72019
loss: 1.000874400138855,grad_norm: 0.9874309042768252, iteration: 72020
loss: 0.9826412200927734,grad_norm: 0.8441071545151408, iteration: 72021
loss: 0.9817753434181213,grad_norm: 0.9306876081885471, iteration: 72022
loss: 1.0242608785629272,grad_norm: 0.8783410472898626, iteration: 72023
loss: 0.9713404178619385,grad_norm: 0.8893282988505167, iteration: 72024
loss: 1.0114911794662476,grad_norm: 0.9999991529628419, iteration: 72025
loss: 1.015721082687378,grad_norm: 0.9906982425454283, iteration: 72026
loss: 1.0580620765686035,grad_norm: 0.999999701875158, iteration: 72027
loss: 0.9790030121803284,grad_norm: 0.9999991783801383, iteration: 72028
loss: 1.0513163805007935,grad_norm: 0.9999995423090714, iteration: 72029
loss: 1.0300794839859009,grad_norm: 0.999999552862887, iteration: 72030
loss: 1.0509909391403198,grad_norm: 0.8605151397112464, iteration: 72031
loss: 0.9665700793266296,grad_norm: 0.8229714420897479, iteration: 72032
loss: 1.0719752311706543,grad_norm: 0.9999992176296744, iteration: 72033
loss: 1.0382099151611328,grad_norm: 0.9999993579487597, iteration: 72034
loss: 1.0076006650924683,grad_norm: 0.8110332659967981, iteration: 72035
loss: 1.0700161457061768,grad_norm: 0.9999991236331119, iteration: 72036
loss: 0.9915043711662292,grad_norm: 0.9578631000129173, iteration: 72037
loss: 1.0482336282730103,grad_norm: 0.9900245163656118, iteration: 72038
loss: 1.0495604276657104,grad_norm: 0.9999998163408022, iteration: 72039
loss: 1.0695230960845947,grad_norm: 0.9999995224879694, iteration: 72040
loss: 1.0500142574310303,grad_norm: 0.9999994092823277, iteration: 72041
loss: 0.9842484593391418,grad_norm: 0.9816766988857806, iteration: 72042
loss: 1.0748445987701416,grad_norm: 0.9999993795338774, iteration: 72043
loss: 0.9922424554824829,grad_norm: 0.7107094034201106, iteration: 72044
loss: 0.995008647441864,grad_norm: 0.7837062962101057, iteration: 72045
loss: 1.121725082397461,grad_norm: 0.9999992644030258, iteration: 72046
loss: 1.0343115329742432,grad_norm: 0.999999125305346, iteration: 72047
loss: 0.9901450276374817,grad_norm: 0.917835556347987, iteration: 72048
loss: 1.0045952796936035,grad_norm: 0.9469839264022912, iteration: 72049
loss: 1.0215100049972534,grad_norm: 0.8712079580740032, iteration: 72050
loss: 1.012879729270935,grad_norm: 0.8604418312271026, iteration: 72051
loss: 1.0166651010513306,grad_norm: 0.9561944068385206, iteration: 72052
loss: 1.0150221586227417,grad_norm: 0.9999996264047758, iteration: 72053
loss: 1.0657461881637573,grad_norm: 0.9999991803331965, iteration: 72054
loss: 1.027747392654419,grad_norm: 0.9999995959386262, iteration: 72055
loss: 1.0454730987548828,grad_norm: 0.9999998488195072, iteration: 72056
loss: 0.9769701957702637,grad_norm: 0.9999991339803195, iteration: 72057
loss: 1.0029432773590088,grad_norm: 0.7205169938670711, iteration: 72058
loss: 1.086013913154602,grad_norm: 0.9987414319020839, iteration: 72059
loss: 1.0845788717269897,grad_norm: 0.9999993919892637, iteration: 72060
loss: 1.0310325622558594,grad_norm: 0.9999995433679263, iteration: 72061
loss: 1.0231215953826904,grad_norm: 0.9385349389763241, iteration: 72062
loss: 1.0126280784606934,grad_norm: 0.9999990837157594, iteration: 72063
loss: 1.0120222568511963,grad_norm: 0.9999990723527685, iteration: 72064
loss: 1.0673445463180542,grad_norm: 0.9999990986104524, iteration: 72065
loss: 1.0324848890304565,grad_norm: 0.8119656233005823, iteration: 72066
loss: 1.0169715881347656,grad_norm: 0.9999992840641186, iteration: 72067
loss: 0.9756491184234619,grad_norm: 0.9999990557397272, iteration: 72068
loss: 0.994695246219635,grad_norm: 0.9458707849739192, iteration: 72069
loss: 1.033153772354126,grad_norm: 0.9999996588640427, iteration: 72070
loss: 0.9563357830047607,grad_norm: 0.9484749504118483, iteration: 72071
loss: 1.0274419784545898,grad_norm: 0.9090247051638652, iteration: 72072
loss: 0.9992581605911255,grad_norm: 0.9999992540973885, iteration: 72073
loss: 0.9561671018600464,grad_norm: 0.8584170765679252, iteration: 72074
loss: 1.0095200538635254,grad_norm: 0.9999997184200432, iteration: 72075
loss: 1.0138680934906006,grad_norm: 0.8774179287844646, iteration: 72076
loss: 0.9714773893356323,grad_norm: 0.9070647985008617, iteration: 72077
loss: 1.003973364830017,grad_norm: 0.9999990123313695, iteration: 72078
loss: 0.9576043486595154,grad_norm: 0.9999991276747926, iteration: 72079
loss: 1.0153287649154663,grad_norm: 0.9999992102937898, iteration: 72080
loss: 1.096319556236267,grad_norm: 0.999999170044944, iteration: 72081
loss: 1.036840558052063,grad_norm: 0.9999996158403313, iteration: 72082
loss: 1.0128589868545532,grad_norm: 0.8347510993431612, iteration: 72083
loss: 1.0336229801177979,grad_norm: 0.9999989248574755, iteration: 72084
loss: 1.0478081703186035,grad_norm: 0.9999996610450405, iteration: 72085
loss: 1.035413384437561,grad_norm: 0.9999997286759185, iteration: 72086
loss: 0.9834966063499451,grad_norm: 0.9999990794447301, iteration: 72087
loss: 0.9982566237449646,grad_norm: 0.8679353641115001, iteration: 72088
loss: 1.014910101890564,grad_norm: 0.9999994065395738, iteration: 72089
loss: 1.026762843132019,grad_norm: 0.9999995874089661, iteration: 72090
loss: 1.0048798322677612,grad_norm: 0.9999989114525983, iteration: 72091
loss: 1.0090147256851196,grad_norm: 0.9105687200339427, iteration: 72092
loss: 0.989635705947876,grad_norm: 0.9811185638412739, iteration: 72093
loss: 1.0119836330413818,grad_norm: 0.9079530496029032, iteration: 72094
loss: 0.9819689393043518,grad_norm: 0.9369904290399297, iteration: 72095
loss: 1.0246179103851318,grad_norm: 0.8811685855561344, iteration: 72096
loss: 1.0003678798675537,grad_norm: 0.9999993817239077, iteration: 72097
loss: 0.9794068336486816,grad_norm: 0.9999990079948861, iteration: 72098
loss: 0.9703290462493896,grad_norm: 0.9226364338139156, iteration: 72099
loss: 1.0045464038848877,grad_norm: 0.836691830698281, iteration: 72100
loss: 1.0779895782470703,grad_norm: 0.9999995860094738, iteration: 72101
loss: 1.0008821487426758,grad_norm: 0.8719936331299635, iteration: 72102
loss: 1.0333961248397827,grad_norm: 0.9999992733307892, iteration: 72103
loss: 1.068933129310608,grad_norm: 0.999999367847504, iteration: 72104
loss: 1.031723976135254,grad_norm: 0.986033826815449, iteration: 72105
loss: 1.0233933925628662,grad_norm: 0.9479684153977646, iteration: 72106
loss: 1.0128768682479858,grad_norm: 0.9999990439531672, iteration: 72107
loss: 1.00055992603302,grad_norm: 0.9999993101439317, iteration: 72108
loss: 0.9982736110687256,grad_norm: 0.9577554973878072, iteration: 72109
loss: 0.9740852117538452,grad_norm: 0.9686326670194803, iteration: 72110
loss: 0.9794649481773376,grad_norm: 0.9999991975472822, iteration: 72111
loss: 1.0181649923324585,grad_norm: 0.9934423788642716, iteration: 72112
loss: 0.9903238415718079,grad_norm: 0.9999994208101803, iteration: 72113
loss: 1.0286188125610352,grad_norm: 0.9266803841166628, iteration: 72114
loss: 1.051906704902649,grad_norm: 0.999999776177909, iteration: 72115
loss: 1.007053017616272,grad_norm: 0.9999993529560541, iteration: 72116
loss: 1.016356110572815,grad_norm: 0.927132459115532, iteration: 72117
loss: 0.9863170385360718,grad_norm: 0.9868782117616427, iteration: 72118
loss: 1.0233957767486572,grad_norm: 0.873215492322079, iteration: 72119
loss: 1.021140456199646,grad_norm: 0.9999992281549378, iteration: 72120
loss: 1.00130033493042,grad_norm: 0.9999990926715919, iteration: 72121
loss: 0.9831269383430481,grad_norm: 0.9475813067664716, iteration: 72122
loss: 1.030493140220642,grad_norm: 0.9999993853515367, iteration: 72123
loss: 1.0560698509216309,grad_norm: 0.9082569760627861, iteration: 72124
loss: 1.037717580795288,grad_norm: 0.9734147077000166, iteration: 72125
loss: 1.072595238685608,grad_norm: 0.9999991578015829, iteration: 72126
loss: 1.0089234113693237,grad_norm: 0.9090635861760643, iteration: 72127
loss: 1.0552855730056763,grad_norm: 0.9999998143741119, iteration: 72128
loss: 1.0531656742095947,grad_norm: 0.9999993616079644, iteration: 72129
loss: 0.9291211366653442,grad_norm: 0.9999990601454243, iteration: 72130
loss: 1.015586256980896,grad_norm: 0.8159396254630894, iteration: 72131
loss: 1.020772933959961,grad_norm: 0.8830712320998656, iteration: 72132
loss: 0.99811190366745,grad_norm: 0.9999993227894833, iteration: 72133
loss: 1.039350152015686,grad_norm: 0.9342826459152699, iteration: 72134
loss: 1.0109901428222656,grad_norm: 0.96210593465116, iteration: 72135
loss: 0.9574913382530212,grad_norm: 0.9999992657443767, iteration: 72136
loss: 0.9847967028617859,grad_norm: 0.880757009962444, iteration: 72137
loss: 1.0107550621032715,grad_norm: 0.8847675919042544, iteration: 72138
loss: 1.0489445924758911,grad_norm: 0.9999995581997582, iteration: 72139
loss: 1.0313465595245361,grad_norm: 0.8971172877682645, iteration: 72140
loss: 1.0002262592315674,grad_norm: 0.8714976926277518, iteration: 72141
loss: 1.0091514587402344,grad_norm: 0.9999997901070479, iteration: 72142
loss: 1.0395313501358032,grad_norm: 0.9999993969296601, iteration: 72143
loss: 1.001046061515808,grad_norm: 0.9999994765266258, iteration: 72144
loss: 1.0371443033218384,grad_norm: 0.95252561088892, iteration: 72145
loss: 1.0397915840148926,grad_norm: 0.9999998656909759, iteration: 72146
loss: 0.9812218546867371,grad_norm: 0.9427740152458818, iteration: 72147
loss: 1.035369634628296,grad_norm: 0.9341014607867175, iteration: 72148
loss: 1.0047392845153809,grad_norm: 0.9407715759540525, iteration: 72149
loss: 1.0444313287734985,grad_norm: 0.9999990237969331, iteration: 72150
loss: 1.0205717086791992,grad_norm: 0.7249067669077148, iteration: 72151
loss: 1.001509189605713,grad_norm: 0.9999997812556871, iteration: 72152
loss: 0.9762479662895203,grad_norm: 0.9999990458176888, iteration: 72153
loss: 1.0215017795562744,grad_norm: 0.9999999393789548, iteration: 72154
loss: 0.9972888231277466,grad_norm: 0.8770879023427015, iteration: 72155
loss: 0.9962738752365112,grad_norm: 0.9999993843261433, iteration: 72156
loss: 1.0610038042068481,grad_norm: 0.9999991527595733, iteration: 72157
loss: 0.9825860857963562,grad_norm: 0.9999989757860758, iteration: 72158
loss: 0.9788100719451904,grad_norm: 0.9999991944528261, iteration: 72159
loss: 1.022964596748352,grad_norm: 0.9654311001796118, iteration: 72160
loss: 0.9574555158615112,grad_norm: 0.999999608046335, iteration: 72161
loss: 0.9937698245048523,grad_norm: 0.9999998418457012, iteration: 72162
loss: 1.0473427772521973,grad_norm: 0.9999996648029008, iteration: 72163
loss: 0.9694671630859375,grad_norm: 0.8884012112041092, iteration: 72164
loss: 0.9680603742599487,grad_norm: 0.9452361431821169, iteration: 72165
loss: 1.0190151929855347,grad_norm: 0.9744852814121963, iteration: 72166
loss: 1.0240578651428223,grad_norm: 0.9999991832213391, iteration: 72167
loss: 1.0324100255966187,grad_norm: 0.9999991268430096, iteration: 72168
loss: 1.0522347688674927,grad_norm: 0.9999994634389545, iteration: 72169
loss: 0.9716686606407166,grad_norm: 0.8446781700125331, iteration: 72170
loss: 1.030542016029358,grad_norm: 0.9999996454174724, iteration: 72171
loss: 1.012920618057251,grad_norm: 0.9999997674800383, iteration: 72172
loss: 1.053886890411377,grad_norm: 0.9999991668247897, iteration: 72173
loss: 0.9802775979042053,grad_norm: 0.7987209642301034, iteration: 72174
loss: 1.04206120967865,grad_norm: 0.9426201018148235, iteration: 72175
loss: 1.023383617401123,grad_norm: 0.9999992539122017, iteration: 72176
loss: 0.996889054775238,grad_norm: 0.8471613099721524, iteration: 72177
loss: 0.9699973464012146,grad_norm: 0.8697351557870631, iteration: 72178
loss: 0.9715550541877747,grad_norm: 0.9999991512333244, iteration: 72179
loss: 0.9886162877082825,grad_norm: 0.7453606452387735, iteration: 72180
loss: 0.9643608927726746,grad_norm: 0.8514059066593727, iteration: 72181
loss: 0.9914173483848572,grad_norm: 0.8857292350636885, iteration: 72182
loss: 1.0339903831481934,grad_norm: 0.999999450286235, iteration: 72183
loss: 1.0246248245239258,grad_norm: 0.8922334415283368, iteration: 72184
loss: 1.0129796266555786,grad_norm: 0.9999997193294321, iteration: 72185
loss: 0.9905440211296082,grad_norm: 0.9999993609193896, iteration: 72186
loss: 1.0773898363113403,grad_norm: 0.999999911778264, iteration: 72187
loss: 1.017043113708496,grad_norm: 0.8482987437338873, iteration: 72188
loss: 1.024687647819519,grad_norm: 0.9999990846687115, iteration: 72189
loss: 0.9774698615074158,grad_norm: 0.9706306314117414, iteration: 72190
loss: 0.9634182453155518,grad_norm: 0.9217970857853834, iteration: 72191
loss: 1.0258852243423462,grad_norm: 0.989767512756355, iteration: 72192
loss: 0.996330976486206,grad_norm: 0.9803212040475217, iteration: 72193
loss: 0.970034658908844,grad_norm: 0.9774823915151006, iteration: 72194
loss: 1.0334707498550415,grad_norm: 0.8721872742015174, iteration: 72195
loss: 0.9767364859580994,grad_norm: 0.9999990156876325, iteration: 72196
loss: 0.9816725254058838,grad_norm: 0.9275793704237076, iteration: 72197
loss: 1.0139836072921753,grad_norm: 0.8343564777971465, iteration: 72198
loss: 1.0386238098144531,grad_norm: 0.9999992850073235, iteration: 72199
loss: 0.9899409413337708,grad_norm: 0.935712867425391, iteration: 72200
loss: 0.999158501625061,grad_norm: 0.9999991026632659, iteration: 72201
loss: 1.0202069282531738,grad_norm: 0.9501609857336604, iteration: 72202
loss: 0.9982829689979553,grad_norm: 0.9371388350768802, iteration: 72203
loss: 0.9882203936576843,grad_norm: 0.7864712208749535, iteration: 72204
loss: 0.9640981554985046,grad_norm: 0.9999991723516701, iteration: 72205
loss: 0.9760938882827759,grad_norm: 0.9999990815360771, iteration: 72206
loss: 0.9816327691078186,grad_norm: 0.9443202233349522, iteration: 72207
loss: 0.9808340668678284,grad_norm: 0.999999030327932, iteration: 72208
loss: 0.9966014623641968,grad_norm: 0.9999992281937333, iteration: 72209
loss: 1.0079026222229004,grad_norm: 0.9999991438149091, iteration: 72210
loss: 1.0530403852462769,grad_norm: 0.9999997984358552, iteration: 72211
loss: 1.0060666799545288,grad_norm: 0.8352934009121145, iteration: 72212
loss: 1.0056718587875366,grad_norm: 0.999999110227343, iteration: 72213
loss: 1.0243782997131348,grad_norm: 0.9773645982917715, iteration: 72214
loss: 1.0205881595611572,grad_norm: 0.9636391968190201, iteration: 72215
loss: 1.0802810192108154,grad_norm: 0.9999993236011074, iteration: 72216
loss: 1.0204527378082275,grad_norm: 0.7766065529230712, iteration: 72217
loss: 0.9824255704879761,grad_norm: 0.9999991584077738, iteration: 72218
loss: 1.0076544284820557,grad_norm: 0.8930613707313764, iteration: 72219
loss: 1.046981930732727,grad_norm: 0.9999990736695406, iteration: 72220
loss: 1.001969575881958,grad_norm: 0.9731320925072888, iteration: 72221
loss: 1.008407711982727,grad_norm: 0.9999990745687076, iteration: 72222
loss: 0.9953649640083313,grad_norm: 0.905217318872066, iteration: 72223
loss: 1.001684308052063,grad_norm: 0.972600400738546, iteration: 72224
loss: 1.0423697233200073,grad_norm: 0.9704260818985568, iteration: 72225
loss: 1.028056263923645,grad_norm: 0.9999995973955209, iteration: 72226
loss: 0.996488630771637,grad_norm: 0.8707760190807443, iteration: 72227
loss: 1.0592312812805176,grad_norm: 0.9999995440023095, iteration: 72228
loss: 1.0033143758773804,grad_norm: 0.8438926219227253, iteration: 72229
loss: 0.9887877106666565,grad_norm: 0.9680705801281212, iteration: 72230
loss: 0.9803823232650757,grad_norm: 0.9099695147356371, iteration: 72231
loss: 0.9758918881416321,grad_norm: 0.9619139812398704, iteration: 72232
loss: 0.9943321943283081,grad_norm: 0.7760809687547872, iteration: 72233
loss: 0.9992214441299438,grad_norm: 0.8039740183121491, iteration: 72234
loss: 0.9848681092262268,grad_norm: 0.9999990263279566, iteration: 72235
loss: 1.025093674659729,grad_norm: 0.8763998357455601, iteration: 72236
loss: 1.0194536447525024,grad_norm: 0.9999992707832964, iteration: 72237
loss: 1.0217586755752563,grad_norm: 0.9999992521058252, iteration: 72238
loss: 1.046251893043518,grad_norm: 0.9067432464848877, iteration: 72239
loss: 1.02114999294281,grad_norm: 0.8879235949170845, iteration: 72240
loss: 1.00403892993927,grad_norm: 0.999999230750098, iteration: 72241
loss: 1.0141640901565552,grad_norm: 0.9771105990646143, iteration: 72242
loss: 1.0452356338500977,grad_norm: 0.9670852483087377, iteration: 72243
loss: 0.9861546754837036,grad_norm: 0.9011628453711008, iteration: 72244
loss: 0.9534956216812134,grad_norm: 0.9999991985336646, iteration: 72245
loss: 1.0347628593444824,grad_norm: 0.9753432342030182, iteration: 72246
loss: 0.9908975958824158,grad_norm: 0.9999997239213324, iteration: 72247
loss: 0.9952390193939209,grad_norm: 0.9999994271037874, iteration: 72248
loss: 1.1774836778640747,grad_norm: 0.9999991253864517, iteration: 72249
loss: 1.0116760730743408,grad_norm: 0.9364441613703853, iteration: 72250
loss: 1.009928584098816,grad_norm: 0.9999992798049869, iteration: 72251
loss: 0.9947665333747864,grad_norm: 0.9924150159260253, iteration: 72252
loss: 1.0146771669387817,grad_norm: 0.9989221220409784, iteration: 72253
loss: 1.0284420251846313,grad_norm: 0.8639606321006865, iteration: 72254
loss: 0.9669719934463501,grad_norm: 0.9999990175501734, iteration: 72255
loss: 0.9481152296066284,grad_norm: 0.966565641732198, iteration: 72256
loss: 1.070641040802002,grad_norm: 0.9999994510021719, iteration: 72257
loss: 1.0027198791503906,grad_norm: 0.9999997101656641, iteration: 72258
loss: 1.0041139125823975,grad_norm: 0.9999994569685147, iteration: 72259
loss: 1.0402230024337769,grad_norm: 0.9999996037333737, iteration: 72260
loss: 0.9782518148422241,grad_norm: 0.9526561414079996, iteration: 72261
loss: 0.9962838292121887,grad_norm: 0.9208085121574032, iteration: 72262
loss: 1.0241938829421997,grad_norm: 0.9867190613598431, iteration: 72263
loss: 0.9975679516792297,grad_norm: 0.9833724486445381, iteration: 72264
loss: 0.9954236149787903,grad_norm: 0.8289873762641927, iteration: 72265
loss: 1.0299164056777954,grad_norm: 0.9999995831607298, iteration: 72266
loss: 1.0005691051483154,grad_norm: 0.9999990626740003, iteration: 72267
loss: 0.9939378499984741,grad_norm: 0.9558854939969301, iteration: 72268
loss: 0.9793218970298767,grad_norm: 0.8561515164583722, iteration: 72269
loss: 0.9819482564926147,grad_norm: 0.9653904421195509, iteration: 72270
loss: 0.9845269322395325,grad_norm: 0.9272916894383658, iteration: 72271
loss: 1.0354121923446655,grad_norm: 0.9999997497479237, iteration: 72272
loss: 1.0023120641708374,grad_norm: 0.9486633568670568, iteration: 72273
loss: 1.0312951803207397,grad_norm: 0.9999993317386836, iteration: 72274
loss: 1.043401837348938,grad_norm: 0.9999992011267161, iteration: 72275
loss: 1.0247392654418945,grad_norm: 0.9999991217519271, iteration: 72276
loss: 0.9919002056121826,grad_norm: 0.9540301960471385, iteration: 72277
loss: 1.0386661291122437,grad_norm: 0.8707132641912675, iteration: 72278
loss: 1.0597001314163208,grad_norm: 0.999999470934695, iteration: 72279
loss: 0.996425449848175,grad_norm: 0.9999995259321438, iteration: 72280
loss: 0.9888445138931274,grad_norm: 0.9480178754421651, iteration: 72281
loss: 1.0552061796188354,grad_norm: 0.9999994187861686, iteration: 72282
loss: 1.0189048051834106,grad_norm: 0.9142955988136164, iteration: 72283
loss: 1.0330581665039062,grad_norm: 0.9999996246597889, iteration: 72284
loss: 1.0019009113311768,grad_norm: 0.9186023951938369, iteration: 72285
loss: 1.0108811855316162,grad_norm: 0.999999109197589, iteration: 72286
loss: 1.0405696630477905,grad_norm: 0.9999997456222929, iteration: 72287
loss: 0.9559058547019958,grad_norm: 0.9999990400891345, iteration: 72288
loss: 1.0750383138656616,grad_norm: 0.9999993857985514, iteration: 72289
loss: 1.0543590784072876,grad_norm: 0.9702024915959858, iteration: 72290
loss: 0.9551271796226501,grad_norm: 0.9999991535125236, iteration: 72291
loss: 1.0109729766845703,grad_norm: 0.9999992176969744, iteration: 72292
loss: 1.000470519065857,grad_norm: 0.9999991106635003, iteration: 72293
loss: 1.0450958013534546,grad_norm: 0.9999991703494894, iteration: 72294
loss: 1.0397359132766724,grad_norm: 0.9999991700284472, iteration: 72295
loss: 1.047844648361206,grad_norm: 0.999999487788991, iteration: 72296
loss: 1.021618366241455,grad_norm: 0.9999991895025022, iteration: 72297
loss: 0.985201358795166,grad_norm: 0.8709591072241627, iteration: 72298
loss: 1.0090348720550537,grad_norm: 0.8937194085098688, iteration: 72299
loss: 1.0220867395401,grad_norm: 0.8920075445968367, iteration: 72300
loss: 1.0507988929748535,grad_norm: 0.9999997992737129, iteration: 72301
loss: 1.04936945438385,grad_norm: 0.9999991121037051, iteration: 72302
loss: 1.0381699800491333,grad_norm: 0.9999988483970502, iteration: 72303
loss: 0.9914605021476746,grad_norm: 0.8471841290447032, iteration: 72304
loss: 0.9970113039016724,grad_norm: 0.8676723847249906, iteration: 72305
loss: 0.9907159209251404,grad_norm: 0.8474897300092775, iteration: 72306
loss: 0.986800491809845,grad_norm: 0.8253778170718675, iteration: 72307
loss: 1.0411710739135742,grad_norm: 0.9316038088024199, iteration: 72308
loss: 1.0194697380065918,grad_norm: 0.8614079378943875, iteration: 72309
loss: 0.9905079007148743,grad_norm: 0.999999372675314, iteration: 72310
loss: 1.032740592956543,grad_norm: 0.9999995805128642, iteration: 72311
loss: 1.0056419372558594,grad_norm: 0.999999389069454, iteration: 72312
loss: 0.9570039510726929,grad_norm: 0.8906071075408196, iteration: 72313
loss: 0.9774426221847534,grad_norm: 0.8819125746575579, iteration: 72314
loss: 1.0029938220977783,grad_norm: 0.9200223941057992, iteration: 72315
loss: 1.0036303997039795,grad_norm: 0.9793324799340517, iteration: 72316
loss: 1.0520977973937988,grad_norm: 0.9999997578571794, iteration: 72317
loss: 0.9725086092948914,grad_norm: 0.886478707844313, iteration: 72318
loss: 1.069248914718628,grad_norm: 0.9999990455744561, iteration: 72319
loss: 0.9989579916000366,grad_norm: 0.9509425168448011, iteration: 72320
loss: 0.9541106820106506,grad_norm: 0.813234502172475, iteration: 72321
loss: 1.007597804069519,grad_norm: 0.7782913625809086, iteration: 72322
loss: 0.9993417859077454,grad_norm: 0.9999989697407988, iteration: 72323
loss: 1.0195363759994507,grad_norm: 0.9999992913141601, iteration: 72324
loss: 1.0099202394485474,grad_norm: 0.8434814615055723, iteration: 72325
loss: 1.0012670755386353,grad_norm: 0.9999990106282058, iteration: 72326
loss: 0.9981723427772522,grad_norm: 0.9999992590199378, iteration: 72327
loss: 1.0548518896102905,grad_norm: 0.9999992818246108, iteration: 72328
loss: 1.0025438070297241,grad_norm: 0.9999991597478602, iteration: 72329
loss: 1.0406813621520996,grad_norm: 0.9999992382124085, iteration: 72330
loss: 1.0089452266693115,grad_norm: 0.995795569765338, iteration: 72331
loss: 1.0165958404541016,grad_norm: 0.9999991005025294, iteration: 72332
loss: 1.0149141550064087,grad_norm: 0.9415492366274181, iteration: 72333
loss: 1.0656629800796509,grad_norm: 0.9999991389730457, iteration: 72334
loss: 1.0557984113693237,grad_norm: 0.9999991922372184, iteration: 72335
loss: 1.1138484477996826,grad_norm: 0.9999999629463763, iteration: 72336
loss: 1.0465179681777954,grad_norm: 0.9999996617848559, iteration: 72337
loss: 1.0067269802093506,grad_norm: 0.9999990320621224, iteration: 72338
loss: 1.196568489074707,grad_norm: 0.9999996970806012, iteration: 72339
loss: 1.2134379148483276,grad_norm: 0.9999999406091409, iteration: 72340
loss: 1.080484390258789,grad_norm: 0.9999994982061552, iteration: 72341
loss: 1.2043200731277466,grad_norm: 0.999999879545947, iteration: 72342
loss: 1.0697418451309204,grad_norm: 0.9999991325376677, iteration: 72343
loss: 1.0186183452606201,grad_norm: 0.9999991277577502, iteration: 72344
loss: 1.1611406803131104,grad_norm: 0.9999992431706886, iteration: 72345
loss: 1.0930629968643188,grad_norm: 0.9999991113156996, iteration: 72346
loss: 1.1286338567733765,grad_norm: 0.9999999751735199, iteration: 72347
loss: 1.0069472789764404,grad_norm: 0.9999991508508174, iteration: 72348
loss: 1.0080560445785522,grad_norm: 0.9617684112353727, iteration: 72349
loss: 1.0174683332443237,grad_norm: 0.9999991816485915, iteration: 72350
loss: 1.0058330297470093,grad_norm: 0.9999990972838563, iteration: 72351
loss: 1.0113744735717773,grad_norm: 0.987238900319505, iteration: 72352
loss: 0.9897630214691162,grad_norm: 0.9999991271863757, iteration: 72353
loss: 0.9914789795875549,grad_norm: 0.9999990782502994, iteration: 72354
loss: 1.0226030349731445,grad_norm: 0.9999996000256401, iteration: 72355
loss: 1.0156733989715576,grad_norm: 0.9999994048284525, iteration: 72356
loss: 1.072919487953186,grad_norm: 0.9999999399093545, iteration: 72357
loss: 1.0107431411743164,grad_norm: 0.9999994600998505, iteration: 72358
loss: 0.9655919075012207,grad_norm: 0.9999998149975877, iteration: 72359
loss: 1.001491904258728,grad_norm: 0.9999991291851936, iteration: 72360
loss: 0.9971261620521545,grad_norm: 0.9999996382902969, iteration: 72361
loss: 1.0219786167144775,grad_norm: 0.9999989585967923, iteration: 72362
loss: 1.0132032632827759,grad_norm: 0.9999992022817887, iteration: 72363
loss: 1.0251035690307617,grad_norm: 0.972568658298311, iteration: 72364
loss: 1.0064311027526855,grad_norm: 0.944886810179905, iteration: 72365
loss: 0.9776481986045837,grad_norm: 0.912186774129687, iteration: 72366
loss: 1.0120512247085571,grad_norm: 0.9014890297741286, iteration: 72367
loss: 1.0067346096038818,grad_norm: 0.9999992745716482, iteration: 72368
loss: 0.992929995059967,grad_norm: 0.9760843942053927, iteration: 72369
loss: 0.977459192276001,grad_norm: 0.8988525678255471, iteration: 72370
loss: 0.9789025187492371,grad_norm: 0.8734294484164012, iteration: 72371
loss: 0.9896887540817261,grad_norm: 0.9999991640215281, iteration: 72372
loss: 1.0068044662475586,grad_norm: 0.9999997851678496, iteration: 72373
loss: 1.0143805742263794,grad_norm: 0.9275052160487266, iteration: 72374
loss: 1.0152957439422607,grad_norm: 0.7893035265584611, iteration: 72375
loss: 1.04902184009552,grad_norm: 0.9999997999094539, iteration: 72376
loss: 1.0202265977859497,grad_norm: 0.9614437166307688, iteration: 72377
loss: 0.9723650813102722,grad_norm: 0.9999989352735895, iteration: 72378
loss: 1.026271104812622,grad_norm: 0.9999991231948042, iteration: 72379
loss: 0.9946080446243286,grad_norm: 0.999999718058487, iteration: 72380
loss: 1.0479352474212646,grad_norm: 0.9354513575167703, iteration: 72381
loss: 1.0570744276046753,grad_norm: 0.9999999850391578, iteration: 72382
loss: 0.9976994395256042,grad_norm: 0.9999993337428998, iteration: 72383
loss: 0.9709859490394592,grad_norm: 0.9999991139763901, iteration: 72384
loss: 0.9994288086891174,grad_norm: 0.8672865201250055, iteration: 72385
loss: 0.9797797203063965,grad_norm: 0.9999992510040244, iteration: 72386
loss: 1.0120083093643188,grad_norm: 0.9999991104304498, iteration: 72387
loss: 0.9983876943588257,grad_norm: 0.9999996345507477, iteration: 72388
loss: 1.0092458724975586,grad_norm: 0.8247458141810378, iteration: 72389
loss: 0.9979944825172424,grad_norm: 0.9097521779088285, iteration: 72390
loss: 0.9743896722793579,grad_norm: 0.9999990831182919, iteration: 72391
loss: 1.0909838676452637,grad_norm: 0.9999997732936245, iteration: 72392
loss: 1.010990023612976,grad_norm: 0.9999989992651923, iteration: 72393
loss: 0.9943779110908508,grad_norm: 0.8174745852370408, iteration: 72394
loss: 0.9884850382804871,grad_norm: 0.9999994328501157, iteration: 72395
loss: 1.096635103225708,grad_norm: 0.9999992860111533, iteration: 72396
loss: 0.9929509162902832,grad_norm: 0.9999990925086139, iteration: 72397
loss: 0.9550898671150208,grad_norm: 0.9683306819684494, iteration: 72398
loss: 0.9652584195137024,grad_norm: 0.9999991347532512, iteration: 72399
loss: 1.005533218383789,grad_norm: 0.931425812412896, iteration: 72400
loss: 1.0158940553665161,grad_norm: 0.8949595955212877, iteration: 72401
loss: 0.9812432527542114,grad_norm: 0.8558914358395453, iteration: 72402
loss: 1.0488883256912231,grad_norm: 0.9999992620320372, iteration: 72403
loss: 1.0246806144714355,grad_norm: 0.9999990908738209, iteration: 72404
loss: 0.99595707654953,grad_norm: 0.919305618613975, iteration: 72405
loss: 1.0107123851776123,grad_norm: 0.9303527616365284, iteration: 72406
loss: 1.0177321434020996,grad_norm: 0.9999991215011009, iteration: 72407
loss: 1.0101912021636963,grad_norm: 0.9167855669543994, iteration: 72408
loss: 0.9472929835319519,grad_norm: 0.9687334560155558, iteration: 72409
loss: 0.948279857635498,grad_norm: 0.9999989200910416, iteration: 72410
loss: 0.9885438680648804,grad_norm: 0.9835777030269843, iteration: 72411
loss: 1.0355098247528076,grad_norm: 0.9460938816782531, iteration: 72412
loss: 0.9735676050186157,grad_norm: 0.9121551460320779, iteration: 72413
loss: 0.9646704196929932,grad_norm: 0.9999990764627821, iteration: 72414
loss: 1.0324312448501587,grad_norm: 0.9999991847246954, iteration: 72415
loss: 0.9949532151222229,grad_norm: 1.00000003951903, iteration: 72416
loss: 1.0262070894241333,grad_norm: 0.9999992618374469, iteration: 72417
loss: 1.0108627080917358,grad_norm: 0.9669912545173274, iteration: 72418
loss: 1.0036671161651611,grad_norm: 0.8722338286174897, iteration: 72419
loss: 1.0063396692276,grad_norm: 0.9999995440411604, iteration: 72420
loss: 0.9998566508293152,grad_norm: 0.9999996491939823, iteration: 72421
loss: 1.0218156576156616,grad_norm: 0.999999997373636, iteration: 72422
loss: 1.023120403289795,grad_norm: 0.933220319227446, iteration: 72423
loss: 0.9933090806007385,grad_norm: 0.9553495067596351, iteration: 72424
loss: 0.9892880320549011,grad_norm: 0.7611431039921863, iteration: 72425
loss: 0.9811667203903198,grad_norm: 0.8413186167730027, iteration: 72426
loss: 0.9890711307525635,grad_norm: 0.9999992065016778, iteration: 72427
loss: 0.987496554851532,grad_norm: 0.9999994231478372, iteration: 72428
loss: 0.9732523560523987,grad_norm: 0.9999994007259914, iteration: 72429
loss: 1.0124529600143433,grad_norm: 0.9999994725853881, iteration: 72430
loss: 0.9877520799636841,grad_norm: 0.8396629666677775, iteration: 72431
loss: 1.012405514717102,grad_norm: 0.9999991230687547, iteration: 72432
loss: 1.1271848678588867,grad_norm: 0.9999996080789911, iteration: 72433
loss: 0.9829023480415344,grad_norm: 0.8348816065584419, iteration: 72434
loss: 1.0592159032821655,grad_norm: 0.999999153546661, iteration: 72435
loss: 1.0192999839782715,grad_norm: 0.8181519047464375, iteration: 72436
loss: 0.9852792620658875,grad_norm: 0.9999992960472456, iteration: 72437
loss: 1.0094183683395386,grad_norm: 0.9549765743519217, iteration: 72438
loss: 1.0143816471099854,grad_norm: 0.9999990082837854, iteration: 72439
loss: 1.0066719055175781,grad_norm: 0.9803327352502941, iteration: 72440
loss: 1.1776113510131836,grad_norm: 0.9999994739311473, iteration: 72441
loss: 0.9951858520507812,grad_norm: 0.9999989652650284, iteration: 72442
loss: 0.9766495227813721,grad_norm: 0.9701130132449988, iteration: 72443
loss: 1.0008444786071777,grad_norm: 0.8832137104206169, iteration: 72444
loss: 1.004638433456421,grad_norm: 0.9394677641019825, iteration: 72445
loss: 1.022191047668457,grad_norm: 0.9999991936258376, iteration: 72446
loss: 1.0517936944961548,grad_norm: 0.9999999468633369, iteration: 72447
loss: 0.9809707403182983,grad_norm: 0.9999992269733186, iteration: 72448
loss: 1.0159119367599487,grad_norm: 0.9999990574216203, iteration: 72449
loss: 1.0103431940078735,grad_norm: 0.9310405045116368, iteration: 72450
loss: 1.008925437927246,grad_norm: 0.9999992292292923, iteration: 72451
loss: 1.0461313724517822,grad_norm: 0.9561995380522352, iteration: 72452
loss: 0.9720916152000427,grad_norm: 0.8913160439856134, iteration: 72453
loss: 0.9617912173271179,grad_norm: 0.7453430646933982, iteration: 72454
loss: 1.0184838771820068,grad_norm: 0.9999991583879363, iteration: 72455
loss: 1.082288384437561,grad_norm: 0.9999998982528304, iteration: 72456
loss: 1.0123475790023804,grad_norm: 0.9916539207920467, iteration: 72457
loss: 1.019516110420227,grad_norm: 0.7895670398575523, iteration: 72458
loss: 1.045843243598938,grad_norm: 0.9841462369941061, iteration: 72459
loss: 1.0287187099456787,grad_norm: 0.9760568592927261, iteration: 72460
loss: 1.0245705842971802,grad_norm: 0.9999992800702427, iteration: 72461
loss: 1.009489893913269,grad_norm: 0.9999989430513528, iteration: 72462
loss: 1.0053188800811768,grad_norm: 0.9999991126452248, iteration: 72463
loss: 1.008076786994934,grad_norm: 0.9999996791315514, iteration: 72464
loss: 0.9951114654541016,grad_norm: 0.9999991556177946, iteration: 72465
loss: 1.1228384971618652,grad_norm: 0.9999997790407041, iteration: 72466
loss: 1.030799150466919,grad_norm: 0.9999998421177806, iteration: 72467
loss: 1.026694655418396,grad_norm: 0.8969765716079028, iteration: 72468
loss: 1.0023634433746338,grad_norm: 0.9066199384182215, iteration: 72469
loss: 0.968367338180542,grad_norm: 0.89486623113201, iteration: 72470
loss: 0.9951438307762146,grad_norm: 0.9303395727894787, iteration: 72471
loss: 1.315948486328125,grad_norm: 0.9999998315798369, iteration: 72472
loss: 1.2458622455596924,grad_norm: 0.9999991306864797, iteration: 72473
loss: 1.0236340761184692,grad_norm: 0.9936676375747461, iteration: 72474
loss: 1.3381147384643555,grad_norm: 0.9999993963758621, iteration: 72475
loss: 1.2992208003997803,grad_norm: 0.9999998399369968, iteration: 72476
loss: 1.5185942649841309,grad_norm: 1.0000000004134442, iteration: 72477
loss: 1.047443151473999,grad_norm: 0.9999990351769424, iteration: 72478
loss: 1.197383999824524,grad_norm: 0.9999995227270565, iteration: 72479
loss: 0.9999045729637146,grad_norm: 0.9999991765531603, iteration: 72480
loss: 1.042698621749878,grad_norm: 0.9999994818635971, iteration: 72481
loss: 1.0820198059082031,grad_norm: 0.9999990230908741, iteration: 72482
loss: 1.1515990495681763,grad_norm: 0.9999994631940152, iteration: 72483
loss: 1.0654261112213135,grad_norm: 0.999999230750854, iteration: 72484
loss: 1.161808729171753,grad_norm: 0.9999996891233898, iteration: 72485
loss: 1.0967057943344116,grad_norm: 0.9999997674248222, iteration: 72486
loss: 1.0905462503433228,grad_norm: 0.9999998939495436, iteration: 72487
loss: 1.0633975267410278,grad_norm: 0.9999992699941624, iteration: 72488
loss: 1.0237411260604858,grad_norm: 0.9999997863267014, iteration: 72489
loss: 0.9940119385719299,grad_norm: 0.8833640089337345, iteration: 72490
loss: 1.0899765491485596,grad_norm: 0.9999998027507027, iteration: 72491
loss: 1.059247374534607,grad_norm: 0.9999991563777907, iteration: 72492
loss: 1.0162506103515625,grad_norm: 0.8561703771144518, iteration: 72493
loss: 0.9707245230674744,grad_norm: 0.9291861291567117, iteration: 72494
loss: 1.0745242834091187,grad_norm: 0.9999995518151022, iteration: 72495
loss: 1.02620267868042,grad_norm: 0.9999995167323351, iteration: 72496
loss: 1.0265878438949585,grad_norm: 0.999999088251398, iteration: 72497
loss: 0.9926194548606873,grad_norm: 0.9962816918650002, iteration: 72498
loss: 1.1124359369277954,grad_norm: 0.9999993317219902, iteration: 72499
loss: 0.9987922310829163,grad_norm: 0.9999992797053194, iteration: 72500
loss: 1.004698634147644,grad_norm: 0.8662352565953128, iteration: 72501
loss: 1.0243045091629028,grad_norm: 0.8604368210302532, iteration: 72502
loss: 1.0170975923538208,grad_norm: 0.9393028766157283, iteration: 72503
loss: 0.9409237504005432,grad_norm: 0.9999990894427555, iteration: 72504
loss: 1.032476544380188,grad_norm: 0.9999997426590178, iteration: 72505
loss: 1.012451171875,grad_norm: 0.9999989890263652, iteration: 72506
loss: 0.9911782145500183,grad_norm: 0.9686598215696559, iteration: 72507
loss: 0.9944813251495361,grad_norm: 0.9632786032616477, iteration: 72508
loss: 1.1606602668762207,grad_norm: 0.9999996234135886, iteration: 72509
loss: 1.0264219045639038,grad_norm: 0.9035743896267447, iteration: 72510
loss: 1.0021579265594482,grad_norm: 0.9999990813478162, iteration: 72511
loss: 1.0001379251480103,grad_norm: 0.9361226126929946, iteration: 72512
loss: 1.1350208520889282,grad_norm: 0.9999993471889762, iteration: 72513
loss: 0.9814066290855408,grad_norm: 0.999999117695095, iteration: 72514
loss: 0.9483159780502319,grad_norm: 0.9999989963300694, iteration: 72515
loss: 1.0461078882217407,grad_norm: 0.9999993857798491, iteration: 72516
loss: 1.0231820344924927,grad_norm: 0.99999933437363, iteration: 72517
loss: 1.0154907703399658,grad_norm: 0.9333774157268953, iteration: 72518
loss: 1.0199583768844604,grad_norm: 0.9325078959120879, iteration: 72519
loss: 1.1261041164398193,grad_norm: 0.9999996304030894, iteration: 72520
loss: 1.1929770708084106,grad_norm: 0.9999991479295989, iteration: 72521
loss: 1.0263586044311523,grad_norm: 0.8557221862456544, iteration: 72522
loss: 1.007805347442627,grad_norm: 0.9740670202211025, iteration: 72523
loss: 1.0359671115875244,grad_norm: 0.8254133750469792, iteration: 72524
loss: 1.0116780996322632,grad_norm: 0.9999990901558093, iteration: 72525
loss: 0.999967098236084,grad_norm: 0.999999518598143, iteration: 72526
loss: 1.049700140953064,grad_norm: 0.9999991479816953, iteration: 72527
loss: 1.0068663358688354,grad_norm: 0.9545280382228623, iteration: 72528
loss: 1.0188007354736328,grad_norm: 0.9999991134025698, iteration: 72529
loss: 1.0159517526626587,grad_norm: 0.9852023346641673, iteration: 72530
loss: 0.9971923828125,grad_norm: 0.9999989480657898, iteration: 72531
loss: 1.0626333951950073,grad_norm: 0.8405324023369048, iteration: 72532
loss: 1.0877748727798462,grad_norm: 0.9999999198009731, iteration: 72533
loss: 1.0076305866241455,grad_norm: 0.9999991151737042, iteration: 72534
loss: 1.0210306644439697,grad_norm: 0.9999992650348563, iteration: 72535
loss: 1.0388860702514648,grad_norm: 0.9999995044687746, iteration: 72536
loss: 1.043272614479065,grad_norm: 0.9694156392105401, iteration: 72537
loss: 0.990329384803772,grad_norm: 0.9999990579041853, iteration: 72538
loss: 1.001890778541565,grad_norm: 0.9546946783233464, iteration: 72539
loss: 1.0457208156585693,grad_norm: 0.9999993949698093, iteration: 72540
loss: 1.005541443824768,grad_norm: 0.9359262111681694, iteration: 72541
loss: 0.9818915128707886,grad_norm: 0.9406926392079448, iteration: 72542
loss: 1.0587952136993408,grad_norm: 0.9999993555028777, iteration: 72543
loss: 1.0661273002624512,grad_norm: 0.9999998349469403, iteration: 72544
loss: 1.0204048156738281,grad_norm: 0.9999990636473475, iteration: 72545
loss: 1.1254725456237793,grad_norm: 0.9999993420209576, iteration: 72546
loss: 1.1377241611480713,grad_norm: 1.0000000205621868, iteration: 72547
loss: 1.0298737287521362,grad_norm: 0.925677961019295, iteration: 72548
loss: 0.966366708278656,grad_norm: 0.9999992255857013, iteration: 72549
loss: 0.9855378270149231,grad_norm: 0.7828316521977341, iteration: 72550
loss: 1.284497618675232,grad_norm: 0.9999997644002651, iteration: 72551
loss: 1.0275264978408813,grad_norm: 0.9541797517764462, iteration: 72552
loss: 1.0115114450454712,grad_norm: 0.9999991978763858, iteration: 72553
loss: 1.0094647407531738,grad_norm: 0.9999993931959658, iteration: 72554
loss: 1.0931155681610107,grad_norm: 0.9999997560769424, iteration: 72555
loss: 0.9715977907180786,grad_norm: 0.941505700502812, iteration: 72556
loss: 1.0657446384429932,grad_norm: 0.9999994586894537, iteration: 72557
loss: 1.022043228149414,grad_norm: 0.939244766637751, iteration: 72558
loss: 0.9930062890052795,grad_norm: 0.9999991037895603, iteration: 72559
loss: 1.0188831090927124,grad_norm: 0.9999998830157881, iteration: 72560
loss: 0.9758248925209045,grad_norm: 0.9999991363003847, iteration: 72561
loss: 1.058394432067871,grad_norm: 0.9999997544432859, iteration: 72562
loss: 1.0524024963378906,grad_norm: 0.9999999443009594, iteration: 72563
loss: 1.035427451133728,grad_norm: 0.9999996015163801, iteration: 72564
loss: 1.0198073387145996,grad_norm: 0.9142794216002945, iteration: 72565
loss: 1.0835436582565308,grad_norm: 0.9392907538815763, iteration: 72566
loss: 1.2407667636871338,grad_norm: 0.9999992231938141, iteration: 72567
loss: 1.132272720336914,grad_norm: 0.999999247451909, iteration: 72568
loss: 1.1660445928573608,grad_norm: 0.9999991938670963, iteration: 72569
loss: 1.184243083000183,grad_norm: 0.9999992936385649, iteration: 72570
loss: 1.1159683465957642,grad_norm: 0.9999996540880769, iteration: 72571
loss: 1.097396731376648,grad_norm: 0.9999992188456075, iteration: 72572
loss: 1.0515044927597046,grad_norm: 0.8660527207007828, iteration: 72573
loss: 1.2169206142425537,grad_norm: 0.9999994597978682, iteration: 72574
loss: 1.1901369094848633,grad_norm: 0.9999994472071164, iteration: 72575
loss: 1.3206310272216797,grad_norm: 0.99999963658219, iteration: 72576
loss: 1.1857980489730835,grad_norm: 0.9999993338999068, iteration: 72577
loss: 1.2251994609832764,grad_norm: 0.9999997099526838, iteration: 72578
loss: 1.064034342765808,grad_norm: 0.9999995245615828, iteration: 72579
loss: 1.4731162786483765,grad_norm: 0.9999998071285546, iteration: 72580
loss: 1.3251739740371704,grad_norm: 0.9999999042398153, iteration: 72581
loss: 1.0975956916809082,grad_norm: 0.9999997307158012, iteration: 72582
loss: 1.2880430221557617,grad_norm: 0.9999997243462463, iteration: 72583
loss: 1.089611530303955,grad_norm: 0.9999991658697405, iteration: 72584
loss: 1.1439021825790405,grad_norm: 0.9999991005552525, iteration: 72585
loss: 1.083268404006958,grad_norm: 0.9999994070460417, iteration: 72586
loss: 1.2171438932418823,grad_norm: 0.9999996198725587, iteration: 72587
loss: 1.3301726579666138,grad_norm: 0.9999998873274676, iteration: 72588
loss: 1.2640513181686401,grad_norm: 0.9999998869775167, iteration: 72589
loss: 1.3181804418563843,grad_norm: 0.9999995047121857, iteration: 72590
loss: 1.2028155326843262,grad_norm: 1.0000000595777367, iteration: 72591
loss: 1.0767573118209839,grad_norm: 0.9999993185745594, iteration: 72592
loss: 1.157088279724121,grad_norm: 0.9999997707764341, iteration: 72593
loss: 1.2890962362289429,grad_norm: 0.9999997683432622, iteration: 72594
loss: 1.338217854499817,grad_norm: 0.9999997945583614, iteration: 72595
loss: 1.0583882331848145,grad_norm: 0.9999994369540062, iteration: 72596
loss: 1.1121326684951782,grad_norm: 0.999999535759904, iteration: 72597
loss: 1.1999322175979614,grad_norm: 0.9999996997764472, iteration: 72598
loss: 1.0770783424377441,grad_norm: 0.9999997915417901, iteration: 72599
loss: 0.9942842125892639,grad_norm: 0.8955768152298267, iteration: 72600
loss: 1.0919737815856934,grad_norm: 0.9999996786400227, iteration: 72601
loss: 1.0150070190429688,grad_norm: 0.7974597404510322, iteration: 72602
loss: 1.0346952676773071,grad_norm: 0.9262385123165449, iteration: 72603
loss: 0.9884921312332153,grad_norm: 0.855184985030584, iteration: 72604
loss: 0.9847431778907776,grad_norm: 0.902839217370967, iteration: 72605
loss: 1.0786453485488892,grad_norm: 0.9999996555750638, iteration: 72606
loss: 1.0253297090530396,grad_norm: 0.9999990929791504, iteration: 72607
loss: 1.0825070142745972,grad_norm: 0.9999998353281665, iteration: 72608
loss: 1.3329370021820068,grad_norm: 0.9999995967447078, iteration: 72609
loss: 1.2208057641983032,grad_norm: 0.9999996076056463, iteration: 72610
loss: 0.9969403743743896,grad_norm: 0.9999991629979157, iteration: 72611
loss: 1.137740135192871,grad_norm: 0.9999998767445479, iteration: 72612
loss: 1.03830087184906,grad_norm: 0.9999993251794136, iteration: 72613
loss: 0.9904072284698486,grad_norm: 0.8423119262898687, iteration: 72614
loss: 0.987091600894928,grad_norm: 0.9999992029585126, iteration: 72615
loss: 1.1298335790634155,grad_norm: 0.9999997100889226, iteration: 72616
loss: 0.9597055912017822,grad_norm: 0.8031659099748341, iteration: 72617
loss: 0.9842798709869385,grad_norm: 0.9999990881556976, iteration: 72618
loss: 1.0485308170318604,grad_norm: 0.9999998309247115, iteration: 72619
loss: 1.1284966468811035,grad_norm: 0.9999996502804757, iteration: 72620
loss: 0.9956292510032654,grad_norm: 0.999999186072387, iteration: 72621
loss: 0.9869521856307983,grad_norm: 0.8071016699955538, iteration: 72622
loss: 0.9479116797447205,grad_norm: 0.9235458299682255, iteration: 72623
loss: 1.0049991607666016,grad_norm: 0.999999128801198, iteration: 72624
loss: 0.9712796807289124,grad_norm: 0.999999299294834, iteration: 72625
loss: 1.0482200384140015,grad_norm: 0.9999993075966831, iteration: 72626
loss: 1.0327749252319336,grad_norm: 0.9999998886168243, iteration: 72627
loss: 1.1706219911575317,grad_norm: 0.999999216669664, iteration: 72628
loss: 1.1381529569625854,grad_norm: 0.9999994314793398, iteration: 72629
loss: 1.146323323249817,grad_norm: 0.9999998498660817, iteration: 72630
loss: 0.9830119013786316,grad_norm: 0.9999991742823221, iteration: 72631
loss: 1.0267901420593262,grad_norm: 0.9651662386637363, iteration: 72632
loss: 1.0569230318069458,grad_norm: 0.9999990191406825, iteration: 72633
loss: 1.0239088535308838,grad_norm: 0.9999997742445524, iteration: 72634
loss: 1.0387005805969238,grad_norm: 0.9999993629141025, iteration: 72635
loss: 1.0169026851654053,grad_norm: 0.9262448742323515, iteration: 72636
loss: 0.9301669597625732,grad_norm: 0.9999993115984868, iteration: 72637
loss: 0.9619567394256592,grad_norm: 0.9350540337613226, iteration: 72638
loss: 0.975577712059021,grad_norm: 0.8983979597758807, iteration: 72639
loss: 1.0009325742721558,grad_norm: 0.9999990938184817, iteration: 72640
loss: 1.121317744255066,grad_norm: 0.9999995261664909, iteration: 72641
loss: 0.9789701700210571,grad_norm: 0.815833386965787, iteration: 72642
loss: 1.0580248832702637,grad_norm: 0.9999995911874041, iteration: 72643
loss: 1.034593939781189,grad_norm: 0.9999992985282946, iteration: 72644
loss: 0.9992809891700745,grad_norm: 0.9999991818741163, iteration: 72645
loss: 1.0202510356903076,grad_norm: 0.9375346162841931, iteration: 72646
loss: 1.0125819444656372,grad_norm: 0.9999991649301041, iteration: 72647
loss: 1.0137217044830322,grad_norm: 0.83821661131106, iteration: 72648
loss: 1.0003880262374878,grad_norm: 0.9137353182186104, iteration: 72649
loss: 1.0843418836593628,grad_norm: 0.8399278221062335, iteration: 72650
loss: 0.9823938608169556,grad_norm: 0.8130146422963813, iteration: 72651
loss: 0.989848256111145,grad_norm: 0.9999990688275437, iteration: 72652
loss: 0.9675015807151794,grad_norm: 0.9999991729215911, iteration: 72653
loss: 1.0044457912445068,grad_norm: 0.9999991431960759, iteration: 72654
loss: 1.0596399307250977,grad_norm: 0.99999984155755, iteration: 72655
loss: 1.0178245306015015,grad_norm: 0.9953643530091828, iteration: 72656
loss: 1.047319769859314,grad_norm: 0.9999994293610884, iteration: 72657
loss: 1.0867524147033691,grad_norm: 0.9665775824721334, iteration: 72658
loss: 0.9837100505828857,grad_norm: 0.9999994542696862, iteration: 72659
loss: 0.9952970743179321,grad_norm: 0.8390784312860418, iteration: 72660
loss: 0.9894541501998901,grad_norm: 0.9999991060487281, iteration: 72661
loss: 1.0274252891540527,grad_norm: 0.9999993849711588, iteration: 72662
loss: 1.004780650138855,grad_norm: 0.9160818508046077, iteration: 72663
loss: 1.0879074335098267,grad_norm: 0.9999990149865207, iteration: 72664
loss: 1.0342381000518799,grad_norm: 0.867374848327135, iteration: 72665
loss: 1.0274361371994019,grad_norm: 0.9999996627563104, iteration: 72666
loss: 0.9869059920310974,grad_norm: 0.9444139969213439, iteration: 72667
loss: 1.0323642492294312,grad_norm: 0.9999991001477816, iteration: 72668
loss: 1.005699634552002,grad_norm: 0.9824868592537126, iteration: 72669
loss: 0.9986435770988464,grad_norm: 0.9327910538511296, iteration: 72670
loss: 1.0999906063079834,grad_norm: 0.9999996307457643, iteration: 72671
loss: 0.996825635433197,grad_norm: 0.9999990243417531, iteration: 72672
loss: 0.98649662733078,grad_norm: 0.8751772656615909, iteration: 72673
loss: 1.0176416635513306,grad_norm: 0.9656874814601066, iteration: 72674
loss: 0.9765663146972656,grad_norm: 0.9083251090708471, iteration: 72675
loss: 1.0760602951049805,grad_norm: 0.9908561792844877, iteration: 72676
loss: 0.9736515283584595,grad_norm: 0.7798734347920966, iteration: 72677
loss: 1.0144284963607788,grad_norm: 0.9999990799251646, iteration: 72678
loss: 0.9981318712234497,grad_norm: 0.9999991961795845, iteration: 72679
loss: 1.0094270706176758,grad_norm: 0.889603561884632, iteration: 72680
loss: 1.001501202583313,grad_norm: 0.9324308232472315, iteration: 72681
loss: 0.9847078323364258,grad_norm: 0.7592272302400083, iteration: 72682
loss: 1.009590983390808,grad_norm: 0.8622910297157773, iteration: 72683
loss: 1.0356115102767944,grad_norm: 0.9999991410244592, iteration: 72684
loss: 1.0089913606643677,grad_norm: 0.9999991616144251, iteration: 72685
loss: 1.0194129943847656,grad_norm: 0.8632833772678307, iteration: 72686
loss: 1.0188984870910645,grad_norm: 0.9985598040542625, iteration: 72687
loss: 1.0529974699020386,grad_norm: 0.9999993238001947, iteration: 72688
loss: 0.9973999261856079,grad_norm: 0.9999990440570421, iteration: 72689
loss: 1.0192832946777344,grad_norm: 0.9999991896884047, iteration: 72690
loss: 0.9659225344657898,grad_norm: 0.8649242328127456, iteration: 72691
loss: 0.9690465927124023,grad_norm: 0.9298749283535056, iteration: 72692
loss: 1.0086718797683716,grad_norm: 0.9999992135320117, iteration: 72693
loss: 1.040389060974121,grad_norm: 0.8352745396803801, iteration: 72694
loss: 0.969506025314331,grad_norm: 0.830945391546182, iteration: 72695
loss: 1.0237776041030884,grad_norm: 0.8567327305004298, iteration: 72696
loss: 1.027404546737671,grad_norm: 0.9999990618454935, iteration: 72697
loss: 1.0177290439605713,grad_norm: 0.9094000613617278, iteration: 72698
loss: 1.072522521018982,grad_norm: 0.9999991357737021, iteration: 72699
loss: 1.02365243434906,grad_norm: 0.9999999060550492, iteration: 72700
loss: 0.9820978045463562,grad_norm: 0.8618053479373302, iteration: 72701
loss: 0.9908722639083862,grad_norm: 0.9999990561081239, iteration: 72702
loss: 0.9748930335044861,grad_norm: 0.9008635125825861, iteration: 72703
loss: 1.0588319301605225,grad_norm: 0.9999990795275995, iteration: 72704
loss: 0.9834651947021484,grad_norm: 0.9999994694053808, iteration: 72705
loss: 1.0627186298370361,grad_norm: 0.9999991039649675, iteration: 72706
loss: 1.0028082132339478,grad_norm: 0.9344640939137242, iteration: 72707
loss: 1.0376605987548828,grad_norm: 0.999999665565046, iteration: 72708
loss: 1.0220403671264648,grad_norm: 0.9999992191201602, iteration: 72709
loss: 1.0930451154708862,grad_norm: 0.9999992893740315, iteration: 72710
loss: 1.0135916471481323,grad_norm: 0.8316270478540523, iteration: 72711
loss: 1.0492502450942993,grad_norm: 0.9321105101132151, iteration: 72712
loss: 1.0083582401275635,grad_norm: 0.995588037233324, iteration: 72713
loss: 1.030971884727478,grad_norm: 0.9999995511207116, iteration: 72714
loss: 0.9923356175422668,grad_norm: 0.9999991595597555, iteration: 72715
loss: 1.006144642829895,grad_norm: 0.9329074157066373, iteration: 72716
loss: 0.9930777549743652,grad_norm: 0.8484027614027245, iteration: 72717
loss: 0.9990617632865906,grad_norm: 0.9355853191246258, iteration: 72718
loss: 0.9908157587051392,grad_norm: 0.9063606142625124, iteration: 72719
loss: 1.0180951356887817,grad_norm: 0.9999997274483913, iteration: 72720
loss: 1.00789475440979,grad_norm: 0.9525215230793891, iteration: 72721
loss: 1.0097522735595703,grad_norm: 0.8824707948115534, iteration: 72722
loss: 1.1208218336105347,grad_norm: 0.9999990840014462, iteration: 72723
loss: 1.0409560203552246,grad_norm: 0.9999990870887602, iteration: 72724
loss: 1.0723928213119507,grad_norm: 0.999999120296382, iteration: 72725
loss: 1.0158264636993408,grad_norm: 0.8311295411899209, iteration: 72726
loss: 1.0283371210098267,grad_norm: 0.8381706960101264, iteration: 72727
loss: 1.0324360132217407,grad_norm: 0.9999989942107239, iteration: 72728
loss: 1.00078547000885,grad_norm: 0.9999992169604922, iteration: 72729
loss: 1.015496850013733,grad_norm: 0.854516638707139, iteration: 72730
loss: 0.9944607019424438,grad_norm: 0.7523029586398234, iteration: 72731
loss: 1.0906732082366943,grad_norm: 0.9999994110122548, iteration: 72732
loss: 1.0409358739852905,grad_norm: 0.9999992927805353, iteration: 72733
loss: 1.0344502925872803,grad_norm: 0.9999994679780885, iteration: 72734
loss: 1.0692040920257568,grad_norm: 0.9999995197501551, iteration: 72735
loss: 0.9742916822433472,grad_norm: 0.9999991009007035, iteration: 72736
loss: 1.0175433158874512,grad_norm: 0.999999291657279, iteration: 72737
loss: 0.9687789082527161,grad_norm: 0.9999990692478433, iteration: 72738
loss: 1.0365878343582153,grad_norm: 0.9999994131065116, iteration: 72739
loss: 1.005014181137085,grad_norm: 0.9999992142160209, iteration: 72740
loss: 1.0058056116104126,grad_norm: 0.9701871727717005, iteration: 72741
loss: 1.001858115196228,grad_norm: 0.9908683426338231, iteration: 72742
loss: 1.0285732746124268,grad_norm: 0.9999999184468775, iteration: 72743
loss: 1.0726827383041382,grad_norm: 0.9999993024652148, iteration: 72744
loss: 1.0488466024398804,grad_norm: 0.9660337810983117, iteration: 72745
loss: 1.0010968446731567,grad_norm: 0.881636929216885, iteration: 72746
loss: 0.9898548126220703,grad_norm: 0.9999993934578325, iteration: 72747
loss: 0.9798533916473389,grad_norm: 0.9999991686251906, iteration: 72748
loss: 1.0547552108764648,grad_norm: 0.9999991097193073, iteration: 72749
loss: 1.0181894302368164,grad_norm: 0.9999990355862461, iteration: 72750
loss: 1.0174504518508911,grad_norm: 0.9202245919496409, iteration: 72751
loss: 1.1113277673721313,grad_norm: 0.9999994032189665, iteration: 72752
loss: 1.0034821033477783,grad_norm: 0.933788276391103, iteration: 72753
loss: 1.1085830926895142,grad_norm: 0.9999991362036067, iteration: 72754
loss: 0.9841454029083252,grad_norm: 0.8740499592509372, iteration: 72755
loss: 1.0440518856048584,grad_norm: 0.9999997765375591, iteration: 72756
loss: 1.0671626329421997,grad_norm: 0.9768113791895529, iteration: 72757
loss: 1.1700544357299805,grad_norm: 0.9999999003419067, iteration: 72758
loss: 0.9840375185012817,grad_norm: 0.9999991970202293, iteration: 72759
loss: 1.0164090394973755,grad_norm: 0.8677870214202533, iteration: 72760
loss: 1.0292205810546875,grad_norm: 0.9572886402089246, iteration: 72761
loss: 1.016775131225586,grad_norm: 0.9140690108298007, iteration: 72762
loss: 0.9688780903816223,grad_norm: 0.9510022640163515, iteration: 72763
loss: 1.004667043685913,grad_norm: 0.999999129728241, iteration: 72764
loss: 0.9833340644836426,grad_norm: 0.9999992439317501, iteration: 72765
loss: 0.9901670217514038,grad_norm: 0.9999989597388919, iteration: 72766
loss: 1.1328608989715576,grad_norm: 0.9999995876750601, iteration: 72767
loss: 1.009435772895813,grad_norm: 0.9999991543481054, iteration: 72768
loss: 1.0112628936767578,grad_norm: 0.9219282243241756, iteration: 72769
loss: 0.9865309000015259,grad_norm: 0.8092318267513086, iteration: 72770
loss: 1.0241150856018066,grad_norm: 0.9069295876153456, iteration: 72771
loss: 0.9985054135322571,grad_norm: 0.9999991565091018, iteration: 72772
loss: 1.1336205005645752,grad_norm: 0.9999993305983609, iteration: 72773
loss: 0.9915343523025513,grad_norm: 0.8394272050561541, iteration: 72774
loss: 0.9970215559005737,grad_norm: 0.8596235904297425, iteration: 72775
loss: 0.9982353448867798,grad_norm: 0.8480675049550029, iteration: 72776
loss: 0.9717261791229248,grad_norm: 0.9999990317733645, iteration: 72777
loss: 0.999489426612854,grad_norm: 0.9999994550513478, iteration: 72778
loss: 1.0046592950820923,grad_norm: 0.9999990634859315, iteration: 72779
loss: 0.9921312928199768,grad_norm: 0.8785524404290473, iteration: 72780
loss: 1.0124508142471313,grad_norm: 0.9321462843066982, iteration: 72781
loss: 0.9999265074729919,grad_norm: 0.9587331520173045, iteration: 72782
loss: 0.9916678071022034,grad_norm: 0.8027409381198962, iteration: 72783
loss: 0.9842160940170288,grad_norm: 0.8786073109854391, iteration: 72784
loss: 1.0350462198257446,grad_norm: 0.9047897490573964, iteration: 72785
loss: 0.9984108805656433,grad_norm: 0.989023709079317, iteration: 72786
loss: 1.029293417930603,grad_norm: 0.9999995091016444, iteration: 72787
loss: 0.9876412153244019,grad_norm: 0.9556145346558372, iteration: 72788
loss: 1.021897315979004,grad_norm: 0.9108740787707886, iteration: 72789
loss: 1.13351309299469,grad_norm: 0.9999995598252847, iteration: 72790
loss: 1.0419068336486816,grad_norm: 0.9999992536182325, iteration: 72791
loss: 0.9762835502624512,grad_norm: 0.9801941633109427, iteration: 72792
loss: 0.9898281693458557,grad_norm: 0.9999991089580496, iteration: 72793
loss: 0.9529178142547607,grad_norm: 0.9318829571768134, iteration: 72794
loss: 0.994695782661438,grad_norm: 0.999999269326732, iteration: 72795
loss: 1.0464098453521729,grad_norm: 0.9999992446899247, iteration: 72796
loss: 1.0115396976470947,grad_norm: 0.9999998025226537, iteration: 72797
loss: 1.0071141719818115,grad_norm: 0.8458450858569745, iteration: 72798
loss: 1.0223228931427002,grad_norm: 0.9999999371932269, iteration: 72799
loss: 0.9939015507698059,grad_norm: 0.9999991331195214, iteration: 72800
loss: 0.9885029196739197,grad_norm: 0.9010133044565011, iteration: 72801
loss: 1.0163692235946655,grad_norm: 0.862031757066102, iteration: 72802
loss: 1.1162400245666504,grad_norm: 0.9999995431682123, iteration: 72803
loss: 0.9932634830474854,grad_norm: 0.8271785827229194, iteration: 72804
loss: 1.0204764604568481,grad_norm: 0.9999990095655943, iteration: 72805
loss: 1.0254390239715576,grad_norm: 0.9999991329134119, iteration: 72806
loss: 0.9649576544761658,grad_norm: 0.8975933847796721, iteration: 72807
loss: 1.0081877708435059,grad_norm: 0.9999991762847104, iteration: 72808
loss: 1.0432778596878052,grad_norm: 0.9999991127874417, iteration: 72809
loss: 0.976116418838501,grad_norm: 0.8412822263825825, iteration: 72810
loss: 1.0231441259384155,grad_norm: 0.9999990709609066, iteration: 72811
loss: 1.0557247400283813,grad_norm: 0.7179193162774232, iteration: 72812
loss: 0.9956627488136292,grad_norm: 0.8342806266146148, iteration: 72813
loss: 1.0561734437942505,grad_norm: 0.9974056205117282, iteration: 72814
loss: 1.0342644453048706,grad_norm: 0.9948933213328498, iteration: 72815
loss: 0.9804818630218506,grad_norm: 0.9379949502492041, iteration: 72816
loss: 0.9989243149757385,grad_norm: 0.7674498183667496, iteration: 72817
loss: 1.0295125246047974,grad_norm: 0.8438051782756419, iteration: 72818
loss: 0.9800781607627869,grad_norm: 0.9999990420007905, iteration: 72819
loss: 0.9797908663749695,grad_norm: 0.9999993663523074, iteration: 72820
loss: 0.9918109178543091,grad_norm: 0.9999993639211499, iteration: 72821
loss: 1.1388556957244873,grad_norm: 0.999999153400091, iteration: 72822
loss: 1.0297961235046387,grad_norm: 0.999999160282149, iteration: 72823
loss: 0.9946956634521484,grad_norm: 0.9339803218250231, iteration: 72824
loss: 0.9949025511741638,grad_norm: 0.9412422577344727, iteration: 72825
loss: 1.028790831565857,grad_norm: 0.999999554989434, iteration: 72826
loss: 1.0274101495742798,grad_norm: 0.9999990876514654, iteration: 72827
loss: 0.9966947436332703,grad_norm: 0.9999990952101583, iteration: 72828
loss: 1.0137709379196167,grad_norm: 0.9384704493068335, iteration: 72829
loss: 0.9844886660575867,grad_norm: 0.9570934202758706, iteration: 72830
loss: 1.0168988704681396,grad_norm: 0.9226455168654908, iteration: 72831
loss: 0.9819578528404236,grad_norm: 0.9069656783986301, iteration: 72832
loss: 1.0102031230926514,grad_norm: 0.9999996330116298, iteration: 72833
loss: 1.042044997215271,grad_norm: 0.9999991706600428, iteration: 72834
loss: 1.1689611673355103,grad_norm: 0.9999998941347781, iteration: 72835
loss: 0.9963865280151367,grad_norm: 0.9888641118990193, iteration: 72836
loss: 0.9922170639038086,grad_norm: 0.8630732480810954, iteration: 72837
loss: 1.0395992994308472,grad_norm: 0.8181640174280705, iteration: 72838
loss: 1.0069972276687622,grad_norm: 0.9426909786265979, iteration: 72839
loss: 1.0130852460861206,grad_norm: 0.9485245786206187, iteration: 72840
loss: 1.005268931388855,grad_norm: 0.9335186427740726, iteration: 72841
loss: 1.1290189027786255,grad_norm: 0.9999994432877133, iteration: 72842
loss: 0.9897297620773315,grad_norm: 0.9999991733385806, iteration: 72843
loss: 1.1053853034973145,grad_norm: 0.9999998359795049, iteration: 72844
loss: 0.9999540448188782,grad_norm: 0.8961360182038097, iteration: 72845
loss: 1.0537370443344116,grad_norm: 0.9999994497974085, iteration: 72846
loss: 0.9765212535858154,grad_norm: 0.9999993016702845, iteration: 72847
loss: 1.071894884109497,grad_norm: 0.9999998852059977, iteration: 72848
loss: 1.0367220640182495,grad_norm: 0.9928447402392655, iteration: 72849
loss: 1.008882999420166,grad_norm: 0.999999052763098, iteration: 72850
loss: 1.0924146175384521,grad_norm: 0.9999990978220679, iteration: 72851
loss: 0.983291506767273,grad_norm: 0.8952986307799143, iteration: 72852
loss: 1.0554615259170532,grad_norm: 0.999998984891142, iteration: 72853
loss: 0.9859786033630371,grad_norm: 0.9857236557621746, iteration: 72854
loss: 0.9755911231040955,grad_norm: 0.9162185566336041, iteration: 72855
loss: 1.0042970180511475,grad_norm: 0.9999992087250229, iteration: 72856
loss: 1.0045900344848633,grad_norm: 0.9999990862314379, iteration: 72857
loss: 1.027982234954834,grad_norm: 0.9999994415325018, iteration: 72858
loss: 1.0791670083999634,grad_norm: 0.9999993536026441, iteration: 72859
loss: 1.033298373222351,grad_norm: 0.9999988919483774, iteration: 72860
loss: 1.0252686738967896,grad_norm: 0.9561378046994459, iteration: 72861
loss: 1.0215182304382324,grad_norm: 0.9003162788753022, iteration: 72862
loss: 1.0222985744476318,grad_norm: 0.9999990910194959, iteration: 72863
loss: 1.041650414466858,grad_norm: 0.983664671257314, iteration: 72864
loss: 0.984622061252594,grad_norm: 0.9999990131726102, iteration: 72865
loss: 1.0020197629928589,grad_norm: 0.8833391886672988, iteration: 72866
loss: 1.031063199043274,grad_norm: 0.9387106311660264, iteration: 72867
loss: 1.0816090106964111,grad_norm: 0.9999992626625479, iteration: 72868
loss: 0.9856153130531311,grad_norm: 0.9999990281083274, iteration: 72869
loss: 1.005548119544983,grad_norm: 0.9999990748700344, iteration: 72870
loss: 0.9976796507835388,grad_norm: 0.9999991489837449, iteration: 72871
loss: 1.035370111465454,grad_norm: 0.9939503324516388, iteration: 72872
loss: 1.0386406183242798,grad_norm: 0.9999991562686531, iteration: 72873
loss: 0.9842028021812439,grad_norm: 0.9999993593496451, iteration: 72874
loss: 1.0228837728500366,grad_norm: 0.9243661565100117, iteration: 72875
loss: 1.0158480405807495,grad_norm: 0.903701579286287, iteration: 72876
loss: 0.998723030090332,grad_norm: 0.9999989444483509, iteration: 72877
loss: 0.9976247549057007,grad_norm: 0.9999993948519259, iteration: 72878
loss: 0.9906930923461914,grad_norm: 0.9814822940176682, iteration: 72879
loss: 1.1048498153686523,grad_norm: 0.9999998423893741, iteration: 72880
loss: 1.0280357599258423,grad_norm: 0.9556928827713875, iteration: 72881
loss: 1.0430673360824585,grad_norm: 0.9510797057239625, iteration: 72882
loss: 1.0344659090042114,grad_norm: 0.9999999132628513, iteration: 72883
loss: 1.0654211044311523,grad_norm: 0.999999597702992, iteration: 72884
loss: 1.0220688581466675,grad_norm: 0.8631436248928062, iteration: 72885
loss: 0.9849236607551575,grad_norm: 0.9044049130331661, iteration: 72886
loss: 1.0644725561141968,grad_norm: 0.9142413207284098, iteration: 72887
loss: 1.0038766860961914,grad_norm: 0.8405927928959638, iteration: 72888
loss: 1.1662256717681885,grad_norm: 0.9999991114495148, iteration: 72889
loss: 1.0424236059188843,grad_norm: 0.999999330700426, iteration: 72890
loss: 0.9869458675384521,grad_norm: 0.9132571564696117, iteration: 72891
loss: 0.9492421746253967,grad_norm: 0.8404121656186885, iteration: 72892
loss: 1.0192195177078247,grad_norm: 0.9999991050192196, iteration: 72893
loss: 0.9769247174263,grad_norm: 0.9192833544906441, iteration: 72894
loss: 0.9907833933830261,grad_norm: 0.9631706541993126, iteration: 72895
loss: 1.061320424079895,grad_norm: 0.999999492085478, iteration: 72896
loss: 1.0290286540985107,grad_norm: 0.9999997274188017, iteration: 72897
loss: 1.0384113788604736,grad_norm: 0.99999912510145, iteration: 72898
loss: 1.4991827011108398,grad_norm: 1.0000000004222112, iteration: 72899
loss: 0.9737852215766907,grad_norm: 0.9102894188289182, iteration: 72900
loss: 1.0551156997680664,grad_norm: 0.9999991787206747, iteration: 72901
loss: 1.035393238067627,grad_norm: 0.7837999349491985, iteration: 72902
loss: 0.9866002202033997,grad_norm: 0.8899674536737572, iteration: 72903
loss: 0.9879842400550842,grad_norm: 0.8412981236588525, iteration: 72904
loss: 0.982606053352356,grad_norm: 0.8003628735671464, iteration: 72905
loss: 1.0857279300689697,grad_norm: 0.9999994843180952, iteration: 72906
loss: 1.0104128122329712,grad_norm: 0.9573082551044323, iteration: 72907
loss: 1.0167789459228516,grad_norm: 0.9999998735830313, iteration: 72908
loss: 1.0067484378814697,grad_norm: 0.9999995246156071, iteration: 72909
loss: 1.064028263092041,grad_norm: 0.9999994088455599, iteration: 72910
loss: 1.0873534679412842,grad_norm: 0.9606339391788088, iteration: 72911
loss: 1.1744400262832642,grad_norm: 0.9999992578364529, iteration: 72912
loss: 1.213474988937378,grad_norm: 0.9999992109694164, iteration: 72913
loss: 1.4013330936431885,grad_norm: 0.999999959944718, iteration: 72914
loss: 1.0045650005340576,grad_norm: 0.9999996049392489, iteration: 72915
loss: 1.2906807661056519,grad_norm: 0.9999994064966087, iteration: 72916
loss: 1.0466092824935913,grad_norm: 0.9999996986225383, iteration: 72917
loss: 1.0796663761138916,grad_norm: 0.9999993388281202, iteration: 72918
loss: 1.030303955078125,grad_norm: 0.9095294766733464, iteration: 72919
loss: 1.090890645980835,grad_norm: 0.9999997635161497, iteration: 72920
loss: 1.0069952011108398,grad_norm: 0.9371883393782686, iteration: 72921
loss: 1.246953010559082,grad_norm: 0.9999996983567628, iteration: 72922
loss: 1.1262236833572388,grad_norm: 0.9999994767216779, iteration: 72923
loss: 1.0846233367919922,grad_norm: 0.9999992868568183, iteration: 72924
loss: 0.9808506965637207,grad_norm: 0.9999990769452427, iteration: 72925
loss: 1.1067050695419312,grad_norm: 0.9999990977072835, iteration: 72926
loss: 0.9921820163726807,grad_norm: 0.964586098093167, iteration: 72927
loss: 1.0428707599639893,grad_norm: 0.9999991665421156, iteration: 72928
loss: 1.254194974899292,grad_norm: 0.9999999108333052, iteration: 72929
loss: 1.0340149402618408,grad_norm: 0.9233893837806946, iteration: 72930
loss: 1.2567400932312012,grad_norm: 0.9999996517320385, iteration: 72931
loss: 1.0619231462478638,grad_norm: 0.9999997219086918, iteration: 72932
loss: 1.0318642854690552,grad_norm: 0.7897280688444049, iteration: 72933
loss: 1.027590036392212,grad_norm: 0.9999993345866478, iteration: 72934
loss: 1.0351600646972656,grad_norm: 0.9999993730746809, iteration: 72935
loss: 1.0864838361740112,grad_norm: 0.9999994428670603, iteration: 72936
loss: 1.014294147491455,grad_norm: 0.9999991391875429, iteration: 72937
loss: 0.9943298101425171,grad_norm: 0.9999990391974298, iteration: 72938
loss: 1.0123542547225952,grad_norm: 0.8100788005901348, iteration: 72939
loss: 1.0293209552764893,grad_norm: 0.9999992806313057, iteration: 72940
loss: 1.015028476715088,grad_norm: 0.9999996886888676, iteration: 72941
loss: 1.0603697299957275,grad_norm: 0.9999993328218492, iteration: 72942
loss: 1.0028512477874756,grad_norm: 0.9999991007382912, iteration: 72943
loss: 1.035996913909912,grad_norm: 0.999999614364748, iteration: 72944
loss: 1.081954002380371,grad_norm: 0.9999992743729854, iteration: 72945
loss: 0.9910346865653992,grad_norm: 0.9999992205381535, iteration: 72946
loss: 1.0802844762802124,grad_norm: 0.9999997206160537, iteration: 72947
loss: 1.0442439317703247,grad_norm: 0.9999995149610432, iteration: 72948
loss: 0.9998252987861633,grad_norm: 0.9183745581696146, iteration: 72949
loss: 0.9980430006980896,grad_norm: 0.9999995200278899, iteration: 72950
loss: 1.087929129600525,grad_norm: 0.9999992537496493, iteration: 72951
loss: 0.9921090602874756,grad_norm: 0.9999990341485604, iteration: 72952
loss: 1.1364384889602661,grad_norm: 0.9999997048045495, iteration: 72953
loss: 0.9999496936798096,grad_norm: 0.9999991108026755, iteration: 72954
loss: 0.9693852663040161,grad_norm: 0.999999196295024, iteration: 72955
loss: 1.0186458826065063,grad_norm: 0.9999992667508198, iteration: 72956
loss: 1.0938056707382202,grad_norm: 0.9999993980395164, iteration: 72957
loss: 1.1222712993621826,grad_norm: 0.9999995274305798, iteration: 72958
loss: 0.9942649006843567,grad_norm: 0.999999085748631, iteration: 72959
loss: 1.0413908958435059,grad_norm: 0.9999991921374133, iteration: 72960
loss: 1.1626237630844116,grad_norm: 0.9999999320561702, iteration: 72961
loss: 1.1254926919937134,grad_norm: 0.9999998057596128, iteration: 72962
loss: 1.0562031269073486,grad_norm: 0.9999996260849753, iteration: 72963
loss: 1.000566840171814,grad_norm: 0.8737244098385075, iteration: 72964
loss: 1.0420849323272705,grad_norm: 0.9999993412703487, iteration: 72965
loss: 1.0356031656265259,grad_norm: 0.9999996615208263, iteration: 72966
loss: 1.0213168859481812,grad_norm: 0.9999990665361669, iteration: 72967
loss: 0.981212854385376,grad_norm: 0.9999990625210513, iteration: 72968
loss: 1.0535444021224976,grad_norm: 0.9727411075135649, iteration: 72969
loss: 1.0704210996627808,grad_norm: 0.9999994370814146, iteration: 72970
loss: 1.0384820699691772,grad_norm: 0.9999998043793336, iteration: 72971
loss: 0.985043466091156,grad_norm: 0.9999991498887578, iteration: 72972
loss: 1.009817361831665,grad_norm: 0.8414681626193605, iteration: 72973
loss: 1.0006461143493652,grad_norm: 0.8094693267787502, iteration: 72974
loss: 1.038477897644043,grad_norm: 0.9999997237727866, iteration: 72975
loss: 1.0329734086990356,grad_norm: 0.9999992814039417, iteration: 72976
loss: 1.0460789203643799,grad_norm: 0.9999991776175382, iteration: 72977
loss: 0.967110276222229,grad_norm: 0.9999992766315982, iteration: 72978
loss: 1.0276120901107788,grad_norm: 0.9999990277953632, iteration: 72979
loss: 1.0654942989349365,grad_norm: 0.999999368214486, iteration: 72980
loss: 1.0582630634307861,grad_norm: 0.9999994430490557, iteration: 72981
loss: 1.0233500003814697,grad_norm: 0.9999992200697415, iteration: 72982
loss: 1.0320385694503784,grad_norm: 0.8614967040433043, iteration: 72983
loss: 1.0152630805969238,grad_norm: 0.9999994007093884, iteration: 72984
loss: 1.0242598056793213,grad_norm: 0.9673105559645461, iteration: 72985
loss: 1.1135376691818237,grad_norm: 0.9999993926989739, iteration: 72986
loss: 1.0594019889831543,grad_norm: 0.9999990243337481, iteration: 72987
loss: 1.0112215280532837,grad_norm: 0.9999991300422156, iteration: 72988
loss: 0.985155463218689,grad_norm: 0.9999995798762574, iteration: 72989
loss: 0.9766728281974792,grad_norm: 0.9999997709151803, iteration: 72990
loss: 0.9961335062980652,grad_norm: 0.8530281804874695, iteration: 72991
loss: 0.9918381571769714,grad_norm: 0.9288598486605495, iteration: 72992
loss: 1.0651530027389526,grad_norm: 0.99999975692056, iteration: 72993
loss: 1.0398873090744019,grad_norm: 0.9999991332266751, iteration: 72994
loss: 0.9974935054779053,grad_norm: 0.9079449252773696, iteration: 72995
loss: 1.1823328733444214,grad_norm: 0.999999871154018, iteration: 72996
loss: 0.990512490272522,grad_norm: 0.949531845165394, iteration: 72997
loss: 0.9813054203987122,grad_norm: 0.9999990042157312, iteration: 72998
loss: 1.1466162204742432,grad_norm: 0.9999997228376477, iteration: 72999
loss: 1.0010170936584473,grad_norm: 0.9760105829350696, iteration: 73000
loss: 1.0232185125350952,grad_norm: 0.9999997561798107, iteration: 73001
loss: 1.0692867040634155,grad_norm: 0.9999994554640169, iteration: 73002
loss: 1.022184133529663,grad_norm: 0.8510280033164447, iteration: 73003
loss: 0.9778969883918762,grad_norm: 0.8264706971476906, iteration: 73004
loss: 1.0738868713378906,grad_norm: 0.9999998729797089, iteration: 73005
loss: 1.0104634761810303,grad_norm: 0.9628901777761417, iteration: 73006
loss: 1.0340722799301147,grad_norm: 0.9173222477721559, iteration: 73007
loss: 0.964433491230011,grad_norm: 0.99999912522443, iteration: 73008
loss: 1.1087788343429565,grad_norm: 0.9999996314179961, iteration: 73009
loss: 0.9993805885314941,grad_norm: 0.999999581607744, iteration: 73010
loss: 1.0383579730987549,grad_norm: 0.9999991460744244, iteration: 73011
loss: 0.993593692779541,grad_norm: 0.9195048071681128, iteration: 73012
loss: 0.9950309991836548,grad_norm: 0.8604522565646257, iteration: 73013
loss: 1.0114123821258545,grad_norm: 0.8407945346821987, iteration: 73014
loss: 0.9937437176704407,grad_norm: 0.8561562487813973, iteration: 73015
loss: 0.9973681569099426,grad_norm: 0.9999992007443785, iteration: 73016
loss: 1.0747545957565308,grad_norm: 0.999999210382786, iteration: 73017
loss: 1.0213558673858643,grad_norm: 0.8701411177393092, iteration: 73018
loss: 0.9962882399559021,grad_norm: 0.9136828112203693, iteration: 73019
loss: 0.9920317530632019,grad_norm: 0.987741843432724, iteration: 73020
loss: 1.1120187044143677,grad_norm: 0.999999848388508, iteration: 73021
loss: 1.0432538986206055,grad_norm: 0.9999992872539354, iteration: 73022
loss: 1.0511418581008911,grad_norm: 0.9999999174719709, iteration: 73023
loss: 1.0782183408737183,grad_norm: 1.000000104206169, iteration: 73024
loss: 1.0577250719070435,grad_norm: 0.9999994843295422, iteration: 73025
loss: 0.9804881811141968,grad_norm: 0.9558550224701451, iteration: 73026
loss: 1.0736290216445923,grad_norm: 0.9999997129520399, iteration: 73027
loss: 0.9462729692459106,grad_norm: 0.897976215425139, iteration: 73028
loss: 1.0652714967727661,grad_norm: 0.9999995290383824, iteration: 73029
loss: 1.0113308429718018,grad_norm: 0.874319549873754, iteration: 73030
loss: 1.0876355171203613,grad_norm: 0.9999995712537684, iteration: 73031
loss: 1.4466805458068848,grad_norm: 0.999999808631097, iteration: 73032
loss: 1.2295469045639038,grad_norm: 0.9999998311106313, iteration: 73033
loss: 1.1583847999572754,grad_norm: 0.9999997139899244, iteration: 73034
loss: 1.024911880493164,grad_norm: 0.999999436395529, iteration: 73035
loss: 0.9707851409912109,grad_norm: 0.9999991254945805, iteration: 73036
loss: 1.0957889556884766,grad_norm: 0.9999998609360179, iteration: 73037
loss: 0.9866981506347656,grad_norm: 0.8347188215832371, iteration: 73038
loss: 1.0406701564788818,grad_norm: 0.9999992684330659, iteration: 73039
loss: 1.1525046825408936,grad_norm: 0.9999999014381618, iteration: 73040
loss: 1.1174880266189575,grad_norm: 0.9999993724047459, iteration: 73041
loss: 1.0081239938735962,grad_norm: 0.9999992296529544, iteration: 73042
loss: 1.0280665159225464,grad_norm: 0.9999993270791949, iteration: 73043
loss: 1.2332465648651123,grad_norm: 0.9999994182441858, iteration: 73044
loss: 1.0303208827972412,grad_norm: 0.9999992130010512, iteration: 73045
loss: 1.3679943084716797,grad_norm: 0.9999995766500579, iteration: 73046
loss: 1.1098638772964478,grad_norm: 0.999999089149023, iteration: 73047
loss: 1.0212558507919312,grad_norm: 0.9999990530402492, iteration: 73048
loss: 1.093992829322815,grad_norm: 0.9999998812427922, iteration: 73049
loss: 1.0291860103607178,grad_norm: 0.9999991191398919, iteration: 73050
loss: 1.0190016031265259,grad_norm: 0.9837941896964948, iteration: 73051
loss: 0.9963552951812744,grad_norm: 0.9446455627228223, iteration: 73052
loss: 1.1286675930023193,grad_norm: 0.9999998614689211, iteration: 73053
loss: 1.0080729722976685,grad_norm: 0.9360248607510228, iteration: 73054
loss: 1.0734797716140747,grad_norm: 0.9999990683507491, iteration: 73055
loss: 1.0023002624511719,grad_norm: 0.9679436736463075, iteration: 73056
loss: 1.297865629196167,grad_norm: 0.9999997404895524, iteration: 73057
loss: 1.0505928993225098,grad_norm: 0.999999413238456, iteration: 73058
loss: 1.0371125936508179,grad_norm: 0.9195908658312844, iteration: 73059
loss: 1.026618242263794,grad_norm: 0.9999993627529533, iteration: 73060
loss: 1.0484414100646973,grad_norm: 0.9999989650166778, iteration: 73061
loss: 0.9703641533851624,grad_norm: 0.9999990792980356, iteration: 73062
loss: 0.9914103746414185,grad_norm: 0.8800780102007387, iteration: 73063
loss: 1.126615285873413,grad_norm: 0.8932882626909057, iteration: 73064
loss: 1.1065765619277954,grad_norm: 0.9999998469027355, iteration: 73065
loss: 0.9980488419532776,grad_norm: 0.9386569363151591, iteration: 73066
loss: 1.0211677551269531,grad_norm: 0.9999999727387977, iteration: 73067
loss: 1.004459261894226,grad_norm: 0.9999992008749414, iteration: 73068
loss: 1.0117080211639404,grad_norm: 0.9965994035072814, iteration: 73069
loss: 1.0201150178909302,grad_norm: 0.9999990385878962, iteration: 73070
loss: 0.9934685826301575,grad_norm: 0.9129234936185027, iteration: 73071
loss: 1.000868320465088,grad_norm: 0.8746843628525011, iteration: 73072
loss: 1.0444974899291992,grad_norm: 0.9999996938653115, iteration: 73073
loss: 0.9923049211502075,grad_norm: 0.9999991797522322, iteration: 73074
loss: 1.0860786437988281,grad_norm: 0.9999996757183257, iteration: 73075
loss: 0.9808321595191956,grad_norm: 0.9999992432889182, iteration: 73076
loss: 0.9854037761688232,grad_norm: 0.9999996315678191, iteration: 73077
loss: 1.0069736242294312,grad_norm: 0.9609873877629002, iteration: 73078
loss: 1.008034110069275,grad_norm: 0.9999992524148477, iteration: 73079
loss: 1.0084298849105835,grad_norm: 0.9999998279011705, iteration: 73080
loss: 1.1065369844436646,grad_norm: 0.9999997108776748, iteration: 73081
loss: 1.0062391757965088,grad_norm: 0.941071829006211, iteration: 73082
loss: 1.024902582168579,grad_norm: 0.9999992013424451, iteration: 73083
loss: 0.9911346435546875,grad_norm: 0.9999991603020052, iteration: 73084
loss: 1.1036701202392578,grad_norm: 0.9999993734929451, iteration: 73085
loss: 1.0837812423706055,grad_norm: 0.9999991279702269, iteration: 73086
loss: 1.0462061166763306,grad_norm: 0.999999493489952, iteration: 73087
loss: 1.0221357345581055,grad_norm: 0.9999990481430922, iteration: 73088
loss: 1.0060640573501587,grad_norm: 0.9283713989990238, iteration: 73089
loss: 1.0713251829147339,grad_norm: 0.9999989989364837, iteration: 73090
loss: 1.0028736591339111,grad_norm: 0.9999992258790122, iteration: 73091
loss: 0.9860918521881104,grad_norm: 0.7381004976633565, iteration: 73092
loss: 1.0486887693405151,grad_norm: 0.9999991334448354, iteration: 73093
loss: 0.9867830872535706,grad_norm: 0.9964613074770137, iteration: 73094
loss: 1.0197217464447021,grad_norm: 0.843217073906183, iteration: 73095
loss: 1.0082350969314575,grad_norm: 0.9718776784175381, iteration: 73096
loss: 0.9486163854598999,grad_norm: 0.9999990636706592, iteration: 73097
loss: 1.0053977966308594,grad_norm: 0.9999994192424166, iteration: 73098
loss: 0.9639245271682739,grad_norm: 0.9999995053806398, iteration: 73099
loss: 1.0323853492736816,grad_norm: 0.9017347219095599, iteration: 73100
loss: 1.0053786039352417,grad_norm: 0.902724852149111, iteration: 73101
loss: 1.1222585439682007,grad_norm: 0.9999990875311129, iteration: 73102
loss: 1.0859235525131226,grad_norm: 0.9999994876513746, iteration: 73103
loss: 1.0020853281021118,grad_norm: 0.9999990686872233, iteration: 73104
loss: 1.0229523181915283,grad_norm: 0.9553811952208311, iteration: 73105
loss: 1.0245658159255981,grad_norm: 0.9999990442255787, iteration: 73106
loss: 1.1459696292877197,grad_norm: 0.9999997135678276, iteration: 73107
loss: 0.9850232005119324,grad_norm: 0.8519331485437206, iteration: 73108
loss: 1.0020500421524048,grad_norm: 0.8772942640141882, iteration: 73109
loss: 1.016892671585083,grad_norm: 0.8189524808254761, iteration: 73110
loss: 1.0010889768600464,grad_norm: 0.8414883398730754, iteration: 73111
loss: 0.996259331703186,grad_norm: 0.8811688302594095, iteration: 73112
loss: 0.9901018142700195,grad_norm: 0.8523014493652427, iteration: 73113
loss: 0.988304615020752,grad_norm: 0.9999989955556523, iteration: 73114
loss: 0.9862291812896729,grad_norm: 0.9999990910061416, iteration: 73115
loss: 0.9742175340652466,grad_norm: 0.9999995878946525, iteration: 73116
loss: 0.9775041937828064,grad_norm: 0.9999991589563202, iteration: 73117
loss: 1.0531880855560303,grad_norm: 0.9999993598076233, iteration: 73118
loss: 1.0158990621566772,grad_norm: 0.9413980350989325, iteration: 73119
loss: 1.0242704153060913,grad_norm: 0.6999056944376038, iteration: 73120
loss: 0.998201847076416,grad_norm: 0.877818436926372, iteration: 73121
loss: 0.9830506443977356,grad_norm: 0.9434990774742336, iteration: 73122
loss: 0.9900231957435608,grad_norm: 0.9055344524798543, iteration: 73123
loss: 1.0009551048278809,grad_norm: 0.9750832058083465, iteration: 73124
loss: 0.9894827008247375,grad_norm: 0.9711824208513938, iteration: 73125
loss: 0.9930487871170044,grad_norm: 0.9122880850785844, iteration: 73126
loss: 1.0235141515731812,grad_norm: 0.9999990325795188, iteration: 73127
loss: 1.0245882272720337,grad_norm: 0.854075358540135, iteration: 73128
loss: 1.0794477462768555,grad_norm: 0.9999999540969378, iteration: 73129
loss: 1.0417842864990234,grad_norm: 0.9131550621655427, iteration: 73130
loss: 1.0301731824874878,grad_norm: 0.9119759548677104, iteration: 73131
loss: 0.9459188580513,grad_norm: 0.8725318888775306, iteration: 73132
loss: 0.9795194268226624,grad_norm: 0.8287676146561601, iteration: 73133
loss: 1.0190205574035645,grad_norm: 0.9901689623011803, iteration: 73134
loss: 1.0198040008544922,grad_norm: 0.9999990663087172, iteration: 73135
loss: 1.0277248620986938,grad_norm: 0.9858167431188248, iteration: 73136
loss: 1.0035808086395264,grad_norm: 0.8662915555125456, iteration: 73137
loss: 1.0702855587005615,grad_norm: 0.9999997863980904, iteration: 73138
loss: 1.0434273481369019,grad_norm: 0.999999279144058, iteration: 73139
loss: 1.0521763563156128,grad_norm: 0.909675077191641, iteration: 73140
loss: 1.0124601125717163,grad_norm: 0.9999992546566655, iteration: 73141
loss: 0.9994299411773682,grad_norm: 0.8259112423120141, iteration: 73142
loss: 0.9918292760848999,grad_norm: 0.9244147712689598, iteration: 73143
loss: 1.0083959102630615,grad_norm: 0.8465317296742423, iteration: 73144
loss: 0.9860049486160278,grad_norm: 0.9999990158250163, iteration: 73145
loss: 0.996410071849823,grad_norm: 0.9126041073326477, iteration: 73146
loss: 0.9938428401947021,grad_norm: 0.9999992472823173, iteration: 73147
loss: 1.0412712097167969,grad_norm: 0.842345121712404, iteration: 73148
loss: 1.0061521530151367,grad_norm: 0.9764256566771075, iteration: 73149
loss: 0.9498738050460815,grad_norm: 0.999999156760999, iteration: 73150
loss: 0.9868940711021423,grad_norm: 0.8496165704348078, iteration: 73151
loss: 1.136859655380249,grad_norm: 0.999999986490717, iteration: 73152
loss: 1.0619505643844604,grad_norm: 0.9999993316806776, iteration: 73153
loss: 1.0420019626617432,grad_norm: 0.9202042356320242, iteration: 73154
loss: 1.0063444375991821,grad_norm: 0.999999126868718, iteration: 73155
loss: 1.0195714235305786,grad_norm: 0.8982024861632998, iteration: 73156
loss: 1.0193496942520142,grad_norm: 0.8069067583701461, iteration: 73157
loss: 1.0205333232879639,grad_norm: 0.9999992321774547, iteration: 73158
loss: 0.9742526412010193,grad_norm: 0.8512226214288224, iteration: 73159
loss: 1.0207847356796265,grad_norm: 0.9999991196797018, iteration: 73160
loss: 1.0123728513717651,grad_norm: 0.9999990898968532, iteration: 73161
loss: 1.025693416595459,grad_norm: 0.9999990672844568, iteration: 73162
loss: 0.9729270935058594,grad_norm: 0.8950172448766806, iteration: 73163
loss: 0.9868514537811279,grad_norm: 0.9513533251306577, iteration: 73164
loss: 1.0432363748550415,grad_norm: 0.9999993551411195, iteration: 73165
loss: 1.0104725360870361,grad_norm: 0.9999992687118823, iteration: 73166
loss: 0.9579062461853027,grad_norm: 0.7586953217910223, iteration: 73167
loss: 0.9710978865623474,grad_norm: 0.9390795124062151, iteration: 73168
loss: 0.9574685096740723,grad_norm: 0.9999991400959047, iteration: 73169
loss: 1.1253026723861694,grad_norm: 0.9999992998852736, iteration: 73170
loss: 1.0391454696655273,grad_norm: 0.9332831263327636, iteration: 73171
loss: 0.9711117744445801,grad_norm: 0.9999990479610845, iteration: 73172
loss: 1.0415003299713135,grad_norm: 0.8834804152111234, iteration: 73173
loss: 0.9712214469909668,grad_norm: 0.9493474720412906, iteration: 73174
loss: 0.9774188995361328,grad_norm: 0.886296675449112, iteration: 73175
loss: 0.9981977343559265,grad_norm: 0.979531936283039, iteration: 73176
loss: 1.0098103284835815,grad_norm: 0.968036103573228, iteration: 73177
loss: 1.0006749629974365,grad_norm: 0.9038077981761853, iteration: 73178
loss: 1.0110830068588257,grad_norm: 0.8444807907768269, iteration: 73179
loss: 1.0313818454742432,grad_norm: 0.9999991437794509, iteration: 73180
loss: 1.0144902467727661,grad_norm: 0.9999990647556684, iteration: 73181
loss: 1.0067802667617798,grad_norm: 0.999999378183003, iteration: 73182
loss: 1.0160976648330688,grad_norm: 0.9999998382397466, iteration: 73183
loss: 1.0132068395614624,grad_norm: 0.876413780495751, iteration: 73184
loss: 1.005187749862671,grad_norm: 0.9999992310844809, iteration: 73185
loss: 1.0510621070861816,grad_norm: 0.8875488173500883, iteration: 73186
loss: 0.989378809928894,grad_norm: 0.7849952170830561, iteration: 73187
loss: 1.022018313407898,grad_norm: 0.9999989769903467, iteration: 73188
loss: 1.0037115812301636,grad_norm: 0.9449762655059474, iteration: 73189
loss: 0.9804996848106384,grad_norm: 0.9999995216370162, iteration: 73190
loss: 0.9879381656646729,grad_norm: 0.8513313231731096, iteration: 73191
loss: 1.0300790071487427,grad_norm: 0.9999867553206572, iteration: 73192
loss: 0.9872448444366455,grad_norm: 0.998337585609894, iteration: 73193
loss: 0.9821617007255554,grad_norm: 0.9999996668411525, iteration: 73194
loss: 0.9972102046012878,grad_norm: 0.9999990339425647, iteration: 73195
loss: 1.0286692380905151,grad_norm: 0.9999992244232471, iteration: 73196
loss: 0.9905296564102173,grad_norm: 0.9902726260840087, iteration: 73197
loss: 1.0200366973876953,grad_norm: 0.8474699439359413, iteration: 73198
loss: 0.9750101566314697,grad_norm: 0.9999995851361976, iteration: 73199
loss: 1.026174545288086,grad_norm: 0.8803764869015707, iteration: 73200
loss: 1.0108838081359863,grad_norm: 0.9698284778152544, iteration: 73201
loss: 0.9892361760139465,grad_norm: 0.9999998075064654, iteration: 73202
loss: 0.9858984351158142,grad_norm: 0.9286601522642745, iteration: 73203
loss: 1.019777536392212,grad_norm: 0.9999990584665431, iteration: 73204
loss: 1.0806463956832886,grad_norm: 0.9999993227376905, iteration: 73205
loss: 0.9548268914222717,grad_norm: 0.9614449820363274, iteration: 73206
loss: 0.9968010783195496,grad_norm: 0.8906875405244715, iteration: 73207
loss: 1.0131651163101196,grad_norm: 0.9999992266230332, iteration: 73208
loss: 1.0031508207321167,grad_norm: 0.9999989500976048, iteration: 73209
loss: 1.0746835470199585,grad_norm: 0.9999995240208548, iteration: 73210
loss: 0.9843170642852783,grad_norm: 0.9999998666076144, iteration: 73211
loss: 1.0185755491256714,grad_norm: 0.9999991188533089, iteration: 73212
loss: 1.0044468641281128,grad_norm: 0.9485731388721661, iteration: 73213
loss: 1.01170015335083,grad_norm: 0.8759798565542586, iteration: 73214
loss: 1.0390032529830933,grad_norm: 0.8334781834522553, iteration: 73215
loss: 1.0451902151107788,grad_norm: 0.999998976404592, iteration: 73216
loss: 1.0428814888000488,grad_norm: 0.999999411965593, iteration: 73217
loss: 0.9605587720870972,grad_norm: 0.9574833969481809, iteration: 73218
loss: 1.0175553560256958,grad_norm: 0.8573349898624679, iteration: 73219
loss: 1.0123261213302612,grad_norm: 0.9587422716251903, iteration: 73220
loss: 1.0322074890136719,grad_norm: 0.7193693375379235, iteration: 73221
loss: 1.0136282444000244,grad_norm: 0.9999995269478161, iteration: 73222
loss: 1.0277884006500244,grad_norm: 0.9144933508977433, iteration: 73223
loss: 1.0004832744598389,grad_norm: 0.9182874714867686, iteration: 73224
loss: 1.002380132675171,grad_norm: 0.999999840967831, iteration: 73225
loss: 1.0017393827438354,grad_norm: 0.9625655924812911, iteration: 73226
loss: 0.9715004563331604,grad_norm: 0.9999989339405122, iteration: 73227
loss: 0.974239706993103,grad_norm: 0.8170093096578768, iteration: 73228
loss: 1.1313167810440063,grad_norm: 0.999999420973571, iteration: 73229
loss: 1.080926775932312,grad_norm: 0.9999995591702182, iteration: 73230
loss: 1.0369809865951538,grad_norm: 0.999999388631525, iteration: 73231
loss: 1.0012706518173218,grad_norm: 0.9663144074440649, iteration: 73232
loss: 0.988068163394928,grad_norm: 0.9999999665706183, iteration: 73233
loss: 1.0029526948928833,grad_norm: 0.8899835174272503, iteration: 73234
loss: 1.0592104196548462,grad_norm: 0.9999991895551603, iteration: 73235
loss: 1.0222676992416382,grad_norm: 0.8638651434722143, iteration: 73236
loss: 1.1311358213424683,grad_norm: 0.9999999560639952, iteration: 73237
loss: 1.0029789209365845,grad_norm: 0.9999996808909919, iteration: 73238
loss: 0.9944054484367371,grad_norm: 0.9416399925935468, iteration: 73239
loss: 1.0049928426742554,grad_norm: 0.8444306724523951, iteration: 73240
loss: 0.9832688570022583,grad_norm: 0.9317727013064511, iteration: 73241
loss: 0.9770317673683167,grad_norm: 0.99999903406865, iteration: 73242
loss: 1.002839207649231,grad_norm: 0.9485487384999863, iteration: 73243
loss: 0.9802705645561218,grad_norm: 0.999999297018094, iteration: 73244
loss: 0.9991546273231506,grad_norm: 0.846206343939483, iteration: 73245
loss: 0.9839423894882202,grad_norm: 0.9999991111405752, iteration: 73246
loss: 1.033021092414856,grad_norm: 0.9999992247318328, iteration: 73247
loss: 1.0191998481750488,grad_norm: 0.9999990479066224, iteration: 73248
loss: 1.0714194774627686,grad_norm: 0.9999997472111998, iteration: 73249
loss: 1.0093239545822144,grad_norm: 0.8275086518576574, iteration: 73250
loss: 0.976040780544281,grad_norm: 0.964481371796657, iteration: 73251
loss: 0.9737992286682129,grad_norm: 0.9999992319999123, iteration: 73252
loss: 1.000487208366394,grad_norm: 0.9524471451496723, iteration: 73253
loss: 1.0086780786514282,grad_norm: 0.9999996487944324, iteration: 73254
loss: 0.9933214783668518,grad_norm: 0.916970617237851, iteration: 73255
loss: 0.9802533388137817,grad_norm: 0.9999991606169871, iteration: 73256
loss: 0.9706893563270569,grad_norm: 0.9796017234967057, iteration: 73257
loss: 1.003793716430664,grad_norm: 0.9999991406206106, iteration: 73258
loss: 0.9892117381095886,grad_norm: 0.9999991269340789, iteration: 73259
loss: 1.004165530204773,grad_norm: 0.9999992957917773, iteration: 73260
loss: 0.9917081594467163,grad_norm: 0.7466914117698576, iteration: 73261
loss: 0.9851542115211487,grad_norm: 0.99999904038001, iteration: 73262
loss: 1.0404187440872192,grad_norm: 0.9999998970312893, iteration: 73263
loss: 0.9887014031410217,grad_norm: 0.8700505937049221, iteration: 73264
loss: 1.0394288301467896,grad_norm: 0.9999996560758257, iteration: 73265
loss: 1.0146143436431885,grad_norm: 0.9283967079763128, iteration: 73266
loss: 1.0388587713241577,grad_norm: 0.9224420756310794, iteration: 73267
loss: 1.027171015739441,grad_norm: 0.8691458879118475, iteration: 73268
loss: 0.9672377705574036,grad_norm: 0.8639139270970688, iteration: 73269
loss: 0.9753129482269287,grad_norm: 0.9881908329356994, iteration: 73270
loss: 1.0135027170181274,grad_norm: 0.8382551519899871, iteration: 73271
loss: 1.053832769393921,grad_norm: 0.8236002626595251, iteration: 73272
loss: 1.033087968826294,grad_norm: 0.9333061210757366, iteration: 73273
loss: 1.0574114322662354,grad_norm: 0.9999996804287336, iteration: 73274
loss: 1.0136041641235352,grad_norm: 0.9183610234115626, iteration: 73275
loss: 1.0703202486038208,grad_norm: 0.9887923347924287, iteration: 73276
loss: 0.9698644280433655,grad_norm: 0.9999994085823913, iteration: 73277
loss: 1.053080439567566,grad_norm: 0.9932858514871752, iteration: 73278
loss: 0.9964109659194946,grad_norm: 0.8657394549894897, iteration: 73279
loss: 1.0279653072357178,grad_norm: 0.9141233124146882, iteration: 73280
loss: 1.005309820175171,grad_norm: 0.9999991696056979, iteration: 73281
loss: 1.012121319770813,grad_norm: 0.9727800722937293, iteration: 73282
loss: 0.9735361337661743,grad_norm: 0.9999996027484124, iteration: 73283
loss: 1.0564578771591187,grad_norm: 0.99999964230878, iteration: 73284
loss: 1.0573253631591797,grad_norm: 0.9999990482877316, iteration: 73285
loss: 1.0621531009674072,grad_norm: 0.9999991518545523, iteration: 73286
loss: 1.004103183746338,grad_norm: 0.9999989908689308, iteration: 73287
loss: 1.0731428861618042,grad_norm: 0.9999993522455172, iteration: 73288
loss: 1.079588770866394,grad_norm: 0.9999992621661267, iteration: 73289
loss: 1.0371659994125366,grad_norm: 0.9999991455414636, iteration: 73290
loss: 0.9892535209655762,grad_norm: 0.9999990915310509, iteration: 73291
loss: 1.0081732273101807,grad_norm: 0.8848517646534171, iteration: 73292
loss: 1.0210953950881958,grad_norm: 0.93974422071349, iteration: 73293
loss: 0.9844408631324768,grad_norm: 0.9999994484357152, iteration: 73294
loss: 0.9970547556877136,grad_norm: 0.9651008487426807, iteration: 73295
loss: 0.9599283337593079,grad_norm: 0.999999261216508, iteration: 73296
loss: 0.9906212091445923,grad_norm: 0.9999998335353025, iteration: 73297
loss: 1.0246672630310059,grad_norm: 0.9999991472532288, iteration: 73298
loss: 0.9924591183662415,grad_norm: 0.9828920894417443, iteration: 73299
loss: 1.0754575729370117,grad_norm: 0.9999996048961892, iteration: 73300
loss: 1.0256305932998657,grad_norm: 0.9999991754310531, iteration: 73301
loss: 1.0140246152877808,grad_norm: 0.9999996380663836, iteration: 73302
loss: 1.031569004058838,grad_norm: 0.9999993998867256, iteration: 73303
loss: 1.0067646503448486,grad_norm: 0.9999998310550119, iteration: 73304
loss: 0.9844775795936584,grad_norm: 0.9999993973138197, iteration: 73305
loss: 0.937102735042572,grad_norm: 0.9312162212168369, iteration: 73306
loss: 1.0542771816253662,grad_norm: 0.9999994299472059, iteration: 73307
loss: 1.0222783088684082,grad_norm: 0.8726571622344351, iteration: 73308
loss: 1.0060230493545532,grad_norm: 0.9999991150107911, iteration: 73309
loss: 0.9933892488479614,grad_norm: 0.9998902774041726, iteration: 73310
loss: 1.0005544424057007,grad_norm: 0.9999990255481777, iteration: 73311
loss: 0.9891692399978638,grad_norm: 0.8584653918558122, iteration: 73312
loss: 0.9635231494903564,grad_norm: 0.9616687127668129, iteration: 73313
loss: 1.0036965608596802,grad_norm: 0.9918258370931504, iteration: 73314
loss: 1.0187333822250366,grad_norm: 0.9999989871205142, iteration: 73315
loss: 0.9968377351760864,grad_norm: 0.9923541387139769, iteration: 73316
loss: 1.0831960439682007,grad_norm: 0.9941286632570081, iteration: 73317
loss: 0.9907743334770203,grad_norm: 0.9903700235781514, iteration: 73318
loss: 0.9947386384010315,grad_norm: 0.956912103597393, iteration: 73319
loss: 0.999679684638977,grad_norm: 0.9460883441861625, iteration: 73320
loss: 1.0160690546035767,grad_norm: 0.999999653857971, iteration: 73321
loss: 0.9905199408531189,grad_norm: 0.9999993952273262, iteration: 73322
loss: 1.0056264400482178,grad_norm: 0.8683398279350845, iteration: 73323
loss: 0.9837057590484619,grad_norm: 0.9779410164852447, iteration: 73324
loss: 1.0176055431365967,grad_norm: 0.9531433940987482, iteration: 73325
loss: 0.9685524702072144,grad_norm: 0.9999992145643076, iteration: 73326
loss: 0.9967845678329468,grad_norm: 0.999999026648128, iteration: 73327
loss: 1.0758482217788696,grad_norm: 0.9999995989691988, iteration: 73328
loss: 1.0448616743087769,grad_norm: 0.8580351785303106, iteration: 73329
loss: 0.967925488948822,grad_norm: 0.9999996556359544, iteration: 73330
loss: 1.0156211853027344,grad_norm: 0.9505477889990567, iteration: 73331
loss: 1.0440629720687866,grad_norm: 0.9999993233370221, iteration: 73332
loss: 0.9999679327011108,grad_norm: 0.9999991805773984, iteration: 73333
loss: 1.0660631656646729,grad_norm: 0.9999996954309541, iteration: 73334
loss: 1.0130959749221802,grad_norm: 0.9999992555157203, iteration: 73335
loss: 0.9610806107521057,grad_norm: 0.8683069824877819, iteration: 73336
loss: 1.0031923055648804,grad_norm: 0.999999289749649, iteration: 73337
loss: 1.0085337162017822,grad_norm: 0.8720900047957844, iteration: 73338
loss: 1.017144799232483,grad_norm: 0.9999992410545411, iteration: 73339
loss: 0.9837164878845215,grad_norm: 0.8691805811224869, iteration: 73340
loss: 0.9850391745567322,grad_norm: 0.9999991040623506, iteration: 73341
loss: 0.990730881690979,grad_norm: 0.9999992480921124, iteration: 73342
loss: 0.9972110390663147,grad_norm: 0.8428001525158952, iteration: 73343
loss: 1.0024060010910034,grad_norm: 0.9030609501819102, iteration: 73344
loss: 1.0401133298873901,grad_norm: 0.9999994862621588, iteration: 73345
loss: 1.012105941772461,grad_norm: 0.9999989854353853, iteration: 73346
loss: 0.9884149432182312,grad_norm: 0.9732299542152696, iteration: 73347
loss: 1.0329021215438843,grad_norm: 0.9999990176362143, iteration: 73348
loss: 1.0638492107391357,grad_norm: 0.9999994841503292, iteration: 73349
loss: 1.0203834772109985,grad_norm: 0.9999993001776246, iteration: 73350
loss: 1.0162451267242432,grad_norm: 0.8842586734914147, iteration: 73351
loss: 0.9996355772018433,grad_norm: 0.9999989292282907, iteration: 73352
loss: 1.1195570230484009,grad_norm: 0.9999994422851168, iteration: 73353
loss: 1.1108179092407227,grad_norm: 0.9999998098166403, iteration: 73354
loss: 1.0984748601913452,grad_norm: 0.9999996028367131, iteration: 73355
loss: 1.0485137701034546,grad_norm: 0.9999994193497254, iteration: 73356
loss: 0.9782094955444336,grad_norm: 0.7724255011209081, iteration: 73357
loss: 0.9749165773391724,grad_norm: 0.9999993135047827, iteration: 73358
loss: 0.9868554472923279,grad_norm: 0.9999997806079083, iteration: 73359
loss: 1.0414879322052002,grad_norm: 0.9999989906458594, iteration: 73360
loss: 0.9835240244865417,grad_norm: 0.7910968546949083, iteration: 73361
loss: 0.9941531419754028,grad_norm: 0.9999990308504282, iteration: 73362
loss: 1.0105366706848145,grad_norm: 0.9183679577214395, iteration: 73363
loss: 1.022108554840088,grad_norm: 0.917342649582329, iteration: 73364
loss: 0.9299113750457764,grad_norm: 0.9185971640592432, iteration: 73365
loss: 0.9965013265609741,grad_norm: 0.999999176706602, iteration: 73366
loss: 0.9739975929260254,grad_norm: 0.9500681403815371, iteration: 73367
loss: 0.988756537437439,grad_norm: 0.9959614342774883, iteration: 73368
loss: 0.9825450778007507,grad_norm: 0.7766433196536167, iteration: 73369
loss: 1.0165431499481201,grad_norm: 0.9999991412048315, iteration: 73370
loss: 0.9760614037513733,grad_norm: 0.8870033775900191, iteration: 73371
loss: 1.012751579284668,grad_norm: 0.9999991497131366, iteration: 73372
loss: 1.053100824356079,grad_norm: 0.9999998237405134, iteration: 73373
loss: 1.0273290872573853,grad_norm: 0.8707009193339742, iteration: 73374
loss: 1.0133161544799805,grad_norm: 0.8164351125873222, iteration: 73375
loss: 0.9707003831863403,grad_norm: 0.9999989584949482, iteration: 73376
loss: 1.0117883682250977,grad_norm: 0.8695718838005612, iteration: 73377
loss: 1.0259557962417603,grad_norm: 0.9581420221998856, iteration: 73378
loss: 1.0741305351257324,grad_norm: 0.885806417498205, iteration: 73379
loss: 1.036154866218567,grad_norm: 0.9999997353621268, iteration: 73380
loss: 1.0432175397872925,grad_norm: 0.8588390502600186, iteration: 73381
loss: 0.9692450165748596,grad_norm: 0.9999996272209375, iteration: 73382
loss: 1.0516622066497803,grad_norm: 0.9999993081146832, iteration: 73383
loss: 1.0650047063827515,grad_norm: 0.9999991583656136, iteration: 73384
loss: 0.9938088655471802,grad_norm: 0.8887816418062269, iteration: 73385
loss: 1.048448085784912,grad_norm: 0.9999991310571378, iteration: 73386
loss: 0.9964693784713745,grad_norm: 0.9910103790350485, iteration: 73387
loss: 1.0145882368087769,grad_norm: 0.9936785873821223, iteration: 73388
loss: 1.0134522914886475,grad_norm: 0.9999990613773957, iteration: 73389
loss: 1.0091513395309448,grad_norm: 0.9065363762817871, iteration: 73390
loss: 1.007901906967163,grad_norm: 0.8366831795804711, iteration: 73391
loss: 1.059891939163208,grad_norm: 0.9999994987961829, iteration: 73392
loss: 1.0234484672546387,grad_norm: 0.8608926034577975, iteration: 73393
loss: 0.9949371218681335,grad_norm: 0.8049349461136107, iteration: 73394
loss: 0.9744177460670471,grad_norm: 0.9444562330681495, iteration: 73395
loss: 1.0559741258621216,grad_norm: 0.9999990660255758, iteration: 73396
loss: 0.9993571043014526,grad_norm: 0.8695090891331066, iteration: 73397
loss: 0.9856880307197571,grad_norm: 0.9999991222490079, iteration: 73398
loss: 1.0564864873886108,grad_norm: 0.9573012464889628, iteration: 73399
loss: 1.0202033519744873,grad_norm: 0.9099285851627579, iteration: 73400
loss: 0.9774278998374939,grad_norm: 0.9279292632353621, iteration: 73401
loss: 1.015799880027771,grad_norm: 0.9187354326631464, iteration: 73402
loss: 1.0075269937515259,grad_norm: 0.999999079337266, iteration: 73403
loss: 0.9887345433235168,grad_norm: 0.8484718450105802, iteration: 73404
loss: 0.9912328124046326,grad_norm: 0.9999990206663778, iteration: 73405
loss: 1.0254466533660889,grad_norm: 0.999999852182049, iteration: 73406
loss: 0.9728151559829712,grad_norm: 0.8450534842493285, iteration: 73407
loss: 1.0095068216323853,grad_norm: 0.9999994118869094, iteration: 73408
loss: 1.0336357355117798,grad_norm: 0.9999995234507486, iteration: 73409
loss: 0.9925691485404968,grad_norm: 0.9999991523828333, iteration: 73410
loss: 1.0392309427261353,grad_norm: 0.9773975198248892, iteration: 73411
loss: 1.0057334899902344,grad_norm: 0.9409610489448336, iteration: 73412
loss: 1.0078943967819214,grad_norm: 0.9400104873504056, iteration: 73413
loss: 1.0210784673690796,grad_norm: 0.9557035751640562, iteration: 73414
loss: 1.0505508184432983,grad_norm: 0.9999993742601894, iteration: 73415
loss: 0.935731828212738,grad_norm: 0.8576096752166678, iteration: 73416
loss: 1.0436676740646362,grad_norm: 0.8596394083527178, iteration: 73417
loss: 0.9827439188957214,grad_norm: 0.9999991201858098, iteration: 73418
loss: 1.026713252067566,grad_norm: 0.9999993837535045, iteration: 73419
loss: 1.0533552169799805,grad_norm: 0.9125998687284427, iteration: 73420
loss: 0.9978272914886475,grad_norm: 0.9999992651986801, iteration: 73421
loss: 1.0969330072402954,grad_norm: 0.9999994673735219, iteration: 73422
loss: 0.9950305819511414,grad_norm: 0.8498615788676952, iteration: 73423
loss: 0.9968224167823792,grad_norm: 0.9202507398804234, iteration: 73424
loss: 0.9951473474502563,grad_norm: 0.9804238385981877, iteration: 73425
loss: 0.9873355627059937,grad_norm: 0.8786054945207655, iteration: 73426
loss: 1.0282469987869263,grad_norm: 0.9359298770372869, iteration: 73427
loss: 0.9962563514709473,grad_norm: 0.9775652515442976, iteration: 73428
loss: 1.024514079093933,grad_norm: 0.9999991742217585, iteration: 73429
loss: 0.9982672929763794,grad_norm: 0.9999991243236911, iteration: 73430
loss: 0.9716194272041321,grad_norm: 0.8101005622471688, iteration: 73431
loss: 1.0164121389389038,grad_norm: 0.9999990396567685, iteration: 73432
loss: 0.9862836599349976,grad_norm: 0.9999996581386362, iteration: 73433
loss: 1.0009928941726685,grad_norm: 0.9999991273621383, iteration: 73434
loss: 1.0130207538604736,grad_norm: 0.8366924023184297, iteration: 73435
loss: 1.024361491203308,grad_norm: 0.9999998801698772, iteration: 73436
loss: 0.9964846968650818,grad_norm: 0.8546562405354784, iteration: 73437
loss: 1.0182665586471558,grad_norm: 0.9213077119163127, iteration: 73438
loss: 1.0112749338150024,grad_norm: 0.9603133277201895, iteration: 73439
loss: 1.0202014446258545,grad_norm: 0.9470086194860118, iteration: 73440
loss: 0.9664292335510254,grad_norm: 0.9999994930874804, iteration: 73441
loss: 1.049136757850647,grad_norm: 0.9999989505040227, iteration: 73442
loss: 1.012988567352295,grad_norm: 0.8361944273243966, iteration: 73443
loss: 0.9850819706916809,grad_norm: 0.9071598560064362, iteration: 73444
loss: 0.9922881722450256,grad_norm: 0.8976769902858882, iteration: 73445
loss: 1.0290888547897339,grad_norm: 0.9310813658508288, iteration: 73446
loss: 0.964568018913269,grad_norm: 0.947806781767635, iteration: 73447
loss: 0.9665300846099854,grad_norm: 0.8925418080692678, iteration: 73448
loss: 0.9629252552986145,grad_norm: 0.7905776652137948, iteration: 73449
loss: 0.9800639748573303,grad_norm: 0.9353454761769439, iteration: 73450
loss: 1.0111581087112427,grad_norm: 0.9999995522169356, iteration: 73451
loss: 1.0318379402160645,grad_norm: 0.9378096380955981, iteration: 73452
loss: 0.9912506937980652,grad_norm: 0.8523886115900902, iteration: 73453
loss: 0.9797601699829102,grad_norm: 0.9499121297880456, iteration: 73454
loss: 0.9806710481643677,grad_norm: 0.9999994548165352, iteration: 73455
loss: 0.9826955795288086,grad_norm: 0.9999993463822635, iteration: 73456
loss: 1.0714834928512573,grad_norm: 0.9999991431915556, iteration: 73457
loss: 1.0181010961532593,grad_norm: 0.8910976122857528, iteration: 73458
loss: 0.9778114557266235,grad_norm: 0.8780458899259653, iteration: 73459
loss: 0.967928946018219,grad_norm: 0.8933645345252639, iteration: 73460
loss: 1.0747509002685547,grad_norm: 0.9999992068548692, iteration: 73461
loss: 0.9861801862716675,grad_norm: 0.7811818562141064, iteration: 73462
loss: 0.9854572415351868,grad_norm: 0.9999990854061168, iteration: 73463
loss: 0.9791688323020935,grad_norm: 0.9999990985243825, iteration: 73464
loss: 1.0124452114105225,grad_norm: 0.9613329182123955, iteration: 73465
loss: 0.9879856705665588,grad_norm: 0.9188206262069399, iteration: 73466
loss: 1.0001977682113647,grad_norm: 0.8913006105791066, iteration: 73467
loss: 0.9453282952308655,grad_norm: 0.9746703388370866, iteration: 73468
loss: 1.0349218845367432,grad_norm: 0.756815177271869, iteration: 73469
loss: 0.9711287021636963,grad_norm: 0.9305817436658099, iteration: 73470
loss: 1.2936797142028809,grad_norm: 0.9999999237247512, iteration: 73471
loss: 0.9439267516136169,grad_norm: 0.9634782440594587, iteration: 73472
loss: 0.9837501645088196,grad_norm: 0.953780529519595, iteration: 73473
loss: 1.0187230110168457,grad_norm: 0.9737649253165954, iteration: 73474
loss: 1.0055345296859741,grad_norm: 0.9815478461010617, iteration: 73475
loss: 0.9669166207313538,grad_norm: 0.9829739838312782, iteration: 73476
loss: 1.0839781761169434,grad_norm: 0.9999993743642669, iteration: 73477
loss: 1.0262713432312012,grad_norm: 0.7863004094179671, iteration: 73478
loss: 1.0356038808822632,grad_norm: 0.9999991821032023, iteration: 73479
loss: 1.069076657295227,grad_norm: 0.9999992099353192, iteration: 73480
loss: 1.0022705793380737,grad_norm: 0.9999991682033794, iteration: 73481
loss: 0.9787446856498718,grad_norm: 0.9999990718916351, iteration: 73482
loss: 0.9569360613822937,grad_norm: 0.9999991292033791, iteration: 73483
loss: 1.0111801624298096,grad_norm: 0.8544721072653826, iteration: 73484
loss: 1.0114420652389526,grad_norm: 0.9711251445666055, iteration: 73485
loss: 0.9897831678390503,grad_norm: 0.9042340474608487, iteration: 73486
loss: 0.9747019410133362,grad_norm: 0.9999989153530339, iteration: 73487
loss: 1.0255283117294312,grad_norm: 0.8669232225543024, iteration: 73488
loss: 1.0080804824829102,grad_norm: 0.8664201000716296, iteration: 73489
loss: 1.0320000648498535,grad_norm: 0.9999990535888831, iteration: 73490
loss: 0.9588091373443604,grad_norm: 0.8358814495448241, iteration: 73491
loss: 0.9847457408905029,grad_norm: 0.9999990701235569, iteration: 73492
loss: 0.9730666875839233,grad_norm: 0.999999026509027, iteration: 73493
loss: 1.0179252624511719,grad_norm: 0.87828443978788, iteration: 73494
loss: 1.0205707550048828,grad_norm: 0.9999991118706221, iteration: 73495
loss: 1.0568101406097412,grad_norm: 0.9999995921628908, iteration: 73496
loss: 0.9756476283073425,grad_norm: 0.9999991271206803, iteration: 73497
loss: 0.9902124404907227,grad_norm: 0.9695424034216201, iteration: 73498
loss: 0.9746274352073669,grad_norm: 0.9666116342548078, iteration: 73499
loss: 0.9877238273620605,grad_norm: 0.9999992663497771, iteration: 73500
loss: 1.0067312717437744,grad_norm: 0.9050782792410712, iteration: 73501
loss: 1.069283366203308,grad_norm: 0.9999996663737125, iteration: 73502
loss: 1.0085480213165283,grad_norm: 0.8456715750643452, iteration: 73503
loss: 0.9655265212059021,grad_norm: 0.8869915158258325, iteration: 73504
loss: 0.9888803958892822,grad_norm: 0.99379584677619, iteration: 73505
loss: 1.0381712913513184,grad_norm: 0.9999994697026452, iteration: 73506
loss: 0.9637722969055176,grad_norm: 0.9999996804872452, iteration: 73507
loss: 0.9766332507133484,grad_norm: 0.9382036277694951, iteration: 73508
loss: 1.005926251411438,grad_norm: 0.9203485718744077, iteration: 73509
loss: 0.9789214730262756,grad_norm: 0.8066029583074751, iteration: 73510
loss: 1.0397008657455444,grad_norm: 0.9999993675836834, iteration: 73511
loss: 0.9728894829750061,grad_norm: 0.9999991296069121, iteration: 73512
loss: 1.022796392440796,grad_norm: 0.9999990450659829, iteration: 73513
loss: 0.9941517114639282,grad_norm: 0.9363320159167992, iteration: 73514
loss: 0.943884015083313,grad_norm: 0.999999194111375, iteration: 73515
loss: 1.0309903621673584,grad_norm: 0.9439192647880744, iteration: 73516
loss: 1.0298694372177124,grad_norm: 0.9556528801205054, iteration: 73517
loss: 0.9837930798530579,grad_norm: 0.9286953380574264, iteration: 73518
loss: 0.9948241710662842,grad_norm: 0.9999992271365782, iteration: 73519
loss: 1.0205280780792236,grad_norm: 0.9482790195656513, iteration: 73520
loss: 1.0085692405700684,grad_norm: 0.9999998093336755, iteration: 73521
loss: 1.0289416313171387,grad_norm: 0.999999586916499, iteration: 73522
loss: 1.0103204250335693,grad_norm: 0.8699421227962787, iteration: 73523
loss: 1.0296181440353394,grad_norm: 0.9999989520170343, iteration: 73524
loss: 1.0375510454177856,grad_norm: 0.9492917611650511, iteration: 73525
loss: 1.0211528539657593,grad_norm: 0.8548154389744838, iteration: 73526
loss: 0.9902916550636292,grad_norm: 0.9920802822844793, iteration: 73527
loss: 0.9752132892608643,grad_norm: 0.9699958246621014, iteration: 73528
loss: 0.9890603423118591,grad_norm: 0.9999991591992168, iteration: 73529
loss: 0.9878450036048889,grad_norm: 0.7803540674060992, iteration: 73530
loss: 1.0947579145431519,grad_norm: 0.9999998725615471, iteration: 73531
loss: 1.044031023979187,grad_norm: 0.950570842473137, iteration: 73532
loss: 1.0332072973251343,grad_norm: 0.9999988877218182, iteration: 73533
loss: 1.0112560987472534,grad_norm: 0.9629467551519989, iteration: 73534
loss: 1.0375068187713623,grad_norm: 0.9999993256047375, iteration: 73535
loss: 1.0304592847824097,grad_norm: 0.9999992561128965, iteration: 73536
loss: 0.9897184371948242,grad_norm: 0.999999205543007, iteration: 73537
loss: 1.0063015222549438,grad_norm: 0.9999992428515265, iteration: 73538
loss: 0.9942352771759033,grad_norm: 0.9999992904282707, iteration: 73539
loss: 1.0094971656799316,grad_norm: 0.9121743296098855, iteration: 73540
loss: 0.9782779812812805,grad_norm: 0.9809504144368181, iteration: 73541
loss: 0.9914008378982544,grad_norm: 0.8144246920993958, iteration: 73542
loss: 0.9880620837211609,grad_norm: 0.8840328920885928, iteration: 73543
loss: 1.015609622001648,grad_norm: 0.914214638923347, iteration: 73544
loss: 1.03164541721344,grad_norm: 0.9999995680301318, iteration: 73545
loss: 1.0025731325149536,grad_norm: 0.8612617665159032, iteration: 73546
loss: 1.0042210817337036,grad_norm: 0.7951720739236041, iteration: 73547
loss: 0.9784854650497437,grad_norm: 0.8035051290984191, iteration: 73548
loss: 0.962483823299408,grad_norm: 0.9999990242181658, iteration: 73549
loss: 1.0035794973373413,grad_norm: 0.9999993007952573, iteration: 73550
loss: 0.9782088994979858,grad_norm: 0.8248981025314781, iteration: 73551
loss: 1.0181978940963745,grad_norm: 0.9469006024377491, iteration: 73552
loss: 1.0053107738494873,grad_norm: 0.999999198897313, iteration: 73553
loss: 0.9712154865264893,grad_norm: 0.9343421602641702, iteration: 73554
loss: 1.0852335691452026,grad_norm: 0.9999995202167759, iteration: 73555
loss: 0.9623525142669678,grad_norm: 0.8543595184077728, iteration: 73556
loss: 0.9688442945480347,grad_norm: 0.9491736925228146, iteration: 73557
loss: 0.9664095640182495,grad_norm: 0.9999990909272305, iteration: 73558
loss: 1.0096486806869507,grad_norm: 0.8524455467101962, iteration: 73559
loss: 0.9872195720672607,grad_norm: 0.9999990453099388, iteration: 73560
loss: 1.012020230293274,grad_norm: 0.999999104031768, iteration: 73561
loss: 0.9934810400009155,grad_norm: 0.7568981933481905, iteration: 73562
loss: 0.9866530299186707,grad_norm: 0.9680906965259163, iteration: 73563
loss: 1.0116775035858154,grad_norm: 0.9999989197272793, iteration: 73564
loss: 1.0224978923797607,grad_norm: 0.8491894760294441, iteration: 73565
loss: 1.034865379333496,grad_norm: 0.9873554424084862, iteration: 73566
loss: 0.9929665923118591,grad_norm: 0.9999990535127639, iteration: 73567
loss: 1.0027793645858765,grad_norm: 0.9031227397208033, iteration: 73568
loss: 1.0095840692520142,grad_norm: 0.9999993619432486, iteration: 73569
loss: 1.0277332067489624,grad_norm: 0.8999832404993107, iteration: 73570
loss: 0.9389750957489014,grad_norm: 0.735001915754237, iteration: 73571
loss: 0.9720805883407593,grad_norm: 0.7704681792477549, iteration: 73572
loss: 1.0032049417495728,grad_norm: 0.9486481920392982, iteration: 73573
loss: 0.9786235094070435,grad_norm: 0.9939087598792605, iteration: 73574
loss: 1.025494933128357,grad_norm: 0.9999995301554291, iteration: 73575
loss: 0.9638761878013611,grad_norm: 0.9999992012889303, iteration: 73576
loss: 1.0153939723968506,grad_norm: 0.8599895172595136, iteration: 73577
loss: 1.022573471069336,grad_norm: 0.8531200225322372, iteration: 73578
loss: 0.9805037975311279,grad_norm: 0.7678003376476344, iteration: 73579
loss: 1.0347251892089844,grad_norm: 0.9999992264924368, iteration: 73580
loss: 1.0339405536651611,grad_norm: 0.9999992124844344, iteration: 73581
loss: 1.0390863418579102,grad_norm: 0.8958124799169611, iteration: 73582
loss: 0.9903823733329773,grad_norm: 0.9834229528382852, iteration: 73583
loss: 0.9875278472900391,grad_norm: 0.9999991831126939, iteration: 73584
loss: 1.0189671516418457,grad_norm: 0.9295536546092117, iteration: 73585
loss: 1.0405372381210327,grad_norm: 0.9063269188387268, iteration: 73586
loss: 0.9800264239311218,grad_norm: 0.9488679939504755, iteration: 73587
loss: 1.0503767728805542,grad_norm: 0.9999990817952945, iteration: 73588
loss: 0.9631947875022888,grad_norm: 0.9999989994513802, iteration: 73589
loss: 0.9816276431083679,grad_norm: 0.999999908010536, iteration: 73590
loss: 0.9994713664054871,grad_norm: 0.99999944801282, iteration: 73591
loss: 0.9985544681549072,grad_norm: 0.957398071098269, iteration: 73592
loss: 1.0211973190307617,grad_norm: 0.7961568374001363, iteration: 73593
loss: 1.0126912593841553,grad_norm: 0.743325714934048, iteration: 73594
loss: 0.9891050457954407,grad_norm: 0.8266908672231701, iteration: 73595
loss: 1.0072965621948242,grad_norm: 0.9289694747040294, iteration: 73596
loss: 1.0217564105987549,grad_norm: 0.8966712284054827, iteration: 73597
loss: 1.0212624073028564,grad_norm: 0.9692679864242228, iteration: 73598
loss: 1.0215705633163452,grad_norm: 0.953315384679044, iteration: 73599
loss: 0.9896711111068726,grad_norm: 0.9999992205010202, iteration: 73600
loss: 0.9961665272712708,grad_norm: 0.8579303291865102, iteration: 73601
loss: 0.9563626646995544,grad_norm: 0.9844028896433238, iteration: 73602
loss: 1.019567847251892,grad_norm: 0.9999993223636477, iteration: 73603
loss: 1.003501296043396,grad_norm: 0.821245730396641, iteration: 73604
loss: 1.000835657119751,grad_norm: 0.9999990563934266, iteration: 73605
loss: 1.0005944967269897,grad_norm: 0.9540802048498632, iteration: 73606
loss: 0.9730730652809143,grad_norm: 0.9999991306438354, iteration: 73607
loss: 1.0159766674041748,grad_norm: 0.9222717195392893, iteration: 73608
loss: 1.2542601823806763,grad_norm: 0.9999996623828837, iteration: 73609
loss: 1.0559484958648682,grad_norm: 0.9999996146091346, iteration: 73610
loss: 0.9859796166419983,grad_norm: 0.8363186135581441, iteration: 73611
loss: 1.0082347393035889,grad_norm: 0.9195982906064435, iteration: 73612
loss: 1.0201383829116821,grad_norm: 0.9376755603521851, iteration: 73613
loss: 1.0274302959442139,grad_norm: 0.999999207137969, iteration: 73614
loss: 1.0442715883255005,grad_norm: 0.9999992918253244, iteration: 73615
loss: 0.9913676977157593,grad_norm: 0.9221675788764869, iteration: 73616
loss: 0.9883803129196167,grad_norm: 0.9999991600114998, iteration: 73617
loss: 0.9981226921081543,grad_norm: 0.9908352894972187, iteration: 73618
loss: 0.9846358895301819,grad_norm: 0.9072021827356311, iteration: 73619
loss: 1.0127310752868652,grad_norm: 0.958029793216559, iteration: 73620
loss: 1.0030577182769775,grad_norm: 0.9999991758331275, iteration: 73621
loss: 1.0017396211624146,grad_norm: 0.9999992084619427, iteration: 73622
loss: 1.0190390348434448,grad_norm: 0.9048733140842008, iteration: 73623
loss: 0.9964687824249268,grad_norm: 0.9999994771081644, iteration: 73624
loss: 1.001336932182312,grad_norm: 0.9567672007442029, iteration: 73625
loss: 1.0185898542404175,grad_norm: 0.9649600320395122, iteration: 73626
loss: 0.9825723767280579,grad_norm: 0.9999990044379476, iteration: 73627
loss: 0.97553551197052,grad_norm: 0.9999991535920976, iteration: 73628
loss: 0.9906738996505737,grad_norm: 0.7977510306869551, iteration: 73629
loss: 1.0018303394317627,grad_norm: 0.8901265097149211, iteration: 73630
loss: 0.975371241569519,grad_norm: 0.9785908695320007, iteration: 73631
loss: 0.9857660531997681,grad_norm: 0.8700672041287617, iteration: 73632
loss: 1.0312458276748657,grad_norm: 0.9999994987446429, iteration: 73633
loss: 1.0180250406265259,grad_norm: 0.9999992724892582, iteration: 73634
loss: 0.9862386584281921,grad_norm: 0.7751442705868382, iteration: 73635
loss: 1.003722906112671,grad_norm: 0.7884389033795811, iteration: 73636
loss: 1.0252314805984497,grad_norm: 0.9999993867309659, iteration: 73637
loss: 1.0095338821411133,grad_norm: 0.9487728386317116, iteration: 73638
loss: 1.0604251623153687,grad_norm: 0.9999996936918977, iteration: 73639
loss: 1.00439453125,grad_norm: 0.8166026355253699, iteration: 73640
loss: 0.9990856051445007,grad_norm: 0.9996069464498817, iteration: 73641
loss: 1.0136371850967407,grad_norm: 0.8497582294038406, iteration: 73642
loss: 0.9845260381698608,grad_norm: 0.855860647453763, iteration: 73643
loss: 0.923862874507904,grad_norm: 0.8854982764830666, iteration: 73644
loss: 0.9971133470535278,grad_norm: 0.9999991849094699, iteration: 73645
loss: 0.9528738260269165,grad_norm: 0.956296163724062, iteration: 73646
loss: 1.0205309391021729,grad_norm: 0.9999991391393391, iteration: 73647
loss: 0.9819592237472534,grad_norm: 0.8605847068932185, iteration: 73648
loss: 0.9771226048469543,grad_norm: 0.7758762492001925, iteration: 73649
loss: 0.9691213965415955,grad_norm: 0.9207164981427185, iteration: 73650
loss: 0.9992566108703613,grad_norm: 0.9059123630706635, iteration: 73651
loss: 0.99993896484375,grad_norm: 0.9494773995664059, iteration: 73652
loss: 0.9919416308403015,grad_norm: 0.99999921071898, iteration: 73653
loss: 1.0267714262008667,grad_norm: 0.9999991307971788, iteration: 73654
loss: 1.0459920167922974,grad_norm: 0.9999995454817557, iteration: 73655
loss: 0.9949302077293396,grad_norm: 0.9700792548945721, iteration: 73656
loss: 1.019855260848999,grad_norm: 0.9999990193864207, iteration: 73657
loss: 0.9953981041908264,grad_norm: 0.8109582658778544, iteration: 73658
loss: 1.0375885963439941,grad_norm: 0.8727883753352945, iteration: 73659
loss: 0.9894781708717346,grad_norm: 0.9958189721263676, iteration: 73660
loss: 0.9818028211593628,grad_norm: 0.9999991824662888, iteration: 73661
loss: 0.9894813299179077,grad_norm: 0.9999991206432397, iteration: 73662
loss: 0.981259286403656,grad_norm: 0.8972311040201144, iteration: 73663
loss: 1.031051754951477,grad_norm: 0.8646264797841181, iteration: 73664
loss: 0.9894139766693115,grad_norm: 0.8711518478286813, iteration: 73665
loss: 1.0004456043243408,grad_norm: 0.9999991711610606, iteration: 73666
loss: 0.9712290167808533,grad_norm: 0.9999992472145294, iteration: 73667
loss: 0.9946533441543579,grad_norm: 0.8855407627963764, iteration: 73668
loss: 1.0091218948364258,grad_norm: 0.9999991638425121, iteration: 73669
loss: 1.0220621824264526,grad_norm: 0.8582903351600456, iteration: 73670
loss: 0.9620404243469238,grad_norm: 0.927887181304132, iteration: 73671
loss: 0.9779192805290222,grad_norm: 0.9999990261944643, iteration: 73672
loss: 1.0098117589950562,grad_norm: 0.8684644810972312, iteration: 73673
loss: 0.999989926815033,grad_norm: 0.7485222930686254, iteration: 73674
loss: 0.979358971118927,grad_norm: 0.8641372155585942, iteration: 73675
loss: 1.1609535217285156,grad_norm: 0.9081360924749422, iteration: 73676
loss: 0.9956901669502258,grad_norm: 0.954162566203039, iteration: 73677
loss: 0.967736542224884,grad_norm: 0.8066697178642526, iteration: 73678
loss: 1.0002083778381348,grad_norm: 0.8701661064319633, iteration: 73679
loss: 1.0099902153015137,grad_norm: 0.9999999068604625, iteration: 73680
loss: 1.0279948711395264,grad_norm: 0.9999990852278685, iteration: 73681
loss: 1.0038237571716309,grad_norm: 0.7911569022482277, iteration: 73682
loss: 1.0112541913986206,grad_norm: 0.7968244110947839, iteration: 73683
loss: 1.006115198135376,grad_norm: 0.899172050465936, iteration: 73684
loss: 1.0734776258468628,grad_norm: 0.999999614245742, iteration: 73685
loss: 0.9825149178504944,grad_norm: 0.9999992476812599, iteration: 73686
loss: 1.003525733947754,grad_norm: 0.9999991305478854, iteration: 73687
loss: 1.082549810409546,grad_norm: 0.9999998813181011, iteration: 73688
loss: 1.0090268850326538,grad_norm: 0.9999992627750918, iteration: 73689
loss: 1.0260008573532104,grad_norm: 0.9999990623971023, iteration: 73690
loss: 0.9532967209815979,grad_norm: 0.9999993314423524, iteration: 73691
loss: 0.9790787100791931,grad_norm: 0.9396313815695709, iteration: 73692
loss: 0.9596258997917175,grad_norm: 0.999999335894572, iteration: 73693
loss: 0.973921537399292,grad_norm: 0.7993729126760915, iteration: 73694
loss: 1.0115032196044922,grad_norm: 0.9999991190822008, iteration: 73695
loss: 1.0251225233078003,grad_norm: 0.9641416438060894, iteration: 73696
loss: 1.0041316747665405,grad_norm: 0.973837154633167, iteration: 73697
loss: 1.023332118988037,grad_norm: 0.9999993059680757, iteration: 73698
loss: 1.0288643836975098,grad_norm: 0.9040390693215062, iteration: 73699
loss: 0.9522383809089661,grad_norm: 0.9999999329460475, iteration: 73700
loss: 1.0037530660629272,grad_norm: 0.7804530662450673, iteration: 73701
loss: 0.9820343255996704,grad_norm: 0.9895268144904731, iteration: 73702
loss: 0.9527421593666077,grad_norm: 0.8690695366770824, iteration: 73703
loss: 0.9982980489730835,grad_norm: 0.9999989892268774, iteration: 73704
loss: 1.0217013359069824,grad_norm: 0.9588917278211448, iteration: 73705
loss: 1.0169819593429565,grad_norm: 0.9999990980641691, iteration: 73706
loss: 0.9808852672576904,grad_norm: 0.9999991157089848, iteration: 73707
loss: 1.0146182775497437,grad_norm: 0.9999991497174292, iteration: 73708
loss: 0.9992764592170715,grad_norm: 0.9999989711811925, iteration: 73709
loss: 0.9538877010345459,grad_norm: 0.9999991334301133, iteration: 73710
loss: 0.9853380918502808,grad_norm: 0.7833909862672359, iteration: 73711
loss: 1.0249215364456177,grad_norm: 0.8910733848489498, iteration: 73712
loss: 1.0020405054092407,grad_norm: 0.8912628216727628, iteration: 73713
loss: 0.9680181741714478,grad_norm: 0.9057712143817749, iteration: 73714
loss: 0.9764154553413391,grad_norm: 0.8402823125642297, iteration: 73715
loss: 1.0000382661819458,grad_norm: 0.9999992539312024, iteration: 73716
loss: 1.0872976779937744,grad_norm: 0.9999999317577352, iteration: 73717
loss: 0.9917961955070496,grad_norm: 0.7971096862048906, iteration: 73718
loss: 0.999603807926178,grad_norm: 0.9509763194252853, iteration: 73719
loss: 0.9864323735237122,grad_norm: 0.9999991236805666, iteration: 73720
loss: 0.9727780222892761,grad_norm: 0.8925463657966213, iteration: 73721
loss: 0.9744747281074524,grad_norm: 0.9353863281930784, iteration: 73722
loss: 1.018914818763733,grad_norm: 0.8975377373027406, iteration: 73723
loss: 1.0623750686645508,grad_norm: 0.9999990643080354, iteration: 73724
loss: 0.9784592986106873,grad_norm: 0.9999991822315218, iteration: 73725
loss: 1.0068913698196411,grad_norm: 0.987940476259953, iteration: 73726
loss: 1.0419563055038452,grad_norm: 0.9999996382915048, iteration: 73727
loss: 0.9773932695388794,grad_norm: 0.9134238691293922, iteration: 73728
loss: 0.9553812146186829,grad_norm: 0.9209706742268277, iteration: 73729
loss: 1.0268337726593018,grad_norm: 0.9236012720577813, iteration: 73730
loss: 1.0013644695281982,grad_norm: 0.9999994865063153, iteration: 73731
loss: 0.9531163573265076,grad_norm: 0.9999989376521439, iteration: 73732
loss: 0.9436425566673279,grad_norm: 0.761430023700205, iteration: 73733
loss: 1.0718222856521606,grad_norm: 0.8891309926413449, iteration: 73734
loss: 0.9914887547492981,grad_norm: 0.8788850515803206, iteration: 73735
loss: 0.9986017942428589,grad_norm: 0.8499151035625885, iteration: 73736
loss: 1.001563549041748,grad_norm: 0.9478211773780897, iteration: 73737
loss: 1.0137836933135986,grad_norm: 0.99999910291687, iteration: 73738
loss: 1.0220832824707031,grad_norm: 0.9999991306173573, iteration: 73739
loss: 1.0072964429855347,grad_norm: 0.9999990417097108, iteration: 73740
loss: 0.9567373394966125,grad_norm: 0.8974855468683748, iteration: 73741
loss: 1.0220564603805542,grad_norm: 0.953901779110418, iteration: 73742
loss: 0.9887310266494751,grad_norm: 0.9010923442716935, iteration: 73743
loss: 1.0199142694473267,grad_norm: 0.9636797607285295, iteration: 73744
loss: 1.0010266304016113,grad_norm: 0.9999991939164549, iteration: 73745
loss: 1.028333067893982,grad_norm: 0.9999989988643686, iteration: 73746
loss: 1.0169782638549805,grad_norm: 0.7825471513695199, iteration: 73747
loss: 1.0168817043304443,grad_norm: 0.8299457332416867, iteration: 73748
loss: 0.9991732239723206,grad_norm: 0.9701533919629006, iteration: 73749
loss: 0.9852885603904724,grad_norm: 0.999999163471721, iteration: 73750
loss: 1.0380421876907349,grad_norm: 0.9999991543077306, iteration: 73751
loss: 0.9728211760520935,grad_norm: 0.99999901612426, iteration: 73752
loss: 1.0329281091690063,grad_norm: 0.9016939871565062, iteration: 73753
loss: 1.026863932609558,grad_norm: 0.8573347364245666, iteration: 73754
loss: 0.9804719686508179,grad_norm: 0.9031738184565481, iteration: 73755
loss: 0.9959475994110107,grad_norm: 0.9999990620566582, iteration: 73756
loss: 0.9908030033111572,grad_norm: 0.9814927411479918, iteration: 73757
loss: 0.9832779765129089,grad_norm: 0.9999991377075541, iteration: 73758
loss: 0.9730036854743958,grad_norm: 0.8360772781619374, iteration: 73759
loss: 1.0431270599365234,grad_norm: 0.9999992449125285, iteration: 73760
loss: 1.014562726020813,grad_norm: 0.872291560604853, iteration: 73761
loss: 0.9704816937446594,grad_norm: 0.9999991582412763, iteration: 73762
loss: 1.0118639469146729,grad_norm: 0.999999054112225, iteration: 73763
loss: 1.0245215892791748,grad_norm: 0.890435959005563, iteration: 73764
loss: 1.0191998481750488,grad_norm: 0.9999997060304752, iteration: 73765
loss: 1.018566608428955,grad_norm: 0.9999990552454999, iteration: 73766
loss: 0.9881141781806946,grad_norm: 0.9999996924857623, iteration: 73767
loss: 1.000959038734436,grad_norm: 0.7533534449148183, iteration: 73768
loss: 1.0442733764648438,grad_norm: 0.7455966031026919, iteration: 73769
loss: 0.9833167195320129,grad_norm: 0.9283032449653638, iteration: 73770
loss: 0.9930436015129089,grad_norm: 0.8724919789991786, iteration: 73771
loss: 1.03759765625,grad_norm: 0.9999994906728128, iteration: 73772
loss: 0.9663413166999817,grad_norm: 0.9999990874336244, iteration: 73773
loss: 1.0288954973220825,grad_norm: 0.9999997835216897, iteration: 73774
loss: 0.9969190359115601,grad_norm: 0.9458234202246831, iteration: 73775
loss: 0.9861422777175903,grad_norm: 0.91269331666984, iteration: 73776
loss: 1.017163872718811,grad_norm: 0.9517793147026211, iteration: 73777
loss: 1.000481128692627,grad_norm: 0.8868164625850232, iteration: 73778
loss: 1.0224312543869019,grad_norm: 0.9608807922276896, iteration: 73779
loss: 1.0211619138717651,grad_norm: 0.9999993123247001, iteration: 73780
loss: 0.9947461485862732,grad_norm: 0.9025487860229262, iteration: 73781
loss: 1.012711524963379,grad_norm: 0.9711669699552392, iteration: 73782
loss: 1.0161765813827515,grad_norm: 0.999999298306377, iteration: 73783
loss: 1.0060698986053467,grad_norm: 0.8731200917477334, iteration: 73784
loss: 0.9771263003349304,grad_norm: 0.8398425439654887, iteration: 73785
loss: 0.9780556559562683,grad_norm: 0.9237643807415276, iteration: 73786
loss: 1.044040560722351,grad_norm: 0.939238373082002, iteration: 73787
loss: 1.0118576288223267,grad_norm: 0.8824737599004924, iteration: 73788
loss: 0.9990976452827454,grad_norm: 0.9612295535517905, iteration: 73789
loss: 0.9934019446372986,grad_norm: 0.8419470346020406, iteration: 73790
loss: 1.0232622623443604,grad_norm: 0.7904980500728399, iteration: 73791
loss: 0.9878273010253906,grad_norm: 0.9150110229646935, iteration: 73792
loss: 1.0476188659667969,grad_norm: 0.9999991027777699, iteration: 73793
loss: 0.9965223073959351,grad_norm: 0.9999989917920249, iteration: 73794
loss: 1.0043935775756836,grad_norm: 0.9999991135558401, iteration: 73795
loss: 1.0177075862884521,grad_norm: 0.8013666183186331, iteration: 73796
loss: 1.001431941986084,grad_norm: 0.999999105977491, iteration: 73797
loss: 0.9871949553489685,grad_norm: 0.9339968932158879, iteration: 73798
loss: 1.0777958631515503,grad_norm: 0.9999999461888623, iteration: 73799
loss: 1.0005930662155151,grad_norm: 0.9946607659020237, iteration: 73800
loss: 1.0426558256149292,grad_norm: 0.9999998687448949, iteration: 73801
loss: 1.0100085735321045,grad_norm: 0.9107573763751761, iteration: 73802
loss: 0.9986327290534973,grad_norm: 0.7999289533922421, iteration: 73803
loss: 1.0118184089660645,grad_norm: 0.9999991398730382, iteration: 73804
loss: 1.011715292930603,grad_norm: 0.9714889393747902, iteration: 73805
loss: 1.0500407218933105,grad_norm: 0.8998352653416923, iteration: 73806
loss: 0.9855200052261353,grad_norm: 0.9999991347657275, iteration: 73807
loss: 0.9705270528793335,grad_norm: 0.8827933137718008, iteration: 73808
loss: 0.9738529920578003,grad_norm: 0.8795796832445699, iteration: 73809
loss: 1.0105258226394653,grad_norm: 0.8921407129520128, iteration: 73810
loss: 1.0029948949813843,grad_norm: 0.9999990168139903, iteration: 73811
loss: 0.9897077679634094,grad_norm: 0.8386534406519552, iteration: 73812
loss: 1.0194875001907349,grad_norm: 0.9999992728523026, iteration: 73813
loss: 1.0430591106414795,grad_norm: 0.9751240940570788, iteration: 73814
loss: 0.9973613619804382,grad_norm: 0.9046095310375439, iteration: 73815
loss: 1.01319420337677,grad_norm: 0.7641810205517534, iteration: 73816
loss: 0.9646093845367432,grad_norm: 0.9729478339870968, iteration: 73817
loss: 1.0515440702438354,grad_norm: 0.9974914580332819, iteration: 73818
loss: 1.0146610736846924,grad_norm: 0.8255429286215312, iteration: 73819
loss: 0.9909676313400269,grad_norm: 0.7708203358827301, iteration: 73820
loss: 0.986979603767395,grad_norm: 0.8063662570662443, iteration: 73821
loss: 1.0423322916030884,grad_norm: 0.9999990756099563, iteration: 73822
loss: 0.9980025291442871,grad_norm: 0.9482015321936582, iteration: 73823
loss: 0.9922571182250977,grad_norm: 0.9565311639334715, iteration: 73824
loss: 1.005494236946106,grad_norm: 0.9999990131785124, iteration: 73825
loss: 1.0175095796585083,grad_norm: 0.7231876970747735, iteration: 73826
loss: 1.028737187385559,grad_norm: 0.9999994359891119, iteration: 73827
loss: 0.9793763756752014,grad_norm: 0.9670789457610963, iteration: 73828
loss: 1.0310654640197754,grad_norm: 0.9999992855947409, iteration: 73829
loss: 1.0147662162780762,grad_norm: 0.9999993397224569, iteration: 73830
loss: 1.122456669807434,grad_norm: 0.9999990286214897, iteration: 73831
loss: 0.9964492321014404,grad_norm: 0.8619658523321562, iteration: 73832
loss: 1.006496548652649,grad_norm: 0.9999993950362698, iteration: 73833
loss: 0.9762893319129944,grad_norm: 0.9999990636804323, iteration: 73834
loss: 0.9640140533447266,grad_norm: 0.8117608463913261, iteration: 73835
loss: 1.0109816789627075,grad_norm: 0.8282179842937097, iteration: 73836
loss: 0.9810537099838257,grad_norm: 0.9530244949289342, iteration: 73837
loss: 0.9967493414878845,grad_norm: 0.9479765008286247, iteration: 73838
loss: 0.9723522663116455,grad_norm: 0.8740167782158728, iteration: 73839
loss: 1.0327295064926147,grad_norm: 0.9715100199258876, iteration: 73840
loss: 0.9816027283668518,grad_norm: 0.9227817348444015, iteration: 73841
loss: 0.9979960918426514,grad_norm: 0.9118752044464778, iteration: 73842
loss: 1.0528944730758667,grad_norm: 0.99999986636314, iteration: 73843
loss: 1.0430922508239746,grad_norm: 0.9999991256028996, iteration: 73844
loss: 1.1347408294677734,grad_norm: 0.9999999760851315, iteration: 73845
loss: 1.0022798776626587,grad_norm: 0.999999859875503, iteration: 73846
loss: 0.9801404476165771,grad_norm: 0.9201088124064019, iteration: 73847
loss: 0.9859448671340942,grad_norm: 0.9999990462197055, iteration: 73848
loss: 1.0730420351028442,grad_norm: 0.9999991922107865, iteration: 73849
loss: 0.9466008543968201,grad_norm: 0.9756730150489376, iteration: 73850
loss: 1.0152955055236816,grad_norm: 0.9585179815714512, iteration: 73851
loss: 1.0033323764801025,grad_norm: 0.8147481786574648, iteration: 73852
loss: 1.1372987031936646,grad_norm: 0.9999999932975298, iteration: 73853
loss: 0.9799849390983582,grad_norm: 0.9160556878375749, iteration: 73854
loss: 1.0656355619430542,grad_norm: 0.9999990348232217, iteration: 73855
loss: 1.1722623109817505,grad_norm: 0.9080139341628808, iteration: 73856
loss: 1.1038333177566528,grad_norm: 0.9898114208782145, iteration: 73857
loss: 1.0053093433380127,grad_norm: 0.9230354523135273, iteration: 73858
loss: 1.0936498641967773,grad_norm: 0.9999995132068867, iteration: 73859
loss: 1.1123046875,grad_norm: 0.9999991714631222, iteration: 73860
loss: 0.9987379908561707,grad_norm: 0.9999990328907977, iteration: 73861
loss: 1.034536600112915,grad_norm: 0.902782624067892, iteration: 73862
loss: 1.0686900615692139,grad_norm: 0.99574552579104, iteration: 73863
loss: 1.039766550064087,grad_norm: 0.9999993424863697, iteration: 73864
loss: 1.0153586864471436,grad_norm: 0.8964764779729804, iteration: 73865
loss: 0.9981668591499329,grad_norm: 0.9251478056009615, iteration: 73866
loss: 0.9800452589988708,grad_norm: 0.9999993178326803, iteration: 73867
loss: 1.0748380422592163,grad_norm: 0.9999990250378233, iteration: 73868
loss: 1.0403640270233154,grad_norm: 0.9999992152954453, iteration: 73869
loss: 1.0010098218917847,grad_norm: 0.99999927221057, iteration: 73870
loss: 0.9983026385307312,grad_norm: 0.9781943497569128, iteration: 73871
loss: 1.0214259624481201,grad_norm: 0.9417495369210601, iteration: 73872
loss: 0.9746883511543274,grad_norm: 0.8895727238278693, iteration: 73873
loss: 1.0057430267333984,grad_norm: 0.8921723149123112, iteration: 73874
loss: 1.1548634767532349,grad_norm: 0.9999992009750964, iteration: 73875
loss: 1.0113799571990967,grad_norm: 0.8618806473413082, iteration: 73876
loss: 1.1650090217590332,grad_norm: 0.9999993098485093, iteration: 73877
loss: 1.0091943740844727,grad_norm: 0.885407600663541, iteration: 73878
loss: 0.9949541687965393,grad_norm: 0.9999990996001077, iteration: 73879
loss: 1.053939700126648,grad_norm: 0.9999998882377072, iteration: 73880
loss: 1.034155011177063,grad_norm: 0.9999997510424801, iteration: 73881
loss: 1.106071949005127,grad_norm: 0.9999995121635028, iteration: 73882
loss: 1.0310720205307007,grad_norm: 0.9737051684238779, iteration: 73883
loss: 1.0292831659317017,grad_norm: 0.9999997937530425, iteration: 73884
loss: 1.036925196647644,grad_norm: 0.9005641178430445, iteration: 73885
loss: 1.0162943601608276,grad_norm: 0.9999998661956784, iteration: 73886
loss: 1.030113697052002,grad_norm: 0.9999992112629092, iteration: 73887
loss: 0.9575339555740356,grad_norm: 0.9999990596052838, iteration: 73888
loss: 1.108406662940979,grad_norm: 0.9999994722473008, iteration: 73889
loss: 1.0176962614059448,grad_norm: 0.9999992864811174, iteration: 73890
loss: 1.0422126054763794,grad_norm: 0.9786688418450136, iteration: 73891
loss: 0.96372389793396,grad_norm: 0.9999990241057818, iteration: 73892
loss: 1.0609657764434814,grad_norm: 0.9999991429555654, iteration: 73893
loss: 1.0200815200805664,grad_norm: 0.9999990741895749, iteration: 73894
loss: 1.0158706903457642,grad_norm: 0.9999997583828629, iteration: 73895
loss: 1.027779221534729,grad_norm: 0.7988185919204297, iteration: 73896
loss: 0.9866239428520203,grad_norm: 0.9800597259257682, iteration: 73897
loss: 0.9872799515724182,grad_norm: 0.966848915207233, iteration: 73898
loss: 0.9989284873008728,grad_norm: 0.9999997563668749, iteration: 73899
loss: 0.9828034043312073,grad_norm: 0.969488486240893, iteration: 73900
loss: 1.033862590789795,grad_norm: 0.9427932098437178, iteration: 73901
loss: 1.0211747884750366,grad_norm: 0.9732050698279281, iteration: 73902
loss: 1.0466855764389038,grad_norm: 0.9999994710829656, iteration: 73903
loss: 1.0189812183380127,grad_norm: 0.8011886345279142, iteration: 73904
loss: 1.0227558612823486,grad_norm: 0.830738960028989, iteration: 73905
loss: 0.9929333329200745,grad_norm: 0.9999992120458048, iteration: 73906
loss: 1.0187216997146606,grad_norm: 0.9999996247154118, iteration: 73907
loss: 0.9911424517631531,grad_norm: 0.9651106441753322, iteration: 73908
loss: 1.055433750152588,grad_norm: 0.9999992848033887, iteration: 73909
loss: 1.0151898860931396,grad_norm: 0.8520212191536385, iteration: 73910
loss: 0.9641566872596741,grad_norm: 0.9999992279772975, iteration: 73911
loss: 1.0474458932876587,grad_norm: 0.9999990302662033, iteration: 73912
loss: 1.0935455560684204,grad_norm: 0.9431728068948337, iteration: 73913
loss: 1.0103414058685303,grad_norm: 0.9441816111090788, iteration: 73914
loss: 1.0279775857925415,grad_norm: 0.998363153099202, iteration: 73915
loss: 1.0063674449920654,grad_norm: 0.9719430942983579, iteration: 73916
loss: 1.0499889850616455,grad_norm: 0.9999992704301457, iteration: 73917
loss: 0.9941391348838806,grad_norm: 0.9999991284522429, iteration: 73918
loss: 1.1169464588165283,grad_norm: 0.9999997441803152, iteration: 73919
loss: 0.995545506477356,grad_norm: 0.860803633078181, iteration: 73920
loss: 0.9705470204353333,grad_norm: 0.8690572788915787, iteration: 73921
loss: 1.0118722915649414,grad_norm: 0.905812126518322, iteration: 73922
loss: 1.014284372329712,grad_norm: 0.9999994012424291, iteration: 73923
loss: 1.00747549533844,grad_norm: 0.9800975574541313, iteration: 73924
loss: 1.0056102275848389,grad_norm: 0.8155774380683856, iteration: 73925
loss: 1.0080294609069824,grad_norm: 0.9999990667199372, iteration: 73926
loss: 1.0375062227249146,grad_norm: 0.9999994475034202, iteration: 73927
loss: 0.986360490322113,grad_norm: 0.9999990644347878, iteration: 73928
loss: 1.0285518169403076,grad_norm: 0.9999989854316518, iteration: 73929
loss: 0.9881613850593567,grad_norm: 0.999999048181005, iteration: 73930
loss: 0.9750489592552185,grad_norm: 0.9999990538463276, iteration: 73931
loss: 1.005051851272583,grad_norm: 0.9999993816692652, iteration: 73932
loss: 1.0235371589660645,grad_norm: 0.8149948513381141, iteration: 73933
loss: 1.0256731510162354,grad_norm: 0.9390242030831513, iteration: 73934
loss: 1.0403659343719482,grad_norm: 0.9999997542226915, iteration: 73935
loss: 0.988203763961792,grad_norm: 0.9621466885127595, iteration: 73936
loss: 1.0663684606552124,grad_norm: 0.9999993807335203, iteration: 73937
loss: 0.9933936595916748,grad_norm: 0.8090416642949458, iteration: 73938
loss: 1.0298163890838623,grad_norm: 0.9655962641526507, iteration: 73939
loss: 0.9829630255699158,grad_norm: 0.9461283523997035, iteration: 73940
loss: 1.0348416566848755,grad_norm: 0.9745408546730486, iteration: 73941
loss: 1.03274667263031,grad_norm: 0.9999990776186523, iteration: 73942
loss: 1.0573748350143433,grad_norm: 0.999999760769585, iteration: 73943
loss: 1.0032340288162231,grad_norm: 0.908365235939898, iteration: 73944
loss: 1.006110668182373,grad_norm: 0.9999990754924736, iteration: 73945
loss: 0.9975299835205078,grad_norm: 0.9460932570104041, iteration: 73946
loss: 0.9808263778686523,grad_norm: 0.9999990825172045, iteration: 73947
loss: 1.0121136903762817,grad_norm: 0.9999990599984734, iteration: 73948
loss: 1.0497995615005493,grad_norm: 1.0000000080597964, iteration: 73949
loss: 1.032774567604065,grad_norm: 0.9999993387111061, iteration: 73950
loss: 1.0151432752609253,grad_norm: 0.9578066604399833, iteration: 73951
loss: 0.9976615309715271,grad_norm: 0.9216208114556114, iteration: 73952
loss: 0.9827352166175842,grad_norm: 0.9697674001700184, iteration: 73953
loss: 1.029004693031311,grad_norm: 0.9999992003813717, iteration: 73954
loss: 1.0253865718841553,grad_norm: 0.8445392622361925, iteration: 73955
loss: 1.0237759351730347,grad_norm: 0.8918798676974721, iteration: 73956
loss: 1.0438803434371948,grad_norm: 0.9999993545693066, iteration: 73957
loss: 1.0383379459381104,grad_norm: 0.7815206583080824, iteration: 73958
loss: 1.0195845365524292,grad_norm: 0.9884502202402646, iteration: 73959
loss: 0.994681179523468,grad_norm: 0.8587321797474493, iteration: 73960
loss: 1.0047593116760254,grad_norm: 0.9117005970432518, iteration: 73961
loss: 0.9720749855041504,grad_norm: 0.9201871002419678, iteration: 73962
loss: 1.0333483219146729,grad_norm: 0.9646090792796468, iteration: 73963
loss: 1.0080703496932983,grad_norm: 0.7999105624745335, iteration: 73964
loss: 1.0046290159225464,grad_norm: 0.999998952748338, iteration: 73965
loss: 1.0380698442459106,grad_norm: 0.9267409742666908, iteration: 73966
loss: 1.0108513832092285,grad_norm: 0.9754047080566458, iteration: 73967
loss: 1.0430573225021362,grad_norm: 0.999999247601372, iteration: 73968
loss: 0.9651781916618347,grad_norm: 0.985228840541461, iteration: 73969
loss: 1.1517658233642578,grad_norm: 0.9999999563440756, iteration: 73970
loss: 1.058127999305725,grad_norm: 0.9999995338478629, iteration: 73971
loss: 0.9904080033302307,grad_norm: 0.8632324261648973, iteration: 73972
loss: 1.0167341232299805,grad_norm: 0.8850575782322289, iteration: 73973
loss: 0.9995906949043274,grad_norm: 0.900173890949122, iteration: 73974
loss: 0.9952495098114014,grad_norm: 0.9999991700706538, iteration: 73975
loss: 1.0263540744781494,grad_norm: 0.9999994425654071, iteration: 73976
loss: 0.9991269707679749,grad_norm: 0.9999990116254506, iteration: 73977
loss: 0.9861924052238464,grad_norm: 0.8141979967570679, iteration: 73978
loss: 1.035951018333435,grad_norm: 0.9999993353949315, iteration: 73979
loss: 1.0186020135879517,grad_norm: 0.99999960439548, iteration: 73980
loss: 1.0593440532684326,grad_norm: 0.9999992379060022, iteration: 73981
loss: 1.0080817937850952,grad_norm: 0.7871617016872777, iteration: 73982
loss: 1.035983681678772,grad_norm: 0.9999992132909769, iteration: 73983
loss: 1.0013898611068726,grad_norm: 0.9574785429473401, iteration: 73984
loss: 1.0035483837127686,grad_norm: 0.9579870032141677, iteration: 73985
loss: 1.0052604675292969,grad_norm: 0.9219557643476961, iteration: 73986
loss: 1.045849323272705,grad_norm: 0.9999994020101798, iteration: 73987
loss: 1.0063292980194092,grad_norm: 0.9012040788903766, iteration: 73988
loss: 1.0465443134307861,grad_norm: 0.9999994808764836, iteration: 73989
loss: 1.0151978731155396,grad_norm: 0.9999992365567414, iteration: 73990
loss: 1.0000118017196655,grad_norm: 0.9714806693220427, iteration: 73991
loss: 0.9961467385292053,grad_norm: 0.9999992494996169, iteration: 73992
loss: 1.0218199491500854,grad_norm: 0.8711419571222011, iteration: 73993
loss: 1.0038968324661255,grad_norm: 0.999999226792933, iteration: 73994
loss: 1.022466778755188,grad_norm: 0.999999603719187, iteration: 73995
loss: 1.0100208520889282,grad_norm: 0.9463141326731186, iteration: 73996
loss: 1.016970157623291,grad_norm: 0.9999991688761084, iteration: 73997
loss: 1.024739384651184,grad_norm: 0.8461526304975072, iteration: 73998
loss: 1.0441969633102417,grad_norm: 0.9999992970840426, iteration: 73999
loss: 0.977771520614624,grad_norm: 0.9999991642684368, iteration: 74000
loss: 1.044924020767212,grad_norm: 0.9999993507860827, iteration: 74001
loss: 1.0063811540603638,grad_norm: 0.9999990915813918, iteration: 74002
loss: 1.0074392557144165,grad_norm: 0.960959981118551, iteration: 74003
loss: 0.9951460361480713,grad_norm: 0.8900791170645076, iteration: 74004
loss: 1.0629193782806396,grad_norm: 0.9999990242003327, iteration: 74005
loss: 0.9812584519386292,grad_norm: 0.8077069609982882, iteration: 74006
loss: 1.02479088306427,grad_norm: 0.9999991774046273, iteration: 74007
loss: 0.9982946515083313,grad_norm: 0.999998929455638, iteration: 74008
loss: 0.9758871793746948,grad_norm: 0.9648205984286015, iteration: 74009
loss: 1.0051809549331665,grad_norm: 0.9117803675060626, iteration: 74010
loss: 1.0180281400680542,grad_norm: 0.9999992812826055, iteration: 74011
loss: 1.0219523906707764,grad_norm: 0.9738269695335396, iteration: 74012
loss: 1.0164780616760254,grad_norm: 0.9999998816086407, iteration: 74013
loss: 1.0089399814605713,grad_norm: 0.8391671253005845, iteration: 74014
loss: 0.9925317764282227,grad_norm: 0.9675966872030219, iteration: 74015
loss: 0.9642931818962097,grad_norm: 0.9999991192076583, iteration: 74016
loss: 0.983529269695282,grad_norm: 0.8558952690563237, iteration: 74017
loss: 1.073351502418518,grad_norm: 0.9999992703507988, iteration: 74018
loss: 0.9844298958778381,grad_norm: 0.8720064399693521, iteration: 74019
loss: 0.9825828075408936,grad_norm: 0.8613516649607187, iteration: 74020
loss: 0.9970033168792725,grad_norm: 0.9999998129618412, iteration: 74021
loss: 1.0394108295440674,grad_norm: 0.9999994816104664, iteration: 74022
loss: 1.0504851341247559,grad_norm: 0.9999992253946353, iteration: 74023
loss: 1.0308622121810913,grad_norm: 0.9938133398596009, iteration: 74024
loss: 0.9963736534118652,grad_norm: 0.9999996886394092, iteration: 74025
loss: 1.0222949981689453,grad_norm: 0.9999994092706163, iteration: 74026
loss: 1.0055752992630005,grad_norm: 0.9999992778142294, iteration: 74027
loss: 0.9771996736526489,grad_norm: 0.8678248452217838, iteration: 74028
loss: 1.0192779302597046,grad_norm: 0.9999997778606009, iteration: 74029
loss: 1.0011276006698608,grad_norm: 0.9375896303745058, iteration: 74030
loss: 0.9855101704597473,grad_norm: 0.9867881872476993, iteration: 74031
loss: 0.9827956557273865,grad_norm: 0.9044066218771456, iteration: 74032
loss: 1.0508112907409668,grad_norm: 0.9116935684500145, iteration: 74033
loss: 1.035552740097046,grad_norm: 0.9999991567657579, iteration: 74034
loss: 0.9907155632972717,grad_norm: 0.9508058252845205, iteration: 74035
loss: 1.0110081434249878,grad_norm: 0.8241422137163661, iteration: 74036
loss: 1.0432699918746948,grad_norm: 0.8971121654995426, iteration: 74037
loss: 1.0340996980667114,grad_norm: 0.9736070115191372, iteration: 74038
loss: 1.0710225105285645,grad_norm: 0.9914505177513954, iteration: 74039
loss: 1.081345796585083,grad_norm: 0.999999136192051, iteration: 74040
loss: 1.0783607959747314,grad_norm: 0.9999994367737326, iteration: 74041
loss: 1.034645438194275,grad_norm: 0.9634258330795559, iteration: 74042
loss: 0.9939213991165161,grad_norm: 0.9999997430974694, iteration: 74043
loss: 1.0337191820144653,grad_norm: 0.9999996251902242, iteration: 74044
loss: 1.032385230064392,grad_norm: 0.9999998659145779, iteration: 74045
loss: 1.1474182605743408,grad_norm: 0.9999998883812324, iteration: 74046
loss: 1.0946491956710815,grad_norm: 0.9999999393702936, iteration: 74047
loss: 1.1063199043273926,grad_norm: 0.9999993178418628, iteration: 74048
loss: 1.3545074462890625,grad_norm: 1.0000000491127616, iteration: 74049
loss: 1.4970932006835938,grad_norm: 0.9999999579814728, iteration: 74050
loss: 1.2390661239624023,grad_norm: 0.9999997968160319, iteration: 74051
loss: 1.1795865297317505,grad_norm: 0.9999996310634627, iteration: 74052
loss: 1.2508814334869385,grad_norm: 0.9999997172864188, iteration: 74053
loss: 1.2199530601501465,grad_norm: 1.0000000347925948, iteration: 74054
loss: 1.0889273881912231,grad_norm: 0.999999956123509, iteration: 74055
loss: 1.0323668718338013,grad_norm: 0.9999996372095926, iteration: 74056
loss: 1.1308157444000244,grad_norm: 0.9999999922880478, iteration: 74057
loss: 1.0754138231277466,grad_norm: 0.9999993601415861, iteration: 74058
loss: 1.0033810138702393,grad_norm: 0.9999991134610026, iteration: 74059
loss: 0.9798989295959473,grad_norm: 0.9999992038910411, iteration: 74060
loss: 0.9621315598487854,grad_norm: 0.9926235344012809, iteration: 74061
loss: 1.0689226388931274,grad_norm: 0.999999548051681, iteration: 74062
loss: 1.020432472229004,grad_norm: 0.7260509525378896, iteration: 74063
loss: 1.0684365034103394,grad_norm: 0.9999993375290042, iteration: 74064
loss: 0.9731378555297852,grad_norm: 0.999999226439122, iteration: 74065
loss: 1.0539259910583496,grad_norm: 0.9999992865048252, iteration: 74066
loss: 1.010239839553833,grad_norm: 0.9475750245008658, iteration: 74067
loss: 1.0198668241500854,grad_norm: 0.9099264619725781, iteration: 74068
loss: 1.0373857021331787,grad_norm: 0.9999994767593211, iteration: 74069
loss: 0.9914652705192566,grad_norm: 0.950651702120625, iteration: 74070
loss: 1.0049192905426025,grad_norm: 0.991992747966892, iteration: 74071
loss: 1.0774816274642944,grad_norm: 0.999999957603503, iteration: 74072
loss: 1.0692299604415894,grad_norm: 0.9457771785176493, iteration: 74073
loss: 1.0027986764907837,grad_norm: 0.9999997936742437, iteration: 74074
loss: 1.0131782293319702,grad_norm: 0.8666879823084124, iteration: 74075
loss: 1.0057165622711182,grad_norm: 0.8794086041248645, iteration: 74076
loss: 0.9915144443511963,grad_norm: 0.7863953212828896, iteration: 74077
loss: 1.0120515823364258,grad_norm: 0.9999991294422256, iteration: 74078
loss: 1.0144845247268677,grad_norm: 0.9999993550383103, iteration: 74079
loss: 0.968299925327301,grad_norm: 0.999999207958733, iteration: 74080
loss: 0.9843637347221375,grad_norm: 0.9739486015099306, iteration: 74081
loss: 1.0420082807540894,grad_norm: 0.9999991891749096, iteration: 74082
loss: 1.1077892780303955,grad_norm: 0.8519033941688788, iteration: 74083
loss: 0.9628600478172302,grad_norm: 0.8192952881978648, iteration: 74084
loss: 1.0769857168197632,grad_norm: 0.9838963303327426, iteration: 74085
loss: 1.043158769607544,grad_norm: 0.9999991085622635, iteration: 74086
loss: 1.0601012706756592,grad_norm: 0.894191266835446, iteration: 74087
loss: 1.0505516529083252,grad_norm: 0.999999053475005, iteration: 74088
loss: 1.046019434928894,grad_norm: 0.9999992446557548, iteration: 74089
loss: 0.9688137173652649,grad_norm: 0.9123276342140216, iteration: 74090
loss: 1.0246065855026245,grad_norm: 0.9999991821329567, iteration: 74091
loss: 1.0353997945785522,grad_norm: 0.9999990482013527, iteration: 74092
loss: 1.0418610572814941,grad_norm: 0.8958809980120293, iteration: 74093
loss: 1.0540014505386353,grad_norm: 0.9999992076258115, iteration: 74094
loss: 1.1050937175750732,grad_norm: 0.9999993505238325, iteration: 74095
loss: 1.2225314378738403,grad_norm: 0.9999998957530024, iteration: 74096
loss: 1.0813958644866943,grad_norm: 0.9051839496289513, iteration: 74097
loss: 1.0093382596969604,grad_norm: 0.8786110911330447, iteration: 74098
loss: 1.0623356103897095,grad_norm: 0.9999991000608903, iteration: 74099
loss: 1.0409659147262573,grad_norm: 0.999999158001981, iteration: 74100
loss: 0.9913378953933716,grad_norm: 0.9999991099092341, iteration: 74101
loss: 0.9785432815551758,grad_norm: 0.9185980513325966, iteration: 74102
loss: 1.0029770135879517,grad_norm: 0.9002748333973164, iteration: 74103
loss: 1.0201144218444824,grad_norm: 0.9999990158224633, iteration: 74104
loss: 1.2054070234298706,grad_norm: 0.9390623789378791, iteration: 74105
loss: 1.053428292274475,grad_norm: 0.984187117007208, iteration: 74106
loss: 1.0086860656738281,grad_norm: 0.932178749221253, iteration: 74107
loss: 1.108885645866394,grad_norm: 0.9999990079566312, iteration: 74108
loss: 0.986475944519043,grad_norm: 0.8322821841002119, iteration: 74109
loss: 1.0846574306488037,grad_norm: 0.9999992752275815, iteration: 74110
loss: 1.0432260036468506,grad_norm: 0.8660620802700392, iteration: 74111
loss: 0.9815157651901245,grad_norm: 0.938346259104054, iteration: 74112
loss: 1.0126107931137085,grad_norm: 0.8667226028579742, iteration: 74113
loss: 1.1486725807189941,grad_norm: 0.9999996862668384, iteration: 74114
loss: 1.0086965560913086,grad_norm: 0.9646837378293841, iteration: 74115
loss: 0.9862930178642273,grad_norm: 0.9999990915100843, iteration: 74116
loss: 1.0457653999328613,grad_norm: 0.9742014454031503, iteration: 74117
loss: 1.0408204793930054,grad_norm: 0.9999990730436196, iteration: 74118
loss: 1.0485384464263916,grad_norm: 0.9999992433306765, iteration: 74119
loss: 1.01552414894104,grad_norm: 0.9999990544226864, iteration: 74120
loss: 0.9874910116195679,grad_norm: 0.9272382907857672, iteration: 74121
loss: 1.016375184059143,grad_norm: 0.9999989987442712, iteration: 74122
loss: 1.0007433891296387,grad_norm: 0.9725755828319887, iteration: 74123
loss: 1.0035581588745117,grad_norm: 0.9972140588136796, iteration: 74124
loss: 0.9987318515777588,grad_norm: 0.9999990358524904, iteration: 74125
loss: 1.0245928764343262,grad_norm: 0.8102091200170052, iteration: 74126
loss: 1.0502771139144897,grad_norm: 0.9999993586390072, iteration: 74127
loss: 1.0109554529190063,grad_norm: 0.819679930139562, iteration: 74128
loss: 0.9986562728881836,grad_norm: 0.9999990297170381, iteration: 74129
loss: 1.0377286672592163,grad_norm: 0.9732162254160903, iteration: 74130
loss: 1.0759881734848022,grad_norm: 0.9959029980035539, iteration: 74131
loss: 0.9696623682975769,grad_norm: 0.9999994008082945, iteration: 74132
loss: 1.037290334701538,grad_norm: 0.9999990091540658, iteration: 74133
loss: 1.017744541168213,grad_norm: 0.9999990336473185, iteration: 74134
loss: 1.0234007835388184,grad_norm: 0.9398591053175288, iteration: 74135
loss: 1.0313684940338135,grad_norm: 0.8163476776148502, iteration: 74136
loss: 0.9979361295700073,grad_norm: 0.9531450252888882, iteration: 74137
loss: 1.0796912908554077,grad_norm: 0.9999993363342019, iteration: 74138
loss: 0.9570634365081787,grad_norm: 0.9498699680080539, iteration: 74139
loss: 1.0401443243026733,grad_norm: 0.8629077256002083, iteration: 74140
loss: 1.0102037191390991,grad_norm: 0.8755823632452562, iteration: 74141
loss: 1.068066120147705,grad_norm: 0.9999992422400712, iteration: 74142
loss: 1.0684788227081299,grad_norm: 0.9999990816508766, iteration: 74143
loss: 1.0087682008743286,grad_norm: 0.8621592095533652, iteration: 74144
loss: 1.0570627450942993,grad_norm: 0.9999996194513573, iteration: 74145
loss: 1.0298974514007568,grad_norm: 0.9999992453128193, iteration: 74146
loss: 1.0718778371810913,grad_norm: 0.9999997965498721, iteration: 74147
loss: 1.08224356174469,grad_norm: 0.9999999365574714, iteration: 74148
loss: 1.1103322505950928,grad_norm: 0.9999993447580743, iteration: 74149
loss: 1.2095215320587158,grad_norm: 0.9999994247051074, iteration: 74150
loss: 1.0024924278259277,grad_norm: 0.8358569073229335, iteration: 74151
loss: 1.003428339958191,grad_norm: 0.9694694935970448, iteration: 74152
loss: 0.9935905337333679,grad_norm: 0.9999991372762151, iteration: 74153
loss: 1.0526260137557983,grad_norm: 0.9178318065850893, iteration: 74154
loss: 1.0025608539581299,grad_norm: 0.9745283967972723, iteration: 74155
loss: 0.9954773187637329,grad_norm: 0.9999989237085501, iteration: 74156
loss: 1.06672203540802,grad_norm: 0.978389247415675, iteration: 74157
loss: 1.0157248973846436,grad_norm: 0.7601272270001751, iteration: 74158
loss: 1.0068731307983398,grad_norm: 0.9096579958000669, iteration: 74159
loss: 1.037358283996582,grad_norm: 0.9999990333549785, iteration: 74160
loss: 1.046169400215149,grad_norm: 0.9999991263651185, iteration: 74161
loss: 1.0737918615341187,grad_norm: 0.9999991477250586, iteration: 74162
loss: 1.0476688146591187,grad_norm: 0.9999996943420905, iteration: 74163
loss: 1.0499320030212402,grad_norm: 0.999999669253609, iteration: 74164
loss: 1.0371477603912354,grad_norm: 0.9999991328559474, iteration: 74165
loss: 1.0430904626846313,grad_norm: 0.9999991818969909, iteration: 74166
loss: 1.056801199913025,grad_norm: 0.9800190733047242, iteration: 74167
loss: 1.0365065336227417,grad_norm: 0.8102689731347882, iteration: 74168
loss: 1.061018943786621,grad_norm: 0.9597515081879979, iteration: 74169
loss: 1.033452033996582,grad_norm: 0.9719619842707904, iteration: 74170
loss: 1.0619703531265259,grad_norm: 0.9677539457757477, iteration: 74171
loss: 1.0455517768859863,grad_norm: 0.8342149943781841, iteration: 74172
loss: 0.975871741771698,grad_norm: 0.8981730210723361, iteration: 74173
loss: 1.0524194240570068,grad_norm: 0.999999100603387, iteration: 74174
loss: 1.050580620765686,grad_norm: 0.9999999810894031, iteration: 74175
loss: 1.0197865962982178,grad_norm: 0.9360410599375659, iteration: 74176
loss: 0.9966707825660706,grad_norm: 0.9999990689475852, iteration: 74177
loss: 1.031348466873169,grad_norm: 0.8042290413257234, iteration: 74178
loss: 1.0075454711914062,grad_norm: 0.9439275686419828, iteration: 74179
loss: 1.0124679803848267,grad_norm: 0.999999465463842, iteration: 74180
loss: 0.9727287888526917,grad_norm: 0.9689643013068229, iteration: 74181
loss: 1.0945079326629639,grad_norm: 0.999999904233289, iteration: 74182
loss: 0.9911363124847412,grad_norm: 0.9999988563761962, iteration: 74183
loss: 1.115304708480835,grad_norm: 0.9999992704745252, iteration: 74184
loss: 1.0923283100128174,grad_norm: 0.9433538038556345, iteration: 74185
loss: 1.0515390634536743,grad_norm: 0.9999998349817166, iteration: 74186
loss: 1.1574474573135376,grad_norm: 0.9999999003510445, iteration: 74187
loss: 1.0810856819152832,grad_norm: 0.9999997119010524, iteration: 74188
loss: 1.0272538661956787,grad_norm: 0.9999993251598239, iteration: 74189
loss: 1.0727026462554932,grad_norm: 0.999999744264712, iteration: 74190
loss: 1.0661683082580566,grad_norm: 0.9999991742637775, iteration: 74191
loss: 1.069728136062622,grad_norm: 0.92419740597487, iteration: 74192
loss: 1.051909327507019,grad_norm: 0.9327546147275019, iteration: 74193
loss: 1.194658875465393,grad_norm: 0.9999994485511452, iteration: 74194
loss: 1.0480124950408936,grad_norm: 0.999999128913428, iteration: 74195
loss: 0.9979848861694336,grad_norm: 0.9432038192184653, iteration: 74196
loss: 1.1472415924072266,grad_norm: 0.9999991253701724, iteration: 74197
loss: 1.0488851070404053,grad_norm: 0.8337987150636492, iteration: 74198
loss: 1.0156803131103516,grad_norm: 0.9999994290409514, iteration: 74199
loss: 0.994839072227478,grad_norm: 0.9999991776980122, iteration: 74200
loss: 1.1703022718429565,grad_norm: 0.9999994533389303, iteration: 74201
loss: 1.1576435565948486,grad_norm: 0.9999997093160121, iteration: 74202
loss: 1.1038823127746582,grad_norm: 0.9999991713113402, iteration: 74203
loss: 1.0157710313796997,grad_norm: 0.999999388146201, iteration: 74204
loss: 1.0395530462265015,grad_norm: 0.9999996752024889, iteration: 74205
loss: 1.02077054977417,grad_norm: 0.9999997772749721, iteration: 74206
loss: 1.0162965059280396,grad_norm: 0.999999702018193, iteration: 74207
loss: 0.9942302107810974,grad_norm: 0.9999991782287599, iteration: 74208
loss: 1.0121654272079468,grad_norm: 0.9999993441995584, iteration: 74209
loss: 1.0484559535980225,grad_norm: 0.9999996971423873, iteration: 74210
loss: 1.0506230592727661,grad_norm: 0.9999998763071029, iteration: 74211
loss: 1.1864571571350098,grad_norm: 0.9999999306098708, iteration: 74212
loss: 1.0442982912063599,grad_norm: 0.9999996673232977, iteration: 74213
loss: 1.0268845558166504,grad_norm: 0.99999898761559, iteration: 74214
loss: 1.0151909589767456,grad_norm: 0.9999991205116174, iteration: 74215
loss: 0.9934235215187073,grad_norm: 0.943962995454582, iteration: 74216
loss: 1.0262188911437988,grad_norm: 0.9999990242637752, iteration: 74217
loss: 1.0160225629806519,grad_norm: 0.9673157756188822, iteration: 74218
loss: 1.0205727815628052,grad_norm: 0.8625130999927358, iteration: 74219
loss: 1.0737930536270142,grad_norm: 0.9694422641936388, iteration: 74220
loss: 1.067845344543457,grad_norm: 0.9999990700695738, iteration: 74221
loss: 0.9847428798675537,grad_norm: 0.9200792319348015, iteration: 74222
loss: 1.0451040267944336,grad_norm: 0.999999687340034, iteration: 74223
loss: 1.0290195941925049,grad_norm: 0.869785383235225, iteration: 74224
loss: 1.093382477760315,grad_norm: 0.9999995880484007, iteration: 74225
loss: 1.0120235681533813,grad_norm: 0.9721580813899707, iteration: 74226
loss: 1.0271341800689697,grad_norm: 0.8914800355567585, iteration: 74227
loss: 1.0083494186401367,grad_norm: 0.9999994547216711, iteration: 74228
loss: 1.0225279331207275,grad_norm: 0.9999992414258345, iteration: 74229
loss: 1.048026204109192,grad_norm: 0.9999992089068038, iteration: 74230
loss: 1.02898371219635,grad_norm: 0.9999990605967878, iteration: 74231
loss: 1.0027744770050049,grad_norm: 0.8914199884301567, iteration: 74232
loss: 1.0326472520828247,grad_norm: 0.9999990569473711, iteration: 74233
loss: 1.0382075309753418,grad_norm: 0.9999997750857226, iteration: 74234
loss: 1.0395179986953735,grad_norm: 0.9999994157293968, iteration: 74235
loss: 1.0261080265045166,grad_norm: 0.9999991278139473, iteration: 74236
loss: 1.0280097723007202,grad_norm: 0.9393073711952241, iteration: 74237
loss: 1.0850880146026611,grad_norm: 0.9999991632527457, iteration: 74238
loss: 1.0767465829849243,grad_norm: 0.9999992295350625, iteration: 74239
loss: 1.0598530769348145,grad_norm: 0.9329765430568835, iteration: 74240
loss: 0.9941004514694214,grad_norm: 0.9999992024440953, iteration: 74241
loss: 1.0313050746917725,grad_norm: 0.8810021603750993, iteration: 74242
loss: 0.994781494140625,grad_norm: 0.9838983731917244, iteration: 74243
loss: 0.976692259311676,grad_norm: 0.9117217732599885, iteration: 74244
loss: 1.1208921670913696,grad_norm: 0.9999996210356823, iteration: 74245
loss: 0.9832764267921448,grad_norm: 0.9001285493700983, iteration: 74246
loss: 1.0499920845031738,grad_norm: 0.9999993039600179, iteration: 74247
loss: 1.1027543544769287,grad_norm: 0.9999993864187169, iteration: 74248
loss: 0.9937849044799805,grad_norm: 0.999999011957297, iteration: 74249
loss: 1.024245023727417,grad_norm: 0.9999991838073032, iteration: 74250
loss: 1.0390926599502563,grad_norm: 0.9228344689679654, iteration: 74251
loss: 1.0575361251831055,grad_norm: 1.0000000526960837, iteration: 74252
loss: 1.0070348978042603,grad_norm: 0.9111345112318929, iteration: 74253
loss: 1.0982719659805298,grad_norm: 0.9999991474158424, iteration: 74254
loss: 1.0377638339996338,grad_norm: 0.9635914910865353, iteration: 74255
loss: 1.0185545682907104,grad_norm: 0.9999992632673678, iteration: 74256
loss: 1.0039362907409668,grad_norm: 0.9999991482632478, iteration: 74257
loss: 0.964625358581543,grad_norm: 0.9999992988940771, iteration: 74258
loss: 1.004595398902893,grad_norm: 0.8234699207917608, iteration: 74259
loss: 1.0434765815734863,grad_norm: 0.9999993385059371, iteration: 74260
loss: 1.0040018558502197,grad_norm: 0.9999990053385241, iteration: 74261
loss: 0.9971726536750793,grad_norm: 0.9999990829602756, iteration: 74262
loss: 1.088782548904419,grad_norm: 0.9999996817396852, iteration: 74263
loss: 0.9860438108444214,grad_norm: 0.8733966032068561, iteration: 74264
loss: 0.9938263893127441,grad_norm: 0.8482845414884639, iteration: 74265
loss: 1.007391333580017,grad_norm: 0.9506274991249546, iteration: 74266
loss: 0.9912638068199158,grad_norm: 0.9023151196719056, iteration: 74267
loss: 1.008261799812317,grad_norm: 0.9532314315165895, iteration: 74268
loss: 1.0158771276474,grad_norm: 0.9999989634143839, iteration: 74269
loss: 1.0057169198989868,grad_norm: 0.8209431601005222, iteration: 74270
loss: 0.9834352135658264,grad_norm: 0.8413599285841141, iteration: 74271
loss: 0.9862285852432251,grad_norm: 0.9999990458481411, iteration: 74272
loss: 0.9977702498435974,grad_norm: 0.9999992550031602, iteration: 74273
loss: 0.9984009265899658,grad_norm: 0.9167527481234569, iteration: 74274
loss: 1.0049604177474976,grad_norm: 0.9999990132628778, iteration: 74275
loss: 0.9743263125419617,grad_norm: 0.7837905115044007, iteration: 74276
loss: 1.0196337699890137,grad_norm: 0.9999990252673889, iteration: 74277
loss: 1.0031083822250366,grad_norm: 0.9328619870879775, iteration: 74278
loss: 1.020918846130371,grad_norm: 0.9999996393922749, iteration: 74279
loss: 1.044538974761963,grad_norm: 0.797607013820429, iteration: 74280
loss: 1.013344168663025,grad_norm: 0.8975735627720072, iteration: 74281
loss: 1.1092129945755005,grad_norm: 0.971297672412392, iteration: 74282
loss: 0.9861086010932922,grad_norm: 0.9999992343512019, iteration: 74283
loss: 1.0132771730422974,grad_norm: 0.9295080654337481, iteration: 74284
loss: 1.0074976682662964,grad_norm: 0.9999992126954331, iteration: 74285
loss: 1.0050166845321655,grad_norm: 0.9999993085788296, iteration: 74286
loss: 1.0127358436584473,grad_norm: 0.9619405303078902, iteration: 74287
loss: 1.0269863605499268,grad_norm: 0.9999994611943679, iteration: 74288
loss: 1.0893731117248535,grad_norm: 0.999999963394323, iteration: 74289
loss: 1.0126967430114746,grad_norm: 0.82112020169299, iteration: 74290
loss: 1.0308690071105957,grad_norm: 0.8454868502386541, iteration: 74291
loss: 1.062355399131775,grad_norm: 0.9999997572413211, iteration: 74292
loss: 0.9838220477104187,grad_norm: 0.8155045683559341, iteration: 74293
loss: 1.0763636827468872,grad_norm: 0.8080765311210502, iteration: 74294
loss: 0.994939386844635,grad_norm: 0.8292714946492065, iteration: 74295
loss: 1.0128047466278076,grad_norm: 0.8048495110527902, iteration: 74296
loss: 1.0153188705444336,grad_norm: 0.9999990902389154, iteration: 74297
loss: 1.1795071363449097,grad_norm: 0.9999997162908837, iteration: 74298
loss: 1.1001859903335571,grad_norm: 0.9999990890661016, iteration: 74299
loss: 1.0121221542358398,grad_norm: 0.9478415522598922, iteration: 74300
loss: 1.0082182884216309,grad_norm: 0.9999991573206096, iteration: 74301
loss: 0.9855101704597473,grad_norm: 0.9672238725342338, iteration: 74302
loss: 0.9719772338867188,grad_norm: 0.7911948449840325, iteration: 74303
loss: 0.9951532483100891,grad_norm: 0.844870669277923, iteration: 74304
loss: 0.9970366954803467,grad_norm: 0.9999993215709705, iteration: 74305
loss: 0.9917176961898804,grad_norm: 0.9999990782627897, iteration: 74306
loss: 0.9999880194664001,grad_norm: 0.9999998412822971, iteration: 74307
loss: 1.0956013202667236,grad_norm: 0.9999990508711921, iteration: 74308
loss: 0.9972639679908752,grad_norm: 0.9999992725314893, iteration: 74309
loss: 1.045671820640564,grad_norm: 0.999999654680059, iteration: 74310
loss: 1.0779825448989868,grad_norm: 0.9999992579362122, iteration: 74311
loss: 1.025127649307251,grad_norm: 0.9999999724598865, iteration: 74312
loss: 0.9897918105125427,grad_norm: 0.9999990655544788, iteration: 74313
loss: 1.0146214962005615,grad_norm: 0.9999990761223512, iteration: 74314
loss: 1.0115247964859009,grad_norm: 0.9999990692712383, iteration: 74315
loss: 0.972675085067749,grad_norm: 0.999999100188997, iteration: 74316
loss: 1.0261443853378296,grad_norm: 0.9683464309270348, iteration: 74317
loss: 0.9903536438941956,grad_norm: 0.9954068567455431, iteration: 74318
loss: 0.9765698313713074,grad_norm: 0.9999994631408639, iteration: 74319
loss: 1.0736286640167236,grad_norm: 0.9550616069101986, iteration: 74320
loss: 1.0122958421707153,grad_norm: 0.8952854105975288, iteration: 74321
loss: 1.070538878440857,grad_norm: 0.9999992073681638, iteration: 74322
loss: 1.0580607652664185,grad_norm: 0.9999992777078374, iteration: 74323
loss: 1.0200926065444946,grad_norm: 0.9806048611237258, iteration: 74324
loss: 0.9901028871536255,grad_norm: 0.8398116888265094, iteration: 74325
loss: 1.0082898139953613,grad_norm: 0.9999992238993627, iteration: 74326
loss: 0.9896492958068848,grad_norm: 0.9101266693013506, iteration: 74327
loss: 0.9826948046684265,grad_norm: 0.9373317826471264, iteration: 74328
loss: 1.0335768461227417,grad_norm: 0.9475535655878415, iteration: 74329
loss: 0.9898892045021057,grad_norm: 0.9032300896390374, iteration: 74330
loss: 1.029197096824646,grad_norm: 0.8732192684598385, iteration: 74331
loss: 1.0063024759292603,grad_norm: 0.8633749119529043, iteration: 74332
loss: 0.9859734177589417,grad_norm: 0.9437505978997331, iteration: 74333
loss: 1.0051637887954712,grad_norm: 0.9550975663910627, iteration: 74334
loss: 1.0596481561660767,grad_norm: 0.8860997003690763, iteration: 74335
loss: 0.9975149035453796,grad_norm: 0.9837148179852319, iteration: 74336
loss: 0.9939995408058167,grad_norm: 0.9905723382247783, iteration: 74337
loss: 0.9852364659309387,grad_norm: 0.899956693279502, iteration: 74338
loss: 1.025405764579773,grad_norm: 0.9999989203920495, iteration: 74339
loss: 0.9820066094398499,grad_norm: 0.9999992551537159, iteration: 74340
loss: 0.9984934329986572,grad_norm: 0.9999990728690802, iteration: 74341
loss: 0.995550274848938,grad_norm: 0.8958510969632981, iteration: 74342
loss: 1.0042004585266113,grad_norm: 0.9192112998272209, iteration: 74343
loss: 0.974646806716919,grad_norm: 0.9999992666728804, iteration: 74344
loss: 1.0236936807632446,grad_norm: 0.9517772962633925, iteration: 74345
loss: 1.0201679468154907,grad_norm: 0.8682947343878648, iteration: 74346
loss: 1.0181926488876343,grad_norm: 0.9999991900905156, iteration: 74347
loss: 0.9682531356811523,grad_norm: 0.9165418274988457, iteration: 74348
loss: 0.9963585138320923,grad_norm: 0.802578422230982, iteration: 74349
loss: 1.1175711154937744,grad_norm: 0.9999998891832381, iteration: 74350
loss: 1.013529896736145,grad_norm: 0.920376235292193, iteration: 74351
loss: 0.9817753434181213,grad_norm: 0.916090022085729, iteration: 74352
loss: 1.0130796432495117,grad_norm: 0.9093126683027276, iteration: 74353
loss: 1.023301362991333,grad_norm: 0.9999990504718694, iteration: 74354
loss: 1.0262330770492554,grad_norm: 0.9999992576299257, iteration: 74355
loss: 0.9990566968917847,grad_norm: 0.9669085538357268, iteration: 74356
loss: 1.014882206916809,grad_norm: 0.8574065442524226, iteration: 74357
loss: 0.998114824295044,grad_norm: 0.8267485713901603, iteration: 74358
loss: 1.0339293479919434,grad_norm: 0.9999990795190137, iteration: 74359
loss: 0.9649586081504822,grad_norm: 0.9999998974753957, iteration: 74360
loss: 0.9966064095497131,grad_norm: 0.9756070274501678, iteration: 74361
loss: 0.9614318013191223,grad_norm: 0.9999990572746225, iteration: 74362
loss: 0.9951008558273315,grad_norm: 0.9739365220417346, iteration: 74363
loss: 0.9686666131019592,grad_norm: 0.9768472273603487, iteration: 74364
loss: 1.0460481643676758,grad_norm: 0.9999992208838138, iteration: 74365
loss: 1.0107958316802979,grad_norm: 0.9628331349737428, iteration: 74366
loss: 1.0414118766784668,grad_norm: 0.9999999112890977, iteration: 74367
loss: 1.0164809226989746,grad_norm: 0.9578093671422102, iteration: 74368
loss: 1.021547555923462,grad_norm: 0.806502800440386, iteration: 74369
loss: 1.0340369939804077,grad_norm: 0.9475563394559621, iteration: 74370
loss: 0.9915081262588501,grad_norm: 0.8066700135760028, iteration: 74371
loss: 1.0000947713851929,grad_norm: 0.9999991094270043, iteration: 74372
loss: 1.071133017539978,grad_norm: 0.9999992337117253, iteration: 74373
loss: 0.9903982877731323,grad_norm: 0.9911684587468033, iteration: 74374
loss: 1.016305923461914,grad_norm: 0.9781471587888646, iteration: 74375
loss: 1.0133247375488281,grad_norm: 0.9837546301287928, iteration: 74376
loss: 0.9812519550323486,grad_norm: 0.8397207904011871, iteration: 74377
loss: 1.0491994619369507,grad_norm: 0.9999996404817172, iteration: 74378
loss: 1.030404806137085,grad_norm: 0.7793938734439241, iteration: 74379
loss: 1.0050920248031616,grad_norm: 0.999999155568702, iteration: 74380
loss: 0.9737770557403564,grad_norm: 0.9743999596584582, iteration: 74381
loss: 1.0351781845092773,grad_norm: 0.9999991411389733, iteration: 74382
loss: 0.9730424880981445,grad_norm: 0.989586417750535, iteration: 74383
loss: 1.0210986137390137,grad_norm: 0.9469922099609802, iteration: 74384
loss: 0.9744018316268921,grad_norm: 0.9391764316161206, iteration: 74385
loss: 1.0042623281478882,grad_norm: 0.8005771904713603, iteration: 74386
loss: 1.0072886943817139,grad_norm: 0.9999992253062653, iteration: 74387
loss: 1.125779151916504,grad_norm: 0.999999817683216, iteration: 74388
loss: 0.9938772320747375,grad_norm: 0.9850815021869231, iteration: 74389
loss: 1.035463809967041,grad_norm: 0.9294672437590275, iteration: 74390
loss: 0.9902876019477844,grad_norm: 0.9813823405049781, iteration: 74391
loss: 1.0018640756607056,grad_norm: 0.9210550610262996, iteration: 74392
loss: 1.0272324085235596,grad_norm: 0.928011262011796, iteration: 74393
loss: 1.0133877992630005,grad_norm: 0.8354519091101791, iteration: 74394
loss: 1.0218417644500732,grad_norm: 0.9266644334965486, iteration: 74395
loss: 1.0079604387283325,grad_norm: 0.8032667001356503, iteration: 74396
loss: 1.0402183532714844,grad_norm: 0.8529985285243583, iteration: 74397
loss: 1.0191407203674316,grad_norm: 0.8134209762220513, iteration: 74398
loss: 1.0034687519073486,grad_norm: 0.9197694626043837, iteration: 74399
loss: 1.1027190685272217,grad_norm: 0.9999990615850893, iteration: 74400
loss: 0.9617751836776733,grad_norm: 0.9202280165891038, iteration: 74401
loss: 1.0323618650436401,grad_norm: 0.9908168477346493, iteration: 74402
loss: 1.0696579217910767,grad_norm: 0.9999990924426864, iteration: 74403
loss: 0.9995706081390381,grad_norm: 0.9999990249326001, iteration: 74404
loss: 0.9923932552337646,grad_norm: 0.9610500075723472, iteration: 74405
loss: 0.9814479947090149,grad_norm: 0.9999991399642562, iteration: 74406
loss: 1.0372306108474731,grad_norm: 0.9472519626094581, iteration: 74407
loss: 1.0407551527023315,grad_norm: 0.9999989646897873, iteration: 74408
loss: 1.06904935836792,grad_norm: 0.9998221573671556, iteration: 74409
loss: 0.9778918623924255,grad_norm: 0.9323855359958492, iteration: 74410
loss: 0.9754253625869751,grad_norm: 0.9819234964834584, iteration: 74411
loss: 1.0002639293670654,grad_norm: 0.8190039220517994, iteration: 74412
loss: 1.0005377531051636,grad_norm: 0.9999990879044874, iteration: 74413
loss: 1.0432891845703125,grad_norm: 0.9565026614580217, iteration: 74414
loss: 1.0953972339630127,grad_norm: 1.0000000245630556, iteration: 74415
loss: 1.012026309967041,grad_norm: 0.9604025800851534, iteration: 74416
loss: 1.0266523361206055,grad_norm: 0.8948924894547955, iteration: 74417
loss: 0.9857197403907776,grad_norm: 0.999999201142161, iteration: 74418
loss: 1.1549066305160522,grad_norm: 0.999999632812472, iteration: 74419
loss: 1.0762009620666504,grad_norm: 0.9630913793171768, iteration: 74420
loss: 1.0057358741760254,grad_norm: 0.9532920308688445, iteration: 74421
loss: 1.0035024881362915,grad_norm: 0.9439589208891009, iteration: 74422
loss: 1.0439965724945068,grad_norm: 0.9999995121167479, iteration: 74423
loss: 0.9588066339492798,grad_norm: 0.9999992814559717, iteration: 74424
loss: 0.9941536784172058,grad_norm: 0.9801496731238603, iteration: 74425
loss: 1.0352463722229004,grad_norm: 0.9999991201098722, iteration: 74426
loss: 1.0421265363693237,grad_norm: 0.999999187533441, iteration: 74427
loss: 1.003265380859375,grad_norm: 0.9999991668201725, iteration: 74428
loss: 1.0624418258666992,grad_norm: 0.9999997285632335, iteration: 74429
loss: 1.0813478231430054,grad_norm: 0.9999992495464072, iteration: 74430
loss: 1.001524567604065,grad_norm: 0.99999910120398, iteration: 74431
loss: 0.9777565002441406,grad_norm: 0.9045242206008763, iteration: 74432
loss: 1.0229581594467163,grad_norm: 0.8974124009844343, iteration: 74433
loss: 0.9971203207969666,grad_norm: 0.9999990415444726, iteration: 74434
loss: 1.0319633483886719,grad_norm: 0.9436971734581716, iteration: 74435
loss: 1.019888997077942,grad_norm: 0.8367763985381883, iteration: 74436
loss: 1.0016382932662964,grad_norm: 0.9301380655354344, iteration: 74437
loss: 1.065742015838623,grad_norm: 0.9675810523335907, iteration: 74438
loss: 0.9800403118133545,grad_norm: 0.9501813372235779, iteration: 74439
loss: 1.0229159593582153,grad_norm: 0.8130761756262744, iteration: 74440
loss: 0.991822361946106,grad_norm: 0.9999990592631272, iteration: 74441
loss: 1.0722769498825073,grad_norm: 0.999999736666052, iteration: 74442
loss: 1.0023646354675293,grad_norm: 0.930784314691509, iteration: 74443
loss: 1.0129320621490479,grad_norm: 0.9999992277437123, iteration: 74444
loss: 1.0500909090042114,grad_norm: 0.9999991299629941, iteration: 74445
loss: 1.0329257249832153,grad_norm: 0.9992926345154293, iteration: 74446
loss: 0.9871540665626526,grad_norm: 0.8047004923327199, iteration: 74447
loss: 1.0718145370483398,grad_norm: 0.9999990097095883, iteration: 74448
loss: 1.005544900894165,grad_norm: 0.9118147071936679, iteration: 74449
loss: 1.0009784698486328,grad_norm: 0.9888665220933593, iteration: 74450
loss: 1.0134613513946533,grad_norm: 0.9999991841800516, iteration: 74451
loss: 1.1197803020477295,grad_norm: 1.0000000283109722, iteration: 74452
loss: 0.9859218597412109,grad_norm: 0.908842189900474, iteration: 74453
loss: 1.0056469440460205,grad_norm: 0.8812607522760745, iteration: 74454
loss: 1.019460916519165,grad_norm: 0.79229607073481, iteration: 74455
loss: 0.9740917682647705,grad_norm: 0.9028968301667596, iteration: 74456
loss: 1.0131349563598633,grad_norm: 0.9999991858136014, iteration: 74457
loss: 1.0164053440093994,grad_norm: 0.9999991112641119, iteration: 74458
loss: 1.0219444036483765,grad_norm: 0.9999992903599744, iteration: 74459
loss: 1.0281710624694824,grad_norm: 0.8782072072206293, iteration: 74460
loss: 1.0095067024230957,grad_norm: 0.9999991960111932, iteration: 74461
loss: 0.9891553521156311,grad_norm: 0.9999992364254944, iteration: 74462
loss: 0.9816285967826843,grad_norm: 0.9999991075310141, iteration: 74463
loss: 1.0060443878173828,grad_norm: 0.8165909409683414, iteration: 74464
loss: 0.9680319428443909,grad_norm: 0.9642519262456674, iteration: 74465
loss: 1.0023846626281738,grad_norm: 0.79825266610372, iteration: 74466
loss: 1.0129787921905518,grad_norm: 0.9999990511009194, iteration: 74467
loss: 0.9874505996704102,grad_norm: 0.9315377065839617, iteration: 74468
loss: 0.9916427731513977,grad_norm: 0.9000322812023761, iteration: 74469
loss: 0.9883426427841187,grad_norm: 0.9999991474685015, iteration: 74470
loss: 1.0147705078125,grad_norm: 0.9625058446741291, iteration: 74471
loss: 1.033943772315979,grad_norm: 0.8642078921165339, iteration: 74472
loss: 1.0280057191848755,grad_norm: 0.9999993231960438, iteration: 74473
loss: 0.9999178647994995,grad_norm: 0.8298625507103958, iteration: 74474
loss: 1.023494005203247,grad_norm: 0.9999992801900235, iteration: 74475
loss: 1.0353810787200928,grad_norm: 0.8674955448371388, iteration: 74476
loss: 1.0218114852905273,grad_norm: 0.902127713254353, iteration: 74477
loss: 0.947991669178009,grad_norm: 0.9637258368684692, iteration: 74478
loss: 0.9368643164634705,grad_norm: 0.9466689331949949, iteration: 74479
loss: 0.9980210661888123,grad_norm: 0.9733783245706921, iteration: 74480
loss: 1.012682557106018,grad_norm: 0.9347571859229079, iteration: 74481
loss: 0.9738386273384094,grad_norm: 0.994773940040122, iteration: 74482
loss: 1.0084189176559448,grad_norm: 0.8227094675563548, iteration: 74483
loss: 0.972430944442749,grad_norm: 0.9537383548640218, iteration: 74484
loss: 0.9649301767349243,grad_norm: 0.8845246575698218, iteration: 74485
loss: 0.9595951437950134,grad_norm: 0.9999991665636778, iteration: 74486
loss: 1.0036938190460205,grad_norm: 0.999999199261987, iteration: 74487
loss: 1.0118622779846191,grad_norm: 0.871553787978962, iteration: 74488
loss: 1.022120475769043,grad_norm: 0.9651258348507374, iteration: 74489
loss: 1.0203161239624023,grad_norm: 0.999999267201092, iteration: 74490
loss: 1.0183182954788208,grad_norm: 0.9419740931620509, iteration: 74491
loss: 1.0760618448257446,grad_norm: 0.9999997202581529, iteration: 74492
loss: 1.001020073890686,grad_norm: 0.9999992147407115, iteration: 74493
loss: 1.0220667123794556,grad_norm: 0.9999991046426261, iteration: 74494
loss: 0.9957150816917419,grad_norm: 0.9897112704012205, iteration: 74495
loss: 0.9930665493011475,grad_norm: 0.9503603613958689, iteration: 74496
loss: 1.0133174657821655,grad_norm: 0.999999046878645, iteration: 74497
loss: 0.9545604586601257,grad_norm: 0.9438552412748964, iteration: 74498
loss: 1.0287659168243408,grad_norm: 0.9936889672587796, iteration: 74499
loss: 0.9982900023460388,grad_norm: 0.9999991710240378, iteration: 74500
loss: 0.9755817651748657,grad_norm: 0.8936719932174908, iteration: 74501
loss: 1.0267635583877563,grad_norm: 0.8718336286954039, iteration: 74502
loss: 0.992121160030365,grad_norm: 0.99999913688939, iteration: 74503
loss: 0.9563637971878052,grad_norm: 0.9430597009107379, iteration: 74504
loss: 0.9733200669288635,grad_norm: 0.8812919460151328, iteration: 74505
loss: 0.9996774792671204,grad_norm: 0.9969904929270197, iteration: 74506
loss: 0.9773405194282532,grad_norm: 0.9423956128425179, iteration: 74507
loss: 1.0172078609466553,grad_norm: 0.8328667195663596, iteration: 74508
loss: 0.9667831063270569,grad_norm: 0.9999990371995048, iteration: 74509
loss: 0.9990329146385193,grad_norm: 0.8945082548893739, iteration: 74510
loss: 0.9821839928627014,grad_norm: 0.9901975937707631, iteration: 74511
loss: 0.9935712218284607,grad_norm: 0.8687073817117296, iteration: 74512
loss: 1.0152955055236816,grad_norm: 0.9999991753906493, iteration: 74513
loss: 1.0177090167999268,grad_norm: 0.9999990548648907, iteration: 74514
loss: 1.025050401687622,grad_norm: 0.8688563962045215, iteration: 74515
loss: 1.0099842548370361,grad_norm: 0.9295499954629729, iteration: 74516
loss: 0.9496520757675171,grad_norm: 0.9999989974070681, iteration: 74517
loss: 1.0648322105407715,grad_norm: 0.9514172141185139, iteration: 74518
loss: 1.0028294324874878,grad_norm: 0.999999007273526, iteration: 74519
loss: 1.0502737760543823,grad_norm: 0.9999995038606924, iteration: 74520
loss: 1.0854334831237793,grad_norm: 0.934177236488046, iteration: 74521
loss: 1.0325772762298584,grad_norm: 0.9999992410370169, iteration: 74522
loss: 0.9873142838478088,grad_norm: 0.9999999080264309, iteration: 74523
loss: 1.1035144329071045,grad_norm: 0.9999997476527814, iteration: 74524
loss: 1.013838291168213,grad_norm: 0.9596757502826054, iteration: 74525
loss: 1.0230988264083862,grad_norm: 0.8735215192573164, iteration: 74526
loss: 0.9959825873374939,grad_norm: 0.9999990236981559, iteration: 74527
loss: 1.0082226991653442,grad_norm: 0.8707339364566808, iteration: 74528
loss: 1.0582243204116821,grad_norm: 0.8773669816929823, iteration: 74529
loss: 1.0192712545394897,grad_norm: 0.84788544679215, iteration: 74530
loss: 0.9774627685546875,grad_norm: 0.7916054088255503, iteration: 74531
loss: 1.0051023960113525,grad_norm: 0.9999988993185525, iteration: 74532
loss: 0.9931508302688599,grad_norm: 0.9999992464609169, iteration: 74533
loss: 1.0147751569747925,grad_norm: 0.9420905756540112, iteration: 74534
loss: 0.9884350299835205,grad_norm: 0.9999990737320912, iteration: 74535
loss: 1.013390302658081,grad_norm: 0.8935321666339827, iteration: 74536
loss: 1.0371168851852417,grad_norm: 0.9584212772723265, iteration: 74537
loss: 0.9923168420791626,grad_norm: 0.8893370169580697, iteration: 74538
loss: 1.0083398818969727,grad_norm: 0.8427460697473942, iteration: 74539
loss: 0.9928104877471924,grad_norm: 0.9999994105897619, iteration: 74540
loss: 1.0639337301254272,grad_norm: 0.9999991270454697, iteration: 74541
loss: 1.014115333557129,grad_norm: 0.9999992821942995, iteration: 74542
loss: 1.0018621683120728,grad_norm: 0.8238114455654032, iteration: 74543
loss: 1.0156095027923584,grad_norm: 0.8750820871524545, iteration: 74544
loss: 1.0111497640609741,grad_norm: 0.9443188443423529, iteration: 74545
loss: 0.9947487115859985,grad_norm: 0.951835179051624, iteration: 74546
loss: 1.0281888246536255,grad_norm: 0.9999989938733463, iteration: 74547
loss: 0.9892122149467468,grad_norm: 0.9161049911099204, iteration: 74548
loss: 0.9802024960517883,grad_norm: 0.999999261454385, iteration: 74549
loss: 1.0047225952148438,grad_norm: 0.9999994113031516, iteration: 74550
loss: 1.015323519706726,grad_norm: 0.8652097061884243, iteration: 74551
loss: 1.0167529582977295,grad_norm: 0.8912893735061214, iteration: 74552
loss: 1.0565803050994873,grad_norm: 0.9999992925206386, iteration: 74553
loss: 1.014352798461914,grad_norm: 0.8672595229617791, iteration: 74554
loss: 1.0127818584442139,grad_norm: 0.9971876854609737, iteration: 74555
loss: 1.0457453727722168,grad_norm: 0.9773394738541288, iteration: 74556
loss: 1.0112377405166626,grad_norm: 0.9016253525415179, iteration: 74557
loss: 1.0772734880447388,grad_norm: 0.9999997566135521, iteration: 74558
loss: 0.9873697757720947,grad_norm: 0.9241433541300004, iteration: 74559
loss: 0.9997461438179016,grad_norm: 0.8395249491066556, iteration: 74560
loss: 0.9876484274864197,grad_norm: 0.896262289465677, iteration: 74561
loss: 1.0608406066894531,grad_norm: 0.9999992532859251, iteration: 74562
loss: 0.9938931465148926,grad_norm: 0.9588674408379563, iteration: 74563
loss: 1.0091278553009033,grad_norm: 0.955515103712588, iteration: 74564
loss: 0.992700457572937,grad_norm: 0.9256865204461915, iteration: 74565
loss: 0.9934082627296448,grad_norm: 0.8379519501213809, iteration: 74566
loss: 1.024802327156067,grad_norm: 0.9999991954112546, iteration: 74567
loss: 0.9973456263542175,grad_norm: 0.9999995100362571, iteration: 74568
loss: 1.0220497846603394,grad_norm: 0.9743186170217429, iteration: 74569
loss: 1.031566858291626,grad_norm: 0.8427238371688723, iteration: 74570
loss: 1.0229789018630981,grad_norm: 0.9386330369162192, iteration: 74571
loss: 1.014464259147644,grad_norm: 0.9627534730396429, iteration: 74572
loss: 1.0109729766845703,grad_norm: 0.8723671744116156, iteration: 74573
loss: 0.9854232668876648,grad_norm: 0.9733875726475114, iteration: 74574
loss: 0.9821290969848633,grad_norm: 0.9232869089060709, iteration: 74575
loss: 1.0133025646209717,grad_norm: 0.9999992385061099, iteration: 74576
loss: 1.00477135181427,grad_norm: 0.8108772839967863, iteration: 74577
loss: 0.9743310809135437,grad_norm: 0.9642491216436567, iteration: 74578
loss: 1.0091480016708374,grad_norm: 0.92757932920472, iteration: 74579
loss: 0.9821943044662476,grad_norm: 0.932784192216765, iteration: 74580
loss: 1.0275461673736572,grad_norm: 0.9999992375891525, iteration: 74581
loss: 1.0216410160064697,grad_norm: 0.9999996378821279, iteration: 74582
loss: 1.030616283416748,grad_norm: 0.9999998407832285, iteration: 74583
loss: 0.9953805804252625,grad_norm: 0.8260826787461748, iteration: 74584
loss: 0.992340087890625,grad_norm: 0.945055981369604, iteration: 74585
loss: 0.9712613821029663,grad_norm: 0.9999991041784564, iteration: 74586
loss: 1.0206592082977295,grad_norm: 0.9999996472209115, iteration: 74587
loss: 0.999671220779419,grad_norm: 0.9999992420228254, iteration: 74588
loss: 1.020493984222412,grad_norm: 0.9999991705657577, iteration: 74589
loss: 1.0168431997299194,grad_norm: 0.7260082513766698, iteration: 74590
loss: 1.0428049564361572,grad_norm: 0.8578815359717854, iteration: 74591
loss: 1.0211647748947144,grad_norm: 0.999999106969941, iteration: 74592
loss: 1.0246003866195679,grad_norm: 0.9999992697347423, iteration: 74593
loss: 1.080335021018982,grad_norm: 0.999999875959165, iteration: 74594
loss: 1.01200532913208,grad_norm: 0.8336362364404937, iteration: 74595
loss: 1.0673038959503174,grad_norm: 0.9999990828420801, iteration: 74596
loss: 0.9787784814834595,grad_norm: 0.9284596030225878, iteration: 74597
loss: 0.9970183372497559,grad_norm: 0.8611089045453812, iteration: 74598
loss: 0.9944697022438049,grad_norm: 0.9999992063792205, iteration: 74599
loss: 1.0057685375213623,grad_norm: 0.8217966922602763, iteration: 74600
loss: 0.98440021276474,grad_norm: 0.9133167474229068, iteration: 74601
loss: 1.1010558605194092,grad_norm: 0.9999998154399921, iteration: 74602
loss: 0.9887419939041138,grad_norm: 0.8360428277931882, iteration: 74603
loss: 1.016273856163025,grad_norm: 0.8236884580780656, iteration: 74604
loss: 1.0811107158660889,grad_norm: 0.9999997617303572, iteration: 74605
loss: 1.0229995250701904,grad_norm: 0.9999999056923785, iteration: 74606
loss: 1.0069457292556763,grad_norm: 0.9141421574430986, iteration: 74607
loss: 1.0110194683074951,grad_norm: 0.919329536445011, iteration: 74608
loss: 1.0284174680709839,grad_norm: 0.9185757149537828, iteration: 74609
loss: 1.02224600315094,grad_norm: 0.8288979137932138, iteration: 74610
loss: 0.9768895506858826,grad_norm: 0.9999989536058572, iteration: 74611
loss: 1.099377989768982,grad_norm: 0.9999998596216031, iteration: 74612
loss: 0.9907692670822144,grad_norm: 0.945616774117918, iteration: 74613
loss: 1.0513893365859985,grad_norm: 0.999999276537905, iteration: 74614
loss: 1.048068881034851,grad_norm: 0.8449264299203386, iteration: 74615
loss: 1.0130424499511719,grad_norm: 0.9999990514425607, iteration: 74616
loss: 1.054785132408142,grad_norm: 0.9999992521003912, iteration: 74617
loss: 1.0406161546707153,grad_norm: 0.9999989385682363, iteration: 74618
loss: 1.0472649335861206,grad_norm: 0.9999991685844377, iteration: 74619
loss: 1.2072311639785767,grad_norm: 0.9999997377774916, iteration: 74620
loss: 1.1078886985778809,grad_norm: 0.9999995318258864, iteration: 74621
loss: 1.0275580883026123,grad_norm: 0.9999999442724992, iteration: 74622
loss: 1.0396875143051147,grad_norm: 0.9999992232670959, iteration: 74623
loss: 1.000593662261963,grad_norm: 0.999999390266677, iteration: 74624
loss: 1.044883131980896,grad_norm: 0.9999995838191471, iteration: 74625
loss: 1.064775824546814,grad_norm: 0.9202913275601251, iteration: 74626
loss: 1.0003759860992432,grad_norm: 0.8888384863634895, iteration: 74627
loss: 1.0066014528274536,grad_norm: 0.9999990741338266, iteration: 74628
loss: 1.0050749778747559,grad_norm: 0.999999007490117, iteration: 74629
loss: 1.0077418088912964,grad_norm: 0.9999990432390543, iteration: 74630
loss: 1.020350694656372,grad_norm: 0.937424725941843, iteration: 74631
loss: 0.9948646426200867,grad_norm: 0.8270646913710208, iteration: 74632
loss: 1.0417271852493286,grad_norm: 0.9999999960119091, iteration: 74633
loss: 1.016688585281372,grad_norm: 0.9404504896713641, iteration: 74634
loss: 1.0337806940078735,grad_norm: 0.9999998187027318, iteration: 74635
loss: 1.0003246068954468,grad_norm: 0.9999990220096404, iteration: 74636
loss: 1.0338873863220215,grad_norm: 0.9999991811592012, iteration: 74637
loss: 0.9985450506210327,grad_norm: 0.9999991186749357, iteration: 74638
loss: 1.026104211807251,grad_norm: 0.9928591081474771, iteration: 74639
loss: 1.0041714906692505,grad_norm: 0.9999990860995912, iteration: 74640
loss: 1.0092902183532715,grad_norm: 0.9999991829264402, iteration: 74641
loss: 0.9805201888084412,grad_norm: 0.9194626236799499, iteration: 74642
loss: 1.0155738592147827,grad_norm: 0.953971822960627, iteration: 74643
loss: 1.0121514797210693,grad_norm: 0.9933611806550523, iteration: 74644
loss: 0.9841561317443848,grad_norm: 0.9999992583241138, iteration: 74645
loss: 0.9765238761901855,grad_norm: 0.9999991554606827, iteration: 74646
loss: 1.0046782493591309,grad_norm: 0.7187463577590043, iteration: 74647
loss: 0.9882609248161316,grad_norm: 0.9435901458306891, iteration: 74648
loss: 1.0280320644378662,grad_norm: 0.9386017618131627, iteration: 74649
loss: 1.0083032846450806,grad_norm: 0.7743305292008833, iteration: 74650
loss: 1.025675892829895,grad_norm: 0.961399197957351, iteration: 74651
loss: 1.0326091051101685,grad_norm: 0.9999995864526098, iteration: 74652
loss: 0.988813579082489,grad_norm: 0.9577652364766632, iteration: 74653
loss: 1.062319278717041,grad_norm: 0.8597702890105661, iteration: 74654
loss: 0.9864274859428406,grad_norm: 0.8044372371361067, iteration: 74655
loss: 1.0434499979019165,grad_norm: 0.9532076248385152, iteration: 74656
loss: 1.0080384016036987,grad_norm: 0.7862026854451436, iteration: 74657
loss: 1.0091419219970703,grad_norm: 0.9812862091787384, iteration: 74658
loss: 1.0235307216644287,grad_norm: 0.9999991859521498, iteration: 74659
loss: 1.1116467714309692,grad_norm: 0.9999998927657188, iteration: 74660
loss: 1.0332224369049072,grad_norm: 0.999999587436049, iteration: 74661
loss: 1.001216173171997,grad_norm: 0.8716415027847132, iteration: 74662
loss: 1.0067009925842285,grad_norm: 0.8802157544425034, iteration: 74663
loss: 0.9693641662597656,grad_norm: 0.9695927847228102, iteration: 74664
loss: 1.0184575319290161,grad_norm: 0.8671201237732635, iteration: 74665
loss: 0.9990428686141968,grad_norm: 0.9058574722408056, iteration: 74666
loss: 1.0114092826843262,grad_norm: 0.9599305738886772, iteration: 74667
loss: 1.0049184560775757,grad_norm: 0.8662065626400064, iteration: 74668
loss: 1.0090640783309937,grad_norm: 0.8100177371844824, iteration: 74669
loss: 0.9860460162162781,grad_norm: 0.9178144416744884, iteration: 74670
loss: 1.0208005905151367,grad_norm: 0.9825758409948144, iteration: 74671
loss: 0.9762100577354431,grad_norm: 0.9796947376013112, iteration: 74672
loss: 1.002741813659668,grad_norm: 0.9753662567201454, iteration: 74673
loss: 1.02286958694458,grad_norm: 0.9999997636350192, iteration: 74674
loss: 0.9622849225997925,grad_norm: 0.9999991963520258, iteration: 74675
loss: 0.9895505905151367,grad_norm: 0.9104898250263336, iteration: 74676
loss: 0.9844812154769897,grad_norm: 0.9999990828023304, iteration: 74677
loss: 1.0053964853286743,grad_norm: 0.8120135955342979, iteration: 74678
loss: 1.0284100770950317,grad_norm: 0.9999992776671116, iteration: 74679
loss: 0.9983037710189819,grad_norm: 0.6628275720523746, iteration: 74680
loss: 1.0012404918670654,grad_norm: 0.9999991846515454, iteration: 74681
loss: 1.0048844814300537,grad_norm: 0.7616369181925129, iteration: 74682
loss: 1.0059267282485962,grad_norm: 0.9713523446795012, iteration: 74683
loss: 0.9877009391784668,grad_norm: 0.9999990578965743, iteration: 74684
loss: 1.0409644842147827,grad_norm: 0.9999993679903249, iteration: 74685
loss: 0.9830345511436462,grad_norm: 0.7582839231230505, iteration: 74686
loss: 0.9990362524986267,grad_norm: 0.9999992181598326, iteration: 74687
loss: 1.022106409072876,grad_norm: 0.7740573905872706, iteration: 74688
loss: 1.0387704372406006,grad_norm: 0.9999992997810132, iteration: 74689
loss: 0.9897158741950989,grad_norm: 0.7939852845778059, iteration: 74690
loss: 1.010390043258667,grad_norm: 0.9999992553732016, iteration: 74691
loss: 1.027312994003296,grad_norm: 0.9333695579040896, iteration: 74692
loss: 0.9549777507781982,grad_norm: 0.916150422215418, iteration: 74693
loss: 1.020153522491455,grad_norm: 0.927587466466331, iteration: 74694
loss: 0.9616434574127197,grad_norm: 0.9187703328696486, iteration: 74695
loss: 0.9978947639465332,grad_norm: 0.9144092909391107, iteration: 74696
loss: 0.9969738721847534,grad_norm: 0.8933090389157184, iteration: 74697
loss: 0.99447101354599,grad_norm: 0.9301472052300679, iteration: 74698
loss: 1.0226473808288574,grad_norm: 0.8927629846320217, iteration: 74699
loss: 1.0080260038375854,grad_norm: 0.9999992358207755, iteration: 74700
loss: 1.0917229652404785,grad_norm: 0.9999995908780643, iteration: 74701
loss: 1.045232892036438,grad_norm: 0.9999999118849332, iteration: 74702
loss: 1.0033875703811646,grad_norm: 0.9160597959698561, iteration: 74703
loss: 0.9916930198669434,grad_norm: 0.9999992013129935, iteration: 74704
loss: 1.010999083518982,grad_norm: 0.8785264672586252, iteration: 74705
loss: 1.000299334526062,grad_norm: 0.9999991281757316, iteration: 74706
loss: 1.020485520362854,grad_norm: 0.999292360790878, iteration: 74707
loss: 0.9954074621200562,grad_norm: 0.9999991727706008, iteration: 74708
loss: 0.984757125377655,grad_norm: 0.771518140603921, iteration: 74709
loss: 0.9738700985908508,grad_norm: 0.9893887355654216, iteration: 74710
loss: 0.9471734762191772,grad_norm: 0.9368881534993619, iteration: 74711
loss: 1.013692855834961,grad_norm: 0.999999028763752, iteration: 74712
loss: 1.0729361772537231,grad_norm: 0.9999994098336715, iteration: 74713
loss: 1.0101202726364136,grad_norm: 0.9159410347278529, iteration: 74714
loss: 1.0719306468963623,grad_norm: 0.9999991491488003, iteration: 74715
loss: 1.016402006149292,grad_norm: 0.856323096246501, iteration: 74716
loss: 1.0385464429855347,grad_norm: 0.99999910734754, iteration: 74717
loss: 0.9989495277404785,grad_norm: 0.891081325119454, iteration: 74718
loss: 1.0373573303222656,grad_norm: 0.9999994982209924, iteration: 74719
loss: 0.984227180480957,grad_norm: 0.7485204691569023, iteration: 74720
loss: 1.036483883857727,grad_norm: 0.9999999920233751, iteration: 74721
loss: 0.9659422039985657,grad_norm: 0.9999991892215769, iteration: 74722
loss: 1.0198473930358887,grad_norm: 0.8400546717121434, iteration: 74723
loss: 1.006183385848999,grad_norm: 0.999999474542237, iteration: 74724
loss: 0.9882998466491699,grad_norm: 0.9999991084805114, iteration: 74725
loss: 0.9836943745613098,grad_norm: 0.9999993642662119, iteration: 74726
loss: 1.0004009008407593,grad_norm: 0.9544238131596239, iteration: 74727
loss: 1.040590763092041,grad_norm: 0.992480122181755, iteration: 74728
loss: 1.0307480096817017,grad_norm: 0.9468018249525874, iteration: 74729
loss: 0.9656686782836914,grad_norm: 0.9207237557235636, iteration: 74730
loss: 1.0135608911514282,grad_norm: 0.907146737023406, iteration: 74731
loss: 0.9585599303245544,grad_norm: 0.9451674605035176, iteration: 74732
loss: 1.0290489196777344,grad_norm: 0.999999845192086, iteration: 74733
loss: 0.9908005595207214,grad_norm: 0.9659703675752491, iteration: 74734
loss: 0.9612051844596863,grad_norm: 0.8827356465960438, iteration: 74735
loss: 1.0289433002471924,grad_norm: 0.9999995323571611, iteration: 74736
loss: 1.0023534297943115,grad_norm: 0.999999039341789, iteration: 74737
loss: 1.0093598365783691,grad_norm: 0.9999992296959687, iteration: 74738
loss: 1.046546459197998,grad_norm: 0.9533898332734304, iteration: 74739
loss: 0.9870212078094482,grad_norm: 0.9061432099628408, iteration: 74740
loss: 1.0053739547729492,grad_norm: 0.9720851969634854, iteration: 74741
loss: 1.0130292177200317,grad_norm: 0.8446031024228579, iteration: 74742
loss: 1.0220024585723877,grad_norm: 0.9999992151939036, iteration: 74743
loss: 0.9713645577430725,grad_norm: 0.999999074853246, iteration: 74744
loss: 1.0308588743209839,grad_norm: 0.9999991962099598, iteration: 74745
loss: 1.0056312084197998,grad_norm: 0.8297745881774478, iteration: 74746
loss: 0.977811336517334,grad_norm: 0.9999991560218743, iteration: 74747
loss: 1.0170085430145264,grad_norm: 0.9668524680861672, iteration: 74748
loss: 1.0094045400619507,grad_norm: 0.8704873040920796, iteration: 74749
loss: 1.014807105064392,grad_norm: 0.8700029041782225, iteration: 74750
loss: 1.0321637392044067,grad_norm: 0.9682869559372858, iteration: 74751
loss: 1.0340083837509155,grad_norm: 0.804687460751406, iteration: 74752
loss: 1.0452739000320435,grad_norm: 0.9999991231073215, iteration: 74753
loss: 1.0258386135101318,grad_norm: 0.9999998802493856, iteration: 74754
loss: 0.9946886301040649,grad_norm: 0.9999990128767794, iteration: 74755
loss: 0.9879096746444702,grad_norm: 0.9999990496671772, iteration: 74756
loss: 0.9823492765426636,grad_norm: 0.9608454182282491, iteration: 74757
loss: 1.006000280380249,grad_norm: 0.999999672539927, iteration: 74758
loss: 0.9561144113540649,grad_norm: 0.9642228266995263, iteration: 74759
loss: 0.9683061838150024,grad_norm: 0.9999990652901395, iteration: 74760
loss: 1.016939401626587,grad_norm: 0.8961122837043101, iteration: 74761
loss: 1.035651445388794,grad_norm: 0.9999989723031408, iteration: 74762
loss: 0.9469559788703918,grad_norm: 0.8838979571624941, iteration: 74763
loss: 1.014180302619934,grad_norm: 0.7966125932046859, iteration: 74764
loss: 1.018515944480896,grad_norm: 0.9354967188784598, iteration: 74765
loss: 1.0713701248168945,grad_norm: 0.9999994072379519, iteration: 74766
loss: 0.9672343134880066,grad_norm: 0.9494727733949964, iteration: 74767
loss: 1.0585212707519531,grad_norm: 0.9091879808716915, iteration: 74768
loss: 1.1294050216674805,grad_norm: 0.9999993977331607, iteration: 74769
loss: 0.9963001012802124,grad_norm: 0.999999105640344, iteration: 74770
loss: 0.9914425015449524,grad_norm: 0.8934038525996083, iteration: 74771
loss: 1.0045949220657349,grad_norm: 0.976773049317881, iteration: 74772
loss: 0.9928808808326721,grad_norm: 0.9999991740884223, iteration: 74773
loss: 1.0140180587768555,grad_norm: 0.8190641344448869, iteration: 74774
loss: 0.9867557883262634,grad_norm: 0.8467219209427607, iteration: 74775
loss: 0.9861218929290771,grad_norm: 0.9287732789648138, iteration: 74776
loss: 0.9657013416290283,grad_norm: 0.9289859321046862, iteration: 74777
loss: 0.9945273399353027,grad_norm: 0.9597355900453179, iteration: 74778
loss: 1.0120130777359009,grad_norm: 0.9222339446101655, iteration: 74779
loss: 1.0535129308700562,grad_norm: 0.8414694659176899, iteration: 74780
loss: 1.006932020187378,grad_norm: 0.9583898498101345, iteration: 74781
loss: 0.9964824914932251,grad_norm: 0.9968676524410567, iteration: 74782
loss: 1.028394341468811,grad_norm: 0.9999991464448308, iteration: 74783
loss: 0.9826568365097046,grad_norm: 0.9874354979792701, iteration: 74784
loss: 1.0513490438461304,grad_norm: 0.999999613826449, iteration: 74785
loss: 1.0086758136749268,grad_norm: 0.9430217631717599, iteration: 74786
loss: 0.9822081923484802,grad_norm: 0.8428975460170754, iteration: 74787
loss: 0.9746841788291931,grad_norm: 0.9999990557920979, iteration: 74788
loss: 1.0083520412445068,grad_norm: 0.9999998666318977, iteration: 74789
loss: 0.9784911274909973,grad_norm: 0.9999991095619857, iteration: 74790
loss: 1.0131261348724365,grad_norm: 0.8913630932606484, iteration: 74791
loss: 1.0188673734664917,grad_norm: 0.8460817052725079, iteration: 74792
loss: 1.0159435272216797,grad_norm: 0.9999994904134752, iteration: 74793
loss: 1.0120381116867065,grad_norm: 0.9999990881925909, iteration: 74794
loss: 0.9779841899871826,grad_norm: 0.9999991798381054, iteration: 74795
loss: 0.9907476305961609,grad_norm: 0.9999991378696839, iteration: 74796
loss: 1.005169153213501,grad_norm: 0.9435037059088303, iteration: 74797
loss: 1.005500078201294,grad_norm: 0.9331143258209245, iteration: 74798
loss: 1.003618597984314,grad_norm: 0.8810650821165091, iteration: 74799
loss: 0.970801830291748,grad_norm: 0.9999991605482393, iteration: 74800
loss: 1.100290298461914,grad_norm: 0.9999990864678432, iteration: 74801
loss: 1.0075408220291138,grad_norm: 0.82349615825792, iteration: 74802
loss: 0.9890038371086121,grad_norm: 0.9999994078190338, iteration: 74803
loss: 1.0250720977783203,grad_norm: 0.9994786720292502, iteration: 74804
loss: 1.003317952156067,grad_norm: 0.9788735162526883, iteration: 74805
loss: 0.9909148216247559,grad_norm: 0.9999991951916593, iteration: 74806
loss: 1.0603126287460327,grad_norm: 0.9999990840531388, iteration: 74807
loss: 1.0060482025146484,grad_norm: 0.9498759173586461, iteration: 74808
loss: 0.9813958406448364,grad_norm: 0.9999992516187971, iteration: 74809
loss: 0.9722316265106201,grad_norm: 0.9999990140839411, iteration: 74810
loss: 0.9967045783996582,grad_norm: 0.9999990568293263, iteration: 74811
loss: 1.058861494064331,grad_norm: 0.9999993433850002, iteration: 74812
loss: 0.9822841286659241,grad_norm: 0.9999991001425589, iteration: 74813
loss: 1.0011895895004272,grad_norm: 0.9999991850239422, iteration: 74814
loss: 0.9646782875061035,grad_norm: 0.7781717824046775, iteration: 74815
loss: 1.0350245237350464,grad_norm: 0.9999993253550687, iteration: 74816
loss: 1.0231125354766846,grad_norm: 0.9951501116473265, iteration: 74817
loss: 0.9645265340805054,grad_norm: 0.858836502112048, iteration: 74818
loss: 1.0344988107681274,grad_norm: 0.8046243140857745, iteration: 74819
loss: 1.0127347707748413,grad_norm: 0.9999993355108631, iteration: 74820
loss: 1.0151326656341553,grad_norm: 0.8959922105357362, iteration: 74821
loss: 1.0219391584396362,grad_norm: 0.9999991391097859, iteration: 74822
loss: 0.980478048324585,grad_norm: 0.9999991300496068, iteration: 74823
loss: 0.9663072228431702,grad_norm: 0.9976119972367968, iteration: 74824
loss: 1.0257128477096558,grad_norm: 0.9324636712338427, iteration: 74825
loss: 1.0049412250518799,grad_norm: 0.865060433622392, iteration: 74826
loss: 0.9833269715309143,grad_norm: 0.999999092892216, iteration: 74827
loss: 0.9809514880180359,grad_norm: 0.999999006141811, iteration: 74828
loss: 1.0145642757415771,grad_norm: 0.873333478363602, iteration: 74829
loss: 1.0079892873764038,grad_norm: 0.9999991671677749, iteration: 74830
loss: 0.9905950427055359,grad_norm: 0.9999997129913354, iteration: 74831
loss: 1.0430588722229004,grad_norm: 0.9145126305806666, iteration: 74832
loss: 0.9690975546836853,grad_norm: 0.9474651115114974, iteration: 74833
loss: 0.9866357445716858,grad_norm: 0.7508492468396843, iteration: 74834
loss: 1.0055829286575317,grad_norm: 0.8377551755603159, iteration: 74835
loss: 1.0311030149459839,grad_norm: 0.8996069292270275, iteration: 74836
loss: 1.2512201070785522,grad_norm: 0.9999999617851797, iteration: 74837
loss: 1.0007543563842773,grad_norm: 0.9999991293832972, iteration: 74838
loss: 0.9707505106925964,grad_norm: 0.87116000089449, iteration: 74839
loss: 1.037590503692627,grad_norm: 0.7765540783212054, iteration: 74840
loss: 1.0178139209747314,grad_norm: 0.9999991326694769, iteration: 74841
loss: 0.9896547198295593,grad_norm: 0.9224348734519194, iteration: 74842
loss: 1.0063724517822266,grad_norm: 0.8499787330028731, iteration: 74843
loss: 1.007567286491394,grad_norm: 0.8929300307493266, iteration: 74844
loss: 1.02329421043396,grad_norm: 0.9999990243303826, iteration: 74845
loss: 1.023227572441101,grad_norm: 0.9999990383691163, iteration: 74846
loss: 0.9901673793792725,grad_norm: 0.8472342517889354, iteration: 74847
loss: 0.9911555051803589,grad_norm: 0.8996008259272469, iteration: 74848
loss: 1.013739824295044,grad_norm: 0.8892992459294525, iteration: 74849
loss: 1.010257601737976,grad_norm: 0.9999997762517523, iteration: 74850
loss: 0.9888432621955872,grad_norm: 0.9721374224681733, iteration: 74851
loss: 1.0211780071258545,grad_norm: 0.92108202192525, iteration: 74852
loss: 1.0071024894714355,grad_norm: 0.9559837773676803, iteration: 74853
loss: 1.0094783306121826,grad_norm: 0.9460778758013929, iteration: 74854
loss: 1.0147066116333008,grad_norm: 0.856045786247863, iteration: 74855
loss: 1.0270925760269165,grad_norm: 0.9720239993478587, iteration: 74856
loss: 0.9864464402198792,grad_norm: 0.8968189553669933, iteration: 74857
loss: 1.0252653360366821,grad_norm: 0.9283046651538424, iteration: 74858
loss: 1.0118640661239624,grad_norm: 0.9999989749445087, iteration: 74859
loss: 1.0180902481079102,grad_norm: 0.898389583693384, iteration: 74860
loss: 1.0200607776641846,grad_norm: 0.9330806788903167, iteration: 74861
loss: 1.0010679960250854,grad_norm: 0.9079105271224391, iteration: 74862
loss: 1.0109121799468994,grad_norm: 0.9999991845882883, iteration: 74863
loss: 1.0359649658203125,grad_norm: 0.999069774851224, iteration: 74864
loss: 1.0068844556808472,grad_norm: 0.9490624423658306, iteration: 74865
loss: 0.9770927429199219,grad_norm: 0.9999990003316589, iteration: 74866
loss: 1.0408987998962402,grad_norm: 0.9115947473999673, iteration: 74867
loss: 0.9708347916603088,grad_norm: 0.8994929041596074, iteration: 74868
loss: 1.0167616605758667,grad_norm: 0.9907657640256767, iteration: 74869
loss: 1.0075476169586182,grad_norm: 0.9999993380189294, iteration: 74870
loss: 1.00766921043396,grad_norm: 0.9446300660938939, iteration: 74871
loss: 1.0102556943893433,grad_norm: 0.9999990301692193, iteration: 74872
loss: 0.9731943011283875,grad_norm: 0.9999994702059151, iteration: 74873
loss: 1.0054259300231934,grad_norm: 0.8853312389159794, iteration: 74874
loss: 0.9918528199195862,grad_norm: 0.8426778229152887, iteration: 74875
loss: 1.0114457607269287,grad_norm: 0.8999450249088223, iteration: 74876
loss: 0.9892178773880005,grad_norm: 0.9999994695606619, iteration: 74877
loss: 0.9639982581138611,grad_norm: 0.9256333071013529, iteration: 74878
loss: 1.0099292993545532,grad_norm: 0.8192455690226561, iteration: 74879
loss: 1.0092540979385376,grad_norm: 0.8291813990770988, iteration: 74880
loss: 1.007848858833313,grad_norm: 0.9298779973647365, iteration: 74881
loss: 1.0199271440505981,grad_norm: 0.8823685865195978, iteration: 74882
loss: 0.9950746297836304,grad_norm: 0.9999990908084632, iteration: 74883
loss: 1.034318447113037,grad_norm: 0.9999993923761207, iteration: 74884
loss: 1.0166521072387695,grad_norm: 0.8950497843350811, iteration: 74885
loss: 0.9968965649604797,grad_norm: 0.8583960191531497, iteration: 74886
loss: 1.0360580682754517,grad_norm: 0.9999990204992458, iteration: 74887
loss: 1.0075831413269043,grad_norm: 0.8941517028896028, iteration: 74888
loss: 0.9853449463844299,grad_norm: 0.8341058327975363, iteration: 74889
loss: 0.9885340929031372,grad_norm: 0.8528356339511742, iteration: 74890
loss: 0.9799812436103821,grad_norm: 0.9999992070294697, iteration: 74891
loss: 0.9931174516677856,grad_norm: 0.995181486819591, iteration: 74892
loss: 1.0116065740585327,grad_norm: 0.8459902714105979, iteration: 74893
loss: 1.0102263689041138,grad_norm: 0.9544243898567992, iteration: 74894
loss: 1.005411982536316,grad_norm: 0.8646490238025042, iteration: 74895
loss: 1.0169254541397095,grad_norm: 0.9999999615507571, iteration: 74896
loss: 1.0103791952133179,grad_norm: 0.9359508593257835, iteration: 74897
loss: 1.1369763612747192,grad_norm: 0.9999991580928055, iteration: 74898
loss: 0.995802640914917,grad_norm: 0.8360526022427317, iteration: 74899
loss: 1.009374737739563,grad_norm: 0.8645871540653931, iteration: 74900
loss: 0.9938160181045532,grad_norm: 0.8772882554849709, iteration: 74901
loss: 1.0083547830581665,grad_norm: 0.9298937617327351, iteration: 74902
loss: 0.9979240298271179,grad_norm: 0.9999991062083056, iteration: 74903
loss: 1.0052303075790405,grad_norm: 0.9541904050864508, iteration: 74904
loss: 0.9831179976463318,grad_norm: 0.9036991944971934, iteration: 74905
loss: 1.039442539215088,grad_norm: 0.8554594374784413, iteration: 74906
loss: 1.0014783143997192,grad_norm: 0.9754977333218264, iteration: 74907
loss: 0.979363203048706,grad_norm: 0.999999219982767, iteration: 74908
loss: 0.9819008111953735,grad_norm: 0.7859682459628031, iteration: 74909
loss: 1.0164915323257446,grad_norm: 0.8429703174778588, iteration: 74910
loss: 0.9850071668624878,grad_norm: 0.8821098633388602, iteration: 74911
loss: 1.0323172807693481,grad_norm: 0.9999996340700582, iteration: 74912
loss: 1.0234254598617554,grad_norm: 0.9999993570345742, iteration: 74913
loss: 1.004964828491211,grad_norm: 0.8262511238127581, iteration: 74914
loss: 1.007386565208435,grad_norm: 0.9526912441817275, iteration: 74915
loss: 0.9956594109535217,grad_norm: 0.9999991567894233, iteration: 74916
loss: 1.0267881155014038,grad_norm: 0.9424129754104379, iteration: 74917
loss: 1.0025168657302856,grad_norm: 0.8318047883716295, iteration: 74918
loss: 1.0099329948425293,grad_norm: 0.9401497832154853, iteration: 74919
loss: 0.9438661932945251,grad_norm: 0.9305624768133257, iteration: 74920
loss: 1.0109447240829468,grad_norm: 0.9074963861301996, iteration: 74921
loss: 0.9844183921813965,grad_norm: 0.9608832870624838, iteration: 74922
loss: 0.9905004501342773,grad_norm: 0.9916857879051649, iteration: 74923
loss: 0.98478102684021,grad_norm: 0.8615284514812074, iteration: 74924
loss: 0.9956862330436707,grad_norm: 0.9945706169027191, iteration: 74925
loss: 1.0186641216278076,grad_norm: 0.9999990912472678, iteration: 74926
loss: 0.9931187033653259,grad_norm: 0.9210840474433383, iteration: 74927
loss: 0.9984085559844971,grad_norm: 0.99999928995392, iteration: 74928
loss: 0.9871624112129211,grad_norm: 0.9999992288974123, iteration: 74929
loss: 1.0067061185836792,grad_norm: 0.8052579519961129, iteration: 74930
loss: 0.9775434136390686,grad_norm: 0.9999992816347365, iteration: 74931
loss: 0.9741927981376648,grad_norm: 0.9964196918988436, iteration: 74932
loss: 1.0148102045059204,grad_norm: 0.9999991462383576, iteration: 74933
loss: 1.0317392349243164,grad_norm: 0.8560531826270702, iteration: 74934
loss: 1.009391188621521,grad_norm: 0.8691234357601654, iteration: 74935
loss: 1.0178303718566895,grad_norm: 0.9999998764377773, iteration: 74936
loss: 1.0139870643615723,grad_norm: 0.8107085047711939, iteration: 74937
loss: 0.9689314365386963,grad_norm: 0.895489605358214, iteration: 74938
loss: 1.0097417831420898,grad_norm: 0.9999993941837758, iteration: 74939
loss: 1.0140353441238403,grad_norm: 0.9999993208297738, iteration: 74940
loss: 0.9956126809120178,grad_norm: 0.8174967747543646, iteration: 74941
loss: 0.9368293881416321,grad_norm: 0.9972812656941338, iteration: 74942
loss: 0.9735991358757019,grad_norm: 0.9109045906083719, iteration: 74943
loss: 0.9888891577720642,grad_norm: 0.9999991535502973, iteration: 74944
loss: 1.012173056602478,grad_norm: 0.7848358228231712, iteration: 74945
loss: 0.9856128692626953,grad_norm: 0.8994975987988407, iteration: 74946
loss: 0.9933942556381226,grad_norm: 0.9999990074046462, iteration: 74947
loss: 1.0018762350082397,grad_norm: 0.8632304735118455, iteration: 74948
loss: 1.010514736175537,grad_norm: 0.9785956731325571, iteration: 74949
loss: 1.0711549520492554,grad_norm: 0.9999991809345847, iteration: 74950
loss: 1.008436918258667,grad_norm: 0.8304837536297996, iteration: 74951
loss: 0.9926201701164246,grad_norm: 0.8656827991410894, iteration: 74952
loss: 0.991905689239502,grad_norm: 0.9999990927244844, iteration: 74953
loss: 1.0477769374847412,grad_norm: 0.9999995427461068, iteration: 74954
loss: 0.985660970211029,grad_norm: 0.9388738059152725, iteration: 74955
loss: 0.9858539700508118,grad_norm: 0.8895083363565018, iteration: 74956
loss: 0.989903450012207,grad_norm: 0.999999107080585, iteration: 74957
loss: 1.0522778034210205,grad_norm: 0.9999994869114185, iteration: 74958
loss: 0.9976765513420105,grad_norm: 0.9999991526366432, iteration: 74959
loss: 0.9994189143180847,grad_norm: 0.9320871706095366, iteration: 74960
loss: 0.9906526803970337,grad_norm: 0.9461809712145863, iteration: 74961
loss: 1.0061438083648682,grad_norm: 0.999999026031124, iteration: 74962
loss: 1.01603364944458,grad_norm: 0.9999990270467143, iteration: 74963
loss: 1.009879231452942,grad_norm: 0.9999992244613601, iteration: 74964
loss: 1.0207477807998657,grad_norm: 0.9409943624018849, iteration: 74965
loss: 1.0156279802322388,grad_norm: 0.8526934924904666, iteration: 74966
loss: 1.0195327997207642,grad_norm: 0.90827450483862, iteration: 74967
loss: 0.9390043616294861,grad_norm: 0.9999992261786421, iteration: 74968
loss: 0.9732857942581177,grad_norm: 0.9336270181434375, iteration: 74969
loss: 1.012511134147644,grad_norm: 0.9943663928482448, iteration: 74970
loss: 0.9951130151748657,grad_norm: 0.8218697897767878, iteration: 74971
loss: 1.0439257621765137,grad_norm: 0.9999992298318642, iteration: 74972
loss: 1.0070197582244873,grad_norm: 0.99999919141245, iteration: 74973
loss: 0.9975922107696533,grad_norm: 0.8939579315705501, iteration: 74974
loss: 1.001027226448059,grad_norm: 0.8344502533536255, iteration: 74975
loss: 1.027270793914795,grad_norm: 0.9999990711264314, iteration: 74976
loss: 0.9770084023475647,grad_norm: 0.8650765961288617, iteration: 74977
loss: 0.980866014957428,grad_norm: 0.850796102756498, iteration: 74978
loss: 1.0258510112762451,grad_norm: 0.8328627794582204, iteration: 74979
loss: 1.0369534492492676,grad_norm: 0.9999989871825972, iteration: 74980
loss: 1.0020630359649658,grad_norm: 0.9318527611536117, iteration: 74981
loss: 1.0034676790237427,grad_norm: 0.9816450945481406, iteration: 74982
loss: 1.0046457052230835,grad_norm: 0.841880911662184, iteration: 74983
loss: 0.9839135408401489,grad_norm: 0.9041997887916657, iteration: 74984
loss: 1.0324223041534424,grad_norm: 0.9999995077673212, iteration: 74985
loss: 0.9905221462249756,grad_norm: 0.8530517210408802, iteration: 74986
loss: 0.9958899021148682,grad_norm: 0.9999998554295317, iteration: 74987
loss: 0.9924086332321167,grad_norm: 0.9999993367267863, iteration: 74988
loss: 1.0143723487854004,grad_norm: 0.9999991816584466, iteration: 74989
loss: 1.0001085996627808,grad_norm: 0.8677757325034666, iteration: 74990
loss: 1.0671130418777466,grad_norm: 0.9294318976915815, iteration: 74991
loss: 0.9948773384094238,grad_norm: 0.9532894855605982, iteration: 74992
loss: 0.9935370087623596,grad_norm: 0.9999991437347259, iteration: 74993
loss: 0.9929801225662231,grad_norm: 0.9231835842111742, iteration: 74994
loss: 1.0310126543045044,grad_norm: 0.9999991790717264, iteration: 74995
loss: 1.000978708267212,grad_norm: 0.893325921004697, iteration: 74996
loss: 0.9952912330627441,grad_norm: 0.85182914536402, iteration: 74997
loss: 1.0539000034332275,grad_norm: 0.9525750393924961, iteration: 74998
loss: 1.0084660053253174,grad_norm: 0.9068140261250373, iteration: 74999
loss: 0.995688259601593,grad_norm: 0.9414452008348493, iteration: 75000
loss: 0.9975758194923401,grad_norm: 0.9469186125056273, iteration: 75001
loss: 1.0758856534957886,grad_norm: 0.9999995812989063, iteration: 75002
loss: 0.9632193446159363,grad_norm: 0.999999048004869, iteration: 75003
loss: 0.9917750954627991,grad_norm: 0.9292509739779345, iteration: 75004
loss: 1.0960869789123535,grad_norm: 0.9636570419230259, iteration: 75005
loss: 1.0125083923339844,grad_norm: 0.999999051842813, iteration: 75006
loss: 1.0036669969558716,grad_norm: 0.9561569633545244, iteration: 75007
loss: 1.0524338483810425,grad_norm: 0.8797438247650666, iteration: 75008
loss: 1.0235025882720947,grad_norm: 0.9615238726019313, iteration: 75009
loss: 1.0326323509216309,grad_norm: 0.9999995619969391, iteration: 75010
loss: 1.0419197082519531,grad_norm: 0.9361948916209737, iteration: 75011
loss: 1.0124924182891846,grad_norm: 0.8877109069979827, iteration: 75012
loss: 0.9997799396514893,grad_norm: 0.9999999666088705, iteration: 75013
loss: 0.9905498027801514,grad_norm: 0.9999995937178068, iteration: 75014
loss: 0.9709370732307434,grad_norm: 0.9999989325770409, iteration: 75015
loss: 1.028217077255249,grad_norm: 0.9999995223931429, iteration: 75016
loss: 1.00373375415802,grad_norm: 0.9999989840413721, iteration: 75017
loss: 1.014765739440918,grad_norm: 0.9999995061980919, iteration: 75018
loss: 1.0282642841339111,grad_norm: 0.9047045777717982, iteration: 75019
loss: 1.018699288368225,grad_norm: 0.9999989831175486, iteration: 75020
loss: 1.0049363374710083,grad_norm: 0.8680774441876126, iteration: 75021
loss: 0.9835425615310669,grad_norm: 0.9999991525296427, iteration: 75022
loss: 1.0280053615570068,grad_norm: 0.8398790059963364, iteration: 75023
loss: 1.0499768257141113,grad_norm: 0.9999994993870822, iteration: 75024
loss: 1.0539679527282715,grad_norm: 0.8646546669272659, iteration: 75025
loss: 1.0066663026809692,grad_norm: 0.8421239304439357, iteration: 75026
loss: 1.0546947717666626,grad_norm: 0.9999995422669886, iteration: 75027
loss: 1.0296740531921387,grad_norm: 0.999999115106481, iteration: 75028
loss: 0.9996088147163391,grad_norm: 0.997938748573436, iteration: 75029
loss: 0.960661768913269,grad_norm: 0.892231265894545, iteration: 75030
loss: 0.9845861196517944,grad_norm: 0.9999991034472393, iteration: 75031
loss: 1.000688076019287,grad_norm: 0.966620561604391, iteration: 75032
loss: 1.012304663658142,grad_norm: 0.9999990213274761, iteration: 75033
loss: 0.9966026544570923,grad_norm: 0.9773496063431103, iteration: 75034
loss: 1.0543063879013062,grad_norm: 0.9890161874824145, iteration: 75035
loss: 0.974543571472168,grad_norm: 0.8504834876706663, iteration: 75036
loss: 1.0071831941604614,grad_norm: 0.8392112886001313, iteration: 75037
loss: 1.0138381719589233,grad_norm: 0.9800045998981722, iteration: 75038
loss: 1.0126242637634277,grad_norm: 0.9999992743027711, iteration: 75039
loss: 0.9917032122612,grad_norm: 0.9872197812651052, iteration: 75040
loss: 1.0267043113708496,grad_norm: 0.9999992909435484, iteration: 75041
loss: 0.9912254214286804,grad_norm: 0.8539212313153449, iteration: 75042
loss: 0.9897906184196472,grad_norm: 0.8920339018502336, iteration: 75043
loss: 0.9937876462936401,grad_norm: 0.8052800613001271, iteration: 75044
loss: 0.9869738817214966,grad_norm: 0.8903560112657471, iteration: 75045
loss: 1.0262696743011475,grad_norm: 0.93759106365977, iteration: 75046
loss: 0.9930088520050049,grad_norm: 0.8060507734877639, iteration: 75047
loss: 0.9881348609924316,grad_norm: 0.9916400089794278, iteration: 75048
loss: 0.9939691424369812,grad_norm: 0.9999996410465024, iteration: 75049
loss: 0.9897847771644592,grad_norm: 0.9915973914829527, iteration: 75050
loss: 0.9603345394134521,grad_norm: 0.8106903306230802, iteration: 75051
loss: 1.1271486282348633,grad_norm: 0.9646557497866227, iteration: 75052
loss: 0.9685938358306885,grad_norm: 0.9999991937616542, iteration: 75053
loss: 1.0395957231521606,grad_norm: 0.9999990994628015, iteration: 75054
loss: 1.02628493309021,grad_norm: 0.9848091714864309, iteration: 75055
loss: 0.9838930368423462,grad_norm: 0.9222729427847267, iteration: 75056
loss: 0.9744992256164551,grad_norm: 0.7869478640394577, iteration: 75057
loss: 1.0337331295013428,grad_norm: 0.8881342521320358, iteration: 75058
loss: 0.9696981906890869,grad_norm: 0.9072029959097405, iteration: 75059
loss: 0.962989330291748,grad_norm: 0.8908204211842934, iteration: 75060
loss: 1.0146570205688477,grad_norm: 0.9999991101823678, iteration: 75061
loss: 1.034636378288269,grad_norm: 0.9999995182778186, iteration: 75062
loss: 1.0069019794464111,grad_norm: 0.8885122482989937, iteration: 75063
loss: 0.9912647008895874,grad_norm: 0.9213553159264154, iteration: 75064
loss: 1.0157177448272705,grad_norm: 0.974989851424504, iteration: 75065
loss: 0.9794472455978394,grad_norm: 0.8948831638680459, iteration: 75066
loss: 0.9733198285102844,grad_norm: 0.8169999378190973, iteration: 75067
loss: 1.024011492729187,grad_norm: 0.9743895322935223, iteration: 75068
loss: 0.9813204407691956,grad_norm: 0.9999993760927693, iteration: 75069
loss: 0.9622566103935242,grad_norm: 0.9999990502950202, iteration: 75070
loss: 1.0809365510940552,grad_norm: 0.9999992202558303, iteration: 75071
loss: 1.0526405572891235,grad_norm: 0.9999990601096288, iteration: 75072
loss: 0.9893990755081177,grad_norm: 0.9127450780414792, iteration: 75073
loss: 0.9992157816886902,grad_norm: 0.8233421287223439, iteration: 75074
loss: 1.003525733947754,grad_norm: 0.9999990956238575, iteration: 75075
loss: 1.0498969554901123,grad_norm: 0.9999992613336082, iteration: 75076
loss: 1.0049941539764404,grad_norm: 0.9999991682349832, iteration: 75077
loss: 0.9778919816017151,grad_norm: 0.9056080753066281, iteration: 75078
loss: 1.0086584091186523,grad_norm: 0.9417486181886008, iteration: 75079
loss: 0.9819207787513733,grad_norm: 0.9149256897712736, iteration: 75080
loss: 1.0081175565719604,grad_norm: 0.954444363156186, iteration: 75081
loss: 0.951320230960846,grad_norm: 0.8704976387835001, iteration: 75082
loss: 1.0194933414459229,grad_norm: 0.9524181668594447, iteration: 75083
loss: 0.9943382740020752,grad_norm: 0.9498135440726109, iteration: 75084
loss: 1.0331125259399414,grad_norm: 0.999999494987003, iteration: 75085
loss: 1.0023932456970215,grad_norm: 0.873183562086138, iteration: 75086
loss: 1.01430344581604,grad_norm: 0.9999993548286533, iteration: 75087
loss: 0.9717575907707214,grad_norm: 0.9190478844585299, iteration: 75088
loss: 1.009660005569458,grad_norm: 0.9448486043609869, iteration: 75089
loss: 0.9994181990623474,grad_norm: 0.981822897537218, iteration: 75090
loss: 0.9877723455429077,grad_norm: 0.9999995928615983, iteration: 75091
loss: 1.0122766494750977,grad_norm: 0.9171343656865002, iteration: 75092
loss: 1.0129942893981934,grad_norm: 0.9031262596687581, iteration: 75093
loss: 0.9769307374954224,grad_norm: 0.843557653673236, iteration: 75094
loss: 0.9961862564086914,grad_norm: 0.9999991063541983, iteration: 75095
loss: 1.0145734548568726,grad_norm: 0.9999992054959213, iteration: 75096
loss: 0.9946355223655701,grad_norm: 0.9833433179512893, iteration: 75097
loss: 0.997488796710968,grad_norm: 0.887160811325937, iteration: 75098
loss: 1.0808113813400269,grad_norm: 0.9482936434854609, iteration: 75099
loss: 1.0371737480163574,grad_norm: 0.9999992871152636, iteration: 75100
loss: 1.0065276622772217,grad_norm: 0.9999992893733771, iteration: 75101
loss: 0.9665021300315857,grad_norm: 0.9569968420311543, iteration: 75102
loss: 1.0361895561218262,grad_norm: 0.972758130598043, iteration: 75103
loss: 1.0059971809387207,grad_norm: 0.9999989891693111, iteration: 75104
loss: 0.9713233709335327,grad_norm: 0.9999990997314433, iteration: 75105
loss: 1.0422625541687012,grad_norm: 0.8550950795696743, iteration: 75106
loss: 0.9534898400306702,grad_norm: 0.9648392511060866, iteration: 75107
loss: 1.0053578615188599,grad_norm: 0.9999995226191511, iteration: 75108
loss: 1.0426160097122192,grad_norm: 0.9117622191662408, iteration: 75109
loss: 0.9912965893745422,grad_norm: 0.823746892939572, iteration: 75110
loss: 1.0386831760406494,grad_norm: 0.8174489167533823, iteration: 75111
loss: 1.0153231620788574,grad_norm: 0.9276785783795446, iteration: 75112
loss: 1.034703254699707,grad_norm: 0.9999991886016028, iteration: 75113
loss: 1.011812686920166,grad_norm: 0.9999991799584204, iteration: 75114
loss: 0.9511772394180298,grad_norm: 0.9999991129663401, iteration: 75115
loss: 1.00111985206604,grad_norm: 0.9999991615840224, iteration: 75116
loss: 1.0207351446151733,grad_norm: 0.952267633349065, iteration: 75117
loss: 1.0107088088989258,grad_norm: 0.8418056844267778, iteration: 75118
loss: 0.9892651438713074,grad_norm: 0.8120327886167312, iteration: 75119
loss: 1.0300639867782593,grad_norm: 0.9224718120487644, iteration: 75120
loss: 0.9749288558959961,grad_norm: 0.9999991189377688, iteration: 75121
loss: 1.0061959028244019,grad_norm: 0.999999515613558, iteration: 75122
loss: 1.1099897623062134,grad_norm: 0.9999998181428582, iteration: 75123
loss: 0.9902806282043457,grad_norm: 0.9334230298504976, iteration: 75124
loss: 0.9845457077026367,grad_norm: 0.8925029693206555, iteration: 75125
loss: 0.9913614392280579,grad_norm: 0.965209937186782, iteration: 75126
loss: 1.029348373413086,grad_norm: 0.9817230831731278, iteration: 75127
loss: 0.9848928451538086,grad_norm: 0.999999467749587, iteration: 75128
loss: 1.0341713428497314,grad_norm: 0.9999991979786537, iteration: 75129
loss: 0.9967162609100342,grad_norm: 0.9999992089611296, iteration: 75130
loss: 1.007125973701477,grad_norm: 0.9999992585763361, iteration: 75131
loss: 0.997885525226593,grad_norm: 0.9999993128919802, iteration: 75132
loss: 0.973505973815918,grad_norm: 0.9999991390718997, iteration: 75133
loss: 0.9804027080535889,grad_norm: 0.8566592667847965, iteration: 75134
loss: 1.0036143064498901,grad_norm: 0.9999990890701389, iteration: 75135
loss: 0.9872170686721802,grad_norm: 0.9463061876418607, iteration: 75136
loss: 1.0235826969146729,grad_norm: 0.9428175128621548, iteration: 75137
loss: 1.0015207529067993,grad_norm: 0.9999992225297163, iteration: 75138
loss: 1.0261471271514893,grad_norm: 0.9999990496451602, iteration: 75139
loss: 0.9978715777397156,grad_norm: 0.9450139061539967, iteration: 75140
loss: 0.9808085560798645,grad_norm: 0.9872892905248127, iteration: 75141
loss: 1.0849344730377197,grad_norm: 0.9999995947114958, iteration: 75142
loss: 1.0021504163742065,grad_norm: 0.9999992512527917, iteration: 75143
loss: 1.0712453126907349,grad_norm: 0.9999992981113635, iteration: 75144
loss: 1.020044207572937,grad_norm: 0.9999995877370849, iteration: 75145
loss: 0.9914358258247375,grad_norm: 0.9137070117782261, iteration: 75146
loss: 0.9710022211074829,grad_norm: 0.8354090265946806, iteration: 75147
loss: 0.9583125114440918,grad_norm: 0.8861130149442606, iteration: 75148
loss: 0.9883387684822083,grad_norm: 0.9999989488552747, iteration: 75149
loss: 0.9727834463119507,grad_norm: 0.9999993513245479, iteration: 75150
loss: 1.0045056343078613,grad_norm: 0.9999991574560514, iteration: 75151
loss: 0.9847701787948608,grad_norm: 0.9701047438345907, iteration: 75152
loss: 1.000550389289856,grad_norm: 0.9999992226180007, iteration: 75153
loss: 1.0387526750564575,grad_norm: 0.9341755173373513, iteration: 75154
loss: 1.0228404998779297,grad_norm: 0.9999992380106532, iteration: 75155
loss: 1.0615285634994507,grad_norm: 0.9999991768908888, iteration: 75156
loss: 0.9625605344772339,grad_norm: 0.8360846583945556, iteration: 75157
loss: 0.9736766815185547,grad_norm: 0.9747165111392436, iteration: 75158
loss: 0.9772120714187622,grad_norm: 0.9870410093442836, iteration: 75159
loss: 1.062233805656433,grad_norm: 0.9999991573146859, iteration: 75160
loss: 1.0267914533615112,grad_norm: 0.9999993591343146, iteration: 75161
loss: 1.009242057800293,grad_norm: 0.9999993015933316, iteration: 75162
loss: 1.0123869180679321,grad_norm: 0.9999992728092112, iteration: 75163
loss: 1.0122411251068115,grad_norm: 0.9120398466952983, iteration: 75164
loss: 0.9865576028823853,grad_norm: 0.9999990107925018, iteration: 75165
loss: 1.023113489151001,grad_norm: 0.9999994764734342, iteration: 75166
loss: 0.9903709888458252,grad_norm: 0.8545837049494939, iteration: 75167
loss: 0.9974501729011536,grad_norm: 0.8782304018691894, iteration: 75168
loss: 0.9713233113288879,grad_norm: 0.9999995437197046, iteration: 75169
loss: 0.9776431322097778,grad_norm: 0.9598793559792549, iteration: 75170
loss: 1.0277096033096313,grad_norm: 0.9999999627851741, iteration: 75171
loss: 0.9753758311271667,grad_norm: 0.9999990798985493, iteration: 75172
loss: 0.9818524718284607,grad_norm: 0.8646934594936067, iteration: 75173
loss: 1.0283567905426025,grad_norm: 0.999999764748819, iteration: 75174
loss: 1.0463283061981201,grad_norm: 0.9999994485862207, iteration: 75175
loss: 1.0085902214050293,grad_norm: 0.971123172798886, iteration: 75176
loss: 0.9656065106391907,grad_norm: 0.9999991355126779, iteration: 75177
loss: 1.009559154510498,grad_norm: 0.9080485778902405, iteration: 75178
loss: 1.052141785621643,grad_norm: 0.9346263169447809, iteration: 75179
loss: 0.996639609336853,grad_norm: 0.8714631704380001, iteration: 75180
loss: 0.9708816409111023,grad_norm: 0.9999991896820966, iteration: 75181
loss: 1.016276478767395,grad_norm: 0.9999990265617807, iteration: 75182
loss: 1.027797818183899,grad_norm: 0.999999128385648, iteration: 75183
loss: 1.0448954105377197,grad_norm: 0.9746852664595459, iteration: 75184
loss: 0.997399091720581,grad_norm: 0.9999989807657611, iteration: 75185
loss: 1.007162094116211,grad_norm: 0.9999992338273346, iteration: 75186
loss: 1.0472891330718994,grad_norm: 0.9394534268254737, iteration: 75187
loss: 0.9918801188468933,grad_norm: 0.9999992120093406, iteration: 75188
loss: 0.9868487119674683,grad_norm: 0.9754095635722634, iteration: 75189
loss: 1.0993374586105347,grad_norm: 0.9999996361959506, iteration: 75190
loss: 1.0308455228805542,grad_norm: 0.9118608075233969, iteration: 75191
loss: 1.0115488767623901,grad_norm: 0.999999648761149, iteration: 75192
loss: 1.0449702739715576,grad_norm: 0.9999992063837925, iteration: 75193
loss: 1.008113145828247,grad_norm: 0.7818099875728934, iteration: 75194
loss: 1.0821765661239624,grad_norm: 0.9999991915248236, iteration: 75195
loss: 0.9705060124397278,grad_norm: 0.7809637092019172, iteration: 75196
loss: 1.0200188159942627,grad_norm: 0.8779499797058583, iteration: 75197
loss: 0.9850481152534485,grad_norm: 0.9789847680347197, iteration: 75198
loss: 1.0097095966339111,grad_norm: 0.8328488051696762, iteration: 75199
loss: 1.095147728919983,grad_norm: 0.9999991064131533, iteration: 75200
loss: 1.0310523509979248,grad_norm: 0.9999991748199124, iteration: 75201
loss: 1.0487016439437866,grad_norm: 0.9999990991294274, iteration: 75202
loss: 1.0154762268066406,grad_norm: 0.8242175142109472, iteration: 75203
loss: 0.9866170883178711,grad_norm: 0.9268750749415325, iteration: 75204
loss: 1.0894432067871094,grad_norm: 0.9220711804835031, iteration: 75205
loss: 0.9890305399894714,grad_norm: 0.993113295301034, iteration: 75206
loss: 0.9704587459564209,grad_norm: 0.9999999206339075, iteration: 75207
loss: 1.0391812324523926,grad_norm: 0.8964653060205271, iteration: 75208
loss: 0.9938134551048279,grad_norm: 0.9149245611254457, iteration: 75209
loss: 1.0424760580062866,grad_norm: 0.999999564313775, iteration: 75210
loss: 0.9784970879554749,grad_norm: 0.8893244105297322, iteration: 75211
loss: 1.02805495262146,grad_norm: 0.8162316729085114, iteration: 75212
loss: 1.024639368057251,grad_norm: 0.9485556464376756, iteration: 75213
loss: 0.9631865620613098,grad_norm: 0.991164218641765, iteration: 75214
loss: 0.9598366022109985,grad_norm: 0.8552518255510212, iteration: 75215
loss: 1.045166254043579,grad_norm: 0.8822047708142738, iteration: 75216
loss: 1.005993366241455,grad_norm: 0.9999990189633944, iteration: 75217
loss: 0.9924575090408325,grad_norm: 0.9101076156006016, iteration: 75218
loss: 0.9905441999435425,grad_norm: 0.9588789460011856, iteration: 75219
loss: 1.0203226804733276,grad_norm: 0.8662810940191427, iteration: 75220
loss: 0.9993131756782532,grad_norm: 0.9999995601885502, iteration: 75221
loss: 1.1464738845825195,grad_norm: 0.9999991396388509, iteration: 75222
loss: 1.0047944784164429,grad_norm: 0.9999992489931505, iteration: 75223
loss: 1.011476755142212,grad_norm: 0.9999993882170812, iteration: 75224
loss: 1.0457183122634888,grad_norm: 0.9999991495302931, iteration: 75225
loss: 1.0074292421340942,grad_norm: 0.8396222716637929, iteration: 75226
loss: 1.006553292274475,grad_norm: 0.854230509426956, iteration: 75227
loss: 1.0373222827911377,grad_norm: 0.9999992295208031, iteration: 75228
loss: 0.9984477162361145,grad_norm: 0.892724369020846, iteration: 75229
loss: 0.9780071973800659,grad_norm: 0.9945528769628812, iteration: 75230
loss: 1.0190414190292358,grad_norm: 0.9999991021851785, iteration: 75231
loss: 1.0427982807159424,grad_norm: 0.8767934236636183, iteration: 75232
loss: 1.125482439994812,grad_norm: 0.9999995781205887, iteration: 75233
loss: 1.0216138362884521,grad_norm: 0.9999998849417441, iteration: 75234
loss: 0.9936670064926147,grad_norm: 0.9999990823514617, iteration: 75235
loss: 0.9998172521591187,grad_norm: 0.9999994157763602, iteration: 75236
loss: 0.9879884123802185,grad_norm: 0.9413773097464699, iteration: 75237
loss: 1.0123555660247803,grad_norm: 0.9999991799011366, iteration: 75238
loss: 1.0356838703155518,grad_norm: 0.9999996237998748, iteration: 75239
loss: 1.022125482559204,grad_norm: 0.9999992819304958, iteration: 75240
loss: 1.0121753215789795,grad_norm: 0.99999944390825, iteration: 75241
loss: 1.0119596719741821,grad_norm: 0.9723306175464642, iteration: 75242
loss: 0.9819126725196838,grad_norm: 0.9999992319011196, iteration: 75243
loss: 0.9817474484443665,grad_norm: 0.8984908399845798, iteration: 75244
loss: 1.000294804573059,grad_norm: 0.9999990859340604, iteration: 75245
loss: 1.034251093864441,grad_norm: 0.9340236884457674, iteration: 75246
loss: 0.9630833864212036,grad_norm: 0.999998978368776, iteration: 75247
loss: 1.017953872680664,grad_norm: 0.9025283438827754, iteration: 75248
loss: 1.0059090852737427,grad_norm: 0.9662630216184461, iteration: 75249
loss: 0.9892157912254333,grad_norm: 0.9999990184789742, iteration: 75250
loss: 1.0207877159118652,grad_norm: 0.8307407427738965, iteration: 75251
loss: 1.072662115097046,grad_norm: 0.999999657988232, iteration: 75252
loss: 1.0609431266784668,grad_norm: 0.9999993126017339, iteration: 75253
loss: 0.9864656329154968,grad_norm: 0.9999990974341765, iteration: 75254
loss: 1.020923376083374,grad_norm: 0.9999991969662131, iteration: 75255
loss: 0.9897615909576416,grad_norm: 0.8371550376320273, iteration: 75256
loss: 1.0213435888290405,grad_norm: 0.9506970951392403, iteration: 75257
loss: 0.9632918238639832,grad_norm: 0.804792756726833, iteration: 75258
loss: 1.0115176439285278,grad_norm: 0.9999990941211685, iteration: 75259
loss: 1.028226375579834,grad_norm: 0.8847720574095101, iteration: 75260
loss: 0.9843065142631531,grad_norm: 0.9720968500206968, iteration: 75261
loss: 1.0057213306427002,grad_norm: 0.999999268076032, iteration: 75262
loss: 1.002600073814392,grad_norm: 0.9765744088961043, iteration: 75263
loss: 1.007973551750183,grad_norm: 0.9999992174467827, iteration: 75264
loss: 0.9817822575569153,grad_norm: 0.9206215500561267, iteration: 75265
loss: 1.0070655345916748,grad_norm: 0.9999992844871116, iteration: 75266
loss: 0.9998266696929932,grad_norm: 0.7853617666856955, iteration: 75267
loss: 0.9783203601837158,grad_norm: 0.8839334793336805, iteration: 75268
loss: 0.9928452372550964,grad_norm: 0.9999989560964162, iteration: 75269
loss: 0.9962655305862427,grad_norm: 0.8442752932928912, iteration: 75270
loss: 1.0270591974258423,grad_norm: 0.9999991973497775, iteration: 75271
loss: 1.0377733707427979,grad_norm: 0.8453532775134994, iteration: 75272
loss: 1.002706527709961,grad_norm: 0.997863513362137, iteration: 75273
loss: 0.9667210578918457,grad_norm: 0.9410475955118057, iteration: 75274
loss: 1.0225807428359985,grad_norm: 0.9455393874581319, iteration: 75275
loss: 1.0345245599746704,grad_norm: 0.8994417846699215, iteration: 75276
loss: 1.0022969245910645,grad_norm: 0.9999990202037249, iteration: 75277
loss: 1.0175315141677856,grad_norm: 0.9368987495941372, iteration: 75278
loss: 1.0430221557617188,grad_norm: 0.7396063742894062, iteration: 75279
loss: 0.9509592056274414,grad_norm: 0.8433289431316815, iteration: 75280
loss: 0.9926499128341675,grad_norm: 0.6897969229705132, iteration: 75281
loss: 1.0295698642730713,grad_norm: 0.8345623117394101, iteration: 75282
loss: 1.0916943550109863,grad_norm: 0.9999997914751907, iteration: 75283
loss: 1.0001871585845947,grad_norm: 0.999998939562634, iteration: 75284
loss: 1.0826585292816162,grad_norm: 0.9999994463190893, iteration: 75285
loss: 0.995034396648407,grad_norm: 0.999999272776423, iteration: 75286
loss: 1.0212880373001099,grad_norm: 0.9999991666208763, iteration: 75287
loss: 1.006707787513733,grad_norm: 0.952997574868206, iteration: 75288
loss: 0.9803336262702942,grad_norm: 0.8878478000851312, iteration: 75289
loss: 1.0183614492416382,grad_norm: 0.9999998771219002, iteration: 75290
loss: 0.9574489593505859,grad_norm: 0.820176461803381, iteration: 75291
loss: 0.9973874688148499,grad_norm: 0.999999421456536, iteration: 75292
loss: 1.0036367177963257,grad_norm: 0.9999993183344558, iteration: 75293
loss: 1.0462604761123657,grad_norm: 0.9999997194040197, iteration: 75294
loss: 0.9765474200248718,grad_norm: 0.9999992587141974, iteration: 75295
loss: 0.9985780715942383,grad_norm: 0.9202534782587259, iteration: 75296
loss: 1.103871464729309,grad_norm: 0.9999994314888829, iteration: 75297
loss: 0.9596912860870361,grad_norm: 0.8056605139059823, iteration: 75298
loss: 1.055759072303772,grad_norm: 0.9999990696139868, iteration: 75299
loss: 0.9729154706001282,grad_norm: 0.9957904054771897, iteration: 75300
loss: 0.9939942955970764,grad_norm: 0.9999991400447211, iteration: 75301
loss: 0.9803429841995239,grad_norm: 0.917483877938733, iteration: 75302
loss: 0.9863948822021484,grad_norm: 0.9999990906457491, iteration: 75303
loss: 0.9855843186378479,grad_norm: 0.9305402732161373, iteration: 75304
loss: 1.001642107963562,grad_norm: 0.9033163444517122, iteration: 75305
loss: 0.9825237989425659,grad_norm: 0.9999992006532612, iteration: 75306
loss: 1.0196009874343872,grad_norm: 0.9999992216985452, iteration: 75307
loss: 1.0494256019592285,grad_norm: 0.9889806357879101, iteration: 75308
loss: 1.0520809888839722,grad_norm: 0.9999992366118196, iteration: 75309
loss: 1.1078317165374756,grad_norm: 0.9828097142695407, iteration: 75310
loss: 0.985364556312561,grad_norm: 0.790095992525993, iteration: 75311
loss: 0.973914384841919,grad_norm: 0.9391861436068064, iteration: 75312
loss: 1.0235075950622559,grad_norm: 0.901578587482294, iteration: 75313
loss: 0.9747224450111389,grad_norm: 0.9999991203574844, iteration: 75314
loss: 1.0518457889556885,grad_norm: 0.9936241114061775, iteration: 75315
loss: 1.018386960029602,grad_norm: 0.9999988996475448, iteration: 75316
loss: 1.0324783325195312,grad_norm: 0.9106952525973842, iteration: 75317
loss: 0.9727708101272583,grad_norm: 0.9999991750846402, iteration: 75318
loss: 0.9909305572509766,grad_norm: 0.9674554410967395, iteration: 75319
loss: 1.0059012174606323,grad_norm: 0.9999989678174097, iteration: 75320
loss: 0.9841615557670593,grad_norm: 0.9080826655174294, iteration: 75321
loss: 1.0128672122955322,grad_norm: 0.8798961396805701, iteration: 75322
loss: 1.0173964500427246,grad_norm: 0.9999994557202815, iteration: 75323
loss: 1.0240880250930786,grad_norm: 0.9922818217847084, iteration: 75324
loss: 0.9626224637031555,grad_norm: 0.9315372508358513, iteration: 75325
loss: 0.9920250177383423,grad_norm: 0.9141272618845657, iteration: 75326
loss: 1.1837838888168335,grad_norm: 0.9999999551513534, iteration: 75327
loss: 1.0121768712997437,grad_norm: 0.9640169938706146, iteration: 75328
loss: 0.9956313967704773,grad_norm: 0.9999989701697, iteration: 75329
loss: 1.0046182870864868,grad_norm: 0.9999995778495835, iteration: 75330
loss: 1.0860344171524048,grad_norm: 0.9999999381017078, iteration: 75331
loss: 0.9912964701652527,grad_norm: 0.9999991152870481, iteration: 75332
loss: 1.0011764764785767,grad_norm: 0.8750053918102679, iteration: 75333
loss: 1.0741171836853027,grad_norm: 0.9999995062629232, iteration: 75334
loss: 1.008254885673523,grad_norm: 0.8960682239826618, iteration: 75335
loss: 1.0223709344863892,grad_norm: 0.9999995958625245, iteration: 75336
loss: 0.958218514919281,grad_norm: 0.970441097333819, iteration: 75337
loss: 1.0011518001556396,grad_norm: 0.8162750939647389, iteration: 75338
loss: 0.9999431371688843,grad_norm: 0.9760638943122402, iteration: 75339
loss: 0.9942138195037842,grad_norm: 0.9999992083884179, iteration: 75340
loss: 1.0049140453338623,grad_norm: 0.9999990270038104, iteration: 75341
loss: 1.0142704248428345,grad_norm: 0.9800904480647169, iteration: 75342
loss: 1.002369999885559,grad_norm: 0.9999989758840709, iteration: 75343
loss: 0.9806426763534546,grad_norm: 0.9259652955689494, iteration: 75344
loss: 0.9673219919204712,grad_norm: 0.9298250203689922, iteration: 75345
loss: 1.0032753944396973,grad_norm: 0.9999991321079564, iteration: 75346
loss: 0.9877015948295593,grad_norm: 0.9999992283640124, iteration: 75347
loss: 1.0102424621582031,grad_norm: 0.8109631023145599, iteration: 75348
loss: 0.975576639175415,grad_norm: 0.8379870191098291, iteration: 75349
loss: 1.0166484117507935,grad_norm: 0.9999991212195167, iteration: 75350
loss: 1.0630055665969849,grad_norm: 0.8406636932555785, iteration: 75351
loss: 0.9970284700393677,grad_norm: 0.999999754556631, iteration: 75352
loss: 0.9846093654632568,grad_norm: 0.7957391329799117, iteration: 75353
loss: 0.9811457395553589,grad_norm: 0.9742264194371993, iteration: 75354
loss: 0.9921125173568726,grad_norm: 0.8257601710916396, iteration: 75355
loss: 1.0075702667236328,grad_norm: 0.92206686375359, iteration: 75356
loss: 0.9834031462669373,grad_norm: 0.8348596266456044, iteration: 75357
loss: 1.0015802383422852,grad_norm: 0.9381430868631708, iteration: 75358
loss: 1.0169166326522827,grad_norm: 0.9999990802965487, iteration: 75359
loss: 1.1150896549224854,grad_norm: 0.9999999194867962, iteration: 75360
loss: 0.9873225688934326,grad_norm: 0.7784631906207271, iteration: 75361
loss: 1.0203558206558228,grad_norm: 0.825312894318058, iteration: 75362
loss: 1.0098661184310913,grad_norm: 0.8400949194723825, iteration: 75363
loss: 1.0354282855987549,grad_norm: 0.9999989471225297, iteration: 75364
loss: 1.0208414793014526,grad_norm: 0.8843681140021548, iteration: 75365
loss: 0.9707182049751282,grad_norm: 0.9980240575436945, iteration: 75366
loss: 1.0341501235961914,grad_norm: 0.9610738436212868, iteration: 75367
loss: 0.9902738928794861,grad_norm: 0.8326197825256311, iteration: 75368
loss: 0.9913145899772644,grad_norm: 0.9999992991773593, iteration: 75369
loss: 1.0281270742416382,grad_norm: 0.8429068466357069, iteration: 75370
loss: 0.9508797526359558,grad_norm: 0.8579113120014676, iteration: 75371
loss: 0.9553627967834473,grad_norm: 0.9881520903614474, iteration: 75372
loss: 1.034446358680725,grad_norm: 0.900787148369857, iteration: 75373
loss: 1.1066566705703735,grad_norm: 0.9999990025561156, iteration: 75374
loss: 1.0035429000854492,grad_norm: 0.9999989658278169, iteration: 75375
loss: 1.0374196767807007,grad_norm: 0.9999997300536171, iteration: 75376
loss: 1.0304590463638306,grad_norm: 0.9999989927771403, iteration: 75377
loss: 0.9987108707427979,grad_norm: 0.7708446846216673, iteration: 75378
loss: 1.0168274641036987,grad_norm: 0.8664897258441228, iteration: 75379
loss: 1.0096954107284546,grad_norm: 0.8766121765605878, iteration: 75380
loss: 1.0107159614562988,grad_norm: 0.9999990208911047, iteration: 75381
loss: 1.0158367156982422,grad_norm: 0.8669826854775928, iteration: 75382
loss: 1.0093082189559937,grad_norm: 0.9999992190218806, iteration: 75383
loss: 1.0064858198165894,grad_norm: 0.989102635692193, iteration: 75384
loss: 1.0026699304580688,grad_norm: 0.8214330632675227, iteration: 75385
loss: 0.9939612150192261,grad_norm: 0.7019259641261559, iteration: 75386
loss: 0.9882321953773499,grad_norm: 0.9999993771596549, iteration: 75387
loss: 1.0070871114730835,grad_norm: 0.8626405773724304, iteration: 75388
loss: 1.0208748579025269,grad_norm: 0.9145490408116599, iteration: 75389
loss: 1.0205354690551758,grad_norm: 0.9529176097458, iteration: 75390
loss: 1.034012794494629,grad_norm: 0.9999990365212821, iteration: 75391
loss: 1.0860295295715332,grad_norm: 0.9999993884009956, iteration: 75392
loss: 1.0223791599273682,grad_norm: 0.9999991291737566, iteration: 75393
loss: 0.9991647601127625,grad_norm: 0.9999992724406308, iteration: 75394
loss: 0.9695503115653992,grad_norm: 0.9495977890450347, iteration: 75395
loss: 1.0231304168701172,grad_norm: 0.9999991637708696, iteration: 75396
loss: 1.0158536434173584,grad_norm: 0.8265521858051843, iteration: 75397
loss: 1.0078450441360474,grad_norm: 0.9999997130951128, iteration: 75398
loss: 1.029085397720337,grad_norm: 0.9999992290767299, iteration: 75399
loss: 1.0738615989685059,grad_norm: 0.9999990934264424, iteration: 75400
loss: 0.9857438802719116,grad_norm: 0.9999998290406635, iteration: 75401
loss: 1.0045528411865234,grad_norm: 0.9999990981785991, iteration: 75402
loss: 0.9943127036094666,grad_norm: 0.9999993567588853, iteration: 75403
loss: 1.0021449327468872,grad_norm: 0.9999992209005696, iteration: 75404
loss: 1.0221713781356812,grad_norm: 0.9999997642263146, iteration: 75405
loss: 1.0035951137542725,grad_norm: 0.845413841467099, iteration: 75406
loss: 1.017181396484375,grad_norm: 0.9999991012868368, iteration: 75407
loss: 0.9948668479919434,grad_norm: 0.7870910114040663, iteration: 75408
loss: 1.0138441324234009,grad_norm: 0.8773978981278622, iteration: 75409
loss: 0.9812670946121216,grad_norm: 0.8908663127248071, iteration: 75410
loss: 0.9736307859420776,grad_norm: 0.9411200219862766, iteration: 75411
loss: 0.9760022163391113,grad_norm: 0.9999990287670495, iteration: 75412
loss: 0.9938740730285645,grad_norm: 0.8996644061617122, iteration: 75413
loss: 0.954779326915741,grad_norm: 0.7845882577071842, iteration: 75414
loss: 0.9881141185760498,grad_norm: 0.9999993572290815, iteration: 75415
loss: 0.9903243184089661,grad_norm: 0.9999992728102161, iteration: 75416
loss: 1.004601001739502,grad_norm: 0.8272355071588511, iteration: 75417
loss: 0.9853615164756775,grad_norm: 0.9999997804272472, iteration: 75418
loss: 1.070594072341919,grad_norm: 0.9999991774069386, iteration: 75419
loss: 0.9888425469398499,grad_norm: 0.9999992815533155, iteration: 75420
loss: 1.022876501083374,grad_norm: 0.9276068406614641, iteration: 75421
loss: 1.0225733518600464,grad_norm: 0.9999995574017335, iteration: 75422
loss: 1.0273523330688477,grad_norm: 0.9096337903906346, iteration: 75423
loss: 1.0551167726516724,grad_norm: 0.9999994626421681, iteration: 75424
loss: 0.9983770251274109,grad_norm: 0.9641507800689701, iteration: 75425
loss: 1.0093586444854736,grad_norm: 0.9999993546834808, iteration: 75426
loss: 1.0478825569152832,grad_norm: 0.9999998530596147, iteration: 75427
loss: 1.1623599529266357,grad_norm: 0.9999992613641702, iteration: 75428
loss: 1.0274317264556885,grad_norm: 0.8047275227290184, iteration: 75429
loss: 1.0326441526412964,grad_norm: 0.999999820998965, iteration: 75430
loss: 1.008436679840088,grad_norm: 0.9979150668273538, iteration: 75431
loss: 0.9781836271286011,grad_norm: 0.9999993732180336, iteration: 75432
loss: 0.9726115465164185,grad_norm: 0.9506251796034719, iteration: 75433
loss: 1.0484838485717773,grad_norm: 0.9999993561782111, iteration: 75434
loss: 1.0139256715774536,grad_norm: 0.9999990875012121, iteration: 75435
loss: 1.0712591409683228,grad_norm: 0.9999999121695062, iteration: 75436
loss: 1.0019547939300537,grad_norm: 0.8027717231646134, iteration: 75437
loss: 0.9815062880516052,grad_norm: 0.9999996843289182, iteration: 75438
loss: 0.9932250380516052,grad_norm: 0.9204967794857328, iteration: 75439
loss: 1.0186625719070435,grad_norm: 0.9999995570545047, iteration: 75440
loss: 0.9904327392578125,grad_norm: 0.999999106409694, iteration: 75441
loss: 0.9841955900192261,grad_norm: 0.998861631767391, iteration: 75442
loss: 1.0196354389190674,grad_norm: 0.9999994686981497, iteration: 75443
loss: 1.0302952527999878,grad_norm: 0.9999997614039738, iteration: 75444
loss: 1.011857032775879,grad_norm: 0.9999992757758522, iteration: 75445
loss: 1.057896614074707,grad_norm: 0.9999993520715516, iteration: 75446
loss: 1.0389217138290405,grad_norm: 0.9999991431038008, iteration: 75447
loss: 0.9949491620063782,grad_norm: 0.9999997974633038, iteration: 75448
loss: 1.0011231899261475,grad_norm: 0.999999167421227, iteration: 75449
loss: 0.9940814971923828,grad_norm: 0.9999994073123302, iteration: 75450
loss: 0.9732945561408997,grad_norm: 0.9999997181526545, iteration: 75451
loss: 0.9775381088256836,grad_norm: 0.9999990742878524, iteration: 75452
loss: 1.0316053628921509,grad_norm: 0.9999991512578612, iteration: 75453
loss: 1.093529462814331,grad_norm: 0.999999280639635, iteration: 75454
loss: 1.007325530052185,grad_norm: 0.9999992352707814, iteration: 75455
loss: 1.0978530645370483,grad_norm: 0.9999992127573331, iteration: 75456
loss: 1.0261961221694946,grad_norm: 0.9746854580500683, iteration: 75457
loss: 0.993952214717865,grad_norm: 0.999999149250311, iteration: 75458
loss: 1.0352884531021118,grad_norm: 0.9999993141517405, iteration: 75459
loss: 1.0146808624267578,grad_norm: 0.9999991147888448, iteration: 75460
loss: 0.975557804107666,grad_norm: 0.9999989756313008, iteration: 75461
loss: 1.0448099374771118,grad_norm: 0.9999997001643525, iteration: 75462
loss: 0.9838851690292358,grad_norm: 0.9063434932738812, iteration: 75463
loss: 1.0144213438034058,grad_norm: 0.999999372700337, iteration: 75464
loss: 1.000584602355957,grad_norm: 0.919753691205478, iteration: 75465
loss: 0.9810596108436584,grad_norm: 0.9370297928695691, iteration: 75466
loss: 1.0513502359390259,grad_norm: 0.9999994485846251, iteration: 75467
loss: 1.0908989906311035,grad_norm: 0.9999990150992943, iteration: 75468
loss: 0.9977844953536987,grad_norm: 0.9849224073763543, iteration: 75469
loss: 0.9763728380203247,grad_norm: 0.9999991175406219, iteration: 75470
loss: 0.982128918170929,grad_norm: 0.9292092398720226, iteration: 75471
loss: 1.044063687324524,grad_norm: 0.9999998443167442, iteration: 75472
loss: 1.0023515224456787,grad_norm: 0.9013505739102186, iteration: 75473
loss: 1.0747692584991455,grad_norm: 0.9999996146012978, iteration: 75474
loss: 1.1147739887237549,grad_norm: 0.9999996105720417, iteration: 75475
loss: 1.020251989364624,grad_norm: 0.934824698791962, iteration: 75476
loss: 1.0464212894439697,grad_norm: 0.9999991739070773, iteration: 75477
loss: 1.1922069787979126,grad_norm: 0.9999999064817172, iteration: 75478
loss: 1.013045310974121,grad_norm: 0.999999396190279, iteration: 75479
loss: 1.0732786655426025,grad_norm: 0.9999994368953061, iteration: 75480
loss: 1.007973313331604,grad_norm: 0.7462749562981981, iteration: 75481
loss: 0.9780929684638977,grad_norm: 0.9999992070875491, iteration: 75482
loss: 1.0662416219711304,grad_norm: 0.9999996996458779, iteration: 75483
loss: 1.0247942209243774,grad_norm: 0.9999990467918249, iteration: 75484
loss: 0.97427898645401,grad_norm: 0.9999990930202362, iteration: 75485
loss: 0.9915714263916016,grad_norm: 0.8292354647310217, iteration: 75486
loss: 1.035086989402771,grad_norm: 0.9811786616561151, iteration: 75487
loss: 1.016242504119873,grad_norm: 0.9999991310287492, iteration: 75488
loss: 0.9592561721801758,grad_norm: 0.9999991961276746, iteration: 75489
loss: 1.0317262411117554,grad_norm: 0.99999947494594, iteration: 75490
loss: 1.0553280115127563,grad_norm: 0.7913604046749912, iteration: 75491
loss: 0.9980965256690979,grad_norm: 0.9533825081202995, iteration: 75492
loss: 1.0371123552322388,grad_norm: 0.9999998864900911, iteration: 75493
loss: 0.9825342893600464,grad_norm: 0.9999996330848161, iteration: 75494
loss: 1.0172017812728882,grad_norm: 0.9999996189017558, iteration: 75495
loss: 1.141603708267212,grad_norm: 0.999999727105898, iteration: 75496
loss: 1.054260492324829,grad_norm: 0.999999674609749, iteration: 75497
loss: 1.109961986541748,grad_norm: 0.9999999076050183, iteration: 75498
loss: 1.0340149402618408,grad_norm: 0.999999330233812, iteration: 75499
loss: 1.093773603439331,grad_norm: 0.9999995860977213, iteration: 75500
loss: 0.9961826205253601,grad_norm: 0.9999989453226964, iteration: 75501
loss: 0.9872182607650757,grad_norm: 0.9274369133939155, iteration: 75502
loss: 1.0100663900375366,grad_norm: 0.9999991631234787, iteration: 75503
loss: 0.9852349162101746,grad_norm: 0.978271094626382, iteration: 75504
loss: 1.0071133375167847,grad_norm: 0.999999148170535, iteration: 75505
loss: 1.0076279640197754,grad_norm: 0.9080484145241804, iteration: 75506
loss: 1.0726549625396729,grad_norm: 0.9645632166249173, iteration: 75507
loss: 1.0818690061569214,grad_norm: 0.9999990794349192, iteration: 75508
loss: 1.0206475257873535,grad_norm: 0.9999990983099034, iteration: 75509
loss: 0.9778988361358643,grad_norm: 0.9999991835555065, iteration: 75510
loss: 0.9821727871894836,grad_norm: 0.9054032892082202, iteration: 75511
loss: 1.0077282190322876,grad_norm: 0.9045333345161937, iteration: 75512
loss: 1.0087883472442627,grad_norm: 0.9999997215593124, iteration: 75513
loss: 1.011903166770935,grad_norm: 0.9999994661645587, iteration: 75514
loss: 1.0188478231430054,grad_norm: 0.8609244224300677, iteration: 75515
loss: 0.9972198009490967,grad_norm: 0.8908439711452862, iteration: 75516
loss: 1.0667271614074707,grad_norm: 0.9999998551004694, iteration: 75517
loss: 1.0654902458190918,grad_norm: 0.999999044603219, iteration: 75518
loss: 1.0399916172027588,grad_norm: 0.9999999597720105, iteration: 75519
loss: 0.9976528882980347,grad_norm: 0.9645286899721711, iteration: 75520
loss: 1.0021764039993286,grad_norm: 0.9192306613213381, iteration: 75521
loss: 0.999961256980896,grad_norm: 0.9999990581194422, iteration: 75522
loss: 1.119435429573059,grad_norm: 0.9999993390065662, iteration: 75523
loss: 1.0549732446670532,grad_norm: 0.9999997001874092, iteration: 75524
loss: 1.064568042755127,grad_norm: 0.9999991526305669, iteration: 75525
loss: 0.9715111255645752,grad_norm: 0.9999992501178845, iteration: 75526
loss: 1.066576361656189,grad_norm: 1.0000000708050527, iteration: 75527
loss: 1.146950125694275,grad_norm: 0.9999999256505758, iteration: 75528
loss: 1.0185160636901855,grad_norm: 0.9999994011468029, iteration: 75529
loss: 1.0830025672912598,grad_norm: 0.9999991688785543, iteration: 75530
loss: 1.0289822816848755,grad_norm: 0.9375523914575153, iteration: 75531
loss: 1.0171840190887451,grad_norm: 0.9999995391697327, iteration: 75532
loss: 1.093060851097107,grad_norm: 0.999999118247883, iteration: 75533
loss: 1.0095585584640503,grad_norm: 0.8436400703584639, iteration: 75534
loss: 1.015299677848816,grad_norm: 0.9653211851548138, iteration: 75535
loss: 1.032415747642517,grad_norm: 0.8427152107539443, iteration: 75536
loss: 1.016714334487915,grad_norm: 0.999999174426508, iteration: 75537
loss: 1.0326809883117676,grad_norm: 0.9999990656995489, iteration: 75538
loss: 1.0371043682098389,grad_norm: 0.9999990355902221, iteration: 75539
loss: 1.1025031805038452,grad_norm: 1.0000000611717308, iteration: 75540
loss: 1.0253318548202515,grad_norm: 0.8218668123815819, iteration: 75541
loss: 0.9854921102523804,grad_norm: 0.9999991977387871, iteration: 75542
loss: 1.0619070529937744,grad_norm: 0.999999748986707, iteration: 75543
loss: 1.008955955505371,grad_norm: 0.9999992418612987, iteration: 75544
loss: 1.0009886026382446,grad_norm: 0.9999995451936686, iteration: 75545
loss: 1.0013668537139893,grad_norm: 0.9999997757313668, iteration: 75546
loss: 1.191809058189392,grad_norm: 0.9999994978544224, iteration: 75547
loss: 0.9798724055290222,grad_norm: 0.8832800846573231, iteration: 75548
loss: 0.995774507522583,grad_norm: 0.9999990906180107, iteration: 75549
loss: 1.028639316558838,grad_norm: 0.9999995740028467, iteration: 75550
loss: 1.002181887626648,grad_norm: 0.9999990762533817, iteration: 75551
loss: 1.0264159440994263,grad_norm: 0.9999991797975065, iteration: 75552
loss: 1.0221585035324097,grad_norm: 0.8663002025430785, iteration: 75553
loss: 1.1042912006378174,grad_norm: 0.9999992498868856, iteration: 75554
loss: 0.9895589351654053,grad_norm: 0.9999991038022038, iteration: 75555
loss: 1.0420644283294678,grad_norm: 0.9999992980303765, iteration: 75556
loss: 1.0208652019500732,grad_norm: 0.877452245990778, iteration: 75557
loss: 1.0288844108581543,grad_norm: 0.9999989282993577, iteration: 75558
loss: 0.9899675846099854,grad_norm: 0.9269783378601567, iteration: 75559
loss: 1.0198947191238403,grad_norm: 0.9999991585218406, iteration: 75560
loss: 1.1134157180786133,grad_norm: 0.9999998474483404, iteration: 75561
loss: 0.988679826259613,grad_norm: 0.8271726257758046, iteration: 75562
loss: 0.9768931865692139,grad_norm: 0.7244650732032225, iteration: 75563
loss: 1.0370787382125854,grad_norm: 0.9999993012608407, iteration: 75564
loss: 1.0196635723114014,grad_norm: 0.999999586316669, iteration: 75565
loss: 1.0166821479797363,grad_norm: 0.9098078069697293, iteration: 75566
loss: 1.005581021308899,grad_norm: 0.9541522401457426, iteration: 75567
loss: 1.0126675367355347,grad_norm: 0.9388529918129023, iteration: 75568
loss: 0.9893016815185547,grad_norm: 0.9033453930674675, iteration: 75569
loss: 1.0283763408660889,grad_norm: 0.8217396952087577, iteration: 75570
loss: 1.0054926872253418,grad_norm: 0.9999993457495141, iteration: 75571
loss: 1.0905171632766724,grad_norm: 0.9999996749497366, iteration: 75572
loss: 1.0357279777526855,grad_norm: 0.9999996485024435, iteration: 75573
loss: 1.0114446878433228,grad_norm: 0.9999994088062076, iteration: 75574
loss: 1.0190742015838623,grad_norm: 0.7962628488724557, iteration: 75575
loss: 1.030532717704773,grad_norm: 0.9104289624474705, iteration: 75576
loss: 1.0190528631210327,grad_norm: 0.999999003519757, iteration: 75577
loss: 0.9977605938911438,grad_norm: 0.9999992696804674, iteration: 75578
loss: 1.0095105171203613,grad_norm: 0.8761076241586215, iteration: 75579
loss: 1.148703694343567,grad_norm: 0.9999998271837398, iteration: 75580
loss: 1.0054125785827637,grad_norm: 0.9999989751437734, iteration: 75581
loss: 1.0063118934631348,grad_norm: 0.9999993079919453, iteration: 75582
loss: 1.034067988395691,grad_norm: 0.9999992239495248, iteration: 75583
loss: 0.9955682754516602,grad_norm: 0.6969230502929663, iteration: 75584
loss: 1.0495967864990234,grad_norm: 0.980194645135147, iteration: 75585
loss: 1.0123049020767212,grad_norm: 0.887022690651072, iteration: 75586
loss: 1.0337364673614502,grad_norm: 0.999999764970682, iteration: 75587
loss: 1.0028111934661865,grad_norm: 0.9999991830511333, iteration: 75588
loss: 0.9778329133987427,grad_norm: 0.9999998513428608, iteration: 75589
loss: 1.0757207870483398,grad_norm: 0.9999998593738759, iteration: 75590
loss: 1.056075096130371,grad_norm: 0.921758452340532, iteration: 75591
loss: 1.0900115966796875,grad_norm: 0.9999998427442996, iteration: 75592
loss: 1.1122667789459229,grad_norm: 0.9999995516269324, iteration: 75593
loss: 1.0080311298370361,grad_norm: 0.9999995274955319, iteration: 75594
loss: 1.0249258279800415,grad_norm: 0.9999992811529926, iteration: 75595
loss: 0.9801386594772339,grad_norm: 0.9999994027776219, iteration: 75596
loss: 1.018113613128662,grad_norm: 0.9999989899290762, iteration: 75597
loss: 0.9660488367080688,grad_norm: 0.9999996050277961, iteration: 75598
loss: 1.0357669591903687,grad_norm: 0.9999993136041625, iteration: 75599
loss: 0.9996814131736755,grad_norm: 0.9468098894924963, iteration: 75600
loss: 1.052846908569336,grad_norm: 0.9999993408980655, iteration: 75601
loss: 1.039189100265503,grad_norm: 0.9999995353930958, iteration: 75602
loss: 1.0182271003723145,grad_norm: 0.7852610445497852, iteration: 75603
loss: 0.9996474981307983,grad_norm: 0.9999992494409563, iteration: 75604
loss: 1.059950590133667,grad_norm: 0.9999991720405831, iteration: 75605
loss: 0.98446124792099,grad_norm: 0.8723326409037684, iteration: 75606
loss: 1.0434538125991821,grad_norm: 0.9999996347462178, iteration: 75607
loss: 1.0247688293457031,grad_norm: 0.9571589462226258, iteration: 75608
loss: 1.0027103424072266,grad_norm: 0.8570906164501594, iteration: 75609
loss: 1.0041499137878418,grad_norm: 0.9122051665101567, iteration: 75610
loss: 1.0003058910369873,grad_norm: 0.9999992455837331, iteration: 75611
loss: 1.0198822021484375,grad_norm: 0.9999991702067632, iteration: 75612
loss: 1.0160454511642456,grad_norm: 0.9322865047428093, iteration: 75613
loss: 1.0263487100601196,grad_norm: 0.999999271824136, iteration: 75614
loss: 1.0108622312545776,grad_norm: 0.9706654063718789, iteration: 75615
loss: 1.0384018421173096,grad_norm: 0.9999992950360012, iteration: 75616
loss: 1.051338791847229,grad_norm: 0.9999993260997277, iteration: 75617
loss: 0.9955626130104065,grad_norm: 0.9999992771696776, iteration: 75618
loss: 0.970495343208313,grad_norm: 0.9224199636004318, iteration: 75619
loss: 1.0142431259155273,grad_norm: 0.9999990376365057, iteration: 75620
loss: 1.0041584968566895,grad_norm: 0.9369669879793336, iteration: 75621
loss: 1.0086885690689087,grad_norm: 0.999998999421302, iteration: 75622
loss: 0.9894554018974304,grad_norm: 0.9469328535777691, iteration: 75623
loss: 0.9920678734779358,grad_norm: 0.9999991966729482, iteration: 75624
loss: 1.0341436862945557,grad_norm: 0.999999257764668, iteration: 75625
loss: 0.9744715094566345,grad_norm: 0.8989220019312625, iteration: 75626
loss: 1.1495651006698608,grad_norm: 0.9999993014877443, iteration: 75627
loss: 1.0175808668136597,grad_norm: 0.8449395486457801, iteration: 75628
loss: 1.0060815811157227,grad_norm: 0.9853443212572248, iteration: 75629
loss: 1.008751392364502,grad_norm: 0.9999991038931288, iteration: 75630
loss: 1.0300992727279663,grad_norm: 0.9999990592542735, iteration: 75631
loss: 1.028620958328247,grad_norm: 0.881474423467758, iteration: 75632
loss: 0.9832370281219482,grad_norm: 0.8762782446328149, iteration: 75633
loss: 1.0440185070037842,grad_norm: 0.9999994200702215, iteration: 75634
loss: 0.9923058152198792,grad_norm: 0.8052662003632025, iteration: 75635
loss: 1.004929780960083,grad_norm: 0.9999994482445358, iteration: 75636
loss: 0.9935919046401978,grad_norm: 0.9999991361347299, iteration: 75637
loss: 0.9921606779098511,grad_norm: 0.9091436910504036, iteration: 75638
loss: 1.0285768508911133,grad_norm: 0.9999993234963822, iteration: 75639
loss: 1.0293339490890503,grad_norm: 0.9999996675147964, iteration: 75640
loss: 1.0064798593521118,grad_norm: 0.8163320518836695, iteration: 75641
loss: 1.0054396390914917,grad_norm: 0.8790295951548429, iteration: 75642
loss: 1.0151249170303345,grad_norm: 0.8933186398149282, iteration: 75643
loss: 1.0666390657424927,grad_norm: 0.9999991060643932, iteration: 75644
loss: 0.9714836478233337,grad_norm: 0.9402116394943797, iteration: 75645
loss: 0.9656071662902832,grad_norm: 0.990695063234908, iteration: 75646
loss: 1.011906385421753,grad_norm: 0.8880124937931706, iteration: 75647
loss: 1.0089807510375977,grad_norm: 0.9999990170267862, iteration: 75648
loss: 0.9912405610084534,grad_norm: 0.9152863379579985, iteration: 75649
loss: 1.011721134185791,grad_norm: 0.9999994757898424, iteration: 75650
loss: 1.013041615486145,grad_norm: 0.9925812239849083, iteration: 75651
loss: 1.024558663368225,grad_norm: 0.9999990893349355, iteration: 75652
loss: 1.0143386125564575,grad_norm: 0.9300058690960717, iteration: 75653
loss: 0.982879638671875,grad_norm: 0.8913650995864976, iteration: 75654
loss: 0.9793522953987122,grad_norm: 0.9706621012775224, iteration: 75655
loss: 1.0414109230041504,grad_norm: 0.9999996334377542, iteration: 75656
loss: 1.0149154663085938,grad_norm: 0.8186306471834621, iteration: 75657
loss: 1.0176942348480225,grad_norm: 0.9022934133006146, iteration: 75658
loss: 0.9905503392219543,grad_norm: 0.8301433532494782, iteration: 75659
loss: 1.028743863105774,grad_norm: 0.999999778289339, iteration: 75660
loss: 1.0407350063323975,grad_norm: 0.9753651526356679, iteration: 75661
loss: 1.0917699337005615,grad_norm: 0.9999994978116641, iteration: 75662
loss: 1.0007530450820923,grad_norm: 0.9999998011313375, iteration: 75663
loss: 0.9892589449882507,grad_norm: 0.9999990527813105, iteration: 75664
loss: 0.9772257208824158,grad_norm: 0.9999991489262025, iteration: 75665
loss: 0.9800024628639221,grad_norm: 0.9999998272703876, iteration: 75666
loss: 1.0124051570892334,grad_norm: 0.9566941806504607, iteration: 75667
loss: 1.009484887123108,grad_norm: 0.9162222791779473, iteration: 75668
loss: 1.0287587642669678,grad_norm: 0.9999995171766415, iteration: 75669
loss: 1.0024434328079224,grad_norm: 0.9999994043728629, iteration: 75670
loss: 1.0186669826507568,grad_norm: 0.9999993232538045, iteration: 75671
loss: 1.025186538696289,grad_norm: 0.8046425399114426, iteration: 75672
loss: 1.0304005146026611,grad_norm: 0.9160738872969807, iteration: 75673
loss: 1.0001375675201416,grad_norm: 0.8038139301836243, iteration: 75674
loss: 0.9899613261222839,grad_norm: 0.9533861249459905, iteration: 75675
loss: 0.9800154566764832,grad_norm: 0.9802052864251769, iteration: 75676
loss: 1.00948965549469,grad_norm: 0.8603237610524384, iteration: 75677
loss: 0.9940803647041321,grad_norm: 0.9395006309589087, iteration: 75678
loss: 1.0598176717758179,grad_norm: 0.9999992252493918, iteration: 75679
loss: 0.9713481664657593,grad_norm: 0.8928112656054391, iteration: 75680
loss: 1.0136957168579102,grad_norm: 0.9999996849123246, iteration: 75681
loss: 1.0095025300979614,grad_norm: 0.9999993220783134, iteration: 75682
loss: 1.0195484161376953,grad_norm: 0.9999989963151126, iteration: 75683
loss: 0.9843488335609436,grad_norm: 0.8736768583390289, iteration: 75684
loss: 1.0028032064437866,grad_norm: 0.9702454589601524, iteration: 75685
loss: 1.0375243425369263,grad_norm: 0.9999996183939538, iteration: 75686
loss: 0.9641079902648926,grad_norm: 0.8575902782978461, iteration: 75687
loss: 1.0070487260818481,grad_norm: 0.9318736716339475, iteration: 75688
loss: 1.0083047151565552,grad_norm: 0.9999990858522172, iteration: 75689
loss: 0.9803204536437988,grad_norm: 0.8701488442007389, iteration: 75690
loss: 1.0283548831939697,grad_norm: 0.9999996519545091, iteration: 75691
loss: 1.0536524057388306,grad_norm: 0.9999992010830114, iteration: 75692
loss: 1.025756597518921,grad_norm: 0.999999060380519, iteration: 75693
loss: 1.0265965461730957,grad_norm: 0.9779919948936527, iteration: 75694
loss: 1.0250736474990845,grad_norm: 0.9999992828998497, iteration: 75695
loss: 1.01646888256073,grad_norm: 0.9999990133587057, iteration: 75696
loss: 0.989148736000061,grad_norm: 0.9837014412948366, iteration: 75697
loss: 1.045876145362854,grad_norm: 0.9999990075485347, iteration: 75698
loss: 1.0193959474563599,grad_norm: 0.9159396142398424, iteration: 75699
loss: 1.0418782234191895,grad_norm: 0.9330054084274857, iteration: 75700
loss: 1.005259394645691,grad_norm: 0.7863127822959445, iteration: 75701
loss: 0.9975823760032654,grad_norm: 0.9731385617160981, iteration: 75702
loss: 1.028735876083374,grad_norm: 0.9999995439562377, iteration: 75703
loss: 1.0588823556900024,grad_norm: 0.9999995775629729, iteration: 75704
loss: 1.0206176042556763,grad_norm: 0.8606997272407104, iteration: 75705
loss: 0.9978042244911194,grad_norm: 0.8602487600629405, iteration: 75706
loss: 0.9941093325614929,grad_norm: 0.9655405628893124, iteration: 75707
loss: 1.0214484930038452,grad_norm: 0.8670925729809734, iteration: 75708
loss: 1.0391781330108643,grad_norm: 0.9999991208870902, iteration: 75709
loss: 1.0621615648269653,grad_norm: 0.9999991004508144, iteration: 75710
loss: 1.0208772420883179,grad_norm: 0.9063904435590118, iteration: 75711
loss: 1.0010915994644165,grad_norm: 0.9999994949207334, iteration: 75712
loss: 1.011892557144165,grad_norm: 0.9463651272705542, iteration: 75713
loss: 1.0283987522125244,grad_norm: 0.9499489477515696, iteration: 75714
loss: 1.01773202419281,grad_norm: 0.9186530669384763, iteration: 75715
loss: 1.009728193283081,grad_norm: 0.9770537954925554, iteration: 75716
loss: 0.9685765504837036,grad_norm: 0.9716069665954554, iteration: 75717
loss: 0.9935591816902161,grad_norm: 0.9999990533476535, iteration: 75718
loss: 0.9695310592651367,grad_norm: 0.9831150419624135, iteration: 75719
loss: 0.9913176894187927,grad_norm: 0.8540808992226157, iteration: 75720
loss: 1.0177428722381592,grad_norm: 0.9999990379776383, iteration: 75721
loss: 0.9955385327339172,grad_norm: 0.9467100357143521, iteration: 75722
loss: 0.9974771738052368,grad_norm: 0.8067496588336104, iteration: 75723
loss: 1.0641390085220337,grad_norm: 0.9999995102445327, iteration: 75724
loss: 0.9583423137664795,grad_norm: 0.9999991655122549, iteration: 75725
loss: 1.0199739933013916,grad_norm: 0.9999991121042486, iteration: 75726
loss: 1.016323447227478,grad_norm: 0.9999990139940239, iteration: 75727
loss: 1.0038659572601318,grad_norm: 0.978184806071598, iteration: 75728
loss: 1.0260522365570068,grad_norm: 0.9999993449484023, iteration: 75729
loss: 1.0670576095581055,grad_norm: 0.9999991154489759, iteration: 75730
loss: 1.0740059614181519,grad_norm: 0.9999994469653968, iteration: 75731
loss: 1.0059783458709717,grad_norm: 0.9999993807417611, iteration: 75732
loss: 0.9756843447685242,grad_norm: 0.9999993195190471, iteration: 75733
loss: 1.0300630331039429,grad_norm: 0.9736389487761692, iteration: 75734
loss: 1.0437908172607422,grad_norm: 0.9999998815968401, iteration: 75735
loss: 0.9969769716262817,grad_norm: 0.9999999530357828, iteration: 75736
loss: 0.9928356409072876,grad_norm: 0.999999151235436, iteration: 75737
loss: 0.9879341721534729,grad_norm: 0.9435140368931002, iteration: 75738
loss: 0.9989323019981384,grad_norm: 0.9999991850933242, iteration: 75739
loss: 1.0145137310028076,grad_norm: 0.9844355936964962, iteration: 75740
loss: 0.9845815896987915,grad_norm: 0.9999992069351779, iteration: 75741
loss: 1.0261341333389282,grad_norm: 0.9393603384329118, iteration: 75742
loss: 1.0316941738128662,grad_norm: 0.9999991502329563, iteration: 75743
loss: 1.027626395225525,grad_norm: 0.9294281742854777, iteration: 75744
loss: 1.047354817390442,grad_norm: 0.9192922015139461, iteration: 75745
loss: 0.9783470630645752,grad_norm: 0.9467121084644188, iteration: 75746
loss: 1.008395791053772,grad_norm: 0.9999995550723999, iteration: 75747
loss: 1.1448606252670288,grad_norm: 0.9999999719049906, iteration: 75748
loss: 0.9925655126571655,grad_norm: 0.8843068973420232, iteration: 75749
loss: 0.9928772449493408,grad_norm: 0.9999992096827225, iteration: 75750
loss: 1.0727323293685913,grad_norm: 0.9999990443293396, iteration: 75751
loss: 1.0067452192306519,grad_norm: 0.9782663274631437, iteration: 75752
loss: 1.0496350526809692,grad_norm: 0.9999995959934709, iteration: 75753
loss: 0.9943804740905762,grad_norm: 0.9999993261563846, iteration: 75754
loss: 1.029299020767212,grad_norm: 0.7937334069936575, iteration: 75755
loss: 1.0040754079818726,grad_norm: 0.9999998050284382, iteration: 75756
loss: 1.0287575721740723,grad_norm: 0.999999366411461, iteration: 75757
loss: 0.9718086123466492,grad_norm: 0.9999990865086226, iteration: 75758
loss: 1.061521053314209,grad_norm: 0.9999999131765585, iteration: 75759
loss: 1.0311307907104492,grad_norm: 0.9117943604210241, iteration: 75760
loss: 1.0020663738250732,grad_norm: 0.9999990837945637, iteration: 75761
loss: 1.0176810026168823,grad_norm: 0.9999989936510211, iteration: 75762
loss: 1.0037130117416382,grad_norm: 0.870537379000577, iteration: 75763
loss: 1.0072681903839111,grad_norm: 0.9999991105182993, iteration: 75764
loss: 1.0498954057693481,grad_norm: 0.9999991545453667, iteration: 75765
loss: 1.045158863067627,grad_norm: 0.823742458350591, iteration: 75766
loss: 1.00196373462677,grad_norm: 0.9919982634712977, iteration: 75767
loss: 0.9670562744140625,grad_norm: 0.9865410537210401, iteration: 75768
loss: 1.0322273969650269,grad_norm: 0.9999991833589812, iteration: 75769
loss: 1.0029139518737793,grad_norm: 0.9999992145625526, iteration: 75770
loss: 0.9867285490036011,grad_norm: 0.9117556270161637, iteration: 75771
loss: 1.036213755607605,grad_norm: 0.9999992662993397, iteration: 75772
loss: 0.9589082598686218,grad_norm: 0.850151592944441, iteration: 75773
loss: 0.9967269897460938,grad_norm: 0.8990808730895377, iteration: 75774
loss: 0.9949558973312378,grad_norm: 0.9624309350761615, iteration: 75775
loss: 0.9556342363357544,grad_norm: 0.9999996547837542, iteration: 75776
loss: 1.020967960357666,grad_norm: 0.9999991419085703, iteration: 75777
loss: 1.0477670431137085,grad_norm: 0.9342476641557431, iteration: 75778
loss: 1.0628767013549805,grad_norm: 0.9999997736473565, iteration: 75779
loss: 1.0160449743270874,grad_norm: 0.8991754362728526, iteration: 75780
loss: 0.9908246397972107,grad_norm: 0.8892884520094884, iteration: 75781
loss: 0.9729747176170349,grad_norm: 0.999999043585591, iteration: 75782
loss: 1.049630045890808,grad_norm: 0.9999997512944855, iteration: 75783
loss: 0.9946011304855347,grad_norm: 0.9999991865860158, iteration: 75784
loss: 0.9971134662628174,grad_norm: 0.9999995426004769, iteration: 75785
loss: 1.1013920307159424,grad_norm: 0.9999994971574121, iteration: 75786
loss: 0.9313550591468811,grad_norm: 0.9074784156330548, iteration: 75787
loss: 1.0398558378219604,grad_norm: 0.8901827902623414, iteration: 75788
loss: 0.9965949058532715,grad_norm: 0.9999990444159077, iteration: 75789
loss: 0.9832677841186523,grad_norm: 0.9650032912898174, iteration: 75790
loss: 0.9749000668525696,grad_norm: 0.9999991477977559, iteration: 75791
loss: 0.9932284951210022,grad_norm: 0.9999991779503202, iteration: 75792
loss: 0.9955911636352539,grad_norm: 0.8341674807273486, iteration: 75793
loss: 1.0231647491455078,grad_norm: 0.887732497080766, iteration: 75794
loss: 1.0014053583145142,grad_norm: 0.8653294775249927, iteration: 75795
loss: 0.9639421105384827,grad_norm: 0.8973382311394235, iteration: 75796
loss: 1.0143214464187622,grad_norm: 0.9999992389389113, iteration: 75797
loss: 1.022063970565796,grad_norm: 0.9310686190565908, iteration: 75798
loss: 1.0602433681488037,grad_norm: 0.9999991126265366, iteration: 75799
loss: 1.0931406021118164,grad_norm: 0.9999989703657051, iteration: 75800
loss: 1.0677951574325562,grad_norm: 0.9195874072838524, iteration: 75801
loss: 0.9793870449066162,grad_norm: 0.8306814912558431, iteration: 75802
loss: 1.043513536453247,grad_norm: 0.9999994752556709, iteration: 75803
loss: 1.0045044422149658,grad_norm: 0.9999991280178793, iteration: 75804
loss: 1.0177668333053589,grad_norm: 0.9999991507281806, iteration: 75805
loss: 1.0101202726364136,grad_norm: 0.8620410278940915, iteration: 75806
loss: 0.9787436723709106,grad_norm: 1.0000000218997653, iteration: 75807
loss: 1.0081394910812378,grad_norm: 0.9380138120823335, iteration: 75808
loss: 0.9802735447883606,grad_norm: 0.9999991487914576, iteration: 75809
loss: 1.0878849029541016,grad_norm: 0.999999530554832, iteration: 75810
loss: 1.0158729553222656,grad_norm: 0.9228838024892411, iteration: 75811
loss: 1.0257439613342285,grad_norm: 0.9999990967867461, iteration: 75812
loss: 0.9801690578460693,grad_norm: 0.9300025481983982, iteration: 75813
loss: 0.9913398027420044,grad_norm: 0.9999990733478142, iteration: 75814
loss: 0.9984952211380005,grad_norm: 0.919023778101776, iteration: 75815
loss: 1.0706068277359009,grad_norm: 1.0000000574545176, iteration: 75816
loss: 0.9947822690010071,grad_norm: 0.9999991659140455, iteration: 75817
loss: 1.0144999027252197,grad_norm: 0.9215474878112274, iteration: 75818
loss: 1.0274527072906494,grad_norm: 0.905642220063922, iteration: 75819
loss: 1.0556225776672363,grad_norm: 0.9999995322901631, iteration: 75820
loss: 0.9951863288879395,grad_norm: 0.9999996765672816, iteration: 75821
loss: 1.0140554904937744,grad_norm: 0.999998972697449, iteration: 75822
loss: 0.9953899383544922,grad_norm: 0.9999990236797812, iteration: 75823
loss: 1.0416537523269653,grad_norm: 0.9999992182227174, iteration: 75824
loss: 0.9272829294204712,grad_norm: 0.999999255838854, iteration: 75825
loss: 0.9946194887161255,grad_norm: 0.999999132646011, iteration: 75826
loss: 0.9905303120613098,grad_norm: 0.9087707507265005, iteration: 75827
loss: 0.9858733415603638,grad_norm: 0.8879083136853229, iteration: 75828
loss: 1.003038763999939,grad_norm: 0.9999991650802958, iteration: 75829
loss: 1.0478229522705078,grad_norm: 0.9997120988351699, iteration: 75830
loss: 1.0037039518356323,grad_norm: 0.999999237740828, iteration: 75831
loss: 1.0431742668151855,grad_norm: 0.9999990608816393, iteration: 75832
loss: 1.013368010520935,grad_norm: 0.9472040005488024, iteration: 75833
loss: 0.9834644198417664,grad_norm: 0.7832854693729787, iteration: 75834
loss: 1.019317626953125,grad_norm: 0.8742970134083446, iteration: 75835
loss: 1.0361934900283813,grad_norm: 0.9999998736208264, iteration: 75836
loss: 1.0152902603149414,grad_norm: 0.9999995132631396, iteration: 75837
loss: 1.0234477519989014,grad_norm: 0.9999999221701937, iteration: 75838
loss: 1.0116437673568726,grad_norm: 0.9664656565508104, iteration: 75839
loss: 1.03643798828125,grad_norm: 0.9999998923086647, iteration: 75840
loss: 1.0044989585876465,grad_norm: 0.9464488832053337, iteration: 75841
loss: 0.9747980237007141,grad_norm: 0.9999992878140328, iteration: 75842
loss: 1.026443362236023,grad_norm: 0.9999993083031925, iteration: 75843
loss: 0.9752925634384155,grad_norm: 0.99999925658891, iteration: 75844
loss: 1.0083448886871338,grad_norm: 0.8956881248018698, iteration: 75845
loss: 0.9897223711013794,grad_norm: 0.9999991860835179, iteration: 75846
loss: 1.0145597457885742,grad_norm: 0.9176046733131159, iteration: 75847
loss: 0.9927939772605896,grad_norm: 0.9999992074890403, iteration: 75848
loss: 1.006190538406372,grad_norm: 0.7044948497824635, iteration: 75849
loss: 1.0372711420059204,grad_norm: 0.9999994974180599, iteration: 75850
loss: 0.999459445476532,grad_norm: 0.8594979646909413, iteration: 75851
loss: 0.9856166243553162,grad_norm: 0.9544342410849265, iteration: 75852
loss: 1.074028491973877,grad_norm: 0.9999997904332396, iteration: 75853
loss: 1.0202738046646118,grad_norm: 0.9999997168767049, iteration: 75854
loss: 1.0111674070358276,grad_norm: 0.9682334655808351, iteration: 75855
loss: 1.064157247543335,grad_norm: 0.9999993145380248, iteration: 75856
loss: 0.9727020859718323,grad_norm: 0.9999992406913117, iteration: 75857
loss: 1.043054461479187,grad_norm: 0.9999994750422979, iteration: 75858
loss: 0.9991005063056946,grad_norm: 0.8007862329771622, iteration: 75859
loss: 1.0539473295211792,grad_norm: 0.9999992840769185, iteration: 75860
loss: 0.9918068051338196,grad_norm: 0.9082492495453695, iteration: 75861
loss: 0.98399817943573,grad_norm: 0.9999990772417813, iteration: 75862
loss: 1.025679349899292,grad_norm: 0.9999997834414261, iteration: 75863
loss: 0.9959830045700073,grad_norm: 0.8683158658792709, iteration: 75864
loss: 0.9658063054084778,grad_norm: 0.8667698785814307, iteration: 75865
loss: 1.006822943687439,grad_norm: 0.8248809127043593, iteration: 75866
loss: 0.9707912802696228,grad_norm: 0.9999991093609265, iteration: 75867
loss: 1.0036393404006958,grad_norm: 0.922800154200955, iteration: 75868
loss: 0.9869140982627869,grad_norm: 0.8829876969977783, iteration: 75869
loss: 0.9974789619445801,grad_norm: 0.9999990436165851, iteration: 75870
loss: 1.0256586074829102,grad_norm: 0.8751687454063526, iteration: 75871
loss: 0.9976980686187744,grad_norm: 0.9096571959903079, iteration: 75872
loss: 0.9877564311027527,grad_norm: 0.8719230270243034, iteration: 75873
loss: 1.0010044574737549,grad_norm: 0.9999990804849407, iteration: 75874
loss: 1.007288932800293,grad_norm: 0.9999998481495928, iteration: 75875
loss: 0.9611319303512573,grad_norm: 0.9999989966523488, iteration: 75876
loss: 1.0079247951507568,grad_norm: 0.9999990407387105, iteration: 75877
loss: 0.9880067706108093,grad_norm: 0.9999998479326087, iteration: 75878
loss: 0.9597674608230591,grad_norm: 0.7697914504628951, iteration: 75879
loss: 1.0084084272384644,grad_norm: 0.8043722486627691, iteration: 75880
loss: 1.0452122688293457,grad_norm: 0.999998912816553, iteration: 75881
loss: 1.0360554456710815,grad_norm: 0.9999996274450514, iteration: 75882
loss: 1.0831791162490845,grad_norm: 0.9999997972095571, iteration: 75883
loss: 0.9716901779174805,grad_norm: 0.7592443893923257, iteration: 75884
loss: 0.9899640679359436,grad_norm: 0.847685298534639, iteration: 75885
loss: 1.0317022800445557,grad_norm: 0.9999996481220335, iteration: 75886
loss: 1.0502077341079712,grad_norm: 0.9999998946418962, iteration: 75887
loss: 1.013981819152832,grad_norm: 0.8143189008856817, iteration: 75888
loss: 1.0053807497024536,grad_norm: 0.8554341581420121, iteration: 75889
loss: 1.0097496509552002,grad_norm: 0.9999994264348153, iteration: 75890
loss: 1.2247207164764404,grad_norm: 0.9999992713762228, iteration: 75891
loss: 0.9689149260520935,grad_norm: 0.9999993562258952, iteration: 75892
loss: 0.9973195195198059,grad_norm: 0.87996795497902, iteration: 75893
loss: 1.0313993692398071,grad_norm: 0.7903100016439953, iteration: 75894
loss: 1.0976923704147339,grad_norm: 0.9019750419835543, iteration: 75895
loss: 1.0225602388381958,grad_norm: 0.871924541818714, iteration: 75896
loss: 1.0176715850830078,grad_norm: 0.9999992000557218, iteration: 75897
loss: 1.0615469217300415,grad_norm: 0.9999999006223831, iteration: 75898
loss: 1.0276612043380737,grad_norm: 0.7984451407954297, iteration: 75899
loss: 1.0121098756790161,grad_norm: 0.7471338583665316, iteration: 75900
loss: 1.042007327079773,grad_norm: 0.9999996401257328, iteration: 75901
loss: 1.026717185974121,grad_norm: 0.9586329004666809, iteration: 75902
loss: 1.0636590719223022,grad_norm: 0.9999997810987877, iteration: 75903
loss: 1.0272480249404907,grad_norm: 0.999999204799408, iteration: 75904
loss: 1.0266278982162476,grad_norm: 0.9999996755962957, iteration: 75905
loss: 1.013552188873291,grad_norm: 0.999999042046292, iteration: 75906
loss: 1.091945767402649,grad_norm: 0.9999992843089648, iteration: 75907
loss: 1.0781043767929077,grad_norm: 0.9999991624851075, iteration: 75908
loss: 1.017148733139038,grad_norm: 0.9999998427982846, iteration: 75909
loss: 1.0495789051055908,grad_norm: 0.9999997056055407, iteration: 75910
loss: 1.0434650182724,grad_norm: 0.9999990641446527, iteration: 75911
loss: 1.0771669149398804,grad_norm: 0.9999999152560548, iteration: 75912
loss: 0.9777165055274963,grad_norm: 0.9263791511240482, iteration: 75913
loss: 1.0406339168548584,grad_norm: 0.9999993031558966, iteration: 75914
loss: 1.0276423692703247,grad_norm: 0.8436697460000726, iteration: 75915
loss: 1.113992691040039,grad_norm: 0.9999998808561257, iteration: 75916
loss: 1.1103713512420654,grad_norm: 0.9999992005796119, iteration: 75917
loss: 0.9972054362297058,grad_norm: 0.9051322858712813, iteration: 75918
loss: 1.0427062511444092,grad_norm: 0.9999994949748454, iteration: 75919
loss: 1.1305040121078491,grad_norm: 0.9999998693016033, iteration: 75920
loss: 0.9634016752243042,grad_norm: 0.9999990939894473, iteration: 75921
loss: 1.024730920791626,grad_norm: 0.9999992270158147, iteration: 75922
loss: 1.0591450929641724,grad_norm: 0.9999996350830113, iteration: 75923
loss: 1.022255778312683,grad_norm: 0.9999991553970957, iteration: 75924
loss: 1.0167675018310547,grad_norm: 0.9999993717521442, iteration: 75925
loss: 1.0653777122497559,grad_norm: 0.9999998564560935, iteration: 75926
loss: 1.0352259874343872,grad_norm: 0.90821574802499, iteration: 75927
loss: 1.0374141931533813,grad_norm: 0.9444675212712419, iteration: 75928
loss: 1.0065995454788208,grad_norm: 0.8693411730378453, iteration: 75929
loss: 0.9928868412971497,grad_norm: 0.8278408002307298, iteration: 75930
loss: 1.0440113544464111,grad_norm: 0.9999996002169762, iteration: 75931
loss: 1.0189886093139648,grad_norm: 0.9999998266419527, iteration: 75932
loss: 1.0291857719421387,grad_norm: 0.8956800719008394, iteration: 75933
loss: 0.9860147833824158,grad_norm: 0.9232487388339966, iteration: 75934
loss: 1.00467848777771,grad_norm: 0.9999995994202323, iteration: 75935
loss: 1.0414968729019165,grad_norm: 0.8471665364748935, iteration: 75936
loss: 1.019932746887207,grad_norm: 0.9999992624855434, iteration: 75937
loss: 0.9797868132591248,grad_norm: 0.9999991729017171, iteration: 75938
loss: 1.0770447254180908,grad_norm: 0.9999999029811694, iteration: 75939
loss: 1.0072674751281738,grad_norm: 0.9999995371712913, iteration: 75940
loss: 0.9726053476333618,grad_norm: 0.9541086928943002, iteration: 75941
loss: 0.9840432405471802,grad_norm: 0.9622431855292886, iteration: 75942
loss: 1.0982435941696167,grad_norm: 0.9999992522305903, iteration: 75943
loss: 0.9874071478843689,grad_norm: 0.9240924113215905, iteration: 75944
loss: 0.9237260222434998,grad_norm: 0.9999988893202727, iteration: 75945
loss: 0.9908760786056519,grad_norm: 0.8624140879193806, iteration: 75946
loss: 1.028892159461975,grad_norm: 0.988850671585922, iteration: 75947
loss: 1.0366413593292236,grad_norm: 0.9147955816917941, iteration: 75948
loss: 1.0070382356643677,grad_norm: 0.8608485240418046, iteration: 75949
loss: 0.9998332262039185,grad_norm: 0.918060502881449, iteration: 75950
loss: 0.9511815309524536,grad_norm: 0.9682453850308752, iteration: 75951
loss: 1.0034704208374023,grad_norm: 0.9999990717158684, iteration: 75952
loss: 1.059291124343872,grad_norm: 0.9999998649214131, iteration: 75953
loss: 1.0257160663604736,grad_norm: 0.7787631358176467, iteration: 75954
loss: 1.0159637928009033,grad_norm: 0.9629127517590741, iteration: 75955
loss: 1.0038810968399048,grad_norm: 0.9796548117581, iteration: 75956
loss: 0.9920029640197754,grad_norm: 0.7995781514869976, iteration: 75957
loss: 0.9652747511863708,grad_norm: 0.9261348367110247, iteration: 75958
loss: 1.0107611417770386,grad_norm: 0.9451587698907821, iteration: 75959
loss: 1.0466437339782715,grad_norm: 0.9999996405612153, iteration: 75960
loss: 1.0065988302230835,grad_norm: 0.9999992562889082, iteration: 75961
loss: 1.000711441040039,grad_norm: 0.9012756896403809, iteration: 75962
loss: 1.0385774374008179,grad_norm: 0.9641616270438751, iteration: 75963
loss: 1.0137711763381958,grad_norm: 0.9999994343112105, iteration: 75964
loss: 0.9952377080917358,grad_norm: 0.9030503531275672, iteration: 75965
loss: 1.0138531923294067,grad_norm: 0.999998999235371, iteration: 75966
loss: 1.02216374874115,grad_norm: 0.9999998969254646, iteration: 75967
loss: 1.0276479721069336,grad_norm: 0.9999991879738791, iteration: 75968
loss: 1.0006494522094727,grad_norm: 0.9999999904671341, iteration: 75969
loss: 1.0505471229553223,grad_norm: 0.9999993834831868, iteration: 75970
loss: 0.9987623691558838,grad_norm: 0.8824439265465577, iteration: 75971
loss: 1.005542516708374,grad_norm: 0.99999912082251, iteration: 75972
loss: 0.9932287931442261,grad_norm: 0.9999991776915576, iteration: 75973
loss: 1.0306174755096436,grad_norm: 0.9999997247051596, iteration: 75974
loss: 1.0677767992019653,grad_norm: 0.999999784409408, iteration: 75975
loss: 0.9886552691459656,grad_norm: 0.9514743876985445, iteration: 75976
loss: 1.0087156295776367,grad_norm: 0.9697034638092169, iteration: 75977
loss: 1.0428290367126465,grad_norm: 0.9999993187826289, iteration: 75978
loss: 0.9631468057632446,grad_norm: 0.9011358224260742, iteration: 75979
loss: 0.9831091165542603,grad_norm: 0.9999991022737524, iteration: 75980
loss: 1.0071171522140503,grad_norm: 0.9999991901162444, iteration: 75981
loss: 1.0464085340499878,grad_norm: 0.9999992762149996, iteration: 75982
loss: 0.9998930096626282,grad_norm: 0.9676672017763216, iteration: 75983
loss: 1.045163631439209,grad_norm: 0.8306871504034649, iteration: 75984
loss: 1.0279090404510498,grad_norm: 0.9720693511580185, iteration: 75985
loss: 0.9938779473304749,grad_norm: 0.8650624030038659, iteration: 75986
loss: 0.9951055645942688,grad_norm: 0.9491717778993418, iteration: 75987
loss: 0.9647091627120972,grad_norm: 0.9999990702991228, iteration: 75988
loss: 1.0040823221206665,grad_norm: 0.7719795060827409, iteration: 75989
loss: 0.9903445243835449,grad_norm: 0.9550464370689026, iteration: 75990
loss: 0.9783255457878113,grad_norm: 0.9999998756939366, iteration: 75991
loss: 1.0157504081726074,grad_norm: 0.9676659706321666, iteration: 75992
loss: 1.0263442993164062,grad_norm: 0.932000880145347, iteration: 75993
loss: 0.9897889494895935,grad_norm: 0.9999995066968022, iteration: 75994
loss: 1.0474693775177002,grad_norm: 0.9999991875738938, iteration: 75995
loss: 1.0725390911102295,grad_norm: 0.9999998302067195, iteration: 75996
loss: 1.0245896577835083,grad_norm: 0.8319288674921245, iteration: 75997
loss: 1.0011088848114014,grad_norm: 0.9999993918451787, iteration: 75998
loss: 0.9908519983291626,grad_norm: 0.9987780765934949, iteration: 75999
loss: 1.0168287754058838,grad_norm: 0.8712784771191449, iteration: 76000
loss: 1.0278600454330444,grad_norm: 0.9208703272416916, iteration: 76001
loss: 1.019248366355896,grad_norm: 0.9918165357999821, iteration: 76002
loss: 1.0138590335845947,grad_norm: 0.791456735034121, iteration: 76003
loss: 0.9771136045455933,grad_norm: 0.9513498728144519, iteration: 76004
loss: 1.0573594570159912,grad_norm: 0.9999990000003908, iteration: 76005
loss: 1.0807923078536987,grad_norm: 0.9999992196834717, iteration: 76006
loss: 1.0170310735702515,grad_norm: 0.8559497159656878, iteration: 76007
loss: 1.0104649066925049,grad_norm: 0.9999991749756062, iteration: 76008
loss: 1.030938744544983,grad_norm: 0.9999991001294978, iteration: 76009
loss: 1.0228408575057983,grad_norm: 0.9999992933948473, iteration: 76010
loss: 1.0918389558792114,grad_norm: 0.9999993186852668, iteration: 76011
loss: 1.0160049200057983,grad_norm: 0.999999120384179, iteration: 76012
loss: 1.0123599767684937,grad_norm: 0.7719779999167313, iteration: 76013
loss: 1.0265189409255981,grad_norm: 0.9496718880458291, iteration: 76014
loss: 0.9763744473457336,grad_norm: 0.8599383082419015, iteration: 76015
loss: 0.9856789708137512,grad_norm: 0.9999990220005949, iteration: 76016
loss: 0.9821345806121826,grad_norm: 0.9570469429648331, iteration: 76017
loss: 0.9816460013389587,grad_norm: 0.9999992394535362, iteration: 76018
loss: 0.9997565746307373,grad_norm: 0.9596406241167457, iteration: 76019
loss: 1.0224940776824951,grad_norm: 0.9999990230521414, iteration: 76020
loss: 1.103593111038208,grad_norm: 0.9999998548762997, iteration: 76021
loss: 1.0132135152816772,grad_norm: 0.9118810510689367, iteration: 76022
loss: 0.9956530928611755,grad_norm: 0.9999994901127812, iteration: 76023
loss: 1.0531560182571411,grad_norm: 0.9999989335199161, iteration: 76024
loss: 1.0097320079803467,grad_norm: 0.9999990633784909, iteration: 76025
loss: 0.9763440489768982,grad_norm: 0.9283826220189657, iteration: 76026
loss: 0.9992367625236511,grad_norm: 0.9999997319143361, iteration: 76027
loss: 0.998716413974762,grad_norm: 0.9094465848569305, iteration: 76028
loss: 1.027162790298462,grad_norm: 0.847309964460612, iteration: 76029
loss: 1.0253063440322876,grad_norm: 0.9911311035345386, iteration: 76030
loss: 1.0571404695510864,grad_norm: 0.9250101921854773, iteration: 76031
loss: 1.0053319931030273,grad_norm: 0.934574822339178, iteration: 76032
loss: 1.0105676651000977,grad_norm: 0.9768989914409637, iteration: 76033
loss: 1.0519369840621948,grad_norm: 0.9999991794216592, iteration: 76034
loss: 0.9937671422958374,grad_norm: 0.8142924241037733, iteration: 76035
loss: 1.0081180334091187,grad_norm: 0.9999994450931898, iteration: 76036
loss: 0.9808017611503601,grad_norm: 0.9359088891854327, iteration: 76037
loss: 1.1131163835525513,grad_norm: 0.9999998990246062, iteration: 76038
loss: 1.0469475984573364,grad_norm: 0.9999989837444856, iteration: 76039
loss: 0.9825206995010376,grad_norm: 0.8153369074638898, iteration: 76040
loss: 1.170641303062439,grad_norm: 0.9999999733002233, iteration: 76041
loss: 0.9889460802078247,grad_norm: 0.8561747138978401, iteration: 76042
loss: 1.074784517288208,grad_norm: 0.9999992109608092, iteration: 76043
loss: 1.0691591501235962,grad_norm: 0.9999992407310127, iteration: 76044
loss: 1.0061019659042358,grad_norm: 0.9782143876636624, iteration: 76045
loss: 0.9963312745094299,grad_norm: 0.8290111906888507, iteration: 76046
loss: 1.021531581878662,grad_norm: 0.9999996119933566, iteration: 76047
loss: 1.0225976705551147,grad_norm: 0.8326311984654826, iteration: 76048
loss: 1.0018939971923828,grad_norm: 0.8698441878064647, iteration: 76049
loss: 0.9792307615280151,grad_norm: 0.9037887986037758, iteration: 76050
loss: 0.9925023317337036,grad_norm: 0.999999794995281, iteration: 76051
loss: 1.0217773914337158,grad_norm: 0.8170610876586466, iteration: 76052
loss: 0.9920990467071533,grad_norm: 0.9999992255136636, iteration: 76053
loss: 0.975273847579956,grad_norm: 0.9459089948586035, iteration: 76054
loss: 0.9919736385345459,grad_norm: 0.7970861499793805, iteration: 76055
loss: 1.0227558612823486,grad_norm: 0.9999994473620712, iteration: 76056
loss: 0.9814231991767883,grad_norm: 0.9967592247403103, iteration: 76057
loss: 0.9955576658248901,grad_norm: 0.9265113851697999, iteration: 76058
loss: 0.9853877425193787,grad_norm: 0.8079505223479909, iteration: 76059
loss: 0.9847764372825623,grad_norm: 0.9999999041293052, iteration: 76060
loss: 0.9758666157722473,grad_norm: 0.9554370851752824, iteration: 76061
loss: 1.0202006101608276,grad_norm: 0.9263136081544455, iteration: 76062
loss: 1.187716007232666,grad_norm: 0.9999999404534282, iteration: 76063
loss: 1.0156265497207642,grad_norm: 0.9999994013921087, iteration: 76064
loss: 0.9938578605651855,grad_norm: 0.8457364737866047, iteration: 76065
loss: 1.0083807706832886,grad_norm: 0.9999990852258756, iteration: 76066
loss: 1.0019376277923584,grad_norm: 0.9999997811094253, iteration: 76067
loss: 0.9390718936920166,grad_norm: 0.9685420119609375, iteration: 76068
loss: 0.9630699753761292,grad_norm: 0.9999989844418177, iteration: 76069
loss: 1.0476833581924438,grad_norm: 0.9999990293722313, iteration: 76070
loss: 1.0163286924362183,grad_norm: 0.9988220970534268, iteration: 76071
loss: 1.0193923711776733,grad_norm: 0.9999991019437414, iteration: 76072
loss: 1.014718770980835,grad_norm: 0.9999991296640939, iteration: 76073
loss: 0.9741062521934509,grad_norm: 0.9342104620046076, iteration: 76074
loss: 0.9627799391746521,grad_norm: 0.9999990869791344, iteration: 76075
loss: 0.9748546481132507,grad_norm: 0.999999055747645, iteration: 76076
loss: 1.0098602771759033,grad_norm: 0.8568305006017347, iteration: 76077
loss: 0.9833433628082275,grad_norm: 0.9999991522214982, iteration: 76078
loss: 1.0000883340835571,grad_norm: 0.9999993182954975, iteration: 76079
loss: 1.0199623107910156,grad_norm: 0.887095589544131, iteration: 76080
loss: 1.0349370241165161,grad_norm: 0.9999991948572929, iteration: 76081
loss: 1.0153874158859253,grad_norm: 0.9676430050460645, iteration: 76082
loss: 1.0703047513961792,grad_norm: 0.8232238733943001, iteration: 76083
loss: 0.9798160195350647,grad_norm: 0.9999990812452336, iteration: 76084
loss: 1.000451683998108,grad_norm: 0.7811566435324193, iteration: 76085
loss: 0.9927986264228821,grad_norm: 0.8139784971970772, iteration: 76086
loss: 1.060070514678955,grad_norm: 0.9999992391650674, iteration: 76087
loss: 0.9816088080406189,grad_norm: 0.9999989732560091, iteration: 76088
loss: 0.99747633934021,grad_norm: 0.8947105541771021, iteration: 76089
loss: 0.9952866435050964,grad_norm: 0.9999990941899002, iteration: 76090
loss: 0.9973428845405579,grad_norm: 0.9306117641940059, iteration: 76091
loss: 1.0092880725860596,grad_norm: 0.8371801631320118, iteration: 76092
loss: 0.9777171611785889,grad_norm: 0.999998984721678, iteration: 76093
loss: 1.0051685571670532,grad_norm: 0.8349356609870391, iteration: 76094
loss: 1.010648488998413,grad_norm: 0.9999991357394948, iteration: 76095
loss: 1.0872050523757935,grad_norm: 0.9999998596796766, iteration: 76096
loss: 1.0923123359680176,grad_norm: 0.9999990062128875, iteration: 76097
loss: 1.1565736532211304,grad_norm: 0.9999995547030669, iteration: 76098
loss: 0.9888277649879456,grad_norm: 0.9999991671608053, iteration: 76099
loss: 0.9907786250114441,grad_norm: 0.9086837421285244, iteration: 76100
loss: 0.9768726229667664,grad_norm: 0.8604912572816898, iteration: 76101
loss: 1.051770806312561,grad_norm: 0.8370043333865382, iteration: 76102
loss: 1.0199599266052246,grad_norm: 0.8693512845637692, iteration: 76103
loss: 0.9951213002204895,grad_norm: 0.9444457477412341, iteration: 76104
loss: 1.0014991760253906,grad_norm: 0.8784940952734318, iteration: 76105
loss: 1.0084946155548096,grad_norm: 0.9999992823120964, iteration: 76106
loss: 1.0154054164886475,grad_norm: 0.9447028925195615, iteration: 76107
loss: 1.0569192171096802,grad_norm: 0.9999997757000458, iteration: 76108
loss: 1.0789190530776978,grad_norm: 0.9999999454630166, iteration: 76109
loss: 1.2082650661468506,grad_norm: 0.9999993647341894, iteration: 76110
loss: 1.0082181692123413,grad_norm: 0.9999993555726078, iteration: 76111
loss: 0.9814220666885376,grad_norm: 0.8815631072386746, iteration: 76112
loss: 1.0739984512329102,grad_norm: 0.9999990454359107, iteration: 76113
loss: 1.0010632276535034,grad_norm: 0.9999995664736376, iteration: 76114
loss: 0.97266685962677,grad_norm: 0.893390159989438, iteration: 76115
loss: 1.025398850440979,grad_norm: 0.903148995587343, iteration: 76116
loss: 1.0066825151443481,grad_norm: 0.9999998441729344, iteration: 76117
loss: 1.014816164970398,grad_norm: 0.8258703207204555, iteration: 76118
loss: 1.0148365497589111,grad_norm: 0.9999995156899516, iteration: 76119
loss: 1.0055344104766846,grad_norm: 0.8028785321259069, iteration: 76120
loss: 1.0455985069274902,grad_norm: 0.9999990623141293, iteration: 76121
loss: 0.9786710739135742,grad_norm: 0.9999991837160237, iteration: 76122
loss: 1.003641963005066,grad_norm: 0.9457139888157187, iteration: 76123
loss: 1.0241678953170776,grad_norm: 0.9865013200609795, iteration: 76124
loss: 0.9616404175758362,grad_norm: 0.9350401149982734, iteration: 76125
loss: 1.0312896966934204,grad_norm: 0.9999993169638939, iteration: 76126
loss: 1.0188136100769043,grad_norm: 0.9999995661459343, iteration: 76127
loss: 1.0542497634887695,grad_norm: 0.9999995466191699, iteration: 76128
loss: 1.138849139213562,grad_norm: 0.9999992132912391, iteration: 76129
loss: 1.0551913976669312,grad_norm: 0.8959522017431534, iteration: 76130
loss: 1.0123502016067505,grad_norm: 0.9999990246380656, iteration: 76131
loss: 1.0152286291122437,grad_norm: 0.9999991068172588, iteration: 76132
loss: 0.982010543346405,grad_norm: 0.9999991530995846, iteration: 76133
loss: 1.0494468212127686,grad_norm: 0.9873335544422609, iteration: 76134
loss: 1.091110110282898,grad_norm: 0.9999994440745676, iteration: 76135
loss: 1.035320520401001,grad_norm: 0.9999994879230344, iteration: 76136
loss: 0.982269823551178,grad_norm: 0.9999993245092569, iteration: 76137
loss: 0.9829371571540833,grad_norm: 0.9826560627830891, iteration: 76138
loss: 1.0065374374389648,grad_norm: 0.8605998540116554, iteration: 76139
loss: 0.9780389666557312,grad_norm: 0.9999991186253284, iteration: 76140
loss: 0.9609008431434631,grad_norm: 0.8769360678060479, iteration: 76141
loss: 0.9407764077186584,grad_norm: 0.9258317613996404, iteration: 76142
loss: 0.9606361985206604,grad_norm: 0.9999990817068823, iteration: 76143
loss: 1.134703516960144,grad_norm: 0.9999989726945321, iteration: 76144
loss: 0.9897280335426331,grad_norm: 0.9999995967359543, iteration: 76145
loss: 1.0417778491973877,grad_norm: 0.9047477221612373, iteration: 76146
loss: 0.9774124026298523,grad_norm: 0.8880729895975499, iteration: 76147
loss: 1.0054032802581787,grad_norm: 0.9999990057698069, iteration: 76148
loss: 1.045792818069458,grad_norm: 0.9239731322709535, iteration: 76149
loss: 0.9738289713859558,grad_norm: 0.9999990962667877, iteration: 76150
loss: 1.015860915184021,grad_norm: 0.9999991249123914, iteration: 76151
loss: 1.0449590682983398,grad_norm: 0.9700981445376914, iteration: 76152
loss: 1.0484809875488281,grad_norm: 0.9999993383287233, iteration: 76153
loss: 0.9616644978523254,grad_norm: 0.8568141941190279, iteration: 76154
loss: 0.9560601115226746,grad_norm: 0.927918757478327, iteration: 76155
loss: 1.0854690074920654,grad_norm: 0.9999993723369445, iteration: 76156
loss: 1.0149413347244263,grad_norm: 0.9999992917322377, iteration: 76157
loss: 1.0572474002838135,grad_norm: 0.9999995192795946, iteration: 76158
loss: 1.016943097114563,grad_norm: 0.9999991771740472, iteration: 76159
loss: 1.0164467096328735,grad_norm: 0.9999991377791099, iteration: 76160
loss: 0.999480664730072,grad_norm: 0.9999995483889752, iteration: 76161
loss: 1.0421706438064575,grad_norm: 0.9999989429491984, iteration: 76162
loss: 0.9889639616012573,grad_norm: 0.9999990725961015, iteration: 76163
loss: 1.0095115900039673,grad_norm: 0.9324015384108002, iteration: 76164
loss: 1.0060386657714844,grad_norm: 0.8801969906182463, iteration: 76165
loss: 1.0176424980163574,grad_norm: 0.9463948442426704, iteration: 76166
loss: 1.0338174104690552,grad_norm: 0.999999523190883, iteration: 76167
loss: 1.0520762205123901,grad_norm: 0.9203749295088867, iteration: 76168
loss: 1.0273208618164062,grad_norm: 0.9524210909876767, iteration: 76169
loss: 1.129312515258789,grad_norm: 0.9999997758901612, iteration: 76170
loss: 1.0174555778503418,grad_norm: 0.9555764100882885, iteration: 76171
loss: 0.9999964833259583,grad_norm: 0.9999989245946534, iteration: 76172
loss: 0.9969114661216736,grad_norm: 0.9999991386673052, iteration: 76173
loss: 1.0492825508117676,grad_norm: 0.9999995987905637, iteration: 76174
loss: 0.9667811989784241,grad_norm: 0.8484534984826417, iteration: 76175
loss: 1.023816466331482,grad_norm: 0.9999991983731151, iteration: 76176
loss: 1.0184818506240845,grad_norm: 0.9473935849319891, iteration: 76177
loss: 1.0143643617630005,grad_norm: 0.9573371004456025, iteration: 76178
loss: 0.9628893733024597,grad_norm: 0.9422411515472976, iteration: 76179
loss: 1.025349497795105,grad_norm: 0.8546324894422357, iteration: 76180
loss: 1.0039840936660767,grad_norm: 0.9651731451095047, iteration: 76181
loss: 1.0285894870758057,grad_norm: 0.9276270310098922, iteration: 76182
loss: 0.9917473793029785,grad_norm: 0.8933861765866259, iteration: 76183
loss: 1.022204041481018,grad_norm: 0.9999991490089418, iteration: 76184
loss: 1.0074654817581177,grad_norm: 0.8656627248979097, iteration: 76185
loss: 0.9797980189323425,grad_norm: 0.9711616994106216, iteration: 76186
loss: 0.9894168376922607,grad_norm: 0.9536428288681585, iteration: 76187
loss: 1.0130819082260132,grad_norm: 0.9999990869438892, iteration: 76188
loss: 0.9387708902359009,grad_norm: 0.9013604567259217, iteration: 76189
loss: 1.060136318206787,grad_norm: 0.9999994148911816, iteration: 76190
loss: 1.026133418083191,grad_norm: 0.9999990347765182, iteration: 76191
loss: 1.014409065246582,grad_norm: 0.9999996982052194, iteration: 76192
loss: 1.1058871746063232,grad_norm: 0.9999991525576086, iteration: 76193
loss: 1.0717079639434814,grad_norm: 0.9999990939410602, iteration: 76194
loss: 1.1025292873382568,grad_norm: 0.9999990492073282, iteration: 76195
loss: 0.9973206520080566,grad_norm: 0.9999997638325242, iteration: 76196
loss: 0.9898053407669067,grad_norm: 0.9855968376347782, iteration: 76197
loss: 0.9964656829833984,grad_norm: 0.999999183134499, iteration: 76198
loss: 1.0193427801132202,grad_norm: 0.8827011238740716, iteration: 76199
loss: 1.0223603248596191,grad_norm: 0.9999990587173775, iteration: 76200
loss: 0.9936100840568542,grad_norm: 0.9981515828274098, iteration: 76201
loss: 1.0284346342086792,grad_norm: 0.9999998969237651, iteration: 76202
loss: 1.0141607522964478,grad_norm: 0.9999993267690923, iteration: 76203
loss: 1.0553672313690186,grad_norm: 0.8687342618483375, iteration: 76204
loss: 1.0062681436538696,grad_norm: 0.7766639367437839, iteration: 76205
loss: 0.9861018061637878,grad_norm: 0.9999990503025404, iteration: 76206
loss: 1.033174753189087,grad_norm: 0.9999991548431536, iteration: 76207
loss: 1.034621000289917,grad_norm: 0.9999992047118647, iteration: 76208
loss: 1.0291976928710938,grad_norm: 0.9717253237936068, iteration: 76209
loss: 1.0289859771728516,grad_norm: 0.9999995094537342, iteration: 76210
loss: 0.9954853057861328,grad_norm: 0.9999996251046542, iteration: 76211
loss: 0.9889100790023804,grad_norm: 0.939551985338237, iteration: 76212
loss: 1.0013318061828613,grad_norm: 0.9090062340387064, iteration: 76213
loss: 1.0064020156860352,grad_norm: 0.9999992388429788, iteration: 76214
loss: 0.9863104820251465,grad_norm: 0.9999991357849436, iteration: 76215
loss: 0.9995244145393372,grad_norm: 0.8886016348288436, iteration: 76216
loss: 0.9869998097419739,grad_norm: 0.9999990073735294, iteration: 76217
loss: 0.9872195720672607,grad_norm: 0.937269209972748, iteration: 76218
loss: 0.9582045078277588,grad_norm: 0.9015678791414542, iteration: 76219
loss: 0.999804675579071,grad_norm: 0.9999995055926179, iteration: 76220
loss: 1.0278501510620117,grad_norm: 0.9999990220647278, iteration: 76221
loss: 0.9932352900505066,grad_norm: 0.9999991169725533, iteration: 76222
loss: 0.9516111016273499,grad_norm: 0.9999990349053264, iteration: 76223
loss: 0.9791133403778076,grad_norm: 0.9999990147234518, iteration: 76224
loss: 0.9932373762130737,grad_norm: 0.9999990887270124, iteration: 76225
loss: 1.070741057395935,grad_norm: 0.9999998743507138, iteration: 76226
loss: 1.0047316551208496,grad_norm: 0.9999990998952755, iteration: 76227
loss: 0.9962201714515686,grad_norm: 0.9999992636810007, iteration: 76228
loss: 1.0380878448486328,grad_norm: 0.9999990343116237, iteration: 76229
loss: 0.9959449768066406,grad_norm: 0.9063980080706281, iteration: 76230
loss: 1.0034964084625244,grad_norm: 0.9652910758764761, iteration: 76231
loss: 1.080481767654419,grad_norm: 0.9999994944941335, iteration: 76232
loss: 1.0259610414505005,grad_norm: 0.8521750969443862, iteration: 76233
loss: 1.009362816810608,grad_norm: 0.8602748000730532, iteration: 76234
loss: 1.1035618782043457,grad_norm: 0.99999920425527, iteration: 76235
loss: 0.98353511095047,grad_norm: 0.9999992846488972, iteration: 76236
loss: 0.9755551218986511,grad_norm: 0.8445957962448305, iteration: 76237
loss: 1.0150516033172607,grad_norm: 0.944313306720556, iteration: 76238
loss: 0.9812518954277039,grad_norm: 0.9363412328744434, iteration: 76239
loss: 0.9852849841117859,grad_norm: 0.9999990556394807, iteration: 76240
loss: 1.0251094102859497,grad_norm: 0.98359209737539, iteration: 76241
loss: 1.0082234144210815,grad_norm: 0.9999998423034764, iteration: 76242
loss: 0.9954812526702881,grad_norm: 0.8229562294027348, iteration: 76243
loss: 1.0217499732971191,grad_norm: 0.9999991338170896, iteration: 76244
loss: 1.1353522539138794,grad_norm: 0.9999991468463695, iteration: 76245
loss: 1.062098741531372,grad_norm: 0.9999996251229826, iteration: 76246
loss: 1.0291351079940796,grad_norm: 0.9986535310720436, iteration: 76247
loss: 1.0762410163879395,grad_norm: 0.9999998965475045, iteration: 76248
loss: 1.0212554931640625,grad_norm: 0.9963099708834393, iteration: 76249
loss: 1.2559651136398315,grad_norm: 0.9999999200949572, iteration: 76250
loss: 1.0846190452575684,grad_norm: 0.9999994974007047, iteration: 76251
loss: 1.001193881034851,grad_norm: 0.9999991132017889, iteration: 76252
loss: 1.0863972902297974,grad_norm: 0.9999994282074873, iteration: 76253
loss: 1.0894719362258911,grad_norm: 0.9999998095072588, iteration: 76254
loss: 0.9831778407096863,grad_norm: 0.7725705461156344, iteration: 76255
loss: 1.0344079732894897,grad_norm: 0.8504423092788448, iteration: 76256
loss: 1.0324403047561646,grad_norm: 0.9999991801310703, iteration: 76257
loss: 1.1191697120666504,grad_norm: 0.9999992695538942, iteration: 76258
loss: 1.1613857746124268,grad_norm: 0.9999993378743542, iteration: 76259
loss: 1.2377575635910034,grad_norm: 0.9999997548441002, iteration: 76260
loss: 1.050930380821228,grad_norm: 0.9999999540537469, iteration: 76261
loss: 1.0066654682159424,grad_norm: 0.9999995656880879, iteration: 76262
loss: 0.9778492450714111,grad_norm: 0.9755314060366123, iteration: 76263
loss: 1.0227023363113403,grad_norm: 0.9837669265554005, iteration: 76264
loss: 1.0698614120483398,grad_norm: 0.9999991793773174, iteration: 76265
loss: 1.0181018114089966,grad_norm: 0.9147343638061682, iteration: 76266
loss: 1.0409557819366455,grad_norm: 0.9999997038029301, iteration: 76267
loss: 1.0430333614349365,grad_norm: 0.9999995941641493, iteration: 76268
loss: 1.0774664878845215,grad_norm: 0.9999993485748805, iteration: 76269
loss: 1.075156331062317,grad_norm: 0.9999992746294737, iteration: 76270
loss: 0.9644156098365784,grad_norm: 0.9570085834849271, iteration: 76271
loss: 1.0383604764938354,grad_norm: 0.9999990167104631, iteration: 76272
loss: 1.026277780532837,grad_norm: 0.9129134477504147, iteration: 76273
loss: 1.0052393674850464,grad_norm: 0.8723187790855867, iteration: 76274
loss: 1.0466080904006958,grad_norm: 0.9999991514465731, iteration: 76275
loss: 1.0405999422073364,grad_norm: 0.9999990368805005, iteration: 76276
loss: 0.9925217032432556,grad_norm: 0.9999990174020966, iteration: 76277
loss: 1.0632035732269287,grad_norm: 0.9999996343034963, iteration: 76278
loss: 1.017683982849121,grad_norm: 0.999999017892947, iteration: 76279
loss: 1.0148180723190308,grad_norm: 0.9999993931865176, iteration: 76280
loss: 1.007885456085205,grad_norm: 0.9999992462212908, iteration: 76281
loss: 1.013667106628418,grad_norm: 0.9613539620309607, iteration: 76282
loss: 1.0153812170028687,grad_norm: 0.9759938447954681, iteration: 76283
loss: 1.0172103643417358,grad_norm: 0.9999990541990996, iteration: 76284
loss: 0.9713279008865356,grad_norm: 0.8897893750800554, iteration: 76285
loss: 1.1025446653366089,grad_norm: 0.9999993064454402, iteration: 76286
loss: 1.0156502723693848,grad_norm: 0.9546605825747831, iteration: 76287
loss: 1.0221747159957886,grad_norm: 0.9999990774290328, iteration: 76288
loss: 0.9979164004325867,grad_norm: 0.9446385417154061, iteration: 76289
loss: 0.9804271459579468,grad_norm: 0.9999991118269488, iteration: 76290
loss: 0.9527246356010437,grad_norm: 0.9025111577336048, iteration: 76291
loss: 0.9948353171348572,grad_norm: 0.9873903975724294, iteration: 76292
loss: 1.0358152389526367,grad_norm: 0.9999994341021786, iteration: 76293
loss: 0.9846219420433044,grad_norm: 0.9999991966826595, iteration: 76294
loss: 0.967469334602356,grad_norm: 0.9999989680308202, iteration: 76295
loss: 1.0025652647018433,grad_norm: 0.9999991192365748, iteration: 76296
loss: 1.0120152235031128,grad_norm: 0.9999990401051345, iteration: 76297
loss: 0.9858807325363159,grad_norm: 0.889820700178667, iteration: 76298
loss: 1.0706590414047241,grad_norm: 0.9999990862402339, iteration: 76299
loss: 1.0346879959106445,grad_norm: 0.8999328796579424, iteration: 76300
loss: 0.9703106880187988,grad_norm: 0.8395918545300448, iteration: 76301
loss: 1.0179774761199951,grad_norm: 0.9999997992034005, iteration: 76302
loss: 1.0232526063919067,grad_norm: 0.999999011078398, iteration: 76303
loss: 1.0045617818832397,grad_norm: 0.8020720599787662, iteration: 76304
loss: 1.0056220293045044,grad_norm: 0.9999990829596208, iteration: 76305
loss: 1.0207282304763794,grad_norm: 0.8472134004112052, iteration: 76306
loss: 0.9719555377960205,grad_norm: 0.8268644942398339, iteration: 76307
loss: 1.2759689092636108,grad_norm: 0.9999999074262731, iteration: 76308
loss: 1.0244636535644531,grad_norm: 0.9999998724525991, iteration: 76309
loss: 0.9989714622497559,grad_norm: 0.9524114816147783, iteration: 76310
loss: 1.0664783716201782,grad_norm: 0.9999994234561934, iteration: 76311
loss: 1.024011254310608,grad_norm: 0.9999991064717282, iteration: 76312
loss: 1.045649528503418,grad_norm: 0.999999538846506, iteration: 76313
loss: 1.0344481468200684,grad_norm: 0.8990319501580611, iteration: 76314
loss: 1.0390993356704712,grad_norm: 0.9256006056900491, iteration: 76315
loss: 1.2453924417495728,grad_norm: 0.9999999220345006, iteration: 76316
loss: 0.9886050224304199,grad_norm: 0.9840985187175909, iteration: 76317
loss: 1.169000267982483,grad_norm: 0.9999998114860603, iteration: 76318
loss: 1.060117244720459,grad_norm: 0.9999993008129551, iteration: 76319
loss: 1.051275372505188,grad_norm: 0.9999999372478522, iteration: 76320
loss: 0.9966702461242676,grad_norm: 0.9246527909270883, iteration: 76321
loss: 1.22236168384552,grad_norm: 0.9999996933760151, iteration: 76322
loss: 1.3775185346603394,grad_norm: 0.9999996257227993, iteration: 76323
loss: 1.2859904766082764,grad_norm: 0.9999999150824843, iteration: 76324
loss: 1.1539846658706665,grad_norm: 0.9999997166428972, iteration: 76325
loss: 1.017039179801941,grad_norm: 0.9999997415653442, iteration: 76326
loss: 1.1583081483840942,grad_norm: 0.9999993664486101, iteration: 76327
loss: 1.346653938293457,grad_norm: 0.9999993608483203, iteration: 76328
loss: 1.0317128896713257,grad_norm: 0.9243269636284438, iteration: 76329
loss: 1.0485515594482422,grad_norm: 0.9999993946038995, iteration: 76330
loss: 1.1964499950408936,grad_norm: 0.9999995307219535, iteration: 76331
loss: 1.0570017099380493,grad_norm: 0.9999994670461316, iteration: 76332
loss: 0.9738078117370605,grad_norm: 0.8743347163746612, iteration: 76333
loss: 1.1126534938812256,grad_norm: 0.9999992623181366, iteration: 76334
loss: 1.0077358484268188,grad_norm: 0.943042516623557, iteration: 76335
loss: 1.1628669500350952,grad_norm: 0.9999992921134352, iteration: 76336
loss: 0.987617552280426,grad_norm: 0.9999998990486235, iteration: 76337
loss: 1.0011284351348877,grad_norm: 0.8957482309338176, iteration: 76338
loss: 1.234965205192566,grad_norm: 0.9999998964526197, iteration: 76339
loss: 1.047635793685913,grad_norm: 0.9999997147589025, iteration: 76340
loss: 1.0696728229522705,grad_norm: 0.9999997787834088, iteration: 76341
loss: 1.0324962139129639,grad_norm: 0.9460009792283605, iteration: 76342
loss: 1.0332107543945312,grad_norm: 0.9999993151449224, iteration: 76343
loss: 1.0799554586410522,grad_norm: 0.9999993601474144, iteration: 76344
loss: 1.1109169721603394,grad_norm: 0.9999993869394826, iteration: 76345
loss: 1.0037671327590942,grad_norm: 0.9358843998924775, iteration: 76346
loss: 1.0391324758529663,grad_norm: 0.9999993804903891, iteration: 76347
loss: 1.0496749877929688,grad_norm: 0.9999996384809592, iteration: 76348
loss: 1.0527429580688477,grad_norm: 0.9999990037749596, iteration: 76349
loss: 1.052146077156067,grad_norm: 0.9999992550743162, iteration: 76350
loss: 1.063362956047058,grad_norm: 0.9999991961928657, iteration: 76351
loss: 1.0210942029953003,grad_norm: 0.9999990968656542, iteration: 76352
loss: 1.0320554971694946,grad_norm: 0.9999991645492996, iteration: 76353
loss: 1.0084575414657593,grad_norm: 0.9999991937509467, iteration: 76354
loss: 1.0093172788619995,grad_norm: 0.8949848126293137, iteration: 76355
loss: 1.0276800394058228,grad_norm: 0.8746549850321529, iteration: 76356
loss: 0.9978671073913574,grad_norm: 0.9526132603838197, iteration: 76357
loss: 1.007568597793579,grad_norm: 0.9999990991537558, iteration: 76358
loss: 0.9841079115867615,grad_norm: 0.9999998807985996, iteration: 76359
loss: 1.024032711982727,grad_norm: 0.9999998364172854, iteration: 76360
loss: 1.0696361064910889,grad_norm: 0.9999990112841617, iteration: 76361
loss: 1.0700852870941162,grad_norm: 0.9999991530220977, iteration: 76362
loss: 1.081131935119629,grad_norm: 0.9999992568050828, iteration: 76363
loss: 1.0475555658340454,grad_norm: 0.999999444141182, iteration: 76364
loss: 1.0007697343826294,grad_norm: 0.9106569770388454, iteration: 76365
loss: 1.0281007289886475,grad_norm: 0.9480302242751696, iteration: 76366
loss: 0.9852145314216614,grad_norm: 0.7740222504510264, iteration: 76367
loss: 1.009750485420227,grad_norm: 0.9999994105873595, iteration: 76368
loss: 1.006806492805481,grad_norm: 0.9999993557357651, iteration: 76369
loss: 1.0365424156188965,grad_norm: 0.9999996352247609, iteration: 76370
loss: 1.0127782821655273,grad_norm: 0.9999991163873108, iteration: 76371
loss: 1.0728472471237183,grad_norm: 0.9999996665554391, iteration: 76372
loss: 0.991452693939209,grad_norm: 0.9999990235254916, iteration: 76373
loss: 1.0093023777008057,grad_norm: 0.9490429916812451, iteration: 76374
loss: 1.024315357208252,grad_norm: 0.9999990495326058, iteration: 76375
loss: 0.9752200245857239,grad_norm: 0.9999990657984257, iteration: 76376
loss: 1.0118675231933594,grad_norm: 0.8198350358347914, iteration: 76377
loss: 0.9745199680328369,grad_norm: 0.9999991230972487, iteration: 76378
loss: 1.0044217109680176,grad_norm: 0.8588636356216997, iteration: 76379
loss: 1.0252948999404907,grad_norm: 0.8602481364981387, iteration: 76380
loss: 1.0080609321594238,grad_norm: 0.8164129651335896, iteration: 76381
loss: 1.0595487356185913,grad_norm: 0.8327234948864439, iteration: 76382
loss: 0.971298098564148,grad_norm: 0.9681841685124345, iteration: 76383
loss: 1.0260670185089111,grad_norm: 0.9999991517229022, iteration: 76384
loss: 0.9997493624687195,grad_norm: 0.9999997938570455, iteration: 76385
loss: 1.0046818256378174,grad_norm: 0.9999996479582721, iteration: 76386
loss: 0.996676504611969,grad_norm: 0.9999991311393857, iteration: 76387
loss: 1.0045959949493408,grad_norm: 0.9918591706106895, iteration: 76388
loss: 1.0681082010269165,grad_norm: 0.9929918354099059, iteration: 76389
loss: 1.025192379951477,grad_norm: 0.9153422968166463, iteration: 76390
loss: 0.990167498588562,grad_norm: 0.999999025700125, iteration: 76391
loss: 1.0077943801879883,grad_norm: 0.8390375414207929, iteration: 76392
loss: 1.0229310989379883,grad_norm: 0.9880251656777328, iteration: 76393
loss: 0.9637670516967773,grad_norm: 0.8814842312881078, iteration: 76394
loss: 1.0017048120498657,grad_norm: 0.8816091809064662, iteration: 76395
loss: 1.0509282350540161,grad_norm: 0.9999989975754534, iteration: 76396
loss: 0.956240177154541,grad_norm: 0.9999991490668392, iteration: 76397
loss: 1.0213099718093872,grad_norm: 0.9999991780625396, iteration: 76398
loss: 1.0205820798873901,grad_norm: 0.9999994021060957, iteration: 76399
loss: 1.0051759481430054,grad_norm: 0.9999991664920825, iteration: 76400
loss: 0.9982628226280212,grad_norm: 0.8157202939629553, iteration: 76401
loss: 0.9895980358123779,grad_norm: 0.872595846235769, iteration: 76402
loss: 1.0768035650253296,grad_norm: 0.9282588301893216, iteration: 76403
loss: 0.9770079851150513,grad_norm: 0.9473415947616233, iteration: 76404
loss: 0.9891338348388672,grad_norm: 0.8900639066238732, iteration: 76405
loss: 1.0213261842727661,grad_norm: 0.8743673453238624, iteration: 76406
loss: 0.9854071736335754,grad_norm: 0.8548454973236774, iteration: 76407
loss: 0.9880263805389404,grad_norm: 0.9999991306427165, iteration: 76408
loss: 1.0078654289245605,grad_norm: 0.9338735811525815, iteration: 76409
loss: 0.9796229600906372,grad_norm: 0.8763523658623303, iteration: 76410
loss: 0.9793334007263184,grad_norm: 0.8548572216737311, iteration: 76411
loss: 1.018091082572937,grad_norm: 0.8823599517764528, iteration: 76412
loss: 0.9836483597755432,grad_norm: 0.9999994148918252, iteration: 76413
loss: 1.0069481134414673,grad_norm: 0.8920897830837948, iteration: 76414
loss: 1.015110731124878,grad_norm: 0.9472719086758875, iteration: 76415
loss: 1.0024702548980713,grad_norm: 0.9999996842225805, iteration: 76416
loss: 0.978082537651062,grad_norm: 0.9718059805437161, iteration: 76417
loss: 0.9919044971466064,grad_norm: 0.8843862442471229, iteration: 76418
loss: 0.9823452234268188,grad_norm: 0.9999991190491121, iteration: 76419
loss: 1.0492714643478394,grad_norm: 0.9999993625577827, iteration: 76420
loss: 0.9988664984703064,grad_norm: 0.9999991124283081, iteration: 76421
loss: 1.0087190866470337,grad_norm: 0.999999283088576, iteration: 76422
loss: 1.0101407766342163,grad_norm: 0.9758782826161808, iteration: 76423
loss: 0.9711014628410339,grad_norm: 0.9343658402531878, iteration: 76424
loss: 0.9973505139350891,grad_norm: 0.9027803899945317, iteration: 76425
loss: 0.9922590255737305,grad_norm: 0.9999996332554836, iteration: 76426
loss: 0.9954515099525452,grad_norm: 0.8338814628597465, iteration: 76427
loss: 1.0009307861328125,grad_norm: 0.9999994206568148, iteration: 76428
loss: 1.0383505821228027,grad_norm: 0.9999989752565421, iteration: 76429
loss: 1.0039418935775757,grad_norm: 0.986836585288243, iteration: 76430
loss: 1.023179531097412,grad_norm: 0.8646311270986232, iteration: 76431
loss: 0.9829013347625732,grad_norm: 0.8364648799786303, iteration: 76432
loss: 0.9987873435020447,grad_norm: 0.9999993309325721, iteration: 76433
loss: 0.9549306631088257,grad_norm: 0.9999991537280806, iteration: 76434
loss: 1.0053304433822632,grad_norm: 0.9989661864649465, iteration: 76435
loss: 0.969693660736084,grad_norm: 0.9929995136858332, iteration: 76436
loss: 0.9943745136260986,grad_norm: 0.9531369267804357, iteration: 76437
loss: 0.972199022769928,grad_norm: 0.9999991438399729, iteration: 76438
loss: 0.9976903796195984,grad_norm: 0.877004772346624, iteration: 76439
loss: 1.051560640335083,grad_norm: 0.9999998700075532, iteration: 76440
loss: 0.9646123051643372,grad_norm: 0.9112211626714086, iteration: 76441
loss: 0.9990052580833435,grad_norm: 0.9999990900064388, iteration: 76442
loss: 1.0196441411972046,grad_norm: 0.9999998403969494, iteration: 76443
loss: 0.9780292510986328,grad_norm: 0.999998970983324, iteration: 76444
loss: 0.9974720478057861,grad_norm: 0.9390530285292741, iteration: 76445
loss: 0.9900487065315247,grad_norm: 0.9999996882933314, iteration: 76446
loss: 1.0009503364562988,grad_norm: 0.8531097750358104, iteration: 76447
loss: 1.0170966386795044,grad_norm: 0.9999991664646475, iteration: 76448
loss: 1.0077794790267944,grad_norm: 0.9132562631213108, iteration: 76449
loss: 1.0149935483932495,grad_norm: 0.9999998345588188, iteration: 76450
loss: 1.0383858680725098,grad_norm: 0.999999267239477, iteration: 76451
loss: 1.0075689554214478,grad_norm: 0.7376994067715537, iteration: 76452
loss: 1.0298901796340942,grad_norm: 0.930834111791891, iteration: 76453
loss: 1.004456639289856,grad_norm: 0.7421136624787631, iteration: 76454
loss: 1.0114015340805054,grad_norm: 0.9063819900104155, iteration: 76455
loss: 0.9860143661499023,grad_norm: 0.8659649776708931, iteration: 76456
loss: 1.05463707447052,grad_norm: 0.8897245554413076, iteration: 76457
loss: 1.0310256481170654,grad_norm: 0.9999993429131129, iteration: 76458
loss: 0.9703018665313721,grad_norm: 0.9999992651434724, iteration: 76459
loss: 0.9784883260726929,grad_norm: 0.9551890343455113, iteration: 76460
loss: 1.2840569019317627,grad_norm: 0.9999992861063967, iteration: 76461
loss: 0.9822558164596558,grad_norm: 0.9068135170448731, iteration: 76462
loss: 0.987606942653656,grad_norm: 0.9999991101799393, iteration: 76463
loss: 0.997717022895813,grad_norm: 0.8531106665969073, iteration: 76464
loss: 1.061416506767273,grad_norm: 0.9999995122483357, iteration: 76465
loss: 1.0113987922668457,grad_norm: 0.9999992064185609, iteration: 76466
loss: 0.9990100860595703,grad_norm: 0.9999992776343889, iteration: 76467
loss: 1.0177913904190063,grad_norm: 0.8949062543541767, iteration: 76468
loss: 0.9890458583831787,grad_norm: 0.9530792769502788, iteration: 76469
loss: 1.0593807697296143,grad_norm: 0.8130918948347133, iteration: 76470
loss: 1.0309779644012451,grad_norm: 0.9999989990797037, iteration: 76471
loss: 1.091897964477539,grad_norm: 0.9999999280799858, iteration: 76472
loss: 1.0360827445983887,grad_norm: 0.8706298294417492, iteration: 76473
loss: 0.9987406730651855,grad_norm: 0.9999990862906901, iteration: 76474
loss: 1.022888422012329,grad_norm: 0.7676753067776741, iteration: 76475
loss: 1.002954363822937,grad_norm: 0.7945576430614414, iteration: 76476
loss: 1.0163289308547974,grad_norm: 0.9157381478794336, iteration: 76477
loss: 0.9916016459465027,grad_norm: 0.955093218246118, iteration: 76478
loss: 0.99551922082901,grad_norm: 0.9514331227411024, iteration: 76479
loss: 1.0231671333312988,grad_norm: 0.923630675738813, iteration: 76480
loss: 1.000812292098999,grad_norm: 0.9999991029354709, iteration: 76481
loss: 1.0071874856948853,grad_norm: 0.8863279317213848, iteration: 76482
loss: 0.9704927802085876,grad_norm: 0.9418577003741117, iteration: 76483
loss: 1.0648671388626099,grad_norm: 0.8937931935370318, iteration: 76484
loss: 0.9616060853004456,grad_norm: 0.9999990978876357, iteration: 76485
loss: 0.9940310120582581,grad_norm: 0.9602921753268746, iteration: 76486
loss: 0.9903541207313538,grad_norm: 0.8953702151637499, iteration: 76487
loss: 0.9874382019042969,grad_norm: 0.8578872041406009, iteration: 76488
loss: 1.0129566192626953,grad_norm: 0.9349257143303719, iteration: 76489
loss: 1.0170480012893677,grad_norm: 0.7869578732422287, iteration: 76490
loss: 1.0647761821746826,grad_norm: 0.9999997402248387, iteration: 76491
loss: 1.0272849798202515,grad_norm: 0.9469864598568348, iteration: 76492
loss: 0.985249936580658,grad_norm: 0.8142547291177165, iteration: 76493
loss: 0.9797274470329285,grad_norm: 0.9643879139397067, iteration: 76494
loss: 0.9982324242591858,grad_norm: 0.9999990711361083, iteration: 76495
loss: 1.1350464820861816,grad_norm: 0.9999994017451918, iteration: 76496
loss: 0.9526168704032898,grad_norm: 0.999998876078712, iteration: 76497
loss: 0.984636664390564,grad_norm: 0.911211364556532, iteration: 76498
loss: 1.0270178318023682,grad_norm: 0.9999997502928667, iteration: 76499
loss: 1.0592821836471558,grad_norm: 0.9953101190523325, iteration: 76500
loss: 0.9850925207138062,grad_norm: 0.9999990600618461, iteration: 76501
loss: 1.0062874555587769,grad_norm: 0.9047144503635273, iteration: 76502
loss: 1.0764459371566772,grad_norm: 0.9999997536164054, iteration: 76503
loss: 1.0291929244995117,grad_norm: 0.9999992022554139, iteration: 76504
loss: 1.0017361640930176,grad_norm: 0.9542855435234066, iteration: 76505
loss: 1.0601141452789307,grad_norm: 0.9999994403829956, iteration: 76506
loss: 0.9817811846733093,grad_norm: 0.782622843323278, iteration: 76507
loss: 0.9974831342697144,grad_norm: 0.9999991238374943, iteration: 76508
loss: 1.0236339569091797,grad_norm: 0.9997703081068591, iteration: 76509
loss: 0.9695608615875244,grad_norm: 0.9707413214345295, iteration: 76510
loss: 1.1163702011108398,grad_norm: 0.9999991911023725, iteration: 76511
loss: 0.9873411059379578,grad_norm: 0.9689511933801394, iteration: 76512
loss: 1.0328500270843506,grad_norm: 0.8656595058577028, iteration: 76513
loss: 1.0071743726730347,grad_norm: 0.9999997321498363, iteration: 76514
loss: 0.9992873072624207,grad_norm: 0.9270590431116068, iteration: 76515
loss: 0.9859400987625122,grad_norm: 0.9620921474824342, iteration: 76516
loss: 1.000535011291504,grad_norm: 0.9587419322532611, iteration: 76517
loss: 0.9812629818916321,grad_norm: 0.9695016634316427, iteration: 76518
loss: 1.0316643714904785,grad_norm: 0.9999991565332772, iteration: 76519
loss: 0.9751502275466919,grad_norm: 0.9460450295804339, iteration: 76520
loss: 1.0176444053649902,grad_norm: 0.8993713843580451, iteration: 76521
loss: 1.0210131406784058,grad_norm: 0.9892591631335519, iteration: 76522
loss: 0.9872806668281555,grad_norm: 0.9999990919449863, iteration: 76523
loss: 1.0039284229278564,grad_norm: 0.8555526368508266, iteration: 76524
loss: 0.9973050951957703,grad_norm: 0.9241820281027291, iteration: 76525
loss: 0.9915348887443542,grad_norm: 0.8778982735433706, iteration: 76526
loss: 1.030255675315857,grad_norm: 0.9216818512041429, iteration: 76527
loss: 0.980489194393158,grad_norm: 0.9567628415821373, iteration: 76528
loss: 0.9983425140380859,grad_norm: 0.9999993466517071, iteration: 76529
loss: 1.0573604106903076,grad_norm: 0.9999991134983152, iteration: 76530
loss: 0.9621894955635071,grad_norm: 0.8982769796592024, iteration: 76531
loss: 1.0334523916244507,grad_norm: 0.9999990062492823, iteration: 76532
loss: 1.06192946434021,grad_norm: 0.9999998400670644, iteration: 76533
loss: 0.9594675302505493,grad_norm: 0.999998984233142, iteration: 76534
loss: 0.9580987691879272,grad_norm: 0.884231149662087, iteration: 76535
loss: 0.9996620416641235,grad_norm: 0.8484999399000706, iteration: 76536
loss: 1.0198496580123901,grad_norm: 0.9161142718103445, iteration: 76537
loss: 1.0025408267974854,grad_norm: 0.9229400519888127, iteration: 76538
loss: 0.9978469014167786,grad_norm: 0.8899090271228204, iteration: 76539
loss: 1.052419900894165,grad_norm: 0.9999990886559436, iteration: 76540
loss: 0.9734379649162292,grad_norm: 0.9999992103345583, iteration: 76541
loss: 0.9789146184921265,grad_norm: 0.9829001496615911, iteration: 76542
loss: 1.0309679508209229,grad_norm: 0.999999289395366, iteration: 76543
loss: 0.9923103451728821,grad_norm: 0.8025950190555962, iteration: 76544
loss: 1.0187673568725586,grad_norm: 0.9999991638173833, iteration: 76545
loss: 1.0004830360412598,grad_norm: 0.9264481740482627, iteration: 76546
loss: 1.1401419639587402,grad_norm: 0.9999998054554148, iteration: 76547
loss: 0.9882862567901611,grad_norm: 0.9486761244847812, iteration: 76548
loss: 1.0140997171401978,grad_norm: 0.9999990390294228, iteration: 76549
loss: 1.0144844055175781,grad_norm: 0.9999997973273748, iteration: 76550
loss: 1.0685206651687622,grad_norm: 0.8733932660280463, iteration: 76551
loss: 0.9987903237342834,grad_norm: 0.9999989039645526, iteration: 76552
loss: 1.0167970657348633,grad_norm: 0.7731077520451627, iteration: 76553
loss: 1.0007624626159668,grad_norm: 0.9999993252651526, iteration: 76554
loss: 0.9845461249351501,grad_norm: 0.8457131823340274, iteration: 76555
loss: 0.9786683917045593,grad_norm: 0.890111421496745, iteration: 76556
loss: 1.0014700889587402,grad_norm: 0.9045666701041494, iteration: 76557
loss: 1.0409239530563354,grad_norm: 0.9405780093248833, iteration: 76558
loss: 0.9996502995491028,grad_norm: 0.9999990368246447, iteration: 76559
loss: 1.007103443145752,grad_norm: 0.9999989507855986, iteration: 76560
loss: 1.0205432176589966,grad_norm: 0.9227512271121132, iteration: 76561
loss: 0.9800430536270142,grad_norm: 0.9999990150369837, iteration: 76562
loss: 1.0756582021713257,grad_norm: 0.9999997381530397, iteration: 76563
loss: 0.9963439702987671,grad_norm: 0.9369160535887957, iteration: 76564
loss: 0.9767570495605469,grad_norm: 0.8354728994133706, iteration: 76565
loss: 0.9932762980461121,grad_norm: 0.9559745451978768, iteration: 76566
loss: 1.0065762996673584,grad_norm: 0.9999992323281316, iteration: 76567
loss: 0.9618701934814453,grad_norm: 0.9999991760420931, iteration: 76568
loss: 0.9961687922477722,grad_norm: 0.9999991043745652, iteration: 76569
loss: 0.9917181134223938,grad_norm: 0.9227557519048882, iteration: 76570
loss: 1.0212124586105347,grad_norm: 0.90720190282218, iteration: 76571
loss: 0.9750984311103821,grad_norm: 0.9999989502865286, iteration: 76572
loss: 1.0034408569335938,grad_norm: 0.9952463702675219, iteration: 76573
loss: 0.9854560494422913,grad_norm: 0.9909767159046776, iteration: 76574
loss: 1.0680195093154907,grad_norm: 0.868023193315324, iteration: 76575
loss: 0.9976881742477417,grad_norm: 0.861967703344056, iteration: 76576
loss: 1.0164059400558472,grad_norm: 0.9999994404627376, iteration: 76577
loss: 1.0854853391647339,grad_norm: 0.9999997117632428, iteration: 76578
loss: 0.9894609451293945,grad_norm: 0.8117845568333006, iteration: 76579
loss: 1.487103819847107,grad_norm: 0.9999994047648205, iteration: 76580
loss: 0.9451548457145691,grad_norm: 0.8915767156696, iteration: 76581
loss: 0.9958570599555969,grad_norm: 0.999999169292294, iteration: 76582
loss: 0.9465145468711853,grad_norm: 0.9851012421439002, iteration: 76583
loss: 0.9994137287139893,grad_norm: 0.9039955214067296, iteration: 76584
loss: 0.9860435724258423,grad_norm: 0.747296456412144, iteration: 76585
loss: 1.2813608646392822,grad_norm: 0.9999998811371926, iteration: 76586
loss: 1.0753229856491089,grad_norm: 0.9999999258598897, iteration: 76587
loss: 1.136578917503357,grad_norm: 0.9999996555415599, iteration: 76588
loss: 1.0357253551483154,grad_norm: 0.8432739240047603, iteration: 76589
loss: 1.191436767578125,grad_norm: 0.99999955826863, iteration: 76590
loss: 1.1469621658325195,grad_norm: 0.9999992339965268, iteration: 76591
loss: 1.0659722089767456,grad_norm: 0.9999989828387683, iteration: 76592
loss: 1.0261527299880981,grad_norm: 0.9106848658519354, iteration: 76593
loss: 1.0836987495422363,grad_norm: 0.9052609832372158, iteration: 76594
loss: 1.1100128889083862,grad_norm: 0.9999993856404453, iteration: 76595
loss: 1.6182852983474731,grad_norm: 0.9999997471348295, iteration: 76596
loss: 1.111051082611084,grad_norm: 0.9999993783143294, iteration: 76597
loss: 1.7649215459823608,grad_norm: 0.9999998600070206, iteration: 76598
loss: 1.3011181354522705,grad_norm: 0.9999995876849459, iteration: 76599
loss: 1.2957117557525635,grad_norm: 0.9999997043077865, iteration: 76600
loss: 1.1196907758712769,grad_norm: 0.99999952302128, iteration: 76601
loss: 1.2330061197280884,grad_norm: 0.9999997516481183, iteration: 76602
loss: 1.1230777502059937,grad_norm: 0.9999997500485848, iteration: 76603
loss: 1.1528141498565674,grad_norm: 0.9999999417451646, iteration: 76604
loss: 1.0783321857452393,grad_norm: 0.9999992482052138, iteration: 76605
loss: 1.2944467067718506,grad_norm: 0.9999996125736041, iteration: 76606
loss: 1.054902195930481,grad_norm: 0.8623595659667447, iteration: 76607
loss: 1.042181372642517,grad_norm: 0.9999993115508162, iteration: 76608
loss: 1.2377084493637085,grad_norm: 0.9999995390801971, iteration: 76609
loss: 1.0432974100112915,grad_norm: 0.8956371895310093, iteration: 76610
loss: 1.2286196947097778,grad_norm: 0.9999996673999497, iteration: 76611
loss: 1.0447415113449097,grad_norm: 0.9999993661377349, iteration: 76612
loss: 1.0422242879867554,grad_norm: 0.9999997430168589, iteration: 76613
loss: 1.216591238975525,grad_norm: 0.9999995335451578, iteration: 76614
loss: 1.0934240818023682,grad_norm: 0.9999995035450391, iteration: 76615
loss: 1.0405020713806152,grad_norm: 0.9999999158848002, iteration: 76616
loss: 1.0036015510559082,grad_norm: 0.9506220562148237, iteration: 76617
loss: 1.110272765159607,grad_norm: 0.9999994504665723, iteration: 76618
loss: 1.0698243379592896,grad_norm: 0.9999993401765794, iteration: 76619
loss: 1.2517496347427368,grad_norm: 0.999999867210592, iteration: 76620
loss: 0.9737446904182434,grad_norm: 0.8586440965554303, iteration: 76621
loss: 0.9838638305664062,grad_norm: 0.9999991066799236, iteration: 76622
loss: 0.9679164290428162,grad_norm: 0.9999992136093209, iteration: 76623
loss: 1.022161841392517,grad_norm: 0.9812638409147824, iteration: 76624
loss: 1.0289385318756104,grad_norm: 0.999999229586244, iteration: 76625
loss: 1.0305590629577637,grad_norm: 0.8130211151976914, iteration: 76626
loss: 1.0367847681045532,grad_norm: 0.9875050401778186, iteration: 76627
loss: 0.9959285259246826,grad_norm: 0.9999995930714177, iteration: 76628
loss: 0.9885092377662659,grad_norm: 0.907464168594471, iteration: 76629
loss: 0.9753190875053406,grad_norm: 0.9937971495816297, iteration: 76630
loss: 1.023334264755249,grad_norm: 0.9999993052810182, iteration: 76631
loss: 1.0012937784194946,grad_norm: 0.9999990577756523, iteration: 76632
loss: 1.0543620586395264,grad_norm: 0.9768629535896, iteration: 76633
loss: 1.0750192403793335,grad_norm: 0.9999999792594244, iteration: 76634
loss: 0.9576792120933533,grad_norm: 0.8870494490019242, iteration: 76635
loss: 1.0014541149139404,grad_norm: 0.8708051261558815, iteration: 76636
loss: 1.023008108139038,grad_norm: 0.9999992336380277, iteration: 76637
loss: 1.01595938205719,grad_norm: 0.9999995364425434, iteration: 76638
loss: 1.0136816501617432,grad_norm: 0.7982756961225291, iteration: 76639
loss: 0.9894592761993408,grad_norm: 0.9342568281022047, iteration: 76640
loss: 1.0219757556915283,grad_norm: 0.7400203323810152, iteration: 76641
loss: 1.0069127082824707,grad_norm: 0.9999992841797017, iteration: 76642
loss: 0.9961584806442261,grad_norm: 0.9999990733090147, iteration: 76643
loss: 1.022625207901001,grad_norm: 0.9999991505621142, iteration: 76644
loss: 1.0051913261413574,grad_norm: 0.9791447158676337, iteration: 76645
loss: 1.0637004375457764,grad_norm: 0.9999994401150856, iteration: 76646
loss: 1.028372883796692,grad_norm: 0.9522788051167468, iteration: 76647
loss: 1.0387918949127197,grad_norm: 0.9999996524969941, iteration: 76648
loss: 1.074155330657959,grad_norm: 0.9999998473922547, iteration: 76649
loss: 1.015979290008545,grad_norm: 0.9966812330494013, iteration: 76650
loss: 0.9834535121917725,grad_norm: 0.9999991747510807, iteration: 76651
loss: 0.9991013407707214,grad_norm: 0.9999990142222746, iteration: 76652
loss: 1.0137430429458618,grad_norm: 0.9999990936167904, iteration: 76653
loss: 1.0341538190841675,grad_norm: 0.9999997742689989, iteration: 76654
loss: 0.9851961731910706,grad_norm: 0.9999990852492083, iteration: 76655
loss: 1.0204459428787231,grad_norm: 0.9999990108712853, iteration: 76656
loss: 0.9951409101486206,grad_norm: 0.9999991905677064, iteration: 76657
loss: 1.0260353088378906,grad_norm: 0.9999989772303834, iteration: 76658
loss: 1.0013192892074585,grad_norm: 0.894316152150892, iteration: 76659
loss: 1.0144559144973755,grad_norm: 0.9706622273596125, iteration: 76660
loss: 1.180884838104248,grad_norm: 0.9999997481139298, iteration: 76661
loss: 1.0199693441390991,grad_norm: 0.9999999042375941, iteration: 76662
loss: 1.058735966682434,grad_norm: 0.9999997269796778, iteration: 76663
loss: 1.004926323890686,grad_norm: 0.9611264671648664, iteration: 76664
loss: 1.0161654949188232,grad_norm: 0.9796590415423205, iteration: 76665
loss: 1.023964762687683,grad_norm: 0.999999275242257, iteration: 76666
loss: 1.006622314453125,grad_norm: 0.999999186435136, iteration: 76667
loss: 1.0202494859695435,grad_norm: 0.9999998465043186, iteration: 76668
loss: 0.9558781981468201,grad_norm: 0.8922842830350586, iteration: 76669
loss: 1.1199097633361816,grad_norm: 0.9999991695661609, iteration: 76670
loss: 0.9542781710624695,grad_norm: 0.976313148124099, iteration: 76671
loss: 0.9941838383674622,grad_norm: 0.9999991468442179, iteration: 76672
loss: 1.0558573007583618,grad_norm: 0.9999999661552049, iteration: 76673
loss: 1.0571738481521606,grad_norm: 0.999999680131351, iteration: 76674
loss: 1.0024482011795044,grad_norm: 0.8997117653878444, iteration: 76675
loss: 1.0060710906982422,grad_norm: 0.9999990647039402, iteration: 76676
loss: 0.9875116348266602,grad_norm: 0.9218535305313538, iteration: 76677
loss: 1.042792558670044,grad_norm: 0.9999996203868629, iteration: 76678
loss: 1.0285165309906006,grad_norm: 0.9999992651039734, iteration: 76679
loss: 0.9710562825202942,grad_norm: 0.9754721028030301, iteration: 76680
loss: 1.0306620597839355,grad_norm: 0.9725718042924041, iteration: 76681
loss: 1.0256632566452026,grad_norm: 0.9999997341446479, iteration: 76682
loss: 1.0658615827560425,grad_norm: 0.9999995288164042, iteration: 76683
loss: 1.0550668239593506,grad_norm: 0.9999998026957819, iteration: 76684
loss: 0.9534866213798523,grad_norm: 0.8168028059749386, iteration: 76685
loss: 0.97177654504776,grad_norm: 0.9999994428693024, iteration: 76686
loss: 0.9744570255279541,grad_norm: 0.9562306032213866, iteration: 76687
loss: 0.9904025793075562,grad_norm: 0.9999990177725924, iteration: 76688
loss: 1.0390077829360962,grad_norm: 0.999999248430303, iteration: 76689
loss: 0.98398756980896,grad_norm: 0.8788459473570307, iteration: 76690
loss: 0.9693202376365662,grad_norm: 0.8041705512936215, iteration: 76691
loss: 1.043051838874817,grad_norm: 0.8526497567671153, iteration: 76692
loss: 0.9973150491714478,grad_norm: 0.9886982249041412, iteration: 76693
loss: 1.016068935394287,grad_norm: 0.9999992358586436, iteration: 76694
loss: 1.026483178138733,grad_norm: 0.8635750826977647, iteration: 76695
loss: 1.041995882987976,grad_norm: 0.913587036185506, iteration: 76696
loss: 1.0026741027832031,grad_norm: 0.8627539531738481, iteration: 76697
loss: 1.0032334327697754,grad_norm: 0.9999991181640873, iteration: 76698
loss: 1.0247477293014526,grad_norm: 0.9796499458471623, iteration: 76699
loss: 1.1204830408096313,grad_norm: 0.9999997515881022, iteration: 76700
loss: 0.9847907423973083,grad_norm: 0.8906471911467451, iteration: 76701
loss: 1.034091591835022,grad_norm: 0.9274410138851614, iteration: 76702
loss: 1.0766047239303589,grad_norm: 0.9999990889254774, iteration: 76703
loss: 1.0302293300628662,grad_norm: 0.9999991839896742, iteration: 76704
loss: 1.0179705619812012,grad_norm: 1.000000017762594, iteration: 76705
loss: 0.9977102875709534,grad_norm: 0.9641601013081927, iteration: 76706
loss: 1.0440608263015747,grad_norm: 0.8497254084068545, iteration: 76707
loss: 0.9886900186538696,grad_norm: 0.9999990732741649, iteration: 76708
loss: 0.9922904968261719,grad_norm: 0.9798343427592712, iteration: 76709
loss: 0.9559077620506287,grad_norm: 0.9999991077399689, iteration: 76710
loss: 1.0178722143173218,grad_norm: 0.999999922730675, iteration: 76711
loss: 1.0273364782333374,grad_norm: 0.9932191821098626, iteration: 76712
loss: 1.0226168632507324,grad_norm: 0.9999996375923567, iteration: 76713
loss: 1.0253421068191528,grad_norm: 0.9999998424608855, iteration: 76714
loss: 0.9911983609199524,grad_norm: 0.9969369443333549, iteration: 76715
loss: 1.0160269737243652,grad_norm: 0.9999993017994491, iteration: 76716
loss: 1.031895399093628,grad_norm: 0.8555133913807244, iteration: 76717
loss: 1.0169323682785034,grad_norm: 0.9999990889348654, iteration: 76718
loss: 1.0359050035476685,grad_norm: 0.9999991644569929, iteration: 76719
loss: 0.9627701640129089,grad_norm: 0.9999991084679959, iteration: 76720
loss: 1.0093438625335693,grad_norm: 0.9999993812628408, iteration: 76721
loss: 1.0330629348754883,grad_norm: 0.9015877727027372, iteration: 76722
loss: 1.01131272315979,grad_norm: 0.9999993900261779, iteration: 76723
loss: 1.0030620098114014,grad_norm: 0.9999989686148137, iteration: 76724
loss: 0.9873285889625549,grad_norm: 0.9999992283933361, iteration: 76725
loss: 1.0536330938339233,grad_norm: 0.9999991416764574, iteration: 76726
loss: 1.0029411315917969,grad_norm: 0.9999991591628378, iteration: 76727
loss: 0.9974965453147888,grad_norm: 0.8556467336374389, iteration: 76728
loss: 1.0019943714141846,grad_norm: 0.9043004891740408, iteration: 76729
loss: 0.9920778870582581,grad_norm: 0.8844600355166486, iteration: 76730
loss: 1.0289483070373535,grad_norm: 0.9999990701585141, iteration: 76731
loss: 1.0208228826522827,grad_norm: 0.999999172202875, iteration: 76732
loss: 1.0288954973220825,grad_norm: 0.9636464654333633, iteration: 76733
loss: 0.9961857795715332,grad_norm: 0.9106952775517145, iteration: 76734
loss: 0.9845483899116516,grad_norm: 0.8636851543931915, iteration: 76735
loss: 1.0127713680267334,grad_norm: 0.9376513467964781, iteration: 76736
loss: 1.0415589809417725,grad_norm: 0.9999996466373982, iteration: 76737
loss: 1.0093621015548706,grad_norm: 0.9999991729847234, iteration: 76738
loss: 0.9880015850067139,grad_norm: 0.9999989444167638, iteration: 76739
loss: 1.0098540782928467,grad_norm: 0.9353157957662592, iteration: 76740
loss: 1.1801955699920654,grad_norm: 0.9999996262769207, iteration: 76741
loss: 1.0482782125473022,grad_norm: 0.9999994313876692, iteration: 76742
loss: 1.029764175415039,grad_norm: 0.9999989507933484, iteration: 76743
loss: 0.999090313911438,grad_norm: 0.8829651934598823, iteration: 76744
loss: 1.0137194395065308,grad_norm: 0.9431079947512618, iteration: 76745
loss: 0.9944555759429932,grad_norm: 0.9922684155081744, iteration: 76746
loss: 1.024754524230957,grad_norm: 0.8513101012372366, iteration: 76747
loss: 1.0197710990905762,grad_norm: 0.9999993618776268, iteration: 76748
loss: 1.0310970544815063,grad_norm: 0.9999991132093716, iteration: 76749
loss: 0.9889073371887207,grad_norm: 0.8561796888290538, iteration: 76750
loss: 0.9908514022827148,grad_norm: 0.9197503924857141, iteration: 76751
loss: 0.9799221158027649,grad_norm: 0.9870582411357256, iteration: 76752
loss: 1.0181306600570679,grad_norm: 0.9999996346144588, iteration: 76753
loss: 0.9880410432815552,grad_norm: 0.9218799901765558, iteration: 76754
loss: 1.0905356407165527,grad_norm: 0.9999990452288917, iteration: 76755
loss: 1.0010063648223877,grad_norm: 0.9188778471833541, iteration: 76756
loss: 0.9944866299629211,grad_norm: 0.9143164799017894, iteration: 76757
loss: 1.0263668298721313,grad_norm: 0.8962970573766846, iteration: 76758
loss: 1.0144954919815063,grad_norm: 0.9999993583342915, iteration: 76759
loss: 1.0740801095962524,grad_norm: 0.9999999093487261, iteration: 76760
loss: 1.0023818016052246,grad_norm: 0.9773243698089109, iteration: 76761
loss: 0.9800674915313721,grad_norm: 0.9971193269456412, iteration: 76762
loss: 0.9825634956359863,grad_norm: 0.8987119978445882, iteration: 76763
loss: 1.045478105545044,grad_norm: 0.9531501369723338, iteration: 76764
loss: 1.0464344024658203,grad_norm: 0.9999993040661445, iteration: 76765
loss: 0.9562622308731079,grad_norm: 0.8770766775039586, iteration: 76766
loss: 0.9869363903999329,grad_norm: 0.8391233734277393, iteration: 76767
loss: 1.0037448406219482,grad_norm: 0.9999998826400067, iteration: 76768
loss: 1.0003314018249512,grad_norm: 0.9046844439063573, iteration: 76769
loss: 0.9711910486221313,grad_norm: 0.7767290880779437, iteration: 76770
loss: 0.9839805960655212,grad_norm: 0.9999991093040619, iteration: 76771
loss: 1.0458433628082275,grad_norm: 0.8273723804034192, iteration: 76772
loss: 1.0241096019744873,grad_norm: 0.7933833483900258, iteration: 76773
loss: 0.9622442722320557,grad_norm: 0.8976271570398479, iteration: 76774
loss: 1.2779134511947632,grad_norm: 0.9999996580603624, iteration: 76775
loss: 0.9999790787696838,grad_norm: 0.985641331487045, iteration: 76776
loss: 1.0052858591079712,grad_norm: 0.8089971904988525, iteration: 76777
loss: 1.0052506923675537,grad_norm: 0.9999997257623031, iteration: 76778
loss: 1.0126019716262817,grad_norm: 0.9303565511237928, iteration: 76779
loss: 1.0097695589065552,grad_norm: 0.9999991965118473, iteration: 76780
loss: 0.9644004702568054,grad_norm: 0.8873922584286705, iteration: 76781
loss: 1.0543228387832642,grad_norm: 0.999999072384743, iteration: 76782
loss: 0.9655012488365173,grad_norm: 0.972848450639486, iteration: 76783
loss: 1.0459074974060059,grad_norm: 0.9999996717505548, iteration: 76784
loss: 0.9961585402488708,grad_norm: 0.8788221929172726, iteration: 76785
loss: 0.9894264340400696,grad_norm: 0.9121512995127241, iteration: 76786
loss: 1.0305674076080322,grad_norm: 0.9999991704101673, iteration: 76787
loss: 1.0126885175704956,grad_norm: 0.8907190878261161, iteration: 76788
loss: 0.9820460677146912,grad_norm: 0.9999992398209081, iteration: 76789
loss: 1.0088921785354614,grad_norm: 0.8821991402758331, iteration: 76790
loss: 0.9588261246681213,grad_norm: 0.9999996020545008, iteration: 76791
loss: 0.9722293615341187,grad_norm: 0.9999989686658298, iteration: 76792
loss: 0.9618323445320129,grad_norm: 0.8522146499854047, iteration: 76793
loss: 1.0858179330825806,grad_norm: 0.9999995555932263, iteration: 76794
loss: 0.982359766960144,grad_norm: 0.9999989846311963, iteration: 76795
loss: 1.0356615781784058,grad_norm: 0.9999990706893896, iteration: 76796
loss: 0.9640352129936218,grad_norm: 0.8633617778317838, iteration: 76797
loss: 0.9769513010978699,grad_norm: 0.8693780706608039, iteration: 76798
loss: 1.0305101871490479,grad_norm: 0.9999996813369546, iteration: 76799
loss: 1.000251293182373,grad_norm: 0.9277324899213173, iteration: 76800
loss: 1.0008305311203003,grad_norm: 0.9399364787494682, iteration: 76801
loss: 0.9446266889572144,grad_norm: 0.7972024896086117, iteration: 76802
loss: 1.0325088500976562,grad_norm: 0.9999989883195445, iteration: 76803
loss: 0.9576489925384521,grad_norm: 0.9896058490209046, iteration: 76804
loss: 0.9820160865783691,grad_norm: 0.9485552937987307, iteration: 76805
loss: 1.000933051109314,grad_norm: 0.9999991884927513, iteration: 76806
loss: 1.0216799974441528,grad_norm: 0.9999994692907233, iteration: 76807
loss: 1.0059884786605835,grad_norm: 0.9999996379767415, iteration: 76808
loss: 1.0335654020309448,grad_norm: 0.9404878888342301, iteration: 76809
loss: 1.0214097499847412,grad_norm: 0.9999997572935981, iteration: 76810
loss: 0.9708775877952576,grad_norm: 0.9430201913515204, iteration: 76811
loss: 1.0019094944000244,grad_norm: 0.9999993262251696, iteration: 76812
loss: 1.0876374244689941,grad_norm: 0.9999994974658575, iteration: 76813
loss: 1.0199445486068726,grad_norm: 0.9999991484870666, iteration: 76814
loss: 1.0433013439178467,grad_norm: 0.9999991133580055, iteration: 76815
loss: 0.9789339303970337,grad_norm: 0.8669461888116541, iteration: 76816
loss: 0.9902756214141846,grad_norm: 0.9090363142784906, iteration: 76817
loss: 0.9707077741622925,grad_norm: 0.9774120666749918, iteration: 76818
loss: 1.0114690065383911,grad_norm: 0.780911358510154, iteration: 76819
loss: 0.988713800907135,grad_norm: 0.9827709705727193, iteration: 76820
loss: 1.0421797037124634,grad_norm: 0.945639740668563, iteration: 76821
loss: 0.9964654445648193,grad_norm: 0.9194105496560318, iteration: 76822
loss: 0.9960042834281921,grad_norm: 0.8665656604863079, iteration: 76823
loss: 1.0001702308654785,grad_norm: 0.9999991149315741, iteration: 76824
loss: 1.040786623954773,grad_norm: 0.8800217848019513, iteration: 76825
loss: 1.0254348516464233,grad_norm: 0.9999992119423483, iteration: 76826
loss: 0.994524359703064,grad_norm: 0.9691621327170447, iteration: 76827
loss: 1.0035113096237183,grad_norm: 0.9999990892404629, iteration: 76828
loss: 0.9647272229194641,grad_norm: 0.9999990382826173, iteration: 76829
loss: 1.0245922803878784,grad_norm: 0.806658873064151, iteration: 76830
loss: 1.0165836811065674,grad_norm: 0.8814685606796828, iteration: 76831
loss: 0.9633082747459412,grad_norm: 0.9999991147638171, iteration: 76832
loss: 1.0169025659561157,grad_norm: 0.8859985870527213, iteration: 76833
loss: 0.9731065034866333,grad_norm: 0.9076960615741542, iteration: 76834
loss: 1.0242260694503784,grad_norm: 0.9999990229595167, iteration: 76835
loss: 0.9912778735160828,grad_norm: 0.9999990887990768, iteration: 76836
loss: 1.0204287767410278,grad_norm: 0.9999998587093865, iteration: 76837
loss: 1.003100037574768,grad_norm: 0.9999997182913096, iteration: 76838
loss: 0.9649538993835449,grad_norm: 0.8909549642718989, iteration: 76839
loss: 1.0123313665390015,grad_norm: 0.9999990445402801, iteration: 76840
loss: 0.9972729086875916,grad_norm: 0.9999989605944921, iteration: 76841
loss: 0.9941619038581848,grad_norm: 0.9046381744057352, iteration: 76842
loss: 0.9689754843711853,grad_norm: 0.8628861016866359, iteration: 76843
loss: 1.0222551822662354,grad_norm: 0.8158384841388433, iteration: 76844
loss: 0.9837229251861572,grad_norm: 0.9999991132176482, iteration: 76845
loss: 1.004315733909607,grad_norm: 0.9538634796595677, iteration: 76846
loss: 0.9986835718154907,grad_norm: 0.9469680464072443, iteration: 76847
loss: 0.975426435470581,grad_norm: 0.78732169459893, iteration: 76848
loss: 1.009259819984436,grad_norm: 0.9999991254164367, iteration: 76849
loss: 0.9763838052749634,grad_norm: 0.9260825951422924, iteration: 76850
loss: 1.0048644542694092,grad_norm: 0.9216456639735738, iteration: 76851
loss: 1.0756081342697144,grad_norm: 0.9999992898864425, iteration: 76852
loss: 1.0045486688613892,grad_norm: 0.9999991593467448, iteration: 76853
loss: 0.9901721477508545,grad_norm: 0.8995962671139252, iteration: 76854
loss: 1.0009099245071411,grad_norm: 0.8145645067239825, iteration: 76855
loss: 1.0352474451065063,grad_norm: 0.958703063872649, iteration: 76856
loss: 1.004935383796692,grad_norm: 0.9740140275120133, iteration: 76857
loss: 0.9859732389450073,grad_norm: 0.8509224322278446, iteration: 76858
loss: 0.9943712949752808,grad_norm: 0.9999992449424647, iteration: 76859
loss: 0.9994683265686035,grad_norm: 0.9079385195184301, iteration: 76860
loss: 1.006393313407898,grad_norm: 0.9588217216983405, iteration: 76861
loss: 1.0106672048568726,grad_norm: 0.9999991014703252, iteration: 76862
loss: 1.0098026990890503,grad_norm: 0.9999993170420532, iteration: 76863
loss: 0.9695212244987488,grad_norm: 0.8912597777923891, iteration: 76864
loss: 0.999549925327301,grad_norm: 0.9999990874801592, iteration: 76865
loss: 1.0022435188293457,grad_norm: 0.9681467437968627, iteration: 76866
loss: 1.112770438194275,grad_norm: 0.9999992520414361, iteration: 76867
loss: 1.033747911453247,grad_norm: 0.8966531205581624, iteration: 76868
loss: 1.0348381996154785,grad_norm: 0.9999993883726817, iteration: 76869
loss: 1.0148862600326538,grad_norm: 0.9097414712989503, iteration: 76870
loss: 1.0073730945587158,grad_norm: 0.9321484755870345, iteration: 76871
loss: 1.0656437873840332,grad_norm: 0.9999991545937162, iteration: 76872
loss: 0.9730690717697144,grad_norm: 0.9999993045603572, iteration: 76873
loss: 1.016228199005127,grad_norm: 0.9999991998092749, iteration: 76874
loss: 0.9644157290458679,grad_norm: 0.9999993369413914, iteration: 76875
loss: 1.0345284938812256,grad_norm: 0.9999990081130574, iteration: 76876
loss: 0.9971157312393188,grad_norm: 0.828726756580642, iteration: 76877
loss: 1.0105040073394775,grad_norm: 0.8836043345016334, iteration: 76878
loss: 1.0120768547058105,grad_norm: 0.8526500158604998, iteration: 76879
loss: 1.0268220901489258,grad_norm: 0.9875334637949156, iteration: 76880
loss: 0.9879538416862488,grad_norm: 0.9708143792149139, iteration: 76881
loss: 0.981523871421814,grad_norm: 0.9999991782433673, iteration: 76882
loss: 1.0367907285690308,grad_norm: 0.8686704177940809, iteration: 76883
loss: 1.026286244392395,grad_norm: 0.9063466258058758, iteration: 76884
loss: 0.9964118599891663,grad_norm: 0.9974438903406299, iteration: 76885
loss: 0.9760801196098328,grad_norm: 0.8546310388034738, iteration: 76886
loss: 1.0244665145874023,grad_norm: 0.9999991838873907, iteration: 76887
loss: 1.0256966352462769,grad_norm: 0.9998551450724574, iteration: 76888
loss: 1.1863338947296143,grad_norm: 0.9999992590425568, iteration: 76889
loss: 0.994464099407196,grad_norm: 0.9999991088885889, iteration: 76890
loss: 0.9840395450592041,grad_norm: 0.9968354457011982, iteration: 76891
loss: 0.9810256958007812,grad_norm: 0.9999991598450819, iteration: 76892
loss: 1.0195232629776,grad_norm: 0.9293796585850536, iteration: 76893
loss: 0.9878106713294983,grad_norm: 0.7593173010188065, iteration: 76894
loss: 1.0732990503311157,grad_norm: 0.9999998417821592, iteration: 76895
loss: 0.9715365767478943,grad_norm: 0.9999990604738827, iteration: 76896
loss: 1.026953101158142,grad_norm: 0.9193987070391756, iteration: 76897
loss: 1.0165598392486572,grad_norm: 0.7188712094152065, iteration: 76898
loss: 0.9949747323989868,grad_norm: 0.9999991454384457, iteration: 76899
loss: 0.9812138080596924,grad_norm: 0.7991316640310518, iteration: 76900
loss: 0.9728478789329529,grad_norm: 0.8242230477856554, iteration: 76901
loss: 1.036004662513733,grad_norm: 0.999999117892003, iteration: 76902
loss: 1.0070863962173462,grad_norm: 0.9506403769798012, iteration: 76903
loss: 1.0207757949829102,grad_norm: 0.902997260287553, iteration: 76904
loss: 1.0229729413986206,grad_norm: 0.9387377956976326, iteration: 76905
loss: 1.0995457172393799,grad_norm: 0.9342258235691787, iteration: 76906
loss: 1.0093729496002197,grad_norm: 0.9381830298863371, iteration: 76907
loss: 1.0566015243530273,grad_norm: 0.9562790173068757, iteration: 76908
loss: 1.199442982673645,grad_norm: 0.999999228955694, iteration: 76909
loss: 1.1042094230651855,grad_norm: 0.9999996251182184, iteration: 76910
loss: 0.9849867224693298,grad_norm: 0.9999997025682441, iteration: 76911
loss: 1.0540225505828857,grad_norm: 0.9999992194395149, iteration: 76912
loss: 1.14665687084198,grad_norm: 0.9999990993274134, iteration: 76913
loss: 1.0616165399551392,grad_norm: 0.9999998242358066, iteration: 76914
loss: 1.074629783630371,grad_norm: 0.9999994500017259, iteration: 76915
loss: 1.021686315536499,grad_norm: 0.999999159273912, iteration: 76916
loss: 0.9676740765571594,grad_norm: 0.9999995757552835, iteration: 76917
loss: 0.9858822226524353,grad_norm: 0.9999991690762167, iteration: 76918
loss: 1.214113712310791,grad_norm: 0.9999991643587873, iteration: 76919
loss: 0.9838171601295471,grad_norm: 0.9999998698051407, iteration: 76920
loss: 1.2621251344680786,grad_norm: 0.9999993639701731, iteration: 76921
loss: 1.0408204793930054,grad_norm: 0.9999992957585528, iteration: 76922
loss: 1.0160635709762573,grad_norm: 0.9605785702751247, iteration: 76923
loss: 1.0077663660049438,grad_norm: 0.9999993856250616, iteration: 76924
loss: 1.0282658338546753,grad_norm: 0.9192568646819563, iteration: 76925
loss: 1.0191482305526733,grad_norm: 0.8976209475933195, iteration: 76926
loss: 1.0072927474975586,grad_norm: 0.9999991768448148, iteration: 76927
loss: 1.041210412979126,grad_norm: 0.9999998357175759, iteration: 76928
loss: 0.9922111630439758,grad_norm: 0.9503675099868903, iteration: 76929
loss: 1.0274392366409302,grad_norm: 0.9999991990458945, iteration: 76930
loss: 0.9682189226150513,grad_norm: 0.9999991912899376, iteration: 76931
loss: 1.0011656284332275,grad_norm: 0.9999993037388989, iteration: 76932
loss: 1.2221828699111938,grad_norm: 0.9999995720303878, iteration: 76933
loss: 1.1672372817993164,grad_norm: 0.9999999154126561, iteration: 76934
loss: 0.9848853945732117,grad_norm: 0.8312783681313927, iteration: 76935
loss: 1.0264848470687866,grad_norm: 0.8572340309494626, iteration: 76936
loss: 1.089762806892395,grad_norm: 0.9999991757164206, iteration: 76937
loss: 1.0399993658065796,grad_norm: 0.9999993428083195, iteration: 76938
loss: 1.0338629484176636,grad_norm: 0.9532128865812899, iteration: 76939
loss: 1.0543943643569946,grad_norm: 0.999999879889749, iteration: 76940
loss: 1.0917103290557861,grad_norm: 0.9999995884397779, iteration: 76941
loss: 0.9777630567550659,grad_norm: 0.9999992072273679, iteration: 76942
loss: 1.0223397016525269,grad_norm: 0.949626235182043, iteration: 76943
loss: 1.0299808979034424,grad_norm: 0.9999991919473195, iteration: 76944
loss: 1.0126008987426758,grad_norm: 0.9707497058064312, iteration: 76945
loss: 1.0313490629196167,grad_norm: 0.9999990103412254, iteration: 76946
loss: 1.036759376525879,grad_norm: 0.9999989824739645, iteration: 76947
loss: 1.0145728588104248,grad_norm: 0.9999995585374543, iteration: 76948
loss: 0.9810563921928406,grad_norm: 0.9999991638127961, iteration: 76949
loss: 1.06270432472229,grad_norm: 0.9999993580244846, iteration: 76950
loss: 1.0403423309326172,grad_norm: 1.0000000266988973, iteration: 76951
loss: 1.0588774681091309,grad_norm: 0.9566227052929627, iteration: 76952
loss: 1.0058974027633667,grad_norm: 0.8076144960660074, iteration: 76953
loss: 1.013534426689148,grad_norm: 0.999999117174476, iteration: 76954
loss: 0.9913957118988037,grad_norm: 0.8241240518812394, iteration: 76955
loss: 1.0082637071609497,grad_norm: 0.9393944558302493, iteration: 76956
loss: 0.9815396666526794,grad_norm: 0.9999992324368036, iteration: 76957
loss: 1.0067864656448364,grad_norm: 0.9661698869849211, iteration: 76958
loss: 1.0211915969848633,grad_norm: 0.9705160368477035, iteration: 76959
loss: 1.018300175666809,grad_norm: 0.8140317764271485, iteration: 76960
loss: 1.095417857170105,grad_norm: 0.9999990869996933, iteration: 76961
loss: 1.0586384534835815,grad_norm: 0.9089733382035654, iteration: 76962
loss: 0.9930394887924194,grad_norm: 0.7776972477220953, iteration: 76963
loss: 0.9673552513122559,grad_norm: 0.999999425213005, iteration: 76964
loss: 0.992243766784668,grad_norm: 0.8642658244363935, iteration: 76965
loss: 1.0877788066864014,grad_norm: 0.979264800690395, iteration: 76966
loss: 1.0126811265945435,grad_norm: 0.9999991079085664, iteration: 76967
loss: 1.0191258192062378,grad_norm: 0.9999991585929842, iteration: 76968
loss: 0.964141309261322,grad_norm: 0.9414977383794028, iteration: 76969
loss: 1.0415629148483276,grad_norm: 0.9999989585068264, iteration: 76970
loss: 1.0259671211242676,grad_norm: 0.9999993630416177, iteration: 76971
loss: 0.9999010562896729,grad_norm: 0.9999990500861076, iteration: 76972
loss: 1.120039939880371,grad_norm: 0.9999998747627953, iteration: 76973
loss: 0.996341347694397,grad_norm: 0.9999996490670272, iteration: 76974
loss: 0.9972398281097412,grad_norm: 0.8786864332332934, iteration: 76975
loss: 1.0510004758834839,grad_norm: 0.999999822099831, iteration: 76976
loss: 1.0176647901535034,grad_norm: 0.9999992190845798, iteration: 76977
loss: 1.028713345527649,grad_norm: 0.9999990124328814, iteration: 76978
loss: 1.041359782218933,grad_norm: 0.9999991614188037, iteration: 76979
loss: 0.9778773784637451,grad_norm: 0.8846158570976487, iteration: 76980
loss: 1.016456961631775,grad_norm: 0.9999991376396205, iteration: 76981
loss: 0.9775452613830566,grad_norm: 0.8242573267941135, iteration: 76982
loss: 1.0113413333892822,grad_norm: 0.8461868603820991, iteration: 76983
loss: 1.0231446027755737,grad_norm: 0.9999991146747689, iteration: 76984
loss: 1.0113375186920166,grad_norm: 0.9999990145947877, iteration: 76985
loss: 0.978124737739563,grad_norm: 0.9999991451419459, iteration: 76986
loss: 1.1066269874572754,grad_norm: 0.9999997383048747, iteration: 76987
loss: 0.9924120306968689,grad_norm: 0.9999989536134187, iteration: 76988
loss: 1.0386334657669067,grad_norm: 0.8639682535290771, iteration: 76989
loss: 0.998160183429718,grad_norm: 0.9570929759563012, iteration: 76990
loss: 0.9948992133140564,grad_norm: 0.8945598191971263, iteration: 76991
loss: 0.9668495059013367,grad_norm: 0.9996634870720543, iteration: 76992
loss: 0.9875935316085815,grad_norm: 0.8345212366987055, iteration: 76993
loss: 1.0229222774505615,grad_norm: 0.999999004134781, iteration: 76994
loss: 1.0996485948562622,grad_norm: 0.9999990914588152, iteration: 76995
loss: 0.9928810596466064,grad_norm: 0.99999894265545, iteration: 76996
loss: 1.0014073848724365,grad_norm: 0.9069727353912341, iteration: 76997
loss: 1.0287423133850098,grad_norm: 0.9466270202384827, iteration: 76998
loss: 0.9869544506072998,grad_norm: 0.9469318929295013, iteration: 76999
loss: 0.9671234488487244,grad_norm: 0.7573025037115714, iteration: 77000
loss: 1.032758116722107,grad_norm: 0.9429087919149972, iteration: 77001
loss: 1.055314540863037,grad_norm: 0.9311965270651824, iteration: 77002
loss: 1.0074931383132935,grad_norm: 0.9532529572965858, iteration: 77003
loss: 0.9952520728111267,grad_norm: 0.9999990581301816, iteration: 77004
loss: 0.9917049407958984,grad_norm: 0.9731880518805308, iteration: 77005
loss: 1.0171655416488647,grad_norm: 0.8742195903490095, iteration: 77006
loss: 1.0044327974319458,grad_norm: 0.9396451149441093, iteration: 77007
loss: 1.011174201965332,grad_norm: 0.8511394439899906, iteration: 77008
loss: 1.0097624063491821,grad_norm: 0.9999990775983988, iteration: 77009
loss: 0.9831849932670593,grad_norm: 0.8975923326407488, iteration: 77010
loss: 1.061663269996643,grad_norm: 0.9999991408770653, iteration: 77011
loss: 1.0070537328720093,grad_norm: 0.8027325241329151, iteration: 77012
loss: 0.9905151724815369,grad_norm: 0.8903176645639919, iteration: 77013
loss: 0.9786534309387207,grad_norm: 0.9775839143152333, iteration: 77014
loss: 1.0332509279251099,grad_norm: 0.9999994758159246, iteration: 77015
loss: 1.0353208780288696,grad_norm: 0.9637940681285339, iteration: 77016
loss: 1.0077483654022217,grad_norm: 0.999999241905445, iteration: 77017
loss: 0.9699874520301819,grad_norm: 0.9999991748590462, iteration: 77018
loss: 1.0228673219680786,grad_norm: 0.9999996804222135, iteration: 77019
loss: 0.9978663921356201,grad_norm: 0.8151150200698752, iteration: 77020
loss: 0.9857635498046875,grad_norm: 0.9995322427742994, iteration: 77021
loss: 1.0145114660263062,grad_norm: 0.999999442962249, iteration: 77022
loss: 1.0182472467422485,grad_norm: 0.886641861485213, iteration: 77023
loss: 1.0758414268493652,grad_norm: 0.999999736016769, iteration: 77024
loss: 1.0639655590057373,grad_norm: 0.9999990867963969, iteration: 77025
loss: 0.9683892726898193,grad_norm: 0.999999082781498, iteration: 77026
loss: 1.0037400722503662,grad_norm: 0.9531604846034559, iteration: 77027
loss: 1.1414300203323364,grad_norm: 0.9594686570092703, iteration: 77028
loss: 1.080276370048523,grad_norm: 0.899882940922842, iteration: 77029
loss: 1.0376834869384766,grad_norm: 0.9914958332071437, iteration: 77030
loss: 0.9765592217445374,grad_norm: 0.9999992310820068, iteration: 77031
loss: 1.045621395111084,grad_norm: 0.9999989471585472, iteration: 77032
loss: 1.018044352531433,grad_norm: 0.9999991448251629, iteration: 77033
loss: 0.9813910126686096,grad_norm: 0.8187666319002016, iteration: 77034
loss: 1.001401662826538,grad_norm: 0.8180281471960945, iteration: 77035
loss: 0.992800772190094,grad_norm: 0.9999989704059491, iteration: 77036
loss: 1.0441086292266846,grad_norm: 0.9893550777502135, iteration: 77037
loss: 1.1361217498779297,grad_norm: 0.9999989952084236, iteration: 77038
loss: 1.0700757503509521,grad_norm: 0.9999991203013859, iteration: 77039
loss: 1.0079329013824463,grad_norm: 0.9999994568627986, iteration: 77040
loss: 0.9758051037788391,grad_norm: 0.9796929626488675, iteration: 77041
loss: 1.00653874874115,grad_norm: 0.9999990042838112, iteration: 77042
loss: 1.0452622175216675,grad_norm: 0.8268192710433881, iteration: 77043
loss: 0.9659421443939209,grad_norm: 0.8997917360885872, iteration: 77044
loss: 1.0539575815200806,grad_norm: 0.9999998595150363, iteration: 77045
loss: 1.0013608932495117,grad_norm: 0.9999991310645789, iteration: 77046
loss: 1.0622355937957764,grad_norm: 0.8372120406231753, iteration: 77047
loss: 1.026222586631775,grad_norm: 0.8965894297957974, iteration: 77048
loss: 0.9919217824935913,grad_norm: 0.8135788742709048, iteration: 77049
loss: 1.0812573432922363,grad_norm: 0.9999996071673894, iteration: 77050
loss: 1.0013203620910645,grad_norm: 0.8766924851391967, iteration: 77051
loss: 1.0148082971572876,grad_norm: 0.9999991021047767, iteration: 77052
loss: 1.0592594146728516,grad_norm: 0.9999998006878255, iteration: 77053
loss: 1.1271538734436035,grad_norm: 1.0000000150266357, iteration: 77054
loss: 1.0130529403686523,grad_norm: 0.9999994839547052, iteration: 77055
loss: 1.0292176008224487,grad_norm: 0.9650397502807857, iteration: 77056
loss: 1.0054008960723877,grad_norm: 0.9645118722560226, iteration: 77057
loss: 1.0254963636398315,grad_norm: 0.9999997299856566, iteration: 77058
loss: 0.9540774822235107,grad_norm: 0.979252969322455, iteration: 77059
loss: 1.0471036434173584,grad_norm: 0.8165118530639763, iteration: 77060
loss: 0.9991924166679382,grad_norm: 0.7144929295686614, iteration: 77061
loss: 1.0375242233276367,grad_norm: 0.9999991536755679, iteration: 77062
loss: 1.0381593704223633,grad_norm: 0.900694255574647, iteration: 77063
loss: 0.9418092370033264,grad_norm: 0.9999991734887905, iteration: 77064
loss: 1.000941276550293,grad_norm: 0.9806928580722895, iteration: 77065
loss: 1.023754358291626,grad_norm: 0.9638571281509306, iteration: 77066
loss: 1.06818687915802,grad_norm: 0.9999999272063934, iteration: 77067
loss: 1.0591429471969604,grad_norm: 0.9999991298512251, iteration: 77068
loss: 1.001838207244873,grad_norm: 0.9999991651554396, iteration: 77069
loss: 1.0719908475875854,grad_norm: 0.999999357994095, iteration: 77070
loss: 1.0686286687850952,grad_norm: 0.9965703464811251, iteration: 77071
loss: 1.1144793033599854,grad_norm: 0.9999990824161039, iteration: 77072
loss: 0.9952765107154846,grad_norm: 0.8308931163740556, iteration: 77073
loss: 1.0193113088607788,grad_norm: 0.9999989782723959, iteration: 77074
loss: 1.043436050415039,grad_norm: 0.9999997020375558, iteration: 77075
loss: 0.9663575887680054,grad_norm: 0.9402512680355843, iteration: 77076
loss: 1.0080604553222656,grad_norm: 0.9311213697794569, iteration: 77077
loss: 1.054766058921814,grad_norm: 0.8197047914828224, iteration: 77078
loss: 0.9899367094039917,grad_norm: 0.8794660741290209, iteration: 77079
loss: 1.1003133058547974,grad_norm: 0.9999995702813543, iteration: 77080
loss: 0.9844956994056702,grad_norm: 0.9999991252618877, iteration: 77081
loss: 1.109373927116394,grad_norm: 0.9999993821914585, iteration: 77082
loss: 0.9933579564094543,grad_norm: 0.9999991399799314, iteration: 77083
loss: 0.9978473782539368,grad_norm: 0.9281984405925711, iteration: 77084
loss: 1.0819315910339355,grad_norm: 0.9999989831867279, iteration: 77085
loss: 1.0013129711151123,grad_norm: 0.9999991863423145, iteration: 77086
loss: 1.037758231163025,grad_norm: 0.9999989831166136, iteration: 77087
loss: 1.0113085508346558,grad_norm: 0.9999993399723737, iteration: 77088
loss: 1.0917725563049316,grad_norm: 0.9999992465410938, iteration: 77089
loss: 1.1105148792266846,grad_norm: 0.9524974177449393, iteration: 77090
loss: 1.0384776592254639,grad_norm: 0.8897174365499642, iteration: 77091
loss: 1.0947284698486328,grad_norm: 0.9999991709721284, iteration: 77092
loss: 1.107933521270752,grad_norm: 0.9999996391725193, iteration: 77093
loss: 0.9460729360580444,grad_norm: 0.9136255096819434, iteration: 77094
loss: 1.0285866260528564,grad_norm: 0.9999998684586346, iteration: 77095
loss: 0.9940495491027832,grad_norm: 0.9192988826526894, iteration: 77096
loss: 1.026165246963501,grad_norm: 0.8115280994059869, iteration: 77097
loss: 1.0339330434799194,grad_norm: 0.9999996188019091, iteration: 77098
loss: 1.0488866567611694,grad_norm: 0.9999992407608923, iteration: 77099
loss: 1.0615595579147339,grad_norm: 0.9757793897843269, iteration: 77100
loss: 1.0668442249298096,grad_norm: 0.9999992268532979, iteration: 77101
loss: 1.029716968536377,grad_norm: 0.9999990331275942, iteration: 77102
loss: 0.9746192693710327,grad_norm: 0.8908138720254114, iteration: 77103
loss: 1.0506813526153564,grad_norm: 0.9999995675540505, iteration: 77104
loss: 1.0060604810714722,grad_norm: 0.912634531901805, iteration: 77105
loss: 1.0432848930358887,grad_norm: 0.9999994870163202, iteration: 77106
loss: 1.0328737497329712,grad_norm: 0.9999991896550042, iteration: 77107
loss: 1.0581978559494019,grad_norm: 0.9502343292615508, iteration: 77108
loss: 1.0544065237045288,grad_norm: 0.9999992554128629, iteration: 77109
loss: 0.9980913996696472,grad_norm: 0.7687350119004956, iteration: 77110
loss: 1.059270977973938,grad_norm: 0.9999990009778514, iteration: 77111
loss: 1.0202020406723022,grad_norm: 0.9999991818888919, iteration: 77112
loss: 0.973020613193512,grad_norm: 0.9725605159330766, iteration: 77113
loss: 1.0953943729400635,grad_norm: 0.9999996355574033, iteration: 77114
loss: 0.9833184480667114,grad_norm: 0.8902434958194004, iteration: 77115
loss: 1.0657579898834229,grad_norm: 0.999999162612471, iteration: 77116
loss: 1.0113365650177002,grad_norm: 0.9999997511859182, iteration: 77117
loss: 0.9998975992202759,grad_norm: 0.9360499958052717, iteration: 77118
loss: 1.028231143951416,grad_norm: 0.9997935198004071, iteration: 77119
loss: 1.1107546091079712,grad_norm: 0.9999992867531863, iteration: 77120
loss: 1.037083625793457,grad_norm: 0.8350059234948902, iteration: 77121
loss: 1.0409629344940186,grad_norm: 0.999999073091876, iteration: 77122
loss: 1.0427225828170776,grad_norm: 0.9999990057490248, iteration: 77123
loss: 1.0172420740127563,grad_norm: 0.9761640034594921, iteration: 77124
loss: 0.9782807230949402,grad_norm: 0.8923793136427335, iteration: 77125
loss: 0.9993075728416443,grad_norm: 0.8728827492740314, iteration: 77126
loss: 0.9717663526535034,grad_norm: 0.9999995730052674, iteration: 77127
loss: 0.9749493598937988,grad_norm: 0.8870406100831464, iteration: 77128
loss: 1.1887561082839966,grad_norm: 0.9999993005403016, iteration: 77129
loss: 1.0448590517044067,grad_norm: 0.9635463123351398, iteration: 77130
loss: 1.0254199504852295,grad_norm: 0.9999999332994061, iteration: 77131
loss: 0.9940053224563599,grad_norm: 0.999999226303507, iteration: 77132
loss: 1.0301764011383057,grad_norm: 0.9858006759365305, iteration: 77133
loss: 1.006445050239563,grad_norm: 0.9573953519025548, iteration: 77134
loss: 1.0933969020843506,grad_norm: 0.9999989826470813, iteration: 77135
loss: 1.0053564310073853,grad_norm: 0.9411693688841332, iteration: 77136
loss: 1.018155574798584,grad_norm: 0.8603105712722691, iteration: 77137
loss: 0.9983119368553162,grad_norm: 0.8949073444527201, iteration: 77138
loss: 1.0553261041641235,grad_norm: 0.996524256174185, iteration: 77139
loss: 0.9840046167373657,grad_norm: 0.896020253061615, iteration: 77140
loss: 0.9981704354286194,grad_norm: 0.9879794576496078, iteration: 77141
loss: 1.0364456176757812,grad_norm: 0.9999989843178354, iteration: 77142
loss: 1.019634485244751,grad_norm: 0.9999992727298372, iteration: 77143
loss: 1.0763723850250244,grad_norm: 0.9764273810055998, iteration: 77144
loss: 1.0798287391662598,grad_norm: 0.9999997762710605, iteration: 77145
loss: 1.0606412887573242,grad_norm: 0.7578907843785792, iteration: 77146
loss: 1.0609928369522095,grad_norm: 0.9999992906176594, iteration: 77147
loss: 1.0038396120071411,grad_norm: 0.7930181352360491, iteration: 77148
loss: 1.0067975521087646,grad_norm: 0.9999990541405974, iteration: 77149
loss: 1.047448992729187,grad_norm: 0.999999126700255, iteration: 77150
loss: 1.047959327697754,grad_norm: 0.9999995411281652, iteration: 77151
loss: 1.0109187364578247,grad_norm: 0.9173958188511633, iteration: 77152
loss: 1.006354570388794,grad_norm: 0.9339254743984181, iteration: 77153
loss: 0.9981757402420044,grad_norm: 0.9999991367170517, iteration: 77154
loss: 0.9981351494789124,grad_norm: 0.9999991089316732, iteration: 77155
loss: 1.0373036861419678,grad_norm: 0.9999992348637937, iteration: 77156
loss: 1.0314358472824097,grad_norm: 0.9999996267577389, iteration: 77157
loss: 1.0173771381378174,grad_norm: 0.9410674129483577, iteration: 77158
loss: 1.029356598854065,grad_norm: 0.9999989838065213, iteration: 77159
loss: 0.9843368530273438,grad_norm: 0.8831236259963523, iteration: 77160
loss: 1.051781177520752,grad_norm: 0.9995501663576325, iteration: 77161
loss: 0.9836400151252747,grad_norm: 0.9999991393423119, iteration: 77162
loss: 0.9989948868751526,grad_norm: 0.9761819712286179, iteration: 77163
loss: 0.9751278758049011,grad_norm: 0.8707533906590991, iteration: 77164
loss: 0.9566487669944763,grad_norm: 0.9999990997973308, iteration: 77165
loss: 1.1162277460098267,grad_norm: 1.0000000662644641, iteration: 77166
loss: 1.023617148399353,grad_norm: 0.6883251380759607, iteration: 77167
loss: 1.0333433151245117,grad_norm: 0.9999992465383246, iteration: 77168
loss: 1.029796838760376,grad_norm: 0.9746574544371266, iteration: 77169
loss: 1.0164861679077148,grad_norm: 0.999999323631039, iteration: 77170
loss: 1.0040780305862427,grad_norm: 0.9999990804050973, iteration: 77171
loss: 1.061743974685669,grad_norm: 0.9250631665354538, iteration: 77172
loss: 1.0020638704299927,grad_norm: 0.850886085587413, iteration: 77173
loss: 1.0580257177352905,grad_norm: 0.9999994307199813, iteration: 77174
loss: 1.1015021800994873,grad_norm: 0.9350117898230101, iteration: 77175
loss: 1.0438024997711182,grad_norm: 0.9930159193946563, iteration: 77176
loss: 0.9984785914421082,grad_norm: 0.876117757766463, iteration: 77177
loss: 1.0528321266174316,grad_norm: 0.9999992266098272, iteration: 77178
loss: 1.042454719543457,grad_norm: 0.9999991884075146, iteration: 77179
loss: 1.006337285041809,grad_norm: 0.9999990294909604, iteration: 77180
loss: 1.0385209321975708,grad_norm: 0.9999992627179076, iteration: 77181
loss: 1.100061297416687,grad_norm: 0.9999991703413398, iteration: 77182
loss: 1.0311352014541626,grad_norm: 0.8860355340223478, iteration: 77183
loss: 0.9790206551551819,grad_norm: 0.9753935359919002, iteration: 77184
loss: 1.000249981880188,grad_norm: 0.9999991109827288, iteration: 77185
loss: 1.0120364427566528,grad_norm: 0.9104338959172137, iteration: 77186
loss: 1.029535174369812,grad_norm: 0.8359097521438512, iteration: 77187
loss: 0.9840648174285889,grad_norm: 0.9999995983058665, iteration: 77188
loss: 1.063953161239624,grad_norm: 0.9999992425974674, iteration: 77189
loss: 0.9979523420333862,grad_norm: 0.8879809477017456, iteration: 77190
loss: 1.0443681478500366,grad_norm: 0.9999990904996972, iteration: 77191
loss: 1.000075101852417,grad_norm: 0.8795780930370407, iteration: 77192
loss: 0.991136908531189,grad_norm: 0.9677351299674033, iteration: 77193
loss: 1.01641845703125,grad_norm: 0.9999995049357822, iteration: 77194
loss: 0.9970135688781738,grad_norm: 0.9999990622433453, iteration: 77195
loss: 1.022849440574646,grad_norm: 0.9811803761820509, iteration: 77196
loss: 1.0441588163375854,grad_norm: 0.9662551578859123, iteration: 77197
loss: 0.9737296104431152,grad_norm: 0.8374848335368744, iteration: 77198
loss: 1.051013708114624,grad_norm: 0.9999991209214472, iteration: 77199
loss: 1.0609275102615356,grad_norm: 0.9999989603809555, iteration: 77200
loss: 1.0039912462234497,grad_norm: 0.9058865946608013, iteration: 77201
loss: 0.9980361461639404,grad_norm: 0.8785908473162863, iteration: 77202
loss: 1.0131794214248657,grad_norm: 0.999999262327923, iteration: 77203
loss: 1.0035319328308105,grad_norm: 0.99999914765295, iteration: 77204
loss: 1.001099944114685,grad_norm: 0.9999990390973593, iteration: 77205
loss: 1.017202615737915,grad_norm: 0.8660901254530168, iteration: 77206
loss: 0.9911953210830688,grad_norm: 0.9999990870089988, iteration: 77207
loss: 0.9974435567855835,grad_norm: 0.8444258882330781, iteration: 77208
loss: 1.086063265800476,grad_norm: 0.9474918137361636, iteration: 77209
loss: 0.994314432144165,grad_norm: 0.9999990134751843, iteration: 77210
loss: 0.9950364232063293,grad_norm: 0.9082850778124845, iteration: 77211
loss: 1.047149896621704,grad_norm: 0.9999998383532381, iteration: 77212
loss: 1.0955578088760376,grad_norm: 0.9999990061235356, iteration: 77213
loss: 1.020534873008728,grad_norm: 0.9607301692605629, iteration: 77214
loss: 0.9842436909675598,grad_norm: 0.934187810869768, iteration: 77215
loss: 1.0475378036499023,grad_norm: 0.9999991653620779, iteration: 77216
loss: 1.0572148561477661,grad_norm: 0.9421117942472115, iteration: 77217
loss: 1.1463959217071533,grad_norm: 0.9999993281218715, iteration: 77218
loss: 1.0062153339385986,grad_norm: 0.9280042639609518, iteration: 77219
loss: 1.0013575553894043,grad_norm: 0.8765064539347542, iteration: 77220
loss: 1.0155795812606812,grad_norm: 0.929352451481439, iteration: 77221
loss: 1.0297468900680542,grad_norm: 0.9999992109267385, iteration: 77222
loss: 1.0711256265640259,grad_norm: 0.9999995763766347, iteration: 77223
loss: 1.0606064796447754,grad_norm: 0.9024299348422816, iteration: 77224
loss: 1.042311191558838,grad_norm: 0.9999994319036459, iteration: 77225
loss: 0.9693989753723145,grad_norm: 0.8497167711920874, iteration: 77226
loss: 1.0608042478561401,grad_norm: 0.8358596893377922, iteration: 77227
loss: 1.0380771160125732,grad_norm: 0.9999992759846409, iteration: 77228
loss: 1.0847760438919067,grad_norm: 0.99999974522836, iteration: 77229
loss: 1.0949426889419556,grad_norm: 0.9333649547097563, iteration: 77230
loss: 1.0664006471633911,grad_norm: 0.9383094717273943, iteration: 77231
loss: 1.0142452716827393,grad_norm: 0.9164677328998758, iteration: 77232
loss: 1.0782045125961304,grad_norm: 0.9999992127697432, iteration: 77233
loss: 1.1353070735931396,grad_norm: 0.9999997069731551, iteration: 77234
loss: 0.9893843531608582,grad_norm: 0.8256098511261223, iteration: 77235
loss: 0.9916691780090332,grad_norm: 0.9999998610236293, iteration: 77236
loss: 0.9953823685646057,grad_norm: 0.8974413759547234, iteration: 77237
loss: 1.0765633583068848,grad_norm: 0.9999992574106076, iteration: 77238
loss: 1.0308988094329834,grad_norm: 0.9999991532466549, iteration: 77239
loss: 1.0451990365982056,grad_norm: 0.9523973779095909, iteration: 77240
loss: 1.013515591621399,grad_norm: 0.8652661280153826, iteration: 77241
loss: 1.055258870124817,grad_norm: 0.9999998215805521, iteration: 77242
loss: 0.9951215982437134,grad_norm: 0.8566879998777444, iteration: 77243
loss: 1.0949578285217285,grad_norm: 0.8747811487752273, iteration: 77244
loss: 1.0974547863006592,grad_norm: 0.9999991513195094, iteration: 77245
loss: 1.0735570192337036,grad_norm: 0.9683001064169282, iteration: 77246
loss: 1.00308096408844,grad_norm: 0.999999090784971, iteration: 77247
loss: 0.9906545877456665,grad_norm: 0.8201221479032154, iteration: 77248
loss: 1.0454847812652588,grad_norm: 0.9999994060445339, iteration: 77249
loss: 1.0024340152740479,grad_norm: 0.9999990516976422, iteration: 77250
loss: 1.0141725540161133,grad_norm: 0.9388150231360299, iteration: 77251
loss: 0.9978999495506287,grad_norm: 0.999999324035425, iteration: 77252
loss: 0.9985703229904175,grad_norm: 0.9999991388957734, iteration: 77253
loss: 1.0615071058273315,grad_norm: 0.9999991464984268, iteration: 77254
loss: 1.0943711996078491,grad_norm: 0.9999994707371416, iteration: 77255
loss: 1.0464974641799927,grad_norm: 0.9396790686158879, iteration: 77256
loss: 1.027092695236206,grad_norm: 0.9816685555486788, iteration: 77257
loss: 0.9824399352073669,grad_norm: 0.8205568488613422, iteration: 77258
loss: 1.013506531715393,grad_norm: 0.9665493345923591, iteration: 77259
loss: 1.016547679901123,grad_norm: 0.8528493751819619, iteration: 77260
loss: 1.1047120094299316,grad_norm: 0.9999990683872281, iteration: 77261
loss: 1.0667780637741089,grad_norm: 0.999999310682401, iteration: 77262
loss: 1.0937930345535278,grad_norm: 0.9330294138362315, iteration: 77263
loss: 1.0928561687469482,grad_norm: 0.9999992413759865, iteration: 77264
loss: 1.1007691621780396,grad_norm: 0.9597269763472863, iteration: 77265
loss: 1.0054354667663574,grad_norm: 0.8226265778918467, iteration: 77266
loss: 1.0286873579025269,grad_norm: 0.8397890829073754, iteration: 77267
loss: 1.0479182004928589,grad_norm: 0.9192302588678404, iteration: 77268
loss: 1.0056155920028687,grad_norm: 0.9049788241808915, iteration: 77269
loss: 1.0627961158752441,grad_norm: 0.9999991794380548, iteration: 77270
loss: 1.0651288032531738,grad_norm: 0.9999996555232722, iteration: 77271
loss: 1.042015790939331,grad_norm: 0.890667673208017, iteration: 77272
loss: 1.0681381225585938,grad_norm: 0.9999989916717321, iteration: 77273
loss: 1.0492942333221436,grad_norm: 0.9999992538190161, iteration: 77274
loss: 1.050926923751831,grad_norm: 0.9999998507721051, iteration: 77275
loss: 0.9581149816513062,grad_norm: 0.9968449947134937, iteration: 77276
loss: 0.9911033511161804,grad_norm: 0.9999990410471784, iteration: 77277
loss: 1.0855560302734375,grad_norm: 0.957274923483243, iteration: 77278
loss: 0.998773455619812,grad_norm: 0.9521789163586907, iteration: 77279
loss: 1.0262945890426636,grad_norm: 0.9999991582673181, iteration: 77280
loss: 1.0741289854049683,grad_norm: 0.9999991622027286, iteration: 77281
loss: 1.0753676891326904,grad_norm: 0.9257687095267471, iteration: 77282
loss: 1.1004712581634521,grad_norm: 0.999999270544448, iteration: 77283
loss: 1.0553922653198242,grad_norm: 1.0000000331681151, iteration: 77284
loss: 1.0128811597824097,grad_norm: 0.9999992276357266, iteration: 77285
loss: 1.1308785676956177,grad_norm: 0.9999990167368262, iteration: 77286
loss: 1.0091075897216797,grad_norm: 0.9979326134340208, iteration: 77287
loss: 1.0185374021530151,grad_norm: 0.9999990527651207, iteration: 77288
loss: 0.9991917610168457,grad_norm: 0.8314323820461509, iteration: 77289
loss: 1.0184231996536255,grad_norm: 0.8580630645638964, iteration: 77290
loss: 1.0408575534820557,grad_norm: 0.8644074595824498, iteration: 77291
loss: 1.0003571510314941,grad_norm: 0.9999992051133079, iteration: 77292
loss: 1.0975697040557861,grad_norm: 0.9999998488731242, iteration: 77293
loss: 1.0155595541000366,grad_norm: 0.9091886808507693, iteration: 77294
loss: 0.9760852456092834,grad_norm: 0.9838881355743895, iteration: 77295
loss: 1.107896327972412,grad_norm: 0.9798201303067762, iteration: 77296
loss: 1.1017529964447021,grad_norm: 0.9999992897761818, iteration: 77297
loss: 1.1285500526428223,grad_norm: 0.9999993738975096, iteration: 77298
loss: 0.9804226160049438,grad_norm: 0.9999989511118896, iteration: 77299
loss: 1.090972661972046,grad_norm: 0.9999990840687791, iteration: 77300
loss: 1.0555368661880493,grad_norm: 0.9910403110701871, iteration: 77301
loss: 1.004557728767395,grad_norm: 0.8093060501976425, iteration: 77302
loss: 0.978377103805542,grad_norm: 0.8604509210257716, iteration: 77303
loss: 1.0204213857650757,grad_norm: 0.8075463042066668, iteration: 77304
loss: 1.0528008937835693,grad_norm: 0.8015732014216171, iteration: 77305
loss: 0.9925925135612488,grad_norm: 0.9999990363163339, iteration: 77306
loss: 1.0291134119033813,grad_norm: 0.9999991551163717, iteration: 77307
loss: 1.0188730955123901,grad_norm: 0.8612947302550197, iteration: 77308
loss: 0.982170045375824,grad_norm: 0.9999991509702465, iteration: 77309
loss: 1.1830331087112427,grad_norm: 0.9999993437537696, iteration: 77310
loss: 1.0416375398635864,grad_norm: 0.9999994945454386, iteration: 77311
loss: 1.083840012550354,grad_norm: 0.9999991766744343, iteration: 77312
loss: 1.027025818824768,grad_norm: 0.9999992053354919, iteration: 77313
loss: 1.0251538753509521,grad_norm: 0.8806121301483276, iteration: 77314
loss: 1.1390384435653687,grad_norm: 0.9999998009890141, iteration: 77315
loss: 1.053470253944397,grad_norm: 0.9682284963802611, iteration: 77316
loss: 1.0133070945739746,grad_norm: 0.9999998003801308, iteration: 77317
loss: 1.021968960762024,grad_norm: 0.8400750239672733, iteration: 77318
loss: 1.0084799528121948,grad_norm: 0.8705684582128513, iteration: 77319
loss: 1.0084688663482666,grad_norm: 0.9707254055203531, iteration: 77320
loss: 1.071764588356018,grad_norm: 0.9999997121389005, iteration: 77321
loss: 1.0481468439102173,grad_norm: 0.8840834026057632, iteration: 77322
loss: 1.06490957736969,grad_norm: 0.9999991523596589, iteration: 77323
loss: 1.1381871700286865,grad_norm: 0.9999991885947396, iteration: 77324
loss: 1.0417855978012085,grad_norm: 0.80328890706852, iteration: 77325
loss: 1.053906798362732,grad_norm: 0.8802248243524843, iteration: 77326
loss: 1.0495654344558716,grad_norm: 0.999999784992747, iteration: 77327
loss: 1.0884931087493896,grad_norm: 0.9999998195668143, iteration: 77328
loss: 1.044569492340088,grad_norm: 1.0000000225878205, iteration: 77329
loss: 1.0549930334091187,grad_norm: 0.9856231316540064, iteration: 77330
loss: 1.0045604705810547,grad_norm: 0.9227147813610285, iteration: 77331
loss: 1.050715446472168,grad_norm: 0.9999991612209722, iteration: 77332
loss: 1.043400764465332,grad_norm: 0.9999996959081554, iteration: 77333
loss: 1.0321248769760132,grad_norm: 0.8888269588688202, iteration: 77334
loss: 1.000228762626648,grad_norm: 0.9999997210832255, iteration: 77335
loss: 0.9971871972084045,grad_norm: 0.9999990736415701, iteration: 77336
loss: 0.9841022491455078,grad_norm: 0.9999991090161693, iteration: 77337
loss: 1.0599323511123657,grad_norm: 0.9999997163269027, iteration: 77338
loss: 1.0272095203399658,grad_norm: 0.9999996283880935, iteration: 77339
loss: 1.0005146265029907,grad_norm: 0.8697825617698185, iteration: 77340
loss: 1.057030200958252,grad_norm: 0.9999996243321839, iteration: 77341
loss: 1.0765596628189087,grad_norm: 0.9999995683214745, iteration: 77342
loss: 1.0921926498413086,grad_norm: 0.9999990998257029, iteration: 77343
loss: 1.0320749282836914,grad_norm: 0.9999995798191201, iteration: 77344
loss: 1.0337415933609009,grad_norm: 0.9999992570762679, iteration: 77345
loss: 1.1488113403320312,grad_norm: 0.9999996573276018, iteration: 77346
loss: 0.9802191853523254,grad_norm: 0.8881743446716025, iteration: 77347
loss: 1.0980165004730225,grad_norm: 0.9999992524039829, iteration: 77348
loss: 1.0676701068878174,grad_norm: 0.9999996421016427, iteration: 77349
loss: 1.0854045152664185,grad_norm: 0.9999993018673106, iteration: 77350
loss: 1.0659432411193848,grad_norm: 0.9999997458488759, iteration: 77351
loss: 1.0270825624465942,grad_norm: 0.9999992024549152, iteration: 77352
loss: 1.024208426475525,grad_norm: 0.9196106351952064, iteration: 77353
loss: 1.0361379384994507,grad_norm: 0.9999989973762696, iteration: 77354
loss: 0.9820666313171387,grad_norm: 0.8433550144791485, iteration: 77355
loss: 1.0377391576766968,grad_norm: 0.8183403559935328, iteration: 77356
loss: 1.0244232416152954,grad_norm: 0.9999995216225868, iteration: 77357
loss: 0.9945185780525208,grad_norm: 0.9957233992241632, iteration: 77358
loss: 1.0851662158966064,grad_norm: 0.9999997238369566, iteration: 77359
loss: 1.0779664516448975,grad_norm: 0.999999100551419, iteration: 77360
loss: 1.1049734354019165,grad_norm: 0.9350265709013175, iteration: 77361
loss: 0.9752031564712524,grad_norm: 0.9529673140343041, iteration: 77362
loss: 1.0132348537445068,grad_norm: 0.9301930500175514, iteration: 77363
loss: 1.015734314918518,grad_norm: 0.9026642914774174, iteration: 77364
loss: 0.9811191558837891,grad_norm: 0.7317680740803714, iteration: 77365
loss: 1.0219234228134155,grad_norm: 0.999999693370287, iteration: 77366
loss: 1.003272533416748,grad_norm: 0.999999077980487, iteration: 77367
loss: 0.978678286075592,grad_norm: 0.8484312152242135, iteration: 77368
loss: 1.0107098817825317,grad_norm: 0.9846092325285516, iteration: 77369
loss: 0.9733242392539978,grad_norm: 0.902075781686354, iteration: 77370
loss: 1.0735520124435425,grad_norm: 0.9183815133110724, iteration: 77371
loss: 1.0354982614517212,grad_norm: 0.9999992027841424, iteration: 77372
loss: 1.0128055810928345,grad_norm: 0.9999991719817364, iteration: 77373
loss: 0.9813335537910461,grad_norm: 0.9092312893713256, iteration: 77374
loss: 1.2039456367492676,grad_norm: 0.9999994387826596, iteration: 77375
loss: 1.1032112836837769,grad_norm: 0.9999994390189114, iteration: 77376
loss: 1.0610973834991455,grad_norm: 0.9999994867051695, iteration: 77377
loss: 1.3096768856048584,grad_norm: 0.9999993506951356, iteration: 77378
loss: 1.044142246246338,grad_norm: 0.9706558087859835, iteration: 77379
loss: 1.0086249113082886,grad_norm: 0.9699482752384495, iteration: 77380
loss: 1.003636360168457,grad_norm: 0.9999992869494516, iteration: 77381
loss: 1.0204627513885498,grad_norm: 0.9999989494545014, iteration: 77382
loss: 1.025119662284851,grad_norm: 0.91854907632182, iteration: 77383
loss: 1.0155762434005737,grad_norm: 0.8466199302252478, iteration: 77384
loss: 0.9896825551986694,grad_norm: 0.967237841978041, iteration: 77385
loss: 1.0208886861801147,grad_norm: 0.7823840114990441, iteration: 77386
loss: 1.0110063552856445,grad_norm: 0.9862085051614083, iteration: 77387
loss: 1.1250476837158203,grad_norm: 0.9999993097005755, iteration: 77388
loss: 0.967795729637146,grad_norm: 0.9999989618086632, iteration: 77389
loss: 1.1229645013809204,grad_norm: 0.8767267171792227, iteration: 77390
loss: 1.0273982286453247,grad_norm: 0.9431462346486935, iteration: 77391
loss: 1.0652202367782593,grad_norm: 0.9999992594901681, iteration: 77392
loss: 0.9721015691757202,grad_norm: 0.9999990202509157, iteration: 77393
loss: 1.1710264682769775,grad_norm: 0.9999992707335447, iteration: 77394
loss: 1.0254175662994385,grad_norm: 0.9999997520995575, iteration: 77395
loss: 1.1870222091674805,grad_norm: 0.9687240382800845, iteration: 77396
loss: 1.0355122089385986,grad_norm: 0.9999994615923595, iteration: 77397
loss: 1.0900555849075317,grad_norm: 0.9067028990551391, iteration: 77398
loss: 1.0187445878982544,grad_norm: 0.8658022666977183, iteration: 77399
loss: 1.0631219148635864,grad_norm: 0.9999994317367645, iteration: 77400
loss: 1.0161569118499756,grad_norm: 0.9999991742145138, iteration: 77401
loss: 1.0213912725448608,grad_norm: 0.9999991454062094, iteration: 77402
loss: 0.9614778757095337,grad_norm: 0.9999992092657706, iteration: 77403
loss: 1.0253962278366089,grad_norm: 0.9999996244931768, iteration: 77404
loss: 1.0434701442718506,grad_norm: 0.91710737466527, iteration: 77405
loss: 1.041204810142517,grad_norm: 0.9999998320627976, iteration: 77406
loss: 1.0481669902801514,grad_norm: 0.9999991320787112, iteration: 77407
loss: 1.0512117147445679,grad_norm: 0.9742069734525683, iteration: 77408
loss: 1.0278788805007935,grad_norm: 0.9999997733074458, iteration: 77409
loss: 1.166577935218811,grad_norm: 0.9999991452157568, iteration: 77410
loss: 1.05490243434906,grad_norm: 0.9999998832930939, iteration: 77411
loss: 0.9872084259986877,grad_norm: 0.9372204964032026, iteration: 77412
loss: 1.0104870796203613,grad_norm: 0.9940109430673892, iteration: 77413
loss: 1.0738774538040161,grad_norm: 0.9999995181936708, iteration: 77414
loss: 1.058616280555725,grad_norm: 0.9999991949368888, iteration: 77415
loss: 1.0660489797592163,grad_norm: 0.9999992589891216, iteration: 77416
loss: 1.0237547159194946,grad_norm: 0.9999990269870295, iteration: 77417
loss: 1.0105228424072266,grad_norm: 0.9999997038465005, iteration: 77418
loss: 1.054716944694519,grad_norm: 0.9999999558233138, iteration: 77419
loss: 1.0231776237487793,grad_norm: 0.9999993288618617, iteration: 77420
loss: 1.2242182493209839,grad_norm: 0.9999996923907525, iteration: 77421
loss: 0.9727648496627808,grad_norm: 0.9685938970209994, iteration: 77422
loss: 1.054938554763794,grad_norm: 0.9999997613506543, iteration: 77423
loss: 1.046512484550476,grad_norm: 0.9999991707518952, iteration: 77424
loss: 0.9994314908981323,grad_norm: 0.8710580727182721, iteration: 77425
loss: 1.0452145338058472,grad_norm: 0.9999995188495372, iteration: 77426
loss: 1.1232343912124634,grad_norm: 0.9999991721698238, iteration: 77427
loss: 1.054349660873413,grad_norm: 0.9999993163205881, iteration: 77428
loss: 1.0218902826309204,grad_norm: 0.9036704726050406, iteration: 77429
loss: 1.0271958112716675,grad_norm: 0.9999990396393831, iteration: 77430
loss: 1.113159418106079,grad_norm: 0.9999990836138014, iteration: 77431
loss: 0.9806784391403198,grad_norm: 0.9463070569874048, iteration: 77432
loss: 1.0889347791671753,grad_norm: 0.9999996501611575, iteration: 77433
loss: 0.9693849086761475,grad_norm: 0.9999992918648071, iteration: 77434
loss: 1.2421200275421143,grad_norm: 0.9999996884776023, iteration: 77435
loss: 0.9929184317588806,grad_norm: 0.9999991898497727, iteration: 77436
loss: 1.0466649532318115,grad_norm: 0.999999197469015, iteration: 77437
loss: 1.0129450559616089,grad_norm: 0.999999080184742, iteration: 77438
loss: 1.0242586135864258,grad_norm: 0.9999994214840369, iteration: 77439
loss: 1.0358296632766724,grad_norm: 0.999998965323998, iteration: 77440
loss: 0.9945524334907532,grad_norm: 0.8541767120812208, iteration: 77441
loss: 0.9819539785385132,grad_norm: 0.9495258495743537, iteration: 77442
loss: 1.0581601858139038,grad_norm: 0.8795052593021715, iteration: 77443
loss: 1.1131319999694824,grad_norm: 0.999999941002958, iteration: 77444
loss: 0.989029049873352,grad_norm: 0.9999997787103958, iteration: 77445
loss: 0.9919491410255432,grad_norm: 0.9999993769136422, iteration: 77446
loss: 1.0107256174087524,grad_norm: 0.9999999499835163, iteration: 77447
loss: 0.9690055847167969,grad_norm: 0.9818456646031117, iteration: 77448
loss: 1.0961754322052002,grad_norm: 0.9999998422921346, iteration: 77449
loss: 1.0055557489395142,grad_norm: 0.9219671187467524, iteration: 77450
loss: 1.1288825273513794,grad_norm: 0.9999991355321703, iteration: 77451
loss: 1.1939176321029663,grad_norm: 0.9999996175866807, iteration: 77452
loss: 1.0613261461257935,grad_norm: 0.9010135264174293, iteration: 77453
loss: 1.102704405784607,grad_norm: 0.9999991622791924, iteration: 77454
loss: 1.0303459167480469,grad_norm: 0.9999997393187698, iteration: 77455
loss: 1.345461368560791,grad_norm: 0.9999999692448676, iteration: 77456
loss: 1.0692923069000244,grad_norm: 0.9999993915224811, iteration: 77457
loss: 1.110588550567627,grad_norm: 1.000000005415386, iteration: 77458
loss: 1.3716415166854858,grad_norm: 0.9999999339531586, iteration: 77459
loss: 1.064733862876892,grad_norm: 0.9999993816952253, iteration: 77460
loss: 1.048924207687378,grad_norm: 0.8008586810265585, iteration: 77461
loss: 0.9946397542953491,grad_norm: 0.999999911415307, iteration: 77462
loss: 1.087579369544983,grad_norm: 0.9999991578506653, iteration: 77463
loss: 1.1011924743652344,grad_norm: 0.9999992663263225, iteration: 77464
loss: 0.993442952632904,grad_norm: 0.999999454038652, iteration: 77465
loss: 1.0111122131347656,grad_norm: 1.000000071761563, iteration: 77466
loss: 1.1528218984603882,grad_norm: 0.9999992729465155, iteration: 77467
loss: 1.131055474281311,grad_norm: 0.9999993236122695, iteration: 77468
loss: 1.0593281984329224,grad_norm: 0.9999992902096138, iteration: 77469
loss: 1.0410535335540771,grad_norm: 0.9999994880157541, iteration: 77470
loss: 1.160313367843628,grad_norm: 0.9999998525209721, iteration: 77471
loss: 1.0288448333740234,grad_norm: 0.9999993905584722, iteration: 77472
loss: 1.0411607027053833,grad_norm: 0.9999996193700867, iteration: 77473
loss: 1.1096147298812866,grad_norm: 0.9999996198303103, iteration: 77474
loss: 1.0310184955596924,grad_norm: 0.9999991999732127, iteration: 77475
loss: 1.0996358394622803,grad_norm: 0.9999999249062465, iteration: 77476
loss: 1.0851088762283325,grad_norm: 0.9999991190048615, iteration: 77477
loss: 0.9775363802909851,grad_norm: 0.8437651545725402, iteration: 77478
loss: 1.129761815071106,grad_norm: 0.9999994763309891, iteration: 77479
loss: 1.0171928405761719,grad_norm: 0.9999991826826978, iteration: 77480
loss: 1.0172330141067505,grad_norm: 0.9999997919005875, iteration: 77481
loss: 1.1111633777618408,grad_norm: 1.0000000180849677, iteration: 77482
loss: 1.0926719903945923,grad_norm: 0.9999997585499809, iteration: 77483
loss: 1.0389553308486938,grad_norm: 0.9643809570752743, iteration: 77484
loss: 1.000514030456543,grad_norm: 0.9781467033845783, iteration: 77485
loss: 1.0425217151641846,grad_norm: 0.9999990975231852, iteration: 77486
loss: 1.1243863105773926,grad_norm: 0.9999995673226164, iteration: 77487
loss: 1.0990298986434937,grad_norm: 0.9999995132011283, iteration: 77488
loss: 0.9873932600021362,grad_norm: 0.9999989307225987, iteration: 77489
loss: 1.110221266746521,grad_norm: 0.9999996958777286, iteration: 77490
loss: 1.0852272510528564,grad_norm: 0.99999964119562, iteration: 77491
loss: 1.0014857053756714,grad_norm: 0.9222278804543295, iteration: 77492
loss: 1.0903640985488892,grad_norm: 0.9999991041275998, iteration: 77493
loss: 1.124202013015747,grad_norm: 0.9999997643515949, iteration: 77494
loss: 1.1872285604476929,grad_norm: 0.9999997052961874, iteration: 77495
loss: 1.048388957977295,grad_norm: 0.9999999239764671, iteration: 77496
loss: 1.0956326723098755,grad_norm: 0.9999997147276781, iteration: 77497
loss: 1.079527735710144,grad_norm: 0.9999991498707111, iteration: 77498
loss: 1.18643057346344,grad_norm: 0.9999994328569658, iteration: 77499
loss: 1.1012916564941406,grad_norm: 0.9977128754728148, iteration: 77500
loss: 1.0709072351455688,grad_norm: 0.9979835873227747, iteration: 77501
loss: 1.244539499282837,grad_norm: 0.9999991887287344, iteration: 77502
loss: 1.0976887941360474,grad_norm: 0.9999990919924958, iteration: 77503
loss: 1.0477222204208374,grad_norm: 0.999999308286833, iteration: 77504
loss: 1.0188981294631958,grad_norm: 0.9999994150790792, iteration: 77505
loss: 1.0435378551483154,grad_norm: 0.9999992289489853, iteration: 77506
loss: 0.9813985228538513,grad_norm: 0.9999992194287158, iteration: 77507
loss: 1.2069839239120483,grad_norm: 0.9999996331601356, iteration: 77508
loss: 1.1052933931350708,grad_norm: 0.9999990120751283, iteration: 77509
loss: 1.1654945611953735,grad_norm: 0.9999992873792404, iteration: 77510
loss: 1.0268129110336304,grad_norm: 0.9999996571731761, iteration: 77511
loss: 1.128097653388977,grad_norm: 0.9999990990197446, iteration: 77512
loss: 1.0081980228424072,grad_norm: 0.9999990216316004, iteration: 77513
loss: 1.209695816040039,grad_norm: 0.9999999650433916, iteration: 77514
loss: 1.2226593494415283,grad_norm: 0.9999992819377863, iteration: 77515
loss: 1.142834186553955,grad_norm: 0.9999990293747315, iteration: 77516
loss: 1.2770116329193115,grad_norm: 0.9999999457587361, iteration: 77517
loss: 1.142030954360962,grad_norm: 0.9999992486233686, iteration: 77518
loss: 1.0735034942626953,grad_norm: 0.9999995262959095, iteration: 77519
loss: 1.246896505355835,grad_norm: 0.999999214814583, iteration: 77520
loss: 1.2881585359573364,grad_norm: 0.9999992660564593, iteration: 77521
loss: 1.2146960496902466,grad_norm: 0.9999992001736243, iteration: 77522
loss: 1.189858078956604,grad_norm: 0.9999999464210194, iteration: 77523
loss: 1.1640903949737549,grad_norm: 0.9999992527529454, iteration: 77524
loss: 1.2221323251724243,grad_norm: 0.9999991704935771, iteration: 77525
loss: 1.3327202796936035,grad_norm: 0.9999992978541267, iteration: 77526
loss: 1.3480820655822754,grad_norm: 0.9999996499298478, iteration: 77527
loss: 1.2085578441619873,grad_norm: 0.9999997028175913, iteration: 77528
loss: 1.2593028545379639,grad_norm: 1.0000000629114358, iteration: 77529
loss: 1.118349552154541,grad_norm: 0.9999995642691022, iteration: 77530
loss: 1.0626417398452759,grad_norm: 0.9999994944558057, iteration: 77531
loss: 1.1863561868667603,grad_norm: 0.9999993076186044, iteration: 77532
loss: 1.1563470363616943,grad_norm: 0.9999994804231082, iteration: 77533
loss: 1.0831328630447388,grad_norm: 0.9435264736877641, iteration: 77534
loss: 1.2246330976486206,grad_norm: 0.9999997679299564, iteration: 77535
loss: 1.1631145477294922,grad_norm: 0.9999992090099576, iteration: 77536
loss: 1.054663896560669,grad_norm: 0.9999993521374707, iteration: 77537
loss: 1.2263411283493042,grad_norm: 0.9999993785851139, iteration: 77538
loss: 1.1073205471038818,grad_norm: 0.9999994008275477, iteration: 77539
loss: 1.0778809785842896,grad_norm: 0.9999996681291832, iteration: 77540
loss: 1.1294511556625366,grad_norm: 0.988592898065966, iteration: 77541
loss: 0.9873972535133362,grad_norm: 0.940125620013503, iteration: 77542
loss: 1.2066328525543213,grad_norm: 0.9999990463707934, iteration: 77543
loss: 1.168229103088379,grad_norm: 0.999999330602254, iteration: 77544
loss: 1.1860318183898926,grad_norm: 0.999999087247965, iteration: 77545
loss: 1.3407288789749146,grad_norm: 0.9999993585308666, iteration: 77546
loss: 1.1250550746917725,grad_norm: 0.9999998836381833, iteration: 77547
loss: 1.3598995208740234,grad_norm: 0.9999996496161978, iteration: 77548
loss: 1.1008076667785645,grad_norm: 0.9999993620267529, iteration: 77549
loss: 1.2479901313781738,grad_norm: 0.9999998710918324, iteration: 77550
loss: 1.1520600318908691,grad_norm: 0.9999995206524692, iteration: 77551
loss: 1.1650301218032837,grad_norm: 1.0000000607767707, iteration: 77552
loss: 1.201791763305664,grad_norm: 0.9999991784571094, iteration: 77553
loss: 1.2488352060317993,grad_norm: 0.999999356452976, iteration: 77554
loss: 1.0771952867507935,grad_norm: 0.999999178235326, iteration: 77555
loss: 1.1144686937332153,grad_norm: 0.9999991240111306, iteration: 77556
loss: 1.2953007221221924,grad_norm: 0.99999912307044, iteration: 77557
loss: 1.2323203086853027,grad_norm: 0.9999997911829619, iteration: 77558
loss: 1.1831735372543335,grad_norm: 0.9999997482897229, iteration: 77559
loss: 1.0950860977172852,grad_norm: 0.9999992611833624, iteration: 77560
loss: 1.2224912643432617,grad_norm: 0.9999992503441014, iteration: 77561
loss: 1.131845474243164,grad_norm: 0.9999993863398093, iteration: 77562
loss: 1.2478384971618652,grad_norm: 0.9999991165690504, iteration: 77563
loss: 1.1211376190185547,grad_norm: 0.9999992083908276, iteration: 77564
loss: 1.0168256759643555,grad_norm: 0.999999353435135, iteration: 77565
loss: 1.0422837734222412,grad_norm: 0.9443918447230633, iteration: 77566
loss: 1.081121802330017,grad_norm: 0.9999992048768434, iteration: 77567
loss: 1.046473503112793,grad_norm: 0.892846002070271, iteration: 77568
loss: 1.0745694637298584,grad_norm: 0.9999996740743494, iteration: 77569
loss: 1.0948452949523926,grad_norm: 0.9999991613276381, iteration: 77570
loss: 1.073685884475708,grad_norm: 0.9999989677999085, iteration: 77571
loss: 1.0027157068252563,grad_norm: 0.9999996162705509, iteration: 77572
loss: 1.1050373315811157,grad_norm: 0.9999995436681595, iteration: 77573
loss: 1.0081729888916016,grad_norm: 0.9999991510199814, iteration: 77574
loss: 1.08993399143219,grad_norm: 0.9999998151601178, iteration: 77575
loss: 1.0277787446975708,grad_norm: 0.7691717952049624, iteration: 77576
loss: 1.0475999116897583,grad_norm: 0.999999050975037, iteration: 77577
loss: 1.0436654090881348,grad_norm: 0.9999996338771813, iteration: 77578
loss: 1.0082144737243652,grad_norm: 0.9999990928672832, iteration: 77579
loss: 1.024680495262146,grad_norm: 0.9196152969270879, iteration: 77580
loss: 1.0014703273773193,grad_norm: 0.8715934540770023, iteration: 77581
loss: 1.0746212005615234,grad_norm: 0.9999998391337357, iteration: 77582
loss: 1.0244989395141602,grad_norm: 0.9999990543089213, iteration: 77583
loss: 1.0369356870651245,grad_norm: 0.9999991500760176, iteration: 77584
loss: 1.1207168102264404,grad_norm: 0.9999994329076323, iteration: 77585
loss: 1.1637368202209473,grad_norm: 0.9999995960602798, iteration: 77586
loss: 1.0356260538101196,grad_norm: 0.999999197500037, iteration: 77587
loss: 1.0660881996154785,grad_norm: 0.999999136945338, iteration: 77588
loss: 1.0801464319229126,grad_norm: 0.9999998659575261, iteration: 77589
loss: 1.2235136032104492,grad_norm: 0.9999999019509419, iteration: 77590
loss: 1.0502599477767944,grad_norm: 0.9999995609663689, iteration: 77591
loss: 1.0073349475860596,grad_norm: 0.8762643224949297, iteration: 77592
loss: 1.1545463800430298,grad_norm: 0.9999990188520774, iteration: 77593
loss: 1.0537917613983154,grad_norm: 0.966994017021534, iteration: 77594
loss: 1.0546621084213257,grad_norm: 0.9999995347424733, iteration: 77595
loss: 1.07968270778656,grad_norm: 0.9999998798229232, iteration: 77596
loss: 1.1899315118789673,grad_norm: 0.9999998498730442, iteration: 77597
loss: 1.1570978164672852,grad_norm: 0.99999987599111, iteration: 77598
loss: 1.1569799184799194,grad_norm: 0.9999997139692999, iteration: 77599
loss: 1.1901901960372925,grad_norm: 0.9999994436300488, iteration: 77600
loss: 0.9990079998970032,grad_norm: 0.9999993452289925, iteration: 77601
loss: 1.0432969331741333,grad_norm: 0.9999998503894723, iteration: 77602
loss: 1.0730997323989868,grad_norm: 0.96626599145705, iteration: 77603
loss: 1.0121031999588013,grad_norm: 0.999999139360074, iteration: 77604
loss: 1.1722055673599243,grad_norm: 0.9390143942837134, iteration: 77605
loss: 1.0338925123214722,grad_norm: 0.9999992497121073, iteration: 77606
loss: 1.0507962703704834,grad_norm: 0.8524305033740269, iteration: 77607
loss: 0.9816198348999023,grad_norm: 0.805663094006654, iteration: 77608
loss: 1.0650358200073242,grad_norm: 0.9999996581879329, iteration: 77609
loss: 1.092836618423462,grad_norm: 0.9999994872858533, iteration: 77610
loss: 0.9827462434768677,grad_norm: 0.9106138036328543, iteration: 77611
loss: 1.026548981666565,grad_norm: 0.9492374639528768, iteration: 77612
loss: 1.053361177444458,grad_norm: 1.0000000157528612, iteration: 77613
loss: 1.1228861808776855,grad_norm: 0.999999275512336, iteration: 77614
loss: 1.0203847885131836,grad_norm: 0.9999991502006293, iteration: 77615
loss: 1.037340521812439,grad_norm: 0.9999992385056077, iteration: 77616
loss: 1.026353120803833,grad_norm: 0.9999996166081513, iteration: 77617
loss: 0.9736020565032959,grad_norm: 0.9999989678857137, iteration: 77618
loss: 1.0072386264801025,grad_norm: 0.979755852156913, iteration: 77619
loss: 1.0043359994888306,grad_norm: 0.949806656276032, iteration: 77620
loss: 0.9986171126365662,grad_norm: 0.9999990476642482, iteration: 77621
loss: 1.010575532913208,grad_norm: 0.9999990373860064, iteration: 77622
loss: 1.0207523107528687,grad_norm: 0.9904582908527003, iteration: 77623
loss: 0.9834282398223877,grad_norm: 0.8832679368329966, iteration: 77624
loss: 0.9785975217819214,grad_norm: 0.9999991736475878, iteration: 77625
loss: 1.031721830368042,grad_norm: 0.9999991616925893, iteration: 77626
loss: 1.1186965703964233,grad_norm: 0.9999991975959518, iteration: 77627
loss: 1.0484884977340698,grad_norm: 0.9999994777477897, iteration: 77628
loss: 1.043013095855713,grad_norm: 0.9410710298283754, iteration: 77629
loss: 1.0352762937545776,grad_norm: 0.9999991720580804, iteration: 77630
loss: 1.0502279996871948,grad_norm: 0.9999996296983925, iteration: 77631
loss: 1.0172908306121826,grad_norm: 0.9868602027559317, iteration: 77632
loss: 1.0362207889556885,grad_norm: 0.9999992544306886, iteration: 77633
loss: 1.1947457790374756,grad_norm: 0.9999995541797119, iteration: 77634
loss: 1.0775837898254395,grad_norm: 0.9999994766235422, iteration: 77635
loss: 0.9960606694221497,grad_norm: 0.9999991665587041, iteration: 77636
loss: 1.1972692012786865,grad_norm: 1.0000000453421698, iteration: 77637
loss: 1.0486527681350708,grad_norm: 0.9621325499576421, iteration: 77638
loss: 1.2555142641067505,grad_norm: 0.9999998581167729, iteration: 77639
loss: 1.1287497282028198,grad_norm: 0.999999748038629, iteration: 77640
loss: 1.1202312707901,grad_norm: 0.9292796526494309, iteration: 77641
loss: 1.0513920783996582,grad_norm: 0.9431512094801446, iteration: 77642
loss: 1.0584604740142822,grad_norm: 0.9994146355071715, iteration: 77643
loss: 1.1378370523452759,grad_norm: 0.9999999813729371, iteration: 77644
loss: 1.1162718534469604,grad_norm: 0.9999992194532611, iteration: 77645
loss: 1.1082162857055664,grad_norm: 0.999999954305481, iteration: 77646
loss: 1.1118086576461792,grad_norm: 0.9999994132684303, iteration: 77647
loss: 1.185341715812683,grad_norm: 0.9999996868846398, iteration: 77648
loss: 1.1208651065826416,grad_norm: 0.999999241369056, iteration: 77649
loss: 1.0479309558868408,grad_norm: 0.9999992459351078, iteration: 77650
loss: 1.1462968587875366,grad_norm: 0.9999992056697188, iteration: 77651
loss: 1.076747179031372,grad_norm: 0.9604420565222695, iteration: 77652
loss: 1.1797846555709839,grad_norm: 0.9999990216285732, iteration: 77653
loss: 1.0840591192245483,grad_norm: 0.9440129600265748, iteration: 77654
loss: 1.1209408044815063,grad_norm: 0.9999991059800384, iteration: 77655
loss: 1.1088566780090332,grad_norm: 0.9760114337879375, iteration: 77656
loss: 1.172622561454773,grad_norm: 0.9999993586554017, iteration: 77657
loss: 1.0784575939178467,grad_norm: 0.999998973439141, iteration: 77658
loss: 1.0954740047454834,grad_norm: 0.9999990779815416, iteration: 77659
loss: 1.1714580059051514,grad_norm: 0.9999999138063922, iteration: 77660
loss: 1.1037667989730835,grad_norm: 0.9999997674669096, iteration: 77661
loss: 1.0566905736923218,grad_norm: 0.9999989887458747, iteration: 77662
loss: 1.1492128372192383,grad_norm: 0.9999998687540242, iteration: 77663
loss: 1.1729689836502075,grad_norm: 0.9999994413627141, iteration: 77664
loss: 1.1530483961105347,grad_norm: 0.9999992916499201, iteration: 77665
loss: 1.0985039472579956,grad_norm: 0.9813071077488237, iteration: 77666
loss: 1.0497314929962158,grad_norm: 0.9999990600796408, iteration: 77667
loss: 1.1112991571426392,grad_norm: 0.9999993311081743, iteration: 77668
loss: 1.0322998762130737,grad_norm: 0.9558265514453904, iteration: 77669
loss: 1.1006711721420288,grad_norm: 0.9999993996749288, iteration: 77670
loss: 1.2284713983535767,grad_norm: 0.9999995634501596, iteration: 77671
loss: 1.1432968378067017,grad_norm: 0.9999998602025153, iteration: 77672
loss: 1.035790205001831,grad_norm: 0.8842507446409499, iteration: 77673
loss: 1.0860131978988647,grad_norm: 0.9719894666696313, iteration: 77674
loss: 1.171463966369629,grad_norm: 0.9999997593601401, iteration: 77675
loss: 1.0597176551818848,grad_norm: 0.9999992955080856, iteration: 77676
loss: 1.0617228746414185,grad_norm: 0.9999998064413439, iteration: 77677
loss: 1.069767713546753,grad_norm: 0.9999994429907872, iteration: 77678
loss: 1.1409251689910889,grad_norm: 0.9999997488055264, iteration: 77679
loss: 1.1991002559661865,grad_norm: 0.9999990086509517, iteration: 77680
loss: 1.0653936862945557,grad_norm: 0.9999990567535233, iteration: 77681
loss: 1.0754870176315308,grad_norm: 0.9999993168664223, iteration: 77682
loss: 1.0526551008224487,grad_norm: 0.9999997868821048, iteration: 77683
loss: 1.1004693508148193,grad_norm: 0.9999997290540219, iteration: 77684
loss: 1.147422194480896,grad_norm: 0.9999995582267119, iteration: 77685
loss: 1.0154688358306885,grad_norm: 0.9999996351926979, iteration: 77686
loss: 1.072380781173706,grad_norm: 0.9999992391304237, iteration: 77687
loss: 1.066760778427124,grad_norm: 0.9999995949055763, iteration: 77688
loss: 1.1126524209976196,grad_norm: 0.9999995964751863, iteration: 77689
loss: 1.008557915687561,grad_norm: 0.8854177234726867, iteration: 77690
loss: 1.0451834201812744,grad_norm: 0.9741881447306733, iteration: 77691
loss: 1.0188770294189453,grad_norm: 0.9999990266433367, iteration: 77692
loss: 1.0156999826431274,grad_norm: 0.999999267807465, iteration: 77693
loss: 0.9836271405220032,grad_norm: 0.8055266572984775, iteration: 77694
loss: 1.1767195463180542,grad_norm: 0.9999998718749543, iteration: 77695
loss: 1.058754324913025,grad_norm: 0.9999998126191119, iteration: 77696
loss: 1.0213284492492676,grad_norm: 0.9999992162076858, iteration: 77697
loss: 0.9966601133346558,grad_norm: 0.9102877652489035, iteration: 77698
loss: 1.0051063299179077,grad_norm: 0.9999993377021567, iteration: 77699
loss: 1.1007764339447021,grad_norm: 0.9999997532615083, iteration: 77700
loss: 1.0363259315490723,grad_norm: 0.8909081271710695, iteration: 77701
loss: 1.0645445585250854,grad_norm: 0.9999993453802786, iteration: 77702
loss: 1.1371464729309082,grad_norm: 0.9999993247101855, iteration: 77703
loss: 1.0145961046218872,grad_norm: 0.9999991276102251, iteration: 77704
loss: 1.005200743675232,grad_norm: 0.9999990262402113, iteration: 77705
loss: 1.0648586750030518,grad_norm: 0.9999998446488794, iteration: 77706
loss: 1.1135252714157104,grad_norm: 0.999999311159952, iteration: 77707
loss: 1.0807416439056396,grad_norm: 0.9999994526741472, iteration: 77708
loss: 1.0082874298095703,grad_norm: 0.9664623878937822, iteration: 77709
loss: 1.1076432466506958,grad_norm: 0.9999995377373334, iteration: 77710
loss: 0.9858300685882568,grad_norm: 0.9125722351693232, iteration: 77711
loss: 1.0319854021072388,grad_norm: 0.9999990619900128, iteration: 77712
loss: 1.0373486280441284,grad_norm: 0.9999995715038139, iteration: 77713
loss: 1.0124473571777344,grad_norm: 0.9999991067473735, iteration: 77714
loss: 1.0711088180541992,grad_norm: 0.9999996860776772, iteration: 77715
loss: 1.029087781906128,grad_norm: 0.9999990958153383, iteration: 77716
loss: 0.9903299808502197,grad_norm: 0.9991286682807284, iteration: 77717
loss: 1.103866696357727,grad_norm: 0.9999992635107929, iteration: 77718
loss: 0.9590311646461487,grad_norm: 0.9376388310367452, iteration: 77719
loss: 1.0526667833328247,grad_norm: 0.9999990476347004, iteration: 77720
loss: 1.0001952648162842,grad_norm: 0.8575348006907688, iteration: 77721
loss: 0.981128454208374,grad_norm: 0.9999989750226937, iteration: 77722
loss: 1.0378774404525757,grad_norm: 0.9999995091177377, iteration: 77723
loss: 1.0046242475509644,grad_norm: 0.9972912134067271, iteration: 77724
loss: 1.0204344987869263,grad_norm: 0.7771039561601873, iteration: 77725
loss: 1.1670434474945068,grad_norm: 0.9999991948685276, iteration: 77726
loss: 1.0456019639968872,grad_norm: 0.9999993891410115, iteration: 77727
loss: 1.0091270208358765,grad_norm: 0.9357693616436554, iteration: 77728
loss: 1.0010864734649658,grad_norm: 0.9999991793193299, iteration: 77729
loss: 1.0023326873779297,grad_norm: 0.9999998818650168, iteration: 77730
loss: 0.9911263585090637,grad_norm: 0.9999993335906134, iteration: 77731
loss: 1.0220533609390259,grad_norm: 0.9999995423753809, iteration: 77732
loss: 1.0513097047805786,grad_norm: 0.9537841986014829, iteration: 77733
loss: 0.9881334900856018,grad_norm: 0.9525944439820617, iteration: 77734
loss: 1.0352216958999634,grad_norm: 0.8716877072136296, iteration: 77735
loss: 0.9798912405967712,grad_norm: 0.8965893327438617, iteration: 77736
loss: 1.0769782066345215,grad_norm: 0.9999993943245276, iteration: 77737
loss: 0.9960154891014099,grad_norm: 0.9954116128325116, iteration: 77738
loss: 1.1029800176620483,grad_norm: 0.999999772859809, iteration: 77739
loss: 1.003674030303955,grad_norm: 0.9321035707082993, iteration: 77740
loss: 1.0499217510223389,grad_norm: 0.999999459299933, iteration: 77741
loss: 0.9690141081809998,grad_norm: 0.9999992415602995, iteration: 77742
loss: 1.0224924087524414,grad_norm: 0.9999991645277493, iteration: 77743
loss: 1.0289390087127686,grad_norm: 0.9772660252968566, iteration: 77744
loss: 0.9974316954612732,grad_norm: 0.8398740928659222, iteration: 77745
loss: 1.088398814201355,grad_norm: 0.9999995560313693, iteration: 77746
loss: 1.1304938793182373,grad_norm: 0.9999997654210524, iteration: 77747
loss: 0.9910889267921448,grad_norm: 0.9999994917259077, iteration: 77748
loss: 1.0166473388671875,grad_norm: 0.9278974688391961, iteration: 77749
loss: 1.0544211864471436,grad_norm: 0.9999991158210495, iteration: 77750
loss: 1.0309624671936035,grad_norm: 0.9999999655213628, iteration: 77751
loss: 1.0731981992721558,grad_norm: 0.9999996624351972, iteration: 77752
loss: 1.0312888622283936,grad_norm: 0.9311023356945418, iteration: 77753
loss: 0.9787110090255737,grad_norm: 0.8717413916480607, iteration: 77754
loss: 1.0037235021591187,grad_norm: 0.9046460679476356, iteration: 77755
loss: 1.0429184436798096,grad_norm: 0.9999999058767144, iteration: 77756
loss: 0.9867958426475525,grad_norm: 0.9591622227320229, iteration: 77757
loss: 1.0845001935958862,grad_norm: 0.9999995365523773, iteration: 77758
loss: 1.0399360656738281,grad_norm: 0.8732236409557711, iteration: 77759
loss: 1.021958589553833,grad_norm: 0.7563965371059425, iteration: 77760
loss: 1.0088235139846802,grad_norm: 0.999999682432596, iteration: 77761
loss: 1.0126689672470093,grad_norm: 0.999999223306215, iteration: 77762
loss: 1.0132030248641968,grad_norm: 0.9360107384301429, iteration: 77763
loss: 1.0500378608703613,grad_norm: 0.965307881869594, iteration: 77764
loss: 1.0048683881759644,grad_norm: 0.999999150305449, iteration: 77765
loss: 1.0390074253082275,grad_norm: 0.9015167427785533, iteration: 77766
loss: 1.0670968294143677,grad_norm: 0.9999997772798491, iteration: 77767
loss: 1.1951327323913574,grad_norm: 0.9999998803869554, iteration: 77768
loss: 0.9791088104248047,grad_norm: 0.7598358966105969, iteration: 77769
loss: 1.0556069612503052,grad_norm: 0.9999991898139737, iteration: 77770
loss: 1.0163065195083618,grad_norm: 0.9999991217964755, iteration: 77771
loss: 0.9893631339073181,grad_norm: 0.9999997811934892, iteration: 77772
loss: 1.0956299304962158,grad_norm: 0.9999995484931696, iteration: 77773
loss: 0.9796956777572632,grad_norm: 0.9999991051853416, iteration: 77774
loss: 0.9904353618621826,grad_norm: 0.9999991657492467, iteration: 77775
loss: 1.0223772525787354,grad_norm: 0.9999989907585164, iteration: 77776
loss: 1.0340116024017334,grad_norm: 0.9999990957764434, iteration: 77777
loss: 1.035499095916748,grad_norm: 0.9999997768657295, iteration: 77778
loss: 1.0062493085861206,grad_norm: 0.9615724795354288, iteration: 77779
loss: 1.0244680643081665,grad_norm: 0.9999996517799176, iteration: 77780
loss: 0.9773394465446472,grad_norm: 0.9999990074908143, iteration: 77781
loss: 1.1322450637817383,grad_norm: 0.999999845494477, iteration: 77782
loss: 1.0048272609710693,grad_norm: 0.8853010583031662, iteration: 77783
loss: 1.0172719955444336,grad_norm: 0.9999993463816356, iteration: 77784
loss: 0.9868625998497009,grad_norm: 0.8226613847193142, iteration: 77785
loss: 1.0123356580734253,grad_norm: 0.9175871316982726, iteration: 77786
loss: 1.1696120500564575,grad_norm: 0.9999998157359083, iteration: 77787
loss: 1.030848741531372,grad_norm: 0.999999831938485, iteration: 77788
loss: 0.9721487760543823,grad_norm: 0.9999993133414222, iteration: 77789
loss: 1.036576271057129,grad_norm: 0.9305079726190848, iteration: 77790
loss: 1.0588204860687256,grad_norm: 0.9999993346822674, iteration: 77791
loss: 1.0071346759796143,grad_norm: 0.9999996317638434, iteration: 77792
loss: 1.1676664352416992,grad_norm: 0.9999989540142115, iteration: 77793
loss: 1.0407159328460693,grad_norm: 0.942312505773134, iteration: 77794
loss: 0.9900658130645752,grad_norm: 0.9999990652667415, iteration: 77795
loss: 1.0500777959823608,grad_norm: 0.9999994520735909, iteration: 77796
loss: 1.118153691291809,grad_norm: 0.9999992741156799, iteration: 77797
loss: 1.053073763847351,grad_norm: 0.9999999820517891, iteration: 77798
loss: 1.0155855417251587,grad_norm: 0.9999992009412534, iteration: 77799
loss: 1.0210033655166626,grad_norm: 0.999999323848149, iteration: 77800
loss: 1.1447169780731201,grad_norm: 0.9999995731843907, iteration: 77801
loss: 1.0373988151550293,grad_norm: 0.9999997341900821, iteration: 77802
loss: 1.0027942657470703,grad_norm: 0.9999998173252038, iteration: 77803
loss: 1.0106213092803955,grad_norm: 0.9999990384233927, iteration: 77804
loss: 1.3598922491073608,grad_norm: 0.9999996576469843, iteration: 77805
loss: 0.9795835614204407,grad_norm: 0.9194477747901891, iteration: 77806
loss: 0.9927341938018799,grad_norm: 0.9438401880662302, iteration: 77807
loss: 1.0614869594573975,grad_norm: 0.9999995182208841, iteration: 77808
loss: 1.2007982730865479,grad_norm: 1.000000058883183, iteration: 77809
loss: 1.0479907989501953,grad_norm: 0.9999996089205274, iteration: 77810
loss: 1.0004631280899048,grad_norm: 0.9999994677827133, iteration: 77811
loss: 0.9757208228111267,grad_norm: 0.9999995362885601, iteration: 77812
loss: 1.1412937641143799,grad_norm: 0.9999998711741328, iteration: 77813
loss: 0.99193274974823,grad_norm: 0.9999992137642897, iteration: 77814
loss: 1.0193126201629639,grad_norm: 0.9306665464050992, iteration: 77815
loss: 1.040274977684021,grad_norm: 0.9999995465493534, iteration: 77816
loss: 1.067845344543457,grad_norm: 0.9552304351488193, iteration: 77817
loss: 1.06015944480896,grad_norm: 0.9999992243265576, iteration: 77818
loss: 1.064156174659729,grad_norm: 0.9529892497408629, iteration: 77819
loss: 1.1468243598937988,grad_norm: 0.9999997137070618, iteration: 77820
loss: 1.0483906269073486,grad_norm: 0.9999993437621202, iteration: 77821
loss: 1.237391471862793,grad_norm: 1.0000000209885913, iteration: 77822
loss: 1.0003573894500732,grad_norm: 0.9999995867846099, iteration: 77823
loss: 1.0386046171188354,grad_norm: 0.9999999510004576, iteration: 77824
loss: 1.0209447145462036,grad_norm: 0.9984362783391963, iteration: 77825
loss: 0.9913811087608337,grad_norm: 0.9091540354256813, iteration: 77826
loss: 1.0280498266220093,grad_norm: 0.9307293565636343, iteration: 77827
loss: 1.0165170431137085,grad_norm: 0.9028858528878689, iteration: 77828
loss: 1.0938923358917236,grad_norm: 0.999999785988034, iteration: 77829
loss: 1.047189474105835,grad_norm: 0.9999997067417796, iteration: 77830
loss: 1.204154133796692,grad_norm: 0.9999998117645527, iteration: 77831
loss: 0.995051920413971,grad_norm: 0.9999991826622028, iteration: 77832
loss: 1.0554782152175903,grad_norm: 0.9999996255327644, iteration: 77833
loss: 1.0472323894500732,grad_norm: 0.9999998086204246, iteration: 77834
loss: 0.9731093049049377,grad_norm: 0.9999995215211838, iteration: 77835
loss: 0.9901765584945679,grad_norm: 0.9764602357519339, iteration: 77836
loss: 1.0585529804229736,grad_norm: 0.9999997284209456, iteration: 77837
loss: 1.0843284130096436,grad_norm: 0.999999208424781, iteration: 77838
loss: 1.0342425107955933,grad_norm: 0.9999994453751201, iteration: 77839
loss: 1.0960837602615356,grad_norm: 0.9999995075941904, iteration: 77840
loss: 0.980503261089325,grad_norm: 0.9077617815451476, iteration: 77841
loss: 1.045714259147644,grad_norm: 0.9999996821869571, iteration: 77842
loss: 0.9966893196105957,grad_norm: 0.9999998264698526, iteration: 77843
loss: 1.052638053894043,grad_norm: 0.9999991815407107, iteration: 77844
loss: 1.0097458362579346,grad_norm: 0.9999995884245791, iteration: 77845
loss: 1.0693120956420898,grad_norm: 0.9844794206363302, iteration: 77846
loss: 0.9853200912475586,grad_norm: 0.9999992018688807, iteration: 77847
loss: 1.030258297920227,grad_norm: 0.9217763264361327, iteration: 77848
loss: 0.9944661259651184,grad_norm: 0.9377522488163519, iteration: 77849
loss: 1.0174626111984253,grad_norm: 0.9999999137120825, iteration: 77850
loss: 1.0852375030517578,grad_norm: 0.9999993466572757, iteration: 77851
loss: 1.0006564855575562,grad_norm: 0.9999993384561743, iteration: 77852
loss: 1.0194936990737915,grad_norm: 0.9540788489091934, iteration: 77853
loss: 1.1783140897750854,grad_norm: 0.9999997882253356, iteration: 77854
loss: 1.0537889003753662,grad_norm: 0.8480362308188255, iteration: 77855
loss: 1.043835163116455,grad_norm: 0.9999994871540021, iteration: 77856
loss: 1.0112481117248535,grad_norm: 0.889445796108688, iteration: 77857
loss: 1.0095453262329102,grad_norm: 0.9933566347419971, iteration: 77858
loss: 1.0429182052612305,grad_norm: 0.9999998868142043, iteration: 77859
loss: 1.0381015539169312,grad_norm: 0.999999226932286, iteration: 77860
loss: 1.0849021673202515,grad_norm: 0.9999996036531833, iteration: 77861
loss: 1.0470201969146729,grad_norm: 0.9999992300106029, iteration: 77862
loss: 1.059775710105896,grad_norm: 0.9999995304091072, iteration: 77863
loss: 1.0466967821121216,grad_norm: 0.9999992492331574, iteration: 77864
loss: 0.983340322971344,grad_norm: 0.9929065712227931, iteration: 77865
loss: 1.1518845558166504,grad_norm: 0.9999997589977643, iteration: 77866
loss: 1.0895276069641113,grad_norm: 0.9999989928572206, iteration: 77867
loss: 0.9514845013618469,grad_norm: 0.8927861028152491, iteration: 77868
loss: 1.0386096239089966,grad_norm: 0.9999992055292428, iteration: 77869
loss: 1.0246363878250122,grad_norm: 0.9224477123353705, iteration: 77870
loss: 0.9910414814949036,grad_norm: 0.999999085028398, iteration: 77871
loss: 0.9966839551925659,grad_norm: 0.9084495322291489, iteration: 77872
loss: 0.9825693964958191,grad_norm: 0.9287350618694934, iteration: 77873
loss: 1.017774224281311,grad_norm: 0.9999994450745958, iteration: 77874
loss: 0.9880374073982239,grad_norm: 0.9970555348148719, iteration: 77875
loss: 0.9722225069999695,grad_norm: 0.9067225144851621, iteration: 77876
loss: 0.977608859539032,grad_norm: 0.9999990381951427, iteration: 77877
loss: 1.0834224224090576,grad_norm: 0.9999995552783505, iteration: 77878
loss: 1.0201237201690674,grad_norm: 0.9534743745771765, iteration: 77879
loss: 1.0045156478881836,grad_norm: 0.9360229200503567, iteration: 77880
loss: 1.0076168775558472,grad_norm: 0.9430930801857115, iteration: 77881
loss: 1.0217205286026,grad_norm: 0.9999995786549838, iteration: 77882
loss: 1.0448033809661865,grad_norm: 0.9999989624796941, iteration: 77883
loss: 0.9765396118164062,grad_norm: 0.9999998938710901, iteration: 77884
loss: 1.1623663902282715,grad_norm: 0.9999994645204566, iteration: 77885
loss: 0.9974111914634705,grad_norm: 0.9205577808401545, iteration: 77886
loss: 1.01641047000885,grad_norm: 0.9999992268056793, iteration: 77887
loss: 1.001994013786316,grad_norm: 0.8575567698920596, iteration: 77888
loss: 0.9840744733810425,grad_norm: 0.8224105575944801, iteration: 77889
loss: 0.9781568050384521,grad_norm: 0.7599714331018141, iteration: 77890
loss: 0.9813334345817566,grad_norm: 0.8013762092621503, iteration: 77891
loss: 1.0286988019943237,grad_norm: 0.9635879454047952, iteration: 77892
loss: 1.058030605316162,grad_norm: 0.9999998253982, iteration: 77893
loss: 0.9913011193275452,grad_norm: 0.9999990929209897, iteration: 77894
loss: 1.032509684562683,grad_norm: 0.8807373515095852, iteration: 77895
loss: 1.0951879024505615,grad_norm: 0.9999990812366722, iteration: 77896
loss: 1.1955524682998657,grad_norm: 0.9999998625915301, iteration: 77897
loss: 1.0609763860702515,grad_norm: 0.9999994660456544, iteration: 77898
loss: 1.0332601070404053,grad_norm: 0.9999992869864431, iteration: 77899
loss: 1.0360718965530396,grad_norm: 0.9999990655014914, iteration: 77900
loss: 1.0012327432632446,grad_norm: 0.8919842018520565, iteration: 77901
loss: 1.0243875980377197,grad_norm: 0.9856668389930782, iteration: 77902
loss: 0.9859209656715393,grad_norm: 0.9999991057393748, iteration: 77903
loss: 1.025952696800232,grad_norm: 0.9999997543730517, iteration: 77904
loss: 1.0326043367385864,grad_norm: 0.9999995974468384, iteration: 77905
loss: 0.9904890656471252,grad_norm: 0.999999419554985, iteration: 77906
loss: 1.0242172479629517,grad_norm: 0.9999994423571542, iteration: 77907
loss: 1.017323613166809,grad_norm: 0.9999991776022112, iteration: 77908
loss: 1.0506107807159424,grad_norm: 0.9999994943891072, iteration: 77909
loss: 1.121524691581726,grad_norm: 0.9999998228713021, iteration: 77910
loss: 1.0654021501541138,grad_norm: 0.9999991591230862, iteration: 77911
loss: 1.0905836820602417,grad_norm: 0.9999994604527116, iteration: 77912
loss: 1.2299777269363403,grad_norm: 0.9999993998133975, iteration: 77913
loss: 1.0952231884002686,grad_norm: 0.9999996233899211, iteration: 77914
loss: 1.1506420373916626,grad_norm: 0.9999999115792224, iteration: 77915
loss: 1.008234977722168,grad_norm: 0.9999990617721172, iteration: 77916
loss: 1.0445451736450195,grad_norm: 0.9766266570923955, iteration: 77917
loss: 1.0592831373214722,grad_norm: 0.9999994701045931, iteration: 77918
loss: 0.9714897274971008,grad_norm: 0.9999992252486096, iteration: 77919
loss: 0.9674999117851257,grad_norm: 0.9999994025389765, iteration: 77920
loss: 1.0708822011947632,grad_norm: 0.9999994774320655, iteration: 77921
loss: 1.1398723125457764,grad_norm: 0.9999999202663998, iteration: 77922
loss: 1.0551966428756714,grad_norm: 0.9999998187162192, iteration: 77923
loss: 1.1845064163208008,grad_norm: 0.999999712282671, iteration: 77924
loss: 0.9510756134986877,grad_norm: 0.9999991879361974, iteration: 77925
loss: 1.0161868333816528,grad_norm: 0.9999998714207106, iteration: 77926
loss: 0.9864888787269592,grad_norm: 0.9999992018055891, iteration: 77927
loss: 0.9999054670333862,grad_norm: 0.928321607895211, iteration: 77928
loss: 1.0300672054290771,grad_norm: 0.9999996041982983, iteration: 77929
loss: 1.0434882640838623,grad_norm: 0.9999996307356032, iteration: 77930
loss: 1.091805100440979,grad_norm: 0.9999993653840206, iteration: 77931
loss: 1.0448403358459473,grad_norm: 0.9999993408136809, iteration: 77932
loss: 1.0273547172546387,grad_norm: 0.9999992551881712, iteration: 77933
loss: 0.9772521257400513,grad_norm: 0.8030386416677258, iteration: 77934
loss: 1.1005350351333618,grad_norm: 0.9999993014008537, iteration: 77935
loss: 1.1838526725769043,grad_norm: 0.9999997136107845, iteration: 77936
loss: 0.9925925135612488,grad_norm: 0.8183023569389994, iteration: 77937
loss: 0.9923887848854065,grad_norm: 0.9999990166923564, iteration: 77938
loss: 1.113275408744812,grad_norm: 0.9999999323481026, iteration: 77939
loss: 1.0186489820480347,grad_norm: 0.9999992901298363, iteration: 77940
loss: 1.0147120952606201,grad_norm: 1.0000000193712706, iteration: 77941
loss: 1.0506012439727783,grad_norm: 0.9999995572120163, iteration: 77942
loss: 1.1260613203048706,grad_norm: 0.9999998103194934, iteration: 77943
loss: 1.1655263900756836,grad_norm: 0.9999997952298955, iteration: 77944
loss: 0.9662024974822998,grad_norm: 0.9999992920472097, iteration: 77945
loss: 1.0157746076583862,grad_norm: 0.9999997395497349, iteration: 77946
loss: 1.1624253988265991,grad_norm: 0.9999992123741148, iteration: 77947
loss: 1.3092974424362183,grad_norm: 0.999999965895143, iteration: 77948
loss: 1.082602858543396,grad_norm: 0.9999991805225203, iteration: 77949
loss: 1.1041334867477417,grad_norm: 0.9999990623769837, iteration: 77950
loss: 1.0772439241409302,grad_norm: 0.9999990202162088, iteration: 77951
loss: 1.0096718072891235,grad_norm: 0.8764797227636996, iteration: 77952
loss: 1.0559988021850586,grad_norm: 0.9999998646379343, iteration: 77953
loss: 1.177093744277954,grad_norm: 0.9999992039626835, iteration: 77954
loss: 1.0327316522598267,grad_norm: 0.9832000404445336, iteration: 77955
loss: 0.9574326872825623,grad_norm: 0.9960253992115048, iteration: 77956
loss: 1.0856767892837524,grad_norm: 0.9999990527513757, iteration: 77957
loss: 1.0870574712753296,grad_norm: 0.9999998686674638, iteration: 77958
loss: 1.2389377355575562,grad_norm: 0.9999999976563984, iteration: 77959
loss: 1.1326324939727783,grad_norm: 0.9999999637830913, iteration: 77960
loss: 1.3104243278503418,grad_norm: 0.999999954160426, iteration: 77961
loss: 1.2053496837615967,grad_norm: 0.9999997378726405, iteration: 77962
loss: 1.2388458251953125,grad_norm: 0.9999993808025955, iteration: 77963
loss: 1.0073271989822388,grad_norm: 0.944113115507934, iteration: 77964
loss: 1.0940369367599487,grad_norm: 0.9999990380160263, iteration: 77965
loss: 1.0554232597351074,grad_norm: 0.8062001321373193, iteration: 77966
loss: 0.99387127161026,grad_norm: 0.8992992889406973, iteration: 77967
loss: 0.986765444278717,grad_norm: 0.832718826583414, iteration: 77968
loss: 1.0631214380264282,grad_norm: 0.9999994641333828, iteration: 77969
loss: 1.1833157539367676,grad_norm: 0.9999999064887133, iteration: 77970
loss: 0.9921755194664001,grad_norm: 0.9999990705538335, iteration: 77971
loss: 1.0097630023956299,grad_norm: 0.999999047395959, iteration: 77972
loss: 0.9869883060455322,grad_norm: 0.9281253786301593, iteration: 77973
loss: 1.1237157583236694,grad_norm: 0.9999998456938097, iteration: 77974
loss: 1.1343399286270142,grad_norm: 0.9999995403541636, iteration: 77975
loss: 1.0035330057144165,grad_norm: 0.9999991087745891, iteration: 77976
loss: 1.0073902606964111,grad_norm: 0.8098178952274201, iteration: 77977
loss: 1.0872199535369873,grad_norm: 0.9999991383759307, iteration: 77978
loss: 1.082314372062683,grad_norm: 0.9999994607088225, iteration: 77979
loss: 1.1327003240585327,grad_norm: 0.9999996336396878, iteration: 77980
loss: 1.1908526420593262,grad_norm: 0.9999998187538196, iteration: 77981
loss: 1.0387210845947266,grad_norm: 0.9999995625674958, iteration: 77982
loss: 1.2069323062896729,grad_norm: 0.9999997879898082, iteration: 77983
loss: 1.1707788705825806,grad_norm: 0.9999999428135359, iteration: 77984
loss: 1.2214758396148682,grad_norm: 0.999999702770165, iteration: 77985
loss: 1.2271368503570557,grad_norm: 0.9999993799517825, iteration: 77986
loss: 1.0927823781967163,grad_norm: 0.9999993306207516, iteration: 77987
loss: 1.2639176845550537,grad_norm: 0.9999995204777693, iteration: 77988
loss: 1.284473180770874,grad_norm: 0.9999996722672573, iteration: 77989
loss: 1.1913871765136719,grad_norm: 0.9999998847859882, iteration: 77990
loss: 1.1248867511749268,grad_norm: 0.9999996286525077, iteration: 77991
loss: 1.3111666440963745,grad_norm: 0.999999627892835, iteration: 77992
loss: 1.2853997945785522,grad_norm: 0.9999998970391842, iteration: 77993
loss: 1.5866326093673706,grad_norm: 1.0000000588373197, iteration: 77994
loss: 1.2914243936538696,grad_norm: 0.999999739534834, iteration: 77995
loss: 1.252798318862915,grad_norm: 0.9999996338370778, iteration: 77996
loss: 1.4330095052719116,grad_norm: 0.999999869807831, iteration: 77997
loss: 1.1998810768127441,grad_norm: 0.9999995292644169, iteration: 77998
loss: 1.1395162343978882,grad_norm: 0.9999998335937009, iteration: 77999
loss: 1.070143699645996,grad_norm: 0.9570256908989818, iteration: 78000
loss: 1.0366463661193848,grad_norm: 0.9999990444273883, iteration: 78001
loss: 1.1347614526748657,grad_norm: 0.9999994419193602, iteration: 78002
loss: 1.0202343463897705,grad_norm: 0.9999990948997376, iteration: 78003
loss: 1.1714215278625488,grad_norm: 0.9999998059986686, iteration: 78004
loss: 1.0341747999191284,grad_norm: 0.9999994526518917, iteration: 78005
loss: 1.0455451011657715,grad_norm: 0.9820202900284041, iteration: 78006
loss: 1.0743660926818848,grad_norm: 0.9999990480644709, iteration: 78007
loss: 1.0146598815917969,grad_norm: 0.9999991905127026, iteration: 78008
loss: 1.0242465734481812,grad_norm: 0.9999996923281594, iteration: 78009
loss: 1.047238826751709,grad_norm: 0.9914552796577558, iteration: 78010
loss: 1.1039217710494995,grad_norm: 0.999999739013941, iteration: 78011
loss: 1.0652847290039062,grad_norm: 1.0000000544420127, iteration: 78012
loss: 1.1169500350952148,grad_norm: 0.9877291725263344, iteration: 78013
loss: 0.9955662488937378,grad_norm: 0.9533419043618678, iteration: 78014
loss: 1.0975379943847656,grad_norm: 0.9999998755145739, iteration: 78015
loss: 0.998288631439209,grad_norm: 0.9999989427567761, iteration: 78016
loss: 1.187179684638977,grad_norm: 0.9999994253277914, iteration: 78017
loss: 1.1168484687805176,grad_norm: 0.9999992200379757, iteration: 78018
loss: 1.1726195812225342,grad_norm: 0.9999999740002816, iteration: 78019
loss: 1.0633265972137451,grad_norm: 0.999999969968621, iteration: 78020
loss: 1.017899751663208,grad_norm: 0.9999999172855463, iteration: 78021
loss: 1.189473032951355,grad_norm: 0.9999998917513093, iteration: 78022
loss: 1.1004661321640015,grad_norm: 0.9999996711561685, iteration: 78023
loss: 1.2110984325408936,grad_norm: 0.9999999836972356, iteration: 78024
loss: 1.0976954698562622,grad_norm: 0.9999997559525987, iteration: 78025
loss: 1.2176541090011597,grad_norm: 0.9999999903293213, iteration: 78026
loss: 1.0318936109542847,grad_norm: 0.9999997457922856, iteration: 78027
loss: 1.0478142499923706,grad_norm: 0.999999779480688, iteration: 78028
loss: 1.1029853820800781,grad_norm: 0.9999998238433498, iteration: 78029
loss: 1.0413612127304077,grad_norm: 0.9999994717975714, iteration: 78030
loss: 1.0004935264587402,grad_norm: 0.8801656254541246, iteration: 78031
loss: 1.185151219367981,grad_norm: 0.9999998060139237, iteration: 78032
loss: 1.1461209058761597,grad_norm: 0.9999995898446554, iteration: 78033
loss: 1.126643180847168,grad_norm: 0.9999995700635195, iteration: 78034
loss: 1.077214002609253,grad_norm: 0.9999999341470971, iteration: 78035
loss: 1.0628681182861328,grad_norm: 0.9999998876937899, iteration: 78036
loss: 1.1883361339569092,grad_norm: 0.9999999629928352, iteration: 78037
loss: 1.0996590852737427,grad_norm: 0.9999991831306713, iteration: 78038
loss: 1.271968126296997,grad_norm: 0.9999997773567331, iteration: 78039
loss: 1.1048636436462402,grad_norm: 0.9999996018405003, iteration: 78040
loss: 1.0336812734603882,grad_norm: 0.9999994657821848, iteration: 78041
loss: 1.1107405424118042,grad_norm: 0.9999997937625892, iteration: 78042
loss: 1.5211788415908813,grad_norm: 0.9999999213524505, iteration: 78043
loss: 1.153833031654358,grad_norm: 0.9999996038808643, iteration: 78044
loss: 1.009324312210083,grad_norm: 0.9999999373005137, iteration: 78045
loss: 1.075837254524231,grad_norm: 0.9999998739169026, iteration: 78046
loss: 1.1294785737991333,grad_norm: 0.9999995468954737, iteration: 78047
loss: 1.0727722644805908,grad_norm: 0.9999997807670705, iteration: 78048
loss: 1.096010446548462,grad_norm: 0.9999996363726641, iteration: 78049
loss: 1.0902409553527832,grad_norm: 0.9999996569412221, iteration: 78050
loss: 1.04568612575531,grad_norm: 0.9999994304568167, iteration: 78051
loss: 1.0445681810379028,grad_norm: 0.9999998897942042, iteration: 78052
loss: 1.0264064073562622,grad_norm: 0.9999990846043638, iteration: 78053
loss: 1.1506742238998413,grad_norm: 1.0000000163099536, iteration: 78054
loss: 1.1391547918319702,grad_norm: 0.9943700495665316, iteration: 78055
loss: 1.0747220516204834,grad_norm: 0.9999997970643331, iteration: 78056
loss: 1.1899207830429077,grad_norm: 0.999999477399072, iteration: 78057
loss: 1.0347251892089844,grad_norm: 0.9999991913693421, iteration: 78058
loss: 1.1759285926818848,grad_norm: 0.99999952426717, iteration: 78059
loss: 1.1597588062286377,grad_norm: 0.9999996202556339, iteration: 78060
loss: 1.0578467845916748,grad_norm: 0.9999996092400707, iteration: 78061
loss: 1.1736502647399902,grad_norm: 0.9999993412147522, iteration: 78062
loss: 1.014702320098877,grad_norm: 0.9667953788995775, iteration: 78063
loss: 1.0741333961486816,grad_norm: 0.9999993207440305, iteration: 78064
loss: 1.0851824283599854,grad_norm: 0.9999995229634335, iteration: 78065
loss: 1.017940878868103,grad_norm: 0.9999999245669018, iteration: 78066
loss: 1.1519520282745361,grad_norm: 0.9999996509626682, iteration: 78067
loss: 1.1613061428070068,grad_norm: 0.9999992692323965, iteration: 78068
loss: 1.135053277015686,grad_norm: 0.9999996814760485, iteration: 78069
loss: 1.2830911874771118,grad_norm: 0.9999998390734548, iteration: 78070
loss: 1.062071442604065,grad_norm: 0.9999991313531141, iteration: 78071
loss: 1.2318590879440308,grad_norm: 0.9999999724939181, iteration: 78072
loss: 1.158263087272644,grad_norm: 0.9999999681152775, iteration: 78073
loss: 1.0320806503295898,grad_norm: 0.9999990254580894, iteration: 78074
loss: 1.3510137796401978,grad_norm: 0.9999999835132942, iteration: 78075
loss: 1.0930202007293701,grad_norm: 0.9999991364655776, iteration: 78076
loss: 1.1140803098678589,grad_norm: 0.973310933848337, iteration: 78077
loss: 1.0326545238494873,grad_norm: 0.9999996492466469, iteration: 78078
loss: 1.1548435688018799,grad_norm: 0.99999963798678, iteration: 78079
loss: 1.0276662111282349,grad_norm: 0.9999996138916681, iteration: 78080
loss: 1.0630557537078857,grad_norm: 0.9999996661256925, iteration: 78081
loss: 1.0612539052963257,grad_norm: 0.9999994545983482, iteration: 78082
loss: 1.2306276559829712,grad_norm: 0.9999999835858976, iteration: 78083
loss: 1.251538634300232,grad_norm: 0.9999994544424488, iteration: 78084
loss: 1.0521087646484375,grad_norm: 0.9999995045297089, iteration: 78085
loss: 1.0794075727462769,grad_norm: 0.999999215954165, iteration: 78086
loss: 1.0930347442626953,grad_norm: 0.999999211704393, iteration: 78087
loss: 0.9904969334602356,grad_norm: 0.8552975054950451, iteration: 78088
loss: 1.0072251558303833,grad_norm: 0.9999997494579766, iteration: 78089
loss: 1.003929853439331,grad_norm: 0.8935193016139916, iteration: 78090
loss: 1.0846247673034668,grad_norm: 0.9999990551704708, iteration: 78091
loss: 1.0115917921066284,grad_norm: 0.9999990780420549, iteration: 78092
loss: 1.1121565103530884,grad_norm: 0.999999543424425, iteration: 78093
loss: 1.131446123123169,grad_norm: 0.9999998094377617, iteration: 78094
loss: 1.0110560655593872,grad_norm: 0.9368726838447946, iteration: 78095
loss: 1.0026741027832031,grad_norm: 0.8250097644967191, iteration: 78096
loss: 0.9912948608398438,grad_norm: 0.999999212907455, iteration: 78097
loss: 1.1852455139160156,grad_norm: 0.9999991092448424, iteration: 78098
loss: 1.0564593076705933,grad_norm: 0.9999990728514742, iteration: 78099
loss: 1.2398263216018677,grad_norm: 1.0000000013012775, iteration: 78100
loss: 1.0788072347640991,grad_norm: 0.9999999517756014, iteration: 78101
loss: 1.2016782760620117,grad_norm: 0.9999999756066758, iteration: 78102
loss: 1.1435939073562622,grad_norm: 0.9999992306400892, iteration: 78103
loss: 1.1618763208389282,grad_norm: 0.9999994043843027, iteration: 78104
loss: 1.0737701654434204,grad_norm: 0.9999995558613722, iteration: 78105
loss: 1.030554175376892,grad_norm: 0.9999990304102977, iteration: 78106
loss: 1.0757158994674683,grad_norm: 0.978485523143784, iteration: 78107
loss: 1.0485893487930298,grad_norm: 0.9999992670495842, iteration: 78108
loss: 1.5123114585876465,grad_norm: 0.9999997580017821, iteration: 78109
loss: 1.0370742082595825,grad_norm: 0.9999995159690872, iteration: 78110
loss: 1.1313812732696533,grad_norm: 0.9999996923386307, iteration: 78111
loss: 1.3730926513671875,grad_norm: 0.9999998052577989, iteration: 78112
loss: 1.0554107427597046,grad_norm: 0.9999991047490928, iteration: 78113
loss: 1.1214221715927124,grad_norm: 0.9999992276695189, iteration: 78114
loss: 1.0655068159103394,grad_norm: 0.9999994270682743, iteration: 78115
loss: 1.1740872859954834,grad_norm: 0.9999994821029226, iteration: 78116
loss: 1.0154128074645996,grad_norm: 0.999999323180744, iteration: 78117
loss: 1.289268136024475,grad_norm: 0.9999998300698959, iteration: 78118
loss: 1.1622180938720703,grad_norm: 0.999999526404631, iteration: 78119
loss: 1.1614934206008911,grad_norm: 0.9999993641625899, iteration: 78120
loss: 1.0506011247634888,grad_norm: 0.9077309213829688, iteration: 78121
loss: 1.0036170482635498,grad_norm: 0.9999991390517811, iteration: 78122
loss: 1.165915608406067,grad_norm: 0.9999993960054278, iteration: 78123
loss: 1.0621689558029175,grad_norm: 0.9999991199919833, iteration: 78124
loss: 1.0510112047195435,grad_norm: 0.9999992512766999, iteration: 78125
loss: 1.227379560470581,grad_norm: 0.999998975776825, iteration: 78126
loss: 1.1318069696426392,grad_norm: 0.9999998788924622, iteration: 78127
loss: 1.2972549200057983,grad_norm: 0.9999995829480788, iteration: 78128
loss: 1.0338337421417236,grad_norm: 0.9999990542248456, iteration: 78129
loss: 1.0365855693817139,grad_norm: 0.988658349521669, iteration: 78130
loss: 1.075662612915039,grad_norm: 0.8705341874843033, iteration: 78131
loss: 1.0402779579162598,grad_norm: 0.9228389092580432, iteration: 78132
loss: 1.2994587421417236,grad_norm: 0.999999807107563, iteration: 78133
loss: 0.9705691337585449,grad_norm: 0.8778598144068687, iteration: 78134
loss: 1.0361655950546265,grad_norm: 0.9999996462739575, iteration: 78135
loss: 1.311427116394043,grad_norm: 0.9999996273217254, iteration: 78136
loss: 1.001932144165039,grad_norm: 0.9999995649241337, iteration: 78137
loss: 1.0361824035644531,grad_norm: 0.981047285928353, iteration: 78138
loss: 1.1047394275665283,grad_norm: 0.8703750385046747, iteration: 78139
loss: 1.0047916173934937,grad_norm: 0.8992996162873276, iteration: 78140
loss: 1.0012507438659668,grad_norm: 0.9537948835551349, iteration: 78141
loss: 1.0429784059524536,grad_norm: 0.9200085261583647, iteration: 78142
loss: 1.0228339433670044,grad_norm: 0.9627628503849287, iteration: 78143
loss: 1.0664278268814087,grad_norm: 0.999999816285216, iteration: 78144
loss: 1.1333675384521484,grad_norm: 0.9741731181063654, iteration: 78145
loss: 1.3665698766708374,grad_norm: 0.999999533287151, iteration: 78146
loss: 1.0292599201202393,grad_norm: 0.9999993006001501, iteration: 78147
loss: 1.072274923324585,grad_norm: 0.8947436585554718, iteration: 78148
loss: 1.0748755931854248,grad_norm: 0.9999992257053549, iteration: 78149
loss: 1.0942096710205078,grad_norm: 0.9999993433473711, iteration: 78150
loss: 1.1002962589263916,grad_norm: 0.9999995217052569, iteration: 78151
loss: 1.0341449975967407,grad_norm: 0.8757359527265995, iteration: 78152
loss: 1.0814684629440308,grad_norm: 0.9999995101648618, iteration: 78153
loss: 0.9974145293235779,grad_norm: 0.9380528634229178, iteration: 78154
loss: 1.2940455675125122,grad_norm: 0.9999996414572259, iteration: 78155
loss: 1.0513874292373657,grad_norm: 0.9038236973017275, iteration: 78156
loss: 1.0607190132141113,grad_norm: 0.9999991719382572, iteration: 78157
loss: 0.9686927199363708,grad_norm: 0.9168081630237264, iteration: 78158
loss: 1.0134727954864502,grad_norm: 0.9999991140559946, iteration: 78159
loss: 1.0270975828170776,grad_norm: 0.9264300123299917, iteration: 78160
loss: 1.1253968477249146,grad_norm: 0.9999998705774464, iteration: 78161
loss: 1.0017832517623901,grad_norm: 0.9999999858233174, iteration: 78162
loss: 1.0758912563323975,grad_norm: 0.9999992113414589, iteration: 78163
loss: 0.9874894618988037,grad_norm: 0.9884640378146845, iteration: 78164
loss: 1.02439284324646,grad_norm: 0.9999994976708788, iteration: 78165
loss: 1.182881474494934,grad_norm: 0.9999992699644228, iteration: 78166
loss: 1.0035672187805176,grad_norm: 0.8801373864420329, iteration: 78167
loss: 1.0342419147491455,grad_norm: 0.9999991409117899, iteration: 78168
loss: 1.034852147102356,grad_norm: 0.9999992359381569, iteration: 78169
loss: 0.9939754605293274,grad_norm: 0.9254017598753981, iteration: 78170
loss: 1.0788230895996094,grad_norm: 0.999999659297421, iteration: 78171
loss: 1.0361354351043701,grad_norm: 0.9999996683646721, iteration: 78172
loss: 1.0399233102798462,grad_norm: 0.9999996255299892, iteration: 78173
loss: 1.013750433921814,grad_norm: 0.9035434188530672, iteration: 78174
loss: 1.108362078666687,grad_norm: 0.9999991290455097, iteration: 78175
loss: 1.1860953569412231,grad_norm: 0.999999144280733, iteration: 78176
loss: 1.0929830074310303,grad_norm: 0.9999990922434031, iteration: 78177
loss: 0.9866393804550171,grad_norm: 0.8482441051537043, iteration: 78178
loss: 1.0709352493286133,grad_norm: 0.9193864126805598, iteration: 78179
loss: 1.0691187381744385,grad_norm: 0.9999991535381134, iteration: 78180
loss: 0.9983699917793274,grad_norm: 0.8911688929661454, iteration: 78181
loss: 1.0598247051239014,grad_norm: 0.9999996975935562, iteration: 78182
loss: 0.9953211545944214,grad_norm: 0.9999999275176302, iteration: 78183
loss: 1.1176999807357788,grad_norm: 0.9999996665072922, iteration: 78184
loss: 0.9900401830673218,grad_norm: 0.9831187233863289, iteration: 78185
loss: 1.0154340267181396,grad_norm: 0.8263653107706748, iteration: 78186
loss: 1.0410425662994385,grad_norm: 0.9999993440657171, iteration: 78187
loss: 1.120259165763855,grad_norm: 0.9999993704146645, iteration: 78188
loss: 1.0751643180847168,grad_norm: 0.9999998313590086, iteration: 78189
loss: 1.0707404613494873,grad_norm: 0.9999996714936616, iteration: 78190
loss: 1.1922599077224731,grad_norm: 0.9999997741609958, iteration: 78191
loss: 1.2860400676727295,grad_norm: 0.9999996029622324, iteration: 78192
loss: 1.035365104675293,grad_norm: 0.9999997837898876, iteration: 78193
loss: 1.001072645187378,grad_norm: 0.9741522641428766, iteration: 78194
loss: 1.092901349067688,grad_norm: 0.9999999161931218, iteration: 78195
loss: 1.1875693798065186,grad_norm: 0.9999992441215922, iteration: 78196
loss: 1.0797502994537354,grad_norm: 0.9999993815880414, iteration: 78197
loss: 1.0873368978500366,grad_norm: 0.999999355158974, iteration: 78198
loss: 1.054439663887024,grad_norm: 0.9999999366895802, iteration: 78199
loss: 1.1896591186523438,grad_norm: 0.9999997981300416, iteration: 78200
loss: 1.1646617650985718,grad_norm: 0.9999994942013012, iteration: 78201
loss: 1.2988709211349487,grad_norm: 0.9999999059714914, iteration: 78202
loss: 1.437975525856018,grad_norm: 0.99999973618269, iteration: 78203
loss: 1.5566928386688232,grad_norm: 0.9999998566303372, iteration: 78204
loss: 1.634850263595581,grad_norm: 0.9999997587839958, iteration: 78205
loss: 1.6511956453323364,grad_norm: 0.9999994583463128, iteration: 78206
loss: 1.803712010383606,grad_norm: 0.9999998998717454, iteration: 78207
loss: 1.84270441532135,grad_norm: 0.9999998606945172, iteration: 78208
loss: 1.6775147914886475,grad_norm: 0.9999998713468834, iteration: 78209
loss: 1.4745030403137207,grad_norm: 0.9999998880355265, iteration: 78210
loss: 2.007179021835327,grad_norm: 0.9999997924291202, iteration: 78211
loss: 1.7371623516082764,grad_norm: 0.9999998147940782, iteration: 78212
loss: 1.6831694841384888,grad_norm: 0.9999998410918615, iteration: 78213
loss: 1.6300504207611084,grad_norm: 0.9999999233585422, iteration: 78214
loss: 1.6237560510635376,grad_norm: 0.9999996603812252, iteration: 78215
loss: 1.647182822227478,grad_norm: 0.9999998736781994, iteration: 78216
loss: 1.4162904024124146,grad_norm: 0.999999568611624, iteration: 78217
loss: 1.4845530986785889,grad_norm: 0.9999997552699007, iteration: 78218
loss: 1.777890920639038,grad_norm: 0.9999998925834548, iteration: 78219
loss: 1.6040769815444946,grad_norm: 0.9999998926104502, iteration: 78220
loss: 1.3136781454086304,grad_norm: 0.9999994500326352, iteration: 78221
loss: 1.1708743572235107,grad_norm: 0.9999992464482576, iteration: 78222
loss: 1.0427076816558838,grad_norm: 0.9999997717905443, iteration: 78223
loss: 0.997582197189331,grad_norm: 0.9999998968347411, iteration: 78224
loss: 0.9858225584030151,grad_norm: 0.9999992752134512, iteration: 78225
loss: 1.0990889072418213,grad_norm: 0.9999999112899733, iteration: 78226
loss: 1.2507070302963257,grad_norm: 0.9999997547519054, iteration: 78227
loss: 1.086501955986023,grad_norm: 0.9999991426300299, iteration: 78228
loss: 1.1908156871795654,grad_norm: 0.9999995526140845, iteration: 78229
loss: 1.1406599283218384,grad_norm: 0.9999992491186096, iteration: 78230
loss: 1.2998008728027344,grad_norm: 0.9999993573494734, iteration: 78231
loss: 1.3625298738479614,grad_norm: 0.9999998506792951, iteration: 78232
loss: 1.082467794418335,grad_norm: 0.9999992055448148, iteration: 78233
loss: 1.56313955783844,grad_norm: 0.9999998433538184, iteration: 78234
loss: 1.0871530771255493,grad_norm: 0.9704341729903819, iteration: 78235
loss: 1.4148560762405396,grad_norm: 0.9999997546875434, iteration: 78236
loss: 1.1668816804885864,grad_norm: 0.9999999336155031, iteration: 78237
loss: 1.2008938789367676,grad_norm: 0.9999994483451178, iteration: 78238
loss: 1.160259485244751,grad_norm: 0.9999995433586126, iteration: 78239
loss: 1.3032302856445312,grad_norm: 0.9999999778387126, iteration: 78240
loss: 1.066020131111145,grad_norm: 0.9999994338896556, iteration: 78241
loss: 1.4355192184448242,grad_norm: 0.9999994208603498, iteration: 78242
loss: 1.4459940195083618,grad_norm: 0.9999994679204609, iteration: 78243
loss: 1.1014796495437622,grad_norm: 0.9250373110478972, iteration: 78244
loss: 1.1773704290390015,grad_norm: 0.9999990900420072, iteration: 78245
loss: 1.410041093826294,grad_norm: 0.9999998020019412, iteration: 78246
loss: 1.2324745655059814,grad_norm: 0.9999997640794127, iteration: 78247
loss: 1.0232430696487427,grad_norm: 0.9999994125227841, iteration: 78248
loss: 1.203228235244751,grad_norm: 0.9999999714653757, iteration: 78249
loss: 1.1875523328781128,grad_norm: 0.9999992201878907, iteration: 78250
loss: 1.3209552764892578,grad_norm: 0.9999996355363163, iteration: 78251
loss: 1.1512105464935303,grad_norm: 0.9999997064634849, iteration: 78252
loss: 1.1872185468673706,grad_norm: 0.9999993263558773, iteration: 78253
loss: 1.139119029045105,grad_norm: 0.9999990958717808, iteration: 78254
loss: 1.1011382341384888,grad_norm: 0.9999996812086228, iteration: 78255
loss: 1.1987398862838745,grad_norm: 0.9999992151136304, iteration: 78256
loss: 1.053356647491455,grad_norm: 1.0000000700641118, iteration: 78257
loss: 1.1794688701629639,grad_norm: 0.9999993269650302, iteration: 78258
loss: 1.1687339544296265,grad_norm: 0.9999993868756422, iteration: 78259
loss: 1.1687265634536743,grad_norm: 0.9999999585161065, iteration: 78260
loss: 1.0937092304229736,grad_norm: 0.9999992621624257, iteration: 78261
loss: 1.0082987546920776,grad_norm: 0.9999994030968313, iteration: 78262
loss: 0.9666972160339355,grad_norm: 0.9999997809494265, iteration: 78263
loss: 1.1240723133087158,grad_norm: 0.9999998473352174, iteration: 78264
loss: 1.6393897533416748,grad_norm: 0.9999998258748349, iteration: 78265
loss: 1.0884970426559448,grad_norm: 0.9999995054523859, iteration: 78266
loss: 1.3254095315933228,grad_norm: 0.9999997147075355, iteration: 78267
loss: 1.2137521505355835,grad_norm: 0.999999385372525, iteration: 78268
loss: 1.3056867122650146,grad_norm: 0.9999996201942235, iteration: 78269
loss: 1.1068824529647827,grad_norm: 0.9999995399379732, iteration: 78270
loss: 1.055464506149292,grad_norm: 0.9999992281013946, iteration: 78271
loss: 1.1718205213546753,grad_norm: 0.999999439651222, iteration: 78272
loss: 1.191598653793335,grad_norm: 0.9999998698554458, iteration: 78273
loss: 1.1897053718566895,grad_norm: 0.9999998233624794, iteration: 78274
loss: 1.2641396522521973,grad_norm: 0.9999993321387547, iteration: 78275
loss: 1.1190173625946045,grad_norm: 0.9999997915835955, iteration: 78276
loss: 1.2994630336761475,grad_norm: 0.9999997967080563, iteration: 78277
loss: 1.1649513244628906,grad_norm: 0.9999998706680221, iteration: 78278
loss: 1.1676304340362549,grad_norm: 0.999999683207798, iteration: 78279
loss: 1.1303633451461792,grad_norm: 0.9999993030965675, iteration: 78280
loss: 1.137866497039795,grad_norm: 0.9999996080452703, iteration: 78281
loss: 1.1014305353164673,grad_norm: 0.9999994720758129, iteration: 78282
loss: 1.1232987642288208,grad_norm: 0.9999993535004407, iteration: 78283
loss: 1.0903587341308594,grad_norm: 0.999999273999403, iteration: 78284
loss: 1.1000852584838867,grad_norm: 0.9999990523235639, iteration: 78285
loss: 1.0418728590011597,grad_norm: 0.8728904997758747, iteration: 78286
loss: 1.0281561613082886,grad_norm: 0.9999991521173598, iteration: 78287
loss: 1.093259334564209,grad_norm: 0.9999997603015506, iteration: 78288
loss: 1.1220687627792358,grad_norm: 0.999999525280216, iteration: 78289
loss: 1.1244478225708008,grad_norm: 0.9999993431964025, iteration: 78290
loss: 1.2256726026535034,grad_norm: 0.9999992022702796, iteration: 78291
loss: 1.0349675416946411,grad_norm: 0.9999991904012737, iteration: 78292
loss: 1.0558146238327026,grad_norm: 0.9999992047184114, iteration: 78293
loss: 1.0530558824539185,grad_norm: 0.9999992599542334, iteration: 78294
loss: 1.1763533353805542,grad_norm: 0.9999999701970983, iteration: 78295
loss: 1.1223742961883545,grad_norm: 0.9999996840447041, iteration: 78296
loss: 1.14434814453125,grad_norm: 0.9999997217309099, iteration: 78297
loss: 1.182252049446106,grad_norm: 0.9999995325047929, iteration: 78298
loss: 1.1270668506622314,grad_norm: 0.999999470062926, iteration: 78299
loss: 1.009929895401001,grad_norm: 0.999999456376435, iteration: 78300
loss: 1.1023927927017212,grad_norm: 0.9999998771295805, iteration: 78301
loss: 1.2380870580673218,grad_norm: 0.9999998095248929, iteration: 78302
loss: 0.9851027131080627,grad_norm: 0.9519945303091537, iteration: 78303
loss: 0.9916657209396362,grad_norm: 0.999999119447554, iteration: 78304
loss: 1.0535566806793213,grad_norm: 0.9999993554245256, iteration: 78305
loss: 1.1344401836395264,grad_norm: 0.9999999147092454, iteration: 78306
loss: 1.1587780714035034,grad_norm: 0.999999807703785, iteration: 78307
loss: 1.1101740598678589,grad_norm: 0.999999060603039, iteration: 78308
loss: 1.0973165035247803,grad_norm: 0.9999997979078615, iteration: 78309
loss: 1.0860446691513062,grad_norm: 0.9999998414811817, iteration: 78310
loss: 1.040023684501648,grad_norm: 0.999999276918526, iteration: 78311
loss: 0.9941523671150208,grad_norm: 0.9999993984404633, iteration: 78312
loss: 0.9676879048347473,grad_norm: 0.82876911155823, iteration: 78313
loss: 1.0770626068115234,grad_norm: 0.9999995693417358, iteration: 78314
loss: 1.0126861333847046,grad_norm: 0.8540842829683211, iteration: 78315
loss: 0.9733797907829285,grad_norm: 0.9999992364236672, iteration: 78316
loss: 1.0937576293945312,grad_norm: 0.9999997108193921, iteration: 78317
loss: 1.0983657836914062,grad_norm: 0.9999994993925343, iteration: 78318
loss: 0.9786860942840576,grad_norm: 0.8688635791295781, iteration: 78319
loss: 1.0468195676803589,grad_norm: 0.9999993827828512, iteration: 78320
loss: 1.2801522016525269,grad_norm: 0.9999999106208576, iteration: 78321
loss: 1.006574034690857,grad_norm: 0.8259134101711686, iteration: 78322
loss: 0.9925307035446167,grad_norm: 0.9999996324478692, iteration: 78323
loss: 1.165332317352295,grad_norm: 0.9999994174580008, iteration: 78324
loss: 1.0803043842315674,grad_norm: 0.9473087232853282, iteration: 78325
loss: 1.0131863355636597,grad_norm: 0.8493100907517823, iteration: 78326
loss: 1.0485256910324097,grad_norm: 0.9633467056699577, iteration: 78327
loss: 1.0701885223388672,grad_norm: 0.999999685826435, iteration: 78328
loss: 1.0515531301498413,grad_norm: 0.9999996457492573, iteration: 78329
loss: 1.0363212823867798,grad_norm: 0.9999993708390603, iteration: 78330
loss: 1.0339833498001099,grad_norm: 0.9999998436409948, iteration: 78331
loss: 1.128068208694458,grad_norm: 0.9999993062160623, iteration: 78332
loss: 1.0531365871429443,grad_norm: 0.9999992063501534, iteration: 78333
loss: 1.124461054801941,grad_norm: 0.9999994778980061, iteration: 78334
loss: 1.0047111511230469,grad_norm: 0.9999997614257118, iteration: 78335
loss: 1.0559042692184448,grad_norm: 0.8826802625711682, iteration: 78336
loss: 1.046154260635376,grad_norm: 0.942017752008061, iteration: 78337
loss: 1.0470184087753296,grad_norm: 0.9946904169642866, iteration: 78338
loss: 1.0168066024780273,grad_norm: 0.9999994303527895, iteration: 78339
loss: 1.012857437133789,grad_norm: 0.8938690713229851, iteration: 78340
loss: 1.0045007467269897,grad_norm: 0.9252961242058606, iteration: 78341
loss: 1.0247212648391724,grad_norm: 0.9493978990253423, iteration: 78342
loss: 0.9817399978637695,grad_norm: 0.8503049793864365, iteration: 78343
loss: 0.9605433344841003,grad_norm: 0.9999991000607346, iteration: 78344
loss: 1.019901156425476,grad_norm: 0.9789435563087604, iteration: 78345
loss: 1.0332711935043335,grad_norm: 0.9999993561491993, iteration: 78346
loss: 1.0011647939682007,grad_norm: 0.9450608890183109, iteration: 78347
loss: 0.9944648146629333,grad_norm: 0.885686575099277, iteration: 78348
loss: 0.997719407081604,grad_norm: 0.9999991799256384, iteration: 78349
loss: 1.1633102893829346,grad_norm: 0.9999999403192251, iteration: 78350
loss: 1.0575193166732788,grad_norm: 0.9999994033350718, iteration: 78351
loss: 0.9760032892227173,grad_norm: 0.9989174078056086, iteration: 78352
loss: 1.004894733428955,grad_norm: 0.9999993021852495, iteration: 78353
loss: 1.097228765487671,grad_norm: 0.999999249198458, iteration: 78354
loss: 0.9659764766693115,grad_norm: 0.7900710075479281, iteration: 78355
loss: 1.2264506816864014,grad_norm: 0.9999997794757045, iteration: 78356
loss: 1.0285277366638184,grad_norm: 0.9999996949864922, iteration: 78357
loss: 1.0143368244171143,grad_norm: 0.9999989656427057, iteration: 78358
loss: 0.9896231889724731,grad_norm: 0.9999991876054553, iteration: 78359
loss: 0.9768434166908264,grad_norm: 0.9999995065921057, iteration: 78360
loss: 0.9982532858848572,grad_norm: 0.9343925513402791, iteration: 78361
loss: 1.0288597345352173,grad_norm: 0.9417446672531005, iteration: 78362
loss: 1.0983964204788208,grad_norm: 0.9999990411133004, iteration: 78363
loss: 0.9647773504257202,grad_norm: 0.8419081456268699, iteration: 78364
loss: 1.3787833452224731,grad_norm: 0.9999998058761675, iteration: 78365
loss: 1.037846565246582,grad_norm: 0.9647740835476896, iteration: 78366
loss: 1.0648223161697388,grad_norm: 0.9999991724902149, iteration: 78367
loss: 1.0280824899673462,grad_norm: 0.9999999304967202, iteration: 78368
loss: 1.0106241703033447,grad_norm: 0.9654661864418506, iteration: 78369
loss: 0.9994319081306458,grad_norm: 0.9547874765600519, iteration: 78370
loss: 1.0515761375427246,grad_norm: 0.9999989575276934, iteration: 78371
loss: 1.1254863739013672,grad_norm: 0.8841777443938278, iteration: 78372
loss: 1.0269640684127808,grad_norm: 0.9999994252688063, iteration: 78373
loss: 0.9552446603775024,grad_norm: 0.9999991306438435, iteration: 78374
loss: 1.0789201259613037,grad_norm: 0.8836451346255948, iteration: 78375
loss: 1.0618324279785156,grad_norm: 0.9999992794870152, iteration: 78376
loss: 1.0741004943847656,grad_norm: 0.9999992363127126, iteration: 78377
loss: 1.007584810256958,grad_norm: 0.9391433472205521, iteration: 78378
loss: 1.002971887588501,grad_norm: 0.9794963583129088, iteration: 78379
loss: 1.0070229768753052,grad_norm: 0.9999990475650423, iteration: 78380
loss: 1.0238513946533203,grad_norm: 0.9999990570619535, iteration: 78381
loss: 0.9976454973220825,grad_norm: 0.8880311205798624, iteration: 78382
loss: 1.0091105699539185,grad_norm: 0.9999995760576277, iteration: 78383
loss: 1.072950839996338,grad_norm: 0.9999991949337492, iteration: 78384
loss: 1.0335789918899536,grad_norm: 0.8179184231777383, iteration: 78385
loss: 0.9815849661827087,grad_norm: 0.9999993702499755, iteration: 78386
loss: 1.1903038024902344,grad_norm: 0.9999992595783372, iteration: 78387
loss: 1.1978062391281128,grad_norm: 0.999999440736346, iteration: 78388
loss: 1.0464541912078857,grad_norm: 0.8539985720373449, iteration: 78389
loss: 0.9770756959915161,grad_norm: 0.9999990549663457, iteration: 78390
loss: 1.006131649017334,grad_norm: 0.8053594723501275, iteration: 78391
loss: 0.9360124468803406,grad_norm: 0.9999990954893999, iteration: 78392
loss: 1.0059702396392822,grad_norm: 0.999999400436602, iteration: 78393
loss: 1.0701420307159424,grad_norm: 0.9565557792373333, iteration: 78394
loss: 1.226192593574524,grad_norm: 0.9999999970146274, iteration: 78395
loss: 0.9832900166511536,grad_norm: 0.9999990798191919, iteration: 78396
loss: 1.2499537467956543,grad_norm: 0.9999993973664256, iteration: 78397
loss: 1.0479122400283813,grad_norm: 0.9999998435975339, iteration: 78398
loss: 1.0073984861373901,grad_norm: 0.9999990968564436, iteration: 78399
loss: 0.9477056860923767,grad_norm: 0.839074923228327, iteration: 78400
loss: 1.010628342628479,grad_norm: 0.9463571983540375, iteration: 78401
loss: 1.0320215225219727,grad_norm: 0.9999995916083572, iteration: 78402
loss: 1.0324149131774902,grad_norm: 0.9417933416074691, iteration: 78403
loss: 1.0211130380630493,grad_norm: 0.9999989913414391, iteration: 78404
loss: 1.2169207334518433,grad_norm: 0.9999996749539928, iteration: 78405
loss: 0.9843704104423523,grad_norm: 0.9626719395349761, iteration: 78406
loss: 1.0502128601074219,grad_norm: 0.9999998559921243, iteration: 78407
loss: 1.0071183443069458,grad_norm: 0.9999990693139482, iteration: 78408
loss: 1.031988263130188,grad_norm: 0.8704389478411759, iteration: 78409
loss: 1.039335012435913,grad_norm: 0.9999990312664417, iteration: 78410
loss: 1.027486801147461,grad_norm: 0.9957135933281246, iteration: 78411
loss: 0.9495092034339905,grad_norm: 0.9380027155589183, iteration: 78412
loss: 1.0493040084838867,grad_norm: 0.9999996915420976, iteration: 78413
loss: 1.0141459703445435,grad_norm: 0.9999992147287562, iteration: 78414
loss: 0.9681713581085205,grad_norm: 0.9999992867425535, iteration: 78415
loss: 1.0026094913482666,grad_norm: 0.8905510572381821, iteration: 78416
loss: 0.9596737623214722,grad_norm: 0.8884056613901846, iteration: 78417
loss: 1.2576119899749756,grad_norm: 0.9999995912846137, iteration: 78418
loss: 1.0449780225753784,grad_norm: 0.9999990733117025, iteration: 78419
loss: 1.0209906101226807,grad_norm: 0.9999991419933256, iteration: 78420
loss: 1.0079593658447266,grad_norm: 0.8632593863929283, iteration: 78421
loss: 0.9747966527938843,grad_norm: 0.9999990016106826, iteration: 78422
loss: 1.0339235067367554,grad_norm: 0.9999990894056571, iteration: 78423
loss: 1.0268040895462036,grad_norm: 0.9311813004266426, iteration: 78424
loss: 1.0595802068710327,grad_norm: 0.9999999633013663, iteration: 78425
loss: 1.174487829208374,grad_norm: 0.9999991464907406, iteration: 78426
loss: 1.019370675086975,grad_norm: 0.9908089044779872, iteration: 78427
loss: 0.9565166234970093,grad_norm: 0.9769582608015734, iteration: 78428
loss: 1.1524178981781006,grad_norm: 0.9999997180105379, iteration: 78429
loss: 1.0078216791152954,grad_norm: 0.9227940498120145, iteration: 78430
loss: 1.1382865905761719,grad_norm: 0.9999996017964047, iteration: 78431
loss: 1.0169397592544556,grad_norm: 0.9999993184100071, iteration: 78432
loss: 0.9685534238815308,grad_norm: 0.9999990311648435, iteration: 78433
loss: 1.1871191263198853,grad_norm: 0.9999992659048402, iteration: 78434
loss: 0.9762839674949646,grad_norm: 0.9239025287300018, iteration: 78435
loss: 1.0188404321670532,grad_norm: 0.855895988518339, iteration: 78436
loss: 1.1386089324951172,grad_norm: 0.9999990708423556, iteration: 78437
loss: 1.0099101066589355,grad_norm: 0.9999990909812742, iteration: 78438
loss: 1.0765444040298462,grad_norm: 0.9999999782710961, iteration: 78439
loss: 0.9839332103729248,grad_norm: 0.9847417447113127, iteration: 78440
loss: 1.0323823690414429,grad_norm: 0.9999992413367641, iteration: 78441
loss: 1.0056098699569702,grad_norm: 0.9583241002449133, iteration: 78442
loss: 1.0136566162109375,grad_norm: 0.8814357239877795, iteration: 78443
loss: 1.0241016149520874,grad_norm: 0.9999994627897953, iteration: 78444
loss: 1.0334599018096924,grad_norm: 0.9999994325420171, iteration: 78445
loss: 1.08713698387146,grad_norm: 0.9999994443846899, iteration: 78446
loss: 1.0383014678955078,grad_norm: 0.9999991813876503, iteration: 78447
loss: 1.0756162405014038,grad_norm: 0.9764239335216248, iteration: 78448
loss: 1.1081620454788208,grad_norm: 1.0000000132729803, iteration: 78449
loss: 1.017664909362793,grad_norm: 0.8023291987762475, iteration: 78450
loss: 0.9854127764701843,grad_norm: 0.9648266641795356, iteration: 78451
loss: 1.2156654596328735,grad_norm: 0.999999347416626, iteration: 78452
loss: 0.9937246441841125,grad_norm: 0.8206616968621663, iteration: 78453
loss: 1.0118770599365234,grad_norm: 0.9962786562075133, iteration: 78454
loss: 1.0791667699813843,grad_norm: 0.8740561972148337, iteration: 78455
loss: 1.0063732862472534,grad_norm: 0.8415580948802973, iteration: 78456
loss: 1.0077781677246094,grad_norm: 0.9999990859343559, iteration: 78457
loss: 1.0115729570388794,grad_norm: 0.9999992212422503, iteration: 78458
loss: 1.039912462234497,grad_norm: 0.9999993095940395, iteration: 78459
loss: 1.0140472650527954,grad_norm: 0.9673148732661553, iteration: 78460
loss: 0.9806489944458008,grad_norm: 0.9999991916627602, iteration: 78461
loss: 1.022554636001587,grad_norm: 0.9999991979304113, iteration: 78462
loss: 0.9820074439048767,grad_norm: 0.9999991182028476, iteration: 78463
loss: 1.0437273979187012,grad_norm: 0.8265885714671913, iteration: 78464
loss: 1.0319911241531372,grad_norm: 0.9999992209124489, iteration: 78465
loss: 0.997218906879425,grad_norm: 0.9999992705384985, iteration: 78466
loss: 1.0220743417739868,grad_norm: 0.9913437001961334, iteration: 78467
loss: 1.03523588180542,grad_norm: 0.9245656639456812, iteration: 78468
loss: 0.9667500853538513,grad_norm: 0.8559806177677061, iteration: 78469
loss: 1.013646125793457,grad_norm: 0.9999991696733536, iteration: 78470
loss: 1.038361668586731,grad_norm: 0.8774743722473801, iteration: 78471
loss: 1.060340166091919,grad_norm: 0.9929432041543153, iteration: 78472
loss: 0.9915786981582642,grad_norm: 0.9999990022814045, iteration: 78473
loss: 1.4197847843170166,grad_norm: 0.999999887793959, iteration: 78474
loss: 0.9944025278091431,grad_norm: 0.827841762222518, iteration: 78475
loss: 0.9859497547149658,grad_norm: 0.9279505448829857, iteration: 78476
loss: 0.9955424666404724,grad_norm: 0.999999091708815, iteration: 78477
loss: 1.001498818397522,grad_norm: 0.9999991751033809, iteration: 78478
loss: 1.0594874620437622,grad_norm: 0.9999991135329671, iteration: 78479
loss: 1.0329939126968384,grad_norm: 0.9999990844251823, iteration: 78480
loss: 0.9925733208656311,grad_norm: 0.9999994734076642, iteration: 78481
loss: 1.0184839963912964,grad_norm: 0.9369344063393429, iteration: 78482
loss: 0.9819086790084839,grad_norm: 0.9750881326295633, iteration: 78483
loss: 1.0490291118621826,grad_norm: 0.9103338296614314, iteration: 78484
loss: 1.0093986988067627,grad_norm: 0.9999990463768823, iteration: 78485
loss: 1.0761057138442993,grad_norm: 0.9999991280092767, iteration: 78486
loss: 1.0266259908676147,grad_norm: 0.9999992660852758, iteration: 78487
loss: 0.9910022616386414,grad_norm: 0.9657572431405952, iteration: 78488
loss: 1.0312210321426392,grad_norm: 0.9999994496429814, iteration: 78489
loss: 1.008791208267212,grad_norm: 0.8913703556951604, iteration: 78490
loss: 0.99832683801651,grad_norm: 0.8596954734598491, iteration: 78491
loss: 1.0119597911834717,grad_norm: 0.986580145627902, iteration: 78492
loss: 1.0477075576782227,grad_norm: 0.9999991586519977, iteration: 78493
loss: 0.9737484455108643,grad_norm: 0.9030720321897359, iteration: 78494
loss: 0.9738417267799377,grad_norm: 0.9832063379009588, iteration: 78495
loss: 1.0096575021743774,grad_norm: 0.9999989556323353, iteration: 78496
loss: 1.0207674503326416,grad_norm: 0.9017015636205703, iteration: 78497
loss: 1.0958459377288818,grad_norm: 0.7848589797312686, iteration: 78498
loss: 1.0400500297546387,grad_norm: 0.9999992698852217, iteration: 78499
loss: 1.0132997035980225,grad_norm: 0.9176248225003191, iteration: 78500
loss: 1.0524040460586548,grad_norm: 0.9999990744909355, iteration: 78501
loss: 1.0449273586273193,grad_norm: 0.9017512804863771, iteration: 78502
loss: 1.0085421800613403,grad_norm: 0.9999994940887305, iteration: 78503
loss: 1.1975643634796143,grad_norm: 0.9999995223280451, iteration: 78504
loss: 0.9814712405204773,grad_norm: 0.9999991902196168, iteration: 78505
loss: 1.1023008823394775,grad_norm: 0.9999994442329492, iteration: 78506
loss: 1.1545664072036743,grad_norm: 1.0000000019363033, iteration: 78507
loss: 1.008629560470581,grad_norm: 0.8432437956904403, iteration: 78508
loss: 1.0126793384552002,grad_norm: 0.9700355817340507, iteration: 78509
loss: 1.0263431072235107,grad_norm: 0.9999992113627659, iteration: 78510
loss: 1.0212353467941284,grad_norm: 0.9999992401876179, iteration: 78511
loss: 0.9766796827316284,grad_norm: 0.7348379628715203, iteration: 78512
loss: 1.0620216131210327,grad_norm: 0.9999997870237906, iteration: 78513
loss: 0.9928979277610779,grad_norm: 0.7594086515987075, iteration: 78514
loss: 0.9868133068084717,grad_norm: 0.9999991123179829, iteration: 78515
loss: 1.0669453144073486,grad_norm: 0.9999992586297527, iteration: 78516
loss: 1.0349191427230835,grad_norm: 0.8391527132854121, iteration: 78517
loss: 1.0355033874511719,grad_norm: 0.9999998588360348, iteration: 78518
loss: 0.99240642786026,grad_norm: 0.999999430102122, iteration: 78519
loss: 0.9861788153648376,grad_norm: 0.9999991408717113, iteration: 78520
loss: 0.985095739364624,grad_norm: 0.8408708705124035, iteration: 78521
loss: 1.0321533679962158,grad_norm: 0.9999990993695872, iteration: 78522
loss: 1.0222018957138062,grad_norm: 0.9726932412031176, iteration: 78523
loss: 0.9613290429115295,grad_norm: 0.82981491182814, iteration: 78524
loss: 0.9433352947235107,grad_norm: 0.9396468425656753, iteration: 78525
loss: 0.98509281873703,grad_norm: 0.8862311795027282, iteration: 78526
loss: 1.0490401983261108,grad_norm: 0.9999996214493111, iteration: 78527
loss: 1.031415581703186,grad_norm: 0.9999995918297653, iteration: 78528
loss: 0.954478919506073,grad_norm: 0.8651745893773228, iteration: 78529
loss: 1.0612462759017944,grad_norm: 0.9999990509941117, iteration: 78530
loss: 0.9944073557853699,grad_norm: 0.8094146283774866, iteration: 78531
loss: 0.9836220741271973,grad_norm: 0.8629379769017135, iteration: 78532
loss: 1.0205180644989014,grad_norm: 0.9307652865658643, iteration: 78533
loss: 1.0490047931671143,grad_norm: 0.9999993260204828, iteration: 78534
loss: 1.0987811088562012,grad_norm: 0.9999999098026102, iteration: 78535
loss: 1.014441728591919,grad_norm: 0.9999990531015583, iteration: 78536
loss: 1.0270761251449585,grad_norm: 0.9999991357552368, iteration: 78537
loss: 1.0263642072677612,grad_norm: 0.8686848057135268, iteration: 78538
loss: 1.0229723453521729,grad_norm: 0.9518868335605125, iteration: 78539
loss: 1.0263004302978516,grad_norm: 0.8315355082785943, iteration: 78540
loss: 0.988834798336029,grad_norm: 0.999999795300802, iteration: 78541
loss: 1.0149883031845093,grad_norm: 0.9999991221821958, iteration: 78542
loss: 1.0503343343734741,grad_norm: 0.9999993506721929, iteration: 78543
loss: 1.051459789276123,grad_norm: 0.99999913145033, iteration: 78544
loss: 1.0206539630889893,grad_norm: 0.9313929502328406, iteration: 78545
loss: 1.044372320175171,grad_norm: 0.9016990407402647, iteration: 78546
loss: 0.9843882918357849,grad_norm: 0.9258016107631841, iteration: 78547
loss: 1.019828200340271,grad_norm: 0.9999991651641637, iteration: 78548
loss: 1.0204588174819946,grad_norm: 0.9999991076562285, iteration: 78549
loss: 1.0515941381454468,grad_norm: 0.9320733295785006, iteration: 78550
loss: 1.0391991138458252,grad_norm: 0.9999990623099855, iteration: 78551
loss: 1.0400855541229248,grad_norm: 0.999999470005833, iteration: 78552
loss: 1.0139061212539673,grad_norm: 0.9999991169693898, iteration: 78553
loss: 1.106099009513855,grad_norm: 0.9999999042704101, iteration: 78554
loss: 1.0799168348312378,grad_norm: 0.9999995548395274, iteration: 78555
loss: 0.9893060922622681,grad_norm: 0.9999991259813704, iteration: 78556
loss: 1.038007378578186,grad_norm: 0.9999993287398784, iteration: 78557
loss: 0.9705517292022705,grad_norm: 0.8636864014364073, iteration: 78558
loss: 0.9937291741371155,grad_norm: 0.9999994456338355, iteration: 78559
loss: 0.9877595901489258,grad_norm: 0.9851982882977267, iteration: 78560
loss: 1.002145528793335,grad_norm: 0.9743600918011363, iteration: 78561
loss: 0.9984102249145508,grad_norm: 0.9956050991940212, iteration: 78562
loss: 1.0186498165130615,grad_norm: 0.9999991834668609, iteration: 78563
loss: 1.015824317932129,grad_norm: 0.9999999288291788, iteration: 78564
loss: 1.0351064205169678,grad_norm: 0.8655985911561319, iteration: 78565
loss: 1.002135157585144,grad_norm: 0.8412326863101117, iteration: 78566
loss: 1.0193336009979248,grad_norm: 0.999999115731014, iteration: 78567
loss: 1.0461524724960327,grad_norm: 0.9999994248959827, iteration: 78568
loss: 1.02311110496521,grad_norm: 0.9258901403987729, iteration: 78569
loss: 1.0259321928024292,grad_norm: 0.9999993438576845, iteration: 78570
loss: 0.9985663294792175,grad_norm: 0.9999990954837769, iteration: 78571
loss: 1.0340031385421753,grad_norm: 0.9999990355685656, iteration: 78572
loss: 0.9979498386383057,grad_norm: 0.8947182916953962, iteration: 78573
loss: 1.000752329826355,grad_norm: 0.9329983108318429, iteration: 78574
loss: 0.9981730580329895,grad_norm: 0.9232906992947908, iteration: 78575
loss: 0.9667505621910095,grad_norm: 0.8071798338092857, iteration: 78576
loss: 0.9970003366470337,grad_norm: 0.9999991235099318, iteration: 78577
loss: 1.006869912147522,grad_norm: 0.917999447837976, iteration: 78578
loss: 1.0066540241241455,grad_norm: 0.982167650806295, iteration: 78579
loss: 0.9848732352256775,grad_norm: 0.9999997079781838, iteration: 78580
loss: 1.127920150756836,grad_norm: 0.9999994372110993, iteration: 78581
loss: 1.0569151639938354,grad_norm: 0.9999997126897417, iteration: 78582
loss: 1.023634910583496,grad_norm: 0.9999990454663602, iteration: 78583
loss: 1.1030118465423584,grad_norm: 0.9754366787418083, iteration: 78584
loss: 1.0194464921951294,grad_norm: 0.9305430511131685, iteration: 78585
loss: 1.0319052934646606,grad_norm: 0.9699680896015742, iteration: 78586
loss: 1.0183757543563843,grad_norm: 0.9999990497999864, iteration: 78587
loss: 1.0341562032699585,grad_norm: 0.9999995492366244, iteration: 78588
loss: 0.9504410028457642,grad_norm: 0.8800502806782212, iteration: 78589
loss: 0.975208044052124,grad_norm: 0.9399724932122099, iteration: 78590
loss: 1.0500508546829224,grad_norm: 0.9662097589893675, iteration: 78591
loss: 1.023880124092102,grad_norm: 0.8576775445468471, iteration: 78592
loss: 0.9958209991455078,grad_norm: 0.9999991931607256, iteration: 78593
loss: 0.9894343614578247,grad_norm: 0.8628820093958216, iteration: 78594
loss: 0.9979707598686218,grad_norm: 0.7317578027613776, iteration: 78595
loss: 1.0054799318313599,grad_norm: 0.80544651639801, iteration: 78596
loss: 0.9780341982841492,grad_norm: 0.8795633054483022, iteration: 78597
loss: 0.9749143719673157,grad_norm: 0.9999989245164511, iteration: 78598
loss: 1.001513123512268,grad_norm: 0.9999990051308726, iteration: 78599
loss: 1.011812448501587,grad_norm: 0.9999991410302016, iteration: 78600
loss: 1.0852984189987183,grad_norm: 0.9999992879548348, iteration: 78601
loss: 0.9978794455528259,grad_norm: 0.9562357684359166, iteration: 78602
loss: 0.980951726436615,grad_norm: 0.9999991061548058, iteration: 78603
loss: 1.0987499952316284,grad_norm: 0.9999998984129469, iteration: 78604
loss: 1.0016807317733765,grad_norm: 0.9999993367448728, iteration: 78605
loss: 1.0179518461227417,grad_norm: 0.9771834347614651, iteration: 78606
loss: 1.004709243774414,grad_norm: 0.9999992497668075, iteration: 78607
loss: 0.9961503744125366,grad_norm: 0.8472938997612376, iteration: 78608
loss: 1.020654559135437,grad_norm: 0.9999991245236103, iteration: 78609
loss: 0.9946717023849487,grad_norm: 0.8309842942359202, iteration: 78610
loss: 1.0196715593338013,grad_norm: 0.9999993525240047, iteration: 78611
loss: 1.0155316591262817,grad_norm: 0.9999992848810813, iteration: 78612
loss: 0.9747861623764038,grad_norm: 0.9999990013092874, iteration: 78613
loss: 0.9895622134208679,grad_norm: 0.8704883965354197, iteration: 78614
loss: 1.021611213684082,grad_norm: 0.9999990950043094, iteration: 78615
loss: 0.9884490966796875,grad_norm: 0.9999991135992153, iteration: 78616
loss: 0.9839296936988831,grad_norm: 0.9770152259862681, iteration: 78617
loss: 1.0281310081481934,grad_norm: 0.8879257501453923, iteration: 78618
loss: 0.9606186151504517,grad_norm: 0.9701245526799112, iteration: 78619
loss: 0.9768667817115784,grad_norm: 0.9905319922944382, iteration: 78620
loss: 1.018181562423706,grad_norm: 0.8911699634307837, iteration: 78621
loss: 1.0041052103042603,grad_norm: 0.9999993352099743, iteration: 78622
loss: 1.0533056259155273,grad_norm: 0.9999991132542909, iteration: 78623
loss: 1.043242335319519,grad_norm: 0.9999998753165105, iteration: 78624
loss: 0.9828046560287476,grad_norm: 0.9999993443288515, iteration: 78625
loss: 1.0134108066558838,grad_norm: 0.9605885157522505, iteration: 78626
loss: 0.9995434880256653,grad_norm: 0.7938291954611947, iteration: 78627
loss: 0.9687238335609436,grad_norm: 0.9758946908011493, iteration: 78628
loss: 1.0079444646835327,grad_norm: 0.9353379158311779, iteration: 78629
loss: 1.0081185102462769,grad_norm: 0.9999990790790174, iteration: 78630
loss: 0.9835702776908875,grad_norm: 0.942076246185982, iteration: 78631
loss: 0.9881716370582581,grad_norm: 0.8945876632397146, iteration: 78632
loss: 0.9752631783485413,grad_norm: 0.8176399275538943, iteration: 78633
loss: 0.9922009706497192,grad_norm: 0.9768759691090277, iteration: 78634
loss: 1.0670275688171387,grad_norm: 0.9999999261134919, iteration: 78635
loss: 1.0026131868362427,grad_norm: 0.9999993951161605, iteration: 78636
loss: 0.983022928237915,grad_norm: 0.9999994101699398, iteration: 78637
loss: 1.0120420455932617,grad_norm: 0.9406316867129648, iteration: 78638
loss: 0.9972757697105408,grad_norm: 0.9347450358258883, iteration: 78639
loss: 1.0336788892745972,grad_norm: 0.9999990705129262, iteration: 78640
loss: 1.0521280765533447,grad_norm: 0.9999996636859426, iteration: 78641
loss: 1.036495327949524,grad_norm: 0.9999998429232552, iteration: 78642
loss: 1.021330714225769,grad_norm: 0.9999990285393818, iteration: 78643
loss: 1.1029261350631714,grad_norm: 0.9999991272596717, iteration: 78644
loss: 1.015462040901184,grad_norm: 0.8242056282873709, iteration: 78645
loss: 1.0076791048049927,grad_norm: 0.9999991465136403, iteration: 78646
loss: 0.976824164390564,grad_norm: 0.9999990682660482, iteration: 78647
loss: 0.9836611747741699,grad_norm: 0.9999992246289869, iteration: 78648
loss: 1.0303188562393188,grad_norm: 0.9001181500505675, iteration: 78649
loss: 1.0199217796325684,grad_norm: 0.9999989465429718, iteration: 78650
loss: 0.9862961173057556,grad_norm: 0.897348077247721, iteration: 78651
loss: 1.0176998376846313,grad_norm: 0.934408333253779, iteration: 78652
loss: 0.9848794341087341,grad_norm: 0.8646354967978239, iteration: 78653
loss: 0.9758577346801758,grad_norm: 0.9999990688401021, iteration: 78654
loss: 0.9970952272415161,grad_norm: 0.8610109610935766, iteration: 78655
loss: 0.9925412535667419,grad_norm: 0.9298122143083138, iteration: 78656
loss: 0.9740865230560303,grad_norm: 0.9999998862538498, iteration: 78657
loss: 0.964036226272583,grad_norm: 0.9999994125169459, iteration: 78658
loss: 1.0158324241638184,grad_norm: 0.7905025590238124, iteration: 78659
loss: 0.9850131869316101,grad_norm: 0.8559012282161625, iteration: 78660
loss: 0.9836224913597107,grad_norm: 0.8819623885555435, iteration: 78661
loss: 0.9787392020225525,grad_norm: 0.7894425019127108, iteration: 78662
loss: 0.9839524030685425,grad_norm: 0.8912507137077078, iteration: 78663
loss: 1.102187156677246,grad_norm: 0.9999992112934394, iteration: 78664
loss: 1.0461430549621582,grad_norm: 0.9999990462747279, iteration: 78665
loss: 1.0199929475784302,grad_norm: 0.9999991211971807, iteration: 78666
loss: 0.9711088538169861,grad_norm: 0.9999990542251644, iteration: 78667
loss: 1.0060170888900757,grad_norm: 0.8354458180083874, iteration: 78668
loss: 0.9811065793037415,grad_norm: 0.9999992272476731, iteration: 78669
loss: 0.9679978489875793,grad_norm: 0.929511509108995, iteration: 78670
loss: 1.0144665241241455,grad_norm: 0.9516607302026989, iteration: 78671
loss: 0.9946631789207458,grad_norm: 0.9652142484689259, iteration: 78672
loss: 1.062048316001892,grad_norm: 0.9999998706524499, iteration: 78673
loss: 1.0012844800949097,grad_norm: 0.9412518491403461, iteration: 78674
loss: 1.0110137462615967,grad_norm: 0.8206910266823778, iteration: 78675
loss: 1.0170029401779175,grad_norm: 0.8554771127584198, iteration: 78676
loss: 0.9942950010299683,grad_norm: 0.9109935348115057, iteration: 78677
loss: 1.0088086128234863,grad_norm: 0.84745375987215, iteration: 78678
loss: 0.9966411590576172,grad_norm: 0.8285480608746022, iteration: 78679
loss: 1.0612049102783203,grad_norm: 0.8976014908251309, iteration: 78680
loss: 0.9907140135765076,grad_norm: 0.8453490535587389, iteration: 78681
loss: 1.0202133655548096,grad_norm: 0.9999999402882225, iteration: 78682
loss: 1.0253634452819824,grad_norm: 0.9990852755976509, iteration: 78683
loss: 1.0497281551361084,grad_norm: 0.9999998792345384, iteration: 78684
loss: 0.9632976651191711,grad_norm: 0.999999123931124, iteration: 78685
loss: 1.033090353012085,grad_norm: 0.9999989866408622, iteration: 78686
loss: 1.0457649230957031,grad_norm: 0.999999804607031, iteration: 78687
loss: 1.0346773862838745,grad_norm: 0.9999989904191396, iteration: 78688
loss: 0.9598482847213745,grad_norm: 0.9522439201829251, iteration: 78689
loss: 1.0009230375289917,grad_norm: 0.9999991120712834, iteration: 78690
loss: 1.0141505002975464,grad_norm: 0.9999991408603615, iteration: 78691
loss: 0.9767521619796753,grad_norm: 0.978623730860642, iteration: 78692
loss: 1.0166621208190918,grad_norm: 0.819355724901452, iteration: 78693
loss: 0.9940859079360962,grad_norm: 0.9507458584185897, iteration: 78694
loss: 1.0541776418685913,grad_norm: 0.9999997391853884, iteration: 78695
loss: 1.0215041637420654,grad_norm: 0.9397456754382197, iteration: 78696
loss: 1.0180398225784302,grad_norm: 0.9999991926966771, iteration: 78697
loss: 1.0026922225952148,grad_norm: 0.999999040519245, iteration: 78698
loss: 1.1063485145568848,grad_norm: 0.9999997836963116, iteration: 78699
loss: 1.0522607564926147,grad_norm: 0.9999993044240894, iteration: 78700
loss: 0.9954187870025635,grad_norm: 0.8661233128082075, iteration: 78701
loss: 0.9872273802757263,grad_norm: 0.8858376311864442, iteration: 78702
loss: 1.0174806118011475,grad_norm: 0.9999992776359161, iteration: 78703
loss: 0.9966580271720886,grad_norm: 0.831000752045062, iteration: 78704
loss: 0.990027904510498,grad_norm: 0.9999991731319747, iteration: 78705
loss: 0.9488787055015564,grad_norm: 0.8943446891575192, iteration: 78706
loss: 1.0050126314163208,grad_norm: 0.889809679294543, iteration: 78707
loss: 0.9587892889976501,grad_norm: 0.9999992996130509, iteration: 78708
loss: 1.000821828842163,grad_norm: 0.9999990756447018, iteration: 78709
loss: 0.9939819574356079,grad_norm: 0.9400815697162234, iteration: 78710
loss: 1.0071910619735718,grad_norm: 0.9999994466094712, iteration: 78711
loss: 1.0307934284210205,grad_norm: 0.9503433512382617, iteration: 78712
loss: 1.0023078918457031,grad_norm: 0.9999993561994645, iteration: 78713
loss: 1.0245344638824463,grad_norm: 0.999999246172062, iteration: 78714
loss: 0.9658218622207642,grad_norm: 0.9119716139727635, iteration: 78715
loss: 1.037199854850769,grad_norm: 0.9999996605374201, iteration: 78716
loss: 1.004926085472107,grad_norm: 0.9138304654919891, iteration: 78717
loss: 1.0609384775161743,grad_norm: 0.9981969772078524, iteration: 78718
loss: 1.0088647603988647,grad_norm: 0.7793373830959006, iteration: 78719
loss: 1.0085359811782837,grad_norm: 0.9999994189273316, iteration: 78720
loss: 1.0277208089828491,grad_norm: 0.9219408518615114, iteration: 78721
loss: 0.9869470596313477,grad_norm: 0.9574062070300641, iteration: 78722
loss: 0.9959303140640259,grad_norm: 0.9999992403234906, iteration: 78723
loss: 0.9590076804161072,grad_norm: 0.9999991299448854, iteration: 78724
loss: 1.0009137392044067,grad_norm: 0.9999992720456523, iteration: 78725
loss: 1.0108766555786133,grad_norm: 0.9999992261146463, iteration: 78726
loss: 1.0447312593460083,grad_norm: 0.9999991113547073, iteration: 78727
loss: 1.0017105340957642,grad_norm: 0.9529007229532392, iteration: 78728
loss: 1.1517963409423828,grad_norm: 0.9999994347268983, iteration: 78729
loss: 0.9666857719421387,grad_norm: 0.7875849333421978, iteration: 78730
loss: 0.9677777886390686,grad_norm: 0.9999991174277221, iteration: 78731
loss: 0.9637567400932312,grad_norm: 0.8711268316662237, iteration: 78732
loss: 1.033970594406128,grad_norm: 0.9828201440660682, iteration: 78733
loss: 1.0444990396499634,grad_norm: 0.99611313756477, iteration: 78734
loss: 1.019997000694275,grad_norm: 0.9408092717452995, iteration: 78735
loss: 1.0008214712142944,grad_norm: 0.7323356888653989, iteration: 78736
loss: 0.9932585954666138,grad_norm: 0.9978512434157978, iteration: 78737
loss: 0.9964649081230164,grad_norm: 0.9999991174525146, iteration: 78738
loss: 1.0177319049835205,grad_norm: 0.9451206916275863, iteration: 78739
loss: 0.9860922694206238,grad_norm: 0.7762601570060967, iteration: 78740
loss: 1.0278961658477783,grad_norm: 0.9999990485205338, iteration: 78741
loss: 0.9472150802612305,grad_norm: 0.840741607064199, iteration: 78742
loss: 0.9988732933998108,grad_norm: 0.9073866221801256, iteration: 78743
loss: 1.031955599784851,grad_norm: 0.999999196900103, iteration: 78744
loss: 0.9706911444664001,grad_norm: 0.999999172957425, iteration: 78745
loss: 0.9983184337615967,grad_norm: 0.9737246892244864, iteration: 78746
loss: 1.0153234004974365,grad_norm: 0.887597514853862, iteration: 78747
loss: 1.045133352279663,grad_norm: 0.999999311557238, iteration: 78748
loss: 0.994839608669281,grad_norm: 0.7509613092984313, iteration: 78749
loss: 1.0208203792572021,grad_norm: 0.8935358255314864, iteration: 78750
loss: 0.9924041032791138,grad_norm: 0.9792397695249102, iteration: 78751
loss: 1.0294147729873657,grad_norm: 0.947167074557479, iteration: 78752
loss: 0.9712444543838501,grad_norm: 0.8276356366005155, iteration: 78753
loss: 1.0006965398788452,grad_norm: 0.9999991181976626, iteration: 78754
loss: 0.9996697902679443,grad_norm: 0.9999991661145866, iteration: 78755
loss: 0.9921610951423645,grad_norm: 0.9999991512381461, iteration: 78756
loss: 1.0506677627563477,grad_norm: 0.9999992233241911, iteration: 78757
loss: 1.0102226734161377,grad_norm: 0.9999992208402623, iteration: 78758
loss: 1.015832543373108,grad_norm: 0.7711830342141929, iteration: 78759
loss: 1.0019499063491821,grad_norm: 0.8282577082221498, iteration: 78760
loss: 0.9509997963905334,grad_norm: 0.9999991076519412, iteration: 78761
loss: 1.0257664918899536,grad_norm: 0.9999989395045827, iteration: 78762
loss: 1.011800765991211,grad_norm: 0.8520840984721346, iteration: 78763
loss: 1.0297473669052124,grad_norm: 0.9999999532948082, iteration: 78764
loss: 1.0480289459228516,grad_norm: 0.9478882890583402, iteration: 78765
loss: 1.017432451248169,grad_norm: 0.9543738316851613, iteration: 78766
loss: 0.994233250617981,grad_norm: 0.999999010026475, iteration: 78767
loss: 1.0556097030639648,grad_norm: 0.9999991387818108, iteration: 78768
loss: 0.9945685863494873,grad_norm: 0.8904949912822127, iteration: 78769
loss: 1.0019093751907349,grad_norm: 0.9999991118808274, iteration: 78770
loss: 0.9922186732292175,grad_norm: 0.9999989473471252, iteration: 78771
loss: 1.0375738143920898,grad_norm: 0.928440495003028, iteration: 78772
loss: 1.0232545137405396,grad_norm: 0.8386069673244712, iteration: 78773
loss: 1.0225486755371094,grad_norm: 0.9102806151357575, iteration: 78774
loss: 0.9840576648712158,grad_norm: 0.9999988618777038, iteration: 78775
loss: 0.9923990964889526,grad_norm: 0.9999991581264085, iteration: 78776
loss: 1.0122205018997192,grad_norm: 0.9999990518699678, iteration: 78777
loss: 1.0155304670333862,grad_norm: 0.8136178072832204, iteration: 78778
loss: 0.9771865010261536,grad_norm: 0.9999990388627263, iteration: 78779
loss: 1.008428931236267,grad_norm: 0.9999991501227786, iteration: 78780
loss: 0.9827769994735718,grad_norm: 0.8014396535335285, iteration: 78781
loss: 0.9963971376419067,grad_norm: 0.9384958988947726, iteration: 78782
loss: 0.9720273017883301,grad_norm: 0.9080970137630848, iteration: 78783
loss: 1.0154534578323364,grad_norm: 0.9870816412420131, iteration: 78784
loss: 1.010360598564148,grad_norm: 0.9999991909430247, iteration: 78785
loss: 1.027308464050293,grad_norm: 0.9999989296546012, iteration: 78786
loss: 1.0263899564743042,grad_norm: 0.8367762535176058, iteration: 78787
loss: 0.9705592393875122,grad_norm: 0.9590592562215868, iteration: 78788
loss: 1.0344973802566528,grad_norm: 0.8907507632709041, iteration: 78789
loss: 1.0006234645843506,grad_norm: 0.9900838725582405, iteration: 78790
loss: 0.9870077967643738,grad_norm: 0.9999991719245501, iteration: 78791
loss: 0.9467890858650208,grad_norm: 0.8610505902506437, iteration: 78792
loss: 0.9977794885635376,grad_norm: 0.9825791764790227, iteration: 78793
loss: 1.0235567092895508,grad_norm: 0.8857152548392913, iteration: 78794
loss: 0.9952238202095032,grad_norm: 0.8039719457645071, iteration: 78795
loss: 1.015153408050537,grad_norm: 0.910743547741699, iteration: 78796
loss: 1.0239620208740234,grad_norm: 0.9999989927005347, iteration: 78797
loss: 0.9911054968833923,grad_norm: 0.7979536055325779, iteration: 78798
loss: 1.0024317502975464,grad_norm: 0.7571592090427299, iteration: 78799
loss: 0.9832480549812317,grad_norm: 0.9999990972793728, iteration: 78800
loss: 0.9997230768203735,grad_norm: 0.8931927408587277, iteration: 78801
loss: 1.022619605064392,grad_norm: 0.8355209559479295, iteration: 78802
loss: 1.0070029497146606,grad_norm: 0.9290994729186389, iteration: 78803
loss: 1.0033936500549316,grad_norm: 0.9999991024505002, iteration: 78804
loss: 1.0377938747406006,grad_norm: 0.9169335174622933, iteration: 78805
loss: 0.9990087151527405,grad_norm: 0.9999990829803045, iteration: 78806
loss: 0.9749996662139893,grad_norm: 0.9795078333964307, iteration: 78807
loss: 0.9830743074417114,grad_norm: 0.9999998335711224, iteration: 78808
loss: 0.9621833562850952,grad_norm: 0.9315888012374666, iteration: 78809
loss: 1.030503749847412,grad_norm: 0.8547575847210543, iteration: 78810
loss: 0.9969953894615173,grad_norm: 0.8548746623452786, iteration: 78811
loss: 1.016829013824463,grad_norm: 0.9999990801629043, iteration: 78812
loss: 0.9759261608123779,grad_norm: 0.8605033840430418, iteration: 78813
loss: 0.982909083366394,grad_norm: 0.9999992451589338, iteration: 78814
loss: 0.9976931810379028,grad_norm: 0.7161573800067826, iteration: 78815
loss: 0.9906612634658813,grad_norm: 0.9999994125039663, iteration: 78816
loss: 1.037922739982605,grad_norm: 0.9999990070781375, iteration: 78817
loss: 1.0540475845336914,grad_norm: 0.99999898551466, iteration: 78818
loss: 0.9857406616210938,grad_norm: 0.9999991011428916, iteration: 78819
loss: 1.0849188566207886,grad_norm: 0.9999991777022742, iteration: 78820
loss: 1.0324503183364868,grad_norm: 0.9131369432946747, iteration: 78821
loss: 1.171871542930603,grad_norm: 0.999999363125002, iteration: 78822
loss: 0.9577177166938782,grad_norm: 0.8628776244633827, iteration: 78823
loss: 0.9899160265922546,grad_norm: 0.8460739609441277, iteration: 78824
loss: 1.0210375785827637,grad_norm: 0.9603032098658701, iteration: 78825
loss: 1.0038602352142334,grad_norm: 0.9976054166602539, iteration: 78826
loss: 0.9970282912254333,grad_norm: 0.9999991878815657, iteration: 78827
loss: 1.0094836950302124,grad_norm: 0.901746195515589, iteration: 78828
loss: 0.9982567429542542,grad_norm: 0.9352600835647651, iteration: 78829
loss: 0.9774994850158691,grad_norm: 0.860936106732739, iteration: 78830
loss: 0.9838910102844238,grad_norm: 0.7680761068624556, iteration: 78831
loss: 1.006407380104065,grad_norm: 0.9999992538712925, iteration: 78832
loss: 0.979199230670929,grad_norm: 0.9999992104754343, iteration: 78833
loss: 0.9688278436660767,grad_norm: 0.8668984097804596, iteration: 78834
loss: 0.9963116645812988,grad_norm: 0.9999992030143788, iteration: 78835
loss: 1.0158240795135498,grad_norm: 0.7522034933452648, iteration: 78836
loss: 0.9519555568695068,grad_norm: 0.8234640182655437, iteration: 78837
loss: 0.9832677841186523,grad_norm: 0.9999990530821863, iteration: 78838
loss: 0.983877420425415,grad_norm: 0.8852528151537539, iteration: 78839
loss: 1.011522650718689,grad_norm: 0.9583902607571813, iteration: 78840
loss: 1.0121020078659058,grad_norm: 0.999999108387265, iteration: 78841
loss: 1.0140143632888794,grad_norm: 0.9999990595267024, iteration: 78842
loss: 0.9776874780654907,grad_norm: 0.9999991182849826, iteration: 78843
loss: 1.012848138809204,grad_norm: 0.9999991215148469, iteration: 78844
loss: 0.9361939430236816,grad_norm: 0.8288631310462093, iteration: 78845
loss: 1.01389741897583,grad_norm: 0.895546888851998, iteration: 78846
loss: 1.0300379991531372,grad_norm: 0.9999995327697939, iteration: 78847
loss: 0.9914185404777527,grad_norm: 0.9435042768780912, iteration: 78848
loss: 1.0112130641937256,grad_norm: 0.8096115909602648, iteration: 78849
loss: 0.9942077994346619,grad_norm: 0.9999991634591767, iteration: 78850
loss: 1.0041626691818237,grad_norm: 0.8627304377408915, iteration: 78851
loss: 1.0238428115844727,grad_norm: 0.8953436659594122, iteration: 78852
loss: 1.0141783952713013,grad_norm: 0.8794970545485733, iteration: 78853
loss: 1.0151594877243042,grad_norm: 0.8819837553609355, iteration: 78854
loss: 0.9930289387702942,grad_norm: 0.9999992608290621, iteration: 78855
loss: 0.948509931564331,grad_norm: 0.9152823214813017, iteration: 78856
loss: 0.969490647315979,grad_norm: 0.9224288541593105, iteration: 78857
loss: 1.0008254051208496,grad_norm: 0.8981972038925826, iteration: 78858
loss: 1.0128633975982666,grad_norm: 0.923413033625679, iteration: 78859
loss: 1.027605414390564,grad_norm: 0.9999991730324852, iteration: 78860
loss: 1.0218082666397095,grad_norm: 0.7750454879291923, iteration: 78861
loss: 0.9899611473083496,grad_norm: 0.8694804919600695, iteration: 78862
loss: 1.0227524042129517,grad_norm: 0.9245050266612526, iteration: 78863
loss: 1.0070302486419678,grad_norm: 0.9999993529528649, iteration: 78864
loss: 0.937921404838562,grad_norm: 0.9999990399860536, iteration: 78865
loss: 1.0232877731323242,grad_norm: 0.9523068870215254, iteration: 78866
loss: 0.9704252481460571,grad_norm: 0.9129095111499936, iteration: 78867
loss: 0.9829874038696289,grad_norm: 0.999999102799403, iteration: 78868
loss: 1.022791862487793,grad_norm: 0.9252912252873289, iteration: 78869
loss: 1.0492080450057983,grad_norm: 0.9662110911671851, iteration: 78870
loss: 1.00557541847229,grad_norm: 0.9999991402144192, iteration: 78871
loss: 0.9788104891777039,grad_norm: 0.7939401546078432, iteration: 78872
loss: 1.0185109376907349,grad_norm: 0.9999995082779196, iteration: 78873
loss: 0.984860360622406,grad_norm: 0.9999991427619983, iteration: 78874
loss: 1.1292129755020142,grad_norm: 0.9999995466556775, iteration: 78875
loss: 0.970217227935791,grad_norm: 0.8698994618714543, iteration: 78876
loss: 0.9480650424957275,grad_norm: 0.9815280678318148, iteration: 78877
loss: 0.9695103764533997,grad_norm: 0.9251476258230714, iteration: 78878
loss: 1.0353074073791504,grad_norm: 0.7788974308273665, iteration: 78879
loss: 1.0484589338302612,grad_norm: 0.9473702441318369, iteration: 78880
loss: 1.009278655052185,grad_norm: 0.9999991662836056, iteration: 78881
loss: 1.0139598846435547,grad_norm: 0.794804637462816, iteration: 78882
loss: 0.9813289642333984,grad_norm: 0.7885859046728588, iteration: 78883
loss: 0.9741430878639221,grad_norm: 0.9107460466924615, iteration: 78884
loss: 0.9857645034790039,grad_norm: 0.8320847866484198, iteration: 78885
loss: 0.9589946866035461,grad_norm: 0.9817602623741711, iteration: 78886
loss: 1.0005687475204468,grad_norm: 0.999999464935319, iteration: 78887
loss: 1.0339819192886353,grad_norm: 0.9149896305741319, iteration: 78888
loss: 1.0154497623443604,grad_norm: 0.999999199166441, iteration: 78889
loss: 0.9702165126800537,grad_norm: 0.749239804846639, iteration: 78890
loss: 1.0009831190109253,grad_norm: 0.8588623404546674, iteration: 78891
loss: 1.0024868249893188,grad_norm: 0.9999990754838578, iteration: 78892
loss: 0.9899576306343079,grad_norm: 0.9999992979321529, iteration: 78893
loss: 1.012333869934082,grad_norm: 0.9999993165145884, iteration: 78894
loss: 1.01689612865448,grad_norm: 0.9999992771925137, iteration: 78895
loss: 0.9742670655250549,grad_norm: 0.8731318957732774, iteration: 78896
loss: 1.0071629285812378,grad_norm: 0.8430803928797517, iteration: 78897
loss: 1.0913320779800415,grad_norm: 0.9999991610771328, iteration: 78898
loss: 1.002260684967041,grad_norm: 0.9334373559121141, iteration: 78899
loss: 1.011785864830017,grad_norm: 0.9999992062270636, iteration: 78900
loss: 1.0418717861175537,grad_norm: 0.8769280106041557, iteration: 78901
loss: 0.987581729888916,grad_norm: 0.9424027150928052, iteration: 78902
loss: 1.0465450286865234,grad_norm: 0.9999991809302464, iteration: 78903
loss: 1.0195268392562866,grad_norm: 0.9758313757532008, iteration: 78904
loss: 1.0201797485351562,grad_norm: 0.9253674710423979, iteration: 78905
loss: 0.985623836517334,grad_norm: 0.9069365344081155, iteration: 78906
loss: 1.0385502576828003,grad_norm: 0.9322372122748479, iteration: 78907
loss: 1.0218464136123657,grad_norm: 0.9999990771580619, iteration: 78908
loss: 1.0244324207305908,grad_norm: 0.8742913026496212, iteration: 78909
loss: 1.0230869054794312,grad_norm: 0.9999991451079524, iteration: 78910
loss: 1.0023765563964844,grad_norm: 0.9999991078113037, iteration: 78911
loss: 1.0193653106689453,grad_norm: 0.976920340016405, iteration: 78912
loss: 1.0122326612472534,grad_norm: 0.9716832704235885, iteration: 78913
loss: 1.0026389360427856,grad_norm: 0.8339081027853155, iteration: 78914
loss: 0.9792514443397522,grad_norm: 0.9602844751650003, iteration: 78915
loss: 0.9850103259086609,grad_norm: 0.9135740966396297, iteration: 78916
loss: 0.9552763104438782,grad_norm: 0.9882841332185889, iteration: 78917
loss: 0.9815837144851685,grad_norm: 0.9999990657618968, iteration: 78918
loss: 0.9973143935203552,grad_norm: 0.9999991429617934, iteration: 78919
loss: 0.9987863302230835,grad_norm: 0.9999992532446744, iteration: 78920
loss: 0.9664055705070496,grad_norm: 0.7974357677439882, iteration: 78921
loss: 1.0217899084091187,grad_norm: 0.8764163698022955, iteration: 78922
loss: 1.0084922313690186,grad_norm: 0.9999995217597994, iteration: 78923
loss: 0.9858086109161377,grad_norm: 0.9999989649322444, iteration: 78924
loss: 1.0053114891052246,grad_norm: 0.9353779346017562, iteration: 78925
loss: 0.9793375730514526,grad_norm: 0.8377967868859948, iteration: 78926
loss: 0.97776859998703,grad_norm: 0.916477266563287, iteration: 78927
loss: 1.002026081085205,grad_norm: 0.9999990168004445, iteration: 78928
loss: 0.9745277762413025,grad_norm: 0.9999992112819124, iteration: 78929
loss: 0.996976375579834,grad_norm: 0.9600788309272003, iteration: 78930
loss: 0.973336935043335,grad_norm: 0.8893580329438292, iteration: 78931
loss: 0.9778777956962585,grad_norm: 0.8968429964544579, iteration: 78932
loss: 1.0322707891464233,grad_norm: 0.9999998160343394, iteration: 78933
loss: 0.9994224905967712,grad_norm: 0.9999991690856154, iteration: 78934
loss: 1.0499364137649536,grad_norm: 0.9999990667990496, iteration: 78935
loss: 0.9576731324195862,grad_norm: 0.9999989043094445, iteration: 78936
loss: 1.0365675687789917,grad_norm: 0.9999992252329123, iteration: 78937
loss: 1.0324020385742188,grad_norm: 0.9999990604393285, iteration: 78938
loss: 1.033099889755249,grad_norm: 0.999999497310625, iteration: 78939
loss: 1.0020644664764404,grad_norm: 0.8707306658393001, iteration: 78940
loss: 0.9866708517074585,grad_norm: 0.929260988288316, iteration: 78941
loss: 1.0060477256774902,grad_norm: 0.9999990996535215, iteration: 78942
loss: 1.039206624031067,grad_norm: 0.9999991062496479, iteration: 78943
loss: 1.0077074766159058,grad_norm: 0.9999990619213291, iteration: 78944
loss: 1.0967482328414917,grad_norm: 0.9999994940895205, iteration: 78945
loss: 0.9851147532463074,grad_norm: 0.9999991474847437, iteration: 78946
loss: 0.9999950528144836,grad_norm: 0.8754972114931515, iteration: 78947
loss: 1.081332802772522,grad_norm: 0.9999991803457825, iteration: 78948
loss: 0.99640291929245,grad_norm: 0.8053064487547443, iteration: 78949
loss: 1.0640168190002441,grad_norm: 0.9999992162097009, iteration: 78950
loss: 0.9712088108062744,grad_norm: 0.8848003753133331, iteration: 78951
loss: 1.029861330986023,grad_norm: 0.914739096819236, iteration: 78952
loss: 1.0086332559585571,grad_norm: 0.9455031867663191, iteration: 78953
loss: 1.0020257234573364,grad_norm: 0.885446518346721, iteration: 78954
loss: 1.022584319114685,grad_norm: 0.8544087169803826, iteration: 78955
loss: 1.0209956169128418,grad_norm: 0.9999990563995785, iteration: 78956
loss: 1.0372204780578613,grad_norm: 0.8205721506074717, iteration: 78957
loss: 1.025524377822876,grad_norm: 0.9999989976917097, iteration: 78958
loss: 0.9793990254402161,grad_norm: 0.8686310798404858, iteration: 78959
loss: 0.9656173586845398,grad_norm: 0.9999989654999649, iteration: 78960
loss: 1.0540069341659546,grad_norm: 0.9999990521847129, iteration: 78961
loss: 1.0036168098449707,grad_norm: 0.9999991656302949, iteration: 78962
loss: 0.974510669708252,grad_norm: 0.9999991043989586, iteration: 78963
loss: 1.0296990871429443,grad_norm: 0.999999164670369, iteration: 78964
loss: 0.979362964630127,grad_norm: 0.8725657337837105, iteration: 78965
loss: 1.010406255722046,grad_norm: 0.9892711787607723, iteration: 78966
loss: 0.9938587546348572,grad_norm: 0.8658453593514942, iteration: 78967
loss: 0.9688399434089661,grad_norm: 0.9718109004763553, iteration: 78968
loss: 1.0974342823028564,grad_norm: 0.9999998143372268, iteration: 78969
loss: 1.0019164085388184,grad_norm: 0.9712974924309083, iteration: 78970
loss: 1.0011096000671387,grad_norm: 0.8686163376255208, iteration: 78971
loss: 1.0303839445114136,grad_norm: 0.8750933660345409, iteration: 78972
loss: 0.9894899725914001,grad_norm: 0.8466709206321069, iteration: 78973
loss: 1.0341477394104004,grad_norm: 0.8742317016873005, iteration: 78974
loss: 1.067006230354309,grad_norm: 0.9999993039098792, iteration: 78975
loss: 0.9931509494781494,grad_norm: 0.9055219802201319, iteration: 78976
loss: 1.0207931995391846,grad_norm: 0.9333578505668125, iteration: 78977
loss: 0.9757442474365234,grad_norm: 0.9999989725359122, iteration: 78978
loss: 1.0116158723831177,grad_norm: 0.952868833276214, iteration: 78979
loss: 1.0054829120635986,grad_norm: 0.9962879971816779, iteration: 78980
loss: 0.9998993277549744,grad_norm: 0.8980740505201579, iteration: 78981
loss: 1.0178899765014648,grad_norm: 0.8184779005981556, iteration: 78982
loss: 1.0346195697784424,grad_norm: 0.9999990736678747, iteration: 78983
loss: 0.9988982677459717,grad_norm: 0.9368154851637726, iteration: 78984
loss: 1.0045905113220215,grad_norm: 0.851542899058034, iteration: 78985
loss: 0.994366466999054,grad_norm: 0.8101046019672428, iteration: 78986
loss: 1.0003494024276733,grad_norm: 0.9999991716555633, iteration: 78987
loss: 0.9771630167961121,grad_norm: 0.865724408285706, iteration: 78988
loss: 0.9906632900238037,grad_norm: 0.9999991438779527, iteration: 78989
loss: 0.9747428297996521,grad_norm: 0.9999991647102081, iteration: 78990
loss: 1.0311707258224487,grad_norm: 0.9999989973212716, iteration: 78991
loss: 1.0519464015960693,grad_norm: 0.9113857639550415, iteration: 78992
loss: 1.008642315864563,grad_norm: 0.9999991307781632, iteration: 78993
loss: 0.969432532787323,grad_norm: 0.794733742372374, iteration: 78994
loss: 1.0026479959487915,grad_norm: 0.9090012742643071, iteration: 78995
loss: 1.0154166221618652,grad_norm: 0.8544933991086572, iteration: 78996
loss: 0.9640423059463501,grad_norm: 0.927708576231053, iteration: 78997
loss: 0.9831472039222717,grad_norm: 0.9999991767175603, iteration: 78998
loss: 0.9795287847518921,grad_norm: 0.7930499151447357, iteration: 78999
loss: 1.0348820686340332,grad_norm: 0.9005706790351858, iteration: 79000
loss: 1.0160017013549805,grad_norm: 0.926867309697257, iteration: 79001
loss: 0.9911043643951416,grad_norm: 0.8253623107112771, iteration: 79002
loss: 1.0034396648406982,grad_norm: 0.8168867032979406, iteration: 79003
loss: 1.000139594078064,grad_norm: 0.8972311259149577, iteration: 79004
loss: 0.9678800106048584,grad_norm: 0.8737183760923137, iteration: 79005
loss: 1.0147349834442139,grad_norm: 0.8950515898735354, iteration: 79006
loss: 1.0253533124923706,grad_norm: 0.9999990240471223, iteration: 79007
loss: 0.9908291101455688,grad_norm: 0.9446297227900033, iteration: 79008
loss: 0.9665369391441345,grad_norm: 0.9999990896671855, iteration: 79009
loss: 1.0106712579727173,grad_norm: 0.9021033197546711, iteration: 79010
loss: 0.9929121136665344,grad_norm: 0.9856926847156098, iteration: 79011
loss: 1.0391043424606323,grad_norm: 0.9447767644739871, iteration: 79012
loss: 1.0097205638885498,grad_norm: 0.9999994242700555, iteration: 79013
loss: 1.0407178401947021,grad_norm: 0.9426844593160172, iteration: 79014
loss: 0.975284993648529,grad_norm: 0.9184117422385772, iteration: 79015
loss: 1.0343592166900635,grad_norm: 0.9999993746651508, iteration: 79016
loss: 1.0156670808792114,grad_norm: 0.9457644453028511, iteration: 79017
loss: 1.051771879196167,grad_norm: 0.8885575388532867, iteration: 79018
loss: 0.9803585410118103,grad_norm: 0.9783130287227845, iteration: 79019
loss: 1.0188632011413574,grad_norm: 0.9999997420502513, iteration: 79020
loss: 1.009865403175354,grad_norm: 0.8267698097061855, iteration: 79021
loss: 0.9778889417648315,grad_norm: 0.9017066466894932, iteration: 79022
loss: 0.9969552159309387,grad_norm: 0.9643462252205603, iteration: 79023
loss: 1.0283418893814087,grad_norm: 0.9999991461092442, iteration: 79024
loss: 0.962104320526123,grad_norm: 0.9236360514492252, iteration: 79025
loss: 1.025300145149231,grad_norm: 0.8544676145889453, iteration: 79026
loss: 0.9869619607925415,grad_norm: 0.9999993472502987, iteration: 79027
loss: 0.9619576930999756,grad_norm: 0.7660933144924595, iteration: 79028
loss: 1.0365793704986572,grad_norm: 0.9999991966448433, iteration: 79029
loss: 0.9919841885566711,grad_norm: 0.9135167405928789, iteration: 79030
loss: 1.0392608642578125,grad_norm: 0.8936886796200555, iteration: 79031
loss: 0.9955588579177856,grad_norm: 0.999999062341633, iteration: 79032
loss: 1.0107218027114868,grad_norm: 0.9405744820072868, iteration: 79033
loss: 0.9938572645187378,grad_norm: 0.9999991648272961, iteration: 79034
loss: 1.0599949359893799,grad_norm: 0.9394410729492927, iteration: 79035
loss: 1.0318347215652466,grad_norm: 0.9999992929050144, iteration: 79036
loss: 0.9892004728317261,grad_norm: 0.9999991147172237, iteration: 79037
loss: 1.0245627164840698,grad_norm: 0.9999991647025257, iteration: 79038
loss: 1.0705283880233765,grad_norm: 0.9999998287916417, iteration: 79039
loss: 1.0136680603027344,grad_norm: 0.8711754150364388, iteration: 79040
loss: 0.9625514149665833,grad_norm: 0.9453716042665425, iteration: 79041
loss: 1.0665093660354614,grad_norm: 0.9999991310318186, iteration: 79042
loss: 1.0085800886154175,grad_norm: 0.9999991451752364, iteration: 79043
loss: 1.0017987489700317,grad_norm: 0.7909305507901757, iteration: 79044
loss: 1.0109167098999023,grad_norm: 0.9242681615543148, iteration: 79045
loss: 1.0290886163711548,grad_norm: 0.9287446054862465, iteration: 79046
loss: 0.9625365734100342,grad_norm: 0.8556572509822701, iteration: 79047
loss: 1.1176049709320068,grad_norm: 0.9999995479169114, iteration: 79048
loss: 1.0052449703216553,grad_norm: 0.9999998226207046, iteration: 79049
loss: 1.0887218713760376,grad_norm: 0.9999996518943527, iteration: 79050
loss: 1.0677763223648071,grad_norm: 0.9999999243891314, iteration: 79051
loss: 1.0075839757919312,grad_norm: 0.9588590531146128, iteration: 79052
loss: 1.0183768272399902,grad_norm: 0.9999990651550393, iteration: 79053
loss: 1.0264830589294434,grad_norm: 0.97089684615805, iteration: 79054
loss: 1.012235403060913,grad_norm: 0.7863728857047859, iteration: 79055
loss: 1.0133212804794312,grad_norm: 0.99999923205791, iteration: 79056
loss: 1.000788688659668,grad_norm: 0.7670599591277296, iteration: 79057
loss: 0.9909950494766235,grad_norm: 0.8533462255920239, iteration: 79058
loss: 1.0405484437942505,grad_norm: 0.9999991170873179, iteration: 79059
loss: 0.9929556846618652,grad_norm: 0.9999990414460712, iteration: 79060
loss: 1.0601297616958618,grad_norm: 0.9999991071220563, iteration: 79061
loss: 0.9837808609008789,grad_norm: 0.8284975724797861, iteration: 79062
loss: 1.072059154510498,grad_norm: 0.9999998700932521, iteration: 79063
loss: 1.0246726274490356,grad_norm: 0.999999118203514, iteration: 79064
loss: 1.0132521390914917,grad_norm: 0.9517918072148279, iteration: 79065
loss: 0.9991282224655151,grad_norm: 0.7932612154638913, iteration: 79066
loss: 1.012926697731018,grad_norm: 0.9938781027845046, iteration: 79067
loss: 0.9763241410255432,grad_norm: 0.7390927394123998, iteration: 79068
loss: 1.0020869970321655,grad_norm: 0.8483328164912611, iteration: 79069
loss: 1.0430200099945068,grad_norm: 0.9999992641444755, iteration: 79070
loss: 1.035919427871704,grad_norm: 0.9999990171651019, iteration: 79071
loss: 1.0240164995193481,grad_norm: 0.9562965184482016, iteration: 79072
loss: 1.0133368968963623,grad_norm: 0.8765874418283768, iteration: 79073
loss: 1.0032532215118408,grad_norm: 0.8902662377540675, iteration: 79074
loss: 1.0761148929595947,grad_norm: 0.9445736431413325, iteration: 79075
loss: 0.9906924366950989,grad_norm: 0.9999990257553082, iteration: 79076
loss: 0.9938563704490662,grad_norm: 0.9999991934879665, iteration: 79077
loss: 1.0302006006240845,grad_norm: 0.999999059311655, iteration: 79078
loss: 1.0024586915969849,grad_norm: 0.9999991729914093, iteration: 79079
loss: 1.0424367189407349,grad_norm: 0.9999999276852093, iteration: 79080
loss: 0.9694868326187134,grad_norm: 0.8832264141430548, iteration: 79081
loss: 1.005822777748108,grad_norm: 0.8409820984840766, iteration: 79082
loss: 0.9936662912368774,grad_norm: 0.9999989577084474, iteration: 79083
loss: 1.02556312084198,grad_norm: 0.9769854979795796, iteration: 79084
loss: 0.9841048717498779,grad_norm: 0.9101270212621047, iteration: 79085
loss: 1.005415439605713,grad_norm: 0.8635757520889903, iteration: 79086
loss: 1.0278390645980835,grad_norm: 0.999999163042504, iteration: 79087
loss: 0.9756826758384705,grad_norm: 0.9999992239288313, iteration: 79088
loss: 1.002886176109314,grad_norm: 0.822301015348652, iteration: 79089
loss: 1.0160516500473022,grad_norm: 0.8538258732972459, iteration: 79090
loss: 0.9913499355316162,grad_norm: 0.9999990235549048, iteration: 79091
loss: 1.0136950016021729,grad_norm: 0.9999992901406065, iteration: 79092
loss: 1.0007802248001099,grad_norm: 0.9178949311510916, iteration: 79093
loss: 0.982025682926178,grad_norm: 0.9999994913444534, iteration: 79094
loss: 1.0718557834625244,grad_norm: 0.9999992998404996, iteration: 79095
loss: 0.9804003238677979,grad_norm: 0.9999991098910401, iteration: 79096
loss: 1.0475846529006958,grad_norm: 0.9999991240946086, iteration: 79097
loss: 1.1428037881851196,grad_norm: 0.9999991544222189, iteration: 79098
loss: 1.0832239389419556,grad_norm: 0.9797821465141892, iteration: 79099
loss: 1.0178920030593872,grad_norm: 0.886551814910696, iteration: 79100
loss: 1.0300679206848145,grad_norm: 0.8716391413578422, iteration: 79101
loss: 1.015430212020874,grad_norm: 0.9999991885841784, iteration: 79102
loss: 0.9741376638412476,grad_norm: 0.9999990903035737, iteration: 79103
loss: 1.005484700202942,grad_norm: 0.935806465657152, iteration: 79104
loss: 0.9707745909690857,grad_norm: 0.9200413234141885, iteration: 79105
loss: 0.9837310910224915,grad_norm: 0.735472670514727, iteration: 79106
loss: 1.0580459833145142,grad_norm: 0.9999990636525855, iteration: 79107
loss: 1.0039759874343872,grad_norm: 0.8845432353067275, iteration: 79108
loss: 1.02080500125885,grad_norm: 0.8902872387675387, iteration: 79109
loss: 1.0425500869750977,grad_norm: 0.9999991326631686, iteration: 79110
loss: 0.9996337294578552,grad_norm: 0.8953319649084049, iteration: 79111
loss: 1.0024781227111816,grad_norm: 0.9021572822619366, iteration: 79112
loss: 1.0162423849105835,grad_norm: 0.9507301043533353, iteration: 79113
loss: 1.0367975234985352,grad_norm: 0.9999991818487286, iteration: 79114
loss: 1.0443931818008423,grad_norm: 0.9734925495597894, iteration: 79115
loss: 0.9990146160125732,grad_norm: 0.7919875252114662, iteration: 79116
loss: 0.9904266595840454,grad_norm: 0.9747049671833372, iteration: 79117
loss: 0.9914451837539673,grad_norm: 0.999999406082263, iteration: 79118
loss: 1.0044106245040894,grad_norm: 0.9999992298559937, iteration: 79119
loss: 0.9869920611381531,grad_norm: 0.8492980517118438, iteration: 79120
loss: 0.9895240068435669,grad_norm: 0.999999044245156, iteration: 79121
loss: 0.9856433868408203,grad_norm: 0.8324390766271718, iteration: 79122
loss: 1.0472240447998047,grad_norm: 0.9999998879756146, iteration: 79123
loss: 1.008156657218933,grad_norm: 0.9999995784208359, iteration: 79124
loss: 1.0414456129074097,grad_norm: 0.999999283602538, iteration: 79125
loss: 1.0115177631378174,grad_norm: 0.999999068693549, iteration: 79126
loss: 1.0054936408996582,grad_norm: 0.9999990360206197, iteration: 79127
loss: 1.0252125263214111,grad_norm: 0.8612426636974498, iteration: 79128
loss: 1.0043909549713135,grad_norm: 0.8744713982093681, iteration: 79129
loss: 1.01594078540802,grad_norm: 0.8863623158500159, iteration: 79130
loss: 1.024821400642395,grad_norm: 0.9357527015761118, iteration: 79131
loss: 0.986605703830719,grad_norm: 0.8665611003425435, iteration: 79132
loss: 0.9919591546058655,grad_norm: 0.9877712894548201, iteration: 79133
loss: 1.0123993158340454,grad_norm: 0.999999488554312, iteration: 79134
loss: 0.9875516295433044,grad_norm: 0.99999916944319, iteration: 79135
loss: 1.0729392766952515,grad_norm: 0.9467391849219057, iteration: 79136
loss: 0.9755042195320129,grad_norm: 0.8865748749263024, iteration: 79137
loss: 0.9827972650527954,grad_norm: 0.9999991224527788, iteration: 79138
loss: 0.9795655608177185,grad_norm: 0.9719033502631204, iteration: 79139
loss: 1.0334049463272095,grad_norm: 0.9999992341467406, iteration: 79140
loss: 1.0732795000076294,grad_norm: 0.9999998096946001, iteration: 79141
loss: 1.1316237449645996,grad_norm: 0.9999999108089517, iteration: 79142
loss: 0.9934104681015015,grad_norm: 0.7959949734843174, iteration: 79143
loss: 1.053196907043457,grad_norm: 0.999999328991274, iteration: 79144
loss: 1.0261507034301758,grad_norm: 0.8285162409263842, iteration: 79145
loss: 0.9919096827507019,grad_norm: 0.7347344982382729, iteration: 79146
loss: 0.9754733443260193,grad_norm: 0.8467655210827464, iteration: 79147
loss: 1.067124605178833,grad_norm: 0.9999996812085259, iteration: 79148
loss: 1.0638997554779053,grad_norm: 0.9999996191282675, iteration: 79149
loss: 1.0960993766784668,grad_norm: 0.9999990847801776, iteration: 79150
loss: 1.0167087316513062,grad_norm: 0.9999994752202359, iteration: 79151
loss: 0.9961460828781128,grad_norm: 0.9999991323909365, iteration: 79152
loss: 1.0504162311553955,grad_norm: 0.8490068413639235, iteration: 79153
loss: 0.9753097891807556,grad_norm: 0.9903372713084102, iteration: 79154
loss: 1.0240099430084229,grad_norm: 0.9187987379891336, iteration: 79155
loss: 0.9978529810905457,grad_norm: 0.8119407796021911, iteration: 79156
loss: 1.0196564197540283,grad_norm: 0.833314071571084, iteration: 79157
loss: 0.9832963347434998,grad_norm: 0.9999989742827273, iteration: 79158
loss: 0.9901160001754761,grad_norm: 0.9999993718012282, iteration: 79159
loss: 0.9769498109817505,grad_norm: 0.8864666350233125, iteration: 79160
loss: 0.976495087146759,grad_norm: 0.9593598885473013, iteration: 79161
loss: 1.010199785232544,grad_norm: 0.9327972582339332, iteration: 79162
loss: 0.9988672137260437,grad_norm: 0.9053724135722936, iteration: 79163
loss: 1.0491647720336914,grad_norm: 0.9999991243155162, iteration: 79164
loss: 1.0773903131484985,grad_norm: 0.9999993366135964, iteration: 79165
loss: 1.0013972520828247,grad_norm: 0.9999991451916159, iteration: 79166
loss: 1.0006392002105713,grad_norm: 0.9999992359773974, iteration: 79167
loss: 0.999183714389801,grad_norm: 0.875686333305961, iteration: 79168
loss: 0.9893858432769775,grad_norm: 0.9999990959820226, iteration: 79169
loss: 0.982877254486084,grad_norm: 0.9834856890995733, iteration: 79170
loss: 1.0242983102798462,grad_norm: 0.9439584628507989, iteration: 79171
loss: 0.9897411465644836,grad_norm: 0.9999992092968177, iteration: 79172
loss: 0.984992265701294,grad_norm: 0.8281915446388983, iteration: 79173
loss: 0.98899245262146,grad_norm: 0.8448014253368593, iteration: 79174
loss: 1.046951413154602,grad_norm: 0.9999992491592751, iteration: 79175
loss: 1.023804783821106,grad_norm: 0.9999993299556834, iteration: 79176
loss: 0.9724308252334595,grad_norm: 0.9999991281554436, iteration: 79177
loss: 1.0089930295944214,grad_norm: 0.9999994076649584, iteration: 79178
loss: 1.0295695066452026,grad_norm: 0.8656962388996501, iteration: 79179
loss: 1.105340600013733,grad_norm: 0.9999996218311673, iteration: 79180
loss: 1.0312341451644897,grad_norm: 0.9070013058875052, iteration: 79181
loss: 1.0207477807998657,grad_norm: 0.9875574673093358, iteration: 79182
loss: 1.0013091564178467,grad_norm: 0.9392512015195149, iteration: 79183
loss: 1.0522974729537964,grad_norm: 0.9999997083921054, iteration: 79184
loss: 1.0033354759216309,grad_norm: 0.9999993436322765, iteration: 79185
loss: 1.0284240245819092,grad_norm: 0.8066750394262634, iteration: 79186
loss: 0.9915124773979187,grad_norm: 0.9999992078267663, iteration: 79187
loss: 1.0876057147979736,grad_norm: 0.9999994162755294, iteration: 79188
loss: 0.9857850670814514,grad_norm: 0.8127363962302715, iteration: 79189
loss: 1.049588918685913,grad_norm: 0.9999990144045388, iteration: 79190
loss: 0.9982596039772034,grad_norm: 0.8477456693103811, iteration: 79191
loss: 0.985433042049408,grad_norm: 0.9285157538730054, iteration: 79192
loss: 0.9891643524169922,grad_norm: 0.8829822801986631, iteration: 79193
loss: 1.0030385255813599,grad_norm: 0.8138188086989266, iteration: 79194
loss: 1.0156234502792358,grad_norm: 0.9999994925014081, iteration: 79195
loss: 1.0311421155929565,grad_norm: 0.9269768611195558, iteration: 79196
loss: 1.0158488750457764,grad_norm: 0.9999990539894387, iteration: 79197
loss: 0.9984766244888306,grad_norm: 0.9599685281870022, iteration: 79198
loss: 0.9973645806312561,grad_norm: 0.9999992566121507, iteration: 79199
loss: 1.015519618988037,grad_norm: 0.8940919546158931, iteration: 79200
loss: 0.9958368539810181,grad_norm: 0.9999992450938483, iteration: 79201
loss: 1.0204542875289917,grad_norm: 1.0000000345915312, iteration: 79202
loss: 0.958986759185791,grad_norm: 0.9999992109232987, iteration: 79203
loss: 0.9808844923973083,grad_norm: 0.9999989467494155, iteration: 79204
loss: 1.0047606229782104,grad_norm: 0.9444601005070814, iteration: 79205
loss: 1.0454660654067993,grad_norm: 0.9999995277983348, iteration: 79206
loss: 1.02456533908844,grad_norm: 0.9999992176520311, iteration: 79207
loss: 1.0374245643615723,grad_norm: 0.9999993431403796, iteration: 79208
loss: 1.0607941150665283,grad_norm: 0.8757564848165027, iteration: 79209
loss: 1.0133734941482544,grad_norm: 0.8556461653249036, iteration: 79210
loss: 1.1225718259811401,grad_norm: 0.9999997643731952, iteration: 79211
loss: 1.0335408449172974,grad_norm: 0.9999990983670408, iteration: 79212
loss: 1.0220826864242554,grad_norm: 0.9530076802635215, iteration: 79213
loss: 0.9877801537513733,grad_norm: 0.7988728900676754, iteration: 79214
loss: 1.013695478439331,grad_norm: 0.9999991881052146, iteration: 79215
loss: 0.9832473993301392,grad_norm: 0.8607013252188916, iteration: 79216
loss: 0.9961676597595215,grad_norm: 0.8833236202545957, iteration: 79217
loss: 0.987488865852356,grad_norm: 0.9999994486782532, iteration: 79218
loss: 1.001254916191101,grad_norm: 0.9999995829645474, iteration: 79219
loss: 0.9688518643379211,grad_norm: 0.9383580849721924, iteration: 79220
loss: 1.0129005908966064,grad_norm: 0.999999854492315, iteration: 79221
loss: 1.0685781240463257,grad_norm: 0.9999994311841541, iteration: 79222
loss: 1.0003786087036133,grad_norm: 0.999998948147481, iteration: 79223
loss: 0.9768393039703369,grad_norm: 0.99999911436625, iteration: 79224
loss: 1.0043526887893677,grad_norm: 0.8509869503116666, iteration: 79225
loss: 1.053494930267334,grad_norm: 0.9999999290419295, iteration: 79226
loss: 0.9893468022346497,grad_norm: 0.9351391573866459, iteration: 79227
loss: 1.153139352798462,grad_norm: 0.9844206649676631, iteration: 79228
loss: 1.03728449344635,grad_norm: 0.920837221181657, iteration: 79229
loss: 1.0131117105484009,grad_norm: 0.9999991497090197, iteration: 79230
loss: 1.053476333618164,grad_norm: 0.9354544968796048, iteration: 79231
loss: 1.0102945566177368,grad_norm: 0.8704340247976894, iteration: 79232
loss: 1.0268863439559937,grad_norm: 0.7941222800911419, iteration: 79233
loss: 1.016613483428955,grad_norm: 0.9999994875559093, iteration: 79234
loss: 1.0175526142120361,grad_norm: 0.8320284347569048, iteration: 79235
loss: 1.0136393308639526,grad_norm: 0.9932653361638224, iteration: 79236
loss: 1.0403172969818115,grad_norm: 0.8812192012089765, iteration: 79237
loss: 1.0941729545593262,grad_norm: 0.9999999589647526, iteration: 79238
loss: 1.013702630996704,grad_norm: 0.8922761676368521, iteration: 79239
loss: 1.0089706182479858,grad_norm: 0.8466014969408082, iteration: 79240
loss: 0.9994780421257019,grad_norm: 0.999999099549523, iteration: 79241
loss: 1.0436217784881592,grad_norm: 0.9999990687226694, iteration: 79242
loss: 1.1940313577651978,grad_norm: 0.9999992116772707, iteration: 79243
loss: 0.9967421889305115,grad_norm: 0.9051392126166906, iteration: 79244
loss: 1.0229045152664185,grad_norm: 0.8427885501024898, iteration: 79245
loss: 1.4819682836532593,grad_norm: 0.9999998122594744, iteration: 79246
loss: 0.972507119178772,grad_norm: 0.9475690406779065, iteration: 79247
loss: 1.012764573097229,grad_norm: 0.9999990682488431, iteration: 79248
loss: 1.1622811555862427,grad_norm: 0.999999220737212, iteration: 79249
loss: 1.0027447938919067,grad_norm: 0.9999999221805222, iteration: 79250
loss: 0.9917309880256653,grad_norm: 0.9999995200815887, iteration: 79251
loss: 1.0349974632263184,grad_norm: 0.9051333110265625, iteration: 79252
loss: 0.9891816973686218,grad_norm: 0.8824494484074993, iteration: 79253
loss: 0.9707481861114502,grad_norm: 0.93659460628227, iteration: 79254
loss: 1.1693607568740845,grad_norm: 0.999999044733332, iteration: 79255
loss: 1.0067150592803955,grad_norm: 0.9973169179538857, iteration: 79256
loss: 1.0030500888824463,grad_norm: 0.8137340331243377, iteration: 79257
loss: 0.9836807250976562,grad_norm: 0.9999993309757644, iteration: 79258
loss: 1.023389220237732,grad_norm: 0.8824980559462113, iteration: 79259
loss: 1.052540898323059,grad_norm: 0.8610165704498381, iteration: 79260
loss: 1.052328109741211,grad_norm: 0.9999990346084736, iteration: 79261
loss: 1.1030161380767822,grad_norm: 0.9999992061498211, iteration: 79262
loss: 1.0906106233596802,grad_norm: 0.999999004445393, iteration: 79263
loss: 0.9853758811950684,grad_norm: 0.8299173690395947, iteration: 79264
loss: 1.2915282249450684,grad_norm: 0.9999993571132668, iteration: 79265
loss: 0.9976133108139038,grad_norm: 0.7836254609690252, iteration: 79266
loss: 0.9874462485313416,grad_norm: 0.7974091355684735, iteration: 79267
loss: 0.988264799118042,grad_norm: 0.9843093676820865, iteration: 79268
loss: 1.0075695514678955,grad_norm: 0.8929801362534276, iteration: 79269
loss: 1.3894785642623901,grad_norm: 0.999999638994421, iteration: 79270
loss: 1.0112406015396118,grad_norm: 0.9999990669007232, iteration: 79271
loss: 0.9722573161125183,grad_norm: 0.9706845785104502, iteration: 79272
loss: 0.9847661256790161,grad_norm: 0.9614656928976035, iteration: 79273
loss: 1.0115822553634644,grad_norm: 0.9999991964966101, iteration: 79274
loss: 0.9615939855575562,grad_norm: 0.7886242494843132, iteration: 79275
loss: 1.0771821737289429,grad_norm: 0.9999995206292754, iteration: 79276
loss: 1.0212430953979492,grad_norm: 0.99999922027727, iteration: 79277
loss: 1.0099468231201172,grad_norm: 0.9597938738066248, iteration: 79278
loss: 0.995051920413971,grad_norm: 0.9999990666871962, iteration: 79279
loss: 1.020748496055603,grad_norm: 0.9999990631882375, iteration: 79280
loss: 0.9818163514137268,grad_norm: 0.8893870391744871, iteration: 79281
loss: 1.0357791185379028,grad_norm: 0.9999991138370615, iteration: 79282
loss: 1.1243069171905518,grad_norm: 0.9999993113617697, iteration: 79283
loss: 1.0022730827331543,grad_norm: 0.9820391549498764, iteration: 79284
loss: 1.0666371583938599,grad_norm: 0.9999990275392464, iteration: 79285
loss: 1.052648663520813,grad_norm: 0.9567465477553687, iteration: 79286
loss: 1.0127918720245361,grad_norm: 0.9999990784388368, iteration: 79287
loss: 1.0743523836135864,grad_norm: 0.9999994600374433, iteration: 79288
loss: 1.0256165266036987,grad_norm: 0.9443171671096222, iteration: 79289
loss: 1.005704402923584,grad_norm: 0.9165258802500372, iteration: 79290
loss: 1.0058941841125488,grad_norm: 0.9999991248485672, iteration: 79291
loss: 1.0050561428070068,grad_norm: 0.9685777121254479, iteration: 79292
loss: 1.109276533126831,grad_norm: 0.9451381948902317, iteration: 79293
loss: 0.9957009553909302,grad_norm: 0.8855236335020167, iteration: 79294
loss: 1.1011526584625244,grad_norm: 0.999999582559061, iteration: 79295
loss: 1.049440622329712,grad_norm: 0.9067186679427179, iteration: 79296
loss: 1.0389010906219482,grad_norm: 0.9999989496769918, iteration: 79297
loss: 1.0122467279434204,grad_norm: 0.9253315676125011, iteration: 79298
loss: 1.024113416671753,grad_norm: 0.9423860792716497, iteration: 79299
loss: 0.9843975901603699,grad_norm: 0.9999994934077039, iteration: 79300
loss: 0.9957893490791321,grad_norm: 0.7876445195693655, iteration: 79301
loss: 1.0295623540878296,grad_norm: 0.9999990677811784, iteration: 79302
loss: 1.0121235847473145,grad_norm: 0.9999993343205493, iteration: 79303
loss: 1.0479766130447388,grad_norm: 0.9999999762385777, iteration: 79304
loss: 1.0476737022399902,grad_norm: 0.9999991822973674, iteration: 79305
loss: 1.009270191192627,grad_norm: 0.8564734583981334, iteration: 79306
loss: 1.0756983757019043,grad_norm: 0.9999991777599472, iteration: 79307
loss: 1.0402617454528809,grad_norm: 0.9999993411000304, iteration: 79308
loss: 1.0625039339065552,grad_norm: 0.9999997056897851, iteration: 79309
loss: 1.0031177997589111,grad_norm: 0.9999994591727076, iteration: 79310
loss: 1.0118423700332642,grad_norm: 0.9679180265164928, iteration: 79311
loss: 1.0323759317398071,grad_norm: 0.9258003115803566, iteration: 79312
loss: 0.9925708174705505,grad_norm: 0.7623468547734014, iteration: 79313
loss: 1.0383650064468384,grad_norm: 0.8491670575414976, iteration: 79314
loss: 0.9710484147071838,grad_norm: 0.8395335893767978, iteration: 79315
loss: 0.9777713418006897,grad_norm: 0.8401579848030271, iteration: 79316
loss: 1.0774486064910889,grad_norm: 0.89955410959988, iteration: 79317
loss: 1.0357812643051147,grad_norm: 0.9999993040200087, iteration: 79318
loss: 1.0193759202957153,grad_norm: 0.9999993099636127, iteration: 79319
loss: 1.005768060684204,grad_norm: 0.9792800391180729, iteration: 79320
loss: 1.0151537656784058,grad_norm: 0.9938849260683285, iteration: 79321
loss: 1.04105544090271,grad_norm: 0.833462479857363, iteration: 79322
loss: 1.025563359260559,grad_norm: 0.8247997109284937, iteration: 79323
loss: 0.997356116771698,grad_norm: 0.9999994282851925, iteration: 79324
loss: 1.0095603466033936,grad_norm: 0.9999991310462498, iteration: 79325
loss: 1.0227314233779907,grad_norm: 0.9651447805564437, iteration: 79326
loss: 1.074312448501587,grad_norm: 0.9999990389923162, iteration: 79327
loss: 1.0378464460372925,grad_norm: 0.8965127814098515, iteration: 79328
loss: 1.0551279783248901,grad_norm: 0.999999243915948, iteration: 79329
loss: 0.9966244697570801,grad_norm: 0.9999991235023464, iteration: 79330
loss: 1.081594705581665,grad_norm: 0.9999990684106392, iteration: 79331
loss: 0.9996846914291382,grad_norm: 0.9999990575082162, iteration: 79332
loss: 1.0295627117156982,grad_norm: 0.9999994965163518, iteration: 79333
loss: 1.002181053161621,grad_norm: 0.9578905148026459, iteration: 79334
loss: 0.9673388004302979,grad_norm: 0.9999996895584792, iteration: 79335
loss: 1.0015021562576294,grad_norm: 0.999999264132867, iteration: 79336
loss: 0.9672873616218567,grad_norm: 0.9999991427041607, iteration: 79337
loss: 1.005297303199768,grad_norm: 0.959497466703642, iteration: 79338
loss: 1.0052467584609985,grad_norm: 0.7918674960806339, iteration: 79339
loss: 0.9868901968002319,grad_norm: 0.9999993004986869, iteration: 79340
loss: 1.0296093225479126,grad_norm: 0.9999993631411253, iteration: 79341
loss: 1.039984941482544,grad_norm: 0.813531640801562, iteration: 79342
loss: 1.017822027206421,grad_norm: 0.8489573167562995, iteration: 79343
loss: 0.980888843536377,grad_norm: 0.895619686291735, iteration: 79344
loss: 1.0008589029312134,grad_norm: 0.8568313248564026, iteration: 79345
loss: 1.008707880973816,grad_norm: 0.9999992934994438, iteration: 79346
loss: 1.0228900909423828,grad_norm: 0.9999990991313583, iteration: 79347
loss: 1.0013128519058228,grad_norm: 0.9999991608310986, iteration: 79348
loss: 0.993260383605957,grad_norm: 0.8949229191519811, iteration: 79349
loss: 0.9697802066802979,grad_norm: 0.9999991343215439, iteration: 79350
loss: 1.0051971673965454,grad_norm: 0.9753923490821619, iteration: 79351
loss: 1.0484580993652344,grad_norm: 0.9999991495773859, iteration: 79352
loss: 1.0274182558059692,grad_norm: 0.9129544147354541, iteration: 79353
loss: 1.0321085453033447,grad_norm: 0.8744533619803745, iteration: 79354
loss: 1.0057392120361328,grad_norm: 0.9726196487610966, iteration: 79355
loss: 0.9834538698196411,grad_norm: 0.9196653908447009, iteration: 79356
loss: 1.032588243484497,grad_norm: 0.999999214396734, iteration: 79357
loss: 1.0168519020080566,grad_norm: 0.9999995872468722, iteration: 79358
loss: 1.0601757764816284,grad_norm: 0.9026174836436212, iteration: 79359
loss: 0.9884536266326904,grad_norm: 0.9689055435740839, iteration: 79360
loss: 0.9950366616249084,grad_norm: 0.957388023370886, iteration: 79361
loss: 0.9917156100273132,grad_norm: 0.9999998602932071, iteration: 79362
loss: 1.2122972011566162,grad_norm: 0.99999953687487, iteration: 79363
loss: 1.032395601272583,grad_norm: 0.932136998213761, iteration: 79364
loss: 1.0172061920166016,grad_norm: 0.9999991459771732, iteration: 79365
loss: 0.9937704205513,grad_norm: 0.9356009476726407, iteration: 79366
loss: 1.0101854801177979,grad_norm: 0.8805045629400836, iteration: 79367
loss: 0.9917776584625244,grad_norm: 0.9736792294295491, iteration: 79368
loss: 1.0153437852859497,grad_norm: 0.9999995800021179, iteration: 79369
loss: 0.9822415709495544,grad_norm: 0.9702127391208359, iteration: 79370
loss: 1.0029869079589844,grad_norm: 0.9999991174195478, iteration: 79371
loss: 0.9727891087532043,grad_norm: 0.999999823539382, iteration: 79372
loss: 0.9982781410217285,grad_norm: 0.9999992741721044, iteration: 79373
loss: 1.027614712715149,grad_norm: 0.9999993115133774, iteration: 79374
loss: 1.063163161277771,grad_norm: 0.9999990421548043, iteration: 79375
loss: 0.9898676872253418,grad_norm: 0.9999991009193833, iteration: 79376
loss: 1.0492124557495117,grad_norm: 0.9999991990909276, iteration: 79377
loss: 1.055793285369873,grad_norm: 0.9999991807099167, iteration: 79378
loss: 0.984433114528656,grad_norm: 0.8158672287416436, iteration: 79379
loss: 1.0205565690994263,grad_norm: 0.9999990918570708, iteration: 79380
loss: 1.0171773433685303,grad_norm: 0.9999991545776498, iteration: 79381
loss: 0.9545600414276123,grad_norm: 0.9999991231784147, iteration: 79382
loss: 0.9683074951171875,grad_norm: 0.8078779366017513, iteration: 79383
loss: 0.9966256022453308,grad_norm: 0.9423932490303343, iteration: 79384
loss: 1.0149742364883423,grad_norm: 0.9831722771033263, iteration: 79385
loss: 1.031662940979004,grad_norm: 0.9999990679153984, iteration: 79386
loss: 1.0250065326690674,grad_norm: 0.9999992095606959, iteration: 79387
loss: 1.0476280450820923,grad_norm: 0.9999999237958379, iteration: 79388
loss: 1.0248582363128662,grad_norm: 0.9999991742298404, iteration: 79389
loss: 1.0074657201766968,grad_norm: 0.9701134763440862, iteration: 79390
loss: 1.0514768362045288,grad_norm: 0.9999992892265127, iteration: 79391
loss: 1.022679090499878,grad_norm: 0.9927882676630453, iteration: 79392
loss: 0.9870632290840149,grad_norm: 0.9120514702020567, iteration: 79393
loss: 0.9948840737342834,grad_norm: 0.9999999533138688, iteration: 79394
loss: 1.0515005588531494,grad_norm: 0.8547934504675313, iteration: 79395
loss: 1.0267077684402466,grad_norm: 0.9999993292846455, iteration: 79396
loss: 1.0429338216781616,grad_norm: 0.9999992601442578, iteration: 79397
loss: 1.0510114431381226,grad_norm: 0.9999990650502604, iteration: 79398
loss: 0.9965664148330688,grad_norm: 0.9999999106127911, iteration: 79399
loss: 1.0225486755371094,grad_norm: 0.9999992805498085, iteration: 79400
loss: 1.0267902612686157,grad_norm: 0.9999991547102166, iteration: 79401
loss: 1.0271800756454468,grad_norm: 0.9999990984438087, iteration: 79402
loss: 1.2444639205932617,grad_norm: 0.9999996920826085, iteration: 79403
loss: 1.0036423206329346,grad_norm: 0.9497668434070121, iteration: 79404
loss: 1.128171682357788,grad_norm: 0.9999996987873591, iteration: 79405
loss: 1.0072283744812012,grad_norm: 0.9070210313377394, iteration: 79406
loss: 1.2212884426116943,grad_norm: 0.9999998032270471, iteration: 79407
loss: 1.0097897052764893,grad_norm: 0.9999991642154228, iteration: 79408
loss: 0.9884376525878906,grad_norm: 0.9999998324721285, iteration: 79409
loss: 1.1242188215255737,grad_norm: 0.9999996497543554, iteration: 79410
loss: 0.9876658916473389,grad_norm: 0.8692484142850398, iteration: 79411
loss: 0.9689488410949707,grad_norm: 0.9240235576881726, iteration: 79412
loss: 1.0638054609298706,grad_norm: 0.9999993049509011, iteration: 79413
loss: 1.0346028804779053,grad_norm: 0.9762026758671216, iteration: 79414
loss: 1.080682635307312,grad_norm: 0.9999997301562175, iteration: 79415
loss: 1.0143218040466309,grad_norm: 0.9999991393010387, iteration: 79416
loss: 0.991361141204834,grad_norm: 0.7678583396243616, iteration: 79417
loss: 0.972970187664032,grad_norm: 0.9999993891742961, iteration: 79418
loss: 1.156837821006775,grad_norm: 0.9999996266365706, iteration: 79419
loss: 1.0317493677139282,grad_norm: 0.9382522131983182, iteration: 79420
loss: 1.1529293060302734,grad_norm: 0.9999998789555371, iteration: 79421
loss: 1.1514936685562134,grad_norm: 0.9999997208564523, iteration: 79422
loss: 1.0404835939407349,grad_norm: 0.835511064287327, iteration: 79423
loss: 0.9497671723365784,grad_norm: 0.9353781268392696, iteration: 79424
loss: 1.0470718145370483,grad_norm: 0.9999997173180324, iteration: 79425
loss: 0.9767038226127625,grad_norm: 0.969517066835698, iteration: 79426
loss: 1.1036434173583984,grad_norm: 0.9999993990546411, iteration: 79427
loss: 1.0682929754257202,grad_norm: 0.9999997859488069, iteration: 79428
loss: 1.0656654834747314,grad_norm: 0.9627001566278851, iteration: 79429
loss: 1.093645453453064,grad_norm: 0.9999998065192698, iteration: 79430
loss: 0.9779818058013916,grad_norm: 0.9999991913473202, iteration: 79431
loss: 1.009804606437683,grad_norm: 0.9585413092799634, iteration: 79432
loss: 0.9926022887229919,grad_norm: 0.9999992871213819, iteration: 79433
loss: 1.0286569595336914,grad_norm: 0.9945256810429838, iteration: 79434
loss: 1.0082447528839111,grad_norm: 0.9999990509246813, iteration: 79435
loss: 0.9946253895759583,grad_norm: 0.9999991712498644, iteration: 79436
loss: 1.0424598455429077,grad_norm: 0.9999990996709778, iteration: 79437
loss: 1.0111476182937622,grad_norm: 0.999999134037008, iteration: 79438
loss: 1.1042808294296265,grad_norm: 0.9999991594674121, iteration: 79439
loss: 0.9789049029350281,grad_norm: 0.9999990561133869, iteration: 79440
loss: 0.9962977170944214,grad_norm: 0.77858402055869, iteration: 79441
loss: 0.9972465634346008,grad_norm: 0.9346107882076728, iteration: 79442
loss: 1.035478949546814,grad_norm: 0.9027163619466236, iteration: 79443
loss: 1.0286827087402344,grad_norm: 0.9999991432663249, iteration: 79444
loss: 0.9850407242774963,grad_norm: 0.999999277960324, iteration: 79445
loss: 1.0412049293518066,grad_norm: 0.9363270633063833, iteration: 79446
loss: 0.9948182702064514,grad_norm: 0.9999989365100119, iteration: 79447
loss: 1.050277590751648,grad_norm: 0.8988696316053402, iteration: 79448
loss: 1.023198127746582,grad_norm: 0.999999778852261, iteration: 79449
loss: 1.012899398803711,grad_norm: 0.9999995133167794, iteration: 79450
loss: 1.0066219568252563,grad_norm: 0.9999997266273571, iteration: 79451
loss: 1.118010401725769,grad_norm: 0.9999992266180264, iteration: 79452
loss: 0.9458882212638855,grad_norm: 0.9934507942782317, iteration: 79453
loss: 1.0300902128219604,grad_norm: 0.9999990269114468, iteration: 79454
loss: 1.0192530155181885,grad_norm: 0.849967316380156, iteration: 79455
loss: 1.0298571586608887,grad_norm: 0.999999504415662, iteration: 79456
loss: 0.9850825667381287,grad_norm: 0.9565075283724661, iteration: 79457
loss: 1.018057942390442,grad_norm: 0.8443738971302363, iteration: 79458
loss: 1.0062259435653687,grad_norm: 0.8932462835570424, iteration: 79459
loss: 0.9904685020446777,grad_norm: 0.9999992686205763, iteration: 79460
loss: 1.0289381742477417,grad_norm: 0.9999993028376293, iteration: 79461
loss: 0.9741237163543701,grad_norm: 0.8903888859731292, iteration: 79462
loss: 1.0338575839996338,grad_norm: 0.999999596115099, iteration: 79463
loss: 1.0285375118255615,grad_norm: 0.9353036517637355, iteration: 79464
loss: 1.022446870803833,grad_norm: 0.8047894902464756, iteration: 79465
loss: 1.0368692874908447,grad_norm: 0.999999315685333, iteration: 79466
loss: 0.9977843165397644,grad_norm: 0.9574983418322509, iteration: 79467
loss: 1.004402995109558,grad_norm: 0.8534275376615187, iteration: 79468
loss: 1.0324522256851196,grad_norm: 0.9999992214911019, iteration: 79469
loss: 1.006052017211914,grad_norm: 0.8508909960942688, iteration: 79470
loss: 1.0253711938858032,grad_norm: 0.9999996020709275, iteration: 79471
loss: 1.0215407609939575,grad_norm: 0.9999990203571795, iteration: 79472
loss: 1.0316442251205444,grad_norm: 0.9856046183328568, iteration: 79473
loss: 1.0394850969314575,grad_norm: 0.9999997690453937, iteration: 79474
loss: 0.9643712043762207,grad_norm: 0.9999990889355063, iteration: 79475
loss: 1.0180083513259888,grad_norm: 0.9599750919881819, iteration: 79476
loss: 1.0732556581497192,grad_norm: 0.9999993373685997, iteration: 79477
loss: 0.989203929901123,grad_norm: 0.8232196339527406, iteration: 79478
loss: 1.0370359420776367,grad_norm: 0.9999990267761099, iteration: 79479
loss: 1.006958246231079,grad_norm: 0.9354047913654583, iteration: 79480
loss: 0.9903225898742676,grad_norm: 0.9999991547415694, iteration: 79481
loss: 1.01908540725708,grad_norm: 0.9999996351943974, iteration: 79482
loss: 0.9838414788246155,grad_norm: 0.8691358138019716, iteration: 79483
loss: 1.0162063837051392,grad_norm: 0.9999993621700024, iteration: 79484
loss: 1.058630108833313,grad_norm: 0.847751641552202, iteration: 79485
loss: 1.0393649339675903,grad_norm: 0.9999992266243339, iteration: 79486
loss: 1.0059375762939453,grad_norm: 0.8828524805875012, iteration: 79487
loss: 1.110754370689392,grad_norm: 0.9999990236521477, iteration: 79488
loss: 1.0671849250793457,grad_norm: 0.9999990031614269, iteration: 79489
loss: 1.0870741605758667,grad_norm: 0.9999991823519604, iteration: 79490
loss: 1.0446679592132568,grad_norm: 0.8620421916776478, iteration: 79491
loss: 0.9341831803321838,grad_norm: 0.8945128119954856, iteration: 79492
loss: 1.0627903938293457,grad_norm: 0.9999991254327854, iteration: 79493
loss: 0.9926672577857971,grad_norm: 0.9999992211596357, iteration: 79494
loss: 0.9980794191360474,grad_norm: 0.8743592907811208, iteration: 79495
loss: 1.066870927810669,grad_norm: 0.9251453959772286, iteration: 79496
loss: 1.035143494606018,grad_norm: 0.9999995344381843, iteration: 79497
loss: 1.0607582330703735,grad_norm: 0.9999999635818722, iteration: 79498
loss: 1.0176693201065063,grad_norm: 0.9318019438685796, iteration: 79499
loss: 0.9794725775718689,grad_norm: 0.9447762135820222, iteration: 79500
loss: 1.0302505493164062,grad_norm: 1.00000003622466, iteration: 79501
loss: 0.9902220368385315,grad_norm: 0.9333960511650317, iteration: 79502
loss: 1.247783899307251,grad_norm: 0.9999999773929936, iteration: 79503
loss: 1.0878974199295044,grad_norm: 0.9294471095444092, iteration: 79504
loss: 1.282206416130066,grad_norm: 0.9999997849207594, iteration: 79505
loss: 1.0357635021209717,grad_norm: 0.9999991902045795, iteration: 79506
loss: 1.2517403364181519,grad_norm: 0.9999997018641197, iteration: 79507
loss: 1.4402151107788086,grad_norm: 1.0000000930576893, iteration: 79508
loss: 1.5107322931289673,grad_norm: 0.9999999236876069, iteration: 79509
loss: 2.045684337615967,grad_norm: 0.9999998249227577, iteration: 79510
loss: 1.1436208486557007,grad_norm: 0.9999999221577317, iteration: 79511
loss: 2.0534613132476807,grad_norm: 0.9999998109740037, iteration: 79512
loss: 1.4828953742980957,grad_norm: 0.9999999880504031, iteration: 79513
loss: 2.387467622756958,grad_norm: 1.0000000856869706, iteration: 79514
loss: 1.1878235340118408,grad_norm: 0.999999980208453, iteration: 79515
loss: 0.9955831170082092,grad_norm: 0.8153188045905898, iteration: 79516
loss: 1.4505866765975952,grad_norm: 0.9999996745512607, iteration: 79517
loss: 1.17984139919281,grad_norm: 0.9999998558962985, iteration: 79518
loss: 1.2173510789871216,grad_norm: 0.9999999030468413, iteration: 79519
loss: 1.213869333267212,grad_norm: 0.99999944929111, iteration: 79520
loss: 1.0479604005813599,grad_norm: 0.8958628976835655, iteration: 79521
loss: 1.037598729133606,grad_norm: 0.9999998843321207, iteration: 79522
loss: 1.0213266611099243,grad_norm: 0.9999996903784213, iteration: 79523
loss: 1.000317931175232,grad_norm: 0.9768262851529985, iteration: 79524
loss: 0.9681146144866943,grad_norm: 0.9999991809590119, iteration: 79525
loss: 0.9945680499076843,grad_norm: 0.9999993962994581, iteration: 79526
loss: 1.0085477828979492,grad_norm: 0.9999998623083967, iteration: 79527
loss: 1.0653419494628906,grad_norm: 0.9999992124497329, iteration: 79528
loss: 1.0604411363601685,grad_norm: 1.0000000422591968, iteration: 79529
loss: 1.1499327421188354,grad_norm: 0.9999994541776414, iteration: 79530
loss: 1.0584124326705933,grad_norm: 0.9999999789814574, iteration: 79531
loss: 1.0485514402389526,grad_norm: 0.9483496256867462, iteration: 79532
loss: 1.099868893623352,grad_norm: 0.99999993487721, iteration: 79533
loss: 0.973318874835968,grad_norm: 0.9999992853223897, iteration: 79534
loss: 1.1514427661895752,grad_norm: 0.9999999656488708, iteration: 79535
loss: 1.0595026016235352,grad_norm: 0.9999998264285214, iteration: 79536
loss: 1.118466854095459,grad_norm: 0.9999997618367769, iteration: 79537
loss: 1.2460200786590576,grad_norm: 0.9999999496647558, iteration: 79538
loss: 1.4229086637496948,grad_norm: 0.9999997134459118, iteration: 79539
loss: 1.2441209554672241,grad_norm: 0.9999998589122631, iteration: 79540
loss: 1.1037263870239258,grad_norm: 0.9999999414314505, iteration: 79541
loss: 1.0807230472564697,grad_norm: 0.9999991616703848, iteration: 79542
loss: 1.3900991678237915,grad_norm: 1.0000000338499049, iteration: 79543
loss: 1.067337989807129,grad_norm: 0.9999994751451511, iteration: 79544
loss: 1.3776599168777466,grad_norm: 0.9999999549466113, iteration: 79545
loss: 1.0770888328552246,grad_norm: 0.9999992778426355, iteration: 79546
loss: 1.0063786506652832,grad_norm: 0.9999991771111768, iteration: 79547
loss: 1.1273152828216553,grad_norm: 0.9999999396118999, iteration: 79548
loss: 1.2460535764694214,grad_norm: 1.0000000046611046, iteration: 79549
loss: 1.4191988706588745,grad_norm: 0.9999994612066109, iteration: 79550
loss: 1.0202933549880981,grad_norm: 0.9999994145548116, iteration: 79551
loss: 1.1271024942398071,grad_norm: 0.9999997194816872, iteration: 79552
loss: 1.0678635835647583,grad_norm: 0.9999995985481764, iteration: 79553
loss: 0.988230288028717,grad_norm: 0.9254937458558041, iteration: 79554
loss: 1.049784541130066,grad_norm: 0.8212549114299595, iteration: 79555
loss: 1.129093050956726,grad_norm: 0.9999996519561821, iteration: 79556
loss: 1.0547219514846802,grad_norm: 0.999999878323244, iteration: 79557
loss: 0.9946541786193848,grad_norm: 0.9999989881605793, iteration: 79558
loss: 1.1813455820083618,grad_norm: 0.9999997293502322, iteration: 79559
loss: 0.9970118999481201,grad_norm: 0.9999991585035592, iteration: 79560
loss: 1.1509383916854858,grad_norm: 0.9999998939634084, iteration: 79561
loss: 1.1042675971984863,grad_norm: 0.9999997195071596, iteration: 79562
loss: 1.009514570236206,grad_norm: 0.9732668028008054, iteration: 79563
loss: 0.985621452331543,grad_norm: 0.9999989562940855, iteration: 79564
loss: 1.0384827852249146,grad_norm: 0.9999999233010894, iteration: 79565
loss: 1.2570992708206177,grad_norm: 0.9999999349052295, iteration: 79566
loss: 1.1520583629608154,grad_norm: 0.9999994283119034, iteration: 79567
loss: 1.183025598526001,grad_norm: 0.999999667137301, iteration: 79568
loss: 1.3710968494415283,grad_norm: 0.9999999164793429, iteration: 79569
loss: 0.9914067387580872,grad_norm: 0.9999990641076086, iteration: 79570
loss: 1.155615210533142,grad_norm: 0.9999995985081649, iteration: 79571
loss: 1.038283109664917,grad_norm: 0.999999704481297, iteration: 79572
loss: 1.115790843963623,grad_norm: 0.9999998220865006, iteration: 79573
loss: 1.1921391487121582,grad_norm: 0.9999995848197317, iteration: 79574
loss: 1.694647192955017,grad_norm: 0.9999997988010134, iteration: 79575
loss: 1.0823099613189697,grad_norm: 0.9952573570631017, iteration: 79576
loss: 1.1616480350494385,grad_norm: 0.9999996160959271, iteration: 79577
loss: 1.1854710578918457,grad_norm: 0.9999991242865394, iteration: 79578
loss: 1.1879557371139526,grad_norm: 0.9999999034112932, iteration: 79579
loss: 1.0864146947860718,grad_norm: 0.9999998320839016, iteration: 79580
loss: 1.0164672136306763,grad_norm: 0.9999991460393823, iteration: 79581
loss: 1.007546067237854,grad_norm: 0.9999992435064678, iteration: 79582
loss: 0.9985360503196716,grad_norm: 0.9999998847588191, iteration: 79583
loss: 1.1054246425628662,grad_norm: 0.999999819477631, iteration: 79584
loss: 1.073339581489563,grad_norm: 0.9999992313507698, iteration: 79585
loss: 1.0764386653900146,grad_norm: 0.9999993008721437, iteration: 79586
loss: 1.1488908529281616,grad_norm: 0.9999994246422237, iteration: 79587
loss: 1.0408625602722168,grad_norm: 0.9799642124757031, iteration: 79588
loss: 1.1188781261444092,grad_norm: 0.9999998380285917, iteration: 79589
loss: 1.1654006242752075,grad_norm: 0.999999787937478, iteration: 79590
loss: 1.2358909845352173,grad_norm: 0.999999621388888, iteration: 79591
loss: 1.3035656213760376,grad_norm: 0.9999994421933385, iteration: 79592
loss: 1.028162956237793,grad_norm: 0.9999998429042416, iteration: 79593
loss: 0.968744695186615,grad_norm: 0.9999989568361678, iteration: 79594
loss: 0.9875867366790771,grad_norm: 0.9999994619405488, iteration: 79595
loss: 1.1441391706466675,grad_norm: 0.9999996351954652, iteration: 79596
loss: 1.2033367156982422,grad_norm: 0.999999295962963, iteration: 79597
loss: 1.0973917245864868,grad_norm: 0.9999998189554401, iteration: 79598
loss: 1.028387188911438,grad_norm: 0.9999990374227344, iteration: 79599
loss: 1.0894558429718018,grad_norm: 0.9999995493452531, iteration: 79600
loss: 1.1449719667434692,grad_norm: 0.9999998182994244, iteration: 79601
loss: 1.1034806966781616,grad_norm: 0.9999998157241192, iteration: 79602
loss: 1.1037214994430542,grad_norm: 0.9999992621160326, iteration: 79603
loss: 1.241403341293335,grad_norm: 0.9999997935774578, iteration: 79604
loss: 1.0395244359970093,grad_norm: 0.9851372311302184, iteration: 79605
loss: 1.101759433746338,grad_norm: 0.9999996817004474, iteration: 79606
loss: 1.0160446166992188,grad_norm: 0.9999997418011792, iteration: 79607
loss: 1.0931377410888672,grad_norm: 0.9999995361519416, iteration: 79608
loss: 1.0944336652755737,grad_norm: 0.9999998025167394, iteration: 79609
loss: 0.9956898093223572,grad_norm: 0.9842937330267921, iteration: 79610
loss: 1.0601915121078491,grad_norm: 0.99999920713293, iteration: 79611
loss: 1.0508631467819214,grad_norm: 0.9999997837481973, iteration: 79612
loss: 1.1512686014175415,grad_norm: 0.9999991585481981, iteration: 79613
loss: 1.0000563859939575,grad_norm: 0.8605248406587721, iteration: 79614
loss: 1.0218164920806885,grad_norm: 0.9566106848333761, iteration: 79615
loss: 1.0637222528457642,grad_norm: 0.9999994476681986, iteration: 79616
loss: 1.0040762424468994,grad_norm: 0.9999993426118317, iteration: 79617
loss: 1.0723121166229248,grad_norm: 0.9999998098491956, iteration: 79618
loss: 1.0168471336364746,grad_norm: 0.9999998121436569, iteration: 79619
loss: 1.207921028137207,grad_norm: 0.9999995298971827, iteration: 79620
loss: 1.539671778678894,grad_norm: 0.9999997933746315, iteration: 79621
loss: 1.1162611246109009,grad_norm: 0.9999991734961643, iteration: 79622
loss: 0.9803010821342468,grad_norm: 0.9988455523181254, iteration: 79623
loss: 0.9751011729240417,grad_norm: 0.9999994732442551, iteration: 79624
loss: 1.267616629600525,grad_norm: 0.9999996741986232, iteration: 79625
loss: 1.0867129564285278,grad_norm: 0.9999994089330804, iteration: 79626
loss: 1.2246352434158325,grad_norm: 0.9999994972310742, iteration: 79627
loss: 1.042537808418274,grad_norm: 0.9999993470565173, iteration: 79628
loss: 1.0866674184799194,grad_norm: 0.9999993904572103, iteration: 79629
loss: 1.1644257307052612,grad_norm: 0.999999364561063, iteration: 79630
loss: 1.0265101194381714,grad_norm: 0.9999992933934952, iteration: 79631
loss: 1.0806217193603516,grad_norm: 0.999999302563758, iteration: 79632
loss: 0.9951311349868774,grad_norm: 0.9999993474887127, iteration: 79633
loss: 1.0782803297042847,grad_norm: 0.9999992790881922, iteration: 79634
loss: 1.1672711372375488,grad_norm: 0.9999994940103003, iteration: 79635
loss: 1.1108382940292358,grad_norm: 0.9999997232623695, iteration: 79636
loss: 1.3395389318466187,grad_norm: 0.9999997945137586, iteration: 79637
loss: 1.0365182161331177,grad_norm: 0.9999991081566315, iteration: 79638
loss: 1.200339436531067,grad_norm: 0.9999993616891136, iteration: 79639
loss: 1.1146008968353271,grad_norm: 0.9999993389194871, iteration: 79640
loss: 1.0225995779037476,grad_norm: 0.9999990488304007, iteration: 79641
loss: 1.116296410560608,grad_norm: 0.9999994725211786, iteration: 79642
loss: 1.205426812171936,grad_norm: 0.9999996314690857, iteration: 79643
loss: 1.1176098585128784,grad_norm: 0.9999992524720152, iteration: 79644
loss: 1.111777424812317,grad_norm: 0.8731294289064498, iteration: 79645
loss: 1.035955548286438,grad_norm: 0.9999998290256091, iteration: 79646
loss: 1.2772538661956787,grad_norm: 1.000000058485474, iteration: 79647
loss: 1.2035863399505615,grad_norm: 0.9999996842342357, iteration: 79648
loss: 1.057927131652832,grad_norm: 0.9999993585827147, iteration: 79649
loss: 1.231172800064087,grad_norm: 0.9999999664293866, iteration: 79650
loss: 1.164125680923462,grad_norm: 0.9999991546499131, iteration: 79651
loss: 1.0937201976776123,grad_norm: 0.9999992326454678, iteration: 79652
loss: 1.155969262123108,grad_norm: 0.9999994696975537, iteration: 79653
loss: 1.0324753522872925,grad_norm: 0.999999708660681, iteration: 79654
loss: 1.183638095855713,grad_norm: 0.9999996580709153, iteration: 79655
loss: 1.1534615755081177,grad_norm: 0.9999995751551065, iteration: 79656
loss: 1.0345097780227661,grad_norm: 0.9999992038937139, iteration: 79657
loss: 1.1518535614013672,grad_norm: 0.9999993645829752, iteration: 79658
loss: 1.053189754486084,grad_norm: 0.9999997488428852, iteration: 79659
loss: 1.0644004344940186,grad_norm: 0.9999994419395755, iteration: 79660
loss: 1.2858399152755737,grad_norm: 0.9999999054784854, iteration: 79661
loss: 1.0208559036254883,grad_norm: 0.9999997149785932, iteration: 79662
loss: 1.0509920120239258,grad_norm: 0.9999991767537219, iteration: 79663
loss: 1.1856733560562134,grad_norm: 0.9999992508276306, iteration: 79664
loss: 1.1665326356887817,grad_norm: 0.9999999336146052, iteration: 79665
loss: 1.1061582565307617,grad_norm: 0.9999998333670665, iteration: 79666
loss: 1.1125801801681519,grad_norm: 0.9999998602446019, iteration: 79667
loss: 1.0914578437805176,grad_norm: 0.9453569539873578, iteration: 79668
loss: 1.1658732891082764,grad_norm: 0.9999991830484211, iteration: 79669
loss: 1.046184778213501,grad_norm: 0.9999997079121199, iteration: 79670
loss: 1.3363076448440552,grad_norm: 0.999999934395961, iteration: 79671
loss: 1.148600697517395,grad_norm: 0.9999996799135045, iteration: 79672
loss: 1.1928786039352417,grad_norm: 0.9999998638202107, iteration: 79673
loss: 1.3429127931594849,grad_norm: 0.999999985409379, iteration: 79674
loss: 1.0649900436401367,grad_norm: 0.9999997302836126, iteration: 79675
loss: 1.216265320777893,grad_norm: 0.9999994969255837, iteration: 79676
loss: 1.2435173988342285,grad_norm: 1.0000000602361956, iteration: 79677
loss: 1.4993653297424316,grad_norm: 0.9999998566337467, iteration: 79678
loss: 1.1234235763549805,grad_norm: 0.9999999041504344, iteration: 79679
loss: 1.3863704204559326,grad_norm: 0.9999999273793462, iteration: 79680
loss: 1.2795718908309937,grad_norm: 0.9999997202287778, iteration: 79681
loss: 1.1801509857177734,grad_norm: 0.9999999564977321, iteration: 79682
loss: 1.026419758796692,grad_norm: 0.9999993312420251, iteration: 79683
loss: 1.2104105949401855,grad_norm: 0.9999993703083337, iteration: 79684
loss: 1.3949832916259766,grad_norm: 0.9999999136141915, iteration: 79685
loss: 1.2193387746810913,grad_norm: 0.9999999735649459, iteration: 79686
loss: 1.0590943098068237,grad_norm: 0.9999999327515349, iteration: 79687
loss: 1.116967797279358,grad_norm: 0.9999999512351544, iteration: 79688
loss: 1.3204587697982788,grad_norm: 0.9999996626467147, iteration: 79689
loss: 1.1520845890045166,grad_norm: 0.9999996730211366, iteration: 79690
loss: 1.006503939628601,grad_norm: 0.9999991400461055, iteration: 79691
loss: 1.1338707208633423,grad_norm: 0.9999991789990448, iteration: 79692
loss: 1.1845587491989136,grad_norm: 0.999999255618946, iteration: 79693
loss: 1.3456951379776,grad_norm: 0.9999994498286461, iteration: 79694
loss: 1.097033143043518,grad_norm: 0.9999991395289533, iteration: 79695
loss: 1.067541241645813,grad_norm: 0.9999997730521625, iteration: 79696
loss: 1.110845923423767,grad_norm: 0.9999993017164119, iteration: 79697
loss: 1.1262296438217163,grad_norm: 0.9999991518928155, iteration: 79698
loss: 1.034196138381958,grad_norm: 0.999999824678182, iteration: 79699
loss: 0.9871169924736023,grad_norm: 0.9732041527585454, iteration: 79700
loss: 1.2535706758499146,grad_norm: 0.9999998589812519, iteration: 79701
loss: 1.0269243717193604,grad_norm: 0.9999991849161612, iteration: 79702
loss: 1.2298284769058228,grad_norm: 0.9999998971536527, iteration: 79703
loss: 0.9644210934638977,grad_norm: 0.8936876225473196, iteration: 79704
loss: 1.0108507871627808,grad_norm: 0.999999193082062, iteration: 79705
loss: 1.093135118484497,grad_norm: 0.9999996543789967, iteration: 79706
loss: 0.9928077459335327,grad_norm: 0.9999991869979251, iteration: 79707
loss: 1.087634801864624,grad_norm: 0.9999991556212073, iteration: 79708
loss: 1.2187579870224,grad_norm: 0.9999991457769355, iteration: 79709
loss: 1.0494085550308228,grad_norm: 0.9292601448301305, iteration: 79710
loss: 0.9877398014068604,grad_norm: 0.9999990249778247, iteration: 79711
loss: 1.2447831630706787,grad_norm: 0.9999992119955781, iteration: 79712
loss: 1.2212258577346802,grad_norm: 0.9999997144431888, iteration: 79713
loss: 1.139174461364746,grad_norm: 0.9999998074244393, iteration: 79714
loss: 1.0491474866867065,grad_norm: 0.9337320568780303, iteration: 79715
loss: 1.2361347675323486,grad_norm: 0.999999831883082, iteration: 79716
loss: 1.051052212715149,grad_norm: 0.9999998587428535, iteration: 79717
loss: 1.175344467163086,grad_norm: 0.9999997759858785, iteration: 79718
loss: 1.2056527137756348,grad_norm: 0.9999999186424768, iteration: 79719
loss: 1.1107946634292603,grad_norm: 0.9999998400924488, iteration: 79720
loss: 1.0970654487609863,grad_norm: 0.9999993212251475, iteration: 79721
loss: 1.0826256275177002,grad_norm: 0.9999997120466527, iteration: 79722
loss: 1.0822272300720215,grad_norm: 0.9999991185884121, iteration: 79723
loss: 1.26371431350708,grad_norm: 0.9999998823598372, iteration: 79724
loss: 1.1487388610839844,grad_norm: 0.999999029766554, iteration: 79725
loss: 1.1019439697265625,grad_norm: 0.9999995384558431, iteration: 79726
loss: 0.9894018769264221,grad_norm: 0.9999991022122902, iteration: 79727
loss: 1.040903091430664,grad_norm: 0.9999990523267445, iteration: 79728
loss: 0.98888099193573,grad_norm: 0.9011207272171536, iteration: 79729
loss: 1.0934940576553345,grad_norm: 0.9999995550867783, iteration: 79730
loss: 1.035745620727539,grad_norm: 0.9999992189647151, iteration: 79731
loss: 0.9938994646072388,grad_norm: 0.9999990074673986, iteration: 79732
loss: 1.0118836164474487,grad_norm: 0.9999999310726407, iteration: 79733
loss: 1.0658009052276611,grad_norm: 0.999999309393312, iteration: 79734
loss: 1.199695110321045,grad_norm: 0.9999997465696918, iteration: 79735
loss: 1.0571857690811157,grad_norm: 0.9999990153746601, iteration: 79736
loss: 1.077581763267517,grad_norm: 0.9999998934757893, iteration: 79737
loss: 1.377468228340149,grad_norm: 0.9999997956782847, iteration: 79738
loss: 1.0298449993133545,grad_norm: 0.9999990983537489, iteration: 79739
loss: 1.0100449323654175,grad_norm: 0.999999045149637, iteration: 79740
loss: 1.1368986368179321,grad_norm: 0.999999439061212, iteration: 79741
loss: 1.0347694158554077,grad_norm: 0.9999990782189506, iteration: 79742
loss: 1.1773582696914673,grad_norm: 0.9999997183231072, iteration: 79743
loss: 0.9952226877212524,grad_norm: 0.9999998860139259, iteration: 79744
loss: 1.0282076597213745,grad_norm: 0.9999996657681933, iteration: 79745
loss: 1.0363479852676392,grad_norm: 0.9178267917943181, iteration: 79746
loss: 1.1708017587661743,grad_norm: 0.9999996394282638, iteration: 79747
loss: 1.0321663618087769,grad_norm: 0.9999991015911098, iteration: 79748
loss: 1.0329545736312866,grad_norm: 0.999999141428775, iteration: 79749
loss: 1.0060538053512573,grad_norm: 0.9999991186530887, iteration: 79750
loss: 1.0369356870651245,grad_norm: 0.964639919890259, iteration: 79751
loss: 1.0157431364059448,grad_norm: 0.8250168367385805, iteration: 79752
loss: 1.0530577898025513,grad_norm: 0.9999991155682151, iteration: 79753
loss: 1.0458805561065674,grad_norm: 0.9999994925854732, iteration: 79754
loss: 1.047666072845459,grad_norm: 0.9999993638647088, iteration: 79755
loss: 1.072792410850525,grad_norm: 0.9999993788080104, iteration: 79756
loss: 0.9893360733985901,grad_norm: 0.9999996599690173, iteration: 79757
loss: 1.0123857259750366,grad_norm: 0.9999992270462544, iteration: 79758
loss: 1.0254225730895996,grad_norm: 0.999999187118421, iteration: 79759
loss: 1.0818722248077393,grad_norm: 0.9999992771649115, iteration: 79760
loss: 1.0130337476730347,grad_norm: 0.91722341080979, iteration: 79761
loss: 1.039389967918396,grad_norm: 0.9324833559867343, iteration: 79762
loss: 1.1243621110916138,grad_norm: 0.999999565931235, iteration: 79763
loss: 0.9777268767356873,grad_norm: 0.9999990671770081, iteration: 79764
loss: 1.0270837545394897,grad_norm: 0.9999992473475604, iteration: 79765
loss: 1.0485879182815552,grad_norm: 0.9315330119954667, iteration: 79766
loss: 1.068402647972107,grad_norm: 0.9999996378986872, iteration: 79767
loss: 1.0134294033050537,grad_norm: 0.9999993011542753, iteration: 79768
loss: 1.0588791370391846,grad_norm: 0.9999996349164694, iteration: 79769
loss: 0.9977960586547852,grad_norm: 0.938919713766518, iteration: 79770
loss: 1.0155844688415527,grad_norm: 0.9999992699510504, iteration: 79771
loss: 1.0013006925582886,grad_norm: 0.9999989964852423, iteration: 79772
loss: 1.0606478452682495,grad_norm: 0.9293333885062737, iteration: 79773
loss: 1.0346733331680298,grad_norm: 0.8640475980574663, iteration: 79774
loss: 1.1775383949279785,grad_norm: 0.9999995458090525, iteration: 79775
loss: 1.2225780487060547,grad_norm: 0.9999998931530206, iteration: 79776
loss: 1.069280743598938,grad_norm: 0.9999994452636113, iteration: 79777
loss: 1.0582380294799805,grad_norm: 0.9999991769969377, iteration: 79778
loss: 1.1063514947891235,grad_norm: 0.999999764530887, iteration: 79779
loss: 1.0008965730667114,grad_norm: 0.967795651749717, iteration: 79780
loss: 1.06302011013031,grad_norm: 0.9999995981996123, iteration: 79781
loss: 1.1061840057373047,grad_norm: 0.9999993968590738, iteration: 79782
loss: 1.0031459331512451,grad_norm: 0.9999989146255336, iteration: 79783
loss: 1.0041468143463135,grad_norm: 0.883330468826149, iteration: 79784
loss: 1.0390506982803345,grad_norm: 0.9999998834472643, iteration: 79785
loss: 1.054746150970459,grad_norm: 0.9999991293189139, iteration: 79786
loss: 0.9808422923088074,grad_norm: 0.9999989630075146, iteration: 79787
loss: 1.199044942855835,grad_norm: 0.9999994578909498, iteration: 79788
loss: 1.0988101959228516,grad_norm: 0.9999991014098008, iteration: 79789
loss: 0.9989963173866272,grad_norm: 0.9999991622679211, iteration: 79790
loss: 0.9944978356361389,grad_norm: 0.7969198703984722, iteration: 79791
loss: 1.0328404903411865,grad_norm: 0.9999997329586334, iteration: 79792
loss: 1.0284053087234497,grad_norm: 0.9999992098738217, iteration: 79793
loss: 1.1069542169570923,grad_norm: 0.9999997774571198, iteration: 79794
loss: 1.0546456575393677,grad_norm: 0.9999990165128511, iteration: 79795
loss: 1.040702223777771,grad_norm: 1.000000039684089, iteration: 79796
loss: 0.9834406971931458,grad_norm: 0.971964283376381, iteration: 79797
loss: 1.0251922607421875,grad_norm: 0.8658055965030962, iteration: 79798
loss: 1.073577642440796,grad_norm: 0.9999990697141794, iteration: 79799
loss: 1.0612084865570068,grad_norm: 0.9999999548013848, iteration: 79800
loss: 1.0440571308135986,grad_norm: 0.9999995125340069, iteration: 79801
loss: 1.041277289390564,grad_norm: 0.9999996676424255, iteration: 79802
loss: 0.9934790730476379,grad_norm: 0.8716701056518531, iteration: 79803
loss: 0.9862827062606812,grad_norm: 0.999999534956352, iteration: 79804
loss: 1.0073261260986328,grad_norm: 0.8534900343296008, iteration: 79805
loss: 1.0346425771713257,grad_norm: 0.9704144005095536, iteration: 79806
loss: 1.0499662160873413,grad_norm: 0.9999990831985519, iteration: 79807
loss: 0.9779377579689026,grad_norm: 0.9999994799734859, iteration: 79808
loss: 1.183521032333374,grad_norm: 0.9010848143139313, iteration: 79809
loss: 1.0073225498199463,grad_norm: 0.8419243472571135, iteration: 79810
loss: 1.0864224433898926,grad_norm: 0.999998934287377, iteration: 79811
loss: 1.0173076391220093,grad_norm: 0.8109081850142921, iteration: 79812
loss: 0.991120457649231,grad_norm: 0.8781009234693966, iteration: 79813
loss: 1.0681928396224976,grad_norm: 0.9999992364519641, iteration: 79814
loss: 1.0292439460754395,grad_norm: 0.9999992999438735, iteration: 79815
loss: 1.0069103240966797,grad_norm: 0.9999991504393786, iteration: 79816
loss: 1.053932547569275,grad_norm: 0.9999990683819576, iteration: 79817
loss: 1.1834399700164795,grad_norm: 0.9999996102327567, iteration: 79818
loss: 1.0337485074996948,grad_norm: 0.9023486321194453, iteration: 79819
loss: 1.084191918373108,grad_norm: 0.9999992547203381, iteration: 79820
loss: 0.9991283416748047,grad_norm: 0.9454199359695473, iteration: 79821
loss: 1.0060913562774658,grad_norm: 0.9999997865486557, iteration: 79822
loss: 1.0441358089447021,grad_norm: 0.9999999348881644, iteration: 79823
loss: 1.0125001668930054,grad_norm: 0.957461516049289, iteration: 79824
loss: 0.9641223549842834,grad_norm: 0.963176575350126, iteration: 79825
loss: 0.9969822764396667,grad_norm: 0.9999990928464727, iteration: 79826
loss: 1.0279946327209473,grad_norm: 0.9999998439338285, iteration: 79827
loss: 1.0829956531524658,grad_norm: 0.9685954968812944, iteration: 79828
loss: 1.1990187168121338,grad_norm: 0.9999996297863755, iteration: 79829
loss: 1.0411986112594604,grad_norm: 0.9999997119629308, iteration: 79830
loss: 1.0977412462234497,grad_norm: 0.9999999170999844, iteration: 79831
loss: 1.317441701889038,grad_norm: 0.9999997930732812, iteration: 79832
loss: 1.1032358407974243,grad_norm: 0.9999990015650445, iteration: 79833
loss: 0.9989333152770996,grad_norm: 1.0000000032392593, iteration: 79834
loss: 1.0040476322174072,grad_norm: 0.9999990978327902, iteration: 79835
loss: 1.039512276649475,grad_norm: 0.8860456146939478, iteration: 79836
loss: 1.0363343954086304,grad_norm: 0.9999997908191995, iteration: 79837
loss: 1.0381957292556763,grad_norm: 0.9999999733632517, iteration: 79838
loss: 1.2386964559555054,grad_norm: 0.999999650235883, iteration: 79839
loss: 1.0147950649261475,grad_norm: 0.9999991431486337, iteration: 79840
loss: 1.0987763404846191,grad_norm: 0.9999990828678254, iteration: 79841
loss: 1.0754278898239136,grad_norm: 0.999999315592218, iteration: 79842
loss: 0.971855878829956,grad_norm: 0.9999991826210887, iteration: 79843
loss: 0.9813107252120972,grad_norm: 0.9804390170593262, iteration: 79844
loss: 1.0335370302200317,grad_norm: 0.9999998964580911, iteration: 79845
loss: 0.9858125448226929,grad_norm: 0.9999991752624697, iteration: 79846
loss: 1.0072367191314697,grad_norm: 0.9999993331383288, iteration: 79847
loss: 1.0375666618347168,grad_norm: 0.9999991715909606, iteration: 79848
loss: 1.126987099647522,grad_norm: 0.9999997681319476, iteration: 79849
loss: 1.0659960508346558,grad_norm: 0.9999997895428606, iteration: 79850
loss: 1.017724633216858,grad_norm: 0.989821816036203, iteration: 79851
loss: 1.0425690412521362,grad_norm: 0.9999990677292647, iteration: 79852
loss: 1.011910080909729,grad_norm: 0.8517534041670156, iteration: 79853
loss: 1.1083134412765503,grad_norm: 0.9438364552149469, iteration: 79854
loss: 1.006341576576233,grad_norm: 0.9999990551998847, iteration: 79855
loss: 1.0824306011199951,grad_norm: 0.999999229507011, iteration: 79856
loss: 1.1964592933654785,grad_norm: 0.9999999630765074, iteration: 79857
loss: 0.9679540395736694,grad_norm: 0.855856804188632, iteration: 79858
loss: 1.2174367904663086,grad_norm: 0.9999997051866037, iteration: 79859
loss: 1.0193175077438354,grad_norm: 0.9999991305652263, iteration: 79860
loss: 0.9811750054359436,grad_norm: 1.0000000150953114, iteration: 79861
loss: 0.9929704070091248,grad_norm: 0.9254378024313191, iteration: 79862
loss: 1.0416070222854614,grad_norm: 0.9999994314216327, iteration: 79863
loss: 1.080288052558899,grad_norm: 0.9999996590240587, iteration: 79864
loss: 1.0748683214187622,grad_norm: 0.9999997778907433, iteration: 79865
loss: 1.0765537023544312,grad_norm: 0.9999998217619575, iteration: 79866
loss: 1.064583420753479,grad_norm: 0.9999998763605764, iteration: 79867
loss: 0.9731132984161377,grad_norm: 0.9391952992074601, iteration: 79868
loss: 1.019061803817749,grad_norm: 0.9999991910426449, iteration: 79869
loss: 1.234108805656433,grad_norm: 0.9999993467317551, iteration: 79870
loss: 1.0309937000274658,grad_norm: 0.9999991551204982, iteration: 79871
loss: 1.0298024415969849,grad_norm: 0.9999992260460546, iteration: 79872
loss: 0.9667268395423889,grad_norm: 0.9205731292337093, iteration: 79873
loss: 1.1386457681655884,grad_norm: 0.9999998318378307, iteration: 79874
loss: 0.9670634865760803,grad_norm: 0.9111511978066449, iteration: 79875
loss: 1.085219383239746,grad_norm: 0.9999996175776527, iteration: 79876
loss: 1.3652474880218506,grad_norm: 0.999999338721655, iteration: 79877
loss: 1.0687192678451538,grad_norm: 0.9999997103357993, iteration: 79878
loss: 0.9768907427787781,grad_norm: 0.9999991584517354, iteration: 79879
loss: 1.1165097951889038,grad_norm: 0.9999999280100434, iteration: 79880
loss: 1.0115036964416504,grad_norm: 0.8989365603862736, iteration: 79881
loss: 1.0201002359390259,grad_norm: 0.9999995987194424, iteration: 79882
loss: 1.0283147096633911,grad_norm: 0.9999998942663233, iteration: 79883
loss: 1.1158561706542969,grad_norm: 0.9999995413307377, iteration: 79884
loss: 1.1421825885772705,grad_norm: 0.9999998819495112, iteration: 79885
loss: 0.9926178455352783,grad_norm: 0.9331871595036769, iteration: 79886
loss: 1.0841546058654785,grad_norm: 1.0000000360991796, iteration: 79887
loss: 1.0567055940628052,grad_norm: 0.9999993718285602, iteration: 79888
loss: 1.0291006565093994,grad_norm: 0.9999992642569059, iteration: 79889
loss: 1.0085744857788086,grad_norm: 0.9962714959029446, iteration: 79890
loss: 1.0231051445007324,grad_norm: 0.919039519120792, iteration: 79891
loss: 1.0363268852233887,grad_norm: 0.9999995464517079, iteration: 79892
loss: 1.0370126962661743,grad_norm: 0.9999998522377036, iteration: 79893
loss: 1.015784740447998,grad_norm: 0.9963930915658828, iteration: 79894
loss: 0.985230565071106,grad_norm: 0.9999990424059675, iteration: 79895
loss: 1.006162166595459,grad_norm: 0.999999761795794, iteration: 79896
loss: 1.0258029699325562,grad_norm: 0.9999991157401779, iteration: 79897
loss: 1.049481749534607,grad_norm: 0.9999991527471889, iteration: 79898
loss: 0.9766499996185303,grad_norm: 0.7981338845115364, iteration: 79899
loss: 1.003868579864502,grad_norm: 0.9999991515365438, iteration: 79900
loss: 0.9831216335296631,grad_norm: 0.9999997060872698, iteration: 79901
loss: 1.0504388809204102,grad_norm: 0.9999998815387122, iteration: 79902
loss: 1.0536185503005981,grad_norm: 0.999999233314701, iteration: 79903
loss: 1.024441123008728,grad_norm: 0.9623188700646239, iteration: 79904
loss: 1.0385401248931885,grad_norm: 0.9999991058672077, iteration: 79905
loss: 0.9998486638069153,grad_norm: 0.9999990177309158, iteration: 79906
loss: 1.0112073421478271,grad_norm: 0.9999990692531786, iteration: 79907
loss: 1.0281081199645996,grad_norm: 0.9529317003685809, iteration: 79908
loss: 1.026698112487793,grad_norm: 0.9999995481333397, iteration: 79909
loss: 1.0320850610733032,grad_norm: 0.9999990362848121, iteration: 79910
loss: 1.0863484144210815,grad_norm: 0.9999990646890551, iteration: 79911
loss: 1.073762059211731,grad_norm: 0.9999997178796308, iteration: 79912
loss: 1.037817358970642,grad_norm: 0.9999995500003819, iteration: 79913
loss: 1.0751270055770874,grad_norm: 0.9725359323211862, iteration: 79914
loss: 0.9635806679725647,grad_norm: 0.8838852383466552, iteration: 79915
loss: 0.9790661931037903,grad_norm: 0.9487928827218436, iteration: 79916
loss: 1.0331149101257324,grad_norm: 0.9446912183193, iteration: 79917
loss: 0.9919483065605164,grad_norm: 0.9999991499426311, iteration: 79918
loss: 0.9897092580795288,grad_norm: 0.9999975688141751, iteration: 79919
loss: 1.0332094430923462,grad_norm: 0.9999992553178462, iteration: 79920
loss: 1.021167278289795,grad_norm: 0.9999993443744346, iteration: 79921
loss: 1.0122201442718506,grad_norm: 0.9999991886520642, iteration: 79922
loss: 0.9921746253967285,grad_norm: 0.9999997167207503, iteration: 79923
loss: 0.999595582485199,grad_norm: 0.9999996725275613, iteration: 79924
loss: 1.0808703899383545,grad_norm: 0.999999041033949, iteration: 79925
loss: 1.0026239156723022,grad_norm: 0.9999991660916855, iteration: 79926
loss: 1.0083491802215576,grad_norm: 0.8120849721671431, iteration: 79927
loss: 1.0131996870040894,grad_norm: 0.9999999070904583, iteration: 79928
loss: 1.0390814542770386,grad_norm: 0.9999991438493622, iteration: 79929
loss: 0.9914523363113403,grad_norm: 0.9289242544858676, iteration: 79930
loss: 1.0292474031448364,grad_norm: 0.8744443345183696, iteration: 79931
loss: 1.0369725227355957,grad_norm: 0.8457078210243992, iteration: 79932
loss: 1.000046968460083,grad_norm: 0.8549791314881096, iteration: 79933
loss: 0.9777506589889526,grad_norm: 0.9379503227971575, iteration: 79934
loss: 0.9785743951797485,grad_norm: 0.9999991443617938, iteration: 79935
loss: 0.99993497133255,grad_norm: 0.9999994520777477, iteration: 79936
loss: 1.0274677276611328,grad_norm: 0.9999993343874234, iteration: 79937
loss: 1.0274831056594849,grad_norm: 0.9999990988556395, iteration: 79938
loss: 0.9941419363021851,grad_norm: 0.9728596723943767, iteration: 79939
loss: 1.0419564247131348,grad_norm: 0.9999993409010325, iteration: 79940
loss: 1.087191104888916,grad_norm: 0.999999609639533, iteration: 79941
loss: 1.0343575477600098,grad_norm: 0.9218509880122479, iteration: 79942
loss: 1.0431876182556152,grad_norm: 0.9999989944419361, iteration: 79943
loss: 1.0685350894927979,grad_norm: 0.9999992695507531, iteration: 79944
loss: 1.065887451171875,grad_norm: 0.9999990606756143, iteration: 79945
loss: 0.9786635041236877,grad_norm: 0.9999992748777702, iteration: 79946
loss: 1.0244847536087036,grad_norm: 0.9999991301872154, iteration: 79947
loss: 0.992439866065979,grad_norm: 0.9999990320357828, iteration: 79948
loss: 1.0150402784347534,grad_norm: 0.9999991454941043, iteration: 79949
loss: 1.0667473077774048,grad_norm: 0.8934775360616086, iteration: 79950
loss: 1.0756763219833374,grad_norm: 0.9999994762428237, iteration: 79951
loss: 0.9939457774162292,grad_norm: 0.8453527288797791, iteration: 79952
loss: 1.05281662940979,grad_norm: 0.999999729397509, iteration: 79953
loss: 1.0248290300369263,grad_norm: 0.989805562693484, iteration: 79954
loss: 1.0324023962020874,grad_norm: 0.9999990444322787, iteration: 79955
loss: 1.0146673917770386,grad_norm: 0.9999991136181521, iteration: 79956
loss: 0.9988736510276794,grad_norm: 0.9999992942609401, iteration: 79957
loss: 1.0498749017715454,grad_norm: 0.9000119894623064, iteration: 79958
loss: 1.0021378993988037,grad_norm: 0.9999991502319964, iteration: 79959
loss: 0.9624084234237671,grad_norm: 0.9018080790139182, iteration: 79960
loss: 1.020163655281067,grad_norm: 0.9999990894711316, iteration: 79961
loss: 0.9817979335784912,grad_norm: 0.9849315466065982, iteration: 79962
loss: 1.0298231840133667,grad_norm: 0.970695068710829, iteration: 79963
loss: 1.0607553720474243,grad_norm: 0.9934629883427444, iteration: 79964
loss: 1.0159642696380615,grad_norm: 0.9243634612895932, iteration: 79965
loss: 1.0024938583374023,grad_norm: 0.9599085171667314, iteration: 79966
loss: 1.0268679857254028,grad_norm: 0.8755156128535678, iteration: 79967
loss: 0.9866146445274353,grad_norm: 0.9999992078878516, iteration: 79968
loss: 1.037466287612915,grad_norm: 0.999999459391739, iteration: 79969
loss: 1.0218017101287842,grad_norm: 0.9495943310284635, iteration: 79970
loss: 1.0218628644943237,grad_norm: 0.9999995941962191, iteration: 79971
loss: 0.9606750011444092,grad_norm: 0.8418841589933969, iteration: 79972
loss: 1.0093746185302734,grad_norm: 0.987198053369407, iteration: 79973
loss: 1.1288524866104126,grad_norm: 0.9999990273842598, iteration: 79974
loss: 1.0630576610565186,grad_norm: 0.8385872463539846, iteration: 79975
loss: 1.026914119720459,grad_norm: 0.9999992580377254, iteration: 79976
loss: 1.019848346710205,grad_norm: 0.9999990135005427, iteration: 79977
loss: 0.9854516386985779,grad_norm: 0.9999991072804613, iteration: 79978
loss: 1.0161705017089844,grad_norm: 0.8832227198545122, iteration: 79979
loss: 0.9810142517089844,grad_norm: 0.9999990127063476, iteration: 79980
loss: 0.992724597454071,grad_norm: 0.9999966721868191, iteration: 79981
loss: 0.9941704273223877,grad_norm: 0.9448474091108066, iteration: 79982
loss: 0.9964238405227661,grad_norm: 0.9777366185532141, iteration: 79983
loss: 0.96110600233078,grad_norm: 0.9328391894231458, iteration: 79984
loss: 0.9935677647590637,grad_norm: 0.9999991700953321, iteration: 79985
loss: 1.0432435274124146,grad_norm: 0.9999997510287042, iteration: 79986
loss: 1.0234496593475342,grad_norm: 0.9712830201731378, iteration: 79987
loss: 0.9857627153396606,grad_norm: 0.9684844126370131, iteration: 79988
loss: 0.9938783049583435,grad_norm: 0.9182628998019071, iteration: 79989
loss: 1.0063594579696655,grad_norm: 0.8858499400968188, iteration: 79990
loss: 1.0599596500396729,grad_norm: 0.9999992596002669, iteration: 79991
loss: 0.9535496234893799,grad_norm: 0.9191492464702076, iteration: 79992
loss: 1.0143628120422363,grad_norm: 0.8970551904362783, iteration: 79993
loss: 1.0958592891693115,grad_norm: 0.9999998047856391, iteration: 79994
loss: 1.0461851358413696,grad_norm: 0.8910870023539127, iteration: 79995
loss: 1.0139037370681763,grad_norm: 0.9285595226598649, iteration: 79996
loss: 1.0030385255813599,grad_norm: 0.921262713449581, iteration: 79997
loss: 1.042890191078186,grad_norm: 0.9227739409008285, iteration: 79998
loss: 1.04538094997406,grad_norm: 0.9999989915042343, iteration: 79999
loss: 1.0434569120407104,grad_norm: 0.9999994488779527, iteration: 80000
Evaluating at step 80000
{'val': 1.0074090547859669, 'test': 2.397986426521033}
loss: 1.120532512664795,grad_norm: 0.9999994948764829, iteration: 80001
loss: 1.0437041521072388,grad_norm: 0.7573406832494494, iteration: 80002
loss: 1.1204861402511597,grad_norm: 1.0000000103518805, iteration: 80003
loss: 1.0551029443740845,grad_norm: 0.8589376318389014, iteration: 80004
loss: 0.9964991807937622,grad_norm: 0.9010597002627346, iteration: 80005
loss: 1.002120018005371,grad_norm: 0.8367499093390072, iteration: 80006
loss: 0.9731650352478027,grad_norm: 0.8812769031586543, iteration: 80007
loss: 1.0084275007247925,grad_norm: 0.9999996308665465, iteration: 80008
loss: 1.0264655351638794,grad_norm: 0.9999994465279549, iteration: 80009
loss: 1.0226867198944092,grad_norm: 0.957989968589446, iteration: 80010
loss: 1.0371222496032715,grad_norm: 0.9999994326612068, iteration: 80011
loss: 0.9984274506568909,grad_norm: 0.9999992641211866, iteration: 80012
loss: 1.013932228088379,grad_norm: 0.9105150641500139, iteration: 80013
loss: 1.022871494293213,grad_norm: 0.7680941557604183, iteration: 80014
loss: 0.9955688714981079,grad_norm: 0.9999992480241551, iteration: 80015
loss: 1.0470527410507202,grad_norm: 0.9409119267833147, iteration: 80016
loss: 1.0131099224090576,grad_norm: 0.9999992612639244, iteration: 80017
loss: 1.0093461275100708,grad_norm: 0.7940363375264541, iteration: 80018
loss: 0.9793325662612915,grad_norm: 0.9425958324907704, iteration: 80019
loss: 0.9847067594528198,grad_norm: 0.9999989937627793, iteration: 80020
loss: 0.9735265374183655,grad_norm: 0.9006612342959855, iteration: 80021
loss: 1.0043370723724365,grad_norm: 0.9278168558753278, iteration: 80022
loss: 1.0088616609573364,grad_norm: 0.9721085276728967, iteration: 80023
loss: 1.059147834777832,grad_norm: 0.8702022485878235, iteration: 80024
loss: 1.0101902484893799,grad_norm: 0.9999990862551051, iteration: 80025
loss: 1.081546664237976,grad_norm: 0.9999995284364002, iteration: 80026
loss: 1.013583779335022,grad_norm: 0.999999231604479, iteration: 80027
loss: 1.0112844705581665,grad_norm: 0.9115321652260364, iteration: 80028
loss: 1.0021437406539917,grad_norm: 0.9999992467384519, iteration: 80029
loss: 0.9939889311790466,grad_norm: 0.9582077428708752, iteration: 80030
loss: 1.0623282194137573,grad_norm: 0.8405800563543065, iteration: 80031
loss: 1.009364128112793,grad_norm: 0.9999990871462126, iteration: 80032
loss: 1.0800899267196655,grad_norm: 0.9717774349541275, iteration: 80033
loss: 1.0364238023757935,grad_norm: 0.9999992739192683, iteration: 80034
loss: 1.086363434791565,grad_norm: 0.8276394948944006, iteration: 80035
loss: 1.098244309425354,grad_norm: 0.999999244335917, iteration: 80036
loss: 0.9820783138275146,grad_norm: 0.8562932073114634, iteration: 80037
loss: 1.232329249382019,grad_norm: 0.9999994101072327, iteration: 80038
loss: 1.0089056491851807,grad_norm: 0.9999991545112442, iteration: 80039
loss: 1.0215537548065186,grad_norm: 0.9999991782905955, iteration: 80040
loss: 0.9694686532020569,grad_norm: 0.9999991967015559, iteration: 80041
loss: 0.9952203035354614,grad_norm: 0.8361582920027986, iteration: 80042
loss: 0.9853233098983765,grad_norm: 0.9999990999365432, iteration: 80043
loss: 1.0598323345184326,grad_norm: 0.9354512876084147, iteration: 80044
loss: 0.9989106059074402,grad_norm: 0.8383333103915549, iteration: 80045
loss: 0.9852731823921204,grad_norm: 0.9999994566687629, iteration: 80046
loss: 0.9856810569763184,grad_norm: 0.9999995911096182, iteration: 80047
loss: 1.068039059638977,grad_norm: 0.9999990451088664, iteration: 80048
loss: 1.0529531240463257,grad_norm: 0.9999993972497911, iteration: 80049
loss: 1.082458734512329,grad_norm: 0.999999168659654, iteration: 80050
loss: 1.0810269117355347,grad_norm: 0.9999991727032446, iteration: 80051
loss: 0.955493152141571,grad_norm: 0.9999990581062767, iteration: 80052
loss: 1.0373618602752686,grad_norm: 0.9122530378046827, iteration: 80053
loss: 1.0240845680236816,grad_norm: 0.9999991407197591, iteration: 80054
loss: 1.0076448917388916,grad_norm: 0.8480990043680675, iteration: 80055
loss: 1.021345615386963,grad_norm: 0.9369285072402301, iteration: 80056
loss: 0.9938578009605408,grad_norm: 0.9999995243638397, iteration: 80057
loss: 1.0140553712844849,grad_norm: 0.9999991095686704, iteration: 80058
loss: 0.9648044109344482,grad_norm: 0.9999992914365979, iteration: 80059
loss: 1.0796512365341187,grad_norm: 0.9999991323514106, iteration: 80060
loss: 1.0353872776031494,grad_norm: 0.9192969443829819, iteration: 80061
loss: 1.017806053161621,grad_norm: 0.9220020351689275, iteration: 80062
loss: 0.9890751242637634,grad_norm: 0.9999991323363233, iteration: 80063
loss: 1.0427809953689575,grad_norm: 0.9999991467084435, iteration: 80064
loss: 0.9852007627487183,grad_norm: 0.9999995188252887, iteration: 80065
loss: 1.064160704612732,grad_norm: 0.99999908015713, iteration: 80066
loss: 1.0138822793960571,grad_norm: 0.9999992209755336, iteration: 80067
loss: 1.0428844690322876,grad_norm: 0.9999997874819784, iteration: 80068
loss: 0.990782618522644,grad_norm: 0.9999990357861557, iteration: 80069
loss: 1.0129519701004028,grad_norm: 0.999999360010739, iteration: 80070
loss: 1.0526480674743652,grad_norm: 0.9596223137919248, iteration: 80071
loss: 1.0292555093765259,grad_norm: 0.999999005510008, iteration: 80072
loss: 1.1083178520202637,grad_norm: 0.9999996008958731, iteration: 80073
loss: 0.9993609189987183,grad_norm: 0.9075090126214651, iteration: 80074
loss: 1.0266518592834473,grad_norm: 0.9999991481895469, iteration: 80075
loss: 1.0698548555374146,grad_norm: 0.9999999065709695, iteration: 80076
loss: 1.097450852394104,grad_norm: 0.9999992798244972, iteration: 80077
loss: 1.3484352827072144,grad_norm: 0.9999992960603008, iteration: 80078
loss: 1.0471950769424438,grad_norm: 0.9999991783569175, iteration: 80079
loss: 1.1439158916473389,grad_norm: 0.9999998259976599, iteration: 80080
loss: 1.0170832872390747,grad_norm: 0.9999994945448138, iteration: 80081
loss: 1.0000118017196655,grad_norm: 0.9999998051505149, iteration: 80082
loss: 1.0637513399124146,grad_norm: 0.9779706408982486, iteration: 80083
loss: 1.0681744813919067,grad_norm: 0.9999991970328865, iteration: 80084
loss: 0.9792998433113098,grad_norm: 0.8891569006044855, iteration: 80085
loss: 1.0415202379226685,grad_norm: 0.9999993485152469, iteration: 80086
loss: 1.0715100765228271,grad_norm: 0.9999990024420179, iteration: 80087
loss: 1.0056767463684082,grad_norm: 0.8168127640736623, iteration: 80088
loss: 1.0738874673843384,grad_norm: 0.9999993502151459, iteration: 80089
loss: 0.9999662041664124,grad_norm: 0.6915687481022065, iteration: 80090
loss: 1.0237442255020142,grad_norm: 0.9256479022090434, iteration: 80091
loss: 0.9611294269561768,grad_norm: 0.9999990667234591, iteration: 80092
loss: 0.9866740107536316,grad_norm: 0.9402981157452801, iteration: 80093
loss: 1.0641207695007324,grad_norm: 0.9999996244426889, iteration: 80094
loss: 1.0561853647232056,grad_norm: 0.9999990095592631, iteration: 80095
loss: 0.9770116209983826,grad_norm: 0.9999992625745969, iteration: 80096
loss: 1.0074106454849243,grad_norm: 0.9999990853564175, iteration: 80097
loss: 1.0391476154327393,grad_norm: 0.9658409255979473, iteration: 80098
loss: 1.03629732131958,grad_norm: 0.8146984907880793, iteration: 80099
loss: 1.01546311378479,grad_norm: 0.8905258811273742, iteration: 80100
loss: 0.9961808323860168,grad_norm: 0.9442605125250135, iteration: 80101
loss: 1.0546391010284424,grad_norm: 0.9999999325253071, iteration: 80102
loss: 1.1961508989334106,grad_norm: 1.000000042699888, iteration: 80103
loss: 1.0023623704910278,grad_norm: 0.8259205652289721, iteration: 80104
loss: 0.9637629389762878,grad_norm: 0.9804440240538118, iteration: 80105
loss: 1.1111884117126465,grad_norm: 0.9999995631756751, iteration: 80106
loss: 1.0107061862945557,grad_norm: 0.895011977032187, iteration: 80107
loss: 1.138116717338562,grad_norm: 0.9999996403783217, iteration: 80108
loss: 1.1833090782165527,grad_norm: 1.000000061710675, iteration: 80109
loss: 1.03823721408844,grad_norm: 0.9999991979786768, iteration: 80110
loss: 1.0289254188537598,grad_norm: 0.9268776868077317, iteration: 80111
loss: 1.044828176498413,grad_norm: 0.8646556468175881, iteration: 80112
loss: 1.0798554420471191,grad_norm: 0.8399311530486375, iteration: 80113
loss: 1.02182137966156,grad_norm: 0.9108618052184465, iteration: 80114
loss: 1.038030982017517,grad_norm: 0.9944435197634001, iteration: 80115
loss: 1.018811821937561,grad_norm: 0.9999991931867964, iteration: 80116
loss: 1.0099009275436401,grad_norm: 0.8227123340246124, iteration: 80117
loss: 0.9953108429908752,grad_norm: 0.964952908057694, iteration: 80118
loss: 1.0300143957138062,grad_norm: 0.9999995187149234, iteration: 80119
loss: 1.0242884159088135,grad_norm: 0.9873031323177305, iteration: 80120
loss: 1.0934149026870728,grad_norm: 0.9999999597709861, iteration: 80121
loss: 1.0032380819320679,grad_norm: 0.7991923979119966, iteration: 80122
loss: 1.0071269273757935,grad_norm: 0.7935230649504045, iteration: 80123
loss: 1.0010093450546265,grad_norm: 0.8831438348485239, iteration: 80124
loss: 1.185915470123291,grad_norm: 0.9999997625072587, iteration: 80125
loss: 1.0441278219223022,grad_norm: 0.9999993220664756, iteration: 80126
loss: 1.025498867034912,grad_norm: 0.9999990808013302, iteration: 80127
loss: 1.0269644260406494,grad_norm: 0.9999989266962209, iteration: 80128
loss: 0.9904247522354126,grad_norm: 0.9999992546659004, iteration: 80129
loss: 0.9882757067680359,grad_norm: 0.9408493010907778, iteration: 80130
loss: 1.0324631929397583,grad_norm: 0.9953467826281223, iteration: 80131
loss: 1.0339713096618652,grad_norm: 0.7967553334624728, iteration: 80132
loss: 1.0318734645843506,grad_norm: 0.999999095968606, iteration: 80133
loss: 0.9712753295898438,grad_norm: 0.9999990139496558, iteration: 80134
loss: 1.1838446855545044,grad_norm: 0.9999994275247288, iteration: 80135
loss: 1.024681568145752,grad_norm: 0.9999990659608263, iteration: 80136
loss: 0.9543680548667908,grad_norm: 0.9999991142010217, iteration: 80137
loss: 1.0724103450775146,grad_norm: 0.8456604948682696, iteration: 80138
loss: 1.113949179649353,grad_norm: 0.9999997376169935, iteration: 80139
loss: 1.0312705039978027,grad_norm: 0.9999991994678474, iteration: 80140
loss: 0.9975611567497253,grad_norm: 0.9999993170099498, iteration: 80141
loss: 1.0298540592193604,grad_norm: 0.9999997647834618, iteration: 80142
loss: 1.0612412691116333,grad_norm: 0.9999990293318496, iteration: 80143
loss: 1.0310510396957397,grad_norm: 0.999999665226453, iteration: 80144
loss: 1.0219178199768066,grad_norm: 0.9999991283555041, iteration: 80145
loss: 1.0061893463134766,grad_norm: 0.9999992405188504, iteration: 80146
loss: 1.0267542600631714,grad_norm: 0.9999994786186269, iteration: 80147
loss: 0.9773491621017456,grad_norm: 0.9999991709494374, iteration: 80148
loss: 1.0629686117172241,grad_norm: 0.9438986844778139, iteration: 80149
loss: 0.9969694018363953,grad_norm: 0.8753143490610145, iteration: 80150
loss: 0.9973853230476379,grad_norm: 0.9240030970600988, iteration: 80151
loss: 1.1126633882522583,grad_norm: 0.9999998578211245, iteration: 80152
loss: 1.0630260705947876,grad_norm: 0.9088920421236731, iteration: 80153
loss: 1.0194233655929565,grad_norm: 0.9734266418365877, iteration: 80154
loss: 1.020972728729248,grad_norm: 0.9999992729893101, iteration: 80155
loss: 1.0484371185302734,grad_norm: 0.9757972699862333, iteration: 80156
loss: 0.9971781969070435,grad_norm: 0.9999993340517719, iteration: 80157
loss: 1.2294151782989502,grad_norm: 0.9999996909324134, iteration: 80158
loss: 1.065054178237915,grad_norm: 0.9089290016434354, iteration: 80159
loss: 1.100895881652832,grad_norm: 0.9999996310289018, iteration: 80160
loss: 1.1376298666000366,grad_norm: 0.9999996675687753, iteration: 80161
loss: 1.0808595418930054,grad_norm: 0.9999993689391209, iteration: 80162
loss: 1.0166568756103516,grad_norm: 0.8809463126604351, iteration: 80163
loss: 1.046595573425293,grad_norm: 0.9999990906593881, iteration: 80164
loss: 1.109279990196228,grad_norm: 0.9999994831639551, iteration: 80165
loss: 1.0574239492416382,grad_norm: 0.9285964094910443, iteration: 80166
loss: 1.202648401260376,grad_norm: 0.999999881218714, iteration: 80167
loss: 1.0691289901733398,grad_norm: 0.9999991098695993, iteration: 80168
loss: 1.0367577075958252,grad_norm: 0.9999994123999457, iteration: 80169
loss: 1.0594379901885986,grad_norm: 0.9999996126504787, iteration: 80170
loss: 0.9858853220939636,grad_norm: 0.9537406874829606, iteration: 80171
loss: 1.1346819400787354,grad_norm: 0.9999999650313067, iteration: 80172
loss: 1.0782698392868042,grad_norm: 0.9999996711579269, iteration: 80173
loss: 0.9987550973892212,grad_norm: 0.8743648953465737, iteration: 80174
loss: 1.1397298574447632,grad_norm: 0.9999994029179539, iteration: 80175
loss: 1.0576140880584717,grad_norm: 0.9999990817251394, iteration: 80176
loss: 1.034605860710144,grad_norm: 0.9999995639846553, iteration: 80177
loss: 1.0742628574371338,grad_norm: 0.9999995668267367, iteration: 80178
loss: 0.9961663484573364,grad_norm: 0.9999993224904826, iteration: 80179
loss: 1.0000494718551636,grad_norm: 0.9089770471466525, iteration: 80180
loss: 1.0405433177947998,grad_norm: 0.9999991247078831, iteration: 80181
loss: 0.9813821315765381,grad_norm: 0.9999990073795815, iteration: 80182
loss: 0.9928767681121826,grad_norm: 0.9999994281909722, iteration: 80183
loss: 1.043559193611145,grad_norm: 0.9451270431340845, iteration: 80184
loss: 1.0941071510314941,grad_norm: 0.9999996748987127, iteration: 80185
loss: 0.9821760654449463,grad_norm: 0.999999106865348, iteration: 80186
loss: 1.077933669090271,grad_norm: 0.9999997696735047, iteration: 80187
loss: 1.0377089977264404,grad_norm: 0.8967609719840284, iteration: 80188
loss: 1.0337494611740112,grad_norm: 0.9999992339412028, iteration: 80189
loss: 1.0434736013412476,grad_norm: 0.9999991414297889, iteration: 80190
loss: 1.1604747772216797,grad_norm: 0.9999999570117643, iteration: 80191
loss: 1.0779279470443726,grad_norm: 0.9999997479534692, iteration: 80192
loss: 1.0822527408599854,grad_norm: 0.9999998955272082, iteration: 80193
loss: 1.0509412288665771,grad_norm: 0.9999999149705532, iteration: 80194
loss: 1.193895936012268,grad_norm: 0.9999997765442055, iteration: 80195
loss: 0.9873118996620178,grad_norm: 0.9002750572069624, iteration: 80196
loss: 1.063704490661621,grad_norm: 0.9508911056365482, iteration: 80197
loss: 1.1170545816421509,grad_norm: 0.9999995992254139, iteration: 80198
loss: 1.0263999700546265,grad_norm: 0.9999992600729762, iteration: 80199
loss: 1.0059889554977417,grad_norm: 0.9999989496019925, iteration: 80200
loss: 0.9675076603889465,grad_norm: 0.9999989929236252, iteration: 80201
loss: 0.9638853073120117,grad_norm: 0.958390800653805, iteration: 80202
loss: 1.01210618019104,grad_norm: 0.9206669015769001, iteration: 80203
loss: 1.008763074874878,grad_norm: 0.9999992373771012, iteration: 80204
loss: 1.1288902759552002,grad_norm: 0.9999999631345599, iteration: 80205
loss: 1.0313688516616821,grad_norm: 0.9999998739417522, iteration: 80206
loss: 1.032307505607605,grad_norm: 0.9999994319232249, iteration: 80207
loss: 1.0628334283828735,grad_norm: 0.9185255253074504, iteration: 80208
loss: 1.1311529874801636,grad_norm: 0.9999994834364982, iteration: 80209
loss: 1.1272549629211426,grad_norm: 0.999999948878287, iteration: 80210
loss: 1.1161551475524902,grad_norm: 0.9999999680085799, iteration: 80211
loss: 1.044058918952942,grad_norm: 0.9999996307427705, iteration: 80212
loss: 1.0613980293273926,grad_norm: 0.9999994123738173, iteration: 80213
loss: 1.1498944759368896,grad_norm: 0.9999993121529945, iteration: 80214
loss: 1.0858864784240723,grad_norm: 0.9999996534865295, iteration: 80215
loss: 1.1733920574188232,grad_norm: 0.9999991145807203, iteration: 80216
loss: 1.0760725736618042,grad_norm: 0.9999997144332272, iteration: 80217
loss: 1.3153458833694458,grad_norm: 0.999999784363746, iteration: 80218
loss: 1.0877721309661865,grad_norm: 0.999999603092788, iteration: 80219
loss: 0.9813643097877502,grad_norm: 0.9999998645517456, iteration: 80220
loss: 1.0647492408752441,grad_norm: 0.9374202647790161, iteration: 80221
loss: 1.0625406503677368,grad_norm: 0.9999992714054138, iteration: 80222
loss: 1.0667635202407837,grad_norm: 0.9999994882916684, iteration: 80223
loss: 1.0327423810958862,grad_norm: 0.9999993918012661, iteration: 80224
loss: 1.1793731451034546,grad_norm: 0.9999996427556228, iteration: 80225
loss: 1.0733803510665894,grad_norm: 0.9999991420883996, iteration: 80226
loss: 1.1829931735992432,grad_norm: 0.9999996017534958, iteration: 80227
loss: 1.0911142826080322,grad_norm: 0.9999992649071147, iteration: 80228
loss: 1.053184986114502,grad_norm: 0.9999995814965725, iteration: 80229
loss: 1.0896400213241577,grad_norm: 0.999999476266238, iteration: 80230
loss: 1.014914870262146,grad_norm: 0.9999991985338339, iteration: 80231
loss: 1.1315181255340576,grad_norm: 0.9999990680989773, iteration: 80232
loss: 1.2175240516662598,grad_norm: 0.9999998081807078, iteration: 80233
loss: 1.1058391332626343,grad_norm: 0.9999994107778593, iteration: 80234
loss: 1.034036636352539,grad_norm: 0.9999991685584306, iteration: 80235
loss: 0.9726268649101257,grad_norm: 0.9086635532109879, iteration: 80236
loss: 1.0545005798339844,grad_norm: 0.9999996941572437, iteration: 80237
loss: 1.0031696557998657,grad_norm: 0.9999990827957737, iteration: 80238
loss: 1.185401439666748,grad_norm: 0.9999997516615067, iteration: 80239
loss: 1.022356390953064,grad_norm: 0.9999991206351783, iteration: 80240
loss: 0.9821102619171143,grad_norm: 0.9999993119043347, iteration: 80241
loss: 1.1031807661056519,grad_norm: 0.999999437171523, iteration: 80242
loss: 1.025144338607788,grad_norm: 0.9999991158810936, iteration: 80243
loss: 1.1090736389160156,grad_norm: 0.9999991593919458, iteration: 80244
loss: 1.1096978187561035,grad_norm: 0.9999993385576454, iteration: 80245
loss: 1.1403629779815674,grad_norm: 0.9999997117351515, iteration: 80246
loss: 1.2643253803253174,grad_norm: 0.9999998160952157, iteration: 80247
loss: 1.1191962957382202,grad_norm: 0.9999999424604072, iteration: 80248
loss: 0.98974609375,grad_norm: 0.999999783500161, iteration: 80249
loss: 1.0207579135894775,grad_norm: 0.9999999076165035, iteration: 80250
loss: 1.0728294849395752,grad_norm: 0.9999993990307543, iteration: 80251
loss: 1.140841007232666,grad_norm: 0.9999993312267571, iteration: 80252
loss: 1.094143271446228,grad_norm: 0.999999818628712, iteration: 80253
loss: 1.045477032661438,grad_norm: 0.8467466978821245, iteration: 80254
loss: 1.0217900276184082,grad_norm: 0.8885060139743965, iteration: 80255
loss: 1.0627554655075073,grad_norm: 0.9999991214816184, iteration: 80256
loss: 1.1068885326385498,grad_norm: 0.9999992839949434, iteration: 80257
loss: 1.0234708786010742,grad_norm: 0.9999994124418431, iteration: 80258
loss: 0.9996941089630127,grad_norm: 0.9999997173466392, iteration: 80259
loss: 1.112766146659851,grad_norm: 0.999999309447456, iteration: 80260
loss: 1.012739896774292,grad_norm: 0.9999992743307714, iteration: 80261
loss: 1.0209919214248657,grad_norm: 0.9999994148461343, iteration: 80262
loss: 1.1101508140563965,grad_norm: 0.9999992146041452, iteration: 80263
loss: 1.0802171230316162,grad_norm: 0.9999992894280997, iteration: 80264
loss: 0.9965548515319824,grad_norm: 0.9999992610587277, iteration: 80265
loss: 1.089795470237732,grad_norm: 0.8575460954703071, iteration: 80266
loss: 1.1748946905136108,grad_norm: 0.9999996054428887, iteration: 80267
loss: 0.9737712144851685,grad_norm: 0.9523627348137188, iteration: 80268
loss: 1.0016992092132568,grad_norm: 0.9999997077292166, iteration: 80269
loss: 1.0926589965820312,grad_norm: 0.9999991174431995, iteration: 80270
loss: 1.0376557111740112,grad_norm: 0.9999996701885063, iteration: 80271
loss: 1.2073277235031128,grad_norm: 0.9999995569123714, iteration: 80272
loss: 1.1199628114700317,grad_norm: 0.9999997687649429, iteration: 80273
loss: 1.0587642192840576,grad_norm: 0.9999992099550554, iteration: 80274
loss: 1.001396656036377,grad_norm: 0.9999990446692549, iteration: 80275
loss: 1.0545097589492798,grad_norm: 0.9999990509738234, iteration: 80276
loss: 1.2757048606872559,grad_norm: 0.9999994658531665, iteration: 80277
loss: 1.240866780281067,grad_norm: 0.9999994374555526, iteration: 80278
loss: 1.0827564001083374,grad_norm: 0.999999228268838, iteration: 80279
loss: 1.119030237197876,grad_norm: 0.9999998736914817, iteration: 80280
loss: 1.3732616901397705,grad_norm: 0.9999995609249217, iteration: 80281
loss: 1.2998545169830322,grad_norm: 1.0000000426592468, iteration: 80282
loss: 1.4098354578018188,grad_norm: 0.9999998403695841, iteration: 80283
loss: 1.1093670129776,grad_norm: 0.9999997736672446, iteration: 80284
loss: 1.1273101568222046,grad_norm: 0.9999995029735598, iteration: 80285
loss: 1.1230148077011108,grad_norm: 0.9999995382090755, iteration: 80286
loss: 1.2923332452774048,grad_norm: 0.9999997793872243, iteration: 80287
loss: 1.1497911214828491,grad_norm: 0.9999995504971031, iteration: 80288
loss: 1.0585776567459106,grad_norm: 0.9794362120861184, iteration: 80289
loss: 1.0556997060775757,grad_norm: 0.9999998150663267, iteration: 80290
loss: 1.095491647720337,grad_norm: 0.9999996322896487, iteration: 80291
loss: 1.1956974267959595,grad_norm: 0.9999998718058036, iteration: 80292
loss: 1.3236161470413208,grad_norm: 0.9999997713030392, iteration: 80293
loss: 1.1244165897369385,grad_norm: 0.9999995237859416, iteration: 80294
loss: 1.2559571266174316,grad_norm: 0.9999998897326589, iteration: 80295
loss: 1.0059880018234253,grad_norm: 0.8489449884115681, iteration: 80296
loss: 1.0294084548950195,grad_norm: 0.9999990941636827, iteration: 80297
loss: 1.1252692937850952,grad_norm: 0.9999991506206716, iteration: 80298
loss: 1.1054288148880005,grad_norm: 0.9999999018434934, iteration: 80299
loss: 1.095410704612732,grad_norm: 0.9999999815545602, iteration: 80300
loss: 1.1954374313354492,grad_norm: 0.9999992222494835, iteration: 80301
loss: 1.195067286491394,grad_norm: 0.9999993139126038, iteration: 80302
loss: 1.3412574529647827,grad_norm: 0.999999425763194, iteration: 80303
loss: 1.1195216178894043,grad_norm: 0.9999996252121406, iteration: 80304
loss: 1.0889884233474731,grad_norm: 0.9999995951144639, iteration: 80305
loss: 1.1464956998825073,grad_norm: 0.9999994240213236, iteration: 80306
loss: 1.1116931438446045,grad_norm: 0.9999998669431861, iteration: 80307
loss: 1.147728681564331,grad_norm: 0.999999178665562, iteration: 80308
loss: 1.2514535188674927,grad_norm: 0.9999996183709576, iteration: 80309
loss: 1.2082188129425049,grad_norm: 0.9999998530586373, iteration: 80310
loss: 1.0861340761184692,grad_norm: 0.9999992581998269, iteration: 80311
loss: 1.1241296529769897,grad_norm: 1.0000000031772882, iteration: 80312
loss: 1.1117477416992188,grad_norm: 0.9580745070077186, iteration: 80313
loss: 1.148309350013733,grad_norm: 0.9999995748717628, iteration: 80314
loss: 1.1826698780059814,grad_norm: 0.9999998213434528, iteration: 80315
loss: 1.2302393913269043,grad_norm: 1.000000061145062, iteration: 80316
loss: 1.3196038007736206,grad_norm: 0.9999997882559948, iteration: 80317
loss: 1.2231903076171875,grad_norm: 0.9999998394097979, iteration: 80318
loss: 1.0902812480926514,grad_norm: 0.9999992190850266, iteration: 80319
loss: 1.2995429039001465,grad_norm: 0.9999996683376954, iteration: 80320
loss: 1.6103483438491821,grad_norm: 0.9999998046545631, iteration: 80321
loss: 1.3495246171951294,grad_norm: 0.999999872764675, iteration: 80322
loss: 1.3148175477981567,grad_norm: 1.0000000242988791, iteration: 80323
loss: 1.2261004447937012,grad_norm: 0.9999995907801517, iteration: 80324
loss: 1.4614585638046265,grad_norm: 0.9999996407982076, iteration: 80325
loss: 1.332329511642456,grad_norm: 0.9999997833691586, iteration: 80326
loss: 1.7125170230865479,grad_norm: 0.9999997114692202, iteration: 80327
loss: 1.120145559310913,grad_norm: 0.9999991324733558, iteration: 80328
loss: 1.8718901872634888,grad_norm: 0.9999999284014918, iteration: 80329
loss: 1.5752496719360352,grad_norm: 1.000000024310096, iteration: 80330
loss: 1.3651411533355713,grad_norm: 0.9999996156128089, iteration: 80331
loss: 1.1149706840515137,grad_norm: 0.9999994487442587, iteration: 80332
loss: 1.3508036136627197,grad_norm: 0.999999875599165, iteration: 80333
loss: 1.0984009504318237,grad_norm: 0.9999994286103288, iteration: 80334
loss: 1.2494077682495117,grad_norm: 0.9999998823734721, iteration: 80335
loss: 1.2332711219787598,grad_norm: 0.9999998840736243, iteration: 80336
loss: 1.513329267501831,grad_norm: 0.9999997705371328, iteration: 80337
loss: 1.1666109561920166,grad_norm: 0.9999998477717467, iteration: 80338
loss: 1.5381956100463867,grad_norm: 0.9999996323876169, iteration: 80339
loss: 1.2556407451629639,grad_norm: 0.9999999759684749, iteration: 80340
loss: 1.2708855867385864,grad_norm: 0.9999997078502929, iteration: 80341
loss: 1.4066991806030273,grad_norm: 0.999999742520709, iteration: 80342
loss: 1.1678047180175781,grad_norm: 0.9999998905751091, iteration: 80343
loss: 1.1584104299545288,grad_norm: 0.9999995158777765, iteration: 80344
loss: 1.1732021570205688,grad_norm: 0.9999999347819086, iteration: 80345
loss: 1.4110649824142456,grad_norm: 0.9999997342614753, iteration: 80346
loss: 1.084765911102295,grad_norm: 0.999999984498316, iteration: 80347
loss: 1.1577035188674927,grad_norm: 0.9999999871948544, iteration: 80348
loss: 1.2918453216552734,grad_norm: 0.9999999738432049, iteration: 80349
loss: 1.0765385627746582,grad_norm: 0.9999994810231465, iteration: 80350
loss: 1.133913516998291,grad_norm: 0.9690963366163816, iteration: 80351
loss: 1.0939244031906128,grad_norm: 0.9999996046830029, iteration: 80352
loss: 1.3590736389160156,grad_norm: 0.9999994135128702, iteration: 80353
loss: 1.1936218738555908,grad_norm: 0.9999997269666661, iteration: 80354
loss: 1.1056791543960571,grad_norm: 0.9999993502362226, iteration: 80355
loss: 1.0825735330581665,grad_norm: 0.9999993788262697, iteration: 80356
loss: 1.0142385959625244,grad_norm: 0.9999990399111088, iteration: 80357
loss: 1.1335492134094238,grad_norm: 0.9999994420238956, iteration: 80358
loss: 1.2478383779525757,grad_norm: 0.9999998894926891, iteration: 80359
loss: 1.1559675931930542,grad_norm: 0.9999991646849405, iteration: 80360
loss: 1.1899079084396362,grad_norm: 0.9999996438232726, iteration: 80361
loss: 1.1877715587615967,grad_norm: 0.9999992914108536, iteration: 80362
loss: 1.075785756111145,grad_norm: 0.999999488315582, iteration: 80363
loss: 0.9889525175094604,grad_norm: 0.9999993023522082, iteration: 80364
loss: 1.0664783716201782,grad_norm: 0.999999034959205, iteration: 80365
loss: 1.2272377014160156,grad_norm: 0.9999998498957888, iteration: 80366
loss: 1.03840970993042,grad_norm: 0.9393862162163069, iteration: 80367
loss: 1.2452408075332642,grad_norm: 0.9999998614946356, iteration: 80368
loss: 1.0584826469421387,grad_norm: 0.877798255555823, iteration: 80369
loss: 1.0995848178863525,grad_norm: 0.999999402457711, iteration: 80370
loss: 1.070022702217102,grad_norm: 0.9999998175643408, iteration: 80371
loss: 1.0251166820526123,grad_norm: 0.9999997232708695, iteration: 80372
loss: 0.9624958038330078,grad_norm: 0.9999991414328535, iteration: 80373
loss: 1.00385582447052,grad_norm: 0.9999990116249688, iteration: 80374
loss: 1.0813322067260742,grad_norm: 0.9999992004717886, iteration: 80375
loss: 1.1249257326126099,grad_norm: 0.9999996002897898, iteration: 80376
loss: 1.3831671476364136,grad_norm: 0.9999994699534451, iteration: 80377
loss: 1.2740957736968994,grad_norm: 0.9999998099039394, iteration: 80378
loss: 1.2519381046295166,grad_norm: 0.9999999002411735, iteration: 80379
loss: 1.1629302501678467,grad_norm: 0.9999992834014065, iteration: 80380
loss: 1.3663160800933838,grad_norm: 0.9999993181569004, iteration: 80381
loss: 1.294584035873413,grad_norm: 0.9999995193662141, iteration: 80382
loss: 1.1617234945297241,grad_norm: 0.9999997105512025, iteration: 80383
loss: 1.102931261062622,grad_norm: 0.9999994617599229, iteration: 80384
loss: 1.1685220003128052,grad_norm: 0.9999989922383613, iteration: 80385
loss: 1.0461344718933105,grad_norm: 0.9999990783077364, iteration: 80386
loss: 1.2826741933822632,grad_norm: 0.9999993696986027, iteration: 80387
loss: 1.0698575973510742,grad_norm: 0.9999990869145725, iteration: 80388
loss: 1.0427167415618896,grad_norm: 0.9999993112675868, iteration: 80389
loss: 1.1268978118896484,grad_norm: 0.9999995359778676, iteration: 80390
loss: 1.029640555381775,grad_norm: 0.9999992312093204, iteration: 80391
loss: 1.1936774253845215,grad_norm: 0.9999993642476204, iteration: 80392
loss: 1.012495756149292,grad_norm: 0.9748539632357192, iteration: 80393
loss: 1.428544521331787,grad_norm: 0.9999999749805784, iteration: 80394
loss: 0.9924032688140869,grad_norm: 0.9999995510446057, iteration: 80395
loss: 1.1139987707138062,grad_norm: 0.9999990601974, iteration: 80396
loss: 1.1458979845046997,grad_norm: 0.9999993524599003, iteration: 80397
loss: 1.066612958908081,grad_norm: 0.9999992754609704, iteration: 80398
loss: 1.2114331722259521,grad_norm: 0.9999998674220891, iteration: 80399
loss: 1.1580193042755127,grad_norm: 0.999999360671704, iteration: 80400
loss: 1.2347502708435059,grad_norm: 0.9999992450862403, iteration: 80401
loss: 1.1840336322784424,grad_norm: 0.9999996944925237, iteration: 80402
loss: 1.183963656425476,grad_norm: 0.9999991365573587, iteration: 80403
loss: 1.1774641275405884,grad_norm: 0.9999998904202714, iteration: 80404
loss: 1.0833834409713745,grad_norm: 0.9999996116861654, iteration: 80405
loss: 1.1585910320281982,grad_norm: 0.9999993100855061, iteration: 80406
loss: 1.0668312311172485,grad_norm: 0.9999993370137116, iteration: 80407
loss: 1.0845848321914673,grad_norm: 0.9999992861078144, iteration: 80408
loss: 1.064626693725586,grad_norm: 1.0000000151763548, iteration: 80409
loss: 1.0577236413955688,grad_norm: 0.9999990887727938, iteration: 80410
loss: 1.1654996871948242,grad_norm: 0.9999996423786175, iteration: 80411
loss: 1.090814232826233,grad_norm: 0.9999995301524941, iteration: 80412
loss: 1.0890995264053345,grad_norm: 0.9127691442735975, iteration: 80413
loss: 1.1435574293136597,grad_norm: 0.9999996463002845, iteration: 80414
loss: 1.2238857746124268,grad_norm: 0.9999998178689408, iteration: 80415
loss: 1.1161502599716187,grad_norm: 0.999999721493845, iteration: 80416
loss: 1.272349238395691,grad_norm: 0.9999998966049016, iteration: 80417
loss: 1.1825915575027466,grad_norm: 0.9999994427344245, iteration: 80418
loss: 1.068934679031372,grad_norm: 0.9999997128893512, iteration: 80419
loss: 1.0706171989440918,grad_norm: 0.9999992037363263, iteration: 80420
loss: 1.0140711069107056,grad_norm: 0.999999123091587, iteration: 80421
loss: 0.9930005073547363,grad_norm: 0.9999992683632692, iteration: 80422
loss: 1.0726995468139648,grad_norm: 0.999999072182553, iteration: 80423
loss: 0.9876850247383118,grad_norm: 0.9999990051762591, iteration: 80424
loss: 1.0554418563842773,grad_norm: 0.9999997328538721, iteration: 80425
loss: 0.9966133832931519,grad_norm: 0.9999992025045759, iteration: 80426
loss: 1.002550721168518,grad_norm: 0.999999292668507, iteration: 80427
loss: 1.1133241653442383,grad_norm: 0.9999996274821104, iteration: 80428
loss: 1.1211743354797363,grad_norm: 0.9999999098412536, iteration: 80429
loss: 1.044342279434204,grad_norm: 0.9999991597460507, iteration: 80430
loss: 1.179236888885498,grad_norm: 0.9999998662963056, iteration: 80431
loss: 0.9928392171859741,grad_norm: 0.9999994417583266, iteration: 80432
loss: 1.036534309387207,grad_norm: 0.9999994749259945, iteration: 80433
loss: 0.9933999180793762,grad_norm: 0.9851055962687063, iteration: 80434
loss: 1.1806410551071167,grad_norm: 0.9999995115978222, iteration: 80435
loss: 1.1407092809677124,grad_norm: 0.9999998165219156, iteration: 80436
loss: 0.9815728068351746,grad_norm: 0.9999996979128574, iteration: 80437
loss: 1.1041094064712524,grad_norm: 0.9999993350463218, iteration: 80438
loss: 1.0822855234146118,grad_norm: 0.9999995064933791, iteration: 80439
loss: 0.9881972074508667,grad_norm: 0.9999992926510015, iteration: 80440
loss: 1.0352342128753662,grad_norm: 0.9999996331408078, iteration: 80441
loss: 1.0136806964874268,grad_norm: 0.9999992360522025, iteration: 80442
loss: 1.0229523181915283,grad_norm: 0.9999991883110743, iteration: 80443
loss: 1.0581567287445068,grad_norm: 0.9999993306337591, iteration: 80444
loss: 1.2891132831573486,grad_norm: 0.9999999658673822, iteration: 80445
loss: 1.11091947555542,grad_norm: 0.9999998630822918, iteration: 80446
loss: 1.1781424283981323,grad_norm: 0.999999894294498, iteration: 80447
loss: 1.2081996202468872,grad_norm: 0.9999994912636659, iteration: 80448
loss: 1.1670557260513306,grad_norm: 0.9999993016427428, iteration: 80449
loss: 1.2806366682052612,grad_norm: 0.9999998683065948, iteration: 80450
loss: 1.1783636808395386,grad_norm: 0.9999995463830471, iteration: 80451
loss: 1.0089805126190186,grad_norm: 0.9999991040094641, iteration: 80452
loss: 1.1806648969650269,grad_norm: 0.9999999256956894, iteration: 80453
loss: 1.2064770460128784,grad_norm: 1.0000000304273164, iteration: 80454
loss: 1.1787546873092651,grad_norm: 0.9999994258117397, iteration: 80455
loss: 1.0964769124984741,grad_norm: 0.9999994882380446, iteration: 80456
loss: 0.9835915565490723,grad_norm: 0.9972450720029179, iteration: 80457
loss: 1.0099272727966309,grad_norm: 0.9999996730476526, iteration: 80458
loss: 1.0649709701538086,grad_norm: 0.9999999565639603, iteration: 80459
loss: 1.005827784538269,grad_norm: 0.9999992378441265, iteration: 80460
loss: 1.021358609199524,grad_norm: 0.9984050521516993, iteration: 80461
loss: 1.1081461906433105,grad_norm: 0.999999238527987, iteration: 80462
loss: 1.0677512884140015,grad_norm: 0.9352770681614069, iteration: 80463
loss: 0.980441153049469,grad_norm: 0.9999991337252252, iteration: 80464
loss: 1.0189121961593628,grad_norm: 0.9999993176632067, iteration: 80465
loss: 1.1478148698806763,grad_norm: 0.9999996493749198, iteration: 80466
loss: 1.0681259632110596,grad_norm: 0.9999998299322155, iteration: 80467
loss: 1.0129165649414062,grad_norm: 0.9720160417450637, iteration: 80468
loss: 1.0857456922531128,grad_norm: 0.9999998954745569, iteration: 80469
loss: 1.1038286685943604,grad_norm: 0.999999834663537, iteration: 80470
loss: 1.108130693435669,grad_norm: 0.9999992203114926, iteration: 80471
loss: 1.0617074966430664,grad_norm: 0.9999994392159304, iteration: 80472
loss: 1.0727593898773193,grad_norm: 0.9999996950652924, iteration: 80473
loss: 1.0479100942611694,grad_norm: 0.999999109997225, iteration: 80474
loss: 0.9558973908424377,grad_norm: 0.9999990070542997, iteration: 80475
loss: 1.0940264463424683,grad_norm: 0.9999998200984841, iteration: 80476
loss: 0.9984080791473389,grad_norm: 0.9999991873695714, iteration: 80477
loss: 1.0274512767791748,grad_norm: 0.999999841715646, iteration: 80478
loss: 1.0529186725616455,grad_norm: 0.9999990645051035, iteration: 80479
loss: 1.024122953414917,grad_norm: 0.9999994429424699, iteration: 80480
loss: 1.1066176891326904,grad_norm: 0.9999996570116669, iteration: 80481
loss: 1.0423617362976074,grad_norm: 0.9999993144206215, iteration: 80482
loss: 1.0300599336624146,grad_norm: 0.9999990900780191, iteration: 80483
loss: 1.0246113538742065,grad_norm: 0.9838138607248945, iteration: 80484
loss: 1.0556312799453735,grad_norm: 0.9999993974642073, iteration: 80485
loss: 1.0179641246795654,grad_norm: 0.8829374681997568, iteration: 80486
loss: 1.0462626218795776,grad_norm: 0.9999994024872865, iteration: 80487
loss: 1.1590116024017334,grad_norm: 0.9999993144347895, iteration: 80488
loss: 1.0310804843902588,grad_norm: 0.8993270138432335, iteration: 80489
loss: 1.0516722202301025,grad_norm: 0.9999991968920843, iteration: 80490
loss: 1.1488063335418701,grad_norm: 0.9999991846087098, iteration: 80491
loss: 1.0200766324996948,grad_norm: 0.9339886582373544, iteration: 80492
loss: 1.0280956029891968,grad_norm: 0.9999992227513894, iteration: 80493
loss: 1.0401802062988281,grad_norm: 0.999999020832747, iteration: 80494
loss: 1.1354438066482544,grad_norm: 0.9999998584612378, iteration: 80495
loss: 1.0649968385696411,grad_norm: 1.0000000600426708, iteration: 80496
loss: 1.2849348783493042,grad_norm: 0.9999996074744232, iteration: 80497
loss: 1.052211046218872,grad_norm: 0.9999992011468382, iteration: 80498
loss: 1.0888179540634155,grad_norm: 0.847388573192529, iteration: 80499
loss: 1.0602242946624756,grad_norm: 0.9999990663988223, iteration: 80500
loss: 1.275870680809021,grad_norm: 0.9999996482046406, iteration: 80501
loss: 0.9747549295425415,grad_norm: 0.9999990959966473, iteration: 80502
loss: 1.124768614768982,grad_norm: 0.9999999163018757, iteration: 80503
loss: 1.1374105215072632,grad_norm: 0.9999997918657506, iteration: 80504
loss: 1.0182775259017944,grad_norm: 0.9619539909789369, iteration: 80505
loss: 1.0669118165969849,grad_norm: 0.9999991095672909, iteration: 80506
loss: 0.9670379757881165,grad_norm: 0.9999997428482102, iteration: 80507
loss: 1.0030704736709595,grad_norm: 0.9999991906246403, iteration: 80508
loss: 1.1070427894592285,grad_norm: 0.9999992703973651, iteration: 80509
loss: 1.0126444101333618,grad_norm: 0.9688499140968184, iteration: 80510
loss: 0.9485878944396973,grad_norm: 0.9217951587487877, iteration: 80511
loss: 1.0314855575561523,grad_norm: 0.9478031294304604, iteration: 80512
loss: 1.0870721340179443,grad_norm: 0.9999996517788485, iteration: 80513
loss: 1.0848474502563477,grad_norm: 0.999999274418845, iteration: 80514
loss: 1.0668946504592896,grad_norm: 0.9999994999059287, iteration: 80515
loss: 1.0923563241958618,grad_norm: 0.9999993615772328, iteration: 80516
loss: 1.057282567024231,grad_norm: 0.9999990301795989, iteration: 80517
loss: 1.1005502939224243,grad_norm: 0.9999992208109346, iteration: 80518
loss: 1.0979846715927124,grad_norm: 0.9999996191183775, iteration: 80519
loss: 1.0900286436080933,grad_norm: 0.9999994685601089, iteration: 80520
loss: 1.0708662271499634,grad_norm: 0.9999998655692072, iteration: 80521
loss: 1.0343410968780518,grad_norm: 0.9999990682172836, iteration: 80522
loss: 1.0311106443405151,grad_norm: 0.9999992496097958, iteration: 80523
loss: 1.2643719911575317,grad_norm: 0.9999995230156739, iteration: 80524
loss: 1.074555516242981,grad_norm: 0.9999992421907642, iteration: 80525
loss: 1.2012007236480713,grad_norm: 0.9999993302811697, iteration: 80526
loss: 1.0557924509048462,grad_norm: 0.9999997685229512, iteration: 80527
loss: 1.1424572467803955,grad_norm: 0.9999991223561012, iteration: 80528
loss: 0.9916042685508728,grad_norm: 0.9999990215400382, iteration: 80529
loss: 1.059708833694458,grad_norm: 0.9999999045738193, iteration: 80530
loss: 1.0647298097610474,grad_norm: 0.9999996328481406, iteration: 80531
loss: 1.0428447723388672,grad_norm: 0.9692025369355783, iteration: 80532
loss: 1.0279686450958252,grad_norm: 0.9999996484549527, iteration: 80533
loss: 0.9627357721328735,grad_norm: 0.9999991649729327, iteration: 80534
loss: 1.0614569187164307,grad_norm: 0.9999998116524315, iteration: 80535
loss: 1.0198392868041992,grad_norm: 0.9999990732655968, iteration: 80536
loss: 1.0924938917160034,grad_norm: 0.9999992155036442, iteration: 80537
loss: 1.015084981918335,grad_norm: 0.9868906646252773, iteration: 80538
loss: 1.0754380226135254,grad_norm: 0.9999992820788999, iteration: 80539
loss: 1.142600655555725,grad_norm: 0.9999997896008367, iteration: 80540
loss: 1.0202409029006958,grad_norm: 0.9999995475280413, iteration: 80541
loss: 1.1237674951553345,grad_norm: 0.9999991728211394, iteration: 80542
loss: 1.0656235218048096,grad_norm: 0.99999916663901, iteration: 80543
loss: 1.0920348167419434,grad_norm: 0.9999993228244765, iteration: 80544
loss: 1.0746062994003296,grad_norm: 0.9999993556259394, iteration: 80545
loss: 0.998165488243103,grad_norm: 0.8692248641261271, iteration: 80546
loss: 1.1009619235992432,grad_norm: 0.9999998018367268, iteration: 80547
loss: 1.0336158275604248,grad_norm: 0.9430586704206035, iteration: 80548
loss: 1.0071488618850708,grad_norm: 0.9999992621947588, iteration: 80549
loss: 1.0276497602462769,grad_norm: 0.9999992777660551, iteration: 80550
loss: 1.0315587520599365,grad_norm: 0.9999991366106161, iteration: 80551
loss: 1.0735028982162476,grad_norm: 0.9999992562979271, iteration: 80552
loss: 1.0282301902770996,grad_norm: 0.9933571205535592, iteration: 80553
loss: 1.0696793794631958,grad_norm: 0.99999924457932, iteration: 80554
loss: 1.079931378364563,grad_norm: 0.9999992036864034, iteration: 80555
loss: 1.0528912544250488,grad_norm: 0.999999262480577, iteration: 80556
loss: 1.1174410581588745,grad_norm: 0.9999995570039883, iteration: 80557
loss: 1.1269882917404175,grad_norm: 0.9999992941709717, iteration: 80558
loss: 1.0371092557907104,grad_norm: 0.9999992115867807, iteration: 80559
loss: 1.0672013759613037,grad_norm: 0.9999992168772566, iteration: 80560
loss: 1.0639564990997314,grad_norm: 0.9999996073476253, iteration: 80561
loss: 1.0730031728744507,grad_norm: 0.9892420243767313, iteration: 80562
loss: 1.0459275245666504,grad_norm: 0.950621366734311, iteration: 80563
loss: 1.1441781520843506,grad_norm: 0.9999997464010232, iteration: 80564
loss: 1.0554217100143433,grad_norm: 0.9598131806182848, iteration: 80565
loss: 1.0028057098388672,grad_norm: 0.8397770765338675, iteration: 80566
loss: 1.0113208293914795,grad_norm: 0.9088171207693972, iteration: 80567
loss: 1.0594313144683838,grad_norm: 0.9999999663468324, iteration: 80568
loss: 1.095501184463501,grad_norm: 0.9999991690770085, iteration: 80569
loss: 1.0854703187942505,grad_norm: 0.9999991558105366, iteration: 80570
loss: 1.0825529098510742,grad_norm: 0.9999995398398166, iteration: 80571
loss: 1.0051591396331787,grad_norm: 0.8041706764500689, iteration: 80572
loss: 1.078927993774414,grad_norm: 0.9550480275939621, iteration: 80573
loss: 1.0160048007965088,grad_norm: 0.9999996591162789, iteration: 80574
loss: 1.075108289718628,grad_norm: 0.9652655075977306, iteration: 80575
loss: 1.112331509590149,grad_norm: 0.9999992197588139, iteration: 80576
loss: 1.216166377067566,grad_norm: 0.9999996676393578, iteration: 80577
loss: 1.0880975723266602,grad_norm: 0.9999992164540978, iteration: 80578
loss: 1.0763877630233765,grad_norm: 0.9999992529999084, iteration: 80579
loss: 1.0180282592773438,grad_norm: 0.9999992595384873, iteration: 80580
loss: 1.0797010660171509,grad_norm: 0.9999996856492802, iteration: 80581
loss: 1.0928620100021362,grad_norm: 0.9999991944248113, iteration: 80582
loss: 1.0539017915725708,grad_norm: 0.9999991600767165, iteration: 80583
loss: 1.03231942653656,grad_norm: 0.9403184761737046, iteration: 80584
loss: 1.048926591873169,grad_norm: 0.999999049112886, iteration: 80585
loss: 1.0015759468078613,grad_norm: 0.9999992175385017, iteration: 80586
loss: 0.9718083739280701,grad_norm: 0.9999993233833493, iteration: 80587
loss: 0.9753473997116089,grad_norm: 0.9999991983458216, iteration: 80588
loss: 1.037927269935608,grad_norm: 0.9999991994892176, iteration: 80589
loss: 1.0158581733703613,grad_norm: 0.9999988738187539, iteration: 80590
loss: 1.0405831336975098,grad_norm: 0.999999542728333, iteration: 80591
loss: 0.9852679967880249,grad_norm: 0.9999990707235006, iteration: 80592
loss: 1.0242204666137695,grad_norm: 0.9999991626337174, iteration: 80593
loss: 1.0857503414154053,grad_norm: 0.9999996711224309, iteration: 80594
loss: 1.0239605903625488,grad_norm: 0.999999768867191, iteration: 80595
loss: 1.0505489110946655,grad_norm: 0.999999360192832, iteration: 80596
loss: 1.1903512477874756,grad_norm: 0.9999993006541662, iteration: 80597
loss: 0.9818099737167358,grad_norm: 0.9049583117957597, iteration: 80598
loss: 1.023910641670227,grad_norm: 0.8934916383950849, iteration: 80599
loss: 1.0244916677474976,grad_norm: 0.9999999916199555, iteration: 80600
loss: 1.014220118522644,grad_norm: 0.9999991452272633, iteration: 80601
loss: 1.0354210138320923,grad_norm: 0.9999993456945216, iteration: 80602
loss: 1.0486165285110474,grad_norm: 0.8718003338420672, iteration: 80603
loss: 1.0215247869491577,grad_norm: 0.8732281377917136, iteration: 80604
loss: 1.01694655418396,grad_norm: 0.999999094169766, iteration: 80605
loss: 1.0474900007247925,grad_norm: 0.9999997473675116, iteration: 80606
loss: 0.9681172370910645,grad_norm: 0.9999991024610002, iteration: 80607
loss: 1.0002495050430298,grad_norm: 0.9999991225100613, iteration: 80608
loss: 1.1265472173690796,grad_norm: 0.9999991693440061, iteration: 80609
loss: 1.0609593391418457,grad_norm: 0.9999998721292714, iteration: 80610
loss: 1.0668810606002808,grad_norm: 0.9999991215173155, iteration: 80611
loss: 1.058748722076416,grad_norm: 0.9999993720163427, iteration: 80612
loss: 1.0879127979278564,grad_norm: 0.9999993231743063, iteration: 80613
loss: 0.980746865272522,grad_norm: 0.8335552557017387, iteration: 80614
loss: 1.0865142345428467,grad_norm: 0.9999990772876602, iteration: 80615
loss: 1.289272427558899,grad_norm: 0.9999997299339418, iteration: 80616
loss: 1.0508016347885132,grad_norm: 0.9999993845848136, iteration: 80617
loss: 1.0893577337265015,grad_norm: 0.9999992507277292, iteration: 80618
loss: 1.0423002243041992,grad_norm: 0.9999994890736398, iteration: 80619
loss: 1.2174044847488403,grad_norm: 0.9999994931784749, iteration: 80620
loss: 1.0348453521728516,grad_norm: 0.9999994122537772, iteration: 80621
loss: 1.1680244207382202,grad_norm: 0.9999998442906569, iteration: 80622
loss: 0.9822680354118347,grad_norm: 0.9964370124512352, iteration: 80623
loss: 1.078147292137146,grad_norm: 0.9999998634029695, iteration: 80624
loss: 1.1315116882324219,grad_norm: 0.9999995887266361, iteration: 80625
loss: 1.1449284553527832,grad_norm: 0.9999992056830157, iteration: 80626
loss: 1.0649782419204712,grad_norm: 0.9999991526595743, iteration: 80627
loss: 1.0290305614471436,grad_norm: 0.9743073112945545, iteration: 80628
loss: 1.1252962350845337,grad_norm: 0.9285872332882164, iteration: 80629
loss: 1.1958837509155273,grad_norm: 0.9999993936626391, iteration: 80630
loss: 0.9811490774154663,grad_norm: 0.9999992099159313, iteration: 80631
loss: 1.178312063217163,grad_norm: 0.9999999492257137, iteration: 80632
loss: 1.0804308652877808,grad_norm: 0.9937225152046247, iteration: 80633
loss: 1.0714679956436157,grad_norm: 0.9999992189727954, iteration: 80634
loss: 1.0646401643753052,grad_norm: 0.9513385395891379, iteration: 80635
loss: 1.0363490581512451,grad_norm: 0.999999894681189, iteration: 80636
loss: 1.3544573783874512,grad_norm: 0.9999997611841199, iteration: 80637
loss: 1.2597465515136719,grad_norm: 0.9999996934306672, iteration: 80638
loss: 1.1966946125030518,grad_norm: 0.9999996713895745, iteration: 80639
loss: 1.1387795209884644,grad_norm: 0.9999992577332467, iteration: 80640
loss: 1.0920660495758057,grad_norm: 0.9999996712141216, iteration: 80641
loss: 1.1425998210906982,grad_norm: 0.9999997999193139, iteration: 80642
loss: 1.201549768447876,grad_norm: 0.9999994606598765, iteration: 80643
loss: 1.089937686920166,grad_norm: 0.9999994925908791, iteration: 80644
loss: 1.165224552154541,grad_norm: 0.9999995991570493, iteration: 80645
loss: 1.3184481859207153,grad_norm: 1.000000062110409, iteration: 80646
loss: 1.1751471757888794,grad_norm: 0.999999617480901, iteration: 80647
loss: 1.0983428955078125,grad_norm: 0.9999998910171205, iteration: 80648
loss: 1.2431824207305908,grad_norm: 0.999999480692116, iteration: 80649
loss: 1.0251049995422363,grad_norm: 0.9020076251467367, iteration: 80650
loss: 1.0406473875045776,grad_norm: 0.9417819587343192, iteration: 80651
loss: 1.0394028425216675,grad_norm: 0.9999991861241675, iteration: 80652
loss: 1.1272953748703003,grad_norm: 0.9999998099512367, iteration: 80653
loss: 1.2255511283874512,grad_norm: 0.9999995810966398, iteration: 80654
loss: 1.119053840637207,grad_norm: 0.9999995553575747, iteration: 80655
loss: 1.0598328113555908,grad_norm: 0.8179868354254274, iteration: 80656
loss: 1.1620628833770752,grad_norm: 0.9999999052563074, iteration: 80657
loss: 1.0408618450164795,grad_norm: 0.9999991820683041, iteration: 80658
loss: 1.2086840867996216,grad_norm: 0.9999999857946578, iteration: 80659
loss: 1.0557273626327515,grad_norm: 0.9999995419659007, iteration: 80660
loss: 1.0102072954177856,grad_norm: 0.9999992243438767, iteration: 80661
loss: 1.0911061763763428,grad_norm: 0.9240720051564212, iteration: 80662
loss: 1.0736585855484009,grad_norm: 0.999999299248169, iteration: 80663
loss: 1.0455092191696167,grad_norm: 0.9999997779298392, iteration: 80664
loss: 1.194451093673706,grad_norm: 0.9999996644543454, iteration: 80665
loss: 1.201442837715149,grad_norm: 0.9999997545169657, iteration: 80666
loss: 1.0768489837646484,grad_norm: 0.9999993517709946, iteration: 80667
loss: 1.1185688972473145,grad_norm: 0.999999342254955, iteration: 80668
loss: 1.0226796865463257,grad_norm: 0.9257553476549096, iteration: 80669
loss: 1.2124131917953491,grad_norm: 0.9999997283866663, iteration: 80670
loss: 1.0865737199783325,grad_norm: 0.9999998327723418, iteration: 80671
loss: 1.1150864362716675,grad_norm: 0.999999163355027, iteration: 80672
loss: 1.169418215751648,grad_norm: 0.9999993412393496, iteration: 80673
loss: 1.2741296291351318,grad_norm: 0.9999995255780149, iteration: 80674
loss: 1.3071368932724,grad_norm: 0.9999998974959777, iteration: 80675
loss: 1.0027192831039429,grad_norm: 0.9169131982141467, iteration: 80676
loss: 1.0723152160644531,grad_norm: 0.9999991337587374, iteration: 80677
loss: 1.1961464881896973,grad_norm: 0.999999470023795, iteration: 80678
loss: 1.0553686618804932,grad_norm: 0.9999992845132532, iteration: 80679
loss: 1.0409977436065674,grad_norm: 0.9999994334558849, iteration: 80680
loss: 1.1695750951766968,grad_norm: 0.999999873192467, iteration: 80681
loss: 1.1304621696472168,grad_norm: 0.9999998469679238, iteration: 80682
loss: 0.9648931622505188,grad_norm: 0.9432692343368897, iteration: 80683
loss: 1.117205023765564,grad_norm: 0.9999992791325633, iteration: 80684
loss: 1.1538885831832886,grad_norm: 0.9999998211378371, iteration: 80685
loss: 1.0707594156265259,grad_norm: 0.9999990873975724, iteration: 80686
loss: 1.1714004278182983,grad_norm: 0.9999996085363674, iteration: 80687
loss: 1.0359582901000977,grad_norm: 0.9999993405872364, iteration: 80688
loss: 1.0515962839126587,grad_norm: 0.999999706546726, iteration: 80689
loss: 1.1006689071655273,grad_norm: 0.9999992582585276, iteration: 80690
loss: 1.056486964225769,grad_norm: 0.9999995690924723, iteration: 80691
loss: 0.9899422526359558,grad_norm: 0.9999998937068335, iteration: 80692
loss: 1.1482447385787964,grad_norm: 0.9999998146015191, iteration: 80693
loss: 1.2325611114501953,grad_norm: 0.9999999050375848, iteration: 80694
loss: 1.1706757545471191,grad_norm: 0.9999999040984409, iteration: 80695
loss: 1.1158015727996826,grad_norm: 0.9999993986157087, iteration: 80696
loss: 1.0560898780822754,grad_norm: 0.9999996233239847, iteration: 80697
loss: 1.0920825004577637,grad_norm: 0.9999993327020208, iteration: 80698
loss: 1.0843039751052856,grad_norm: 0.9999991255279702, iteration: 80699
loss: 1.0400128364562988,grad_norm: 0.9999997201742992, iteration: 80700
loss: 1.0693451166152954,grad_norm: 0.9999994812607372, iteration: 80701
loss: 1.068692922592163,grad_norm: 0.9999994387058102, iteration: 80702
loss: 1.0719894170761108,grad_norm: 0.9999993327025989, iteration: 80703
loss: 1.0914827585220337,grad_norm: 0.9999996402723555, iteration: 80704
loss: 1.4456090927124023,grad_norm: 0.9999998880527505, iteration: 80705
loss: 1.108784794807434,grad_norm: 0.9999991620146614, iteration: 80706
loss: 1.2861167192459106,grad_norm: 0.999999701274777, iteration: 80707
loss: 1.044423222541809,grad_norm: 0.9999997973422718, iteration: 80708
loss: 1.0136536359786987,grad_norm: 0.9999993219772796, iteration: 80709
loss: 1.0825458765029907,grad_norm: 0.9999995250221009, iteration: 80710
loss: 1.2132378816604614,grad_norm: 0.9999997858158337, iteration: 80711
loss: 1.2346248626708984,grad_norm: 0.999999546326083, iteration: 80712
loss: 1.223044991493225,grad_norm: 0.999999388109241, iteration: 80713
loss: 1.0664498805999756,grad_norm: 0.9999992804138255, iteration: 80714
loss: 1.076106071472168,grad_norm: 0.9999994268189264, iteration: 80715
loss: 1.0802545547485352,grad_norm: 0.9999997619604278, iteration: 80716
loss: 1.098921775817871,grad_norm: 0.9999993357058241, iteration: 80717
loss: 1.1736997365951538,grad_norm: 0.9999998343832743, iteration: 80718
loss: 1.1378554105758667,grad_norm: 0.999999313835601, iteration: 80719
loss: 1.2063651084899902,grad_norm: 0.9999996916428601, iteration: 80720
loss: 1.2505896091461182,grad_norm: 0.9999997581768805, iteration: 80721
loss: 1.008197546005249,grad_norm: 0.9999998350974082, iteration: 80722
loss: 1.0242319107055664,grad_norm: 0.9999991113586263, iteration: 80723
loss: 1.1371351480484009,grad_norm: 0.9999996157712558, iteration: 80724
loss: 1.4752188920974731,grad_norm: 0.999999497969628, iteration: 80725
loss: 1.152618646621704,grad_norm: 0.9999994066459922, iteration: 80726
loss: 1.025081753730774,grad_norm: 0.9999994875335053, iteration: 80727
loss: 1.137884259223938,grad_norm: 0.9999999513495323, iteration: 80728
loss: 1.0694910287857056,grad_norm: 1.0000000119152623, iteration: 80729
loss: 1.085783839225769,grad_norm: 0.9999998409738529, iteration: 80730
loss: 1.1148207187652588,grad_norm: 0.999999581957293, iteration: 80731
loss: 1.1456233263015747,grad_norm: 0.9999998928373496, iteration: 80732
loss: 1.2340269088745117,grad_norm: 0.9999995762910208, iteration: 80733
loss: 1.0385799407958984,grad_norm: 0.9999996002461558, iteration: 80734
loss: 1.1899493932724,grad_norm: 0.9999993196509855, iteration: 80735
loss: 1.1696226596832275,grad_norm: 0.9999997509751919, iteration: 80736
loss: 1.1060868501663208,grad_norm: 0.9999993079087031, iteration: 80737
loss: 1.0111777782440186,grad_norm: 0.999999260378768, iteration: 80738
loss: 1.196925401687622,grad_norm: 0.9999997544678089, iteration: 80739
loss: 1.028241515159607,grad_norm: 0.9999996118589931, iteration: 80740
loss: 1.2598555088043213,grad_norm: 0.9999999181357132, iteration: 80741
loss: 1.0784333944320679,grad_norm: 0.9999995753726982, iteration: 80742
loss: 1.0702204704284668,grad_norm: 0.9999993105921208, iteration: 80743
loss: 1.1214251518249512,grad_norm: 0.9999997280852845, iteration: 80744
loss: 0.9832282662391663,grad_norm: 0.9999991807976232, iteration: 80745
loss: 1.0423259735107422,grad_norm: 0.9999991470256985, iteration: 80746
loss: 0.9901637434959412,grad_norm: 0.9826236752803288, iteration: 80747
loss: 1.0576977729797363,grad_norm: 0.9999991763227528, iteration: 80748
loss: 1.0014911890029907,grad_norm: 0.9999990152907888, iteration: 80749
loss: 1.0070216655731201,grad_norm: 0.9999995730346539, iteration: 80750
loss: 1.0517327785491943,grad_norm: 0.9999994716529543, iteration: 80751
loss: 1.0125329494476318,grad_norm: 0.9999991800280024, iteration: 80752
loss: 1.0458574295043945,grad_norm: 0.9999994635087861, iteration: 80753
loss: 1.0189205408096313,grad_norm: 0.9999994200308537, iteration: 80754
loss: 0.977679431438446,grad_norm: 0.9999991361328171, iteration: 80755
loss: 1.0021950006484985,grad_norm: 0.9404949925493967, iteration: 80756
loss: 1.1061667203903198,grad_norm: 0.9999998002788608, iteration: 80757
loss: 0.988275408744812,grad_norm: 0.863099408316766, iteration: 80758
loss: 0.9876634478569031,grad_norm: 0.9999994585126863, iteration: 80759
loss: 1.0616955757141113,grad_norm: 0.9816247297160017, iteration: 80760
loss: 1.1925066709518433,grad_norm: 0.9999999332773658, iteration: 80761
loss: 1.1357825994491577,grad_norm: 0.9999991834474354, iteration: 80762
loss: 1.146138072013855,grad_norm: 0.9999998455043829, iteration: 80763
loss: 1.0259345769882202,grad_norm: 0.9999991685016176, iteration: 80764
loss: 1.0326128005981445,grad_norm: 0.9999993573030712, iteration: 80765
loss: 1.0214635133743286,grad_norm: 0.999999263719945, iteration: 80766
loss: 1.0280722379684448,grad_norm: 0.9648848923317436, iteration: 80767
loss: 1.086064338684082,grad_norm: 0.9999997066524207, iteration: 80768
loss: 1.1074163913726807,grad_norm: 0.9999993084621328, iteration: 80769
loss: 1.0116360187530518,grad_norm: 0.9999994408790729, iteration: 80770
loss: 1.0405621528625488,grad_norm: 0.9999993291596739, iteration: 80771
loss: 1.1674542427062988,grad_norm: 0.9999996180955705, iteration: 80772
loss: 1.1417993307113647,grad_norm: 0.999999705917732, iteration: 80773
loss: 1.0692250728607178,grad_norm: 0.9999994299964244, iteration: 80774
loss: 1.031827449798584,grad_norm: 0.9999992167432417, iteration: 80775
loss: 0.9895678162574768,grad_norm: 0.9999998157772377, iteration: 80776
loss: 1.038783073425293,grad_norm: 0.9999991404979522, iteration: 80777
loss: 1.0067113637924194,grad_norm: 0.9999997908891364, iteration: 80778
loss: 1.2649177312850952,grad_norm: 0.9999998237221004, iteration: 80779
loss: 0.992948591709137,grad_norm: 0.999999333524259, iteration: 80780
loss: 1.1232002973556519,grad_norm: 0.999999325648032, iteration: 80781
loss: 1.01791512966156,grad_norm: 0.9999993502164294, iteration: 80782
loss: 1.0475786924362183,grad_norm: 0.999999433924865, iteration: 80783
loss: 0.9708341360092163,grad_norm: 0.9999990702757516, iteration: 80784
loss: 1.003401279449463,grad_norm: 0.9999995710158642, iteration: 80785
loss: 1.0406113862991333,grad_norm: 0.9794083031051048, iteration: 80786
loss: 1.0418286323547363,grad_norm: 0.9999992017889763, iteration: 80787
loss: 1.0012083053588867,grad_norm: 0.9675584349155213, iteration: 80788
loss: 1.0195051431655884,grad_norm: 0.9167123921412207, iteration: 80789
loss: 1.0229549407958984,grad_norm: 0.9999991050087516, iteration: 80790
loss: 1.0355867147445679,grad_norm: 0.9999994488896149, iteration: 80791
loss: 1.0214194059371948,grad_norm: 0.9999997453744998, iteration: 80792
loss: 1.0098152160644531,grad_norm: 0.972871339880282, iteration: 80793
loss: 1.0451377630233765,grad_norm: 0.9999990757490623, iteration: 80794
loss: 1.0702568292617798,grad_norm: 0.999999110967171, iteration: 80795
loss: 1.0177980661392212,grad_norm: 0.904724452245111, iteration: 80796
loss: 1.3939961194992065,grad_norm: 0.9999999216566258, iteration: 80797
loss: 1.1484463214874268,grad_norm: 0.9999998135886948, iteration: 80798
loss: 1.1105881929397583,grad_norm: 0.9999995807606753, iteration: 80799
loss: 1.0710530281066895,grad_norm: 0.9999991768996704, iteration: 80800
loss: 1.0323859453201294,grad_norm: 0.9999999149423681, iteration: 80801
loss: 1.0211049318313599,grad_norm: 0.9999991967801608, iteration: 80802
loss: 1.0534039735794067,grad_norm: 0.9999995267243276, iteration: 80803
loss: 1.053345799446106,grad_norm: 0.9999992741503154, iteration: 80804
loss: 1.0423566102981567,grad_norm: 0.9999990796733735, iteration: 80805
loss: 1.1496816873550415,grad_norm: 0.9999998837772297, iteration: 80806
loss: 1.0828888416290283,grad_norm: 0.999999906713588, iteration: 80807
loss: 1.1132680177688599,grad_norm: 0.9999997009843653, iteration: 80808
loss: 1.132491946220398,grad_norm: 0.9999993151384767, iteration: 80809
loss: 1.1242159605026245,grad_norm: 0.9999998368903456, iteration: 80810
loss: 1.080420970916748,grad_norm: 0.9999995025873872, iteration: 80811
loss: 0.9928866624832153,grad_norm: 0.9999998576196979, iteration: 80812
loss: 1.1142839193344116,grad_norm: 0.9999999023223995, iteration: 80813
loss: 1.000174880027771,grad_norm: 0.9999994841057405, iteration: 80814
loss: 1.067473292350769,grad_norm: 0.9999999116866489, iteration: 80815
loss: 1.117380976676941,grad_norm: 0.9999998540925472, iteration: 80816
loss: 1.192901611328125,grad_norm: 0.9999999355111586, iteration: 80817
loss: 1.0132403373718262,grad_norm: 0.9739000619465293, iteration: 80818
loss: 1.0801929235458374,grad_norm: 0.9999999285201613, iteration: 80819
loss: 1.0476906299591064,grad_norm: 0.9999995459237524, iteration: 80820
loss: 1.0722734928131104,grad_norm: 0.9999995915332022, iteration: 80821
loss: 0.9949349164962769,grad_norm: 0.9999992837368952, iteration: 80822
loss: 1.0407510995864868,grad_norm: 0.9999990152649564, iteration: 80823
loss: 1.0999915599822998,grad_norm: 0.9999993613590054, iteration: 80824
loss: 0.9997977614402771,grad_norm: 0.9999991437396683, iteration: 80825
loss: 1.0604900121688843,grad_norm: 0.9999993524195849, iteration: 80826
loss: 1.0455803871154785,grad_norm: 0.9999995827720464, iteration: 80827
loss: 1.017849326133728,grad_norm: 0.9999991673347287, iteration: 80828
loss: 1.0771503448486328,grad_norm: 0.9999995657634291, iteration: 80829
loss: 1.068061351776123,grad_norm: 0.9999993332226751, iteration: 80830
loss: 1.052753210067749,grad_norm: 0.9999993298320288, iteration: 80831
loss: 0.9706684350967407,grad_norm: 0.9386400327853953, iteration: 80832
loss: 1.155319094657898,grad_norm: 0.9999991643371235, iteration: 80833
loss: 1.0212576389312744,grad_norm: 0.9999990829868481, iteration: 80834
loss: 1.0277680158615112,grad_norm: 0.9801495702452234, iteration: 80835
loss: 1.1398924589157104,grad_norm: 0.9999997707866229, iteration: 80836
loss: 0.9879159927368164,grad_norm: 0.8820724457556071, iteration: 80837
loss: 1.0999259948730469,grad_norm: 0.9999991131154843, iteration: 80838
loss: 0.9740013480186462,grad_norm: 0.9999991554968423, iteration: 80839
loss: 1.0032199621200562,grad_norm: 0.9245392410685844, iteration: 80840
loss: 1.1980842351913452,grad_norm: 0.9999998663398385, iteration: 80841
loss: 1.0242383480072021,grad_norm: 0.9999993536486871, iteration: 80842
loss: 1.0812658071517944,grad_norm: 0.999999469596228, iteration: 80843
loss: 1.0691238641738892,grad_norm: 0.999999811256037, iteration: 80844
loss: 1.2060236930847168,grad_norm: 0.9999998547460851, iteration: 80845
loss: 1.1146517992019653,grad_norm: 0.9999994834820782, iteration: 80846
loss: 1.1065610647201538,grad_norm: 0.99999934273042, iteration: 80847
loss: 0.9778950214385986,grad_norm: 0.8978152679416582, iteration: 80848
loss: 1.0326834917068481,grad_norm: 0.9999991775692842, iteration: 80849
loss: 1.0183807611465454,grad_norm: 0.9999998301791233, iteration: 80850
loss: 1.1369171142578125,grad_norm: 0.9999997008055819, iteration: 80851
loss: 1.0388399362564087,grad_norm: 0.9999995070631041, iteration: 80852
loss: 1.001464605331421,grad_norm: 0.9999997968032296, iteration: 80853
loss: 1.2058436870574951,grad_norm: 0.9999999847776324, iteration: 80854
loss: 1.1525439023971558,grad_norm: 0.9999999085169466, iteration: 80855
loss: 1.1778852939605713,grad_norm: 0.9999997546378412, iteration: 80856
loss: 1.1444851160049438,grad_norm: 0.9999992026945554, iteration: 80857
loss: 1.0085762739181519,grad_norm: 0.9872910692465017, iteration: 80858
loss: 1.120896577835083,grad_norm: 0.9304061751526569, iteration: 80859
loss: 1.2142853736877441,grad_norm: 0.9999999830354092, iteration: 80860
loss: 1.1069105863571167,grad_norm: 0.99999905737053, iteration: 80861
loss: 1.2423913478851318,grad_norm: 0.9999997372490486, iteration: 80862
loss: 1.201980471611023,grad_norm: 0.9999994519954681, iteration: 80863
loss: 1.038001298904419,grad_norm: 0.9999991379104831, iteration: 80864
loss: 1.0512700080871582,grad_norm: 0.9999990209211085, iteration: 80865
loss: 1.097017526626587,grad_norm: 0.9999994734906019, iteration: 80866
loss: 1.0182398557662964,grad_norm: 0.9999992660958685, iteration: 80867
loss: 1.0534813404083252,grad_norm: 0.999999681578199, iteration: 80868
loss: 1.1350806951522827,grad_norm: 0.999999772398246, iteration: 80869
loss: 1.0836375951766968,grad_norm: 0.9999990832799762, iteration: 80870
loss: 0.9842460751533508,grad_norm: 0.9999994477895486, iteration: 80871
loss: 1.1268991231918335,grad_norm: 0.9999993823915257, iteration: 80872
loss: 1.0465006828308105,grad_norm: 0.9999997597435955, iteration: 80873
loss: 1.1818618774414062,grad_norm: 0.9999997424761117, iteration: 80874
loss: 1.0273268222808838,grad_norm: 0.9999996616196789, iteration: 80875
loss: 1.1255183219909668,grad_norm: 0.9999997087768304, iteration: 80876
loss: 1.0145820379257202,grad_norm: 0.9812072363582334, iteration: 80877
loss: 1.1000667810440063,grad_norm: 0.9999993756662567, iteration: 80878
loss: 1.0243741273880005,grad_norm: 0.9999990083088847, iteration: 80879
loss: 1.0478193759918213,grad_norm: 0.9999999487095739, iteration: 80880
loss: 1.1234127283096313,grad_norm: 0.9999995217624625, iteration: 80881
loss: 0.9854162931442261,grad_norm: 0.9999991348977018, iteration: 80882
loss: 1.0227127075195312,grad_norm: 0.9411050597752595, iteration: 80883
loss: 1.0113176107406616,grad_norm: 0.9999992130214318, iteration: 80884
loss: 1.1319358348846436,grad_norm: 0.9999993661811131, iteration: 80885
loss: 1.0315465927124023,grad_norm: 0.9999991946725796, iteration: 80886
loss: 1.3098795413970947,grad_norm: 0.9999997640481023, iteration: 80887
loss: 1.3299663066864014,grad_norm: 0.9999996647666461, iteration: 80888
loss: 1.210731863975525,grad_norm: 0.9999995654779835, iteration: 80889
loss: 1.1526100635528564,grad_norm: 0.999999559347429, iteration: 80890
loss: 1.2485332489013672,grad_norm: 0.9999998732227903, iteration: 80891
loss: 1.1193205118179321,grad_norm: 0.9999994354799402, iteration: 80892
loss: 1.1045887470245361,grad_norm: 0.9999997511253996, iteration: 80893
loss: 1.2701772451400757,grad_norm: 0.9999993358995095, iteration: 80894
loss: 1.208717703819275,grad_norm: 0.9999992669748868, iteration: 80895
loss: 1.1121493577957153,grad_norm: 0.9999997311921128, iteration: 80896
loss: 1.0942174196243286,grad_norm: 0.999999054019619, iteration: 80897
loss: 1.3431750535964966,grad_norm: 0.9999999671121432, iteration: 80898
loss: 1.064429521560669,grad_norm: 0.9999996520320041, iteration: 80899
loss: 1.046219825744629,grad_norm: 0.999999372364728, iteration: 80900
loss: 1.32356858253479,grad_norm: 0.9999999268617569, iteration: 80901
loss: 1.0314861536026,grad_norm: 0.9999997292341886, iteration: 80902
loss: 1.075768232345581,grad_norm: 0.9999993760075412, iteration: 80903
loss: 1.0953292846679688,grad_norm: 0.9999992580098992, iteration: 80904
loss: 1.0424801111221313,grad_norm: 0.999999001962307, iteration: 80905
loss: 1.1695176362991333,grad_norm: 0.9999995604735104, iteration: 80906
loss: 1.0058746337890625,grad_norm: 0.999999251127509, iteration: 80907
loss: 1.1675498485565186,grad_norm: 1.0000000424795883, iteration: 80908
loss: 1.0283184051513672,grad_norm: 0.9999992802064099, iteration: 80909
loss: 1.0156638622283936,grad_norm: 0.9999989777123752, iteration: 80910
loss: 1.0763707160949707,grad_norm: 0.8631141054493734, iteration: 80911
loss: 1.077562689781189,grad_norm: 0.9999998042591799, iteration: 80912
loss: 1.0909504890441895,grad_norm: 0.9999990928827749, iteration: 80913
loss: 1.0463064908981323,grad_norm: 0.9999992641088041, iteration: 80914
loss: 1.0896437168121338,grad_norm: 0.9634579455396373, iteration: 80915
loss: 1.184667706489563,grad_norm: 0.9999997016311519, iteration: 80916
loss: 1.0649199485778809,grad_norm: 0.9999994595534428, iteration: 80917
loss: 1.11354398727417,grad_norm: 0.9999997959256796, iteration: 80918
loss: 1.0779924392700195,grad_norm: 0.9999998237834653, iteration: 80919
loss: 0.9741480946540833,grad_norm: 0.9999998914244537, iteration: 80920
loss: 1.0306522846221924,grad_norm: 0.9550250904987851, iteration: 80921
loss: 1.0012155771255493,grad_norm: 0.9999989245100421, iteration: 80922
loss: 1.1586898565292358,grad_norm: 0.9999992825354602, iteration: 80923
loss: 1.0485382080078125,grad_norm: 0.8450879807924284, iteration: 80924
loss: 1.085898756980896,grad_norm: 0.9982423393633832, iteration: 80925
loss: 1.0625687837600708,grad_norm: 0.999999472179831, iteration: 80926
loss: 1.1447808742523193,grad_norm: 0.9584421850905478, iteration: 80927
loss: 1.0627745389938354,grad_norm: 0.999999556911884, iteration: 80928
loss: 1.228176474571228,grad_norm: 0.9999997898266162, iteration: 80929
loss: 1.0884428024291992,grad_norm: 0.9999996189920962, iteration: 80930
loss: 1.0740231275558472,grad_norm: 0.9999998345463228, iteration: 80931
loss: 1.1967836618423462,grad_norm: 0.9999995694300814, iteration: 80932
loss: 0.9902535080909729,grad_norm: 0.9999993157320091, iteration: 80933
loss: 1.0094594955444336,grad_norm: 0.8710012274519146, iteration: 80934
loss: 1.1010597944259644,grad_norm: 0.9999996734879119, iteration: 80935
loss: 1.119691252708435,grad_norm: 0.9999996226111849, iteration: 80936
loss: 1.2479751110076904,grad_norm: 0.999999972706225, iteration: 80937
loss: 1.0255153179168701,grad_norm: 0.9999995480660077, iteration: 80938
loss: 1.1603058576583862,grad_norm: 0.9999998720363997, iteration: 80939
loss: 1.037205696105957,grad_norm: 0.9999993049527967, iteration: 80940
loss: 1.1990691423416138,grad_norm: 1.000000037798065, iteration: 80941
loss: 1.0300251245498657,grad_norm: 0.9999990677399553, iteration: 80942
loss: 1.1453360319137573,grad_norm: 0.9999997628504693, iteration: 80943
loss: 1.095083236694336,grad_norm: 0.9999997428040364, iteration: 80944
loss: 1.0794785022735596,grad_norm: 1.0000000299735865, iteration: 80945
loss: 1.0860449075698853,grad_norm: 0.9999996264606341, iteration: 80946
loss: 1.3469371795654297,grad_norm: 1.0000000131023943, iteration: 80947
loss: 1.1355994939804077,grad_norm: 0.9999996821283441, iteration: 80948
loss: 1.2554832696914673,grad_norm: 0.9999998056988726, iteration: 80949
loss: 1.272270679473877,grad_norm: 0.9999996912405364, iteration: 80950
loss: 1.1235060691833496,grad_norm: 0.9999991457371228, iteration: 80951
loss: 1.0622528791427612,grad_norm: 0.9999996131091733, iteration: 80952
loss: 1.2010831832885742,grad_norm: 0.9999998927362515, iteration: 80953
loss: 1.159805417060852,grad_norm: 0.9999993551925531, iteration: 80954
loss: 1.051460862159729,grad_norm: 0.9999997353182353, iteration: 80955
loss: 1.0535035133361816,grad_norm: 0.9999998107314596, iteration: 80956
loss: 1.1082290410995483,grad_norm: 0.9999998430570702, iteration: 80957
loss: 1.0153173208236694,grad_norm: 0.9999992444577431, iteration: 80958
loss: 1.1437153816223145,grad_norm: 0.9999991215215586, iteration: 80959
loss: 1.0971184968948364,grad_norm: 0.9999998217023552, iteration: 80960
loss: 1.0960708856582642,grad_norm: 0.999999951776832, iteration: 80961
loss: 1.0392228364944458,grad_norm: 0.9337854149288521, iteration: 80962
loss: 0.9849137663841248,grad_norm: 0.9188428116151749, iteration: 80963
loss: 1.0784728527069092,grad_norm: 0.9999992623783136, iteration: 80964
loss: 1.1430575847625732,grad_norm: 0.9999995348906733, iteration: 80965
loss: 1.2132200002670288,grad_norm: 0.9999999275669298, iteration: 80966
loss: 1.0091923475265503,grad_norm: 0.8815609300774497, iteration: 80967
loss: 1.0827239751815796,grad_norm: 0.9999991938421802, iteration: 80968
loss: 1.1139583587646484,grad_norm: 0.9999993352512516, iteration: 80969
loss: 1.1213963031768799,grad_norm: 0.9999995257879103, iteration: 80970
loss: 1.0006153583526611,grad_norm: 0.8699251526518661, iteration: 80971
loss: 1.1948415040969849,grad_norm: 0.9999999081281308, iteration: 80972
loss: 1.0155467987060547,grad_norm: 0.9999991777207579, iteration: 80973
loss: 1.0040793418884277,grad_norm: 0.9999996368243707, iteration: 80974
loss: 1.1021939516067505,grad_norm: 0.9999992687127461, iteration: 80975
loss: 1.08902108669281,grad_norm: 0.9678462720723761, iteration: 80976
loss: 1.0956768989562988,grad_norm: 0.9999994849003607, iteration: 80977
loss: 1.0229134559631348,grad_norm: 0.999999262662735, iteration: 80978
loss: 1.0729155540466309,grad_norm: 0.8637695200356021, iteration: 80979
loss: 1.051252841949463,grad_norm: 0.9999994713312745, iteration: 80980
loss: 1.1317421197891235,grad_norm: 0.9999998078700848, iteration: 80981
loss: 1.0960423946380615,grad_norm: 0.999999964607972, iteration: 80982
loss: 1.034454107284546,grad_norm: 0.998025216533373, iteration: 80983
loss: 1.181893229484558,grad_norm: 0.9999992303467418, iteration: 80984
loss: 1.1865004301071167,grad_norm: 0.9999998391123253, iteration: 80985
loss: 1.0102765560150146,grad_norm: 0.9999994357628362, iteration: 80986
loss: 1.0001349449157715,grad_norm: 0.9999991014383719, iteration: 80987
loss: 1.0465638637542725,grad_norm: 0.993007692466064, iteration: 80988
loss: 1.056769609451294,grad_norm: 0.999999182028662, iteration: 80989
loss: 1.0718772411346436,grad_norm: 0.8701694773808041, iteration: 80990
loss: 1.0411168336868286,grad_norm: 0.9526494132684715, iteration: 80991
loss: 1.1849414110183716,grad_norm: 0.9999998588339594, iteration: 80992
loss: 1.0071649551391602,grad_norm: 0.8777389190796518, iteration: 80993
loss: 1.0615925788879395,grad_norm: 0.9012464355689188, iteration: 80994
loss: 1.0467854738235474,grad_norm: 0.9999990435863776, iteration: 80995
loss: 1.0502915382385254,grad_norm: 0.9999997539767717, iteration: 80996
loss: 1.010457992553711,grad_norm: 0.8832041581640288, iteration: 80997
loss: 1.1528682708740234,grad_norm: 0.9999998338625941, iteration: 80998
loss: 1.2300450801849365,grad_norm: 0.9999992976185116, iteration: 80999
loss: 1.0984022617340088,grad_norm: 0.9999999677923175, iteration: 81000
loss: 1.1661341190338135,grad_norm: 0.9999993082115224, iteration: 81001
loss: 1.0888876914978027,grad_norm: 0.9999992520182457, iteration: 81002
loss: 1.1299711465835571,grad_norm: 0.9999998804257243, iteration: 81003
loss: 0.9606679081916809,grad_norm: 0.9999994585158388, iteration: 81004
loss: 1.0615266561508179,grad_norm: 0.999999306555236, iteration: 81005
loss: 1.0918244123458862,grad_norm: 0.9999998000256289, iteration: 81006
loss: 1.0137720108032227,grad_norm: 0.999999831123586, iteration: 81007
loss: 1.2328166961669922,grad_norm: 0.9999997121671003, iteration: 81008
loss: 1.064719319343567,grad_norm: 0.9953739711466899, iteration: 81009
loss: 1.0042470693588257,grad_norm: 0.9999991300530229, iteration: 81010
loss: 1.0366533994674683,grad_norm: 0.9999994604809973, iteration: 81011
loss: 1.0359138250350952,grad_norm: 0.9999997613636988, iteration: 81012
loss: 1.043206810951233,grad_norm: 0.9999992766349473, iteration: 81013
loss: 0.9602782130241394,grad_norm: 0.9731736128930268, iteration: 81014
loss: 1.0765446424484253,grad_norm: 0.9569865028489248, iteration: 81015
loss: 1.1039248704910278,grad_norm: 0.8912666765116547, iteration: 81016
loss: 1.045666217803955,grad_norm: 0.9999995237958615, iteration: 81017
loss: 1.044937014579773,grad_norm: 0.9999991330680761, iteration: 81018
loss: 1.0132191181182861,grad_norm: 0.905561046564785, iteration: 81019
loss: 1.0202358961105347,grad_norm: 0.7745604349884998, iteration: 81020
loss: 1.001277208328247,grad_norm: 0.9367728138570436, iteration: 81021
loss: 0.9862093329429626,grad_norm: 0.9999992948151334, iteration: 81022
loss: 1.0605863332748413,grad_norm: 0.9345656433046259, iteration: 81023
loss: 1.2495701313018799,grad_norm: 0.9999993991572911, iteration: 81024
loss: 1.1774100065231323,grad_norm: 0.99999985258058, iteration: 81025
loss: 1.0054211616516113,grad_norm: 0.9932083265053289, iteration: 81026
loss: 1.0200811624526978,grad_norm: 0.9999997399786837, iteration: 81027
loss: 1.0541712045669556,grad_norm: 0.9999996880424575, iteration: 81028
loss: 1.0251885652542114,grad_norm: 0.956449703783419, iteration: 81029
loss: 1.013701319694519,grad_norm: 0.9999996373898246, iteration: 81030
loss: 1.078662395477295,grad_norm: 0.9999991985286094, iteration: 81031
loss: 1.2356373071670532,grad_norm: 0.9999994437321835, iteration: 81032
loss: 1.103657841682434,grad_norm: 0.999999588320459, iteration: 81033
loss: 1.045013666152954,grad_norm: 0.9444093913175536, iteration: 81034
loss: 1.0252422094345093,grad_norm: 0.9712596690084699, iteration: 81035
loss: 1.0349658727645874,grad_norm: 0.9999990593366308, iteration: 81036
loss: 1.0774954557418823,grad_norm: 0.9631065314562887, iteration: 81037
loss: 1.0001380443572998,grad_norm: 0.9999996712443374, iteration: 81038
loss: 1.0075832605361938,grad_norm: 0.8852687738459003, iteration: 81039
loss: 1.05032217502594,grad_norm: 0.999999527782327, iteration: 81040
loss: 1.114958643913269,grad_norm: 0.9999991762141324, iteration: 81041
loss: 1.0870941877365112,grad_norm: 0.9999991617826726, iteration: 81042
loss: 1.025728702545166,grad_norm: 0.9999999459052236, iteration: 81043
loss: 0.9998925924301147,grad_norm: 0.8407565528037787, iteration: 81044
loss: 0.9815880060195923,grad_norm: 0.9197276411252107, iteration: 81045
loss: 1.0030266046524048,grad_norm: 0.9221656557755576, iteration: 81046
loss: 1.061753511428833,grad_norm: 0.9999992121002323, iteration: 81047
loss: 1.0686156749725342,grad_norm: 0.9999992375923444, iteration: 81048
loss: 0.9910288453102112,grad_norm: 0.9999992931759246, iteration: 81049
loss: 1.025133490562439,grad_norm: 0.810435743072715, iteration: 81050
loss: 1.0485118627548218,grad_norm: 0.9999991718697551, iteration: 81051
loss: 1.0140761137008667,grad_norm: 0.9999989150927501, iteration: 81052
loss: 1.069218635559082,grad_norm: 0.9999990848372693, iteration: 81053
loss: 1.0160188674926758,grad_norm: 0.9999990489102185, iteration: 81054
loss: 1.0601080656051636,grad_norm: 0.9137839595029196, iteration: 81055
loss: 1.1045176982879639,grad_norm: 0.999999425292578, iteration: 81056
loss: 1.0318210124969482,grad_norm: 0.9432210846806389, iteration: 81057
loss: 1.1268572807312012,grad_norm: 0.9999997854523313, iteration: 81058
loss: 1.1047134399414062,grad_norm: 0.9999998075370243, iteration: 81059
loss: 1.1475085020065308,grad_norm: 0.999999768491062, iteration: 81060
loss: 1.0481082201004028,grad_norm: 0.9999996563239275, iteration: 81061
loss: 1.0946718454360962,grad_norm: 0.9999998118515135, iteration: 81062
loss: 1.0264254808425903,grad_norm: 1.0000000421071031, iteration: 81063
loss: 1.0017293691635132,grad_norm: 0.9999991986171866, iteration: 81064
loss: 1.0223690271377563,grad_norm: 0.8704389838992048, iteration: 81065
loss: 0.9977210760116577,grad_norm: 0.8656677032181618, iteration: 81066
loss: 1.0879651308059692,grad_norm: 0.9999995503143292, iteration: 81067
loss: 1.0176661014556885,grad_norm: 0.9999992105818516, iteration: 81068
loss: 0.9876160621643066,grad_norm: 0.8266606039419321, iteration: 81069
loss: 1.0641837120056152,grad_norm: 0.9999998284275009, iteration: 81070
loss: 1.0288068056106567,grad_norm: 0.858516646769274, iteration: 81071
loss: 1.0645205974578857,grad_norm: 0.9999994373798394, iteration: 81072
loss: 1.0943779945373535,grad_norm: 0.9999991660082878, iteration: 81073
loss: 0.9905843734741211,grad_norm: 0.9242531901052677, iteration: 81074
loss: 1.0248464345932007,grad_norm: 0.999999251244402, iteration: 81075
loss: 1.0408860445022583,grad_norm: 0.9999990593169208, iteration: 81076
loss: 1.0353591442108154,grad_norm: 0.9999994189192061, iteration: 81077
loss: 1.0126032829284668,grad_norm: 0.9158872384923181, iteration: 81078
loss: 1.0115457773208618,grad_norm: 0.8559626160951985, iteration: 81079
loss: 0.9622617959976196,grad_norm: 0.9231528613660003, iteration: 81080
loss: 1.0420235395431519,grad_norm: 0.9999990880684464, iteration: 81081
loss: 0.9537254571914673,grad_norm: 0.952947979724146, iteration: 81082
loss: 1.0566545724868774,grad_norm: 0.9999991983348749, iteration: 81083
loss: 1.0772348642349243,grad_norm: 0.9999994971745039, iteration: 81084
loss: 1.0249329805374146,grad_norm: 0.9999994529253807, iteration: 81085
loss: 1.0066183805465698,grad_norm: 0.9999993948213646, iteration: 81086
loss: 1.0935375690460205,grad_norm: 0.9752221776781481, iteration: 81087
loss: 0.9930196404457092,grad_norm: 0.9999992356982024, iteration: 81088
loss: 0.9972849488258362,grad_norm: 0.9265051837132702, iteration: 81089
loss: 0.9993916749954224,grad_norm: 0.8024191242627494, iteration: 81090
loss: 1.069573163986206,grad_norm: 0.9999991758105005, iteration: 81091
loss: 0.9653263092041016,grad_norm: 0.8582598931542719, iteration: 81092
loss: 1.1929402351379395,grad_norm: 0.9999996866730326, iteration: 81093
loss: 1.062963843345642,grad_norm: 0.9999999090200872, iteration: 81094
loss: 0.9847300052642822,grad_norm: 0.8288027338051405, iteration: 81095
loss: 1.0848881006240845,grad_norm: 0.9999993371429747, iteration: 81096
loss: 0.9691711068153381,grad_norm: 0.9999997163942714, iteration: 81097
loss: 1.136935830116272,grad_norm: 0.9999997060236167, iteration: 81098
loss: 1.0157276391983032,grad_norm: 0.8801701507828302, iteration: 81099
loss: 1.0006715059280396,grad_norm: 0.9731969013462135, iteration: 81100
loss: 1.0581259727478027,grad_norm: 0.9175062012310435, iteration: 81101
loss: 1.0875898599624634,grad_norm: 0.9999990499200514, iteration: 81102
loss: 0.9839339256286621,grad_norm: 0.9269923218167297, iteration: 81103
loss: 1.0150418281555176,grad_norm: 0.999999187787862, iteration: 81104
loss: 0.9959364533424377,grad_norm: 0.8376423046501656, iteration: 81105
loss: 1.0263944864273071,grad_norm: 0.8978682705191632, iteration: 81106
loss: 1.108721375465393,grad_norm: 0.9775385530682912, iteration: 81107
loss: 1.018454909324646,grad_norm: 0.9787306582402943, iteration: 81108
loss: 0.9717128872871399,grad_norm: 0.995021273186299, iteration: 81109
loss: 1.0011423826217651,grad_norm: 0.8028791921457259, iteration: 81110
loss: 1.0521053075790405,grad_norm: 0.999999467682018, iteration: 81111
loss: 0.9846581816673279,grad_norm: 0.9037914898218675, iteration: 81112
loss: 1.063220500946045,grad_norm: 0.9999999388725167, iteration: 81113
loss: 1.1074554920196533,grad_norm: 0.9208053262177265, iteration: 81114
loss: 1.0362114906311035,grad_norm: 0.8855551012515542, iteration: 81115
loss: 1.0464797019958496,grad_norm: 0.9999997868606618, iteration: 81116
loss: 0.9929811358451843,grad_norm: 0.8063850553420558, iteration: 81117
loss: 1.0054889917373657,grad_norm: 0.8727531972954692, iteration: 81118
loss: 1.051153302192688,grad_norm: 0.8833156644714296, iteration: 81119
loss: 0.9637116193771362,grad_norm: 0.9091192459846092, iteration: 81120
loss: 1.0197118520736694,grad_norm: 0.9999992223481017, iteration: 81121
loss: 1.0816867351531982,grad_norm: 0.997856751143964, iteration: 81122
loss: 1.0068023204803467,grad_norm: 0.9060329803326433, iteration: 81123
loss: 0.9623438119888306,grad_norm: 0.9419100129658539, iteration: 81124
loss: 1.0176101922988892,grad_norm: 0.9999991373278229, iteration: 81125
loss: 1.0453517436981201,grad_norm: 0.9999994282746116, iteration: 81126
loss: 0.9735082983970642,grad_norm: 0.7879698670130402, iteration: 81127
loss: 1.0176137685775757,grad_norm: 0.9615101765733958, iteration: 81128
loss: 1.0086466073989868,grad_norm: 0.9986800528454369, iteration: 81129
loss: 1.0677852630615234,grad_norm: 0.885660906822083, iteration: 81130
loss: 0.9819135069847107,grad_norm: 0.8419448512535658, iteration: 81131
loss: 0.9898175001144409,grad_norm: 0.8593322404651286, iteration: 81132
loss: 1.1356346607208252,grad_norm: 0.9497643654618008, iteration: 81133
loss: 0.9967080950737,grad_norm: 0.8797350256571377, iteration: 81134
loss: 1.0154823064804077,grad_norm: 0.9999990772780053, iteration: 81135
loss: 1.0002940893173218,grad_norm: 0.9999990645634238, iteration: 81136
loss: 1.0000337362289429,grad_norm: 0.9999992506849851, iteration: 81137
loss: 0.9970442652702332,grad_norm: 0.8764442663979174, iteration: 81138
loss: 1.0014736652374268,grad_norm: 0.8846542367430434, iteration: 81139
loss: 0.9998295903205872,grad_norm: 0.7203583566370372, iteration: 81140
loss: 1.0342886447906494,grad_norm: 0.9999990889464351, iteration: 81141
loss: 0.9772802591323853,grad_norm: 0.9999993643294798, iteration: 81142
loss: 1.09242582321167,grad_norm: 0.9999991378209904, iteration: 81143
loss: 0.9688849449157715,grad_norm: 0.9999990539508348, iteration: 81144
loss: 1.0882534980773926,grad_norm: 0.999999400086023, iteration: 81145
loss: 1.0853245258331299,grad_norm: 0.9999990372296296, iteration: 81146
loss: 0.9863444566726685,grad_norm: 0.8298277441916538, iteration: 81147
loss: 1.1152528524398804,grad_norm: 0.9999991271106938, iteration: 81148
loss: 1.0615631341934204,grad_norm: 0.8088923669701713, iteration: 81149
loss: 1.0352076292037964,grad_norm: 0.9278528238153555, iteration: 81150
loss: 0.9808250665664673,grad_norm: 0.9201442085779239, iteration: 81151
loss: 1.001856803894043,grad_norm: 0.8834440741384464, iteration: 81152
loss: 1.0089480876922607,grad_norm: 0.8978601050811251, iteration: 81153
loss: 1.0529472827911377,grad_norm: 0.9999990628322396, iteration: 81154
loss: 0.987487256526947,grad_norm: 0.9287032086509523, iteration: 81155
loss: 1.0849392414093018,grad_norm: 0.9999993738876283, iteration: 81156
loss: 0.998699963092804,grad_norm: 0.9080595677891844, iteration: 81157
loss: 1.1771851778030396,grad_norm: 0.9999991878089456, iteration: 81158
loss: 1.0440638065338135,grad_norm: 0.8892656696093941, iteration: 81159
loss: 1.061740756034851,grad_norm: 0.9999991560415384, iteration: 81160
loss: 1.089681625366211,grad_norm: 0.9586607744263124, iteration: 81161
loss: 1.0461292266845703,grad_norm: 0.9999994064438384, iteration: 81162
loss: 1.2791610956192017,grad_norm: 0.9721510673724563, iteration: 81163
loss: 0.9833498597145081,grad_norm: 0.7905426747123512, iteration: 81164
loss: 1.0121283531188965,grad_norm: 0.9427919522108138, iteration: 81165
loss: 1.0098893642425537,grad_norm: 0.9999993385950501, iteration: 81166
loss: 1.0873109102249146,grad_norm: 0.9653468475665111, iteration: 81167
loss: 0.9583922028541565,grad_norm: 0.9781917010845514, iteration: 81168
loss: 1.054935336112976,grad_norm: 0.9999992436835163, iteration: 81169
loss: 0.9719581007957458,grad_norm: 0.9999990526847826, iteration: 81170
loss: 1.020155668258667,grad_norm: 0.99999930734678, iteration: 81171
loss: 0.9904477000236511,grad_norm: 0.9999990564272077, iteration: 81172
loss: 1.0366827249526978,grad_norm: 0.9999991305908316, iteration: 81173
loss: 0.9764530658721924,grad_norm: 0.951051530229435, iteration: 81174
loss: 1.0368958711624146,grad_norm: 0.9999994240626837, iteration: 81175
loss: 1.0518893003463745,grad_norm: 0.9999997599652, iteration: 81176
loss: 0.9560500979423523,grad_norm: 0.7459569393074462, iteration: 81177
loss: 1.113682508468628,grad_norm: 0.9999990820155513, iteration: 81178
loss: 1.0729666948318481,grad_norm: 0.9999995016875958, iteration: 81179
loss: 1.0374915599822998,grad_norm: 0.9999993900725849, iteration: 81180
loss: 1.0007051229476929,grad_norm: 0.915581405500663, iteration: 81181
loss: 1.0443803071975708,grad_norm: 0.9999995056736055, iteration: 81182
loss: 1.044785499572754,grad_norm: 0.9402392047286, iteration: 81183
loss: 0.9985668659210205,grad_norm: 0.9999996643427159, iteration: 81184
loss: 1.021125078201294,grad_norm: 0.999999115777832, iteration: 81185
loss: 1.0190223455429077,grad_norm: 0.679003398075665, iteration: 81186
loss: 1.0140987634658813,grad_norm: 0.9223849930850555, iteration: 81187
loss: 0.972402811050415,grad_norm: 0.8339482018275168, iteration: 81188
loss: 1.1145656108856201,grad_norm: 0.999999274380595, iteration: 81189
loss: 1.1207013130187988,grad_norm: 0.9999992090622589, iteration: 81190
loss: 1.0355167388916016,grad_norm: 0.9572217563596486, iteration: 81191
loss: 1.0118513107299805,grad_norm: 0.8984561743588423, iteration: 81192
loss: 0.9977327585220337,grad_norm: 0.944269564353076, iteration: 81193
loss: 1.0609668493270874,grad_norm: 0.9999993313921597, iteration: 81194
loss: 1.0662614107131958,grad_norm: 0.9999991520852483, iteration: 81195
loss: 1.0648784637451172,grad_norm: 0.9506842959824372, iteration: 81196
loss: 1.075324535369873,grad_norm: 0.8391547231406838, iteration: 81197
loss: 1.0011643171310425,grad_norm: 0.8035290970522232, iteration: 81198
loss: 1.1555476188659668,grad_norm: 0.9999991152013696, iteration: 81199
loss: 1.0444620847702026,grad_norm: 0.9036618700693109, iteration: 81200
loss: 0.9948053956031799,grad_norm: 0.9999992031137449, iteration: 81201
loss: 1.0145002603530884,grad_norm: 0.9999996409509093, iteration: 81202
loss: 0.9962737560272217,grad_norm: 0.9717978556584793, iteration: 81203
loss: 0.9913285970687866,grad_norm: 0.8348439889133118, iteration: 81204
loss: 1.1402437686920166,grad_norm: 0.9640141710307762, iteration: 81205
loss: 1.0119764804840088,grad_norm: 0.9999994652721006, iteration: 81206
loss: 1.0986984968185425,grad_norm: 0.9999997180084471, iteration: 81207
loss: 1.0280992984771729,grad_norm: 0.999998979639061, iteration: 81208
loss: 1.0152267217636108,grad_norm: 0.9503212226673298, iteration: 81209
loss: 1.1013301610946655,grad_norm: 0.9999998989825686, iteration: 81210
loss: 1.031570315361023,grad_norm: 0.9999992278227042, iteration: 81211
loss: 1.046322226524353,grad_norm: 0.9999993076611967, iteration: 81212
loss: 1.0612080097198486,grad_norm: 0.9886858374335035, iteration: 81213
loss: 1.093127727508545,grad_norm: 0.897969443208727, iteration: 81214
loss: 1.0161724090576172,grad_norm: 0.9104878055494117, iteration: 81215
loss: 1.109843373298645,grad_norm: 0.9999990424262689, iteration: 81216
loss: 1.0305458307266235,grad_norm: 0.9999991020389009, iteration: 81217
loss: 1.0025070905685425,grad_norm: 0.9358814907550925, iteration: 81218
loss: 1.1215342283248901,grad_norm: 0.9999995652527507, iteration: 81219
loss: 0.9929848313331604,grad_norm: 0.8641117591293793, iteration: 81220
loss: 0.995456337928772,grad_norm: 0.9999990732218688, iteration: 81221
loss: 1.0326519012451172,grad_norm: 0.9999995845698156, iteration: 81222
loss: 1.0237138271331787,grad_norm: 0.9999993863588335, iteration: 81223
loss: 0.97768235206604,grad_norm: 0.9999994278408354, iteration: 81224
loss: 1.0273700952529907,grad_norm: 0.9524236221048734, iteration: 81225
loss: 1.065063714981079,grad_norm: 0.9999992811436107, iteration: 81226
loss: 1.0514805316925049,grad_norm: 0.9999993101141319, iteration: 81227
loss: 0.9948973059654236,grad_norm: 0.9999993278792008, iteration: 81228
loss: 0.9632236361503601,grad_norm: 0.9999991869555265, iteration: 81229
loss: 0.9846752285957336,grad_norm: 0.9545160822222533, iteration: 81230
loss: 1.0928274393081665,grad_norm: 0.9999992936525499, iteration: 81231
loss: 0.980099618434906,grad_norm: 0.9457455270589581, iteration: 81232
loss: 0.993587076663971,grad_norm: 0.9913520083625568, iteration: 81233
loss: 1.072837471961975,grad_norm: 0.9999992761774527, iteration: 81234
loss: 1.0029933452606201,grad_norm: 0.9665852483513189, iteration: 81235
loss: 1.0464529991149902,grad_norm: 0.999999325499917, iteration: 81236
loss: 1.0695949792861938,grad_norm: 0.9999993127327947, iteration: 81237
loss: 1.1206363439559937,grad_norm: 0.9999995295042574, iteration: 81238
loss: 0.9896487593650818,grad_norm: 0.8327409731023606, iteration: 81239
loss: 1.027076244354248,grad_norm: 0.858808220980856, iteration: 81240
loss: 0.9986273050308228,grad_norm: 0.9888191486655836, iteration: 81241
loss: 0.9873891472816467,grad_norm: 0.9504069073874107, iteration: 81242
loss: 0.9819871187210083,grad_norm: 0.9072624513894272, iteration: 81243
loss: 1.0036754608154297,grad_norm: 0.8517203150959584, iteration: 81244
loss: 1.017364263534546,grad_norm: 0.9999989674527394, iteration: 81245
loss: 1.025215983390808,grad_norm: 0.947192501824583, iteration: 81246
loss: 1.0665500164031982,grad_norm: 0.9999991283906222, iteration: 81247
loss: 1.020851969718933,grad_norm: 0.9999990549478922, iteration: 81248
loss: 1.063482642173767,grad_norm: 0.9999991776634274, iteration: 81249
loss: 1.0170340538024902,grad_norm: 0.8129925316101515, iteration: 81250
loss: 1.0808990001678467,grad_norm: 0.9728910184670368, iteration: 81251
loss: 1.0089139938354492,grad_norm: 0.9326383565769495, iteration: 81252
loss: 1.1850048303604126,grad_norm: 0.9999990879025591, iteration: 81253
loss: 1.058674693107605,grad_norm: 0.9999996642344775, iteration: 81254
loss: 1.163406491279602,grad_norm: 0.9999991999242587, iteration: 81255
loss: 1.0648798942565918,grad_norm: 0.9999991601320619, iteration: 81256
loss: 1.0028393268585205,grad_norm: 0.9148527945595627, iteration: 81257
loss: 1.0128604173660278,grad_norm: 0.7683122743769752, iteration: 81258
loss: 1.0208646059036255,grad_norm: 0.9999992589972918, iteration: 81259
loss: 1.0493583679199219,grad_norm: 0.9999990252997087, iteration: 81260
loss: 1.0210998058319092,grad_norm: 0.872787430686346, iteration: 81261
loss: 0.9967777729034424,grad_norm: 0.9999992621077819, iteration: 81262
loss: 0.9366192817687988,grad_norm: 0.9804328715338201, iteration: 81263
loss: 1.0727601051330566,grad_norm: 0.9999992297394797, iteration: 81264
loss: 0.9987142086029053,grad_norm: 0.9999991539550122, iteration: 81265
loss: 1.0244415998458862,grad_norm: 0.9999995964243394, iteration: 81266
loss: 0.9859970211982727,grad_norm: 0.9999996474220012, iteration: 81267
loss: 1.0815969705581665,grad_norm: 0.9862799564779613, iteration: 81268
loss: 1.0104342699050903,grad_norm: 0.999999250459233, iteration: 81269
loss: 1.023797631263733,grad_norm: 0.9999991652154733, iteration: 81270
loss: 1.0326989889144897,grad_norm: 0.9459071396394962, iteration: 81271
loss: 1.0212464332580566,grad_norm: 0.8737804033114119, iteration: 81272
loss: 0.9960412979125977,grad_norm: 0.9999995398369448, iteration: 81273
loss: 1.0119190216064453,grad_norm: 0.9999991077006074, iteration: 81274
loss: 1.0722955465316772,grad_norm: 0.9999994404725401, iteration: 81275
loss: 0.9931948781013489,grad_norm: 0.9999990725294944, iteration: 81276
loss: 0.9831284284591675,grad_norm: 0.9617552570807437, iteration: 81277
loss: 0.9812760949134827,grad_norm: 0.8381728214158326, iteration: 81278
loss: 0.9910759925842285,grad_norm: 0.9999994503977171, iteration: 81279
loss: 0.9943777918815613,grad_norm: 0.9999989663143071, iteration: 81280
loss: 1.0512324571609497,grad_norm: 0.9999990744597943, iteration: 81281
loss: 1.1063047647476196,grad_norm: 0.999999962182754, iteration: 81282
loss: 1.0026576519012451,grad_norm: 0.8700371068663368, iteration: 81283
loss: 1.0279817581176758,grad_norm: 0.9999995460802844, iteration: 81284
loss: 1.0358408689498901,grad_norm: 0.9999991707418662, iteration: 81285
loss: 0.9787816405296326,grad_norm: 0.8803104208848794, iteration: 81286
loss: 1.0846554040908813,grad_norm: 0.9755980729050701, iteration: 81287
loss: 1.0072513818740845,grad_norm: 0.9117186402795507, iteration: 81288
loss: 0.9802296161651611,grad_norm: 0.8947761968510801, iteration: 81289
loss: 1.0371103286743164,grad_norm: 0.9999990706584836, iteration: 81290
loss: 1.013056755065918,grad_norm: 0.9999990750876364, iteration: 81291
loss: 1.0162807703018188,grad_norm: 0.9999990437265218, iteration: 81292
loss: 1.0491652488708496,grad_norm: 0.8604819398564009, iteration: 81293
loss: 1.0158027410507202,grad_norm: 0.8591023829071479, iteration: 81294
loss: 0.9747765064239502,grad_norm: 0.9981350923849398, iteration: 81295
loss: 1.0318920612335205,grad_norm: 0.9999992042368208, iteration: 81296
loss: 0.9989748597145081,grad_norm: 0.9820306078093425, iteration: 81297
loss: 1.0109286308288574,grad_norm: 0.9080953179523996, iteration: 81298
loss: 1.1199570894241333,grad_norm: 0.9999993660824243, iteration: 81299
loss: 0.9623181819915771,grad_norm: 0.9999991234450103, iteration: 81300
loss: 1.1694072484970093,grad_norm: 0.9375929176106196, iteration: 81301
loss: 1.0630532503128052,grad_norm: 0.8727658662477005, iteration: 81302
loss: 1.0390679836273193,grad_norm: 0.9818358620457265, iteration: 81303
loss: 1.0815329551696777,grad_norm: 0.9999995352768823, iteration: 81304
loss: 1.0403872728347778,grad_norm: 0.9123145915609006, iteration: 81305
loss: 1.0403547286987305,grad_norm: 0.8470769606262173, iteration: 81306
loss: 0.9698817133903503,grad_norm: 0.9250703210305092, iteration: 81307
loss: 1.1270055770874023,grad_norm: 0.9929157958620397, iteration: 81308
loss: 1.0216795206069946,grad_norm: 0.9999998217676714, iteration: 81309
loss: 1.0321606397628784,grad_norm: 0.9413098409615717, iteration: 81310
loss: 0.9844432473182678,grad_norm: 0.9999990107103861, iteration: 81311
loss: 1.0060480833053589,grad_norm: 0.9999991605322412, iteration: 81312
loss: 0.9982356429100037,grad_norm: 0.9232674109003854, iteration: 81313
loss: 1.14548921585083,grad_norm: 0.9999999180581166, iteration: 81314
loss: 0.9929452538490295,grad_norm: 0.892071242161545, iteration: 81315
loss: 1.072459101676941,grad_norm: 0.9999990357368614, iteration: 81316
loss: 1.0104451179504395,grad_norm: 0.797320345508029, iteration: 81317
loss: 0.9755600094795227,grad_norm: 0.7401790191493436, iteration: 81318
loss: 1.021873950958252,grad_norm: 0.9999992139769409, iteration: 81319
loss: 1.001760721206665,grad_norm: 0.9999990255091316, iteration: 81320
loss: 0.9812812805175781,grad_norm: 0.999404485241231, iteration: 81321
loss: 1.0419130325317383,grad_norm: 0.9999991532496568, iteration: 81322
loss: 1.0572130680084229,grad_norm: 0.9999991758647379, iteration: 81323
loss: 1.1624679565429688,grad_norm: 0.9999998434238127, iteration: 81324
loss: 1.0082778930664062,grad_norm: 0.8968367799126185, iteration: 81325
loss: 1.020706057548523,grad_norm: 0.9999999559738771, iteration: 81326
loss: 1.0516438484191895,grad_norm: 0.8518815436066336, iteration: 81327
loss: 1.0292351245880127,grad_norm: 0.9999999620470044, iteration: 81328
loss: 1.0317902565002441,grad_norm: 0.974369346761881, iteration: 81329
loss: 0.9945148825645447,grad_norm: 0.9999992722749605, iteration: 81330
loss: 1.027750849723816,grad_norm: 0.9999992219761739, iteration: 81331
loss: 0.9689491391181946,grad_norm: 0.8340759983419563, iteration: 81332
loss: 0.9452328681945801,grad_norm: 0.999999111947775, iteration: 81333
loss: 1.0192632675170898,grad_norm: 0.9999991731016401, iteration: 81334
loss: 0.9817054867744446,grad_norm: 0.9999991509041598, iteration: 81335
loss: 0.9879273176193237,grad_norm: 0.7331448751699134, iteration: 81336
loss: 1.0228545665740967,grad_norm: 0.9999991287168691, iteration: 81337
loss: 0.9969870448112488,grad_norm: 0.9999991280419044, iteration: 81338
loss: 0.9997540712356567,grad_norm: 0.8034002151376819, iteration: 81339
loss: 1.019186019897461,grad_norm: 0.9999993862472993, iteration: 81340
loss: 1.0207585096359253,grad_norm: 0.9999991297236508, iteration: 81341
loss: 1.0565085411071777,grad_norm: 0.9159021954680442, iteration: 81342
loss: 1.0123214721679688,grad_norm: 0.8804083071035104, iteration: 81343
loss: 1.00192129611969,grad_norm: 0.7483437552572463, iteration: 81344
loss: 1.0107636451721191,grad_norm: 0.9815959254102837, iteration: 81345
loss: 0.9856807589530945,grad_norm: 0.9999990107197909, iteration: 81346
loss: 0.9931272268295288,grad_norm: 0.8295167498642845, iteration: 81347
loss: 1.0722860097885132,grad_norm: 0.9999992024804895, iteration: 81348
loss: 1.0323506593704224,grad_norm: 0.9999991815708459, iteration: 81349
loss: 1.00002121925354,grad_norm: 0.9407636443132882, iteration: 81350
loss: 1.0084726810455322,grad_norm: 0.9999991678361396, iteration: 81351
loss: 1.032511830329895,grad_norm: 0.9193269358025937, iteration: 81352
loss: 0.9968897700309753,grad_norm: 0.9963608161361188, iteration: 81353
loss: 1.032698631286621,grad_norm: 0.9999989581945121, iteration: 81354
loss: 1.0047250986099243,grad_norm: 0.9999992306232329, iteration: 81355
loss: 1.0049513578414917,grad_norm: 0.9999991355877648, iteration: 81356
loss: 0.966317355632782,grad_norm: 0.7964453791989508, iteration: 81357
loss: 1.028991937637329,grad_norm: 0.9999991476846444, iteration: 81358
loss: 0.9808327555656433,grad_norm: 0.967395662120077, iteration: 81359
loss: 0.9845082759857178,grad_norm: 0.9999992145609081, iteration: 81360
loss: 1.0019900798797607,grad_norm: 0.9999990451024455, iteration: 81361
loss: 1.028494119644165,grad_norm: 0.9708842203152594, iteration: 81362
loss: 0.9997950792312622,grad_norm: 0.9686527579486691, iteration: 81363
loss: 1.028714656829834,grad_norm: 0.9503629701990233, iteration: 81364
loss: 1.001756191253662,grad_norm: 0.9318556403380285, iteration: 81365
loss: 1.0128493309020996,grad_norm: 0.9996206451988371, iteration: 81366
loss: 1.0809271335601807,grad_norm: 0.9999992130257486, iteration: 81367
loss: 1.0084612369537354,grad_norm: 0.9493150485545576, iteration: 81368
loss: 1.0397367477416992,grad_norm: 0.9999994679486031, iteration: 81369
loss: 1.0313944816589355,grad_norm: 0.8179977882577053, iteration: 81370
loss: 1.0572776794433594,grad_norm: 0.9003556609093595, iteration: 81371
loss: 1.0004855394363403,grad_norm: 0.9418216487227113, iteration: 81372
loss: 1.0613625049591064,grad_norm: 0.9246967151517405, iteration: 81373
loss: 0.9687536954879761,grad_norm: 0.9983144984725936, iteration: 81374
loss: 0.9871916174888611,grad_norm: 0.8234876480951336, iteration: 81375
loss: 1.0150388479232788,grad_norm: 0.9156777885533921, iteration: 81376
loss: 1.017304539680481,grad_norm: 0.8984726451276618, iteration: 81377
loss: 1.108224630355835,grad_norm: 0.9999992186979576, iteration: 81378
loss: 1.0378172397613525,grad_norm: 0.9999995063956213, iteration: 81379
loss: 0.9943171143531799,grad_norm: 0.9999992069582423, iteration: 81380
loss: 1.0092283487319946,grad_norm: 0.8661999399115355, iteration: 81381
loss: 1.0418241024017334,grad_norm: 0.9999996437930178, iteration: 81382
loss: 0.9954737424850464,grad_norm: 0.9151326197158964, iteration: 81383
loss: 1.1083683967590332,grad_norm: 0.9999991900872244, iteration: 81384
loss: 1.0294315814971924,grad_norm: 0.9248149258742195, iteration: 81385
loss: 1.0502948760986328,grad_norm: 0.9999990444411023, iteration: 81386
loss: 1.2632702589035034,grad_norm: 0.9999995833636843, iteration: 81387
loss: 0.9855892062187195,grad_norm: 0.9999989945312144, iteration: 81388
loss: 0.9626476764678955,grad_norm: 0.9720035207768167, iteration: 81389
loss: 1.0230883359909058,grad_norm: 0.999999086257928, iteration: 81390
loss: 1.0037661790847778,grad_norm: 0.7969180603126139, iteration: 81391
loss: 1.025000810623169,grad_norm: 0.9669086052172279, iteration: 81392
loss: 0.9798071384429932,grad_norm: 0.9683915767529029, iteration: 81393
loss: 1.0609188079833984,grad_norm: 0.9999990457953832, iteration: 81394
loss: 1.0405988693237305,grad_norm: 0.8454722673706518, iteration: 81395
loss: 1.0182349681854248,grad_norm: 0.9223138724440074, iteration: 81396
loss: 1.007666826248169,grad_norm: 0.999999346109021, iteration: 81397
loss: 1.0050400495529175,grad_norm: 0.8485547632363039, iteration: 81398
loss: 1.048922061920166,grad_norm: 0.9999999281891869, iteration: 81399
loss: 0.9908925890922546,grad_norm: 0.8628093593342174, iteration: 81400
loss: 1.0021649599075317,grad_norm: 0.9613270811502771, iteration: 81401
loss: 1.0398608446121216,grad_norm: 0.9999993228140667, iteration: 81402
loss: 0.9816679358482361,grad_norm: 0.8820560951657759, iteration: 81403
loss: 0.9853274822235107,grad_norm: 0.8865873303282295, iteration: 81404
loss: 1.0058976411819458,grad_norm: 0.8399860642406911, iteration: 81405
loss: 0.984490692615509,grad_norm: 0.9851789872736494, iteration: 81406
loss: 1.018393635749817,grad_norm: 0.8141400017136664, iteration: 81407
loss: 0.9939950108528137,grad_norm: 0.969595314033434, iteration: 81408
loss: 1.0016759634017944,grad_norm: 0.999999001108119, iteration: 81409
loss: 0.9995278716087341,grad_norm: 0.9121356151386711, iteration: 81410
loss: 1.073494553565979,grad_norm: 0.9999993372641215, iteration: 81411
loss: 0.9805796146392822,grad_norm: 0.8891970730237893, iteration: 81412
loss: 0.9844014048576355,grad_norm: 0.8169057985631345, iteration: 81413
loss: 1.0068453550338745,grad_norm: 0.863666054067098, iteration: 81414
loss: 0.9993391036987305,grad_norm: 0.8899630421237607, iteration: 81415
loss: 0.9746178984642029,grad_norm: 0.9999992137029305, iteration: 81416
loss: 1.020885944366455,grad_norm: 0.999999882859204, iteration: 81417
loss: 1.0113445520401,grad_norm: 0.9999989449922994, iteration: 81418
loss: 1.0463820695877075,grad_norm: 0.9999994422138194, iteration: 81419
loss: 0.9902123212814331,grad_norm: 0.999999244813686, iteration: 81420
loss: 1.036118745803833,grad_norm: 0.914034318014031, iteration: 81421
loss: 0.9651113748550415,grad_norm: 0.8777123398174153, iteration: 81422
loss: 1.1248751878738403,grad_norm: 0.9999992732217945, iteration: 81423
loss: 1.0089417695999146,grad_norm: 0.9999990367067609, iteration: 81424
loss: 1.0032227039337158,grad_norm: 0.9999991013207835, iteration: 81425
loss: 1.0729928016662598,grad_norm: 0.9999999964671101, iteration: 81426
loss: 1.045580267906189,grad_norm: 0.99999957891013, iteration: 81427
loss: 1.0001721382141113,grad_norm: 0.9999990502192011, iteration: 81428
loss: 1.0312191247940063,grad_norm: 0.8492559348722485, iteration: 81429
loss: 1.0038894414901733,grad_norm: 0.9999989987511494, iteration: 81430
loss: 0.9588428139686584,grad_norm: 0.9999990392423646, iteration: 81431
loss: 0.9601807594299316,grad_norm: 0.9999991890584331, iteration: 81432
loss: 1.003085970878601,grad_norm: 0.9999992390303367, iteration: 81433
loss: 0.9768697023391724,grad_norm: 0.8462653527565517, iteration: 81434
loss: 0.964823842048645,grad_norm: 0.9573043417593535, iteration: 81435
loss: 1.0457252264022827,grad_norm: 0.9999999015408252, iteration: 81436
loss: 0.9919165372848511,grad_norm: 0.9645352686470404, iteration: 81437
loss: 1.0094425678253174,grad_norm: 0.9999999286454475, iteration: 81438
loss: 0.9637966156005859,grad_norm: 0.9999989882169604, iteration: 81439
loss: 1.0218894481658936,grad_norm: 0.9317431226435625, iteration: 81440
loss: 1.003669261932373,grad_norm: 0.9278586408097039, iteration: 81441
loss: 1.0073295831680298,grad_norm: 0.855866637674317, iteration: 81442
loss: 0.9894824028015137,grad_norm: 0.9999989694872555, iteration: 81443
loss: 0.9786390662193298,grad_norm: 0.9999990663454764, iteration: 81444
loss: 0.9654309153556824,grad_norm: 0.8537552509276093, iteration: 81445
loss: 0.9986284375190735,grad_norm: 0.8489534492359996, iteration: 81446
loss: 1.1686676740646362,grad_norm: 0.9999996627425605, iteration: 81447
loss: 0.9886907339096069,grad_norm: 0.9521408295965873, iteration: 81448
loss: 0.9993756413459778,grad_norm: 0.8115042935629988, iteration: 81449
loss: 1.0013231039047241,grad_norm: 0.9999994237756064, iteration: 81450
loss: 1.0163613557815552,grad_norm: 0.8855887685797641, iteration: 81451
loss: 0.9401798844337463,grad_norm: 0.9999992198142078, iteration: 81452
loss: 0.9977632164955139,grad_norm: 0.9999990881500758, iteration: 81453
loss: 0.97474205493927,grad_norm: 0.999999048112793, iteration: 81454
loss: 1.046913981437683,grad_norm: 0.9999999801657561, iteration: 81455
loss: 1.032712459564209,grad_norm: 0.9085566639782842, iteration: 81456
loss: 0.9579761028289795,grad_norm: 0.9999991561670357, iteration: 81457
loss: 1.008386254310608,grad_norm: 0.9999990547540891, iteration: 81458
loss: 0.9977101683616638,grad_norm: 0.9410180115892877, iteration: 81459
loss: 0.9731925129890442,grad_norm: 0.9446261141711184, iteration: 81460
loss: 1.003589391708374,grad_norm: 0.851170137354474, iteration: 81461
loss: 1.0585287809371948,grad_norm: 0.9999991526823037, iteration: 81462
loss: 1.0283973217010498,grad_norm: 0.999999147833508, iteration: 81463
loss: 1.0315769910812378,grad_norm: 0.999999408532252, iteration: 81464
loss: 1.0019373893737793,grad_norm: 0.9558727684051598, iteration: 81465
loss: 0.9667325019836426,grad_norm: 0.9999996953820646, iteration: 81466
loss: 1.0107139348983765,grad_norm: 0.7205529890431357, iteration: 81467
loss: 1.0092473030090332,grad_norm: 0.740112837332425, iteration: 81468
loss: 1.0105878114700317,grad_norm: 0.9008228039158518, iteration: 81469
loss: 1.0563777685165405,grad_norm: 0.9999994648361568, iteration: 81470
loss: 0.9998537302017212,grad_norm: 0.8537175826454313, iteration: 81471
loss: 1.0084699392318726,grad_norm: 0.9999990635354591, iteration: 81472
loss: 1.0420836210250854,grad_norm: 0.9999990905157043, iteration: 81473
loss: 1.049783706665039,grad_norm: 0.9862818819727016, iteration: 81474
loss: 0.9727481603622437,grad_norm: 0.8243602203313845, iteration: 81475
loss: 0.99238520860672,grad_norm: 0.9999990478204568, iteration: 81476
loss: 1.008361577987671,grad_norm: 0.7992377359142838, iteration: 81477
loss: 1.0302934646606445,grad_norm: 0.7812295655804316, iteration: 81478
loss: 0.9613742232322693,grad_norm: 0.9961345680637915, iteration: 81479
loss: 0.9708025455474854,grad_norm: 0.9922107543722726, iteration: 81480
loss: 1.0088037252426147,grad_norm: 0.8133528741281373, iteration: 81481
loss: 0.965924859046936,grad_norm: 0.846645319665879, iteration: 81482
loss: 0.9818134903907776,grad_norm: 0.9217803341520084, iteration: 81483
loss: 0.9598619937896729,grad_norm: 0.9999989851354496, iteration: 81484
loss: 0.9999910593032837,grad_norm: 0.9103019870822658, iteration: 81485
loss: 0.9999846816062927,grad_norm: 0.975597230805656, iteration: 81486
loss: 1.0063718557357788,grad_norm: 0.9886618251564683, iteration: 81487
loss: 1.0111825466156006,grad_norm: 0.9344395521952439, iteration: 81488
loss: 0.9786906242370605,grad_norm: 0.9999992906979933, iteration: 81489
loss: 1.0454403162002563,grad_norm: 0.9031785546214497, iteration: 81490
loss: 0.9785624742507935,grad_norm: 0.9340841715958085, iteration: 81491
loss: 1.019564151763916,grad_norm: 0.9379623571457145, iteration: 81492
loss: 0.9909413456916809,grad_norm: 0.8922061841269073, iteration: 81493
loss: 0.9922288656234741,grad_norm: 0.9999992260583201, iteration: 81494
loss: 1.0642507076263428,grad_norm: 0.9999991789079014, iteration: 81495
loss: 1.026280403137207,grad_norm: 0.8260446792748019, iteration: 81496
loss: 1.04477059841156,grad_norm: 0.9999993889828215, iteration: 81497
loss: 1.0507086515426636,grad_norm: 0.9999991462523773, iteration: 81498
loss: 1.0582891702651978,grad_norm: 1.0000000407536438, iteration: 81499
loss: 0.9821040630340576,grad_norm: 0.9999992183161895, iteration: 81500
loss: 1.0252177715301514,grad_norm: 0.7481611636785475, iteration: 81501
loss: 1.0172539949417114,grad_norm: 0.7410480415969986, iteration: 81502
loss: 1.0049442052841187,grad_norm: 0.999999827674216, iteration: 81503
loss: 1.0382323265075684,grad_norm: 0.9999997003310552, iteration: 81504
loss: 1.034442663192749,grad_norm: 0.9999992267916233, iteration: 81505
loss: 1.0258126258850098,grad_norm: 0.9999991078208604, iteration: 81506
loss: 0.9721922874450684,grad_norm: 0.906729933906059, iteration: 81507
loss: 1.0935367345809937,grad_norm: 0.9999997064980201, iteration: 81508
loss: 0.9761582016944885,grad_norm: 0.999999065257349, iteration: 81509
loss: 1.0176007747650146,grad_norm: 0.9094150381007694, iteration: 81510
loss: 0.9872479438781738,grad_norm: 0.9999991066877625, iteration: 81511
loss: 0.9854463934898376,grad_norm: 0.8532098065530089, iteration: 81512
loss: 1.0378247499465942,grad_norm: 0.9999991305324865, iteration: 81513
loss: 1.0122864246368408,grad_norm: 0.9598263943471763, iteration: 81514
loss: 1.023295521736145,grad_norm: 0.9999996250771686, iteration: 81515
loss: 1.0271739959716797,grad_norm: 0.948884187548238, iteration: 81516
loss: 1.0099369287490845,grad_norm: 0.9999990509238453, iteration: 81517
loss: 1.0395399332046509,grad_norm: 0.9999992134796164, iteration: 81518
loss: 0.9968546032905579,grad_norm: 0.8574397761457316, iteration: 81519
loss: 0.9704318046569824,grad_norm: 0.869277607174022, iteration: 81520
loss: 1.0324732065200806,grad_norm: 0.9999994089907874, iteration: 81521
loss: 0.979999303817749,grad_norm: 0.9999989862012437, iteration: 81522
loss: 1.0336006879806519,grad_norm: 0.9999999562736263, iteration: 81523
loss: 1.0075452327728271,grad_norm: 0.9999990722950621, iteration: 81524
loss: 1.012785792350769,grad_norm: 0.9618082150842858, iteration: 81525
loss: 1.0282697677612305,grad_norm: 0.8737663452010808, iteration: 81526
loss: 1.0129995346069336,grad_norm: 0.8104578675022502, iteration: 81527
loss: 1.0060769319534302,grad_norm: 0.9999991203856623, iteration: 81528
loss: 1.016502857208252,grad_norm: 0.9999991624946614, iteration: 81529
loss: 1.036937952041626,grad_norm: 0.9999990403979383, iteration: 81530
loss: 0.9974141716957092,grad_norm: 0.9999995959269136, iteration: 81531
loss: 1.0532793998718262,grad_norm: 0.9999992988067716, iteration: 81532
loss: 1.0081721544265747,grad_norm: 0.9510003876697286, iteration: 81533
loss: 1.0081628561019897,grad_norm: 0.8239113774265688, iteration: 81534
loss: 1.026940941810608,grad_norm: 0.7529970418343621, iteration: 81535
loss: 0.9845499992370605,grad_norm: 0.947608768436053, iteration: 81536
loss: 1.0350151062011719,grad_norm: 0.9999998691729833, iteration: 81537
loss: 1.0184484720230103,grad_norm: 0.9999991358138884, iteration: 81538
loss: 1.0275088548660278,grad_norm: 0.999999951764359, iteration: 81539
loss: 1.0298783779144287,grad_norm: 0.8010988580922938, iteration: 81540
loss: 1.0078781843185425,grad_norm: 0.8702809175775233, iteration: 81541
loss: 0.9970099925994873,grad_norm: 0.7994901274266278, iteration: 81542
loss: 0.9789478778839111,grad_norm: 0.9999990944001266, iteration: 81543
loss: 0.9887131452560425,grad_norm: 0.9999997277034324, iteration: 81544
loss: 0.988308310508728,grad_norm: 0.8122207445364321, iteration: 81545
loss: 1.0176029205322266,grad_norm: 0.8096046141329871, iteration: 81546
loss: 1.041681170463562,grad_norm: 0.9999999905811932, iteration: 81547
loss: 1.0128504037857056,grad_norm: 0.8632074684775698, iteration: 81548
loss: 1.0043160915374756,grad_norm: 0.8583700209143208, iteration: 81549
loss: 1.0312656164169312,grad_norm: 0.9999991583325605, iteration: 81550
loss: 1.0067468881607056,grad_norm: 0.9999996941118897, iteration: 81551
loss: 1.0398011207580566,grad_norm: 0.9105891919260544, iteration: 81552
loss: 1.0167466402053833,grad_norm: 0.9999991926011376, iteration: 81553
loss: 1.02415132522583,grad_norm: 0.9817161371349887, iteration: 81554
loss: 1.0582736730575562,grad_norm: 0.9447263618917718, iteration: 81555
loss: 0.9665306210517883,grad_norm: 0.9402538584573411, iteration: 81556
loss: 1.0077861547470093,grad_norm: 0.937819899170234, iteration: 81557
loss: 1.028468370437622,grad_norm: 0.9999998229481242, iteration: 81558
loss: 0.9866027235984802,grad_norm: 0.8376365366264722, iteration: 81559
loss: 1.0402711629867554,grad_norm: 0.9999993681310412, iteration: 81560
loss: 1.0527225732803345,grad_norm: 0.9999996498150873, iteration: 81561
loss: 0.9876216053962708,grad_norm: 0.9999990507441097, iteration: 81562
loss: 1.0045139789581299,grad_norm: 0.8991896309998201, iteration: 81563
loss: 1.0262528657913208,grad_norm: 0.8955118683851481, iteration: 81564
loss: 0.9977788329124451,grad_norm: 0.9999991724644479, iteration: 81565
loss: 0.9952412247657776,grad_norm: 0.9234386772660489, iteration: 81566
loss: 1.0072013139724731,grad_norm: 0.8121661093298849, iteration: 81567
loss: 0.9748427271842957,grad_norm: 0.8676328149035906, iteration: 81568
loss: 0.9811223745346069,grad_norm: 0.874952622155579, iteration: 81569
loss: 0.9987838864326477,grad_norm: 0.9999994721021455, iteration: 81570
loss: 0.9750756621360779,grad_norm: 0.9454038440373258, iteration: 81571
loss: 1.0210695266723633,grad_norm: 0.8279220122820811, iteration: 81572
loss: 1.0470387935638428,grad_norm: 0.9999993939420856, iteration: 81573
loss: 1.025496244430542,grad_norm: 0.9999991592963197, iteration: 81574
loss: 0.9849759340286255,grad_norm: 0.9244440312178606, iteration: 81575
loss: 0.9851987957954407,grad_norm: 0.9999991637841155, iteration: 81576
loss: 0.9981920719146729,grad_norm: 0.9103051070142921, iteration: 81577
loss: 0.998555064201355,grad_norm: 0.893194079095148, iteration: 81578
loss: 1.006230115890503,grad_norm: 0.9305809259211266, iteration: 81579
loss: 1.0081117153167725,grad_norm: 0.9410126891762948, iteration: 81580
loss: 1.023737907409668,grad_norm: 0.9575336028273105, iteration: 81581
loss: 1.0488362312316895,grad_norm: 0.9999990397728574, iteration: 81582
loss: 1.0073292255401611,grad_norm: 0.9257179381829029, iteration: 81583
loss: 0.9924768805503845,grad_norm: 0.808853463402855, iteration: 81584
loss: 0.9997921586036682,grad_norm: 0.9999996486813133, iteration: 81585
loss: 1.0093246698379517,grad_norm: 0.9296718919716784, iteration: 81586
loss: 1.0300003290176392,grad_norm: 0.8530208100410376, iteration: 81587
loss: 0.9955650568008423,grad_norm: 0.999999132654489, iteration: 81588
loss: 0.9917480945587158,grad_norm: 0.9082150731491178, iteration: 81589
loss: 0.9754934310913086,grad_norm: 0.929752949541797, iteration: 81590
loss: 0.9996348023414612,grad_norm: 0.820924991544014, iteration: 81591
loss: 1.0408979654312134,grad_norm: 0.9999991264254384, iteration: 81592
loss: 0.9684963226318359,grad_norm: 0.9559512433076476, iteration: 81593
loss: 0.9936060905456543,grad_norm: 0.8165469435322906, iteration: 81594
loss: 1.0133686065673828,grad_norm: 0.9079837395082788, iteration: 81595
loss: 1.0158792734146118,grad_norm: 0.9999998317228423, iteration: 81596
loss: 1.0216885805130005,grad_norm: 0.9999990903746818, iteration: 81597
loss: 1.0235648155212402,grad_norm: 0.973465651622683, iteration: 81598
loss: 1.0124220848083496,grad_norm: 0.8218607467441602, iteration: 81599
loss: 1.0051060914993286,grad_norm: 0.9216732563232505, iteration: 81600
loss: 0.9981335997581482,grad_norm: 0.8840230392207765, iteration: 81601
loss: 1.0105292797088623,grad_norm: 0.9999991644807559, iteration: 81602
loss: 1.0150126218795776,grad_norm: 0.9999996881246758, iteration: 81603
loss: 1.0122425556182861,grad_norm: 0.9957310761480773, iteration: 81604
loss: 0.9851152896881104,grad_norm: 0.9999995277602483, iteration: 81605
loss: 0.9876570105552673,grad_norm: 0.8539837410093787, iteration: 81606
loss: 0.969912052154541,grad_norm: 0.9768972160932414, iteration: 81607
loss: 1.0298831462860107,grad_norm: 0.9999997012578837, iteration: 81608
loss: 1.0486315488815308,grad_norm: 0.8108460773760118, iteration: 81609
loss: 1.0330654382705688,grad_norm: 0.8296715845511764, iteration: 81610
loss: 1.017695426940918,grad_norm: 0.9999992905410655, iteration: 81611
loss: 0.971291720867157,grad_norm: 0.9115661209759526, iteration: 81612
loss: 0.9998648762702942,grad_norm: 0.8849507187734856, iteration: 81613
loss: 1.0064640045166016,grad_norm: 0.999999095702505, iteration: 81614
loss: 1.0240943431854248,grad_norm: 0.999999233875767, iteration: 81615
loss: 1.0183888673782349,grad_norm: 0.999999452588439, iteration: 81616
loss: 0.9885042309761047,grad_norm: 0.7398657404287836, iteration: 81617
loss: 1.012596607208252,grad_norm: 0.846081531776069, iteration: 81618
loss: 1.022253394126892,grad_norm: 0.8641642391454006, iteration: 81619
loss: 0.9794294834136963,grad_norm: 0.8561159556859754, iteration: 81620
loss: 0.9648272395133972,grad_norm: 0.9666614353677266, iteration: 81621
loss: 0.9818028807640076,grad_norm: 0.9999990505294322, iteration: 81622
loss: 0.9846065640449524,grad_norm: 0.9999991051560503, iteration: 81623
loss: 0.9906312823295593,grad_norm: 0.9999990407974192, iteration: 81624
loss: 1.0665194988250732,grad_norm: 0.8835826943168472, iteration: 81625
loss: 1.0110260248184204,grad_norm: 0.999999277317726, iteration: 81626
loss: 1.0156210660934448,grad_norm: 0.861880755732247, iteration: 81627
loss: 0.9665933847427368,grad_norm: 0.8939129936798142, iteration: 81628
loss: 1.026934027671814,grad_norm: 0.9999992008980988, iteration: 81629
loss: 1.0004914999008179,grad_norm: 0.9165410939194575, iteration: 81630
loss: 0.9904237985610962,grad_norm: 0.8811749782515029, iteration: 81631
loss: 1.0060111284255981,grad_norm: 0.8242224067937659, iteration: 81632
loss: 1.0278936624526978,grad_norm: 0.999999568233693, iteration: 81633
loss: 0.9962195158004761,grad_norm: 0.9278906154493443, iteration: 81634
loss: 1.0358035564422607,grad_norm: 0.9607633736160585, iteration: 81635
loss: 0.9905804991722107,grad_norm: 0.9554721451786605, iteration: 81636
loss: 0.9788151979446411,grad_norm: 0.75821685493097, iteration: 81637
loss: 1.0228253602981567,grad_norm: 0.9110102972438144, iteration: 81638
loss: 1.0954546928405762,grad_norm: 0.9999992632348472, iteration: 81639
loss: 1.0160094499588013,grad_norm: 0.9510209981345067, iteration: 81640
loss: 0.9624835848808289,grad_norm: 0.9999991505773467, iteration: 81641
loss: 1.0992637872695923,grad_norm: 0.9999993838563627, iteration: 81642
loss: 1.0672160387039185,grad_norm: 0.9114309472937687, iteration: 81643
loss: 1.0063034296035767,grad_norm: 0.8472758644507811, iteration: 81644
loss: 1.002790927886963,grad_norm: 0.9999990818741934, iteration: 81645
loss: 0.9830760955810547,grad_norm: 0.999999181701702, iteration: 81646
loss: 0.9658582210540771,grad_norm: 0.8258553406416712, iteration: 81647
loss: 1.0808125734329224,grad_norm: 0.9999991789702372, iteration: 81648
loss: 1.1362642049789429,grad_norm: 0.999999256704168, iteration: 81649
loss: 1.0118671655654907,grad_norm: 0.9235897024901476, iteration: 81650
loss: 0.9745181798934937,grad_norm: 0.949614945664698, iteration: 81651
loss: 1.006652593612671,grad_norm: 0.8253492723249995, iteration: 81652
loss: 0.9881291389465332,grad_norm: 0.9999991578323604, iteration: 81653
loss: 0.9619825482368469,grad_norm: 0.8380895932503584, iteration: 81654
loss: 1.0028432607650757,grad_norm: 0.9901843389613899, iteration: 81655
loss: 1.0373241901397705,grad_norm: 0.8457901645486386, iteration: 81656
loss: 0.9940034747123718,grad_norm: 0.9073015322442174, iteration: 81657
loss: 1.0336666107177734,grad_norm: 0.999999522215222, iteration: 81658
loss: 0.9959748983383179,grad_norm: 0.9999992303404379, iteration: 81659
loss: 0.9802653193473816,grad_norm: 0.9815851736744756, iteration: 81660
loss: 0.9647236466407776,grad_norm: 0.8954116655274228, iteration: 81661
loss: 1.007161259651184,grad_norm: 0.868765512113468, iteration: 81662
loss: 1.0084463357925415,grad_norm: 0.9614714197333184, iteration: 81663
loss: 0.9766828417778015,grad_norm: 0.823552710385128, iteration: 81664
loss: 0.972483217716217,grad_norm: 0.895640012773285, iteration: 81665
loss: 1.0462135076522827,grad_norm: 0.9999990509363902, iteration: 81666
loss: 1.0002737045288086,grad_norm: 0.9212556041729203, iteration: 81667
loss: 1.0464376211166382,grad_norm: 0.99999910573935, iteration: 81668
loss: 1.0118433237075806,grad_norm: 0.8024713951423222, iteration: 81669
loss: 0.9796746373176575,grad_norm: 0.9610008247122439, iteration: 81670
loss: 1.0406804084777832,grad_norm: 0.9999992302489036, iteration: 81671
loss: 1.0191149711608887,grad_norm: 0.9999997804650049, iteration: 81672
loss: 1.0101343393325806,grad_norm: 0.925665274670178, iteration: 81673
loss: 0.9797439575195312,grad_norm: 0.9999991559146552, iteration: 81674
loss: 1.0048258304595947,grad_norm: 0.9436430899281961, iteration: 81675
loss: 1.0112930536270142,grad_norm: 0.9468288994390158, iteration: 81676
loss: 1.0560250282287598,grad_norm: 0.9999994028687562, iteration: 81677
loss: 0.9776493310928345,grad_norm: 0.9999990674008647, iteration: 81678
loss: 0.9653939604759216,grad_norm: 0.8719402360776798, iteration: 81679
loss: 1.011603832244873,grad_norm: 0.9999992168276393, iteration: 81680
loss: 0.9991509914398193,grad_norm: 0.9009412219991404, iteration: 81681
loss: 1.0149954557418823,grad_norm: 0.99999894839145, iteration: 81682
loss: 0.9925564527511597,grad_norm: 0.9378091132683831, iteration: 81683
loss: 0.983177661895752,grad_norm: 0.9282301865487786, iteration: 81684
loss: 1.0179566144943237,grad_norm: 0.8169800026250162, iteration: 81685
loss: 1.042822003364563,grad_norm: 0.9999991694362724, iteration: 81686
loss: 1.023668646812439,grad_norm: 0.9999993387308806, iteration: 81687
loss: 0.9878196716308594,grad_norm: 0.999999133383566, iteration: 81688
loss: 1.009124994277954,grad_norm: 0.9047471085695213, iteration: 81689
loss: 1.029539942741394,grad_norm: 0.9999998592259512, iteration: 81690
loss: 1.0336909294128418,grad_norm: 0.9901404213362186, iteration: 81691
loss: 1.1803131103515625,grad_norm: 0.9999991548082433, iteration: 81692
loss: 1.0437084436416626,grad_norm: 0.9125219929536332, iteration: 81693
loss: 1.0213086605072021,grad_norm: 0.9190406706371126, iteration: 81694
loss: 1.0400458574295044,grad_norm: 0.99999969033865, iteration: 81695
loss: 1.0576159954071045,grad_norm: 0.9999997357491354, iteration: 81696
loss: 0.970562219619751,grad_norm: 0.7661850586426725, iteration: 81697
loss: 1.0114978551864624,grad_norm: 0.9999990794280462, iteration: 81698
loss: 1.0484257936477661,grad_norm: 0.9999990429940936, iteration: 81699
loss: 0.9918060302734375,grad_norm: 0.8649814735376179, iteration: 81700
loss: 0.9986243844032288,grad_norm: 0.879309531022764, iteration: 81701
loss: 0.9947160482406616,grad_norm: 0.8697387540013102, iteration: 81702
loss: 1.0074681043624878,grad_norm: 0.9999991305526787, iteration: 81703
loss: 0.9783353209495544,grad_norm: 0.9999990760285892, iteration: 81704
loss: 1.0055739879608154,grad_norm: 0.9999995199650561, iteration: 81705
loss: 0.9641398787498474,grad_norm: 0.9999991350620646, iteration: 81706
loss: 1.0124890804290771,grad_norm: 0.9811052292644388, iteration: 81707
loss: 0.993297815322876,grad_norm: 0.8240069961624591, iteration: 81708
loss: 0.9946464896202087,grad_norm: 0.761722517812343, iteration: 81709
loss: 1.0045262575149536,grad_norm: 0.8513030898832581, iteration: 81710
loss: 0.9788782596588135,grad_norm: 0.9999992111304883, iteration: 81711
loss: 0.9898509979248047,grad_norm: 0.8862540694845963, iteration: 81712
loss: 0.9744285345077515,grad_norm: 0.9999991196475287, iteration: 81713
loss: 1.0040521621704102,grad_norm: 0.9999989889759868, iteration: 81714
loss: 0.9805359244346619,grad_norm: 0.8886279321148931, iteration: 81715
loss: 1.0045700073242188,grad_norm: 0.9381117674668822, iteration: 81716
loss: 1.063830852508545,grad_norm: 0.9496677511486408, iteration: 81717
loss: 0.9916240572929382,grad_norm: 0.8719646926278571, iteration: 81718
loss: 0.9761443734169006,grad_norm: 0.9278567493773334, iteration: 81719
loss: 1.018448829650879,grad_norm: 0.9999992733228857, iteration: 81720
loss: 1.067325234413147,grad_norm: 0.9999997225536363, iteration: 81721
loss: 1.0022634267807007,grad_norm: 0.7561781503291704, iteration: 81722
loss: 0.9773182272911072,grad_norm: 0.8819967956122375, iteration: 81723
loss: 1.1469143629074097,grad_norm: 0.9999995730320053, iteration: 81724
loss: 0.9984827637672424,grad_norm: 0.7823905623534511, iteration: 81725
loss: 0.9934662580490112,grad_norm: 0.9497496064400882, iteration: 81726
loss: 1.000107765197754,grad_norm: 0.9820889099300996, iteration: 81727
loss: 1.0582966804504395,grad_norm: 0.9999996720006723, iteration: 81728
loss: 1.0351207256317139,grad_norm: 0.9999998571575863, iteration: 81729
loss: 1.0222454071044922,grad_norm: 0.9999998144857876, iteration: 81730
loss: 1.0193359851837158,grad_norm: 0.9999988866908613, iteration: 81731
loss: 1.0128118991851807,grad_norm: 0.9999994274127333, iteration: 81732
loss: 0.9807525873184204,grad_norm: 0.8614332128683042, iteration: 81733
loss: 0.9747039079666138,grad_norm: 0.9009545924223963, iteration: 81734
loss: 1.0208759307861328,grad_norm: 0.9866315757193163, iteration: 81735
loss: 1.0016794204711914,grad_norm: 0.8487723156084428, iteration: 81736
loss: 0.9575918316841125,grad_norm: 0.8946508810814928, iteration: 81737
loss: 0.9633677005767822,grad_norm: 0.9410545332329414, iteration: 81738
loss: 1.0033384561538696,grad_norm: 0.9999996273741906, iteration: 81739
loss: 0.9599382281303406,grad_norm: 0.9999989349427859, iteration: 81740
loss: 0.9853003621101379,grad_norm: 0.9999990501142739, iteration: 81741
loss: 1.0464798212051392,grad_norm: 0.8200108790439111, iteration: 81742
loss: 1.0181260108947754,grad_norm: 0.9999991131094139, iteration: 81743
loss: 1.002315878868103,grad_norm: 0.9418808686913912, iteration: 81744
loss: 0.9935181736946106,grad_norm: 0.9999991511063585, iteration: 81745
loss: 1.0135447978973389,grad_norm: 0.9999991718706978, iteration: 81746
loss: 1.024152398109436,grad_norm: 0.9999991623004906, iteration: 81747
loss: 1.0404146909713745,grad_norm: 0.999999050850453, iteration: 81748
loss: 0.9805602431297302,grad_norm: 0.8469839207086479, iteration: 81749
loss: 1.0016049146652222,grad_norm: 0.8985843946951717, iteration: 81750
loss: 1.035210132598877,grad_norm: 0.792193265280202, iteration: 81751
loss: 0.9575475454330444,grad_norm: 0.9772400668540194, iteration: 81752
loss: 1.0329841375350952,grad_norm: 0.99999920869727, iteration: 81753
loss: 0.9790447354316711,grad_norm: 0.8693782931531052, iteration: 81754
loss: 1.0117121934890747,grad_norm: 0.8137323581925989, iteration: 81755
loss: 1.0419857501983643,grad_norm: 0.8999957331402956, iteration: 81756
loss: 1.0531903505325317,grad_norm: 0.999999644530874, iteration: 81757
loss: 0.9616919159889221,grad_norm: 0.9999991547066542, iteration: 81758
loss: 1.0002669095993042,grad_norm: 0.9999994941403376, iteration: 81759
loss: 0.9754247665405273,grad_norm: 0.8707639936574985, iteration: 81760
loss: 1.1357104778289795,grad_norm: 0.999999371374741, iteration: 81761
loss: 0.9810749292373657,grad_norm: 0.8591754083799853, iteration: 81762
loss: 1.0216503143310547,grad_norm: 0.9999992944150446, iteration: 81763
loss: 1.110639214515686,grad_norm: 0.9999991831544776, iteration: 81764
loss: 0.9958299994468689,grad_norm: 0.9622192299437848, iteration: 81765
loss: 0.9926980137825012,grad_norm: 0.885751557301508, iteration: 81766
loss: 0.9948462843894958,grad_norm: 0.9295446144164995, iteration: 81767
loss: 1.1099313497543335,grad_norm: 0.9999990923747865, iteration: 81768
loss: 1.007544994354248,grad_norm: 0.8692459697532695, iteration: 81769
loss: 1.0335296392440796,grad_norm: 0.8603292178343238, iteration: 81770
loss: 0.9842901229858398,grad_norm: 0.9999990232080507, iteration: 81771
loss: 0.9754025340080261,grad_norm: 0.99999924302629, iteration: 81772
loss: 1.0088253021240234,grad_norm: 0.8408256400583929, iteration: 81773
loss: 0.9932038187980652,grad_norm: 0.9160358159009998, iteration: 81774
loss: 1.0236194133758545,grad_norm: 0.9999990810421685, iteration: 81775
loss: 0.9887468218803406,grad_norm: 0.9999990338939551, iteration: 81776
loss: 1.0239118337631226,grad_norm: 0.9999991200587902, iteration: 81777
loss: 1.013245940208435,grad_norm: 0.999999039462926, iteration: 81778
loss: 0.9734337329864502,grad_norm: 0.9999991053511799, iteration: 81779
loss: 0.9993845820426941,grad_norm: 0.9460435374002928, iteration: 81780
loss: 0.973761796951294,grad_norm: 0.9999990639361559, iteration: 81781
loss: 1.0118775367736816,grad_norm: 0.9999990578674284, iteration: 81782
loss: 1.0489250421524048,grad_norm: 0.9999992119845925, iteration: 81783
loss: 1.0297811031341553,grad_norm: 0.9216492147970564, iteration: 81784
loss: 1.0053634643554688,grad_norm: 0.8651859874679373, iteration: 81785
loss: 0.9905720949172974,grad_norm: 0.95287339448633, iteration: 81786
loss: 1.0264018774032593,grad_norm: 0.9999994388169088, iteration: 81787
loss: 1.0430117845535278,grad_norm: 0.9999992047380423, iteration: 81788
loss: 0.9863756895065308,grad_norm: 0.9130401065668965, iteration: 81789
loss: 0.9986959099769592,grad_norm: 0.9629321578287807, iteration: 81790
loss: 1.0333443880081177,grad_norm: 0.9999998152132311, iteration: 81791
loss: 1.0023349523544312,grad_norm: 0.8664782024852447, iteration: 81792
loss: 1.0144093036651611,grad_norm: 0.969832980214715, iteration: 81793
loss: 0.9955669641494751,grad_norm: 0.9578431647557732, iteration: 81794
loss: 0.9524699449539185,grad_norm: 0.9887338023535037, iteration: 81795
loss: 1.0172693729400635,grad_norm: 0.9999991860331865, iteration: 81796
loss: 1.0005004405975342,grad_norm: 0.9916229145992831, iteration: 81797
loss: 1.0444821119308472,grad_norm: 0.9999993198586541, iteration: 81798
loss: 0.9973172545433044,grad_norm: 0.9999992049747191, iteration: 81799
loss: 0.9869288206100464,grad_norm: 0.9999992214862747, iteration: 81800
loss: 0.9810161590576172,grad_norm: 0.9999994730350438, iteration: 81801
loss: 0.9910637736320496,grad_norm: 0.8695913327291165, iteration: 81802
loss: 0.9878220558166504,grad_norm: 0.9999991769652693, iteration: 81803
loss: 0.9452982544898987,grad_norm: 0.9828500030569101, iteration: 81804
loss: 1.0203790664672852,grad_norm: 0.9091372829778791, iteration: 81805
loss: 1.0194480419158936,grad_norm: 0.8550189503370524, iteration: 81806
loss: 0.994002103805542,grad_norm: 0.8947669337922942, iteration: 81807
loss: 0.9970460534095764,grad_norm: 0.9287643323961902, iteration: 81808
loss: 0.9849243760108948,grad_norm: 0.9416641951564667, iteration: 81809
loss: 0.9862375855445862,grad_norm: 0.8375536816129153, iteration: 81810
loss: 1.0474177598953247,grad_norm: 0.953786117524613, iteration: 81811
loss: 1.0416979789733887,grad_norm: 0.9999992212250741, iteration: 81812
loss: 1.0068475008010864,grad_norm: 0.8938916625077119, iteration: 81813
loss: 1.043864369392395,grad_norm: 0.8946450067736067, iteration: 81814
loss: 1.026695728302002,grad_norm: 0.9999992265147054, iteration: 81815
loss: 1.12030029296875,grad_norm: 0.9707133829272833, iteration: 81816
loss: 1.0057967901229858,grad_norm: 0.9971486114358167, iteration: 81817
loss: 1.0228683948516846,grad_norm: 0.9715845825756418, iteration: 81818
loss: 1.0191411972045898,grad_norm: 0.928860163013481, iteration: 81819
loss: 1.0863711833953857,grad_norm: 0.9999991831914663, iteration: 81820
loss: 0.986495316028595,grad_norm: 0.9999990276095334, iteration: 81821
loss: 0.9799461364746094,grad_norm: 0.999999196076425, iteration: 81822
loss: 1.0136947631835938,grad_norm: 0.9511337430071669, iteration: 81823
loss: 1.015014886856079,grad_norm: 0.9999994569833254, iteration: 81824
loss: 1.0284942388534546,grad_norm: 0.9999998376356856, iteration: 81825
loss: 1.0433392524719238,grad_norm: 0.9999998209946444, iteration: 81826
loss: 0.9833950400352478,grad_norm: 0.9810632482490755, iteration: 81827
loss: 1.0324878692626953,grad_norm: 0.9405045230227217, iteration: 81828
loss: 1.015918493270874,grad_norm: 0.9607075189464936, iteration: 81829
loss: 1.027084469795227,grad_norm: 0.9689909577539239, iteration: 81830
loss: 1.0118988752365112,grad_norm: 0.9940808015955127, iteration: 81831
loss: 0.9824528098106384,grad_norm: 0.940937486497987, iteration: 81832
loss: 1.0106083154678345,grad_norm: 0.9216125989720871, iteration: 81833
loss: 1.020937204360962,grad_norm: 0.9999995341633644, iteration: 81834
loss: 1.036030650138855,grad_norm: 0.9999989869958548, iteration: 81835
loss: 1.0080457925796509,grad_norm: 0.8982504451149117, iteration: 81836
loss: 0.9970632791519165,grad_norm: 0.892739522440243, iteration: 81837
loss: 1.0753206014633179,grad_norm: 0.9870946333535063, iteration: 81838
loss: 1.0191797018051147,grad_norm: 0.8119743911983348, iteration: 81839
loss: 0.9885541796684265,grad_norm: 0.8231738356473042, iteration: 81840
loss: 0.9947196245193481,grad_norm: 0.9999990657475158, iteration: 81841
loss: 1.089107632637024,grad_norm: 0.9999994711838289, iteration: 81842
loss: 1.020795464515686,grad_norm: 0.9999996095142143, iteration: 81843
loss: 1.0047672986984253,grad_norm: 0.9872681117155384, iteration: 81844
loss: 1.002082347869873,grad_norm: 0.8195053468651476, iteration: 81845
loss: 1.0067038536071777,grad_norm: 0.9999992270488152, iteration: 81846
loss: 1.0813946723937988,grad_norm: 0.9999994632589565, iteration: 81847
loss: 0.9870496988296509,grad_norm: 0.8016068300540516, iteration: 81848
loss: 1.0134915113449097,grad_norm: 0.9999991725182106, iteration: 81849
loss: 1.0194406509399414,grad_norm: 0.8520963649374236, iteration: 81850
loss: 0.9956337213516235,grad_norm: 0.7610008325235412, iteration: 81851
loss: 0.9852938055992126,grad_norm: 0.9999992632038546, iteration: 81852
loss: 1.0340133905410767,grad_norm: 0.8646733893994968, iteration: 81853
loss: 1.0293920040130615,grad_norm: 0.9999994909072664, iteration: 81854
loss: 1.013948678970337,grad_norm: 0.9852323283047667, iteration: 81855
loss: 0.9952020049095154,grad_norm: 0.8083121586384017, iteration: 81856
loss: 1.0195316076278687,grad_norm: 0.9999999181409085, iteration: 81857
loss: 1.0053859949111938,grad_norm: 0.897859963941857, iteration: 81858
loss: 0.978977620601654,grad_norm: 0.8510780771956367, iteration: 81859
loss: 0.9854439496994019,grad_norm: 0.902526863141733, iteration: 81860
loss: 1.0178709030151367,grad_norm: 0.9999991612237044, iteration: 81861
loss: 0.973173975944519,grad_norm: 0.9999991365562471, iteration: 81862
loss: 1.0106782913208008,grad_norm: 0.8020911791936648, iteration: 81863
loss: 1.0043754577636719,grad_norm: 0.7975815390372392, iteration: 81864
loss: 0.9722487926483154,grad_norm: 0.9999991410413092, iteration: 81865
loss: 0.9867302775382996,grad_norm: 0.9170094125135985, iteration: 81866
loss: 1.037800669670105,grad_norm: 0.9999989672174733, iteration: 81867
loss: 1.0021017789840698,grad_norm: 0.957418106269485, iteration: 81868
loss: 1.054826021194458,grad_norm: 0.9999990504508753, iteration: 81869
loss: 0.9811130166053772,grad_norm: 0.8483859642592759, iteration: 81870
loss: 1.0364019870758057,grad_norm: 0.9999996230343131, iteration: 81871
loss: 0.993134617805481,grad_norm: 0.9500132098945694, iteration: 81872
loss: 1.0117080211639404,grad_norm: 0.9999991187907856, iteration: 81873
loss: 0.9777862429618835,grad_norm: 0.8710017577629892, iteration: 81874
loss: 0.9652888178825378,grad_norm: 0.8729621089306924, iteration: 81875
loss: 0.9787534475326538,grad_norm: 0.9868947646982957, iteration: 81876
loss: 0.9957216382026672,grad_norm: 0.9999991667129623, iteration: 81877
loss: 1.0097320079803467,grad_norm: 0.9048831275165815, iteration: 81878
loss: 1.04710054397583,grad_norm: 0.9999995434912327, iteration: 81879
loss: 1.0200194120407104,grad_norm: 0.9999992217823231, iteration: 81880
loss: 1.0623164176940918,grad_norm: 0.9999998240272616, iteration: 81881
loss: 0.9956833124160767,grad_norm: 0.9999995145120634, iteration: 81882
loss: 1.0056689977645874,grad_norm: 0.9450358785958154, iteration: 81883
loss: 0.9682397246360779,grad_norm: 0.8405817426625136, iteration: 81884
loss: 0.9949561357498169,grad_norm: 0.9999990767940703, iteration: 81885
loss: 1.0145245790481567,grad_norm: 0.9016395963028643, iteration: 81886
loss: 1.0233744382858276,grad_norm: 0.9999995653924081, iteration: 81887
loss: 0.9684886336326599,grad_norm: 0.8545673259679352, iteration: 81888
loss: 0.9983614087104797,grad_norm: 0.9999991696502186, iteration: 81889
loss: 0.9888514876365662,grad_norm: 0.9469801876923146, iteration: 81890
loss: 1.0520274639129639,grad_norm: 0.9999990830920698, iteration: 81891
loss: 1.028795838356018,grad_norm: 0.9999998309683684, iteration: 81892
loss: 1.005816102027893,grad_norm: 0.9379798981640861, iteration: 81893
loss: 1.0206447839736938,grad_norm: 0.9911448038431587, iteration: 81894
loss: 0.9702658653259277,grad_norm: 0.9999990514416737, iteration: 81895
loss: 0.9837961792945862,grad_norm: 0.9123466792572587, iteration: 81896
loss: 1.0163646936416626,grad_norm: 0.9999991497209623, iteration: 81897
loss: 0.9907760620117188,grad_norm: 0.8167339987083228, iteration: 81898
loss: 0.9824601411819458,grad_norm: 0.9999989753412047, iteration: 81899
loss: 0.9889856576919556,grad_norm: 0.8041712017289728, iteration: 81900
loss: 1.0030041933059692,grad_norm: 0.9999991020802483, iteration: 81901
loss: 1.018959879875183,grad_norm: 0.8596153162916911, iteration: 81902
loss: 0.9741815328598022,grad_norm: 0.9925814214714168, iteration: 81903
loss: 1.0398879051208496,grad_norm: 0.9999991821264977, iteration: 81904
loss: 1.0350571870803833,grad_norm: 0.821929433512173, iteration: 81905
loss: 1.0279920101165771,grad_norm: 0.9999990396025941, iteration: 81906
loss: 0.9868007302284241,grad_norm: 0.9999996278952162, iteration: 81907
loss: 1.024519920349121,grad_norm: 0.9999994104360549, iteration: 81908
loss: 1.198329210281372,grad_norm: 0.9999996691354, iteration: 81909
loss: 1.0177267789840698,grad_norm: 0.9999992168328588, iteration: 81910
loss: 0.9801559448242188,grad_norm: 0.9999990360885622, iteration: 81911
loss: 1.0404173135757446,grad_norm: 0.893440900027914, iteration: 81912
loss: 1.0350675582885742,grad_norm: 0.9999995864339563, iteration: 81913
loss: 1.0293527841567993,grad_norm: 0.9534624283762293, iteration: 81914
loss: 1.007051944732666,grad_norm: 0.9999991014079217, iteration: 81915
loss: 0.9991769790649414,grad_norm: 0.9999992209516131, iteration: 81916
loss: 0.9712859392166138,grad_norm: 0.9658209468985962, iteration: 81917
loss: 1.1022307872772217,grad_norm: 0.9999994904931321, iteration: 81918
loss: 1.0314252376556396,grad_norm: 0.9999992321999095, iteration: 81919
loss: 0.9892728924751282,grad_norm: 0.9487416759521452, iteration: 81920
loss: 0.9707686901092529,grad_norm: 0.9326504140342442, iteration: 81921
loss: 1.0271477699279785,grad_norm: 0.9191963798336059, iteration: 81922
loss: 0.9833697080612183,grad_norm: 0.9022531520494641, iteration: 81923
loss: 1.0455065965652466,grad_norm: 0.9396420056315566, iteration: 81924
loss: 1.0169357061386108,grad_norm: 0.9366269202820281, iteration: 81925
loss: 1.0746747255325317,grad_norm: 0.9999992172355, iteration: 81926
loss: 0.9796174168586731,grad_norm: 0.9744571115104664, iteration: 81927
loss: 1.0059242248535156,grad_norm: 0.9999990559968785, iteration: 81928
loss: 1.0805002450942993,grad_norm: 0.9999994251676891, iteration: 81929
loss: 1.0107734203338623,grad_norm: 0.9999996247961703, iteration: 81930
loss: 0.9838242530822754,grad_norm: 0.9999991106498121, iteration: 81931
loss: 1.0110455751419067,grad_norm: 0.7874065677432927, iteration: 81932
loss: 1.014997959136963,grad_norm: 0.8815092209822408, iteration: 81933
loss: 1.0350037813186646,grad_norm: 0.9999994616011741, iteration: 81934
loss: 1.02264404296875,grad_norm: 0.9293004443948316, iteration: 81935
loss: 1.0674386024475098,grad_norm: 0.9999993769673295, iteration: 81936
loss: 0.9916488528251648,grad_norm: 0.9999991637446666, iteration: 81937
loss: 0.9926738142967224,grad_norm: 0.9999998845110148, iteration: 81938
loss: 1.0055259466171265,grad_norm: 0.8772579549671764, iteration: 81939
loss: 0.991818368434906,grad_norm: 0.9485238920032184, iteration: 81940
loss: 0.9644984602928162,grad_norm: 0.966899324741989, iteration: 81941
loss: 1.0064820051193237,grad_norm: 0.9248525216031631, iteration: 81942
loss: 1.0773251056671143,grad_norm: 0.9999991600768966, iteration: 81943
loss: 0.9833047986030579,grad_norm: 0.9547009597681975, iteration: 81944
loss: 1.0198118686676025,grad_norm: 0.9999991723171215, iteration: 81945
loss: 0.9885782599449158,grad_norm: 0.9999991236206592, iteration: 81946
loss: 1.1754653453826904,grad_norm: 0.9999992153516885, iteration: 81947
loss: 0.9808709621429443,grad_norm: 0.9999990576419792, iteration: 81948
loss: 1.018573522567749,grad_norm: 0.7423028028726348, iteration: 81949
loss: 1.0264089107513428,grad_norm: 0.9999991238768303, iteration: 81950
loss: 0.9855159521102905,grad_norm: 0.9898906859518017, iteration: 81951
loss: 1.0685431957244873,grad_norm: 0.9279660035578203, iteration: 81952
loss: 1.0204187631607056,grad_norm: 0.9999991084346985, iteration: 81953
loss: 1.039236068725586,grad_norm: 0.9999991049426168, iteration: 81954
loss: 1.0504745244979858,grad_norm: 0.9999993800366236, iteration: 81955
loss: 1.0333983898162842,grad_norm: 0.9441916800465681, iteration: 81956
loss: 0.9669000506401062,grad_norm: 0.9295915525695251, iteration: 81957
loss: 0.9836633205413818,grad_norm: 0.9149172371393647, iteration: 81958
loss: 1.0404616594314575,grad_norm: 0.9999996466796331, iteration: 81959
loss: 0.9899579882621765,grad_norm: 0.999999708385934, iteration: 81960
loss: 1.0358448028564453,grad_norm: 0.9999997485070542, iteration: 81961
loss: 1.0074691772460938,grad_norm: 0.9999991137628714, iteration: 81962
loss: 1.0054205656051636,grad_norm: 0.814088177271947, iteration: 81963
loss: 0.9835496544837952,grad_norm: 0.9395265343577002, iteration: 81964
loss: 0.9792720079421997,grad_norm: 0.999999104462419, iteration: 81965
loss: 1.0186049938201904,grad_norm: 0.9999991934758136, iteration: 81966
loss: 0.9800601601600647,grad_norm: 0.763201239294293, iteration: 81967
loss: 1.0002535581588745,grad_norm: 0.8300216989839774, iteration: 81968
loss: 0.9757295846939087,grad_norm: 0.976678176471604, iteration: 81969
loss: 1.0352991819381714,grad_norm: 0.9999994483528744, iteration: 81970
loss: 0.9755009412765503,grad_norm: 0.9498504649152554, iteration: 81971
loss: 0.9807499647140503,grad_norm: 0.8414841059266054, iteration: 81972
loss: 0.9630821943283081,grad_norm: 0.9198118993860934, iteration: 81973
loss: 1.0111706256866455,grad_norm: 0.9999990765903038, iteration: 81974
loss: 1.0392898321151733,grad_norm: 0.9999994957122288, iteration: 81975
loss: 0.9974214434623718,grad_norm: 0.6682486017338636, iteration: 81976
loss: 1.0129915475845337,grad_norm: 0.8640960925396839, iteration: 81977
loss: 1.042024850845337,grad_norm: 0.9999990399519226, iteration: 81978
loss: 1.024489164352417,grad_norm: 0.9999991513182276, iteration: 81979
loss: 1.0283013582229614,grad_norm: 0.999999080913946, iteration: 81980
loss: 1.012454867362976,grad_norm: 0.9999996901123291, iteration: 81981
loss: 1.0396441221237183,grad_norm: 0.999999295071801, iteration: 81982
loss: 1.0139285326004028,grad_norm: 0.9999992048611469, iteration: 81983
loss: 1.0480411052703857,grad_norm: 0.9999994688169072, iteration: 81984
loss: 1.0207829475402832,grad_norm: 0.9999992367765761, iteration: 81985
loss: 0.9359491467475891,grad_norm: 0.9999992671311935, iteration: 81986
loss: 1.009516716003418,grad_norm: 0.9999994493938625, iteration: 81987
loss: 0.9613539576530457,grad_norm: 0.9999991084602049, iteration: 81988
loss: 1.1104916334152222,grad_norm: 0.9999999919797092, iteration: 81989
loss: 1.016648292541504,grad_norm: 0.863993745536529, iteration: 81990
loss: 1.0394628047943115,grad_norm: 0.9999993981089057, iteration: 81991
loss: 0.9842774271965027,grad_norm: 0.9327320655826804, iteration: 81992
loss: 1.009864091873169,grad_norm: 0.9999989463275542, iteration: 81993
loss: 0.9797165393829346,grad_norm: 0.7259520978051092, iteration: 81994
loss: 1.0196964740753174,grad_norm: 0.9355752933495858, iteration: 81995
loss: 1.0218290090560913,grad_norm: 0.9880693424757967, iteration: 81996
loss: 0.993103563785553,grad_norm: 0.916416803403966, iteration: 81997
loss: 0.9969847798347473,grad_norm: 0.7872722159173715, iteration: 81998
loss: 1.0254288911819458,grad_norm: 0.9999991504600274, iteration: 81999
loss: 1.005752682685852,grad_norm: 0.9999994604268059, iteration: 82000
loss: 1.0549732446670532,grad_norm: 0.9999990983705052, iteration: 82001
loss: 1.0431525707244873,grad_norm: 0.9999991298343365, iteration: 82002
loss: 1.047777533531189,grad_norm: 0.9999993622136689, iteration: 82003
loss: 1.0361671447753906,grad_norm: 0.9999998400923316, iteration: 82004
loss: 0.9821324348449707,grad_norm: 0.9999991180604517, iteration: 82005
loss: 1.004014492034912,grad_norm: 0.9999992875014351, iteration: 82006
loss: 0.9740813374519348,grad_norm: 0.9625938972929252, iteration: 82007
loss: 1.0171037912368774,grad_norm: 0.9999990539361917, iteration: 82008
loss: 0.9932634234428406,grad_norm: 0.9891837502962044, iteration: 82009
loss: 1.000126838684082,grad_norm: 0.9999991396338831, iteration: 82010
loss: 1.0593314170837402,grad_norm: 0.9999993666787188, iteration: 82011
loss: 1.0426450967788696,grad_norm: 0.9999993747889357, iteration: 82012
loss: 1.027532935142517,grad_norm: 0.9999991780036888, iteration: 82013
loss: 1.027669072151184,grad_norm: 0.8627958611491108, iteration: 82014
loss: 1.0995995998382568,grad_norm: 0.9810870578636248, iteration: 82015
loss: 0.9848905205726624,grad_norm: 0.956143599078341, iteration: 82016
loss: 1.0155433416366577,grad_norm: 0.8461476016603281, iteration: 82017
loss: 0.9994862079620361,grad_norm: 0.9999991588094934, iteration: 82018
loss: 1.0300014019012451,grad_norm: 0.8815617958644458, iteration: 82019
loss: 1.0182843208312988,grad_norm: 0.9999991052808953, iteration: 82020
loss: 1.0528534650802612,grad_norm: 0.9999993648802622, iteration: 82021
loss: 0.9908990263938904,grad_norm: 0.9970419440564073, iteration: 82022
loss: 1.0303850173950195,grad_norm: 0.7976349645544845, iteration: 82023
loss: 0.981148898601532,grad_norm: 0.9166006272353068, iteration: 82024
loss: 1.0257220268249512,grad_norm: 0.9986409767131169, iteration: 82025
loss: 0.9909422397613525,grad_norm: 0.9480581863510774, iteration: 82026
loss: 1.0010267496109009,grad_norm: 0.9999992842700591, iteration: 82027
loss: 0.9931053519248962,grad_norm: 0.8218803273921133, iteration: 82028
loss: 0.9951565861701965,grad_norm: 0.9838212050491841, iteration: 82029
loss: 1.0359160900115967,grad_norm: 0.9999994891148288, iteration: 82030
loss: 1.0242899656295776,grad_norm: 0.8008599079309726, iteration: 82031
loss: 0.9886032938957214,grad_norm: 0.8221782523816812, iteration: 82032
loss: 0.9991813898086548,grad_norm: 0.9605688186316674, iteration: 82033
loss: 1.0189188718795776,grad_norm: 0.8696195202219661, iteration: 82034
loss: 1.0002700090408325,grad_norm: 0.9977001739951772, iteration: 82035
loss: 0.9869115948677063,grad_norm: 0.8221532124026922, iteration: 82036
loss: 0.9752009510993958,grad_norm: 0.9312305897021569, iteration: 82037
loss: 0.9941160678863525,grad_norm: 0.8810478968007753, iteration: 82038
loss: 1.0248901844024658,grad_norm: 0.991239032627582, iteration: 82039
loss: 0.9810420274734497,grad_norm: 0.9999991007711501, iteration: 82040
loss: 0.9998421669006348,grad_norm: 0.9999997107141411, iteration: 82041
loss: 0.9966210126876831,grad_norm: 0.6771717215514365, iteration: 82042
loss: 1.0025266408920288,grad_norm: 0.8249536339730258, iteration: 82043
loss: 1.0081243515014648,grad_norm: 0.9999994379558585, iteration: 82044
loss: 0.9995133280754089,grad_norm: 0.9226043701335004, iteration: 82045
loss: 1.0058043003082275,grad_norm: 0.947821974901303, iteration: 82046
loss: 0.9666733741760254,grad_norm: 0.9999991178390036, iteration: 82047
loss: 1.0162239074707031,grad_norm: 0.9999995394411135, iteration: 82048
loss: 1.0077601671218872,grad_norm: 0.9999988871893553, iteration: 82049
loss: 1.014042854309082,grad_norm: 0.9270979453273366, iteration: 82050
loss: 0.9991734027862549,grad_norm: 0.7353632979491346, iteration: 82051
loss: 0.9982084035873413,grad_norm: 0.9999991540010179, iteration: 82052
loss: 1.0251286029815674,grad_norm: 0.9296204496266609, iteration: 82053
loss: 1.0205477476119995,grad_norm: 0.7783626926417061, iteration: 82054
loss: 1.015793800354004,grad_norm: 0.9118068319901409, iteration: 82055
loss: 0.996604323387146,grad_norm: 0.9999989674252227, iteration: 82056
loss: 0.9630213975906372,grad_norm: 0.9606078125951356, iteration: 82057
loss: 0.9884869456291199,grad_norm: 0.9279315020808431, iteration: 82058
loss: 0.9720720052719116,grad_norm: 0.8564987285853093, iteration: 82059
loss: 1.0108518600463867,grad_norm: 0.9999998265483288, iteration: 82060
loss: 1.0339691638946533,grad_norm: 0.9999990713789435, iteration: 82061
loss: 0.9694151878356934,grad_norm: 0.9272585287221703, iteration: 82062
loss: 0.9890962839126587,grad_norm: 0.9999991181739021, iteration: 82063
loss: 0.9892058372497559,grad_norm: 0.867065833187172, iteration: 82064
loss: 1.0180940628051758,grad_norm: 0.9633968846655203, iteration: 82065
loss: 0.9742991328239441,grad_norm: 0.950874041648521, iteration: 82066
loss: 0.9708228707313538,grad_norm: 0.8571786198071251, iteration: 82067
loss: 0.9880586862564087,grad_norm: 0.9525005642985149, iteration: 82068
loss: 0.9801424741744995,grad_norm: 0.9999991055792358, iteration: 82069
loss: 0.9827442169189453,grad_norm: 0.9211809208750493, iteration: 82070
loss: 0.9665576219558716,grad_norm: 0.9999991776241738, iteration: 82071
loss: 0.9926745295524597,grad_norm: 0.9999991302991662, iteration: 82072
loss: 0.9745919108390808,grad_norm: 0.9975455399020322, iteration: 82073
loss: 1.0006306171417236,grad_norm: 0.7871993810165635, iteration: 82074
loss: 0.9639462232589722,grad_norm: 0.9795827208287416, iteration: 82075
loss: 1.0280811786651611,grad_norm: 0.9999994393720488, iteration: 82076
loss: 1.011562466621399,grad_norm: 0.9331353555019121, iteration: 82077
loss: 1.034153699874878,grad_norm: 0.9999991953805863, iteration: 82078
loss: 1.0768641233444214,grad_norm: 0.9999994721850449, iteration: 82079
loss: 1.0170423984527588,grad_norm: 0.915068191781783, iteration: 82080
loss: 0.9819523692131042,grad_norm: 0.8540225659501274, iteration: 82081
loss: 0.9957287907600403,grad_norm: 0.8142338922649921, iteration: 82082
loss: 1.0160049200057983,grad_norm: 0.9999994235393742, iteration: 82083
loss: 0.9894986748695374,grad_norm: 0.9999989666998077, iteration: 82084
loss: 1.0020992755889893,grad_norm: 0.9222619284911173, iteration: 82085
loss: 1.0474153757095337,grad_norm: 0.999999069591402, iteration: 82086
loss: 1.021013617515564,grad_norm: 0.9999995399444803, iteration: 82087
loss: 0.9790189266204834,grad_norm: 0.9071138739802057, iteration: 82088
loss: 0.9881303906440735,grad_norm: 0.9803882236391112, iteration: 82089
loss: 1.0093872547149658,grad_norm: 0.999999048886964, iteration: 82090
loss: 1.0090923309326172,grad_norm: 0.9733953776336636, iteration: 82091
loss: 0.9902158975601196,grad_norm: 0.7751748363876702, iteration: 82092
loss: 0.9998663663864136,grad_norm: 0.9999991647577312, iteration: 82093
loss: 0.9915125966072083,grad_norm: 0.9066331288135336, iteration: 82094
loss: 1.0415654182434082,grad_norm: 0.9999995064802766, iteration: 82095
loss: 1.0067025423049927,grad_norm: 0.9999989576070922, iteration: 82096
loss: 1.041136622428894,grad_norm: 0.8761473668230432, iteration: 82097
loss: 1.0415611267089844,grad_norm: 0.9999991686343278, iteration: 82098
loss: 0.9877209067344666,grad_norm: 0.8194555884544914, iteration: 82099
loss: 1.0069704055786133,grad_norm: 0.9735987576783198, iteration: 82100
loss: 1.0207747220993042,grad_norm: 0.9999990613489064, iteration: 82101
loss: 0.9898978471755981,grad_norm: 0.7566626778836, iteration: 82102
loss: 1.026769757270813,grad_norm: 0.9572345085613375, iteration: 82103
loss: 1.0256117582321167,grad_norm: 0.9999998225396807, iteration: 82104
loss: 1.023393988609314,grad_norm: 0.9064165202322172, iteration: 82105
loss: 0.9722802639007568,grad_norm: 0.9336648136963764, iteration: 82106
loss: 0.9761391282081604,grad_norm: 0.8470914049219026, iteration: 82107
loss: 1.0432424545288086,grad_norm: 0.9999991687666865, iteration: 82108
loss: 0.9874762892723083,grad_norm: 0.8684275356418847, iteration: 82109
loss: 0.9931603074073792,grad_norm: 0.9267868118699932, iteration: 82110
loss: 0.9756308794021606,grad_norm: 0.7887544264291727, iteration: 82111
loss: 0.9923412799835205,grad_norm: 0.8682410111441363, iteration: 82112
loss: 1.0047941207885742,grad_norm: 0.9329689078904995, iteration: 82113
loss: 1.0135339498519897,grad_norm: 0.8505177749386336, iteration: 82114
loss: 0.9643554091453552,grad_norm: 0.9543964574523114, iteration: 82115
loss: 0.970769464969635,grad_norm: 0.8302448422201368, iteration: 82116
loss: 0.9626031517982483,grad_norm: 0.80195421128847, iteration: 82117
loss: 0.9807463884353638,grad_norm: 0.8278525874485856, iteration: 82118
loss: 1.0275626182556152,grad_norm: 0.9165521564920381, iteration: 82119
loss: 0.981085479259491,grad_norm: 0.9999991802523535, iteration: 82120
loss: 1.0506669282913208,grad_norm: 0.9087612628090544, iteration: 82121
loss: 1.0407919883728027,grad_norm: 0.9999993938547145, iteration: 82122
loss: 1.010079264640808,grad_norm: 0.9999994202093104, iteration: 82123
loss: 1.0199105739593506,grad_norm: 0.9999991492934863, iteration: 82124
loss: 0.9828945398330688,grad_norm: 0.9798942056954335, iteration: 82125
loss: 1.0046035051345825,grad_norm: 0.8665178490106764, iteration: 82126
loss: 0.9944484829902649,grad_norm: 0.7400647539712906, iteration: 82127
loss: 0.9794482588768005,grad_norm: 0.9744154038870484, iteration: 82128
loss: 0.9945963025093079,grad_norm: 0.9429186471884254, iteration: 82129
loss: 1.0020852088928223,grad_norm: 0.9261790850024804, iteration: 82130
loss: 0.9931263327598572,grad_norm: 0.9999990831964153, iteration: 82131
loss: 0.972997784614563,grad_norm: 0.950180939428561, iteration: 82132
loss: 1.0394959449768066,grad_norm: 0.9918824468520168, iteration: 82133
loss: 1.0043351650238037,grad_norm: 0.9078113543240794, iteration: 82134
loss: 1.001720666885376,grad_norm: 0.9999991944673523, iteration: 82135
loss: 1.177638053894043,grad_norm: 0.9999997148328233, iteration: 82136
loss: 1.059676170349121,grad_norm: 0.9999994137875594, iteration: 82137
loss: 1.0977979898452759,grad_norm: 0.9999995740792746, iteration: 82138
loss: 0.9822978377342224,grad_norm: 0.8488901085865143, iteration: 82139
loss: 0.9994173645973206,grad_norm: 0.8835093770520384, iteration: 82140
loss: 1.0232425928115845,grad_norm: 0.9999992590079855, iteration: 82141
loss: 1.028146743774414,grad_norm: 0.9295207048309955, iteration: 82142
loss: 0.9931309223175049,grad_norm: 0.9999991165008044, iteration: 82143
loss: 1.0206674337387085,grad_norm: 0.9817306570200394, iteration: 82144
loss: 1.0415432453155518,grad_norm: 0.9937221900158735, iteration: 82145
loss: 0.9644094109535217,grad_norm: 0.9653082595440328, iteration: 82146
loss: 1.0221260786056519,grad_norm: 0.9999997520561995, iteration: 82147
loss: 1.0371416807174683,grad_norm: 0.9999993496916628, iteration: 82148
loss: 0.986758291721344,grad_norm: 0.9999990069628892, iteration: 82149
loss: 0.9994086027145386,grad_norm: 0.7481071335011852, iteration: 82150
loss: 1.1030094623565674,grad_norm: 0.9999992268253786, iteration: 82151
loss: 1.0542044639587402,grad_norm: 0.9999992511294782, iteration: 82152
loss: 1.0090703964233398,grad_norm: 0.8625227542216609, iteration: 82153
loss: 1.0080626010894775,grad_norm: 0.8328929990616096, iteration: 82154
loss: 0.9898518323898315,grad_norm: 0.9694956407780146, iteration: 82155
loss: 0.9824947714805603,grad_norm: 0.9999989839534865, iteration: 82156
loss: 1.0186738967895508,grad_norm: 0.7306407518646179, iteration: 82157
loss: 1.0115753412246704,grad_norm: 0.9999997561507148, iteration: 82158
loss: 1.0436075925827026,grad_norm: 0.9540762501364207, iteration: 82159
loss: 0.9863597750663757,grad_norm: 0.9999991516203605, iteration: 82160
loss: 1.0065850019454956,grad_norm: 0.866571192126979, iteration: 82161
loss: 1.0620317459106445,grad_norm: 0.9999996631539608, iteration: 82162
loss: 1.0199778079986572,grad_norm: 0.999999206588883, iteration: 82163
loss: 1.0716450214385986,grad_norm: 0.9999993392188598, iteration: 82164
loss: 1.0160187482833862,grad_norm: 0.9999990384921941, iteration: 82165
loss: 0.9974379539489746,grad_norm: 0.8253054085712671, iteration: 82166
loss: 0.9826503396034241,grad_norm: 0.9604638622070554, iteration: 82167
loss: 1.0443201065063477,grad_norm: 0.8709583766743183, iteration: 82168
loss: 0.9907331466674805,grad_norm: 0.9999994341426129, iteration: 82169
loss: 1.0383223295211792,grad_norm: 0.9999990729426538, iteration: 82170
loss: 1.0332587957382202,grad_norm: 0.9999991634943223, iteration: 82171
loss: 0.9984764456748962,grad_norm: 0.9999998105173007, iteration: 82172
loss: 1.0150257349014282,grad_norm: 0.9999989438844189, iteration: 82173
loss: 1.0356746912002563,grad_norm: 0.9999992221217279, iteration: 82174
loss: 1.0185350179672241,grad_norm: 0.9296996296077576, iteration: 82175
loss: 0.9724859595298767,grad_norm: 0.8911187457676165, iteration: 82176
loss: 0.9786770343780518,grad_norm: 0.9979523073361081, iteration: 82177
loss: 0.9933475255966187,grad_norm: 0.9999992030242624, iteration: 82178
loss: 1.0149259567260742,grad_norm: 0.7922585117560464, iteration: 82179
loss: 1.0778610706329346,grad_norm: 0.9999996926462561, iteration: 82180
loss: 1.0267783403396606,grad_norm: 0.9999995154499289, iteration: 82181
loss: 1.022191047668457,grad_norm: 0.996031414722666, iteration: 82182
loss: 1.1001861095428467,grad_norm: 0.8969559809456948, iteration: 82183
loss: 0.9789258241653442,grad_norm: 0.8914016859894347, iteration: 82184
loss: 1.0063412189483643,grad_norm: 0.9999992740415307, iteration: 82185
loss: 1.0065560340881348,grad_norm: 0.8162245658413451, iteration: 82186
loss: 0.9606750011444092,grad_norm: 0.9289426332044737, iteration: 82187
loss: 1.2149051427841187,grad_norm: 0.9999998990301918, iteration: 82188
loss: 1.0987640619277954,grad_norm: 0.9999990250541272, iteration: 82189
loss: 0.9737504720687866,grad_norm: 0.8597273606298506, iteration: 82190
loss: 1.0114694833755493,grad_norm: 0.9121366645292759, iteration: 82191
loss: 1.003623604774475,grad_norm: 0.8888982812940223, iteration: 82192
loss: 0.9909859299659729,grad_norm: 0.9999995910547741, iteration: 82193
loss: 1.0083900690078735,grad_norm: 0.9999995841203351, iteration: 82194
loss: 1.013298749923706,grad_norm: 0.8819067492802749, iteration: 82195
loss: 0.9498287439346313,grad_norm: 0.9770548927159902, iteration: 82196
loss: 1.0029164552688599,grad_norm: 0.8063479911561988, iteration: 82197
loss: 0.9623327255249023,grad_norm: 0.9999989115565907, iteration: 82198
loss: 0.9823582172393799,grad_norm: 0.999999229777121, iteration: 82199
loss: 0.9829704761505127,grad_norm: 0.9999990596837793, iteration: 82200
loss: 0.9723961353302002,grad_norm: 0.9135174679522134, iteration: 82201
loss: 0.9862399101257324,grad_norm: 0.8941836338298405, iteration: 82202
loss: 1.01762056350708,grad_norm: 0.9999990051396194, iteration: 82203
loss: 0.9886184334754944,grad_norm: 0.9999991784482617, iteration: 82204
loss: 0.9831129312515259,grad_norm: 0.8172500352725942, iteration: 82205
loss: 0.9645856022834778,grad_norm: 0.9463461848534857, iteration: 82206
loss: 1.0137062072753906,grad_norm: 0.9999990873215819, iteration: 82207
loss: 0.9716253876686096,grad_norm: 0.9999992303738143, iteration: 82208
loss: 1.0124311447143555,grad_norm: 0.9490630625123077, iteration: 82209
loss: 0.972259521484375,grad_norm: 0.8932928887303334, iteration: 82210
loss: 1.010096788406372,grad_norm: 0.964560893839005, iteration: 82211
loss: 0.996411144733429,grad_norm: 0.8358687292875376, iteration: 82212
loss: 1.0293411016464233,grad_norm: 0.7641515764944689, iteration: 82213
loss: 0.9941404461860657,grad_norm: 0.9678475559031003, iteration: 82214
loss: 1.12001371383667,grad_norm: 0.9999994437208912, iteration: 82215
loss: 1.027380108833313,grad_norm: 0.999999241621876, iteration: 82216
loss: 1.0413285493850708,grad_norm: 0.9999990527689143, iteration: 82217
loss: 1.0024429559707642,grad_norm: 0.9999993485164932, iteration: 82218
loss: 0.9914913773536682,grad_norm: 0.8961421096637755, iteration: 82219
loss: 0.9806311130523682,grad_norm: 0.7826889789562794, iteration: 82220
loss: 1.011522650718689,grad_norm: 0.9999990820368739, iteration: 82221
loss: 1.0149675607681274,grad_norm: 0.999999084909121, iteration: 82222
loss: 1.0252468585968018,grad_norm: 0.9999994225861532, iteration: 82223
loss: 0.9800182580947876,grad_norm: 0.999999129102056, iteration: 82224
loss: 1.0091367959976196,grad_norm: 0.7991514031589543, iteration: 82225
loss: 1.0917799472808838,grad_norm: 0.9999993336493392, iteration: 82226
loss: 1.023132085800171,grad_norm: 0.9999996693763114, iteration: 82227
loss: 1.0049625635147095,grad_norm: 0.9168414403031503, iteration: 82228
loss: 0.9897028803825378,grad_norm: 0.9342271027907815, iteration: 82229
loss: 0.9872715473175049,grad_norm: 0.999999765297299, iteration: 82230
loss: 0.9917857050895691,grad_norm: 0.9999995799944065, iteration: 82231
loss: 1.0409572124481201,grad_norm: 0.9999998383455638, iteration: 82232
loss: 0.9919686913490295,grad_norm: 0.9999991775778103, iteration: 82233
loss: 1.015663504600525,grad_norm: 0.9274153587054794, iteration: 82234
loss: 0.9618152379989624,grad_norm: 0.9779828163751418, iteration: 82235
loss: 0.9833968877792358,grad_norm: 0.8275458681964342, iteration: 82236
loss: 0.9811155200004578,grad_norm: 0.8837365183758098, iteration: 82237
loss: 1.0174039602279663,grad_norm: 0.9999992746181705, iteration: 82238
loss: 0.9874088168144226,grad_norm: 0.9999991200558931, iteration: 82239
loss: 1.012435793876648,grad_norm: 0.999999073153886, iteration: 82240
loss: 0.9898383617401123,grad_norm: 0.9666991058546612, iteration: 82241
loss: 1.012718677520752,grad_norm: 0.9195416529777999, iteration: 82242
loss: 0.9826371669769287,grad_norm: 0.9999992085861337, iteration: 82243
loss: 0.9920545220375061,grad_norm: 0.912975793159492, iteration: 82244
loss: 0.9936687350273132,grad_norm: 0.9999990377610474, iteration: 82245
loss: 1.001497507095337,grad_norm: 0.8751859937041839, iteration: 82246
loss: 1.0908008813858032,grad_norm: 0.9999996131219061, iteration: 82247
loss: 0.987358808517456,grad_norm: 0.9999991843391308, iteration: 82248
loss: 1.0163358449935913,grad_norm: 0.9999991140460452, iteration: 82249
loss: 0.9667651057243347,grad_norm: 0.999999249202874, iteration: 82250
loss: 1.0737028121948242,grad_norm: 0.9999991509413594, iteration: 82251
loss: 0.9675379991531372,grad_norm: 0.9883537766882489, iteration: 82252
loss: 0.9941825270652771,grad_norm: 0.8021030600599742, iteration: 82253
loss: 1.0126285552978516,grad_norm: 0.9999991464116901, iteration: 82254
loss: 0.9744834303855896,grad_norm: 0.9542678027394393, iteration: 82255
loss: 0.9853072762489319,grad_norm: 0.9999990317518944, iteration: 82256
loss: 1.0164247751235962,grad_norm: 0.9040245287233885, iteration: 82257
loss: 1.0012626647949219,grad_norm: 0.7277201509442366, iteration: 82258
loss: 0.968812108039856,grad_norm: 0.9999991217076692, iteration: 82259
loss: 1.0395945310592651,grad_norm: 0.9999997064392291, iteration: 82260
loss: 1.0391900539398193,grad_norm: 1.0000000148621653, iteration: 82261
loss: 1.0410979986190796,grad_norm: 0.9999990788422608, iteration: 82262
loss: 0.996282160282135,grad_norm: 0.9999994913725356, iteration: 82263
loss: 0.9959433078765869,grad_norm: 0.8730105246717402, iteration: 82264
loss: 0.9694157838821411,grad_norm: 0.976137826751807, iteration: 82265
loss: 1.002475380897522,grad_norm: 0.9999990521816211, iteration: 82266
loss: 1.0134217739105225,grad_norm: 0.9999991717797261, iteration: 82267
loss: 0.989902675151825,grad_norm: 0.8702604993584191, iteration: 82268
loss: 0.9719772934913635,grad_norm: 0.8901959231512401, iteration: 82269
loss: 0.9776579737663269,grad_norm: 0.9999995876276758, iteration: 82270
loss: 1.2829455137252808,grad_norm: 0.9999998262877243, iteration: 82271
loss: 1.078738808631897,grad_norm: 0.9999990435119526, iteration: 82272
loss: 0.9763019680976868,grad_norm: 0.9987678369209368, iteration: 82273
loss: 1.0186049938201904,grad_norm: 0.9570371008152139, iteration: 82274
loss: 0.9646666049957275,grad_norm: 0.9238445091932341, iteration: 82275
loss: 1.0160003900527954,grad_norm: 0.9999993442552461, iteration: 82276
loss: 1.0039724111557007,grad_norm: 0.8349966472340683, iteration: 82277
loss: 0.9910228848457336,grad_norm: 0.966728283257988, iteration: 82278
loss: 0.991710364818573,grad_norm: 0.9999996863719365, iteration: 82279
loss: 0.9766499400138855,grad_norm: 0.8677717794706986, iteration: 82280
loss: 1.0139576196670532,grad_norm: 0.7726294546216522, iteration: 82281
loss: 1.070359468460083,grad_norm: 0.9999992965021621, iteration: 82282
loss: 1.0269229412078857,grad_norm: 0.9967819305558628, iteration: 82283
loss: 0.9490960240364075,grad_norm: 0.893293350814661, iteration: 82284
loss: 0.9709002375602722,grad_norm: 0.9999992379589423, iteration: 82285
loss: 1.054439663887024,grad_norm: 0.9999991230664149, iteration: 82286
loss: 1.0583192110061646,grad_norm: 0.9999992361764857, iteration: 82287
loss: 0.9898911118507385,grad_norm: 0.9381694685424388, iteration: 82288
loss: 1.0019960403442383,grad_norm: 0.9295549526878624, iteration: 82289
loss: 1.0471793413162231,grad_norm: 0.8891917491262064, iteration: 82290
loss: 1.0374068021774292,grad_norm: 0.9999993370611865, iteration: 82291
loss: 0.9739065766334534,grad_norm: 0.9999992463617509, iteration: 82292
loss: 0.9826762676239014,grad_norm: 0.9958487895771689, iteration: 82293
loss: 1.0274521112442017,grad_norm: 0.823009787688053, iteration: 82294
loss: 1.0068312883377075,grad_norm: 0.9537126292530448, iteration: 82295
loss: 0.9968980550765991,grad_norm: 0.9999998328196648, iteration: 82296
loss: 1.0051692724227905,grad_norm: 0.8877957639251653, iteration: 82297
loss: 0.9910722970962524,grad_norm: 0.9187037469858687, iteration: 82298
loss: 0.9661032557487488,grad_norm: 0.9999994687547011, iteration: 82299
loss: 1.0144075155258179,grad_norm: 0.9783747193090738, iteration: 82300
loss: 0.9299711585044861,grad_norm: 0.9999996157725803, iteration: 82301
loss: 1.0370630025863647,grad_norm: 0.8706171284842327, iteration: 82302
loss: 1.0554505586624146,grad_norm: 0.8761764047457745, iteration: 82303
loss: 1.0535240173339844,grad_norm: 0.9999992228078756, iteration: 82304
loss: 1.0293341875076294,grad_norm: 0.9999990513986889, iteration: 82305
loss: 1.0273100137710571,grad_norm: 0.9999990760950597, iteration: 82306
loss: 1.041318655014038,grad_norm: 0.8108781356691119, iteration: 82307
loss: 1.0340381860733032,grad_norm: 0.999999090047896, iteration: 82308
loss: 0.9697645306587219,grad_norm: 0.9279225704203509, iteration: 82309
loss: 1.0551990270614624,grad_norm: 0.9999995660944859, iteration: 82310
loss: 1.0216407775878906,grad_norm: 0.7606703120127393, iteration: 82311
loss: 1.0068202018737793,grad_norm: 0.9999996829554396, iteration: 82312
loss: 1.0854387283325195,grad_norm: 0.9999991569242616, iteration: 82313
loss: 1.0443847179412842,grad_norm: 0.999999818762925, iteration: 82314
loss: 0.9675851464271545,grad_norm: 0.8005361011744658, iteration: 82315
loss: 1.0459072589874268,grad_norm: 0.9999990675675675, iteration: 82316
loss: 1.0462454557418823,grad_norm: 0.9875266280343749, iteration: 82317
loss: 0.9819383025169373,grad_norm: 0.9999991242160526, iteration: 82318
loss: 0.9955150485038757,grad_norm: 0.9663497464202062, iteration: 82319
loss: 0.9734321236610413,grad_norm: 0.9999990791046618, iteration: 82320
loss: 0.9423664212226868,grad_norm: 0.9447932356347026, iteration: 82321
loss: 1.0756009817123413,grad_norm: 0.9999996641418923, iteration: 82322
loss: 0.9656657576560974,grad_norm: 0.965432021989547, iteration: 82323
loss: 0.9908445477485657,grad_norm: 0.9509223153244737, iteration: 82324
loss: 1.0289862155914307,grad_norm: 0.8875086941630367, iteration: 82325
loss: 0.9783666729927063,grad_norm: 0.7932454381834873, iteration: 82326
loss: 0.9892950654029846,grad_norm: 0.7895289599209012, iteration: 82327
loss: 1.043897032737732,grad_norm: 0.9999991328655145, iteration: 82328
loss: 1.049182415008545,grad_norm: 0.9999996331078207, iteration: 82329
loss: 0.9964389801025391,grad_norm: 0.9999993947160039, iteration: 82330
loss: 0.9870598912239075,grad_norm: 0.7408348744713168, iteration: 82331
loss: 1.0407623052597046,grad_norm: 0.9999998758885881, iteration: 82332
loss: 1.023684024810791,grad_norm: 0.8548739393197125, iteration: 82333
loss: 1.0572978258132935,grad_norm: 0.9199544423993925, iteration: 82334
loss: 1.0091428756713867,grad_norm: 0.9999990078143801, iteration: 82335
loss: 1.0041162967681885,grad_norm: 0.9241112421912951, iteration: 82336
loss: 0.9725122451782227,grad_norm: 0.9999991218097576, iteration: 82337
loss: 1.0262819528579712,grad_norm: 0.9745923035954147, iteration: 82338
loss: 1.028380274772644,grad_norm: 0.9642650291050567, iteration: 82339
loss: 1.0028237104415894,grad_norm: 0.9067747120759272, iteration: 82340
loss: 1.0035488605499268,grad_norm: 0.8494333623193052, iteration: 82341
loss: 1.02305269241333,grad_norm: 0.9367057527908618, iteration: 82342
loss: 1.0129221677780151,grad_norm: 0.9999991914602091, iteration: 82343
loss: 1.0196510553359985,grad_norm: 0.9504898207061622, iteration: 82344
loss: 1.0025520324707031,grad_norm: 0.9055821417603616, iteration: 82345
loss: 1.0273051261901855,grad_norm: 0.921138232946492, iteration: 82346
loss: 1.0073492527008057,grad_norm: 0.9999991315703303, iteration: 82347
loss: 1.0346879959106445,grad_norm: 0.9999997812558785, iteration: 82348
loss: 1.0294004678726196,grad_norm: 0.9999990998203934, iteration: 82349
loss: 1.101082682609558,grad_norm: 0.9999999194867429, iteration: 82350
loss: 1.0708587169647217,grad_norm: 0.9756599654156238, iteration: 82351
loss: 0.9808710813522339,grad_norm: 0.8878431257686631, iteration: 82352
loss: 1.0045970678329468,grad_norm: 0.9999990841129388, iteration: 82353
loss: 1.0140984058380127,grad_norm: 0.9952005528204615, iteration: 82354
loss: 1.0135315656661987,grad_norm: 0.9592726570982998, iteration: 82355
loss: 1.021028995513916,grad_norm: 0.7084213873490435, iteration: 82356
loss: 1.0809433460235596,grad_norm: 0.9324536432960056, iteration: 82357
loss: 1.018381953239441,grad_norm: 0.9999991625441901, iteration: 82358
loss: 1.0348578691482544,grad_norm: 0.9999997233660516, iteration: 82359
loss: 1.0147043466567993,grad_norm: 0.9999991344214394, iteration: 82360
loss: 1.001358985900879,grad_norm: 0.8507342722834892, iteration: 82361
loss: 1.0619642734527588,grad_norm: 0.9999993596830559, iteration: 82362
loss: 0.9696049690246582,grad_norm: 0.859474162123991, iteration: 82363
loss: 1.0174667835235596,grad_norm: 0.8453109584758384, iteration: 82364
loss: 0.9820159077644348,grad_norm: 0.9127885628648158, iteration: 82365
loss: 1.016890287399292,grad_norm: 0.999999051299949, iteration: 82366
loss: 1.041133165359497,grad_norm: 0.9648560757734926, iteration: 82367
loss: 0.9976155757904053,grad_norm: 0.8961158502760695, iteration: 82368
loss: 1.0123692750930786,grad_norm: 0.8638476181456949, iteration: 82369
loss: 0.9828810095787048,grad_norm: 0.909317676149581, iteration: 82370
loss: 1.002975583076477,grad_norm: 0.9999991261462756, iteration: 82371
loss: 0.9944950342178345,grad_norm: 0.8066986564436402, iteration: 82372
loss: 1.034142017364502,grad_norm: 0.9999995126741549, iteration: 82373
loss: 0.988831639289856,grad_norm: 0.9999989896101874, iteration: 82374
loss: 1.062608242034912,grad_norm: 0.9999990750945665, iteration: 82375
loss: 1.013001799583435,grad_norm: 0.9999991351078578, iteration: 82376
loss: 1.004211187362671,grad_norm: 0.9581336165791484, iteration: 82377
loss: 1.0364511013031006,grad_norm: 0.9999995240040718, iteration: 82378
loss: 1.0197045803070068,grad_norm: 0.9999994074434233, iteration: 82379
loss: 0.9841511845588684,grad_norm: 0.926157164044674, iteration: 82380
loss: 1.0023465156555176,grad_norm: 0.9999990704973161, iteration: 82381
loss: 1.0338188409805298,grad_norm: 0.9112275323703547, iteration: 82382
loss: 1.0830410718917847,grad_norm: 0.9999995693516281, iteration: 82383
loss: 1.0408110618591309,grad_norm: 0.9844407226208629, iteration: 82384
loss: 1.003865361213684,grad_norm: 0.9438171668119975, iteration: 82385
loss: 1.0149738788604736,grad_norm: 0.9999990619276471, iteration: 82386
loss: 1.0133002996444702,grad_norm: 0.8990637858752268, iteration: 82387
loss: 1.0405100584030151,grad_norm: 0.9999995497690063, iteration: 82388
loss: 1.0192407369613647,grad_norm: 0.9999999486643093, iteration: 82389
loss: 1.014755129814148,grad_norm: 0.99999954152931, iteration: 82390
loss: 0.9681364893913269,grad_norm: 0.99999914902493, iteration: 82391
loss: 1.0299677848815918,grad_norm: 0.9694261370179224, iteration: 82392
loss: 1.0397047996520996,grad_norm: 0.9999998477537206, iteration: 82393
loss: 0.989605724811554,grad_norm: 0.9999992735056104, iteration: 82394
loss: 0.9974004030227661,grad_norm: 0.9999996270588649, iteration: 82395
loss: 0.9841195940971375,grad_norm: 0.9886059159614214, iteration: 82396
loss: 1.0703003406524658,grad_norm: 0.9999991512148464, iteration: 82397
loss: 1.1053506135940552,grad_norm: 0.9373505509313163, iteration: 82398
loss: 0.9788976907730103,grad_norm: 0.9999991968277393, iteration: 82399
loss: 0.9886715412139893,grad_norm: 0.9999997049846996, iteration: 82400
loss: 1.0018116235733032,grad_norm: 0.8843597739400866, iteration: 82401
loss: 0.9977167844772339,grad_norm: 0.8311910263664763, iteration: 82402
loss: 0.9926937818527222,grad_norm: 0.9130594301281192, iteration: 82403
loss: 1.0091826915740967,grad_norm: 0.9095342500879994, iteration: 82404
loss: 0.9959102272987366,grad_norm: 0.890384404028091, iteration: 82405
loss: 1.0015783309936523,grad_norm: 0.9515590815868034, iteration: 82406
loss: 0.9908527731895447,grad_norm: 0.8611207439369367, iteration: 82407
loss: 0.9840413928031921,grad_norm: 0.9999993956622654, iteration: 82408
loss: 1.0284730195999146,grad_norm: 0.9565611150399359, iteration: 82409
loss: 1.0021655559539795,grad_norm: 0.8086210637266924, iteration: 82410
loss: 1.0179128646850586,grad_norm: 0.9999990671661472, iteration: 82411
loss: 1.0250722169876099,grad_norm: 0.885953973965673, iteration: 82412
loss: 1.070602536201477,grad_norm: 0.9661658813749493, iteration: 82413
loss: 1.0336097478866577,grad_norm: 0.9999997102058463, iteration: 82414
loss: 1.0726346969604492,grad_norm: 0.9033440062117745, iteration: 82415
loss: 1.0183637142181396,grad_norm: 0.8383405486093848, iteration: 82416
loss: 0.9871832132339478,grad_norm: 0.9070354211077619, iteration: 82417
loss: 1.0365959405899048,grad_norm: 0.9999992172192798, iteration: 82418
loss: 0.9673859477043152,grad_norm: 0.9772568634706463, iteration: 82419
loss: 0.963636577129364,grad_norm: 0.8024959472879016, iteration: 82420
loss: 1.019217848777771,grad_norm: 0.9911270806559767, iteration: 82421
loss: 1.034990906715393,grad_norm: 0.8063216837119913, iteration: 82422
loss: 0.9874078631401062,grad_norm: 0.9590049748317591, iteration: 82423
loss: 1.018425464630127,grad_norm: 0.9999995184713867, iteration: 82424
loss: 1.031392216682434,grad_norm: 0.999999256986614, iteration: 82425
loss: 1.013453722000122,grad_norm: 0.9999991857570638, iteration: 82426
loss: 0.9777184128761292,grad_norm: 0.9999996899902885, iteration: 82427
loss: 1.0066930055618286,grad_norm: 0.7582684788499321, iteration: 82428
loss: 0.9955297708511353,grad_norm: 0.937696481459633, iteration: 82429
loss: 1.0003852844238281,grad_norm: 0.808305969505978, iteration: 82430
loss: 1.0011988878250122,grad_norm: 0.8718899377084601, iteration: 82431
loss: 1.0150306224822998,grad_norm: 0.9999989907971608, iteration: 82432
loss: 1.0287827253341675,grad_norm: 0.9672763567315628, iteration: 82433
loss: 1.0143802165985107,grad_norm: 0.7846048933676584, iteration: 82434
loss: 1.0428760051727295,grad_norm: 0.9999992083566247, iteration: 82435
loss: 0.9898086190223694,grad_norm: 0.9711972808408875, iteration: 82436
loss: 1.0022671222686768,grad_norm: 0.9099576631153253, iteration: 82437
loss: 0.9835618138313293,grad_norm: 0.9999990207755853, iteration: 82438
loss: 1.0056090354919434,grad_norm: 0.8975896823436686, iteration: 82439
loss: 0.9905471801757812,grad_norm: 0.9999997134732298, iteration: 82440
loss: 1.04317307472229,grad_norm: 0.999999098641542, iteration: 82441
loss: 0.9948069453239441,grad_norm: 0.9462418192305758, iteration: 82442
loss: 1.0881528854370117,grad_norm: 0.9999997120557832, iteration: 82443
loss: 1.0188992023468018,grad_norm: 0.9130807248733578, iteration: 82444
loss: 1.010328769683838,grad_norm: 0.999999088562794, iteration: 82445
loss: 1.0231661796569824,grad_norm: 0.9999997323279123, iteration: 82446
loss: 0.9795634746551514,grad_norm: 0.7881474966766231, iteration: 82447
loss: 1.0432703495025635,grad_norm: 0.9999995575842341, iteration: 82448
loss: 0.9830003380775452,grad_norm: 0.9999990855054981, iteration: 82449
loss: 1.1208815574645996,grad_norm: 0.9999991833379864, iteration: 82450
loss: 1.0446721315383911,grad_norm: 0.9999994766403385, iteration: 82451
loss: 1.0228177309036255,grad_norm: 0.9683915686834054, iteration: 82452
loss: 1.0028268098831177,grad_norm: 0.9563208252186962, iteration: 82453
loss: 0.9872262477874756,grad_norm: 0.8272563494007938, iteration: 82454
loss: 1.0168787240982056,grad_norm: 0.9999990499922037, iteration: 82455
loss: 0.9876481294631958,grad_norm: 0.9254113775111076, iteration: 82456
loss: 1.00583016872406,grad_norm: 0.9999991946923501, iteration: 82457
loss: 1.0266255140304565,grad_norm: 0.9118219594764322, iteration: 82458
loss: 1.0806370973587036,grad_norm: 0.9999998040797266, iteration: 82459
loss: 1.0121781826019287,grad_norm: 0.9999993218914911, iteration: 82460
loss: 0.9876935482025146,grad_norm: 0.8914782640205315, iteration: 82461
loss: 1.0910996198654175,grad_norm: 0.999999809640126, iteration: 82462
loss: 0.9963516592979431,grad_norm: 0.8527561196430229, iteration: 82463
loss: 0.9932705760002136,grad_norm: 0.999999021672938, iteration: 82464
loss: 0.9977214932441711,grad_norm: 0.9862006140553823, iteration: 82465
loss: 1.0301473140716553,grad_norm: 0.9999991356106057, iteration: 82466
loss: 1.0081554651260376,grad_norm: 0.9482892419192965, iteration: 82467
loss: 1.003447413444519,grad_norm: 0.9999991762191808, iteration: 82468
loss: 1.0083246231079102,grad_norm: 0.8665439951534589, iteration: 82469
loss: 1.0273774862289429,grad_norm: 0.9648906445371135, iteration: 82470
loss: 1.0766737461090088,grad_norm: 0.9199391851052761, iteration: 82471
loss: 1.013785719871521,grad_norm: 0.9068776996354427, iteration: 82472
loss: 1.003777027130127,grad_norm: 0.9999994306480491, iteration: 82473
loss: 0.9691988825798035,grad_norm: 0.9999990813949025, iteration: 82474
loss: 1.0280771255493164,grad_norm: 0.9999989493454504, iteration: 82475
loss: 1.0680503845214844,grad_norm: 0.9999991772403245, iteration: 82476
loss: 1.0341163873672485,grad_norm: 0.999998988001213, iteration: 82477
loss: 1.0065386295318604,grad_norm: 0.9999991489740436, iteration: 82478
loss: 1.0714035034179688,grad_norm: 0.9999999005058066, iteration: 82479
loss: 0.9977733492851257,grad_norm: 0.9999990848338722, iteration: 82480
loss: 0.9629867672920227,grad_norm: 0.7722326652354746, iteration: 82481
loss: 1.0132277011871338,grad_norm: 0.9102905931005423, iteration: 82482
loss: 0.9826239943504333,grad_norm: 0.8097627173902315, iteration: 82483
loss: 0.9932728409767151,grad_norm: 0.7653060512478946, iteration: 82484
loss: 0.9881989359855652,grad_norm: 0.9999990592461706, iteration: 82485
loss: 1.01418137550354,grad_norm: 0.9999992118066822, iteration: 82486
loss: 0.9783087372779846,grad_norm: 0.9096017270908356, iteration: 82487
loss: 1.0091238021850586,grad_norm: 0.8952533738030374, iteration: 82488
loss: 1.0253312587738037,grad_norm: 0.8836022453748539, iteration: 82489
loss: 1.0806684494018555,grad_norm: 0.9999990120416017, iteration: 82490
loss: 1.0146430730819702,grad_norm: 0.8149475284429656, iteration: 82491
loss: 1.1130753755569458,grad_norm: 0.9999999693353475, iteration: 82492
loss: 0.9925610423088074,grad_norm: 0.9999996710569389, iteration: 82493
loss: 1.0524072647094727,grad_norm: 0.8601511927725146, iteration: 82494
loss: 0.9771918654441833,grad_norm: 0.7604047960614922, iteration: 82495
loss: 1.018564224243164,grad_norm: 0.9999991968972389, iteration: 82496
loss: 0.9899753928184509,grad_norm: 0.9238672546453023, iteration: 82497
loss: 1.0034925937652588,grad_norm: 0.9111103325895124, iteration: 82498
loss: 1.1024466753005981,grad_norm: 0.9059929866623689, iteration: 82499
loss: 1.0336254835128784,grad_norm: 0.8544029365817507, iteration: 82500
loss: 0.9847295880317688,grad_norm: 0.9999989609988529, iteration: 82501
loss: 0.9904459118843079,grad_norm: 0.901116734614283, iteration: 82502
loss: 1.002172589302063,grad_norm: 0.9006797984336332, iteration: 82503
loss: 0.979633629322052,grad_norm: 0.9449661035800877, iteration: 82504
loss: 1.0284346342086792,grad_norm: 0.9041211737893802, iteration: 82505
loss: 1.1176211833953857,grad_norm: 0.9999989714713478, iteration: 82506
loss: 0.9879002571105957,grad_norm: 0.9440627097864505, iteration: 82507
loss: 1.06003737449646,grad_norm: 0.9999995911345285, iteration: 82508
loss: 1.0409066677093506,grad_norm: 0.9999989256878447, iteration: 82509
loss: 0.9987772703170776,grad_norm: 0.999999940525058, iteration: 82510
loss: 1.1311269998550415,grad_norm: 0.9999995872225547, iteration: 82511
loss: 1.0274704694747925,grad_norm: 0.7588744366180501, iteration: 82512
loss: 1.0076611042022705,grad_norm: 0.9653868927805557, iteration: 82513
loss: 0.9957510232925415,grad_norm: 0.8979430107162225, iteration: 82514
loss: 1.0352716445922852,grad_norm: 0.9999998969071262, iteration: 82515
loss: 1.0127711296081543,grad_norm: 0.9999993834519854, iteration: 82516
loss: 1.0812879800796509,grad_norm: 0.9999990960721044, iteration: 82517
loss: 1.010504126548767,grad_norm: 0.85085147768687, iteration: 82518
loss: 1.003250241279602,grad_norm: 0.9974157237444268, iteration: 82519
loss: 1.0045415163040161,grad_norm: 0.9755980581427207, iteration: 82520
loss: 1.0137848854064941,grad_norm: 0.9999991661163736, iteration: 82521
loss: 1.0195395946502686,grad_norm: 0.9999990336762185, iteration: 82522
loss: 1.0149999856948853,grad_norm: 0.9811639237720085, iteration: 82523
loss: 0.9959308505058289,grad_norm: 0.9999992410571078, iteration: 82524
loss: 0.9876448512077332,grad_norm: 0.9500268735574324, iteration: 82525
loss: 1.0134539604187012,grad_norm: 0.8585264550875868, iteration: 82526
loss: 1.0092071294784546,grad_norm: 0.9348215335812204, iteration: 82527
loss: 1.0059125423431396,grad_norm: 0.7851811395914068, iteration: 82528
loss: 1.0187031030654907,grad_norm: 0.8852961159889554, iteration: 82529
loss: 0.9967960119247437,grad_norm: 0.8556528196345482, iteration: 82530
loss: 1.1130090951919556,grad_norm: 0.9999994430674093, iteration: 82531
loss: 1.1120342016220093,grad_norm: 0.90771746356095, iteration: 82532
loss: 1.0917975902557373,grad_norm: 0.9999996844757673, iteration: 82533
loss: 1.0319929122924805,grad_norm: 0.8692024461808611, iteration: 82534
loss: 0.9892948865890503,grad_norm: 0.9999992745643592, iteration: 82535
loss: 1.046801209449768,grad_norm: 0.9999993924460143, iteration: 82536
loss: 1.043943166732788,grad_norm: 0.7934759135863694, iteration: 82537
loss: 0.9867076277732849,grad_norm: 0.8807347599601172, iteration: 82538
loss: 1.0211143493652344,grad_norm: 0.9432305454476624, iteration: 82539
loss: 1.0180915594100952,grad_norm: 0.7768551726325449, iteration: 82540
loss: 1.0281221866607666,grad_norm: 0.8386682986023215, iteration: 82541
loss: 1.0329389572143555,grad_norm: 0.9999998078658876, iteration: 82542
loss: 1.0041712522506714,grad_norm: 0.9999998177048541, iteration: 82543
loss: 0.9956923127174377,grad_norm: 0.9367905348558203, iteration: 82544
loss: 0.9903582334518433,grad_norm: 0.9999990309783253, iteration: 82545
loss: 1.0296480655670166,grad_norm: 0.9999998444022387, iteration: 82546
loss: 0.9657570123672485,grad_norm: 0.9999992278044308, iteration: 82547
loss: 0.97637540102005,grad_norm: 0.9085957416302093, iteration: 82548
loss: 0.9891828298568726,grad_norm: 0.8562476218177977, iteration: 82549
loss: 0.9793407320976257,grad_norm: 0.9999990119798783, iteration: 82550
loss: 1.0348155498504639,grad_norm: 0.8263810141067405, iteration: 82551
loss: 1.0244687795639038,grad_norm: 0.8433474182577806, iteration: 82552
loss: 1.008506178855896,grad_norm: 0.8805912514175082, iteration: 82553
loss: 0.9930014610290527,grad_norm: 0.959249465891306, iteration: 82554
loss: 1.0413124561309814,grad_norm: 0.9999991229307645, iteration: 82555
loss: 0.9926338195800781,grad_norm: 0.9999992580308125, iteration: 82556
loss: 1.0218414068222046,grad_norm: 0.7964137926474844, iteration: 82557
loss: 1.0159116983413696,grad_norm: 0.9466695032820008, iteration: 82558
loss: 0.9921896457672119,grad_norm: 0.8037011673289619, iteration: 82559
loss: 0.9999114274978638,grad_norm: 0.9999997019745448, iteration: 82560
loss: 0.9929936528205872,grad_norm: 0.9999993030806172, iteration: 82561
loss: 1.0187605619430542,grad_norm: 0.9999991392969473, iteration: 82562
loss: 0.98905348777771,grad_norm: 0.974571085002037, iteration: 82563
loss: 1.020444393157959,grad_norm: 0.9999996776146114, iteration: 82564
loss: 1.0210515260696411,grad_norm: 0.9115431008494326, iteration: 82565
loss: 0.9995605945587158,grad_norm: 0.9828136744131393, iteration: 82566
loss: 0.9932106733322144,grad_norm: 0.8067449699072908, iteration: 82567
loss: 0.9947889447212219,grad_norm: 0.8367073857238754, iteration: 82568
loss: 1.0013841390609741,grad_norm: 0.8995511359773416, iteration: 82569
loss: 1.057432770729065,grad_norm: 0.9999998112310895, iteration: 82570
loss: 1.0367270708084106,grad_norm: 0.9999991654247745, iteration: 82571
loss: 1.0361320972442627,grad_norm: 0.999999553655142, iteration: 82572
loss: 1.0102535486221313,grad_norm: 0.9818388695133663, iteration: 82573
loss: 0.9929422736167908,grad_norm: 0.8394040616623495, iteration: 82574
loss: 1.0130038261413574,grad_norm: 0.9899597054938137, iteration: 82575
loss: 0.997086763381958,grad_norm: 0.8075311445746842, iteration: 82576
loss: 1.0165481567382812,grad_norm: 0.999999533539161, iteration: 82577
loss: 0.9726817607879639,grad_norm: 0.9111168535997213, iteration: 82578
loss: 0.9926095604896545,grad_norm: 0.9999997796597578, iteration: 82579
loss: 0.9666939377784729,grad_norm: 0.9999989850691979, iteration: 82580
loss: 1.023301124572754,grad_norm: 0.9999996279836558, iteration: 82581
loss: 1.008385419845581,grad_norm: 0.8560942520972376, iteration: 82582
loss: 1.0055691003799438,grad_norm: 0.9999991611326223, iteration: 82583
loss: 0.9591596126556396,grad_norm: 0.8743148904844257, iteration: 82584
loss: 1.0024763345718384,grad_norm: 0.8009945081986571, iteration: 82585
loss: 0.9900675415992737,grad_norm: 0.8367885967324175, iteration: 82586
loss: 0.9959230422973633,grad_norm: 0.9999994218729286, iteration: 82587
loss: 1.0157122611999512,grad_norm: 0.9999991701946722, iteration: 82588
loss: 1.0284701585769653,grad_norm: 0.999999481793085, iteration: 82589
loss: 0.9647881388664246,grad_norm: 0.9999990121992498, iteration: 82590
loss: 1.0129584074020386,grad_norm: 0.7855733239431134, iteration: 82591
loss: 1.0547789335250854,grad_norm: 0.8254551989236883, iteration: 82592
loss: 1.0499157905578613,grad_norm: 0.9999990464574432, iteration: 82593
loss: 1.0009446144104004,grad_norm: 0.9062716379238954, iteration: 82594
loss: 1.0025137662887573,grad_norm: 0.9672963292237773, iteration: 82595
loss: 0.9894659519195557,grad_norm: 0.8515124654605905, iteration: 82596
loss: 0.99835205078125,grad_norm: 0.8527164354703294, iteration: 82597
loss: 1.026315689086914,grad_norm: 0.9636916050523718, iteration: 82598
loss: 0.9685026407241821,grad_norm: 0.7620000505680056, iteration: 82599
loss: 1.1113014221191406,grad_norm: 0.9207761978617549, iteration: 82600
loss: 1.0312789678573608,grad_norm: 0.9999993427453281, iteration: 82601
loss: 1.0336560010910034,grad_norm: 0.9999989626238026, iteration: 82602
loss: 1.0292094945907593,grad_norm: 0.9999993853132327, iteration: 82603
loss: 1.100290298461914,grad_norm: 0.9999996076066067, iteration: 82604
loss: 0.9930596947669983,grad_norm: 0.9999997138717123, iteration: 82605
loss: 0.9937164187431335,grad_norm: 0.8838970629680188, iteration: 82606
loss: 1.030253291130066,grad_norm: 0.9091153398492899, iteration: 82607
loss: 1.0352275371551514,grad_norm: 0.9999996183143616, iteration: 82608
loss: 0.9742013216018677,grad_norm: 0.9461190560870275, iteration: 82609
loss: 0.9796274304389954,grad_norm: 0.9497077377672385, iteration: 82610
loss: 0.9978697896003723,grad_norm: 0.8402159734987982, iteration: 82611
loss: 0.990000307559967,grad_norm: 1.0000000413439145, iteration: 82612
loss: 0.9994434118270874,grad_norm: 0.9999991373067376, iteration: 82613
loss: 0.9997034072875977,grad_norm: 0.9686317159357172, iteration: 82614
loss: 0.9643415808677673,grad_norm: 0.8191803176227151, iteration: 82615
loss: 1.051183819770813,grad_norm: 0.8592015287041639, iteration: 82616
loss: 1.0485825538635254,grad_norm: 0.9999990191943393, iteration: 82617
loss: 0.9787851572036743,grad_norm: 0.7915650166315819, iteration: 82618
loss: 0.9992409944534302,grad_norm: 0.9421197827965054, iteration: 82619
loss: 1.0118659734725952,grad_norm: 0.9672352447895056, iteration: 82620
loss: 0.9890609383583069,grad_norm: 0.9219068414278353, iteration: 82621
loss: 1.0086637735366821,grad_norm: 0.9999991581917386, iteration: 82622
loss: 1.02822744846344,grad_norm: 0.9499538699554623, iteration: 82623
loss: 0.9389065504074097,grad_norm: 0.8456428765655407, iteration: 82624
loss: 0.9994688034057617,grad_norm: 0.9346838607985487, iteration: 82625
loss: 0.9739789962768555,grad_norm: 0.9999991153651766, iteration: 82626
loss: 1.032120943069458,grad_norm: 0.8381393895552126, iteration: 82627
loss: 1.0237709283828735,grad_norm: 0.9999992366574654, iteration: 82628
loss: 1.1634021997451782,grad_norm: 0.9999993877181768, iteration: 82629
loss: 1.063245415687561,grad_norm: 0.9999994428124812, iteration: 82630
loss: 0.9911177754402161,grad_norm: 0.9376327848150385, iteration: 82631
loss: 0.9634281396865845,grad_norm: 0.9924921020639684, iteration: 82632
loss: 0.9903806447982788,grad_norm: 0.999999050988714, iteration: 82633
loss: 0.948284387588501,grad_norm: 0.999999189391975, iteration: 82634
loss: 1.0081119537353516,grad_norm: 0.9004929068398895, iteration: 82635
loss: 1.0777008533477783,grad_norm: 0.9999992742928141, iteration: 82636
loss: 1.0016508102416992,grad_norm: 0.9999994134642864, iteration: 82637
loss: 0.9513900876045227,grad_norm: 0.7820119115475901, iteration: 82638
loss: 1.0941537618637085,grad_norm: 0.9999997576814438, iteration: 82639
loss: 1.0668615102767944,grad_norm: 0.9999998580236967, iteration: 82640
loss: 1.0090878009796143,grad_norm: 0.995010938903166, iteration: 82641
loss: 1.0118253231048584,grad_norm: 0.9999991870825572, iteration: 82642
loss: 1.0393182039260864,grad_norm: 0.9714534728160117, iteration: 82643
loss: 0.9818038940429688,grad_norm: 0.999999214467166, iteration: 82644
loss: 0.975054144859314,grad_norm: 0.8438985340096067, iteration: 82645
loss: 0.9827224612236023,grad_norm: 0.9063862854306797, iteration: 82646
loss: 0.9983965754508972,grad_norm: 0.8418634055594687, iteration: 82647
loss: 1.0148080587387085,grad_norm: 0.9874961578623053, iteration: 82648
loss: 1.002566933631897,grad_norm: 0.9999989616326622, iteration: 82649
loss: 0.988676130771637,grad_norm: 0.9999991712287835, iteration: 82650
loss: 1.0471211671829224,grad_norm: 0.9999991340824222, iteration: 82651
loss: 1.028480052947998,grad_norm: 0.9999993817190009, iteration: 82652
loss: 1.034324049949646,grad_norm: 0.9999993561578765, iteration: 82653
loss: 1.0381405353546143,grad_norm: 0.9999991687641325, iteration: 82654
loss: 0.9842923283576965,grad_norm: 0.9024558697883116, iteration: 82655
loss: 1.0030405521392822,grad_norm: 0.8843169807923994, iteration: 82656
loss: 1.0153801441192627,grad_norm: 0.9999992078727428, iteration: 82657
loss: 0.9852108955383301,grad_norm: 0.811436775933313, iteration: 82658
loss: 0.9734061360359192,grad_norm: 0.8758683443136988, iteration: 82659
loss: 0.9549748301506042,grad_norm: 0.9999990387360316, iteration: 82660
loss: 1.0696756839752197,grad_norm: 0.934122730315799, iteration: 82661
loss: 0.976427435874939,grad_norm: 0.9999990222539507, iteration: 82662
loss: 0.9870026707649231,grad_norm: 0.922899328221888, iteration: 82663
loss: 1.0106594562530518,grad_norm: 0.86618743610269, iteration: 82664
loss: 0.9995293617248535,grad_norm: 0.803522478963087, iteration: 82665
loss: 1.0439532995224,grad_norm: 0.9999999384695522, iteration: 82666
loss: 0.9633405804634094,grad_norm: 0.8987495985819071, iteration: 82667
loss: 1.0433117151260376,grad_norm: 0.9983507000681461, iteration: 82668
loss: 0.9849265813827515,grad_norm: 0.9499460587253293, iteration: 82669
loss: 1.000527262687683,grad_norm: 0.8723802156935686, iteration: 82670
loss: 0.9766689538955688,grad_norm: 0.9999990579151568, iteration: 82671
loss: 1.0161197185516357,grad_norm: 0.8965140270486078, iteration: 82672
loss: 0.9811000227928162,grad_norm: 0.8666711424070741, iteration: 82673
loss: 1.0645772218704224,grad_norm: 0.9999991006756446, iteration: 82674
loss: 0.9639744758605957,grad_norm: 0.9415697239621207, iteration: 82675
loss: 1.005653738975525,grad_norm: 0.8069512176671707, iteration: 82676
loss: 0.9826956391334534,grad_norm: 0.9133821375420157, iteration: 82677
loss: 0.9980568289756775,grad_norm: 0.9719183498428117, iteration: 82678
loss: 1.0061640739440918,grad_norm: 0.9999991493854632, iteration: 82679
loss: 1.0026224851608276,grad_norm: 0.9999990147601638, iteration: 82680
loss: 1.0150703191757202,grad_norm: 0.9999991822584527, iteration: 82681
loss: 1.011757493019104,grad_norm: 0.83761119016378, iteration: 82682
loss: 0.975355863571167,grad_norm: 0.8846473834737257, iteration: 82683
loss: 1.0041273832321167,grad_norm: 0.9338085822297194, iteration: 82684
loss: 1.1184605360031128,grad_norm: 0.8705444696675677, iteration: 82685
loss: 0.9939833283424377,grad_norm: 0.9999989862272848, iteration: 82686
loss: 0.9954512119293213,grad_norm: 0.9679543979054988, iteration: 82687
loss: 0.9754133820533752,grad_norm: 0.9531172325997733, iteration: 82688
loss: 1.0236406326293945,grad_norm: 0.8920580216477126, iteration: 82689
loss: 1.0745480060577393,grad_norm: 0.8953488114981794, iteration: 82690
loss: 0.9781903028488159,grad_norm: 0.9999990079457007, iteration: 82691
loss: 0.9728027582168579,grad_norm: 0.9347904470421226, iteration: 82692
loss: 1.010946273803711,grad_norm: 0.9999990697975326, iteration: 82693
loss: 0.9476088881492615,grad_norm: 0.9467661994597945, iteration: 82694
loss: 1.011220097541809,grad_norm: 0.9999990543608114, iteration: 82695
loss: 1.0376783609390259,grad_norm: 0.7676158371635854, iteration: 82696
loss: 1.0193732976913452,grad_norm: 0.9999992668588199, iteration: 82697
loss: 1.0420043468475342,grad_norm: 0.9999992502722953, iteration: 82698
loss: 1.0140494108200073,grad_norm: 0.9999991056883897, iteration: 82699
loss: 1.0156586170196533,grad_norm: 0.9583528380589137, iteration: 82700
loss: 1.042868733406067,grad_norm: 0.9999990867106247, iteration: 82701
loss: 0.9955241084098816,grad_norm: 0.8074644724180654, iteration: 82702
loss: 0.9748598337173462,grad_norm: 0.8619923660738347, iteration: 82703
loss: 1.0209057331085205,grad_norm: 0.9901665224590687, iteration: 82704
loss: 1.0570766925811768,grad_norm: 0.9999991589887223, iteration: 82705
loss: 1.0123400688171387,grad_norm: 0.8849887651294842, iteration: 82706
loss: 1.0039464235305786,grad_norm: 0.8649059787027586, iteration: 82707
loss: 0.9899526238441467,grad_norm: 0.9445271007649713, iteration: 82708
loss: 1.0243991613388062,grad_norm: 0.9999991525887143, iteration: 82709
loss: 1.0038177967071533,grad_norm: 0.9066514173541539, iteration: 82710
loss: 0.9974840879440308,grad_norm: 0.9846884757497512, iteration: 82711
loss: 1.039947509765625,grad_norm: 0.8651197468968227, iteration: 82712
loss: 1.0246059894561768,grad_norm: 0.8500710056809528, iteration: 82713
loss: 0.968021810054779,grad_norm: 0.8974261904489653, iteration: 82714
loss: 1.0319756269454956,grad_norm: 0.999999738685064, iteration: 82715
loss: 1.0286885499954224,grad_norm: 0.9999992502602277, iteration: 82716
loss: 1.0029163360595703,grad_norm: 0.767605261240869, iteration: 82717
loss: 0.9982216954231262,grad_norm: 0.8560681443278482, iteration: 82718
loss: 1.004414439201355,grad_norm: 0.9711938015882873, iteration: 82719
loss: 1.0138041973114014,grad_norm: 0.7275168904704236, iteration: 82720
loss: 1.0871485471725464,grad_norm: 0.9999992222510937, iteration: 82721
loss: 1.0152873992919922,grad_norm: 1.000000081415495, iteration: 82722
loss: 0.9957911372184753,grad_norm: 0.9999991268401146, iteration: 82723
loss: 1.0455055236816406,grad_norm: 0.9999995264340876, iteration: 82724
loss: 1.0789272785186768,grad_norm: 0.9999994515385835, iteration: 82725
loss: 1.0044007301330566,grad_norm: 0.9022195588229613, iteration: 82726
loss: 1.0376249551773071,grad_norm: 0.9178724325645092, iteration: 82727
loss: 1.0087083578109741,grad_norm: 0.9999990108341863, iteration: 82728
loss: 1.0090125799179077,grad_norm: 0.9789455110965636, iteration: 82729
loss: 1.0134810209274292,grad_norm: 0.9998023715170966, iteration: 82730
loss: 0.9784291982650757,grad_norm: 0.9468380389269417, iteration: 82731
loss: 1.0066516399383545,grad_norm: 0.9424465124292717, iteration: 82732
loss: 1.0123237371444702,grad_norm: 0.9999990661964471, iteration: 82733
loss: 0.9973899722099304,grad_norm: 0.9730382940422734, iteration: 82734
loss: 1.0385419130325317,grad_norm: 0.9999999112541831, iteration: 82735
loss: 1.1207237243652344,grad_norm: 0.999999690733487, iteration: 82736
loss: 0.9700912833213806,grad_norm: 0.921086984500286, iteration: 82737
loss: 1.0364761352539062,grad_norm: 0.9999998368682321, iteration: 82738
loss: 0.9971283078193665,grad_norm: 0.9004873073122531, iteration: 82739
loss: 0.9945021867752075,grad_norm: 0.9999990659416979, iteration: 82740
loss: 1.0270538330078125,grad_norm: 0.9999989284907279, iteration: 82741
loss: 1.02738618850708,grad_norm: 0.8866612825075085, iteration: 82742
loss: 1.0304566621780396,grad_norm: 0.999999101489475, iteration: 82743
loss: 0.9748126864433289,grad_norm: 0.9999995422809718, iteration: 82744
loss: 0.9869626760482788,grad_norm: 0.9249776813759314, iteration: 82745
loss: 0.9852666854858398,grad_norm: 0.9999991683067636, iteration: 82746
loss: 0.9783892631530762,grad_norm: 0.9186540690393691, iteration: 82747
loss: 1.0412451028823853,grad_norm: 0.9999989866364934, iteration: 82748
loss: 1.0682034492492676,grad_norm: 0.8781166124154252, iteration: 82749
loss: 1.0005561113357544,grad_norm: 0.9999991592416507, iteration: 82750
loss: 1.011654257774353,grad_norm: 0.8805654573297228, iteration: 82751
loss: 0.9764169454574585,grad_norm: 0.8331717350308522, iteration: 82752
loss: 0.9753801226615906,grad_norm: 0.9017995206107184, iteration: 82753
loss: 0.9859663248062134,grad_norm: 0.9554415613708624, iteration: 82754
loss: 0.9874688386917114,grad_norm: 0.999999172440907, iteration: 82755
loss: 0.988274872303009,grad_norm: 0.7656119573608836, iteration: 82756
loss: 0.987379789352417,grad_norm: 0.7924772669748869, iteration: 82757
loss: 1.0193639993667603,grad_norm: 0.9459522204374976, iteration: 82758
loss: 1.023939609527588,grad_norm: 0.9999990864116267, iteration: 82759
loss: 1.0044437646865845,grad_norm: 0.9079672133409512, iteration: 82760
loss: 0.9971478581428528,grad_norm: 0.9999991611333432, iteration: 82761
loss: 1.0252283811569214,grad_norm: 0.8306461461511371, iteration: 82762
loss: 1.0617142915725708,grad_norm: 0.9246507946109919, iteration: 82763
loss: 1.1116228103637695,grad_norm: 0.9999994963454144, iteration: 82764
loss: 0.9792332053184509,grad_norm: 0.9875623590815441, iteration: 82765
loss: 1.0026441812515259,grad_norm: 0.8956160552668057, iteration: 82766
loss: 0.9963117241859436,grad_norm: 0.9293744877146671, iteration: 82767
loss: 1.099068284034729,grad_norm: 0.9899553514525181, iteration: 82768
loss: 1.0000576972961426,grad_norm: 0.9261349647136967, iteration: 82769
loss: 1.004319429397583,grad_norm: 0.9133750604630719, iteration: 82770
loss: 1.0548391342163086,grad_norm: 0.9999991205591896, iteration: 82771
loss: 0.9982133507728577,grad_norm: 0.9442782463654543, iteration: 82772
loss: 0.9920153617858887,grad_norm: 0.9360922551570886, iteration: 82773
loss: 0.999798595905304,grad_norm: 0.9288023731751318, iteration: 82774
loss: 1.0568267107009888,grad_norm: 0.9999991198141586, iteration: 82775
loss: 0.979953944683075,grad_norm: 0.8667014995311857, iteration: 82776
loss: 1.0080170631408691,grad_norm: 0.9999996186865165, iteration: 82777
loss: 0.9967835545539856,grad_norm: 0.8855497639839526, iteration: 82778
loss: 1.024807095527649,grad_norm: 0.9326542333204452, iteration: 82779
loss: 1.0923616886138916,grad_norm: 0.9999990492229129, iteration: 82780
loss: 0.9790493249893188,grad_norm: 0.8220459965972805, iteration: 82781
loss: 1.009591817855835,grad_norm: 0.9355083872646052, iteration: 82782
loss: 0.9515745639801025,grad_norm: 0.8670396401225395, iteration: 82783
loss: 0.9721806049346924,grad_norm: 0.999999005945119, iteration: 82784
loss: 1.0093532800674438,grad_norm: 0.8136919206947952, iteration: 82785
loss: 1.0165233612060547,grad_norm: 0.8431561488119627, iteration: 82786
loss: 1.0176923274993896,grad_norm: 0.8958766165850447, iteration: 82787
loss: 1.0023605823516846,grad_norm: 0.9999990309167053, iteration: 82788
loss: 1.0484304428100586,grad_norm: 0.9999992782013771, iteration: 82789
loss: 0.994899570941925,grad_norm: 0.9999991948272632, iteration: 82790
loss: 1.0120909214019775,grad_norm: 0.9999991131183468, iteration: 82791
loss: 1.0417602062225342,grad_norm: 0.8770130653795387, iteration: 82792
loss: 1.019970417022705,grad_norm: 0.9106339902380272, iteration: 82793
loss: 1.0020159482955933,grad_norm: 0.9999989853275753, iteration: 82794
loss: 0.9799001216888428,grad_norm: 0.8114554552056151, iteration: 82795
loss: 1.0798043012619019,grad_norm: 0.9999995058586204, iteration: 82796
loss: 1.0091919898986816,grad_norm: 0.7600336966373864, iteration: 82797
loss: 1.0186315774917603,grad_norm: 0.999999848302449, iteration: 82798
loss: 1.0950654745101929,grad_norm: 0.9999993156624437, iteration: 82799
loss: 0.9851617813110352,grad_norm: 0.9999990937336422, iteration: 82800
loss: 1.004744529724121,grad_norm: 0.7838936534533713, iteration: 82801
loss: 1.0237942934036255,grad_norm: 0.9999989943479062, iteration: 82802
loss: 1.0391356945037842,grad_norm: 0.9999990840334806, iteration: 82803
loss: 0.9941596388816833,grad_norm: 0.9999989853097226, iteration: 82804
loss: 1.0218300819396973,grad_norm: 0.8987037239415661, iteration: 82805
loss: 1.0150054693222046,grad_norm: 0.8222701455884995, iteration: 82806
loss: 1.000131368637085,grad_norm: 0.9999990995183291, iteration: 82807
loss: 1.0134859085083008,grad_norm: 0.9999991870762491, iteration: 82808
loss: 1.0230796337127686,grad_norm: 0.9999992392783249, iteration: 82809
loss: 1.0299805402755737,grad_norm: 0.9999991261052923, iteration: 82810
loss: 0.9917404651641846,grad_norm: 0.9999993711086582, iteration: 82811
loss: 1.0259931087493896,grad_norm: 0.7772552482440696, iteration: 82812
loss: 1.0102895498275757,grad_norm: 0.9999991624698367, iteration: 82813
loss: 0.9736354947090149,grad_norm: 0.9527980672796271, iteration: 82814
loss: 0.9638269543647766,grad_norm: 0.8870930451911353, iteration: 82815
loss: 0.994369387626648,grad_norm: 0.9355877140462744, iteration: 82816
loss: 0.9925018548965454,grad_norm: 0.9999992221575116, iteration: 82817
loss: 0.9942799210548401,grad_norm: 0.965641625389253, iteration: 82818
loss: 1.0135498046875,grad_norm: 0.9999990458649388, iteration: 82819
loss: 1.0079519748687744,grad_norm: 0.9485003298271388, iteration: 82820
loss: 1.0527548789978027,grad_norm: 0.8990539212226549, iteration: 82821
loss: 1.0147866010665894,grad_norm: 0.8759091200599881, iteration: 82822
loss: 1.0834619998931885,grad_norm: 0.9999994851833757, iteration: 82823
loss: 0.9912832975387573,grad_norm: 0.9626551989940602, iteration: 82824
loss: 1.017921805381775,grad_norm: 0.9176337716984817, iteration: 82825
loss: 0.9854466319084167,grad_norm: 0.8952973503245416, iteration: 82826
loss: 0.9572702646255493,grad_norm: 0.9999990806533404, iteration: 82827
loss: 1.017387866973877,grad_norm: 0.9172987723269745, iteration: 82828
loss: 0.9986220002174377,grad_norm: 0.7609455996337018, iteration: 82829
loss: 0.9486221671104431,grad_norm: 0.9999989859988099, iteration: 82830
loss: 1.0812268257141113,grad_norm: 0.9099553894018968, iteration: 82831
loss: 0.9889468550682068,grad_norm: 0.7909014742923011, iteration: 82832
loss: 1.0965207815170288,grad_norm: 0.9999992449977603, iteration: 82833
loss: 1.0573495626449585,grad_norm: 0.999999164052541, iteration: 82834
loss: 0.999182403087616,grad_norm: 0.9792173092508749, iteration: 82835
loss: 0.9909711480140686,grad_norm: 0.7608816237418924, iteration: 82836
loss: 0.9807844161987305,grad_norm: 0.9999990538985898, iteration: 82837
loss: 1.0130219459533691,grad_norm: 0.8761532628195289, iteration: 82838
loss: 1.0018831491470337,grad_norm: 0.9592951071087773, iteration: 82839
loss: 0.9893739223480225,grad_norm: 0.9999991202104294, iteration: 82840
loss: 0.9863927960395813,grad_norm: 0.9999991511244387, iteration: 82841
loss: 0.9795105457305908,grad_norm: 0.9174949639046348, iteration: 82842
loss: 1.027857780456543,grad_norm: 0.8283070094844127, iteration: 82843
loss: 0.995064914226532,grad_norm: 0.9999992005661038, iteration: 82844
loss: 0.9846723675727844,grad_norm: 0.9851833939328782, iteration: 82845
loss: 1.036853313446045,grad_norm: 0.9173703916699838, iteration: 82846
loss: 0.9888275861740112,grad_norm: 0.8410303307490168, iteration: 82847
loss: 1.0013810396194458,grad_norm: 0.8954645532921472, iteration: 82848
loss: 0.9870501160621643,grad_norm: 0.9999995110948537, iteration: 82849
loss: 1.0182697772979736,grad_norm: 0.9999990403419792, iteration: 82850
loss: 1.0009340047836304,grad_norm: 0.8752840212828692, iteration: 82851
loss: 0.9899448752403259,grad_norm: 0.999999150765683, iteration: 82852
loss: 0.9768337607383728,grad_norm: 0.8993627983736302, iteration: 82853
loss: 0.9981380701065063,grad_norm: 0.999999243880781, iteration: 82854
loss: 1.0123307704925537,grad_norm: 0.9024410597415939, iteration: 82855
loss: 1.0226303339004517,grad_norm: 0.7741591456879141, iteration: 82856
loss: 1.0411641597747803,grad_norm: 0.8326547863410756, iteration: 82857
loss: 1.007055401802063,grad_norm: 0.6997414011062439, iteration: 82858
loss: 1.0563621520996094,grad_norm: 0.8775356999537741, iteration: 82859
loss: 0.9934275150299072,grad_norm: 0.9743270020647724, iteration: 82860
loss: 1.0017484426498413,grad_norm: 0.9063950654683433, iteration: 82861
loss: 1.0419780015945435,grad_norm: 0.8919456855782577, iteration: 82862
loss: 0.9613926410675049,grad_norm: 0.9143767633604216, iteration: 82863
loss: 0.9941757917404175,grad_norm: 0.9999991753054525, iteration: 82864
loss: 1.0061696767807007,grad_norm: 0.9999991204004244, iteration: 82865
loss: 1.0116642713546753,grad_norm: 0.922374659061984, iteration: 82866
loss: 1.0134660005569458,grad_norm: 0.9276093908088751, iteration: 82867
loss: 1.0255006551742554,grad_norm: 0.869666074687306, iteration: 82868
loss: 1.0271222591400146,grad_norm: 0.9999991006331101, iteration: 82869
loss: 1.0074405670166016,grad_norm: 0.9999991615321691, iteration: 82870
loss: 0.9781928062438965,grad_norm: 0.9618504045388787, iteration: 82871
loss: 0.9992767572402954,grad_norm: 0.9999993895353101, iteration: 82872
loss: 1.0064862966537476,grad_norm: 0.7914548817213752, iteration: 82873
loss: 0.9953948259353638,grad_norm: 0.9999990188981607, iteration: 82874
loss: 0.9907642006874084,grad_norm: 0.9999997906775114, iteration: 82875
loss: 1.0797128677368164,grad_norm: 0.9999991736770413, iteration: 82876
loss: 0.9973829388618469,grad_norm: 0.999999051067317, iteration: 82877
loss: 0.9754395484924316,grad_norm: 0.9695916806273994, iteration: 82878
loss: 1.0204166173934937,grad_norm: 0.9153511968862273, iteration: 82879
loss: 1.0135900974273682,grad_norm: 0.7997478287009356, iteration: 82880
loss: 0.9957894086837769,grad_norm: 0.9189921119874849, iteration: 82881
loss: 0.9897181391716003,grad_norm: 0.7896971087067229, iteration: 82882
loss: 1.0381739139556885,grad_norm: 0.9999991836956287, iteration: 82883
loss: 1.0108433961868286,grad_norm: 0.9756752427429147, iteration: 82884
loss: 1.0357890129089355,grad_norm: 0.8186643324100609, iteration: 82885
loss: 1.0446302890777588,grad_norm: 0.9687474630250864, iteration: 82886
loss: 0.9908626079559326,grad_norm: 0.8418228625299431, iteration: 82887
loss: 1.0048388242721558,grad_norm: 0.9999994949404704, iteration: 82888
loss: 1.0105763673782349,grad_norm: 0.9109780402984691, iteration: 82889
loss: 1.0120302438735962,grad_norm: 0.8735203159397208, iteration: 82890
loss: 1.072279453277588,grad_norm: 0.9999999203492591, iteration: 82891
loss: 1.0185136795043945,grad_norm: 0.9577886056957977, iteration: 82892
loss: 0.9626854062080383,grad_norm: 0.849426661814503, iteration: 82893
loss: 1.007266879081726,grad_norm: 0.9999991515264752, iteration: 82894
loss: 1.0378834009170532,grad_norm: 0.9779239933517614, iteration: 82895
loss: 0.978343665599823,grad_norm: 0.9330424072558294, iteration: 82896
loss: 1.0604051351547241,grad_norm: 0.9999990437342672, iteration: 82897
loss: 0.9770357012748718,grad_norm: 0.8443501243681714, iteration: 82898
loss: 0.9958081841468811,grad_norm: 0.9999991570163873, iteration: 82899
loss: 1.003196120262146,grad_norm: 0.999999056232921, iteration: 82900
loss: 1.026595950126648,grad_norm: 0.8135639215151917, iteration: 82901
loss: 0.970675528049469,grad_norm: 0.9745569169303688, iteration: 82902
loss: 0.9844968914985657,grad_norm: 0.8892330435018515, iteration: 82903
loss: 0.999447762966156,grad_norm: 0.9999992797219576, iteration: 82904
loss: 1.0390485525131226,grad_norm: 0.99999914309761, iteration: 82905
loss: 0.9908804893493652,grad_norm: 0.9379594164246231, iteration: 82906
loss: 0.9842209219932556,grad_norm: 0.9595551879647648, iteration: 82907
loss: 0.9835987091064453,grad_norm: 0.7270536905957643, iteration: 82908
loss: 1.0745235681533813,grad_norm: 0.9999994522380592, iteration: 82909
loss: 1.0261571407318115,grad_norm: 0.8614514136270008, iteration: 82910
loss: 0.961556613445282,grad_norm: 0.9999990884399219, iteration: 82911
loss: 1.0168521404266357,grad_norm: 0.9074140230398592, iteration: 82912
loss: 1.0091155767440796,grad_norm: 0.8422885856698682, iteration: 82913
loss: 1.0370129346847534,grad_norm: 0.9999996906407039, iteration: 82914
loss: 0.9962970614433289,grad_norm: 0.7757605318873014, iteration: 82915
loss: 1.005776047706604,grad_norm: 0.8979587955915479, iteration: 82916
loss: 1.0499505996704102,grad_norm: 0.9999990543635272, iteration: 82917
loss: 1.0094807147979736,grad_norm: 0.9999992674632172, iteration: 82918
loss: 0.9990200400352478,grad_norm: 0.9077451096926871, iteration: 82919
loss: 1.0174652338027954,grad_norm: 0.903016964386436, iteration: 82920
loss: 0.9835970401763916,grad_norm: 0.905415667322604, iteration: 82921
loss: 1.0140681266784668,grad_norm: 0.810300078844781, iteration: 82922
loss: 0.9971065521240234,grad_norm: 0.999999217006678, iteration: 82923
loss: 0.9650159478187561,grad_norm: 0.7130108351527868, iteration: 82924
loss: 1.0206737518310547,grad_norm: 0.9162371924787289, iteration: 82925
loss: 0.983880877494812,grad_norm: 0.9999989792506868, iteration: 82926
loss: 1.0139883756637573,grad_norm: 0.9999991264231808, iteration: 82927
loss: 1.063905954360962,grad_norm: 0.9782339582463202, iteration: 82928
loss: 1.0776110887527466,grad_norm: 1.0000000166368141, iteration: 82929
loss: 1.0160715579986572,grad_norm: 0.8233854189661975, iteration: 82930
loss: 1.004978060722351,grad_norm: 0.9577014170662672, iteration: 82931
loss: 0.9955269694328308,grad_norm: 0.8701458243532736, iteration: 82932
loss: 0.9828097224235535,grad_norm: 0.8508404496096986, iteration: 82933
loss: 0.9977491497993469,grad_norm: 0.8781923564323998, iteration: 82934
loss: 1.0142768621444702,grad_norm: 0.8114921719628191, iteration: 82935
loss: 1.0189052820205688,grad_norm: 0.9999992175099734, iteration: 82936
loss: 0.9942906498908997,grad_norm: 0.9999992160313561, iteration: 82937
loss: 1.0792104005813599,grad_norm: 0.9999991878142401, iteration: 82938
loss: 1.006364345550537,grad_norm: 0.7660712676443956, iteration: 82939
loss: 0.996614396572113,grad_norm: 0.9845128623008831, iteration: 82940
loss: 1.0248552560806274,grad_norm: 0.9999991810113278, iteration: 82941
loss: 1.0223288536071777,grad_norm: 0.9999990472068702, iteration: 82942
loss: 1.0285793542861938,grad_norm: 0.9999989040174065, iteration: 82943
loss: 1.0120742321014404,grad_norm: 0.9999991180515909, iteration: 82944
loss: 1.0125092267990112,grad_norm: 0.8878377409788548, iteration: 82945
loss: 1.0011250972747803,grad_norm: 0.9852614739337815, iteration: 82946
loss: 0.979354202747345,grad_norm: 0.8762640092059283, iteration: 82947
loss: 1.019602656364441,grad_norm: 0.9517279477689375, iteration: 82948
loss: 0.955454409122467,grad_norm: 0.8420148054076134, iteration: 82949
loss: 1.0875157117843628,grad_norm: 0.9514302892103478, iteration: 82950
loss: 0.9819645285606384,grad_norm: 0.9999991547569784, iteration: 82951
loss: 0.9735620617866516,grad_norm: 0.9999991555709474, iteration: 82952
loss: 0.9832940697669983,grad_norm: 0.8749845611490097, iteration: 82953
loss: 1.0029072761535645,grad_norm: 0.9176152869495483, iteration: 82954
loss: 0.9884448051452637,grad_norm: 0.999999906770364, iteration: 82955
loss: 0.9654141068458557,grad_norm: 0.8228822677857249, iteration: 82956
loss: 0.9960744976997375,grad_norm: 0.7420896955519838, iteration: 82957
loss: 0.9759636521339417,grad_norm: 0.9999992243398096, iteration: 82958
loss: 0.9942077398300171,grad_norm: 0.8875563689518935, iteration: 82959
loss: 0.9928452968597412,grad_norm: 0.8705974050419343, iteration: 82960
loss: 1.0374799966812134,grad_norm: 0.8236503397746235, iteration: 82961
loss: 0.9881561398506165,grad_norm: 0.7906800143790067, iteration: 82962
loss: 1.0092090368270874,grad_norm: 0.9120077531159748, iteration: 82963
loss: 1.0184885263442993,grad_norm: 0.911516639616015, iteration: 82964
loss: 0.9828743934631348,grad_norm: 0.8678586878271879, iteration: 82965
loss: 1.0363056659698486,grad_norm: 0.9982657399497074, iteration: 82966
loss: 1.0229753255844116,grad_norm: 0.9999992602860349, iteration: 82967
loss: 0.9940431118011475,grad_norm: 0.9065648812085423, iteration: 82968
loss: 1.0125045776367188,grad_norm: 0.9999991888892205, iteration: 82969
loss: 1.0094842910766602,grad_norm: 0.8765694483008477, iteration: 82970
loss: 1.0281931161880493,grad_norm: 0.9999994040457979, iteration: 82971
loss: 0.9866803884506226,grad_norm: 0.9343181620090741, iteration: 82972
loss: 0.9806180596351624,grad_norm: 0.9999990988885042, iteration: 82973
loss: 0.9753559231758118,grad_norm: 0.8376395431630771, iteration: 82974
loss: 1.0084137916564941,grad_norm: 0.8799504392892498, iteration: 82975
loss: 0.9936925768852234,grad_norm: 0.822937159441167, iteration: 82976
loss: 1.003583550453186,grad_norm: 0.9999989785330091, iteration: 82977
loss: 0.9523231387138367,grad_norm: 0.9444735919033117, iteration: 82978
loss: 0.9823987483978271,grad_norm: 0.8103619505871829, iteration: 82979
loss: 1.0137332677841187,grad_norm: 0.9999994460891494, iteration: 82980
loss: 0.996546745300293,grad_norm: 0.9900672672961665, iteration: 82981
loss: 0.9852063059806824,grad_norm: 0.8051921587531115, iteration: 82982
loss: 1.0024381875991821,grad_norm: 0.8670363754816263, iteration: 82983
loss: 1.0097512006759644,grad_norm: 0.7552528136826556, iteration: 82984
loss: 0.9959743618965149,grad_norm: 0.9999990860323287, iteration: 82985
loss: 0.9628180265426636,grad_norm: 0.9999992004645298, iteration: 82986
loss: 0.9937913417816162,grad_norm: 0.7956836886316951, iteration: 82987
loss: 0.9939544796943665,grad_norm: 0.9999990942675372, iteration: 82988
loss: 0.9952602386474609,grad_norm: 0.9333367349688121, iteration: 82989
loss: 0.9862953424453735,grad_norm: 0.7986608340477505, iteration: 82990
loss: 0.9806679487228394,grad_norm: 0.8429607302962275, iteration: 82991
loss: 0.9485504031181335,grad_norm: 0.9999993196317868, iteration: 82992
loss: 1.0167882442474365,grad_norm: 0.9880351448319032, iteration: 82993
loss: 1.0243709087371826,grad_norm: 0.9999991718412296, iteration: 82994
loss: 0.9702158570289612,grad_norm: 0.7638949283159124, iteration: 82995
loss: 0.9963235855102539,grad_norm: 0.9484290949386297, iteration: 82996
loss: 1.028701901435852,grad_norm: 0.9113330916607721, iteration: 82997
loss: 0.9899691343307495,grad_norm: 0.9999993970719393, iteration: 82998
loss: 0.9650336503982544,grad_norm: 0.8887309503896753, iteration: 82999
loss: 0.9900805354118347,grad_norm: 0.9999991681061232, iteration: 83000
loss: 1.0128687620162964,grad_norm: 0.9600601324597456, iteration: 83001
loss: 0.9694891571998596,grad_norm: 0.9237092951315073, iteration: 83002
loss: 0.9878354668617249,grad_norm: 0.7356776397519244, iteration: 83003
loss: 1.008821725845337,grad_norm: 0.9999992741941208, iteration: 83004
loss: 0.977577805519104,grad_norm: 0.8007672927427262, iteration: 83005
loss: 1.0345126390457153,grad_norm: 0.9991608915317868, iteration: 83006
loss: 0.997442901134491,grad_norm: 0.9999990946317073, iteration: 83007
loss: 1.0848846435546875,grad_norm: 0.9999998797406373, iteration: 83008
loss: 0.9675629138946533,grad_norm: 0.9375974911406466, iteration: 83009
loss: 0.9962493777275085,grad_norm: 0.9669022475217559, iteration: 83010
loss: 1.0037811994552612,grad_norm: 0.876241174836104, iteration: 83011
loss: 0.9989519715309143,grad_norm: 0.9999991929484798, iteration: 83012
loss: 0.9783132672309875,grad_norm: 0.8491152779931395, iteration: 83013
loss: 0.9792811274528503,grad_norm: 0.9999991266679842, iteration: 83014
loss: 1.0421557426452637,grad_norm: 0.9999995359838599, iteration: 83015
loss: 1.0300171375274658,grad_norm: 0.8381882631588795, iteration: 83016
loss: 1.033347725868225,grad_norm: 0.9558186740177943, iteration: 83017
loss: 0.9990097284317017,grad_norm: 0.896406749751712, iteration: 83018
loss: 0.9593414068222046,grad_norm: 0.8634845218234313, iteration: 83019
loss: 0.9945499897003174,grad_norm: 0.9999991372828225, iteration: 83020
loss: 1.0470856428146362,grad_norm: 0.9999990407456453, iteration: 83021
loss: 1.0506370067596436,grad_norm: 0.8888862454837146, iteration: 83022
loss: 1.0183049440383911,grad_norm: 0.9999991988430077, iteration: 83023
loss: 1.04526948928833,grad_norm: 0.9725901957506198, iteration: 83024
loss: 1.0080583095550537,grad_norm: 0.9762447026807614, iteration: 83025
loss: 0.9963659644126892,grad_norm: 0.9011066797378829, iteration: 83026
loss: 1.165827751159668,grad_norm: 0.9999996934496448, iteration: 83027
loss: 1.0004080533981323,grad_norm: 0.963190546554731, iteration: 83028
loss: 1.0075241327285767,grad_norm: 0.999999598494866, iteration: 83029
loss: 1.0165364742279053,grad_norm: 0.8314774027855814, iteration: 83030
loss: 1.011268973350525,grad_norm: 0.8779949488041096, iteration: 83031
loss: 1.033909559249878,grad_norm: 0.9003552935757752, iteration: 83032
loss: 1.0233992338180542,grad_norm: 0.9830057510640426, iteration: 83033
loss: 1.0093461275100708,grad_norm: 0.999999094910783, iteration: 83034
loss: 1.027029037475586,grad_norm: 0.8068827982519594, iteration: 83035
loss: 1.0004667043685913,grad_norm: 0.8932694245120029, iteration: 83036
loss: 1.0491136312484741,grad_norm: 0.824306242669449, iteration: 83037
loss: 1.0202012062072754,grad_norm: 0.9999996010242713, iteration: 83038
loss: 0.9847474098205566,grad_norm: 0.9136965219629265, iteration: 83039
loss: 1.0056706666946411,grad_norm: 0.7992455692832954, iteration: 83040
loss: 1.0073775053024292,grad_norm: 0.9765959539740564, iteration: 83041
loss: 1.0320719480514526,grad_norm: 0.9999992748982444, iteration: 83042
loss: 1.0160869359970093,grad_norm: 0.9999996181790852, iteration: 83043
loss: 0.9885447025299072,grad_norm: 0.9047220526736678, iteration: 83044
loss: 1.0255489349365234,grad_norm: 0.8195938594773988, iteration: 83045
loss: 1.0105921030044556,grad_norm: 0.812584549328073, iteration: 83046
loss: 0.9906015992164612,grad_norm: 0.9999994025058339, iteration: 83047
loss: 0.975334882736206,grad_norm: 0.9999995187699346, iteration: 83048
loss: 1.0030642747879028,grad_norm: 0.9930754599100895, iteration: 83049
loss: 1.0269196033477783,grad_norm: 0.8516294754640902, iteration: 83050
loss: 1.0155974626541138,grad_norm: 0.8101518366862668, iteration: 83051
loss: 0.9819625616073608,grad_norm: 0.999999195381525, iteration: 83052
loss: 0.9943252205848694,grad_norm: 0.9999991536157449, iteration: 83053
loss: 0.9961954355239868,grad_norm: 0.9054794639323591, iteration: 83054
loss: 1.0124197006225586,grad_norm: 0.8435727399030569, iteration: 83055
loss: 0.9957135915756226,grad_norm: 0.8762878764956001, iteration: 83056
loss: 0.9921843409538269,grad_norm: 0.9999993887325446, iteration: 83057
loss: 1.0945128202438354,grad_norm: 0.9999991720104787, iteration: 83058
loss: 1.036220908164978,grad_norm: 0.9148562411984245, iteration: 83059
loss: 0.9714080691337585,grad_norm: 0.92840159007553, iteration: 83060
loss: 1.0456722974777222,grad_norm: 0.836226803031315, iteration: 83061
loss: 1.03827965259552,grad_norm: 0.9388713193670287, iteration: 83062
loss: 0.9856274127960205,grad_norm: 0.9999991251352103, iteration: 83063
loss: 0.994738757610321,grad_norm: 0.8830918082540222, iteration: 83064
loss: 1.0078662633895874,grad_norm: 0.8599881328298604, iteration: 83065
loss: 0.9993011951446533,grad_norm: 0.999999150214305, iteration: 83066
loss: 1.0476152896881104,grad_norm: 0.9001806971119148, iteration: 83067
loss: 1.005230188369751,grad_norm: 0.9999990303468296, iteration: 83068
loss: 0.9806406497955322,grad_norm: 0.9133351086811551, iteration: 83069
loss: 0.9781670570373535,grad_norm: 0.882108187439287, iteration: 83070
loss: 0.9940906167030334,grad_norm: 0.9072629178074064, iteration: 83071
loss: 1.03323495388031,grad_norm: 0.9428891116540692, iteration: 83072
loss: 0.9744648933410645,grad_norm: 0.9837135463387506, iteration: 83073
loss: 0.9918573498725891,grad_norm: 0.9043611847147524, iteration: 83074
loss: 0.9995954036712646,grad_norm: 0.9999990301043371, iteration: 83075
loss: 0.9833114743232727,grad_norm: 0.8565064247328278, iteration: 83076
loss: 0.9981905221939087,grad_norm: 0.9999991923207272, iteration: 83077
loss: 0.9838964343070984,grad_norm: 0.9129255126226894, iteration: 83078
loss: 0.9943939447402954,grad_norm: 0.994212962161575, iteration: 83079
loss: 0.9808869957923889,grad_norm: 0.8102960330761074, iteration: 83080
loss: 0.9935297966003418,grad_norm: 0.999999044485324, iteration: 83081
loss: 0.9928166270256042,grad_norm: 0.8702346815548657, iteration: 83082
loss: 1.043911337852478,grad_norm: 0.9882042829779479, iteration: 83083
loss: 0.9993306398391724,grad_norm: 0.8983517532810716, iteration: 83084
loss: 0.958418607711792,grad_norm: 0.9999991282268559, iteration: 83085
loss: 0.9962614178657532,grad_norm: 0.9358184144978102, iteration: 83086
loss: 1.097009301185608,grad_norm: 0.9391593206478536, iteration: 83087
loss: 0.9487997889518738,grad_norm: 0.9999991360781101, iteration: 83088
loss: 1.0219930410385132,grad_norm: 0.9531738502606846, iteration: 83089
loss: 0.9997594952583313,grad_norm: 0.9999993285699739, iteration: 83090
loss: 1.0024033784866333,grad_norm: 0.8496426962706609, iteration: 83091
loss: 1.0092146396636963,grad_norm: 0.8717964440732782, iteration: 83092
loss: 1.0053044557571411,grad_norm: 0.9999989671913306, iteration: 83093
loss: 0.9960330128669739,grad_norm: 0.9999990859909007, iteration: 83094
loss: 1.0062159299850464,grad_norm: 0.8453267581395733, iteration: 83095
loss: 1.0552332401275635,grad_norm: 0.9999993786270641, iteration: 83096
loss: 0.9866747260093689,grad_norm: 0.9414962981326835, iteration: 83097
loss: 1.0814391374588013,grad_norm: 0.999999205428887, iteration: 83098
loss: 1.0446940660476685,grad_norm: 0.9404067928273603, iteration: 83099
loss: 0.9906253814697266,grad_norm: 0.8829972607939056, iteration: 83100
loss: 1.0237064361572266,grad_norm: 0.8416863601511615, iteration: 83101
loss: 0.9771295189857483,grad_norm: 0.9165235881968112, iteration: 83102
loss: 0.9937527179718018,grad_norm: 0.7807697310388192, iteration: 83103
loss: 0.9834528565406799,grad_norm: 0.9999990880574413, iteration: 83104
loss: 1.0346451997756958,grad_norm: 0.9999990603098353, iteration: 83105
loss: 1.0281016826629639,grad_norm: 0.9999990753225378, iteration: 83106
loss: 1.011470913887024,grad_norm: 0.8859068623185652, iteration: 83107
loss: 1.0076512098312378,grad_norm: 0.9999991127725583, iteration: 83108
loss: 1.021696925163269,grad_norm: 0.999999274590014, iteration: 83109
loss: 1.009531021118164,grad_norm: 0.9474753872879043, iteration: 83110
loss: 1.0207414627075195,grad_norm: 0.9999992762579362, iteration: 83111
loss: 1.0189756155014038,grad_norm: 0.8193178832440388, iteration: 83112
loss: 1.0568779706954956,grad_norm: 0.9999992788521941, iteration: 83113
loss: 1.0153822898864746,grad_norm: 0.9999992308586343, iteration: 83114
loss: 1.0311362743377686,grad_norm: 0.9535425923150654, iteration: 83115
loss: 1.0253701210021973,grad_norm: 0.9083286991271242, iteration: 83116
loss: 0.9966989159584045,grad_norm: 0.9999991938342868, iteration: 83117
loss: 0.9743993878364563,grad_norm: 0.8699054058218482, iteration: 83118
loss: 1.0015064477920532,grad_norm: 0.9999992671231634, iteration: 83119
loss: 1.005630373954773,grad_norm: 0.9824414443956755, iteration: 83120
loss: 0.9789910316467285,grad_norm: 0.8597843096537601, iteration: 83121
loss: 1.0757139921188354,grad_norm: 0.9999991247085351, iteration: 83122
loss: 1.0300546884536743,grad_norm: 0.9999990934671006, iteration: 83123
loss: 0.9967098832130432,grad_norm: 0.9096261212335782, iteration: 83124
loss: 0.9984920620918274,grad_norm: 0.939549150151293, iteration: 83125
loss: 1.0034797191619873,grad_norm: 0.9305833759623312, iteration: 83126
loss: 1.0254037380218506,grad_norm: 0.9356689552481239, iteration: 83127
loss: 1.0283300876617432,grad_norm: 0.999999311916872, iteration: 83128
loss: 1.1398756504058838,grad_norm: 0.9999996362592426, iteration: 83129
loss: 1.0315003395080566,grad_norm: 0.9999992904574673, iteration: 83130
loss: 0.9838690161705017,grad_norm: 0.9999989922770002, iteration: 83131
loss: 1.0192519426345825,grad_norm: 0.9999992716416485, iteration: 83132
loss: 1.0135295391082764,grad_norm: 0.8708430657536935, iteration: 83133
loss: 1.0343906879425049,grad_norm: 0.9330591100734117, iteration: 83134
loss: 1.0172673463821411,grad_norm: 0.9999993241300473, iteration: 83135
loss: 1.001460313796997,grad_norm: 0.9741950991699959, iteration: 83136
loss: 1.0009671449661255,grad_norm: 0.889817591305554, iteration: 83137
loss: 0.977342963218689,grad_norm: 0.9395329499019673, iteration: 83138
loss: 0.978536069393158,grad_norm: 0.8942772588563342, iteration: 83139
loss: 1.0101227760314941,grad_norm: 0.9999996070102507, iteration: 83140
loss: 1.0047924518585205,grad_norm: 0.9999992525469994, iteration: 83141
loss: 1.0138261318206787,grad_norm: 0.9999992939103659, iteration: 83142
loss: 1.0098350048065186,grad_norm: 0.8819471365359023, iteration: 83143
loss: 1.006640911102295,grad_norm: 0.986650693149761, iteration: 83144
loss: 1.005624532699585,grad_norm: 0.9288597063836609, iteration: 83145
loss: 1.0016112327575684,grad_norm: 0.9999992653242284, iteration: 83146
loss: 1.012070655822754,grad_norm: 0.9399673524416983, iteration: 83147
loss: 0.9769099950790405,grad_norm: 0.9999991300368687, iteration: 83148
loss: 0.9879278540611267,grad_norm: 0.8218169957756108, iteration: 83149
loss: 1.007222056388855,grad_norm: 0.999999116471735, iteration: 83150
loss: 1.0006755590438843,grad_norm: 0.9916751278064646, iteration: 83151
loss: 1.043991208076477,grad_norm: 0.8243238624178288, iteration: 83152
loss: 0.9889156818389893,grad_norm: 0.8442995956144151, iteration: 83153
loss: 0.9991327524185181,grad_norm: 0.9540889729763805, iteration: 83154
loss: 1.032680869102478,grad_norm: 0.8174100276067957, iteration: 83155
loss: 0.9869588613510132,grad_norm: 0.7708972605355928, iteration: 83156
loss: 0.9884958863258362,grad_norm: 0.979829834359239, iteration: 83157
loss: 0.9937782883644104,grad_norm: 0.730402459473258, iteration: 83158
loss: 1.013819694519043,grad_norm: 0.9999991471560669, iteration: 83159
loss: 1.0095642805099487,grad_norm: 0.9999991224704436, iteration: 83160
loss: 1.0734435319900513,grad_norm: 0.9999995205773832, iteration: 83161
loss: 1.0364407300949097,grad_norm: 0.8194786792797562, iteration: 83162
loss: 0.9405310750007629,grad_norm: 0.8542115442310139, iteration: 83163
loss: 1.009459376335144,grad_norm: 0.9520861536484618, iteration: 83164
loss: 1.0222338438034058,grad_norm: 0.8861513361710466, iteration: 83165
loss: 1.0423575639724731,grad_norm: 0.9988121955320753, iteration: 83166
loss: 1.0074094533920288,grad_norm: 0.7896828753370378, iteration: 83167
loss: 1.0072402954101562,grad_norm: 0.9426066417635587, iteration: 83168
loss: 0.9990257024765015,grad_norm: 0.8968750686608219, iteration: 83169
loss: 0.9985204935073853,grad_norm: 0.9333243993004839, iteration: 83170
loss: 1.0028469562530518,grad_norm: 0.9680034276259889, iteration: 83171
loss: 1.0014389753341675,grad_norm: 0.999999139543446, iteration: 83172
loss: 0.9781132936477661,grad_norm: 0.9942977036366637, iteration: 83173
loss: 0.9604570865631104,grad_norm: 0.8471045180956595, iteration: 83174
loss: 1.0032975673675537,grad_norm: 0.9317318359975387, iteration: 83175
loss: 1.0012675523757935,grad_norm: 0.8490879767092075, iteration: 83176
loss: 0.9891813397407532,grad_norm: 0.7968803734101594, iteration: 83177
loss: 0.9874297976493835,grad_norm: 0.9134353462569091, iteration: 83178
loss: 1.014060616493225,grad_norm: 0.9999992869034422, iteration: 83179
loss: 0.9871993660926819,grad_norm: 0.9122783926632131, iteration: 83180
loss: 0.9793612957000732,grad_norm: 0.8566176336976222, iteration: 83181
loss: 0.997410237789154,grad_norm: 0.8980572889174494, iteration: 83182
loss: 1.011160135269165,grad_norm: 0.8501768886065794, iteration: 83183
loss: 0.9890578985214233,grad_norm: 0.8104351291425814, iteration: 83184
loss: 1.0228990316390991,grad_norm: 0.9168912113979891, iteration: 83185
loss: 1.020362138748169,grad_norm: 0.9999989917431641, iteration: 83186
loss: 1.0076403617858887,grad_norm: 0.999999037685047, iteration: 83187
loss: 1.0016003847122192,grad_norm: 0.9580886943605135, iteration: 83188
loss: 1.0222454071044922,grad_norm: 0.9763582043098126, iteration: 83189
loss: 0.9922071695327759,grad_norm: 0.8946081529552887, iteration: 83190
loss: 0.967103123664856,grad_norm: 0.8452146237492827, iteration: 83191
loss: 0.9624808430671692,grad_norm: 0.9148143037866842, iteration: 83192
loss: 0.9707053303718567,grad_norm: 0.88146403215925, iteration: 83193
loss: 0.9961878061294556,grad_norm: 0.8292630939101864, iteration: 83194
loss: 0.9687031507492065,grad_norm: 0.9977691696660723, iteration: 83195
loss: 1.0704803466796875,grad_norm: 0.8761386734470066, iteration: 83196
loss: 1.0048257112503052,grad_norm: 0.8888009965785991, iteration: 83197
loss: 1.016446590423584,grad_norm: 0.8503250607355359, iteration: 83198
loss: 0.9866502285003662,grad_norm: 0.999999005541128, iteration: 83199
loss: 1.0073294639587402,grad_norm: 0.9520772737164703, iteration: 83200
loss: 1.0277040004730225,grad_norm: 0.8887809555833992, iteration: 83201
loss: 1.0003325939178467,grad_norm: 0.9628978473031499, iteration: 83202
loss: 0.9803780913352966,grad_norm: 0.9160351324270414, iteration: 83203
loss: 1.0352436304092407,grad_norm: 0.9251153693948143, iteration: 83204
loss: 1.0384008884429932,grad_norm: 0.9999994305964154, iteration: 83205
loss: 1.0128625631332397,grad_norm: 0.9999991733021723, iteration: 83206
loss: 0.9995543360710144,grad_norm: 0.8089342900940637, iteration: 83207
loss: 0.9946508407592773,grad_norm: 0.9999990603808324, iteration: 83208
loss: 1.0262893438339233,grad_norm: 0.9999990385037926, iteration: 83209
loss: 1.0529149770736694,grad_norm: 0.9999991007721367, iteration: 83210
loss: 1.021582841873169,grad_norm: 0.9999991913187702, iteration: 83211
loss: 1.047810673713684,grad_norm: 0.9999998240687973, iteration: 83212
loss: 0.9681923985481262,grad_norm: 0.9999991312811652, iteration: 83213
loss: 1.0044304132461548,grad_norm: 0.825561311645873, iteration: 83214
loss: 1.0034741163253784,grad_norm: 0.9999991705881344, iteration: 83215
loss: 0.976285994052887,grad_norm: 0.92512222999983, iteration: 83216
loss: 1.0010764598846436,grad_norm: 0.9999990750030535, iteration: 83217
loss: 0.989851176738739,grad_norm: 0.9598761864182263, iteration: 83218
loss: 1.0212806463241577,grad_norm: 0.8145819449371063, iteration: 83219
loss: 1.0062730312347412,grad_norm: 0.8957297910010031, iteration: 83220
loss: 0.989861786365509,grad_norm: 0.9749435157006521, iteration: 83221
loss: 1.0274654626846313,grad_norm: 0.9999996576007987, iteration: 83222
loss: 0.9829480051994324,grad_norm: 0.9999990740646064, iteration: 83223
loss: 1.0117006301879883,grad_norm: 0.9289708936778194, iteration: 83224
loss: 1.0106884241104126,grad_norm: 0.8052869171054815, iteration: 83225
loss: 1.0250197649002075,grad_norm: 0.9806415061265784, iteration: 83226
loss: 0.9751400351524353,grad_norm: 0.9919778006836901, iteration: 83227
loss: 0.982097864151001,grad_norm: 0.8712429969842591, iteration: 83228
loss: 1.0064382553100586,grad_norm: 0.9947358064902362, iteration: 83229
loss: 1.0658456087112427,grad_norm: 0.9999998302995061, iteration: 83230
loss: 0.99183589220047,grad_norm: 0.8762801101117902, iteration: 83231
loss: 0.9939659833908081,grad_norm: 0.9677241346120964, iteration: 83232
loss: 0.9734117984771729,grad_norm: 0.9999991233412299, iteration: 83233
loss: 1.0068656206130981,grad_norm: 0.9843196855551506, iteration: 83234
loss: 0.9741445779800415,grad_norm: 0.9806219789351159, iteration: 83235
loss: 0.9975979924201965,grad_norm: 0.9206280604443121, iteration: 83236
loss: 1.023694634437561,grad_norm: 0.8944993303914351, iteration: 83237
loss: 1.0071831941604614,grad_norm: 0.8799280940031264, iteration: 83238
loss: 1.0032960176467896,grad_norm: 0.9999994046307049, iteration: 83239
loss: 0.969627857208252,grad_norm: 0.9999990727383923, iteration: 83240
loss: 1.0744106769561768,grad_norm: 0.9999991956028327, iteration: 83241
loss: 1.0392004251480103,grad_norm: 0.999999277859992, iteration: 83242
loss: 0.9636558890342712,grad_norm: 0.9999989542973073, iteration: 83243
loss: 0.9789689183235168,grad_norm: 0.9999991873258844, iteration: 83244
loss: 0.9866660237312317,grad_norm: 0.9627560836366144, iteration: 83245
loss: 0.9804858565330505,grad_norm: 0.8991232285345315, iteration: 83246
loss: 0.9817788004875183,grad_norm: 0.9999989779522471, iteration: 83247
loss: 1.0356113910675049,grad_norm: 0.9987852728313266, iteration: 83248
loss: 1.0156841278076172,grad_norm: 0.9999989166866844, iteration: 83249
loss: 1.0155385732650757,grad_norm: 0.8473690473758392, iteration: 83250
loss: 0.998738169670105,grad_norm: 0.9536711693847589, iteration: 83251
loss: 0.9878604412078857,grad_norm: 0.8249104338834927, iteration: 83252
loss: 0.9984949827194214,grad_norm: 0.9999996035025418, iteration: 83253
loss: 0.9955374002456665,grad_norm: 0.9999991742425198, iteration: 83254
loss: 1.0068472623825073,grad_norm: 0.9999990766030956, iteration: 83255
loss: 1.043563723564148,grad_norm: 0.930666165507351, iteration: 83256
loss: 1.036632776260376,grad_norm: 0.8358468497825323, iteration: 83257
loss: 0.981006920337677,grad_norm: 0.8816807733081559, iteration: 83258
loss: 0.9967193603515625,grad_norm: 0.8420232885540099, iteration: 83259
loss: 1.0080373287200928,grad_norm: 0.9703348456960385, iteration: 83260
loss: 1.008180856704712,grad_norm: 0.9226100333129538, iteration: 83261
loss: 1.037577748298645,grad_norm: 0.7813691060083163, iteration: 83262
loss: 0.9773715734481812,grad_norm: 0.9479692611193884, iteration: 83263
loss: 0.9968903660774231,grad_norm: 0.7218433433386996, iteration: 83264
loss: 0.9754292964935303,grad_norm: 0.9577562970449015, iteration: 83265
loss: 1.021751046180725,grad_norm: 0.984528686009752, iteration: 83266
loss: 0.9817043542861938,grad_norm: 0.9753849247176267, iteration: 83267
loss: 1.0320420265197754,grad_norm: 0.8602396111496015, iteration: 83268
loss: 1.011171817779541,grad_norm: 0.999999212295814, iteration: 83269
loss: 1.0300949811935425,grad_norm: 0.9999990956463345, iteration: 83270
loss: 0.960334062576294,grad_norm: 0.9923045341147448, iteration: 83271
loss: 1.011185884475708,grad_norm: 0.9484275872162309, iteration: 83272
loss: 1.0150431394577026,grad_norm: 0.9999990929046461, iteration: 83273
loss: 1.1000967025756836,grad_norm: 0.8926069416295591, iteration: 83274
loss: 0.9920142292976379,grad_norm: 0.9200403899604603, iteration: 83275
loss: 0.9803098440170288,grad_norm: 0.9191392804567817, iteration: 83276
loss: 1.0234240293502808,grad_norm: 0.9952053814063164, iteration: 83277
loss: 0.9685962796211243,grad_norm: 0.8998675731855257, iteration: 83278
loss: 1.0086250305175781,grad_norm: 0.9999991275783267, iteration: 83279
loss: 0.9890145659446716,grad_norm: 0.8846502246528878, iteration: 83280
loss: 1.0015716552734375,grad_norm: 0.9146525842571844, iteration: 83281
loss: 0.9413859844207764,grad_norm: 0.8006407703379093, iteration: 83282
loss: 0.9649385213851929,grad_norm: 0.9371644689591141, iteration: 83283
loss: 0.9815278053283691,grad_norm: 0.9999998355951573, iteration: 83284
loss: 1.03060781955719,grad_norm: 0.9999994366562913, iteration: 83285
loss: 0.9777297973632812,grad_norm: 0.9267778368481235, iteration: 83286
loss: 1.0167070627212524,grad_norm: 0.9999995810276973, iteration: 83287
loss: 0.9826512336730957,grad_norm: 0.8452969617603568, iteration: 83288
loss: 0.987464427947998,grad_norm: 0.999999098656372, iteration: 83289
loss: 1.078234076499939,grad_norm: 0.999999214311641, iteration: 83290
loss: 1.0184986591339111,grad_norm: 0.9999991404350033, iteration: 83291
loss: 0.9974535703659058,grad_norm: 0.8951128312600092, iteration: 83292
loss: 0.9910908937454224,grad_norm: 0.876549555164414, iteration: 83293
loss: 0.9998099207878113,grad_norm: 0.9078647228163537, iteration: 83294
loss: 0.9806238412857056,grad_norm: 0.9638804716169557, iteration: 83295
loss: 0.9625792503356934,grad_norm: 0.7737518608032627, iteration: 83296
loss: 0.9859530329704285,grad_norm: 0.8983974002528041, iteration: 83297
loss: 1.0095717906951904,grad_norm: 0.9999994440824125, iteration: 83298
loss: 0.9799644351005554,grad_norm: 0.8718794005241275, iteration: 83299
loss: 1.000288724899292,grad_norm: 0.9999991822020847, iteration: 83300
loss: 1.038856029510498,grad_norm: 0.9999989546277714, iteration: 83301
loss: 1.0734196901321411,grad_norm: 0.8891357555675999, iteration: 83302
loss: 0.9700507521629333,grad_norm: 0.9999991463970671, iteration: 83303
loss: 1.0143163204193115,grad_norm: 0.9180989704239259, iteration: 83304
loss: 0.9991516470909119,grad_norm: 0.9955057328981051, iteration: 83305
loss: 1.0211127996444702,grad_norm: 0.9999994509845177, iteration: 83306
loss: 1.032525897026062,grad_norm: 0.9255399118228392, iteration: 83307
loss: 1.0802595615386963,grad_norm: 0.9999994651357932, iteration: 83308
loss: 1.0221853256225586,grad_norm: 0.9858412769079037, iteration: 83309
loss: 0.9896787405014038,grad_norm: 0.8631627260829526, iteration: 83310
loss: 1.0916825532913208,grad_norm: 0.999999530372739, iteration: 83311
loss: 1.1616867780685425,grad_norm: 0.9999998084208512, iteration: 83312
loss: 1.0045925378799438,grad_norm: 0.8198010769941844, iteration: 83313
loss: 1.0309901237487793,grad_norm: 0.8848197135257697, iteration: 83314
loss: 1.023445725440979,grad_norm: 0.9639322750299211, iteration: 83315
loss: 1.0410654544830322,grad_norm: 0.7932314653750622, iteration: 83316
loss: 1.0534446239471436,grad_norm: 1.0000000137279534, iteration: 83317
loss: 0.9935410022735596,grad_norm: 0.9976539346413354, iteration: 83318
loss: 1.0062226057052612,grad_norm: 0.9187370706463203, iteration: 83319
loss: 1.0271624326705933,grad_norm: 0.9999998005729498, iteration: 83320
loss: 1.101479172706604,grad_norm: 0.9999996009612916, iteration: 83321
loss: 1.0562375783920288,grad_norm: 0.9999998486688515, iteration: 83322
loss: 1.0348633527755737,grad_norm: 0.8281385667092487, iteration: 83323
loss: 1.1022385358810425,grad_norm: 0.9999995973533631, iteration: 83324
loss: 1.0554924011230469,grad_norm: 0.9999993537488991, iteration: 83325
loss: 1.0241174697875977,grad_norm: 0.9999995376731193, iteration: 83326
loss: 1.0353293418884277,grad_norm: 0.9999991982280366, iteration: 83327
loss: 1.0034538507461548,grad_norm: 0.9447320705497257, iteration: 83328
loss: 0.9553929567337036,grad_norm: 0.8794103669435022, iteration: 83329
loss: 1.1233344078063965,grad_norm: 0.9999993279558205, iteration: 83330
loss: 1.0007232427597046,grad_norm: 0.9999994712993874, iteration: 83331
loss: 1.0006120204925537,grad_norm: 0.9982615157247924, iteration: 83332
loss: 1.0076037645339966,grad_norm: 0.9999991405814015, iteration: 83333
loss: 0.9986221194267273,grad_norm: 0.9319230492908198, iteration: 83334
loss: 0.9832472801208496,grad_norm: 0.9999989538488587, iteration: 83335
loss: 1.05916428565979,grad_norm: 0.9999995377143582, iteration: 83336
loss: 1.0469818115234375,grad_norm: 0.9999994543364762, iteration: 83337
loss: 1.0122941732406616,grad_norm: 0.9881992548273203, iteration: 83338
loss: 1.044321060180664,grad_norm: 0.8239472411940929, iteration: 83339
loss: 1.1057509183883667,grad_norm: 0.999999325977807, iteration: 83340
loss: 1.0051767826080322,grad_norm: 0.7977602464280172, iteration: 83341
loss: 0.995082676410675,grad_norm: 0.9999995832452243, iteration: 83342
loss: 1.116258978843689,grad_norm: 0.9999998744214319, iteration: 83343
loss: 1.0432093143463135,grad_norm: 0.9345248236641707, iteration: 83344
loss: 0.9930347800254822,grad_norm: 0.9999996654516251, iteration: 83345
loss: 1.032148838043213,grad_norm: 0.8631145463289981, iteration: 83346
loss: 1.0205506086349487,grad_norm: 0.9999992351371988, iteration: 83347
loss: 1.1134079694747925,grad_norm: 0.9268066944940512, iteration: 83348
loss: 0.9303035140037537,grad_norm: 0.9323611843868705, iteration: 83349
loss: 0.9946907758712769,grad_norm: 0.9999991559630821, iteration: 83350
loss: 1.0950846672058105,grad_norm: 0.9559625893396488, iteration: 83351
loss: 0.9630613327026367,grad_norm: 0.9303473999344868, iteration: 83352
loss: 1.066709280014038,grad_norm: 0.924557441370153, iteration: 83353
loss: 1.0204033851623535,grad_norm: 0.9651064433696281, iteration: 83354
loss: 1.0425578355789185,grad_norm: 0.902345293437987, iteration: 83355
loss: 1.153590440750122,grad_norm: 0.9999992240014864, iteration: 83356
loss: 1.2666256427764893,grad_norm: 0.9999997282944739, iteration: 83357
loss: 1.2234594821929932,grad_norm: 0.9999991904826869, iteration: 83358
loss: 1.2257972955703735,grad_norm: 0.999999200705037, iteration: 83359
loss: 1.025972604751587,grad_norm: 0.7828090947201078, iteration: 83360
loss: 1.2527687549591064,grad_norm: 0.9999998384732693, iteration: 83361
loss: 1.0469046831130981,grad_norm: 0.9311296854181165, iteration: 83362
loss: 1.1719586849212646,grad_norm: 0.9999993067428445, iteration: 83363
loss: 1.0565648078918457,grad_norm: 0.9999990289513435, iteration: 83364
loss: 0.9660578966140747,grad_norm: 0.8724832132981566, iteration: 83365
loss: 0.9812960624694824,grad_norm: 0.9983004788583163, iteration: 83366
loss: 1.082079291343689,grad_norm: 0.9407583332482323, iteration: 83367
loss: 0.9657747149467468,grad_norm: 0.9999992159082576, iteration: 83368
loss: 1.010527491569519,grad_norm: 0.9999994739281103, iteration: 83369
loss: 1.0225886106491089,grad_norm: 0.9811355487152742, iteration: 83370
loss: 1.088377594947815,grad_norm: 0.9999992595465749, iteration: 83371
loss: 1.048263669013977,grad_norm: 0.9999996426764259, iteration: 83372
loss: 1.0072931051254272,grad_norm: 0.890142471780049, iteration: 83373
loss: 1.0061800479888916,grad_norm: 0.9999990571148308, iteration: 83374
loss: 1.034091830253601,grad_norm: 0.8514333546173984, iteration: 83375
loss: 0.9847501516342163,grad_norm: 0.9999990631031773, iteration: 83376
loss: 0.9911590218544006,grad_norm: 0.9999990597738957, iteration: 83377
loss: 1.0357639789581299,grad_norm: 0.9999999073824477, iteration: 83378
loss: 1.0088685750961304,grad_norm: 0.9999993563820322, iteration: 83379
loss: 1.0311369895935059,grad_norm: 0.9999997564546205, iteration: 83380
loss: 1.0382362604141235,grad_norm: 0.9992128976884485, iteration: 83381
loss: 1.0041595697402954,grad_norm: 0.8783633740289415, iteration: 83382
loss: 1.0183343887329102,grad_norm: 0.9805744763650606, iteration: 83383
loss: 0.9989332556724548,grad_norm: 0.8056323267686955, iteration: 83384
loss: 0.9956356287002563,grad_norm: 0.9999990295946053, iteration: 83385
loss: 0.9843187928199768,grad_norm: 0.9999990909212839, iteration: 83386
loss: 0.997715413570404,grad_norm: 0.8070268327070172, iteration: 83387
loss: 0.9811617732048035,grad_norm: 0.7931388630132321, iteration: 83388
loss: 1.0897520780563354,grad_norm: 0.999999624929453, iteration: 83389
loss: 1.0162463188171387,grad_norm: 0.8342258684281674, iteration: 83390
loss: 1.0152294635772705,grad_norm: 0.9999990538271187, iteration: 83391
loss: 1.0191278457641602,grad_norm: 0.9999998309714564, iteration: 83392
loss: 1.0702533721923828,grad_norm: 0.9446040894352437, iteration: 83393
loss: 1.030752182006836,grad_norm: 0.999999051036751, iteration: 83394
loss: 0.977393388748169,grad_norm: 0.8601718622557258, iteration: 83395
loss: 0.9994685053825378,grad_norm: 0.9058569730408463, iteration: 83396
loss: 0.9566347002983093,grad_norm: 0.9999991490755895, iteration: 83397
loss: 1.074286699295044,grad_norm: 0.9999997153778102, iteration: 83398
loss: 1.1074903011322021,grad_norm: 0.9999993071512739, iteration: 83399
loss: 1.0695457458496094,grad_norm: 0.9999989761770962, iteration: 83400
loss: 1.0340932607650757,grad_norm: 0.9999998356573958, iteration: 83401
loss: 0.9908048510551453,grad_norm: 0.9960451863048208, iteration: 83402
loss: 1.0173914432525635,grad_norm: 0.9999990451002949, iteration: 83403
loss: 1.1257784366607666,grad_norm: 0.9999993564947625, iteration: 83404
loss: 1.0128995180130005,grad_norm: 0.9687181756336214, iteration: 83405
loss: 0.9970107078552246,grad_norm: 0.9182565503311089, iteration: 83406
loss: 0.972469687461853,grad_norm: 0.999999106080427, iteration: 83407
loss: 1.062593936920166,grad_norm: 0.9999995235956245, iteration: 83408
loss: 1.0096572637557983,grad_norm: 0.9100209056187689, iteration: 83409
loss: 1.013147234916687,grad_norm: 0.9999990171292565, iteration: 83410
loss: 1.0707486867904663,grad_norm: 0.9999995104933679, iteration: 83411
loss: 0.9814077019691467,grad_norm: 0.9404857236287908, iteration: 83412
loss: 1.0193105936050415,grad_norm: 0.9999991582137293, iteration: 83413
loss: 0.982612133026123,grad_norm: 0.824729493882309, iteration: 83414
loss: 1.0180482864379883,grad_norm: 0.8637565984809588, iteration: 83415
loss: 1.0137065649032593,grad_norm: 0.9999990610476752, iteration: 83416
loss: 0.9729552865028381,grad_norm: 0.9999995166714867, iteration: 83417
loss: 1.0114485025405884,grad_norm: 0.9292454856907709, iteration: 83418
loss: 1.009126901626587,grad_norm: 0.999999104280071, iteration: 83419
loss: 1.0240286588668823,grad_norm: 0.9462647213090964, iteration: 83420
loss: 1.037589430809021,grad_norm: 0.8274335101917106, iteration: 83421
loss: 1.005225658416748,grad_norm: 0.9999997521799636, iteration: 83422
loss: 0.997868537902832,grad_norm: 0.8861568243238533, iteration: 83423
loss: 1.0124382972717285,grad_norm: 0.9474086837198364, iteration: 83424
loss: 0.998892605304718,grad_norm: 0.7912067079945769, iteration: 83425
loss: 1.0127674341201782,grad_norm: 0.9179307008441355, iteration: 83426
loss: 0.9841247797012329,grad_norm: 0.8941802918253713, iteration: 83427
loss: 0.9824396371841431,grad_norm: 0.8485512330885738, iteration: 83428
loss: 1.0002282857894897,grad_norm: 0.9999992200962164, iteration: 83429
loss: 1.0763553380966187,grad_norm: 0.9999999395850708, iteration: 83430
loss: 1.0035165548324585,grad_norm: 0.8960271213436782, iteration: 83431
loss: 1.0023529529571533,grad_norm: 0.999999121054386, iteration: 83432
loss: 1.0129426717758179,grad_norm: 0.9999993825898942, iteration: 83433
loss: 1.0215482711791992,grad_norm: 0.9135629834939386, iteration: 83434
loss: 1.1130750179290771,grad_norm: 0.9999992609397398, iteration: 83435
loss: 1.0150058269500732,grad_norm: 0.9861834203392011, iteration: 83436
loss: 1.0232702493667603,grad_norm: 0.9999990820043484, iteration: 83437
loss: 1.020990252494812,grad_norm: 0.8924468506915927, iteration: 83438
loss: 1.0236682891845703,grad_norm: 0.9999992879096137, iteration: 83439
loss: 1.0832308530807495,grad_norm: 0.9351666070428184, iteration: 83440
loss: 1.096440315246582,grad_norm: 0.9999992360851547, iteration: 83441
loss: 1.0093697309494019,grad_norm: 0.8909727625815257, iteration: 83442
loss: 1.087051510810852,grad_norm: 0.9999995685082469, iteration: 83443
loss: 0.9875631332397461,grad_norm: 0.999999295569931, iteration: 83444
loss: 0.9516891837120056,grad_norm: 0.9999991303823403, iteration: 83445
loss: 1.0135442018508911,grad_norm: 0.9828470588326255, iteration: 83446
loss: 1.0517102479934692,grad_norm: 0.9999997511840767, iteration: 83447
loss: 1.0063536167144775,grad_norm: 0.9339382260022304, iteration: 83448
loss: 1.034265398979187,grad_norm: 0.9999991132898294, iteration: 83449
loss: 0.9737234115600586,grad_norm: 0.9999993442858948, iteration: 83450
loss: 0.9705116748809814,grad_norm: 0.7973862683667305, iteration: 83451
loss: 1.0032780170440674,grad_norm: 0.8954501651273444, iteration: 83452
loss: 1.0088874101638794,grad_norm: 0.9999993053883582, iteration: 83453
loss: 1.0202723741531372,grad_norm: 0.8100262586626188, iteration: 83454
loss: 1.0008087158203125,grad_norm: 0.9293008417903366, iteration: 83455
loss: 0.9566066861152649,grad_norm: 0.8939511774853205, iteration: 83456
loss: 1.009475588798523,grad_norm: 0.7252608777740122, iteration: 83457
loss: 1.045150876045227,grad_norm: 0.958808036273809, iteration: 83458
loss: 1.0638742446899414,grad_norm: 0.9269612933678246, iteration: 83459
loss: 0.9840841889381409,grad_norm: 0.9999992962332906, iteration: 83460
loss: 1.0204684734344482,grad_norm: 0.999999523088015, iteration: 83461
loss: 1.0257984399795532,grad_norm: 0.9999991600829999, iteration: 83462
loss: 1.0284093618392944,grad_norm: 0.9328696154941191, iteration: 83463
loss: 1.094490885734558,grad_norm: 1.000000007185305, iteration: 83464
loss: 0.9972821474075317,grad_norm: 0.8925138754447415, iteration: 83465
loss: 1.0078986883163452,grad_norm: 0.9187904273956934, iteration: 83466
loss: 1.0114340782165527,grad_norm: 0.9832538643116652, iteration: 83467
loss: 0.9960240721702576,grad_norm: 0.9298772943071324, iteration: 83468
loss: 1.0598994493484497,grad_norm: 0.9999994363807007, iteration: 83469
loss: 1.018790602684021,grad_norm: 0.9999993952205984, iteration: 83470
loss: 1.0047627687454224,grad_norm: 0.999999149807892, iteration: 83471
loss: 1.0165952444076538,grad_norm: 0.8792434312991487, iteration: 83472
loss: 1.024620771408081,grad_norm: 0.999999375745514, iteration: 83473
loss: 1.049940586090088,grad_norm: 0.9999998228543443, iteration: 83474
loss: 0.9904817342758179,grad_norm: 0.9999990930019239, iteration: 83475
loss: 0.9868438243865967,grad_norm: 0.8852043743241362, iteration: 83476
loss: 1.0368982553482056,grad_norm: 0.806737695133668, iteration: 83477
loss: 0.9870679974555969,grad_norm: 0.8443698665549767, iteration: 83478
loss: 0.9921888113021851,grad_norm: 0.9999989635534164, iteration: 83479
loss: 1.0023056268692017,grad_norm: 0.7800155704891447, iteration: 83480
loss: 1.03653085231781,grad_norm: 0.999999395347356, iteration: 83481
loss: 1.015333890914917,grad_norm: 0.8658118207219814, iteration: 83482
loss: 1.0238404273986816,grad_norm: 0.8842072443287275, iteration: 83483
loss: 1.0118449926376343,grad_norm: 0.9999990553736269, iteration: 83484
loss: 1.0046253204345703,grad_norm: 0.986903219365169, iteration: 83485
loss: 1.0766041278839111,grad_norm: 0.9999998440868169, iteration: 83486
loss: 0.9938651323318481,grad_norm: 0.8758051281642062, iteration: 83487
loss: 1.0235803127288818,grad_norm: 0.9999991234476643, iteration: 83488
loss: 1.0136386156082153,grad_norm: 0.7024193741173159, iteration: 83489
loss: 0.9677531719207764,grad_norm: 0.9999991831589841, iteration: 83490
loss: 1.0140575170516968,grad_norm: 0.9178236329521585, iteration: 83491
loss: 0.9691635370254517,grad_norm: 0.8216046087699189, iteration: 83492
loss: 0.9949599504470825,grad_norm: 0.888700691037268, iteration: 83493
loss: 1.0533819198608398,grad_norm: 0.9999992390713256, iteration: 83494
loss: 0.9916144609451294,grad_norm: 0.9718550173499095, iteration: 83495
loss: 0.9943200349807739,grad_norm: 0.9999991411771307, iteration: 83496
loss: 0.9920425415039062,grad_norm: 0.984085195161726, iteration: 83497
loss: 1.0214513540267944,grad_norm: 0.8384745726009719, iteration: 83498
loss: 1.0102957487106323,grad_norm: 0.8520648690872675, iteration: 83499
loss: 0.9758177995681763,grad_norm: 0.9999990206166929, iteration: 83500
loss: 0.9675726890563965,grad_norm: 0.9313076098817555, iteration: 83501
loss: 0.9908997416496277,grad_norm: 0.9412333272283465, iteration: 83502
loss: 1.0222290754318237,grad_norm: 0.999999164536063, iteration: 83503
loss: 1.028892159461975,grad_norm: 0.8071204780156381, iteration: 83504
loss: 1.049108624458313,grad_norm: 0.9999996285915992, iteration: 83505
loss: 1.0190807580947876,grad_norm: 0.9999991969432952, iteration: 83506
loss: 0.9943044781684875,grad_norm: 0.9999990648936297, iteration: 83507
loss: 0.9930768609046936,grad_norm: 0.9999993876724923, iteration: 83508
loss: 0.9647788405418396,grad_norm: 0.9999993548303358, iteration: 83509
loss: 1.0263463258743286,grad_norm: 0.9429193211775831, iteration: 83510
loss: 0.9738839268684387,grad_norm: 0.9967543958556008, iteration: 83511
loss: 0.9878447651863098,grad_norm: 0.838918924526973, iteration: 83512
loss: 1.0038566589355469,grad_norm: 0.9999990377464829, iteration: 83513
loss: 1.09943687915802,grad_norm: 0.999999354014301, iteration: 83514
loss: 0.9690045118331909,grad_norm: 0.8396595883784833, iteration: 83515
loss: 1.0103341341018677,grad_norm: 0.8560446854740084, iteration: 83516
loss: 1.0304269790649414,grad_norm: 0.9999996848245271, iteration: 83517
loss: 0.9888083338737488,grad_norm: 0.9999992087691754, iteration: 83518
loss: 1.1087589263916016,grad_norm: 0.9595945861214209, iteration: 83519
loss: 0.980252742767334,grad_norm: 0.7775181558629776, iteration: 83520
loss: 0.9959701895713806,grad_norm: 0.9999997206529339, iteration: 83521
loss: 1.002354383468628,grad_norm: 0.7588968978895281, iteration: 83522
loss: 1.041239857673645,grad_norm: 0.9999990694507953, iteration: 83523
loss: 0.9931327104568481,grad_norm: 0.7921784302922094, iteration: 83524
loss: 0.9820775389671326,grad_norm: 0.8650345738881181, iteration: 83525
loss: 0.9689924716949463,grad_norm: 0.7778009748445474, iteration: 83526
loss: 1.0137654542922974,grad_norm: 0.9051330543652993, iteration: 83527
loss: 0.9926040768623352,grad_norm: 0.9999990262329748, iteration: 83528
loss: 1.0247875452041626,grad_norm: 0.9999991003223586, iteration: 83529
loss: 0.9825817346572876,grad_norm: 0.9999992059322707, iteration: 83530
loss: 1.0141676664352417,grad_norm: 0.9999991578949486, iteration: 83531
loss: 0.9832737445831299,grad_norm: 0.8367062090617771, iteration: 83532
loss: 1.0709490776062012,grad_norm: 0.9999996180000605, iteration: 83533
loss: 0.9884602427482605,grad_norm: 0.93820175354477, iteration: 83534
loss: 0.9806057214736938,grad_norm: 0.8992726380680813, iteration: 83535
loss: 0.9947469234466553,grad_norm: 0.9057319063255995, iteration: 83536
loss: 1.0896039009094238,grad_norm: 0.9999996610265166, iteration: 83537
loss: 0.9688147902488708,grad_norm: 0.9999989453447962, iteration: 83538
loss: 0.991027295589447,grad_norm: 0.8810967670678427, iteration: 83539
loss: 1.0131036043167114,grad_norm: 0.9999990724545619, iteration: 83540
loss: 1.0381838083267212,grad_norm: 0.8088824916407904, iteration: 83541
loss: 0.9979115128517151,grad_norm: 0.8330859439614207, iteration: 83542
loss: 1.0253255367279053,grad_norm: 0.9999991026819863, iteration: 83543
loss: 0.9995130300521851,grad_norm: 0.9414337234336255, iteration: 83544
loss: 1.054980754852295,grad_norm: 0.9295949988397495, iteration: 83545
loss: 0.996105968952179,grad_norm: 0.9999991029425452, iteration: 83546
loss: 0.9591450691223145,grad_norm: 0.9999989446945986, iteration: 83547
loss: 1.0062246322631836,grad_norm: 0.9635386665951795, iteration: 83548
loss: 0.9791439771652222,grad_norm: 0.999999137029893, iteration: 83549
loss: 1.0150233507156372,grad_norm: 0.9999992860592427, iteration: 83550
loss: 0.9817339181900024,grad_norm: 0.9688122696340637, iteration: 83551
loss: 1.0385866165161133,grad_norm: 0.931883892939552, iteration: 83552
loss: 1.0141863822937012,grad_norm: 0.9421088965775881, iteration: 83553
loss: 0.9922744631767273,grad_norm: 0.8506194956693885, iteration: 83554
loss: 1.0034832954406738,grad_norm: 0.8446291350062703, iteration: 83555
loss: 0.9605993032455444,grad_norm: 0.9712503619402946, iteration: 83556
loss: 0.9921227097511292,grad_norm: 0.8239056484366715, iteration: 83557
loss: 0.9809359312057495,grad_norm: 0.9593869170809288, iteration: 83558
loss: 0.9797979593276978,grad_norm: 0.9148480927156267, iteration: 83559
loss: 1.3013297319412231,grad_norm: 0.9999998565879988, iteration: 83560
loss: 1.0027657747268677,grad_norm: 0.8899005875916148, iteration: 83561
loss: 1.0489866733551025,grad_norm: 0.9999991152573426, iteration: 83562
loss: 0.9953780174255371,grad_norm: 0.9999990869398704, iteration: 83563
loss: 1.0271759033203125,grad_norm: 0.9999996013088804, iteration: 83564
loss: 1.0263621807098389,grad_norm: 0.9999997356839604, iteration: 83565
loss: 1.0722849369049072,grad_norm: 0.9999992149401093, iteration: 83566
loss: 1.0072028636932373,grad_norm: 0.9927615094590622, iteration: 83567
loss: 1.0337599515914917,grad_norm: 0.9009907817963214, iteration: 83568
loss: 1.0327743291854858,grad_norm: 0.9813743143713836, iteration: 83569
loss: 1.0519365072250366,grad_norm: 0.9999992766361824, iteration: 83570
loss: 0.9975738525390625,grad_norm: 0.9845638999419594, iteration: 83571
loss: 1.0453147888183594,grad_norm: 0.9999991321640582, iteration: 83572
loss: 1.0271601676940918,grad_norm: 0.9999993959246747, iteration: 83573
loss: 0.9958146214485168,grad_norm: 0.8797405925394558, iteration: 83574
loss: 0.9682462811470032,grad_norm: 0.9057171459768706, iteration: 83575
loss: 0.9885877370834351,grad_norm: 0.8282139337548527, iteration: 83576
loss: 0.9771524667739868,grad_norm: 0.9285814805621274, iteration: 83577
loss: 0.9597121477127075,grad_norm: 0.9510106464448922, iteration: 83578
loss: 1.0094952583312988,grad_norm: 0.8146977849049533, iteration: 83579
loss: 1.0506072044372559,grad_norm: 0.8202004424280311, iteration: 83580
loss: 1.0240707397460938,grad_norm: 0.7539276739614799, iteration: 83581
loss: 0.9994059801101685,grad_norm: 0.9999992048739279, iteration: 83582
loss: 1.0218085050582886,grad_norm: 0.7526755939878578, iteration: 83583
loss: 1.0200393199920654,grad_norm: 0.9231464379057734, iteration: 83584
loss: 1.0293962955474854,grad_norm: 0.9859991998450752, iteration: 83585
loss: 0.9880942106246948,grad_norm: 0.9999989087623333, iteration: 83586
loss: 0.9821245074272156,grad_norm: 0.9999990733611369, iteration: 83587
loss: 1.0178683996200562,grad_norm: 0.9999992831868281, iteration: 83588
loss: 1.024512767791748,grad_norm: 0.8211768552778418, iteration: 83589
loss: 0.9977027177810669,grad_norm: 0.8281571819162011, iteration: 83590
loss: 1.0412137508392334,grad_norm: 0.9999993529194184, iteration: 83591
loss: 1.005426049232483,grad_norm: 0.9999998939628799, iteration: 83592
loss: 1.011425495147705,grad_norm: 0.9999998550794543, iteration: 83593
loss: 0.9828833937644958,grad_norm: 0.8659684659353344, iteration: 83594
loss: 1.0030665397644043,grad_norm: 0.9999991514678348, iteration: 83595
loss: 0.9604759216308594,grad_norm: 0.8940757707633226, iteration: 83596
loss: 1.0313242673873901,grad_norm: 0.8104873701737205, iteration: 83597
loss: 0.9859510064125061,grad_norm: 0.999999214447241, iteration: 83598
loss: 0.9863380193710327,grad_norm: 0.9999990277870515, iteration: 83599
loss: 0.9929954409599304,grad_norm: 0.8901489192255857, iteration: 83600
loss: 1.015484094619751,grad_norm: 0.9999992332862538, iteration: 83601
loss: 1.0164711475372314,grad_norm: 0.9358519596558229, iteration: 83602
loss: 1.0142316818237305,grad_norm: 0.9999991724331796, iteration: 83603
loss: 0.959717869758606,grad_norm: 0.9540939505874371, iteration: 83604
loss: 1.0447537899017334,grad_norm: 0.8593303947838863, iteration: 83605
loss: 0.9620714783668518,grad_norm: 0.9849823724928722, iteration: 83606
loss: 0.9990472793579102,grad_norm: 0.8888887256561727, iteration: 83607
loss: 1.0071237087249756,grad_norm: 0.9999996327949434, iteration: 83608
loss: 1.0257391929626465,grad_norm: 0.9999992501359746, iteration: 83609
loss: 1.0153522491455078,grad_norm: 0.9477016507095749, iteration: 83610
loss: 0.9885913133621216,grad_norm: 0.9202138278016507, iteration: 83611
loss: 1.0332573652267456,grad_norm: 0.9846742003626973, iteration: 83612
loss: 0.9655351638793945,grad_norm: 0.7978299991389877, iteration: 83613
loss: 0.9906750917434692,grad_norm: 0.8933412707457072, iteration: 83614
loss: 1.0291730165481567,grad_norm: 0.9194466614186964, iteration: 83615
loss: 0.9869614243507385,grad_norm: 0.8627270622159809, iteration: 83616
loss: 0.9747301340103149,grad_norm: 0.959162280385937, iteration: 83617
loss: 0.9779180884361267,grad_norm: 0.9999991255920321, iteration: 83618
loss: 1.030709147453308,grad_norm: 0.9999994212898352, iteration: 83619
loss: 0.9744239449501038,grad_norm: 0.9000817744733558, iteration: 83620
loss: 0.998443603515625,grad_norm: 0.9208605365716072, iteration: 83621
loss: 1.0216000080108643,grad_norm: 0.9999991878535407, iteration: 83622
loss: 1.0336254835128784,grad_norm: 0.999999316394214, iteration: 83623
loss: 0.9997573494911194,grad_norm: 0.9999994718332285, iteration: 83624
loss: 0.9817944765090942,grad_norm: 0.8868926210429013, iteration: 83625
loss: 1.0359209775924683,grad_norm: 0.9176451920476845, iteration: 83626
loss: 1.0136711597442627,grad_norm: 0.9999997344824482, iteration: 83627
loss: 0.9651727676391602,grad_norm: 0.9264772834798619, iteration: 83628
loss: 0.9861247539520264,grad_norm: 0.9134139729627714, iteration: 83629
loss: 1.0372766256332397,grad_norm: 0.9999993638880713, iteration: 83630
loss: 0.9773015379905701,grad_norm: 0.8729005811605974, iteration: 83631
loss: 0.9823747873306274,grad_norm: 0.9999993887258475, iteration: 83632
loss: 1.0399606227874756,grad_norm: 0.9999998931560778, iteration: 83633
loss: 1.020521640777588,grad_norm: 0.9999995054703469, iteration: 83634
loss: 0.980876088142395,grad_norm: 0.9999989989241408, iteration: 83635
loss: 1.0033409595489502,grad_norm: 0.9999991016653096, iteration: 83636
loss: 0.9867860674858093,grad_norm: 0.9999991117743448, iteration: 83637
loss: 1.0218688249588013,grad_norm: 0.9999992037171256, iteration: 83638
loss: 1.007435917854309,grad_norm: 0.9734721052467866, iteration: 83639
loss: 1.049484133720398,grad_norm: 0.8840413033177634, iteration: 83640
loss: 1.005874752998352,grad_norm: 0.9898839149570373, iteration: 83641
loss: 1.0062958002090454,grad_norm: 0.9999993190871956, iteration: 83642
loss: 1.0158867835998535,grad_norm: 0.9912713361288755, iteration: 83643
loss: 0.987829864025116,grad_norm: 0.9999989781165175, iteration: 83644
loss: 1.0339162349700928,grad_norm: 0.999075932325379, iteration: 83645
loss: 1.0049796104431152,grad_norm: 0.7552600526649961, iteration: 83646
loss: 1.035488247871399,grad_norm: 0.9999993761697872, iteration: 83647
loss: 1.0014264583587646,grad_norm: 0.9189431245891386, iteration: 83648
loss: 1.0077930688858032,grad_norm: 0.9175434389104815, iteration: 83649
loss: 1.0295701026916504,grad_norm: 0.8673275133768648, iteration: 83650
loss: 1.0174328088760376,grad_norm: 0.9786690333858463, iteration: 83651
loss: 0.9723119735717773,grad_norm: 0.9999990955509781, iteration: 83652
loss: 0.9844983220100403,grad_norm: 0.9956430186913356, iteration: 83653
loss: 1.0265426635742188,grad_norm: 0.9999999452861287, iteration: 83654
loss: 1.043166160583496,grad_norm: 0.9999993473358283, iteration: 83655
loss: 1.0539826154708862,grad_norm: 0.9999998412857203, iteration: 83656
loss: 1.0291948318481445,grad_norm: 0.9999991632750952, iteration: 83657
loss: 0.9750774502754211,grad_norm: 0.867065277293846, iteration: 83658
loss: 0.9902740716934204,grad_norm: 0.9999990305199021, iteration: 83659
loss: 1.018739104270935,grad_norm: 0.9999994375094916, iteration: 83660
loss: 1.0207304954528809,grad_norm: 0.9999990718290895, iteration: 83661
loss: 1.000421404838562,grad_norm: 0.8815011102407364, iteration: 83662
loss: 1.042453408241272,grad_norm: 0.9999991961958161, iteration: 83663
loss: 1.016767978668213,grad_norm: 0.9999989795041959, iteration: 83664
loss: 1.0225279331207275,grad_norm: 0.9999990337730096, iteration: 83665
loss: 1.0151463747024536,grad_norm: 0.8973828205672176, iteration: 83666
loss: 0.9818669557571411,grad_norm: 0.9999991127542368, iteration: 83667
loss: 1.0043461322784424,grad_norm: 0.9770753073962923, iteration: 83668
loss: 1.0234856605529785,grad_norm: 0.749982655460452, iteration: 83669
loss: 0.9942775964736938,grad_norm: 0.9540185816284384, iteration: 83670
loss: 1.050031065940857,grad_norm: 0.9999991582598524, iteration: 83671
loss: 1.2229275703430176,grad_norm: 0.9999996687603663, iteration: 83672
loss: 0.9731655120849609,grad_norm: 0.9999991032715998, iteration: 83673
loss: 1.0689517259597778,grad_norm: 0.9999992341921479, iteration: 83674
loss: 0.9830798506736755,grad_norm: 0.9999990659853116, iteration: 83675
loss: 0.9907103776931763,grad_norm: 0.866380023903173, iteration: 83676
loss: 0.9874797463417053,grad_norm: 0.9922824366628389, iteration: 83677
loss: 1.0331463813781738,grad_norm: 0.9999993853543901, iteration: 83678
loss: 0.9822230339050293,grad_norm: 0.7805335059349127, iteration: 83679
loss: 1.0410659313201904,grad_norm: 0.9999991644631325, iteration: 83680
loss: 0.995137095451355,grad_norm: 0.9999989475972877, iteration: 83681
loss: 1.0276744365692139,grad_norm: 0.9999996706258985, iteration: 83682
loss: 1.0094454288482666,grad_norm: 0.9886443795705928, iteration: 83683
loss: 0.9988964796066284,grad_norm: 0.9870534431466271, iteration: 83684
loss: 1.031037449836731,grad_norm: 0.9999992044579321, iteration: 83685
loss: 1.0077258348464966,grad_norm: 0.9999992693281666, iteration: 83686
loss: 1.029678225517273,grad_norm: 0.9999992630012721, iteration: 83687
loss: 1.0759238004684448,grad_norm: 0.8630382563816843, iteration: 83688
loss: 1.0015193223953247,grad_norm: 0.8901800321050493, iteration: 83689
loss: 0.9990932941436768,grad_norm: 0.9337849362888773, iteration: 83690
loss: 0.9960246682167053,grad_norm: 0.9999989799630682, iteration: 83691
loss: 0.9357388019561768,grad_norm: 0.9999990365456795, iteration: 83692
loss: 1.0806124210357666,grad_norm: 0.9999990803701907, iteration: 83693
loss: 1.0400035381317139,grad_norm: 0.950939456244614, iteration: 83694
loss: 1.0245496034622192,grad_norm: 0.9999995441575518, iteration: 83695
loss: 1.0639578104019165,grad_norm: 0.9999998509966619, iteration: 83696
loss: 1.015162467956543,grad_norm: 0.9999991148654317, iteration: 83697
loss: 1.0060946941375732,grad_norm: 0.9604841580238642, iteration: 83698
loss: 1.053560495376587,grad_norm: 0.9999992213936503, iteration: 83699
loss: 1.050569772720337,grad_norm: 0.9999994775990562, iteration: 83700
loss: 1.021101713180542,grad_norm: 0.9694605212097913, iteration: 83701
loss: 1.0084933042526245,grad_norm: 0.8642113687800865, iteration: 83702
loss: 1.0116645097732544,grad_norm: 0.9999997175656924, iteration: 83703
loss: 0.995245635509491,grad_norm: 0.9253654141348944, iteration: 83704
loss: 1.0396742820739746,grad_norm: 0.9999999720421855, iteration: 83705
loss: 0.9852080345153809,grad_norm: 0.8822527017393612, iteration: 83706
loss: 0.9801985025405884,grad_norm: 0.8649045897185731, iteration: 83707
loss: 0.9941609501838684,grad_norm: 0.9341095554626427, iteration: 83708
loss: 1.0035613775253296,grad_norm: 0.9292853947344725, iteration: 83709
loss: 1.0072283744812012,grad_norm: 0.8384418802290118, iteration: 83710
loss: 0.9995949864387512,grad_norm: 0.8116945487757916, iteration: 83711
loss: 1.0757824182510376,grad_norm: 0.9999994891906901, iteration: 83712
loss: 1.0329328775405884,grad_norm: 0.9999998858718381, iteration: 83713
loss: 1.2509177923202515,grad_norm: 0.9999998283031016, iteration: 83714
loss: 1.0218762159347534,grad_norm: 0.9999991163777261, iteration: 83715
loss: 1.0132898092269897,grad_norm: 0.9224035388243843, iteration: 83716
loss: 1.0220177173614502,grad_norm: 0.9999993357302116, iteration: 83717
loss: 1.034903883934021,grad_norm: 0.9999992377031829, iteration: 83718
loss: 1.0424610376358032,grad_norm: 0.99999906310993, iteration: 83719
loss: 0.9928941130638123,grad_norm: 0.9583452519800896, iteration: 83720
loss: 1.0065042972564697,grad_norm: 0.9999998656728483, iteration: 83721
loss: 0.98736971616745,grad_norm: 0.9999991103047543, iteration: 83722
loss: 1.0072976350784302,grad_norm: 0.8252607821471616, iteration: 83723
loss: 1.116574764251709,grad_norm: 0.9999992627794515, iteration: 83724
loss: 0.9783629775047302,grad_norm: 0.9171161445100948, iteration: 83725
loss: 1.3771886825561523,grad_norm: 0.9999998389697297, iteration: 83726
loss: 1.2782366275787354,grad_norm: 0.9999999000282787, iteration: 83727
loss: 1.042594313621521,grad_norm: 0.9999991523669117, iteration: 83728
loss: 1.0000338554382324,grad_norm: 0.9999997916452529, iteration: 83729
loss: 0.9749017357826233,grad_norm: 0.7410712356619413, iteration: 83730
loss: 0.9904125928878784,grad_norm: 0.9999991654919085, iteration: 83731
loss: 1.0389251708984375,grad_norm: 0.9999992285963266, iteration: 83732
loss: 1.046514868736267,grad_norm: 0.9999999128099045, iteration: 83733
loss: 1.1591596603393555,grad_norm: 0.9999997603615897, iteration: 83734
loss: 1.020984172821045,grad_norm: 0.9999992314789994, iteration: 83735
loss: 1.0229768753051758,grad_norm: 0.9178033825075519, iteration: 83736
loss: 1.1054816246032715,grad_norm: 0.999999382227866, iteration: 83737
loss: 0.9933260083198547,grad_norm: 0.7929637813830173, iteration: 83738
loss: 1.0634148120880127,grad_norm: 0.9999992328237206, iteration: 83739
loss: 1.022113561630249,grad_norm: 0.9424553949214665, iteration: 83740
loss: 1.0950590372085571,grad_norm: 0.999999336503276, iteration: 83741
loss: 1.2365951538085938,grad_norm: 0.9999998674942394, iteration: 83742
loss: 1.2478848695755005,grad_norm: 0.9999999012199872, iteration: 83743
loss: 1.0463014841079712,grad_norm: 0.999999100644529, iteration: 83744
loss: 1.1397156715393066,grad_norm: 0.9999998115678286, iteration: 83745
loss: 1.1719146966934204,grad_norm: 0.9999996857858013, iteration: 83746
loss: 1.3733587265014648,grad_norm: 0.9999995470338442, iteration: 83747
loss: 1.1743340492248535,grad_norm: 0.9999994894772244, iteration: 83748
loss: 1.005608320236206,grad_norm: 0.7845202460936314, iteration: 83749
loss: 1.3201464414596558,grad_norm: 0.9999998530250369, iteration: 83750
loss: 1.3865387439727783,grad_norm: 0.9999999692193059, iteration: 83751
loss: 1.3518832921981812,grad_norm: 0.999999899373595, iteration: 83752
loss: 1.1707050800323486,grad_norm: 0.9999999401562507, iteration: 83753
loss: 1.4315537214279175,grad_norm: 0.9999999596922742, iteration: 83754
loss: 1.2127786874771118,grad_norm: 0.9999999381653099, iteration: 83755
loss: 1.162221074104309,grad_norm: 0.9999999428103227, iteration: 83756
loss: 1.312535047531128,grad_norm: 0.9999998836495495, iteration: 83757
loss: 1.0496658086776733,grad_norm: 0.9999992209797504, iteration: 83758
loss: 1.0079927444458008,grad_norm: 0.99999929866358, iteration: 83759
loss: 1.237196683883667,grad_norm: 0.999999671943447, iteration: 83760
loss: 1.002211570739746,grad_norm: 0.938014324401802, iteration: 83761
loss: 1.3682039976119995,grad_norm: 0.9999998048190957, iteration: 83762
loss: 1.1681333780288696,grad_norm: 0.9999996093519553, iteration: 83763
loss: 1.1266409158706665,grad_norm: 0.9999993204951385, iteration: 83764
loss: 1.145041584968567,grad_norm: 0.9999997752486366, iteration: 83765
loss: 1.1613415479660034,grad_norm: 0.9999999554621188, iteration: 83766
loss: 1.0761977434158325,grad_norm: 0.9198567381183482, iteration: 83767
loss: 1.1165764331817627,grad_norm: 0.9999998333521498, iteration: 83768
loss: 1.0399973392486572,grad_norm: 0.9999990446119854, iteration: 83769
loss: 1.010050892829895,grad_norm: 0.9999990801521942, iteration: 83770
loss: 0.9934773445129395,grad_norm: 0.9999995899173204, iteration: 83771
loss: 1.1014693975448608,grad_norm: 0.9999995599386973, iteration: 83772
loss: 1.034320592880249,grad_norm: 0.8545947344541374, iteration: 83773
loss: 1.0094558000564575,grad_norm: 0.7910703445024736, iteration: 83774
loss: 1.0306938886642456,grad_norm: 0.8792706787155548, iteration: 83775
loss: 1.0778884887695312,grad_norm: 0.9999991506258483, iteration: 83776
loss: 1.0666069984436035,grad_norm: 0.9999993170303821, iteration: 83777
loss: 0.9988152384757996,grad_norm: 0.9999992064973825, iteration: 83778
loss: 1.1399080753326416,grad_norm: 0.9999998159961108, iteration: 83779
loss: 1.0736322402954102,grad_norm: 0.9999995611598477, iteration: 83780
loss: 0.9804645776748657,grad_norm: 0.9327882439035722, iteration: 83781
loss: 0.980131983757019,grad_norm: 0.9187397728675749, iteration: 83782
loss: 1.018304467201233,grad_norm: 0.9999990382904197, iteration: 83783
loss: 1.1059705018997192,grad_norm: 0.999999504805239, iteration: 83784
loss: 1.0745824575424194,grad_norm: 0.999999481133909, iteration: 83785
loss: 1.042094111442566,grad_norm: 0.9999991041592118, iteration: 83786
loss: 1.0905331373214722,grad_norm: 0.9999998569277091, iteration: 83787
loss: 1.0135085582733154,grad_norm: 1.0000000061957448, iteration: 83788
loss: 1.2918992042541504,grad_norm: 0.9999998321108177, iteration: 83789
loss: 1.1832544803619385,grad_norm: 0.9999997287019252, iteration: 83790
loss: 1.1617982387542725,grad_norm: 0.9999998174616566, iteration: 83791
loss: 0.9757527112960815,grad_norm: 0.9380775817787121, iteration: 83792
loss: 1.1411621570587158,grad_norm: 0.9999995969360819, iteration: 83793
loss: 1.1633142232894897,grad_norm: 0.9999996079931739, iteration: 83794
loss: 1.2740569114685059,grad_norm: 0.999999954664676, iteration: 83795
loss: 1.2571932077407837,grad_norm: 0.9999996588134442, iteration: 83796
loss: 1.144301176071167,grad_norm: 0.9999997284359352, iteration: 83797
loss: 1.1280654668807983,grad_norm: 0.9999998916992424, iteration: 83798
loss: 1.1736805438995361,grad_norm: 0.9999993638033017, iteration: 83799
loss: 1.3119412660598755,grad_norm: 0.9999997618357571, iteration: 83800
loss: 1.3781782388687134,grad_norm: 0.9999999552359078, iteration: 83801
loss: 1.1209502220153809,grad_norm: 1.000000025202849, iteration: 83802
loss: 1.4165046215057373,grad_norm: 0.9999999504547498, iteration: 83803
loss: 1.6673640012741089,grad_norm: 0.9999999828123765, iteration: 83804
loss: 1.6420047283172607,grad_norm: 1.0000001149372721, iteration: 83805
loss: 1.285090446472168,grad_norm: 0.999999930385368, iteration: 83806
loss: 1.4944206476211548,grad_norm: 0.9999998317154479, iteration: 83807
loss: 1.4733245372772217,grad_norm: 0.9999999095387828, iteration: 83808
loss: 1.7075828313827515,grad_norm: 0.999999729851143, iteration: 83809
loss: 1.653652548789978,grad_norm: 0.9999998874166429, iteration: 83810
loss: 1.8647438287734985,grad_norm: 0.9999999163029135, iteration: 83811
loss: 2.10689115524292,grad_norm: 1.0000000412906835, iteration: 83812
loss: 2.163691997528076,grad_norm: 0.9999998884067541, iteration: 83813
loss: 1.9066963195800781,grad_norm: 0.9999999391003369, iteration: 83814
loss: 2.101856231689453,grad_norm: 1.0000000874032569, iteration: 83815
loss: 1.747225046157837,grad_norm: 0.9999999112004854, iteration: 83816
loss: 1.6873723268508911,grad_norm: 1.00000003121447, iteration: 83817
loss: 1.4958044290542603,grad_norm: 0.9999999019803942, iteration: 83818
loss: 1.703012228012085,grad_norm: 1.0000000049999227, iteration: 83819
loss: 1.5704381465911865,grad_norm: 0.9999999581559472, iteration: 83820
loss: 1.371172547340393,grad_norm: 0.9999998643586485, iteration: 83821
loss: 2.0657389163970947,grad_norm: 0.9999999824743557, iteration: 83822
loss: 2.1089084148406982,grad_norm: 0.9999999664954463, iteration: 83823
loss: 1.5549712181091309,grad_norm: 0.9999998234692896, iteration: 83824
loss: 1.4418033361434937,grad_norm: 0.9999998177858453, iteration: 83825
loss: 1.6042265892028809,grad_norm: 1.0000000177165687, iteration: 83826
loss: 1.3219890594482422,grad_norm: 0.9999996046283984, iteration: 83827
loss: 1.3009499311447144,grad_norm: 0.9999999286810912, iteration: 83828
loss: 1.572515845298767,grad_norm: 0.9999998898269935, iteration: 83829
loss: 1.4688525199890137,grad_norm: 0.9999999637177882, iteration: 83830
loss: 1.7528471946716309,grad_norm: 0.999999888407712, iteration: 83831
loss: 1.268164873123169,grad_norm: 0.9999998845529875, iteration: 83832
loss: 1.5461283922195435,grad_norm: 0.999999918469516, iteration: 83833
loss: 1.6557130813598633,grad_norm: 0.9999999807423173, iteration: 83834
loss: 1.572787880897522,grad_norm: 0.9999998845278772, iteration: 83835
loss: 1.1847105026245117,grad_norm: 0.9999996004472704, iteration: 83836
loss: 1.2107430696487427,grad_norm: 0.9999998461987379, iteration: 83837
loss: 1.627156376838684,grad_norm: 0.9999998997846755, iteration: 83838
loss: 1.4985312223434448,grad_norm: 0.9999999791526624, iteration: 83839
loss: 1.09385085105896,grad_norm: 0.9999998191833421, iteration: 83840
loss: 1.3567262887954712,grad_norm: 0.9999999069424118, iteration: 83841
loss: 1.5365135669708252,grad_norm: 0.9999998818462159, iteration: 83842
loss: 1.2609405517578125,grad_norm: 0.9999998164517541, iteration: 83843
loss: 1.416459083557129,grad_norm: 0.9999999918848425, iteration: 83844
loss: 1.2692151069641113,grad_norm: 0.99999989436013, iteration: 83845
loss: 1.2258567810058594,grad_norm: 0.9999997070921194, iteration: 83846
loss: 1.2395153045654297,grad_norm: 0.9999996135425285, iteration: 83847
loss: 1.1695265769958496,grad_norm: 0.9999999925571005, iteration: 83848
loss: 1.3446859121322632,grad_norm: 0.9999999282921551, iteration: 83849
loss: 1.1444844007492065,grad_norm: 0.9999998919784483, iteration: 83850
loss: 1.4071907997131348,grad_norm: 0.9999996955627374, iteration: 83851
loss: 1.117294192314148,grad_norm: 0.9999996531444394, iteration: 83852
loss: 1.0907177925109863,grad_norm: 0.9999994465108745, iteration: 83853
loss: 1.2283414602279663,grad_norm: 0.9999996633037331, iteration: 83854
loss: 1.2721141576766968,grad_norm: 0.9999997858459755, iteration: 83855
loss: 1.2284950017929077,grad_norm: 0.999999898990712, iteration: 83856
loss: 1.0560733079910278,grad_norm: 0.9999995183701416, iteration: 83857
loss: 1.196642279624939,grad_norm: 0.9999997515277066, iteration: 83858
loss: 1.0875111818313599,grad_norm: 0.9999999812707219, iteration: 83859
loss: 1.149370789527893,grad_norm: 0.9999997666840427, iteration: 83860
loss: 1.1686203479766846,grad_norm: 0.9999995303672917, iteration: 83861
loss: 1.1280664205551147,grad_norm: 0.9999999858306527, iteration: 83862
loss: 1.101866602897644,grad_norm: 0.9999992470738504, iteration: 83863
loss: 1.0735149383544922,grad_norm: 0.9999999246213285, iteration: 83864
loss: 1.2606213092803955,grad_norm: 0.9999998909567, iteration: 83865
loss: 1.1872649192810059,grad_norm: 0.9999995945362352, iteration: 83866
loss: 0.9813494682312012,grad_norm: 0.9999992316046993, iteration: 83867
loss: 1.1106351613998413,grad_norm: 0.9999992099750986, iteration: 83868
loss: 1.1379832029342651,grad_norm: 0.999999704191137, iteration: 83869
loss: 1.0051143169403076,grad_norm: 0.9999992315341931, iteration: 83870
loss: 1.063807725906372,grad_norm: 0.9999998309556587, iteration: 83871
loss: 1.1663881540298462,grad_norm: 0.9999995101870797, iteration: 83872
loss: 0.9814021587371826,grad_norm: 0.9999990447028284, iteration: 83873
loss: 1.2112256288528442,grad_norm: 0.9999999484286962, iteration: 83874
loss: 1.177167296409607,grad_norm: 0.9999997396887228, iteration: 83875
loss: 1.1183528900146484,grad_norm: 0.9999999800087257, iteration: 83876
loss: 1.1297494173049927,grad_norm: 0.9999998593713446, iteration: 83877
loss: 1.0097911357879639,grad_norm: 0.9999996831425887, iteration: 83878
loss: 1.0579514503479004,grad_norm: 0.9999992696531153, iteration: 83879
loss: 1.210405945777893,grad_norm: 0.9999998221409384, iteration: 83880
loss: 0.999784529209137,grad_norm: 0.872590876499074, iteration: 83881
loss: 1.0795173645019531,grad_norm: 0.9999998124608999, iteration: 83882
loss: 1.1069532632827759,grad_norm: 0.9999990637172164, iteration: 83883
loss: 0.9889165163040161,grad_norm: 0.9421964233226157, iteration: 83884
loss: 0.9871960282325745,grad_norm: 0.9999995982758362, iteration: 83885
loss: 1.1020828485488892,grad_norm: 0.9999995654462043, iteration: 83886
loss: 1.0815949440002441,grad_norm: 0.9999996548499849, iteration: 83887
loss: 1.4282112121582031,grad_norm: 0.9999999089450279, iteration: 83888
loss: 1.2314584255218506,grad_norm: 0.9999998324467834, iteration: 83889
loss: 1.0533148050308228,grad_norm: 0.9999996365577666, iteration: 83890
loss: 1.0777133703231812,grad_norm: 0.9999991994275081, iteration: 83891
loss: 1.1149383783340454,grad_norm: 0.9999994861160745, iteration: 83892
loss: 1.0190198421478271,grad_norm: 0.9999991515170611, iteration: 83893
loss: 1.0811717510223389,grad_norm: 0.9999991940418101, iteration: 83894
loss: 0.9802728295326233,grad_norm: 0.9999997599744737, iteration: 83895
loss: 1.0413507223129272,grad_norm: 0.9999996160726445, iteration: 83896
loss: 1.0729961395263672,grad_norm: 0.9999990963107641, iteration: 83897
loss: 1.0782526731491089,grad_norm: 0.9999990843931554, iteration: 83898
loss: 1.0122758150100708,grad_norm: 0.9999996187998165, iteration: 83899
loss: 1.0557390451431274,grad_norm: 0.9999990799798707, iteration: 83900
loss: 0.9956765174865723,grad_norm: 0.9999998958204774, iteration: 83901
loss: 1.023164987564087,grad_norm: 0.8874061586344719, iteration: 83902
loss: 1.0608056783676147,grad_norm: 0.9999996923225524, iteration: 83903
loss: 1.0658353567123413,grad_norm: 0.9999997094545441, iteration: 83904
loss: 0.9932000637054443,grad_norm: 0.9999997898703428, iteration: 83905
loss: 1.0487525463104248,grad_norm: 0.999999624025749, iteration: 83906
loss: 1.1318408250808716,grad_norm: 0.9999999160309129, iteration: 83907
loss: 1.1028172969818115,grad_norm: 0.9999996291829812, iteration: 83908
loss: 1.1088052988052368,grad_norm: 0.9999998981086227, iteration: 83909
loss: 1.0321635007858276,grad_norm: 0.9999996716150774, iteration: 83910
loss: 1.098498821258545,grad_norm: 0.9999998528196264, iteration: 83911
loss: 1.019781231880188,grad_norm: 0.999999487294044, iteration: 83912
loss: 1.077744722366333,grad_norm: 0.9999996209700311, iteration: 83913
loss: 1.104118824005127,grad_norm: 0.9999995419465351, iteration: 83914
loss: 1.0371514558792114,grad_norm: 0.9999992519666198, iteration: 83915
loss: 1.0813038349151611,grad_norm: 0.9999991404602787, iteration: 83916
loss: 1.0310680866241455,grad_norm: 0.9999993712078477, iteration: 83917
loss: 1.0780922174453735,grad_norm: 0.9999990952826471, iteration: 83918
loss: 1.043088674545288,grad_norm: 0.9999997345894878, iteration: 83919
loss: 1.0377355813980103,grad_norm: 0.999999847604604, iteration: 83920
loss: 1.0284936428070068,grad_norm: 0.999999155406583, iteration: 83921
loss: 1.1426324844360352,grad_norm: 0.9999998770841139, iteration: 83922
loss: 1.214115858078003,grad_norm: 0.9999996390056264, iteration: 83923
loss: 1.000087857246399,grad_norm: 0.9999994007953008, iteration: 83924
loss: 1.0082476139068604,grad_norm: 0.9339024128188015, iteration: 83925
loss: 1.2758097648620605,grad_norm: 0.9999998553767412, iteration: 83926
loss: 1.0726280212402344,grad_norm: 0.9999993521982616, iteration: 83927
loss: 1.119320273399353,grad_norm: 0.9999992641771845, iteration: 83928
loss: 1.090408444404602,grad_norm: 0.9999996436741266, iteration: 83929
loss: 1.023120403289795,grad_norm: 0.9999993242398015, iteration: 83930
loss: 1.122153878211975,grad_norm: 0.9999997129494242, iteration: 83931
loss: 1.3000532388687134,grad_norm: 0.9999996581226324, iteration: 83932
loss: 1.11978018283844,grad_norm: 0.9999995740905426, iteration: 83933
loss: 1.040587306022644,grad_norm: 0.9999993131543994, iteration: 83934
loss: 1.0082226991653442,grad_norm: 0.9776121539057177, iteration: 83935
loss: 1.022588849067688,grad_norm: 0.9999991390604467, iteration: 83936
loss: 1.054671049118042,grad_norm: 0.9999990414780131, iteration: 83937
loss: 1.004122018814087,grad_norm: 0.9999993485357542, iteration: 83938
loss: 1.0729161500930786,grad_norm: 0.9999995563808025, iteration: 83939
loss: 1.12876296043396,grad_norm: 0.9999997212562217, iteration: 83940
loss: 1.0958622694015503,grad_norm: 0.9999995258973946, iteration: 83941
loss: 1.1771886348724365,grad_norm: 0.9999997570704093, iteration: 83942
loss: 1.0434807538986206,grad_norm: 0.999999418692647, iteration: 83943
loss: 1.0021404027938843,grad_norm: 0.9083730984243119, iteration: 83944
loss: 1.01500403881073,grad_norm: 0.9999999089431266, iteration: 83945
loss: 1.0260310173034668,grad_norm: 0.8848397778907949, iteration: 83946
loss: 1.1523420810699463,grad_norm: 0.9999992149104485, iteration: 83947
loss: 1.0394582748413086,grad_norm: 0.9999991893528594, iteration: 83948
loss: 0.9774200320243835,grad_norm: 0.9999998373346523, iteration: 83949
loss: 0.9893385171890259,grad_norm: 0.9999997350676735, iteration: 83950
loss: 0.9968594908714294,grad_norm: 0.7023032996529367, iteration: 83951
loss: 0.995429515838623,grad_norm: 0.9999990705232259, iteration: 83952
loss: 1.0462698936462402,grad_norm: 0.9999993477067072, iteration: 83953
loss: 1.0422892570495605,grad_norm: 0.9999994352141708, iteration: 83954
loss: 0.9919209480285645,grad_norm: 0.8117032348075567, iteration: 83955
loss: 1.0054237842559814,grad_norm: 0.9999990564594187, iteration: 83956
loss: 1.04594886302948,grad_norm: 0.999999182531327, iteration: 83957
loss: 0.9926223754882812,grad_norm: 0.9999991097001947, iteration: 83958
loss: 1.1393989324569702,grad_norm: 0.9999994409504253, iteration: 83959
loss: 1.1752852201461792,grad_norm: 0.9999999109053034, iteration: 83960
loss: 1.0829319953918457,grad_norm: 0.9999993930525879, iteration: 83961
loss: 0.9890075922012329,grad_norm: 0.9999990344067932, iteration: 83962
loss: 1.0287567377090454,grad_norm: 0.8428172550613983, iteration: 83963
loss: 1.1779600381851196,grad_norm: 0.9999993075534638, iteration: 83964
loss: 1.0543208122253418,grad_norm: 1.0000000498718284, iteration: 83965
loss: 1.037832498550415,grad_norm: 0.9999995891595781, iteration: 83966
loss: 1.2454532384872437,grad_norm: 0.9999998273210836, iteration: 83967
loss: 1.0859688520431519,grad_norm: 0.9999993473331573, iteration: 83968
loss: 1.268488883972168,grad_norm: 0.9999997911532592, iteration: 83969
loss: 1.0814512968063354,grad_norm: 0.9999996184689989, iteration: 83970
loss: 1.177240252494812,grad_norm: 0.9999996364345756, iteration: 83971
loss: 1.7448701858520508,grad_norm: 0.9999997926716577, iteration: 83972
loss: 1.0902365446090698,grad_norm: 0.9999994542152932, iteration: 83973
loss: 1.1585967540740967,grad_norm: 1.00000006681551, iteration: 83974
loss: 1.188245415687561,grad_norm: 0.9999992745189479, iteration: 83975
loss: 1.0864676237106323,grad_norm: 0.9999996505838021, iteration: 83976
loss: 1.1550683975219727,grad_norm: 0.999999575891508, iteration: 83977
loss: 1.0265170335769653,grad_norm: 0.9197097838502561, iteration: 83978
loss: 1.1030606031417847,grad_norm: 0.9999992039866652, iteration: 83979
loss: 1.0155653953552246,grad_norm: 0.9999991888246323, iteration: 83980
loss: 1.015900731086731,grad_norm: 0.9999998698201756, iteration: 83981
loss: 1.0262796878814697,grad_norm: 0.9153139162588768, iteration: 83982
loss: 1.1039518117904663,grad_norm: 0.9999997868140279, iteration: 83983
loss: 0.9961917400360107,grad_norm: 0.9999999896054194, iteration: 83984
loss: 1.0371206998825073,grad_norm: 0.9866261763861268, iteration: 83985
loss: 1.0836745500564575,grad_norm: 0.9999993524118377, iteration: 83986
loss: 1.0982787609100342,grad_norm: 0.9999998958826791, iteration: 83987
loss: 1.1086829900741577,grad_norm: 0.9999996393446917, iteration: 83988
loss: 1.3866963386535645,grad_norm: 0.9999995558522592, iteration: 83989
loss: 1.0187530517578125,grad_norm: 0.9999991264617457, iteration: 83990
loss: 1.0286895036697388,grad_norm: 0.9999990601702553, iteration: 83991
loss: 1.0998600721359253,grad_norm: 0.9999989922128288, iteration: 83992
loss: 1.067179560661316,grad_norm: 0.9999999613051713, iteration: 83993
loss: 1.028000831604004,grad_norm: 0.9999991962567868, iteration: 83994
loss: 1.0670723915100098,grad_norm: 0.9110080869690755, iteration: 83995
loss: 1.034101963043213,grad_norm: 0.9999997005706212, iteration: 83996
loss: 1.0246831178665161,grad_norm: 0.9999998204170963, iteration: 83997
loss: 1.0051873922348022,grad_norm: 0.9999991341304543, iteration: 83998
loss: 1.0722829103469849,grad_norm: 0.8953248142628575, iteration: 83999
loss: 1.0713926553726196,grad_norm: 0.9999991858515097, iteration: 84000
loss: 1.0000286102294922,grad_norm: 0.999999168121595, iteration: 84001
loss: 0.9942446351051331,grad_norm: 0.9999990490258277, iteration: 84002
loss: 1.1770399808883667,grad_norm: 0.9999995312009843, iteration: 84003
loss: 1.0437489748001099,grad_norm: 0.9685266444991392, iteration: 84004
loss: 1.3184658288955688,grad_norm: 0.9999997250414756, iteration: 84005
loss: 1.0920934677124023,grad_norm: 0.9999992108744598, iteration: 84006
loss: 1.1146107912063599,grad_norm: 0.9999996740681735, iteration: 84007
loss: 1.140990138053894,grad_norm: 0.9999995338196589, iteration: 84008
loss: 1.1703407764434814,grad_norm: 0.9999996922893855, iteration: 84009
loss: 1.0086020231246948,grad_norm: 0.9999990371761959, iteration: 84010
loss: 0.9697891473770142,grad_norm: 0.9485580325950335, iteration: 84011
loss: 1.106562614440918,grad_norm: 0.9999994811276999, iteration: 84012
loss: 1.1410504579544067,grad_norm: 0.9999993155362602, iteration: 84013
loss: 0.9997428059577942,grad_norm: 0.9999991464348537, iteration: 84014
loss: 1.1383074522018433,grad_norm: 0.9999996055402208, iteration: 84015
loss: 1.0357365608215332,grad_norm: 0.9999992788797983, iteration: 84016
loss: 1.1849061250686646,grad_norm: 0.9999997153954808, iteration: 84017
loss: 1.0837996006011963,grad_norm: 0.9999998991805542, iteration: 84018
loss: 1.0725995302200317,grad_norm: 0.9999998299695413, iteration: 84019
loss: 1.0227710008621216,grad_norm: 0.9999990768230445, iteration: 84020
loss: 1.2758411169052124,grad_norm: 0.9999998673399584, iteration: 84021
loss: 1.0071730613708496,grad_norm: 0.9999992200772334, iteration: 84022
loss: 1.075790286064148,grad_norm: 0.9999996321546026, iteration: 84023
loss: 1.009791374206543,grad_norm: 0.9999992762865784, iteration: 84024
loss: 1.1473981142044067,grad_norm: 0.9999999064507579, iteration: 84025
loss: 1.1404675245285034,grad_norm: 0.9999992918742748, iteration: 84026
loss: 1.2381314039230347,grad_norm: 0.9999999412432347, iteration: 84027
loss: 1.1045175790786743,grad_norm: 0.9999996740244942, iteration: 84028
loss: 1.0663855075836182,grad_norm: 0.9999992561021989, iteration: 84029
loss: 1.0366510152816772,grad_norm: 0.9999992776202441, iteration: 84030
loss: 1.0873454809188843,grad_norm: 0.9999999919187109, iteration: 84031
loss: 0.996358335018158,grad_norm: 0.9999999255465201, iteration: 84032
loss: 1.02540123462677,grad_norm: 0.9999990630569325, iteration: 84033
loss: 1.008249282836914,grad_norm: 0.9999990759257845, iteration: 84034
loss: 1.0512975454330444,grad_norm: 0.9999990612207834, iteration: 84035
loss: 1.0239609479904175,grad_norm: 0.9999994025166019, iteration: 84036
loss: 1.037943720817566,grad_norm: 0.9999998756421804, iteration: 84037
loss: 1.0560901165008545,grad_norm: 0.9999995172059816, iteration: 84038
loss: 1.0457344055175781,grad_norm: 0.999999274511616, iteration: 84039
loss: 1.0149294137954712,grad_norm: 0.999999164410016, iteration: 84040
loss: 1.0437432527542114,grad_norm: 0.9999992629666914, iteration: 84041
loss: 0.9908232092857361,grad_norm: 0.9999997704509729, iteration: 84042
loss: 0.9863734245300293,grad_norm: 0.9800833345338272, iteration: 84043
loss: 1.0225342512130737,grad_norm: 0.9999999064672699, iteration: 84044
loss: 1.0302503108978271,grad_norm: 0.9999991415705768, iteration: 84045
loss: 1.0404337644577026,grad_norm: 0.9999991984545589, iteration: 84046
loss: 1.0039725303649902,grad_norm: 0.9999991291262259, iteration: 84047
loss: 1.0013288259506226,grad_norm: 0.8532089084290103, iteration: 84048
loss: 0.9807140231132507,grad_norm: 0.9999990818229739, iteration: 84049
loss: 1.228757381439209,grad_norm: 0.9999995640333846, iteration: 84050
loss: 1.0280109643936157,grad_norm: 0.9999996181590395, iteration: 84051
loss: 1.0040453672409058,grad_norm: 0.9459073992161257, iteration: 84052
loss: 0.9937828779220581,grad_norm: 0.7424216496547741, iteration: 84053
loss: 0.9636209607124329,grad_norm: 0.9605795773793112, iteration: 84054
loss: 1.00618577003479,grad_norm: 0.999999359887219, iteration: 84055
loss: 1.0513405799865723,grad_norm: 0.9999992095021234, iteration: 84056
loss: 1.0026726722717285,grad_norm: 0.9999993341730771, iteration: 84057
loss: 1.2273445129394531,grad_norm: 0.9999996586461974, iteration: 84058
loss: 0.9948176145553589,grad_norm: 0.9201605579801716, iteration: 84059
loss: 0.9690557718276978,grad_norm: 0.9999991148036985, iteration: 84060
loss: 1.0499866008758545,grad_norm: 0.9999998857919559, iteration: 84061
loss: 1.0926038026809692,grad_norm: 0.9999996250507606, iteration: 84062
loss: 1.0415217876434326,grad_norm: 0.9999995841447358, iteration: 84063
loss: 0.9971436858177185,grad_norm: 0.8629403066623825, iteration: 84064
loss: 0.9865045547485352,grad_norm: 0.9999993929574759, iteration: 84065
loss: 1.0488189458847046,grad_norm: 0.9834901769899401, iteration: 84066
loss: 0.9715720415115356,grad_norm: 0.9999993902616623, iteration: 84067
loss: 0.9921207427978516,grad_norm: 0.9702238676341033, iteration: 84068
loss: 1.0070536136627197,grad_norm: 0.9999992340994441, iteration: 84069
loss: 1.0785903930664062,grad_norm: 0.8804992412702849, iteration: 84070
loss: 1.0492955446243286,grad_norm: 0.9999994326868817, iteration: 84071
loss: 0.987606406211853,grad_norm: 0.9999989507796369, iteration: 84072
loss: 0.9982724189758301,grad_norm: 0.9999992089272062, iteration: 84073
loss: 1.041513442993164,grad_norm: 0.9999994266209623, iteration: 84074
loss: 0.984843909740448,grad_norm: 0.9999991376811949, iteration: 84075
loss: 0.9994679093360901,grad_norm: 0.868586873871871, iteration: 84076
loss: 1.000569224357605,grad_norm: 0.9817972866449761, iteration: 84077
loss: 1.1808956861495972,grad_norm: 0.9999998856376013, iteration: 84078
loss: 1.1636788845062256,grad_norm: 0.9999996507398814, iteration: 84079
loss: 1.0614796876907349,grad_norm: 0.9999991148106573, iteration: 84080
loss: 1.0365831851959229,grad_norm: 0.7242906732991106, iteration: 84081
loss: 1.0273092985153198,grad_norm: 0.8119606612948613, iteration: 84082
loss: 1.112582802772522,grad_norm: 0.9999992793552208, iteration: 84083
loss: 0.9979026913642883,grad_norm: 0.9999990616830302, iteration: 84084
loss: 1.0325572490692139,grad_norm: 0.8554327207160914, iteration: 84085
loss: 0.985141932964325,grad_norm: 0.9999992386321263, iteration: 84086
loss: 0.998471736907959,grad_norm: 0.8131031610923374, iteration: 84087
loss: 1.0780564546585083,grad_norm: 0.9999993427832935, iteration: 84088
loss: 1.011298418045044,grad_norm: 0.999999444617957, iteration: 84089
loss: 1.0426796674728394,grad_norm: 0.9999992925227262, iteration: 84090
loss: 1.015817403793335,grad_norm: 0.8502271342667094, iteration: 84091
loss: 0.984310507774353,grad_norm: 0.846933513041259, iteration: 84092
loss: 1.0027750730514526,grad_norm: 0.9999997129504715, iteration: 84093
loss: 1.0303055047988892,grad_norm: 0.8922134156569246, iteration: 84094
loss: 1.015777587890625,grad_norm: 0.9999989590374958, iteration: 84095
loss: 0.9763830304145813,grad_norm: 0.8330257869622283, iteration: 84096
loss: 1.0643023252487183,grad_norm: 0.9669362013142743, iteration: 84097
loss: 1.0264256000518799,grad_norm: 0.999999759541472, iteration: 84098
loss: 0.9916536211967468,grad_norm: 0.7421851394383426, iteration: 84099
loss: 1.0166336297988892,grad_norm: 0.9999998357210538, iteration: 84100
loss: 1.0323737859725952,grad_norm: 0.9999999200226284, iteration: 84101
loss: 1.0376135110855103,grad_norm: 0.8138316873532077, iteration: 84102
loss: 1.0810515880584717,grad_norm: 0.9999993476896589, iteration: 84103
loss: 0.9676220417022705,grad_norm: 0.875541333655183, iteration: 84104
loss: 0.9899283647537231,grad_norm: 0.9999990770092054, iteration: 84105
loss: 0.9998725652694702,grad_norm: 0.9423371373323177, iteration: 84106
loss: 0.9697994589805603,grad_norm: 0.94639218176733, iteration: 84107
loss: 1.008880376815796,grad_norm: 0.7425354070981713, iteration: 84108
loss: 0.9976732134819031,grad_norm: 0.9453545995445596, iteration: 84109
loss: 0.9922895431518555,grad_norm: 0.8553391379858298, iteration: 84110
loss: 1.0054683685302734,grad_norm: 0.8742893032580403, iteration: 84111
loss: 1.002005696296692,grad_norm: 0.9999992314597836, iteration: 84112
loss: 1.0311253070831299,grad_norm: 0.9999995783888789, iteration: 84113
loss: 0.9911122918128967,grad_norm: 0.9999996361796528, iteration: 84114
loss: 0.9799628257751465,grad_norm: 0.9999993497844093, iteration: 84115
loss: 0.9629821181297302,grad_norm: 0.7601479531993125, iteration: 84116
loss: 1.0188119411468506,grad_norm: 0.999999069273204, iteration: 84117
loss: 1.195870041847229,grad_norm: 1.0000000313706379, iteration: 84118
loss: 1.064156413078308,grad_norm: 0.9999993371983846, iteration: 84119
loss: 0.9921194911003113,grad_norm: 0.916685558694237, iteration: 84120
loss: 1.0410046577453613,grad_norm: 0.8774740594062235, iteration: 84121
loss: 1.0576682090759277,grad_norm: 0.9153019285621905, iteration: 84122
loss: 0.9380791783332825,grad_norm: 0.8361450416865827, iteration: 84123
loss: 1.1470232009887695,grad_norm: 0.9999992658012089, iteration: 84124
loss: 1.0954160690307617,grad_norm: 0.9999998430867724, iteration: 84125
loss: 0.9996578693389893,grad_norm: 0.9999996059944446, iteration: 84126
loss: 1.0273263454437256,grad_norm: 0.9999991672992189, iteration: 84127
loss: 1.0115869045257568,grad_norm: 0.9999989513466437, iteration: 84128
loss: 0.9701999425888062,grad_norm: 0.8675950542560964, iteration: 84129
loss: 1.062119722366333,grad_norm: 0.9999995353046048, iteration: 84130
loss: 0.9960747957229614,grad_norm: 0.9578342907229119, iteration: 84131
loss: 1.0075604915618896,grad_norm: 0.9999990279747928, iteration: 84132
loss: 1.1327415704727173,grad_norm: 0.9999991220691543, iteration: 84133
loss: 1.0506923198699951,grad_norm: 0.9999997614292258, iteration: 84134
loss: 0.984923243522644,grad_norm: 0.9999992634035457, iteration: 84135
loss: 1.0418477058410645,grad_norm: 0.9999991623973375, iteration: 84136
loss: 0.9684443473815918,grad_norm: 0.9999991192903955, iteration: 84137
loss: 0.9827717542648315,grad_norm: 0.8742329948491562, iteration: 84138
loss: 1.1746196746826172,grad_norm: 0.999999942459934, iteration: 84139
loss: 1.0093735456466675,grad_norm: 0.9618367854649408, iteration: 84140
loss: 0.9945149421691895,grad_norm: 0.9655965169155314, iteration: 84141
loss: 1.0115171670913696,grad_norm: 0.9999989872930045, iteration: 84142
loss: 1.264618158340454,grad_norm: 0.9999998342255959, iteration: 84143
loss: 1.2532014846801758,grad_norm: 0.9999996864650089, iteration: 84144
loss: 1.1806610822677612,grad_norm: 0.9999993979384943, iteration: 84145
loss: 1.1075936555862427,grad_norm: 0.9999990677227817, iteration: 84146
loss: 1.0117225646972656,grad_norm: 0.879017702442152, iteration: 84147
loss: 1.0355637073516846,grad_norm: 0.9999997412433166, iteration: 84148
loss: 1.0263350009918213,grad_norm: 0.9999992271649045, iteration: 84149
loss: 0.9912657141685486,grad_norm: 0.9999990427455112, iteration: 84150
loss: 1.0262950658798218,grad_norm: 0.9999991729202669, iteration: 84151
loss: 1.1497727632522583,grad_norm: 0.9999990841518377, iteration: 84152
loss: 1.0422775745391846,grad_norm: 0.8660716788089629, iteration: 84153
loss: 1.0195488929748535,grad_norm: 0.999999225106296, iteration: 84154
loss: 1.0560801029205322,grad_norm: 0.9999990872020827, iteration: 84155
loss: 1.0601816177368164,grad_norm: 0.9999991249381524, iteration: 84156
loss: 1.1766316890716553,grad_norm: 0.9999994333520171, iteration: 84157
loss: 1.048587679862976,grad_norm: 0.9999996274179196, iteration: 84158
loss: 1.0156313180923462,grad_norm: 0.8270061646136084, iteration: 84159
loss: 1.0280725955963135,grad_norm: 0.9999992027783855, iteration: 84160
loss: 1.0160619020462036,grad_norm: 0.9999992132524677, iteration: 84161
loss: 1.0307189226150513,grad_norm: 0.9999991750846045, iteration: 84162
loss: 1.0042237043380737,grad_norm: 0.9071416368026708, iteration: 84163
loss: 1.0057851076126099,grad_norm: 0.9999992434707484, iteration: 84164
loss: 0.9794492125511169,grad_norm: 0.8466636590822888, iteration: 84165
loss: 0.9786741137504578,grad_norm: 0.8810904678219776, iteration: 84166
loss: 1.0307549238204956,grad_norm: 0.9999991134441821, iteration: 84167
loss: 1.0453648567199707,grad_norm: 0.9999990691351589, iteration: 84168
loss: 0.9914414286613464,grad_norm: 0.9920327537193852, iteration: 84169
loss: 1.1184641122817993,grad_norm: 0.9999992553523807, iteration: 84170
loss: 1.0471738576889038,grad_norm: 0.7842898899845074, iteration: 84171
loss: 1.008716344833374,grad_norm: 0.942185078525396, iteration: 84172
loss: 0.9819064140319824,grad_norm: 0.8136586819459843, iteration: 84173
loss: 1.0059555768966675,grad_norm: 0.918438718045659, iteration: 84174
loss: 0.9566318392753601,grad_norm: 0.9999991814988551, iteration: 84175
loss: 0.9697336554527283,grad_norm: 0.9999991209812246, iteration: 84176
loss: 1.1411198377609253,grad_norm: 0.9976367617713526, iteration: 84177
loss: 1.0628364086151123,grad_norm: 0.9510863154595056, iteration: 84178
loss: 0.9766668081283569,grad_norm: 0.8581486525587699, iteration: 84179
loss: 1.0515689849853516,grad_norm: 0.9999999577971809, iteration: 84180
loss: 0.9933071136474609,grad_norm: 0.9099508065769839, iteration: 84181
loss: 1.0317879915237427,grad_norm: 0.9999996552704795, iteration: 84182
loss: 1.0069643259048462,grad_norm: 0.837544722768176, iteration: 84183
loss: 0.9858366250991821,grad_norm: 0.9106950632285643, iteration: 84184
loss: 0.9998272657394409,grad_norm: 0.9999996827472265, iteration: 84185
loss: 1.0003422498703003,grad_norm: 0.9999992309572872, iteration: 84186
loss: 0.9823466539382935,grad_norm: 0.9535112924354173, iteration: 84187
loss: 0.994371235370636,grad_norm: 0.90784146981636, iteration: 84188
loss: 0.9973711967468262,grad_norm: 0.9999992420186021, iteration: 84189
loss: 1.0225054025650024,grad_norm: 0.999999743948166, iteration: 84190
loss: 1.0041743516921997,grad_norm: 0.9462149030142002, iteration: 84191
loss: 1.0217162370681763,grad_norm: 0.8951671072515469, iteration: 84192
loss: 1.0033029317855835,grad_norm: 0.9813390697946084, iteration: 84193
loss: 0.9920324087142944,grad_norm: 0.999998967823244, iteration: 84194
loss: 1.0859384536743164,grad_norm: 0.9458602875111393, iteration: 84195
loss: 1.1899055242538452,grad_norm: 0.9999992530884081, iteration: 84196
loss: 1.228613018989563,grad_norm: 0.9999992759525695, iteration: 84197
loss: 0.9846151471138,grad_norm: 0.902258528309781, iteration: 84198
loss: 1.0373518466949463,grad_norm: 0.7971494852433778, iteration: 84199
loss: 1.0083749294281006,grad_norm: 0.8750595999988899, iteration: 84200
loss: 1.0160480737686157,grad_norm: 0.999999071618285, iteration: 84201
loss: 0.9820109605789185,grad_norm: 0.7077269649681847, iteration: 84202
loss: 1.0576705932617188,grad_norm: 0.9786177307460497, iteration: 84203
loss: 0.9685370326042175,grad_norm: 0.9999993447639558, iteration: 84204
loss: 1.0053575038909912,grad_norm: 0.9645159179160746, iteration: 84205
loss: 0.9813318848609924,grad_norm: 0.8014048798138583, iteration: 84206
loss: 1.00416898727417,grad_norm: 0.9215310774179594, iteration: 84207
loss: 1.1259149312973022,grad_norm: 0.9999994113569686, iteration: 84208
loss: 1.062037467956543,grad_norm: 0.9999996429869105, iteration: 84209
loss: 1.0322574377059937,grad_norm: 0.978173264977407, iteration: 84210
loss: 0.979339599609375,grad_norm: 0.9999994881235837, iteration: 84211
loss: 1.040974497795105,grad_norm: 0.9999995934160311, iteration: 84212
loss: 0.9579410552978516,grad_norm: 0.9999992159045352, iteration: 84213
loss: 1.0494319200515747,grad_norm: 0.9403301219430761, iteration: 84214
loss: 1.0348166227340698,grad_norm: 1.0000000485565625, iteration: 84215
loss: 1.0682425498962402,grad_norm: 0.9999997975983003, iteration: 84216
loss: 1.0076133012771606,grad_norm: 0.9999998350295989, iteration: 84217
loss: 1.1014117002487183,grad_norm: 0.9999997986289417, iteration: 84218
loss: 1.0104560852050781,grad_norm: 0.9999989958018138, iteration: 84219
loss: 1.038987398147583,grad_norm: 0.9999991159723808, iteration: 84220
loss: 1.0273851156234741,grad_norm: 0.8436591825078568, iteration: 84221
loss: 1.03529691696167,grad_norm: 0.9999991188712289, iteration: 84222
loss: 1.1027543544769287,grad_norm: 0.9999992268848001, iteration: 84223
loss: 1.0354673862457275,grad_norm: 0.9999994687087934, iteration: 84224
loss: 0.9965330362319946,grad_norm: 0.9247623303870921, iteration: 84225
loss: 1.033759593963623,grad_norm: 0.8281043856637795, iteration: 84226
loss: 1.0307790040969849,grad_norm: 0.9728622836864497, iteration: 84227
loss: 1.0157809257507324,grad_norm: 0.949940209937867, iteration: 84228
loss: 0.9915319681167603,grad_norm: 0.8977261930549675, iteration: 84229
loss: 0.9637648463249207,grad_norm: 0.9329706979774176, iteration: 84230
loss: 0.9942663311958313,grad_norm: 0.9712990033951009, iteration: 84231
loss: 1.0450164079666138,grad_norm: 0.9999990434479914, iteration: 84232
loss: 0.9762839078903198,grad_norm: 0.999998995140739, iteration: 84233
loss: 1.004357099533081,grad_norm: 0.9619203736627812, iteration: 84234
loss: 0.9872202277183533,grad_norm: 0.9230779763343475, iteration: 84235
loss: 1.0355379581451416,grad_norm: 0.9999991822539485, iteration: 84236
loss: 1.043582558631897,grad_norm: 0.9999994160977961, iteration: 84237
loss: 1.0025204420089722,grad_norm: 0.9999991277921503, iteration: 84238
loss: 1.0574264526367188,grad_norm: 0.9999998308255177, iteration: 84239
loss: 1.072874903678894,grad_norm: 0.9999992100799189, iteration: 84240
loss: 0.9877161383628845,grad_norm: 0.9409979432277306, iteration: 84241
loss: 1.0033267736434937,grad_norm: 0.8572240848345315, iteration: 84242
loss: 1.0081433057785034,grad_norm: 0.9463247326194328, iteration: 84243
loss: 1.0078083276748657,grad_norm: 0.9999997272088987, iteration: 84244
loss: 1.0704545974731445,grad_norm: 0.9999993082688312, iteration: 84245
loss: 1.0988361835479736,grad_norm: 0.9999992259029401, iteration: 84246
loss: 1.0324269533157349,grad_norm: 0.8953418969993265, iteration: 84247
loss: 1.0402716398239136,grad_norm: 0.9999994365369418, iteration: 84248
loss: 1.2330455780029297,grad_norm: 0.9999995554010619, iteration: 84249
loss: 1.0046865940093994,grad_norm: 0.9417799475665589, iteration: 84250
loss: 1.0389882326126099,grad_norm: 0.999999295938943, iteration: 84251
loss: 0.98847895860672,grad_norm: 0.9999991085956785, iteration: 84252
loss: 1.0239301919937134,grad_norm: 0.9999993853565543, iteration: 84253
loss: 1.0710139274597168,grad_norm: 0.9999991940572738, iteration: 84254
loss: 0.9862542748451233,grad_norm: 0.9999990537691681, iteration: 84255
loss: 0.9979411363601685,grad_norm: 0.8457176461176037, iteration: 84256
loss: 1.0136889219284058,grad_norm: 0.9139021479414381, iteration: 84257
loss: 0.978876531124115,grad_norm: 0.9074015229657685, iteration: 84258
loss: 1.0407845973968506,grad_norm: 0.9999992117239408, iteration: 84259
loss: 1.0225803852081299,grad_norm: 0.9999989890003745, iteration: 84260
loss: 0.9824426770210266,grad_norm: 0.9611908048888218, iteration: 84261
loss: 1.007302165031433,grad_norm: 0.9999992168276298, iteration: 84262
loss: 1.0038918256759644,grad_norm: 0.8985599133871142, iteration: 84263
loss: 1.1002517938613892,grad_norm: 0.9795074379696566, iteration: 84264
loss: 1.004043698310852,grad_norm: 0.9221810696108302, iteration: 84265
loss: 1.0999691486358643,grad_norm: 0.9999990750431075, iteration: 84266
loss: 1.0007545948028564,grad_norm: 0.920218429335212, iteration: 84267
loss: 1.0492119789123535,grad_norm: 0.9742171132680727, iteration: 84268
loss: 1.0318011045455933,grad_norm: 0.9999993275614442, iteration: 84269
loss: 1.029567003250122,grad_norm: 0.9895431312596041, iteration: 84270
loss: 1.0013387203216553,grad_norm: 0.9560942019967799, iteration: 84271
loss: 1.006333827972412,grad_norm: 0.915380088504187, iteration: 84272
loss: 1.0396736860275269,grad_norm: 0.9999999831234151, iteration: 84273
loss: 1.0285991430282593,grad_norm: 0.9999997953241748, iteration: 84274
loss: 0.9771642088890076,grad_norm: 0.9785141906865895, iteration: 84275
loss: 0.9857311248779297,grad_norm: 0.9083837348898952, iteration: 84276
loss: 1.0052900314331055,grad_norm: 0.9436745570968919, iteration: 84277
loss: 0.9978875517845154,grad_norm: 0.949746908140875, iteration: 84278
loss: 1.009024977684021,grad_norm: 0.9999995331183279, iteration: 84279
loss: 1.0043593645095825,grad_norm: 0.9999998079705663, iteration: 84280
loss: 1.4261358976364136,grad_norm: 0.9999997484844275, iteration: 84281
loss: 1.0155608654022217,grad_norm: 0.9276244518902562, iteration: 84282
loss: 1.0043649673461914,grad_norm: 0.9999991176453492, iteration: 84283
loss: 0.9801596999168396,grad_norm: 0.8109652305236816, iteration: 84284
loss: 1.0174881219863892,grad_norm: 0.9440386674526563, iteration: 84285
loss: 1.0145747661590576,grad_norm: 0.9549184347864549, iteration: 84286
loss: 1.0605998039245605,grad_norm: 0.9999990033736526, iteration: 84287
loss: 1.030820369720459,grad_norm: 0.9999995523464038, iteration: 84288
loss: 1.0006883144378662,grad_norm: 0.999999227653291, iteration: 84289
loss: 1.0436023473739624,grad_norm: 0.9999991126829471, iteration: 84290
loss: 0.981465756893158,grad_norm: 0.9999990897211068, iteration: 84291
loss: 0.9554298520088196,grad_norm: 0.8702067523953763, iteration: 84292
loss: 1.0418591499328613,grad_norm: 0.9999991274503368, iteration: 84293
loss: 0.9831938743591309,grad_norm: 0.899765576676995, iteration: 84294
loss: 1.019993782043457,grad_norm: 0.9999992121713464, iteration: 84295
loss: 1.1117054224014282,grad_norm: 0.9999999135279278, iteration: 84296
loss: 1.042228102684021,grad_norm: 0.9999993455235737, iteration: 84297
loss: 1.110499382019043,grad_norm: 0.9999998859842814, iteration: 84298
loss: 1.031617283821106,grad_norm: 0.9337295005379724, iteration: 84299
loss: 1.0023990869522095,grad_norm: 0.9999992047617052, iteration: 84300
loss: 1.0962879657745361,grad_norm: 0.9999994870975785, iteration: 84301
loss: 0.9947746396064758,grad_norm: 0.9575680389387148, iteration: 84302
loss: 0.9762461185455322,grad_norm: 0.9999991303871745, iteration: 84303
loss: 0.9836273193359375,grad_norm: 0.9999990973286313, iteration: 84304
loss: 0.9840084910392761,grad_norm: 0.999999149293114, iteration: 84305
loss: 0.9969596862792969,grad_norm: 0.9863220097680243, iteration: 84306
loss: 1.0041213035583496,grad_norm: 0.9779091758544133, iteration: 84307
loss: 1.0459754467010498,grad_norm: 0.9999992309495382, iteration: 84308
loss: 1.0098575353622437,grad_norm: 0.9999990446526651, iteration: 84309
loss: 1.1254260540008545,grad_norm: 0.9999992125826042, iteration: 84310
loss: 1.012180209159851,grad_norm: 0.9999989884782754, iteration: 84311
loss: 1.0612504482269287,grad_norm: 0.9999998989448601, iteration: 84312
loss: 1.002294659614563,grad_norm: 0.9999991971582375, iteration: 84313
loss: 1.0109132528305054,grad_norm: 0.9288375184850948, iteration: 84314
loss: 1.0344724655151367,grad_norm: 0.9207623968239925, iteration: 84315
loss: 1.000921368598938,grad_norm: 0.8699457674859811, iteration: 84316
loss: 1.0257622003555298,grad_norm: 0.9999994432932031, iteration: 84317
loss: 0.9849121570587158,grad_norm: 0.9999989938436824, iteration: 84318
loss: 1.000137209892273,grad_norm: 0.888479254413341, iteration: 84319
loss: 0.9958333373069763,grad_norm: 0.9999990628459106, iteration: 84320
loss: 0.9760068655014038,grad_norm: 0.9999996656610715, iteration: 84321
loss: 0.9603662490844727,grad_norm: 0.978205199835389, iteration: 84322
loss: 0.992435097694397,grad_norm: 0.9999991311430588, iteration: 84323
loss: 0.9947768449783325,grad_norm: 0.8700705802941449, iteration: 84324
loss: 0.9584082961082458,grad_norm: 0.9042728300700338, iteration: 84325
loss: 0.938194990158081,grad_norm: 0.9999991901303603, iteration: 84326
loss: 0.9532817602157593,grad_norm: 0.9379045856330286, iteration: 84327
loss: 1.0016154050827026,grad_norm: 0.9696902769650236, iteration: 84328
loss: 0.9849582314491272,grad_norm: 0.9611529022597864, iteration: 84329
loss: 0.9930031895637512,grad_norm: 0.9999991694435887, iteration: 84330
loss: 1.0217797756195068,grad_norm: 0.9999997085264609, iteration: 84331
loss: 0.9603784084320068,grad_norm: 0.863147088205561, iteration: 84332
loss: 1.018979549407959,grad_norm: 0.8341720856241487, iteration: 84333
loss: 0.983960747718811,grad_norm: 0.9999992899123066, iteration: 84334
loss: 1.0171284675598145,grad_norm: 0.7950841761701991, iteration: 84335
loss: 1.0008504390716553,grad_norm: 0.814432591683871, iteration: 84336
loss: 0.9722656607627869,grad_norm: 0.9999990694854086, iteration: 84337
loss: 0.939012885093689,grad_norm: 0.9999992531040742, iteration: 84338
loss: 1.0099738836288452,grad_norm: 0.8354796830231405, iteration: 84339
loss: 1.138036847114563,grad_norm: 0.9999995437388473, iteration: 84340
loss: 1.0081206560134888,grad_norm: 0.8572744750571233, iteration: 84341
loss: 1.271167516708374,grad_norm: 0.9999998011658944, iteration: 84342
loss: 1.025395154953003,grad_norm: 0.98350107620076, iteration: 84343
loss: 1.0018055438995361,grad_norm: 0.9608893075883408, iteration: 84344
loss: 1.0172629356384277,grad_norm: 0.9999989627874479, iteration: 84345
loss: 0.9591197967529297,grad_norm: 0.9999990936213723, iteration: 84346
loss: 1.015271544456482,grad_norm: 0.9876844715973416, iteration: 84347
loss: 0.997535765171051,grad_norm: 0.8726540582159409, iteration: 84348
loss: 1.12934148311615,grad_norm: 0.9999992141067934, iteration: 84349
loss: 0.963720977306366,grad_norm: 0.9473001416599991, iteration: 84350
loss: 0.9963776469230652,grad_norm: 0.7940556312796148, iteration: 84351
loss: 0.9863910675048828,grad_norm: 0.8068466392438158, iteration: 84352
loss: 0.9830129146575928,grad_norm: 0.9298520166811585, iteration: 84353
loss: 1.0166537761688232,grad_norm: 0.7566600395228671, iteration: 84354
loss: 1.0801221132278442,grad_norm: 0.9135464165768866, iteration: 84355
loss: 1.0091831684112549,grad_norm: 0.9999990861768561, iteration: 84356
loss: 1.0050867795944214,grad_norm: 0.9999991656134226, iteration: 84357
loss: 0.9774782061576843,grad_norm: 0.9180309022048371, iteration: 84358
loss: 0.9987913370132446,grad_norm: 0.9999993063122937, iteration: 84359
loss: 1.0274111032485962,grad_norm: 0.862384145182101, iteration: 84360
loss: 1.054591178894043,grad_norm: 0.9999995226763848, iteration: 84361
loss: 1.0152490139007568,grad_norm: 0.9999991507493843, iteration: 84362
loss: 0.9673718214035034,grad_norm: 0.9999901372533937, iteration: 84363
loss: 1.029318928718567,grad_norm: 0.9999992096652723, iteration: 84364
loss: 0.9824033975601196,grad_norm: 0.9999992512729369, iteration: 84365
loss: 0.9489082098007202,grad_norm: 0.9471368911458526, iteration: 84366
loss: 1.0041600465774536,grad_norm: 0.8813958081690577, iteration: 84367
loss: 1.0249513387680054,grad_norm: 0.9999990886411655, iteration: 84368
loss: 1.0222618579864502,grad_norm: 0.9868757951952433, iteration: 84369
loss: 1.010461449623108,grad_norm: 0.8393968173303094, iteration: 84370
loss: 0.9898775815963745,grad_norm: 0.8852459920599011, iteration: 84371
loss: 1.3412363529205322,grad_norm: 1.0000000091348558, iteration: 84372
loss: 1.0011496543884277,grad_norm: 0.9999992977187288, iteration: 84373
loss: 1.0804272890090942,grad_norm: 0.9999991339579117, iteration: 84374
loss: 1.0126028060913086,grad_norm: 0.9370004595768068, iteration: 84375
loss: 1.0336942672729492,grad_norm: 0.7546814662475106, iteration: 84376
loss: 0.9930247068405151,grad_norm: 0.8713729277099536, iteration: 84377
loss: 0.9902284145355225,grad_norm: 0.9999993423297681, iteration: 84378
loss: 0.9948068261146545,grad_norm: 0.874181674889195, iteration: 84379
loss: 1.0318477153778076,grad_norm: 0.9999993964029884, iteration: 84380
loss: 1.008394479751587,grad_norm: 0.989791959815726, iteration: 84381
loss: 0.9951155185699463,grad_norm: 0.9999990160032975, iteration: 84382
loss: 0.9902213215827942,grad_norm: 0.9999991983983103, iteration: 84383
loss: 1.0125234127044678,grad_norm: 0.9681557894762308, iteration: 84384
loss: 1.0210477113723755,grad_norm: 0.9999992509331479, iteration: 84385
loss: 1.0295685529708862,grad_norm: 0.8904776542616682, iteration: 84386
loss: 1.0002247095108032,grad_norm: 0.9999990931868322, iteration: 84387
loss: 1.0057038068771362,grad_norm: 0.9999992320831105, iteration: 84388
loss: 1.0059943199157715,grad_norm: 0.9856619995402829, iteration: 84389
loss: 1.0174989700317383,grad_norm: 0.8581729528170328, iteration: 84390
loss: 1.000420093536377,grad_norm: 0.9999992266560687, iteration: 84391
loss: 0.9592195749282837,grad_norm: 0.9748139443369153, iteration: 84392
loss: 1.0581520795822144,grad_norm: 0.9999991186114876, iteration: 84393
loss: 0.988391637802124,grad_norm: 0.9817831716882371, iteration: 84394
loss: 0.986933171749115,grad_norm: 0.9999995991009072, iteration: 84395
loss: 0.9880263805389404,grad_norm: 0.9999990055836592, iteration: 84396
loss: 1.0567575693130493,grad_norm: 0.9877228150416628, iteration: 84397
loss: 0.9967791438102722,grad_norm: 0.8782008169810505, iteration: 84398
loss: 1.025593876838684,grad_norm: 0.8622779651054799, iteration: 84399
loss: 1.1240661144256592,grad_norm: 0.9999995696967174, iteration: 84400
loss: 1.139119029045105,grad_norm: 0.9999997489071341, iteration: 84401
loss: 1.0073283910751343,grad_norm: 0.9724640038158947, iteration: 84402
loss: 1.03465735912323,grad_norm: 0.8885964113298688, iteration: 84403
loss: 0.9889403581619263,grad_norm: 0.9035645124078218, iteration: 84404
loss: 1.0058645009994507,grad_norm: 0.9731067925447178, iteration: 84405
loss: 1.006689429283142,grad_norm: 0.9999994332627957, iteration: 84406
loss: 0.9803792238235474,grad_norm: 0.9441734632034067, iteration: 84407
loss: 0.9962627291679382,grad_norm: 0.849506898867044, iteration: 84408
loss: 1.0789124965667725,grad_norm: 0.9999991657346665, iteration: 84409
loss: 1.0556756258010864,grad_norm: 0.9999991142407051, iteration: 84410
loss: 1.0324171781539917,grad_norm: 0.9494757599544008, iteration: 84411
loss: 1.004142165184021,grad_norm: 0.8668992608443322, iteration: 84412
loss: 0.9991082549095154,grad_norm: 0.8824785130512599, iteration: 84413
loss: 0.9920650720596313,grad_norm: 0.9999998306207369, iteration: 84414
loss: 1.0298618078231812,grad_norm: 0.876121648636054, iteration: 84415
loss: 1.006980299949646,grad_norm: 0.9999995625171135, iteration: 84416
loss: 1.0075005292892456,grad_norm: 0.8795969922524963, iteration: 84417
loss: 1.0227398872375488,grad_norm: 0.956819941523161, iteration: 84418
loss: 0.9932632446289062,grad_norm: 0.9663720037981016, iteration: 84419
loss: 1.0219027996063232,grad_norm: 0.9999997449186543, iteration: 84420
loss: 1.092443823814392,grad_norm: 0.9999991263814618, iteration: 84421
loss: 1.0001288652420044,grad_norm: 0.8607685537784903, iteration: 84422
loss: 0.9847592115402222,grad_norm: 0.9999990735703987, iteration: 84423
loss: 1.0130583047866821,grad_norm: 0.9024439021722354, iteration: 84424
loss: 0.9661401510238647,grad_norm: 0.999999078162631, iteration: 84425
loss: 1.0124322175979614,grad_norm: 0.7464090842531668, iteration: 84426
loss: 1.0429327487945557,grad_norm: 0.9999999110204522, iteration: 84427
loss: 1.0178331136703491,grad_norm: 0.8936933768490272, iteration: 84428
loss: 0.9805229902267456,grad_norm: 0.8627192391485924, iteration: 84429
loss: 1.0080524682998657,grad_norm: 0.841171852095988, iteration: 84430
loss: 0.9703662395477295,grad_norm: 0.9174705472463733, iteration: 84431
loss: 1.0326921939849854,grad_norm: 0.9174653287845287, iteration: 84432
loss: 1.0167038440704346,grad_norm: 0.7812308103007466, iteration: 84433
loss: 0.982363224029541,grad_norm: 0.8732073822743789, iteration: 84434
loss: 1.0438735485076904,grad_norm: 0.9850266339634772, iteration: 84435
loss: 1.027881145477295,grad_norm: 0.9999996771505882, iteration: 84436
loss: 1.0341379642486572,grad_norm: 0.9584152740776, iteration: 84437
loss: 1.113162636756897,grad_norm: 0.9211609356723341, iteration: 84438
loss: 1.0078386068344116,grad_norm: 0.9377785683753538, iteration: 84439
loss: 1.0173262357711792,grad_norm: 0.9999991533070277, iteration: 84440
loss: 1.0232014656066895,grad_norm: 0.9150586999729428, iteration: 84441
loss: 1.0477389097213745,grad_norm: 0.999999753772029, iteration: 84442
loss: 1.3335936069488525,grad_norm: 0.9999995724745024, iteration: 84443
loss: 1.061776041984558,grad_norm: 0.9999999417965694, iteration: 84444
loss: 1.0039197206497192,grad_norm: 0.8983366797681926, iteration: 84445
loss: 1.0019330978393555,grad_norm: 0.9999990646960052, iteration: 84446
loss: 1.080726146697998,grad_norm: 0.9999997921849592, iteration: 84447
loss: 1.0381217002868652,grad_norm: 0.9999998209729412, iteration: 84448
loss: 1.0968772172927856,grad_norm: 0.9999996550873138, iteration: 84449
loss: 1.1572531461715698,grad_norm: 0.9999993698037535, iteration: 84450
loss: 1.0631005764007568,grad_norm: 0.9999993655777595, iteration: 84451
loss: 1.1815844774246216,grad_norm: 0.9999998456644116, iteration: 84452
loss: 1.1108522415161133,grad_norm: 0.9999997536365817, iteration: 84453
loss: 1.1058772802352905,grad_norm: 0.9815066969834406, iteration: 84454
loss: 1.4180818796157837,grad_norm: 0.999999650043468, iteration: 84455
loss: 1.1455315351486206,grad_norm: 0.9999999071028837, iteration: 84456
loss: 1.1467043161392212,grad_norm: 0.9999992015684306, iteration: 84457
loss: 1.4097530841827393,grad_norm: 0.9999997470169482, iteration: 84458
loss: 1.1959894895553589,grad_norm: 0.9999998489895261, iteration: 84459
loss: 1.3771699666976929,grad_norm: 0.9999999431726703, iteration: 84460
loss: 1.160449743270874,grad_norm: 0.9999990767168812, iteration: 84461
loss: 1.1065551042556763,grad_norm: 0.9999997864700542, iteration: 84462
loss: 1.0201234817504883,grad_norm: 0.9572639294359195, iteration: 84463
loss: 1.0848978757858276,grad_norm: 0.9999999444711364, iteration: 84464
loss: 1.0684360265731812,grad_norm: 0.9999997506138201, iteration: 84465
loss: 1.2193397283554077,grad_norm: 0.9999995060979399, iteration: 84466
loss: 1.1121186017990112,grad_norm: 0.9999996085106225, iteration: 84467
loss: 1.0440192222595215,grad_norm: 0.9999995476956175, iteration: 84468
loss: 1.0443178415298462,grad_norm: 0.9999991283285268, iteration: 84469
loss: 1.0887484550476074,grad_norm: 0.9999991813089409, iteration: 84470
loss: 1.0106970071792603,grad_norm: 0.9999989915369524, iteration: 84471
loss: 1.118513584136963,grad_norm: 0.9999996443009453, iteration: 84472
loss: 1.036241054534912,grad_norm: 0.999999513132497, iteration: 84473
loss: 1.2707077264785767,grad_norm: 0.9999997297841923, iteration: 84474
loss: 1.0770434141159058,grad_norm: 0.9999991240982741, iteration: 84475
loss: 1.0554356575012207,grad_norm: 0.9531914641775316, iteration: 84476
loss: 1.0520647764205933,grad_norm: 0.9999991736221286, iteration: 84477
loss: 1.0152004957199097,grad_norm: 0.9633685715111014, iteration: 84478
loss: 1.0624791383743286,grad_norm: 0.9999991714486853, iteration: 84479
loss: 1.1078435182571411,grad_norm: 0.9999992996972534, iteration: 84480
loss: 1.0356056690216064,grad_norm: 0.9160364594666297, iteration: 84481
loss: 1.075897216796875,grad_norm: 0.9999993170536904, iteration: 84482
loss: 1.1030585765838623,grad_norm: 0.9999997309631229, iteration: 84483
loss: 1.0060499906539917,grad_norm: 0.9999998973733432, iteration: 84484
loss: 1.0761547088623047,grad_norm: 0.9999991304509529, iteration: 84485
loss: 1.0587385892868042,grad_norm: 0.9999995720938496, iteration: 84486
loss: 1.009918212890625,grad_norm: 0.97426270218327, iteration: 84487
loss: 1.0213360786437988,grad_norm: 0.9999997587820206, iteration: 84488
loss: 1.1905897855758667,grad_norm: 0.999999928255329, iteration: 84489
loss: 1.0199599266052246,grad_norm: 0.8559134157697884, iteration: 84490
loss: 0.982455849647522,grad_norm: 0.979774647955501, iteration: 84491
loss: 1.0492513179779053,grad_norm: 0.9999999248374581, iteration: 84492
loss: 1.04561185836792,grad_norm: 1.0000000168015668, iteration: 84493
loss: 1.043378233909607,grad_norm: 0.9999991478557524, iteration: 84494
loss: 1.0849974155426025,grad_norm: 0.9732359333563245, iteration: 84495
loss: 1.0788378715515137,grad_norm: 0.9999991081891196, iteration: 84496
loss: 1.0114696025848389,grad_norm: 0.9999995403986749, iteration: 84497
loss: 0.9926469922065735,grad_norm: 0.9895451341353307, iteration: 84498
loss: 0.9741896390914917,grad_norm: 0.999999344594837, iteration: 84499
loss: 1.079741358757019,grad_norm: 0.999999104430446, iteration: 84500
loss: 1.0232399702072144,grad_norm: 0.9999992716050701, iteration: 84501
loss: 1.1061400175094604,grad_norm: 0.9999994945888576, iteration: 84502
loss: 1.0711042881011963,grad_norm: 0.9253158871260188, iteration: 84503
loss: 0.9534441232681274,grad_norm: 0.8384656800688333, iteration: 84504
loss: 0.9916925430297852,grad_norm: 0.8821731357335318, iteration: 84505
loss: 1.1406406164169312,grad_norm: 0.9999992306355573, iteration: 84506
loss: 1.0622639656066895,grad_norm: 0.9999997941023515, iteration: 84507
loss: 1.0459682941436768,grad_norm: 0.9999997938876574, iteration: 84508
loss: 0.9898114800453186,grad_norm: 0.9688437413466247, iteration: 84509
loss: 1.0338943004608154,grad_norm: 0.999999177528534, iteration: 84510
loss: 1.1981784105300903,grad_norm: 0.9999996728525652, iteration: 84511
loss: 1.063417911529541,grad_norm: 0.9999992924493072, iteration: 84512
loss: 1.043380618095398,grad_norm: 0.9908553112984493, iteration: 84513
loss: 1.0462325811386108,grad_norm: 0.9999995674976557, iteration: 84514
loss: 1.0032044649124146,grad_norm: 0.9344288672172514, iteration: 84515
loss: 0.9691483974456787,grad_norm: 0.9749353351870769, iteration: 84516
loss: 0.9992735981941223,grad_norm: 0.9999991380108909, iteration: 84517
loss: 1.0740411281585693,grad_norm: 0.9999995907276484, iteration: 84518
loss: 1.087540864944458,grad_norm: 0.9999995472866351, iteration: 84519
loss: 1.0331226587295532,grad_norm: 0.9999994579560194, iteration: 84520
loss: 0.9826704859733582,grad_norm: 0.9999991805963602, iteration: 84521
loss: 0.990064263343811,grad_norm: 0.9999990312689078, iteration: 84522
loss: 0.9614905118942261,grad_norm: 0.9999995992152113, iteration: 84523
loss: 1.175809621810913,grad_norm: 0.9999997207598882, iteration: 84524
loss: 1.0329338312149048,grad_norm: 0.9470794328541975, iteration: 84525
loss: 0.9911386370658875,grad_norm: 0.8551480473932662, iteration: 84526
loss: 0.9959738850593567,grad_norm: 0.9564923380986204, iteration: 84527
loss: 0.9984543919563293,grad_norm: 0.9999995523238588, iteration: 84528
loss: 1.0682512521743774,grad_norm: 0.950074801774544, iteration: 84529
loss: 0.9993540048599243,grad_norm: 0.999999655293593, iteration: 84530
loss: 1.3470344543457031,grad_norm: 0.9999993104487, iteration: 84531
loss: 0.96793133020401,grad_norm: 0.9348337614320866, iteration: 84532
loss: 1.0935006141662598,grad_norm: 0.9999997252282586, iteration: 84533
loss: 1.075779676437378,grad_norm: 0.9999999900522761, iteration: 84534
loss: 1.076338529586792,grad_norm: 0.9999995249119842, iteration: 84535
loss: 1.0636557340621948,grad_norm: 0.9999992574546549, iteration: 84536
loss: 0.985340416431427,grad_norm: 0.8814949366653356, iteration: 84537
loss: 1.1571258306503296,grad_norm: 0.9999996703821828, iteration: 84538
loss: 1.050686240196228,grad_norm: 0.9999996948069068, iteration: 84539
loss: 1.0659822225570679,grad_norm: 0.9999993240089078, iteration: 84540
loss: 1.0420840978622437,grad_norm: 0.9999993685472298, iteration: 84541
loss: 1.0095940828323364,grad_norm: 0.9999990920744632, iteration: 84542
loss: 1.0388169288635254,grad_norm: 0.9999991967181853, iteration: 84543
loss: 0.985844075679779,grad_norm: 0.8396113500769772, iteration: 84544
loss: 0.9537218809127808,grad_norm: 0.9999991031674615, iteration: 84545
loss: 1.307706356048584,grad_norm: 0.9999997077873636, iteration: 84546
loss: 1.042149543762207,grad_norm: 0.9999998869273705, iteration: 84547
loss: 1.0495811700820923,grad_norm: 0.8867136601337334, iteration: 84548
loss: 0.9892590045928955,grad_norm: 0.9767032924200832, iteration: 84549
loss: 1.058709979057312,grad_norm: 0.9999996871850675, iteration: 84550
loss: 1.0122395753860474,grad_norm: 0.9844001846494872, iteration: 84551
loss: 0.967555582523346,grad_norm: 0.9486154214736997, iteration: 84552
loss: 1.0038431882858276,grad_norm: 0.8558690799628279, iteration: 84553
loss: 1.0373934507369995,grad_norm: 0.9999992065123686, iteration: 84554
loss: 0.982909083366394,grad_norm: 0.9999991707506499, iteration: 84555
loss: 1.1552165746688843,grad_norm: 0.9999997388910442, iteration: 84556
loss: 1.0392961502075195,grad_norm: 0.947872328691279, iteration: 84557
loss: 1.137363314628601,grad_norm: 0.9999997525627682, iteration: 84558
loss: 1.0724396705627441,grad_norm: 0.9999990941994271, iteration: 84559
loss: 1.0101262331008911,grad_norm: 0.9999997011353587, iteration: 84560
loss: 0.9765966534614563,grad_norm: 0.8177486882590842, iteration: 84561
loss: 1.4630711078643799,grad_norm: 0.9999992984049401, iteration: 84562
loss: 1.0127736330032349,grad_norm: 0.9999992274287159, iteration: 84563
loss: 1.0477224588394165,grad_norm: 0.9999995746949972, iteration: 84564
loss: 1.2222117185592651,grad_norm: 0.9999994074638214, iteration: 84565
loss: 1.0236073732376099,grad_norm: 0.9578278413449647, iteration: 84566
loss: 1.0337729454040527,grad_norm: 0.88994212253522, iteration: 84567
loss: 1.0406497716903687,grad_norm: 0.9999993218590914, iteration: 84568
loss: 1.015408992767334,grad_norm: 0.9999993566063415, iteration: 84569
loss: 1.092726230621338,grad_norm: 0.9999994887182243, iteration: 84570
loss: 1.367990493774414,grad_norm: 0.9999993469052089, iteration: 84571
loss: 1.0140833854675293,grad_norm: 0.9665334875169604, iteration: 84572
loss: 0.9632508754730225,grad_norm: 0.9999992840860942, iteration: 84573
loss: 1.032871961593628,grad_norm: 0.9999992002288484, iteration: 84574
loss: 0.9939475655555725,grad_norm: 0.9310508799031065, iteration: 84575
loss: 1.0080652236938477,grad_norm: 0.9999997895218375, iteration: 84576
loss: 1.0524177551269531,grad_norm: 0.9199059287225032, iteration: 84577
loss: 1.0253551006317139,grad_norm: 0.9065264347368052, iteration: 84578
loss: 1.0263580083847046,grad_norm: 0.881175956366711, iteration: 84579
loss: 1.0280609130859375,grad_norm: 0.9999991207618416, iteration: 84580
loss: 1.018736481666565,grad_norm: 0.9999997378820192, iteration: 84581
loss: 1.0118191242218018,grad_norm: 0.9999994030150864, iteration: 84582
loss: 0.9813106060028076,grad_norm: 0.883323156751, iteration: 84583
loss: 1.0497422218322754,grad_norm: 0.8274225396110937, iteration: 84584
loss: 1.1157255172729492,grad_norm: 0.9999999428413797, iteration: 84585
loss: 0.999305009841919,grad_norm: 0.9510669633791349, iteration: 84586
loss: 1.1129122972488403,grad_norm: 0.9999995174192386, iteration: 84587
loss: 1.091132402420044,grad_norm: 0.9999993435169868, iteration: 84588
loss: 1.0019521713256836,grad_norm: 0.9999990592815396, iteration: 84589
loss: 1.088065266609192,grad_norm: 0.9999993341737871, iteration: 84590
loss: 1.0155962705612183,grad_norm: 0.9999992671533235, iteration: 84591
loss: 1.0329173803329468,grad_norm: 0.9999994436796427, iteration: 84592
loss: 1.0282007455825806,grad_norm: 0.8883578439696689, iteration: 84593
loss: 1.049073338508606,grad_norm: 0.929307715974361, iteration: 84594
loss: 0.9964464902877808,grad_norm: 0.9999993102453155, iteration: 84595
loss: 0.9756149649620056,grad_norm: 0.9999992217711648, iteration: 84596
loss: 1.0159060955047607,grad_norm: 0.9999989436838856, iteration: 84597
loss: 1.0130507946014404,grad_norm: 0.9999992267424733, iteration: 84598
loss: 1.0579365491867065,grad_norm: 0.9999992444460385, iteration: 84599
loss: 1.12428617477417,grad_norm: 0.9884140344444164, iteration: 84600
loss: 0.9904791712760925,grad_norm: 0.9717141759398916, iteration: 84601
loss: 0.9767104983329773,grad_norm: 0.8341340407393595, iteration: 84602
loss: 1.1679670810699463,grad_norm: 0.9999998382803849, iteration: 84603
loss: 1.2507803440093994,grad_norm: 0.9999999839187348, iteration: 84604
loss: 0.9912146925926208,grad_norm: 0.9999993573381196, iteration: 84605
loss: 1.024471402168274,grad_norm: 0.999998910331863, iteration: 84606
loss: 1.0730881690979004,grad_norm: 0.9999996492513007, iteration: 84607
loss: 1.1346251964569092,grad_norm: 0.9999997402606814, iteration: 84608
loss: 1.0709303617477417,grad_norm: 0.999999345980858, iteration: 84609
loss: 1.2361023426055908,grad_norm: 0.9999997408989532, iteration: 84610
loss: 1.0230846405029297,grad_norm: 0.9221319823758867, iteration: 84611
loss: 1.0433216094970703,grad_norm: 0.9999996736011769, iteration: 84612
loss: 1.0407443046569824,grad_norm: 0.8430286220428387, iteration: 84613
loss: 0.9955295324325562,grad_norm: 0.9822776894734004, iteration: 84614
loss: 1.0317360162734985,grad_norm: 0.9999990818496033, iteration: 84615
loss: 1.0895124673843384,grad_norm: 0.9999994125814743, iteration: 84616
loss: 1.0129245519638062,grad_norm: 0.9352541927961695, iteration: 84617
loss: 1.0491156578063965,grad_norm: 0.9999993059512894, iteration: 84618
loss: 1.0611778497695923,grad_norm: 0.8770882807676029, iteration: 84619
loss: 1.0904514789581299,grad_norm: 0.98253734468411, iteration: 84620
loss: 1.0014678239822388,grad_norm: 0.9950021554185288, iteration: 84621
loss: 0.9898319244384766,grad_norm: 0.9999994456820863, iteration: 84622
loss: 1.0540413856506348,grad_norm: 0.999999176977778, iteration: 84623
loss: 0.9980041980743408,grad_norm: 0.7299095973362315, iteration: 84624
loss: 1.0908360481262207,grad_norm: 0.9999998901579057, iteration: 84625
loss: 1.0130078792572021,grad_norm: 0.8751402183797042, iteration: 84626
loss: 1.0469878911972046,grad_norm: 0.9999992424951011, iteration: 84627
loss: 1.011407732963562,grad_norm: 0.7958453246468742, iteration: 84628
loss: 1.0360326766967773,grad_norm: 0.9999991960923568, iteration: 84629
loss: 1.0078108310699463,grad_norm: 0.8408472973021508, iteration: 84630
loss: 1.0283374786376953,grad_norm: 0.9999990993161136, iteration: 84631
loss: 0.9399823546409607,grad_norm: 0.9999990949270653, iteration: 84632
loss: 0.9431988000869751,grad_norm: 0.878338980269012, iteration: 84633
loss: 1.0345505475997925,grad_norm: 0.999999341461706, iteration: 84634
loss: 1.0172464847564697,grad_norm: 0.8453616735603979, iteration: 84635
loss: 1.0120466947555542,grad_norm: 0.9686939015529435, iteration: 84636
loss: 0.987943708896637,grad_norm: 0.962979605905701, iteration: 84637
loss: 1.0009231567382812,grad_norm: 0.8320085624204056, iteration: 84638
loss: 0.9903161525726318,grad_norm: 0.9350060144169992, iteration: 84639
loss: 0.9780282378196716,grad_norm: 0.8747505272254817, iteration: 84640
loss: 0.9886849522590637,grad_norm: 0.9613916826642288, iteration: 84641
loss: 1.059744119644165,grad_norm: 0.9661159637288222, iteration: 84642
loss: 1.0246913433074951,grad_norm: 0.8228702798987589, iteration: 84643
loss: 0.9677491188049316,grad_norm: 0.9999989318689827, iteration: 84644
loss: 1.0901154279708862,grad_norm: 0.9999993266139706, iteration: 84645
loss: 1.0032870769500732,grad_norm: 0.947842612837084, iteration: 84646
loss: 0.9616014361381531,grad_norm: 0.7916132996544555, iteration: 84647
loss: 0.9921747446060181,grad_norm: 0.8724554228896456, iteration: 84648
loss: 0.9974814057350159,grad_norm: 0.9204213394919034, iteration: 84649
loss: 0.9529563784599304,grad_norm: 0.8609200959761238, iteration: 84650
loss: 0.9736762046813965,grad_norm: 0.9503484476376595, iteration: 84651
loss: 1.1490164995193481,grad_norm: 0.9999997273924608, iteration: 84652
loss: 1.0714154243469238,grad_norm: 0.999999535040629, iteration: 84653
loss: 1.0496551990509033,grad_norm: 0.9193611098356962, iteration: 84654
loss: 1.0143107175827026,grad_norm: 0.9000574046126361, iteration: 84655
loss: 0.9866336584091187,grad_norm: 0.9999995948137472, iteration: 84656
loss: 1.00947904586792,grad_norm: 0.9739140071559879, iteration: 84657
loss: 1.0668004751205444,grad_norm: 0.9999996695357116, iteration: 84658
loss: 1.0740697383880615,grad_norm: 0.9999992296000505, iteration: 84659
loss: 0.99629807472229,grad_norm: 0.9999991635645977, iteration: 84660
loss: 0.9988152980804443,grad_norm: 0.9408381392394273, iteration: 84661
loss: 1.0721943378448486,grad_norm: 0.9999990551704785, iteration: 84662
loss: 1.0264477729797363,grad_norm: 0.9999998582360156, iteration: 84663
loss: 1.1090645790100098,grad_norm: 0.9999999523198434, iteration: 84664
loss: 1.0592081546783447,grad_norm: 0.9999998615766511, iteration: 84665
loss: 1.0629444122314453,grad_norm: 0.9999999287884866, iteration: 84666
loss: 1.1097255945205688,grad_norm: 0.9999998964777328, iteration: 84667
loss: 0.9641507863998413,grad_norm: 0.9999990890253369, iteration: 84668
loss: 1.1853231191635132,grad_norm: 0.9999999572684253, iteration: 84669
loss: 1.1531684398651123,grad_norm: 0.9999991061722514, iteration: 84670
loss: 0.9849523901939392,grad_norm: 0.9999991729377837, iteration: 84671
loss: 0.9695248007774353,grad_norm: 0.9999990724758822, iteration: 84672
loss: 0.9569473266601562,grad_norm: 0.9999990654300965, iteration: 84673
loss: 1.0131086111068726,grad_norm: 0.9999990671816578, iteration: 84674
loss: 1.0450530052185059,grad_norm: 0.8177119038752506, iteration: 84675
loss: 0.9755489826202393,grad_norm: 0.8864241604628091, iteration: 84676
loss: 1.0173771381378174,grad_norm: 0.9883563259089827, iteration: 84677
loss: 0.9473205804824829,grad_norm: 0.9999993514849548, iteration: 84678
loss: 1.0196762084960938,grad_norm: 0.8203380610998561, iteration: 84679
loss: 1.038223385810852,grad_norm: 0.9999990848651652, iteration: 84680
loss: 0.9841929078102112,grad_norm: 0.999999092504137, iteration: 84681
loss: 1.0024914741516113,grad_norm: 0.9999991093112727, iteration: 84682
loss: 1.1236824989318848,grad_norm: 0.9999993196091042, iteration: 84683
loss: 1.3605244159698486,grad_norm: 0.9999993905913966, iteration: 84684
loss: 1.3659709692001343,grad_norm: 0.9999997427827378, iteration: 84685
loss: 1.0074119567871094,grad_norm: 0.969071713046055, iteration: 84686
loss: 1.1626510620117188,grad_norm: 0.9999998719879208, iteration: 84687
loss: 0.9533633589744568,grad_norm: 0.999999113620574, iteration: 84688
loss: 1.061777949333191,grad_norm: 0.9994521132897612, iteration: 84689
loss: 1.0326646566390991,grad_norm: 0.9999989053422734, iteration: 84690
loss: 1.2515584230422974,grad_norm: 0.999999894396275, iteration: 84691
loss: 0.994662880897522,grad_norm: 0.8453221556402638, iteration: 84692
loss: 1.0132869482040405,grad_norm: 0.9448573984122591, iteration: 84693
loss: 1.076675295829773,grad_norm: 0.9999999645021281, iteration: 84694
loss: 1.0678335428237915,grad_norm: 0.9999994049316687, iteration: 84695
loss: 1.0366299152374268,grad_norm: 0.999999794110978, iteration: 84696
loss: 0.9975412487983704,grad_norm: 0.8581595300243039, iteration: 84697
loss: 0.9897928833961487,grad_norm: 0.9999990858273083, iteration: 84698
loss: 0.9965223073959351,grad_norm: 0.9350443337967231, iteration: 84699
loss: 0.9895823001861572,grad_norm: 0.9047253767827309, iteration: 84700
loss: 1.000260591506958,grad_norm: 0.8162893158196239, iteration: 84701
loss: 1.1572920083999634,grad_norm: 0.9999994069392958, iteration: 84702
loss: 0.9714767336845398,grad_norm: 0.806842688101554, iteration: 84703
loss: 1.0315678119659424,grad_norm: 0.9999990818836905, iteration: 84704
loss: 1.034988522529602,grad_norm: 0.889982726773087, iteration: 84705
loss: 1.0317742824554443,grad_norm: 0.9999990338650462, iteration: 84706
loss: 1.014859676361084,grad_norm: 0.9007117082305041, iteration: 84707
loss: 0.9951504468917847,grad_norm: 0.9358204025242871, iteration: 84708
loss: 1.0625169277191162,grad_norm: 0.9547266125332758, iteration: 84709
loss: 1.0199127197265625,grad_norm: 0.8804737611754234, iteration: 84710
loss: 0.9625658988952637,grad_norm: 0.9999991733316058, iteration: 84711
loss: 1.0051097869873047,grad_norm: 0.8986631227699796, iteration: 84712
loss: 0.9676012992858887,grad_norm: 0.9999989790643617, iteration: 84713
loss: 1.0591697692871094,grad_norm: 0.9999992863099635, iteration: 84714
loss: 1.0057339668273926,grad_norm: 0.9607928204069096, iteration: 84715
loss: 1.017976999282837,grad_norm: 0.9999993021529887, iteration: 84716
loss: 1.0290731191635132,grad_norm: 0.9999995416499744, iteration: 84717
loss: 1.0374598503112793,grad_norm: 0.9999990827721894, iteration: 84718
loss: 1.0346418619155884,grad_norm: 0.9999991906295437, iteration: 84719
loss: 1.0069704055786133,grad_norm: 0.7338419409145466, iteration: 84720
loss: 0.9710844159126282,grad_norm: 0.9999991069093584, iteration: 84721
loss: 1.0863090753555298,grad_norm: 0.9999997655384563, iteration: 84722
loss: 1.0491435527801514,grad_norm: 0.9999991930381239, iteration: 84723
loss: 0.9972550272941589,grad_norm: 0.8463751437105452, iteration: 84724
loss: 1.199118733406067,grad_norm: 0.9999998805084684, iteration: 84725
loss: 0.9700831770896912,grad_norm: 0.9833059517402187, iteration: 84726
loss: 0.9929319024085999,grad_norm: 0.9999992501267967, iteration: 84727
loss: 1.0179535150527954,grad_norm: 0.7927243663521126, iteration: 84728
loss: 1.0028630495071411,grad_norm: 0.8823711295931106, iteration: 84729
loss: 0.9931599497795105,grad_norm: 0.9999989864309463, iteration: 84730
loss: 1.0807164907455444,grad_norm: 0.9486159482908264, iteration: 84731
loss: 1.0205800533294678,grad_norm: 0.9999993460111517, iteration: 84732
loss: 1.2045964002609253,grad_norm: 0.9910153479000721, iteration: 84733
loss: 1.0847060680389404,grad_norm: 0.9999990914853677, iteration: 84734
loss: 1.0460865497589111,grad_norm: 0.9999990383982008, iteration: 84735
loss: 1.0320082902908325,grad_norm: 0.9999999383773414, iteration: 84736
loss: 1.0299118757247925,grad_norm: 0.9999994346127944, iteration: 84737
loss: 0.9952347874641418,grad_norm: 0.9434948825410019, iteration: 84738
loss: 1.0257179737091064,grad_norm: 0.9681485678070688, iteration: 84739
loss: 1.0034739971160889,grad_norm: 0.9999991337637413, iteration: 84740
loss: 1.0653926134109497,grad_norm: 0.9999991109435038, iteration: 84741
loss: 1.0861462354660034,grad_norm: 0.9999995425433226, iteration: 84742
loss: 0.9956033229827881,grad_norm: 0.9999990135841423, iteration: 84743
loss: 1.0239053964614868,grad_norm: 0.9999990927497192, iteration: 84744
loss: 1.0204238891601562,grad_norm: 0.9999990436616988, iteration: 84745
loss: 1.1231578588485718,grad_norm: 0.9999991975772939, iteration: 84746
loss: 1.0699265003204346,grad_norm: 0.9999993253412355, iteration: 84747
loss: 0.987687885761261,grad_norm: 0.8640655040266453, iteration: 84748
loss: 1.020612359046936,grad_norm: 0.9999990769215561, iteration: 84749
loss: 1.0066431760787964,grad_norm: 0.9999993033794304, iteration: 84750
loss: 0.9616260528564453,grad_norm: 0.9576389728789859, iteration: 84751
loss: 1.0175068378448486,grad_norm: 0.9999991375273519, iteration: 84752
loss: 1.0011073350906372,grad_norm: 0.9999992482351344, iteration: 84753
loss: 1.043554663658142,grad_norm: 0.9814059904124112, iteration: 84754
loss: 1.1048625707626343,grad_norm: 0.9999992455917027, iteration: 84755
loss: 1.0449178218841553,grad_norm: 0.9999989927196713, iteration: 84756
loss: 0.9749241471290588,grad_norm: 0.9434633381823846, iteration: 84757
loss: 1.1958463191986084,grad_norm: 0.9999992302151488, iteration: 84758
loss: 1.0229612588882446,grad_norm: 0.9999990534286316, iteration: 84759
loss: 0.9549067616462708,grad_norm: 0.838822298578281, iteration: 84760
loss: 1.0233763456344604,grad_norm: 0.9891547600537978, iteration: 84761
loss: 1.066260576248169,grad_norm: 0.9999992305123911, iteration: 84762
loss: 1.023977279663086,grad_norm: 0.9999991289103614, iteration: 84763
loss: 0.9739353060722351,grad_norm: 0.9999996167404799, iteration: 84764
loss: 0.9633305072784424,grad_norm: 0.9645837233755747, iteration: 84765
loss: 1.0082471370697021,grad_norm: 0.9999990851418971, iteration: 84766
loss: 1.1009981632232666,grad_norm: 0.999999898503582, iteration: 84767
loss: 1.090060830116272,grad_norm: 0.9999996691340028, iteration: 84768
loss: 0.9609350562095642,grad_norm: 0.9999989354164712, iteration: 84769
loss: 1.0243620872497559,grad_norm: 0.9999990450418643, iteration: 84770
loss: 1.1129311323165894,grad_norm: 0.9999992416975826, iteration: 84771
loss: 1.0810693502426147,grad_norm: 0.9999991391657925, iteration: 84772
loss: 1.1839228868484497,grad_norm: 0.9999993030805794, iteration: 84773
loss: 1.037427544593811,grad_norm: 0.9195065049021961, iteration: 84774
loss: 1.0123871564865112,grad_norm: 0.9576504654442313, iteration: 84775
loss: 1.1804007291793823,grad_norm: 0.9999996792355886, iteration: 84776
loss: 1.065559983253479,grad_norm: 0.9999993967288952, iteration: 84777
loss: 0.9732217192649841,grad_norm: 0.999999239478735, iteration: 84778
loss: 1.1201057434082031,grad_norm: 0.999999271900933, iteration: 84779
loss: 0.9983708262443542,grad_norm: 0.8726747741549616, iteration: 84780
loss: 1.0655832290649414,grad_norm: 0.9903146356114195, iteration: 84781
loss: 1.0257925987243652,grad_norm: 0.9999990704947637, iteration: 84782
loss: 1.077237844467163,grad_norm: 0.9999990387900803, iteration: 84783
loss: 1.0828787088394165,grad_norm: 0.9999999792838844, iteration: 84784
loss: 0.9742016792297363,grad_norm: 0.9999995886456488, iteration: 84785
loss: 1.0076382160186768,grad_norm: 0.9999991226128612, iteration: 84786
loss: 1.0897746086120605,grad_norm: 0.999999434280904, iteration: 84787
loss: 1.0304559469223022,grad_norm: 0.9147603275399147, iteration: 84788
loss: 0.9718365669250488,grad_norm: 0.9999993841044783, iteration: 84789
loss: 0.9984429478645325,grad_norm: 0.9999995161139515, iteration: 84790
loss: 1.033779501914978,grad_norm: 0.8925282965301184, iteration: 84791
loss: 1.013043761253357,grad_norm: 0.9999992427384568, iteration: 84792
loss: 1.046858787536621,grad_norm: 0.9999992339632607, iteration: 84793
loss: 1.012648582458496,grad_norm: 0.9643804084227202, iteration: 84794
loss: 1.0876257419586182,grad_norm: 1.000000004482383, iteration: 84795
loss: 0.9827473163604736,grad_norm: 0.9760616964780368, iteration: 84796
loss: 0.9982693195343018,grad_norm: 0.9999992631812731, iteration: 84797
loss: 1.0076438188552856,grad_norm: 0.835473042697983, iteration: 84798
loss: 1.0000081062316895,grad_norm: 0.9993085730150865, iteration: 84799
loss: 1.0013140439987183,grad_norm: 0.9999992935359092, iteration: 84800
loss: 1.0676746368408203,grad_norm: 0.9999991565661116, iteration: 84801
loss: 0.9463921785354614,grad_norm: 0.9999990456724024, iteration: 84802
loss: 1.0492738485336304,grad_norm: 0.843185722359907, iteration: 84803
loss: 0.978968620300293,grad_norm: 0.8596909812732183, iteration: 84804
loss: 1.0629112720489502,grad_norm: 0.9999994443613361, iteration: 84805
loss: 1.001859426498413,grad_norm: 0.9045996332865703, iteration: 84806
loss: 0.9891268014907837,grad_norm: 0.9999991987678414, iteration: 84807
loss: 1.0213563442230225,grad_norm: 0.990087297895652, iteration: 84808
loss: 1.0694284439086914,grad_norm: 0.9999991756757811, iteration: 84809
loss: 1.025593876838684,grad_norm: 0.9024388500971917, iteration: 84810
loss: 0.9675095081329346,grad_norm: 0.9792001067807773, iteration: 84811
loss: 1.0080593824386597,grad_norm: 0.9999992297153592, iteration: 84812
loss: 1.0450319051742554,grad_norm: 0.9973043953668392, iteration: 84813
loss: 1.0555827617645264,grad_norm: 0.9999992864435028, iteration: 84814
loss: 1.004167914390564,grad_norm: 0.9085303877493507, iteration: 84815
loss: 1.0000596046447754,grad_norm: 0.9999990318311018, iteration: 84816
loss: 1.0045452117919922,grad_norm: 0.9999991107470686, iteration: 84817
loss: 1.0415266752243042,grad_norm: 0.999999126211143, iteration: 84818
loss: 1.0081591606140137,grad_norm: 0.8563332208105465, iteration: 84819
loss: 0.9750863909721375,grad_norm: 0.9999991456893944, iteration: 84820
loss: 1.0941802263259888,grad_norm: 0.9999994330234764, iteration: 84821
loss: 1.0100619792938232,grad_norm: 0.9484701976703371, iteration: 84822
loss: 0.976631224155426,grad_norm: 0.8366483131987118, iteration: 84823
loss: 1.0137213468551636,grad_norm: 0.9999990812647628, iteration: 84824
loss: 0.9691490530967712,grad_norm: 0.7889921264557338, iteration: 84825
loss: 0.9892547130584717,grad_norm: 0.8851749482230419, iteration: 84826
loss: 0.9617177844047546,grad_norm: 0.9309321717230774, iteration: 84827
loss: 0.9969136118888855,grad_norm: 0.9178768432004806, iteration: 84828
loss: 1.053513765335083,grad_norm: 0.8982629598904492, iteration: 84829
loss: 1.0071066617965698,grad_norm: 0.9999992313774231, iteration: 84830
loss: 0.9344556927680969,grad_norm: 0.9151248925135721, iteration: 84831
loss: 1.0177689790725708,grad_norm: 0.9999993783719513, iteration: 84832
loss: 0.9934394359588623,grad_norm: 0.7737601445993655, iteration: 84833
loss: 0.9794387817382812,grad_norm: 0.8694716054830934, iteration: 84834
loss: 0.9906960725784302,grad_norm: 0.9378822190101077, iteration: 84835
loss: 1.0014649629592896,grad_norm: 0.9237146883433567, iteration: 84836
loss: 1.003188133239746,grad_norm: 0.841795364572747, iteration: 84837
loss: 1.139249324798584,grad_norm: 0.9999991254970108, iteration: 84838
loss: 1.0168108940124512,grad_norm: 0.9107169606292076, iteration: 84839
loss: 1.0010571479797363,grad_norm: 0.9640856759348079, iteration: 84840
loss: 0.9410266280174255,grad_norm: 0.9394461190670137, iteration: 84841
loss: 1.0155755281448364,grad_norm: 0.9999998767885, iteration: 84842
loss: 0.9595766663551331,grad_norm: 0.9999999310300846, iteration: 84843
loss: 0.9768517017364502,grad_norm: 0.883420835588508, iteration: 84844
loss: 0.9971482753753662,grad_norm: 0.9200736910256153, iteration: 84845
loss: 1.0129002332687378,grad_norm: 0.8428426099212837, iteration: 84846
loss: 1.0072530508041382,grad_norm: 0.9999991164825094, iteration: 84847
loss: 0.994604766368866,grad_norm: 0.9999993306250312, iteration: 84848
loss: 0.9958160519599915,grad_norm: 0.9999991579696454, iteration: 84849
loss: 0.9849230051040649,grad_norm: 0.9999991058215515, iteration: 84850
loss: 1.050261378288269,grad_norm: 0.9999992151213188, iteration: 84851
loss: 0.9826509356498718,grad_norm: 0.9319664755893076, iteration: 84852
loss: 1.007310390472412,grad_norm: 0.999999474627199, iteration: 84853
loss: 0.9817197918891907,grad_norm: 0.9999991961304083, iteration: 84854
loss: 1.043135404586792,grad_norm: 0.8909118905972327, iteration: 84855
loss: 0.9962284564971924,grad_norm: 0.9999990686869002, iteration: 84856
loss: 0.9916441440582275,grad_norm: 0.8839876700539315, iteration: 84857
loss: 1.0143885612487793,grad_norm: 0.9999993181739779, iteration: 84858
loss: 1.014444351196289,grad_norm: 0.9999990172680341, iteration: 84859
loss: 1.0356171131134033,grad_norm: 0.8612448859833219, iteration: 84860
loss: 0.9569717645645142,grad_norm: 0.864806702634494, iteration: 84861
loss: 0.9534044861793518,grad_norm: 0.954595510776639, iteration: 84862
loss: 1.0615577697753906,grad_norm: 0.9999992944482908, iteration: 84863
loss: 0.9711552858352661,grad_norm: 0.9446950566975404, iteration: 84864
loss: 0.9842560291290283,grad_norm: 0.9999992761893898, iteration: 84865
loss: 0.9814404845237732,grad_norm: 0.9999990028084286, iteration: 84866
loss: 1.0124026536941528,grad_norm: 0.9049237685835589, iteration: 84867
loss: 1.0279812812805176,grad_norm: 0.9999995142239662, iteration: 84868
loss: 1.0448466539382935,grad_norm: 0.9999991367706225, iteration: 84869
loss: 0.964368999004364,grad_norm: 0.9164662771150953, iteration: 84870
loss: 0.9915953874588013,grad_norm: 0.9249720565057931, iteration: 84871
loss: 1.0314128398895264,grad_norm: 0.9999992122289356, iteration: 84872
loss: 0.9793986678123474,grad_norm: 0.9999990091810699, iteration: 84873
loss: 1.0186936855316162,grad_norm: 0.9999994542543256, iteration: 84874
loss: 0.9886409044265747,grad_norm: 0.9757810312450164, iteration: 84875
loss: 0.9861015677452087,grad_norm: 0.999999334098588, iteration: 84876
loss: 1.0329487323760986,grad_norm: 0.9999993191635592, iteration: 84877
loss: 1.0221425294876099,grad_norm: 0.9999998736420252, iteration: 84878
loss: 0.9591847062110901,grad_norm: 0.9999991189179621, iteration: 84879
loss: 0.9983574151992798,grad_norm: 0.9999991828614627, iteration: 84880
loss: 1.0106145143508911,grad_norm: 0.8768630578634371, iteration: 84881
loss: 1.0248132944107056,grad_norm: 0.9040668026221965, iteration: 84882
loss: 1.0619943141937256,grad_norm: 0.779489957314184, iteration: 84883
loss: 1.0787267684936523,grad_norm: 0.9999991759302689, iteration: 84884
loss: 0.9626314043998718,grad_norm: 0.9999992068127355, iteration: 84885
loss: 0.9791891574859619,grad_norm: 0.8729124464820573, iteration: 84886
loss: 1.0329171419143677,grad_norm: 0.9999991910052001, iteration: 84887
loss: 0.9933602213859558,grad_norm: 0.9999990162502882, iteration: 84888
loss: 1.1667145490646362,grad_norm: 0.9999994159775653, iteration: 84889
loss: 1.000540852546692,grad_norm: 0.9500842554279855, iteration: 84890
loss: 1.0248777866363525,grad_norm: 0.9999991480998579, iteration: 84891
loss: 1.0246124267578125,grad_norm: 0.999999112116765, iteration: 84892
loss: 1.028101921081543,grad_norm: 0.9555517234042101, iteration: 84893
loss: 0.9745996594429016,grad_norm: 0.9988729003780097, iteration: 84894
loss: 1.0216137170791626,grad_norm: 0.9999993283105203, iteration: 84895
loss: 0.9896426200866699,grad_norm: 0.972326852928697, iteration: 84896
loss: 0.9908297061920166,grad_norm: 0.9770189795626084, iteration: 84897
loss: 0.9741920828819275,grad_norm: 0.9999991713259355, iteration: 84898
loss: 1.1587302684783936,grad_norm: 0.9999992427608109, iteration: 84899
loss: 1.0168362855911255,grad_norm: 0.9999991452530187, iteration: 84900
loss: 0.978181779384613,grad_norm: 0.9999997811961209, iteration: 84901
loss: 0.9792517423629761,grad_norm: 0.9999994020339504, iteration: 84902
loss: 1.0339157581329346,grad_norm: 0.9999996532117583, iteration: 84903
loss: 1.190540075302124,grad_norm: 0.9999993946413833, iteration: 84904
loss: 0.9971625804901123,grad_norm: 0.9999990012799619, iteration: 84905
loss: 1.0636239051818848,grad_norm: 0.9999993935182095, iteration: 84906
loss: 0.9960922598838806,grad_norm: 0.958796305006304, iteration: 84907
loss: 1.0254281759262085,grad_norm: 0.9325960646773432, iteration: 84908
loss: 0.9969491362571716,grad_norm: 0.9999991120140446, iteration: 84909
loss: 1.07381010055542,grad_norm: 0.9999991796693702, iteration: 84910
loss: 0.9989533424377441,grad_norm: 0.9378573672783328, iteration: 84911
loss: 0.9812538623809814,grad_norm: 0.9999991884644699, iteration: 84912
loss: 1.083924412727356,grad_norm: 0.9999997628793289, iteration: 84913
loss: 1.0345512628555298,grad_norm: 0.9999991776503254, iteration: 84914
loss: 1.0087753534317017,grad_norm: 0.9276941077277242, iteration: 84915
loss: 1.026247262954712,grad_norm: 0.9999991128778808, iteration: 84916
loss: 1.087051272392273,grad_norm: 0.9999992767291845, iteration: 84917
loss: 1.0292174816131592,grad_norm: 0.9634899889368275, iteration: 84918
loss: 0.9727200865745544,grad_norm: 0.8736971391713093, iteration: 84919
loss: 1.0215325355529785,grad_norm: 0.9999993441101049, iteration: 84920
loss: 1.091567039489746,grad_norm: 0.9999992892077356, iteration: 84921
loss: 0.9888919591903687,grad_norm: 0.9284757802778438, iteration: 84922
loss: 0.9958251714706421,grad_norm: 0.9535085742316349, iteration: 84923
loss: 0.9716359972953796,grad_norm: 0.8765747819888847, iteration: 84924
loss: 1.0091238021850586,grad_norm: 0.9441100379247584, iteration: 84925
loss: 1.0649343729019165,grad_norm: 0.9999993152430602, iteration: 84926
loss: 1.054111123085022,grad_norm: 0.9999993380783693, iteration: 84927
loss: 1.0359456539154053,grad_norm: 0.795007074010498, iteration: 84928
loss: 1.0115416049957275,grad_norm: 0.9999996449626748, iteration: 84929
loss: 1.0374946594238281,grad_norm: 0.9999994442582044, iteration: 84930
loss: 0.9821408987045288,grad_norm: 0.9352620984839397, iteration: 84931
loss: 0.9914771318435669,grad_norm: 0.9999990213296324, iteration: 84932
loss: 0.9767724275588989,grad_norm: 0.9999989682411177, iteration: 84933
loss: 1.0195832252502441,grad_norm: 0.8379924803492433, iteration: 84934
loss: 0.9959486126899719,grad_norm: 0.9999990435445081, iteration: 84935
loss: 1.0349515676498413,grad_norm: 0.8804812835828575, iteration: 84936
loss: 0.999002993106842,grad_norm: 0.9999991687175438, iteration: 84937
loss: 1.027170181274414,grad_norm: 0.8971422144233567, iteration: 84938
loss: 0.9865097999572754,grad_norm: 0.9999990910519887, iteration: 84939
loss: 1.0039706230163574,grad_norm: 0.7882240435949255, iteration: 84940
loss: 0.9895375370979309,grad_norm: 0.9934470926575804, iteration: 84941
loss: 1.0195287466049194,grad_norm: 0.8862870944727902, iteration: 84942
loss: 1.0174555778503418,grad_norm: 0.9999991808194584, iteration: 84943
loss: 1.010159969329834,grad_norm: 0.9542461039164529, iteration: 84944
loss: 1.1728832721710205,grad_norm: 0.9999992877351104, iteration: 84945
loss: 0.973105251789093,grad_norm: 0.9999995274451557, iteration: 84946
loss: 1.1004106998443604,grad_norm: 0.9999993970255527, iteration: 84947
loss: 1.0150072574615479,grad_norm: 0.9465361088754392, iteration: 84948
loss: 1.0623478889465332,grad_norm: 0.9999991782242399, iteration: 84949
loss: 1.0871407985687256,grad_norm: 0.9999993716137152, iteration: 84950
loss: 1.0120924711227417,grad_norm: 0.9999991878976054, iteration: 84951
loss: 0.9830957055091858,grad_norm: 0.9221327997484714, iteration: 84952
loss: 1.0156248807907104,grad_norm: 0.8967975326500297, iteration: 84953
loss: 1.0075902938842773,grad_norm: 0.8930716168229494, iteration: 84954
loss: 1.1784307956695557,grad_norm: 0.9999997087201917, iteration: 84955
loss: 0.9976767301559448,grad_norm: 0.946849035793844, iteration: 84956
loss: 0.9972805380821228,grad_norm: 0.9999991882470363, iteration: 84957
loss: 1.0058823823928833,grad_norm: 0.8425061361006705, iteration: 84958
loss: 1.0351873636245728,grad_norm: 0.999999133692166, iteration: 84959
loss: 1.01263427734375,grad_norm: 0.9663439464907815, iteration: 84960
loss: 1.021321415901184,grad_norm: 0.8620592888369283, iteration: 84961
loss: 1.0076656341552734,grad_norm: 0.9645688839040252, iteration: 84962
loss: 1.0600390434265137,grad_norm: 0.9278661719598823, iteration: 84963
loss: 1.0363261699676514,grad_norm: 0.9999995622869641, iteration: 84964
loss: 0.9916563034057617,grad_norm: 0.8815692557043068, iteration: 84965
loss: 1.0017385482788086,grad_norm: 0.9999997304824938, iteration: 84966
loss: 1.0140217542648315,grad_norm: 0.9151463339012897, iteration: 84967
loss: 1.0407339334487915,grad_norm: 0.9999994686701192, iteration: 84968
loss: 0.9831244349479675,grad_norm: 0.9704203043336272, iteration: 84969
loss: 0.9956220984458923,grad_norm: 0.9789369771476434, iteration: 84970
loss: 0.9851853251457214,grad_norm: 0.9999992136429504, iteration: 84971
loss: 0.9822422862052917,grad_norm: 0.8979676575457706, iteration: 84972
loss: 0.984228253364563,grad_norm: 0.9674686020032273, iteration: 84973
loss: 1.1885160207748413,grad_norm: 0.9999997932911174, iteration: 84974
loss: 1.0263746976852417,grad_norm: 0.9770942464153521, iteration: 84975
loss: 0.9903895258903503,grad_norm: 0.7887645974790434, iteration: 84976
loss: 1.0098055601119995,grad_norm: 0.9378227748357026, iteration: 84977
loss: 1.0241543054580688,grad_norm: 0.9999991780345122, iteration: 84978
loss: 1.0221518278121948,grad_norm: 0.9999995207999097, iteration: 84979
loss: 0.9733830690383911,grad_norm: 0.9999991172262132, iteration: 84980
loss: 1.0662944316864014,grad_norm: 0.9999991141904402, iteration: 84981
loss: 1.0322544574737549,grad_norm: 0.9999991077502364, iteration: 84982
loss: 0.9937891960144043,grad_norm: 0.9490132224051324, iteration: 84983
loss: 1.0067856311798096,grad_norm: 0.9999990873229979, iteration: 84984
loss: 1.0116338729858398,grad_norm: 1.0000000133903237, iteration: 84985
loss: 1.1258686780929565,grad_norm: 0.9999999267891307, iteration: 84986
loss: 1.0377737283706665,grad_norm: 0.9999992972792814, iteration: 84987
loss: 0.9910137057304382,grad_norm: 0.9999991485530829, iteration: 84988
loss: 1.0513596534729004,grad_norm: 0.9862305230375025, iteration: 84989
loss: 1.1054255962371826,grad_norm: 0.9999993642851669, iteration: 84990
loss: 1.0166029930114746,grad_norm: 0.9999994431843243, iteration: 84991
loss: 1.026390790939331,grad_norm: 0.9999991997951218, iteration: 84992
loss: 1.0913112163543701,grad_norm: 0.9595107367978494, iteration: 84993
loss: 1.0099453926086426,grad_norm: 0.999999314128368, iteration: 84994
loss: 1.3858312368392944,grad_norm: 0.9999995043282951, iteration: 84995
loss: 1.001975655555725,grad_norm: 0.9999991197687197, iteration: 84996
loss: 1.195006251335144,grad_norm: 0.99999987256902, iteration: 84997
loss: 1.1476572751998901,grad_norm: 0.9999991745551996, iteration: 84998
loss: 0.9681810140609741,grad_norm: 0.9999991205518892, iteration: 84999
loss: 1.0079327821731567,grad_norm: 0.8629790461538527, iteration: 85000
loss: 1.0581766366958618,grad_norm: 0.9999996265757014, iteration: 85001
loss: 1.197060465812683,grad_norm: 1.000000082392404, iteration: 85002
loss: 1.0938037633895874,grad_norm: 0.9999991318721052, iteration: 85003
loss: 1.0074914693832397,grad_norm: 0.999999190473357, iteration: 85004
loss: 1.0694599151611328,grad_norm: 0.9999993633550828, iteration: 85005
loss: 1.0462048053741455,grad_norm: 0.9999993258629627, iteration: 85006
loss: 1.0461485385894775,grad_norm: 0.9999995722045373, iteration: 85007
loss: 1.0215178728103638,grad_norm: 0.9999991623803743, iteration: 85008
loss: 0.9911567568778992,grad_norm: 0.9029334439046984, iteration: 85009
loss: 1.2102830410003662,grad_norm: 0.9999992856039776, iteration: 85010
loss: 1.0723292827606201,grad_norm: 0.9999994343348771, iteration: 85011
loss: 1.394178032875061,grad_norm: 0.9999994912732334, iteration: 85012
loss: 0.9816100001335144,grad_norm: 0.999999112671099, iteration: 85013
loss: 1.1491872072219849,grad_norm: 0.9999996184155723, iteration: 85014
loss: 1.1097739934921265,grad_norm: 0.9628536090566087, iteration: 85015
loss: 1.4075366258621216,grad_norm: 0.9999996153612519, iteration: 85016
loss: 1.0684257745742798,grad_norm: 0.9999996970920217, iteration: 85017
loss: 1.0040032863616943,grad_norm: 0.9072740472040596, iteration: 85018
loss: 0.9912063479423523,grad_norm: 0.999999044479095, iteration: 85019
loss: 1.3182514905929565,grad_norm: 0.9999999398150128, iteration: 85020
loss: 0.9957106113433838,grad_norm: 0.9986380220968769, iteration: 85021
loss: 1.0903139114379883,grad_norm: 0.9999996955001308, iteration: 85022
loss: 1.0784196853637695,grad_norm: 0.999999219082525, iteration: 85023
loss: 1.030026912689209,grad_norm: 0.9999991517085026, iteration: 85024
loss: 1.149560570716858,grad_norm: 0.9999995501634937, iteration: 85025
loss: 1.0659728050231934,grad_norm: 0.9999990262729374, iteration: 85026
loss: 1.0506681203842163,grad_norm: 0.9999997554253729, iteration: 85027
loss: 1.246516227722168,grad_norm: 0.9999992738201183, iteration: 85028
loss: 1.1297026872634888,grad_norm: 0.9999997762455185, iteration: 85029
loss: 0.969963550567627,grad_norm: 0.9999991828036385, iteration: 85030
loss: 1.0499683618545532,grad_norm: 0.9999991117476259, iteration: 85031
loss: 1.0936487913131714,grad_norm: 0.9999996802561026, iteration: 85032
loss: 1.0543062686920166,grad_norm: 0.999999185869406, iteration: 85033
loss: 1.2900114059448242,grad_norm: 0.9999996585079299, iteration: 85034
loss: 1.032476782798767,grad_norm: 0.9373362749008339, iteration: 85035
loss: 1.0460693836212158,grad_norm: 0.9121164890861919, iteration: 85036
loss: 1.0132657289505005,grad_norm: 0.9999991257414181, iteration: 85037
loss: 1.0902329683303833,grad_norm: 0.9999991133194116, iteration: 85038
loss: 1.244497537612915,grad_norm: 0.9999992608095704, iteration: 85039
loss: 1.2215379476547241,grad_norm: 0.9999995634347694, iteration: 85040
loss: 1.253578782081604,grad_norm: 0.9999999368653075, iteration: 85041
loss: 1.0915132761001587,grad_norm: 0.9999993515551675, iteration: 85042
loss: 0.964056670665741,grad_norm: 0.9999991236922563, iteration: 85043
loss: 1.1432442665100098,grad_norm: 0.9999995206094184, iteration: 85044
loss: 1.0169849395751953,grad_norm: 0.9999996475632279, iteration: 85045
loss: 1.1835225820541382,grad_norm: 0.9999995629644838, iteration: 85046
loss: 0.9939085245132446,grad_norm: 0.9424018966487238, iteration: 85047
loss: 1.111651062965393,grad_norm: 0.9999995919597113, iteration: 85048
loss: 1.1125189065933228,grad_norm: 0.999999444809491, iteration: 85049
loss: 1.1605674028396606,grad_norm: 1.0000000272848935, iteration: 85050
loss: 1.0767369270324707,grad_norm: 0.9999991778123446, iteration: 85051
loss: 1.0134848356246948,grad_norm: 0.988465072564872, iteration: 85052
loss: 1.0295878648757935,grad_norm: 0.9364559721597767, iteration: 85053
loss: 1.0771257877349854,grad_norm: 0.9999994973305111, iteration: 85054
loss: 1.241753339767456,grad_norm: 0.9999997247929989, iteration: 85055
loss: 1.0477932691574097,grad_norm: 0.999999171036935, iteration: 85056
loss: 1.0547966957092285,grad_norm: 0.9999993172910754, iteration: 85057
loss: 1.1618634462356567,grad_norm: 0.9999997731640803, iteration: 85058
loss: 1.1281156539916992,grad_norm: 0.9999998072852485, iteration: 85059
loss: 1.1243044137954712,grad_norm: 0.9999991465675747, iteration: 85060
loss: 1.1246228218078613,grad_norm: 0.9999997203225625, iteration: 85061
loss: 1.066666841506958,grad_norm: 0.9999993365797849, iteration: 85062
loss: 1.0422931909561157,grad_norm: 0.9120556085015016, iteration: 85063
loss: 1.129312515258789,grad_norm: 0.9999993971369884, iteration: 85064
loss: 1.1872498989105225,grad_norm: 0.9999997991303374, iteration: 85065
loss: 1.1102145910263062,grad_norm: 0.999999772630066, iteration: 85066
loss: 1.1312024593353271,grad_norm: 0.9999996222905768, iteration: 85067
loss: 1.1127898693084717,grad_norm: 0.999999550077704, iteration: 85068
loss: 1.1640790700912476,grad_norm: 0.9999997361293228, iteration: 85069
loss: 1.318360447883606,grad_norm: 0.9999996095107646, iteration: 85070
loss: 1.0204654932022095,grad_norm: 0.9999994501192734, iteration: 85071
loss: 1.104985237121582,grad_norm: 0.9999994401661738, iteration: 85072
loss: 1.173088788986206,grad_norm: 0.9999992032981087, iteration: 85073
loss: 1.1732685565948486,grad_norm: 0.999999829241698, iteration: 85074
loss: 1.211729884147644,grad_norm: 0.9999998553518934, iteration: 85075
loss: 1.0138765573501587,grad_norm: 0.9999991499543046, iteration: 85076
loss: 1.4703538417816162,grad_norm: 0.9999996462786398, iteration: 85077
loss: 1.1363996267318726,grad_norm: 0.999999653086408, iteration: 85078
loss: 1.2792037725448608,grad_norm: 0.999999693408987, iteration: 85079
loss: 1.0241360664367676,grad_norm: 0.9000318766816359, iteration: 85080
loss: 1.100652813911438,grad_norm: 0.9999990511934075, iteration: 85081
loss: 1.1852468252182007,grad_norm: 0.9999998790110892, iteration: 85082
loss: 1.2726387977600098,grad_norm: 0.9999996664625409, iteration: 85083
loss: 1.228547215461731,grad_norm: 0.9999997003210137, iteration: 85084
loss: 1.2914403676986694,grad_norm: 0.9999992785945562, iteration: 85085
loss: 1.3087623119354248,grad_norm: 0.9999995778180435, iteration: 85086
loss: 1.2777653932571411,grad_norm: 0.9999999091986399, iteration: 85087
loss: 1.0591734647750854,grad_norm: 0.9999992229244138, iteration: 85088
loss: 1.2424062490463257,grad_norm: 0.9999993669312421, iteration: 85089
loss: 1.3788633346557617,grad_norm: 0.9999997065679512, iteration: 85090
loss: 1.3051332235336304,grad_norm: 0.9999993307341478, iteration: 85091
loss: 1.326399803161621,grad_norm: 0.9999998789093013, iteration: 85092
loss: 1.3052120208740234,grad_norm: 0.9999998566815229, iteration: 85093
loss: 1.2333160638809204,grad_norm: 0.9999999597589004, iteration: 85094
loss: 1.4058128595352173,grad_norm: 0.9999998765484953, iteration: 85095
loss: 1.283142328262329,grad_norm: 0.9999995601517497, iteration: 85096
loss: 1.6107839345932007,grad_norm: 0.9999996363593067, iteration: 85097
loss: 1.212754487991333,grad_norm: 0.9999998612713616, iteration: 85098
loss: 1.3745445013046265,grad_norm: 0.9999999170039793, iteration: 85099
loss: 1.4756295680999756,grad_norm: 0.9999996734642764, iteration: 85100
loss: 1.2493107318878174,grad_norm: 0.999999795468184, iteration: 85101
loss: 1.369516372680664,grad_norm: 0.9999998263073439, iteration: 85102
loss: 1.187296986579895,grad_norm: 0.9999997240245141, iteration: 85103
loss: 1.275794506072998,grad_norm: 0.9999998845804311, iteration: 85104
loss: 1.4707328081130981,grad_norm: 0.9999999208466027, iteration: 85105
loss: 1.0573407411575317,grad_norm: 0.9999991599225313, iteration: 85106
loss: 1.058411955833435,grad_norm: 0.9999992830857113, iteration: 85107
loss: 1.1642875671386719,grad_norm: 0.9999992904370129, iteration: 85108
loss: 1.1733700037002563,grad_norm: 0.9999996913323141, iteration: 85109
loss: 1.0939245223999023,grad_norm: 0.9999991561636898, iteration: 85110
loss: 1.1828887462615967,grad_norm: 0.9999992856499648, iteration: 85111
loss: 1.4138737916946411,grad_norm: 0.9999997993844386, iteration: 85112
loss: 1.2332640886306763,grad_norm: 0.9999997413767829, iteration: 85113
loss: 1.032393455505371,grad_norm: 0.9999995579333714, iteration: 85114
loss: 1.0307761430740356,grad_norm: 0.999999141320342, iteration: 85115
loss: 1.1839998960494995,grad_norm: 0.9999993488128062, iteration: 85116
loss: 1.4985144138336182,grad_norm: 0.999999980181478, iteration: 85117
loss: 1.2099251747131348,grad_norm: 0.9999998615910569, iteration: 85118
loss: 1.293582797050476,grad_norm: 0.9999999069764246, iteration: 85119
loss: 1.3594753742218018,grad_norm: 0.99999999700532, iteration: 85120
loss: 1.0805665254592896,grad_norm: 0.9999999325920781, iteration: 85121
loss: 1.4028995037078857,grad_norm: 1.0000000297295413, iteration: 85122
loss: 1.8937653303146362,grad_norm: 1.0000000807739435, iteration: 85123
loss: 1.6588163375854492,grad_norm: 0.999999949197111, iteration: 85124
loss: 1.6712058782577515,grad_norm: 0.9999999642269479, iteration: 85125
loss: 1.667651891708374,grad_norm: 0.9999999111933014, iteration: 85126
loss: 1.3020358085632324,grad_norm: 0.999999855168159, iteration: 85127
loss: 1.752458095550537,grad_norm: 0.9999999338887166, iteration: 85128
loss: 1.4208126068115234,grad_norm: 0.999999977090191, iteration: 85129
loss: 1.1649794578552246,grad_norm: 0.9999998717394407, iteration: 85130
loss: 1.3883998394012451,grad_norm: 0.9999998264709737, iteration: 85131
loss: 1.2158704996109009,grad_norm: 1.000000007053304, iteration: 85132
loss: 1.071658730506897,grad_norm: 0.9999993890655136, iteration: 85133
loss: 1.410812497138977,grad_norm: 0.9999997498166393, iteration: 85134
loss: 1.26686692237854,grad_norm: 0.999999463539963, iteration: 85135
loss: 1.1660791635513306,grad_norm: 0.9999998067802436, iteration: 85136
loss: 1.3701908588409424,grad_norm: 0.9999997235707104, iteration: 85137
loss: 1.1337438821792603,grad_norm: 0.9999999514936061, iteration: 85138
loss: 1.2063905000686646,grad_norm: 0.9999995680015625, iteration: 85139
loss: 1.2819584608078003,grad_norm: 0.9999998822512978, iteration: 85140
loss: 1.2573094367980957,grad_norm: 0.9999999055617453, iteration: 85141
loss: 1.1114579439163208,grad_norm: 0.999999958841526, iteration: 85142
loss: 1.1941406726837158,grad_norm: 0.9999995796823659, iteration: 85143
loss: 1.1648585796356201,grad_norm: 0.9999995856553444, iteration: 85144
loss: 1.157802700996399,grad_norm: 0.999999777974724, iteration: 85145
loss: 1.2772319316864014,grad_norm: 0.9999994374286074, iteration: 85146
loss: 1.122249722480774,grad_norm: 0.9999993067305099, iteration: 85147
loss: 1.2251455783843994,grad_norm: 0.9999997719646518, iteration: 85148
loss: 1.2820500135421753,grad_norm: 0.9999998018379249, iteration: 85149
loss: 1.1416311264038086,grad_norm: 0.9999997869802106, iteration: 85150
loss: 1.1442060470581055,grad_norm: 0.9999994788128271, iteration: 85151
loss: 1.1280089616775513,grad_norm: 0.9999995473370211, iteration: 85152
loss: 1.1723642349243164,grad_norm: 0.9999996007750206, iteration: 85153
loss: 1.112980604171753,grad_norm: 0.9999993182202105, iteration: 85154
loss: 1.1062194108963013,grad_norm: 0.9999999072946735, iteration: 85155
loss: 1.1995601654052734,grad_norm: 0.9999993384429012, iteration: 85156
loss: 1.0887236595153809,grad_norm: 0.9999999938364367, iteration: 85157
loss: 1.2250313758850098,grad_norm: 0.9999999255275763, iteration: 85158
loss: 1.0391762256622314,grad_norm: 0.9999992371131474, iteration: 85159
loss: 1.0685580968856812,grad_norm: 0.9999998090196686, iteration: 85160
loss: 1.1771509647369385,grad_norm: 0.9999999616128477, iteration: 85161
loss: 0.9987547397613525,grad_norm: 0.9999992189494218, iteration: 85162
loss: 1.081335186958313,grad_norm: 0.9999995118399726, iteration: 85163
loss: 1.045091986656189,grad_norm: 0.9999990857579913, iteration: 85164
loss: 1.116702675819397,grad_norm: 0.9999994477873824, iteration: 85165
loss: 1.0601227283477783,grad_norm: 0.9999993104180395, iteration: 85166
loss: 1.025831937789917,grad_norm: 0.9999994042399695, iteration: 85167
loss: 0.9867099523544312,grad_norm: 0.9629170204777384, iteration: 85168
loss: 1.083907127380371,grad_norm: 0.9999991914723425, iteration: 85169
loss: 1.0102020502090454,grad_norm: 0.9999991636373381, iteration: 85170
loss: 1.0818558931350708,grad_norm: 0.9999993518874237, iteration: 85171
loss: 1.0417766571044922,grad_norm: 0.9999992956324302, iteration: 85172
loss: 1.0533136129379272,grad_norm: 0.9999996658499806, iteration: 85173
loss: 1.0186121463775635,grad_norm: 0.9999994618483169, iteration: 85174
loss: 1.036961555480957,grad_norm: 0.9999993212800637, iteration: 85175
loss: 1.0837366580963135,grad_norm: 0.999999429743311, iteration: 85176
loss: 1.0656137466430664,grad_norm: 0.999999475064728, iteration: 85177
loss: 0.9379295110702515,grad_norm: 0.8874268610113499, iteration: 85178
loss: 1.1216661930084229,grad_norm: 0.9999999565627141, iteration: 85179
loss: 1.195198893547058,grad_norm: 0.999999505928687, iteration: 85180
loss: 1.0438851118087769,grad_norm: 0.9999992644084044, iteration: 85181
loss: 1.0960605144500732,grad_norm: 0.9999997264698602, iteration: 85182
loss: 1.0359582901000977,grad_norm: 0.9564962008990554, iteration: 85183
loss: 1.0432320833206177,grad_norm: 0.9674471094064175, iteration: 85184
loss: 1.018188714981079,grad_norm: 0.9999991179634613, iteration: 85185
loss: 1.1312026977539062,grad_norm: 0.9999991857844852, iteration: 85186
loss: 1.1028116941452026,grad_norm: 0.9999999198386078, iteration: 85187
loss: 1.0497949123382568,grad_norm: 0.9999991648168257, iteration: 85188
loss: 1.0574424266815186,grad_norm: 0.9999992609329119, iteration: 85189
loss: 0.9856317639350891,grad_norm: 0.9999993953663359, iteration: 85190
loss: 1.1018685102462769,grad_norm: 0.9999994931169482, iteration: 85191
loss: 1.0749752521514893,grad_norm: 0.9999995477237702, iteration: 85192
loss: 1.1475117206573486,grad_norm: 0.9999999313010979, iteration: 85193
loss: 1.1120444536209106,grad_norm: 0.999999814234518, iteration: 85194
loss: 1.0173499584197998,grad_norm: 0.9999992794757494, iteration: 85195
loss: 1.104580044746399,grad_norm: 0.999999714442563, iteration: 85196
loss: 1.0785279273986816,grad_norm: 0.9999994086693793, iteration: 85197
loss: 1.031408667564392,grad_norm: 0.9999991785018874, iteration: 85198
loss: 1.013274073600769,grad_norm: 0.993404875388711, iteration: 85199
loss: 1.047499418258667,grad_norm: 0.9999997112522706, iteration: 85200
loss: 0.9741573333740234,grad_norm: 0.9999991476337965, iteration: 85201
loss: 1.1893061399459839,grad_norm: 0.9999992642015448, iteration: 85202
loss: 0.9800024628639221,grad_norm: 0.999999482374748, iteration: 85203
loss: 1.0107444524765015,grad_norm: 0.8445910160033375, iteration: 85204
loss: 1.0086920261383057,grad_norm: 0.9999998806317911, iteration: 85205
loss: 1.0295140743255615,grad_norm: 0.9999990893457967, iteration: 85206
loss: 1.4239287376403809,grad_norm: 0.9999995897947375, iteration: 85207
loss: 1.0147372484207153,grad_norm: 0.9437041127803955, iteration: 85208
loss: 1.0309256315231323,grad_norm: 0.9999992849327821, iteration: 85209
loss: 1.2764084339141846,grad_norm: 0.9999997611582293, iteration: 85210
loss: 1.0160731077194214,grad_norm: 0.9999992009466624, iteration: 85211
loss: 1.1385443210601807,grad_norm: 0.9999995092654234, iteration: 85212
loss: 1.1583521366119385,grad_norm: 0.999999760126731, iteration: 85213
loss: 1.0257445573806763,grad_norm: 0.9658625089423761, iteration: 85214
loss: 1.0931791067123413,grad_norm: 0.9999995214722276, iteration: 85215
loss: 1.1043381690979004,grad_norm: 0.9999995325496795, iteration: 85216
loss: 1.0972758531570435,grad_norm: 0.9999996157604168, iteration: 85217
loss: 1.1007425785064697,grad_norm: 0.9999999416445434, iteration: 85218
loss: 1.21692955493927,grad_norm: 0.9999997874534677, iteration: 85219
loss: 1.09490168094635,grad_norm: 0.9999995997341083, iteration: 85220
loss: 1.0566291809082031,grad_norm: 0.9999994249046587, iteration: 85221
loss: 1.1042059659957886,grad_norm: 0.999999553986957, iteration: 85222
loss: 1.0990734100341797,grad_norm: 0.9999995962431153, iteration: 85223
loss: 1.2313255071640015,grad_norm: 0.9999998668093588, iteration: 85224
loss: 1.3878473043441772,grad_norm: 0.9999996991130702, iteration: 85225
loss: 1.3720285892486572,grad_norm: 1.0000000341228934, iteration: 85226
loss: 1.1601388454437256,grad_norm: 0.9999996074815215, iteration: 85227
loss: 1.1408915519714355,grad_norm: 0.9999993628356607, iteration: 85228
loss: 1.0702128410339355,grad_norm: 0.9999996053084454, iteration: 85229
loss: 1.0961681604385376,grad_norm: 0.9999991870302647, iteration: 85230
loss: 1.0227969884872437,grad_norm: 0.9999991793644839, iteration: 85231
loss: 1.0458130836486816,grad_norm: 0.9999993972166691, iteration: 85232
loss: 1.3930907249450684,grad_norm: 0.9999997958737002, iteration: 85233
loss: 1.097476840019226,grad_norm: 0.9999991312879463, iteration: 85234
loss: 1.1826910972595215,grad_norm: 0.999999637386019, iteration: 85235
loss: 1.219062089920044,grad_norm: 0.999999853380785, iteration: 85236
loss: 1.2865108251571655,grad_norm: 1.00000007964705, iteration: 85237
loss: 1.1926144361495972,grad_norm: 0.999999806896507, iteration: 85238
loss: 1.190053939819336,grad_norm: 0.9999996810808195, iteration: 85239
loss: 1.1072834730148315,grad_norm: 0.9999992133601999, iteration: 85240
loss: 1.3041187524795532,grad_norm: 0.9999998835160046, iteration: 85241
loss: 1.2775660753250122,grad_norm: 0.9999999283338956, iteration: 85242
loss: 0.9862579703330994,grad_norm: 0.9999999380880925, iteration: 85243
loss: 1.137315034866333,grad_norm: 0.999999900241276, iteration: 85244
loss: 1.1881153583526611,grad_norm: 0.9999998872922788, iteration: 85245
loss: 1.1590256690979004,grad_norm: 0.9999998932206774, iteration: 85246
loss: 1.1362262964248657,grad_norm: 0.9999993297639297, iteration: 85247
loss: 1.0310014486312866,grad_norm: 0.9999996299254402, iteration: 85248
loss: 1.1909997463226318,grad_norm: 0.9999994573217438, iteration: 85249
loss: 1.2063465118408203,grad_norm: 0.9999998280805273, iteration: 85250
loss: 1.0644532442092896,grad_norm: 0.9999992409151832, iteration: 85251
loss: 1.1298837661743164,grad_norm: 0.9999998848548107, iteration: 85252
loss: 1.047985553741455,grad_norm: 0.9999998101799337, iteration: 85253
loss: 1.081209659576416,grad_norm: 0.9187360258107924, iteration: 85254
loss: 1.3265639543533325,grad_norm: 0.9999996944025263, iteration: 85255
loss: 1.0232561826705933,grad_norm: 0.9999991198269197, iteration: 85256
loss: 1.181580662727356,grad_norm: 0.9999997377465631, iteration: 85257
loss: 1.0077910423278809,grad_norm: 0.9999998726391406, iteration: 85258
loss: 1.092444658279419,grad_norm: 0.9999994287283365, iteration: 85259
loss: 1.0976113080978394,grad_norm: 0.9999996182734882, iteration: 85260
loss: 1.3505213260650635,grad_norm: 0.9999999675658097, iteration: 85261
loss: 1.2331440448760986,grad_norm: 1.0000000504547146, iteration: 85262
loss: 0.99439537525177,grad_norm: 0.9999993420930429, iteration: 85263
loss: 1.0834646224975586,grad_norm: 0.999999660931482, iteration: 85264
loss: 1.1307324171066284,grad_norm: 0.9999996723498399, iteration: 85265
loss: 1.1293233633041382,grad_norm: 0.9999999203519965, iteration: 85266
loss: 1.1320033073425293,grad_norm: 0.9999993426535162, iteration: 85267
loss: 1.0840489864349365,grad_norm: 0.999999338387257, iteration: 85268
loss: 1.060703158378601,grad_norm: 0.9999995337309596, iteration: 85269
loss: 1.092445969581604,grad_norm: 0.9999990984998608, iteration: 85270
loss: 1.1376880407333374,grad_norm: 0.9999996790762739, iteration: 85271
loss: 1.1697537899017334,grad_norm: 0.9999994639734266, iteration: 85272
loss: 1.2016111612319946,grad_norm: 0.9999999813613148, iteration: 85273
loss: 1.1678012609481812,grad_norm: 0.9999992604554576, iteration: 85274
loss: 1.090857982635498,grad_norm: 0.9999998584835835, iteration: 85275
loss: 1.0607835054397583,grad_norm: 0.9999993016463068, iteration: 85276
loss: 1.0089976787567139,grad_norm: 0.9999996262830001, iteration: 85277
loss: 1.08863365650177,grad_norm: 0.8989801382574127, iteration: 85278
loss: 1.0451773405075073,grad_norm: 0.950627369994893, iteration: 85279
loss: 1.037093162536621,grad_norm: 0.8481930549378855, iteration: 85280
loss: 1.0294979810714722,grad_norm: 0.8834202184168133, iteration: 85281
loss: 1.128200888633728,grad_norm: 0.9999992275925704, iteration: 85282
loss: 1.1425467729568481,grad_norm: 0.999999603397477, iteration: 85283
loss: 1.1653060913085938,grad_norm: 0.9999994671903506, iteration: 85284
loss: 0.9968227744102478,grad_norm: 0.9999990268552955, iteration: 85285
loss: 0.9765245318412781,grad_norm: 0.9999991918164821, iteration: 85286
loss: 1.05362868309021,grad_norm: 0.9999993592273866, iteration: 85287
loss: 1.0995879173278809,grad_norm: 0.9999999602021972, iteration: 85288
loss: 1.005968689918518,grad_norm: 0.9275786975993082, iteration: 85289
loss: 0.9911943674087524,grad_norm: 0.9433755341408436, iteration: 85290
loss: 1.0301594734191895,grad_norm: 0.9999993464485397, iteration: 85291
loss: 0.9878371357917786,grad_norm: 0.9989861295013931, iteration: 85292
loss: 1.0834494829177856,grad_norm: 0.9999996842297934, iteration: 85293
loss: 1.0098413228988647,grad_norm: 0.9999997126941517, iteration: 85294
loss: 1.0689842700958252,grad_norm: 0.9999997520067574, iteration: 85295
loss: 1.030661940574646,grad_norm: 0.999999745936709, iteration: 85296
loss: 1.1925222873687744,grad_norm: 0.9999991693052813, iteration: 85297
loss: 1.048431158065796,grad_norm: 0.9999989425668905, iteration: 85298
loss: 1.1423931121826172,grad_norm: 0.9999999911303553, iteration: 85299
loss: 1.086037039756775,grad_norm: 0.8742184056634563, iteration: 85300
loss: 0.9884777069091797,grad_norm: 0.9999993386882979, iteration: 85301
loss: 1.0496655702590942,grad_norm: 0.9558920458187772, iteration: 85302
loss: 1.0932583808898926,grad_norm: 0.9999999607215756, iteration: 85303
loss: 1.3474667072296143,grad_norm: 0.9999994744628046, iteration: 85304
loss: 0.9806728363037109,grad_norm: 0.771837134033079, iteration: 85305
loss: 1.0290014743804932,grad_norm: 0.9999992726066956, iteration: 85306
loss: 1.0212311744689941,grad_norm: 0.9999997046939898, iteration: 85307
loss: 1.1142359972000122,grad_norm: 0.9999993838190785, iteration: 85308
loss: 1.0264897346496582,grad_norm: 0.9999992487016592, iteration: 85309
loss: 0.9960885643959045,grad_norm: 0.822978448103506, iteration: 85310
loss: 1.0844290256500244,grad_norm: 0.9999992738577734, iteration: 85311
loss: 1.1447280645370483,grad_norm: 0.9999992888673954, iteration: 85312
loss: 1.140540599822998,grad_norm: 0.9999996723420921, iteration: 85313
loss: 1.1648691892623901,grad_norm: 0.9999998163848139, iteration: 85314
loss: 1.0053943395614624,grad_norm: 0.9788312405782987, iteration: 85315
loss: 0.9943869709968567,grad_norm: 0.9999994393793529, iteration: 85316
loss: 1.030497431755066,grad_norm: 0.9999993645760453, iteration: 85317
loss: 1.0210504531860352,grad_norm: 0.9999991123610512, iteration: 85318
loss: 0.9542708396911621,grad_norm: 0.9999989523574292, iteration: 85319
loss: 0.995516300201416,grad_norm: 0.8626556112336886, iteration: 85320
loss: 0.992245614528656,grad_norm: 0.8995595536662531, iteration: 85321
loss: 1.1341956853866577,grad_norm: 0.9999994537438515, iteration: 85322
loss: 1.0014227628707886,grad_norm: 0.9644839389207165, iteration: 85323
loss: 1.0295931100845337,grad_norm: 0.9297671584716956, iteration: 85324
loss: 1.0218007564544678,grad_norm: 0.9999991376275141, iteration: 85325
loss: 1.0197763442993164,grad_norm: 0.815921385424272, iteration: 85326
loss: 1.0840140581130981,grad_norm: 0.9999993020584249, iteration: 85327
loss: 0.9515845775604248,grad_norm: 0.9999992550009182, iteration: 85328
loss: 1.050718903541565,grad_norm: 0.9999990550352946, iteration: 85329
loss: 1.0349727869033813,grad_norm: 0.9999998424025293, iteration: 85330
loss: 1.0167312622070312,grad_norm: 0.9999993787204009, iteration: 85331
loss: 1.0707725286483765,grad_norm: 0.9415672944587874, iteration: 85332
loss: 0.9725687503814697,grad_norm: 0.8685941725996721, iteration: 85333
loss: 1.0467709302902222,grad_norm: 0.9564795661450277, iteration: 85334
loss: 1.128663420677185,grad_norm: 0.9999999170152213, iteration: 85335
loss: 1.101513147354126,grad_norm: 0.9999996993950261, iteration: 85336
loss: 1.0762994289398193,grad_norm: 0.9999991514093581, iteration: 85337
loss: 1.0020478963851929,grad_norm: 0.9999992231025672, iteration: 85338
loss: 1.0148158073425293,grad_norm: 0.999999510398548, iteration: 85339
loss: 1.0307974815368652,grad_norm: 0.999999153891904, iteration: 85340
loss: 0.9849929809570312,grad_norm: 0.9999993039617022, iteration: 85341
loss: 1.0294078588485718,grad_norm: 0.8363419937487776, iteration: 85342
loss: 0.9864096641540527,grad_norm: 0.9999991009526467, iteration: 85343
loss: 0.9903380870819092,grad_norm: 0.8876046821851391, iteration: 85344
loss: 1.0163768529891968,grad_norm: 0.9999990455151214, iteration: 85345
loss: 0.9768230319023132,grad_norm: 0.9999991328674019, iteration: 85346
loss: 1.0009667873382568,grad_norm: 0.8139037077306652, iteration: 85347
loss: 1.160671591758728,grad_norm: 0.9999999596985731, iteration: 85348
loss: 1.0165306329727173,grad_norm: 0.9999992660086223, iteration: 85349
loss: 1.0547817945480347,grad_norm: 0.9999997831561683, iteration: 85350
loss: 0.99540776014328,grad_norm: 0.9999990268080512, iteration: 85351
loss: 1.0009303092956543,grad_norm: 0.9027271335009406, iteration: 85352
loss: 1.0010699033737183,grad_norm: 0.8766103375652401, iteration: 85353
loss: 1.0242642164230347,grad_norm: 0.9878785806913573, iteration: 85354
loss: 1.0083037614822388,grad_norm: 0.9999995063344574, iteration: 85355
loss: 0.9928318858146667,grad_norm: 0.9999991366393032, iteration: 85356
loss: 1.0490434169769287,grad_norm: 0.9999997570662598, iteration: 85357
loss: 1.001933217048645,grad_norm: 0.9999990002347572, iteration: 85358
loss: 1.0045363903045654,grad_norm: 0.7899413850266243, iteration: 85359
loss: 1.0494377613067627,grad_norm: 0.8845817391896723, iteration: 85360
loss: 0.9939931035041809,grad_norm: 0.7773119041317471, iteration: 85361
loss: 0.9935060143470764,grad_norm: 0.9999992208709616, iteration: 85362
loss: 1.014636516571045,grad_norm: 0.9482481976331409, iteration: 85363
loss: 1.0014885663986206,grad_norm: 0.9038388023468058, iteration: 85364
loss: 1.0391463041305542,grad_norm: 0.9999995371942961, iteration: 85365
loss: 1.0096324682235718,grad_norm: 0.8762481230120359, iteration: 85366
loss: 1.003174901008606,grad_norm: 0.9999991410879111, iteration: 85367
loss: 0.9699838757514954,grad_norm: 0.7526397729899157, iteration: 85368
loss: 1.052056908607483,grad_norm: 0.943936729027299, iteration: 85369
loss: 1.2157292366027832,grad_norm: 0.9999998654662055, iteration: 85370
loss: 1.0212104320526123,grad_norm: 0.977265080332243, iteration: 85371
loss: 1.134572982788086,grad_norm: 0.9999989812489736, iteration: 85372
loss: 1.1487576961517334,grad_norm: 0.9999999454988086, iteration: 85373
loss: 0.9852587580680847,grad_norm: 0.9971946682745614, iteration: 85374
loss: 0.9555098414421082,grad_norm: 0.999999048441994, iteration: 85375
loss: 1.0127789974212646,grad_norm: 0.9999991084020201, iteration: 85376
loss: 0.9916074872016907,grad_norm: 0.8894634159838266, iteration: 85377
loss: 1.0118924379348755,grad_norm: 0.8602467664970207, iteration: 85378
loss: 0.9971959590911865,grad_norm: 0.9999992568384561, iteration: 85379
loss: 1.011955976486206,grad_norm: 0.9999995335010095, iteration: 85380
loss: 1.0270692110061646,grad_norm: 0.999999548147303, iteration: 85381
loss: 1.0094356536865234,grad_norm: 1.0000000325413552, iteration: 85382
loss: 0.9659983515739441,grad_norm: 0.9531091784501556, iteration: 85383
loss: 0.9944746494293213,grad_norm: 0.9177795029613043, iteration: 85384
loss: 1.0432888269424438,grad_norm: 0.9999994091362987, iteration: 85385
loss: 1.0204042196273804,grad_norm: 0.8791567681747101, iteration: 85386
loss: 1.0835615396499634,grad_norm: 0.9962585226791437, iteration: 85387
loss: 1.037126064300537,grad_norm: 0.9790375779124544, iteration: 85388
loss: 1.0834301710128784,grad_norm: 0.962880746574216, iteration: 85389
loss: 1.0074769258499146,grad_norm: 0.9999991744368765, iteration: 85390
loss: 1.045027494430542,grad_norm: 0.9999990628408423, iteration: 85391
loss: 0.9931463599205017,grad_norm: 0.9779721323573366, iteration: 85392
loss: 0.9934183359146118,grad_norm: 0.9999990473324673, iteration: 85393
loss: 1.0025207996368408,grad_norm: 0.8533569823241983, iteration: 85394
loss: 0.9900861978530884,grad_norm: 0.9999993107121261, iteration: 85395
loss: 0.9877451062202454,grad_norm: 0.9999991064369432, iteration: 85396
loss: 0.9960903525352478,grad_norm: 0.9999990773551684, iteration: 85397
loss: 0.9901283383369446,grad_norm: 0.9999993486943896, iteration: 85398
loss: 0.9783290028572083,grad_norm: 0.9999991550742166, iteration: 85399
loss: 1.0093379020690918,grad_norm: 0.9596742265858942, iteration: 85400
loss: 1.0420022010803223,grad_norm: 1.0000000156504467, iteration: 85401
loss: 0.9984315633773804,grad_norm: 0.9999990350293211, iteration: 85402
loss: 1.0648609399795532,grad_norm: 0.999999709964131, iteration: 85403
loss: 0.9668609499931335,grad_norm: 0.9999995719924869, iteration: 85404
loss: 1.001629114151001,grad_norm: 0.9999994868863115, iteration: 85405
loss: 1.0051847696304321,grad_norm: 0.8575359552825011, iteration: 85406
loss: 1.1226823329925537,grad_norm: 0.9999995189535642, iteration: 85407
loss: 1.0097686052322388,grad_norm: 0.7725380019688282, iteration: 85408
loss: 0.9970397353172302,grad_norm: 0.9999990741299157, iteration: 85409
loss: 1.0190651416778564,grad_norm: 0.9999992231714468, iteration: 85410
loss: 1.0605058670043945,grad_norm: 0.9999996791263254, iteration: 85411
loss: 1.0252835750579834,grad_norm: 0.9999991796200209, iteration: 85412
loss: 0.9952556490898132,grad_norm: 0.9786221670405941, iteration: 85413
loss: 1.0404809713363647,grad_norm: 0.803022160267238, iteration: 85414
loss: 1.0301215648651123,grad_norm: 0.9187763036594165, iteration: 85415
loss: 1.0160211324691772,grad_norm: 0.8803693891023028, iteration: 85416
loss: 1.0257810354232788,grad_norm: 0.8143364793603628, iteration: 85417
loss: 1.134824514389038,grad_norm: 0.9999996518411997, iteration: 85418
loss: 0.9956178069114685,grad_norm: 0.8685982106465001, iteration: 85419
loss: 1.0522878170013428,grad_norm: 0.9999997281852451, iteration: 85420
loss: 0.996307373046875,grad_norm: 0.9464041580857941, iteration: 85421
loss: 1.0173553228378296,grad_norm: 0.9999996400208142, iteration: 85422
loss: 1.023065209388733,grad_norm: 0.9999994608931364, iteration: 85423
loss: 1.0225895643234253,grad_norm: 0.9999995137190052, iteration: 85424
loss: 1.001285433769226,grad_norm: 0.9999995560940328, iteration: 85425
loss: 0.999284565448761,grad_norm: 0.999999353805582, iteration: 85426
loss: 1.0386722087860107,grad_norm: 0.9999993485613938, iteration: 85427
loss: 1.0140914916992188,grad_norm: 0.8854417155160514, iteration: 85428
loss: 0.9654975533485413,grad_norm: 0.9999992350585273, iteration: 85429
loss: 1.0407229661941528,grad_norm: 0.9999999872570893, iteration: 85430
loss: 1.054278016090393,grad_norm: 0.9999997758063637, iteration: 85431
loss: 0.9953757524490356,grad_norm: 0.9999995813899534, iteration: 85432
loss: 0.9949721693992615,grad_norm: 0.8650342597253287, iteration: 85433
loss: 0.995509147644043,grad_norm: 0.9999991833414501, iteration: 85434
loss: 1.0560389757156372,grad_norm: 0.9999994899031898, iteration: 85435
loss: 1.0081394910812378,grad_norm: 0.9999992244796301, iteration: 85436
loss: 1.0155686140060425,grad_norm: 0.8016791248709253, iteration: 85437
loss: 0.9503298997879028,grad_norm: 0.9999990849556142, iteration: 85438
loss: 1.0124660730361938,grad_norm: 0.9230606513466039, iteration: 85439
loss: 0.9803814888000488,grad_norm: 0.9993931519866633, iteration: 85440
loss: 0.9836375713348389,grad_norm: 0.9148928981221829, iteration: 85441
loss: 1.0025262832641602,grad_norm: 0.9329733451819809, iteration: 85442
loss: 1.237726092338562,grad_norm: 0.9999992786275134, iteration: 85443
loss: 0.949120044708252,grad_norm: 0.8220090946434743, iteration: 85444
loss: 1.0221019983291626,grad_norm: 0.9999992620940167, iteration: 85445
loss: 1.0410716533660889,grad_norm: 0.9999990038754962, iteration: 85446
loss: 1.0182831287384033,grad_norm: 0.9318606271697117, iteration: 85447
loss: 1.0277646780014038,grad_norm: 0.9999989678420994, iteration: 85448
loss: 1.0407891273498535,grad_norm: 0.9301903922374226, iteration: 85449
loss: 1.0487346649169922,grad_norm: 0.999999228964301, iteration: 85450
loss: 1.149244785308838,grad_norm: 0.9999999815702318, iteration: 85451
loss: 1.0493509769439697,grad_norm: 0.9999995245989024, iteration: 85452
loss: 0.9926607012748718,grad_norm: 0.7613828405602585, iteration: 85453
loss: 1.0309547185897827,grad_norm: 0.9999990849505236, iteration: 85454
loss: 1.0417088270187378,grad_norm: 0.9999997847384843, iteration: 85455
loss: 1.0490658283233643,grad_norm: 0.9999993100851139, iteration: 85456
loss: 1.0522103309631348,grad_norm: 0.8579072827126131, iteration: 85457
loss: 1.0933771133422852,grad_norm: 0.999999729569244, iteration: 85458
loss: 1.021608829498291,grad_norm: 0.9999996986548461, iteration: 85459
loss: 1.053157925605774,grad_norm: 0.9203234730380264, iteration: 85460
loss: 1.0980631113052368,grad_norm: 0.9999997942033398, iteration: 85461
loss: 1.0185219049453735,grad_norm: 0.9999992240535375, iteration: 85462
loss: 1.0524542331695557,grad_norm: 0.9999992642629767, iteration: 85463
loss: 1.156124472618103,grad_norm: 0.9999997888543367, iteration: 85464
loss: 1.0814933776855469,grad_norm: 0.999999334179361, iteration: 85465
loss: 1.127572774887085,grad_norm: 0.9999998823081571, iteration: 85466
loss: 1.1813418865203857,grad_norm: 0.9999992860262231, iteration: 85467
loss: 1.063525915145874,grad_norm: 0.97045989606081, iteration: 85468
loss: 1.0178927183151245,grad_norm: 0.9999991647452313, iteration: 85469
loss: 1.017022967338562,grad_norm: 0.9412680119031197, iteration: 85470
loss: 1.0410126447677612,grad_norm: 0.999999358151295, iteration: 85471
loss: 1.0106701850891113,grad_norm: 0.9504620077469158, iteration: 85472
loss: 1.0286775827407837,grad_norm: 0.9999999162788696, iteration: 85473
loss: 0.9931738972663879,grad_norm: 0.9999993819905512, iteration: 85474
loss: 1.0100500583648682,grad_norm: 0.9593883747147125, iteration: 85475
loss: 1.0397356748580933,grad_norm: 0.9999991817897607, iteration: 85476
loss: 0.9910332560539246,grad_norm: 0.9999990987896132, iteration: 85477
loss: 1.0079432725906372,grad_norm: 0.9999995489774457, iteration: 85478
loss: 1.0541694164276123,grad_norm: 0.9999997126673128, iteration: 85479
loss: 0.9888262152671814,grad_norm: 0.8602558347816815, iteration: 85480
loss: 1.0211290121078491,grad_norm: 0.9999993215067216, iteration: 85481
loss: 0.9863781332969666,grad_norm: 0.8252116795105412, iteration: 85482
loss: 1.064563512802124,grad_norm: 0.9999990135068243, iteration: 85483
loss: 1.1767371892929077,grad_norm: 0.9999994647084738, iteration: 85484
loss: 1.0924170017242432,grad_norm: 0.9999996604281385, iteration: 85485
loss: 1.0528568029403687,grad_norm: 0.9999997732425154, iteration: 85486
loss: 0.9805724620819092,grad_norm: 0.8398211689286714, iteration: 85487
loss: 1.0043210983276367,grad_norm: 0.9999993075315702, iteration: 85488
loss: 1.0030502080917358,grad_norm: 0.8620257819975727, iteration: 85489
loss: 1.072443962097168,grad_norm: 0.999999723837558, iteration: 85490
loss: 1.017202377319336,grad_norm: 0.9729945319809643, iteration: 85491
loss: 1.0157233476638794,grad_norm: 0.9488150465490515, iteration: 85492
loss: 0.9969626665115356,grad_norm: 0.8196292442125505, iteration: 85493
loss: 1.0105547904968262,grad_norm: 0.8489148644061534, iteration: 85494
loss: 1.066495656967163,grad_norm: 0.9999993220733919, iteration: 85495
loss: 1.0825201272964478,grad_norm: 0.9999991790759292, iteration: 85496
loss: 1.0711785554885864,grad_norm: 0.9999991431149267, iteration: 85497
loss: 1.0623629093170166,grad_norm: 0.9999991223967107, iteration: 85498
loss: 0.9970779418945312,grad_norm: 0.9127561234643304, iteration: 85499
loss: 1.0379449129104614,grad_norm: 0.9999990087743905, iteration: 85500
loss: 1.0140408277511597,grad_norm: 0.9999994238090446, iteration: 85501
loss: 1.0274642705917358,grad_norm: 0.921071920587848, iteration: 85502
loss: 1.0066406726837158,grad_norm: 0.999999059745656, iteration: 85503
loss: 1.0153895616531372,grad_norm: 0.9525432856882884, iteration: 85504
loss: 1.033353567123413,grad_norm: 0.9999992252089738, iteration: 85505
loss: 1.0134440660476685,grad_norm: 0.8670198520613926, iteration: 85506
loss: 1.0469285249710083,grad_norm: 0.9999990838868672, iteration: 85507
loss: 0.9990311861038208,grad_norm: 0.9999990062258953, iteration: 85508
loss: 1.018585205078125,grad_norm: 0.9999998939355226, iteration: 85509
loss: 1.0229425430297852,grad_norm: 0.838291142542934, iteration: 85510
loss: 1.0398398637771606,grad_norm: 0.9999995489222615, iteration: 85511
loss: 0.9832760691642761,grad_norm: 0.8394902061632918, iteration: 85512
loss: 1.0079221725463867,grad_norm: 0.9999992339277572, iteration: 85513
loss: 1.0051958560943604,grad_norm: 0.8345367351190641, iteration: 85514
loss: 1.0208301544189453,grad_norm: 0.921111150646606, iteration: 85515
loss: 1.0028918981552124,grad_norm: 0.9998435977894544, iteration: 85516
loss: 1.0592409372329712,grad_norm: 0.9999996534295911, iteration: 85517
loss: 1.0157649517059326,grad_norm: 0.9942058637715913, iteration: 85518
loss: 0.9956644177436829,grad_norm: 0.9999990091091499, iteration: 85519
loss: 1.006360411643982,grad_norm: 0.9999990554634604, iteration: 85520
loss: 1.0825566053390503,grad_norm: 0.9999991675967513, iteration: 85521
loss: 0.9839372038841248,grad_norm: 0.9999990572294783, iteration: 85522
loss: 1.0010653734207153,grad_norm: 0.9999995559179997, iteration: 85523
loss: 1.0089505910873413,grad_norm: 0.9999992322485286, iteration: 85524
loss: 1.0129202604293823,grad_norm: 0.972343967336794, iteration: 85525
loss: 1.0334903001785278,grad_norm: 0.9999991718696847, iteration: 85526
loss: 1.0597177743911743,grad_norm: 0.999999782011502, iteration: 85527
loss: 0.996178388595581,grad_norm: 0.9999997843220603, iteration: 85528
loss: 1.0574672222137451,grad_norm: 0.8811418725258148, iteration: 85529
loss: 1.0315650701522827,grad_norm: 0.824709775164147, iteration: 85530
loss: 0.9814683794975281,grad_norm: 0.9088759187973334, iteration: 85531
loss: 1.0344364643096924,grad_norm: 0.9772658662274547, iteration: 85532
loss: 1.0561574697494507,grad_norm: 0.9999997961658622, iteration: 85533
loss: 1.011243462562561,grad_norm: 0.931803626769362, iteration: 85534
loss: 1.0473099946975708,grad_norm: 0.9999995403994273, iteration: 85535
loss: 1.0135453939437866,grad_norm: 0.9303307702083599, iteration: 85536
loss: 1.026424765586853,grad_norm: 0.9999992326824255, iteration: 85537
loss: 0.98860764503479,grad_norm: 0.9999991489192511, iteration: 85538
loss: 0.9783281087875366,grad_norm: 0.9999991505451898, iteration: 85539
loss: 0.9894495010375977,grad_norm: 0.9999990493831318, iteration: 85540
loss: 0.988760769367218,grad_norm: 0.9999991308570029, iteration: 85541
loss: 1.027071475982666,grad_norm: 0.999999819277626, iteration: 85542
loss: 1.0018326044082642,grad_norm: 0.9639368982374461, iteration: 85543
loss: 0.9931409358978271,grad_norm: 0.9999989512863695, iteration: 85544
loss: 1.0096652507781982,grad_norm: 0.999999030916352, iteration: 85545
loss: 1.004571795463562,grad_norm: 0.9999999296251719, iteration: 85546
loss: 1.063466191291809,grad_norm: 0.9999992953809723, iteration: 85547
loss: 0.9639086723327637,grad_norm: 0.7846125268154512, iteration: 85548
loss: 1.0883026123046875,grad_norm: 1.0000000259765824, iteration: 85549
loss: 1.0130356550216675,grad_norm: 0.9487327454173582, iteration: 85550
loss: 0.9829058647155762,grad_norm: 0.9999989490507426, iteration: 85551
loss: 0.9890336990356445,grad_norm: 0.9498320730539661, iteration: 85552
loss: 1.0027244091033936,grad_norm: 0.8067875997361417, iteration: 85553
loss: 1.0171157121658325,grad_norm: 0.9999992502801328, iteration: 85554
loss: 1.0868772268295288,grad_norm: 0.9999997067166783, iteration: 85555
loss: 1.032231330871582,grad_norm: 0.9999994165482818, iteration: 85556
loss: 0.990815281867981,grad_norm: 0.8886076747562176, iteration: 85557
loss: 1.0035498142242432,grad_norm: 0.9410300300321011, iteration: 85558
loss: 1.0093275308609009,grad_norm: 0.9392780512716182, iteration: 85559
loss: 0.9825913906097412,grad_norm: 0.999999136058177, iteration: 85560
loss: 0.9898096919059753,grad_norm: 0.9334417687239936, iteration: 85561
loss: 1.0138299465179443,grad_norm: 0.9151771025212218, iteration: 85562
loss: 0.9874561429023743,grad_norm: 0.8760776993308997, iteration: 85563
loss: 1.16278076171875,grad_norm: 0.9999995829709211, iteration: 85564
loss: 1.057719349861145,grad_norm: 0.9999998746524847, iteration: 85565
loss: 1.0449175834655762,grad_norm: 0.9999990369083589, iteration: 85566
loss: 0.9889887571334839,grad_norm: 0.8736714335500314, iteration: 85567
loss: 0.9940133094787598,grad_norm: 0.9267803727313256, iteration: 85568
loss: 1.1244032382965088,grad_norm: 0.9474257515793659, iteration: 85569
loss: 1.0275068283081055,grad_norm: 0.8108966971931836, iteration: 85570
loss: 0.9854365587234497,grad_norm: 0.9475534792877732, iteration: 85571
loss: 1.0406200885772705,grad_norm: 0.9999995874577606, iteration: 85572
loss: 1.0209875106811523,grad_norm: 0.9999990045553349, iteration: 85573
loss: 0.9976850748062134,grad_norm: 0.9999991892226556, iteration: 85574
loss: 1.0468826293945312,grad_norm: 0.99999920535477, iteration: 85575
loss: 0.9511041045188904,grad_norm: 0.9376388764583633, iteration: 85576
loss: 0.9827714562416077,grad_norm: 0.8760758421202562, iteration: 85577
loss: 0.9804815053939819,grad_norm: 0.9999992680948513, iteration: 85578
loss: 1.0092357397079468,grad_norm: 0.9999990629159784, iteration: 85579
loss: 0.9878267049789429,grad_norm: 0.907479120540304, iteration: 85580
loss: 1.0331300497055054,grad_norm: 0.9999995708645787, iteration: 85581
loss: 0.9814659953117371,grad_norm: 0.8179725127725548, iteration: 85582
loss: 1.0000813007354736,grad_norm: 0.8689475785275536, iteration: 85583
loss: 0.9883908629417419,grad_norm: 0.8810585940110149, iteration: 85584
loss: 0.9857106804847717,grad_norm: 0.7988626657161509, iteration: 85585
loss: 0.9595040678977966,grad_norm: 0.9500128272345668, iteration: 85586
loss: 0.9495897889137268,grad_norm: 0.9999991772222538, iteration: 85587
loss: 1.0380975008010864,grad_norm: 0.9292950988300274, iteration: 85588
loss: 1.0510756969451904,grad_norm: 0.9999991607127213, iteration: 85589
loss: 1.009056568145752,grad_norm: 0.8304025344832295, iteration: 85590
loss: 0.9869611859321594,grad_norm: 0.9999995390748664, iteration: 85591
loss: 0.9730900526046753,grad_norm: 0.9999994719032368, iteration: 85592
loss: 1.0044074058532715,grad_norm: 0.8815273626518554, iteration: 85593
loss: 0.9869853854179382,grad_norm: 0.8420652018441184, iteration: 85594
loss: 0.9847024083137512,grad_norm: 0.9999990879527526, iteration: 85595
loss: 0.9743325114250183,grad_norm: 0.999999025568499, iteration: 85596
loss: 1.0225493907928467,grad_norm: 0.9999996417850724, iteration: 85597
loss: 1.0463645458221436,grad_norm: 0.9999998260089964, iteration: 85598
loss: 1.0020452737808228,grad_norm: 0.8933225945664364, iteration: 85599
loss: 1.0261693000793457,grad_norm: 0.9232033664546566, iteration: 85600
loss: 1.0053049325942993,grad_norm: 0.757251145760476, iteration: 85601
loss: 1.0629291534423828,grad_norm: 0.9599966372937836, iteration: 85602
loss: 0.9928767681121826,grad_norm: 0.9788808230774658, iteration: 85603
loss: 1.0203509330749512,grad_norm: 0.8542807307875795, iteration: 85604
loss: 1.0215551853179932,grad_norm: 0.7607100607249496, iteration: 85605
loss: 1.019199013710022,grad_norm: 0.9980362146279929, iteration: 85606
loss: 1.0365175008773804,grad_norm: 0.8990801742572395, iteration: 85607
loss: 1.0150104761123657,grad_norm: 0.9999998516590524, iteration: 85608
loss: 1.0795114040374756,grad_norm: 0.9307463692991494, iteration: 85609
loss: 1.0843653678894043,grad_norm: 0.9999993048511916, iteration: 85610
loss: 0.9781731963157654,grad_norm: 0.848649362149988, iteration: 85611
loss: 1.0081043243408203,grad_norm: 0.9999998976441367, iteration: 85612
loss: 1.0510895252227783,grad_norm: 0.9999993393033647, iteration: 85613
loss: 1.036041259765625,grad_norm: 0.9999993263883297, iteration: 85614
loss: 1.0336053371429443,grad_norm: 0.9999995015116424, iteration: 85615
loss: 1.0320218801498413,grad_norm: 0.9999990259081221, iteration: 85616
loss: 1.0266315937042236,grad_norm: 0.9999993034715173, iteration: 85617
loss: 0.9679542779922485,grad_norm: 0.9999990965095767, iteration: 85618
loss: 1.0311610698699951,grad_norm: 0.9834695853788159, iteration: 85619
loss: 1.0357228517532349,grad_norm: 0.9999995909569616, iteration: 85620
loss: 0.9711945652961731,grad_norm: 0.8740264079520691, iteration: 85621
loss: 0.9747390151023865,grad_norm: 0.9999996353776566, iteration: 85622
loss: 0.9943284392356873,grad_norm: 0.9560683566624285, iteration: 85623
loss: 1.0312734842300415,grad_norm: 0.9999991011546652, iteration: 85624
loss: 0.9905145764350891,grad_norm: 0.8388568147771968, iteration: 85625
loss: 0.9588159322738647,grad_norm: 0.8607377376889352, iteration: 85626
loss: 1.0005409717559814,grad_norm: 0.9000488353584103, iteration: 85627
loss: 0.9996306300163269,grad_norm: 0.8298103670231088, iteration: 85628
loss: 1.0259604454040527,grad_norm: 0.9970483390962478, iteration: 85629
loss: 1.04430091381073,grad_norm: 0.999999068572489, iteration: 85630
loss: 0.9944396615028381,grad_norm: 0.7884607050992988, iteration: 85631
loss: 1.0751187801361084,grad_norm: 0.9999999482668395, iteration: 85632
loss: 1.0538166761398315,grad_norm: 0.9999996012719633, iteration: 85633
loss: 0.9715787172317505,grad_norm: 0.9560851338640619, iteration: 85634
loss: 1.0192623138427734,grad_norm: 0.9999991119198803, iteration: 85635
loss: 1.0352283716201782,grad_norm: 0.9999993524871479, iteration: 85636
loss: 1.0053671598434448,grad_norm: 0.868513854508458, iteration: 85637
loss: 0.9893841743469238,grad_norm: 0.9604084792019012, iteration: 85638
loss: 1.05238676071167,grad_norm: 0.9999992574588189, iteration: 85639
loss: 0.9931251406669617,grad_norm: 0.8909046922943121, iteration: 85640
loss: 1.0148626565933228,grad_norm: 0.9999990786861509, iteration: 85641
loss: 1.1304435729980469,grad_norm: 0.9999992317015383, iteration: 85642
loss: 0.9929801821708679,grad_norm: 0.9999991823203679, iteration: 85643
loss: 1.1399577856063843,grad_norm: 0.9999999124102662, iteration: 85644
loss: 1.0201278924942017,grad_norm: 0.9999991994898177, iteration: 85645
loss: 0.974351167678833,grad_norm: 0.9510822721217247, iteration: 85646
loss: 1.0225058794021606,grad_norm: 0.9999992871520718, iteration: 85647
loss: 1.0008704662322998,grad_norm: 0.9999990677477889, iteration: 85648
loss: 1.033926248550415,grad_norm: 0.9999994517674857, iteration: 85649
loss: 1.0071605443954468,grad_norm: 0.9122333986578776, iteration: 85650
loss: 1.0161776542663574,grad_norm: 0.9999989833981777, iteration: 85651
loss: 1.0573700666427612,grad_norm: 0.9999998920918372, iteration: 85652
loss: 1.0049774646759033,grad_norm: 0.9999997679561973, iteration: 85653
loss: 0.9906260967254639,grad_norm: 0.9334151311285075, iteration: 85654
loss: 1.0110487937927246,grad_norm: 0.8219189472176209, iteration: 85655
loss: 1.0045430660247803,grad_norm: 0.961582513702701, iteration: 85656
loss: 1.0694564580917358,grad_norm: 0.9999997558255318, iteration: 85657
loss: 1.03441321849823,grad_norm: 0.8096895163984575, iteration: 85658
loss: 0.9671480059623718,grad_norm: 0.9999995528525668, iteration: 85659
loss: 1.0247169733047485,grad_norm: 0.9999989570011443, iteration: 85660
loss: 1.0612173080444336,grad_norm: 0.9999994434700056, iteration: 85661
loss: 1.164072036743164,grad_norm: 0.9999997971563558, iteration: 85662
loss: 1.0676137208938599,grad_norm: 0.9999994313499153, iteration: 85663
loss: 1.0227805376052856,grad_norm: 0.999999324717558, iteration: 85664
loss: 0.9800283908843994,grad_norm: 0.9999991992322014, iteration: 85665
loss: 1.015802025794983,grad_norm: 0.9999990596706447, iteration: 85666
loss: 1.1655097007751465,grad_norm: 0.9999999151617787, iteration: 85667
loss: 1.0757499933242798,grad_norm: 0.9999997241842508, iteration: 85668
loss: 1.002793550491333,grad_norm: 0.9999992236246362, iteration: 85669
loss: 1.0168601274490356,grad_norm: 0.811647132561059, iteration: 85670
loss: 1.0234366655349731,grad_norm: 0.9497836544825903, iteration: 85671
loss: 1.0163038969039917,grad_norm: 0.8655069783699578, iteration: 85672
loss: 1.0202385187149048,grad_norm: 0.9999995406700374, iteration: 85673
loss: 1.0536129474639893,grad_norm: 0.9999991366660562, iteration: 85674
loss: 1.0755892992019653,grad_norm: 0.9999995831619365, iteration: 85675
loss: 1.0107392072677612,grad_norm: 0.9999990561864612, iteration: 85676
loss: 1.0383710861206055,grad_norm: 0.9999990918813897, iteration: 85677
loss: 0.9946703314781189,grad_norm: 0.8762332319100541, iteration: 85678
loss: 0.9945549964904785,grad_norm: 0.8554210156043451, iteration: 85679
loss: 1.0119702816009521,grad_norm: 0.9722254531412856, iteration: 85680
loss: 1.0292720794677734,grad_norm: 0.9999990493735963, iteration: 85681
loss: 1.0282129049301147,grad_norm: 0.9999995802477853, iteration: 85682
loss: 1.1101105213165283,grad_norm: 1.0000000139953569, iteration: 85683
loss: 1.052672266960144,grad_norm: 0.9999995024756179, iteration: 85684
loss: 1.0037297010421753,grad_norm: 0.8826322866060755, iteration: 85685
loss: 1.0663026571273804,grad_norm: 0.9999995822730147, iteration: 85686
loss: 1.0836669206619263,grad_norm: 0.999999791664287, iteration: 85687
loss: 1.0086371898651123,grad_norm: 0.8577393413568422, iteration: 85688
loss: 1.017926573753357,grad_norm: 0.962635713391494, iteration: 85689
loss: 1.0327092409133911,grad_norm: 0.8533841527286716, iteration: 85690
loss: 1.1293201446533203,grad_norm: 0.9999995120138474, iteration: 85691
loss: 1.0108978748321533,grad_norm: 0.9999992417665289, iteration: 85692
loss: 1.0096737146377563,grad_norm: 0.9999996603473376, iteration: 85693
loss: 1.0423600673675537,grad_norm: 0.9999990987565156, iteration: 85694
loss: 1.01284658908844,grad_norm: 0.9999993097983325, iteration: 85695
loss: 0.9639748930931091,grad_norm: 0.9999990085775144, iteration: 85696
loss: 1.1287333965301514,grad_norm: 0.9999999198490669, iteration: 85697
loss: 1.2824662923812866,grad_norm: 0.9999999330531532, iteration: 85698
loss: 1.0725350379943848,grad_norm: 0.9999993601483923, iteration: 85699
loss: 1.0718306303024292,grad_norm: 0.9999999688652668, iteration: 85700
loss: 1.0316697359085083,grad_norm: 0.9978186640825426, iteration: 85701
loss: 1.0825741291046143,grad_norm: 0.9999990064251462, iteration: 85702
loss: 0.9988646507263184,grad_norm: 0.9999991775896035, iteration: 85703
loss: 1.02330482006073,grad_norm: 0.8441017045392304, iteration: 85704
loss: 1.00306236743927,grad_norm: 0.999999638848545, iteration: 85705
loss: 1.061133623123169,grad_norm: 0.9999999042542884, iteration: 85706
loss: 1.0573194026947021,grad_norm: 0.999999387705171, iteration: 85707
loss: 0.9734178185462952,grad_norm: 0.8836410396090081, iteration: 85708
loss: 1.112677812576294,grad_norm: 0.9999990931372639, iteration: 85709
loss: 0.9986394643783569,grad_norm: 0.9999998110545976, iteration: 85710
loss: 1.0096101760864258,grad_norm: 0.9999991037540408, iteration: 85711
loss: 1.0133272409439087,grad_norm: 0.9999991697557644, iteration: 85712
loss: 0.9837737679481506,grad_norm: 0.9026244817033153, iteration: 85713
loss: 1.0998506546020508,grad_norm: 0.9470407627301772, iteration: 85714
loss: 1.023665189743042,grad_norm: 0.9999991414093364, iteration: 85715
loss: 1.0378987789154053,grad_norm: 0.9999996358092246, iteration: 85716
loss: 1.0007083415985107,grad_norm: 0.9999995125087028, iteration: 85717
loss: 0.9947962164878845,grad_norm: 0.9081169568193833, iteration: 85718
loss: 1.0574764013290405,grad_norm: 0.9924410161466891, iteration: 85719
loss: 0.992536723613739,grad_norm: 0.9101639428294183, iteration: 85720
loss: 0.9861141443252563,grad_norm: 0.9313514461921065, iteration: 85721
loss: 0.9608421325683594,grad_norm: 0.9229971110618048, iteration: 85722
loss: 1.0208854675292969,grad_norm: 0.8821939661238105, iteration: 85723
loss: 0.9872684478759766,grad_norm: 0.9961940121878352, iteration: 85724
loss: 1.030638575553894,grad_norm: 0.9999998299938325, iteration: 85725
loss: 1.0434006452560425,grad_norm: 0.9999995724864379, iteration: 85726
loss: 1.0076314210891724,grad_norm: 0.9999995266406989, iteration: 85727
loss: 1.0811820030212402,grad_norm: 0.9999993985588373, iteration: 85728
loss: 1.0320810079574585,grad_norm: 0.8578050267612775, iteration: 85729
loss: 1.0664769411087036,grad_norm: 0.9999999231877338, iteration: 85730
loss: 1.0122859477996826,grad_norm: 0.9912341021037803, iteration: 85731
loss: 1.0468177795410156,grad_norm: 0.999999375899275, iteration: 85732
loss: 0.9596530199050903,grad_norm: 0.9999995536842251, iteration: 85733
loss: 1.108513355255127,grad_norm: 0.9999991913260061, iteration: 85734
loss: 0.940323531627655,grad_norm: 0.9305990769775658, iteration: 85735
loss: 1.0726574659347534,grad_norm: 0.9205263321641154, iteration: 85736
loss: 1.0152390003204346,grad_norm: 0.7866598231988499, iteration: 85737
loss: 1.0300910472869873,grad_norm: 0.9436915135139518, iteration: 85738
loss: 1.0191528797149658,grad_norm: 0.9999993154319995, iteration: 85739
loss: 0.9949970841407776,grad_norm: 0.7445571523093188, iteration: 85740
loss: 1.008585810661316,grad_norm: 0.9999997894453385, iteration: 85741
loss: 1.044945240020752,grad_norm: 0.9999991505768889, iteration: 85742
loss: 0.9784296751022339,grad_norm: 0.9999990536953749, iteration: 85743
loss: 1.100144386291504,grad_norm: 0.9999998210076325, iteration: 85744
loss: 1.0406986474990845,grad_norm: 0.9999991902335091, iteration: 85745
loss: 1.0306648015975952,grad_norm: 0.8969812360344412, iteration: 85746
loss: 1.0086376667022705,grad_norm: 0.9907720197906112, iteration: 85747
loss: 1.0167064666748047,grad_norm: 0.9945090202764718, iteration: 85748
loss: 1.0314232110977173,grad_norm: 0.9999990145808775, iteration: 85749
loss: 1.0303640365600586,grad_norm: 0.9999995692762269, iteration: 85750
loss: 1.0150080919265747,grad_norm: 0.9999989535719611, iteration: 85751
loss: 1.0679734945297241,grad_norm: 0.9999995141378257, iteration: 85752
loss: 0.9855980277061462,grad_norm: 0.7818417640660942, iteration: 85753
loss: 1.0545495748519897,grad_norm: 0.989010867034573, iteration: 85754
loss: 1.0089259147644043,grad_norm: 0.9432679753895711, iteration: 85755
loss: 0.9890757203102112,grad_norm: 0.9538569075564177, iteration: 85756
loss: 1.036896824836731,grad_norm: 0.9999991294528153, iteration: 85757
loss: 1.0475605726242065,grad_norm: 0.9999994229813128, iteration: 85758
loss: 0.9902424812316895,grad_norm: 0.9999991095231657, iteration: 85759
loss: 1.014723300933838,grad_norm: 0.8892935718374478, iteration: 85760
loss: 1.0379841327667236,grad_norm: 0.999999166305659, iteration: 85761
loss: 0.9942650198936462,grad_norm: 0.999999375080401, iteration: 85762
loss: 0.9944183826446533,grad_norm: 0.9999991423538294, iteration: 85763
loss: 0.9852937459945679,grad_norm: 0.9736177681613859, iteration: 85764
loss: 1.0029217004776,grad_norm: 0.9534982363902371, iteration: 85765
loss: 1.0707159042358398,grad_norm: 0.9999991733510665, iteration: 85766
loss: 1.0086114406585693,grad_norm: 0.9545603347404547, iteration: 85767
loss: 1.0227186679840088,grad_norm: 0.7927169241331248, iteration: 85768
loss: 1.0459539890289307,grad_norm: 0.9999991521539595, iteration: 85769
loss: 1.032420039176941,grad_norm: 0.8953963401345612, iteration: 85770
loss: 1.0638407468795776,grad_norm: 0.999999611399921, iteration: 85771
loss: 1.0236773490905762,grad_norm: 0.977462725950419, iteration: 85772
loss: 1.0474563837051392,grad_norm: 0.9999992067441361, iteration: 85773
loss: 1.0738760232925415,grad_norm: 0.8781983803942072, iteration: 85774
loss: 0.9771963357925415,grad_norm: 0.9999991873851699, iteration: 85775
loss: 1.021748661994934,grad_norm: 0.9999991436207852, iteration: 85776
loss: 0.9825253486633301,grad_norm: 0.9999989240583578, iteration: 85777
loss: 1.0229135751724243,grad_norm: 0.9538727277098218, iteration: 85778
loss: 0.9843894243240356,grad_norm: 0.9901497276113295, iteration: 85779
loss: 1.0391695499420166,grad_norm: 0.7526843294982403, iteration: 85780
loss: 1.0916266441345215,grad_norm: 0.9999999126871112, iteration: 85781
loss: 1.0919163227081299,grad_norm: 0.9999993701676032, iteration: 85782
loss: 1.0616214275360107,grad_norm: 0.9445695822957721, iteration: 85783
loss: 1.0210597515106201,grad_norm: 0.7835227616750068, iteration: 85784
loss: 1.0097849369049072,grad_norm: 0.9999993742416221, iteration: 85785
loss: 1.0400536060333252,grad_norm: 0.8889705655394468, iteration: 85786
loss: 0.9917593002319336,grad_norm: 0.9999989903165472, iteration: 85787
loss: 1.0686293840408325,grad_norm: 0.9761387244467827, iteration: 85788
loss: 1.1357557773590088,grad_norm: 0.9999996577534132, iteration: 85789
loss: 1.0901799201965332,grad_norm: 0.9999997297174027, iteration: 85790
loss: 1.0022728443145752,grad_norm: 0.9999990908132339, iteration: 85791
loss: 1.0235159397125244,grad_norm: 0.9999991247027122, iteration: 85792
loss: 1.0169316530227661,grad_norm: 0.9999991172185131, iteration: 85793
loss: 0.9816311597824097,grad_norm: 0.8160456752497652, iteration: 85794
loss: 1.0145785808563232,grad_norm: 0.9999991524845834, iteration: 85795
loss: 1.0176703929901123,grad_norm: 0.9999993283669543, iteration: 85796
loss: 1.0242748260498047,grad_norm: 0.9999997661721718, iteration: 85797
loss: 1.0288969278335571,grad_norm: 0.9999991549349394, iteration: 85798
loss: 1.116370439529419,grad_norm: 0.9999996041789669, iteration: 85799
loss: 1.002273440361023,grad_norm: 0.9456875623029782, iteration: 85800
loss: 1.0344632863998413,grad_norm: 0.9999998102061134, iteration: 85801
loss: 1.0358738899230957,grad_norm: 0.8595600995335052, iteration: 85802
loss: 1.0244662761688232,grad_norm: 0.9630264900700265, iteration: 85803
loss: 1.02225923538208,grad_norm: 0.999999120296419, iteration: 85804
loss: 1.0264402627944946,grad_norm: 0.9999991383869096, iteration: 85805
loss: 1.0381678342819214,grad_norm: 0.9999991253548455, iteration: 85806
loss: 1.0011930465698242,grad_norm: 0.9691501660374904, iteration: 85807
loss: 1.0302648544311523,grad_norm: 0.999999785334864, iteration: 85808
loss: 1.0305403470993042,grad_norm: 0.9999990959436983, iteration: 85809
loss: 0.9984425902366638,grad_norm: 0.9999990854472579, iteration: 85810
loss: 0.989298939704895,grad_norm: 0.9999997639008593, iteration: 85811
loss: 1.0611488819122314,grad_norm: 0.9999993065849776, iteration: 85812
loss: 1.0378177165985107,grad_norm: 0.9999991338662672, iteration: 85813
loss: 1.0933878421783447,grad_norm: 1.0000000111706802, iteration: 85814
loss: 1.0927292108535767,grad_norm: 0.9999991762723532, iteration: 85815
loss: 0.9835544228553772,grad_norm: 0.9999989824091666, iteration: 85816
loss: 1.041509985923767,grad_norm: 0.9999992243546488, iteration: 85817
loss: 1.004044771194458,grad_norm: 0.943231661438323, iteration: 85818
loss: 1.0389738082885742,grad_norm: 0.9184253106226746, iteration: 85819
loss: 1.090440034866333,grad_norm: 0.9999989783400134, iteration: 85820
loss: 1.0128766298294067,grad_norm: 0.9088039738807223, iteration: 85821
loss: 0.9885135293006897,grad_norm: 0.9889835182749348, iteration: 85822
loss: 0.9878341555595398,grad_norm: 0.9536937947137293, iteration: 85823
loss: 0.9949540495872498,grad_norm: 0.9008333995829839, iteration: 85824
loss: 0.9648440480232239,grad_norm: 0.7494865904913481, iteration: 85825
loss: 1.000759243965149,grad_norm: 0.8893751546195634, iteration: 85826
loss: 1.048089861869812,grad_norm: 0.9999991364011368, iteration: 85827
loss: 1.0897845029830933,grad_norm: 0.9999994294767588, iteration: 85828
loss: 0.9853256940841675,grad_norm: 0.9659059190095584, iteration: 85829
loss: 1.0000786781311035,grad_norm: 0.8915114036800449, iteration: 85830
loss: 1.028367280960083,grad_norm: 0.999999095047345, iteration: 85831
loss: 1.0046770572662354,grad_norm: 0.9581565197028149, iteration: 85832
loss: 1.005540370941162,grad_norm: 0.8991823151342793, iteration: 85833
loss: 0.986048698425293,grad_norm: 0.9999990618934513, iteration: 85834
loss: 1.0266413688659668,grad_norm: 0.9999990517517524, iteration: 85835
loss: 1.0059746503829956,grad_norm: 0.9999990627125865, iteration: 85836
loss: 1.0075079202651978,grad_norm: 0.9168002908067534, iteration: 85837
loss: 1.0177525281906128,grad_norm: 0.9999993577236386, iteration: 85838
loss: 1.0047742128372192,grad_norm: 0.9027175577046334, iteration: 85839
loss: 1.0004620552062988,grad_norm: 0.811540515057685, iteration: 85840
loss: 1.0645761489868164,grad_norm: 0.9999990902545207, iteration: 85841
loss: 1.0736027956008911,grad_norm: 0.9999998736682991, iteration: 85842
loss: 0.994328498840332,grad_norm: 0.8666845226151962, iteration: 85843
loss: 0.9975521564483643,grad_norm: 0.8563919565207727, iteration: 85844
loss: 1.002111792564392,grad_norm: 0.99999994849537, iteration: 85845
loss: 1.0173497200012207,grad_norm: 0.9197559619594646, iteration: 85846
loss: 1.068226933479309,grad_norm: 0.9999989563225112, iteration: 85847
loss: 1.0134198665618896,grad_norm: 0.9999990267643488, iteration: 85848
loss: 1.018858790397644,grad_norm: 0.999999929717137, iteration: 85849
loss: 1.0637694597244263,grad_norm: 0.9999990889497383, iteration: 85850
loss: 0.9758186936378479,grad_norm: 0.9999999139667233, iteration: 85851
loss: 1.0005919933319092,grad_norm: 0.9999996661185088, iteration: 85852
loss: 1.014380693435669,grad_norm: 0.9999991358935464, iteration: 85853
loss: 1.0233875513076782,grad_norm: 0.971252245821569, iteration: 85854
loss: 1.023390293121338,grad_norm: 0.8611928495251002, iteration: 85855
loss: 0.9824285507202148,grad_norm: 0.9999990273913671, iteration: 85856
loss: 1.014344334602356,grad_norm: 0.9842083696656971, iteration: 85857
loss: 0.9808779358863831,grad_norm: 0.8806046644622005, iteration: 85858
loss: 1.0153419971466064,grad_norm: 0.9999991673798267, iteration: 85859
loss: 1.0610548257827759,grad_norm: 0.9999994362902186, iteration: 85860
loss: 1.010230541229248,grad_norm: 0.9684845800450087, iteration: 85861
loss: 0.9881247282028198,grad_norm: 0.9083088562755237, iteration: 85862
loss: 0.9846820831298828,grad_norm: 0.9999990452111789, iteration: 85863
loss: 1.0089513063430786,grad_norm: 0.9999991537222014, iteration: 85864
loss: 0.9984024167060852,grad_norm: 0.9421937876980585, iteration: 85865
loss: 1.0344951152801514,grad_norm: 0.9999994603090758, iteration: 85866
loss: 1.041351079940796,grad_norm: 0.954092083903767, iteration: 85867
loss: 0.9980846047401428,grad_norm: 0.9999990372282493, iteration: 85868
loss: 1.092831015586853,grad_norm: 0.9999997917676136, iteration: 85869
loss: 1.0256479978561401,grad_norm: 0.9999990439972083, iteration: 85870
loss: 1.066578984260559,grad_norm: 0.9515520511686817, iteration: 85871
loss: 1.0844414234161377,grad_norm: 0.9999990084219773, iteration: 85872
loss: 1.0428614616394043,grad_norm: 0.99999915046901, iteration: 85873
loss: 1.0101975202560425,grad_norm: 0.925072676724079, iteration: 85874
loss: 1.024572730064392,grad_norm: 0.9999992356503246, iteration: 85875
loss: 0.9866524338722229,grad_norm: 0.8235946557704454, iteration: 85876
loss: 0.9633827805519104,grad_norm: 0.8639075571554122, iteration: 85877
loss: 1.0041767358779907,grad_norm: 0.9999992543744969, iteration: 85878
loss: 1.0157783031463623,grad_norm: 0.8687609672488601, iteration: 85879
loss: 0.9902782440185547,grad_norm: 0.9601516141797942, iteration: 85880
loss: 0.9880580902099609,grad_norm: 0.9999991737335142, iteration: 85881
loss: 0.961337685585022,grad_norm: 0.9053160120941849, iteration: 85882
loss: 1.0428636074066162,grad_norm: 0.9999995153657848, iteration: 85883
loss: 0.970279335975647,grad_norm: 0.9587663879470368, iteration: 85884
loss: 0.9922925233840942,grad_norm: 0.9554652153127751, iteration: 85885
loss: 1.012460708618164,grad_norm: 0.984785782201002, iteration: 85886
loss: 1.1221119165420532,grad_norm: 0.9999999913879237, iteration: 85887
loss: 1.0393555164337158,grad_norm: 0.9127066153379831, iteration: 85888
loss: 0.9965367913246155,grad_norm: 0.9758832880878072, iteration: 85889
loss: 1.027279257774353,grad_norm: 0.9716253044340472, iteration: 85890
loss: 1.0194791555404663,grad_norm: 0.9999992033247336, iteration: 85891
loss: 1.0185288190841675,grad_norm: 0.9999994413456764, iteration: 85892
loss: 1.071266531944275,grad_norm: 0.9999997767516181, iteration: 85893
loss: 0.9850524067878723,grad_norm: 0.999999360766103, iteration: 85894
loss: 1.0581945180892944,grad_norm: 0.9999994296000035, iteration: 85895
loss: 1.0002868175506592,grad_norm: 0.8812335061007008, iteration: 85896
loss: 1.0082331895828247,grad_norm: 0.9999995026779108, iteration: 85897
loss: 0.968468189239502,grad_norm: 0.982550211834305, iteration: 85898
loss: 0.9735816121101379,grad_norm: 0.9999995630910048, iteration: 85899
loss: 1.007238507270813,grad_norm: 0.9999993110021714, iteration: 85900
loss: 0.9891719222068787,grad_norm: 0.9033849981620441, iteration: 85901
loss: 1.039505124092102,grad_norm: 0.966222077423326, iteration: 85902
loss: 1.0137057304382324,grad_norm: 0.9889811479013074, iteration: 85903
loss: 1.072477102279663,grad_norm: 0.9999997431324453, iteration: 85904
loss: 0.9946429133415222,grad_norm: 0.9999991085502797, iteration: 85905
loss: 1.0253998041152954,grad_norm: 0.8489142773123889, iteration: 85906
loss: 1.0670218467712402,grad_norm: 0.9999990768618365, iteration: 85907
loss: 1.0069122314453125,grad_norm: 0.9528847048328508, iteration: 85908
loss: 0.989686131477356,grad_norm: 0.821122818165238, iteration: 85909
loss: 1.0252169370651245,grad_norm: 0.9999991289211321, iteration: 85910
loss: 1.044297218322754,grad_norm: 0.9999996867830456, iteration: 85911
loss: 1.0715526342391968,grad_norm: 0.9419381051396801, iteration: 85912
loss: 0.9860610961914062,grad_norm: 0.866910902392378, iteration: 85913
loss: 1.0395349264144897,grad_norm: 0.805681229638508, iteration: 85914
loss: 1.0239676237106323,grad_norm: 0.9999997195333618, iteration: 85915
loss: 1.0437746047973633,grad_norm: 0.9999990993686892, iteration: 85916
loss: 1.02256178855896,grad_norm: 0.8766091152416847, iteration: 85917
loss: 1.0762548446655273,grad_norm: 0.9999991024950687, iteration: 85918
loss: 1.039756178855896,grad_norm: 0.999999123037727, iteration: 85919
loss: 1.0091031789779663,grad_norm: 0.9999996943640409, iteration: 85920
loss: 1.0297216176986694,grad_norm: 0.9999991676742481, iteration: 85921
loss: 1.0566108226776123,grad_norm: 0.9999998907904013, iteration: 85922
loss: 1.0060731172561646,grad_norm: 0.959656799201448, iteration: 85923
loss: 1.0600954294204712,grad_norm: 0.9999995274806998, iteration: 85924
loss: 0.9898094534873962,grad_norm: 0.9023635394564855, iteration: 85925
loss: 0.9782215356826782,grad_norm: 0.9999991607771658, iteration: 85926
loss: 1.0689923763275146,grad_norm: 0.9920360359923084, iteration: 85927
loss: 1.0165455341339111,grad_norm: 0.9999991627108892, iteration: 85928
loss: 0.960443913936615,grad_norm: 0.9282684482000993, iteration: 85929
loss: 1.007295846939087,grad_norm: 0.8912802479316583, iteration: 85930
loss: 1.000841498374939,grad_norm: 0.9999992517848381, iteration: 85931
loss: 0.9936913847923279,grad_norm: 0.9999994317070011, iteration: 85932
loss: 1.020967960357666,grad_norm: 0.9286768876507357, iteration: 85933
loss: 1.0489016771316528,grad_norm: 0.9608759266287384, iteration: 85934
loss: 0.991812527179718,grad_norm: 0.9999991744729958, iteration: 85935
loss: 1.0845588445663452,grad_norm: 0.999999140813397, iteration: 85936
loss: 0.9985587000846863,grad_norm: 0.9999989863081534, iteration: 85937
loss: 1.0305166244506836,grad_norm: 0.8776097661157748, iteration: 85938
loss: 1.0105401277542114,grad_norm: 0.9999991206663007, iteration: 85939
loss: 1.0303988456726074,grad_norm: 0.9603299831040777, iteration: 85940
loss: 1.0208098888397217,grad_norm: 0.9299512880443511, iteration: 85941
loss: 1.216071605682373,grad_norm: 0.9999998579911218, iteration: 85942
loss: 1.0140000581741333,grad_norm: 0.826465234658332, iteration: 85943
loss: 0.9639087915420532,grad_norm: 0.9174793277460277, iteration: 85944
loss: 0.9950101971626282,grad_norm: 0.9999993259628019, iteration: 85945
loss: 1.008430004119873,grad_norm: 0.971045558831137, iteration: 85946
loss: 1.0762815475463867,grad_norm: 0.9999992690437429, iteration: 85947
loss: 1.0605049133300781,grad_norm: 0.925591173715356, iteration: 85948
loss: 0.9843935966491699,grad_norm: 0.8731829885219163, iteration: 85949
loss: 1.063889503479004,grad_norm: 0.9236114827284632, iteration: 85950
loss: 1.0068765878677368,grad_norm: 0.9705036925943046, iteration: 85951
loss: 1.0086450576782227,grad_norm: 0.8799939252370675, iteration: 85952
loss: 1.0420030355453491,grad_norm: 0.8191326519716617, iteration: 85953
loss: 0.9771020412445068,grad_norm: 0.8446376969393381, iteration: 85954
loss: 1.0387777090072632,grad_norm: 0.9999993012347291, iteration: 85955
loss: 1.0517566204071045,grad_norm: 0.9999992260594854, iteration: 85956
loss: 1.0008044242858887,grad_norm: 0.9999994182372292, iteration: 85957
loss: 1.007554054260254,grad_norm: 0.9999992826730584, iteration: 85958
loss: 1.002551555633545,grad_norm: 0.9563889582739383, iteration: 85959
loss: 0.9796003103256226,grad_norm: 0.8568679347572984, iteration: 85960
loss: 1.0117515325546265,grad_norm: 0.8359372266994628, iteration: 85961
loss: 1.058635950088501,grad_norm: 0.999999186968344, iteration: 85962
loss: 0.9996549487113953,grad_norm: 0.7298221872793594, iteration: 85963
loss: 1.039922833442688,grad_norm: 0.9999995373623404, iteration: 85964
loss: 1.0088293552398682,grad_norm: 0.9288920997191342, iteration: 85965
loss: 1.0467503070831299,grad_norm: 0.9999992939375815, iteration: 85966
loss: 1.012911319732666,grad_norm: 0.9444466746137095, iteration: 85967
loss: 0.9937907457351685,grad_norm: 0.9999990476249192, iteration: 85968
loss: 0.9857068061828613,grad_norm: 0.951530299478868, iteration: 85969
loss: 1.0350159406661987,grad_norm: 0.9757643906700113, iteration: 85970
loss: 1.0034762620925903,grad_norm: 0.7428090094091249, iteration: 85971
loss: 1.0175161361694336,grad_norm: 0.9999991246703421, iteration: 85972
loss: 1.0086109638214111,grad_norm: 0.9999990223355623, iteration: 85973
loss: 1.0282880067825317,grad_norm: 0.9999996703654098, iteration: 85974
loss: 1.0217523574829102,grad_norm: 0.9999992316806188, iteration: 85975
loss: 1.0752032995224,grad_norm: 0.9999990298917116, iteration: 85976
loss: 1.0483109951019287,grad_norm: 0.9999994344677918, iteration: 85977
loss: 1.0120693445205688,grad_norm: 0.9999991555050671, iteration: 85978
loss: 0.9947302937507629,grad_norm: 0.790633268286252, iteration: 85979
loss: 1.0774891376495361,grad_norm: 0.9999997752733781, iteration: 85980
loss: 1.0598522424697876,grad_norm: 0.8704979941355007, iteration: 85981
loss: 1.0039530992507935,grad_norm: 0.9999999569734833, iteration: 85982
loss: 1.092214584350586,grad_norm: 0.9999990836203815, iteration: 85983
loss: 0.9797660708427429,grad_norm: 0.9397469991596186, iteration: 85984
loss: 0.9814304709434509,grad_norm: 0.9999994777951784, iteration: 85985
loss: 0.9776128530502319,grad_norm: 0.9999991371556181, iteration: 85986
loss: 1.0210801362991333,grad_norm: 0.9612961128620103, iteration: 85987
loss: 0.9848936796188354,grad_norm: 0.9999991422995521, iteration: 85988
loss: 1.0376225709915161,grad_norm: 0.891070667877154, iteration: 85989
loss: 1.1269406080245972,grad_norm: 0.9999998898109901, iteration: 85990
loss: 0.9776129722595215,grad_norm: 0.9321522724832472, iteration: 85991
loss: 1.0361099243164062,grad_norm: 0.9999993662992477, iteration: 85992
loss: 0.9994718432426453,grad_norm: 0.9999994629394899, iteration: 85993
loss: 1.0388641357421875,grad_norm: 0.9999993239177437, iteration: 85994
loss: 1.0027049779891968,grad_norm: 0.9999993849487803, iteration: 85995
loss: 1.05280601978302,grad_norm: 0.9999999943099687, iteration: 85996
loss: 1.026579737663269,grad_norm: 0.9999992408441598, iteration: 85997
loss: 1.0893324613571167,grad_norm: 0.9237839076612893, iteration: 85998
loss: 1.0494521856307983,grad_norm: 0.9999992589617267, iteration: 85999
loss: 1.0449248552322388,grad_norm: 0.7851950494194005, iteration: 86000
loss: 1.0570648908615112,grad_norm: 0.9436812719173863, iteration: 86001
loss: 1.022810459136963,grad_norm: 0.9131789885122726, iteration: 86002
loss: 1.0187565088272095,grad_norm: 0.9789750752964135, iteration: 86003
loss: 1.0638258457183838,grad_norm: 0.9999990892641788, iteration: 86004
loss: 1.1173077821731567,grad_norm: 0.9999995639503453, iteration: 86005
loss: 1.0215474367141724,grad_norm: 0.9288619570344717, iteration: 86006
loss: 1.024740219116211,grad_norm: 0.9206926773957257, iteration: 86007
loss: 1.068920612335205,grad_norm: 0.9999992724759509, iteration: 86008
loss: 1.0189257860183716,grad_norm: 0.9999992984503268, iteration: 86009
loss: 1.0752800703048706,grad_norm: 0.9999993671560394, iteration: 86010
loss: 1.069973111152649,grad_norm: 0.9125526038762692, iteration: 86011
loss: 1.0316722393035889,grad_norm: 0.9999992993147934, iteration: 86012
loss: 1.0899006128311157,grad_norm: 0.9999992566190337, iteration: 86013
loss: 0.9892138838768005,grad_norm: 0.821288367000644, iteration: 86014
loss: 0.9686611294746399,grad_norm: 0.9999990629185, iteration: 86015
loss: 1.0156433582305908,grad_norm: 0.9999990567767634, iteration: 86016
loss: 1.0059353113174438,grad_norm: 0.9999998307033146, iteration: 86017
loss: 0.9861606359481812,grad_norm: 0.8358202342716844, iteration: 86018
loss: 1.0766417980194092,grad_norm: 0.9999994893691242, iteration: 86019
loss: 0.9789538383483887,grad_norm: 0.9999990729473762, iteration: 86020
loss: 1.0964643955230713,grad_norm: 0.9999994743573832, iteration: 86021
loss: 1.0323600769042969,grad_norm: 0.8621739001998447, iteration: 86022
loss: 1.0039985179901123,grad_norm: 0.9310559321952684, iteration: 86023
loss: 1.0108176469802856,grad_norm: 0.9999996300682313, iteration: 86024
loss: 1.0037362575531006,grad_norm: 0.9999998644484757, iteration: 86025
loss: 1.0326869487762451,grad_norm: 0.9846193216537996, iteration: 86026
loss: 0.9667649269104004,grad_norm: 0.9999991689602109, iteration: 86027
loss: 1.0302064418792725,grad_norm: 0.9999996224761699, iteration: 86028
loss: 0.9895066618919373,grad_norm: 0.9999992885852347, iteration: 86029
loss: 0.9649690985679626,grad_norm: 0.8267741440972056, iteration: 86030
loss: 1.075622320175171,grad_norm: 0.9999992632314936, iteration: 86031
loss: 1.0506821870803833,grad_norm: 0.9999993435283652, iteration: 86032
loss: 0.9962087869644165,grad_norm: 0.8306634385308294, iteration: 86033
loss: 1.0641824007034302,grad_norm: 0.9924851630496704, iteration: 86034
loss: 1.0279932022094727,grad_norm: 0.9999992548830945, iteration: 86035
loss: 0.9786196351051331,grad_norm: 0.9407045897093245, iteration: 86036
loss: 1.0141035318374634,grad_norm: 0.9269006740402541, iteration: 86037
loss: 0.9982722997665405,grad_norm: 0.9562195930743403, iteration: 86038
loss: 1.032828450202942,grad_norm: 0.9999994913785298, iteration: 86039
loss: 1.0222777128219604,grad_norm: 0.9999989987480076, iteration: 86040
loss: 0.9992082118988037,grad_norm: 0.876660869326382, iteration: 86041
loss: 1.0247822999954224,grad_norm: 0.8586869980817091, iteration: 86042
loss: 0.9763211011886597,grad_norm: 0.9693406651599905, iteration: 86043
loss: 0.9642932415008545,grad_norm: 0.876801828873542, iteration: 86044
loss: 1.0027410984039307,grad_norm: 0.9614243981274305, iteration: 86045
loss: 1.0068645477294922,grad_norm: 0.9999992037771519, iteration: 86046
loss: 1.0164638757705688,grad_norm: 0.9321748648176781, iteration: 86047
loss: 0.9697909355163574,grad_norm: 0.9028171004131014, iteration: 86048
loss: 1.0155452489852905,grad_norm: 0.9628671396066558, iteration: 86049
loss: 0.9802619218826294,grad_norm: 0.9999989583699872, iteration: 86050
loss: 0.9733978509902954,grad_norm: 0.9999991855236299, iteration: 86051
loss: 0.9955008029937744,grad_norm: 0.9999990844957427, iteration: 86052
loss: 1.068672776222229,grad_norm: 0.9999990923306181, iteration: 86053
loss: 0.9915072321891785,grad_norm: 0.9999991413178732, iteration: 86054
loss: 0.9990588426589966,grad_norm: 0.9999992953367112, iteration: 86055
loss: 0.9639261960983276,grad_norm: 0.7370490845201454, iteration: 86056
loss: 0.9863418340682983,grad_norm: 0.8621090674479953, iteration: 86057
loss: 1.026795506477356,grad_norm: 0.8574018777443376, iteration: 86058
loss: 1.0069676637649536,grad_norm: 0.9999991863858757, iteration: 86059
loss: 1.077963948249817,grad_norm: 0.9999996747892091, iteration: 86060
loss: 1.0255385637283325,grad_norm: 0.9999990196754908, iteration: 86061
loss: 1.0041661262512207,grad_norm: 0.890590285569215, iteration: 86062
loss: 1.0228382349014282,grad_norm: 0.9999992091101652, iteration: 86063
loss: 0.9953505396842957,grad_norm: 0.9999990454833266, iteration: 86064
loss: 1.096109390258789,grad_norm: 0.9999996835012633, iteration: 86065
loss: 1.0306795835494995,grad_norm: 0.9330763151165965, iteration: 86066
loss: 1.0320513248443604,grad_norm: 0.7753794205706358, iteration: 86067
loss: 1.0433560609817505,grad_norm: 0.9999991616487505, iteration: 86068
loss: 1.0075408220291138,grad_norm: 0.9499427810110486, iteration: 86069
loss: 0.9345210790634155,grad_norm: 0.8979100966590768, iteration: 86070
loss: 1.0322527885437012,grad_norm: 0.8116868076765886, iteration: 86071
loss: 1.018646001815796,grad_norm: 0.9999993618335151, iteration: 86072
loss: 0.9803636074066162,grad_norm: 0.8771518170210827, iteration: 86073
loss: 1.0125516653060913,grad_norm: 0.9999990518751765, iteration: 86074
loss: 0.9809916615486145,grad_norm: 0.9793619566369104, iteration: 86075
loss: 1.0483101606369019,grad_norm: 0.9999994023141163, iteration: 86076
loss: 1.1058095693588257,grad_norm: 0.9999990653250814, iteration: 86077
loss: 0.9727113842964172,grad_norm: 0.8839199473226445, iteration: 86078
loss: 1.0344833135604858,grad_norm: 0.8410095492363149, iteration: 86079
loss: 1.017454981803894,grad_norm: 0.999999749616363, iteration: 86080
loss: 1.0857163667678833,grad_norm: 0.9217839839082943, iteration: 86081
loss: 0.9750440716743469,grad_norm: 0.9999991073192894, iteration: 86082
loss: 1.0015314817428589,grad_norm: 0.9177257432370811, iteration: 86083
loss: 1.064811110496521,grad_norm: 0.9999999820719561, iteration: 86084
loss: 1.0247802734375,grad_norm: 0.9999995651254814, iteration: 86085
loss: 1.0316052436828613,grad_norm: 0.9999998756579505, iteration: 86086
loss: 0.9813434481620789,grad_norm: 0.9806740586948258, iteration: 86087
loss: 0.9946777820587158,grad_norm: 0.9999990533106559, iteration: 86088
loss: 1.0323153734207153,grad_norm: 0.9999993147988887, iteration: 86089
loss: 1.0351078510284424,grad_norm: 0.9999991169901307, iteration: 86090
loss: 0.9936582446098328,grad_norm: 0.8817873835516992, iteration: 86091
loss: 1.0265077352523804,grad_norm: 0.8246756287295395, iteration: 86092
loss: 0.9990790486335754,grad_norm: 0.9999993956617543, iteration: 86093
loss: 0.9727946519851685,grad_norm: 0.9999995199852446, iteration: 86094
loss: 1.0087023973464966,grad_norm: 0.9999991394080653, iteration: 86095
loss: 0.9750964641571045,grad_norm: 0.9484861487379778, iteration: 86096
loss: 1.048899531364441,grad_norm: 0.9999996311764113, iteration: 86097
loss: 1.182562232017517,grad_norm: 0.9999998751003901, iteration: 86098
loss: 1.1399458646774292,grad_norm: 0.9999994479411355, iteration: 86099
loss: 0.992027759552002,grad_norm: 0.9999991799661899, iteration: 86100
loss: 1.0861740112304688,grad_norm: 1.0000000485851201, iteration: 86101
loss: 1.0036855936050415,grad_norm: 0.9999992439587535, iteration: 86102
loss: 1.003249168395996,grad_norm: 0.9061986898004412, iteration: 86103
loss: 1.0759953260421753,grad_norm: 0.9999990434853699, iteration: 86104
loss: 1.1843678951263428,grad_norm: 0.9906157487330929, iteration: 86105
loss: 0.9562909007072449,grad_norm: 0.957134827925901, iteration: 86106
loss: 1.0010522603988647,grad_norm: 0.9012625344816512, iteration: 86107
loss: 1.064002275466919,grad_norm: 0.9999993756243838, iteration: 86108
loss: 1.0344773530960083,grad_norm: 0.9999994962435451, iteration: 86109
loss: 1.056968331336975,grad_norm: 0.873578669224516, iteration: 86110
loss: 1.032379150390625,grad_norm: 0.9999992273986142, iteration: 86111
loss: 1.0456812381744385,grad_norm: 0.9332168739078305, iteration: 86112
loss: 1.0298149585723877,grad_norm: 0.8519530287899333, iteration: 86113
loss: 1.0125701427459717,grad_norm: 0.9321122710007367, iteration: 86114
loss: 1.0165290832519531,grad_norm: 0.9999996370973978, iteration: 86115
loss: 1.110587239265442,grad_norm: 0.9999990875651626, iteration: 86116
loss: 1.0646823644638062,grad_norm: 0.9999991093650524, iteration: 86117
loss: 1.1672446727752686,grad_norm: 0.999999367212673, iteration: 86118
loss: 1.0545287132263184,grad_norm: 0.9999995701375365, iteration: 86119
loss: 1.1102060079574585,grad_norm: 0.9999991365128349, iteration: 86120
loss: 0.9883852601051331,grad_norm: 0.9612916781989531, iteration: 86121
loss: 1.043375015258789,grad_norm: 0.999999726626511, iteration: 86122
loss: 1.0441169738769531,grad_norm: 0.9999993179483022, iteration: 86123
loss: 1.059522032737732,grad_norm: 0.9999993397602439, iteration: 86124
loss: 1.0229493379592896,grad_norm: 0.9999989872224319, iteration: 86125
loss: 1.0311847925186157,grad_norm: 0.9999990844124155, iteration: 86126
loss: 1.0074621438980103,grad_norm: 0.8233998302993831, iteration: 86127
loss: 1.074435830116272,grad_norm: 0.9999999078384386, iteration: 86128
loss: 1.012789011001587,grad_norm: 0.9999992809110054, iteration: 86129
loss: 1.0853224992752075,grad_norm: 0.9999994923366126, iteration: 86130
loss: 1.0205214023590088,grad_norm: 0.9999990853670677, iteration: 86131
loss: 0.9866005182266235,grad_norm: 0.7873131987468723, iteration: 86132
loss: 1.0203890800476074,grad_norm: 0.9999992180139704, iteration: 86133
loss: 1.0790647268295288,grad_norm: 0.9999994006036419, iteration: 86134
loss: 1.00429368019104,grad_norm: 0.8787724050479506, iteration: 86135
loss: 0.9714021682739258,grad_norm: 0.9848410415813891, iteration: 86136
loss: 0.9592278599739075,grad_norm: 0.8652066051331605, iteration: 86137
loss: 1.0189361572265625,grad_norm: 0.9841854178721863, iteration: 86138
loss: 1.008674144744873,grad_norm: 0.9999997355564292, iteration: 86139
loss: 1.010944128036499,grad_norm: 0.9999990931876495, iteration: 86140
loss: 1.0052424669265747,grad_norm: 0.8731202437727663, iteration: 86141
loss: 1.018110990524292,grad_norm: 0.8827487890585802, iteration: 86142
loss: 1.0058485269546509,grad_norm: 0.9076290573837785, iteration: 86143
loss: 1.0014760494232178,grad_norm: 0.8104675635952918, iteration: 86144
loss: 0.9952970743179321,grad_norm: 0.9999990852906745, iteration: 86145
loss: 1.0050565004348755,grad_norm: 0.9999991577783206, iteration: 86146
loss: 1.0026476383209229,grad_norm: 0.9999992272801675, iteration: 86147
loss: 1.007436990737915,grad_norm: 0.8256146587558886, iteration: 86148
loss: 1.0244436264038086,grad_norm: 0.9999992431509878, iteration: 86149
loss: 1.0137667655944824,grad_norm: 0.9999991574408206, iteration: 86150
loss: 1.04031240940094,grad_norm: 0.9999996075163081, iteration: 86151
loss: 0.9937193989753723,grad_norm: 0.9999994575150456, iteration: 86152
loss: 1.0153652429580688,grad_norm: 0.9999990887887454, iteration: 86153
loss: 1.0405452251434326,grad_norm: 0.9999991156397309, iteration: 86154
loss: 0.9750035405158997,grad_norm: 0.9507271666968479, iteration: 86155
loss: 0.9593444466590881,grad_norm: 0.9529640856376232, iteration: 86156
loss: 1.079314947128296,grad_norm: 0.9999994482849428, iteration: 86157
loss: 0.9857123494148254,grad_norm: 0.9176524936509156, iteration: 86158
loss: 1.0151891708374023,grad_norm: 0.9605638827615073, iteration: 86159
loss: 0.9883175492286682,grad_norm: 0.9999992462722047, iteration: 86160
loss: 0.9782186150550842,grad_norm: 0.8189085011873314, iteration: 86161
loss: 1.067163109779358,grad_norm: 0.8255762230924059, iteration: 86162
loss: 1.0098775625228882,grad_norm: 0.888456347735132, iteration: 86163
loss: 0.9906722903251648,grad_norm: 0.9999990783609102, iteration: 86164
loss: 1.005823016166687,grad_norm: 0.8588799149955216, iteration: 86165
loss: 0.9823429584503174,grad_norm: 0.8235763792955734, iteration: 86166
loss: 0.9963578581809998,grad_norm: 0.8528378125925387, iteration: 86167
loss: 1.0114750862121582,grad_norm: 0.9999991223143047, iteration: 86168
loss: 1.0089279413223267,grad_norm: 0.9999991133618281, iteration: 86169
loss: 1.0150562524795532,grad_norm: 0.9999991267892706, iteration: 86170
loss: 0.9806820750236511,grad_norm: 0.8914649807495485, iteration: 86171
loss: 0.9784666299819946,grad_norm: 0.949248167266745, iteration: 86172
loss: 0.9551668167114258,grad_norm: 0.9999990456678521, iteration: 86173
loss: 1.0328562259674072,grad_norm: 0.9999999566449271, iteration: 86174
loss: 0.9877376556396484,grad_norm: 0.9999990660118404, iteration: 86175
loss: 0.9997064471244812,grad_norm: 0.8091024954380567, iteration: 86176
loss: 0.9644799828529358,grad_norm: 0.9999990919572029, iteration: 86177
loss: 1.0030314922332764,grad_norm: 0.9042403635451921, iteration: 86178
loss: 0.9430357217788696,grad_norm: 0.9999990239222589, iteration: 86179
loss: 1.0117887258529663,grad_norm: 0.9999993019000603, iteration: 86180
loss: 1.05336594581604,grad_norm: 0.9999992192170921, iteration: 86181
loss: 1.053528070449829,grad_norm: 0.9999991566119011, iteration: 86182
loss: 0.9989490509033203,grad_norm: 0.9999991584849293, iteration: 86183
loss: 1.182425618171692,grad_norm: 0.9999993244427176, iteration: 86184
loss: 1.0307660102844238,grad_norm: 0.9999994595662822, iteration: 86185
loss: 1.0353907346725464,grad_norm: 0.9999992325224341, iteration: 86186
loss: 1.0016200542449951,grad_norm: 0.9448240564861927, iteration: 86187
loss: 1.0085620880126953,grad_norm: 0.9999992046037186, iteration: 86188
loss: 0.9842618107795715,grad_norm: 0.9385520985803266, iteration: 86189
loss: 1.0511807203292847,grad_norm: 0.9999991802555488, iteration: 86190
loss: 1.051476240158081,grad_norm: 0.9827464196076384, iteration: 86191
loss: 0.9705621600151062,grad_norm: 0.9999993915265364, iteration: 86192
loss: 1.0136375427246094,grad_norm: 0.9999996341354527, iteration: 86193
loss: 1.0013236999511719,grad_norm: 0.9999991255072002, iteration: 86194
loss: 1.008765459060669,grad_norm: 0.9999992204150594, iteration: 86195
loss: 1.1649547815322876,grad_norm: 0.9999991523553546, iteration: 86196
loss: 1.0592808723449707,grad_norm: 0.9999993084923073, iteration: 86197
loss: 1.04170823097229,grad_norm: 0.8452052294495485, iteration: 86198
loss: 1.0155974626541138,grad_norm: 0.945926607424436, iteration: 86199
loss: 1.042434811592102,grad_norm: 0.9999991684273337, iteration: 86200
loss: 0.9751401543617249,grad_norm: 0.899915782954745, iteration: 86201
loss: 1.0430500507354736,grad_norm: 0.999999523679023, iteration: 86202
loss: 1.0289299488067627,grad_norm: 0.9262353861099172, iteration: 86203
loss: 1.0373408794403076,grad_norm: 0.9722403156540992, iteration: 86204
loss: 0.9862077236175537,grad_norm: 0.9999991980778943, iteration: 86205
loss: 0.9892603158950806,grad_norm: 0.9663691776896405, iteration: 86206
loss: 1.0382113456726074,grad_norm: 0.8880024013519309, iteration: 86207
loss: 0.9710183143615723,grad_norm: 0.7870584949693455, iteration: 86208
loss: 1.0026252269744873,grad_norm: 0.9019763011979279, iteration: 86209
loss: 0.9804220199584961,grad_norm: 0.8593787414104542, iteration: 86210
loss: 0.9807696342468262,grad_norm: 0.9941074700475205, iteration: 86211
loss: 1.021962285041809,grad_norm: 0.9999994230439238, iteration: 86212
loss: 0.9810730814933777,grad_norm: 0.9999991718139474, iteration: 86213
loss: 0.97346031665802,grad_norm: 0.9999990944245243, iteration: 86214
loss: 0.9898542165756226,grad_norm: 0.9999995937098268, iteration: 86215
loss: 1.0087939500808716,grad_norm: 0.9313478030014339, iteration: 86216
loss: 1.007546305656433,grad_norm: 0.9775612556892229, iteration: 86217
loss: 1.0095701217651367,grad_norm: 0.8933788048953943, iteration: 86218
loss: 1.0247154235839844,grad_norm: 0.8722714778828808, iteration: 86219
loss: 1.0055596828460693,grad_norm: 0.8438122388706756, iteration: 86220
loss: 1.0182656049728394,grad_norm: 0.9999990458322646, iteration: 86221
loss: 1.0119199752807617,grad_norm: 0.9999994444330699, iteration: 86222
loss: 0.9994558095932007,grad_norm: 0.9437156454074195, iteration: 86223
loss: 1.0323330163955688,grad_norm: 0.9999995256031455, iteration: 86224
loss: 1.0416712760925293,grad_norm: 0.8737694410577939, iteration: 86225
loss: 1.0107522010803223,grad_norm: 0.9999992760381671, iteration: 86226
loss: 0.9849369525909424,grad_norm: 0.8208985721731283, iteration: 86227
loss: 1.007080078125,grad_norm: 0.9398506173616117, iteration: 86228
loss: 1.0025743246078491,grad_norm: 0.9999990575949242, iteration: 86229
loss: 0.99491286277771,grad_norm: 0.9999990214378178, iteration: 86230
loss: 1.0181050300598145,grad_norm: 0.9181068416338368, iteration: 86231
loss: 1.0277453660964966,grad_norm: 0.9816303230764434, iteration: 86232
loss: 0.9634932279586792,grad_norm: 0.9892099906835592, iteration: 86233
loss: 0.9709518551826477,grad_norm: 0.9999990113063892, iteration: 86234
loss: 1.000716209411621,grad_norm: 0.8766086349871035, iteration: 86235
loss: 1.026442050933838,grad_norm: 0.9279419857233927, iteration: 86236
loss: 1.0423033237457275,grad_norm: 0.9999997767011504, iteration: 86237
loss: 0.9670431017875671,grad_norm: 0.8832078968947442, iteration: 86238
loss: 1.0197546482086182,grad_norm: 0.8711629418280261, iteration: 86239
loss: 1.000274658203125,grad_norm: 0.8669755763246625, iteration: 86240
loss: 1.0639116764068604,grad_norm: 0.9999991229475909, iteration: 86241
loss: 1.004433035850525,grad_norm: 0.9999994999652492, iteration: 86242
loss: 1.0016857385635376,grad_norm: 0.8972466068932842, iteration: 86243
loss: 0.9787606000900269,grad_norm: 0.8210000899928996, iteration: 86244
loss: 0.998508870601654,grad_norm: 0.9650512786722193, iteration: 86245
loss: 1.0763005018234253,grad_norm: 0.9371658571124836, iteration: 86246
loss: 1.005439281463623,grad_norm: 0.9765853347490175, iteration: 86247
loss: 0.9992305636405945,grad_norm: 0.8213630456144767, iteration: 86248
loss: 1.0687512159347534,grad_norm: 0.9999998335083026, iteration: 86249
loss: 1.062654733657837,grad_norm: 0.9999990798408869, iteration: 86250
loss: 1.0010331869125366,grad_norm: 0.8707477892546079, iteration: 86251
loss: 1.02059805393219,grad_norm: 0.7917818970013747, iteration: 86252
loss: 0.9651039838790894,grad_norm: 0.9999992136336486, iteration: 86253
loss: 1.000359058380127,grad_norm: 0.9757884390381089, iteration: 86254
loss: 1.0313444137573242,grad_norm: 0.9999993341579331, iteration: 86255
loss: 1.004528284072876,grad_norm: 0.8655655220941932, iteration: 86256
loss: 1.0037963390350342,grad_norm: 0.8670269910542171, iteration: 86257
loss: 1.0046334266662598,grad_norm: 0.9999991994090366, iteration: 86258
loss: 0.9997134804725647,grad_norm: 0.9320636848679887, iteration: 86259
loss: 0.9746080636978149,grad_norm: 0.9668035881641438, iteration: 86260
loss: 1.0349613428115845,grad_norm: 0.9040367606578703, iteration: 86261
loss: 0.9785492420196533,grad_norm: 0.97877572614004, iteration: 86262
loss: 0.9912893772125244,grad_norm: 0.9367927615087216, iteration: 86263
loss: 1.085843563079834,grad_norm: 0.9999998848824609, iteration: 86264
loss: 0.9942848682403564,grad_norm: 0.9567378588399665, iteration: 86265
loss: 0.9872927069664001,grad_norm: 0.9999991378330229, iteration: 86266
loss: 1.026856780052185,grad_norm: 0.7704605027646845, iteration: 86267
loss: 0.989075243473053,grad_norm: 0.9999999024232387, iteration: 86268
loss: 0.9985250234603882,grad_norm: 0.999999132354433, iteration: 86269
loss: 1.0159019231796265,grad_norm: 0.9999991816319139, iteration: 86270
loss: 1.021153450012207,grad_norm: 0.8552462067089258, iteration: 86271
loss: 1.0060328245162964,grad_norm: 0.9999990833067189, iteration: 86272
loss: 0.9767533540725708,grad_norm: 0.9999990826840846, iteration: 86273
loss: 1.0030827522277832,grad_norm: 0.9999989495927928, iteration: 86274
loss: 1.0207476615905762,grad_norm: 0.9999994565235617, iteration: 86275
loss: 0.9735139012336731,grad_norm: 0.9997791495909673, iteration: 86276
loss: 0.993135929107666,grad_norm: 0.9999994283015704, iteration: 86277
loss: 1.0364198684692383,grad_norm: 0.9999990284681867, iteration: 86278
loss: 0.9652104377746582,grad_norm: 0.8915604498899344, iteration: 86279
loss: 0.9969307780265808,grad_norm: 0.9999995271763206, iteration: 86280
loss: 1.0121115446090698,grad_norm: 0.9197397217488424, iteration: 86281
loss: 1.0164636373519897,grad_norm: 0.8555942885911285, iteration: 86282
loss: 0.9991710782051086,grad_norm: 0.9999990722513775, iteration: 86283
loss: 1.0592236518859863,grad_norm: 0.9966973218011999, iteration: 86284
loss: 1.0113781690597534,grad_norm: 0.8631341135989591, iteration: 86285
loss: 0.999773383140564,grad_norm: 0.9334725477562825, iteration: 86286
loss: 0.9764898419380188,grad_norm: 0.9427741252311196, iteration: 86287
loss: 1.0070348978042603,grad_norm: 0.9739737917442588, iteration: 86288
loss: 1.006666660308838,grad_norm: 0.9999990043931745, iteration: 86289
loss: 1.0395431518554688,grad_norm: 0.9706136334867083, iteration: 86290
loss: 1.0475943088531494,grad_norm: 0.9999997476134109, iteration: 86291
loss: 0.9679810404777527,grad_norm: 0.9999991813856668, iteration: 86292
loss: 0.9662737250328064,grad_norm: 0.9549057618457553, iteration: 86293
loss: 0.9897161722183228,grad_norm: 0.8121416028483073, iteration: 86294
loss: 0.9861497282981873,grad_norm: 0.9008453526039348, iteration: 86295
loss: 0.9561447501182556,grad_norm: 0.9999992492323171, iteration: 86296
loss: 1.0499545335769653,grad_norm: 0.9999989956843638, iteration: 86297
loss: 0.9797064661979675,grad_norm: 0.8650643437102811, iteration: 86298
loss: 1.0346076488494873,grad_norm: 0.8041923647101685, iteration: 86299
loss: 0.9951587915420532,grad_norm: 0.9999992330043438, iteration: 86300
loss: 1.0301634073257446,grad_norm: 0.9999998332215763, iteration: 86301
loss: 1.019126296043396,grad_norm: 0.961341813081969, iteration: 86302
loss: 1.0144082307815552,grad_norm: 0.9999991110411006, iteration: 86303
loss: 1.0222485065460205,grad_norm: 0.9999990339039553, iteration: 86304
loss: 0.990568995475769,grad_norm: 0.8634853071801385, iteration: 86305
loss: 1.0131950378417969,grad_norm: 0.9999991289722544, iteration: 86306
loss: 0.9511259198188782,grad_norm: 0.9667843513507715, iteration: 86307
loss: 1.0040912628173828,grad_norm: 0.9258866157965673, iteration: 86308
loss: 0.9637249708175659,grad_norm: 0.8369031829372583, iteration: 86309
loss: 1.0447592735290527,grad_norm: 0.8649854371942292, iteration: 86310
loss: 0.964085042476654,grad_norm: 0.9999990709033375, iteration: 86311
loss: 1.0126051902770996,grad_norm: 0.9276129204626291, iteration: 86312
loss: 1.000787615776062,grad_norm: 0.9999991745768592, iteration: 86313
loss: 1.0017234086990356,grad_norm: 0.9999991342811, iteration: 86314
loss: 0.9922918677330017,grad_norm: 0.9247333044529237, iteration: 86315
loss: 1.0297633409500122,grad_norm: 0.8432239231022979, iteration: 86316
loss: 1.1998682022094727,grad_norm: 0.9999996392822507, iteration: 86317
loss: 1.0542057752609253,grad_norm: 0.9875061173590008, iteration: 86318
loss: 1.015850305557251,grad_norm: 0.9473841534238846, iteration: 86319
loss: 1.015488624572754,grad_norm: 0.9524844148498532, iteration: 86320
loss: 1.0123064517974854,grad_norm: 0.8182877839967105, iteration: 86321
loss: 1.020076036453247,grad_norm: 0.9056275381736484, iteration: 86322
loss: 0.9753033518791199,grad_norm: 0.9452348231810604, iteration: 86323
loss: 0.9896026253700256,grad_norm: 0.9352621809520186, iteration: 86324
loss: 0.987900972366333,grad_norm: 0.8014446379497037, iteration: 86325
loss: 1.011579155921936,grad_norm: 0.9999990434912496, iteration: 86326
loss: 1.030491828918457,grad_norm: 0.9999994304718312, iteration: 86327
loss: 1.02749502658844,grad_norm: 0.9364882113164898, iteration: 86328
loss: 0.9839279055595398,grad_norm: 0.8406586595836754, iteration: 86329
loss: 1.0262647867202759,grad_norm: 0.999999590162965, iteration: 86330
loss: 1.0677268505096436,grad_norm: 0.8032199212000138, iteration: 86331
loss: 1.0132945775985718,grad_norm: 0.9999993108945673, iteration: 86332
loss: 0.9878333806991577,grad_norm: 0.9999990200458138, iteration: 86333
loss: 1.0230046510696411,grad_norm: 0.991593273446636, iteration: 86334
loss: 1.0260573625564575,grad_norm: 0.9237906599862371, iteration: 86335
loss: 1.0698328018188477,grad_norm: 0.9999998479783496, iteration: 86336
loss: 0.9964630007743835,grad_norm: 0.9999992285207359, iteration: 86337
loss: 1.0345808267593384,grad_norm: 0.999999704744685, iteration: 86338
loss: 1.0068713426589966,grad_norm: 0.9999991282072136, iteration: 86339
loss: 1.0053874254226685,grad_norm: 0.9999990119999586, iteration: 86340
loss: 0.9948052763938904,grad_norm: 0.9999992290607155, iteration: 86341
loss: 0.9974996447563171,grad_norm: 0.9344041303465103, iteration: 86342
loss: 0.9743825793266296,grad_norm: 0.9745222602506372, iteration: 86343
loss: 1.0141255855560303,grad_norm: 0.845885137700311, iteration: 86344
loss: 1.010034203529358,grad_norm: 0.8176604817964355, iteration: 86345
loss: 1.0500692129135132,grad_norm: 0.8931941749212227, iteration: 86346
loss: 1.0721547603607178,grad_norm: 0.9999991957932507, iteration: 86347
loss: 0.9803126454353333,grad_norm: 0.8914681437199091, iteration: 86348
loss: 0.993934154510498,grad_norm: 0.9999992544289551, iteration: 86349
loss: 0.9876201152801514,grad_norm: 0.8465651537142941, iteration: 86350
loss: 0.9942803978919983,grad_norm: 0.9999992241013688, iteration: 86351
loss: 1.028873324394226,grad_norm: 0.9999991367215245, iteration: 86352
loss: 0.9815565943717957,grad_norm: 0.9999995776095922, iteration: 86353
loss: 1.012352705001831,grad_norm: 0.9999989897274361, iteration: 86354
loss: 1.0108463764190674,grad_norm: 0.9999991721977718, iteration: 86355
loss: 1.0263102054595947,grad_norm: 0.9999994121340355, iteration: 86356
loss: 1.0262874364852905,grad_norm: 0.9968895880761871, iteration: 86357
loss: 1.024954915046692,grad_norm: 0.9769436758138703, iteration: 86358
loss: 1.0212067365646362,grad_norm: 0.9999990822665126, iteration: 86359
loss: 0.9804307818412781,grad_norm: 0.9999991773487183, iteration: 86360
loss: 0.9828113317489624,grad_norm: 0.8637693637449225, iteration: 86361
loss: 0.9996271729469299,grad_norm: 0.9999990500134588, iteration: 86362
loss: 1.0131065845489502,grad_norm: 0.999999194291663, iteration: 86363
loss: 1.0350624322891235,grad_norm: 0.99999911860446, iteration: 86364
loss: 1.0004297494888306,grad_norm: 0.9999990739256417, iteration: 86365
loss: 1.0280077457427979,grad_norm: 0.8490717503127765, iteration: 86366
loss: 1.0201505422592163,grad_norm: 0.9999991908551765, iteration: 86367
loss: 0.9805923104286194,grad_norm: 0.9195594796771053, iteration: 86368
loss: 0.964053750038147,grad_norm: 0.9999992963109522, iteration: 86369
loss: 0.9908081293106079,grad_norm: 0.9999990903652486, iteration: 86370
loss: 1.010626196861267,grad_norm: 0.9999990247310224, iteration: 86371
loss: 1.0090057849884033,grad_norm: 0.9665906890138855, iteration: 86372
loss: 1.0266180038452148,grad_norm: 0.9999990610998711, iteration: 86373
loss: 0.9939219951629639,grad_norm: 0.93216653834114, iteration: 86374
loss: 1.019814133644104,grad_norm: 0.9999991604470199, iteration: 86375
loss: 0.996745228767395,grad_norm: 0.9999990842239683, iteration: 86376
loss: 1.0128735303878784,grad_norm: 0.9998969314363846, iteration: 86377
loss: 1.0483578443527222,grad_norm: 0.8243149565777517, iteration: 86378
loss: 1.019223690032959,grad_norm: 0.9999998858905234, iteration: 86379
loss: 1.0214056968688965,grad_norm: 0.7657284139358319, iteration: 86380
loss: 0.9968257546424866,grad_norm: 0.9999992030782642, iteration: 86381
loss: 0.9566443562507629,grad_norm: 0.9999991483955925, iteration: 86382
loss: 1.0394349098205566,grad_norm: 0.9805146903471919, iteration: 86383
loss: 1.0145256519317627,grad_norm: 0.9762210364101619, iteration: 86384
loss: 1.0048540830612183,grad_norm: 0.9668510827841768, iteration: 86385
loss: 1.0101290941238403,grad_norm: 0.9123867743770817, iteration: 86386
loss: 1.0136796236038208,grad_norm: 0.9999993188947663, iteration: 86387
loss: 1.0262432098388672,grad_norm: 0.999999425856718, iteration: 86388
loss: 1.0358625650405884,grad_norm: 0.9999990840356416, iteration: 86389
loss: 0.995547890663147,grad_norm: 0.93701272281414, iteration: 86390
loss: 1.0040565729141235,grad_norm: 0.8925025731578493, iteration: 86391
loss: 1.0505495071411133,grad_norm: 0.8802075328602503, iteration: 86392
loss: 1.021389126777649,grad_norm: 0.999999590749959, iteration: 86393
loss: 1.0130751132965088,grad_norm: 0.9816599297433551, iteration: 86394
loss: 1.0422979593276978,grad_norm: 0.9999991780837799, iteration: 86395
loss: 1.0211695432662964,grad_norm: 0.9448091117862418, iteration: 86396
loss: 1.0045109987258911,grad_norm: 0.9999992163869722, iteration: 86397
loss: 1.0015136003494263,grad_norm: 0.7836930910067121, iteration: 86398
loss: 1.0074049234390259,grad_norm: 0.9536697679285614, iteration: 86399
loss: 0.9874822497367859,grad_norm: 0.9799251896569177, iteration: 86400
loss: 1.000447154045105,grad_norm: 0.9999991346182886, iteration: 86401
loss: 0.9924210906028748,grad_norm: 0.9943486943541001, iteration: 86402
loss: 0.9790751934051514,grad_norm: 0.9999993546397136, iteration: 86403
loss: 0.9983044266700745,grad_norm: 0.9267307422738932, iteration: 86404
loss: 1.0008198022842407,grad_norm: 0.955322120171876, iteration: 86405
loss: 1.048595666885376,grad_norm: 0.9999997681387024, iteration: 86406
loss: 1.0030782222747803,grad_norm: 0.99999918507611, iteration: 86407
loss: 1.007414698600769,grad_norm: 0.9999990189559731, iteration: 86408
loss: 0.9824007749557495,grad_norm: 0.9999992215725556, iteration: 86409
loss: 1.0020675659179688,grad_norm: 0.9999991519461469, iteration: 86410
loss: 1.0128133296966553,grad_norm: 0.9699646929556952, iteration: 86411
loss: 1.0782239437103271,grad_norm: 0.9999991165802093, iteration: 86412
loss: 0.9977592825889587,grad_norm: 0.9999991255530734, iteration: 86413
loss: 1.0384306907653809,grad_norm: 0.9332943241289904, iteration: 86414
loss: 0.9570783972740173,grad_norm: 0.8359555314606635, iteration: 86415
loss: 1.0357738733291626,grad_norm: 0.9999996486274038, iteration: 86416
loss: 1.0318312644958496,grad_norm: 0.9956766300360193, iteration: 86417
loss: 1.0267196893692017,grad_norm: 0.9870344425711418, iteration: 86418
loss: 1.0073000192642212,grad_norm: 0.8959376817519403, iteration: 86419
loss: 1.000523567199707,grad_norm: 0.9474698989525776, iteration: 86420
loss: 1.0230897665023804,grad_norm: 0.883349530443547, iteration: 86421
loss: 1.009864091873169,grad_norm: 0.7548065584452027, iteration: 86422
loss: 0.9907339215278625,grad_norm: 0.8272081793326562, iteration: 86423
loss: 1.069744348526001,grad_norm: 0.9999991232966025, iteration: 86424
loss: 0.9908186197280884,grad_norm: 0.916366147006724, iteration: 86425
loss: 0.9793137311935425,grad_norm: 0.934501161637832, iteration: 86426
loss: 1.0353188514709473,grad_norm: 0.9999993238920095, iteration: 86427
loss: 1.0226771831512451,grad_norm: 0.8890292635270008, iteration: 86428
loss: 1.0715878009796143,grad_norm: 0.9999991325743971, iteration: 86429
loss: 1.0400166511535645,grad_norm: 0.9820626704536402, iteration: 86430
loss: 0.9966951608657837,grad_norm: 0.9836353591365539, iteration: 86431
loss: 0.9819219708442688,grad_norm: 0.971256692957262, iteration: 86432
loss: 1.007686734199524,grad_norm: 0.9120218026285241, iteration: 86433
loss: 0.9976337552070618,grad_norm: 0.9332613274944288, iteration: 86434
loss: 1.0103719234466553,grad_norm: 0.9999989884310033, iteration: 86435
loss: 0.9922630190849304,grad_norm: 0.7484614801546636, iteration: 86436
loss: 1.2034889459609985,grad_norm: 0.9999994066527059, iteration: 86437
loss: 1.0352786779403687,grad_norm: 0.9999998844084448, iteration: 86438
loss: 0.9779688715934753,grad_norm: 0.8352358867749937, iteration: 86439
loss: 0.9841371178627014,grad_norm: 0.877010326508636, iteration: 86440
loss: 1.038723349571228,grad_norm: 0.9999991794508377, iteration: 86441
loss: 0.9719133377075195,grad_norm: 0.9745967231458852, iteration: 86442
loss: 0.9871582388877869,grad_norm: 0.9999991690998979, iteration: 86443
loss: 0.9952170848846436,grad_norm: 0.8762211205698379, iteration: 86444
loss: 1.029605507850647,grad_norm: 0.979724438148702, iteration: 86445
loss: 0.9817413091659546,grad_norm: 0.9494719916138021, iteration: 86446
loss: 1.0141332149505615,grad_norm: 0.9999997165911257, iteration: 86447
loss: 1.0159753561019897,grad_norm: 0.9999990297036191, iteration: 86448
loss: 1.0321933031082153,grad_norm: 0.9999998493256932, iteration: 86449
loss: 1.0330238342285156,grad_norm: 0.8257926305260128, iteration: 86450
loss: 0.997427225112915,grad_norm: 0.7194009029903744, iteration: 86451
loss: 0.9871549010276794,grad_norm: 0.9101747138286379, iteration: 86452
loss: 0.9711684584617615,grad_norm: 0.9999989866431854, iteration: 86453
loss: 0.9799033999443054,grad_norm: 0.9914625384035253, iteration: 86454
loss: 1.0436221361160278,grad_norm: 0.9999990765971967, iteration: 86455
loss: 1.0814557075500488,grad_norm: 0.9999994121157603, iteration: 86456
loss: 1.001571536064148,grad_norm: 0.889225731878401, iteration: 86457
loss: 0.9759093523025513,grad_norm: 0.9999990663009029, iteration: 86458
loss: 0.9891310930252075,grad_norm: 0.6687045710776104, iteration: 86459
loss: 1.020735502243042,grad_norm: 0.9959096728834375, iteration: 86460
loss: 1.0382989645004272,grad_norm: 0.9999991033788883, iteration: 86461
loss: 1.0958726406097412,grad_norm: 0.999999706186656, iteration: 86462
loss: 1.0220242738723755,grad_norm: 0.9999990613239419, iteration: 86463
loss: 0.9990159273147583,grad_norm: 0.9999996570018829, iteration: 86464
loss: 1.2291760444641113,grad_norm: 0.9999997891655521, iteration: 86465
loss: 1.0249007940292358,grad_norm: 0.9999998060015587, iteration: 86466
loss: 1.004540205001831,grad_norm: 0.9999990895965457, iteration: 86467
loss: 0.9867126941680908,grad_norm: 0.9999992566968636, iteration: 86468
loss: 1.0061904191970825,grad_norm: 0.8362738848640937, iteration: 86469
loss: 1.0282827615737915,grad_norm: 1.0000000449762259, iteration: 86470
loss: 1.0044989585876465,grad_norm: 0.9999992066046557, iteration: 86471
loss: 1.0229287147521973,grad_norm: 0.9365381693268254, iteration: 86472
loss: 1.0051355361938477,grad_norm: 0.9730270756469244, iteration: 86473
loss: 0.9544742107391357,grad_norm: 0.9563938019215585, iteration: 86474
loss: 1.0463483333587646,grad_norm: 0.9999994928720687, iteration: 86475
loss: 0.977178156375885,grad_norm: 0.9153734910492504, iteration: 86476
loss: 0.9864490032196045,grad_norm: 0.9999992160591836, iteration: 86477
loss: 1.0058624744415283,grad_norm: 0.8310541884196105, iteration: 86478
loss: 1.029105544090271,grad_norm: 0.8692374765795376, iteration: 86479
loss: 0.9780147671699524,grad_norm: 0.9414555556830043, iteration: 86480
loss: 1.010158658027649,grad_norm: 0.7283918058208219, iteration: 86481
loss: 0.9870023131370544,grad_norm: 0.9059416343985386, iteration: 86482
loss: 1.0342466831207275,grad_norm: 0.9999991765154295, iteration: 86483
loss: 1.0012321472167969,grad_norm: 0.8900009589099676, iteration: 86484
loss: 0.9976978302001953,grad_norm: 0.9769760042870715, iteration: 86485
loss: 1.0053436756134033,grad_norm: 0.8340234049954656, iteration: 86486
loss: 1.00784432888031,grad_norm: 0.9999993455728579, iteration: 86487
loss: 1.043058156967163,grad_norm: 0.9999991472332979, iteration: 86488
loss: 0.9929601550102234,grad_norm: 0.9999990975853494, iteration: 86489
loss: 0.940939724445343,grad_norm: 0.782527669526802, iteration: 86490
loss: 0.9924690127372742,grad_norm: 0.9077920158016851, iteration: 86491
loss: 1.0121619701385498,grad_norm: 0.8490304827737284, iteration: 86492
loss: 1.0284115076065063,grad_norm: 0.9465444859085836, iteration: 86493
loss: 0.9902998805046082,grad_norm: 0.999999071140721, iteration: 86494
loss: 0.9825359582901001,grad_norm: 0.9999991479602451, iteration: 86495
loss: 0.9975594282150269,grad_norm: 0.9999993833730006, iteration: 86496
loss: 0.9982399940490723,grad_norm: 0.999638310343515, iteration: 86497
loss: 0.9999925494194031,grad_norm: 0.9999998845892702, iteration: 86498
loss: 1.010787010192871,grad_norm: 0.9519777084518626, iteration: 86499
loss: 1.028786301612854,grad_norm: 0.7481810967708904, iteration: 86500
loss: 1.0145026445388794,grad_norm: 0.9999991771819148, iteration: 86501
loss: 0.9752350449562073,grad_norm: 0.999999127096352, iteration: 86502
loss: 0.9973946213722229,grad_norm: 0.8852506291512912, iteration: 86503
loss: 0.9490109086036682,grad_norm: 0.865081767299196, iteration: 86504
loss: 1.0110087394714355,grad_norm: 0.9058524147582315, iteration: 86505
loss: 1.1651612520217896,grad_norm: 0.9999995021442529, iteration: 86506
loss: 1.0387928485870361,grad_norm: 0.9902933100598895, iteration: 86507
loss: 1.1195971965789795,grad_norm: 0.9999997401873402, iteration: 86508
loss: 1.006121277809143,grad_norm: 0.7795241105932593, iteration: 86509
loss: 1.0118968486785889,grad_norm: 0.9999992018065104, iteration: 86510
loss: 0.9647287726402283,grad_norm: 0.8571323960309749, iteration: 86511
loss: 0.984478235244751,grad_norm: 0.9999990686628205, iteration: 86512
loss: 0.9782176613807678,grad_norm: 0.9999990865218087, iteration: 86513
loss: 1.035699486732483,grad_norm: 0.999999219181959, iteration: 86514
loss: 0.9726092219352722,grad_norm: 0.9999990328565735, iteration: 86515
loss: 1.0101410150527954,grad_norm: 0.9999994196114415, iteration: 86516
loss: 1.0305663347244263,grad_norm: 0.9999994317176755, iteration: 86517
loss: 1.1344480514526367,grad_norm: 0.9999990168947692, iteration: 86518
loss: 1.0319304466247559,grad_norm: 0.9999997255089152, iteration: 86519
loss: 1.020808219909668,grad_norm: 0.9818289516940557, iteration: 86520
loss: 0.9827175736427307,grad_norm: 0.8945664855098777, iteration: 86521
loss: 0.9942333698272705,grad_norm: 0.8894485028503805, iteration: 86522
loss: 1.0534864664077759,grad_norm: 0.9417225783005477, iteration: 86523
loss: 1.0527764558792114,grad_norm: 0.999999306594262, iteration: 86524
loss: 1.024682641029358,grad_norm: 0.9999993386386303, iteration: 86525
loss: 1.0075205564498901,grad_norm: 0.9297417679643675, iteration: 86526
loss: 1.0338373184204102,grad_norm: 0.9999989780184947, iteration: 86527
loss: 0.9875518679618835,grad_norm: 0.9150553378418989, iteration: 86528
loss: 0.9786949753761292,grad_norm: 0.8097686747080084, iteration: 86529
loss: 1.0380840301513672,grad_norm: 0.9999990344394426, iteration: 86530
loss: 1.0093039274215698,grad_norm: 0.7841467973745077, iteration: 86531
loss: 0.9905096888542175,grad_norm: 0.9999990307414849, iteration: 86532
loss: 1.000723123550415,grad_norm: 0.8786431841576817, iteration: 86533
loss: 1.0296751260757446,grad_norm: 0.9999990262780181, iteration: 86534
loss: 1.025037169456482,grad_norm: 0.8485770871298253, iteration: 86535
loss: 0.9491257667541504,grad_norm: 0.8943622817237394, iteration: 86536
loss: 1.0224367380142212,grad_norm: 0.9999989886117441, iteration: 86537
loss: 0.99259352684021,grad_norm: 0.9999990019175019, iteration: 86538
loss: 1.0227583646774292,grad_norm: 0.8894579071339603, iteration: 86539
loss: 1.0276470184326172,grad_norm: 0.9999992859218427, iteration: 86540
loss: 0.9880529046058655,grad_norm: 0.9344680384151776, iteration: 86541
loss: 1.0241003036499023,grad_norm: 0.9999996252748296, iteration: 86542
loss: 0.9492840766906738,grad_norm: 0.9999992412137937, iteration: 86543
loss: 1.0493557453155518,grad_norm: 0.9272743135449726, iteration: 86544
loss: 1.0207041501998901,grad_norm: 0.8508301520837473, iteration: 86545
loss: 0.9721661806106567,grad_norm: 0.8425163544807407, iteration: 86546
loss: 1.0492891073226929,grad_norm: 0.9063438980591957, iteration: 86547
loss: 1.0040581226348877,grad_norm: 0.9340888504332715, iteration: 86548
loss: 1.0278904438018799,grad_norm: 0.9758406993757982, iteration: 86549
loss: 1.0753554105758667,grad_norm: 0.9999996614164058, iteration: 86550
loss: 0.9628714919090271,grad_norm: 0.8612041925359988, iteration: 86551
loss: 1.0847655534744263,grad_norm: 0.9999999025077609, iteration: 86552
loss: 1.0201035737991333,grad_norm: 0.8702905861531749, iteration: 86553
loss: 1.0473955869674683,grad_norm: 0.8942908181340874, iteration: 86554
loss: 0.9951943159103394,grad_norm: 0.8977973421427407, iteration: 86555
loss: 0.9963934421539307,grad_norm: 0.9888621103752734, iteration: 86556
loss: 1.0671706199645996,grad_norm: 0.9999991228496297, iteration: 86557
loss: 0.998694658279419,grad_norm: 0.8514965550885961, iteration: 86558
loss: 1.0077065229415894,grad_norm: 0.9818203439929263, iteration: 86559
loss: 0.9874926805496216,grad_norm: 0.9120454805822038, iteration: 86560
loss: 0.9979360103607178,grad_norm: 0.8017296601444841, iteration: 86561
loss: 0.9752013087272644,grad_norm: 0.9294132589525949, iteration: 86562
loss: 1.0162774324417114,grad_norm: 0.9999993863492687, iteration: 86563
loss: 0.9974158406257629,grad_norm: 0.9607612601077489, iteration: 86564
loss: 1.0139940977096558,grad_norm: 0.7400252202758969, iteration: 86565
loss: 1.0191099643707275,grad_norm: 0.9999990208081441, iteration: 86566
loss: 0.9969182014465332,grad_norm: 0.9770361155116918, iteration: 86567
loss: 1.0118976831436157,grad_norm: 0.7915368364141312, iteration: 86568
loss: 0.9786534309387207,grad_norm: 0.8755457146061089, iteration: 86569
loss: 1.0232843160629272,grad_norm: 0.99999907771518, iteration: 86570
loss: 1.0437564849853516,grad_norm: 0.8824750440759546, iteration: 86571
loss: 0.9939743280410767,grad_norm: 0.9999998542563951, iteration: 86572
loss: 1.128857135772705,grad_norm: 0.999999694300068, iteration: 86573
loss: 1.058454155921936,grad_norm: 0.9999993124616773, iteration: 86574
loss: 0.9930792450904846,grad_norm: 0.9527332964435077, iteration: 86575
loss: 1.0765600204467773,grad_norm: 0.9999990876822132, iteration: 86576
loss: 1.0600297451019287,grad_norm: 0.9999992285985461, iteration: 86577
loss: 1.0034376382827759,grad_norm: 0.9999990386008195, iteration: 86578
loss: 1.0602056980133057,grad_norm: 0.999999925420728, iteration: 86579
loss: 0.9726626873016357,grad_norm: 0.9308915378787069, iteration: 86580
loss: 0.967458188533783,grad_norm: 0.9999991156328164, iteration: 86581
loss: 0.9958092570304871,grad_norm: 0.9123637633327559, iteration: 86582
loss: 0.9863618612289429,grad_norm: 0.8528206443522958, iteration: 86583
loss: 1.0302408933639526,grad_norm: 0.9772486522742546, iteration: 86584
loss: 0.9965662360191345,grad_norm: 0.907244782335303, iteration: 86585
loss: 1.0007472038269043,grad_norm: 0.8696082264623358, iteration: 86586
loss: 0.9940769672393799,grad_norm: 0.9057124307646002, iteration: 86587
loss: 1.0066380500793457,grad_norm: 0.9999994380406483, iteration: 86588
loss: 0.9997658729553223,grad_norm: 0.9701289806294335, iteration: 86589
loss: 0.9982014298439026,grad_norm: 0.999999059452873, iteration: 86590
loss: 1.002910852432251,grad_norm: 0.9709462104042936, iteration: 86591
loss: 1.0638303756713867,grad_norm: 0.9999991046657274, iteration: 86592
loss: 1.0287525653839111,grad_norm: 0.8957764617151831, iteration: 86593
loss: 1.0535567998886108,grad_norm: 0.9999994391490398, iteration: 86594
loss: 1.0058274269104004,grad_norm: 0.9999993082433475, iteration: 86595
loss: 0.9979283809661865,grad_norm: 0.8772518737013641, iteration: 86596
loss: 1.0267634391784668,grad_norm: 0.999999618562586, iteration: 86597
loss: 1.091530442237854,grad_norm: 0.9134080531391527, iteration: 86598
loss: 0.979786217212677,grad_norm: 0.9216281598831383, iteration: 86599
loss: 1.009035348892212,grad_norm: 0.7757713938740282, iteration: 86600
loss: 1.0119825601577759,grad_norm: 0.9999990730725732, iteration: 86601
loss: 0.9694091081619263,grad_norm: 0.999999221949359, iteration: 86602
loss: 1.000379204750061,grad_norm: 0.864195508686576, iteration: 86603
loss: 1.0869475603103638,grad_norm: 0.9165403371187075, iteration: 86604
loss: 1.0733550786972046,grad_norm: 0.999999570552522, iteration: 86605
loss: 1.0753706693649292,grad_norm: 0.9999993256172789, iteration: 86606
loss: 1.0103482007980347,grad_norm: 0.9683563441023315, iteration: 86607
loss: 1.0132777690887451,grad_norm: 0.9999990702688061, iteration: 86608
loss: 1.0916829109191895,grad_norm: 0.9999992831963401, iteration: 86609
loss: 1.060978889465332,grad_norm: 0.999999545012602, iteration: 86610
loss: 1.0706126689910889,grad_norm: 0.9999990234251084, iteration: 86611
loss: 1.0667904615402222,grad_norm: 0.9999991586947028, iteration: 86612
loss: 1.0339243412017822,grad_norm: 0.9999992874802024, iteration: 86613
loss: 0.97496098279953,grad_norm: 0.9659932018593781, iteration: 86614
loss: 0.9837085008621216,grad_norm: 0.8694642194885546, iteration: 86615
loss: 1.026948094367981,grad_norm: 0.842571093468333, iteration: 86616
loss: 0.994867742061615,grad_norm: 0.8749287134179676, iteration: 86617
loss: 1.016589641571045,grad_norm: 0.8957953386924472, iteration: 86618
loss: 1.0259108543395996,grad_norm: 0.9999992919049777, iteration: 86619
loss: 1.0551215410232544,grad_norm: 0.9922536137913843, iteration: 86620
loss: 0.9685828685760498,grad_norm: 0.9999990712951398, iteration: 86621
loss: 1.0276612043380737,grad_norm: 0.9999989599894956, iteration: 86622
loss: 0.9967473745346069,grad_norm: 0.8957235337208665, iteration: 86623
loss: 0.9970960021018982,grad_norm: 0.9978750407123322, iteration: 86624
loss: 0.9934954643249512,grad_norm: 0.9999990888036205, iteration: 86625
loss: 1.0024968385696411,grad_norm: 0.9999990983529481, iteration: 86626
loss: 1.0308126211166382,grad_norm: 0.9620765248440352, iteration: 86627
loss: 0.9920844435691833,grad_norm: 0.9999991277943319, iteration: 86628
loss: 0.990021824836731,grad_norm: 0.9506550472224442, iteration: 86629
loss: 1.0313913822174072,grad_norm: 0.9999990159261019, iteration: 86630
loss: 1.0082372426986694,grad_norm: 0.9999990722180285, iteration: 86631
loss: 1.1691700220108032,grad_norm: 0.9999998262415635, iteration: 86632
loss: 0.9817107319831848,grad_norm: 0.9963669863139031, iteration: 86633
loss: 1.0170776844024658,grad_norm: 0.9037566196962394, iteration: 86634
loss: 0.9744307398796082,grad_norm: 0.9593242446329218, iteration: 86635
loss: 1.0052562952041626,grad_norm: 0.9015871343706298, iteration: 86636
loss: 0.9692314267158508,grad_norm: 0.9808055312997488, iteration: 86637
loss: 0.9776927828788757,grad_norm: 0.9999992075955652, iteration: 86638
loss: 1.0664514303207397,grad_norm: 0.8571687651975366, iteration: 86639
loss: 0.9960809350013733,grad_norm: 0.9999991162717378, iteration: 86640
loss: 0.9917770624160767,grad_norm: 0.9999992105367479, iteration: 86641
loss: 1.011122226715088,grad_norm: 0.9999994660670882, iteration: 86642
loss: 0.9842868447303772,grad_norm: 0.9263030681810662, iteration: 86643
loss: 0.9955604672431946,grad_norm: 0.9539331633461869, iteration: 86644
loss: 0.9960203170776367,grad_norm: 0.9546071307294998, iteration: 86645
loss: 1.0071654319763184,grad_norm: 0.9999991488111336, iteration: 86646
loss: 1.0148320198059082,grad_norm: 0.9908683924997925, iteration: 86647
loss: 0.9809373617172241,grad_norm: 0.855412346645326, iteration: 86648
loss: 1.136046290397644,grad_norm: 0.9999995138816433, iteration: 86649
loss: 1.0374342203140259,grad_norm: 0.9999989691389388, iteration: 86650
loss: 0.988788902759552,grad_norm: 0.8515731010030432, iteration: 86651
loss: 1.0537090301513672,grad_norm: 0.9999990828420727, iteration: 86652
loss: 1.01626718044281,grad_norm: 0.9760798698005372, iteration: 86653
loss: 1.032049536705017,grad_norm: 0.9999994968325521, iteration: 86654
loss: 0.9866844415664673,grad_norm: 0.9999995654176725, iteration: 86655
loss: 1.019364833831787,grad_norm: 0.9704886831568482, iteration: 86656
loss: 1.0116488933563232,grad_norm: 0.9946884741365275, iteration: 86657
loss: 1.1770871877670288,grad_norm: 0.9999997681702733, iteration: 86658
loss: 1.1503413915634155,grad_norm: 0.9999993664220057, iteration: 86659
loss: 0.9878776669502258,grad_norm: 0.9999990479259137, iteration: 86660
loss: 0.9976909160614014,grad_norm: 0.9999991985125799, iteration: 86661
loss: 1.0162395238876343,grad_norm: 0.9419474019951222, iteration: 86662
loss: 1.1017919778823853,grad_norm: 0.8128434379213375, iteration: 86663
loss: 0.9918456673622131,grad_norm: 0.8776066899628568, iteration: 86664
loss: 1.0263115167617798,grad_norm: 0.9999990547428422, iteration: 86665
loss: 1.1047941446304321,grad_norm: 0.999999561962822, iteration: 86666
loss: 1.0059177875518799,grad_norm: 0.8518792462257258, iteration: 86667
loss: 1.0070598125457764,grad_norm: 0.8899415938245968, iteration: 86668
loss: 1.0976333618164062,grad_norm: 0.9999999442767151, iteration: 86669
loss: 0.9909840226173401,grad_norm: 0.9999991779364172, iteration: 86670
loss: 1.0883809328079224,grad_norm: 0.9999996539616292, iteration: 86671
loss: 1.0576462745666504,grad_norm: 0.9999990297853399, iteration: 86672
loss: 1.0383803844451904,grad_norm: 0.9999993201730776, iteration: 86673
loss: 1.0813673734664917,grad_norm: 0.9999990546620057, iteration: 86674
loss: 0.9962151050567627,grad_norm: 0.9999991720520649, iteration: 86675
loss: 0.9827077984809875,grad_norm: 0.999999154998254, iteration: 86676
loss: 0.9915758371353149,grad_norm: 0.9999990835252285, iteration: 86677
loss: 1.0570101737976074,grad_norm: 0.9999993053031527, iteration: 86678
loss: 1.005990743637085,grad_norm: 0.9999990800390881, iteration: 86679
loss: 0.9974788427352905,grad_norm: 0.8878648556992185, iteration: 86680
loss: 1.0198246240615845,grad_norm: 0.9340618944382548, iteration: 86681
loss: 1.1109062433242798,grad_norm: 0.9999993842320315, iteration: 86682
loss: 1.0780210494995117,grad_norm: 0.9999998453546144, iteration: 86683
loss: 1.0043346881866455,grad_norm: 0.9999994180455148, iteration: 86684
loss: 0.9868823289871216,grad_norm: 0.9999994880767619, iteration: 86685
loss: 1.1673877239227295,grad_norm: 0.999999196559072, iteration: 86686
loss: 1.0057482719421387,grad_norm: 0.9999990940213355, iteration: 86687
loss: 1.0328245162963867,grad_norm: 0.9999998514443772, iteration: 86688
loss: 1.0013179779052734,grad_norm: 0.9999999257002885, iteration: 86689
loss: 1.1179468631744385,grad_norm: 0.9999997843231527, iteration: 86690
loss: 1.0221526622772217,grad_norm: 0.9999990575090851, iteration: 86691
loss: 0.9986729621887207,grad_norm: 0.974081691063882, iteration: 86692
loss: 1.0356416702270508,grad_norm: 0.9999999297217725, iteration: 86693
loss: 1.0011931657791138,grad_norm: 0.8894651148574247, iteration: 86694
loss: 1.0749270915985107,grad_norm: 0.9999992068846955, iteration: 86695
loss: 1.1390643119812012,grad_norm: 0.9999997856043623, iteration: 86696
loss: 1.055692434310913,grad_norm: 0.8219183209276458, iteration: 86697
loss: 1.0658165216445923,grad_norm: 0.9999991864146753, iteration: 86698
loss: 1.056906819343567,grad_norm: 0.9999996813294963, iteration: 86699
loss: 1.04865562915802,grad_norm: 0.9999992899096805, iteration: 86700
loss: 1.1087878942489624,grad_norm: 0.975933595780121, iteration: 86701
loss: 1.0114399194717407,grad_norm: 0.9999992674511783, iteration: 86702
loss: 0.9980288743972778,grad_norm: 0.9999994140948489, iteration: 86703
loss: 1.027362585067749,grad_norm: 0.9999991178398147, iteration: 86704
loss: 1.0231777429580688,grad_norm: 0.9999990925139512, iteration: 86705
loss: 0.9743625521659851,grad_norm: 0.999999199724715, iteration: 86706
loss: 1.0529038906097412,grad_norm: 0.9999994386614555, iteration: 86707
loss: 0.9692326188087463,grad_norm: 0.9999989276046628, iteration: 86708
loss: 1.042291283607483,grad_norm: 0.9999997733398096, iteration: 86709
loss: 0.9834977388381958,grad_norm: 0.9682274281060574, iteration: 86710
loss: 1.0950247049331665,grad_norm: 0.9999994083350667, iteration: 86711
loss: 1.0147966146469116,grad_norm: 0.9999991421218455, iteration: 86712
loss: 1.0670186281204224,grad_norm: 0.9999994516430333, iteration: 86713
loss: 1.0687884092330933,grad_norm: 0.9999998317760347, iteration: 86714
loss: 0.9860743284225464,grad_norm: 0.938649667120865, iteration: 86715
loss: 1.0121221542358398,grad_norm: 0.9647612368557678, iteration: 86716
loss: 1.043460488319397,grad_norm: 0.9999997508884929, iteration: 86717
loss: 1.0550919771194458,grad_norm: 0.9999995288508923, iteration: 86718
loss: 0.9609047174453735,grad_norm: 0.8991566531110075, iteration: 86719
loss: 1.034253716468811,grad_norm: 0.9999990098962294, iteration: 86720
loss: 1.012133240699768,grad_norm: 0.9017733311792071, iteration: 86721
loss: 1.0152751207351685,grad_norm: 0.8783919652702707, iteration: 86722
loss: 1.0873839855194092,grad_norm: 0.9999991333083637, iteration: 86723
loss: 1.0307888984680176,grad_norm: 0.9999996879864393, iteration: 86724
loss: 1.0076795816421509,grad_norm: 0.9224532680562549, iteration: 86725
loss: 0.9950898289680481,grad_norm: 0.9851924237412921, iteration: 86726
loss: 0.9483703970909119,grad_norm: 0.9999995353164297, iteration: 86727
loss: 1.0495502948760986,grad_norm: 0.9999998283010644, iteration: 86728
loss: 1.0713188648223877,grad_norm: 0.9999998309416955, iteration: 86729
loss: 1.0096666812896729,grad_norm: 0.9095689995868383, iteration: 86730
loss: 1.0687652826309204,grad_norm: 0.9999995113647239, iteration: 86731
loss: 1.0202323198318481,grad_norm: 0.9999990378717817, iteration: 86732
loss: 1.0159837007522583,grad_norm: 0.9999998310952548, iteration: 86733
loss: 1.0291980504989624,grad_norm: 0.9999996015409146, iteration: 86734
loss: 1.1381837129592896,grad_norm: 0.9999997887107449, iteration: 86735
loss: 1.0307930707931519,grad_norm: 0.9999999104197284, iteration: 86736
loss: 1.035564661026001,grad_norm: 0.9999992566016339, iteration: 86737
loss: 1.0323995351791382,grad_norm: 0.8747773424242259, iteration: 86738
loss: 0.9771167635917664,grad_norm: 0.999999098723058, iteration: 86739
loss: 1.0181409120559692,grad_norm: 0.9999990449066827, iteration: 86740
loss: 1.0143485069274902,grad_norm: 0.9999995505817278, iteration: 86741
loss: 1.014853835105896,grad_norm: 0.9999990616014638, iteration: 86742
loss: 1.0649316310882568,grad_norm: 0.9999995334173848, iteration: 86743
loss: 0.9986345767974854,grad_norm: 0.9563316474197249, iteration: 86744
loss: 0.9834173321723938,grad_norm: 0.9080861071501731, iteration: 86745
loss: 1.0524964332580566,grad_norm: 0.9999991077735797, iteration: 86746
loss: 1.0245622396469116,grad_norm: 0.8978026915778092, iteration: 86747
loss: 1.0462055206298828,grad_norm: 0.9559134894506959, iteration: 86748
loss: 0.9739892482757568,grad_norm: 0.9999991884416216, iteration: 86749
loss: 0.9741790890693665,grad_norm: 0.9723034952006887, iteration: 86750
loss: 1.000694751739502,grad_norm: 0.9996454042319904, iteration: 86751
loss: 1.0216466188430786,grad_norm: 0.9999991373180722, iteration: 86752
loss: 1.032457709312439,grad_norm: 0.9471324923726868, iteration: 86753
loss: 1.0353481769561768,grad_norm: 0.9191402250540054, iteration: 86754
loss: 0.9629878997802734,grad_norm: 0.9999991395838996, iteration: 86755
loss: 0.9706929922103882,grad_norm: 0.9999992256780666, iteration: 86756
loss: 1.0027881860733032,grad_norm: 0.999999492214463, iteration: 86757
loss: 1.002994418144226,grad_norm: 0.8721445297670495, iteration: 86758
loss: 1.1244728565216064,grad_norm: 0.9999999738608827, iteration: 86759
loss: 1.0681477785110474,grad_norm: 0.9096819144576278, iteration: 86760
loss: 0.9902321696281433,grad_norm: 0.999999111128423, iteration: 86761
loss: 1.0561131238937378,grad_norm: 0.9999993134716182, iteration: 86762
loss: 1.0526723861694336,grad_norm: 0.9999997572717629, iteration: 86763
loss: 0.9859267473220825,grad_norm: 0.9999991857059091, iteration: 86764
loss: 0.9657986164093018,grad_norm: 0.7931004190746354, iteration: 86765
loss: 1.025623083114624,grad_norm: 0.9999992085857927, iteration: 86766
loss: 1.0613038539886475,grad_norm: 0.9999996912290022, iteration: 86767
loss: 0.9977207779884338,grad_norm: 0.9999995353868517, iteration: 86768
loss: 0.9511629343032837,grad_norm: 0.9407210639448254, iteration: 86769
loss: 0.9904565215110779,grad_norm: 0.8950332506760552, iteration: 86770
loss: 1.017526388168335,grad_norm: 0.8399338631008941, iteration: 86771
loss: 1.0642459392547607,grad_norm: 0.9999996547493942, iteration: 86772
loss: 0.9942859411239624,grad_norm: 0.9464199119509291, iteration: 86773
loss: 1.051367163658142,grad_norm: 0.9507376881184284, iteration: 86774
loss: 0.9881566166877747,grad_norm: 0.9902989970926434, iteration: 86775
loss: 0.9790899753570557,grad_norm: 0.8860584265627519, iteration: 86776
loss: 0.9809058904647827,grad_norm: 0.9974796176811126, iteration: 86777
loss: 1.0048452615737915,grad_norm: 0.8822791496575323, iteration: 86778
loss: 1.0206999778747559,grad_norm: 0.8872363217223285, iteration: 86779
loss: 0.9912664890289307,grad_norm: 0.9909510096196115, iteration: 86780
loss: 1.0467581748962402,grad_norm: 0.9999993359021712, iteration: 86781
loss: 1.0492746829986572,grad_norm: 0.9212729010535875, iteration: 86782
loss: 1.0341227054595947,grad_norm: 0.9999990924148043, iteration: 86783
loss: 1.0325064659118652,grad_norm: 0.9999996157593275, iteration: 86784
loss: 1.013199806213379,grad_norm: 0.8162649009138777, iteration: 86785
loss: 1.0657836198806763,grad_norm: 0.9999990886243253, iteration: 86786
loss: 1.0097789764404297,grad_norm: 0.9999995022204764, iteration: 86787
loss: 0.9851039052009583,grad_norm: 0.9999990185610187, iteration: 86788
loss: 1.0435314178466797,grad_norm: 0.9999991667359066, iteration: 86789
loss: 0.9929399490356445,grad_norm: 0.9999999428385864, iteration: 86790
loss: 1.0097568035125732,grad_norm: 0.999999386920045, iteration: 86791
loss: 1.0057346820831299,grad_norm: 0.7381416095207108, iteration: 86792
loss: 1.0393389463424683,grad_norm: 0.9999992675627613, iteration: 86793
loss: 1.0235933065414429,grad_norm: 0.999999848481052, iteration: 86794
loss: 1.0190017223358154,grad_norm: 0.9999993523591518, iteration: 86795
loss: 1.0379918813705444,grad_norm: 0.9999992250029874, iteration: 86796
loss: 1.057852864265442,grad_norm: 0.9999991527104037, iteration: 86797
loss: 1.039194107055664,grad_norm: 0.9999993624350095, iteration: 86798
loss: 0.9738353490829468,grad_norm: 0.9928386645722088, iteration: 86799
loss: 1.016999363899231,grad_norm: 0.9994478016930961, iteration: 86800
loss: 1.14483642578125,grad_norm: 0.9999996545058086, iteration: 86801
loss: 0.998548686504364,grad_norm: 0.7881373528798826, iteration: 86802
loss: 1.0478025674819946,grad_norm: 0.9999990187166979, iteration: 86803
loss: 0.9647982120513916,grad_norm: 0.9022610628111566, iteration: 86804
loss: 1.0241570472717285,grad_norm: 0.9999990640567848, iteration: 86805
loss: 1.018548846244812,grad_norm: 0.9115721088304765, iteration: 86806
loss: 1.0116335153579712,grad_norm: 0.9999990407482853, iteration: 86807
loss: 0.9643725156784058,grad_norm: 0.8387565475054891, iteration: 86808
loss: 1.0311088562011719,grad_norm: 0.9999992483852747, iteration: 86809
loss: 1.0126793384552002,grad_norm: 0.9999993088478881, iteration: 86810
loss: 1.0021469593048096,grad_norm: 0.8930065724073571, iteration: 86811
loss: 1.014039397239685,grad_norm: 0.9999991783598444, iteration: 86812
loss: 1.0596472024917603,grad_norm: 0.999999361302636, iteration: 86813
loss: 0.9892212748527527,grad_norm: 0.8159518696173639, iteration: 86814
loss: 1.0017640590667725,grad_norm: 0.9507983234105654, iteration: 86815
loss: 0.9923081398010254,grad_norm: 0.9999992490282625, iteration: 86816
loss: 0.9500762820243835,grad_norm: 0.9553076072314154, iteration: 86817
loss: 1.052664041519165,grad_norm: 0.9999995161123417, iteration: 86818
loss: 1.068221092224121,grad_norm: 0.9999995702295965, iteration: 86819
loss: 1.0325734615325928,grad_norm: 0.8349592725584831, iteration: 86820
loss: 1.0047789812088013,grad_norm: 0.9999989821706717, iteration: 86821
loss: 1.006691575050354,grad_norm: 0.790873293798565, iteration: 86822
loss: 1.0112097263336182,grad_norm: 0.9999991138776849, iteration: 86823
loss: 0.980507493019104,grad_norm: 0.8926145545182128, iteration: 86824
loss: 1.0086442232131958,grad_norm: 0.9999990011152813, iteration: 86825
loss: 1.0672590732574463,grad_norm: 0.908356033578233, iteration: 86826
loss: 1.0352089405059814,grad_norm: 0.999999654196724, iteration: 86827
loss: 0.9812799692153931,grad_norm: 0.9999995144183501, iteration: 86828
loss: 1.0382050275802612,grad_norm: 0.9999992258744246, iteration: 86829
loss: 0.9709633588790894,grad_norm: 0.8776435788117558, iteration: 86830
loss: 0.9966250658035278,grad_norm: 0.8307020873861641, iteration: 86831
loss: 1.0020405054092407,grad_norm: 0.999998908519232, iteration: 86832
loss: 1.0125147104263306,grad_norm: 0.9999990992202574, iteration: 86833
loss: 1.0046089887619019,grad_norm: 0.9734068099214865, iteration: 86834
loss: 1.0075078010559082,grad_norm: 0.9999996890079834, iteration: 86835
loss: 1.0242892503738403,grad_norm: 0.9999991563330148, iteration: 86836
loss: 1.007596492767334,grad_norm: 0.8917357646606978, iteration: 86837
loss: 1.0403615236282349,grad_norm: 0.9999991774774584, iteration: 86838
loss: 0.9861351251602173,grad_norm: 0.9999991872974207, iteration: 86839
loss: 1.0056885480880737,grad_norm: 0.9999993532190875, iteration: 86840
loss: 0.9910317659378052,grad_norm: 0.9154872203501749, iteration: 86841
loss: 1.0431315898895264,grad_norm: 0.9999992227502221, iteration: 86842
loss: 1.0059152841567993,grad_norm: 0.8066609634238185, iteration: 86843
loss: 1.0068416595458984,grad_norm: 0.9412841724341175, iteration: 86844
loss: 0.994651734828949,grad_norm: 0.741629888868096, iteration: 86845
loss: 0.9604664444923401,grad_norm: 0.9999991385458361, iteration: 86846
loss: 1.0143448114395142,grad_norm: 0.8843906888822233, iteration: 86847
loss: 1.0057238340377808,grad_norm: 0.9999989714566153, iteration: 86848
loss: 0.9970062971115112,grad_norm: 0.9449958845531751, iteration: 86849
loss: 0.9969972372055054,grad_norm: 0.8917480663598243, iteration: 86850
loss: 1.0043482780456543,grad_norm: 0.9477698908249481, iteration: 86851
loss: 1.0536749362945557,grad_norm: 0.9999991429922549, iteration: 86852
loss: 1.022284984588623,grad_norm: 0.9134818807988301, iteration: 86853
loss: 1.0040498971939087,grad_norm: 0.9254114378959829, iteration: 86854
loss: 0.9974329471588135,grad_norm: 0.8596613931645337, iteration: 86855
loss: 1.004855990409851,grad_norm: 0.8107812150371323, iteration: 86856
loss: 0.9653609991073608,grad_norm: 0.9999991442839596, iteration: 86857
loss: 0.9763035774230957,grad_norm: 0.7901857212119251, iteration: 86858
loss: 0.9968323111534119,grad_norm: 0.9999991743143269, iteration: 86859
loss: 1.0193164348602295,grad_norm: 0.9999992174415763, iteration: 86860
loss: 1.0103594064712524,grad_norm: 0.7912084924752382, iteration: 86861
loss: 1.0308337211608887,grad_norm: 0.97577139861739, iteration: 86862
loss: 1.0017356872558594,grad_norm: 0.9157454323088967, iteration: 86863
loss: 0.9968728423118591,grad_norm: 0.8688747270411759, iteration: 86864
loss: 0.989470362663269,grad_norm: 0.8929136454303661, iteration: 86865
loss: 0.9840799570083618,grad_norm: 0.7466119433718386, iteration: 86866
loss: 1.0214579105377197,grad_norm: 0.9128541384658807, iteration: 86867
loss: 1.1150137186050415,grad_norm: 0.9999997401904868, iteration: 86868
loss: 1.0033395290374756,grad_norm: 0.8350873630440058, iteration: 86869
loss: 1.0564886331558228,grad_norm: 0.999999690501232, iteration: 86870
loss: 1.038515567779541,grad_norm: 0.8432405300071739, iteration: 86871
loss: 0.9947935938835144,grad_norm: 0.8304402328416413, iteration: 86872
loss: 0.9749559164047241,grad_norm: 0.8804879521827251, iteration: 86873
loss: 1.0312162637710571,grad_norm: 0.9999991488182344, iteration: 86874
loss: 1.098366141319275,grad_norm: 0.9999993022425827, iteration: 86875
loss: 0.9908809065818787,grad_norm: 0.9530318592631313, iteration: 86876
loss: 1.034270167350769,grad_norm: 0.9999993091798854, iteration: 86877
loss: 0.9932812452316284,grad_norm: 0.8901743517380543, iteration: 86878
loss: 1.0321999788284302,grad_norm: 0.9999996651382098, iteration: 86879
loss: 1.035954475402832,grad_norm: 0.9291706005091457, iteration: 86880
loss: 1.1329175233840942,grad_norm: 0.9999998592313641, iteration: 86881
loss: 1.0193761587142944,grad_norm: 0.9998757673253996, iteration: 86882
loss: 1.0194668769836426,grad_norm: 0.9999990497193534, iteration: 86883
loss: 0.9771724939346313,grad_norm: 0.9317295678831216, iteration: 86884
loss: 1.0281792879104614,grad_norm: 0.9999993553302697, iteration: 86885
loss: 1.0011569261550903,grad_norm: 0.8995924579962501, iteration: 86886
loss: 1.069638729095459,grad_norm: 0.8991528620952571, iteration: 86887
loss: 0.972341001033783,grad_norm: 0.9999989638100588, iteration: 86888
loss: 1.0188868045806885,grad_norm: 0.8615695640763699, iteration: 86889
loss: 0.9969518780708313,grad_norm: 0.9999990378614705, iteration: 86890
loss: 1.0017801523208618,grad_norm: 0.9999990773520545, iteration: 86891
loss: 1.0102583169937134,grad_norm: 0.9999991304698128, iteration: 86892
loss: 1.030107855796814,grad_norm: 0.9028138349312917, iteration: 86893
loss: 0.9906279444694519,grad_norm: 0.9999994390993004, iteration: 86894
loss: 0.992466151714325,grad_norm: 0.9999997270680001, iteration: 86895
loss: 1.0007755756378174,grad_norm: 0.9371209116656627, iteration: 86896
loss: 0.9965974688529968,grad_norm: 0.9999990777152363, iteration: 86897
loss: 0.998289942741394,grad_norm: 0.9999993467515652, iteration: 86898
loss: 0.9776953458786011,grad_norm: 0.9999990829160987, iteration: 86899
loss: 1.0006901025772095,grad_norm: 0.9999998415936617, iteration: 86900
loss: 1.106526494026184,grad_norm: 0.9999990632908025, iteration: 86901
loss: 1.0051679611206055,grad_norm: 0.8329142963379553, iteration: 86902
loss: 1.0261598825454712,grad_norm: 0.999999255526509, iteration: 86903
loss: 1.000142216682434,grad_norm: 0.9750715942334776, iteration: 86904
loss: 0.98832106590271,grad_norm: 0.9999991640289755, iteration: 86905
loss: 1.0189988613128662,grad_norm: 0.999999018261604, iteration: 86906
loss: 0.9795857071876526,grad_norm: 0.9999991330090985, iteration: 86907
loss: 0.9920757412910461,grad_norm: 0.9054002888972568, iteration: 86908
loss: 0.9586147665977478,grad_norm: 0.957779296448175, iteration: 86909
loss: 0.992459237575531,grad_norm: 0.9999990694838758, iteration: 86910
loss: 1.0299428701400757,grad_norm: 0.8934604639389593, iteration: 86911
loss: 1.0192657709121704,grad_norm: 0.9999990799998573, iteration: 86912
loss: 1.0216519832611084,grad_norm: 0.9999998288361314, iteration: 86913
loss: 0.9702991843223572,grad_norm: 0.7951060819930907, iteration: 86914
loss: 1.0156726837158203,grad_norm: 0.9999999471835151, iteration: 86915
loss: 0.9843518733978271,grad_norm: 0.9242111334155392, iteration: 86916
loss: 1.027658224105835,grad_norm: 0.9999998922170305, iteration: 86917
loss: 1.025922417640686,grad_norm: 0.9999991016958337, iteration: 86918
loss: 1.0454953908920288,grad_norm: 0.9999999643315207, iteration: 86919
loss: 1.0058921575546265,grad_norm: 0.8276392316700382, iteration: 86920
loss: 1.0029377937316895,grad_norm: 0.8736305963552812, iteration: 86921
loss: 0.9988069534301758,grad_norm: 0.999999228510193, iteration: 86922
loss: 1.1010894775390625,grad_norm: 0.9999991626993305, iteration: 86923
loss: 1.0533816814422607,grad_norm: 0.999999594307894, iteration: 86924
loss: 1.083817958831787,grad_norm: 0.999999795435237, iteration: 86925
loss: 0.9956557154655457,grad_norm: 0.8277202705402437, iteration: 86926
loss: 0.9898836016654968,grad_norm: 0.8710117151525526, iteration: 86927
loss: 1.0646517276763916,grad_norm: 0.9999993518919115, iteration: 86928
loss: 1.0133823156356812,grad_norm: 0.871833562138877, iteration: 86929
loss: 1.0658159255981445,grad_norm: 0.9999990487405108, iteration: 86930
loss: 1.0119426250457764,grad_norm: 0.8830044414466119, iteration: 86931
loss: 1.0328513383865356,grad_norm: 0.9999996092303192, iteration: 86932
loss: 1.0966546535491943,grad_norm: 0.9999997109808471, iteration: 86933
loss: 1.0286945104599,grad_norm: 0.9999990090924877, iteration: 86934
loss: 0.9750881195068359,grad_norm: 0.8245246329697555, iteration: 86935
loss: 1.039323329925537,grad_norm: 0.867514121545457, iteration: 86936
loss: 1.0592008829116821,grad_norm: 0.9999996123577182, iteration: 86937
loss: 0.9792657494544983,grad_norm: 0.9224343542116535, iteration: 86938
loss: 0.9769470691680908,grad_norm: 0.999999166510225, iteration: 86939
loss: 1.0419872999191284,grad_norm: 0.9022883383415303, iteration: 86940
loss: 1.0177524089813232,grad_norm: 0.9628813235661545, iteration: 86941
loss: 0.9569699168205261,grad_norm: 0.9999990809082474, iteration: 86942
loss: 1.0421020984649658,grad_norm: 0.9999996109409675, iteration: 86943
loss: 1.0189096927642822,grad_norm: 0.9152039655428721, iteration: 86944
loss: 1.001658320426941,grad_norm: 0.9999995408252046, iteration: 86945
loss: 1.0039935111999512,grad_norm: 0.9999991362769665, iteration: 86946
loss: 1.0505025386810303,grad_norm: 0.8690183167975966, iteration: 86947
loss: 1.022332787513733,grad_norm: 0.9999993384568795, iteration: 86948
loss: 1.004586100578308,grad_norm: 0.9366191750573408, iteration: 86949
loss: 1.053230881690979,grad_norm: 0.9999992154363119, iteration: 86950
loss: 1.010879635810852,grad_norm: 0.9999990651488574, iteration: 86951
loss: 1.0232288837432861,grad_norm: 0.954100189762283, iteration: 86952
loss: 1.0245343446731567,grad_norm: 0.8768108768402719, iteration: 86953
loss: 1.0117008686065674,grad_norm: 0.8438452520632184, iteration: 86954
loss: 1.019025206565857,grad_norm: 0.9999994547345346, iteration: 86955
loss: 1.009603500366211,grad_norm: 0.8523654519490401, iteration: 86956
loss: 0.9788985252380371,grad_norm: 0.9221580935252284, iteration: 86957
loss: 1.020769715309143,grad_norm: 0.999999136104225, iteration: 86958
loss: 1.054793357849121,grad_norm: 0.9191369797723525, iteration: 86959
loss: 0.998048722743988,grad_norm: 0.927870413965938, iteration: 86960
loss: 1.0511291027069092,grad_norm: 0.9999997492663388, iteration: 86961
loss: 1.1298812627792358,grad_norm: 0.9999996395449496, iteration: 86962
loss: 0.9535232782363892,grad_norm: 0.9999990634346694, iteration: 86963
loss: 0.9920479655265808,grad_norm: 0.9999991797557831, iteration: 86964
loss: 1.0521807670593262,grad_norm: 0.9999991409079932, iteration: 86965
loss: 1.0063871145248413,grad_norm: 0.9408987525624646, iteration: 86966
loss: 0.9661116600036621,grad_norm: 0.8965761283508227, iteration: 86967
loss: 1.0189934968948364,grad_norm: 0.9999991412008319, iteration: 86968
loss: 1.0004554986953735,grad_norm: 0.8698774597231369, iteration: 86969
loss: 0.9881294965744019,grad_norm: 0.8747423387066209, iteration: 86970
loss: 1.050930142402649,grad_norm: 0.814582559358125, iteration: 86971
loss: 0.9668178558349609,grad_norm: 0.8923145251758201, iteration: 86972
loss: 1.0125325918197632,grad_norm: 0.8714075391437502, iteration: 86973
loss: 1.0190081596374512,grad_norm: 0.9999994131954302, iteration: 86974
loss: 0.9934515953063965,grad_norm: 0.9999990218618742, iteration: 86975
loss: 0.9980326294898987,grad_norm: 0.9999991517146966, iteration: 86976
loss: 1.0146865844726562,grad_norm: 0.9075894780555448, iteration: 86977
loss: 1.0056674480438232,grad_norm: 0.9999990153725644, iteration: 86978
loss: 1.0131360292434692,grad_norm: 0.9999997535621189, iteration: 86979
loss: 0.9934391379356384,grad_norm: 0.9999991352250318, iteration: 86980
loss: 1.0001050233840942,grad_norm: 0.9999992520728079, iteration: 86981
loss: 0.9822297692298889,grad_norm: 0.9877857389646523, iteration: 86982
loss: 1.1930030584335327,grad_norm: 0.9999998414348056, iteration: 86983
loss: 0.9477888941764832,grad_norm: 0.9314421379714456, iteration: 86984
loss: 1.0176743268966675,grad_norm: 0.9330759478377717, iteration: 86985
loss: 0.9708574414253235,grad_norm: 0.9999991206870965, iteration: 86986
loss: 1.0732989311218262,grad_norm: 0.9999995445401, iteration: 86987
loss: 0.9869105219841003,grad_norm: 0.9999992029868062, iteration: 86988
loss: 0.978245198726654,grad_norm: 0.863697736700138, iteration: 86989
loss: 1.0120034217834473,grad_norm: 0.9999992966236859, iteration: 86990
loss: 1.0943681001663208,grad_norm: 0.9999992510457888, iteration: 86991
loss: 0.9924089908599854,grad_norm: 0.8722544776087096, iteration: 86992
loss: 1.0150437355041504,grad_norm: 0.7374207200409567, iteration: 86993
loss: 1.0402950048446655,grad_norm: 0.8851165184146151, iteration: 86994
loss: 1.2134889364242554,grad_norm: 0.9999997272964061, iteration: 86995
loss: 1.0485213994979858,grad_norm: 0.9999993100002461, iteration: 86996
loss: 1.0489907264709473,grad_norm: 0.9999993385713241, iteration: 86997
loss: 1.0178110599517822,grad_norm: 0.8220905423309519, iteration: 86998
loss: 1.0631561279296875,grad_norm: 0.9999999380625034, iteration: 86999
loss: 1.0705554485321045,grad_norm: 0.9999991595303629, iteration: 87000
loss: 1.0233582258224487,grad_norm: 0.9468224615574913, iteration: 87001
loss: 0.9872599840164185,grad_norm: 0.999999434011448, iteration: 87002
loss: 1.0092463493347168,grad_norm: 0.9999991441476652, iteration: 87003
loss: 1.0476449728012085,grad_norm: 0.9999993189441291, iteration: 87004
loss: 0.9859503507614136,grad_norm: 0.9999998522211226, iteration: 87005
loss: 0.9811035990715027,grad_norm: 0.94463914501024, iteration: 87006
loss: 1.0290807485580444,grad_norm: 0.7843744659788052, iteration: 87007
loss: 0.9849771857261658,grad_norm: 0.9039420343151578, iteration: 87008
loss: 0.9854196906089783,grad_norm: 0.9999990944557636, iteration: 87009
loss: 1.0129846334457397,grad_norm: 0.9164166558754236, iteration: 87010
loss: 0.996636688709259,grad_norm: 0.9999990947239984, iteration: 87011
loss: 0.9965563416481018,grad_norm: 0.787525841588258, iteration: 87012
loss: 1.0303996801376343,grad_norm: 0.9149528999299085, iteration: 87013
loss: 1.0159947872161865,grad_norm: 0.9999991977499357, iteration: 87014
loss: 1.0090713500976562,grad_norm: 0.9999996814372616, iteration: 87015
loss: 0.9995047450065613,grad_norm: 0.8581392166455623, iteration: 87016
loss: 0.9611843824386597,grad_norm: 0.9546729771690383, iteration: 87017
loss: 1.0749268531799316,grad_norm: 0.9999990920450748, iteration: 87018
loss: 1.0100106000900269,grad_norm: 0.8782970097121371, iteration: 87019
loss: 1.0693131685256958,grad_norm: 0.9999998480209076, iteration: 87020
loss: 1.0454236268997192,grad_norm: 0.999999893217904, iteration: 87021
loss: 1.018032193183899,grad_norm: 0.9811557766386377, iteration: 87022
loss: 0.9918788075447083,grad_norm: 0.9856483979291224, iteration: 87023
loss: 1.0539969205856323,grad_norm: 0.9999996670477498, iteration: 87024
loss: 1.20659339427948,grad_norm: 0.9365739560925126, iteration: 87025
loss: 1.0260587930679321,grad_norm: 0.9999995457520463, iteration: 87026
loss: 1.0304694175720215,grad_norm: 0.9999993401033348, iteration: 87027
loss: 1.0221093893051147,grad_norm: 0.9999994301047694, iteration: 87028
loss: 1.0073851346969604,grad_norm: 0.8041615814075486, iteration: 87029
loss: 0.998390257358551,grad_norm: 0.9184813304234816, iteration: 87030
loss: 1.0513458251953125,grad_norm: 0.9999991093529593, iteration: 87031
loss: 1.0153578519821167,grad_norm: 0.9999997365660098, iteration: 87032
loss: 0.9947183132171631,grad_norm: 0.9284636282181479, iteration: 87033
loss: 1.037419319152832,grad_norm: 0.9934641270836336, iteration: 87034
loss: 1.015985131263733,grad_norm: 0.9999992891390727, iteration: 87035
loss: 0.9902362823486328,grad_norm: 0.8577037360502628, iteration: 87036
loss: 1.0029093027114868,grad_norm: 0.838866150809376, iteration: 87037
loss: 1.0151292085647583,grad_norm: 0.9268993903320986, iteration: 87038
loss: 1.0196092128753662,grad_norm: 0.897513827355101, iteration: 87039
loss: 0.9765236377716064,grad_norm: 0.9999991919068933, iteration: 87040
loss: 1.0127058029174805,grad_norm: 0.99999925068981, iteration: 87041
loss: 0.9840770959854126,grad_norm: 0.8593073464165628, iteration: 87042
loss: 1.0673412084579468,grad_norm: 0.947393697899345, iteration: 87043
loss: 0.9883456826210022,grad_norm: 0.8805284582988062, iteration: 87044
loss: 1.0303536653518677,grad_norm: 0.8559012006180574, iteration: 87045
loss: 1.060460090637207,grad_norm: 0.9510531357797306, iteration: 87046
loss: 1.0170871019363403,grad_norm: 0.9265617822775922, iteration: 87047
loss: 0.9792214035987854,grad_norm: 0.9999994193025327, iteration: 87048
loss: 1.0134849548339844,grad_norm: 0.9362312409186552, iteration: 87049
loss: 1.0220301151275635,grad_norm: 0.944533679889179, iteration: 87050
loss: 1.0025866031646729,grad_norm: 0.9999991739366273, iteration: 87051
loss: 1.0205492973327637,grad_norm: 0.7220597061924142, iteration: 87052
loss: 0.9849429130554199,grad_norm: 0.8129565773426043, iteration: 87053
loss: 1.050169825553894,grad_norm: 0.9999991592770809, iteration: 87054
loss: 0.9897764325141907,grad_norm: 0.9999994181683213, iteration: 87055
loss: 0.9825277328491211,grad_norm: 0.999998970589485, iteration: 87056
loss: 1.072763442993164,grad_norm: 0.9999994433118898, iteration: 87057
loss: 0.9906008243560791,grad_norm: 0.7642235212677675, iteration: 87058
loss: 1.009216547012329,grad_norm: 0.8152099261683416, iteration: 87059
loss: 1.026771068572998,grad_norm: 0.9999991023184548, iteration: 87060
loss: 1.0246661901474,grad_norm: 0.9999998732577504, iteration: 87061
loss: 0.9991933703422546,grad_norm: 0.9830328120424098, iteration: 87062
loss: 1.0063015222549438,grad_norm: 0.9999991794478459, iteration: 87063
loss: 1.0180503129959106,grad_norm: 0.9999991854648741, iteration: 87064
loss: 1.0931214094161987,grad_norm: 0.993177573218821, iteration: 87065
loss: 1.1013762950897217,grad_norm: 0.999999158578421, iteration: 87066
loss: 0.9932687878608704,grad_norm: 0.9745483138130558, iteration: 87067
loss: 1.0022400617599487,grad_norm: 0.9999991695287801, iteration: 87068
loss: 1.059266448020935,grad_norm: 0.999999226383023, iteration: 87069
loss: 1.021730661392212,grad_norm: 0.9999999166405235, iteration: 87070
loss: 1.0296523571014404,grad_norm: 0.9999996812278602, iteration: 87071
loss: 0.9841123223304749,grad_norm: 0.8730769317072574, iteration: 87072
loss: 1.1602444648742676,grad_norm: 0.9999994030574386, iteration: 87073
loss: 1.0472334623336792,grad_norm: 0.9999997691799725, iteration: 87074
loss: 1.2098290920257568,grad_norm: 0.9999995964976817, iteration: 87075
loss: 1.0420042276382446,grad_norm: 0.9999999287396221, iteration: 87076
loss: 1.1338880062103271,grad_norm: 0.9076374631475403, iteration: 87077
loss: 1.057763695716858,grad_norm: 0.9999990886236388, iteration: 87078
loss: 1.0273514986038208,grad_norm: 0.8650228858317714, iteration: 87079
loss: 1.017142415046692,grad_norm: 0.928974400449536, iteration: 87080
loss: 1.078991174697876,grad_norm: 0.9999993373471483, iteration: 87081
loss: 0.9746400713920593,grad_norm: 0.9261978964763483, iteration: 87082
loss: 1.1193268299102783,grad_norm: 0.9999998689057905, iteration: 87083
loss: 0.9861741065979004,grad_norm: 0.9207571371727984, iteration: 87084
loss: 0.9978637099266052,grad_norm: 0.94294135524836, iteration: 87085
loss: 0.9736402630805969,grad_norm: 0.8578139002837721, iteration: 87086
loss: 1.0091570615768433,grad_norm: 0.9999990562090108, iteration: 87087
loss: 1.022034764289856,grad_norm: 0.9999994178791304, iteration: 87088
loss: 1.0116552114486694,grad_norm: 0.9999998719679047, iteration: 87089
loss: 0.9739830493927002,grad_norm: 0.9216011875451873, iteration: 87090
loss: 1.000177264213562,grad_norm: 0.9999990953098363, iteration: 87091
loss: 1.0556942224502563,grad_norm: 0.9999992868897368, iteration: 87092
loss: 1.0372587442398071,grad_norm: 0.9999989842832097, iteration: 87093
loss: 0.992131769657135,grad_norm: 0.8268748941547237, iteration: 87094
loss: 1.0286290645599365,grad_norm: 0.8808480464009193, iteration: 87095
loss: 1.0278325080871582,grad_norm: 0.8061975798445947, iteration: 87096
loss: 1.0396783351898193,grad_norm: 0.9999997624744309, iteration: 87097
loss: 1.0037271976470947,grad_norm: 0.999998982173343, iteration: 87098
loss: 1.0017794370651245,grad_norm: 0.999999359225118, iteration: 87099
loss: 1.216866374015808,grad_norm: 0.9999992209461335, iteration: 87100
loss: 0.995073139667511,grad_norm: 0.9999998149188409, iteration: 87101
loss: 0.9661758542060852,grad_norm: 0.8031366226990867, iteration: 87102
loss: 1.0162314176559448,grad_norm: 0.8848785123088434, iteration: 87103
loss: 0.9537333250045776,grad_norm: 0.9098085629982194, iteration: 87104
loss: 1.0165162086486816,grad_norm: 0.9999992934360161, iteration: 87105
loss: 1.0072726011276245,grad_norm: 0.9999991176258216, iteration: 87106
loss: 1.0359562635421753,grad_norm: 0.9999994612137008, iteration: 87107
loss: 1.0177758932113647,grad_norm: 0.9546181158619598, iteration: 87108
loss: 1.0448427200317383,grad_norm: 0.9999991706185228, iteration: 87109
loss: 1.0029810667037964,grad_norm: 0.8709380712564968, iteration: 87110
loss: 1.0607703924179077,grad_norm: 0.9999992848877448, iteration: 87111
loss: 1.0844719409942627,grad_norm: 0.9999992859315311, iteration: 87112
loss: 1.1074285507202148,grad_norm: 0.999999464143668, iteration: 87113
loss: 0.9422919154167175,grad_norm: 0.9999991900067241, iteration: 87114
loss: 1.083573818206787,grad_norm: 0.9811287680878619, iteration: 87115
loss: 1.0852524042129517,grad_norm: 0.9999994671001678, iteration: 87116
loss: 0.9677404761314392,grad_norm: 0.965350180215979, iteration: 87117
loss: 1.0944733619689941,grad_norm: 0.9999996665632961, iteration: 87118
loss: 1.0771732330322266,grad_norm: 0.9905622451935487, iteration: 87119
loss: 0.995337724685669,grad_norm: 0.9166502451242727, iteration: 87120
loss: 0.9901072978973389,grad_norm: 0.9999992401904683, iteration: 87121
loss: 1.0153354406356812,grad_norm: 0.9999992195507157, iteration: 87122
loss: 1.002920389175415,grad_norm: 0.8820708935713922, iteration: 87123
loss: 1.0019850730895996,grad_norm: 0.7368896110176463, iteration: 87124
loss: 1.0119081735610962,grad_norm: 0.9787759705134109, iteration: 87125
loss: 1.0226023197174072,grad_norm: 0.9999991334574356, iteration: 87126
loss: 1.0409743785858154,grad_norm: 0.9999998539638333, iteration: 87127
loss: 1.1158555746078491,grad_norm: 0.9999998548133181, iteration: 87128
loss: 0.9872546195983887,grad_norm: 0.9999989828418675, iteration: 87129
loss: 1.008345127105713,grad_norm: 0.9662790014682243, iteration: 87130
loss: 1.0092943906784058,grad_norm: 0.999999535323348, iteration: 87131
loss: 1.0090630054473877,grad_norm: 0.9999993118933306, iteration: 87132
loss: 1.0185823440551758,grad_norm: 0.9999998610342048, iteration: 87133
loss: 1.0115686655044556,grad_norm: 0.9999992213260187, iteration: 87134
loss: 0.991828441619873,grad_norm: 0.9349259888124328, iteration: 87135
loss: 1.073872685432434,grad_norm: 0.9999995769499013, iteration: 87136
loss: 0.9935621619224548,grad_norm: 0.9807172373988716, iteration: 87137
loss: 0.9920298457145691,grad_norm: 0.9620654212754491, iteration: 87138
loss: 1.0457202196121216,grad_norm: 0.9999993582973303, iteration: 87139
loss: 1.0124595165252686,grad_norm: 0.7528282962882635, iteration: 87140
loss: 1.023099422454834,grad_norm: 0.9999998639173572, iteration: 87141
loss: 1.0519896745681763,grad_norm: 0.9324254434515711, iteration: 87142
loss: 1.0081137418746948,grad_norm: 0.9605189977632675, iteration: 87143
loss: 1.0324444770812988,grad_norm: 0.9999992264620152, iteration: 87144
loss: 1.0730514526367188,grad_norm: 0.7591583584718141, iteration: 87145
loss: 1.0738677978515625,grad_norm: 0.9999999239720893, iteration: 87146
loss: 1.0370659828186035,grad_norm: 0.9999995708624413, iteration: 87147
loss: 0.9796538949012756,grad_norm: 0.9375957276767246, iteration: 87148
loss: 1.0240861177444458,grad_norm: 0.9656562467088368, iteration: 87149
loss: 0.9806869029998779,grad_norm: 0.8165891595246759, iteration: 87150
loss: 0.995859682559967,grad_norm: 0.9999996153126277, iteration: 87151
loss: 1.0776376724243164,grad_norm: 0.9999999037999715, iteration: 87152
loss: 1.0154327154159546,grad_norm: 0.9999991600959168, iteration: 87153
loss: 1.0141242742538452,grad_norm: 0.9999991529751735, iteration: 87154
loss: 1.0026440620422363,grad_norm: 0.9999991277293866, iteration: 87155
loss: 1.0710954666137695,grad_norm: 0.9999989689839458, iteration: 87156
loss: 1.220810055732727,grad_norm: 0.9999999946211969, iteration: 87157
loss: 0.9718437194824219,grad_norm: 0.9999990859334917, iteration: 87158
loss: 1.1208221912384033,grad_norm: 0.9999998849861005, iteration: 87159
loss: 1.0150007009506226,grad_norm: 0.974501755858123, iteration: 87160
loss: 1.0097895860671997,grad_norm: 0.8596372917309063, iteration: 87161
loss: 1.0273116827011108,grad_norm: 0.8851905676106145, iteration: 87162
loss: 1.0464205741882324,grad_norm: 0.7885961010143108, iteration: 87163
loss: 1.050572395324707,grad_norm: 0.9602679034130732, iteration: 87164
loss: 1.021838903427124,grad_norm: 0.9999991723993329, iteration: 87165
loss: 0.9849715828895569,grad_norm: 0.9999990126990694, iteration: 87166
loss: 1.026492953300476,grad_norm: 0.9999990550055695, iteration: 87167
loss: 1.007514238357544,grad_norm: 0.999999124336034, iteration: 87168
loss: 1.011142373085022,grad_norm: 0.8650361148836682, iteration: 87169
loss: 0.9958518147468567,grad_norm: 0.9879710064421146, iteration: 87170
loss: 0.9831804037094116,grad_norm: 0.9999991058370095, iteration: 87171
loss: 1.0305174589157104,grad_norm: 0.9999994025475452, iteration: 87172
loss: 1.1157124042510986,grad_norm: 0.9999993892855138, iteration: 87173
loss: 0.9741149544715881,grad_norm: 0.9755650243249312, iteration: 87174
loss: 1.0121116638183594,grad_norm: 0.9492182084644853, iteration: 87175
loss: 1.0246758460998535,grad_norm: 0.9999990881159664, iteration: 87176
loss: 1.0107563734054565,grad_norm: 0.8509708595705378, iteration: 87177
loss: 1.001273512840271,grad_norm: 0.9999991791888914, iteration: 87178
loss: 0.9990625977516174,grad_norm: 0.9999993476616189, iteration: 87179
loss: 1.052809476852417,grad_norm: 0.999999781909916, iteration: 87180
loss: 1.0343449115753174,grad_norm: 0.9999998613533414, iteration: 87181
loss: 0.985267698764801,grad_norm: 0.9999991303199273, iteration: 87182
loss: 1.0110142230987549,grad_norm: 0.9999994905350601, iteration: 87183
loss: 0.9725013375282288,grad_norm: 0.9947101929549275, iteration: 87184
loss: 1.2106900215148926,grad_norm: 0.9999997096400678, iteration: 87185
loss: 1.0401442050933838,grad_norm: 0.9999998825908821, iteration: 87186
loss: 1.0895298719406128,grad_norm: 0.9506205730814844, iteration: 87187
loss: 0.9844344258308411,grad_norm: 0.8023189183527096, iteration: 87188
loss: 1.053527593612671,grad_norm: 0.9999992508514198, iteration: 87189
loss: 1.0051273107528687,grad_norm: 0.9999998630704017, iteration: 87190
loss: 0.9665734767913818,grad_norm: 0.9999989777515162, iteration: 87191
loss: 1.1255052089691162,grad_norm: 0.9999996911185771, iteration: 87192
loss: 0.9720956087112427,grad_norm: 0.9999948185036273, iteration: 87193
loss: 1.067139744758606,grad_norm: 0.9999993591594384, iteration: 87194
loss: 1.0190229415893555,grad_norm: 0.9835048274190861, iteration: 87195
loss: 1.0098567008972168,grad_norm: 0.9999994607603733, iteration: 87196
loss: 1.0026168823242188,grad_norm: 0.9999996991882403, iteration: 87197
loss: 1.0013561248779297,grad_norm: 0.9999991867283566, iteration: 87198
loss: 0.9940050840377808,grad_norm: 0.9367282289001586, iteration: 87199
loss: 1.368100881576538,grad_norm: 0.9999991768942355, iteration: 87200
loss: 0.9955057501792908,grad_norm: 0.9859870551102186, iteration: 87201
loss: 0.9947946667671204,grad_norm: 0.9999995125425676, iteration: 87202
loss: 1.0302315950393677,grad_norm: 0.9999990225373596, iteration: 87203
loss: 0.9863918423652649,grad_norm: 0.9999991441294159, iteration: 87204
loss: 0.9639978408813477,grad_norm: 0.9964839033879027, iteration: 87205
loss: 1.0367012023925781,grad_norm: 0.9999996963792688, iteration: 87206
loss: 1.1003811359405518,grad_norm: 0.9999994515937948, iteration: 87207
loss: 1.0293561220169067,grad_norm: 0.9999996184346017, iteration: 87208
loss: 0.9999593496322632,grad_norm: 0.9999996388573205, iteration: 87209
loss: 1.1843574047088623,grad_norm: 0.999999816268759, iteration: 87210
loss: 0.9964459538459778,grad_norm: 0.9606474872124501, iteration: 87211
loss: 0.9550607800483704,grad_norm: 0.854426219141299, iteration: 87212
loss: 1.0607231855392456,grad_norm: 0.9999998425291506, iteration: 87213
loss: 0.976844847202301,grad_norm: 0.9999991524572254, iteration: 87214
loss: 1.0376548767089844,grad_norm: 0.9999996817054159, iteration: 87215
loss: 0.9795952439308167,grad_norm: 0.8033430440650627, iteration: 87216
loss: 1.0711236000061035,grad_norm: 0.9999993863252297, iteration: 87217
loss: 1.0538133382797241,grad_norm: 0.9999993090644436, iteration: 87218
loss: 1.0869718790054321,grad_norm: 0.9999999068375067, iteration: 87219
loss: 0.9844117164611816,grad_norm: 0.9480737558197866, iteration: 87220
loss: 1.085024356842041,grad_norm: 0.9999993943869823, iteration: 87221
loss: 1.0191117525100708,grad_norm: 0.9999990725291729, iteration: 87222
loss: 1.0185145139694214,grad_norm: 0.9985599655707474, iteration: 87223
loss: 1.061706304550171,grad_norm: 0.9999991974258212, iteration: 87224
loss: 0.972931981086731,grad_norm: 0.9315489202240845, iteration: 87225
loss: 1.0252715349197388,grad_norm: 0.9751548012890094, iteration: 87226
loss: 1.1008671522140503,grad_norm: 0.9999998413872542, iteration: 87227
loss: 1.1074957847595215,grad_norm: 0.9999993721894822, iteration: 87228
loss: 1.0879263877868652,grad_norm: 0.9999991096657077, iteration: 87229
loss: 1.053162693977356,grad_norm: 0.9999996161519511, iteration: 87230
loss: 1.0217798948287964,grad_norm: 0.9999991424433677, iteration: 87231
loss: 1.0167036056518555,grad_norm: 0.9300732128730587, iteration: 87232
loss: 0.9732416272163391,grad_norm: 0.9619582868669629, iteration: 87233
loss: 1.007929801940918,grad_norm: 0.9805535373813496, iteration: 87234
loss: 1.2494685649871826,grad_norm: 0.9999994615556624, iteration: 87235
loss: 1.0391907691955566,grad_norm: 0.9105684401220493, iteration: 87236
loss: 0.9891592264175415,grad_norm: 0.8683321897545312, iteration: 87237
loss: 1.0700457096099854,grad_norm: 0.9999994651760039, iteration: 87238
loss: 0.9884472489356995,grad_norm: 0.9374082676042327, iteration: 87239
loss: 1.0193836688995361,grad_norm: 0.9999993455953318, iteration: 87240
loss: 0.98362797498703,grad_norm: 0.818247622522859, iteration: 87241
loss: 1.0126694440841675,grad_norm: 0.9999992115556323, iteration: 87242
loss: 1.078012466430664,grad_norm: 0.9999991583619878, iteration: 87243
loss: 1.0240991115570068,grad_norm: 0.9999993991186943, iteration: 87244
loss: 1.0012587308883667,grad_norm: 0.9999990859708147, iteration: 87245
loss: 0.9916027188301086,grad_norm: 0.8774985346798093, iteration: 87246
loss: 0.9997495412826538,grad_norm: 0.8913183803768281, iteration: 87247
loss: 1.0436269044876099,grad_norm: 0.9999995034002334, iteration: 87248
loss: 0.9999781250953674,grad_norm: 0.9999993899807853, iteration: 87249
loss: 1.0388849973678589,grad_norm: 0.9716718780079325, iteration: 87250
loss: 0.993279755115509,grad_norm: 0.9574036198161385, iteration: 87251
loss: 1.021539568901062,grad_norm: 0.9138591745776429, iteration: 87252
loss: 1.01007080078125,grad_norm: 0.999999195242747, iteration: 87253
loss: 0.9984349608421326,grad_norm: 0.9999990599382074, iteration: 87254
loss: 1.3045940399169922,grad_norm: 0.9999992630543973, iteration: 87255
loss: 0.9790573120117188,grad_norm: 0.9999991203741753, iteration: 87256
loss: 0.9941483736038208,grad_norm: 0.999999182971899, iteration: 87257
loss: 1.1376397609710693,grad_norm: 0.9999995716506184, iteration: 87258
loss: 1.0351836681365967,grad_norm: 0.9999991577559426, iteration: 87259
loss: 1.0455901622772217,grad_norm: 0.92386586685805, iteration: 87260
loss: 1.007978916168213,grad_norm: 0.9342164394854265, iteration: 87261
loss: 1.1666595935821533,grad_norm: 0.9999998457204715, iteration: 87262
loss: 0.9922690391540527,grad_norm: 0.9855839732344599, iteration: 87263
loss: 1.027904748916626,grad_norm: 0.858051943083055, iteration: 87264
loss: 0.9805564880371094,grad_norm: 0.96723462862017, iteration: 87265
loss: 0.986924946308136,grad_norm: 0.9999994940979241, iteration: 87266
loss: 1.0030603408813477,grad_norm: 0.8531608545664624, iteration: 87267
loss: 1.2254626750946045,grad_norm: 0.9999995945966933, iteration: 87268
loss: 1.0314199924468994,grad_norm: 0.9999990087919617, iteration: 87269
loss: 1.0210705995559692,grad_norm: 0.9324244590144828, iteration: 87270
loss: 1.0656445026397705,grad_norm: 0.9999992568692743, iteration: 87271
loss: 0.9803393483161926,grad_norm: 0.9999991369670279, iteration: 87272
loss: 1.0109156370162964,grad_norm: 0.9999993534054127, iteration: 87273
loss: 1.0024293661117554,grad_norm: 0.9267143835363472, iteration: 87274
loss: 1.090056300163269,grad_norm: 0.9999990735280467, iteration: 87275
loss: 1.0090558528900146,grad_norm: 0.9999992837464999, iteration: 87276
loss: 1.03257417678833,grad_norm: 0.7949231795395604, iteration: 87277
loss: 1.0379761457443237,grad_norm: 0.9525581237414447, iteration: 87278
loss: 1.0959563255310059,grad_norm: 0.9999999592800028, iteration: 87279
loss: 1.011849284172058,grad_norm: 0.9308250361683035, iteration: 87280
loss: 1.0203144550323486,grad_norm: 0.9999993682800378, iteration: 87281
loss: 1.0147886276245117,grad_norm: 0.9678712825019722, iteration: 87282
loss: 0.9892194867134094,grad_norm: 0.9398296647392183, iteration: 87283
loss: 0.9810608625411987,grad_norm: 0.9979949976362231, iteration: 87284
loss: 1.0085047483444214,grad_norm: 0.8833857125200715, iteration: 87285
loss: 0.9928027987480164,grad_norm: 0.99999907284484, iteration: 87286
loss: 1.0150611400604248,grad_norm: 0.9999996268177873, iteration: 87287
loss: 1.04305100440979,grad_norm: 0.971557792808099, iteration: 87288
loss: 1.0788931846618652,grad_norm: 0.9999993947822776, iteration: 87289
loss: 0.9949334859848022,grad_norm: 0.8216980464348268, iteration: 87290
loss: 1.0153186321258545,grad_norm: 0.8427897781101997, iteration: 87291
loss: 1.2393630743026733,grad_norm: 0.9999998518206565, iteration: 87292
loss: 1.0486934185028076,grad_norm: 0.999999722508803, iteration: 87293
loss: 1.0406460762023926,grad_norm: 0.9999993383806608, iteration: 87294
loss: 1.0162349939346313,grad_norm: 0.8976685097277778, iteration: 87295
loss: 1.0463193655014038,grad_norm: 0.8152576724705058, iteration: 87296
loss: 1.0153939723968506,grad_norm: 0.9364824974897084, iteration: 87297
loss: 0.9835968017578125,grad_norm: 0.9999995589646401, iteration: 87298
loss: 0.9953754544258118,grad_norm: 0.961926032114606, iteration: 87299
loss: 1.1054576635360718,grad_norm: 0.9999990853881503, iteration: 87300
loss: 1.1969056129455566,grad_norm: 0.9999999594208628, iteration: 87301
loss: 1.1062939167022705,grad_norm: 0.9999995936924404, iteration: 87302
loss: 0.9966304898262024,grad_norm: 0.8707020699589523, iteration: 87303
loss: 1.101228952407837,grad_norm: 0.9999995776762184, iteration: 87304
loss: 1.0242868661880493,grad_norm: 0.8966151045239277, iteration: 87305
loss: 0.9786906838417053,grad_norm: 0.9999991383117807, iteration: 87306
loss: 0.9944908618927002,grad_norm: 0.8710407701457479, iteration: 87307
loss: 0.9842678904533386,grad_norm: 0.9999995117972438, iteration: 87308
loss: 0.9928722977638245,grad_norm: 0.9999993131178939, iteration: 87309
loss: 0.9941257834434509,grad_norm: 0.9999990909637589, iteration: 87310
loss: 1.0614631175994873,grad_norm: 0.9999996077391238, iteration: 87311
loss: 1.0145567655563354,grad_norm: 0.9999993496432299, iteration: 87312
loss: 1.0384666919708252,grad_norm: 0.9147771390358148, iteration: 87313
loss: 1.0522847175598145,grad_norm: 0.9999991345465546, iteration: 87314
loss: 1.0009949207305908,grad_norm: 0.8604127975467313, iteration: 87315
loss: 0.9611948132514954,grad_norm: 0.9410625966941192, iteration: 87316
loss: 1.011841893196106,grad_norm: 0.8527866379815512, iteration: 87317
loss: 0.9761055707931519,grad_norm: 0.9999991825435564, iteration: 87318
loss: 0.9950745701789856,grad_norm: 0.9999989996651035, iteration: 87319
loss: 1.0515570640563965,grad_norm: 0.9999992256857436, iteration: 87320
loss: 1.1303077936172485,grad_norm: 0.999999916930622, iteration: 87321
loss: 0.969379723072052,grad_norm: 0.8501642983725527, iteration: 87322
loss: 1.0316170454025269,grad_norm: 0.8287923970494152, iteration: 87323
loss: 0.9816021919250488,grad_norm: 0.9297049956603097, iteration: 87324
loss: 1.0086183547973633,grad_norm: 0.9999993774724015, iteration: 87325
loss: 1.0560071468353271,grad_norm: 0.8997057597517569, iteration: 87326
loss: 0.9699487090110779,grad_norm: 0.9999990688522103, iteration: 87327
loss: 0.999828577041626,grad_norm: 0.8462637009569273, iteration: 87328
loss: 0.988370954990387,grad_norm: 0.80176258784162, iteration: 87329
loss: 0.9951606392860413,grad_norm: 0.866086782312712, iteration: 87330
loss: 1.0104596614837646,grad_norm: 0.9412605844274907, iteration: 87331
loss: 0.9732166528701782,grad_norm: 0.8987886054036548, iteration: 87332
loss: 1.082528829574585,grad_norm: 0.9999992559860038, iteration: 87333
loss: 1.0023589134216309,grad_norm: 0.8064739559214227, iteration: 87334
loss: 1.303562045097351,grad_norm: 0.9999999274963824, iteration: 87335
loss: 1.0126746892929077,grad_norm: 0.9999992637074929, iteration: 87336
loss: 1.0952860116958618,grad_norm: 0.9999993902735365, iteration: 87337
loss: 1.0050443410873413,grad_norm: 0.9156287791093012, iteration: 87338
loss: 0.9739989638328552,grad_norm: 0.9999990737042187, iteration: 87339
loss: 1.0139367580413818,grad_norm: 0.9999992538355619, iteration: 87340
loss: 1.0788527727127075,grad_norm: 0.9999991654054093, iteration: 87341
loss: 1.1920897960662842,grad_norm: 0.9999996108380611, iteration: 87342
loss: 1.0353189706802368,grad_norm: 0.822417712957682, iteration: 87343
loss: 1.055237054824829,grad_norm: 0.9999997070899699, iteration: 87344
loss: 1.0002001523971558,grad_norm: 0.8111459902765725, iteration: 87345
loss: 1.0976815223693848,grad_norm: 0.9999991476073619, iteration: 87346
loss: 1.0449527502059937,grad_norm: 0.8383945799843008, iteration: 87347
loss: 1.2342383861541748,grad_norm: 1.0000000067977792, iteration: 87348
loss: 1.103760838508606,grad_norm: 0.9999990437370097, iteration: 87349
loss: 1.0174753665924072,grad_norm: 0.9424347497995306, iteration: 87350
loss: 1.115979790687561,grad_norm: 0.9999991871879423, iteration: 87351
loss: 0.9973497986793518,grad_norm: 0.9158481706634759, iteration: 87352
loss: 1.0013089179992676,grad_norm: 0.7423517912112547, iteration: 87353
loss: 1.1894688606262207,grad_norm: 0.9999990923733207, iteration: 87354
loss: 0.9982799887657166,grad_norm: 0.9999997287997712, iteration: 87355
loss: 1.087119460105896,grad_norm: 0.9999998837215209, iteration: 87356
loss: 1.0484849214553833,grad_norm: 0.9999990091870194, iteration: 87357
loss: 1.0195109844207764,grad_norm: 0.9442044972285295, iteration: 87358
loss: 1.1118056774139404,grad_norm: 0.9999995417878762, iteration: 87359
loss: 1.082997441291809,grad_norm: 0.9999990799176043, iteration: 87360
loss: 1.5970629453659058,grad_norm: 0.9999995926374421, iteration: 87361
loss: 1.143931269645691,grad_norm: 0.9999995185405934, iteration: 87362
loss: 1.122392177581787,grad_norm: 0.9999994113764245, iteration: 87363
loss: 1.0098973512649536,grad_norm: 0.999999116589541, iteration: 87364
loss: 1.0474642515182495,grad_norm: 0.9999993549044843, iteration: 87365
loss: 1.1601731777191162,grad_norm: 0.9999998381244646, iteration: 87366
loss: 1.0612056255340576,grad_norm: 0.9999990883964759, iteration: 87367
loss: 1.0072021484375,grad_norm: 0.9999991350480183, iteration: 87368
loss: 1.0765961408615112,grad_norm: 0.9999991921451785, iteration: 87369
loss: 1.1732302904129028,grad_norm: 0.9999996719426801, iteration: 87370
loss: 1.1237411499023438,grad_norm: 0.9999991813514115, iteration: 87371
loss: 1.238875150680542,grad_norm: 0.9999997986318853, iteration: 87372
loss: 1.076156735420227,grad_norm: 0.9999990750895726, iteration: 87373
loss: 1.2466819286346436,grad_norm: 0.999999827128244, iteration: 87374
loss: 1.0774998664855957,grad_norm: 0.9999991270358779, iteration: 87375
loss: 1.4971517324447632,grad_norm: 0.9999997552845551, iteration: 87376
loss: 1.2930253744125366,grad_norm: 0.9999996400968864, iteration: 87377
loss: 1.4118744134902954,grad_norm: 0.9999998827566731, iteration: 87378
loss: 1.359215259552002,grad_norm: 0.999999559203763, iteration: 87379
loss: 1.679425835609436,grad_norm: 0.9999999759905202, iteration: 87380
loss: 1.4792205095291138,grad_norm: 0.9999999944930892, iteration: 87381
loss: 1.4012537002563477,grad_norm: 0.9999996548098278, iteration: 87382
loss: 1.5904510021209717,grad_norm: 1.0000000629741899, iteration: 87383
loss: 1.5166112184524536,grad_norm: 0.9999999580233038, iteration: 87384
loss: 1.477132797241211,grad_norm: 0.9999999798361628, iteration: 87385
loss: 1.2349984645843506,grad_norm: 0.9999997382194192, iteration: 87386
loss: 1.456393837928772,grad_norm: 0.9999997482776634, iteration: 87387
loss: 1.2435226440429688,grad_norm: 0.9999998113793598, iteration: 87388
loss: 1.5863667726516724,grad_norm: 0.9999998775297334, iteration: 87389
loss: 1.7541840076446533,grad_norm: 0.9999998993367821, iteration: 87390
loss: 1.2826467752456665,grad_norm: 0.9999995722870986, iteration: 87391
loss: 1.2384371757507324,grad_norm: 0.999999470212911, iteration: 87392
loss: 1.2380998134613037,grad_norm: 0.999999488186069, iteration: 87393
loss: 1.4332975149154663,grad_norm: 0.9999997847746341, iteration: 87394
loss: 1.139112114906311,grad_norm: 0.9999992833426631, iteration: 87395
loss: 1.2407046556472778,grad_norm: 0.9999992806733565, iteration: 87396
loss: 1.057732343673706,grad_norm: 0.9883258342470587, iteration: 87397
loss: 1.1665503978729248,grad_norm: 0.9999991090795964, iteration: 87398
loss: 1.1437326669692993,grad_norm: 0.9999993074771119, iteration: 87399
loss: 1.1086703538894653,grad_norm: 0.9999998125540056, iteration: 87400
loss: 1.0975106954574585,grad_norm: 0.9999999887540559, iteration: 87401
loss: 1.089231252670288,grad_norm: 0.9999996454384036, iteration: 87402
loss: 1.1338595151901245,grad_norm: 0.9999995441539845, iteration: 87403
loss: 1.1128791570663452,grad_norm: 0.999999769474232, iteration: 87404
loss: 1.1474096775054932,grad_norm: 0.9999997512747618, iteration: 87405
loss: 1.1231639385223389,grad_norm: 0.9999991216581767, iteration: 87406
loss: 1.0875800848007202,grad_norm: 0.9999997934322132, iteration: 87407
loss: 1.0211031436920166,grad_norm: 0.9999992100551369, iteration: 87408
loss: 1.153955340385437,grad_norm: 0.9999993682211156, iteration: 87409
loss: 1.207629680633545,grad_norm: 1.0000000296623301, iteration: 87410
loss: 1.0689342021942139,grad_norm: 0.9999992732413403, iteration: 87411
loss: 1.1846511363983154,grad_norm: 0.9999995270440875, iteration: 87412
loss: 1.1227704286575317,grad_norm: 0.9999991280116307, iteration: 87413
loss: 1.0807949304580688,grad_norm: 0.9999997575962659, iteration: 87414
loss: 1.2412972450256348,grad_norm: 1.000000098867991, iteration: 87415
loss: 1.1961995363235474,grad_norm: 0.9999997945809161, iteration: 87416
loss: 1.363518238067627,grad_norm: 0.9999996459077215, iteration: 87417
loss: 1.1167799234390259,grad_norm: 0.9834275510245446, iteration: 87418
loss: 1.1561702489852905,grad_norm: 0.9999992928262278, iteration: 87419
loss: 1.1333247423171997,grad_norm: 0.9999998366927656, iteration: 87420
loss: 1.1010335683822632,grad_norm: 0.9999999419595028, iteration: 87421
loss: 1.372275471687317,grad_norm: 0.9999996359602824, iteration: 87422
loss: 1.1804425716400146,grad_norm: 0.9999992044538137, iteration: 87423
loss: 1.0822213888168335,grad_norm: 0.9999992205388556, iteration: 87424
loss: 1.026332139968872,grad_norm: 0.9999997782829441, iteration: 87425
loss: 1.1602696180343628,grad_norm: 0.9999996777803242, iteration: 87426
loss: 1.0990424156188965,grad_norm: 0.9999991592071322, iteration: 87427
loss: 1.3550533056259155,grad_norm: 0.9999998993074134, iteration: 87428
loss: 1.3242580890655518,grad_norm: 0.99999988391576, iteration: 87429
loss: 1.0777995586395264,grad_norm: 0.9999999300928892, iteration: 87430
loss: 1.0525285005569458,grad_norm: 0.9999991380454887, iteration: 87431
loss: 1.3204299211502075,grad_norm: 0.9999997415201947, iteration: 87432
loss: 1.2120858430862427,grad_norm: 0.9999998438931099, iteration: 87433
loss: 1.0619163513183594,grad_norm: 0.9999992150641118, iteration: 87434
loss: 1.196673035621643,grad_norm: 0.9999993700344265, iteration: 87435
loss: 1.1289750337600708,grad_norm: 0.9999991099002807, iteration: 87436
loss: 1.0710303783416748,grad_norm: 0.9999990856693763, iteration: 87437
loss: 1.0014899969100952,grad_norm: 0.9999999615992948, iteration: 87438
loss: 1.620440125465393,grad_norm: 0.9999998130984254, iteration: 87439
loss: 1.4967694282531738,grad_norm: 0.9999998199292582, iteration: 87440
loss: 1.1119385957717896,grad_norm: 0.9999993614062652, iteration: 87441
loss: 1.0250236988067627,grad_norm: 0.99999907276202, iteration: 87442
loss: 1.3721325397491455,grad_norm: 0.9999994402351273, iteration: 87443
loss: 1.2986260652542114,grad_norm: 0.9999994957791213, iteration: 87444
loss: 1.3193796873092651,grad_norm: 0.9999996601867844, iteration: 87445
loss: 1.296606183052063,grad_norm: 0.9999999822375237, iteration: 87446
loss: 1.242897868156433,grad_norm: 0.9999999132868621, iteration: 87447
loss: 1.0799623727798462,grad_norm: 0.9999996950526102, iteration: 87448
loss: 1.115105152130127,grad_norm: 0.9999990559255608, iteration: 87449
loss: 1.1512192487716675,grad_norm: 0.9999999371049962, iteration: 87450
loss: 1.1023783683776855,grad_norm: 0.9999992981373599, iteration: 87451
loss: 1.1096006631851196,grad_norm: 0.9999995491273166, iteration: 87452
loss: 1.2396056652069092,grad_norm: 0.9999993568353754, iteration: 87453
loss: 1.0862209796905518,grad_norm: 0.9999999179613992, iteration: 87454
loss: 1.0567396879196167,grad_norm: 0.9999990637826789, iteration: 87455
loss: 1.33575439453125,grad_norm: 0.9999992847306005, iteration: 87456
loss: 1.0720415115356445,grad_norm: 0.9999991325820963, iteration: 87457
loss: 1.0361961126327515,grad_norm: 0.9052189639430116, iteration: 87458
loss: 1.2395132780075073,grad_norm: 0.9999999471677613, iteration: 87459
loss: 1.1016509532928467,grad_norm: 0.9999990898795873, iteration: 87460
loss: 1.1443061828613281,grad_norm: 0.9999991191455255, iteration: 87461
loss: 1.1059058904647827,grad_norm: 0.9999998892319, iteration: 87462
loss: 1.2299044132232666,grad_norm: 0.9999991943013037, iteration: 87463
loss: 1.161901593208313,grad_norm: 0.9999991616071415, iteration: 87464
loss: 1.1102001667022705,grad_norm: 0.9999990448558886, iteration: 87465
loss: 1.064653992652893,grad_norm: 0.9999992312518347, iteration: 87466
loss: 1.0409611463546753,grad_norm: 0.9179273108100389, iteration: 87467
loss: 1.125270128250122,grad_norm: 0.9999994810465366, iteration: 87468
loss: 1.0970607995986938,grad_norm: 0.9999992349653269, iteration: 87469
loss: 0.9634696245193481,grad_norm: 0.9999990968693571, iteration: 87470
loss: 1.0482357740402222,grad_norm: 0.8493890439318146, iteration: 87471
loss: 1.0145559310913086,grad_norm: 0.8356369549637293, iteration: 87472
loss: 1.1527811288833618,grad_norm: 0.9999995369366114, iteration: 87473
loss: 1.031105399131775,grad_norm: 0.9148986753158789, iteration: 87474
loss: 1.1216614246368408,grad_norm: 0.9527204747044645, iteration: 87475
loss: 1.0479763746261597,grad_norm: 0.9864596718443519, iteration: 87476
loss: 1.083007574081421,grad_norm: 0.9999991544417242, iteration: 87477
loss: 1.2566922903060913,grad_norm: 0.9999991901941268, iteration: 87478
loss: 1.10562264919281,grad_norm: 0.9954758958339714, iteration: 87479
loss: 1.0671637058258057,grad_norm: 0.9999992631161639, iteration: 87480
loss: 1.0476161241531372,grad_norm: 0.9999991429839827, iteration: 87481
loss: 1.1056921482086182,grad_norm: 0.9999990702304463, iteration: 87482
loss: 1.041710376739502,grad_norm: 0.9999997374817015, iteration: 87483
loss: 1.088422417640686,grad_norm: 0.8750656702339261, iteration: 87484
loss: 1.0434536933898926,grad_norm: 0.9999998388820772, iteration: 87485
loss: 1.0703039169311523,grad_norm: 0.9999998504495423, iteration: 87486
loss: 1.0638136863708496,grad_norm: 0.9999993572110535, iteration: 87487
loss: 1.0437746047973633,grad_norm: 0.9999990468701598, iteration: 87488
loss: 1.0803492069244385,grad_norm: 0.9999991087375866, iteration: 87489
loss: 1.0536965131759644,grad_norm: 0.8610978626160504, iteration: 87490
loss: 1.0572328567504883,grad_norm: 0.9999994971257316, iteration: 87491
loss: 1.066917896270752,grad_norm: 0.9999990595589522, iteration: 87492
loss: 1.0838226079940796,grad_norm: 0.8344987396872928, iteration: 87493
loss: 1.0385247468948364,grad_norm: 0.999999726458535, iteration: 87494
loss: 1.2368457317352295,grad_norm: 0.9999999125720634, iteration: 87495
loss: 1.0724068880081177,grad_norm: 0.99999899751374, iteration: 87496
loss: 1.0804013013839722,grad_norm: 0.9999990535041537, iteration: 87497
loss: 1.0218943357467651,grad_norm: 0.9999991880205128, iteration: 87498
loss: 1.0524832010269165,grad_norm: 0.9229044704617834, iteration: 87499
loss: 1.0510820150375366,grad_norm: 0.999999587715787, iteration: 87500
loss: 1.3458435535430908,grad_norm: 0.99999954689416, iteration: 87501
loss: 1.1168793439865112,grad_norm: 0.9999996473043665, iteration: 87502
loss: 1.57982337474823,grad_norm: 0.9999998686405938, iteration: 87503
loss: 1.1672147512435913,grad_norm: 0.9999996206605074, iteration: 87504
loss: 0.9625587463378906,grad_norm: 0.9999865653504018, iteration: 87505
loss: 1.1131172180175781,grad_norm: 0.9214909265073297, iteration: 87506
loss: 1.2066045999526978,grad_norm: 0.9999997373225286, iteration: 87507
loss: 1.019430160522461,grad_norm: 0.9999991485605726, iteration: 87508
loss: 1.2065813541412354,grad_norm: 0.9999992490495344, iteration: 87509
loss: 1.1415427923202515,grad_norm: 0.9999993716447874, iteration: 87510
loss: 1.1527454853057861,grad_norm: 0.9999991983463841, iteration: 87511
loss: 1.139880657196045,grad_norm: 0.9999999706489237, iteration: 87512
loss: 1.245849609375,grad_norm: 1.0000000631841364, iteration: 87513
loss: 1.1252530813217163,grad_norm: 0.9999996034752433, iteration: 87514
loss: 1.0271644592285156,grad_norm: 0.999999204006778, iteration: 87515
loss: 1.2253831624984741,grad_norm: 0.9999999777740864, iteration: 87516
loss: 1.2299890518188477,grad_norm: 0.9999998031995372, iteration: 87517
loss: 1.2408251762390137,grad_norm: 0.9999998893843763, iteration: 87518
loss: 1.0428677797317505,grad_norm: 0.9999998542719171, iteration: 87519
loss: 1.0879193544387817,grad_norm: 0.9999999809023769, iteration: 87520
loss: 1.0408531427383423,grad_norm: 0.9999992262796727, iteration: 87521
loss: 1.135543704032898,grad_norm: 0.9999999782355096, iteration: 87522
loss: 1.2448341846466064,grad_norm: 0.9999999914534372, iteration: 87523
loss: 1.1002025604248047,grad_norm: 0.9999996834013279, iteration: 87524
loss: 1.2280690670013428,grad_norm: 0.9999998614107818, iteration: 87525
loss: 1.0600959062576294,grad_norm: 0.9999992841655465, iteration: 87526
loss: 1.1827691793441772,grad_norm: 0.9999998627630887, iteration: 87527
loss: 1.2459005117416382,grad_norm: 0.9999998373142223, iteration: 87528
loss: 1.3131083250045776,grad_norm: 0.999999595790859, iteration: 87529
loss: 1.3336858749389648,grad_norm: 0.9999997642321363, iteration: 87530
loss: 1.4667001962661743,grad_norm: 0.9999997642814112, iteration: 87531
loss: 1.1690200567245483,grad_norm: 0.9999999925111898, iteration: 87532
loss: 1.105352759361267,grad_norm: 0.9999998098366624, iteration: 87533
loss: 1.1387332677841187,grad_norm: 0.9999992979815306, iteration: 87534
loss: 1.6880427598953247,grad_norm: 0.9999998600025842, iteration: 87535
loss: 1.3382561206817627,grad_norm: 0.9999998726034441, iteration: 87536
loss: 1.1826157569885254,grad_norm: 0.9999995042340574, iteration: 87537
loss: 1.215402364730835,grad_norm: 0.9999997519975776, iteration: 87538
loss: 1.6120707988739014,grad_norm: 1.0000000307988204, iteration: 87539
loss: 1.2895570993423462,grad_norm: 0.9999999485119759, iteration: 87540
loss: 1.3969073295593262,grad_norm: 0.9999999017468304, iteration: 87541
loss: 1.285402536392212,grad_norm: 0.9999996862000071, iteration: 87542
loss: 1.2834692001342773,grad_norm: 0.9999999387414724, iteration: 87543
loss: 1.2128232717514038,grad_norm: 0.999999458353519, iteration: 87544
loss: 1.0377599000930786,grad_norm: 0.8859678051877332, iteration: 87545
loss: 1.2490090131759644,grad_norm: 0.9999999256680524, iteration: 87546
loss: 1.0921417474746704,grad_norm: 0.9999999998129852, iteration: 87547
loss: 1.1493984460830688,grad_norm: 0.999999587906394, iteration: 87548
loss: 1.2945308685302734,grad_norm: 1.0000000106289333, iteration: 87549
loss: 1.025829792022705,grad_norm: 0.9999995889883074, iteration: 87550
loss: 1.1072602272033691,grad_norm: 0.9999998756292198, iteration: 87551
loss: 1.030578851699829,grad_norm: 0.9969003019375122, iteration: 87552
loss: 1.6402788162231445,grad_norm: 0.9999998306305837, iteration: 87553
loss: 1.2162363529205322,grad_norm: 0.9999992789921893, iteration: 87554
loss: 1.206163763999939,grad_norm: 0.999999605789781, iteration: 87555
loss: 1.2608262300491333,grad_norm: 0.9999993485634824, iteration: 87556
loss: 1.0551789999008179,grad_norm: 0.9999991801590151, iteration: 87557
loss: 1.2494760751724243,grad_norm: 0.9999998273756991, iteration: 87558
loss: 1.1578410863876343,grad_norm: 0.9999994307354252, iteration: 87559
loss: 1.1964166164398193,grad_norm: 0.9999995431261648, iteration: 87560
loss: 1.174475073814392,grad_norm: 0.9999996009889222, iteration: 87561
loss: 1.1692564487457275,grad_norm: 0.9999991781002338, iteration: 87562
loss: 1.1028039455413818,grad_norm: 0.9999998162103235, iteration: 87563
loss: 1.4755405187606812,grad_norm: 0.9999997270544968, iteration: 87564
loss: 1.1136022806167603,grad_norm: 0.9999996255282093, iteration: 87565
loss: 1.2145456075668335,grad_norm: 0.9999996250000683, iteration: 87566
loss: 1.2752224206924438,grad_norm: 0.9999997381167407, iteration: 87567
loss: 1.0719470977783203,grad_norm: 0.9999994497423492, iteration: 87568
loss: 1.4232628345489502,grad_norm: 0.999999907341985, iteration: 87569
loss: 1.1077783107757568,grad_norm: 0.9999999171141601, iteration: 87570
loss: 1.3866909742355347,grad_norm: 0.999999750565537, iteration: 87571
loss: 1.186689853668213,grad_norm: 0.9999998911954284, iteration: 87572
loss: 1.2619249820709229,grad_norm: 0.999999855328084, iteration: 87573
loss: 1.5547677278518677,grad_norm: 0.9999999242316703, iteration: 87574
loss: 1.3682231903076172,grad_norm: 0.9999997886169374, iteration: 87575
loss: 1.3506940603256226,grad_norm: 0.9999999620347158, iteration: 87576
loss: 1.4114166498184204,grad_norm: 0.9999995697553208, iteration: 87577
loss: 1.1404157876968384,grad_norm: 0.9999997606663688, iteration: 87578
loss: 1.2374646663665771,grad_norm: 0.9999994747145005, iteration: 87579
loss: 1.1122562885284424,grad_norm: 0.9999992677427881, iteration: 87580
loss: 1.3324408531188965,grad_norm: 0.9999999638813267, iteration: 87581
loss: 1.5204631090164185,grad_norm: 0.9999999122777917, iteration: 87582
loss: 1.3186241388320923,grad_norm: 0.9999999977184375, iteration: 87583
loss: 1.5412622690200806,grad_norm: 0.9999997741942905, iteration: 87584
loss: 1.7641762495040894,grad_norm: 0.9999998579234333, iteration: 87585
loss: 1.271712303161621,grad_norm: 0.9999998489510644, iteration: 87586
loss: 1.3730452060699463,grad_norm: 0.9999995831719904, iteration: 87587
loss: 1.3026971817016602,grad_norm: 0.999999412096814, iteration: 87588
loss: 1.464170217514038,grad_norm: 0.9999998548120289, iteration: 87589
loss: 1.3272089958190918,grad_norm: 1.000000068094432, iteration: 87590
loss: 1.3448947668075562,grad_norm: 0.9999999038707258, iteration: 87591
loss: 1.1769994497299194,grad_norm: 0.9999999867459232, iteration: 87592
loss: 1.2142683267593384,grad_norm: 0.9999993039621933, iteration: 87593
loss: 1.2533814907073975,grad_norm: 0.9999999355123596, iteration: 87594
loss: 1.4958544969558716,grad_norm: 0.9999999315885298, iteration: 87595
loss: 1.346859097480774,grad_norm: 0.9999993538372621, iteration: 87596
loss: 1.1845630407333374,grad_norm: 0.999999723717552, iteration: 87597
loss: 1.5710490942001343,grad_norm: 0.9999999825772378, iteration: 87598
loss: 1.3044135570526123,grad_norm: 0.9999997580495728, iteration: 87599
loss: 1.108970046043396,grad_norm: 0.8781892830811686, iteration: 87600
loss: 1.2773675918579102,grad_norm: 0.9999997556575111, iteration: 87601
loss: 1.3054009675979614,grad_norm: 0.999999506525941, iteration: 87602
loss: 1.3496402502059937,grad_norm: 0.9999999165832966, iteration: 87603
loss: 0.9886714816093445,grad_norm: 0.9999991878418669, iteration: 87604
loss: 1.1939871311187744,grad_norm: 0.9999996323304468, iteration: 87605
loss: 1.2359365224838257,grad_norm: 0.999999920371961, iteration: 87606
loss: 1.4421329498291016,grad_norm: 0.9999999232670579, iteration: 87607
loss: 1.3992410898208618,grad_norm: 0.9999998776689801, iteration: 87608
loss: 2.0879034996032715,grad_norm: 1.0000000572317342, iteration: 87609
loss: 1.4591132402420044,grad_norm: 0.9999998516327884, iteration: 87610
loss: 1.373094081878662,grad_norm: 0.9999999185523099, iteration: 87611
loss: 1.303997278213501,grad_norm: 0.9999999499163894, iteration: 87612
loss: 1.7064168453216553,grad_norm: 0.9999999322817485, iteration: 87613
loss: 1.2795677185058594,grad_norm: 0.9999998318757111, iteration: 87614
loss: 1.6939377784729004,grad_norm: 0.999999961608038, iteration: 87615
loss: 1.3309555053710938,grad_norm: 0.9999994802411042, iteration: 87616
loss: 1.5381070375442505,grad_norm: 0.9999998957236814, iteration: 87617
loss: 1.4970017671585083,grad_norm: 0.9999998148156695, iteration: 87618
loss: 1.5742350816726685,grad_norm: 1.0000000544735783, iteration: 87619
loss: 1.8545453548431396,grad_norm: 0.9999998845805422, iteration: 87620
loss: 1.6480004787445068,grad_norm: 0.9999999589192422, iteration: 87621
loss: 1.3466455936431885,grad_norm: 1.0000000519028398, iteration: 87622
loss: 1.8797868490219116,grad_norm: 1.0000000310548725, iteration: 87623
loss: 1.6223907470703125,grad_norm: 0.9999998051988391, iteration: 87624
loss: 1.878198266029358,grad_norm: 1.0000000291282167, iteration: 87625
loss: 1.5545464754104614,grad_norm: 1.0000000452003681, iteration: 87626
loss: 1.2895934581756592,grad_norm: 0.9999998948832545, iteration: 87627
loss: 1.86737060546875,grad_norm: 0.9999999326657708, iteration: 87628
loss: 1.6040154695510864,grad_norm: 1.000000002903893, iteration: 87629
loss: 1.732390284538269,grad_norm: 0.9999997686584443, iteration: 87630
loss: 1.6735461950302124,grad_norm: 0.9999999486711866, iteration: 87631
loss: 1.8962671756744385,grad_norm: 0.999999906421933, iteration: 87632
loss: 1.8935306072235107,grad_norm: 0.9999999364220382, iteration: 87633
loss: 1.7267745733261108,grad_norm: 0.9999997916197498, iteration: 87634
loss: 1.9129396677017212,grad_norm: 0.9999998945998597, iteration: 87635
loss: 1.6693154573440552,grad_norm: 0.9999999375600481, iteration: 87636
loss: 2.343376398086548,grad_norm: 0.9999999581782291, iteration: 87637
loss: 2.2916431427001953,grad_norm: 0.9999999260821318, iteration: 87638
loss: 2.5385820865631104,grad_norm: 0.9999999059212953, iteration: 87639
loss: 2.2929139137268066,grad_norm: 1.000000008203473, iteration: 87640
loss: 1.8904541730880737,grad_norm: 0.9999999630135572, iteration: 87641
loss: 1.777325987815857,grad_norm: 0.9999998783668632, iteration: 87642
loss: 1.8909308910369873,grad_norm: 0.9999999100078457, iteration: 87643
loss: 2.1318538188934326,grad_norm: 0.9999999047477798, iteration: 87644
loss: 1.6930789947509766,grad_norm: 0.9999998228141136, iteration: 87645
loss: 1.797716736793518,grad_norm: 0.9999997573960172, iteration: 87646
loss: 1.4988105297088623,grad_norm: 0.9999997600730075, iteration: 87647
loss: 1.3891167640686035,grad_norm: 0.999999925088821, iteration: 87648
loss: 1.6694997549057007,grad_norm: 0.9999996709674521, iteration: 87649
loss: 1.4222015142440796,grad_norm: 1.000000009702585, iteration: 87650
loss: 1.8731735944747925,grad_norm: 0.9999999710011007, iteration: 87651
loss: 1.4847573041915894,grad_norm: 0.9999997186697871, iteration: 87652
loss: 1.9026743173599243,grad_norm: 0.9999997883609613, iteration: 87653
loss: 1.376500129699707,grad_norm: 0.9999997918193392, iteration: 87654
loss: 1.2463042736053467,grad_norm: 0.9999999576596996, iteration: 87655
loss: 1.242394208908081,grad_norm: 0.999999849050322, iteration: 87656
loss: 1.416641354560852,grad_norm: 0.9999999627393382, iteration: 87657
loss: 1.6677323579788208,grad_norm: 0.9999998336511879, iteration: 87658
loss: 1.2656128406524658,grad_norm: 0.9999997369508014, iteration: 87659
loss: 1.3258209228515625,grad_norm: 1.0000000452237676, iteration: 87660
loss: 1.2450389862060547,grad_norm: 0.9999994865008446, iteration: 87661
loss: 1.2039916515350342,grad_norm: 0.9999995833816123, iteration: 87662
loss: 1.267831563949585,grad_norm: 1.000000023157363, iteration: 87663
loss: 1.468923568725586,grad_norm: 0.9999997011545989, iteration: 87664
loss: 1.4890247583389282,grad_norm: 0.9999999278413564, iteration: 87665
loss: 1.287880301475525,grad_norm: 0.999999880161028, iteration: 87666
loss: 1.6219128370285034,grad_norm: 0.9999998760105048, iteration: 87667
loss: 1.6201077699661255,grad_norm: 0.9999999314295672, iteration: 87668
loss: 1.3985626697540283,grad_norm: 0.999999467944559, iteration: 87669
loss: 1.096359133720398,grad_norm: 0.9999995868277204, iteration: 87670
loss: 1.3428632020950317,grad_norm: 0.9999998607342115, iteration: 87671
loss: 1.2268574237823486,grad_norm: 0.999999303696543, iteration: 87672
loss: 1.2167201042175293,grad_norm: 0.9999999913035544, iteration: 87673
loss: 1.225969672203064,grad_norm: 0.9999999198904538, iteration: 87674
loss: 1.3951666355133057,grad_norm: 0.9999999237404225, iteration: 87675
loss: 1.3488057851791382,grad_norm: 0.9999995928306168, iteration: 87676
loss: 1.2755625247955322,grad_norm: 0.9999997694087588, iteration: 87677
loss: 1.238974928855896,grad_norm: 0.9999998715984698, iteration: 87678
loss: 1.2199623584747314,grad_norm: 0.9999994852434599, iteration: 87679
loss: 1.2545892000198364,grad_norm: 0.9999994958751088, iteration: 87680
loss: 1.1446120738983154,grad_norm: 0.9999993476177885, iteration: 87681
loss: 1.3085777759552002,grad_norm: 0.9999994636611211, iteration: 87682
loss: 1.3197380304336548,grad_norm: 0.9999998618429172, iteration: 87683
loss: 1.276843547821045,grad_norm: 0.9999997971747123, iteration: 87684
loss: 1.504185438156128,grad_norm: 0.9999995591376802, iteration: 87685
loss: 1.2257568836212158,grad_norm: 1.000000077908964, iteration: 87686
loss: 1.2287719249725342,grad_norm: 0.9999998571732552, iteration: 87687
loss: 1.3453716039657593,grad_norm: 0.9999997388065466, iteration: 87688
loss: 1.4653096199035645,grad_norm: 0.9999999395844505, iteration: 87689
loss: 1.0736244916915894,grad_norm: 0.9999990504110459, iteration: 87690
loss: 1.4095770120620728,grad_norm: 0.9999999899094848, iteration: 87691
loss: 1.1715506315231323,grad_norm: 0.9999998089655521, iteration: 87692
loss: 1.371055245399475,grad_norm: 0.9999998822046702, iteration: 87693
loss: 1.341237187385559,grad_norm: 0.9999998331416413, iteration: 87694
loss: 1.1519999504089355,grad_norm: 0.9999992622400347, iteration: 87695
loss: 1.1640880107879639,grad_norm: 0.9999999725804999, iteration: 87696
loss: 1.141454815864563,grad_norm: 0.9999997693675067, iteration: 87697
loss: 1.2971323728561401,grad_norm: 0.9999997039981644, iteration: 87698
loss: 1.2682311534881592,grad_norm: 0.9999997869207431, iteration: 87699
loss: 1.1337165832519531,grad_norm: 0.9999992916514457, iteration: 87700
loss: 1.3242603540420532,grad_norm: 0.9999998091068897, iteration: 87701
loss: 1.5094481706619263,grad_norm: 0.9999997593948102, iteration: 87702
loss: 1.1186307668685913,grad_norm: 0.9251273405309575, iteration: 87703
loss: 1.1674703359603882,grad_norm: 0.9999996707991889, iteration: 87704
loss: 1.3414547443389893,grad_norm: 0.999999605254169, iteration: 87705
loss: 1.284299373626709,grad_norm: 0.9999992473943874, iteration: 87706
loss: 1.0468171834945679,grad_norm: 0.926511437588629, iteration: 87707
loss: 1.2213894128799438,grad_norm: 0.9999998712350479, iteration: 87708
loss: 1.1828806400299072,grad_norm: 0.9999998442307148, iteration: 87709
loss: 1.1097501516342163,grad_norm: 0.999999891878501, iteration: 87710
loss: 1.2949745655059814,grad_norm: 0.9999999657111266, iteration: 87711
loss: 1.199453592300415,grad_norm: 0.9999995561341695, iteration: 87712
loss: 1.0089374780654907,grad_norm: 0.9999992208322062, iteration: 87713
loss: 1.2835944890975952,grad_norm: 0.999999904900367, iteration: 87714
loss: 1.2001779079437256,grad_norm: 0.9999999187200754, iteration: 87715
loss: 1.2036148309707642,grad_norm: 0.999999941863527, iteration: 87716
loss: 1.0712066888809204,grad_norm: 0.9999996128078633, iteration: 87717
loss: 1.168555736541748,grad_norm: 0.999999868765998, iteration: 87718
loss: 1.3878962993621826,grad_norm: 0.9999998321564187, iteration: 87719
loss: 1.2094429731369019,grad_norm: 0.9999997591012604, iteration: 87720
loss: 1.1542675495147705,grad_norm: 0.9999995934268592, iteration: 87721
loss: 1.3720391988754272,grad_norm: 0.9999998581144955, iteration: 87722
loss: 1.086465835571289,grad_norm: 0.9999994797932461, iteration: 87723
loss: 1.1472731828689575,grad_norm: 0.9999996687838195, iteration: 87724
loss: 1.30265212059021,grad_norm: 1.0000000200855148, iteration: 87725
loss: 1.1879098415374756,grad_norm: 0.999999888589045, iteration: 87726
loss: 1.1361100673675537,grad_norm: 0.9999994378439303, iteration: 87727
loss: 1.01735258102417,grad_norm: 0.9999993047834111, iteration: 87728
loss: 1.097101092338562,grad_norm: 0.9999993914238154, iteration: 87729
loss: 0.9665372967720032,grad_norm: 0.9999990296576765, iteration: 87730
loss: 1.3119515180587769,grad_norm: 0.9999999640595741, iteration: 87731
loss: 1.1205275058746338,grad_norm: 0.999999966121903, iteration: 87732
loss: 1.1357173919677734,grad_norm: 0.9999995800180117, iteration: 87733
loss: 1.056307077407837,grad_norm: 0.9999992074648595, iteration: 87734
loss: 1.0880167484283447,grad_norm: 0.9999990296344923, iteration: 87735
loss: 1.0296822786331177,grad_norm: 0.9999990757569, iteration: 87736
loss: 1.0339246988296509,grad_norm: 0.9999999638107264, iteration: 87737
loss: 1.3659305572509766,grad_norm: 0.9999999053546884, iteration: 87738
loss: 1.1286402940750122,grad_norm: 0.9999994699710407, iteration: 87739
loss: 1.74072265625,grad_norm: 0.9999998701553462, iteration: 87740
loss: 1.3266478776931763,grad_norm: 0.9999998030191671, iteration: 87741
loss: 1.055661678314209,grad_norm: 0.9999999835857369, iteration: 87742
loss: 1.1370604038238525,grad_norm: 0.9999998210618001, iteration: 87743
loss: 1.089314579963684,grad_norm: 0.9999992789696834, iteration: 87744
loss: 1.3227735757827759,grad_norm: 0.999999898538623, iteration: 87745
loss: 1.1173826456069946,grad_norm: 0.9999991552094208, iteration: 87746
loss: 1.1657500267028809,grad_norm: 0.9999999599968878, iteration: 87747
loss: 1.2879260778427124,grad_norm: 0.9999997915931721, iteration: 87748
loss: 1.1171752214431763,grad_norm: 0.9999999441211287, iteration: 87749
loss: 1.3135738372802734,grad_norm: 0.9999999657461817, iteration: 87750
loss: 1.0886359214782715,grad_norm: 1.0000000380626035, iteration: 87751
loss: 1.191190242767334,grad_norm: 0.9999992213621085, iteration: 87752
loss: 1.05476713180542,grad_norm: 0.9999994266616467, iteration: 87753
loss: 0.9910679459571838,grad_norm: 0.8793887264095887, iteration: 87754
loss: 1.0230891704559326,grad_norm: 0.9712331262137878, iteration: 87755
loss: 1.0611735582351685,grad_norm: 0.8921938690462076, iteration: 87756
loss: 1.037198781967163,grad_norm: 0.9999993891090796, iteration: 87757
loss: 1.0804821252822876,grad_norm: 0.9999995921907189, iteration: 87758
loss: 1.0463491678237915,grad_norm: 0.9999997755133831, iteration: 87759
loss: 1.0461238622665405,grad_norm: 0.9999994381571725, iteration: 87760
loss: 0.9822837114334106,grad_norm: 0.9626376883126933, iteration: 87761
loss: 1.2197402715682983,grad_norm: 0.9999999263034914, iteration: 87762
loss: 0.9920775890350342,grad_norm: 0.999999326713657, iteration: 87763
loss: 1.012139916419983,grad_norm: 0.9999998053727442, iteration: 87764
loss: 1.113805890083313,grad_norm: 0.9999999689430498, iteration: 87765
loss: 1.0029475688934326,grad_norm: 0.9999993643698202, iteration: 87766
loss: 1.0829262733459473,grad_norm: 0.922663860049185, iteration: 87767
loss: 1.0363892316818237,grad_norm: 0.999999159631812, iteration: 87768
loss: 1.2371995449066162,grad_norm: 0.9999996190327431, iteration: 87769
loss: 1.0343329906463623,grad_norm: 0.9999992250290108, iteration: 87770
loss: 1.0418769121170044,grad_norm: 0.9999991669221853, iteration: 87771
loss: 1.0146915912628174,grad_norm: 0.8755113416523319, iteration: 87772
loss: 1.1459401845932007,grad_norm: 0.9999993840710937, iteration: 87773
loss: 1.069663643836975,grad_norm: 0.9999991752487142, iteration: 87774
loss: 1.0513893365859985,grad_norm: 0.9999991355198624, iteration: 87775
loss: 1.1004738807678223,grad_norm: 1.0000000540940046, iteration: 87776
loss: 1.0508792400360107,grad_norm: 0.9868315777028432, iteration: 87777
loss: 1.008692741394043,grad_norm: 0.9322557471170889, iteration: 87778
loss: 1.1580389738082886,grad_norm: 0.9999998833522773, iteration: 87779
loss: 1.08060622215271,grad_norm: 0.9999996409413912, iteration: 87780
loss: 1.135500431060791,grad_norm: 0.9999998454580799, iteration: 87781
loss: 1.0841084718704224,grad_norm: 0.999999009622107, iteration: 87782
loss: 1.036625623703003,grad_norm: 1.000000100381972, iteration: 87783
loss: 1.245998740196228,grad_norm: 0.9999999232289059, iteration: 87784
loss: 1.1010390520095825,grad_norm: 0.999999229734455, iteration: 87785
loss: 1.1224926710128784,grad_norm: 1.000000002008278, iteration: 87786
loss: 0.997576892375946,grad_norm: 0.8864200937363703, iteration: 87787
loss: 1.0648524761199951,grad_norm: 0.9999995603317394, iteration: 87788
loss: 0.9626134634017944,grad_norm: 0.9999991139077781, iteration: 87789
loss: 1.061370849609375,grad_norm: 0.9999998362682817, iteration: 87790
loss: 0.9979519844055176,grad_norm: 0.9999992451386822, iteration: 87791
loss: 1.0066770315170288,grad_norm: 0.8595826302435564, iteration: 87792
loss: 1.0729658603668213,grad_norm: 0.9999995903371331, iteration: 87793
loss: 1.0216675996780396,grad_norm: 0.9597790025771921, iteration: 87794
loss: 0.9933513402938843,grad_norm: 0.79164510487855, iteration: 87795
loss: 1.1774135828018188,grad_norm: 0.999999539905782, iteration: 87796
loss: 1.0109890699386597,grad_norm: 0.9999992362968048, iteration: 87797
loss: 0.9845919609069824,grad_norm: 0.9999994150114547, iteration: 87798
loss: 1.0370047092437744,grad_norm: 0.9999995240814524, iteration: 87799
loss: 1.1198759078979492,grad_norm: 0.9999995413278693, iteration: 87800
loss: 0.9849758148193359,grad_norm: 0.857238341079899, iteration: 87801
loss: 1.0125173330307007,grad_norm: 0.9999990786508463, iteration: 87802
loss: 1.064221978187561,grad_norm: 0.9999998250291283, iteration: 87803
loss: 1.0100533962249756,grad_norm: 0.9999992009028805, iteration: 87804
loss: 1.058127999305725,grad_norm: 0.9378525211972022, iteration: 87805
loss: 0.9895713329315186,grad_norm: 0.9999990975780134, iteration: 87806
loss: 0.9901046752929688,grad_norm: 0.9999990511609816, iteration: 87807
loss: 1.2480437755584717,grad_norm: 0.9999994931156585, iteration: 87808
loss: 1.0272362232208252,grad_norm: 0.9999994742848032, iteration: 87809
loss: 1.0331803560256958,grad_norm: 0.9999993829688032, iteration: 87810
loss: 1.0073903799057007,grad_norm: 0.999999456493523, iteration: 87811
loss: 1.0455729961395264,grad_norm: 0.9148008026305845, iteration: 87812
loss: 1.0371761322021484,grad_norm: 0.9999992744195906, iteration: 87813
loss: 1.0712660551071167,grad_norm: 0.9999991788498477, iteration: 87814
loss: 1.1229493618011475,grad_norm: 0.9999994309835978, iteration: 87815
loss: 0.9702588319778442,grad_norm: 0.999999191547756, iteration: 87816
loss: 1.18885338306427,grad_norm: 0.999999799143314, iteration: 87817
loss: 1.0127381086349487,grad_norm: 0.8624039400224779, iteration: 87818
loss: 1.1093825101852417,grad_norm: 0.9218737799263709, iteration: 87819
loss: 1.0116910934448242,grad_norm: 0.9999992681701615, iteration: 87820
loss: 1.1888384819030762,grad_norm: 1.0000000478317486, iteration: 87821
loss: 1.0032609701156616,grad_norm: 0.9999989806087296, iteration: 87822
loss: 1.0071414709091187,grad_norm: 0.9747568114662815, iteration: 87823
loss: 1.0925753116607666,grad_norm: 0.9999993158605447, iteration: 87824
loss: 1.0019899606704712,grad_norm: 0.8234807438211308, iteration: 87825
loss: 1.1289628744125366,grad_norm: 0.9999996496178933, iteration: 87826
loss: 1.0003843307495117,grad_norm: 0.993411379725749, iteration: 87827
loss: 1.0807838439941406,grad_norm: 0.9420728387853866, iteration: 87828
loss: 1.3624347448349,grad_norm: 0.9999997130392425, iteration: 87829
loss: 1.0491546392440796,grad_norm: 0.999999950800079, iteration: 87830
loss: 0.9943017959594727,grad_norm: 0.91015508145774, iteration: 87831
loss: 0.9903320074081421,grad_norm: 0.9999991358155325, iteration: 87832
loss: 1.010613203048706,grad_norm: 0.999999651511737, iteration: 87833
loss: 1.0175151824951172,grad_norm: 0.8418826103109064, iteration: 87834
loss: 1.125413179397583,grad_norm: 0.9999991995701759, iteration: 87835
loss: 1.0879597663879395,grad_norm: 0.9999993738832891, iteration: 87836
loss: 0.9717718362808228,grad_norm: 0.9417169858256362, iteration: 87837
loss: 1.1624338626861572,grad_norm: 0.9999999533565052, iteration: 87838
loss: 1.0153940916061401,grad_norm: 0.9679953306473719, iteration: 87839
loss: 0.9723094701766968,grad_norm: 0.8822590614052177, iteration: 87840
loss: 1.0522180795669556,grad_norm: 0.999999377703122, iteration: 87841
loss: 1.066530466079712,grad_norm: 0.9999990893578696, iteration: 87842
loss: 1.0958173274993896,grad_norm: 0.9999999600360012, iteration: 87843
loss: 1.246179223060608,grad_norm: 0.9999999652847513, iteration: 87844
loss: 0.9693107604980469,grad_norm: 0.9999991047371328, iteration: 87845
loss: 1.0689098834991455,grad_norm: 0.8293213367146555, iteration: 87846
loss: 1.0151500701904297,grad_norm: 0.99999912815991, iteration: 87847
loss: 0.9991196393966675,grad_norm: 0.9999991772433872, iteration: 87848
loss: 1.020682692527771,grad_norm: 0.8481018348633912, iteration: 87849
loss: 0.981692373752594,grad_norm: 0.9300768484882947, iteration: 87850
loss: 1.0128473043441772,grad_norm: 0.9999992176192126, iteration: 87851
loss: 0.9652382731437683,grad_norm: 0.7961494641656303, iteration: 87852
loss: 1.0110175609588623,grad_norm: 0.999999719407788, iteration: 87853
loss: 0.9623789191246033,grad_norm: 0.9999993581559008, iteration: 87854
loss: 1.080801248550415,grad_norm: 0.9999995372867617, iteration: 87855
loss: 1.0326350927352905,grad_norm: 0.9999997513810944, iteration: 87856
loss: 1.0741496086120605,grad_norm: 0.9505254205642927, iteration: 87857
loss: 1.1144195795059204,grad_norm: 0.9999994207399044, iteration: 87858
loss: 1.0755970478057861,grad_norm: 0.9999992011712142, iteration: 87859
loss: 1.085686445236206,grad_norm: 0.9999996799940032, iteration: 87860
loss: 0.9945967197418213,grad_norm: 0.9194963819773814, iteration: 87861
loss: 1.0032497644424438,grad_norm: 0.9993133492970745, iteration: 87862
loss: 0.9978705048561096,grad_norm: 0.9999990164118846, iteration: 87863
loss: 1.011593222618103,grad_norm: 0.9999991213070503, iteration: 87864
loss: 1.0431169271469116,grad_norm: 0.9999994668561883, iteration: 87865
loss: 0.93636155128479,grad_norm: 0.8238893579163442, iteration: 87866
loss: 1.0500996112823486,grad_norm: 0.9999998991413805, iteration: 87867
loss: 1.054347276687622,grad_norm: 0.9999992268823904, iteration: 87868
loss: 1.0100842714309692,grad_norm: 0.8918010145463385, iteration: 87869
loss: 1.0193147659301758,grad_norm: 0.9425292862236045, iteration: 87870
loss: 1.0251542329788208,grad_norm: 0.9014607364665052, iteration: 87871
loss: 1.015408992767334,grad_norm: 0.9999993028602941, iteration: 87872
loss: 1.1397877931594849,grad_norm: 0.9999999556726952, iteration: 87873
loss: 1.0353302955627441,grad_norm: 0.9999994310871242, iteration: 87874
loss: 1.0542148351669312,grad_norm: 0.9999990446950417, iteration: 87875
loss: 1.0324918031692505,grad_norm: 0.9999991713555005, iteration: 87876
loss: 0.9967119693756104,grad_norm: 0.9984961555532171, iteration: 87877
loss: 1.054661750793457,grad_norm: 0.9999992065673738, iteration: 87878
loss: 0.9806761741638184,grad_norm: 0.9999994575000644, iteration: 87879
loss: 1.0222725868225098,grad_norm: 0.9999999264706412, iteration: 87880
loss: 1.018775224685669,grad_norm: 0.9999990957406205, iteration: 87881
loss: 1.03608238697052,grad_norm: 0.8710213106973915, iteration: 87882
loss: 1.0331610441207886,grad_norm: 0.8821969964718499, iteration: 87883
loss: 1.0685807466506958,grad_norm: 0.8657541757014897, iteration: 87884
loss: 0.9520683884620667,grad_norm: 0.9999990831724566, iteration: 87885
loss: 0.9961029291152954,grad_norm: 0.9999991228107534, iteration: 87886
loss: 1.0137745141983032,grad_norm: 0.999999272674664, iteration: 87887
loss: 1.0193877220153809,grad_norm: 0.9999989859685409, iteration: 87888
loss: 1.012480616569519,grad_norm: 0.8575299774424843, iteration: 87889
loss: 0.9986996650695801,grad_norm: 0.9999991943382284, iteration: 87890
loss: 1.1323673725128174,grad_norm: 0.999999822477135, iteration: 87891
loss: 1.0817654132843018,grad_norm: 0.9999993208795565, iteration: 87892
loss: 1.0059771537780762,grad_norm: 0.9778444319707686, iteration: 87893
loss: 1.0176810026168823,grad_norm: 0.9434006871293428, iteration: 87894
loss: 1.007097840309143,grad_norm: 0.8427075468334576, iteration: 87895
loss: 1.133770227432251,grad_norm: 0.9999991745256014, iteration: 87896
loss: 0.9897175431251526,grad_norm: 0.9183028828879878, iteration: 87897
loss: 1.0415154695510864,grad_norm: 1.0000000058655196, iteration: 87898
loss: 1.0171175003051758,grad_norm: 0.9999991743874754, iteration: 87899
loss: 1.0400495529174805,grad_norm: 0.9545184523796085, iteration: 87900
loss: 1.0004884004592896,grad_norm: 0.8952994809538227, iteration: 87901
loss: 1.0614992380142212,grad_norm: 0.9999996162945558, iteration: 87902
loss: 1.003511667251587,grad_norm: 0.999999097812921, iteration: 87903
loss: 1.0047279596328735,grad_norm: 0.8589078238047686, iteration: 87904
loss: 0.9573634266853333,grad_norm: 0.9999992048287453, iteration: 87905
loss: 1.0318224430084229,grad_norm: 0.9999997218737486, iteration: 87906
loss: 0.974416196346283,grad_norm: 0.9476104796744207, iteration: 87907
loss: 1.167034387588501,grad_norm: 0.9999991841399877, iteration: 87908
loss: 1.139015793800354,grad_norm: 0.9999996941379944, iteration: 87909
loss: 0.9834223389625549,grad_norm: 0.8888300488552477, iteration: 87910
loss: 0.9891616702079773,grad_norm: 0.8436996970547214, iteration: 87911
loss: 1.0016776323318481,grad_norm: 0.9999992889974264, iteration: 87912
loss: 1.0598442554473877,grad_norm: 0.9999997837830825, iteration: 87913
loss: 1.077304482460022,grad_norm: 0.9999992889179021, iteration: 87914
loss: 1.0673937797546387,grad_norm: 0.9999990974024549, iteration: 87915
loss: 1.1864017248153687,grad_norm: 0.9999998585780703, iteration: 87916
loss: 1.058374047279358,grad_norm: 0.9999999006674888, iteration: 87917
loss: 0.9994775056838989,grad_norm: 0.9999993574948826, iteration: 87918
loss: 1.0174709558486938,grad_norm: 0.9126669474398617, iteration: 87919
loss: 1.0269012451171875,grad_norm: 0.9932207063792086, iteration: 87920
loss: 0.9978821277618408,grad_norm: 0.9999990575554282, iteration: 87921
loss: 1.0119088888168335,grad_norm: 0.9999991832012792, iteration: 87922
loss: 1.051773190498352,grad_norm: 0.9999991014336642, iteration: 87923
loss: 0.9992390871047974,grad_norm: 0.9372849146745363, iteration: 87924
loss: 1.0071245431900024,grad_norm: 0.9999992081823641, iteration: 87925
loss: 1.0100734233856201,grad_norm: 0.999999021292586, iteration: 87926
loss: 0.9360669851303101,grad_norm: 0.9999990146802128, iteration: 87927
loss: 1.1172136068344116,grad_norm: 0.9999994772499383, iteration: 87928
loss: 1.0493850708007812,grad_norm: 0.9752556556246654, iteration: 87929
loss: 1.2393062114715576,grad_norm: 0.9999999071657341, iteration: 87930
loss: 1.0205649137496948,grad_norm: 0.8794256389392753, iteration: 87931
loss: 0.9431631565093994,grad_norm: 0.8912210291340634, iteration: 87932
loss: 1.1215568780899048,grad_norm: 0.9999996204972809, iteration: 87933
loss: 1.0268254280090332,grad_norm: 0.981742917049372, iteration: 87934
loss: 1.1416099071502686,grad_norm: 0.9999998156374527, iteration: 87935
loss: 0.9632014632225037,grad_norm: 0.8420888617364268, iteration: 87936
loss: 1.0055736303329468,grad_norm: 0.9999992122002208, iteration: 87937
loss: 1.0226271152496338,grad_norm: 0.999999493607087, iteration: 87938
loss: 0.9979568719863892,grad_norm: 0.9999989443575854, iteration: 87939
loss: 1.0366144180297852,grad_norm: 0.9999994279702712, iteration: 87940
loss: 1.003446340560913,grad_norm: 0.9999997224796828, iteration: 87941
loss: 1.0111242532730103,grad_norm: 0.9999991541386803, iteration: 87942
loss: 1.0191212892532349,grad_norm: 0.9999998719189006, iteration: 87943
loss: 1.0053150653839111,grad_norm: 0.9655264542251146, iteration: 87944
loss: 0.9935542941093445,grad_norm: 0.9999991151273715, iteration: 87945
loss: 1.0421159267425537,grad_norm: 0.9038398876162222, iteration: 87946
loss: 1.0459697246551514,grad_norm: 0.9999990876232582, iteration: 87947
loss: 1.012271761894226,grad_norm: 0.9999992580668889, iteration: 87948
loss: 1.0210224390029907,grad_norm: 0.8697783841842355, iteration: 87949
loss: 1.000584602355957,grad_norm: 0.9999992425842738, iteration: 87950
loss: 0.9864296913146973,grad_norm: 0.9769901778349087, iteration: 87951
loss: 1.0483527183532715,grad_norm: 0.9999991332950338, iteration: 87952
loss: 1.010818600654602,grad_norm: 0.9185343116638899, iteration: 87953
loss: 1.0325168371200562,grad_norm: 0.9603271706615671, iteration: 87954
loss: 1.1225786209106445,grad_norm: 0.9999996521050223, iteration: 87955
loss: 1.0639346837997437,grad_norm: 0.9999995376255987, iteration: 87956
loss: 1.0103925466537476,grad_norm: 0.776112916198762, iteration: 87957
loss: 0.9755420088768005,grad_norm: 0.9999996804722766, iteration: 87958
loss: 1.035568356513977,grad_norm: 0.995545340449021, iteration: 87959
loss: 1.0487308502197266,grad_norm: 0.9999993456504396, iteration: 87960
loss: 1.0130996704101562,grad_norm: 0.9417416817717349, iteration: 87961
loss: 1.0530915260314941,grad_norm: 0.9999993076586028, iteration: 87962
loss: 1.0428630113601685,grad_norm: 0.8860196847228462, iteration: 87963
loss: 0.9753239750862122,grad_norm: 0.9999994581926639, iteration: 87964
loss: 1.0146089792251587,grad_norm: 0.9999990588837205, iteration: 87965
loss: 1.006402850151062,grad_norm: 0.9999991423945273, iteration: 87966
loss: 1.0211639404296875,grad_norm: 0.9999990556853882, iteration: 87967
loss: 0.9952213764190674,grad_norm: 0.9999991684088436, iteration: 87968
loss: 1.0834102630615234,grad_norm: 0.9280422709912933, iteration: 87969
loss: 1.0054714679718018,grad_norm: 0.9254588088067542, iteration: 87970
loss: 1.0582585334777832,grad_norm: 0.9999999553040998, iteration: 87971
loss: 1.0090770721435547,grad_norm: 0.8230783074596179, iteration: 87972
loss: 0.9819284677505493,grad_norm: 0.9999991257656689, iteration: 87973
loss: 1.0372711420059204,grad_norm: 0.9999996137048374, iteration: 87974
loss: 1.0833137035369873,grad_norm: 0.9999990592642303, iteration: 87975
loss: 1.0722250938415527,grad_norm: 0.9614512859157653, iteration: 87976
loss: 1.0012999773025513,grad_norm: 0.9999991822856502, iteration: 87977
loss: 1.0495023727416992,grad_norm: 0.9999990723424347, iteration: 87978
loss: 1.0329101085662842,grad_norm: 0.9999991400569961, iteration: 87979
loss: 1.0517572164535522,grad_norm: 0.9999993107785442, iteration: 87980
loss: 1.2049028873443604,grad_norm: 0.9999991197221626, iteration: 87981
loss: 0.9958996176719666,grad_norm: 0.8782460008645243, iteration: 87982
loss: 1.067481279373169,grad_norm: 0.9999993641544168, iteration: 87983
loss: 1.0602930784225464,grad_norm: 0.9999999941338314, iteration: 87984
loss: 1.0508832931518555,grad_norm: 0.9999993000635352, iteration: 87985
loss: 1.000491976737976,grad_norm: 0.9999990471106238, iteration: 87986
loss: 0.9922786355018616,grad_norm: 0.9999991107347739, iteration: 87987
loss: 1.0076218843460083,grad_norm: 0.9999992735163084, iteration: 87988
loss: 1.099014401435852,grad_norm: 0.9220040376334404, iteration: 87989
loss: 1.0012013912200928,grad_norm: 0.8061385242185912, iteration: 87990
loss: 1.0090782642364502,grad_norm: 0.9999992972042817, iteration: 87991
loss: 1.0672545433044434,grad_norm: 0.9558002076767239, iteration: 87992
loss: 1.0118591785430908,grad_norm: 0.9999992810466403, iteration: 87993
loss: 1.032322883605957,grad_norm: 0.9999991042404572, iteration: 87994
loss: 1.0254864692687988,grad_norm: 0.9999992270243778, iteration: 87995
loss: 1.003006100654602,grad_norm: 0.9999992281625585, iteration: 87996
loss: 1.0440789461135864,grad_norm: 0.9999994558934121, iteration: 87997
loss: 0.9968515634536743,grad_norm: 0.8214144558197874, iteration: 87998
loss: 0.9752104878425598,grad_norm: 0.9999989481678866, iteration: 87999
loss: 1.030108094215393,grad_norm: 0.9999998791752567, iteration: 88000
loss: 0.993805468082428,grad_norm: 0.9321677410539737, iteration: 88001
loss: 1.0855088233947754,grad_norm: 0.9999991843039173, iteration: 88002
loss: 1.083486557006836,grad_norm: 0.9999994706016779, iteration: 88003
loss: 1.1976354122161865,grad_norm: 0.9999999354279289, iteration: 88004
loss: 1.0173559188842773,grad_norm: 0.9999992825052924, iteration: 88005
loss: 1.021277904510498,grad_norm: 0.8209894546125732, iteration: 88006
loss: 1.0828220844268799,grad_norm: 0.9999992921082087, iteration: 88007
loss: 1.0349351167678833,grad_norm: 0.9535655715914239, iteration: 88008
loss: 1.0503169298171997,grad_norm: 0.9999991553350586, iteration: 88009
loss: 0.9719019532203674,grad_norm: 0.9999991657797102, iteration: 88010
loss: 0.991705060005188,grad_norm: 0.9999991259917114, iteration: 88011
loss: 1.1148319244384766,grad_norm: 0.9999997108439287, iteration: 88012
loss: 1.0223463773727417,grad_norm: 0.8794497419892443, iteration: 88013
loss: 1.0615581274032593,grad_norm: 0.9449802432328643, iteration: 88014
loss: 1.0818989276885986,grad_norm: 0.9999991818172134, iteration: 88015
loss: 1.036545991897583,grad_norm: 0.9999995380715336, iteration: 88016
loss: 0.9902937412261963,grad_norm: 0.9474051530311233, iteration: 88017
loss: 1.0089452266693115,grad_norm: 0.9999994771876178, iteration: 88018
loss: 1.137202501296997,grad_norm: 1.0000000613887796, iteration: 88019
loss: 1.0002799034118652,grad_norm: 0.9999994331716239, iteration: 88020
loss: 1.1344223022460938,grad_norm: 0.9999999609845586, iteration: 88021
loss: 1.0608116388320923,grad_norm: 0.9785562933531511, iteration: 88022
loss: 1.0534099340438843,grad_norm: 0.972781853072275, iteration: 88023
loss: 1.0297179222106934,grad_norm: 0.9999992816397931, iteration: 88024
loss: 1.0939652919769287,grad_norm: 0.9999992899676975, iteration: 88025
loss: 1.0542725324630737,grad_norm: 0.9999992179901952, iteration: 88026
loss: 0.9993357062339783,grad_norm: 0.9999990377167247, iteration: 88027
loss: 1.0685484409332275,grad_norm: 0.9999999877001052, iteration: 88028
loss: 1.045857310295105,grad_norm: 0.9461126747431459, iteration: 88029
loss: 1.0728530883789062,grad_norm: 0.9999999451607786, iteration: 88030
loss: 1.0518009662628174,grad_norm: 0.9287983855057207, iteration: 88031
loss: 1.2214906215667725,grad_norm: 0.9999991739329943, iteration: 88032
loss: 1.026242971420288,grad_norm: 0.9999991003890093, iteration: 88033
loss: 1.0493476390838623,grad_norm: 0.9999990430834595, iteration: 88034
loss: 1.1080398559570312,grad_norm: 0.9999993496661714, iteration: 88035
loss: 1.0680465698242188,grad_norm: 0.9999997836815462, iteration: 88036
loss: 1.003171682357788,grad_norm: 0.99999957093008, iteration: 88037
loss: 1.491727590560913,grad_norm: 1.000000040317296, iteration: 88038
loss: 0.9821406602859497,grad_norm: 0.9999991734223889, iteration: 88039
loss: 1.0715736150741577,grad_norm: 0.9989258902553877, iteration: 88040
loss: 1.070894718170166,grad_norm: 1.000000029491881, iteration: 88041
loss: 1.0693329572677612,grad_norm: 0.9999996074640148, iteration: 88042
loss: 1.224238634109497,grad_norm: 0.9999993368544702, iteration: 88043
loss: 1.1100914478302002,grad_norm: 0.9999993704791208, iteration: 88044
loss: 0.9701674580574036,grad_norm: 0.9217064563352411, iteration: 88045
loss: 1.0614255666732788,grad_norm: 0.9999997041382812, iteration: 88046
loss: 1.0072295665740967,grad_norm: 0.9999991025271057, iteration: 88047
loss: 0.9712387323379517,grad_norm: 0.9999991971255325, iteration: 88048
loss: 1.1253715753555298,grad_norm: 0.9026574255209597, iteration: 88049
loss: 1.0819348096847534,grad_norm: 0.9999992195011973, iteration: 88050
loss: 1.0536516904830933,grad_norm: 0.9999989782592942, iteration: 88051
loss: 0.9861944317817688,grad_norm: 0.8196160381965739, iteration: 88052
loss: 1.0538462400436401,grad_norm: 0.9999991106715512, iteration: 88053
loss: 1.0283199548721313,grad_norm: 0.9999990481603317, iteration: 88054
loss: 1.1292505264282227,grad_norm: 0.9999999232546863, iteration: 88055
loss: 1.0003353357315063,grad_norm: 0.9644880987078222, iteration: 88056
loss: 1.0050866603851318,grad_norm: 0.9999991089826845, iteration: 88057
loss: 1.0295850038528442,grad_norm: 0.9999991302304736, iteration: 88058
loss: 1.049318552017212,grad_norm: 0.9999994258200912, iteration: 88059
loss: 1.0303351879119873,grad_norm: 0.9999993054469815, iteration: 88060
loss: 1.2481179237365723,grad_norm: 0.9999996633780827, iteration: 88061
loss: 1.2686392068862915,grad_norm: 0.999999778579294, iteration: 88062
loss: 1.0398972034454346,grad_norm: 0.9199761316264207, iteration: 88063
loss: 1.0887649059295654,grad_norm: 0.957562951467117, iteration: 88064
loss: 0.9856940507888794,grad_norm: 0.9999994430633738, iteration: 88065
loss: 1.075356364250183,grad_norm: 0.9999998104661055, iteration: 88066
loss: 1.060926079750061,grad_norm: 0.9999995225400234, iteration: 88067
loss: 0.993617057800293,grad_norm: 1.0000000075492645, iteration: 88068
loss: 1.1190662384033203,grad_norm: 0.9999991409185802, iteration: 88069
loss: 1.0808149576187134,grad_norm: 0.999999419780002, iteration: 88070
loss: 1.0072823762893677,grad_norm: 0.9082134966207519, iteration: 88071
loss: 1.0381752252578735,grad_norm: 0.9999990419875332, iteration: 88072
loss: 1.037920594215393,grad_norm: 0.99999918945684, iteration: 88073
loss: 1.0506995916366577,grad_norm: 0.8938882110771074, iteration: 88074
loss: 1.105028748512268,grad_norm: 0.9999993102096588, iteration: 88075
loss: 1.0530060529708862,grad_norm: 0.9999999194985126, iteration: 88076
loss: 1.006003975868225,grad_norm: 0.8731207339715942, iteration: 88077
loss: 1.1623018980026245,grad_norm: 0.9999998643766098, iteration: 88078
loss: 1.0161951780319214,grad_norm: 0.9999998367787286, iteration: 88079
loss: 1.0373454093933105,grad_norm: 0.9999992435156461, iteration: 88080
loss: 1.0255604982376099,grad_norm: 0.9999991788385462, iteration: 88081
loss: 1.0217658281326294,grad_norm: 0.9999990633665448, iteration: 88082
loss: 1.0486414432525635,grad_norm: 0.803742517606183, iteration: 88083
loss: 0.977469801902771,grad_norm: 0.999999225038876, iteration: 88084
loss: 1.0760407447814941,grad_norm: 0.9999993763497926, iteration: 88085
loss: 1.028969407081604,grad_norm: 0.9999991388255842, iteration: 88086
loss: 0.9777791500091553,grad_norm: 0.9999990519221094, iteration: 88087
loss: 1.0706323385238647,grad_norm: 0.9999992861229865, iteration: 88088
loss: 1.0109961032867432,grad_norm: 0.9999997312732993, iteration: 88089
loss: 1.121574878692627,grad_norm: 0.9999994278699188, iteration: 88090
loss: 1.0290048122406006,grad_norm: 0.9999996608440177, iteration: 88091
loss: 1.001707911491394,grad_norm: 0.877671437174918, iteration: 88092
loss: 1.0087662935256958,grad_norm: 0.9999990989034997, iteration: 88093
loss: 1.0665267705917358,grad_norm: 0.9999997909112748, iteration: 88094
loss: 0.9952375888824463,grad_norm: 0.9390900137636096, iteration: 88095
loss: 1.1473904848098755,grad_norm: 0.9999994297368112, iteration: 88096
loss: 1.053368091583252,grad_norm: 0.9361369587478602, iteration: 88097
loss: 1.0582035779953003,grad_norm: 0.9999991706772475, iteration: 88098
loss: 1.1905728578567505,grad_norm: 0.9999999109782438, iteration: 88099
loss: 0.9796833395957947,grad_norm: 0.9999996398630591, iteration: 88100
loss: 1.0505197048187256,grad_norm: 0.9999995628521754, iteration: 88101
loss: 1.1166514158248901,grad_norm: 0.9999994302587258, iteration: 88102
loss: 1.0686094760894775,grad_norm: 0.9137281957050649, iteration: 88103
loss: 1.0799899101257324,grad_norm: 0.9086467949865763, iteration: 88104
loss: 1.03934907913208,grad_norm: 0.9999990378575763, iteration: 88105
loss: 1.0466482639312744,grad_norm: 0.9999995083672152, iteration: 88106
loss: 1.0864698886871338,grad_norm: 0.9999995194477593, iteration: 88107
loss: 1.003446340560913,grad_norm: 0.9999990397829207, iteration: 88108
loss: 1.005268931388855,grad_norm: 0.9999993295281245, iteration: 88109
loss: 1.1779162883758545,grad_norm: 0.9999995787370592, iteration: 88110
loss: 1.0108332633972168,grad_norm: 0.9999992473691804, iteration: 88111
loss: 0.9978495240211487,grad_norm: 0.9999996226458858, iteration: 88112
loss: 1.0090287923812866,grad_norm: 0.9939212414272168, iteration: 88113
loss: 1.0235300064086914,grad_norm: 0.9999995081712332, iteration: 88114
loss: 1.0620390176773071,grad_norm: 0.893106628595536, iteration: 88115
loss: 1.0500693321228027,grad_norm: 0.9999997607382581, iteration: 88116
loss: 1.0572248697280884,grad_norm: 0.999999811006655, iteration: 88117
loss: 1.1233940124511719,grad_norm: 0.9999998780926078, iteration: 88118
loss: 1.1599340438842773,grad_norm: 0.9999997225648052, iteration: 88119
loss: 1.240494966506958,grad_norm: 0.9999999770215864, iteration: 88120
loss: 1.0754178762435913,grad_norm: 0.9999994516095405, iteration: 88121
loss: 1.0607786178588867,grad_norm: 0.9999992704534061, iteration: 88122
loss: 1.1135945320129395,grad_norm: 0.9999994819045341, iteration: 88123
loss: 1.3531169891357422,grad_norm: 0.9999999690219685, iteration: 88124
loss: 1.001967430114746,grad_norm: 0.9999996709772149, iteration: 88125
loss: 1.119645357131958,grad_norm: 0.9687702202299809, iteration: 88126
loss: 1.0483036041259766,grad_norm: 0.9999990477993037, iteration: 88127
loss: 1.137068748474121,grad_norm: 0.9999999814770887, iteration: 88128
loss: 1.227003812789917,grad_norm: 0.9999998699919546, iteration: 88129
loss: 1.0087103843688965,grad_norm: 0.9999991287642059, iteration: 88130
loss: 1.041805624961853,grad_norm: 0.9999993969474826, iteration: 88131
loss: 1.039669156074524,grad_norm: 0.8273277780468914, iteration: 88132
loss: 1.0125545263290405,grad_norm: 0.9333336489711356, iteration: 88133
loss: 1.054534912109375,grad_norm: 0.9999990319373514, iteration: 88134
loss: 0.9988521337509155,grad_norm: 0.9999990387456953, iteration: 88135
loss: 0.9808862805366516,grad_norm: 0.999999572100403, iteration: 88136
loss: 1.108962893486023,grad_norm: 0.9999999783320289, iteration: 88137
loss: 1.0306533575057983,grad_norm: 0.9999996615688421, iteration: 88138
loss: 1.0330109596252441,grad_norm: 0.9999998349996411, iteration: 88139
loss: 1.0072904825210571,grad_norm: 0.9999995354038165, iteration: 88140
loss: 1.0239580869674683,grad_norm: 0.999999635541966, iteration: 88141
loss: 1.0720423460006714,grad_norm: 0.9999998219713052, iteration: 88142
loss: 1.0927138328552246,grad_norm: 0.9999997895410935, iteration: 88143
loss: 1.0254249572753906,grad_norm: 0.9447173768561521, iteration: 88144
loss: 1.0195560455322266,grad_norm: 0.8739928813590887, iteration: 88145
loss: 1.0443446636199951,grad_norm: 0.9947224386552488, iteration: 88146
loss: 0.9754062294960022,grad_norm: 0.81656969572514, iteration: 88147
loss: 1.0109270811080933,grad_norm: 0.993213687482484, iteration: 88148
loss: 0.9706732630729675,grad_norm: 0.9999997487748011, iteration: 88149
loss: 1.0894354581832886,grad_norm: 0.9999993733374906, iteration: 88150
loss: 1.0609972476959229,grad_norm: 0.999999049950893, iteration: 88151
loss: 1.0835145711898804,grad_norm: 0.9999992609780938, iteration: 88152
loss: 0.9946582317352295,grad_norm: 0.9999991427753896, iteration: 88153
loss: 1.0752075910568237,grad_norm: 0.9999990958461432, iteration: 88154
loss: 1.0132570266723633,grad_norm: 0.9999995433551611, iteration: 88155
loss: 1.1018636226654053,grad_norm: 0.9999995280130283, iteration: 88156
loss: 1.0621570348739624,grad_norm: 0.9999991063715311, iteration: 88157
loss: 1.058556079864502,grad_norm: 0.9999992400576929, iteration: 88158
loss: 0.9937348365783691,grad_norm: 0.942705715000582, iteration: 88159
loss: 1.0183436870574951,grad_norm: 0.999999034673708, iteration: 88160
loss: 1.1011278629302979,grad_norm: 0.9999990952742053, iteration: 88161
loss: 1.0608391761779785,grad_norm: 0.9421885815529988, iteration: 88162
loss: 1.0257402658462524,grad_norm: 0.9235755543813253, iteration: 88163
loss: 1.001981496810913,grad_norm: 0.960608590040385, iteration: 88164
loss: 1.0794750452041626,grad_norm: 0.9999996818081834, iteration: 88165
loss: 1.01774001121521,grad_norm: 0.99999976439909, iteration: 88166
loss: 1.0425862073898315,grad_norm: 0.8521339373343484, iteration: 88167
loss: 1.0126099586486816,grad_norm: 0.9277451689487839, iteration: 88168
loss: 1.0634151697158813,grad_norm: 0.9999992771838735, iteration: 88169
loss: 1.025412678718567,grad_norm: 0.9999990490966711, iteration: 88170
loss: 1.0358078479766846,grad_norm: 0.9534049820719542, iteration: 88171
loss: 1.0223534107208252,grad_norm: 0.9518848080123233, iteration: 88172
loss: 1.0293785333633423,grad_norm: 0.9438646438008145, iteration: 88173
loss: 1.0018126964569092,grad_norm: 0.8907739096199465, iteration: 88174
loss: 1.0956313610076904,grad_norm: 0.9999997546250095, iteration: 88175
loss: 1.1090214252471924,grad_norm: 0.9999999151430627, iteration: 88176
loss: 0.9985004663467407,grad_norm: 0.9999990628920732, iteration: 88177
loss: 1.0467201471328735,grad_norm: 0.9999991196760227, iteration: 88178
loss: 1.0879371166229248,grad_norm: 0.9999996157249739, iteration: 88179
loss: 1.050605297088623,grad_norm: 0.9999991728584019, iteration: 88180
loss: 1.0000532865524292,grad_norm: 0.9999994605259259, iteration: 88181
loss: 0.9989600777626038,grad_norm: 0.9604481067780261, iteration: 88182
loss: 1.0054467916488647,grad_norm: 0.999999958285518, iteration: 88183
loss: 1.0347260236740112,grad_norm: 0.9999997287756943, iteration: 88184
loss: 1.0253537893295288,grad_norm: 0.9534288823118549, iteration: 88185
loss: 1.0567327737808228,grad_norm: 0.9999997161988763, iteration: 88186
loss: 1.0186264514923096,grad_norm: 0.9867978631156834, iteration: 88187
loss: 1.0184694528579712,grad_norm: 0.9999990639261145, iteration: 88188
loss: 1.0312403440475464,grad_norm: 0.9819541762359167, iteration: 88189
loss: 1.1042577028274536,grad_norm: 0.7895397391594859, iteration: 88190
loss: 1.1068118810653687,grad_norm: 0.9999996177402153, iteration: 88191
loss: 1.1363930702209473,grad_norm: 0.9999998911038872, iteration: 88192
loss: 0.9976392388343811,grad_norm: 0.968418907542565, iteration: 88193
loss: 1.0363482236862183,grad_norm: 0.9999990921369725, iteration: 88194
loss: 1.0179363489151,grad_norm: 0.9398871294545743, iteration: 88195
loss: 1.1009238958358765,grad_norm: 0.9999994371557345, iteration: 88196
loss: 1.059273600578308,grad_norm: 0.9999993676965278, iteration: 88197
loss: 1.0434942245483398,grad_norm: 0.999999596591703, iteration: 88198
loss: 1.046476125717163,grad_norm: 0.9999998510539018, iteration: 88199
loss: 1.0762611627578735,grad_norm: 0.9999994624932382, iteration: 88200
loss: 1.1016368865966797,grad_norm: 1.000000069346224, iteration: 88201
loss: 1.2487621307373047,grad_norm: 0.9999998152133168, iteration: 88202
loss: 0.993598461151123,grad_norm: 0.9999998423166454, iteration: 88203
loss: 1.0211787223815918,grad_norm: 0.9999989648406469, iteration: 88204
loss: 1.1797740459442139,grad_norm: 0.99910905675439, iteration: 88205
loss: 1.10154128074646,grad_norm: 0.9999999798151314, iteration: 88206
loss: 0.9876332879066467,grad_norm: 0.999999930655241, iteration: 88207
loss: 1.022038459777832,grad_norm: 0.860157265761765, iteration: 88208
loss: 1.0561162233352661,grad_norm: 0.99999906424227, iteration: 88209
loss: 1.1217972040176392,grad_norm: 0.9999997126090804, iteration: 88210
loss: 1.08418607711792,grad_norm: 0.9145524734763641, iteration: 88211
loss: 1.2680588960647583,grad_norm: 0.9999994833463658, iteration: 88212
loss: 1.056420087814331,grad_norm: 0.8472745122988212, iteration: 88213
loss: 1.0350075960159302,grad_norm: 0.9689341219739177, iteration: 88214
loss: 1.0786253213882446,grad_norm: 0.9999990866764246, iteration: 88215
loss: 1.04618239402771,grad_norm: 0.9999991922050332, iteration: 88216
loss: 0.9993004202842712,grad_norm: 0.9256807107578856, iteration: 88217
loss: 1.0681407451629639,grad_norm: 0.9999994656409064, iteration: 88218
loss: 1.104702115058899,grad_norm: 0.9999995110766294, iteration: 88219
loss: 1.0028470754623413,grad_norm: 0.9999992194422394, iteration: 88220
loss: 1.0727050304412842,grad_norm: 0.9999998515947764, iteration: 88221
loss: 1.1047537326812744,grad_norm: 0.9976522811764038, iteration: 88222
loss: 1.1423290967941284,grad_norm: 0.9999998227153436, iteration: 88223
loss: 1.0325783491134644,grad_norm: 0.9999992540106868, iteration: 88224
loss: 1.0263375043869019,grad_norm: 0.9999995607379126, iteration: 88225
loss: 1.0302640199661255,grad_norm: 0.888469453415721, iteration: 88226
loss: 1.1174802780151367,grad_norm: 0.9999991467798773, iteration: 88227
loss: 1.014198660850525,grad_norm: 0.9999989824552862, iteration: 88228
loss: 1.0401068925857544,grad_norm: 0.9999999155146038, iteration: 88229
loss: 1.0535807609558105,grad_norm: 0.999999117557205, iteration: 88230
loss: 1.1239264011383057,grad_norm: 0.9999993909351661, iteration: 88231
loss: 1.015296220779419,grad_norm: 0.917299034956942, iteration: 88232
loss: 1.0176290273666382,grad_norm: 0.9999996966545458, iteration: 88233
loss: 1.0138858556747437,grad_norm: 0.9999990621704675, iteration: 88234
loss: 1.0596617460250854,grad_norm: 0.9876379982538713, iteration: 88235
loss: 1.1791977882385254,grad_norm: 0.999999072120802, iteration: 88236
loss: 1.0040898323059082,grad_norm: 0.9999991039571798, iteration: 88237
loss: 1.1273481845855713,grad_norm: 0.9999999166301645, iteration: 88238
loss: 1.192162036895752,grad_norm: 0.9999997205497877, iteration: 88239
loss: 1.130149483680725,grad_norm: 0.999999638374073, iteration: 88240
loss: 1.19926118850708,grad_norm: 0.9999995197342221, iteration: 88241
loss: 1.0079275369644165,grad_norm: 0.920065043527758, iteration: 88242
loss: 1.11372709274292,grad_norm: 0.99999949464914, iteration: 88243
loss: 1.024537205696106,grad_norm: 0.9999991324035682, iteration: 88244
loss: 1.1660192012786865,grad_norm: 0.9999995986741732, iteration: 88245
loss: 1.059471845626831,grad_norm: 0.9598833136686122, iteration: 88246
loss: 1.0453777313232422,grad_norm: 0.9304623753093689, iteration: 88247
loss: 1.0675827264785767,grad_norm: 0.9327665786939879, iteration: 88248
loss: 1.2303625345230103,grad_norm: 0.9999999121866655, iteration: 88249
loss: 1.1108781099319458,grad_norm: 0.9999997091896538, iteration: 88250
loss: 1.0264506340026855,grad_norm: 0.9509101799260432, iteration: 88251
loss: 1.0412918329238892,grad_norm: 1.000000040713543, iteration: 88252
loss: 1.0313622951507568,grad_norm: 0.9999995149561779, iteration: 88253
loss: 1.0060385465621948,grad_norm: 0.9999997345604131, iteration: 88254
loss: 1.0066148042678833,grad_norm: 0.9657459402306212, iteration: 88255
loss: 1.017789363861084,grad_norm: 0.9999999204523289, iteration: 88256
loss: 0.9665372967720032,grad_norm: 0.9999994039396742, iteration: 88257
loss: 1.0843733549118042,grad_norm: 0.9999992839486995, iteration: 88258
loss: 1.0107277631759644,grad_norm: 0.8972398256241981, iteration: 88259
loss: 1.2042388916015625,grad_norm: 0.9999998997676329, iteration: 88260
loss: 1.0718107223510742,grad_norm: 0.9999994398183096, iteration: 88261
loss: 1.008545994758606,grad_norm: 0.9999993411579218, iteration: 88262
loss: 1.0444841384887695,grad_norm: 0.9335609235276506, iteration: 88263
loss: 1.0202783346176147,grad_norm: 0.880716706774071, iteration: 88264
loss: 1.003267765045166,grad_norm: 0.8904997493088203, iteration: 88265
loss: 1.0189542770385742,grad_norm: 0.9999992022580146, iteration: 88266
loss: 0.9830606579780579,grad_norm: 0.9542215407111759, iteration: 88267
loss: 1.0045326948165894,grad_norm: 0.9999990015678197, iteration: 88268
loss: 1.1309858560562134,grad_norm: 0.9870599617543012, iteration: 88269
loss: 1.081811785697937,grad_norm: 0.9999992879275109, iteration: 88270
loss: 1.190534234046936,grad_norm: 0.9999995685624404, iteration: 88271
loss: 1.221875548362732,grad_norm: 0.9999991610952701, iteration: 88272
loss: 1.168130874633789,grad_norm: 0.9999999959278364, iteration: 88273
loss: 1.0076302289962769,grad_norm: 0.9999997542798613, iteration: 88274
loss: 1.0047277212142944,grad_norm: 0.9999998404734658, iteration: 88275
loss: 1.1794328689575195,grad_norm: 0.9999998993297465, iteration: 88276
loss: 1.0085887908935547,grad_norm: 0.9999991265201054, iteration: 88277
loss: 1.119663953781128,grad_norm: 0.9999996968035028, iteration: 88278
loss: 0.9734223484992981,grad_norm: 0.8445891575547277, iteration: 88279
loss: 0.9773591756820679,grad_norm: 0.9999996757229347, iteration: 88280
loss: 1.0026557445526123,grad_norm: 0.7828169519784908, iteration: 88281
loss: 1.018489122390747,grad_norm: 0.9999994431822727, iteration: 88282
loss: 1.0723199844360352,grad_norm: 0.952206848868726, iteration: 88283
loss: 1.1991876363754272,grad_norm: 0.9999993733445548, iteration: 88284
loss: 1.154672622680664,grad_norm: 0.9999996863102933, iteration: 88285
loss: 1.03766930103302,grad_norm: 0.8945741261995034, iteration: 88286
loss: 1.3885053396224976,grad_norm: 0.999999566353506, iteration: 88287
loss: 1.2526026964187622,grad_norm: 0.9999999510443377, iteration: 88288
loss: 1.057653784751892,grad_norm: 0.9999990193624421, iteration: 88289
loss: 1.0657702684402466,grad_norm: 0.9999993757893378, iteration: 88290
loss: 1.0576422214508057,grad_norm: 0.9999991237517475, iteration: 88291
loss: 1.148702621459961,grad_norm: 0.9999994032706501, iteration: 88292
loss: 1.3413183689117432,grad_norm: 0.9999994518575689, iteration: 88293
loss: 1.1317145824432373,grad_norm: 0.9999994249425972, iteration: 88294
loss: 1.2034623622894287,grad_norm: 0.9999993899471501, iteration: 88295
loss: 1.2105625867843628,grad_norm: 0.9999993786500296, iteration: 88296
loss: 1.0469577312469482,grad_norm: 0.999999303638867, iteration: 88297
loss: 1.1229841709136963,grad_norm: 0.999999177134961, iteration: 88298
loss: 1.0456087589263916,grad_norm: 0.9999995309023916, iteration: 88299
loss: 1.0058491230010986,grad_norm: 0.8712139691202488, iteration: 88300
loss: 1.0839202404022217,grad_norm: 0.9999992004394239, iteration: 88301
loss: 1.0996599197387695,grad_norm: 0.9999989698125893, iteration: 88302
loss: 1.2171579599380493,grad_norm: 0.9999998183243424, iteration: 88303
loss: 1.0676910877227783,grad_norm: 0.980054660737017, iteration: 88304
loss: 1.0578182935714722,grad_norm: 0.9999990622019148, iteration: 88305
loss: 1.1984050273895264,grad_norm: 0.9999996487295886, iteration: 88306
loss: 1.0850149393081665,grad_norm: 0.9999994799073012, iteration: 88307
loss: 1.1450258493423462,grad_norm: 0.9999993456615978, iteration: 88308
loss: 1.1319386959075928,grad_norm: 0.9999998107637112, iteration: 88309
loss: 1.0766769647598267,grad_norm: 0.9999993820527925, iteration: 88310
loss: 0.9957996010780334,grad_norm: 0.9988984626140649, iteration: 88311
loss: 1.021583914756775,grad_norm: 0.9999994081726187, iteration: 88312
loss: 1.0647965669631958,grad_norm: 0.91295012236111, iteration: 88313
loss: 1.0709147453308105,grad_norm: 0.9999993304405974, iteration: 88314
loss: 1.0143463611602783,grad_norm: 0.7632167472765711, iteration: 88315
loss: 1.0764156579971313,grad_norm: 0.9999993484496328, iteration: 88316
loss: 1.166787028312683,grad_norm: 0.9999996000304648, iteration: 88317
loss: 1.1354860067367554,grad_norm: 0.999999824807001, iteration: 88318
loss: 1.0972574949264526,grad_norm: 0.9999999250663363, iteration: 88319
loss: 1.0765727758407593,grad_norm: 0.9999993121744727, iteration: 88320
loss: 1.0363309383392334,grad_norm: 0.999999193201346, iteration: 88321
loss: 1.294823408126831,grad_norm: 0.9999999521052914, iteration: 88322
loss: 1.2650014162063599,grad_norm: 0.9999996805913167, iteration: 88323
loss: 1.063213586807251,grad_norm: 0.8181669112771369, iteration: 88324
loss: 1.0218616724014282,grad_norm: 0.9999991110328794, iteration: 88325
loss: 1.0872437953948975,grad_norm: 0.9999994107333952, iteration: 88326
loss: 1.0084878206253052,grad_norm: 0.999999201245005, iteration: 88327
loss: 1.127960443496704,grad_norm: 0.9999997436185536, iteration: 88328
loss: 1.0706804990768433,grad_norm: 0.9999994691365646, iteration: 88329
loss: 1.0029832124710083,grad_norm: 0.9999992406268563, iteration: 88330
loss: 1.0076043605804443,grad_norm: 0.9999991985163728, iteration: 88331
loss: 1.239987850189209,grad_norm: 0.9999996877285894, iteration: 88332
loss: 1.3234350681304932,grad_norm: 0.9999997130693965, iteration: 88333
loss: 1.0492018461227417,grad_norm: 0.8889161628128615, iteration: 88334
loss: 1.0549829006195068,grad_norm: 0.9999992770506996, iteration: 88335
loss: 1.003227949142456,grad_norm: 0.9999991586014436, iteration: 88336
loss: 0.9778043627738953,grad_norm: 0.9918489669061503, iteration: 88337
loss: 1.064111351966858,grad_norm: 0.972203054833854, iteration: 88338
loss: 1.0804877281188965,grad_norm: 0.9999998564070508, iteration: 88339
loss: 1.0474907159805298,grad_norm: 0.9999992247150684, iteration: 88340
loss: 1.2991737127304077,grad_norm: 0.9999998163803124, iteration: 88341
loss: 1.1215451955795288,grad_norm: 0.9999997533403553, iteration: 88342
loss: 1.066957712173462,grad_norm: 0.9999993451763428, iteration: 88343
loss: 1.0607898235321045,grad_norm: 0.9999996638844587, iteration: 88344
loss: 1.1088950634002686,grad_norm: 0.999999904662885, iteration: 88345
loss: 1.0179063081741333,grad_norm: 0.9999992242519771, iteration: 88346
loss: 1.2518517971038818,grad_norm: 0.9999998389040348, iteration: 88347
loss: 1.00477933883667,grad_norm: 0.9999990631869459, iteration: 88348
loss: 1.1159714460372925,grad_norm: 0.9999996624668467, iteration: 88349
loss: 1.0483648777008057,grad_norm: 0.9999994434331981, iteration: 88350
loss: 1.1392930746078491,grad_norm: 0.9999994770520286, iteration: 88351
loss: 1.1526691913604736,grad_norm: 0.9999995358135059, iteration: 88352
loss: 1.0039669275283813,grad_norm: 0.9556495010233165, iteration: 88353
loss: 1.0159565210342407,grad_norm: 0.9999991623476108, iteration: 88354
loss: 1.0789508819580078,grad_norm: 0.9999992464736053, iteration: 88355
loss: 1.1364480257034302,grad_norm: 0.9999997863720509, iteration: 88356
loss: 1.0517780780792236,grad_norm: 0.9413955624489226, iteration: 88357
loss: 1.1687791347503662,grad_norm: 0.9999997577809998, iteration: 88358
loss: 1.0140666961669922,grad_norm: 0.9999992189790629, iteration: 88359
loss: 1.2152155637741089,grad_norm: 0.9999995878050392, iteration: 88360
loss: 1.0429725646972656,grad_norm: 0.9466650956501216, iteration: 88361
loss: 1.107721209526062,grad_norm: 0.9999999539690569, iteration: 88362
loss: 1.4533463716506958,grad_norm: 0.9999998631595884, iteration: 88363
loss: 0.9937018752098083,grad_norm: 0.9999993644915766, iteration: 88364
loss: 0.9837290644645691,grad_norm: 0.848442169277019, iteration: 88365
loss: 1.1899621486663818,grad_norm: 0.9999995203023558, iteration: 88366
loss: 1.1250313520431519,grad_norm: 0.9999997004978628, iteration: 88367
loss: 1.0791569948196411,grad_norm: 0.9999994930985538, iteration: 88368
loss: 1.071893334388733,grad_norm: 0.9999994981726639, iteration: 88369
loss: 1.0662498474121094,grad_norm: 0.9999991504655282, iteration: 88370
loss: 1.2275474071502686,grad_norm: 0.9999992796312185, iteration: 88371
loss: 1.027529001235962,grad_norm: 0.9999990298848095, iteration: 88372
loss: 1.0237795114517212,grad_norm: 0.9999993238471439, iteration: 88373
loss: 1.2184033393859863,grad_norm: 0.99999990044287, iteration: 88374
loss: 1.0867300033569336,grad_norm: 0.9999993683816885, iteration: 88375
loss: 1.0682182312011719,grad_norm: 0.9999994544360482, iteration: 88376
loss: 1.0431571006774902,grad_norm: 0.9999993138302403, iteration: 88377
loss: 1.342689037322998,grad_norm: 0.999999829779871, iteration: 88378
loss: 1.1060456037521362,grad_norm: 0.9999992501336568, iteration: 88379
loss: 1.1234383583068848,grad_norm: 0.999999251915913, iteration: 88380
loss: 1.0088207721710205,grad_norm: 0.9343006035250085, iteration: 88381
loss: 1.0330899953842163,grad_norm: 0.9999993343621769, iteration: 88382
loss: 1.3764053583145142,grad_norm: 0.9999997428296674, iteration: 88383
loss: 0.9722658395767212,grad_norm: 0.999999243513855, iteration: 88384
loss: 1.0018188953399658,grad_norm: 0.9999996555086579, iteration: 88385
loss: 1.0959482192993164,grad_norm: 0.9999995690993043, iteration: 88386
loss: 1.1164876222610474,grad_norm: 0.9999992065734963, iteration: 88387
loss: 0.995862603187561,grad_norm: 0.9999991702659355, iteration: 88388
loss: 1.22171950340271,grad_norm: 0.9999999293617677, iteration: 88389
loss: 0.961185872554779,grad_norm: 0.9403196112920719, iteration: 88390
loss: 1.1039657592773438,grad_norm: 0.9999998362098373, iteration: 88391
loss: 1.0739177465438843,grad_norm: 0.9999992100212848, iteration: 88392
loss: 1.0027118921279907,grad_norm: 0.9999993626787753, iteration: 88393
loss: 1.088157296180725,grad_norm: 0.9999993718414515, iteration: 88394
loss: 1.0936006307601929,grad_norm: 0.9999998067622957, iteration: 88395
loss: 1.1855272054672241,grad_norm: 0.9999997366870279, iteration: 88396
loss: 1.0520192384719849,grad_norm: 0.9999991754713653, iteration: 88397
loss: 1.1783777475357056,grad_norm: 0.9999996981852511, iteration: 88398
loss: 0.994495153427124,grad_norm: 0.999999079495142, iteration: 88399
loss: 1.077576994895935,grad_norm: 0.9999998194303251, iteration: 88400
loss: 1.062633991241455,grad_norm: 0.9999995009018029, iteration: 88401
loss: 1.080665111541748,grad_norm: 0.9999999043469657, iteration: 88402
loss: 1.042635440826416,grad_norm: 0.9999999708497637, iteration: 88403
loss: 1.0617761611938477,grad_norm: 0.9999992609518412, iteration: 88404
loss: 1.1809422969818115,grad_norm: 0.9999991881218189, iteration: 88405
loss: 0.9939975142478943,grad_norm: 0.966019774416459, iteration: 88406
loss: 1.123766541481018,grad_norm: 0.9999991492036017, iteration: 88407
loss: 1.1782782077789307,grad_norm: 0.9999999816530748, iteration: 88408
loss: 1.0911304950714111,grad_norm: 0.999999095662711, iteration: 88409
loss: 1.0438776016235352,grad_norm: 0.9999997127334267, iteration: 88410
loss: 1.051358699798584,grad_norm: 0.9999991821741193, iteration: 88411
loss: 1.0025956630706787,grad_norm: 0.9999994490161674, iteration: 88412
loss: 1.3110761642456055,grad_norm: 0.9999992254678896, iteration: 88413
loss: 0.9965429902076721,grad_norm: 0.8648598420406931, iteration: 88414
loss: 0.9710457921028137,grad_norm: 0.9999991777431163, iteration: 88415
loss: 1.1037416458129883,grad_norm: 0.9999997235835676, iteration: 88416
loss: 0.9883169531822205,grad_norm: 0.8892699508588122, iteration: 88417
loss: 1.0448931455612183,grad_norm: 0.9992679302942503, iteration: 88418
loss: 1.1190788745880127,grad_norm: 0.9999998313158962, iteration: 88419
loss: 1.030346393585205,grad_norm: 0.9999992325719688, iteration: 88420
loss: 1.0868337154388428,grad_norm: 0.9999990800166006, iteration: 88421
loss: 1.074068546295166,grad_norm: 0.9999999280436501, iteration: 88422
loss: 1.0383520126342773,grad_norm: 0.9999993239812297, iteration: 88423
loss: 1.0030940771102905,grad_norm: 0.790722286151674, iteration: 88424
loss: 1.0593290328979492,grad_norm: 0.9999994713325472, iteration: 88425
loss: 1.1468396186828613,grad_norm: 0.9999995352800236, iteration: 88426
loss: 0.9998701214790344,grad_norm: 0.9999993720298236, iteration: 88427
loss: 1.0855447053909302,grad_norm: 0.9999992766574005, iteration: 88428
loss: 1.039976954460144,grad_norm: 0.9999994119316756, iteration: 88429
loss: 0.9996114373207092,grad_norm: 0.8674082729513681, iteration: 88430
loss: 1.0529950857162476,grad_norm: 0.9999995597967634, iteration: 88431
loss: 1.0328242778778076,grad_norm: 0.9999996032228464, iteration: 88432
loss: 0.972123384475708,grad_norm: 0.9999990842380371, iteration: 88433
loss: 1.0373072624206543,grad_norm: 0.9999998773564517, iteration: 88434
loss: 0.9945213198661804,grad_norm: 0.9999995819710584, iteration: 88435
loss: 1.1117268800735474,grad_norm: 0.9999996611634873, iteration: 88436
loss: 1.0743029117584229,grad_norm: 0.999999581471315, iteration: 88437
loss: 1.0524259805679321,grad_norm: 0.9999999005234519, iteration: 88438
loss: 1.0440226793289185,grad_norm: 0.9999995824454998, iteration: 88439
loss: 1.0031499862670898,grad_norm: 0.9999990888815428, iteration: 88440
loss: 0.9955145120620728,grad_norm: 0.8572087765823168, iteration: 88441
loss: 1.1268452405929565,grad_norm: 0.9999998011431704, iteration: 88442
loss: 1.034374475479126,grad_norm: 0.9999995969182665, iteration: 88443
loss: 1.123281717300415,grad_norm: 0.9999999005405853, iteration: 88444
loss: 1.0428334474563599,grad_norm: 0.999999099610748, iteration: 88445
loss: 0.9877234101295471,grad_norm: 0.9775716296595469, iteration: 88446
loss: 1.008621096611023,grad_norm: 0.9999994155402725, iteration: 88447
loss: 1.0903573036193848,grad_norm: 0.9999996463753268, iteration: 88448
loss: 1.0406324863433838,grad_norm: 0.9999999867085929, iteration: 88449
loss: 1.1322308778762817,grad_norm: 0.9999998427602917, iteration: 88450
loss: 1.091054916381836,grad_norm: 0.9999998149827366, iteration: 88451
loss: 0.9942235350608826,grad_norm: 0.9999994207848945, iteration: 88452
loss: 1.035581111907959,grad_norm: 0.9999990859948305, iteration: 88453
loss: 1.040769338607788,grad_norm: 0.999999996920376, iteration: 88454
loss: 1.1612972021102905,grad_norm: 0.9999999369960176, iteration: 88455
loss: 1.0192264318466187,grad_norm: 0.8924905906272276, iteration: 88456
loss: 1.0744545459747314,grad_norm: 0.9398760943515867, iteration: 88457
loss: 0.9945220351219177,grad_norm: 0.9999996041843481, iteration: 88458
loss: 1.0684913396835327,grad_norm: 0.9999997611186978, iteration: 88459
loss: 0.9789784550666809,grad_norm: 0.7809959914131868, iteration: 88460
loss: 1.041285514831543,grad_norm: 0.9999992589434366, iteration: 88461
loss: 1.1563011407852173,grad_norm: 0.9999991948908704, iteration: 88462
loss: 1.033983588218689,grad_norm: 0.9999996333053481, iteration: 88463
loss: 1.0949145555496216,grad_norm: 0.9999998602553675, iteration: 88464
loss: 1.1286015510559082,grad_norm: 1.0000000726717517, iteration: 88465
loss: 1.1874886751174927,grad_norm: 0.999999712814848, iteration: 88466
loss: 1.0643835067749023,grad_norm: 0.9999995654588902, iteration: 88467
loss: 1.0642918348312378,grad_norm: 0.9999994184937218, iteration: 88468
loss: 1.218775749206543,grad_norm: 0.9999996971099671, iteration: 88469
loss: 1.0000804662704468,grad_norm: 0.9999991332535686, iteration: 88470
loss: 1.0072071552276611,grad_norm: 0.99999932950012, iteration: 88471
loss: 1.1043946743011475,grad_norm: 0.9999996953643125, iteration: 88472
loss: 1.0204180479049683,grad_norm: 0.9999998799546608, iteration: 88473
loss: 1.0102052688598633,grad_norm: 0.999999048287899, iteration: 88474
loss: 1.0609097480773926,grad_norm: 0.9999998886845589, iteration: 88475
loss: 1.2015712261199951,grad_norm: 0.9999996652241882, iteration: 88476
loss: 1.1146830320358276,grad_norm: 0.9999998361140582, iteration: 88477
loss: 1.0111385583877563,grad_norm: 0.9999991276079563, iteration: 88478
loss: 1.004626750946045,grad_norm: 0.7510544905303956, iteration: 88479
loss: 0.9976920485496521,grad_norm: 0.9999995851074484, iteration: 88480
loss: 1.1919081211090088,grad_norm: 0.9999998972840946, iteration: 88481
loss: 0.9859887361526489,grad_norm: 0.9999990927267627, iteration: 88482
loss: 1.0753871202468872,grad_norm: 0.9999995510811397, iteration: 88483
loss: 1.15653395652771,grad_norm: 0.9999993974375032, iteration: 88484
loss: 1.0146863460540771,grad_norm: 0.9999990914367037, iteration: 88485
loss: 1.1417418718338013,grad_norm: 0.9999998382656473, iteration: 88486
loss: 1.0766760110855103,grad_norm: 0.9833926805325897, iteration: 88487
loss: 1.2023757696151733,grad_norm: 0.999999161718146, iteration: 88488
loss: 1.066843032836914,grad_norm: 0.9999997599781476, iteration: 88489
loss: 1.037539005279541,grad_norm: 0.9999999746830386, iteration: 88490
loss: 1.1256645917892456,grad_norm: 0.9999997402654006, iteration: 88491
loss: 1.070898413658142,grad_norm: 0.9999992710396782, iteration: 88492
loss: 1.089550495147705,grad_norm: 0.9999998881911423, iteration: 88493
loss: 1.157821536064148,grad_norm: 0.9999998649614368, iteration: 88494
loss: 1.0072157382965088,grad_norm: 0.9999998153962653, iteration: 88495
loss: 1.0856082439422607,grad_norm: 0.9999990673769824, iteration: 88496
loss: 1.0990246534347534,grad_norm: 0.9999998324683685, iteration: 88497
loss: 1.0248132944107056,grad_norm: 0.9999993146555767, iteration: 88498
loss: 1.0815808773040771,grad_norm: 0.9999999311323249, iteration: 88499
loss: 1.0915037393569946,grad_norm: 0.9999995165265061, iteration: 88500
loss: 1.2380915880203247,grad_norm: 0.9999998511060463, iteration: 88501
loss: 1.0769577026367188,grad_norm: 0.9999992301468732, iteration: 88502
loss: 1.0853734016418457,grad_norm: 0.9999990973874788, iteration: 88503
loss: 1.176217794418335,grad_norm: 0.9999998342553378, iteration: 88504
loss: 1.058957576751709,grad_norm: 0.9999999542560659, iteration: 88505
loss: 1.0899125337600708,grad_norm: 0.9783142957795449, iteration: 88506
loss: 1.0806570053100586,grad_norm: 0.9999996292214633, iteration: 88507
loss: 1.188036322593689,grad_norm: 0.9999993198831361, iteration: 88508
loss: 1.0973198413848877,grad_norm: 0.999999683414507, iteration: 88509
loss: 1.4230825901031494,grad_norm: 0.9999999466705578, iteration: 88510
loss: 1.0559463500976562,grad_norm: 0.9999996051388914, iteration: 88511
loss: 1.0186588764190674,grad_norm: 0.9999995552258215, iteration: 88512
loss: 1.0415794849395752,grad_norm: 0.9446312950742342, iteration: 88513
loss: 0.9928680062294006,grad_norm: 0.9999994614246757, iteration: 88514
loss: 1.1033893823623657,grad_norm: 0.9999997369918033, iteration: 88515
loss: 1.0475775003433228,grad_norm: 0.9999990053590608, iteration: 88516
loss: 1.108752965927124,grad_norm: 0.999999497469017, iteration: 88517
loss: 1.016944169998169,grad_norm: 0.9999992547091002, iteration: 88518
loss: 1.2362853288650513,grad_norm: 0.9999996957399911, iteration: 88519
loss: 1.1005045175552368,grad_norm: 0.9999993592763433, iteration: 88520
loss: 1.1131423711776733,grad_norm: 0.9999993162889796, iteration: 88521
loss: 1.0747634172439575,grad_norm: 0.9999991592482997, iteration: 88522
loss: 1.0578585863113403,grad_norm: 0.999999153373331, iteration: 88523
loss: 1.0691572427749634,grad_norm: 0.9999997855535584, iteration: 88524
loss: 1.0273244380950928,grad_norm: 0.9999992161756421, iteration: 88525
loss: 1.1673399209976196,grad_norm: 0.9999999367827053, iteration: 88526
loss: 1.144503116607666,grad_norm: 0.9999992018965059, iteration: 88527
loss: 1.1658707857131958,grad_norm: 0.9999998397552228, iteration: 88528
loss: 1.085127353668213,grad_norm: 0.9999996359154805, iteration: 88529
loss: 1.1597533226013184,grad_norm: 0.9999999203328814, iteration: 88530
loss: 1.1037441492080688,grad_norm: 0.9999999558510537, iteration: 88531
loss: 1.0135091543197632,grad_norm: 0.9999992310797887, iteration: 88532
loss: 1.1628376245498657,grad_norm: 0.9999992590570539, iteration: 88533
loss: 1.057419776916504,grad_norm: 0.9999991671589689, iteration: 88534
loss: 1.0869518518447876,grad_norm: 0.9999992901184156, iteration: 88535
loss: 1.0081626176834106,grad_norm: 0.970063235227137, iteration: 88536
loss: 1.007150650024414,grad_norm: 0.99999935986024, iteration: 88537
loss: 1.0114046335220337,grad_norm: 0.9999997152105671, iteration: 88538
loss: 1.0472509860992432,grad_norm: 0.9999997829754135, iteration: 88539
loss: 1.0640467405319214,grad_norm: 0.9999997547238773, iteration: 88540
loss: 1.1486425399780273,grad_norm: 0.9999999444734828, iteration: 88541
loss: 1.2701743841171265,grad_norm: 0.9999999832044583, iteration: 88542
loss: 1.1093926429748535,grad_norm: 0.9999994852782549, iteration: 88543
loss: 1.0166348218917847,grad_norm: 0.9753814017717766, iteration: 88544
loss: 1.0638130903244019,grad_norm: 0.9999992188323655, iteration: 88545
loss: 1.038678765296936,grad_norm: 0.9999991203303589, iteration: 88546
loss: 0.9966148734092712,grad_norm: 0.999999145076032, iteration: 88547
loss: 1.0535781383514404,grad_norm: 0.9999997270525496, iteration: 88548
loss: 1.0022932291030884,grad_norm: 0.9999998405216265, iteration: 88549
loss: 1.0161309242248535,grad_norm: 0.9999990533622165, iteration: 88550
loss: 1.0577185153961182,grad_norm: 0.9142129228011364, iteration: 88551
loss: 1.0139594078063965,grad_norm: 0.8917246666627828, iteration: 88552
loss: 1.047654390335083,grad_norm: 0.9999996692786682, iteration: 88553
loss: 1.2620909214019775,grad_norm: 0.9999998431011666, iteration: 88554
loss: 0.9925349354743958,grad_norm: 0.8792025409006676, iteration: 88555
loss: 1.0678740739822388,grad_norm: 0.9999993888054203, iteration: 88556
loss: 1.0905134677886963,grad_norm: 0.9999990384311311, iteration: 88557
loss: 1.2112371921539307,grad_norm: 0.9999997703648829, iteration: 88558
loss: 1.0614981651306152,grad_norm: 0.9999993735154064, iteration: 88559
loss: 1.1274912357330322,grad_norm: 0.9999996007049005, iteration: 88560
loss: 1.0325373411178589,grad_norm: 0.9999991787677588, iteration: 88561
loss: 1.0172815322875977,grad_norm: 0.9999994673169547, iteration: 88562
loss: 1.0463378429412842,grad_norm: 0.9999992095788046, iteration: 88563
loss: 1.1022858619689941,grad_norm: 0.9696626485184658, iteration: 88564
loss: 0.9926174879074097,grad_norm: 0.9700310373651074, iteration: 88565
loss: 1.10061776638031,grad_norm: 0.99999935834077, iteration: 88566
loss: 1.1330814361572266,grad_norm: 0.9999996096059119, iteration: 88567
loss: 1.094840407371521,grad_norm: 0.9999996361150173, iteration: 88568
loss: 1.063020944595337,grad_norm: 0.9915403833860675, iteration: 88569
loss: 1.1483837366104126,grad_norm: 0.9999992071814676, iteration: 88570
loss: 1.1005042791366577,grad_norm: 0.9999999622369188, iteration: 88571
loss: 1.0615626573562622,grad_norm: 0.9999993589629033, iteration: 88572
loss: 1.050083041191101,grad_norm: 0.9999993274953494, iteration: 88573
loss: 1.05649995803833,grad_norm: 0.9999994709427908, iteration: 88574
loss: 1.0986673831939697,grad_norm: 0.9999996147747915, iteration: 88575
loss: 1.1024800539016724,grad_norm: 0.9999990140589143, iteration: 88576
loss: 1.0082569122314453,grad_norm: 0.8294673963322347, iteration: 88577
loss: 1.1123031377792358,grad_norm: 0.9999996037864383, iteration: 88578
loss: 1.0317509174346924,grad_norm: 0.9999992284343423, iteration: 88579
loss: 1.026497721672058,grad_norm: 0.9999993452631162, iteration: 88580
loss: 0.9832326769828796,grad_norm: 0.9999992137731923, iteration: 88581
loss: 1.0546468496322632,grad_norm: 0.829216343431852, iteration: 88582
loss: 1.0089823007583618,grad_norm: 1.0000000063460306, iteration: 88583
loss: 0.9804439544677734,grad_norm: 0.9108062395183851, iteration: 88584
loss: 1.0667275190353394,grad_norm: 0.9999996602262852, iteration: 88585
loss: 1.0587043762207031,grad_norm: 0.9999993612386553, iteration: 88586
loss: 1.0091944932937622,grad_norm: 0.9999990591621398, iteration: 88587
loss: 1.0209590196609497,grad_norm: 0.9999990605847507, iteration: 88588
loss: 1.0321869850158691,grad_norm: 0.9999991661626755, iteration: 88589
loss: 0.99143385887146,grad_norm: 0.9999993010021162, iteration: 88590
loss: 1.0656036138534546,grad_norm: 0.9999998033574263, iteration: 88591
loss: 1.0180665254592896,grad_norm: 0.9999991074931396, iteration: 88592
loss: 1.0979242324829102,grad_norm: 0.9999996949557243, iteration: 88593
loss: 1.0535622835159302,grad_norm: 0.9999995095418349, iteration: 88594
loss: 1.0411691665649414,grad_norm: 0.9999993802457071, iteration: 88595
loss: 1.0098789930343628,grad_norm: 0.9999999112536131, iteration: 88596
loss: 1.062351942062378,grad_norm: 0.9999994069676102, iteration: 88597
loss: 1.018129825592041,grad_norm: 0.9999990154037096, iteration: 88598
loss: 1.0532737970352173,grad_norm: 0.9999991401596152, iteration: 88599
loss: 1.3279474973678589,grad_norm: 0.9999997373269742, iteration: 88600
loss: 1.0199501514434814,grad_norm: 0.9999991357417379, iteration: 88601
loss: 0.986492931842804,grad_norm: 0.9999991267345866, iteration: 88602
loss: 1.045142412185669,grad_norm: 0.9999993591094679, iteration: 88603
loss: 0.9657556414604187,grad_norm: 0.9999994836165931, iteration: 88604
loss: 1.0846787691116333,grad_norm: 0.9999993099713255, iteration: 88605
loss: 1.0801026821136475,grad_norm: 0.9999993839726895, iteration: 88606
loss: 0.9786124229431152,grad_norm: 0.999999091650814, iteration: 88607
loss: 1.0315850973129272,grad_norm: 0.9999992513816078, iteration: 88608
loss: 1.015030026435852,grad_norm: 0.9546601704622294, iteration: 88609
loss: 1.0152641534805298,grad_norm: 0.7866611472293309, iteration: 88610
loss: 1.0297006368637085,grad_norm: 0.8298943398397777, iteration: 88611
loss: 0.9980805516242981,grad_norm: 0.9999993003594194, iteration: 88612
loss: 1.0011709928512573,grad_norm: 0.787623953235848, iteration: 88613
loss: 1.0457203388214111,grad_norm: 0.9999994145246138, iteration: 88614
loss: 1.045868158340454,grad_norm: 0.9999989856880246, iteration: 88615
loss: 0.9719522595405579,grad_norm: 0.8426118415730655, iteration: 88616
loss: 1.0852073431015015,grad_norm: 0.9999995232154099, iteration: 88617
loss: 1.0179251432418823,grad_norm: 0.9744658450091664, iteration: 88618
loss: 1.0392920970916748,grad_norm: 0.9999995725334732, iteration: 88619
loss: 1.0141968727111816,grad_norm: 0.9999991733766784, iteration: 88620
loss: 0.9847898483276367,grad_norm: 0.9999998959028406, iteration: 88621
loss: 1.074631690979004,grad_norm: 0.99999990373222, iteration: 88622
loss: 1.055433988571167,grad_norm: 0.9999990327937694, iteration: 88623
loss: 1.2022457122802734,grad_norm: 0.9999998859577297, iteration: 88624
loss: 1.0256043672561646,grad_norm: 0.999999164609815, iteration: 88625
loss: 1.052490234375,grad_norm: 0.9999994953160866, iteration: 88626
loss: 1.0806206464767456,grad_norm: 0.9999994761244586, iteration: 88627
loss: 1.089187741279602,grad_norm: 0.9999991414135668, iteration: 88628
loss: 1.0717685222625732,grad_norm: 0.9999992541354865, iteration: 88629
loss: 1.1001989841461182,grad_norm: 0.9999995816215805, iteration: 88630
loss: 1.048986792564392,grad_norm: 0.9732367615964036, iteration: 88631
loss: 1.078900933265686,grad_norm: 0.9999999672290627, iteration: 88632
loss: 1.0726712942123413,grad_norm: 0.9999997834483322, iteration: 88633
loss: 1.1483612060546875,grad_norm: 0.9999991694516849, iteration: 88634
loss: 1.18659508228302,grad_norm: 0.999999683290301, iteration: 88635
loss: 0.9953433275222778,grad_norm: 0.9999991893346152, iteration: 88636
loss: 0.9975462555885315,grad_norm: 0.999999596467191, iteration: 88637
loss: 1.015548825263977,grad_norm: 0.9999991742458183, iteration: 88638
loss: 0.9884077906608582,grad_norm: 0.9999991744079778, iteration: 88639
loss: 1.0868886709213257,grad_norm: 0.9999992336805669, iteration: 88640
loss: 1.0866645574569702,grad_norm: 0.9999993532781929, iteration: 88641
loss: 1.0175862312316895,grad_norm: 0.9999992253436384, iteration: 88642
loss: 0.9874255657196045,grad_norm: 0.999999187091458, iteration: 88643
loss: 1.0156362056732178,grad_norm: 0.9178514186655637, iteration: 88644
loss: 1.0488673448562622,grad_norm: 0.9999989890169714, iteration: 88645
loss: 1.156654715538025,grad_norm: 0.9999995172198455, iteration: 88646
loss: 1.0544564723968506,grad_norm: 0.9999990558933929, iteration: 88647
loss: 1.0472546815872192,grad_norm: 0.9139439860770049, iteration: 88648
loss: 1.1221528053283691,grad_norm: 0.999999685300509, iteration: 88649
loss: 1.0727543830871582,grad_norm: 0.999999932230598, iteration: 88650
loss: 0.9864993095397949,grad_norm: 0.9999990473621944, iteration: 88651
loss: 1.0214214324951172,grad_norm: 0.9999990925205405, iteration: 88652
loss: 1.2176260948181152,grad_norm: 0.9999997419299083, iteration: 88653
loss: 1.0631046295166016,grad_norm: 0.9999996522929385, iteration: 88654
loss: 1.0599476099014282,grad_norm: 0.9914445009577553, iteration: 88655
loss: 1.1268950700759888,grad_norm: 0.9999995592208697, iteration: 88656
loss: 1.1394251585006714,grad_norm: 0.9999992122401723, iteration: 88657
loss: 1.0459810495376587,grad_norm: 0.9999993191628052, iteration: 88658
loss: 1.1689754724502563,grad_norm: 0.9999996705628197, iteration: 88659
loss: 1.1089887619018555,grad_norm: 0.9999999104860007, iteration: 88660
loss: 1.2849268913269043,grad_norm: 0.9999999176758831, iteration: 88661
loss: 1.1009384393692017,grad_norm: 0.9999996801298972, iteration: 88662
loss: 1.0889043807983398,grad_norm: 0.9999990942427227, iteration: 88663
loss: 1.5714635848999023,grad_norm: 0.9999999722915103, iteration: 88664
loss: 0.971455454826355,grad_norm: 0.9999992421968313, iteration: 88665
loss: 1.0907121896743774,grad_norm: 0.9999999175432452, iteration: 88666
loss: 1.2463393211364746,grad_norm: 0.9999999268120826, iteration: 88667
loss: 1.0473499298095703,grad_norm: 0.9684622720056206, iteration: 88668
loss: 1.0186469554901123,grad_norm: 0.9394721198944092, iteration: 88669
loss: 1.0388380289077759,grad_norm: 0.9999995173429715, iteration: 88670
loss: 1.3379106521606445,grad_norm: 0.9999997984334502, iteration: 88671
loss: 1.13132905960083,grad_norm: 0.9999996976098925, iteration: 88672
loss: 1.124106764793396,grad_norm: 0.9999999810535232, iteration: 88673
loss: 1.1061456203460693,grad_norm: 0.999999546538796, iteration: 88674
loss: 1.2648706436157227,grad_norm: 0.9999999235762924, iteration: 88675
loss: 1.1215777397155762,grad_norm: 0.9999997248511899, iteration: 88676
loss: 1.1559911966323853,grad_norm: 0.9999995563365733, iteration: 88677
loss: 1.462408423423767,grad_norm: 1.000000012705747, iteration: 88678
loss: 1.227256417274475,grad_norm: 0.9999992697382902, iteration: 88679
loss: 1.1599372625350952,grad_norm: 0.9999997897685912, iteration: 88680
loss: 1.4593989849090576,grad_norm: 0.9999998669790545, iteration: 88681
loss: 1.1488072872161865,grad_norm: 0.9999999533325502, iteration: 88682
loss: 1.5115230083465576,grad_norm: 1.0000000440753054, iteration: 88683
loss: 1.0305522680282593,grad_norm: 0.999999205762856, iteration: 88684
loss: 0.988980233669281,grad_norm: 0.9999989896130813, iteration: 88685
loss: 1.0623432397842407,grad_norm: 0.9999998449284451, iteration: 88686
loss: 1.1150999069213867,grad_norm: 0.9999997640680108, iteration: 88687
loss: 1.2338316440582275,grad_norm: 0.9999996372022502, iteration: 88688
loss: 1.1480765342712402,grad_norm: 0.9999996054685795, iteration: 88689
loss: 1.2871674299240112,grad_norm: 0.9999997819159542, iteration: 88690
loss: 1.1432673931121826,grad_norm: 0.9999998171328641, iteration: 88691
loss: 1.1135365962982178,grad_norm: 0.9999995115740699, iteration: 88692
loss: 0.9945351481437683,grad_norm: 0.8696920067914093, iteration: 88693
loss: 1.1471518278121948,grad_norm: 0.9999999194742405, iteration: 88694
loss: 1.3838284015655518,grad_norm: 0.9999997109307903, iteration: 88695
loss: 1.0864092111587524,grad_norm: 0.9999998720005532, iteration: 88696
loss: 1.098644733428955,grad_norm: 0.9999992793666309, iteration: 88697
loss: 1.0947003364562988,grad_norm: 0.9999996295440743, iteration: 88698
loss: 1.0712343454360962,grad_norm: 0.999999983516192, iteration: 88699
loss: 1.182794451713562,grad_norm: 0.9999998150052836, iteration: 88700
loss: 1.1150798797607422,grad_norm: 0.9999992401834261, iteration: 88701
loss: 1.4140719175338745,grad_norm: 0.9999999799830753, iteration: 88702
loss: 1.0924330949783325,grad_norm: 0.999999591319907, iteration: 88703
loss: 1.23142409324646,grad_norm: 0.999999988820347, iteration: 88704
loss: 1.086432695388794,grad_norm: 0.9999991132064622, iteration: 88705
loss: 1.0834648609161377,grad_norm: 1.000000053309217, iteration: 88706
loss: 1.0698269605636597,grad_norm: 0.9999991289013987, iteration: 88707
loss: 1.0469709634780884,grad_norm: 0.9999990287189309, iteration: 88708
loss: 1.02570378780365,grad_norm: 0.9999991252028596, iteration: 88709
loss: 1.1079909801483154,grad_norm: 0.9999998505186289, iteration: 88710
loss: 1.1026941537857056,grad_norm: 0.9999991746595613, iteration: 88711
loss: 1.0383198261260986,grad_norm: 0.9999993936788708, iteration: 88712
loss: 1.0887889862060547,grad_norm: 1.0000000066319772, iteration: 88713
loss: 1.0748710632324219,grad_norm: 0.9999997654312733, iteration: 88714
loss: 1.1540067195892334,grad_norm: 0.9999996664078918, iteration: 88715
loss: 1.0243794918060303,grad_norm: 0.9739235325187189, iteration: 88716
loss: 1.1257374286651611,grad_norm: 0.9999990292026626, iteration: 88717
loss: 1.1325008869171143,grad_norm: 0.9999997505398248, iteration: 88718
loss: 1.122298002243042,grad_norm: 0.9999990929451352, iteration: 88719
loss: 1.1227219104766846,grad_norm: 0.9999991308110202, iteration: 88720
loss: 1.0696325302124023,grad_norm: 0.9999996613199262, iteration: 88721
loss: 1.0340156555175781,grad_norm: 0.9945112255854391, iteration: 88722
loss: 1.0858176946640015,grad_norm: 0.9999998377409608, iteration: 88723
loss: 1.080521821975708,grad_norm: 0.9999993299866204, iteration: 88724
loss: 1.0200669765472412,grad_norm: 0.9999990169684201, iteration: 88725
loss: 1.0950679779052734,grad_norm: 0.9999992439166164, iteration: 88726
loss: 1.1909376382827759,grad_norm: 0.9999996224052636, iteration: 88727
loss: 1.0358256101608276,grad_norm: 0.9999991755448698, iteration: 88728
loss: 1.1284396648406982,grad_norm: 0.9999994232134384, iteration: 88729
loss: 1.042906641960144,grad_norm: 0.9999991224054834, iteration: 88730
loss: 0.9778662323951721,grad_norm: 0.999999671193901, iteration: 88731
loss: 1.0335538387298584,grad_norm: 0.9999994910700047, iteration: 88732
loss: 1.0030219554901123,grad_norm: 0.8911674339594127, iteration: 88733
loss: 1.077773928642273,grad_norm: 0.9999998253559071, iteration: 88734
loss: 1.0326913595199585,grad_norm: 0.9999990258612211, iteration: 88735
loss: 1.1603549718856812,grad_norm: 0.9999997790534588, iteration: 88736
loss: 1.0072098970413208,grad_norm: 0.9745482967275936, iteration: 88737
loss: 1.1010794639587402,grad_norm: 0.9999998530464166, iteration: 88738
loss: 1.2135685682296753,grad_norm: 0.9999998237569993, iteration: 88739
loss: 1.0035474300384521,grad_norm: 0.9999990218915709, iteration: 88740
loss: 1.0700668096542358,grad_norm: 0.9999994427805903, iteration: 88741
loss: 1.0215611457824707,grad_norm: 0.9999993497149167, iteration: 88742
loss: 1.0610243082046509,grad_norm: 0.9999995226463696, iteration: 88743
loss: 1.1106805801391602,grad_norm: 0.9999999853127826, iteration: 88744
loss: 1.1531623601913452,grad_norm: 0.9999996886395862, iteration: 88745
loss: 0.9392781257629395,grad_norm: 0.988447720952571, iteration: 88746
loss: 0.9962224364280701,grad_norm: 0.8904937231716559, iteration: 88747
loss: 1.0536853075027466,grad_norm: 0.9999993314404078, iteration: 88748
loss: 1.2904307842254639,grad_norm: 0.9999998729996801, iteration: 88749
loss: 1.1423825025558472,grad_norm: 0.9999998689650844, iteration: 88750
loss: 1.0544843673706055,grad_norm: 0.9999995089544873, iteration: 88751
loss: 1.1401100158691406,grad_norm: 0.9999993261232136, iteration: 88752
loss: 1.0390645265579224,grad_norm: 0.9999998266623292, iteration: 88753
loss: 1.1428461074829102,grad_norm: 0.9999998605876782, iteration: 88754
loss: 1.0439465045928955,grad_norm: 0.9999996779946959, iteration: 88755
loss: 0.9946175813674927,grad_norm: 0.9999990785857992, iteration: 88756
loss: 1.2598053216934204,grad_norm: 0.999999800892014, iteration: 88757
loss: 0.9987887740135193,grad_norm: 0.9999990729456001, iteration: 88758
loss: 1.0785560607910156,grad_norm: 0.9999997694717033, iteration: 88759
loss: 1.114150047302246,grad_norm: 0.9999996116924917, iteration: 88760
loss: 1.1820495128631592,grad_norm: 0.9999998855658807, iteration: 88761
loss: 1.0550230741500854,grad_norm: 0.9071134200262643, iteration: 88762
loss: 1.2013274431228638,grad_norm: 0.9999992444217596, iteration: 88763
loss: 1.0271296501159668,grad_norm: 0.9999992650929501, iteration: 88764
loss: 1.3714416027069092,grad_norm: 0.9999999117151555, iteration: 88765
loss: 1.133052945137024,grad_norm: 0.999999824374968, iteration: 88766
loss: 1.2520772218704224,grad_norm: 0.999999989746041, iteration: 88767
loss: 1.1804633140563965,grad_norm: 0.9999998146737177, iteration: 88768
loss: 1.0503814220428467,grad_norm: 0.9999993105858244, iteration: 88769
loss: 1.1853889226913452,grad_norm: 0.9999998614472294, iteration: 88770
loss: 0.9918871521949768,grad_norm: 0.9999992605712895, iteration: 88771
loss: 1.009028673171997,grad_norm: 0.9999993904908947, iteration: 88772
loss: 1.1294821500778198,grad_norm: 0.9999991811731896, iteration: 88773
loss: 1.0037838220596313,grad_norm: 0.9999994206393865, iteration: 88774
loss: 1.0298157930374146,grad_norm: 0.9999996487373511, iteration: 88775
loss: 1.110054612159729,grad_norm: 0.9999994911633526, iteration: 88776
loss: 1.0760173797607422,grad_norm: 0.9999991366639812, iteration: 88777
loss: 0.9976328015327454,grad_norm: 0.9999996809111615, iteration: 88778
loss: 1.104448914527893,grad_norm: 0.9999990572780499, iteration: 88779
loss: 1.016721487045288,grad_norm: 0.9999991976367757, iteration: 88780
loss: 1.0573557615280151,grad_norm: 0.9999992008460358, iteration: 88781
loss: 1.224184274673462,grad_norm: 0.9999992866021286, iteration: 88782
loss: 1.0772391557693481,grad_norm: 0.9999997327338578, iteration: 88783
loss: 1.0920436382293701,grad_norm: 0.9999994728993049, iteration: 88784
loss: 1.1025217771530151,grad_norm: 0.9999995026302702, iteration: 88785
loss: 1.1763015985488892,grad_norm: 0.9999999950361645, iteration: 88786
loss: 1.0712757110595703,grad_norm: 0.9999993474327232, iteration: 88787
loss: 1.0308258533477783,grad_norm: 0.9999990617780978, iteration: 88788
loss: 1.1004936695098877,grad_norm: 0.9999992833653389, iteration: 88789
loss: 1.0560356378555298,grad_norm: 0.999999119676634, iteration: 88790
loss: 1.0827724933624268,grad_norm: 0.9999997394304196, iteration: 88791
loss: 1.0841903686523438,grad_norm: 0.9999994708713904, iteration: 88792
loss: 1.0298932790756226,grad_norm: 0.9999990336558702, iteration: 88793
loss: 1.135067105293274,grad_norm: 0.9999994222415145, iteration: 88794
loss: 1.141249656677246,grad_norm: 0.9999994912964962, iteration: 88795
loss: 1.1062036752700806,grad_norm: 0.9918356934762393, iteration: 88796
loss: 1.091919183731079,grad_norm: 0.9999994092534799, iteration: 88797
loss: 1.154953122138977,grad_norm: 0.9999991470745021, iteration: 88798
loss: 1.4428611993789673,grad_norm: 0.9999999333301022, iteration: 88799
loss: 1.0464563369750977,grad_norm: 0.9999993302338851, iteration: 88800
loss: 1.1355587244033813,grad_norm: 0.943831021652443, iteration: 88801
loss: 1.0009350776672363,grad_norm: 0.9790666658552502, iteration: 88802
loss: 1.0541273355484009,grad_norm: 0.9999994595311607, iteration: 88803
loss: 1.2273775339126587,grad_norm: 0.999999935866106, iteration: 88804
loss: 1.1509265899658203,grad_norm: 0.9999997488385947, iteration: 88805
loss: 1.2054773569107056,grad_norm: 0.999999697126365, iteration: 88806
loss: 1.1192469596862793,grad_norm: 0.9999992309696796, iteration: 88807
loss: 1.0914446115493774,grad_norm: 0.999999371164025, iteration: 88808
loss: 1.1902543306350708,grad_norm: 0.9999996958947854, iteration: 88809
loss: 1.0719752311706543,grad_norm: 0.9999995013315264, iteration: 88810
loss: 1.075410008430481,grad_norm: 0.9999993692775495, iteration: 88811
loss: 1.1658210754394531,grad_norm: 0.9999990114074802, iteration: 88812
loss: 1.2582415342330933,grad_norm: 0.9999993256775869, iteration: 88813
loss: 1.1399085521697998,grad_norm: 0.999999236581109, iteration: 88814
loss: 1.0803576707839966,grad_norm: 0.9999992787180665, iteration: 88815
loss: 1.1336034536361694,grad_norm: 0.9310110371387333, iteration: 88816
loss: 1.3616052865982056,grad_norm: 0.9999999235499574, iteration: 88817
loss: 1.1935558319091797,grad_norm: 0.9999996847633368, iteration: 88818
loss: 1.212960958480835,grad_norm: 0.9999996575285237, iteration: 88819
loss: 1.1643686294555664,grad_norm: 0.9999997705928387, iteration: 88820
loss: 1.1996153593063354,grad_norm: 0.9999998429495833, iteration: 88821
loss: 1.042176365852356,grad_norm: 0.9999991060923852, iteration: 88822
loss: 1.2793406248092651,grad_norm: 0.9999999927468269, iteration: 88823
loss: 1.0634154081344604,grad_norm: 0.9999993179938165, iteration: 88824
loss: 1.2931512594223022,grad_norm: 0.9999995746478058, iteration: 88825
loss: 1.0982186794281006,grad_norm: 0.9999993523396303, iteration: 88826
loss: 1.2182836532592773,grad_norm: 0.9999997452188214, iteration: 88827
loss: 1.1293050050735474,grad_norm: 0.999999829928321, iteration: 88828
loss: 1.0803035497665405,grad_norm: 0.9232523121366963, iteration: 88829
loss: 1.1115540266036987,grad_norm: 0.9999991476724982, iteration: 88830
loss: 1.2015163898468018,grad_norm: 0.9999996672061814, iteration: 88831
loss: 1.2102820873260498,grad_norm: 0.999999320625282, iteration: 88832
loss: 1.0477113723754883,grad_norm: 0.999999757980403, iteration: 88833
loss: 1.1782283782958984,grad_norm: 0.9999995726476933, iteration: 88834
loss: 1.0900378227233887,grad_norm: 0.9999992785289636, iteration: 88835
loss: 1.0948734283447266,grad_norm: 0.9999992384685988, iteration: 88836
loss: 1.09371018409729,grad_norm: 0.9999999668290572, iteration: 88837
loss: 1.0915316343307495,grad_norm: 0.999999675803964, iteration: 88838
loss: 1.4449708461761475,grad_norm: 0.9999996686743783, iteration: 88839
loss: 1.116451382637024,grad_norm: 0.999999104295894, iteration: 88840
loss: 1.0706424713134766,grad_norm: 0.9999995603808275, iteration: 88841
loss: 1.085448145866394,grad_norm: 0.9999992533413528, iteration: 88842
loss: 1.3310108184814453,grad_norm: 1.0000000289460618, iteration: 88843
loss: 1.055647373199463,grad_norm: 0.9999990366260338, iteration: 88844
loss: 0.9705484509468079,grad_norm: 0.9071479142036466, iteration: 88845
loss: 1.0906447172164917,grad_norm: 0.9999992853975168, iteration: 88846
loss: 1.0583205223083496,grad_norm: 0.9999990416272103, iteration: 88847
loss: 1.2414963245391846,grad_norm: 0.9999998682779097, iteration: 88848
loss: 1.111899971961975,grad_norm: 0.9999990495607332, iteration: 88849
loss: 1.1459983587265015,grad_norm: 0.9999995990060537, iteration: 88850
loss: 1.293349266052246,grad_norm: 0.9999997055689639, iteration: 88851
loss: 1.1382980346679688,grad_norm: 0.9999997732280429, iteration: 88852
loss: 1.0954272747039795,grad_norm: 0.9999996911820866, iteration: 88853
loss: 1.199952483177185,grad_norm: 0.9999998012479916, iteration: 88854
loss: 1.2757205963134766,grad_norm: 0.9999999446278877, iteration: 88855
loss: 1.0648109912872314,grad_norm: 0.9999990333773842, iteration: 88856
loss: 1.1558334827423096,grad_norm: 0.9999997409102094, iteration: 88857
loss: 1.1410616636276245,grad_norm: 0.999999632543462, iteration: 88858
loss: 1.3107097148895264,grad_norm: 0.9999997870424308, iteration: 88859
loss: 1.0747710466384888,grad_norm: 0.9999991133229494, iteration: 88860
loss: 1.0167815685272217,grad_norm: 0.9999992512569277, iteration: 88861
loss: 1.0109212398529053,grad_norm: 0.927076947568996, iteration: 88862
loss: 1.1014540195465088,grad_norm: 0.9999992615656953, iteration: 88863
loss: 1.1052790880203247,grad_norm: 0.9999997437351194, iteration: 88864
loss: 1.0277396440505981,grad_norm: 0.9550411317552613, iteration: 88865
loss: 1.025077223777771,grad_norm: 0.9999993104571194, iteration: 88866
loss: 1.1201368570327759,grad_norm: 0.9999992229358418, iteration: 88867
loss: 1.1111187934875488,grad_norm: 0.999999845968385, iteration: 88868
loss: 1.246667504310608,grad_norm: 0.9999997266483253, iteration: 88869
loss: 1.189612627029419,grad_norm: 0.9999998073554631, iteration: 88870
loss: 0.9813337326049805,grad_norm: 0.9999997376876587, iteration: 88871
loss: 1.0009512901306152,grad_norm: 0.9999992789620318, iteration: 88872
loss: 1.2821687459945679,grad_norm: 0.999999877946514, iteration: 88873
loss: 1.0110305547714233,grad_norm: 0.999999474297139, iteration: 88874
loss: 1.0959758758544922,grad_norm: 0.9999997017525648, iteration: 88875
loss: 1.0683494806289673,grad_norm: 0.9999998013986092, iteration: 88876
loss: 1.0679177045822144,grad_norm: 0.9999990587800975, iteration: 88877
loss: 1.1610562801361084,grad_norm: 1.0000000238922078, iteration: 88878
loss: 1.0324853658676147,grad_norm: 0.9999992068261745, iteration: 88879
loss: 1.0443010330200195,grad_norm: 0.9999997665480295, iteration: 88880
loss: 1.1470915079116821,grad_norm: 0.999999614273791, iteration: 88881
loss: 1.4436378479003906,grad_norm: 0.9999997828207264, iteration: 88882
loss: 1.0121736526489258,grad_norm: 0.9999997273442571, iteration: 88883
loss: 1.3939474821090698,grad_norm: 0.99999984233227, iteration: 88884
loss: 1.1680923700332642,grad_norm: 0.999999783716453, iteration: 88885
loss: 1.1468729972839355,grad_norm: 0.9999995571243856, iteration: 88886
loss: 1.126039743423462,grad_norm: 0.9999994478790443, iteration: 88887
loss: 1.1876801252365112,grad_norm: 0.9999998535074678, iteration: 88888
loss: 1.0493377447128296,grad_norm: 0.9777594319295265, iteration: 88889
loss: 1.2752474546432495,grad_norm: 0.9999995669693977, iteration: 88890
loss: 1.0453068017959595,grad_norm: 0.9999999862712483, iteration: 88891
loss: 1.0580042600631714,grad_norm: 0.9999992526986424, iteration: 88892
loss: 1.1983119249343872,grad_norm: 1.0000000865364786, iteration: 88893
loss: 1.19231379032135,grad_norm: 0.9999998022545776, iteration: 88894
loss: 1.1591136455535889,grad_norm: 1.0000000110875935, iteration: 88895
loss: 1.021371603012085,grad_norm: 0.9999990968286153, iteration: 88896
loss: 1.098555326461792,grad_norm: 0.9999992089476258, iteration: 88897
loss: 1.1365495920181274,grad_norm: 0.9999993718482633, iteration: 88898
loss: 1.269418716430664,grad_norm: 0.9999998310027698, iteration: 88899
loss: 1.3779293298721313,grad_norm: 0.9999999970990808, iteration: 88900
loss: 1.1795108318328857,grad_norm: 0.9999997021311889, iteration: 88901
loss: 1.0466396808624268,grad_norm: 0.9999995334188224, iteration: 88902
loss: 1.4311728477478027,grad_norm: 0.9999994878947822, iteration: 88903
loss: 1.098419189453125,grad_norm: 1.000000022689388, iteration: 88904
loss: 1.232170581817627,grad_norm: 0.9999997618675122, iteration: 88905
loss: 1.2231433391571045,grad_norm: 0.9999995634485878, iteration: 88906
loss: 1.059563159942627,grad_norm: 0.9999992400748398, iteration: 88907
loss: 1.2727129459381104,grad_norm: 0.9999999776218279, iteration: 88908
loss: 1.142923355102539,grad_norm: 0.999999784524889, iteration: 88909
loss: 1.0992765426635742,grad_norm: 0.9999996787098374, iteration: 88910
loss: 1.293691873550415,grad_norm: 0.9999999214628228, iteration: 88911
loss: 1.0704162120819092,grad_norm: 0.9999994891104126, iteration: 88912
loss: 1.1056023836135864,grad_norm: 0.9999994947960917, iteration: 88913
loss: 1.2310937643051147,grad_norm: 0.9999992585410319, iteration: 88914
loss: 1.028044581413269,grad_norm: 0.9999991964823585, iteration: 88915
loss: 1.1527748107910156,grad_norm: 0.9999998665129809, iteration: 88916
loss: 1.276025414466858,grad_norm: 0.999999980751541, iteration: 88917
loss: 1.1802150011062622,grad_norm: 0.9999999781265877, iteration: 88918
loss: 1.0654412508010864,grad_norm: 0.9581198738474735, iteration: 88919
loss: 1.3221919536590576,grad_norm: 0.999999650400086, iteration: 88920
loss: 1.1381735801696777,grad_norm: 0.9999997039976658, iteration: 88921
loss: 1.1266402006149292,grad_norm: 0.9999990156198763, iteration: 88922
loss: 1.2477494478225708,grad_norm: 0.999999607348348, iteration: 88923
loss: 1.2293994426727295,grad_norm: 0.9999999722860656, iteration: 88924
loss: 1.1033408641815186,grad_norm: 0.9999993207863019, iteration: 88925
loss: 1.1457151174545288,grad_norm: 0.9999992601709076, iteration: 88926
loss: 1.071082592010498,grad_norm: 0.9798428787080191, iteration: 88927
loss: 1.0904295444488525,grad_norm: 0.9999993078553532, iteration: 88928
loss: 1.0676357746124268,grad_norm: 1.000000014944611, iteration: 88929
loss: 1.1356974840164185,grad_norm: 0.9999991267993702, iteration: 88930
loss: 1.3879687786102295,grad_norm: 0.9999997271322774, iteration: 88931
loss: 1.3814643621444702,grad_norm: 0.999999922554461, iteration: 88932
loss: 1.0697333812713623,grad_norm: 0.9857641286295322, iteration: 88933
loss: 1.090688943862915,grad_norm: 0.9999990440779699, iteration: 88934
loss: 1.0734964609146118,grad_norm: 0.9184339730468748, iteration: 88935
loss: 1.0736156702041626,grad_norm: 1.0000000040850898, iteration: 88936
loss: 1.041346788406372,grad_norm: 0.9999991939236479, iteration: 88937
loss: 1.0133942365646362,grad_norm: 0.9999993732186184, iteration: 88938
loss: 1.1548563241958618,grad_norm: 0.9999994907024565, iteration: 88939
loss: 1.0932968854904175,grad_norm: 0.9999999964101479, iteration: 88940
loss: 1.1611138582229614,grad_norm: 0.999999592060548, iteration: 88941
loss: 0.9850289225578308,grad_norm: 0.9640958577961355, iteration: 88942
loss: 1.1720188856124878,grad_norm: 0.9999994258017256, iteration: 88943
loss: 1.0024901628494263,grad_norm: 0.9999990442132359, iteration: 88944
loss: 1.0666261911392212,grad_norm: 0.9999991169787982, iteration: 88945
loss: 0.9525518417358398,grad_norm: 0.9103463478582172, iteration: 88946
loss: 1.0678207874298096,grad_norm: 0.9999990877941596, iteration: 88947
loss: 1.1639066934585571,grad_norm: 0.999999856177472, iteration: 88948
loss: 1.032853603363037,grad_norm: 0.9206179030399139, iteration: 88949
loss: 1.1194443702697754,grad_norm: 0.9999994931297845, iteration: 88950
loss: 1.089382290840149,grad_norm: 0.9999992711115938, iteration: 88951
loss: 1.0992480516433716,grad_norm: 0.999999832691631, iteration: 88952
loss: 1.1726579666137695,grad_norm: 0.9999998957138372, iteration: 88953
loss: 1.0008422136306763,grad_norm: 0.9999998545084815, iteration: 88954
loss: 1.1438140869140625,grad_norm: 0.9999991926565379, iteration: 88955
loss: 1.1306554079055786,grad_norm: 0.999999124021514, iteration: 88956
loss: 1.1644588708877563,grad_norm: 0.9999993965852351, iteration: 88957
loss: 1.069521427154541,grad_norm: 0.9999998233999593, iteration: 88958
loss: 1.0279443264007568,grad_norm: 0.9999999030664097, iteration: 88959
loss: 1.0677717924118042,grad_norm: 0.9999990466415499, iteration: 88960
loss: 1.1256176233291626,grad_norm: 0.9999992243383643, iteration: 88961
loss: 0.9782397747039795,grad_norm: 0.9526309621240082, iteration: 88962
loss: 1.0375484228134155,grad_norm: 0.9999992612647081, iteration: 88963
loss: 1.134135127067566,grad_norm: 0.9999996288679684, iteration: 88964
loss: 1.2478280067443848,grad_norm: 0.9999999328565288, iteration: 88965
loss: 1.065353512763977,grad_norm: 0.9276086120995715, iteration: 88966
loss: 1.1000374555587769,grad_norm: 0.9999997533527057, iteration: 88967
loss: 1.0510950088500977,grad_norm: 0.9999994957921582, iteration: 88968
loss: 1.062265396118164,grad_norm: 0.9999989952988142, iteration: 88969
loss: 1.0976784229278564,grad_norm: 0.9999991898128252, iteration: 88970
loss: 1.1394566297531128,grad_norm: 0.9999991375754135, iteration: 88971
loss: 1.0240676403045654,grad_norm: 0.9999991682817241, iteration: 88972
loss: 1.2281650304794312,grad_norm: 0.9999998636294314, iteration: 88973
loss: 1.078808069229126,grad_norm: 0.9999993942533849, iteration: 88974
loss: 1.073819875717163,grad_norm: 0.9999994340646806, iteration: 88975
loss: 1.0665290355682373,grad_norm: 0.9999993668471693, iteration: 88976
loss: 1.1457915306091309,grad_norm: 0.9999999999622203, iteration: 88977
loss: 1.030663251876831,grad_norm: 0.9999993486590248, iteration: 88978
loss: 1.067247986793518,grad_norm: 0.9999990978055303, iteration: 88979
loss: 1.231284260749817,grad_norm: 0.9999997405494881, iteration: 88980
loss: 1.0708057880401611,grad_norm: 0.999999258671501, iteration: 88981
loss: 1.0492340326309204,grad_norm: 0.9999991890626057, iteration: 88982
loss: 1.054286241531372,grad_norm: 0.9145879401083484, iteration: 88983
loss: 0.9839509129524231,grad_norm: 0.947481332781122, iteration: 88984
loss: 1.0292328596115112,grad_norm: 0.9399801162043646, iteration: 88985
loss: 1.0307731628417969,grad_norm: 0.9247486738880379, iteration: 88986
loss: 1.0548690557479858,grad_norm: 0.9999991024360916, iteration: 88987
loss: 1.211366891860962,grad_norm: 0.9999998147381152, iteration: 88988
loss: 1.0048402547836304,grad_norm: 0.9951674594521318, iteration: 88989
loss: 1.0102139711380005,grad_norm: 0.8708613072318742, iteration: 88990
loss: 1.062803864479065,grad_norm: 0.9999999278780388, iteration: 88991
loss: 1.0405815839767456,grad_norm: 0.9999999457226428, iteration: 88992
loss: 0.9988940954208374,grad_norm: 0.9999991390662001, iteration: 88993
loss: 1.088619351387024,grad_norm: 0.9999998163095929, iteration: 88994
loss: 1.0079842805862427,grad_norm: 0.9999994904884933, iteration: 88995
loss: 1.0003494024276733,grad_norm: 0.9999991271328477, iteration: 88996
loss: 0.9933458566665649,grad_norm: 0.9999990835250764, iteration: 88997
loss: 1.070114016532898,grad_norm: 0.9999992584806917, iteration: 88998
loss: 1.0751676559448242,grad_norm: 0.9999995188875923, iteration: 88999
loss: 0.9862005114555359,grad_norm: 0.8131101437942954, iteration: 89000
loss: 1.210970163345337,grad_norm: 0.999999805769083, iteration: 89001
loss: 1.1199284791946411,grad_norm: 0.9999998985289943, iteration: 89002
loss: 0.971496045589447,grad_norm: 0.9242324592873005, iteration: 89003
loss: 1.45709228515625,grad_norm: 1.0000000065404888, iteration: 89004
loss: 1.0766918659210205,grad_norm: 0.9914587712424667, iteration: 89005
loss: 1.1717644929885864,grad_norm: 0.9999992472176275, iteration: 89006
loss: 1.0547468662261963,grad_norm: 0.9999997722519922, iteration: 89007
loss: 1.0564029216766357,grad_norm: 0.9999991593797418, iteration: 89008
loss: 1.0026767253875732,grad_norm: 0.745114913979339, iteration: 89009
loss: 1.3322137594223022,grad_norm: 0.9999999188771922, iteration: 89010
loss: 1.1048362255096436,grad_norm: 0.9999997110235443, iteration: 89011
loss: 0.9775785207748413,grad_norm: 0.954660086936357, iteration: 89012
loss: 1.0901118516921997,grad_norm: 0.9999995407512862, iteration: 89013
loss: 1.0367608070373535,grad_norm: 0.9674546695000501, iteration: 89014
loss: 1.0523567199707031,grad_norm: 0.9999997761868605, iteration: 89015
loss: 0.9750810861587524,grad_norm: 0.9999991704440088, iteration: 89016
loss: 1.034942388534546,grad_norm: 0.8267476421910888, iteration: 89017
loss: 1.0650522708892822,grad_norm: 0.9409720420542403, iteration: 89018
loss: 1.0409870147705078,grad_norm: 0.9999994474666374, iteration: 89019
loss: 1.073767066001892,grad_norm: 0.9999997312467084, iteration: 89020
loss: 0.9959138631820679,grad_norm: 0.9999991572795848, iteration: 89021
loss: 1.0562220811843872,grad_norm: 0.9999991802798159, iteration: 89022
loss: 1.0977576971054077,grad_norm: 0.9999992012874513, iteration: 89023
loss: 1.0096724033355713,grad_norm: 0.9576053752401583, iteration: 89024
loss: 1.0224910974502563,grad_norm: 0.8571411974237515, iteration: 89025
loss: 1.1422004699707031,grad_norm: 0.9999995817562841, iteration: 89026
loss: 1.0294753313064575,grad_norm: 0.9063427334830985, iteration: 89027
loss: 1.0568288564682007,grad_norm: 0.9999993769963371, iteration: 89028
loss: 1.0289745330810547,grad_norm: 0.9247485142084675, iteration: 89029
loss: 0.9856849312782288,grad_norm: 0.9465911947293946, iteration: 89030
loss: 1.003340482711792,grad_norm: 0.9427351835234626, iteration: 89031
loss: 1.1101577281951904,grad_norm: 0.9999992422687486, iteration: 89032
loss: 1.1539604663848877,grad_norm: 0.9999999642960989, iteration: 89033
loss: 1.0448863506317139,grad_norm: 0.9999994960260383, iteration: 89034
loss: 1.0251742601394653,grad_norm: 0.8536176608447164, iteration: 89035
loss: 1.1384334564208984,grad_norm: 1.000000039590419, iteration: 89036
loss: 1.0191887617111206,grad_norm: 0.9999993719607824, iteration: 89037
loss: 1.0573971271514893,grad_norm: 0.9999998768950825, iteration: 89038
loss: 1.0283188819885254,grad_norm: 0.9999990551525556, iteration: 89039
loss: 1.1944643259048462,grad_norm: 0.9999997519072855, iteration: 89040
loss: 0.9960554242134094,grad_norm: 0.9286093924897038, iteration: 89041
loss: 1.0285606384277344,grad_norm: 0.8798286677381018, iteration: 89042
loss: 1.064360499382019,grad_norm: 0.9999993436909042, iteration: 89043
loss: 1.063804030418396,grad_norm: 0.999999253596671, iteration: 89044
loss: 0.9954822063446045,grad_norm: 0.9999998434026933, iteration: 89045
loss: 1.0035721063613892,grad_norm: 0.9999990832164157, iteration: 89046
loss: 1.1413575410842896,grad_norm: 0.9999998765479908, iteration: 89047
loss: 1.0341672897338867,grad_norm: 0.999999096948902, iteration: 89048
loss: 1.0045850276947021,grad_norm: 0.9999998207690369, iteration: 89049
loss: 1.2000268697738647,grad_norm: 0.9999997864079847, iteration: 89050
loss: 1.0779472589492798,grad_norm: 0.9985021295929811, iteration: 89051
loss: 1.0971462726593018,grad_norm: 0.9999999212660702, iteration: 89052
loss: 1.0236772298812866,grad_norm: 0.9999991444843249, iteration: 89053
loss: 0.9546578526496887,grad_norm: 0.9999995807145501, iteration: 89054
loss: 1.050515055656433,grad_norm: 0.999999291096239, iteration: 89055
loss: 1.4528430700302124,grad_norm: 1.0000000377988674, iteration: 89056
loss: 1.013641119003296,grad_norm: 0.8493017731977992, iteration: 89057
loss: 1.0059959888458252,grad_norm: 0.9405217218618542, iteration: 89058
loss: 1.083932876586914,grad_norm: 0.9999998203134506, iteration: 89059
loss: 1.2076845169067383,grad_norm: 0.9999996568153376, iteration: 89060
loss: 1.0225554704666138,grad_norm: 0.999999082897601, iteration: 89061
loss: 0.9821706414222717,grad_norm: 0.7725462390502552, iteration: 89062
loss: 1.0633455514907837,grad_norm: 0.9999999143623005, iteration: 89063
loss: 1.3607168197631836,grad_norm: 0.9999997858849383, iteration: 89064
loss: 1.0973773002624512,grad_norm: 0.9999998316437922, iteration: 89065
loss: 1.1947141885757446,grad_norm: 0.9999999656372245, iteration: 89066
loss: 1.0659549236297607,grad_norm: 0.999999464950383, iteration: 89067
loss: 1.1336084604263306,grad_norm: 0.9999994910586271, iteration: 89068
loss: 1.0694907903671265,grad_norm: 0.9999991802095983, iteration: 89069
loss: 1.101386308670044,grad_norm: 0.999999642866788, iteration: 89070
loss: 1.0364910364151,grad_norm: 0.9999992722466978, iteration: 89071
loss: 1.0244182348251343,grad_norm: 0.9999998213708027, iteration: 89072
loss: 1.134639859199524,grad_norm: 0.9999998902729443, iteration: 89073
loss: 1.1238702535629272,grad_norm: 0.9999994223391268, iteration: 89074
loss: 1.0834583044052124,grad_norm: 0.9999993694189792, iteration: 89075
loss: 1.0150364637374878,grad_norm: 0.9999992162114735, iteration: 89076
loss: 0.9620174169540405,grad_norm: 0.8000100855282096, iteration: 89077
loss: 1.1316484212875366,grad_norm: 0.9999996727497051, iteration: 89078
loss: 1.0853506326675415,grad_norm: 0.9999994319599914, iteration: 89079
loss: 1.0462559461593628,grad_norm: 0.9999997467966276, iteration: 89080
loss: 1.0096545219421387,grad_norm: 0.9999991475638487, iteration: 89081
loss: 1.0608298778533936,grad_norm: 0.9999995035934626, iteration: 89082
loss: 1.0034645795822144,grad_norm: 0.9999990000153652, iteration: 89083
loss: 1.11374032497406,grad_norm: 0.9999998304640987, iteration: 89084
loss: 1.0627903938293457,grad_norm: 0.9999998092512151, iteration: 89085
loss: 1.102228045463562,grad_norm: 0.9999998134165675, iteration: 89086
loss: 1.1038850545883179,grad_norm: 0.9999992768411778, iteration: 89087
loss: 1.243843913078308,grad_norm: 0.9999992553742915, iteration: 89088
loss: 1.0051156282424927,grad_norm: 0.9999990752526656, iteration: 89089
loss: 1.1603291034698486,grad_norm: 0.999999760134358, iteration: 89090
loss: 0.9973555207252502,grad_norm: 0.9273964702669087, iteration: 89091
loss: 1.014854907989502,grad_norm: 0.9999991481447708, iteration: 89092
loss: 0.9821545481681824,grad_norm: 0.9540930664677023, iteration: 89093
loss: 1.0205234289169312,grad_norm: 0.9999991591660996, iteration: 89094
loss: 1.004691481590271,grad_norm: 0.9999992919216387, iteration: 89095
loss: 1.0215071439743042,grad_norm: 0.9896734502105149, iteration: 89096
loss: 1.0283615589141846,grad_norm: 0.999999061724863, iteration: 89097
loss: 0.9755841493606567,grad_norm: 0.9890931853589915, iteration: 89098
loss: 1.0206172466278076,grad_norm: 0.9999989900025696, iteration: 89099
loss: 0.9681803584098816,grad_norm: 0.9999990544493311, iteration: 89100
loss: 1.0121887922286987,grad_norm: 0.9999993959804265, iteration: 89101
loss: 1.032427430152893,grad_norm: 0.9999997988280421, iteration: 89102
loss: 0.9952027797698975,grad_norm: 0.9317579269715517, iteration: 89103
loss: 1.0886911153793335,grad_norm: 0.9999990696361036, iteration: 89104
loss: 1.1049169301986694,grad_norm: 0.9999993245886774, iteration: 89105
loss: 1.0648441314697266,grad_norm: 1.0000000641032236, iteration: 89106
loss: 1.0220569372177124,grad_norm: 0.9999995348840941, iteration: 89107
loss: 0.995750367641449,grad_norm: 0.9737000956155683, iteration: 89108
loss: 1.1088839769363403,grad_norm: 0.9999996590631458, iteration: 89109
loss: 1.0051651000976562,grad_norm: 0.834060217700473, iteration: 89110
loss: 0.9768620133399963,grad_norm: 0.7894093011029081, iteration: 89111
loss: 0.994269847869873,grad_norm: 0.9270779393309878, iteration: 89112
loss: 0.9949313402175903,grad_norm: 0.9489289961237938, iteration: 89113
loss: 1.067277431488037,grad_norm: 0.9999993976111645, iteration: 89114
loss: 1.0737863779067993,grad_norm: 0.9999995081893966, iteration: 89115
loss: 1.0691335201263428,grad_norm: 0.9999993012911621, iteration: 89116
loss: 1.0611034631729126,grad_norm: 0.9999990381265373, iteration: 89117
loss: 1.042781114578247,grad_norm: 0.9999996386939768, iteration: 89118
loss: 1.0408283472061157,grad_norm: 0.9999998252704151, iteration: 89119
loss: 1.084580421447754,grad_norm: 0.9999996002122125, iteration: 89120
loss: 1.000810980796814,grad_norm: 0.9999992570093659, iteration: 89121
loss: 1.0485316514968872,grad_norm: 0.9999991829768765, iteration: 89122
loss: 0.9863495230674744,grad_norm: 0.9321946251048855, iteration: 89123
loss: 1.0005688667297363,grad_norm: 0.9999989553532976, iteration: 89124
loss: 0.9888352155685425,grad_norm: 0.9999998141232239, iteration: 89125
loss: 1.1396710872650146,grad_norm: 0.999999484732157, iteration: 89126
loss: 1.1106067895889282,grad_norm: 0.8413266721018803, iteration: 89127
loss: 1.0009593963623047,grad_norm: 0.9999993838707764, iteration: 89128
loss: 1.0975234508514404,grad_norm: 0.9999996719945214, iteration: 89129
loss: 1.2921245098114014,grad_norm: 0.9999998213030604, iteration: 89130
loss: 1.0423145294189453,grad_norm: 0.9395608346293335, iteration: 89131
loss: 0.9697418212890625,grad_norm: 0.768684849291706, iteration: 89132
loss: 1.041296362876892,grad_norm: 0.9999990841595944, iteration: 89133
loss: 1.0149534940719604,grad_norm: 0.999999854828491, iteration: 89134
loss: 0.994915783405304,grad_norm: 0.9999991134460378, iteration: 89135
loss: 1.0542174577713013,grad_norm: 0.9999989678822827, iteration: 89136
loss: 1.0180811882019043,grad_norm: 0.9999991620913193, iteration: 89137
loss: 1.0098247528076172,grad_norm: 0.8869594056739794, iteration: 89138
loss: 1.04409921169281,grad_norm: 0.9999993192326354, iteration: 89139
loss: 1.0082130432128906,grad_norm: 0.999999107406408, iteration: 89140
loss: 1.030918002128601,grad_norm: 0.9097428447916245, iteration: 89141
loss: 1.1152032613754272,grad_norm: 0.9868200321618447, iteration: 89142
loss: 0.9782324433326721,grad_norm: 0.9183213394117918, iteration: 89143
loss: 1.0389424562454224,grad_norm: 0.9999997111520604, iteration: 89144
loss: 1.1453607082366943,grad_norm: 0.9999999396185252, iteration: 89145
loss: 1.011965274810791,grad_norm: 0.9999993372517827, iteration: 89146
loss: 1.0379917621612549,grad_norm: 0.9026720388489045, iteration: 89147
loss: 1.000927448272705,grad_norm: 0.9999991670564022, iteration: 89148
loss: 1.0353447198867798,grad_norm: 0.9999998522485505, iteration: 89149
loss: 1.004518985748291,grad_norm: 0.9999991076428933, iteration: 89150
loss: 1.0412805080413818,grad_norm: 0.9999996698373141, iteration: 89151
loss: 1.0680198669433594,grad_norm: 0.9999993987446271, iteration: 89152
loss: 1.0108524560928345,grad_norm: 0.819023553797714, iteration: 89153
loss: 1.0006386041641235,grad_norm: 0.8723682275926448, iteration: 89154
loss: 0.9825650453567505,grad_norm: 0.9999991043973349, iteration: 89155
loss: 0.9958855509757996,grad_norm: 0.9230262168754234, iteration: 89156
loss: 0.9815327525138855,grad_norm: 0.9999990497636253, iteration: 89157
loss: 1.0331923961639404,grad_norm: 0.9999997742683637, iteration: 89158
loss: 0.9704797863960266,grad_norm: 0.9999991357827553, iteration: 89159
loss: 1.030961275100708,grad_norm: 0.800071312820735, iteration: 89160
loss: 1.0283913612365723,grad_norm: 0.9999991263746574, iteration: 89161
loss: 1.084937334060669,grad_norm: 0.9999997223695041, iteration: 89162
loss: 1.0506051778793335,grad_norm: 0.8206244619827561, iteration: 89163
loss: 1.0111597776412964,grad_norm: 0.9999992609782498, iteration: 89164
loss: 1.0040438175201416,grad_norm: 0.9999992159522362, iteration: 89165
loss: 1.029833197593689,grad_norm: 0.9999992082718464, iteration: 89166
loss: 1.0238159894943237,grad_norm: 0.8694701495938462, iteration: 89167
loss: 1.0163025856018066,grad_norm: 0.9999990874899073, iteration: 89168
loss: 0.9859918355941772,grad_norm: 0.9999998629263127, iteration: 89169
loss: 1.0452686548233032,grad_norm: 0.9570566655174628, iteration: 89170
loss: 1.0101447105407715,grad_norm: 0.8986633553552127, iteration: 89171
loss: 0.997645914554596,grad_norm: 0.9445099576067643, iteration: 89172
loss: 0.9858480095863342,grad_norm: 0.9628512328489924, iteration: 89173
loss: 0.9911344647407532,grad_norm: 0.9999990778641691, iteration: 89174
loss: 1.0319241285324097,grad_norm: 0.9912074307379635, iteration: 89175
loss: 1.017329216003418,grad_norm: 0.9511888136414743, iteration: 89176
loss: 1.0502257347106934,grad_norm: 0.7132527176158546, iteration: 89177
loss: 1.0153485536575317,grad_norm: 0.9999998471732549, iteration: 89178
loss: 1.0101898908615112,grad_norm: 0.8590043826196595, iteration: 89179
loss: 0.9987532496452332,grad_norm: 0.9845140851505751, iteration: 89180
loss: 1.0723546743392944,grad_norm: 0.9263751712284497, iteration: 89181
loss: 1.021835446357727,grad_norm: 0.9999991396575002, iteration: 89182
loss: 1.021450161933899,grad_norm: 0.8680034195890457, iteration: 89183
loss: 1.0081422328948975,grad_norm: 0.8202752075628581, iteration: 89184
loss: 0.9996922612190247,grad_norm: 0.9646669230450019, iteration: 89185
loss: 1.0332026481628418,grad_norm: 0.9999990619708975, iteration: 89186
loss: 0.9802175760269165,grad_norm: 0.8111751385483962, iteration: 89187
loss: 1.0034654140472412,grad_norm: 0.9686029096841423, iteration: 89188
loss: 1.0130035877227783,grad_norm: 0.9845725142775472, iteration: 89189
loss: 1.040806531906128,grad_norm: 0.9999990878004891, iteration: 89190
loss: 1.0342055559158325,grad_norm: 0.9999991832568261, iteration: 89191
loss: 1.051586389541626,grad_norm: 0.8825430420259949, iteration: 89192
loss: 1.0416820049285889,grad_norm: 0.9999999242878459, iteration: 89193
loss: 0.9699334502220154,grad_norm: 0.9291915576160557, iteration: 89194
loss: 1.1185460090637207,grad_norm: 0.9999991428970172, iteration: 89195
loss: 1.0096473693847656,grad_norm: 0.9894805944202802, iteration: 89196
loss: 1.094949722290039,grad_norm: 0.999999032627729, iteration: 89197
loss: 0.9866634607315063,grad_norm: 0.9513241279705315, iteration: 89198
loss: 1.0246387720108032,grad_norm: 0.8732010704803027, iteration: 89199
loss: 0.9604836702346802,grad_norm: 0.9999989528096994, iteration: 89200
loss: 1.0232082605361938,grad_norm: 0.9999990341109192, iteration: 89201
loss: 1.0230154991149902,grad_norm: 0.9999992343524502, iteration: 89202
loss: 1.0135002136230469,grad_norm: 0.9999989935963476, iteration: 89203
loss: 1.0213336944580078,grad_norm: 0.8543798152982465, iteration: 89204
loss: 0.9923623204231262,grad_norm: 0.9259026733622062, iteration: 89205
loss: 1.0234544277191162,grad_norm: 0.9999994112929997, iteration: 89206
loss: 1.0030879974365234,grad_norm: 0.9999991571205673, iteration: 89207
loss: 1.0416017770767212,grad_norm: 0.999999220491129, iteration: 89208
loss: 1.0286916494369507,grad_norm: 0.9999990900862838, iteration: 89209
loss: 0.9778383374214172,grad_norm: 0.9999990868230055, iteration: 89210
loss: 1.011650562286377,grad_norm: 0.8951114064022411, iteration: 89211
loss: 0.9987624883651733,grad_norm: 0.8834089028335576, iteration: 89212
loss: 1.209429144859314,grad_norm: 0.9999990280241193, iteration: 89213
loss: 0.9925012588500977,grad_norm: 0.9999991286493999, iteration: 89214
loss: 1.0347309112548828,grad_norm: 0.9368531919268602, iteration: 89215
loss: 1.019042730331421,grad_norm: 0.7730099413944813, iteration: 89216
loss: 1.0264207124710083,grad_norm: 0.821277650696853, iteration: 89217
loss: 0.9952318668365479,grad_norm: 0.8372332522206731, iteration: 89218
loss: 1.0338207483291626,grad_norm: 0.9999992290670733, iteration: 89219
loss: 1.032590389251709,grad_norm: 0.9999996744298815, iteration: 89220
loss: 1.0100197792053223,grad_norm: 0.9599376094868876, iteration: 89221
loss: 0.9762439727783203,grad_norm: 0.9998771793287723, iteration: 89222
loss: 1.065881609916687,grad_norm: 0.8108750761843223, iteration: 89223
loss: 0.9889757633209229,grad_norm: 0.9499098424232734, iteration: 89224
loss: 1.0616455078125,grad_norm: 0.9999994525663336, iteration: 89225
loss: 1.0029252767562866,grad_norm: 0.928289620332614, iteration: 89226
loss: 1.1076149940490723,grad_norm: 0.9999996928769692, iteration: 89227
loss: 0.9930346012115479,grad_norm: 0.9999995822821017, iteration: 89228
loss: 1.0183024406433105,grad_norm: 0.9999993136591214, iteration: 89229
loss: 1.141323208808899,grad_norm: 0.9999990827861623, iteration: 89230
loss: 1.1004207134246826,grad_norm: 0.9999992345503964, iteration: 89231
loss: 1.0001579523086548,grad_norm: 0.9704131188250894, iteration: 89232
loss: 0.9861518144607544,grad_norm: 0.9333503262324835, iteration: 89233
loss: 1.0346189737319946,grad_norm: 0.9784497737011704, iteration: 89234
loss: 0.9682610034942627,grad_norm: 0.7967500598990056, iteration: 89235
loss: 0.9763213992118835,grad_norm: 0.9618711872341071, iteration: 89236
loss: 0.9880519509315491,grad_norm: 0.9591436062014631, iteration: 89237
loss: 0.9803625345230103,grad_norm: 0.999998908801883, iteration: 89238
loss: 1.0216786861419678,grad_norm: 0.8062276466558418, iteration: 89239
loss: 1.0090043544769287,grad_norm: 0.9396864046184994, iteration: 89240
loss: 1.0326745510101318,grad_norm: 0.8982927329157284, iteration: 89241
loss: 0.9833811521530151,grad_norm: 0.9002322615945415, iteration: 89242
loss: 1.0060385465621948,grad_norm: 0.9310013737521236, iteration: 89243
loss: 0.9765809774398804,grad_norm: 0.7446649393221486, iteration: 89244
loss: 0.9992958903312683,grad_norm: 0.9999990457901274, iteration: 89245
loss: 0.9727084040641785,grad_norm: 0.9966131749981583, iteration: 89246
loss: 1.048352837562561,grad_norm: 0.8932126377219668, iteration: 89247
loss: 1.0138545036315918,grad_norm: 0.9702809742711314, iteration: 89248
loss: 0.967070460319519,grad_norm: 0.9999991109868192, iteration: 89249
loss: 1.0169721841812134,grad_norm: 0.8831930429396896, iteration: 89250
loss: 0.9946619868278503,grad_norm: 0.908937288826745, iteration: 89251
loss: 0.9820074439048767,grad_norm: 0.9254729716651948, iteration: 89252
loss: 1.0348336696624756,grad_norm: 0.9393532331006748, iteration: 89253
loss: 0.9863163232803345,grad_norm: 0.9597204366743057, iteration: 89254
loss: 0.9995360374450684,grad_norm: 0.8481506428282296, iteration: 89255
loss: 0.9758320450782776,grad_norm: 0.8972624120734606, iteration: 89256
loss: 1.0418238639831543,grad_norm: 0.9999994547708059, iteration: 89257
loss: 1.028511881828308,grad_norm: 0.7933295301581884, iteration: 89258
loss: 1.007381558418274,grad_norm: 0.9999991107314217, iteration: 89259
loss: 1.149653434753418,grad_norm: 0.9999990418007324, iteration: 89260
loss: 0.9783247113227844,grad_norm: 0.9194391770252014, iteration: 89261
loss: 1.0252131223678589,grad_norm: 0.881780375247687, iteration: 89262
loss: 1.0112345218658447,grad_norm: 0.9999997537675679, iteration: 89263
loss: 1.0101860761642456,grad_norm: 0.9999992542889534, iteration: 89264
loss: 1.014582633972168,grad_norm: 0.9999990820102139, iteration: 89265
loss: 0.9953384399414062,grad_norm: 0.8635821849833704, iteration: 89266
loss: 0.9931958317756653,grad_norm: 0.9310208214941327, iteration: 89267
loss: 0.9770308136940002,grad_norm: 0.9999990473884204, iteration: 89268
loss: 1.0558277368545532,grad_norm: 0.8323240325814765, iteration: 89269
loss: 1.0189712047576904,grad_norm: 0.9999993292396862, iteration: 89270
loss: 1.0518871545791626,grad_norm: 0.9999991733152942, iteration: 89271
loss: 1.0050376653671265,grad_norm: 0.8533525742760212, iteration: 89272
loss: 0.981719970703125,grad_norm: 0.940142163464518, iteration: 89273
loss: 0.9867561459541321,grad_norm: 0.9999993776636698, iteration: 89274
loss: 0.9666960835456848,grad_norm: 0.8935527716578346, iteration: 89275
loss: 1.0100960731506348,grad_norm: 0.9076166696651587, iteration: 89276
loss: 0.9740942716598511,grad_norm: 0.9999991224953998, iteration: 89277
loss: 0.997407078742981,grad_norm: 0.9733469586607651, iteration: 89278
loss: 1.00551176071167,grad_norm: 0.970966655654609, iteration: 89279
loss: 0.9872735142707825,grad_norm: 0.785710252609832, iteration: 89280
loss: 1.012678623199463,grad_norm: 0.7929402260890852, iteration: 89281
loss: 1.0092488527297974,grad_norm: 0.9906800610297682, iteration: 89282
loss: 1.0031397342681885,grad_norm: 0.9280375400499152, iteration: 89283
loss: 1.0176208019256592,grad_norm: 0.764319328220329, iteration: 89284
loss: 1.000983476638794,grad_norm: 0.8617411586946365, iteration: 89285
loss: 0.9550554156303406,grad_norm: 0.8965021498895742, iteration: 89286
loss: 0.9501330852508545,grad_norm: 0.8747366824889341, iteration: 89287
loss: 1.0341434478759766,grad_norm: 0.9527166621373276, iteration: 89288
loss: 1.0264146327972412,grad_norm: 0.999999854528196, iteration: 89289
loss: 0.9750628471374512,grad_norm: 0.8854637058849367, iteration: 89290
loss: 0.9846827387809753,grad_norm: 0.9638520889822123, iteration: 89291
loss: 0.972769021987915,grad_norm: 0.8807606038821371, iteration: 89292
loss: 1.0321435928344727,grad_norm: 0.9999991300374614, iteration: 89293
loss: 1.0090411901474,grad_norm: 0.9999992572198785, iteration: 89294
loss: 1.0578464269638062,grad_norm: 0.9745469599973972, iteration: 89295
loss: 1.0684798955917358,grad_norm: 0.9999992938993172, iteration: 89296
loss: 0.9824033975601196,grad_norm: 0.90861757233614, iteration: 89297
loss: 1.0502375364303589,grad_norm: 0.9999991079808936, iteration: 89298
loss: 0.9747031927108765,grad_norm: 0.9563379710508058, iteration: 89299
loss: 1.0202804803848267,grad_norm: 0.8551407060901288, iteration: 89300
loss: 1.0013691186904907,grad_norm: 0.8790268872068481, iteration: 89301
loss: 1.0223027467727661,grad_norm: 0.9999991033000362, iteration: 89302
loss: 1.0075170993804932,grad_norm: 0.9999991399082051, iteration: 89303
loss: 0.9971222877502441,grad_norm: 1.0000000382665635, iteration: 89304
loss: 0.9588707685470581,grad_norm: 0.9999989967427837, iteration: 89305
loss: 0.9758319854736328,grad_norm: 0.97633765358988, iteration: 89306
loss: 0.9974693059921265,grad_norm: 0.9791539135142503, iteration: 89307
loss: 1.008699893951416,grad_norm: 0.8227515084850242, iteration: 89308
loss: 0.9995013475418091,grad_norm: 0.8934918316874499, iteration: 89309
loss: 0.9961042404174805,grad_norm: 0.9999991904431994, iteration: 89310
loss: 0.9901536703109741,grad_norm: 0.9611113614301998, iteration: 89311
loss: 0.9833166599273682,grad_norm: 0.9999990666360464, iteration: 89312
loss: 1.0002809762954712,grad_norm: 0.8110248392402419, iteration: 89313
loss: 1.040177583694458,grad_norm: 0.9999993229563691, iteration: 89314
loss: 1.0129398107528687,grad_norm: 0.9999997790195323, iteration: 89315
loss: 1.0500752925872803,grad_norm: 0.999999889517835, iteration: 89316
loss: 1.0308122634887695,grad_norm: 0.9689384321755774, iteration: 89317
loss: 0.9666260480880737,grad_norm: 0.8213220257606157, iteration: 89318
loss: 0.977196991443634,grad_norm: 0.9999991791791977, iteration: 89319
loss: 0.9928184151649475,grad_norm: 0.7800117605239175, iteration: 89320
loss: 1.0446810722351074,grad_norm: 0.7895367207507704, iteration: 89321
loss: 1.0444730520248413,grad_norm: 0.9070759496758645, iteration: 89322
loss: 0.9915897250175476,grad_norm: 0.8279371199242423, iteration: 89323
loss: 1.1661990880966187,grad_norm: 0.999999929733274, iteration: 89324
loss: 1.0443264245986938,grad_norm: 0.9999999836334529, iteration: 89325
loss: 1.2489771842956543,grad_norm: 0.9999998002765523, iteration: 89326
loss: 1.0626025199890137,grad_norm: 0.9999990850545594, iteration: 89327
loss: 1.1173875331878662,grad_norm: 1.0000000026406957, iteration: 89328
loss: 1.125854730606079,grad_norm: 0.9228831053488273, iteration: 89329
loss: 1.0398186445236206,grad_norm: 0.9999992578433223, iteration: 89330
loss: 1.0679489374160767,grad_norm: 0.999999099581176, iteration: 89331
loss: 0.989387571811676,grad_norm: 0.9999991920978079, iteration: 89332
loss: 1.0492548942565918,grad_norm: 0.9999992332030789, iteration: 89333
loss: 1.0178695917129517,grad_norm: 0.9999998349577359, iteration: 89334
loss: 1.1313620805740356,grad_norm: 0.9469024294477251, iteration: 89335
loss: 1.0553855895996094,grad_norm: 0.9999990913597615, iteration: 89336
loss: 1.0659493207931519,grad_norm: 0.9999996404156442, iteration: 89337
loss: 1.1842143535614014,grad_norm: 0.9999991669937435, iteration: 89338
loss: 1.0086373090744019,grad_norm: 0.9909532788727519, iteration: 89339
loss: 1.0385785102844238,grad_norm: 0.9999991633146249, iteration: 89340
loss: 1.0588104724884033,grad_norm: 0.9999993328099789, iteration: 89341
loss: 1.017833948135376,grad_norm: 0.9999990798091256, iteration: 89342
loss: 1.06666100025177,grad_norm: 0.9999998015131004, iteration: 89343
loss: 1.2319360971450806,grad_norm: 0.9999997565153261, iteration: 89344
loss: 1.3516547679901123,grad_norm: 0.9999998745029263, iteration: 89345
loss: 1.0910625457763672,grad_norm: 0.9336668371763844, iteration: 89346
loss: 1.1429545879364014,grad_norm: 0.9999992033162476, iteration: 89347
loss: 1.0301711559295654,grad_norm: 0.9999991204176085, iteration: 89348
loss: 1.0918580293655396,grad_norm: 0.9999994195309311, iteration: 89349
loss: 1.103006362915039,grad_norm: 0.9999999043617334, iteration: 89350
loss: 1.0386604070663452,grad_norm: 0.922889081816771, iteration: 89351
loss: 1.0288738012313843,grad_norm: 0.9999991088047225, iteration: 89352
loss: 1.162919044494629,grad_norm: 1.0000000259378283, iteration: 89353
loss: 1.0390863418579102,grad_norm: 0.9999991177723762, iteration: 89354
loss: 1.021766185760498,grad_norm: 0.9999992476331732, iteration: 89355
loss: 0.9818202257156372,grad_norm: 0.999999142831202, iteration: 89356
loss: 1.0303727388381958,grad_norm: 0.9999994017047672, iteration: 89357
loss: 0.96951824426651,grad_norm: 0.9999991367418031, iteration: 89358
loss: 1.0326088666915894,grad_norm: 0.999999231326545, iteration: 89359
loss: 1.0148946046829224,grad_norm: 0.999999094098927, iteration: 89360
loss: 1.012976050376892,grad_norm: 0.9999996577288739, iteration: 89361
loss: 1.0377856492996216,grad_norm: 0.8864726939568539, iteration: 89362
loss: 1.1318155527114868,grad_norm: 0.9999994689173606, iteration: 89363
loss: 1.178914189338684,grad_norm: 0.9999993713388579, iteration: 89364
loss: 1.0912286043167114,grad_norm: 0.953046222244096, iteration: 89365
loss: 0.9761171340942383,grad_norm: 0.99999908631966, iteration: 89366
loss: 1.1176848411560059,grad_norm: 0.9999997940810424, iteration: 89367
loss: 1.1270233392715454,grad_norm: 0.9999992082944819, iteration: 89368
loss: 1.0081449747085571,grad_norm: 0.9999992328753695, iteration: 89369
loss: 1.0597354173660278,grad_norm: 0.9999994426738408, iteration: 89370
loss: 1.0248479843139648,grad_norm: 0.9999994987468542, iteration: 89371
loss: 1.07670259475708,grad_norm: 0.999999843371003, iteration: 89372
loss: 1.0292794704437256,grad_norm: 0.942447234780872, iteration: 89373
loss: 1.0737403631210327,grad_norm: 0.9999998498725922, iteration: 89374
loss: 0.9865057468414307,grad_norm: 0.9999992486759816, iteration: 89375
loss: 1.010372519493103,grad_norm: 0.9999998952725717, iteration: 89376
loss: 1.0399675369262695,grad_norm: 0.9999995171974444, iteration: 89377
loss: 1.0017573833465576,grad_norm: 1.0000000720590474, iteration: 89378
loss: 0.9858911633491516,grad_norm: 0.9999995296024731, iteration: 89379
loss: 1.032925009727478,grad_norm: 0.9459076961980506, iteration: 89380
loss: 0.9488770365715027,grad_norm: 0.999999465831953, iteration: 89381
loss: 1.0015333890914917,grad_norm: 0.8617731939988093, iteration: 89382
loss: 1.0093278884887695,grad_norm: 0.9724241854613639, iteration: 89383
loss: 1.0460082292556763,grad_norm: 0.9365554054876428, iteration: 89384
loss: 1.1833429336547852,grad_norm: 0.8782861358272751, iteration: 89385
loss: 1.0088785886764526,grad_norm: 0.9999999801302266, iteration: 89386
loss: 0.9653137922286987,grad_norm: 0.9374500271937622, iteration: 89387
loss: 0.9972661733627319,grad_norm: 0.9999991526584084, iteration: 89388
loss: 1.0767823457717896,grad_norm: 0.9999990931283549, iteration: 89389
loss: 1.0025790929794312,grad_norm: 0.9999990169552507, iteration: 89390
loss: 1.0014764070510864,grad_norm: 0.9999991105818614, iteration: 89391
loss: 1.0054465532302856,grad_norm: 0.8467238614335877, iteration: 89392
loss: 0.9927045106887817,grad_norm: 0.9999994487848898, iteration: 89393
loss: 1.0801960229873657,grad_norm: 0.9999993070619057, iteration: 89394
loss: 1.0160409212112427,grad_norm: 0.9755366495580977, iteration: 89395
loss: 1.0206693410873413,grad_norm: 0.9999992738216552, iteration: 89396
loss: 0.9911035299301147,grad_norm: 0.99999901073194, iteration: 89397
loss: 1.0660687685012817,grad_norm: 0.9999999424575263, iteration: 89398
loss: 1.0400769710540771,grad_norm: 0.9508883693167615, iteration: 89399
loss: 1.014492392539978,grad_norm: 0.9999994591532266, iteration: 89400
loss: 1.033396601676941,grad_norm: 0.8656749573698863, iteration: 89401
loss: 1.0372117757797241,grad_norm: 0.8520881461362617, iteration: 89402
loss: 1.0051246881484985,grad_norm: 0.9999996125946995, iteration: 89403
loss: 1.0363904237747192,grad_norm: 0.9999991386171789, iteration: 89404
loss: 1.0664410591125488,grad_norm: 0.9999999472826142, iteration: 89405
loss: 1.0632967948913574,grad_norm: 0.8459372707316587, iteration: 89406
loss: 1.1009185314178467,grad_norm: 0.8559942022884176, iteration: 89407
loss: 1.0230565071105957,grad_norm: 0.8354301978933478, iteration: 89408
loss: 0.9819033145904541,grad_norm: 0.8871584579331152, iteration: 89409
loss: 1.0836971998214722,grad_norm: 0.9999991495523946, iteration: 89410
loss: 1.026228904724121,grad_norm: 0.9638557923008743, iteration: 89411
loss: 1.0040661096572876,grad_norm: 0.9038382235565867, iteration: 89412
loss: 1.0951467752456665,grad_norm: 0.9688644824531911, iteration: 89413
loss: 1.0165040493011475,grad_norm: 0.7631500858642797, iteration: 89414
loss: 1.0186641216278076,grad_norm: 0.8743197582580561, iteration: 89415
loss: 0.989935576915741,grad_norm: 0.9853443066908529, iteration: 89416
loss: 1.0485639572143555,grad_norm: 0.9999994670879648, iteration: 89417
loss: 1.1264649629592896,grad_norm: 0.9999996493276477, iteration: 89418
loss: 1.035244107246399,grad_norm: 0.9342991672786415, iteration: 89419
loss: 1.0368472337722778,grad_norm: 0.8525279067777772, iteration: 89420
loss: 1.0330325365066528,grad_norm: 0.9572160271549984, iteration: 89421
loss: 1.0167195796966553,grad_norm: 0.9913585396806439, iteration: 89422
loss: 1.0329973697662354,grad_norm: 0.9999992013929985, iteration: 89423
loss: 0.9595633745193481,grad_norm: 0.9999992338433374, iteration: 89424
loss: 1.1073331832885742,grad_norm: 0.9999995623348513, iteration: 89425
loss: 0.9999926686286926,grad_norm: 0.9029754849281068, iteration: 89426
loss: 1.0046290159225464,grad_norm: 0.9999992965079246, iteration: 89427
loss: 1.0186785459518433,grad_norm: 0.9999999726552886, iteration: 89428
loss: 1.0476881265640259,grad_norm: 0.999999234469074, iteration: 89429
loss: 1.0268040895462036,grad_norm: 0.9999991739373801, iteration: 89430
loss: 0.9769365191459656,grad_norm: 0.8516877229137211, iteration: 89431
loss: 0.9907474517822266,grad_norm: 0.8487136398579906, iteration: 89432
loss: 1.0117474794387817,grad_norm: 0.9953790747679463, iteration: 89433
loss: 0.98247891664505,grad_norm: 0.9999990019832835, iteration: 89434
loss: 1.079598307609558,grad_norm: 0.999999215883293, iteration: 89435
loss: 1.0388851165771484,grad_norm: 0.9999992718476491, iteration: 89436
loss: 1.0307084321975708,grad_norm: 0.9999999179685577, iteration: 89437
loss: 1.0207408666610718,grad_norm: 0.9999990650222023, iteration: 89438
loss: 1.0436526536941528,grad_norm: 0.999999061141284, iteration: 89439
loss: 1.0268570184707642,grad_norm: 0.9999996556290051, iteration: 89440
loss: 1.0082162618637085,grad_norm: 0.9706256181846786, iteration: 89441
loss: 0.9968423247337341,grad_norm: 0.9999991197487859, iteration: 89442
loss: 0.985173761844635,grad_norm: 0.9999993593402751, iteration: 89443
loss: 1.0804872512817383,grad_norm: 0.999999314061583, iteration: 89444
loss: 1.0290279388427734,grad_norm: 0.937000362093447, iteration: 89445
loss: 0.9654428362846375,grad_norm: 0.9999989693412344, iteration: 89446
loss: 1.0187660455703735,grad_norm: 0.9381433155094657, iteration: 89447
loss: 1.0129894018173218,grad_norm: 0.999999666912305, iteration: 89448
loss: 1.059014916419983,grad_norm: 0.9999991191571184, iteration: 89449
loss: 1.017245888710022,grad_norm: 0.8689253004916252, iteration: 89450
loss: 1.011468768119812,grad_norm: 0.9979610243381927, iteration: 89451
loss: 1.113603115081787,grad_norm: 0.999999011735684, iteration: 89452
loss: 0.988469123840332,grad_norm: 0.9999999422351545, iteration: 89453
loss: 1.0178278684616089,grad_norm: 0.9999996269538962, iteration: 89454
loss: 0.9942393898963928,grad_norm: 0.9999990887134952, iteration: 89455
loss: 0.995000422000885,grad_norm: 0.9999993365077384, iteration: 89456
loss: 1.0080686807632446,grad_norm: 0.9999992950512898, iteration: 89457
loss: 1.0152820348739624,grad_norm: 0.9999990710003075, iteration: 89458
loss: 0.9918128252029419,grad_norm: 0.9760667551694756, iteration: 89459
loss: 1.0652233362197876,grad_norm: 0.8530578586910409, iteration: 89460
loss: 0.9571967720985413,grad_norm: 0.9999992658992116, iteration: 89461
loss: 0.9909366369247437,grad_norm: 0.9177852833945277, iteration: 89462
loss: 1.1363476514816284,grad_norm: 0.948248291156386, iteration: 89463
loss: 0.9847240447998047,grad_norm: 0.9559909861374644, iteration: 89464
loss: 0.9363800287246704,grad_norm: 0.9999991284438032, iteration: 89465
loss: 1.0088787078857422,grad_norm: 0.8903192514464477, iteration: 89466
loss: 1.1345688104629517,grad_norm: 0.999999087752802, iteration: 89467
loss: 1.068085789680481,grad_norm: 0.9999993845759527, iteration: 89468
loss: 1.024887204170227,grad_norm: 0.9999991061423711, iteration: 89469
loss: 1.0097527503967285,grad_norm: 0.9797613973252365, iteration: 89470
loss: 0.9678563475608826,grad_norm: 0.7816605009646533, iteration: 89471
loss: 1.0279531478881836,grad_norm: 0.9999993185945183, iteration: 89472
loss: 1.2263996601104736,grad_norm: 0.9999994448672088, iteration: 89473
loss: 1.0668505430221558,grad_norm: 0.9999995836304227, iteration: 89474
loss: 1.019514560699463,grad_norm: 0.9239042235788262, iteration: 89475
loss: 0.990334689617157,grad_norm: 0.8063504755631555, iteration: 89476
loss: 0.9719727039337158,grad_norm: 0.9145915213405812, iteration: 89477
loss: 0.963496744632721,grad_norm: 0.9963126666108595, iteration: 89478
loss: 0.9572270512580872,grad_norm: 0.9234405051999399, iteration: 89479
loss: 0.9719032645225525,grad_norm: 0.8497181582229727, iteration: 89480
loss: 1.0668278932571411,grad_norm: 0.9999994456216155, iteration: 89481
loss: 1.0244163274765015,grad_norm: 0.9999996046017299, iteration: 89482
loss: 1.2652695178985596,grad_norm: 0.9999993349827871, iteration: 89483
loss: 1.031138300895691,grad_norm: 0.8134603107577616, iteration: 89484
loss: 0.9966659545898438,grad_norm: 0.9903111480401504, iteration: 89485
loss: 1.0159552097320557,grad_norm: 1.000000095726046, iteration: 89486
loss: 1.0020779371261597,grad_norm: 0.9999995204497003, iteration: 89487
loss: 1.0239142179489136,grad_norm: 0.9196230317700361, iteration: 89488
loss: 1.0982742309570312,grad_norm: 0.99999915846837, iteration: 89489
loss: 1.0231144428253174,grad_norm: 0.9999993948210989, iteration: 89490
loss: 1.0202876329421997,grad_norm: 0.8669157941186346, iteration: 89491
loss: 1.0244916677474976,grad_norm: 0.9507731727240082, iteration: 89492
loss: 1.008073329925537,grad_norm: 0.8111224134673106, iteration: 89493
loss: 1.0324835777282715,grad_norm: 0.9999998636476359, iteration: 89494
loss: 1.014596700668335,grad_norm: 0.9999989965227316, iteration: 89495
loss: 1.0420676469802856,grad_norm: 0.9999993178655728, iteration: 89496
loss: 1.0097718238830566,grad_norm: 0.9848692942898953, iteration: 89497
loss: 1.0160460472106934,grad_norm: 0.9999991050227157, iteration: 89498
loss: 0.9855619668960571,grad_norm: 0.7353578265860058, iteration: 89499
loss: 1.0320885181427002,grad_norm: 0.9999992043263302, iteration: 89500
loss: 1.0400488376617432,grad_norm: 0.9938339991100421, iteration: 89501
loss: 1.0155433416366577,grad_norm: 0.9999989843048379, iteration: 89502
loss: 1.077486276626587,grad_norm: 0.9999995020888179, iteration: 89503
loss: 0.9977712035179138,grad_norm: 0.9383388045066959, iteration: 89504
loss: 1.0146039724349976,grad_norm: 0.8723100460915791, iteration: 89505
loss: 1.0051312446594238,grad_norm: 0.8746530629813591, iteration: 89506
loss: 1.016464352607727,grad_norm: 0.9429082661720756, iteration: 89507
loss: 1.0286102294921875,grad_norm: 0.8421913854646282, iteration: 89508
loss: 0.9907401204109192,grad_norm: 0.9303530011594009, iteration: 89509
loss: 0.9710547924041748,grad_norm: 0.9508756783791672, iteration: 89510
loss: 1.0336328744888306,grad_norm: 0.9999996625593262, iteration: 89511
loss: 0.984721302986145,grad_norm: 0.9999991593796388, iteration: 89512
loss: 0.9661286473274231,grad_norm: 0.9999993768358714, iteration: 89513
loss: 1.0141268968582153,grad_norm: 0.9999994183175045, iteration: 89514
loss: 0.966236412525177,grad_norm: 0.9999991842170792, iteration: 89515
loss: 1.1179118156433105,grad_norm: 0.9999994523267621, iteration: 89516
loss: 1.0135341882705688,grad_norm: 0.8488242966511109, iteration: 89517
loss: 1.027061104774475,grad_norm: 0.9770839565709261, iteration: 89518
loss: 1.0116114616394043,grad_norm: 0.9999999046370633, iteration: 89519
loss: 1.0016472339630127,grad_norm: 0.9999991837682872, iteration: 89520
loss: 1.0613431930541992,grad_norm: 0.9999991021976945, iteration: 89521
loss: 0.9878149032592773,grad_norm: 0.99138079591685, iteration: 89522
loss: 1.0567435026168823,grad_norm: 0.9999996454890097, iteration: 89523
loss: 0.9821539521217346,grad_norm: 0.9969218706063149, iteration: 89524
loss: 1.0015166997909546,grad_norm: 0.9999991320141075, iteration: 89525
loss: 0.9795706868171692,grad_norm: 0.9999993074188924, iteration: 89526
loss: 0.9891685247421265,grad_norm: 0.9999990425964373, iteration: 89527
loss: 0.9931289553642273,grad_norm: 0.9250088369448417, iteration: 89528
loss: 0.9886291027069092,grad_norm: 0.8490539527112193, iteration: 89529
loss: 1.0161902904510498,grad_norm: 0.9999997937122753, iteration: 89530
loss: 1.025387167930603,grad_norm: 0.9615238127861759, iteration: 89531
loss: 0.9450377225875854,grad_norm: 0.9999990072730973, iteration: 89532
loss: 1.0017454624176025,grad_norm: 0.9999991505080644, iteration: 89533
loss: 1.0017269849777222,grad_norm: 0.9220814574509731, iteration: 89534
loss: 1.0074405670166016,grad_norm: 0.9999996209575597, iteration: 89535
loss: 0.9962140917778015,grad_norm: 0.8854783372456975, iteration: 89536
loss: 1.0857517719268799,grad_norm: 0.9999996914188451, iteration: 89537
loss: 0.9800362586975098,grad_norm: 0.9999990936335658, iteration: 89538
loss: 1.0056047439575195,grad_norm: 0.8925048314335973, iteration: 89539
loss: 1.0100178718566895,grad_norm: 0.8268270702367227, iteration: 89540
loss: 0.9833709001541138,grad_norm: 0.9894045279588709, iteration: 89541
loss: 1.0182809829711914,grad_norm: 0.9999990987945359, iteration: 89542
loss: 1.0064411163330078,grad_norm: 0.8369353026856392, iteration: 89543
loss: 0.9997609257698059,grad_norm: 0.9999988986985415, iteration: 89544
loss: 1.0344712734222412,grad_norm: 0.9019796874637782, iteration: 89545
loss: 0.9777000546455383,grad_norm: 0.9999991476996505, iteration: 89546
loss: 1.0247559547424316,grad_norm: 0.9999991245195234, iteration: 89547
loss: 1.016241431236267,grad_norm: 0.9999995539123528, iteration: 89548
loss: 1.0185192823410034,grad_norm: 0.9497593690492512, iteration: 89549
loss: 0.9756261110305786,grad_norm: 0.9999991823636735, iteration: 89550
loss: 1.0336686372756958,grad_norm: 0.921594849614088, iteration: 89551
loss: 1.0167406797409058,grad_norm: 0.8724447582448072, iteration: 89552
loss: 1.040623664855957,grad_norm: 0.9561910875836656, iteration: 89553
loss: 1.0067789554595947,grad_norm: 0.9999990622992763, iteration: 89554
loss: 0.9928795695304871,grad_norm: 0.9999994043775918, iteration: 89555
loss: 0.9877768754959106,grad_norm: 0.9999991106524952, iteration: 89556
loss: 1.0036042928695679,grad_norm: 0.9825363574624174, iteration: 89557
loss: 1.056050181388855,grad_norm: 0.924046780700004, iteration: 89558
loss: 1.0118436813354492,grad_norm: 0.8612819449215585, iteration: 89559
loss: 1.030956745147705,grad_norm: 0.8207556807668424, iteration: 89560
loss: 1.014521598815918,grad_norm: 0.8437202829433341, iteration: 89561
loss: 1.0117502212524414,grad_norm: 0.8191907323943117, iteration: 89562
loss: 0.9937307238578796,grad_norm: 0.9681550952095294, iteration: 89563
loss: 0.983635663986206,grad_norm: 0.8098595905034919, iteration: 89564
loss: 1.114715576171875,grad_norm: 0.9999992363064333, iteration: 89565
loss: 0.943267822265625,grad_norm: 0.8976841529831782, iteration: 89566
loss: 1.1225577592849731,grad_norm: 0.9999999037790289, iteration: 89567
loss: 1.0198057889938354,grad_norm: 0.8461979346055678, iteration: 89568
loss: 1.0604119300842285,grad_norm: 0.9999992289238379, iteration: 89569
loss: 0.988378643989563,grad_norm: 0.9999991455463458, iteration: 89570
loss: 1.036751389503479,grad_norm: 0.9999992193590487, iteration: 89571
loss: 1.0000756978988647,grad_norm: 0.890386133391876, iteration: 89572
loss: 0.994823157787323,grad_norm: 0.9624091699020051, iteration: 89573
loss: 1.0221983194351196,grad_norm: 0.9999991026463477, iteration: 89574
loss: 1.0124095678329468,grad_norm: 0.7706149820643355, iteration: 89575
loss: 0.9986078143119812,grad_norm: 0.8852292572013689, iteration: 89576
loss: 0.9859279990196228,grad_norm: 0.9857887174233207, iteration: 89577
loss: 1.0087440013885498,grad_norm: 0.9500666493639125, iteration: 89578
loss: 1.0431461334228516,grad_norm: 0.9127530826516991, iteration: 89579
loss: 1.0629664659500122,grad_norm: 0.9999993989063399, iteration: 89580
loss: 1.0611265897750854,grad_norm: 0.9473506202858591, iteration: 89581
loss: 0.9903165102005005,grad_norm: 0.9660212326761503, iteration: 89582
loss: 0.9921161532402039,grad_norm: 0.9876175474315528, iteration: 89583
loss: 1.034388542175293,grad_norm: 0.9999990896334631, iteration: 89584
loss: 1.0703202486038208,grad_norm: 0.9999993771891007, iteration: 89585
loss: 1.0198605060577393,grad_norm: 0.9916384589089507, iteration: 89586
loss: 1.017860770225525,grad_norm: 0.8941999506148921, iteration: 89587
loss: 1.0025266408920288,grad_norm: 0.760571249944641, iteration: 89588
loss: 0.9791974425315857,grad_norm: 0.7984289182293917, iteration: 89589
loss: 0.9901365041732788,grad_norm: 0.9999989823375064, iteration: 89590
loss: 1.0141706466674805,grad_norm: 0.9999995958275101, iteration: 89591
loss: 1.0300981998443604,grad_norm: 0.9158302147671125, iteration: 89592
loss: 1.084396481513977,grad_norm: 1.000000000901007, iteration: 89593
loss: 1.054151177406311,grad_norm: 0.9999998858543803, iteration: 89594
loss: 1.011817455291748,grad_norm: 0.9999991692854804, iteration: 89595
loss: 1.0064431428909302,grad_norm: 0.9105158961104998, iteration: 89596
loss: 1.0240750312805176,grad_norm: 0.8815039835518407, iteration: 89597
loss: 0.9908394813537598,grad_norm: 0.9999990172563554, iteration: 89598
loss: 1.042559027671814,grad_norm: 0.9999990350700436, iteration: 89599
loss: 1.0572230815887451,grad_norm: 0.9484510176660412, iteration: 89600
loss: 1.0798801183700562,grad_norm: 0.9325049185929033, iteration: 89601
loss: 1.0645753145217896,grad_norm: 0.9999998416143452, iteration: 89602
loss: 1.1284375190734863,grad_norm: 0.9999998726998734, iteration: 89603
loss: 0.9736747145652771,grad_norm: 0.8684066465210633, iteration: 89604
loss: 1.0078414678573608,grad_norm: 0.829335969617058, iteration: 89605
loss: 0.9545392394065857,grad_norm: 0.9406362218398616, iteration: 89606
loss: 0.9969556927680969,grad_norm: 0.9999992042646824, iteration: 89607
loss: 1.0491280555725098,grad_norm: 0.9608643326685383, iteration: 89608
loss: 1.027783989906311,grad_norm: 0.9944976026956531, iteration: 89609
loss: 1.0187275409698486,grad_norm: 0.9999991128953121, iteration: 89610
loss: 0.994236409664154,grad_norm: 0.9997506013378261, iteration: 89611
loss: 0.9915558099746704,grad_norm: 0.9999992090511299, iteration: 89612
loss: 0.9456178545951843,grad_norm: 0.9215049041843945, iteration: 89613
loss: 1.0594000816345215,grad_norm: 0.9999997768499257, iteration: 89614
loss: 0.9834144115447998,grad_norm: 0.7962702247778414, iteration: 89615
loss: 1.038878083229065,grad_norm: 0.7771760994134962, iteration: 89616
loss: 1.065116286277771,grad_norm: 0.8226066230111791, iteration: 89617
loss: 1.0562026500701904,grad_norm: 0.9999998907523928, iteration: 89618
loss: 0.9952269196510315,grad_norm: 0.7958108846307971, iteration: 89619
loss: 1.1348106861114502,grad_norm: 0.9329515632888852, iteration: 89620
loss: 1.1242046356201172,grad_norm: 0.99999956547548, iteration: 89621
loss: 1.0082145929336548,grad_norm: 0.8964960771802413, iteration: 89622
loss: 1.0086512565612793,grad_norm: 0.926978820658791, iteration: 89623
loss: 1.0824558734893799,grad_norm: 0.9999991917439048, iteration: 89624
loss: 1.1259382963180542,grad_norm: 0.9999995926480856, iteration: 89625
loss: 0.9882873892784119,grad_norm: 0.9999991106215907, iteration: 89626
loss: 1.065768837928772,grad_norm: 0.9999990694870032, iteration: 89627
loss: 1.0521764755249023,grad_norm: 0.9389129886139174, iteration: 89628
loss: 1.1662412881851196,grad_norm: 0.9999991840862179, iteration: 89629
loss: 0.954071581363678,grad_norm: 0.9168993655197539, iteration: 89630
loss: 0.9725982546806335,grad_norm: 0.9999991794115736, iteration: 89631
loss: 1.100625991821289,grad_norm: 0.9999990993091562, iteration: 89632
loss: 1.2226618528366089,grad_norm: 0.9999997056732127, iteration: 89633
loss: 0.9870119094848633,grad_norm: 0.9999995458141456, iteration: 89634
loss: 1.077894687652588,grad_norm: 0.9999991025371371, iteration: 89635
loss: 1.091149926185608,grad_norm: 0.9999994335918214, iteration: 89636
loss: 1.0458552837371826,grad_norm: 1.000000075731951, iteration: 89637
loss: 1.0709718465805054,grad_norm: 0.999999111303359, iteration: 89638
loss: 1.0867586135864258,grad_norm: 0.9999989328762917, iteration: 89639
loss: 1.1412291526794434,grad_norm: 0.9999994265218695, iteration: 89640
loss: 0.9822644591331482,grad_norm: 0.7651035604550119, iteration: 89641
loss: 1.1614829301834106,grad_norm: 0.9999992142878891, iteration: 89642
loss: 1.13357412815094,grad_norm: 0.9999999733789775, iteration: 89643
loss: 1.0699703693389893,grad_norm: 0.9999998058276612, iteration: 89644
loss: 1.0152511596679688,grad_norm: 0.8850478368081941, iteration: 89645
loss: 0.9799036383628845,grad_norm: 0.8107025358528204, iteration: 89646
loss: 1.0378257036209106,grad_norm: 0.9999998373923602, iteration: 89647
loss: 1.0670783519744873,grad_norm: 0.9999991539778178, iteration: 89648
loss: 1.010398268699646,grad_norm: 0.999999192490081, iteration: 89649
loss: 1.0155582427978516,grad_norm: 0.9999991938610104, iteration: 89650
loss: 1.0061475038528442,grad_norm: 0.9999991428715762, iteration: 89651
loss: 1.0556823015213013,grad_norm: 0.8983965399362657, iteration: 89652
loss: 1.0769239664077759,grad_norm: 0.915854115446178, iteration: 89653
loss: 1.080151081085205,grad_norm: 0.9999995116034783, iteration: 89654
loss: 1.0056763887405396,grad_norm: 0.8175877934819051, iteration: 89655
loss: 1.0419143438339233,grad_norm: 0.9273042139882459, iteration: 89656
loss: 1.0782477855682373,grad_norm: 0.9999990348820749, iteration: 89657
loss: 1.040703535079956,grad_norm: 0.9999992269892608, iteration: 89658
loss: 0.9954720735549927,grad_norm: 0.8889232349186909, iteration: 89659
loss: 1.1051713228225708,grad_norm: 0.9999994235015587, iteration: 89660
loss: 1.0048503875732422,grad_norm: 0.9999991323485432, iteration: 89661
loss: 1.0141162872314453,grad_norm: 0.8305793485572146, iteration: 89662
loss: 1.0614656209945679,grad_norm: 0.9999996704366161, iteration: 89663
loss: 0.9942978024482727,grad_norm: 0.9886181315476666, iteration: 89664
loss: 1.0131162405014038,grad_norm: 0.9999994125668319, iteration: 89665
loss: 1.0431900024414062,grad_norm: 0.9999992317682151, iteration: 89666
loss: 0.9783048629760742,grad_norm: 0.9999990319851754, iteration: 89667
loss: 0.996262788772583,grad_norm: 0.8522223718215305, iteration: 89668
loss: 1.1479779481887817,grad_norm: 0.9999998355094474, iteration: 89669
loss: 1.0520447492599487,grad_norm: 0.9999990966310341, iteration: 89670
loss: 1.0805811882019043,grad_norm: 0.9999991926354536, iteration: 89671
loss: 1.0065562725067139,grad_norm: 0.9999990337362588, iteration: 89672
loss: 1.0042636394500732,grad_norm: 0.9999990023525884, iteration: 89673
loss: 1.0202516317367554,grad_norm: 0.9999991855922467, iteration: 89674
loss: 1.015575647354126,grad_norm: 0.9540894949893499, iteration: 89675
loss: 1.0123261213302612,grad_norm: 0.9999991180826198, iteration: 89676
loss: 0.9875179529190063,grad_norm: 0.999999858992921, iteration: 89677
loss: 1.0537785291671753,grad_norm: 0.8337240298745879, iteration: 89678
loss: 1.0215193033218384,grad_norm: 0.8736334615929228, iteration: 89679
loss: 0.9941405653953552,grad_norm: 0.883087910155698, iteration: 89680
loss: 0.9812385439872742,grad_norm: 0.9009525582136252, iteration: 89681
loss: 0.9974592924118042,grad_norm: 0.9999989334502396, iteration: 89682
loss: 1.0400370359420776,grad_norm: 0.8622941300706358, iteration: 89683
loss: 1.0158475637435913,grad_norm: 0.9999996431617176, iteration: 89684
loss: 0.9812448024749756,grad_norm: 0.9534691097482638, iteration: 89685
loss: 0.9871963858604431,grad_norm: 0.985879027566163, iteration: 89686
loss: 0.9884101748466492,grad_norm: 0.9999990872479967, iteration: 89687
loss: 1.0205669403076172,grad_norm: 0.9511882001106069, iteration: 89688
loss: 0.9970861673355103,grad_norm: 0.8233318368642182, iteration: 89689
loss: 0.9981042146682739,grad_norm: 0.9954328635305884, iteration: 89690
loss: 1.0827082395553589,grad_norm: 0.9341839172550547, iteration: 89691
loss: 1.04728364944458,grad_norm: 0.8778733415062011, iteration: 89692
loss: 1.0146328210830688,grad_norm: 0.7602039446261878, iteration: 89693
loss: 1.0244704484939575,grad_norm: 0.9397638967928358, iteration: 89694
loss: 1.1137497425079346,grad_norm: 0.9595404836910756, iteration: 89695
loss: 0.9984065890312195,grad_norm: 0.8878906315346339, iteration: 89696
loss: 1.0081353187561035,grad_norm: 0.9999992481288058, iteration: 89697
loss: 0.9926573634147644,grad_norm: 0.866905513065584, iteration: 89698
loss: 1.033894419670105,grad_norm: 0.8159290703128125, iteration: 89699
loss: 1.0432442426681519,grad_norm: 0.999999057315503, iteration: 89700
loss: 1.0235742330551147,grad_norm: 0.8349821567069624, iteration: 89701
loss: 1.0006115436553955,grad_norm: 0.9731984729684785, iteration: 89702
loss: 1.0340632200241089,grad_norm: 0.8469680340911138, iteration: 89703
loss: 0.9870068430900574,grad_norm: 0.8547278145661434, iteration: 89704
loss: 0.9852955341339111,grad_norm: 0.9999990932694925, iteration: 89705
loss: 1.036368489265442,grad_norm: 0.9673492319740228, iteration: 89706
loss: 1.0149827003479004,grad_norm: 0.999999027945234, iteration: 89707
loss: 1.0135111808776855,grad_norm: 0.999999203382184, iteration: 89708
loss: 1.0179290771484375,grad_norm: 0.8753411176348221, iteration: 89709
loss: 0.9808688759803772,grad_norm: 0.9999991220629764, iteration: 89710
loss: 1.038683533668518,grad_norm: 0.9999991303427296, iteration: 89711
loss: 0.998273491859436,grad_norm: 0.8897490124922638, iteration: 89712
loss: 0.9990135431289673,grad_norm: 0.9919772248328828, iteration: 89713
loss: 1.0567575693130493,grad_norm: 0.9999999385587348, iteration: 89714
loss: 1.2307039499282837,grad_norm: 0.9999999160821804, iteration: 89715
loss: 0.9894583821296692,grad_norm: 0.9999989673388869, iteration: 89716
loss: 1.0075565576553345,grad_norm: 0.7891520557896925, iteration: 89717
loss: 1.0124635696411133,grad_norm: 0.9999991133473717, iteration: 89718
loss: 0.9958745837211609,grad_norm: 0.9999990803040276, iteration: 89719
loss: 0.9892165064811707,grad_norm: 0.9999991716739803, iteration: 89720
loss: 0.9968193173408508,grad_norm: 0.7957619258126011, iteration: 89721
loss: 0.9927043914794922,grad_norm: 0.9999989650790886, iteration: 89722
loss: 1.0056577920913696,grad_norm: 0.8146493536105747, iteration: 89723
loss: 1.0175726413726807,grad_norm: 0.9999989977972749, iteration: 89724
loss: 1.0636048316955566,grad_norm: 0.9999991993554562, iteration: 89725
loss: 1.0230928659439087,grad_norm: 0.9999990661934375, iteration: 89726
loss: 0.9871543645858765,grad_norm: 0.9999990250615733, iteration: 89727
loss: 0.9912401437759399,grad_norm: 0.9580892238692136, iteration: 89728
loss: 0.9986075758934021,grad_norm: 0.8901058691811574, iteration: 89729
loss: 1.0370925664901733,grad_norm: 0.999999093483702, iteration: 89730
loss: 1.0816819667816162,grad_norm: 0.9999998477826355, iteration: 89731
loss: 1.0245381593704224,grad_norm: 0.9999995412673379, iteration: 89732
loss: 1.0212684869766235,grad_norm: 0.7284255336366796, iteration: 89733
loss: 0.9611340165138245,grad_norm: 0.8896656189669591, iteration: 89734
loss: 1.0108542442321777,grad_norm: 0.9999990861663126, iteration: 89735
loss: 0.9903295040130615,grad_norm: 0.9307169134505363, iteration: 89736
loss: 0.9546715617179871,grad_norm: 0.9999991315320526, iteration: 89737
loss: 1.0424691438674927,grad_norm: 0.999999134170492, iteration: 89738
loss: 1.068922758102417,grad_norm: 0.9999994450556682, iteration: 89739
loss: 0.977197527885437,grad_norm: 0.9999990972626382, iteration: 89740
loss: 0.9727134108543396,grad_norm: 0.9999990511489214, iteration: 89741
loss: 0.9990108609199524,grad_norm: 0.9877639345104957, iteration: 89742
loss: 0.9991834759712219,grad_norm: 0.9999992180623754, iteration: 89743
loss: 0.9869763255119324,grad_norm: 0.7685893459302097, iteration: 89744
loss: 1.0025789737701416,grad_norm: 0.9999990841366695, iteration: 89745
loss: 1.0024062395095825,grad_norm: 0.9999994528168941, iteration: 89746
loss: 1.008649468421936,grad_norm: 0.9960551942572661, iteration: 89747
loss: 0.9900115728378296,grad_norm: 0.9999990651838933, iteration: 89748
loss: 0.9998544454574585,grad_norm: 0.8563787773040299, iteration: 89749
loss: 1.001650333404541,grad_norm: 0.8694680152354596, iteration: 89750
loss: 1.0186625719070435,grad_norm: 0.99999902867738, iteration: 89751
loss: 1.0303871631622314,grad_norm: 0.9999990697735092, iteration: 89752
loss: 0.9933977127075195,grad_norm: 0.9999993879247613, iteration: 89753
loss: 1.014747142791748,grad_norm: 0.9999997133239904, iteration: 89754
loss: 1.041822910308838,grad_norm: 0.9999991515238091, iteration: 89755
loss: 1.1419881582260132,grad_norm: 0.9999998479482216, iteration: 89756
loss: 1.015099287033081,grad_norm: 0.9909582801164013, iteration: 89757
loss: 0.9649410843849182,grad_norm: 0.9666692558981493, iteration: 89758
loss: 0.9908792972564697,grad_norm: 0.9826335737120782, iteration: 89759
loss: 1.0033866167068481,grad_norm: 0.7195549800289704, iteration: 89760
loss: 0.9891747236251831,grad_norm: 0.8755583506758307, iteration: 89761
loss: 1.0325040817260742,grad_norm: 0.8596817449188984, iteration: 89762
loss: 1.0058915615081787,grad_norm: 0.999999251601367, iteration: 89763
loss: 1.0113064050674438,grad_norm: 0.9999995074182316, iteration: 89764
loss: 0.9976356625556946,grad_norm: 0.858344320796016, iteration: 89765
loss: 1.012892246246338,grad_norm: 0.8481057531181374, iteration: 89766
loss: 0.9711498618125916,grad_norm: 0.9687227439192198, iteration: 89767
loss: 1.012831211090088,grad_norm: 0.925141266122317, iteration: 89768
loss: 0.9926554560661316,grad_norm: 0.9783749888277361, iteration: 89769
loss: 1.0545042753219604,grad_norm: 0.9999990177602117, iteration: 89770
loss: 1.1412841081619263,grad_norm: 0.9999998544665261, iteration: 89771
loss: 1.008793830871582,grad_norm: 0.8892517237208605, iteration: 89772
loss: 1.0301733016967773,grad_norm: 0.9426053706029791, iteration: 89773
loss: 1.0162276029586792,grad_norm: 0.9662171782464903, iteration: 89774
loss: 1.030021071434021,grad_norm: 0.9999996628705488, iteration: 89775
loss: 0.9534369707107544,grad_norm: 0.9492592356021083, iteration: 89776
loss: 1.007544755935669,grad_norm: 0.8414800557824007, iteration: 89777
loss: 1.0014164447784424,grad_norm: 0.8949591982821128, iteration: 89778
loss: 1.0271029472351074,grad_norm: 0.8413548353725413, iteration: 89779
loss: 0.9983447790145874,grad_norm: 0.9999989399297257, iteration: 89780
loss: 1.0412923097610474,grad_norm: 0.9165015085998109, iteration: 89781
loss: 1.0226854085922241,grad_norm: 0.9999998779920375, iteration: 89782
loss: 0.9927576780319214,grad_norm: 0.9596822592855578, iteration: 89783
loss: 0.9893834590911865,grad_norm: 0.9999990905277452, iteration: 89784
loss: 0.983885645866394,grad_norm: 0.9132159633175458, iteration: 89785
loss: 0.9809311628341675,grad_norm: 0.8458814314791597, iteration: 89786
loss: 0.9910268187522888,grad_norm: 0.8626109745083077, iteration: 89787
loss: 1.0294359922409058,grad_norm: 0.825452032062503, iteration: 89788
loss: 0.9920186400413513,grad_norm: 0.8794442058612855, iteration: 89789
loss: 1.0039005279541016,grad_norm: 0.9999989866678968, iteration: 89790
loss: 0.9917507767677307,grad_norm: 0.9999991271338, iteration: 89791
loss: 1.0240565538406372,grad_norm: 0.9999991602401979, iteration: 89792
loss: 1.009087085723877,grad_norm: 0.9999990887957342, iteration: 89793
loss: 1.018439769744873,grad_norm: 0.9900950285881491, iteration: 89794
loss: 1.0173181295394897,grad_norm: 0.9999990739367265, iteration: 89795
loss: 0.9702123999595642,grad_norm: 0.999999027651344, iteration: 89796
loss: 1.2902051210403442,grad_norm: 0.99999949544745, iteration: 89797
loss: 1.0183895826339722,grad_norm: 0.9107295952930218, iteration: 89798
loss: 1.1646852493286133,grad_norm: 0.9999994956134042, iteration: 89799
loss: 1.0234256982803345,grad_norm: 0.9999991341181971, iteration: 89800
loss: 1.0094236135482788,grad_norm: 0.8441402963134998, iteration: 89801
loss: 1.0924572944641113,grad_norm: 0.9999995782867717, iteration: 89802
loss: 0.9824076294898987,grad_norm: 0.9950463245474925, iteration: 89803
loss: 1.001753330230713,grad_norm: 0.8929896859511562, iteration: 89804
loss: 1.008373498916626,grad_norm: 0.8527210244531116, iteration: 89805
loss: 1.0161608457565308,grad_norm: 0.9999991178691258, iteration: 89806
loss: 0.9621410965919495,grad_norm: 0.871670950152216, iteration: 89807
loss: 1.0206899642944336,grad_norm: 0.8576948858467756, iteration: 89808
loss: 1.0489327907562256,grad_norm: 0.9651124832977732, iteration: 89809
loss: 1.0098310708999634,grad_norm: 0.8524037232950672, iteration: 89810
loss: 1.1200156211853027,grad_norm: 0.9999995383439474, iteration: 89811
loss: 0.9677822589874268,grad_norm: 0.9744534756972979, iteration: 89812
loss: 1.013468861579895,grad_norm: 0.9999998535500589, iteration: 89813
loss: 1.051925778388977,grad_norm: 0.8488128298023885, iteration: 89814
loss: 1.0074818134307861,grad_norm: 0.9999996776052397, iteration: 89815
loss: 1.0512856245040894,grad_norm: 1.0000000540177971, iteration: 89816
loss: 1.0855872631072998,grad_norm: 0.999999748814682, iteration: 89817
loss: 0.9910439848899841,grad_norm: 0.972768659639493, iteration: 89818
loss: 0.989388644695282,grad_norm: 0.9999990399257022, iteration: 89819
loss: 1.1348942518234253,grad_norm: 0.9999997672402882, iteration: 89820
loss: 1.106345295906067,grad_norm: 0.9999991103754846, iteration: 89821
loss: 1.0487678050994873,grad_norm: 0.9999992435421418, iteration: 89822
loss: 1.104629397392273,grad_norm: 0.9999992436807397, iteration: 89823
loss: 1.0092741250991821,grad_norm: 0.8588453637780042, iteration: 89824
loss: 0.986741840839386,grad_norm: 0.9669110143928386, iteration: 89825
loss: 1.210828423500061,grad_norm: 0.9999994678494016, iteration: 89826
loss: 1.0441904067993164,grad_norm: 0.9999995905444202, iteration: 89827
loss: 1.1226414442062378,grad_norm: 0.999999396099317, iteration: 89828
loss: 1.0059750080108643,grad_norm: 0.9716803597309983, iteration: 89829
loss: 1.0343085527420044,grad_norm: 0.850044491131704, iteration: 89830
loss: 0.9962244629859924,grad_norm: 0.9999990115526682, iteration: 89831
loss: 1.0814272165298462,grad_norm: 0.9999993709425791, iteration: 89832
loss: 1.0249584913253784,grad_norm: 0.8785323891516379, iteration: 89833
loss: 1.0071409940719604,grad_norm: 0.866981647934319, iteration: 89834
loss: 0.97479647397995,grad_norm: 0.8380492457777037, iteration: 89835
loss: 1.044312834739685,grad_norm: 0.9080147337467689, iteration: 89836
loss: 1.0038557052612305,grad_norm: 0.9416492791514577, iteration: 89837
loss: 1.053101897239685,grad_norm: 0.9320463166465098, iteration: 89838
loss: 1.0063246488571167,grad_norm: 0.9999990870781024, iteration: 89839
loss: 1.0097546577453613,grad_norm: 0.9815863404559176, iteration: 89840
loss: 0.9766877889633179,grad_norm: 0.749540785802923, iteration: 89841
loss: 0.9868626594543457,grad_norm: 0.9999992629449658, iteration: 89842
loss: 0.9952044486999512,grad_norm: 0.7781632947265973, iteration: 89843
loss: 0.9908785223960876,grad_norm: 0.95161108640687, iteration: 89844
loss: 1.0008074045181274,grad_norm: 0.9044438013862827, iteration: 89845
loss: 0.9955949187278748,grad_norm: 0.9999990944273957, iteration: 89846
loss: 1.0335015058517456,grad_norm: 0.9009410518128251, iteration: 89847
loss: 1.0016719102859497,grad_norm: 0.9999991925384762, iteration: 89848
loss: 1.0887196063995361,grad_norm: 0.9999999516061955, iteration: 89849
loss: 1.1188461780548096,grad_norm: 0.9999994069497635, iteration: 89850
loss: 1.0582424402236938,grad_norm: 0.9999992870187611, iteration: 89851
loss: 1.0427007675170898,grad_norm: 0.9541181310233289, iteration: 89852
loss: 1.0803344249725342,grad_norm: 0.9406181385549611, iteration: 89853
loss: 1.0477709770202637,grad_norm: 0.9098864603526757, iteration: 89854
loss: 1.1813111305236816,grad_norm: 0.8933267767367878, iteration: 89855
loss: 0.9796822667121887,grad_norm: 0.9999990972702614, iteration: 89856
loss: 0.9753063321113586,grad_norm: 0.9999991031169813, iteration: 89857
loss: 0.9751169085502625,grad_norm: 0.8054075742759398, iteration: 89858
loss: 1.04586923122406,grad_norm: 0.999999410629311, iteration: 89859
loss: 1.0492973327636719,grad_norm: 0.9999996487963878, iteration: 89860
loss: 1.0330997705459595,grad_norm: 0.7810962574662634, iteration: 89861
loss: 1.0302485227584839,grad_norm: 0.8536445692323212, iteration: 89862
loss: 1.0682317018508911,grad_norm: 0.9999990441560476, iteration: 89863
loss: 1.0051931142807007,grad_norm: 0.9808585911060614, iteration: 89864
loss: 0.9601815938949585,grad_norm: 0.9673329306664858, iteration: 89865
loss: 1.0701044797897339,grad_norm: 0.9999993439479332, iteration: 89866
loss: 1.0107123851776123,grad_norm: 0.9408864451509386, iteration: 89867
loss: 0.9866460561752319,grad_norm: 0.7931463130899733, iteration: 89868
loss: 1.143587350845337,grad_norm: 0.9999999711851364, iteration: 89869
loss: 1.0040498971939087,grad_norm: 0.9999993014557876, iteration: 89870
loss: 1.0412161350250244,grad_norm: 0.9999994943180959, iteration: 89871
loss: 1.010167121887207,grad_norm: 0.986390458498981, iteration: 89872
loss: 0.9901982545852661,grad_norm: 0.9796620608761833, iteration: 89873
loss: 1.0387732982635498,grad_norm: 0.9999996953413894, iteration: 89874
loss: 1.2687779664993286,grad_norm: 0.9999991701108962, iteration: 89875
loss: 0.9925947189331055,grad_norm: 0.8571468331119015, iteration: 89876
loss: 1.0611218214035034,grad_norm: 0.9999990857322275, iteration: 89877
loss: 1.0113633871078491,grad_norm: 0.9912145445469953, iteration: 89878
loss: 1.053213119506836,grad_norm: 0.9999992036184449, iteration: 89879
loss: 0.9995602965354919,grad_norm: 0.9999996545717431, iteration: 89880
loss: 1.02692711353302,grad_norm: 0.9361650438049061, iteration: 89881
loss: 0.9682533740997314,grad_norm: 0.9552805356063552, iteration: 89882
loss: 0.9859758019447327,grad_norm: 0.9490748019010151, iteration: 89883
loss: 1.0669238567352295,grad_norm: 0.9999991303729671, iteration: 89884
loss: 0.9860371351242065,grad_norm: 0.9999994445183884, iteration: 89885
loss: 1.0355067253112793,grad_norm: 0.969439178721571, iteration: 89886
loss: 1.015842318534851,grad_norm: 0.9999992109663435, iteration: 89887
loss: 1.0591224431991577,grad_norm: 0.9999991198210114, iteration: 89888
loss: 1.0582005977630615,grad_norm: 0.9999995794378502, iteration: 89889
loss: 1.0258711576461792,grad_norm: 0.8306977908224124, iteration: 89890
loss: 1.0244773626327515,grad_norm: 0.9999997520607253, iteration: 89891
loss: 1.0171043872833252,grad_norm: 0.9999991057969734, iteration: 89892
loss: 1.0231577157974243,grad_norm: 0.9651672810152084, iteration: 89893
loss: 0.9937137365341187,grad_norm: 0.9999991454916048, iteration: 89894
loss: 0.9871504902839661,grad_norm: 0.7797648686945033, iteration: 89895
loss: 1.0222913026809692,grad_norm: 0.9999991647648678, iteration: 89896
loss: 1.0522719621658325,grad_norm: 0.8690219384958746, iteration: 89897
loss: 1.0111826658248901,grad_norm: 0.9999990544329095, iteration: 89898
loss: 1.0166138410568237,grad_norm: 0.9169816583465847, iteration: 89899
loss: 0.9816621541976929,grad_norm: 0.9584812646698753, iteration: 89900
loss: 0.9896983504295349,grad_norm: 0.8448452190740761, iteration: 89901
loss: 1.0827240943908691,grad_norm: 0.9999996620834076, iteration: 89902
loss: 0.9905979633331299,grad_norm: 0.8045442444435638, iteration: 89903
loss: 1.026419758796692,grad_norm: 0.9999992064977322, iteration: 89904
loss: 1.015960693359375,grad_norm: 0.999999053473379, iteration: 89905
loss: 1.0041838884353638,grad_norm: 0.9999990913525736, iteration: 89906
loss: 1.0010892152786255,grad_norm: 0.8593976683977065, iteration: 89907
loss: 1.0778038501739502,grad_norm: 0.7953253438248529, iteration: 89908
loss: 0.986061692237854,grad_norm: 0.9999991546288576, iteration: 89909
loss: 1.1624135971069336,grad_norm: 0.9999999085541107, iteration: 89910
loss: 0.9798455238342285,grad_norm: 0.9141201501852129, iteration: 89911
loss: 1.0324699878692627,grad_norm: 0.9999990416510782, iteration: 89912
loss: 0.9925129413604736,grad_norm: 0.9735151570044788, iteration: 89913
loss: 1.0289233922958374,grad_norm: 0.9999998649326178, iteration: 89914
loss: 1.0563218593597412,grad_norm: 0.8952604131581914, iteration: 89915
loss: 1.015159010887146,grad_norm: 0.871841608940164, iteration: 89916
loss: 0.9927564859390259,grad_norm: 0.8296645398468394, iteration: 89917
loss: 1.0329010486602783,grad_norm: 0.9999992547050218, iteration: 89918
loss: 1.045910358428955,grad_norm: 0.9532879057843695, iteration: 89919
loss: 1.0239026546478271,grad_norm: 0.9999993504383538, iteration: 89920
loss: 1.031543493270874,grad_norm: 0.8291344811710254, iteration: 89921
loss: 0.9867239594459534,grad_norm: 0.9056947027112779, iteration: 89922
loss: 1.0126464366912842,grad_norm: 0.9999992417005217, iteration: 89923
loss: 0.997861921787262,grad_norm: 0.7413238666299489, iteration: 89924
loss: 1.0021358728408813,grad_norm: 0.9611918074858777, iteration: 89925
loss: 0.9775597453117371,grad_norm: 0.895753930232366, iteration: 89926
loss: 1.0239464044570923,grad_norm: 0.830723018514328, iteration: 89927
loss: 0.991032600402832,grad_norm: 0.7964808477015697, iteration: 89928
loss: 1.0746181011199951,grad_norm: 0.9999990173650216, iteration: 89929
loss: 1.0223653316497803,grad_norm: 0.9999994837707458, iteration: 89930
loss: 0.9922381639480591,grad_norm: 0.9001389866734392, iteration: 89931
loss: 1.0549044609069824,grad_norm: 0.7753667267515908, iteration: 89932
loss: 0.9789906144142151,grad_norm: 0.8715622992694754, iteration: 89933
loss: 0.9574297666549683,grad_norm: 0.971324430340478, iteration: 89934
loss: 1.0225179195404053,grad_norm: 0.9999990953548463, iteration: 89935
loss: 1.066576361656189,grad_norm: 0.9999992670400552, iteration: 89936
loss: 1.0015233755111694,grad_norm: 0.9999991570785451, iteration: 89937
loss: 0.9803537130355835,grad_norm: 0.9999992613579191, iteration: 89938
loss: 0.9605430960655212,grad_norm: 0.8076358174624461, iteration: 89939
loss: 0.9837844371795654,grad_norm: 0.8675451066424428, iteration: 89940
loss: 1.1358461380004883,grad_norm: 0.9999993384782718, iteration: 89941
loss: 0.9482644200325012,grad_norm: 0.8479910579709292, iteration: 89942
loss: 0.971390962600708,grad_norm: 0.8501011081359788, iteration: 89943
loss: 1.046247959136963,grad_norm: 0.9999992082827807, iteration: 89944
loss: 1.0191751718521118,grad_norm: 0.9596598300794499, iteration: 89945
loss: 1.0228314399719238,grad_norm: 0.9999998768809841, iteration: 89946
loss: 0.9742578268051147,grad_norm: 0.8849592502511454, iteration: 89947
loss: 1.004225730895996,grad_norm: 0.8145599221312799, iteration: 89948
loss: 1.0360445976257324,grad_norm: 0.9999991329060763, iteration: 89949
loss: 0.9792988300323486,grad_norm: 0.8884121335261753, iteration: 89950
loss: 1.030301809310913,grad_norm: 0.9999990104671949, iteration: 89951
loss: 0.9866845011711121,grad_norm: 0.8651605611472214, iteration: 89952
loss: 1.0109416246414185,grad_norm: 0.8593492344358397, iteration: 89953
loss: 0.9750688672065735,grad_norm: 0.8790349634769509, iteration: 89954
loss: 1.028354287147522,grad_norm: 0.9932626549466448, iteration: 89955
loss: 1.0187910795211792,grad_norm: 0.9999996167192942, iteration: 89956
loss: 1.0582096576690674,grad_norm: 0.9999990935070573, iteration: 89957
loss: 0.9990407824516296,grad_norm: 0.9999990162344247, iteration: 89958
loss: 0.995556652545929,grad_norm: 0.9136093671666007, iteration: 89959
loss: 0.9906519055366516,grad_norm: 0.9813283737863843, iteration: 89960
loss: 1.0159887075424194,grad_norm: 0.8499806669748412, iteration: 89961
loss: 0.9976580142974854,grad_norm: 0.9494208545457877, iteration: 89962
loss: 0.9784188270568848,grad_norm: 0.9245659490225883, iteration: 89963
loss: 1.0058304071426392,grad_norm: 0.999999126077942, iteration: 89964
loss: 1.0055004358291626,grad_norm: 0.9999989701920932, iteration: 89965
loss: 1.025214433670044,grad_norm: 0.9999990288819735, iteration: 89966
loss: 1.0241588354110718,grad_norm: 0.7652902828095984, iteration: 89967
loss: 0.9902888536453247,grad_norm: 0.9999991161623673, iteration: 89968
loss: 1.0554864406585693,grad_norm: 0.9999990668554416, iteration: 89969
loss: 0.9605321288108826,grad_norm: 0.8293475100882957, iteration: 89970
loss: 1.0235158205032349,grad_norm: 0.9999994746241063, iteration: 89971
loss: 1.0733484029769897,grad_norm: 0.8990889393177849, iteration: 89972
loss: 1.0180819034576416,grad_norm: 0.9836946341723696, iteration: 89973
loss: 1.029866337776184,grad_norm: 0.9999997882872111, iteration: 89974
loss: 1.0286093950271606,grad_norm: 0.9999994748990297, iteration: 89975
loss: 1.0293523073196411,grad_norm: 0.999999674176047, iteration: 89976
loss: 1.0419424772262573,grad_norm: 0.9871168270892651, iteration: 89977
loss: 1.0371469259262085,grad_norm: 0.9999990569466548, iteration: 89978
loss: 0.9576199054718018,grad_norm: 0.8647422874956593, iteration: 89979
loss: 0.9610805511474609,grad_norm: 0.999999125590444, iteration: 89980
loss: 1.0572564601898193,grad_norm: 0.9999990844066873, iteration: 89981
loss: 1.0323611497879028,grad_norm: 0.79196152024458, iteration: 89982
loss: 1.005751371383667,grad_norm: 0.9390771310243918, iteration: 89983
loss: 1.0340932607650757,grad_norm: 0.9262565294814442, iteration: 89984
loss: 1.1019036769866943,grad_norm: 0.9999990845707056, iteration: 89985
loss: 1.028467059135437,grad_norm: 0.945847868808864, iteration: 89986
loss: 1.0477938652038574,grad_norm: 0.8921845358469271, iteration: 89987
loss: 0.9982015490531921,grad_norm: 0.9909973203054159, iteration: 89988
loss: 1.1291924715042114,grad_norm: 0.9999993206258345, iteration: 89989
loss: 1.0680776834487915,grad_norm: 0.9999993873918727, iteration: 89990
loss: 0.9499844312667847,grad_norm: 0.7907168423782935, iteration: 89991
loss: 1.0212469100952148,grad_norm: 0.9999996747053144, iteration: 89992
loss: 1.1046321392059326,grad_norm: 0.9999995781171461, iteration: 89993
loss: 1.2297981977462769,grad_norm: 0.9999992961212236, iteration: 89994
loss: 0.9941121339797974,grad_norm: 0.9999990693698352, iteration: 89995
loss: 1.0032678842544556,grad_norm: 0.999999228778201, iteration: 89996
loss: 1.0720447301864624,grad_norm: 0.9999991754858468, iteration: 89997
loss: 1.08613920211792,grad_norm: 0.9999996749052069, iteration: 89998
loss: 1.1971449851989746,grad_norm: 0.9999999634368877, iteration: 89999
loss: 0.9862685203552246,grad_norm: 0.8548777089219634, iteration: 90000
Evaluating at step 90000
{'val': 1.0340343657881021, 'test': 2.2091424468988046}
loss: 1.0103672742843628,grad_norm: 0.9999993444902451, iteration: 90001
loss: 1.0321931838989258,grad_norm: 0.9999996589304024, iteration: 90002
loss: 1.1439911127090454,grad_norm: 0.9999995542889633, iteration: 90003
loss: 1.0488996505737305,grad_norm: 0.8905735647643539, iteration: 90004
loss: 1.0610922574996948,grad_norm: 0.9999994178308218, iteration: 90005
loss: 1.046053409576416,grad_norm: 0.8159692457753811, iteration: 90006
loss: 1.0306183099746704,grad_norm: 0.9999996360364783, iteration: 90007
loss: 1.0823500156402588,grad_norm: 0.9999994889155212, iteration: 90008
loss: 1.1710205078125,grad_norm: 0.9999999743515829, iteration: 90009
loss: 1.006644606590271,grad_norm: 0.9999993997561604, iteration: 90010
loss: 1.02608323097229,grad_norm: 0.8370344443890673, iteration: 90011
loss: 1.3022996187210083,grad_norm: 0.9999997355769965, iteration: 90012
loss: 1.105472207069397,grad_norm: 0.9777439960325855, iteration: 90013
loss: 1.0971969366073608,grad_norm: 0.9999994594331909, iteration: 90014
loss: 1.0698659420013428,grad_norm: 0.9527105749897248, iteration: 90015
loss: 1.0077521800994873,grad_norm: 0.820058891475831, iteration: 90016
loss: 1.065238356590271,grad_norm: 0.9999995326007212, iteration: 90017
loss: 1.1359390020370483,grad_norm: 0.9999995720903064, iteration: 90018
loss: 1.1609158515930176,grad_norm: 0.9999998442970766, iteration: 90019
loss: 0.9628655314445496,grad_norm: 0.9999992438518821, iteration: 90020
loss: 1.184038758277893,grad_norm: 1.0000000797734139, iteration: 90021
loss: 1.000974416732788,grad_norm: 0.9999991510142413, iteration: 90022
loss: 1.067373514175415,grad_norm: 0.9999994681498646, iteration: 90023
loss: 1.000778079032898,grad_norm: 0.9034153534722431, iteration: 90024
loss: 1.0343230962753296,grad_norm: 0.999999730645721, iteration: 90025
loss: 1.1254934072494507,grad_norm: 0.9999992644309136, iteration: 90026
loss: 1.003603458404541,grad_norm: 0.8802437736086204, iteration: 90027
loss: 1.0256452560424805,grad_norm: 0.9999990869447989, iteration: 90028
loss: 1.0181485414505005,grad_norm: 0.8491103790065414, iteration: 90029
loss: 1.1568657159805298,grad_norm: 0.99999973162374, iteration: 90030
loss: 0.9825790524482727,grad_norm: 0.9999989460961883, iteration: 90031
loss: 1.0417860746383667,grad_norm: 0.9999991802398833, iteration: 90032
loss: 1.0076111555099487,grad_norm: 0.999999399586253, iteration: 90033
loss: 1.1181718111038208,grad_norm: 0.9999990883378944, iteration: 90034
loss: 1.0303707122802734,grad_norm: 0.9439538789974006, iteration: 90035
loss: 1.0176961421966553,grad_norm: 0.9999994400084133, iteration: 90036
loss: 0.9865851402282715,grad_norm: 0.9999990588875102, iteration: 90037
loss: 1.0456610918045044,grad_norm: 0.9731757943202983, iteration: 90038
loss: 1.017717957496643,grad_norm: 0.7575657947534873, iteration: 90039
loss: 1.025689721107483,grad_norm: 0.9999992230087255, iteration: 90040
loss: 1.0055135488510132,grad_norm: 0.9999998213723612, iteration: 90041
loss: 1.0526267290115356,grad_norm: 0.9999994263875025, iteration: 90042
loss: 1.0591405630111694,grad_norm: 0.9999992685421867, iteration: 90043
loss: 0.9998108744621277,grad_norm: 0.7744387942039899, iteration: 90044
loss: 1.0368096828460693,grad_norm: 0.9999998901925042, iteration: 90045
loss: 1.1759676933288574,grad_norm: 0.9456245215500021, iteration: 90046
loss: 1.0806504487991333,grad_norm: 0.9999990197441879, iteration: 90047
loss: 1.0030845403671265,grad_norm: 0.9638897280020267, iteration: 90048
loss: 1.0330969095230103,grad_norm: 0.9409926841010865, iteration: 90049
loss: 1.0092074871063232,grad_norm: 0.8445472899587071, iteration: 90050
loss: 0.9864442348480225,grad_norm: 0.9999992194992073, iteration: 90051
loss: 1.0462870597839355,grad_norm: 0.9999995944234864, iteration: 90052
loss: 0.9931640028953552,grad_norm: 0.8717300241482379, iteration: 90053
loss: 1.037307620048523,grad_norm: 0.9999994606188227, iteration: 90054
loss: 1.0062308311462402,grad_norm: 0.999999017509246, iteration: 90055
loss: 1.014255166053772,grad_norm: 0.9080355093187334, iteration: 90056
loss: 0.9962120056152344,grad_norm: 0.9999995575323453, iteration: 90057
loss: 1.1445966958999634,grad_norm: 0.9999991479828879, iteration: 90058
loss: 1.006990909576416,grad_norm: 0.9999993317352279, iteration: 90059
loss: 1.0199098587036133,grad_norm: 0.8833592734452436, iteration: 90060
loss: 1.0831540822982788,grad_norm: 0.9999997801566893, iteration: 90061
loss: 0.9841639995574951,grad_norm: 0.7767740674101201, iteration: 90062
loss: 1.0253410339355469,grad_norm: 0.9999997281472901, iteration: 90063
loss: 0.9950128793716431,grad_norm: 0.9999992879997008, iteration: 90064
loss: 1.0140398740768433,grad_norm: 0.9685336122165781, iteration: 90065
loss: 0.9956139922142029,grad_norm: 0.9999992566522354, iteration: 90066
loss: 1.0133211612701416,grad_norm: 0.9657411408945751, iteration: 90067
loss: 0.9976601600646973,grad_norm: 0.9984008004035759, iteration: 90068
loss: 0.9920667409896851,grad_norm: 0.6794893281932183, iteration: 90069
loss: 0.9927581548690796,grad_norm: 0.9973014767392182, iteration: 90070
loss: 1.017735242843628,grad_norm: 0.7459097857021696, iteration: 90071
loss: 0.9841328859329224,grad_norm: 0.9999992407006291, iteration: 90072
loss: 1.026307463645935,grad_norm: 0.9999996938252818, iteration: 90073
loss: 1.200344204902649,grad_norm: 0.9999996783540264, iteration: 90074
loss: 1.0093584060668945,grad_norm: 0.871622665796625, iteration: 90075
loss: 1.0463311672210693,grad_norm: 0.9999993946145128, iteration: 90076
loss: 1.0123381614685059,grad_norm: 0.9999990367285779, iteration: 90077
loss: 0.9869334101676941,grad_norm: 0.9580236898273763, iteration: 90078
loss: 1.103796362876892,grad_norm: 0.9773349647218197, iteration: 90079
loss: 0.9978912472724915,grad_norm: 0.8324894529771362, iteration: 90080
loss: 1.0310932397842407,grad_norm: 0.9999998268195975, iteration: 90081
loss: 1.034591794013977,grad_norm: 0.9999997777934396, iteration: 90082
loss: 0.9948448538780212,grad_norm: 0.9999990531759407, iteration: 90083
loss: 1.0028636455535889,grad_norm: 0.8020471863339608, iteration: 90084
loss: 1.0188590288162231,grad_norm: 0.9901354457354847, iteration: 90085
loss: 1.0245968103408813,grad_norm: 0.9999990662513656, iteration: 90086
loss: 1.0588117837905884,grad_norm: 0.999999355020998, iteration: 90087
loss: 1.0417901277542114,grad_norm: 0.9376991122887652, iteration: 90088
loss: 1.0165176391601562,grad_norm: 0.9999992007472125, iteration: 90089
loss: 0.9884457588195801,grad_norm: 0.9128091653891344, iteration: 90090
loss: 1.011288046836853,grad_norm: 0.9169806645676278, iteration: 90091
loss: 1.0740231275558472,grad_norm: 0.9999994712855138, iteration: 90092
loss: 1.032799482345581,grad_norm: 0.9999993247037996, iteration: 90093
loss: 1.04989755153656,grad_norm: 0.9999998499505725, iteration: 90094
loss: 1.0216779708862305,grad_norm: 0.8582919795698256, iteration: 90095
loss: 0.9926499128341675,grad_norm: 0.9999995470222854, iteration: 90096
loss: 0.9746482968330383,grad_norm: 0.9162080650802825, iteration: 90097
loss: 0.9980533719062805,grad_norm: 0.9999990419711015, iteration: 90098
loss: 1.0268667936325073,grad_norm: 0.9999990688080066, iteration: 90099
loss: 1.0362873077392578,grad_norm: 0.8909608004675746, iteration: 90100
loss: 1.027451992034912,grad_norm: 1.0000000172275578, iteration: 90101
loss: 1.0211139917373657,grad_norm: 0.9771637254120246, iteration: 90102
loss: 1.023974061012268,grad_norm: 0.8430873214190221, iteration: 90103
loss: 1.0089043378829956,grad_norm: 0.9905647511285269, iteration: 90104
loss: 1.0093291997909546,grad_norm: 0.8228350694660674, iteration: 90105
loss: 1.0021672248840332,grad_norm: 0.9999990303524908, iteration: 90106
loss: 1.0208324193954468,grad_norm: 0.9478421126904304, iteration: 90107
loss: 1.0174756050109863,grad_norm: 0.9999993841784993, iteration: 90108
loss: 1.0116785764694214,grad_norm: 0.9999992138433355, iteration: 90109
loss: 1.055909276008606,grad_norm: 0.9999990709845391, iteration: 90110
loss: 0.9934350252151489,grad_norm: 0.9999991618839179, iteration: 90111
loss: 1.0334738492965698,grad_norm: 0.963186315718175, iteration: 90112
loss: 1.043502688407898,grad_norm: 0.9999994348260592, iteration: 90113
loss: 1.0229954719543457,grad_norm: 0.9425804829133122, iteration: 90114
loss: 0.9839006066322327,grad_norm: 0.9443558539619349, iteration: 90115
loss: 1.001800298690796,grad_norm: 0.9999991492869664, iteration: 90116
loss: 1.023508071899414,grad_norm: 0.9999991224457635, iteration: 90117
loss: 1.0624620914459229,grad_norm: 0.9999998127509748, iteration: 90118
loss: 1.032183051109314,grad_norm: 0.9999996629114773, iteration: 90119
loss: 1.0104182958602905,grad_norm: 0.881281719568953, iteration: 90120
loss: 1.0437391996383667,grad_norm: 0.9318819883071556, iteration: 90121
loss: 1.0204885005950928,grad_norm: 0.9898753822806263, iteration: 90122
loss: 1.0666615962982178,grad_norm: 0.9999995184158854, iteration: 90123
loss: 1.0097464323043823,grad_norm: 0.9999989430644258, iteration: 90124
loss: 1.066757321357727,grad_norm: 0.9999996855152103, iteration: 90125
loss: 1.0508612394332886,grad_norm: 0.9201789415774964, iteration: 90126
loss: 0.962859570980072,grad_norm: 0.999999195414526, iteration: 90127
loss: 0.9857571125030518,grad_norm: 0.897584614572347, iteration: 90128
loss: 0.9994368553161621,grad_norm: 0.9999990867198418, iteration: 90129
loss: 0.982245922088623,grad_norm: 0.9999991285117075, iteration: 90130
loss: 1.024440050125122,grad_norm: 0.9384454201350612, iteration: 90131
loss: 1.0361032485961914,grad_norm: 0.7111586404177245, iteration: 90132
loss: 1.0406445264816284,grad_norm: 0.9999992097956918, iteration: 90133
loss: 0.9861099123954773,grad_norm: 0.9999991519826651, iteration: 90134
loss: 1.0663963556289673,grad_norm: 0.9724330240246843, iteration: 90135
loss: 0.9750103950500488,grad_norm: 0.9773897666942357, iteration: 90136
loss: 1.0154048204421997,grad_norm: 0.9999998545851849, iteration: 90137
loss: 0.9924591779708862,grad_norm: 0.9028040268002374, iteration: 90138
loss: 1.0293956995010376,grad_norm: 0.9344917873709833, iteration: 90139
loss: 1.0071533918380737,grad_norm: 0.9842981082457877, iteration: 90140
loss: 1.0076193809509277,grad_norm: 0.9999998970022823, iteration: 90141
loss: 1.0362589359283447,grad_norm: 0.9999992167468756, iteration: 90142
loss: 1.0062270164489746,grad_norm: 0.9999997902211399, iteration: 90143
loss: 0.9941965341567993,grad_norm: 0.9988952493781776, iteration: 90144
loss: 1.0263668298721313,grad_norm: 0.9999991579024794, iteration: 90145
loss: 1.0169336795806885,grad_norm: 0.9999998391688556, iteration: 90146
loss: 1.022231936454773,grad_norm: 0.9392519381204023, iteration: 90147
loss: 1.0251901149749756,grad_norm: 0.9999991830736857, iteration: 90148
loss: 1.0143836736679077,grad_norm: 0.8560082267766383, iteration: 90149
loss: 0.9735761284828186,grad_norm: 0.9925536668318508, iteration: 90150
loss: 0.995829164981842,grad_norm: 0.8365803840006312, iteration: 90151
loss: 0.987621009349823,grad_norm: 0.8706349044314002, iteration: 90152
loss: 0.978324830532074,grad_norm: 0.9606258864957665, iteration: 90153
loss: 1.040563941001892,grad_norm: 0.9999997596247959, iteration: 90154
loss: 1.1270073652267456,grad_norm: 0.9999992284712222, iteration: 90155
loss: 0.9859982132911682,grad_norm: 0.999999215431946, iteration: 90156
loss: 0.9671332240104675,grad_norm: 0.8139958160494903, iteration: 90157
loss: 0.996803879737854,grad_norm: 0.9799428264511905, iteration: 90158
loss: 0.9640163779258728,grad_norm: 0.8484171605328664, iteration: 90159
loss: 1.003509759902954,grad_norm: 0.9611711531431252, iteration: 90160
loss: 1.0613818168640137,grad_norm: 0.9999994519025781, iteration: 90161
loss: 1.0003547668457031,grad_norm: 0.9999991391738274, iteration: 90162
loss: 0.9781898856163025,grad_norm: 0.9343229090727591, iteration: 90163
loss: 0.9849221110343933,grad_norm: 0.8762018795657002, iteration: 90164
loss: 0.9591853022575378,grad_norm: 0.9984256134964623, iteration: 90165
loss: 1.0476750135421753,grad_norm: 0.9023996703857831, iteration: 90166
loss: 1.0214940309524536,grad_norm: 0.9999996855822042, iteration: 90167
loss: 0.9900856018066406,grad_norm: 0.9999991182688144, iteration: 90168
loss: 1.126334547996521,grad_norm: 0.9999993391839609, iteration: 90169
loss: 1.0240941047668457,grad_norm: 0.9999992495643936, iteration: 90170
loss: 1.0179938077926636,grad_norm: 0.9999992231005452, iteration: 90171
loss: 1.1856591701507568,grad_norm: 0.999999641706484, iteration: 90172
loss: 0.9972322583198547,grad_norm: 0.9245023823823121, iteration: 90173
loss: 0.9924461245536804,grad_norm: 0.8923249203198654, iteration: 90174
loss: 0.9855687022209167,grad_norm: 0.8928971848789072, iteration: 90175
loss: 1.043115258216858,grad_norm: 0.999999486577033, iteration: 90176
loss: 0.946162223815918,grad_norm: 0.9256777475692045, iteration: 90177
loss: 1.0285130739212036,grad_norm: 0.9999990277689523, iteration: 90178
loss: 0.9880077838897705,grad_norm: 0.8730918123099081, iteration: 90179
loss: 1.008763074874878,grad_norm: 0.9197856527907405, iteration: 90180
loss: 1.0374586582183838,grad_norm: 0.791204694570788, iteration: 90181
loss: 1.0035878419876099,grad_norm: 0.9343073212952973, iteration: 90182
loss: 0.9788541197776794,grad_norm: 0.9999993731830116, iteration: 90183
loss: 0.9701663255691528,grad_norm: 0.8930741655694081, iteration: 90184
loss: 1.1228655576705933,grad_norm: 0.9999994517718064, iteration: 90185
loss: 1.0368858575820923,grad_norm: 0.953809016416998, iteration: 90186
loss: 1.0406558513641357,grad_norm: 0.9999994738610549, iteration: 90187
loss: 1.0015535354614258,grad_norm: 0.9674509534624713, iteration: 90188
loss: 0.9915717840194702,grad_norm: 0.8118757832835165, iteration: 90189
loss: 0.968167245388031,grad_norm: 0.923767373541351, iteration: 90190
loss: 1.2787595987319946,grad_norm: 0.9999998632829066, iteration: 90191
loss: 1.0256364345550537,grad_norm: 0.8986608463428111, iteration: 90192
loss: 1.0178947448730469,grad_norm: 0.9999999711898536, iteration: 90193
loss: 1.0266550779342651,grad_norm: 0.9999995937852119, iteration: 90194
loss: 0.9928432703018188,grad_norm: 0.988006427805099, iteration: 90195
loss: 1.0679187774658203,grad_norm: 0.9999991697779234, iteration: 90196
loss: 1.0336122512817383,grad_norm: 0.9204753996205703, iteration: 90197
loss: 1.0674251317977905,grad_norm: 0.7592220467265768, iteration: 90198
loss: 1.1067304611206055,grad_norm: 0.9999990953571716, iteration: 90199
loss: 1.0385798215866089,grad_norm: 0.9999995061156959, iteration: 90200
loss: 0.9952337145805359,grad_norm: 0.8680265640191933, iteration: 90201
loss: 1.0691136121749878,grad_norm: 0.9999990829434123, iteration: 90202
loss: 0.9798953533172607,grad_norm: 0.9245057656741952, iteration: 90203
loss: 1.0280611515045166,grad_norm: 0.9999995564319796, iteration: 90204
loss: 1.0389727354049683,grad_norm: 0.9999997385826602, iteration: 90205
loss: 1.0063050985336304,grad_norm: 0.9199370321792872, iteration: 90206
loss: 1.058901071548462,grad_norm: 0.999999432830225, iteration: 90207
loss: 0.978803813457489,grad_norm: 0.7344378003783544, iteration: 90208
loss: 1.0367391109466553,grad_norm: 0.9999992941621365, iteration: 90209
loss: 1.1114490032196045,grad_norm: 0.9999994869646025, iteration: 90210
loss: 1.033029317855835,grad_norm: 0.9999995957489535, iteration: 90211
loss: 1.0469708442687988,grad_norm: 0.9999990556609433, iteration: 90212
loss: 1.0043436288833618,grad_norm: 0.9270331632951386, iteration: 90213
loss: 1.0018901824951172,grad_norm: 0.8654637143088957, iteration: 90214
loss: 1.018442988395691,grad_norm: 0.8536695439249831, iteration: 90215
loss: 1.041275143623352,grad_norm: 0.9999994059933757, iteration: 90216
loss: 0.9955986738204956,grad_norm: 0.999999704880735, iteration: 90217
loss: 1.020806074142456,grad_norm: 0.9999990082106082, iteration: 90218
loss: 0.9948279857635498,grad_norm: 0.9667791314335389, iteration: 90219
loss: 1.165815830230713,grad_norm: 0.9999996260452916, iteration: 90220
loss: 1.1723824739456177,grad_norm: 0.9999998194591335, iteration: 90221
loss: 1.1401642560958862,grad_norm: 0.9999990335877835, iteration: 90222
loss: 0.980522096157074,grad_norm: 0.8389303546744181, iteration: 90223
loss: 1.0701103210449219,grad_norm: 0.9999995347912752, iteration: 90224
loss: 1.0546845197677612,grad_norm: 0.834694595013525, iteration: 90225
loss: 1.0095149278640747,grad_norm: 0.9999991009106499, iteration: 90226
loss: 1.0281561613082886,grad_norm: 0.9999994069108682, iteration: 90227
loss: 1.1567723751068115,grad_norm: 0.9999995394739015, iteration: 90228
loss: 1.0506383180618286,grad_norm: 0.9999993238833403, iteration: 90229
loss: 1.0100802183151245,grad_norm: 0.9356845162709219, iteration: 90230
loss: 1.0043339729309082,grad_norm: 0.999999788333577, iteration: 90231
loss: 1.010719895362854,grad_norm: 0.9999990190301317, iteration: 90232
loss: 1.0432928800582886,grad_norm: 0.9607181443525717, iteration: 90233
loss: 1.0580967664718628,grad_norm: 0.9999990152285583, iteration: 90234
loss: 1.0139411687850952,grad_norm: 0.8467317756090708, iteration: 90235
loss: 0.9956612586975098,grad_norm: 0.9624299284994995, iteration: 90236
loss: 1.0070089101791382,grad_norm: 0.9406454324035011, iteration: 90237
loss: 1.175341248512268,grad_norm: 0.999999862450737, iteration: 90238
loss: 1.0387400388717651,grad_norm: 0.9193595206236185, iteration: 90239
loss: 1.0439518690109253,grad_norm: 0.9999999446580699, iteration: 90240
loss: 0.989054799079895,grad_norm: 0.9999992828641663, iteration: 90241
loss: 1.0059306621551514,grad_norm: 0.9999996118822131, iteration: 90242
loss: 1.0012766122817993,grad_norm: 0.9090028090358216, iteration: 90243
loss: 0.9860422015190125,grad_norm: 0.9999995381171626, iteration: 90244
loss: 1.0142680406570435,grad_norm: 0.9999992104100828, iteration: 90245
loss: 1.076488971710205,grad_norm: 0.9999997733314807, iteration: 90246
loss: 0.9858987331390381,grad_norm: 0.9999990557972769, iteration: 90247
loss: 0.9813681244850159,grad_norm: 0.9999992839689349, iteration: 90248
loss: 1.0633186101913452,grad_norm: 0.9999999589367726, iteration: 90249
loss: 1.0038115978240967,grad_norm: 0.8637781292466327, iteration: 90250
loss: 1.0951392650604248,grad_norm: 0.999999819861583, iteration: 90251
loss: 0.9937067627906799,grad_norm: 0.8989598582985813, iteration: 90252
loss: 1.0159236192703247,grad_norm: 0.8570718784038821, iteration: 90253
loss: 1.2085914611816406,grad_norm: 0.9999996729937549, iteration: 90254
loss: 0.988066554069519,grad_norm: 0.9555533580859759, iteration: 90255
loss: 1.0395170450210571,grad_norm: 0.9999991504568807, iteration: 90256
loss: 1.0189400911331177,grad_norm: 0.8579706430075751, iteration: 90257
loss: 1.0070949792861938,grad_norm: 0.9999994063878994, iteration: 90258
loss: 1.048098087310791,grad_norm: 0.9042891864450854, iteration: 90259
loss: 1.0123900175094604,grad_norm: 0.9303903125086801, iteration: 90260
loss: 1.0161046981811523,grad_norm: 0.9999991628687401, iteration: 90261
loss: 1.0537796020507812,grad_norm: 0.9999991265238525, iteration: 90262
loss: 1.0744260549545288,grad_norm: 0.9999995680952697, iteration: 90263
loss: 1.0431865453720093,grad_norm: 0.999999861399831, iteration: 90264
loss: 0.9956108927726746,grad_norm: 0.7338821449012749, iteration: 90265
loss: 1.0932605266571045,grad_norm: 0.9999998146498185, iteration: 90266
loss: 1.0276122093200684,grad_norm: 0.9999990164558461, iteration: 90267
loss: 1.0830177068710327,grad_norm: 0.7833980096514664, iteration: 90268
loss: 1.1143237352371216,grad_norm: 0.8708301005420577, iteration: 90269
loss: 0.9991593360900879,grad_norm: 0.6825068612194802, iteration: 90270
loss: 1.1400688886642456,grad_norm: 0.9999997387794043, iteration: 90271
loss: 1.028494119644165,grad_norm: 0.9999993167365533, iteration: 90272
loss: 1.0477581024169922,grad_norm: 0.9999990367339855, iteration: 90273
loss: 1.0473674535751343,grad_norm: 0.9999994973471956, iteration: 90274
loss: 1.0121196508407593,grad_norm: 0.8245676746911237, iteration: 90275
loss: 1.0111972093582153,grad_norm: 0.8922281964215873, iteration: 90276
loss: 0.9951558709144592,grad_norm: 0.9999994472542677, iteration: 90277
loss: 1.0385174751281738,grad_norm: 0.999999058047258, iteration: 90278
loss: 1.1522934436798096,grad_norm: 0.9999996858486745, iteration: 90279
loss: 1.1334960460662842,grad_norm: 0.9999999283396689, iteration: 90280
loss: 0.9927231669425964,grad_norm: 0.9538551113546104, iteration: 90281
loss: 1.070172667503357,grad_norm: 0.9999993138608683, iteration: 90282
loss: 1.0226263999938965,grad_norm: 0.9999992422560865, iteration: 90283
loss: 1.082681655883789,grad_norm: 0.9839389978474753, iteration: 90284
loss: 1.047423243522644,grad_norm: 0.9999993763426991, iteration: 90285
loss: 1.0119670629501343,grad_norm: 0.9999993927935514, iteration: 90286
loss: 1.0123056173324585,grad_norm: 0.9999989872188618, iteration: 90287
loss: 0.968632698059082,grad_norm: 0.999999124096984, iteration: 90288
loss: 0.9996275901794434,grad_norm: 0.822533375131891, iteration: 90289
loss: 1.0014336109161377,grad_norm: 0.9012107447374276, iteration: 90290
loss: 1.025601863861084,grad_norm: 0.9051650168079337, iteration: 90291
loss: 1.0876964330673218,grad_norm: 0.9999991468035417, iteration: 90292
loss: 1.0048692226409912,grad_norm: 0.8589779607392816, iteration: 90293
loss: 1.0864049196243286,grad_norm: 0.9999991381756855, iteration: 90294
loss: 1.0268800258636475,grad_norm: 0.999999164178298, iteration: 90295
loss: 1.089139699935913,grad_norm: 0.9999990608488953, iteration: 90296
loss: 1.0617225170135498,grad_norm: 0.9999991747778233, iteration: 90297
loss: 0.9860274791717529,grad_norm: 0.858711114111946, iteration: 90298
loss: 1.0569446086883545,grad_norm: 0.999999267248881, iteration: 90299
loss: 1.0186469554901123,grad_norm: 0.9999991609690333, iteration: 90300
loss: 1.0286916494369507,grad_norm: 0.9999993567738042, iteration: 90301
loss: 1.1040699481964111,grad_norm: 0.9999998436214229, iteration: 90302
loss: 1.083276629447937,grad_norm: 0.999999970415707, iteration: 90303
loss: 1.0571625232696533,grad_norm: 0.9999995340256095, iteration: 90304
loss: 1.0833934545516968,grad_norm: 0.9999991053183582, iteration: 90305
loss: 1.101184606552124,grad_norm: 0.9999996634399875, iteration: 90306
loss: 1.091426134109497,grad_norm: 0.9999999584701095, iteration: 90307
loss: 1.0354950428009033,grad_norm: 0.9999998399652297, iteration: 90308
loss: 1.1544945240020752,grad_norm: 0.9999998930143434, iteration: 90309
loss: 1.086963415145874,grad_norm: 0.9999993610452177, iteration: 90310
loss: 1.0433895587921143,grad_norm: 0.9999994866828078, iteration: 90311
loss: 1.0113661289215088,grad_norm: 0.9522429924606384, iteration: 90312
loss: 1.1847785711288452,grad_norm: 0.9999994170331247, iteration: 90313
loss: 1.0041453838348389,grad_norm: 0.999999068238916, iteration: 90314
loss: 1.0142309665679932,grad_norm: 0.9999994976192347, iteration: 90315
loss: 0.9737387299537659,grad_norm: 0.9999995376624251, iteration: 90316
loss: 1.0844405889511108,grad_norm: 0.9999996740989019, iteration: 90317
loss: 1.0501275062561035,grad_norm: 0.8932767861639134, iteration: 90318
loss: 1.0082201957702637,grad_norm: 0.9999988903530687, iteration: 90319
loss: 1.0015549659729004,grad_norm: 0.9999998975793958, iteration: 90320
loss: 1.0519981384277344,grad_norm: 0.9999993563458952, iteration: 90321
loss: 1.0312849283218384,grad_norm: 0.9999991361762647, iteration: 90322
loss: 1.052954077720642,grad_norm: 0.9999997475814102, iteration: 90323
loss: 1.1594245433807373,grad_norm: 0.999999243343255, iteration: 90324
loss: 1.0655418634414673,grad_norm: 0.9999990691786238, iteration: 90325
loss: 1.0677930116653442,grad_norm: 0.9999992507114273, iteration: 90326
loss: 1.1079951524734497,grad_norm: 0.9999994390698073, iteration: 90327
loss: 1.0759294033050537,grad_norm: 0.9999995160502105, iteration: 90328
loss: 0.981288492679596,grad_norm: 0.9999991343819506, iteration: 90329
loss: 1.114558458328247,grad_norm: 0.9999998558144841, iteration: 90330
loss: 1.3115943670272827,grad_norm: 0.999999244234987, iteration: 90331
loss: 1.3483155965805054,grad_norm: 0.9999992934407216, iteration: 90332
loss: 1.1730722188949585,grad_norm: 0.9999993533969244, iteration: 90333
loss: 1.1255755424499512,grad_norm: 0.9999992370370149, iteration: 90334
loss: 1.0911146402359009,grad_norm: 0.999999208007856, iteration: 90335
loss: 1.1985838413238525,grad_norm: 0.9999996474522969, iteration: 90336
loss: 1.1608800888061523,grad_norm: 0.9999992765194492, iteration: 90337
loss: 0.9888355731964111,grad_norm: 0.9486298624157604, iteration: 90338
loss: 1.2413761615753174,grad_norm: 0.9999992802956046, iteration: 90339
loss: 1.0934243202209473,grad_norm: 0.9999992578839052, iteration: 90340
loss: 1.0576354265213013,grad_norm: 0.9999992127489226, iteration: 90341
loss: 1.4913463592529297,grad_norm: 0.9999998528334317, iteration: 90342
loss: 1.072985291481018,grad_norm: 0.9999999151910817, iteration: 90343
loss: 1.2568308115005493,grad_norm: 0.9999999809112416, iteration: 90344
loss: 1.0685707330703735,grad_norm: 0.9999992761814392, iteration: 90345
loss: 1.1530719995498657,grad_norm: 0.9999993577515414, iteration: 90346
loss: 1.1156212091445923,grad_norm: 0.9999992051869646, iteration: 90347
loss: 1.1710067987442017,grad_norm: 0.9999999795504929, iteration: 90348
loss: 1.1753435134887695,grad_norm: 0.9999997935663748, iteration: 90349
loss: 1.174241065979004,grad_norm: 0.9999998212079159, iteration: 90350
loss: 1.2190555334091187,grad_norm: 0.999999349351513, iteration: 90351
loss: 1.0866721868515015,grad_norm: 0.9999999024233016, iteration: 90352
loss: 1.0795817375183105,grad_norm: 0.9999992114876263, iteration: 90353
loss: 1.1092703342437744,grad_norm: 0.9999994006112802, iteration: 90354
loss: 1.1524090766906738,grad_norm: 0.9999997890805137, iteration: 90355
loss: 0.9949408173561096,grad_norm: 0.9999990295448268, iteration: 90356
loss: 0.9821498990058899,grad_norm: 0.999999658564906, iteration: 90357
loss: 0.9816280603408813,grad_norm: 0.8929019984193857, iteration: 90358
loss: 1.0807483196258545,grad_norm: 0.9999996707319757, iteration: 90359
loss: 1.0253828763961792,grad_norm: 0.8556247099942085, iteration: 90360
loss: 1.0142830610275269,grad_norm: 0.8462590666417569, iteration: 90361
loss: 1.026747226715088,grad_norm: 0.9999992384748648, iteration: 90362
loss: 0.9994757771492004,grad_norm: 0.8613347366871382, iteration: 90363
loss: 0.992816150188446,grad_norm: 0.9999990373737854, iteration: 90364
loss: 0.9925224781036377,grad_norm: 0.9999989728319996, iteration: 90365
loss: 1.14756178855896,grad_norm: 0.9999997534075727, iteration: 90366
loss: 1.021009087562561,grad_norm: 0.9999994039256354, iteration: 90367
loss: 0.97471684217453,grad_norm: 0.8129075467087533, iteration: 90368
loss: 1.0728514194488525,grad_norm: 0.999999280692509, iteration: 90369
loss: 1.063793659210205,grad_norm: 0.9216863684507666, iteration: 90370
loss: 1.030037522315979,grad_norm: 0.9999990335412595, iteration: 90371
loss: 0.9901748895645142,grad_norm: 0.9894281385521725, iteration: 90372
loss: 0.9757869243621826,grad_norm: 0.9482295661662439, iteration: 90373
loss: 1.0412884950637817,grad_norm: 0.9816371618104579, iteration: 90374
loss: 0.9862186312675476,grad_norm: 0.8866124149101242, iteration: 90375
loss: 1.0398048162460327,grad_norm: 0.9999995760183948, iteration: 90376
loss: 1.01042902469635,grad_norm: 0.7856640641265222, iteration: 90377
loss: 1.0385496616363525,grad_norm: 0.9999998721809662, iteration: 90378
loss: 1.032368540763855,grad_norm: 0.9090064381163069, iteration: 90379
loss: 1.148639440536499,grad_norm: 0.9999999533500327, iteration: 90380
loss: 1.0246003866195679,grad_norm: 0.8898799144116187, iteration: 90381
loss: 1.1441330909729004,grad_norm: 0.9999992695443896, iteration: 90382
loss: 1.0003113746643066,grad_norm: 0.8598039469039737, iteration: 90383
loss: 1.0022281408309937,grad_norm: 0.9999991877579117, iteration: 90384
loss: 1.3908641338348389,grad_norm: 0.9999994635109356, iteration: 90385
loss: 0.9926554560661316,grad_norm: 0.9192034954273035, iteration: 90386
loss: 1.0213303565979004,grad_norm: 0.9060540803597543, iteration: 90387
loss: 1.0036389827728271,grad_norm: 0.9999993374165917, iteration: 90388
loss: 1.1028127670288086,grad_norm: 0.9999996734231379, iteration: 90389
loss: 1.093781590461731,grad_norm: 0.9999999044237367, iteration: 90390
loss: 1.0341182947158813,grad_norm: 0.8725904558213068, iteration: 90391
loss: 1.0104464292526245,grad_norm: 0.9999992249501403, iteration: 90392
loss: 1.0011870861053467,grad_norm: 0.9999991548133857, iteration: 90393
loss: 1.0290367603302002,grad_norm: 0.8465538645771392, iteration: 90394
loss: 1.0255368947982788,grad_norm: 0.9131256452257229, iteration: 90395
loss: 1.2043344974517822,grad_norm: 0.9999998368949741, iteration: 90396
loss: 1.1635258197784424,grad_norm: 1.0000000757360423, iteration: 90397
loss: 1.0386368036270142,grad_norm: 0.9999999406112392, iteration: 90398
loss: 1.0912420749664307,grad_norm: 0.9999996652909828, iteration: 90399
loss: 1.0445314645767212,grad_norm: 0.9999995522889316, iteration: 90400
loss: 1.0505222082138062,grad_norm: 0.9999991545941483, iteration: 90401
loss: 0.9905452728271484,grad_norm: 0.9999991016530403, iteration: 90402
loss: 0.9985727071762085,grad_norm: 0.9999991903567701, iteration: 90403
loss: 1.0347775220870972,grad_norm: 0.9999994174838879, iteration: 90404
loss: 1.0782694816589355,grad_norm: 0.9999998437922231, iteration: 90405
loss: 1.027094841003418,grad_norm: 0.9067682628659075, iteration: 90406
loss: 1.0137073993682861,grad_norm: 0.9999991543769285, iteration: 90407
loss: 0.9727189540863037,grad_norm: 0.9999994228868613, iteration: 90408
loss: 0.9967339038848877,grad_norm: 0.8977841141467603, iteration: 90409
loss: 1.025109052658081,grad_norm: 0.9999999657150902, iteration: 90410
loss: 1.0113362073898315,grad_norm: 0.8807314998547517, iteration: 90411
loss: 1.0020902156829834,grad_norm: 0.7508021765690484, iteration: 90412
loss: 1.0462145805358887,grad_norm: 0.959740546396384, iteration: 90413
loss: 1.071415662765503,grad_norm: 0.999999415671434, iteration: 90414
loss: 0.9859734177589417,grad_norm: 0.9547268860914522, iteration: 90415
loss: 1.016653299331665,grad_norm: 0.9999998917549341, iteration: 90416
loss: 1.020365834236145,grad_norm: 0.873425429291616, iteration: 90417
loss: 1.0499110221862793,grad_norm: 0.9999998922188101, iteration: 90418
loss: 0.9681684970855713,grad_norm: 0.9999989762715765, iteration: 90419
loss: 1.0187013149261475,grad_norm: 0.9005919783249274, iteration: 90420
loss: 1.0209473371505737,grad_norm: 0.866786347508264, iteration: 90421
loss: 1.0224627256393433,grad_norm: 0.8231450020548297, iteration: 90422
loss: 0.9984655976295471,grad_norm: 0.9999991648346135, iteration: 90423
loss: 0.9928669333457947,grad_norm: 0.9999994898027672, iteration: 90424
loss: 0.9682469964027405,grad_norm: 0.8632571396470389, iteration: 90425
loss: 0.9831207990646362,grad_norm: 0.8934455827121153, iteration: 90426
loss: 1.0491493940353394,grad_norm: 0.9448486265522015, iteration: 90427
loss: 1.0843290090560913,grad_norm: 0.9999991827999098, iteration: 90428
loss: 0.9935776591300964,grad_norm: 0.9355776020349926, iteration: 90429
loss: 1.0062143802642822,grad_norm: 0.973142932542232, iteration: 90430
loss: 1.02021324634552,grad_norm: 0.9999995699215523, iteration: 90431
loss: 1.122159481048584,grad_norm: 0.9999993783435643, iteration: 90432
loss: 1.0081520080566406,grad_norm: 0.9999990166684706, iteration: 90433
loss: 1.0735701322555542,grad_norm: 0.9999996257312429, iteration: 90434
loss: 1.0969284772872925,grad_norm: 0.9999994370604404, iteration: 90435
loss: 1.215272307395935,grad_norm: 1.000000015518938, iteration: 90436
loss: 1.0224989652633667,grad_norm: 0.9168714869973661, iteration: 90437
loss: 1.1370714902877808,grad_norm: 0.9999994016802695, iteration: 90438
loss: 1.0408238172531128,grad_norm: 0.9999990811599128, iteration: 90439
loss: 0.989289402961731,grad_norm: 0.856800905136407, iteration: 90440
loss: 0.9963741302490234,grad_norm: 0.9524952035960935, iteration: 90441
loss: 1.0010510683059692,grad_norm: 0.9999997203805358, iteration: 90442
loss: 1.045597791671753,grad_norm: 0.9999991532778655, iteration: 90443
loss: 1.1980642080307007,grad_norm: 0.9999997515998882, iteration: 90444
loss: 0.9699848890304565,grad_norm: 0.9368086289848971, iteration: 90445
loss: 1.1031063795089722,grad_norm: 0.9999999930855475, iteration: 90446
loss: 1.0818992853164673,grad_norm: 0.9999997499294071, iteration: 90447
loss: 1.0020036697387695,grad_norm: 0.8947703050912428, iteration: 90448
loss: 1.0338141918182373,grad_norm: 0.9287011896955157, iteration: 90449
loss: 0.9873684644699097,grad_norm: 0.9999993884875005, iteration: 90450
loss: 1.028944492340088,grad_norm: 0.9999994803940125, iteration: 90451
loss: 1.007204532623291,grad_norm: 0.9727031208079366, iteration: 90452
loss: 1.0055536031723022,grad_norm: 0.8224594390089912, iteration: 90453
loss: 0.9909464716911316,grad_norm: 0.9710765367796613, iteration: 90454
loss: 1.0513787269592285,grad_norm: 0.982887519499934, iteration: 90455
loss: 1.033313512802124,grad_norm: 0.9333739722942587, iteration: 90456
loss: 1.0727617740631104,grad_norm: 0.9999991984818675, iteration: 90457
loss: 0.9982324242591858,grad_norm: 0.999999196401706, iteration: 90458
loss: 1.0951374769210815,grad_norm: 0.999998975094897, iteration: 90459
loss: 0.9992932081222534,grad_norm: 0.8469612724760442, iteration: 90460
loss: 0.9720912575721741,grad_norm: 0.8269969992720995, iteration: 90461
loss: 0.9874707460403442,grad_norm: 0.9999991233700154, iteration: 90462
loss: 1.003021478652954,grad_norm: 0.999999099678311, iteration: 90463
loss: 1.2196475267410278,grad_norm: 0.9999993394024311, iteration: 90464
loss: 1.2420936822891235,grad_norm: 0.9999999321264366, iteration: 90465
loss: 0.983649730682373,grad_norm: 0.9807383669807369, iteration: 90466
loss: 1.0221794843673706,grad_norm: 0.9413997343377232, iteration: 90467
loss: 0.9993354678153992,grad_norm: 0.9999990336415635, iteration: 90468
loss: 1.074513554573059,grad_norm: 0.9999992322334312, iteration: 90469
loss: 1.2117841243743896,grad_norm: 0.9999998395991271, iteration: 90470
loss: 1.0035794973373413,grad_norm: 0.8316340406299738, iteration: 90471
loss: 1.1741312742233276,grad_norm: 0.999999091782661, iteration: 90472
loss: 1.0281622409820557,grad_norm: 0.9999991854467654, iteration: 90473
loss: 1.027666687965393,grad_norm: 0.9999990917464435, iteration: 90474
loss: 1.0192722082138062,grad_norm: 0.9999991481129903, iteration: 90475
loss: 1.0191293954849243,grad_norm: 0.8452016985235191, iteration: 90476
loss: 1.050824761390686,grad_norm: 0.9999992659308821, iteration: 90477
loss: 1.048115849494934,grad_norm: 0.9999993038589339, iteration: 90478
loss: 1.0063567161560059,grad_norm: 0.9999991390466136, iteration: 90479
loss: 1.0882736444473267,grad_norm: 0.9999991034617488, iteration: 90480
loss: 0.9707834124565125,grad_norm: 0.9999990760576712, iteration: 90481
loss: 1.056113600730896,grad_norm: 0.9999990395737401, iteration: 90482
loss: 1.0486650466918945,grad_norm: 0.9258854094797895, iteration: 90483
loss: 0.9930906295776367,grad_norm: 0.9999990458908938, iteration: 90484
loss: 1.0272741317749023,grad_norm: 0.999999425363171, iteration: 90485
loss: 1.0396136045455933,grad_norm: 0.999999685344934, iteration: 90486
loss: 1.0238598585128784,grad_norm: 0.9999998384694926, iteration: 90487
loss: 0.9702860713005066,grad_norm: 0.9999990500727431, iteration: 90488
loss: 1.0488098859786987,grad_norm: 0.899003731376892, iteration: 90489
loss: 1.0409694910049438,grad_norm: 0.9999992131999342, iteration: 90490
loss: 1.0073939561843872,grad_norm: 0.9999998300456389, iteration: 90491
loss: 1.0065529346466064,grad_norm: 0.9999990191941329, iteration: 90492
loss: 1.0612750053405762,grad_norm: 0.9999999597517849, iteration: 90493
loss: 0.99970543384552,grad_norm: 0.9430739124091093, iteration: 90494
loss: 0.9798844456672668,grad_norm: 0.9616302692447468, iteration: 90495
loss: 1.0140818357467651,grad_norm: 0.937807516756641, iteration: 90496
loss: 1.0110679864883423,grad_norm: 0.9999996118554514, iteration: 90497
loss: 1.012817144393921,grad_norm: 0.9999993831591065, iteration: 90498
loss: 1.0461184978485107,grad_norm: 0.9999994950991468, iteration: 90499
loss: 1.0605195760726929,grad_norm: 0.999999103385826, iteration: 90500
loss: 1.0218082666397095,grad_norm: 0.999999227005023, iteration: 90501
loss: 1.033210277557373,grad_norm: 0.9917574862057303, iteration: 90502
loss: 1.0135972499847412,grad_norm: 0.9999990976351861, iteration: 90503
loss: 0.9726343154907227,grad_norm: 0.9999991791489141, iteration: 90504
loss: 1.0434054136276245,grad_norm: 0.999999505328611, iteration: 90505
loss: 1.0218137502670288,grad_norm: 0.9999991548765309, iteration: 90506
loss: 1.0027813911437988,grad_norm: 0.9961201447136642, iteration: 90507
loss: 1.0069150924682617,grad_norm: 0.8780636580721567, iteration: 90508
loss: 0.9698581695556641,grad_norm: 0.9999991674275827, iteration: 90509
loss: 1.0494616031646729,grad_norm: 0.9999991733508962, iteration: 90510
loss: 0.9706346392631531,grad_norm: 0.9624175752583193, iteration: 90511
loss: 1.0006685256958008,grad_norm: 0.8781912980018223, iteration: 90512
loss: 1.0029454231262207,grad_norm: 0.9999998379706717, iteration: 90513
loss: 1.0026715993881226,grad_norm: 0.9748453268137971, iteration: 90514
loss: 1.0023869276046753,grad_norm: 0.83046267474738, iteration: 90515
loss: 1.0782134532928467,grad_norm: 0.9999991696357504, iteration: 90516
loss: 1.0227302312850952,grad_norm: 0.9884768251868415, iteration: 90517
loss: 1.0469361543655396,grad_norm: 0.9999998965545889, iteration: 90518
loss: 1.0221197605133057,grad_norm: 0.8298827984915523, iteration: 90519
loss: 1.020775318145752,grad_norm: 0.9999993455559865, iteration: 90520
loss: 0.9929315447807312,grad_norm: 0.9999991149846302, iteration: 90521
loss: 1.0038325786590576,grad_norm: 0.9170009802559044, iteration: 90522
loss: 1.0171157121658325,grad_norm: 0.9649658356086412, iteration: 90523
loss: 0.9785103797912598,grad_norm: 0.9999993077497801, iteration: 90524
loss: 0.9897114634513855,grad_norm: 0.9575321991908611, iteration: 90525
loss: 0.9896550178527832,grad_norm: 0.9999992019560974, iteration: 90526
loss: 1.0114666223526,grad_norm: 0.8653905002033058, iteration: 90527
loss: 1.035036325454712,grad_norm: 0.9999998721785127, iteration: 90528
loss: 1.0348268747329712,grad_norm: 0.8416555328613785, iteration: 90529
loss: 1.1079182624816895,grad_norm: 0.9999994304065499, iteration: 90530
loss: 1.048735499382019,grad_norm: 0.8140262174788556, iteration: 90531
loss: 1.342716097831726,grad_norm: 0.9999998967734502, iteration: 90532
loss: 0.9951964616775513,grad_norm: 0.7928557758366617, iteration: 90533
loss: 1.0101512670516968,grad_norm: 0.9800538759515885, iteration: 90534
loss: 1.007793664932251,grad_norm: 0.9460268733744719, iteration: 90535
loss: 1.0224356651306152,grad_norm: 0.9999992136106354, iteration: 90536
loss: 0.9881670475006104,grad_norm: 0.7811448555196768, iteration: 90537
loss: 0.9864506125450134,grad_norm: 0.9999991713202816, iteration: 90538
loss: 1.0395910739898682,grad_norm: 0.999999252103264, iteration: 90539
loss: 1.120652675628662,grad_norm: 0.9999992292281861, iteration: 90540
loss: 0.984218180179596,grad_norm: 0.8296249135567708, iteration: 90541
loss: 0.996616542339325,grad_norm: 0.938721424064669, iteration: 90542
loss: 1.0501887798309326,grad_norm: 0.9999992969584625, iteration: 90543
loss: 1.0224193334579468,grad_norm: 0.9929646973140105, iteration: 90544
loss: 1.0163540840148926,grad_norm: 0.9999991222036688, iteration: 90545
loss: 1.0181381702423096,grad_norm: 0.9999993418907673, iteration: 90546
loss: 1.004029393196106,grad_norm: 0.9475840276571436, iteration: 90547
loss: 0.9971542358398438,grad_norm: 0.9999990379917584, iteration: 90548
loss: 1.0895589590072632,grad_norm: 0.9999992491075713, iteration: 90549
loss: 1.02469003200531,grad_norm: 0.9567948430324321, iteration: 90550
loss: 1.0578927993774414,grad_norm: 0.8534097911954247, iteration: 90551
loss: 1.0086116790771484,grad_norm: 0.9999990773830734, iteration: 90552
loss: 0.9860387444496155,grad_norm: 0.9502157113287556, iteration: 90553
loss: 1.0039716958999634,grad_norm: 0.9999997736286387, iteration: 90554
loss: 1.0033506155014038,grad_norm: 0.8313457295849273, iteration: 90555
loss: 0.9970306158065796,grad_norm: 0.999999510840546, iteration: 90556
loss: 0.9853206872940063,grad_norm: 0.8239019051493534, iteration: 90557
loss: 0.9631622433662415,grad_norm: 0.9999992139989233, iteration: 90558
loss: 1.0166363716125488,grad_norm: 0.9897685811129031, iteration: 90559
loss: 0.9890506267547607,grad_norm: 0.7915757432561488, iteration: 90560
loss: 1.0179040431976318,grad_norm: 0.9999990784916151, iteration: 90561
loss: 1.0252208709716797,grad_norm: 0.8215033097357127, iteration: 90562
loss: 1.0027830600738525,grad_norm: 0.9844312616914657, iteration: 90563
loss: 1.026018500328064,grad_norm: 0.9223522008476983, iteration: 90564
loss: 1.16605806350708,grad_norm: 0.9999998010318703, iteration: 90565
loss: 1.012068271636963,grad_norm: 0.921983423280058, iteration: 90566
loss: 1.0281717777252197,grad_norm: 0.9999992096761773, iteration: 90567
loss: 1.0405354499816895,grad_norm: 0.949301862939474, iteration: 90568
loss: 1.010440707206726,grad_norm: 0.9999990626337655, iteration: 90569
loss: 1.0671604871749878,grad_norm: 0.9955711012337651, iteration: 90570
loss: 1.1717755794525146,grad_norm: 0.9999998337743212, iteration: 90571
loss: 0.9844682812690735,grad_norm: 0.8247388072491137, iteration: 90572
loss: 1.0708199739456177,grad_norm: 0.9999995462874997, iteration: 90573
loss: 1.0363101959228516,grad_norm: 0.9999991386814968, iteration: 90574
loss: 1.000115156173706,grad_norm: 0.8772976644548209, iteration: 90575
loss: 1.1404484510421753,grad_norm: 0.9790975779898291, iteration: 90576
loss: 0.9606175422668457,grad_norm: 0.9999990666122692, iteration: 90577
loss: 0.9625901579856873,grad_norm: 0.979760456934342, iteration: 90578
loss: 1.0742543935775757,grad_norm: 0.9999991591928876, iteration: 90579
loss: 1.0406835079193115,grad_norm: 0.9999993879930134, iteration: 90580
loss: 1.0485714673995972,grad_norm: 0.9883900115129358, iteration: 90581
loss: 0.9992901086807251,grad_norm: 0.999999060439848, iteration: 90582
loss: 1.0526331663131714,grad_norm: 0.9999996015846355, iteration: 90583
loss: 0.9807795882225037,grad_norm: 0.7786147041640953, iteration: 90584
loss: 1.0087820291519165,grad_norm: 0.9999991091686955, iteration: 90585
loss: 1.004249930381775,grad_norm: 0.8182615982367054, iteration: 90586
loss: 1.0858032703399658,grad_norm: 0.9978569658312751, iteration: 90587
loss: 1.2740418910980225,grad_norm: 0.9999999338726481, iteration: 90588
loss: 1.0525217056274414,grad_norm: 0.9999990805553816, iteration: 90589
loss: 1.0704399347305298,grad_norm: 0.9740397694272276, iteration: 90590
loss: 1.0210119485855103,grad_norm: 0.8995268164265334, iteration: 90591
loss: 0.974432110786438,grad_norm: 0.9192714499043464, iteration: 90592
loss: 0.9834884405136108,grad_norm: 0.9999992150548597, iteration: 90593
loss: 1.0690028667449951,grad_norm: 0.9999991108658628, iteration: 90594
loss: 1.078881859779358,grad_norm: 0.9999997844482473, iteration: 90595
loss: 1.1239798069000244,grad_norm: 0.9999998819507738, iteration: 90596
loss: 0.9685419201850891,grad_norm: 0.9999992579924116, iteration: 90597
loss: 0.9964727759361267,grad_norm: 0.9933558810911399, iteration: 90598
loss: 0.9918699860572815,grad_norm: 0.9999993846405715, iteration: 90599
loss: 1.020902156829834,grad_norm: 0.9887075341724528, iteration: 90600
loss: 1.005021333694458,grad_norm: 0.8985310654968689, iteration: 90601
loss: 1.0275657176971436,grad_norm: 0.9999990047101674, iteration: 90602
loss: 0.9836519956588745,grad_norm: 0.949091468777308, iteration: 90603
loss: 1.0802690982818604,grad_norm: 0.9999996607149988, iteration: 90604
loss: 0.9669843316078186,grad_norm: 0.7829293294093215, iteration: 90605
loss: 1.2452068328857422,grad_norm: 0.9999999962976349, iteration: 90606
loss: 0.9897570013999939,grad_norm: 0.9623635792490581, iteration: 90607
loss: 0.9927279949188232,grad_norm: 0.9982703963148974, iteration: 90608
loss: 1.054913878440857,grad_norm: 0.9999996873761902, iteration: 90609
loss: 1.0457154512405396,grad_norm: 0.9999993048327228, iteration: 90610
loss: 1.0117459297180176,grad_norm: 0.8102974242905517, iteration: 90611
loss: 1.0208139419555664,grad_norm: 0.9618514914661412, iteration: 90612
loss: 0.9973507523536682,grad_norm: 0.8551658846221064, iteration: 90613
loss: 0.9939126372337341,grad_norm: 0.9999993474637058, iteration: 90614
loss: 1.1164319515228271,grad_norm: 0.9999998942870468, iteration: 90615
loss: 1.1039373874664307,grad_norm: 0.9999996272217022, iteration: 90616
loss: 1.0014451742172241,grad_norm: 0.9999990332887656, iteration: 90617
loss: 1.0365153551101685,grad_norm: 0.9670016288334627, iteration: 90618
loss: 1.11133873462677,grad_norm: 0.999999218666458, iteration: 90619
loss: 1.2253351211547852,grad_norm: 0.9999998549763014, iteration: 90620
loss: 1.2424067258834839,grad_norm: 0.9999998400481468, iteration: 90621
loss: 1.0146327018737793,grad_norm: 0.9999998764732676, iteration: 90622
loss: 1.0921928882598877,grad_norm: 0.999999191868329, iteration: 90623
loss: 0.9805002212524414,grad_norm: 0.8968619745337362, iteration: 90624
loss: 1.030006766319275,grad_norm: 0.9058412376797798, iteration: 90625
loss: 1.0453691482543945,grad_norm: 0.9999995433218621, iteration: 90626
loss: 1.0056469440460205,grad_norm: 0.9639522113716995, iteration: 90627
loss: 1.1658530235290527,grad_norm: 0.9999998714476693, iteration: 90628
loss: 1.013800024986267,grad_norm: 0.9739249372420233, iteration: 90629
loss: 1.0687471628189087,grad_norm: 0.9999998404820477, iteration: 90630
loss: 1.0604228973388672,grad_norm: 0.9999997682846705, iteration: 90631
loss: 1.0444380044937134,grad_norm: 0.9999991349004682, iteration: 90632
loss: 1.0020217895507812,grad_norm: 0.9807026107879218, iteration: 90633
loss: 1.1465213298797607,grad_norm: 0.9848604240518214, iteration: 90634
loss: 0.9974440932273865,grad_norm: 0.909610325819391, iteration: 90635
loss: 1.0102665424346924,grad_norm: 0.9999998451208352, iteration: 90636
loss: 1.0890494585037231,grad_norm: 0.9818094534341012, iteration: 90637
loss: 1.038673996925354,grad_norm: 0.8305716681295854, iteration: 90638
loss: 1.1907706260681152,grad_norm: 0.9999995628572724, iteration: 90639
loss: 1.1207056045532227,grad_norm: 0.99999946553128, iteration: 90640
loss: 1.0388456583023071,grad_norm: 0.9999996986550168, iteration: 90641
loss: 1.0099674463272095,grad_norm: 0.8954678229879817, iteration: 90642
loss: 0.9686530232429504,grad_norm: 0.9328710608415466, iteration: 90643
loss: 1.0069481134414673,grad_norm: 0.8341442339793128, iteration: 90644
loss: 1.0529547929763794,grad_norm: 0.9999999796109987, iteration: 90645
loss: 1.0500319004058838,grad_norm: 0.9318393881719482, iteration: 90646
loss: 1.1057428121566772,grad_norm: 0.999999719122302, iteration: 90647
loss: 1.2930965423583984,grad_norm: 0.9999997430841929, iteration: 90648
loss: 0.9794570207595825,grad_norm: 0.9999998460749707, iteration: 90649
loss: 1.2224315404891968,grad_norm: 0.9999999531493179, iteration: 90650
loss: 0.9942144155502319,grad_norm: 0.9999993714243542, iteration: 90651
loss: 1.0682176351547241,grad_norm: 0.999999334502736, iteration: 90652
loss: 1.0069595575332642,grad_norm: 0.9999994381518337, iteration: 90653
loss: 1.0114887952804565,grad_norm: 0.9999996666915221, iteration: 90654
loss: 1.101789951324463,grad_norm: 0.9999994998022251, iteration: 90655
loss: 1.0085054636001587,grad_norm: 0.9999999018523477, iteration: 90656
loss: 1.0006898641586304,grad_norm: 0.9999994227340289, iteration: 90657
loss: 1.0112149715423584,grad_norm: 0.9999990904096545, iteration: 90658
loss: 1.1489943265914917,grad_norm: 0.9999994610701383, iteration: 90659
loss: 1.0391545295715332,grad_norm: 0.9164481572016931, iteration: 90660
loss: 1.0193506479263306,grad_norm: 0.9999996141665971, iteration: 90661
loss: 1.0119656324386597,grad_norm: 0.8784375420595308, iteration: 90662
loss: 1.0584869384765625,grad_norm: 0.9999991977912712, iteration: 90663
loss: 0.9851382374763489,grad_norm: 0.999999164905856, iteration: 90664
loss: 1.0323010683059692,grad_norm: 0.9032171618689879, iteration: 90665
loss: 0.9995903372764587,grad_norm: 0.8457990738833665, iteration: 90666
loss: 0.9868256449699402,grad_norm: 0.9620732604641062, iteration: 90667
loss: 1.0896039009094238,grad_norm: 0.9999998229436997, iteration: 90668
loss: 1.0191266536712646,grad_norm: 0.7801764761537812, iteration: 90669
loss: 0.9926485419273376,grad_norm: 0.9999990911310197, iteration: 90670
loss: 1.0811504125595093,grad_norm: 0.9999991180981702, iteration: 90671
loss: 1.043753743171692,grad_norm: 0.999999106794926, iteration: 90672
loss: 1.0154073238372803,grad_norm: 0.6712709940433629, iteration: 90673
loss: 1.0087075233459473,grad_norm: 0.9999992577026052, iteration: 90674
loss: 1.0800518989562988,grad_norm: 0.9999990799944718, iteration: 90675
loss: 1.0188121795654297,grad_norm: 0.9999992211695388, iteration: 90676
loss: 1.0464701652526855,grad_norm: 0.9999990432021646, iteration: 90677
loss: 1.1716314554214478,grad_norm: 0.9999999558901969, iteration: 90678
loss: 0.9993921518325806,grad_norm: 0.919028559737989, iteration: 90679
loss: 1.0427340269088745,grad_norm: 0.9999996693689961, iteration: 90680
loss: 1.1323435306549072,grad_norm: 0.9999998596093713, iteration: 90681
loss: 1.0812212228775024,grad_norm: 0.9999993746036787, iteration: 90682
loss: 1.0440318584442139,grad_norm: 0.9999996316351785, iteration: 90683
loss: 1.0099222660064697,grad_norm: 0.9708353130989522, iteration: 90684
loss: 1.031923770904541,grad_norm: 0.947557963577454, iteration: 90685
loss: 1.056520938873291,grad_norm: 0.999999924689551, iteration: 90686
loss: 1.0383625030517578,grad_norm: 0.9999993165487371, iteration: 90687
loss: 1.0500271320343018,grad_norm: 0.9999993313357276, iteration: 90688
loss: 1.0217361450195312,grad_norm: 0.9999992179524057, iteration: 90689
loss: 1.099112629890442,grad_norm: 0.9999996194790696, iteration: 90690
loss: 1.093083143234253,grad_norm: 0.9999998575090566, iteration: 90691
loss: 1.0033681392669678,grad_norm: 0.9400627444935725, iteration: 90692
loss: 1.187159538269043,grad_norm: 0.999999578090615, iteration: 90693
loss: 1.0535129308700562,grad_norm: 0.9999996048555289, iteration: 90694
loss: 1.0462255477905273,grad_norm: 0.9999994972365721, iteration: 90695
loss: 1.037513017654419,grad_norm: 0.9999992416638794, iteration: 90696
loss: 1.0888724327087402,grad_norm: 0.9999998052831267, iteration: 90697
loss: 1.0630601644515991,grad_norm: 0.9999998462507861, iteration: 90698
loss: 1.0194216966629028,grad_norm: 0.9999999590070066, iteration: 90699
loss: 1.133638620376587,grad_norm: 0.9999994036878157, iteration: 90700
loss: 1.0638874769210815,grad_norm: 0.9999990384827884, iteration: 90701
loss: 0.995446503162384,grad_norm: 0.8379079264995709, iteration: 90702
loss: 1.1137440204620361,grad_norm: 0.99999944912109, iteration: 90703
loss: 1.225834608078003,grad_norm: 0.9999996256234053, iteration: 90704
loss: 0.9972729682922363,grad_norm: 0.9999992991307397, iteration: 90705
loss: 1.0904160737991333,grad_norm: 0.9999990403063314, iteration: 90706
loss: 1.1201156377792358,grad_norm: 0.9999998305918373, iteration: 90707
loss: 1.0409443378448486,grad_norm: 0.9999998504546267, iteration: 90708
loss: 1.0668474435806274,grad_norm: 0.9999993984037424, iteration: 90709
loss: 1.0941253900527954,grad_norm: 0.9999995196124526, iteration: 90710
loss: 1.047029972076416,grad_norm: 0.9999992366763374, iteration: 90711
loss: 1.1238023042678833,grad_norm: 1.0000000704787007, iteration: 90712
loss: 1.0748791694641113,grad_norm: 0.9999996277538268, iteration: 90713
loss: 1.1786153316497803,grad_norm: 0.9999995424868351, iteration: 90714
loss: 1.2954270839691162,grad_norm: 0.9999997790061286, iteration: 90715
loss: 1.261887788772583,grad_norm: 0.9999999950992274, iteration: 90716
loss: 1.0315437316894531,grad_norm: 0.9999991619352843, iteration: 90717
loss: 1.0948988199234009,grad_norm: 0.9999992454592391, iteration: 90718
loss: 1.0235707759857178,grad_norm: 0.8494549597166206, iteration: 90719
loss: 1.0332759618759155,grad_norm: 0.9999991303309113, iteration: 90720
loss: 1.0226556062698364,grad_norm: 0.9999992446610249, iteration: 90721
loss: 1.2193784713745117,grad_norm: 0.9999993687314472, iteration: 90722
loss: 1.1804697513580322,grad_norm: 0.9999998798193254, iteration: 90723
loss: 1.0444064140319824,grad_norm: 0.9646362996012551, iteration: 90724
loss: 0.9692703485488892,grad_norm: 0.931246122848418, iteration: 90725
loss: 1.0561095476150513,grad_norm: 0.9999993101069247, iteration: 90726
loss: 1.0213099718093872,grad_norm: 0.879336146390906, iteration: 90727
loss: 1.1355725526809692,grad_norm: 0.9999994235708694, iteration: 90728
loss: 1.069503664970398,grad_norm: 0.9999995688356894, iteration: 90729
loss: 1.076090931892395,grad_norm: 0.999999916115243, iteration: 90730
loss: 1.0059963464736938,grad_norm: 0.9999998341535666, iteration: 90731
loss: 1.061552882194519,grad_norm: 0.9999990385445944, iteration: 90732
loss: 1.1737818717956543,grad_norm: 0.9999993704384361, iteration: 90733
loss: 1.110721230506897,grad_norm: 0.9999998485244439, iteration: 90734
loss: 1.0646979808807373,grad_norm: 0.999999431893126, iteration: 90735
loss: 1.0044807195663452,grad_norm: 0.9999990923213159, iteration: 90736
loss: 1.0933209657669067,grad_norm: 0.9999992242319035, iteration: 90737
loss: 1.0468640327453613,grad_norm: 0.9999993072284458, iteration: 90738
loss: 1.0252059698104858,grad_norm: 0.9999989732498096, iteration: 90739
loss: 1.0101425647735596,grad_norm: 0.8048023788430804, iteration: 90740
loss: 1.0437676906585693,grad_norm: 0.9999998446545946, iteration: 90741
loss: 1.0433921813964844,grad_norm: 0.9999990088860856, iteration: 90742
loss: 1.055997610092163,grad_norm: 0.9999991350490596, iteration: 90743
loss: 0.987883448600769,grad_norm: 0.9310222307547525, iteration: 90744
loss: 1.0073881149291992,grad_norm: 0.9999989686727906, iteration: 90745
loss: 0.9879836440086365,grad_norm: 0.87126886688153, iteration: 90746
loss: 1.0037366151809692,grad_norm: 0.9999992281071391, iteration: 90747
loss: 1.0824909210205078,grad_norm: 0.999999974486826, iteration: 90748
loss: 0.9939330816268921,grad_norm: 0.9999990222963615, iteration: 90749
loss: 1.101147174835205,grad_norm: 0.9999999136693634, iteration: 90750
loss: 1.1855794191360474,grad_norm: 0.9999995831718296, iteration: 90751
loss: 1.0475877523422241,grad_norm: 0.9999991148167356, iteration: 90752
loss: 0.9650774002075195,grad_norm: 0.999999071022509, iteration: 90753
loss: 1.0156866312026978,grad_norm: 0.9999991616532024, iteration: 90754
loss: 1.022990345954895,grad_norm: 0.9999992718257914, iteration: 90755
loss: 0.9877758622169495,grad_norm: 0.8073180968081619, iteration: 90756
loss: 1.1068187952041626,grad_norm: 0.9999997287703354, iteration: 90757
loss: 1.0821325778961182,grad_norm: 0.9999997971666493, iteration: 90758
loss: 1.1964826583862305,grad_norm: 0.9999999354167343, iteration: 90759
loss: 1.0263407230377197,grad_norm: 0.8131705477938712, iteration: 90760
loss: 1.0137042999267578,grad_norm: 0.9999990246063504, iteration: 90761
loss: 0.9951997995376587,grad_norm: 0.999999826183387, iteration: 90762
loss: 1.1803690195083618,grad_norm: 0.9999993721242307, iteration: 90763
loss: 1.049331545829773,grad_norm: 0.8272822328476133, iteration: 90764
loss: 1.05136239528656,grad_norm: 0.9999998090641309, iteration: 90765
loss: 1.1846245527267456,grad_norm: 0.9999998953246935, iteration: 90766
loss: 0.9775015115737915,grad_norm: 0.999999233250989, iteration: 90767
loss: 0.9998484253883362,grad_norm: 0.8768230850341897, iteration: 90768
loss: 1.1278834342956543,grad_norm: 0.9999998447086482, iteration: 90769
loss: 1.152519702911377,grad_norm: 0.9999998269511937, iteration: 90770
loss: 1.006537675857544,grad_norm: 0.9999989843258811, iteration: 90771
loss: 1.121575117111206,grad_norm: 0.9999998741420189, iteration: 90772
loss: 1.016090989112854,grad_norm: 0.9999995481161684, iteration: 90773
loss: 1.1267870664596558,grad_norm: 0.9999996373708621, iteration: 90774
loss: 1.0538771152496338,grad_norm: 0.9999991721626363, iteration: 90775
loss: 1.0358606576919556,grad_norm: 0.9423935210177405, iteration: 90776
loss: 1.008071780204773,grad_norm: 0.9855819709538749, iteration: 90777
loss: 1.006900429725647,grad_norm: 0.999999227062544, iteration: 90778
loss: 1.0336130857467651,grad_norm: 0.9624665457037369, iteration: 90779
loss: 0.9820477366447449,grad_norm: 0.90281671551563, iteration: 90780
loss: 1.033857822418213,grad_norm: 0.9999990974508047, iteration: 90781
loss: 1.014107584953308,grad_norm: 0.8876170397005358, iteration: 90782
loss: 1.0370135307312012,grad_norm: 0.99999940388511, iteration: 90783
loss: 1.036415457725525,grad_norm: 0.9378342392361244, iteration: 90784
loss: 1.0908095836639404,grad_norm: 0.9999994303974373, iteration: 90785
loss: 0.9985611438751221,grad_norm: 0.8550152149667583, iteration: 90786
loss: 1.3763312101364136,grad_norm: 0.9999998432660799, iteration: 90787
loss: 1.0433619022369385,grad_norm: 0.9999999943986546, iteration: 90788
loss: 1.0472300052642822,grad_norm: 0.9999999354022004, iteration: 90789
loss: 1.079641580581665,grad_norm: 0.9999996533805233, iteration: 90790
loss: 1.043685793876648,grad_norm: 0.9999994154533577, iteration: 90791
loss: 1.073676586151123,grad_norm: 0.999999541374514, iteration: 90792
loss: 1.1284488439559937,grad_norm: 0.9999996396928358, iteration: 90793
loss: 1.0941741466522217,grad_norm: 0.9999995829924386, iteration: 90794
loss: 1.0984623432159424,grad_norm: 0.9999993055748919, iteration: 90795
loss: 1.0303562879562378,grad_norm: 0.9999999653984205, iteration: 90796
loss: 1.0163213014602661,grad_norm: 0.9999997478233947, iteration: 90797
loss: 1.036831259727478,grad_norm: 0.9254022381791327, iteration: 90798
loss: 1.0224545001983643,grad_norm: 0.9999993971051213, iteration: 90799
loss: 0.9900311827659607,grad_norm: 0.9999991291949267, iteration: 90800
loss: 1.1174848079681396,grad_norm: 0.9999992445407193, iteration: 90801
loss: 1.0131351947784424,grad_norm: 0.9999995342136734, iteration: 90802
loss: 1.035732388496399,grad_norm: 0.9999989984011369, iteration: 90803
loss: 1.0526677370071411,grad_norm: 0.9999999817474603, iteration: 90804
loss: 1.0545467138290405,grad_norm: 0.9557197104004096, iteration: 90805
loss: 1.0006579160690308,grad_norm: 0.9999990916717282, iteration: 90806
loss: 1.0317764282226562,grad_norm: 0.9999997270937743, iteration: 90807
loss: 1.1128312349319458,grad_norm: 0.9999999122570378, iteration: 90808
loss: 0.9995220899581909,grad_norm: 1.0000001207062068, iteration: 90809
loss: 0.9961104989051819,grad_norm: 0.9999991243386483, iteration: 90810
loss: 1.1091806888580322,grad_norm: 0.9999995683491748, iteration: 90811
loss: 1.0592634677886963,grad_norm: 0.999999148219133, iteration: 90812
loss: 1.0313745737075806,grad_norm: 0.999999613103679, iteration: 90813
loss: 1.0271825790405273,grad_norm: 0.999999168771697, iteration: 90814
loss: 1.0381380319595337,grad_norm: 0.9196346736702083, iteration: 90815
loss: 1.2354217767715454,grad_norm: 0.9999999229583889, iteration: 90816
loss: 1.0367567539215088,grad_norm: 0.9193381245471904, iteration: 90817
loss: 1.0084359645843506,grad_norm: 0.9999997268416645, iteration: 90818
loss: 1.0224868059158325,grad_norm: 0.9999989990102958, iteration: 90819
loss: 0.9845489263534546,grad_norm: 0.8148006377669439, iteration: 90820
loss: 0.982509970664978,grad_norm: 0.9833975676761283, iteration: 90821
loss: 0.9925770163536072,grad_norm: 0.982539793484578, iteration: 90822
loss: 1.060578465461731,grad_norm: 0.9999998895470377, iteration: 90823
loss: 1.077793836593628,grad_norm: 0.9999993885247773, iteration: 90824
loss: 1.0344864130020142,grad_norm: 0.9999992237357206, iteration: 90825
loss: 0.9898724555969238,grad_norm: 0.9999995222623174, iteration: 90826
loss: 0.9963033199310303,grad_norm: 0.9999998445523255, iteration: 90827
loss: 1.0573184490203857,grad_norm: 1.0000000071150825, iteration: 90828
loss: 1.0022245645523071,grad_norm: 0.9999993317510741, iteration: 90829
loss: 1.0156958103179932,grad_norm: 0.9999997715176444, iteration: 90830
loss: 1.0439890623092651,grad_norm: 0.929939714714934, iteration: 90831
loss: 1.134812355041504,grad_norm: 0.9999994020147087, iteration: 90832
loss: 0.9668384194374084,grad_norm: 0.9999994667415935, iteration: 90833
loss: 0.9971309900283813,grad_norm: 0.8061484891726166, iteration: 90834
loss: 1.1869629621505737,grad_norm: 0.9999996583529137, iteration: 90835
loss: 0.9723278284072876,grad_norm: 0.8826655531117431, iteration: 90836
loss: 1.0967711210250854,grad_norm: 0.999999706108623, iteration: 90837
loss: 0.9982736706733704,grad_norm: 0.7270211287183651, iteration: 90838
loss: 0.9946557283401489,grad_norm: 0.9999996112125263, iteration: 90839
loss: 1.0122450590133667,grad_norm: 0.9779204337138446, iteration: 90840
loss: 1.0206055641174316,grad_norm: 0.8533593589325353, iteration: 90841
loss: 1.0451083183288574,grad_norm: 0.9999990116976063, iteration: 90842
loss: 1.1062651872634888,grad_norm: 0.9999992441677489, iteration: 90843
loss: 1.1207133531570435,grad_norm: 0.9999999448601837, iteration: 90844
loss: 1.0131114721298218,grad_norm: 0.9479960912360533, iteration: 90845
loss: 0.9906767010688782,grad_norm: 0.9844274810899062, iteration: 90846
loss: 1.0611683130264282,grad_norm: 0.9999991252738158, iteration: 90847
loss: 1.057164192199707,grad_norm: 0.9999995225760862, iteration: 90848
loss: 0.971601128578186,grad_norm: 0.8063513648992087, iteration: 90849
loss: 1.0056644678115845,grad_norm: 0.9999990105379195, iteration: 90850
loss: 1.0223050117492676,grad_norm: 0.9999991298251092, iteration: 90851
loss: 1.025106430053711,grad_norm: 0.9999992772786732, iteration: 90852
loss: 1.0212769508361816,grad_norm: 0.9999992151558543, iteration: 90853
loss: 1.056502342224121,grad_norm: 0.9999991977439242, iteration: 90854
loss: 1.0507535934448242,grad_norm: 0.9999990863024572, iteration: 90855
loss: 1.0781025886535645,grad_norm: 0.9999993306981892, iteration: 90856
loss: 1.0083075761795044,grad_norm: 0.8584268787876521, iteration: 90857
loss: 1.1047844886779785,grad_norm: 0.9999997788894388, iteration: 90858
loss: 0.9605936408042908,grad_norm: 0.9999990754522529, iteration: 90859
loss: 1.0572319030761719,grad_norm: 0.9999992026683749, iteration: 90860
loss: 0.9829235076904297,grad_norm: 0.9999991013914131, iteration: 90861
loss: 1.0538673400878906,grad_norm: 0.9999990766589685, iteration: 90862
loss: 1.079328179359436,grad_norm: 0.9999993693866982, iteration: 90863
loss: 1.0976146459579468,grad_norm: 0.9999992599460883, iteration: 90864
loss: 1.098948359489441,grad_norm: 0.999999068120023, iteration: 90865
loss: 0.9595801830291748,grad_norm: 0.9830158136508496, iteration: 90866
loss: 0.984738290309906,grad_norm: 0.9554474078384912, iteration: 90867
loss: 0.9947119951248169,grad_norm: 0.9999992652687612, iteration: 90868
loss: 1.0189727544784546,grad_norm: 0.7929625414961483, iteration: 90869
loss: 1.0800304412841797,grad_norm: 0.9044147540837412, iteration: 90870
loss: 0.9815779328346252,grad_norm: 0.9869048207821951, iteration: 90871
loss: 1.1099578142166138,grad_norm: 0.999999510459288, iteration: 90872
loss: 1.074893593788147,grad_norm: 0.9999997491827919, iteration: 90873
loss: 0.99373459815979,grad_norm: 0.9011660150812819, iteration: 90874
loss: 0.9799146056175232,grad_norm: 0.9278724362787225, iteration: 90875
loss: 1.036248803138733,grad_norm: 0.9999992867759971, iteration: 90876
loss: 1.0570645332336426,grad_norm: 0.9999993077780296, iteration: 90877
loss: 1.031641960144043,grad_norm: 0.9962234941041079, iteration: 90878
loss: 1.0072590112686157,grad_norm: 0.9999994337546353, iteration: 90879
loss: 0.9664319753646851,grad_norm: 0.8913728402926535, iteration: 90880
loss: 0.9846899509429932,grad_norm: 0.9999997923924198, iteration: 90881
loss: 0.9762380123138428,grad_norm: 0.8603190014127624, iteration: 90882
loss: 1.0707179307937622,grad_norm: 0.9141853967988252, iteration: 90883
loss: 1.0393484830856323,grad_norm: 0.9999991837132373, iteration: 90884
loss: 1.061680555343628,grad_norm: 0.9358442469629744, iteration: 90885
loss: 1.123831868171692,grad_norm: 0.9999998955868459, iteration: 90886
loss: 1.0463926792144775,grad_norm: 0.9999996388780255, iteration: 90887
loss: 1.1325055360794067,grad_norm: 0.9999998807930356, iteration: 90888
loss: 1.176371693611145,grad_norm: 0.9999999033251893, iteration: 90889
loss: 1.02252995967865,grad_norm: 0.9999994127381796, iteration: 90890
loss: 1.0207462310791016,grad_norm: 0.9024160044134789, iteration: 90891
loss: 0.9696072936058044,grad_norm: 0.9999991815146175, iteration: 90892
loss: 0.9854432940483093,grad_norm: 0.846250842115389, iteration: 90893
loss: 1.0516608953475952,grad_norm: 0.9314709765968028, iteration: 90894
loss: 1.052598237991333,grad_norm: 0.9999996569826037, iteration: 90895
loss: 1.0341323614120483,grad_norm: 0.999999560997972, iteration: 90896
loss: 0.9859471321105957,grad_norm: 0.8628726257395583, iteration: 90897
loss: 1.0080920457839966,grad_norm: 0.9536956950838232, iteration: 90898
loss: 1.051844596862793,grad_norm: 0.9961031912770417, iteration: 90899
loss: 1.1424391269683838,grad_norm: 0.9999994050893444, iteration: 90900
loss: 0.9812361001968384,grad_norm: 0.9999990234981673, iteration: 90901
loss: 0.977806806564331,grad_norm: 0.9999990331609286, iteration: 90902
loss: 0.9669166207313538,grad_norm: 0.9200250661335942, iteration: 90903
loss: 0.9872376918792725,grad_norm: 0.9000422519518088, iteration: 90904
loss: 1.0410345792770386,grad_norm: 0.9774494806554834, iteration: 90905
loss: 1.0399705171585083,grad_norm: 0.99999925338026, iteration: 90906
loss: 1.0317022800445557,grad_norm: 0.8866388889100079, iteration: 90907
loss: 1.019639253616333,grad_norm: 0.9999992056096054, iteration: 90908
loss: 1.039508581161499,grad_norm: 0.9742040294939139, iteration: 90909
loss: 1.0169709920883179,grad_norm: 0.9999995830905594, iteration: 90910
loss: 1.1297632455825806,grad_norm: 0.9999997029027669, iteration: 90911
loss: 0.9998652338981628,grad_norm: 0.7670981487464763, iteration: 90912
loss: 1.012035608291626,grad_norm: 0.9667747765750739, iteration: 90913
loss: 1.0594018697738647,grad_norm: 0.9999998815939124, iteration: 90914
loss: 0.997292160987854,grad_norm: 0.9999990690077748, iteration: 90915
loss: 1.0375275611877441,grad_norm: 0.9999997367800408, iteration: 90916
loss: 0.9710134267807007,grad_norm: 0.8008067795285935, iteration: 90917
loss: 1.048024296760559,grad_norm: 0.9999993145692342, iteration: 90918
loss: 1.0080199241638184,grad_norm: 0.9257160185009727, iteration: 90919
loss: 1.0161128044128418,grad_norm: 0.9999992261165451, iteration: 90920
loss: 1.0327272415161133,grad_norm: 0.9999992130714861, iteration: 90921
loss: 1.0590139627456665,grad_norm: 0.9999994125102506, iteration: 90922
loss: 0.9850560426712036,grad_norm: 0.9999994140410269, iteration: 90923
loss: 1.1813278198242188,grad_norm: 0.9999991609014571, iteration: 90924
loss: 0.9637306332588196,grad_norm: 0.8695898273902528, iteration: 90925
loss: 1.0238852500915527,grad_norm: 0.8491600193155409, iteration: 90926
loss: 0.9791988730430603,grad_norm: 0.9999991661667313, iteration: 90927
loss: 0.9789606928825378,grad_norm: 0.814050696274435, iteration: 90928
loss: 1.0858889818191528,grad_norm: 0.9999995904626834, iteration: 90929
loss: 1.0152517557144165,grad_norm: 0.9999994100383094, iteration: 90930
loss: 1.0046430826187134,grad_norm: 0.9999992702707128, iteration: 90931
loss: 1.0997629165649414,grad_norm: 1.0000001194017527, iteration: 90932
loss: 1.0556285381317139,grad_norm: 0.9999991420959444, iteration: 90933
loss: 0.9922806024551392,grad_norm: 0.9078133074506789, iteration: 90934
loss: 1.042914628982544,grad_norm: 0.9999998724987458, iteration: 90935
loss: 1.001125693321228,grad_norm: 0.9096965408407908, iteration: 90936
loss: 1.0236185789108276,grad_norm: 0.9999991759680273, iteration: 90937
loss: 1.003962755203247,grad_norm: 0.9999991004048454, iteration: 90938
loss: 1.0645924806594849,grad_norm: 0.9999990993731614, iteration: 90939
loss: 1.005000352859497,grad_norm: 0.8895515297387121, iteration: 90940
loss: 0.9786751866340637,grad_norm: 0.9999993825069903, iteration: 90941
loss: 1.048614501953125,grad_norm: 0.999999106046908, iteration: 90942
loss: 1.0668143033981323,grad_norm: 0.999999121654971, iteration: 90943
loss: 1.0399872064590454,grad_norm: 0.974289110283957, iteration: 90944
loss: 1.0958188772201538,grad_norm: 0.9999991928796466, iteration: 90945
loss: 1.034365177154541,grad_norm: 0.9999990105569013, iteration: 90946
loss: 1.000468134880066,grad_norm: 0.7937656983872631, iteration: 90947
loss: 1.0083565711975098,grad_norm: 0.9999990873713681, iteration: 90948
loss: 1.0115244388580322,grad_norm: 0.9999999910893027, iteration: 90949
loss: 1.0299535989761353,grad_norm: 0.9999998906641991, iteration: 90950
loss: 1.0491669178009033,grad_norm: 0.9999992293700052, iteration: 90951
loss: 1.0764926671981812,grad_norm: 0.8533032187288547, iteration: 90952
loss: 1.0214471817016602,grad_norm: 0.9993701447001963, iteration: 90953
loss: 1.0620189905166626,grad_norm: 0.9999995865728435, iteration: 90954
loss: 1.048346996307373,grad_norm: 0.9999990593790863, iteration: 90955
loss: 0.9607956409454346,grad_norm: 0.9460714443245798, iteration: 90956
loss: 1.0522311925888062,grad_norm: 0.9122724576221015, iteration: 90957
loss: 0.9917269349098206,grad_norm: 0.9999989833998804, iteration: 90958
loss: 1.010892391204834,grad_norm: 0.9780764125494894, iteration: 90959
loss: 1.1249537467956543,grad_norm: 0.9999996144585876, iteration: 90960
loss: 1.026455044746399,grad_norm: 0.983349990562441, iteration: 90961
loss: 1.0179717540740967,grad_norm: 0.999999412933421, iteration: 90962
loss: 0.9701173305511475,grad_norm: 0.9999991581450629, iteration: 90963
loss: 1.0401129722595215,grad_norm: 0.9999996133752771, iteration: 90964
loss: 1.047502040863037,grad_norm: 0.8357914404776614, iteration: 90965
loss: 0.9942862391471863,grad_norm: 0.999999113006797, iteration: 90966
loss: 1.0556222200393677,grad_norm: 0.9999990916407041, iteration: 90967
loss: 1.0067461729049683,grad_norm: 0.9999998235715801, iteration: 90968
loss: 1.003765344619751,grad_norm: 0.9224233690040108, iteration: 90969
loss: 1.0222969055175781,grad_norm: 0.8648303122474716, iteration: 90970
loss: 0.9962453842163086,grad_norm: 0.9999995398805227, iteration: 90971
loss: 1.0043141841888428,grad_norm: 0.9594662346383811, iteration: 90972
loss: 1.2330073118209839,grad_norm: 0.999999805223437, iteration: 90973
loss: 1.051782250404358,grad_norm: 0.9682071389958312, iteration: 90974
loss: 0.9928911328315735,grad_norm: 0.8547062766071779, iteration: 90975
loss: 0.9967870712280273,grad_norm: 0.8504273322226445, iteration: 90976
loss: 1.0437414646148682,grad_norm: 0.9999995665454349, iteration: 90977
loss: 1.0071711540222168,grad_norm: 0.9948033754481312, iteration: 90978
loss: 1.011200189590454,grad_norm: 0.9999991943352141, iteration: 90979
loss: 1.0959351062774658,grad_norm: 0.9999997113321112, iteration: 90980
loss: 1.0731990337371826,grad_norm: 0.9999993118531626, iteration: 90981
loss: 1.339327335357666,grad_norm: 0.9999998505132882, iteration: 90982
loss: 0.9924517273902893,grad_norm: 0.9551356663355048, iteration: 90983
loss: 1.073278546333313,grad_norm: 0.9999998089825883, iteration: 90984
loss: 1.0455299615859985,grad_norm: 0.9999996702358933, iteration: 90985
loss: 1.027113437652588,grad_norm: 0.9999996089514439, iteration: 90986
loss: 1.103769063949585,grad_norm: 0.999999288902756, iteration: 90987
loss: 1.108806848526001,grad_norm: 0.999999810404451, iteration: 90988
loss: 1.067461609840393,grad_norm: 0.9999996914696898, iteration: 90989
loss: 1.0881034135818481,grad_norm: 0.999999698765805, iteration: 90990
loss: 1.0126326084136963,grad_norm: 0.9999998682043525, iteration: 90991
loss: 1.0369231700897217,grad_norm: 0.9999992419153598, iteration: 90992
loss: 1.0106120109558105,grad_norm: 0.9999991880603364, iteration: 90993
loss: 1.0336381196975708,grad_norm: 0.9999991687386416, iteration: 90994
loss: 1.235055685043335,grad_norm: 0.9999997763972158, iteration: 90995
loss: 1.1173889636993408,grad_norm: 0.9999993820959132, iteration: 90996
loss: 1.0011777877807617,grad_norm: 0.9999991330278709, iteration: 90997
loss: 0.9744888544082642,grad_norm: 0.9999993525137764, iteration: 90998
loss: 1.1244345903396606,grad_norm: 0.9999996618997192, iteration: 90999
loss: 0.9862257838249207,grad_norm: 0.9888096440506773, iteration: 91000
loss: 1.0971907377243042,grad_norm: 0.9455532221400724, iteration: 91001
loss: 1.1702234745025635,grad_norm: 0.9999999201071303, iteration: 91002
loss: 1.003766655921936,grad_norm: 0.9999991786312226, iteration: 91003
loss: 1.1230871677398682,grad_norm: 0.9999991561089714, iteration: 91004
loss: 1.052159070968628,grad_norm: 0.9999990664032511, iteration: 91005
loss: 1.0611083507537842,grad_norm: 0.9999991825196786, iteration: 91006
loss: 1.0941542387008667,grad_norm: 0.9999997907561116, iteration: 91007
loss: 1.0155891180038452,grad_norm: 0.9999999422384163, iteration: 91008
loss: 1.101180076599121,grad_norm: 0.9999999559572604, iteration: 91009
loss: 1.089799165725708,grad_norm: 0.9999989438722411, iteration: 91010
loss: 1.0371687412261963,grad_norm: 0.9999999781235835, iteration: 91011
loss: 1.0512245893478394,grad_norm: 0.9999990795572204, iteration: 91012
loss: 0.976782500743866,grad_norm: 0.9999993909354518, iteration: 91013
loss: 1.092463493347168,grad_norm: 1.0000000207393431, iteration: 91014
loss: 1.1937901973724365,grad_norm: 0.9999999864502054, iteration: 91015
loss: 1.2755523920059204,grad_norm: 0.99999988488075, iteration: 91016
loss: 1.1187423467636108,grad_norm: 0.9999994562665314, iteration: 91017
loss: 1.068878173828125,grad_norm: 0.9999999626167552, iteration: 91018
loss: 1.0221645832061768,grad_norm: 0.9999991885903303, iteration: 91019
loss: 1.0668188333511353,grad_norm: 0.9999992840043568, iteration: 91020
loss: 1.0517338514328003,grad_norm: 0.9999991345685888, iteration: 91021
loss: 1.1267505884170532,grad_norm: 0.9999994598742086, iteration: 91022
loss: 1.1807249784469604,grad_norm: 0.9999996861384209, iteration: 91023
loss: 1.0517356395721436,grad_norm: 0.9999997195149145, iteration: 91024
loss: 1.155280351638794,grad_norm: 0.9999997326147222, iteration: 91025
loss: 1.0914232730865479,grad_norm: 0.9999993923513368, iteration: 91026
loss: 1.2087650299072266,grad_norm: 0.9999999398966656, iteration: 91027
loss: 1.136306643486023,grad_norm: 0.9999997177937188, iteration: 91028
loss: 1.0178014039993286,grad_norm: 0.9999992079507148, iteration: 91029
loss: 1.0027469396591187,grad_norm: 0.9999990180683471, iteration: 91030
loss: 1.068145990371704,grad_norm: 0.913107654852023, iteration: 91031
loss: 1.0257656574249268,grad_norm: 0.999999308987553, iteration: 91032
loss: 1.0514217615127563,grad_norm: 0.999999039287368, iteration: 91033
loss: 1.11232590675354,grad_norm: 0.8853460013812985, iteration: 91034
loss: 1.0040148496627808,grad_norm: 0.9966999483625434, iteration: 91035
loss: 0.996489942073822,grad_norm: 0.9941147690447425, iteration: 91036
loss: 0.9974241256713867,grad_norm: 0.9537713004879042, iteration: 91037
loss: 1.1122004985809326,grad_norm: 0.9999993943228763, iteration: 91038
loss: 1.0224847793579102,grad_norm: 0.8911990504776519, iteration: 91039
loss: 1.0265010595321655,grad_norm: 0.9999993299093914, iteration: 91040
loss: 1.030652403831482,grad_norm: 0.9999993376501236, iteration: 91041
loss: 1.0941338539123535,grad_norm: 0.9999993671309926, iteration: 91042
loss: 1.0497307777404785,grad_norm: 0.9999991187559524, iteration: 91043
loss: 1.0068987607955933,grad_norm: 0.8799991493366902, iteration: 91044
loss: 0.9774677753448486,grad_norm: 0.9999999861835067, iteration: 91045
loss: 1.0315368175506592,grad_norm: 0.8094879404680155, iteration: 91046
loss: 1.0395644903182983,grad_norm: 0.9999991536731336, iteration: 91047
loss: 1.021344542503357,grad_norm: 0.9999998285599662, iteration: 91048
loss: 1.0657362937927246,grad_norm: 0.999999259447374, iteration: 91049
loss: 1.0227032899856567,grad_norm: 0.9384203606388165, iteration: 91050
loss: 1.0979704856872559,grad_norm: 0.9999991705725851, iteration: 91051
loss: 1.1269361972808838,grad_norm: 0.9999994723387083, iteration: 91052
loss: 1.1328824758529663,grad_norm: 0.9999992936676871, iteration: 91053
loss: 1.0314842462539673,grad_norm: 0.886951093049481, iteration: 91054
loss: 1.182045817375183,grad_norm: 0.99999951413709, iteration: 91055
loss: 1.0353442430496216,grad_norm: 0.9999992428831257, iteration: 91056
loss: 0.9862521290779114,grad_norm: 0.999999157362008, iteration: 91057
loss: 1.067071795463562,grad_norm: 0.9999991681353191, iteration: 91058
loss: 0.9733408093452454,grad_norm: 0.7397793931140589, iteration: 91059
loss: 1.0071231126785278,grad_norm: 0.9014574278473251, iteration: 91060
loss: 1.0677165985107422,grad_norm: 0.9999997417847375, iteration: 91061
loss: 1.0064738988876343,grad_norm: 0.945354954962449, iteration: 91062
loss: 1.002498745918274,grad_norm: 0.9472711298797322, iteration: 91063
loss: 1.0014744997024536,grad_norm: 0.9999989720935731, iteration: 91064
loss: 1.0810338258743286,grad_norm: 0.9999993589014511, iteration: 91065
loss: 1.1867334842681885,grad_norm: 0.9999999271460304, iteration: 91066
loss: 1.0463101863861084,grad_norm: 0.99999955619498, iteration: 91067
loss: 0.9785169363021851,grad_norm: 0.9553784365150512, iteration: 91068
loss: 1.0582243204116821,grad_norm: 0.9138755155224192, iteration: 91069
loss: 1.0629644393920898,grad_norm: 0.9999991821604999, iteration: 91070
loss: 1.101745843887329,grad_norm: 0.9999991509352113, iteration: 91071
loss: 1.1894181966781616,grad_norm: 0.999999375368495, iteration: 91072
loss: 1.1970707178115845,grad_norm: 0.9999996112704315, iteration: 91073
loss: 1.1404163837432861,grad_norm: 0.9361295626126697, iteration: 91074
loss: 1.095146894454956,grad_norm: 0.9999999099258893, iteration: 91075
loss: 1.1037471294403076,grad_norm: 0.9999994947511353, iteration: 91076
loss: 1.0123544931411743,grad_norm: 0.9999991879154818, iteration: 91077
loss: 1.0076993703842163,grad_norm: 0.9260232086521862, iteration: 91078
loss: 1.105991244316101,grad_norm: 0.9999992575506818, iteration: 91079
loss: 1.0768460035324097,grad_norm: 0.9999998515595631, iteration: 91080
loss: 1.0235846042633057,grad_norm: 0.9999995737741689, iteration: 91081
loss: 0.9723383784294128,grad_norm: 0.9999992049756297, iteration: 91082
loss: 1.0253915786743164,grad_norm: 0.9932599318854631, iteration: 91083
loss: 1.0501652956008911,grad_norm: 0.9999999766893344, iteration: 91084
loss: 0.9974537491798401,grad_norm: 0.8788849490166234, iteration: 91085
loss: 0.9853518605232239,grad_norm: 0.7880650001647477, iteration: 91086
loss: 1.0597360134124756,grad_norm: 0.9999990967094435, iteration: 91087
loss: 0.972399890422821,grad_norm: 0.9819384022215485, iteration: 91088
loss: 0.9573596715927124,grad_norm: 0.999999233708397, iteration: 91089
loss: 1.0527147054672241,grad_norm: 0.9999993101140864, iteration: 91090
loss: 1.0255826711654663,grad_norm: 0.9999992313755, iteration: 91091
loss: 1.023210883140564,grad_norm: 0.9999991761166424, iteration: 91092
loss: 1.0066092014312744,grad_norm: 0.7961846022328055, iteration: 91093
loss: 0.9990695118904114,grad_norm: 0.9999990674110515, iteration: 91094
loss: 1.1138081550598145,grad_norm: 0.9999993120811305, iteration: 91095
loss: 0.9735185503959656,grad_norm: 0.9999989966849191, iteration: 91096
loss: 1.0940481424331665,grad_norm: 0.9999995235595522, iteration: 91097
loss: 1.0074167251586914,grad_norm: 0.804166301052983, iteration: 91098
loss: 1.3182231187820435,grad_norm: 0.9999998246355364, iteration: 91099
loss: 1.064379096031189,grad_norm: 0.9999991318893221, iteration: 91100
loss: 1.1823662519454956,grad_norm: 0.9999993725625426, iteration: 91101
loss: 0.9992992877960205,grad_norm: 0.9999996672444545, iteration: 91102
loss: 1.0308074951171875,grad_norm: 0.9999992936502728, iteration: 91103
loss: 1.2383058071136475,grad_norm: 0.9999997332966329, iteration: 91104
loss: 1.0547804832458496,grad_norm: 0.9999995218701677, iteration: 91105
loss: 1.031822681427002,grad_norm: 0.8763961352510192, iteration: 91106
loss: 0.9983563423156738,grad_norm: 0.9999992439285676, iteration: 91107
loss: 1.0323978662490845,grad_norm: 0.999999234679218, iteration: 91108
loss: 1.1057868003845215,grad_norm: 0.9999996891338242, iteration: 91109
loss: 1.0946872234344482,grad_norm: 0.9999990971015629, iteration: 91110
loss: 1.0911816358566284,grad_norm: 0.9999991648377335, iteration: 91111
loss: 1.039801836013794,grad_norm: 0.9999996557830759, iteration: 91112
loss: 1.0783190727233887,grad_norm: 0.9999990714016651, iteration: 91113
loss: 1.231658935546875,grad_norm: 0.999999307333184, iteration: 91114
loss: 1.090585708618164,grad_norm: 0.9999997555995724, iteration: 91115
loss: 1.2171828746795654,grad_norm: 0.9999999617976437, iteration: 91116
loss: 1.200801134109497,grad_norm: 0.9999998362606544, iteration: 91117
loss: 1.0717085599899292,grad_norm: 0.999999797510114, iteration: 91118
loss: 1.040233850479126,grad_norm: 0.999999556452156, iteration: 91119
loss: 1.187648057937622,grad_norm: 0.9999998910515765, iteration: 91120
loss: 1.2182995080947876,grad_norm: 0.9999996612127469, iteration: 91121
loss: 1.074021339416504,grad_norm: 0.9999995985997325, iteration: 91122
loss: 1.1115316152572632,grad_norm: 0.9757547136733173, iteration: 91123
loss: 1.1879397630691528,grad_norm: 1.0000000106073181, iteration: 91124
loss: 1.2038826942443848,grad_norm: 0.9999999911546917, iteration: 91125
loss: 1.1072862148284912,grad_norm: 0.9999997968719044, iteration: 91126
loss: 1.116200566291809,grad_norm: 0.9999999489966198, iteration: 91127
loss: 1.134798288345337,grad_norm: 0.9999995816145699, iteration: 91128
loss: 1.1256544589996338,grad_norm: 0.9999994989754367, iteration: 91129
loss: 1.0868738889694214,grad_norm: 0.9999998299125548, iteration: 91130
loss: 1.016514539718628,grad_norm: 0.999999615027239, iteration: 91131
loss: 1.2980901002883911,grad_norm: 0.9999995856599545, iteration: 91132
loss: 1.187830924987793,grad_norm: 0.9999999531036092, iteration: 91133
loss: 1.4144647121429443,grad_norm: 0.999999900335899, iteration: 91134
loss: 1.1594409942626953,grad_norm: 0.9999997544626947, iteration: 91135
loss: 1.304808259010315,grad_norm: 0.9999997935178886, iteration: 91136
loss: 1.2455594539642334,grad_norm: 0.9999999720685756, iteration: 91137
loss: 1.3230984210968018,grad_norm: 0.999999997846438, iteration: 91138
loss: 1.270487666130066,grad_norm: 0.9999997672484016, iteration: 91139
loss: 1.4443340301513672,grad_norm: 0.9999999965270796, iteration: 91140
loss: 1.6676304340362549,grad_norm: 0.9999999858255539, iteration: 91141
loss: 1.4452804327011108,grad_norm: 0.999999873651159, iteration: 91142
loss: 1.2553138732910156,grad_norm: 0.9999998337082866, iteration: 91143
loss: 1.3383197784423828,grad_norm: 0.9999998597250443, iteration: 91144
loss: 1.4353647232055664,grad_norm: 0.9999998272846233, iteration: 91145
loss: 1.5357424020767212,grad_norm: 0.999999774833957, iteration: 91146
loss: 1.3696560859680176,grad_norm: 0.9999999177698923, iteration: 91147
loss: 1.8058243989944458,grad_norm: 0.9999999989347987, iteration: 91148
loss: 1.6124553680419922,grad_norm: 0.9999999921512641, iteration: 91149
loss: 1.8581000566482544,grad_norm: 0.9999998809428634, iteration: 91150
loss: 1.7875176668167114,grad_norm: 0.9999999992097476, iteration: 91151
loss: 1.7173373699188232,grad_norm: 0.9999998095460683, iteration: 91152
loss: 1.7858318090438843,grad_norm: 0.9999999763083726, iteration: 91153
loss: 1.7059847116470337,grad_norm: 0.9999999887967587, iteration: 91154
loss: 1.422706127166748,grad_norm: 0.9999998143634087, iteration: 91155
loss: 1.6172007322311401,grad_norm: 0.9999998816620147, iteration: 91156
loss: 1.4066948890686035,grad_norm: 0.9999999701091354, iteration: 91157
loss: 1.2748245000839233,grad_norm: 0.9999996358399477, iteration: 91158
loss: 1.3404229879379272,grad_norm: 0.9999999536397264, iteration: 91159
loss: 1.2344348430633545,grad_norm: 0.9999999000052138, iteration: 91160
loss: 1.4407528638839722,grad_norm: 0.9999997898576836, iteration: 91161
loss: 1.297116994857788,grad_norm: 0.9999998601265109, iteration: 91162
loss: 1.5824977159500122,grad_norm: 1.0000000274654857, iteration: 91163
loss: 1.64951753616333,grad_norm: 0.9999999109387756, iteration: 91164
loss: 1.3834624290466309,grad_norm: 0.9999999526918104, iteration: 91165
loss: 1.2894893884658813,grad_norm: 0.9999999734199843, iteration: 91166
loss: 1.2341346740722656,grad_norm: 0.9999994163709123, iteration: 91167
loss: 1.1288665533065796,grad_norm: 0.9999997590975291, iteration: 91168
loss: 1.5586786270141602,grad_norm: 0.9999998443935998, iteration: 91169
loss: 1.4158340692520142,grad_norm: 1.000000001158727, iteration: 91170
loss: 1.365807294845581,grad_norm: 1.000000036796542, iteration: 91171
loss: 1.2814851999282837,grad_norm: 0.999999750851889, iteration: 91172
loss: 1.2961667776107788,grad_norm: 0.9999999193936128, iteration: 91173
loss: 1.3817028999328613,grad_norm: 0.9999998201084067, iteration: 91174
loss: 1.358525276184082,grad_norm: 0.9999998657156046, iteration: 91175
loss: 1.3073179721832275,grad_norm: 0.999999892753952, iteration: 91176
loss: 1.3041139841079712,grad_norm: 0.9999998271711398, iteration: 91177
loss: 1.2455793619155884,grad_norm: 0.9999997508214074, iteration: 91178
loss: 1.157986044883728,grad_norm: 0.9999998281152729, iteration: 91179
loss: 1.3219045400619507,grad_norm: 0.9999998460899131, iteration: 91180
loss: 1.3291771411895752,grad_norm: 0.9999998487942799, iteration: 91181
loss: 1.3450607061386108,grad_norm: 0.9999997462781706, iteration: 91182
loss: 1.175353765487671,grad_norm: 0.9999999205699808, iteration: 91183
loss: 1.5499815940856934,grad_norm: 0.9999998986746657, iteration: 91184
loss: 1.3478999137878418,grad_norm: 0.9999998184487245, iteration: 91185
loss: 1.2270067930221558,grad_norm: 0.9999998388159287, iteration: 91186
loss: 1.3055377006530762,grad_norm: 0.9999998096361686, iteration: 91187
loss: 1.2378064393997192,grad_norm: 0.9999996816207302, iteration: 91188
loss: 1.210342526435852,grad_norm: 0.9999998818733309, iteration: 91189
loss: 1.3465152978897095,grad_norm: 0.9999998810517025, iteration: 91190
loss: 1.2249387502670288,grad_norm: 0.9999998759323142, iteration: 91191
loss: 1.3575407266616821,grad_norm: 0.9999999441561868, iteration: 91192
loss: 1.2583730220794678,grad_norm: 0.9999998560697342, iteration: 91193
loss: 1.2047244310379028,grad_norm: 0.9999999985636756, iteration: 91194
loss: 1.2979105710983276,grad_norm: 0.9999997297030747, iteration: 91195
loss: 1.2175238132476807,grad_norm: 0.9999995534628355, iteration: 91196
loss: 1.2131057977676392,grad_norm: 0.9999998100538775, iteration: 91197
loss: 1.1144884824752808,grad_norm: 0.9999996994378457, iteration: 91198
loss: 1.3268646001815796,grad_norm: 0.9999999079242944, iteration: 91199
loss: 1.216166615486145,grad_norm: 0.999999811924261, iteration: 91200
loss: 1.1734387874603271,grad_norm: 0.9999998673327392, iteration: 91201
loss: 1.1448734998703003,grad_norm: 0.9999998662663228, iteration: 91202
loss: 1.2610512971878052,grad_norm: 0.9999999341711506, iteration: 91203
loss: 1.3641116619110107,grad_norm: 0.9999998322416715, iteration: 91204
loss: 1.4180783033370972,grad_norm: 0.9999999439258347, iteration: 91205
loss: 1.1477370262145996,grad_norm: 0.9999998026437309, iteration: 91206
loss: 1.1227236986160278,grad_norm: 0.9999996829884188, iteration: 91207
loss: 1.4718109369277954,grad_norm: 0.9999998777572449, iteration: 91208
loss: 1.3420079946517944,grad_norm: 0.999999917593848, iteration: 91209
loss: 1.2185505628585815,grad_norm: 0.9999998666710832, iteration: 91210
loss: 1.1945880651474,grad_norm: 0.9999998823790045, iteration: 91211
loss: 1.1781569719314575,grad_norm: 0.9999995041322122, iteration: 91212
loss: 1.2033675909042358,grad_norm: 0.999999565208923, iteration: 91213
loss: 1.458693027496338,grad_norm: 0.9999999017355053, iteration: 91214
loss: 1.197893500328064,grad_norm: 0.9999998495376009, iteration: 91215
loss: 1.4419941902160645,grad_norm: 0.9999998113824379, iteration: 91216
loss: 1.2689292430877686,grad_norm: 0.9999998582532771, iteration: 91217
loss: 1.2534615993499756,grad_norm: 0.9999994132422324, iteration: 91218
loss: 1.1306957006454468,grad_norm: 0.9999997227246271, iteration: 91219
loss: 1.2985888719558716,grad_norm: 0.9999998478537034, iteration: 91220
loss: 1.1514179706573486,grad_norm: 0.9999996950492218, iteration: 91221
loss: 1.272210955619812,grad_norm: 0.9999996016792904, iteration: 91222
loss: 1.2541359663009644,grad_norm: 1.0000000049706634, iteration: 91223
loss: 1.2240393161773682,grad_norm: 0.9999996123404279, iteration: 91224
loss: 1.1568094491958618,grad_norm: 0.9999995965659558, iteration: 91225
loss: 1.2427897453308105,grad_norm: 0.9999999563485444, iteration: 91226
loss: 1.279839038848877,grad_norm: 0.9999996289972403, iteration: 91227
loss: 1.3096745014190674,grad_norm: 0.9999997460807051, iteration: 91228
loss: 1.347043514251709,grad_norm: 0.9999997698146528, iteration: 91229
loss: 1.2429429292678833,grad_norm: 0.9999995535436543, iteration: 91230
loss: 1.2442032098770142,grad_norm: 0.9999999541323179, iteration: 91231
loss: 1.2272272109985352,grad_norm: 0.9999999011705621, iteration: 91232
loss: 1.220821499824524,grad_norm: 0.9999999065853359, iteration: 91233
loss: 1.289151668548584,grad_norm: 0.9999999249400726, iteration: 91234
loss: 1.3094006776809692,grad_norm: 0.9999996386315838, iteration: 91235
loss: 1.2159758806228638,grad_norm: 0.9999995912368022, iteration: 91236
loss: 1.210291862487793,grad_norm: 0.9999998344955173, iteration: 91237
loss: 1.2078924179077148,grad_norm: 0.9999996452419255, iteration: 91238
loss: 1.1969481706619263,grad_norm: 1.0000000188408111, iteration: 91239
loss: 1.2083064317703247,grad_norm: 0.9999997318184631, iteration: 91240
loss: 1.104103446006775,grad_norm: 0.9999997993148453, iteration: 91241
loss: 1.196485996246338,grad_norm: 0.999999838996086, iteration: 91242
loss: 1.1810102462768555,grad_norm: 0.9999999988443273, iteration: 91243
loss: 1.4489036798477173,grad_norm: 0.9999998764462563, iteration: 91244
loss: 1.231020450592041,grad_norm: 0.9999999458782125, iteration: 91245
loss: 1.3561598062515259,grad_norm: 0.9999999311380016, iteration: 91246
loss: 1.292899250984192,grad_norm: 1.0000001174441948, iteration: 91247
loss: 1.147274374961853,grad_norm: 0.9999998619327964, iteration: 91248
loss: 1.3009343147277832,grad_norm: 0.999999499858934, iteration: 91249
loss: 1.3593995571136475,grad_norm: 0.9999998675703987, iteration: 91250
loss: 1.4141210317611694,grad_norm: 1.0000000294377553, iteration: 91251
loss: 1.2190282344818115,grad_norm: 0.9999998537827007, iteration: 91252
loss: 1.2457102537155151,grad_norm: 0.9999998596369434, iteration: 91253
loss: 1.334814429283142,grad_norm: 1.000000050136395, iteration: 91254
loss: 1.2282713651657104,grad_norm: 0.9999996091491171, iteration: 91255
loss: 1.1741501092910767,grad_norm: 0.9999998925128768, iteration: 91256
loss: 1.3375208377838135,grad_norm: 1.0000000040900099, iteration: 91257
loss: 1.4650990962982178,grad_norm: 0.9999999426867215, iteration: 91258
loss: 1.183741569519043,grad_norm: 0.9999998455308675, iteration: 91259
loss: 1.2735708951950073,grad_norm: 1.0000000081018274, iteration: 91260
loss: 1.1647213697433472,grad_norm: 0.9999998434836377, iteration: 91261
loss: 1.2018496990203857,grad_norm: 0.9999997122815479, iteration: 91262
loss: 1.1550565958023071,grad_norm: 0.99999966820611, iteration: 91263
loss: 1.2024084329605103,grad_norm: 0.9999998075979344, iteration: 91264
loss: 1.262055516242981,grad_norm: 0.9999997544379675, iteration: 91265
loss: 1.2265052795410156,grad_norm: 0.9999992912380223, iteration: 91266
loss: 1.176320195198059,grad_norm: 0.9999992818795792, iteration: 91267
loss: 1.1463924646377563,grad_norm: 0.9999997177489653, iteration: 91268
loss: 1.2191228866577148,grad_norm: 0.9999997500783488, iteration: 91269
loss: 1.1426277160644531,grad_norm: 0.9999999392973004, iteration: 91270
loss: 1.3128187656402588,grad_norm: 0.9999998530628743, iteration: 91271
loss: 1.278763771057129,grad_norm: 0.9999997153854897, iteration: 91272
loss: 1.0785685777664185,grad_norm: 0.9999999127288067, iteration: 91273
loss: 1.1015087366104126,grad_norm: 0.999999801542083, iteration: 91274
loss: 1.3417812585830688,grad_norm: 0.9999998274462232, iteration: 91275
loss: 1.2248560190200806,grad_norm: 0.9999997038064914, iteration: 91276
loss: 1.1985968351364136,grad_norm: 0.9999997856692618, iteration: 91277
loss: 1.1364768743515015,grad_norm: 0.9999999175637474, iteration: 91278
loss: 1.1519490480422974,grad_norm: 0.9999995760231986, iteration: 91279
loss: 1.2545465230941772,grad_norm: 0.9999998038233991, iteration: 91280
loss: 1.2332707643508911,grad_norm: 0.9999998421801193, iteration: 91281
loss: 1.2205501794815063,grad_norm: 0.9999995136126144, iteration: 91282
loss: 1.4739412069320679,grad_norm: 0.9999997903252743, iteration: 91283
loss: 1.2177211046218872,grad_norm: 0.9999997070035104, iteration: 91284
loss: 1.1558226346969604,grad_norm: 0.9999999287853931, iteration: 91285
loss: 1.0940817594528198,grad_norm: 0.9999997677410121, iteration: 91286
loss: 1.1773654222488403,grad_norm: 0.9999996819001598, iteration: 91287
loss: 1.185043454170227,grad_norm: 0.9999997480826163, iteration: 91288
loss: 1.1435190439224243,grad_norm: 0.9999994656970786, iteration: 91289
loss: 1.2351462841033936,grad_norm: 0.9999998498287382, iteration: 91290
loss: 1.136223554611206,grad_norm: 0.9999994920390545, iteration: 91291
loss: 1.1341108083724976,grad_norm: 0.9999997041036465, iteration: 91292
loss: 1.1937881708145142,grad_norm: 0.9999997806228069, iteration: 91293
loss: 1.3009538650512695,grad_norm: 0.9999996655781943, iteration: 91294
loss: 1.1531983613967896,grad_norm: 0.9999994308107167, iteration: 91295
loss: 1.2303040027618408,grad_norm: 0.999999643954177, iteration: 91296
loss: 1.4241303205490112,grad_norm: 0.9999999171278202, iteration: 91297
loss: 1.2056010961532593,grad_norm: 0.9999999806142584, iteration: 91298
loss: 1.270833134651184,grad_norm: 0.9999999089909425, iteration: 91299
loss: 1.1432068347930908,grad_norm: 0.9999994547068678, iteration: 91300
loss: 1.1211347579956055,grad_norm: 0.9999994966652619, iteration: 91301
loss: 1.442855715751648,grad_norm: 1.0000000082426346, iteration: 91302
loss: 1.368043065071106,grad_norm: 1.0000000660268067, iteration: 91303
loss: 1.319706678390503,grad_norm: 0.9999999288119229, iteration: 91304
loss: 1.1955393552780151,grad_norm: 0.9999994371170888, iteration: 91305
loss: 1.3279757499694824,grad_norm: 0.9999998987300979, iteration: 91306
loss: 1.1527070999145508,grad_norm: 0.99999929012347, iteration: 91307
loss: 1.1356260776519775,grad_norm: 0.9999997937164288, iteration: 91308
loss: 1.3260974884033203,grad_norm: 0.999999997273643, iteration: 91309
loss: 1.144850730895996,grad_norm: 0.9999994380891021, iteration: 91310
loss: 1.2483155727386475,grad_norm: 0.9999998320465137, iteration: 91311
loss: 1.1615595817565918,grad_norm: 0.9999995249509221, iteration: 91312
loss: 1.3018158674240112,grad_norm: 0.9999998104901499, iteration: 91313
loss: 1.2918716669082642,grad_norm: 0.9999999086198149, iteration: 91314
loss: 1.3092560768127441,grad_norm: 0.999999939254475, iteration: 91315
loss: 1.0569735765457153,grad_norm: 0.9999993075230951, iteration: 91316
loss: 1.1626390218734741,grad_norm: 0.9999998305396186, iteration: 91317
loss: 1.4125069379806519,grad_norm: 0.9999999889115687, iteration: 91318
loss: 1.2854562997817993,grad_norm: 0.9999996326304241, iteration: 91319
loss: 1.2026787996292114,grad_norm: 0.999999436390884, iteration: 91320
loss: 1.4038301706314087,grad_norm: 0.9999997201565096, iteration: 91321
loss: 1.2286535501480103,grad_norm: 0.9999999631075928, iteration: 91322
loss: 1.3433241844177246,grad_norm: 0.9999999258404484, iteration: 91323
loss: 1.4488176107406616,grad_norm: 0.9999998476086346, iteration: 91324
loss: 1.3885093927383423,grad_norm: 0.9999998163300731, iteration: 91325
loss: 1.3557900190353394,grad_norm: 1.00000001435744, iteration: 91326
loss: 1.2475577592849731,grad_norm: 0.9999997827273958, iteration: 91327
loss: 1.431607961654663,grad_norm: 0.9999999036875012, iteration: 91328
loss: 1.1606552600860596,grad_norm: 0.9999998940745556, iteration: 91329
loss: 1.2069352865219116,grad_norm: 0.9999996392170465, iteration: 91330
loss: 1.1465494632720947,grad_norm: 0.9999993638050133, iteration: 91331
loss: 1.4783780574798584,grad_norm: 0.999999895905991, iteration: 91332
loss: 1.2948540449142456,grad_norm: 0.9999997917494319, iteration: 91333
loss: 1.153049349784851,grad_norm: 0.9999994397380765, iteration: 91334
loss: 1.4062212705612183,grad_norm: 0.9999996887703083, iteration: 91335
loss: 1.2005934715270996,grad_norm: 0.9999997497388889, iteration: 91336
loss: 1.3024775981903076,grad_norm: 0.999999852703664, iteration: 91337
loss: 1.296625018119812,grad_norm: 0.9999996996356098, iteration: 91338
loss: 1.216090440750122,grad_norm: 0.9999997592828381, iteration: 91339
loss: 1.2473530769348145,grad_norm: 0.9999998610120369, iteration: 91340
loss: 1.1880689859390259,grad_norm: 0.999999565247104, iteration: 91341
loss: 1.2412148714065552,grad_norm: 0.9999999821813331, iteration: 91342
loss: 1.7781786918640137,grad_norm: 0.999999960979706, iteration: 91343
loss: 1.1459567546844482,grad_norm: 0.9999999610337145, iteration: 91344
loss: 1.6208487749099731,grad_norm: 1.000000077735796, iteration: 91345
loss: 1.140965461730957,grad_norm: 0.9999996803685284, iteration: 91346
loss: 1.7738760709762573,grad_norm: 0.9999998126108264, iteration: 91347
loss: 2.569127082824707,grad_norm: 1.000000031916728, iteration: 91348
loss: 2.4236180782318115,grad_norm: 0.9999999648611945, iteration: 91349
loss: 1.953206181526184,grad_norm: 0.9999999890979961, iteration: 91350
loss: 2.243626594543457,grad_norm: 0.9999999232768894, iteration: 91351
loss: 1.488149642944336,grad_norm: 0.9999999053567062, iteration: 91352
loss: 2.5707170963287354,grad_norm: 1.0000000265714581, iteration: 91353
loss: 2.4204063415527344,grad_norm: 0.9999999062206634, iteration: 91354
loss: 1.974757194519043,grad_norm: 0.9999999178477946, iteration: 91355
loss: 1.4030579328536987,grad_norm: 0.9999999623340997, iteration: 91356
loss: 1.785889744758606,grad_norm: 1.0000000075950004, iteration: 91357
loss: 2.310056447982788,grad_norm: 0.9999998290119837, iteration: 91358
loss: 1.5158787965774536,grad_norm: 1.0000000053746116, iteration: 91359
loss: 2.1151630878448486,grad_norm: 0.9999999672605708, iteration: 91360
loss: 1.8929115533828735,grad_norm: 1.0000000414258714, iteration: 91361
loss: 1.7120929956436157,grad_norm: 1.00000005382231, iteration: 91362
loss: 1.6748546361923218,grad_norm: 0.9999999593754686, iteration: 91363
loss: 1.8204128742218018,grad_norm: 0.9999998722893879, iteration: 91364
loss: 1.500150203704834,grad_norm: 1.0000000115356782, iteration: 91365
loss: 2.6825764179229736,grad_norm: 0.999999945765738, iteration: 91366
loss: 1.1192599534988403,grad_norm: 0.9999999283450498, iteration: 91367
loss: 1.3541514873504639,grad_norm: 0.9999998212172159, iteration: 91368
loss: 1.6876192092895508,grad_norm: 0.9999998627782346, iteration: 91369
loss: 1.3811722993850708,grad_norm: 0.9999997138876958, iteration: 91370
loss: 1.978010654449463,grad_norm: 0.999999998801631, iteration: 91371
loss: 1.602000117301941,grad_norm: 0.9999998574789822, iteration: 91372
loss: 1.3690547943115234,grad_norm: 0.9999996346658164, iteration: 91373
loss: 1.205162763595581,grad_norm: 0.9999997004443321, iteration: 91374
loss: 1.1794973611831665,grad_norm: 0.9999998608684595, iteration: 91375
loss: 1.2947297096252441,grad_norm: 0.9999997997459847, iteration: 91376
loss: 1.4368538856506348,grad_norm: 0.99999996117018, iteration: 91377
loss: 1.495310664176941,grad_norm: 0.9999999618264426, iteration: 91378
loss: 1.8965588808059692,grad_norm: 1.0000000386800982, iteration: 91379
loss: 1.66791570186615,grad_norm: 0.9999999056061302, iteration: 91380
loss: 1.2697827816009521,grad_norm: 0.99999968591889, iteration: 91381
loss: 1.6128989458084106,grad_norm: 0.9999999325858814, iteration: 91382
loss: 1.4315311908721924,grad_norm: 0.9999998598990459, iteration: 91383
loss: 1.7140252590179443,grad_norm: 0.9999999729497332, iteration: 91384
loss: 1.1253736019134521,grad_norm: 0.9999996341780202, iteration: 91385
loss: 1.1880959272384644,grad_norm: 0.9999998369616988, iteration: 91386
loss: 1.164394497871399,grad_norm: 0.9999998267906907, iteration: 91387
loss: 1.286326289176941,grad_norm: 0.9999999924755049, iteration: 91388
loss: 1.4110366106033325,grad_norm: 0.9999999417240436, iteration: 91389
loss: 1.5880669355392456,grad_norm: 0.9999999607552885, iteration: 91390
loss: 1.2137844562530518,grad_norm: 0.9999996953902465, iteration: 91391
loss: 1.8119560480117798,grad_norm: 0.9999999559362229, iteration: 91392
loss: 1.5276230573654175,grad_norm: 0.9999999194453424, iteration: 91393
loss: 1.596423625946045,grad_norm: 1.0000000109844078, iteration: 91394
loss: 1.4211369752883911,grad_norm: 0.9999998404719377, iteration: 91395
loss: 1.4656227827072144,grad_norm: 0.9999998834706469, iteration: 91396
loss: 1.1206589937210083,grad_norm: 0.9999993646355386, iteration: 91397
loss: 1.2050050497055054,grad_norm: 0.9999998805115479, iteration: 91398
loss: 1.2743327617645264,grad_norm: 0.9999995763446458, iteration: 91399
loss: 1.127295732498169,grad_norm: 0.9999999259271997, iteration: 91400
loss: 1.2730908393859863,grad_norm: 0.9999996935511264, iteration: 91401
loss: 1.0643726587295532,grad_norm: 1.000000002990851, iteration: 91402
loss: 1.2918672561645508,grad_norm: 0.9999994093364811, iteration: 91403
loss: 1.5640367269515991,grad_norm: 1.000000003717838, iteration: 91404
loss: 1.3895411491394043,grad_norm: 0.9999999233742027, iteration: 91405
loss: 1.1256871223449707,grad_norm: 0.9999999080147755, iteration: 91406
loss: 1.220232605934143,grad_norm: 0.9999998527499994, iteration: 91407
loss: 1.2084124088287354,grad_norm: 0.9999999513698397, iteration: 91408
loss: 1.1553531885147095,grad_norm: 0.9999994283708975, iteration: 91409
loss: 1.222138524055481,grad_norm: 0.9999994099104385, iteration: 91410
loss: 1.1218706369400024,grad_norm: 0.9999998279500837, iteration: 91411
loss: 1.2539374828338623,grad_norm: 1.0000000512626368, iteration: 91412
loss: 1.1674623489379883,grad_norm: 0.9999998986900865, iteration: 91413
loss: 1.459268569946289,grad_norm: 0.9999999265335053, iteration: 91414
loss: 1.1011219024658203,grad_norm: 0.9999996843870035, iteration: 91415
loss: 1.522119164466858,grad_norm: 0.9999999523223163, iteration: 91416
loss: 1.332430124282837,grad_norm: 0.9999996461946424, iteration: 91417
loss: 1.3578356504440308,grad_norm: 0.9999995331768576, iteration: 91418
loss: 1.2246752977371216,grad_norm: 0.9999998309038085, iteration: 91419
loss: 1.2831001281738281,grad_norm: 0.9999996317353442, iteration: 91420
loss: 1.1593447923660278,grad_norm: 0.9999998994756528, iteration: 91421
loss: 1.3711509704589844,grad_norm: 0.9999998797649041, iteration: 91422
loss: 1.4835231304168701,grad_norm: 1.000000089788661, iteration: 91423
loss: 1.578374981880188,grad_norm: 0.9999999698566967, iteration: 91424
loss: 1.4976199865341187,grad_norm: 0.9999999577650467, iteration: 91425
loss: 1.2992002964019775,grad_norm: 0.9999998022441328, iteration: 91426
loss: 1.4548591375350952,grad_norm: 0.9999998396030549, iteration: 91427
loss: 1.62079918384552,grad_norm: 0.9999998876967408, iteration: 91428
loss: 1.2839248180389404,grad_norm: 0.9999998137732328, iteration: 91429
loss: 1.4222874641418457,grad_norm: 0.999999962705402, iteration: 91430
loss: 1.2219780683517456,grad_norm: 0.9999994775411845, iteration: 91431
loss: 1.796971082687378,grad_norm: 1.0000000556473712, iteration: 91432
loss: 1.2511545419692993,grad_norm: 0.9999996811960271, iteration: 91433
loss: 1.6699968576431274,grad_norm: 0.9999999301240874, iteration: 91434
loss: 1.1755011081695557,grad_norm: 0.9999999990633667, iteration: 91435
loss: 1.1472506523132324,grad_norm: 0.9999999103233158, iteration: 91436
loss: 1.5816868543624878,grad_norm: 0.9999998375305794, iteration: 91437
loss: 1.2874655723571777,grad_norm: 0.9999996769909996, iteration: 91438
loss: 1.1573808193206787,grad_norm: 0.9999999465988246, iteration: 91439
loss: 1.3165946006774902,grad_norm: 0.9999996780973504, iteration: 91440
loss: 1.4428420066833496,grad_norm: 1.0000000332252559, iteration: 91441
loss: 1.306373953819275,grad_norm: 0.9999998931895723, iteration: 91442
loss: 1.2584283351898193,grad_norm: 0.9999997062726798, iteration: 91443
loss: 1.2990190982818604,grad_norm: 0.9999997793927639, iteration: 91444
loss: 1.1930471658706665,grad_norm: 0.9999993745792282, iteration: 91445
loss: 1.2293190956115723,grad_norm: 0.999999313103753, iteration: 91446
loss: 1.1647239923477173,grad_norm: 0.9999996895220934, iteration: 91447
loss: 1.5497517585754395,grad_norm: 0.9999998692432125, iteration: 91448
loss: 1.2300379276275635,grad_norm: 0.9999997955022274, iteration: 91449
loss: 1.0394210815429688,grad_norm: 0.9999991414054624, iteration: 91450
loss: 1.1075459718704224,grad_norm: 0.9999996358852372, iteration: 91451
loss: 1.3153764009475708,grad_norm: 0.9999997918696059, iteration: 91452
loss: 1.2436763048171997,grad_norm: 0.999999837698538, iteration: 91453
loss: 1.421062707901001,grad_norm: 0.9999998109522897, iteration: 91454
loss: 1.2680824995040894,grad_norm: 0.9999996642461715, iteration: 91455
loss: 1.3036694526672363,grad_norm: 0.999999859806721, iteration: 91456
loss: 1.2993006706237793,grad_norm: 0.9999997492985827, iteration: 91457
loss: 1.208362102508545,grad_norm: 0.9999999041227435, iteration: 91458
loss: 1.47946298122406,grad_norm: 0.9999999306292313, iteration: 91459
loss: 1.4395487308502197,grad_norm: 0.9999998390071584, iteration: 91460
loss: 1.2586719989776611,grad_norm: 0.9999998038376654, iteration: 91461
loss: 1.2607390880584717,grad_norm: 0.9999995863903612, iteration: 91462
loss: 1.7148264646530151,grad_norm: 0.9999999698436325, iteration: 91463
loss: 1.3047912120819092,grad_norm: 0.9999999828538292, iteration: 91464
loss: 1.232508897781372,grad_norm: 1.0000000522063799, iteration: 91465
loss: 1.2078965902328491,grad_norm: 1.0000000083148426, iteration: 91466
loss: 1.393498420715332,grad_norm: 0.9999998420971067, iteration: 91467
loss: 1.41524338722229,grad_norm: 0.9999996867308045, iteration: 91468
loss: 1.3979414701461792,grad_norm: 0.9999999939060717, iteration: 91469
loss: 1.5208724737167358,grad_norm: 0.9999999691456077, iteration: 91470
loss: 1.2380023002624512,grad_norm: 0.9999999841111363, iteration: 91471
loss: 1.2052814960479736,grad_norm: 0.9999998869242692, iteration: 91472
loss: 1.1213557720184326,grad_norm: 0.9999999041537011, iteration: 91473
loss: 1.3365708589553833,grad_norm: 0.9999999707655379, iteration: 91474
loss: 1.1597039699554443,grad_norm: 0.9999999012195656, iteration: 91475
loss: 1.1850436925888062,grad_norm: 0.9999998328813761, iteration: 91476
loss: 1.3066214323043823,grad_norm: 0.9999999805846026, iteration: 91477
loss: 1.1570637226104736,grad_norm: 0.9999995269079353, iteration: 91478
loss: 1.3992234468460083,grad_norm: 0.9999999200312434, iteration: 91479
loss: 1.2876960039138794,grad_norm: 0.9999998532230767, iteration: 91480
loss: 1.2831727266311646,grad_norm: 0.9999997957787651, iteration: 91481
loss: 1.1482354402542114,grad_norm: 0.999999899172538, iteration: 91482
loss: 1.0911673307418823,grad_norm: 0.9999998401083912, iteration: 91483
loss: 1.1380255222320557,grad_norm: 0.9999999670157063, iteration: 91484
loss: 1.115859031677246,grad_norm: 0.9999997838980238, iteration: 91485
loss: 1.147939682006836,grad_norm: 0.9999993312567359, iteration: 91486
loss: 1.1166102886199951,grad_norm: 0.9999992116983082, iteration: 91487
loss: 1.1276637315750122,grad_norm: 0.9999999844488058, iteration: 91488
loss: 1.3285048007965088,grad_norm: 1.0000000063756618, iteration: 91489
loss: 1.3263260126113892,grad_norm: 0.9999999226918903, iteration: 91490
loss: 1.0618683099746704,grad_norm: 0.9999995199044663, iteration: 91491
loss: 1.2157618999481201,grad_norm: 0.9999998360840533, iteration: 91492
loss: 1.2691129446029663,grad_norm: 0.9999993081253151, iteration: 91493
loss: 1.2398865222930908,grad_norm: 0.9999998508883307, iteration: 91494
loss: 1.3647575378417969,grad_norm: 0.9999998652302676, iteration: 91495
loss: 1.1005609035491943,grad_norm: 0.99999952372674, iteration: 91496
loss: 1.1153976917266846,grad_norm: 0.9999994676189363, iteration: 91497
loss: 1.2308642864227295,grad_norm: 0.9999998645584004, iteration: 91498
loss: 1.1657025814056396,grad_norm: 0.9999996452637998, iteration: 91499
loss: 1.1019953489303589,grad_norm: 0.9999999884660149, iteration: 91500
loss: 1.1071733236312866,grad_norm: 0.9999997589607517, iteration: 91501
loss: 1.446088433265686,grad_norm: 0.9999997654962719, iteration: 91502
loss: 1.2668639421463013,grad_norm: 0.999999781393884, iteration: 91503
loss: 1.2307971715927124,grad_norm: 0.9999999470080017, iteration: 91504
loss: 1.1918901205062866,grad_norm: 0.9999998950365336, iteration: 91505
loss: 1.1477203369140625,grad_norm: 0.9999997787361945, iteration: 91506
loss: 1.2015187740325928,grad_norm: 0.9999999570725321, iteration: 91507
loss: 1.2044973373413086,grad_norm: 0.9999997915297734, iteration: 91508
loss: 1.1032205820083618,grad_norm: 0.9999996811050748, iteration: 91509
loss: 1.063468098640442,grad_norm: 0.9999992402050738, iteration: 91510
loss: 1.0949403047561646,grad_norm: 0.9999998941042849, iteration: 91511
loss: 1.0924839973449707,grad_norm: 0.9999999598151742, iteration: 91512
loss: 1.1882777214050293,grad_norm: 1.0000000449266908, iteration: 91513
loss: 1.0542826652526855,grad_norm: 0.9999999936486363, iteration: 91514
loss: 1.1230127811431885,grad_norm: 0.9999997820560802, iteration: 91515
loss: 1.0999186038970947,grad_norm: 0.9999999932260558, iteration: 91516
loss: 1.1976271867752075,grad_norm: 0.9999998485466909, iteration: 91517
loss: 1.1255550384521484,grad_norm: 0.9999996114124473, iteration: 91518
loss: 1.231061577796936,grad_norm: 0.9999999130927935, iteration: 91519
loss: 1.2277247905731201,grad_norm: 0.9999997829400902, iteration: 91520
loss: 1.4392706155776978,grad_norm: 0.9999998987653914, iteration: 91521
loss: 1.117653250694275,grad_norm: 0.9999999325201704, iteration: 91522
loss: 1.1361298561096191,grad_norm: 0.9999998953526973, iteration: 91523
loss: 1.0489007234573364,grad_norm: 0.999999941948357, iteration: 91524
loss: 1.334604263305664,grad_norm: 1.0000000383169083, iteration: 91525
loss: 1.1228413581848145,grad_norm: 0.9999999149145907, iteration: 91526
loss: 1.0477550029754639,grad_norm: 0.9999994803061407, iteration: 91527
loss: 1.1166400909423828,grad_norm: 0.9999994698319947, iteration: 91528
loss: 1.3826614618301392,grad_norm: 0.9999999593337269, iteration: 91529
loss: 1.2683979272842407,grad_norm: 0.9999999111031035, iteration: 91530
loss: 1.2769562005996704,grad_norm: 0.9999998208319446, iteration: 91531
loss: 1.0894793272018433,grad_norm: 0.9999999207915874, iteration: 91532
loss: 1.1819308996200562,grad_norm: 0.9999999065663733, iteration: 91533
loss: 1.2222466468811035,grad_norm: 0.9999999470529309, iteration: 91534
loss: 1.2061240673065186,grad_norm: 1.0000000143784589, iteration: 91535
loss: 1.0683214664459229,grad_norm: 0.9999999861743405, iteration: 91536
loss: 1.297095775604248,grad_norm: 0.9999999465584561, iteration: 91537
loss: 1.0646864175796509,grad_norm: 0.9999998587576925, iteration: 91538
loss: 1.5590429306030273,grad_norm: 0.9999999100803577, iteration: 91539
loss: 1.0905208587646484,grad_norm: 0.9999998440626896, iteration: 91540
loss: 1.2234327793121338,grad_norm: 0.999999925704643, iteration: 91541
loss: 1.256269931793213,grad_norm: 0.9999998146080983, iteration: 91542
loss: 1.329501748085022,grad_norm: 1.0000000409998282, iteration: 91543
loss: 1.353342890739441,grad_norm: 0.9999998690369409, iteration: 91544
loss: 1.2791783809661865,grad_norm: 0.9999999158970029, iteration: 91545
loss: 1.2568676471710205,grad_norm: 0.999999870032739, iteration: 91546
loss: 1.3160768747329712,grad_norm: 0.9999999369254456, iteration: 91547
loss: 1.7282860279083252,grad_norm: 0.9999998961605737, iteration: 91548
loss: 1.4131771326065063,grad_norm: 1.0000000236578135, iteration: 91549
loss: 1.9515870809555054,grad_norm: 1.0000000585917, iteration: 91550
loss: 1.4121830463409424,grad_norm: 1.0000000695491238, iteration: 91551
loss: 1.5542782545089722,grad_norm: 0.999999951338289, iteration: 91552
loss: 1.5861538648605347,grad_norm: 1.000000036986019, iteration: 91553
loss: 1.201161503791809,grad_norm: 0.9999999696111361, iteration: 91554
loss: 1.5134214162826538,grad_norm: 0.9999999316256707, iteration: 91555
loss: 1.416345238685608,grad_norm: 0.9999998089772812, iteration: 91556
loss: 1.3363845348358154,grad_norm: 0.9999998836380256, iteration: 91557
loss: 1.3589775562286377,grad_norm: 0.999999683446042, iteration: 91558
loss: 1.3872754573822021,grad_norm: 0.9999997409487, iteration: 91559
loss: 1.197145700454712,grad_norm: 0.9999997568941557, iteration: 91560
loss: 1.3430782556533813,grad_norm: 0.9999998992126525, iteration: 91561
loss: 1.5150994062423706,grad_norm: 0.9999999078625819, iteration: 91562
loss: 1.2697397470474243,grad_norm: 1.000000030453315, iteration: 91563
loss: 1.407516598701477,grad_norm: 0.9999999549483208, iteration: 91564
loss: 1.3718913793563843,grad_norm: 0.9999999733737198, iteration: 91565
loss: 1.8747695684432983,grad_norm: 0.9999999041813398, iteration: 91566
loss: 1.143723487854004,grad_norm: 0.9999997394187644, iteration: 91567
loss: 1.2638543844223022,grad_norm: 0.9999999678232299, iteration: 91568
loss: 1.2309691905975342,grad_norm: 0.9999999111906097, iteration: 91569
loss: 1.1175472736358643,grad_norm: 0.9999996762745278, iteration: 91570
loss: 1.1049587726593018,grad_norm: 0.9999996759204359, iteration: 91571
loss: 1.4315245151519775,grad_norm: 0.9999997914503976, iteration: 91572
loss: 1.4938149452209473,grad_norm: 1.0000000128434594, iteration: 91573
loss: 1.3667850494384766,grad_norm: 0.9999997705687056, iteration: 91574
loss: 1.2801178693771362,grad_norm: 0.9999998379869728, iteration: 91575
loss: 1.4722527265548706,grad_norm: 0.9999998630404735, iteration: 91576
loss: 1.1994807720184326,grad_norm: 0.9999999649944702, iteration: 91577
loss: 1.3215391635894775,grad_norm: 0.9999997484725365, iteration: 91578
loss: 1.2902122735977173,grad_norm: 0.999999771874883, iteration: 91579
loss: 1.422631859779358,grad_norm: 0.999999954572133, iteration: 91580
loss: 1.249707579612732,grad_norm: 0.999999728195216, iteration: 91581
loss: 1.2553107738494873,grad_norm: 0.9999999261336189, iteration: 91582
loss: 1.3332244157791138,grad_norm: 0.9999999190745852, iteration: 91583
loss: 1.3600870370864868,grad_norm: 0.9999998465202923, iteration: 91584
loss: 1.306397557258606,grad_norm: 0.999999925784551, iteration: 91585
loss: 1.3810936212539673,grad_norm: 0.9999998558999209, iteration: 91586
loss: 1.1863921880722046,grad_norm: 0.9999998775193641, iteration: 91587
loss: 1.1297006607055664,grad_norm: 0.9999996082640514, iteration: 91588
loss: 1.2094115018844604,grad_norm: 0.9999999522365141, iteration: 91589
loss: 1.2754560708999634,grad_norm: 0.9999998561643656, iteration: 91590
loss: 1.2032935619354248,grad_norm: 0.9999995835719141, iteration: 91591
loss: 1.253108024597168,grad_norm: 0.9999998767545514, iteration: 91592
loss: 1.1837407350540161,grad_norm: 0.9999996115056373, iteration: 91593
loss: 1.1133644580841064,grad_norm: 0.9999998632203587, iteration: 91594
loss: 1.205460548400879,grad_norm: 0.9999998332350926, iteration: 91595
loss: 1.031929612159729,grad_norm: 0.999999535036139, iteration: 91596
loss: 1.1847002506256104,grad_norm: 0.9999998573187813, iteration: 91597
loss: 1.2331088781356812,grad_norm: 0.9999999162235548, iteration: 91598
loss: 1.061455249786377,grad_norm: 0.9999992210157357, iteration: 91599
loss: 1.241899847984314,grad_norm: 0.9999996171983538, iteration: 91600
loss: 1.1370197534561157,grad_norm: 0.9999998404893423, iteration: 91601
loss: 1.1528587341308594,grad_norm: 0.9999998995349471, iteration: 91602
loss: 1.3252671957015991,grad_norm: 1.0000000054279707, iteration: 91603
loss: 1.0795210599899292,grad_norm: 0.9999996395876379, iteration: 91604
loss: 1.2746604681015015,grad_norm: 0.9999997932596543, iteration: 91605
loss: 1.112252116203308,grad_norm: 0.9999994831734457, iteration: 91606
loss: 1.0248268842697144,grad_norm: 0.9999994023970138, iteration: 91607
loss: 1.5281445980072021,grad_norm: 0.9999996381501547, iteration: 91608
loss: 1.251273512840271,grad_norm: 0.9999998922186631, iteration: 91609
loss: 1.3067469596862793,grad_norm: 1.0000000346320945, iteration: 91610
loss: 1.0886805057525635,grad_norm: 0.9999998772621019, iteration: 91611
loss: 1.3735675811767578,grad_norm: 0.9999999594922897, iteration: 91612
loss: 1.0691365003585815,grad_norm: 0.9999995968499973, iteration: 91613
loss: 1.0886913537979126,grad_norm: 1.0000000053366318, iteration: 91614
loss: 1.2655576467514038,grad_norm: 0.9999997934589685, iteration: 91615
loss: 1.097761631011963,grad_norm: 0.9999993438800336, iteration: 91616
loss: 1.095513105392456,grad_norm: 0.9999998989047606, iteration: 91617
loss: 1.2943081855773926,grad_norm: 0.999999902720495, iteration: 91618
loss: 1.2302665710449219,grad_norm: 0.9999999397212842, iteration: 91619
loss: 1.1338846683502197,grad_norm: 0.9999998922880161, iteration: 91620
loss: 1.2716423273086548,grad_norm: 0.9999998709099087, iteration: 91621
loss: 1.1372417211532593,grad_norm: 0.9999998237287677, iteration: 91622
loss: 1.1154792308807373,grad_norm: 0.9999999765090508, iteration: 91623
loss: 1.353435754776001,grad_norm: 0.9999997905575568, iteration: 91624
loss: 1.1801962852478027,grad_norm: 0.9999998669863034, iteration: 91625
loss: 1.2286440134048462,grad_norm: 0.9999996673209709, iteration: 91626
loss: 1.0276662111282349,grad_norm: 0.9999991429969424, iteration: 91627
loss: 1.2031757831573486,grad_norm: 0.9999998986659172, iteration: 91628
loss: 1.597206950187683,grad_norm: 0.9999998841937284, iteration: 91629
loss: 1.2032026052474976,grad_norm: 0.999999896980503, iteration: 91630
loss: 1.1314576864242554,grad_norm: 0.9999997718819966, iteration: 91631
loss: 1.2695215940475464,grad_norm: 0.9999999677954893, iteration: 91632
loss: 1.267890453338623,grad_norm: 1.0000000427822833, iteration: 91633
loss: 1.183921217918396,grad_norm: 0.9999993907611167, iteration: 91634
loss: 1.2094089984893799,grad_norm: 0.99999963740471, iteration: 91635
loss: 1.1023844480514526,grad_norm: 0.9999994533622625, iteration: 91636
loss: 1.089091420173645,grad_norm: 0.9999997202751497, iteration: 91637
loss: 1.6548912525177002,grad_norm: 0.9999999198887726, iteration: 91638
loss: 1.088567852973938,grad_norm: 0.9999995346857458, iteration: 91639
loss: 1.3865900039672852,grad_norm: 1.000000041378301, iteration: 91640
loss: 1.1285723447799683,grad_norm: 0.9999993142510321, iteration: 91641
loss: 1.1606206893920898,grad_norm: 0.9999997473880944, iteration: 91642
loss: 1.4061274528503418,grad_norm: 1.0000000343463706, iteration: 91643
loss: 1.2351950407028198,grad_norm: 0.9999999908682524, iteration: 91644
loss: 1.2548848390579224,grad_norm: 0.9999992349416907, iteration: 91645
loss: 1.3229045867919922,grad_norm: 0.9999995777028708, iteration: 91646
loss: 1.298762321472168,grad_norm: 0.999999912517288, iteration: 91647
loss: 1.317769169807434,grad_norm: 0.9999996046716285, iteration: 91648
loss: 1.302078366279602,grad_norm: 0.999999910870726, iteration: 91649
loss: 1.0960052013397217,grad_norm: 0.9999995701816431, iteration: 91650
loss: 1.140285849571228,grad_norm: 0.9999997352896027, iteration: 91651
loss: 1.183246374130249,grad_norm: 0.99999995796629, iteration: 91652
loss: 1.4084876775741577,grad_norm: 0.9999999718312588, iteration: 91653
loss: 1.3440693616867065,grad_norm: 0.9999999832070292, iteration: 91654
loss: 1.1264008283615112,grad_norm: 0.9999993845996281, iteration: 91655
loss: 1.4997931718826294,grad_norm: 0.9999998756908095, iteration: 91656
loss: 1.3449147939682007,grad_norm: 0.9999993137642911, iteration: 91657
loss: 1.2172343730926514,grad_norm: 0.9999999446166513, iteration: 91658
loss: 1.1925432682037354,grad_norm: 0.9999998156537462, iteration: 91659
loss: 1.2076243162155151,grad_norm: 0.9999998922408707, iteration: 91660
loss: 1.0557070970535278,grad_norm: 1.000000024566081, iteration: 91661
loss: 1.0647919178009033,grad_norm: 0.9999998244007698, iteration: 91662
loss: 1.1379923820495605,grad_norm: 0.9999996442416305, iteration: 91663
loss: 1.1197651624679565,grad_norm: 0.9999994138871259, iteration: 91664
loss: 1.277048945426941,grad_norm: 0.9999998477248345, iteration: 91665
loss: 1.13822340965271,grad_norm: 0.9999997842523288, iteration: 91666
loss: 1.23298180103302,grad_norm: 0.9999998725810969, iteration: 91667
loss: 1.1078423261642456,grad_norm: 0.9999996645624089, iteration: 91668
loss: 1.0520471334457397,grad_norm: 0.9999996414051566, iteration: 91669
loss: 1.1109801530838013,grad_norm: 0.9999998488206917, iteration: 91670
loss: 1.0799731016159058,grad_norm: 0.9999996867935892, iteration: 91671
loss: 1.2207472324371338,grad_norm: 0.9999997907250173, iteration: 91672
loss: 1.073305368423462,grad_norm: 0.9999994750828132, iteration: 91673
loss: 1.0606738328933716,grad_norm: 0.9999994986014621, iteration: 91674
loss: 1.3959903717041016,grad_norm: 0.9999997844271212, iteration: 91675
loss: 1.1281622648239136,grad_norm: 1.0000000356501253, iteration: 91676
loss: 1.2294118404388428,grad_norm: 0.9999998997788937, iteration: 91677
loss: 1.1366801261901855,grad_norm: 0.9999998719370508, iteration: 91678
loss: 1.2046892642974854,grad_norm: 0.9999993227759565, iteration: 91679
loss: 1.280060052871704,grad_norm: 0.9999997552982695, iteration: 91680
loss: 1.0296835899353027,grad_norm: 0.9999996729836627, iteration: 91681
loss: 1.1444019079208374,grad_norm: 0.9999998837601665, iteration: 91682
loss: 1.1226402521133423,grad_norm: 0.9999996683808137, iteration: 91683
loss: 1.1390355825424194,grad_norm: 0.9999999590299036, iteration: 91684
loss: 1.0637706518173218,grad_norm: 0.999999739234295, iteration: 91685
loss: 1.1954954862594604,grad_norm: 0.9999999531264363, iteration: 91686
loss: 1.2172749042510986,grad_norm: 0.9999997376094792, iteration: 91687
loss: 1.081957459449768,grad_norm: 0.9873628883298089, iteration: 91688
loss: 1.131170392036438,grad_norm: 0.9999999772820538, iteration: 91689
loss: 1.2964073419570923,grad_norm: 0.9999996670799063, iteration: 91690
loss: 1.0964690446853638,grad_norm: 0.9999994052124901, iteration: 91691
loss: 1.1400773525238037,grad_norm: 0.999999802103665, iteration: 91692
loss: 1.0734742879867554,grad_norm: 0.9999997220998706, iteration: 91693
loss: 1.1319239139556885,grad_norm: 0.999999794458748, iteration: 91694
loss: 1.243660569190979,grad_norm: 0.9999998795046642, iteration: 91695
loss: 1.0692410469055176,grad_norm: 0.9999999322231622, iteration: 91696
loss: 1.3740482330322266,grad_norm: 0.9999997886396237, iteration: 91697
loss: 1.056302785873413,grad_norm: 0.9999990833245291, iteration: 91698
loss: 1.2288941144943237,grad_norm: 0.9999999231795955, iteration: 91699
loss: 1.4429595470428467,grad_norm: 0.9999999871920731, iteration: 91700
loss: 1.2968612909317017,grad_norm: 0.999999902676912, iteration: 91701
loss: 1.3395483493804932,grad_norm: 1.000000003120767, iteration: 91702
loss: 1.063278079032898,grad_norm: 0.9999992234310583, iteration: 91703
loss: 1.191957712173462,grad_norm: 0.9999997245865068, iteration: 91704
loss: 1.2355600595474243,grad_norm: 0.9999997246279634, iteration: 91705
loss: 1.1605591773986816,grad_norm: 0.9999994501889808, iteration: 91706
loss: 1.0015748739242554,grad_norm: 0.9999998963983866, iteration: 91707
loss: 1.0286139249801636,grad_norm: 0.9999995525955413, iteration: 91708
loss: 1.5208455324172974,grad_norm: 0.9999998102515727, iteration: 91709
loss: 1.1135860681533813,grad_norm: 0.9999996990889375, iteration: 91710
loss: 1.1383227109909058,grad_norm: 0.9999999286368371, iteration: 91711
loss: 1.1373621225357056,grad_norm: 0.9999999131943748, iteration: 91712
loss: 1.1117868423461914,grad_norm: 0.9999991925913877, iteration: 91713
loss: 1.0861992835998535,grad_norm: 0.9999997398894144, iteration: 91714
loss: 1.1627764701843262,grad_norm: 0.9999996352047299, iteration: 91715
loss: 1.0686959028244019,grad_norm: 0.9999998223323842, iteration: 91716
loss: 1.0564757585525513,grad_norm: 0.999999485751893, iteration: 91717
loss: 1.205806851387024,grad_norm: 0.9999997007985447, iteration: 91718
loss: 1.1378992795944214,grad_norm: 0.9999996404198569, iteration: 91719
loss: 1.0658882856369019,grad_norm: 0.9999995290954083, iteration: 91720
loss: 1.4226369857788086,grad_norm: 0.9999997635970878, iteration: 91721
loss: 1.1981137990951538,grad_norm: 1.0000000720059274, iteration: 91722
loss: 1.0857821702957153,grad_norm: 0.9999996898769155, iteration: 91723
loss: 1.0499588251113892,grad_norm: 0.9999991138972742, iteration: 91724
loss: 1.1147348880767822,grad_norm: 0.9999998759492058, iteration: 91725
loss: 1.2073167562484741,grad_norm: 0.9999999020640175, iteration: 91726
loss: 1.162899374961853,grad_norm: 1.0000000552006467, iteration: 91727
loss: 1.3486745357513428,grad_norm: 0.9999997519509763, iteration: 91728
loss: 1.0083328485488892,grad_norm: 0.9999994368763049, iteration: 91729
loss: 1.0508006811141968,grad_norm: 0.9999993846492415, iteration: 91730
loss: 1.1738982200622559,grad_norm: 0.9999995350954117, iteration: 91731
loss: 1.1581733226776123,grad_norm: 0.9999999139910654, iteration: 91732
loss: 1.16108238697052,grad_norm: 0.9999995313622226, iteration: 91733
loss: 1.0814884901046753,grad_norm: 0.9999997893262416, iteration: 91734
loss: 1.2254507541656494,grad_norm: 0.999999895721266, iteration: 91735
loss: 1.0640676021575928,grad_norm: 0.999999470184085, iteration: 91736
loss: 1.0784926414489746,grad_norm: 0.9999997128849895, iteration: 91737
loss: 1.0321508646011353,grad_norm: 0.999999394436457, iteration: 91738
loss: 1.0284595489501953,grad_norm: 0.9999989966363128, iteration: 91739
loss: 1.0366806983947754,grad_norm: 0.9999993177933943, iteration: 91740
loss: 1.1569184064865112,grad_norm: 0.9999999070226642, iteration: 91741
loss: 1.1663341522216797,grad_norm: 0.9999998570510209, iteration: 91742
loss: 1.2096331119537354,grad_norm: 0.9999996883323168, iteration: 91743
loss: 1.0109862089157104,grad_norm: 0.9999995998526814, iteration: 91744
loss: 1.0209218263626099,grad_norm: 0.9999991871422674, iteration: 91745
loss: 1.0667626857757568,grad_norm: 0.9999996712205833, iteration: 91746
loss: 1.2140381336212158,grad_norm: 0.9999995865275176, iteration: 91747
loss: 1.0736702680587769,grad_norm: 0.9999996820925348, iteration: 91748
loss: 1.1114774942398071,grad_norm: 0.9999998862655005, iteration: 91749
loss: 1.0854758024215698,grad_norm: 0.9999995215711639, iteration: 91750
loss: 1.1670444011688232,grad_norm: 0.9999995688813542, iteration: 91751
loss: 1.0255624055862427,grad_norm: 0.999999710153255, iteration: 91752
loss: 1.08762526512146,grad_norm: 0.9999996862149109, iteration: 91753
loss: 1.0759574174880981,grad_norm: 0.9999994789787444, iteration: 91754
loss: 1.1426721811294556,grad_norm: 0.9999997525944639, iteration: 91755
loss: 1.0798882246017456,grad_norm: 0.881555577734346, iteration: 91756
loss: 1.1373236179351807,grad_norm: 0.9999999170698523, iteration: 91757
loss: 1.0679956674575806,grad_norm: 0.9999992768100988, iteration: 91758
loss: 1.0381648540496826,grad_norm: 0.9999990520991191, iteration: 91759
loss: 1.123374342918396,grad_norm: 0.999999572888317, iteration: 91760
loss: 1.0487948656082153,grad_norm: 0.9999996677634329, iteration: 91761
loss: 1.0521796941757202,grad_norm: 0.9999997503736366, iteration: 91762
loss: 1.0782469511032104,grad_norm: 0.9999998304236913, iteration: 91763
loss: 0.9862580895423889,grad_norm: 0.9999993606317207, iteration: 91764
loss: 0.994227945804596,grad_norm: 0.9999991689547022, iteration: 91765
loss: 1.0225598812103271,grad_norm: 0.8630904764232921, iteration: 91766
loss: 1.0801440477371216,grad_norm: 0.9999996792094762, iteration: 91767
loss: 1.0116294622421265,grad_norm: 0.9999993656847862, iteration: 91768
loss: 1.187468409538269,grad_norm: 0.9999994682877128, iteration: 91769
loss: 1.1287437677383423,grad_norm: 0.999999823167227, iteration: 91770
loss: 1.1851130723953247,grad_norm: 0.9999993099571197, iteration: 91771
loss: 1.1096359491348267,grad_norm: 0.9999998113767331, iteration: 91772
loss: 1.1293365955352783,grad_norm: 0.9999992347454149, iteration: 91773
loss: 1.315951943397522,grad_norm: 0.9999997716167356, iteration: 91774
loss: 1.1641701459884644,grad_norm: 0.9999995018506352, iteration: 91775
loss: 1.0132269859313965,grad_norm: 0.9999993633025086, iteration: 91776
loss: 1.0400596857070923,grad_norm: 0.9999993078304784, iteration: 91777
loss: 1.1217997074127197,grad_norm: 0.9999994798806069, iteration: 91778
loss: 1.1656712293624878,grad_norm: 0.9999999365720957, iteration: 91779
loss: 1.0438519716262817,grad_norm: 0.9999995502418246, iteration: 91780
loss: 1.0578417778015137,grad_norm: 0.9999992109518412, iteration: 91781
loss: 0.9775144457817078,grad_norm: 0.9999998198568772, iteration: 91782
loss: 1.0444998741149902,grad_norm: 0.9999999020259062, iteration: 91783
loss: 0.9703546166419983,grad_norm: 0.9999992376690995, iteration: 91784
loss: 1.0289462804794312,grad_norm: 0.9999997178278641, iteration: 91785
loss: 1.1385427713394165,grad_norm: 0.9999994177666972, iteration: 91786
loss: 1.1344118118286133,grad_norm: 1.0000000538104192, iteration: 91787
loss: 1.132482886314392,grad_norm: 0.999999846246416, iteration: 91788
loss: 1.1610478162765503,grad_norm: 0.9999998783272641, iteration: 91789
loss: 1.0516424179077148,grad_norm: 0.9999998948639772, iteration: 91790
loss: 1.1671327352523804,grad_norm: 0.9999991038531503, iteration: 91791
loss: 1.0174809694290161,grad_norm: 0.999999084843439, iteration: 91792
loss: 1.1101363897323608,grad_norm: 0.9999995315361137, iteration: 91793
loss: 1.1316941976547241,grad_norm: 0.9999997577575819, iteration: 91794
loss: 1.0574347972869873,grad_norm: 0.9999993720138971, iteration: 91795
loss: 1.067841649055481,grad_norm: 0.9999999137271267, iteration: 91796
loss: 0.9928635954856873,grad_norm: 0.8920313266745366, iteration: 91797
loss: 1.037883996963501,grad_norm: 0.9999994523031981, iteration: 91798
loss: 1.1042698621749878,grad_norm: 0.9999993115630684, iteration: 91799
loss: 1.1984531879425049,grad_norm: 0.9999994952772026, iteration: 91800
loss: 1.1215349435806274,grad_norm: 0.9999996459001962, iteration: 91801
loss: 1.0330184698104858,grad_norm: 0.908590948236311, iteration: 91802
loss: 1.2138495445251465,grad_norm: 0.9999998305065215, iteration: 91803
loss: 0.9938267469406128,grad_norm: 0.9999990973794061, iteration: 91804
loss: 1.0189441442489624,grad_norm: 0.9999990394733682, iteration: 91805
loss: 1.031479001045227,grad_norm: 0.9999999276943276, iteration: 91806
loss: 1.2539702653884888,grad_norm: 0.9999998765310308, iteration: 91807
loss: 1.0313881635665894,grad_norm: 0.9999993088688717, iteration: 91808
loss: 1.058752417564392,grad_norm: 0.9999998942805122, iteration: 91809
loss: 1.049245834350586,grad_norm: 0.9999996264357689, iteration: 91810
loss: 1.0440969467163086,grad_norm: 0.9999999326739887, iteration: 91811
loss: 1.0372064113616943,grad_norm: 0.9999992951415324, iteration: 91812
loss: 0.9859451651573181,grad_norm: 0.9999997052166275, iteration: 91813
loss: 1.0782384872436523,grad_norm: 0.9999997699433274, iteration: 91814
loss: 1.0720393657684326,grad_norm: 0.9999992227187934, iteration: 91815
loss: 1.1531037092208862,grad_norm: 0.9999996794761505, iteration: 91816
loss: 0.9907944202423096,grad_norm: 0.9757809387523211, iteration: 91817
loss: 1.1220289468765259,grad_norm: 0.9999999311545419, iteration: 91818
loss: 0.9979779124259949,grad_norm: 0.9999991603772477, iteration: 91819
loss: 1.0313676595687866,grad_norm: 0.999999230295172, iteration: 91820
loss: 1.1402124166488647,grad_norm: 0.9999999706421768, iteration: 91821
loss: 1.2051279544830322,grad_norm: 0.9999998295755208, iteration: 91822
loss: 1.1254510879516602,grad_norm: 0.9999994886840206, iteration: 91823
loss: 1.1575074195861816,grad_norm: 0.9999997933559074, iteration: 91824
loss: 1.0811285972595215,grad_norm: 0.999999265298083, iteration: 91825
loss: 1.1397734880447388,grad_norm: 0.9999995834256658, iteration: 91826
loss: 1.0227159261703491,grad_norm: 0.9999993263980538, iteration: 91827
loss: 1.0651060342788696,grad_norm: 0.9999992552616974, iteration: 91828
loss: 1.0432521104812622,grad_norm: 0.9907660886327659, iteration: 91829
loss: 1.053702712059021,grad_norm: 0.9999999636917315, iteration: 91830
loss: 1.1073461771011353,grad_norm: 0.9999997280983675, iteration: 91831
loss: 1.0070583820343018,grad_norm: 0.9999996332016191, iteration: 91832
loss: 1.285521149635315,grad_norm: 0.9999998921868877, iteration: 91833
loss: 1.0764567852020264,grad_norm: 0.9999997843046704, iteration: 91834
loss: 1.1385267972946167,grad_norm: 0.9999998015976147, iteration: 91835
loss: 1.1095044612884521,grad_norm: 0.9999993644762837, iteration: 91836
loss: 1.0495151281356812,grad_norm: 0.9999998411558843, iteration: 91837
loss: 1.0948009490966797,grad_norm: 0.9999992529110416, iteration: 91838
loss: 1.0673625469207764,grad_norm: 0.9999998345932777, iteration: 91839
loss: 1.0595122575759888,grad_norm: 0.9999996695530057, iteration: 91840
loss: 0.9848394393920898,grad_norm: 0.9999990600219755, iteration: 91841
loss: 1.0359406471252441,grad_norm: 0.9999994055813971, iteration: 91842
loss: 1.045871615409851,grad_norm: 0.9999994518866163, iteration: 91843
loss: 1.0385466814041138,grad_norm: 0.9999994776579135, iteration: 91844
loss: 1.1098575592041016,grad_norm: 0.9999992818055893, iteration: 91845
loss: 1.1805914640426636,grad_norm: 0.9999995722154932, iteration: 91846
loss: 1.044947862625122,grad_norm: 0.9999993899266099, iteration: 91847
loss: 1.1189701557159424,grad_norm: 0.9999992900617438, iteration: 91848
loss: 1.1708335876464844,grad_norm: 0.9999993379766021, iteration: 91849
loss: 1.0188581943511963,grad_norm: 0.99999980967825, iteration: 91850
loss: 1.0586251020431519,grad_norm: 0.9999992964510889, iteration: 91851
loss: 1.0686057806015015,grad_norm: 0.999999970267977, iteration: 91852
loss: 1.0539658069610596,grad_norm: 0.9999993993260597, iteration: 91853
loss: 1.055307388305664,grad_norm: 0.871932831814489, iteration: 91854
loss: 1.0131431818008423,grad_norm: 0.7830837234522117, iteration: 91855
loss: 0.9761018753051758,grad_norm: 0.9999991431420945, iteration: 91856
loss: 1.033558964729309,grad_norm: 0.9999991515347737, iteration: 91857
loss: 0.9874992966651917,grad_norm: 0.9999998507768602, iteration: 91858
loss: 1.06838059425354,grad_norm: 0.9999994688316978, iteration: 91859
loss: 1.0425121784210205,grad_norm: 0.9999995253301596, iteration: 91860
loss: 1.0931355953216553,grad_norm: 0.9999997978201789, iteration: 91861
loss: 1.2163110971450806,grad_norm: 1.000000057177924, iteration: 91862
loss: 0.9697308540344238,grad_norm: 0.9999991375196109, iteration: 91863
loss: 1.2202943563461304,grad_norm: 0.9999997559889664, iteration: 91864
loss: 1.0002461671829224,grad_norm: 0.9999997908433177, iteration: 91865
loss: 1.0831855535507202,grad_norm: 0.999999627753422, iteration: 91866
loss: 0.9778649210929871,grad_norm: 0.9999995499258382, iteration: 91867
loss: 1.0778690576553345,grad_norm: 0.9999999273265755, iteration: 91868
loss: 1.0083409547805786,grad_norm: 0.9999992815321445, iteration: 91869
loss: 1.379359245300293,grad_norm: 0.9999996590676394, iteration: 91870
loss: 1.036353588104248,grad_norm: 0.9999993087966087, iteration: 91871
loss: 1.0082590579986572,grad_norm: 0.9999999417535188, iteration: 91872
loss: 1.0699915885925293,grad_norm: 0.9999995956212502, iteration: 91873
loss: 1.1336774826049805,grad_norm: 0.9999998048186781, iteration: 91874
loss: 1.0144459009170532,grad_norm: 0.9999993026269353, iteration: 91875
loss: 1.0660923719406128,grad_norm: 0.9999998200683807, iteration: 91876
loss: 0.9637919664382935,grad_norm: 0.9999992238228754, iteration: 91877
loss: 1.0270439386367798,grad_norm: 0.9999998646210598, iteration: 91878
loss: 1.0386332273483276,grad_norm: 0.9999990852834522, iteration: 91879
loss: 1.2227803468704224,grad_norm: 0.999999728375421, iteration: 91880
loss: 1.0223979949951172,grad_norm: 0.9999994283249463, iteration: 91881
loss: 1.1857486963272095,grad_norm: 0.9999999165227298, iteration: 91882
loss: 1.0471100807189941,grad_norm: 0.999999900528061, iteration: 91883
loss: 1.2485578060150146,grad_norm: 0.9999999845228953, iteration: 91884
loss: 1.0057200193405151,grad_norm: 0.9999996346534936, iteration: 91885
loss: 1.0623000860214233,grad_norm: 0.9999994593802889, iteration: 91886
loss: 1.0746986865997314,grad_norm: 0.9999997111269422, iteration: 91887
loss: 1.0087722539901733,grad_norm: 0.9999993517310707, iteration: 91888
loss: 1.002205729484558,grad_norm: 0.8499358611220738, iteration: 91889
loss: 1.0392793416976929,grad_norm: 0.9999994376169634, iteration: 91890
loss: 1.03078031539917,grad_norm: 0.9999995802234931, iteration: 91891
loss: 1.0410043001174927,grad_norm: 0.9999996683259218, iteration: 91892
loss: 1.0692923069000244,grad_norm: 0.9999998583400638, iteration: 91893
loss: 1.1423039436340332,grad_norm: 0.9999995955706079, iteration: 91894
loss: 1.0703765153884888,grad_norm: 0.9999997877481913, iteration: 91895
loss: 1.1400671005249023,grad_norm: 0.9999996998661934, iteration: 91896
loss: 1.0900547504425049,grad_norm: 0.9929445370166036, iteration: 91897
loss: 1.2614827156066895,grad_norm: 0.9999996782916991, iteration: 91898
loss: 1.1163727045059204,grad_norm: 0.9999999662354954, iteration: 91899
loss: 0.9830052852630615,grad_norm: 0.9999993614218632, iteration: 91900
loss: 1.0835371017456055,grad_norm: 0.9999999087013242, iteration: 91901
loss: 1.2159626483917236,grad_norm: 0.9999998094440161, iteration: 91902
loss: 1.0562787055969238,grad_norm: 0.9999998760914884, iteration: 91903
loss: 1.0375312566757202,grad_norm: 0.9999991514546862, iteration: 91904
loss: 1.2206014394760132,grad_norm: 0.9999999286903961, iteration: 91905
loss: 1.0581051111221313,grad_norm: 0.9999998919953307, iteration: 91906
loss: 1.0839802026748657,grad_norm: 0.9999996934641227, iteration: 91907
loss: 1.0883805751800537,grad_norm: 0.9999998067236908, iteration: 91908
loss: 1.03059983253479,grad_norm: 0.9999992228019803, iteration: 91909
loss: 1.1283519268035889,grad_norm: 0.9999991808710789, iteration: 91910
loss: 1.0198954343795776,grad_norm: 0.9999997935947715, iteration: 91911
loss: 0.9962195158004761,grad_norm: 0.9999991034184337, iteration: 91912
loss: 1.0882316827774048,grad_norm: 0.9999998843533272, iteration: 91913
loss: 1.0451905727386475,grad_norm: 0.9797875611834874, iteration: 91914
loss: 1.2450917959213257,grad_norm: 0.9999991947565651, iteration: 91915
loss: 1.0410443544387817,grad_norm: 0.9999995248142831, iteration: 91916
loss: 1.0347869396209717,grad_norm: 0.9999992188909309, iteration: 91917
loss: 1.032963752746582,grad_norm: 0.99999986378463, iteration: 91918
loss: 1.3296892642974854,grad_norm: 0.9999997517443199, iteration: 91919
loss: 1.106622576713562,grad_norm: 0.9999992718243383, iteration: 91920
loss: 1.1098427772521973,grad_norm: 0.99999998794184, iteration: 91921
loss: 0.9768855571746826,grad_norm: 0.8911912055060403, iteration: 91922
loss: 1.0100898742675781,grad_norm: 0.9999990984117731, iteration: 91923
loss: 1.0383464097976685,grad_norm: 0.9999999615264666, iteration: 91924
loss: 1.114137887954712,grad_norm: 0.9999999382480602, iteration: 91925
loss: 1.3481029272079468,grad_norm: 0.9999997156376589, iteration: 91926
loss: 1.0697287321090698,grad_norm: 0.9999999217173562, iteration: 91927
loss: 1.1717344522476196,grad_norm: 0.9999995807062045, iteration: 91928
loss: 1.1242518424987793,grad_norm: 0.9999992876305198, iteration: 91929
loss: 1.2173357009887695,grad_norm: 0.9999999143656646, iteration: 91930
loss: 1.2121496200561523,grad_norm: 0.9999998872379554, iteration: 91931
loss: 1.1529960632324219,grad_norm: 0.9999998553697869, iteration: 91932
loss: 1.166975498199463,grad_norm: 0.9999997945556979, iteration: 91933
loss: 1.1286088228225708,grad_norm: 0.999999316833725, iteration: 91934
loss: 1.0202655792236328,grad_norm: 0.9999990334801114, iteration: 91935
loss: 1.0487079620361328,grad_norm: 0.965672782598157, iteration: 91936
loss: 1.0100493431091309,grad_norm: 0.9999991727862557, iteration: 91937
loss: 1.0442701578140259,grad_norm: 0.9999991259150884, iteration: 91938
loss: 1.0124471187591553,grad_norm: 0.9999998293046236, iteration: 91939
loss: 0.9885120987892151,grad_norm: 0.9934160541299067, iteration: 91940
loss: 1.0287081003189087,grad_norm: 0.9999998010688023, iteration: 91941
loss: 1.134857416152954,grad_norm: 0.9999995977450384, iteration: 91942
loss: 1.035614013671875,grad_norm: 0.9999995672848389, iteration: 91943
loss: 1.011762022972107,grad_norm: 0.9999992101921178, iteration: 91944
loss: 1.060473084449768,grad_norm: 0.9999996553180347, iteration: 91945
loss: 1.074134111404419,grad_norm: 0.9999999275008895, iteration: 91946
loss: 0.9967626333236694,grad_norm: 0.9999998587812521, iteration: 91947
loss: 1.2316869497299194,grad_norm: 0.999999849709437, iteration: 91948
loss: 1.1555098295211792,grad_norm: 0.9999998163702869, iteration: 91949
loss: 1.2565264701843262,grad_norm: 0.9999997116440204, iteration: 91950
loss: 1.0371241569519043,grad_norm: 0.9999990869720987, iteration: 91951
loss: 1.0807521343231201,grad_norm: 0.9999992614551105, iteration: 91952
loss: 1.0872042179107666,grad_norm: 0.9999994076586841, iteration: 91953
loss: 1.0221718549728394,grad_norm: 0.9999994095646928, iteration: 91954
loss: 1.0172688961029053,grad_norm: 0.9999993488072054, iteration: 91955
loss: 1.081013798713684,grad_norm: 0.9999992842309188, iteration: 91956
loss: 1.1562237739562988,grad_norm: 0.9999992052935219, iteration: 91957
loss: 1.048454999923706,grad_norm: 0.999999662019165, iteration: 91958
loss: 0.9765861630439758,grad_norm: 0.9999991963533529, iteration: 91959
loss: 1.0354095697402954,grad_norm: 0.9999998916077539, iteration: 91960
loss: 1.0911943912506104,grad_norm: 0.9999997514508567, iteration: 91961
loss: 1.0186333656311035,grad_norm: 0.9999991027957218, iteration: 91962
loss: 1.0701912641525269,grad_norm: 0.9999998908035748, iteration: 91963
loss: 1.0600354671478271,grad_norm: 0.9999992586565856, iteration: 91964
loss: 1.0761823654174805,grad_norm: 0.9999994577636372, iteration: 91965
loss: 1.1883422136306763,grad_norm: 0.9999999336651809, iteration: 91966
loss: 1.062368392944336,grad_norm: 0.9999998022810845, iteration: 91967
loss: 1.121641993522644,grad_norm: 0.9999991350590874, iteration: 91968
loss: 1.2287930250167847,grad_norm: 0.9999998149342629, iteration: 91969
loss: 1.0723896026611328,grad_norm: 0.9999993249242997, iteration: 91970
loss: 1.0010323524475098,grad_norm: 0.9999990450342273, iteration: 91971
loss: 1.0688174962997437,grad_norm: 0.9092995375046329, iteration: 91972
loss: 1.1425410509109497,grad_norm: 0.9999992920827007, iteration: 91973
loss: 1.023598551750183,grad_norm: 0.9999994424183292, iteration: 91974
loss: 1.0325264930725098,grad_norm: 0.8791643859600243, iteration: 91975
loss: 1.0414751768112183,grad_norm: 0.9999992568921594, iteration: 91976
loss: 1.1463096141815186,grad_norm: 0.999999750434843, iteration: 91977
loss: 1.0115631818771362,grad_norm: 0.9999994837097679, iteration: 91978
loss: 1.0287424325942993,grad_norm: 0.8679504774510997, iteration: 91979
loss: 0.9723777770996094,grad_norm: 0.9142232042462313, iteration: 91980
loss: 1.0439096689224243,grad_norm: 1.0000000017302002, iteration: 91981
loss: 1.0168675184249878,grad_norm: 0.999999490950319, iteration: 91982
loss: 1.1686345338821411,grad_norm: 0.9999991297555977, iteration: 91983
loss: 1.1493828296661377,grad_norm: 0.9999998733539529, iteration: 91984
loss: 0.9607527852058411,grad_norm: 0.999999177597124, iteration: 91985
loss: 1.0892010927200317,grad_norm: 0.9999992949144335, iteration: 91986
loss: 1.0197020769119263,grad_norm: 0.9999993691587978, iteration: 91987
loss: 1.0346879959106445,grad_norm: 0.9999992976373459, iteration: 91988
loss: 1.0809701681137085,grad_norm: 0.999999446447578, iteration: 91989
loss: 1.1708884239196777,grad_norm: 0.9999993443140723, iteration: 91990
loss: 0.9967015981674194,grad_norm: 0.999999181486191, iteration: 91991
loss: 0.9676799178123474,grad_norm: 0.9999991489079758, iteration: 91992
loss: 1.053571343421936,grad_norm: 0.999999613063814, iteration: 91993
loss: 1.0658587217330933,grad_norm: 0.9999994126409308, iteration: 91994
loss: 1.104818344116211,grad_norm: 0.9999992190367358, iteration: 91995
loss: 1.181431770324707,grad_norm: 0.999999662717697, iteration: 91996
loss: 1.0079596042633057,grad_norm: 0.9999991766331077, iteration: 91997
loss: 0.9607843160629272,grad_norm: 0.9999991512923001, iteration: 91998
loss: 1.019791841506958,grad_norm: 0.8854443273226826, iteration: 91999
loss: 1.049936294555664,grad_norm: 0.9622140151784182, iteration: 92000
loss: 1.2242395877838135,grad_norm: 0.9999999515306641, iteration: 92001
loss: 1.1901578903198242,grad_norm: 0.9999996537467991, iteration: 92002
loss: 1.0085963010787964,grad_norm: 0.9999990499067797, iteration: 92003
loss: 0.9918987154960632,grad_norm: 0.8242708244841191, iteration: 92004
loss: 1.0256831645965576,grad_norm: 0.9999992497515626, iteration: 92005
loss: 1.0102490186691284,grad_norm: 0.9999990926231459, iteration: 92006
loss: 1.0080496072769165,grad_norm: 0.9999997427361661, iteration: 92007
loss: 1.1661784648895264,grad_norm: 0.999999515415239, iteration: 92008
loss: 1.0047415494918823,grad_norm: 0.9999991684533487, iteration: 92009
loss: 1.0960865020751953,grad_norm: 0.9999999306074185, iteration: 92010
loss: 1.0368564128875732,grad_norm: 0.9999998901119842, iteration: 92011
loss: 1.0346965789794922,grad_norm: 0.9999993405692383, iteration: 92012
loss: 1.0437766313552856,grad_norm: 0.9999999352940606, iteration: 92013
loss: 1.0898154973983765,grad_norm: 0.9999994765414503, iteration: 92014
loss: 1.1041065454483032,grad_norm: 0.9999992053582758, iteration: 92015
loss: 1.187441110610962,grad_norm: 0.9999991763702614, iteration: 92016
loss: 0.9937862157821655,grad_norm: 0.9999990430678667, iteration: 92017
loss: 1.023789644241333,grad_norm: 0.9999996660426883, iteration: 92018
loss: 1.0658525228500366,grad_norm: 0.9999991354727534, iteration: 92019
loss: 0.9859869480133057,grad_norm: 0.99999985872367, iteration: 92020
loss: 1.170699119567871,grad_norm: 0.9999999259685666, iteration: 92021
loss: 0.9907317161560059,grad_norm: 0.9999996639204612, iteration: 92022
loss: 1.0024752616882324,grad_norm: 0.9951027586459741, iteration: 92023
loss: 1.0574203729629517,grad_norm: 0.9999997709497517, iteration: 92024
loss: 1.0497331619262695,grad_norm: 0.9999999477781861, iteration: 92025
loss: 1.0374755859375,grad_norm: 0.9999993184973319, iteration: 92026
loss: 1.1127203702926636,grad_norm: 0.9999997279777044, iteration: 92027
loss: 1.0029631853103638,grad_norm: 0.999999246471658, iteration: 92028
loss: 0.9971632957458496,grad_norm: 0.9179554170608213, iteration: 92029
loss: 1.014702558517456,grad_norm: 0.9999996544058286, iteration: 92030
loss: 1.0109673738479614,grad_norm: 0.9999997709453785, iteration: 92031
loss: 0.9870657324790955,grad_norm: 0.999999349475026, iteration: 92032
loss: 0.9971957802772522,grad_norm: 0.9999990464917695, iteration: 92033
loss: 1.010136604309082,grad_norm: 0.9999994995882422, iteration: 92034
loss: 1.1564851999282837,grad_norm: 0.9999997268003309, iteration: 92035
loss: 1.0054899454116821,grad_norm: 0.9960499939577004, iteration: 92036
loss: 1.0237466096878052,grad_norm: 0.9999995629874808, iteration: 92037
loss: 0.9790329933166504,grad_norm: 0.9999993752688823, iteration: 92038
loss: 1.0135329961776733,grad_norm: 0.8311899113336422, iteration: 92039
loss: 1.0614080429077148,grad_norm: 0.9999991158338122, iteration: 92040
loss: 1.1712491512298584,grad_norm: 1.000000018313143, iteration: 92041
loss: 1.0069952011108398,grad_norm: 0.9999990865891549, iteration: 92042
loss: 1.039048671722412,grad_norm: 0.9999995117866698, iteration: 92043
loss: 1.0608317852020264,grad_norm: 0.9999991197309164, iteration: 92044
loss: 1.037951946258545,grad_norm: 0.9999994183958563, iteration: 92045
loss: 1.0267258882522583,grad_norm: 0.9170489357274818, iteration: 92046
loss: 1.1092833280563354,grad_norm: 0.9999993022257646, iteration: 92047
loss: 0.9987348914146423,grad_norm: 0.9607362464825092, iteration: 92048
loss: 1.0668879747390747,grad_norm: 0.9999997529957813, iteration: 92049
loss: 1.0249990224838257,grad_norm: 0.9999992336725246, iteration: 92050
loss: 1.006727933883667,grad_norm: 0.9999991918813532, iteration: 92051
loss: 1.0025348663330078,grad_norm: 0.9999993910282838, iteration: 92052
loss: 1.0913270711898804,grad_norm: 0.999999557571703, iteration: 92053
loss: 1.0354712009429932,grad_norm: 0.7872623700708213, iteration: 92054
loss: 0.9598172903060913,grad_norm: 0.9999993999503014, iteration: 92055
loss: 1.0170698165893555,grad_norm: 0.9999996709979502, iteration: 92056
loss: 1.0370253324508667,grad_norm: 0.9890403283099061, iteration: 92057
loss: 1.0234243869781494,grad_norm: 0.9999991307162719, iteration: 92058
loss: 1.1081222295761108,grad_norm: 0.9999993832515096, iteration: 92059
loss: 1.0650962591171265,grad_norm: 0.9999998762003038, iteration: 92060
loss: 1.0481287240982056,grad_norm: 0.999999661138886, iteration: 92061
loss: 0.9924915432929993,grad_norm: 0.9999995739143354, iteration: 92062
loss: 1.061867356300354,grad_norm: 0.999999121543916, iteration: 92063
loss: 1.1112264394760132,grad_norm: 0.99999962546172, iteration: 92064
loss: 1.044472098350525,grad_norm: 0.9999990453858779, iteration: 92065
loss: 0.9932836294174194,grad_norm: 0.8459626388442388, iteration: 92066
loss: 1.0222456455230713,grad_norm: 0.8873272032357133, iteration: 92067
loss: 1.3479686975479126,grad_norm: 0.9999993339168919, iteration: 92068
loss: 0.9727615714073181,grad_norm: 0.8773930518891911, iteration: 92069
loss: 1.0425662994384766,grad_norm: 0.9999998227823601, iteration: 92070
loss: 1.022658348083496,grad_norm: 0.9999998183699944, iteration: 92071
loss: 1.0451549291610718,grad_norm: 0.9999994924493856, iteration: 92072
loss: 1.0020835399627686,grad_norm: 0.9999993895147287, iteration: 92073
loss: 1.015793800354004,grad_norm: 0.9999990212565315, iteration: 92074
loss: 1.0848742723464966,grad_norm: 0.8930007344857985, iteration: 92075
loss: 1.0938743352890015,grad_norm: 0.8886942384452974, iteration: 92076
loss: 1.174707055091858,grad_norm: 1.0000000937321756, iteration: 92077
loss: 1.0013997554779053,grad_norm: 0.9999998901439803, iteration: 92078
loss: 1.285323977470398,grad_norm: 0.9999998616114121, iteration: 92079
loss: 0.9781038165092468,grad_norm: 0.9999990695093959, iteration: 92080
loss: 1.0415892601013184,grad_norm: 0.9999990399587736, iteration: 92081
loss: 0.9735734462738037,grad_norm: 0.9987922820279296, iteration: 92082
loss: 1.0551891326904297,grad_norm: 0.9999996554819272, iteration: 92083
loss: 1.0132445096969604,grad_norm: 0.9469156749418308, iteration: 92084
loss: 1.0434672832489014,grad_norm: 0.9999998318017083, iteration: 92085
loss: 1.0732728242874146,grad_norm: 0.9999996553159735, iteration: 92086
loss: 1.1796895265579224,grad_norm: 0.9999999504080415, iteration: 92087
loss: 1.3309441804885864,grad_norm: 0.9999998536695962, iteration: 92088
loss: 1.0976470708847046,grad_norm: 0.9999995725533745, iteration: 92089
loss: 1.0066145658493042,grad_norm: 0.9999997231286025, iteration: 92090
loss: 0.9925787448883057,grad_norm: 0.8808123952683019, iteration: 92091
loss: 1.048063039779663,grad_norm: 0.99999932756616, iteration: 92092
loss: 1.0523464679718018,grad_norm: 1.0000000059302954, iteration: 92093
loss: 1.0839613676071167,grad_norm: 1.0000000242924896, iteration: 92094
loss: 0.9869586229324341,grad_norm: 0.9504905317058722, iteration: 92095
loss: 1.0237370729446411,grad_norm: 0.999999706347596, iteration: 92096
loss: 1.027193546295166,grad_norm: 0.9999998986911135, iteration: 92097
loss: 1.0223830938339233,grad_norm: 0.9999991997532365, iteration: 92098
loss: 1.0121346712112427,grad_norm: 0.9038391052200107, iteration: 92099
loss: 1.0819405317306519,grad_norm: 0.954171055368784, iteration: 92100
loss: 1.0068769454956055,grad_norm: 0.9999996208247478, iteration: 92101
loss: 0.9896665215492249,grad_norm: 0.9063874028390986, iteration: 92102
loss: 1.0544320344924927,grad_norm: 0.9999994271841331, iteration: 92103
loss: 0.9933151006698608,grad_norm: 0.9024418212260783, iteration: 92104
loss: 1.0748403072357178,grad_norm: 0.9999995530064253, iteration: 92105
loss: 0.9775711894035339,grad_norm: 0.9999990642365888, iteration: 92106
loss: 1.0604206323623657,grad_norm: 0.999999928793586, iteration: 92107
loss: 1.0264132022857666,grad_norm: 0.9999995985045796, iteration: 92108
loss: 1.0300501585006714,grad_norm: 0.9999995021459817, iteration: 92109
loss: 1.1377569437026978,grad_norm: 0.9999994547926739, iteration: 92110
loss: 1.005632758140564,grad_norm: 0.9999999747117806, iteration: 92111
loss: 1.0474615097045898,grad_norm: 0.9999996209396108, iteration: 92112
loss: 1.1847418546676636,grad_norm: 0.9999998756860451, iteration: 92113
loss: 1.11365807056427,grad_norm: 0.9999995313661741, iteration: 92114
loss: 0.9930058717727661,grad_norm: 0.9748376024471853, iteration: 92115
loss: 1.1480482816696167,grad_norm: 0.9999998305406478, iteration: 92116
loss: 1.1466854810714722,grad_norm: 0.9999998581866819, iteration: 92117
loss: 1.090030550956726,grad_norm: 0.9999997888089459, iteration: 92118
loss: 1.0043727159500122,grad_norm: 0.9999991373958047, iteration: 92119
loss: 1.0586833953857422,grad_norm: 0.9999999295910276, iteration: 92120
loss: 1.0014927387237549,grad_norm: 0.9999998748780627, iteration: 92121
loss: 1.015816569328308,grad_norm: 0.999999255302172, iteration: 92122
loss: 0.9824224710464478,grad_norm: 0.8453204998765793, iteration: 92123
loss: 1.0748869180679321,grad_norm: 0.9999999576106062, iteration: 92124
loss: 1.028863549232483,grad_norm: 0.9999992085223719, iteration: 92125
loss: 1.148087501525879,grad_norm: 0.9999996860281808, iteration: 92126
loss: 1.1700135469436646,grad_norm: 0.9999992992184952, iteration: 92127
loss: 1.0525578260421753,grad_norm: 0.9999998637708043, iteration: 92128
loss: 1.1907962560653687,grad_norm: 0.9999991434774214, iteration: 92129
loss: 1.0473631620407104,grad_norm: 0.9999996264333041, iteration: 92130
loss: 1.2526935338974,grad_norm: 1.000000002043775, iteration: 92131
loss: 1.106218934059143,grad_norm: 0.9999997046016994, iteration: 92132
loss: 1.0383607149124146,grad_norm: 0.9999999245479146, iteration: 92133
loss: 1.1422369480133057,grad_norm: 0.9999997878831663, iteration: 92134
loss: 0.9577865600585938,grad_norm: 0.9999991971948857, iteration: 92135
loss: 1.076566457748413,grad_norm: 0.9999995269761018, iteration: 92136
loss: 0.9694679975509644,grad_norm: 0.9999990362956784, iteration: 92137
loss: 1.178445816040039,grad_norm: 0.9999998146893464, iteration: 92138
loss: 1.0522632598876953,grad_norm: 0.9999991891543932, iteration: 92139
loss: 1.0036306381225586,grad_norm: 0.9009440596649025, iteration: 92140
loss: 1.0795433521270752,grad_norm: 0.9999995604197166, iteration: 92141
loss: 1.0746155977249146,grad_norm: 0.9999993630304933, iteration: 92142
loss: 1.107564091682434,grad_norm: 0.9549721746473948, iteration: 92143
loss: 1.0772684812545776,grad_norm: 0.9999997489646757, iteration: 92144
loss: 1.0106689929962158,grad_norm: 0.9798529480390926, iteration: 92145
loss: 0.9675281643867493,grad_norm: 0.8174011675637125, iteration: 92146
loss: 1.007973551750183,grad_norm: 0.999999319040651, iteration: 92147
loss: 1.1412246227264404,grad_norm: 0.9999997672330868, iteration: 92148
loss: 1.0151699781417847,grad_norm: 0.9999991780108142, iteration: 92149
loss: 1.0556365251541138,grad_norm: 0.9999989807086762, iteration: 92150
loss: 1.0369956493377686,grad_norm: 0.9999994799625905, iteration: 92151
loss: 1.0717250108718872,grad_norm: 0.9999997393860455, iteration: 92152
loss: 1.0038726329803467,grad_norm: 0.999999644278272, iteration: 92153
loss: 1.085193395614624,grad_norm: 0.9451670762820784, iteration: 92154
loss: 1.0477238893508911,grad_norm: 0.9999990873956337, iteration: 92155
loss: 1.0407270193099976,grad_norm: 0.999999597966134, iteration: 92156
loss: 1.0964398384094238,grad_norm: 0.9999992615046788, iteration: 92157
loss: 1.0662877559661865,grad_norm: 0.9999994159212203, iteration: 92158
loss: 1.3060402870178223,grad_norm: 0.9999998214752083, iteration: 92159
loss: 1.0528151988983154,grad_norm: 0.999999687522551, iteration: 92160
loss: 1.093188762664795,grad_norm: 0.9999992686229381, iteration: 92161
loss: 1.0321452617645264,grad_norm: 0.99999974087993, iteration: 92162
loss: 1.0177977085113525,grad_norm: 0.9999995121966914, iteration: 92163
loss: 1.131395936012268,grad_norm: 0.999999328392193, iteration: 92164
loss: 1.1057134866714478,grad_norm: 0.999999990423453, iteration: 92165
loss: 1.0138523578643799,grad_norm: 0.9999991519771352, iteration: 92166
loss: 1.1214154958724976,grad_norm: 0.9999995394940683, iteration: 92167
loss: 1.1084530353546143,grad_norm: 0.9999999293495184, iteration: 92168
loss: 1.0312384366989136,grad_norm: 0.999999166552574, iteration: 92169
loss: 1.0832774639129639,grad_norm: 0.999999258538685, iteration: 92170
loss: 1.0297540426254272,grad_norm: 0.9999992230836829, iteration: 92171
loss: 1.0017110109329224,grad_norm: 0.999999150805191, iteration: 92172
loss: 1.034242033958435,grad_norm: 0.6839566460963209, iteration: 92173
loss: 1.0184118747711182,grad_norm: 0.9999991622720846, iteration: 92174
loss: 1.024173617362976,grad_norm: 0.99999983930634, iteration: 92175
loss: 1.4935705661773682,grad_norm: 0.9999995692744206, iteration: 92176
loss: 1.0060739517211914,grad_norm: 0.9999994955663244, iteration: 92177
loss: 1.0677043199539185,grad_norm: 0.9999997826914971, iteration: 92178
loss: 1.0269421339035034,grad_norm: 0.9999990865346696, iteration: 92179
loss: 1.098169207572937,grad_norm: 0.9999995888376046, iteration: 92180
loss: 1.104767918586731,grad_norm: 0.9999999320448776, iteration: 92181
loss: 1.1070622205734253,grad_norm: 0.9520689510864799, iteration: 92182
loss: 1.0907526016235352,grad_norm: 0.9999995354078107, iteration: 92183
loss: 1.0675963163375854,grad_norm: 0.9999993057720973, iteration: 92184
loss: 1.12092924118042,grad_norm: 0.9999998057956587, iteration: 92185
loss: 1.172642469406128,grad_norm: 0.9999991808411225, iteration: 92186
loss: 1.0791937112808228,grad_norm: 0.9999998922568537, iteration: 92187
loss: 1.017778754234314,grad_norm: 0.9446924600201174, iteration: 92188
loss: 1.0182958841323853,grad_norm: 0.9219403481201383, iteration: 92189
loss: 1.245726227760315,grad_norm: 0.9999995635977226, iteration: 92190
loss: 1.1458520889282227,grad_norm: 0.9999999349189665, iteration: 92191
loss: 1.2624293565750122,grad_norm: 0.9999999630874324, iteration: 92192
loss: 1.2033993005752563,grad_norm: 0.9999997084554778, iteration: 92193
loss: 1.0983392000198364,grad_norm: 0.9999996380713527, iteration: 92194
loss: 1.009542465209961,grad_norm: 0.9999991016120152, iteration: 92195
loss: 1.0430896282196045,grad_norm: 0.99999930444375, iteration: 92196
loss: 1.0111627578735352,grad_norm: 0.9999992723031802, iteration: 92197
loss: 1.0793993473052979,grad_norm: 0.9999991332219407, iteration: 92198
loss: 1.0011634826660156,grad_norm: 1.000000014643554, iteration: 92199
loss: 1.113744854927063,grad_norm: 0.9999991700535763, iteration: 92200
loss: 0.99479740858078,grad_norm: 0.9285849644337809, iteration: 92201
loss: 1.005906105041504,grad_norm: 0.9999991556878423, iteration: 92202
loss: 1.0097893476486206,grad_norm: 0.9381256670193701, iteration: 92203
loss: 1.014328122138977,grad_norm: 0.9999998785895966, iteration: 92204
loss: 1.0299317836761475,grad_norm: 0.9999999164154677, iteration: 92205
loss: 1.007718801498413,grad_norm: 0.999999086987124, iteration: 92206
loss: 1.1092324256896973,grad_norm: 0.999999884015487, iteration: 92207
loss: 1.00602388381958,grad_norm: 0.999999371106992, iteration: 92208
loss: 1.48129403591156,grad_norm: 0.9999998717017644, iteration: 92209
loss: 1.0223777294158936,grad_norm: 0.9999993464886056, iteration: 92210
loss: 1.0945065021514893,grad_norm: 0.9999996308134417, iteration: 92211
loss: 0.9908003807067871,grad_norm: 0.9999994356834829, iteration: 92212
loss: 1.0808753967285156,grad_norm: 0.9999998232333029, iteration: 92213
loss: 1.0172884464263916,grad_norm: 0.9999992282442897, iteration: 92214
loss: 1.0504976511001587,grad_norm: 0.999999562349498, iteration: 92215
loss: 0.9635026454925537,grad_norm: 0.9999992496264821, iteration: 92216
loss: 1.0349653959274292,grad_norm: 0.9377954079485052, iteration: 92217
loss: 1.138078212738037,grad_norm: 0.9999995335501726, iteration: 92218
loss: 1.06170654296875,grad_norm: 0.9999997859507771, iteration: 92219
loss: 1.1318283081054688,grad_norm: 0.9999991617425592, iteration: 92220
loss: 0.9825066924095154,grad_norm: 0.9999994141418601, iteration: 92221
loss: 1.0822926759719849,grad_norm: 0.9999996694149004, iteration: 92222
loss: 1.0215680599212646,grad_norm: 0.755333188753854, iteration: 92223
loss: 1.0852361917495728,grad_norm: 1.0000000965723228, iteration: 92224
loss: 1.0260895490646362,grad_norm: 0.9999999465583046, iteration: 92225
loss: 1.0204981565475464,grad_norm: 0.9999997779058809, iteration: 92226
loss: 1.0778952836990356,grad_norm: 0.9999991806188159, iteration: 92227
loss: 1.0355082750320435,grad_norm: 0.9999992940434463, iteration: 92228
loss: 1.0280877351760864,grad_norm: 0.999999075885797, iteration: 92229
loss: 1.0325582027435303,grad_norm: 0.9999997899469106, iteration: 92230
loss: 0.9855865240097046,grad_norm: 0.9999992540508551, iteration: 92231
loss: 1.0348366498947144,grad_norm: 0.999999172078318, iteration: 92232
loss: 1.0312076807022095,grad_norm: 0.9999996892867729, iteration: 92233
loss: 1.0352314710617065,grad_norm: 0.9054690933080424, iteration: 92234
loss: 0.9830248951911926,grad_norm: 0.949677840313042, iteration: 92235
loss: 1.0170599222183228,grad_norm: 0.9999997139050437, iteration: 92236
loss: 1.0143823623657227,grad_norm: 0.999999093325937, iteration: 92237
loss: 1.0573095083236694,grad_norm: 0.9999992752634934, iteration: 92238
loss: 0.9768268465995789,grad_norm: 0.9999992418075657, iteration: 92239
loss: 1.000421404838562,grad_norm: 0.99999905411741, iteration: 92240
loss: 0.992895245552063,grad_norm: 0.9862172079652554, iteration: 92241
loss: 0.9972483515739441,grad_norm: 0.9757316297002366, iteration: 92242
loss: 1.1659598350524902,grad_norm: 0.999999614828049, iteration: 92243
loss: 1.1618666648864746,grad_norm: 0.9999995376569782, iteration: 92244
loss: 1.0174115896224976,grad_norm: 0.9204321277088926, iteration: 92245
loss: 1.0377534627914429,grad_norm: 0.9620399236659951, iteration: 92246
loss: 1.0102156400680542,grad_norm: 0.8098303571619763, iteration: 92247
loss: 0.9535136818885803,grad_norm: 0.9107112439929455, iteration: 92248
loss: 1.0023880004882812,grad_norm: 0.9999994887853236, iteration: 92249
loss: 0.9564567804336548,grad_norm: 0.9105283839310131, iteration: 92250
loss: 1.049691915512085,grad_norm: 0.9999998365862769, iteration: 92251
loss: 1.2284659147262573,grad_norm: 0.9999998651651688, iteration: 92252
loss: 1.0644690990447998,grad_norm: 0.9999995618830487, iteration: 92253
loss: 1.0225584506988525,grad_norm: 0.9999991221411827, iteration: 92254
loss: 1.0015395879745483,grad_norm: 0.8072188153849851, iteration: 92255
loss: 1.1392452716827393,grad_norm: 0.9999995591861405, iteration: 92256
loss: 1.0862306356430054,grad_norm: 0.9999991713487816, iteration: 92257
loss: 0.9806680679321289,grad_norm: 0.9999998894511697, iteration: 92258
loss: 1.0022233724594116,grad_norm: 0.9999991963990775, iteration: 92259
loss: 1.0807273387908936,grad_norm: 0.9999998495687816, iteration: 92260
loss: 1.005076289176941,grad_norm: 0.9999998801369354, iteration: 92261
loss: 1.072813868522644,grad_norm: 0.9999999757350383, iteration: 92262
loss: 0.9932646155357361,grad_norm: 0.9724496131539643, iteration: 92263
loss: 1.001556158065796,grad_norm: 0.9999991649025595, iteration: 92264
loss: 1.1130388975143433,grad_norm: 0.9999995672463657, iteration: 92265
loss: 1.076045274734497,grad_norm: 0.9726119731556477, iteration: 92266
loss: 0.9910602569580078,grad_norm: 0.8592504876262512, iteration: 92267
loss: 1.0957465171813965,grad_norm: 0.9999993034493222, iteration: 92268
loss: 0.9928257465362549,grad_norm: 0.9999997584401931, iteration: 92269
loss: 1.0480440855026245,grad_norm: 0.9999997621772795, iteration: 92270
loss: 0.9727867245674133,grad_norm: 0.999999197258509, iteration: 92271
loss: 1.0790777206420898,grad_norm: 0.99999939458718, iteration: 92272
loss: 1.003283977508545,grad_norm: 0.9999991923775114, iteration: 92273
loss: 1.0127300024032593,grad_norm: 0.9624054700225598, iteration: 92274
loss: 1.0198270082473755,grad_norm: 0.9999994385681908, iteration: 92275
loss: 0.992478609085083,grad_norm: 0.8404057527774252, iteration: 92276
loss: 1.1353141069412231,grad_norm: 0.9999990550658281, iteration: 92277
loss: 1.0916414260864258,grad_norm: 0.9999991395379262, iteration: 92278
loss: 1.0712310075759888,grad_norm: 0.9999993542736652, iteration: 92279
loss: 1.04891836643219,grad_norm: 0.8611996047409759, iteration: 92280
loss: 1.0928013324737549,grad_norm: 0.9999993471884757, iteration: 92281
loss: 1.0027656555175781,grad_norm: 0.9999993692625836, iteration: 92282
loss: 0.9950793981552124,grad_norm: 0.9999994494935754, iteration: 92283
loss: 1.0153090953826904,grad_norm: 0.9999991514621629, iteration: 92284
loss: 1.0302488803863525,grad_norm: 0.9999990620118028, iteration: 92285
loss: 1.0444365739822388,grad_norm: 0.999999267972228, iteration: 92286
loss: 1.018464207649231,grad_norm: 0.9999992376726338, iteration: 92287
loss: 0.9960783123970032,grad_norm: 0.8456529076508764, iteration: 92288
loss: 1.1190296411514282,grad_norm: 0.9999993585323464, iteration: 92289
loss: 1.037597417831421,grad_norm: 0.9999995785211319, iteration: 92290
loss: 1.0265998840332031,grad_norm: 0.9999990203163496, iteration: 92291
loss: 1.0807958841323853,grad_norm: 0.9999993471150439, iteration: 92292
loss: 1.0666917562484741,grad_norm: 0.9999991199321603, iteration: 92293
loss: 1.0265722274780273,grad_norm: 0.9999994932106798, iteration: 92294
loss: 1.021266222000122,grad_norm: 0.9999991265252152, iteration: 92295
loss: 1.021743893623352,grad_norm: 0.9780260894023832, iteration: 92296
loss: 0.9891448020935059,grad_norm: 0.9999990244080772, iteration: 92297
loss: 1.0290789604187012,grad_norm: 0.9999991916324489, iteration: 92298
loss: 0.9887848496437073,grad_norm: 0.9999991187976833, iteration: 92299
loss: 1.1003788709640503,grad_norm: 0.9999992726992184, iteration: 92300
loss: 1.0008409023284912,grad_norm: 0.9255323346444675, iteration: 92301
loss: 0.9617937803268433,grad_norm: 0.9210285600058356, iteration: 92302
loss: 1.064653992652893,grad_norm: 0.999999181462233, iteration: 92303
loss: 1.0536010265350342,grad_norm: 0.9999990981995417, iteration: 92304
loss: 0.9982101321220398,grad_norm: 0.9171647092486165, iteration: 92305
loss: 1.0247968435287476,grad_norm: 0.9999995884438047, iteration: 92306
loss: 1.1808013916015625,grad_norm: 0.9999999274767358, iteration: 92307
loss: 1.0670192241668701,grad_norm: 0.9999997758735415, iteration: 92308
loss: 1.0235024690628052,grad_norm: 0.8941408337186781, iteration: 92309
loss: 1.048427939414978,grad_norm: 0.8970165926678868, iteration: 92310
loss: 1.0014082193374634,grad_norm: 0.9999996829437482, iteration: 92311
loss: 1.0219154357910156,grad_norm: 0.7965779248431135, iteration: 92312
loss: 1.0256688594818115,grad_norm: 0.9066601907765848, iteration: 92313
loss: 1.0087530612945557,grad_norm: 0.999999713667467, iteration: 92314
loss: 1.0008608102798462,grad_norm: 0.8758979010207695, iteration: 92315
loss: 0.9875152707099915,grad_norm: 0.9999992278619466, iteration: 92316
loss: 1.055470585823059,grad_norm: 0.9999994213099734, iteration: 92317
loss: 1.0616501569747925,grad_norm: 0.9999996375702164, iteration: 92318
loss: 0.9608355164527893,grad_norm: 0.999999125580664, iteration: 92319
loss: 1.0087456703186035,grad_norm: 0.9660042942793933, iteration: 92320
loss: 1.0213682651519775,grad_norm: 0.737302647229822, iteration: 92321
loss: 1.0248795747756958,grad_norm: 0.9999996247456646, iteration: 92322
loss: 1.014410376548767,grad_norm: 0.8409443506936741, iteration: 92323
loss: 0.9698609709739685,grad_norm: 0.8988497433411976, iteration: 92324
loss: 1.0143895149230957,grad_norm: 0.9999992566096291, iteration: 92325
loss: 1.0440051555633545,grad_norm: 0.9999994513412975, iteration: 92326
loss: 0.9900321364402771,grad_norm: 0.8510481917352729, iteration: 92327
loss: 1.0380299091339111,grad_norm: 0.999999604899188, iteration: 92328
loss: 1.0403966903686523,grad_norm: 0.993162180435541, iteration: 92329
loss: 0.9892844557762146,grad_norm: 0.9876260378990264, iteration: 92330
loss: 1.0877666473388672,grad_norm: 0.9999997140762751, iteration: 92331
loss: 1.0298298597335815,grad_norm: 0.9999995304528638, iteration: 92332
loss: 0.9925546646118164,grad_norm: 0.9999990448829957, iteration: 92333
loss: 1.0680994987487793,grad_norm: 0.9999996481331056, iteration: 92334
loss: 1.0137321949005127,grad_norm: 0.9999990924597714, iteration: 92335
loss: 1.1234575510025024,grad_norm: 0.9999994831134835, iteration: 92336
loss: 0.9604509472846985,grad_norm: 0.8612103438644505, iteration: 92337
loss: 1.027138352394104,grad_norm: 0.8930644133032789, iteration: 92338
loss: 1.1476686000823975,grad_norm: 0.9999999422542403, iteration: 92339
loss: 1.003772497177124,grad_norm: 0.9999992589104374, iteration: 92340
loss: 0.9782411456108093,grad_norm: 0.9392346951243303, iteration: 92341
loss: 1.0457295179367065,grad_norm: 0.9999993438452307, iteration: 92342
loss: 1.034013032913208,grad_norm: 0.9525467985701432, iteration: 92343
loss: 1.1003129482269287,grad_norm: 0.9999994041946471, iteration: 92344
loss: 1.0744726657867432,grad_norm: 0.9999999306930386, iteration: 92345
loss: 1.1085774898529053,grad_norm: 0.9999996439994803, iteration: 92346
loss: 1.1830756664276123,grad_norm: 0.9999999215300042, iteration: 92347
loss: 1.155499815940857,grad_norm: 0.9999993443995884, iteration: 92348
loss: 1.025624394416809,grad_norm: 0.9999991433495595, iteration: 92349
loss: 1.0695370435714722,grad_norm: 0.9999995540043136, iteration: 92350
loss: 0.9996724724769592,grad_norm: 0.8610998005648096, iteration: 92351
loss: 0.9896557331085205,grad_norm: 0.8872921050138552, iteration: 92352
loss: 0.9756599068641663,grad_norm: 0.8857807424404743, iteration: 92353
loss: 1.141446590423584,grad_norm: 0.9999998233820977, iteration: 92354
loss: 1.2302113771438599,grad_norm: 0.9999996449512482, iteration: 92355
loss: 1.0034449100494385,grad_norm: 0.9999999794728949, iteration: 92356
loss: 0.9964043498039246,grad_norm: 0.869188433287593, iteration: 92357
loss: 1.0183593034744263,grad_norm: 0.8373925157536768, iteration: 92358
loss: 1.0572208166122437,grad_norm: 0.9999991403074805, iteration: 92359
loss: 1.0595507621765137,grad_norm: 0.9999995863594698, iteration: 92360
loss: 1.1498206853866577,grad_norm: 0.9999998905275755, iteration: 92361
loss: 1.0088751316070557,grad_norm: 0.9999989820756211, iteration: 92362
loss: 1.0189909934997559,grad_norm: 0.9999997504372959, iteration: 92363
loss: 1.0194404125213623,grad_norm: 0.9999993315172531, iteration: 92364
loss: 1.064668893814087,grad_norm: 0.99999974722478, iteration: 92365
loss: 1.2113341093063354,grad_norm: 0.9999994120769996, iteration: 92366
loss: 1.047070860862732,grad_norm: 0.9999997007832638, iteration: 92367
loss: 0.9875174760818481,grad_norm: 0.999999720506847, iteration: 92368
loss: 1.0382591485977173,grad_norm: 0.8585166888263775, iteration: 92369
loss: 0.9939377307891846,grad_norm: 0.9589543902123567, iteration: 92370
loss: 1.313283920288086,grad_norm: 0.9999997759204815, iteration: 92371
loss: 1.0346059799194336,grad_norm: 0.9999996743271391, iteration: 92372
loss: 1.0042250156402588,grad_norm: 0.915958525401366, iteration: 92373
loss: 0.9756352305412292,grad_norm: 0.98121770682287, iteration: 92374
loss: 1.0303821563720703,grad_norm: 1.0000000237609958, iteration: 92375
loss: 1.1311975717544556,grad_norm: 0.9999996585307589, iteration: 92376
loss: 1.0151557922363281,grad_norm: 0.8950925408330686, iteration: 92377
loss: 1.0337988138198853,grad_norm: 0.8178339722126757, iteration: 92378
loss: 1.02058744430542,grad_norm: 0.9999990510283162, iteration: 92379
loss: 1.0350741147994995,grad_norm: 0.999999618153474, iteration: 92380
loss: 1.104539394378662,grad_norm: 0.9999992928255507, iteration: 92381
loss: 1.0994983911514282,grad_norm: 0.9999994754384879, iteration: 92382
loss: 1.082387924194336,grad_norm: 0.9999994779486417, iteration: 92383
loss: 0.9940007925033569,grad_norm: 0.852730742154444, iteration: 92384
loss: 0.9817278385162354,grad_norm: 0.8402476444628861, iteration: 92385
loss: 0.9868780374526978,grad_norm: 0.929676383565857, iteration: 92386
loss: 0.983978807926178,grad_norm: 0.9999990553528109, iteration: 92387
loss: 1.0461143255233765,grad_norm: 0.999999334562682, iteration: 92388
loss: 1.0056875944137573,grad_norm: 0.9999994252299407, iteration: 92389
loss: 1.0422371625900269,grad_norm: 0.9999992785271649, iteration: 92390
loss: 0.9962034821510315,grad_norm: 0.9398895670990138, iteration: 92391
loss: 1.0291613340377808,grad_norm: 0.9999996104698669, iteration: 92392
loss: 1.1180998086929321,grad_norm: 0.9999995736143162, iteration: 92393
loss: 1.021807312965393,grad_norm: 0.9999994128441286, iteration: 92394
loss: 1.0416182279586792,grad_norm: 0.9999992488406969, iteration: 92395
loss: 1.0340996980667114,grad_norm: 0.9239564104019444, iteration: 92396
loss: 0.9970503449440002,grad_norm: 0.9999993204085794, iteration: 92397
loss: 0.9995673298835754,grad_norm: 0.8478974576558278, iteration: 92398
loss: 1.0419672727584839,grad_norm: 0.9999993035504845, iteration: 92399
loss: 1.098235845565796,grad_norm: 0.9999992040906022, iteration: 92400
loss: 1.2229504585266113,grad_norm: 0.9999997878838569, iteration: 92401
loss: 1.004042625427246,grad_norm: 0.9999992416174399, iteration: 92402
loss: 1.0515258312225342,grad_norm: 0.9999991275742565, iteration: 92403
loss: 1.0015169382095337,grad_norm: 0.8571927788915761, iteration: 92404
loss: 1.0059599876403809,grad_norm: 0.7895817033543908, iteration: 92405
loss: 1.048234462738037,grad_norm: 0.8093064781489853, iteration: 92406
loss: 1.008740782737732,grad_norm: 0.9999994013800975, iteration: 92407
loss: 1.051506757736206,grad_norm: 0.9999998903144315, iteration: 92408
loss: 1.0502623319625854,grad_norm: 0.999999023266326, iteration: 92409
loss: 0.9822331666946411,grad_norm: 0.9765966092950511, iteration: 92410
loss: 1.0892208814620972,grad_norm: 0.9999990190748347, iteration: 92411
loss: 0.979331910610199,grad_norm: 0.9339950610965185, iteration: 92412
loss: 1.0807286500930786,grad_norm: 0.999999113957008, iteration: 92413
loss: 0.9951795935630798,grad_norm: 0.890743692290007, iteration: 92414
loss: 0.9842272400856018,grad_norm: 0.8831112523118004, iteration: 92415
loss: 0.9678907990455627,grad_norm: 0.9212409315874422, iteration: 92416
loss: 0.9882746338844299,grad_norm: 0.8250153762046731, iteration: 92417
loss: 0.9768722057342529,grad_norm: 0.9999991523088058, iteration: 92418
loss: 1.1935772895812988,grad_norm: 0.9999995734580432, iteration: 92419
loss: 1.0002387762069702,grad_norm: 0.9999990968429813, iteration: 92420
loss: 1.076432704925537,grad_norm: 0.8028575109249316, iteration: 92421
loss: 0.9914971590042114,grad_norm: 0.9999991016379456, iteration: 92422
loss: 0.9775750637054443,grad_norm: 0.9127074388126282, iteration: 92423
loss: 1.0809357166290283,grad_norm: 0.9999997396973158, iteration: 92424
loss: 0.9988513588905334,grad_norm: 0.9999990729623763, iteration: 92425
loss: 1.0501657724380493,grad_norm: 0.9940700677503637, iteration: 92426
loss: 1.0083909034729004,grad_norm: 0.9127569406027601, iteration: 92427
loss: 0.9770320653915405,grad_norm: 0.9999990598155618, iteration: 92428
loss: 0.9823492765426636,grad_norm: 0.99999911133543, iteration: 92429
loss: 0.9964359998703003,grad_norm: 0.9666460327176701, iteration: 92430
loss: 1.08604896068573,grad_norm: 0.9477546373443482, iteration: 92431
loss: 1.0027573108673096,grad_norm: 0.8785752961540148, iteration: 92432
loss: 1.003366470336914,grad_norm: 0.9999990693107035, iteration: 92433
loss: 0.9868715405464172,grad_norm: 0.999999324547042, iteration: 92434
loss: 1.021641492843628,grad_norm: 0.9999991640925127, iteration: 92435
loss: 0.9829674959182739,grad_norm: 0.8047243613533893, iteration: 92436
loss: 1.047285556793213,grad_norm: 0.999999655395561, iteration: 92437
loss: 1.0255378484725952,grad_norm: 0.999999759579715, iteration: 92438
loss: 1.1163485050201416,grad_norm: 0.9999992064373111, iteration: 92439
loss: 1.0177409648895264,grad_norm: 0.9494666227364038, iteration: 92440
loss: 1.0051497220993042,grad_norm: 0.9226171568028021, iteration: 92441
loss: 1.1711455583572388,grad_norm: 0.999999223414292, iteration: 92442
loss: 1.0180952548980713,grad_norm: 0.8250967445517445, iteration: 92443
loss: 1.0012444257736206,grad_norm: 0.9999996579488473, iteration: 92444
loss: 1.09055757522583,grad_norm: 0.9999995479961749, iteration: 92445
loss: 1.0200196504592896,grad_norm: 0.9364884535644769, iteration: 92446
loss: 1.0206000804901123,grad_norm: 0.9999998098444093, iteration: 92447
loss: 1.0050938129425049,grad_norm: 0.999999365668209, iteration: 92448
loss: 0.9922090768814087,grad_norm: 0.9999995308105043, iteration: 92449
loss: 0.9945186972618103,grad_norm: 0.9999990889405345, iteration: 92450
loss: 0.9961755871772766,grad_norm: 0.9999996464696547, iteration: 92451
loss: 1.0227882862091064,grad_norm: 0.8790815666304483, iteration: 92452
loss: 1.017490029335022,grad_norm: 0.9118581711942284, iteration: 92453
loss: 0.984889805316925,grad_norm: 0.8264170200003098, iteration: 92454
loss: 1.0375484228134155,grad_norm: 0.9999998555653835, iteration: 92455
loss: 0.9704374670982361,grad_norm: 0.9082692832159638, iteration: 92456
loss: 0.9950854778289795,grad_norm: 0.9349705936196591, iteration: 92457
loss: 1.0366030931472778,grad_norm: 0.9999989796424994, iteration: 92458
loss: 1.0252293348312378,grad_norm: 0.9924704148952572, iteration: 92459
loss: 1.0029339790344238,grad_norm: 0.9999991480909175, iteration: 92460
loss: 1.139386534690857,grad_norm: 0.9999999107223646, iteration: 92461
loss: 1.1066818237304688,grad_norm: 0.9999997356555798, iteration: 92462
loss: 1.0628995895385742,grad_norm: 0.885136970126319, iteration: 92463
loss: 1.0119736194610596,grad_norm: 0.8010238487171201, iteration: 92464
loss: 1.0854055881500244,grad_norm: 0.9999999820132195, iteration: 92465
loss: 1.0662909746170044,grad_norm: 0.9999997996079206, iteration: 92466
loss: 0.9842274188995361,grad_norm: 0.9999991846769174, iteration: 92467
loss: 0.980901300907135,grad_norm: 0.8824999888830433, iteration: 92468
loss: 0.9929896593093872,grad_norm: 0.9999999978675442, iteration: 92469
loss: 1.0104587078094482,grad_norm: 0.9533252152975686, iteration: 92470
loss: 0.9686227440834045,grad_norm: 0.9918708106064693, iteration: 92471
loss: 1.1618274450302124,grad_norm: 0.9999996609279511, iteration: 92472
loss: 1.051311731338501,grad_norm: 0.9888019702419805, iteration: 92473
loss: 0.9867005944252014,grad_norm: 0.999999323216384, iteration: 92474
loss: 0.9721994996070862,grad_norm: 0.8400277053262586, iteration: 92475
loss: 1.0337759256362915,grad_norm: 0.999999412514868, iteration: 92476
loss: 0.9728037714958191,grad_norm: 0.9999990738423881, iteration: 92477
loss: 1.0990269184112549,grad_norm: 0.9999995379373103, iteration: 92478
loss: 1.034924864768982,grad_norm: 0.9999995118688575, iteration: 92479
loss: 0.971956193447113,grad_norm: 0.8298119796250283, iteration: 92480
loss: 0.9675484299659729,grad_norm: 0.8707063968373661, iteration: 92481
loss: 0.9741537570953369,grad_norm: 0.9999993140569114, iteration: 92482
loss: 1.2698405981063843,grad_norm: 0.9999991349788936, iteration: 92483
loss: 1.0966840982437134,grad_norm: 0.9999998451192055, iteration: 92484
loss: 0.9572334885597229,grad_norm: 0.7632930841716266, iteration: 92485
loss: 1.1275745630264282,grad_norm: 0.9999993635210446, iteration: 92486
loss: 1.0008307695388794,grad_norm: 0.9999993575854353, iteration: 92487
loss: 1.0267494916915894,grad_norm: 0.9999990627878256, iteration: 92488
loss: 1.0353970527648926,grad_norm: 0.9999992454167207, iteration: 92489
loss: 1.1037797927856445,grad_norm: 0.9999990093062378, iteration: 92490
loss: 0.9922296404838562,grad_norm: 0.9998708245670958, iteration: 92491
loss: 0.9999889135360718,grad_norm: 0.999999134055439, iteration: 92492
loss: 1.0683691501617432,grad_norm: 0.9999990981628604, iteration: 92493
loss: 1.006528377532959,grad_norm: 0.9999991623144627, iteration: 92494
loss: 1.0103641748428345,grad_norm: 0.8990285990098191, iteration: 92495
loss: 1.0046968460083008,grad_norm: 0.9999991337294062, iteration: 92496
loss: 1.0541788339614868,grad_norm: 0.9126388308784456, iteration: 92497
loss: 0.9957138895988464,grad_norm: 0.9999990465558365, iteration: 92498
loss: 1.0512547492980957,grad_norm: 0.7677701842633948, iteration: 92499
loss: 1.0005453824996948,grad_norm: 0.8879086951218732, iteration: 92500
loss: 1.0121570825576782,grad_norm: 0.9999989227496505, iteration: 92501
loss: 1.150114893913269,grad_norm: 0.9999992311478607, iteration: 92502
loss: 1.0535298585891724,grad_norm: 0.9999993752273711, iteration: 92503
loss: 1.0460337400436401,grad_norm: 0.9342046741306098, iteration: 92504
loss: 1.0134305953979492,grad_norm: 0.9999991277277557, iteration: 92505
loss: 1.01727294921875,grad_norm: 0.9999990191657109, iteration: 92506
loss: 1.0293782949447632,grad_norm: 0.9095773079013111, iteration: 92507
loss: 1.0198209285736084,grad_norm: 0.9999990273114288, iteration: 92508
loss: 1.0089747905731201,grad_norm: 0.9830606585820935, iteration: 92509
loss: 1.0133917331695557,grad_norm: 0.8994910177505014, iteration: 92510
loss: 0.9769249558448792,grad_norm: 0.9927611522950408, iteration: 92511
loss: 0.9884431958198547,grad_norm: 0.9999992768738508, iteration: 92512
loss: 1.0242173671722412,grad_norm: 0.9618297736139331, iteration: 92513
loss: 1.0583393573760986,grad_norm: 0.9999994462119496, iteration: 92514
loss: 1.0146385431289673,grad_norm: 0.9999995676431218, iteration: 92515
loss: 1.0405864715576172,grad_norm: 0.9999999828610991, iteration: 92516
loss: 1.0020194053649902,grad_norm: 0.7911595128688297, iteration: 92517
loss: 1.0255476236343384,grad_norm: 0.9181049009253464, iteration: 92518
loss: 0.9960104823112488,grad_norm: 0.9999991908012188, iteration: 92519
loss: 1.0169717073440552,grad_norm: 0.9999992012095269, iteration: 92520
loss: 0.98030024766922,grad_norm: 0.999998977077637, iteration: 92521
loss: 1.0087120532989502,grad_norm: 0.9999994371178395, iteration: 92522
loss: 0.9926280975341797,grad_norm: 0.823557439511158, iteration: 92523
loss: 1.1435563564300537,grad_norm: 0.9999999724319505, iteration: 92524
loss: 1.00099778175354,grad_norm: 0.8445200640692505, iteration: 92525
loss: 1.0320498943328857,grad_norm: 0.9452952844896694, iteration: 92526
loss: 1.0029319524765015,grad_norm: 0.9999990800981571, iteration: 92527
loss: 1.0333865880966187,grad_norm: 0.9999998892638674, iteration: 92528
loss: 0.9835523366928101,grad_norm: 0.9999994405947524, iteration: 92529
loss: 0.9742563366889954,grad_norm: 0.9760247744618008, iteration: 92530
loss: 0.9719225168228149,grad_norm: 0.8418743521113573, iteration: 92531
loss: 0.9954495429992676,grad_norm: 0.9199633995937087, iteration: 92532
loss: 1.0034340620040894,grad_norm: 0.9021161322414585, iteration: 92533
loss: 1.0496548414230347,grad_norm: 0.9999996707609451, iteration: 92534
loss: 1.0485644340515137,grad_norm: 0.9899171653830459, iteration: 92535
loss: 0.9903594851493835,grad_norm: 0.9999996934614117, iteration: 92536
loss: 0.979157567024231,grad_norm: 0.800883828115113, iteration: 92537
loss: 1.0337556600570679,grad_norm: 0.9999998469588326, iteration: 92538
loss: 1.0100163221359253,grad_norm: 0.9999994917870917, iteration: 92539
loss: 0.9982671141624451,grad_norm: 0.8643079376572647, iteration: 92540
loss: 1.097267746925354,grad_norm: 0.9999993581865791, iteration: 92541
loss: 1.0348402261734009,grad_norm: 0.8973502644124833, iteration: 92542
loss: 1.0232372283935547,grad_norm: 0.9999989054650525, iteration: 92543
loss: 1.0826221704483032,grad_norm: 0.9999997998278652, iteration: 92544
loss: 1.0306830406188965,grad_norm: 0.9999998686328468, iteration: 92545
loss: 0.9725705981254578,grad_norm: 0.9564125237163783, iteration: 92546
loss: 1.0078142881393433,grad_norm: 0.9068245929556634, iteration: 92547
loss: 0.9822765588760376,grad_norm: 0.9999996903703634, iteration: 92548
loss: 0.9822242259979248,grad_norm: 0.9999991246885224, iteration: 92549
loss: 0.9976931214332581,grad_norm: 0.933399127015341, iteration: 92550
loss: 0.9927337169647217,grad_norm: 0.8917708639329448, iteration: 92551
loss: 1.0420640707015991,grad_norm: 0.9999992302227726, iteration: 92552
loss: 1.0667953491210938,grad_norm: 0.999999895774339, iteration: 92553
loss: 0.9531302452087402,grad_norm: 0.9587850540290175, iteration: 92554
loss: 1.026536226272583,grad_norm: 0.9977418418837335, iteration: 92555
loss: 1.17868971824646,grad_norm: 0.9999998256292434, iteration: 92556
loss: 1.0435189008712769,grad_norm: 0.9999996831117792, iteration: 92557
loss: 1.0251646041870117,grad_norm: 0.9999993052655151, iteration: 92558
loss: 1.0087991952896118,grad_norm: 0.7654185158259407, iteration: 92559
loss: 1.087173581123352,grad_norm: 0.8958583902463535, iteration: 92560
loss: 1.0846408605575562,grad_norm: 0.9999992986387061, iteration: 92561
loss: 1.017691969871521,grad_norm: 0.8650236412619836, iteration: 92562
loss: 1.0118796825408936,grad_norm: 0.9999995999003474, iteration: 92563
loss: 1.0112651586532593,grad_norm: 0.9999999437333886, iteration: 92564
loss: 1.0284123420715332,grad_norm: 0.9999991107233009, iteration: 92565
loss: 1.0132678747177124,grad_norm: 0.9076327496049313, iteration: 92566
loss: 1.0863585472106934,grad_norm: 0.9999991963838397, iteration: 92567
loss: 1.0191643238067627,grad_norm: 0.9398912531210366, iteration: 92568
loss: 0.9872311353683472,grad_norm: 0.9999991271371028, iteration: 92569
loss: 1.0707212686538696,grad_norm: 0.9999993302990186, iteration: 92570
loss: 1.0302486419677734,grad_norm: 0.9999990911282063, iteration: 92571
loss: 0.995216429233551,grad_norm: 0.9535596417208628, iteration: 92572
loss: 1.029381513595581,grad_norm: 0.9999991973317746, iteration: 92573
loss: 1.005013108253479,grad_norm: 0.8641809207016159, iteration: 92574
loss: 1.2569431066513062,grad_norm: 0.9999995758870585, iteration: 92575
loss: 1.015625,grad_norm: 0.8874852751822312, iteration: 92576
loss: 0.9774124622344971,grad_norm: 0.9999995407285505, iteration: 92577
loss: 1.0089105367660522,grad_norm: 0.9328905438568764, iteration: 92578
loss: 1.1983364820480347,grad_norm: 0.9999997751752975, iteration: 92579
loss: 1.079306960105896,grad_norm: 0.9999992687125467, iteration: 92580
loss: 0.9849920272827148,grad_norm: 0.9999990403010095, iteration: 92581
loss: 1.004716396331787,grad_norm: 0.7980624807397096, iteration: 92582
loss: 1.0061622858047485,grad_norm: 0.9999990799591494, iteration: 92583
loss: 1.0408350229263306,grad_norm: 0.8776755687904554, iteration: 92584
loss: 1.0244401693344116,grad_norm: 0.7834518426150908, iteration: 92585
loss: 1.0534214973449707,grad_norm: 0.9999992941330449, iteration: 92586
loss: 1.0288760662078857,grad_norm: 0.9999990400510895, iteration: 92587
loss: 1.0498872995376587,grad_norm: 0.9999995773673037, iteration: 92588
loss: 1.1631553173065186,grad_norm: 0.9999995824180984, iteration: 92589
loss: 1.0182245969772339,grad_norm: 0.9213273034290468, iteration: 92590
loss: 0.9711272716522217,grad_norm: 0.9999990144727845, iteration: 92591
loss: 1.0048696994781494,grad_norm: 0.9999991875345666, iteration: 92592
loss: 1.001985788345337,grad_norm: 0.9999991856764778, iteration: 92593
loss: 1.043414831161499,grad_norm: 0.9999992848672984, iteration: 92594
loss: 1.0400737524032593,grad_norm: 0.9999990962584882, iteration: 92595
loss: 0.9965641498565674,grad_norm: 0.884933020539368, iteration: 92596
loss: 0.9824517369270325,grad_norm: 0.9999995804798976, iteration: 92597
loss: 1.1022690534591675,grad_norm: 0.9999994467669506, iteration: 92598
loss: 1.0173951387405396,grad_norm: 0.9390257237812529, iteration: 92599
loss: 1.059504747390747,grad_norm: 0.9999999249204854, iteration: 92600
loss: 1.025295615196228,grad_norm: 0.9999998574663815, iteration: 92601
loss: 1.2247045040130615,grad_norm: 0.999999215777259, iteration: 92602
loss: 1.0346004962921143,grad_norm: 0.9999992139470544, iteration: 92603
loss: 0.9941323399543762,grad_norm: 0.9679755179018087, iteration: 92604
loss: 1.0904135704040527,grad_norm: 0.9999997745899972, iteration: 92605
loss: 1.0739527940750122,grad_norm: 0.99999983006992, iteration: 92606
loss: 1.0090794563293457,grad_norm: 0.9647975480552391, iteration: 92607
loss: 1.0367263555526733,grad_norm: 0.999999695155601, iteration: 92608
loss: 1.0578017234802246,grad_norm: 0.9999992297099423, iteration: 92609
loss: 1.0458638668060303,grad_norm: 0.9999991319136892, iteration: 92610
loss: 1.0374655723571777,grad_norm: 0.9999995170297535, iteration: 92611
loss: 1.037549376487732,grad_norm: 0.9999993656873324, iteration: 92612
loss: 1.0259953737258911,grad_norm: 0.9077071217527112, iteration: 92613
loss: 0.9605477452278137,grad_norm: 0.9999992134776646, iteration: 92614
loss: 1.0461246967315674,grad_norm: 0.9999996734738685, iteration: 92615
loss: 0.9955175518989563,grad_norm: 0.7200122220339695, iteration: 92616
loss: 1.079248309135437,grad_norm: 0.9999991837948051, iteration: 92617
loss: 1.1247704029083252,grad_norm: 0.9439886741238106, iteration: 92618
loss: 1.0781443119049072,grad_norm: 0.9999992441984948, iteration: 92619
loss: 0.9890614151954651,grad_norm: 0.9999991607221406, iteration: 92620
loss: 1.0278465747833252,grad_norm: 0.9999993719165577, iteration: 92621
loss: 0.9725127220153809,grad_norm: 0.9154413859923674, iteration: 92622
loss: 1.0217455625534058,grad_norm: 0.9999991438035035, iteration: 92623
loss: 1.0052183866500854,grad_norm: 0.9147078418431093, iteration: 92624
loss: 1.1406630277633667,grad_norm: 0.9999994308201239, iteration: 92625
loss: 1.0523505210876465,grad_norm: 0.9999996726654401, iteration: 92626
loss: 1.1209723949432373,grad_norm: 0.9999995714964486, iteration: 92627
loss: 1.137303113937378,grad_norm: 0.9999998168406692, iteration: 92628
loss: 1.1229102611541748,grad_norm: 0.9999998775945516, iteration: 92629
loss: 0.9715263843536377,grad_norm: 0.9999992297164448, iteration: 92630
loss: 0.9666067361831665,grad_norm: 0.9198531674929735, iteration: 92631
loss: 1.0145503282546997,grad_norm: 0.9999990941968575, iteration: 92632
loss: 1.0600413084030151,grad_norm: 0.9999994548284952, iteration: 92633
loss: 1.0966051816940308,grad_norm: 0.9999993311278343, iteration: 92634
loss: 1.0248374938964844,grad_norm: 0.999999361242185, iteration: 92635
loss: 1.0073364973068237,grad_norm: 0.9522955295794313, iteration: 92636
loss: 1.064980149269104,grad_norm: 0.999999090812456, iteration: 92637
loss: 1.1245969533920288,grad_norm: 0.9999997332033866, iteration: 92638
loss: 1.0130701065063477,grad_norm: 0.9999992401414557, iteration: 92639
loss: 1.008264183998108,grad_norm: 0.8984721453647413, iteration: 92640
loss: 1.000760793685913,grad_norm: 0.8842906415506183, iteration: 92641
loss: 0.9825145602226257,grad_norm: 0.999999094097561, iteration: 92642
loss: 1.0116736888885498,grad_norm: 0.9999991813025597, iteration: 92643
loss: 0.9959200620651245,grad_norm: 0.9999992048851113, iteration: 92644
loss: 1.0745699405670166,grad_norm: 0.9999991930756638, iteration: 92645
loss: 0.9994608759880066,grad_norm: 0.8856125781679063, iteration: 92646
loss: 0.993193507194519,grad_norm: 0.8670274280826412, iteration: 92647
loss: 1.0399436950683594,grad_norm: 0.9999998306984607, iteration: 92648
loss: 1.0458717346191406,grad_norm: 0.8469346340408113, iteration: 92649
loss: 1.0124036073684692,grad_norm: 0.9958529594674426, iteration: 92650
loss: 1.0058238506317139,grad_norm: 0.9785161422983077, iteration: 92651
loss: 1.0520509481430054,grad_norm: 0.9999991327387142, iteration: 92652
loss: 1.0229536294937134,grad_norm: 0.9710710085950449, iteration: 92653
loss: 1.1128544807434082,grad_norm: 0.931259242865823, iteration: 92654
loss: 0.9874783754348755,grad_norm: 0.9999990216953708, iteration: 92655
loss: 0.9489157795906067,grad_norm: 0.9091036663211705, iteration: 92656
loss: 1.0489380359649658,grad_norm: 0.9651480931051634, iteration: 92657
loss: 1.0000659227371216,grad_norm: 0.9999995893989152, iteration: 92658
loss: 1.0700031518936157,grad_norm: 0.999999875541752, iteration: 92659
loss: 0.9692454934120178,grad_norm: 0.8536310789525249, iteration: 92660
loss: 1.0259413719177246,grad_norm: 0.9120852801118747, iteration: 92661
loss: 0.9917795658111572,grad_norm: 0.9999994225374883, iteration: 92662
loss: 1.023935317993164,grad_norm: 0.9999992235346475, iteration: 92663
loss: 1.0218969583511353,grad_norm: 0.8678896264936581, iteration: 92664
loss: 1.083203673362732,grad_norm: 0.99999917122658, iteration: 92665
loss: 1.0122803449630737,grad_norm: 0.9763996920556147, iteration: 92666
loss: 1.0557661056518555,grad_norm: 0.999999513308193, iteration: 92667
loss: 1.0309709310531616,grad_norm: 0.9060444751142716, iteration: 92668
loss: 1.0164859294891357,grad_norm: 0.9840898177163049, iteration: 92669
loss: 0.9606456160545349,grad_norm: 0.838485823052133, iteration: 92670
loss: 1.0428955554962158,grad_norm: 0.9999990764892238, iteration: 92671
loss: 1.0719749927520752,grad_norm: 0.9999997349424613, iteration: 92672
loss: 1.0113842487335205,grad_norm: 0.8486579284555159, iteration: 92673
loss: 1.0332101583480835,grad_norm: 0.9999992078655988, iteration: 92674
loss: 0.9953406453132629,grad_norm: 0.9999994130519426, iteration: 92675
loss: 0.9990973472595215,grad_norm: 0.9999992387720776, iteration: 92676
loss: 1.0357791185379028,grad_norm: 0.9988777112964937, iteration: 92677
loss: 1.0694429874420166,grad_norm: 0.9999991228901631, iteration: 92678
loss: 1.0404151678085327,grad_norm: 0.9999993465421433, iteration: 92679
loss: 1.0563037395477295,grad_norm: 0.962852601484686, iteration: 92680
loss: 1.046927809715271,grad_norm: 0.865958006946976, iteration: 92681
loss: 0.9983102679252625,grad_norm: 0.854263439218637, iteration: 92682
loss: 1.0388330221176147,grad_norm: 0.9999993384144679, iteration: 92683
loss: 1.0957273244857788,grad_norm: 0.9999995119985322, iteration: 92684
loss: 1.040876030921936,grad_norm: 0.99999984486242, iteration: 92685
loss: 1.007337212562561,grad_norm: 0.9999992267899329, iteration: 92686
loss: 0.9811592698097229,grad_norm: 0.9999990402521011, iteration: 92687
loss: 1.0870412588119507,grad_norm: 0.9999991003955954, iteration: 92688
loss: 1.014745831489563,grad_norm: 0.903540169664489, iteration: 92689
loss: 1.0657894611358643,grad_norm: 0.9999998878854407, iteration: 92690
loss: 1.0729708671569824,grad_norm: 0.9504897906358033, iteration: 92691
loss: 0.976026713848114,grad_norm: 0.8802328630438684, iteration: 92692
loss: 1.0164705514907837,grad_norm: 0.9999992587192802, iteration: 92693
loss: 1.0223231315612793,grad_norm: 0.9999992789836389, iteration: 92694
loss: 1.0178964138031006,grad_norm: 0.9432323643872005, iteration: 92695
loss: 1.0565260648727417,grad_norm: 0.9852066129991065, iteration: 92696
loss: 1.0547451972961426,grad_norm: 0.9762053392157121, iteration: 92697
loss: 1.0583293437957764,grad_norm: 0.9999993065333029, iteration: 92698
loss: 1.0183733701705933,grad_norm: 0.9999992945331573, iteration: 92699
loss: 1.000441312789917,grad_norm: 0.9593491708085323, iteration: 92700
loss: 0.9849871397018433,grad_norm: 0.9999989149283705, iteration: 92701
loss: 1.027056097984314,grad_norm: 0.9999996011991569, iteration: 92702
loss: 1.0184351205825806,grad_norm: 0.9247693903810703, iteration: 92703
loss: 1.1208558082580566,grad_norm: 0.9334381259228515, iteration: 92704
loss: 0.9978668093681335,grad_norm: 0.9999992567431691, iteration: 92705
loss: 0.9945186376571655,grad_norm: 0.9999993329537401, iteration: 92706
loss: 1.0388954877853394,grad_norm: 0.9999996125683095, iteration: 92707
loss: 1.1752784252166748,grad_norm: 0.9999998439926876, iteration: 92708
loss: 1.0413594245910645,grad_norm: 0.9999990219706243, iteration: 92709
loss: 1.0489845275878906,grad_norm: 0.769247314569878, iteration: 92710
loss: 1.014683485031128,grad_norm: 0.9999996008850973, iteration: 92711
loss: 1.1214278936386108,grad_norm: 0.9999993004263108, iteration: 92712
loss: 0.9714410901069641,grad_norm: 0.9999991523817682, iteration: 92713
loss: 1.0873878002166748,grad_norm: 0.8739629096146728, iteration: 92714
loss: 1.022522211074829,grad_norm: 0.9999997885725261, iteration: 92715
loss: 1.0125519037246704,grad_norm: 0.9650358273768872, iteration: 92716
loss: 1.0050681829452515,grad_norm: 0.9798894798621824, iteration: 92717
loss: 0.9961073994636536,grad_norm: 0.9999990573790314, iteration: 92718
loss: 1.0200310945510864,grad_norm: 0.9999995789900546, iteration: 92719
loss: 1.0364450216293335,grad_norm: 0.9999991071757846, iteration: 92720
loss: 1.1627382040023804,grad_norm: 0.953830680907226, iteration: 92721
loss: 1.0991644859313965,grad_norm: 0.999999237483775, iteration: 92722
loss: 1.237691044807434,grad_norm: 0.9999995976599197, iteration: 92723
loss: 1.0011529922485352,grad_norm: 0.9999991539390669, iteration: 92724
loss: 1.0304511785507202,grad_norm: 0.9999993451385075, iteration: 92725
loss: 1.054110050201416,grad_norm: 0.932669763689751, iteration: 92726
loss: 1.0816210508346558,grad_norm: 0.9304819632136745, iteration: 92727
loss: 1.0101350545883179,grad_norm: 0.7755020172605632, iteration: 92728
loss: 1.0673909187316895,grad_norm: 0.9999999799661909, iteration: 92729
loss: 1.0721518993377686,grad_norm: 0.9999995785558544, iteration: 92730
loss: 1.0589463710784912,grad_norm: 0.8966424349141239, iteration: 92731
loss: 0.981618344783783,grad_norm: 0.9177512223269879, iteration: 92732
loss: 1.0497124195098877,grad_norm: 0.9999994477949313, iteration: 92733
loss: 1.020297646522522,grad_norm: 0.9999996736708184, iteration: 92734
loss: 1.0785629749298096,grad_norm: 0.9999993866610284, iteration: 92735
loss: 1.0689748525619507,grad_norm: 0.9999999012973236, iteration: 92736
loss: 1.1281768083572388,grad_norm: 0.9999998732319743, iteration: 92737
loss: 1.0816049575805664,grad_norm: 0.9999994648220764, iteration: 92738
loss: 1.029459834098816,grad_norm: 0.9127394360008492, iteration: 92739
loss: 1.0415819883346558,grad_norm: 0.9999991915760562, iteration: 92740
loss: 1.0687915086746216,grad_norm: 0.9999992173623755, iteration: 92741
loss: 1.137830376625061,grad_norm: 0.9043643125423486, iteration: 92742
loss: 1.1354050636291504,grad_norm: 0.9999998243123077, iteration: 92743
loss: 1.030929446220398,grad_norm: 0.9999993634613431, iteration: 92744
loss: 1.1361238956451416,grad_norm: 0.9999993041619955, iteration: 92745
loss: 1.0918166637420654,grad_norm: 0.9999991650560234, iteration: 92746
loss: 1.0623847246170044,grad_norm: 0.7789289594276104, iteration: 92747
loss: 1.0998765230178833,grad_norm: 0.9227191751835286, iteration: 92748
loss: 1.0926105976104736,grad_norm: 0.9999992625542122, iteration: 92749
loss: 0.969894289970398,grad_norm: 0.9999990097476633, iteration: 92750
loss: 1.0824034214019775,grad_norm: 0.9999998867662178, iteration: 92751
loss: 1.024590015411377,grad_norm: 0.9999996181553146, iteration: 92752
loss: 0.9986004829406738,grad_norm: 0.9999995364567966, iteration: 92753
loss: 0.9720448851585388,grad_norm: 0.9999992312007777, iteration: 92754
loss: 1.0701744556427002,grad_norm: 0.9999996963693576, iteration: 92755
loss: 1.062983512878418,grad_norm: 0.9999994740474685, iteration: 92756
loss: 1.0403861999511719,grad_norm: 0.8289886345302029, iteration: 92757
loss: 1.0339783430099487,grad_norm: 0.9999990908129495, iteration: 92758
loss: 1.047088861465454,grad_norm: 0.9304079902578614, iteration: 92759
loss: 1.1120491027832031,grad_norm: 0.9999992406269662, iteration: 92760
loss: 1.0176297426223755,grad_norm: 0.9999990344417282, iteration: 92761
loss: 1.0357880592346191,grad_norm: 0.9382748461389538, iteration: 92762
loss: 1.0727760791778564,grad_norm: 0.9999991665332214, iteration: 92763
loss: 1.0676308870315552,grad_norm: 0.9999995170260185, iteration: 92764
loss: 1.0443536043167114,grad_norm: 0.9999998438542247, iteration: 92765
loss: 1.0071076154708862,grad_norm: 0.8998260746522599, iteration: 92766
loss: 1.0575915575027466,grad_norm: 0.9999996893192139, iteration: 92767
loss: 1.155871868133545,grad_norm: 0.9999994400866565, iteration: 92768
loss: 1.0072036981582642,grad_norm: 0.9999991678972215, iteration: 92769
loss: 1.0114213228225708,grad_norm: 0.9999995842548, iteration: 92770
loss: 1.0236284732818604,grad_norm: 0.9999995196673045, iteration: 92771
loss: 1.0162395238876343,grad_norm: 0.8969710471299254, iteration: 92772
loss: 1.0117563009262085,grad_norm: 0.973487670858543, iteration: 92773
loss: 1.073811650276184,grad_norm: 0.9999999062314845, iteration: 92774
loss: 1.0676287412643433,grad_norm: 0.9999995404094795, iteration: 92775
loss: 1.0245811939239502,grad_norm: 0.9999997251194758, iteration: 92776
loss: 1.0195056200027466,grad_norm: 0.9999990668229718, iteration: 92777
loss: 1.0603376626968384,grad_norm: 0.9999994402328082, iteration: 92778
loss: 1.070574402809143,grad_norm: 0.999999670248191, iteration: 92779
loss: 0.9987481832504272,grad_norm: 0.9999995869629446, iteration: 92780
loss: 1.0795230865478516,grad_norm: 0.9999994002797468, iteration: 92781
loss: 1.0236341953277588,grad_norm: 0.9999992270299819, iteration: 92782
loss: 1.0163674354553223,grad_norm: 0.9858818655920415, iteration: 92783
loss: 1.1397809982299805,grad_norm: 0.999999857877336, iteration: 92784
loss: 0.9823305010795593,grad_norm: 0.9999990915875618, iteration: 92785
loss: 1.023815631866455,grad_norm: 0.9999991732895779, iteration: 92786
loss: 1.0360941886901855,grad_norm: 0.9852491618950587, iteration: 92787
loss: 0.9819952845573425,grad_norm: 0.9265737962214526, iteration: 92788
loss: 1.0251883268356323,grad_norm: 0.9999993161807104, iteration: 92789
loss: 1.0124775171279907,grad_norm: 0.9999991391058436, iteration: 92790
loss: 1.0445621013641357,grad_norm: 0.8240059681029002, iteration: 92791
loss: 1.015547275543213,grad_norm: 0.9876639471819888, iteration: 92792
loss: 1.0335502624511719,grad_norm: 0.9999998125275814, iteration: 92793
loss: 0.9882358908653259,grad_norm: 0.8454656346744326, iteration: 92794
loss: 1.0380024909973145,grad_norm: 0.9999995531271432, iteration: 92795
loss: 0.9785294532775879,grad_norm: 0.9156242264984592, iteration: 92796
loss: 1.1047053337097168,grad_norm: 0.9999997624367994, iteration: 92797
loss: 0.9884976744651794,grad_norm: 0.9442151567504337, iteration: 92798
loss: 1.0848305225372314,grad_norm: 0.9263568741047964, iteration: 92799
loss: 1.0449495315551758,grad_norm: 0.9999990772534982, iteration: 92800
loss: 1.0486868619918823,grad_norm: 0.991669385164444, iteration: 92801
loss: 1.048689603805542,grad_norm: 0.999999710284362, iteration: 92802
loss: 1.0615495443344116,grad_norm: 0.999999173402751, iteration: 92803
loss: 1.1152547597885132,grad_norm: 0.9999997530343774, iteration: 92804
loss: 1.0083926916122437,grad_norm: 0.9153132067833603, iteration: 92805
loss: 0.9851203560829163,grad_norm: 0.8708902080257127, iteration: 92806
loss: 1.068041443824768,grad_norm: 0.8268974230854965, iteration: 92807
loss: 1.0246723890304565,grad_norm: 0.999999975921914, iteration: 92808
loss: 0.9951344132423401,grad_norm: 0.9776514853150362, iteration: 92809
loss: 1.0150264501571655,grad_norm: 0.9999992595748355, iteration: 92810
loss: 0.9803927540779114,grad_norm: 0.999999201493385, iteration: 92811
loss: 1.0625510215759277,grad_norm: 0.9999996112914543, iteration: 92812
loss: 1.0138059854507446,grad_norm: 0.8786287590866263, iteration: 92813
loss: 1.0756597518920898,grad_norm: 0.9999998207519338, iteration: 92814
loss: 1.0220844745635986,grad_norm: 1.000000016324153, iteration: 92815
loss: 1.0065001249313354,grad_norm: 0.9999992027853902, iteration: 92816
loss: 0.9758273363113403,grad_norm: 0.9999995503457779, iteration: 92817
loss: 0.978934109210968,grad_norm: 0.9999991234880727, iteration: 92818
loss: 1.046999216079712,grad_norm: 0.9999999040081052, iteration: 92819
loss: 1.1075568199157715,grad_norm: 0.9999998402633251, iteration: 92820
loss: 1.0816236734390259,grad_norm: 0.9999998266007741, iteration: 92821
loss: 1.0518620014190674,grad_norm: 0.9999997084254201, iteration: 92822
loss: 1.097090721130371,grad_norm: 0.9999994841243883, iteration: 92823
loss: 0.9888826012611389,grad_norm: 0.9088051128572119, iteration: 92824
loss: 0.9977588653564453,grad_norm: 0.9999992427180665, iteration: 92825
loss: 0.999372124671936,grad_norm: 0.9999991295360793, iteration: 92826
loss: 1.0681135654449463,grad_norm: 0.9999996610824221, iteration: 92827
loss: 1.0309901237487793,grad_norm: 0.9435503452726408, iteration: 92828
loss: 1.1597660779953003,grad_norm: 0.9999996897350182, iteration: 92829
loss: 1.1091490983963013,grad_norm: 0.999999881296332, iteration: 92830
loss: 1.069562315940857,grad_norm: 0.9999993316804071, iteration: 92831
loss: 0.9909230470657349,grad_norm: 0.8926618627359172, iteration: 92832
loss: 0.996208667755127,grad_norm: 0.9999995408929773, iteration: 92833
loss: 1.0134278535842896,grad_norm: 0.9999998401610444, iteration: 92834
loss: 1.1153751611709595,grad_norm: 0.9999991466430384, iteration: 92835
loss: 1.091346263885498,grad_norm: 0.8837833310037136, iteration: 92836
loss: 0.9680052995681763,grad_norm: 0.7198852715883253, iteration: 92837
loss: 1.0065042972564697,grad_norm: 0.9999992279783745, iteration: 92838
loss: 1.1345086097717285,grad_norm: 0.9999998240205397, iteration: 92839
loss: 1.0535107851028442,grad_norm: 0.9457753654520525, iteration: 92840
loss: 1.0278587341308594,grad_norm: 0.9999995970237757, iteration: 92841
loss: 1.0771641731262207,grad_norm: 0.9999999434343523, iteration: 92842
loss: 0.9662313461303711,grad_norm: 0.999999116454883, iteration: 92843
loss: 1.0375038385391235,grad_norm: 0.9160076047601426, iteration: 92844
loss: 0.9553602933883667,grad_norm: 0.9442219539034441, iteration: 92845
loss: 1.0528653860092163,grad_norm: 0.9999999151365807, iteration: 92846
loss: 1.1549116373062134,grad_norm: 0.9999999373220501, iteration: 92847
loss: 1.0375562906265259,grad_norm: 0.9999995699373101, iteration: 92848
loss: 1.0035606622695923,grad_norm: 0.7399387413387871, iteration: 92849
loss: 1.1452678442001343,grad_norm: 0.9999996924507847, iteration: 92850
loss: 1.081628441810608,grad_norm: 0.9999994371921228, iteration: 92851
loss: 1.1454216241836548,grad_norm: 0.9999997430498674, iteration: 92852
loss: 1.054677128791809,grad_norm: 0.9999997478302003, iteration: 92853
loss: 1.196203589439392,grad_norm: 0.9999993948383756, iteration: 92854
loss: 0.9958280920982361,grad_norm: 0.8981719227938834, iteration: 92855
loss: 1.1449322700500488,grad_norm: 0.9999991329011894, iteration: 92856
loss: 1.0426256656646729,grad_norm: 0.9999990566282083, iteration: 92857
loss: 1.0519635677337646,grad_norm: 0.9999993841190706, iteration: 92858
loss: 1.0975725650787354,grad_norm: 0.9999995301105882, iteration: 92859
loss: 1.0177617073059082,grad_norm: 0.9999992444762938, iteration: 92860
loss: 1.1284754276275635,grad_norm: 0.9999998855488768, iteration: 92861
loss: 1.0168534517288208,grad_norm: 1.0000000399559312, iteration: 92862
loss: 0.9871372580528259,grad_norm: 0.898485713161922, iteration: 92863
loss: 1.0361689329147339,grad_norm: 0.9999999049504815, iteration: 92864
loss: 1.0284175872802734,grad_norm: 0.8308207903064876, iteration: 92865
loss: 1.0231653451919556,grad_norm: 0.9999994425200394, iteration: 92866
loss: 1.013540506362915,grad_norm: 0.7473372389864086, iteration: 92867
loss: 1.1407634019851685,grad_norm: 0.9999992113780182, iteration: 92868
loss: 1.0563942193984985,grad_norm: 0.9999997729904222, iteration: 92869
loss: 1.1461617946624756,grad_norm: 0.9999994731537889, iteration: 92870
loss: 1.0357130765914917,grad_norm: 0.9999997002935272, iteration: 92871
loss: 1.0706356763839722,grad_norm: 0.9967102918828347, iteration: 92872
loss: 0.9852433204650879,grad_norm: 0.8917127993330862, iteration: 92873
loss: 1.0895256996154785,grad_norm: 0.9999999263299761, iteration: 92874
loss: 0.984015703201294,grad_norm: 0.99999911484564, iteration: 92875
loss: 1.1756728887557983,grad_norm: 0.9999993792388585, iteration: 92876
loss: 1.035447359085083,grad_norm: 0.9999995954085739, iteration: 92877
loss: 1.0634757280349731,grad_norm: 0.9999996883816642, iteration: 92878
loss: 1.2003414630889893,grad_norm: 0.9999999208974201, iteration: 92879
loss: 1.0859414339065552,grad_norm: 0.9999993929815596, iteration: 92880
loss: 1.0737546682357788,grad_norm: 0.9999994636371861, iteration: 92881
loss: 0.9936624765396118,grad_norm: 0.9999992968750337, iteration: 92882
loss: 1.038227915763855,grad_norm: 0.9999997652418166, iteration: 92883
loss: 1.0105341672897339,grad_norm: 0.9999996347026348, iteration: 92884
loss: 1.1719380617141724,grad_norm: 0.9999995824884385, iteration: 92885
loss: 1.0773366689682007,grad_norm: 0.9696861461355187, iteration: 92886
loss: 1.0122389793395996,grad_norm: 0.999999233816742, iteration: 92887
loss: 0.9970436096191406,grad_norm: 0.9999996985389121, iteration: 92888
loss: 1.0030471086502075,grad_norm: 0.9999992400383871, iteration: 92889
loss: 1.0193767547607422,grad_norm: 0.9999998772051506, iteration: 92890
loss: 1.0482126474380493,grad_norm: 0.8608189154044181, iteration: 92891
loss: 1.0219855308532715,grad_norm: 0.9999996177188756, iteration: 92892
loss: 1.0206923484802246,grad_norm: 0.9999996517971419, iteration: 92893
loss: 0.9971685409545898,grad_norm: 0.976935368611176, iteration: 92894
loss: 1.017680287361145,grad_norm: 0.9999999852047862, iteration: 92895
loss: 1.0118361711502075,grad_norm: 0.9999990900996417, iteration: 92896
loss: 1.1485180854797363,grad_norm: 0.9999992049729152, iteration: 92897
loss: 1.0509206056594849,grad_norm: 0.9999996185013482, iteration: 92898
loss: 1.2145428657531738,grad_norm: 0.9999998146670326, iteration: 92899
loss: 1.0433762073516846,grad_norm: 0.911592680858065, iteration: 92900
loss: 1.0460095405578613,grad_norm: 0.938688415244744, iteration: 92901
loss: 1.0233464241027832,grad_norm: 0.7969359485242439, iteration: 92902
loss: 1.087631344795227,grad_norm: 0.8933929732278262, iteration: 92903
loss: 1.0149247646331787,grad_norm: 0.8765708410497802, iteration: 92904
loss: 1.0766887664794922,grad_norm: 0.999999803514708, iteration: 92905
loss: 1.0215760469436646,grad_norm: 0.82651452767104, iteration: 92906
loss: 0.9588152766227722,grad_norm: 0.9366578166159357, iteration: 92907
loss: 0.9871052503585815,grad_norm: 0.8566572244000817, iteration: 92908
loss: 1.1500797271728516,grad_norm: 0.9999997137396495, iteration: 92909
loss: 1.057120680809021,grad_norm: 0.9999993697801068, iteration: 92910
loss: 1.040155291557312,grad_norm: 0.999999585914589, iteration: 92911
loss: 1.0471633672714233,grad_norm: 0.9999992579435483, iteration: 92912
loss: 1.0203473567962646,grad_norm: 0.9422536262430526, iteration: 92913
loss: 1.032250165939331,grad_norm: 0.999999740835203, iteration: 92914
loss: 1.008339762687683,grad_norm: 0.9999996130999348, iteration: 92915
loss: 0.9773637652397156,grad_norm: 0.8301730254711546, iteration: 92916
loss: 1.010968804359436,grad_norm: 0.9319018055073959, iteration: 92917
loss: 1.1118297576904297,grad_norm: 0.9999999769844777, iteration: 92918
loss: 1.0368744134902954,grad_norm: 0.9999990979254052, iteration: 92919
loss: 1.0037469863891602,grad_norm: 0.9523916937748413, iteration: 92920
loss: 1.034218668937683,grad_norm: 0.9736318831570184, iteration: 92921
loss: 1.0037940740585327,grad_norm: 0.8766518757903915, iteration: 92922
loss: 1.004165768623352,grad_norm: 0.9999993483956179, iteration: 92923
loss: 1.0873384475708008,grad_norm: 0.9116085402116618, iteration: 92924
loss: 0.9952828288078308,grad_norm: 0.7944753363333393, iteration: 92925
loss: 1.0977696180343628,grad_norm: 0.9999997392309428, iteration: 92926
loss: 1.0709476470947266,grad_norm: 0.9999990878246354, iteration: 92927
loss: 1.0325254201889038,grad_norm: 0.9801574021314644, iteration: 92928
loss: 1.0037585496902466,grad_norm: 0.8631493342865371, iteration: 92929
loss: 1.070491909980774,grad_norm: 1.0000000266193543, iteration: 92930
loss: 1.0116326808929443,grad_norm: 0.9310031629978446, iteration: 92931
loss: 1.0089377164840698,grad_norm: 0.9999989918263389, iteration: 92932
loss: 0.994637668132782,grad_norm: 0.7802395963034838, iteration: 92933
loss: 1.021536111831665,grad_norm: 0.9999991046784416, iteration: 92934
loss: 0.9849940538406372,grad_norm: 0.8713352972419357, iteration: 92935
loss: 1.033278226852417,grad_norm: 0.999999204012186, iteration: 92936
loss: 1.0874106884002686,grad_norm: 0.9999997588423364, iteration: 92937
loss: 1.044136881828308,grad_norm: 0.9999991959434626, iteration: 92938
loss: 0.9901178479194641,grad_norm: 0.9389594263567859, iteration: 92939
loss: 1.3096727132797241,grad_norm: 0.9999999694748483, iteration: 92940
loss: 1.003574252128601,grad_norm: 0.9999991632511566, iteration: 92941
loss: 1.0846872329711914,grad_norm: 0.9999998180847636, iteration: 92942
loss: 1.0360485315322876,grad_norm: 0.999999117359678, iteration: 92943
loss: 1.03249192237854,grad_norm: 0.9760435671596359, iteration: 92944
loss: 0.9965013265609741,grad_norm: 0.9318084649666944, iteration: 92945
loss: 1.064359426498413,grad_norm: 0.9999993950577077, iteration: 92946
loss: 1.0112553834915161,grad_norm: 0.9412041076762295, iteration: 92947
loss: 1.0677640438079834,grad_norm: 0.9892049386520202, iteration: 92948
loss: 1.1290072202682495,grad_norm: 0.9999999381446949, iteration: 92949
loss: 1.1815831661224365,grad_norm: 1.0000000491304195, iteration: 92950
loss: 1.0188357830047607,grad_norm: 0.8591605262211209, iteration: 92951
loss: 0.9958255290985107,grad_norm: 0.9962025772803129, iteration: 92952
loss: 1.055778980255127,grad_norm: 0.8603263374167213, iteration: 92953
loss: 0.9983543753623962,grad_norm: 0.9139596746973069, iteration: 92954
loss: 1.0516127347946167,grad_norm: 0.999999946488871, iteration: 92955
loss: 0.9541522860527039,grad_norm: 0.9049118938572179, iteration: 92956
loss: 1.0247976779937744,grad_norm: 0.9999992662851479, iteration: 92957
loss: 1.0763094425201416,grad_norm: 0.9847661893312641, iteration: 92958
loss: 1.0364347696304321,grad_norm: 0.9999992208170274, iteration: 92959
loss: 1.009291648864746,grad_norm: 0.8760164329569274, iteration: 92960
loss: 1.0959463119506836,grad_norm: 0.9999996156680281, iteration: 92961
loss: 0.9726741909980774,grad_norm: 0.8664250925269213, iteration: 92962
loss: 1.136121153831482,grad_norm: 0.9999994357301207, iteration: 92963
loss: 1.0506411790847778,grad_norm: 0.9932045665549946, iteration: 92964
loss: 1.1690398454666138,grad_norm: 0.9999994842218363, iteration: 92965
loss: 1.0508755445480347,grad_norm: 0.9999991647748634, iteration: 92966
loss: 1.1021959781646729,grad_norm: 0.8553829876445329, iteration: 92967
loss: 1.0856081247329712,grad_norm: 0.9999999258902361, iteration: 92968
loss: 1.2552319765090942,grad_norm: 0.9999993652503373, iteration: 92969
loss: 1.0747343301773071,grad_norm: 0.9999992194257711, iteration: 92970
loss: 1.0906658172607422,grad_norm: 0.9784736911739369, iteration: 92971
loss: 1.2543630599975586,grad_norm: 0.9999997167680774, iteration: 92972
loss: 1.1385778188705444,grad_norm: 0.9999996293541945, iteration: 92973
loss: 1.0215314626693726,grad_norm: 0.9999991668203065, iteration: 92974
loss: 0.9952282905578613,grad_norm: 0.9183465531975213, iteration: 92975
loss: 1.1196900606155396,grad_norm: 0.9999993094755762, iteration: 92976
loss: 1.016666054725647,grad_norm: 0.9999990021162989, iteration: 92977
loss: 1.0341349840164185,grad_norm: 0.9993607866227794, iteration: 92978
loss: 1.0585508346557617,grad_norm: 0.9999990824973021, iteration: 92979
loss: 1.0311174392700195,grad_norm: 0.922122947234623, iteration: 92980
loss: 1.00239896774292,grad_norm: 0.9999992472165978, iteration: 92981
loss: 1.1981967687606812,grad_norm: 0.9999994148153298, iteration: 92982
loss: 1.0822951793670654,grad_norm: 0.9999995073923039, iteration: 92983
loss: 0.9897798895835876,grad_norm: 0.9958410240329699, iteration: 92984
loss: 1.0052465200424194,grad_norm: 0.9665626714368621, iteration: 92985
loss: 0.9980339407920837,grad_norm: 0.9999992819194377, iteration: 92986
loss: 1.1087803840637207,grad_norm: 1.00000003962633, iteration: 92987
loss: 1.076554536819458,grad_norm: 0.9999991602610022, iteration: 92988
loss: 1.1903586387634277,grad_norm: 0.9999998880675165, iteration: 92989
loss: 1.0572952032089233,grad_norm: 0.9999997189634833, iteration: 92990
loss: 0.9907130599021912,grad_norm: 0.9999989978482394, iteration: 92991
loss: 1.0768917798995972,grad_norm: 0.9999998613508092, iteration: 92992
loss: 1.1801985502243042,grad_norm: 0.9999995270896638, iteration: 92993
loss: 1.1416727304458618,grad_norm: 0.999999837856784, iteration: 92994
loss: 1.0182223320007324,grad_norm: 0.999999134158807, iteration: 92995
loss: 1.2373014688491821,grad_norm: 0.9999995116610021, iteration: 92996
loss: 1.2425645589828491,grad_norm: 0.9999994346008184, iteration: 92997
loss: 1.053401231765747,grad_norm: 0.9784160238791835, iteration: 92998
loss: 1.0688817501068115,grad_norm: 0.9999990589442577, iteration: 92999
loss: 1.5338125228881836,grad_norm: 0.9999999208916887, iteration: 93000
loss: 1.1062973737716675,grad_norm: 0.9999992443093025, iteration: 93001
loss: 1.2386479377746582,grad_norm: 0.9999993667666877, iteration: 93002
loss: 1.0594477653503418,grad_norm: 0.9999991683661138, iteration: 93003
loss: 1.0813018083572388,grad_norm: 0.9999992691623066, iteration: 93004
loss: 1.6058011054992676,grad_norm: 0.9999997399167637, iteration: 93005
loss: 1.2611216306686401,grad_norm: 0.9999998741665198, iteration: 93006
loss: 1.089369535446167,grad_norm: 0.9999993160309008, iteration: 93007
loss: 1.1496058702468872,grad_norm: 0.9999991746716873, iteration: 93008
loss: 1.2506486177444458,grad_norm: 0.9999997800984608, iteration: 93009
loss: 1.0391089916229248,grad_norm: 0.8738480819908039, iteration: 93010
loss: 1.1748952865600586,grad_norm: 0.9999992863658567, iteration: 93011
loss: 1.1439427137374878,grad_norm: 0.9999993263670931, iteration: 93012
loss: 1.0802489519119263,grad_norm: 0.9999991811146005, iteration: 93013
loss: 1.0130367279052734,grad_norm: 0.9999992920489588, iteration: 93014
loss: 1.1298857927322388,grad_norm: 0.9999997963938462, iteration: 93015
loss: 1.0785295963287354,grad_norm: 0.9999994387200964, iteration: 93016
loss: 1.2495237588882446,grad_norm: 0.9999994272780242, iteration: 93017
loss: 1.0972561836242676,grad_norm: 0.99999923907683, iteration: 93018
loss: 1.1625354290008545,grad_norm: 0.9999993414458583, iteration: 93019
loss: 1.2481064796447754,grad_norm: 0.9999993895797695, iteration: 93020
loss: 1.154268741607666,grad_norm: 0.9999992795127696, iteration: 93021
loss: 1.1215064525604248,grad_norm: 0.9999993673010787, iteration: 93022
loss: 1.1840410232543945,grad_norm: 0.9999994300675813, iteration: 93023
loss: 1.242354393005371,grad_norm: 0.9999992764568292, iteration: 93024
loss: 1.1855961084365845,grad_norm: 0.9999998186393593, iteration: 93025
loss: 0.9723663330078125,grad_norm: 0.9542514712196454, iteration: 93026
loss: 1.3491290807724,grad_norm: 0.9999999303189389, iteration: 93027
loss: 1.2088247537612915,grad_norm: 0.9999997239256383, iteration: 93028
loss: 1.051470160484314,grad_norm: 0.9999992461686122, iteration: 93029
loss: 1.4867116212844849,grad_norm: 0.99999982148723, iteration: 93030
loss: 1.2378113269805908,grad_norm: 0.9999993723905877, iteration: 93031
loss: 1.4037688970565796,grad_norm: 1.0000000202801402, iteration: 93032
loss: 1.1030592918395996,grad_norm: 0.9999993398172942, iteration: 93033
loss: 1.0777413845062256,grad_norm: 0.9999992553567641, iteration: 93034
loss: 1.0256460905075073,grad_norm: 0.9800725597759533, iteration: 93035
loss: 0.9741755127906799,grad_norm: 0.9347522555968554, iteration: 93036
loss: 1.0361577272415161,grad_norm: 0.9999991393532646, iteration: 93037
loss: 1.1153032779693604,grad_norm: 0.9999991239343282, iteration: 93038
loss: 0.9894546270370483,grad_norm: 0.9327934275799737, iteration: 93039
loss: 1.1209501028060913,grad_norm: 0.9999992513829989, iteration: 93040
loss: 1.1194744110107422,grad_norm: 0.9999994462543322, iteration: 93041
loss: 1.18795907497406,grad_norm: 0.9999998503041746, iteration: 93042
loss: 1.0539779663085938,grad_norm: 0.999999169686757, iteration: 93043
loss: 0.965732216835022,grad_norm: 0.9999992176813566, iteration: 93044
loss: 1.0227832794189453,grad_norm: 0.9999992206372845, iteration: 93045
loss: 1.0246083736419678,grad_norm: 0.9999991999454326, iteration: 93046
loss: 1.1394487619400024,grad_norm: 0.999999163392458, iteration: 93047
loss: 1.0404423475265503,grad_norm: 0.9999991707569723, iteration: 93048
loss: 1.2493563890457153,grad_norm: 0.9999996823833871, iteration: 93049
loss: 1.282982349395752,grad_norm: 0.9999996705829796, iteration: 93050
loss: 1.1027840375900269,grad_norm: 0.9999990156185444, iteration: 93051
loss: 1.0540566444396973,grad_norm: 0.9999991960413778, iteration: 93052
loss: 1.055090069770813,grad_norm: 0.9999995006689716, iteration: 93053
loss: 1.0141918659210205,grad_norm: 0.9906232237394433, iteration: 93054
loss: 1.112585186958313,grad_norm: 0.9999994950088854, iteration: 93055
loss: 1.0399115085601807,grad_norm: 0.9999991454555288, iteration: 93056
loss: 1.0598469972610474,grad_norm: 0.8489568847248886, iteration: 93057
loss: 1.0545541048049927,grad_norm: 0.9460877352721875, iteration: 93058
loss: 1.0357496738433838,grad_norm: 0.999999426678847, iteration: 93059
loss: 1.344078779220581,grad_norm: 0.9999993551181443, iteration: 93060
loss: 1.0570155382156372,grad_norm: 0.9999990238979634, iteration: 93061
loss: 1.065433144569397,grad_norm: 0.9999994861146508, iteration: 93062
loss: 1.0377596616744995,grad_norm: 0.959604249222005, iteration: 93063
loss: 1.0333610773086548,grad_norm: 0.849282704743075, iteration: 93064
loss: 0.9779964685440063,grad_norm: 0.9999991891307509, iteration: 93065
loss: 1.1191926002502441,grad_norm: 0.87622858079397, iteration: 93066
loss: 1.0424402952194214,grad_norm: 0.8628049754200101, iteration: 93067
loss: 1.0662884712219238,grad_norm: 1.0000000168985237, iteration: 93068
loss: 1.007147192955017,grad_norm: 0.9999997685348673, iteration: 93069
loss: 1.034780740737915,grad_norm: 0.8995342811009793, iteration: 93070
loss: 1.123392105102539,grad_norm: 0.9999990369204141, iteration: 93071
loss: 1.0253796577453613,grad_norm: 0.9999996292645045, iteration: 93072
loss: 1.1335253715515137,grad_norm: 0.99999988063806, iteration: 93073
loss: 1.1204452514648438,grad_norm: 0.9999995346467941, iteration: 93074
loss: 1.0697492361068726,grad_norm: 0.9999994410227317, iteration: 93075
loss: 1.2866969108581543,grad_norm: 0.9999997256220435, iteration: 93076
loss: 1.108009934425354,grad_norm: 0.9999994307278647, iteration: 93077
loss: 1.087851643562317,grad_norm: 0.9999993699200177, iteration: 93078
loss: 1.2112396955490112,grad_norm: 0.9999993177698857, iteration: 93079
loss: 1.0494283437728882,grad_norm: 0.9999996885771988, iteration: 93080
loss: 1.1043972969055176,grad_norm: 0.999999849792372, iteration: 93081
loss: 1.0187487602233887,grad_norm: 0.9999998987244753, iteration: 93082
loss: 1.052631139755249,grad_norm: 0.9999991321330786, iteration: 93083
loss: 1.008480429649353,grad_norm: 0.9999994180191496, iteration: 93084
loss: 1.0056020021438599,grad_norm: 0.9904579457358915, iteration: 93085
loss: 0.9999029040336609,grad_norm: 0.894395516325654, iteration: 93086
loss: 1.0158917903900146,grad_norm: 0.9999994481140676, iteration: 93087
loss: 1.1851576566696167,grad_norm: 0.9999997476455147, iteration: 93088
loss: 1.177364468574524,grad_norm: 0.999999665459523, iteration: 93089
loss: 1.0397394895553589,grad_norm: 0.878559712750421, iteration: 93090
loss: 1.1127240657806396,grad_norm: 0.9999993240090401, iteration: 93091
loss: 1.0073087215423584,grad_norm: 0.9999992568261086, iteration: 93092
loss: 1.095414161682129,grad_norm: 0.9462613194678272, iteration: 93093
loss: 1.0543503761291504,grad_norm: 1.0000000653187047, iteration: 93094
loss: 1.0253663063049316,grad_norm: 0.9999996133392135, iteration: 93095
loss: 1.032333493232727,grad_norm: 0.9056499272799272, iteration: 93096
loss: 1.0575698614120483,grad_norm: 0.9999993073635071, iteration: 93097
loss: 0.9845438599586487,grad_norm: 0.9999993250811238, iteration: 93098
loss: 0.9978557229042053,grad_norm: 0.9999995013230232, iteration: 93099
loss: 1.0559886693954468,grad_norm: 0.9999998790521041, iteration: 93100
loss: 1.0153005123138428,grad_norm: 0.999999932309724, iteration: 93101
loss: 1.0254416465759277,grad_norm: 0.7924839114005341, iteration: 93102
loss: 1.083714485168457,grad_norm: 0.9999993277624893, iteration: 93103
loss: 1.0579591989517212,grad_norm: 0.999999199786938, iteration: 93104
loss: 1.0136864185333252,grad_norm: 0.9999991824498599, iteration: 93105
loss: 1.0629583597183228,grad_norm: 0.9999993326809392, iteration: 93106
loss: 1.0498170852661133,grad_norm: 0.967129022260506, iteration: 93107
loss: 1.040293574333191,grad_norm: 0.9999992473182258, iteration: 93108
loss: 1.011393666267395,grad_norm: 0.9999990931722635, iteration: 93109
loss: 1.0638624429702759,grad_norm: 0.9999998927740564, iteration: 93110
loss: 1.0932230949401855,grad_norm: 0.9994849832087113, iteration: 93111
loss: 1.0144022703170776,grad_norm: 0.9999994064372433, iteration: 93112
loss: 1.0129629373550415,grad_norm: 0.8955234183521086, iteration: 93113
loss: 1.0040651559829712,grad_norm: 0.9294698916031295, iteration: 93114
loss: 1.0589591264724731,grad_norm: 0.9999997231940019, iteration: 93115
loss: 1.0115736722946167,grad_norm: 0.999999595958131, iteration: 93116
loss: 1.0834547281265259,grad_norm: 0.9999993054685934, iteration: 93117
loss: 1.073806881904602,grad_norm: 0.9999991698821236, iteration: 93118
loss: 1.1589998006820679,grad_norm: 0.9999993215549886, iteration: 93119
loss: 1.0845916271209717,grad_norm: 0.999999291411151, iteration: 93120
loss: 1.172694444656372,grad_norm: 0.9999995508340204, iteration: 93121
loss: 1.0041223764419556,grad_norm: 0.9999992410501553, iteration: 93122
loss: 1.0955978631973267,grad_norm: 0.9999993262546715, iteration: 93123
loss: 0.9959206581115723,grad_norm: 0.9504563731008956, iteration: 93124
loss: 1.0560381412506104,grad_norm: 0.9999995913682118, iteration: 93125
loss: 0.9794936776161194,grad_norm: 0.811691019717075, iteration: 93126
loss: 1.0967984199523926,grad_norm: 0.9999998275337503, iteration: 93127
loss: 1.027718424797058,grad_norm: 0.9999991918922893, iteration: 93128
loss: 1.0594242811203003,grad_norm: 0.9999999939086948, iteration: 93129
loss: 1.000558853149414,grad_norm: 0.9006428916614002, iteration: 93130
loss: 1.0793105363845825,grad_norm: 0.9999996981471269, iteration: 93131
loss: 1.0870245695114136,grad_norm: 0.9999999284960756, iteration: 93132
loss: 1.0938105583190918,grad_norm: 0.9999997726679761, iteration: 93133
loss: 1.0404196977615356,grad_norm: 0.999999881952321, iteration: 93134
loss: 1.0960510969161987,grad_norm: 0.9999998738277419, iteration: 93135
loss: 1.0075645446777344,grad_norm: 0.9999995152585017, iteration: 93136
loss: 0.9979785680770874,grad_norm: 0.9527005512422762, iteration: 93137
loss: 1.0115529298782349,grad_norm: 0.9999991394031078, iteration: 93138
loss: 1.020187497138977,grad_norm: 0.9812778199793447, iteration: 93139
loss: 0.9830893278121948,grad_norm: 0.9999990829487606, iteration: 93140
loss: 0.9769616723060608,grad_norm: 0.9587254148184605, iteration: 93141
loss: 0.9836551547050476,grad_norm: 0.9999992657863888, iteration: 93142
loss: 1.024814248085022,grad_norm: 0.9442705037371616, iteration: 93143
loss: 1.0269328355789185,grad_norm: 0.9898379901883064, iteration: 93144
loss: 0.9854810237884521,grad_norm: 0.9999993038374317, iteration: 93145
loss: 0.9879565834999084,grad_norm: 0.8624668851102552, iteration: 93146
loss: 1.0055540800094604,grad_norm: 0.9999994424027661, iteration: 93147
loss: 1.1837104558944702,grad_norm: 0.999999225917008, iteration: 93148
loss: 1.0414572954177856,grad_norm: 0.9999998763431396, iteration: 93149
loss: 0.981133222579956,grad_norm: 0.9999992290317005, iteration: 93150
loss: 1.0976862907409668,grad_norm: 0.9999998988753465, iteration: 93151
loss: 1.0276066064834595,grad_norm: 0.9458086128679289, iteration: 93152
loss: 1.028452754020691,grad_norm: 0.9745574954138393, iteration: 93153
loss: 1.0423153638839722,grad_norm: 0.9999997284860508, iteration: 93154
loss: 1.0099049806594849,grad_norm: 0.9999991033665738, iteration: 93155
loss: 1.0635565519332886,grad_norm: 0.8441110730341076, iteration: 93156
loss: 0.9999911785125732,grad_norm: 0.9999991569658288, iteration: 93157
loss: 1.0281652212142944,grad_norm: 0.9220702285019342, iteration: 93158
loss: 1.017177939414978,grad_norm: 0.9999990783525262, iteration: 93159
loss: 0.9818634986877441,grad_norm: 0.9579434621724822, iteration: 93160
loss: 1.0107558965682983,grad_norm: 0.9999990416186261, iteration: 93161
loss: 1.0107636451721191,grad_norm: 0.8115410677695479, iteration: 93162
loss: 0.9952684640884399,grad_norm: 0.9999992415723559, iteration: 93163
loss: 1.1232538223266602,grad_norm: 0.9999996943924553, iteration: 93164
loss: 1.0465087890625,grad_norm: 0.999999211939681, iteration: 93165
loss: 1.0251421928405762,grad_norm: 0.9999990418904917, iteration: 93166
loss: 0.9908281564712524,grad_norm: 0.8101789059243945, iteration: 93167
loss: 1.0590983629226685,grad_norm: 0.9999997490054774, iteration: 93168
loss: 1.0062956809997559,grad_norm: 0.8874388437332769, iteration: 93169
loss: 1.0400431156158447,grad_norm: 0.9999991944544241, iteration: 93170
loss: 1.0613449811935425,grad_norm: 0.9175597175407139, iteration: 93171
loss: 0.9998838901519775,grad_norm: 0.9999993322331269, iteration: 93172
loss: 0.9809019565582275,grad_norm: 0.8534380799835878, iteration: 93173
loss: 1.0177210569381714,grad_norm: 0.9999990066150896, iteration: 93174
loss: 0.9967752695083618,grad_norm: 0.9999991208722109, iteration: 93175
loss: 0.9997687935829163,grad_norm: 0.9177337769033186, iteration: 93176
loss: 1.0323774814605713,grad_norm: 0.9036784489404951, iteration: 93177
loss: 0.9845859408378601,grad_norm: 0.9015871928465302, iteration: 93178
loss: 1.0781399011611938,grad_norm: 0.9478963487490908, iteration: 93179
loss: 1.039560079574585,grad_norm: 0.9999992946231907, iteration: 93180
loss: 0.9858604073524475,grad_norm: 0.8160328578030461, iteration: 93181
loss: 0.9952633380889893,grad_norm: 0.9999997644701979, iteration: 93182
loss: 1.013598918914795,grad_norm: 0.928734465817325, iteration: 93183
loss: 0.9929028749465942,grad_norm: 0.9886360136755786, iteration: 93184
loss: 0.9787695407867432,grad_norm: 0.8583051820132586, iteration: 93185
loss: 0.966618001461029,grad_norm: 0.853224172484271, iteration: 93186
loss: 0.9860655069351196,grad_norm: 0.9979072798346765, iteration: 93187
loss: 0.9918254613876343,grad_norm: 0.8942606897197957, iteration: 93188
loss: 1.0024542808532715,grad_norm: 0.9999997008948992, iteration: 93189
loss: 1.0493532419204712,grad_norm: 0.9999996529372209, iteration: 93190
loss: 0.9797070026397705,grad_norm: 0.7353479069467046, iteration: 93191
loss: 1.0044317245483398,grad_norm: 0.973636956967587, iteration: 93192
loss: 1.0733176469802856,grad_norm: 0.9999992397035601, iteration: 93193
loss: 1.0288872718811035,grad_norm: 0.9999999330468251, iteration: 93194
loss: 1.211989402770996,grad_norm: 0.999999234320967, iteration: 93195
loss: 1.0178974866867065,grad_norm: 0.9999994580150716, iteration: 93196
loss: 1.0451141595840454,grad_norm: 0.9999990472885832, iteration: 93197
loss: 0.9756816029548645,grad_norm: 0.9999998246928499, iteration: 93198
loss: 0.9605268239974976,grad_norm: 0.8876883430379191, iteration: 93199
loss: 1.04327392578125,grad_norm: 0.9999993216148694, iteration: 93200
loss: 0.9945381283760071,grad_norm: 0.8066795329254691, iteration: 93201
loss: 1.0138903856277466,grad_norm: 0.999998994129109, iteration: 93202
loss: 1.2146490812301636,grad_norm: 0.9999998537181011, iteration: 93203
loss: 0.9737578630447388,grad_norm: 0.9999991174852113, iteration: 93204
loss: 1.172603726387024,grad_norm: 1.000000028311464, iteration: 93205
loss: 1.0207806825637817,grad_norm: 0.8620014003242125, iteration: 93206
loss: 1.123761534690857,grad_norm: 0.9999992022702859, iteration: 93207
loss: 1.0280154943466187,grad_norm: 0.9999993818740147, iteration: 93208
loss: 1.0264854431152344,grad_norm: 0.9765554393823274, iteration: 93209
loss: 1.037704586982727,grad_norm: 0.9999992352215051, iteration: 93210
loss: 1.006513237953186,grad_norm: 0.8615586847406203, iteration: 93211
loss: 1.1103843450546265,grad_norm: 0.9999992689934377, iteration: 93212
loss: 1.032287836074829,grad_norm: 0.9426606461378657, iteration: 93213
loss: 1.118942379951477,grad_norm: 0.9999994971758502, iteration: 93214
loss: 1.0918521881103516,grad_norm: 0.9999989493310275, iteration: 93215
loss: 1.0345426797866821,grad_norm: 0.9999992048180673, iteration: 93216
loss: 1.2552893161773682,grad_norm: 0.9999999251206874, iteration: 93217
loss: 1.2170042991638184,grad_norm: 0.9999998165758209, iteration: 93218
loss: 0.9862002730369568,grad_norm: 0.9999994757146756, iteration: 93219
loss: 1.1217862367630005,grad_norm: 0.9999990229943856, iteration: 93220
loss: 1.0699294805526733,grad_norm: 0.9999997851533274, iteration: 93221
loss: 1.032249927520752,grad_norm: 0.9108109223989495, iteration: 93222
loss: 1.0144070386886597,grad_norm: 0.9999993608847476, iteration: 93223
loss: 1.0589780807495117,grad_norm: 0.9999992702854533, iteration: 93224
loss: 1.135412335395813,grad_norm: 0.9999995220793859, iteration: 93225
loss: 1.2533438205718994,grad_norm: 0.999999789225636, iteration: 93226
loss: 1.0949169397354126,grad_norm: 0.999999869837212, iteration: 93227
loss: 1.0190907716751099,grad_norm: 0.923715619009058, iteration: 93228
loss: 1.0833231210708618,grad_norm: 0.9999990305343733, iteration: 93229
loss: 1.07063889503479,grad_norm: 0.9999990501470247, iteration: 93230
loss: 1.262715458869934,grad_norm: 0.9999998306060716, iteration: 93231
loss: 1.0525397062301636,grad_norm: 0.9999995519225051, iteration: 93232
loss: 1.0127118825912476,grad_norm: 0.999999135962164, iteration: 93233
loss: 0.9926928281784058,grad_norm: 0.9999990173540011, iteration: 93234
loss: 1.0105150938034058,grad_norm: 0.8967409702339321, iteration: 93235
loss: 1.0964027643203735,grad_norm: 0.9999999406873331, iteration: 93236
loss: 1.097959041595459,grad_norm: 0.9949789325811055, iteration: 93237
loss: 0.9751296639442444,grad_norm: 0.8158023781950506, iteration: 93238
loss: 1.1761242151260376,grad_norm: 0.9999995367903772, iteration: 93239
loss: 0.9881113767623901,grad_norm: 0.9994910270310052, iteration: 93240
loss: 1.145943284034729,grad_norm: 0.9999991751381374, iteration: 93241
loss: 1.0064005851745605,grad_norm: 0.8838856513547336, iteration: 93242
loss: 1.0277702808380127,grad_norm: 0.9999997204435198, iteration: 93243
loss: 1.0600242614746094,grad_norm: 0.999999688584709, iteration: 93244
loss: 1.016208529472351,grad_norm: 0.9999991712334867, iteration: 93245
loss: 1.3123830556869507,grad_norm: 0.9999998832983619, iteration: 93246
loss: 1.0423067808151245,grad_norm: 0.9999995376421252, iteration: 93247
loss: 1.0432385206222534,grad_norm: 0.999999717623847, iteration: 93248
loss: 1.050923466682434,grad_norm: 0.9999996159437, iteration: 93249
loss: 1.0658900737762451,grad_norm: 0.999999521428964, iteration: 93250
loss: 1.0404744148254395,grad_norm: 0.9128898099780925, iteration: 93251
loss: 1.0097942352294922,grad_norm: 0.9999996964344151, iteration: 93252
loss: 0.9757256507873535,grad_norm: 0.8245216679600084, iteration: 93253
loss: 1.0076791048049927,grad_norm: 0.9999994710039304, iteration: 93254
loss: 1.1000946760177612,grad_norm: 0.9999993311438621, iteration: 93255
loss: 0.9942563772201538,grad_norm: 0.9999990568683188, iteration: 93256
loss: 1.0627427101135254,grad_norm: 0.9999994346878043, iteration: 93257
loss: 1.077707290649414,grad_norm: 0.9999992475029611, iteration: 93258
loss: 1.034598708152771,grad_norm: 0.9999991022114461, iteration: 93259
loss: 1.1566320657730103,grad_norm: 0.9999994373914084, iteration: 93260
loss: 1.0168657302856445,grad_norm: 0.999999458033109, iteration: 93261
loss: 1.1647648811340332,grad_norm: 0.999999307147098, iteration: 93262
loss: 1.017977237701416,grad_norm: 0.8783803150536079, iteration: 93263
loss: 1.0601518154144287,grad_norm: 0.9999998847712772, iteration: 93264
loss: 1.0622252225875854,grad_norm: 0.9999991911803185, iteration: 93265
loss: 1.0294157266616821,grad_norm: 0.9999992296548766, iteration: 93266
loss: 1.0590499639511108,grad_norm: 0.9855696169384971, iteration: 93267
loss: 1.0402779579162598,grad_norm: 0.999999225601694, iteration: 93268
loss: 1.2553678750991821,grad_norm: 0.9999993642200221, iteration: 93269
loss: 1.3899204730987549,grad_norm: 0.9999993381474079, iteration: 93270
loss: 1.065922498703003,grad_norm: 0.9999994696146347, iteration: 93271
loss: 0.9860522150993347,grad_norm: 0.8841397659314795, iteration: 93272
loss: 1.0886300802230835,grad_norm: 0.9999995827048632, iteration: 93273
loss: 1.2880098819732666,grad_norm: 0.9999996202864511, iteration: 93274
loss: 1.038051962852478,grad_norm: 0.9999993801994915, iteration: 93275
loss: 1.0150790214538574,grad_norm: 0.9845297109158201, iteration: 93276
loss: 1.0201159715652466,grad_norm: 0.9999993249995103, iteration: 93277
loss: 1.7011632919311523,grad_norm: 0.9999998994106399, iteration: 93278
loss: 1.1031076908111572,grad_norm: 0.9999991875670734, iteration: 93279
loss: 1.080466866493225,grad_norm: 0.9999993380536235, iteration: 93280
loss: 1.439606785774231,grad_norm: 0.9999996761052286, iteration: 93281
loss: 1.0095337629318237,grad_norm: 0.9999992421130501, iteration: 93282
loss: 1.2908101081848145,grad_norm: 0.9999999346782559, iteration: 93283
loss: 1.1351202726364136,grad_norm: 0.9999993028109679, iteration: 93284
loss: 1.1762396097183228,grad_norm: 0.9999991868926095, iteration: 93285
loss: 1.4512115716934204,grad_norm: 0.9999998964541335, iteration: 93286
loss: 1.0026575326919556,grad_norm: 0.9999990755123558, iteration: 93287
loss: 1.0693273544311523,grad_norm: 1.0000001082432277, iteration: 93288
loss: 1.3467586040496826,grad_norm: 0.9999998507373872, iteration: 93289
loss: 1.150829792022705,grad_norm: 0.9999990463148831, iteration: 93290
loss: 1.4623221158981323,grad_norm: 0.9999994928862024, iteration: 93291
loss: 1.2914555072784424,grad_norm: 0.9999999530548234, iteration: 93292
loss: 1.0991859436035156,grad_norm: 0.9999992857811144, iteration: 93293
loss: 1.0421454906463623,grad_norm: 0.9999993760313286, iteration: 93294
loss: 1.0063445568084717,grad_norm: 0.8878991855042663, iteration: 93295
loss: 1.0716933012008667,grad_norm: 0.9999994967412367, iteration: 93296
loss: 1.1044825315475464,grad_norm: 0.8840645285148233, iteration: 93297
loss: 1.4100284576416016,grad_norm: 0.9999999256023604, iteration: 93298
loss: 1.187390685081482,grad_norm: 0.9999994831086819, iteration: 93299
loss: 1.0702005624771118,grad_norm: 1.0000000343203346, iteration: 93300
loss: 1.3702768087387085,grad_norm: 0.9999999106221078, iteration: 93301
loss: 1.0234438180923462,grad_norm: 0.9999991811170789, iteration: 93302
loss: 1.1602705717086792,grad_norm: 0.9999992915844379, iteration: 93303
loss: 1.2561225891113281,grad_norm: 1.0000000495087142, iteration: 93304
loss: 1.0203906297683716,grad_norm: 0.9999990429657367, iteration: 93305
loss: 1.1724146604537964,grad_norm: 0.999999975234321, iteration: 93306
loss: 1.080776333808899,grad_norm: 0.999999125775702, iteration: 93307
loss: 1.1153171062469482,grad_norm: 0.9999992379393438, iteration: 93308
loss: 1.3063552379608154,grad_norm: 0.9999997093165102, iteration: 93309
loss: 1.0828145742416382,grad_norm: 0.9999993174095507, iteration: 93310
loss: 1.1052309274673462,grad_norm: 0.9999997027603085, iteration: 93311
loss: 1.1207435131072998,grad_norm: 0.924948480711529, iteration: 93312
loss: 1.2961214780807495,grad_norm: 0.9999991772189456, iteration: 93313
loss: 1.0232876539230347,grad_norm: 0.9894816629291829, iteration: 93314
loss: 1.0124372243881226,grad_norm: 0.9999990170323907, iteration: 93315
loss: 1.1376979351043701,grad_norm: 0.9999994618776664, iteration: 93316
loss: 1.0874876976013184,grad_norm: 0.9853845504175966, iteration: 93317
loss: 1.0675976276397705,grad_norm: 0.9999994077805289, iteration: 93318
loss: 1.0346366167068481,grad_norm: 0.9999995239324095, iteration: 93319
loss: 1.1062101125717163,grad_norm: 0.9999993552966715, iteration: 93320
loss: 1.1862653493881226,grad_norm: 0.9999999680559241, iteration: 93321
loss: 1.121533989906311,grad_norm: 0.9999997432866489, iteration: 93322
loss: 1.2123445272445679,grad_norm: 0.9999999690721402, iteration: 93323
loss: 1.1566574573516846,grad_norm: 0.9999996030685465, iteration: 93324
loss: 1.0090835094451904,grad_norm: 0.9999999418568047, iteration: 93325
loss: 1.00972580909729,grad_norm: 0.999999481638628, iteration: 93326
loss: 1.2661261558532715,grad_norm: 0.9999994972763246, iteration: 93327
loss: 1.132936954498291,grad_norm: 0.9999997234106882, iteration: 93328
loss: 1.5370663404464722,grad_norm: 0.9999999269883157, iteration: 93329
loss: 1.1998250484466553,grad_norm: 0.9999995272121214, iteration: 93330
loss: 1.1285698413848877,grad_norm: 0.9999991553104493, iteration: 93331
loss: 1.0944072008132935,grad_norm: 0.9999997364563055, iteration: 93332
loss: 1.1442302465438843,grad_norm: 0.9999998778546858, iteration: 93333
loss: 1.0330548286437988,grad_norm: 0.9999997613196319, iteration: 93334
loss: 1.1546034812927246,grad_norm: 0.9999998861551385, iteration: 93335
loss: 1.1917370557785034,grad_norm: 0.9999995615523792, iteration: 93336
loss: 1.602004885673523,grad_norm: 0.9999998822078322, iteration: 93337
loss: 1.1789783239364624,grad_norm: 0.9999990570169699, iteration: 93338
loss: 1.1504685878753662,grad_norm: 0.9999999893463963, iteration: 93339
loss: 1.0367335081100464,grad_norm: 0.983042750118535, iteration: 93340
loss: 1.1706650257110596,grad_norm: 0.9999996835169342, iteration: 93341
loss: 0.996766984462738,grad_norm: 0.9999990124077981, iteration: 93342
loss: 1.13080632686615,grad_norm: 0.9999994536093257, iteration: 93343
loss: 1.1000739336013794,grad_norm: 0.9999997791373222, iteration: 93344
loss: 1.2159074544906616,grad_norm: 0.9999992620857776, iteration: 93345
loss: 1.4775722026824951,grad_norm: 0.9999999193952239, iteration: 93346
loss: 1.1342015266418457,grad_norm: 0.999999408039926, iteration: 93347
loss: 1.6125097274780273,grad_norm: 0.9999997429605965, iteration: 93348
loss: 1.1277762651443481,grad_norm: 0.9999996366979225, iteration: 93349
loss: 1.0451841354370117,grad_norm: 0.9720278989270436, iteration: 93350
loss: 1.135036826133728,grad_norm: 0.9999996146493156, iteration: 93351
loss: 0.9843650460243225,grad_norm: 0.9999998051927494, iteration: 93352
loss: 1.24079167842865,grad_norm: 0.9999995062017351, iteration: 93353
loss: 1.3157533407211304,grad_norm: 0.9999999350735177, iteration: 93354
loss: 1.3326681852340698,grad_norm: 1.0000000576213455, iteration: 93355
loss: 1.5804306268692017,grad_norm: 0.9999995564003981, iteration: 93356
loss: 1.306083083152771,grad_norm: 0.9999991630029914, iteration: 93357
loss: 1.2856872081756592,grad_norm: 0.9999996792049954, iteration: 93358
loss: 1.484880805015564,grad_norm: 0.9999999956254549, iteration: 93359
loss: 1.1132755279541016,grad_norm: 0.9999996745601211, iteration: 93360
loss: 2.089822769165039,grad_norm: 0.9999999371591617, iteration: 93361
loss: 1.3126561641693115,grad_norm: 0.9999998766777205, iteration: 93362
loss: 1.0095553398132324,grad_norm: 0.9455852609675448, iteration: 93363
loss: 1.181559443473816,grad_norm: 0.9999999261111823, iteration: 93364
loss: 1.331336259841919,grad_norm: 0.9999994282886557, iteration: 93365
loss: 1.205793857574463,grad_norm: 0.9999995629253466, iteration: 93366
loss: 1.090211272239685,grad_norm: 0.9999993769989742, iteration: 93367
loss: 1.3910776376724243,grad_norm: 1.0000000256059434, iteration: 93368
loss: 1.4270936250686646,grad_norm: 0.9999998558030977, iteration: 93369
loss: 1.2525184154510498,grad_norm: 1.0000000107462759, iteration: 93370
loss: 1.1505082845687866,grad_norm: 0.999999230969154, iteration: 93371
loss: 1.000898838043213,grad_norm: 0.9999990906449243, iteration: 93372
loss: 1.1423767805099487,grad_norm: 0.9999998670584843, iteration: 93373
loss: 1.2024743556976318,grad_norm: 0.999999843508287, iteration: 93374
loss: 1.1303362846374512,grad_norm: 0.9999999091071388, iteration: 93375
loss: 1.0885893106460571,grad_norm: 0.9999994307623896, iteration: 93376
loss: 1.1903353929519653,grad_norm: 0.9999997487632587, iteration: 93377
loss: 1.0603435039520264,grad_norm: 0.9544076461255291, iteration: 93378
loss: 1.0445315837860107,grad_norm: 0.9999995357930449, iteration: 93379
loss: 1.076798915863037,grad_norm: 0.9999994299561612, iteration: 93380
loss: 1.2551029920578003,grad_norm: 0.9999995191183769, iteration: 93381
loss: 1.1310170888900757,grad_norm: 0.9999997867054583, iteration: 93382
loss: 1.0953991413116455,grad_norm: 0.9999991881677285, iteration: 93383
loss: 1.0571235418319702,grad_norm: 0.9999999837539046, iteration: 93384
loss: 1.0961235761642456,grad_norm: 1.0000000333812598, iteration: 93385
loss: 1.014839768409729,grad_norm: 0.9698149211431752, iteration: 93386
loss: 1.0344910621643066,grad_norm: 0.9999998811317439, iteration: 93387
loss: 1.0693696737289429,grad_norm: 0.9999998497026488, iteration: 93388
loss: 1.2298184633255005,grad_norm: 0.9999996517268684, iteration: 93389
loss: 1.519511342048645,grad_norm: 0.999999934314531, iteration: 93390
loss: 1.0386590957641602,grad_norm: 0.9618708262867977, iteration: 93391
loss: 1.283273696899414,grad_norm: 1.000000085188537, iteration: 93392
loss: 1.0965005159378052,grad_norm: 0.9999990135443376, iteration: 93393
loss: 1.4688236713409424,grad_norm: 0.9999997497585236, iteration: 93394
loss: 1.303605318069458,grad_norm: 0.9999994285394626, iteration: 93395
loss: 1.0598413944244385,grad_norm: 0.9741168548690703, iteration: 93396
loss: 1.2786891460418701,grad_norm: 0.9999997881061511, iteration: 93397
loss: 1.1348860263824463,grad_norm: 0.9999999005566149, iteration: 93398
loss: 1.1893954277038574,grad_norm: 1.000000027200856, iteration: 93399
loss: 1.141797661781311,grad_norm: 0.9999992831687439, iteration: 93400
loss: 1.019680380821228,grad_norm: 0.9999994339284413, iteration: 93401
loss: 1.1177942752838135,grad_norm: 0.9999995873371512, iteration: 93402
loss: 1.4282656908035278,grad_norm: 0.9999999264095537, iteration: 93403
loss: 1.0719501972198486,grad_norm: 0.9999991682978125, iteration: 93404
loss: 1.3087869882583618,grad_norm: 0.999999917406315, iteration: 93405
loss: 1.0806946754455566,grad_norm: 0.9999998427343868, iteration: 93406
loss: 1.1703780889511108,grad_norm: 0.9999996633481876, iteration: 93407
loss: 1.0539462566375732,grad_norm: 0.9999993973934509, iteration: 93408
loss: 0.9653472304344177,grad_norm: 0.9321090558771308, iteration: 93409
loss: 1.4570280313491821,grad_norm: 0.9999996468308308, iteration: 93410
loss: 1.3182504177093506,grad_norm: 0.9999999043270165, iteration: 93411
loss: 1.0532214641571045,grad_norm: 0.9999996130170913, iteration: 93412
loss: 1.0512219667434692,grad_norm: 0.9999992091485298, iteration: 93413
loss: 1.0392519235610962,grad_norm: 0.9999990982476074, iteration: 93414
loss: 1.0146974325180054,grad_norm: 0.999999059217374, iteration: 93415
loss: 0.9973510503768921,grad_norm: 1.0000000183323234, iteration: 93416
loss: 1.3419266939163208,grad_norm: 0.9999999024573112, iteration: 93417
loss: 1.1870217323303223,grad_norm: 0.9999998739943993, iteration: 93418
loss: 1.1129276752471924,grad_norm: 0.9999997645177908, iteration: 93419
loss: 1.0390794277191162,grad_norm: 0.8780549633621042, iteration: 93420
loss: 1.212889313697815,grad_norm: 1.0000000817767134, iteration: 93421
loss: 1.1548765897750854,grad_norm: 0.9999998979554179, iteration: 93422
loss: 0.9385298490524292,grad_norm: 0.9999990624640822, iteration: 93423
loss: 1.020369291305542,grad_norm: 0.9999993629726637, iteration: 93424
loss: 1.3918882608413696,grad_norm: 0.9999998807734684, iteration: 93425
loss: 1.1691758632659912,grad_norm: 0.9999998684280541, iteration: 93426
loss: 0.9669021964073181,grad_norm: 0.9999990054179929, iteration: 93427
loss: 1.1458852291107178,grad_norm: 0.9999995921716409, iteration: 93428
loss: 1.094749927520752,grad_norm: 0.9999995850385498, iteration: 93429
loss: 1.3488842248916626,grad_norm: 0.9999995033311829, iteration: 93430
loss: 1.2228120565414429,grad_norm: 0.9999999397669272, iteration: 93431
loss: 1.1525603532791138,grad_norm: 0.9999998552522569, iteration: 93432
loss: 1.0266212224960327,grad_norm: 0.9999991545173039, iteration: 93433
loss: 1.0019588470458984,grad_norm: 0.999999516054836, iteration: 93434
loss: 1.0568172931671143,grad_norm: 0.9500887721491035, iteration: 93435
loss: 1.0212913751602173,grad_norm: 0.9999997805216337, iteration: 93436
loss: 1.134021282196045,grad_norm: 0.9999997078647248, iteration: 93437
loss: 1.1832941770553589,grad_norm: 0.9999998360791849, iteration: 93438
loss: 0.980769157409668,grad_norm: 0.9999992215416557, iteration: 93439
loss: 1.1296756267547607,grad_norm: 0.9999999790012657, iteration: 93440
loss: 1.0866276025772095,grad_norm: 0.9999997271911572, iteration: 93441
loss: 1.0035614967346191,grad_norm: 0.9748250503674061, iteration: 93442
loss: 1.0138295888900757,grad_norm: 0.999999709507448, iteration: 93443
loss: 1.1377863883972168,grad_norm: 0.9999992774013347, iteration: 93444
loss: 1.263840675354004,grad_norm: 0.999999834404507, iteration: 93445
loss: 1.0609248876571655,grad_norm: 0.9999994522172926, iteration: 93446
loss: 1.1352901458740234,grad_norm: 0.9999996974505386, iteration: 93447
loss: 1.033887505531311,grad_norm: 0.9999999146407557, iteration: 93448
loss: 0.9931648373603821,grad_norm: 0.8752882651433741, iteration: 93449
loss: 1.3209723234176636,grad_norm: 0.999999924525894, iteration: 93450
loss: 1.1085633039474487,grad_norm: 0.9999990507769951, iteration: 93451
loss: 1.312225341796875,grad_norm: 1.000000050195552, iteration: 93452
loss: 1.3678808212280273,grad_norm: 0.9999999751909345, iteration: 93453
loss: 1.6264015436172485,grad_norm: 0.9999998250288807, iteration: 93454
loss: 1.3999823331832886,grad_norm: 0.99999987665978, iteration: 93455
loss: 1.7257570028305054,grad_norm: 0.9999999525268043, iteration: 93456
loss: 1.1133326292037964,grad_norm: 0.9999998707403137, iteration: 93457
loss: 1.1747289896011353,grad_norm: 0.9999999834675855, iteration: 93458
loss: 1.1365697383880615,grad_norm: 0.9999997788721052, iteration: 93459
loss: 1.1312116384506226,grad_norm: 0.9999998577907107, iteration: 93460
loss: 1.1507554054260254,grad_norm: 0.9999998504225247, iteration: 93461
loss: 1.0670018196105957,grad_norm: 1.0000000160614932, iteration: 93462
loss: 1.1243072748184204,grad_norm: 0.9999998300096367, iteration: 93463
loss: 1.0238678455352783,grad_norm: 0.9999999442854112, iteration: 93464
loss: 1.0945512056350708,grad_norm: 0.9999998749713397, iteration: 93465
loss: 1.0185396671295166,grad_norm: 0.9510192671947553, iteration: 93466
loss: 1.1856379508972168,grad_norm: 0.9999996460785829, iteration: 93467
loss: 1.0234084129333496,grad_norm: 0.9999993603007871, iteration: 93468
loss: 1.1363755464553833,grad_norm: 0.99999956458139, iteration: 93469
loss: 1.1948615312576294,grad_norm: 0.9999999518194942, iteration: 93470
loss: 0.9936391711235046,grad_norm: 0.9999994776109099, iteration: 93471
loss: 1.0782617330551147,grad_norm: 0.999999161043473, iteration: 93472
loss: 1.0261483192443848,grad_norm: 0.9999996858032697, iteration: 93473
loss: 1.1324917078018188,grad_norm: 0.9999991889297288, iteration: 93474
loss: 1.2231172323226929,grad_norm: 0.9999997459006127, iteration: 93475
loss: 1.0310455560684204,grad_norm: 0.9999990380356027, iteration: 93476
loss: 0.9924972057342529,grad_norm: 0.871371114433188, iteration: 93477
loss: 1.2557549476623535,grad_norm: 0.9999996718777662, iteration: 93478
loss: 1.0072426795959473,grad_norm: 0.99999933186624, iteration: 93479
loss: 1.1028945446014404,grad_norm: 0.999999824304073, iteration: 93480
loss: 1.051066279411316,grad_norm: 0.9999994677362274, iteration: 93481
loss: 1.0479656457901,grad_norm: 0.9310263713970055, iteration: 93482
loss: 1.0436040163040161,grad_norm: 0.9999992100415842, iteration: 93483
loss: 1.020662546157837,grad_norm: 0.7856381284774094, iteration: 93484
loss: 1.1126463413238525,grad_norm: 0.9999999688058806, iteration: 93485
loss: 1.0047311782836914,grad_norm: 0.9999990793907888, iteration: 93486
loss: 0.9814966320991516,grad_norm: 0.9999989916670339, iteration: 93487
loss: 1.0265262126922607,grad_norm: 0.9999997473884643, iteration: 93488
loss: 0.9993866086006165,grad_norm: 0.7505373155034722, iteration: 93489
loss: 1.0116779804229736,grad_norm: 0.999999265617145, iteration: 93490
loss: 1.104036569595337,grad_norm: 0.9999997721592914, iteration: 93491
loss: 1.0397182703018188,grad_norm: 0.9999997635485177, iteration: 93492
loss: 1.0776976346969604,grad_norm: 0.9999995027843517, iteration: 93493
loss: 1.06857168674469,grad_norm: 0.9999989972158306, iteration: 93494
loss: 1.2003995180130005,grad_norm: 0.999999878599681, iteration: 93495
loss: 1.0105526447296143,grad_norm: 0.9999992027398119, iteration: 93496
loss: 1.122536063194275,grad_norm: 0.9999991713614534, iteration: 93497
loss: 1.012997031211853,grad_norm: 0.9777994276675158, iteration: 93498
loss: 0.9978232383728027,grad_norm: 0.8494742861092761, iteration: 93499
loss: 1.004072904586792,grad_norm: 0.9999992667371592, iteration: 93500
loss: 1.0186392068862915,grad_norm: 0.967599598629548, iteration: 93501
loss: 0.9888096451759338,grad_norm: 0.8194338456594233, iteration: 93502
loss: 1.074034571647644,grad_norm: 0.9146206277554438, iteration: 93503
loss: 1.0194098949432373,grad_norm: 0.9261005280258047, iteration: 93504
loss: 1.0301704406738281,grad_norm: 0.9999993580272304, iteration: 93505
loss: 1.1527972221374512,grad_norm: 0.9999998271006083, iteration: 93506
loss: 0.9751225113868713,grad_norm: 0.9826108948047169, iteration: 93507
loss: 1.0322171449661255,grad_norm: 0.99999978872537, iteration: 93508
loss: 1.1381067037582397,grad_norm: 0.9999996591997011, iteration: 93509
loss: 1.10078763961792,grad_norm: 0.9999997436090182, iteration: 93510
loss: 1.0244301557540894,grad_norm: 0.9999999959452601, iteration: 93511
loss: 0.9995749592781067,grad_norm: 0.9862191459513181, iteration: 93512
loss: 1.1865581274032593,grad_norm: 0.9999999182247473, iteration: 93513
loss: 1.075842261314392,grad_norm: 0.9999993730121862, iteration: 93514
loss: 0.9710407257080078,grad_norm: 0.9999997241033478, iteration: 93515
loss: 0.9602667093276978,grad_norm: 0.8026492633692335, iteration: 93516
loss: 1.0445265769958496,grad_norm: 0.9999993106108671, iteration: 93517
loss: 1.01730215549469,grad_norm: 0.8500795683103203, iteration: 93518
loss: 0.998420238494873,grad_norm: 0.9999997820023098, iteration: 93519
loss: 0.998440682888031,grad_norm: 0.999999359833678, iteration: 93520
loss: 1.097007393836975,grad_norm: 0.9999993497675297, iteration: 93521
loss: 1.1001086235046387,grad_norm: 0.9999997906042262, iteration: 93522
loss: 1.0527565479278564,grad_norm: 0.9999991371748987, iteration: 93523
loss: 1.0691797733306885,grad_norm: 0.9707468289295932, iteration: 93524
loss: 1.0225449800491333,grad_norm: 0.8984969247463617, iteration: 93525
loss: 1.014793872833252,grad_norm: 0.9126764768419541, iteration: 93526
loss: 0.9852131009101868,grad_norm: 0.999998985354041, iteration: 93527
loss: 0.9982210397720337,grad_norm: 0.9154571113100566, iteration: 93528
loss: 1.0390156507492065,grad_norm: 0.8710112274552357, iteration: 93529
loss: 1.0967919826507568,grad_norm: 0.9999993621695715, iteration: 93530
loss: 1.0359867811203003,grad_norm: 0.9999991271350401, iteration: 93531
loss: 0.998285710811615,grad_norm: 0.9850056477282604, iteration: 93532
loss: 0.996417224407196,grad_norm: 0.9765025822762594, iteration: 93533
loss: 1.0096464157104492,grad_norm: 0.9679663805388582, iteration: 93534
loss: 0.9906553030014038,grad_norm: 0.8705371987231126, iteration: 93535
loss: 1.062026858329773,grad_norm: 0.9895039720807349, iteration: 93536
loss: 1.016494631767273,grad_norm: 0.999999591322855, iteration: 93537
loss: 0.9779582023620605,grad_norm: 0.9999995717664774, iteration: 93538
loss: 1.009979248046875,grad_norm: 0.8747732622330257, iteration: 93539
loss: 1.0554485321044922,grad_norm: 0.9298005370466071, iteration: 93540
loss: 1.0501445531845093,grad_norm: 0.9568696454688627, iteration: 93541
loss: 0.9942471981048584,grad_norm: 0.8452259254423675, iteration: 93542
loss: 1.0617331266403198,grad_norm: 0.999999045225931, iteration: 93543
loss: 0.9949620366096497,grad_norm: 0.9999996786954797, iteration: 93544
loss: 0.9937425851821899,grad_norm: 0.9887613635766751, iteration: 93545
loss: 1.0346227884292603,grad_norm: 0.9999990374048453, iteration: 93546
loss: 1.0802578926086426,grad_norm: 0.9999994915504562, iteration: 93547
loss: 1.010465145111084,grad_norm: 0.999999890739116, iteration: 93548
loss: 1.0027543306350708,grad_norm: 0.9999991963173162, iteration: 93549
loss: 1.0662301778793335,grad_norm: 0.8851285971273479, iteration: 93550
loss: 1.0213005542755127,grad_norm: 0.9999989726427353, iteration: 93551
loss: 1.0695862770080566,grad_norm: 0.958766867376732, iteration: 93552
loss: 1.027486801147461,grad_norm: 0.9596657695855189, iteration: 93553
loss: 0.957655668258667,grad_norm: 0.8986875087545398, iteration: 93554
loss: 0.9949493408203125,grad_norm: 0.9999995676283576, iteration: 93555
loss: 1.0315150022506714,grad_norm: 0.9999990486252404, iteration: 93556
loss: 0.9669730067253113,grad_norm: 0.9999996320265534, iteration: 93557
loss: 0.9936162233352661,grad_norm: 0.9999990226262491, iteration: 93558
loss: 1.0029833316802979,grad_norm: 0.9118335635983913, iteration: 93559
loss: 1.0194376707077026,grad_norm: 0.8860666898627904, iteration: 93560
loss: 1.0225694179534912,grad_norm: 0.9767129829120754, iteration: 93561
loss: 1.0679757595062256,grad_norm: 0.9999991807374735, iteration: 93562
loss: 0.9898782968521118,grad_norm: 0.8242705318269488, iteration: 93563
loss: 0.9546356201171875,grad_norm: 0.9999990585783047, iteration: 93564
loss: 0.9658728837966919,grad_norm: 0.9438589490355337, iteration: 93565
loss: 0.9659250378608704,grad_norm: 0.8884323937837407, iteration: 93566
loss: 1.031506061553955,grad_norm: 0.9159360549367088, iteration: 93567
loss: 0.9638943672180176,grad_norm: 0.7852250458164712, iteration: 93568
loss: 0.9780822396278381,grad_norm: 0.8681453861795402, iteration: 93569
loss: 1.0140950679779053,grad_norm: 0.9999995413315904, iteration: 93570
loss: 0.9622706770896912,grad_norm: 0.9999989638220553, iteration: 93571
loss: 1.0139437913894653,grad_norm: 0.7906627794492995, iteration: 93572
loss: 1.0514991283416748,grad_norm: 0.9126680076434157, iteration: 93573
loss: 1.0579746961593628,grad_norm: 0.9999990877233221, iteration: 93574
loss: 0.998295783996582,grad_norm: 0.9158226600803709, iteration: 93575
loss: 0.974102795124054,grad_norm: 0.9476145661010652, iteration: 93576
loss: 1.0192276239395142,grad_norm: 0.9999992017956933, iteration: 93577
loss: 1.050701379776001,grad_norm: 0.9999996575196493, iteration: 93578
loss: 0.993092954158783,grad_norm: 1.000000046070564, iteration: 93579
loss: 0.9900988936424255,grad_norm: 0.9601850773735694, iteration: 93580
loss: 1.0164713859558105,grad_norm: 0.9999993333805538, iteration: 93581
loss: 0.986705482006073,grad_norm: 0.900714877050585, iteration: 93582
loss: 1.0312052965164185,grad_norm: 0.9999997824026134, iteration: 93583
loss: 1.0390626192092896,grad_norm: 0.8939997067537397, iteration: 93584
loss: 0.994655430316925,grad_norm: 0.9632722921586773, iteration: 93585
loss: 1.047271966934204,grad_norm: 0.9999992033229058, iteration: 93586
loss: 0.9986178278923035,grad_norm: 0.8410509321515692, iteration: 93587
loss: 1.0010204315185547,grad_norm: 0.9999989726571522, iteration: 93588
loss: 1.0292670726776123,grad_norm: 0.999999847879247, iteration: 93589
loss: 1.0395989418029785,grad_norm: 0.9999991263281822, iteration: 93590
loss: 1.0125328302383423,grad_norm: 0.9667966061325, iteration: 93591
loss: 0.9991610050201416,grad_norm: 0.9999993886213909, iteration: 93592
loss: 0.9961116909980774,grad_norm: 0.99999908384396, iteration: 93593
loss: 0.9969925284385681,grad_norm: 0.7708411408927961, iteration: 93594
loss: 1.0328986644744873,grad_norm: 0.9010381657560772, iteration: 93595
loss: 0.999427855014801,grad_norm: 0.9283979075844859, iteration: 93596
loss: 0.9909114241600037,grad_norm: 0.9999993609758999, iteration: 93597
loss: 1.0242468118667603,grad_norm: 0.8613561497825246, iteration: 93598
loss: 1.0204966068267822,grad_norm: 0.999999872910198, iteration: 93599
loss: 1.0294573307037354,grad_norm: 0.9589734626134873, iteration: 93600
loss: 0.9930983185768127,grad_norm: 0.9801756773452918, iteration: 93601
loss: 1.0731918811798096,grad_norm: 0.9999996803363905, iteration: 93602
loss: 0.9857760667800903,grad_norm: 0.9705074603202308, iteration: 93603
loss: 1.0390987396240234,grad_norm: 0.9999993713907651, iteration: 93604
loss: 1.0349055528640747,grad_norm: 0.8158413530849075, iteration: 93605
loss: 1.047299861907959,grad_norm: 0.9999991991548435, iteration: 93606
loss: 0.9826729893684387,grad_norm: 0.9999991508335435, iteration: 93607
loss: 0.9629117846488953,grad_norm: 0.814037118454034, iteration: 93608
loss: 0.9923969507217407,grad_norm: 0.9999990742887486, iteration: 93609
loss: 1.0153532028198242,grad_norm: 0.9999996784555899, iteration: 93610
loss: 0.9855164885520935,grad_norm: 0.9999994928976439, iteration: 93611
loss: 1.0479398965835571,grad_norm: 0.9999997185646221, iteration: 93612
loss: 1.114578127861023,grad_norm: 0.9999992600022348, iteration: 93613
loss: 1.0042046308517456,grad_norm: 0.8589821639461775, iteration: 93614
loss: 0.9844055771827698,grad_norm: 0.730587917117617, iteration: 93615
loss: 1.0257164239883423,grad_norm: 0.9999994208211667, iteration: 93616
loss: 1.1130701303482056,grad_norm: 0.9999990839038984, iteration: 93617
loss: 1.0644400119781494,grad_norm: 0.9999998059726578, iteration: 93618
loss: 0.971095085144043,grad_norm: 0.9999992017857978, iteration: 93619
loss: 1.099433422088623,grad_norm: 0.9999996070848256, iteration: 93620
loss: 1.027647614479065,grad_norm: 0.9182974524054336, iteration: 93621
loss: 0.9884662628173828,grad_norm: 0.9158466423712713, iteration: 93622
loss: 1.0003620386123657,grad_norm: 0.8323862973389452, iteration: 93623
loss: 0.9858770966529846,grad_norm: 0.999998989451983, iteration: 93624
loss: 1.0376522541046143,grad_norm: 0.9999994784455064, iteration: 93625
loss: 0.9911385178565979,grad_norm: 0.7871269219378004, iteration: 93626
loss: 1.0037323236465454,grad_norm: 0.9999994266192734, iteration: 93627
loss: 1.1072454452514648,grad_norm: 0.9999999425506085, iteration: 93628
loss: 0.9977514147758484,grad_norm: 0.9778222132335299, iteration: 93629
loss: 1.0066940784454346,grad_norm: 0.9999992635753548, iteration: 93630
loss: 1.0072293281555176,grad_norm: 0.9012691811899104, iteration: 93631
loss: 1.0428143739700317,grad_norm: 0.9999997972848689, iteration: 93632
loss: 0.9887757897377014,grad_norm: 0.8971769174489669, iteration: 93633
loss: 1.0357177257537842,grad_norm: 0.9113150722424244, iteration: 93634
loss: 1.1450270414352417,grad_norm: 0.9999997164002451, iteration: 93635
loss: 0.9956855177879333,grad_norm: 0.87213329310147, iteration: 93636
loss: 1.0071941614151,grad_norm: 0.9999990577586989, iteration: 93637
loss: 0.9870202541351318,grad_norm: 0.811040774337615, iteration: 93638
loss: 1.0886856317520142,grad_norm: 0.9999999194238175, iteration: 93639
loss: 1.0977071523666382,grad_norm: 0.9087452465358844, iteration: 93640
loss: 0.9466076493263245,grad_norm: 0.8655437450090905, iteration: 93641
loss: 1.0724111795425415,grad_norm: 0.9999996967648175, iteration: 93642
loss: 1.0288575887680054,grad_norm: 0.9999990544074928, iteration: 93643
loss: 0.9884485006332397,grad_norm: 0.897052741741566, iteration: 93644
loss: 1.020371913909912,grad_norm: 0.9999991614397501, iteration: 93645
loss: 1.038314700126648,grad_norm: 0.9999990795461207, iteration: 93646
loss: 1.0260961055755615,grad_norm: 0.9162123749365642, iteration: 93647
loss: 1.0310680866241455,grad_norm: 0.9138894684045997, iteration: 93648
loss: 0.989611029624939,grad_norm: 0.7173175235618586, iteration: 93649
loss: 0.9697160124778748,grad_norm: 0.9999992044517002, iteration: 93650
loss: 1.0103920698165894,grad_norm: 0.9445921367123816, iteration: 93651
loss: 1.0615254640579224,grad_norm: 0.999999205007271, iteration: 93652
loss: 0.9707251191139221,grad_norm: 0.9579350450343599, iteration: 93653
loss: 1.066473364830017,grad_norm: 0.9999992832557475, iteration: 93654
loss: 1.0654710531234741,grad_norm: 0.9999992999901733, iteration: 93655
loss: 0.9865204095840454,grad_norm: 0.9999990877946279, iteration: 93656
loss: 0.9956820607185364,grad_norm: 0.7583988232660611, iteration: 93657
loss: 1.0108813047409058,grad_norm: 0.918544011323303, iteration: 93658
loss: 1.0519856214523315,grad_norm: 0.9999992318102406, iteration: 93659
loss: 1.009804129600525,grad_norm: 0.9999992770541487, iteration: 93660
loss: 1.0958234071731567,grad_norm: 0.9999993460347567, iteration: 93661
loss: 1.0095242261886597,grad_norm: 0.9703573622064295, iteration: 93662
loss: 0.9808840751647949,grad_norm: 0.8864842663031308, iteration: 93663
loss: 0.9580593109130859,grad_norm: 0.9999993062618387, iteration: 93664
loss: 0.9901389479637146,grad_norm: 0.9999991198781741, iteration: 93665
loss: 1.066806435585022,grad_norm: 0.9032442342143655, iteration: 93666
loss: 1.076555848121643,grad_norm: 0.8588882328835921, iteration: 93667
loss: 1.0527263879776,grad_norm: 0.9999991349785873, iteration: 93668
loss: 1.2116116285324097,grad_norm: 0.9999993199042096, iteration: 93669
loss: 1.1043452024459839,grad_norm: 0.9999996633955838, iteration: 93670
loss: 0.9801824688911438,grad_norm: 0.9999990798578995, iteration: 93671
loss: 1.0024220943450928,grad_norm: 0.9999991412962235, iteration: 93672
loss: 0.9675353169441223,grad_norm: 0.881552348968722, iteration: 93673
loss: 0.9992974400520325,grad_norm: 0.9999991347031619, iteration: 93674
loss: 0.9908575415611267,grad_norm: 0.79802537013466, iteration: 93675
loss: 0.9754535555839539,grad_norm: 0.6979427786193477, iteration: 93676
loss: 1.0591117143630981,grad_norm: 0.9999999349819463, iteration: 93677
loss: 1.0004912614822388,grad_norm: 0.8804954540573798, iteration: 93678
loss: 1.0070585012435913,grad_norm: 0.8615298867307449, iteration: 93679
loss: 1.025823712348938,grad_norm: 0.8659973584616828, iteration: 93680
loss: 1.0194562673568726,grad_norm: 0.9908760680139589, iteration: 93681
loss: 1.1767500638961792,grad_norm: 0.9999998308738796, iteration: 93682
loss: 1.0019842386245728,grad_norm: 0.9999991717514142, iteration: 93683
loss: 1.0066508054733276,grad_norm: 0.8998727337430296, iteration: 93684
loss: 1.0815390348434448,grad_norm: 0.9999998161743668, iteration: 93685
loss: 1.012119174003601,grad_norm: 0.9999993090663106, iteration: 93686
loss: 1.041429042816162,grad_norm: 0.9999993199711472, iteration: 93687
loss: 1.0226969718933105,grad_norm: 0.8706203646221733, iteration: 93688
loss: 1.0572465658187866,grad_norm: 0.9999995070578853, iteration: 93689
loss: 1.0885076522827148,grad_norm: 0.999999238738162, iteration: 93690
loss: 1.0349328517913818,grad_norm: 0.9421330894091169, iteration: 93691
loss: 1.0340168476104736,grad_norm: 0.9065079151655115, iteration: 93692
loss: 1.0612130165100098,grad_norm: 0.9999991178493992, iteration: 93693
loss: 1.015120506286621,grad_norm: 0.9999998896915786, iteration: 93694
loss: 1.091869831085205,grad_norm: 0.9999990237830307, iteration: 93695
loss: 1.0127859115600586,grad_norm: 0.9999990031075463, iteration: 93696
loss: 1.025537133216858,grad_norm: 0.9999990262446051, iteration: 93697
loss: 1.0624715089797974,grad_norm: 0.9999996176847891, iteration: 93698
loss: 0.9741001725196838,grad_norm: 0.9999998966220147, iteration: 93699
loss: 1.0034937858581543,grad_norm: 0.9999995210966082, iteration: 93700
loss: 1.0110526084899902,grad_norm: 0.8823340314639205, iteration: 93701
loss: 1.023828387260437,grad_norm: 0.8417780641761693, iteration: 93702
loss: 1.0542153120040894,grad_norm: 0.9999990474654425, iteration: 93703
loss: 0.9801750779151917,grad_norm: 0.864150621703155, iteration: 93704
loss: 0.9701687693595886,grad_norm: 0.9514349798071663, iteration: 93705
loss: 1.0475913286209106,grad_norm: 0.9448316761509021, iteration: 93706
loss: 1.0032472610473633,grad_norm: 0.9836826886881976, iteration: 93707
loss: 1.1064212322235107,grad_norm: 0.9999991358073064, iteration: 93708
loss: 1.051108479499817,grad_norm: 0.9999992329051827, iteration: 93709
loss: 1.0163356065750122,grad_norm: 0.9986040529394332, iteration: 93710
loss: 1.0151690244674683,grad_norm: 0.999999177778628, iteration: 93711
loss: 1.0130481719970703,grad_norm: 0.83086241059631, iteration: 93712
loss: 0.9949952960014343,grad_norm: 0.9999996530191492, iteration: 93713
loss: 1.1855939626693726,grad_norm: 0.9999996306905873, iteration: 93714
loss: 1.1181412935256958,grad_norm: 0.9999995784262348, iteration: 93715
loss: 1.0622135400772095,grad_norm: 0.9999999711949299, iteration: 93716
loss: 1.0237523317337036,grad_norm: 0.8315295902306402, iteration: 93717
loss: 1.0802178382873535,grad_norm: 0.9999995716956973, iteration: 93718
loss: 0.9986486434936523,grad_norm: 0.9999995095600571, iteration: 93719
loss: 1.052543044090271,grad_norm: 0.9999992466787492, iteration: 93720
loss: 1.055267333984375,grad_norm: 0.999999291855348, iteration: 93721
loss: 1.0836316347122192,grad_norm: 0.9999992164268325, iteration: 93722
loss: 1.0785855054855347,grad_norm: 0.9999990172858358, iteration: 93723
loss: 1.0095852613449097,grad_norm: 0.9999992317076029, iteration: 93724
loss: 0.9846279621124268,grad_norm: 0.8502578160907446, iteration: 93725
loss: 1.0849627256393433,grad_norm: 0.9999992211060985, iteration: 93726
loss: 1.0387179851531982,grad_norm: 0.9999994454404981, iteration: 93727
loss: 0.9862747192382812,grad_norm: 0.903373685252364, iteration: 93728
loss: 0.9532138109207153,grad_norm: 0.9719145510476417, iteration: 93729
loss: 0.9847525954246521,grad_norm: 0.8582985247645865, iteration: 93730
loss: 1.0398736000061035,grad_norm: 0.8708652600545076, iteration: 93731
loss: 1.0134541988372803,grad_norm: 0.9999991422399959, iteration: 93732
loss: 1.0594040155410767,grad_norm: 0.9999999910223527, iteration: 93733
loss: 0.9944674968719482,grad_norm: 0.951369171743177, iteration: 93734
loss: 1.0150736570358276,grad_norm: 0.9999993488621222, iteration: 93735
loss: 1.046345829963684,grad_norm: 0.9999991284269296, iteration: 93736
loss: 1.007036566734314,grad_norm: 0.9999996399093705, iteration: 93737
loss: 1.0724365711212158,grad_norm: 0.9999997230834391, iteration: 93738
loss: 1.0122960805892944,grad_norm: 0.9999992501212241, iteration: 93739
loss: 1.2022417783737183,grad_norm: 0.9999992975703922, iteration: 93740
loss: 1.0133800506591797,grad_norm: 0.9999990851386383, iteration: 93741
loss: 0.9882510304450989,grad_norm: 0.9999991904630506, iteration: 93742
loss: 0.9964231848716736,grad_norm: 0.983510567414579, iteration: 93743
loss: 0.9964120388031006,grad_norm: 0.8362477867295345, iteration: 93744
loss: 1.0049251317977905,grad_norm: 0.9098227160413681, iteration: 93745
loss: 1.0103013515472412,grad_norm: 0.9999994634834831, iteration: 93746
loss: 0.960717499256134,grad_norm: 0.999999229849294, iteration: 93747
loss: 1.0288479328155518,grad_norm: 0.9999999806580748, iteration: 93748
loss: 1.0262008905410767,grad_norm: 0.9999989357516204, iteration: 93749
loss: 1.0397588014602661,grad_norm: 0.9999996075595821, iteration: 93750
loss: 1.0438170433044434,grad_norm: 0.9999997583647553, iteration: 93751
loss: 1.0736291408538818,grad_norm: 0.9999993857692864, iteration: 93752
loss: 0.9798997044563293,grad_norm: 0.7876825251948014, iteration: 93753
loss: 1.0033234357833862,grad_norm: 0.8973928882168231, iteration: 93754
loss: 1.0015130043029785,grad_norm: 0.935685687535821, iteration: 93755
loss: 1.0940678119659424,grad_norm: 0.9999990123886477, iteration: 93756
loss: 0.9857456088066101,grad_norm: 0.999999609808058, iteration: 93757
loss: 1.0798051357269287,grad_norm: 0.9286104459780093, iteration: 93758
loss: 1.0847917795181274,grad_norm: 0.9999990595726516, iteration: 93759
loss: 1.2301498651504517,grad_norm: 0.9999999739886397, iteration: 93760
loss: 1.0267832279205322,grad_norm: 0.9999993469717692, iteration: 93761
loss: 1.0202767848968506,grad_norm: 0.9999993888280349, iteration: 93762
loss: 1.0565749406814575,grad_norm: 0.9999993150737908, iteration: 93763
loss: 1.0246719121932983,grad_norm: 0.969825120982184, iteration: 93764
loss: 1.023284912109375,grad_norm: 0.9610634704550146, iteration: 93765
loss: 1.0162121057510376,grad_norm: 0.9999992697774408, iteration: 93766
loss: 1.005608320236206,grad_norm: 0.9256470274561782, iteration: 93767
loss: 0.9849321842193604,grad_norm: 0.8960008059426654, iteration: 93768
loss: 1.0116400718688965,grad_norm: 0.9999994102256783, iteration: 93769
loss: 1.076859712600708,grad_norm: 0.9999990663567857, iteration: 93770
loss: 1.109035849571228,grad_norm: 0.9999996831142045, iteration: 93771
loss: 0.977372407913208,grad_norm: 0.9999990029977595, iteration: 93772
loss: 1.0808703899383545,grad_norm: 0.9999996034777111, iteration: 93773
loss: 1.0113365650177002,grad_norm: 0.9999993167854955, iteration: 93774
loss: 1.0001981258392334,grad_norm: 0.9999994389165419, iteration: 93775
loss: 1.0236990451812744,grad_norm: 0.9999990416912963, iteration: 93776
loss: 1.0191001892089844,grad_norm: 0.999999069969838, iteration: 93777
loss: 0.9813098311424255,grad_norm: 0.9999992161524137, iteration: 93778
loss: 1.008341670036316,grad_norm: 0.9999990947855795, iteration: 93779
loss: 1.1741145849227905,grad_norm: 0.9999995441968317, iteration: 93780
loss: 1.053676962852478,grad_norm: 0.999999874867605, iteration: 93781
loss: 1.0047491788864136,grad_norm: 0.9999989567467329, iteration: 93782
loss: 1.0489252805709839,grad_norm: 0.8331309276385385, iteration: 93783
loss: 1.0054455995559692,grad_norm: 0.9999990181565312, iteration: 93784
loss: 1.117282509803772,grad_norm: 0.9999995414772911, iteration: 93785
loss: 1.101653814315796,grad_norm: 0.9999990247183893, iteration: 93786
loss: 1.0398640632629395,grad_norm: 0.9296787058255273, iteration: 93787
loss: 1.1478043794631958,grad_norm: 0.9999994828957651, iteration: 93788
loss: 1.0283756256103516,grad_norm: 0.9006261563712223, iteration: 93789
loss: 0.9818968176841736,grad_norm: 0.913071061500118, iteration: 93790
loss: 1.1224216222763062,grad_norm: 0.9999999577379892, iteration: 93791
loss: 1.0631335973739624,grad_norm: 0.9999995811683958, iteration: 93792
loss: 0.9854214191436768,grad_norm: 0.9935878210053453, iteration: 93793
loss: 1.0350189208984375,grad_norm: 0.999999231718653, iteration: 93794
loss: 1.1117939949035645,grad_norm: 1.000000003613406, iteration: 93795
loss: 0.9891415238380432,grad_norm: 0.936795597004148, iteration: 93796
loss: 1.0142943859100342,grad_norm: 0.9249486962686917, iteration: 93797
loss: 1.0152784585952759,grad_norm: 0.9104473103996008, iteration: 93798
loss: 0.9645643830299377,grad_norm: 0.884525648361438, iteration: 93799
loss: 0.9602668285369873,grad_norm: 0.9122664920996383, iteration: 93800
loss: 1.015765905380249,grad_norm: 0.9999991336789184, iteration: 93801
loss: 1.0296813249588013,grad_norm: 0.8735127117007518, iteration: 93802
loss: 1.0778684616088867,grad_norm: 0.9999999848936413, iteration: 93803
loss: 1.0857619047164917,grad_norm: 0.9095983937105244, iteration: 93804
loss: 0.9955157041549683,grad_norm: 0.855591146773442, iteration: 93805
loss: 0.96648770570755,grad_norm: 0.9999991832524169, iteration: 93806
loss: 1.0090075731277466,grad_norm: 0.9794321330969821, iteration: 93807
loss: 1.0512142181396484,grad_norm: 0.9999990565355984, iteration: 93808
loss: 1.080235481262207,grad_norm: 0.9999992824852775, iteration: 93809
loss: 0.991724967956543,grad_norm: 0.9999991529815789, iteration: 93810
loss: 1.0054363012313843,grad_norm: 0.9999997325492976, iteration: 93811
loss: 1.0163347721099854,grad_norm: 0.92286874622413, iteration: 93812
loss: 1.044232726097107,grad_norm: 0.9852166329551606, iteration: 93813
loss: 1.0188583135604858,grad_norm: 0.8809606238829233, iteration: 93814
loss: 0.9514614343643188,grad_norm: 0.9711882611158467, iteration: 93815
loss: 1.038075566291809,grad_norm: 0.9999997265459536, iteration: 93816
loss: 0.9864324331283569,grad_norm: 0.8871368468522519, iteration: 93817
loss: 1.0553349256515503,grad_norm: 0.9999989297491184, iteration: 93818
loss: 1.0110116004943848,grad_norm: 0.9999995013012691, iteration: 93819
loss: 1.0119417905807495,grad_norm: 0.8597969469339962, iteration: 93820
loss: 1.0112128257751465,grad_norm: 0.9999989933938945, iteration: 93821
loss: 0.980582058429718,grad_norm: 0.9999992301329796, iteration: 93822
loss: 1.021199107170105,grad_norm: 0.8968868082062796, iteration: 93823
loss: 1.0013220310211182,grad_norm: 0.9999990534745862, iteration: 93824
loss: 1.0444692373275757,grad_norm: 0.9999992353507188, iteration: 93825
loss: 1.0737900733947754,grad_norm: 0.9999993754207829, iteration: 93826
loss: 1.064142107963562,grad_norm: 0.9999991057974771, iteration: 93827
loss: 1.1374651193618774,grad_norm: 0.9999992772649005, iteration: 93828
loss: 0.9957433938980103,grad_norm: 0.9721750950991455, iteration: 93829
loss: 1.005579948425293,grad_norm: 0.7926236062547907, iteration: 93830
loss: 1.0320463180541992,grad_norm: 0.9999990067898821, iteration: 93831
loss: 1.0192073583602905,grad_norm: 0.9999998373771458, iteration: 93832
loss: 1.06171452999115,grad_norm: 0.8194286301101225, iteration: 93833
loss: 1.0143330097198486,grad_norm: 0.9999992058563547, iteration: 93834
loss: 0.9800702929496765,grad_norm: 0.9064924957519621, iteration: 93835
loss: 1.0313981771469116,grad_norm: 0.9833228017361942, iteration: 93836
loss: 0.9981058239936829,grad_norm: 0.9729709420722729, iteration: 93837
loss: 0.9221773743629456,grad_norm: 0.8815150993856703, iteration: 93838
loss: 0.9698264002799988,grad_norm: 0.877453766276742, iteration: 93839
loss: 1.1084202527999878,grad_norm: 0.9999991781614948, iteration: 93840
loss: 1.0167911052703857,grad_norm: 0.9999994551503442, iteration: 93841
loss: 1.024897575378418,grad_norm: 0.9636165360670991, iteration: 93842
loss: 1.00465989112854,grad_norm: 0.8932256077272779, iteration: 93843
loss: 1.0090543031692505,grad_norm: 0.999999878437072, iteration: 93844
loss: 1.05622398853302,grad_norm: 0.9999989719466089, iteration: 93845
loss: 0.9597421884536743,grad_norm: 0.9244643883109956, iteration: 93846
loss: 1.139850378036499,grad_norm: 0.9624145492960688, iteration: 93847
loss: 0.9882928729057312,grad_norm: 0.9505476711426876, iteration: 93848
loss: 1.0240578651428223,grad_norm: 0.911409275371847, iteration: 93849
loss: 1.063547968864441,grad_norm: 0.9908944103937218, iteration: 93850
loss: 1.1487185955047607,grad_norm: 0.9999998163867204, iteration: 93851
loss: 1.016711711883545,grad_norm: 1.0000000216668121, iteration: 93852
loss: 1.0042576789855957,grad_norm: 0.9048056675593598, iteration: 93853
loss: 1.0079935789108276,grad_norm: 0.9999992281114273, iteration: 93854
loss: 1.0092196464538574,grad_norm: 0.9497079039709297, iteration: 93855
loss: 1.0185062885284424,grad_norm: 0.9999990085768615, iteration: 93856
loss: 1.0379433631896973,grad_norm: 0.9999989942523761, iteration: 93857
loss: 1.0429368019104004,grad_norm: 0.9999996660663775, iteration: 93858
loss: 1.0698459148406982,grad_norm: 0.9999991883070083, iteration: 93859
loss: 1.0181615352630615,grad_norm: 0.9999994843513327, iteration: 93860
loss: 1.074407935142517,grad_norm: 0.9999995838668436, iteration: 93861
loss: 1.0042585134506226,grad_norm: 0.9788642782328814, iteration: 93862
loss: 0.9930107593536377,grad_norm: 0.9070440485252871, iteration: 93863
loss: 1.0708484649658203,grad_norm: 0.9999994606709681, iteration: 93864
loss: 0.9761411547660828,grad_norm: 0.8211983290959729, iteration: 93865
loss: 1.0746876001358032,grad_norm: 0.9999992421113376, iteration: 93866
loss: 1.019752860069275,grad_norm: 0.9310721163447506, iteration: 93867
loss: 1.0844446420669556,grad_norm: 0.8381033190168864, iteration: 93868
loss: 1.0202558040618896,grad_norm: 0.9999990907852229, iteration: 93869
loss: 0.9485025405883789,grad_norm: 0.9153649192121786, iteration: 93870
loss: 1.0226019620895386,grad_norm: 0.8724783222938275, iteration: 93871
loss: 1.0684943199157715,grad_norm: 0.9999995542421573, iteration: 93872
loss: 1.074953317642212,grad_norm: 0.9999991311618601, iteration: 93873
loss: 1.024382472038269,grad_norm: 0.9013519495966096, iteration: 93874
loss: 1.0580931901931763,grad_norm: 0.9999994851203117, iteration: 93875
loss: 1.0078431367874146,grad_norm: 0.8048573952367548, iteration: 93876
loss: 1.0167853832244873,grad_norm: 0.9999991794185215, iteration: 93877
loss: 1.0650889873504639,grad_norm: 0.999999777480235, iteration: 93878
loss: 1.0303566455841064,grad_norm: 0.9999991534899553, iteration: 93879
loss: 1.2464346885681152,grad_norm: 0.9999998662970531, iteration: 93880
loss: 1.1418246030807495,grad_norm: 0.9999997799146676, iteration: 93881
loss: 0.9735173583030701,grad_norm: 0.9999991260716786, iteration: 93882
loss: 1.0173183679580688,grad_norm: 0.8509705221695184, iteration: 93883
loss: 1.0675196647644043,grad_norm: 0.9798609614414068, iteration: 93884
loss: 1.014112114906311,grad_norm: 0.7929326779051772, iteration: 93885
loss: 0.9915104508399963,grad_norm: 0.9999990170158525, iteration: 93886
loss: 1.0227696895599365,grad_norm: 0.9999992492306147, iteration: 93887
loss: 1.0276618003845215,grad_norm: 0.9722417887514839, iteration: 93888
loss: 1.007429838180542,grad_norm: 0.9716032080370524, iteration: 93889
loss: 1.0211479663848877,grad_norm: 0.8792881700031994, iteration: 93890
loss: 1.0042563676834106,grad_norm: 0.9034526924469456, iteration: 93891
loss: 1.0138702392578125,grad_norm: 0.7848757980621881, iteration: 93892
loss: 1.0527366399765015,grad_norm: 0.9999991932701561, iteration: 93893
loss: 0.9825838804244995,grad_norm: 0.9803826523103103, iteration: 93894
loss: 1.013291597366333,grad_norm: 0.9999990884889206, iteration: 93895
loss: 1.0048000812530518,grad_norm: 0.9449630124108042, iteration: 93896
loss: 1.032862663269043,grad_norm: 0.999999754615518, iteration: 93897
loss: 0.9834897518157959,grad_norm: 0.8606121539461022, iteration: 93898
loss: 1.0337320566177368,grad_norm: 0.999999103262347, iteration: 93899
loss: 1.0381678342819214,grad_norm: 0.9190630643979395, iteration: 93900
loss: 0.967011034488678,grad_norm: 0.9646563794586933, iteration: 93901
loss: 1.0359277725219727,grad_norm: 0.9999995468326122, iteration: 93902
loss: 1.0704560279846191,grad_norm: 0.8419785018640701, iteration: 93903
loss: 0.9712538719177246,grad_norm: 0.9999990944750207, iteration: 93904
loss: 1.1218055486679077,grad_norm: 0.9999999008905561, iteration: 93905
loss: 1.0113953351974487,grad_norm: 0.8171826944523923, iteration: 93906
loss: 1.0052621364593506,grad_norm: 0.9999990188679112, iteration: 93907
loss: 1.0216532945632935,grad_norm: 0.9999994943488135, iteration: 93908
loss: 0.9964722394943237,grad_norm: 0.8640533331119561, iteration: 93909
loss: 1.0611941814422607,grad_norm: 0.8822545527138417, iteration: 93910
loss: 0.9831771850585938,grad_norm: 0.7833479683734895, iteration: 93911
loss: 1.1095619201660156,grad_norm: 0.9813918381319683, iteration: 93912
loss: 1.0420029163360596,grad_norm: 0.9319574295399528, iteration: 93913
loss: 1.027169942855835,grad_norm: 0.8768289442655981, iteration: 93914
loss: 0.9979938268661499,grad_norm: 0.9872966393226833, iteration: 93915
loss: 0.9897239804267883,grad_norm: 0.8593325106888279, iteration: 93916
loss: 0.9795323014259338,grad_norm: 0.9999990989634813, iteration: 93917
loss: 0.9913054704666138,grad_norm: 0.9999991671537997, iteration: 93918
loss: 1.032448649406433,grad_norm: 0.9999996490489784, iteration: 93919
loss: 0.9902560114860535,grad_norm: 0.999999034676999, iteration: 93920
loss: 1.0258064270019531,grad_norm: 0.9999995017085211, iteration: 93921
loss: 1.0387492179870605,grad_norm: 0.9999990815228798, iteration: 93922
loss: 1.0587608814239502,grad_norm: 0.9999992196630456, iteration: 93923
loss: 0.9695031642913818,grad_norm: 0.9905839720393503, iteration: 93924
loss: 1.0336952209472656,grad_norm: 0.9999992685441129, iteration: 93925
loss: 1.0462322235107422,grad_norm: 0.9089936775057409, iteration: 93926
loss: 0.9647104144096375,grad_norm: 0.8958158881824304, iteration: 93927
loss: 1.0123447179794312,grad_norm: 0.9351971482166629, iteration: 93928
loss: 1.0441937446594238,grad_norm: 0.8695755081354021, iteration: 93929
loss: 1.0224469900131226,grad_norm: 0.7540018124616921, iteration: 93930
loss: 1.0205388069152832,grad_norm: 0.9664621769033392, iteration: 93931
loss: 1.0002262592315674,grad_norm: 0.9999990729229021, iteration: 93932
loss: 1.0384564399719238,grad_norm: 0.999999032299933, iteration: 93933
loss: 0.9876688718795776,grad_norm: 0.9627907409598806, iteration: 93934
loss: 1.143253207206726,grad_norm: 0.9999993792686535, iteration: 93935
loss: 1.0383234024047852,grad_norm: 0.9593147124885354, iteration: 93936
loss: 1.0038530826568604,grad_norm: 0.8757258709479663, iteration: 93937
loss: 1.0576870441436768,grad_norm: 0.9999997170631583, iteration: 93938
loss: 1.0185967683792114,grad_norm: 0.9999989562701289, iteration: 93939
loss: 1.0042916536331177,grad_norm: 0.9999991808503961, iteration: 93940
loss: 0.9830378293991089,grad_norm: 0.9068147781549595, iteration: 93941
loss: 0.9858728051185608,grad_norm: 0.9999998209659503, iteration: 93942
loss: 0.9856746792793274,grad_norm: 0.8173868702980144, iteration: 93943
loss: 1.0131887197494507,grad_norm: 0.9309859001543674, iteration: 93944
loss: 1.0009703636169434,grad_norm: 0.9999990987384326, iteration: 93945
loss: 0.9971952438354492,grad_norm: 0.9999991070659991, iteration: 93946
loss: 1.0125937461853027,grad_norm: 0.8171609843362692, iteration: 93947
loss: 1.0259793996810913,grad_norm: 0.9999994595297224, iteration: 93948
loss: 1.0447101593017578,grad_norm: 0.9999996923916069, iteration: 93949
loss: 0.9990208745002747,grad_norm: 0.9999991269706721, iteration: 93950
loss: 0.9889670014381409,grad_norm: 0.8584180297598593, iteration: 93951
loss: 1.0339552164077759,grad_norm: 0.9999993258929988, iteration: 93952
loss: 1.0031626224517822,grad_norm: 0.8659681462742348, iteration: 93953
loss: 1.037142038345337,grad_norm: 0.9999998259011313, iteration: 93954
loss: 0.9706583023071289,grad_norm: 0.8854542128093251, iteration: 93955
loss: 0.9890000820159912,grad_norm: 0.8551029932906041, iteration: 93956
loss: 1.0018020868301392,grad_norm: 0.9156977289537527, iteration: 93957
loss: 1.0095218420028687,grad_norm: 0.9329168655791363, iteration: 93958
loss: 0.9931498765945435,grad_norm: 0.9999989174385685, iteration: 93959
loss: 0.9967688918113708,grad_norm: 0.8792010825928773, iteration: 93960
loss: 1.0721540451049805,grad_norm: 0.9506127959033742, iteration: 93961
loss: 1.0141247510910034,grad_norm: 0.8014164114035783, iteration: 93962
loss: 1.0053019523620605,grad_norm: 0.9999996765416531, iteration: 93963
loss: 1.003090262413025,grad_norm: 0.9999991446567326, iteration: 93964
loss: 1.0351123809814453,grad_norm: 0.9999991796108826, iteration: 93965
loss: 1.1181001663208008,grad_norm: 0.9999990253377377, iteration: 93966
loss: 1.059098720550537,grad_norm: 1.0000000120177124, iteration: 93967
loss: 0.9986814260482788,grad_norm: 0.9999990174172294, iteration: 93968
loss: 1.039244294166565,grad_norm: 0.9666936346929117, iteration: 93969
loss: 1.0766010284423828,grad_norm: 0.9999992631435449, iteration: 93970
loss: 1.0080050230026245,grad_norm: 0.7929402699708218, iteration: 93971
loss: 1.003216028213501,grad_norm: 0.9999991348953757, iteration: 93972
loss: 1.0136969089508057,grad_norm: 0.9999995134088991, iteration: 93973
loss: 0.9984405636787415,grad_norm: 0.9999992567439521, iteration: 93974
loss: 1.1477949619293213,grad_norm: 0.9999993640864864, iteration: 93975
loss: 1.045353651046753,grad_norm: 0.8233933835578063, iteration: 93976
loss: 1.0469911098480225,grad_norm: 0.9999992641744154, iteration: 93977
loss: 0.9664842486381531,grad_norm: 0.8233335868751263, iteration: 93978
loss: 1.0111606121063232,grad_norm: 0.9841638370346629, iteration: 93979
loss: 0.9938738942146301,grad_norm: 0.8661847596463961, iteration: 93980
loss: 0.9739844799041748,grad_norm: 0.9892175912379695, iteration: 93981
loss: 1.0451865196228027,grad_norm: 0.9999998272156789, iteration: 93982
loss: 1.017347812652588,grad_norm: 0.9999990448009719, iteration: 93983
loss: 1.068938970565796,grad_norm: 0.9999994112706736, iteration: 93984
loss: 1.0110474824905396,grad_norm: 0.8990259693571004, iteration: 93985
loss: 1.0089348554611206,grad_norm: 0.8814480572489418, iteration: 93986
loss: 1.0361090898513794,grad_norm: 0.8559920585293948, iteration: 93987
loss: 1.0106195211410522,grad_norm: 0.9999992799768298, iteration: 93988
loss: 1.0008182525634766,grad_norm: 0.9999990723254301, iteration: 93989
loss: 0.983493983745575,grad_norm: 0.9247579121690407, iteration: 93990
loss: 1.0137267112731934,grad_norm: 0.9116749590760559, iteration: 93991
loss: 1.0481642484664917,grad_norm: 0.9999991205051055, iteration: 93992
loss: 1.0271987915039062,grad_norm: 0.999999594849752, iteration: 93993
loss: 1.0011370182037354,grad_norm: 0.7512236254639681, iteration: 93994
loss: 1.0710116624832153,grad_norm: 0.9999991666546763, iteration: 93995
loss: 1.0595300197601318,grad_norm: 0.9999996453431594, iteration: 93996
loss: 1.0212128162384033,grad_norm: 0.7958532075453915, iteration: 93997
loss: 0.9810283184051514,grad_norm: 0.8539237164602177, iteration: 93998
loss: 1.1082463264465332,grad_norm: 1.000000023656377, iteration: 93999
loss: 1.0011537075042725,grad_norm: 0.9999993195926077, iteration: 94000
loss: 1.028832197189331,grad_norm: 0.9130853985266011, iteration: 94001
loss: 1.0121625661849976,grad_norm: 0.7586584691156844, iteration: 94002
loss: 1.0253031253814697,grad_norm: 0.9999991473381695, iteration: 94003
loss: 0.9759783148765564,grad_norm: 0.956022613236819, iteration: 94004
loss: 1.0034102201461792,grad_norm: 0.9333032486196272, iteration: 94005
loss: 1.001735806465149,grad_norm: 0.8125574596322687, iteration: 94006
loss: 1.0290277004241943,grad_norm: 0.9999997054039335, iteration: 94007
loss: 0.9993539452552795,grad_norm: 0.9108559653479957, iteration: 94008
loss: 1.005340576171875,grad_norm: 0.8203297937823899, iteration: 94009
loss: 1.0231711864471436,grad_norm: 0.9999998352827505, iteration: 94010
loss: 1.0087676048278809,grad_norm: 0.9999991446487936, iteration: 94011
loss: 0.9916993975639343,grad_norm: 0.8278805309073665, iteration: 94012
loss: 0.9980535507202148,grad_norm: 0.8934397525906161, iteration: 94013
loss: 1.0054932832717896,grad_norm: 0.8619402691963705, iteration: 94014
loss: 0.9893307089805603,grad_norm: 0.9999990197477322, iteration: 94015
loss: 1.0268269777297974,grad_norm: 0.9999991726822366, iteration: 94016
loss: 1.0087172985076904,grad_norm: 0.9370998798003192, iteration: 94017
loss: 1.0434952974319458,grad_norm: 0.9999991697047016, iteration: 94018
loss: 1.025215744972229,grad_norm: 0.9999995633506008, iteration: 94019
loss: 1.030592441558838,grad_norm: 0.8953189804747692, iteration: 94020
loss: 1.1310219764709473,grad_norm: 0.9999991402413512, iteration: 94021
loss: 1.0051870346069336,grad_norm: 0.9491791589154195, iteration: 94022
loss: 1.0189203023910522,grad_norm: 0.879430493427781, iteration: 94023
loss: 1.008113145828247,grad_norm: 0.9999992881362183, iteration: 94024
loss: 1.003340482711792,grad_norm: 0.8988734784811825, iteration: 94025
loss: 1.0597301721572876,grad_norm: 0.9999991756083534, iteration: 94026
loss: 1.0813806056976318,grad_norm: 0.9999990986888061, iteration: 94027
loss: 1.030018925666809,grad_norm: 0.9168402589523159, iteration: 94028
loss: 0.9373139142990112,grad_norm: 0.9366116504840547, iteration: 94029
loss: 0.980309784412384,grad_norm: 0.8714329571066174, iteration: 94030
loss: 1.0291786193847656,grad_norm: 0.7686969638094925, iteration: 94031
loss: 0.9975869059562683,grad_norm: 0.7830867879091877, iteration: 94032
loss: 0.9632418155670166,grad_norm: 0.8059298315714256, iteration: 94033
loss: 0.9716113805770874,grad_norm: 0.7448228707019123, iteration: 94034
loss: 0.9880340695381165,grad_norm: 0.9999991202898096, iteration: 94035
loss: 1.010194182395935,grad_norm: 0.9999992682807888, iteration: 94036
loss: 1.011765956878662,grad_norm: 0.7425818679879357, iteration: 94037
loss: 0.9789881706237793,grad_norm: 0.9029161034943659, iteration: 94038
loss: 1.0064952373504639,grad_norm: 0.9999991084008938, iteration: 94039
loss: 1.0502221584320068,grad_norm: 0.9999997919454887, iteration: 94040
loss: 1.0304508209228516,grad_norm: 0.9482580301504608, iteration: 94041
loss: 1.0000633001327515,grad_norm: 0.8920855448764378, iteration: 94042
loss: 1.0410538911819458,grad_norm: 0.9999995521659799, iteration: 94043
loss: 1.0376172065734863,grad_norm: 0.9003264455115235, iteration: 94044
loss: 0.984706699848175,grad_norm: 0.8188514100454592, iteration: 94045
loss: 0.9914960265159607,grad_norm: 0.9999991112273058, iteration: 94046
loss: 1.0028003454208374,grad_norm: 0.9999991252630275, iteration: 94047
loss: 1.013702630996704,grad_norm: 0.7670217157697694, iteration: 94048
loss: 1.050121545791626,grad_norm: 0.9999989748610646, iteration: 94049
loss: 1.0026777982711792,grad_norm: 0.9999991740704283, iteration: 94050
loss: 0.9732517600059509,grad_norm: 0.9999990795018106, iteration: 94051
loss: 1.1296095848083496,grad_norm: 0.9999990827873763, iteration: 94052
loss: 0.964699923992157,grad_norm: 0.8177742991099278, iteration: 94053
loss: 1.0035688877105713,grad_norm: 0.9999991946759896, iteration: 94054
loss: 1.0096471309661865,grad_norm: 0.9999991836284313, iteration: 94055
loss: 1.0782824754714966,grad_norm: 0.9999994592616652, iteration: 94056
loss: 0.9898071885108948,grad_norm: 0.7955687009117357, iteration: 94057
loss: 1.0047606229782104,grad_norm: 0.891337042359426, iteration: 94058
loss: 1.0082770586013794,grad_norm: 0.8470922645202661, iteration: 94059
loss: 1.0320948362350464,grad_norm: 0.9699025797050956, iteration: 94060
loss: 1.0364729166030884,grad_norm: 0.8785501981097789, iteration: 94061
loss: 1.0023977756500244,grad_norm: 0.8272857951837563, iteration: 94062
loss: 1.0037336349487305,grad_norm: 0.8941571539914603, iteration: 94063
loss: 1.051842451095581,grad_norm: 0.9999990750762192, iteration: 94064
loss: 1.0059876441955566,grad_norm: 0.9999990408899415, iteration: 94065
loss: 1.0060440301895142,grad_norm: 0.9895342082054549, iteration: 94066
loss: 1.0132372379302979,grad_norm: 0.8772000326865834, iteration: 94067
loss: 1.0688153505325317,grad_norm: 0.9999992250935776, iteration: 94068
loss: 1.0098779201507568,grad_norm: 0.7583783680790226, iteration: 94069
loss: 0.9762879610061646,grad_norm: 0.9743033375455163, iteration: 94070
loss: 1.0741920471191406,grad_norm: 0.9711385472747455, iteration: 94071
loss: 1.0351927280426025,grad_norm: 0.8901253492213603, iteration: 94072
loss: 0.9784823656082153,grad_norm: 0.9273650816158937, iteration: 94073
loss: 1.0293707847595215,grad_norm: 0.9999991533117628, iteration: 94074
loss: 1.0105311870574951,grad_norm: 0.821666317957876, iteration: 94075
loss: 1.0366151332855225,grad_norm: 0.9999990820585957, iteration: 94076
loss: 1.0216022729873657,grad_norm: 0.7940487031026467, iteration: 94077
loss: 1.0310323238372803,grad_norm: 0.9646713339893873, iteration: 94078
loss: 0.9944076538085938,grad_norm: 0.9999991138750771, iteration: 94079
loss: 0.9934369921684265,grad_norm: 0.9999990357536183, iteration: 94080
loss: 1.0017551183700562,grad_norm: 0.7760952502520212, iteration: 94081
loss: 0.973416805267334,grad_norm: 0.9999991944913468, iteration: 94082
loss: 1.003853678703308,grad_norm: 0.874150131649537, iteration: 94083
loss: 0.978763997554779,grad_norm: 0.8336201595389094, iteration: 94084
loss: 1.0195732116699219,grad_norm: 0.9940276027582292, iteration: 94085
loss: 1.0090855360031128,grad_norm: 0.7589415773859671, iteration: 94086
loss: 0.9911105632781982,grad_norm: 0.9816477284649857, iteration: 94087
loss: 1.008508563041687,grad_norm: 0.9999994134189641, iteration: 94088
loss: 1.0226932764053345,grad_norm: 0.9213717445897592, iteration: 94089
loss: 1.0069761276245117,grad_norm: 0.9651634668409412, iteration: 94090
loss: 1.0365246534347534,grad_norm: 0.9065486122185595, iteration: 94091
loss: 0.9687539339065552,grad_norm: 0.999999009416955, iteration: 94092
loss: 0.9841142892837524,grad_norm: 0.9357044364149036, iteration: 94093
loss: 1.0157374143600464,grad_norm: 0.9649690143258672, iteration: 94094
loss: 0.9744605422019958,grad_norm: 0.9177390924197105, iteration: 94095
loss: 1.0554649829864502,grad_norm: 0.8244813720551392, iteration: 94096
loss: 1.0456061363220215,grad_norm: 0.9009271520402653, iteration: 94097
loss: 1.0061874389648438,grad_norm: 0.7083786321345235, iteration: 94098
loss: 1.0036184787750244,grad_norm: 0.7968761439814173, iteration: 94099
loss: 0.9973966479301453,grad_norm: 0.8411090093679551, iteration: 94100
loss: 1.039008378982544,grad_norm: 0.9999991255139752, iteration: 94101
loss: 1.0661392211914062,grad_norm: 0.9999994116191848, iteration: 94102
loss: 0.9700620174407959,grad_norm: 0.9702038499899682, iteration: 94103
loss: 0.9995003342628479,grad_norm: 0.9999991754043963, iteration: 94104
loss: 1.016171932220459,grad_norm: 0.8176452056309863, iteration: 94105
loss: 0.9845588207244873,grad_norm: 0.9999991158387735, iteration: 94106
loss: 0.9890766143798828,grad_norm: 0.9999990143465991, iteration: 94107
loss: 1.1106458902359009,grad_norm: 0.987395144206424, iteration: 94108
loss: 0.9524625539779663,grad_norm: 0.8804011039870671, iteration: 94109
loss: 0.9827577471733093,grad_norm: 0.9999997953151939, iteration: 94110
loss: 0.9694275259971619,grad_norm: 0.9746018570575496, iteration: 94111
loss: 1.0134159326553345,grad_norm: 0.8146922308493204, iteration: 94112
loss: 1.0383087396621704,grad_norm: 0.9999990431073443, iteration: 94113
loss: 0.9987589716911316,grad_norm: 0.9999998404221822, iteration: 94114
loss: 1.0239471197128296,grad_norm: 0.9999997038909577, iteration: 94115
loss: 1.0790904760360718,grad_norm: 0.9999998751562958, iteration: 94116
loss: 1.0302191972732544,grad_norm: 0.9999991465855781, iteration: 94117
loss: 0.971392035484314,grad_norm: 0.8365387284255084, iteration: 94118
loss: 0.9911655187606812,grad_norm: 0.999999486655226, iteration: 94119
loss: 1.0487868785858154,grad_norm: 0.8036967773396697, iteration: 94120
loss: 1.0104081630706787,grad_norm: 0.8999591984367402, iteration: 94121
loss: 1.0621068477630615,grad_norm: 0.9999990990621199, iteration: 94122
loss: 1.0677357912063599,grad_norm: 0.9999998130212987, iteration: 94123
loss: 1.229270577430725,grad_norm: 0.9999998636138525, iteration: 94124
loss: 1.1670935153961182,grad_norm: 0.9999992166347489, iteration: 94125
loss: 1.0921499729156494,grad_norm: 0.9999994108539182, iteration: 94126
loss: 1.014146327972412,grad_norm: 0.8417473234428406, iteration: 94127
loss: 0.9940658807754517,grad_norm: 0.89880723040814, iteration: 94128
loss: 1.1002404689788818,grad_norm: 0.9999991382682982, iteration: 94129
loss: 1.1081422567367554,grad_norm: 0.9999994078596963, iteration: 94130
loss: 1.0169733762741089,grad_norm: 0.9587924863559206, iteration: 94131
loss: 1.02918541431427,grad_norm: 0.9999999550622009, iteration: 94132
loss: 1.054902195930481,grad_norm: 0.9205847940563706, iteration: 94133
loss: 0.9899992942810059,grad_norm: 0.893348857155034, iteration: 94134
loss: 1.181482195854187,grad_norm: 0.9999992890012385, iteration: 94135
loss: 1.1335110664367676,grad_norm: 0.9999992600339466, iteration: 94136
loss: 1.0407116413116455,grad_norm: 0.9861711853082026, iteration: 94137
loss: 0.9827951192855835,grad_norm: 0.9138046587636998, iteration: 94138
loss: 1.1365522146224976,grad_norm: 0.999999400832515, iteration: 94139
loss: 1.0666199922561646,grad_norm: 0.9999991035408191, iteration: 94140
loss: 1.0038526058197021,grad_norm: 0.9999991186709828, iteration: 94141
loss: 1.3999178409576416,grad_norm: 0.9999997853350067, iteration: 94142
loss: 1.1858738660812378,grad_norm: 0.9999999969592864, iteration: 94143
loss: 1.0531262159347534,grad_norm: 0.9999993619091964, iteration: 94144
loss: 1.1226470470428467,grad_norm: 0.9999991127609845, iteration: 94145
loss: 1.1077210903167725,grad_norm: 0.9999995895770762, iteration: 94146
loss: 1.025968074798584,grad_norm: 0.9999994104641479, iteration: 94147
loss: 1.0414953231811523,grad_norm: 0.9477958807259751, iteration: 94148
loss: 1.0852763652801514,grad_norm: 0.9999991619628192, iteration: 94149
loss: 1.0060056447982788,grad_norm: 0.9999990491280331, iteration: 94150
loss: 1.139815330505371,grad_norm: 0.9999997674560575, iteration: 94151
loss: 1.0548557043075562,grad_norm: 0.9999991539607536, iteration: 94152
loss: 1.1171183586120605,grad_norm: 0.9999992513544645, iteration: 94153
loss: 1.0215060710906982,grad_norm: 0.9378837337026236, iteration: 94154
loss: 0.9951852560043335,grad_norm: 0.9063192637086859, iteration: 94155
loss: 1.4430898427963257,grad_norm: 0.9999998104403578, iteration: 94156
loss: 1.0826737880706787,grad_norm: 0.9999996547312066, iteration: 94157
loss: 1.2593660354614258,grad_norm: 0.9999998943561387, iteration: 94158
loss: 1.0832992792129517,grad_norm: 0.9999993372269211, iteration: 94159
loss: 1.0211260318756104,grad_norm: 0.837403372715822, iteration: 94160
loss: 1.099001407623291,grad_norm: 0.9999994795714761, iteration: 94161
loss: 1.0568608045578003,grad_norm: 0.9999990588312738, iteration: 94162
loss: 0.9646720886230469,grad_norm: 0.9744849448295648, iteration: 94163
loss: 1.0891629457473755,grad_norm: 0.9999994779927718, iteration: 94164
loss: 1.0676714181900024,grad_norm: 0.9999991427860039, iteration: 94165
loss: 1.0495285987854004,grad_norm: 0.9868813140079156, iteration: 94166
loss: 0.9722561240196228,grad_norm: 0.9999991931711101, iteration: 94167
loss: 1.0149685144424438,grad_norm: 0.8747669789213393, iteration: 94168
loss: 1.0236759185791016,grad_norm: 0.9102676238571121, iteration: 94169
loss: 1.1455574035644531,grad_norm: 0.965239698254747, iteration: 94170
loss: 1.0889841318130493,grad_norm: 0.9999991705436924, iteration: 94171
loss: 1.0161958932876587,grad_norm: 0.9875990429250945, iteration: 94172
loss: 1.0220134258270264,grad_norm: 0.8293039852692259, iteration: 94173
loss: 1.1258445978164673,grad_norm: 0.999999943619811, iteration: 94174
loss: 0.9863288998603821,grad_norm: 0.9368856194235833, iteration: 94175
loss: 1.0748515129089355,grad_norm: 0.9999998116032379, iteration: 94176
loss: 1.022438645362854,grad_norm: 0.8916081361483906, iteration: 94177
loss: 1.1554365158081055,grad_norm: 0.9999999616110321, iteration: 94178
loss: 1.1009316444396973,grad_norm: 0.9999999399439518, iteration: 94179
loss: 1.0979548692703247,grad_norm: 0.9913140142816875, iteration: 94180
loss: 1.1133743524551392,grad_norm: 0.9999996151989082, iteration: 94181
loss: 1.3875787258148193,grad_norm: 0.9999996378517761, iteration: 94182
loss: 1.2099082469940186,grad_norm: 0.9999995409694397, iteration: 94183
loss: 1.1227664947509766,grad_norm: 0.9999992310982596, iteration: 94184
loss: 1.0310200452804565,grad_norm: 0.8976156096135933, iteration: 94185
loss: 1.18017578125,grad_norm: 0.9999998947666937, iteration: 94186
loss: 1.1326372623443604,grad_norm: 0.9999993096143482, iteration: 94187
loss: 0.9777676463127136,grad_norm: 0.7999368427972712, iteration: 94188
loss: 1.0264036655426025,grad_norm: 0.9999997182822187, iteration: 94189
loss: 1.1008572578430176,grad_norm: 0.9999996064837228, iteration: 94190
loss: 1.1006724834442139,grad_norm: 0.9999993990912848, iteration: 94191
loss: 0.9765213131904602,grad_norm: 0.8700050907039526, iteration: 94192
loss: 1.0045932531356812,grad_norm: 0.9502351430616637, iteration: 94193
loss: 1.4994505643844604,grad_norm: 0.9999995405201616, iteration: 94194
loss: 1.1447116136550903,grad_norm: 0.9999995196934357, iteration: 94195
loss: 1.1356710195541382,grad_norm: 0.9999990872742868, iteration: 94196
loss: 1.1545848846435547,grad_norm: 0.999999814235192, iteration: 94197
loss: 1.0795447826385498,grad_norm: 0.9999991340492951, iteration: 94198
loss: 1.0233772993087769,grad_norm: 0.9999990559658487, iteration: 94199
loss: 1.0191155672073364,grad_norm: 0.9999990617500414, iteration: 94200
loss: 0.9893302917480469,grad_norm: 0.999999546368895, iteration: 94201
loss: 1.089906096458435,grad_norm: 0.9999996623288546, iteration: 94202
loss: 0.9884768724441528,grad_norm: 0.8208705605268898, iteration: 94203
loss: 1.335003137588501,grad_norm: 0.9999995476632092, iteration: 94204
loss: 1.0095548629760742,grad_norm: 0.9999991020188185, iteration: 94205
loss: 1.1019067764282227,grad_norm: 0.9999995336214882, iteration: 94206
loss: 1.0526119470596313,grad_norm: 0.9999993751356121, iteration: 94207
loss: 1.0371321439743042,grad_norm: 0.9999995334373344, iteration: 94208
loss: 1.068763017654419,grad_norm: 0.9999994392086871, iteration: 94209
loss: 1.095799207687378,grad_norm: 0.9769877578095654, iteration: 94210
loss: 1.0641132593154907,grad_norm: 0.9999990980363797, iteration: 94211
loss: 1.0089043378829956,grad_norm: 0.7766614578409642, iteration: 94212
loss: 1.122578501701355,grad_norm: 0.9999994018313462, iteration: 94213
loss: 1.1415412425994873,grad_norm: 0.9999996185485565, iteration: 94214
loss: 1.0335383415222168,grad_norm: 0.9943003754658362, iteration: 94215
loss: 1.073020339012146,grad_norm: 0.9999995319092405, iteration: 94216
loss: 1.0516252517700195,grad_norm: 0.9999991539086962, iteration: 94217
loss: 1.0002497434616089,grad_norm: 0.9999992771528567, iteration: 94218
loss: 1.0952967405319214,grad_norm: 0.9999993189568149, iteration: 94219
loss: 1.1831647157669067,grad_norm: 0.9999991712605147, iteration: 94220
loss: 1.0027273893356323,grad_norm: 0.9999991790409385, iteration: 94221
loss: 1.0860912799835205,grad_norm: 0.999999716180398, iteration: 94222
loss: 1.1238460540771484,grad_norm: 0.9576654426651571, iteration: 94223
loss: 1.0640166997909546,grad_norm: 0.9999992573439191, iteration: 94224
loss: 1.0515791177749634,grad_norm: 0.999999338163647, iteration: 94225
loss: 1.071633219718933,grad_norm: 0.9999991718662711, iteration: 94226
loss: 1.119205355644226,grad_norm: 0.9999998787193884, iteration: 94227
loss: 0.9996738433837891,grad_norm: 0.7909464229852912, iteration: 94228
loss: 1.0557087659835815,grad_norm: 0.9400276637425542, iteration: 94229
loss: 1.2011629343032837,grad_norm: 0.9999997154742617, iteration: 94230
loss: 1.0432928800582886,grad_norm: 0.9999991636103187, iteration: 94231
loss: 1.0829825401306152,grad_norm: 0.9170851509208587, iteration: 94232
loss: 1.0814515352249146,grad_norm: 0.9999998271466821, iteration: 94233
loss: 1.092354655265808,grad_norm: 0.999999618580446, iteration: 94234
loss: 1.1300015449523926,grad_norm: 0.9999999409380375, iteration: 94235
loss: 1.3021388053894043,grad_norm: 0.9999998599404516, iteration: 94236
loss: 1.078439712524414,grad_norm: 0.8271840430766494, iteration: 94237
loss: 1.0058714151382446,grad_norm: 0.9999990670821961, iteration: 94238
loss: 1.0754348039627075,grad_norm: 0.9999996118995055, iteration: 94239
loss: 1.3025463819503784,grad_norm: 0.999999567218368, iteration: 94240
loss: 1.0859251022338867,grad_norm: 0.9999993985623652, iteration: 94241
loss: 1.1226568222045898,grad_norm: 0.8913679869166956, iteration: 94242
loss: 1.246653437614441,grad_norm: 0.999999404704844, iteration: 94243
loss: 1.088473916053772,grad_norm: 0.9999993659819695, iteration: 94244
loss: 1.0040701627731323,grad_norm: 0.9999991521337563, iteration: 94245
loss: 0.9738039374351501,grad_norm: 0.9492712748584475, iteration: 94246
loss: 1.106568694114685,grad_norm: 0.9999996500270539, iteration: 94247
loss: 1.0271471738815308,grad_norm: 0.9125380645262935, iteration: 94248
loss: 0.9981780052185059,grad_norm: 0.9504188526580316, iteration: 94249
loss: 1.0548605918884277,grad_norm: 0.9999999717484936, iteration: 94250
loss: 1.0468968152999878,grad_norm: 0.9999993296014079, iteration: 94251
loss: 1.259118676185608,grad_norm: 0.9999990811694892, iteration: 94252
loss: 1.056327223777771,grad_norm: 0.9987729757965038, iteration: 94253
loss: 1.2485781908035278,grad_norm: 0.9999996978875206, iteration: 94254
loss: 1.2992198467254639,grad_norm: 0.9999999186953977, iteration: 94255
loss: 1.0417346954345703,grad_norm: 0.9999992029157082, iteration: 94256
loss: 1.0268021821975708,grad_norm: 0.9999990960311477, iteration: 94257
loss: 1.0439218282699585,grad_norm: 0.9999999743812652, iteration: 94258
loss: 1.1920173168182373,grad_norm: 0.9999996816619171, iteration: 94259
loss: 1.1582372188568115,grad_norm: 0.9999999061752363, iteration: 94260
loss: 1.0565991401672363,grad_norm: 0.9316081401030131, iteration: 94261
loss: 1.0126382112503052,grad_norm: 0.8568830559594679, iteration: 94262
loss: 1.0317673683166504,grad_norm: 0.9999995984078031, iteration: 94263
loss: 1.167015552520752,grad_norm: 0.9999999203457923, iteration: 94264
loss: 1.1724985837936401,grad_norm: 0.9999998585468055, iteration: 94265
loss: 1.0503569841384888,grad_norm: 0.999999886094034, iteration: 94266
loss: 1.3209733963012695,grad_norm: 0.9999995815952389, iteration: 94267
loss: 1.0314218997955322,grad_norm: 0.8779293016747477, iteration: 94268
loss: 1.1202046871185303,grad_norm: 0.9407632809044233, iteration: 94269
loss: 1.009516716003418,grad_norm: 0.7804838684683953, iteration: 94270
loss: 1.053318977355957,grad_norm: 0.9999990916256721, iteration: 94271
loss: 1.220511555671692,grad_norm: 0.9999990476447359, iteration: 94272
loss: 1.1702136993408203,grad_norm: 0.9999993224319793, iteration: 94273
loss: 1.0138205289840698,grad_norm: 0.8063407882086185, iteration: 94274
loss: 1.1419240236282349,grad_norm: 0.9999992939334078, iteration: 94275
loss: 0.9776568412780762,grad_norm: 0.9999991054033549, iteration: 94276
loss: 1.1124083995819092,grad_norm: 0.9999996327431632, iteration: 94277
loss: 0.9894607067108154,grad_norm: 0.9999991301194652, iteration: 94278
loss: 1.044996738433838,grad_norm: 0.9999991739883308, iteration: 94279
loss: 0.9867252111434937,grad_norm: 0.9289189299990955, iteration: 94280
loss: 1.0983636379241943,grad_norm: 0.9999989860021407, iteration: 94281
loss: 1.0250186920166016,grad_norm: 0.9399753744025603, iteration: 94282
loss: 1.0773447751998901,grad_norm: 0.999999199330226, iteration: 94283
loss: 1.1027661561965942,grad_norm: 0.9999992689134442, iteration: 94284
loss: 1.0265166759490967,grad_norm: 0.999999647400966, iteration: 94285
loss: 1.0354201793670654,grad_norm: 0.9590913030855863, iteration: 94286
loss: 1.0311851501464844,grad_norm: 0.9999992138884053, iteration: 94287
loss: 1.034224271774292,grad_norm: 0.9999991657082903, iteration: 94288
loss: 1.2053276300430298,grad_norm: 0.9999998027085643, iteration: 94289
loss: 1.1884608268737793,grad_norm: 0.9999994882109432, iteration: 94290
loss: 1.0377423763275146,grad_norm: 0.9999992765266682, iteration: 94291
loss: 1.064828634262085,grad_norm: 0.8580274259317834, iteration: 94292
loss: 1.2480872869491577,grad_norm: 0.9999998019156097, iteration: 94293
loss: 1.1586127281188965,grad_norm: 0.9999997414481447, iteration: 94294
loss: 1.1165260076522827,grad_norm: 0.9999990877083288, iteration: 94295
loss: 1.0610257387161255,grad_norm: 0.9999991110175586, iteration: 94296
loss: 1.1740070581436157,grad_norm: 0.9999994881710145, iteration: 94297
loss: 1.0039085149765015,grad_norm: 0.9999992678564621, iteration: 94298
loss: 1.266928791999817,grad_norm: 0.999999974167886, iteration: 94299
loss: 1.0314444303512573,grad_norm: 0.9999995510069905, iteration: 94300
loss: 1.1222137212753296,grad_norm: 1.0000000632268844, iteration: 94301
loss: 1.1580148935317993,grad_norm: 0.9999991299224804, iteration: 94302
loss: 1.0680335760116577,grad_norm: 0.9999990877627778, iteration: 94303
loss: 1.2119488716125488,grad_norm: 0.9999993152734252, iteration: 94304
loss: 1.06133234500885,grad_norm: 0.9999994120155782, iteration: 94305
loss: 1.0044549703598022,grad_norm: 0.9999991962789624, iteration: 94306
loss: 1.2032766342163086,grad_norm: 0.9999997819676619, iteration: 94307
loss: 1.0412815809249878,grad_norm: 0.9999994689573626, iteration: 94308
loss: 1.0490937232971191,grad_norm: 0.9999990992459863, iteration: 94309
loss: 1.0015277862548828,grad_norm: 0.9851209396178692, iteration: 94310
loss: 1.0467936992645264,grad_norm: 0.9999994013763704, iteration: 94311
loss: 1.0769720077514648,grad_norm: 0.9999996704145342, iteration: 94312
loss: 0.9979506731033325,grad_norm: 0.9726934852093281, iteration: 94313
loss: 0.9487065672874451,grad_norm: 0.929143124097545, iteration: 94314
loss: 1.102213978767395,grad_norm: 0.8737959884908099, iteration: 94315
loss: 1.0109940767288208,grad_norm: 0.9999992046893696, iteration: 94316
loss: 1.0458378791809082,grad_norm: 0.934248249742795, iteration: 94317
loss: 0.9992491006851196,grad_norm: 0.8444361696316988, iteration: 94318
loss: 1.0173507928848267,grad_norm: 0.9770692231615932, iteration: 94319
loss: 1.07722008228302,grad_norm: 0.9999993402648387, iteration: 94320
loss: 1.1046953201293945,grad_norm: 0.9999995023937194, iteration: 94321
loss: 1.091762661933899,grad_norm: 0.9999990988279287, iteration: 94322
loss: 1.1521779298782349,grad_norm: 0.9999999643138919, iteration: 94323
loss: 1.116860032081604,grad_norm: 0.999999627173997, iteration: 94324
loss: 1.0323678255081177,grad_norm: 0.999999410224561, iteration: 94325
loss: 1.1482517719268799,grad_norm: 0.9999996890440059, iteration: 94326
loss: 0.9549326300621033,grad_norm: 0.9999993242810239, iteration: 94327
loss: 1.0091962814331055,grad_norm: 0.9999990224023217, iteration: 94328
loss: 1.0251984596252441,grad_norm: 0.8305150443286056, iteration: 94329
loss: 1.0318009853363037,grad_norm: 0.9999991700059526, iteration: 94330
loss: 1.1510337591171265,grad_norm: 0.999999868772973, iteration: 94331
loss: 1.085191249847412,grad_norm: 0.9999991950311006, iteration: 94332
loss: 1.0766079425811768,grad_norm: 0.914983484701022, iteration: 94333
loss: 1.0348687171936035,grad_norm: 0.9637051727973013, iteration: 94334
loss: 1.382271409034729,grad_norm: 0.9999998819823872, iteration: 94335
loss: 1.1033753156661987,grad_norm: 0.9999999247475043, iteration: 94336
loss: 1.019668698310852,grad_norm: 0.999999093536951, iteration: 94337
loss: 1.1031659841537476,grad_norm: 0.9829460711273925, iteration: 94338
loss: 0.9873070120811462,grad_norm: 0.9999992333943645, iteration: 94339
loss: 1.1563981771469116,grad_norm: 0.9999991819800201, iteration: 94340
loss: 1.00962495803833,grad_norm: 0.999999421586706, iteration: 94341
loss: 1.2022502422332764,grad_norm: 0.9999998717120191, iteration: 94342
loss: 1.0612175464630127,grad_norm: 0.9999998476990306, iteration: 94343
loss: 1.047919750213623,grad_norm: 0.9999993076607792, iteration: 94344
loss: 1.1420836448669434,grad_norm: 0.9999990815886824, iteration: 94345
loss: 1.0644218921661377,grad_norm: 0.9999992831941245, iteration: 94346
loss: 1.193274974822998,grad_norm: 0.9999996756523175, iteration: 94347
loss: 1.0151008367538452,grad_norm: 0.8868514493040711, iteration: 94348
loss: 1.0211910009384155,grad_norm: 0.9337820541141588, iteration: 94349
loss: 1.0089761018753052,grad_norm: 0.9639146774437362, iteration: 94350
loss: 1.1844409704208374,grad_norm: 0.9999995368247655, iteration: 94351
loss: 1.0301544666290283,grad_norm: 0.9999990006885499, iteration: 94352
loss: 1.0573941469192505,grad_norm: 0.9999991816101068, iteration: 94353
loss: 1.1314650774002075,grad_norm: 0.9999994830878288, iteration: 94354
loss: 1.1794509887695312,grad_norm: 0.9999998994982582, iteration: 94355
loss: 1.0481263399124146,grad_norm: 0.9999998844770293, iteration: 94356
loss: 1.0515536069869995,grad_norm: 0.9999991540124084, iteration: 94357
loss: 1.1196653842926025,grad_norm: 0.9999998739677795, iteration: 94358
loss: 1.0407119989395142,grad_norm: 0.9861020281763556, iteration: 94359
loss: 1.150928258895874,grad_norm: 0.9999999035830653, iteration: 94360
loss: 0.9810355305671692,grad_norm: 0.9999990427167049, iteration: 94361
loss: 1.1200199127197266,grad_norm: 0.9999993661569789, iteration: 94362
loss: 1.1354477405548096,grad_norm: 0.9933434724338248, iteration: 94363
loss: 1.252327799797058,grad_norm: 0.999999471696527, iteration: 94364
loss: 1.182909607887268,grad_norm: 0.9999997659697223, iteration: 94365
loss: 1.0480949878692627,grad_norm: 0.9999994281274345, iteration: 94366
loss: 1.1615010499954224,grad_norm: 0.9999993197758011, iteration: 94367
loss: 1.2621607780456543,grad_norm: 0.9999995568331644, iteration: 94368
loss: 1.0654546022415161,grad_norm: 0.9999992881408478, iteration: 94369
loss: 1.2130846977233887,grad_norm: 0.9999999195262314, iteration: 94370
loss: 1.0975111722946167,grad_norm: 0.999999819582228, iteration: 94371
loss: 1.1261385679244995,grad_norm: 0.9999996612846439, iteration: 94372
loss: 1.1002895832061768,grad_norm: 0.9999994490272639, iteration: 94373
loss: 1.1049033403396606,grad_norm: 0.9999998524417224, iteration: 94374
loss: 1.0780948400497437,grad_norm: 0.9999997431106293, iteration: 94375
loss: 1.2656223773956299,grad_norm: 0.9999996515503704, iteration: 94376
loss: 1.0317654609680176,grad_norm: 0.9999992903892364, iteration: 94377
loss: 1.062953233718872,grad_norm: 0.9999992688553616, iteration: 94378
loss: 1.013911247253418,grad_norm: 0.9999990584682023, iteration: 94379
loss: 1.032052993774414,grad_norm: 0.9999993629815542, iteration: 94380
loss: 1.0952335596084595,grad_norm: 0.9999995634451058, iteration: 94381
loss: 1.1528784036636353,grad_norm: 0.9999993439585291, iteration: 94382
loss: 1.0476278066635132,grad_norm: 0.9999994368078688, iteration: 94383
loss: 1.1030681133270264,grad_norm: 0.9999996659923583, iteration: 94384
loss: 1.121131420135498,grad_norm: 0.9999991182999667, iteration: 94385
loss: 1.0797474384307861,grad_norm: 1.0000000663256985, iteration: 94386
loss: 1.0636271238327026,grad_norm: 0.9999998435188159, iteration: 94387
loss: 1.0909967422485352,grad_norm: 0.9999990505525751, iteration: 94388
loss: 1.221186637878418,grad_norm: 0.9999999215877203, iteration: 94389
loss: 1.0131117105484009,grad_norm: 0.9999995588604759, iteration: 94390
loss: 1.3056081533432007,grad_norm: 0.9999996688444197, iteration: 94391
loss: 1.183336615562439,grad_norm: 0.9999998555457003, iteration: 94392
loss: 1.104892373085022,grad_norm: 0.999999303259765, iteration: 94393
loss: 1.1538273096084595,grad_norm: 0.9999991716882173, iteration: 94394
loss: 1.157475233078003,grad_norm: 0.9999996194814509, iteration: 94395
loss: 1.0244377851486206,grad_norm: 0.9999992593785564, iteration: 94396
loss: 1.0752453804016113,grad_norm: 0.9999992775248926, iteration: 94397
loss: 1.1086403131484985,grad_norm: 0.9999995098422313, iteration: 94398
loss: 1.1544663906097412,grad_norm: 0.9999995859869999, iteration: 94399
loss: 1.1486083269119263,grad_norm: 0.9999991589885016, iteration: 94400
loss: 1.0183008909225464,grad_norm: 0.9999991541870067, iteration: 94401
loss: 1.0024456977844238,grad_norm: 0.9999994229355489, iteration: 94402
loss: 1.136204719543457,grad_norm: 0.9999990278726615, iteration: 94403
loss: 1.280066728591919,grad_norm: 0.9999996019386602, iteration: 94404
loss: 1.1958186626434326,grad_norm: 0.999999671753774, iteration: 94405
loss: 1.077886939048767,grad_norm: 0.999999441650869, iteration: 94406
loss: 1.1907647848129272,grad_norm: 0.999999220105559, iteration: 94407
loss: 1.2927881479263306,grad_norm: 0.9999998806820017, iteration: 94408
loss: 1.1369720697402954,grad_norm: 0.9999996506335064, iteration: 94409
loss: 1.0353925228118896,grad_norm: 0.9999999487691514, iteration: 94410
loss: 1.0663727521896362,grad_norm: 0.9999999826918742, iteration: 94411
loss: 1.0061061382293701,grad_norm: 0.9536418887040805, iteration: 94412
loss: 1.0719043016433716,grad_norm: 0.9999995926032489, iteration: 94413
loss: 1.104507327079773,grad_norm: 0.999999147894988, iteration: 94414
loss: 1.0498626232147217,grad_norm: 0.9999990125002717, iteration: 94415
loss: 1.0211538076400757,grad_norm: 0.9519135724767592, iteration: 94416
loss: 1.1764945983886719,grad_norm: 0.9999998887537633, iteration: 94417
loss: 1.0484791994094849,grad_norm: 0.9999998973459795, iteration: 94418
loss: 1.1653655767440796,grad_norm: 0.9999996467609963, iteration: 94419
loss: 1.0480777025222778,grad_norm: 0.9667463927486127, iteration: 94420
loss: 0.9713892340660095,grad_norm: 0.9689705577544844, iteration: 94421
loss: 1.1615222692489624,grad_norm: 0.9999999282026338, iteration: 94422
loss: 1.0731011629104614,grad_norm: 0.9284465670068357, iteration: 94423
loss: 1.0953309535980225,grad_norm: 0.9999993664424448, iteration: 94424
loss: 1.0946133136749268,grad_norm: 0.9999998623407678, iteration: 94425
loss: 1.168885350227356,grad_norm: 0.9304566800623524, iteration: 94426
loss: 1.0767024755477905,grad_norm: 0.9999992284275913, iteration: 94427
loss: 1.0661273002624512,grad_norm: 0.9994008153045263, iteration: 94428
loss: 1.0914318561553955,grad_norm: 0.9999992255785631, iteration: 94429
loss: 1.0843604803085327,grad_norm: 0.9999999299717329, iteration: 94430
loss: 1.0720109939575195,grad_norm: 0.9999998382683923, iteration: 94431
loss: 1.060950756072998,grad_norm: 0.99999985101639, iteration: 94432
loss: 1.0133048295974731,grad_norm: 0.9999992443474466, iteration: 94433
loss: 1.0895169973373413,grad_norm: 0.999999264021575, iteration: 94434
loss: 1.0199933052062988,grad_norm: 0.9999995636967851, iteration: 94435
loss: 1.316955804824829,grad_norm: 0.9999997200671386, iteration: 94436
loss: 1.1515662670135498,grad_norm: 0.9999998855635449, iteration: 94437
loss: 1.1167734861373901,grad_norm: 0.9999991457707331, iteration: 94438
loss: 1.014265537261963,grad_norm: 0.9999990650468246, iteration: 94439
loss: 1.154128909111023,grad_norm: 0.9999995779785577, iteration: 94440
loss: 1.0840333700180054,grad_norm: 1.0000000485516651, iteration: 94441
loss: 0.9641967415809631,grad_norm: 0.9587505258119431, iteration: 94442
loss: 1.062033772468567,grad_norm: 0.9999991873001985, iteration: 94443
loss: 1.0898343324661255,grad_norm: 0.9999999806127768, iteration: 94444
loss: 1.0158227682113647,grad_norm: 0.9999991159763166, iteration: 94445
loss: 1.0320526361465454,grad_norm: 0.9999997377216319, iteration: 94446
loss: 1.0454773902893066,grad_norm: 0.9999994729327082, iteration: 94447
loss: 1.0568463802337646,grad_norm: 0.9999993572160021, iteration: 94448
loss: 1.0394294261932373,grad_norm: 0.9999999050362649, iteration: 94449
loss: 1.0291026830673218,grad_norm: 0.9999990856528721, iteration: 94450
loss: 0.9928315877914429,grad_norm: 0.9999992493614455, iteration: 94451
loss: 1.177231788635254,grad_norm: 0.9999994456871661, iteration: 94452
loss: 0.9975486397743225,grad_norm: 0.9999998008056475, iteration: 94453
loss: 1.0288543701171875,grad_norm: 0.9999996345614616, iteration: 94454
loss: 1.0486072301864624,grad_norm: 0.999999717323419, iteration: 94455
loss: 1.1371052265167236,grad_norm: 0.9999995728599791, iteration: 94456
loss: 1.0821900367736816,grad_norm: 0.999999146765597, iteration: 94457
loss: 1.1007895469665527,grad_norm: 0.9999992449687917, iteration: 94458
loss: 1.3192533254623413,grad_norm: 0.9999995643157018, iteration: 94459
loss: 0.9902992248535156,grad_norm: 0.9999992154925162, iteration: 94460
loss: 1.0294203758239746,grad_norm: 0.9999991579159669, iteration: 94461
loss: 1.120839238166809,grad_norm: 0.9999996338957999, iteration: 94462
loss: 1.225563645362854,grad_norm: 0.9999998363191936, iteration: 94463
loss: 1.053067684173584,grad_norm: 0.9999999242016981, iteration: 94464
loss: 1.0422807931900024,grad_norm: 0.9986089017289805, iteration: 94465
loss: 1.001076340675354,grad_norm: 0.9999990023710872, iteration: 94466
loss: 1.1335736513137817,grad_norm: 0.9999992422011924, iteration: 94467
loss: 1.0854856967926025,grad_norm: 0.9999995889561507, iteration: 94468
loss: 1.0370564460754395,grad_norm: 0.9999989334227779, iteration: 94469
loss: 1.0642943382263184,grad_norm: 0.9999993998774437, iteration: 94470
loss: 1.026627779006958,grad_norm: 0.9999999328354885, iteration: 94471
loss: 1.0484598875045776,grad_norm: 0.9999992574043618, iteration: 94472
loss: 1.0977377891540527,grad_norm: 0.9999994140065075, iteration: 94473
loss: 1.1445586681365967,grad_norm: 0.9999997235899303, iteration: 94474
loss: 1.1524149179458618,grad_norm: 0.9999995661375458, iteration: 94475
loss: 1.050743818283081,grad_norm: 0.9999996669306663, iteration: 94476
loss: 1.054885745048523,grad_norm: 0.9999998161653013, iteration: 94477
loss: 1.2583431005477905,grad_norm: 0.9999998837567248, iteration: 94478
loss: 1.16682767868042,grad_norm: 0.9999998902256376, iteration: 94479
loss: 1.1300634145736694,grad_norm: 0.9999993119736368, iteration: 94480
loss: 1.0119937658309937,grad_norm: 0.9999989988500774, iteration: 94481
loss: 1.0490411520004272,grad_norm: 0.9999998323549188, iteration: 94482
loss: 1.0035626888275146,grad_norm: 0.9999994867769041, iteration: 94483
loss: 1.0425353050231934,grad_norm: 0.9999994344546047, iteration: 94484
loss: 1.08528733253479,grad_norm: 0.9999999860049984, iteration: 94485
loss: 1.0542652606964111,grad_norm: 0.9999997555920115, iteration: 94486
loss: 1.1084834337234497,grad_norm: 0.9999990670408826, iteration: 94487
loss: 1.0554735660552979,grad_norm: 0.9999993596665251, iteration: 94488
loss: 1.0679383277893066,grad_norm: 0.9999994747915338, iteration: 94489
loss: 0.984438419342041,grad_norm: 0.99999903656825, iteration: 94490
loss: 1.0002423524856567,grad_norm: 0.9999993570225232, iteration: 94491
loss: 1.1067612171173096,grad_norm: 0.9999996969926086, iteration: 94492
loss: 1.048364281654358,grad_norm: 0.9999998096890459, iteration: 94493
loss: 1.0534471273422241,grad_norm: 0.9999998728969866, iteration: 94494
loss: 1.001452088356018,grad_norm: 0.999999234382907, iteration: 94495
loss: 0.9746493697166443,grad_norm: 0.999999352413354, iteration: 94496
loss: 1.03267240524292,grad_norm: 0.8630089968855011, iteration: 94497
loss: 1.1475697755813599,grad_norm: 0.9999994619972359, iteration: 94498
loss: 1.53331458568573,grad_norm: 0.9999997962975415, iteration: 94499
loss: 1.0461941957473755,grad_norm: 0.9999992990572167, iteration: 94500
loss: 1.082358479499817,grad_norm: 0.9999998939845723, iteration: 94501
loss: 1.0336370468139648,grad_norm: 0.9999995049140737, iteration: 94502
loss: 1.0611991882324219,grad_norm: 0.999999148134852, iteration: 94503
loss: 1.0093994140625,grad_norm: 0.9999993483218806, iteration: 94504
loss: 1.0283607244491577,grad_norm: 0.8268532667456141, iteration: 94505
loss: 1.0473178625106812,grad_norm: 0.9999989652592007, iteration: 94506
loss: 1.1641054153442383,grad_norm: 0.9999997270935933, iteration: 94507
loss: 1.0369926691055298,grad_norm: 0.999999307936722, iteration: 94508
loss: 1.074802041053772,grad_norm: 0.9999998961950216, iteration: 94509
loss: 1.0314836502075195,grad_norm: 0.9999992141077964, iteration: 94510
loss: 1.055121898651123,grad_norm: 0.9999993283445344, iteration: 94511
loss: 1.2207790613174438,grad_norm: 0.999999297292108, iteration: 94512
loss: 1.0617352724075317,grad_norm: 0.9999997659765073, iteration: 94513
loss: 1.1034972667694092,grad_norm: 0.9999995561155308, iteration: 94514
loss: 1.3192580938339233,grad_norm: 0.9999997434827, iteration: 94515
loss: 1.0000337362289429,grad_norm: 0.9999993983279473, iteration: 94516
loss: 1.0004440546035767,grad_norm: 0.8187274748311015, iteration: 94517
loss: 1.0180333852767944,grad_norm: 0.999999108373132, iteration: 94518
loss: 1.1998528242111206,grad_norm: 0.9999999048488113, iteration: 94519
loss: 1.0653069019317627,grad_norm: 0.9999991136304535, iteration: 94520
loss: 1.0754917860031128,grad_norm: 0.9999995992393161, iteration: 94521
loss: 1.0262516736984253,grad_norm: 0.9999993049573994, iteration: 94522
loss: 1.0544472932815552,grad_norm: 0.9999995991781726, iteration: 94523
loss: 1.089423418045044,grad_norm: 0.9999992119905896, iteration: 94524
loss: 0.9646955132484436,grad_norm: 0.8745330259467105, iteration: 94525
loss: 1.0597721338272095,grad_norm: 0.996630832345736, iteration: 94526
loss: 1.0783840417861938,grad_norm: 0.9999993002415478, iteration: 94527
loss: 1.1651972532272339,grad_norm: 0.9999992427255189, iteration: 94528
loss: 1.1326828002929688,grad_norm: 0.999999225617698, iteration: 94529
loss: 1.0211468935012817,grad_norm: 0.7752161575660989, iteration: 94530
loss: 1.14255690574646,grad_norm: 0.9999998635035522, iteration: 94531
loss: 0.9548296928405762,grad_norm: 0.9975427277035658, iteration: 94532
loss: 1.0451668500900269,grad_norm: 0.8673815479745898, iteration: 94533
loss: 0.973267674446106,grad_norm: 0.9423287361880526, iteration: 94534
loss: 1.021072506904602,grad_norm: 0.9999990467435371, iteration: 94535
loss: 1.1210315227508545,grad_norm: 0.9999995222547277, iteration: 94536
loss: 1.2434464693069458,grad_norm: 0.9999992682637965, iteration: 94537
loss: 0.9622671008110046,grad_norm: 0.9718451767172993, iteration: 94538
loss: 1.232089638710022,grad_norm: 0.9999999013646614, iteration: 94539
loss: 1.0355327129364014,grad_norm: 0.8850286640541787, iteration: 94540
loss: 1.0559412240982056,grad_norm: 0.9999996119661035, iteration: 94541
loss: 1.0429301261901855,grad_norm: 0.9999993793877854, iteration: 94542
loss: 1.0880324840545654,grad_norm: 0.9999993175161855, iteration: 94543
loss: 1.137142300605774,grad_norm: 0.9999999094977113, iteration: 94544
loss: 1.0412495136260986,grad_norm: 0.9999993305586428, iteration: 94545
loss: 0.9928207397460938,grad_norm: 0.9535762546276878, iteration: 94546
loss: 1.065801739692688,grad_norm: 0.9750799728521503, iteration: 94547
loss: 1.2380540370941162,grad_norm: 0.9999997841560482, iteration: 94548
loss: 1.061681866645813,grad_norm: 0.9999993790792868, iteration: 94549
loss: 1.0142202377319336,grad_norm: 0.9999991923186243, iteration: 94550
loss: 1.2029122114181519,grad_norm: 0.9999993337563667, iteration: 94551
loss: 1.0895801782608032,grad_norm: 0.9999991644372133, iteration: 94552
loss: 1.0648136138916016,grad_norm: 0.9999998959125618, iteration: 94553
loss: 1.0097379684448242,grad_norm: 0.95918297895604, iteration: 94554
loss: 1.0493433475494385,grad_norm: 0.9999991391195745, iteration: 94555
loss: 1.0382049083709717,grad_norm: 0.9999995706259401, iteration: 94556
loss: 0.9860410094261169,grad_norm: 0.8609761841832905, iteration: 94557
loss: 1.0091438293457031,grad_norm: 0.8350100588079168, iteration: 94558
loss: 0.9936604499816895,grad_norm: 0.9778351152555755, iteration: 94559
loss: 1.0278198719024658,grad_norm: 0.9999998617766896, iteration: 94560
loss: 1.0581010580062866,grad_norm: 0.9999997295801584, iteration: 94561
loss: 1.056841254234314,grad_norm: 0.9999998314744961, iteration: 94562
loss: 0.9983870387077332,grad_norm: 0.9947689042786233, iteration: 94563
loss: 1.0476655960083008,grad_norm: 0.9999990383735634, iteration: 94564
loss: 1.120132327079773,grad_norm: 0.9999998876276582, iteration: 94565
loss: 1.029922604560852,grad_norm: 0.9999991167432843, iteration: 94566
loss: 0.9991265535354614,grad_norm: 0.9999995797534, iteration: 94567
loss: 1.095767617225647,grad_norm: 0.9999996667695182, iteration: 94568
loss: 1.0587577819824219,grad_norm: 0.9999996295823667, iteration: 94569
loss: 0.9974062442779541,grad_norm: 0.8922206571200963, iteration: 94570
loss: 0.9928674697875977,grad_norm: 0.9999992335080113, iteration: 94571
loss: 1.0056761503219604,grad_norm: 0.9999990584734862, iteration: 94572
loss: 1.0005817413330078,grad_norm: 0.9999989404227856, iteration: 94573
loss: 1.019027590751648,grad_norm: 0.8466230470139325, iteration: 94574
loss: 1.007993459701538,grad_norm: 0.9999995438444761, iteration: 94575
loss: 0.982643723487854,grad_norm: 0.9234865439607002, iteration: 94576
loss: 1.0784205198287964,grad_norm: 0.999999870776633, iteration: 94577
loss: 1.1391617059707642,grad_norm: 0.9999999789638587, iteration: 94578
loss: 1.0615308284759521,grad_norm: 0.9999993125394093, iteration: 94579
loss: 1.1229863166809082,grad_norm: 0.9999993927309165, iteration: 94580
loss: 0.9992952346801758,grad_norm: 0.9999990409523328, iteration: 94581
loss: 1.041914463043213,grad_norm: 0.8239068244340818, iteration: 94582
loss: 1.0397977828979492,grad_norm: 0.9999999426944446, iteration: 94583
loss: 1.0749573707580566,grad_norm: 0.9719899379291856, iteration: 94584
loss: 1.001160979270935,grad_norm: 0.9593263930302913, iteration: 94585
loss: 1.051725149154663,grad_norm: 0.9750024624062849, iteration: 94586
loss: 1.0471128225326538,grad_norm: 0.9999999496422192, iteration: 94587
loss: 1.0082120895385742,grad_norm: 0.9999989449754623, iteration: 94588
loss: 1.0461326837539673,grad_norm: 0.9554518066680617, iteration: 94589
loss: 0.9937230944633484,grad_norm: 0.985501455014509, iteration: 94590
loss: 1.0591894388198853,grad_norm: 1.000000073701454, iteration: 94591
loss: 1.115090012550354,grad_norm: 0.9999995283499772, iteration: 94592
loss: 1.0827707052230835,grad_norm: 0.9999998645023261, iteration: 94593
loss: 1.0523295402526855,grad_norm: 0.9999996953395184, iteration: 94594
loss: 1.2320399284362793,grad_norm: 0.9999998029641165, iteration: 94595
loss: 1.0101999044418335,grad_norm: 0.9999998152162102, iteration: 94596
loss: 0.9659755825996399,grad_norm: 0.9619813087505803, iteration: 94597
loss: 1.2618170976638794,grad_norm: 0.9999997239833688, iteration: 94598
loss: 1.114147663116455,grad_norm: 0.9999997742911823, iteration: 94599
loss: 1.0331391096115112,grad_norm: 0.9999998269139867, iteration: 94600
loss: 0.9996835589408875,grad_norm: 0.999999409809537, iteration: 94601
loss: 1.0432045459747314,grad_norm: 0.9999998525088449, iteration: 94602
loss: 1.154831886291504,grad_norm: 0.9999998872594665, iteration: 94603
loss: 1.0633710622787476,grad_norm: 0.9999998000217802, iteration: 94604
loss: 0.9727450013160706,grad_norm: 0.8853287839937946, iteration: 94605
loss: 0.9821406006813049,grad_norm: 0.9999994986792295, iteration: 94606
loss: 1.1930471658706665,grad_norm: 0.9999992383041096, iteration: 94607
loss: 1.0700016021728516,grad_norm: 0.9999990449182339, iteration: 94608
loss: 1.138046145439148,grad_norm: 0.9999992720731464, iteration: 94609
loss: 0.9739326238632202,grad_norm: 0.9210332765970851, iteration: 94610
loss: 1.0400147438049316,grad_norm: 0.9999992887438762, iteration: 94611
loss: 1.0310136079788208,grad_norm: 0.9862708576114556, iteration: 94612
loss: 1.0761349201202393,grad_norm: 0.9999997461526758, iteration: 94613
loss: 1.023300051689148,grad_norm: 0.9999992052492639, iteration: 94614
loss: 1.1160156726837158,grad_norm: 0.9999998370664952, iteration: 94615
loss: 1.0110585689544678,grad_norm: 0.9999990254486372, iteration: 94616
loss: 1.0185391902923584,grad_norm: 0.9999998356229909, iteration: 94617
loss: 1.0833698511123657,grad_norm: 0.9999992038711941, iteration: 94618
loss: 1.0526540279388428,grad_norm: 0.9999994777970128, iteration: 94619
loss: 1.1924128532409668,grad_norm: 0.9999995699449274, iteration: 94620
loss: 1.0326193571090698,grad_norm: 0.9999991431358987, iteration: 94621
loss: 1.0532002449035645,grad_norm: 0.9999997271730733, iteration: 94622
loss: 1.0940614938735962,grad_norm: 0.9999999628036267, iteration: 94623
loss: 1.1675814390182495,grad_norm: 0.9999998782348293, iteration: 94624
loss: 1.1077282428741455,grad_norm: 0.9999998880968126, iteration: 94625
loss: 1.0252408981323242,grad_norm: 0.9999994051240649, iteration: 94626
loss: 0.9820288419723511,grad_norm: 0.8002140244771704, iteration: 94627
loss: 1.0644795894622803,grad_norm: 0.9999990855112176, iteration: 94628
loss: 1.0009771585464478,grad_norm: 0.9999992043403277, iteration: 94629
loss: 1.244796633720398,grad_norm: 0.9999997661146421, iteration: 94630
loss: 1.3552266359329224,grad_norm: 1.0000000349037683, iteration: 94631
loss: 1.0182960033416748,grad_norm: 0.9102347412212306, iteration: 94632
loss: 1.0102105140686035,grad_norm: 0.9999992998588391, iteration: 94633
loss: 1.0633565187454224,grad_norm: 0.9999998189003374, iteration: 94634
loss: 1.0630040168762207,grad_norm: 0.9999992261218877, iteration: 94635
loss: 1.0233491659164429,grad_norm: 0.9492894714183386, iteration: 94636
loss: 1.1134699583053589,grad_norm: 0.9235810706445874, iteration: 94637
loss: 1.0359134674072266,grad_norm: 0.9999997365890378, iteration: 94638
loss: 1.0638115406036377,grad_norm: 0.7809903422294144, iteration: 94639
loss: 1.129966378211975,grad_norm: 0.9999994478962615, iteration: 94640
loss: 1.0418564081192017,grad_norm: 0.9999994720837033, iteration: 94641
loss: 0.9981721639633179,grad_norm: 0.9999998279861845, iteration: 94642
loss: 1.018819808959961,grad_norm: 0.8916776517422621, iteration: 94643
loss: 1.271798014640808,grad_norm: 0.999999558964148, iteration: 94644
loss: 1.0441819429397583,grad_norm: 0.8666906002917419, iteration: 94645
loss: 1.2548458576202393,grad_norm: 0.9999995374915212, iteration: 94646
loss: 1.0096033811569214,grad_norm: 0.8780189880501504, iteration: 94647
loss: 1.2916960716247559,grad_norm: 0.999999862029083, iteration: 94648
loss: 1.1656075716018677,grad_norm: 0.9817124932626402, iteration: 94649
loss: 1.1662591695785522,grad_norm: 0.9999991548957988, iteration: 94650
loss: 1.220984697341919,grad_norm: 0.9999999668097518, iteration: 94651
loss: 1.1485373973846436,grad_norm: 0.9999998173510902, iteration: 94652
loss: 1.2467073202133179,grad_norm: 0.9999998874327846, iteration: 94653
loss: 1.14810049533844,grad_norm: 0.9999993193159976, iteration: 94654
loss: 1.1097984313964844,grad_norm: 0.9999999580011028, iteration: 94655
loss: 1.126175045967102,grad_norm: 0.9999995882687999, iteration: 94656
loss: 0.9470648169517517,grad_norm: 0.9999991979800669, iteration: 94657
loss: 1.0722016096115112,grad_norm: 0.7958429889273432, iteration: 94658
loss: 1.0827072858810425,grad_norm: 0.9999993389098185, iteration: 94659
loss: 1.1422853469848633,grad_norm: 0.9255280200298471, iteration: 94660
loss: 1.075559377670288,grad_norm: 0.9493776213342576, iteration: 94661
loss: 1.036063313484192,grad_norm: 0.9999991602151279, iteration: 94662
loss: 1.2859904766082764,grad_norm: 0.9999996222115706, iteration: 94663
loss: 1.1130454540252686,grad_norm: 0.9999996412711067, iteration: 94664
loss: 1.0933250188827515,grad_norm: 0.999999595543593, iteration: 94665
loss: 1.032820701599121,grad_norm: 0.9999992481843845, iteration: 94666
loss: 1.2861266136169434,grad_norm: 0.9999994782094457, iteration: 94667
loss: 1.1057013273239136,grad_norm: 0.9999991242352806, iteration: 94668
loss: 1.0385549068450928,grad_norm: 0.9554498754128603, iteration: 94669
loss: 1.0866307020187378,grad_norm: 0.9655588022755522, iteration: 94670
loss: 1.0893421173095703,grad_norm: 0.9999999352529012, iteration: 94671
loss: 1.0592385530471802,grad_norm: 0.999999064419765, iteration: 94672
loss: 1.0131779909133911,grad_norm: 0.9999994583813315, iteration: 94673
loss: 1.1294499635696411,grad_norm: 0.9999998734028654, iteration: 94674
loss: 1.0266574621200562,grad_norm: 0.999999077773761, iteration: 94675
loss: 1.1470118761062622,grad_norm: 0.9999998446310316, iteration: 94676
loss: 1.1275709867477417,grad_norm: 0.9999991012377722, iteration: 94677
loss: 1.105084776878357,grad_norm: 0.9999997904810587, iteration: 94678
loss: 0.9654940962791443,grad_norm: 0.9999990932873128, iteration: 94679
loss: 1.0269743204116821,grad_norm: 0.8396342131567215, iteration: 94680
loss: 0.9968539476394653,grad_norm: 0.9367586853613099, iteration: 94681
loss: 1.1369352340698242,grad_norm: 0.9999999159693488, iteration: 94682
loss: 1.0624226331710815,grad_norm: 0.8819904081695561, iteration: 94683
loss: 1.0303435325622559,grad_norm: 0.9999997493754651, iteration: 94684
loss: 1.0066646337509155,grad_norm: 0.9661710575996203, iteration: 94685
loss: 1.141215443611145,grad_norm: 0.9999996170983126, iteration: 94686
loss: 1.0096441507339478,grad_norm: 0.9224006024471558, iteration: 94687
loss: 1.2380659580230713,grad_norm: 0.9999997366419378, iteration: 94688
loss: 1.1021426916122437,grad_norm: 0.9999995217392833, iteration: 94689
loss: 1.1405372619628906,grad_norm: 0.9999996836050679, iteration: 94690
loss: 1.1276564598083496,grad_norm: 0.9999993711889265, iteration: 94691
loss: 1.229848861694336,grad_norm: 1.0000000378245424, iteration: 94692
loss: 1.162093162536621,grad_norm: 0.9999996512671769, iteration: 94693
loss: 1.1683608293533325,grad_norm: 0.9999997978963477, iteration: 94694
loss: 1.0758922100067139,grad_norm: 0.9999992161763289, iteration: 94695
loss: 1.460294246673584,grad_norm: 0.9999999549774244, iteration: 94696
loss: 1.0846556425094604,grad_norm: 0.9999996619688277, iteration: 94697
loss: 1.5668630599975586,grad_norm: 0.9999995570542795, iteration: 94698
loss: 1.3381462097167969,grad_norm: 0.9999994135770446, iteration: 94699
loss: 1.001644253730774,grad_norm: 0.987164154102954, iteration: 94700
loss: 1.087868094444275,grad_norm: 0.9999993013948028, iteration: 94701
loss: 1.051632046699524,grad_norm: 0.9999994645034731, iteration: 94702
loss: 1.0734704732894897,grad_norm: 0.9999989091698867, iteration: 94703
loss: 1.027942180633545,grad_norm: 0.9999992509468838, iteration: 94704
loss: 1.1145094633102417,grad_norm: 0.9999995920894231, iteration: 94705
loss: 1.0242748260498047,grad_norm: 0.9999992481339329, iteration: 94706
loss: 1.0080658197402954,grad_norm: 0.9563935821695674, iteration: 94707
loss: 1.0492987632751465,grad_norm: 0.964138591726394, iteration: 94708
loss: 1.0842260122299194,grad_norm: 0.9999991262755452, iteration: 94709
loss: 1.1348049640655518,grad_norm: 0.9999997080112818, iteration: 94710
loss: 1.161765456199646,grad_norm: 0.999999788803423, iteration: 94711
loss: 1.1837177276611328,grad_norm: 0.9999995426475806, iteration: 94712
loss: 0.9953755140304565,grad_norm: 0.9999991427314351, iteration: 94713
loss: 1.0308659076690674,grad_norm: 0.9202600437046747, iteration: 94714
loss: 1.0870616436004639,grad_norm: 0.9999997362839067, iteration: 94715
loss: 1.1107780933380127,grad_norm: 0.9999991305622867, iteration: 94716
loss: 1.029653787612915,grad_norm: 0.9999991654380218, iteration: 94717
loss: 1.0959413051605225,grad_norm: 0.9999996700289397, iteration: 94718
loss: 1.0687980651855469,grad_norm: 0.9999997582967962, iteration: 94719
loss: 1.1590313911437988,grad_norm: 0.9999998506284948, iteration: 94720
loss: 1.4117823839187622,grad_norm: 0.9999997847193817, iteration: 94721
loss: 1.062785029411316,grad_norm: 0.9999991296715798, iteration: 94722
loss: 1.0882378816604614,grad_norm: 0.9999991984284966, iteration: 94723
loss: 1.0415879487991333,grad_norm: 0.97548700287914, iteration: 94724
loss: 1.1041100025177002,grad_norm: 0.9999999548546168, iteration: 94725
loss: 1.062862515449524,grad_norm: 0.9804945761869706, iteration: 94726
loss: 1.0113227367401123,grad_norm: 0.9999991012508149, iteration: 94727
loss: 1.0642260313034058,grad_norm: 0.97007929489242, iteration: 94728
loss: 1.0241737365722656,grad_norm: 0.9999999898272539, iteration: 94729
loss: 1.0227144956588745,grad_norm: 0.9999998137528584, iteration: 94730
loss: 1.035372018814087,grad_norm: 0.999999143227538, iteration: 94731
loss: 1.0305641889572144,grad_norm: 0.9999989847615985, iteration: 94732
loss: 1.0115940570831299,grad_norm: 0.8860178248919255, iteration: 94733
loss: 1.0386914014816284,grad_norm: 0.9999999892707371, iteration: 94734
loss: 1.0228490829467773,grad_norm: 0.8712167677948979, iteration: 94735
loss: 1.0561288595199585,grad_norm: 0.9999997004813809, iteration: 94736
loss: 1.1944456100463867,grad_norm: 0.9999995510282329, iteration: 94737
loss: 1.0375971794128418,grad_norm: 0.9999994147260052, iteration: 94738
loss: 0.9994426965713501,grad_norm: 0.9999994687621375, iteration: 94739
loss: 1.005685567855835,grad_norm: 0.9999991228816774, iteration: 94740
loss: 0.9975757002830505,grad_norm: 0.9999993065597931, iteration: 94741
loss: 1.0608044862747192,grad_norm: 0.9999999576540551, iteration: 94742
loss: 1.1530834436416626,grad_norm: 0.9999991676873008, iteration: 94743
loss: 1.0821136236190796,grad_norm: 0.9999994248653, iteration: 94744
loss: 1.0129035711288452,grad_norm: 0.9999994931771565, iteration: 94745
loss: 1.3555057048797607,grad_norm: 0.9999999163648079, iteration: 94746
loss: 1.234189510345459,grad_norm: 0.9999997815015668, iteration: 94747
loss: 1.304682970046997,grad_norm: 0.9999993790780926, iteration: 94748
loss: 1.0370066165924072,grad_norm: 0.9725749134349924, iteration: 94749
loss: 1.0551085472106934,grad_norm: 0.999999270998607, iteration: 94750
loss: 0.9980764985084534,grad_norm: 0.9999991605711939, iteration: 94751
loss: 1.047586441040039,grad_norm: 0.9999992481348021, iteration: 94752
loss: 1.0382157564163208,grad_norm: 0.9999998018824443, iteration: 94753
loss: 0.9630765318870544,grad_norm: 0.9999991500370411, iteration: 94754
loss: 1.0630682706832886,grad_norm: 0.9999994683150113, iteration: 94755
loss: 1.3662383556365967,grad_norm: 0.9999998550795622, iteration: 94756
loss: 1.0593903064727783,grad_norm: 0.9999998182759665, iteration: 94757
loss: 1.029836893081665,grad_norm: 0.9999991003881119, iteration: 94758
loss: 1.107600450515747,grad_norm: 0.9999997542282942, iteration: 94759
loss: 1.0679583549499512,grad_norm: 0.9999993310053245, iteration: 94760
loss: 1.0473741292953491,grad_norm: 0.9999991084688316, iteration: 94761
loss: 1.0688916444778442,grad_norm: 0.9999997770127702, iteration: 94762
loss: 1.0273696184158325,grad_norm: 0.9017384249009559, iteration: 94763
loss: 1.2896419763565063,grad_norm: 0.9999998703830838, iteration: 94764
loss: 1.2244060039520264,grad_norm: 0.9999999693050325, iteration: 94765
loss: 1.071825623512268,grad_norm: 0.9223393439122365, iteration: 94766
loss: 1.1879686117172241,grad_norm: 0.9999997363655876, iteration: 94767
loss: 1.0708178281784058,grad_norm: 0.9999990886225849, iteration: 94768
loss: 0.9782683849334717,grad_norm: 0.8889288426290355, iteration: 94769
loss: 1.1002541780471802,grad_norm: 0.9999997409710817, iteration: 94770
loss: 1.020524501800537,grad_norm: 0.9609156824008248, iteration: 94771
loss: 0.9455505013465881,grad_norm: 0.9999995059706507, iteration: 94772
loss: 0.988068163394928,grad_norm: 0.9999996333012131, iteration: 94773
loss: 1.0199041366577148,grad_norm: 0.7987788287551152, iteration: 94774
loss: 0.9464049935340881,grad_norm: 0.8254045210763173, iteration: 94775
loss: 0.9968287944793701,grad_norm: 0.9999995080599942, iteration: 94776
loss: 1.0761113166809082,grad_norm: 0.9999993018895788, iteration: 94777
loss: 1.0824792385101318,grad_norm: 0.9999997247712887, iteration: 94778
loss: 0.9879016876220703,grad_norm: 0.9999990207391977, iteration: 94779
loss: 1.097179889678955,grad_norm: 0.9348346001631481, iteration: 94780
loss: 1.1047369241714478,grad_norm: 0.9999991422939161, iteration: 94781
loss: 1.2015596628189087,grad_norm: 1.0000000147296768, iteration: 94782
loss: 1.1228646039962769,grad_norm: 0.9999996989952733, iteration: 94783
loss: 1.1492505073547363,grad_norm: 0.9999995256403001, iteration: 94784
loss: 1.0060429573059082,grad_norm: 0.8435160594594188, iteration: 94785
loss: 1.067115068435669,grad_norm: 0.9999992063092606, iteration: 94786
loss: 1.0892399549484253,grad_norm: 0.9999995474448427, iteration: 94787
loss: 1.001732587814331,grad_norm: 0.9189581430756614, iteration: 94788
loss: 1.0151968002319336,grad_norm: 0.9999995525599641, iteration: 94789
loss: 1.038807988166809,grad_norm: 0.9999996887248919, iteration: 94790
loss: 1.0209298133850098,grad_norm: 0.999999873316261, iteration: 94791
loss: 1.0169637203216553,grad_norm: 0.8821987272670058, iteration: 94792
loss: 1.059563159942627,grad_norm: 0.9999993958255063, iteration: 94793
loss: 1.0134028196334839,grad_norm: 0.8823054351528616, iteration: 94794
loss: 1.0801116228103638,grad_norm: 0.9999997367546057, iteration: 94795
loss: 1.008330225944519,grad_norm: 0.9999990343684267, iteration: 94796
loss: 0.993358850479126,grad_norm: 0.8560090075445326, iteration: 94797
loss: 1.072139859199524,grad_norm: 0.9999998786675786, iteration: 94798
loss: 0.9768851399421692,grad_norm: 0.9242759034185731, iteration: 94799
loss: 1.119515299797058,grad_norm: 0.9999992566914425, iteration: 94800
loss: 0.9875764846801758,grad_norm: 0.7825113031581717, iteration: 94801
loss: 0.9826112985610962,grad_norm: 0.9999990998420819, iteration: 94802
loss: 1.0296217203140259,grad_norm: 0.9999991746763941, iteration: 94803
loss: 1.052232265472412,grad_norm: 0.9832774207460052, iteration: 94804
loss: 1.0172981023788452,grad_norm: 0.9999993662305543, iteration: 94805
loss: 1.013039231300354,grad_norm: 0.9999997638814797, iteration: 94806
loss: 1.0314078330993652,grad_norm: 0.9999994361762937, iteration: 94807
loss: 1.0025452375411987,grad_norm: 0.9999993005951445, iteration: 94808
loss: 1.0610798597335815,grad_norm: 0.999999531876203, iteration: 94809
loss: 1.030263900756836,grad_norm: 0.8268792147740113, iteration: 94810
loss: 0.9978814125061035,grad_norm: 0.9443524829122754, iteration: 94811
loss: 1.0477027893066406,grad_norm: 0.9999999416165214, iteration: 94812
loss: 1.0471810102462769,grad_norm: 0.9999996169741957, iteration: 94813
loss: 1.0639222860336304,grad_norm: 0.9999992145884978, iteration: 94814
loss: 1.078942894935608,grad_norm: 0.9999993305390814, iteration: 94815
loss: 1.041896939277649,grad_norm: 0.9999998628083033, iteration: 94816
loss: 1.0090967416763306,grad_norm: 0.9999991974111062, iteration: 94817
loss: 1.0623551607131958,grad_norm: 0.9999990436538027, iteration: 94818
loss: 1.0868126153945923,grad_norm: 0.9999995413518684, iteration: 94819
loss: 1.0502904653549194,grad_norm: 0.9999992923421279, iteration: 94820
loss: 1.034381628036499,grad_norm: 0.9335743356025916, iteration: 94821
loss: 1.0627577304840088,grad_norm: 0.9999991918717805, iteration: 94822
loss: 0.9962885975837708,grad_norm: 0.9999992772500526, iteration: 94823
loss: 1.0451186895370483,grad_norm: 0.9999991014124006, iteration: 94824
loss: 1.0698689222335815,grad_norm: 0.9999998388295516, iteration: 94825
loss: 1.0297679901123047,grad_norm: 0.9999994804419339, iteration: 94826
loss: 1.1442278623580933,grad_norm: 0.9999997501240784, iteration: 94827
loss: 1.0313149690628052,grad_norm: 0.999999260821391, iteration: 94828
loss: 1.1891827583312988,grad_norm: 0.999999498340061, iteration: 94829
loss: 1.120779037475586,grad_norm: 0.999999209075229, iteration: 94830
loss: 1.081311821937561,grad_norm: 0.9999994692680799, iteration: 94831
loss: 1.0602697134017944,grad_norm: 0.999999181522803, iteration: 94832
loss: 1.1014591455459595,grad_norm: 0.999999255339538, iteration: 94833
loss: 0.9992967844009399,grad_norm: 0.9343155984299056, iteration: 94834
loss: 0.9963376522064209,grad_norm: 0.9229128625823235, iteration: 94835
loss: 0.993442952632904,grad_norm: 0.9380535812450526, iteration: 94836
loss: 0.9927716851234436,grad_norm: 0.9293868404868607, iteration: 94837
loss: 1.1340701580047607,grad_norm: 0.9999992438804326, iteration: 94838
loss: 1.0069358348846436,grad_norm: 0.9016275914721676, iteration: 94839
loss: 1.0940642356872559,grad_norm: 0.9999991294388804, iteration: 94840
loss: 1.0138235092163086,grad_norm: 0.9236793179977252, iteration: 94841
loss: 1.0198997259140015,grad_norm: 0.8822684054206469, iteration: 94842
loss: 1.111379861831665,grad_norm: 0.9999997005255904, iteration: 94843
loss: 1.0325261354446411,grad_norm: 0.9999995365961942, iteration: 94844
loss: 0.9960227012634277,grad_norm: 0.863817392603405, iteration: 94845
loss: 1.0202341079711914,grad_norm: 0.948256577362385, iteration: 94846
loss: 0.9760426878929138,grad_norm: 0.9999992171538595, iteration: 94847
loss: 1.116574764251709,grad_norm: 0.9999996652005461, iteration: 94848
loss: 1.1180651187896729,grad_norm: 0.9999996766852325, iteration: 94849
loss: 1.0323821306228638,grad_norm: 0.9706563922843193, iteration: 94850
loss: 1.0229569673538208,grad_norm: 0.9152794755073328, iteration: 94851
loss: 1.0209773778915405,grad_norm: 0.9999998278283843, iteration: 94852
loss: 1.005678653717041,grad_norm: 0.8748635383492303, iteration: 94853
loss: 1.1180530786514282,grad_norm: 0.9999998173302662, iteration: 94854
loss: 1.1105382442474365,grad_norm: 0.9999997440281193, iteration: 94855
loss: 1.0854724645614624,grad_norm: 0.9999993406387389, iteration: 94856
loss: 1.0713833570480347,grad_norm: 0.9999992390178271, iteration: 94857
loss: 1.0609767436981201,grad_norm: 0.9999990883997562, iteration: 94858
loss: 1.0443240404129028,grad_norm: 0.8189282649531667, iteration: 94859
loss: 0.9897260069847107,grad_norm: 0.9561285737392439, iteration: 94860
loss: 0.9994757771492004,grad_norm: 0.999999584508752, iteration: 94861
loss: 1.0060396194458008,grad_norm: 0.8812734622307656, iteration: 94862
loss: 1.047562599182129,grad_norm: 0.9930160101664719, iteration: 94863
loss: 1.0126268863677979,grad_norm: 0.9800588633964661, iteration: 94864
loss: 0.9837433099746704,grad_norm: 0.8694666470030534, iteration: 94865
loss: 1.1528167724609375,grad_norm: 0.9880319502722777, iteration: 94866
loss: 1.0376039743423462,grad_norm: 0.9999998778806647, iteration: 94867
loss: 1.0129035711288452,grad_norm: 0.9999995334680313, iteration: 94868
loss: 1.0085073709487915,grad_norm: 0.8162399924349045, iteration: 94869
loss: 1.0659706592559814,grad_norm: 0.9999991190677257, iteration: 94870
loss: 0.9702434539794922,grad_norm: 0.9088722469376341, iteration: 94871
loss: 1.0503183603286743,grad_norm: 0.9999994351534668, iteration: 94872
loss: 1.0480132102966309,grad_norm: 0.9999992647994561, iteration: 94873
loss: 1.0586624145507812,grad_norm: 0.9999994646143378, iteration: 94874
loss: 0.9789639711380005,grad_norm: 0.9999995549808931, iteration: 94875
loss: 1.010518193244934,grad_norm: 0.8726844383655125, iteration: 94876
loss: 1.0290085077285767,grad_norm: 0.8972528898214024, iteration: 94877
loss: 1.0434398651123047,grad_norm: 0.999999467397602, iteration: 94878
loss: 1.2059415578842163,grad_norm: 0.9999996913342212, iteration: 94879
loss: 0.9852116703987122,grad_norm: 0.787469224495466, iteration: 94880
loss: 1.0831364393234253,grad_norm: 0.9999991285285555, iteration: 94881
loss: 1.0507938861846924,grad_norm: 0.9999991097468971, iteration: 94882
loss: 1.0279935598373413,grad_norm: 0.9999997131048954, iteration: 94883
loss: 0.9888994097709656,grad_norm: 0.9999989716573128, iteration: 94884
loss: 0.9776136875152588,grad_norm: 0.8383627027278489, iteration: 94885
loss: 0.9943861365318298,grad_norm: 0.7565903753198462, iteration: 94886
loss: 1.0392072200775146,grad_norm: 0.8136830041375553, iteration: 94887
loss: 0.9565354585647583,grad_norm: 0.9999995332232235, iteration: 94888
loss: 1.087897539138794,grad_norm: 0.9999995540765289, iteration: 94889
loss: 0.9824860095977783,grad_norm: 0.9471115794882726, iteration: 94890
loss: 1.0100620985031128,grad_norm: 0.9999997982347467, iteration: 94891
loss: 1.003421425819397,grad_norm: 0.9999990641622464, iteration: 94892
loss: 1.0002104043960571,grad_norm: 0.8629083126711729, iteration: 94893
loss: 1.0271852016448975,grad_norm: 0.9999994507158536, iteration: 94894
loss: 1.047965407371521,grad_norm: 0.8830697560255154, iteration: 94895
loss: 1.0325630903244019,grad_norm: 0.9999991168963048, iteration: 94896
loss: 1.1013996601104736,grad_norm: 0.9999992219384153, iteration: 94897
loss: 1.0798598527908325,grad_norm: 0.8703702694757193, iteration: 94898
loss: 1.0232548713684082,grad_norm: 0.9999992101274676, iteration: 94899
loss: 1.0515590906143188,grad_norm: 0.9999995063357152, iteration: 94900
loss: 1.0095587968826294,grad_norm: 0.9999990444692263, iteration: 94901
loss: 1.2692691087722778,grad_norm: 0.9999998204829078, iteration: 94902
loss: 1.0259430408477783,grad_norm: 0.9999991714320285, iteration: 94903
loss: 0.9871746897697449,grad_norm: 0.9398342176407434, iteration: 94904
loss: 1.174728512763977,grad_norm: 0.9999992464892193, iteration: 94905
loss: 1.0895172357559204,grad_norm: 0.9999993267251224, iteration: 94906
loss: 1.051803708076477,grad_norm: 0.9999992380169295, iteration: 94907
loss: 0.9865654110908508,grad_norm: 0.9999990997729045, iteration: 94908
loss: 0.9675853848457336,grad_norm: 0.9999990482130566, iteration: 94909
loss: 1.0203059911727905,grad_norm: 0.999999284533061, iteration: 94910
loss: 1.1087915897369385,grad_norm: 0.9999993557324672, iteration: 94911
loss: 1.020714521408081,grad_norm: 0.9940971943961974, iteration: 94912
loss: 1.0069122314453125,grad_norm: 0.8679836269544573, iteration: 94913
loss: 1.056567907333374,grad_norm: 0.9999993562552614, iteration: 94914
loss: 0.9895992875099182,grad_norm: 0.9999991490784093, iteration: 94915
loss: 1.0359760522842407,grad_norm: 0.9999991196679519, iteration: 94916
loss: 1.0301588773727417,grad_norm: 0.9933462740831734, iteration: 94917
loss: 1.0978116989135742,grad_norm: 0.9999992045676336, iteration: 94918
loss: 1.108271598815918,grad_norm: 0.999999565400397, iteration: 94919
loss: 1.0838046073913574,grad_norm: 0.9999990844750196, iteration: 94920
loss: 1.0147985219955444,grad_norm: 0.9999991894637402, iteration: 94921
loss: 1.0354278087615967,grad_norm: 0.9999991985538449, iteration: 94922
loss: 0.9635183215141296,grad_norm: 0.9366379825417679, iteration: 94923
loss: 1.0844045877456665,grad_norm: 0.9470416454836204, iteration: 94924
loss: 1.0577034950256348,grad_norm: 0.9999992449631113, iteration: 94925
loss: 1.0030103921890259,grad_norm: 0.9999991429475514, iteration: 94926
loss: 1.0473136901855469,grad_norm: 0.8510806835971791, iteration: 94927
loss: 1.0564134120941162,grad_norm: 0.9999989974643592, iteration: 94928
loss: 1.15928053855896,grad_norm: 0.9999998785156209, iteration: 94929
loss: 0.9892858862876892,grad_norm: 0.999999104074922, iteration: 94930
loss: 0.997998058795929,grad_norm: 0.983971552335982, iteration: 94931
loss: 1.013731598854065,grad_norm: 0.9999990626559097, iteration: 94932
loss: 1.0448870658874512,grad_norm: 0.8562087237126316, iteration: 94933
loss: 1.0265560150146484,grad_norm: 0.9999998794267924, iteration: 94934
loss: 0.9918418526649475,grad_norm: 0.8506578894975703, iteration: 94935
loss: 1.044493556022644,grad_norm: 0.9999993155185268, iteration: 94936
loss: 1.0104129314422607,grad_norm: 0.8591276927125144, iteration: 94937
loss: 1.0229414701461792,grad_norm: 0.941697680870535, iteration: 94938
loss: 1.051133632659912,grad_norm: 0.8347792269630563, iteration: 94939
loss: 1.0696908235549927,grad_norm: 0.999999665476823, iteration: 94940
loss: 1.0685858726501465,grad_norm: 0.9999994414889898, iteration: 94941
loss: 1.045527696609497,grad_norm: 1.0000000509556688, iteration: 94942
loss: 1.0238546133041382,grad_norm: 0.9851851385813428, iteration: 94943
loss: 1.0000510215759277,grad_norm: 0.9526846903928219, iteration: 94944
loss: 1.0562626123428345,grad_norm: 0.9999993077990207, iteration: 94945
loss: 0.9959773421287537,grad_norm: 0.9999994994485851, iteration: 94946
loss: 1.0344820022583008,grad_norm: 0.9999994277554234, iteration: 94947
loss: 1.0834647417068481,grad_norm: 0.9999991743287968, iteration: 94948
loss: 0.9892453551292419,grad_norm: 0.9923321805803926, iteration: 94949
loss: 1.0547627210617065,grad_norm: 0.9999994796469595, iteration: 94950
loss: 1.010669469833374,grad_norm: 0.9999994569110097, iteration: 94951
loss: 0.9862402081489563,grad_norm: 0.9672295508397605, iteration: 94952
loss: 1.088452935218811,grad_norm: 0.999999860783584, iteration: 94953
loss: 0.9915663599967957,grad_norm: 0.9943670332485303, iteration: 94954
loss: 1.0164304971694946,grad_norm: 0.999999039109043, iteration: 94955
loss: 1.0350126028060913,grad_norm: 0.9928714241480109, iteration: 94956
loss: 1.1644396781921387,grad_norm: 0.999999797385931, iteration: 94957
loss: 0.9836825132369995,grad_norm: 0.8624622187121906, iteration: 94958
loss: 1.0831249952316284,grad_norm: 0.9999990093893162, iteration: 94959
loss: 1.0823413133621216,grad_norm: 0.999999227895577, iteration: 94960
loss: 0.974279522895813,grad_norm: 0.9999990720554786, iteration: 94961
loss: 1.06692373752594,grad_norm: 0.9696465030684681, iteration: 94962
loss: 0.9608538150787354,grad_norm: 0.9999998315276589, iteration: 94963
loss: 1.0900996923446655,grad_norm: 0.9999992063205073, iteration: 94964
loss: 1.1074596643447876,grad_norm: 0.9999993963974831, iteration: 94965
loss: 1.0148240327835083,grad_norm: 0.9991238565964934, iteration: 94966
loss: 0.9979575276374817,grad_norm: 0.9999999546336144, iteration: 94967
loss: 0.9916015267372131,grad_norm: 0.8466551358378471, iteration: 94968
loss: 1.0464422702789307,grad_norm: 0.9999990693290546, iteration: 94969
loss: 1.028531551361084,grad_norm: 0.9999991415557952, iteration: 94970
loss: 1.091882586479187,grad_norm: 0.9999991365690113, iteration: 94971
loss: 1.0434939861297607,grad_norm: 0.9999995739582916, iteration: 94972
loss: 0.9857586622238159,grad_norm: 0.9999990406446162, iteration: 94973
loss: 1.03188955783844,grad_norm: 0.9883896920973297, iteration: 94974
loss: 1.0587743520736694,grad_norm: 0.9999990668578164, iteration: 94975
loss: 1.079764723777771,grad_norm: 0.9999992129081071, iteration: 94976
loss: 1.0558303594589233,grad_norm: 0.9999995706498338, iteration: 94977
loss: 1.0021578073501587,grad_norm: 0.9999991816729759, iteration: 94978
loss: 0.9789456725120544,grad_norm: 0.9999996811890558, iteration: 94979
loss: 1.0164870023727417,grad_norm: 0.9999991277011628, iteration: 94980
loss: 1.0360397100448608,grad_norm: 0.9999989761640873, iteration: 94981
loss: 1.0271382331848145,grad_norm: 0.999999502844576, iteration: 94982
loss: 1.0086112022399902,grad_norm: 0.9059515781442073, iteration: 94983
loss: 1.1167207956314087,grad_norm: 0.9999992321715241, iteration: 94984
loss: 1.0444704294204712,grad_norm: 0.999999210222507, iteration: 94985
loss: 1.0173460245132446,grad_norm: 0.9999991580407024, iteration: 94986
loss: 1.0541071891784668,grad_norm: 0.9999991667334465, iteration: 94987
loss: 1.1440600156784058,grad_norm: 0.9999996894463019, iteration: 94988
loss: 1.1799789667129517,grad_norm: 0.99999921216787, iteration: 94989
loss: 1.1120696067810059,grad_norm: 0.999999022470321, iteration: 94990
loss: 1.0918841361999512,grad_norm: 0.999999478862104, iteration: 94991
loss: 1.0870743989944458,grad_norm: 0.9999991406531459, iteration: 94992
loss: 0.9887112975120544,grad_norm: 0.9999990693524751, iteration: 94993
loss: 1.0106714963912964,grad_norm: 0.9696223039096615, iteration: 94994
loss: 1.021394968032837,grad_norm: 0.9999990870848279, iteration: 94995
loss: 1.0352715253829956,grad_norm: 0.9999996933779743, iteration: 94996
loss: 1.0406348705291748,grad_norm: 0.9999995697188753, iteration: 94997
loss: 0.9584355354309082,grad_norm: 0.9190711045248574, iteration: 94998
loss: 1.028446078300476,grad_norm: 0.9999999690135102, iteration: 94999
loss: 1.182600736618042,grad_norm: 0.9999996551488048, iteration: 95000
loss: 1.0523422956466675,grad_norm: 0.9999992913730762, iteration: 95001
loss: 1.0105496644973755,grad_norm: 0.9862787540623306, iteration: 95002
loss: 1.0260179042816162,grad_norm: 0.9999992777720179, iteration: 95003
loss: 1.071780800819397,grad_norm: 0.9999995711408126, iteration: 95004
loss: 0.9881273508071899,grad_norm: 0.9320603040060493, iteration: 95005
loss: 0.9969683289527893,grad_norm: 0.8109800200428088, iteration: 95006
loss: 1.0601149797439575,grad_norm: 0.9999992886213702, iteration: 95007
loss: 1.0328149795532227,grad_norm: 0.8046764708474443, iteration: 95008
loss: 1.0716882944107056,grad_norm: 0.9999997292020215, iteration: 95009
loss: 1.0284740924835205,grad_norm: 0.9782650628823991, iteration: 95010
loss: 1.1827871799468994,grad_norm: 0.9999997351792251, iteration: 95011
loss: 1.016324520111084,grad_norm: 0.9999990979817083, iteration: 95012
loss: 1.0027165412902832,grad_norm: 0.8282702340588757, iteration: 95013
loss: 0.9939403533935547,grad_norm: 0.9734906241442853, iteration: 95014
loss: 0.9989430904388428,grad_norm: 0.9716577510563358, iteration: 95015
loss: 1.0601602792739868,grad_norm: 0.9999992544510672, iteration: 95016
loss: 1.1490914821624756,grad_norm: 0.9999995117932269, iteration: 95017
loss: 1.062719702720642,grad_norm: 0.9860469260698, iteration: 95018
loss: 1.0420687198638916,grad_norm: 0.9999990720043459, iteration: 95019
loss: 1.1688576936721802,grad_norm: 0.9999995711570856, iteration: 95020
loss: 1.106594204902649,grad_norm: 0.9999995224032089, iteration: 95021
loss: 1.2102375030517578,grad_norm: 0.9999991973668096, iteration: 95022
loss: 1.2088829278945923,grad_norm: 1.0000000408804401, iteration: 95023
loss: 1.1075862646102905,grad_norm: 0.9999992446304408, iteration: 95024
loss: 1.038964033126831,grad_norm: 0.9278301433072373, iteration: 95025
loss: 1.1255897283554077,grad_norm: 0.9999993763890184, iteration: 95026
loss: 1.0515824556350708,grad_norm: 0.9999998665432123, iteration: 95027
loss: 1.048696517944336,grad_norm: 0.9999992353102661, iteration: 95028
loss: 1.0532686710357666,grad_norm: 0.8149815430642521, iteration: 95029
loss: 1.0569689273834229,grad_norm: 0.8236119144306842, iteration: 95030
loss: 1.002959132194519,grad_norm: 0.8642289388339444, iteration: 95031
loss: 1.0211806297302246,grad_norm: 0.9999992011445957, iteration: 95032
loss: 1.042984127998352,grad_norm: 0.9999993609859275, iteration: 95033
loss: 1.0258374214172363,grad_norm: 0.999999156354095, iteration: 95034
loss: 1.1213489770889282,grad_norm: 0.9999991879714328, iteration: 95035
loss: 1.043445348739624,grad_norm: 0.9999992098083296, iteration: 95036
loss: 1.0301392078399658,grad_norm: 0.9999996920956638, iteration: 95037
loss: 0.9702394008636475,grad_norm: 0.8560165103039648, iteration: 95038
loss: 1.06266188621521,grad_norm: 0.9999998380221563, iteration: 95039
loss: 1.0959444046020508,grad_norm: 0.9999998088981494, iteration: 95040
loss: 1.0465501546859741,grad_norm: 0.8709049962028877, iteration: 95041
loss: 1.0932265520095825,grad_norm: 0.9999991140389475, iteration: 95042
loss: 0.974195659160614,grad_norm: 0.9999992333729799, iteration: 95043
loss: 1.0129599571228027,grad_norm: 0.9155660684219837, iteration: 95044
loss: 0.9599657654762268,grad_norm: 0.8055343058230195, iteration: 95045
loss: 1.011469841003418,grad_norm: 0.9999995719913237, iteration: 95046
loss: 0.956839919090271,grad_norm: 0.9612323946374665, iteration: 95047
loss: 1.0704736709594727,grad_norm: 0.9999991443577935, iteration: 95048
loss: 0.9780555367469788,grad_norm: 0.8938507381918931, iteration: 95049
loss: 1.0396485328674316,grad_norm: 0.8971387478953317, iteration: 95050
loss: 1.01437509059906,grad_norm: 0.9999989833042704, iteration: 95051
loss: 1.0190037488937378,grad_norm: 0.999999067246404, iteration: 95052
loss: 1.0808876752853394,grad_norm: 0.9999998867205683, iteration: 95053
loss: 0.9923363327980042,grad_norm: 0.9999993486029615, iteration: 95054
loss: 1.026888132095337,grad_norm: 0.8024456362998039, iteration: 95055
loss: 1.0604387521743774,grad_norm: 0.999999515115485, iteration: 95056
loss: 1.0750584602355957,grad_norm: 0.9999993229936067, iteration: 95057
loss: 0.9761477112770081,grad_norm: 0.8500904170608146, iteration: 95058
loss: 1.0659980773925781,grad_norm: 0.9999998109821204, iteration: 95059
loss: 1.0420982837677002,grad_norm: 0.9999991227746129, iteration: 95060
loss: 1.004189372062683,grad_norm: 0.8109154206830356, iteration: 95061
loss: 1.050178050994873,grad_norm: 0.9999990523481435, iteration: 95062
loss: 1.0710095167160034,grad_norm: 0.9999991004512545, iteration: 95063
loss: 1.146896481513977,grad_norm: 0.9999996292931576, iteration: 95064
loss: 0.9856826663017273,grad_norm: 0.999999116645937, iteration: 95065
loss: 1.107823371887207,grad_norm: 0.9999995923170317, iteration: 95066
loss: 1.06086003780365,grad_norm: 0.9999997555066358, iteration: 95067
loss: 1.1254969835281372,grad_norm: 0.9999991017508248, iteration: 95068
loss: 0.9674407839775085,grad_norm: 0.9999992421985093, iteration: 95069
loss: 1.0503642559051514,grad_norm: 0.9999994169266686, iteration: 95070
loss: 1.0329817533493042,grad_norm: 0.9999997982863803, iteration: 95071
loss: 1.1460847854614258,grad_norm: 0.9999999194209969, iteration: 95072
loss: 1.12224280834198,grad_norm: 0.999999416123509, iteration: 95073
loss: 1.6044535636901855,grad_norm: 0.9999998031357225, iteration: 95074
loss: 1.5976917743682861,grad_norm: 0.9999998023966025, iteration: 95075
loss: 1.21883225440979,grad_norm: 0.9999999926890826, iteration: 95076
loss: 1.1273878812789917,grad_norm: 0.9999998835954034, iteration: 95077
loss: 0.9862557649612427,grad_norm: 0.9999990049644484, iteration: 95078
loss: 1.0025736093521118,grad_norm: 0.9999990816704134, iteration: 95079
loss: 1.246552586555481,grad_norm: 0.999999767904917, iteration: 95080
loss: 1.0933815240859985,grad_norm: 0.9999992276114299, iteration: 95081
loss: 1.0096896886825562,grad_norm: 0.9999991102735029, iteration: 95082
loss: 1.0221343040466309,grad_norm: 0.9999991212181766, iteration: 95083
loss: 1.1995631456375122,grad_norm: 0.9999994770406708, iteration: 95084
loss: 1.109110713005066,grad_norm: 0.9999995859316122, iteration: 95085
loss: 1.026382565498352,grad_norm: 0.9999990441866458, iteration: 95086
loss: 0.9524151682853699,grad_norm: 0.9999990297903446, iteration: 95087
loss: 1.027596354484558,grad_norm: 0.8810473506778008, iteration: 95088
loss: 1.1059859991073608,grad_norm: 0.9999999233697113, iteration: 95089
loss: 1.0412768125534058,grad_norm: 0.9999992762732833, iteration: 95090
loss: 1.1421585083007812,grad_norm: 0.9999993118248147, iteration: 95091
loss: 1.1358587741851807,grad_norm: 0.999999352463399, iteration: 95092
loss: 1.0268959999084473,grad_norm: 0.9999996619706649, iteration: 95093
loss: 1.0852289199829102,grad_norm: 0.9999993176306099, iteration: 95094
loss: 1.0225266218185425,grad_norm: 0.9999992387945984, iteration: 95095
loss: 0.9887298941612244,grad_norm: 0.9999990909133156, iteration: 95096
loss: 0.9907251000404358,grad_norm: 0.9999996542081814, iteration: 95097
loss: 1.0125988721847534,grad_norm: 0.9999995591682623, iteration: 95098
loss: 1.2226111888885498,grad_norm: 0.9999998923671872, iteration: 95099
loss: 0.990941047668457,grad_norm: 0.911591026584016, iteration: 95100
loss: 1.075377106666565,grad_norm: 0.9999992853533515, iteration: 95101
loss: 1.0929914712905884,grad_norm: 0.9999997520666262, iteration: 95102
loss: 1.2698363065719604,grad_norm: 0.9999998824693926, iteration: 95103
loss: 0.9960837364196777,grad_norm: 0.9999994637700562, iteration: 95104
loss: 1.0828937292099,grad_norm: 0.9999989307874736, iteration: 95105
loss: 0.9913597106933594,grad_norm: 0.8973452252621826, iteration: 95106
loss: 1.0819213390350342,grad_norm: 0.999999272633893, iteration: 95107
loss: 1.1306641101837158,grad_norm: 0.9999998996628172, iteration: 95108
loss: 1.0426054000854492,grad_norm: 0.9999997499372935, iteration: 95109
loss: 0.9920532703399658,grad_norm: 0.8290322126792031, iteration: 95110
loss: 1.183993935585022,grad_norm: 0.9999997800828491, iteration: 95111
loss: 1.077096700668335,grad_norm: 0.9999991504244936, iteration: 95112
loss: 1.0185362100601196,grad_norm: 0.8869363318574891, iteration: 95113
loss: 1.16452956199646,grad_norm: 0.9999990994504939, iteration: 95114
loss: 1.0450328588485718,grad_norm: 0.9999990350400971, iteration: 95115
loss: 0.9694491028785706,grad_norm: 0.8775444948281893, iteration: 95116
loss: 1.0847954750061035,grad_norm: 0.9999997364469626, iteration: 95117
loss: 1.041118860244751,grad_norm: 0.9999992266838924, iteration: 95118
loss: 1.0116651058197021,grad_norm: 0.9402411805149091, iteration: 95119
loss: 1.0415951013565063,grad_norm: 0.9334817544458568, iteration: 95120
loss: 1.022235631942749,grad_norm: 0.9999994206755632, iteration: 95121
loss: 1.0086679458618164,grad_norm: 0.9794065661286148, iteration: 95122
loss: 1.0238128900527954,grad_norm: 0.8380344594571935, iteration: 95123
loss: 1.0388884544372559,grad_norm: 0.9999992734050833, iteration: 95124
loss: 1.0041146278381348,grad_norm: 0.8413811270894824, iteration: 95125
loss: 1.0132672786712646,grad_norm: 0.9179462962520974, iteration: 95126
loss: 0.9995049834251404,grad_norm: 0.9999992157293744, iteration: 95127
loss: 1.0284758806228638,grad_norm: 0.999999086650055, iteration: 95128
loss: 1.0737152099609375,grad_norm: 0.9999997250232653, iteration: 95129
loss: 1.3845716714859009,grad_norm: 0.9999997711237689, iteration: 95130
loss: 0.9995028972625732,grad_norm: 0.9999996743960072, iteration: 95131
loss: 1.0322396755218506,grad_norm: 0.9964915332398072, iteration: 95132
loss: 1.0261527299880981,grad_norm: 0.9999989149549778, iteration: 95133
loss: 1.0033776760101318,grad_norm: 0.9155930519069744, iteration: 95134
loss: 0.9936018586158752,grad_norm: 0.9628850609594657, iteration: 95135
loss: 1.0014424324035645,grad_norm: 0.9917524763397848, iteration: 95136
loss: 0.9833343029022217,grad_norm: 0.9999993953360319, iteration: 95137
loss: 1.0766969919204712,grad_norm: 0.9999992262996662, iteration: 95138
loss: 1.026760458946228,grad_norm: 0.9999990851992413, iteration: 95139
loss: 1.0172184705734253,grad_norm: 0.999999054766225, iteration: 95140
loss: 0.9751524329185486,grad_norm: 0.858717858183888, iteration: 95141
loss: 1.0742233991622925,grad_norm: 0.9999998879077232, iteration: 95142
loss: 1.0191545486450195,grad_norm: 0.9999991612281741, iteration: 95143
loss: 0.9743918776512146,grad_norm: 0.9460395037600751, iteration: 95144
loss: 1.0655564069747925,grad_norm: 0.9999992070600393, iteration: 95145
loss: 1.0490180253982544,grad_norm: 0.9999994604330297, iteration: 95146
loss: 1.0854268074035645,grad_norm: 0.9999997202781323, iteration: 95147
loss: 1.0042887926101685,grad_norm: 0.9999991673964854, iteration: 95148
loss: 1.0065151453018188,grad_norm: 0.8929356899918426, iteration: 95149
loss: 1.0634539127349854,grad_norm: 0.9999994668957134, iteration: 95150
loss: 1.134959101676941,grad_norm: 0.9999996736509706, iteration: 95151
loss: 0.9643285870552063,grad_norm: 0.9734221408876207, iteration: 95152
loss: 1.0962592363357544,grad_norm: 0.9999994474371163, iteration: 95153
loss: 1.067326307296753,grad_norm: 0.9999998032957365, iteration: 95154
loss: 0.987840473651886,grad_norm: 0.9983389066645916, iteration: 95155
loss: 1.0925259590148926,grad_norm: 0.9999995696664549, iteration: 95156
loss: 1.0007445812225342,grad_norm: 0.7849780339088308, iteration: 95157
loss: 1.0329716205596924,grad_norm: 0.9999998882563061, iteration: 95158
loss: 0.9810640215873718,grad_norm: 0.99999918294234, iteration: 95159
loss: 1.1158599853515625,grad_norm: 0.9999997688751175, iteration: 95160
loss: 0.9942008256912231,grad_norm: 0.8798519266050565, iteration: 95161
loss: 1.2136800289154053,grad_norm: 0.9999995064911319, iteration: 95162
loss: 1.0033223628997803,grad_norm: 0.9051395518576915, iteration: 95163
loss: 1.118876338005066,grad_norm: 0.99999914106189, iteration: 95164
loss: 1.0061479806900024,grad_norm: 0.9557445719785115, iteration: 95165
loss: 1.0253942012786865,grad_norm: 0.9999994521659535, iteration: 95166
loss: 1.2535990476608276,grad_norm: 0.9999998918144529, iteration: 95167
loss: 1.1368257999420166,grad_norm: 0.9999992543101679, iteration: 95168
loss: 1.4477123022079468,grad_norm: 0.9999997079593554, iteration: 95169
loss: 1.0439069271087646,grad_norm: 0.9999992952672098, iteration: 95170
loss: 1.3014594316482544,grad_norm: 0.9999997953699211, iteration: 95171
loss: 1.2450629472732544,grad_norm: 0.9999991555278201, iteration: 95172
loss: 1.2238149642944336,grad_norm: 0.999999355370699, iteration: 95173
loss: 1.3784527778625488,grad_norm: 1.000000002021557, iteration: 95174
loss: 1.6988368034362793,grad_norm: 0.9999999339333031, iteration: 95175
loss: 1.432032585144043,grad_norm: 0.9999996495524526, iteration: 95176
loss: 1.3122755289077759,grad_norm: 0.9999995184034451, iteration: 95177
loss: 1.310874104499817,grad_norm: 0.9999998702010293, iteration: 95178
loss: 1.2079637050628662,grad_norm: 0.9999999562216461, iteration: 95179
loss: 1.1891874074935913,grad_norm: 0.99999991575172, iteration: 95180
loss: 1.1013875007629395,grad_norm: 0.9999990369940043, iteration: 95181
loss: 1.0107388496398926,grad_norm: 0.9999996616379975, iteration: 95182
loss: 1.000902533531189,grad_norm: 0.9381520138125057, iteration: 95183
loss: 0.9859728813171387,grad_norm: 0.7836916330077445, iteration: 95184
loss: 0.9923080205917358,grad_norm: 0.9835434663013065, iteration: 95185
loss: 0.9906607270240784,grad_norm: 0.768691441904777, iteration: 95186
loss: 0.9726715087890625,grad_norm: 0.9999992604047019, iteration: 95187
loss: 1.0279810428619385,grad_norm: 0.999999140924881, iteration: 95188
loss: 1.0504956245422363,grad_norm: 0.9999999356820055, iteration: 95189
loss: 1.0107043981552124,grad_norm: 0.9999992840509919, iteration: 95190
loss: 1.1322801113128662,grad_norm: 0.9999993659497998, iteration: 95191
loss: 1.0797704458236694,grad_norm: 0.9999998471072619, iteration: 95192
loss: 1.1350796222686768,grad_norm: 0.9999995845126323, iteration: 95193
loss: 1.3833550214767456,grad_norm: 0.9999998167690717, iteration: 95194
loss: 1.0032013654708862,grad_norm: 0.9999991807946332, iteration: 95195
loss: 1.0797349214553833,grad_norm: 0.999999655894244, iteration: 95196
loss: 1.0111654996871948,grad_norm: 0.8838461605860162, iteration: 95197
loss: 1.0116804838180542,grad_norm: 0.9999992300892337, iteration: 95198
loss: 1.0628334283828735,grad_norm: 0.9999992504817986, iteration: 95199
loss: 0.9996442198753357,grad_norm: 0.9999996491574077, iteration: 95200
loss: 1.0125782489776611,grad_norm: 0.9381630944127454, iteration: 95201
loss: 1.0158920288085938,grad_norm: 0.8988835990443261, iteration: 95202
loss: 1.0248453617095947,grad_norm: 0.9304907740989328, iteration: 95203
loss: 1.1846543550491333,grad_norm: 0.999999486134893, iteration: 95204
loss: 0.9811445474624634,grad_norm: 0.9033173091402958, iteration: 95205
loss: 0.9916040897369385,grad_norm: 0.8724406835808596, iteration: 95206
loss: 1.0143529176712036,grad_norm: 0.9999991296013349, iteration: 95207
loss: 1.0491089820861816,grad_norm: 0.921399997275893, iteration: 95208
loss: 1.0027440786361694,grad_norm: 0.9999994561355122, iteration: 95209
loss: 1.0037404298782349,grad_norm: 0.7782051183091978, iteration: 95210
loss: 1.1091192960739136,grad_norm: 0.9999994299404573, iteration: 95211
loss: 1.0397093296051025,grad_norm: 0.9999994476708669, iteration: 95212
loss: 1.0073219537734985,grad_norm: 0.7879857849318357, iteration: 95213
loss: 1.0301313400268555,grad_norm: 0.830330770511452, iteration: 95214
loss: 0.9738907814025879,grad_norm: 0.9999990875846484, iteration: 95215
loss: 1.0059106349945068,grad_norm: 0.9999994593284577, iteration: 95216
loss: 0.9750231504440308,grad_norm: 0.9273025544552568, iteration: 95217
loss: 1.0097013711929321,grad_norm: 0.8364992239853366, iteration: 95218
loss: 1.0124859809875488,grad_norm: 0.9170766860709415, iteration: 95219
loss: 1.0199776887893677,grad_norm: 0.9999993678920678, iteration: 95220
loss: 1.0303630828857422,grad_norm: 0.9406065530592705, iteration: 95221
loss: 1.0102946758270264,grad_norm: 0.882089246268662, iteration: 95222
loss: 1.0770761966705322,grad_norm: 0.9999991553231685, iteration: 95223
loss: 1.0007891654968262,grad_norm: 0.9999989478718978, iteration: 95224
loss: 0.9983444213867188,grad_norm: 0.9239703961215668, iteration: 95225
loss: 0.9524657130241394,grad_norm: 0.9999992289664947, iteration: 95226
loss: 1.2012609243392944,grad_norm: 0.9999993517496646, iteration: 95227
loss: 0.9903414249420166,grad_norm: 0.9999991154390384, iteration: 95228
loss: 0.9984051585197449,grad_norm: 0.7538232780121374, iteration: 95229
loss: 1.0123567581176758,grad_norm: 0.9999991663981584, iteration: 95230
loss: 0.9805404543876648,grad_norm: 0.7207879048238114, iteration: 95231
loss: 1.0245838165283203,grad_norm: 0.99999975524867, iteration: 95232
loss: 1.1248774528503418,grad_norm: 0.9999991110277167, iteration: 95233
loss: 0.9817360043525696,grad_norm: 0.9771275387791108, iteration: 95234
loss: 1.0262306928634644,grad_norm: 0.9999994038533871, iteration: 95235
loss: 1.0312082767486572,grad_norm: 0.999999956125278, iteration: 95236
loss: 0.9851100444793701,grad_norm: 0.777092617034294, iteration: 95237
loss: 1.034738302230835,grad_norm: 0.9006912288213287, iteration: 95238
loss: 1.0394480228424072,grad_norm: 0.9999991132707416, iteration: 95239
loss: 1.0601760149002075,grad_norm: 0.999999253231036, iteration: 95240
loss: 1.0964829921722412,grad_norm: 0.9999991426928314, iteration: 95241
loss: 1.0833929777145386,grad_norm: 0.9999993580133901, iteration: 95242
loss: 0.9876490235328674,grad_norm: 0.9999991863369002, iteration: 95243
loss: 1.0435247421264648,grad_norm: 0.9999991416412036, iteration: 95244
loss: 1.0334433317184448,grad_norm: 0.9266408815234053, iteration: 95245
loss: 1.1703068017959595,grad_norm: 0.9999998497679167, iteration: 95246
loss: 1.0464842319488525,grad_norm: 0.9999996239962838, iteration: 95247
loss: 1.0716792345046997,grad_norm: 0.999999706407986, iteration: 95248
loss: 1.0409815311431885,grad_norm: 0.9999994159392614, iteration: 95249
loss: 1.0336345434188843,grad_norm: 0.9999997806348566, iteration: 95250
loss: 1.0443991422653198,grad_norm: 0.9999991037973529, iteration: 95251
loss: 1.03108811378479,grad_norm: 0.9880878847499737, iteration: 95252
loss: 1.0352743864059448,grad_norm: 0.8890175650928442, iteration: 95253
loss: 0.98921138048172,grad_norm: 0.9552986193245171, iteration: 95254
loss: 0.9766592979431152,grad_norm: 0.9584796557597656, iteration: 95255
loss: 1.0601342916488647,grad_norm: 0.9999992730095884, iteration: 95256
loss: 1.0029454231262207,grad_norm: 0.8796546148802115, iteration: 95257
loss: 1.2095423936843872,grad_norm: 0.9999996729895245, iteration: 95258
loss: 0.9842260479927063,grad_norm: 0.9999993188380105, iteration: 95259
loss: 0.987443745136261,grad_norm: 0.9999993172327253, iteration: 95260
loss: 0.9878478646278381,grad_norm: 0.9082700242872449, iteration: 95261
loss: 1.056815266609192,grad_norm: 0.9917650625834527, iteration: 95262
loss: 1.078893780708313,grad_norm: 0.9999998580140079, iteration: 95263
loss: 1.0174715518951416,grad_norm: 0.9999991313411961, iteration: 95264
loss: 1.036765456199646,grad_norm: 0.9999998475697458, iteration: 95265
loss: 1.078270673751831,grad_norm: 0.9999998934114771, iteration: 95266
loss: 1.015549898147583,grad_norm: 0.9517553136102992, iteration: 95267
loss: 1.1042248010635376,grad_norm: 0.9999992761140566, iteration: 95268
loss: 1.0894184112548828,grad_norm: 0.885685621383919, iteration: 95269
loss: 1.035032868385315,grad_norm: 0.999999280321583, iteration: 95270
loss: 1.0380855798721313,grad_norm: 0.9731051455669397, iteration: 95271
loss: 1.0962506532669067,grad_norm: 0.9999997636325949, iteration: 95272
loss: 1.1068599224090576,grad_norm: 0.9607432753557792, iteration: 95273
loss: 0.9743249416351318,grad_norm: 0.9999994430246107, iteration: 95274
loss: 1.1140085458755493,grad_norm: 0.999999420811097, iteration: 95275
loss: 1.0839967727661133,grad_norm: 0.999999405647915, iteration: 95276
loss: 1.1117846965789795,grad_norm: 0.9999993554065297, iteration: 95277
loss: 1.093887209892273,grad_norm: 0.9999991804876766, iteration: 95278
loss: 1.0714058876037598,grad_norm: 0.9999993176252892, iteration: 95279
loss: 0.9562690854072571,grad_norm: 0.9741176047971912, iteration: 95280
loss: 1.1083245277404785,grad_norm: 1.0000000161019136, iteration: 95281
loss: 0.9673827290534973,grad_norm: 0.9999993577013967, iteration: 95282
loss: 1.1351712942123413,grad_norm: 0.933316994055846, iteration: 95283
loss: 1.0062304735183716,grad_norm: 0.9191982660630925, iteration: 95284
loss: 1.1202735900878906,grad_norm: 0.9999998745303189, iteration: 95285
loss: 1.0062850713729858,grad_norm: 0.8230414114067427, iteration: 95286
loss: 1.0621365308761597,grad_norm: 0.9999990881665158, iteration: 95287
loss: 0.9808869361877441,grad_norm: 0.8487838877984966, iteration: 95288
loss: 1.0025800466537476,grad_norm: 0.9941725989225909, iteration: 95289
loss: 0.9852869510650635,grad_norm: 0.9606553231378823, iteration: 95290
loss: 1.0125553607940674,grad_norm: 0.9470155112560914, iteration: 95291
loss: 1.032218337059021,grad_norm: 0.9604674098702694, iteration: 95292
loss: 1.0895551443099976,grad_norm: 0.9999990709911964, iteration: 95293
loss: 1.076635718345642,grad_norm: 0.9649416397269472, iteration: 95294
loss: 1.034469485282898,grad_norm: 0.9999992389558702, iteration: 95295
loss: 1.0454416275024414,grad_norm: 0.9999993229537805, iteration: 95296
loss: 0.9989036917686462,grad_norm: 0.9738394279287163, iteration: 95297
loss: 1.3080788850784302,grad_norm: 1.000000013501641, iteration: 95298
loss: 1.0118736028671265,grad_norm: 0.8760625649530176, iteration: 95299
loss: 0.9970293045043945,grad_norm: 0.9999993507568415, iteration: 95300
loss: 0.98655766248703,grad_norm: 0.839661127580575, iteration: 95301
loss: 1.0808840990066528,grad_norm: 0.9999991170585948, iteration: 95302
loss: 1.1227138042449951,grad_norm: 0.9999995254337306, iteration: 95303
loss: 1.0449597835540771,grad_norm: 0.9999999525385929, iteration: 95304
loss: 1.3703272342681885,grad_norm: 0.9999994986301625, iteration: 95305
loss: 1.057395339012146,grad_norm: 0.999999321030736, iteration: 95306
loss: 0.9869968295097351,grad_norm: 0.7885499035517206, iteration: 95307
loss: 1.0400192737579346,grad_norm: 0.885938516054649, iteration: 95308
loss: 1.0587881803512573,grad_norm: 0.9002719078191667, iteration: 95309
loss: 1.0229002237319946,grad_norm: 0.9999991747740343, iteration: 95310
loss: 1.0864468812942505,grad_norm: 0.9236222415041562, iteration: 95311
loss: 1.0247540473937988,grad_norm: 0.8180388928729346, iteration: 95312
loss: 1.128074049949646,grad_norm: 0.9999998829283848, iteration: 95313
loss: 1.0287766456604004,grad_norm: 0.9999990804975192, iteration: 95314
loss: 1.0225841999053955,grad_norm: 0.9292317657449829, iteration: 95315
loss: 0.9646447896957397,grad_norm: 0.9591195327878119, iteration: 95316
loss: 1.0323903560638428,grad_norm: 0.9999992696600475, iteration: 95317
loss: 0.9982327818870544,grad_norm: 0.9999991908126564, iteration: 95318
loss: 1.2926719188690186,grad_norm: 0.9999998334540945, iteration: 95319
loss: 1.0104572772979736,grad_norm: 0.8261357070372767, iteration: 95320
loss: 0.9992862939834595,grad_norm: 0.9999998425992133, iteration: 95321
loss: 1.0947978496551514,grad_norm: 0.9999997803668179, iteration: 95322
loss: 0.9832363724708557,grad_norm: 0.999999197769602, iteration: 95323
loss: 1.0379297733306885,grad_norm: 0.9999991537506953, iteration: 95324
loss: 1.0936846733093262,grad_norm: 0.999999227949055, iteration: 95325
loss: 1.1171674728393555,grad_norm: 0.9999992430985002, iteration: 95326
loss: 1.0905290842056274,grad_norm: 0.9683015324586024, iteration: 95327
loss: 1.1239479780197144,grad_norm: 0.9999991096526464, iteration: 95328
loss: 1.0666667222976685,grad_norm: 0.9999993610756839, iteration: 95329
loss: 1.0205992460250854,grad_norm: 0.8477736908113532, iteration: 95330
loss: 1.1056935787200928,grad_norm: 0.9999990993213741, iteration: 95331
loss: 1.0359430313110352,grad_norm: 0.9999995403145658, iteration: 95332
loss: 0.9915608167648315,grad_norm: 0.9999991405833011, iteration: 95333
loss: 0.9682224988937378,grad_norm: 0.9999995672596118, iteration: 95334
loss: 0.9889270663261414,grad_norm: 0.9999992405276759, iteration: 95335
loss: 1.0606878995895386,grad_norm: 0.9999996724772616, iteration: 95336
loss: 1.0215352773666382,grad_norm: 0.7917636446567251, iteration: 95337
loss: 1.1506952047348022,grad_norm: 0.9999991252344833, iteration: 95338
loss: 1.0509048700332642,grad_norm: 0.9999991390398987, iteration: 95339
loss: 0.9322265982627869,grad_norm: 0.9999998232824173, iteration: 95340
loss: 1.030293583869934,grad_norm: 0.7887221441720053, iteration: 95341
loss: 1.041331171989441,grad_norm: 0.9999998092765079, iteration: 95342
loss: 1.0904629230499268,grad_norm: 0.9999991579639145, iteration: 95343
loss: 0.9845932722091675,grad_norm: 0.9999991052381374, iteration: 95344
loss: 0.9648942351341248,grad_norm: 0.9593436719689528, iteration: 95345
loss: 1.0165960788726807,grad_norm: 0.9034227617046021, iteration: 95346
loss: 1.0006812810897827,grad_norm: 0.999999203426278, iteration: 95347
loss: 1.0431933403015137,grad_norm: 0.9999991613524248, iteration: 95348
loss: 1.0305722951889038,grad_norm: 0.9999991163701086, iteration: 95349
loss: 0.9970825910568237,grad_norm: 0.9999991077318626, iteration: 95350
loss: 1.0577178001403809,grad_norm: 0.999999395760888, iteration: 95351
loss: 1.0637712478637695,grad_norm: 0.9999996867821168, iteration: 95352
loss: 1.010027289390564,grad_norm: 0.9999993154145472, iteration: 95353
loss: 1.041298508644104,grad_norm: 0.9999990920309809, iteration: 95354
loss: 1.0255521535873413,grad_norm: 0.9999991063379498, iteration: 95355
loss: 1.00619637966156,grad_norm: 0.885281292413935, iteration: 95356
loss: 1.1570852994918823,grad_norm: 0.9999996565892258, iteration: 95357
loss: 1.0406781435012817,grad_norm: 0.9312655235749351, iteration: 95358
loss: 0.9947197437286377,grad_norm: 0.9999992883721094, iteration: 95359
loss: 1.0309863090515137,grad_norm: 0.9999996455712689, iteration: 95360
loss: 0.9876351952552795,grad_norm: 0.8430765373300747, iteration: 95361
loss: 1.1069309711456299,grad_norm: 0.9558776812483871, iteration: 95362
loss: 1.052963137626648,grad_norm: 0.9359293595209441, iteration: 95363
loss: 1.104431390762329,grad_norm: 0.9999998875670069, iteration: 95364
loss: 1.2375636100769043,grad_norm: 0.9999998170383664, iteration: 95365
loss: 1.0377238988876343,grad_norm: 0.9999989463675654, iteration: 95366
loss: 1.0138332843780518,grad_norm: 0.9999993379269452, iteration: 95367
loss: 0.9962434768676758,grad_norm: 0.9999994561829562, iteration: 95368
loss: 1.0776340961456299,grad_norm: 0.7870039996216199, iteration: 95369
loss: 1.01542329788208,grad_norm: 0.9999992713490281, iteration: 95370
loss: 1.0159317255020142,grad_norm: 0.9985961971780983, iteration: 95371
loss: 1.1074212789535522,grad_norm: 0.9786835478790669, iteration: 95372
loss: 1.062808871269226,grad_norm: 0.9999994228346265, iteration: 95373
loss: 1.046128749847412,grad_norm: 0.9999993333781471, iteration: 95374
loss: 0.9875703454017639,grad_norm: 0.9102476723708834, iteration: 95375
loss: 1.054986596107483,grad_norm: 0.9999991360073535, iteration: 95376
loss: 1.0667780637741089,grad_norm: 0.9999997040892272, iteration: 95377
loss: 1.0488249063491821,grad_norm: 0.999999242927647, iteration: 95378
loss: 0.9519197940826416,grad_norm: 0.9587472441004654, iteration: 95379
loss: 1.0018718242645264,grad_norm: 0.999999692710889, iteration: 95380
loss: 1.0886995792388916,grad_norm: 0.9999992503088084, iteration: 95381
loss: 1.0540720224380493,grad_norm: 0.8657638090772986, iteration: 95382
loss: 0.9966676235198975,grad_norm: 0.9999995300640383, iteration: 95383
loss: 1.012130618095398,grad_norm: 0.9999996988129269, iteration: 95384
loss: 1.1163311004638672,grad_norm: 0.999999803476341, iteration: 95385
loss: 1.0131860971450806,grad_norm: 0.9816338221729624, iteration: 95386
loss: 1.0471200942993164,grad_norm: 0.9648791382233521, iteration: 95387
loss: 1.0210107564926147,grad_norm: 0.9608760736419821, iteration: 95388
loss: 1.039080262184143,grad_norm: 0.999999080687542, iteration: 95389
loss: 1.1735626459121704,grad_norm: 0.9999996054908403, iteration: 95390
loss: 0.9978407025337219,grad_norm: 0.9999998861889168, iteration: 95391
loss: 1.1652659177780151,grad_norm: 0.9999999917932192, iteration: 95392
loss: 1.3246859312057495,grad_norm: 0.999999809975794, iteration: 95393
loss: 1.1413652896881104,grad_norm: 0.9999998883698986, iteration: 95394
loss: 1.0516209602355957,grad_norm: 0.9999995867601217, iteration: 95395
loss: 1.0364997386932373,grad_norm: 0.9999998923506247, iteration: 95396
loss: 1.0285736322402954,grad_norm: 0.909694669734033, iteration: 95397
loss: 1.05067777633667,grad_norm: 0.9999989505232021, iteration: 95398
loss: 1.0440726280212402,grad_norm: 0.82836111967415, iteration: 95399
loss: 1.0160759687423706,grad_norm: 0.8105440392417378, iteration: 95400
loss: 1.0663362741470337,grad_norm: 0.9999990777151232, iteration: 95401
loss: 0.9750492572784424,grad_norm: 0.9416368784912361, iteration: 95402
loss: 1.0282825231552124,grad_norm: 0.9381822490692072, iteration: 95403
loss: 1.1000394821166992,grad_norm: 0.9999992211982267, iteration: 95404
loss: 0.9658494591712952,grad_norm: 0.9503506092731163, iteration: 95405
loss: 1.0677640438079834,grad_norm: 0.8745167655629955, iteration: 95406
loss: 1.028472661972046,grad_norm: 0.9612816603023471, iteration: 95407
loss: 1.0447967052459717,grad_norm: 0.9999992148746848, iteration: 95408
loss: 1.0111134052276611,grad_norm: 0.9275739317411851, iteration: 95409
loss: 1.0016746520996094,grad_norm: 0.9999989987420432, iteration: 95410
loss: 0.9913970828056335,grad_norm: 0.9999991537216307, iteration: 95411
loss: 1.001713752746582,grad_norm: 0.8243739661367899, iteration: 95412
loss: 1.0879881381988525,grad_norm: 0.9999991720675192, iteration: 95413
loss: 1.0597705841064453,grad_norm: 0.9999991370925876, iteration: 95414
loss: 1.0220032930374146,grad_norm: 0.999999570739299, iteration: 95415
loss: 0.9617149233818054,grad_norm: 0.8211631021208671, iteration: 95416
loss: 1.008859634399414,grad_norm: 0.9999991397952942, iteration: 95417
loss: 1.013489007949829,grad_norm: 0.981354410703539, iteration: 95418
loss: 1.2069623470306396,grad_norm: 0.999999783265931, iteration: 95419
loss: 1.0646740198135376,grad_norm: 0.9999989539768692, iteration: 95420
loss: 1.1962250471115112,grad_norm: 0.9999999285710864, iteration: 95421
loss: 0.9997835159301758,grad_norm: 0.9999992356768181, iteration: 95422
loss: 1.0176606178283691,grad_norm: 0.9999992378212776, iteration: 95423
loss: 1.0899587869644165,grad_norm: 0.9999998047619523, iteration: 95424
loss: 1.0235077142715454,grad_norm: 0.8226511764537091, iteration: 95425
loss: 1.0321701765060425,grad_norm: 0.9999998647131939, iteration: 95426
loss: 1.013397455215454,grad_norm: 0.9304818369154199, iteration: 95427
loss: 1.162798285484314,grad_norm: 0.9999991888872466, iteration: 95428
loss: 1.0079925060272217,grad_norm: 0.8930439935086025, iteration: 95429
loss: 0.9910674691200256,grad_norm: 0.9999989745103792, iteration: 95430
loss: 0.987180769443512,grad_norm: 0.9999993807236996, iteration: 95431
loss: 0.9844184517860413,grad_norm: 0.9430855785230529, iteration: 95432
loss: 1.0103673934936523,grad_norm: 0.9999992199872453, iteration: 95433
loss: 1.005465030670166,grad_norm: 0.9400131690588717, iteration: 95434
loss: 1.0644484758377075,grad_norm: 0.9999992954009762, iteration: 95435
loss: 1.093706488609314,grad_norm: 0.9999991605957761, iteration: 95436
loss: 1.2225122451782227,grad_norm: 0.999999399262834, iteration: 95437
loss: 1.06955087184906,grad_norm: 0.9133005616859217, iteration: 95438
loss: 1.0279107093811035,grad_norm: 0.9999994159745694, iteration: 95439
loss: 1.0140244960784912,grad_norm: 0.984152752815123, iteration: 95440
loss: 1.0466687679290771,grad_norm: 0.7789601261878013, iteration: 95441
loss: 0.9870541095733643,grad_norm: 0.9343523387547302, iteration: 95442
loss: 1.0196826457977295,grad_norm: 0.9596338421899634, iteration: 95443
loss: 1.0791243314743042,grad_norm: 0.9999990650276035, iteration: 95444
loss: 1.0246576070785522,grad_norm: 0.9999995843468833, iteration: 95445
loss: 1.149788737297058,grad_norm: 0.9999999298311417, iteration: 95446
loss: 1.107742428779602,grad_norm: 0.9999991107070075, iteration: 95447
loss: 1.0238250494003296,grad_norm: 0.9999990623862386, iteration: 95448
loss: 1.182252049446106,grad_norm: 0.9999997865439144, iteration: 95449
loss: 0.9987582564353943,grad_norm: 0.999999126006649, iteration: 95450
loss: 1.0066555738449097,grad_norm: 0.8581477191740718, iteration: 95451
loss: 1.020583152770996,grad_norm: 0.9999990994394936, iteration: 95452
loss: 1.0342330932617188,grad_norm: 0.9871300804583166, iteration: 95453
loss: 1.029693365097046,grad_norm: 0.9999991890593656, iteration: 95454
loss: 1.0172280073165894,grad_norm: 0.9010520341173336, iteration: 95455
loss: 1.0218174457550049,grad_norm: 0.9999995760888866, iteration: 95456
loss: 1.0242338180541992,grad_norm: 0.9999992194597765, iteration: 95457
loss: 1.0350667238235474,grad_norm: 0.9999992884647156, iteration: 95458
loss: 1.0194828510284424,grad_norm: 0.9422123889778758, iteration: 95459
loss: 1.0634195804595947,grad_norm: 0.9999994830271876, iteration: 95460
loss: 1.132585048675537,grad_norm: 0.9999999170025856, iteration: 95461
loss: 1.2090598344802856,grad_norm: 0.999999755174695, iteration: 95462
loss: 1.062759518623352,grad_norm: 0.9999998969043727, iteration: 95463
loss: 1.0508373975753784,grad_norm: 0.9057779824878284, iteration: 95464
loss: 0.9980347752571106,grad_norm: 0.965461429621379, iteration: 95465
loss: 0.9857010841369629,grad_norm: 0.9433793691671399, iteration: 95466
loss: 1.1209712028503418,grad_norm: 0.9999998774577525, iteration: 95467
loss: 0.9931113719940186,grad_norm: 0.9128056081655872, iteration: 95468
loss: 1.0496011972427368,grad_norm: 0.9999995023093522, iteration: 95469
loss: 0.9944005608558655,grad_norm: 0.9242553369238284, iteration: 95470
loss: 1.0615410804748535,grad_norm: 0.9999992257765218, iteration: 95471
loss: 1.1050939559936523,grad_norm: 0.9976157073960726, iteration: 95472
loss: 1.041449785232544,grad_norm: 0.9999996888725222, iteration: 95473
loss: 0.9497449994087219,grad_norm: 0.854694691161228, iteration: 95474
loss: 1.0015217065811157,grad_norm: 0.8862865929501126, iteration: 95475
loss: 1.023498773574829,grad_norm: 0.9999992444983696, iteration: 95476
loss: 1.0897560119628906,grad_norm: 0.9999992708997671, iteration: 95477
loss: 1.2037683725357056,grad_norm: 0.9999998890928388, iteration: 95478
loss: 1.000709056854248,grad_norm: 0.9166209240342666, iteration: 95479
loss: 0.9952967166900635,grad_norm: 0.9999997073253172, iteration: 95480
loss: 1.090299367904663,grad_norm: 0.9999991655787415, iteration: 95481
loss: 1.009332299232483,grad_norm: 0.8904308281965817, iteration: 95482
loss: 1.0339523553848267,grad_norm: 0.8298177557384312, iteration: 95483
loss: 1.0292227268218994,grad_norm: 0.9057414143301783, iteration: 95484
loss: 1.0321094989776611,grad_norm: 0.8415506245202555, iteration: 95485
loss: 1.032197117805481,grad_norm: 0.9999992913454104, iteration: 95486
loss: 0.9826023578643799,grad_norm: 0.9158702775149752, iteration: 95487
loss: 1.0208176374435425,grad_norm: 0.9999991858509523, iteration: 95488
loss: 1.0862258672714233,grad_norm: 0.9999997699179897, iteration: 95489
loss: 1.010916829109192,grad_norm: 0.7780244912161661, iteration: 95490
loss: 1.0444809198379517,grad_norm: 0.9999999887898702, iteration: 95491
loss: 1.0194425582885742,grad_norm: 0.9999993439238152, iteration: 95492
loss: 1.058712124824524,grad_norm: 0.99999928750294, iteration: 95493
loss: 1.0350885391235352,grad_norm: 0.999999671690646, iteration: 95494
loss: 1.008765697479248,grad_norm: 0.9991481745453672, iteration: 95495
loss: 1.0601832866668701,grad_norm: 0.9999990796149988, iteration: 95496
loss: 1.020168423652649,grad_norm: 0.999999532321522, iteration: 95497
loss: 1.0350819826126099,grad_norm: 0.8927753992046513, iteration: 95498
loss: 0.9969692826271057,grad_norm: 0.7528093404031154, iteration: 95499
loss: 1.0694142580032349,grad_norm: 0.9999990857730983, iteration: 95500
loss: 1.0126619338989258,grad_norm: 0.877735988909822, iteration: 95501
loss: 1.144805908203125,grad_norm: 0.9999996878477901, iteration: 95502
loss: 0.9880662560462952,grad_norm: 0.9424707989602323, iteration: 95503
loss: 1.0800118446350098,grad_norm: 0.9999990883728999, iteration: 95504
loss: 1.0010250806808472,grad_norm: 0.9999990919608891, iteration: 95505
loss: 1.0069701671600342,grad_norm: 0.809951465298596, iteration: 95506
loss: 1.0676331520080566,grad_norm: 0.9999991826370511, iteration: 95507
loss: 1.2631666660308838,grad_norm: 0.9999996606882169, iteration: 95508
loss: 1.0051535367965698,grad_norm: 0.9999994164073467, iteration: 95509
loss: 1.0873563289642334,grad_norm: 0.9999997757760776, iteration: 95510
loss: 1.0615715980529785,grad_norm: 0.9999999011143217, iteration: 95511
loss: 1.0313386917114258,grad_norm: 0.9999993340898903, iteration: 95512
loss: 1.0167264938354492,grad_norm: 0.999998959994954, iteration: 95513
loss: 1.04523503780365,grad_norm: 0.908931691716076, iteration: 95514
loss: 1.1621452569961548,grad_norm: 0.999999093293473, iteration: 95515
loss: 1.0261446237564087,grad_norm: 0.9999992606361134, iteration: 95516
loss: 1.07326340675354,grad_norm: 0.9999990191209935, iteration: 95517
loss: 1.0856693983078003,grad_norm: 0.9999993224269137, iteration: 95518
loss: 1.0016735792160034,grad_norm: 0.9554246837860031, iteration: 95519
loss: 1.0397804975509644,grad_norm: 0.9230293501293619, iteration: 95520
loss: 1.0380181074142456,grad_norm: 0.9999991905270337, iteration: 95521
loss: 1.0245144367218018,grad_norm: 0.9999993934522897, iteration: 95522
loss: 0.9935900568962097,grad_norm: 0.7965079776290629, iteration: 95523
loss: 1.0500131845474243,grad_norm: 0.9688272017919011, iteration: 95524
loss: 1.0988783836364746,grad_norm: 0.9999991839497003, iteration: 95525
loss: 1.0581963062286377,grad_norm: 0.9999998091480095, iteration: 95526
loss: 1.0044842958450317,grad_norm: 0.9999993773340857, iteration: 95527
loss: 1.2074881792068481,grad_norm: 0.9999998677512477, iteration: 95528
loss: 0.9759572148323059,grad_norm: 0.9741218585656976, iteration: 95529
loss: 1.0676013231277466,grad_norm: 0.8808116883234295, iteration: 95530
loss: 1.0209382772445679,grad_norm: 0.999999030451252, iteration: 95531
loss: 0.9874480366706848,grad_norm: 0.9424345316109273, iteration: 95532
loss: 1.0972827672958374,grad_norm: 0.9999999350539932, iteration: 95533
loss: 1.0665440559387207,grad_norm: 0.9999996157041416, iteration: 95534
loss: 1.1126054525375366,grad_norm: 0.943067094183502, iteration: 95535
loss: 1.1720813512802124,grad_norm: 0.9999995538613501, iteration: 95536
loss: 1.0070675611495972,grad_norm: 0.8604517572791681, iteration: 95537
loss: 1.032390832901001,grad_norm: 0.9306389462736199, iteration: 95538
loss: 0.9655579328536987,grad_norm: 0.9416382285654143, iteration: 95539
loss: 1.1864690780639648,grad_norm: 0.9999992424427593, iteration: 95540
loss: 1.050195336341858,grad_norm: 0.9999998626211772, iteration: 95541
loss: 0.9856224060058594,grad_norm: 0.9999992324338, iteration: 95542
loss: 1.1625539064407349,grad_norm: 0.9999993932559029, iteration: 95543
loss: 1.015328049659729,grad_norm: 0.9423256067232914, iteration: 95544
loss: 1.1820579767227173,grad_norm: 0.9999999723211235, iteration: 95545
loss: 1.0244718790054321,grad_norm: 0.9163210804542535, iteration: 95546
loss: 1.0214663743972778,grad_norm: 0.9999993473617982, iteration: 95547
loss: 1.0053964853286743,grad_norm: 0.8569069322326173, iteration: 95548
loss: 1.0842636823654175,grad_norm: 0.9750446896578557, iteration: 95549
loss: 1.0102802515029907,grad_norm: 0.847983486263572, iteration: 95550
loss: 0.9755581021308899,grad_norm: 0.8622414673328126, iteration: 95551
loss: 1.0472614765167236,grad_norm: 0.9999990997704905, iteration: 95552
loss: 1.0618659257888794,grad_norm: 0.9999995422147707, iteration: 95553
loss: 1.0065447092056274,grad_norm: 0.9999990839449162, iteration: 95554
loss: 1.0886081457138062,grad_norm: 0.999999233137933, iteration: 95555
loss: 1.030331015586853,grad_norm: 0.9327582184204402, iteration: 95556
loss: 1.0638279914855957,grad_norm: 0.9999994520681573, iteration: 95557
loss: 0.9886675477027893,grad_norm: 0.934488310825011, iteration: 95558
loss: 1.0784759521484375,grad_norm: 0.9999992651268569, iteration: 95559
loss: 1.0146209001541138,grad_norm: 0.8193486058012392, iteration: 95560
loss: 1.0242867469787598,grad_norm: 0.9999991308535698, iteration: 95561
loss: 1.1905633211135864,grad_norm: 0.981412827433985, iteration: 95562
loss: 1.1785887479782104,grad_norm: 0.9999993451208214, iteration: 95563
loss: 1.253019094467163,grad_norm: 0.9999996712147845, iteration: 95564
loss: 1.0896342992782593,grad_norm: 0.9999994582771956, iteration: 95565
loss: 1.1176735162734985,grad_norm: 0.9999998808180526, iteration: 95566
loss: 0.9909042716026306,grad_norm: 0.880200604413645, iteration: 95567
loss: 1.0413001775741577,grad_norm: 0.9999997486509752, iteration: 95568
loss: 1.0862897634506226,grad_norm: 0.9999992456573348, iteration: 95569
loss: 1.019482970237732,grad_norm: 0.8380744310841878, iteration: 95570
loss: 1.0626544952392578,grad_norm: 0.9999990475466228, iteration: 95571
loss: 0.9778317809104919,grad_norm: 0.8937259238982806, iteration: 95572
loss: 1.0226938724517822,grad_norm: 0.999999101177309, iteration: 95573
loss: 1.0254039764404297,grad_norm: 0.999999691298442, iteration: 95574
loss: 1.0411423444747925,grad_norm: 0.9999990494101565, iteration: 95575
loss: 1.00055992603302,grad_norm: 0.9999994243457696, iteration: 95576
loss: 1.0470709800720215,grad_norm: 0.984456926506892, iteration: 95577
loss: 0.9868104457855225,grad_norm: 0.7999650721839228, iteration: 95578
loss: 0.9991068243980408,grad_norm: 0.9999991690018549, iteration: 95579
loss: 1.010236382484436,grad_norm: 0.9192622193460936, iteration: 95580
loss: 0.9916864037513733,grad_norm: 0.9999993302844528, iteration: 95581
loss: 1.034712314605713,grad_norm: 0.8811494331065891, iteration: 95582
loss: 1.0543735027313232,grad_norm: 0.888399135756865, iteration: 95583
loss: 1.0130304098129272,grad_norm: 0.8579574862273317, iteration: 95584
loss: 1.076827883720398,grad_norm: 0.9999993521234501, iteration: 95585
loss: 1.0234211683273315,grad_norm: 0.956221252941442, iteration: 95586
loss: 1.1432019472122192,grad_norm: 0.9999993121399554, iteration: 95587
loss: 0.981451690196991,grad_norm: 0.8642923645127581, iteration: 95588
loss: 1.0014060735702515,grad_norm: 0.9406955983976749, iteration: 95589
loss: 1.1321954727172852,grad_norm: 0.9999997405015872, iteration: 95590
loss: 0.959709107875824,grad_norm: 0.9329356441231283, iteration: 95591
loss: 1.0508918762207031,grad_norm: 0.9999997432246704, iteration: 95592
loss: 1.0340627431869507,grad_norm: 0.9208768626562794, iteration: 95593
loss: 1.04494047164917,grad_norm: 0.8455478800665247, iteration: 95594
loss: 0.9764131307601929,grad_norm: 0.9999991832466006, iteration: 95595
loss: 1.0629045963287354,grad_norm: 0.9999990606533957, iteration: 95596
loss: 1.0245895385742188,grad_norm: 0.8353938030691527, iteration: 95597
loss: 1.028739333152771,grad_norm: 0.9999995438511213, iteration: 95598
loss: 1.0622568130493164,grad_norm: 0.8461115271588293, iteration: 95599
loss: 1.0169813632965088,grad_norm: 0.8161187805728038, iteration: 95600
loss: 1.1032100915908813,grad_norm: 0.9999995535900412, iteration: 95601
loss: 1.134825348854065,grad_norm: 0.9999994533361038, iteration: 95602
loss: 1.0414791107177734,grad_norm: 0.9963469593479872, iteration: 95603
loss: 1.0022045373916626,grad_norm: 0.9654898242543474, iteration: 95604
loss: 1.0390348434448242,grad_norm: 0.9999996507553931, iteration: 95605
loss: 1.0485420227050781,grad_norm: 0.999999203179297, iteration: 95606
loss: 1.0122722387313843,grad_norm: 0.9999992472189513, iteration: 95607
loss: 1.017756700515747,grad_norm: 0.9386234766104969, iteration: 95608
loss: 1.0327565670013428,grad_norm: 0.9999996373456698, iteration: 95609
loss: 1.0534253120422363,grad_norm: 0.9677947437671768, iteration: 95610
loss: 0.9918643236160278,grad_norm: 0.9230801873620117, iteration: 95611
loss: 0.9773077368736267,grad_norm: 0.9999998697445457, iteration: 95612
loss: 0.9997417330741882,grad_norm: 0.9999999748577525, iteration: 95613
loss: 1.023572325706482,grad_norm: 0.8288536401521011, iteration: 95614
loss: 1.0320956707000732,grad_norm: 0.9210607057923209, iteration: 95615
loss: 0.9869999289512634,grad_norm: 0.9258840787076987, iteration: 95616
loss: 1.0430506467819214,grad_norm: 0.9166850357849843, iteration: 95617
loss: 1.0013798475265503,grad_norm: 0.99999912302941, iteration: 95618
loss: 1.0823453664779663,grad_norm: 0.9999995993184855, iteration: 95619
loss: 1.0181390047073364,grad_norm: 0.8697628373823973, iteration: 95620
loss: 1.0015099048614502,grad_norm: 0.8805495000810095, iteration: 95621
loss: 1.0506646633148193,grad_norm: 0.9254329231017474, iteration: 95622
loss: 1.005578875541687,grad_norm: 0.8785557857249449, iteration: 95623
loss: 1.0768836736679077,grad_norm: 0.9271749371826269, iteration: 95624
loss: 0.9870174527168274,grad_norm: 0.8491399558467678, iteration: 95625
loss: 0.9986957907676697,grad_norm: 0.9999989869830636, iteration: 95626
loss: 1.068292498588562,grad_norm: 0.9799340215627992, iteration: 95627
loss: 0.9769953489303589,grad_norm: 0.9999990947912805, iteration: 95628
loss: 1.0543091297149658,grad_norm: 0.9999996447112923, iteration: 95629
loss: 0.9455552101135254,grad_norm: 0.8290689000415511, iteration: 95630
loss: 1.047102689743042,grad_norm: 0.9240128114660097, iteration: 95631
loss: 0.9800754189491272,grad_norm: 0.9764219830419041, iteration: 95632
loss: 1.0156413316726685,grad_norm: 0.8801646906865944, iteration: 95633
loss: 1.0207027196884155,grad_norm: 0.8111618220169032, iteration: 95634
loss: 0.9839523434638977,grad_norm: 0.8944556414732148, iteration: 95635
loss: 0.9949831962585449,grad_norm: 0.7839694650477799, iteration: 95636
loss: 0.9931231141090393,grad_norm: 0.8477612788332579, iteration: 95637
loss: 1.0038059949874878,grad_norm: 0.8836750636333573, iteration: 95638
loss: 0.9703549742698669,grad_norm: 0.9999991206744381, iteration: 95639
loss: 0.9982168078422546,grad_norm: 0.8639927218777554, iteration: 95640
loss: 1.0311335325241089,grad_norm: 0.999999018408289, iteration: 95641
loss: 0.9882302284240723,grad_norm: 0.999999305966014, iteration: 95642
loss: 1.0817184448242188,grad_norm: 0.9924655456300006, iteration: 95643
loss: 1.0192131996154785,grad_norm: 0.7944847704131758, iteration: 95644
loss: 1.0489871501922607,grad_norm: 0.9522956541170093, iteration: 95645
loss: 0.9959592223167419,grad_norm: 0.8868527558161949, iteration: 95646
loss: 1.0093241930007935,grad_norm: 0.9659289046261986, iteration: 95647
loss: 1.0248444080352783,grad_norm: 0.9999989901020415, iteration: 95648
loss: 0.9996772408485413,grad_norm: 0.8463383803566212, iteration: 95649
loss: 1.024124026298523,grad_norm: 0.999999686352612, iteration: 95650
loss: 1.078170895576477,grad_norm: 0.9999991070272845, iteration: 95651
loss: 1.1167391538619995,grad_norm: 0.9999994799347767, iteration: 95652
loss: 1.0066429376602173,grad_norm: 0.9019333583735596, iteration: 95653
loss: 1.0574406385421753,grad_norm: 0.9999997264599193, iteration: 95654
loss: 1.0737838745117188,grad_norm: 0.9999990546616676, iteration: 95655
loss: 0.9821130633354187,grad_norm: 0.7780254680145174, iteration: 95656
loss: 0.9827669262886047,grad_norm: 0.7864420610524383, iteration: 95657
loss: 1.0772333145141602,grad_norm: 0.9999998261065018, iteration: 95658
loss: 0.9995139241218567,grad_norm: 0.9999994992584359, iteration: 95659
loss: 1.060768485069275,grad_norm: 0.9999998690443952, iteration: 95660
loss: 0.9718686938285828,grad_norm: 0.9520528201039254, iteration: 95661
loss: 1.0159389972686768,grad_norm: 0.7554984811515849, iteration: 95662
loss: 1.1179313659667969,grad_norm: 0.9999994241313668, iteration: 95663
loss: 0.9967365264892578,grad_norm: 0.933393796990676, iteration: 95664
loss: 1.0014194250106812,grad_norm: 0.9999991636089784, iteration: 95665
loss: 0.9964289665222168,grad_norm: 0.9999992137421959, iteration: 95666
loss: 1.0810352563858032,grad_norm: 0.999999656062322, iteration: 95667
loss: 1.029610276222229,grad_norm: 0.9999999208804399, iteration: 95668
loss: 1.216611385345459,grad_norm: 0.9999998701636547, iteration: 95669
loss: 1.0484308004379272,grad_norm: 0.881939413832273, iteration: 95670
loss: 1.1321690082550049,grad_norm: 0.9999997290111828, iteration: 95671
loss: 1.038656234741211,grad_norm: 0.9999996856567671, iteration: 95672
loss: 1.030666708946228,grad_norm: 0.8374579358288359, iteration: 95673
loss: 1.0798519849777222,grad_norm: 0.9999991268090713, iteration: 95674
loss: 1.1130305528640747,grad_norm: 0.9999993997058491, iteration: 95675
loss: 1.0225977897644043,grad_norm: 0.9531119975364276, iteration: 95676
loss: 0.9988147616386414,grad_norm: 0.9999993419974225, iteration: 95677
loss: 1.0055005550384521,grad_norm: 0.9999991715313774, iteration: 95678
loss: 1.0661523342132568,grad_norm: 0.9999989976524797, iteration: 95679
loss: 1.0229061841964722,grad_norm: 0.9779177098939779, iteration: 95680
loss: 1.003584861755371,grad_norm: 0.9753435425497763, iteration: 95681
loss: 1.00115966796875,grad_norm: 0.9994261203972409, iteration: 95682
loss: 0.9813878536224365,grad_norm: 0.999999256494508, iteration: 95683
loss: 1.053604006767273,grad_norm: 0.9999994280439191, iteration: 95684
loss: 1.0421394109725952,grad_norm: 0.8923718384465236, iteration: 95685
loss: 1.0021076202392578,grad_norm: 0.9470288756829758, iteration: 95686
loss: 0.9985142946243286,grad_norm: 0.8755668387255104, iteration: 95687
loss: 1.133615255355835,grad_norm: 0.9999998050221952, iteration: 95688
loss: 0.9636251926422119,grad_norm: 0.8986918296397935, iteration: 95689
loss: 1.1182243824005127,grad_norm: 0.9999995184350342, iteration: 95690
loss: 1.147201418876648,grad_norm: 0.9999998488944042, iteration: 95691
loss: 1.111690640449524,grad_norm: 1.0000000540484528, iteration: 95692
loss: 1.1284713745117188,grad_norm: 0.9999998876276582, iteration: 95693
loss: 0.9475283026695251,grad_norm: 0.9999995833311639, iteration: 95694
loss: 1.167963981628418,grad_norm: 0.9999994503186787, iteration: 95695
loss: 1.0927751064300537,grad_norm: 0.9999994607682539, iteration: 95696
loss: 0.9805700778961182,grad_norm: 0.985414359484792, iteration: 95697
loss: 1.1302272081375122,grad_norm: 0.9999999009398349, iteration: 95698
loss: 0.9856329560279846,grad_norm: 0.9286711744851748, iteration: 95699
loss: 1.0356335639953613,grad_norm: 0.9999993850773952, iteration: 95700
loss: 1.0217299461364746,grad_norm: 0.8248685977116138, iteration: 95701
loss: 0.9972248673439026,grad_norm: 0.8684944654967626, iteration: 95702
loss: 1.051430106163025,grad_norm: 0.9999994747004999, iteration: 95703
loss: 0.9980307221412659,grad_norm: 0.7509024349181629, iteration: 95704
loss: 1.1992579698562622,grad_norm: 0.999999241035314, iteration: 95705
loss: 1.0654807090759277,grad_norm: 0.9927037558422416, iteration: 95706
loss: 0.9979555010795593,grad_norm: 0.9999991456961903, iteration: 95707
loss: 1.0159127712249756,grad_norm: 0.9457827616769403, iteration: 95708
loss: 1.027483582496643,grad_norm: 0.9800484966240207, iteration: 95709
loss: 1.0005542039871216,grad_norm: 0.9249850369207725, iteration: 95710
loss: 1.0889391899108887,grad_norm: 0.9999994026251894, iteration: 95711
loss: 0.9845240712165833,grad_norm: 0.953644379214654, iteration: 95712
loss: 1.0190606117248535,grad_norm: 0.9999991808624049, iteration: 95713
loss: 1.0145235061645508,grad_norm: 0.7816908350691611, iteration: 95714
loss: 1.0303236246109009,grad_norm: 0.9999992942986451, iteration: 95715
loss: 0.9437891840934753,grad_norm: 0.9891696395120894, iteration: 95716
loss: 1.0663857460021973,grad_norm: 0.9143689696227716, iteration: 95717
loss: 1.0040380954742432,grad_norm: 0.9999997067776125, iteration: 95718
loss: 1.0082180500030518,grad_norm: 0.9878533302658412, iteration: 95719
loss: 1.0859431028366089,grad_norm: 0.9999999088897759, iteration: 95720
loss: 0.9856769442558289,grad_norm: 0.9685466230409235, iteration: 95721
loss: 0.9733802676200867,grad_norm: 0.8718506102760739, iteration: 95722
loss: 1.2405678033828735,grad_norm: 0.9999992611684397, iteration: 95723
loss: 1.0834580659866333,grad_norm: 0.9999993139333939, iteration: 95724
loss: 1.0458743572235107,grad_norm: 0.8888771990720986, iteration: 95725
loss: 1.0186446905136108,grad_norm: 0.9240859166805816, iteration: 95726
loss: 0.9867112636566162,grad_norm: 0.7847089277279273, iteration: 95727
loss: 1.0127451419830322,grad_norm: 0.9979953183482247, iteration: 95728
loss: 1.294280767440796,grad_norm: 0.9999999893635755, iteration: 95729
loss: 1.0197879076004028,grad_norm: 0.9999997949460988, iteration: 95730
loss: 1.0963971614837646,grad_norm: 0.999999070015427, iteration: 95731
loss: 1.000429391860962,grad_norm: 0.8905075700835272, iteration: 95732
loss: 1.112257719039917,grad_norm: 0.9999997753437965, iteration: 95733
loss: 0.9571768641471863,grad_norm: 0.9492568024032016, iteration: 95734
loss: 0.9941384196281433,grad_norm: 0.8213321885992756, iteration: 95735
loss: 1.0298118591308594,grad_norm: 0.9999994419267184, iteration: 95736
loss: 1.0311272144317627,grad_norm: 0.9999990472333152, iteration: 95737
loss: 0.99213045835495,grad_norm: 0.9999994401845406, iteration: 95738
loss: 1.2343965768814087,grad_norm: 0.9999996381161192, iteration: 95739
loss: 1.0187560319900513,grad_norm: 0.915132253679862, iteration: 95740
loss: 0.9949460625648499,grad_norm: 0.9528997755388511, iteration: 95741
loss: 1.1995271444320679,grad_norm: 0.9999994527250516, iteration: 95742
loss: 1.1745152473449707,grad_norm: 0.9999992844972636, iteration: 95743
loss: 0.9983561635017395,grad_norm: 0.9375871995054296, iteration: 95744
loss: 1.0203909873962402,grad_norm: 0.8460518079113355, iteration: 95745
loss: 1.1430519819259644,grad_norm: 0.9999999075861044, iteration: 95746
loss: 0.9906490445137024,grad_norm: 0.9119248322111151, iteration: 95747
loss: 1.0567054748535156,grad_norm: 0.999999762483858, iteration: 95748
loss: 1.0619620084762573,grad_norm: 0.9999998369685184, iteration: 95749
loss: 1.1850991249084473,grad_norm: 0.9999990372607868, iteration: 95750
loss: 0.9832695722579956,grad_norm: 0.9999990266448882, iteration: 95751
loss: 1.0132544040679932,grad_norm: 0.9999995194845294, iteration: 95752
loss: 1.0494630336761475,grad_norm: 0.9999991724659716, iteration: 95753
loss: 0.9741695523262024,grad_norm: 0.9115967796122206, iteration: 95754
loss: 0.9770507216453552,grad_norm: 0.9999992521980966, iteration: 95755
loss: 1.0533647537231445,grad_norm: 0.999999336183554, iteration: 95756
loss: 1.1776477098464966,grad_norm: 0.9999992498408912, iteration: 95757
loss: 0.9832317233085632,grad_norm: 0.896178968665121, iteration: 95758
loss: 1.0402796268463135,grad_norm: 0.9999995573939997, iteration: 95759
loss: 1.013698697090149,grad_norm: 0.9060159938540889, iteration: 95760
loss: 1.0800158977508545,grad_norm: 1.0000000347991123, iteration: 95761
loss: 0.9817169904708862,grad_norm: 0.9999994063016028, iteration: 95762
loss: 0.9660428166389465,grad_norm: 0.9580228410064103, iteration: 95763
loss: 1.0354806184768677,grad_norm: 0.9999990317307716, iteration: 95764
loss: 1.1474088430404663,grad_norm: 0.9999993732306389, iteration: 95765
loss: 1.073222041130066,grad_norm: 0.9999997978177582, iteration: 95766
loss: 0.9826604127883911,grad_norm: 0.9903148120600954, iteration: 95767
loss: 1.2921993732452393,grad_norm: 0.9999995029372585, iteration: 95768
loss: 0.9746698141098022,grad_norm: 0.8587672772344006, iteration: 95769
loss: 1.0279669761657715,grad_norm: 0.999999753311864, iteration: 95770
loss: 1.0436100959777832,grad_norm: 0.9999998741989384, iteration: 95771
loss: 1.0361695289611816,grad_norm: 0.9999992469403257, iteration: 95772
loss: 1.0103271007537842,grad_norm: 0.9443800758630253, iteration: 95773
loss: 1.1254793405532837,grad_norm: 0.9999995515152745, iteration: 95774
loss: 1.0265514850616455,grad_norm: 0.9999992017789766, iteration: 95775
loss: 1.038955569267273,grad_norm: 0.9999996134319193, iteration: 95776
loss: 1.0267115831375122,grad_norm: 0.999999151936079, iteration: 95777
loss: 1.1357558965682983,grad_norm: 0.9999995140390895, iteration: 95778
loss: 1.0402384996414185,grad_norm: 0.8328609725073338, iteration: 95779
loss: 0.9715560674667358,grad_norm: 0.9999991176769961, iteration: 95780
loss: 1.1518560647964478,grad_norm: 0.9999993280999975, iteration: 95781
loss: 1.0351794958114624,grad_norm: 0.999999187841306, iteration: 95782
loss: 1.0104998350143433,grad_norm: 0.9999994590404377, iteration: 95783
loss: 1.0287612676620483,grad_norm: 0.9999997298371727, iteration: 95784
loss: 1.0365651845932007,grad_norm: 0.8843687619095878, iteration: 95785
loss: 1.0227969884872437,grad_norm: 0.9999991667375221, iteration: 95786
loss: 1.0224297046661377,grad_norm: 0.867217717567164, iteration: 95787
loss: 1.0301539897918701,grad_norm: 0.9999993574972623, iteration: 95788
loss: 1.1302167177200317,grad_norm: 0.999999462765056, iteration: 95789
loss: 1.0494840145111084,grad_norm: 0.999999048712519, iteration: 95790
loss: 0.9934510588645935,grad_norm: 0.872005383995023, iteration: 95791
loss: 1.0297259092330933,grad_norm: 0.9999993485886162, iteration: 95792
loss: 1.0419719219207764,grad_norm: 0.743185474314439, iteration: 95793
loss: 1.0288628339767456,grad_norm: 0.8786728289461277, iteration: 95794
loss: 1.004980206489563,grad_norm: 0.9613665418119516, iteration: 95795
loss: 1.0361659526824951,grad_norm: 0.720942835741081, iteration: 95796
loss: 1.026111125946045,grad_norm: 0.7283629825512544, iteration: 95797
loss: 1.0580538511276245,grad_norm: 0.9999996668955119, iteration: 95798
loss: 1.026447057723999,grad_norm: 0.97144207776864, iteration: 95799
loss: 1.0780330896377563,grad_norm: 0.9999997832847072, iteration: 95800
loss: 1.0612232685089111,grad_norm: 0.9236243250307259, iteration: 95801
loss: 1.1888822317123413,grad_norm: 0.9999996853829364, iteration: 95802
loss: 0.9714753031730652,grad_norm: 0.9999988988628268, iteration: 95803
loss: 0.9931361079216003,grad_norm: 0.8674058165950094, iteration: 95804
loss: 1.1161625385284424,grad_norm: 0.9999995061055198, iteration: 95805
loss: 0.987083375453949,grad_norm: 0.999999171255861, iteration: 95806
loss: 0.9831396341323853,grad_norm: 0.9999996038806943, iteration: 95807
loss: 1.044749140739441,grad_norm: 0.9999994696224321, iteration: 95808
loss: 0.9860857129096985,grad_norm: 0.9999989852593967, iteration: 95809
loss: 1.0732008218765259,grad_norm: 0.9303397459014073, iteration: 95810
loss: 0.9618217945098877,grad_norm: 0.829923076459061, iteration: 95811
loss: 0.9930382370948792,grad_norm: 0.8116550215772181, iteration: 95812
loss: 0.9982815980911255,grad_norm: 0.8279865367617485, iteration: 95813
loss: 1.0455633401870728,grad_norm: 0.999999063034702, iteration: 95814
loss: 1.02913498878479,grad_norm: 0.9999994343492775, iteration: 95815
loss: 1.005010724067688,grad_norm: 0.9999993377267992, iteration: 95816
loss: 1.0038923025131226,grad_norm: 0.8202173630594776, iteration: 95817
loss: 0.9951530694961548,grad_norm: 0.9460812172681551, iteration: 95818
loss: 1.0326557159423828,grad_norm: 0.7537121568306319, iteration: 95819
loss: 1.061428427696228,grad_norm: 0.9999997094020688, iteration: 95820
loss: 0.9803661704063416,grad_norm: 0.9905130258068175, iteration: 95821
loss: 1.0873546600341797,grad_norm: 0.9999995180155004, iteration: 95822
loss: 0.9828877449035645,grad_norm: 0.8363517962752222, iteration: 95823
loss: 0.9888357520103455,grad_norm: 0.89982801630435, iteration: 95824
loss: 0.9974701404571533,grad_norm: 0.8257621322248883, iteration: 95825
loss: 1.0044381618499756,grad_norm: 0.8699083312646025, iteration: 95826
loss: 0.9854260087013245,grad_norm: 0.8380867494411914, iteration: 95827
loss: 0.9865587949752808,grad_norm: 0.8437078775790525, iteration: 95828
loss: 1.0503846406936646,grad_norm: 0.9999990239839486, iteration: 95829
loss: 0.9911019206047058,grad_norm: 0.8605087471068765, iteration: 95830
loss: 1.0496206283569336,grad_norm: 0.9999990566960167, iteration: 95831
loss: 1.0017908811569214,grad_norm: 0.8670256155967314, iteration: 95832
loss: 1.0433692932128906,grad_norm: 0.9999994645345976, iteration: 95833
loss: 1.006116271018982,grad_norm: 0.8602141719507683, iteration: 95834
loss: 1.0026334524154663,grad_norm: 0.7842179829575715, iteration: 95835
loss: 0.9839437007904053,grad_norm: 0.927404983528724, iteration: 95836
loss: 0.9804996252059937,grad_norm: 0.8873028204718121, iteration: 95837
loss: 1.0343655347824097,grad_norm: 0.9999994613285108, iteration: 95838
loss: 1.0471595525741577,grad_norm: 0.9999992670518417, iteration: 95839
loss: 1.010911464691162,grad_norm: 0.8451727620246723, iteration: 95840
loss: 1.050856590270996,grad_norm: 0.9999990118053351, iteration: 95841
loss: 1.0276920795440674,grad_norm: 0.8349003760671193, iteration: 95842
loss: 1.015281319618225,grad_norm: 0.9999994791707081, iteration: 95843
loss: 0.9956947565078735,grad_norm: 0.9499179300778274, iteration: 95844
loss: 0.9972409605979919,grad_norm: 0.9999991147976768, iteration: 95845
loss: 1.0042380094528198,grad_norm: 0.8381774109290939, iteration: 95846
loss: 0.9944211840629578,grad_norm: 0.9935421171072819, iteration: 95847
loss: 1.0010886192321777,grad_norm: 0.8698459131409788, iteration: 95848
loss: 1.0012714862823486,grad_norm: 0.9327897837280248, iteration: 95849
loss: 1.0539977550506592,grad_norm: 0.9999991774244259, iteration: 95850
loss: 0.9911541938781738,grad_norm: 0.8196363520234509, iteration: 95851
loss: 1.0226969718933105,grad_norm: 0.9999992787568391, iteration: 95852
loss: 1.0111626386642456,grad_norm: 0.9999991166304751, iteration: 95853
loss: 0.9999746680259705,grad_norm: 0.7809018349816602, iteration: 95854
loss: 0.9944090843200684,grad_norm: 0.999999144043083, iteration: 95855
loss: 1.0135889053344727,grad_norm: 0.9999991148595442, iteration: 95856
loss: 1.1738637685775757,grad_norm: 0.9999997318148495, iteration: 95857
loss: 1.0221514701843262,grad_norm: 0.9999989869483672, iteration: 95858
loss: 0.9839437007904053,grad_norm: 0.929939013713793, iteration: 95859
loss: 1.0978436470031738,grad_norm: 0.9999997231952318, iteration: 95860
loss: 1.1087629795074463,grad_norm: 0.999999940068568, iteration: 95861
loss: 1.0666486024856567,grad_norm: 0.9999991960279375, iteration: 95862
loss: 0.9915279746055603,grad_norm: 0.982093357420338, iteration: 95863
loss: 1.086348295211792,grad_norm: 0.9999996938440261, iteration: 95864
loss: 1.1258832216262817,grad_norm: 0.9999999213707306, iteration: 95865
loss: 0.9776151776313782,grad_norm: 0.9475696335502268, iteration: 95866
loss: 1.0247570276260376,grad_norm: 0.7886094968647142, iteration: 95867
loss: 1.0265921354293823,grad_norm: 0.8987575367228562, iteration: 95868
loss: 0.9659214615821838,grad_norm: 0.750938886888267, iteration: 95869
loss: 0.9555018544197083,grad_norm: 0.999999156283525, iteration: 95870
loss: 0.9914283156394958,grad_norm: 0.8489186710020292, iteration: 95871
loss: 0.9757930636405945,grad_norm: 0.8066657279314663, iteration: 95872
loss: 0.9281286001205444,grad_norm: 0.8891351352818901, iteration: 95873
loss: 0.9609900712966919,grad_norm: 0.7900354363555349, iteration: 95874
loss: 1.0381062030792236,grad_norm: 0.840978243078576, iteration: 95875
loss: 1.0188013315200806,grad_norm: 0.9152109846271713, iteration: 95876
loss: 1.0382288694381714,grad_norm: 0.9999994700394877, iteration: 95877
loss: 1.0437957048416138,grad_norm: 0.9999995124720611, iteration: 95878
loss: 1.0057387351989746,grad_norm: 0.97018703373832, iteration: 95879
loss: 0.9816872477531433,grad_norm: 0.9999992582829441, iteration: 95880
loss: 1.0041491985321045,grad_norm: 0.910828430158403, iteration: 95881
loss: 1.1134331226348877,grad_norm: 0.9999994151958062, iteration: 95882
loss: 1.0308068990707397,grad_norm: 0.9999998132613658, iteration: 95883
loss: 0.9857683181762695,grad_norm: 0.9481289459721882, iteration: 95884
loss: 1.093022346496582,grad_norm: 0.9999998325747628, iteration: 95885
loss: 0.9999871850013733,grad_norm: 0.979097667847413, iteration: 95886
loss: 0.9787994623184204,grad_norm: 0.8626020188403135, iteration: 95887
loss: 1.0498043298721313,grad_norm: 0.9999994352305851, iteration: 95888
loss: 1.0047636032104492,grad_norm: 0.9999990963209863, iteration: 95889
loss: 1.0442085266113281,grad_norm: 0.9242390872715438, iteration: 95890
loss: 1.0300531387329102,grad_norm: 0.9999990646907246, iteration: 95891
loss: 1.0299875736236572,grad_norm: 0.9999998524730547, iteration: 95892
loss: 1.0724852085113525,grad_norm: 0.9999992770043954, iteration: 95893
loss: 1.0222876071929932,grad_norm: 0.9999989741922068, iteration: 95894
loss: 0.9667344689369202,grad_norm: 0.8061769161889245, iteration: 95895
loss: 0.9860002994537354,grad_norm: 0.9999990379269982, iteration: 95896
loss: 0.9764502048492432,grad_norm: 0.9314188537833005, iteration: 95897
loss: 1.0259971618652344,grad_norm: 0.8692535140557, iteration: 95898
loss: 1.0156630277633667,grad_norm: 0.8479889450787235, iteration: 95899
loss: 0.9784137010574341,grad_norm: 0.9999998767709942, iteration: 95900
loss: 1.004050612449646,grad_norm: 0.9999991288745133, iteration: 95901
loss: 0.9887507557868958,grad_norm: 0.9739709879802971, iteration: 95902
loss: 1.0301930904388428,grad_norm: 0.999999484628691, iteration: 95903
loss: 1.0760607719421387,grad_norm: 0.9010657372573599, iteration: 95904
loss: 0.9853994250297546,grad_norm: 0.972175828581331, iteration: 95905
loss: 0.9856383800506592,grad_norm: 0.956803994734341, iteration: 95906
loss: 1.0305060148239136,grad_norm: 0.8627567840204802, iteration: 95907
loss: 0.9858316779136658,grad_norm: 0.9175762253214915, iteration: 95908
loss: 1.0181516408920288,grad_norm: 0.9999991639043114, iteration: 95909
loss: 0.9789474606513977,grad_norm: 0.9999991178329156, iteration: 95910
loss: 1.0215132236480713,grad_norm: 0.9999995113349106, iteration: 95911
loss: 0.9784771203994751,grad_norm: 0.9999991394715326, iteration: 95912
loss: 1.015500545501709,grad_norm: 0.8860426085218915, iteration: 95913
loss: 0.988210141658783,grad_norm: 0.7580065113206782, iteration: 95914
loss: 0.974579930305481,grad_norm: 0.9944728241748946, iteration: 95915
loss: 1.009255051612854,grad_norm: 0.9655498194132033, iteration: 95916
loss: 1.04511559009552,grad_norm: 0.9581007020898319, iteration: 95917
loss: 1.0234019756317139,grad_norm: 0.7562153060656366, iteration: 95918
loss: 1.0262537002563477,grad_norm: 0.9999996540563697, iteration: 95919
loss: 1.0334028005599976,grad_norm: 0.9999991093649602, iteration: 95920
loss: 1.0460325479507446,grad_norm: 0.999999853946013, iteration: 95921
loss: 1.0138139724731445,grad_norm: 0.8283374476211378, iteration: 95922
loss: 1.0202783346176147,grad_norm: 0.9999991828934679, iteration: 95923
loss: 0.993780255317688,grad_norm: 0.953653264585057, iteration: 95924
loss: 1.0008314847946167,grad_norm: 0.9468416062607172, iteration: 95925
loss: 0.995358943939209,grad_norm: 0.8121261384095348, iteration: 95926
loss: 1.0387156009674072,grad_norm: 0.9999990033683421, iteration: 95927
loss: 0.9914970993995667,grad_norm: 0.9999989820044083, iteration: 95928
loss: 1.0211477279663086,grad_norm: 0.9067279476299552, iteration: 95929
loss: 1.0213549137115479,grad_norm: 0.7182840724825805, iteration: 95930
loss: 1.0771405696868896,grad_norm: 0.9999992263307793, iteration: 95931
loss: 0.9958903193473816,grad_norm: 0.9999993475256745, iteration: 95932
loss: 1.0338151454925537,grad_norm: 0.9999991010136758, iteration: 95933
loss: 1.0065194368362427,grad_norm: 0.9999991211842889, iteration: 95934
loss: 0.9804482460021973,grad_norm: 0.9999989688719757, iteration: 95935
loss: 1.090489387512207,grad_norm: 0.9111530960954687, iteration: 95936
loss: 1.0251997709274292,grad_norm: 0.8506134616753617, iteration: 95937
loss: 1.019667148590088,grad_norm: 0.9999998743314807, iteration: 95938
loss: 1.0179117918014526,grad_norm: 0.9999996339988578, iteration: 95939
loss: 0.9775009155273438,grad_norm: 0.7953904086571177, iteration: 95940
loss: 1.1483393907546997,grad_norm: 0.9999997757921215, iteration: 95941
loss: 0.9842720627784729,grad_norm: 0.9999992844671814, iteration: 95942
loss: 1.0007262229919434,grad_norm: 0.9999991029975738, iteration: 95943
loss: 1.0044466257095337,grad_norm: 0.9648651850484103, iteration: 95944
loss: 1.0047861337661743,grad_norm: 0.9999990718081708, iteration: 95945
loss: 1.0527701377868652,grad_norm: 0.9999991310575308, iteration: 95946
loss: 0.9833199977874756,grad_norm: 0.9999990852559872, iteration: 95947
loss: 0.9811406135559082,grad_norm: 0.9999990183350829, iteration: 95948
loss: 1.0344167947769165,grad_norm: 0.86233638678067, iteration: 95949
loss: 0.9544619917869568,grad_norm: 0.8543864163819805, iteration: 95950
loss: 1.0635044574737549,grad_norm: 0.999999405568534, iteration: 95951
loss: 0.9844518899917603,grad_norm: 0.9799246274360867, iteration: 95952
loss: 1.0274443626403809,grad_norm: 0.9762616652125209, iteration: 95953
loss: 1.0079110860824585,grad_norm: 0.8561447114604235, iteration: 95954
loss: 0.9617204070091248,grad_norm: 0.8844094101160397, iteration: 95955
loss: 1.0621658563613892,grad_norm: 0.8667168859349464, iteration: 95956
loss: 1.0013353824615479,grad_norm: 0.9324661326749673, iteration: 95957
loss: 1.0526193380355835,grad_norm: 0.999999619581125, iteration: 95958
loss: 1.0773504972457886,grad_norm: 0.9920413302394144, iteration: 95959
loss: 0.9889302849769592,grad_norm: 0.9999996830481135, iteration: 95960
loss: 1.0106377601623535,grad_norm: 0.9999992179225178, iteration: 95961
loss: 1.219477891921997,grad_norm: 0.9999990579177458, iteration: 95962
loss: 0.9792536497116089,grad_norm: 0.8856898840395444, iteration: 95963
loss: 0.9873999953269958,grad_norm: 0.9205935910154142, iteration: 95964
loss: 1.0161607265472412,grad_norm: 0.903626667098795, iteration: 95965
loss: 0.9807730913162231,grad_norm: 0.9820428085781805, iteration: 95966
loss: 1.0062885284423828,grad_norm: 0.9774545844283173, iteration: 95967
loss: 1.0513954162597656,grad_norm: 0.9418608013000428, iteration: 95968
loss: 0.9708173274993896,grad_norm: 0.9096256372029242, iteration: 95969
loss: 1.2034944295883179,grad_norm: 0.9999992504861751, iteration: 95970
loss: 0.9805447459220886,grad_norm: 0.9884955098216461, iteration: 95971
loss: 0.9914342164993286,grad_norm: 0.9999990153736686, iteration: 95972
loss: 0.98094642162323,grad_norm: 0.9975697641519845, iteration: 95973
loss: 1.0538530349731445,grad_norm: 0.9999993510549863, iteration: 95974
loss: 0.9808337688446045,grad_norm: 0.8708141238856596, iteration: 95975
loss: 1.0021077394485474,grad_norm: 0.8009870067076927, iteration: 95976
loss: 1.1085442304611206,grad_norm: 0.972006601907758, iteration: 95977
loss: 1.0368435382843018,grad_norm: 0.9999993283654605, iteration: 95978
loss: 1.0384376049041748,grad_norm: 0.9999998365485903, iteration: 95979
loss: 1.0210657119750977,grad_norm: 0.9999998933597661, iteration: 95980
loss: 0.9707503318786621,grad_norm: 0.9999990063413547, iteration: 95981
loss: 1.007795810699463,grad_norm: 0.9999995648009774, iteration: 95982
loss: 0.9965267777442932,grad_norm: 0.9999991900353314, iteration: 95983
loss: 1.0082015991210938,grad_norm: 0.7538651904322624, iteration: 95984
loss: 1.0149333477020264,grad_norm: 0.8981766368584709, iteration: 95985
loss: 1.0505976676940918,grad_norm: 0.9999995878359254, iteration: 95986
loss: 1.0282330513000488,grad_norm: 0.9508489951627196, iteration: 95987
loss: 0.9603442549705505,grad_norm: 0.9408513477657915, iteration: 95988
loss: 0.9541364908218384,grad_norm: 0.9588912407755041, iteration: 95989
loss: 0.9708516001701355,grad_norm: 0.8670618500386585, iteration: 95990
loss: 1.0030832290649414,grad_norm: 0.9567819479914867, iteration: 95991
loss: 0.9652397632598877,grad_norm: 0.8781884964442099, iteration: 95992
loss: 1.1612828969955444,grad_norm: 0.9999999029676508, iteration: 95993
loss: 0.9859846234321594,grad_norm: 0.8626446510560475, iteration: 95994
loss: 1.0343035459518433,grad_norm: 0.890582001184671, iteration: 95995
loss: 0.9773333668708801,grad_norm: 0.9789858352313052, iteration: 95996
loss: 1.012330174446106,grad_norm: 0.999999093252684, iteration: 95997
loss: 1.0114229917526245,grad_norm: 0.9710607793005479, iteration: 95998
loss: 1.0115138292312622,grad_norm: 0.9999990894221514, iteration: 95999
loss: 0.996275007724762,grad_norm: 0.9171719187042598, iteration: 96000
loss: 1.1334952116012573,grad_norm: 0.999998960213108, iteration: 96001
loss: 1.0845125913619995,grad_norm: 0.8815077970340526, iteration: 96002
loss: 1.0656569004058838,grad_norm: 0.8625154109857766, iteration: 96003
loss: 1.0126646757125854,grad_norm: 0.9999994248622999, iteration: 96004
loss: 1.0112138986587524,grad_norm: 0.8851390455540157, iteration: 96005
loss: 1.0773450136184692,grad_norm: 0.999999649353703, iteration: 96006
loss: 0.9886812567710876,grad_norm: 0.9935144466791719, iteration: 96007
loss: 0.9988862872123718,grad_norm: 0.8859255334321878, iteration: 96008
loss: 1.0514538288116455,grad_norm: 0.999999191317865, iteration: 96009
loss: 1.0196088552474976,grad_norm: 0.9999996774039429, iteration: 96010
loss: 1.0374038219451904,grad_norm: 0.8804189689640484, iteration: 96011
loss: 1.0039634704589844,grad_norm: 0.9999991302719765, iteration: 96012
loss: 0.9874749779701233,grad_norm: 0.9999990153705807, iteration: 96013
loss: 1.0017175674438477,grad_norm: 0.9136825781522789, iteration: 96014
loss: 0.9749107956886292,grad_norm: 0.9999990919370282, iteration: 96015
loss: 1.0088313817977905,grad_norm: 0.9544067762096325, iteration: 96016
loss: 0.9873372316360474,grad_norm: 0.8480137452610779, iteration: 96017
loss: 1.0345611572265625,grad_norm: 0.999999037091495, iteration: 96018
loss: 1.01319420337677,grad_norm: 0.8496384351489819, iteration: 96019
loss: 1.2243372201919556,grad_norm: 0.9999996986569235, iteration: 96020
loss: 0.9924057722091675,grad_norm: 0.8695750022049705, iteration: 96021
loss: 0.9946355223655701,grad_norm: 0.9999990143487163, iteration: 96022
loss: 1.0778322219848633,grad_norm: 0.9999990983627081, iteration: 96023
loss: 1.0040173530578613,grad_norm: 0.9999994260610363, iteration: 96024
loss: 1.0135020017623901,grad_norm: 0.8678823371776881, iteration: 96025
loss: 1.0141410827636719,grad_norm: 0.8859407075839858, iteration: 96026
loss: 1.0223931074142456,grad_norm: 0.9052580432474546, iteration: 96027
loss: 1.0403001308441162,grad_norm: 0.9999998076015352, iteration: 96028
loss: 1.0208039283752441,grad_norm: 0.7781488027130097, iteration: 96029
loss: 1.0110540390014648,grad_norm: 0.9601614427011096, iteration: 96030
loss: 0.995769202709198,grad_norm: 0.8697376927957677, iteration: 96031
loss: 0.983727216720581,grad_norm: 0.8023195328872827, iteration: 96032
loss: 1.004908800125122,grad_norm: 0.858642780985135, iteration: 96033
loss: 1.0048305988311768,grad_norm: 0.8011761845836455, iteration: 96034
loss: 1.0145304203033447,grad_norm: 0.9999998128388975, iteration: 96035
loss: 0.9741182923316956,grad_norm: 0.9264879477044726, iteration: 96036
loss: 1.0258207321166992,grad_norm: 0.9999997161146119, iteration: 96037
loss: 1.0107818841934204,grad_norm: 0.8413639030688704, iteration: 96038
loss: 0.9975354075431824,grad_norm: 0.9171364461584182, iteration: 96039
loss: 1.0184106826782227,grad_norm: 0.957301402353455, iteration: 96040
loss: 1.0196040868759155,grad_norm: 0.9312357260201144, iteration: 96041
loss: 1.0368539094924927,grad_norm: 0.9999991615726865, iteration: 96042
loss: 1.0209578275680542,grad_norm: 0.9999991388232271, iteration: 96043
loss: 1.0078351497650146,grad_norm: 0.999999133645149, iteration: 96044
loss: 0.9990992546081543,grad_norm: 0.9999990006832179, iteration: 96045
loss: 1.0309358835220337,grad_norm: 0.8981044453501308, iteration: 96046
loss: 0.9720169901847839,grad_norm: 0.9101213493491089, iteration: 96047
loss: 0.9876821637153625,grad_norm: 0.7567257137059881, iteration: 96048
loss: 1.0440566539764404,grad_norm: 0.999999586104005, iteration: 96049
loss: 1.0250955820083618,grad_norm: 0.939697825205489, iteration: 96050
loss: 1.0170507431030273,grad_norm: 0.8896016624277568, iteration: 96051
loss: 1.0849955081939697,grad_norm: 0.9999997775563694, iteration: 96052
loss: 0.9935202598571777,grad_norm: 0.7859524626049837, iteration: 96053
loss: 1.0147831439971924,grad_norm: 0.9694219662243329, iteration: 96054
loss: 0.9635776281356812,grad_norm: 0.8982818072438813, iteration: 96055
loss: 1.0230399370193481,grad_norm: 0.9852798758376954, iteration: 96056
loss: 1.0187698602676392,grad_norm: 0.9604667487007961, iteration: 96057
loss: 0.9790273308753967,grad_norm: 0.9100675965841906, iteration: 96058
loss: 0.9928815960884094,grad_norm: 0.7353337549915856, iteration: 96059
loss: 1.083466649055481,grad_norm: 0.9999996932005458, iteration: 96060
loss: 1.0186288356781006,grad_norm: 0.9029967122241457, iteration: 96061
loss: 0.9917189478874207,grad_norm: 0.9939476063852516, iteration: 96062
loss: 1.0076673030853271,grad_norm: 0.8093401415193154, iteration: 96063
loss: 1.0324243307113647,grad_norm: 0.7961861258757935, iteration: 96064
loss: 1.0024336576461792,grad_norm: 0.9870995213421153, iteration: 96065
loss: 1.01534903049469,grad_norm: 0.9786656244958888, iteration: 96066
loss: 0.9725574851036072,grad_norm: 0.9238178278510701, iteration: 96067
loss: 1.0960769653320312,grad_norm: 0.9999991428744989, iteration: 96068
loss: 0.9820773601531982,grad_norm: 0.8420533201956822, iteration: 96069
loss: 0.9838741421699524,grad_norm: 0.9999989896967096, iteration: 96070
loss: 1.0069571733474731,grad_norm: 0.9114134721068585, iteration: 96071
loss: 0.9878415465354919,grad_norm: 0.8144052857157085, iteration: 96072
loss: 0.9718804359436035,grad_norm: 0.9655162994992205, iteration: 96073
loss: 0.9596831798553467,grad_norm: 0.9734898076458783, iteration: 96074
loss: 0.9851582050323486,grad_norm: 0.872094140558948, iteration: 96075
loss: 1.017993688583374,grad_norm: 0.9715014538881369, iteration: 96076
loss: 0.9779797792434692,grad_norm: 0.8598785473683135, iteration: 96077
loss: 0.9938068389892578,grad_norm: 0.9808752577957726, iteration: 96078
loss: 0.9956752061843872,grad_norm: 0.9999991064186733, iteration: 96079
loss: 1.0272157192230225,grad_norm: 0.9453993292822932, iteration: 96080
loss: 0.9906220436096191,grad_norm: 0.9648477551209738, iteration: 96081
loss: 1.0323816537857056,grad_norm: 0.9999998238374666, iteration: 96082
loss: 1.029412031173706,grad_norm: 0.9945499132816128, iteration: 96083
loss: 1.0714682340621948,grad_norm: 0.9578646508108034, iteration: 96084
loss: 1.002745509147644,grad_norm: 0.81655844362189, iteration: 96085
loss: 0.9933760166168213,grad_norm: 0.9999991228393555, iteration: 96086
loss: 0.9951133131980896,grad_norm: 0.9999990088077053, iteration: 96087
loss: 1.0155508518218994,grad_norm: 0.823986704184319, iteration: 96088
loss: 1.0163038969039917,grad_norm: 0.9999990802688539, iteration: 96089
loss: 1.021252989768982,grad_norm: 0.7971546860502468, iteration: 96090
loss: 0.9911311268806458,grad_norm: 0.9999990377930327, iteration: 96091
loss: 1.005969762802124,grad_norm: 0.9127197409017049, iteration: 96092
loss: 1.0199780464172363,grad_norm: 0.8097388643783234, iteration: 96093
loss: 1.0588563680648804,grad_norm: 0.9999992005824364, iteration: 96094
loss: 0.9707365036010742,grad_norm: 0.8599668747258975, iteration: 96095
loss: 0.9738442301750183,grad_norm: 0.9109195669982335, iteration: 96096
loss: 1.0062724351882935,grad_norm: 0.9999990837069567, iteration: 96097
loss: 1.001889705657959,grad_norm: 0.8897548287348636, iteration: 96098
loss: 1.0784653425216675,grad_norm: 0.9999998012409138, iteration: 96099
loss: 0.9758304953575134,grad_norm: 0.9878712615995644, iteration: 96100
loss: 1.00163996219635,grad_norm: 0.7469181521336217, iteration: 96101
loss: 1.040497064590454,grad_norm: 0.960330910994494, iteration: 96102
loss: 0.9851167798042297,grad_norm: 0.8468707866738772, iteration: 96103
loss: 0.9911977648735046,grad_norm: 0.9999990020235099, iteration: 96104
loss: 1.0243645906448364,grad_norm: 0.9999990858637873, iteration: 96105
loss: 0.9895095825195312,grad_norm: 0.9473303340518053, iteration: 96106
loss: 1.0342795848846436,grad_norm: 0.9999991746543718, iteration: 96107
loss: 1.2401331663131714,grad_norm: 0.999999688481612, iteration: 96108
loss: 1.0015500783920288,grad_norm: 0.9999995123976084, iteration: 96109
loss: 0.9653813242912292,grad_norm: 0.9999990613052535, iteration: 96110
loss: 0.9837499260902405,grad_norm: 0.7947422125362761, iteration: 96111
loss: 1.0162620544433594,grad_norm: 0.9796469612576596, iteration: 96112
loss: 1.0222232341766357,grad_norm: 0.9597227778091978, iteration: 96113
loss: 0.9885938763618469,grad_norm: 0.9999990381499785, iteration: 96114
loss: 0.981363832950592,grad_norm: 0.9330562044510057, iteration: 96115
loss: 1.0057320594787598,grad_norm: 0.7873547133065952, iteration: 96116
loss: 1.0112431049346924,grad_norm: 0.9143415882194869, iteration: 96117
loss: 1.050158977508545,grad_norm: 0.9999994337149462, iteration: 96118
loss: 1.0614373683929443,grad_norm: 0.9999995444709382, iteration: 96119
loss: 1.003958821296692,grad_norm: 0.9999990953815817, iteration: 96120
loss: 1.012595534324646,grad_norm: 0.999999485307428, iteration: 96121
loss: 1.0506038665771484,grad_norm: 0.9999990165893495, iteration: 96122
loss: 0.9953221678733826,grad_norm: 0.9683312907539716, iteration: 96123
loss: 0.9769951701164246,grad_norm: 0.9883205737872416, iteration: 96124
loss: 1.0019506216049194,grad_norm: 0.9999993129574742, iteration: 96125
loss: 0.9871450662612915,grad_norm: 0.802217738231657, iteration: 96126
loss: 0.9980630874633789,grad_norm: 0.9505189261089781, iteration: 96127
loss: 1.0205246210098267,grad_norm: 0.9703397231701555, iteration: 96128
loss: 0.995100200176239,grad_norm: 0.8677029239206965, iteration: 96129
loss: 1.0111480951309204,grad_norm: 0.8724332100939196, iteration: 96130
loss: 0.9800590872764587,grad_norm: 0.8126679619629221, iteration: 96131
loss: 1.1020212173461914,grad_norm: 0.9999993718608201, iteration: 96132
loss: 1.0092742443084717,grad_norm: 0.9999993235123039, iteration: 96133
loss: 1.0611436367034912,grad_norm: 0.9999990165778687, iteration: 96134
loss: 0.9971616864204407,grad_norm: 0.9292839366551149, iteration: 96135
loss: 1.0100855827331543,grad_norm: 0.9999990082777204, iteration: 96136
loss: 1.0051137208938599,grad_norm: 0.9044521215681414, iteration: 96137
loss: 0.9944736957550049,grad_norm: 0.899813663496371, iteration: 96138
loss: 0.9977368116378784,grad_norm: 0.8358333411538188, iteration: 96139
loss: 1.0115618705749512,grad_norm: 0.9999992750539136, iteration: 96140
loss: 1.008414626121521,grad_norm: 0.8172956477923939, iteration: 96141
loss: 0.9782033562660217,grad_norm: 0.999999565461009, iteration: 96142
loss: 1.0233367681503296,grad_norm: 0.9999992873820797, iteration: 96143
loss: 1.1520556211471558,grad_norm: 0.9999990804625117, iteration: 96144
loss: 1.0028774738311768,grad_norm: 0.9999999496732646, iteration: 96145
loss: 1.0019915103912354,grad_norm: 0.822581317428494, iteration: 96146
loss: 1.0357025861740112,grad_norm: 0.9999992066797936, iteration: 96147
loss: 1.022118091583252,grad_norm: 0.9999993435599711, iteration: 96148
loss: 1.0235329866409302,grad_norm: 0.9992416972581383, iteration: 96149
loss: 1.005225658416748,grad_norm: 0.7611920149071486, iteration: 96150
loss: 1.0200598239898682,grad_norm: 0.8504121597870931, iteration: 96151
loss: 0.9927822947502136,grad_norm: 0.9999990609656618, iteration: 96152
loss: 1.0142678022384644,grad_norm: 0.9373128891653778, iteration: 96153
loss: 1.0326664447784424,grad_norm: 0.7792422229885778, iteration: 96154
loss: 1.0960770845413208,grad_norm: 0.999999569106589, iteration: 96155
loss: 1.0667695999145508,grad_norm: 0.9999989886026729, iteration: 96156
loss: 0.9955204725265503,grad_norm: 0.9999998837212968, iteration: 96157
loss: 0.9929482340812683,grad_norm: 0.9887271133286983, iteration: 96158
loss: 0.9952719211578369,grad_norm: 0.9999991370084138, iteration: 96159
loss: 1.0826399326324463,grad_norm: 0.9999991574766168, iteration: 96160
loss: 1.0105750560760498,grad_norm: 0.9999991385397761, iteration: 96161
loss: 0.9940704107284546,grad_norm: 0.9662115601965554, iteration: 96162
loss: 0.9688839316368103,grad_norm: 0.8911449935965469, iteration: 96163
loss: 1.0567364692687988,grad_norm: 0.9999999621373008, iteration: 96164
loss: 0.9948447346687317,grad_norm: 0.9513278627861271, iteration: 96165
loss: 0.9974223971366882,grad_norm: 0.9436154430670419, iteration: 96166
loss: 1.0078266859054565,grad_norm: 0.9999992220985848, iteration: 96167
loss: 1.0172173976898193,grad_norm: 0.8214058979496793, iteration: 96168
loss: 0.9966226816177368,grad_norm: 0.9285282743015654, iteration: 96169
loss: 1.0313544273376465,grad_norm: 0.9931591958480205, iteration: 96170
loss: 0.9883037209510803,grad_norm: 0.9999999495343385, iteration: 96171
loss: 0.9989360570907593,grad_norm: 0.9999989621347445, iteration: 96172
loss: 1.0390703678131104,grad_norm: 0.8419613438443905, iteration: 96173
loss: 1.0037020444869995,grad_norm: 0.9999989811593911, iteration: 96174
loss: 0.9918463230133057,grad_norm: 0.9374155895300292, iteration: 96175
loss: 0.9865245819091797,grad_norm: 0.9999993845643113, iteration: 96176
loss: 1.0304163694381714,grad_norm: 0.9999996437680875, iteration: 96177
loss: 1.1204756498336792,grad_norm: 0.9999998934660679, iteration: 96178
loss: 1.170106291770935,grad_norm: 0.9999993937060341, iteration: 96179
loss: 1.0649621486663818,grad_norm: 0.9999992784344368, iteration: 96180
loss: 1.1135492324829102,grad_norm: 0.9999993158039542, iteration: 96181
loss: 1.0211008787155151,grad_norm: 0.999999104609923, iteration: 96182
loss: 1.0023165941238403,grad_norm: 0.9999991243845113, iteration: 96183
loss: 1.447977900505066,grad_norm: 0.9999998245029603, iteration: 96184
loss: 1.3481266498565674,grad_norm: 0.9999998300462627, iteration: 96185
loss: 0.9974673390388489,grad_norm: 0.7695878200600307, iteration: 96186
loss: 1.0787566900253296,grad_norm: 0.9999993294685773, iteration: 96187
loss: 0.9933724999427795,grad_norm: 0.9999991878297414, iteration: 96188
loss: 1.004515528678894,grad_norm: 0.9999989722006315, iteration: 96189
loss: 1.0014915466308594,grad_norm: 0.8929415829042772, iteration: 96190
loss: 1.0480083227157593,grad_norm: 0.9999998088988482, iteration: 96191
loss: 1.0168421268463135,grad_norm: 0.9690472667439038, iteration: 96192
loss: 1.074028730392456,grad_norm: 0.9999992639296719, iteration: 96193
loss: 1.0269896984100342,grad_norm: 0.9999999146192082, iteration: 96194
loss: 1.0515025854110718,grad_norm: 0.9999990761941931, iteration: 96195
loss: 1.0109270811080933,grad_norm: 0.8801274847439383, iteration: 96196
loss: 1.081599235534668,grad_norm: 0.999999112023779, iteration: 96197
loss: 1.023988127708435,grad_norm: 0.9158485701881329, iteration: 96198
loss: 1.0803314447402954,grad_norm: 0.9999991746175062, iteration: 96199
loss: 1.0124709606170654,grad_norm: 0.9999991881895056, iteration: 96200
loss: 1.0814356803894043,grad_norm: 0.9999998138428084, iteration: 96201
loss: 1.1034191846847534,grad_norm: 0.9999994535518215, iteration: 96202
loss: 1.0670369863510132,grad_norm: 0.9999999199608848, iteration: 96203
loss: 0.9872910976409912,grad_norm: 0.9999989432826999, iteration: 96204
loss: 1.007129192352295,grad_norm: 0.8882176431168907, iteration: 96205
loss: 0.9970123767852783,grad_norm: 0.9999993862133615, iteration: 96206
loss: 0.9841784238815308,grad_norm: 0.9999998549125391, iteration: 96207
loss: 1.0504685640335083,grad_norm: 0.9999995553084923, iteration: 96208
loss: 0.9801517724990845,grad_norm: 0.9999995974749227, iteration: 96209
loss: 0.9691411256790161,grad_norm: 0.9776086003687995, iteration: 96210
loss: 1.0394684076309204,grad_norm: 0.9999996828329198, iteration: 96211
loss: 1.091870665550232,grad_norm: 0.8994984212555847, iteration: 96212
loss: 1.0778733491897583,grad_norm: 0.99999972895125, iteration: 96213
loss: 1.0470775365829468,grad_norm: 0.9999993914472605, iteration: 96214
loss: 1.0304239988327026,grad_norm: 0.897346278018055, iteration: 96215
loss: 1.093854546546936,grad_norm: 0.9999990993685348, iteration: 96216
loss: 1.0208455324172974,grad_norm: 0.999999110645661, iteration: 96217
loss: 1.160969853401184,grad_norm: 0.9999991608215159, iteration: 96218
loss: 1.0537604093551636,grad_norm: 0.999999206940245, iteration: 96219
loss: 1.0166761875152588,grad_norm: 0.829136680820982, iteration: 96220
loss: 1.1257683038711548,grad_norm: 0.9999995042271593, iteration: 96221
loss: 1.1109740734100342,grad_norm: 0.9999995565408811, iteration: 96222
loss: 1.0235413312911987,grad_norm: 0.973309249895183, iteration: 96223
loss: 1.1807057857513428,grad_norm: 0.9999992158461208, iteration: 96224
loss: 0.9650529026985168,grad_norm: 0.9860182443588259, iteration: 96225
loss: 1.073677659034729,grad_norm: 0.999999493463567, iteration: 96226
loss: 1.0182435512542725,grad_norm: 0.9999996691873693, iteration: 96227
loss: 1.0059239864349365,grad_norm: 0.9999992381573967, iteration: 96228
loss: 1.013027548789978,grad_norm: 0.8849128584689179, iteration: 96229
loss: 1.172054648399353,grad_norm: 0.9999997898639074, iteration: 96230
loss: 1.1281973123550415,grad_norm: 0.9999993311373078, iteration: 96231
loss: 1.0697909593582153,grad_norm: 0.9999999023918915, iteration: 96232
loss: 1.1851016283035278,grad_norm: 0.9999994256142649, iteration: 96233
loss: 1.009319543838501,grad_norm: 0.9999998728039347, iteration: 96234
loss: 1.040012001991272,grad_norm: 0.9999992496909395, iteration: 96235
loss: 1.0914618968963623,grad_norm: 0.9999996327375156, iteration: 96236
loss: 0.989005982875824,grad_norm: 0.9999993751793363, iteration: 96237
loss: 1.0163744688034058,grad_norm: 0.9999992319348855, iteration: 96238
loss: 1.0303494930267334,grad_norm: 0.982136930615327, iteration: 96239
loss: 1.0124666690826416,grad_norm: 0.7754079046412734, iteration: 96240
loss: 1.0206421613693237,grad_norm: 0.9029830780844803, iteration: 96241
loss: 1.0978950262069702,grad_norm: 0.9999996122849784, iteration: 96242
loss: 1.0635945796966553,grad_norm: 0.9146214078008605, iteration: 96243
loss: 0.9964414238929749,grad_norm: 0.9999998547206299, iteration: 96244
loss: 0.9947624802589417,grad_norm: 0.9999991311900402, iteration: 96245
loss: 1.0122179985046387,grad_norm: 0.9999995021470911, iteration: 96246
loss: 1.1180986166000366,grad_norm: 0.999999706282254, iteration: 96247
loss: 1.0182760953903198,grad_norm: 0.8325600274833522, iteration: 96248
loss: 1.1938796043395996,grad_norm: 1.000000057190527, iteration: 96249
loss: 1.0993446111679077,grad_norm: 0.9999995927433588, iteration: 96250
loss: 1.1137301921844482,grad_norm: 0.9999999116414534, iteration: 96251
loss: 1.1324584484100342,grad_norm: 0.9999993020400365, iteration: 96252
loss: 1.1140215396881104,grad_norm: 0.9999992159321842, iteration: 96253
loss: 1.1172946691513062,grad_norm: 0.9999995695565954, iteration: 96254
loss: 0.993232250213623,grad_norm: 0.8820000994415004, iteration: 96255
loss: 1.0064916610717773,grad_norm: 0.9999994737441275, iteration: 96256
loss: 1.0163841247558594,grad_norm: 0.9999998535537559, iteration: 96257
loss: 1.0169336795806885,grad_norm: 0.8435816306106588, iteration: 96258
loss: 1.045137882232666,grad_norm: 0.9999994258114234, iteration: 96259
loss: 0.9863823652267456,grad_norm: 0.9999991308606928, iteration: 96260
loss: 1.0755858421325684,grad_norm: 0.9999993461253447, iteration: 96261
loss: 1.0226178169250488,grad_norm: 0.9999990493502514, iteration: 96262
loss: 1.0099140405654907,grad_norm: 0.9999991075799219, iteration: 96263
loss: 1.0273605585098267,grad_norm: 0.9999994899848073, iteration: 96264
loss: 0.9891312718391418,grad_norm: 0.89965310401807, iteration: 96265
loss: 1.0667251348495483,grad_norm: 0.9999996848624252, iteration: 96266
loss: 1.0343352556228638,grad_norm: 0.8328867148079375, iteration: 96267
loss: 0.9815667271614075,grad_norm: 0.9999995173235267, iteration: 96268
loss: 1.0369532108306885,grad_norm: 0.9999997708192253, iteration: 96269
loss: 1.1450303792953491,grad_norm: 0.9999993102824023, iteration: 96270
loss: 1.064623236656189,grad_norm: 0.9999991517488712, iteration: 96271
loss: 1.037990927696228,grad_norm: 0.9999997986993019, iteration: 96272
loss: 1.0272047519683838,grad_norm: 0.9999995439131467, iteration: 96273
loss: 1.0873596668243408,grad_norm: 0.9999997125083682, iteration: 96274
loss: 1.0783753395080566,grad_norm: 0.9999993514834711, iteration: 96275
loss: 1.0140106678009033,grad_norm: 0.9999998960717905, iteration: 96276
loss: 1.010927677154541,grad_norm: 0.9999990690866193, iteration: 96277
loss: 1.0360321998596191,grad_norm: 0.999999489961911, iteration: 96278
loss: 1.0894168615341187,grad_norm: 0.9999996795369573, iteration: 96279
loss: 1.0809643268585205,grad_norm: 0.9999994373780469, iteration: 96280
loss: 1.0129541158676147,grad_norm: 0.9999993461528381, iteration: 96281
loss: 0.9980788230895996,grad_norm: 0.9999989768684563, iteration: 96282
loss: 0.9959779977798462,grad_norm: 0.9999999577869109, iteration: 96283
loss: 1.1519899368286133,grad_norm: 0.999999912846181, iteration: 96284
loss: 0.9717056751251221,grad_norm: 0.814597304671637, iteration: 96285
loss: 0.9998024702072144,grad_norm: 0.9999992225530461, iteration: 96286
loss: 1.1221859455108643,grad_norm: 0.9239224059443762, iteration: 96287
loss: 1.0322951078414917,grad_norm: 0.9999996813188409, iteration: 96288
loss: 1.0024431943893433,grad_norm: 0.8612154857222081, iteration: 96289
loss: 1.01543390750885,grad_norm: 0.9999991907634378, iteration: 96290
loss: 1.0360093116760254,grad_norm: 0.9006088635502627, iteration: 96291
loss: 0.9923750162124634,grad_norm: 0.9999997912112045, iteration: 96292
loss: 1.048042893409729,grad_norm: 0.9999996456450148, iteration: 96293
loss: 1.0382612943649292,grad_norm: 0.9999990718374121, iteration: 96294
loss: 1.0010130405426025,grad_norm: 0.7838368500815519, iteration: 96295
loss: 1.0288002490997314,grad_norm: 0.999999445453992, iteration: 96296
loss: 1.0535885095596313,grad_norm: 0.9999990667171175, iteration: 96297
loss: 1.0770498514175415,grad_norm: 0.9999997533070476, iteration: 96298
loss: 1.0913821458816528,grad_norm: 0.9999991472669361, iteration: 96299
loss: 1.3900165557861328,grad_norm: 0.9999998732155555, iteration: 96300
loss: 1.0020630359649658,grad_norm: 0.8943435989775878, iteration: 96301
loss: 1.4540611505508423,grad_norm: 0.999999824083431, iteration: 96302
loss: 1.0737290382385254,grad_norm: 0.9999993242750859, iteration: 96303
loss: 1.023429036140442,grad_norm: 0.9965124592679452, iteration: 96304
loss: 1.0887750387191772,grad_norm: 0.9999997388373741, iteration: 96305
loss: 1.1908624172210693,grad_norm: 0.9999991693024464, iteration: 96306
loss: 0.9628247618675232,grad_norm: 0.892922060804489, iteration: 96307
loss: 1.050289511680603,grad_norm: 0.8633233840897732, iteration: 96308
loss: 0.9876830577850342,grad_norm: 0.9999989580662519, iteration: 96309
loss: 1.116864800453186,grad_norm: 0.9999993730955927, iteration: 96310
loss: 1.0057964324951172,grad_norm: 0.9399439992452241, iteration: 96311
loss: 1.1975973844528198,grad_norm: 0.9999995322021372, iteration: 96312
loss: 1.1236987113952637,grad_norm: 0.9999998617721118, iteration: 96313
loss: 1.0371425151824951,grad_norm: 0.910762534403463, iteration: 96314
loss: 1.069976806640625,grad_norm: 0.9858764610608416, iteration: 96315
loss: 1.0511305332183838,grad_norm: 0.8257798867938871, iteration: 96316
loss: 1.0178757905960083,grad_norm: 0.999999143841646, iteration: 96317
loss: 1.0456537008285522,grad_norm: 0.9999996348917989, iteration: 96318
loss: 1.0682905912399292,grad_norm: 0.999999709061437, iteration: 96319
loss: 1.0617588758468628,grad_norm: 0.9999990965310379, iteration: 96320
loss: 1.003645420074463,grad_norm: 0.9999992555028788, iteration: 96321
loss: 0.9882623553276062,grad_norm: 0.9999991013948692, iteration: 96322
loss: 1.0825889110565186,grad_norm: 0.9388632856391386, iteration: 96323
loss: 1.1228208541870117,grad_norm: 0.9999993928477232, iteration: 96324
loss: 0.9922069311141968,grad_norm: 0.8459800447948341, iteration: 96325
loss: 0.9922710657119751,grad_norm: 0.999999076273301, iteration: 96326
loss: 1.0312526226043701,grad_norm: 0.7771698895361977, iteration: 96327
loss: 1.1375261545181274,grad_norm: 0.9999993485783948, iteration: 96328
loss: 1.0708940029144287,grad_norm: 0.9999991232049945, iteration: 96329
loss: 1.0164991617202759,grad_norm: 0.9999991286892298, iteration: 96330
loss: 0.9830913543701172,grad_norm: 0.9999992389806145, iteration: 96331
loss: 1.0670006275177002,grad_norm: 0.9999992570210896, iteration: 96332
loss: 1.02772855758667,grad_norm: 0.9999992524017275, iteration: 96333
loss: 0.9979269504547119,grad_norm: 0.9999991876257743, iteration: 96334
loss: 1.0858486890792847,grad_norm: 0.99999947785306, iteration: 96335
loss: 0.9986225962638855,grad_norm: 0.9999993167402348, iteration: 96336
loss: 0.9813187718391418,grad_norm: 0.8086819907466024, iteration: 96337
loss: 1.036011815071106,grad_norm: 0.9999995142702128, iteration: 96338
loss: 1.0422782897949219,grad_norm: 0.9999995093005786, iteration: 96339
loss: 0.9883215427398682,grad_norm: 0.9999991664887222, iteration: 96340
loss: 1.0061477422714233,grad_norm: 0.8989339833951299, iteration: 96341
loss: 1.0622564554214478,grad_norm: 0.9999994434942786, iteration: 96342
loss: 1.018749475479126,grad_norm: 0.9999990653184241, iteration: 96343
loss: 1.0294370651245117,grad_norm: 0.9999994254753142, iteration: 96344
loss: 1.0329177379608154,grad_norm: 0.9999993165004235, iteration: 96345
loss: 1.0528366565704346,grad_norm: 0.9999990920165598, iteration: 96346
loss: 1.0956517457962036,grad_norm: 0.9999994768521088, iteration: 96347
loss: 1.0337902307510376,grad_norm: 0.9999992952156529, iteration: 96348
loss: 1.027024745941162,grad_norm: 0.9999995090110675, iteration: 96349
loss: 1.00468111038208,grad_norm: 0.9999991470555887, iteration: 96350
loss: 1.0068094730377197,grad_norm: 0.9999991928492814, iteration: 96351
loss: 1.0338300466537476,grad_norm: 0.9999995145198681, iteration: 96352
loss: 0.9717395305633545,grad_norm: 0.8928226255257141, iteration: 96353
loss: 1.1063058376312256,grad_norm: 0.9999993013946755, iteration: 96354
loss: 1.0080821514129639,grad_norm: 0.9887988866244701, iteration: 96355
loss: 1.031645655632019,grad_norm: 0.9999997691338453, iteration: 96356
loss: 1.10719895362854,grad_norm: 0.9999998927915023, iteration: 96357
loss: 1.0019092559814453,grad_norm: 0.9885806061768494, iteration: 96358
loss: 1.0726536512374878,grad_norm: 0.9263174290660543, iteration: 96359
loss: 0.9983245730400085,grad_norm: 0.941296537675499, iteration: 96360
loss: 1.0054022073745728,grad_norm: 0.9951592081377622, iteration: 96361
loss: 1.019940733909607,grad_norm: 0.9260956003517, iteration: 96362
loss: 1.0862787961959839,grad_norm: 0.9999998144653932, iteration: 96363
loss: 1.0375676155090332,grad_norm: 0.9999999493785664, iteration: 96364
loss: 1.0151084661483765,grad_norm: 0.999999210424274, iteration: 96365
loss: 1.1084568500518799,grad_norm: 0.9999994671274027, iteration: 96366
loss: 0.9736658334732056,grad_norm: 0.8391909610137518, iteration: 96367
loss: 1.1073250770568848,grad_norm: 0.9983241983866205, iteration: 96368
loss: 1.0999656915664673,grad_norm: 0.9999996948544915, iteration: 96369
loss: 1.030651330947876,grad_norm: 0.9999998437740054, iteration: 96370
loss: 1.2927982807159424,grad_norm: 0.9999999927303972, iteration: 96371
loss: 1.2684507369995117,grad_norm: 0.9999997026106613, iteration: 96372
loss: 1.008074164390564,grad_norm: 0.9999992706466555, iteration: 96373
loss: 0.9840115308761597,grad_norm: 0.999999945209001, iteration: 96374
loss: 1.1661850214004517,grad_norm: 0.9812608695498477, iteration: 96375
loss: 1.07856285572052,grad_norm: 0.9999994507635854, iteration: 96376
loss: 1.1930891275405884,grad_norm: 0.9999994422665309, iteration: 96377
loss: 1.0590574741363525,grad_norm: 0.9999993063710729, iteration: 96378
loss: 1.0331684350967407,grad_norm: 0.9999993428965926, iteration: 96379
loss: 1.1908990144729614,grad_norm: 0.999999923519978, iteration: 96380
loss: 1.1493040323257446,grad_norm: 0.9999996961560464, iteration: 96381
loss: 1.1442973613739014,grad_norm: 0.9999992381676391, iteration: 96382
loss: 1.208359956741333,grad_norm: 0.9999995849317672, iteration: 96383
loss: 1.1077312231063843,grad_norm: 0.9999990781451846, iteration: 96384
loss: 1.2962757349014282,grad_norm: 0.9999996904115156, iteration: 96385
loss: 1.0996779203414917,grad_norm: 0.9999991524645878, iteration: 96386
loss: 1.1065906286239624,grad_norm: 0.999999891551596, iteration: 96387
loss: 1.0544695854187012,grad_norm: 0.999999216992221, iteration: 96388
loss: 1.1009998321533203,grad_norm: 0.9999997968842596, iteration: 96389
loss: 1.2125662565231323,grad_norm: 0.9999993311613915, iteration: 96390
loss: 1.0763272047042847,grad_norm: 0.9999998717299076, iteration: 96391
loss: 1.274210810661316,grad_norm: 0.9999998564922553, iteration: 96392
loss: 1.0346741676330566,grad_norm: 0.999999719340855, iteration: 96393
loss: 1.108564019203186,grad_norm: 0.9999990778241904, iteration: 96394
loss: 0.9991705417633057,grad_norm: 0.9145840290803331, iteration: 96395
loss: 1.084183931350708,grad_norm: 0.9999999325714446, iteration: 96396
loss: 1.2450459003448486,grad_norm: 0.9999997250723166, iteration: 96397
loss: 1.0642577409744263,grad_norm: 0.9999991118737117, iteration: 96398
loss: 1.2044448852539062,grad_norm: 0.999999908169959, iteration: 96399
loss: 1.1900084018707275,grad_norm: 0.9999993447003804, iteration: 96400
loss: 1.1383957862854004,grad_norm: 0.9885469997853434, iteration: 96401
loss: 1.0079864263534546,grad_norm: 0.9999994404724574, iteration: 96402
loss: 1.0696831941604614,grad_norm: 0.9999992823256649, iteration: 96403
loss: 1.2391679286956787,grad_norm: 0.9999995936667283, iteration: 96404
loss: 1.0229921340942383,grad_norm: 0.8552585262398407, iteration: 96405
loss: 1.1065387725830078,grad_norm: 0.9999997622072205, iteration: 96406
loss: 1.2121344804763794,grad_norm: 0.9999993302174642, iteration: 96407
loss: 1.0692110061645508,grad_norm: 0.9999996044667239, iteration: 96408
loss: 1.1999391317367554,grad_norm: 0.9999993027129221, iteration: 96409
loss: 1.0272423028945923,grad_norm: 0.9999995578351092, iteration: 96410
loss: 1.169157862663269,grad_norm: 0.9999993418813874, iteration: 96411
loss: 1.041033387184143,grad_norm: 0.9999991180985645, iteration: 96412
loss: 1.1741812229156494,grad_norm: 0.9999996262538277, iteration: 96413
loss: 1.1732836961746216,grad_norm: 0.9999998314595928, iteration: 96414
loss: 1.030001163482666,grad_norm: 0.999999712376906, iteration: 96415
loss: 1.2297327518463135,grad_norm: 0.9999998448618445, iteration: 96416
loss: 1.1581201553344727,grad_norm: 0.9999999203676968, iteration: 96417
loss: 1.1549993753433228,grad_norm: 1.0000000640399873, iteration: 96418
loss: 1.1882972717285156,grad_norm: 0.999999211635883, iteration: 96419
loss: 1.1666754484176636,grad_norm: 1.0000000097748638, iteration: 96420
loss: 1.6925626993179321,grad_norm: 0.9999999341073494, iteration: 96421
loss: 1.2624372243881226,grad_norm: 0.9999995380843074, iteration: 96422
loss: 1.3379164934158325,grad_norm: 0.9999996769911482, iteration: 96423
loss: 1.0931835174560547,grad_norm: 0.9999993995131092, iteration: 96424
loss: 1.123928189277649,grad_norm: 0.9157531576982658, iteration: 96425
loss: 1.2179524898529053,grad_norm: 0.9999998973394634, iteration: 96426
loss: 1.1946364641189575,grad_norm: 0.9999996836458603, iteration: 96427
loss: 1.2044340372085571,grad_norm: 0.9999994916441569, iteration: 96428
loss: 1.2412407398223877,grad_norm: 1.0000000009135437, iteration: 96429
loss: 1.0793054103851318,grad_norm: 0.9999993596513166, iteration: 96430
loss: 1.2166193723678589,grad_norm: 0.9999996946152127, iteration: 96431
loss: 1.2046586275100708,grad_norm: 0.9999998869954775, iteration: 96432
loss: 1.233507513999939,grad_norm: 0.9999998102542627, iteration: 96433
loss: 1.3614482879638672,grad_norm: 0.9999998563939367, iteration: 96434
loss: 1.361782431602478,grad_norm: 0.9999998766901466, iteration: 96435
loss: 1.1278741359710693,grad_norm: 0.9999992610206971, iteration: 96436
loss: 1.0627988576889038,grad_norm: 0.9999991469025036, iteration: 96437
loss: 1.1698702573776245,grad_norm: 0.9999992247172477, iteration: 96438
loss: 1.5247093439102173,grad_norm: 0.9999999062222992, iteration: 96439
loss: 1.1485925912857056,grad_norm: 0.9999997726765932, iteration: 96440
loss: 1.2683234214782715,grad_norm: 0.9999996482129059, iteration: 96441
loss: 1.2159823179244995,grad_norm: 0.9999997138303576, iteration: 96442
loss: 1.3045170307159424,grad_norm: 0.9999995665661492, iteration: 96443
loss: 1.091626524925232,grad_norm: 0.9999997552822307, iteration: 96444
loss: 1.4294588565826416,grad_norm: 0.9999994442432822, iteration: 96445
loss: 1.275099754333496,grad_norm: 0.9999996648327427, iteration: 96446
loss: 1.3182096481323242,grad_norm: 0.9999995462889225, iteration: 96447
loss: 1.1149317026138306,grad_norm: 0.9999993683062833, iteration: 96448
loss: 1.252841591835022,grad_norm: 0.9999996440813513, iteration: 96449
loss: 1.2447869777679443,grad_norm: 0.9999991819092314, iteration: 96450
loss: 1.121397614479065,grad_norm: 0.9999996499612466, iteration: 96451
loss: 1.3027980327606201,grad_norm: 0.9999998185163642, iteration: 96452
loss: 1.4500770568847656,grad_norm: 0.9999997161294673, iteration: 96453
loss: 1.2311397790908813,grad_norm: 0.999999870645542, iteration: 96454
loss: 1.4047170877456665,grad_norm: 0.9999996311875653, iteration: 96455
loss: 1.3234443664550781,grad_norm: 0.9999997959556873, iteration: 96456
loss: 1.236393690109253,grad_norm: 0.9999994463393236, iteration: 96457
loss: 1.4640765190124512,grad_norm: 0.9999999634972649, iteration: 96458
loss: 1.3661084175109863,grad_norm: 0.9999995648246564, iteration: 96459
loss: 1.1925805807113647,grad_norm: 0.9999994248145645, iteration: 96460
loss: 1.4896690845489502,grad_norm: 0.9999997403409759, iteration: 96461
loss: 1.2169454097747803,grad_norm: 0.9999996780429857, iteration: 96462
loss: 1.5715137720108032,grad_norm: 0.999999902003744, iteration: 96463
loss: 1.4262421131134033,grad_norm: 0.9999998872036472, iteration: 96464
loss: 1.1636228561401367,grad_norm: 0.9999994626797793, iteration: 96465
loss: 1.6320544481277466,grad_norm: 0.999999901083173, iteration: 96466
loss: 1.2167739868164062,grad_norm: 0.9999999298343344, iteration: 96467
loss: 1.212503433227539,grad_norm: 0.999999532053034, iteration: 96468
loss: 1.2018340826034546,grad_norm: 0.9999993610528688, iteration: 96469
loss: 1.1296956539154053,grad_norm: 0.9999990488592563, iteration: 96470
loss: 1.141576886177063,grad_norm: 0.9999991140283359, iteration: 96471
loss: 1.2369945049285889,grad_norm: 0.9999994804429069, iteration: 96472
loss: 1.2342878580093384,grad_norm: 0.9999997547910793, iteration: 96473
loss: 1.2995589971542358,grad_norm: 0.999999772488577, iteration: 96474
loss: 0.9908173680305481,grad_norm: 0.9999993911966103, iteration: 96475
loss: 1.201219916343689,grad_norm: 0.999999452836346, iteration: 96476
loss: 1.3065944910049438,grad_norm: 0.999999687451981, iteration: 96477
loss: 1.244312047958374,grad_norm: 0.9999997477738964, iteration: 96478
loss: 1.2462393045425415,grad_norm: 0.9999998352970149, iteration: 96479
loss: 1.4378126859664917,grad_norm: 0.999999912535592, iteration: 96480
loss: 1.3245457410812378,grad_norm: 0.9999997183068381, iteration: 96481
loss: 1.0871301889419556,grad_norm: 0.9999993042142209, iteration: 96482
loss: 1.1545088291168213,grad_norm: 0.999999621196835, iteration: 96483
loss: 1.161619782447815,grad_norm: 0.9999997454745224, iteration: 96484
loss: 1.2014780044555664,grad_norm: 0.9999994641023107, iteration: 96485
loss: 1.317940592765808,grad_norm: 0.9999994903778208, iteration: 96486
loss: 1.0824130773544312,grad_norm: 0.9999990616815383, iteration: 96487
loss: 1.2099311351776123,grad_norm: 0.9999993382139954, iteration: 96488
loss: 1.1809691190719604,grad_norm: 0.9999993426019356, iteration: 96489
loss: 1.2602633237838745,grad_norm: 0.9999995960173381, iteration: 96490
loss: 1.497662901878357,grad_norm: 0.9999996129413168, iteration: 96491
loss: 1.4749789237976074,grad_norm: 0.9999999441877099, iteration: 96492
loss: 1.2847795486450195,grad_norm: 0.9999999832071185, iteration: 96493
loss: 1.210473895072937,grad_norm: 0.9999993005351318, iteration: 96494
loss: 1.1296963691711426,grad_norm: 0.9999996290762965, iteration: 96495
loss: 1.1170611381530762,grad_norm: 0.999999735021481, iteration: 96496
loss: 1.3618494272232056,grad_norm: 0.9999999806906619, iteration: 96497
loss: 1.2754952907562256,grad_norm: 0.9999997484580692, iteration: 96498
loss: 1.1780004501342773,grad_norm: 0.9999997577294463, iteration: 96499
loss: 1.0772483348846436,grad_norm: 0.999999793002379, iteration: 96500
loss: 1.254085659980774,grad_norm: 0.9999999839842423, iteration: 96501
loss: 1.2098493576049805,grad_norm: 0.9999999320901822, iteration: 96502
loss: 1.0617649555206299,grad_norm: 0.999999293698064, iteration: 96503
loss: 1.1031155586242676,grad_norm: 0.9999991898853666, iteration: 96504
loss: 1.0901728868484497,grad_norm: 0.9999999094601904, iteration: 96505
loss: 1.085086464881897,grad_norm: 0.999999476604549, iteration: 96506
loss: 1.1548740863800049,grad_norm: 0.9999993269676453, iteration: 96507
loss: 1.323286533355713,grad_norm: 0.9999996748492282, iteration: 96508
loss: 1.171165108680725,grad_norm: 0.9999992775598368, iteration: 96509
loss: 1.181923747062683,grad_norm: 0.9999995696158882, iteration: 96510
loss: 1.2752649784088135,grad_norm: 0.9999998270507398, iteration: 96511
loss: 1.2779043912887573,grad_norm: 0.9999999385690357, iteration: 96512
loss: 1.3650962114334106,grad_norm: 1.0000000353896068, iteration: 96513
loss: 1.1321735382080078,grad_norm: 0.9999998376740407, iteration: 96514
loss: 1.2344987392425537,grad_norm: 0.9999999315071679, iteration: 96515
loss: 1.1448845863342285,grad_norm: 0.9999991022432402, iteration: 96516
loss: 1.0515170097351074,grad_norm: 0.9999998622602447, iteration: 96517
loss: 1.3308568000793457,grad_norm: 0.9999997138433817, iteration: 96518
loss: 1.223318099975586,grad_norm: 0.9999997550758466, iteration: 96519
loss: 1.0916335582733154,grad_norm: 0.9999991575325752, iteration: 96520
loss: 1.10494065284729,grad_norm: 0.9999996117255935, iteration: 96521
loss: 1.1221795082092285,grad_norm: 0.9999995710451633, iteration: 96522
loss: 1.1567379236221313,grad_norm: 0.9999996493258149, iteration: 96523
loss: 1.0828237533569336,grad_norm: 0.9999995601704077, iteration: 96524
loss: 1.2232730388641357,grad_norm: 0.9999997813590026, iteration: 96525
loss: 1.0618157386779785,grad_norm: 0.9903318412039334, iteration: 96526
loss: 1.1690964698791504,grad_norm: 0.999999898859912, iteration: 96527
loss: 1.2969245910644531,grad_norm: 0.9999998501737903, iteration: 96528
loss: 1.2877895832061768,grad_norm: 0.9999994221403598, iteration: 96529
loss: 1.0830990076065063,grad_norm: 0.9999992597810836, iteration: 96530
loss: 1.2481300830841064,grad_norm: 0.9999995061836131, iteration: 96531
loss: 1.1530201435089111,grad_norm: 0.9999998304880648, iteration: 96532
loss: 1.2997368574142456,grad_norm: 0.9999995155531377, iteration: 96533
loss: 1.264483094215393,grad_norm: 0.9999995835840408, iteration: 96534
loss: 1.1020824909210205,grad_norm: 0.999999636771121, iteration: 96535
loss: 1.4293380975723267,grad_norm: 0.9999995403731086, iteration: 96536
loss: 1.0660028457641602,grad_norm: 0.9999996390425123, iteration: 96537
loss: 1.1594105958938599,grad_norm: 0.9999997477284517, iteration: 96538
loss: 1.050553560256958,grad_norm: 0.9999991944720271, iteration: 96539
loss: 1.309031367301941,grad_norm: 0.9999995081328145, iteration: 96540
loss: 1.109457015991211,grad_norm: 0.9999990855831765, iteration: 96541
loss: 1.1721527576446533,grad_norm: 0.9999996220667453, iteration: 96542
loss: 1.2811189889907837,grad_norm: 0.9999999749480496, iteration: 96543
loss: 1.1078842878341675,grad_norm: 0.9847927568412519, iteration: 96544
loss: 1.5844597816467285,grad_norm: 0.9999999148273762, iteration: 96545
loss: 1.1086822748184204,grad_norm: 0.999999325720491, iteration: 96546
loss: 1.072856068611145,grad_norm: 0.999999647675483, iteration: 96547
loss: 1.210970163345337,grad_norm: 0.9999998755383521, iteration: 96548
loss: 1.1120718717575073,grad_norm: 0.9999992630055762, iteration: 96549
loss: 1.2160406112670898,grad_norm: 0.999999370611153, iteration: 96550
loss: 1.2582709789276123,grad_norm: 0.9999994146535165, iteration: 96551
loss: 1.386030673980713,grad_norm: 0.9999998959141368, iteration: 96552
loss: 1.180939793586731,grad_norm: 0.9999992526055674, iteration: 96553
loss: 1.4826858043670654,grad_norm: 0.9999999099298781, iteration: 96554
loss: 1.1605472564697266,grad_norm: 0.9999996347959714, iteration: 96555
loss: 1.477179765701294,grad_norm: 0.9999999902047768, iteration: 96556
loss: 1.1017605066299438,grad_norm: 0.9999993680121538, iteration: 96557
loss: 1.35479736328125,grad_norm: 0.9999998983791107, iteration: 96558
loss: 1.0428274869918823,grad_norm: 0.9999993030028759, iteration: 96559
loss: 1.327561378479004,grad_norm: 0.999999539612316, iteration: 96560
loss: 1.0929715633392334,grad_norm: 0.9999994397293668, iteration: 96561
loss: 1.2858085632324219,grad_norm: 0.9999998304955816, iteration: 96562
loss: 1.1476877927780151,grad_norm: 0.9999999226409699, iteration: 96563
loss: 1.098442554473877,grad_norm: 0.9999999361947093, iteration: 96564
loss: 1.0697394609451294,grad_norm: 0.9999996667209045, iteration: 96565
loss: 1.0663095712661743,grad_norm: 0.9728664819462035, iteration: 96566
loss: 1.3291653394699097,grad_norm: 0.9999998869022172, iteration: 96567
loss: 1.0764987468719482,grad_norm: 0.9999999796857794, iteration: 96568
loss: 1.0680437088012695,grad_norm: 0.9999989800530762, iteration: 96569
loss: 1.0637502670288086,grad_norm: 0.9999991329448348, iteration: 96570
loss: 1.1930099725723267,grad_norm: 0.9999991957510732, iteration: 96571
loss: 1.232941746711731,grad_norm: 0.9999997144199598, iteration: 96572
loss: 1.2963323593139648,grad_norm: 0.9999996017576614, iteration: 96573
loss: 1.206559419631958,grad_norm: 0.9999994608593744, iteration: 96574
loss: 1.081474781036377,grad_norm: 0.9999995394161538, iteration: 96575
loss: 1.1821770668029785,grad_norm: 0.9999992732377918, iteration: 96576
loss: 1.2777150869369507,grad_norm: 0.9999999309780883, iteration: 96577
loss: 1.1816558837890625,grad_norm: 0.9999995201674448, iteration: 96578
loss: 1.0950686931610107,grad_norm: 0.999999465675314, iteration: 96579
loss: 1.2546442747116089,grad_norm: 0.9999996910085259, iteration: 96580
loss: 1.1402907371520996,grad_norm: 0.9999992305953876, iteration: 96581
loss: 1.163900375366211,grad_norm: 0.9999994103345223, iteration: 96582
loss: 1.140805959701538,grad_norm: 0.9999998875163606, iteration: 96583
loss: 1.2800062894821167,grad_norm: 0.9999992873323478, iteration: 96584
loss: 1.1401549577713013,grad_norm: 0.9999997714711775, iteration: 96585
loss: 1.0758274793624878,grad_norm: 0.9999990483616662, iteration: 96586
loss: 1.1786075830459595,grad_norm: 0.9999998178379244, iteration: 96587
loss: 1.1621390581130981,grad_norm: 0.9999992288557633, iteration: 96588
loss: 1.2690337896347046,grad_norm: 0.9999999008230546, iteration: 96589
loss: 1.0884705781936646,grad_norm: 0.9999989970842315, iteration: 96590
loss: 1.1426076889038086,grad_norm: 0.9999998820417407, iteration: 96591
loss: 1.1730115413665771,grad_norm: 0.9999993396807131, iteration: 96592
loss: 1.193009376525879,grad_norm: 0.9999994960469112, iteration: 96593
loss: 1.1007905006408691,grad_norm: 0.9999996555784827, iteration: 96594
loss: 1.2286006212234497,grad_norm: 0.9999996762074849, iteration: 96595
loss: 1.0769898891448975,grad_norm: 0.9999991006186449, iteration: 96596
loss: 1.19501793384552,grad_norm: 0.9999997058225522, iteration: 96597
loss: 1.1142349243164062,grad_norm: 0.9999993575870386, iteration: 96598
loss: 1.3277920484542847,grad_norm: 0.999999958060213, iteration: 96599
loss: 1.2399972677230835,grad_norm: 0.9999995567737261, iteration: 96600
loss: 1.1263712644577026,grad_norm: 0.9999993135991014, iteration: 96601
loss: 1.0349522829055786,grad_norm: 0.9999994606275214, iteration: 96602
loss: 1.2476712465286255,grad_norm: 0.9999996076120443, iteration: 96603
loss: 1.2702274322509766,grad_norm: 0.9999996562761814, iteration: 96604
loss: 1.241867184638977,grad_norm: 0.9999993481814022, iteration: 96605
loss: 1.028907299041748,grad_norm: 0.9999992360261485, iteration: 96606
loss: 1.0352251529693604,grad_norm: 0.9999995262289707, iteration: 96607
loss: 1.1373295783996582,grad_norm: 0.9999996376290143, iteration: 96608
loss: 1.181323766708374,grad_norm: 0.9999998621445096, iteration: 96609
loss: 1.1674057245254517,grad_norm: 0.9999998834149255, iteration: 96610
loss: 1.0913305282592773,grad_norm: 0.9999996373547726, iteration: 96611
loss: 1.0950175523757935,grad_norm: 0.9999991784961365, iteration: 96612
loss: 1.215205192565918,grad_norm: 0.9999996995811861, iteration: 96613
loss: 1.1840624809265137,grad_norm: 0.9999997268618365, iteration: 96614
loss: 1.2840240001678467,grad_norm: 0.9999995649142177, iteration: 96615
loss: 1.1208335161209106,grad_norm: 0.9611922038810355, iteration: 96616
loss: 1.2324620485305786,grad_norm: 0.9999991503785784, iteration: 96617
loss: 1.10005784034729,grad_norm: 0.9999996080895003, iteration: 96618
loss: 1.0603052377700806,grad_norm: 0.9999990822846163, iteration: 96619
loss: 1.2535051107406616,grad_norm: 0.9999996347235073, iteration: 96620
loss: 1.1702749729156494,grad_norm: 0.9999997110999848, iteration: 96621
loss: 1.1664676666259766,grad_norm: 0.9999997084152086, iteration: 96622
loss: 1.0852316617965698,grad_norm: 0.9999993162986607, iteration: 96623
loss: 1.3039462566375732,grad_norm: 0.9999998805118049, iteration: 96624
loss: 1.096034049987793,grad_norm: 0.9999994520631128, iteration: 96625
loss: 1.1385923624038696,grad_norm: 0.9287809120929204, iteration: 96626
loss: 1.057092308998108,grad_norm: 0.9999993753644295, iteration: 96627
loss: 1.1745916604995728,grad_norm: 0.9999994862694948, iteration: 96628
loss: 1.0656932592391968,grad_norm: 0.9999998643613318, iteration: 96629
loss: 1.1659495830535889,grad_norm: 0.999999913417106, iteration: 96630
loss: 1.1433634757995605,grad_norm: 0.9999991923843166, iteration: 96631
loss: 1.0881744623184204,grad_norm: 0.9999993098804704, iteration: 96632
loss: 1.0272761583328247,grad_norm: 0.999999664386167, iteration: 96633
loss: 1.2030245065689087,grad_norm: 0.9999997404938792, iteration: 96634
loss: 1.1459494829177856,grad_norm: 0.9999998624822438, iteration: 96635
loss: 1.06687593460083,grad_norm: 0.9999992790508989, iteration: 96636
loss: 1.0829894542694092,grad_norm: 0.9999993077750853, iteration: 96637
loss: 1.0759354829788208,grad_norm: 0.9999991693446368, iteration: 96638
loss: 1.1096277236938477,grad_norm: 0.9999994937617943, iteration: 96639
loss: 1.0796771049499512,grad_norm: 0.9999996168617311, iteration: 96640
loss: 1.0151032209396362,grad_norm: 0.9434386701603594, iteration: 96641
loss: 1.0191147327423096,grad_norm: 0.9999990326308256, iteration: 96642
loss: 1.0530815124511719,grad_norm: 0.9168766923759722, iteration: 96643
loss: 1.0873980522155762,grad_norm: 0.9999997889368265, iteration: 96644
loss: 1.08360755443573,grad_norm: 0.999999564882312, iteration: 96645
loss: 1.1417242288589478,grad_norm: 0.9999994943210107, iteration: 96646
loss: 1.0273584127426147,grad_norm: 0.9999992284233876, iteration: 96647
loss: 1.0803167819976807,grad_norm: 0.9999995851364446, iteration: 96648
loss: 1.0679893493652344,grad_norm: 0.957788954949329, iteration: 96649
loss: 1.068488597869873,grad_norm: 0.9999993112999704, iteration: 96650
loss: 1.016243815422058,grad_norm: 0.9110711903448516, iteration: 96651
loss: 1.1888736486434937,grad_norm: 0.9999997039905979, iteration: 96652
loss: 1.133287787437439,grad_norm: 0.9999998840688179, iteration: 96653
loss: 1.1005643606185913,grad_norm: 0.999999606695356, iteration: 96654
loss: 1.0531096458435059,grad_norm: 0.9999997790823506, iteration: 96655
loss: 1.0430188179016113,grad_norm: 0.8352994102084564, iteration: 96656
loss: 1.0239102840423584,grad_norm: 0.999999149853067, iteration: 96657
loss: 1.083022117614746,grad_norm: 0.9999992636041591, iteration: 96658
loss: 1.0828564167022705,grad_norm: 0.9999996554768292, iteration: 96659
loss: 1.0413657426834106,grad_norm: 0.8642538830050577, iteration: 96660
loss: 1.1143748760223389,grad_norm: 0.9999999125051174, iteration: 96661
loss: 1.0231212377548218,grad_norm: 0.9339498587034176, iteration: 96662
loss: 1.0607295036315918,grad_norm: 0.8693418484007617, iteration: 96663
loss: 1.1436604261398315,grad_norm: 0.9999995369432922, iteration: 96664
loss: 0.9807112812995911,grad_norm: 0.7782123178684411, iteration: 96665
loss: 1.0249454975128174,grad_norm: 0.9999994245030216, iteration: 96666
loss: 1.1505968570709229,grad_norm: 0.9999992843003075, iteration: 96667
loss: 1.0012903213500977,grad_norm: 0.8265610683636428, iteration: 96668
loss: 1.061065912246704,grad_norm: 0.9999991893458586, iteration: 96669
loss: 1.0628910064697266,grad_norm: 0.9999991015454742, iteration: 96670
loss: 1.0131796598434448,grad_norm: 0.739955220201722, iteration: 96671
loss: 1.0502578020095825,grad_norm: 0.9108134867368752, iteration: 96672
loss: 1.0086055994033813,grad_norm: 0.9999989948441094, iteration: 96673
loss: 1.0761252641677856,grad_norm: 0.9207554828586498, iteration: 96674
loss: 1.0656590461730957,grad_norm: 0.9999991564335052, iteration: 96675
loss: 1.0040067434310913,grad_norm: 0.7951865726862413, iteration: 96676
loss: 0.9967887997627258,grad_norm: 0.9999990189197125, iteration: 96677
loss: 1.021169662475586,grad_norm: 0.9999989001345829, iteration: 96678
loss: 0.9783525466918945,grad_norm: 0.9999991388611857, iteration: 96679
loss: 1.0897964239120483,grad_norm: 0.9999999549570519, iteration: 96680
loss: 1.0629223585128784,grad_norm: 0.9999996612344435, iteration: 96681
loss: 1.163074254989624,grad_norm: 0.9999996341706929, iteration: 96682
loss: 0.985829770565033,grad_norm: 0.957855427890762, iteration: 96683
loss: 1.0807024240493774,grad_norm: 0.9999995985465225, iteration: 96684
loss: 1.060612678527832,grad_norm: 0.9476600779316922, iteration: 96685
loss: 1.027765154838562,grad_norm: 0.9999992268983798, iteration: 96686
loss: 1.0435301065444946,grad_norm: 0.965992989343361, iteration: 96687
loss: 1.208941102027893,grad_norm: 0.9999995299880577, iteration: 96688
loss: 1.0620355606079102,grad_norm: 0.9999992602807357, iteration: 96689
loss: 1.0326128005981445,grad_norm: 0.9999990741600747, iteration: 96690
loss: 0.9938154220581055,grad_norm: 0.8766115298374244, iteration: 96691
loss: 1.0543434619903564,grad_norm: 0.9999990671976212, iteration: 96692
loss: 0.9801924824714661,grad_norm: 0.9999990610928148, iteration: 96693
loss: 1.1164523363113403,grad_norm: 0.9999991462071067, iteration: 96694
loss: 1.077439546585083,grad_norm: 0.9999995431773449, iteration: 96695
loss: 1.0700124502182007,grad_norm: 0.9999989663817181, iteration: 96696
loss: 1.037360668182373,grad_norm: 0.9999994200014015, iteration: 96697
loss: 1.0799927711486816,grad_norm: 0.9999994018739428, iteration: 96698
loss: 1.2613956928253174,grad_norm: 0.9999999561311675, iteration: 96699
loss: 1.0138664245605469,grad_norm: 0.8957917388507685, iteration: 96700
loss: 0.9684514403343201,grad_norm: 0.9075891824279333, iteration: 96701
loss: 1.3574506044387817,grad_norm: 0.9999996089257923, iteration: 96702
loss: 1.0395612716674805,grad_norm: 0.8391246536371277, iteration: 96703
loss: 1.0017452239990234,grad_norm: 0.7742859468443032, iteration: 96704
loss: 1.2370718717575073,grad_norm: 0.999999568762261, iteration: 96705
loss: 1.0550587177276611,grad_norm: 0.999999650930866, iteration: 96706
loss: 1.15605890750885,grad_norm: 0.9999994138141677, iteration: 96707
loss: 1.0638638734817505,grad_norm: 0.999999943984099, iteration: 96708
loss: 1.283822774887085,grad_norm: 0.9999998798490581, iteration: 96709
loss: 1.2001203298568726,grad_norm: 0.9999993976517846, iteration: 96710
loss: 1.020257592201233,grad_norm: 0.7875439666466615, iteration: 96711
loss: 1.2095587253570557,grad_norm: 0.999999226005975, iteration: 96712
loss: 1.0854142904281616,grad_norm: 0.9999994327458978, iteration: 96713
loss: 0.9889491200447083,grad_norm: 0.8705147561243015, iteration: 96714
loss: 1.229145884513855,grad_norm: 0.9999992871915828, iteration: 96715
loss: 1.298935890197754,grad_norm: 0.9999996488914418, iteration: 96716
loss: 1.016263723373413,grad_norm: 0.9188309115684639, iteration: 96717
loss: 1.0858808755874634,grad_norm: 0.9999996785672286, iteration: 96718
loss: 1.0858594179153442,grad_norm: 0.9999996062230081, iteration: 96719
loss: 1.0622828006744385,grad_norm: 0.9999989843841535, iteration: 96720
loss: 1.1664788722991943,grad_norm: 0.9999991732363713, iteration: 96721
loss: 1.1143839359283447,grad_norm: 0.9999993170998613, iteration: 96722
loss: 1.0985490083694458,grad_norm: 0.9999995397566932, iteration: 96723
loss: 0.9865432381629944,grad_norm: 0.9999992123113685, iteration: 96724
loss: 1.2137999534606934,grad_norm: 0.9999995446504558, iteration: 96725
loss: 1.1045883893966675,grad_norm: 0.999999992795687, iteration: 96726
loss: 0.9872342944145203,grad_norm: 0.9406528105234593, iteration: 96727
loss: 1.0383665561676025,grad_norm: 0.999999209088032, iteration: 96728
loss: 1.021883487701416,grad_norm: 0.9103964043854388, iteration: 96729
loss: 1.046028971672058,grad_norm: 0.9396738435385664, iteration: 96730
loss: 1.0337108373641968,grad_norm: 0.9999991107311378, iteration: 96731
loss: 0.9724950194358826,grad_norm: 0.9361175791550187, iteration: 96732
loss: 1.0137641429901123,grad_norm: 0.9999991759032957, iteration: 96733
loss: 0.9913978576660156,grad_norm: 0.9999992071313591, iteration: 96734
loss: 1.0872001647949219,grad_norm: 0.9999990660683554, iteration: 96735
loss: 0.9885296821594238,grad_norm: 0.9999990320467558, iteration: 96736
loss: 1.0137395858764648,grad_norm: 0.9632119384689845, iteration: 96737
loss: 1.0097832679748535,grad_norm: 0.9999995203462579, iteration: 96738
loss: 1.0204654932022095,grad_norm: 0.9784416293031009, iteration: 96739
loss: 0.9751554727554321,grad_norm: 0.9479662945494559, iteration: 96740
loss: 1.0591710805892944,grad_norm: 0.9363150616637631, iteration: 96741
loss: 1.0604158639907837,grad_norm: 0.9235772159992427, iteration: 96742
loss: 1.0079567432403564,grad_norm: 0.9231876577717031, iteration: 96743
loss: 1.0479861497879028,grad_norm: 0.999999145373466, iteration: 96744
loss: 1.0212299823760986,grad_norm: 0.99999898930147, iteration: 96745
loss: 1.0307371616363525,grad_norm: 0.8675054367399483, iteration: 96746
loss: 1.1208610534667969,grad_norm: 0.9999993195861866, iteration: 96747
loss: 1.0324726104736328,grad_norm: 0.9738953530363187, iteration: 96748
loss: 1.0244988203048706,grad_norm: 0.9999989907536391, iteration: 96749
loss: 1.02390456199646,grad_norm: 0.9508372782863619, iteration: 96750
loss: 1.0579172372817993,grad_norm: 0.9999998535192515, iteration: 96751
loss: 1.1492440700531006,grad_norm: 0.9447854542646154, iteration: 96752
loss: 0.9831119775772095,grad_norm: 0.8417291188244871, iteration: 96753
loss: 1.088580846786499,grad_norm: 0.9999996495206963, iteration: 96754
loss: 1.011900782585144,grad_norm: 0.9971609889058026, iteration: 96755
loss: 1.2599536180496216,grad_norm: 0.9999997586534197, iteration: 96756
loss: 1.0381848812103271,grad_norm: 0.9999996190202896, iteration: 96757
loss: 1.127768635749817,grad_norm: 0.9999995560988045, iteration: 96758
loss: 1.160192847251892,grad_norm: 0.9999995300992688, iteration: 96759
loss: 1.0038779973983765,grad_norm: 0.9999991655640865, iteration: 96760
loss: 1.1326005458831787,grad_norm: 0.9999994989660519, iteration: 96761
loss: 1.0664739608764648,grad_norm: 0.9999991555189279, iteration: 96762
loss: 1.0601589679718018,grad_norm: 0.8619184881659463, iteration: 96763
loss: 0.9825196266174316,grad_norm: 0.7582479970788711, iteration: 96764
loss: 1.096909523010254,grad_norm: 0.9999994505717515, iteration: 96765
loss: 1.0129613876342773,grad_norm: 0.9149402377343036, iteration: 96766
loss: 0.9768613576889038,grad_norm: 0.9921365278993157, iteration: 96767
loss: 1.1488888263702393,grad_norm: 0.9999992970087981, iteration: 96768
loss: 1.161143183708191,grad_norm: 0.9999998069833733, iteration: 96769
loss: 1.027121663093567,grad_norm: 0.9999997675018478, iteration: 96770
loss: 1.0215259790420532,grad_norm: 0.9999993640321538, iteration: 96771
loss: 1.0750614404678345,grad_norm: 0.9999993788396088, iteration: 96772
loss: 1.0042376518249512,grad_norm: 0.9999991511384954, iteration: 96773
loss: 1.1232982873916626,grad_norm: 0.9999998974714484, iteration: 96774
loss: 1.0276180505752563,grad_norm: 0.9999992858328561, iteration: 96775
loss: 0.9853045344352722,grad_norm: 0.9999991874935771, iteration: 96776
loss: 0.9795907139778137,grad_norm: 0.9999991647728587, iteration: 96777
loss: 1.0432921648025513,grad_norm: 0.9999995249144719, iteration: 96778
loss: 1.0717883110046387,grad_norm: 0.9999996921631025, iteration: 96779
loss: 1.2018954753875732,grad_norm: 0.9999999826406127, iteration: 96780
loss: 0.9815490245819092,grad_norm: 0.9999993094522925, iteration: 96781
loss: 1.018306851387024,grad_norm: 0.8795097177475454, iteration: 96782
loss: 0.9749717712402344,grad_norm: 0.9999994740334532, iteration: 96783
loss: 1.0195119380950928,grad_norm: 0.9999991549675142, iteration: 96784
loss: 1.0385082960128784,grad_norm: 0.9535852492853724, iteration: 96785
loss: 0.9937612414360046,grad_norm: 0.9999994313161978, iteration: 96786
loss: 1.010037899017334,grad_norm: 0.9011122925391284, iteration: 96787
loss: 1.022359013557434,grad_norm: 0.9999991778465873, iteration: 96788
loss: 1.0498031377792358,grad_norm: 0.890804979318677, iteration: 96789
loss: 1.012755036354065,grad_norm: 0.999998973061167, iteration: 96790
loss: 1.052891492843628,grad_norm: 0.9999991022537532, iteration: 96791
loss: 1.0907793045043945,grad_norm: 0.8715400068010921, iteration: 96792
loss: 1.1236279010772705,grad_norm: 0.9999991196405785, iteration: 96793
loss: 1.1068389415740967,grad_norm: 0.999999154473574, iteration: 96794
loss: 1.1587936878204346,grad_norm: 0.9999994411298148, iteration: 96795
loss: 1.106706976890564,grad_norm: 0.888328806607954, iteration: 96796
loss: 1.0676484107971191,grad_norm: 1.0000000271406626, iteration: 96797
loss: 1.3063948154449463,grad_norm: 0.9999999425005969, iteration: 96798
loss: 1.0664362907409668,grad_norm: 0.9999994810279536, iteration: 96799
loss: 0.993822455406189,grad_norm: 0.951969884080834, iteration: 96800
loss: 1.07870614528656,grad_norm: 0.892635266791221, iteration: 96801
loss: 1.0979987382888794,grad_norm: 0.9999990647214506, iteration: 96802
loss: 1.0080270767211914,grad_norm: 0.8977395492038528, iteration: 96803
loss: 1.0592528581619263,grad_norm: 0.9999999685703237, iteration: 96804
loss: 0.9964685440063477,grad_norm: 0.9999992685476766, iteration: 96805
loss: 1.0463231801986694,grad_norm: 0.9999990749045642, iteration: 96806
loss: 1.1150004863739014,grad_norm: 0.9999991203913504, iteration: 96807
loss: 1.0449107885360718,grad_norm: 0.9656743578933196, iteration: 96808
loss: 1.1000118255615234,grad_norm: 0.9999993266979792, iteration: 96809
loss: 1.0215034484863281,grad_norm: 0.9999996631655622, iteration: 96810
loss: 1.0093761682510376,grad_norm: 0.8378609818303849, iteration: 96811
loss: 1.1461821794509888,grad_norm: 0.9999997281768578, iteration: 96812
loss: 0.9477102160453796,grad_norm: 0.8682953483011969, iteration: 96813
loss: 0.9626139998435974,grad_norm: 0.8834971709376513, iteration: 96814
loss: 1.0556975603103638,grad_norm: 0.8817379558777612, iteration: 96815
loss: 1.1014081239700317,grad_norm: 0.9999992119787132, iteration: 96816
loss: 1.0265955924987793,grad_norm: 0.966246728824366, iteration: 96817
loss: 1.019168496131897,grad_norm: 0.9175627648280652, iteration: 96818
loss: 1.0699337720870972,grad_norm: 0.9999994254819388, iteration: 96819
loss: 1.08785879611969,grad_norm: 0.9999990963618095, iteration: 96820
loss: 1.0680668354034424,grad_norm: 0.9999992185749854, iteration: 96821
loss: 1.0652889013290405,grad_norm: 0.9999991799261754, iteration: 96822
loss: 1.0492159128189087,grad_norm: 0.9999990042561632, iteration: 96823
loss: 1.1610544919967651,grad_norm: 0.9999996809711756, iteration: 96824
loss: 1.1954596042633057,grad_norm: 0.999999543864857, iteration: 96825
loss: 1.0826404094696045,grad_norm: 0.9999990991621763, iteration: 96826
loss: 1.0345791578292847,grad_norm: 0.9999992794258323, iteration: 96827
loss: 1.0360544919967651,grad_norm: 0.999999291786824, iteration: 96828
loss: 1.045080304145813,grad_norm: 0.999999271404767, iteration: 96829
loss: 1.0348459482192993,grad_norm: 0.9999994948900681, iteration: 96830
loss: 1.1885660886764526,grad_norm: 0.9999991914735693, iteration: 96831
loss: 1.0808851718902588,grad_norm: 0.9999990767630329, iteration: 96832
loss: 1.0328296422958374,grad_norm: 0.9999993146246429, iteration: 96833
loss: 1.1365550756454468,grad_norm: 0.9999994279255631, iteration: 96834
loss: 1.0120322704315186,grad_norm: 0.8546762667671433, iteration: 96835
loss: 1.192726969718933,grad_norm: 0.9999992736297653, iteration: 96836
loss: 1.079107403755188,grad_norm: 0.9904077774556799, iteration: 96837
loss: 1.0664674043655396,grad_norm: 0.8762966063506585, iteration: 96838
loss: 1.098101258277893,grad_norm: 0.999999169185574, iteration: 96839
loss: 1.1496702432632446,grad_norm: 0.9999997144453431, iteration: 96840
loss: 1.0417397022247314,grad_norm: 0.8187521415381621, iteration: 96841
loss: 0.9903772473335266,grad_norm: 0.9796498245307179, iteration: 96842
loss: 1.123157262802124,grad_norm: 0.9999998925184643, iteration: 96843
loss: 1.076751470565796,grad_norm: 0.9999996073231732, iteration: 96844
loss: 1.0907063484191895,grad_norm: 0.9004082387207336, iteration: 96845
loss: 0.9984402656555176,grad_norm: 0.8795640074331361, iteration: 96846
loss: 1.1067754030227661,grad_norm: 0.999999729609946, iteration: 96847
loss: 1.0033752918243408,grad_norm: 0.7531865824918231, iteration: 96848
loss: 1.0382623672485352,grad_norm: 0.8983123005707059, iteration: 96849
loss: 1.0214563608169556,grad_norm: 0.9116781439346158, iteration: 96850
loss: 1.2365634441375732,grad_norm: 1.0000000249122591, iteration: 96851
loss: 1.0360032320022583,grad_norm: 0.967590858191224, iteration: 96852
loss: 1.1068085432052612,grad_norm: 0.9999990968751656, iteration: 96853
loss: 1.158338189125061,grad_norm: 0.9999995937817697, iteration: 96854
loss: 1.1528465747833252,grad_norm: 0.999999151141917, iteration: 96855
loss: 1.1538900136947632,grad_norm: 0.9999995731803445, iteration: 96856
loss: 1.0614964962005615,grad_norm: 0.9999991537671361, iteration: 96857
loss: 1.0076792240142822,grad_norm: 0.9886456436850425, iteration: 96858
loss: 1.0149849653244019,grad_norm: 0.8176476000396378, iteration: 96859
loss: 1.1048368215560913,grad_norm: 0.9999990921418151, iteration: 96860
loss: 1.0453819036483765,grad_norm: 0.8800809060511873, iteration: 96861
loss: 1.0333539247512817,grad_norm: 0.9999990054661163, iteration: 96862
loss: 1.0351203680038452,grad_norm: 0.9788095336379475, iteration: 96863
loss: 1.0763885974884033,grad_norm: 0.9999995851059, iteration: 96864
loss: 0.9853015542030334,grad_norm: 0.8487687695509089, iteration: 96865
loss: 1.0279085636138916,grad_norm: 0.9999994606297714, iteration: 96866
loss: 1.011368989944458,grad_norm: 0.9999990587450928, iteration: 96867
loss: 1.001314401626587,grad_norm: 0.9999989803844295, iteration: 96868
loss: 1.0810915231704712,grad_norm: 0.8675649215104224, iteration: 96869
loss: 1.1529756784439087,grad_norm: 0.9999996418402689, iteration: 96870
loss: 1.123156189918518,grad_norm: 0.9999995367652433, iteration: 96871
loss: 1.0357916355133057,grad_norm: 0.9419879566309787, iteration: 96872
loss: 1.0663032531738281,grad_norm: 0.9999998999332856, iteration: 96873
loss: 0.9939220547676086,grad_norm: 0.9814706811097208, iteration: 96874
loss: 1.065726637840271,grad_norm: 0.9999992886505398, iteration: 96875
loss: 1.0504530668258667,grad_norm: 0.9999992010481761, iteration: 96876
loss: 1.0296690464019775,grad_norm: 0.9128959113792074, iteration: 96877
loss: 1.0049799680709839,grad_norm: 0.8174540439800483, iteration: 96878
loss: 1.0196564197540283,grad_norm: 0.9986184468423595, iteration: 96879
loss: 1.0862629413604736,grad_norm: 0.9999999487950754, iteration: 96880
loss: 1.0601657629013062,grad_norm: 0.9999991823967618, iteration: 96881
loss: 1.1201332807540894,grad_norm: 0.999999672811773, iteration: 96882
loss: 1.0499695539474487,grad_norm: 0.9999991012254892, iteration: 96883
loss: 1.0028547048568726,grad_norm: 0.9916959032788294, iteration: 96884
loss: 1.028918981552124,grad_norm: 0.8030126157378131, iteration: 96885
loss: 1.058949589729309,grad_norm: 0.999999150364453, iteration: 96886
loss: 1.0790374279022217,grad_norm: 0.9999999123511257, iteration: 96887
loss: 1.0481822490692139,grad_norm: 0.8928555877911318, iteration: 96888
loss: 1.1220823526382446,grad_norm: 0.99999990266037, iteration: 96889
loss: 1.0134179592132568,grad_norm: 0.8460227515936874, iteration: 96890
loss: 1.1481454372406006,grad_norm: 0.9999990370242333, iteration: 96891
loss: 1.054320216178894,grad_norm: 0.9999991511336277, iteration: 96892
loss: 1.084059476852417,grad_norm: 0.9999999560545467, iteration: 96893
loss: 1.017407774925232,grad_norm: 0.9999995822682326, iteration: 96894
loss: 1.052975058555603,grad_norm: 0.9999991448448683, iteration: 96895
loss: 0.9686697721481323,grad_norm: 0.8088272232738919, iteration: 96896
loss: 1.1045093536376953,grad_norm: 0.9999996967391367, iteration: 96897
loss: 1.0468589067459106,grad_norm: 0.9999991128010485, iteration: 96898
loss: 1.1378087997436523,grad_norm: 0.999999458253067, iteration: 96899
loss: 1.2366260290145874,grad_norm: 0.9999997818755122, iteration: 96900
loss: 1.0836122035980225,grad_norm: 0.9999992114446102, iteration: 96901
loss: 0.9911428689956665,grad_norm: 0.883583020678181, iteration: 96902
loss: 1.032091736793518,grad_norm: 0.8575826348999657, iteration: 96903
loss: 1.1490983963012695,grad_norm: 0.9999990859953117, iteration: 96904
loss: 1.073249340057373,grad_norm: 0.9451519221818119, iteration: 96905
loss: 1.085952877998352,grad_norm: 0.999999505383264, iteration: 96906
loss: 1.148247480392456,grad_norm: 0.9999998756852849, iteration: 96907
loss: 1.0814177989959717,grad_norm: 0.888855673347658, iteration: 96908
loss: 1.1135449409484863,grad_norm: 0.9999998241290358, iteration: 96909
loss: 1.0444347858428955,grad_norm: 0.9999991649001193, iteration: 96910
loss: 1.0911827087402344,grad_norm: 0.9999989650315475, iteration: 96911
loss: 1.0803742408752441,grad_norm: 0.9999989926618733, iteration: 96912
loss: 1.0486444234848022,grad_norm: 0.9999989881924918, iteration: 96913
loss: 1.0665390491485596,grad_norm: 0.9262430589566879, iteration: 96914
loss: 1.0777714252471924,grad_norm: 0.9999991300558434, iteration: 96915
loss: 1.094584584236145,grad_norm: 0.999999169162619, iteration: 96916
loss: 1.106679081916809,grad_norm: 0.9999991099620605, iteration: 96917
loss: 0.9923006892204285,grad_norm: 0.7833663049330382, iteration: 96918
loss: 1.1083415746688843,grad_norm: 0.9999989938585467, iteration: 96919
loss: 1.0925222635269165,grad_norm: 0.9999993378169003, iteration: 96920
loss: 1.0598433017730713,grad_norm: 0.9999991565101579, iteration: 96921
loss: 1.2006711959838867,grad_norm: 0.9999999138999748, iteration: 96922
loss: 1.0172544717788696,grad_norm: 0.9553134926800061, iteration: 96923
loss: 1.0409057140350342,grad_norm: 0.9999991940204096, iteration: 96924
loss: 1.0485225915908813,grad_norm: 0.9999994635447784, iteration: 96925
loss: 1.0348132848739624,grad_norm: 0.9999990399817348, iteration: 96926
loss: 1.0276535749435425,grad_norm: 0.8639693108503431, iteration: 96927
loss: 1.0314232110977173,grad_norm: 0.9999990882805688, iteration: 96928
loss: 1.0575830936431885,grad_norm: 0.9999995649852805, iteration: 96929
loss: 1.1425964832305908,grad_norm: 0.9999991026868026, iteration: 96930
loss: 1.0251188278198242,grad_norm: 0.9999995484777989, iteration: 96931
loss: 1.0225741863250732,grad_norm: 0.9999993251411213, iteration: 96932
loss: 1.065958857536316,grad_norm: 0.9699682041608078, iteration: 96933
loss: 1.0098071098327637,grad_norm: 0.9999996127108444, iteration: 96934
loss: 1.1738687753677368,grad_norm: 0.9999994033180902, iteration: 96935
loss: 0.9988935589790344,grad_norm: 0.9999992070622054, iteration: 96936
loss: 1.0185580253601074,grad_norm: 0.9999993714958183, iteration: 96937
loss: 1.2012649774551392,grad_norm: 0.9999993389615698, iteration: 96938
loss: 1.0932883024215698,grad_norm: 0.9999991295065485, iteration: 96939
loss: 1.0555033683776855,grad_norm: 0.9999995252395637, iteration: 96940
loss: 1.036700963973999,grad_norm: 0.9130768518872185, iteration: 96941
loss: 1.1154323816299438,grad_norm: 0.9999995749103979, iteration: 96942
loss: 1.0218011140823364,grad_norm: 0.9999997007147002, iteration: 96943
loss: 1.0297048091888428,grad_norm: 0.9999997514923099, iteration: 96944
loss: 1.0564360618591309,grad_norm: 0.9999997468268393, iteration: 96945
loss: 1.1394433975219727,grad_norm: 0.999999617998917, iteration: 96946
loss: 1.0814203023910522,grad_norm: 0.9999997247386969, iteration: 96947
loss: 1.064600944519043,grad_norm: 0.9999997533912841, iteration: 96948
loss: 1.043021559715271,grad_norm: 0.9999997064033369, iteration: 96949
loss: 1.0986896753311157,grad_norm: 0.9999992346325148, iteration: 96950
loss: 1.1417864561080933,grad_norm: 0.9999997538430043, iteration: 96951
loss: 1.1071470975875854,grad_norm: 0.9999999492555763, iteration: 96952
loss: 1.3320977687835693,grad_norm: 0.9999993717292429, iteration: 96953
loss: 1.1347359418869019,grad_norm: 0.9999999247647688, iteration: 96954
loss: 1.0450420379638672,grad_norm: 0.9999992623878525, iteration: 96955
loss: 1.1114366054534912,grad_norm: 0.9999993852939842, iteration: 96956
loss: 1.0539990663528442,grad_norm: 0.9893830071891532, iteration: 96957
loss: 1.0047472715377808,grad_norm: 0.9358030136165469, iteration: 96958
loss: 1.1783509254455566,grad_norm: 0.9999998466002286, iteration: 96959
loss: 1.0445741415023804,grad_norm: 0.9999991647485615, iteration: 96960
loss: 1.124914288520813,grad_norm: 0.9999991848502062, iteration: 96961
loss: 1.042766809463501,grad_norm: 0.9999991262202285, iteration: 96962
loss: 1.0963152647018433,grad_norm: 0.9999994571349492, iteration: 96963
loss: 1.076880693435669,grad_norm: 0.9999993092819158, iteration: 96964
loss: 1.1053457260131836,grad_norm: 0.9999998083395979, iteration: 96965
loss: 1.0532443523406982,grad_norm: 0.9166138546413471, iteration: 96966
loss: 1.0711467266082764,grad_norm: 0.9674678535349445, iteration: 96967
loss: 1.0550190210342407,grad_norm: 0.9163147846867279, iteration: 96968
loss: 1.1138355731964111,grad_norm: 0.9999997483445741, iteration: 96969
loss: 1.147626519203186,grad_norm: 0.9999991024109591, iteration: 96970
loss: 1.1666443347930908,grad_norm: 0.999999781724422, iteration: 96971
loss: 1.307012677192688,grad_norm: 0.9999999840671252, iteration: 96972
loss: 1.0803802013397217,grad_norm: 0.9999996336866093, iteration: 96973
loss: 0.986345648765564,grad_norm: 0.7211000874250636, iteration: 96974
loss: 1.1863967180252075,grad_norm: 0.9999992312621132, iteration: 96975
loss: 1.10738205909729,grad_norm: 0.9999995042948604, iteration: 96976
loss: 1.0080357789993286,grad_norm: 0.9999997042412402, iteration: 96977
loss: 1.0170624256134033,grad_norm: 0.949194095071453, iteration: 96978
loss: 1.0019984245300293,grad_norm: 0.8953509571109856, iteration: 96979
loss: 1.0450327396392822,grad_norm: 0.9999999551685577, iteration: 96980
loss: 0.984102725982666,grad_norm: 0.7733345890665239, iteration: 96981
loss: 1.0437134504318237,grad_norm: 0.999999674851233, iteration: 96982
loss: 1.1700807809829712,grad_norm: 0.9999992383844742, iteration: 96983
loss: 0.9988161325454712,grad_norm: 1.0000000289291273, iteration: 96984
loss: 1.1582387685775757,grad_norm: 0.9999998885471357, iteration: 96985
loss: 1.0840390920639038,grad_norm: 0.9217340753785018, iteration: 96986
loss: 1.0548014640808105,grad_norm: 0.9999998765287371, iteration: 96987
loss: 1.0736875534057617,grad_norm: 0.9999994532281506, iteration: 96988
loss: 1.0172622203826904,grad_norm: 0.9999992067768431, iteration: 96989
loss: 1.112734317779541,grad_norm: 0.9999991195680101, iteration: 96990
loss: 1.112130880355835,grad_norm: 0.9999993462522415, iteration: 96991
loss: 1.0361472368240356,grad_norm: 0.9799369595476259, iteration: 96992
loss: 1.059951663017273,grad_norm: 0.9999993684775057, iteration: 96993
loss: 1.1266872882843018,grad_norm: 0.9041821613497, iteration: 96994
loss: 1.1945830583572388,grad_norm: 0.999999651330843, iteration: 96995
loss: 1.0262898206710815,grad_norm: 0.8796900014047019, iteration: 96996
loss: 1.0815951824188232,grad_norm: 0.9999999032869571, iteration: 96997
loss: 1.0300233364105225,grad_norm: 0.9999992557208235, iteration: 96998
loss: 1.130246639251709,grad_norm: 0.9999995509229868, iteration: 96999
loss: 1.0766907930374146,grad_norm: 0.9768284232399537, iteration: 97000
loss: 1.1121021509170532,grad_norm: 0.9999994771389703, iteration: 97001
loss: 1.062831163406372,grad_norm: 0.9999990320652222, iteration: 97002
loss: 1.04977548122406,grad_norm: 0.8640618764393707, iteration: 97003
loss: 1.053282380104065,grad_norm: 0.8732482386501316, iteration: 97004
loss: 1.1868840456008911,grad_norm: 0.999999714586426, iteration: 97005
loss: 1.0526704788208008,grad_norm: 0.9324344973297083, iteration: 97006
loss: 1.038074254989624,grad_norm: 0.9305164093813362, iteration: 97007
loss: 1.1557140350341797,grad_norm: 0.9999998020612734, iteration: 97008
loss: 0.9846177101135254,grad_norm: 0.9999991747281944, iteration: 97009
loss: 1.0889220237731934,grad_norm: 0.9999997762951938, iteration: 97010
loss: 0.9948930144309998,grad_norm: 0.9999992570885102, iteration: 97011
loss: 1.0212795734405518,grad_norm: 0.8011650309326973, iteration: 97012
loss: 1.1352593898773193,grad_norm: 0.9999998775861629, iteration: 97013
loss: 1.1424708366394043,grad_norm: 0.9999997704067928, iteration: 97014
loss: 1.0367416143417358,grad_norm: 0.9957912920304518, iteration: 97015
loss: 1.0298521518707275,grad_norm: 0.9999996808884952, iteration: 97016
loss: 1.0659916400909424,grad_norm: 0.9999997278808885, iteration: 97017
loss: 1.1837784051895142,grad_norm: 0.9999998227942014, iteration: 97018
loss: 1.012990117073059,grad_norm: 0.8191359623259461, iteration: 97019
loss: 0.9990342855453491,grad_norm: 0.9999997406380053, iteration: 97020
loss: 1.2022502422332764,grad_norm: 0.999999946856157, iteration: 97021
loss: 1.045056939125061,grad_norm: 0.9859455782243371, iteration: 97022
loss: 1.0589511394500732,grad_norm: 0.8830633012826221, iteration: 97023
loss: 1.0670593976974487,grad_norm: 0.999999650869206, iteration: 97024
loss: 1.3656831979751587,grad_norm: 0.9999998786878213, iteration: 97025
loss: 1.083228588104248,grad_norm: 0.9999996013679815, iteration: 97026
loss: 1.0088857412338257,grad_norm: 0.8408401554901169, iteration: 97027
loss: 1.1566191911697388,grad_norm: 0.9999994783483703, iteration: 97028
loss: 1.1402257680892944,grad_norm: 0.9999996233332306, iteration: 97029
loss: 1.0444040298461914,grad_norm: 0.9097789609022606, iteration: 97030
loss: 1.1091974973678589,grad_norm: 0.9999991596780262, iteration: 97031
loss: 1.1906200647354126,grad_norm: 0.9674567977060037, iteration: 97032
loss: 1.1017941236495972,grad_norm: 0.9999992057516486, iteration: 97033
loss: 1.0446299314498901,grad_norm: 0.935332919829158, iteration: 97034
loss: 1.0351983308792114,grad_norm: 0.9066646546959084, iteration: 97035
loss: 1.0502582788467407,grad_norm: 0.9999991074340592, iteration: 97036
loss: 1.0548434257507324,grad_norm: 0.9999995713256911, iteration: 97037
loss: 1.124738097190857,grad_norm: 0.9999991473682055, iteration: 97038
loss: 1.193869709968567,grad_norm: 0.9999991887470583, iteration: 97039
loss: 1.1080167293548584,grad_norm: 0.9999997113089966, iteration: 97040
loss: 1.079062819480896,grad_norm: 0.999999226330036, iteration: 97041
loss: 1.0177360773086548,grad_norm: 0.9999990791893102, iteration: 97042
loss: 1.048728585243225,grad_norm: 0.9199495901991841, iteration: 97043
loss: 1.0237005949020386,grad_norm: 0.9999991400665327, iteration: 97044
loss: 1.077413558959961,grad_norm: 0.8942357823211511, iteration: 97045
loss: 0.9882133603096008,grad_norm: 0.9999991618904887, iteration: 97046
loss: 1.0517023801803589,grad_norm: 0.9999992816363031, iteration: 97047
loss: 1.06776762008667,grad_norm: 0.9133862446723767, iteration: 97048
loss: 1.045921802520752,grad_norm: 0.9999990377462936, iteration: 97049
loss: 1.055754542350769,grad_norm: 0.9999991726252708, iteration: 97050
loss: 1.0877028703689575,grad_norm: 0.9999992764739292, iteration: 97051
loss: 1.1473842859268188,grad_norm: 0.9999992471877479, iteration: 97052
loss: 1.0851243734359741,grad_norm: 0.9999998268589915, iteration: 97053
loss: 1.1990046501159668,grad_norm: 0.9999992757470783, iteration: 97054
loss: 1.2087314128875732,grad_norm: 0.9999992349706377, iteration: 97055
loss: 0.9920575022697449,grad_norm: 0.9874264797961843, iteration: 97056
loss: 1.012973666191101,grad_norm: 0.999999095583448, iteration: 97057
loss: 1.0288366079330444,grad_norm: 0.9999990960421125, iteration: 97058
loss: 0.9787283539772034,grad_norm: 0.882426031325782, iteration: 97059
loss: 1.0247493982315063,grad_norm: 0.9999991725358509, iteration: 97060
loss: 1.1611754894256592,grad_norm: 0.9999994441743091, iteration: 97061
loss: 1.0658433437347412,grad_norm: 0.961629776579248, iteration: 97062
loss: 1.037584662437439,grad_norm: 0.8872674850079187, iteration: 97063
loss: 1.1098506450653076,grad_norm: 0.9999996257683935, iteration: 97064
loss: 1.0319548845291138,grad_norm: 0.999999720989544, iteration: 97065
loss: 1.0385181903839111,grad_norm: 0.9999999127865247, iteration: 97066
loss: 1.0436331033706665,grad_norm: 0.9999998558927673, iteration: 97067
loss: 1.0349433422088623,grad_norm: 0.7984214190310207, iteration: 97068
loss: 1.078091025352478,grad_norm: 0.9999997565000742, iteration: 97069
loss: 1.0584009885787964,grad_norm: 0.9133302335472527, iteration: 97070
loss: 1.175075650215149,grad_norm: 0.9999995730863486, iteration: 97071
loss: 1.0721765756607056,grad_norm: 0.9999996228291445, iteration: 97072
loss: 1.0947983264923096,grad_norm: 0.9999991459417581, iteration: 97073
loss: 1.1174439191818237,grad_norm: 0.8621549266104293, iteration: 97074
loss: 0.991959273815155,grad_norm: 0.7808142032043343, iteration: 97075
loss: 1.010716199874878,grad_norm: 0.999999171845141, iteration: 97076
loss: 1.025246500968933,grad_norm: 0.9999990709482616, iteration: 97077
loss: 1.1028475761413574,grad_norm: 0.9673536654263863, iteration: 97078
loss: 1.191607117652893,grad_norm: 0.9999997118644127, iteration: 97079
loss: 1.1705220937728882,grad_norm: 0.9762417072585162, iteration: 97080
loss: 1.0003459453582764,grad_norm: 0.9999990319495974, iteration: 97081
loss: 0.9904390573501587,grad_norm: 0.9999990607397956, iteration: 97082
loss: 1.0505495071411133,grad_norm: 0.9999996755593418, iteration: 97083
loss: 1.0800279378890991,grad_norm: 0.9999991034700607, iteration: 97084
loss: 1.0894373655319214,grad_norm: 0.8574695317697338, iteration: 97085
loss: 1.1335445642471313,grad_norm: 0.9999995015854484, iteration: 97086
loss: 1.013984203338623,grad_norm: 0.9011074943597104, iteration: 97087
loss: 1.0460236072540283,grad_norm: 0.9999991253253498, iteration: 97088
loss: 1.0741932392120361,grad_norm: 0.9999992761258657, iteration: 97089
loss: 1.073388695716858,grad_norm: 0.9999991474138354, iteration: 97090
loss: 1.102019190788269,grad_norm: 0.9999993289768683, iteration: 97091
loss: 1.0936901569366455,grad_norm: 0.9999992008111758, iteration: 97092
loss: 1.0981841087341309,grad_norm: 0.9999989996698567, iteration: 97093
loss: 1.0076632499694824,grad_norm: 0.8648918012022732, iteration: 97094
loss: 1.0529283285140991,grad_norm: 0.8420843279631831, iteration: 97095
loss: 0.9895987510681152,grad_norm: 0.8928374548164075, iteration: 97096
loss: 1.2220313549041748,grad_norm: 0.9999995092097124, iteration: 97097
loss: 1.1242676973342896,grad_norm: 0.9999991273708743, iteration: 97098
loss: 1.0631223917007446,grad_norm: 0.9999999384860895, iteration: 97099
loss: 1.057213544845581,grad_norm: 0.9549267712657626, iteration: 97100
loss: 1.033864974975586,grad_norm: 0.9999998153874576, iteration: 97101
loss: 1.1340965032577515,grad_norm: 0.9999992849746456, iteration: 97102
loss: 1.1633069515228271,grad_norm: 0.9999998905784017, iteration: 97103
loss: 1.1152535676956177,grad_norm: 0.9999990295238842, iteration: 97104
loss: 1.0606225728988647,grad_norm: 0.9999990843279968, iteration: 97105
loss: 1.0144637823104858,grad_norm: 0.879207851033012, iteration: 97106
loss: 1.0391749143600464,grad_norm: 0.9999990256101688, iteration: 97107
loss: 1.112887978553772,grad_norm: 0.9999991617506587, iteration: 97108
loss: 1.1086714267730713,grad_norm: 0.9999999933631297, iteration: 97109
loss: 0.9912590980529785,grad_norm: 0.8641018539311759, iteration: 97110
loss: 1.084317684173584,grad_norm: 0.9999991149181131, iteration: 97111
loss: 1.0619378089904785,grad_norm: 0.9999992676652383, iteration: 97112
loss: 1.052497386932373,grad_norm: 0.9999989746021936, iteration: 97113
loss: 1.029907464981079,grad_norm: 0.999999575672086, iteration: 97114
loss: 1.054661512374878,grad_norm: 0.9140904236308287, iteration: 97115
loss: 1.0774494409561157,grad_norm: 0.9276150191318371, iteration: 97116
loss: 1.2460386753082275,grad_norm: 0.99999959646815, iteration: 97117
loss: 1.0213836431503296,grad_norm: 0.9999992369345416, iteration: 97118
loss: 1.050379753112793,grad_norm: 0.9960473820405823, iteration: 97119
loss: 1.0376765727996826,grad_norm: 0.9999994179008074, iteration: 97120
loss: 1.0359545946121216,grad_norm: 0.9642721851984604, iteration: 97121
loss: 1.027695655822754,grad_norm: 0.999999057528242, iteration: 97122
loss: 1.036320447921753,grad_norm: 0.999999777199452, iteration: 97123
loss: 1.0589934587478638,grad_norm: 0.8783009717059248, iteration: 97124
loss: 0.9753344058990479,grad_norm: 0.9999989202520944, iteration: 97125
loss: 1.0870473384857178,grad_norm: 0.8942591354367296, iteration: 97126
loss: 1.0093673467636108,grad_norm: 0.9992185213167148, iteration: 97127
loss: 1.003159999847412,grad_norm: 0.9765168554435725, iteration: 97128
loss: 1.1394643783569336,grad_norm: 0.9999999112033434, iteration: 97129
loss: 1.089702844619751,grad_norm: 0.9999991952901734, iteration: 97130
loss: 1.0102018117904663,grad_norm: 0.9999993853789678, iteration: 97131
loss: 1.0897213220596313,grad_norm: 0.8958812026195012, iteration: 97132
loss: 1.0400879383087158,grad_norm: 0.9999993872073621, iteration: 97133
loss: 1.0672115087509155,grad_norm: 0.9999996739712016, iteration: 97134
loss: 1.0416288375854492,grad_norm: 0.9999991081903135, iteration: 97135
loss: 1.2248941659927368,grad_norm: 0.9999997496006573, iteration: 97136
loss: 1.2315795421600342,grad_norm: 0.9999998369670231, iteration: 97137
loss: 1.0456585884094238,grad_norm: 0.9898230502845676, iteration: 97138
loss: 0.9963648915290833,grad_norm: 0.9999993168135091, iteration: 97139
loss: 1.114443063735962,grad_norm: 0.9999993516649017, iteration: 97140
loss: 1.0060206651687622,grad_norm: 0.9999990893022009, iteration: 97141
loss: 1.0028760433197021,grad_norm: 0.9783511308225149, iteration: 97142
loss: 1.188997745513916,grad_norm: 0.9999996525100416, iteration: 97143
loss: 1.1021021604537964,grad_norm: 0.9999990817123101, iteration: 97144
loss: 1.0720337629318237,grad_norm: 0.925811895154172, iteration: 97145
loss: 0.9707778692245483,grad_norm: 0.9581378883163657, iteration: 97146
loss: 1.1115926504135132,grad_norm: 0.9999998519961484, iteration: 97147
loss: 1.1416432857513428,grad_norm: 0.9999993480564062, iteration: 97148
loss: 1.018505573272705,grad_norm: 0.9999994398398406, iteration: 97149
loss: 1.180740475654602,grad_norm: 0.9520079818935817, iteration: 97150
loss: 1.0522714853286743,grad_norm: 0.9416771136772576, iteration: 97151
loss: 1.2126431465148926,grad_norm: 0.9999992149827662, iteration: 97152
loss: 1.0323518514633179,grad_norm: 0.9933756499503882, iteration: 97153
loss: 1.2006829977035522,grad_norm: 0.999999806252413, iteration: 97154
loss: 1.0088269710540771,grad_norm: 0.9928194377235852, iteration: 97155
loss: 1.0368417501449585,grad_norm: 0.9999991782037826, iteration: 97156
loss: 0.9835036396980286,grad_norm: 0.9999992354053153, iteration: 97157
loss: 1.1251885890960693,grad_norm: 0.9999992014569651, iteration: 97158
loss: 1.188534140586853,grad_norm: 0.9999997948908028, iteration: 97159
loss: 1.013609528541565,grad_norm: 0.9999991359202509, iteration: 97160
loss: 1.1060811281204224,grad_norm: 0.9999996722952738, iteration: 97161
loss: 1.0844643115997314,grad_norm: 0.9999991266663933, iteration: 97162
loss: 1.000929594039917,grad_norm: 0.9999992496096847, iteration: 97163
loss: 1.031699776649475,grad_norm: 0.7337432536627142, iteration: 97164
loss: 1.0256458520889282,grad_norm: 0.9187946688549412, iteration: 97165
loss: 1.0451865196228027,grad_norm: 0.9999998246802753, iteration: 97166
loss: 1.104339361190796,grad_norm: 0.9999992351981638, iteration: 97167
loss: 1.0346392393112183,grad_norm: 0.9999992044997713, iteration: 97168
loss: 0.9929168820381165,grad_norm: 0.965875250608165, iteration: 97169
loss: 1.0624877214431763,grad_norm: 0.9999995135927482, iteration: 97170
loss: 1.0704518556594849,grad_norm: 0.9835087160661782, iteration: 97171
loss: 1.035225749015808,grad_norm: 0.9446919365576772, iteration: 97172
loss: 1.1176525354385376,grad_norm: 0.9999997027650985, iteration: 97173
loss: 1.134854793548584,grad_norm: 0.9999996509149341, iteration: 97174
loss: 1.0434551239013672,grad_norm: 0.9999991806660389, iteration: 97175
loss: 1.0727157592773438,grad_norm: 0.9999991664018663, iteration: 97176
loss: 0.9963505864143372,grad_norm: 0.999999101837064, iteration: 97177
loss: 1.093223214149475,grad_norm: 0.9999991510552465, iteration: 97178
loss: 1.1009830236434937,grad_norm: 0.999999824611667, iteration: 97179
loss: 1.1223325729370117,grad_norm: 0.974845574341247, iteration: 97180
loss: 1.0780287981033325,grad_norm: 0.9999992415027502, iteration: 97181
loss: 1.1166819334030151,grad_norm: 0.9999993365067121, iteration: 97182
loss: 1.0563397407531738,grad_norm: 0.9999991438174576, iteration: 97183
loss: 1.0223461389541626,grad_norm: 0.7638189529894199, iteration: 97184
loss: 1.0217198133468628,grad_norm: 0.9999999213643913, iteration: 97185
loss: 1.0318630933761597,grad_norm: 0.8938589688951972, iteration: 97186
loss: 1.10724937915802,grad_norm: 0.9999997345700004, iteration: 97187
loss: 1.0245376825332642,grad_norm: 0.9615767311907967, iteration: 97188
loss: 1.1291898488998413,grad_norm: 0.9999998456278351, iteration: 97189
loss: 1.1981488466262817,grad_norm: 0.999999877943196, iteration: 97190
loss: 1.0518808364868164,grad_norm: 0.8133624844670785, iteration: 97191
loss: 1.0929677486419678,grad_norm: 0.86336632321191, iteration: 97192
loss: 1.0537872314453125,grad_norm: 0.9007719941226685, iteration: 97193
loss: 1.1426331996917725,grad_norm: 0.9999995347500997, iteration: 97194
loss: 1.0881258249282837,grad_norm: 0.9999992931314845, iteration: 97195
loss: 1.0512163639068604,grad_norm: 0.999999088359415, iteration: 97196
loss: 1.0296356678009033,grad_norm: 0.9999998443360292, iteration: 97197
loss: 1.0466392040252686,grad_norm: 0.9999992453277187, iteration: 97198
loss: 1.0526928901672363,grad_norm: 0.9999996696933767, iteration: 97199
loss: 1.0428779125213623,grad_norm: 0.9999995873015448, iteration: 97200
loss: 1.0233514308929443,grad_norm: 0.8114192721779385, iteration: 97201
loss: 1.0132700204849243,grad_norm: 0.94441328177343, iteration: 97202
loss: 1.097692847251892,grad_norm: 0.9999997980684309, iteration: 97203
loss: 1.0130306482315063,grad_norm: 0.9999991144278618, iteration: 97204
loss: 1.0510532855987549,grad_norm: 0.9999991046750535, iteration: 97205
loss: 1.0046865940093994,grad_norm: 0.880534566187906, iteration: 97206
loss: 1.1030128002166748,grad_norm: 0.9999991883018079, iteration: 97207
loss: 0.9917606711387634,grad_norm: 0.9999990197763627, iteration: 97208
loss: 1.0931344032287598,grad_norm: 0.9999993790417379, iteration: 97209
loss: 1.077497124671936,grad_norm: 0.999999180316439, iteration: 97210
loss: 0.9894014000892639,grad_norm: 0.9616744549083414, iteration: 97211
loss: 1.010177731513977,grad_norm: 0.8430495322167357, iteration: 97212
loss: 1.065980076789856,grad_norm: 0.7867854329697721, iteration: 97213
loss: 1.018033504486084,grad_norm: 0.9999990689400108, iteration: 97214
loss: 1.0947445631027222,grad_norm: 0.9999994252341702, iteration: 97215
loss: 1.0086791515350342,grad_norm: 0.8653075603607611, iteration: 97216
loss: 1.0517464876174927,grad_norm: 0.9999994560723635, iteration: 97217
loss: 1.0421289205551147,grad_norm: 0.8754597756223436, iteration: 97218
loss: 0.967820405960083,grad_norm: 0.949190024955276, iteration: 97219
loss: 1.0557795763015747,grad_norm: 0.85879818191674, iteration: 97220
loss: 1.0687487125396729,grad_norm: 0.9999995337024581, iteration: 97221
loss: 1.0624516010284424,grad_norm: 0.9999992689661483, iteration: 97222
loss: 1.0278799533843994,grad_norm: 0.9999997828874118, iteration: 97223
loss: 1.0911405086517334,grad_norm: 0.9999997647838624, iteration: 97224
loss: 1.0048338174819946,grad_norm: 0.9688153672019164, iteration: 97225
loss: 1.0932289361953735,grad_norm: 0.931449125842913, iteration: 97226
loss: 1.0379655361175537,grad_norm: 0.9999995471701109, iteration: 97227
loss: 1.181631088256836,grad_norm: 0.9999998655982698, iteration: 97228
loss: 1.0725150108337402,grad_norm: 0.9999990112481026, iteration: 97229
loss: 1.0673596858978271,grad_norm: 0.9378464729601642, iteration: 97230
loss: 1.0059046745300293,grad_norm: 0.7992019673746305, iteration: 97231
loss: 1.0773941278457642,grad_norm: 0.999999207634599, iteration: 97232
loss: 1.0000027418136597,grad_norm: 0.999999237548177, iteration: 97233
loss: 1.0394994020462036,grad_norm: 0.9318749553309559, iteration: 97234
loss: 1.0155304670333862,grad_norm: 0.9999997176963383, iteration: 97235
loss: 1.0102975368499756,grad_norm: 0.8361751501637091, iteration: 97236
loss: 1.035062313079834,grad_norm: 0.9999996809188518, iteration: 97237
loss: 1.0467413663864136,grad_norm: 0.9645949546596092, iteration: 97238
loss: 0.9916348457336426,grad_norm: 0.8361173093561336, iteration: 97239
loss: 1.0207059383392334,grad_norm: 0.9999991144106347, iteration: 97240
loss: 1.0158344507217407,grad_norm: 0.999999409401743, iteration: 97241
loss: 1.1157939434051514,grad_norm: 0.9999994931288096, iteration: 97242
loss: 1.083834171295166,grad_norm: 0.9999992036821224, iteration: 97243
loss: 1.0136812925338745,grad_norm: 0.9999991757671095, iteration: 97244
loss: 1.117942452430725,grad_norm: 0.9999996515693999, iteration: 97245
loss: 1.20322585105896,grad_norm: 0.9999991866170916, iteration: 97246
loss: 1.056461215019226,grad_norm: 0.9999994873980951, iteration: 97247
loss: 1.008599877357483,grad_norm: 0.8587471874186613, iteration: 97248
loss: 1.0738534927368164,grad_norm: 0.999999300011666, iteration: 97249
loss: 1.1345592737197876,grad_norm: 0.9950906027311657, iteration: 97250
loss: 1.1453912258148193,grad_norm: 0.9999998829025509, iteration: 97251
loss: 1.2231403589248657,grad_norm: 0.9999991377416066, iteration: 97252
loss: 1.1016117334365845,grad_norm: 0.9863817057209627, iteration: 97253
loss: 1.0012682676315308,grad_norm: 0.9999992196361359, iteration: 97254
loss: 1.0278708934783936,grad_norm: 0.8050297040838748, iteration: 97255
loss: 1.0731228590011597,grad_norm: 0.9999993028496076, iteration: 97256
loss: 1.0442882776260376,grad_norm: 0.9850016679758602, iteration: 97257
loss: 1.1362566947937012,grad_norm: 0.9999995146286419, iteration: 97258
loss: 1.0085192918777466,grad_norm: 0.9015548491904708, iteration: 97259
loss: 1.1012743711471558,grad_norm: 0.9999995412068885, iteration: 97260
loss: 1.013081669807434,grad_norm: 0.9049531472565033, iteration: 97261
loss: 1.0645259618759155,grad_norm: 0.9999992750784052, iteration: 97262
loss: 1.0964804887771606,grad_norm: 0.9999996618291584, iteration: 97263
loss: 1.0509661436080933,grad_norm: 0.826246582913265, iteration: 97264
loss: 1.0341523885726929,grad_norm: 0.7956050275578368, iteration: 97265
loss: 1.0883369445800781,grad_norm: 0.9999993308872298, iteration: 97266
loss: 1.121143102645874,grad_norm: 0.8934941896301863, iteration: 97267
loss: 1.0587990283966064,grad_norm: 0.9999994031024207, iteration: 97268
loss: 1.0347378253936768,grad_norm: 0.9999992020140946, iteration: 97269
loss: 1.0999394655227661,grad_norm: 0.9999991859476639, iteration: 97270
loss: 1.0873608589172363,grad_norm: 0.999999419098018, iteration: 97271
loss: 1.0350626707077026,grad_norm: 0.9679968012863065, iteration: 97272
loss: 1.0213268995285034,grad_norm: 0.9170385932821808, iteration: 97273
loss: 0.9822981953620911,grad_norm: 0.9802535795144884, iteration: 97274
loss: 0.9848179221153259,grad_norm: 0.8286006793914921, iteration: 97275
loss: 1.012696623802185,grad_norm: 0.9798300167565018, iteration: 97276
loss: 1.0681285858154297,grad_norm: 0.9999992864318028, iteration: 97277
loss: 1.0266704559326172,grad_norm: 0.999999092758827, iteration: 97278
loss: 1.0063754320144653,grad_norm: 0.9999994620386459, iteration: 97279
loss: 1.104941725730896,grad_norm: 0.9999993655157788, iteration: 97280
loss: 1.0003539323806763,grad_norm: 0.895048969685819, iteration: 97281
loss: 1.0565217733383179,grad_norm: 0.7261476923175214, iteration: 97282
loss: 1.0171563625335693,grad_norm: 0.9999992043021748, iteration: 97283
loss: 1.029799461364746,grad_norm: 0.9999991797577629, iteration: 97284
loss: 1.0782747268676758,grad_norm: 0.9999991989775011, iteration: 97285
loss: 1.00886869430542,grad_norm: 0.9986537805630742, iteration: 97286
loss: 1.0262322425842285,grad_norm: 0.8361340108922924, iteration: 97287
loss: 1.0529074668884277,grad_norm: 0.9999993644674452, iteration: 97288
loss: 1.00682532787323,grad_norm: 0.8527243549751642, iteration: 97289
loss: 1.2695029973983765,grad_norm: 0.999999403168228, iteration: 97290
loss: 1.0788625478744507,grad_norm: 0.9162223215279125, iteration: 97291
loss: 1.083937406539917,grad_norm: 0.9999996846882323, iteration: 97292
loss: 1.0702425241470337,grad_norm: 0.7011256727199885, iteration: 97293
loss: 0.9999876618385315,grad_norm: 0.9599307345227587, iteration: 97294
loss: 0.9839968681335449,grad_norm: 0.9999992302867524, iteration: 97295
loss: 1.0750232934951782,grad_norm: 0.9999991031533595, iteration: 97296
loss: 1.0935661792755127,grad_norm: 0.9999991681416348, iteration: 97297
loss: 1.0056507587432861,grad_norm: 0.8379694422634274, iteration: 97298
loss: 1.061761498451233,grad_norm: 0.9999991865406318, iteration: 97299
loss: 1.038221001625061,grad_norm: 0.89491513614896, iteration: 97300
loss: 1.0241695642471313,grad_norm: 0.9999993346296726, iteration: 97301
loss: 0.9890154004096985,grad_norm: 0.8961763727712059, iteration: 97302
loss: 1.0307146310806274,grad_norm: 0.9999994746803745, iteration: 97303
loss: 1.13644278049469,grad_norm: 0.9999992202654698, iteration: 97304
loss: 1.015865445137024,grad_norm: 0.9337503130982261, iteration: 97305
loss: 1.1230937242507935,grad_norm: 0.999999139274359, iteration: 97306
loss: 1.0053774118423462,grad_norm: 0.8338484975150479, iteration: 97307
loss: 1.0293699502944946,grad_norm: 0.9999992615560304, iteration: 97308
loss: 1.0106006860733032,grad_norm: 0.9301149057482023, iteration: 97309
loss: 1.0245399475097656,grad_norm: 0.9999993176010965, iteration: 97310
loss: 1.0181434154510498,grad_norm: 0.9514344194400358, iteration: 97311
loss: 1.125786542892456,grad_norm: 0.9999991696960432, iteration: 97312
loss: 1.0134022235870361,grad_norm: 0.9567676677542262, iteration: 97313
loss: 1.0609920024871826,grad_norm: 0.9999991174138637, iteration: 97314
loss: 1.0667458772659302,grad_norm: 0.9999991257670375, iteration: 97315
loss: 1.0485811233520508,grad_norm: 0.9999991204379918, iteration: 97316
loss: 0.9962050318717957,grad_norm: 0.9999991115201081, iteration: 97317
loss: 1.0894653797149658,grad_norm: 0.9999993138748521, iteration: 97318
loss: 1.0327328443527222,grad_norm: 0.9999992718174798, iteration: 97319
loss: 0.9826441407203674,grad_norm: 0.9999995693118785, iteration: 97320
loss: 1.0236397981643677,grad_norm: 0.999999013632339, iteration: 97321
loss: 1.1163058280944824,grad_norm: 0.9999991547352782, iteration: 97322
loss: 1.0487099885940552,grad_norm: 0.9999992614722015, iteration: 97323
loss: 0.9705034494400024,grad_norm: 0.9999991578659182, iteration: 97324
loss: 1.071446418762207,grad_norm: 0.999999362647187, iteration: 97325
loss: 1.008253574371338,grad_norm: 0.999999162341058, iteration: 97326
loss: 1.0418908596038818,grad_norm: 0.9449135889183284, iteration: 97327
loss: 1.0095813274383545,grad_norm: 0.9999993704029341, iteration: 97328
loss: 1.0722570419311523,grad_norm: 0.9999990164145841, iteration: 97329
loss: 0.9413055777549744,grad_norm: 0.9999989808138213, iteration: 97330
loss: 1.006056785583496,grad_norm: 0.8349343423469636, iteration: 97331
loss: 1.0182604789733887,grad_norm: 0.9999991594939711, iteration: 97332
loss: 1.0490491390228271,grad_norm: 0.9999996419325903, iteration: 97333
loss: 1.0468034744262695,grad_norm: 0.9999992583146771, iteration: 97334
loss: 1.1132510900497437,grad_norm: 0.9999996492697981, iteration: 97335
loss: 1.0476293563842773,grad_norm: 0.9999990991714219, iteration: 97336
loss: 0.9810460209846497,grad_norm: 0.8371807747592566, iteration: 97337
loss: 1.0553511381149292,grad_norm: 0.9999998489949615, iteration: 97338
loss: 1.1096467971801758,grad_norm: 0.8780885525769656, iteration: 97339
loss: 1.0134024620056152,grad_norm: 0.9999990724988883, iteration: 97340
loss: 1.0020732879638672,grad_norm: 0.7785275574912319, iteration: 97341
loss: 1.0211312770843506,grad_norm: 0.9999996598135448, iteration: 97342
loss: 1.0469077825546265,grad_norm: 0.8461138284426631, iteration: 97343
loss: 1.113807201385498,grad_norm: 0.9999991000079428, iteration: 97344
loss: 1.0991673469543457,grad_norm: 0.9999998717336966, iteration: 97345
loss: 1.1905779838562012,grad_norm: 0.9999997440818006, iteration: 97346
loss: 0.9804497957229614,grad_norm: 0.9999996200789669, iteration: 97347
loss: 1.1163315773010254,grad_norm: 0.9999990300942075, iteration: 97348
loss: 1.1244173049926758,grad_norm: 0.9171512075442723, iteration: 97349
loss: 0.974179208278656,grad_norm: 0.8045797857530244, iteration: 97350
loss: 1.1016459465026855,grad_norm: 0.9999994634149353, iteration: 97351
loss: 0.9701764583587646,grad_norm: 0.8772225765293225, iteration: 97352
loss: 1.096219778060913,grad_norm: 0.9999990993715582, iteration: 97353
loss: 0.9891070127487183,grad_norm: 0.7976933866589468, iteration: 97354
loss: 1.034417748451233,grad_norm: 0.8604750037478571, iteration: 97355
loss: 1.0818233489990234,grad_norm: 0.9999991606424906, iteration: 97356
loss: 0.955898642539978,grad_norm: 0.8981367637360063, iteration: 97357
loss: 1.0227361917495728,grad_norm: 0.7949972761100293, iteration: 97358
loss: 1.0286680459976196,grad_norm: 0.9999991151163597, iteration: 97359
loss: 1.0768258571624756,grad_norm: 0.8694547412733524, iteration: 97360
loss: 1.0755451917648315,grad_norm: 0.9999990386824454, iteration: 97361
loss: 1.1073485612869263,grad_norm: 0.9999993417807154, iteration: 97362
loss: 1.010444164276123,grad_norm: 0.7127179210240828, iteration: 97363
loss: 1.0396353006362915,grad_norm: 0.9468993228530556, iteration: 97364
loss: 1.040303349494934,grad_norm: 0.9041982540185355, iteration: 97365
loss: 1.041619062423706,grad_norm: 0.8169085568725367, iteration: 97366
loss: 1.0235728025436401,grad_norm: 0.868689708784228, iteration: 97367
loss: 1.0365500450134277,grad_norm: 0.821149627972266, iteration: 97368
loss: 1.0342166423797607,grad_norm: 0.7393002370938826, iteration: 97369
loss: 1.0602730512619019,grad_norm: 0.9425909152974857, iteration: 97370
loss: 0.9796279072761536,grad_norm: 0.848707824150734, iteration: 97371
loss: 1.0374394655227661,grad_norm: 0.8961972995263972, iteration: 97372
loss: 1.0150071382522583,grad_norm: 0.8192917755266828, iteration: 97373
loss: 1.0986838340759277,grad_norm: 0.9999992743857956, iteration: 97374
loss: 1.1024943590164185,grad_norm: 0.9999994301735071, iteration: 97375
loss: 0.9627266526222229,grad_norm: 0.9684929116494168, iteration: 97376
loss: 1.0898791551589966,grad_norm: 0.9999991670458294, iteration: 97377
loss: 1.0044097900390625,grad_norm: 0.9841948079548795, iteration: 97378
loss: 0.9984582662582397,grad_norm: 0.9999991972593965, iteration: 97379
loss: 1.0055370330810547,grad_norm: 0.9991868779521624, iteration: 97380
loss: 1.054677128791809,grad_norm: 0.9999994276060813, iteration: 97381
loss: 1.146158218383789,grad_norm: 0.9999993548574433, iteration: 97382
loss: 1.057360053062439,grad_norm: 0.8933050365125365, iteration: 97383
loss: 1.079363465309143,grad_norm: 0.9999992772992309, iteration: 97384
loss: 1.006131649017334,grad_norm: 0.8393351486545428, iteration: 97385
loss: 1.0587629079818726,grad_norm: 0.860363834849378, iteration: 97386
loss: 1.0217530727386475,grad_norm: 0.999999680819392, iteration: 97387
loss: 1.075736165046692,grad_norm: 0.8963124739200786, iteration: 97388
loss: 1.1435381174087524,grad_norm: 0.9999994030701408, iteration: 97389
loss: 1.0567631721496582,grad_norm: 0.9470559518891536, iteration: 97390
loss: 1.0983710289001465,grad_norm: 0.9999995272642704, iteration: 97391
loss: 1.0991544723510742,grad_norm: 0.9999991927956906, iteration: 97392
loss: 1.0703461170196533,grad_norm: 0.9181964481611228, iteration: 97393
loss: 1.0124613046646118,grad_norm: 0.9814509390312781, iteration: 97394
loss: 1.05120849609375,grad_norm: 0.9999990757081005, iteration: 97395
loss: 1.044575572013855,grad_norm: 0.9040301376048506, iteration: 97396
loss: 1.0646858215332031,grad_norm: 0.9999992405405086, iteration: 97397
loss: 1.0531275272369385,grad_norm: 0.9767436246800701, iteration: 97398
loss: 1.1178301572799683,grad_norm: 0.9999992548051558, iteration: 97399
loss: 1.0690374374389648,grad_norm: 0.9543600388272041, iteration: 97400
loss: 1.0775694847106934,grad_norm: 0.9999991207780865, iteration: 97401
loss: 1.0668772459030151,grad_norm: 0.9999993240697088, iteration: 97402
loss: 0.9741945862770081,grad_norm: 0.7244531182468934, iteration: 97403
loss: 1.0670642852783203,grad_norm: 0.9593819170504122, iteration: 97404
loss: 1.0378464460372925,grad_norm: 0.8693623341039417, iteration: 97405
loss: 1.0581220388412476,grad_norm: 0.9095176816420516, iteration: 97406
loss: 0.9803087115287781,grad_norm: 0.9999990560903902, iteration: 97407
loss: 0.9813819527626038,grad_norm: 0.9999990901435835, iteration: 97408
loss: 1.019323468208313,grad_norm: 0.8836555752809495, iteration: 97409
loss: 1.044874668121338,grad_norm: 0.9555143536285873, iteration: 97410
loss: 1.0056618452072144,grad_norm: 0.8934340402501887, iteration: 97411
loss: 1.0593065023422241,grad_norm: 0.999999019845586, iteration: 97412
loss: 0.9657221436500549,grad_norm: 0.8752471966389513, iteration: 97413
loss: 1.0732439756393433,grad_norm: 0.9859796125348823, iteration: 97414
loss: 1.0478999614715576,grad_norm: 0.8578247776830461, iteration: 97415
loss: 1.0023366212844849,grad_norm: 0.9999990901966692, iteration: 97416
loss: 1.0103802680969238,grad_norm: 0.9999997986618787, iteration: 97417
loss: 0.9712905883789062,grad_norm: 0.9015626443178956, iteration: 97418
loss: 0.9781884551048279,grad_norm: 0.9115023116935355, iteration: 97419
loss: 1.004565954208374,grad_norm: 0.9999991740667862, iteration: 97420
loss: 1.2076603174209595,grad_norm: 0.9999993103747039, iteration: 97421
loss: 1.064468502998352,grad_norm: 0.99999906419305, iteration: 97422
loss: 1.0751090049743652,grad_norm: 0.9999990763992659, iteration: 97423
loss: 1.0559196472167969,grad_norm: 0.9999990976937696, iteration: 97424
loss: 1.074088215827942,grad_norm: 0.9147972871710954, iteration: 97425
loss: 1.0750223398208618,grad_norm: 0.9144857691982538, iteration: 97426
loss: 1.036508560180664,grad_norm: 0.9977565720024395, iteration: 97427
loss: 1.050462245941162,grad_norm: 0.9999996252468224, iteration: 97428
loss: 1.1295065879821777,grad_norm: 0.9999991724625089, iteration: 97429
loss: 1.0486961603164673,grad_norm: 0.9263535138650332, iteration: 97430
loss: 1.0513569116592407,grad_norm: 0.9999996767305879, iteration: 97431
loss: 1.0132542848587036,grad_norm: 0.8428485580148917, iteration: 97432
loss: 1.0410594940185547,grad_norm: 0.9999996334892807, iteration: 97433
loss: 1.0611255168914795,grad_norm: 0.9999998705580471, iteration: 97434
loss: 1.065905213356018,grad_norm: 0.9999996765882146, iteration: 97435
loss: 1.0975733995437622,grad_norm: 0.9999994534644301, iteration: 97436
loss: 0.996747612953186,grad_norm: 0.857406760243678, iteration: 97437
loss: 1.0335688591003418,grad_norm: 0.9337967269533395, iteration: 97438
loss: 1.0751982927322388,grad_norm: 0.8716392649186837, iteration: 97439
loss: 1.02461838722229,grad_norm: 0.9734339522546152, iteration: 97440
loss: 1.0025944709777832,grad_norm: 0.8057318895111498, iteration: 97441
loss: 1.0743980407714844,grad_norm: 0.9602163264653254, iteration: 97442
loss: 1.0288125276565552,grad_norm: 0.8762671641140662, iteration: 97443
loss: 1.0177862644195557,grad_norm: 0.9999997260684192, iteration: 97444
loss: 1.0294368267059326,grad_norm: 0.8286838367767272, iteration: 97445
loss: 1.0564028024673462,grad_norm: 0.9999996226705948, iteration: 97446
loss: 1.007999062538147,grad_norm: 0.8923698828328105, iteration: 97447
loss: 1.0707145929336548,grad_norm: 0.8595441606633294, iteration: 97448
loss: 1.0692110061645508,grad_norm: 0.9999992359818224, iteration: 97449
loss: 1.1047669649124146,grad_norm: 0.9999991280713003, iteration: 97450
loss: 1.0011005401611328,grad_norm: 0.9999990644249558, iteration: 97451
loss: 0.9921070337295532,grad_norm: 0.9999991763654861, iteration: 97452
loss: 1.0172052383422852,grad_norm: 0.9999995987446822, iteration: 97453
loss: 1.0042133331298828,grad_norm: 0.9585239536140079, iteration: 97454
loss: 1.0332201719284058,grad_norm: 0.8249654399560772, iteration: 97455
loss: 1.0504738092422485,grad_norm: 0.9999992743347738, iteration: 97456
loss: 1.0327157974243164,grad_norm: 0.9131586996910327, iteration: 97457
loss: 0.9837813973426819,grad_norm: 0.9999995447198413, iteration: 97458
loss: 0.9715926051139832,grad_norm: 0.9999999915809551, iteration: 97459
loss: 1.0046883821487427,grad_norm: 0.8782654953159563, iteration: 97460
loss: 0.9532017707824707,grad_norm: 0.9726624550604318, iteration: 97461
loss: 1.0447779893875122,grad_norm: 0.9699148062094449, iteration: 97462
loss: 1.0998029708862305,grad_norm: 0.9999990773426246, iteration: 97463
loss: 1.0050678253173828,grad_norm: 0.9999991494274424, iteration: 97464
loss: 1.0549349784851074,grad_norm: 0.9999991231365498, iteration: 97465
loss: 1.0449696779251099,grad_norm: 0.9999993609915654, iteration: 97466
loss: 1.0132527351379395,grad_norm: 0.7626380678169364, iteration: 97467
loss: 1.0918464660644531,grad_norm: 0.999999458923572, iteration: 97468
loss: 1.0144360065460205,grad_norm: 0.990779867511489, iteration: 97469
loss: 0.9864552617073059,grad_norm: 0.9152343292059477, iteration: 97470
loss: 1.085621953010559,grad_norm: 0.8381114407937547, iteration: 97471
loss: 1.0212780237197876,grad_norm: 0.94074487502857, iteration: 97472
loss: 1.0237351655960083,grad_norm: 0.9999993468108047, iteration: 97473
loss: 1.0451323986053467,grad_norm: 0.9999994767594501, iteration: 97474
loss: 1.0328233242034912,grad_norm: 0.9999992535066536, iteration: 97475
loss: 1.0781761407852173,grad_norm: 0.8917019067182984, iteration: 97476
loss: 1.0710129737854004,grad_norm: 0.8745473654758144, iteration: 97477
loss: 0.998508095741272,grad_norm: 0.9267984259391304, iteration: 97478
loss: 1.0942151546478271,grad_norm: 0.9168209923460601, iteration: 97479
loss: 1.003851056098938,grad_norm: 0.976212332294439, iteration: 97480
loss: 1.0997660160064697,grad_norm: 0.9999994806418261, iteration: 97481
loss: 1.0052845478057861,grad_norm: 0.896100167036006, iteration: 97482
loss: 1.0973010063171387,grad_norm: 0.903561883265791, iteration: 97483
loss: 1.0828782320022583,grad_norm: 0.8937222923228026, iteration: 97484
loss: 1.007504940032959,grad_norm: 0.9795315037848658, iteration: 97485
loss: 1.0737639665603638,grad_norm: 0.999999443969381, iteration: 97486
loss: 0.9857984185218811,grad_norm: 0.8817246226916887, iteration: 97487
loss: 1.339260458946228,grad_norm: 0.9999997709615921, iteration: 97488
loss: 1.0329110622406006,grad_norm: 0.9787704466523431, iteration: 97489
loss: 1.1133854389190674,grad_norm: 0.8628805274546517, iteration: 97490
loss: 1.02830171585083,grad_norm: 0.999999291474536, iteration: 97491
loss: 1.045521855354309,grad_norm: 0.7789100310731395, iteration: 97492
loss: 1.039774775505066,grad_norm: 0.9997009077958532, iteration: 97493
loss: 0.9905423521995544,grad_norm: 0.9810054747559461, iteration: 97494
loss: 1.0539109706878662,grad_norm: 0.9732951094342778, iteration: 97495
loss: 1.1080501079559326,grad_norm: 0.9999998504002188, iteration: 97496
loss: 0.9931557774543762,grad_norm: 0.9471518326445771, iteration: 97497
loss: 1.0655478239059448,grad_norm: 0.9999995152748347, iteration: 97498
loss: 1.0516232252120972,grad_norm: 0.8364451203812213, iteration: 97499
loss: 1.009118676185608,grad_norm: 0.9849609108212216, iteration: 97500
loss: 1.010711908340454,grad_norm: 0.9067618305867604, iteration: 97501
loss: 0.9959577322006226,grad_norm: 0.8474459836801493, iteration: 97502
loss: 1.0633279085159302,grad_norm: 0.9999995977713781, iteration: 97503
loss: 1.0283352136611938,grad_norm: 0.9966067851688909, iteration: 97504
loss: 0.99339359998703,grad_norm: 0.7855547253144924, iteration: 97505
loss: 1.041445255279541,grad_norm: 0.8355452964251056, iteration: 97506
loss: 1.0057846307754517,grad_norm: 0.8338968414315898, iteration: 97507
loss: 1.083817481994629,grad_norm: 0.999999703736168, iteration: 97508
loss: 1.0946606397628784,grad_norm: 0.999999307057374, iteration: 97509
loss: 1.0106709003448486,grad_norm: 0.9999996199946787, iteration: 97510
loss: 1.099403738975525,grad_norm: 0.9126085865483105, iteration: 97511
loss: 1.1170942783355713,grad_norm: 0.9999999240435637, iteration: 97512
loss: 1.006712794303894,grad_norm: 0.9715534078378882, iteration: 97513
loss: 1.0987852811813354,grad_norm: 0.9848718366663483, iteration: 97514
loss: 1.0570294857025146,grad_norm: 0.9999994553141845, iteration: 97515
loss: 1.0604450702667236,grad_norm: 0.9999992840986305, iteration: 97516
loss: 1.0610110759735107,grad_norm: 0.999999754701357, iteration: 97517
loss: 1.0470383167266846,grad_norm: 0.9999993665403425, iteration: 97518
loss: 1.1321042776107788,grad_norm: 0.9999997515522472, iteration: 97519
loss: 0.9902763366699219,grad_norm: 0.9999998961667793, iteration: 97520
loss: 0.9937102794647217,grad_norm: 0.80202395304862, iteration: 97521
loss: 0.9930498003959656,grad_norm: 0.9999990373296316, iteration: 97522
loss: 1.046075463294983,grad_norm: 0.9999991089924578, iteration: 97523
loss: 0.9889651536941528,grad_norm: 0.9598573836113238, iteration: 97524
loss: 0.9373853802680969,grad_norm: 0.9999990697079805, iteration: 97525
loss: 1.1131783723831177,grad_norm: 0.9999998108719024, iteration: 97526
loss: 1.0235424041748047,grad_norm: 0.999999652313862, iteration: 97527
loss: 0.9681649804115295,grad_norm: 0.9999990308341611, iteration: 97528
loss: 1.257621169090271,grad_norm: 0.9999993286245663, iteration: 97529
loss: 1.0424058437347412,grad_norm: 1.0000000256387411, iteration: 97530
loss: 0.9885306358337402,grad_norm: 0.929743318131034, iteration: 97531
loss: 1.0503180027008057,grad_norm: 0.9999996792845538, iteration: 97532
loss: 1.0972756147384644,grad_norm: 0.9999999071161012, iteration: 97533
loss: 0.9997923374176025,grad_norm: 0.8626429317637743, iteration: 97534
loss: 1.061750888824463,grad_norm: 0.999999389935726, iteration: 97535
loss: 1.0008184909820557,grad_norm: 0.9115522134030247, iteration: 97536
loss: 0.9963384866714478,grad_norm: 0.9999999485217702, iteration: 97537
loss: 1.0262624025344849,grad_norm: 0.8725080418069354, iteration: 97538
loss: 1.069841980934143,grad_norm: 0.8332517298617534, iteration: 97539
loss: 1.0131806135177612,grad_norm: 0.7822480140580487, iteration: 97540
loss: 1.0107444524765015,grad_norm: 0.9219361270504272, iteration: 97541
loss: 1.0739026069641113,grad_norm: 0.9999990468245418, iteration: 97542
loss: 1.0877304077148438,grad_norm: 0.9999995719404352, iteration: 97543
loss: 1.099132776260376,grad_norm: 0.9999991847932617, iteration: 97544
loss: 1.0373109579086304,grad_norm: 0.9978742393497813, iteration: 97545
loss: 1.077850103378296,grad_norm: 0.9999992682692922, iteration: 97546
loss: 1.062841773033142,grad_norm: 0.8740146379586, iteration: 97547
loss: 1.0922117233276367,grad_norm: 0.9999996162411839, iteration: 97548
loss: 1.034862995147705,grad_norm: 0.9999993179428337, iteration: 97549
loss: 1.0329326391220093,grad_norm: 0.9999990818547654, iteration: 97550
loss: 1.0071561336517334,grad_norm: 0.9169149437879482, iteration: 97551
loss: 1.068479299545288,grad_norm: 0.999999347079121, iteration: 97552
loss: 1.1273444890975952,grad_norm: 0.9999999164001914, iteration: 97553
loss: 0.9807137250900269,grad_norm: 0.9999989435537353, iteration: 97554
loss: 1.0437352657318115,grad_norm: 0.9398571436331477, iteration: 97555
loss: 1.001938819885254,grad_norm: 0.999999088542786, iteration: 97556
loss: 1.0290584564208984,grad_norm: 1.0000000414278356, iteration: 97557
loss: 1.0607106685638428,grad_norm: 0.9999996962984408, iteration: 97558
loss: 1.0452176332473755,grad_norm: 0.8855757512330286, iteration: 97559
loss: 1.0438538789749146,grad_norm: 0.9537079451647268, iteration: 97560
loss: 0.995113730430603,grad_norm: 0.7954743107198781, iteration: 97561
loss: 1.0237292051315308,grad_norm: 0.8262200979781235, iteration: 97562
loss: 1.0742870569229126,grad_norm: 0.9999997019008081, iteration: 97563
loss: 1.0499943494796753,grad_norm: 0.9685262375631283, iteration: 97564
loss: 1.1115795373916626,grad_norm: 0.9999996072875995, iteration: 97565
loss: 1.019162893295288,grad_norm: 0.9999995751862486, iteration: 97566
loss: 1.0763535499572754,grad_norm: 0.9999991587599326, iteration: 97567
loss: 1.0276732444763184,grad_norm: 0.9999994666066434, iteration: 97568
loss: 1.1733473539352417,grad_norm: 0.9999998484791045, iteration: 97569
loss: 1.2005735635757446,grad_norm: 0.99999938162255, iteration: 97570
loss: 1.0605400800704956,grad_norm: 0.999999272853886, iteration: 97571
loss: 1.2370496988296509,grad_norm: 0.9999999021865197, iteration: 97572
loss: 1.138488531112671,grad_norm: 0.9999998785537844, iteration: 97573
loss: 0.9934055805206299,grad_norm: 0.999999518053666, iteration: 97574
loss: 1.1285990476608276,grad_norm: 0.9999998960527793, iteration: 97575
loss: 1.3487812280654907,grad_norm: 0.999999631971662, iteration: 97576
loss: 1.0088229179382324,grad_norm: 0.9999993529392569, iteration: 97577
loss: 1.1871554851531982,grad_norm: 0.9999999577742095, iteration: 97578
loss: 1.1771471500396729,grad_norm: 0.9999997681822993, iteration: 97579
loss: 1.065676212310791,grad_norm: 0.9999992288318105, iteration: 97580
loss: 1.2551106214523315,grad_norm: 0.9999998447295962, iteration: 97581
loss: 1.046256422996521,grad_norm: 0.9999989791830697, iteration: 97582
loss: 1.0946704149246216,grad_norm: 0.9999994924461777, iteration: 97583
loss: 1.0626235008239746,grad_norm: 0.9999995388477103, iteration: 97584
loss: 1.1399786472320557,grad_norm: 0.9999994780545464, iteration: 97585
loss: 1.152779221534729,grad_norm: 1.0000000324252083, iteration: 97586
loss: 1.0789109468460083,grad_norm: 0.9999999654728629, iteration: 97587
loss: 1.0384024381637573,grad_norm: 0.9999995539606094, iteration: 97588
loss: 1.1548845767974854,grad_norm: 0.9999991332330848, iteration: 97589
loss: 1.0797942876815796,grad_norm: 0.9999991966888665, iteration: 97590
loss: 0.980160117149353,grad_norm: 0.8681158283048066, iteration: 97591
loss: 0.9811133742332458,grad_norm: 0.7960964913028623, iteration: 97592
loss: 1.0115468502044678,grad_norm: 0.9999996156708585, iteration: 97593
loss: 1.1405560970306396,grad_norm: 0.9999998608192281, iteration: 97594
loss: 1.009796380996704,grad_norm: 0.9134494025061072, iteration: 97595
loss: 1.0877915620803833,grad_norm: 0.9999996299997034, iteration: 97596
loss: 1.0140063762664795,grad_norm: 0.8684204963089777, iteration: 97597
loss: 1.1521084308624268,grad_norm: 0.9999997356466851, iteration: 97598
loss: 1.0337893962860107,grad_norm: 0.9999993508393268, iteration: 97599
loss: 1.0295026302337646,grad_norm: 0.9340746751061927, iteration: 97600
loss: 1.064749002456665,grad_norm: 0.9999991736383863, iteration: 97601
loss: 1.0635184049606323,grad_norm: 0.999999308672374, iteration: 97602
loss: 1.0191407203674316,grad_norm: 0.9999991945288429, iteration: 97603
loss: 0.9883368015289307,grad_norm: 0.9035335275796965, iteration: 97604
loss: 1.0372213125228882,grad_norm: 0.9999994887155141, iteration: 97605
loss: 1.0522072315216064,grad_norm: 0.9370453792051058, iteration: 97606
loss: 1.0055264234542847,grad_norm: 0.9999995141844407, iteration: 97607
loss: 0.9859806895256042,grad_norm: 0.8445607302005584, iteration: 97608
loss: 1.0205820798873901,grad_norm: 0.827357114082732, iteration: 97609
loss: 0.9574199914932251,grad_norm: 0.9999992018749296, iteration: 97610
loss: 1.0713179111480713,grad_norm: 0.8976926625287684, iteration: 97611
loss: 0.9915191531181335,grad_norm: 0.9999991588503794, iteration: 97612
loss: 1.0283563137054443,grad_norm: 0.7780087926823479, iteration: 97613
loss: 0.9920922517776489,grad_norm: 0.9859776693625177, iteration: 97614
loss: 1.01278817653656,grad_norm: 0.7096835415327526, iteration: 97615
loss: 1.018576741218567,grad_norm: 0.9999991965831009, iteration: 97616
loss: 0.9910607933998108,grad_norm: 0.9405564199135181, iteration: 97617
loss: 1.0348495244979858,grad_norm: 0.9999997621280131, iteration: 97618
loss: 1.014936923980713,grad_norm: 0.9999995379764111, iteration: 97619
loss: 1.0483845472335815,grad_norm: 0.9344201605104432, iteration: 97620
loss: 0.9749610424041748,grad_norm: 0.9078658579836917, iteration: 97621
loss: 1.0386871099472046,grad_norm: 0.9999995469105819, iteration: 97622
loss: 0.9772176146507263,grad_norm: 0.9467244813465497, iteration: 97623
loss: 0.9962326884269714,grad_norm: 0.8293347150821172, iteration: 97624
loss: 1.0364201068878174,grad_norm: 0.9999994509353183, iteration: 97625
loss: 1.019291639328003,grad_norm: 0.9999992680786479, iteration: 97626
loss: 1.2450785636901855,grad_norm: 0.9999998622918346, iteration: 97627
loss: 1.0174916982650757,grad_norm: 0.9999992369190027, iteration: 97628
loss: 1.09469735622406,grad_norm: 0.999999323233011, iteration: 97629
loss: 1.079374074935913,grad_norm: 0.9999999883942461, iteration: 97630
loss: 1.10110342502594,grad_norm: 0.9999994836926804, iteration: 97631
loss: 1.1004730463027954,grad_norm: 0.9999993030086414, iteration: 97632
loss: 1.1351839303970337,grad_norm: 0.9999997149155857, iteration: 97633
loss: 1.1627472639083862,grad_norm: 0.9999999367837193, iteration: 97634
loss: 1.0346654653549194,grad_norm: 0.8637656858112527, iteration: 97635
loss: 1.0144507884979248,grad_norm: 0.8788280525300101, iteration: 97636
loss: 1.0457335710525513,grad_norm: 0.9999998835909706, iteration: 97637
loss: 1.0062966346740723,grad_norm: 0.9999997759761261, iteration: 97638
loss: 1.213899850845337,grad_norm: 0.9999999315096166, iteration: 97639
loss: 1.0354503393173218,grad_norm: 0.8025843678594274, iteration: 97640
loss: 1.0260920524597168,grad_norm: 0.9999991212570127, iteration: 97641
loss: 1.0059922933578491,grad_norm: 0.9651144468839433, iteration: 97642
loss: 1.1495659351348877,grad_norm: 0.9999999496709446, iteration: 97643
loss: 1.012550950050354,grad_norm: 0.9701089629776135, iteration: 97644
loss: 1.0249595642089844,grad_norm: 0.9999993951290473, iteration: 97645
loss: 1.0609289407730103,grad_norm: 0.9600234349220083, iteration: 97646
loss: 1.0827159881591797,grad_norm: 0.9999994171688791, iteration: 97647
loss: 1.117928385734558,grad_norm: 0.7884669089813685, iteration: 97648
loss: 1.0516034364700317,grad_norm: 0.999999028246654, iteration: 97649
loss: 1.0610604286193848,grad_norm: 0.9058524757629418, iteration: 97650
loss: 1.0499258041381836,grad_norm: 0.9999993948514816, iteration: 97651
loss: 1.0605642795562744,grad_norm: 0.9938626532075571, iteration: 97652
loss: 1.0478904247283936,grad_norm: 0.8000844878010865, iteration: 97653
loss: 1.0267757177352905,grad_norm: 0.834787027815252, iteration: 97654
loss: 1.0629140138626099,grad_norm: 0.9999995226570054, iteration: 97655
loss: 1.0547606945037842,grad_norm: 0.9999997698590224, iteration: 97656
loss: 1.1078732013702393,grad_norm: 0.999999520671222, iteration: 97657
loss: 1.0569703578948975,grad_norm: 0.9999991302550989, iteration: 97658
loss: 1.0400491952896118,grad_norm: 0.999999876500504, iteration: 97659
loss: 0.9687486886978149,grad_norm: 0.8891823422621047, iteration: 97660
loss: 1.1556992530822754,grad_norm: 0.999999238342824, iteration: 97661
loss: 1.2520053386688232,grad_norm: 0.9999994518537845, iteration: 97662
loss: 0.9945472478866577,grad_norm: 0.9999992406513978, iteration: 97663
loss: 1.0352171659469604,grad_norm: 0.9999999629028739, iteration: 97664
loss: 1.026339054107666,grad_norm: 0.9999997772076211, iteration: 97665
loss: 0.9825822710990906,grad_norm: 0.8183834557014713, iteration: 97666
loss: 1.1627566814422607,grad_norm: 0.9999994240924106, iteration: 97667
loss: 1.069323182106018,grad_norm: 0.9999990502246993, iteration: 97668
loss: 1.0577759742736816,grad_norm: 0.9944760349313229, iteration: 97669
loss: 1.1207773685455322,grad_norm: 0.9999997135488905, iteration: 97670
loss: 1.0344483852386475,grad_norm: 0.9999993000360446, iteration: 97671
loss: 0.9643358588218689,grad_norm: 0.9999998591763894, iteration: 97672
loss: 1.0211397409439087,grad_norm: 0.996318743701843, iteration: 97673
loss: 1.0402436256408691,grad_norm: 0.9999990903724872, iteration: 97674
loss: 1.0641893148422241,grad_norm: 0.9008524400508734, iteration: 97675
loss: 1.0111148357391357,grad_norm: 0.6871887952702642, iteration: 97676
loss: 0.9891659021377563,grad_norm: 0.99999916384116, iteration: 97677
loss: 1.0014997720718384,grad_norm: 0.8288178160982472, iteration: 97678
loss: 1.1404379606246948,grad_norm: 0.999999959797786, iteration: 97679
loss: 1.1087268590927124,grad_norm: 0.9999994253697287, iteration: 97680
loss: 1.0836023092269897,grad_norm: 0.9999996864502099, iteration: 97681
loss: 1.0498085021972656,grad_norm: 0.9999991738102516, iteration: 97682
loss: 0.9899451732635498,grad_norm: 0.9999991355642028, iteration: 97683
loss: 1.0019381046295166,grad_norm: 0.9999992023472789, iteration: 97684
loss: 1.0057930946350098,grad_norm: 0.9999991738005473, iteration: 97685
loss: 1.0251214504241943,grad_norm: 0.7784122627689984, iteration: 97686
loss: 1.0641475915908813,grad_norm: 0.7972331537349474, iteration: 97687
loss: 1.1532844305038452,grad_norm: 0.9999993932161323, iteration: 97688
loss: 0.9882150888442993,grad_norm: 0.9878494907452393, iteration: 97689
loss: 1.0762648582458496,grad_norm: 0.99999965038022, iteration: 97690
loss: 1.0170706510543823,grad_norm: 0.9190934902440983, iteration: 97691
loss: 1.1141074895858765,grad_norm: 0.9999994925696726, iteration: 97692
loss: 1.0105798244476318,grad_norm: 0.9999991882286083, iteration: 97693
loss: 1.05085027217865,grad_norm: 0.9999990929688992, iteration: 97694
loss: 1.0025596618652344,grad_norm: 0.8891091017870286, iteration: 97695
loss: 1.0176892280578613,grad_norm: 0.9493775832996063, iteration: 97696
loss: 1.0121580362319946,grad_norm: 0.8284270354772181, iteration: 97697
loss: 1.0379502773284912,grad_norm: 0.9999997306225265, iteration: 97698
loss: 1.0456331968307495,grad_norm: 0.9999992825174361, iteration: 97699
loss: 0.9866442680358887,grad_norm: 0.8293991532391435, iteration: 97700
loss: 1.004710078239441,grad_norm: 0.9999990836420054, iteration: 97701
loss: 1.0065375566482544,grad_norm: 1.0000000496628627, iteration: 97702
loss: 0.9934576749801636,grad_norm: 0.8729675212164275, iteration: 97703
loss: 1.0321763753890991,grad_norm: 0.9999997047991737, iteration: 97704
loss: 1.00334632396698,grad_norm: 0.7581557343796393, iteration: 97705
loss: 0.9773088693618774,grad_norm: 0.9203099773787922, iteration: 97706
loss: 1.0158942937850952,grad_norm: 0.999999388094007, iteration: 97707
loss: 1.0043858289718628,grad_norm: 0.9999997561920106, iteration: 97708
loss: 0.9991855621337891,grad_norm: 0.9756986563986895, iteration: 97709
loss: 1.036141037940979,grad_norm: 0.9999990935198914, iteration: 97710
loss: 1.0558124780654907,grad_norm: 0.9999994202165903, iteration: 97711
loss: 1.1365253925323486,grad_norm: 0.9999997808665951, iteration: 97712
loss: 0.9738075733184814,grad_norm: 0.8554270358688558, iteration: 97713
loss: 1.1308547258377075,grad_norm: 0.9999999485418735, iteration: 97714
loss: 1.009229302406311,grad_norm: 0.9187125580240443, iteration: 97715
loss: 1.0031065940856934,grad_norm: 0.7815576753567723, iteration: 97716
loss: 1.089348554611206,grad_norm: 0.999999426203709, iteration: 97717
loss: 1.0644564628601074,grad_norm: 0.8928663128158517, iteration: 97718
loss: 1.02278733253479,grad_norm: 0.9999991609102833, iteration: 97719
loss: 0.9807429909706116,grad_norm: 0.9999990855548154, iteration: 97720
loss: 0.9910430312156677,grad_norm: 0.9999990356472422, iteration: 97721
loss: 1.0305719375610352,grad_norm: 0.8515072308756869, iteration: 97722
loss: 1.0278875827789307,grad_norm: 0.9999994710558588, iteration: 97723
loss: 1.014349102973938,grad_norm: 0.999999618224874, iteration: 97724
loss: 0.997494637966156,grad_norm: 0.9999998246053932, iteration: 97725
loss: 1.0138418674468994,grad_norm: 0.9569744241210328, iteration: 97726
loss: 1.0036027431488037,grad_norm: 0.9999994293832739, iteration: 97727
loss: 0.9959818720817566,grad_norm: 0.9465563645970968, iteration: 97728
loss: 1.0087426900863647,grad_norm: 0.9595377096766243, iteration: 97729
loss: 1.0557782649993896,grad_norm: 0.7967722146507146, iteration: 97730
loss: 1.018273949623108,grad_norm: 0.853968227831145, iteration: 97731
loss: 1.0169508457183838,grad_norm: 0.9999997672755995, iteration: 97732
loss: 0.9797629714012146,grad_norm: 0.9999991931599458, iteration: 97733
loss: 1.0197594165802002,grad_norm: 0.8248405344039653, iteration: 97734
loss: 1.137534260749817,grad_norm: 0.9999997441672166, iteration: 97735
loss: 1.0039609670639038,grad_norm: 0.8862243373315141, iteration: 97736
loss: 1.043166160583496,grad_norm: 0.9999997586299847, iteration: 97737
loss: 0.975808322429657,grad_norm: 0.9999994783017672, iteration: 97738
loss: 0.97709059715271,grad_norm: 0.9034033993195629, iteration: 97739
loss: 1.0191320180892944,grad_norm: 0.9999998764179855, iteration: 97740
loss: 0.9933140873908997,grad_norm: 0.9616306527673565, iteration: 97741
loss: 1.0457570552825928,grad_norm: 0.9182462570198925, iteration: 97742
loss: 1.0297185182571411,grad_norm: 0.8172893572847788, iteration: 97743
loss: 1.046849012374878,grad_norm: 0.9999990770408087, iteration: 97744
loss: 1.0236483812332153,grad_norm: 0.8624767451166216, iteration: 97745
loss: 1.1575052738189697,grad_norm: 0.999999756169842, iteration: 97746
loss: 1.0854644775390625,grad_norm: 0.9999990651389506, iteration: 97747
loss: 1.0289288759231567,grad_norm: 0.9640162237322797, iteration: 97748
loss: 0.9981478452682495,grad_norm: 0.9764930992635342, iteration: 97749
loss: 1.0248866081237793,grad_norm: 0.999999069603062, iteration: 97750
loss: 0.9797899723052979,grad_norm: 0.9999993356970677, iteration: 97751
loss: 1.093586802482605,grad_norm: 0.9999995184547311, iteration: 97752
loss: 0.9935482740402222,grad_norm: 0.9999990981140052, iteration: 97753
loss: 1.2198433876037598,grad_norm: 0.999999178049785, iteration: 97754
loss: 0.9933692216873169,grad_norm: 0.9743379948768288, iteration: 97755
loss: 1.0518879890441895,grad_norm: 0.9999991039976903, iteration: 97756
loss: 0.9523170590400696,grad_norm: 0.9999990797923055, iteration: 97757
loss: 1.0251508951187134,grad_norm: 0.9999990972538203, iteration: 97758
loss: 1.0862787961959839,grad_norm: 0.9999989593632557, iteration: 97759
loss: 1.1368660926818848,grad_norm: 0.9999998890976051, iteration: 97760
loss: 1.0409997701644897,grad_norm: 0.9999989404383207, iteration: 97761
loss: 1.0247951745986938,grad_norm: 0.9999999115648244, iteration: 97762
loss: 1.1303194761276245,grad_norm: 0.9999996026215298, iteration: 97763
loss: 1.0139622688293457,grad_norm: 0.9999991329982815, iteration: 97764
loss: 1.0402220487594604,grad_norm: 0.9999995258212456, iteration: 97765
loss: 1.0195693969726562,grad_norm: 0.7881947384030754, iteration: 97766
loss: 1.0465391874313354,grad_norm: 0.8868084444151062, iteration: 97767
loss: 0.984893798828125,grad_norm: 0.8967158192125133, iteration: 97768
loss: 1.0327377319335938,grad_norm: 0.9999992681942057, iteration: 97769
loss: 1.0487935543060303,grad_norm: 0.9999991688661292, iteration: 97770
loss: 1.0598026514053345,grad_norm: 0.9999999360282132, iteration: 97771
loss: 0.9554740190505981,grad_norm: 0.8170265622511128, iteration: 97772
loss: 1.0323121547698975,grad_norm: 0.9999991219107075, iteration: 97773
loss: 0.9792146682739258,grad_norm: 0.7266682316708687, iteration: 97774
loss: 1.0953606367111206,grad_norm: 0.9662585828743054, iteration: 97775
loss: 0.9989973306655884,grad_norm: 0.9652401170642613, iteration: 97776
loss: 1.014357566833496,grad_norm: 0.9883522456894203, iteration: 97777
loss: 1.0320016145706177,grad_norm: 0.9318745688468139, iteration: 97778
loss: 1.0047948360443115,grad_norm: 0.9999992045748013, iteration: 97779
loss: 0.9864501357078552,grad_norm: 0.99999902019415, iteration: 97780
loss: 1.049562931060791,grad_norm: 0.892723210026943, iteration: 97781
loss: 0.9988550543785095,grad_norm: 0.8957342814375986, iteration: 97782
loss: 1.0698667764663696,grad_norm: 0.9999990485841469, iteration: 97783
loss: 1.0093833208084106,grad_norm: 0.999999157589991, iteration: 97784
loss: 1.0398560762405396,grad_norm: 0.8605179934869428, iteration: 97785
loss: 1.0111308097839355,grad_norm: 0.9999995541262772, iteration: 97786
loss: 1.0008362531661987,grad_norm: 0.8756384910618424, iteration: 97787
loss: 1.0864187479019165,grad_norm: 0.9032701050712173, iteration: 97788
loss: 0.9900527000427246,grad_norm: 0.9314399058205421, iteration: 97789
loss: 1.04100501537323,grad_norm: 0.999999048491883, iteration: 97790
loss: 1.008419394493103,grad_norm: 0.9999991948761959, iteration: 97791
loss: 0.9928700923919678,grad_norm: 0.9999996949037206, iteration: 97792
loss: 1.0591577291488647,grad_norm: 0.999999463050285, iteration: 97793
loss: 0.9987828731536865,grad_norm: 0.892513411487156, iteration: 97794
loss: 1.004054069519043,grad_norm: 0.999999209966126, iteration: 97795
loss: 1.0134503841400146,grad_norm: 0.9999991411123643, iteration: 97796
loss: 1.0005980730056763,grad_norm: 0.925770798157314, iteration: 97797
loss: 1.0055909156799316,grad_norm: 0.9237518780650937, iteration: 97798
loss: 0.9991395473480225,grad_norm: 0.9032018427926959, iteration: 97799
loss: 1.0468820333480835,grad_norm: 0.9116445198728111, iteration: 97800
loss: 1.11512291431427,grad_norm: 0.9999999873945643, iteration: 97801
loss: 1.197562336921692,grad_norm: 0.9999994451132188, iteration: 97802
loss: 1.019038200378418,grad_norm: 0.9087113522935172, iteration: 97803
loss: 1.0401514768600464,grad_norm: 0.9999990972886056, iteration: 97804
loss: 0.9770897626876831,grad_norm: 0.7283326890991227, iteration: 97805
loss: 1.032952904701233,grad_norm: 0.766484527697663, iteration: 97806
loss: 1.011932134628296,grad_norm: 0.8590441939642223, iteration: 97807
loss: 1.036834955215454,grad_norm: 0.9563138589773951, iteration: 97808
loss: 1.1409657001495361,grad_norm: 0.9999995879062418, iteration: 97809
loss: 1.034169316291809,grad_norm: 0.999999413392278, iteration: 97810
loss: 1.1084266901016235,grad_norm: 0.9999991502521807, iteration: 97811
loss: 0.980162501335144,grad_norm: 0.9557018739509198, iteration: 97812
loss: 1.0691113471984863,grad_norm: 0.9999992745535925, iteration: 97813
loss: 1.0428780317306519,grad_norm: 0.9999993441684757, iteration: 97814
loss: 1.044682264328003,grad_norm: 0.9999991236034079, iteration: 97815
loss: 1.0978425741195679,grad_norm: 0.9999997521343571, iteration: 97816
loss: 1.135968804359436,grad_norm: 0.9999989793959312, iteration: 97817
loss: 1.0918558835983276,grad_norm: 0.9999991699686878, iteration: 97818
loss: 1.1842340230941772,grad_norm: 0.9999990983995036, iteration: 97819
loss: 1.0394988059997559,grad_norm: 0.9999993287904636, iteration: 97820
loss: 1.0688655376434326,grad_norm: 0.9999997713236243, iteration: 97821
loss: 1.0482782125473022,grad_norm: 0.9999998770898486, iteration: 97822
loss: 1.1657949686050415,grad_norm: 0.9999996735650439, iteration: 97823
loss: 1.1480129957199097,grad_norm: 0.9999996383070183, iteration: 97824
loss: 1.009743094444275,grad_norm: 0.9999992435469834, iteration: 97825
loss: 1.0117441415786743,grad_norm: 0.9999993985044883, iteration: 97826
loss: 1.19919753074646,grad_norm: 0.999999746756991, iteration: 97827
loss: 1.092131495475769,grad_norm: 0.9999993229074111, iteration: 97828
loss: 1.0649493932724,grad_norm: 0.9999998882429922, iteration: 97829
loss: 1.184526801109314,grad_norm: 0.9421801391542006, iteration: 97830
loss: 1.0187755823135376,grad_norm: 0.9139161444249035, iteration: 97831
loss: 1.120452642440796,grad_norm: 0.9999999088853841, iteration: 97832
loss: 1.0577186346054077,grad_norm: 0.9730112279202937, iteration: 97833
loss: 1.0470978021621704,grad_norm: 0.9921841481536441, iteration: 97834
loss: 1.171315312385559,grad_norm: 0.999999732567441, iteration: 97835
loss: 1.0010696649551392,grad_norm: 0.9999998513167909, iteration: 97836
loss: 1.0879842042922974,grad_norm: 0.999999592423423, iteration: 97837
loss: 1.1447008848190308,grad_norm: 0.9999999744483276, iteration: 97838
loss: 1.037702202796936,grad_norm: 0.9999990949769331, iteration: 97839
loss: 1.094679594039917,grad_norm: 0.9999996732055704, iteration: 97840
loss: 1.0448617935180664,grad_norm: 0.9736410201837901, iteration: 97841
loss: 1.0222657918930054,grad_norm: 0.8217923952290811, iteration: 97842
loss: 1.0500415563583374,grad_norm: 0.9999994142464405, iteration: 97843
loss: 1.0624263286590576,grad_norm: 0.9231283899549216, iteration: 97844
loss: 1.0043697357177734,grad_norm: 0.7919828809783444, iteration: 97845
loss: 0.985090970993042,grad_norm: 0.9999991934348653, iteration: 97846
loss: 1.0380899906158447,grad_norm: 0.9999997774262337, iteration: 97847
loss: 0.9980630278587341,grad_norm: 0.9999991429532509, iteration: 97848
loss: 1.0852333307266235,grad_norm: 0.9999992157316174, iteration: 97849
loss: 1.0376954078674316,grad_norm: 0.9999997961613852, iteration: 97850
loss: 1.0077663660049438,grad_norm: 0.861864568878904, iteration: 97851
loss: 1.063223958015442,grad_norm: 0.8841808329277714, iteration: 97852
loss: 1.0856578350067139,grad_norm: 0.9637677662896083, iteration: 97853
loss: 0.9980843663215637,grad_norm: 0.9999991254150057, iteration: 97854
loss: 1.032766580581665,grad_norm: 0.9999991240113693, iteration: 97855
loss: 0.971512496471405,grad_norm: 0.8745613785641038, iteration: 97856
loss: 1.04337739944458,grad_norm: 0.9999990314361814, iteration: 97857
loss: 1.2630505561828613,grad_norm: 0.9999998045362469, iteration: 97858
loss: 1.046152114868164,grad_norm: 0.9054205925033587, iteration: 97859
loss: 1.0596028566360474,grad_norm: 0.9999991840126545, iteration: 97860
loss: 1.1304875612258911,grad_norm: 0.9999998781191237, iteration: 97861
loss: 0.9921143054962158,grad_norm: 0.9796674266348083, iteration: 97862
loss: 1.0855480432510376,grad_norm: 0.999999954783495, iteration: 97863
loss: 0.9998671412467957,grad_norm: 0.8408017222169174, iteration: 97864
loss: 1.0878201723098755,grad_norm: 0.9999990913242554, iteration: 97865
loss: 1.0893967151641846,grad_norm: 0.9795074663178653, iteration: 97866
loss: 1.026121973991394,grad_norm: 0.9999989235044291, iteration: 97867
loss: 0.9889684319496155,grad_norm: 0.9999996283725711, iteration: 97868
loss: 1.0582066774368286,grad_norm: 0.9999997938685202, iteration: 97869
loss: 1.265297770500183,grad_norm: 0.9999997665688322, iteration: 97870
loss: 1.024695873260498,grad_norm: 0.9999992507608679, iteration: 97871
loss: 1.0444283485412598,grad_norm: 0.9999991711964127, iteration: 97872
loss: 1.0153064727783203,grad_norm: 0.9894517073886082, iteration: 97873
loss: 1.036547064781189,grad_norm: 0.7155144962907779, iteration: 97874
loss: 1.1064159870147705,grad_norm: 0.9999990637900154, iteration: 97875
loss: 1.062190294265747,grad_norm: 0.9999996435631696, iteration: 97876
loss: 1.039139986038208,grad_norm: 0.9999997154333565, iteration: 97877
loss: 1.0245243310928345,grad_norm: 0.8355295135262121, iteration: 97878
loss: 1.1267153024673462,grad_norm: 0.9999990505528663, iteration: 97879
loss: 1.0035959482192993,grad_norm: 0.9999991375564817, iteration: 97880
loss: 1.0010055303573608,grad_norm: 0.9201490477519092, iteration: 97881
loss: 1.0915696620941162,grad_norm: 0.9999997368868535, iteration: 97882
loss: 1.0520986318588257,grad_norm: 0.8971021618304204, iteration: 97883
loss: 1.016404390335083,grad_norm: 0.9042705585882005, iteration: 97884
loss: 0.9684267044067383,grad_norm: 0.9818301333194827, iteration: 97885
loss: 1.0456486940383911,grad_norm: 0.9999994286400452, iteration: 97886
loss: 1.0124317407608032,grad_norm: 0.8341684662504204, iteration: 97887
loss: 0.997804582118988,grad_norm: 0.8886408749293723, iteration: 97888
loss: 0.9956830739974976,grad_norm: 0.916464855309014, iteration: 97889
loss: 1.0218498706817627,grad_norm: 0.9999993854968335, iteration: 97890
loss: 1.0546257495880127,grad_norm: 0.9999991772003698, iteration: 97891
loss: 1.03974187374115,grad_norm: 0.9999994874541157, iteration: 97892
loss: 0.9783117175102234,grad_norm: 0.9803399064508374, iteration: 97893
loss: 1.0096158981323242,grad_norm: 0.911488477339694, iteration: 97894
loss: 1.0958997011184692,grad_norm: 0.9510049626767471, iteration: 97895
loss: 1.1459494829177856,grad_norm: 0.9398193290863855, iteration: 97896
loss: 1.0029101371765137,grad_norm: 0.9535860753005928, iteration: 97897
loss: 1.0999995470046997,grad_norm: 0.8488545052099123, iteration: 97898
loss: 1.0266575813293457,grad_norm: 0.9999997125482549, iteration: 97899
loss: 1.0128865242004395,grad_norm: 0.9999991286395216, iteration: 97900
loss: 1.0237358808517456,grad_norm: 0.9852486687159063, iteration: 97901
loss: 1.1616301536560059,grad_norm: 0.9999996166779832, iteration: 97902
loss: 1.0200306177139282,grad_norm: 0.8756663760420148, iteration: 97903
loss: 1.0274598598480225,grad_norm: 0.9999997855067704, iteration: 97904
loss: 1.1901463270187378,grad_norm: 0.9999997768236291, iteration: 97905
loss: 1.0667812824249268,grad_norm: 0.999999507447634, iteration: 97906
loss: 1.0278626680374146,grad_norm: 0.8701500447653339, iteration: 97907
loss: 0.9834136366844177,grad_norm: 0.9092566334513765, iteration: 97908
loss: 1.0263991355895996,grad_norm: 0.9893375109951932, iteration: 97909
loss: 1.056071400642395,grad_norm: 0.9699922551159658, iteration: 97910
loss: 1.0775398015975952,grad_norm: 0.9999997080657154, iteration: 97911
loss: 1.1028754711151123,grad_norm: 0.9999998041122078, iteration: 97912
loss: 0.9779251217842102,grad_norm: 0.7460633815534123, iteration: 97913
loss: 1.041574239730835,grad_norm: 0.9269190564894138, iteration: 97914
loss: 1.1025742292404175,grad_norm: 0.9306263648089523, iteration: 97915
loss: 1.054555058479309,grad_norm: 0.8978985592382891, iteration: 97916
loss: 0.9923968315124512,grad_norm: 0.9185101770200392, iteration: 97917
loss: 0.9977104663848877,grad_norm: 0.9883960872350918, iteration: 97918
loss: 1.072176218032837,grad_norm: 0.9201173030028288, iteration: 97919
loss: 1.0580313205718994,grad_norm: 0.7814036926939081, iteration: 97920
loss: 1.054883360862732,grad_norm: 0.9999994496708761, iteration: 97921
loss: 1.0527154207229614,grad_norm: 0.9999999349726676, iteration: 97922
loss: 1.0845190286636353,grad_norm: 0.9999992462101045, iteration: 97923
loss: 1.044049859046936,grad_norm: 0.7863686642351602, iteration: 97924
loss: 0.961148738861084,grad_norm: 0.9140910573175385, iteration: 97925
loss: 1.0475058555603027,grad_norm: 0.9999995941223413, iteration: 97926
loss: 1.0380234718322754,grad_norm: 0.9999991474552186, iteration: 97927
loss: 1.0318212509155273,grad_norm: 0.8497190585191446, iteration: 97928
loss: 0.9941869378089905,grad_norm: 0.9999992524195799, iteration: 97929
loss: 1.0129632949829102,grad_norm: 0.9738183660107481, iteration: 97930
loss: 1.0145457983016968,grad_norm: 0.9999990398507622, iteration: 97931
loss: 1.1150875091552734,grad_norm: 0.9999999140795722, iteration: 97932
loss: 1.0158876180648804,grad_norm: 0.9953790981744305, iteration: 97933
loss: 1.0312410593032837,grad_norm: 0.9999990363423568, iteration: 97934
loss: 1.0243877172470093,grad_norm: 0.8601182003301561, iteration: 97935
loss: 0.9847372174263,grad_norm: 0.871270506161789, iteration: 97936
loss: 0.963996171951294,grad_norm: 0.8501240880224102, iteration: 97937
loss: 1.0290089845657349,grad_norm: 0.999999174979801, iteration: 97938
loss: 1.031678318977356,grad_norm: 0.8621982894870097, iteration: 97939
loss: 1.0297117233276367,grad_norm: 0.9329257827587565, iteration: 97940
loss: 1.2292248010635376,grad_norm: 0.999999155138388, iteration: 97941
loss: 1.0040258169174194,grad_norm: 0.9452217299979365, iteration: 97942
loss: 1.0280200242996216,grad_norm: 0.8927336233469195, iteration: 97943
loss: 1.0316630601882935,grad_norm: 0.8946245773122355, iteration: 97944
loss: 1.0383020639419556,grad_norm: 0.9673387800822392, iteration: 97945
loss: 0.9765914082527161,grad_norm: 0.8304836469367561, iteration: 97946
loss: 0.9841776490211487,grad_norm: 0.9999991083166075, iteration: 97947
loss: 1.3803160190582275,grad_norm: 0.9999996408311941, iteration: 97948
loss: 1.0390433073043823,grad_norm: 0.9758760313233806, iteration: 97949
loss: 1.04184091091156,grad_norm: 0.7437350053914009, iteration: 97950
loss: 1.0922185182571411,grad_norm: 0.9999991808022465, iteration: 97951
loss: 1.0348780155181885,grad_norm: 0.9581123171987903, iteration: 97952
loss: 1.1023801565170288,grad_norm: 0.9655184752665706, iteration: 97953
loss: 1.0586633682250977,grad_norm: 0.9999998734099671, iteration: 97954
loss: 1.0572518110275269,grad_norm: 0.9140475353280156, iteration: 97955
loss: 0.9530189037322998,grad_norm: 0.9999989983892896, iteration: 97956
loss: 1.0975255966186523,grad_norm: 0.9999995438936037, iteration: 97957
loss: 1.0147053003311157,grad_norm: 0.8198271699045885, iteration: 97958
loss: 0.9915257096290588,grad_norm: 0.9999991207419188, iteration: 97959
loss: 1.0558451414108276,grad_norm: 0.9054730564513058, iteration: 97960
loss: 0.9939658045768738,grad_norm: 0.9999997334398287, iteration: 97961
loss: 1.288290023803711,grad_norm: 0.9999994319878625, iteration: 97962
loss: 1.1080284118652344,grad_norm: 0.9999990580106044, iteration: 97963
loss: 1.0970728397369385,grad_norm: 0.9999998960467733, iteration: 97964
loss: 1.006524920463562,grad_norm: 0.9284315544243679, iteration: 97965
loss: 1.0381159782409668,grad_norm: 0.999999063989794, iteration: 97966
loss: 1.0903321504592896,grad_norm: 0.999999144628284, iteration: 97967
loss: 1.0897233486175537,grad_norm: 0.9999998645377847, iteration: 97968
loss: 1.0591325759887695,grad_norm: 0.9874000719472517, iteration: 97969
loss: 1.0673282146453857,grad_norm: 0.999999171835886, iteration: 97970
loss: 1.0613220930099487,grad_norm: 0.8408210989268772, iteration: 97971
loss: 0.9977883696556091,grad_norm: 0.8491351800135817, iteration: 97972
loss: 1.1280969381332397,grad_norm: 0.9999995516297725, iteration: 97973
loss: 1.0514885187149048,grad_norm: 0.9999989262479891, iteration: 97974
loss: 0.982945442199707,grad_norm: 0.9340299553280995, iteration: 97975
loss: 1.1283677816390991,grad_norm: 0.999999150927664, iteration: 97976
loss: 1.1082768440246582,grad_norm: 0.9999998592645083, iteration: 97977
loss: 1.0149949789047241,grad_norm: 0.9999990717744512, iteration: 97978
loss: 1.0353846549987793,grad_norm: 0.8346120155488634, iteration: 97979
loss: 1.0314786434173584,grad_norm: 0.9283785935763657, iteration: 97980
loss: 0.967889130115509,grad_norm: 0.9006161605397378, iteration: 97981
loss: 1.1058834791183472,grad_norm: 0.9999991632459709, iteration: 97982
loss: 1.0002285242080688,grad_norm: 0.8882447399273491, iteration: 97983
loss: 1.13743257522583,grad_norm: 0.9999989966555864, iteration: 97984
loss: 1.1113401651382446,grad_norm: 0.9999996763150089, iteration: 97985
loss: 1.1204798221588135,grad_norm: 0.9999999435025319, iteration: 97986
loss: 0.9777041673660278,grad_norm: 0.9999992214781437, iteration: 97987
loss: 1.0069680213928223,grad_norm: 0.9875657544348839, iteration: 97988
loss: 0.998458743095398,grad_norm: 0.9587324467667249, iteration: 97989
loss: 1.164041519165039,grad_norm: 0.9999998470578534, iteration: 97990
loss: 1.077096939086914,grad_norm: 0.9999991659512923, iteration: 97991
loss: 0.9831802248954773,grad_norm: 0.9999991645252447, iteration: 97992
loss: 1.1007804870605469,grad_norm: 0.99999988532422, iteration: 97993
loss: 1.0947892665863037,grad_norm: 0.9999993823704525, iteration: 97994
loss: 1.2774648666381836,grad_norm: 0.9999998944718419, iteration: 97995
loss: 1.0423228740692139,grad_norm: 0.9999993124362117, iteration: 97996
loss: 1.0448554754257202,grad_norm: 0.999999201454853, iteration: 97997
loss: 1.0293495655059814,grad_norm: 0.9676250427300574, iteration: 97998
loss: 1.0974682569503784,grad_norm: 0.9999998589413069, iteration: 97999
loss: 1.0753223896026611,grad_norm: 0.9999995116340703, iteration: 98000
loss: 1.0257155895233154,grad_norm: 0.9999989792112732, iteration: 98001
loss: 0.9982833862304688,grad_norm: 0.9322119733135202, iteration: 98002
loss: 1.0893539190292358,grad_norm: 0.999999231216576, iteration: 98003
loss: 1.0744385719299316,grad_norm: 0.9999995617935254, iteration: 98004
loss: 0.9813226461410522,grad_norm: 0.8801372661175583, iteration: 98005
loss: 1.0578199625015259,grad_norm: 0.999999022167235, iteration: 98006
loss: 1.3229707479476929,grad_norm: 0.9999996329073112, iteration: 98007
loss: 1.0602258443832397,grad_norm: 0.9999991781543837, iteration: 98008
loss: 0.9792183637619019,grad_norm: 0.9999990007756967, iteration: 98009
loss: 1.0365357398986816,grad_norm: 0.9620971049058755, iteration: 98010
loss: 1.0600746870040894,grad_norm: 0.9999991358010909, iteration: 98011
loss: 1.1367546319961548,grad_norm: 0.9623369955526339, iteration: 98012
loss: 1.0446470975875854,grad_norm: 0.9820225190092229, iteration: 98013
loss: 1.0042204856872559,grad_norm: 0.850038771763499, iteration: 98014
loss: 1.0529930591583252,grad_norm: 0.9999999805653585, iteration: 98015
loss: 1.06643545627594,grad_norm: 0.999999001858063, iteration: 98016
loss: 1.1001794338226318,grad_norm: 0.9999997399815269, iteration: 98017
loss: 1.0535563230514526,grad_norm: 0.9916557234109249, iteration: 98018
loss: 1.0225213766098022,grad_norm: 0.9999993884334328, iteration: 98019
loss: 1.0855708122253418,grad_norm: 0.9999992627252796, iteration: 98020
loss: 1.0941888093948364,grad_norm: 0.9999994314857136, iteration: 98021
loss: 1.0153404474258423,grad_norm: 0.999999750311203, iteration: 98022
loss: 1.0698162317276,grad_norm: 0.9999990370145393, iteration: 98023
loss: 1.0112470388412476,grad_norm: 0.872929858896021, iteration: 98024
loss: 1.020102620124817,grad_norm: 0.9999990485771637, iteration: 98025
loss: 1.1376278400421143,grad_norm: 0.9999995263439031, iteration: 98026
loss: 1.313665747642517,grad_norm: 0.9999996534198989, iteration: 98027
loss: 1.3821125030517578,grad_norm: 0.9999998124088734, iteration: 98028
loss: 1.0061389207839966,grad_norm: 0.8782906961763789, iteration: 98029
loss: 1.2137657403945923,grad_norm: 0.9999998826656127, iteration: 98030
loss: 0.9918330311775208,grad_norm: 0.8497182766742278, iteration: 98031
loss: 1.17317795753479,grad_norm: 0.9999996263286939, iteration: 98032
loss: 1.1357355117797852,grad_norm: 0.999999625527764, iteration: 98033
loss: 1.0706254243850708,grad_norm: 0.8958054528528819, iteration: 98034
loss: 1.1320706605911255,grad_norm: 0.9999999325968525, iteration: 98035
loss: 1.0735194683074951,grad_norm: 0.9999993573351268, iteration: 98036
loss: 1.0015151500701904,grad_norm: 0.9744138654765273, iteration: 98037
loss: 1.0356570482254028,grad_norm: 0.9999998373860337, iteration: 98038
loss: 1.010110855102539,grad_norm: 0.9270072699318973, iteration: 98039
loss: 0.974716305732727,grad_norm: 0.9214262753466518, iteration: 98040
loss: 0.9956247806549072,grad_norm: 0.9999989757151833, iteration: 98041
loss: 1.0155764818191528,grad_norm: 0.9999990458121131, iteration: 98042
loss: 0.9839162826538086,grad_norm: 0.8848732242835659, iteration: 98043
loss: 1.0480716228485107,grad_norm: 0.999999844630844, iteration: 98044
loss: 1.0379524230957031,grad_norm: 0.9999996089452303, iteration: 98045
loss: 1.0143834352493286,grad_norm: 0.9230537651258087, iteration: 98046
loss: 1.012224793434143,grad_norm: 0.9999990483623031, iteration: 98047
loss: 1.008384346961975,grad_norm: 0.9635863087121291, iteration: 98048
loss: 1.0251331329345703,grad_norm: 0.9772565568468662, iteration: 98049
loss: 1.0474146604537964,grad_norm: 0.9999998799639322, iteration: 98050
loss: 1.0812395811080933,grad_norm: 0.9999990119786943, iteration: 98051
loss: 0.9928826689720154,grad_norm: 0.9999990607769776, iteration: 98052
loss: 1.024080514907837,grad_norm: 0.9999993355131096, iteration: 98053
loss: 1.0118868350982666,grad_norm: 0.9999990695454071, iteration: 98054
loss: 1.0180684328079224,grad_norm: 0.999999073433207, iteration: 98055
loss: 1.0678203105926514,grad_norm: 0.9836661141774743, iteration: 98056
loss: 1.016823410987854,grad_norm: 0.8912177368398042, iteration: 98057
loss: 1.019607663154602,grad_norm: 0.8936015200616801, iteration: 98058
loss: 0.9965198636054993,grad_norm: 0.8154698764190753, iteration: 98059
loss: 1.0206105709075928,grad_norm: 0.9999998889875162, iteration: 98060
loss: 1.0651042461395264,grad_norm: 0.9999993650646166, iteration: 98061
loss: 0.9918976426124573,grad_norm: 0.9999990723819216, iteration: 98062
loss: 1.0003927946090698,grad_norm: 0.817242622934885, iteration: 98063
loss: 1.0800467729568481,grad_norm: 0.9999990723055329, iteration: 98064
loss: 1.0009119510650635,grad_norm: 0.9999999134036799, iteration: 98065
loss: 0.9744464755058289,grad_norm: 0.9999992272648189, iteration: 98066
loss: 0.9976212978363037,grad_norm: 0.9999996382655184, iteration: 98067
loss: 1.0397385358810425,grad_norm: 0.8661003313227728, iteration: 98068
loss: 0.9692766666412354,grad_norm: 0.8823209646626041, iteration: 98069
loss: 1.0222513675689697,grad_norm: 0.8564004160874008, iteration: 98070
loss: 1.0013210773468018,grad_norm: 0.9999992047537317, iteration: 98071
loss: 0.9798425436019897,grad_norm: 0.9698408589288616, iteration: 98072
loss: 1.0649977922439575,grad_norm: 0.8849047765580695, iteration: 98073
loss: 1.0543849468231201,grad_norm: 0.9670896651272091, iteration: 98074
loss: 1.0083880424499512,grad_norm: 0.8829957998083462, iteration: 98075
loss: 1.0056867599487305,grad_norm: 0.8955512268467585, iteration: 98076
loss: 0.9634627103805542,grad_norm: 0.8685983691115584, iteration: 98077
loss: 1.005631446838379,grad_norm: 0.934229291148713, iteration: 98078
loss: 1.0116362571716309,grad_norm: 0.8833003192442814, iteration: 98079
loss: 1.0020763874053955,grad_norm: 0.9450218050130376, iteration: 98080
loss: 0.9997783899307251,grad_norm: 0.8237086977065071, iteration: 98081
loss: 0.9690691232681274,grad_norm: 0.8804438504046708, iteration: 98082
loss: 1.0106067657470703,grad_norm: 0.9999992684529453, iteration: 98083
loss: 1.0083528757095337,grad_norm: 0.8249165171294518, iteration: 98084
loss: 0.9932307004928589,grad_norm: 0.8769090812856084, iteration: 98085
loss: 0.9839329719543457,grad_norm: 0.8984071440704897, iteration: 98086
loss: 1.0236259698867798,grad_norm: 0.9999998775863504, iteration: 98087
loss: 0.9798425436019897,grad_norm: 0.9732205513792932, iteration: 98088
loss: 1.0232011079788208,grad_norm: 0.9074952003225419, iteration: 98089
loss: 1.0255461931228638,grad_norm: 0.8383636070939088, iteration: 98090
loss: 1.001230001449585,grad_norm: 0.9999991473142692, iteration: 98091
loss: 1.0197584629058838,grad_norm: 0.9999994796065069, iteration: 98092
loss: 0.9877749681472778,grad_norm: 0.8295100302627267, iteration: 98093
loss: 0.9780368208885193,grad_norm: 0.8966383468750032, iteration: 98094
loss: 1.003277063369751,grad_norm: 0.9999993425235909, iteration: 98095
loss: 1.0300782918930054,grad_norm: 0.9999991453746521, iteration: 98096
loss: 1.0090110301971436,grad_norm: 0.9451781517464157, iteration: 98097
loss: 1.0119636058807373,grad_norm: 0.7527644161837231, iteration: 98098
loss: 0.9893960356712341,grad_norm: 0.8695218400608776, iteration: 98099
loss: 0.9663294553756714,grad_norm: 0.7936559991906108, iteration: 98100
loss: 0.9906171560287476,grad_norm: 0.9569312414732184, iteration: 98101
loss: 0.9985718131065369,grad_norm: 0.9999992393180854, iteration: 98102
loss: 1.0116043090820312,grad_norm: 0.7922477974462024, iteration: 98103
loss: 0.9908831119537354,grad_norm: 0.8158545011828039, iteration: 98104
loss: 1.0161466598510742,grad_norm: 0.9999998800353426, iteration: 98105
loss: 0.9708415865898132,grad_norm: 0.93167980987965, iteration: 98106
loss: 1.0552676916122437,grad_norm: 0.9264785425907979, iteration: 98107
loss: 0.9537928104400635,grad_norm: 0.9747309337894952, iteration: 98108
loss: 1.0517714023590088,grad_norm: 0.9999998325770141, iteration: 98109
loss: 1.0009117126464844,grad_norm: 0.7970098018408356, iteration: 98110
loss: 0.9880305528640747,grad_norm: 0.980866790432737, iteration: 98111
loss: 1.0143779516220093,grad_norm: 0.8754463142206852, iteration: 98112
loss: 1.0227196216583252,grad_norm: 0.9999994797095141, iteration: 98113
loss: 1.0257328748703003,grad_norm: 0.8580476208216125, iteration: 98114
loss: 1.00959312915802,grad_norm: 0.8492284477696577, iteration: 98115
loss: 0.9607186913490295,grad_norm: 0.8220338632905947, iteration: 98116
loss: 0.9935883283615112,grad_norm: 0.8108957407024623, iteration: 98117
loss: 1.0137842893600464,grad_norm: 0.8073397638292463, iteration: 98118
loss: 0.9997896552085876,grad_norm: 0.9999990610537645, iteration: 98119
loss: 1.0143181085586548,grad_norm: 0.8740348144727477, iteration: 98120
loss: 1.0044262409210205,grad_norm: 0.8399845813814296, iteration: 98121
loss: 0.9926543235778809,grad_norm: 0.8232085095565022, iteration: 98122
loss: 1.0192177295684814,grad_norm: 0.9643699723881306, iteration: 98123
loss: 1.0026721954345703,grad_norm: 0.9811136651501899, iteration: 98124
loss: 0.9836061000823975,grad_norm: 0.9176559448318322, iteration: 98125
loss: 1.012111783027649,grad_norm: 0.9999998143868657, iteration: 98126
loss: 0.9714064598083496,grad_norm: 0.9999991095303558, iteration: 98127
loss: 0.9989840984344482,grad_norm: 0.7762372912450253, iteration: 98128
loss: 1.114798665046692,grad_norm: 0.9999995531310039, iteration: 98129
loss: 0.9892539381980896,grad_norm: 0.9999993260800601, iteration: 98130
loss: 1.0024797916412354,grad_norm: 0.9032401844448833, iteration: 98131
loss: 1.0571837425231934,grad_norm: 0.9999997965423211, iteration: 98132
loss: 1.0150525569915771,grad_norm: 0.9722160815781734, iteration: 98133
loss: 1.010854721069336,grad_norm: 0.8458658263657962, iteration: 98134
loss: 1.123778223991394,grad_norm: 0.9999999481416662, iteration: 98135
loss: 1.0192240476608276,grad_norm: 0.9139694141890253, iteration: 98136
loss: 1.0340368747711182,grad_norm: 0.9999991449339697, iteration: 98137
loss: 1.0078105926513672,grad_norm: 0.9848976307522938, iteration: 98138
loss: 0.9709032773971558,grad_norm: 0.8132337569762415, iteration: 98139
loss: 1.0545910596847534,grad_norm: 0.9999992416448923, iteration: 98140
loss: 0.9962297677993774,grad_norm: 0.8628234535571241, iteration: 98141
loss: 1.0227631330490112,grad_norm: 0.9999992136539819, iteration: 98142
loss: 0.9920659065246582,grad_norm: 0.8510580812567081, iteration: 98143
loss: 1.1983200311660767,grad_norm: 0.9999999703070043, iteration: 98144
loss: 0.9914962649345398,grad_norm: 0.9941176336563113, iteration: 98145
loss: 1.0382484197616577,grad_norm: 0.9999990913507, iteration: 98146
loss: 1.0237412452697754,grad_norm: 0.961173642781102, iteration: 98147
loss: 1.0263477563858032,grad_norm: 0.9999998667986033, iteration: 98148
loss: 0.9936872720718384,grad_norm: 0.9999996952254383, iteration: 98149
loss: 1.0230181217193604,grad_norm: 0.9999990804429039, iteration: 98150
loss: 1.0362974405288696,grad_norm: 0.9579785196845731, iteration: 98151
loss: 1.0071721076965332,grad_norm: 0.999999091318639, iteration: 98152
loss: 1.0372107028961182,grad_norm: 0.9999990371764869, iteration: 98153
loss: 1.0131126642227173,grad_norm: 0.9227188068522721, iteration: 98154
loss: 1.0841481685638428,grad_norm: 0.999999181711746, iteration: 98155
loss: 1.0207715034484863,grad_norm: 0.9386005379141467, iteration: 98156
loss: 0.9807218313217163,grad_norm: 0.8856731008877023, iteration: 98157
loss: 0.9980202317237854,grad_norm: 0.6681089577214375, iteration: 98158
loss: 1.0249658823013306,grad_norm: 0.8971678220810931, iteration: 98159
loss: 0.9878877997398376,grad_norm: 0.8066851877847582, iteration: 98160
loss: 1.006532073020935,grad_norm: 0.9999997678495878, iteration: 98161
loss: 1.0000025033950806,grad_norm: 0.9999990414100915, iteration: 98162
loss: 1.0151385068893433,grad_norm: 0.8676262121097964, iteration: 98163
loss: 1.020775318145752,grad_norm: 0.8794134256517752, iteration: 98164
loss: 1.0638939142227173,grad_norm: 0.9999995689238187, iteration: 98165
loss: 0.9979718923568726,grad_norm: 0.7910045841497584, iteration: 98166
loss: 1.0020695924758911,grad_norm: 0.9525983181964917, iteration: 98167
loss: 1.0045573711395264,grad_norm: 0.6999068365452766, iteration: 98168
loss: 0.9980900287628174,grad_norm: 0.9473666731763127, iteration: 98169
loss: 1.0069223642349243,grad_norm: 0.8998067813559302, iteration: 98170
loss: 0.9573863744735718,grad_norm: 0.8872387989293481, iteration: 98171
loss: 1.0204718112945557,grad_norm: 0.999999796908956, iteration: 98172
loss: 1.0329550504684448,grad_norm: 0.9929610914819498, iteration: 98173
loss: 1.0234678983688354,grad_norm: 0.891678510586232, iteration: 98174
loss: 1.0363794565200806,grad_norm: 0.9999996705277049, iteration: 98175
loss: 0.9681504368782043,grad_norm: 0.9999991615464011, iteration: 98176
loss: 1.0133447647094727,grad_norm: 0.9999990700135414, iteration: 98177
loss: 0.9724152684211731,grad_norm: 0.950964854532796, iteration: 98178
loss: 1.013856291770935,grad_norm: 0.9202036176826393, iteration: 98179
loss: 1.0103819370269775,grad_norm: 0.8588721498060536, iteration: 98180
loss: 0.9571444988250732,grad_norm: 0.8475373242745499, iteration: 98181
loss: 0.9813839197158813,grad_norm: 0.8553775484115086, iteration: 98182
loss: 1.0328822135925293,grad_norm: 0.9999992502601691, iteration: 98183
loss: 0.9866114854812622,grad_norm: 0.8703310717891729, iteration: 98184
loss: 1.1703194379806519,grad_norm: 0.9999998639978135, iteration: 98185
loss: 1.0142208337783813,grad_norm: 0.9999994043360364, iteration: 98186
loss: 1.0054882764816284,grad_norm: 0.9894313000309887, iteration: 98187
loss: 1.089153528213501,grad_norm: 0.9999992007415838, iteration: 98188
loss: 1.0031375885009766,grad_norm: 0.8584829778589299, iteration: 98189
loss: 0.9964874386787415,grad_norm: 0.9999991319653814, iteration: 98190
loss: 1.020835041999817,grad_norm: 0.9999990796037846, iteration: 98191
loss: 1.1667776107788086,grad_norm: 0.9999992557376384, iteration: 98192
loss: 1.021074652671814,grad_norm: 0.7466664366378315, iteration: 98193
loss: 0.9730198383331299,grad_norm: 0.8090398438942553, iteration: 98194
loss: 1.0534021854400635,grad_norm: 0.9999993840431233, iteration: 98195
loss: 0.9986143112182617,grad_norm: 0.815941042807347, iteration: 98196
loss: 0.9867652654647827,grad_norm: 0.9347532948811235, iteration: 98197
loss: 0.976950466632843,grad_norm: 0.8673444525035222, iteration: 98198
loss: 1.0118775367736816,grad_norm: 0.9999992578086281, iteration: 98199
loss: 1.0267469882965088,grad_norm: 0.8331233988079296, iteration: 98200
loss: 1.019309401512146,grad_norm: 0.9999998329476678, iteration: 98201
loss: 0.9845499992370605,grad_norm: 0.7692970412820163, iteration: 98202
loss: 1.0047972202301025,grad_norm: 0.7808377019792914, iteration: 98203
loss: 0.9849978685379028,grad_norm: 0.9266265265782846, iteration: 98204
loss: 1.022689700126648,grad_norm: 0.9561369857968948, iteration: 98205
loss: 1.0856744050979614,grad_norm: 0.9999991761183091, iteration: 98206
loss: 0.9728371500968933,grad_norm: 0.874105022402014, iteration: 98207
loss: 0.9973441362380981,grad_norm: 0.7988454091717163, iteration: 98208
loss: 0.9938731789588928,grad_norm: 0.9999992408517104, iteration: 98209
loss: 0.9932169914245605,grad_norm: 0.9243345749503734, iteration: 98210
loss: 1.0355265140533447,grad_norm: 0.9999991300877313, iteration: 98211
loss: 1.035897970199585,grad_norm: 0.9999994946327944, iteration: 98212
loss: 0.9872334599494934,grad_norm: 0.880996217419139, iteration: 98213
loss: 1.0014666318893433,grad_norm: 0.9641720101859735, iteration: 98214
loss: 1.0504734516143799,grad_norm: 0.9143674369828698, iteration: 98215
loss: 0.9567691683769226,grad_norm: 0.974847546109168, iteration: 98216
loss: 0.9997979402542114,grad_norm: 0.9999995771395686, iteration: 98217
loss: 1.0235430002212524,grad_norm: 0.8644684378074738, iteration: 98218
loss: 0.9675887823104858,grad_norm: 0.8829225021499809, iteration: 98219
loss: 1.0022298097610474,grad_norm: 0.8337013588648842, iteration: 98220
loss: 0.9962271451950073,grad_norm: 0.7803108735599453, iteration: 98221
loss: 0.9796923398971558,grad_norm: 0.8210044573860864, iteration: 98222
loss: 0.9921150803565979,grad_norm: 0.8471624452266628, iteration: 98223
loss: 1.014098048210144,grad_norm: 0.9999992023483977, iteration: 98224
loss: 0.9955918788909912,grad_norm: 0.8136188865025268, iteration: 98225
loss: 1.0009080171585083,grad_norm: 0.9999999060987277, iteration: 98226
loss: 0.9844407439231873,grad_norm: 0.7874296887804093, iteration: 98227
loss: 1.0413447618484497,grad_norm: 0.9999994783695801, iteration: 98228
loss: 1.0332633256912231,grad_norm: 0.9999993426866374, iteration: 98229
loss: 0.9867759346961975,grad_norm: 0.9999991415718601, iteration: 98230
loss: 1.1024906635284424,grad_norm: 0.9999995278540504, iteration: 98231
loss: 1.0188642740249634,grad_norm: 0.9678810998095445, iteration: 98232
loss: 1.0200766324996948,grad_norm: 0.9999990961863598, iteration: 98233
loss: 0.9929003119468689,grad_norm: 0.9539023531661188, iteration: 98234
loss: 0.9835566282272339,grad_norm: 0.8977025718095212, iteration: 98235
loss: 1.0010712146759033,grad_norm: 0.9999989970742879, iteration: 98236
loss: 1.030227541923523,grad_norm: 0.8453014391548168, iteration: 98237
loss: 0.9687309861183167,grad_norm: 0.9937326989022528, iteration: 98238
loss: 1.0281563997268677,grad_norm: 0.8886082499242907, iteration: 98239
loss: 0.9860758185386658,grad_norm: 0.9106009421370888, iteration: 98240
loss: 0.9798046350479126,grad_norm: 0.8656295403498949, iteration: 98241
loss: 0.972511351108551,grad_norm: 0.8608830052679978, iteration: 98242
loss: 0.9824418425559998,grad_norm: 0.8286965824159, iteration: 98243
loss: 1.046744704246521,grad_norm: 0.9999993726949687, iteration: 98244
loss: 0.9593085646629333,grad_norm: 0.8499870616431627, iteration: 98245
loss: 1.0372334718704224,grad_norm: 0.9195165243524774, iteration: 98246
loss: 1.0077930688858032,grad_norm: 0.8493984170360757, iteration: 98247
loss: 0.9793123006820679,grad_norm: 0.9488961403614209, iteration: 98248
loss: 1.0010091066360474,grad_norm: 1.0000000011467731, iteration: 98249
loss: 0.9962734580039978,grad_norm: 0.9999990532274903, iteration: 98250
loss: 1.011823058128357,grad_norm: 0.9274916540837633, iteration: 98251
loss: 1.0142816305160522,grad_norm: 0.9692261212414279, iteration: 98252
loss: 1.1121760606765747,grad_norm: 0.7607694576224368, iteration: 98253
loss: 1.0055103302001953,grad_norm: 0.919239311696616, iteration: 98254
loss: 1.0298962593078613,grad_norm: 0.8885592027997773, iteration: 98255
loss: 1.0041298866271973,grad_norm: 0.9438887412397869, iteration: 98256
loss: 1.0091551542282104,grad_norm: 0.9999990192282682, iteration: 98257
loss: 0.9753182530403137,grad_norm: 0.8877474939557021, iteration: 98258
loss: 1.0256808996200562,grad_norm: 0.9999990205403176, iteration: 98259
loss: 1.029983639717102,grad_norm: 0.7935752490141236, iteration: 98260
loss: 0.9691373109817505,grad_norm: 0.9640026921287398, iteration: 98261
loss: 1.0188385248184204,grad_norm: 0.8690333360542385, iteration: 98262
loss: 0.9861779808998108,grad_norm: 0.909341681378102, iteration: 98263
loss: 1.0484402179718018,grad_norm: 0.9272901369367658, iteration: 98264
loss: 1.0384933948516846,grad_norm: 0.9999991248255161, iteration: 98265
loss: 0.9964683055877686,grad_norm: 0.9313439663362049, iteration: 98266
loss: 0.9740577340126038,grad_norm: 0.9999992850638516, iteration: 98267
loss: 0.9882847666740417,grad_norm: 0.9651082957714286, iteration: 98268
loss: 1.0776307582855225,grad_norm: 0.7950711785421851, iteration: 98269
loss: 1.0393716096878052,grad_norm: 0.9999991688586684, iteration: 98270
loss: 1.0269206762313843,grad_norm: 0.9932214304820526, iteration: 98271
loss: 1.0461015701293945,grad_norm: 0.8752828802465649, iteration: 98272
loss: 1.008339762687683,grad_norm: 0.8835471073232102, iteration: 98273
loss: 1.0104795694351196,grad_norm: 0.9999998645497761, iteration: 98274
loss: 0.9924964308738708,grad_norm: 0.997448325988606, iteration: 98275
loss: 0.9820310473442078,grad_norm: 0.9999991690528693, iteration: 98276
loss: 1.0089139938354492,grad_norm: 0.9690323752867566, iteration: 98277
loss: 1.040099859237671,grad_norm: 0.9256998283975814, iteration: 98278
loss: 0.9720165133476257,grad_norm: 0.8769082326420192, iteration: 98279
loss: 1.0146867036819458,grad_norm: 0.9552290983799162, iteration: 98280
loss: 1.0020257234573364,grad_norm: 0.9913386405204632, iteration: 98281
loss: 0.9957314133644104,grad_norm: 0.9999991870434671, iteration: 98282
loss: 0.990882158279419,grad_norm: 0.9045769061229263, iteration: 98283
loss: 1.0156928300857544,grad_norm: 0.7377795789076138, iteration: 98284
loss: 1.0384007692337036,grad_norm: 0.9999991622719427, iteration: 98285
loss: 1.0238550901412964,grad_norm: 0.9999990591440887, iteration: 98286
loss: 1.0167365074157715,grad_norm: 0.9553146372676855, iteration: 98287
loss: 0.9763149619102478,grad_norm: 0.8973512857034243, iteration: 98288
loss: 0.9835295081138611,grad_norm: 0.9668542136283735, iteration: 98289
loss: 0.9934785962104797,grad_norm: 0.9123611228115217, iteration: 98290
loss: 1.0100681781768799,grad_norm: 0.77144317851924, iteration: 98291
loss: 1.0220756530761719,grad_norm: 0.9999991061253921, iteration: 98292
loss: 0.9929447174072266,grad_norm: 0.9499997752824316, iteration: 98293
loss: 1.0176583528518677,grad_norm: 0.7687899734680248, iteration: 98294
loss: 1.0136442184448242,grad_norm: 0.9999999105497734, iteration: 98295
loss: 0.9968650341033936,grad_norm: 0.7637756726999337, iteration: 98296
loss: 1.0242187976837158,grad_norm: 0.9136847529414893, iteration: 98297
loss: 1.012913465499878,grad_norm: 0.8913097709766521, iteration: 98298
loss: 0.9910777807235718,grad_norm: 0.82213392472272, iteration: 98299
loss: 0.9533177018165588,grad_norm: 0.9479633226579927, iteration: 98300
loss: 1.0033823251724243,grad_norm: 0.8197389025382599, iteration: 98301
loss: 0.9771042466163635,grad_norm: 0.7834472735504959, iteration: 98302
loss: 0.9810195565223694,grad_norm: 0.9999989917252216, iteration: 98303
loss: 1.0279852151870728,grad_norm: 0.9999997449961101, iteration: 98304
loss: 1.0049424171447754,grad_norm: 0.9216935743144651, iteration: 98305
loss: 1.0069358348846436,grad_norm: 0.8885453340205373, iteration: 98306
loss: 1.021873950958252,grad_norm: 0.9043387715679186, iteration: 98307
loss: 0.981414258480072,grad_norm: 0.8093160577569593, iteration: 98308
loss: 0.9886660575866699,grad_norm: 0.9830047926023529, iteration: 98309
loss: 0.9776942729949951,grad_norm: 0.9999992037581996, iteration: 98310
loss: 0.9582939147949219,grad_norm: 0.9999992086048601, iteration: 98311
loss: 1.004105806350708,grad_norm: 0.9999991777053446, iteration: 98312
loss: 1.0122349262237549,grad_norm: 0.9371832747193213, iteration: 98313
loss: 1.0055180788040161,grad_norm: 0.8096977839246293, iteration: 98314
loss: 0.9845978021621704,grad_norm: 0.8364261106618867, iteration: 98315
loss: 0.987652599811554,grad_norm: 0.8075459614062725, iteration: 98316
loss: 1.0044243335723877,grad_norm: 0.9071095945089842, iteration: 98317
loss: 1.0278335809707642,grad_norm: 0.989991991438747, iteration: 98318
loss: 1.0537601709365845,grad_norm: 0.9672820410029297, iteration: 98319
loss: 0.9894468784332275,grad_norm: 0.9069204912350395, iteration: 98320
loss: 0.9869657754898071,grad_norm: 0.9532713823056199, iteration: 98321
loss: 1.003604769706726,grad_norm: 0.8142218902347051, iteration: 98322
loss: 1.049063801765442,grad_norm: 0.9999991900754269, iteration: 98323
loss: 0.9991064667701721,grad_norm: 0.818480139348915, iteration: 98324
loss: 0.988820493221283,grad_norm: 0.95972607321959, iteration: 98325
loss: 0.9939050078392029,grad_norm: 0.9999990407940085, iteration: 98326
loss: 1.0562031269073486,grad_norm: 0.9999990919250681, iteration: 98327
loss: 0.9890811443328857,grad_norm: 0.9386249257821253, iteration: 98328
loss: 1.0270276069641113,grad_norm: 0.8135614241958373, iteration: 98329
loss: 0.9994552731513977,grad_norm: 0.9986860645775243, iteration: 98330
loss: 0.9775272011756897,grad_norm: 0.8344170512336128, iteration: 98331
loss: 0.9843755960464478,grad_norm: 0.9829086473619668, iteration: 98332
loss: 1.0285834074020386,grad_norm: 0.8489102216290527, iteration: 98333
loss: 1.021035075187683,grad_norm: 0.9208728073185901, iteration: 98334
loss: 1.009731650352478,grad_norm: 0.9999992061234915, iteration: 98335
loss: 0.9921432733535767,grad_norm: 0.7369850643728146, iteration: 98336
loss: 1.0346813201904297,grad_norm: 0.9999996707987241, iteration: 98337
loss: 0.9937703609466553,grad_norm: 0.9979726365160168, iteration: 98338
loss: 1.0296448469161987,grad_norm: 0.8524893586920678, iteration: 98339
loss: 1.0036323070526123,grad_norm: 0.8509107233915157, iteration: 98340
loss: 1.0663374662399292,grad_norm: 0.9999992322465107, iteration: 98341
loss: 1.1569287776947021,grad_norm: 0.9999999051251756, iteration: 98342
loss: 1.016043782234192,grad_norm: 0.7084318468625658, iteration: 98343
loss: 0.992253839969635,grad_norm: 0.9131796843488488, iteration: 98344
loss: 1.0080687999725342,grad_norm: 0.8074310765634979, iteration: 98345
loss: 0.9801214337348938,grad_norm: 0.8435844746985013, iteration: 98346
loss: 1.0569977760314941,grad_norm: 0.9999990398058619, iteration: 98347
loss: 0.991634726524353,grad_norm: 0.7919613221091389, iteration: 98348
loss: 1.0090173482894897,grad_norm: 0.9645666890749321, iteration: 98349
loss: 0.9763945937156677,grad_norm: 0.9999990936443706, iteration: 98350
loss: 1.0064946413040161,grad_norm: 0.8878321855510799, iteration: 98351
loss: 0.9924452304840088,grad_norm: 0.8478669243062172, iteration: 98352
loss: 0.9933273196220398,grad_norm: 0.8569207537326801, iteration: 98353
loss: 1.0614585876464844,grad_norm: 0.9951474018621043, iteration: 98354
loss: 1.0176327228546143,grad_norm: 0.9999994882128302, iteration: 98355
loss: 0.9954054355621338,grad_norm: 0.9999991930829322, iteration: 98356
loss: 0.9958807229995728,grad_norm: 0.9085837297222984, iteration: 98357
loss: 1.0286905765533447,grad_norm: 0.9999991121661266, iteration: 98358
loss: 0.9616203904151917,grad_norm: 0.8279826698344255, iteration: 98359
loss: 0.9842650890350342,grad_norm: 0.8977598340485282, iteration: 98360
loss: 1.0147454738616943,grad_norm: 0.8646233282461332, iteration: 98361
loss: 0.9938884973526001,grad_norm: 0.9999994624815922, iteration: 98362
loss: 1.094082236289978,grad_norm: 0.9999999854850334, iteration: 98363
loss: 0.961415708065033,grad_norm: 0.8711874441653296, iteration: 98364
loss: 0.9937154650688171,grad_norm: 0.9999997628096885, iteration: 98365
loss: 0.9877416491508484,grad_norm: 0.8052367927055344, iteration: 98366
loss: 1.0310614109039307,grad_norm: 0.999998991596457, iteration: 98367
loss: 1.0134999752044678,grad_norm: 0.9999993960658283, iteration: 98368
loss: 1.0039851665496826,grad_norm: 0.950627516390092, iteration: 98369
loss: 1.0131609439849854,grad_norm: 0.8749002669450168, iteration: 98370
loss: 0.9818710088729858,grad_norm: 0.8334914847582722, iteration: 98371
loss: 0.9918391704559326,grad_norm: 0.9290742089718628, iteration: 98372
loss: 1.0099880695343018,grad_norm: 0.82046459692656, iteration: 98373
loss: 0.9925607442855835,grad_norm: 0.9999990794864836, iteration: 98374
loss: 0.9894530773162842,grad_norm: 0.911173717673645, iteration: 98375
loss: 0.9891001582145691,grad_norm: 0.7490723729481317, iteration: 98376
loss: 0.9892016053199768,grad_norm: 0.999999096395052, iteration: 98377
loss: 1.0044785737991333,grad_norm: 0.9999990444093766, iteration: 98378
loss: 0.985497236251831,grad_norm: 0.950233593346595, iteration: 98379
loss: 1.028106927871704,grad_norm: 0.9436815462216867, iteration: 98380
loss: 1.0176175832748413,grad_norm: 0.8675964688812607, iteration: 98381
loss: 1.0279241800308228,grad_norm: 0.9999990052264089, iteration: 98382
loss: 0.9966943860054016,grad_norm: 0.8738740240271715, iteration: 98383
loss: 1.0048632621765137,grad_norm: 0.9999991895631066, iteration: 98384
loss: 1.0002086162567139,grad_norm: 0.999999101163386, iteration: 98385
loss: 1.0027207136154175,grad_norm: 0.9035378639281392, iteration: 98386
loss: 0.9716288447380066,grad_norm: 0.9121461894086875, iteration: 98387
loss: 0.9970014095306396,grad_norm: 0.9999992464014562, iteration: 98388
loss: 0.9860432744026184,grad_norm: 0.8096415999000403, iteration: 98389
loss: 1.0697145462036133,grad_norm: 0.999998969678968, iteration: 98390
loss: 0.9743390679359436,grad_norm: 0.8353115197454074, iteration: 98391
loss: 1.004697561264038,grad_norm: 0.9999989861366054, iteration: 98392
loss: 1.029399037361145,grad_norm: 0.811172923693205, iteration: 98393
loss: 1.0285571813583374,grad_norm: 0.8554287137023843, iteration: 98394
loss: 1.0215396881103516,grad_norm: 0.8224092047791486, iteration: 98395
loss: 1.032665491104126,grad_norm: 0.9999992169256628, iteration: 98396
loss: 0.9649549722671509,grad_norm: 0.8359906227504142, iteration: 98397
loss: 1.0284017324447632,grad_norm: 0.8130489882302394, iteration: 98398
loss: 1.034346342086792,grad_norm: 0.8676544410678448, iteration: 98399
loss: 1.0848807096481323,grad_norm: 0.9999994035265682, iteration: 98400
loss: 0.9801605939865112,grad_norm: 0.9425972555247522, iteration: 98401
loss: 0.9965413808822632,grad_norm: 0.7703631192776818, iteration: 98402
loss: 0.9940455555915833,grad_norm: 0.8019700472122342, iteration: 98403
loss: 0.9977238774299622,grad_norm: 0.7505027448338712, iteration: 98404
loss: 0.9896766543388367,grad_norm: 0.9287870886313992, iteration: 98405
loss: 1.016353964805603,grad_norm: 0.999999324255656, iteration: 98406
loss: 0.9776645302772522,grad_norm: 0.9999990853753769, iteration: 98407
loss: 1.0124903917312622,grad_norm: 0.8620123469308758, iteration: 98408
loss: 0.9842182397842407,grad_norm: 0.9052805290288685, iteration: 98409
loss: 1.0055204629898071,grad_norm: 0.8575760678216241, iteration: 98410
loss: 0.9819433689117432,grad_norm: 0.9999990575176262, iteration: 98411
loss: 0.9840822219848633,grad_norm: 0.976224737415795, iteration: 98412
loss: 0.964442253112793,grad_norm: 0.861233409065802, iteration: 98413
loss: 1.021298885345459,grad_norm: 0.757222606772471, iteration: 98414
loss: 1.042338490486145,grad_norm: 0.9999998434994924, iteration: 98415
loss: 1.0343892574310303,grad_norm: 0.9999992716497227, iteration: 98416
loss: 0.9993852376937866,grad_norm: 0.9770033986841975, iteration: 98417
loss: 1.0039019584655762,grad_norm: 0.9999990907578493, iteration: 98418
loss: 1.019586205482483,grad_norm: 0.9909971535746092, iteration: 98419
loss: 1.0009711980819702,grad_norm: 0.8665610220154457, iteration: 98420
loss: 1.0177834033966064,grad_norm: 0.7832508782820718, iteration: 98421
loss: 1.0252407789230347,grad_norm: 0.9999996785788254, iteration: 98422
loss: 1.0290459394454956,grad_norm: 0.8791826302274551, iteration: 98423
loss: 1.0375181436538696,grad_norm: 0.7916068912465345, iteration: 98424
loss: 1.0203388929367065,grad_norm: 0.9999999768213297, iteration: 98425
loss: 0.9754446148872375,grad_norm: 0.9467052895080695, iteration: 98426
loss: 1.049605131149292,grad_norm: 0.9999991283693289, iteration: 98427
loss: 0.9924232959747314,grad_norm: 0.9999992157133977, iteration: 98428
loss: 0.989184558391571,grad_norm: 0.9999993016965519, iteration: 98429
loss: 1.024131417274475,grad_norm: 0.8960669360198875, iteration: 98430
loss: 1.0193792581558228,grad_norm: 0.9999991023603818, iteration: 98431
loss: 1.0223138332366943,grad_norm: 0.8978784020222164, iteration: 98432
loss: 1.023256540298462,grad_norm: 0.9999997580538046, iteration: 98433
loss: 1.0131510496139526,grad_norm: 0.9999990936823265, iteration: 98434
loss: 1.0192484855651855,grad_norm: 0.9999991684020826, iteration: 98435
loss: 1.0058846473693848,grad_norm: 0.8642141978667212, iteration: 98436
loss: 1.0463258028030396,grad_norm: 0.9157002808645903, iteration: 98437
loss: 0.9859042763710022,grad_norm: 0.9917331675899096, iteration: 98438
loss: 0.9925051927566528,grad_norm: 0.9999991769325851, iteration: 98439
loss: 1.0303651094436646,grad_norm: 0.9999994703590988, iteration: 98440
loss: 1.009208083152771,grad_norm: 0.999999388079333, iteration: 98441
loss: 1.0053905248641968,grad_norm: 0.8406375690056052, iteration: 98442
loss: 1.030032753944397,grad_norm: 0.8878251528411237, iteration: 98443
loss: 1.0043885707855225,grad_norm: 0.8654747462539437, iteration: 98444
loss: 1.0527600049972534,grad_norm: 0.79546135609184, iteration: 98445
loss: 0.9918130040168762,grad_norm: 0.9999996394286791, iteration: 98446
loss: 1.0113681554794312,grad_norm: 0.9999991368286495, iteration: 98447
loss: 1.0486418008804321,grad_norm: 0.9999993554991331, iteration: 98448
loss: 0.9932243824005127,grad_norm: 0.8860617431917867, iteration: 98449
loss: 1.0129677057266235,grad_norm: 1.00000004176006, iteration: 98450
loss: 0.9260087609291077,grad_norm: 0.9006224082433055, iteration: 98451
loss: 0.9902561902999878,grad_norm: 0.9999996212872798, iteration: 98452
loss: 1.027180552482605,grad_norm: 0.9645090579524789, iteration: 98453
loss: 1.008304476737976,grad_norm: 0.9417325734592197, iteration: 98454
loss: 1.0241326093673706,grad_norm: 0.8641766436453181, iteration: 98455
loss: 0.9808470606803894,grad_norm: 0.9999993234021638, iteration: 98456
loss: 0.9791625738143921,grad_norm: 0.9999993031381362, iteration: 98457
loss: 1.035378336906433,grad_norm: 0.8813038017312639, iteration: 98458
loss: 1.0296697616577148,grad_norm: 0.8321880903327891, iteration: 98459
loss: 1.0468536615371704,grad_norm: 0.9999992109654876, iteration: 98460
loss: 1.001449465751648,grad_norm: 0.8437103077044281, iteration: 98461
loss: 0.9819672703742981,grad_norm: 0.9999991089534743, iteration: 98462
loss: 1.0259548425674438,grad_norm: 0.8325497688990633, iteration: 98463
loss: 0.9988429546356201,grad_norm: 0.9999990843625628, iteration: 98464
loss: 0.9933581948280334,grad_norm: 0.9999993446409262, iteration: 98465
loss: 1.0001171827316284,grad_norm: 0.9245144263011991, iteration: 98466
loss: 0.9914165139198303,grad_norm: 0.9595450542100242, iteration: 98467
loss: 0.9705490469932556,grad_norm: 0.8811309627172237, iteration: 98468
loss: 0.9748339653015137,grad_norm: 0.999999034584174, iteration: 98469
loss: 1.0005549192428589,grad_norm: 0.9118052010680081, iteration: 98470
loss: 0.937682569026947,grad_norm: 0.9297587832675, iteration: 98471
loss: 1.0205177068710327,grad_norm: 0.7918561990619973, iteration: 98472
loss: 1.0616185665130615,grad_norm: 0.9999994626654133, iteration: 98473
loss: 1.0317686796188354,grad_norm: 0.9036394832082562, iteration: 98474
loss: 0.9701413512229919,grad_norm: 0.8973910118714173, iteration: 98475
loss: 0.9838219285011292,grad_norm: 0.9999992349298439, iteration: 98476
loss: 1.0036933422088623,grad_norm: 0.9999992234704634, iteration: 98477
loss: 1.020122766494751,grad_norm: 0.9363544014841615, iteration: 98478
loss: 1.10763680934906,grad_norm: 0.9999995796755785, iteration: 98479
loss: 1.0080931186676025,grad_norm: 0.7624242580521768, iteration: 98480
loss: 0.9950366616249084,grad_norm: 0.8197881295285451, iteration: 98481
loss: 1.0786958932876587,grad_norm: 0.99999981513598, iteration: 98482
loss: 1.0671617984771729,grad_norm: 0.9999989569177617, iteration: 98483
loss: 0.9980476498603821,grad_norm: 0.888891636035577, iteration: 98484
loss: 0.9593383073806763,grad_norm: 0.8481628878598771, iteration: 98485
loss: 1.0370432138442993,grad_norm: 0.8550754419404532, iteration: 98486
loss: 0.9654366374015808,grad_norm: 0.999999138928847, iteration: 98487
loss: 0.9997507929801941,grad_norm: 0.9188245282658862, iteration: 98488
loss: 0.9938250184059143,grad_norm: 0.8990714788995694, iteration: 98489
loss: 1.0215340852737427,grad_norm: 0.9880152028241639, iteration: 98490
loss: 0.9698227643966675,grad_norm: 0.9001174645408666, iteration: 98491
loss: 0.9728489518165588,grad_norm: 0.7525460554544988, iteration: 98492
loss: 1.0263328552246094,grad_norm: 0.9999991673807027, iteration: 98493
loss: 0.9740627408027649,grad_norm: 0.9999991161982156, iteration: 98494
loss: 1.007557988166809,grad_norm: 0.9258018003251831, iteration: 98495
loss: 0.9698569178581238,grad_norm: 0.8936986869699919, iteration: 98496
loss: 1.0078181028366089,grad_norm: 0.9999991589519821, iteration: 98497
loss: 1.0343081951141357,grad_norm: 0.9999991020599669, iteration: 98498
loss: 1.0425430536270142,grad_norm: 0.9015930730263446, iteration: 98499
loss: 0.9893435835838318,grad_norm: 0.7101876452538994, iteration: 98500
loss: 1.021560788154602,grad_norm: 0.9999993442668589, iteration: 98501
loss: 0.9995126724243164,grad_norm: 0.8210326124705234, iteration: 98502
loss: 0.9936482310295105,grad_norm: 0.8694674178039496, iteration: 98503
loss: 1.0181139707565308,grad_norm: 0.7752000952887207, iteration: 98504
loss: 0.9607762098312378,grad_norm: 0.9999991659688334, iteration: 98505
loss: 1.0018616914749146,grad_norm: 0.909150058157298, iteration: 98506
loss: 0.9805700182914734,grad_norm: 0.965850368223565, iteration: 98507
loss: 1.0063631534576416,grad_norm: 0.9999992832628548, iteration: 98508
loss: 0.9754982590675354,grad_norm: 0.9999997223213898, iteration: 98509
loss: 0.9993628263473511,grad_norm: 0.7982586519890111, iteration: 98510
loss: 1.0959649085998535,grad_norm: 0.9475751721238307, iteration: 98511
loss: 0.9791244268417358,grad_norm: 0.8577110533247965, iteration: 98512
loss: 0.9826151728630066,grad_norm: 0.9038100681572523, iteration: 98513
loss: 0.9729437232017517,grad_norm: 0.9999992340672258, iteration: 98514
loss: 0.9866728186607361,grad_norm: 0.8423361812131324, iteration: 98515
loss: 1.0486618280410767,grad_norm: 0.9999992492025134, iteration: 98516
loss: 0.9939820766448975,grad_norm: 0.9086399719251065, iteration: 98517
loss: 0.9728190302848816,grad_norm: 0.8984751141125052, iteration: 98518
loss: 1.0004000663757324,grad_norm: 0.9999999242978596, iteration: 98519
loss: 1.0038260221481323,grad_norm: 0.894209487161185, iteration: 98520
loss: 1.0534374713897705,grad_norm: 0.9999992055465307, iteration: 98521
loss: 1.0725793838500977,grad_norm: 0.894616712189916, iteration: 98522
loss: 1.0124818086624146,grad_norm: 0.8830234385860068, iteration: 98523
loss: 1.0127671957015991,grad_norm: 0.9557474527752259, iteration: 98524
loss: 1.041730284690857,grad_norm: 0.9999992909584356, iteration: 98525
loss: 0.9820353984832764,grad_norm: 0.9999993366044919, iteration: 98526
loss: 0.9882491230964661,grad_norm: 0.8014156969053445, iteration: 98527
loss: 0.9999719858169556,grad_norm: 0.8988977384533949, iteration: 98528
loss: 1.0081202983856201,grad_norm: 0.9862987646851656, iteration: 98529
loss: 0.9922160506248474,grad_norm: 0.9338575202835184, iteration: 98530
loss: 1.0595934391021729,grad_norm: 0.9999991708158882, iteration: 98531
loss: 1.0247551202774048,grad_norm: 0.7727441031651994, iteration: 98532
loss: 0.9963139295578003,grad_norm: 0.798327278326448, iteration: 98533
loss: 1.0115277767181396,grad_norm: 0.9373400210738183, iteration: 98534
loss: 1.0313208103179932,grad_norm: 0.99999978436252, iteration: 98535
loss: 1.0064703226089478,grad_norm: 0.853973362426564, iteration: 98536
loss: 1.0363770723342896,grad_norm: 0.9665611868005228, iteration: 98537
loss: 1.0868394374847412,grad_norm: 0.999999102025839, iteration: 98538
loss: 1.018692135810852,grad_norm: 0.9999990820436241, iteration: 98539
loss: 1.0020133256912231,grad_norm: 0.9999991754922998, iteration: 98540
loss: 0.9872236847877502,grad_norm: 0.7981765128602926, iteration: 98541
loss: 1.0297915935516357,grad_norm: 0.883404179781019, iteration: 98542
loss: 1.013707160949707,grad_norm: 0.8855316138312611, iteration: 98543
loss: 1.06689453125,grad_norm: 0.9999991540655713, iteration: 98544
loss: 1.0018843412399292,grad_norm: 0.8734266128486545, iteration: 98545
loss: 0.9907506108283997,grad_norm: 0.8183794680632593, iteration: 98546
loss: 1.0267926454544067,grad_norm: 0.9999990890039101, iteration: 98547
loss: 1.1289751529693604,grad_norm: 0.9999996146850866, iteration: 98548
loss: 1.009812593460083,grad_norm: 0.7836010852519709, iteration: 98549
loss: 0.98875892162323,grad_norm: 0.7659120993595583, iteration: 98550
loss: 0.9877882599830627,grad_norm: 0.9999990926493968, iteration: 98551
loss: 0.9925511479377747,grad_norm: 0.9949168745499561, iteration: 98552
loss: 1.0428063869476318,grad_norm: 0.9999989580148684, iteration: 98553
loss: 1.0935100317001343,grad_norm: 0.9726338557744507, iteration: 98554
loss: 1.0086610317230225,grad_norm: 0.7965844046052583, iteration: 98555
loss: 1.1658105850219727,grad_norm: 0.9999999220710813, iteration: 98556
loss: 1.0598100423812866,grad_norm: 0.9517564192182263, iteration: 98557
loss: 1.0079585313796997,grad_norm: 0.9610165634588688, iteration: 98558
loss: 1.0004853010177612,grad_norm: 0.9714607997253937, iteration: 98559
loss: 0.9691385626792908,grad_norm: 0.9999991008588395, iteration: 98560
loss: 1.0148483514785767,grad_norm: 0.9744698987007518, iteration: 98561
loss: 0.9651645421981812,grad_norm: 0.8083262276818581, iteration: 98562
loss: 0.9983681440353394,grad_norm: 0.8158111362032223, iteration: 98563
loss: 1.0240442752838135,grad_norm: 0.9999992136339199, iteration: 98564
loss: 1.0254966020584106,grad_norm: 0.9999993716530323, iteration: 98565
loss: 0.9840877652168274,grad_norm: 0.9042253772502831, iteration: 98566
loss: 1.0428874492645264,grad_norm: 0.9973167815173741, iteration: 98567
loss: 1.0138893127441406,grad_norm: 0.7798918562725157, iteration: 98568
loss: 1.0729997158050537,grad_norm: 0.9999993466620715, iteration: 98569
loss: 0.9957131743431091,grad_norm: 0.9999992628087893, iteration: 98570
loss: 0.9795784950256348,grad_norm: 0.9109043088266615, iteration: 98571
loss: 1.0571508407592773,grad_norm: 0.9999996065564352, iteration: 98572
loss: 1.009342074394226,grad_norm: 0.7251791052700676, iteration: 98573
loss: 1.0042155981063843,grad_norm: 0.9999992753819578, iteration: 98574
loss: 0.9588648676872253,grad_norm: 0.8368484120915424, iteration: 98575
loss: 1.0278196334838867,grad_norm: 0.9999992552196918, iteration: 98576
loss: 1.0440222024917603,grad_norm: 0.9999990898628006, iteration: 98577
loss: 0.9917234778404236,grad_norm: 0.868119960169168, iteration: 98578
loss: 1.0249677896499634,grad_norm: 0.9999991006325902, iteration: 98579
loss: 1.027755856513977,grad_norm: 0.8278077281738431, iteration: 98580
loss: 0.9555706977844238,grad_norm: 0.8121493482704967, iteration: 98581
loss: 1.0563549995422363,grad_norm: 0.9474639783116591, iteration: 98582
loss: 0.9836553335189819,grad_norm: 0.9999991959121874, iteration: 98583
loss: 1.0676378011703491,grad_norm: 0.9999996755509881, iteration: 98584
loss: 1.0137161016464233,grad_norm: 0.9999992410938989, iteration: 98585
loss: 0.977981448173523,grad_norm: 0.9999995814783929, iteration: 98586
loss: 1.0816689729690552,grad_norm: 0.9999994854329715, iteration: 98587
loss: 0.9780644774436951,grad_norm: 0.9097850130551277, iteration: 98588
loss: 0.9830682277679443,grad_norm: 0.9007307481238133, iteration: 98589
loss: 1.0074478387832642,grad_norm: 0.8178166382523859, iteration: 98590
loss: 1.0135575532913208,grad_norm: 0.9999995094010398, iteration: 98591
loss: 1.0804964303970337,grad_norm: 0.9999993029608721, iteration: 98592
loss: 1.0098696947097778,grad_norm: 0.8645939974677613, iteration: 98593
loss: 1.0424479246139526,grad_norm: 0.9999991047920997, iteration: 98594
loss: 1.0682674646377563,grad_norm: 0.999999005793151, iteration: 98595
loss: 1.0151832103729248,grad_norm: 0.8791632959879739, iteration: 98596
loss: 0.993613600730896,grad_norm: 0.8715658958723129, iteration: 98597
loss: 1.001293659210205,grad_norm: 0.9035512016988053, iteration: 98598
loss: 1.0150047540664673,grad_norm: 0.9999990455164837, iteration: 98599
loss: 0.970319390296936,grad_norm: 0.9428822856343145, iteration: 98600
loss: 1.0379494428634644,grad_norm: 0.9999991350793176, iteration: 98601
loss: 0.999212920665741,grad_norm: 0.8580053702862671, iteration: 98602
loss: 1.004478096961975,grad_norm: 0.8889793363547887, iteration: 98603
loss: 1.0126302242279053,grad_norm: 0.9999993264873559, iteration: 98604
loss: 1.0156084299087524,grad_norm: 0.9354989611959058, iteration: 98605
loss: 0.9750866293907166,grad_norm: 0.8558750633279976, iteration: 98606
loss: 1.037943720817566,grad_norm: 0.999999376824549, iteration: 98607
loss: 1.0128636360168457,grad_norm: 0.7340448589180741, iteration: 98608
loss: 0.9917079210281372,grad_norm: 0.9999991147149226, iteration: 98609
loss: 1.0080304145812988,grad_norm: 0.9115335461306507, iteration: 98610
loss: 0.9982120990753174,grad_norm: 0.8175323699597299, iteration: 98611
loss: 0.9946509003639221,grad_norm: 0.8064315695926791, iteration: 98612
loss: 1.0146698951721191,grad_norm: 0.8239000857165175, iteration: 98613
loss: 1.0627036094665527,grad_norm: 0.848191786068734, iteration: 98614
loss: 1.0320736169815063,grad_norm: 0.9999993820356686, iteration: 98615
loss: 1.0014535188674927,grad_norm: 0.9694614440515683, iteration: 98616
loss: 1.0092709064483643,grad_norm: 0.9510554250688986, iteration: 98617
loss: 0.9941865801811218,grad_norm: 0.8557871329799657, iteration: 98618
loss: 1.0457651615142822,grad_norm: 0.9385389040154056, iteration: 98619
loss: 0.9781965017318726,grad_norm: 0.8973806958058645, iteration: 98620
loss: 1.0286846160888672,grad_norm: 0.9999995057874064, iteration: 98621
loss: 1.0071320533752441,grad_norm: 0.9642991686923951, iteration: 98622
loss: 1.0080116987228394,grad_norm: 0.8824855549227861, iteration: 98623
loss: 1.0060665607452393,grad_norm: 0.8201408004387832, iteration: 98624
loss: 1.0120552778244019,grad_norm: 0.8176887433707802, iteration: 98625
loss: 1.1129347085952759,grad_norm: 0.9999992812256137, iteration: 98626
loss: 1.057649850845337,grad_norm: 0.9176530814611501, iteration: 98627
loss: 0.9654200077056885,grad_norm: 0.8741750574302758, iteration: 98628
loss: 1.0175925493240356,grad_norm: 0.9257867239874955, iteration: 98629
loss: 0.9741815328598022,grad_norm: 0.8608012182340324, iteration: 98630
loss: 0.960136890411377,grad_norm: 0.844782474385709, iteration: 98631
loss: 0.9958842992782593,grad_norm: 0.9624794803816693, iteration: 98632
loss: 0.9889240264892578,grad_norm: 0.9488031128936107, iteration: 98633
loss: 0.9900628924369812,grad_norm: 0.9999991560399542, iteration: 98634
loss: 1.0067477226257324,grad_norm: 0.7365183097872297, iteration: 98635
loss: 1.0081806182861328,grad_norm: 0.999999243741436, iteration: 98636
loss: 0.9959860444068909,grad_norm: 0.999999892968774, iteration: 98637
loss: 0.9767923355102539,grad_norm: 0.9324516458322112, iteration: 98638
loss: 1.0171750783920288,grad_norm: 0.9467168413446245, iteration: 98639
loss: 1.0088610649108887,grad_norm: 0.8308537055698417, iteration: 98640
loss: 1.0112086534500122,grad_norm: 0.9429601821442483, iteration: 98641
loss: 1.0523724555969238,grad_norm: 0.9981712213542858, iteration: 98642
loss: 1.0375016927719116,grad_norm: 0.999999276685364, iteration: 98643
loss: 1.051687479019165,grad_norm: 0.9999991815562349, iteration: 98644
loss: 1.0478109121322632,grad_norm: 0.9706823560234321, iteration: 98645
loss: 1.0022804737091064,grad_norm: 0.9999992833333635, iteration: 98646
loss: 0.9876549243927002,grad_norm: 0.9999999267744704, iteration: 98647
loss: 1.050825595855713,grad_norm: 0.9999994400187306, iteration: 98648
loss: 1.083539605140686,grad_norm: 0.9999994919743783, iteration: 98649
loss: 1.0433448553085327,grad_norm: 0.9759298496877147, iteration: 98650
loss: 1.008863925933838,grad_norm: 0.7393065755364399, iteration: 98651
loss: 1.006382942199707,grad_norm: 0.8906755077720022, iteration: 98652
loss: 1.042272925376892,grad_norm: 0.9339654784076231, iteration: 98653
loss: 1.0179314613342285,grad_norm: 0.9066646130411015, iteration: 98654
loss: 0.9725210070610046,grad_norm: 0.8816556761857435, iteration: 98655
loss: 1.1072404384613037,grad_norm: 0.9999998869918632, iteration: 98656
loss: 1.0089348554611206,grad_norm: 0.9296901917969141, iteration: 98657
loss: 0.9684484004974365,grad_norm: 0.9999990617902821, iteration: 98658
loss: 1.0428094863891602,grad_norm: 0.7933692988281149, iteration: 98659
loss: 1.0412784814834595,grad_norm: 0.9137677919248971, iteration: 98660
loss: 1.051400899887085,grad_norm: 0.9999994042113178, iteration: 98661
loss: 1.0393671989440918,grad_norm: 0.9999997120878832, iteration: 98662
loss: 1.032166600227356,grad_norm: 0.9518074903134012, iteration: 98663
loss: 0.9933353662490845,grad_norm: 0.9999990676517387, iteration: 98664
loss: 1.017816185951233,grad_norm: 0.9381727783594765, iteration: 98665
loss: 0.9686545729637146,grad_norm: 0.9538057844248624, iteration: 98666
loss: 1.0121285915374756,grad_norm: 0.9999994990658814, iteration: 98667
loss: 0.9888654351234436,grad_norm: 0.969028382662313, iteration: 98668
loss: 1.0226960182189941,grad_norm: 0.959505939611327, iteration: 98669
loss: 1.0161793231964111,grad_norm: 0.9999990586016637, iteration: 98670
loss: 0.9697510004043579,grad_norm: 0.8776424948509127, iteration: 98671
loss: 0.9734465479850769,grad_norm: 0.9536073487756686, iteration: 98672
loss: 0.9410542249679565,grad_norm: 0.9337631199547007, iteration: 98673
loss: 0.9825219511985779,grad_norm: 0.7561023674997847, iteration: 98674
loss: 1.0224676132202148,grad_norm: 0.8769557671720754, iteration: 98675
loss: 1.0247139930725098,grad_norm: 0.999999245218668, iteration: 98676
loss: 0.9605051279067993,grad_norm: 0.9999990833443386, iteration: 98677
loss: 0.9660119414329529,grad_norm: 0.9764679468404398, iteration: 98678
loss: 1.028853416442871,grad_norm: 0.9166823496673314, iteration: 98679
loss: 0.9774906039237976,grad_norm: 0.8547542707172475, iteration: 98680
loss: 1.0523611307144165,grad_norm: 0.9999991807241572, iteration: 98681
loss: 1.013117790222168,grad_norm: 0.7700934071804945, iteration: 98682
loss: 1.061747431755066,grad_norm: 0.9999992535600147, iteration: 98683
loss: 0.9960220456123352,grad_norm: 0.8009969583030734, iteration: 98684
loss: 1.0643503665924072,grad_norm: 0.9999998432856617, iteration: 98685
loss: 1.015777587890625,grad_norm: 0.9999990705152227, iteration: 98686
loss: 1.1270267963409424,grad_norm: 0.9999993456265653, iteration: 98687
loss: 0.9917169213294983,grad_norm: 0.9999989166482453, iteration: 98688
loss: 0.989001452922821,grad_norm: 0.9999991320211319, iteration: 98689
loss: 0.9858240485191345,grad_norm: 0.929790193648549, iteration: 98690
loss: 1.015757441520691,grad_norm: 0.8584981315197232, iteration: 98691
loss: 0.9926579594612122,grad_norm: 0.7482058362474735, iteration: 98692
loss: 0.9986802935600281,grad_norm: 0.8052051371145541, iteration: 98693
loss: 1.0303553342819214,grad_norm: 0.999999130551269, iteration: 98694
loss: 1.0183740854263306,grad_norm: 0.999998996124621, iteration: 98695
loss: 1.0930356979370117,grad_norm: 0.8928913328982141, iteration: 98696
loss: 0.963158905506134,grad_norm: 0.792265150764824, iteration: 98697
loss: 1.0693752765655518,grad_norm: 0.9999993917654662, iteration: 98698
loss: 0.9865720272064209,grad_norm: 0.9999989297698759, iteration: 98699
loss: 0.9925899505615234,grad_norm: 0.9719197478866167, iteration: 98700
loss: 1.0806876420974731,grad_norm: 0.8353934034705369, iteration: 98701
loss: 1.0141425132751465,grad_norm: 0.9738429878254383, iteration: 98702
loss: 0.9997671842575073,grad_norm: 0.933024710191034, iteration: 98703
loss: 1.0076985359191895,grad_norm: 0.9277661784706633, iteration: 98704
loss: 0.9932653903961182,grad_norm: 0.8084578808421894, iteration: 98705
loss: 1.0124101638793945,grad_norm: 0.9999991653038867, iteration: 98706
loss: 1.024218201637268,grad_norm: 0.9637235266987106, iteration: 98707
loss: 1.0002436637878418,grad_norm: 0.9085417348005921, iteration: 98708
loss: 0.9764756560325623,grad_norm: 0.8163364310529415, iteration: 98709
loss: 0.9819092750549316,grad_norm: 0.8372135571679812, iteration: 98710
loss: 1.003929615020752,grad_norm: 0.9999991492659033, iteration: 98711
loss: 1.0257549285888672,grad_norm: 0.999999541113357, iteration: 98712
loss: 1.04685640335083,grad_norm: 0.999999520527449, iteration: 98713
loss: 1.132122278213501,grad_norm: 0.9999997589391043, iteration: 98714
loss: 1.0123399496078491,grad_norm: 0.9711656351552506, iteration: 98715
loss: 0.9852997660636902,grad_norm: 0.9066617000788487, iteration: 98716
loss: 1.0236947536468506,grad_norm: 0.9999998759538721, iteration: 98717
loss: 1.030968427658081,grad_norm: 0.9999991196240952, iteration: 98718
loss: 1.009377360343933,grad_norm: 0.9033367691899424, iteration: 98719
loss: 0.9793091416358948,grad_norm: 0.7892937338573389, iteration: 98720
loss: 1.003925085067749,grad_norm: 0.9999992900303936, iteration: 98721
loss: 1.0152344703674316,grad_norm: 0.7618042778725084, iteration: 98722
loss: 1.0051302909851074,grad_norm: 0.8692957959527954, iteration: 98723
loss: 1.020053744316101,grad_norm: 0.9999996433636568, iteration: 98724
loss: 0.9499289989471436,grad_norm: 0.9999991278774801, iteration: 98725
loss: 0.9994595646858215,grad_norm: 0.999999192194254, iteration: 98726
loss: 1.0136253833770752,grad_norm: 0.7493629446793764, iteration: 98727
loss: 0.9911130666732788,grad_norm: 0.9999991538534573, iteration: 98728
loss: 1.0111476182937622,grad_norm: 0.9819564040217634, iteration: 98729
loss: 0.9766053557395935,grad_norm: 0.9999990589313964, iteration: 98730
loss: 0.9786672592163086,grad_norm: 0.9081496689431686, iteration: 98731
loss: 0.9973568320274353,grad_norm: 0.9285159517645277, iteration: 98732
loss: 0.9859912395477295,grad_norm: 0.9743811535167191, iteration: 98733
loss: 1.0611978769302368,grad_norm: 0.78148818492184, iteration: 98734
loss: 0.9826985597610474,grad_norm: 0.7516318863444048, iteration: 98735
loss: 1.0450598001480103,grad_norm: 0.9999990921343004, iteration: 98736
loss: 1.000427484512329,grad_norm: 0.9999994362645989, iteration: 98737
loss: 0.9803676009178162,grad_norm: 0.7787490662705532, iteration: 98738
loss: 1.0229765176773071,grad_norm: 0.9642324750715769, iteration: 98739
loss: 1.0226601362228394,grad_norm: 0.8264761935657827, iteration: 98740
loss: 0.9989156723022461,grad_norm: 0.9133120206839838, iteration: 98741
loss: 1.109593152999878,grad_norm: 0.9999998327535254, iteration: 98742
loss: 0.9732675552368164,grad_norm: 0.842347567606588, iteration: 98743
loss: 0.9857555031776428,grad_norm: 0.8653528310283256, iteration: 98744
loss: 1.0041992664337158,grad_norm: 0.9584766051043555, iteration: 98745
loss: 1.0909254550933838,grad_norm: 0.9999993371240977, iteration: 98746
loss: 0.9785860180854797,grad_norm: 0.9999991770497837, iteration: 98747
loss: 0.9906747937202454,grad_norm: 0.850169960384945, iteration: 98748
loss: 0.9952704310417175,grad_norm: 0.9999998210171348, iteration: 98749
loss: 1.0458978414535522,grad_norm: 0.9999996502327172, iteration: 98750
loss: 0.9764660596847534,grad_norm: 0.9999990905844659, iteration: 98751
loss: 1.0256824493408203,grad_norm: 0.8419730948929022, iteration: 98752
loss: 1.0164060592651367,grad_norm: 0.7799847840144087, iteration: 98753
loss: 1.0560499429702759,grad_norm: 0.9999996032496024, iteration: 98754
loss: 1.0322939157485962,grad_norm: 0.9038987091431261, iteration: 98755
loss: 1.011252522468567,grad_norm: 0.9999990336830175, iteration: 98756
loss: 1.0364537239074707,grad_norm: 0.9999993922194778, iteration: 98757
loss: 1.0441523790359497,grad_norm: 0.9999997944737076, iteration: 98758
loss: 0.9951273798942566,grad_norm: 0.9999990043331795, iteration: 98759
loss: 1.0277358293533325,grad_norm: 0.8284687880985054, iteration: 98760
loss: 0.9841628074645996,grad_norm: 0.836218784653772, iteration: 98761
loss: 1.0204271078109741,grad_norm: 0.8459524534497858, iteration: 98762
loss: 0.9920819401741028,grad_norm: 0.9999991018899752, iteration: 98763
loss: 0.9992644190788269,grad_norm: 0.9999991099669241, iteration: 98764
loss: 0.9917444586753845,grad_norm: 0.8988413548853001, iteration: 98765
loss: 1.0096964836120605,grad_norm: 0.9999992853936839, iteration: 98766
loss: 1.0242187976837158,grad_norm: 0.8262803885231432, iteration: 98767
loss: 0.9770647287368774,grad_norm: 0.7923271592429414, iteration: 98768
loss: 1.0306795835494995,grad_norm: 0.9999991693501105, iteration: 98769
loss: 0.9850428104400635,grad_norm: 0.9999991667089009, iteration: 98770
loss: 1.1202442646026611,grad_norm: 0.9999996502400168, iteration: 98771
loss: 1.0861687660217285,grad_norm: 0.9999992036238218, iteration: 98772
loss: 0.9954352974891663,grad_norm: 0.9999990544198119, iteration: 98773
loss: 0.9862856864929199,grad_norm: 0.8227424609690533, iteration: 98774
loss: 0.9875519871711731,grad_norm: 0.7506835074079269, iteration: 98775
loss: 0.9982190132141113,grad_norm: 0.8566184300963062, iteration: 98776
loss: 1.0280351638793945,grad_norm: 0.7492609842203941, iteration: 98777
loss: 1.050768494606018,grad_norm: 0.868408578127929, iteration: 98778
loss: 1.0309066772460938,grad_norm: 0.9613444062825278, iteration: 98779
loss: 1.0056551694869995,grad_norm: 0.9581529404600364, iteration: 98780
loss: 1.0112437009811401,grad_norm: 0.9999991102471633, iteration: 98781
loss: 1.0481683015823364,grad_norm: 0.9999995015511421, iteration: 98782
loss: 1.1362444162368774,grad_norm: 0.8874307092300673, iteration: 98783
loss: 1.050186276435852,grad_norm: 0.9999995065258983, iteration: 98784
loss: 0.9836364388465881,grad_norm: 0.9525847496120352, iteration: 98785
loss: 1.016073226928711,grad_norm: 0.9999992403002672, iteration: 98786
loss: 0.9428413510322571,grad_norm: 0.999999076854556, iteration: 98787
loss: 1.0065432786941528,grad_norm: 0.9999991531354475, iteration: 98788
loss: 1.0587729215621948,grad_norm: 0.9999990344540608, iteration: 98789
loss: 1.021655797958374,grad_norm: 0.743132396832117, iteration: 98790
loss: 1.0513554811477661,grad_norm: 0.999999568238634, iteration: 98791
loss: 1.0085088014602661,grad_norm: 0.9555755039583016, iteration: 98792
loss: 1.0076574087142944,grad_norm: 0.9251166727872268, iteration: 98793
loss: 0.9823922514915466,grad_norm: 0.9999989972570125, iteration: 98794
loss: 0.974725604057312,grad_norm: 0.7753504462917422, iteration: 98795
loss: 0.9878366589546204,grad_norm: 0.9280048198888992, iteration: 98796
loss: 1.0501024723052979,grad_norm: 0.9698078079525412, iteration: 98797
loss: 1.0644022226333618,grad_norm: 0.9275467440745471, iteration: 98798
loss: 1.0292288064956665,grad_norm: 0.949552872388425, iteration: 98799
loss: 1.0122218132019043,grad_norm: 0.9999994960272758, iteration: 98800
loss: 1.0799273252487183,grad_norm: 0.9999997396591385, iteration: 98801
loss: 1.0184589624404907,grad_norm: 0.9999995832523275, iteration: 98802
loss: 0.9501391649246216,grad_norm: 0.9999991031840274, iteration: 98803
loss: 1.0260745286941528,grad_norm: 0.9999988726146543, iteration: 98804
loss: 0.9656370878219604,grad_norm: 0.9050495244400248, iteration: 98805
loss: 1.0042264461517334,grad_norm: 0.9999995886153835, iteration: 98806
loss: 1.0596778392791748,grad_norm: 0.7995616496869435, iteration: 98807
loss: 1.020150065422058,grad_norm: 0.9237302901592646, iteration: 98808
loss: 1.1469194889068604,grad_norm: 0.9999995929243543, iteration: 98809
loss: 0.9736117720603943,grad_norm: 0.9999990071136647, iteration: 98810
loss: 0.9966834783554077,grad_norm: 0.9999989367107155, iteration: 98811
loss: 1.033104658126831,grad_norm: 0.8033703358790834, iteration: 98812
loss: 0.9826349020004272,grad_norm: 0.9999991860037265, iteration: 98813
loss: 0.9882087111473083,grad_norm: 0.7716371940788819, iteration: 98814
loss: 1.0578258037567139,grad_norm: 0.9132913572451316, iteration: 98815
loss: 0.9949238896369934,grad_norm: 0.8708274677381092, iteration: 98816
loss: 0.9745369553565979,grad_norm: 0.9999992116168911, iteration: 98817
loss: 1.0321078300476074,grad_norm: 0.9999991483887257, iteration: 98818
loss: 0.9970043301582336,grad_norm: 0.9941016386747731, iteration: 98819
loss: 1.0020595788955688,grad_norm: 0.9184570265846712, iteration: 98820
loss: 0.9963960647583008,grad_norm: 0.9939094973339766, iteration: 98821
loss: 1.023388147354126,grad_norm: 0.999999094458532, iteration: 98822
loss: 1.043144702911377,grad_norm: 0.9999995944945683, iteration: 98823
loss: 0.960666298866272,grad_norm: 0.9614077847255631, iteration: 98824
loss: 1.0212968587875366,grad_norm: 0.9381178808655278, iteration: 98825
loss: 0.9808074235916138,grad_norm: 0.8971054766635488, iteration: 98826
loss: 1.0256332159042358,grad_norm: 0.9227472606022843, iteration: 98827
loss: 1.0562323331832886,grad_norm: 0.9999991031794712, iteration: 98828
loss: 0.9780537486076355,grad_norm: 0.8314025041774921, iteration: 98829
loss: 1.010701060295105,grad_norm: 0.841947047294136, iteration: 98830
loss: 1.0102864503860474,grad_norm: 0.9999996373979781, iteration: 98831
loss: 0.9884476065635681,grad_norm: 0.999999034375033, iteration: 98832
loss: 1.0167592763900757,grad_norm: 0.7943918172516156, iteration: 98833
loss: 0.9761626720428467,grad_norm: 0.9999992613794063, iteration: 98834
loss: 1.0127770900726318,grad_norm: 0.8709558060804705, iteration: 98835
loss: 1.007771611213684,grad_norm: 0.9999992202685043, iteration: 98836
loss: 0.9677248597145081,grad_norm: 0.8209424556990652, iteration: 98837
loss: 1.047598958015442,grad_norm: 0.9999998510756318, iteration: 98838
loss: 1.041300654411316,grad_norm: 0.9788338578635728, iteration: 98839
loss: 1.008805751800537,grad_norm: 0.8646885400066807, iteration: 98840
loss: 1.030111312866211,grad_norm: 0.9914554054347572, iteration: 98841
loss: 0.9921471476554871,grad_norm: 0.7177503345572276, iteration: 98842
loss: 0.9941761493682861,grad_norm: 0.8551648185251344, iteration: 98843
loss: 0.9816930294036865,grad_norm: 0.9999991401641534, iteration: 98844
loss: 1.0197772979736328,grad_norm: 0.9999999290672128, iteration: 98845
loss: 0.9916857481002808,grad_norm: 0.9232337314161767, iteration: 98846
loss: 0.9568555355072021,grad_norm: 0.9999989987205978, iteration: 98847
loss: 1.0285329818725586,grad_norm: 0.9999997789561036, iteration: 98848
loss: 1.0328174829483032,grad_norm: 0.8565263792310358, iteration: 98849
loss: 0.9788526892662048,grad_norm: 0.9853719633714937, iteration: 98850
loss: 1.0030348300933838,grad_norm: 0.9530929593320394, iteration: 98851
loss: 1.01162588596344,grad_norm: 0.997893481435707, iteration: 98852
loss: 1.0345513820648193,grad_norm: 0.8995228274181968, iteration: 98853
loss: 0.9913461804389954,grad_norm: 0.9999994129887331, iteration: 98854
loss: 1.0348416566848755,grad_norm: 0.9999998507168274, iteration: 98855
loss: 1.0514724254608154,grad_norm: 0.8630202663887319, iteration: 98856
loss: 1.069370150566101,grad_norm: 0.9999991749068698, iteration: 98857
loss: 0.9650172591209412,grad_norm: 0.9999991974000574, iteration: 98858
loss: 0.988595724105835,grad_norm: 0.999999126135326, iteration: 98859
loss: 1.0144195556640625,grad_norm: 0.8707892376919065, iteration: 98860
loss: 1.073779821395874,grad_norm: 0.9568969122567303, iteration: 98861
loss: 1.0966750383377075,grad_norm: 0.9999998268521676, iteration: 98862
loss: 1.0670433044433594,grad_norm: 0.9999991797145553, iteration: 98863
loss: 1.0487395524978638,grad_norm: 0.8976864248368801, iteration: 98864
loss: 1.0038783550262451,grad_norm: 0.9857767717476754, iteration: 98865
loss: 1.0400381088256836,grad_norm: 0.9999996230292599, iteration: 98866
loss: 1.0688234567642212,grad_norm: 0.9999995130926274, iteration: 98867
loss: 0.9667998552322388,grad_norm: 0.9764256438812187, iteration: 98868
loss: 0.9572481513023376,grad_norm: 0.8414119251328673, iteration: 98869
loss: 1.0025992393493652,grad_norm: 0.999999123901348, iteration: 98870
loss: 1.036813735961914,grad_norm: 0.9095516300050756, iteration: 98871
loss: 1.0247799158096313,grad_norm: 0.999999394836031, iteration: 98872
loss: 0.9894350171089172,grad_norm: 0.9999989766978004, iteration: 98873
loss: 1.0365757942199707,grad_norm: 0.8309766049880747, iteration: 98874
loss: 1.0017591714859009,grad_norm: 0.9999992609484903, iteration: 98875
loss: 1.0119187831878662,grad_norm: 0.9137746238692451, iteration: 98876
loss: 0.9989844560623169,grad_norm: 0.873171132387249, iteration: 98877
loss: 1.141284704208374,grad_norm: 0.9999991359950591, iteration: 98878
loss: 1.029422402381897,grad_norm: 0.9999991400237968, iteration: 98879
loss: 1.0171693563461304,grad_norm: 0.9999989991379203, iteration: 98880
loss: 0.9931036233901978,grad_norm: 0.8986969788344944, iteration: 98881
loss: 0.9985308051109314,grad_norm: 0.9739068724344041, iteration: 98882
loss: 0.9683901071548462,grad_norm: 0.8376554291877019, iteration: 98883
loss: 1.0292500257492065,grad_norm: 0.8198676417319094, iteration: 98884
loss: 1.0418089628219604,grad_norm: 0.9999991895117012, iteration: 98885
loss: 0.9737913012504578,grad_norm: 0.9775553666362043, iteration: 98886
loss: 1.0115686655044556,grad_norm: 0.9999995213413669, iteration: 98887
loss: 1.024908423423767,grad_norm: 0.9892504047821302, iteration: 98888
loss: 1.0371520519256592,grad_norm: 0.9999991282695825, iteration: 98889
loss: 1.0296046733856201,grad_norm: 0.9999989722033511, iteration: 98890
loss: 1.0321232080459595,grad_norm: 0.9999990114650994, iteration: 98891
loss: 0.9922871589660645,grad_norm: 0.9084097935418026, iteration: 98892
loss: 1.003508448600769,grad_norm: 0.9999999282072427, iteration: 98893
loss: 1.0869197845458984,grad_norm: 0.8554212809163231, iteration: 98894
loss: 1.0321552753448486,grad_norm: 0.9999991636531095, iteration: 98895
loss: 0.974483072757721,grad_norm: 0.858657609970066, iteration: 98896
loss: 1.0201468467712402,grad_norm: 0.7783091524864588, iteration: 98897
loss: 0.9574474096298218,grad_norm: 0.9438530882749286, iteration: 98898
loss: 1.0193157196044922,grad_norm: 0.943833010736728, iteration: 98899
loss: 1.0610086917877197,grad_norm: 0.9999992862803858, iteration: 98900
loss: 1.0520005226135254,grad_norm: 0.9999997564589187, iteration: 98901
loss: 1.0794495344161987,grad_norm: 0.9999999449467738, iteration: 98902
loss: 0.971287727355957,grad_norm: 0.950554938818898, iteration: 98903
loss: 1.056609869003296,grad_norm: 0.9999990732629852, iteration: 98904
loss: 1.0410457849502563,grad_norm: 0.9999993037631184, iteration: 98905
loss: 1.0509787797927856,grad_norm: 0.9999990131557424, iteration: 98906
loss: 1.0311861038208008,grad_norm: 0.9999993359479421, iteration: 98907
loss: 0.9816076159477234,grad_norm: 0.9764060177032218, iteration: 98908
loss: 1.0227144956588745,grad_norm: 0.9999995316412016, iteration: 98909
loss: 1.0525089502334595,grad_norm: 0.9999995942156932, iteration: 98910
loss: 0.9990994930267334,grad_norm: 0.8136113768681311, iteration: 98911
loss: 0.9810953140258789,grad_norm: 0.9435475701937508, iteration: 98912
loss: 0.9748750329017639,grad_norm: 0.9999994397582029, iteration: 98913
loss: 0.9983664751052856,grad_norm: 0.9977049436704402, iteration: 98914
loss: 0.9640786051750183,grad_norm: 0.8769948035084586, iteration: 98915
loss: 1.159584641456604,grad_norm: 0.9999998310980472, iteration: 98916
loss: 1.0523773431777954,grad_norm: 0.9999993834218585, iteration: 98917
loss: 1.0436869859695435,grad_norm: 0.8946221716365134, iteration: 98918
loss: 1.0347888469696045,grad_norm: 0.9851023746882681, iteration: 98919
loss: 1.0065488815307617,grad_norm: 0.9999995555197384, iteration: 98920
loss: 1.0739964246749878,grad_norm: 0.9999990618915787, iteration: 98921
loss: 1.0290753841400146,grad_norm: 0.9999990666289732, iteration: 98922
loss: 0.9346163868904114,grad_norm: 0.9848674559680266, iteration: 98923
loss: 0.9987127184867859,grad_norm: 0.9999989718980178, iteration: 98924
loss: 1.0262192487716675,grad_norm: 0.9999996404110985, iteration: 98925
loss: 0.9985083937644958,grad_norm: 0.9999993503160799, iteration: 98926
loss: 1.0329420566558838,grad_norm: 0.9999990060299753, iteration: 98927
loss: 1.025914192199707,grad_norm: 0.8250081078357827, iteration: 98928
loss: 1.0333521366119385,grad_norm: 0.9606334367328921, iteration: 98929
loss: 1.0026214122772217,grad_norm: 0.9999999265515156, iteration: 98930
loss: 1.03495192527771,grad_norm: 0.8109169026189044, iteration: 98931
loss: 1.0261629819869995,grad_norm: 0.9999996441292371, iteration: 98932
loss: 1.0146021842956543,grad_norm: 0.9376239457159522, iteration: 98933
loss: 0.9915236830711365,grad_norm: 0.9552753469927681, iteration: 98934
loss: 1.0115892887115479,grad_norm: 0.9020177038465674, iteration: 98935
loss: 1.057930588722229,grad_norm: 0.9999994202892251, iteration: 98936
loss: 0.9835184812545776,grad_norm: 0.9743204850154715, iteration: 98937
loss: 1.044171690940857,grad_norm: 0.9969559034455721, iteration: 98938
loss: 1.017811894416809,grad_norm: 0.8397503729463208, iteration: 98939
loss: 1.0047922134399414,grad_norm: 0.9999997236141001, iteration: 98940
loss: 0.9894881248474121,grad_norm: 0.9640911689939935, iteration: 98941
loss: 0.99510258436203,grad_norm: 0.9999990440722852, iteration: 98942
loss: 0.9974509477615356,grad_norm: 0.8029412578028138, iteration: 98943
loss: 1.0208864212036133,grad_norm: 0.7285368202816879, iteration: 98944
loss: 1.0034372806549072,grad_norm: 0.9999991853462313, iteration: 98945
loss: 1.0884222984313965,grad_norm: 0.9999992812521159, iteration: 98946
loss: 1.022995114326477,grad_norm: 0.9999995385555159, iteration: 98947
loss: 1.0311857461929321,grad_norm: 0.9999990299292664, iteration: 98948
loss: 0.9727660417556763,grad_norm: 0.882421229648181, iteration: 98949
loss: 0.9966307282447815,grad_norm: 0.9999992799290798, iteration: 98950
loss: 0.9834645390510559,grad_norm: 0.8422062927383238, iteration: 98951
loss: 1.0071899890899658,grad_norm: 0.9248744221277369, iteration: 98952
loss: 1.0225157737731934,grad_norm: 0.9999990876436438, iteration: 98953
loss: 0.9897682666778564,grad_norm: 0.9999989756208457, iteration: 98954
loss: 1.0772652626037598,grad_norm: 0.9999992213422548, iteration: 98955
loss: 1.0085748434066772,grad_norm: 0.9415228340049719, iteration: 98956
loss: 1.0238063335418701,grad_norm: 0.8470237703425175, iteration: 98957
loss: 0.9998626708984375,grad_norm: 0.7614305896483186, iteration: 98958
loss: 1.0096439123153687,grad_norm: 0.9977466073975639, iteration: 98959
loss: 0.9920844435691833,grad_norm: 0.8178849211250726, iteration: 98960
loss: 1.004441499710083,grad_norm: 0.9973400409821808, iteration: 98961
loss: 1.0241621732711792,grad_norm: 0.9142949606393542, iteration: 98962
loss: 1.000595211982727,grad_norm: 0.904401627211212, iteration: 98963
loss: 1.0269930362701416,grad_norm: 0.9799407749593878, iteration: 98964
loss: 0.9975234866142273,grad_norm: 0.7721197719187904, iteration: 98965
loss: 1.0276917219161987,grad_norm: 0.9999989364276458, iteration: 98966
loss: 1.0047954320907593,grad_norm: 0.8688870613063084, iteration: 98967
loss: 1.0308812856674194,grad_norm: 0.8728218081987743, iteration: 98968
loss: 1.0338870286941528,grad_norm: 0.9303760631661391, iteration: 98969
loss: 0.9748208522796631,grad_norm: 0.9714232793214853, iteration: 98970
loss: 0.9966306686401367,grad_norm: 0.9952082486917392, iteration: 98971
loss: 1.0141690969467163,grad_norm: 0.8542119718952872, iteration: 98972
loss: 1.0653012990951538,grad_norm: 0.9999990611667717, iteration: 98973
loss: 1.0201750993728638,grad_norm: 0.8757579433404302, iteration: 98974
loss: 0.9890230894088745,grad_norm: 0.9490589458202443, iteration: 98975
loss: 1.0250910520553589,grad_norm: 0.9999991490854282, iteration: 98976
loss: 1.0385626554489136,grad_norm: 0.9314943018728719, iteration: 98977
loss: 1.150052547454834,grad_norm: 0.9999998865882682, iteration: 98978
loss: 1.002989411354065,grad_norm: 0.8686878008149875, iteration: 98979
loss: 1.0332411527633667,grad_norm: 0.9108694203036595, iteration: 98980
loss: 1.0144139528274536,grad_norm: 0.9999993523892885, iteration: 98981
loss: 1.0076043605804443,grad_norm: 0.8636205049444917, iteration: 98982
loss: 1.008549451828003,grad_norm: 0.8945190945342675, iteration: 98983
loss: 1.0998893976211548,grad_norm: 0.9999993544862374, iteration: 98984
loss: 1.0102509260177612,grad_norm: 0.8760456365579609, iteration: 98985
loss: 1.038459062576294,grad_norm: 0.7874230559818478, iteration: 98986
loss: 1.0663988590240479,grad_norm: 0.9999998608867964, iteration: 98987
loss: 0.9896150827407837,grad_norm: 0.9171462664780269, iteration: 98988
loss: 1.1703152656555176,grad_norm: 0.9999993665325343, iteration: 98989
loss: 0.9778494238853455,grad_norm: 0.836906172016109, iteration: 98990
loss: 1.0138273239135742,grad_norm: 0.9907849751044026, iteration: 98991
loss: 0.991377592086792,grad_norm: 0.8130670736093012, iteration: 98992
loss: 1.2245644330978394,grad_norm: 0.9999996298867009, iteration: 98993
loss: 1.108866810798645,grad_norm: 0.9999994380599927, iteration: 98994
loss: 1.0538650751113892,grad_norm: 0.8670495183820691, iteration: 98995
loss: 1.1800062656402588,grad_norm: 0.9999993560107039, iteration: 98996
loss: 1.2399038076400757,grad_norm: 0.9999992610884549, iteration: 98997
loss: 1.2307181358337402,grad_norm: 0.9999994306001881, iteration: 98998
loss: 1.1478861570358276,grad_norm: 0.9999992889646204, iteration: 98999
loss: 1.7741384506225586,grad_norm: 1.0000000037968744, iteration: 99000
loss: 1.1124767065048218,grad_norm: 0.9841095905293116, iteration: 99001
loss: 1.4955204725265503,grad_norm: 0.9999998013299386, iteration: 99002
loss: 1.2721631526947021,grad_norm: 0.9999998169874662, iteration: 99003
loss: 1.370422124862671,grad_norm: 0.9999997071731463, iteration: 99004
loss: 1.3487401008605957,grad_norm: 0.9999996305198737, iteration: 99005
loss: 1.8565309047698975,grad_norm: 0.9999999978956899, iteration: 99006
loss: 1.7328872680664062,grad_norm: 0.9999999012153847, iteration: 99007
loss: 1.0999265909194946,grad_norm: 0.9999994179463699, iteration: 99008
loss: 1.8008891344070435,grad_norm: 0.999999918252162, iteration: 99009
loss: 1.5965520143508911,grad_norm: 0.9999998689874741, iteration: 99010
loss: 1.538142442703247,grad_norm: 0.999999811015186, iteration: 99011
loss: 1.465050458908081,grad_norm: 0.999999968608569, iteration: 99012
loss: 1.4220376014709473,grad_norm: 0.9999998683946628, iteration: 99013
loss: 1.7759321928024292,grad_norm: 0.9999999165482356, iteration: 99014
loss: 1.3150980472564697,grad_norm: 0.999999914661123, iteration: 99015
loss: 1.5576738119125366,grad_norm: 1.0000000160491638, iteration: 99016
loss: 1.6050487756729126,grad_norm: 0.9999999654969496, iteration: 99017
loss: 1.538588523864746,grad_norm: 0.999999919027574, iteration: 99018
loss: 1.2911616563796997,grad_norm: 0.9999999462195346, iteration: 99019
loss: 1.3380228281021118,grad_norm: 0.9999998657218039, iteration: 99020
loss: 1.413191795349121,grad_norm: 0.9999999498904754, iteration: 99021
loss: 1.3151663541793823,grad_norm: 1.0000000177099133, iteration: 99022
loss: 1.4821254014968872,grad_norm: 1.0000000972503849, iteration: 99023
loss: 1.0598466396331787,grad_norm: 0.9999991797825157, iteration: 99024
loss: 1.4422980546951294,grad_norm: 0.9999997985886547, iteration: 99025
loss: 1.276728868484497,grad_norm: 0.9999998713459092, iteration: 99026
loss: 1.3234412670135498,grad_norm: 0.9999999712753807, iteration: 99027
loss: 1.1523833274841309,grad_norm: 0.9999998398539557, iteration: 99028
loss: 1.1591103076934814,grad_norm: 0.9999998175439949, iteration: 99029
loss: 1.4619311094284058,grad_norm: 0.9999998752929127, iteration: 99030
loss: 1.3280478715896606,grad_norm: 0.9999996996680378, iteration: 99031
loss: 1.2817753553390503,grad_norm: 0.9999999971517348, iteration: 99032
loss: 1.2707494497299194,grad_norm: 0.9999997852133471, iteration: 99033
loss: 1.841697335243225,grad_norm: 0.9999998878196182, iteration: 99034
loss: 1.1520484685897827,grad_norm: 0.999999718159844, iteration: 99035
loss: 1.3398170471191406,grad_norm: 0.9999993651941465, iteration: 99036
loss: 1.0623562335968018,grad_norm: 0.9999998329395233, iteration: 99037
loss: 1.1793373823165894,grad_norm: 0.999999680471044, iteration: 99038
loss: 1.333082914352417,grad_norm: 0.9999993829351586, iteration: 99039
loss: 1.1536948680877686,grad_norm: 0.9999999968210227, iteration: 99040
loss: 1.2415904998779297,grad_norm: 0.9999999016220257, iteration: 99041
loss: 1.277104139328003,grad_norm: 0.9999998218741636, iteration: 99042
loss: 1.3511394262313843,grad_norm: 0.9999996648189726, iteration: 99043
loss: 1.4320570230484009,grad_norm: 0.9999997647214608, iteration: 99044
loss: 1.1990079879760742,grad_norm: 0.9999999864390101, iteration: 99045
loss: 1.172380805015564,grad_norm: 0.9999993138543831, iteration: 99046
loss: 1.4504096508026123,grad_norm: 0.9999999276307876, iteration: 99047
loss: 1.299349069595337,grad_norm: 0.9999998155357146, iteration: 99048
loss: 1.356605052947998,grad_norm: 0.9999999507975545, iteration: 99049
loss: 1.132293939590454,grad_norm: 1.0000000035191106, iteration: 99050
loss: 1.277053952217102,grad_norm: 0.9999997062182548, iteration: 99051
loss: 1.331271767616272,grad_norm: 0.9999998410228143, iteration: 99052
loss: 1.2614295482635498,grad_norm: 0.9999997935865343, iteration: 99053
loss: 1.0989240407943726,grad_norm: 0.9999998589707177, iteration: 99054
loss: 1.1561505794525146,grad_norm: 0.9999993936315319, iteration: 99055
loss: 1.180598497390747,grad_norm: 0.9999997578802189, iteration: 99056
loss: 1.3017276525497437,grad_norm: 0.9999997460859228, iteration: 99057
loss: 1.1384479999542236,grad_norm: 0.9829158574867869, iteration: 99058
loss: 1.1732741594314575,grad_norm: 0.9999996164593695, iteration: 99059
loss: 1.056190848350525,grad_norm: 0.9999991331255581, iteration: 99060
loss: 1.14643394947052,grad_norm: 0.9999997656732305, iteration: 99061
loss: 0.9954237937927246,grad_norm: 0.9072371369618437, iteration: 99062
loss: 1.0659887790679932,grad_norm: 0.919782524567953, iteration: 99063
loss: 1.200921893119812,grad_norm: 0.999999974901503, iteration: 99064
loss: 1.1215856075286865,grad_norm: 0.9999991620496236, iteration: 99065
loss: 1.0623149871826172,grad_norm: 0.9999993151362321, iteration: 99066
loss: 1.3482481241226196,grad_norm: 0.9999999817800795, iteration: 99067
loss: 1.0847669839859009,grad_norm: 0.999999474662758, iteration: 99068
loss: 0.996698260307312,grad_norm: 0.9066532501470558, iteration: 99069
loss: 1.2729079723358154,grad_norm: 1.000000166761871, iteration: 99070
loss: 1.0754177570343018,grad_norm: 0.8600736480335791, iteration: 99071
loss: 1.068942904472351,grad_norm: 0.9785831583596636, iteration: 99072
loss: 1.1046957969665527,grad_norm: 0.999999071447972, iteration: 99073
loss: 1.0064035654067993,grad_norm: 0.7773091685980987, iteration: 99074
loss: 1.005812644958496,grad_norm: 0.7082420991381749, iteration: 99075
loss: 1.0212807655334473,grad_norm: 0.9999999220564756, iteration: 99076
loss: 1.0559868812561035,grad_norm: 0.9999993909294705, iteration: 99077
loss: 1.0915749073028564,grad_norm: 0.9999989936649916, iteration: 99078
loss: 1.111229419708252,grad_norm: 0.9999993640761127, iteration: 99079
loss: 1.0027862787246704,grad_norm: 0.999999132049267, iteration: 99080
loss: 1.0346803665161133,grad_norm: 0.8624551835786707, iteration: 99081
loss: 1.0136911869049072,grad_norm: 0.881355906421057, iteration: 99082
loss: 1.1337789297103882,grad_norm: 0.9999996550374165, iteration: 99083
loss: 0.9971591234207153,grad_norm: 0.9277066971027526, iteration: 99084
loss: 1.2822760343551636,grad_norm: 0.9999998685104706, iteration: 99085
loss: 1.2123600244522095,grad_norm: 0.9999994310248201, iteration: 99086
loss: 1.1302895545959473,grad_norm: 0.9999991146115877, iteration: 99087
loss: 1.070469856262207,grad_norm: 0.9999990297478443, iteration: 99088
loss: 1.2390562295913696,grad_norm: 0.9999991955949111, iteration: 99089
loss: 1.472020149230957,grad_norm: 0.999999386301477, iteration: 99090
loss: 1.109398365020752,grad_norm: 0.9999995750465693, iteration: 99091
loss: 1.2276531457901,grad_norm: 0.999999200123911, iteration: 99092
loss: 1.1839981079101562,grad_norm: 0.9999998653116892, iteration: 99093
loss: 1.3482813835144043,grad_norm: 0.9999997750617264, iteration: 99094
loss: 1.145412564277649,grad_norm: 0.9999994482140826, iteration: 99095
loss: 1.0469008684158325,grad_norm: 0.9999992398695625, iteration: 99096
loss: 1.0663273334503174,grad_norm: 0.9999993093938533, iteration: 99097
loss: 1.0279806852340698,grad_norm: 0.9999998036615556, iteration: 99098
loss: 1.1433545351028442,grad_norm: 0.9999999238795377, iteration: 99099
loss: 1.0904712677001953,grad_norm: 0.9999994539771424, iteration: 99100
loss: 1.0837008953094482,grad_norm: 0.9526613667062774, iteration: 99101
loss: 1.0603450536727905,grad_norm: 0.9999995143910021, iteration: 99102
loss: 1.2793940305709839,grad_norm: 0.9999998410486257, iteration: 99103
loss: 1.0559741258621216,grad_norm: 0.9999998171669565, iteration: 99104
loss: 1.2571860551834106,grad_norm: 0.9999999266320375, iteration: 99105
loss: 1.0207773447036743,grad_norm: 0.7262284256334236, iteration: 99106
loss: 1.0042164325714111,grad_norm: 0.9509896345843237, iteration: 99107
loss: 1.017333745956421,grad_norm: 0.9999993235964905, iteration: 99108
loss: 1.2186784744262695,grad_norm: 0.9999998711672631, iteration: 99109
loss: 1.073199987411499,grad_norm: 0.9999999577184702, iteration: 99110
loss: 0.9970523118972778,grad_norm: 0.9999992976198898, iteration: 99111
loss: 1.1397356986999512,grad_norm: 0.9999998796535101, iteration: 99112
loss: 1.1066213846206665,grad_norm: 0.9999994202334018, iteration: 99113
loss: 0.9780958294868469,grad_norm: 0.9748685511734082, iteration: 99114
loss: 1.091130018234253,grad_norm: 0.9999993304792836, iteration: 99115
loss: 1.1135735511779785,grad_norm: 0.99999926638316, iteration: 99116
loss: 1.1225950717926025,grad_norm: 0.9999996840354589, iteration: 99117
loss: 1.1485297679901123,grad_norm: 1.0000000053897955, iteration: 99118
loss: 1.019221544265747,grad_norm: 0.9999996378785678, iteration: 99119
loss: 1.0308419466018677,grad_norm: 0.927903379209401, iteration: 99120
loss: 1.0441585779190063,grad_norm: 0.9999999920472732, iteration: 99121
loss: 1.0233433246612549,grad_norm: 0.9999999569337097, iteration: 99122
loss: 0.9820556044578552,grad_norm: 0.9999990540533746, iteration: 99123
loss: 1.0697364807128906,grad_norm: 0.9999992501906524, iteration: 99124
loss: 1.1583787202835083,grad_norm: 0.9999993382472285, iteration: 99125
loss: 1.0342906713485718,grad_norm: 0.9999992183544334, iteration: 99126
loss: 1.025054931640625,grad_norm: 0.8897540114097573, iteration: 99127
loss: 1.100081205368042,grad_norm: 0.9999998783757132, iteration: 99128
loss: 1.2485114336013794,grad_norm: 0.999999320165067, iteration: 99129
loss: 1.1435465812683105,grad_norm: 0.9999992454750363, iteration: 99130
loss: 1.2270654439926147,grad_norm: 0.9999993442115801, iteration: 99131
loss: 1.042277216911316,grad_norm: 0.9999995557324037, iteration: 99132
loss: 1.1166715621948242,grad_norm: 0.9999999708891173, iteration: 99133
loss: 1.0069977045059204,grad_norm: 0.9999990929566894, iteration: 99134
loss: 1.0455198287963867,grad_norm: 0.9999996047509891, iteration: 99135
loss: 1.1036038398742676,grad_norm: 0.9999994531356442, iteration: 99136
loss: 1.2533527612686157,grad_norm: 1.0000000121347343, iteration: 99137
loss: 1.0572419166564941,grad_norm: 0.9999990611957843, iteration: 99138
loss: 1.0264021158218384,grad_norm: 0.9999994460209884, iteration: 99139
loss: 1.087598443031311,grad_norm: 0.9999995089031393, iteration: 99140
loss: 1.0469186305999756,grad_norm: 0.8838048004404223, iteration: 99141
loss: 1.261385202407837,grad_norm: 0.9999995616252461, iteration: 99142
loss: 1.125960350036621,grad_norm: 0.9999994403364235, iteration: 99143
loss: 1.1366899013519287,grad_norm: 0.9999998991382391, iteration: 99144
loss: 1.0822306871414185,grad_norm: 0.9999998295977215, iteration: 99145
loss: 1.0905895233154297,grad_norm: 0.9999997296838221, iteration: 99146
loss: 1.1058216094970703,grad_norm: 0.999999890753688, iteration: 99147
loss: 1.3146040439605713,grad_norm: 0.9999999108776488, iteration: 99148
loss: 0.9945141673088074,grad_norm: 0.9699236928294136, iteration: 99149
loss: 1.5124584436416626,grad_norm: 0.9999999716774655, iteration: 99150
loss: 1.1512460708618164,grad_norm: 0.9999999734133315, iteration: 99151
loss: 1.245700478553772,grad_norm: 0.999999804813088, iteration: 99152
loss: 1.1796221733093262,grad_norm: 0.9999999374274673, iteration: 99153
loss: 1.2976601123809814,grad_norm: 0.99999985366656, iteration: 99154
loss: 1.1093990802764893,grad_norm: 0.9999994882710282, iteration: 99155
loss: 1.2583470344543457,grad_norm: 0.9999995715486308, iteration: 99156
loss: 1.4201220273971558,grad_norm: 0.9999999903704643, iteration: 99157
loss: 0.9764320254325867,grad_norm: 0.9999993932994822, iteration: 99158
loss: 1.2089757919311523,grad_norm: 0.9999994613338563, iteration: 99159
loss: 1.1564325094223022,grad_norm: 0.9999998350262056, iteration: 99160
loss: 1.4371525049209595,grad_norm: 0.9999999142800153, iteration: 99161
loss: 1.1841906309127808,grad_norm: 0.9999996528186305, iteration: 99162
loss: 1.0859169960021973,grad_norm: 0.9999997373270801, iteration: 99163
loss: 1.2387940883636475,grad_norm: 0.999999644534928, iteration: 99164
loss: 1.25607168674469,grad_norm: 0.9999999672099421, iteration: 99165
loss: 1.2792497873306274,grad_norm: 0.9999998736804743, iteration: 99166
loss: 1.1914290189743042,grad_norm: 0.9999998880951562, iteration: 99167
loss: 1.5162891149520874,grad_norm: 0.9999999180521927, iteration: 99168
loss: 1.6510099172592163,grad_norm: 1.0000000316263247, iteration: 99169
loss: 1.4299087524414062,grad_norm: 0.9999998296784847, iteration: 99170
loss: 1.7372909784317017,grad_norm: 0.9999999159636989, iteration: 99171
loss: 1.4310245513916016,grad_norm: 0.9999999804258739, iteration: 99172
loss: 1.794559359550476,grad_norm: 0.9999999611347101, iteration: 99173
loss: 1.7164016962051392,grad_norm: 0.9999999441940683, iteration: 99174
loss: 1.3369852304458618,grad_norm: 0.9999999575797985, iteration: 99175
loss: 1.7303524017333984,grad_norm: 0.9999998972977198, iteration: 99176
loss: 1.4804797172546387,grad_norm: 0.9999999094169333, iteration: 99177
loss: 1.7240277528762817,grad_norm: 0.9999999363001488, iteration: 99178
loss: 1.4709572792053223,grad_norm: 1.0000000207400528, iteration: 99179
loss: 1.9702340364456177,grad_norm: 0.9999999023236326, iteration: 99180
loss: 1.4213563203811646,grad_norm: 0.9999998540306212, iteration: 99181
loss: 1.9123915433883667,grad_norm: 0.9999999106621292, iteration: 99182
loss: 2.239231824874878,grad_norm: 1.0000000009831536, iteration: 99183
loss: 1.5005767345428467,grad_norm: 0.9999998457581804, iteration: 99184
loss: 1.9945262670516968,grad_norm: 1.0000000157221554, iteration: 99185
loss: 1.6782732009887695,grad_norm: 0.999999982498926, iteration: 99186
loss: 1.976736068725586,grad_norm: 0.9999999241587025, iteration: 99187
loss: 1.5319952964782715,grad_norm: 0.9999999800193108, iteration: 99188
loss: 1.4477896690368652,grad_norm: 0.9999999835129889, iteration: 99189
loss: 1.4133566617965698,grad_norm: 0.999999615172312, iteration: 99190
loss: 1.782446026802063,grad_norm: 1.0000000347222282, iteration: 99191
loss: 1.4320564270019531,grad_norm: 0.9999999115206921, iteration: 99192
loss: 1.278916358947754,grad_norm: 0.9999992467433048, iteration: 99193
loss: 1.3293979167938232,grad_norm: 0.9999998870483306, iteration: 99194
loss: 1.0479661226272583,grad_norm: 0.9999996756828594, iteration: 99195
loss: 1.8527474403381348,grad_norm: 0.9999998001789965, iteration: 99196
loss: 1.602787971496582,grad_norm: 0.9999999134743197, iteration: 99197
loss: 1.184348225593567,grad_norm: 0.9999992504689047, iteration: 99198
loss: 1.1096798181533813,grad_norm: 0.9999993191406669, iteration: 99199
loss: 1.1820906400680542,grad_norm: 0.9999997290627188, iteration: 99200
loss: 1.1578150987625122,grad_norm: 0.9999995042901847, iteration: 99201
loss: 1.2510185241699219,grad_norm: 0.9999998348448593, iteration: 99202
loss: 1.301239252090454,grad_norm: 0.9999999247121116, iteration: 99203
loss: 1.4193922281265259,grad_norm: 0.9999999768471608, iteration: 99204
loss: 1.073544979095459,grad_norm: 0.9999998712513408, iteration: 99205
loss: 1.3203797340393066,grad_norm: 0.9999993970727491, iteration: 99206
loss: 1.1468602418899536,grad_norm: 0.9999996305356965, iteration: 99207
loss: 1.1711044311523438,grad_norm: 0.9999996261276989, iteration: 99208
loss: 1.5055207014083862,grad_norm: 0.9999998009259564, iteration: 99209
loss: 1.2329262495040894,grad_norm: 0.9999997591563938, iteration: 99210
loss: 1.0602809190750122,grad_norm: 0.9999990827469266, iteration: 99211
loss: 1.0587276220321655,grad_norm: 0.9999999082721167, iteration: 99212
loss: 1.0640590190887451,grad_norm: 0.9999994408589954, iteration: 99213
loss: 1.0436030626296997,grad_norm: 0.995281100140604, iteration: 99214
loss: 1.1063772439956665,grad_norm: 0.9999991409009408, iteration: 99215
loss: 1.1151084899902344,grad_norm: 0.9999998495104929, iteration: 99216
loss: 1.0981991291046143,grad_norm: 1.000000038712157, iteration: 99217
loss: 1.0552935600280762,grad_norm: 0.999999483479225, iteration: 99218
loss: 1.1210520267486572,grad_norm: 0.9999992908145403, iteration: 99219
loss: 1.0934429168701172,grad_norm: 0.9999994922292438, iteration: 99220
loss: 1.3211398124694824,grad_norm: 0.9999998326879997, iteration: 99221
loss: 1.048406720161438,grad_norm: 0.9999998870625886, iteration: 99222
loss: 1.1594454050064087,grad_norm: 0.9999991549903476, iteration: 99223
loss: 1.0903265476226807,grad_norm: 0.9999992830419449, iteration: 99224
loss: 1.1599152088165283,grad_norm: 0.9999995964708971, iteration: 99225
loss: 1.177791953086853,grad_norm: 0.999999510368189, iteration: 99226
loss: 1.295792818069458,grad_norm: 0.999999400566746, iteration: 99227
loss: 1.2792013883590698,grad_norm: 0.9999998252590817, iteration: 99228
loss: 0.9757020473480225,grad_norm: 0.9999991135179875, iteration: 99229
loss: 1.0833083391189575,grad_norm: 0.9999991158451498, iteration: 99230
loss: 1.4843178987503052,grad_norm: 0.9999995056790756, iteration: 99231
loss: 0.9840335845947266,grad_norm: 0.9999990431042037, iteration: 99232
loss: 1.1317945718765259,grad_norm: 0.9999991365903619, iteration: 99233
loss: 1.2742260694503784,grad_norm: 0.9999998753681573, iteration: 99234
loss: 1.061302661895752,grad_norm: 0.9999996208286693, iteration: 99235
loss: 1.0667921304702759,grad_norm: 0.9999993812722002, iteration: 99236
loss: 1.0974596738815308,grad_norm: 0.9999995533649196, iteration: 99237
loss: 1.114675760269165,grad_norm: 0.9999999018957675, iteration: 99238
loss: 1.177910327911377,grad_norm: 0.9999995354189998, iteration: 99239
loss: 1.1731035709381104,grad_norm: 0.9999997878501976, iteration: 99240
loss: 1.0846930742263794,grad_norm: 0.9999993457558478, iteration: 99241
loss: 0.9668778777122498,grad_norm: 0.9999992355735177, iteration: 99242
loss: 1.0329582691192627,grad_norm: 0.9988447547123281, iteration: 99243
loss: 1.186181902885437,grad_norm: 0.9999994478183414, iteration: 99244
loss: 1.0924043655395508,grad_norm: 0.9999992922783089, iteration: 99245
loss: 1.0259020328521729,grad_norm: 0.9999995573847653, iteration: 99246
loss: 0.9800333380699158,grad_norm: 0.9214663571976007, iteration: 99247
loss: 1.0842770338058472,grad_norm: 0.9999990209083103, iteration: 99248
loss: 1.0482043027877808,grad_norm: 0.9278195407316868, iteration: 99249
loss: 1.1946089267730713,grad_norm: 0.9999996455325825, iteration: 99250
loss: 1.1111456155776978,grad_norm: 0.9999995221542468, iteration: 99251
loss: 1.0725643634796143,grad_norm: 0.9999998742087468, iteration: 99252
loss: 0.9966212511062622,grad_norm: 0.9999991669757385, iteration: 99253
loss: 1.0982452630996704,grad_norm: 0.9999993764277356, iteration: 99254
loss: 1.343847393989563,grad_norm: 0.9999997285521861, iteration: 99255
loss: 1.0337563753128052,grad_norm: 0.9999995403632397, iteration: 99256
loss: 0.9822289347648621,grad_norm: 0.876697015991002, iteration: 99257
loss: 1.0671747922897339,grad_norm: 0.9999995583129447, iteration: 99258
loss: 1.133231282234192,grad_norm: 0.9999994139918178, iteration: 99259
loss: 0.9693320393562317,grad_norm: 0.8218763501126918, iteration: 99260
loss: 1.0361931324005127,grad_norm: 0.9999991970302267, iteration: 99261
loss: 1.0910568237304688,grad_norm: 0.9999994429218019, iteration: 99262
loss: 1.2460078001022339,grad_norm: 0.999999998666786, iteration: 99263
loss: 1.136364221572876,grad_norm: 0.9999995762432576, iteration: 99264
loss: 1.219794511795044,grad_norm: 0.9999998587536981, iteration: 99265
loss: 1.1243993043899536,grad_norm: 0.9999990768725925, iteration: 99266
loss: 1.2088758945465088,grad_norm: 0.9999997576558912, iteration: 99267
loss: 1.0729800462722778,grad_norm: 0.9999996542616317, iteration: 99268
loss: 1.1731268167495728,grad_norm: 0.9999997672935704, iteration: 99269
loss: 1.146027684211731,grad_norm: 0.9999991176902716, iteration: 99270
loss: 1.0942537784576416,grad_norm: 0.9999996226026389, iteration: 99271
loss: 1.096034288406372,grad_norm: 0.9999994917975394, iteration: 99272
loss: 1.3553963899612427,grad_norm: 1.0000000038168009, iteration: 99273
loss: 1.132737398147583,grad_norm: 0.9999994790935264, iteration: 99274
loss: 1.1232426166534424,grad_norm: 0.9999995749656276, iteration: 99275
loss: 1.0002188682556152,grad_norm: 0.9999999410495949, iteration: 99276
loss: 1.0649584531784058,grad_norm: 0.9999993444081915, iteration: 99277
loss: 1.109565258026123,grad_norm: 0.9999997447463356, iteration: 99278
loss: 1.049828052520752,grad_norm: 0.999999653275656, iteration: 99279
loss: 1.057198166847229,grad_norm: 0.9999999135521577, iteration: 99280
loss: 1.0087023973464966,grad_norm: 0.796306469909589, iteration: 99281
loss: 1.072005271911621,grad_norm: 0.9999990882091255, iteration: 99282
loss: 0.992973804473877,grad_norm: 0.9999991341207556, iteration: 99283
loss: 1.013345718383789,grad_norm: 0.9999995352915523, iteration: 99284
loss: 1.2078356742858887,grad_norm: 0.99999915023256, iteration: 99285
loss: 1.085006833076477,grad_norm: 0.9999992312225658, iteration: 99286
loss: 1.1149187088012695,grad_norm: 0.9999994798676438, iteration: 99287
loss: 1.1105923652648926,grad_norm: 0.9999993980622278, iteration: 99288
loss: 1.1799955368041992,grad_norm: 0.9999996488658459, iteration: 99289
loss: 0.9753261804580688,grad_norm: 0.8581909558903665, iteration: 99290
loss: 1.069663405418396,grad_norm: 0.9999996054857855, iteration: 99291
loss: 0.9997126460075378,grad_norm: 0.9999992701638081, iteration: 99292
loss: 1.1017409563064575,grad_norm: 0.9999994215573833, iteration: 99293
loss: 1.0922811031341553,grad_norm: 0.9999996015335447, iteration: 99294
loss: 1.2910528182983398,grad_norm: 0.9999997992502898, iteration: 99295
loss: 1.0211268663406372,grad_norm: 0.9999993791381415, iteration: 99296
loss: 1.0179089307785034,grad_norm: 0.9999990921581627, iteration: 99297
loss: 1.0287302732467651,grad_norm: 0.9999990270563744, iteration: 99298
loss: 1.0396238565444946,grad_norm: 0.9399984912355274, iteration: 99299
loss: 1.0596662759780884,grad_norm: 0.8954753244466045, iteration: 99300
loss: 1.1638280153274536,grad_norm: 0.9999998814542173, iteration: 99301
loss: 1.0786408185958862,grad_norm: 0.9999996096335227, iteration: 99302
loss: 1.1660189628601074,grad_norm: 0.9999994795560073, iteration: 99303
loss: 1.2328308820724487,grad_norm: 0.9999997940103802, iteration: 99304
loss: 1.1291731595993042,grad_norm: 0.9999997306401693, iteration: 99305
loss: 0.9739783406257629,grad_norm: 0.8391181524255383, iteration: 99306
loss: 1.1836403608322144,grad_norm: 0.999999297583612, iteration: 99307
loss: 1.0125017166137695,grad_norm: 0.9999999546209173, iteration: 99308
loss: 1.0918337106704712,grad_norm: 0.9999994571303868, iteration: 99309
loss: 1.048087477684021,grad_norm: 0.9999995978015668, iteration: 99310
loss: 1.052674651145935,grad_norm: 0.9999990604148027, iteration: 99311
loss: 1.2044259309768677,grad_norm: 0.9999997816999884, iteration: 99312
loss: 1.1398797035217285,grad_norm: 0.9999997551076096, iteration: 99313
loss: 1.0312169790267944,grad_norm: 0.9999991215911307, iteration: 99314
loss: 1.0061272382736206,grad_norm: 0.9999992856646457, iteration: 99315
loss: 1.1122057437896729,grad_norm: 0.9999992267456934, iteration: 99316
loss: 1.0877103805541992,grad_norm: 0.9999990697736333, iteration: 99317
loss: 1.087257981300354,grad_norm: 0.9999997747415252, iteration: 99318
loss: 1.0152426958084106,grad_norm: 0.9999996411125688, iteration: 99319
loss: 1.1589057445526123,grad_norm: 0.9332590582178949, iteration: 99320
loss: 1.1075506210327148,grad_norm: 0.9999993756299034, iteration: 99321
loss: 1.0925694704055786,grad_norm: 0.9999995407885971, iteration: 99322
loss: 1.1121834516525269,grad_norm: 0.9723534816152325, iteration: 99323
loss: 1.098118782043457,grad_norm: 0.9999993181262563, iteration: 99324
loss: 1.2733622789382935,grad_norm: 0.9999997245023106, iteration: 99325
loss: 1.0013352632522583,grad_norm: 0.9999992695369615, iteration: 99326
loss: 1.1192880868911743,grad_norm: 0.9999993804660575, iteration: 99327
loss: 1.066943645477295,grad_norm: 0.9999993230434572, iteration: 99328
loss: 1.0527427196502686,grad_norm: 0.999999563997167, iteration: 99329
loss: 1.1087101697921753,grad_norm: 0.9999996702523185, iteration: 99330
loss: 1.1414399147033691,grad_norm: 0.9999993349531839, iteration: 99331
loss: 1.093807339668274,grad_norm: 0.9999994070963997, iteration: 99332
loss: 1.0780493021011353,grad_norm: 0.9999996752192806, iteration: 99333
loss: 1.2361663579940796,grad_norm: 0.9999998295310275, iteration: 99334
loss: 1.3043667078018188,grad_norm: 0.9999997566437844, iteration: 99335
loss: 1.0979828834533691,grad_norm: 0.9999995843436662, iteration: 99336
loss: 1.1025257110595703,grad_norm: 0.9999994596856248, iteration: 99337
loss: 1.2876231670379639,grad_norm: 0.999999871044355, iteration: 99338
loss: 1.1684445142745972,grad_norm: 0.9999991041459098, iteration: 99339
loss: 1.2043497562408447,grad_norm: 0.999999477308445, iteration: 99340
loss: 1.087929129600525,grad_norm: 0.9999990149390594, iteration: 99341
loss: 1.0461795330047607,grad_norm: 0.8605000298143292, iteration: 99342
loss: 1.1441597938537598,grad_norm: 0.9999995532632023, iteration: 99343
loss: 1.071669340133667,grad_norm: 0.9999998499548428, iteration: 99344
loss: 1.1639314889907837,grad_norm: 0.9999994165009849, iteration: 99345
loss: 1.0647779703140259,grad_norm: 0.9999991198040621, iteration: 99346
loss: 1.019027590751648,grad_norm: 0.9519152751903625, iteration: 99347
loss: 1.0225062370300293,grad_norm: 0.9999995916921507, iteration: 99348
loss: 1.0854918956756592,grad_norm: 0.9999998189185786, iteration: 99349
loss: 1.0963560342788696,grad_norm: 0.9999990369448167, iteration: 99350
loss: 1.0294365882873535,grad_norm: 0.9999991327730129, iteration: 99351
loss: 1.064642071723938,grad_norm: 0.9999991575974894, iteration: 99352
loss: 1.0747135877609253,grad_norm: 0.9999991054966958, iteration: 99353
loss: 1.1224223375320435,grad_norm: 0.999999445100793, iteration: 99354
loss: 1.061042070388794,grad_norm: 0.999999038856405, iteration: 99355
loss: 1.2220916748046875,grad_norm: 0.9999991948997414, iteration: 99356
loss: 0.9635387063026428,grad_norm: 0.9999990733661935, iteration: 99357
loss: 1.0788331031799316,grad_norm: 0.9999993058596464, iteration: 99358
loss: 1.0443260669708252,grad_norm: 1.0000000269344702, iteration: 99359
loss: 1.113030195236206,grad_norm: 0.9999995992321244, iteration: 99360
loss: 1.0490535497665405,grad_norm: 0.9999993742645441, iteration: 99361
loss: 1.0630125999450684,grad_norm: 0.9999990327923619, iteration: 99362
loss: 1.076577067375183,grad_norm: 0.9999990868689498, iteration: 99363
loss: 0.9678241610527039,grad_norm: 0.9999992619542908, iteration: 99364
loss: 1.16960608959198,grad_norm: 0.9999994590514903, iteration: 99365
loss: 1.0046234130859375,grad_norm: 0.9999991353796948, iteration: 99366
loss: 1.008574366569519,grad_norm: 0.9999993862116795, iteration: 99367
loss: 1.076107144355774,grad_norm: 0.9999992036654991, iteration: 99368
loss: 1.12461256980896,grad_norm: 0.9999992468770138, iteration: 99369
loss: 1.1098694801330566,grad_norm: 0.9999992996334792, iteration: 99370
loss: 0.9920979738235474,grad_norm: 0.902961375177176, iteration: 99371
loss: 1.0255651473999023,grad_norm: 0.9999992448455784, iteration: 99372
loss: 1.2252994775772095,grad_norm: 0.9999993414980921, iteration: 99373
loss: 0.9924203157424927,grad_norm: 0.999999039893514, iteration: 99374
loss: 1.2156225442886353,grad_norm: 0.9999999329731216, iteration: 99375
loss: 0.9739818572998047,grad_norm: 0.9999991431352365, iteration: 99376
loss: 1.0639917850494385,grad_norm: 0.9999990618212528, iteration: 99377
loss: 1.0039730072021484,grad_norm: 0.9058011109730382, iteration: 99378
loss: 1.0273524522781372,grad_norm: 0.9999994372166259, iteration: 99379
loss: 1.1170865297317505,grad_norm: 0.9999998879239856, iteration: 99380
loss: 0.9850476384162903,grad_norm: 0.999999577022311, iteration: 99381
loss: 1.1327906847000122,grad_norm: 0.9999997159419389, iteration: 99382
loss: 1.0990556478500366,grad_norm: 0.9999995625835374, iteration: 99383
loss: 1.079121470451355,grad_norm: 1.0000000088176675, iteration: 99384
loss: 1.0452643632888794,grad_norm: 0.9299557172019183, iteration: 99385
loss: 0.9427632689476013,grad_norm: 0.9126049655204553, iteration: 99386
loss: 1.0656036138534546,grad_norm: 0.9999991268393402, iteration: 99387
loss: 1.0085549354553223,grad_norm: 0.9999991905708665, iteration: 99388
loss: 1.0705503225326538,grad_norm: 0.9999992401775052, iteration: 99389
loss: 1.0575225353240967,grad_norm: 0.9999990408456263, iteration: 99390
loss: 1.028997540473938,grad_norm: 0.8948656754323973, iteration: 99391
loss: 1.1359524726867676,grad_norm: 0.9999994149787651, iteration: 99392
loss: 1.0447896718978882,grad_norm: 0.9999993496062943, iteration: 99393
loss: 1.015381932258606,grad_norm: 0.9996715043319827, iteration: 99394
loss: 1.0328203439712524,grad_norm: 0.9188629892388414, iteration: 99395
loss: 1.096813440322876,grad_norm: 0.999999095720431, iteration: 99396
loss: 1.1075036525726318,grad_norm: 0.9999991225204318, iteration: 99397
loss: 1.039069414138794,grad_norm: 0.9775881425877162, iteration: 99398
loss: 1.0121976137161255,grad_norm: 0.8234235043643344, iteration: 99399
loss: 1.0328091382980347,grad_norm: 0.9999995756623294, iteration: 99400
loss: 1.111701488494873,grad_norm: 0.9999992481911559, iteration: 99401
loss: 1.046678066253662,grad_norm: 0.9482197187207925, iteration: 99402
loss: 1.0068169832229614,grad_norm: 0.9999991901466867, iteration: 99403
loss: 1.1398179531097412,grad_norm: 0.999999870408309, iteration: 99404
loss: 1.0353567600250244,grad_norm: 0.9999993250295449, iteration: 99405
loss: 1.0649418830871582,grad_norm: 0.9069854652267119, iteration: 99406
loss: 1.0379782915115356,grad_norm: 0.9999991411799634, iteration: 99407
loss: 1.076131820678711,grad_norm: 0.9999992343390798, iteration: 99408
loss: 1.059125542640686,grad_norm: 0.9999991812838426, iteration: 99409
loss: 1.0452980995178223,grad_norm: 0.9999994195018443, iteration: 99410
loss: 0.9882479906082153,grad_norm: 0.9999990857109928, iteration: 99411
loss: 1.2402238845825195,grad_norm: 0.999999651082008, iteration: 99412
loss: 1.0840939283370972,grad_norm: 0.9999993036051831, iteration: 99413
loss: 0.9917191863059998,grad_norm: 0.8664360501363108, iteration: 99414
loss: 1.0307375192642212,grad_norm: 0.8878943800900198, iteration: 99415
loss: 1.0695624351501465,grad_norm: 0.9171315050037478, iteration: 99416
loss: 1.173936128616333,grad_norm: 0.9999997012376229, iteration: 99417
loss: 1.0400028228759766,grad_norm: 0.9999996920218048, iteration: 99418
loss: 0.9769600033760071,grad_norm: 0.9999992367759887, iteration: 99419
loss: 1.0316435098648071,grad_norm: 0.9999993906971751, iteration: 99420
loss: 1.01931631565094,grad_norm: 0.9999990858389626, iteration: 99421
loss: 1.1243430376052856,grad_norm: 0.9999997619340653, iteration: 99422
loss: 1.0036276578903198,grad_norm: 0.9999994035849401, iteration: 99423
loss: 1.0721349716186523,grad_norm: 0.9999996702760022, iteration: 99424
loss: 1.1041756868362427,grad_norm: 0.9999994476456894, iteration: 99425
loss: 1.199730396270752,grad_norm: 0.9999999211719207, iteration: 99426
loss: 1.0105781555175781,grad_norm: 0.9999993410102358, iteration: 99427
loss: 0.9809578657150269,grad_norm: 0.8920162801912808, iteration: 99428
loss: 1.1387451887130737,grad_norm: 0.9999993879122886, iteration: 99429
loss: 1.1291186809539795,grad_norm: 0.9999995636837643, iteration: 99430
loss: 1.2864948511123657,grad_norm: 0.9999996542618593, iteration: 99431
loss: 0.9979503154754639,grad_norm: 0.9824606686858924, iteration: 99432
loss: 1.0443979501724243,grad_norm: 0.9999994598785923, iteration: 99433
loss: 1.0724027156829834,grad_norm: 0.9999993050687481, iteration: 99434
loss: 1.111407995223999,grad_norm: 0.9999997815994806, iteration: 99435
loss: 1.0413627624511719,grad_norm: 0.9999991766918525, iteration: 99436
loss: 1.1440528631210327,grad_norm: 0.9999992944569465, iteration: 99437
loss: 1.0752462148666382,grad_norm: 0.9999996165832573, iteration: 99438
loss: 1.092063307762146,grad_norm: 0.9999993939333632, iteration: 99439
loss: 1.018968105316162,grad_norm: 0.967915075624459, iteration: 99440
loss: 1.091531753540039,grad_norm: 0.9999998360289695, iteration: 99441
loss: 1.0688751935958862,grad_norm: 0.9999993876165783, iteration: 99442
loss: 0.9866546988487244,grad_norm: 0.9999996080719005, iteration: 99443
loss: 0.9973573684692383,grad_norm: 0.9726396171935014, iteration: 99444
loss: 1.0416258573532104,grad_norm: 0.948411394814737, iteration: 99445
loss: 1.0889109373092651,grad_norm: 0.999999149494515, iteration: 99446
loss: 1.067058801651001,grad_norm: 0.9999991584819181, iteration: 99447
loss: 1.2444560527801514,grad_norm: 0.9999998339628136, iteration: 99448
loss: 1.1453760862350464,grad_norm: 0.9999990973257337, iteration: 99449
loss: 0.9750296473503113,grad_norm: 0.8170432801709941, iteration: 99450
loss: 1.1660598516464233,grad_norm: 0.9999991783781258, iteration: 99451
loss: 1.1449081897735596,grad_norm: 0.9999993957676011, iteration: 99452
loss: 1.1930406093597412,grad_norm: 0.9999994798225075, iteration: 99453
loss: 1.0439363718032837,grad_norm: 0.9999999126075771, iteration: 99454
loss: 0.9961764216423035,grad_norm: 0.9999997151612076, iteration: 99455
loss: 1.099509358406067,grad_norm: 0.999999223045134, iteration: 99456
loss: 1.008933186531067,grad_norm: 0.9142368336121756, iteration: 99457
loss: 1.0349681377410889,grad_norm: 0.9999993214010388, iteration: 99458
loss: 1.020621657371521,grad_norm: 0.9588476235805775, iteration: 99459
loss: 1.1177854537963867,grad_norm: 0.9999995681740553, iteration: 99460
loss: 1.1012439727783203,grad_norm: 0.9999999065949755, iteration: 99461
loss: 1.0218218564987183,grad_norm: 0.9043233343235101, iteration: 99462
loss: 1.0670500993728638,grad_norm: 0.9999992449430387, iteration: 99463
loss: 0.9694530367851257,grad_norm: 0.9367505590606131, iteration: 99464
loss: 1.082318902015686,grad_norm: 0.9999992637765268, iteration: 99465
loss: 1.0132001638412476,grad_norm: 0.9596127885594413, iteration: 99466
loss: 1.0766220092773438,grad_norm: 0.9999990337350991, iteration: 99467
loss: 1.0381098985671997,grad_norm: 0.9999996809631134, iteration: 99468
loss: 1.0664175748825073,grad_norm: 0.9999994118098798, iteration: 99469
loss: 1.059139370918274,grad_norm: 0.9999997671698265, iteration: 99470
loss: 0.9964276552200317,grad_norm: 0.8055181618157532, iteration: 99471
loss: 1.0624430179595947,grad_norm: 0.9999993488696248, iteration: 99472
loss: 1.0371084213256836,grad_norm: 0.6956784010238524, iteration: 99473
loss: 1.015756368637085,grad_norm: 0.9999994132102301, iteration: 99474
loss: 1.0191494226455688,grad_norm: 0.9999990747263059, iteration: 99475
loss: 0.9826534390449524,grad_norm: 0.8655376490650593, iteration: 99476
loss: 1.1586123704910278,grad_norm: 0.9999995119203025, iteration: 99477
loss: 1.115760326385498,grad_norm: 0.999999254127448, iteration: 99478
loss: 1.0003405809402466,grad_norm: 0.9999995205621929, iteration: 99479
loss: 1.0436450242996216,grad_norm: 0.9999990581979505, iteration: 99480
loss: 0.9502387642860413,grad_norm: 0.7945117095537416, iteration: 99481
loss: 1.0173537731170654,grad_norm: 0.8848740768200116, iteration: 99482
loss: 1.0178855657577515,grad_norm: 0.8591869119826401, iteration: 99483
loss: 1.1109733581542969,grad_norm: 0.999999142329411, iteration: 99484
loss: 1.0390852689743042,grad_norm: 0.9382737333266695, iteration: 99485
loss: 1.0527387857437134,grad_norm: 0.8614628187443543, iteration: 99486
loss: 1.0164518356323242,grad_norm: 0.8908025693351903, iteration: 99487
loss: 1.1043531894683838,grad_norm: 0.9999993152947592, iteration: 99488
loss: 1.1434009075164795,grad_norm: 0.9999998997478354, iteration: 99489
loss: 1.0069680213928223,grad_norm: 0.8874615393479077, iteration: 99490
loss: 0.9937775731086731,grad_norm: 0.9999993961191438, iteration: 99491
loss: 1.0809653997421265,grad_norm: 0.9999993254261291, iteration: 99492
loss: 0.9991818070411682,grad_norm: 0.9697425673328977, iteration: 99493
loss: 1.0198729038238525,grad_norm: 0.9999995753177732, iteration: 99494
loss: 1.161290168762207,grad_norm: 0.9999991330146192, iteration: 99495
loss: 1.058925747871399,grad_norm: 0.8769641205178671, iteration: 99496
loss: 1.0073628425598145,grad_norm: 0.9568693614874699, iteration: 99497
loss: 1.0743240118026733,grad_norm: 0.9999990523496547, iteration: 99498
loss: 1.0747380256652832,grad_norm: 0.9999993503232666, iteration: 99499
loss: 1.1101024150848389,grad_norm: 0.999999753655906, iteration: 99500
loss: 1.0535470247268677,grad_norm: 0.9999992528054528, iteration: 99501
loss: 1.0536216497421265,grad_norm: 0.9365052259337077, iteration: 99502
loss: 1.0369709730148315,grad_norm: 0.999999160891469, iteration: 99503
loss: 1.1837371587753296,grad_norm: 0.9999998407457896, iteration: 99504
loss: 1.0489420890808105,grad_norm: 0.9999999812170957, iteration: 99505
loss: 1.0636223554611206,grad_norm: 0.9999995865220385, iteration: 99506
loss: 1.0918725728988647,grad_norm: 0.9999996282833131, iteration: 99507
loss: 0.9998248815536499,grad_norm: 0.9999990742265102, iteration: 99508
loss: 0.9820053577423096,grad_norm: 0.8971871317291907, iteration: 99509
loss: 1.035702109336853,grad_norm: 0.9999991126900621, iteration: 99510
loss: 1.0120195150375366,grad_norm: 0.7527870569505521, iteration: 99511
loss: 1.112176537513733,grad_norm: 0.9999995990741113, iteration: 99512
loss: 1.0074130296707153,grad_norm: 0.9999999027887916, iteration: 99513
loss: 1.052842378616333,grad_norm: 0.9723108219552712, iteration: 99514
loss: 1.0090484619140625,grad_norm: 0.9999998419054078, iteration: 99515
loss: 1.0646052360534668,grad_norm: 0.9999990909090793, iteration: 99516
loss: 1.0155460834503174,grad_norm: 0.7855052508602052, iteration: 99517
loss: 1.0496034622192383,grad_norm: 0.999999446902265, iteration: 99518
loss: 1.0003488063812256,grad_norm: 0.885368247434123, iteration: 99519
loss: 1.0424795150756836,grad_norm: 0.9999992882658646, iteration: 99520
loss: 0.9704449772834778,grad_norm: 0.9999993247122677, iteration: 99521
loss: 1.0719515085220337,grad_norm: 0.8474176255669758, iteration: 99522
loss: 1.0274325609207153,grad_norm: 0.9999992335943629, iteration: 99523
loss: 1.0484042167663574,grad_norm: 0.9999993291994492, iteration: 99524
loss: 0.9930362105369568,grad_norm: 0.9124316785769605, iteration: 99525
loss: 1.0797767639160156,grad_norm: 0.9039730740359578, iteration: 99526
loss: 0.9979798197746277,grad_norm: 0.9999993149755222, iteration: 99527
loss: 1.021618366241455,grad_norm: 0.7968882216270876, iteration: 99528
loss: 0.98297518491745,grad_norm: 0.9999991028267233, iteration: 99529
loss: 1.0177597999572754,grad_norm: 0.9036282660215381, iteration: 99530
loss: 1.0276533365249634,grad_norm: 0.8234481946234681, iteration: 99531
loss: 1.0698375701904297,grad_norm: 0.9999997174136868, iteration: 99532
loss: 1.0949605703353882,grad_norm: 0.9999993171580519, iteration: 99533
loss: 1.043926477432251,grad_norm: 0.9999998710339387, iteration: 99534
loss: 1.0448836088180542,grad_norm: 0.9999992410954457, iteration: 99535
loss: 1.1164625883102417,grad_norm: 0.9999991677093487, iteration: 99536
loss: 1.0298830270767212,grad_norm: 0.8918708045817771, iteration: 99537
loss: 1.0159481763839722,grad_norm: 0.9749815221018043, iteration: 99538
loss: 1.0597574710845947,grad_norm: 0.9999997142190672, iteration: 99539
loss: 1.0057051181793213,grad_norm: 0.845679869030418, iteration: 99540
loss: 1.0908827781677246,grad_norm: 0.9999997444957224, iteration: 99541
loss: 1.0763229131698608,grad_norm: 0.9999998308269454, iteration: 99542
loss: 1.0073925256729126,grad_norm: 0.9995167810845087, iteration: 99543
loss: 1.021243691444397,grad_norm: 0.9999991960437205, iteration: 99544
loss: 1.0469781160354614,grad_norm: 0.9999991048397683, iteration: 99545
loss: 1.0951781272888184,grad_norm: 0.984055347266233, iteration: 99546
loss: 1.4036555290222168,grad_norm: 0.999999766907343, iteration: 99547
loss: 1.0006647109985352,grad_norm: 0.8247990033449358, iteration: 99548
loss: 1.0862014293670654,grad_norm: 0.9999997077250691, iteration: 99549
loss: 1.0125247240066528,grad_norm: 0.9999998577690116, iteration: 99550
loss: 1.086970329284668,grad_norm: 0.9999995159246585, iteration: 99551
loss: 1.1362113952636719,grad_norm: 0.9999997428064661, iteration: 99552
loss: 1.0168179273605347,grad_norm: 0.9502647152987171, iteration: 99553
loss: 1.0144129991531372,grad_norm: 0.9999992718827344, iteration: 99554
loss: 1.110665202140808,grad_norm: 0.9999995421004995, iteration: 99555
loss: 1.0345150232315063,grad_norm: 0.9999999449383771, iteration: 99556
loss: 1.0543767213821411,grad_norm: 0.9999991451899619, iteration: 99557
loss: 1.0017576217651367,grad_norm: 0.797543708292217, iteration: 99558
loss: 1.0249603986740112,grad_norm: 0.9999995831621171, iteration: 99559
loss: 1.09429132938385,grad_norm: 0.9999995386725594, iteration: 99560
loss: 1.0066568851470947,grad_norm: 0.8743714418161589, iteration: 99561
loss: 0.9647281169891357,grad_norm: 0.8778486366338639, iteration: 99562
loss: 1.156400442123413,grad_norm: 0.9999994940743289, iteration: 99563
loss: 0.9723524451255798,grad_norm: 0.947211865450813, iteration: 99564
loss: 1.1091946363449097,grad_norm: 0.9999994827540952, iteration: 99565
loss: 0.9784594774246216,grad_norm: 0.9999990170689055, iteration: 99566
loss: 1.1738417148590088,grad_norm: 0.9999997457356468, iteration: 99567
loss: 1.0760244131088257,grad_norm: 0.9999993509872718, iteration: 99568
loss: 0.9954232573509216,grad_norm: 0.957207991091605, iteration: 99569
loss: 0.9724131226539612,grad_norm: 0.8391519067968345, iteration: 99570
loss: 1.051793098449707,grad_norm: 0.9775063540809956, iteration: 99571
loss: 1.016160488128662,grad_norm: 0.9301509969399557, iteration: 99572
loss: 0.9980372786521912,grad_norm: 0.9999991087685702, iteration: 99573
loss: 1.0451955795288086,grad_norm: 0.971606838315668, iteration: 99574
loss: 1.1771526336669922,grad_norm: 0.9999996847694519, iteration: 99575
loss: 1.3979960680007935,grad_norm: 0.9999999600369691, iteration: 99576
loss: 1.1571834087371826,grad_norm: 0.9263729432787989, iteration: 99577
loss: 1.0844752788543701,grad_norm: 0.9999998288693572, iteration: 99578
loss: 1.1171168088912964,grad_norm: 0.9999994984417008, iteration: 99579
loss: 1.0799978971481323,grad_norm: 0.9999995034014053, iteration: 99580
loss: 1.0061120986938477,grad_norm: 0.8673204216697415, iteration: 99581
loss: 1.014060378074646,grad_norm: 0.9999991208348435, iteration: 99582
loss: 1.1554614305496216,grad_norm: 0.9999993806397482, iteration: 99583
loss: 1.1204129457473755,grad_norm: 0.9999992266704328, iteration: 99584
loss: 1.0503171682357788,grad_norm: 0.9999995594208163, iteration: 99585
loss: 1.1299996376037598,grad_norm: 0.9999993221829662, iteration: 99586
loss: 1.0390349626541138,grad_norm: 0.9871710951211355, iteration: 99587
loss: 1.059603214263916,grad_norm: 0.9999993755842858, iteration: 99588
loss: 1.0770636796951294,grad_norm: 0.9999993252783322, iteration: 99589
loss: 1.2243093252182007,grad_norm: 0.9999993972843508, iteration: 99590
loss: 1.0048115253448486,grad_norm: 0.9999990786939864, iteration: 99591
loss: 1.1001900434494019,grad_norm: 0.9999991973537121, iteration: 99592
loss: 1.163009524345398,grad_norm: 0.9999992722030426, iteration: 99593
loss: 1.0887048244476318,grad_norm: 0.9999992544684481, iteration: 99594
loss: 1.0343616008758545,grad_norm: 0.8193729712308988, iteration: 99595
loss: 1.0399115085601807,grad_norm: 0.7895822671790919, iteration: 99596
loss: 1.1395516395568848,grad_norm: 0.9999991150642248, iteration: 99597
loss: 1.0789786577224731,grad_norm: 0.9999999422997008, iteration: 99598
loss: 1.0563035011291504,grad_norm: 0.9999998874932178, iteration: 99599
loss: 1.0789501667022705,grad_norm: 0.9999992362162868, iteration: 99600
loss: 1.206794261932373,grad_norm: 1.0000000599455634, iteration: 99601
loss: 1.0527825355529785,grad_norm: 0.9464904843283382, iteration: 99602
loss: 1.0109890699386597,grad_norm: 0.9999992196953675, iteration: 99603
loss: 1.062900424003601,grad_norm: 0.9999998731204423, iteration: 99604
loss: 1.0648064613342285,grad_norm: 0.999999943814053, iteration: 99605
loss: 1.0631325244903564,grad_norm: 0.9999995045355429, iteration: 99606
loss: 1.009522795677185,grad_norm: 0.9999992103933169, iteration: 99607
loss: 0.992770254611969,grad_norm: 0.9000392668029488, iteration: 99608
loss: 1.0568519830703735,grad_norm: 0.9999993322560995, iteration: 99609
loss: 1.0057616233825684,grad_norm: 0.9999992016655508, iteration: 99610
loss: 1.259569764137268,grad_norm: 0.9999997738609244, iteration: 99611
loss: 0.9995959997177124,grad_norm: 0.9999991632253299, iteration: 99612
loss: 1.0133635997772217,grad_norm: 0.9999996146421797, iteration: 99613
loss: 1.0930759906768799,grad_norm: 0.9999993199777676, iteration: 99614
loss: 1.0095001459121704,grad_norm: 0.948037192464155, iteration: 99615
loss: 1.123599886894226,grad_norm: 0.9999998945611618, iteration: 99616
loss: 1.0448863506317139,grad_norm: 0.9999991520176527, iteration: 99617
loss: 1.1055716276168823,grad_norm: 0.9999992977004002, iteration: 99618
loss: 1.019993782043457,grad_norm: 0.999999176234777, iteration: 99619
loss: 1.010202169418335,grad_norm: 0.9999991370122049, iteration: 99620
loss: 1.0120640993118286,grad_norm: 0.9999997464011509, iteration: 99621
loss: 1.104026198387146,grad_norm: 0.9999994921775155, iteration: 99622
loss: 1.0012761354446411,grad_norm: 0.9999995754259903, iteration: 99623
loss: 0.9866225719451904,grad_norm: 0.9999992456915304, iteration: 99624
loss: 1.0737028121948242,grad_norm: 0.9999992057106452, iteration: 99625
loss: 1.101678729057312,grad_norm: 0.9999995107677769, iteration: 99626
loss: 1.0428236722946167,grad_norm: 0.9999991672225003, iteration: 99627
loss: 1.0141257047653198,grad_norm: 0.9999992265167468, iteration: 99628
loss: 1.0211647748947144,grad_norm: 0.937388712635563, iteration: 99629
loss: 1.0213488340377808,grad_norm: 0.9825456767798183, iteration: 99630
loss: 1.037930965423584,grad_norm: 0.9999991238639755, iteration: 99631
loss: 1.019149899482727,grad_norm: 0.9999990630911787, iteration: 99632
loss: 1.1176351308822632,grad_norm: 0.9999997434713963, iteration: 99633
loss: 1.0283626317977905,grad_norm: 0.9999991507523218, iteration: 99634
loss: 1.039175271987915,grad_norm: 0.9999990911952964, iteration: 99635
loss: 1.038111686706543,grad_norm: 0.9013330373283113, iteration: 99636
loss: 1.1289790868759155,grad_norm: 0.9999991780278605, iteration: 99637
loss: 0.9461163878440857,grad_norm: 0.8305377060428808, iteration: 99638
loss: 1.0408276319503784,grad_norm: 0.9999998372054775, iteration: 99639
loss: 1.0999081134796143,grad_norm: 0.9999995901393411, iteration: 99640
loss: 1.0487864017486572,grad_norm: 0.999999762978472, iteration: 99641
loss: 1.0716973543167114,grad_norm: 0.9999994139781366, iteration: 99642
loss: 1.0507502555847168,grad_norm: 0.9999995953850532, iteration: 99643
loss: 1.0863837003707886,grad_norm: 0.9999992627168562, iteration: 99644
loss: 1.013834834098816,grad_norm: 0.9999989376480786, iteration: 99645
loss: 1.0464354753494263,grad_norm: 0.9037962416826635, iteration: 99646
loss: 0.9883458018302917,grad_norm: 0.9999991232253194, iteration: 99647
loss: 1.0830577611923218,grad_norm: 0.9999993535159549, iteration: 99648
loss: 1.097132682800293,grad_norm: 0.9999998488756369, iteration: 99649
loss: 0.9820564985275269,grad_norm: 0.843987064171394, iteration: 99650
loss: 1.092902421951294,grad_norm: 0.9999994459916134, iteration: 99651
loss: 1.0166651010513306,grad_norm: 0.8430310236061319, iteration: 99652
loss: 1.0486115217208862,grad_norm: 0.9999999566392055, iteration: 99653
loss: 1.1029759645462036,grad_norm: 1.0000000683839843, iteration: 99654
loss: 1.0435155630111694,grad_norm: 0.9898628474592033, iteration: 99655
loss: 1.0335990190505981,grad_norm: 0.9225109397556212, iteration: 99656
loss: 1.093362808227539,grad_norm: 0.999999157958303, iteration: 99657
loss: 1.0576319694519043,grad_norm: 0.9999992137508593, iteration: 99658
loss: 1.1315585374832153,grad_norm: 0.9999992222007648, iteration: 99659
loss: 1.0095551013946533,grad_norm: 0.9999990791178818, iteration: 99660
loss: 1.0027519464492798,grad_norm: 0.9530742848234256, iteration: 99661
loss: 1.12185537815094,grad_norm: 0.999999916725138, iteration: 99662
loss: 1.0147385597229004,grad_norm: 0.9900230934405355, iteration: 99663
loss: 0.9810978174209595,grad_norm: 0.9528621760218163, iteration: 99664
loss: 1.0805414915084839,grad_norm: 0.9999993154841635, iteration: 99665
loss: 1.1210297346115112,grad_norm: 0.9999997587861065, iteration: 99666
loss: 1.0367276668548584,grad_norm: 0.9999997201909524, iteration: 99667
loss: 1.0577869415283203,grad_norm: 0.9999995474275276, iteration: 99668
loss: 1.0717260837554932,grad_norm: 0.9999991186362853, iteration: 99669
loss: 1.1055413484573364,grad_norm: 0.9999994671585037, iteration: 99670
loss: 1.1039979457855225,grad_norm: 0.9999996208364552, iteration: 99671
loss: 1.1402372121810913,grad_norm: 0.9999999183533731, iteration: 99672
loss: 1.0755573511123657,grad_norm: 0.999999959348125, iteration: 99673
loss: 1.0307995080947876,grad_norm: 0.9999996770853022, iteration: 99674
loss: 1.1069083213806152,grad_norm: 0.9560383479648832, iteration: 99675
loss: 1.034087061882019,grad_norm: 0.9999990674191697, iteration: 99676
loss: 1.0084348917007446,grad_norm: 0.9251981217999822, iteration: 99677
loss: 1.0428557395935059,grad_norm: 0.9999994287753963, iteration: 99678
loss: 1.026076316833496,grad_norm: 0.8427581596986223, iteration: 99679
loss: 1.132555365562439,grad_norm: 0.9999998992074756, iteration: 99680
loss: 1.1259740591049194,grad_norm: 0.9999997589083343, iteration: 99681
loss: 1.0596449375152588,grad_norm: 0.9999998736945522, iteration: 99682
loss: 1.1354612112045288,grad_norm: 0.9999993885370975, iteration: 99683
loss: 1.1911011934280396,grad_norm: 0.9999997756975977, iteration: 99684
loss: 1.131473183631897,grad_norm: 0.9999998644586284, iteration: 99685
loss: 1.0417814254760742,grad_norm: 0.9999993997302696, iteration: 99686
loss: 1.18071711063385,grad_norm: 0.9999998756785073, iteration: 99687
loss: 1.020578384399414,grad_norm: 0.9999990181141329, iteration: 99688
loss: 1.1063958406448364,grad_norm: 0.9999991613897952, iteration: 99689
loss: 1.2075872421264648,grad_norm: 0.9999998571327904, iteration: 99690
loss: 1.0067120790481567,grad_norm: 0.9999991482165785, iteration: 99691
loss: 1.1859928369522095,grad_norm: 0.9999995320926639, iteration: 99692
loss: 1.080089807510376,grad_norm: 0.999999256537985, iteration: 99693
loss: 1.0717660188674927,grad_norm: 0.9374901984640497, iteration: 99694
loss: 0.9991173148155212,grad_norm: 0.9999991547368839, iteration: 99695
loss: 1.0273154973983765,grad_norm: 0.9353972778000267, iteration: 99696
loss: 1.067091703414917,grad_norm: 0.9999998559453287, iteration: 99697
loss: 1.1016980409622192,grad_norm: 0.9999997984926287, iteration: 99698
loss: 1.1009808778762817,grad_norm: 0.9999996867760863, iteration: 99699
loss: 1.1596864461898804,grad_norm: 0.9999996164812817, iteration: 99700
loss: 1.1123344898223877,grad_norm: 1.0000000424467317, iteration: 99701
loss: 1.0866436958312988,grad_norm: 1.0000000112972482, iteration: 99702
loss: 1.054642677307129,grad_norm: 0.9999997810915546, iteration: 99703
loss: 1.0521527528762817,grad_norm: 0.9999992706991193, iteration: 99704
loss: 0.9713878631591797,grad_norm: 0.7776282602855684, iteration: 99705
loss: 1.101744532585144,grad_norm: 1.000000024026155, iteration: 99706
loss: 1.1824547052383423,grad_norm: 0.9999991433441503, iteration: 99707
loss: 1.1487863063812256,grad_norm: 0.9999995054724911, iteration: 99708
loss: 1.1274923086166382,grad_norm: 0.9999996410674935, iteration: 99709
loss: 1.0218963623046875,grad_norm: 0.9999989882647077, iteration: 99710
loss: 1.1756073236465454,grad_norm: 0.9999997582387357, iteration: 99711
loss: 1.0848474502563477,grad_norm: 0.933493843540927, iteration: 99712
loss: 1.143524408340454,grad_norm: 0.9999994540421298, iteration: 99713
loss: 1.0146749019622803,grad_norm: 0.9999992506438081, iteration: 99714
loss: 1.028435230255127,grad_norm: 0.9123044233473049, iteration: 99715
loss: 1.1461724042892456,grad_norm: 0.9999993651594051, iteration: 99716
loss: 1.3258949518203735,grad_norm: 0.9999998744394306, iteration: 99717
loss: 1.0669234991073608,grad_norm: 0.9999996300826303, iteration: 99718
loss: 1.1391412019729614,grad_norm: 0.9725092727979677, iteration: 99719
loss: 1.1039735078811646,grad_norm: 0.9999990306246974, iteration: 99720
loss: 1.0180399417877197,grad_norm: 0.8122290002967306, iteration: 99721
loss: 1.0873074531555176,grad_norm: 0.9999997735665661, iteration: 99722
loss: 1.2388098239898682,grad_norm: 0.9999999613953185, iteration: 99723
loss: 1.066855549812317,grad_norm: 0.9999990288634799, iteration: 99724
loss: 1.0025737285614014,grad_norm: 0.9999991659050695, iteration: 99725
loss: 1.057865858078003,grad_norm: 0.9373463152422072, iteration: 99726
loss: 1.126565933227539,grad_norm: 0.9999999935173812, iteration: 99727
loss: 1.0370091199874878,grad_norm: 0.9492840448119176, iteration: 99728
loss: 1.077712059020996,grad_norm: 0.9999992825693893, iteration: 99729
loss: 1.0485459566116333,grad_norm: 0.9999996757103424, iteration: 99730
loss: 1.1093088388442993,grad_norm: 0.9999993146085808, iteration: 99731
loss: 1.0340056419372559,grad_norm: 0.9999992517379368, iteration: 99732
loss: 1.0615346431732178,grad_norm: 0.9999995382420079, iteration: 99733
loss: 1.0364123582839966,grad_norm: 0.9714945552522489, iteration: 99734
loss: 1.0408670902252197,grad_norm: 0.9999992156359712, iteration: 99735
loss: 1.05124831199646,grad_norm: 0.9123393848957466, iteration: 99736
loss: 0.9876700043678284,grad_norm: 0.9176791935196411, iteration: 99737
loss: 1.118954062461853,grad_norm: 0.9730563619479579, iteration: 99738
loss: 1.054113507270813,grad_norm: 0.9999991655273404, iteration: 99739
loss: 1.1171482801437378,grad_norm: 0.9999993846033544, iteration: 99740
loss: 1.016343116760254,grad_norm: 0.9698614616548246, iteration: 99741
loss: 1.061691403388977,grad_norm: 0.9674368452119283, iteration: 99742
loss: 1.041720986366272,grad_norm: 0.9999991746030462, iteration: 99743
loss: 1.0547668933868408,grad_norm: 0.9941049948611534, iteration: 99744
loss: 0.997847855091095,grad_norm: 0.9999991748304435, iteration: 99745
loss: 1.0513180494308472,grad_norm: 0.9999997178961667, iteration: 99746
loss: 1.1275272369384766,grad_norm: 0.9999994776515608, iteration: 99747
loss: 1.0182420015335083,grad_norm: 0.999999453762453, iteration: 99748
loss: 1.0464776754379272,grad_norm: 0.9999990135933842, iteration: 99749
loss: 1.0139983892440796,grad_norm: 0.9999991764550858, iteration: 99750
loss: 0.9916244149208069,grad_norm: 0.8997636373179494, iteration: 99751
loss: 1.0162851810455322,grad_norm: 0.9999991982270998, iteration: 99752
loss: 1.1166685819625854,grad_norm: 0.9999992847044817, iteration: 99753
loss: 1.0019065141677856,grad_norm: 0.9036241776344099, iteration: 99754
loss: 1.0347888469696045,grad_norm: 0.999998973737606, iteration: 99755
loss: 1.008336067199707,grad_norm: 0.9440636705887692, iteration: 99756
loss: 0.9654446840286255,grad_norm: 0.9999990776436563, iteration: 99757
loss: 1.1393986940383911,grad_norm: 0.9999997068601568, iteration: 99758
loss: 1.0496848821640015,grad_norm: 0.9999990945960978, iteration: 99759
loss: 1.1096267700195312,grad_norm: 0.9999998813476029, iteration: 99760
loss: 0.9866353869438171,grad_norm: 0.9146310737791006, iteration: 99761
loss: 1.0865280628204346,grad_norm: 0.999999794620631, iteration: 99762
loss: 0.9736655950546265,grad_norm: 0.9999992009547516, iteration: 99763
loss: 1.0313423871994019,grad_norm: 0.999999188309668, iteration: 99764
loss: 1.03745436668396,grad_norm: 0.9634617403350517, iteration: 99765
loss: 1.0435556173324585,grad_norm: 0.9999990202981296, iteration: 99766
loss: 1.0046011209487915,grad_norm: 0.9999992069906295, iteration: 99767
loss: 0.9679518342018127,grad_norm: 0.999999042755378, iteration: 99768
loss: 1.134297251701355,grad_norm: 0.9999992736507956, iteration: 99769
loss: 1.0311442613601685,grad_norm: 0.8050383531793229, iteration: 99770
loss: 0.9638589024543762,grad_norm: 0.8643644651507615, iteration: 99771
loss: 1.0730137825012207,grad_norm: 0.9999995495532121, iteration: 99772
loss: 1.0352076292037964,grad_norm: 0.9430792901037811, iteration: 99773
loss: 1.0372532606124878,grad_norm: 0.9999990834870567, iteration: 99774
loss: 1.1723957061767578,grad_norm: 0.9999997604496863, iteration: 99775
loss: 1.0111280679702759,grad_norm: 0.9810026706809787, iteration: 99776
loss: 1.0229787826538086,grad_norm: 0.9999997111862986, iteration: 99777
loss: 1.052856206893921,grad_norm: 0.941031063105672, iteration: 99778
loss: 1.2314761877059937,grad_norm: 0.9999999703978764, iteration: 99779
loss: 1.0825809240341187,grad_norm: 0.9999999320791986, iteration: 99780
loss: 1.0390604734420776,grad_norm: 0.999999098551271, iteration: 99781
loss: 0.9917767643928528,grad_norm: 0.9629909165249795, iteration: 99782
loss: 1.0631023645401,grad_norm: 0.9999990810026642, iteration: 99783
loss: 1.1396843194961548,grad_norm: 0.9999995422374762, iteration: 99784
loss: 1.128031849861145,grad_norm: 0.9999998858665322, iteration: 99785
loss: 1.078527808189392,grad_norm: 0.9999995098665849, iteration: 99786
loss: 1.034263014793396,grad_norm: 0.9999990703082947, iteration: 99787
loss: 0.9563924670219421,grad_norm: 0.8430899295402765, iteration: 99788
loss: 1.0797029733657837,grad_norm: 0.9999998261424939, iteration: 99789
loss: 1.0466989278793335,grad_norm: 0.9999998056222029, iteration: 99790
loss: 1.1366117000579834,grad_norm: 0.9999998707600138, iteration: 99791
loss: 0.9829361438751221,grad_norm: 0.9999990389676087, iteration: 99792
loss: 1.0630764961242676,grad_norm: 0.9599529895900805, iteration: 99793
loss: 1.0853101015090942,grad_norm: 0.9999996431361567, iteration: 99794
loss: 1.0696642398834229,grad_norm: 0.99999955249329, iteration: 99795
loss: 1.0132739543914795,grad_norm: 0.9999991874623838, iteration: 99796
loss: 1.0924293994903564,grad_norm: 0.9999991536585786, iteration: 99797
loss: 1.0950862169265747,grad_norm: 0.9999998352556002, iteration: 99798
loss: 0.9707817435264587,grad_norm: 0.9999991142434207, iteration: 99799
loss: 1.0132211446762085,grad_norm: 0.945783674932992, iteration: 99800
loss: 1.1510603427886963,grad_norm: 0.9999993713985059, iteration: 99801
loss: 1.0199323892593384,grad_norm: 0.999999120163552, iteration: 99802
loss: 1.070542812347412,grad_norm: 0.9999996382836877, iteration: 99803
loss: 0.986151933670044,grad_norm: 0.8846994841728035, iteration: 99804
loss: 1.0065525770187378,grad_norm: 0.857609344787456, iteration: 99805
loss: 0.972364068031311,grad_norm: 0.9999990350966257, iteration: 99806
loss: 1.0944863557815552,grad_norm: 0.9999994117200087, iteration: 99807
loss: 1.0538439750671387,grad_norm: 0.970456656626915, iteration: 99808
loss: 1.026522159576416,grad_norm: 0.999999381094358, iteration: 99809
loss: 1.0141199827194214,grad_norm: 0.999999343429269, iteration: 99810
loss: 1.0020551681518555,grad_norm: 0.7602500278367197, iteration: 99811
loss: 1.1091008186340332,grad_norm: 0.999999390985932, iteration: 99812
loss: 1.0344711542129517,grad_norm: 0.999999656654451, iteration: 99813
loss: 1.0436100959777832,grad_norm: 0.9999994389297631, iteration: 99814
loss: 0.9805775284767151,grad_norm: 0.9999990470673433, iteration: 99815
loss: 1.0074162483215332,grad_norm: 0.9483454544782931, iteration: 99816
loss: 1.0217537879943848,grad_norm: 0.8909393304546926, iteration: 99817
loss: 0.9783650040626526,grad_norm: 0.9747098492285627, iteration: 99818
loss: 1.030378818511963,grad_norm: 0.999999839246007, iteration: 99819
loss: 1.084004282951355,grad_norm: 0.9999998106782196, iteration: 99820
loss: 1.089496374130249,grad_norm: 0.9999993658094284, iteration: 99821
loss: 0.9937838315963745,grad_norm: 0.999999372730443, iteration: 99822
loss: 1.0083744525909424,grad_norm: 0.9999993216686458, iteration: 99823
loss: 1.091628909111023,grad_norm: 0.9999991947454245, iteration: 99824
loss: 1.0345853567123413,grad_norm: 0.9999992488460336, iteration: 99825
loss: 0.996682345867157,grad_norm: 0.9999991918204498, iteration: 99826
loss: 0.9788495898246765,grad_norm: 0.9999992699925518, iteration: 99827
loss: 0.9838303923606873,grad_norm: 0.9008330993474875, iteration: 99828
loss: 1.0334014892578125,grad_norm: 0.9999993448720192, iteration: 99829
loss: 1.0650886297225952,grad_norm: 0.9999994132070698, iteration: 99830
loss: 1.0278587341308594,grad_norm: 0.9999991363420825, iteration: 99831
loss: 1.0045510530471802,grad_norm: 0.9693439654005214, iteration: 99832
loss: 1.0998592376708984,grad_norm: 0.999999598414703, iteration: 99833
loss: 1.017511010169983,grad_norm: 0.9999989859004849, iteration: 99834
loss: 1.0298235416412354,grad_norm: 0.9999991841131679, iteration: 99835
loss: 1.0486204624176025,grad_norm: 0.9999991429631688, iteration: 99836
loss: 1.0296906232833862,grad_norm: 0.9999997184278498, iteration: 99837
loss: 1.0468595027923584,grad_norm: 0.9999995871397169, iteration: 99838
loss: 1.001093864440918,grad_norm: 0.7650735791606439, iteration: 99839
loss: 0.9950501322746277,grad_norm: 0.8610732124023187, iteration: 99840
loss: 1.0800437927246094,grad_norm: 0.9999993145969459, iteration: 99841
loss: 1.1010627746582031,grad_norm: 0.9999994503798761, iteration: 99842
loss: 1.1372652053833008,grad_norm: 0.9999999124363798, iteration: 99843
loss: 1.082855463027954,grad_norm: 0.9999994279211921, iteration: 99844
loss: 1.000847339630127,grad_norm: 0.9999991675028641, iteration: 99845
loss: 1.0911809206008911,grad_norm: 0.9999997889414877, iteration: 99846
loss: 1.115166187286377,grad_norm: 0.9999998686356086, iteration: 99847
loss: 1.000252604484558,grad_norm: 0.8623731116759826, iteration: 99848
loss: 1.0139018297195435,grad_norm: 0.9999990231265092, iteration: 99849
loss: 1.1093038320541382,grad_norm: 0.999999539887337, iteration: 99850
loss: 1.114600419998169,grad_norm: 0.9999996530237433, iteration: 99851
loss: 1.014849066734314,grad_norm: 0.9999990559902474, iteration: 99852
loss: 0.9885452389717102,grad_norm: 0.9606264818198791, iteration: 99853
loss: 0.9790990948677063,grad_norm: 0.9999990840680987, iteration: 99854
loss: 1.0013859272003174,grad_norm: 0.9702828828199178, iteration: 99855
loss: 1.0114693641662598,grad_norm: 0.9999992193396781, iteration: 99856
loss: 1.0282975435256958,grad_norm: 0.9999996231194358, iteration: 99857
loss: 1.072203516960144,grad_norm: 0.999999162155509, iteration: 99858
loss: 1.0337963104248047,grad_norm: 0.9985182846579503, iteration: 99859
loss: 1.0045411586761475,grad_norm: 0.9999991117692698, iteration: 99860
loss: 1.0538042783737183,grad_norm: 0.9999998101801615, iteration: 99861
loss: 0.9993930459022522,grad_norm: 0.9999990286118623, iteration: 99862
loss: 1.0478770732879639,grad_norm: 0.9999995144914353, iteration: 99863
loss: 1.029726505279541,grad_norm: 0.9999990539358747, iteration: 99864
loss: 0.9966439604759216,grad_norm: 0.9999992327608936, iteration: 99865
loss: 0.9745886325836182,grad_norm: 0.9999991476400276, iteration: 99866
loss: 1.1582884788513184,grad_norm: 0.9999998148467528, iteration: 99867
loss: 0.977167546749115,grad_norm: 0.9673598049095754, iteration: 99868
loss: 1.1014379262924194,grad_norm: 0.999999330823334, iteration: 99869
loss: 1.0114309787750244,grad_norm: 0.7448453782780932, iteration: 99870
loss: 1.0689995288848877,grad_norm: 0.9999995426390623, iteration: 99871
loss: 1.032680630683899,grad_norm: 0.9999996377427018, iteration: 99872
loss: 1.0169519186019897,grad_norm: 0.9763205208438354, iteration: 99873
loss: 1.034997582435608,grad_norm: 0.9999992748274134, iteration: 99874
loss: 0.981594443321228,grad_norm: 0.9999995109602003, iteration: 99875
loss: 1.0544416904449463,grad_norm: 0.9999997243195493, iteration: 99876
loss: 1.0103822946548462,grad_norm: 0.9999991545541492, iteration: 99877
loss: 1.0169013738632202,grad_norm: 0.9768635287012137, iteration: 99878
loss: 1.0043319463729858,grad_norm: 0.9999992485409746, iteration: 99879
loss: 1.0192158222198486,grad_norm: 0.9999991952284957, iteration: 99880
loss: 1.0486102104187012,grad_norm: 0.9999992369869307, iteration: 99881
loss: 1.056703805923462,grad_norm: 0.99999912397638, iteration: 99882
loss: 1.0440529584884644,grad_norm: 0.8274694968962001, iteration: 99883
loss: 1.196385145187378,grad_norm: 0.9999996385838247, iteration: 99884
loss: 1.088501214981079,grad_norm: 0.9999993003965579, iteration: 99885
loss: 1.056221604347229,grad_norm: 0.9999997707944854, iteration: 99886
loss: 1.048905372619629,grad_norm: 0.9999989654769172, iteration: 99887
loss: 0.9634706377983093,grad_norm: 0.9999990958513206, iteration: 99888
loss: 1.0835527181625366,grad_norm: 0.9999997405333888, iteration: 99889
loss: 1.0039888620376587,grad_norm: 0.9999991303260535, iteration: 99890
loss: 1.028437614440918,grad_norm: 0.8576324049255192, iteration: 99891
loss: 1.0532909631729126,grad_norm: 0.9999991815442639, iteration: 99892
loss: 1.0473984479904175,grad_norm: 0.9999990728445687, iteration: 99893
loss: 1.0065568685531616,grad_norm: 0.8469108950762247, iteration: 99894
loss: 1.0290437936782837,grad_norm: 1.0000000223352858, iteration: 99895
loss: 0.9860700368881226,grad_norm: 0.803972888618724, iteration: 99896
loss: 1.0378612279891968,grad_norm: 0.9999999496468663, iteration: 99897
loss: 0.9364166259765625,grad_norm: 0.9305898745617787, iteration: 99898
loss: 0.9899362325668335,grad_norm: 0.9999996812824734, iteration: 99899
loss: 1.0281673669815063,grad_norm: 0.9999992313394832, iteration: 99900
loss: 1.0864344835281372,grad_norm: 0.9999991592798766, iteration: 99901
loss: 1.1903427839279175,grad_norm: 0.9999998527974323, iteration: 99902
loss: 1.049381136894226,grad_norm: 0.999999038225763, iteration: 99903
loss: 0.9697375893592834,grad_norm: 0.7865496944292426, iteration: 99904
loss: 1.0518205165863037,grad_norm: 0.999999747367629, iteration: 99905
loss: 1.0567336082458496,grad_norm: 0.8832016393149491, iteration: 99906
loss: 1.035591959953308,grad_norm: 0.9999994760510474, iteration: 99907
loss: 0.9685469269752502,grad_norm: 0.9404210513842767, iteration: 99908
loss: 1.0325437784194946,grad_norm: 0.9999998910456867, iteration: 99909
loss: 1.1297153234481812,grad_norm: 0.9999993430740135, iteration: 99910
loss: 1.001861333847046,grad_norm: 0.7985741561705227, iteration: 99911
loss: 1.0555561780929565,grad_norm: 0.9999997015231123, iteration: 99912
loss: 1.103559136390686,grad_norm: 0.9999998048351285, iteration: 99913
loss: 1.0242668390274048,grad_norm: 0.9999992663065737, iteration: 99914
loss: 1.0085270404815674,grad_norm: 0.9999992314215752, iteration: 99915
loss: 0.9870823621749878,grad_norm: 0.905222392425147, iteration: 99916
loss: 1.0802675485610962,grad_norm: 0.999999607313594, iteration: 99917
loss: 0.9811056852340698,grad_norm: 0.9999999167526772, iteration: 99918
loss: 0.9737558960914612,grad_norm: 0.9114527954379232, iteration: 99919
loss: 1.0766741037368774,grad_norm: 0.9999993499474943, iteration: 99920
loss: 1.0388569831848145,grad_norm: 0.999999582113876, iteration: 99921
loss: 1.0553839206695557,grad_norm: 0.9999998078328648, iteration: 99922
loss: 1.0978703498840332,grad_norm: 0.9999996622187272, iteration: 99923
loss: 1.0489073991775513,grad_norm: 0.9999995520892823, iteration: 99924
loss: 0.9608270525932312,grad_norm: 0.9264331465619602, iteration: 99925
loss: 1.0255132913589478,grad_norm: 0.9999991122438417, iteration: 99926
loss: 1.010634183883667,grad_norm: 0.9999998538399757, iteration: 99927
loss: 1.0350579023361206,grad_norm: 0.9999995209739038, iteration: 99928
loss: 1.0283366441726685,grad_norm: 0.8669870508820581, iteration: 99929
loss: 1.1128252744674683,grad_norm: 0.9999998808429209, iteration: 99930
loss: 1.0345560312271118,grad_norm: 0.9999990769598345, iteration: 99931
loss: 1.0715957880020142,grad_norm: 0.999999314853156, iteration: 99932
loss: 1.147357702255249,grad_norm: 0.9999997356900903, iteration: 99933
loss: 1.0753576755523682,grad_norm: 0.9999990748708936, iteration: 99934
loss: 1.0023971796035767,grad_norm: 0.9999990561488782, iteration: 99935
loss: 1.0406975746154785,grad_norm: 0.9999992105544431, iteration: 99936
loss: 0.982569694519043,grad_norm: 0.8981276087587412, iteration: 99937
loss: 1.1347146034240723,grad_norm: 0.9999997871575598, iteration: 99938
loss: 0.9735820889472961,grad_norm: 0.9999995979587871, iteration: 99939
loss: 1.0660122632980347,grad_norm: 0.9726007753347308, iteration: 99940
loss: 0.9849241971969604,grad_norm: 0.8767251610518355, iteration: 99941
loss: 1.0388882160186768,grad_norm: 0.8659224504370562, iteration: 99942
loss: 1.0077462196350098,grad_norm: 0.9999992771032368, iteration: 99943
loss: 1.0860910415649414,grad_norm: 0.999999800702401, iteration: 99944
loss: 0.9941829442977905,grad_norm: 0.9999990901983794, iteration: 99945
loss: 1.020472526550293,grad_norm: 0.8747020214215482, iteration: 99946
loss: 1.0419559478759766,grad_norm: 0.9999990319389398, iteration: 99947
loss: 1.0530660152435303,grad_norm: 0.8536282471660147, iteration: 99948
loss: 1.0013099908828735,grad_norm: 0.8527209224307757, iteration: 99949
loss: 1.067352056503296,grad_norm: 0.8910440370978377, iteration: 99950
loss: 1.0548750162124634,grad_norm: 0.9999991550433867, iteration: 99951
loss: 0.9888492226600647,grad_norm: 0.9089439787080908, iteration: 99952
loss: 1.0301817655563354,grad_norm: 0.9999990497846369, iteration: 99953
loss: 1.1249337196350098,grad_norm: 0.9999999016629699, iteration: 99954
loss: 1.0537060499191284,grad_norm: 0.98954431553546, iteration: 99955
loss: 1.0115010738372803,grad_norm: 0.999999237417136, iteration: 99956
loss: 1.0230436325073242,grad_norm: 0.999999144380148, iteration: 99957
loss: 0.9656733870506287,grad_norm: 0.8770084248763433, iteration: 99958
loss: 1.0217009782791138,grad_norm: 0.9999998826787125, iteration: 99959
loss: 1.0806668996810913,grad_norm: 0.9999998667353811, iteration: 99960
loss: 1.0015453100204468,grad_norm: 0.7729305002033579, iteration: 99961
loss: 1.0932579040527344,grad_norm: 0.9999994443150344, iteration: 99962
loss: 1.022112488746643,grad_norm: 0.999999867503503, iteration: 99963
loss: 0.9983908534049988,grad_norm: 0.9999994367782205, iteration: 99964
loss: 1.0976908206939697,grad_norm: 0.999999239830817, iteration: 99965
loss: 0.9994490146636963,grad_norm: 0.9257467708866787, iteration: 99966
loss: 1.0073055028915405,grad_norm: 0.9511090753267437, iteration: 99967
loss: 1.0022451877593994,grad_norm: 0.80415003498011, iteration: 99968
loss: 1.0573064088821411,grad_norm: 0.9999996259050115, iteration: 99969
loss: 0.997360348701477,grad_norm: 0.9999989577902642, iteration: 99970
loss: 0.987088680267334,grad_norm: 0.9999997402758356, iteration: 99971
loss: 1.0464487075805664,grad_norm: 0.9999996929446668, iteration: 99972
loss: 1.0042591094970703,grad_norm: 0.9999994411322015, iteration: 99973
loss: 1.0114279985427856,grad_norm: 0.8526801801499359, iteration: 99974
loss: 0.9869465231895447,grad_norm: 0.9999990413313627, iteration: 99975
loss: 1.0857340097427368,grad_norm: 0.9999993039595096, iteration: 99976
loss: 1.13516366481781,grad_norm: 0.999999834195268, iteration: 99977
loss: 1.0302640199661255,grad_norm: 0.7897041301536736, iteration: 99978
loss: 1.2652101516723633,grad_norm: 0.9999995612003562, iteration: 99979
loss: 1.0360232591629028,grad_norm: 0.854481096777748, iteration: 99980
loss: 1.0240581035614014,grad_norm: 0.9999999570466893, iteration: 99981
loss: 1.0332472324371338,grad_norm: 0.9999994226635036, iteration: 99982
loss: 1.154571294784546,grad_norm: 0.9999991754304463, iteration: 99983
loss: 1.0375696420669556,grad_norm: 0.9999999522467148, iteration: 99984
loss: 0.9961276054382324,grad_norm: 0.9999991492851639, iteration: 99985
loss: 1.0282015800476074,grad_norm: 0.9319707168828061, iteration: 99986
loss: 1.0696351528167725,grad_norm: 0.9999996486225984, iteration: 99987
loss: 1.066361904144287,grad_norm: 0.9316639446242434, iteration: 99988
loss: 0.9986708164215088,grad_norm: 0.84741136686571, iteration: 99989
loss: 1.0545711517333984,grad_norm: 0.9999996256558102, iteration: 99990
loss: 1.0317531824111938,grad_norm: 0.9301816529155392, iteration: 99991
loss: 1.0460840463638306,grad_norm: 0.9999990994784221, iteration: 99992
loss: 0.9887553453445435,grad_norm: 0.9604869315430208, iteration: 99993
loss: 1.1438395977020264,grad_norm: 0.9999998544412758, iteration: 99994
loss: 1.019185185432434,grad_norm: 0.8901640632479626, iteration: 99995
loss: 1.008697748184204,grad_norm: 0.9503548408176625, iteration: 99996
loss: 1.0984593629837036,grad_norm: 0.9181102694900832, iteration: 99997
loss: 1.010077714920044,grad_norm: 0.9999993354960298, iteration: 99998
loss: 1.0345432758331299,grad_norm: 0.999999408805491, iteration: 99999
loss: 1.1110787391662598,grad_norm: 0.9999999548866415, iteration: 100000
Evaluating at step 100000
{'val': 1.0206887926906347, 'test': 3.1556878447913514}
loss: 0.9867346286773682,grad_norm: 0.9469690799181042, iteration: 100001
loss: 1.0192312002182007,grad_norm: 0.9999993160584505, iteration: 100002
loss: 0.9662712216377258,grad_norm: 0.8680688396735876, iteration: 100003
loss: 0.9975083470344543,grad_norm: 0.9423248264777371, iteration: 100004
loss: 1.0134083032608032,grad_norm: 0.999999131079499, iteration: 100005
loss: 1.1018017530441284,grad_norm: 0.9999991821571258, iteration: 100006
loss: 0.9741133451461792,grad_norm: 0.9598083392907053, iteration: 100007
loss: 1.0029891729354858,grad_norm: 0.9999990879272269, iteration: 100008
loss: 1.1202222108840942,grad_norm: 0.9999992468130334, iteration: 100009
loss: 1.0677531957626343,grad_norm: 0.9999998286625449, iteration: 100010
loss: 1.0256472826004028,grad_norm: 0.9999992445467623, iteration: 100011
loss: 1.0342267751693726,grad_norm: 0.9999998795346877, iteration: 100012
loss: 1.1018832921981812,grad_norm: 0.9999997025652758, iteration: 100013
loss: 1.2663884162902832,grad_norm: 0.999999514005126, iteration: 100014
loss: 1.018738031387329,grad_norm: 0.999998974800926, iteration: 100015
loss: 1.0506465435028076,grad_norm: 0.9999999505719236, iteration: 100016
loss: 1.1203041076660156,grad_norm: 0.9999997607995517, iteration: 100017
loss: 1.018265962600708,grad_norm: 0.8168143542435119, iteration: 100018
loss: 1.022600769996643,grad_norm: 0.999999615809464, iteration: 100019
loss: 0.993552565574646,grad_norm: 0.9999995081625735, iteration: 100020
loss: 1.0601823329925537,grad_norm: 0.9999995133427321, iteration: 100021
loss: 1.147932529449463,grad_norm: 0.9999997035408894, iteration: 100022
loss: 1.1686898469924927,grad_norm: 0.9999995693598897, iteration: 100023
loss: 0.9958677291870117,grad_norm: 0.997940691328658, iteration: 100024
loss: 0.9946274757385254,grad_norm: 0.9999993061044781, iteration: 100025
loss: 1.0905036926269531,grad_norm: 0.999999568353631, iteration: 100026
loss: 1.103365182876587,grad_norm: 0.9999995013003912, iteration: 100027
loss: 1.0067874193191528,grad_norm: 0.9870444408503906, iteration: 100028
loss: 1.077160358428955,grad_norm: 0.9999995436900175, iteration: 100029
loss: 1.0140894651412964,grad_norm: 0.9999990315674578, iteration: 100030
loss: 1.0209444761276245,grad_norm: 0.9999998576317123, iteration: 100031
loss: 1.1119650602340698,grad_norm: 0.9999991296075954, iteration: 100032
loss: 1.0788064002990723,grad_norm: 0.9999990649328753, iteration: 100033
loss: 1.0269798040390015,grad_norm: 0.9999997285451837, iteration: 100034
loss: 1.0598368644714355,grad_norm: 0.9999994983147942, iteration: 100035
loss: 1.1483206748962402,grad_norm: 0.9999998743148754, iteration: 100036
loss: 1.0412496328353882,grad_norm: 0.9999991098646279, iteration: 100037
loss: 1.037938117980957,grad_norm: 0.9999993551928087, iteration: 100038
loss: 1.090606451034546,grad_norm: 1.0000000041904291, iteration: 100039
loss: 1.1083983182907104,grad_norm: 0.9999999913012266, iteration: 100040
loss: 1.1167385578155518,grad_norm: 0.9999994876312766, iteration: 100041
loss: 1.0758180618286133,grad_norm: 0.9999997598929514, iteration: 100042
loss: 0.993170976638794,grad_norm: 0.7730786014735523, iteration: 100043
loss: 1.1811597347259521,grad_norm: 0.9999999490108189, iteration: 100044
loss: 1.1203892230987549,grad_norm: 0.9999995313804295, iteration: 100045
loss: 1.1982883214950562,grad_norm: 0.9999998895229142, iteration: 100046
loss: 1.0202735662460327,grad_norm: 0.9999994558633515, iteration: 100047
loss: 1.0965776443481445,grad_norm: 0.9999992670518031, iteration: 100048
loss: 1.0849072933197021,grad_norm: 0.9999995753632965, iteration: 100049
loss: 1.046709656715393,grad_norm: 0.9999994780983424, iteration: 100050
loss: 1.0868109464645386,grad_norm: 0.9999994376121057, iteration: 100051
loss: 1.184170126914978,grad_norm: 0.9999995075299346, iteration: 100052
loss: 1.004246711730957,grad_norm: 0.897637103055629, iteration: 100053
loss: 1.078321099281311,grad_norm: 0.9999992465253569, iteration: 100054
loss: 1.0533180236816406,grad_norm: 0.999999799261471, iteration: 100055
loss: 0.9787804484367371,grad_norm: 0.9999999847540689, iteration: 100056
loss: 0.9764847159385681,grad_norm: 0.9999994761627022, iteration: 100057
loss: 1.08582603931427,grad_norm: 0.9999995154106952, iteration: 100058
loss: 0.9987470507621765,grad_norm: 0.9999999673236505, iteration: 100059
loss: 1.0961264371871948,grad_norm: 0.99999983438912, iteration: 100060
loss: 1.1189978122711182,grad_norm: 0.9999996412345369, iteration: 100061
loss: 1.23733389377594,grad_norm: 0.9999999618458257, iteration: 100062
loss: 1.1640714406967163,grad_norm: 0.9999998772244242, iteration: 100063
loss: 1.0614506006240845,grad_norm: 0.9999994836101171, iteration: 100064
loss: 1.0563174486160278,grad_norm: 0.9999997789317592, iteration: 100065
loss: 1.0087189674377441,grad_norm: 0.9193259219226927, iteration: 100066
loss: 1.0551079511642456,grad_norm: 0.9999995499730734, iteration: 100067
loss: 1.0248178243637085,grad_norm: 0.999999629545896, iteration: 100068
loss: 1.1362724304199219,grad_norm: 0.9999998091305078, iteration: 100069
loss: 1.08637535572052,grad_norm: 0.9999993822705384, iteration: 100070
loss: 1.126057744026184,grad_norm: 0.9999995370280736, iteration: 100071
loss: 1.073572039604187,grad_norm: 0.999999634178898, iteration: 100072
loss: 1.0214276313781738,grad_norm: 0.9999995631219759, iteration: 100073
loss: 1.1390272378921509,grad_norm: 0.9999994277068684, iteration: 100074
loss: 1.0106548070907593,grad_norm: 0.9999992108001919, iteration: 100075
loss: 0.9581790566444397,grad_norm: 0.9999993714944934, iteration: 100076
loss: 1.0414674282073975,grad_norm: 0.9999996918645009, iteration: 100077
loss: 1.148289680480957,grad_norm: 0.9999997765634884, iteration: 100078
loss: 1.1352474689483643,grad_norm: 0.99999945682399, iteration: 100079
loss: 1.2329509258270264,grad_norm: 0.9999999387030669, iteration: 100080
loss: 1.3643238544464111,grad_norm: 0.9999999494503662, iteration: 100081
loss: 0.9872003793716431,grad_norm: 0.9999991148615833, iteration: 100082
loss: 1.0793460607528687,grad_norm: 0.9999993928467269, iteration: 100083
loss: 1.032776951789856,grad_norm: 0.9999997841663042, iteration: 100084
loss: 1.1558223962783813,grad_norm: 0.9999999302099958, iteration: 100085
loss: 1.0764235258102417,grad_norm: 0.9999989865499752, iteration: 100086
loss: 1.0280925035476685,grad_norm: 0.999999885450499, iteration: 100087
loss: 1.006263017654419,grad_norm: 0.9999991165774357, iteration: 100088
loss: 1.0349940061569214,grad_norm: 0.9999991075106481, iteration: 100089
loss: 1.016222596168518,grad_norm: 0.9999991564526384, iteration: 100090
loss: 1.0467478036880493,grad_norm: 0.9999992855031693, iteration: 100091
loss: 1.0350897312164307,grad_norm: 0.9999993516772903, iteration: 100092
loss: 0.9717421531677246,grad_norm: 0.870948087857872, iteration: 100093
loss: 1.0111652612686157,grad_norm: 0.9999991870827765, iteration: 100094
loss: 1.0029597282409668,grad_norm: 0.9999993091273222, iteration: 100095
loss: 1.0190330743789673,grad_norm: 0.9999996588783568, iteration: 100096
loss: 1.0417582988739014,grad_norm: 0.9999990949088043, iteration: 100097
loss: 1.0623714923858643,grad_norm: 0.999999297319536, iteration: 100098
loss: 0.9870136976242065,grad_norm: 0.9999991044420667, iteration: 100099
loss: 1.1986656188964844,grad_norm: 0.9999998074064556, iteration: 100100
loss: 1.0000085830688477,grad_norm: 0.9999990854337668, iteration: 100101
loss: 0.9767217636108398,grad_norm: 0.9999990827872584, iteration: 100102
loss: 1.0374566316604614,grad_norm: 0.999999696034978, iteration: 100103
loss: 1.054197072982788,grad_norm: 0.9999995530621492, iteration: 100104
loss: 0.9820402264595032,grad_norm: 0.8627237807779613, iteration: 100105
loss: 1.0237479209899902,grad_norm: 0.9999991476627081, iteration: 100106
loss: 1.0011330842971802,grad_norm: 0.9999992437714209, iteration: 100107
loss: 1.0695232152938843,grad_norm: 0.9999993457519574, iteration: 100108
loss: 1.0135674476623535,grad_norm: 0.8156130611049967, iteration: 100109
loss: 1.0269588232040405,grad_norm: 0.9999998703028922, iteration: 100110
loss: 1.0279854536056519,grad_norm: 0.9999993461654054, iteration: 100111
loss: 1.048925757408142,grad_norm: 0.9999995825969206, iteration: 100112
loss: 1.0391818284988403,grad_norm: 0.9568267784206322, iteration: 100113
loss: 1.0076396465301514,grad_norm: 0.99999997091966, iteration: 100114
loss: 1.0569143295288086,grad_norm: 0.9999998421244506, iteration: 100115
loss: 0.9937558770179749,grad_norm: 0.9999993840394027, iteration: 100116
loss: 1.0007256269454956,grad_norm: 0.8987322360052006, iteration: 100117
loss: 0.9790652394294739,grad_norm: 0.8016695792461002, iteration: 100118
loss: 1.0117619037628174,grad_norm: 0.8794427398149294, iteration: 100119
loss: 1.005184531211853,grad_norm: 0.8018460006002266, iteration: 100120
loss: 1.0689688920974731,grad_norm: 0.8526010803322088, iteration: 100121
loss: 1.0594931840896606,grad_norm: 0.9999999296687353, iteration: 100122
loss: 1.0316340923309326,grad_norm: 0.858603928041592, iteration: 100123
loss: 1.062693476676941,grad_norm: 0.9999999068790407, iteration: 100124
loss: 1.0073213577270508,grad_norm: 1.0000000784797833, iteration: 100125
loss: 1.052206039428711,grad_norm: 0.999999547611477, iteration: 100126
loss: 1.006596326828003,grad_norm: 0.9999992159522818, iteration: 100127
loss: 1.0390453338623047,grad_norm: 0.9999997640558691, iteration: 100128
loss: 1.060375690460205,grad_norm: 0.9999998434117074, iteration: 100129
loss: 1.0339006185531616,grad_norm: 0.9266017421575, iteration: 100130
loss: 1.04646897315979,grad_norm: 0.9999995378434141, iteration: 100131
loss: 1.00003182888031,grad_norm: 0.9999999797963569, iteration: 100132
loss: 1.000442624092102,grad_norm: 0.9999999386057811, iteration: 100133
loss: 1.0278903245925903,grad_norm: 1.0000000202290567, iteration: 100134
loss: 1.0404692888259888,grad_norm: 0.9999998702909256, iteration: 100135
loss: 1.080039143562317,grad_norm: 0.9999994492427079, iteration: 100136
loss: 1.0463899374008179,grad_norm: 0.9828541399611296, iteration: 100137
loss: 0.9921410083770752,grad_norm: 0.999999310576145, iteration: 100138
loss: 0.9888850450515747,grad_norm: 0.9285055521301453, iteration: 100139
loss: 1.09501314163208,grad_norm: 0.9999997151714562, iteration: 100140
loss: 1.034600853919983,grad_norm: 0.999999254461095, iteration: 100141
loss: 1.0825660228729248,grad_norm: 0.9999992300638221, iteration: 100142
loss: 1.052437663078308,grad_norm: 0.999999879577874, iteration: 100143
loss: 1.0122771263122559,grad_norm: 0.9999996975800025, iteration: 100144
loss: 1.0869195461273193,grad_norm: 0.9999992930428171, iteration: 100145
loss: 1.05824875831604,grad_norm: 0.9999992941536298, iteration: 100146
loss: 1.0334070920944214,grad_norm: 0.99999927471416, iteration: 100147
loss: 0.9929820895195007,grad_norm: 0.999998943521519, iteration: 100148
loss: 1.1146068572998047,grad_norm: 0.9999998942874506, iteration: 100149
loss: 1.157183051109314,grad_norm: 0.9999999176516094, iteration: 100150
loss: 1.274707555770874,grad_norm: 0.9999996836060735, iteration: 100151
loss: 1.1788522005081177,grad_norm: 0.9999999976174186, iteration: 100152
loss: 1.02633798122406,grad_norm: 0.9999995589878836, iteration: 100153
loss: 1.114085078239441,grad_norm: 0.9999997743655397, iteration: 100154
loss: 1.3199785947799683,grad_norm: 0.9999998115373471, iteration: 100155
loss: 1.0135643482208252,grad_norm: 0.9999995030768745, iteration: 100156
loss: 1.112280249595642,grad_norm: 0.9999993530515702, iteration: 100157
loss: 1.240470051765442,grad_norm: 0.9999999259048148, iteration: 100158
loss: 1.3044627904891968,grad_norm: 0.9999999879011462, iteration: 100159
loss: 1.0887799263000488,grad_norm: 0.9999991291770506, iteration: 100160
loss: 1.0724060535430908,grad_norm: 0.9999996344408559, iteration: 100161
loss: 1.0392082929611206,grad_norm: 0.9999998374431631, iteration: 100162
loss: 0.9944515228271484,grad_norm: 0.9349955033401828, iteration: 100163
loss: 1.031480312347412,grad_norm: 0.9999992361232796, iteration: 100164
loss: 1.1610442399978638,grad_norm: 0.9999998886057315, iteration: 100165
loss: 1.0015374422073364,grad_norm: 0.9999995595972654, iteration: 100166
loss: 1.0852320194244385,grad_norm: 1.0000000389068426, iteration: 100167
loss: 1.2178772687911987,grad_norm: 0.9999999283336686, iteration: 100168
loss: 1.0210307836532593,grad_norm: 0.9999997647144173, iteration: 100169
loss: 1.0994575023651123,grad_norm: 0.9999999272589111, iteration: 100170
loss: 1.1429829597473145,grad_norm: 0.9999997393508655, iteration: 100171
loss: 1.047845482826233,grad_norm: 0.9999997109990681, iteration: 100172
loss: 1.2622390985488892,grad_norm: 0.999999541836866, iteration: 100173
loss: 0.9814186692237854,grad_norm: 0.9999991043269042, iteration: 100174
loss: 1.0747853517532349,grad_norm: 0.9999999227345518, iteration: 100175
loss: 1.2029849290847778,grad_norm: 0.9999996841472731, iteration: 100176
loss: 1.0189917087554932,grad_norm: 0.9999993100795237, iteration: 100177
loss: 0.9992642402648926,grad_norm: 0.9999996189272828, iteration: 100178
loss: 1.0412967205047607,grad_norm: 0.9196564685984631, iteration: 100179
loss: 1.076461911201477,grad_norm: 0.9999997770365949, iteration: 100180
loss: 1.0503590106964111,grad_norm: 0.9999995765264005, iteration: 100181
loss: 0.977177619934082,grad_norm: 0.898244182568395, iteration: 100182
loss: 0.9783815741539001,grad_norm: 0.9999994277234457, iteration: 100183
loss: 1.1477895975112915,grad_norm: 0.9999996469988077, iteration: 100184
loss: 1.052844524383545,grad_norm: 0.9999997776225272, iteration: 100185
loss: 1.0660390853881836,grad_norm: 0.9999993731268202, iteration: 100186
loss: 1.0333905220031738,grad_norm: 1.0000000050114854, iteration: 100187
loss: 1.0115766525268555,grad_norm: 0.9715361501374726, iteration: 100188
loss: 1.015249490737915,grad_norm: 0.9197348612538219, iteration: 100189
loss: 1.0189449787139893,grad_norm: 0.9973525729951149, iteration: 100190
loss: 0.9680914282798767,grad_norm: 0.8705221131743317, iteration: 100191
loss: 1.024667501449585,grad_norm: 0.8600031142328343, iteration: 100192
loss: 1.0430488586425781,grad_norm: 0.9999995019375295, iteration: 100193
loss: 1.0146478414535522,grad_norm: 0.9999991308396092, iteration: 100194
loss: 1.0566339492797852,grad_norm: 0.9999998121735418, iteration: 100195
loss: 1.143757700920105,grad_norm: 0.9999998422306604, iteration: 100196
loss: 0.9760511517524719,grad_norm: 0.9999998001793622, iteration: 100197
loss: 0.9925602078437805,grad_norm: 0.9101790175279226, iteration: 100198
loss: 0.9971161484718323,grad_norm: 0.9999998209479051, iteration: 100199
loss: 1.0427098274230957,grad_norm: 0.9999994309818928, iteration: 100200
loss: 1.0325813293457031,grad_norm: 0.9988498343538552, iteration: 100201
loss: 1.5342820882797241,grad_norm: 1.0000000089157786, iteration: 100202
loss: 1.0115835666656494,grad_norm: 0.9594760017124656, iteration: 100203
loss: 1.1253193616867065,grad_norm: 0.9999993144480267, iteration: 100204
loss: 1.0945630073547363,grad_norm: 0.9999999580261132, iteration: 100205
loss: 0.9936798810958862,grad_norm: 0.9999996321776372, iteration: 100206
loss: 1.0722888708114624,grad_norm: 0.9999999924971836, iteration: 100207
loss: 0.9938572645187378,grad_norm: 0.9999992082886817, iteration: 100208
loss: 1.1875561475753784,grad_norm: 0.99999960962401, iteration: 100209
loss: 1.065611720085144,grad_norm: 0.9999999186313923, iteration: 100210
loss: 1.0343282222747803,grad_norm: 0.8735345506124137, iteration: 100211
loss: 1.0559452772140503,grad_norm: 0.9863156944725366, iteration: 100212
loss: 1.0650476217269897,grad_norm: 0.9999999293702743, iteration: 100213
loss: 0.9960924983024597,grad_norm: 0.9288525144988105, iteration: 100214
loss: 1.0948153734207153,grad_norm: 0.9999995709666102, iteration: 100215
loss: 0.9947516322135925,grad_norm: 0.8901893890736556, iteration: 100216
loss: 1.0392783880233765,grad_norm: 0.9539307817163212, iteration: 100217
loss: 0.9901110529899597,grad_norm: 0.9999994840453593, iteration: 100218
loss: 1.1197243928909302,grad_norm: 0.9999991464119559, iteration: 100219
loss: 1.1573995351791382,grad_norm: 0.9999997805639051, iteration: 100220
loss: 1.0701628923416138,grad_norm: 0.999999092032703, iteration: 100221
loss: 1.02757728099823,grad_norm: 0.8766592974612109, iteration: 100222
loss: 1.0834953784942627,grad_norm: 0.9999999742135313, iteration: 100223
loss: 1.0931423902511597,grad_norm: 0.9999992778988207, iteration: 100224
loss: 1.015275239944458,grad_norm: 0.9999998445089667, iteration: 100225
loss: 1.1773571968078613,grad_norm: 0.9999998454288891, iteration: 100226
loss: 1.1249058246612549,grad_norm: 0.9999999370172843, iteration: 100227
loss: 1.015367865562439,grad_norm: 0.9999996981640747, iteration: 100228
loss: 1.0844902992248535,grad_norm: 0.8068317594197225, iteration: 100229
loss: 1.0113264322280884,grad_norm: 0.9999990889660705, iteration: 100230
loss: 1.0148860216140747,grad_norm: 0.9999992725656933, iteration: 100231
loss: 1.010441541671753,grad_norm: 0.9999991407140374, iteration: 100232
loss: 1.0209296941757202,grad_norm: 0.9999990967131749, iteration: 100233
loss: 1.0278104543685913,grad_norm: 0.9999998153012918, iteration: 100234
loss: 0.9686055183410645,grad_norm: 0.9999991410073694, iteration: 100235
loss: 1.0814152956008911,grad_norm: 0.9999994304378756, iteration: 100236
loss: 1.082311749458313,grad_norm: 0.9999991677819648, iteration: 100237
loss: 1.0007383823394775,grad_norm: 0.9999995873455471, iteration: 100238
loss: 1.012916922569275,grad_norm: 0.8679859075012525, iteration: 100239
loss: 0.9736450910568237,grad_norm: 0.9999996813585301, iteration: 100240
loss: 1.0600157976150513,grad_norm: 0.999999265966837, iteration: 100241
loss: 0.9905927181243896,grad_norm: 0.9267546161060309, iteration: 100242
loss: 1.0911178588867188,grad_norm: 0.9999998893192429, iteration: 100243
loss: 1.1169819831848145,grad_norm: 0.999999826664156, iteration: 100244
loss: 1.0270330905914307,grad_norm: 0.9999994807876728, iteration: 100245
loss: 0.9900649189949036,grad_norm: 0.9999990768959275, iteration: 100246
loss: 1.0037667751312256,grad_norm: 0.9795656232177549, iteration: 100247
loss: 1.0806061029434204,grad_norm: 0.8936198064435151, iteration: 100248
loss: 1.005722165107727,grad_norm: 0.9999991339398542, iteration: 100249
loss: 1.0443115234375,grad_norm: 0.8520889690128802, iteration: 100250
loss: 1.1123261451721191,grad_norm: 0.9999999232402885, iteration: 100251
loss: 1.0491666793823242,grad_norm: 0.9999992294361477, iteration: 100252
loss: 1.0285813808441162,grad_norm: 0.9999991715583603, iteration: 100253
loss: 1.0671643018722534,grad_norm: 0.9999991392475617, iteration: 100254
loss: 0.9903293251991272,grad_norm: 0.7931372227694423, iteration: 100255
loss: 1.0152288675308228,grad_norm: 0.8902878768472318, iteration: 100256
loss: 1.0232278108596802,grad_norm: 0.9999996842548842, iteration: 100257
loss: 1.0302444696426392,grad_norm: 0.9999990631467133, iteration: 100258
loss: 1.1328985691070557,grad_norm: 0.9999999638125684, iteration: 100259
loss: 1.1026017665863037,grad_norm: 0.9999992461145601, iteration: 100260
loss: 1.0368421077728271,grad_norm: 0.9999995481582706, iteration: 100261
loss: 0.9848136901855469,grad_norm: 0.9415343648749388, iteration: 100262
loss: 0.9836228489875793,grad_norm: 0.999999609440329, iteration: 100263
loss: 1.0610724687576294,grad_norm: 0.9999998156912832, iteration: 100264
loss: 1.086080551147461,grad_norm: 0.9999994292353183, iteration: 100265
loss: 1.0304356813430786,grad_norm: 0.8726664460229242, iteration: 100266
loss: 1.003050684928894,grad_norm: 0.9999992083807183, iteration: 100267
loss: 1.2811131477355957,grad_norm: 0.9999998587035316, iteration: 100268
loss: 1.0521825551986694,grad_norm: 0.9999991713911656, iteration: 100269
loss: 1.0020071268081665,grad_norm: 0.9999998478694248, iteration: 100270
loss: 1.1296478509902954,grad_norm: 0.9999993403784557, iteration: 100271
loss: 0.9626678228378296,grad_norm: 0.9999990490531729, iteration: 100272
loss: 1.1186937093734741,grad_norm: 0.9999995883287114, iteration: 100273
loss: 0.98404860496521,grad_norm: 0.9999991104945506, iteration: 100274
loss: 1.0319162607192993,grad_norm: 0.999999833752275, iteration: 100275
loss: 0.9854258894920349,grad_norm: 0.9999995922018573, iteration: 100276
loss: 1.1970620155334473,grad_norm: 0.9999998241684827, iteration: 100277
loss: 1.0171310901641846,grad_norm: 0.9999998091455315, iteration: 100278
loss: 1.089698076248169,grad_norm: 0.999999783691366, iteration: 100279
loss: 1.0277135372161865,grad_norm: 0.9999991577408043, iteration: 100280
loss: 0.97812420129776,grad_norm: 0.999999731818355, iteration: 100281
loss: 0.9860895872116089,grad_norm: 0.9618863816468509, iteration: 100282
loss: 1.0097503662109375,grad_norm: 0.8744689326565093, iteration: 100283
loss: 1.0481743812561035,grad_norm: 0.9999991332511649, iteration: 100284
loss: 1.0706366300582886,grad_norm: 0.9999991953443996, iteration: 100285
loss: 1.0198159217834473,grad_norm: 0.9999992954941687, iteration: 100286
loss: 1.1073273420333862,grad_norm: 0.9999999491775051, iteration: 100287
loss: 1.2410355806350708,grad_norm: 0.9999999218774265, iteration: 100288
loss: 0.9817578196525574,grad_norm: 0.9999990458581913, iteration: 100289
loss: 1.1022266149520874,grad_norm: 0.9999993650945715, iteration: 100290
loss: 1.0770952701568604,grad_norm: 0.9999998128765492, iteration: 100291
loss: 0.9936839938163757,grad_norm: 0.96194341089705, iteration: 100292
loss: 1.0984697341918945,grad_norm: 0.9999992810024205, iteration: 100293
loss: 1.0444841384887695,grad_norm: 0.9999997350811537, iteration: 100294
loss: 0.9841623306274414,grad_norm: 0.9999990060622193, iteration: 100295
loss: 1.1583003997802734,grad_norm: 0.9999993536315201, iteration: 100296
loss: 1.0335997343063354,grad_norm: 0.9999997149050786, iteration: 100297
loss: 1.0680657625198364,grad_norm: 0.9999997510416309, iteration: 100298
loss: 1.0449092388153076,grad_norm: 0.9999993165476161, iteration: 100299
loss: 0.9884744882583618,grad_norm: 0.9614824476366508, iteration: 100300
loss: 1.1061687469482422,grad_norm: 0.9999999529936688, iteration: 100301
loss: 0.9894657135009766,grad_norm: 0.9419982372392737, iteration: 100302
loss: 0.9945500493049622,grad_norm: 0.8215778829887279, iteration: 100303
loss: 1.0277478694915771,grad_norm: 0.9844882876323443, iteration: 100304
loss: 1.2667099237442017,grad_norm: 0.9999994944293464, iteration: 100305
loss: 1.0113954544067383,grad_norm: 0.869209434526812, iteration: 100306
loss: 1.0833040475845337,grad_norm: 0.999999499049894, iteration: 100307
loss: 1.1543774604797363,grad_norm: 0.99999903149884, iteration: 100308
loss: 1.0115787982940674,grad_norm: 0.999999281830888, iteration: 100309
loss: 1.0581505298614502,grad_norm: 0.9999996811846399, iteration: 100310
loss: 1.0683883428573608,grad_norm: 0.9999996222868137, iteration: 100311
loss: 1.0696064233779907,grad_norm: 0.999999833331536, iteration: 100312
loss: 1.1469182968139648,grad_norm: 0.9999996744173322, iteration: 100313
loss: 1.0700945854187012,grad_norm: 0.999999432970673, iteration: 100314
loss: 1.0468919277191162,grad_norm: 0.9999995326039329, iteration: 100315
loss: 0.9767971038818359,grad_norm: 0.9957030565918314, iteration: 100316
loss: 1.0449678897857666,grad_norm: 0.9999991461146022, iteration: 100317
loss: 1.073089838027954,grad_norm: 0.999999542640372, iteration: 100318
loss: 0.9881892800331116,grad_norm: 0.9999995679218161, iteration: 100319
loss: 1.0326693058013916,grad_norm: 0.9999995517364316, iteration: 100320
loss: 0.973957359790802,grad_norm: 0.7715014827927482, iteration: 100321
loss: 1.090246319770813,grad_norm: 0.9999993105084781, iteration: 100322
loss: 1.0043519735336304,grad_norm: 0.9999991997656653, iteration: 100323
loss: 1.0095690488815308,grad_norm: 0.911025122980374, iteration: 100324
loss: 0.9961847066879272,grad_norm: 0.7740672698243652, iteration: 100325
loss: 1.055670976638794,grad_norm: 0.9999997489157532, iteration: 100326
loss: 1.0414389371871948,grad_norm: 0.9999990253878358, iteration: 100327
loss: 1.1026982069015503,grad_norm: 0.9999994225065465, iteration: 100328
loss: 1.240563154220581,grad_norm: 0.9999996617801562, iteration: 100329
loss: 1.1230067014694214,grad_norm: 0.9999990994765562, iteration: 100330
loss: 0.9935871362686157,grad_norm: 0.7994671280515342, iteration: 100331
loss: 1.0190962553024292,grad_norm: 0.9999990683460446, iteration: 100332
loss: 1.127267837524414,grad_norm: 0.9999992738235092, iteration: 100333
loss: 1.0277788639068604,grad_norm: 0.8023458071957362, iteration: 100334
loss: 1.0713417530059814,grad_norm: 0.9999997778237567, iteration: 100335
loss: 1.0180368423461914,grad_norm: 0.9999994671539085, iteration: 100336
loss: 1.0710762739181519,grad_norm: 0.9999998133495949, iteration: 100337
loss: 1.0771362781524658,grad_norm: 0.9999992214476736, iteration: 100338
loss: 1.0316863059997559,grad_norm: 0.9164218418665251, iteration: 100339
loss: 1.040107011795044,grad_norm: 0.999999850210856, iteration: 100340
loss: 1.0388039350509644,grad_norm: 0.9756273034367996, iteration: 100341
loss: 1.01383376121521,grad_norm: 0.8293543120909653, iteration: 100342
loss: 1.023302435874939,grad_norm: 0.999999699411107, iteration: 100343
loss: 1.0930479764938354,grad_norm: 0.9999997906840004, iteration: 100344
loss: 1.0168691873550415,grad_norm: 0.9632568535315589, iteration: 100345
loss: 1.0381907224655151,grad_norm: 0.9999997345362066, iteration: 100346
loss: 1.0288434028625488,grad_norm: 0.9677454823054173, iteration: 100347
loss: 1.0959842205047607,grad_norm: 0.9999991399373378, iteration: 100348
loss: 0.9877817034721375,grad_norm: 0.8847878550509737, iteration: 100349
loss: 0.9962707161903381,grad_norm: 0.8289499741181012, iteration: 100350
loss: 0.9949747323989868,grad_norm: 0.9999990528223588, iteration: 100351
loss: 0.9929931163787842,grad_norm: 0.8350248328425777, iteration: 100352
loss: 1.049967885017395,grad_norm: 0.9999999190288946, iteration: 100353
loss: 1.011704683303833,grad_norm: 0.9999997262767808, iteration: 100354
loss: 1.0574296712875366,grad_norm: 0.8813135333650948, iteration: 100355
loss: 0.9992125630378723,grad_norm: 0.9999992618166664, iteration: 100356
loss: 1.090653657913208,grad_norm: 0.9999993290625316, iteration: 100357
loss: 1.0283528566360474,grad_norm: 0.9999995473271867, iteration: 100358
loss: 1.0062735080718994,grad_norm: 0.907545474012077, iteration: 100359
loss: 0.9890443682670593,grad_norm: 0.865263905155115, iteration: 100360
loss: 1.0719093084335327,grad_norm: 0.9999995150905244, iteration: 100361
loss: 1.1865679025650024,grad_norm: 0.9999997623205019, iteration: 100362
loss: 1.0741647481918335,grad_norm: 0.9999992525230136, iteration: 100363
loss: 1.095548391342163,grad_norm: 0.9999995327276702, iteration: 100364
loss: 1.0434876680374146,grad_norm: 0.907305897623756, iteration: 100365
loss: 1.1395994424819946,grad_norm: 0.9999996054164859, iteration: 100366
loss: 1.2598797082901,grad_norm: 0.9999996022178975, iteration: 100367
loss: 0.9978775382041931,grad_norm: 0.9722426796827395, iteration: 100368
loss: 1.0013281106948853,grad_norm: 0.9999990724615062, iteration: 100369
loss: 1.1186118125915527,grad_norm: 0.9999996002537409, iteration: 100370
loss: 1.035720944404602,grad_norm: 0.9999990238584947, iteration: 100371
loss: 1.0788133144378662,grad_norm: 0.9999995194956356, iteration: 100372
loss: 0.9824715256690979,grad_norm: 0.8492063500146882, iteration: 100373
loss: 1.0258160829544067,grad_norm: 0.9869548145406913, iteration: 100374
loss: 1.036395788192749,grad_norm: 0.9813396802041588, iteration: 100375
loss: 1.0800453424453735,grad_norm: 0.9999991713924034, iteration: 100376
loss: 1.0478302240371704,grad_norm: 0.9999991329749691, iteration: 100377
loss: 1.0092835426330566,grad_norm: 0.7421739452919172, iteration: 100378
loss: 1.0448181629180908,grad_norm: 0.8486022557105793, iteration: 100379
loss: 1.0345710515975952,grad_norm: 0.9999996545216324, iteration: 100380
loss: 1.0011879205703735,grad_norm: 0.9999997789783135, iteration: 100381
loss: 1.0050028562545776,grad_norm: 0.8456403601417752, iteration: 100382
loss: 1.0414365530014038,grad_norm: 0.9999996104878556, iteration: 100383
loss: 1.0297331809997559,grad_norm: 0.899339085998102, iteration: 100384
loss: 0.993721604347229,grad_norm: 0.9999991161513959, iteration: 100385
loss: 1.052852749824524,grad_norm: 0.9999990942596887, iteration: 100386
loss: 1.049206018447876,grad_norm: 0.9999994153247558, iteration: 100387
loss: 1.0769518613815308,grad_norm: 0.9999991197839967, iteration: 100388
loss: 1.0484986305236816,grad_norm: 0.9999993891933856, iteration: 100389
loss: 0.9944238662719727,grad_norm: 0.9999993211226879, iteration: 100390
loss: 1.543897271156311,grad_norm: 0.9999995849804625, iteration: 100391
loss: 0.9919036030769348,grad_norm: 0.8781871384347814, iteration: 100392
loss: 1.0470349788665771,grad_norm: 0.9999996236624894, iteration: 100393
loss: 1.0089209079742432,grad_norm: 0.8552062610109624, iteration: 100394
loss: 1.09169602394104,grad_norm: 0.9999991933483262, iteration: 100395
loss: 0.9674835801124573,grad_norm: 0.8146980650607237, iteration: 100396
loss: 1.0239812135696411,grad_norm: 0.9999997474317944, iteration: 100397
loss: 1.0155266523361206,grad_norm: 0.9265553152363168, iteration: 100398
loss: 1.0210367441177368,grad_norm: 0.9999992639795041, iteration: 100399
loss: 1.0508050918579102,grad_norm: 0.9022761426825836, iteration: 100400
loss: 0.988262414932251,grad_norm: 0.847416967652796, iteration: 100401
loss: 0.9770213961601257,grad_norm: 0.9999991392034545, iteration: 100402
loss: 1.1891742944717407,grad_norm: 0.9736285385889601, iteration: 100403
loss: 1.025832176208496,grad_norm: 0.999999260904517, iteration: 100404
loss: 1.0584545135498047,grad_norm: 0.9999993282624172, iteration: 100405
loss: 1.0220507383346558,grad_norm: 0.9999995540577207, iteration: 100406
loss: 1.0455456972122192,grad_norm: 0.9999994820614402, iteration: 100407
loss: 1.0337514877319336,grad_norm: 0.8976432498493591, iteration: 100408
loss: 1.1071140766143799,grad_norm: 0.99999940933552, iteration: 100409
loss: 1.0856343507766724,grad_norm: 0.9999993407470437, iteration: 100410
loss: 1.0582871437072754,grad_norm: 0.9999993491741829, iteration: 100411
loss: 1.184863567352295,grad_norm: 0.9999992535118067, iteration: 100412
loss: 1.0443036556243896,grad_norm: 0.978357886632518, iteration: 100413
loss: 1.0068857669830322,grad_norm: 0.9999995238316677, iteration: 100414
loss: 1.008678913116455,grad_norm: 0.9999991703916132, iteration: 100415
loss: 1.0960335731506348,grad_norm: 0.99999987193772, iteration: 100416
loss: 1.0959867238998413,grad_norm: 0.9999999708389266, iteration: 100417
loss: 0.988121747970581,grad_norm: 0.8888183803288623, iteration: 100418
loss: 1.1502355337142944,grad_norm: 0.9999996239131712, iteration: 100419
loss: 1.0305904150009155,grad_norm: 0.9999995847636041, iteration: 100420
loss: 0.9811396598815918,grad_norm: 0.8654652934342567, iteration: 100421
loss: 1.0189452171325684,grad_norm: 0.9999995600028575, iteration: 100422
loss: 1.059814214706421,grad_norm: 0.9999990333829979, iteration: 100423
loss: 1.0097451210021973,grad_norm: 0.9999991333665602, iteration: 100424
loss: 1.1016838550567627,grad_norm: 0.9999998756796026, iteration: 100425
loss: 1.0469958782196045,grad_norm: 0.9999990660997331, iteration: 100426
loss: 0.9772089719772339,grad_norm: 0.9460837177840066, iteration: 100427
loss: 1.0600956678390503,grad_norm: 0.9999997938126481, iteration: 100428
loss: 0.991713285446167,grad_norm: 0.999999583016094, iteration: 100429
loss: 1.0219534635543823,grad_norm: 0.8576874766687359, iteration: 100430
loss: 0.9966820478439331,grad_norm: 0.9685876397373895, iteration: 100431
loss: 1.0587462186813354,grad_norm: 0.9999991403642784, iteration: 100432
loss: 1.047823429107666,grad_norm: 0.9405101238572313, iteration: 100433
loss: 1.0655535459518433,grad_norm: 0.9956045857193491, iteration: 100434
loss: 1.0332432985305786,grad_norm: 0.882521048261833, iteration: 100435
loss: 1.0230865478515625,grad_norm: 0.9999990894886996, iteration: 100436
loss: 1.0240663290023804,grad_norm: 0.9999991303938656, iteration: 100437
loss: 1.011855959892273,grad_norm: 0.9988909338425165, iteration: 100438
loss: 0.9880303740501404,grad_norm: 0.8516452230384839, iteration: 100439
loss: 1.0159554481506348,grad_norm: 0.9746453090402933, iteration: 100440
loss: 1.0204600095748901,grad_norm: 0.9999992995603721, iteration: 100441
loss: 1.0398168563842773,grad_norm: 0.9999996902224514, iteration: 100442
loss: 1.062028169631958,grad_norm: 0.9999991189723472, iteration: 100443
loss: 1.0274397134780884,grad_norm: 0.8448175878460689, iteration: 100444
loss: 0.9846542477607727,grad_norm: 0.8772120811497124, iteration: 100445
loss: 1.001294493675232,grad_norm: 0.999999091357922, iteration: 100446
loss: 1.0124239921569824,grad_norm: 0.9999990918577464, iteration: 100447
loss: 1.0709271430969238,grad_norm: 0.9999991520347912, iteration: 100448
loss: 1.018904685974121,grad_norm: 0.916087918692043, iteration: 100449
loss: 1.0889183282852173,grad_norm: 0.9999989911106029, iteration: 100450
loss: 1.0359996557235718,grad_norm: 0.9999990182341753, iteration: 100451
loss: 1.0206292867660522,grad_norm: 0.9999990608673809, iteration: 100452
loss: 1.0811084508895874,grad_norm: 0.9999992721345247, iteration: 100453
loss: 1.0519399642944336,grad_norm: 0.8084763535007081, iteration: 100454
loss: 1.0506725311279297,grad_norm: 0.9999998342540705, iteration: 100455
loss: 1.0552031993865967,grad_norm: 0.9999990388229913, iteration: 100456
loss: 1.0207782983779907,grad_norm: 0.9652698020662144, iteration: 100457
loss: 0.9583402276039124,grad_norm: 0.9426063383484152, iteration: 100458
loss: 1.0468237400054932,grad_norm: 0.9795415748348223, iteration: 100459
loss: 1.0466722249984741,grad_norm: 0.99999912316369, iteration: 100460
loss: 1.0259613990783691,grad_norm: 0.9986108272067377, iteration: 100461
loss: 1.0874580144882202,grad_norm: 0.9999989573122787, iteration: 100462
loss: 1.012986421585083,grad_norm: 0.9809848723581008, iteration: 100463
loss: 0.9929735064506531,grad_norm: 0.9999997771663885, iteration: 100464
loss: 1.059826135635376,grad_norm: 0.8097041743848844, iteration: 100465
loss: 1.0333492755889893,grad_norm: 0.9999991898180794, iteration: 100466
loss: 1.0540521144866943,grad_norm: 0.8562163769321335, iteration: 100467
loss: 1.0359076261520386,grad_norm: 0.9495058953466392, iteration: 100468
loss: 1.0787124633789062,grad_norm: 0.9999996189108286, iteration: 100469
loss: 1.0484799146652222,grad_norm: 0.9999993049115143, iteration: 100470
loss: 1.0633172988891602,grad_norm: 0.9999993068675006, iteration: 100471
loss: 0.9731317758560181,grad_norm: 0.746998269093286, iteration: 100472
loss: 1.0376226902008057,grad_norm: 0.9999993933441569, iteration: 100473
loss: 1.172371745109558,grad_norm: 0.9999999520251135, iteration: 100474
loss: 1.0024875402450562,grad_norm: 0.8371714355103823, iteration: 100475
loss: 1.0049798488616943,grad_norm: 0.9411595803732227, iteration: 100476
loss: 1.0334160327911377,grad_norm: 0.9019021957287057, iteration: 100477
loss: 0.9766461253166199,grad_norm: 0.7924153221143154, iteration: 100478
loss: 1.0162330865859985,grad_norm: 0.9999991521281674, iteration: 100479
loss: 1.1867153644561768,grad_norm: 0.9999996673433118, iteration: 100480
loss: 1.0286470651626587,grad_norm: 0.7787478319353855, iteration: 100481
loss: 0.9617896676063538,grad_norm: 0.8728900274160797, iteration: 100482
loss: 1.0016024112701416,grad_norm: 0.8734154911718517, iteration: 100483
loss: 0.9902427792549133,grad_norm: 0.8625523416247003, iteration: 100484
loss: 0.9897982478141785,grad_norm: 0.8619164879757738, iteration: 100485
loss: 0.9945462942123413,grad_norm: 0.9999991233201595, iteration: 100486
loss: 1.049130916595459,grad_norm: 0.9999992426522334, iteration: 100487
loss: 1.0179334878921509,grad_norm: 0.9999990608550264, iteration: 100488
loss: 0.9892051815986633,grad_norm: 0.8938756648063665, iteration: 100489
loss: 0.9888672828674316,grad_norm: 0.9999991189871038, iteration: 100490
loss: 1.0602377653121948,grad_norm: 0.9414611790817081, iteration: 100491
loss: 1.1194920539855957,grad_norm: 0.9999994634365277, iteration: 100492
loss: 1.0451864004135132,grad_norm: 0.9999990972438502, iteration: 100493
loss: 1.0291520357131958,grad_norm: 0.9999990819699343, iteration: 100494
loss: 0.9958242774009705,grad_norm: 0.8991269894640048, iteration: 100495
loss: 1.0025042295455933,grad_norm: 0.937254017373488, iteration: 100496
loss: 1.0327696800231934,grad_norm: 0.9999991789468383, iteration: 100497
loss: 1.070439100265503,grad_norm: 0.9999993379444425, iteration: 100498
loss: 1.049299955368042,grad_norm: 0.9999994617037131, iteration: 100499
loss: 1.0675655603408813,grad_norm: 0.7687630594124566, iteration: 100500
loss: 1.049702525138855,grad_norm: 0.9999995468503461, iteration: 100501
loss: 1.0533206462860107,grad_norm: 0.9999991471821177, iteration: 100502
loss: 1.0793395042419434,grad_norm: 0.9830430236090826, iteration: 100503
loss: 0.9734476208686829,grad_norm: 0.8826370714690261, iteration: 100504
loss: 1.02332603931427,grad_norm: 0.9063702627781814, iteration: 100505
loss: 1.039842963218689,grad_norm: 0.9999992721058092, iteration: 100506
loss: 1.019835352897644,grad_norm: 0.8976799752072671, iteration: 100507
loss: 1.0209006071090698,grad_norm: 0.8728949248202665, iteration: 100508
loss: 1.0077831745147705,grad_norm: 0.9999991833121519, iteration: 100509
loss: 1.0052839517593384,grad_norm: 0.7980680692861535, iteration: 100510
loss: 1.05802321434021,grad_norm: 1.0000000746121647, iteration: 100511
loss: 1.0476739406585693,grad_norm: 0.9999991923522765, iteration: 100512
loss: 1.005208134651184,grad_norm: 0.9999990126851975, iteration: 100513
loss: 0.9868811368942261,grad_norm: 0.8133285435491459, iteration: 100514
loss: 1.0914831161499023,grad_norm: 0.9999999496281012, iteration: 100515
loss: 1.1002862453460693,grad_norm: 0.9999993523890837, iteration: 100516
loss: 1.0409785509109497,grad_norm: 0.9999993924099052, iteration: 100517
loss: 0.971065104007721,grad_norm: 0.9277356016069048, iteration: 100518
loss: 1.0407179594039917,grad_norm: 0.9999991078422701, iteration: 100519
loss: 0.9916654229164124,grad_norm: 0.9999992019887881, iteration: 100520
loss: 0.9816310405731201,grad_norm: 0.9204086389053519, iteration: 100521
loss: 0.9861515164375305,grad_norm: 0.9999996219287016, iteration: 100522
loss: 0.9793384671211243,grad_norm: 0.9999992252179551, iteration: 100523
loss: 0.9861236810684204,grad_norm: 0.999999227131507, iteration: 100524
loss: 0.9989463686943054,grad_norm: 0.942207157359122, iteration: 100525
loss: 1.004330039024353,grad_norm: 0.9999991972692606, iteration: 100526
loss: 0.9566628932952881,grad_norm: 0.9999991163611496, iteration: 100527
loss: 0.9777536392211914,grad_norm: 0.99688678337561, iteration: 100528
loss: 1.0145950317382812,grad_norm: 0.8425609604120526, iteration: 100529
loss: 0.9930108785629272,grad_norm: 0.9999998955271461, iteration: 100530
loss: 0.9952885508537292,grad_norm: 0.8549056081768405, iteration: 100531
loss: 1.0863178968429565,grad_norm: 0.9999998026765539, iteration: 100532
loss: 1.0146090984344482,grad_norm: 0.9999992301178496, iteration: 100533
loss: 0.9668342471122742,grad_norm: 0.9999990881961294, iteration: 100534
loss: 1.0274609327316284,grad_norm: 0.9999992946760234, iteration: 100535
loss: 1.0267865657806396,grad_norm: 0.9999991729331036, iteration: 100536
loss: 1.0261222124099731,grad_norm: 0.9999997451384648, iteration: 100537
loss: 0.989739179611206,grad_norm: 0.934281619894175, iteration: 100538
loss: 1.0277767181396484,grad_norm: 0.9282337519352205, iteration: 100539
loss: 1.0269577503204346,grad_norm: 0.9999991201134943, iteration: 100540
loss: 1.0155119895935059,grad_norm: 0.9999991427910153, iteration: 100541
loss: 1.0207202434539795,grad_norm: 0.9568270078079906, iteration: 100542
loss: 0.9803173542022705,grad_norm: 0.8011363706639688, iteration: 100543
loss: 0.9769741892814636,grad_norm: 0.875056646308424, iteration: 100544
loss: 0.9721503853797913,grad_norm: 0.9999998199850088, iteration: 100545
loss: 0.9606078267097473,grad_norm: 0.8955557201280666, iteration: 100546
loss: 1.0539039373397827,grad_norm: 0.9999999086807047, iteration: 100547
loss: 0.9894914627075195,grad_norm: 0.8797286333631676, iteration: 100548
loss: 1.0434573888778687,grad_norm: 0.9999997650956681, iteration: 100549
loss: 1.0065736770629883,grad_norm: 0.9210936088744048, iteration: 100550
loss: 1.0152016878128052,grad_norm: 0.9999991405956791, iteration: 100551
loss: 0.9768190979957581,grad_norm: 0.8756543308964029, iteration: 100552
loss: 1.3092701435089111,grad_norm: 0.9999999265816713, iteration: 100553
loss: 0.9904190897941589,grad_norm: 0.7708423624272581, iteration: 100554
loss: 0.9946576952934265,grad_norm: 0.9999989692573235, iteration: 100555
loss: 1.035057783126831,grad_norm: 0.9520218881171973, iteration: 100556
loss: 1.0053867101669312,grad_norm: 0.8968778287116608, iteration: 100557
loss: 1.0211971998214722,grad_norm: 0.9999998998565086, iteration: 100558
loss: 1.0859148502349854,grad_norm: 0.9869472400177882, iteration: 100559
loss: 1.0071978569030762,grad_norm: 0.9015468148340312, iteration: 100560
loss: 1.0278598070144653,grad_norm: 0.9999996034859849, iteration: 100561
loss: 1.0074807405471802,grad_norm: 0.8947607800970575, iteration: 100562
loss: 0.9851201772689819,grad_norm: 0.9825913881537808, iteration: 100563
loss: 0.9862160682678223,grad_norm: 0.8587828460267306, iteration: 100564
loss: 1.0160809755325317,grad_norm: 0.9226429910367305, iteration: 100565
loss: 1.0100135803222656,grad_norm: 0.8551392575522349, iteration: 100566
loss: 0.9703255295753479,grad_norm: 0.9999990192892814, iteration: 100567
loss: 1.0056356191635132,grad_norm: 0.9999991662999507, iteration: 100568
loss: 0.9844014048576355,grad_norm: 0.9235636707276246, iteration: 100569
loss: 1.0154712200164795,grad_norm: 0.9999996769195839, iteration: 100570
loss: 1.1474274396896362,grad_norm: 0.9999996679608031, iteration: 100571
loss: 1.049269437789917,grad_norm: 0.8339978228941843, iteration: 100572
loss: 1.0374879837036133,grad_norm: 0.9999999327371855, iteration: 100573
loss: 1.0168606042861938,grad_norm: 0.9999989895860077, iteration: 100574
loss: 1.0250264406204224,grad_norm: 0.9178610559985538, iteration: 100575
loss: 1.1469286680221558,grad_norm: 0.9999996956805316, iteration: 100576
loss: 1.0306200981140137,grad_norm: 0.9999993357787805, iteration: 100577
loss: 0.9875190258026123,grad_norm: 0.9999992897319143, iteration: 100578
loss: 1.0163627862930298,grad_norm: 0.999999221627087, iteration: 100579
loss: 1.0353909730911255,grad_norm: 0.999999201468051, iteration: 100580
loss: 1.0390751361846924,grad_norm: 0.9999993399435039, iteration: 100581
loss: 0.9907162189483643,grad_norm: 0.999999058015704, iteration: 100582
loss: 0.9955680966377258,grad_norm: 0.8110908421233467, iteration: 100583
loss: 1.0010823011398315,grad_norm: 0.9999994047927164, iteration: 100584
loss: 1.0176165103912354,grad_norm: 0.9420716193674659, iteration: 100585
loss: 1.0201455354690552,grad_norm: 0.999999427991297, iteration: 100586
loss: 0.9481302499771118,grad_norm: 0.9765385099063468, iteration: 100587
loss: 1.0674769878387451,grad_norm: 0.99999988325185, iteration: 100588
loss: 1.050374150276184,grad_norm: 0.9999993112128944, iteration: 100589
loss: 1.0042212009429932,grad_norm: 0.8363647014969664, iteration: 100590
loss: 1.0057355165481567,grad_norm: 0.8496687451089981, iteration: 100591
loss: 1.025480031967163,grad_norm: 0.9999999013381523, iteration: 100592
loss: 0.9925540685653687,grad_norm: 0.8927609101678351, iteration: 100593
loss: 1.0154461860656738,grad_norm: 0.9999991242518818, iteration: 100594
loss: 0.9767053723335266,grad_norm: 0.9999991303571499, iteration: 100595
loss: 1.0789480209350586,grad_norm: 0.9999998894465085, iteration: 100596
loss: 1.075620174407959,grad_norm: 0.9252597713960964, iteration: 100597
loss: 1.1523170471191406,grad_norm: 0.9999991651523576, iteration: 100598
loss: 1.059800624847412,grad_norm: 0.999999310653095, iteration: 100599
loss: 0.9808523058891296,grad_norm: 0.9999998505151324, iteration: 100600
loss: 1.0429658889770508,grad_norm: 0.9987104551126931, iteration: 100601
loss: 0.9999768733978271,grad_norm: 0.9317468694800586, iteration: 100602
loss: 1.0074204206466675,grad_norm: 0.9190348469169507, iteration: 100603
loss: 1.035550594329834,grad_norm: 0.9565312408769979, iteration: 100604
loss: 0.9148088097572327,grad_norm: 0.9914246927862214, iteration: 100605
loss: 0.9978674650192261,grad_norm: 0.9855218546201152, iteration: 100606
loss: 1.0405464172363281,grad_norm: 0.999999601646954, iteration: 100607
loss: 1.0258452892303467,grad_norm: 0.9999997752727628, iteration: 100608
loss: 1.0274156332015991,grad_norm: 0.9381815352585726, iteration: 100609
loss: 1.0602623224258423,grad_norm: 0.8495669023038956, iteration: 100610
loss: 1.062386155128479,grad_norm: 0.9999992789365206, iteration: 100611
loss: 1.0802711248397827,grad_norm: 0.999999124917282, iteration: 100612
loss: 1.0633610486984253,grad_norm: 0.9773681759034913, iteration: 100613
loss: 1.0526247024536133,grad_norm: 0.9999993877674901, iteration: 100614
loss: 1.0101112127304077,grad_norm: 0.9797851936436591, iteration: 100615
loss: 0.9900935888290405,grad_norm: 0.951815158999448, iteration: 100616
loss: 1.0374513864517212,grad_norm: 0.9999992597677979, iteration: 100617
loss: 1.0066919326782227,grad_norm: 0.9999989892062114, iteration: 100618
loss: 1.0589066743850708,grad_norm: 0.999999062087969, iteration: 100619
loss: 0.9990609884262085,grad_norm: 0.827585203398692, iteration: 100620
loss: 1.05476975440979,grad_norm: 0.9999991680127422, iteration: 100621
loss: 1.0059574842453003,grad_norm: 0.7778024068122487, iteration: 100622
loss: 1.0161285400390625,grad_norm: 0.9999992599974469, iteration: 100623
loss: 1.0086817741394043,grad_norm: 0.9999992479325112, iteration: 100624
loss: 0.9927463531494141,grad_norm: 0.9999996653076727, iteration: 100625
loss: 0.9703841209411621,grad_norm: 0.795720970890047, iteration: 100626
loss: 1.032498836517334,grad_norm: 0.9999995943989869, iteration: 100627
loss: 1.0490397214889526,grad_norm: 0.9999994982951447, iteration: 100628
loss: 0.9617288112640381,grad_norm: 0.9999992094437933, iteration: 100629
loss: 0.9929023385047913,grad_norm: 0.9999991718642334, iteration: 100630
loss: 1.0119011402130127,grad_norm: 0.9926113851131306, iteration: 100631
loss: 0.9904587268829346,grad_norm: 0.8130897352298233, iteration: 100632
loss: 1.0113049745559692,grad_norm: 0.9999991120910942, iteration: 100633
loss: 1.0223684310913086,grad_norm: 0.9999993255533166, iteration: 100634
loss: 0.9711341261863708,grad_norm: 0.8349292122182367, iteration: 100635
loss: 1.1679553985595703,grad_norm: 0.9056621553753963, iteration: 100636
loss: 0.9888806939125061,grad_norm: 0.9999996364576672, iteration: 100637
loss: 1.0083779096603394,grad_norm: 0.9659835190705508, iteration: 100638
loss: 1.1017999649047852,grad_norm: 0.9999999299518505, iteration: 100639
loss: 1.0938509702682495,grad_norm: 0.9999999950616202, iteration: 100640
loss: 1.0014475584030151,grad_norm: 0.9999997031484467, iteration: 100641
loss: 0.9808375239372253,grad_norm: 0.796563428333394, iteration: 100642
loss: 1.0144249200820923,grad_norm: 0.8341717572007534, iteration: 100643
loss: 0.9877089858055115,grad_norm: 0.6943842739771241, iteration: 100644
loss: 1.0372517108917236,grad_norm: 0.9999991884454967, iteration: 100645
loss: 0.9579623341560364,grad_norm: 0.9821595169369511, iteration: 100646
loss: 1.0454658269882202,grad_norm: 0.9999991508759603, iteration: 100647
loss: 1.0408613681793213,grad_norm: 0.8983545323399802, iteration: 100648
loss: 1.0450369119644165,grad_norm: 0.8806104578338055, iteration: 100649
loss: 1.0278819799423218,grad_norm: 0.9999999652513948, iteration: 100650
loss: 0.9798315167427063,grad_norm: 0.9367414740516712, iteration: 100651
loss: 1.03590726852417,grad_norm: 0.9999998049808477, iteration: 100652
loss: 0.9798831939697266,grad_norm: 0.9999997406824783, iteration: 100653
loss: 1.0152883529663086,grad_norm: 0.8384296510755043, iteration: 100654
loss: 1.0771448612213135,grad_norm: 0.9999990029417783, iteration: 100655
loss: 1.0138142108917236,grad_norm: 0.9999992122112651, iteration: 100656
loss: 1.2018784284591675,grad_norm: 0.9999993078301156, iteration: 100657
loss: 1.106754183769226,grad_norm: 0.9999993713256701, iteration: 100658
loss: 1.145444393157959,grad_norm: 0.9999997832960856, iteration: 100659
loss: 1.1018916368484497,grad_norm: 0.9999999311029063, iteration: 100660
loss: 1.2886711359024048,grad_norm: 0.9999998853070889, iteration: 100661
loss: 1.1383981704711914,grad_norm: 0.9999992905165889, iteration: 100662
loss: 1.1416456699371338,grad_norm: 0.9999994634044912, iteration: 100663
loss: 1.4070359468460083,grad_norm: 1.0000000204844248, iteration: 100664
loss: 1.0607722997665405,grad_norm: 0.9999991025741218, iteration: 100665
loss: 1.0374784469604492,grad_norm: 0.9999998576968481, iteration: 100666
loss: 1.2742037773132324,grad_norm: 0.9999994329604313, iteration: 100667
loss: 1.0589070320129395,grad_norm: 0.9999997890294289, iteration: 100668
loss: 1.1721715927124023,grad_norm: 0.9999995747419185, iteration: 100669
loss: 1.2224013805389404,grad_norm: 0.9999999195844738, iteration: 100670
loss: 1.1348717212677002,grad_norm: 0.9999998187466961, iteration: 100671
loss: 1.1292935609817505,grad_norm: 0.9999995234182762, iteration: 100672
loss: 1.0494567155838013,grad_norm: 0.9999991232878261, iteration: 100673
loss: 1.0226770639419556,grad_norm: 0.9999992775437345, iteration: 100674
loss: 0.9973470568656921,grad_norm: 0.9999991093361164, iteration: 100675
loss: 1.0770334005355835,grad_norm: 0.999999407362554, iteration: 100676
loss: 1.110331654548645,grad_norm: 0.9999997379517024, iteration: 100677
loss: 1.0164473056793213,grad_norm: 1.0000000281933488, iteration: 100678
loss: 1.135634183883667,grad_norm: 0.9999996223299199, iteration: 100679
loss: 1.1686822175979614,grad_norm: 0.999999312452544, iteration: 100680
loss: 1.0382411479949951,grad_norm: 0.9999989862519854, iteration: 100681
loss: 1.1248656511306763,grad_norm: 0.9999995992932336, iteration: 100682
loss: 1.015938401222229,grad_norm: 0.9938045281835273, iteration: 100683
loss: 1.0115879774093628,grad_norm: 0.9602788289538855, iteration: 100684
loss: 0.9907991290092468,grad_norm: 0.999999136683639, iteration: 100685
loss: 1.0497856140136719,grad_norm: 0.9999999432833699, iteration: 100686
loss: 0.9816233515739441,grad_norm: 0.9422561499402353, iteration: 100687
loss: 1.2303719520568848,grad_norm: 0.999999907334824, iteration: 100688
loss: 1.0579769611358643,grad_norm: 0.9999992587712146, iteration: 100689
loss: 0.9880477786064148,grad_norm: 0.7861735183679707, iteration: 100690
loss: 1.0601050853729248,grad_norm: 0.9999993289272212, iteration: 100691
loss: 1.0995073318481445,grad_norm: 0.9999992871368568, iteration: 100692
loss: 1.014217734336853,grad_norm: 0.7631988313418157, iteration: 100693
loss: 1.0182807445526123,grad_norm: 0.9020624321589648, iteration: 100694
loss: 1.045030951499939,grad_norm: 0.9999990800413003, iteration: 100695
loss: 1.0361374616622925,grad_norm: 0.9999996391448062, iteration: 100696
loss: 0.9972825050354004,grad_norm: 0.9999990476508118, iteration: 100697
loss: 1.0384893417358398,grad_norm: 0.9999998597458248, iteration: 100698
loss: 0.9833760261535645,grad_norm: 0.8208723061518955, iteration: 100699
loss: 1.0317540168762207,grad_norm: 1.0000000031474185, iteration: 100700
loss: 0.9886663556098938,grad_norm: 0.9774011996020817, iteration: 100701
loss: 1.026057481765747,grad_norm: 0.9999993689616553, iteration: 100702
loss: 1.0305255651474,grad_norm: 0.999999056721983, iteration: 100703
loss: 1.0788509845733643,grad_norm: 0.9999999941061091, iteration: 100704
loss: 1.0185720920562744,grad_norm: 0.9999991454330267, iteration: 100705
loss: 1.013908863067627,grad_norm: 0.8022097036460505, iteration: 100706
loss: 0.9914782047271729,grad_norm: 0.7958327104920659, iteration: 100707
loss: 1.017284631729126,grad_norm: 0.999999714086822, iteration: 100708
loss: 1.017971396446228,grad_norm: 0.9790680894223436, iteration: 100709
loss: 1.0451749563217163,grad_norm: 0.9999997771705132, iteration: 100710
loss: 1.0197252035140991,grad_norm: 0.9999993511099592, iteration: 100711
loss: 1.1090832948684692,grad_norm: 0.9999990684850866, iteration: 100712
loss: 1.2700762748718262,grad_norm: 0.999999716894533, iteration: 100713
loss: 1.0644028186798096,grad_norm: 0.9999998283640417, iteration: 100714
loss: 0.9647942185401917,grad_norm: 0.9999992275943856, iteration: 100715
loss: 1.095212697982788,grad_norm: 0.9999993797954773, iteration: 100716
loss: 1.0562450885772705,grad_norm: 0.9698609415915623, iteration: 100717
loss: 1.1028963327407837,grad_norm: 0.999999646475021, iteration: 100718
loss: 1.0681198835372925,grad_norm: 0.9766768015174875, iteration: 100719
loss: 1.1005185842514038,grad_norm: 0.9999998527080842, iteration: 100720
loss: 1.0195550918579102,grad_norm: 0.9999990466540735, iteration: 100721
loss: 0.9896929860115051,grad_norm: 0.9999998546188581, iteration: 100722
loss: 0.9846201539039612,grad_norm: 0.9999990667722239, iteration: 100723
loss: 1.10776948928833,grad_norm: 0.9999998127272239, iteration: 100724
loss: 1.013528823852539,grad_norm: 0.7179753250847963, iteration: 100725
loss: 1.0234711170196533,grad_norm: 0.878873015876512, iteration: 100726
loss: 1.0111867189407349,grad_norm: 0.9999994219457246, iteration: 100727
loss: 1.0254567861557007,grad_norm: 0.9999998719010256, iteration: 100728
loss: 1.0110459327697754,grad_norm: 0.9999990259418169, iteration: 100729
loss: 1.0343557596206665,grad_norm: 0.8079963848901762, iteration: 100730
loss: 1.0243194103240967,grad_norm: 0.9999998886524796, iteration: 100731
loss: 1.0313953161239624,grad_norm: 0.7957419819867757, iteration: 100732
loss: 1.0144319534301758,grad_norm: 0.999999062092254, iteration: 100733
loss: 1.1167265176773071,grad_norm: 1.0000000407991942, iteration: 100734
loss: 1.001443862915039,grad_norm: 0.999999637005399, iteration: 100735
loss: 0.9594740867614746,grad_norm: 0.7350248820797821, iteration: 100736
loss: 1.0339711904525757,grad_norm: 0.9999991618479103, iteration: 100737
loss: 1.0161879062652588,grad_norm: 0.8577710603567381, iteration: 100738
loss: 1.0475802421569824,grad_norm: 0.960036690409842, iteration: 100739
loss: 1.0185987949371338,grad_norm: 0.9330688425333451, iteration: 100740
loss: 0.9979707598686218,grad_norm: 0.9999992931161107, iteration: 100741
loss: 1.024764895439148,grad_norm: 0.9999995365925987, iteration: 100742
loss: 1.0371036529541016,grad_norm: 0.849785907645954, iteration: 100743
loss: 1.022858738899231,grad_norm: 0.9999994575011635, iteration: 100744
loss: 0.9807704091072083,grad_norm: 0.8926656936929538, iteration: 100745
loss: 0.9876209497451782,grad_norm: 0.8880923994598524, iteration: 100746
loss: 0.9707170128822327,grad_norm: 0.8281182392146308, iteration: 100747
loss: 1.038308024406433,grad_norm: 0.9999999612244292, iteration: 100748
loss: 1.062018632888794,grad_norm: 0.877732057628827, iteration: 100749
loss: 0.9735898971557617,grad_norm: 0.8940910665959069, iteration: 100750
loss: 0.9862827062606812,grad_norm: 0.8458097618939895, iteration: 100751
loss: 1.0250601768493652,grad_norm: 0.9932509734136228, iteration: 100752
loss: 1.0395724773406982,grad_norm: 0.8928061979975048, iteration: 100753
loss: 0.9760266542434692,grad_norm: 0.9999996719438091, iteration: 100754
loss: 0.9864358305931091,grad_norm: 0.8766448170278055, iteration: 100755
loss: 1.001410961151123,grad_norm: 0.7308332834297862, iteration: 100756
loss: 0.9926630854606628,grad_norm: 0.9999996917862738, iteration: 100757
loss: 1.0073131322860718,grad_norm: 0.6671575039387031, iteration: 100758
loss: 1.0312303304672241,grad_norm: 0.9304971347533576, iteration: 100759
loss: 1.1110029220581055,grad_norm: 0.9999994477569035, iteration: 100760
loss: 0.9839240908622742,grad_norm: 0.7938566762579843, iteration: 100761
loss: 1.0818408727645874,grad_norm: 0.9999989517604184, iteration: 100762
loss: 1.026405930519104,grad_norm: 0.9893998716197141, iteration: 100763
loss: 0.9832001328468323,grad_norm: 0.8052864170937983, iteration: 100764
loss: 0.9657914042472839,grad_norm: 0.9234648380994765, iteration: 100765
loss: 0.994595468044281,grad_norm: 0.8270855910443743, iteration: 100766
loss: 1.0332813262939453,grad_norm: 0.9048201367124643, iteration: 100767
loss: 0.9943069815635681,grad_norm: 0.7411919691853649, iteration: 100768
loss: 1.0247231721878052,grad_norm: 0.9999990503540497, iteration: 100769
loss: 1.001549243927002,grad_norm: 0.96037013892377, iteration: 100770
loss: 1.0297194719314575,grad_norm: 0.9147725510184526, iteration: 100771
loss: 1.0059185028076172,grad_norm: 0.9999992902367295, iteration: 100772
loss: 0.9547806978225708,grad_norm: 0.9182530111790541, iteration: 100773
loss: 0.9880701303482056,grad_norm: 0.8565024288245022, iteration: 100774
loss: 1.0298826694488525,grad_norm: 0.9999990276665524, iteration: 100775
loss: 0.9967783689498901,grad_norm: 0.9437058097949279, iteration: 100776
loss: 0.9790109992027283,grad_norm: 0.8406776374569735, iteration: 100777
loss: 0.9971272349357605,grad_norm: 0.9999991995368118, iteration: 100778
loss: 0.9833500385284424,grad_norm: 0.8839365371901616, iteration: 100779
loss: 1.0272561311721802,grad_norm: 0.9999993140664781, iteration: 100780
loss: 1.0107266902923584,grad_norm: 0.9814401444896127, iteration: 100781
loss: 0.9944583773612976,grad_norm: 0.9999995924153817, iteration: 100782
loss: 1.0082210302352905,grad_norm: 0.9999999114452013, iteration: 100783
loss: 1.0115516185760498,grad_norm: 0.9999997535236033, iteration: 100784
loss: 1.0540785789489746,grad_norm: 0.9999998817820427, iteration: 100785
loss: 0.978491485118866,grad_norm: 0.8563063896468289, iteration: 100786
loss: 0.9517127871513367,grad_norm: 0.9999990872307757, iteration: 100787
loss: 0.9802989959716797,grad_norm: 0.9999992305452988, iteration: 100788
loss: 0.9913234710693359,grad_norm: 0.999999320386093, iteration: 100789
loss: 1.0285899639129639,grad_norm: 0.844505292352943, iteration: 100790
loss: 1.000925898551941,grad_norm: 0.8109391183609572, iteration: 100791
loss: 0.9773041009902954,grad_norm: 0.9406836572866257, iteration: 100792
loss: 1.0133858919143677,grad_norm: 0.8812306397166064, iteration: 100793
loss: 0.9872694611549377,grad_norm: 1.0000000266239686, iteration: 100794
loss: 1.0083200931549072,grad_norm: 0.9701188756393461, iteration: 100795
loss: 0.9989973306655884,grad_norm: 0.8657946222033405, iteration: 100796
loss: 0.9736993312835693,grad_norm: 0.949702884902845, iteration: 100797
loss: 1.025530219078064,grad_norm: 0.8365270159671572, iteration: 100798
loss: 1.0136914253234863,grad_norm: 0.9724985819170088, iteration: 100799
loss: 1.0469037294387817,grad_norm: 0.8494931361202207, iteration: 100800
loss: 1.0524226427078247,grad_norm: 0.9999996210481811, iteration: 100801
loss: 1.03628671169281,grad_norm: 0.9999998654547362, iteration: 100802
loss: 0.9878751039505005,grad_norm: 0.8482326068310265, iteration: 100803
loss: 1.0677881240844727,grad_norm: 0.9999996969558946, iteration: 100804
loss: 1.0166430473327637,grad_norm: 0.8759336384683666, iteration: 100805
loss: 1.0279865264892578,grad_norm: 0.9999992230964049, iteration: 100806
loss: 1.0391595363616943,grad_norm: 0.7790385665052532, iteration: 100807
loss: 1.0102219581604004,grad_norm: 0.8572175878427634, iteration: 100808
loss: 1.0155868530273438,grad_norm: 0.8983131258518023, iteration: 100809
loss: 0.9860646724700928,grad_norm: 0.8409794063206728, iteration: 100810
loss: 1.0316487550735474,grad_norm: 0.9999991455077082, iteration: 100811
loss: 1.0811570882797241,grad_norm: 0.9999998424657508, iteration: 100812
loss: 0.9838231205940247,grad_norm: 0.8467910314527648, iteration: 100813
loss: 0.974073052406311,grad_norm: 0.9999995409570231, iteration: 100814
loss: 0.9649136662483215,grad_norm: 0.9624390205320992, iteration: 100815
loss: 1.017605185508728,grad_norm: 0.9999993620237235, iteration: 100816
loss: 1.0431838035583496,grad_norm: 0.9999992751602313, iteration: 100817
loss: 1.0120974779129028,grad_norm: 0.9999996257723712, iteration: 100818
loss: 1.0041358470916748,grad_norm: 1.0000000892737482, iteration: 100819
loss: 1.0335936546325684,grad_norm: 0.9999998133096216, iteration: 100820
loss: 1.0226190090179443,grad_norm: 0.8355196596634632, iteration: 100821
loss: 0.9874038696289062,grad_norm: 0.8254408598392697, iteration: 100822
loss: 0.9933769106864929,grad_norm: 0.9999989295730153, iteration: 100823
loss: 1.2234009504318237,grad_norm: 1.0000000210954352, iteration: 100824
loss: 1.0058152675628662,grad_norm: 0.9999991191189499, iteration: 100825
loss: 1.0665053129196167,grad_norm: 0.8816221877637659, iteration: 100826
loss: 1.2198907136917114,grad_norm: 0.9999995272085738, iteration: 100827
loss: 1.0880011320114136,grad_norm: 0.9999995978108971, iteration: 100828
loss: 1.2243191003799438,grad_norm: 0.9999999569271282, iteration: 100829
loss: 1.0309027433395386,grad_norm: 0.9481418578106134, iteration: 100830
loss: 1.0295724868774414,grad_norm: 0.815063507199758, iteration: 100831
loss: 1.0531394481658936,grad_norm: 0.9999993389500453, iteration: 100832
loss: 1.0215222835540771,grad_norm: 0.9224152842168956, iteration: 100833
loss: 1.01011323928833,grad_norm: 0.9999999678561755, iteration: 100834
loss: 1.0592412948608398,grad_norm: 0.8303422961861565, iteration: 100835
loss: 1.242127776145935,grad_norm: 0.9999999531233074, iteration: 100836
loss: 1.12473726272583,grad_norm: 0.999999595793159, iteration: 100837
loss: 1.0314786434173584,grad_norm: 0.871318841505031, iteration: 100838
loss: 1.1463831663131714,grad_norm: 0.9999996903436296, iteration: 100839
loss: 1.1728456020355225,grad_norm: 0.9999997332301944, iteration: 100840
loss: 1.1324543952941895,grad_norm: 0.9999994828994406, iteration: 100841
loss: 1.0147749185562134,grad_norm: 0.9999991841471078, iteration: 100842
loss: 1.0055996179580688,grad_norm: 0.9214203388627354, iteration: 100843
loss: 0.9770401120185852,grad_norm: 0.9999992589059951, iteration: 100844
loss: 0.9829926490783691,grad_norm: 0.9999993467504696, iteration: 100845
loss: 1.001300573348999,grad_norm: 0.9132365472003845, iteration: 100846
loss: 1.0691173076629639,grad_norm: 0.9999995707769204, iteration: 100847
loss: 0.974587619304657,grad_norm: 0.9999990570711504, iteration: 100848
loss: 1.0206187963485718,grad_norm: 0.9999995920420216, iteration: 100849
loss: 1.0974677801132202,grad_norm: 0.9999992446260638, iteration: 100850
loss: 1.0919914245605469,grad_norm: 0.9999997233042669, iteration: 100851
loss: 0.9959003925323486,grad_norm: 0.9999992504231622, iteration: 100852
loss: 1.0241069793701172,grad_norm: 0.959072146949894, iteration: 100853
loss: 0.9956766963005066,grad_norm: 0.9366696456796635, iteration: 100854
loss: 0.9910844564437866,grad_norm: 0.9498598150122911, iteration: 100855
loss: 1.0290229320526123,grad_norm: 0.9999999619561227, iteration: 100856
loss: 1.055709719657898,grad_norm: 0.9999996657913743, iteration: 100857
loss: 1.0252903699874878,grad_norm: 0.9999993626204334, iteration: 100858
loss: 1.0425773859024048,grad_norm: 0.9999991366598635, iteration: 100859
loss: 1.0506350994110107,grad_norm: 0.9999999637066515, iteration: 100860
loss: 1.024031162261963,grad_norm: 0.8573801774489964, iteration: 100861
loss: 1.0382540225982666,grad_norm: 0.910442418360038, iteration: 100862
loss: 0.9433082938194275,grad_norm: 0.9832705880903326, iteration: 100863
loss: 1.1861680746078491,grad_norm: 1.0000000123510542, iteration: 100864
loss: 1.0285868644714355,grad_norm: 0.9999997051468856, iteration: 100865
loss: 1.162603497505188,grad_norm: 0.9999999203746268, iteration: 100866
loss: 1.0799919366836548,grad_norm: 0.9999992484923726, iteration: 100867
loss: 1.0384472608566284,grad_norm: 0.9999998353259832, iteration: 100868
loss: 1.0706490278244019,grad_norm: 0.9999994713316582, iteration: 100869
loss: 1.1537258625030518,grad_norm: 0.9999999005442054, iteration: 100870
loss: 1.2774189710617065,grad_norm: 0.999999906868445, iteration: 100871
loss: 1.0156643390655518,grad_norm: 0.9999995631375543, iteration: 100872
loss: 1.0305095911026,grad_norm: 0.9999991984945108, iteration: 100873
loss: 1.1991568803787231,grad_norm: 0.9999998466498162, iteration: 100874
loss: 1.042769193649292,grad_norm: 0.999999248138694, iteration: 100875
loss: 1.203938603401184,grad_norm: 0.9999998929830437, iteration: 100876
loss: 1.0814567804336548,grad_norm: 0.9999998151001962, iteration: 100877
loss: 1.0744510889053345,grad_norm: 0.9999991239992482, iteration: 100878
loss: 1.2372174263000488,grad_norm: 0.9999994582558641, iteration: 100879
loss: 1.1534934043884277,grad_norm: 1.0000000047595266, iteration: 100880
loss: 1.0023587942123413,grad_norm: 0.9999991014913521, iteration: 100881
loss: 1.3651763200759888,grad_norm: 0.9999999931285184, iteration: 100882
loss: 1.234113335609436,grad_norm: 0.9999995289032858, iteration: 100883
loss: 1.2026381492614746,grad_norm: 0.999999930758283, iteration: 100884
loss: 1.1765961647033691,grad_norm: 0.9999994903847838, iteration: 100885
loss: 1.2394365072250366,grad_norm: 0.999999746215765, iteration: 100886
loss: 0.9927839040756226,grad_norm: 0.999999115366877, iteration: 100887
loss: 1.2301182746887207,grad_norm: 1.0000000034494965, iteration: 100888
loss: 1.264902114868164,grad_norm: 0.9999999253380276, iteration: 100889
loss: 1.6761863231658936,grad_norm: 0.9999999170288382, iteration: 100890
loss: 1.2033958435058594,grad_norm: 0.9999999411453827, iteration: 100891
loss: 1.3066296577453613,grad_norm: 0.9999999673995378, iteration: 100892
loss: 1.1720631122589111,grad_norm: 0.9999996591967915, iteration: 100893
loss: 1.1794720888137817,grad_norm: 0.9999998559150874, iteration: 100894
loss: 1.5007765293121338,grad_norm: 0.9999995551603355, iteration: 100895
loss: 1.1254299879074097,grad_norm: 0.9999996726044683, iteration: 100896
loss: 1.1462420225143433,grad_norm: 0.9999999591568705, iteration: 100897
loss: 1.0538146495819092,grad_norm: 0.9999994870668449, iteration: 100898
loss: 1.1636016368865967,grad_norm: 0.9999999384551359, iteration: 100899
loss: 1.14948308467865,grad_norm: 0.9999999485264238, iteration: 100900
loss: 1.0362372398376465,grad_norm: 0.9999996950828965, iteration: 100901
loss: 1.0239145755767822,grad_norm: 0.9999991043870338, iteration: 100902
loss: 0.9969369769096375,grad_norm: 0.8425890156499463, iteration: 100903
loss: 1.0349509716033936,grad_norm: 0.9999993214011789, iteration: 100904
loss: 1.032773494720459,grad_norm: 0.9999994738351051, iteration: 100905
loss: 1.138149380683899,grad_norm: 0.9999996515650831, iteration: 100906
loss: 1.1876195669174194,grad_norm: 0.9999999607146836, iteration: 100907
loss: 1.270367980003357,grad_norm: 0.9999998175636357, iteration: 100908
loss: 0.9943874478340149,grad_norm: 0.9999995074755081, iteration: 100909
loss: 1.1036287546157837,grad_norm: 0.99999958169012, iteration: 100910
loss: 1.057892084121704,grad_norm: 0.9999998265284211, iteration: 100911
loss: 0.996981143951416,grad_norm: 0.9999989580877887, iteration: 100912
loss: 1.0419293642044067,grad_norm: 0.9999993869893075, iteration: 100913
loss: 0.9842300415039062,grad_norm: 0.9633272352316372, iteration: 100914
loss: 1.070759654045105,grad_norm: 0.9999998726229119, iteration: 100915
loss: 1.027863621711731,grad_norm: 0.8680372536955583, iteration: 100916
loss: 1.0001298189163208,grad_norm: 0.999999499823222, iteration: 100917
loss: 1.0653849840164185,grad_norm: 0.9999990147133067, iteration: 100918
loss: 0.9828590750694275,grad_norm: 0.8490178912598342, iteration: 100919
loss: 1.0468645095825195,grad_norm: 0.9999992327697825, iteration: 100920
loss: 0.9680166244506836,grad_norm: 0.836906721829012, iteration: 100921
loss: 1.002705693244934,grad_norm: 0.9999998160521865, iteration: 100922
loss: 1.0281275510787964,grad_norm: 0.9999993974084942, iteration: 100923
loss: 1.1437592506408691,grad_norm: 0.9999999172193473, iteration: 100924
loss: 1.0504966974258423,grad_norm: 0.9999998302454096, iteration: 100925
loss: 1.0112978219985962,grad_norm: 0.9999997204339208, iteration: 100926
loss: 1.016081690788269,grad_norm: 0.8632925447481474, iteration: 100927
loss: 0.9962000846862793,grad_norm: 0.9745801865476247, iteration: 100928
loss: 1.0438590049743652,grad_norm: 0.9999991932453531, iteration: 100929
loss: 0.9671995043754578,grad_norm: 0.9885048521103323, iteration: 100930
loss: 1.067809820175171,grad_norm: 0.9999994775192239, iteration: 100931
loss: 1.0177791118621826,grad_norm: 0.9999996118710889, iteration: 100932
loss: 1.0091500282287598,grad_norm: 0.9941152559867961, iteration: 100933
loss: 1.0292521715164185,grad_norm: 0.9477185615589204, iteration: 100934
loss: 1.0048301219940186,grad_norm: 0.9511072976564886, iteration: 100935
loss: 1.050471544265747,grad_norm: 0.9999992898410082, iteration: 100936
loss: 1.1317331790924072,grad_norm: 0.9999999987235636, iteration: 100937
loss: 1.3921959400177002,grad_norm: 1.0000000514809582, iteration: 100938
loss: 1.0008490085601807,grad_norm: 0.9999993397878643, iteration: 100939
loss: 1.0057512521743774,grad_norm: 0.94669113949301, iteration: 100940
loss: 1.0166716575622559,grad_norm: 0.855140086901742, iteration: 100941
loss: 0.9967882037162781,grad_norm: 0.9863989855036633, iteration: 100942
loss: 1.0848870277404785,grad_norm: 0.9713879186217761, iteration: 100943
loss: 1.0111613273620605,grad_norm: 0.8655510869178376, iteration: 100944
loss: 0.9937026500701904,grad_norm: 0.9999994323438559, iteration: 100945
loss: 1.0364470481872559,grad_norm: 0.9999993087332142, iteration: 100946
loss: 1.058037519454956,grad_norm: 0.8669842885907432, iteration: 100947
loss: 0.9564880728721619,grad_norm: 0.9999990752374192, iteration: 100948
loss: 0.9697732329368591,grad_norm: 0.9999991030562407, iteration: 100949
loss: 1.1588473320007324,grad_norm: 0.9999993068752998, iteration: 100950
loss: 0.9997958540916443,grad_norm: 0.7873563844929343, iteration: 100951
loss: 0.9948570132255554,grad_norm: 0.9999994494079488, iteration: 100952
loss: 0.9785946607589722,grad_norm: 0.9999994466225809, iteration: 100953
loss: 1.0890392065048218,grad_norm: 0.9999997743652029, iteration: 100954
loss: 1.0486507415771484,grad_norm: 0.9999998647084656, iteration: 100955
loss: 1.0077084302902222,grad_norm: 0.9999993644666288, iteration: 100956
loss: 1.0318002700805664,grad_norm: 0.8411206369110348, iteration: 100957
loss: 1.0192371606826782,grad_norm: 0.9051874525877296, iteration: 100958
loss: 1.0324152708053589,grad_norm: 0.9999990699573584, iteration: 100959
loss: 1.0911800861358643,grad_norm: 0.9999999565945599, iteration: 100960
loss: 1.0352628231048584,grad_norm: 0.9999991274348901, iteration: 100961
loss: 1.0128456354141235,grad_norm: 0.9162617334605688, iteration: 100962
loss: 1.0569663047790527,grad_norm: 0.9999999222349257, iteration: 100963
loss: 1.2650026082992554,grad_norm: 0.9999998146610111, iteration: 100964
loss: 1.1164271831512451,grad_norm: 0.999999464730125, iteration: 100965
loss: 1.0409456491470337,grad_norm: 0.9057279340203799, iteration: 100966
loss: 1.218753695487976,grad_norm: 0.9999994941390742, iteration: 100967
loss: 0.958576500415802,grad_norm: 0.8156636969764474, iteration: 100968
loss: 1.22532320022583,grad_norm: 0.9999991526051759, iteration: 100969
loss: 1.0598808526992798,grad_norm: 0.9999997694014298, iteration: 100970
loss: 1.3616845607757568,grad_norm: 1.0000000400824487, iteration: 100971
loss: 0.9938435554504395,grad_norm: 0.9999989669934384, iteration: 100972
loss: 1.127012848854065,grad_norm: 0.9999998432475835, iteration: 100973
loss: 1.0366129875183105,grad_norm: 0.9999992475329048, iteration: 100974
loss: 1.078066110610962,grad_norm: 0.8960589468841346, iteration: 100975
loss: 1.0164169073104858,grad_norm: 0.9516201511793628, iteration: 100976
loss: 0.9849863052368164,grad_norm: 0.9999999399279691, iteration: 100977
loss: 1.00930655002594,grad_norm: 0.9999998937250292, iteration: 100978
loss: 1.0653502941131592,grad_norm: 0.9999998969072561, iteration: 100979
loss: 1.0191035270690918,grad_norm: 0.9999992630751292, iteration: 100980
loss: 0.9690702557563782,grad_norm: 0.9999993297853325, iteration: 100981
loss: 0.9604294300079346,grad_norm: 0.9999990939169057, iteration: 100982
loss: 1.1241592168807983,grad_norm: 0.9999992270117721, iteration: 100983
loss: 1.0831702947616577,grad_norm: 0.9732646188307739, iteration: 100984
loss: 1.0125654935836792,grad_norm: 0.9179687659388986, iteration: 100985
loss: 0.9524442553520203,grad_norm: 0.999999220657117, iteration: 100986
loss: 1.0108579397201538,grad_norm: 0.9999992409542375, iteration: 100987
loss: 1.0290162563323975,grad_norm: 0.9999995170466522, iteration: 100988
loss: 1.059655785560608,grad_norm: 0.9999993633003341, iteration: 100989
loss: 1.0558209419250488,grad_norm: 0.9999993478926723, iteration: 100990
loss: 1.032952904701233,grad_norm: 0.9999992684997518, iteration: 100991
loss: 1.0109018087387085,grad_norm: 0.8493107056813475, iteration: 100992
loss: 0.9646338820457458,grad_norm: 0.9520356684885626, iteration: 100993
loss: 1.1146855354309082,grad_norm: 0.99999992499475, iteration: 100994
loss: 1.0846209526062012,grad_norm: 0.9999999698520141, iteration: 100995
loss: 1.0857659578323364,grad_norm: 0.99999984419374, iteration: 100996
loss: 1.0134788751602173,grad_norm: 0.9864206499958864, iteration: 100997
loss: 1.12090003490448,grad_norm: 0.9999997476141599, iteration: 100998
loss: 1.0259554386138916,grad_norm: 0.9999996467955476, iteration: 100999
loss: 1.042364239692688,grad_norm: 0.9071427169117607, iteration: 101000
loss: 1.0189943313598633,grad_norm: 0.9999991211724387, iteration: 101001
loss: 1.031003713607788,grad_norm: 0.9999995063393309, iteration: 101002
loss: 1.0378211736679077,grad_norm: 0.9999990182356561, iteration: 101003
loss: 0.9738714098930359,grad_norm: 0.79932152366456, iteration: 101004
loss: 0.9738390445709229,grad_norm: 0.9999990802655245, iteration: 101005
loss: 1.062597393989563,grad_norm: 0.9999998270652839, iteration: 101006
loss: 0.996767520904541,grad_norm: 0.82991090142913, iteration: 101007
loss: 1.1559946537017822,grad_norm: 0.9999995631996543, iteration: 101008
loss: 1.038614273071289,grad_norm: 0.9999995923840486, iteration: 101009
loss: 1.0745023488998413,grad_norm: 0.9999990971150734, iteration: 101010
loss: 1.0124748945236206,grad_norm: 0.9999993088320123, iteration: 101011
loss: 1.0053768157958984,grad_norm: 0.9073631432483955, iteration: 101012
loss: 1.0082165002822876,grad_norm: 0.999999941607595, iteration: 101013
loss: 1.0108692646026611,grad_norm: 0.999999357543024, iteration: 101014
loss: 1.0171008110046387,grad_norm: 0.9999992867985749, iteration: 101015
loss: 0.9841417074203491,grad_norm: 0.9697471789975317, iteration: 101016
loss: 1.1470232009887695,grad_norm: 0.9999998959394236, iteration: 101017
loss: 1.0425201654434204,grad_norm: 0.9999991808949227, iteration: 101018
loss: 1.0363800525665283,grad_norm: 0.9999994522439765, iteration: 101019
loss: 1.129176378250122,grad_norm: 0.9999994854409818, iteration: 101020
loss: 1.1047567129135132,grad_norm: 0.9999997549226347, iteration: 101021
loss: 0.9768549799919128,grad_norm: 0.9357758218892459, iteration: 101022
loss: 0.9692675471305847,grad_norm: 0.8247608901328974, iteration: 101023
loss: 1.0465505123138428,grad_norm: 0.9999992208648499, iteration: 101024
loss: 1.011204719543457,grad_norm: 0.9999991366327342, iteration: 101025
loss: 1.0191813707351685,grad_norm: 0.8478146049480243, iteration: 101026
loss: 1.0115121603012085,grad_norm: 0.8752363841862619, iteration: 101027
loss: 0.998944878578186,grad_norm: 0.8971522748851349, iteration: 101028
loss: 0.9927863478660583,grad_norm: 0.7790266826578935, iteration: 101029
loss: 0.9683524966239929,grad_norm: 0.9999991584655564, iteration: 101030
loss: 1.0796105861663818,grad_norm: 0.9999998870835577, iteration: 101031
loss: 1.0496346950531006,grad_norm: 0.9999995801346271, iteration: 101032
loss: 1.0420103073120117,grad_norm: 0.922558792935705, iteration: 101033
loss: 1.0605641603469849,grad_norm: 0.9999996499636091, iteration: 101034
loss: 1.023284673690796,grad_norm: 0.9426646880632829, iteration: 101035
loss: 1.0142968893051147,grad_norm: 0.9999991357975432, iteration: 101036
loss: 1.020667552947998,grad_norm: 0.9999992325126588, iteration: 101037
loss: 1.0695860385894775,grad_norm: 0.9999998396539228, iteration: 101038
loss: 1.0733658075332642,grad_norm: 0.9999991338439274, iteration: 101039
loss: 1.063467025756836,grad_norm: 0.9999994240554683, iteration: 101040
loss: 1.0358705520629883,grad_norm: 0.9999991873816539, iteration: 101041
loss: 1.1742268800735474,grad_norm: 0.999999863701274, iteration: 101042
loss: 1.2436115741729736,grad_norm: 0.9999994237050253, iteration: 101043
loss: 1.062349796295166,grad_norm: 0.9723211225972094, iteration: 101044
loss: 1.1283384561538696,grad_norm: 0.999999459577292, iteration: 101045
loss: 1.1552355289459229,grad_norm: 0.9999995655127502, iteration: 101046
loss: 1.0497429370880127,grad_norm: 0.9999993291466747, iteration: 101047
loss: 0.9739803075790405,grad_norm: 0.9999992240406081, iteration: 101048
loss: 1.0396238565444946,grad_norm: 0.999999186019053, iteration: 101049
loss: 0.9988687634468079,grad_norm: 0.9267992709570705, iteration: 101050
loss: 1.0056167840957642,grad_norm: 0.9999992244000228, iteration: 101051
loss: 1.1189836263656616,grad_norm: 0.9999991750262459, iteration: 101052
loss: 1.0837491750717163,grad_norm: 0.9999995089316397, iteration: 101053
loss: 1.2869353294372559,grad_norm: 1.0000000324015312, iteration: 101054
loss: 1.0888437032699585,grad_norm: 0.9999996833403529, iteration: 101055
loss: 1.0979050397872925,grad_norm: 0.967260128067987, iteration: 101056
loss: 1.109207272529602,grad_norm: 0.9999994286160193, iteration: 101057
loss: 1.1243277788162231,grad_norm: 0.9999997072726182, iteration: 101058
loss: 1.0913739204406738,grad_norm: 0.9999998093086441, iteration: 101059
loss: 1.043772578239441,grad_norm: 0.9999993799637418, iteration: 101060
loss: 1.0590320825576782,grad_norm: 0.9999992624171332, iteration: 101061
loss: 1.186166524887085,grad_norm: 1.0000000609895299, iteration: 101062
loss: 1.3180720806121826,grad_norm: 0.9999998402534149, iteration: 101063
loss: 1.0605003833770752,grad_norm: 0.9311955736274072, iteration: 101064
loss: 1.0211490392684937,grad_norm: 0.9396117726325418, iteration: 101065
loss: 1.1278526782989502,grad_norm: 0.9429397614193323, iteration: 101066
loss: 1.3108329772949219,grad_norm: 0.9999994460559289, iteration: 101067
loss: 1.0826200246810913,grad_norm: 0.9999991215075251, iteration: 101068
loss: 1.1199870109558105,grad_norm: 0.9999994997400327, iteration: 101069
loss: 1.0796692371368408,grad_norm: 0.999999879350214, iteration: 101070
loss: 1.0164058208465576,grad_norm: 0.9999991434737696, iteration: 101071
loss: 1.0055092573165894,grad_norm: 0.9999991195775086, iteration: 101072
loss: 1.2153979539871216,grad_norm: 0.9999994118183889, iteration: 101073
loss: 1.1636693477630615,grad_norm: 0.9999998704227605, iteration: 101074
loss: 1.0879348516464233,grad_norm: 0.9999998833216843, iteration: 101075
loss: 1.0790438652038574,grad_norm: 0.999999084084627, iteration: 101076
loss: 1.1356019973754883,grad_norm: 0.9999994414244353, iteration: 101077
loss: 1.1397744417190552,grad_norm: 0.9999999916428454, iteration: 101078
loss: 1.1760331392288208,grad_norm: 0.9999997824548064, iteration: 101079
loss: 1.0890594720840454,grad_norm: 0.9999995490938096, iteration: 101080
loss: 0.9768269658088684,grad_norm: 0.9999991142243078, iteration: 101081
loss: 1.1511812210083008,grad_norm: 0.999999296053117, iteration: 101082
loss: 1.1009948253631592,grad_norm: 0.9999996361693254, iteration: 101083
loss: 1.2794040441513062,grad_norm: 1.000000014659893, iteration: 101084
loss: 1.134517788887024,grad_norm: 0.9999991282556764, iteration: 101085
loss: 1.2300817966461182,grad_norm: 0.9999998624682276, iteration: 101086
loss: 1.1280291080474854,grad_norm: 0.9999992049390557, iteration: 101087
loss: 1.101780652999878,grad_norm: 0.9999990841793409, iteration: 101088
loss: 1.1310361623764038,grad_norm: 0.9999993488316674, iteration: 101089
loss: 1.0595817565917969,grad_norm: 0.8367713253500004, iteration: 101090
loss: 1.0807029008865356,grad_norm: 0.9999991381757843, iteration: 101091
loss: 1.150553584098816,grad_norm: 0.9999992192606816, iteration: 101092
loss: 1.2733403444290161,grad_norm: 0.9999997032385384, iteration: 101093
loss: 1.1653698682785034,grad_norm: 0.9999991211187381, iteration: 101094
loss: 1.0388094186782837,grad_norm: 0.999999349338858, iteration: 101095
loss: 0.9773956537246704,grad_norm: 0.8501132199773089, iteration: 101096
loss: 1.0025665760040283,grad_norm: 0.8647660876976583, iteration: 101097
loss: 1.098219394683838,grad_norm: 0.9999996087817382, iteration: 101098
loss: 1.0906630754470825,grad_norm: 0.9999996118255932, iteration: 101099
loss: 1.08132004737854,grad_norm: 0.9999993547404851, iteration: 101100
loss: 1.0718486309051514,grad_norm: 0.9999992045891164, iteration: 101101
loss: 1.012960433959961,grad_norm: 0.9999996432015066, iteration: 101102
loss: 1.076067328453064,grad_norm: 0.9793615080750644, iteration: 101103
loss: 1.0274215936660767,grad_norm: 0.9999990900313837, iteration: 101104
loss: 1.0763520002365112,grad_norm: 0.9999989632833336, iteration: 101105
loss: 1.0679155588150024,grad_norm: 0.9999997035907031, iteration: 101106
loss: 1.0410258769989014,grad_norm: 0.9999991484236168, iteration: 101107
loss: 1.0103528499603271,grad_norm: 0.8100046620186574, iteration: 101108
loss: 1.1757975816726685,grad_norm: 0.9999997784505941, iteration: 101109
loss: 1.0196493864059448,grad_norm: 0.9999997697527507, iteration: 101110
loss: 1.0725065469741821,grad_norm: 0.9999999675097434, iteration: 101111
loss: 1.069693684577942,grad_norm: 0.9999996436804509, iteration: 101112
loss: 0.9860115051269531,grad_norm: 0.9999997080232633, iteration: 101113
loss: 0.9881027936935425,grad_norm: 0.8532957535542057, iteration: 101114
loss: 1.0289539098739624,grad_norm: 0.9999999244739122, iteration: 101115
loss: 1.109035611152649,grad_norm: 0.999999778915037, iteration: 101116
loss: 1.0514049530029297,grad_norm: 0.9999990526219565, iteration: 101117
loss: 1.1035147905349731,grad_norm: 0.9999997188415429, iteration: 101118
loss: 1.0564098358154297,grad_norm: 0.9257665630896964, iteration: 101119
loss: 1.038846731185913,grad_norm: 0.9999994256285998, iteration: 101120
loss: 1.2119346857070923,grad_norm: 0.9999999210593762, iteration: 101121
loss: 1.076473355293274,grad_norm: 0.999999681122965, iteration: 101122
loss: 0.9783479571342468,grad_norm: 0.9999991371218655, iteration: 101123
loss: 1.002049446105957,grad_norm: 0.7757191739084951, iteration: 101124
loss: 1.0353609323501587,grad_norm: 0.9555688340157408, iteration: 101125
loss: 1.0188707113265991,grad_norm: 0.8794078100955258, iteration: 101126
loss: 1.04733145236969,grad_norm: 0.9754585460717705, iteration: 101127
loss: 1.0308893918991089,grad_norm: 0.9999994915305956, iteration: 101128
loss: 1.1155327558517456,grad_norm: 0.999999510921698, iteration: 101129
loss: 1.0935665369033813,grad_norm: 0.9999995411939603, iteration: 101130
loss: 0.9675525426864624,grad_norm: 0.999999623407825, iteration: 101131
loss: 1.1299772262573242,grad_norm: 0.9999994889558738, iteration: 101132
loss: 1.0161430835723877,grad_norm: 0.9999993419811242, iteration: 101133
loss: 0.9943608045578003,grad_norm: 0.9999994769834791, iteration: 101134
loss: 1.0975974798202515,grad_norm: 0.9999998389369383, iteration: 101135
loss: 1.0026155710220337,grad_norm: 0.9999994023841831, iteration: 101136
loss: 1.0011205673217773,grad_norm: 0.9999990742175004, iteration: 101137
loss: 0.9684001803398132,grad_norm: 0.7377759443751527, iteration: 101138
loss: 1.068237066268921,grad_norm: 0.9999992660068936, iteration: 101139
loss: 1.1126086711883545,grad_norm: 0.9999998064586649, iteration: 101140
loss: 1.0186433792114258,grad_norm: 0.9999997424039652, iteration: 101141
loss: 1.042802095413208,grad_norm: 0.9999993268649754, iteration: 101142
loss: 1.1181466579437256,grad_norm: 0.999999837929634, iteration: 101143
loss: 0.983053982257843,grad_norm: 0.9999994844589766, iteration: 101144
loss: 1.0966368913650513,grad_norm: 0.9999994140128109, iteration: 101145
loss: 0.9997603297233582,grad_norm: 0.9585809353225396, iteration: 101146
loss: 1.0246024131774902,grad_norm: 0.8529627986335896, iteration: 101147
loss: 1.044636607170105,grad_norm: 0.8105989746900035, iteration: 101148
loss: 0.993216335773468,grad_norm: 0.9999991920044515, iteration: 101149
loss: 1.029333472251892,grad_norm: 0.9904266839949141, iteration: 101150
loss: 1.0450756549835205,grad_norm: 0.9999991423172929, iteration: 101151
loss: 0.9955170750617981,grad_norm: 0.9999994221440359, iteration: 101152
loss: 0.9988861083984375,grad_norm: 0.9564633770805182, iteration: 101153
loss: 1.0731868743896484,grad_norm: 0.9999993109045554, iteration: 101154
loss: 1.069567084312439,grad_norm: 0.999999477959125, iteration: 101155
loss: 1.0400291681289673,grad_norm: 0.9999997726915335, iteration: 101156
loss: 1.0103240013122559,grad_norm: 0.9284231452723283, iteration: 101157
loss: 1.0152366161346436,grad_norm: 0.9999995482704459, iteration: 101158
loss: 0.9967080354690552,grad_norm: 0.9999995114743859, iteration: 101159
loss: 1.0529268980026245,grad_norm: 0.9999990796518368, iteration: 101160
loss: 0.9812036156654358,grad_norm: 0.9677002982596267, iteration: 101161
loss: 1.0471932888031006,grad_norm: 0.9999996706324493, iteration: 101162
loss: 1.020318627357483,grad_norm: 0.99999918534794, iteration: 101163
loss: 1.007150411605835,grad_norm: 0.9999990457265069, iteration: 101164
loss: 0.9964665174484253,grad_norm: 0.9999990479558033, iteration: 101165
loss: 1.0641450881958008,grad_norm: 0.9999993975350651, iteration: 101166
loss: 1.0793631076812744,grad_norm: 0.8994024108416698, iteration: 101167
loss: 1.0273857116699219,grad_norm: 0.9999991669820218, iteration: 101168
loss: 1.015121340751648,grad_norm: 0.9999994836048344, iteration: 101169
loss: 1.0814822912216187,grad_norm: 0.9999997017749067, iteration: 101170
loss: 0.9651555418968201,grad_norm: 0.9999990235973801, iteration: 101171
loss: 1.0472187995910645,grad_norm: 0.9999991001066725, iteration: 101172
loss: 1.0543062686920166,grad_norm: 0.9999994074115327, iteration: 101173
loss: 0.9940844774246216,grad_norm: 0.8377346063230348, iteration: 101174
loss: 0.9909437298774719,grad_norm: 0.8735378283008641, iteration: 101175
loss: 1.0556977987289429,grad_norm: 0.9999992177372259, iteration: 101176
loss: 1.0668442249298096,grad_norm: 0.9999990866455201, iteration: 101177
loss: 1.0393718481063843,grad_norm: 0.8852945566426986, iteration: 101178
loss: 1.0283315181732178,grad_norm: 0.8793403974852572, iteration: 101179
loss: 1.0487672090530396,grad_norm: 0.9999992090308275, iteration: 101180
loss: 1.0688635110855103,grad_norm: 0.818021121928796, iteration: 101181
loss: 1.0094112157821655,grad_norm: 0.9075563877874245, iteration: 101182
loss: 1.0997071266174316,grad_norm: 0.9999991459898946, iteration: 101183
loss: 0.9895561337471008,grad_norm: 0.9999995408819358, iteration: 101184
loss: 1.0104987621307373,grad_norm: 0.9999991931472347, iteration: 101185
loss: 0.9861811995506287,grad_norm: 0.9073953695574675, iteration: 101186
loss: 1.0823904275894165,grad_norm: 0.9999996219342224, iteration: 101187
loss: 1.128183126449585,grad_norm: 0.9999999127626478, iteration: 101188
loss: 0.9863187670707703,grad_norm: 0.9999992486906781, iteration: 101189
loss: 1.069927453994751,grad_norm: 0.9999991794747803, iteration: 101190
loss: 1.0138287544250488,grad_norm: 0.9491892188928507, iteration: 101191
loss: 1.0485577583312988,grad_norm: 0.9999998940856433, iteration: 101192
loss: 1.0065226554870605,grad_norm: 0.7994880160036407, iteration: 101193
loss: 1.026692509651184,grad_norm: 0.9999990299460304, iteration: 101194
loss: 1.0849612951278687,grad_norm: 0.9214104790863585, iteration: 101195
loss: 1.0140753984451294,grad_norm: 0.9999991087019672, iteration: 101196
loss: 1.0364729166030884,grad_norm: 0.9182420080727006, iteration: 101197
loss: 1.0119513273239136,grad_norm: 0.9141006091586552, iteration: 101198
loss: 1.0473906993865967,grad_norm: 0.9999996999475773, iteration: 101199
loss: 1.0144367218017578,grad_norm: 0.8235451787017545, iteration: 101200
loss: 1.075507640838623,grad_norm: 0.9999997784650946, iteration: 101201
loss: 1.0558676719665527,grad_norm: 0.9999999316793177, iteration: 101202
loss: 1.0773123502731323,grad_norm: 0.9999997790804286, iteration: 101203
loss: 1.0466452836990356,grad_norm: 0.8407462362549557, iteration: 101204
loss: 1.0455008745193481,grad_norm: 0.9232054379780352, iteration: 101205
loss: 1.0108888149261475,grad_norm: 0.9999991806580106, iteration: 101206
loss: 1.0366239547729492,grad_norm: 0.9999997768474888, iteration: 101207
loss: 0.9904261231422424,grad_norm: 0.9999992722502499, iteration: 101208
loss: 1.0528316497802734,grad_norm: 0.999999044040727, iteration: 101209
loss: 1.0354241132736206,grad_norm: 0.9999994565934538, iteration: 101210
loss: 0.9736692309379578,grad_norm: 0.999999196097607, iteration: 101211
loss: 1.0626286268234253,grad_norm: 0.999999740439227, iteration: 101212
loss: 1.0391322374343872,grad_norm: 0.9999993750523635, iteration: 101213
loss: 1.0850704908370972,grad_norm: 0.9999994161692313, iteration: 101214
loss: 1.0509637594223022,grad_norm: 0.8505717985254458, iteration: 101215
loss: 1.0130705833435059,grad_norm: 0.8797980456911204, iteration: 101216
loss: 0.9940605163574219,grad_norm: 0.999999252684233, iteration: 101217
loss: 0.988843560218811,grad_norm: 0.9999992061466049, iteration: 101218
loss: 1.0491642951965332,grad_norm: 0.8652514548784851, iteration: 101219
loss: 1.0211678743362427,grad_norm: 0.9999990576437727, iteration: 101220
loss: 0.999952495098114,grad_norm: 0.9103290072290703, iteration: 101221
loss: 1.0897150039672852,grad_norm: 0.9723164012227217, iteration: 101222
loss: 0.9777515530586243,grad_norm: 0.9247080604194466, iteration: 101223
loss: 1.1340534687042236,grad_norm: 0.9999999341485863, iteration: 101224
loss: 0.9966498017311096,grad_norm: 0.867191385310896, iteration: 101225
loss: 1.0800065994262695,grad_norm: 0.9999994784480117, iteration: 101226
loss: 1.0798178911209106,grad_norm: 0.9999991733770442, iteration: 101227
loss: 0.9789541363716125,grad_norm: 0.9599275924010161, iteration: 101228
loss: 1.1009265184402466,grad_norm: 0.999999209495978, iteration: 101229
loss: 1.0050239562988281,grad_norm: 0.9485115078640978, iteration: 101230
loss: 1.0501729249954224,grad_norm: 0.9999996615757543, iteration: 101231
loss: 1.0229843854904175,grad_norm: 0.9999993775086622, iteration: 101232
loss: 1.0888327360153198,grad_norm: 0.9999996145373615, iteration: 101233
loss: 0.994657576084137,grad_norm: 0.9999991440091311, iteration: 101234
loss: 0.9689394235610962,grad_norm: 0.9260627662219493, iteration: 101235
loss: 1.0256969928741455,grad_norm: 0.8851286122790015, iteration: 101236
loss: 1.0150783061981201,grad_norm: 0.9747127445496778, iteration: 101237
loss: 1.0065873861312866,grad_norm: 0.978561663591175, iteration: 101238
loss: 1.1038166284561157,grad_norm: 0.9999995681424612, iteration: 101239
loss: 0.9783734083175659,grad_norm: 0.9935717891463395, iteration: 101240
loss: 1.0290495157241821,grad_norm: 0.9999992142834283, iteration: 101241
loss: 1.015068531036377,grad_norm: 0.9999999186603387, iteration: 101242
loss: 1.0028966665267944,grad_norm: 0.9999996603030052, iteration: 101243
loss: 1.0054939985275269,grad_norm: 0.9999991524933957, iteration: 101244
loss: 0.9821136593818665,grad_norm: 0.8208749968538754, iteration: 101245
loss: 1.0102485418319702,grad_norm: 0.8500588034395802, iteration: 101246
loss: 1.0117323398590088,grad_norm: 0.9999996700462651, iteration: 101247
loss: 1.0343934297561646,grad_norm: 0.9999996395304607, iteration: 101248
loss: 1.0128048658370972,grad_norm: 0.9999995205269763, iteration: 101249
loss: 1.0027894973754883,grad_norm: 0.964036470930458, iteration: 101250
loss: 1.0109946727752686,grad_norm: 0.9453911898423706, iteration: 101251
loss: 1.022253155708313,grad_norm: 0.9999998225174077, iteration: 101252
loss: 1.039106011390686,grad_norm: 0.9999991136214188, iteration: 101253
loss: 1.0089850425720215,grad_norm: 0.8851818814504206, iteration: 101254
loss: 0.9971678853034973,grad_norm: 0.8441008341640032, iteration: 101255
loss: 1.0281455516815186,grad_norm: 0.9999998160540503, iteration: 101256
loss: 1.0484517812728882,grad_norm: 0.999999204123031, iteration: 101257
loss: 1.033342957496643,grad_norm: 0.9999991455319783, iteration: 101258
loss: 0.9738976359367371,grad_norm: 0.9725393613111145, iteration: 101259
loss: 1.2682385444641113,grad_norm: 0.999999908330108, iteration: 101260
loss: 1.0069962739944458,grad_norm: 0.9309750218461208, iteration: 101261
loss: 1.0121793746948242,grad_norm: 0.9958795477040558, iteration: 101262
loss: 1.0031418800354004,grad_norm: 0.8744838713570213, iteration: 101263
loss: 1.029789686203003,grad_norm: 0.9999991542547905, iteration: 101264
loss: 1.0710854530334473,grad_norm: 0.9999991767912361, iteration: 101265
loss: 0.9851148724555969,grad_norm: 0.9066953268247477, iteration: 101266
loss: 1.0369253158569336,grad_norm: 0.7923347693260518, iteration: 101267
loss: 1.0570499897003174,grad_norm: 1.0000000169920702, iteration: 101268
loss: 0.9870043396949768,grad_norm: 0.9186398048323026, iteration: 101269
loss: 1.01094388961792,grad_norm: 0.8151819990974014, iteration: 101270
loss: 1.0076639652252197,grad_norm: 0.9999994483834911, iteration: 101271
loss: 1.0094380378723145,grad_norm: 0.9360513513682609, iteration: 101272
loss: 1.0263683795928955,grad_norm: 0.9783863395496754, iteration: 101273
loss: 0.972072422504425,grad_norm: 0.7880923679882746, iteration: 101274
loss: 1.0219104290008545,grad_norm: 0.918434616765972, iteration: 101275
loss: 1.0193378925323486,grad_norm: 0.7920688988291088, iteration: 101276
loss: 1.0559773445129395,grad_norm: 0.9999998894965708, iteration: 101277
loss: 0.9870142340660095,grad_norm: 0.8879774225467908, iteration: 101278
loss: 0.9946230053901672,grad_norm: 0.8774090881769699, iteration: 101279
loss: 1.0065183639526367,grad_norm: 0.9669767159685021, iteration: 101280
loss: 1.0118091106414795,grad_norm: 0.9652891012097735, iteration: 101281
loss: 1.0084173679351807,grad_norm: 0.9999990564990655, iteration: 101282
loss: 1.0348376035690308,grad_norm: 0.9999991222414372, iteration: 101283
loss: 1.0255556106567383,grad_norm: 0.8985325735833065, iteration: 101284
loss: 1.0676108598709106,grad_norm: 0.999999825945797, iteration: 101285
loss: 1.0917099714279175,grad_norm: 0.9999998234009234, iteration: 101286
loss: 1.0298171043395996,grad_norm: 0.999999929329652, iteration: 101287
loss: 1.0206791162490845,grad_norm: 0.8822498612166515, iteration: 101288
loss: 0.9624590873718262,grad_norm: 0.9606238786596523, iteration: 101289
loss: 1.0035303831100464,grad_norm: 0.9719576161080545, iteration: 101290
loss: 1.046923041343689,grad_norm: 0.999999586813475, iteration: 101291
loss: 0.9737710356712341,grad_norm: 0.8338470319435315, iteration: 101292
loss: 0.9910078644752502,grad_norm: 0.999999454868445, iteration: 101293
loss: 1.0244003534317017,grad_norm: 0.9999992697662069, iteration: 101294
loss: 1.0164477825164795,grad_norm: 0.9999989780978374, iteration: 101295
loss: 1.1474719047546387,grad_norm: 0.9999999100297418, iteration: 101296
loss: 1.0489780902862549,grad_norm: 0.999999615223333, iteration: 101297
loss: 1.0269008874893188,grad_norm: 0.8437099337633317, iteration: 101298
loss: 1.0158826112747192,grad_norm: 0.9999994583388105, iteration: 101299
loss: 1.0195841789245605,grad_norm: 0.963667714186561, iteration: 101300
loss: 1.0377225875854492,grad_norm: 0.9999993100815677, iteration: 101301
loss: 1.039656400680542,grad_norm: 0.9999995870325619, iteration: 101302
loss: 1.0563724040985107,grad_norm: 0.9999996749063313, iteration: 101303
loss: 0.991361141204834,grad_norm: 0.9999991241758314, iteration: 101304
loss: 1.0355510711669922,grad_norm: 0.999999334373977, iteration: 101305
loss: 1.0906602144241333,grad_norm: 0.9999998950554119, iteration: 101306
loss: 1.0155060291290283,grad_norm: 0.9999991648700769, iteration: 101307
loss: 1.07188081741333,grad_norm: 0.9999993268726138, iteration: 101308
loss: 0.9697039127349854,grad_norm: 0.901930494500513, iteration: 101309
loss: 0.9628177881240845,grad_norm: 0.8818387515664281, iteration: 101310
loss: 1.009774923324585,grad_norm: 0.860961334104881, iteration: 101311
loss: 1.0357708930969238,grad_norm: 0.819638814875417, iteration: 101312
loss: 0.9918661713600159,grad_norm: 0.9090154814893012, iteration: 101313
loss: 1.0029337406158447,grad_norm: 0.7847766568284111, iteration: 101314
loss: 0.9925765991210938,grad_norm: 0.8407347074730166, iteration: 101315
loss: 1.0475660562515259,grad_norm: 0.9999993880768218, iteration: 101316
loss: 1.0006719827651978,grad_norm: 0.757192242386083, iteration: 101317
loss: 1.0008903741836548,grad_norm: 0.9999991454447897, iteration: 101318
loss: 1.0499800443649292,grad_norm: 0.8875443172181746, iteration: 101319
loss: 1.098463535308838,grad_norm: 0.9999990323709592, iteration: 101320
loss: 1.0116360187530518,grad_norm: 0.8294791334540371, iteration: 101321
loss: 1.0238736867904663,grad_norm: 0.9999992428068926, iteration: 101322
loss: 1.0175788402557373,grad_norm: 0.9999997531696923, iteration: 101323
loss: 0.9564676880836487,grad_norm: 0.9999996331863676, iteration: 101324
loss: 0.9954624772071838,grad_norm: 0.9999995960368031, iteration: 101325
loss: 0.9941036105155945,grad_norm: 0.9997179543515031, iteration: 101326
loss: 1.0418800115585327,grad_norm: 1.0000000053064795, iteration: 101327
loss: 1.0513559579849243,grad_norm: 0.9999993517589642, iteration: 101328
loss: 1.055063009262085,grad_norm: 0.9924532109919818, iteration: 101329
loss: 0.9914748072624207,grad_norm: 0.9199856400897799, iteration: 101330
loss: 1.0134795904159546,grad_norm: 0.9628505843346968, iteration: 101331
loss: 1.0386544466018677,grad_norm: 0.9138607882037478, iteration: 101332
loss: 1.0403728485107422,grad_norm: 0.7687434349188218, iteration: 101333
loss: 0.9767833352088928,grad_norm: 0.9999990946372521, iteration: 101334
loss: 1.0228790044784546,grad_norm: 0.9100924106885576, iteration: 101335
loss: 1.0258387327194214,grad_norm: 0.9614755392231796, iteration: 101336
loss: 0.9540528655052185,grad_norm: 0.934917595386782, iteration: 101337
loss: 1.0361614227294922,grad_norm: 0.9052771313495679, iteration: 101338
loss: 0.9560087323188782,grad_norm: 0.9204885584433812, iteration: 101339
loss: 1.0740604400634766,grad_norm: 0.9999995378179839, iteration: 101340
loss: 1.0094236135482788,grad_norm: 0.9999995435413023, iteration: 101341
loss: 0.97001713514328,grad_norm: 0.923068285050175, iteration: 101342
loss: 1.0258196592330933,grad_norm: 0.8657303261448036, iteration: 101343
loss: 0.9785216450691223,grad_norm: 0.9999989852421153, iteration: 101344
loss: 1.011557936668396,grad_norm: 0.9999991292766066, iteration: 101345
loss: 1.014941930770874,grad_norm: 0.9999991900530072, iteration: 101346
loss: 1.1276088953018188,grad_norm: 0.9999996846005974, iteration: 101347
loss: 0.9890012741088867,grad_norm: 0.9999991613437759, iteration: 101348
loss: 0.9950418472290039,grad_norm: 0.8565742024707325, iteration: 101349
loss: 0.9627225399017334,grad_norm: 0.849984225087349, iteration: 101350
loss: 1.0383961200714111,grad_norm: 0.9999999419161644, iteration: 101351
loss: 1.045823097229004,grad_norm: 0.9999990399736737, iteration: 101352
loss: 1.0292097330093384,grad_norm: 0.9999994455444342, iteration: 101353
loss: 0.9545617699623108,grad_norm: 0.8420354179941095, iteration: 101354
loss: 0.986797571182251,grad_norm: 0.9999990831924092, iteration: 101355
loss: 0.9930304884910583,grad_norm: 0.9999990754498345, iteration: 101356
loss: 0.9951780438423157,grad_norm: 0.9999996415130902, iteration: 101357
loss: 1.0590919256210327,grad_norm: 0.8843079284105019, iteration: 101358
loss: 0.9776003956794739,grad_norm: 0.9999993093311282, iteration: 101359
loss: 1.0550973415374756,grad_norm: 0.9999997685653288, iteration: 101360
loss: 1.1146860122680664,grad_norm: 0.9999991171139876, iteration: 101361
loss: 0.9768329858779907,grad_norm: 0.9087185703480266, iteration: 101362
loss: 0.9823194146156311,grad_norm: 0.7146782915454108, iteration: 101363
loss: 1.021653413772583,grad_norm: 0.8452065333560338, iteration: 101364
loss: 0.9952806234359741,grad_norm: 0.8623127226112046, iteration: 101365
loss: 0.9921953678131104,grad_norm: 0.8458882609902818, iteration: 101366
loss: 1.0003085136413574,grad_norm: 0.8610926440539725, iteration: 101367
loss: 0.9994080662727356,grad_norm: 0.9671720413061444, iteration: 101368
loss: 1.008668303489685,grad_norm: 0.9505360526745974, iteration: 101369
loss: 1.0122332572937012,grad_norm: 0.8662881874506924, iteration: 101370
loss: 0.9997338652610779,grad_norm: 0.9062964477193606, iteration: 101371
loss: 1.0247948169708252,grad_norm: 0.9999991256535987, iteration: 101372
loss: 1.0484983921051025,grad_norm: 0.999999915743568, iteration: 101373
loss: 0.9599301218986511,grad_norm: 0.833269628717809, iteration: 101374
loss: 1.0222270488739014,grad_norm: 0.9999996147812703, iteration: 101375
loss: 0.9982634782791138,grad_norm: 0.9999991176183669, iteration: 101376
loss: 1.0099167823791504,grad_norm: 0.9999992568375705, iteration: 101377
loss: 1.0238975286483765,grad_norm: 0.9999990654289933, iteration: 101378
loss: 1.036218523979187,grad_norm: 0.9999992065721613, iteration: 101379
loss: 0.9686729311943054,grad_norm: 0.9999989453142243, iteration: 101380
loss: 1.004524827003479,grad_norm: 0.9563554049344715, iteration: 101381
loss: 1.047641634941101,grad_norm: 0.9056037328995482, iteration: 101382
loss: 1.0123571157455444,grad_norm: 0.9723314175242025, iteration: 101383
loss: 0.9895856976509094,grad_norm: 0.8672092810115808, iteration: 101384
loss: 0.9886994957923889,grad_norm: 0.8722984886932292, iteration: 101385
loss: 0.9734164476394653,grad_norm: 0.8561321257532767, iteration: 101386
loss: 1.0195715427398682,grad_norm: 0.8795614532726862, iteration: 101387
loss: 0.9730126857757568,grad_norm: 0.9695589758378038, iteration: 101388
loss: 0.9909818768501282,grad_norm: 1.000000104843803, iteration: 101389
loss: 0.9905686378479004,grad_norm: 0.971445738367011, iteration: 101390
loss: 1.0884618759155273,grad_norm: 0.9057944118456543, iteration: 101391
loss: 1.0293694734573364,grad_norm: 0.8252810739203547, iteration: 101392
loss: 1.0332162380218506,grad_norm: 0.9858531829102581, iteration: 101393
loss: 1.0235670804977417,grad_norm: 0.9608110165254024, iteration: 101394
loss: 0.9893102645874023,grad_norm: 0.8964070991385422, iteration: 101395
loss: 1.0463160276412964,grad_norm: 0.9999995783074618, iteration: 101396
loss: 0.9914717674255371,grad_norm: 0.908247984471027, iteration: 101397
loss: 1.0532231330871582,grad_norm: 0.9787058752780915, iteration: 101398
loss: 1.0156440734863281,grad_norm: 0.9999992602486696, iteration: 101399
loss: 1.0318975448608398,grad_norm: 0.9999995095575717, iteration: 101400
loss: 1.1293035745620728,grad_norm: 0.9999999681851497, iteration: 101401
loss: 1.0336123704910278,grad_norm: 0.9999991894222163, iteration: 101402
loss: 0.9700683355331421,grad_norm: 0.8944068665732778, iteration: 101403
loss: 0.9985358119010925,grad_norm: 0.9999998316219215, iteration: 101404
loss: 0.9943148493766785,grad_norm: 0.9107638060407095, iteration: 101405
loss: 0.9795247316360474,grad_norm: 0.9729811399271221, iteration: 101406
loss: 1.05148184299469,grad_norm: 0.999999501190403, iteration: 101407
loss: 1.0039019584655762,grad_norm: 0.9999991318655144, iteration: 101408
loss: 1.0131677389144897,grad_norm: 0.999999036972646, iteration: 101409
loss: 0.9806053638458252,grad_norm: 0.8111595470393896, iteration: 101410
loss: 0.9896671175956726,grad_norm: 0.7626118865197796, iteration: 101411
loss: 0.9871653914451599,grad_norm: 0.791070691351405, iteration: 101412
loss: 1.0101754665374756,grad_norm: 0.849527508938688, iteration: 101413
loss: 1.0056356191635132,grad_norm: 0.919960045048057, iteration: 101414
loss: 1.029326319694519,grad_norm: 0.9999990299506206, iteration: 101415
loss: 1.0280652046203613,grad_norm: 0.7781590541023846, iteration: 101416
loss: 1.1175087690353394,grad_norm: 0.9710999962667298, iteration: 101417
loss: 0.9751946330070496,grad_norm: 0.8441976139509889, iteration: 101418
loss: 1.024836778640747,grad_norm: 0.7801039211902744, iteration: 101419
loss: 1.0319209098815918,grad_norm: 0.9999991426800939, iteration: 101420
loss: 1.0212211608886719,grad_norm: 0.9999996628736916, iteration: 101421
loss: 1.0581647157669067,grad_norm: 1.0000000415097858, iteration: 101422
loss: 1.062698245048523,grad_norm: 0.9999998007465564, iteration: 101423
loss: 1.0360158681869507,grad_norm: 0.8357884183798316, iteration: 101424
loss: 1.081130862236023,grad_norm: 0.9999998826150943, iteration: 101425
loss: 0.9820677042007446,grad_norm: 0.9326059538334338, iteration: 101426
loss: 0.9862412810325623,grad_norm: 0.9999990187047413, iteration: 101427
loss: 1.0028048753738403,grad_norm: 0.9101266638137478, iteration: 101428
loss: 1.0235178470611572,grad_norm: 0.8373135691547727, iteration: 101429
loss: 0.9977957606315613,grad_norm: 0.9002556358844357, iteration: 101430
loss: 0.9974514842033386,grad_norm: 0.9999997985812074, iteration: 101431
loss: 1.0813194513320923,grad_norm: 0.9999992477295796, iteration: 101432
loss: 1.1732234954833984,grad_norm: 0.9999996311663742, iteration: 101433
loss: 0.9959585070610046,grad_norm: 0.893882540143466, iteration: 101434
loss: 1.011047601699829,grad_norm: 0.9110213474442095, iteration: 101435
loss: 1.0447651147842407,grad_norm: 0.8625021161051476, iteration: 101436
loss: 1.0146993398666382,grad_norm: 0.7464332699472569, iteration: 101437
loss: 1.0726615190505981,grad_norm: 0.9999998568225464, iteration: 101438
loss: 1.0097895860671997,grad_norm: 0.9999997138218036, iteration: 101439
loss: 1.0155303478240967,grad_norm: 0.9999999946063448, iteration: 101440
loss: 1.0678620338439941,grad_norm: 1.000000023512133, iteration: 101441
loss: 1.002762794494629,grad_norm: 0.9290533594755322, iteration: 101442
loss: 1.1274774074554443,grad_norm: 0.9999995277598452, iteration: 101443
loss: 0.9860959053039551,grad_norm: 0.9585821699554826, iteration: 101444
loss: 1.0031805038452148,grad_norm: 0.9999996121582778, iteration: 101445
loss: 1.1629571914672852,grad_norm: 0.9999999844778774, iteration: 101446
loss: 1.131304383277893,grad_norm: 0.9999997963859502, iteration: 101447
loss: 1.0238146781921387,grad_norm: 0.9999992267653001, iteration: 101448
loss: 1.0919491052627563,grad_norm: 0.9999994967842384, iteration: 101449
loss: 0.9702280759811401,grad_norm: 0.9634510435135432, iteration: 101450
loss: 1.0226298570632935,grad_norm: 0.9591113381108043, iteration: 101451
loss: 1.0406092405319214,grad_norm: 0.9999992382617006, iteration: 101452
loss: 1.0195637941360474,grad_norm: 0.9999992455274673, iteration: 101453
loss: 1.051382303237915,grad_norm: 0.9999997993158094, iteration: 101454
loss: 0.9730492234230042,grad_norm: 0.83408378394241, iteration: 101455
loss: 1.0594640970230103,grad_norm: 0.9999992780932129, iteration: 101456
loss: 0.9998797178268433,grad_norm: 0.9999997064434298, iteration: 101457
loss: 1.0334537029266357,grad_norm: 0.9999996534698263, iteration: 101458
loss: 1.0368479490280151,grad_norm: 0.9999992629736795, iteration: 101459
loss: 0.9678301811218262,grad_norm: 0.9040936203891867, iteration: 101460
loss: 1.072401762008667,grad_norm: 0.999999776614681, iteration: 101461
loss: 1.0309934616088867,grad_norm: 0.9503586030895235, iteration: 101462
loss: 1.0077214241027832,grad_norm: 0.9999990927662084, iteration: 101463
loss: 1.0268570184707642,grad_norm: 0.9999990312037641, iteration: 101464
loss: 1.0288684368133545,grad_norm: 0.8770591380988406, iteration: 101465
loss: 1.0902279615402222,grad_norm: 0.9999998475550108, iteration: 101466
loss: 1.135844111442566,grad_norm: 0.9999996269460747, iteration: 101467
loss: 0.9981677532196045,grad_norm: 0.8292090479167556, iteration: 101468
loss: 0.9990106821060181,grad_norm: 0.9999997393404222, iteration: 101469
loss: 1.0669795274734497,grad_norm: 0.9999996932186385, iteration: 101470
loss: 0.9992307424545288,grad_norm: 0.9999990480668982, iteration: 101471
loss: 1.0973860025405884,grad_norm: 0.9999994466586614, iteration: 101472
loss: 1.007777214050293,grad_norm: 0.9482824478044365, iteration: 101473
loss: 1.0151596069335938,grad_norm: 0.9999999264030813, iteration: 101474
loss: 1.0490789413452148,grad_norm: 0.9999998576857906, iteration: 101475
loss: 0.9828214645385742,grad_norm: 0.9726440562432735, iteration: 101476
loss: 0.9946399331092834,grad_norm: 0.9092868237137101, iteration: 101477
loss: 1.0196832418441772,grad_norm: 0.767286262690231, iteration: 101478
loss: 1.0251359939575195,grad_norm: 0.9999997865407426, iteration: 101479
loss: 1.022092342376709,grad_norm: 0.9999992551776277, iteration: 101480
loss: 0.9614303708076477,grad_norm: 0.9580743248034858, iteration: 101481
loss: 1.0895661115646362,grad_norm: 0.9999991873043472, iteration: 101482
loss: 0.9956849813461304,grad_norm: 0.9098985772096988, iteration: 101483
loss: 1.0081605911254883,grad_norm: 0.7840259809898735, iteration: 101484
loss: 1.0606873035430908,grad_norm: 0.9999998241084477, iteration: 101485
loss: 1.0164196491241455,grad_norm: 0.8294256143129795, iteration: 101486
loss: 1.026218295097351,grad_norm: 0.9999991890521647, iteration: 101487
loss: 1.012038230895996,grad_norm: 0.999999242924348, iteration: 101488
loss: 0.98931485414505,grad_norm: 0.9036166003710975, iteration: 101489
loss: 1.100913405418396,grad_norm: 0.9999992240913588, iteration: 101490
loss: 1.035557746887207,grad_norm: 0.9999994776161738, iteration: 101491
loss: 1.0694555044174194,grad_norm: 0.9999995942491885, iteration: 101492
loss: 1.0022250413894653,grad_norm: 0.999999168855519, iteration: 101493
loss: 1.0401455163955688,grad_norm: 0.9999997805240419, iteration: 101494
loss: 0.9942606091499329,grad_norm: 0.9639343826069862, iteration: 101495
loss: 0.9988069534301758,grad_norm: 0.9999997718121423, iteration: 101496
loss: 1.0568238496780396,grad_norm: 0.9999993636146226, iteration: 101497
loss: 0.9777933359146118,grad_norm: 0.8325366830999317, iteration: 101498
loss: 1.0356031656265259,grad_norm: 0.9999995286388483, iteration: 101499
loss: 1.0433682203292847,grad_norm: 0.9999993683876497, iteration: 101500
loss: 0.9858298897743225,grad_norm: 0.9999990954953689, iteration: 101501
loss: 1.1154147386550903,grad_norm: 0.9999998225790795, iteration: 101502
loss: 1.022710919380188,grad_norm: 0.9999992272924848, iteration: 101503
loss: 1.0369385480880737,grad_norm: 0.9999991747569069, iteration: 101504
loss: 1.095950961112976,grad_norm: 0.9999995546088656, iteration: 101505
loss: 1.1599786281585693,grad_norm: 0.9999996653612292, iteration: 101506
loss: 1.00065016746521,grad_norm: 0.925095290217805, iteration: 101507
loss: 0.9726417064666748,grad_norm: 0.999999062786272, iteration: 101508
loss: 0.9745148420333862,grad_norm: 0.9999992715235597, iteration: 101509
loss: 1.012339472770691,grad_norm: 0.9999993501582417, iteration: 101510
loss: 0.972612202167511,grad_norm: 0.7968917020623776, iteration: 101511
loss: 1.0378330945968628,grad_norm: 0.9616667233268688, iteration: 101512
loss: 1.0192372798919678,grad_norm: 0.9999997760223736, iteration: 101513
loss: 1.015414834022522,grad_norm: 0.9999998225208069, iteration: 101514
loss: 1.0937494039535522,grad_norm: 0.9999990888758274, iteration: 101515
loss: 0.981073796749115,grad_norm: 0.8944635393465002, iteration: 101516
loss: 1.1249964237213135,grad_norm: 0.9999995140842053, iteration: 101517
loss: 0.989707350730896,grad_norm: 0.8555177344164195, iteration: 101518
loss: 1.0231868028640747,grad_norm: 0.9999996539863449, iteration: 101519
loss: 0.9981281757354736,grad_norm: 0.9999997783469762, iteration: 101520
loss: 1.1496539115905762,grad_norm: 0.9999999284143828, iteration: 101521
loss: 1.161371111869812,grad_norm: 0.9999994016496898, iteration: 101522
loss: 1.0047425031661987,grad_norm: 0.9999991967681855, iteration: 101523
loss: 1.032430648803711,grad_norm: 0.9999991627634665, iteration: 101524
loss: 1.2094460725784302,grad_norm: 0.9999995718582988, iteration: 101525
loss: 1.0520176887512207,grad_norm: 0.9999999743696798, iteration: 101526
loss: 1.0611751079559326,grad_norm: 0.9999994871343078, iteration: 101527
loss: 1.1302555799484253,grad_norm: 0.9999998071053577, iteration: 101528
loss: 1.1726223230361938,grad_norm: 0.9999999444319683, iteration: 101529
loss: 1.1985245943069458,grad_norm: 0.999999039626457, iteration: 101530
loss: 1.0660756826400757,grad_norm: 0.9999994502365291, iteration: 101531
loss: 1.065599799156189,grad_norm: 0.8779271632003356, iteration: 101532
loss: 1.0702534914016724,grad_norm: 0.9999997035529845, iteration: 101533
loss: 0.992573082447052,grad_norm: 0.9999996835490282, iteration: 101534
loss: 1.370319128036499,grad_norm: 0.9999998739760168, iteration: 101535
loss: 1.2305936813354492,grad_norm: 0.9999995756807495, iteration: 101536
loss: 1.0111875534057617,grad_norm: 0.9999995991425554, iteration: 101537
loss: 1.0104585886001587,grad_norm: 0.971298163946053, iteration: 101538
loss: 1.0185033082962036,grad_norm: 0.9283786211276639, iteration: 101539
loss: 0.9730985760688782,grad_norm: 0.8510694791349511, iteration: 101540
loss: 1.0319485664367676,grad_norm: 0.9999993537126055, iteration: 101541
loss: 1.0346981287002563,grad_norm: 0.9999997444150687, iteration: 101542
loss: 1.0211745500564575,grad_norm: 0.9104541207993968, iteration: 101543
loss: 0.9989081025123596,grad_norm: 0.7627282737376856, iteration: 101544
loss: 1.0076147317886353,grad_norm: 0.8298176931249568, iteration: 101545
loss: 0.993547260761261,grad_norm: 0.818905933039885, iteration: 101546
loss: 1.0422182083129883,grad_norm: 0.9999994865066681, iteration: 101547
loss: 1.0106120109558105,grad_norm: 1.0000000209672122, iteration: 101548
loss: 1.0161068439483643,grad_norm: 0.9999991968796927, iteration: 101549
loss: 1.073211431503296,grad_norm: 0.9999994160376929, iteration: 101550
loss: 0.9596618413925171,grad_norm: 0.9570484104024113, iteration: 101551
loss: 0.9638999104499817,grad_norm: 0.9999991329523242, iteration: 101552
loss: 0.9938663244247437,grad_norm: 0.9079235042766866, iteration: 101553
loss: 0.9905561208724976,grad_norm: 0.9999992320101948, iteration: 101554
loss: 1.0066190958023071,grad_norm: 0.8551426778075585, iteration: 101555
loss: 0.9950560331344604,grad_norm: 0.999999942602609, iteration: 101556
loss: 0.9969791173934937,grad_norm: 0.9314888689453467, iteration: 101557
loss: 0.9594427347183228,grad_norm: 0.9999991212664875, iteration: 101558
loss: 1.1160359382629395,grad_norm: 0.9999998750009439, iteration: 101559
loss: 0.9983134269714355,grad_norm: 0.9999990096464856, iteration: 101560
loss: 1.0585960149765015,grad_norm: 1.0000000064947887, iteration: 101561
loss: 1.0796273946762085,grad_norm: 0.9999998880417265, iteration: 101562
loss: 1.0474404096603394,grad_norm: 0.999999335627291, iteration: 101563
loss: 1.0652031898498535,grad_norm: 0.9777423836456577, iteration: 101564
loss: 1.0209749937057495,grad_norm: 0.9999989551789412, iteration: 101565
loss: 1.0042742490768433,grad_norm: 0.9484804063367182, iteration: 101566
loss: 0.9939215183258057,grad_norm: 0.7854999619833914, iteration: 101567
loss: 1.0635199546813965,grad_norm: 0.9999991261495602, iteration: 101568
loss: 1.113218069076538,grad_norm: 0.9999998330433159, iteration: 101569
loss: 1.0663717985153198,grad_norm: 0.9999998584643849, iteration: 101570
loss: 1.0010583400726318,grad_norm: 0.9999992202962422, iteration: 101571
loss: 0.9859769940376282,grad_norm: 0.9999997319180185, iteration: 101572
loss: 0.9994051456451416,grad_norm: 0.9999990655626866, iteration: 101573
loss: 1.0025125741958618,grad_norm: 0.9999995537738213, iteration: 101574
loss: 1.0404080152511597,grad_norm: 0.9999995038860121, iteration: 101575
loss: 1.0255171060562134,grad_norm: 0.9196158555642786, iteration: 101576
loss: 1.0289616584777832,grad_norm: 0.9999996778249973, iteration: 101577
loss: 0.9968011975288391,grad_norm: 0.9999992153055445, iteration: 101578
loss: 1.0261616706848145,grad_norm: 0.9079737291356299, iteration: 101579
loss: 1.0339629650115967,grad_norm: 0.9693283929627177, iteration: 101580
loss: 1.0290381908416748,grad_norm: 0.9999996045670871, iteration: 101581
loss: 0.9738010168075562,grad_norm: 0.9882368540901589, iteration: 101582
loss: 1.1064879894256592,grad_norm: 0.9999992649907492, iteration: 101583
loss: 1.1255404949188232,grad_norm: 0.9999996197193918, iteration: 101584
loss: 0.9841762781143188,grad_norm: 0.9270914193455156, iteration: 101585
loss: 1.0181856155395508,grad_norm: 0.9377717664199884, iteration: 101586
loss: 1.0176715850830078,grad_norm: 0.999999861997212, iteration: 101587
loss: 1.042601466178894,grad_norm: 0.9999995738508123, iteration: 101588
loss: 1.0360225439071655,grad_norm: 0.9999995935750972, iteration: 101589
loss: 0.9643039107322693,grad_norm: 0.8863134221293865, iteration: 101590
loss: 1.0417319536209106,grad_norm: 0.9999997596593772, iteration: 101591
loss: 1.013745665550232,grad_norm: 0.9999997919250685, iteration: 101592
loss: 0.9810113310813904,grad_norm: 0.9366950875474495, iteration: 101593
loss: 0.9671214818954468,grad_norm: 0.9980681571599416, iteration: 101594
loss: 1.0146684646606445,grad_norm: 0.9999994235046313, iteration: 101595
loss: 1.0358128547668457,grad_norm: 0.9999999826399528, iteration: 101596
loss: 0.9991894960403442,grad_norm: 0.9999998516199582, iteration: 101597
loss: 1.0110076665878296,grad_norm: 0.9999995111528459, iteration: 101598
loss: 1.0224162340164185,grad_norm: 0.9999990596301911, iteration: 101599
loss: 1.0314100980758667,grad_norm: 0.9069335773310747, iteration: 101600
loss: 0.9944016337394714,grad_norm: 0.8866180458932631, iteration: 101601
loss: 1.0329116582870483,grad_norm: 0.9999991619119574, iteration: 101602
loss: 1.0157724618911743,grad_norm: 0.9208938956425841, iteration: 101603
loss: 1.0002496242523193,grad_norm: 0.8918634916080563, iteration: 101604
loss: 0.9705539345741272,grad_norm: 0.9945389481407941, iteration: 101605
loss: 1.0223407745361328,grad_norm: 0.9999991620285281, iteration: 101606
loss: 1.1888012886047363,grad_norm: 0.9999998272075703, iteration: 101607
loss: 1.0746632814407349,grad_norm: 0.9999999791285783, iteration: 101608
loss: 1.05311918258667,grad_norm: 0.9279344444819968, iteration: 101609
loss: 0.9900884628295898,grad_norm: 0.9999990966109095, iteration: 101610
loss: 1.0065701007843018,grad_norm: 0.922557774625807, iteration: 101611
loss: 0.9818775057792664,grad_norm: 0.8741435612237084, iteration: 101612
loss: 1.005246639251709,grad_norm: 0.9999993210085962, iteration: 101613
loss: 1.03847336769104,grad_norm: 0.9999990971276252, iteration: 101614
loss: 1.0107264518737793,grad_norm: 0.8312933614938025, iteration: 101615
loss: 1.082825779914856,grad_norm: 0.9785224495523197, iteration: 101616
loss: 1.1321051120758057,grad_norm: 0.999999301742562, iteration: 101617
loss: 0.9753562808036804,grad_norm: 0.8856578306300569, iteration: 101618
loss: 1.0413724184036255,grad_norm: 0.999999597490758, iteration: 101619
loss: 1.0211488008499146,grad_norm: 0.9999993741185454, iteration: 101620
loss: 1.0265766382217407,grad_norm: 0.9999994160482623, iteration: 101621
loss: 1.0257335901260376,grad_norm: 0.9999997553881793, iteration: 101622
loss: 0.9760806560516357,grad_norm: 0.9999990844836435, iteration: 101623
loss: 0.9584740400314331,grad_norm: 0.8733642573500681, iteration: 101624
loss: 1.0878260135650635,grad_norm: 0.9999995555781024, iteration: 101625
loss: 1.0257633924484253,grad_norm: 0.9999992776948448, iteration: 101626
loss: 1.0405274629592896,grad_norm: 0.9999997949078401, iteration: 101627
loss: 1.0998591184616089,grad_norm: 0.9999997261564968, iteration: 101628
loss: 1.0229297876358032,grad_norm: 0.9650150590491987, iteration: 101629
loss: 0.9905250668525696,grad_norm: 0.9817112768975296, iteration: 101630
loss: 1.0432374477386475,grad_norm: 0.9364018406713949, iteration: 101631
loss: 0.9696716070175171,grad_norm: 0.78006884857034, iteration: 101632
loss: 1.0522171258926392,grad_norm: 0.8845996562275926, iteration: 101633
loss: 0.9977543354034424,grad_norm: 0.8120639461212511, iteration: 101634
loss: 1.0133212804794312,grad_norm: 0.99999943899503, iteration: 101635
loss: 1.003631353378296,grad_norm: 0.9999992819134579, iteration: 101636
loss: 0.9768397808074951,grad_norm: 0.9999991326724937, iteration: 101637
loss: 0.9424334168434143,grad_norm: 0.8558394325129514, iteration: 101638
loss: 0.9928956031799316,grad_norm: 0.7742972564856785, iteration: 101639
loss: 1.0204553604125977,grad_norm: 0.8998093179574608, iteration: 101640
loss: 1.0145456790924072,grad_norm: 0.9999991889056453, iteration: 101641
loss: 0.992210865020752,grad_norm: 0.8557085261199481, iteration: 101642
loss: 1.065518856048584,grad_norm: 0.9794735574349721, iteration: 101643
loss: 1.0123106241226196,grad_norm: 0.7563519038466605, iteration: 101644
loss: 0.9960225820541382,grad_norm: 0.8008159157998246, iteration: 101645
loss: 1.0916938781738281,grad_norm: 0.9999997342710232, iteration: 101646
loss: 1.0546308755874634,grad_norm: 0.9999996347972662, iteration: 101647
loss: 1.0137251615524292,grad_norm: 0.9999999598820769, iteration: 101648
loss: 1.0060902833938599,grad_norm: 0.7975560018629719, iteration: 101649
loss: 1.0037720203399658,grad_norm: 0.7683347454192799, iteration: 101650
loss: 1.0133055448532104,grad_norm: 0.9999995552646616, iteration: 101651
loss: 1.0346564054489136,grad_norm: 0.9999991588632549, iteration: 101652
loss: 0.9416033625602722,grad_norm: 0.8854644586383211, iteration: 101653
loss: 1.0228830575942993,grad_norm: 0.9999997788990964, iteration: 101654
loss: 1.065470576286316,grad_norm: 0.9999993645150778, iteration: 101655
loss: 1.0335315465927124,grad_norm: 0.9999995149406805, iteration: 101656
loss: 1.006590723991394,grad_norm: 0.8168404571900545, iteration: 101657
loss: 0.9611701369285583,grad_norm: 0.8698300240106301, iteration: 101658
loss: 1.106803059577942,grad_norm: 0.9999992082774942, iteration: 101659
loss: 1.00676429271698,grad_norm: 0.8547953256160841, iteration: 101660
loss: 1.0444896221160889,grad_norm: 0.9999991088986432, iteration: 101661
loss: 0.9820283651351929,grad_norm: 0.8672276659234578, iteration: 101662
loss: 1.0162512063980103,grad_norm: 0.8408809121651503, iteration: 101663
loss: 1.0173877477645874,grad_norm: 0.9999993772539578, iteration: 101664
loss: 1.0071038007736206,grad_norm: 0.9999990036288874, iteration: 101665
loss: 1.0668659210205078,grad_norm: 0.9999998742878797, iteration: 101666
loss: 1.0662692785263062,grad_norm: 0.9999993186038505, iteration: 101667
loss: 1.006674885749817,grad_norm: 0.8682785185490295, iteration: 101668
loss: 0.9951955676078796,grad_norm: 0.9999989544630843, iteration: 101669
loss: 0.9929876923561096,grad_norm: 0.8679320789304935, iteration: 101670
loss: 0.995344340801239,grad_norm: 0.8022208254577992, iteration: 101671
loss: 1.0160646438598633,grad_norm: 0.9999996404558232, iteration: 101672
loss: 0.9890438914299011,grad_norm: 0.9999990349828357, iteration: 101673
loss: 1.021701693534851,grad_norm: 0.9999990687126447, iteration: 101674
loss: 1.0084201097488403,grad_norm: 0.8523461202806324, iteration: 101675
loss: 1.0303102731704712,grad_norm: 0.9999993936480615, iteration: 101676
loss: 1.0060174465179443,grad_norm: 0.9999989735270093, iteration: 101677
loss: 1.013180136680603,grad_norm: 0.9999991076349408, iteration: 101678
loss: 0.9975768327713013,grad_norm: 0.7657328031964767, iteration: 101679
loss: 0.9593982696533203,grad_norm: 0.931254545866438, iteration: 101680
loss: 1.0089294910430908,grad_norm: 0.9999998164446524, iteration: 101681
loss: 1.0163096189498901,grad_norm: 0.9999998273220054, iteration: 101682
loss: 1.0150182247161865,grad_norm: 0.9999991189747904, iteration: 101683
loss: 1.0124835968017578,grad_norm: 0.8364107691046199, iteration: 101684
loss: 0.9991224408149719,grad_norm: 0.8112875204871456, iteration: 101685
loss: 0.9772090911865234,grad_norm: 0.9322622109364417, iteration: 101686
loss: 1.016957402229309,grad_norm: 0.7571998587787793, iteration: 101687
loss: 1.014344334602356,grad_norm: 0.9999996146751512, iteration: 101688
loss: 1.0954699516296387,grad_norm: 0.9999998878704895, iteration: 101689
loss: 1.0354888439178467,grad_norm: 0.9999991688826093, iteration: 101690
loss: 1.0031943321228027,grad_norm: 0.9490389758277367, iteration: 101691
loss: 0.9938443303108215,grad_norm: 0.9999993653756699, iteration: 101692
loss: 1.0404956340789795,grad_norm: 0.9999991626741951, iteration: 101693
loss: 1.0281760692596436,grad_norm: 0.999999357776886, iteration: 101694
loss: 1.031449794769287,grad_norm: 0.999999284881224, iteration: 101695
loss: 0.974023699760437,grad_norm: 0.9999990831230586, iteration: 101696
loss: 1.1495096683502197,grad_norm: 0.9999995005260229, iteration: 101697
loss: 1.0071607828140259,grad_norm: 0.9855048681261133, iteration: 101698
loss: 1.0194437503814697,grad_norm: 0.9715163276807814, iteration: 101699
loss: 0.9877517223358154,grad_norm: 0.9240282690159316, iteration: 101700
loss: 0.9942724704742432,grad_norm: 0.8515146179732406, iteration: 101701
loss: 0.995463490486145,grad_norm: 0.9999990202244041, iteration: 101702
loss: 0.980937123298645,grad_norm: 0.913629665426445, iteration: 101703
loss: 0.9560847282409668,grad_norm: 0.999999015327418, iteration: 101704
loss: 1.0327348709106445,grad_norm: 0.999999125089917, iteration: 101705
loss: 1.0142306089401245,grad_norm: 0.7418583826474228, iteration: 101706
loss: 0.9921574592590332,grad_norm: 0.9999989771180698, iteration: 101707
loss: 0.9986140727996826,grad_norm: 0.9337192313323307, iteration: 101708
loss: 0.9922730326652527,grad_norm: 0.8499479175614912, iteration: 101709
loss: 1.014081358909607,grad_norm: 0.9976681472438899, iteration: 101710
loss: 1.0424754619598389,grad_norm: 0.9999996141146585, iteration: 101711
loss: 1.0182197093963623,grad_norm: 0.9999996569112103, iteration: 101712
loss: 0.9965040683746338,grad_norm: 0.9999991736278565, iteration: 101713
loss: 0.9927661418914795,grad_norm: 0.9251850414349113, iteration: 101714
loss: 1.055464744567871,grad_norm: 0.9999996279476706, iteration: 101715
loss: 1.02625572681427,grad_norm: 0.9300122525701217, iteration: 101716
loss: 0.9949479699134827,grad_norm: 0.9999995257205792, iteration: 101717
loss: 1.023008942604065,grad_norm: 0.9838454795149205, iteration: 101718
loss: 0.9896848797798157,grad_norm: 0.8650267984037853, iteration: 101719
loss: 1.3830044269561768,grad_norm: 0.999999675382712, iteration: 101720
loss: 0.956638514995575,grad_norm: 0.8974681667988972, iteration: 101721
loss: 0.9777442812919617,grad_norm: 0.9551091492496167, iteration: 101722
loss: 1.0299938917160034,grad_norm: 0.9188440168935345, iteration: 101723
loss: 1.0517828464508057,grad_norm: 0.8085308994818118, iteration: 101724
loss: 1.0151163339614868,grad_norm: 0.9577222290387685, iteration: 101725
loss: 0.9867793321609497,grad_norm: 0.9089384864917897, iteration: 101726
loss: 1.0074082612991333,grad_norm: 0.9999992247179442, iteration: 101727
loss: 1.0353353023529053,grad_norm: 0.9999989549785321, iteration: 101728
loss: 1.0427168607711792,grad_norm: 0.9999991257742201, iteration: 101729
loss: 1.0331125259399414,grad_norm: 0.999999567839283, iteration: 101730
loss: 1.0267493724822998,grad_norm: 0.9999991532833427, iteration: 101731
loss: 0.9935360550880432,grad_norm: 0.7264563561664338, iteration: 101732
loss: 1.0170924663543701,grad_norm: 0.9426026494849172, iteration: 101733
loss: 1.050494909286499,grad_norm: 0.9999994416147702, iteration: 101734
loss: 1.0233153104782104,grad_norm: 0.8112517248799064, iteration: 101735
loss: 0.9713336229324341,grad_norm: 0.9999996205062056, iteration: 101736
loss: 1.0239946842193604,grad_norm: 0.9773842948170803, iteration: 101737
loss: 1.008386254310608,grad_norm: 0.9999992360053266, iteration: 101738
loss: 0.9972615838050842,grad_norm: 0.9512128204507128, iteration: 101739
loss: 1.0020194053649902,grad_norm: 0.9999999793489499, iteration: 101740
loss: 0.9796470999717712,grad_norm: 0.9999993552527691, iteration: 101741
loss: 1.0794111490249634,grad_norm: 0.9536051929030405, iteration: 101742
loss: 1.0302032232284546,grad_norm: 0.9999991700761827, iteration: 101743
loss: 0.9726319909095764,grad_norm: 0.96618707226955, iteration: 101744
loss: 1.0057682991027832,grad_norm: 0.9871697389120722, iteration: 101745
loss: 1.0093650817871094,grad_norm: 0.9881559476098879, iteration: 101746
loss: 1.106149673461914,grad_norm: 0.9019251042682773, iteration: 101747
loss: 1.0466413497924805,grad_norm: 0.9999991148553362, iteration: 101748
loss: 0.9804748296737671,grad_norm: 0.7902834807760153, iteration: 101749
loss: 0.9865288138389587,grad_norm: 0.9999994107317439, iteration: 101750
loss: 1.044975757598877,grad_norm: 0.9431477237699812, iteration: 101751
loss: 1.0012667179107666,grad_norm: 0.8157665774933409, iteration: 101752
loss: 1.0085147619247437,grad_norm: 0.9297497121572699, iteration: 101753
loss: 1.0727012157440186,grad_norm: 0.8560567153636165, iteration: 101754
loss: 1.261916160583496,grad_norm: 0.9999993530964602, iteration: 101755
loss: 0.966942548751831,grad_norm: 0.8153318378603283, iteration: 101756
loss: 1.0197653770446777,grad_norm: 0.9999990790231481, iteration: 101757
loss: 1.01006281375885,grad_norm: 0.9999992915290633, iteration: 101758
loss: 1.0081583261489868,grad_norm: 0.748993573179407, iteration: 101759
loss: 1.1340258121490479,grad_norm: 0.9999996494993928, iteration: 101760
loss: 1.0524405241012573,grad_norm: 0.9999991109778947, iteration: 101761
loss: 1.072330355644226,grad_norm: 0.999999325321733, iteration: 101762
loss: 1.0396690368652344,grad_norm: 0.9999990850468744, iteration: 101763
loss: 1.0671947002410889,grad_norm: 0.9999995917681836, iteration: 101764
loss: 0.9959831237792969,grad_norm: 0.7493594369969064, iteration: 101765
loss: 0.9939230680465698,grad_norm: 0.9999993999193065, iteration: 101766
loss: 1.0148255825042725,grad_norm: 0.864618777711418, iteration: 101767
loss: 1.0913023948669434,grad_norm: 0.9999998169352482, iteration: 101768
loss: 0.9822506904602051,grad_norm: 0.8183359178265555, iteration: 101769
loss: 1.002543330192566,grad_norm: 0.8703055419349418, iteration: 101770
loss: 1.0188655853271484,grad_norm: 0.801263841283475, iteration: 101771
loss: 1.0107582807540894,grad_norm: 0.9999994425643766, iteration: 101772
loss: 1.077845811843872,grad_norm: 0.9999996128000279, iteration: 101773
loss: 1.0046720504760742,grad_norm: 0.999999330440997, iteration: 101774
loss: 1.1974897384643555,grad_norm: 0.9999991327140065, iteration: 101775
loss: 1.0729281902313232,grad_norm: 0.9999992007073736, iteration: 101776
loss: 1.0414081811904907,grad_norm: 0.8796013923115815, iteration: 101777
loss: 0.951117992401123,grad_norm: 0.9597022825846766, iteration: 101778
loss: 1.0507656335830688,grad_norm: 0.9999994362000643, iteration: 101779
loss: 1.0165414810180664,grad_norm: 0.9999996251647951, iteration: 101780
loss: 0.9991529583930969,grad_norm: 0.9999991831941415, iteration: 101781
loss: 1.3859821557998657,grad_norm: 0.9999998694898836, iteration: 101782
loss: 1.0871018171310425,grad_norm: 0.9999997204069274, iteration: 101783
loss: 1.1658753156661987,grad_norm: 0.9999999083251613, iteration: 101784
loss: 1.010325312614441,grad_norm: 0.8248488654214206, iteration: 101785
loss: 1.0825088024139404,grad_norm: 0.9999992831259789, iteration: 101786
loss: 1.1504954099655151,grad_norm: 0.999999214032297, iteration: 101787
loss: 1.0343818664550781,grad_norm: 0.9999990228001796, iteration: 101788
loss: 0.9941849112510681,grad_norm: 0.7690175351379702, iteration: 101789
loss: 1.172899842262268,grad_norm: 0.999999728539837, iteration: 101790
loss: 1.0272059440612793,grad_norm: 0.9999990539118959, iteration: 101791
loss: 1.0007020235061646,grad_norm: 0.9999992675522655, iteration: 101792
loss: 1.0258599519729614,grad_norm: 0.9999998744167187, iteration: 101793
loss: 1.0645431280136108,grad_norm: 0.8865482297102028, iteration: 101794
loss: 1.50212824344635,grad_norm: 0.9999998642332093, iteration: 101795
loss: 1.0412070751190186,grad_norm: 0.9999999219416691, iteration: 101796
loss: 1.0183101892471313,grad_norm: 0.9999990944420212, iteration: 101797
loss: 0.9843898415565491,grad_norm: 0.999998946190535, iteration: 101798
loss: 1.0502307415008545,grad_norm: 0.8281636365554412, iteration: 101799
loss: 1.0290255546569824,grad_norm: 0.9999991607710672, iteration: 101800
loss: 1.004345178604126,grad_norm: 0.9999992012778287, iteration: 101801
loss: 1.0541999340057373,grad_norm: 0.999999339621586, iteration: 101802
loss: 1.015169382095337,grad_norm: 0.9999989854615126, iteration: 101803
loss: 1.0936367511749268,grad_norm: 0.9999996954508562, iteration: 101804
loss: 0.9822686314582825,grad_norm: 0.9999990416734023, iteration: 101805
loss: 1.0024957656860352,grad_norm: 0.7948568931570785, iteration: 101806
loss: 0.9562799334526062,grad_norm: 0.9999990122699898, iteration: 101807
loss: 1.0229243040084839,grad_norm: 0.9999997607008053, iteration: 101808
loss: 1.005837321281433,grad_norm: 0.9999999605132799, iteration: 101809
loss: 0.9991252422332764,grad_norm: 0.9999990578584002, iteration: 101810
loss: 1.0844563245773315,grad_norm: 0.9999991507453927, iteration: 101811
loss: 0.9873878955841064,grad_norm: 0.9747716969378797, iteration: 101812
loss: 1.005115032196045,grad_norm: 0.9999991262838741, iteration: 101813
loss: 0.9853333234786987,grad_norm: 0.9999996152230033, iteration: 101814
loss: 1.0458377599716187,grad_norm: 0.8686845170606108, iteration: 101815
loss: 0.9990798830986023,grad_norm: 0.9999993386418279, iteration: 101816
loss: 1.03975248336792,grad_norm: 0.9999992426998128, iteration: 101817
loss: 0.9526810646057129,grad_norm: 0.7850483864798209, iteration: 101818
loss: 0.985313355922699,grad_norm: 0.8304546410585076, iteration: 101819
loss: 0.9775469899177551,grad_norm: 0.9999990038461121, iteration: 101820
loss: 0.9616034626960754,grad_norm: 0.9478914717587207, iteration: 101821
loss: 0.9741153120994568,grad_norm: 0.9899500343204701, iteration: 101822
loss: 0.9789202809333801,grad_norm: 0.9345845870799568, iteration: 101823
loss: 0.9757094979286194,grad_norm: 0.9949938113776828, iteration: 101824
loss: 0.994624674320221,grad_norm: 0.8369247100051563, iteration: 101825
loss: 0.975347638130188,grad_norm: 0.9239647780619789, iteration: 101826
loss: 1.0151896476745605,grad_norm: 0.9076302857558789, iteration: 101827
loss: 0.9924666285514832,grad_norm: 0.8258561176246786, iteration: 101828
loss: 1.0117651224136353,grad_norm: 0.9999991657160726, iteration: 101829
loss: 1.0420916080474854,grad_norm: 0.9729718614130991, iteration: 101830
loss: 1.0111033916473389,grad_norm: 0.9315355477010954, iteration: 101831
loss: 0.9660928249359131,grad_norm: 0.9292464149154102, iteration: 101832
loss: 1.0150234699249268,grad_norm: 0.8533226895434425, iteration: 101833
loss: 1.012865424156189,grad_norm: 0.9934344756122176, iteration: 101834
loss: 1.0988292694091797,grad_norm: 0.9999995769975432, iteration: 101835
loss: 1.020723581314087,grad_norm: 0.8882761673143441, iteration: 101836
loss: 1.018231749534607,grad_norm: 0.9999994627655124, iteration: 101837
loss: 1.0034232139587402,grad_norm: 0.9149938719696253, iteration: 101838
loss: 1.017897367477417,grad_norm: 0.9999994972267668, iteration: 101839
loss: 0.949877917766571,grad_norm: 0.999999203010852, iteration: 101840
loss: 1.002181053161621,grad_norm: 0.9999991594030812, iteration: 101841
loss: 1.0006134510040283,grad_norm: 0.8271429229411935, iteration: 101842
loss: 1.0234770774841309,grad_norm: 0.9999992074062356, iteration: 101843
loss: 1.0215977430343628,grad_norm: 0.9999991308196103, iteration: 101844
loss: 0.9892822504043579,grad_norm: 0.8351038203002096, iteration: 101845
loss: 0.9920090436935425,grad_norm: 0.8473467957792041, iteration: 101846
loss: 0.9420005083084106,grad_norm: 0.9999994499835929, iteration: 101847
loss: 0.9849181175231934,grad_norm: 0.9999996303095612, iteration: 101848
loss: 1.0030471086502075,grad_norm: 0.9344450415584564, iteration: 101849
loss: 1.006036639213562,grad_norm: 0.9999991910749437, iteration: 101850
loss: 1.014352560043335,grad_norm: 0.8254965798930878, iteration: 101851
loss: 1.0207628011703491,grad_norm: 0.9999991951969873, iteration: 101852
loss: 0.9755051136016846,grad_norm: 0.8706147486805014, iteration: 101853
loss: 0.9987012147903442,grad_norm: 0.9527650728330147, iteration: 101854
loss: 1.0475804805755615,grad_norm: 0.9999994343146227, iteration: 101855
loss: 1.0087066888809204,grad_norm: 0.9999991061862741, iteration: 101856
loss: 0.994268000125885,grad_norm: 0.8314346249129266, iteration: 101857
loss: 0.9969932436943054,grad_norm: 0.8802593897830052, iteration: 101858
loss: 1.006588339805603,grad_norm: 0.8202840079685946, iteration: 101859
loss: 1.0155898332595825,grad_norm: 0.7346935700587042, iteration: 101860
loss: 0.9920941591262817,grad_norm: 0.9847114341962632, iteration: 101861
loss: 1.0176262855529785,grad_norm: 0.9464003329490294, iteration: 101862
loss: 1.0077223777770996,grad_norm: 0.9767610072012228, iteration: 101863
loss: 0.9823943972587585,grad_norm: 0.7709883813037368, iteration: 101864
loss: 0.9946058392524719,grad_norm: 0.9246951228713568, iteration: 101865
loss: 0.9932297468185425,grad_norm: 0.9203169018334578, iteration: 101866
loss: 1.0634934902191162,grad_norm: 0.9999992383216314, iteration: 101867
loss: 1.0562549829483032,grad_norm: 0.9549975130884685, iteration: 101868
loss: 1.1093319654464722,grad_norm: 0.999999496763281, iteration: 101869
loss: 0.9961797595024109,grad_norm: 0.889324144546797, iteration: 101870
loss: 1.0140339136123657,grad_norm: 0.9999995403230452, iteration: 101871
loss: 0.9816286563873291,grad_norm: 0.8257285455949354, iteration: 101872
loss: 0.9901536107063293,grad_norm: 0.8820425647822053, iteration: 101873
loss: 0.9885406494140625,grad_norm: 0.8657624722591554, iteration: 101874
loss: 0.9979215264320374,grad_norm: 0.9119108729592527, iteration: 101875
loss: 1.1989260911941528,grad_norm: 0.9382900656633979, iteration: 101876
loss: 1.0442982912063599,grad_norm: 0.9999996180883797, iteration: 101877
loss: 0.969821572303772,grad_norm: 0.9569112399422335, iteration: 101878
loss: 1.021886944770813,grad_norm: 0.9063185685432955, iteration: 101879
loss: 1.0328924655914307,grad_norm: 0.784957885747853, iteration: 101880
loss: 1.0737333297729492,grad_norm: 0.9999995033543932, iteration: 101881
loss: 1.0345568656921387,grad_norm: 0.8335238700003066, iteration: 101882
loss: 0.976122260093689,grad_norm: 0.9385901594394306, iteration: 101883
loss: 0.9895192980766296,grad_norm: 0.9968918965055031, iteration: 101884
loss: 1.031374454498291,grad_norm: 0.999999176947975, iteration: 101885
loss: 1.0162789821624756,grad_norm: 0.999999768311605, iteration: 101886
loss: 0.9944420456886292,grad_norm: 0.9730944524483458, iteration: 101887
loss: 1.218073844909668,grad_norm: 0.9999997042670973, iteration: 101888
loss: 1.0023680925369263,grad_norm: 0.8862827812908797, iteration: 101889
loss: 0.9696475863456726,grad_norm: 0.9057443782644321, iteration: 101890
loss: 0.9668161273002625,grad_norm: 0.999999345707027, iteration: 101891
loss: 1.0043777227401733,grad_norm: 0.9383437873619966, iteration: 101892
loss: 0.9866607189178467,grad_norm: 0.9600439546382301, iteration: 101893
loss: 0.9955191016197205,grad_norm: 0.9999995549631612, iteration: 101894
loss: 1.0345648527145386,grad_norm: 0.9999992091321777, iteration: 101895
loss: 1.019442081451416,grad_norm: 0.9559809002066666, iteration: 101896
loss: 1.051137089729309,grad_norm: 0.8987775709839647, iteration: 101897
loss: 1.0062445402145386,grad_norm: 0.9999991915511303, iteration: 101898
loss: 1.016658902168274,grad_norm: 0.8578840388783485, iteration: 101899
loss: 1.0161008834838867,grad_norm: 0.9999992312303551, iteration: 101900
loss: 1.087587594985962,grad_norm: 0.9999993485860158, iteration: 101901
loss: 0.9828100204467773,grad_norm: 0.8137458459942774, iteration: 101902
loss: 0.9951816201210022,grad_norm: 0.9837570673617662, iteration: 101903
loss: 1.0033098459243774,grad_norm: 0.8628835096962716, iteration: 101904
loss: 1.0166724920272827,grad_norm: 0.8767270680602647, iteration: 101905
loss: 1.0518739223480225,grad_norm: 0.9999990521445539, iteration: 101906
loss: 1.0604500770568848,grad_norm: 0.8847543497874129, iteration: 101907
loss: 1.0039398670196533,grad_norm: 0.8867060392803113, iteration: 101908
loss: 1.0275977849960327,grad_norm: 0.9999990988829317, iteration: 101909
loss: 1.0341014862060547,grad_norm: 0.9999996443914902, iteration: 101910
loss: 1.068028450012207,grad_norm: 0.9999997584103972, iteration: 101911
loss: 1.1619340181350708,grad_norm: 0.9999996515415579, iteration: 101912
loss: 0.9731667637825012,grad_norm: 0.91124057917463, iteration: 101913
loss: 1.003659725189209,grad_norm: 0.9999997299788412, iteration: 101914
loss: 1.066759467124939,grad_norm: 0.999999543560901, iteration: 101915
loss: 0.9831953644752502,grad_norm: 0.8479897422815739, iteration: 101916
loss: 1.0075725317001343,grad_norm: 0.7887745533355898, iteration: 101917
loss: 0.9772061109542847,grad_norm: 0.8840714279518316, iteration: 101918
loss: 1.0194108486175537,grad_norm: 0.7564184665299744, iteration: 101919
loss: 0.9909453392028809,grad_norm: 0.9595158189580055, iteration: 101920
loss: 0.998755156993866,grad_norm: 0.8797402369921591, iteration: 101921
loss: 0.98785799741745,grad_norm: 0.8514437947316584, iteration: 101922
loss: 1.0629944801330566,grad_norm: 0.8228406261478067, iteration: 101923
loss: 1.0367465019226074,grad_norm: 0.9999992757702306, iteration: 101924
loss: 1.0182774066925049,grad_norm: 0.999999116110016, iteration: 101925
loss: 1.0187371969223022,grad_norm: 0.999999602265086, iteration: 101926
loss: 0.958905041217804,grad_norm: 0.8499643812242692, iteration: 101927
loss: 0.9960753321647644,grad_norm: 0.9999990719700008, iteration: 101928
loss: 1.1618261337280273,grad_norm: 0.9999991989673686, iteration: 101929
loss: 0.9937800168991089,grad_norm: 0.7667714325696087, iteration: 101930
loss: 1.0240042209625244,grad_norm: 0.8766017442175571, iteration: 101931
loss: 1.0185818672180176,grad_norm: 0.769781014579062, iteration: 101932
loss: 1.0370659828186035,grad_norm: 0.9999995849266035, iteration: 101933
loss: 1.0197786092758179,grad_norm: 0.8917262348687324, iteration: 101934
loss: 1.053410530090332,grad_norm: 0.9999992440571714, iteration: 101935
loss: 1.005510687828064,grad_norm: 0.9192508013774925, iteration: 101936
loss: 0.9549323320388794,grad_norm: 0.9606112700883933, iteration: 101937
loss: 0.9981223940849304,grad_norm: 0.9999990143134209, iteration: 101938
loss: 1.0460188388824463,grad_norm: 0.9999994365307839, iteration: 101939
loss: 1.0125633478164673,grad_norm: 0.9999990552858204, iteration: 101940
loss: 1.0082298517227173,grad_norm: 0.8162705203169932, iteration: 101941
loss: 1.0448236465454102,grad_norm: 0.9919581701124625, iteration: 101942
loss: 1.0164164304733276,grad_norm: 0.9598006360011042, iteration: 101943
loss: 0.9730531573295593,grad_norm: 0.8882917806129447, iteration: 101944
loss: 0.9841738939285278,grad_norm: 0.9146731831978356, iteration: 101945
loss: 0.993485152721405,grad_norm: 0.9208914645366253, iteration: 101946
loss: 1.0007623434066772,grad_norm: 0.970953902644408, iteration: 101947
loss: 0.9950444102287292,grad_norm: 0.9999994366415602, iteration: 101948
loss: 0.9976019859313965,grad_norm: 0.8139241926216155, iteration: 101949
loss: 0.9882537126541138,grad_norm: 0.8583723326619301, iteration: 101950
loss: 1.1780884265899658,grad_norm: 0.9999998175396263, iteration: 101951
loss: 1.020784616470337,grad_norm: 0.9196331014967392, iteration: 101952
loss: 1.0125994682312012,grad_norm: 0.7965636152311864, iteration: 101953
loss: 1.039428472518921,grad_norm: 0.9999990918645867, iteration: 101954
loss: 0.9777067303657532,grad_norm: 0.8301105723386881, iteration: 101955
loss: 0.9785263538360596,grad_norm: 0.9940946797292836, iteration: 101956
loss: 0.9937784075737,grad_norm: 0.9155378098488005, iteration: 101957
loss: 1.0374338626861572,grad_norm: 0.9999996521741438, iteration: 101958
loss: 0.969528317451477,grad_norm: 0.8973716321230477, iteration: 101959
loss: 0.9901317954063416,grad_norm: 0.9481915938661523, iteration: 101960
loss: 1.0367412567138672,grad_norm: 0.8543258897723431, iteration: 101961
loss: 1.0642188787460327,grad_norm: 0.999999583519851, iteration: 101962
loss: 1.0206106901168823,grad_norm: 0.9999991167808109, iteration: 101963
loss: 1.0715121030807495,grad_norm: 0.999999894279449, iteration: 101964
loss: 0.9976215362548828,grad_norm: 0.9999995087727332, iteration: 101965
loss: 1.004066824913025,grad_norm: 0.8993878277540218, iteration: 101966
loss: 1.0169384479522705,grad_norm: 0.92729281070616, iteration: 101967
loss: 1.04802668094635,grad_norm: 0.9999991378943678, iteration: 101968
loss: 1.003040075302124,grad_norm: 0.9166209583064139, iteration: 101969
loss: 1.029225468635559,grad_norm: 0.9999991454391763, iteration: 101970
loss: 0.9978064894676208,grad_norm: 0.853376195258093, iteration: 101971
loss: 1.0172208547592163,grad_norm: 0.8360685475705962, iteration: 101972
loss: 1.276703119277954,grad_norm: 0.9999996405771256, iteration: 101973
loss: 0.9952973127365112,grad_norm: 0.8831849702386914, iteration: 101974
loss: 1.0372732877731323,grad_norm: 0.9448109973898877, iteration: 101975
loss: 0.9899096488952637,grad_norm: 0.8552398845352949, iteration: 101976
loss: 0.999164879322052,grad_norm: 0.972882949611273, iteration: 101977
loss: 1.035426378250122,grad_norm: 0.9999997996141586, iteration: 101978
loss: 0.9912015795707703,grad_norm: 0.9258652539784753, iteration: 101979
loss: 1.0098499059677124,grad_norm: 0.9999992423437352, iteration: 101980
loss: 0.9564038515090942,grad_norm: 0.8230584456133503, iteration: 101981
loss: 1.0176159143447876,grad_norm: 0.7868077919343336, iteration: 101982
loss: 0.9848639369010925,grad_norm: 0.9238683930734166, iteration: 101983
loss: 1.0471988916397095,grad_norm: 0.8179685439134595, iteration: 101984
loss: 1.0316264629364014,grad_norm: 0.8072525789781061, iteration: 101985
loss: 1.0448100566864014,grad_norm: 0.9999996556712705, iteration: 101986
loss: 0.9799057245254517,grad_norm: 0.9521916203771068, iteration: 101987
loss: 1.019648551940918,grad_norm: 0.9999994678675036, iteration: 101988
loss: 1.0482418537139893,grad_norm: 0.8020366430422966, iteration: 101989
loss: 0.9909295439720154,grad_norm: 0.8041735658599922, iteration: 101990
loss: 1.0047510862350464,grad_norm: 0.789906111329022, iteration: 101991
loss: 1.0135846138000488,grad_norm: 0.8237038894421708, iteration: 101992
loss: 1.011281967163086,grad_norm: 0.7793125984986119, iteration: 101993
loss: 0.9837038516998291,grad_norm: 0.9999994999103786, iteration: 101994
loss: 1.059996247291565,grad_norm: 0.9999999536194806, iteration: 101995
loss: 1.0388902425765991,grad_norm: 0.9352805897432829, iteration: 101996
loss: 0.9956631064414978,grad_norm: 0.941320982487403, iteration: 101997
loss: 1.0302393436431885,grad_norm: 0.948117130306475, iteration: 101998
loss: 0.9846283793449402,grad_norm: 0.9999990435925424, iteration: 101999
loss: 1.0175763368606567,grad_norm: 0.9999997091398668, iteration: 102000
loss: 0.9758235812187195,grad_norm: 0.8637028796521794, iteration: 102001
loss: 0.9810997247695923,grad_norm: 0.9999993421556939, iteration: 102002
loss: 1.0007777214050293,grad_norm: 0.8712837505554303, iteration: 102003
loss: 1.0340970754623413,grad_norm: 0.8746911897890789, iteration: 102004
loss: 1.004534363746643,grad_norm: 0.9338140414382032, iteration: 102005
loss: 1.0009076595306396,grad_norm: 0.8364348353522425, iteration: 102006
loss: 0.9985466599464417,grad_norm: 0.8948102390956861, iteration: 102007
loss: 0.9954898953437805,grad_norm: 0.9999991749014431, iteration: 102008
loss: 0.961402177810669,grad_norm: 0.9170187122196072, iteration: 102009
loss: 1.0089861154556274,grad_norm: 0.8471677301470556, iteration: 102010
loss: 1.0273854732513428,grad_norm: 0.9154999553882314, iteration: 102011
loss: 1.013231635093689,grad_norm: 0.7809718252142457, iteration: 102012
loss: 0.9714848399162292,grad_norm: 0.9999993700489833, iteration: 102013
loss: 1.025433897972107,grad_norm: 0.9999993414424719, iteration: 102014
loss: 0.988680899143219,grad_norm: 0.8908554672726623, iteration: 102015
loss: 1.0356954336166382,grad_norm: 0.9999996422003427, iteration: 102016
loss: 0.9824424386024475,grad_norm: 0.920057899359272, iteration: 102017
loss: 1.0585919618606567,grad_norm: 0.9999996809747912, iteration: 102018
loss: 0.9952182769775391,grad_norm: 0.9999990331848906, iteration: 102019
loss: 0.9951732754707336,grad_norm: 0.8833468920930209, iteration: 102020
loss: 1.135623574256897,grad_norm: 0.8025399749590048, iteration: 102021
loss: 0.9980571866035461,grad_norm: 0.9999990320795878, iteration: 102022
loss: 0.9904248118400574,grad_norm: 0.8156225371549998, iteration: 102023
loss: 1.0091373920440674,grad_norm: 0.9590751471842329, iteration: 102024
loss: 1.014391541481018,grad_norm: 0.8657357623323696, iteration: 102025
loss: 0.9752082824707031,grad_norm: 0.7871363398122585, iteration: 102026
loss: 0.9808477759361267,grad_norm: 0.9999991266021913, iteration: 102027
loss: 1.0687811374664307,grad_norm: 0.9999992626788503, iteration: 102028
loss: 0.9938342571258545,grad_norm: 0.9782466014593519, iteration: 102029
loss: 0.9987667202949524,grad_norm: 0.8956750438517687, iteration: 102030
loss: 1.0238144397735596,grad_norm: 0.9363565632499589, iteration: 102031
loss: 1.0299421548843384,grad_norm: 0.9999997834575208, iteration: 102032
loss: 0.9700980186462402,grad_norm: 0.9999990517646243, iteration: 102033
loss: 0.9809637069702148,grad_norm: 0.9330842317451283, iteration: 102034
loss: 1.0045580863952637,grad_norm: 0.9999997669790891, iteration: 102035
loss: 0.9992600083351135,grad_norm: 0.8098521497383566, iteration: 102036
loss: 1.0119454860687256,grad_norm: 0.7635890101023549, iteration: 102037
loss: 1.0074666738510132,grad_norm: 0.9999991290619751, iteration: 102038
loss: 1.0167711973190308,grad_norm: 0.9966776734629518, iteration: 102039
loss: 0.9978551268577576,grad_norm: 0.9049881704816399, iteration: 102040
loss: 1.0472238063812256,grad_norm: 0.7927397963647522, iteration: 102041
loss: 1.012603759765625,grad_norm: 0.9641109796957315, iteration: 102042
loss: 0.9596616625785828,grad_norm: 0.9999990359319022, iteration: 102043
loss: 1.0005567073822021,grad_norm: 0.8770275351348654, iteration: 102044
loss: 1.0001978874206543,grad_norm: 0.8604825692732536, iteration: 102045
loss: 1.0667428970336914,grad_norm: 0.9056671092873586, iteration: 102046
loss: 0.9941012263298035,grad_norm: 0.9999994045666386, iteration: 102047
loss: 1.0099316835403442,grad_norm: 0.8144501465379307, iteration: 102048
loss: 1.0344527959823608,grad_norm: 0.999999377511419, iteration: 102049
loss: 0.9911885261535645,grad_norm: 0.8732083696848177, iteration: 102050
loss: 0.9743213057518005,grad_norm: 0.7091199045784252, iteration: 102051
loss: 1.412502646446228,grad_norm: 0.9999991673145184, iteration: 102052
loss: 0.9993482828140259,grad_norm: 0.9999993589580932, iteration: 102053
loss: 1.0038262605667114,grad_norm: 0.8353138807703788, iteration: 102054
loss: 1.0261024236679077,grad_norm: 0.9358455250783081, iteration: 102055
loss: 0.9879549145698547,grad_norm: 0.9777521965233248, iteration: 102056
loss: 0.989183247089386,grad_norm: 0.9878331934607698, iteration: 102057
loss: 1.0221000909805298,grad_norm: 0.8703527660380611, iteration: 102058
loss: 0.9938891530036926,grad_norm: 0.9999990504768455, iteration: 102059
loss: 0.962107241153717,grad_norm: 0.9520161523305031, iteration: 102060
loss: 0.9966356158256531,grad_norm: 0.905821093945048, iteration: 102061
loss: 1.0311641693115234,grad_norm: 0.999999278373912, iteration: 102062
loss: 0.9907448291778564,grad_norm: 0.9999993604258355, iteration: 102063
loss: 1.0216314792633057,grad_norm: 0.8206504490059445, iteration: 102064
loss: 1.0300958156585693,grad_norm: 0.9134393314903382, iteration: 102065
loss: 0.9877365827560425,grad_norm: 0.9999990520090585, iteration: 102066
loss: 1.0042598247528076,grad_norm: 0.8398297859948773, iteration: 102067
loss: 0.984180212020874,grad_norm: 0.9999991216254439, iteration: 102068
loss: 1.0689729452133179,grad_norm: 0.9999995348589639, iteration: 102069
loss: 0.986403226852417,grad_norm: 0.9710317459603678, iteration: 102070
loss: 1.0125981569290161,grad_norm: 0.9777013316955918, iteration: 102071
loss: 1.0661441087722778,grad_norm: 0.9999995444605807, iteration: 102072
loss: 0.9752708077430725,grad_norm: 0.8978319111305543, iteration: 102073
loss: 0.9954227805137634,grad_norm: 0.9010131163008042, iteration: 102074
loss: 0.9620682597160339,grad_norm: 0.8582118261053084, iteration: 102075
loss: 1.0051217079162598,grad_norm: 0.9492490001637826, iteration: 102076
loss: 1.0189284086227417,grad_norm: 0.8208663166764879, iteration: 102077
loss: 0.9644901752471924,grad_norm: 0.9999991418599022, iteration: 102078
loss: 1.0015571117401123,grad_norm: 0.9181554613853183, iteration: 102079
loss: 1.0335726737976074,grad_norm: 0.987680465096901, iteration: 102080
loss: 1.0884840488433838,grad_norm: 0.9999994558546889, iteration: 102081
loss: 1.00973641872406,grad_norm: 0.8812466800225569, iteration: 102082
loss: 1.008804440498352,grad_norm: 0.9165907774628577, iteration: 102083
loss: 1.051056146621704,grad_norm: 0.9999996503118447, iteration: 102084
loss: 0.9841063618659973,grad_norm: 0.9999991558943416, iteration: 102085
loss: 0.9495010375976562,grad_norm: 0.8780037196319652, iteration: 102086
loss: 1.0523951053619385,grad_norm: 0.99999977414128, iteration: 102087
loss: 0.9619520306587219,grad_norm: 0.85162099528366, iteration: 102088
loss: 1.0129460096359253,grad_norm: 0.9213129648356229, iteration: 102089
loss: 0.9960823655128479,grad_norm: 0.9307774740016863, iteration: 102090
loss: 1.013976812362671,grad_norm: 0.9999991969420952, iteration: 102091
loss: 1.0314689874649048,grad_norm: 0.9548309259115093, iteration: 102092
loss: 1.0343948602676392,grad_norm: 0.8614576992590562, iteration: 102093
loss: 1.1009076833724976,grad_norm: 0.9999997520305138, iteration: 102094
loss: 1.0044276714324951,grad_norm: 0.9999991943683566, iteration: 102095
loss: 0.9785490036010742,grad_norm: 0.9999997397698761, iteration: 102096
loss: 1.0550205707550049,grad_norm: 0.9999997674957832, iteration: 102097
loss: 1.0257067680358887,grad_norm: 0.999999409924532, iteration: 102098
loss: 1.0290762186050415,grad_norm: 0.9999994423968118, iteration: 102099
loss: 1.0427343845367432,grad_norm: 0.8623923460163504, iteration: 102100
loss: 1.0070325136184692,grad_norm: 0.8606474672498627, iteration: 102101
loss: 1.038271188735962,grad_norm: 0.99999912200163, iteration: 102102
loss: 0.9944366216659546,grad_norm: 0.9580899502613481, iteration: 102103
loss: 1.0343879461288452,grad_norm: 0.8959587976668617, iteration: 102104
loss: 1.0553984642028809,grad_norm: 0.9233738046235345, iteration: 102105
loss: 1.0270755290985107,grad_norm: 0.9295134869198262, iteration: 102106
loss: 0.9916671514511108,grad_norm: 0.8276358518234912, iteration: 102107
loss: 1.0109783411026,grad_norm: 0.9999991654638117, iteration: 102108
loss: 0.9938151240348816,grad_norm: 0.9407162307226012, iteration: 102109
loss: 1.0296216011047363,grad_norm: 0.9999991893561953, iteration: 102110
loss: 0.9932827949523926,grad_norm: 0.99999933282739, iteration: 102111
loss: 0.973517656326294,grad_norm: 0.9709557343300526, iteration: 102112
loss: 1.1128841638565063,grad_norm: 0.9089371250904507, iteration: 102113
loss: 0.9923943877220154,grad_norm: 0.9999991241353063, iteration: 102114
loss: 0.9978328347206116,grad_norm: 0.9355074698233417, iteration: 102115
loss: 0.9924556612968445,grad_norm: 0.9461327313411693, iteration: 102116
loss: 0.959309995174408,grad_norm: 0.9999990380890945, iteration: 102117
loss: 0.9921236634254456,grad_norm: 0.9300212558012807, iteration: 102118
loss: 0.9992936849594116,grad_norm: 0.9999997042419281, iteration: 102119
loss: 1.028482437133789,grad_norm: 0.9999992324017593, iteration: 102120
loss: 0.9802932739257812,grad_norm: 0.9500215297150283, iteration: 102121
loss: 1.0120960474014282,grad_norm: 0.9999990161221457, iteration: 102122
loss: 0.9944403767585754,grad_norm: 0.8208620409389618, iteration: 102123
loss: 1.0007752180099487,grad_norm: 0.9999991531140612, iteration: 102124
loss: 0.9611863493919373,grad_norm: 0.8735396095835861, iteration: 102125
loss: 0.9878652691841125,grad_norm: 0.9999996248386306, iteration: 102126
loss: 0.9985713958740234,grad_norm: 0.9999991172370162, iteration: 102127
loss: 0.989963173866272,grad_norm: 0.9801285313104977, iteration: 102128
loss: 1.0033787488937378,grad_norm: 0.7986378614647744, iteration: 102129
loss: 0.9858450889587402,grad_norm: 0.9515797569761211, iteration: 102130
loss: 1.0388567447662354,grad_norm: 0.9999999802254281, iteration: 102131
loss: 0.9989123940467834,grad_norm: 0.9666965762801975, iteration: 102132
loss: 0.9823799729347229,grad_norm: 0.8125852935081482, iteration: 102133
loss: 0.9873375296592712,grad_norm: 0.9999994221457188, iteration: 102134
loss: 1.0323811769485474,grad_norm: 0.9999991703687544, iteration: 102135
loss: 1.0057380199432373,grad_norm: 0.9175899014337912, iteration: 102136
loss: 1.0077252388000488,grad_norm: 0.8774621131952405, iteration: 102137
loss: 1.053276538848877,grad_norm: 0.9999993872790036, iteration: 102138
loss: 1.0469987392425537,grad_norm: 0.9999995232256248, iteration: 102139
loss: 1.0004380941390991,grad_norm: 0.9855215671417037, iteration: 102140
loss: 0.9963091611862183,grad_norm: 0.9029066904864085, iteration: 102141
loss: 1.0308054685592651,grad_norm: 0.9999991326140368, iteration: 102142
loss: 1.0243788957595825,grad_norm: 0.999999849558824, iteration: 102143
loss: 0.9967125058174133,grad_norm: 0.9999993305123819, iteration: 102144
loss: 0.9829329252243042,grad_norm: 0.9999992070403589, iteration: 102145
loss: 0.9872394800186157,grad_norm: 0.9524940894905983, iteration: 102146
loss: 1.1034660339355469,grad_norm: 0.9999996516389821, iteration: 102147
loss: 0.9903174638748169,grad_norm: 0.9999990284404496, iteration: 102148
loss: 1.002212405204773,grad_norm: 0.8584146120525598, iteration: 102149
loss: 0.9979633092880249,grad_norm: 0.9999990092040453, iteration: 102150
loss: 1.05126953125,grad_norm: 0.9999997416876643, iteration: 102151
loss: 1.0304186344146729,grad_norm: 0.9118324006184816, iteration: 102152
loss: 0.9869340062141418,grad_norm: 0.9999992170637942, iteration: 102153
loss: 1.0266642570495605,grad_norm: 0.9999991140767291, iteration: 102154
loss: 1.0302703380584717,grad_norm: 0.9478825623124144, iteration: 102155
loss: 1.0117686986923218,grad_norm: 0.8101942462825761, iteration: 102156
loss: 1.0073485374450684,grad_norm: 0.9999995065099573, iteration: 102157
loss: 0.9995847940444946,grad_norm: 0.8890243900846206, iteration: 102158
loss: 1.0068198442459106,grad_norm: 0.9633020145410998, iteration: 102159
loss: 0.9975424408912659,grad_norm: 0.9999990368155577, iteration: 102160
loss: 1.0010677576065063,grad_norm: 0.7917215686437126, iteration: 102161
loss: 1.0010592937469482,grad_norm: 0.967983511900032, iteration: 102162
loss: 1.0223201513290405,grad_norm: 0.9999996046074525, iteration: 102163
loss: 0.9861782193183899,grad_norm: 0.9246997479307955, iteration: 102164
loss: 1.0274653434753418,grad_norm: 0.9133907917464797, iteration: 102165
loss: 1.0087231397628784,grad_norm: 0.9999989984057908, iteration: 102166
loss: 1.0149354934692383,grad_norm: 0.9045387349484014, iteration: 102167
loss: 1.0206533670425415,grad_norm: 0.9999994163634235, iteration: 102168
loss: 1.027151107788086,grad_norm: 0.8581335813585329, iteration: 102169
loss: 0.9651314616203308,grad_norm: 0.9946103710132439, iteration: 102170
loss: 1.0044561624526978,grad_norm: 0.9999994100871841, iteration: 102171
loss: 0.9906667470932007,grad_norm: 0.8850632456301943, iteration: 102172
loss: 0.9942122101783752,grad_norm: 0.8491152654687516, iteration: 102173
loss: 1.0231823921203613,grad_norm: 0.8367450548102867, iteration: 102174
loss: 1.0285935401916504,grad_norm: 0.9999991013676097, iteration: 102175
loss: 1.0404826402664185,grad_norm: 0.9449975762876187, iteration: 102176
loss: 1.0317015647888184,grad_norm: 0.9999994331127715, iteration: 102177
loss: 1.0048041343688965,grad_norm: 0.9999998684362796, iteration: 102178
loss: 1.0003743171691895,grad_norm: 0.9999991563998852, iteration: 102179
loss: 0.9940226674079895,grad_norm: 0.8032766390876492, iteration: 102180
loss: 1.0186384916305542,grad_norm: 0.9683934448786424, iteration: 102181
loss: 1.0187139511108398,grad_norm: 0.9999994898335668, iteration: 102182
loss: 1.0024179220199585,grad_norm: 0.86739780697579, iteration: 102183
loss: 1.0089471340179443,grad_norm: 0.9999991092912403, iteration: 102184
loss: 1.0235474109649658,grad_norm: 0.9902305877789078, iteration: 102185
loss: 0.9913820624351501,grad_norm: 0.9999990167330475, iteration: 102186
loss: 0.9999435544013977,grad_norm: 0.8986693194907184, iteration: 102187
loss: 1.0111744403839111,grad_norm: 0.8672371100330803, iteration: 102188
loss: 1.0152671337127686,grad_norm: 0.9252805133598285, iteration: 102189
loss: 1.0929045677185059,grad_norm: 0.9999991676941271, iteration: 102190
loss: 0.9911853671073914,grad_norm: 0.8846604466245804, iteration: 102191
loss: 1.0197314023971558,grad_norm: 0.9999991423549761, iteration: 102192
loss: 1.0378410816192627,grad_norm: 0.9962911323884897, iteration: 102193
loss: 1.0068328380584717,grad_norm: 0.9544238771154919, iteration: 102194
loss: 0.9949690103530884,grad_norm: 0.918582483519371, iteration: 102195
loss: 0.9851356148719788,grad_norm: 0.9532136613361388, iteration: 102196
loss: 1.0621168613433838,grad_norm: 0.9999992769629604, iteration: 102197
loss: 1.0156971216201782,grad_norm: 0.8083394237050903, iteration: 102198
loss: 0.9772639870643616,grad_norm: 0.9999992765734429, iteration: 102199
loss: 0.9843997359275818,grad_norm: 0.9362652020811539, iteration: 102200
loss: 1.0268659591674805,grad_norm: 0.9999991838002287, iteration: 102201
loss: 1.0239500999450684,grad_norm: 0.9999992884803708, iteration: 102202
loss: 1.0528597831726074,grad_norm: 0.999999112469142, iteration: 102203
loss: 0.9850766062736511,grad_norm: 0.9907653666314755, iteration: 102204
loss: 0.9919359087944031,grad_norm: 0.8006772979028535, iteration: 102205
loss: 1.0136394500732422,grad_norm: 0.8203445849336636, iteration: 102206
loss: 1.0153738260269165,grad_norm: 0.8293455582887433, iteration: 102207
loss: 1.019887924194336,grad_norm: 0.999999326218788, iteration: 102208
loss: 1.0220614671707153,grad_norm: 0.9999990115937568, iteration: 102209
loss: 0.9847198724746704,grad_norm: 0.891967248112703, iteration: 102210
loss: 1.0104018449783325,grad_norm: 0.8351291711068657, iteration: 102211
loss: 0.9632244110107422,grad_norm: 0.8809948996358715, iteration: 102212
loss: 1.0715621709823608,grad_norm: 0.9999992510996063, iteration: 102213
loss: 0.9914084672927856,grad_norm: 0.8199994207723202, iteration: 102214
loss: 0.988158643245697,grad_norm: 0.99999903352413, iteration: 102215
loss: 0.9978846311569214,grad_norm: 0.8709646886530482, iteration: 102216
loss: 0.9955722689628601,grad_norm: 0.8578452638205967, iteration: 102217
loss: 1.0170315504074097,grad_norm: 0.8737095070139753, iteration: 102218
loss: 1.0039162635803223,grad_norm: 0.9047310011298775, iteration: 102219
loss: 1.0003494024276733,grad_norm: 0.8868375500212998, iteration: 102220
loss: 0.9956839680671692,grad_norm: 0.9002414534551816, iteration: 102221
loss: 0.9723441004753113,grad_norm: 0.999999779719611, iteration: 102222
loss: 1.020903468132019,grad_norm: 0.884539730794116, iteration: 102223
loss: 0.9845335483551025,grad_norm: 0.8456442472973114, iteration: 102224
loss: 1.0185301303863525,grad_norm: 0.913088502119925, iteration: 102225
loss: 1.0079630613327026,grad_norm: 0.9999991454051247, iteration: 102226
loss: 1.024712085723877,grad_norm: 0.9999994418516303, iteration: 102227
loss: 1.065463662147522,grad_norm: 0.9999997704087032, iteration: 102228
loss: 1.0066925287246704,grad_norm: 0.9999989795127802, iteration: 102229
loss: 1.0203406810760498,grad_norm: 0.8983725035339731, iteration: 102230
loss: 1.01514732837677,grad_norm: 0.8346072257706444, iteration: 102231
loss: 1.120200753211975,grad_norm: 0.9999999137764726, iteration: 102232
loss: 0.9739891886711121,grad_norm: 0.9720704749564066, iteration: 102233
loss: 0.9800617694854736,grad_norm: 0.9999992447432263, iteration: 102234
loss: 1.0013933181762695,grad_norm: 0.8996110022849382, iteration: 102235
loss: 1.0065025091171265,grad_norm: 0.8719188991227913, iteration: 102236
loss: 1.047467827796936,grad_norm: 0.9999992365040683, iteration: 102237
loss: 0.9864218831062317,grad_norm: 0.912324205065791, iteration: 102238
loss: 0.9884467124938965,grad_norm: 0.8523033162497451, iteration: 102239
loss: 1.0035014152526855,grad_norm: 0.9999990676429039, iteration: 102240
loss: 0.986149787902832,grad_norm: 0.7248654034319715, iteration: 102241
loss: 1.0280075073242188,grad_norm: 0.7215528708191705, iteration: 102242
loss: 1.0077755451202393,grad_norm: 0.999998983781922, iteration: 102243
loss: 1.0142443180084229,grad_norm: 0.9666348950893876, iteration: 102244
loss: 1.0210826396942139,grad_norm: 0.9999991539702076, iteration: 102245
loss: 1.2171592712402344,grad_norm: 0.9999994597624751, iteration: 102246
loss: 0.987852156162262,grad_norm: 0.9309696418347547, iteration: 102247
loss: 0.9966465830802917,grad_norm: 0.9121583789881023, iteration: 102248
loss: 1.003035306930542,grad_norm: 0.999999141664927, iteration: 102249
loss: 0.9959272742271423,grad_norm: 0.8536199492270506, iteration: 102250
loss: 1.061761498451233,grad_norm: 0.9999997475178644, iteration: 102251
loss: 1.0571259260177612,grad_norm: 0.9999991274540584, iteration: 102252
loss: 1.173677921295166,grad_norm: 0.9999992329787633, iteration: 102253
loss: 1.0209118127822876,grad_norm: 0.9331125159197279, iteration: 102254
loss: 1.015137791633606,grad_norm: 0.8718909169000909, iteration: 102255
loss: 1.089540958404541,grad_norm: 0.9999997578154476, iteration: 102256
loss: 0.9883984923362732,grad_norm: 0.8717689588856478, iteration: 102257
loss: 1.0218334197998047,grad_norm: 0.9999991602432508, iteration: 102258
loss: 1.0632319450378418,grad_norm: 0.9999998148636413, iteration: 102259
loss: 1.0064865350723267,grad_norm: 0.6927419014242405, iteration: 102260
loss: 1.0380011796951294,grad_norm: 0.8803893426291727, iteration: 102261
loss: 0.9912681579589844,grad_norm: 0.9999990996853083, iteration: 102262
loss: 1.115633249282837,grad_norm: 0.9999999043706426, iteration: 102263
loss: 1.0049158334732056,grad_norm: 0.9999993404984333, iteration: 102264
loss: 1.0732533931732178,grad_norm: 0.9999998503748425, iteration: 102265
loss: 1.0248452425003052,grad_norm: 0.9999992309004896, iteration: 102266
loss: 0.9948513507843018,grad_norm: 0.9999991496595637, iteration: 102267
loss: 1.0205953121185303,grad_norm: 0.9999992055066564, iteration: 102268
loss: 1.1053999662399292,grad_norm: 0.9999994856065116, iteration: 102269
loss: 1.01667320728302,grad_norm: 0.7705156870802079, iteration: 102270
loss: 0.9934305548667908,grad_norm: 0.9999999968175262, iteration: 102271
loss: 1.0062612295150757,grad_norm: 0.9999993616696442, iteration: 102272
loss: 1.0371513366699219,grad_norm: 0.9999999466193884, iteration: 102273
loss: 1.0076709985733032,grad_norm: 0.9999993691182679, iteration: 102274
loss: 1.0039094686508179,grad_norm: 0.9960820303794872, iteration: 102275
loss: 1.007027268409729,grad_norm: 0.8426209688401767, iteration: 102276
loss: 0.9805546402931213,grad_norm: 0.9999991467748797, iteration: 102277
loss: 1.0210380554199219,grad_norm: 0.9898501510336456, iteration: 102278
loss: 1.007374882698059,grad_norm: 0.9999989947734974, iteration: 102279
loss: 1.1503233909606934,grad_norm: 0.9999998307511693, iteration: 102280
loss: 1.0311195850372314,grad_norm: 0.8422797419655016, iteration: 102281
loss: 0.999474287033081,grad_norm: 0.7992563259112587, iteration: 102282
loss: 0.9923845529556274,grad_norm: 0.7680575767654308, iteration: 102283
loss: 1.0011074542999268,grad_norm: 0.8239376742626737, iteration: 102284
loss: 1.0046491622924805,grad_norm: 0.8567131944685059, iteration: 102285
loss: 0.9986855387687683,grad_norm: 0.9999991638557526, iteration: 102286
loss: 1.0366460084915161,grad_norm: 0.9999997460989914, iteration: 102287
loss: 1.012774109840393,grad_norm: 0.8690103696992795, iteration: 102288
loss: 1.026755452156067,grad_norm: 0.8923968648016214, iteration: 102289
loss: 1.0413438081741333,grad_norm: 0.9999992213767462, iteration: 102290
loss: 1.0046892166137695,grad_norm: 0.9702329615762733, iteration: 102291
loss: 1.0151149034500122,grad_norm: 0.9999990122067796, iteration: 102292
loss: 0.9859434366226196,grad_norm: 0.7732970257672993, iteration: 102293
loss: 1.0639324188232422,grad_norm: 0.9999996656507406, iteration: 102294
loss: 1.014296293258667,grad_norm: 0.8353839954986877, iteration: 102295
loss: 0.969672441482544,grad_norm: 0.9999990438109219, iteration: 102296
loss: 0.9918178915977478,grad_norm: 0.7678482110374848, iteration: 102297
loss: 0.9758732318878174,grad_norm: 0.803556618161008, iteration: 102298
loss: 1.021518588066101,grad_norm: 0.9999990644643656, iteration: 102299
loss: 0.9990969300270081,grad_norm: 0.9999990944915222, iteration: 102300
loss: 1.025700569152832,grad_norm: 0.9999995084832033, iteration: 102301
loss: 0.9868422746658325,grad_norm: 0.769313803970813, iteration: 102302
loss: 0.9816668629646301,grad_norm: 0.811495419560678, iteration: 102303
loss: 1.0430899858474731,grad_norm: 0.9999993250171345, iteration: 102304
loss: 0.9786069989204407,grad_norm: 0.9999991575850776, iteration: 102305
loss: 0.9990214705467224,grad_norm: 0.9906790412743218, iteration: 102306
loss: 1.086275577545166,grad_norm: 0.9999991321216231, iteration: 102307
loss: 1.0159912109375,grad_norm: 0.7845229413819851, iteration: 102308
loss: 0.9626736044883728,grad_norm: 0.8454044872273337, iteration: 102309
loss: 0.9988530874252319,grad_norm: 0.9999991399727934, iteration: 102310
loss: 1.0131155252456665,grad_norm: 0.9999996369870395, iteration: 102311
loss: 0.992003321647644,grad_norm: 0.761377399609587, iteration: 102312
loss: 1.01885187625885,grad_norm: 0.9999995447827705, iteration: 102313
loss: 1.020102620124817,grad_norm: 0.9999999897919888, iteration: 102314
loss: 0.9972896575927734,grad_norm: 0.7730370595745467, iteration: 102315
loss: 1.1206388473510742,grad_norm: 0.9999993884379622, iteration: 102316
loss: 1.0041438341140747,grad_norm: 0.9670672390336572, iteration: 102317
loss: 0.9665165543556213,grad_norm: 0.9558929169569771, iteration: 102318
loss: 0.9921029806137085,grad_norm: 0.9999993563309844, iteration: 102319
loss: 0.9440616965293884,grad_norm: 0.9584256606782781, iteration: 102320
loss: 1.0210236310958862,grad_norm: 0.9999995885065395, iteration: 102321
loss: 0.988814115524292,grad_norm: 0.9999992654835249, iteration: 102322
loss: 0.9839163422584534,grad_norm: 0.9769698503308405, iteration: 102323
loss: 0.979823887348175,grad_norm: 0.9276513815867604, iteration: 102324
loss: 0.9970934987068176,grad_norm: 0.999999252772974, iteration: 102325
loss: 0.9995765089988708,grad_norm: 0.9999990498571031, iteration: 102326
loss: 0.9872699975967407,grad_norm: 0.9999995588982606, iteration: 102327
loss: 1.21580171585083,grad_norm: 0.9999995428382586, iteration: 102328
loss: 1.01567542552948,grad_norm: 0.854494574218585, iteration: 102329
loss: 1.0096720457077026,grad_norm: 0.6963730898200614, iteration: 102330
loss: 1.125265121459961,grad_norm: 0.9999993832407855, iteration: 102331
loss: 1.0044174194335938,grad_norm: 0.9999990854150893, iteration: 102332
loss: 1.0146631002426147,grad_norm: 0.8955939155933035, iteration: 102333
loss: 1.0543831586837769,grad_norm: 0.9999998146742511, iteration: 102334
loss: 1.0349674224853516,grad_norm: 0.8665860277034403, iteration: 102335
loss: 1.0618624687194824,grad_norm: 0.9999992997773406, iteration: 102336
loss: 1.0106390714645386,grad_norm: 0.8508621148983627, iteration: 102337
loss: 0.9805761575698853,grad_norm: 0.9366052192616414, iteration: 102338
loss: 1.0058327913284302,grad_norm: 0.999999100258011, iteration: 102339
loss: 1.0813053846359253,grad_norm: 0.9999997523791951, iteration: 102340
loss: 0.978978157043457,grad_norm: 0.9999995870437212, iteration: 102341
loss: 0.9913565516471863,grad_norm: 0.934446198330708, iteration: 102342
loss: 0.9899572134017944,grad_norm: 0.9563138204192476, iteration: 102343
loss: 1.0179554224014282,grad_norm: 0.9999991043583357, iteration: 102344
loss: 1.0291426181793213,grad_norm: 0.9216463452863718, iteration: 102345
loss: 1.0105928182601929,grad_norm: 0.8546048903540192, iteration: 102346
loss: 1.0480400323867798,grad_norm: 0.848581348574789, iteration: 102347
loss: 0.9747628569602966,grad_norm: 0.9755988891096736, iteration: 102348
loss: 0.9325231313705444,grad_norm: 0.8563005074088755, iteration: 102349
loss: 0.9830306768417358,grad_norm: 0.9023842595283555, iteration: 102350
loss: 1.0070565938949585,grad_norm: 0.8617125812759997, iteration: 102351
loss: 1.0129902362823486,grad_norm: 0.969936817668346, iteration: 102352
loss: 1.045731544494629,grad_norm: 0.9999991966110463, iteration: 102353
loss: 1.050277829170227,grad_norm: 0.999999449348607, iteration: 102354
loss: 1.0402082204818726,grad_norm: 0.9727870492056863, iteration: 102355
loss: 0.9984762668609619,grad_norm: 0.8744631203648294, iteration: 102356
loss: 1.0051684379577637,grad_norm: 0.9319437326336714, iteration: 102357
loss: 1.052122950553894,grad_norm: 0.9654582588468947, iteration: 102358
loss: 1.00909423828125,grad_norm: 0.8356897121976299, iteration: 102359
loss: 1.095409870147705,grad_norm: 0.9999991973386949, iteration: 102360
loss: 0.9873849153518677,grad_norm: 0.8826946777924901, iteration: 102361
loss: 0.9934474229812622,grad_norm: 0.853626961597073, iteration: 102362
loss: 0.977668285369873,grad_norm: 0.7931406991376253, iteration: 102363
loss: 1.110971212387085,grad_norm: 0.9999993061534853, iteration: 102364
loss: 0.9839159846305847,grad_norm: 0.9999990467979385, iteration: 102365
loss: 1.0076240301132202,grad_norm: 0.9999992315222838, iteration: 102366
loss: 0.9769518375396729,grad_norm: 0.9981684444536115, iteration: 102367
loss: 1.0792651176452637,grad_norm: 0.9999997681667803, iteration: 102368
loss: 0.9860354661941528,grad_norm: 0.8598922549320985, iteration: 102369
loss: 0.9869611859321594,grad_norm: 0.7696869708348026, iteration: 102370
loss: 1.0593522787094116,grad_norm: 0.9028246565034226, iteration: 102371
loss: 0.9916733503341675,grad_norm: 0.9999991275289059, iteration: 102372
loss: 0.9942497611045837,grad_norm: 0.8910900823567871, iteration: 102373
loss: 0.9970623254776001,grad_norm: 0.8872328038218018, iteration: 102374
loss: 1.0109790563583374,grad_norm: 0.9999992378708487, iteration: 102375
loss: 0.9936263561248779,grad_norm: 0.9678221106734414, iteration: 102376
loss: 0.9877791404724121,grad_norm: 0.8955214034643832, iteration: 102377
loss: 1.0072174072265625,grad_norm: 0.9560364242957644, iteration: 102378
loss: 1.026829481124878,grad_norm: 0.903032357955499, iteration: 102379
loss: 1.052014708518982,grad_norm: 0.9999999162330916, iteration: 102380
loss: 0.996555507183075,grad_norm: 0.7196794504308652, iteration: 102381
loss: 0.9911845326423645,grad_norm: 0.9144767103931051, iteration: 102382
loss: 1.052714467048645,grad_norm: 0.9999999633157661, iteration: 102383
loss: 1.0182697772979736,grad_norm: 0.7204822615227503, iteration: 102384
loss: 0.9961772561073303,grad_norm: 0.8298947037496952, iteration: 102385
loss: 1.053996205329895,grad_norm: 0.9999991263101113, iteration: 102386
loss: 1.0049448013305664,grad_norm: 0.9999998418159055, iteration: 102387
loss: 1.0120457410812378,grad_norm: 0.865843262045849, iteration: 102388
loss: 1.026404857635498,grad_norm: 0.999999775284186, iteration: 102389
loss: 1.0217742919921875,grad_norm: 0.9999992532173906, iteration: 102390
loss: 1.0206811428070068,grad_norm: 0.9999997107811508, iteration: 102391
loss: 1.0196179151535034,grad_norm: 0.9371268711678382, iteration: 102392
loss: 1.2053396701812744,grad_norm: 0.9999992406819623, iteration: 102393
loss: 0.9804604053497314,grad_norm: 0.692496012199731, iteration: 102394
loss: 1.0342990159988403,grad_norm: 0.8973479526331858, iteration: 102395
loss: 0.9863423705101013,grad_norm: 0.972319318770555, iteration: 102396
loss: 0.9584372639656067,grad_norm: 0.9999992144332456, iteration: 102397
loss: 1.1970475912094116,grad_norm: 0.9999992908881888, iteration: 102398
loss: 1.0195660591125488,grad_norm: 0.8160141371284596, iteration: 102399
loss: 1.0170079469680786,grad_norm: 0.9999991513767358, iteration: 102400
loss: 1.0011801719665527,grad_norm: 0.7752055781576381, iteration: 102401
loss: 1.0416001081466675,grad_norm: 0.9999991060955514, iteration: 102402
loss: 1.006872534751892,grad_norm: 0.9224058060444643, iteration: 102403
loss: 0.986903190612793,grad_norm: 0.9999995950334133, iteration: 102404
loss: 1.0688040256500244,grad_norm: 0.9999993344360748, iteration: 102405
loss: 0.9870746731758118,grad_norm: 0.8396953694584178, iteration: 102406
loss: 1.0117794275283813,grad_norm: 0.9999993577403262, iteration: 102407
loss: 1.0329763889312744,grad_norm: 0.9999991014309763, iteration: 102408
loss: 0.989721417427063,grad_norm: 0.9289674579565533, iteration: 102409
loss: 0.9857349395751953,grad_norm: 0.924637579009006, iteration: 102410
loss: 0.9760578870773315,grad_norm: 0.9999990641961604, iteration: 102411
loss: 0.9942808747291565,grad_norm: 0.8004957955949181, iteration: 102412
loss: 1.0346506834030151,grad_norm: 0.9999991669841253, iteration: 102413
loss: 1.0333174467086792,grad_norm: 0.9999999868891237, iteration: 102414
loss: 1.029060959815979,grad_norm: 0.8356902553146317, iteration: 102415
loss: 0.9804398417472839,grad_norm: 0.8262557025103529, iteration: 102416
loss: 0.9855819344520569,grad_norm: 0.8244300019762745, iteration: 102417
loss: 1.0842803716659546,grad_norm: 0.9561882991820388, iteration: 102418
loss: 0.9533939361572266,grad_norm: 0.9662926784814984, iteration: 102419
loss: 1.1153583526611328,grad_norm: 0.9999996715771537, iteration: 102420
loss: 1.0014536380767822,grad_norm: 0.9999992903390218, iteration: 102421
loss: 0.9764082431793213,grad_norm: 0.8569321768231908, iteration: 102422
loss: 1.005374550819397,grad_norm: 0.9999993772830402, iteration: 102423
loss: 1.045444130897522,grad_norm: 0.9225745748499402, iteration: 102424
loss: 1.0363640785217285,grad_norm: 0.9999997756904696, iteration: 102425
loss: 1.032882571220398,grad_norm: 0.9999991333435146, iteration: 102426
loss: 1.0242204666137695,grad_norm: 0.8287744651063, iteration: 102427
loss: 0.9978635311126709,grad_norm: 0.9133346445986634, iteration: 102428
loss: 1.0207772254943848,grad_norm: 0.9999998648480289, iteration: 102429
loss: 1.0187609195709229,grad_norm: 0.9285735476355976, iteration: 102430
loss: 0.9966244697570801,grad_norm: 0.9999991249706409, iteration: 102431
loss: 1.0240767002105713,grad_norm: 0.9523055657813788, iteration: 102432
loss: 1.0152884721755981,grad_norm: 0.8835055072776218, iteration: 102433
loss: 1.0469326972961426,grad_norm: 0.9999992620314657, iteration: 102434
loss: 0.9991486668586731,grad_norm: 0.8007601679011266, iteration: 102435
loss: 1.0367995500564575,grad_norm: 0.9999990813580957, iteration: 102436
loss: 1.032450556755066,grad_norm: 0.8188273810115617, iteration: 102437
loss: 0.9869427680969238,grad_norm: 0.9410272324326694, iteration: 102438
loss: 0.9774723649024963,grad_norm: 0.9999997182087513, iteration: 102439
loss: 1.018340826034546,grad_norm: 0.7629350542302586, iteration: 102440
loss: 1.1089682579040527,grad_norm: 0.9999994981394575, iteration: 102441
loss: 1.0011415481567383,grad_norm: 0.8385145165193247, iteration: 102442
loss: 0.988429844379425,grad_norm: 0.8953714900763944, iteration: 102443
loss: 1.278194785118103,grad_norm: 0.9999998201521556, iteration: 102444
loss: 1.0018668174743652,grad_norm: 0.9999993057978307, iteration: 102445
loss: 0.9978013634681702,grad_norm: 0.9460716297102553, iteration: 102446
loss: 1.0719141960144043,grad_norm: 0.9999996722108299, iteration: 102447
loss: 1.0928691625595093,grad_norm: 0.9999995535056511, iteration: 102448
loss: 1.0212345123291016,grad_norm: 0.9999991007596251, iteration: 102449
loss: 0.9749152660369873,grad_norm: 0.8742683268493956, iteration: 102450
loss: 0.9943785071372986,grad_norm: 0.9999991154657734, iteration: 102451
loss: 1.0054563283920288,grad_norm: 0.9999999332660526, iteration: 102452
loss: 1.0359172821044922,grad_norm: 0.9999990819756259, iteration: 102453
loss: 1.036257266998291,grad_norm: 0.7192894235160902, iteration: 102454
loss: 1.0333008766174316,grad_norm: 0.9892874303159366, iteration: 102455
loss: 1.0169190168380737,grad_norm: 0.8874882288861531, iteration: 102456
loss: 1.110114574432373,grad_norm: 0.999999532423906, iteration: 102457
loss: 1.0167654752731323,grad_norm: 0.7869398287189708, iteration: 102458
loss: 1.003639578819275,grad_norm: 0.9648812606989524, iteration: 102459
loss: 1.0253543853759766,grad_norm: 0.9375909033549713, iteration: 102460
loss: 0.9833167195320129,grad_norm: 0.8897023948124441, iteration: 102461
loss: 0.9860877394676208,grad_norm: 0.8468079493272389, iteration: 102462
loss: 1.0152207612991333,grad_norm: 0.9953157284376758, iteration: 102463
loss: 1.0606937408447266,grad_norm: 0.9999997941997234, iteration: 102464
loss: 1.0033698081970215,grad_norm: 0.9999991018500655, iteration: 102465
loss: 0.9960083961486816,grad_norm: 0.9091178491891561, iteration: 102466
loss: 0.9764658212661743,grad_norm: 0.9281231189723179, iteration: 102467
loss: 0.9878982305526733,grad_norm: 0.9008838699184131, iteration: 102468
loss: 0.9705110788345337,grad_norm: 0.9999992304494917, iteration: 102469
loss: 0.9799363613128662,grad_norm: 0.9999990583256366, iteration: 102470
loss: 1.038106083869934,grad_norm: 0.9999995082110419, iteration: 102471
loss: 1.0031142234802246,grad_norm: 0.8926001468112201, iteration: 102472
loss: 0.9984913468360901,grad_norm: 0.9999990479221178, iteration: 102473
loss: 1.0278197526931763,grad_norm: 0.9999989656071707, iteration: 102474
loss: 1.0153363943099976,grad_norm: 0.783529375074874, iteration: 102475
loss: 1.0086021423339844,grad_norm: 0.9784397184411824, iteration: 102476
loss: 1.0734196901321411,grad_norm: 0.8560400584069062, iteration: 102477
loss: 0.9788916707038879,grad_norm: 0.8556837614310526, iteration: 102478
loss: 1.087762713432312,grad_norm: 0.9999999247707158, iteration: 102479
loss: 1.00893235206604,grad_norm: 0.9999993878958343, iteration: 102480
loss: 1.0084240436553955,grad_norm: 0.9999996470778364, iteration: 102481
loss: 1.0239709615707397,grad_norm: 0.8650064198683652, iteration: 102482
loss: 1.0433176755905151,grad_norm: 0.9999996254840624, iteration: 102483
loss: 0.9732837080955505,grad_norm: 0.8383630256360987, iteration: 102484
loss: 1.0410751104354858,grad_norm: 0.8207013221693261, iteration: 102485
loss: 1.0608545541763306,grad_norm: 0.9999997145724239, iteration: 102486
loss: 1.0255335569381714,grad_norm: 0.875999538304172, iteration: 102487
loss: 1.0499708652496338,grad_norm: 0.9999989676344776, iteration: 102488
loss: 1.0738353729248047,grad_norm: 0.9437721919861025, iteration: 102489
loss: 1.0027453899383545,grad_norm: 0.7777256411323438, iteration: 102490
loss: 1.0068126916885376,grad_norm: 0.9432791467109665, iteration: 102491
loss: 1.0206526517868042,grad_norm: 0.999999691045346, iteration: 102492
loss: 0.9602248668670654,grad_norm: 0.9999992156901727, iteration: 102493
loss: 0.9898790121078491,grad_norm: 0.9597895486418527, iteration: 102494
loss: 0.9952570796012878,grad_norm: 0.9752324229523239, iteration: 102495
loss: 1.0151982307434082,grad_norm: 0.9005825203346998, iteration: 102496
loss: 0.9920349717140198,grad_norm: 0.8599866331094422, iteration: 102497
loss: 1.0180116891860962,grad_norm: 0.8265929448908407, iteration: 102498
loss: 0.9909413456916809,grad_norm: 0.875778037785837, iteration: 102499
loss: 1.052907943725586,grad_norm: 0.9999990396770632, iteration: 102500
loss: 0.9959603548049927,grad_norm: 0.9581248143259897, iteration: 102501
loss: 0.96006178855896,grad_norm: 0.9807426480258836, iteration: 102502
loss: 1.0071080923080444,grad_norm: 0.8903203639278531, iteration: 102503
loss: 1.0824337005615234,grad_norm: 0.9999992303999381, iteration: 102504
loss: 1.0089600086212158,grad_norm: 0.9999989748544383, iteration: 102505
loss: 1.0633254051208496,grad_norm: 0.9999990258145584, iteration: 102506
loss: 0.994520902633667,grad_norm: 0.8608444439560737, iteration: 102507
loss: 0.9934282302856445,grad_norm: 0.8008501755389759, iteration: 102508
loss: 0.9937043786048889,grad_norm: 0.7963198555820843, iteration: 102509
loss: 0.9996142983436584,grad_norm: 0.9999994800180426, iteration: 102510
loss: 1.0262846946716309,grad_norm: 0.9502264500350136, iteration: 102511
loss: 0.9961090683937073,grad_norm: 0.7580138231791139, iteration: 102512
loss: 0.9752944111824036,grad_norm: 0.8523419991017623, iteration: 102513
loss: 1.0937039852142334,grad_norm: 0.999999895812473, iteration: 102514
loss: 0.9838556051254272,grad_norm: 0.9199105559276969, iteration: 102515
loss: 1.0144386291503906,grad_norm: 0.8250152016327155, iteration: 102516
loss: 1.0019311904907227,grad_norm: 0.9461541270561729, iteration: 102517
loss: 1.0814204216003418,grad_norm: 0.8776730289031132, iteration: 102518
loss: 1.024467945098877,grad_norm: 0.8677295445657817, iteration: 102519
loss: 0.9924044609069824,grad_norm: 0.8113737953528928, iteration: 102520
loss: 1.0041643381118774,grad_norm: 0.9999993794779163, iteration: 102521
loss: 0.9771745800971985,grad_norm: 0.9815910482462954, iteration: 102522
loss: 1.0081981420516968,grad_norm: 0.8044154386412449, iteration: 102523
loss: 1.025174617767334,grad_norm: 0.9999990226248081, iteration: 102524
loss: 0.9990649819374084,grad_norm: 0.9838971760524984, iteration: 102525
loss: 0.96160489320755,grad_norm: 0.9235671818889901, iteration: 102526
loss: 1.0849350690841675,grad_norm: 0.999999246643051, iteration: 102527
loss: 1.0142381191253662,grad_norm: 0.8847776147738201, iteration: 102528
loss: 0.9785390496253967,grad_norm: 0.9999991876872143, iteration: 102529
loss: 1.0656970739364624,grad_norm: 0.999999152409186, iteration: 102530
loss: 1.003523349761963,grad_norm: 0.9107259509575653, iteration: 102531
loss: 1.00136137008667,grad_norm: 0.8257308214901192, iteration: 102532
loss: 0.9477434754371643,grad_norm: 0.792074290999958, iteration: 102533
loss: 1.0109686851501465,grad_norm: 0.9061489007850624, iteration: 102534
loss: 1.0320284366607666,grad_norm: 0.8830605589252407, iteration: 102535
loss: 1.0043312311172485,grad_norm: 0.9999990761884486, iteration: 102536
loss: 1.0286651849746704,grad_norm: 0.9999999675256479, iteration: 102537
loss: 1.0353260040283203,grad_norm: 0.9999995328521801, iteration: 102538
loss: 1.0439192056655884,grad_norm: 0.9271751243964096, iteration: 102539
loss: 1.0132770538330078,grad_norm: 0.9014602789826185, iteration: 102540
loss: 0.9713605046272278,grad_norm: 0.8179181485576201, iteration: 102541
loss: 1.0396572351455688,grad_norm: 0.9999993769626444, iteration: 102542
loss: 1.0122894048690796,grad_norm: 0.9999990699001242, iteration: 102543
loss: 0.9570692777633667,grad_norm: 0.9999992836745607, iteration: 102544
loss: 1.0648449659347534,grad_norm: 0.9672144891429107, iteration: 102545
loss: 1.0175683498382568,grad_norm: 0.8814605846498327, iteration: 102546
loss: 0.9858832359313965,grad_norm: 0.9475076144505009, iteration: 102547
loss: 1.0132062435150146,grad_norm: 0.9713828963021465, iteration: 102548
loss: 0.9585373401641846,grad_norm: 0.8401872064946986, iteration: 102549
loss: 0.9719890356063843,grad_norm: 0.9999989099051613, iteration: 102550
loss: 1.0252870321273804,grad_norm: 0.9999990300029608, iteration: 102551
loss: 1.0283920764923096,grad_norm: 0.9312827518785142, iteration: 102552
loss: 1.0109285116195679,grad_norm: 0.8880788966597929, iteration: 102553
loss: 1.0393825769424438,grad_norm: 0.8309334818075994, iteration: 102554
loss: 1.0377991199493408,grad_norm: 0.9802125945697167, iteration: 102555
loss: 0.9934590458869934,grad_norm: 0.9608893311175891, iteration: 102556
loss: 0.9746690392494202,grad_norm: 0.9999992140129969, iteration: 102557
loss: 1.0387719869613647,grad_norm: 0.8613415661247729, iteration: 102558
loss: 1.0545108318328857,grad_norm: 0.9999996740897774, iteration: 102559
loss: 1.0114275217056274,grad_norm: 0.7596469524567313, iteration: 102560
loss: 1.0533188581466675,grad_norm: 0.9999997201693454, iteration: 102561
loss: 1.0070281028747559,grad_norm: 0.9999993198477427, iteration: 102562
loss: 0.9911912679672241,grad_norm: 0.9092756534616933, iteration: 102563
loss: 1.0519874095916748,grad_norm: 0.9999991126314816, iteration: 102564
loss: 0.9942410588264465,grad_norm: 0.9999990409239575, iteration: 102565
loss: 1.032017707824707,grad_norm: 0.8573710233779179, iteration: 102566
loss: 1.0082755088806152,grad_norm: 0.999999694294367, iteration: 102567
loss: 1.0420438051223755,grad_norm: 0.999999118337197, iteration: 102568
loss: 1.0409327745437622,grad_norm: 0.9231829684951056, iteration: 102569
loss: 0.9822879433631897,grad_norm: 0.9999995735294243, iteration: 102570
loss: 0.9701648950576782,grad_norm: 0.7538138277118627, iteration: 102571
loss: 1.086628794670105,grad_norm: 0.9999997942107991, iteration: 102572
loss: 1.0185884237289429,grad_norm: 0.9008982305948764, iteration: 102573
loss: 0.9739025831222534,grad_norm: 0.9999989880714868, iteration: 102574
loss: 1.0131120681762695,grad_norm: 0.8090080712963665, iteration: 102575
loss: 0.9880154728889465,grad_norm: 0.9163608006904441, iteration: 102576
loss: 1.038435935974121,grad_norm: 0.9999992733894132, iteration: 102577
loss: 0.9671666026115417,grad_norm: 0.9999990679479744, iteration: 102578
loss: 1.026963710784912,grad_norm: 0.9999991142505432, iteration: 102579
loss: 1.0009686946868896,grad_norm: 0.9999994476142489, iteration: 102580
loss: 1.0031590461730957,grad_norm: 0.9999995757156217, iteration: 102581
loss: 0.9931333661079407,grad_norm: 0.7677420219990027, iteration: 102582
loss: 1.0817550420761108,grad_norm: 0.9999994215151324, iteration: 102583
loss: 1.0128675699234009,grad_norm: 0.9999998272262766, iteration: 102584
loss: 0.9761055707931519,grad_norm: 0.9276097288188064, iteration: 102585
loss: 1.013460636138916,grad_norm: 0.9999992532349747, iteration: 102586
loss: 1.0358115434646606,grad_norm: 0.9999990260640439, iteration: 102587
loss: 1.0338988304138184,grad_norm: 0.8466163267848751, iteration: 102588
loss: 1.0239810943603516,grad_norm: 0.9999991942611552, iteration: 102589
loss: 1.010688304901123,grad_norm: 0.9999991057859086, iteration: 102590
loss: 1.0366039276123047,grad_norm: 0.8531524177657714, iteration: 102591
loss: 0.9977609515190125,grad_norm: 0.862103042936764, iteration: 102592
loss: 0.9952569007873535,grad_norm: 0.9999992034556962, iteration: 102593
loss: 1.0140485763549805,grad_norm: 0.9999990516364872, iteration: 102594
loss: 0.9759813547134399,grad_norm: 0.8411362938820935, iteration: 102595
loss: 1.0777398347854614,grad_norm: 0.9999991748920395, iteration: 102596
loss: 1.0252262353897095,grad_norm: 0.9893283652347734, iteration: 102597
loss: 1.0275479555130005,grad_norm: 0.9183739987772002, iteration: 102598
loss: 1.0028960704803467,grad_norm: 0.997361669995612, iteration: 102599
loss: 1.015419840812683,grad_norm: 0.916594508162844, iteration: 102600
loss: 0.9926919341087341,grad_norm: 0.9761095486018923, iteration: 102601
loss: 1.075782060623169,grad_norm: 0.9895543589595133, iteration: 102602
loss: 0.975905179977417,grad_norm: 0.7768133664889285, iteration: 102603
loss: 1.0591281652450562,grad_norm: 0.9999990094813543, iteration: 102604
loss: 0.9955737590789795,grad_norm: 0.9173718016611334, iteration: 102605
loss: 0.9730281233787537,grad_norm: 0.7806656995319396, iteration: 102606
loss: 1.00241219997406,grad_norm: 0.9999998373852941, iteration: 102607
loss: 1.0013331174850464,grad_norm: 0.7305999935902714, iteration: 102608
loss: 0.9524475932121277,grad_norm: 0.829695881504011, iteration: 102609
loss: 1.0215365886688232,grad_norm: 0.7545363601563188, iteration: 102610
loss: 0.9911085963249207,grad_norm: 0.9656446768288468, iteration: 102611
loss: 1.0956109762191772,grad_norm: 0.9999995715182929, iteration: 102612
loss: 1.0201512575149536,grad_norm: 0.9999992322806917, iteration: 102613
loss: 0.9912494421005249,grad_norm: 0.8818913067425078, iteration: 102614
loss: 1.0102245807647705,grad_norm: 0.9999991455511347, iteration: 102615
loss: 1.0096663236618042,grad_norm: 0.8592386158828091, iteration: 102616
loss: 0.9819778800010681,grad_norm: 0.862143530072477, iteration: 102617
loss: 0.9735022783279419,grad_norm: 0.9298347359748218, iteration: 102618
loss: 1.195695161819458,grad_norm: 0.9999991998225382, iteration: 102619
loss: 1.019749641418457,grad_norm: 0.9118858078148742, iteration: 102620
loss: 1.016424298286438,grad_norm: 0.9610944463300327, iteration: 102621
loss: 0.9855037927627563,grad_norm: 0.9432946350545863, iteration: 102622
loss: 1.0154136419296265,grad_norm: 0.956261482441803, iteration: 102623
loss: 0.9856410026550293,grad_norm: 0.9066904891983578, iteration: 102624
loss: 1.0117160081863403,grad_norm: 0.9737971025977434, iteration: 102625
loss: 1.0247024297714233,grad_norm: 0.9261128830023431, iteration: 102626
loss: 0.9789553284645081,grad_norm: 0.9822630946716887, iteration: 102627
loss: 1.0806820392608643,grad_norm: 0.9999996891317939, iteration: 102628
loss: 1.0412654876708984,grad_norm: 0.9287435034019484, iteration: 102629
loss: 1.0289556980133057,grad_norm: 0.879441265937886, iteration: 102630
loss: 1.0376044511795044,grad_norm: 0.8880895614503139, iteration: 102631
loss: 1.0072418451309204,grad_norm: 0.9999996200897417, iteration: 102632
loss: 1.0191086530685425,grad_norm: 0.9999994527421476, iteration: 102633
loss: 1.0087354183197021,grad_norm: 0.9999992183830144, iteration: 102634
loss: 1.0030491352081299,grad_norm: 0.9999992176116186, iteration: 102635
loss: 0.9691283106803894,grad_norm: 0.8936246684319019, iteration: 102636
loss: 1.003211498260498,grad_norm: 0.9589272675213806, iteration: 102637
loss: 1.0491548776626587,grad_norm: 0.9999996205392562, iteration: 102638
loss: 0.9906681776046753,grad_norm: 0.8334523954209689, iteration: 102639
loss: 1.0866978168487549,grad_norm: 0.9999991693204996, iteration: 102640
loss: 1.020877480506897,grad_norm: 0.999999629871998, iteration: 102641
loss: 1.0161527395248413,grad_norm: 0.9537359162749571, iteration: 102642
loss: 0.9636691212654114,grad_norm: 0.8373002710138355, iteration: 102643
loss: 0.9790497422218323,grad_norm: 0.9999990478761843, iteration: 102644
loss: 1.0084542036056519,grad_norm: 0.8136011258833082, iteration: 102645
loss: 1.0071064233779907,grad_norm: 0.9592362763834695, iteration: 102646
loss: 1.0003700256347656,grad_norm: 0.9269451566536757, iteration: 102647
loss: 0.9876196384429932,grad_norm: 0.9403353561085697, iteration: 102648
loss: 1.0054278373718262,grad_norm: 0.9999996819300607, iteration: 102649
loss: 1.033308982849121,grad_norm: 0.999999069427474, iteration: 102650
loss: 1.0504990816116333,grad_norm: 0.999999206996584, iteration: 102651
loss: 0.997414767742157,grad_norm: 0.9999998111348777, iteration: 102652
loss: 1.0034769773483276,grad_norm: 0.9595368424445891, iteration: 102653
loss: 0.9559791684150696,grad_norm: 0.8923612456526451, iteration: 102654
loss: 1.006227970123291,grad_norm: 0.903304594031285, iteration: 102655
loss: 1.0018819570541382,grad_norm: 0.8629285405262528, iteration: 102656
loss: 0.9912065267562866,grad_norm: 0.9355127153588624, iteration: 102657
loss: 0.983869194984436,grad_norm: 0.86216751258673, iteration: 102658
loss: 0.9513939023017883,grad_norm: 0.9999995274462627, iteration: 102659
loss: 1.0136810541152954,grad_norm: 0.7973128409809699, iteration: 102660
loss: 1.0245572328567505,grad_norm: 0.8234470567527038, iteration: 102661
loss: 1.027339220046997,grad_norm: 0.9999993623023298, iteration: 102662
loss: 1.143399715423584,grad_norm: 0.9999999375213678, iteration: 102663
loss: 1.019060492515564,grad_norm: 0.9999991597464575, iteration: 102664
loss: 0.9814029335975647,grad_norm: 0.844212044017148, iteration: 102665
loss: 1.0388157367706299,grad_norm: 0.9999996513273477, iteration: 102666
loss: 0.964939534664154,grad_norm: 0.8989986646784071, iteration: 102667
loss: 1.0131217241287231,grad_norm: 0.8188423913140331, iteration: 102668
loss: 1.0037107467651367,grad_norm: 0.999999151878229, iteration: 102669
loss: 0.9789400696754456,grad_norm: 0.9364248039204631, iteration: 102670
loss: 1.0466687679290771,grad_norm: 0.9999990342837919, iteration: 102671
loss: 1.0099867582321167,grad_norm: 0.9999993008957483, iteration: 102672
loss: 1.0201644897460938,grad_norm: 0.9944274360512726, iteration: 102673
loss: 1.0049797296524048,grad_norm: 0.9045592259126566, iteration: 102674
loss: 1.0130990743637085,grad_norm: 0.9999991361420432, iteration: 102675
loss: 1.0191274881362915,grad_norm: 0.9456048812819893, iteration: 102676
loss: 0.9911087155342102,grad_norm: 0.8165441935193231, iteration: 102677
loss: 1.0095131397247314,grad_norm: 0.8116369578988452, iteration: 102678
loss: 1.0362927913665771,grad_norm: 0.9999995604692964, iteration: 102679
loss: 0.9902684688568115,grad_norm: 0.9362273355915649, iteration: 102680
loss: 1.0086568593978882,grad_norm: 0.9999990523497959, iteration: 102681
loss: 0.9721047282218933,grad_norm: 0.9093233536660378, iteration: 102682
loss: 1.0284924507141113,grad_norm: 0.844100242338497, iteration: 102683
loss: 0.985879123210907,grad_norm: 0.8739922928411712, iteration: 102684
loss: 0.9982125759124756,grad_norm: 0.8131799687877943, iteration: 102685
loss: 1.0503559112548828,grad_norm: 0.9999995301211281, iteration: 102686
loss: 1.012870192527771,grad_norm: 0.7866903971855956, iteration: 102687
loss: 0.9534887075424194,grad_norm: 0.957798377059889, iteration: 102688
loss: 1.0528336763381958,grad_norm: 0.9599346877670915, iteration: 102689
loss: 1.0031532049179077,grad_norm: 0.8184360687424527, iteration: 102690
loss: 0.9919288158416748,grad_norm: 0.8687073894976475, iteration: 102691
loss: 1.1313374042510986,grad_norm: 1.0000000319407982, iteration: 102692
loss: 0.9963080286979675,grad_norm: 0.9161193287114963, iteration: 102693
loss: 1.0014028549194336,grad_norm: 0.8339443517908918, iteration: 102694
loss: 1.0219647884368896,grad_norm: 0.9999990448020529, iteration: 102695
loss: 0.9941878318786621,grad_norm: 0.9058465325076337, iteration: 102696
loss: 0.9809594750404358,grad_norm: 0.8872182419035125, iteration: 102697
loss: 1.0402138233184814,grad_norm: 0.8191402860469054, iteration: 102698
loss: 1.0088105201721191,grad_norm: 0.9483607716007014, iteration: 102699
loss: 1.019730567932129,grad_norm: 0.7312325289554988, iteration: 102700
loss: 1.0040984153747559,grad_norm: 0.7907177548603963, iteration: 102701
loss: 1.0121269226074219,grad_norm: 0.999999804521525, iteration: 102702
loss: 1.007831335067749,grad_norm: 0.8429164655537243, iteration: 102703
loss: 0.9682552814483643,grad_norm: 0.9999990986825573, iteration: 102704
loss: 1.000645399093628,grad_norm: 0.9999991821134624, iteration: 102705
loss: 1.0167344808578491,grad_norm: 0.8586660196878713, iteration: 102706
loss: 1.0314222574234009,grad_norm: 0.9999994710393011, iteration: 102707
loss: 1.009932279586792,grad_norm: 0.9999998218249968, iteration: 102708
loss: 1.026475429534912,grad_norm: 0.9999991615728561, iteration: 102709
loss: 1.0170619487762451,grad_norm: 0.858879787793483, iteration: 102710
loss: 0.9654353260993958,grad_norm: 0.895076192457264, iteration: 102711
loss: 0.9904442429542542,grad_norm: 0.9122422854254091, iteration: 102712
loss: 1.0099953413009644,grad_norm: 0.9999991547283213, iteration: 102713
loss: 0.9870503544807434,grad_norm: 0.999999103137137, iteration: 102714
loss: 1.0454035997390747,grad_norm: 0.8284825196998263, iteration: 102715
loss: 0.9965692758560181,grad_norm: 0.7610161708437507, iteration: 102716
loss: 1.0824086666107178,grad_norm: 0.8166805547806144, iteration: 102717
loss: 1.0743545293807983,grad_norm: 0.9999990088547539, iteration: 102718
loss: 0.9914251565933228,grad_norm: 0.999999080103387, iteration: 102719
loss: 0.9896926879882812,grad_norm: 0.9835589457335816, iteration: 102720
loss: 0.9993430972099304,grad_norm: 0.9999997850281707, iteration: 102721
loss: 1.0480382442474365,grad_norm: 0.9999990092428512, iteration: 102722
loss: 0.9767828583717346,grad_norm: 0.9999992774969086, iteration: 102723
loss: 0.9968220591545105,grad_norm: 0.9221397033640408, iteration: 102724
loss: 1.0070265531539917,grad_norm: 0.9998821514992209, iteration: 102725
loss: 0.9997755885124207,grad_norm: 0.8922828178040711, iteration: 102726
loss: 0.9697716236114502,grad_norm: 0.9999992511064136, iteration: 102727
loss: 0.9902487993240356,grad_norm: 0.7808921640369135, iteration: 102728
loss: 0.9785533547401428,grad_norm: 0.963942551561922, iteration: 102729
loss: 0.9899289608001709,grad_norm: 0.9999996509470288, iteration: 102730
loss: 0.9803571105003357,grad_norm: 0.9485612886706123, iteration: 102731
loss: 0.9681249856948853,grad_norm: 0.9999989996179245, iteration: 102732
loss: 1.0141738653182983,grad_norm: 0.8283072323152311, iteration: 102733
loss: 1.033935308456421,grad_norm: 0.9999992037007959, iteration: 102734
loss: 1.0049798488616943,grad_norm: 0.9999993019091249, iteration: 102735
loss: 1.0194536447525024,grad_norm: 0.9751635202859339, iteration: 102736
loss: 1.0239936113357544,grad_norm: 0.8427314105952415, iteration: 102737
loss: 1.0423195362091064,grad_norm: 0.9564352871833847, iteration: 102738
loss: 1.022098183631897,grad_norm: 0.9999990611982225, iteration: 102739
loss: 1.006906509399414,grad_norm: 0.9543760482573557, iteration: 102740
loss: 1.0467783212661743,grad_norm: 0.9999991032018839, iteration: 102741
loss: 1.0152506828308105,grad_norm: 0.9999996351350217, iteration: 102742
loss: 0.9864108562469482,grad_norm: 0.8475326598083833, iteration: 102743
loss: 1.128886342048645,grad_norm: 0.9999995102320435, iteration: 102744
loss: 0.9969208240509033,grad_norm: 0.9999991312436278, iteration: 102745
loss: 1.0166603326797485,grad_norm: 0.999999153952146, iteration: 102746
loss: 1.0196670293807983,grad_norm: 0.8468654463485532, iteration: 102747
loss: 1.0313113927841187,grad_norm: 0.8288602737367795, iteration: 102748
loss: 1.0138912200927734,grad_norm: 0.9999992210760938, iteration: 102749
loss: 1.0739518404006958,grad_norm: 0.9907715117494238, iteration: 102750
loss: 1.009229063987732,grad_norm: 0.9999996870346571, iteration: 102751
loss: 1.0610984563827515,grad_norm: 0.999999324152976, iteration: 102752
loss: 0.9945621490478516,grad_norm: 0.926035271291206, iteration: 102753
loss: 1.0082952976226807,grad_norm: 0.7620809933193607, iteration: 102754
loss: 1.0043607950210571,grad_norm: 0.836332274283185, iteration: 102755
loss: 1.040789246559143,grad_norm: 0.9974581401619907, iteration: 102756
loss: 1.0027902126312256,grad_norm: 0.7838957245464531, iteration: 102757
loss: 1.0398963689804077,grad_norm: 0.9361837841261019, iteration: 102758
loss: 1.0163458585739136,grad_norm: 0.90753991135661, iteration: 102759
loss: 1.030932903289795,grad_norm: 0.9999995887499358, iteration: 102760
loss: 0.9991300702095032,grad_norm: 0.9046979702762457, iteration: 102761
loss: 1.0534632205963135,grad_norm: 0.9024882991511239, iteration: 102762
loss: 0.9813604354858398,grad_norm: 0.9999991147465339, iteration: 102763
loss: 1.0247026681900024,grad_norm: 0.9999992665643809, iteration: 102764
loss: 0.9964963793754578,grad_norm: 0.7188754778508085, iteration: 102765
loss: 1.020134449005127,grad_norm: 0.9999989604813312, iteration: 102766
loss: 1.0490212440490723,grad_norm: 0.999999523977081, iteration: 102767
loss: 1.0375443696975708,grad_norm: 0.9999994356428272, iteration: 102768
loss: 0.9686272144317627,grad_norm: 0.9999990746176629, iteration: 102769
loss: 1.009766936302185,grad_norm: 0.9437236623420608, iteration: 102770
loss: 0.9892944097518921,grad_norm: 0.9999988910049631, iteration: 102771
loss: 0.963260293006897,grad_norm: 0.9715579026316206, iteration: 102772
loss: 0.9974333643913269,grad_norm: 0.9728740128594507, iteration: 102773
loss: 1.0261751413345337,grad_norm: 0.9999989760195922, iteration: 102774
loss: 1.0015555620193481,grad_norm: 0.9999992278595995, iteration: 102775
loss: 1.0176849365234375,grad_norm: 0.8695885204774694, iteration: 102776
loss: 1.0278862714767456,grad_norm: 0.8868689607898285, iteration: 102777
loss: 1.0520739555358887,grad_norm: 0.9999993640816596, iteration: 102778
loss: 1.149383544921875,grad_norm: 0.9999992163328832, iteration: 102779
loss: 1.046035885810852,grad_norm: 0.9546025960516379, iteration: 102780
loss: 1.0093189477920532,grad_norm: 0.9390666486816202, iteration: 102781
loss: 0.9799584746360779,grad_norm: 0.9999991033064903, iteration: 102782
loss: 1.0495929718017578,grad_norm: 0.9999991298661768, iteration: 102783
loss: 1.031722903251648,grad_norm: 0.9999991280589026, iteration: 102784
loss: 1.0018688440322876,grad_norm: 0.9999990985207259, iteration: 102785
loss: 1.003822684288025,grad_norm: 0.9624751312958986, iteration: 102786
loss: 1.0157755613327026,grad_norm: 0.8207821759279693, iteration: 102787
loss: 1.008799433708191,grad_norm: 0.943903859905177, iteration: 102788
loss: 0.9550615549087524,grad_norm: 0.7998955036075931, iteration: 102789
loss: 1.002946138381958,grad_norm: 0.9999991252592286, iteration: 102790
loss: 1.026390790939331,grad_norm: 0.9999994893497183, iteration: 102791
loss: 1.0201170444488525,grad_norm: 0.9999990556325657, iteration: 102792
loss: 1.0055314302444458,grad_norm: 0.8187695762262778, iteration: 102793
loss: 0.9793632626533508,grad_norm: 0.9999991219329166, iteration: 102794
loss: 1.01177179813385,grad_norm: 0.9521173198112413, iteration: 102795
loss: 0.9774472713470459,grad_norm: 0.9999990536218315, iteration: 102796
loss: 1.0611610412597656,grad_norm: 0.9999995634127979, iteration: 102797
loss: 1.046005129814148,grad_norm: 0.9999990809273146, iteration: 102798
loss: 1.1077275276184082,grad_norm: 0.9999995396533317, iteration: 102799
loss: 1.1206852197647095,grad_norm: 0.7858913838693071, iteration: 102800
loss: 1.0732563734054565,grad_norm: 0.9999995237236167, iteration: 102801
loss: 1.0194439888000488,grad_norm: 0.9999991967952533, iteration: 102802
loss: 1.008337378501892,grad_norm: 0.9990272657746119, iteration: 102803
loss: 1.0383875370025635,grad_norm: 0.9126847349244865, iteration: 102804
loss: 1.0039329528808594,grad_norm: 0.9999994843875555, iteration: 102805
loss: 1.000807523727417,grad_norm: 0.8568314502652703, iteration: 102806
loss: 1.0088270902633667,grad_norm: 0.9354614804056492, iteration: 102807
loss: 0.985790491104126,grad_norm: 0.9290014596222318, iteration: 102808
loss: 0.9659340977668762,grad_norm: 0.9910034539905325, iteration: 102809
loss: 0.9584521651268005,grad_norm: 0.8608319695914799, iteration: 102810
loss: 1.0210241079330444,grad_norm: 0.8724269015942843, iteration: 102811
loss: 0.9743145704269409,grad_norm: 0.8835731727284082, iteration: 102812
loss: 1.0506669282913208,grad_norm: 0.9999997567420841, iteration: 102813
loss: 1.0149973630905151,grad_norm: 0.9999992209887807, iteration: 102814
loss: 1.0072517395019531,grad_norm: 0.9645445382901862, iteration: 102815
loss: 0.9937967658042908,grad_norm: 0.9999991709256393, iteration: 102816
loss: 0.9957053661346436,grad_norm: 0.8334030807925922, iteration: 102817
loss: 1.0231144428253174,grad_norm: 0.9235938696494487, iteration: 102818
loss: 0.9932764768600464,grad_norm: 0.8657771195465832, iteration: 102819
loss: 0.9642454981803894,grad_norm: 0.8262127662447059, iteration: 102820
loss: 1.000072717666626,grad_norm: 0.9721962295287945, iteration: 102821
loss: 0.982886552810669,grad_norm: 0.7931784407607503, iteration: 102822
loss: 1.0321403741836548,grad_norm: 0.8674668692024307, iteration: 102823
loss: 1.0105774402618408,grad_norm: 0.8707719560217719, iteration: 102824
loss: 1.0134565830230713,grad_norm: 0.9999991190484937, iteration: 102825
loss: 1.0305911302566528,grad_norm: 0.9999990806662319, iteration: 102826
loss: 1.01683509349823,grad_norm: 0.8918769912102594, iteration: 102827
loss: 1.013113021850586,grad_norm: 0.9999992355442855, iteration: 102828
loss: 0.9905978441238403,grad_norm: 0.7547705271786073, iteration: 102829
loss: 0.9586294889450073,grad_norm: 0.9999991482167941, iteration: 102830
loss: 1.0436480045318604,grad_norm: 0.999999719081633, iteration: 102831
loss: 1.0848209857940674,grad_norm: 0.9999998708111836, iteration: 102832
loss: 1.005223035812378,grad_norm: 0.9125847104102809, iteration: 102833
loss: 1.0499908924102783,grad_norm: 0.9999991495077664, iteration: 102834
loss: 1.0181018114089966,grad_norm: 0.9999997215333973, iteration: 102835
loss: 1.003043293952942,grad_norm: 0.9999992379531539, iteration: 102836
loss: 1.0147264003753662,grad_norm: 0.8794037454001795, iteration: 102837
loss: 1.0155587196350098,grad_norm: 0.8965242740775328, iteration: 102838
loss: 1.0505977869033813,grad_norm: 0.9999997929494889, iteration: 102839
loss: 1.0087358951568604,grad_norm: 0.9999990529422037, iteration: 102840
loss: 1.0069886445999146,grad_norm: 0.8893733524222904, iteration: 102841
loss: 1.0873059034347534,grad_norm: 0.9999993410490647, iteration: 102842
loss: 0.9747176170349121,grad_norm: 0.8170087684190999, iteration: 102843
loss: 1.0168347358703613,grad_norm: 0.9999991668583935, iteration: 102844
loss: 0.9767100811004639,grad_norm: 0.9999991792214523, iteration: 102845
loss: 0.9979711174964905,grad_norm: 0.9999995872730068, iteration: 102846
loss: 0.9956841468811035,grad_norm: 0.9095727473028884, iteration: 102847
loss: 1.0000075101852417,grad_norm: 0.9295597127419737, iteration: 102848
loss: 0.9867292642593384,grad_norm: 0.9314091478473316, iteration: 102849
loss: 1.053633689880371,grad_norm: 0.9999991215235773, iteration: 102850
loss: 0.994565486907959,grad_norm: 0.8092233322937018, iteration: 102851
loss: 1.0087238550186157,grad_norm: 0.9999992405053968, iteration: 102852
loss: 1.1398848295211792,grad_norm: 0.9999997780578925, iteration: 102853
loss: 1.0097734928131104,grad_norm: 0.846711150411504, iteration: 102854
loss: 1.0467487573623657,grad_norm: 0.9999999500820906, iteration: 102855
loss: 1.0004240274429321,grad_norm: 0.9833067313360726, iteration: 102856
loss: 0.9680899977684021,grad_norm: 0.8838148406106835, iteration: 102857
loss: 1.0091793537139893,grad_norm: 0.8391791381779135, iteration: 102858
loss: 1.0150480270385742,grad_norm: 0.9999998509227258, iteration: 102859
loss: 0.9673784375190735,grad_norm: 0.8747256343248745, iteration: 102860
loss: 0.9891616106033325,grad_norm: 0.9999990447637224, iteration: 102861
loss: 0.995254635810852,grad_norm: 0.9999994085905722, iteration: 102862
loss: 0.9694998860359192,grad_norm: 0.9999990837653712, iteration: 102863
loss: 1.047606348991394,grad_norm: 0.9999994178421181, iteration: 102864
loss: 1.0082093477249146,grad_norm: 0.9999998484871048, iteration: 102865
loss: 1.0155665874481201,grad_norm: 0.9909354497531092, iteration: 102866
loss: 1.1166335344314575,grad_norm: 0.9999997666332087, iteration: 102867
loss: 1.0627562999725342,grad_norm: 0.9999994463438624, iteration: 102868
loss: 1.0504330396652222,grad_norm: 0.9999993015229902, iteration: 102869
loss: 1.0020169019699097,grad_norm: 0.9999994670151424, iteration: 102870
loss: 1.0098090171813965,grad_norm: 0.8703406242462324, iteration: 102871
loss: 1.0213103294372559,grad_norm: 0.9999999392812354, iteration: 102872
loss: 1.0234185457229614,grad_norm: 0.9999990262719529, iteration: 102873
loss: 1.0339386463165283,grad_norm: 0.9999997703795872, iteration: 102874
loss: 0.994489312171936,grad_norm: 0.861179787432526, iteration: 102875
loss: 1.0523431301116943,grad_norm: 0.9999990131141309, iteration: 102876
loss: 1.0219440460205078,grad_norm: 0.9999992126800326, iteration: 102877
loss: 1.03412663936615,grad_norm: 0.9999991151568303, iteration: 102878
loss: 1.005623698234558,grad_norm: 0.9999990988431925, iteration: 102879
loss: 1.1042195558547974,grad_norm: 0.9999998675979203, iteration: 102880
loss: 1.0139579772949219,grad_norm: 0.9999990173779505, iteration: 102881
loss: 1.0739054679870605,grad_norm: 0.9999993000364993, iteration: 102882
loss: 1.0514698028564453,grad_norm: 0.9999997673703315, iteration: 102883
loss: 1.0016412734985352,grad_norm: 0.9999990517853157, iteration: 102884
loss: 1.007895588874817,grad_norm: 0.8681951946068477, iteration: 102885
loss: 0.9703094959259033,grad_norm: 0.8757271833181504, iteration: 102886
loss: 0.9854907393455505,grad_norm: 0.9739402852018361, iteration: 102887
loss: 1.1584585905075073,grad_norm: 0.9850579683982585, iteration: 102888
loss: 1.2415014505386353,grad_norm: 0.9999998794409177, iteration: 102889
loss: 1.1128473281860352,grad_norm: 0.9999991195967806, iteration: 102890
loss: 1.014542579650879,grad_norm: 0.8376767262146643, iteration: 102891
loss: 1.0189176797866821,grad_norm: 0.9999998582460947, iteration: 102892
loss: 1.0056116580963135,grad_norm: 0.9999993645970462, iteration: 102893
loss: 1.0124939680099487,grad_norm: 0.9999992006248064, iteration: 102894
loss: 1.042091727256775,grad_norm: 0.9999990882761199, iteration: 102895
loss: 1.0204423666000366,grad_norm: 0.8628781749577774, iteration: 102896
loss: 1.0664875507354736,grad_norm: 0.8446277431620631, iteration: 102897
loss: 0.9652735590934753,grad_norm: 0.9999993379797372, iteration: 102898
loss: 1.0241519212722778,grad_norm: 0.9999992910952682, iteration: 102899
loss: 1.0340956449508667,grad_norm: 0.9999998480975925, iteration: 102900
loss: 0.9688680768013,grad_norm: 0.9120946802937767, iteration: 102901
loss: 1.034948706626892,grad_norm: 0.9999999650667056, iteration: 102902
loss: 1.0959478616714478,grad_norm: 0.9999999136500785, iteration: 102903
loss: 1.0814061164855957,grad_norm: 0.9999996719512374, iteration: 102904
loss: 1.055735468864441,grad_norm: 0.9999999013177019, iteration: 102905
loss: 1.0630007982254028,grad_norm: 0.9999998618798568, iteration: 102906
loss: 1.145235538482666,grad_norm: 0.9999991250121341, iteration: 102907
loss: 1.0225303173065186,grad_norm: 0.9071624837933826, iteration: 102908
loss: 1.0114582777023315,grad_norm: 0.9999991117283846, iteration: 102909
loss: 0.9334080219268799,grad_norm: 0.8463228620870658, iteration: 102910
loss: 0.972929060459137,grad_norm: 0.8845818423232816, iteration: 102911
loss: 1.124589443206787,grad_norm: 0.9999991427459286, iteration: 102912
loss: 1.0498988628387451,grad_norm: 0.9999991268384877, iteration: 102913
loss: 0.9894492030143738,grad_norm: 0.9619841239427299, iteration: 102914
loss: 1.0428533554077148,grad_norm: 0.9141089900760198, iteration: 102915
loss: 1.0187960863113403,grad_norm: 0.9999990516242405, iteration: 102916
loss: 1.051903486251831,grad_norm: 0.9999991384460547, iteration: 102917
loss: 0.9700295925140381,grad_norm: 0.8985282876745527, iteration: 102918
loss: 1.0207645893096924,grad_norm: 0.9923822307016023, iteration: 102919
loss: 1.041046142578125,grad_norm: 0.9999993845348053, iteration: 102920
loss: 1.1121454238891602,grad_norm: 0.9999993089185222, iteration: 102921
loss: 1.0047420263290405,grad_norm: 0.75009308688201, iteration: 102922
loss: 1.0256179571151733,grad_norm: 0.9999993533990236, iteration: 102923
loss: 0.9769288897514343,grad_norm: 0.9492355448133578, iteration: 102924
loss: 1.0353565216064453,grad_norm: 0.9999990407302654, iteration: 102925
loss: 1.0532581806182861,grad_norm: 0.9999999273501289, iteration: 102926
loss: 1.0028599500656128,grad_norm: 0.8908239789386143, iteration: 102927
loss: 1.0371875762939453,grad_norm: 0.9999995286200255, iteration: 102928
loss: 1.0057575702667236,grad_norm: 0.9097263432258377, iteration: 102929
loss: 0.9944327473640442,grad_norm: 0.8071883384822357, iteration: 102930
loss: 1.0671316385269165,grad_norm: 0.9215640715191935, iteration: 102931
loss: 1.0317909717559814,grad_norm: 0.8361277977354197, iteration: 102932
loss: 1.0381940603256226,grad_norm: 0.8328733115084798, iteration: 102933
loss: 1.0071572065353394,grad_norm: 0.8548292554752662, iteration: 102934
loss: 1.0108684301376343,grad_norm: 0.8668239650192847, iteration: 102935
loss: 1.020129919052124,grad_norm: 0.9123483553885369, iteration: 102936
loss: 0.9969768524169922,grad_norm: 0.9999989791657372, iteration: 102937
loss: 1.0096431970596313,grad_norm: 0.8974763639970397, iteration: 102938
loss: 1.0017859935760498,grad_norm: 0.9999990661167427, iteration: 102939
loss: 1.012907862663269,grad_norm: 0.9087689134772937, iteration: 102940
loss: 1.0368305444717407,grad_norm: 0.8611325214882825, iteration: 102941
loss: 1.0394089221954346,grad_norm: 0.9999998342675582, iteration: 102942
loss: 1.0084872245788574,grad_norm: 0.9123275265471142, iteration: 102943
loss: 0.982428252696991,grad_norm: 0.7901700816243147, iteration: 102944
loss: 0.9909168481826782,grad_norm: 0.9999992434506207, iteration: 102945
loss: 1.0021295547485352,grad_norm: 0.9999991077719591, iteration: 102946
loss: 1.006653904914856,grad_norm: 0.9950969312030876, iteration: 102947
loss: 0.9846054315567017,grad_norm: 0.908050645084538, iteration: 102948
loss: 1.0454096794128418,grad_norm: 0.9189076722930309, iteration: 102949
loss: 0.9934812784194946,grad_norm: 0.8502280504213646, iteration: 102950
loss: 1.0324040651321411,grad_norm: 0.9833388594377316, iteration: 102951
loss: 1.0001436471939087,grad_norm: 0.9429032267401953, iteration: 102952
loss: 0.9774805903434753,grad_norm: 0.999999181870163, iteration: 102953
loss: 1.0126075744628906,grad_norm: 0.8400480415790451, iteration: 102954
loss: 1.0254021883010864,grad_norm: 0.8882558059585387, iteration: 102955
loss: 1.0034798383712769,grad_norm: 0.9999991558734973, iteration: 102956
loss: 1.0159687995910645,grad_norm: 0.9068475421432469, iteration: 102957
loss: 1.0055294036865234,grad_norm: 0.976006972052204, iteration: 102958
loss: 0.9967296123504639,grad_norm: 0.7658661483709694, iteration: 102959
loss: 1.0080124139785767,grad_norm: 0.9607945254378141, iteration: 102960
loss: 1.0931646823883057,grad_norm: 0.9999993544240023, iteration: 102961
loss: 1.0273053646087646,grad_norm: 0.9999992668969504, iteration: 102962
loss: 1.0422223806381226,grad_norm: 0.883399558817868, iteration: 102963
loss: 1.0248980522155762,grad_norm: 0.9162922428931022, iteration: 102964
loss: 0.9849979877471924,grad_norm: 0.7989098164588586, iteration: 102965
loss: 1.0185655355453491,grad_norm: 0.8815304547509496, iteration: 102966
loss: 1.0563459396362305,grad_norm: 0.866026208293442, iteration: 102967
loss: 0.9787839651107788,grad_norm: 0.8970777692383557, iteration: 102968
loss: 0.9855837821960449,grad_norm: 0.897689620454759, iteration: 102969
loss: 0.9666752815246582,grad_norm: 0.8368431949393592, iteration: 102970
loss: 0.9373559355735779,grad_norm: 0.9999990693346079, iteration: 102971
loss: 0.9859932661056519,grad_norm: 0.9662003422265354, iteration: 102972
loss: 1.0361037254333496,grad_norm: 0.9999999677221856, iteration: 102973
loss: 1.0205950736999512,grad_norm: 0.9999989687691565, iteration: 102974
loss: 1.0327974557876587,grad_norm: 0.7904388478166141, iteration: 102975
loss: 1.003490686416626,grad_norm: 0.8828505834018738, iteration: 102976
loss: 1.0143160820007324,grad_norm: 0.9999990928079759, iteration: 102977
loss: 1.0261729955673218,grad_norm: 0.9563003952959955, iteration: 102978
loss: 1.00838303565979,grad_norm: 0.9999989667590087, iteration: 102979
loss: 0.9945720434188843,grad_norm: 0.963987797636776, iteration: 102980
loss: 1.0049052238464355,grad_norm: 0.9239344410887419, iteration: 102981
loss: 0.9658783078193665,grad_norm: 0.7877376014786597, iteration: 102982
loss: 0.9792392253875732,grad_norm: 0.8138651761317802, iteration: 102983
loss: 0.9936373233795166,grad_norm: 0.8108078609971615, iteration: 102984
loss: 1.0055513381958008,grad_norm: 0.9285750138838536, iteration: 102985
loss: 0.9882296323776245,grad_norm: 0.6909858235322524, iteration: 102986
loss: 0.9737521409988403,grad_norm: 0.9898459621913043, iteration: 102987
loss: 1.0017999410629272,grad_norm: 0.8568642945627686, iteration: 102988
loss: 0.9840019941329956,grad_norm: 0.8079217989885151, iteration: 102989
loss: 1.022908329963684,grad_norm: 0.9378567498956376, iteration: 102990
loss: 0.9702061414718628,grad_norm: 0.8656879654443195, iteration: 102991
loss: 1.0009516477584839,grad_norm: 0.9999991969444081, iteration: 102992
loss: 1.0331861972808838,grad_norm: 0.9999995170778786, iteration: 102993
loss: 0.948453426361084,grad_norm: 0.8142736502441391, iteration: 102994
loss: 1.0314741134643555,grad_norm: 0.9170526810058831, iteration: 102995
loss: 1.031450629234314,grad_norm: 0.8560210762754721, iteration: 102996
loss: 1.0417399406433105,grad_norm: 0.9999990918292383, iteration: 102997
loss: 0.9730090498924255,grad_norm: 0.8504425931360546, iteration: 102998
loss: 1.01016366481781,grad_norm: 0.9999990476901868, iteration: 102999
loss: 0.9828308820724487,grad_norm: 0.8694425579381247, iteration: 103000
loss: 1.0903382301330566,grad_norm: 0.9999995100425468, iteration: 103001
loss: 0.9733211994171143,grad_norm: 0.702503309539346, iteration: 103002
loss: 1.043145775794983,grad_norm: 0.9305476041137953, iteration: 103003
loss: 1.0202536582946777,grad_norm: 0.8343960940231963, iteration: 103004
loss: 0.9825465083122253,grad_norm: 0.9999991042582513, iteration: 103005
loss: 0.9681732654571533,grad_norm: 0.9999998270613644, iteration: 103006
loss: 1.0230960845947266,grad_norm: 0.7829285329817187, iteration: 103007
loss: 0.9987487196922302,grad_norm: 0.928769498615805, iteration: 103008
loss: 1.0088937282562256,grad_norm: 0.9999992113738885, iteration: 103009
loss: 0.9728518724441528,grad_norm: 0.9854409209447909, iteration: 103010
loss: 1.0048255920410156,grad_norm: 0.9999990170595121, iteration: 103011
loss: 0.9974232912063599,grad_norm: 0.9999995521918046, iteration: 103012
loss: 0.9962993264198303,grad_norm: 0.99999900655999, iteration: 103013
loss: 1.0645196437835693,grad_norm: 0.9460577520578745, iteration: 103014
loss: 0.9747663736343384,grad_norm: 0.9999991548113737, iteration: 103015
loss: 1.0047221183776855,grad_norm: 0.8177472559790198, iteration: 103016
loss: 1.0165597200393677,grad_norm: 0.7783673968470035, iteration: 103017
loss: 1.0046370029449463,grad_norm: 0.8743484032300513, iteration: 103018
loss: 0.9907294511795044,grad_norm: 0.9999990241571047, iteration: 103019
loss: 0.9936100244522095,grad_norm: 0.9035182413164753, iteration: 103020
loss: 0.9777848124504089,grad_norm: 0.9842817462309141, iteration: 103021
loss: 1.0500375032424927,grad_norm: 0.9999996446042365, iteration: 103022
loss: 0.9820106625556946,grad_norm: 0.9999996238553615, iteration: 103023
loss: 0.9869872331619263,grad_norm: 0.7435380033851339, iteration: 103024
loss: 0.9949723482131958,grad_norm: 0.904991284373777, iteration: 103025
loss: 0.9976744055747986,grad_norm: 0.9999992100971066, iteration: 103026
loss: 1.010610818862915,grad_norm: 0.9945055551731292, iteration: 103027
loss: 0.9751119613647461,grad_norm: 0.9999996294458972, iteration: 103028
loss: 1.0102797746658325,grad_norm: 0.8597991892928972, iteration: 103029
loss: 1.023385763168335,grad_norm: 0.9115117543618958, iteration: 103030
loss: 0.9817001819610596,grad_norm: 0.989449910376179, iteration: 103031
loss: 1.0146793127059937,grad_norm: 0.9999996039976572, iteration: 103032
loss: 0.9861779808998108,grad_norm: 0.8250164707497475, iteration: 103033
loss: 0.9659579396247864,grad_norm: 0.9449762031355328, iteration: 103034
loss: 0.9855933785438538,grad_norm: 0.8181560455498798, iteration: 103035
loss: 0.997471272945404,grad_norm: 0.8223639235816861, iteration: 103036
loss: 1.0307717323303223,grad_norm: 0.7784136939349329, iteration: 103037
loss: 1.0197982788085938,grad_norm: 0.9999991352023456, iteration: 103038
loss: 1.0160324573516846,grad_norm: 0.9433741634286935, iteration: 103039
loss: 1.0203161239624023,grad_norm: 0.8450310790108752, iteration: 103040
loss: 0.9886454939842224,grad_norm: 0.8581761803029705, iteration: 103041
loss: 0.9829772114753723,grad_norm: 0.8159555287427127, iteration: 103042
loss: 1.0298125743865967,grad_norm: 0.9999991274881393, iteration: 103043
loss: 1.018842339515686,grad_norm: 0.999999575932824, iteration: 103044
loss: 1.057735800743103,grad_norm: 0.9999998865650123, iteration: 103045
loss: 1.023482322692871,grad_norm: 0.9999997525633534, iteration: 103046
loss: 0.9888516664505005,grad_norm: 0.9999994540130017, iteration: 103047
loss: 1.0056720972061157,grad_norm: 0.999999156296485, iteration: 103048
loss: 0.9866807460784912,grad_norm: 0.9788738447412532, iteration: 103049
loss: 1.0172914266586304,grad_norm: 0.9999997502205559, iteration: 103050
loss: 1.014337420463562,grad_norm: 0.9999992073195109, iteration: 103051
loss: 0.9944875836372375,grad_norm: 0.9999992412828801, iteration: 103052
loss: 0.9810068607330322,grad_norm: 0.9201784231053861, iteration: 103053
loss: 0.9811140298843384,grad_norm: 0.99999917808684, iteration: 103054
loss: 0.9978725910186768,grad_norm: 0.8846420057160109, iteration: 103055
loss: 1.020737886428833,grad_norm: 0.8484807263600781, iteration: 103056
loss: 0.9914105534553528,grad_norm: 0.9981599258419718, iteration: 103057
loss: 1.0409826040267944,grad_norm: 0.9999998137878588, iteration: 103058
loss: 0.9625967144966125,grad_norm: 0.9537145191962204, iteration: 103059
loss: 0.9789080023765564,grad_norm: 0.9910122582603142, iteration: 103060
loss: 1.037796139717102,grad_norm: 0.9999993264237137, iteration: 103061
loss: 0.9870755076408386,grad_norm: 0.7933925839295771, iteration: 103062
loss: 0.9717888832092285,grad_norm: 0.9999991198968056, iteration: 103063
loss: 1.023022174835205,grad_norm: 0.8521127376540595, iteration: 103064
loss: 0.9889100193977356,grad_norm: 0.9404124328376883, iteration: 103065
loss: 1.0270671844482422,grad_norm: 0.9999989564599586, iteration: 103066
loss: 0.9863130450248718,grad_norm: 0.8717810877630251, iteration: 103067
loss: 0.9999039173126221,grad_norm: 0.8055403580639809, iteration: 103068
loss: 0.9957677125930786,grad_norm: 0.8553075123484326, iteration: 103069
loss: 1.0464916229248047,grad_norm: 0.9999994674437793, iteration: 103070
loss: 1.0350244045257568,grad_norm: 0.9999989983413006, iteration: 103071
loss: 1.0021475553512573,grad_norm: 0.9999991710618508, iteration: 103072
loss: 1.0720322132110596,grad_norm: 0.999999362850967, iteration: 103073
loss: 1.00880765914917,grad_norm: 0.999999204207238, iteration: 103074
loss: 1.0296630859375,grad_norm: 0.7076962304857153, iteration: 103075
loss: 0.9801334142684937,grad_norm: 0.8674299524839788, iteration: 103076
loss: 1.041351318359375,grad_norm: 0.9999997848330019, iteration: 103077
loss: 0.9431515336036682,grad_norm: 0.9511930642709344, iteration: 103078
loss: 1.0250447988510132,grad_norm: 0.9649650555639047, iteration: 103079
loss: 1.0029280185699463,grad_norm: 0.9999996404844576, iteration: 103080
loss: 0.996472954750061,grad_norm: 0.9999996872932484, iteration: 103081
loss: 1.0171725749969482,grad_norm: 0.9712067615100285, iteration: 103082
loss: 1.1583378314971924,grad_norm: 0.9999996737100124, iteration: 103083
loss: 0.9959160089492798,grad_norm: 0.8458271141623388, iteration: 103084
loss: 1.085080623626709,grad_norm: 0.9999995109213354, iteration: 103085
loss: 1.0026174783706665,grad_norm: 0.9999994344999016, iteration: 103086
loss: 0.9818481206893921,grad_norm: 0.8157775794578066, iteration: 103087
loss: 0.9989272952079773,grad_norm: 0.9276359456180852, iteration: 103088
loss: 0.9940269589424133,grad_norm: 0.9965401581205807, iteration: 103089
loss: 0.9929249882698059,grad_norm: 0.8247090928520308, iteration: 103090
loss: 0.9880213737487793,grad_norm: 0.8873258471287562, iteration: 103091
loss: 1.005958080291748,grad_norm: 0.9999990093681077, iteration: 103092
loss: 0.9889416694641113,grad_norm: 0.8291394311405033, iteration: 103093
loss: 1.0149531364440918,grad_norm: 0.8738126119124203, iteration: 103094
loss: 0.9864569306373596,grad_norm: 0.9687391732569524, iteration: 103095
loss: 1.0093754529953003,grad_norm: 0.9999996621981047, iteration: 103096
loss: 1.017048716545105,grad_norm: 0.9999992418883694, iteration: 103097
loss: 1.0269427299499512,grad_norm: 0.9751463322702112, iteration: 103098
loss: 1.0384231805801392,grad_norm: 0.8175964925602136, iteration: 103099
loss: 0.9897822141647339,grad_norm: 0.9999992813277631, iteration: 103100
loss: 1.0676826238632202,grad_norm: 0.999999498452101, iteration: 103101
loss: 0.990824282169342,grad_norm: 0.8586856001814483, iteration: 103102
loss: 0.9956921935081482,grad_norm: 0.8333525411323415, iteration: 103103
loss: 1.018673300743103,grad_norm: 0.9792942956595712, iteration: 103104
loss: 0.9943860173225403,grad_norm: 0.9669612955723864, iteration: 103105
loss: 1.0307722091674805,grad_norm: 0.9691065213488128, iteration: 103106
loss: 1.0351866483688354,grad_norm: 0.6195086303506496, iteration: 103107
loss: 0.983245313167572,grad_norm: 0.8893758682842737, iteration: 103108
loss: 1.0523914098739624,grad_norm: 0.999999206111024, iteration: 103109
loss: 0.994604766368866,grad_norm: 0.8530163143082129, iteration: 103110
loss: 0.9593104720115662,grad_norm: 0.9999991168100848, iteration: 103111
loss: 0.9684755206108093,grad_norm: 0.8111713166149326, iteration: 103112
loss: 1.0404675006866455,grad_norm: 0.9999992275165231, iteration: 103113
loss: 0.9890915155410767,grad_norm: 0.872308397024246, iteration: 103114
loss: 1.0143444538116455,grad_norm: 0.8889525816614946, iteration: 103115
loss: 0.9931132197380066,grad_norm: 0.9999992003959356, iteration: 103116
loss: 1.0165839195251465,grad_norm: 0.8471802045630097, iteration: 103117
loss: 1.012399673461914,grad_norm: 0.9999997582991561, iteration: 103118
loss: 1.0406304597854614,grad_norm: 0.9433934676644128, iteration: 103119
loss: 0.9587791562080383,grad_norm: 0.9710504669940149, iteration: 103120
loss: 1.0048282146453857,grad_norm: 0.8142799630473057, iteration: 103121
loss: 1.0370733737945557,grad_norm: 0.9999998328547091, iteration: 103122
loss: 0.9886745810508728,grad_norm: 0.9999997069124451, iteration: 103123
loss: 0.979637622833252,grad_norm: 0.9767215555924543, iteration: 103124
loss: 1.0197218656539917,grad_norm: 0.9079749800488974, iteration: 103125
loss: 1.0645114183425903,grad_norm: 0.8288447925154381, iteration: 103126
loss: 1.0084766149520874,grad_norm: 0.9999994598357593, iteration: 103127
loss: 0.9812077879905701,grad_norm: 0.7920638738120139, iteration: 103128
loss: 0.9929413795471191,grad_norm: 0.8828234359440281, iteration: 103129
loss: 1.0121384859085083,grad_norm: 0.9443074282503713, iteration: 103130
loss: 1.0063992738723755,grad_norm: 0.9999997241972489, iteration: 103131
loss: 1.0238415002822876,grad_norm: 0.8611559956864441, iteration: 103132
loss: 1.0037699937820435,grad_norm: 0.8205349037069568, iteration: 103133
loss: 1.0169768333435059,grad_norm: 0.9999993761748698, iteration: 103134
loss: 0.995807945728302,grad_norm: 0.9999989451858626, iteration: 103135
loss: 1.0689653158187866,grad_norm: 0.9999996089254091, iteration: 103136
loss: 1.0209640264511108,grad_norm: 0.9999989797033763, iteration: 103137
loss: 1.0058022737503052,grad_norm: 0.999999116105918, iteration: 103138
loss: 0.9874916672706604,grad_norm: 0.9461978722127972, iteration: 103139
loss: 1.0120221376419067,grad_norm: 0.8328640722162214, iteration: 103140
loss: 1.0163884162902832,grad_norm: 0.9999998822486866, iteration: 103141
loss: 1.0265306234359741,grad_norm: 0.9999994767905376, iteration: 103142
loss: 1.0266358852386475,grad_norm: 0.7511528672872309, iteration: 103143
loss: 0.9997743964195251,grad_norm: 0.999999128977329, iteration: 103144
loss: 0.9734328389167786,grad_norm: 0.8054300247402305, iteration: 103145
loss: 0.9732657074928284,grad_norm: 0.7359845154123216, iteration: 103146
loss: 1.0403481721878052,grad_norm: 0.7454107997461784, iteration: 103147
loss: 0.9972459673881531,grad_norm: 0.9739461954286954, iteration: 103148
loss: 0.9804251790046692,grad_norm: 0.999999494018033, iteration: 103149
loss: 1.0031983852386475,grad_norm: 0.9999991757666457, iteration: 103150
loss: 0.9488134980201721,grad_norm: 0.9999990742350586, iteration: 103151
loss: 0.9862242341041565,grad_norm: 0.8374905372812785, iteration: 103152
loss: 1.0451327562332153,grad_norm: 0.7961446654622168, iteration: 103153
loss: 0.9940023422241211,grad_norm: 0.8867446986735652, iteration: 103154
loss: 1.0027568340301514,grad_norm: 0.9999993137717104, iteration: 103155
loss: 1.0087611675262451,grad_norm: 0.8328635058501176, iteration: 103156
loss: 0.9998433589935303,grad_norm: 0.9999990709713388, iteration: 103157
loss: 0.9893053770065308,grad_norm: 0.8671642347926255, iteration: 103158
loss: 1.0026746988296509,grad_norm: 0.8060445043506002, iteration: 103159
loss: 1.0873548984527588,grad_norm: 0.9999991889271752, iteration: 103160
loss: 1.020038366317749,grad_norm: 0.9999996230539551, iteration: 103161
loss: 1.0166714191436768,grad_norm: 0.9999998353078322, iteration: 103162
loss: 0.9970816969871521,grad_norm: 0.9999991353821507, iteration: 103163
loss: 0.9605146646499634,grad_norm: 0.9249819519374435, iteration: 103164
loss: 1.020750880241394,grad_norm: 0.996897462224135, iteration: 103165
loss: 0.9982539415359497,grad_norm: 0.9999995435853943, iteration: 103166
loss: 0.9717294573783875,grad_norm: 0.8367634164028994, iteration: 103167
loss: 0.9846046566963196,grad_norm: 0.8176172895899138, iteration: 103168
loss: 1.0201442241668701,grad_norm: 0.9999991832769966, iteration: 103169
loss: 0.964932382106781,grad_norm: 0.9999990106909059, iteration: 103170
loss: 0.9949154853820801,grad_norm: 0.7111832562795717, iteration: 103171
loss: 0.9970284700393677,grad_norm: 0.999999127941724, iteration: 103172
loss: 1.0144329071044922,grad_norm: 0.9999992375766951, iteration: 103173
loss: 0.9470571279525757,grad_norm: 0.9999990897293942, iteration: 103174
loss: 1.0264854431152344,grad_norm: 0.8332846040305267, iteration: 103175
loss: 1.117191195487976,grad_norm: 0.9999998826966098, iteration: 103176
loss: 0.9684218764305115,grad_norm: 0.8385440067423496, iteration: 103177
loss: 1.0184787511825562,grad_norm: 0.999999926493075, iteration: 103178
loss: 0.9835836887359619,grad_norm: 0.8858981742338226, iteration: 103179
loss: 1.054797887802124,grad_norm: 0.9999992063946017, iteration: 103180
loss: 0.9920654892921448,grad_norm: 0.8238956522666743, iteration: 103181
loss: 1.0923975706100464,grad_norm: 0.9999997946504575, iteration: 103182
loss: 1.0473154783248901,grad_norm: 0.9999994337297069, iteration: 103183
loss: 0.996042788028717,grad_norm: 0.8708655136386324, iteration: 103184
loss: 1.0016429424285889,grad_norm: 0.9999990994138988, iteration: 103185
loss: 0.9827201962471008,grad_norm: 0.9264991646698039, iteration: 103186
loss: 1.0014746189117432,grad_norm: 0.9826159990200148, iteration: 103187
loss: 0.9813875555992126,grad_norm: 0.9999989022000302, iteration: 103188
loss: 1.0287437438964844,grad_norm: 0.7942806896057941, iteration: 103189
loss: 0.9764630794525146,grad_norm: 0.8400440266249628, iteration: 103190
loss: 1.0307822227478027,grad_norm: 0.9999999062196523, iteration: 103191
loss: 1.020000696182251,grad_norm: 0.8986344751716914, iteration: 103192
loss: 0.9897493124008179,grad_norm: 0.968307626998329, iteration: 103193
loss: 1.0123462677001953,grad_norm: 0.9699617931588798, iteration: 103194
loss: 0.994513750076294,grad_norm: 0.9153142282105133, iteration: 103195
loss: 1.0262986421585083,grad_norm: 0.839831991545227, iteration: 103196
loss: 1.00790536403656,grad_norm: 0.9999990807602107, iteration: 103197
loss: 1.0898395776748657,grad_norm: 0.9999991049837766, iteration: 103198
loss: 1.0971872806549072,grad_norm: 0.9999998266156691, iteration: 103199
loss: 1.1481810808181763,grad_norm: 0.9999998621991474, iteration: 103200
loss: 1.0930440425872803,grad_norm: 0.9999996097245001, iteration: 103201
loss: 1.0360774993896484,grad_norm: 0.9999993851960061, iteration: 103202
loss: 1.0307973623275757,grad_norm: 0.9547879101776503, iteration: 103203
loss: 1.1148878335952759,grad_norm: 0.9999999697046448, iteration: 103204
loss: 1.019580364227295,grad_norm: 0.9999993862600863, iteration: 103205
loss: 1.0646034479141235,grad_norm: 0.8796198637776875, iteration: 103206
loss: 0.9640136957168579,grad_norm: 0.9999991121163087, iteration: 103207
loss: 1.0112632513046265,grad_norm: 0.9724802015071952, iteration: 103208
loss: 1.0344977378845215,grad_norm: 0.9999994813207059, iteration: 103209
loss: 1.0352274179458618,grad_norm: 0.9999991028805312, iteration: 103210
loss: 1.2327286005020142,grad_norm: 0.9999997716506396, iteration: 103211
loss: 1.0123109817504883,grad_norm: 0.939509048723077, iteration: 103212
loss: 1.107422947883606,grad_norm: 1.0000000168883176, iteration: 103213
loss: 1.0715690851211548,grad_norm: 0.9999991994990118, iteration: 103214
loss: 1.0196013450622559,grad_norm: 0.9998439720297793, iteration: 103215
loss: 1.058354377746582,grad_norm: 0.999999839757022, iteration: 103216
loss: 0.9807182550430298,grad_norm: 0.9999991873625187, iteration: 103217
loss: 1.0053093433380127,grad_norm: 0.9999990590737851, iteration: 103218
loss: 1.0110156536102295,grad_norm: 0.8221110118021017, iteration: 103219
loss: 1.0382517576217651,grad_norm: 0.9999999294784938, iteration: 103220
loss: 1.4187588691711426,grad_norm: 0.9999995863877088, iteration: 103221
loss: 1.0234599113464355,grad_norm: 0.9999991177442208, iteration: 103222
loss: 1.050902247428894,grad_norm: 0.9999990255283759, iteration: 103223
loss: 0.9816498756408691,grad_norm: 0.8515515312440731, iteration: 103224
loss: 1.1109493970870972,grad_norm: 0.9999999063103169, iteration: 103225
loss: 0.9943682551383972,grad_norm: 0.9999994072821862, iteration: 103226
loss: 1.0311118364334106,grad_norm: 0.9903356865865524, iteration: 103227
loss: 1.0636156797409058,grad_norm: 0.9999996442319452, iteration: 103228
loss: 1.0539554357528687,grad_norm: 0.9794291800376032, iteration: 103229
loss: 1.026816964149475,grad_norm: 0.9999993824149448, iteration: 103230
loss: 1.0137946605682373,grad_norm: 0.8920504269786481, iteration: 103231
loss: 1.1363998651504517,grad_norm: 0.9999997977048743, iteration: 103232
loss: 1.0162794589996338,grad_norm: 0.744687421239119, iteration: 103233
loss: 1.062929391860962,grad_norm: 0.999999206512905, iteration: 103234
loss: 1.0367107391357422,grad_norm: 0.9999992298155511, iteration: 103235
loss: 0.9987156987190247,grad_norm: 0.9144880699644284, iteration: 103236
loss: 1.0590485334396362,grad_norm: 0.9999998548144442, iteration: 103237
loss: 1.073154091835022,grad_norm: 0.9999996427365322, iteration: 103238
loss: 1.1351330280303955,grad_norm: 0.9999996926668041, iteration: 103239
loss: 1.0887784957885742,grad_norm: 0.999999715992764, iteration: 103240
loss: 1.0438432693481445,grad_norm: 0.9999995478075315, iteration: 103241
loss: 1.3351043462753296,grad_norm: 0.9999997411967556, iteration: 103242
loss: 1.0055396556854248,grad_norm: 0.9999993183080781, iteration: 103243
loss: 1.229063630104065,grad_norm: 1.0000000885822302, iteration: 103244
loss: 1.0489506721496582,grad_norm: 0.9999997642974656, iteration: 103245
loss: 1.1058356761932373,grad_norm: 0.9999994066825593, iteration: 103246
loss: 1.0880454778671265,grad_norm: 0.999999680072038, iteration: 103247
loss: 1.1084372997283936,grad_norm: 0.9999991551315349, iteration: 103248
loss: 1.0251237154006958,grad_norm: 0.9999990402030839, iteration: 103249
loss: 0.9824705719947815,grad_norm: 0.9999992051807071, iteration: 103250
loss: 0.9809728264808655,grad_norm: 0.9815667610850773, iteration: 103251
loss: 1.3344991207122803,grad_norm: 0.9999997169472691, iteration: 103252
loss: 1.0813416242599487,grad_norm: 0.999999880680794, iteration: 103253
loss: 1.1560884714126587,grad_norm: 0.9999994925844881, iteration: 103254
loss: 1.139230489730835,grad_norm: 0.999999404066746, iteration: 103255
loss: 1.124718427658081,grad_norm: 0.9999996925885677, iteration: 103256
loss: 1.1812140941619873,grad_norm: 1.0000000148284256, iteration: 103257
loss: 1.082034707069397,grad_norm: 0.9999997322899646, iteration: 103258
loss: 1.067277431488037,grad_norm: 0.9999998847812731, iteration: 103259
loss: 1.0422781705856323,grad_norm: 0.8991829876519769, iteration: 103260
loss: 0.9962595105171204,grad_norm: 0.9999991255979092, iteration: 103261
loss: 1.1185601949691772,grad_norm: 0.9999997379664024, iteration: 103262
loss: 1.1140005588531494,grad_norm: 0.9999998897856185, iteration: 103263
loss: 1.0748599767684937,grad_norm: 0.9999999283747477, iteration: 103264
loss: 0.9908999800682068,grad_norm: 0.7646136365345306, iteration: 103265
loss: 1.2708638906478882,grad_norm: 1.0000000196080159, iteration: 103266
loss: 1.082453727722168,grad_norm: 0.9999997634256852, iteration: 103267
loss: 1.1579934358596802,grad_norm: 0.9999993616528189, iteration: 103268
loss: 1.0333311557769775,grad_norm: 0.999999357576527, iteration: 103269
loss: 1.0493104457855225,grad_norm: 0.9999997657202635, iteration: 103270
loss: 1.0534546375274658,grad_norm: 0.9999995091156995, iteration: 103271
loss: 0.9944801926612854,grad_norm: 0.8920279645207957, iteration: 103272
loss: 1.0510039329528809,grad_norm: 0.9999992130674026, iteration: 103273
loss: 1.1108604669570923,grad_norm: 0.9999995293004442, iteration: 103274
loss: 0.9803112745285034,grad_norm: 0.9538834933620346, iteration: 103275
loss: 1.0992904901504517,grad_norm: 0.9999998495119345, iteration: 103276
loss: 1.1976460218429565,grad_norm: 0.9999995406837188, iteration: 103277
loss: 1.2587356567382812,grad_norm: 0.9999995516448105, iteration: 103278
loss: 1.1005858182907104,grad_norm: 0.9999994875889807, iteration: 103279
loss: 1.1888744831085205,grad_norm: 0.9999996820029811, iteration: 103280
loss: 1.2190051078796387,grad_norm: 0.9999999285710253, iteration: 103281
loss: 1.226540207862854,grad_norm: 0.9999994237535862, iteration: 103282
loss: 1.082531213760376,grad_norm: 0.9999990073257988, iteration: 103283
loss: 1.1135585308074951,grad_norm: 0.9999995998024469, iteration: 103284
loss: 1.061408281326294,grad_norm: 0.999999482054942, iteration: 103285
loss: 1.0556315183639526,grad_norm: 0.9999991024577688, iteration: 103286
loss: 1.2508220672607422,grad_norm: 0.9999998867391217, iteration: 103287
loss: 1.0109081268310547,grad_norm: 0.9999992210632367, iteration: 103288
loss: 1.1290075778961182,grad_norm: 0.9999999231498635, iteration: 103289
loss: 1.124862790107727,grad_norm: 0.9999998535054254, iteration: 103290
loss: 1.1119372844696045,grad_norm: 0.999999812399845, iteration: 103291
loss: 1.0631917715072632,grad_norm: 0.9999991822580125, iteration: 103292
loss: 1.028916597366333,grad_norm: 0.9999993598158848, iteration: 103293
loss: 0.9723976850509644,grad_norm: 0.9999992749433901, iteration: 103294
loss: 0.9985547065734863,grad_norm: 0.9999997869463033, iteration: 103295
loss: 1.1074765920639038,grad_norm: 0.9999993071266864, iteration: 103296
loss: 1.2375046014785767,grad_norm: 0.9999997251004668, iteration: 103297
loss: 1.2435969114303589,grad_norm: 0.999999864423269, iteration: 103298
loss: 1.075178623199463,grad_norm: 0.9999997141756035, iteration: 103299
loss: 1.0655457973480225,grad_norm: 0.9999996380926715, iteration: 103300
loss: 1.0448787212371826,grad_norm: 0.9999994701485759, iteration: 103301
loss: 1.011654019355774,grad_norm: 0.9199957649817123, iteration: 103302
loss: 1.240893006324768,grad_norm: 0.9999994910391178, iteration: 103303
loss: 1.1331850290298462,grad_norm: 0.9999991106741538, iteration: 103304
loss: 1.005159616470337,grad_norm: 0.999999655490123, iteration: 103305
loss: 1.262245535850525,grad_norm: 0.9999998911516321, iteration: 103306
loss: 1.025014042854309,grad_norm: 0.9739348750526292, iteration: 103307
loss: 1.0146210193634033,grad_norm: 0.9999995141418762, iteration: 103308
loss: 0.9842937588691711,grad_norm: 0.9999991496253231, iteration: 103309
loss: 1.1637049913406372,grad_norm: 0.9999994672045652, iteration: 103310
loss: 1.0316903591156006,grad_norm: 0.999999120749693, iteration: 103311
loss: 1.0012387037277222,grad_norm: 0.9999997179706395, iteration: 103312
loss: 1.1465656757354736,grad_norm: 0.9999993132489002, iteration: 103313
loss: 1.1574747562408447,grad_norm: 0.9999991095378858, iteration: 103314
loss: 1.0610376596450806,grad_norm: 0.9999991433036154, iteration: 103315
loss: 1.0414140224456787,grad_norm: 0.9789541487898754, iteration: 103316
loss: 0.9980806112289429,grad_norm: 0.8060289208207355, iteration: 103317
loss: 1.169573426246643,grad_norm: 0.9999994795773796, iteration: 103318
loss: 1.0457249879837036,grad_norm: 0.9999994291768209, iteration: 103319
loss: 1.1398723125457764,grad_norm: 0.99999971836894, iteration: 103320
loss: 1.2772676944732666,grad_norm: 0.999999815083409, iteration: 103321
loss: 1.2134591341018677,grad_norm: 0.9999998037064952, iteration: 103322
loss: 1.1352108716964722,grad_norm: 0.9999995692219512, iteration: 103323
loss: 1.030332326889038,grad_norm: 0.999999271671072, iteration: 103324
loss: 1.0802853107452393,grad_norm: 0.9999998599313351, iteration: 103325
loss: 1.016544222831726,grad_norm: 0.9999992053939112, iteration: 103326
loss: 1.0245627164840698,grad_norm: 0.9999996653618576, iteration: 103327
loss: 1.05832839012146,grad_norm: 0.864461402707407, iteration: 103328
loss: 1.055117130279541,grad_norm: 0.99999911455947, iteration: 103329
loss: 1.0022492408752441,grad_norm: 0.9999995221865844, iteration: 103330
loss: 0.9806435108184814,grad_norm: 0.9999991006827501, iteration: 103331
loss: 0.9893398880958557,grad_norm: 0.9999991432622204, iteration: 103332
loss: 1.094110369682312,grad_norm: 0.9999998039993281, iteration: 103333
loss: 1.1863054037094116,grad_norm: 0.9999995916227971, iteration: 103334
loss: 1.0169875621795654,grad_norm: 0.9999994309330027, iteration: 103335
loss: 1.243428349494934,grad_norm: 0.9999999378131817, iteration: 103336
loss: 0.9851862788200378,grad_norm: 0.9027872514610581, iteration: 103337
loss: 1.0848898887634277,grad_norm: 0.8735589881736419, iteration: 103338
loss: 1.085405707359314,grad_norm: 0.835582139784971, iteration: 103339
loss: 0.9711800217628479,grad_norm: 0.8621954329725321, iteration: 103340
loss: 1.0131821632385254,grad_norm: 0.9999990329505176, iteration: 103341
loss: 1.0801548957824707,grad_norm: 0.999999574701927, iteration: 103342
loss: 1.206191897392273,grad_norm: 0.9999995202319567, iteration: 103343
loss: 1.1317564249038696,grad_norm: 0.9999999179052764, iteration: 103344
loss: 1.0235649347305298,grad_norm: 0.9999996516327289, iteration: 103345
loss: 1.078298568725586,grad_norm: 0.9999993177647128, iteration: 103346
loss: 1.044159173965454,grad_norm: 0.9999998954462803, iteration: 103347
loss: 1.1976850032806396,grad_norm: 0.9999998124619991, iteration: 103348
loss: 1.0865674018859863,grad_norm: 0.999999630244901, iteration: 103349
loss: 1.0721980333328247,grad_norm: 0.9999992799449318, iteration: 103350
loss: 1.073663592338562,grad_norm: 0.999999797788067, iteration: 103351
loss: 1.073618769645691,grad_norm: 0.9999995582149533, iteration: 103352
loss: 1.093793272972107,grad_norm: 0.9999994553283627, iteration: 103353
loss: 1.1009799242019653,grad_norm: 0.9999992715994437, iteration: 103354
loss: 0.9934549927711487,grad_norm: 0.9008510702206479, iteration: 103355
loss: 1.0891355276107788,grad_norm: 0.9999997128805493, iteration: 103356
loss: 1.0510544776916504,grad_norm: 0.9999997377667107, iteration: 103357
loss: 1.0309115648269653,grad_norm: 0.9999995597888293, iteration: 103358
loss: 1.0549057722091675,grad_norm: 0.9999994076540173, iteration: 103359
loss: 1.0197811126708984,grad_norm: 0.9999990594939681, iteration: 103360
loss: 1.0604628324508667,grad_norm: 0.9999996032191605, iteration: 103361
loss: 1.007376790046692,grad_norm: 0.999999195056035, iteration: 103362
loss: 1.0389325618743896,grad_norm: 0.9999991074698005, iteration: 103363
loss: 1.0873892307281494,grad_norm: 0.9999998149149307, iteration: 103364
loss: 1.170698881149292,grad_norm: 0.9999998325612494, iteration: 103365
loss: 1.05683171749115,grad_norm: 0.9999998864699665, iteration: 103366
loss: 1.3118443489074707,grad_norm: 0.9999996901164218, iteration: 103367
loss: 1.1795483827590942,grad_norm: 0.9999999169330306, iteration: 103368
loss: 0.9805271625518799,grad_norm: 0.9999998759522583, iteration: 103369
loss: 1.3383288383483887,grad_norm: 0.9999999832898019, iteration: 103370
loss: 1.0683196783065796,grad_norm: 1.0000000614500695, iteration: 103371
loss: 1.0740078687667847,grad_norm: 0.9999993190835772, iteration: 103372
loss: 1.3489272594451904,grad_norm: 0.999999780895069, iteration: 103373
loss: 1.3027608394622803,grad_norm: 0.999999709092511, iteration: 103374
loss: 1.1328948736190796,grad_norm: 0.9999999455961514, iteration: 103375
loss: 1.0173410177230835,grad_norm: 0.9999997182320778, iteration: 103376
loss: 1.2800610065460205,grad_norm: 0.999999907498499, iteration: 103377
loss: 1.1708064079284668,grad_norm: 0.9999998184545772, iteration: 103378
loss: 0.9706655740737915,grad_norm: 0.9999995212174138, iteration: 103379
loss: 1.330885887145996,grad_norm: 0.9999995977545134, iteration: 103380
loss: 1.0728987455368042,grad_norm: 1.0000000189616074, iteration: 103381
loss: 0.9794467687606812,grad_norm: 0.8666308516907908, iteration: 103382
loss: 1.0606745481491089,grad_norm: 0.9999996964665653, iteration: 103383
loss: 1.0208752155303955,grad_norm: 0.9999992365667697, iteration: 103384
loss: 1.3937755823135376,grad_norm: 0.9999998982594571, iteration: 103385
loss: 1.011738896369934,grad_norm: 0.9999997719735847, iteration: 103386
loss: 1.0678564310073853,grad_norm: 0.9999998145665963, iteration: 103387
loss: 1.3236380815505981,grad_norm: 0.9999997816547658, iteration: 103388
loss: 1.1466145515441895,grad_norm: 0.9999999992534099, iteration: 103389
loss: 1.3484123945236206,grad_norm: 0.9999999754844987, iteration: 103390
loss: 1.1729143857955933,grad_norm: 0.9999998174060091, iteration: 103391
loss: 0.9863225817680359,grad_norm: 0.9999996390127026, iteration: 103392
loss: 1.005352258682251,grad_norm: 0.9212627986870979, iteration: 103393
loss: 1.0413286685943604,grad_norm: 0.9999992506701995, iteration: 103394
loss: 1.061686396598816,grad_norm: 0.9806672011356014, iteration: 103395
loss: 1.542185664176941,grad_norm: 0.9999999597838173, iteration: 103396
loss: 1.2996002435684204,grad_norm: 0.999999789541488, iteration: 103397
loss: 1.098219633102417,grad_norm: 0.9999991737749865, iteration: 103398
loss: 0.9969541430473328,grad_norm: 0.9999995559246678, iteration: 103399
loss: 1.060326099395752,grad_norm: 0.9981828592708262, iteration: 103400
loss: 1.1067289113998413,grad_norm: 0.9999997602267454, iteration: 103401
loss: 1.097344994544983,grad_norm: 0.9999996923481568, iteration: 103402
loss: 1.0388202667236328,grad_norm: 1.0000000408721204, iteration: 103403
loss: 1.0749969482421875,grad_norm: 0.9999993673084648, iteration: 103404
loss: 1.0633355379104614,grad_norm: 0.9999992358553675, iteration: 103405
loss: 1.0456949472427368,grad_norm: 0.8420598012211608, iteration: 103406
loss: 1.2411190271377563,grad_norm: 0.999999938737268, iteration: 103407
loss: 1.2881983518600464,grad_norm: 0.9999998014636192, iteration: 103408
loss: 1.0026801824569702,grad_norm: 0.983154753596715, iteration: 103409
loss: 1.0229480266571045,grad_norm: 0.9802318745153029, iteration: 103410
loss: 1.036283016204834,grad_norm: 0.9999994032845745, iteration: 103411
loss: 1.2217530012130737,grad_norm: 0.9999998213657506, iteration: 103412
loss: 1.0056415796279907,grad_norm: 0.9999991826520441, iteration: 103413
loss: 1.0443034172058105,grad_norm: 0.9999996509944817, iteration: 103414
loss: 1.0076305866241455,grad_norm: 0.9999990756579563, iteration: 103415
loss: 1.029026746749878,grad_norm: 0.9999990446193393, iteration: 103416
loss: 1.0841354131698608,grad_norm: 0.9999997001004612, iteration: 103417
loss: 0.9746065735816956,grad_norm: 0.8678376315560417, iteration: 103418
loss: 0.9880710244178772,grad_norm: 0.8305900156010665, iteration: 103419
loss: 1.264411449432373,grad_norm: 0.9999997779499642, iteration: 103420
loss: 1.1261953115463257,grad_norm: 0.9999994072605958, iteration: 103421
loss: 1.0026148557662964,grad_norm: 0.9205628989947203, iteration: 103422
loss: 0.9952036738395691,grad_norm: 0.999999152216227, iteration: 103423
loss: 0.9992527961730957,grad_norm: 0.9499782745123652, iteration: 103424
loss: 1.0491440296173096,grad_norm: 0.9999994794511755, iteration: 103425
loss: 1.141299843788147,grad_norm: 0.9999992263189759, iteration: 103426
loss: 1.1016701459884644,grad_norm: 0.9999998627372712, iteration: 103427
loss: 1.0184404850006104,grad_norm: 0.9962996316467214, iteration: 103428
loss: 1.2176955938339233,grad_norm: 0.9999996452927076, iteration: 103429
loss: 1.1026363372802734,grad_norm: 0.9999995553329826, iteration: 103430
loss: 1.0186662673950195,grad_norm: 0.9999991978018095, iteration: 103431
loss: 1.0859181880950928,grad_norm: 0.999999967166908, iteration: 103432
loss: 1.0837478637695312,grad_norm: 0.9999996772464176, iteration: 103433
loss: 1.0815088748931885,grad_norm: 0.9108832754875221, iteration: 103434
loss: 1.0467033386230469,grad_norm: 0.9999990816725682, iteration: 103435
loss: 1.0258651971817017,grad_norm: 0.9999995272711961, iteration: 103436
loss: 1.0587544441223145,grad_norm: 0.9999993507619673, iteration: 103437
loss: 1.109245777130127,grad_norm: 0.9999993301800012, iteration: 103438
loss: 1.0406296253204346,grad_norm: 0.9999994075626791, iteration: 103439
loss: 1.041931390762329,grad_norm: 0.9999999582883865, iteration: 103440
loss: 1.014058232307434,grad_norm: 0.8174479121783469, iteration: 103441
loss: 1.0602155923843384,grad_norm: 0.9999996565101148, iteration: 103442
loss: 1.1165775060653687,grad_norm: 0.9999996489970641, iteration: 103443
loss: 1.0220259428024292,grad_norm: 0.8803165492460977, iteration: 103444
loss: 1.066475749015808,grad_norm: 0.9999992746188122, iteration: 103445
loss: 1.028937578201294,grad_norm: 0.9999998154673478, iteration: 103446
loss: 1.1798855066299438,grad_norm: 0.9999997864734534, iteration: 103447
loss: 1.1050256490707397,grad_norm: 0.9999993419137347, iteration: 103448
loss: 1.0994086265563965,grad_norm: 0.9999993472392805, iteration: 103449
loss: 0.9932575225830078,grad_norm: 0.9999995950022935, iteration: 103450
loss: 1.0174013376235962,grad_norm: 0.9999999188330392, iteration: 103451
loss: 1.0662648677825928,grad_norm: 0.8646677821591204, iteration: 103452
loss: 1.118747353553772,grad_norm: 0.9999991846531496, iteration: 103453
loss: 1.0218886137008667,grad_norm: 0.8459539408481929, iteration: 103454
loss: 1.2245635986328125,grad_norm: 0.9999996210171166, iteration: 103455
loss: 1.166479468345642,grad_norm: 0.9999998137301117, iteration: 103456
loss: 1.4492424726486206,grad_norm: 0.999999938274244, iteration: 103457
loss: 1.0629703998565674,grad_norm: 0.9999996345889488, iteration: 103458
loss: 1.33375084400177,grad_norm: 0.9999994625186881, iteration: 103459
loss: 1.245543122291565,grad_norm: 0.9999997027292112, iteration: 103460
loss: 1.0451220273971558,grad_norm: 0.9018298153924043, iteration: 103461
loss: 1.070503830909729,grad_norm: 0.9999991816756868, iteration: 103462
loss: 1.199216365814209,grad_norm: 0.9999998975058274, iteration: 103463
loss: 1.0265401601791382,grad_norm: 0.9999991147313411, iteration: 103464
loss: 1.0375959873199463,grad_norm: 0.9999990803853577, iteration: 103465
loss: 1.1463050842285156,grad_norm: 0.9999995839008942, iteration: 103466
loss: 1.1222732067108154,grad_norm: 0.9999993017727852, iteration: 103467
loss: 1.0462934970855713,grad_norm: 0.9803236536263246, iteration: 103468
loss: 1.1800713539123535,grad_norm: 0.9999997597542273, iteration: 103469
loss: 1.2296706438064575,grad_norm: 0.999999646327814, iteration: 103470
loss: 1.1650606393814087,grad_norm: 0.9999991253339744, iteration: 103471
loss: 1.2642208337783813,grad_norm: 0.9999998654234185, iteration: 103472
loss: 1.0755654573440552,grad_norm: 1.0000000067758585, iteration: 103473
loss: 1.2490832805633545,grad_norm: 0.9999997509555288, iteration: 103474
loss: 1.1420133113861084,grad_norm: 0.9999995362366411, iteration: 103475
loss: 1.2149759531021118,grad_norm: 0.999999761916516, iteration: 103476
loss: 1.124956488609314,grad_norm: 0.999999859597525, iteration: 103477
loss: 1.076998233795166,grad_norm: 0.999999316112979, iteration: 103478
loss: 1.7254596948623657,grad_norm: 0.9999998975705497, iteration: 103479
loss: 1.341801643371582,grad_norm: 0.9999999745227076, iteration: 103480
loss: 0.9962480068206787,grad_norm: 0.99999926305555, iteration: 103481
loss: 1.0664154291152954,grad_norm: 0.9999991725989936, iteration: 103482
loss: 0.9755263328552246,grad_norm: 0.8266150807209268, iteration: 103483
loss: 1.1615856885910034,grad_norm: 0.9999991139063799, iteration: 103484
loss: 1.1902472972869873,grad_norm: 0.9999994642506858, iteration: 103485
loss: 1.1103622913360596,grad_norm: 0.9999995875065768, iteration: 103486
loss: 1.0639983415603638,grad_norm: 0.9788460652923108, iteration: 103487
loss: 1.0666191577911377,grad_norm: 0.9773910126844093, iteration: 103488
loss: 1.0729767084121704,grad_norm: 0.9999998651200183, iteration: 103489
loss: 1.012355923652649,grad_norm: 0.98372460050172, iteration: 103490
loss: 1.0185967683792114,grad_norm: 0.9747834184789957, iteration: 103491
loss: 1.1523183584213257,grad_norm: 0.999999892732747, iteration: 103492
loss: 1.0296882390975952,grad_norm: 0.9999994595886356, iteration: 103493
loss: 1.0643610954284668,grad_norm: 0.9999999680846858, iteration: 103494
loss: 1.0667378902435303,grad_norm: 0.9999990842875579, iteration: 103495
loss: 1.0133394002914429,grad_norm: 0.9999994687372968, iteration: 103496
loss: 1.0687352418899536,grad_norm: 0.9999996151566173, iteration: 103497
loss: 1.0981080532073975,grad_norm: 0.9999991492390342, iteration: 103498
loss: 1.0907906293869019,grad_norm: 0.9999991258691477, iteration: 103499
loss: 1.0657278299331665,grad_norm: 0.9999994694451003, iteration: 103500
loss: 1.011557698249817,grad_norm: 0.9999991025540504, iteration: 103501
loss: 1.1389634609222412,grad_norm: 0.9999991814680627, iteration: 103502
loss: 1.3723714351654053,grad_norm: 0.9999998693548283, iteration: 103503
loss: 1.1412842273712158,grad_norm: 0.9999997771474928, iteration: 103504
loss: 1.3023021221160889,grad_norm: 0.9999990006868048, iteration: 103505
loss: 1.2150546312332153,grad_norm: 0.9999994441255913, iteration: 103506
loss: 1.3069857358932495,grad_norm: 0.9999999705922606, iteration: 103507
loss: 1.230962872505188,grad_norm: 0.9999992623700612, iteration: 103508
loss: 1.160332202911377,grad_norm: 0.9999990608607108, iteration: 103509
loss: 1.4225515127182007,grad_norm: 0.9999996743509233, iteration: 103510
loss: 1.2142950296401978,grad_norm: 0.9999991748334947, iteration: 103511
loss: 1.4499409198760986,grad_norm: 0.9999997432061063, iteration: 103512
loss: 1.3227704763412476,grad_norm: 0.9999996205800059, iteration: 103513
loss: 1.3495707511901855,grad_norm: 0.9999992295811464, iteration: 103514
loss: 1.5078613758087158,grad_norm: 0.9999999560395789, iteration: 103515
loss: 1.6057032346725464,grad_norm: 0.9999999483059705, iteration: 103516
loss: 1.5008364915847778,grad_norm: 1.0000000411879852, iteration: 103517
loss: 1.3738857507705688,grad_norm: 0.999999898210106, iteration: 103518
loss: 1.4622279405593872,grad_norm: 1.0000000455607094, iteration: 103519
loss: 1.2856255769729614,grad_norm: 0.9999999797601069, iteration: 103520
loss: 1.5511711835861206,grad_norm: 0.9999995627855658, iteration: 103521
loss: 1.1982934474945068,grad_norm: 0.9999992002847626, iteration: 103522
loss: 1.2279021739959717,grad_norm: 0.999999554846232, iteration: 103523
loss: 1.1722095012664795,grad_norm: 0.9999993859348835, iteration: 103524
loss: 1.4149250984191895,grad_norm: 0.9999997560960435, iteration: 103525
loss: 1.1983729600906372,grad_norm: 0.999999175765724, iteration: 103526
loss: 1.4000585079193115,grad_norm: 0.9999998685915344, iteration: 103527
loss: 1.3579113483428955,grad_norm: 0.9999995998998835, iteration: 103528
loss: 1.3615800142288208,grad_norm: 0.9999996648121852, iteration: 103529
loss: 1.4126824140548706,grad_norm: 1.0000000348662326, iteration: 103530
loss: 1.5212526321411133,grad_norm: 0.9999998022043739, iteration: 103531
loss: 1.1866587400436401,grad_norm: 0.9999998237763418, iteration: 103532
loss: 1.221267819404602,grad_norm: 0.9999999138235988, iteration: 103533
loss: 1.3066380023956299,grad_norm: 0.9999998402889531, iteration: 103534
loss: 1.1863383054733276,grad_norm: 0.9999993630554043, iteration: 103535
loss: 1.1996990442276,grad_norm: 1.0000000057745613, iteration: 103536
loss: 1.088046908378601,grad_norm: 0.9999991583640606, iteration: 103537
loss: 1.3635836839675903,grad_norm: 0.9999999652897908, iteration: 103538
loss: 1.3855949640274048,grad_norm: 0.9999997933154069, iteration: 103539
loss: 1.3865022659301758,grad_norm: 0.9999997479970929, iteration: 103540
loss: 1.391574501991272,grad_norm: 0.9999997455712876, iteration: 103541
loss: 1.4305366277694702,grad_norm: 0.9999997426432046, iteration: 103542
loss: 1.4443062543869019,grad_norm: 0.9999995938332414, iteration: 103543
loss: 1.1936269998550415,grad_norm: 0.9999994705377822, iteration: 103544
loss: 1.099908471107483,grad_norm: 0.9999993998315513, iteration: 103545
loss: 1.1843708753585815,grad_norm: 1.0000000949676504, iteration: 103546
loss: 1.1815848350524902,grad_norm: 0.999999894823893, iteration: 103547
loss: 1.292734980583191,grad_norm: 0.9999996723994591, iteration: 103548
loss: 1.1378378868103027,grad_norm: 0.9999994839594107, iteration: 103549
loss: 1.1551713943481445,grad_norm: 0.9999995653339963, iteration: 103550
loss: 1.3323147296905518,grad_norm: 0.9999998499441735, iteration: 103551
loss: 1.2217864990234375,grad_norm: 0.9999994710863725, iteration: 103552
loss: 1.4346835613250732,grad_norm: 0.9999998092151323, iteration: 103553
loss: 1.3257088661193848,grad_norm: 0.9999996838363828, iteration: 103554
loss: 1.452910304069519,grad_norm: 0.999999622264477, iteration: 103555
loss: 1.3156700134277344,grad_norm: 0.9999997892753613, iteration: 103556
loss: 1.2302135229110718,grad_norm: 0.9999998627684881, iteration: 103557
loss: 1.2975990772247314,grad_norm: 0.9999999171355348, iteration: 103558
loss: 1.3855366706848145,grad_norm: 0.9999999700507153, iteration: 103559
loss: 1.3902965784072876,grad_norm: 0.9999997505941338, iteration: 103560
loss: 1.2629529237747192,grad_norm: 0.9999991493831809, iteration: 103561
loss: 1.4725933074951172,grad_norm: 0.999999800105106, iteration: 103562
loss: 1.1268061399459839,grad_norm: 0.9999993077871254, iteration: 103563
loss: 1.2005435228347778,grad_norm: 0.999999492979446, iteration: 103564
loss: 1.2798856496810913,grad_norm: 0.9999998728333288, iteration: 103565
loss: 1.466955304145813,grad_norm: 0.9999999932138118, iteration: 103566
loss: 1.5056722164154053,grad_norm: 0.9999997886557807, iteration: 103567
loss: 1.1944177150726318,grad_norm: 0.9999994964337237, iteration: 103568
loss: 1.3953903913497925,grad_norm: 0.9999998185251954, iteration: 103569
loss: 1.273997187614441,grad_norm: 0.9999991325644414, iteration: 103570
loss: 1.4530807733535767,grad_norm: 0.9999998798695581, iteration: 103571
loss: 1.1613730192184448,grad_norm: 0.9999990848402324, iteration: 103572
loss: 1.3791725635528564,grad_norm: 0.9999996545643997, iteration: 103573
loss: 1.3441011905670166,grad_norm: 0.9999997364791945, iteration: 103574
loss: 1.3000723123550415,grad_norm: 0.9999997998832701, iteration: 103575
loss: 1.2616649866104126,grad_norm: 0.999999809170378, iteration: 103576
loss: 1.1944574117660522,grad_norm: 0.9999997959085821, iteration: 103577
loss: 1.1829006671905518,grad_norm: 0.9999998552919158, iteration: 103578
loss: 1.4092224836349487,grad_norm: 0.9999998898282836, iteration: 103579
loss: 1.1621363162994385,grad_norm: 0.9999991261933681, iteration: 103580
loss: 1.3486419916152954,grad_norm: 0.9999996584212587, iteration: 103581
loss: 1.2202361822128296,grad_norm: 0.9999996141020122, iteration: 103582
loss: 1.2472882270812988,grad_norm: 0.999999937157775, iteration: 103583
loss: 1.3995916843414307,grad_norm: 0.9999999165590645, iteration: 103584
loss: 1.3487972021102905,grad_norm: 0.9999999216714222, iteration: 103585
loss: 1.2827847003936768,grad_norm: 0.9999996244699682, iteration: 103586
loss: 1.1041264533996582,grad_norm: 0.9999989938575926, iteration: 103587
loss: 1.1741955280303955,grad_norm: 0.9999998561457263, iteration: 103588
loss: 1.143979549407959,grad_norm: 0.9999991669653462, iteration: 103589
loss: 1.3322253227233887,grad_norm: 0.9999996359353546, iteration: 103590
loss: 1.2904869318008423,grad_norm: 0.9999997301663328, iteration: 103591
loss: 1.3295302391052246,grad_norm: 0.9999998913718028, iteration: 103592
loss: 1.2188537120819092,grad_norm: 0.9999994319376122, iteration: 103593
loss: 1.2199875116348267,grad_norm: 0.9999996045090512, iteration: 103594
loss: 1.1151697635650635,grad_norm: 0.9999998718426949, iteration: 103595
loss: 1.3436630964279175,grad_norm: 0.9999998752245435, iteration: 103596
loss: 1.1305997371673584,grad_norm: 0.9999993543786687, iteration: 103597
loss: 1.3977855443954468,grad_norm: 0.9999999060420061, iteration: 103598
loss: 1.357217788696289,grad_norm: 0.9999997481318668, iteration: 103599
loss: 1.2212198972702026,grad_norm: 0.9999997178731831, iteration: 103600
loss: 1.2379931211471558,grad_norm: 0.9999998675645664, iteration: 103601
loss: 1.3363430500030518,grad_norm: 0.9999998630340623, iteration: 103602
loss: 1.3227667808532715,grad_norm: 1.0000000049066315, iteration: 103603
loss: 1.2807707786560059,grad_norm: 0.9999999198298648, iteration: 103604
loss: 1.2197195291519165,grad_norm: 0.9999998845610841, iteration: 103605
loss: 1.1667238473892212,grad_norm: 0.9999998102318951, iteration: 103606
loss: 1.173971176147461,grad_norm: 0.9999998793960642, iteration: 103607
loss: 1.2716631889343262,grad_norm: 0.9999999497570168, iteration: 103608
loss: 1.2752596139907837,grad_norm: 0.9999999478391204, iteration: 103609
loss: 1.1148289442062378,grad_norm: 0.9999993846763683, iteration: 103610
loss: 1.151577115058899,grad_norm: 0.9999992534427313, iteration: 103611
loss: 1.2267122268676758,grad_norm: 0.9999994431598541, iteration: 103612
loss: 1.284924864768982,grad_norm: 0.9999997765737614, iteration: 103613
loss: 1.0345451831817627,grad_norm: 0.9495090002657456, iteration: 103614
loss: 1.1172707080841064,grad_norm: 0.9999992392903213, iteration: 103615
loss: 1.5258405208587646,grad_norm: 0.999999929562257, iteration: 103616
loss: 1.163650393486023,grad_norm: 0.9999998992290883, iteration: 103617
loss: 1.1414482593536377,grad_norm: 0.9999998940510819, iteration: 103618
loss: 1.1191861629486084,grad_norm: 0.9999994182046599, iteration: 103619
loss: 1.1977732181549072,grad_norm: 0.9999998595630152, iteration: 103620
loss: 1.2094014883041382,grad_norm: 0.9999998632769759, iteration: 103621
loss: 1.3457047939300537,grad_norm: 0.9999999269393701, iteration: 103622
loss: 1.1727219820022583,grad_norm: 0.9999992200397888, iteration: 103623
loss: 1.2654143571853638,grad_norm: 0.9999997633836718, iteration: 103624
loss: 1.4300616979599,grad_norm: 0.9999998969518022, iteration: 103625
loss: 1.1618183851242065,grad_norm: 0.9999996478449378, iteration: 103626
loss: 1.028253436088562,grad_norm: 0.9999992444682771, iteration: 103627
loss: 1.265216588973999,grad_norm: 0.9999999887577946, iteration: 103628
loss: 1.212410569190979,grad_norm: 0.9999992915226701, iteration: 103629
loss: 1.0856785774230957,grad_norm: 0.9999998592734017, iteration: 103630
loss: 1.2413915395736694,grad_norm: 0.9999999209430364, iteration: 103631
loss: 1.2190872430801392,grad_norm: 0.9999999665460606, iteration: 103632
loss: 1.2756454944610596,grad_norm: 1.0000000242580784, iteration: 103633
loss: 1.109826922416687,grad_norm: 0.9999991822600334, iteration: 103634
loss: 1.2935298681259155,grad_norm: 0.9999999231800772, iteration: 103635
loss: 1.2549076080322266,grad_norm: 0.9999998973230337, iteration: 103636
loss: 1.3601677417755127,grad_norm: 1.0000000136148341, iteration: 103637
loss: 1.5875846147537231,grad_norm: 1.0000000228358157, iteration: 103638
loss: 1.199194312095642,grad_norm: 0.9999989873220253, iteration: 103639
loss: 1.1716229915618896,grad_norm: 1.0000000710479375, iteration: 103640
loss: 1.348324179649353,grad_norm: 0.9999999157603043, iteration: 103641
loss: 1.268972635269165,grad_norm: 0.9999998326534411, iteration: 103642
loss: 1.1071192026138306,grad_norm: 0.9999997038984392, iteration: 103643
loss: 1.2410030364990234,grad_norm: 0.9999996739492437, iteration: 103644
loss: 1.1372311115264893,grad_norm: 0.9999998934710597, iteration: 103645
loss: 1.1518187522888184,grad_norm: 0.9999997664637392, iteration: 103646
loss: 1.3616178035736084,grad_norm: 0.9999998961981146, iteration: 103647
loss: 1.2406797409057617,grad_norm: 0.999999661907715, iteration: 103648
loss: 1.2293211221694946,grad_norm: 0.9999998900519718, iteration: 103649
loss: 1.150801658630371,grad_norm: 0.9999996371809952, iteration: 103650
loss: 1.182471513748169,grad_norm: 0.9999995670456446, iteration: 103651
loss: 1.0628262758255005,grad_norm: 0.9999990535391625, iteration: 103652
loss: 1.0810675621032715,grad_norm: 0.9999993304239807, iteration: 103653
loss: 1.3336262702941895,grad_norm: 0.99999993811644, iteration: 103654
loss: 1.2832142114639282,grad_norm: 0.9999999639787285, iteration: 103655
loss: 1.0135648250579834,grad_norm: 0.8990508084602291, iteration: 103656
loss: 1.259274959564209,grad_norm: 0.9999995252533466, iteration: 103657
loss: 1.1961066722869873,grad_norm: 0.9999997425341283, iteration: 103658
loss: 1.0516331195831299,grad_norm: 0.9999998206001651, iteration: 103659
loss: 1.1052360534667969,grad_norm: 1.0000000653030254, iteration: 103660
loss: 1.1907085180282593,grad_norm: 0.9999990398230407, iteration: 103661
loss: 1.1661171913146973,grad_norm: 0.9999998801319229, iteration: 103662
loss: 1.0925742387771606,grad_norm: 0.9999998231394787, iteration: 103663
loss: 1.0413607358932495,grad_norm: 0.9999991511484799, iteration: 103664
loss: 1.2043614387512207,grad_norm: 0.9999993795847729, iteration: 103665
loss: 1.1576861143112183,grad_norm: 0.9999995788588829, iteration: 103666
loss: 1.1018602848052979,grad_norm: 0.9999993510412318, iteration: 103667
loss: 1.1583305597305298,grad_norm: 0.9999994838403574, iteration: 103668
loss: 1.1946606636047363,grad_norm: 0.9999991376535485, iteration: 103669
loss: 1.0940189361572266,grad_norm: 0.9999998770432776, iteration: 103670
loss: 1.1131725311279297,grad_norm: 0.9999991248302275, iteration: 103671
loss: 1.0626213550567627,grad_norm: 0.9999993123734545, iteration: 103672
loss: 1.2022225856781006,grad_norm: 0.999999733059734, iteration: 103673
loss: 1.0702754259109497,grad_norm: 0.9999992777159463, iteration: 103674
loss: 1.32603919506073,grad_norm: 0.9999992960605608, iteration: 103675
loss: 1.3125308752059937,grad_norm: 0.9999996641014628, iteration: 103676
loss: 1.2336260080337524,grad_norm: 0.9999994703017564, iteration: 103677
loss: 1.1795357465744019,grad_norm: 0.9999996131121266, iteration: 103678
loss: 1.1270928382873535,grad_norm: 0.9999990494744124, iteration: 103679
loss: 1.2700462341308594,grad_norm: 0.9999995687671577, iteration: 103680
loss: 1.029329538345337,grad_norm: 0.9704684569783663, iteration: 103681
loss: 1.0691381692886353,grad_norm: 1.0000000107354987, iteration: 103682
loss: 1.2036781311035156,grad_norm: 0.9999994152825328, iteration: 103683
loss: 1.436332106590271,grad_norm: 0.9999999226444378, iteration: 103684
loss: 1.238789439201355,grad_norm: 0.999999295758359, iteration: 103685
loss: 1.1651322841644287,grad_norm: 0.9999999031545183, iteration: 103686
loss: 1.1344083547592163,grad_norm: 0.9999996835509732, iteration: 103687
loss: 1.084006667137146,grad_norm: 0.9999991988128685, iteration: 103688
loss: 1.153822422027588,grad_norm: 0.9999995145244608, iteration: 103689
loss: 1.174808144569397,grad_norm: 0.9999990904658884, iteration: 103690
loss: 1.1807445287704468,grad_norm: 0.9999995468163653, iteration: 103691
loss: 1.229503870010376,grad_norm: 0.9999999436489523, iteration: 103692
loss: 1.1425821781158447,grad_norm: 0.9999994309435455, iteration: 103693
loss: 1.1795496940612793,grad_norm: 0.9999998943885176, iteration: 103694
loss: 1.0629974603652954,grad_norm: 0.9999999182670511, iteration: 103695
loss: 1.1664760112762451,grad_norm: 0.9999996735044187, iteration: 103696
loss: 1.2005974054336548,grad_norm: 0.9999997249520757, iteration: 103697
loss: 1.2208436727523804,grad_norm: 0.999999093312478, iteration: 103698
loss: 1.3068976402282715,grad_norm: 0.9999997396408432, iteration: 103699
loss: 1.2788056135177612,grad_norm: 0.9999998536659632, iteration: 103700
loss: 1.2550146579742432,grad_norm: 0.9999997899756865, iteration: 103701
loss: 1.087693691253662,grad_norm: 0.9999998197637897, iteration: 103702
loss: 1.3171639442443848,grad_norm: 0.9999995614114955, iteration: 103703
loss: 1.0037461519241333,grad_norm: 0.9520296692392383, iteration: 103704
loss: 1.3504045009613037,grad_norm: 0.999999979831825, iteration: 103705
loss: 1.401989221572876,grad_norm: 0.99999973137525, iteration: 103706
loss: 1.168696403503418,grad_norm: 0.9999999516386944, iteration: 103707
loss: 1.3062832355499268,grad_norm: 0.9999999890991301, iteration: 103708
loss: 1.3456411361694336,grad_norm: 0.9999998106834617, iteration: 103709
loss: 1.1833231449127197,grad_norm: 0.999999864505773, iteration: 103710
loss: 1.1476359367370605,grad_norm: 0.9999993792184808, iteration: 103711
loss: 1.1388791799545288,grad_norm: 0.9999995778497731, iteration: 103712
loss: 1.3166905641555786,grad_norm: 0.99999970969171, iteration: 103713
loss: 1.3180968761444092,grad_norm: 0.9999998216160226, iteration: 103714
loss: 1.0879201889038086,grad_norm: 0.9999990523942979, iteration: 103715
loss: 1.0024482011795044,grad_norm: 0.9999995412791988, iteration: 103716
loss: 1.2669485807418823,grad_norm: 0.9999999104070155, iteration: 103717
loss: 1.2362204790115356,grad_norm: 0.9999998073053485, iteration: 103718
loss: 1.1263833045959473,grad_norm: 0.9999997033208358, iteration: 103719
loss: 1.2973123788833618,grad_norm: 0.9999994957866621, iteration: 103720
loss: 1.3102335929870605,grad_norm: 0.9999996802115848, iteration: 103721
loss: 1.4915436506271362,grad_norm: 0.9999999327195966, iteration: 103722
loss: 1.4033284187316895,grad_norm: 0.9999999147016533, iteration: 103723
loss: 1.3684765100479126,grad_norm: 0.9999999574486463, iteration: 103724
loss: 1.1765910387039185,grad_norm: 0.9999999381230964, iteration: 103725
loss: 1.265876054763794,grad_norm: 0.9999998758423814, iteration: 103726
loss: 1.701209545135498,grad_norm: 1.000000067101927, iteration: 103727
loss: 1.7150388956069946,grad_norm: 0.9999998923069404, iteration: 103728
loss: 1.081290602684021,grad_norm: 0.9999992728803706, iteration: 103729
loss: 1.4855618476867676,grad_norm: 0.9999997778045361, iteration: 103730
loss: 1.3124510049819946,grad_norm: 0.9999999811341321, iteration: 103731
loss: 1.4422725439071655,grad_norm: 0.9999999590107062, iteration: 103732
loss: 1.1873290538787842,grad_norm: 0.9999993600922398, iteration: 103733
loss: 1.5457816123962402,grad_norm: 0.9999999298833089, iteration: 103734
loss: 1.3225584030151367,grad_norm: 0.999999973445402, iteration: 103735
loss: 1.3901523351669312,grad_norm: 0.999999870367661, iteration: 103736
loss: 1.3981529474258423,grad_norm: 0.9999999322909946, iteration: 103737
loss: 1.4372305870056152,grad_norm: 0.9999998993137827, iteration: 103738
loss: 1.3789467811584473,grad_norm: 0.9999997806264352, iteration: 103739
loss: 1.4068844318389893,grad_norm: 0.9999998514264434, iteration: 103740
loss: 1.2435674667358398,grad_norm: 0.9999998430290278, iteration: 103741
loss: 1.227251410484314,grad_norm: 0.9999998492155221, iteration: 103742
loss: 1.4882686138153076,grad_norm: 0.9999999066354845, iteration: 103743
loss: 1.1993582248687744,grad_norm: 0.9999999444954011, iteration: 103744
loss: 1.08915376663208,grad_norm: 0.9999992455368976, iteration: 103745
loss: 1.5929757356643677,grad_norm: 0.9999999503721501, iteration: 103746
loss: 1.3891208171844482,grad_norm: 1.0000000111450393, iteration: 103747
loss: 1.326690673828125,grad_norm: 1.0000000171448333, iteration: 103748
loss: 1.2336889505386353,grad_norm: 0.9999994869039757, iteration: 103749
loss: 1.2011200189590454,grad_norm: 0.9999998879502005, iteration: 103750
loss: 1.2921661138534546,grad_norm: 0.999999555027588, iteration: 103751
loss: 1.222516417503357,grad_norm: 1.0000000391350086, iteration: 103752
loss: 1.1295274496078491,grad_norm: 1.0000000308174946, iteration: 103753
loss: 1.4351640939712524,grad_norm: 0.9999998121660254, iteration: 103754
loss: 1.1813057661056519,grad_norm: 1.0000000093817645, iteration: 103755
loss: 1.26676607131958,grad_norm: 0.9999998212937092, iteration: 103756
loss: 1.1925418376922607,grad_norm: 0.9999999220309528, iteration: 103757
loss: 1.173287034034729,grad_norm: 1.000000066592383, iteration: 103758
loss: 1.2118295431137085,grad_norm: 1.000000021674969, iteration: 103759
loss: 1.3091782331466675,grad_norm: 0.9999999032610823, iteration: 103760
loss: 1.273488998413086,grad_norm: 0.9999993666376662, iteration: 103761
loss: 1.4868879318237305,grad_norm: 0.9999996561377626, iteration: 103762
loss: 1.2368770837783813,grad_norm: 0.9999995176634517, iteration: 103763
loss: 1.4786161184310913,grad_norm: 0.9999998794940895, iteration: 103764
loss: 1.4330300092697144,grad_norm: 0.9999998579492507, iteration: 103765
loss: 1.200238585472107,grad_norm: 0.9999992661174478, iteration: 103766
loss: 1.3979268074035645,grad_norm: 0.9999998225137614, iteration: 103767
loss: 1.2963567972183228,grad_norm: 0.9999998551704106, iteration: 103768
loss: 1.2489391565322876,grad_norm: 1.0000000133131954, iteration: 103769
loss: 1.2107454538345337,grad_norm: 0.999999817919603, iteration: 103770
loss: 1.1735529899597168,grad_norm: 0.9999997324755864, iteration: 103771
loss: 1.4795647859573364,grad_norm: 0.9999999212711881, iteration: 103772
loss: 1.488013744354248,grad_norm: 0.9999999085127653, iteration: 103773
loss: 1.2793678045272827,grad_norm: 0.9999997664832821, iteration: 103774
loss: 1.3490843772888184,grad_norm: 0.9999998603253502, iteration: 103775
loss: 1.4111614227294922,grad_norm: 0.9999998429530809, iteration: 103776
loss: 1.3188642263412476,grad_norm: 0.9999998546743398, iteration: 103777
loss: 1.2960400581359863,grad_norm: 0.9999997965323937, iteration: 103778
loss: 1.37267005443573,grad_norm: 0.9999996569212993, iteration: 103779
loss: 1.2999526262283325,grad_norm: 0.999999855780461, iteration: 103780
loss: 1.478771686553955,grad_norm: 0.9999999098103605, iteration: 103781
loss: 1.487502932548523,grad_norm: 0.9999995756607604, iteration: 103782
loss: 1.7824254035949707,grad_norm: 0.9999998890710807, iteration: 103783
loss: 1.248146414756775,grad_norm: 0.9999997981132689, iteration: 103784
loss: 1.5328372716903687,grad_norm: 0.999999727620696, iteration: 103785
loss: 1.6744588613510132,grad_norm: 1.0000000492603436, iteration: 103786
loss: 1.1995996236801147,grad_norm: 0.9999998719002865, iteration: 103787
loss: 1.4043896198272705,grad_norm: 0.9999998336514077, iteration: 103788
loss: 1.3564869165420532,grad_norm: 0.9999999558407242, iteration: 103789
loss: 1.4735946655273438,grad_norm: 0.9999998751370414, iteration: 103790
loss: 1.3955886363983154,grad_norm: 0.999999587478463, iteration: 103791
loss: 1.3652046918869019,grad_norm: 0.9999999078512796, iteration: 103792
loss: 1.4306542873382568,grad_norm: 0.9999998123510896, iteration: 103793
loss: 1.5252809524536133,grad_norm: 1.000000026147148, iteration: 103794
loss: 1.8496564626693726,grad_norm: 0.9999999220446888, iteration: 103795
loss: 1.354353666305542,grad_norm: 0.9999998445422306, iteration: 103796
loss: 1.6417235136032104,grad_norm: 0.9999998390096803, iteration: 103797
loss: 1.232261061668396,grad_norm: 0.9999998733173351, iteration: 103798
loss: 1.4686698913574219,grad_norm: 0.9999998552914201, iteration: 103799
loss: 1.414284110069275,grad_norm: 0.9999998569270887, iteration: 103800
loss: 1.2879066467285156,grad_norm: 0.9999996255904845, iteration: 103801
loss: 1.321176528930664,grad_norm: 0.9999999316643349, iteration: 103802
loss: 1.514600396156311,grad_norm: 0.9999999395348143, iteration: 103803
loss: 1.6010199785232544,grad_norm: 0.9999999167464497, iteration: 103804
loss: 1.6562711000442505,grad_norm: 0.9999997111306036, iteration: 103805
loss: 1.6692167520523071,grad_norm: 0.999999918376474, iteration: 103806
loss: 1.4287266731262207,grad_norm: 0.9999999846031828, iteration: 103807
loss: 1.338276743888855,grad_norm: 0.9999997662811843, iteration: 103808
loss: 1.2513704299926758,grad_norm: 0.9999995562433889, iteration: 103809
loss: 1.3606153726577759,grad_norm: 0.9999999567379346, iteration: 103810
loss: 1.3640903234481812,grad_norm: 0.9999997202559522, iteration: 103811
loss: 1.3520432710647583,grad_norm: 0.9999997956507438, iteration: 103812
loss: 1.2389267683029175,grad_norm: 1.000000006479064, iteration: 103813
loss: 1.20196533203125,grad_norm: 0.9999998390659178, iteration: 103814
loss: 1.2470699548721313,grad_norm: 0.9999998189392287, iteration: 103815
loss: 1.481997013092041,grad_norm: 0.999999967358233, iteration: 103816
loss: 1.216835379600525,grad_norm: 0.9999998556682329, iteration: 103817
loss: 1.5193183422088623,grad_norm: 1.000000015025346, iteration: 103818
loss: 1.368908166885376,grad_norm: 0.9999998067131841, iteration: 103819
loss: 1.1645538806915283,grad_norm: 0.9999996124035492, iteration: 103820
loss: 1.2977739572525024,grad_norm: 0.9999992741386283, iteration: 103821
loss: 1.2673695087432861,grad_norm: 0.9999996236958443, iteration: 103822
loss: 1.4275567531585693,grad_norm: 0.9999994480554627, iteration: 103823
loss: 1.2967060804367065,grad_norm: 0.9999998607106758, iteration: 103824
loss: 1.2816424369812012,grad_norm: 0.9999997708203436, iteration: 103825
loss: 1.3144975900650024,grad_norm: 0.9999998533825569, iteration: 103826
loss: 1.1412866115570068,grad_norm: 0.9999997352198998, iteration: 103827
loss: 1.1410400867462158,grad_norm: 0.9999998169480664, iteration: 103828
loss: 1.1116583347320557,grad_norm: 0.9999991974532869, iteration: 103829
loss: 1.270050287246704,grad_norm: 0.9999992637365787, iteration: 103830
loss: 1.1896154880523682,grad_norm: 0.9999997960737542, iteration: 103831
loss: 1.3168394565582275,grad_norm: 0.9999998558100384, iteration: 103832
loss: 1.3918181657791138,grad_norm: 0.9999999144843259, iteration: 103833
loss: 1.256590723991394,grad_norm: 0.999999746342357, iteration: 103834
loss: 1.2991304397583008,grad_norm: 0.9999994373505433, iteration: 103835
loss: 1.084436297416687,grad_norm: 0.9999990789155203, iteration: 103836
loss: 1.1997148990631104,grad_norm: 0.999999870148532, iteration: 103837
loss: 1.368881344795227,grad_norm: 0.9999997397223488, iteration: 103838
loss: 1.2090462446212769,grad_norm: 0.9999995328136452, iteration: 103839
loss: 1.270933747291565,grad_norm: 0.9999999379714127, iteration: 103840
loss: 1.1968880891799927,grad_norm: 0.9999992869879485, iteration: 103841
loss: 1.298599362373352,grad_norm: 1.0000000241644098, iteration: 103842
loss: 1.205440640449524,grad_norm: 0.9999996529849724, iteration: 103843
loss: 1.4116779565811157,grad_norm: 0.9999997502349781, iteration: 103844
loss: 1.3248605728149414,grad_norm: 0.9999997736498892, iteration: 103845
loss: 1.2203723192214966,grad_norm: 0.9999997851070955, iteration: 103846
loss: 1.363172173500061,grad_norm: 0.9999997088199889, iteration: 103847
loss: 1.4186973571777344,grad_norm: 0.999999726899113, iteration: 103848
loss: 1.4191752672195435,grad_norm: 0.9999999258211333, iteration: 103849
loss: 1.4102505445480347,grad_norm: 0.9999998585006336, iteration: 103850
loss: 1.1929372549057007,grad_norm: 0.9999998687670226, iteration: 103851
loss: 1.4614622592926025,grad_norm: 0.9999998093057502, iteration: 103852
loss: 1.3120667934417725,grad_norm: 0.999999845342047, iteration: 103853
loss: 1.2095166444778442,grad_norm: 0.9999997145005056, iteration: 103854
loss: 1.1653802394866943,grad_norm: 0.9999991669407672, iteration: 103855
loss: 1.2470794916152954,grad_norm: 0.9999999017315123, iteration: 103856
loss: 1.2140942811965942,grad_norm: 0.9999995417061551, iteration: 103857
loss: 1.3243396282196045,grad_norm: 0.9999994731771661, iteration: 103858
loss: 1.225503921508789,grad_norm: 0.9999996728191382, iteration: 103859
loss: 1.2962582111358643,grad_norm: 0.9999997765408746, iteration: 103860
loss: 1.3089748620986938,grad_norm: 0.9999998570718819, iteration: 103861
loss: 1.343322515487671,grad_norm: 0.9999995336636062, iteration: 103862
loss: 1.3240776062011719,grad_norm: 0.999999753997372, iteration: 103863
loss: 1.2138245105743408,grad_norm: 0.9999997257044654, iteration: 103864
loss: 1.1103752851486206,grad_norm: 0.9999994117912293, iteration: 103865
loss: 1.2714024782180786,grad_norm: 0.9999993374711243, iteration: 103866
loss: 1.2403578758239746,grad_norm: 0.9999999427226803, iteration: 103867
loss: 1.295727014541626,grad_norm: 0.9999995446853464, iteration: 103868
loss: 1.1902787685394287,grad_norm: 0.9999992141450282, iteration: 103869
loss: 1.4479604959487915,grad_norm: 0.9999999470575839, iteration: 103870
loss: 1.2347100973129272,grad_norm: 0.9999996446182177, iteration: 103871
loss: 1.4521430730819702,grad_norm: 0.9999997594604095, iteration: 103872
loss: 1.3342078924179077,grad_norm: 0.9999994995601891, iteration: 103873
loss: 1.3276888132095337,grad_norm: 0.9999999843464399, iteration: 103874
loss: 1.2543100118637085,grad_norm: 0.9999994502174672, iteration: 103875
loss: 1.240517020225525,grad_norm: 0.9999994302501791, iteration: 103876
loss: 1.2876229286193848,grad_norm: 0.9999997367259261, iteration: 103877
loss: 1.3758113384246826,grad_norm: 0.99999981054176, iteration: 103878
loss: 1.2973488569259644,grad_norm: 0.9999998094778683, iteration: 103879
loss: 1.2446491718292236,grad_norm: 0.9999997321541018, iteration: 103880
loss: 1.1148301362991333,grad_norm: 0.9999991768593709, iteration: 103881
loss: 1.5445219278335571,grad_norm: 0.9999998551703247, iteration: 103882
loss: 1.4387751817703247,grad_norm: 1.0000000689891686, iteration: 103883
loss: 1.3665870428085327,grad_norm: 0.9999995073055031, iteration: 103884
loss: 1.3349556922912598,grad_norm: 0.9999999398324163, iteration: 103885
loss: 1.2281166315078735,grad_norm: 0.9999992816808968, iteration: 103886
loss: 1.17996346950531,grad_norm: 0.9999996622059578, iteration: 103887
loss: 1.126357913017273,grad_norm: 0.9999995185413273, iteration: 103888
loss: 1.2755978107452393,grad_norm: 0.9999993756303374, iteration: 103889
loss: 1.3107168674468994,grad_norm: 0.9999998157467389, iteration: 103890
loss: 1.1335103511810303,grad_norm: 0.9999998749758172, iteration: 103891
loss: 1.4538068771362305,grad_norm: 0.9999994783037577, iteration: 103892
loss: 1.1932767629623413,grad_norm: 0.9999995756539108, iteration: 103893
loss: 1.3431528806686401,grad_norm: 0.9999998393582662, iteration: 103894
loss: 1.4400787353515625,grad_norm: 0.9999994611665649, iteration: 103895
loss: 1.282322883605957,grad_norm: 0.9999995936977261, iteration: 103896
loss: 1.2092164754867554,grad_norm: 0.9999992271318001, iteration: 103897
loss: 1.2539628744125366,grad_norm: 1.0000000505421989, iteration: 103898
loss: 1.1896169185638428,grad_norm: 0.9999996564720456, iteration: 103899
loss: 1.226515531539917,grad_norm: 0.9999998418200178, iteration: 103900
loss: 1.2417105436325073,grad_norm: 0.9999998366427285, iteration: 103901
loss: 1.1092896461486816,grad_norm: 0.9999997347351248, iteration: 103902
loss: 1.0959067344665527,grad_norm: 0.9999990231154264, iteration: 103903
loss: 1.3139610290527344,grad_norm: 0.9999992693099262, iteration: 103904
loss: 1.2587507963180542,grad_norm: 0.9999992909821038, iteration: 103905
loss: 1.2012134790420532,grad_norm: 0.9999991314839731, iteration: 103906
loss: 1.4654954671859741,grad_norm: 0.9999998122486191, iteration: 103907
loss: 1.2447885274887085,grad_norm: 0.9999998854935526, iteration: 103908
loss: 1.2332873344421387,grad_norm: 0.9999999178255564, iteration: 103909
loss: 1.338798999786377,grad_norm: 0.9999995509854817, iteration: 103910
loss: 1.3922704458236694,grad_norm: 0.9999995672344836, iteration: 103911
loss: 1.2044981718063354,grad_norm: 0.9999994322868058, iteration: 103912
loss: 1.3022103309631348,grad_norm: 0.9999998276418912, iteration: 103913
loss: 1.2386528253555298,grad_norm: 0.999999253810487, iteration: 103914
loss: 1.3173648118972778,grad_norm: 0.9999998491428082, iteration: 103915
loss: 1.2828611135482788,grad_norm: 0.9999997193602469, iteration: 103916
loss: 1.3572689294815063,grad_norm: 0.9999999194807977, iteration: 103917
loss: 1.2247819900512695,grad_norm: 0.9999996437622344, iteration: 103918
loss: 1.315670132637024,grad_norm: 0.9999999736457011, iteration: 103919
loss: 1.163623332977295,grad_norm: 0.9999996136099764, iteration: 103920
loss: 1.3504199981689453,grad_norm: 0.9999997733736965, iteration: 103921
loss: 1.2415763139724731,grad_norm: 0.9999993396067411, iteration: 103922
loss: 1.472274661064148,grad_norm: 0.9999995689216681, iteration: 103923
loss: 1.415022850036621,grad_norm: 0.9999997800972017, iteration: 103924
loss: 1.2368080615997314,grad_norm: 0.9999998120671483, iteration: 103925
loss: 1.4889873266220093,grad_norm: 0.9999998401687139, iteration: 103926
loss: 1.2704734802246094,grad_norm: 0.9999996393972235, iteration: 103927
loss: 1.461563229560852,grad_norm: 0.9999996267383381, iteration: 103928
loss: 1.4906723499298096,grad_norm: 0.999999756199084, iteration: 103929
loss: 1.393282175064087,grad_norm: 0.999999354592518, iteration: 103930
loss: 1.4692403078079224,grad_norm: 0.9999995570776694, iteration: 103931
loss: 1.3158258199691772,grad_norm: 0.9999999619731824, iteration: 103932
loss: 1.1696202754974365,grad_norm: 0.9999999483595358, iteration: 103933
loss: 1.2610597610473633,grad_norm: 0.999999812694734, iteration: 103934
loss: 1.3962836265563965,grad_norm: 0.999999924530877, iteration: 103935
loss: 1.3903207778930664,grad_norm: 0.999999684526395, iteration: 103936
loss: 1.1327378749847412,grad_norm: 0.9999996647332267, iteration: 103937
loss: 1.1237987279891968,grad_norm: 0.9999991254009799, iteration: 103938
loss: 1.4234600067138672,grad_norm: 1.0000000017181436, iteration: 103939
loss: 1.2089143991470337,grad_norm: 0.9999998560301947, iteration: 103940
loss: 1.0981817245483398,grad_norm: 0.9999996223415099, iteration: 103941
loss: 1.3337514400482178,grad_norm: 0.9999997942700267, iteration: 103942
loss: 1.2288638353347778,grad_norm: 0.9999999381076621, iteration: 103943
loss: 1.3758556842803955,grad_norm: 0.9999995547191379, iteration: 103944
loss: 1.3192083835601807,grad_norm: 0.9999999649164277, iteration: 103945
loss: 1.19456946849823,grad_norm: 0.999999303118465, iteration: 103946
loss: 1.1806260347366333,grad_norm: 0.9999991158842693, iteration: 103947
loss: 1.1207776069641113,grad_norm: 0.9999997211998254, iteration: 103948
loss: 1.1932512521743774,grad_norm: 0.9999997099397224, iteration: 103949
loss: 1.2239525318145752,grad_norm: 0.9999998637791911, iteration: 103950
loss: 1.1306588649749756,grad_norm: 0.9999995689502671, iteration: 103951
loss: 1.400596022605896,grad_norm: 0.9999996799760303, iteration: 103952
loss: 1.1764875650405884,grad_norm: 0.9937567522854076, iteration: 103953
loss: 1.2005386352539062,grad_norm: 0.999999857711509, iteration: 103954
loss: 1.3435635566711426,grad_norm: 0.9999993968044282, iteration: 103955
loss: 1.174758791923523,grad_norm: 0.9999994999117815, iteration: 103956
loss: 1.150496244430542,grad_norm: 0.9999994420907335, iteration: 103957
loss: 1.2781507968902588,grad_norm: 0.9999998739116832, iteration: 103958
loss: 1.212941288948059,grad_norm: 0.9999994851686117, iteration: 103959
loss: 1.1115281581878662,grad_norm: 0.9999994121949065, iteration: 103960
loss: 1.20525324344635,grad_norm: 0.9999993954330275, iteration: 103961
loss: 1.3213679790496826,grad_norm: 0.9999997821785753, iteration: 103962
loss: 1.1673821210861206,grad_norm: 0.9999999114465591, iteration: 103963
loss: 1.163007378578186,grad_norm: 0.9999993013116427, iteration: 103964
loss: 1.2568812370300293,grad_norm: 0.999999808526393, iteration: 103965
loss: 1.3412779569625854,grad_norm: 0.9999996715004253, iteration: 103966
loss: 1.1748054027557373,grad_norm: 0.9999996546418534, iteration: 103967
loss: 1.2689250707626343,grad_norm: 0.9999993764275, iteration: 103968
loss: 1.172603964805603,grad_norm: 0.9999998757856162, iteration: 103969
loss: 1.2935171127319336,grad_norm: 0.9999996606632517, iteration: 103970
loss: 1.1252379417419434,grad_norm: 0.9999996928721, iteration: 103971
loss: 1.168796181678772,grad_norm: 0.9999998080235959, iteration: 103972
loss: 1.229723334312439,grad_norm: 0.9999995797042183, iteration: 103973
loss: 1.2254829406738281,grad_norm: 0.999999565641485, iteration: 103974
loss: 1.4318493604660034,grad_norm: 0.9999997164867502, iteration: 103975
loss: 1.2724169492721558,grad_norm: 0.9999994684744469, iteration: 103976
loss: 1.2448369264602661,grad_norm: 0.9999999269112227, iteration: 103977
loss: 1.4246309995651245,grad_norm: 0.999999514831259, iteration: 103978
loss: 1.1191343069076538,grad_norm: 0.9999997826028919, iteration: 103979
loss: 1.22664213180542,grad_norm: 0.9999995168669845, iteration: 103980
loss: 1.1896073818206787,grad_norm: 0.9999996491835778, iteration: 103981
loss: 1.312886357307434,grad_norm: 0.9999996613118888, iteration: 103982
loss: 1.0930352210998535,grad_norm: 1.000000012715899, iteration: 103983
loss: 1.2781991958618164,grad_norm: 0.9999999590547445, iteration: 103984
loss: 1.3318438529968262,grad_norm: 1.0000000135118379, iteration: 103985
loss: 1.2738206386566162,grad_norm: 0.9999995746571962, iteration: 103986
loss: 1.1577715873718262,grad_norm: 0.9999998696876768, iteration: 103987
loss: 1.4062122106552124,grad_norm: 0.9999998403393369, iteration: 103988
loss: 1.2606126070022583,grad_norm: 0.9999995222987315, iteration: 103989
loss: 1.245086908340454,grad_norm: 0.9999996801078143, iteration: 103990
loss: 1.1340960264205933,grad_norm: 0.9999998961786853, iteration: 103991
loss: 1.328879714012146,grad_norm: 0.9999995027007342, iteration: 103992
loss: 1.2423814535140991,grad_norm: 0.9999995152595659, iteration: 103993
loss: 1.3441158533096313,grad_norm: 0.9999995761166486, iteration: 103994
loss: 1.2740989923477173,grad_norm: 0.9999998343272238, iteration: 103995
loss: 1.1471552848815918,grad_norm: 0.999999239507882, iteration: 103996
loss: 1.3639941215515137,grad_norm: 0.9999998694137452, iteration: 103997
loss: 1.3899611234664917,grad_norm: 1.0000000109782898, iteration: 103998
loss: 1.3499327898025513,grad_norm: 0.9999995542140617, iteration: 103999
loss: 1.3117071390151978,grad_norm: 0.9999999955211286, iteration: 104000
loss: 1.2975064516067505,grad_norm: 0.9999995560509054, iteration: 104001
loss: 1.3664644956588745,grad_norm: 0.9999999677071811, iteration: 104002
loss: 1.340039849281311,grad_norm: 0.9999998461399107, iteration: 104003
loss: 1.1370826959609985,grad_norm: 0.9999993669991722, iteration: 104004
loss: 1.4195199012756348,grad_norm: 0.9999998306892023, iteration: 104005
loss: 1.357365608215332,grad_norm: 0.999999959430949, iteration: 104006
loss: 1.2917563915252686,grad_norm: 0.9999998564461107, iteration: 104007
loss: 1.2584952116012573,grad_norm: 0.9999994222408243, iteration: 104008
loss: 1.1886531114578247,grad_norm: 0.999999697054572, iteration: 104009
loss: 1.2515150308609009,grad_norm: 1.0000000357570413, iteration: 104010
loss: 1.154624104499817,grad_norm: 0.9999993305970195, iteration: 104011
loss: 1.4338632822036743,grad_norm: 0.9999999045011982, iteration: 104012
loss: 1.2840441465377808,grad_norm: 0.9999996643957126, iteration: 104013
loss: 1.4926533699035645,grad_norm: 0.9999995476942174, iteration: 104014
loss: 1.1764646768569946,grad_norm: 0.9999995410217862, iteration: 104015
loss: 1.1789594888687134,grad_norm: 0.9999999381302954, iteration: 104016
loss: 1.2508597373962402,grad_norm: 0.9999995873201147, iteration: 104017
loss: 1.1484863758087158,grad_norm: 0.999999445760113, iteration: 104018
loss: 1.1644737720489502,grad_norm: 0.9999995109336915, iteration: 104019
loss: 1.410225749015808,grad_norm: 0.9999997207559874, iteration: 104020
loss: 1.2623002529144287,grad_norm: 0.9999997495849717, iteration: 104021
loss: 1.1729553937911987,grad_norm: 0.9999998329297893, iteration: 104022
loss: 1.2468253374099731,grad_norm: 0.9999992638227363, iteration: 104023
loss: 1.14478600025177,grad_norm: 0.9999998465739286, iteration: 104024
loss: 1.2459431886672974,grad_norm: 0.999999799947578, iteration: 104025
loss: 1.2470781803131104,grad_norm: 1.0000000301030976, iteration: 104026
loss: 1.3250004053115845,grad_norm: 0.9999998062975624, iteration: 104027
loss: 1.5553579330444336,grad_norm: 0.9999998787847459, iteration: 104028
loss: 1.1867637634277344,grad_norm: 0.9999996193508561, iteration: 104029
loss: 1.1913373470306396,grad_norm: 0.9999994475462116, iteration: 104030
loss: 1.269600749015808,grad_norm: 0.9999996957378081, iteration: 104031
loss: 1.1247164011001587,grad_norm: 0.9999995410549292, iteration: 104032
loss: 1.337028980255127,grad_norm: 0.9999998398243082, iteration: 104033
loss: 1.23318612575531,grad_norm: 0.9999995827815725, iteration: 104034
loss: 1.1494807004928589,grad_norm: 0.999999585011131, iteration: 104035
loss: 1.3398188352584839,grad_norm: 0.999999738019528, iteration: 104036
loss: 1.0691033601760864,grad_norm: 0.9999992332903644, iteration: 104037
loss: 1.2319871187210083,grad_norm: 0.99999933831915, iteration: 104038
loss: 1.3809648752212524,grad_norm: 0.9999995992395161, iteration: 104039
loss: 1.171761155128479,grad_norm: 0.9999999384813616, iteration: 104040
loss: 1.1338132619857788,grad_norm: 0.9999993018242727, iteration: 104041
loss: 1.1540803909301758,grad_norm: 0.9999993521977102, iteration: 104042
loss: 1.1305761337280273,grad_norm: 0.9999995550068509, iteration: 104043
loss: 1.1063920259475708,grad_norm: 0.9999993969769245, iteration: 104044
loss: 1.3391468524932861,grad_norm: 0.999999536743458, iteration: 104045
loss: 1.2788058519363403,grad_norm: 0.9999992998147822, iteration: 104046
loss: 1.169142484664917,grad_norm: 0.9999997876262356, iteration: 104047
loss: 1.1557917594909668,grad_norm: 0.920553455621186, iteration: 104048
loss: 1.0211904048919678,grad_norm: 0.9999992175765606, iteration: 104049
loss: 1.2942997217178345,grad_norm: 0.9999999003070515, iteration: 104050
loss: 1.1780896186828613,grad_norm: 0.9999996297091291, iteration: 104051
loss: 1.1144959926605225,grad_norm: 0.9999992011293666, iteration: 104052
loss: 1.2840524911880493,grad_norm: 0.9999999406384343, iteration: 104053
loss: 1.3291457891464233,grad_norm: 0.9999998963434882, iteration: 104054
loss: 1.2246836423873901,grad_norm: 0.9999998182536455, iteration: 104055
loss: 1.179507851600647,grad_norm: 0.9999999845348728, iteration: 104056
loss: 1.2932171821594238,grad_norm: 0.9999996422890682, iteration: 104057
loss: 1.3860948085784912,grad_norm: 0.9999996837331608, iteration: 104058
loss: 1.1876202821731567,grad_norm: 0.9999998411676841, iteration: 104059
loss: 1.264625072479248,grad_norm: 0.9999997822620856, iteration: 104060
loss: 1.2385716438293457,grad_norm: 0.9999996520678356, iteration: 104061
loss: 1.1551131010055542,grad_norm: 1.0000000158449731, iteration: 104062
loss: 1.262552261352539,grad_norm: 0.9999992809745527, iteration: 104063
loss: 1.1381062269210815,grad_norm: 0.9999994080984088, iteration: 104064
loss: 1.221034288406372,grad_norm: 0.9999994758371772, iteration: 104065
loss: 1.2013013362884521,grad_norm: 0.9919171056285727, iteration: 104066
loss: 1.1653038263320923,grad_norm: 0.9999998760990461, iteration: 104067
loss: 1.141698956489563,grad_norm: 0.9999999792643244, iteration: 104068
loss: 1.336435317993164,grad_norm: 0.9999998451325833, iteration: 104069
loss: 1.311952829360962,grad_norm: 0.9999995101414014, iteration: 104070
loss: 1.3200137615203857,grad_norm: 0.9999997654135508, iteration: 104071
loss: 1.2533011436462402,grad_norm: 0.9999999742286619, iteration: 104072
loss: 1.0168370008468628,grad_norm: 0.9999990652911185, iteration: 104073
loss: 1.1256191730499268,grad_norm: 0.9999992406258861, iteration: 104074
loss: 1.1021277904510498,grad_norm: 0.9999991364572324, iteration: 104075
loss: 1.2542531490325928,grad_norm: 0.9999994620354749, iteration: 104076
loss: 1.1615991592407227,grad_norm: 0.9999992477753049, iteration: 104077
loss: 1.3002759218215942,grad_norm: 0.9999996159972991, iteration: 104078
loss: 1.101973295211792,grad_norm: 0.9999997466788808, iteration: 104079
loss: 1.242820143699646,grad_norm: 0.9999991833794276, iteration: 104080
loss: 1.2434378862380981,grad_norm: 0.9999998625814701, iteration: 104081
loss: 1.161616563796997,grad_norm: 0.9999993133179127, iteration: 104082
loss: 1.061985731124878,grad_norm: 0.999999049512671, iteration: 104083
loss: 1.2608695030212402,grad_norm: 0.9999998636443844, iteration: 104084
loss: 1.2605342864990234,grad_norm: 0.999999746116957, iteration: 104085
loss: 1.028067708015442,grad_norm: 0.9769282974557607, iteration: 104086
loss: 1.0704940557479858,grad_norm: 0.9565958241177447, iteration: 104087
loss: 1.129781723022461,grad_norm: 0.9999993124647927, iteration: 104088
loss: 1.2855417728424072,grad_norm: 0.9999992647925317, iteration: 104089
loss: 1.239427924156189,grad_norm: 0.99999978443609, iteration: 104090
loss: 1.3453335762023926,grad_norm: 0.9999994568599604, iteration: 104091
loss: 1.3198915719985962,grad_norm: 0.9999999635948681, iteration: 104092
loss: 1.2984942197799683,grad_norm: 0.9999995593755956, iteration: 104093
loss: 1.1888668537139893,grad_norm: 0.9999998510010043, iteration: 104094
loss: 1.1547728776931763,grad_norm: 0.9999991122889066, iteration: 104095
loss: 1.1855977773666382,grad_norm: 0.9999990941365796, iteration: 104096
loss: 1.2469950914382935,grad_norm: 0.9999996520173254, iteration: 104097
loss: 1.0536377429962158,grad_norm: 0.9990887181175298, iteration: 104098
loss: 1.1881103515625,grad_norm: 0.9999995014796711, iteration: 104099
loss: 1.1664420366287231,grad_norm: 0.9999999448777366, iteration: 104100
loss: 1.1388566493988037,grad_norm: 0.9999993667798397, iteration: 104101
loss: 1.3844940662384033,grad_norm: 0.9999998988723793, iteration: 104102
loss: 1.1135587692260742,grad_norm: 0.9999997761839935, iteration: 104103
loss: 1.208433747291565,grad_norm: 0.9999997650155821, iteration: 104104
loss: 1.2096173763275146,grad_norm: 0.9999994481909411, iteration: 104105
loss: 1.166486144065857,grad_norm: 0.9999996846027146, iteration: 104106
loss: 1.2260143756866455,grad_norm: 0.999999985035923, iteration: 104107
loss: 1.0696831941604614,grad_norm: 0.9999990423422943, iteration: 104108
loss: 1.1185261011123657,grad_norm: 0.9999992688745956, iteration: 104109
loss: 1.3012444972991943,grad_norm: 0.9999996824446057, iteration: 104110
loss: 1.1998447179794312,grad_norm: 0.9999996443495879, iteration: 104111
loss: 1.0647964477539062,grad_norm: 0.9999998044903425, iteration: 104112
loss: 1.2389711141586304,grad_norm: 0.9999995948722618, iteration: 104113
loss: 1.1578450202941895,grad_norm: 0.9999994272927356, iteration: 104114
loss: 1.1053524017333984,grad_norm: 0.9999991883444578, iteration: 104115
loss: 1.1961946487426758,grad_norm: 0.9999995012927483, iteration: 104116
loss: 1.1930949687957764,grad_norm: 0.9999994650984778, iteration: 104117
loss: 1.1789315938949585,grad_norm: 0.9999995081208662, iteration: 104118
loss: 1.2771402597427368,grad_norm: 0.9999995146588297, iteration: 104119
loss: 1.239241361618042,grad_norm: 0.999999728193227, iteration: 104120
loss: 1.171115756034851,grad_norm: 0.9999993196710936, iteration: 104121
loss: 1.2585183382034302,grad_norm: 0.9999992135287299, iteration: 104122
loss: 1.1516848802566528,grad_norm: 0.9999992936096368, iteration: 104123
loss: 1.1100196838378906,grad_norm: 0.9999991733549233, iteration: 104124
loss: 1.0702868700027466,grad_norm: 0.9999993362274924, iteration: 104125
loss: 1.1868318319320679,grad_norm: 0.9999995613131999, iteration: 104126
loss: 1.1769014596939087,grad_norm: 0.9999995418388048, iteration: 104127
loss: 1.2732402086257935,grad_norm: 0.9999998616959189, iteration: 104128
loss: 1.2552517652511597,grad_norm: 0.9999998663772429, iteration: 104129
loss: 1.1318612098693848,grad_norm: 0.9999991037248797, iteration: 104130
loss: 1.7129089832305908,grad_norm: 0.999999937800967, iteration: 104131
loss: 1.1380820274353027,grad_norm: 0.9999997834941309, iteration: 104132
loss: 1.194802165031433,grad_norm: 0.999999657015761, iteration: 104133
loss: 1.1673195362091064,grad_norm: 0.9999995447149735, iteration: 104134
loss: 1.2563308477401733,grad_norm: 0.9999994752107166, iteration: 104135
loss: 1.1569757461547852,grad_norm: 0.9999994205954451, iteration: 104136
loss: 1.3567367792129517,grad_norm: 0.9999994431951525, iteration: 104137
loss: 1.2773325443267822,grad_norm: 0.9999995466856716, iteration: 104138
loss: 1.2100173234939575,grad_norm: 0.9999994687318611, iteration: 104139
loss: 1.2233561277389526,grad_norm: 0.9999997368514643, iteration: 104140
loss: 1.1446406841278076,grad_norm: 0.9359083129318964, iteration: 104141
loss: 1.1395097970962524,grad_norm: 0.9999997093822262, iteration: 104142
loss: 1.3262596130371094,grad_norm: 0.9999999199108541, iteration: 104143
loss: 1.1943554878234863,grad_norm: 0.9999997232423281, iteration: 104144
loss: 1.313729166984558,grad_norm: 0.9999996954562564, iteration: 104145
loss: 1.1131939888000488,grad_norm: 0.9999994834975304, iteration: 104146
loss: 1.1033328771591187,grad_norm: 0.9999997757423094, iteration: 104147
loss: 1.1831905841827393,grad_norm: 0.9999991562270736, iteration: 104148
loss: 1.1342960596084595,grad_norm: 0.9999994280176451, iteration: 104149
loss: 1.111141324043274,grad_norm: 0.9999991441081605, iteration: 104150
loss: 1.544470191001892,grad_norm: 1.000000096665399, iteration: 104151
loss: 1.1326546669006348,grad_norm: 0.9999997555283109, iteration: 104152
loss: 1.09735107421875,grad_norm: 0.9999991565724811, iteration: 104153
loss: 1.1113227605819702,grad_norm: 0.999999682423759, iteration: 104154
loss: 1.2211424112319946,grad_norm: 0.9999998671782259, iteration: 104155
loss: 1.417641520500183,grad_norm: 0.9999998361763892, iteration: 104156
loss: 1.1327883005142212,grad_norm: 0.9999990730474726, iteration: 104157
loss: 1.1367994546890259,grad_norm: 0.9348884695026458, iteration: 104158
loss: 1.22602379322052,grad_norm: 1.0000000118119112, iteration: 104159
loss: 1.086154818534851,grad_norm: 0.9999989971725857, iteration: 104160
loss: 1.1828325986862183,grad_norm: 0.9999998598775125, iteration: 104161
loss: 1.2859245538711548,grad_norm: 0.9999992282898345, iteration: 104162
loss: 1.0572296380996704,grad_norm: 0.9999993919364498, iteration: 104163
loss: 1.2744464874267578,grad_norm: 1.0000000332338717, iteration: 104164
loss: 1.2517589330673218,grad_norm: 0.9999998918415486, iteration: 104165
loss: 1.3988040685653687,grad_norm: 0.9999994693122486, iteration: 104166
loss: 1.1390419006347656,grad_norm: 0.9999992008622453, iteration: 104167
loss: 1.151158094406128,grad_norm: 0.999999867190234, iteration: 104168
loss: 1.1622765064239502,grad_norm: 0.9999998112239216, iteration: 104169
loss: 1.1392496824264526,grad_norm: 0.999999913962146, iteration: 104170
loss: 1.2416768074035645,grad_norm: 0.9999993615272202, iteration: 104171
loss: 1.2223997116088867,grad_norm: 0.9999992444073882, iteration: 104172
loss: 1.361091136932373,grad_norm: 0.999999518981613, iteration: 104173
loss: 1.22995924949646,grad_norm: 0.9999995350205853, iteration: 104174
loss: 1.1768945455551147,grad_norm: 0.9999998597480181, iteration: 104175
loss: 1.2246880531311035,grad_norm: 0.9999998092365053, iteration: 104176
loss: 1.1631313562393188,grad_norm: 0.9999998937067522, iteration: 104177
loss: 1.193183422088623,grad_norm: 0.9999992733192371, iteration: 104178
loss: 1.302451729774475,grad_norm: 0.9999998443870413, iteration: 104179
loss: 1.0680527687072754,grad_norm: 0.999999206479557, iteration: 104180
loss: 1.1099631786346436,grad_norm: 0.9999996274699198, iteration: 104181
loss: 1.185921549797058,grad_norm: 0.9999990690244495, iteration: 104182
loss: 1.1482713222503662,grad_norm: 0.9999995611648668, iteration: 104183
loss: 1.188310146331787,grad_norm: 0.9999993866439286, iteration: 104184
loss: 1.0831259489059448,grad_norm: 0.9999991706543331, iteration: 104185
loss: 1.1702927350997925,grad_norm: 0.9999994343503016, iteration: 104186
loss: 1.0897998809814453,grad_norm: 0.9999997719040669, iteration: 104187
loss: 1.1045323610305786,grad_norm: 0.9999990619034281, iteration: 104188
loss: 1.173548936843872,grad_norm: 0.8983066986041149, iteration: 104189
loss: 1.2813078165054321,grad_norm: 0.9999996188447533, iteration: 104190
loss: 1.1680561304092407,grad_norm: 0.9999991233125162, iteration: 104191
loss: 1.153066873550415,grad_norm: 0.9999989851316022, iteration: 104192
loss: 1.1079777479171753,grad_norm: 0.9999990436911094, iteration: 104193
loss: 1.1145933866500854,grad_norm: 0.9999994166687504, iteration: 104194
loss: 1.0836982727050781,grad_norm: 0.8350648104668765, iteration: 104195
loss: 1.2232847213745117,grad_norm: 0.9999995233255301, iteration: 104196
loss: 1.1806243658065796,grad_norm: 0.9999998721162078, iteration: 104197
loss: 1.1972503662109375,grad_norm: 0.9999991184180235, iteration: 104198
loss: 1.0958731174468994,grad_norm: 0.9535310250121021, iteration: 104199
loss: 1.0770121812820435,grad_norm: 0.999999786998941, iteration: 104200
loss: 1.0452841520309448,grad_norm: 0.9782129056603663, iteration: 104201
loss: 1.232785701751709,grad_norm: 0.9999994672819471, iteration: 104202
loss: 1.122084140777588,grad_norm: 0.999999264926877, iteration: 104203
loss: 1.095269799232483,grad_norm: 0.9999993908500974, iteration: 104204
loss: 1.1264979839324951,grad_norm: 0.9999990246439268, iteration: 104205
loss: 1.1151726245880127,grad_norm: 0.9999991122066091, iteration: 104206
loss: 1.2956905364990234,grad_norm: 0.9999994192075188, iteration: 104207
loss: 1.1264585256576538,grad_norm: 0.9999992091318207, iteration: 104208
loss: 1.1214686632156372,grad_norm: 0.9999994985904604, iteration: 104209
loss: 1.2000876665115356,grad_norm: 0.9999998564213296, iteration: 104210
loss: 1.0596299171447754,grad_norm: 0.9999992252966107, iteration: 104211
loss: 1.338546872138977,grad_norm: 0.9999996124381507, iteration: 104212
loss: 1.2938926219940186,grad_norm: 0.9999995700262553, iteration: 104213
loss: 1.2215865850448608,grad_norm: 0.9999994711772905, iteration: 104214
loss: 1.174648642539978,grad_norm: 0.999999226437133, iteration: 104215
loss: 1.3953801393508911,grad_norm: 0.999999555323736, iteration: 104216
loss: 1.2586240768432617,grad_norm: 0.999999749183261, iteration: 104217
loss: 1.1732476949691772,grad_norm: 0.9999999311875443, iteration: 104218
loss: 1.1907330751419067,grad_norm: 0.9999992517583272, iteration: 104219
loss: 1.1215540170669556,grad_norm: 0.9999991211081055, iteration: 104220
loss: 1.221082091331482,grad_norm: 0.9999994952068932, iteration: 104221
loss: 1.1156084537506104,grad_norm: 0.9999998659445598, iteration: 104222
loss: 1.04286789894104,grad_norm: 0.9999993473107172, iteration: 104223
loss: 1.1437599658966064,grad_norm: 0.9999991989728182, iteration: 104224
loss: 1.1078063249588013,grad_norm: 0.999999437215309, iteration: 104225
loss: 1.1060820817947388,grad_norm: 1.0000000521663268, iteration: 104226
loss: 1.1980631351470947,grad_norm: 0.9999996468757043, iteration: 104227
loss: 1.3436720371246338,grad_norm: 0.999999858143614, iteration: 104228
loss: 1.1828570365905762,grad_norm: 0.999999815059033, iteration: 104229
loss: 1.1512551307678223,grad_norm: 0.9999993928568189, iteration: 104230
loss: 1.1152942180633545,grad_norm: 0.999999202719414, iteration: 104231
loss: 1.074570894241333,grad_norm: 0.999999688858715, iteration: 104232
loss: 1.0973944664001465,grad_norm: 0.9999993385395901, iteration: 104233
loss: 1.399168848991394,grad_norm: 0.9999997954853485, iteration: 104234
loss: 1.2855606079101562,grad_norm: 0.9999999390378006, iteration: 104235
loss: 1.126940369606018,grad_norm: 0.9999997890019368, iteration: 104236
loss: 1.2049448490142822,grad_norm: 0.999999482691329, iteration: 104237
loss: 1.1348121166229248,grad_norm: 0.9999996690845854, iteration: 104238
loss: 1.222069501876831,grad_norm: 0.9999991268901121, iteration: 104239
loss: 1.213166356086731,grad_norm: 0.9999994606074252, iteration: 104240
loss: 1.189875602722168,grad_norm: 0.9999992435432259, iteration: 104241
loss: 1.3298853635787964,grad_norm: 0.9999996562401929, iteration: 104242
loss: 1.2709602117538452,grad_norm: 0.999999769581926, iteration: 104243
loss: 1.3160982131958008,grad_norm: 0.999999876100637, iteration: 104244
loss: 1.1193910837173462,grad_norm: 0.9999997398125628, iteration: 104245
loss: 1.2571920156478882,grad_norm: 0.999999693193045, iteration: 104246
loss: 1.1548888683319092,grad_norm: 0.9999991998249326, iteration: 104247
loss: 1.2997511625289917,grad_norm: 0.9999998335160462, iteration: 104248
loss: 1.2429026365280151,grad_norm: 0.9999999998369034, iteration: 104249
loss: 1.169512152671814,grad_norm: 0.9999995667665887, iteration: 104250
loss: 1.2891693115234375,grad_norm: 0.9999994937288146, iteration: 104251
loss: 1.17182195186615,grad_norm: 0.9999993700805658, iteration: 104252
loss: 1.2441679239273071,grad_norm: 0.9999995702012155, iteration: 104253
loss: 1.1596028804779053,grad_norm: 0.9999995174032815, iteration: 104254
loss: 1.0701433420181274,grad_norm: 0.9999992134200333, iteration: 104255
loss: 1.1350054740905762,grad_norm: 0.9999993666885794, iteration: 104256
loss: 1.172398567199707,grad_norm: 0.9999996775956239, iteration: 104257
loss: 1.1195166110992432,grad_norm: 0.9999992414455228, iteration: 104258
loss: 1.2717320919036865,grad_norm: 0.9999994578023446, iteration: 104259
loss: 1.2553095817565918,grad_norm: 0.9999993722341362, iteration: 104260
loss: 1.3035297393798828,grad_norm: 0.9999998139748301, iteration: 104261
loss: 1.249989628791809,grad_norm: 0.9999995099644703, iteration: 104262
loss: 1.2078708410263062,grad_norm: 0.9999998153926821, iteration: 104263
loss: 1.1898528337478638,grad_norm: 0.9999999337324076, iteration: 104264
loss: 1.2159974575042725,grad_norm: 0.999999899937655, iteration: 104265
loss: 1.2613261938095093,grad_norm: 0.9999994682466045, iteration: 104266
loss: 1.2123783826828003,grad_norm: 0.9999997026858959, iteration: 104267
loss: 1.1378403902053833,grad_norm: 0.999999110519621, iteration: 104268
loss: 1.283666968345642,grad_norm: 0.9999999157268614, iteration: 104269
loss: 1.2380338907241821,grad_norm: 0.9999998795638044, iteration: 104270
loss: 1.2369712591171265,grad_norm: 0.9999998271759855, iteration: 104271
loss: 1.2522408962249756,grad_norm: 0.9999996551654214, iteration: 104272
loss: 1.2284079790115356,grad_norm: 1.0000000432660978, iteration: 104273
loss: 1.2320494651794434,grad_norm: 0.999999580545314, iteration: 104274
loss: 1.2031205892562866,grad_norm: 0.9999997587765592, iteration: 104275
loss: 1.3891018629074097,grad_norm: 0.9999992538332104, iteration: 104276
loss: 1.2097331285476685,grad_norm: 0.9999995783400046, iteration: 104277
loss: 1.2204854488372803,grad_norm: 0.9999995524704374, iteration: 104278
loss: 1.1176984310150146,grad_norm: 0.9999993829509296, iteration: 104279
loss: 1.4294692277908325,grad_norm: 0.9999995309053954, iteration: 104280
loss: 1.0573811531066895,grad_norm: 0.9999997660679995, iteration: 104281
loss: 1.1484310626983643,grad_norm: 0.9999995895239685, iteration: 104282
loss: 1.2533886432647705,grad_norm: 0.9999998780182611, iteration: 104283
loss: 1.1743313074111938,grad_norm: 0.9999991898985258, iteration: 104284
loss: 1.0612820386886597,grad_norm: 0.9999992057697454, iteration: 104285
loss: 1.156476378440857,grad_norm: 0.9999992909881344, iteration: 104286
loss: 1.1398749351501465,grad_norm: 0.9999989327080739, iteration: 104287
loss: 1.1830028295516968,grad_norm: 0.9999998052734754, iteration: 104288
loss: 1.1146552562713623,grad_norm: 0.9999992587729867, iteration: 104289
loss: 1.0898698568344116,grad_norm: 0.9999992221074852, iteration: 104290
loss: 1.185978889465332,grad_norm: 0.9999997650556471, iteration: 104291
loss: 1.1752197742462158,grad_norm: 0.9999999368162534, iteration: 104292
loss: 1.2327818870544434,grad_norm: 0.9999993585129028, iteration: 104293
loss: 1.1135510206222534,grad_norm: 0.9999991931945403, iteration: 104294
loss: 1.1919320821762085,grad_norm: 0.9999991918456481, iteration: 104295
loss: 1.1912851333618164,grad_norm: 0.9999997732510668, iteration: 104296
loss: 1.243869662284851,grad_norm: 0.9999994398311663, iteration: 104297
loss: 1.2016549110412598,grad_norm: 0.9999996234198342, iteration: 104298
loss: 1.0792969465255737,grad_norm: 0.9999995945202443, iteration: 104299
loss: 1.230257511138916,grad_norm: 0.9999999259750116, iteration: 104300
loss: 1.3073934316635132,grad_norm: 0.9999996984602831, iteration: 104301
loss: 1.1637556552886963,grad_norm: 0.9999993126497558, iteration: 104302
loss: 1.0351276397705078,grad_norm: 0.9855631007759659, iteration: 104303
loss: 1.1285533905029297,grad_norm: 0.9999996399090809, iteration: 104304
loss: 1.1540600061416626,grad_norm: 0.9999997240635852, iteration: 104305
loss: 1.1869583129882812,grad_norm: 0.9999999572770778, iteration: 104306
loss: 1.088068962097168,grad_norm: 0.9999998215022301, iteration: 104307
loss: 1.1067835092544556,grad_norm: 0.9999992576095837, iteration: 104308
loss: 1.2252384424209595,grad_norm: 0.9999999490338236, iteration: 104309
loss: 1.1786456108093262,grad_norm: 0.9999996496427035, iteration: 104310
loss: 1.3306069374084473,grad_norm: 0.9999999167608039, iteration: 104311
loss: 1.2268747091293335,grad_norm: 0.999999744601781, iteration: 104312
loss: 1.2902255058288574,grad_norm: 0.9999993993404087, iteration: 104313
loss: 1.739115595817566,grad_norm: 0.9999999139969633, iteration: 104314
loss: 1.1918317079544067,grad_norm: 0.9999992201755125, iteration: 104315
loss: 1.20158851146698,grad_norm: 0.9999993641599145, iteration: 104316
loss: 1.1406763792037964,grad_norm: 0.9999993274030998, iteration: 104317
loss: 1.1548140048980713,grad_norm: 0.9999998114371652, iteration: 104318
loss: 1.1288305521011353,grad_norm: 0.999999486737461, iteration: 104319
loss: 1.2321076393127441,grad_norm: 0.9999991316780142, iteration: 104320
loss: 1.096750020980835,grad_norm: 0.9999994857437974, iteration: 104321
loss: 1.116362452507019,grad_norm: 0.9999991950511177, iteration: 104322
loss: 1.4724829196929932,grad_norm: 0.9999994579652344, iteration: 104323
loss: 1.2198820114135742,grad_norm: 0.9999997261136999, iteration: 104324
loss: 1.3009402751922607,grad_norm: 0.9999998402620394, iteration: 104325
loss: 1.1403254270553589,grad_norm: 0.999999366144613, iteration: 104326
loss: 1.3002111911773682,grad_norm: 0.9999996730729075, iteration: 104327
loss: 1.135335087776184,grad_norm: 0.9999993824120972, iteration: 104328
loss: 1.2328839302062988,grad_norm: 0.9999998627948037, iteration: 104329
loss: 1.1990687847137451,grad_norm: 0.999999846348563, iteration: 104330
loss: 1.1959699392318726,grad_norm: 0.9999999476065359, iteration: 104331
loss: 1.202878713607788,grad_norm: 0.9999994277446649, iteration: 104332
loss: 1.2378147840499878,grad_norm: 0.9999998027269167, iteration: 104333
loss: 1.1595454216003418,grad_norm: 0.9999996344313457, iteration: 104334
loss: 1.2913869619369507,grad_norm: 0.9999992598398768, iteration: 104335
loss: 1.655462384223938,grad_norm: 1.0000000049104234, iteration: 104336
loss: 1.2785885334014893,grad_norm: 0.9999998461278108, iteration: 104337
loss: 1.2311558723449707,grad_norm: 0.9999993316975708, iteration: 104338
loss: 1.202210545539856,grad_norm: 0.9999998380044989, iteration: 104339
loss: 1.1441224813461304,grad_norm: 0.9999991247553977, iteration: 104340
loss: 1.231771469116211,grad_norm: 0.9999996421292455, iteration: 104341
loss: 1.118769884109497,grad_norm: 0.9999991157307115, iteration: 104342
loss: 1.1559680700302124,grad_norm: 0.9999996453076364, iteration: 104343
loss: 1.2367982864379883,grad_norm: 1.0000000340875743, iteration: 104344
loss: 1.3426344394683838,grad_norm: 0.9999999249963414, iteration: 104345
loss: 1.3326284885406494,grad_norm: 0.9999999162295248, iteration: 104346
loss: 1.3421237468719482,grad_norm: 0.9999998533376293, iteration: 104347
loss: 1.3097639083862305,grad_norm: 0.9999998025923296, iteration: 104348
loss: 1.2861919403076172,grad_norm: 0.999999581904987, iteration: 104349
loss: 1.1028661727905273,grad_norm: 0.9999992795523851, iteration: 104350
loss: 1.2452237606048584,grad_norm: 0.9999998817843475, iteration: 104351
loss: 1.2659145593643188,grad_norm: 0.999999947777734, iteration: 104352
loss: 1.1109570264816284,grad_norm: 0.9999999354872952, iteration: 104353
loss: 1.1835514307022095,grad_norm: 0.9999990857966383, iteration: 104354
loss: 1.146966576576233,grad_norm: 0.9999993907671043, iteration: 104355
loss: 1.1451834440231323,grad_norm: 0.9999996626841645, iteration: 104356
loss: 1.2921005487442017,grad_norm: 0.9999998598909368, iteration: 104357
loss: 1.201712727546692,grad_norm: 0.9999992448181736, iteration: 104358
loss: 1.3306204080581665,grad_norm: 0.9999996831388749, iteration: 104359
loss: 1.1461554765701294,grad_norm: 0.9999997551092182, iteration: 104360
loss: 1.2953357696533203,grad_norm: 0.9999994753853767, iteration: 104361
loss: 1.258118987083435,grad_norm: 0.9999997589451345, iteration: 104362
loss: 1.3227038383483887,grad_norm: 0.9999999307751728, iteration: 104363
loss: 1.19749915599823,grad_norm: 0.9999998462022959, iteration: 104364
loss: 1.2974982261657715,grad_norm: 0.9999996181805562, iteration: 104365
loss: 1.2691110372543335,grad_norm: 0.9999997715711713, iteration: 104366
loss: 1.3705098628997803,grad_norm: 0.9999999542890964, iteration: 104367
loss: 1.375022053718567,grad_norm: 0.9999998971029643, iteration: 104368
loss: 1.2804549932479858,grad_norm: 0.9999994982266032, iteration: 104369
loss: 1.067290186882019,grad_norm: 0.9999991853218085, iteration: 104370
loss: 1.362165927886963,grad_norm: 0.9999997597243412, iteration: 104371
loss: 1.248482346534729,grad_norm: 0.9999998359533339, iteration: 104372
loss: 1.2386696338653564,grad_norm: 0.9999998182185247, iteration: 104373
loss: 1.2321853637695312,grad_norm: 0.9999998957464493, iteration: 104374
loss: 1.3126810789108276,grad_norm: 0.9999995310035493, iteration: 104375
loss: 1.182665467262268,grad_norm: 1.0000000455317966, iteration: 104376
loss: 1.3479876518249512,grad_norm: 0.9999999979966031, iteration: 104377
loss: 1.2080215215682983,grad_norm: 0.9999999089747861, iteration: 104378
loss: 1.2881382703781128,grad_norm: 0.9999995766106934, iteration: 104379
loss: 1.2754545211791992,grad_norm: 0.9999998913290525, iteration: 104380
loss: 1.3184603452682495,grad_norm: 0.9999999712901108, iteration: 104381
loss: 1.4152557849884033,grad_norm: 0.9999995415365052, iteration: 104382
loss: 1.2702429294586182,grad_norm: 0.999999941358353, iteration: 104383
loss: 1.2453080415725708,grad_norm: 0.9999996970303497, iteration: 104384
loss: 1.296167016029358,grad_norm: 0.9999999530716328, iteration: 104385
loss: 1.3019475936889648,grad_norm: 0.9999997044833352, iteration: 104386
loss: 1.2547537088394165,grad_norm: 0.9999994954151217, iteration: 104387
loss: 1.3653053045272827,grad_norm: 1.0000000325863845, iteration: 104388
loss: 1.376017451286316,grad_norm: 0.9999995345435426, iteration: 104389
loss: 1.2329721450805664,grad_norm: 0.9999996639753671, iteration: 104390
loss: 1.270329475402832,grad_norm: 0.9999995647366035, iteration: 104391
loss: 1.1106194257736206,grad_norm: 0.999999793300621, iteration: 104392
loss: 1.3375804424285889,grad_norm: 0.9999996894994069, iteration: 104393
loss: 1.1139411926269531,grad_norm: 0.999999859339934, iteration: 104394
loss: 1.238623023033142,grad_norm: 0.9999998065696835, iteration: 104395
loss: 1.2096871137619019,grad_norm: 0.9999996547472969, iteration: 104396
loss: 1.517366886138916,grad_norm: 0.9999999275549221, iteration: 104397
loss: 1.3191580772399902,grad_norm: 0.9999998977228632, iteration: 104398
loss: 1.3108986616134644,grad_norm: 0.9999997022240183, iteration: 104399
loss: 1.3172330856323242,grad_norm: 0.9999996376156284, iteration: 104400
loss: 1.2825493812561035,grad_norm: 0.9999999230275043, iteration: 104401
loss: 1.245550513267517,grad_norm: 0.9999998597729576, iteration: 104402
loss: 1.2780585289001465,grad_norm: 0.9999998925694616, iteration: 104403
loss: 1.4351297616958618,grad_norm: 0.9999998209114416, iteration: 104404
loss: 1.3515739440917969,grad_norm: 0.9999998647776531, iteration: 104405
loss: 1.477613091468811,grad_norm: 0.9999998795434, iteration: 104406
loss: 1.6667301654815674,grad_norm: 0.9999998372203198, iteration: 104407
loss: 1.4683568477630615,grad_norm: 0.9999999139231891, iteration: 104408
loss: 1.138800859451294,grad_norm: 0.9999994946450573, iteration: 104409
loss: 1.3445889949798584,grad_norm: 0.9999999874058462, iteration: 104410
loss: 1.2215752601623535,grad_norm: 0.9999995554836355, iteration: 104411
loss: 1.2497131824493408,grad_norm: 0.9999993978742445, iteration: 104412
loss: 1.367206335067749,grad_norm: 0.9999999408988477, iteration: 104413
loss: 1.5228828191757202,grad_norm: 0.9999996207056675, iteration: 104414
loss: 1.157372236251831,grad_norm: 0.9999993684674915, iteration: 104415
loss: 1.3294845819473267,grad_norm: 0.9999997533385658, iteration: 104416
loss: 1.361740231513977,grad_norm: 0.9999996935840266, iteration: 104417
loss: 1.2506110668182373,grad_norm: 0.9999998482653315, iteration: 104418
loss: 1.2774147987365723,grad_norm: 0.9999999284477186, iteration: 104419
loss: 1.1777839660644531,grad_norm: 0.9999991971212447, iteration: 104420
loss: 1.1018832921981812,grad_norm: 0.9999997019217934, iteration: 104421
loss: 1.3617801666259766,grad_norm: 0.9999996850031342, iteration: 104422
loss: 1.3618512153625488,grad_norm: 0.9999998776651701, iteration: 104423
loss: 1.4423609972000122,grad_norm: 0.999999742121768, iteration: 104424
loss: 1.3440724611282349,grad_norm: 0.9999998662374071, iteration: 104425
loss: 1.1536630392074585,grad_norm: 0.9999993107893969, iteration: 104426
loss: 1.441311001777649,grad_norm: 0.999999865566448, iteration: 104427
loss: 1.177488088607788,grad_norm: 0.9999994967374327, iteration: 104428
loss: 1.138951063156128,grad_norm: 0.9999998142666652, iteration: 104429
loss: 1.4138104915618896,grad_norm: 1.0000000822796253, iteration: 104430
loss: 1.1930716037750244,grad_norm: 1.0000000257448276, iteration: 104431
loss: 1.2345303297042847,grad_norm: 0.9999996860048224, iteration: 104432
loss: 1.3086676597595215,grad_norm: 0.9999995319381582, iteration: 104433
loss: 1.4096988439559937,grad_norm: 0.9999998295018907, iteration: 104434
loss: 1.1967873573303223,grad_norm: 0.9999995825912, iteration: 104435
loss: 1.2668299674987793,grad_norm: 0.9999993443367655, iteration: 104436
loss: 1.2266422510147095,grad_norm: 0.9999997103161464, iteration: 104437
loss: 1.4054139852523804,grad_norm: 1.0000000769862263, iteration: 104438
loss: 1.2666807174682617,grad_norm: 0.9999998758385662, iteration: 104439
loss: 1.3897583484649658,grad_norm: 0.9999999238414721, iteration: 104440
loss: 1.2081773281097412,grad_norm: 0.9999998301758483, iteration: 104441
loss: 1.2733180522918701,grad_norm: 1.0000000246817644, iteration: 104442
loss: 1.4058632850646973,grad_norm: 0.9999999008179271, iteration: 104443
loss: 1.2968459129333496,grad_norm: 0.9999999597674462, iteration: 104444
loss: 1.3637763261795044,grad_norm: 0.9999999882558854, iteration: 104445
loss: 1.2659720182418823,grad_norm: 0.999999705830033, iteration: 104446
loss: 1.3277578353881836,grad_norm: 0.9999994575115433, iteration: 104447
loss: 1.192282795906067,grad_norm: 0.9999999947592962, iteration: 104448
loss: 1.2889745235443115,grad_norm: 0.9999999478516238, iteration: 104449
loss: 1.2260764837265015,grad_norm: 0.9999999558013652, iteration: 104450
loss: 1.231437087059021,grad_norm: 0.9999996721446254, iteration: 104451
loss: 1.342077374458313,grad_norm: 0.9999996353036806, iteration: 104452
loss: 1.2011927366256714,grad_norm: 0.9999998978214664, iteration: 104453
loss: 1.1956195831298828,grad_norm: 0.9999996435133037, iteration: 104454
loss: 1.2335517406463623,grad_norm: 1.0000000130212816, iteration: 104455
loss: 1.132570505142212,grad_norm: 0.9999995031927958, iteration: 104456
loss: 1.3895659446716309,grad_norm: 1.0000000588556133, iteration: 104457
loss: 1.2244755029678345,grad_norm: 0.999999804182255, iteration: 104458
loss: 1.3953449726104736,grad_norm: 0.9999999259003007, iteration: 104459
loss: 1.144073486328125,grad_norm: 0.9999996653968781, iteration: 104460
loss: 1.0558712482452393,grad_norm: 0.9999994879154085, iteration: 104461
loss: 1.3333582878112793,grad_norm: 0.9999998216721016, iteration: 104462
loss: 1.1466472148895264,grad_norm: 0.9999995637633786, iteration: 104463
loss: 1.2942358255386353,grad_norm: 0.9999997314313517, iteration: 104464
loss: 1.2433534860610962,grad_norm: 1.000000057081654, iteration: 104465
loss: 1.2877811193466187,grad_norm: 0.9999999668327163, iteration: 104466
loss: 1.4137877225875854,grad_norm: 0.9999998895741004, iteration: 104467
loss: 1.1389565467834473,grad_norm: 0.9999998052506625, iteration: 104468
loss: 1.2397804260253906,grad_norm: 0.9999999026431947, iteration: 104469
loss: 1.4138389825820923,grad_norm: 0.9999998959643106, iteration: 104470
loss: 1.2844990491867065,grad_norm: 0.9999998113684617, iteration: 104471
loss: 1.201346516609192,grad_norm: 0.9999996075768197, iteration: 104472
loss: 1.280455231666565,grad_norm: 0.9999996586577083, iteration: 104473
loss: 1.2096247673034668,grad_norm: 0.9999998775781826, iteration: 104474
loss: 1.1557800769805908,grad_norm: 0.9999997942709685, iteration: 104475
loss: 1.1942963600158691,grad_norm: 0.9999995667567302, iteration: 104476
loss: 1.2978507280349731,grad_norm: 0.9999998280683629, iteration: 104477
loss: 1.2770780324935913,grad_norm: 0.9999997063185705, iteration: 104478
loss: 1.3318253755569458,grad_norm: 0.9999998049535739, iteration: 104479
loss: 1.5764029026031494,grad_norm: 1.000000002276604, iteration: 104480
loss: 1.3752341270446777,grad_norm: 0.9999998567364355, iteration: 104481
loss: 1.4875507354736328,grad_norm: 0.9999998226529639, iteration: 104482
loss: 1.2841941118240356,grad_norm: 0.9999999008351992, iteration: 104483
loss: 1.3321492671966553,grad_norm: 1.0000000234849882, iteration: 104484
loss: 1.7328391075134277,grad_norm: 0.9999997430171077, iteration: 104485
loss: 1.2196282148361206,grad_norm: 0.9999996926318637, iteration: 104486
loss: 1.2208648920059204,grad_norm: 0.9999994002166803, iteration: 104487
loss: 1.208293080329895,grad_norm: 0.9999999524255934, iteration: 104488
loss: 1.4732083082199097,grad_norm: 0.9999998936674996, iteration: 104489
loss: 1.3626450300216675,grad_norm: 0.9999999748459565, iteration: 104490
loss: 1.239294171333313,grad_norm: 0.9999998297582594, iteration: 104491
loss: 1.3293298482894897,grad_norm: 0.9999998164358286, iteration: 104492
loss: 1.6929222345352173,grad_norm: 0.9999999873832905, iteration: 104493
loss: 1.406441330909729,grad_norm: 0.9999999853759582, iteration: 104494
loss: 1.3060873746871948,grad_norm: 0.9999999280938074, iteration: 104495
loss: 1.245345115661621,grad_norm: 0.999999995123087, iteration: 104496
loss: 1.2750773429870605,grad_norm: 0.9999999263401645, iteration: 104497
loss: 1.3441884517669678,grad_norm: 0.9999997617306224, iteration: 104498
loss: 1.4675445556640625,grad_norm: 0.9999999488718329, iteration: 104499
loss: 1.4251817464828491,grad_norm: 1.0000001036185588, iteration: 104500
loss: 1.772330403327942,grad_norm: 0.9999999572394006, iteration: 104501
loss: 1.293407678604126,grad_norm: 0.9999998360181881, iteration: 104502
loss: 1.298520565032959,grad_norm: 0.9999998372178696, iteration: 104503
loss: 1.1631009578704834,grad_norm: 0.9999995496695361, iteration: 104504
loss: 1.241470217704773,grad_norm: 0.9999996686445444, iteration: 104505
loss: 1.2293988466262817,grad_norm: 0.999999409015766, iteration: 104506
loss: 1.2218221426010132,grad_norm: 0.9999998998489387, iteration: 104507
loss: 1.237800121307373,grad_norm: 0.9999993929025321, iteration: 104508
loss: 1.1873856782913208,grad_norm: 0.9999999233319188, iteration: 104509
loss: 1.2590466737747192,grad_norm: 0.9999999917203124, iteration: 104510
loss: 1.2635917663574219,grad_norm: 0.9999996898341217, iteration: 104511
loss: 1.2707018852233887,grad_norm: 0.9999998844340442, iteration: 104512
loss: 1.3087091445922852,grad_norm: 0.9999996984906226, iteration: 104513
loss: 1.2718181610107422,grad_norm: 0.9999999043235682, iteration: 104514
loss: 1.2821636199951172,grad_norm: 0.9999998558283437, iteration: 104515
loss: 1.1314692497253418,grad_norm: 0.9999991135807373, iteration: 104516
loss: 1.1679472923278809,grad_norm: 0.9999999092194916, iteration: 104517
loss: 1.311143159866333,grad_norm: 0.9999998357121682, iteration: 104518
loss: 1.132245421409607,grad_norm: 0.999999277207661, iteration: 104519
loss: 1.0612152814865112,grad_norm: 0.9999990596738506, iteration: 104520
loss: 1.1214756965637207,grad_norm: 0.9999999187351475, iteration: 104521
loss: 1.3556197881698608,grad_norm: 0.9999997050539547, iteration: 104522
loss: 1.1948306560516357,grad_norm: 0.9999994175309082, iteration: 104523
loss: 1.1277607679367065,grad_norm: 0.9999992803556859, iteration: 104524
loss: 1.2775486707687378,grad_norm: 0.9999997688705223, iteration: 104525
loss: 1.0975596904754639,grad_norm: 0.9999995799901432, iteration: 104526
loss: 1.2188045978546143,grad_norm: 0.9999993493994704, iteration: 104527
loss: 1.0926350355148315,grad_norm: 0.9999991658439544, iteration: 104528
loss: 1.2965803146362305,grad_norm: 0.9999997633704288, iteration: 104529
loss: 1.084218144416809,grad_norm: 0.999999629083414, iteration: 104530
loss: 1.062641978263855,grad_norm: 0.9999990529028782, iteration: 104531
loss: 1.1806904077529907,grad_norm: 0.999999898005317, iteration: 104532
loss: 1.0218284130096436,grad_norm: 0.9999997612437489, iteration: 104533
loss: 1.0098038911819458,grad_norm: 0.999999202148812, iteration: 104534
loss: 1.0610004663467407,grad_norm: 0.9999999253924429, iteration: 104535
loss: 1.1299740076065063,grad_norm: 0.9999998714309036, iteration: 104536
loss: 1.117081642150879,grad_norm: 0.9999996498469548, iteration: 104537
loss: 1.2526518106460571,grad_norm: 0.9999996568312068, iteration: 104538
loss: 0.9971709251403809,grad_norm: 0.9516524814351682, iteration: 104539
loss: 1.194132924079895,grad_norm: 0.9999999802406988, iteration: 104540
loss: 1.0353248119354248,grad_norm: 0.9999996607430078, iteration: 104541
loss: 1.0337730646133423,grad_norm: 0.9999992623573349, iteration: 104542
loss: 1.1819615364074707,grad_norm: 0.9999995972420772, iteration: 104543
loss: 1.1433725357055664,grad_norm: 0.9999992202914874, iteration: 104544
loss: 1.0730007886886597,grad_norm: 0.9999990603415917, iteration: 104545
loss: 1.067853331565857,grad_norm: 0.9999991157171328, iteration: 104546
loss: 1.3469592332839966,grad_norm: 0.9999999494490575, iteration: 104547
loss: 1.0620274543762207,grad_norm: 0.9999993128696135, iteration: 104548
loss: 1.1765244007110596,grad_norm: 0.9999993287776936, iteration: 104549
loss: 1.0895965099334717,grad_norm: 0.9999994409942848, iteration: 104550
loss: 1.127813696861267,grad_norm: 0.9999993671246219, iteration: 104551
loss: 1.1334275007247925,grad_norm: 0.9999994072379178, iteration: 104552
loss: 1.1013792753219604,grad_norm: 0.9999999906257778, iteration: 104553
loss: 0.9250078797340393,grad_norm: 0.8879884195839635, iteration: 104554
loss: 1.1862623691558838,grad_norm: 0.999999688955224, iteration: 104555
loss: 1.0312597751617432,grad_norm: 0.9999995923068545, iteration: 104556
loss: 1.0559005737304688,grad_norm: 0.9999994315771343, iteration: 104557
loss: 1.2730497121810913,grad_norm: 0.9999996842925024, iteration: 104558
loss: 1.2036010026931763,grad_norm: 0.9999996719116547, iteration: 104559
loss: 1.1712538003921509,grad_norm: 0.9999995859772272, iteration: 104560
loss: 1.231130838394165,grad_norm: 0.9999999861476139, iteration: 104561
loss: 1.174443244934082,grad_norm: 0.9999991900502623, iteration: 104562
loss: 1.1220369338989258,grad_norm: 1.0000000620753176, iteration: 104563
loss: 1.3046212196350098,grad_norm: 0.999999786803782, iteration: 104564
loss: 1.5519152879714966,grad_norm: 1.0000000073752162, iteration: 104565
loss: 1.2347536087036133,grad_norm: 0.9999993897764736, iteration: 104566
loss: 1.3389226198196411,grad_norm: 1.0000000577994645, iteration: 104567
loss: 1.6003714799880981,grad_norm: 0.9999997618212031, iteration: 104568
loss: 1.7265187501907349,grad_norm: 0.9999995802131127, iteration: 104569
loss: 1.7019555568695068,grad_norm: 0.9999995179385613, iteration: 104570
loss: 1.396584391593933,grad_norm: 0.9999995297893381, iteration: 104571
loss: 1.804610013961792,grad_norm: 0.9999998260306417, iteration: 104572
loss: 1.402053952217102,grad_norm: 1.0000000146189585, iteration: 104573
loss: 1.394981026649475,grad_norm: 0.9999998165126127, iteration: 104574
loss: 1.4242349863052368,grad_norm: 0.9999994644943666, iteration: 104575
loss: 1.5911579132080078,grad_norm: 0.9999998553363756, iteration: 104576
loss: 1.4263269901275635,grad_norm: 0.9999995501794331, iteration: 104577
loss: 1.510561227798462,grad_norm: 0.9999998267770314, iteration: 104578
loss: 1.390384316444397,grad_norm: 0.9999995773168112, iteration: 104579
loss: 1.5538384914398193,grad_norm: 0.9999998477004122, iteration: 104580
loss: 1.4361083507537842,grad_norm: 0.9999997483972916, iteration: 104581
loss: 1.5076676607131958,grad_norm: 0.9999998362681537, iteration: 104582
loss: 1.5483978986740112,grad_norm: 0.9999999051120498, iteration: 104583
loss: 1.4132589101791382,grad_norm: 0.9999998360934332, iteration: 104584
loss: 1.356671690940857,grad_norm: 0.9999998021894437, iteration: 104585
loss: 1.2770514488220215,grad_norm: 0.9999994633018764, iteration: 104586
loss: 1.3021492958068848,grad_norm: 0.99999937404299, iteration: 104587
loss: 1.2453011274337769,grad_norm: 0.9999993441593253, iteration: 104588
loss: 1.3271976709365845,grad_norm: 0.9999994983478891, iteration: 104589
loss: 1.1587375402450562,grad_norm: 0.9999999045919398, iteration: 104590
loss: 1.139846682548523,grad_norm: 0.999999928652077, iteration: 104591
loss: 1.2317167520523071,grad_norm: 0.999999537073963, iteration: 104592
loss: 1.1056256294250488,grad_norm: 0.9999991902670131, iteration: 104593
loss: 1.0952199697494507,grad_norm: 0.9999998309781905, iteration: 104594
loss: 1.0946991443634033,grad_norm: 0.9999997184902644, iteration: 104595
loss: 1.0660914182662964,grad_norm: 0.9999991146799951, iteration: 104596
loss: 1.1523263454437256,grad_norm: 0.9999997394805745, iteration: 104597
loss: 1.0421648025512695,grad_norm: 0.8777551362986883, iteration: 104598
loss: 1.0803085565567017,grad_norm: 0.99999992375658, iteration: 104599
loss: 1.0774301290512085,grad_norm: 0.8920764452700513, iteration: 104600
loss: 1.1631709337234497,grad_norm: 0.9999997551919911, iteration: 104601
loss: 1.1853498220443726,grad_norm: 0.9999998674616472, iteration: 104602
loss: 1.2410637140274048,grad_norm: 0.9999993539216148, iteration: 104603
loss: 1.061574101448059,grad_norm: 0.9999998080605976, iteration: 104604
loss: 1.078700065612793,grad_norm: 0.988508335415242, iteration: 104605
loss: 1.074924349784851,grad_norm: 0.8346472285169474, iteration: 104606
loss: 1.041717767715454,grad_norm: 0.9999992124190588, iteration: 104607
loss: 1.0704888105392456,grad_norm: 0.9999996362781348, iteration: 104608
loss: 1.1146090030670166,grad_norm: 0.9999991834425279, iteration: 104609
loss: 1.1000182628631592,grad_norm: 0.9999998048624924, iteration: 104610
loss: 1.0440059900283813,grad_norm: 0.9999994354794692, iteration: 104611
loss: 1.0308072566986084,grad_norm: 0.9999990898420458, iteration: 104612
loss: 1.1482325792312622,grad_norm: 0.9999994682010576, iteration: 104613
loss: 0.9963366389274597,grad_norm: 0.9762836672561596, iteration: 104614
loss: 1.0436795949935913,grad_norm: 0.9434303853525681, iteration: 104615
loss: 1.0646568536758423,grad_norm: 0.9999993983037182, iteration: 104616
loss: 1.075488805770874,grad_norm: 0.9999998854229974, iteration: 104617
loss: 1.1500022411346436,grad_norm: 0.9999997976615991, iteration: 104618
loss: 1.005882978439331,grad_norm: 0.9009538063567247, iteration: 104619
loss: 1.0732063055038452,grad_norm: 0.999999433775901, iteration: 104620
loss: 1.012823462486267,grad_norm: 0.9999998164382162, iteration: 104621
loss: 1.2430741786956787,grad_norm: 0.9999998843265764, iteration: 104622
loss: 1.0265610218048096,grad_norm: 0.9064712759949525, iteration: 104623
loss: 1.0958980321884155,grad_norm: 0.9999997601838594, iteration: 104624
loss: 1.0533298254013062,grad_norm: 0.9999997516377511, iteration: 104625
loss: 1.086301326751709,grad_norm: 0.9999999241571981, iteration: 104626
loss: 0.986710786819458,grad_norm: 0.8488048407183459, iteration: 104627
loss: 1.1306960582733154,grad_norm: 0.9999996154253804, iteration: 104628
loss: 1.013555884361267,grad_norm: 0.9999996070509347, iteration: 104629
loss: 0.9992758631706238,grad_norm: 0.9999990622256327, iteration: 104630
loss: 1.0015307664871216,grad_norm: 0.936334859045812, iteration: 104631
loss: 1.004380702972412,grad_norm: 0.9114148040947616, iteration: 104632
loss: 1.0211676359176636,grad_norm: 0.9999989921397414, iteration: 104633
loss: 1.0586220026016235,grad_norm: 0.9999999545977541, iteration: 104634
loss: 1.03659987449646,grad_norm: 0.9999990664264189, iteration: 104635
loss: 1.0980952978134155,grad_norm: 0.9999990804707203, iteration: 104636
loss: 1.4602291584014893,grad_norm: 0.9999997566424934, iteration: 104637
loss: 1.0746487379074097,grad_norm: 0.9999997589112115, iteration: 104638
loss: 1.0420953035354614,grad_norm: 0.9999993481952288, iteration: 104639
loss: 1.011749029159546,grad_norm: 0.9999992658907234, iteration: 104640
loss: 1.0701395273208618,grad_norm: 0.9999998893047901, iteration: 104641
loss: 1.0720648765563965,grad_norm: 0.7788532231524644, iteration: 104642
loss: 1.0678719282150269,grad_norm: 0.9999993121367791, iteration: 104643
loss: 1.1007792949676514,grad_norm: 0.9999993657444625, iteration: 104644
loss: 1.0614694356918335,grad_norm: 0.9999990824126626, iteration: 104645
loss: 1.0208865404129028,grad_norm: 0.8929044229875469, iteration: 104646
loss: 1.2586336135864258,grad_norm: 0.9999999921589571, iteration: 104647
loss: 1.0430551767349243,grad_norm: 0.9999998090198793, iteration: 104648
loss: 1.0967413187026978,grad_norm: 0.9999994114013615, iteration: 104649
loss: 1.0788967609405518,grad_norm: 0.9069996261411624, iteration: 104650
loss: 1.0933806896209717,grad_norm: 0.95740717412889, iteration: 104651
loss: 1.0826354026794434,grad_norm: 0.9167362977067007, iteration: 104652
loss: 1.0355075597763062,grad_norm: 0.9999996422953447, iteration: 104653
loss: 1.0701904296875,grad_norm: 0.9999996334963983, iteration: 104654
loss: 1.1522326469421387,grad_norm: 0.9999994335453516, iteration: 104655
loss: 1.0207301378250122,grad_norm: 0.9999991593720436, iteration: 104656
loss: 1.1286908388137817,grad_norm: 0.9999999295855504, iteration: 104657
loss: 1.1595011949539185,grad_norm: 0.9999993947760945, iteration: 104658
loss: 1.0768802165985107,grad_norm: 0.9999993096369059, iteration: 104659
loss: 1.1063157320022583,grad_norm: 0.9999992334979281, iteration: 104660
loss: 1.2115353345870972,grad_norm: 0.9999991026237397, iteration: 104661
loss: 1.1666769981384277,grad_norm: 0.9999991327246361, iteration: 104662
loss: 1.1430976390838623,grad_norm: 0.9999997451146766, iteration: 104663
loss: 1.071169137954712,grad_norm: 0.9999995758685372, iteration: 104664
loss: 1.425974726676941,grad_norm: 0.9999994368744656, iteration: 104665
loss: 1.2170383930206299,grad_norm: 0.9999996800613977, iteration: 104666
loss: 1.1851775646209717,grad_norm: 0.9999992339300979, iteration: 104667
loss: 1.2661818265914917,grad_norm: 0.9999998768732324, iteration: 104668
loss: 1.152315616607666,grad_norm: 0.9999999065645501, iteration: 104669
loss: 1.2347136735916138,grad_norm: 0.9999998544925137, iteration: 104670
loss: 1.2000937461853027,grad_norm: 0.9999997209484969, iteration: 104671
loss: 1.1341915130615234,grad_norm: 1.000000058834544, iteration: 104672
loss: 1.178218126296997,grad_norm: 0.9999998529629212, iteration: 104673
loss: 1.1176819801330566,grad_norm: 0.9999998425493327, iteration: 104674
loss: 1.2639200687408447,grad_norm: 0.9999998984224783, iteration: 104675
loss: 1.189264178276062,grad_norm: 0.9999999225768054, iteration: 104676
loss: 1.2558436393737793,grad_norm: 0.9999996031569699, iteration: 104677
loss: 1.1335407495498657,grad_norm: 0.999999669881603, iteration: 104678
loss: 1.1844438314437866,grad_norm: 0.9999996413606232, iteration: 104679
loss: 1.2134983539581299,grad_norm: 0.999999700738466, iteration: 104680
loss: 1.1754342317581177,grad_norm: 0.9999997152007829, iteration: 104681
loss: 1.1643002033233643,grad_norm: 0.9999993689193147, iteration: 104682
loss: 1.1370540857315063,grad_norm: 0.999999767632121, iteration: 104683
loss: 1.2063260078430176,grad_norm: 0.9999996006071166, iteration: 104684
loss: 1.226410984992981,grad_norm: 0.9999997756972258, iteration: 104685
loss: 1.2143656015396118,grad_norm: 0.9999997176029476, iteration: 104686
loss: 1.0843578577041626,grad_norm: 0.9999992378497955, iteration: 104687
loss: 1.1741787195205688,grad_norm: 0.9999994737870221, iteration: 104688
loss: 1.2372946739196777,grad_norm: 0.9999995069836103, iteration: 104689
loss: 1.1354639530181885,grad_norm: 0.9999997703238479, iteration: 104690
loss: 1.127151608467102,grad_norm: 0.9999992993268773, iteration: 104691
loss: 1.3419837951660156,grad_norm: 0.9999995694371321, iteration: 104692
loss: 1.2241450548171997,grad_norm: 0.9999998972570737, iteration: 104693
loss: 1.4048856496810913,grad_norm: 0.999999727567886, iteration: 104694
loss: 1.1719350814819336,grad_norm: 0.9999999011297898, iteration: 104695
loss: 1.6799700260162354,grad_norm: 1.0000000697734046, iteration: 104696
loss: 1.5709660053253174,grad_norm: 0.9999995985847708, iteration: 104697
loss: 1.2142443656921387,grad_norm: 0.9999999854037305, iteration: 104698
loss: 1.3993819952011108,grad_norm: 0.9999998275724498, iteration: 104699
loss: 1.5856200456619263,grad_norm: 0.9999997391493445, iteration: 104700
loss: 1.3903083801269531,grad_norm: 1.0000000430633136, iteration: 104701
loss: 1.3435224294662476,grad_norm: 0.9999998678082418, iteration: 104702
loss: 1.5436986684799194,grad_norm: 0.999999510728545, iteration: 104703
loss: 1.4068275690078735,grad_norm: 0.9999999089449505, iteration: 104704
loss: 1.4142223596572876,grad_norm: 0.9999999717473499, iteration: 104705
loss: 1.2130273580551147,grad_norm: 0.9999997443573464, iteration: 104706
loss: 1.2106106281280518,grad_norm: 0.9999998916495715, iteration: 104707
loss: 1.128090739250183,grad_norm: 0.9999995086020835, iteration: 104708
loss: 1.1096584796905518,grad_norm: 0.999999706996992, iteration: 104709
loss: 1.2629891633987427,grad_norm: 0.999999670797443, iteration: 104710
loss: 1.263163447380066,grad_norm: 1.0000000703229939, iteration: 104711
loss: 1.2787597179412842,grad_norm: 0.9999998052605731, iteration: 104712
loss: 1.3281199932098389,grad_norm: 0.9999998838986509, iteration: 104713
loss: 1.2218453884124756,grad_norm: 0.9999998979720213, iteration: 104714
loss: 1.1256542205810547,grad_norm: 0.9999997925093389, iteration: 104715
loss: 1.3309645652770996,grad_norm: 0.9999998003099579, iteration: 104716
loss: 1.2735434770584106,grad_norm: 0.9999996136154143, iteration: 104717
loss: 1.14048171043396,grad_norm: 0.9999998894790558, iteration: 104718
loss: 1.0888769626617432,grad_norm: 0.9999994986918106, iteration: 104719
loss: 1.0242750644683838,grad_norm: 0.9999992778193102, iteration: 104720
loss: 1.0953274965286255,grad_norm: 0.999999829889998, iteration: 104721
loss: 1.1522613763809204,grad_norm: 0.9999998604377864, iteration: 104722
loss: 1.2096552848815918,grad_norm: 0.9999997087111353, iteration: 104723
loss: 1.113477349281311,grad_norm: 0.9999996447200598, iteration: 104724
loss: 1.0858635902404785,grad_norm: 0.9999996275520895, iteration: 104725
loss: 1.3366789817810059,grad_norm: 0.9999998581082122, iteration: 104726
loss: 1.0778725147247314,grad_norm: 0.9999994422905435, iteration: 104727
loss: 1.1067665815353394,grad_norm: 0.9999995652089465, iteration: 104728
loss: 1.0842533111572266,grad_norm: 0.9999997239899986, iteration: 104729
loss: 1.1814754009246826,grad_norm: 0.9999993941869674, iteration: 104730
loss: 1.0929094552993774,grad_norm: 0.9999998611916185, iteration: 104731
loss: 1.2043176889419556,grad_norm: 0.9999999789068013, iteration: 104732
loss: 1.0693297386169434,grad_norm: 0.9999996234898935, iteration: 104733
loss: 1.0858333110809326,grad_norm: 0.9999994488178805, iteration: 104734
loss: 1.0411951541900635,grad_norm: 0.9999995157023307, iteration: 104735
loss: 1.0873490571975708,grad_norm: 0.9999991848079283, iteration: 104736
loss: 1.058946132659912,grad_norm: 0.9999996963670204, iteration: 104737
loss: 1.0562760829925537,grad_norm: 0.9999992583058762, iteration: 104738
loss: 1.0231034755706787,grad_norm: 0.9999994532371012, iteration: 104739
loss: 1.0879731178283691,grad_norm: 0.9999992268389973, iteration: 104740
loss: 1.0283937454223633,grad_norm: 0.9999997979826716, iteration: 104741
loss: 1.0465598106384277,grad_norm: 0.9999993004840629, iteration: 104742
loss: 1.1386382579803467,grad_norm: 0.999999892698003, iteration: 104743
loss: 1.0007344484329224,grad_norm: 0.9999993426858222, iteration: 104744
loss: 1.2320207357406616,grad_norm: 0.9999999176034232, iteration: 104745
loss: 1.0567680597305298,grad_norm: 0.9999999707995166, iteration: 104746
loss: 1.0417578220367432,grad_norm: 0.9175568126174762, iteration: 104747
loss: 1.0238721370697021,grad_norm: 0.9999990579274695, iteration: 104748
loss: 1.055880069732666,grad_norm: 0.9999995237763606, iteration: 104749
loss: 1.0029484033584595,grad_norm: 0.9362903804693891, iteration: 104750
loss: 1.040116786956787,grad_norm: 0.9999992664622188, iteration: 104751
loss: 1.0461736917495728,grad_norm: 0.9999993198419157, iteration: 104752
loss: 1.2184489965438843,grad_norm: 0.999999762090838, iteration: 104753
loss: 1.2350932359695435,grad_norm: 0.9999997224953182, iteration: 104754
loss: 1.3878223896026611,grad_norm: 0.9999998067947556, iteration: 104755
loss: 1.1982920169830322,grad_norm: 0.9999994330621647, iteration: 104756
loss: 1.1270474195480347,grad_norm: 0.9999994806116125, iteration: 104757
loss: 1.132302165031433,grad_norm: 0.9999996030443836, iteration: 104758
loss: 1.0913026332855225,grad_norm: 0.9999998043974135, iteration: 104759
loss: 0.9792380332946777,grad_norm: 0.8562518966001761, iteration: 104760
loss: 1.050412654876709,grad_norm: 0.9999990425851104, iteration: 104761
loss: 1.006226658821106,grad_norm: 0.9999999357942115, iteration: 104762
loss: 1.167015790939331,grad_norm: 0.999999967456504, iteration: 104763
loss: 1.191396951675415,grad_norm: 0.9999991282013146, iteration: 104764
loss: 1.2063121795654297,grad_norm: 0.9999998532483297, iteration: 104765
loss: 0.9916905760765076,grad_norm: 0.999999936056373, iteration: 104766
loss: 1.336975336074829,grad_norm: 0.9999998339055185, iteration: 104767
loss: 1.1314290761947632,grad_norm: 0.9999998899736848, iteration: 104768
loss: 1.101501703262329,grad_norm: 0.9999997297532675, iteration: 104769
loss: 1.1728708744049072,grad_norm: 0.9999994704531533, iteration: 104770
loss: 1.036608338356018,grad_norm: 0.9999998302422671, iteration: 104771
loss: 1.124843955039978,grad_norm: 0.9999999369103064, iteration: 104772
loss: 1.054251790046692,grad_norm: 0.9999994594026359, iteration: 104773
loss: 1.2970856428146362,grad_norm: 0.9999999645127181, iteration: 104774
loss: 1.259732961654663,grad_norm: 0.9999998419603802, iteration: 104775
loss: 1.0928717851638794,grad_norm: 0.9999996027023086, iteration: 104776
loss: 1.2345030307769775,grad_norm: 0.9999998813239483, iteration: 104777
loss: 1.2212092876434326,grad_norm: 0.9999997431716714, iteration: 104778
loss: 1.0755503177642822,grad_norm: 0.9999995971667655, iteration: 104779
loss: 1.2259494066238403,grad_norm: 0.9999995815711124, iteration: 104780
loss: 1.4817827939987183,grad_norm: 0.999999892498358, iteration: 104781
loss: 1.2625449895858765,grad_norm: 0.9999999784800109, iteration: 104782
loss: 1.1292370557785034,grad_norm: 0.9999996524956879, iteration: 104783
loss: 1.143894910812378,grad_norm: 0.9999998528887877, iteration: 104784
loss: 1.1847363710403442,grad_norm: 0.9999998224104408, iteration: 104785
loss: 1.3600598573684692,grad_norm: 0.9999999506186437, iteration: 104786
loss: 1.1259132623672485,grad_norm: 0.9999999389340309, iteration: 104787
loss: 1.2725954055786133,grad_norm: 0.9999998523242153, iteration: 104788
loss: 1.0324594974517822,grad_norm: 0.9999999415245326, iteration: 104789
loss: 1.2737935781478882,grad_norm: 0.9999998818020246, iteration: 104790
loss: 1.048905849456787,grad_norm: 0.9999993792611125, iteration: 104791
loss: 1.1809741258621216,grad_norm: 0.9999996403692701, iteration: 104792
loss: 1.2613952159881592,grad_norm: 0.9999998733294272, iteration: 104793
loss: 1.1705069541931152,grad_norm: 0.999999892417895, iteration: 104794
loss: 1.032646894454956,grad_norm: 0.9999992739491761, iteration: 104795
loss: 1.2846713066101074,grad_norm: 1.0000000353626117, iteration: 104796
loss: 1.05547034740448,grad_norm: 0.9999996813266052, iteration: 104797
loss: 1.28562593460083,grad_norm: 0.9999998023314568, iteration: 104798
loss: 1.2959221601486206,grad_norm: 0.9999997626659354, iteration: 104799
loss: 1.6729656457901,grad_norm: 0.9999998297942286, iteration: 104800
loss: 1.1170496940612793,grad_norm: 0.9999998721997561, iteration: 104801
loss: 1.2375433444976807,grad_norm: 0.9999999102742226, iteration: 104802
loss: 1.0322328805923462,grad_norm: 0.9999989960670577, iteration: 104803
loss: 1.069520115852356,grad_norm: 0.9999994835127902, iteration: 104804
loss: 1.0465096235275269,grad_norm: 0.9999992455323736, iteration: 104805
loss: 1.3386214971542358,grad_norm: 0.9999997213723724, iteration: 104806
loss: 1.1524370908737183,grad_norm: 0.9999999847691792, iteration: 104807
loss: 1.375343680381775,grad_norm: 0.9999998797799928, iteration: 104808
loss: 1.3543801307678223,grad_norm: 0.9999999179885867, iteration: 104809
loss: 1.1534925699234009,grad_norm: 0.9999993315854627, iteration: 104810
loss: 1.0810778141021729,grad_norm: 0.999999977368256, iteration: 104811
loss: 1.1810673475265503,grad_norm: 0.9999992576318266, iteration: 104812
loss: 1.2678956985473633,grad_norm: 0.9999997885990203, iteration: 104813
loss: 1.2127455472946167,grad_norm: 0.9999993023771859, iteration: 104814
loss: 1.3744332790374756,grad_norm: 0.9999998891106127, iteration: 104815
loss: 1.345710277557373,grad_norm: 1.0000000139256666, iteration: 104816
loss: 1.281908631324768,grad_norm: 0.9999998498223303, iteration: 104817
loss: 1.2220350503921509,grad_norm: 0.9999998040664164, iteration: 104818
loss: 1.1308083534240723,grad_norm: 0.9999998165611914, iteration: 104819
loss: 1.3880059719085693,grad_norm: 1.0000000075451574, iteration: 104820
loss: 1.203540563583374,grad_norm: 0.999999577911076, iteration: 104821
loss: 1.1171642541885376,grad_norm: 0.9999998038809251, iteration: 104822
loss: 1.2309296131134033,grad_norm: 1.0000000310842547, iteration: 104823
loss: 1.2187163829803467,grad_norm: 1.0000000118382224, iteration: 104824
loss: 1.1725926399230957,grad_norm: 0.9999997659083082, iteration: 104825
loss: 1.2897329330444336,grad_norm: 0.9999998394934723, iteration: 104826
loss: 1.0968319177627563,grad_norm: 0.999999170490014, iteration: 104827
loss: 1.3541438579559326,grad_norm: 1.0000001268349732, iteration: 104828
loss: 1.195354700088501,grad_norm: 0.9999998286259755, iteration: 104829
loss: 1.3380376100540161,grad_norm: 1.0000000325769762, iteration: 104830
loss: 1.1390776634216309,grad_norm: 0.999999718821136, iteration: 104831
loss: 1.140289306640625,grad_norm: 0.9999999034137586, iteration: 104832
loss: 1.1086931228637695,grad_norm: 0.9999998016407694, iteration: 104833
loss: 1.1280169486999512,grad_norm: 0.9999993400703003, iteration: 104834
loss: 1.214187741279602,grad_norm: 0.9999999465006627, iteration: 104835
loss: 1.2461868524551392,grad_norm: 0.9999994943703967, iteration: 104836
loss: 1.1594399213790894,grad_norm: 0.999999855594465, iteration: 104837
loss: 1.1879584789276123,grad_norm: 0.99999969953756, iteration: 104838
loss: 1.1317416429519653,grad_norm: 0.9999998012720869, iteration: 104839
loss: 1.2474119663238525,grad_norm: 0.9999998906916183, iteration: 104840
loss: 1.3616136312484741,grad_norm: 0.9999999211116832, iteration: 104841
loss: 1.2447097301483154,grad_norm: 0.9999998688631495, iteration: 104842
loss: 1.193682074546814,grad_norm: 0.9999998352272701, iteration: 104843
loss: 1.334588885307312,grad_norm: 0.9999998917587956, iteration: 104844
loss: 1.1778110265731812,grad_norm: 0.9999998757440043, iteration: 104845
loss: 1.471958875656128,grad_norm: 0.999999925274028, iteration: 104846
loss: 1.1714959144592285,grad_norm: 0.9999997382683863, iteration: 104847
loss: 1.245219111442566,grad_norm: 0.9999998156361506, iteration: 104848
loss: 1.1620874404907227,grad_norm: 0.9999998682377385, iteration: 104849
loss: 1.3231780529022217,grad_norm: 0.9999997788580114, iteration: 104850
loss: 1.2734428644180298,grad_norm: 0.9999999013280955, iteration: 104851
loss: 1.2208508253097534,grad_norm: 0.9999997200981483, iteration: 104852
loss: 1.3508716821670532,grad_norm: 0.9999999441171631, iteration: 104853
loss: 1.2611497640609741,grad_norm: 0.9999999538269465, iteration: 104854
loss: 1.210766315460205,grad_norm: 0.9999993346345869, iteration: 104855
loss: 1.0676820278167725,grad_norm: 0.9999992206682288, iteration: 104856
loss: 1.2106943130493164,grad_norm: 0.9999998425622714, iteration: 104857
loss: 1.2542643547058105,grad_norm: 0.9999997883586282, iteration: 104858
loss: 1.3085474967956543,grad_norm: 0.9999999062559196, iteration: 104859
loss: 1.452004313468933,grad_norm: 0.9999998864187176, iteration: 104860
loss: 1.3147175312042236,grad_norm: 0.9999998949930519, iteration: 104861
loss: 1.0721665620803833,grad_norm: 0.9999997928128553, iteration: 104862
loss: 1.134474515914917,grad_norm: 0.9999999401345924, iteration: 104863
loss: 1.1345317363739014,grad_norm: 0.9999999866021986, iteration: 104864
loss: 1.4700649976730347,grad_norm: 0.999999917032381, iteration: 104865
loss: 1.0419056415557861,grad_norm: 0.9999999435214486, iteration: 104866
loss: 1.212465524673462,grad_norm: 0.9999996514633013, iteration: 104867
loss: 1.2847559452056885,grad_norm: 0.9999995099904185, iteration: 104868
loss: 1.267909288406372,grad_norm: 0.9999999497008667, iteration: 104869
loss: 1.4763139486312866,grad_norm: 0.9999999457072619, iteration: 104870
loss: 1.248486876487732,grad_norm: 0.999999936050214, iteration: 104871
loss: 1.2900112867355347,grad_norm: 0.9999997626003758, iteration: 104872
loss: 1.096839427947998,grad_norm: 0.9999994291128429, iteration: 104873
loss: 1.0699678659439087,grad_norm: 0.9999998143590337, iteration: 104874
loss: 1.124992847442627,grad_norm: 0.9999999333555826, iteration: 104875
loss: 1.1440489292144775,grad_norm: 0.9999997119904519, iteration: 104876
loss: 1.1498695611953735,grad_norm: 0.9999997956938435, iteration: 104877
loss: 1.1718021631240845,grad_norm: 0.9999999127275861, iteration: 104878
loss: 1.0776941776275635,grad_norm: 0.9999995489162691, iteration: 104879
loss: 1.2878150939941406,grad_norm: 0.9999997252425848, iteration: 104880
loss: 1.1455926895141602,grad_norm: 0.9999995394664977, iteration: 104881
loss: 1.0742456912994385,grad_norm: 0.9999998645778008, iteration: 104882
loss: 1.1110566854476929,grad_norm: 0.9999997426643031, iteration: 104883
loss: 1.0250755548477173,grad_norm: 0.9999998099497643, iteration: 104884
loss: 1.1685007810592651,grad_norm: 0.9999999011951354, iteration: 104885
loss: 1.051135778427124,grad_norm: 0.9999997466342501, iteration: 104886
loss: 1.2965346574783325,grad_norm: 0.99999996942262, iteration: 104887
loss: 1.1575753688812256,grad_norm: 0.9999999352263745, iteration: 104888
loss: 1.3122023344039917,grad_norm: 0.9999997655635059, iteration: 104889
loss: 1.1964579820632935,grad_norm: 0.9999998875778936, iteration: 104890
loss: 1.0944550037384033,grad_norm: 0.999999174934408, iteration: 104891
loss: 1.091745376586914,grad_norm: 0.9999996748213688, iteration: 104892
loss: 1.1444934606552124,grad_norm: 0.9999997482938595, iteration: 104893
loss: 1.2272707223892212,grad_norm: 0.999999922961293, iteration: 104894
loss: 1.0664308071136475,grad_norm: 0.9999990706397912, iteration: 104895
loss: 1.2553424835205078,grad_norm: 0.9999998856200811, iteration: 104896
loss: 1.207747459411621,grad_norm: 0.9999999438109012, iteration: 104897
loss: 1.2593320608139038,grad_norm: 0.999999625176455, iteration: 104898
loss: 1.2206109762191772,grad_norm: 0.9999997754450465, iteration: 104899
loss: 1.255326747894287,grad_norm: 0.9999995551762275, iteration: 104900
loss: 1.1392648220062256,grad_norm: 0.9999998532044639, iteration: 104901
loss: 1.1084290742874146,grad_norm: 0.9999994827793712, iteration: 104902
loss: 1.4044551849365234,grad_norm: 0.9999998142295095, iteration: 104903
loss: 1.063643455505371,grad_norm: 0.9999993721695265, iteration: 104904
loss: 1.102889895439148,grad_norm: 0.999999908174092, iteration: 104905
loss: 1.122542142868042,grad_norm: 0.9999998635287063, iteration: 104906
loss: 1.4001376628875732,grad_norm: 0.9999998610419826, iteration: 104907
loss: 1.664109468460083,grad_norm: 0.999999953363825, iteration: 104908
loss: 1.150429368019104,grad_norm: 0.9999997383821633, iteration: 104909
loss: 1.2069212198257446,grad_norm: 0.9999998901861015, iteration: 104910
loss: 1.4231444597244263,grad_norm: 0.9999995822983436, iteration: 104911
loss: 1.120529055595398,grad_norm: 0.9999994066496285, iteration: 104912
loss: 1.162480115890503,grad_norm: 1.0000000827363509, iteration: 104913
loss: 1.1940784454345703,grad_norm: 0.9999998863146256, iteration: 104914
loss: 1.151688814163208,grad_norm: 0.9999998853792834, iteration: 104915
loss: 1.1597477197647095,grad_norm: 0.9999993094268167, iteration: 104916
loss: 1.0488488674163818,grad_norm: 0.9999992866623011, iteration: 104917
loss: 1.3084745407104492,grad_norm: 0.9999999795987261, iteration: 104918
loss: 1.0756572484970093,grad_norm: 0.9999998509853284, iteration: 104919
loss: 1.2317404747009277,grad_norm: 0.9999998559571336, iteration: 104920
loss: 1.322636365890503,grad_norm: 0.9999999584684856, iteration: 104921
loss: 1.1465891599655151,grad_norm: 0.9999998902647304, iteration: 104922
loss: 1.2593878507614136,grad_norm: 1.0000000125128334, iteration: 104923
loss: 1.1612751483917236,grad_norm: 0.9225284949715555, iteration: 104924
loss: 1.2677278518676758,grad_norm: 0.9999996100409136, iteration: 104925
loss: 1.2238731384277344,grad_norm: 0.9999994751742355, iteration: 104926
loss: 1.2275245189666748,grad_norm: 0.9999998775076648, iteration: 104927
loss: 1.3250494003295898,grad_norm: 0.9999996023481321, iteration: 104928
loss: 1.157660722732544,grad_norm: 0.9999998080557786, iteration: 104929
loss: 1.2031669616699219,grad_norm: 0.9999994741226034, iteration: 104930
loss: 1.2080203294754028,grad_norm: 0.9999999258527347, iteration: 104931
loss: 1.2836174964904785,grad_norm: 0.9999997893808287, iteration: 104932
loss: 1.1099528074264526,grad_norm: 0.9999998028320918, iteration: 104933
loss: 1.2414008378982544,grad_norm: 0.9999996989249157, iteration: 104934
loss: 1.1441876888275146,grad_norm: 0.9082968620841209, iteration: 104935
loss: 1.070702075958252,grad_norm: 0.9999998766562384, iteration: 104936
loss: 1.2259061336517334,grad_norm: 0.9999998741166536, iteration: 104937
loss: 1.1021589040756226,grad_norm: 0.999999755451768, iteration: 104938
loss: 1.1222748756408691,grad_norm: 0.9999996242657512, iteration: 104939
loss: 1.4328430891036987,grad_norm: 0.9999995452047596, iteration: 104940
loss: 1.1910991668701172,grad_norm: 0.9999998575043174, iteration: 104941
loss: 1.1906062364578247,grad_norm: 0.99999964202638, iteration: 104942
loss: 1.4058184623718262,grad_norm: 0.9999998774776309, iteration: 104943
loss: 1.228169322013855,grad_norm: 0.99999974920489, iteration: 104944
loss: 1.1125445365905762,grad_norm: 0.9999999813042774, iteration: 104945
loss: 1.1702200174331665,grad_norm: 0.9999997238505519, iteration: 104946
loss: 1.3774704933166504,grad_norm: 0.9999999272609418, iteration: 104947
loss: 1.010477066040039,grad_norm: 0.9999997859400702, iteration: 104948
loss: 1.1625889539718628,grad_norm: 0.9999996533323284, iteration: 104949
loss: 1.2588082551956177,grad_norm: 0.9999995659802138, iteration: 104950
loss: 1.1278454065322876,grad_norm: 0.9999998992291326, iteration: 104951
loss: 1.1371715068817139,grad_norm: 0.9999993452078657, iteration: 104952
loss: 1.2634207010269165,grad_norm: 0.9999999269479007, iteration: 104953
loss: 1.345068097114563,grad_norm: 0.9999998917618822, iteration: 104954
loss: 0.989163875579834,grad_norm: 0.9999991471396312, iteration: 104955
loss: 1.1407063007354736,grad_norm: 1.0000000506833706, iteration: 104956
loss: 1.3333436250686646,grad_norm: 0.999999808875776, iteration: 104957
loss: 1.1857411861419678,grad_norm: 0.9999994139719945, iteration: 104958
loss: 1.113846778869629,grad_norm: 0.9999997158202609, iteration: 104959
loss: 1.125116229057312,grad_norm: 0.999999134682335, iteration: 104960
loss: 1.2398682832717896,grad_norm: 0.9999998142599934, iteration: 104961
loss: 1.1093876361846924,grad_norm: 0.9999994669410048, iteration: 104962
loss: 1.1407965421676636,grad_norm: 0.9999999276132476, iteration: 104963
loss: 1.0369107723236084,grad_norm: 0.9999998221540651, iteration: 104964
loss: 1.0421849489212036,grad_norm: 0.9999991687291494, iteration: 104965
loss: 1.0459184646606445,grad_norm: 0.9999991497552195, iteration: 104966
loss: 1.1656286716461182,grad_norm: 0.9999993174580333, iteration: 104967
loss: 1.1433217525482178,grad_norm: 0.9999997522844637, iteration: 104968
loss: 1.0889421701431274,grad_norm: 0.9999998764407024, iteration: 104969
loss: 1.1465955972671509,grad_norm: 0.9999997217867916, iteration: 104970
loss: 1.0520024299621582,grad_norm: 0.9999996938315614, iteration: 104971
loss: 1.186286211013794,grad_norm: 0.9999992374737451, iteration: 104972
loss: 1.1882394552230835,grad_norm: 0.999999954943004, iteration: 104973
loss: 1.3480637073516846,grad_norm: 0.9999999297934559, iteration: 104974
loss: 1.0198602676391602,grad_norm: 0.9999994111343832, iteration: 104975
loss: 1.1394456624984741,grad_norm: 0.9999993489220638, iteration: 104976
loss: 1.064389705657959,grad_norm: 0.9999995682273258, iteration: 104977
loss: 1.1860753297805786,grad_norm: 0.9999996638620979, iteration: 104978
loss: 1.0821841955184937,grad_norm: 0.9999992614662249, iteration: 104979
loss: 1.0718274116516113,grad_norm: 0.9999992445731148, iteration: 104980
loss: 1.3216733932495117,grad_norm: 0.9999999694687802, iteration: 104981
loss: 1.102178931236267,grad_norm: 0.999999568283246, iteration: 104982
loss: 1.0670666694641113,grad_norm: 0.9999993546524398, iteration: 104983
loss: 1.1027393341064453,grad_norm: 0.9999998665336044, iteration: 104984
loss: 1.1600122451782227,grad_norm: 0.9999998453688264, iteration: 104985
loss: 1.1710976362228394,grad_norm: 0.999999882354272, iteration: 104986
loss: 1.09039306640625,grad_norm: 0.9999996549076354, iteration: 104987
loss: 1.1708416938781738,grad_norm: 0.9999998233785226, iteration: 104988
loss: 1.064475655555725,grad_norm: 0.999999089218748, iteration: 104989
loss: 1.0525089502334595,grad_norm: 0.9999999160421511, iteration: 104990
loss: 1.0410029888153076,grad_norm: 0.9999999671944577, iteration: 104991
loss: 1.3644708395004272,grad_norm: 1.0000000749059936, iteration: 104992
loss: 1.134511113166809,grad_norm: 0.9999991637926994, iteration: 104993
loss: 1.0901706218719482,grad_norm: 0.9999999389873582, iteration: 104994
loss: 0.9789179563522339,grad_norm: 0.9999991248086768, iteration: 104995
loss: 1.09571373462677,grad_norm: 0.9999991641186653, iteration: 104996
loss: 1.0894945859909058,grad_norm: 0.9999998281154844, iteration: 104997
loss: 1.13169264793396,grad_norm: 0.9999998179857615, iteration: 104998
loss: 1.3272444009780884,grad_norm: 0.9999997681542566, iteration: 104999
loss: 1.2566159963607788,grad_norm: 0.9999997483522374, iteration: 105000
loss: 1.0417221784591675,grad_norm: 0.9999993166251722, iteration: 105001
loss: 0.9661867022514343,grad_norm: 0.7787688755746291, iteration: 105002
loss: 1.127890706062317,grad_norm: 0.999999331052941, iteration: 105003
loss: 1.345380187034607,grad_norm: 0.9999999153886417, iteration: 105004
loss: 1.1084078550338745,grad_norm: 0.999999822002271, iteration: 105005
loss: 1.1665140390396118,grad_norm: 1.000000019574608, iteration: 105006
loss: 1.2165944576263428,grad_norm: 0.9999998820768511, iteration: 105007
loss: 1.0750279426574707,grad_norm: 0.9999994117840537, iteration: 105008
loss: 1.0413267612457275,grad_norm: 0.9818884305711268, iteration: 105009
loss: 1.0540621280670166,grad_norm: 0.9999991549923211, iteration: 105010
loss: 1.189457893371582,grad_norm: 0.9999995430660146, iteration: 105011
loss: 1.0410689115524292,grad_norm: 0.9999999293506251, iteration: 105012
loss: 1.1761343479156494,grad_norm: 0.9999999369301084, iteration: 105013
loss: 1.216271996498108,grad_norm: 0.9999999447069127, iteration: 105014
loss: 1.2051271200180054,grad_norm: 1.0000000226236594, iteration: 105015
loss: 1.0422691106796265,grad_norm: 0.9999999687169293, iteration: 105016
loss: 1.0404900312423706,grad_norm: 0.9999993840934712, iteration: 105017
loss: 1.1334595680236816,grad_norm: 0.9999999467926514, iteration: 105018
loss: 1.3512563705444336,grad_norm: 0.9999999406328671, iteration: 105019
loss: 1.298352837562561,grad_norm: 1.0000000595882013, iteration: 105020
loss: 1.0897948741912842,grad_norm: 0.999999257266836, iteration: 105021
loss: 1.5738582611083984,grad_norm: 0.9999999022508907, iteration: 105022
loss: 1.0965946912765503,grad_norm: 0.9999990112696442, iteration: 105023
loss: 1.4507859945297241,grad_norm: 0.9999998539961162, iteration: 105024
loss: 1.2497622966766357,grad_norm: 1.000000106215528, iteration: 105025
loss: 1.2088366746902466,grad_norm: 0.9999994183196517, iteration: 105026
loss: 1.2050689458847046,grad_norm: 0.9999999483361443, iteration: 105027
loss: 1.015604019165039,grad_norm: 0.9999998933963617, iteration: 105028
loss: 1.3933873176574707,grad_norm: 0.9999998056428409, iteration: 105029
loss: 1.1538128852844238,grad_norm: 0.999999651379432, iteration: 105030
loss: 1.179661750793457,grad_norm: 0.9999996796631294, iteration: 105031
loss: 1.2574506998062134,grad_norm: 0.999999688961512, iteration: 105032
loss: 1.367981195449829,grad_norm: 0.9999999160513815, iteration: 105033
loss: 1.3846582174301147,grad_norm: 0.9999999405384844, iteration: 105034
loss: 1.1140300035476685,grad_norm: 0.99999947231085, iteration: 105035
loss: 1.1035529375076294,grad_norm: 0.99999991587173, iteration: 105036
loss: 1.0804990530014038,grad_norm: 0.8446291304634179, iteration: 105037
loss: 1.0765981674194336,grad_norm: 0.999332692523804, iteration: 105038
loss: 1.1804018020629883,grad_norm: 0.9999999455625339, iteration: 105039
loss: 1.1600091457366943,grad_norm: 0.9999994609162092, iteration: 105040
loss: 1.217016577720642,grad_norm: 0.9999998810977347, iteration: 105041
loss: 1.1564675569534302,grad_norm: 0.9999996268336195, iteration: 105042
loss: 0.995491623878479,grad_norm: 0.9440115075897862, iteration: 105043
loss: 1.137229561805725,grad_norm: 0.9999996601028377, iteration: 105044
loss: 1.2369333505630493,grad_norm: 0.9999998326664371, iteration: 105045
loss: 1.1972979307174683,grad_norm: 0.9999998652981198, iteration: 105046
loss: 1.2244164943695068,grad_norm: 0.9999999334863958, iteration: 105047
loss: 1.1564422845840454,grad_norm: 0.9999990383738925, iteration: 105048
loss: 1.136502981185913,grad_norm: 0.9999993932312384, iteration: 105049
loss: 1.2206507921218872,grad_norm: 0.9999998210902535, iteration: 105050
loss: 1.1075570583343506,grad_norm: 0.9999997454485119, iteration: 105051
loss: 1.2039790153503418,grad_norm: 0.9999999075746812, iteration: 105052
loss: 1.4046978950500488,grad_norm: 0.999999961230983, iteration: 105053
loss: 1.1767680644989014,grad_norm: 0.9999997995601633, iteration: 105054
loss: 1.0393328666687012,grad_norm: 0.9999994145620952, iteration: 105055
loss: 1.2148667573928833,grad_norm: 0.9999996118675383, iteration: 105056
loss: 1.088606357574463,grad_norm: 0.9999996875571929, iteration: 105057
loss: 1.0918253660202026,grad_norm: 0.9999999812122253, iteration: 105058
loss: 1.3228248357772827,grad_norm: 0.9999996769549377, iteration: 105059
loss: 1.0883921384811401,grad_norm: 1.0000000462985026, iteration: 105060
loss: 1.2164303064346313,grad_norm: 0.9999998999127323, iteration: 105061
loss: 1.3860626220703125,grad_norm: 0.9999999076573415, iteration: 105062
loss: 1.1679224967956543,grad_norm: 0.9999996754218783, iteration: 105063
loss: 1.2491244077682495,grad_norm: 0.999999952599897, iteration: 105064
loss: 1.339773416519165,grad_norm: 0.9999997713450561, iteration: 105065
loss: 1.2238823175430298,grad_norm: 0.9999998494107856, iteration: 105066
loss: 1.262844443321228,grad_norm: 0.9999994471205599, iteration: 105067
loss: 1.3719048500061035,grad_norm: 0.999999876702884, iteration: 105068
loss: 1.2471795082092285,grad_norm: 0.9999994114636808, iteration: 105069
loss: 1.192528486251831,grad_norm: 0.9999993977527357, iteration: 105070
loss: 1.039420485496521,grad_norm: 0.9999991267464179, iteration: 105071
loss: 1.132899522781372,grad_norm: 0.9999997766689722, iteration: 105072
loss: 1.2654576301574707,grad_norm: 0.9999996286851932, iteration: 105073
loss: 1.0619276762008667,grad_norm: 0.9999996027704781, iteration: 105074
loss: 1.1220210790634155,grad_norm: 0.999999137426033, iteration: 105075
loss: 1.1285927295684814,grad_norm: 0.9999994630917013, iteration: 105076
loss: 1.1460810899734497,grad_norm: 0.9999994508026013, iteration: 105077
loss: 1.1154117584228516,grad_norm: 0.999999623688854, iteration: 105078
loss: 1.0594139099121094,grad_norm: 0.9999994968323296, iteration: 105079
loss: 1.159048080444336,grad_norm: 0.9999996739945289, iteration: 105080
loss: 1.173499345779419,grad_norm: 0.9999999279791562, iteration: 105081
loss: 1.0217299461364746,grad_norm: 0.9999998778444649, iteration: 105082
loss: 1.0703521966934204,grad_norm: 0.9999997671743317, iteration: 105083
loss: 1.0591446161270142,grad_norm: 0.916446617414385, iteration: 105084
loss: 1.0639530420303345,grad_norm: 0.999999879601304, iteration: 105085
loss: 1.0573948621749878,grad_norm: 0.9999996315948942, iteration: 105086
loss: 1.1704072952270508,grad_norm: 0.9999992076414599, iteration: 105087
loss: 1.0754809379577637,grad_norm: 0.9999993977657374, iteration: 105088
loss: 1.045442819595337,grad_norm: 0.9999993244561488, iteration: 105089
loss: 1.142243504524231,grad_norm: 0.9999995059507651, iteration: 105090
loss: 1.1647059917449951,grad_norm: 0.9999995538303895, iteration: 105091
loss: 1.1802408695220947,grad_norm: 0.9999999159988062, iteration: 105092
loss: 1.1189846992492676,grad_norm: 1.0000000908204496, iteration: 105093
loss: 1.154773473739624,grad_norm: 0.9999991841678735, iteration: 105094
loss: 1.0620723962783813,grad_norm: 1.0000000153407698, iteration: 105095
loss: 1.303236722946167,grad_norm: 0.9999999351503384, iteration: 105096
loss: 1.0967694520950317,grad_norm: 0.9999999031754783, iteration: 105097
loss: 1.0062764883041382,grad_norm: 0.9999990123256378, iteration: 105098
loss: 1.0658223628997803,grad_norm: 0.9856847563631457, iteration: 105099
loss: 1.0803035497665405,grad_norm: 0.9999993442318679, iteration: 105100
loss: 1.0302636623382568,grad_norm: 0.9999992936510763, iteration: 105101
loss: 1.0434961318969727,grad_norm: 0.8637704255139982, iteration: 105102
loss: 1.1032453775405884,grad_norm: 0.9999997461698805, iteration: 105103
loss: 1.2554101943969727,grad_norm: 0.9999998098402189, iteration: 105104
loss: 1.032639503479004,grad_norm: 0.9999998840337161, iteration: 105105
loss: 1.197829246520996,grad_norm: 0.9999996903415648, iteration: 105106
loss: 1.1327282190322876,grad_norm: 0.999999831078334, iteration: 105107
loss: 1.0486576557159424,grad_norm: 0.9999991724159485, iteration: 105108
loss: 1.0883795022964478,grad_norm: 0.999999836350845, iteration: 105109
loss: 1.0387930870056152,grad_norm: 0.8315618409955546, iteration: 105110
loss: 1.0660967826843262,grad_norm: 0.999999114832717, iteration: 105111
loss: 1.1344799995422363,grad_norm: 0.9999995999058868, iteration: 105112
loss: 1.041147232055664,grad_norm: 0.999999449116915, iteration: 105113
loss: 1.1943774223327637,grad_norm: 0.9999992647296854, iteration: 105114
loss: 1.1140482425689697,grad_norm: 0.9999997002357309, iteration: 105115
loss: 1.0401253700256348,grad_norm: 0.9999998238005001, iteration: 105116
loss: 1.115649700164795,grad_norm: 0.9999997031378286, iteration: 105117
loss: 1.1488057374954224,grad_norm: 0.9999993764455857, iteration: 105118
loss: 1.231857419013977,grad_norm: 0.9999998694211536, iteration: 105119
loss: 1.0769338607788086,grad_norm: 0.8922803662619968, iteration: 105120
loss: 1.1102228164672852,grad_norm: 0.9999994634062902, iteration: 105121
loss: 1.2096861600875854,grad_norm: 0.9999998792440015, iteration: 105122
loss: 1.0977433919906616,grad_norm: 0.9999994336248534, iteration: 105123
loss: 1.1505191326141357,grad_norm: 0.9999995379394767, iteration: 105124
loss: 1.3711981773376465,grad_norm: 0.9999998567887881, iteration: 105125
loss: 1.034754753112793,grad_norm: 0.9999997783531982, iteration: 105126
loss: 1.0577280521392822,grad_norm: 0.9999993740105539, iteration: 105127
loss: 1.0318127870559692,grad_norm: 0.9999992431737749, iteration: 105128
loss: 1.1044692993164062,grad_norm: 0.9999991600782547, iteration: 105129
loss: 1.078986406326294,grad_norm: 0.9999996997392808, iteration: 105130
loss: 1.1116923093795776,grad_norm: 0.9999992518600299, iteration: 105131
loss: 1.0641566514968872,grad_norm: 0.9999991299039963, iteration: 105132
loss: 1.2324941158294678,grad_norm: 0.9999998402196371, iteration: 105133
loss: 1.0606166124343872,grad_norm: 0.7247606927029054, iteration: 105134
loss: 1.1351921558380127,grad_norm: 0.9999991013992781, iteration: 105135
loss: 1.0430341958999634,grad_norm: 0.9999990452738634, iteration: 105136
loss: 1.3362458944320679,grad_norm: 0.9999999143496626, iteration: 105137
loss: 1.0840213298797607,grad_norm: 0.9999990694979035, iteration: 105138
loss: 1.0610206127166748,grad_norm: 0.9999998135093505, iteration: 105139
loss: 1.0678073167800903,grad_norm: 0.9999993295418156, iteration: 105140
loss: 1.1853173971176147,grad_norm: 0.9999997066394276, iteration: 105141
loss: 1.0230087041854858,grad_norm: 0.9999995919033878, iteration: 105142
loss: 1.1239049434661865,grad_norm: 0.999999346628015, iteration: 105143
loss: 1.0425841808319092,grad_norm: 0.9999990959966131, iteration: 105144
loss: 1.0420057773590088,grad_norm: 0.999999164467843, iteration: 105145
loss: 1.0495789051055908,grad_norm: 0.9709760823928929, iteration: 105146
loss: 1.0346271991729736,grad_norm: 0.9999997931737983, iteration: 105147
loss: 1.0523403882980347,grad_norm: 0.9999992946091589, iteration: 105148
loss: 1.0363661050796509,grad_norm: 0.9999990785956925, iteration: 105149
loss: 1.0494712591171265,grad_norm: 0.9999998414833444, iteration: 105150
loss: 1.0347760915756226,grad_norm: 0.9497848440380886, iteration: 105151
loss: 1.3886486291885376,grad_norm: 0.9999998690384743, iteration: 105152
loss: 1.096239686012268,grad_norm: 0.9999992792647003, iteration: 105153
loss: 1.0838096141815186,grad_norm: 0.9999998012890335, iteration: 105154
loss: 1.2347922325134277,grad_norm: 0.9999999225834663, iteration: 105155
loss: 1.1225425004959106,grad_norm: 0.9999991827591035, iteration: 105156
loss: 1.1968226432800293,grad_norm: 0.9999993073515847, iteration: 105157
loss: 1.237148404121399,grad_norm: 0.9999999713945796, iteration: 105158
loss: 1.1286989450454712,grad_norm: 0.9999997585272491, iteration: 105159
loss: 1.0350918769836426,grad_norm: 0.9999994010945613, iteration: 105160
loss: 1.1132186651229858,grad_norm: 0.9999992551610236, iteration: 105161
loss: 1.2215209007263184,grad_norm: 0.9999994648389886, iteration: 105162
loss: 1.0533745288848877,grad_norm: 0.9493843472825121, iteration: 105163
loss: 1.1082429885864258,grad_norm: 0.9999999593427636, iteration: 105164
loss: 1.1914948225021362,grad_norm: 0.9999994429755926, iteration: 105165
loss: 1.0614831447601318,grad_norm: 1.0000000208226278, iteration: 105166
loss: 1.024549126625061,grad_norm: 0.9999991887166402, iteration: 105167
loss: 1.0553687810897827,grad_norm: 0.9250128163840605, iteration: 105168
loss: 1.1197891235351562,grad_norm: 0.9999995860093025, iteration: 105169
loss: 1.0854408740997314,grad_norm: 0.9999991574673917, iteration: 105170
loss: 1.0396531820297241,grad_norm: 0.9999991737056376, iteration: 105171
loss: 1.0158454179763794,grad_norm: 0.8455262905363138, iteration: 105172
loss: 1.1051639318466187,grad_norm: 0.9999998010791004, iteration: 105173
loss: 1.098751425743103,grad_norm: 0.9999995645497918, iteration: 105174
loss: 1.0755674839019775,grad_norm: 0.9999999575349209, iteration: 105175
loss: 1.1004095077514648,grad_norm: 0.9999990494457154, iteration: 105176
loss: 1.0032190084457397,grad_norm: 0.9999997929702098, iteration: 105177
loss: 1.1173487901687622,grad_norm: 0.9999998118398554, iteration: 105178
loss: 1.0607354640960693,grad_norm: 0.9999998347547147, iteration: 105179
loss: 1.0211302042007446,grad_norm: 0.9999990227185809, iteration: 105180
loss: 1.053331971168518,grad_norm: 0.9999999315478529, iteration: 105181
loss: 1.0938184261322021,grad_norm: 0.9875862816281152, iteration: 105182
loss: 1.055836796760559,grad_norm: 0.9999994093876037, iteration: 105183
loss: 1.009057879447937,grad_norm: 0.999999343159268, iteration: 105184
loss: 1.07024347782135,grad_norm: 0.9999991066407615, iteration: 105185
loss: 1.0531991720199585,grad_norm: 0.9999994656171013, iteration: 105186
loss: 1.1496171951293945,grad_norm: 0.9999997751061355, iteration: 105187
loss: 1.0970757007598877,grad_norm: 0.9999992814419186, iteration: 105188
loss: 1.1658183336257935,grad_norm: 0.9999997330753474, iteration: 105189
loss: 1.0060559511184692,grad_norm: 0.9999990708662128, iteration: 105190
loss: 1.0134679079055786,grad_norm: 0.9999994419991853, iteration: 105191
loss: 1.074965000152588,grad_norm: 0.999999986327796, iteration: 105192
loss: 1.081436276435852,grad_norm: 0.8831275539387692, iteration: 105193
loss: 1.0221235752105713,grad_norm: 0.9999992520443179, iteration: 105194
loss: 1.004340410232544,grad_norm: 0.9999992465725924, iteration: 105195
loss: 1.1244614124298096,grad_norm: 0.9999995415755254, iteration: 105196
loss: 1.2771040201187134,grad_norm: 0.9999999198853199, iteration: 105197
loss: 1.0551007986068726,grad_norm: 0.9999991860231028, iteration: 105198
loss: 1.0732271671295166,grad_norm: 0.9999997150822397, iteration: 105199
loss: 1.1791325807571411,grad_norm: 0.9999998953275567, iteration: 105200
loss: 0.943787157535553,grad_norm: 0.9203263087844422, iteration: 105201
loss: 1.255844235420227,grad_norm: 0.999999702934027, iteration: 105202
loss: 1.1435600519180298,grad_norm: 0.9999994505006414, iteration: 105203
loss: 1.2062294483184814,grad_norm: 0.9999997662881679, iteration: 105204
loss: 1.131500244140625,grad_norm: 0.9999997040825175, iteration: 105205
loss: 1.1026179790496826,grad_norm: 0.9999995948147652, iteration: 105206
loss: 1.0777088403701782,grad_norm: 0.9999999097975637, iteration: 105207
loss: 1.0905629396438599,grad_norm: 0.9999997434887669, iteration: 105208
loss: 1.453671932220459,grad_norm: 0.999999885484172, iteration: 105209
loss: 1.0330195426940918,grad_norm: 0.999999002910555, iteration: 105210
loss: 1.244975209236145,grad_norm: 0.9999998769813039, iteration: 105211
loss: 1.1191825866699219,grad_norm: 0.9999995227012292, iteration: 105212
loss: 1.157632827758789,grad_norm: 0.999999096821543, iteration: 105213
loss: 1.199808120727539,grad_norm: 0.999999724060626, iteration: 105214
loss: 1.0468618869781494,grad_norm: 0.873404524175222, iteration: 105215
loss: 1.2092106342315674,grad_norm: 0.9999998390510442, iteration: 105216
loss: 1.124528169631958,grad_norm: 0.9999994482648379, iteration: 105217
loss: 0.9414017796516418,grad_norm: 0.8185360199443977, iteration: 105218
loss: 1.2541536092758179,grad_norm: 0.9999999015803032, iteration: 105219
loss: 1.08476984500885,grad_norm: 0.9999994935841436, iteration: 105220
loss: 1.074722170829773,grad_norm: 0.9999994981922692, iteration: 105221
loss: 1.124619722366333,grad_norm: 0.9999999501085142, iteration: 105222
loss: 1.120071530342102,grad_norm: 0.9999995931569301, iteration: 105223
loss: 1.0866109132766724,grad_norm: 0.9999992326946274, iteration: 105224
loss: 1.556764006614685,grad_norm: 0.9999996946136158, iteration: 105225
loss: 1.129011631011963,grad_norm: 0.9999996000978196, iteration: 105226
loss: 1.1245046854019165,grad_norm: 0.9999995873453431, iteration: 105227
loss: 1.1806594133377075,grad_norm: 0.9999998061880928, iteration: 105228
loss: 1.0108866691589355,grad_norm: 0.9999990940720369, iteration: 105229
loss: 1.2768938541412354,grad_norm: 0.9999991318547972, iteration: 105230
loss: 1.0165166854858398,grad_norm: 0.9999995175599966, iteration: 105231
loss: 1.0507761240005493,grad_norm: 0.9999993876288937, iteration: 105232
loss: 1.0638909339904785,grad_norm: 0.9999998025332143, iteration: 105233
loss: 1.0228655338287354,grad_norm: 0.9999992058283371, iteration: 105234
loss: 1.1110185384750366,grad_norm: 0.9999997494421777, iteration: 105235
loss: 1.1308456659317017,grad_norm: 0.9999996318755723, iteration: 105236
loss: 1.3007819652557373,grad_norm: 0.9999999287991436, iteration: 105237
loss: 1.40867018699646,grad_norm: 0.9999999733249136, iteration: 105238
loss: 1.1708201169967651,grad_norm: 0.9999994292285517, iteration: 105239
loss: 1.163325548171997,grad_norm: 0.9999998694878646, iteration: 105240
loss: 1.2885493040084839,grad_norm: 0.9999999111247276, iteration: 105241
loss: 1.2130712270736694,grad_norm: 0.9999998911916289, iteration: 105242
loss: 1.1837600469589233,grad_norm: 0.9999999869342353, iteration: 105243
loss: 1.1196783781051636,grad_norm: 0.999999457022953, iteration: 105244
loss: 1.0853873491287231,grad_norm: 0.9999994575722142, iteration: 105245
loss: 1.2584991455078125,grad_norm: 0.999999213676001, iteration: 105246
loss: 1.3153001070022583,grad_norm: 0.999999685948847, iteration: 105247
loss: 1.0727447271347046,grad_norm: 0.9999999139730724, iteration: 105248
loss: 1.140277624130249,grad_norm: 0.9999993995285366, iteration: 105249
loss: 1.0909501314163208,grad_norm: 0.9999995207447379, iteration: 105250
loss: 1.1629136800765991,grad_norm: 0.9999993526700373, iteration: 105251
loss: 1.0463740825653076,grad_norm: 0.8508447728080931, iteration: 105252
loss: 1.178875207901001,grad_norm: 0.9999996415270341, iteration: 105253
loss: 1.0938657522201538,grad_norm: 0.9999996778152965, iteration: 105254
loss: 1.2037640810012817,grad_norm: 0.9999999417861285, iteration: 105255
loss: 1.0719935894012451,grad_norm: 0.9999998858500139, iteration: 105256
loss: 1.1023235321044922,grad_norm: 0.9999997388520914, iteration: 105257
loss: 1.185716152191162,grad_norm: 0.9999991551360499, iteration: 105258
loss: 1.1401233673095703,grad_norm: 0.9999997537040201, iteration: 105259
loss: 1.264129877090454,grad_norm: 0.9999998743859355, iteration: 105260
loss: 1.2738189697265625,grad_norm: 0.9999999908088639, iteration: 105261
loss: 1.3343266248703003,grad_norm: 0.9999995859848029, iteration: 105262
loss: 1.0837701559066772,grad_norm: 0.9999990619935673, iteration: 105263
loss: 1.1296429634094238,grad_norm: 0.9999993645648918, iteration: 105264
loss: 1.1287840604782104,grad_norm: 0.9999994382027108, iteration: 105265
loss: 1.0374982357025146,grad_norm: 0.9999997186932262, iteration: 105266
loss: 1.120638132095337,grad_norm: 0.9999992714569628, iteration: 105267
loss: 1.0320879220962524,grad_norm: 0.9267746038271997, iteration: 105268
loss: 1.1828705072402954,grad_norm: 0.9999996900198359, iteration: 105269
loss: 1.1079459190368652,grad_norm: 0.999999685254551, iteration: 105270
loss: 1.117419719696045,grad_norm: 0.999999339753434, iteration: 105271
loss: 1.1423226594924927,grad_norm: 0.9999998151498429, iteration: 105272
loss: 1.0798360109329224,grad_norm: 0.9999992534484027, iteration: 105273
loss: 1.1398744583129883,grad_norm: 0.9999998468567811, iteration: 105274
loss: 1.0129958391189575,grad_norm: 0.7994685769644686, iteration: 105275
loss: 1.0397040843963623,grad_norm: 0.9999993993592723, iteration: 105276
loss: 1.1895015239715576,grad_norm: 0.9999996197385482, iteration: 105277
loss: 1.0585843324661255,grad_norm: 0.9999996248675316, iteration: 105278
loss: 1.0109734535217285,grad_norm: 0.9999993914491548, iteration: 105279
loss: 1.1300058364868164,grad_norm: 0.9999997138468659, iteration: 105280
loss: 1.243727445602417,grad_norm: 0.9999997500212592, iteration: 105281
loss: 1.0455714464187622,grad_norm: 0.9999995077115605, iteration: 105282
loss: 1.3090170621871948,grad_norm: 0.999999479002093, iteration: 105283
loss: 1.2393537759780884,grad_norm: 0.9999999102441048, iteration: 105284
loss: 1.2374032735824585,grad_norm: 0.9999998392419365, iteration: 105285
loss: 0.9731922745704651,grad_norm: 0.9999990177342719, iteration: 105286
loss: 1.1991825103759766,grad_norm: 0.9999992722888178, iteration: 105287
loss: 1.1428672075271606,grad_norm: 0.9999997105806117, iteration: 105288
loss: 1.1456190347671509,grad_norm: 0.9999998512136237, iteration: 105289
loss: 1.094020128250122,grad_norm: 0.9999995203761896, iteration: 105290
loss: 1.1268031597137451,grad_norm: 0.9999993945992585, iteration: 105291
loss: 1.1159355640411377,grad_norm: 0.9999991645065669, iteration: 105292
loss: 1.0703043937683105,grad_norm: 0.9999994023807663, iteration: 105293
loss: 1.1614196300506592,grad_norm: 0.999999509192553, iteration: 105294
loss: 1.0547682046890259,grad_norm: 0.9848711295054617, iteration: 105295
loss: 1.344937801361084,grad_norm: 0.9999997234758113, iteration: 105296
loss: 1.1592618227005005,grad_norm: 0.999999413143623, iteration: 105297
loss: 1.0614986419677734,grad_norm: 0.9999998393951653, iteration: 105298
loss: 1.084026575088501,grad_norm: 0.9999992936170231, iteration: 105299
loss: 1.0620923042297363,grad_norm: 0.9999999647041278, iteration: 105300
loss: 1.092954158782959,grad_norm: 0.9999992753829495, iteration: 105301
loss: 1.2954362630844116,grad_norm: 0.9999999676731888, iteration: 105302
loss: 1.0913840532302856,grad_norm: 0.9999996874992643, iteration: 105303
loss: 1.1370420455932617,grad_norm: 0.9999994673453958, iteration: 105304
loss: 1.0769985914230347,grad_norm: 0.9999994085879406, iteration: 105305
loss: 1.1176238059997559,grad_norm: 0.999999276065318, iteration: 105306
loss: 1.1230533123016357,grad_norm: 0.9999998631421081, iteration: 105307
loss: 1.1289974451065063,grad_norm: 0.9999994035461297, iteration: 105308
loss: 1.246558427810669,grad_norm: 0.9999997565752258, iteration: 105309
loss: 1.0167269706726074,grad_norm: 0.9999995424902749, iteration: 105310
loss: 1.0364969968795776,grad_norm: 0.8974178159743799, iteration: 105311
loss: 1.0743032693862915,grad_norm: 0.9999996737414399, iteration: 105312
loss: 1.0380340814590454,grad_norm: 0.9999998580474038, iteration: 105313
loss: 1.0493077039718628,grad_norm: 0.8351313617201731, iteration: 105314
loss: 1.1015074253082275,grad_norm: 0.9999992489117778, iteration: 105315
loss: 1.0089105367660522,grad_norm: 0.9999998282894973, iteration: 105316
loss: 1.0990320444107056,grad_norm: 0.9999999391003178, iteration: 105317
loss: 1.1124255657196045,grad_norm: 0.9999991566403574, iteration: 105318
loss: 1.0439916849136353,grad_norm: 0.9999992521472502, iteration: 105319
loss: 1.0537607669830322,grad_norm: 0.9999991870275193, iteration: 105320
loss: 1.0866801738739014,grad_norm: 0.9999993262061436, iteration: 105321
loss: 1.0309405326843262,grad_norm: 0.9999991900305132, iteration: 105322
loss: 1.0345441102981567,grad_norm: 0.9293118118506284, iteration: 105323
loss: 1.090831995010376,grad_norm: 0.9999996536979915, iteration: 105324
loss: 1.0836344957351685,grad_norm: 0.9999999489805774, iteration: 105325
loss: 1.0158319473266602,grad_norm: 0.9999991633663191, iteration: 105326
loss: 1.2064728736877441,grad_norm: 0.9999998262934965, iteration: 105327
loss: 1.0469541549682617,grad_norm: 0.9999992922148027, iteration: 105328
loss: 1.063886046409607,grad_norm: 0.9999991979528247, iteration: 105329
loss: 1.0708998441696167,grad_norm: 0.999999202104327, iteration: 105330
loss: 1.0618005990982056,grad_norm: 0.9999991049550613, iteration: 105331
loss: 1.0743613243103027,grad_norm: 0.9781022581592644, iteration: 105332
loss: 1.1383225917816162,grad_norm: 0.9999994621844297, iteration: 105333
loss: 1.1489794254302979,grad_norm: 0.9999996743408035, iteration: 105334
loss: 1.0177323818206787,grad_norm: 0.9999999653317082, iteration: 105335
loss: 1.1934735774993896,grad_norm: 0.9999995079061281, iteration: 105336
loss: 1.1815237998962402,grad_norm: 0.9999999342586544, iteration: 105337
loss: 1.0552302598953247,grad_norm: 0.9999990019362293, iteration: 105338
loss: 1.2098193168640137,grad_norm: 0.9999992287240327, iteration: 105339
loss: 0.9719690680503845,grad_norm: 0.9999998530508727, iteration: 105340
loss: 1.0323201417922974,grad_norm: 0.9999992969095443, iteration: 105341
loss: 1.0344349145889282,grad_norm: 0.9999993137080956, iteration: 105342
loss: 1.0826096534729004,grad_norm: 0.9999999339315653, iteration: 105343
loss: 1.0224138498306274,grad_norm: 0.9999992046818972, iteration: 105344
loss: 1.0220696926116943,grad_norm: 0.9999990538086481, iteration: 105345
loss: 1.1183888912200928,grad_norm: 0.999999965775169, iteration: 105346
loss: 1.0918313264846802,grad_norm: 0.9999995706515074, iteration: 105347
loss: 1.046708106994629,grad_norm: 0.9999996395116113, iteration: 105348
loss: 1.0363824367523193,grad_norm: 0.9999994953484783, iteration: 105349
loss: 1.0373778343200684,grad_norm: 0.9999990677407278, iteration: 105350
loss: 1.0898942947387695,grad_norm: 0.9999998173514842, iteration: 105351
loss: 1.0082699060440063,grad_norm: 0.9999992696414767, iteration: 105352
loss: 1.0413979291915894,grad_norm: 0.9999998378552227, iteration: 105353
loss: 1.0040136575698853,grad_norm: 0.9999990991356209, iteration: 105354
loss: 1.0636950731277466,grad_norm: 0.9999994618886832, iteration: 105355
loss: 1.0540026426315308,grad_norm: 0.9999997089372665, iteration: 105356
loss: 1.1151175498962402,grad_norm: 0.9999994636362202, iteration: 105357
loss: 1.152264952659607,grad_norm: 0.9999998338362653, iteration: 105358
loss: 1.0808720588684082,grad_norm: 0.9055669676373858, iteration: 105359
loss: 1.011110544204712,grad_norm: 0.9999992774409167, iteration: 105360
loss: 1.046715259552002,grad_norm: 0.999999787549488, iteration: 105361
loss: 1.0934773683547974,grad_norm: 0.9999994130208714, iteration: 105362
loss: 1.0740951299667358,grad_norm: 0.9999991823581017, iteration: 105363
loss: 1.0628750324249268,grad_norm: 0.8813240141397694, iteration: 105364
loss: 1.048484444618225,grad_norm: 0.8426911562764348, iteration: 105365
loss: 1.013376235961914,grad_norm: 0.9064627151033632, iteration: 105366
loss: 1.0709865093231201,grad_norm: 0.9999993168899551, iteration: 105367
loss: 1.1295298337936401,grad_norm: 0.9999997630547521, iteration: 105368
loss: 1.0821360349655151,grad_norm: 0.999999721887323, iteration: 105369
loss: 1.0365028381347656,grad_norm: 0.9999996746464168, iteration: 105370
loss: 1.1172845363616943,grad_norm: 0.9999995036097508, iteration: 105371
loss: 1.055277705192566,grad_norm: 0.9999991758848079, iteration: 105372
loss: 0.997523844242096,grad_norm: 0.9790122202858718, iteration: 105373
loss: 1.1300371885299683,grad_norm: 0.9999991576098185, iteration: 105374
loss: 1.0508708953857422,grad_norm: 0.9999991839369105, iteration: 105375
loss: 1.0530046224594116,grad_norm: 0.9999991953872645, iteration: 105376
loss: 1.1020420789718628,grad_norm: 0.9999993556436186, iteration: 105377
loss: 1.0932670831680298,grad_norm: 0.9999993901898645, iteration: 105378
loss: 1.0225131511688232,grad_norm: 0.9335635156913493, iteration: 105379
loss: 1.074270248413086,grad_norm: 0.8863135451030707, iteration: 105380
loss: 1.0763572454452515,grad_norm: 0.9949548321454866, iteration: 105381
loss: 1.2243256568908691,grad_norm: 0.9999999289070324, iteration: 105382
loss: 1.0150178670883179,grad_norm: 0.8513339264190758, iteration: 105383
loss: 1.002221703529358,grad_norm: 0.8959990920494693, iteration: 105384
loss: 1.0585219860076904,grad_norm: 0.9999992353209574, iteration: 105385
loss: 0.9959923028945923,grad_norm: 0.9488376909298853, iteration: 105386
loss: 1.0530502796173096,grad_norm: 0.9999992757034235, iteration: 105387
loss: 1.0084832906723022,grad_norm: 0.9999992128919641, iteration: 105388
loss: 1.0614829063415527,grad_norm: 0.999999197113686, iteration: 105389
loss: 1.0786868333816528,grad_norm: 0.9999992510102153, iteration: 105390
loss: 0.9653927087783813,grad_norm: 0.8974383198392376, iteration: 105391
loss: 0.9849385023117065,grad_norm: 0.8940700149086308, iteration: 105392
loss: 1.0685899257659912,grad_norm: 0.9999993491106637, iteration: 105393
loss: 1.027359962463379,grad_norm: 0.977710423337953, iteration: 105394
loss: 1.0402307510375977,grad_norm: 0.999999570165354, iteration: 105395
loss: 1.0596450567245483,grad_norm: 0.9999996052870427, iteration: 105396
loss: 1.0328137874603271,grad_norm: 0.999999116479761, iteration: 105397
loss: 1.087403655052185,grad_norm: 0.9999998063331981, iteration: 105398
loss: 1.1103986501693726,grad_norm: 0.9999995177037924, iteration: 105399
loss: 0.9872610569000244,grad_norm: 0.9999990372205444, iteration: 105400
loss: 1.0292373895645142,grad_norm: 0.9999992943934888, iteration: 105401
loss: 1.1051994562149048,grad_norm: 0.9999992420155678, iteration: 105402
loss: 0.9788689017295837,grad_norm: 0.9999991529554956, iteration: 105403
loss: 1.0525308847427368,grad_norm: 0.9999999420019088, iteration: 105404
loss: 1.0149511098861694,grad_norm: 0.9999990394015432, iteration: 105405
loss: 1.0671271085739136,grad_norm: 0.9999999086599408, iteration: 105406
loss: 1.11782968044281,grad_norm: 0.9999994373286913, iteration: 105407
loss: 1.0424180030822754,grad_norm: 0.999999895562992, iteration: 105408
loss: 0.9955037832260132,grad_norm: 0.9999991223133173, iteration: 105409
loss: 0.9996313452720642,grad_norm: 0.9999991092723004, iteration: 105410
loss: 1.1396225690841675,grad_norm: 0.9999998437135801, iteration: 105411
loss: 1.2752025127410889,grad_norm: 0.9999996077141147, iteration: 105412
loss: 1.0435199737548828,grad_norm: 0.9999992938335924, iteration: 105413
loss: 1.2437188625335693,grad_norm: 0.999999959596516, iteration: 105414
loss: 1.052179217338562,grad_norm: 0.9668579096650927, iteration: 105415
loss: 1.02085542678833,grad_norm: 0.999999837865964, iteration: 105416
loss: 1.032291293144226,grad_norm: 0.8329349159010379, iteration: 105417
loss: 1.1556073427200317,grad_norm: 0.9999998592508598, iteration: 105418
loss: 1.090340256690979,grad_norm: 0.999999212981967, iteration: 105419
loss: 1.0731828212738037,grad_norm: 0.813645023148756, iteration: 105420
loss: 1.053676724433899,grad_norm: 0.9999995187884936, iteration: 105421
loss: 1.2287307977676392,grad_norm: 0.9999999725850919, iteration: 105422
loss: 1.1010780334472656,grad_norm: 0.9999991541420648, iteration: 105423
loss: 1.0894241333007812,grad_norm: 0.9999991786756192, iteration: 105424
loss: 0.9938897490501404,grad_norm: 0.9855521856073133, iteration: 105425
loss: 1.1025668382644653,grad_norm: 0.9999990191757239, iteration: 105426
loss: 1.0670826435089111,grad_norm: 0.9999992250727869, iteration: 105427
loss: 1.1232068538665771,grad_norm: 0.9999998630465196, iteration: 105428
loss: 1.1583667993545532,grad_norm: 0.9999996591961237, iteration: 105429
loss: 1.1195249557495117,grad_norm: 0.9999994092170152, iteration: 105430
loss: 1.0796228647232056,grad_norm: 0.871353591239566, iteration: 105431
loss: 0.9677452445030212,grad_norm: 0.8678806233470249, iteration: 105432
loss: 1.0468270778656006,grad_norm: 0.9999999931113243, iteration: 105433
loss: 1.089322805404663,grad_norm: 0.9999990497792975, iteration: 105434
loss: 1.0672755241394043,grad_norm: 0.9999998251143929, iteration: 105435
loss: 1.0997811555862427,grad_norm: 0.9999998325918823, iteration: 105436
loss: 1.125710368156433,grad_norm: 0.999999654857892, iteration: 105437
loss: 1.0403928756713867,grad_norm: 0.9999991222375918, iteration: 105438
loss: 1.0807045698165894,grad_norm: 0.9999997417411939, iteration: 105439
loss: 1.0698436498641968,grad_norm: 0.9999993270309351, iteration: 105440
loss: 0.9891763925552368,grad_norm: 0.9999989760719875, iteration: 105441
loss: 1.0878900289535522,grad_norm: 0.9999993267685917, iteration: 105442
loss: 1.0880558490753174,grad_norm: 0.9999991793768256, iteration: 105443
loss: 1.0622800588607788,grad_norm: 0.9999991582086907, iteration: 105444
loss: 1.009040355682373,grad_norm: 0.999999188447479, iteration: 105445
loss: 1.0477207899093628,grad_norm: 0.9999991531129866, iteration: 105446
loss: 1.0959739685058594,grad_norm: 0.9999992750477994, iteration: 105447
loss: 1.1036502122879028,grad_norm: 0.9999994340468988, iteration: 105448
loss: 1.1586909294128418,grad_norm: 0.9999999222756346, iteration: 105449
loss: 1.0971263647079468,grad_norm: 0.999999225454911, iteration: 105450
loss: 1.1739012002944946,grad_norm: 0.9999996962474752, iteration: 105451
loss: 1.0436068773269653,grad_norm: 0.9999999719518555, iteration: 105452
loss: 1.1249772310256958,grad_norm: 0.9999992873681521, iteration: 105453
loss: 1.1627838611602783,grad_norm: 0.9999998411695117, iteration: 105454
loss: 1.1491354703903198,grad_norm: 0.9999994958277979, iteration: 105455
loss: 1.0243535041809082,grad_norm: 0.9999994519807274, iteration: 105456
loss: 1.2915400266647339,grad_norm: 0.9999999174822088, iteration: 105457
loss: 1.3180207014083862,grad_norm: 0.9999997920412498, iteration: 105458
loss: 1.448533058166504,grad_norm: 0.9999999326152599, iteration: 105459
loss: 1.1036192178726196,grad_norm: 1.000000037188117, iteration: 105460
loss: 1.242177128791809,grad_norm: 0.9999998170435024, iteration: 105461
loss: 1.2256356477737427,grad_norm: 0.9999991989129607, iteration: 105462
loss: 1.1550899744033813,grad_norm: 0.9999996610895913, iteration: 105463
loss: 1.1413873434066772,grad_norm: 0.9999996664196404, iteration: 105464
loss: 1.1643725633621216,grad_norm: 1.0000000545941823, iteration: 105465
loss: 1.2948338985443115,grad_norm: 0.9999996871455897, iteration: 105466
loss: 1.0977716445922852,grad_norm: 0.9999990467306535, iteration: 105467
loss: 1.3213069438934326,grad_norm: 0.999999753828552, iteration: 105468
loss: 1.0177600383758545,grad_norm: 0.9999992354910369, iteration: 105469
loss: 1.0962437391281128,grad_norm: 0.9999992612339379, iteration: 105470
loss: 1.2308820486068726,grad_norm: 0.9999999727617291, iteration: 105471
loss: 1.204142451286316,grad_norm: 0.999999741526739, iteration: 105472
loss: 1.2149181365966797,grad_norm: 0.9999998486494838, iteration: 105473
loss: 1.0822757482528687,grad_norm: 0.9999994865864285, iteration: 105474
loss: 1.029529094696045,grad_norm: 0.9999997740444692, iteration: 105475
loss: 1.0818811655044556,grad_norm: 0.9999992185425636, iteration: 105476
loss: 1.1812993288040161,grad_norm: 0.9999999212360738, iteration: 105477
loss: 1.1395790576934814,grad_norm: 0.9999999170857199, iteration: 105478
loss: 1.2001343965530396,grad_norm: 0.9999998555760262, iteration: 105479
loss: 1.0842946767807007,grad_norm: 0.999999416031701, iteration: 105480
loss: 1.068827509880066,grad_norm: 0.999999942859803, iteration: 105481
loss: 1.2041385173797607,grad_norm: 0.9999999343376982, iteration: 105482
loss: 1.0643073320388794,grad_norm: 0.9999992276094652, iteration: 105483
loss: 1.2023333311080933,grad_norm: 0.9999994002018664, iteration: 105484
loss: 1.0648348331451416,grad_norm: 0.9999998358746067, iteration: 105485
loss: 1.0817666053771973,grad_norm: 0.999999930231674, iteration: 105486
loss: 1.080459713935852,grad_norm: 0.9999999554810086, iteration: 105487
loss: 1.0921027660369873,grad_norm: 0.9999997351194948, iteration: 105488
loss: 1.0603197813034058,grad_norm: 0.9999995527604585, iteration: 105489
loss: 1.1246613264083862,grad_norm: 0.9999994744755354, iteration: 105490
loss: 1.0670644044876099,grad_norm: 0.9999999776919912, iteration: 105491
loss: 1.0467208623886108,grad_norm: 0.931921710095436, iteration: 105492
loss: 1.0345741510391235,grad_norm: 0.9580591205097438, iteration: 105493
loss: 1.0701751708984375,grad_norm: 0.9999999255232083, iteration: 105494
loss: 1.127489447593689,grad_norm: 0.9999992667497329, iteration: 105495
loss: 1.133829951286316,grad_norm: 0.999999485039377, iteration: 105496
loss: 1.158152461051941,grad_norm: 0.9999998023914691, iteration: 105497
loss: 1.1266226768493652,grad_norm: 0.9999993161101749, iteration: 105498
loss: 1.0717359781265259,grad_norm: 0.996907895164252, iteration: 105499
loss: 1.021095633506775,grad_norm: 0.7496563329000516, iteration: 105500
loss: 1.0717108249664307,grad_norm: 0.9999998857555055, iteration: 105501
loss: 1.1187421083450317,grad_norm: 0.9999991127039425, iteration: 105502
loss: 1.1632075309753418,grad_norm: 0.9999996735439785, iteration: 105503
loss: 0.976018488407135,grad_norm: 0.8979812240562909, iteration: 105504
loss: 1.0459235906600952,grad_norm: 0.999999335972095, iteration: 105505
loss: 1.0221235752105713,grad_norm: 0.9999991288969922, iteration: 105506
loss: 1.0075021982192993,grad_norm: 0.9999991181265946, iteration: 105507
loss: 1.103553295135498,grad_norm: 0.9999990039395071, iteration: 105508
loss: 0.965623140335083,grad_norm: 0.9241333254892548, iteration: 105509
loss: 1.0553323030471802,grad_norm: 0.9999992154757338, iteration: 105510
loss: 0.9968231320381165,grad_norm: 0.9275068600563378, iteration: 105511
loss: 1.0470268726348877,grad_norm: 0.9999989606912981, iteration: 105512
loss: 1.169825792312622,grad_norm: 0.9999998782756896, iteration: 105513
loss: 1.0039029121398926,grad_norm: 0.840599819230647, iteration: 105514
loss: 1.0213825702667236,grad_norm: 0.9999991344100603, iteration: 105515
loss: 1.1096644401550293,grad_norm: 0.9999998103684089, iteration: 105516
loss: 1.1395026445388794,grad_norm: 0.9999999203495121, iteration: 105517
loss: 1.045640230178833,grad_norm: 0.9999989756057771, iteration: 105518
loss: 1.05784010887146,grad_norm: 0.999999311391512, iteration: 105519
loss: 1.0237748622894287,grad_norm: 0.9999991076593696, iteration: 105520
loss: 1.3681988716125488,grad_norm: 0.9999996638528889, iteration: 105521
loss: 1.2026623487472534,grad_norm: 0.999999706398169, iteration: 105522
loss: 1.1216181516647339,grad_norm: 0.9999992149750244, iteration: 105523
loss: 1.0501703023910522,grad_norm: 0.9999999356598428, iteration: 105524
loss: 1.1869680881500244,grad_norm: 0.9999991409831026, iteration: 105525
loss: 1.0572117567062378,grad_norm: 0.999999111586008, iteration: 105526
loss: 1.0604068040847778,grad_norm: 0.9999996344805685, iteration: 105527
loss: 1.0368179082870483,grad_norm: 0.999999347040115, iteration: 105528
loss: 1.010141372680664,grad_norm: 0.9999990589014911, iteration: 105529
loss: 1.003009557723999,grad_norm: 0.9999991847729942, iteration: 105530
loss: 1.022239327430725,grad_norm: 0.9999997191883098, iteration: 105531
loss: 1.0152530670166016,grad_norm: 0.9999992605764745, iteration: 105532
loss: 1.0790446996688843,grad_norm: 0.9999993917880495, iteration: 105533
loss: 1.0336405038833618,grad_norm: 0.9999996824177473, iteration: 105534
loss: 1.0133074522018433,grad_norm: 0.9999991164998834, iteration: 105535
loss: 1.0105386972427368,grad_norm: 0.83578532684534, iteration: 105536
loss: 1.0854846239089966,grad_norm: 0.999999182040656, iteration: 105537
loss: 1.0649726390838623,grad_norm: 0.9999999055552403, iteration: 105538
loss: 0.9723432660102844,grad_norm: 0.9999992183164356, iteration: 105539
loss: 1.0739086866378784,grad_norm: 0.9999992298765819, iteration: 105540
loss: 1.0196384191513062,grad_norm: 0.9999990041711266, iteration: 105541
loss: 0.9982244372367859,grad_norm: 0.999999257126936, iteration: 105542
loss: 1.0301035642623901,grad_norm: 0.9999993759763005, iteration: 105543
loss: 1.0559053421020508,grad_norm: 0.9999992282198746, iteration: 105544
loss: 1.0683432817459106,grad_norm: 0.9999994083668514, iteration: 105545
loss: 1.0251277685165405,grad_norm: 0.9999997450501462, iteration: 105546
loss: 1.0714988708496094,grad_norm: 0.998402961425811, iteration: 105547
loss: 1.0322264432907104,grad_norm: 0.9999998400240805, iteration: 105548
loss: 1.015299677848816,grad_norm: 0.9999992369677243, iteration: 105549
loss: 1.072026014328003,grad_norm: 0.996314855677723, iteration: 105550
loss: 1.1527900695800781,grad_norm: 0.9999994319269643, iteration: 105551
loss: 1.0820800065994263,grad_norm: 0.9999991682412832, iteration: 105552
loss: 1.0662723779678345,grad_norm: 0.9999997040065498, iteration: 105553
loss: 1.0192996263504028,grad_norm: 0.9999988956686513, iteration: 105554
loss: 0.9912558197975159,grad_norm: 0.9999990887926212, iteration: 105555
loss: 0.9952099323272705,grad_norm: 0.9999998217512588, iteration: 105556
loss: 1.075523018836975,grad_norm: 0.9999992318837763, iteration: 105557
loss: 1.107663869857788,grad_norm: 0.9728562312598248, iteration: 105558
loss: 1.01969313621521,grad_norm: 0.9999990531013706, iteration: 105559
loss: 1.023648977279663,grad_norm: 0.9999991289461004, iteration: 105560
loss: 1.0658879280090332,grad_norm: 0.9999992580484275, iteration: 105561
loss: 1.0524019002914429,grad_norm: 0.9999992281598477, iteration: 105562
loss: 1.064440369606018,grad_norm: 0.9999998960746801, iteration: 105563
loss: 1.0958632230758667,grad_norm: 0.99999907035152, iteration: 105564
loss: 1.0713595151901245,grad_norm: 0.9999997541284882, iteration: 105565
loss: 1.0694801807403564,grad_norm: 0.9999998370810476, iteration: 105566
loss: 1.0346590280532837,grad_norm: 0.7971122070355832, iteration: 105567
loss: 1.028910517692566,grad_norm: 0.999999087994598, iteration: 105568
loss: 1.0283571481704712,grad_norm: 0.9999992970021028, iteration: 105569
loss: 1.051318645477295,grad_norm: 0.930877631963152, iteration: 105570
loss: 1.0207655429840088,grad_norm: 0.9999991465375401, iteration: 105571
loss: 1.1007598638534546,grad_norm: 0.9999994227556676, iteration: 105572
loss: 1.148806095123291,grad_norm: 0.9999998116345413, iteration: 105573
loss: 1.0647598505020142,grad_norm: 0.9999999672275404, iteration: 105574
loss: 1.1293797492980957,grad_norm: 0.9999996017814989, iteration: 105575
loss: 1.1070936918258667,grad_norm: 0.9999997850255854, iteration: 105576
loss: 1.021195888519287,grad_norm: 0.9999998133361835, iteration: 105577
loss: 1.075644850730896,grad_norm: 0.9999991727431022, iteration: 105578
loss: 1.0368831157684326,grad_norm: 0.9999993101318976, iteration: 105579
loss: 1.210968017578125,grad_norm: 0.9999999409817877, iteration: 105580
loss: 1.110154628753662,grad_norm: 0.9999990897118237, iteration: 105581
loss: 1.0632352828979492,grad_norm: 0.9999994226599506, iteration: 105582
loss: 1.0113612413406372,grad_norm: 0.9999992229507092, iteration: 105583
loss: 1.0890671014785767,grad_norm: 0.9999998209516551, iteration: 105584
loss: 1.0705903768539429,grad_norm: 0.9999990545332018, iteration: 105585
loss: 1.0751577615737915,grad_norm: 1.0000000702075391, iteration: 105586
loss: 1.1043351888656616,grad_norm: 0.9999993204714376, iteration: 105587
loss: 1.0337255001068115,grad_norm: 0.9999992955931551, iteration: 105588
loss: 1.0083428621292114,grad_norm: 0.8229426004784607, iteration: 105589
loss: 0.9822037220001221,grad_norm: 0.9999995619959271, iteration: 105590
loss: 1.0642025470733643,grad_norm: 0.9999992865094616, iteration: 105591
loss: 1.02536141872406,grad_norm: 0.7971113782294376, iteration: 105592
loss: 1.0398606061935425,grad_norm: 0.9526391152407496, iteration: 105593
loss: 1.0079187154769897,grad_norm: 0.999999509268459, iteration: 105594
loss: 1.0912585258483887,grad_norm: 0.9999996257525786, iteration: 105595
loss: 1.092604637145996,grad_norm: 0.9999998969113132, iteration: 105596
loss: 1.0269720554351807,grad_norm: 0.9999995388706223, iteration: 105597
loss: 0.9755139946937561,grad_norm: 0.8492864790875269, iteration: 105598
loss: 1.0199768543243408,grad_norm: 0.9999996572129203, iteration: 105599
loss: 1.0999685525894165,grad_norm: 0.9999990043032091, iteration: 105600
loss: 1.200275182723999,grad_norm: 0.9999998802282567, iteration: 105601
loss: 1.062788486480713,grad_norm: 0.9999998292770497, iteration: 105602
loss: 1.0444706678390503,grad_norm: 0.9999994750398503, iteration: 105603
loss: 1.0370644330978394,grad_norm: 0.9999991791872169, iteration: 105604
loss: 1.0219900608062744,grad_norm: 0.9999991004759854, iteration: 105605
loss: 1.0777816772460938,grad_norm: 0.999999372907755, iteration: 105606
loss: 1.0421738624572754,grad_norm: 0.8723827798580843, iteration: 105607
loss: 1.1340802907943726,grad_norm: 0.9999994187960471, iteration: 105608
loss: 1.1170520782470703,grad_norm: 0.9999997761785797, iteration: 105609
loss: 0.9967272877693176,grad_norm: 0.9999996777873436, iteration: 105610
loss: 1.038618803024292,grad_norm: 0.7611522113721876, iteration: 105611
loss: 1.0381522178649902,grad_norm: 0.901975969079061, iteration: 105612
loss: 1.1103886365890503,grad_norm: 0.9999993261263248, iteration: 105613
loss: 1.0758730173110962,grad_norm: 0.9999991399881248, iteration: 105614
loss: 1.0295099020004272,grad_norm: 0.9999991420049611, iteration: 105615
loss: 1.0888744592666626,grad_norm: 0.9999994954578433, iteration: 105616
loss: 1.0758345127105713,grad_norm: 0.9999995047721377, iteration: 105617
loss: 1.2873855829238892,grad_norm: 0.9999999878701978, iteration: 105618
loss: 0.9797310829162598,grad_norm: 0.9999995666636013, iteration: 105619
loss: 1.0099824666976929,grad_norm: 0.999999916205832, iteration: 105620
loss: 1.2444052696228027,grad_norm: 0.999999636458149, iteration: 105621
loss: 0.9889016151428223,grad_norm: 0.9347440211168843, iteration: 105622
loss: 0.9976008534431458,grad_norm: 0.7562848769000493, iteration: 105623
loss: 1.270999789237976,grad_norm: 0.9999997528260941, iteration: 105624
loss: 1.097704529762268,grad_norm: 0.9999998701765307, iteration: 105625
loss: 1.161392092704773,grad_norm: 0.9999997602469993, iteration: 105626
loss: 1.0385653972625732,grad_norm: 0.9999998745474119, iteration: 105627
loss: 1.0277252197265625,grad_norm: 0.9999991884362701, iteration: 105628
loss: 1.0256236791610718,grad_norm: 0.9565994439635139, iteration: 105629
loss: 1.0950047969818115,grad_norm: 1.000000023514723, iteration: 105630
loss: 1.046345829963684,grad_norm: 0.9999992079858363, iteration: 105631
loss: 0.9914522767066956,grad_norm: 0.9999991681369254, iteration: 105632
loss: 1.0043643712997437,grad_norm: 0.9999998969523698, iteration: 105633
loss: 1.026799201965332,grad_norm: 0.9999996324778726, iteration: 105634
loss: 1.2467492818832397,grad_norm: 0.9999995353059037, iteration: 105635
loss: 1.0755409002304077,grad_norm: 0.9999992055896848, iteration: 105636
loss: 1.1949278116226196,grad_norm: 0.9999997295159656, iteration: 105637
loss: 1.0831105709075928,grad_norm: 0.9999999491825753, iteration: 105638
loss: 1.090542197227478,grad_norm: 0.9999996848382453, iteration: 105639
loss: 1.0563350915908813,grad_norm: 0.9999999260376519, iteration: 105640
loss: 1.0420734882354736,grad_norm: 0.9999999035987797, iteration: 105641
loss: 1.1528576612472534,grad_norm: 0.9999999042030272, iteration: 105642
loss: 1.0385675430297852,grad_norm: 0.9999995416804571, iteration: 105643
loss: 1.0224006175994873,grad_norm: 0.7811513282894601, iteration: 105644
loss: 1.0790148973464966,grad_norm: 0.9999996035839251, iteration: 105645
loss: 1.005764126777649,grad_norm: 0.9999991142939615, iteration: 105646
loss: 1.0684078931808472,grad_norm: 0.921399305876424, iteration: 105647
loss: 1.0551671981811523,grad_norm: 0.9999992991567452, iteration: 105648
loss: 1.0610055923461914,grad_norm: 0.9999996969294072, iteration: 105649
loss: 1.03837251663208,grad_norm: 0.9999994607249879, iteration: 105650
loss: 0.9979342222213745,grad_norm: 0.9999990691206573, iteration: 105651
loss: 1.0458842515945435,grad_norm: 0.9999991024076288, iteration: 105652
loss: 1.0885618925094604,grad_norm: 0.9999993805208649, iteration: 105653
loss: 1.0097652673721313,grad_norm: 0.9999997751413363, iteration: 105654
loss: 1.0627143383026123,grad_norm: 0.9999992672390907, iteration: 105655
loss: 1.1443049907684326,grad_norm: 0.999999938487014, iteration: 105656
loss: 1.0561667680740356,grad_norm: 0.999999466885933, iteration: 105657
loss: 1.0126432180404663,grad_norm: 0.9999991406802037, iteration: 105658
loss: 1.049045443534851,grad_norm: 0.9999997907357987, iteration: 105659
loss: 1.0240103006362915,grad_norm: 0.9999992494624057, iteration: 105660
loss: 1.0489192008972168,grad_norm: 0.999999271272428, iteration: 105661
loss: 1.048313856124878,grad_norm: 0.9999992114856721, iteration: 105662
loss: 1.0604580640792847,grad_norm: 0.9999996785575713, iteration: 105663
loss: 1.0765000581741333,grad_norm: 0.9999994894496783, iteration: 105664
loss: 1.08475923538208,grad_norm: 0.9999995295127135, iteration: 105665
loss: 1.0939249992370605,grad_norm: 0.9999996106002236, iteration: 105666
loss: 1.029665470123291,grad_norm: 0.9847377947813437, iteration: 105667
loss: 1.1090996265411377,grad_norm: 0.9999995606738744, iteration: 105668
loss: 1.0616482496261597,grad_norm: 0.8659328870775321, iteration: 105669
loss: 1.0596225261688232,grad_norm: 0.9999990460487698, iteration: 105670
loss: 1.0332432985305786,grad_norm: 0.7886986136152611, iteration: 105671
loss: 1.1334398984909058,grad_norm: 0.9999996678280029, iteration: 105672
loss: 1.0333471298217773,grad_norm: 0.9999995183387034, iteration: 105673
loss: 1.0840675830841064,grad_norm: 0.9970981543204056, iteration: 105674
loss: 1.1053520441055298,grad_norm: 0.9452314481438797, iteration: 105675
loss: 1.0237953662872314,grad_norm: 0.9172288300189108, iteration: 105676
loss: 1.0649021863937378,grad_norm: 0.999999066656276, iteration: 105677
loss: 1.0175340175628662,grad_norm: 0.9999991673957855, iteration: 105678
loss: 1.1149617433547974,grad_norm: 0.9999994556614462, iteration: 105679
loss: 1.1084563732147217,grad_norm: 0.9999997724014236, iteration: 105680
loss: 1.0961031913757324,grad_norm: 0.9999994056648376, iteration: 105681
loss: 1.0623114109039307,grad_norm: 0.9999999028119831, iteration: 105682
loss: 1.036434292793274,grad_norm: 0.9999990376004743, iteration: 105683
loss: 1.0832273960113525,grad_norm: 0.9999993430234668, iteration: 105684
loss: 1.1174118518829346,grad_norm: 0.9999999003515622, iteration: 105685
loss: 1.0933935642242432,grad_norm: 0.9999999581341997, iteration: 105686
loss: 1.0022083520889282,grad_norm: 0.9071056993255405, iteration: 105687
loss: 1.0827033519744873,grad_norm: 0.9999998597652984, iteration: 105688
loss: 1.0793567895889282,grad_norm: 0.9999999004701695, iteration: 105689
loss: 1.1030243635177612,grad_norm: 0.9999999010075551, iteration: 105690
loss: 1.1097512245178223,grad_norm: 0.9999997885715832, iteration: 105691
loss: 1.0945852994918823,grad_norm: 0.9999999877041903, iteration: 105692
loss: 1.0408833026885986,grad_norm: 0.9727246268506242, iteration: 105693
loss: 1.1244696378707886,grad_norm: 0.9999998501138618, iteration: 105694
loss: 1.1023935079574585,grad_norm: 0.9999997330100052, iteration: 105695
loss: 1.0383737087249756,grad_norm: 0.9999994054831595, iteration: 105696
loss: 1.0529760122299194,grad_norm: 1.0000000294549825, iteration: 105697
loss: 1.0815095901489258,grad_norm: 0.999999494354161, iteration: 105698
loss: 1.1156076192855835,grad_norm: 0.9999997874653053, iteration: 105699
loss: 1.1623740196228027,grad_norm: 0.9999999938797401, iteration: 105700
loss: 1.0565797090530396,grad_norm: 0.9999990622098754, iteration: 105701
loss: 1.0683232545852661,grad_norm: 0.9999990179859648, iteration: 105702
loss: 1.094416618347168,grad_norm: 0.9999998847181001, iteration: 105703
loss: 1.1439844369888306,grad_norm: 0.9999992946717032, iteration: 105704
loss: 0.9965829253196716,grad_norm: 0.831417546475471, iteration: 105705
loss: 1.0446817874908447,grad_norm: 0.9999994091555429, iteration: 105706
loss: 1.1418399810791016,grad_norm: 0.9999995014317573, iteration: 105707
loss: 1.027465581893921,grad_norm: 0.9999989979712106, iteration: 105708
loss: 1.0477335453033447,grad_norm: 0.9999991959600161, iteration: 105709
loss: 1.1611855030059814,grad_norm: 0.9999997524554444, iteration: 105710
loss: 1.171682596206665,grad_norm: 0.9999997339186957, iteration: 105711
loss: 1.0735697746276855,grad_norm: 0.8837808200498867, iteration: 105712
loss: 1.073729395866394,grad_norm: 0.9999999696195567, iteration: 105713
loss: 1.1015745401382446,grad_norm: 0.9999993944012694, iteration: 105714
loss: 1.070158839225769,grad_norm: 1.0000000224958343, iteration: 105715
loss: 1.083343505859375,grad_norm: 0.9999991235816604, iteration: 105716
loss: 1.1146987676620483,grad_norm: 0.9999998908468378, iteration: 105717
loss: 1.1822547912597656,grad_norm: 0.9999999482573543, iteration: 105718
loss: 1.2608524560928345,grad_norm: 0.9999997159862397, iteration: 105719
loss: 1.2746673822402954,grad_norm: 0.9999997989284239, iteration: 105720
loss: 1.142090916633606,grad_norm: 0.9999993589587444, iteration: 105721
loss: 1.0822697877883911,grad_norm: 0.9999993070670291, iteration: 105722
loss: 1.213443398475647,grad_norm: 0.9999998224505946, iteration: 105723
loss: 1.0355033874511719,grad_norm: 0.9999989961403886, iteration: 105724
loss: 1.0304192304611206,grad_norm: 0.9999991094220497, iteration: 105725
loss: 1.0681145191192627,grad_norm: 0.9999992924493069, iteration: 105726
loss: 1.3077818155288696,grad_norm: 0.9999999172869724, iteration: 105727
loss: 1.1660979986190796,grad_norm: 0.9999990738146832, iteration: 105728
loss: 1.1501480340957642,grad_norm: 0.9999997205987192, iteration: 105729
loss: 1.257802128791809,grad_norm: 0.9999994476245178, iteration: 105730
loss: 1.3897027969360352,grad_norm: 0.9999998709291058, iteration: 105731
loss: 1.158236026763916,grad_norm: 0.9999997671495958, iteration: 105732
loss: 1.1502838134765625,grad_norm: 0.9999997069376741, iteration: 105733
loss: 1.2961368560791016,grad_norm: 0.9999999722303222, iteration: 105734
loss: 1.206379771232605,grad_norm: 0.9999999144684101, iteration: 105735
loss: 1.2970435619354248,grad_norm: 0.999999858330388, iteration: 105736
loss: 1.133866548538208,grad_norm: 0.9999993587406375, iteration: 105737
loss: 1.1285581588745117,grad_norm: 0.9999996609389326, iteration: 105738
loss: 1.0286141633987427,grad_norm: 0.9999991122263718, iteration: 105739
loss: 1.169204592704773,grad_norm: 0.9999994142385601, iteration: 105740
loss: 1.1037107706069946,grad_norm: 0.9999996576310534, iteration: 105741
loss: 1.1187694072723389,grad_norm: 0.9999996628536867, iteration: 105742
loss: 1.2857509851455688,grad_norm: 0.9999999033546066, iteration: 105743
loss: 1.347153663635254,grad_norm: 0.9999998266197483, iteration: 105744
loss: 1.0772435665130615,grad_norm: 0.9999992235517208, iteration: 105745
loss: 1.0546858310699463,grad_norm: 0.999999575049892, iteration: 105746
loss: 1.2185508012771606,grad_norm: 0.9999993600903891, iteration: 105747
loss: 1.3296971321105957,grad_norm: 0.9999995615863787, iteration: 105748
loss: 1.266085147857666,grad_norm: 0.999999482196417, iteration: 105749
loss: 1.0312384366989136,grad_norm: 0.8972544157116904, iteration: 105750
loss: 1.2575243711471558,grad_norm: 0.9999998351466959, iteration: 105751
loss: 1.0709638595581055,grad_norm: 0.9999996742030505, iteration: 105752
loss: 1.0575323104858398,grad_norm: 0.9999991634171519, iteration: 105753
loss: 1.0656262636184692,grad_norm: 0.9999991811712676, iteration: 105754
loss: 1.1591287851333618,grad_norm: 0.9999995952092301, iteration: 105755
loss: 1.1492087841033936,grad_norm: 0.9999995411410149, iteration: 105756
loss: 1.1055330038070679,grad_norm: 0.9999994932271835, iteration: 105757
loss: 1.1366581916809082,grad_norm: 0.9999995827420343, iteration: 105758
loss: 1.1425809860229492,grad_norm: 0.9999992136498217, iteration: 105759
loss: 1.175074577331543,grad_norm: 0.999999672955705, iteration: 105760
loss: 1.1052132844924927,grad_norm: 0.9999991512260641, iteration: 105761
loss: 1.0889273881912231,grad_norm: 0.9999997016594006, iteration: 105762
loss: 1.0659767389297485,grad_norm: 0.9999989747167644, iteration: 105763
loss: 1.0594652891159058,grad_norm: 0.9999991001449466, iteration: 105764
loss: 1.0139193534851074,grad_norm: 0.9070285971076615, iteration: 105765
loss: 1.082683801651001,grad_norm: 0.9999993888967555, iteration: 105766
loss: 1.2088611125946045,grad_norm: 0.9999993175644021, iteration: 105767
loss: 1.1643184423446655,grad_norm: 0.9999997791432171, iteration: 105768
loss: 1.0657684803009033,grad_norm: 0.9999997991667516, iteration: 105769
loss: 1.0394576787948608,grad_norm: 0.9999992959546649, iteration: 105770
loss: 1.1369214057922363,grad_norm: 0.9999994303033494, iteration: 105771
loss: 1.0585187673568726,grad_norm: 0.8205956015280669, iteration: 105772
loss: 1.0829648971557617,grad_norm: 0.999999866146729, iteration: 105773
loss: 1.1177482604980469,grad_norm: 0.9999991717936212, iteration: 105774
loss: 1.035775899887085,grad_norm: 0.9999999095200771, iteration: 105775
loss: 1.0045503377914429,grad_norm: 0.999999484435694, iteration: 105776
loss: 1.044353723526001,grad_norm: 0.9999999701352187, iteration: 105777
loss: 1.1712756156921387,grad_norm: 0.9999997065211943, iteration: 105778
loss: 1.0078098773956299,grad_norm: 0.9999989959463561, iteration: 105779
loss: 0.9842134714126587,grad_norm: 0.999999050713761, iteration: 105780
loss: 1.0484275817871094,grad_norm: 0.9999991836180105, iteration: 105781
loss: 1.119140386581421,grad_norm: 0.9999996880428776, iteration: 105782
loss: 1.121229648590088,grad_norm: 0.9999996357472548, iteration: 105783
loss: 1.2311874628067017,grad_norm: 0.9999997827834873, iteration: 105784
loss: 1.020018219947815,grad_norm: 0.999999112114451, iteration: 105785
loss: 1.0778306722640991,grad_norm: 0.999999784651362, iteration: 105786
loss: 1.1034717559814453,grad_norm: 0.9999998492838442, iteration: 105787
loss: 1.0531315803527832,grad_norm: 0.9999991368391443, iteration: 105788
loss: 1.003180980682373,grad_norm: 0.9312816667318428, iteration: 105789
loss: 1.0227925777435303,grad_norm: 0.9999991269889058, iteration: 105790
loss: 1.0444928407669067,grad_norm: 0.9999991717038075, iteration: 105791
loss: 1.2819125652313232,grad_norm: 0.9999997760643979, iteration: 105792
loss: 1.0282682180404663,grad_norm: 0.9999993371144533, iteration: 105793
loss: 1.1949610710144043,grad_norm: 0.9999997205464487, iteration: 105794
loss: 1.0412237644195557,grad_norm: 0.999999321918102, iteration: 105795
loss: 1.0265419483184814,grad_norm: 0.9999993222518518, iteration: 105796
loss: 1.0061702728271484,grad_norm: 0.999999359109683, iteration: 105797
loss: 1.1681290864944458,grad_norm: 0.9999998539664215, iteration: 105798
loss: 1.1235228776931763,grad_norm: 0.9999993371934863, iteration: 105799
loss: 1.2755452394485474,grad_norm: 0.9999999180562329, iteration: 105800
loss: 1.1577757596969604,grad_norm: 0.9999999339099419, iteration: 105801
loss: 0.99080890417099,grad_norm: 0.9065703767546135, iteration: 105802
loss: 1.0728777647018433,grad_norm: 0.9999995742698037, iteration: 105803
loss: 1.0181268453598022,grad_norm: 0.8733142896221165, iteration: 105804
loss: 1.1776589155197144,grad_norm: 0.9999997228400359, iteration: 105805
loss: 1.110505223274231,grad_norm: 0.9999996577187933, iteration: 105806
loss: 1.0643126964569092,grad_norm: 0.9970697083739444, iteration: 105807
loss: 1.0055090188980103,grad_norm: 0.9999990443337845, iteration: 105808
loss: 1.1571534872055054,grad_norm: 0.999999544650383, iteration: 105809
loss: 1.0359950065612793,grad_norm: 0.9194745451637065, iteration: 105810
loss: 1.1176055669784546,grad_norm: 0.9999990952127269, iteration: 105811
loss: 1.0214452743530273,grad_norm: 0.999999275762796, iteration: 105812
loss: 1.0427602529525757,grad_norm: 0.9926987982876685, iteration: 105813
loss: 1.045371174812317,grad_norm: 0.9999999146213859, iteration: 105814
loss: 1.0593516826629639,grad_norm: 0.9999991947502734, iteration: 105815
loss: 1.1017411947250366,grad_norm: 0.9999996358127714, iteration: 105816
loss: 1.0603636503219604,grad_norm: 0.9999997931247843, iteration: 105817
loss: 1.2272849082946777,grad_norm: 0.9999995603231189, iteration: 105818
loss: 1.1501754522323608,grad_norm: 0.9999994795416066, iteration: 105819
loss: 1.1000134944915771,grad_norm: 0.9999997430354973, iteration: 105820
loss: 1.1812835931777954,grad_norm: 0.9999996687103553, iteration: 105821
loss: 1.0741603374481201,grad_norm: 0.9999995323157095, iteration: 105822
loss: 1.1078159809112549,grad_norm: 0.9999992432022397, iteration: 105823
loss: 1.008309006690979,grad_norm: 0.9999991092502619, iteration: 105824
loss: 1.0518008470535278,grad_norm: 0.9999992212656188, iteration: 105825
loss: 1.0213391780853271,grad_norm: 0.9567010809482633, iteration: 105826
loss: 1.036505103111267,grad_norm: 0.9999995748987909, iteration: 105827
loss: 1.0986285209655762,grad_norm: 0.9999998251783218, iteration: 105828
loss: 1.0681716203689575,grad_norm: 0.9999999213336482, iteration: 105829
loss: 1.000440239906311,grad_norm: 0.9999998567246061, iteration: 105830
loss: 1.0758697986602783,grad_norm: 0.9999995115972309, iteration: 105831
loss: 1.093063473701477,grad_norm: 0.9999996371732582, iteration: 105832
loss: 1.0611228942871094,grad_norm: 0.99999973454987, iteration: 105833
loss: 1.0329207181930542,grad_norm: 0.9999994561110399, iteration: 105834
loss: 1.032981276512146,grad_norm: 0.9465786498968868, iteration: 105835
loss: 1.031632900238037,grad_norm: 0.9999991155892699, iteration: 105836
loss: 1.1162079572677612,grad_norm: 0.9999993189168128, iteration: 105837
loss: 1.2817257642745972,grad_norm: 0.9999996278996358, iteration: 105838
loss: 1.1820389032363892,grad_norm: 0.9999998189774165, iteration: 105839
loss: 1.1128767728805542,grad_norm: 0.9999995158558302, iteration: 105840
loss: 1.08737051486969,grad_norm: 0.9999996308878449, iteration: 105841
loss: 1.1737544536590576,grad_norm: 0.9999997159639635, iteration: 105842
loss: 1.0812866687774658,grad_norm: 0.9999994629288518, iteration: 105843
loss: 1.0589425563812256,grad_norm: 0.9519986589397084, iteration: 105844
loss: 1.255456805229187,grad_norm: 0.9999993896997482, iteration: 105845
loss: 1.0870418548583984,grad_norm: 0.9389003405389367, iteration: 105846
loss: 1.104320764541626,grad_norm: 0.999999079832916, iteration: 105847
loss: 1.1007630825042725,grad_norm: 0.9999995147493246, iteration: 105848
loss: 1.0156232118606567,grad_norm: 0.8202326321054088, iteration: 105849
loss: 1.0517719984054565,grad_norm: 0.9999998859045928, iteration: 105850
loss: 1.0141167640686035,grad_norm: 0.999999058818593, iteration: 105851
loss: 1.0393967628479004,grad_norm: 0.9534233962633512, iteration: 105852
loss: 1.0563671588897705,grad_norm: 0.9999992198864391, iteration: 105853
loss: 1.01329505443573,grad_norm: 0.9999991210389866, iteration: 105854
loss: 1.0188149213790894,grad_norm: 0.9999993230473194, iteration: 105855
loss: 1.0864202976226807,grad_norm: 0.9999991665047012, iteration: 105856
loss: 1.0234653949737549,grad_norm: 0.9999998030588495, iteration: 105857
loss: 1.0550941228866577,grad_norm: 0.99999919495914, iteration: 105858
loss: 0.988199770450592,grad_norm: 0.8961437406810536, iteration: 105859
loss: 0.9760862588882446,grad_norm: 0.958100913966067, iteration: 105860
loss: 1.1177525520324707,grad_norm: 0.9999994537557325, iteration: 105861
loss: 0.9840273857116699,grad_norm: 0.8602749336124081, iteration: 105862
loss: 0.9772745966911316,grad_norm: 0.8164299989038646, iteration: 105863
loss: 1.0131772756576538,grad_norm: 0.8847649701014673, iteration: 105864
loss: 1.0493124723434448,grad_norm: 0.9008922762132716, iteration: 105865
loss: 0.999931812286377,grad_norm: 0.9611357030725978, iteration: 105866
loss: 1.0229212045669556,grad_norm: 0.9999991454776505, iteration: 105867
loss: 1.0454388856887817,grad_norm: 0.9512735863293673, iteration: 105868
loss: 1.11381196975708,grad_norm: 0.999999873981451, iteration: 105869
loss: 1.0147663354873657,grad_norm: 0.9593879736743371, iteration: 105870
loss: 1.1087262630462646,grad_norm: 0.9999999601267098, iteration: 105871
loss: 1.1007894277572632,grad_norm: 1.0000000127332491, iteration: 105872
loss: 1.0258268117904663,grad_norm: 0.911285146723667, iteration: 105873
loss: 1.013771891593933,grad_norm: 0.9999991897325825, iteration: 105874
loss: 1.0356065034866333,grad_norm: 0.9999997808170735, iteration: 105875
loss: 1.1808085441589355,grad_norm: 0.999999824446214, iteration: 105876
loss: 1.1065772771835327,grad_norm: 0.999999583615112, iteration: 105877
loss: 1.0670819282531738,grad_norm: 0.9999993184037448, iteration: 105878
loss: 1.0085328817367554,grad_norm: 0.8518283383740831, iteration: 105879
loss: 1.0845022201538086,grad_norm: 0.9999992626310528, iteration: 105880
loss: 0.989169180393219,grad_norm: 0.7924421364157367, iteration: 105881
loss: 1.0408167839050293,grad_norm: 0.9354463282521444, iteration: 105882
loss: 1.0131880044937134,grad_norm: 0.9689482425155841, iteration: 105883
loss: 1.1060246229171753,grad_norm: 0.9999998664501304, iteration: 105884
loss: 0.9921927452087402,grad_norm: 0.8919391074247449, iteration: 105885
loss: 1.0843188762664795,grad_norm: 0.9999993389163954, iteration: 105886
loss: 1.0237774848937988,grad_norm: 0.9031312856370883, iteration: 105887
loss: 1.2179268598556519,grad_norm: 0.9999991983546176, iteration: 105888
loss: 1.027347445487976,grad_norm: 0.9498626932178601, iteration: 105889
loss: 0.9993374943733215,grad_norm: 0.8229546167416478, iteration: 105890
loss: 1.0006226301193237,grad_norm: 0.9999992142230127, iteration: 105891
loss: 1.086344838142395,grad_norm: 0.9999991284887688, iteration: 105892
loss: 1.1103172302246094,grad_norm: 0.9999995829775011, iteration: 105893
loss: 1.1248035430908203,grad_norm: 0.9999993703769033, iteration: 105894
loss: 1.077195405960083,grad_norm: 0.975135827136915, iteration: 105895
loss: 1.10630202293396,grad_norm: 0.9999994847267072, iteration: 105896
loss: 1.065827488899231,grad_norm: 0.9954861289836617, iteration: 105897
loss: 1.004944920539856,grad_norm: 0.903051806544641, iteration: 105898
loss: 1.0552040338516235,grad_norm: 0.9999995157908913, iteration: 105899
loss: 1.0613152980804443,grad_norm: 0.9999994848482134, iteration: 105900
loss: 1.1951746940612793,grad_norm: 0.9999997751727552, iteration: 105901
loss: 1.0051219463348389,grad_norm: 0.9999990628560628, iteration: 105902
loss: 1.017268419265747,grad_norm: 0.9999993646145606, iteration: 105903
loss: 1.0526607036590576,grad_norm: 0.9999998342111613, iteration: 105904
loss: 1.049457311630249,grad_norm: 0.9999990386999654, iteration: 105905
loss: 0.9784452319145203,grad_norm: 0.9650730157559237, iteration: 105906
loss: 1.021136999130249,grad_norm: 0.9132798532245494, iteration: 105907
loss: 1.417933702468872,grad_norm: 0.9999994634409982, iteration: 105908
loss: 1.004892110824585,grad_norm: 0.8237642878783022, iteration: 105909
loss: 1.1099827289581299,grad_norm: 0.9999997855361975, iteration: 105910
loss: 1.036246418952942,grad_norm: 0.8166198546613888, iteration: 105911
loss: 1.0473663806915283,grad_norm: 0.9492687635086269, iteration: 105912
loss: 1.1164971590042114,grad_norm: 0.9999998955193773, iteration: 105913
loss: 1.073795199394226,grad_norm: 0.9999997926124569, iteration: 105914
loss: 1.2115025520324707,grad_norm: 0.9999995379546218, iteration: 105915
loss: 1.0066403150558472,grad_norm: 0.999999603565387, iteration: 105916
loss: 1.0091042518615723,grad_norm: 0.9999997148978744, iteration: 105917
loss: 1.0364608764648438,grad_norm: 0.8656358070496488, iteration: 105918
loss: 1.100785255432129,grad_norm: 0.9999993271621574, iteration: 105919
loss: 1.0789422988891602,grad_norm: 0.9999990049689119, iteration: 105920
loss: 1.0243096351623535,grad_norm: 0.9999999712585357, iteration: 105921
loss: 1.0291329622268677,grad_norm: 0.9999991595652531, iteration: 105922
loss: 1.1047534942626953,grad_norm: 0.9999992335707786, iteration: 105923
loss: 1.0020561218261719,grad_norm: 0.9999989152921959, iteration: 105924
loss: 1.0398708581924438,grad_norm: 0.9999993681542333, iteration: 105925
loss: 1.085722804069519,grad_norm: 0.999999160506846, iteration: 105926
loss: 1.0140464305877686,grad_norm: 0.9999990540723059, iteration: 105927
loss: 1.087577223777771,grad_norm: 0.9999993649809474, iteration: 105928
loss: 1.026089072227478,grad_norm: 0.9808085288895495, iteration: 105929
loss: 1.0580114126205444,grad_norm: 0.8339659673580933, iteration: 105930
loss: 1.0131231546401978,grad_norm: 0.930974056193389, iteration: 105931
loss: 1.060968041419983,grad_norm: 0.9999991716868064, iteration: 105932
loss: 1.044379711151123,grad_norm: 0.9999993571568841, iteration: 105933
loss: 1.0836423635482788,grad_norm: 0.9711609690874764, iteration: 105934
loss: 1.0693814754486084,grad_norm: 0.9999991363016332, iteration: 105935
loss: 1.0177165269851685,grad_norm: 0.9999998499010645, iteration: 105936
loss: 1.090498447418213,grad_norm: 0.9999994109514886, iteration: 105937
loss: 1.0730844736099243,grad_norm: 0.9999998313971259, iteration: 105938
loss: 0.9768892526626587,grad_norm: 0.8476315900434757, iteration: 105939
loss: 1.1165317296981812,grad_norm: 0.9999995621477314, iteration: 105940
loss: 1.1761595010757446,grad_norm: 0.9999996426296553, iteration: 105941
loss: 1.146213412284851,grad_norm: 0.9999995892061712, iteration: 105942
loss: 1.0896767377853394,grad_norm: 0.9999994229725209, iteration: 105943
loss: 1.4885319471359253,grad_norm: 0.9999999239518579, iteration: 105944
loss: 1.051414132118225,grad_norm: 1.0000000128022277, iteration: 105945
loss: 1.0410284996032715,grad_norm: 0.9999990685690826, iteration: 105946
loss: 1.1430888175964355,grad_norm: 0.9999993731213562, iteration: 105947
loss: 1.0927295684814453,grad_norm: 0.8764120554451382, iteration: 105948
loss: 1.0497275590896606,grad_norm: 0.9697543197495527, iteration: 105949
loss: 1.0505081415176392,grad_norm: 0.9999993415592164, iteration: 105950
loss: 1.0174064636230469,grad_norm: 0.9999998978014729, iteration: 105951
loss: 1.4185539484024048,grad_norm: 0.9999998678686927, iteration: 105952
loss: 1.0433882474899292,grad_norm: 0.999998950973108, iteration: 105953
loss: 1.148959755897522,grad_norm: 0.999999372416052, iteration: 105954
loss: 1.065301537513733,grad_norm: 0.9999993950008332, iteration: 105955
loss: 1.0477832555770874,grad_norm: 0.9999990999586809, iteration: 105956
loss: 1.084120512008667,grad_norm: 0.999999977127904, iteration: 105957
loss: 1.0144884586334229,grad_norm: 0.9398880928411524, iteration: 105958
loss: 1.0273327827453613,grad_norm: 0.9999995519033725, iteration: 105959
loss: 1.2876663208007812,grad_norm: 0.9999995278260395, iteration: 105960
loss: 1.1439213752746582,grad_norm: 0.9999995371929306, iteration: 105961
loss: 1.020729660987854,grad_norm: 0.9999990278446064, iteration: 105962
loss: 1.1895897388458252,grad_norm: 0.9999999032250636, iteration: 105963
loss: 1.0692356824874878,grad_norm: 0.9999990494546489, iteration: 105964
loss: 1.1061550378799438,grad_norm: 0.9999992006257992, iteration: 105965
loss: 1.0131926536560059,grad_norm: 0.9999991339714926, iteration: 105966
loss: 1.2542132139205933,grad_norm: 0.9999995178530781, iteration: 105967
loss: 1.0106078386306763,grad_norm: 0.8401334610040178, iteration: 105968
loss: 1.104175329208374,grad_norm: 0.9999993420224889, iteration: 105969
loss: 1.0340157747268677,grad_norm: 0.9999995853809444, iteration: 105970
loss: 1.1022645235061646,grad_norm: 0.9999996284963302, iteration: 105971
loss: 1.0103167295455933,grad_norm: 0.9999993884538171, iteration: 105972
loss: 1.0245507955551147,grad_norm: 0.8025915120724726, iteration: 105973
loss: 1.0038838386535645,grad_norm: 0.9999995017574294, iteration: 105974
loss: 1.0355130434036255,grad_norm: 0.942349963366348, iteration: 105975
loss: 1.1007132530212402,grad_norm: 0.9999995694616851, iteration: 105976
loss: 1.0222625732421875,grad_norm: 0.8835673670765923, iteration: 105977
loss: 1.0574846267700195,grad_norm: 0.9999990228217012, iteration: 105978
loss: 1.0100581645965576,grad_norm: 0.870433369023757, iteration: 105979
loss: 1.0083872079849243,grad_norm: 0.9447211093519167, iteration: 105980
loss: 1.0420702695846558,grad_norm: 0.9999991206107927, iteration: 105981
loss: 0.9890478849411011,grad_norm: 0.9999991164276694, iteration: 105982
loss: 1.0685619115829468,grad_norm: 0.9999992856467493, iteration: 105983
loss: 0.9964475035667419,grad_norm: 0.9198149866846096, iteration: 105984
loss: 1.0245251655578613,grad_norm: 0.9551346068014216, iteration: 105985
loss: 1.1491036415100098,grad_norm: 0.999999789286123, iteration: 105986
loss: 1.0690027475357056,grad_norm: 1.0000000077976066, iteration: 105987
loss: 1.0641779899597168,grad_norm: 0.9999990432363479, iteration: 105988
loss: 0.9773169755935669,grad_norm: 0.9060899885574263, iteration: 105989
loss: 1.169158935546875,grad_norm: 0.9999997264198661, iteration: 105990
loss: 1.0113544464111328,grad_norm: 0.9129110201171214, iteration: 105991
loss: 1.0040706396102905,grad_norm: 0.8737101614328622, iteration: 105992
loss: 1.0217570066452026,grad_norm: 0.9999993257097458, iteration: 105993
loss: 1.0360840559005737,grad_norm: 0.8081815098963764, iteration: 105994
loss: 1.0221470594406128,grad_norm: 0.9090169718527161, iteration: 105995
loss: 1.0048246383666992,grad_norm: 0.9999996059988101, iteration: 105996
loss: 1.0136245489120483,grad_norm: 0.9999991629853752, iteration: 105997
loss: 1.2362494468688965,grad_norm: 0.999999302035377, iteration: 105998
loss: 1.0975656509399414,grad_norm: 0.9999990983236668, iteration: 105999
loss: 1.0657447576522827,grad_norm: 0.9999992574041087, iteration: 106000
loss: 1.0162864923477173,grad_norm: 0.999999250711346, iteration: 106001
loss: 1.0903406143188477,grad_norm: 0.9999997996075223, iteration: 106002
loss: 1.438675880432129,grad_norm: 0.999999747141252, iteration: 106003
loss: 1.361671805381775,grad_norm: 0.9999993596407687, iteration: 106004
loss: 1.0422900915145874,grad_norm: 0.9999997407195964, iteration: 106005
loss: 1.4636878967285156,grad_norm: 0.9999995997961332, iteration: 106006
loss: 1.0720665454864502,grad_norm: 0.999999963912799, iteration: 106007
loss: 1.0925723314285278,grad_norm: 0.9999995868659368, iteration: 106008
loss: 1.1052378416061401,grad_norm: 0.9999996338587179, iteration: 106009
loss: 1.0812797546386719,grad_norm: 0.9999996530008217, iteration: 106010
loss: 1.015568733215332,grad_norm: 0.9511036365971882, iteration: 106011
loss: 1.0683246850967407,grad_norm: 0.9999991803690307, iteration: 106012
loss: 1.324888825416565,grad_norm: 0.9999999235903319, iteration: 106013
loss: 1.003800868988037,grad_norm: 0.6542167128519686, iteration: 106014
loss: 1.1414713859558105,grad_norm: 0.9999998797339842, iteration: 106015
loss: 1.168685793876648,grad_norm: 0.9999992169102808, iteration: 106016
loss: 1.210130214691162,grad_norm: 0.9999996550943941, iteration: 106017
loss: 1.1030423641204834,grad_norm: 0.9999998779985574, iteration: 106018
loss: 1.1167024374008179,grad_norm: 0.9999996970776495, iteration: 106019
loss: 1.065659523010254,grad_norm: 0.9999998365653232, iteration: 106020
loss: 1.2753770351409912,grad_norm: 0.9999997606739376, iteration: 106021
loss: 1.049076795578003,grad_norm: 0.9999992308184129, iteration: 106022
loss: 1.1004600524902344,grad_norm: 0.9999994353819337, iteration: 106023
loss: 1.0203540325164795,grad_norm: 0.8616105651487622, iteration: 106024
loss: 1.0475459098815918,grad_norm: 0.9999990641610705, iteration: 106025
loss: 1.181265950202942,grad_norm: 0.9999994152088941, iteration: 106026
loss: 1.021166205406189,grad_norm: 0.9975793953662339, iteration: 106027
loss: 1.0460461378097534,grad_norm: 0.9779747671666873, iteration: 106028
loss: 1.0181814432144165,grad_norm: 0.9999990047237673, iteration: 106029
loss: 1.0681216716766357,grad_norm: 0.9999992900082187, iteration: 106030
loss: 1.1401588916778564,grad_norm: 0.9999991328955888, iteration: 106031
loss: 1.0345649719238281,grad_norm: 0.999999509842245, iteration: 106032
loss: 1.1210362911224365,grad_norm: 0.9999991834986853, iteration: 106033
loss: 1.0482289791107178,grad_norm: 0.9999999564397175, iteration: 106034
loss: 1.2178351879119873,grad_norm: 0.999999652226271, iteration: 106035
loss: 1.066938042640686,grad_norm: 0.9999994813142354, iteration: 106036
loss: 1.0161653757095337,grad_norm: 0.9999996993856998, iteration: 106037
loss: 1.3684234619140625,grad_norm: 0.9999995868441433, iteration: 106038
loss: 1.1389214992523193,grad_norm: 0.9999991097873104, iteration: 106039
loss: 1.1067322492599487,grad_norm: 0.9999991528689078, iteration: 106040
loss: 1.0139080286026,grad_norm: 0.9999991863910056, iteration: 106041
loss: 1.0416830778121948,grad_norm: 0.8247072481807305, iteration: 106042
loss: 0.9919422268867493,grad_norm: 0.9137583518913054, iteration: 106043
loss: 1.0730515718460083,grad_norm: 0.9770631352498168, iteration: 106044
loss: 1.0225162506103516,grad_norm: 0.9999990673100045, iteration: 106045
loss: 1.0924943685531616,grad_norm: 0.9999990388921564, iteration: 106046
loss: 1.029905915260315,grad_norm: 0.9999996267506028, iteration: 106047
loss: 1.2204444408416748,grad_norm: 1.0000000294070424, iteration: 106048
loss: 1.0788660049438477,grad_norm: 0.9999999005098786, iteration: 106049
loss: 1.0670561790466309,grad_norm: 0.9999994227343609, iteration: 106050
loss: 1.0315436124801636,grad_norm: 0.9999991465115848, iteration: 106051
loss: 1.1417595148086548,grad_norm: 0.9999994824670637, iteration: 106052
loss: 1.0319178104400635,grad_norm: 0.9999999156037988, iteration: 106053
loss: 1.124390959739685,grad_norm: 0.9999993081806244, iteration: 106054
loss: 1.0754474401474,grad_norm: 0.99999976664708, iteration: 106055
loss: 1.0935543775558472,grad_norm: 0.9999993903321841, iteration: 106056
loss: 0.9808710217475891,grad_norm: 0.8538409160866431, iteration: 106057
loss: 1.0446751117706299,grad_norm: 0.9999996365578595, iteration: 106058
loss: 1.0619055032730103,grad_norm: 0.9999999045524619, iteration: 106059
loss: 1.0591485500335693,grad_norm: 0.9999991395473472, iteration: 106060
loss: 1.0333774089813232,grad_norm: 0.9366465706160154, iteration: 106061
loss: 1.1297296285629272,grad_norm: 0.9999995784297335, iteration: 106062
loss: 0.9957545399665833,grad_norm: 0.9240358764863803, iteration: 106063
loss: 1.0274920463562012,grad_norm: 0.9999991751969484, iteration: 106064
loss: 1.0238628387451172,grad_norm: 0.9999991987031244, iteration: 106065
loss: 1.0777415037155151,grad_norm: 0.9411733701330214, iteration: 106066
loss: 1.0578672885894775,grad_norm: 0.9796272853803865, iteration: 106067
loss: 0.9827159643173218,grad_norm: 0.7829622233537193, iteration: 106068
loss: 1.1495496034622192,grad_norm: 0.9999996173604797, iteration: 106069
loss: 1.223987102508545,grad_norm: 0.9999993392255202, iteration: 106070
loss: 1.0414392948150635,grad_norm: 0.85524952410462, iteration: 106071
loss: 1.0559067726135254,grad_norm: 0.9999995161567761, iteration: 106072
loss: 1.0970619916915894,grad_norm: 0.9999992724916468, iteration: 106073
loss: 1.0157192945480347,grad_norm: 0.8089346862787317, iteration: 106074
loss: 1.1175816059112549,grad_norm: 0.8895208129250609, iteration: 106075
loss: 1.0294724702835083,grad_norm: 0.9999993574571007, iteration: 106076
loss: 1.159591555595398,grad_norm: 0.9999997184655173, iteration: 106077
loss: 1.0398824214935303,grad_norm: 0.8441045421718257, iteration: 106078
loss: 1.131819248199463,grad_norm: 0.9999994334243485, iteration: 106079
loss: 1.0137429237365723,grad_norm: 0.8361189911037357, iteration: 106080
loss: 1.0962622165679932,grad_norm: 0.9999992691876833, iteration: 106081
loss: 1.0389167070388794,grad_norm: 0.8520326428130525, iteration: 106082
loss: 1.1025269031524658,grad_norm: 0.9999993052998236, iteration: 106083
loss: 1.0090848207473755,grad_norm: 0.9069733250162844, iteration: 106084
loss: 1.0973228216171265,grad_norm: 0.9999996690895848, iteration: 106085
loss: 1.0691354274749756,grad_norm: 0.999999808043335, iteration: 106086
loss: 1.0024137496948242,grad_norm: 0.8103635607800874, iteration: 106087
loss: 1.118214726448059,grad_norm: 1.0000000739742345, iteration: 106088
loss: 1.0657089948654175,grad_norm: 0.9999996549293243, iteration: 106089
loss: 1.1058577299118042,grad_norm: 0.9999996486824032, iteration: 106090
loss: 1.0283784866333008,grad_norm: 0.9999990694605988, iteration: 106091
loss: 1.0867515802383423,grad_norm: 0.9999998401551446, iteration: 106092
loss: 1.0373549461364746,grad_norm: 0.9232734963138687, iteration: 106093
loss: 1.1638463735580444,grad_norm: 0.9999999427617899, iteration: 106094
loss: 1.015857458114624,grad_norm: 0.999999761841371, iteration: 106095
loss: 1.1099097728729248,grad_norm: 0.9999997549354109, iteration: 106096
loss: 1.0242979526519775,grad_norm: 0.9999993968475462, iteration: 106097
loss: 1.0489065647125244,grad_norm: 0.8657973639563799, iteration: 106098
loss: 1.1537749767303467,grad_norm: 0.9999995009912193, iteration: 106099
loss: 1.0231215953826904,grad_norm: 0.9999998664050475, iteration: 106100
loss: 1.0673658847808838,grad_norm: 0.9999992708693272, iteration: 106101
loss: 1.1843518018722534,grad_norm: 0.9999996251607938, iteration: 106102
loss: 1.0162606239318848,grad_norm: 0.9999996113193125, iteration: 106103
loss: 1.0669140815734863,grad_norm: 0.8804879175713363, iteration: 106104
loss: 1.1244219541549683,grad_norm: 0.999999679898544, iteration: 106105
loss: 1.1695221662521362,grad_norm: 0.999999099244177, iteration: 106106
loss: 1.078432559967041,grad_norm: 0.9999995181262358, iteration: 106107
loss: 1.0490009784698486,grad_norm: 0.9999990304339944, iteration: 106108
loss: 1.0330836772918701,grad_norm: 0.85983150796908, iteration: 106109
loss: 1.054195523262024,grad_norm: 0.9999998456302837, iteration: 106110
loss: 1.1275663375854492,grad_norm: 0.9999997218683403, iteration: 106111
loss: 1.3000880479812622,grad_norm: 0.9999997641369883, iteration: 106112
loss: 1.0724866390228271,grad_norm: 0.8394530230281919, iteration: 106113
loss: 1.109250545501709,grad_norm: 0.9999990747019704, iteration: 106114
loss: 1.0339568853378296,grad_norm: 0.9133398386486461, iteration: 106115
loss: 1.2653460502624512,grad_norm: 0.999999373421668, iteration: 106116
loss: 0.9923717379570007,grad_norm: 0.9336595973729731, iteration: 106117
loss: 1.1079226732254028,grad_norm: 0.9999993955879114, iteration: 106118
loss: 1.1975094079971313,grad_norm: 0.9999998823154357, iteration: 106119
loss: 1.0850709676742554,grad_norm: 0.9999992251469229, iteration: 106120
loss: 1.0922014713287354,grad_norm: 0.9999999500805496, iteration: 106121
loss: 1.08491849899292,grad_norm: 0.9999990777049318, iteration: 106122
loss: 1.1510425806045532,grad_norm: 0.9999994037489944, iteration: 106123
loss: 1.1108392477035522,grad_norm: 0.9999992517567429, iteration: 106124
loss: 1.0538891553878784,grad_norm: 0.9999993260897513, iteration: 106125
loss: 1.289422869682312,grad_norm: 0.9999998225628831, iteration: 106126
loss: 1.1699798107147217,grad_norm: 0.9999995766827113, iteration: 106127
loss: 1.1727015972137451,grad_norm: 0.9999995459204447, iteration: 106128
loss: 1.0700349807739258,grad_norm: 0.9999993093652816, iteration: 106129
loss: 1.340856671333313,grad_norm: 0.9999995475452277, iteration: 106130
loss: 1.2274943590164185,grad_norm: 0.999999902334554, iteration: 106131
loss: 1.188172698020935,grad_norm: 0.9999995965598031, iteration: 106132
loss: 1.026743769645691,grad_norm: 0.9838458331964446, iteration: 106133
loss: 1.1637742519378662,grad_norm: 0.9999993297168934, iteration: 106134
loss: 1.1309374570846558,grad_norm: 0.9999993401800026, iteration: 106135
loss: 1.0105940103530884,grad_norm: 0.9999991035778055, iteration: 106136
loss: 1.0311826467514038,grad_norm: 0.9999989842383199, iteration: 106137
loss: 1.0238031148910522,grad_norm: 0.9303184607672822, iteration: 106138
loss: 1.0783097743988037,grad_norm: 0.9999992664355821, iteration: 106139
loss: 1.010779619216919,grad_norm: 0.9999997288418037, iteration: 106140
loss: 1.0112113952636719,grad_norm: 0.7031521423124781, iteration: 106141
loss: 1.1271189451217651,grad_norm: 0.9999992545954581, iteration: 106142
loss: 1.1995046138763428,grad_norm: 0.9999992426461898, iteration: 106143
loss: 1.0896607637405396,grad_norm: 0.999999241497635, iteration: 106144
loss: 1.122498869895935,grad_norm: 0.9999992981360012, iteration: 106145
loss: 1.2758079767227173,grad_norm: 0.9999995974488323, iteration: 106146
loss: 1.0063902139663696,grad_norm: 0.9014324139826192, iteration: 106147
loss: 1.4639480113983154,grad_norm: 0.9999996287020204, iteration: 106148
loss: 1.0238724946975708,grad_norm: 0.9999993160153682, iteration: 106149
loss: 1.2317473888397217,grad_norm: 0.9999996787215641, iteration: 106150
loss: 0.9859899282455444,grad_norm: 0.9999991237255837, iteration: 106151
loss: 1.2270184755325317,grad_norm: 0.9999997962440738, iteration: 106152
loss: 1.1265932321548462,grad_norm: 0.9999991383450186, iteration: 106153
loss: 1.0852971076965332,grad_norm: 0.8914258323588109, iteration: 106154
loss: 1.129594326019287,grad_norm: 0.9999993526277854, iteration: 106155
loss: 1.1718906164169312,grad_norm: 0.9999996880651194, iteration: 106156
loss: 1.0010820627212524,grad_norm: 0.9999999592063189, iteration: 106157
loss: 1.229408621788025,grad_norm: 0.999999601174021, iteration: 106158
loss: 1.5894279479980469,grad_norm: 0.999999503067944, iteration: 106159
loss: 1.328000545501709,grad_norm: 0.9999994496091565, iteration: 106160
loss: 1.035560965538025,grad_norm: 0.9999989775333237, iteration: 106161
loss: 1.1869475841522217,grad_norm: 0.9999996829072443, iteration: 106162
loss: 1.0892220735549927,grad_norm: 0.9999997068905458, iteration: 106163
loss: 1.2715996503829956,grad_norm: 0.9999994743419949, iteration: 106164
loss: 1.1891577243804932,grad_norm: 0.9999996865955257, iteration: 106165
loss: 1.1404695510864258,grad_norm: 0.999999216447774, iteration: 106166
loss: 1.1401129961013794,grad_norm: 0.9999996392559892, iteration: 106167
loss: 1.1921004056930542,grad_norm: 0.9999995733663777, iteration: 106168
loss: 1.172520399093628,grad_norm: 0.9999991038312956, iteration: 106169
loss: 1.5047624111175537,grad_norm: 0.9999995872115325, iteration: 106170
loss: 1.2952263355255127,grad_norm: 0.9999995733122956, iteration: 106171
loss: 1.4858955144882202,grad_norm: 0.9999997113934925, iteration: 106172
loss: 1.2462114095687866,grad_norm: 0.9999992143849312, iteration: 106173
loss: 1.2609220743179321,grad_norm: 0.9999995070056069, iteration: 106174
loss: 1.2545703649520874,grad_norm: 0.9999994811981204, iteration: 106175
loss: 1.4707155227661133,grad_norm: 0.9999998118908711, iteration: 106176
loss: 1.4592400789260864,grad_norm: 1.0000000725373785, iteration: 106177
loss: 1.285902500152588,grad_norm: 0.9999992180302899, iteration: 106178
loss: 1.4217274188995361,grad_norm: 0.9999998628655461, iteration: 106179
loss: 1.1073124408721924,grad_norm: 0.9999999842337438, iteration: 106180
loss: 1.4540388584136963,grad_norm: 0.999999661241263, iteration: 106181
loss: 1.3000842332839966,grad_norm: 0.9999993626350946, iteration: 106182
loss: 1.4382840394973755,grad_norm: 0.9999995355009337, iteration: 106183
loss: 1.3084497451782227,grad_norm: 0.9999998857685862, iteration: 106184
loss: 1.1369599103927612,grad_norm: 0.9999991213968367, iteration: 106185
loss: 1.5040627717971802,grad_norm: 0.999999559575485, iteration: 106186
loss: 1.2020390033721924,grad_norm: 0.9999993884490408, iteration: 106187
loss: 1.073219656944275,grad_norm: 0.8406781131480404, iteration: 106188
loss: 1.2442582845687866,grad_norm: 0.9999997218343322, iteration: 106189
loss: 1.3084958791732788,grad_norm: 0.9999996678909371, iteration: 106190
loss: 1.1134989261627197,grad_norm: 0.999999711991631, iteration: 106191
loss: 1.4488582611083984,grad_norm: 0.9999998977507459, iteration: 106192
loss: 1.0602036714553833,grad_norm: 0.9999990071285206, iteration: 106193
loss: 1.402616024017334,grad_norm: 0.999999370446826, iteration: 106194
loss: 1.6478195190429688,grad_norm: 0.9999998395975671, iteration: 106195
loss: 1.1219998598098755,grad_norm: 0.9999992569137911, iteration: 106196
loss: 1.132516622543335,grad_norm: 0.9999996863673498, iteration: 106197
loss: 1.3034037351608276,grad_norm: 0.999999255244452, iteration: 106198
loss: 1.2605528831481934,grad_norm: 0.9999992915976974, iteration: 106199
loss: 1.5756701231002808,grad_norm: 0.9999997828341848, iteration: 106200
loss: 1.4595577716827393,grad_norm: 1.0000000292425681, iteration: 106201
loss: 1.1681811809539795,grad_norm: 0.9999994221184554, iteration: 106202
loss: 1.5838384628295898,grad_norm: 0.999999836919008, iteration: 106203
loss: 1.3781042098999023,grad_norm: 0.9999994820852, iteration: 106204
loss: 1.5550304651260376,grad_norm: 0.9999993931569233, iteration: 106205
loss: 1.502841591835022,grad_norm: 0.9999997303485724, iteration: 106206
loss: 1.4713002443313599,grad_norm: 0.9999995238443702, iteration: 106207
loss: 1.1424599885940552,grad_norm: 0.9999997099905764, iteration: 106208
loss: 1.313020944595337,grad_norm: 0.9999998809267092, iteration: 106209
loss: 1.5421738624572754,grad_norm: 0.999999694190399, iteration: 106210
loss: 1.398653268814087,grad_norm: 0.9999998133018115, iteration: 106211
loss: 1.3441574573516846,grad_norm: 0.9999998973765961, iteration: 106212
loss: 1.4578526020050049,grad_norm: 0.9999997130408695, iteration: 106213
loss: 1.2492597103118896,grad_norm: 0.9999993077532675, iteration: 106214
loss: 1.0940264463424683,grad_norm: 0.9999990838541049, iteration: 106215
loss: 1.4073666334152222,grad_norm: 0.9999997298330263, iteration: 106216
loss: 1.1002891063690186,grad_norm: 0.9999992593545504, iteration: 106217
loss: 1.2268099784851074,grad_norm: 0.999999654978101, iteration: 106218
loss: 1.1826523542404175,grad_norm: 0.999999313482746, iteration: 106219
loss: 1.2157825231552124,grad_norm: 0.9999995821952673, iteration: 106220
loss: 1.3361510038375854,grad_norm: 0.999999921414921, iteration: 106221
loss: 1.404152750968933,grad_norm: 0.9999995730438224, iteration: 106222
loss: 1.2213194370269775,grad_norm: 0.999999856263264, iteration: 106223
loss: 1.0772534608840942,grad_norm: 0.9695535908887364, iteration: 106224
loss: 1.4112212657928467,grad_norm: 0.999999724918695, iteration: 106225
loss: 1.2442866563796997,grad_norm: 0.9999994750720322, iteration: 106226
loss: 1.232066035270691,grad_norm: 0.9999994902300046, iteration: 106227
loss: 1.0202137231826782,grad_norm: 0.9999992880436744, iteration: 106228
loss: 1.3349665403366089,grad_norm: 0.9999998665902267, iteration: 106229
loss: 1.338082194328308,grad_norm: 0.9999998850281282, iteration: 106230
loss: 1.3199321031570435,grad_norm: 0.9999999186504975, iteration: 106231
loss: 1.3505877256393433,grad_norm: 0.9999997138162399, iteration: 106232
loss: 1.1086468696594238,grad_norm: 1.0000000213185156, iteration: 106233
loss: 1.1648294925689697,grad_norm: 0.9999999211144647, iteration: 106234
loss: 1.5341463088989258,grad_norm: 0.9999996999171812, iteration: 106235
loss: 1.1529489755630493,grad_norm: 0.9999999403531795, iteration: 106236
loss: 1.4374799728393555,grad_norm: 0.9999999041029386, iteration: 106237
loss: 1.202329397201538,grad_norm: 0.9999995540095962, iteration: 106238
loss: 1.239898681640625,grad_norm: 0.9999997210169183, iteration: 106239
loss: 1.296428918838501,grad_norm: 0.9999994856995346, iteration: 106240
loss: 1.1153674125671387,grad_norm: 0.9999999602863848, iteration: 106241
loss: 1.1145235300064087,grad_norm: 0.9999997310926234, iteration: 106242
loss: 1.249313473701477,grad_norm: 0.9999992897676089, iteration: 106243
loss: 1.223368763923645,grad_norm: 0.9999996228353474, iteration: 106244
loss: 1.0495096445083618,grad_norm: 0.9999991660039441, iteration: 106245
loss: 1.1535671949386597,grad_norm: 0.9999995753596311, iteration: 106246
loss: 0.9961865544319153,grad_norm: 0.9999998614210405, iteration: 106247
loss: 1.4927127361297607,grad_norm: 0.9999995396978851, iteration: 106248
loss: 1.0973690748214722,grad_norm: 0.9999992652526182, iteration: 106249
loss: 1.1831097602844238,grad_norm: 0.9999995399199306, iteration: 106250
loss: 1.2639565467834473,grad_norm: 0.9999997699238765, iteration: 106251
loss: 1.0488122701644897,grad_norm: 0.9999994457913183, iteration: 106252
loss: 1.0984638929367065,grad_norm: 0.9999996587947995, iteration: 106253
loss: 1.3885337114334106,grad_norm: 0.9999999272533335, iteration: 106254
loss: 1.053330659866333,grad_norm: 0.9999989563964367, iteration: 106255
loss: 1.1652185916900635,grad_norm: 0.9999993094510701, iteration: 106256
loss: 1.0661065578460693,grad_norm: 0.9999992869857083, iteration: 106257
loss: 1.1700562238693237,grad_norm: 0.9999999267062268, iteration: 106258
loss: 1.0643750429153442,grad_norm: 0.9999990451386759, iteration: 106259
loss: 1.5468190908432007,grad_norm: 0.9999996722734299, iteration: 106260
loss: 1.2903861999511719,grad_norm: 0.9999993905917569, iteration: 106261
loss: 1.0619838237762451,grad_norm: 0.9999998130877656, iteration: 106262
loss: 1.1430169343948364,grad_norm: 0.9999996305535391, iteration: 106263
loss: 1.0365346670150757,grad_norm: 0.9999995156861764, iteration: 106264
loss: 1.136073350906372,grad_norm: 0.9999996127708053, iteration: 106265
loss: 1.049655556678772,grad_norm: 0.9999994179243046, iteration: 106266
loss: 1.1535390615463257,grad_norm: 0.9999996162654899, iteration: 106267
loss: 1.1588772535324097,grad_norm: 0.9999992432625862, iteration: 106268
loss: 1.1257309913635254,grad_norm: 0.9999992578220372, iteration: 106269
loss: 1.3581819534301758,grad_norm: 0.9999999779528619, iteration: 106270
loss: 1.0970768928527832,grad_norm: 0.9999992041433173, iteration: 106271
loss: 1.0187642574310303,grad_norm: 0.9999993088917114, iteration: 106272
loss: 1.1659519672393799,grad_norm: 0.9999999246250671, iteration: 106273
loss: 1.2621054649353027,grad_norm: 0.9999997865906495, iteration: 106274
loss: 1.1037945747375488,grad_norm: 0.9999992602236331, iteration: 106275
loss: 1.259161114692688,grad_norm: 0.9999999070245378, iteration: 106276
loss: 1.0178890228271484,grad_norm: 0.9999997341797915, iteration: 106277
loss: 1.0979511737823486,grad_norm: 0.9999999319321917, iteration: 106278
loss: 1.2100275754928589,grad_norm: 1.0000000213813631, iteration: 106279
loss: 1.2225570678710938,grad_norm: 0.9999998701751515, iteration: 106280
loss: 1.1054553985595703,grad_norm: 0.9999998342283206, iteration: 106281
loss: 1.0344740152359009,grad_norm: 0.9999995122412614, iteration: 106282
loss: 1.1250096559524536,grad_norm: 0.9384753176045223, iteration: 106283
loss: 1.0033665895462036,grad_norm: 0.8950712765284702, iteration: 106284
loss: 1.025299310684204,grad_norm: 0.9855168817127303, iteration: 106285
loss: 1.2753608226776123,grad_norm: 0.9999999146232784, iteration: 106286
loss: 1.0766854286193848,grad_norm: 0.9999991646278614, iteration: 106287
loss: 1.319433569908142,grad_norm: 0.9999999788094284, iteration: 106288
loss: 1.0238184928894043,grad_norm: 1.0000000250374377, iteration: 106289
loss: 1.1652034521102905,grad_norm: 0.9999992405656046, iteration: 106290
loss: 1.1500444412231445,grad_norm: 0.9999991931654938, iteration: 106291
loss: 1.1266648769378662,grad_norm: 0.9999998852446896, iteration: 106292
loss: 1.1571059226989746,grad_norm: 0.9999996944743463, iteration: 106293
loss: 1.0599721670150757,grad_norm: 0.9457791596970072, iteration: 106294
loss: 1.0881531238555908,grad_norm: 0.9999998993016348, iteration: 106295
loss: 1.0807900428771973,grad_norm: 0.9999993234444391, iteration: 106296
loss: 1.1320524215698242,grad_norm: 0.9999991430539568, iteration: 106297
loss: 1.0540552139282227,grad_norm: 0.9999999589192583, iteration: 106298
loss: 1.0605238676071167,grad_norm: 0.9999996342542419, iteration: 106299
loss: 1.2752994298934937,grad_norm: 1.0000000009292966, iteration: 106300
loss: 1.169620156288147,grad_norm: 0.999999477186063, iteration: 106301
loss: 1.070602297782898,grad_norm: 0.9999991519855905, iteration: 106302
loss: 1.1595596075057983,grad_norm: 0.9999994941100679, iteration: 106303
loss: 1.0251264572143555,grad_norm: 0.9999991420100922, iteration: 106304
loss: 1.0767067670822144,grad_norm: 0.9796291013294006, iteration: 106305
loss: 1.1376558542251587,grad_norm: 0.999999512707207, iteration: 106306
loss: 1.2832564115524292,grad_norm: 0.999999587389337, iteration: 106307
loss: 1.0149407386779785,grad_norm: 0.7979974501048764, iteration: 106308
loss: 1.1881577968597412,grad_norm: 0.9999992629276249, iteration: 106309
loss: 1.1283345222473145,grad_norm: 0.9999998330057861, iteration: 106310
loss: 1.0397673845291138,grad_norm: 0.9999992759510973, iteration: 106311
loss: 1.128409504890442,grad_norm: 1.0000000074285378, iteration: 106312
loss: 1.1544054746627808,grad_norm: 0.9999996584693664, iteration: 106313
loss: 1.1649038791656494,grad_norm: 0.99999991929327, iteration: 106314
loss: 0.9998634457588196,grad_norm: 0.9999990260475043, iteration: 106315
loss: 1.0390129089355469,grad_norm: 0.9638953111864104, iteration: 106316
loss: 1.170324444770813,grad_norm: 0.9999999184344588, iteration: 106317
loss: 1.2505865097045898,grad_norm: 0.9999997239680775, iteration: 106318
loss: 1.0372554063796997,grad_norm: 0.9999992066963744, iteration: 106319
loss: 1.2303876876831055,grad_norm: 0.9999996577020966, iteration: 106320
loss: 1.092623233795166,grad_norm: 0.9999995950798812, iteration: 106321
loss: 1.2975870370864868,grad_norm: 0.9999999121127009, iteration: 106322
loss: 1.0947104692459106,grad_norm: 0.9999994810150514, iteration: 106323
loss: 1.0139085054397583,grad_norm: 0.9999991389645938, iteration: 106324
loss: 1.0286601781845093,grad_norm: 0.9999995636353024, iteration: 106325
loss: 1.3485655784606934,grad_norm: 0.9999998524610249, iteration: 106326
loss: 1.1995569467544556,grad_norm: 0.9999999120511371, iteration: 106327
loss: 1.2007052898406982,grad_norm: 0.9999998441917206, iteration: 106328
loss: 1.1171700954437256,grad_norm: 0.9999998371252301, iteration: 106329
loss: 1.0346782207489014,grad_norm: 0.9999994388411856, iteration: 106330
loss: 1.258992314338684,grad_norm: 0.9999995935512898, iteration: 106331
loss: 1.009359359741211,grad_norm: 0.9999999408646941, iteration: 106332
loss: 1.1650152206420898,grad_norm: 0.9999995536551751, iteration: 106333
loss: 1.10588800907135,grad_norm: 1.0000000112070795, iteration: 106334
loss: 1.052872657775879,grad_norm: 0.9642144623940319, iteration: 106335
loss: 1.163873314857483,grad_norm: 0.9999997214664831, iteration: 106336
loss: 1.0937756299972534,grad_norm: 0.9999995302592913, iteration: 106337
loss: 1.0079231262207031,grad_norm: 0.9999990985786015, iteration: 106338
loss: 1.093610167503357,grad_norm: 0.9999996105962699, iteration: 106339
loss: 1.0529298782348633,grad_norm: 0.9999998391814768, iteration: 106340
loss: 1.0957578420639038,grad_norm: 0.9999996707125878, iteration: 106341
loss: 1.1130608320236206,grad_norm: 0.9999990929571718, iteration: 106342
loss: 1.0234671831130981,grad_norm: 0.9295079551668969, iteration: 106343
loss: 1.0057166814804077,grad_norm: 0.9999994660840467, iteration: 106344
loss: 1.0824378728866577,grad_norm: 0.9999994351812866, iteration: 106345
loss: 1.1735584735870361,grad_norm: 1.0000000301939842, iteration: 106346
loss: 1.1010075807571411,grad_norm: 0.9999998466577583, iteration: 106347
loss: 1.296561360359192,grad_norm: 0.999999789938093, iteration: 106348
loss: 1.1851773262023926,grad_norm: 0.9999996997486795, iteration: 106349
loss: 1.0059292316436768,grad_norm: 0.9999991722985981, iteration: 106350
loss: 0.9725401997566223,grad_norm: 0.8746753884284016, iteration: 106351
loss: 1.0835671424865723,grad_norm: 0.999999126454883, iteration: 106352
loss: 1.0919736623764038,grad_norm: 0.9999998569254558, iteration: 106353
loss: 1.090205430984497,grad_norm: 0.9999997408058756, iteration: 106354
loss: 1.0610886812210083,grad_norm: 0.9999993295540848, iteration: 106355
loss: 1.1328140497207642,grad_norm: 1.0000000446365287, iteration: 106356
loss: 1.0234978199005127,grad_norm: 0.9999999291433957, iteration: 106357
loss: 1.0065042972564697,grad_norm: 0.9262653334816131, iteration: 106358
loss: 1.1492640972137451,grad_norm: 0.9999993328973978, iteration: 106359
loss: 1.035956621170044,grad_norm: 0.9999990988970641, iteration: 106360
loss: 1.0580735206604004,grad_norm: 0.9999994504696659, iteration: 106361
loss: 1.1048532724380493,grad_norm: 0.9999998683964618, iteration: 106362
loss: 1.085321068763733,grad_norm: 0.9999993808770453, iteration: 106363
loss: 1.1701512336730957,grad_norm: 0.9999998795775575, iteration: 106364
loss: 1.2196000814437866,grad_norm: 0.9999995971250837, iteration: 106365
loss: 1.1214553117752075,grad_norm: 0.9999998089252382, iteration: 106366
loss: 1.0066170692443848,grad_norm: 0.9999996053627274, iteration: 106367
loss: 1.089666485786438,grad_norm: 0.9999996069844387, iteration: 106368
loss: 1.0874114036560059,grad_norm: 0.9999995803779637, iteration: 106369
loss: 1.091681718826294,grad_norm: 0.9999994018058513, iteration: 106370
loss: 1.0805981159210205,grad_norm: 0.9999991395721851, iteration: 106371
loss: 1.0512582063674927,grad_norm: 0.9999996392976835, iteration: 106372
loss: 1.0535988807678223,grad_norm: 0.9999995720797826, iteration: 106373
loss: 1.0014379024505615,grad_norm: 0.9415807041681686, iteration: 106374
loss: 1.1739650964736938,grad_norm: 0.9999995206507425, iteration: 106375
loss: 1.2283389568328857,grad_norm: 0.9999996060482854, iteration: 106376
loss: 1.1941930055618286,grad_norm: 0.9999997277225111, iteration: 106377
loss: 1.1062418222427368,grad_norm: 0.9999990559091083, iteration: 106378
loss: 1.3020061254501343,grad_norm: 0.9999996834906304, iteration: 106379
loss: 1.021438717842102,grad_norm: 0.9581858154381608, iteration: 106380
loss: 1.118056058883667,grad_norm: 0.9999998314965186, iteration: 106381
loss: 1.1133702993392944,grad_norm: 0.9999992268374943, iteration: 106382
loss: 1.0859229564666748,grad_norm: 0.9999992155424087, iteration: 106383
loss: 0.9741946458816528,grad_norm: 0.9481679921257551, iteration: 106384
loss: 1.2423685789108276,grad_norm: 1.0000000369204751, iteration: 106385
loss: 1.1621495485305786,grad_norm: 0.9999997029905128, iteration: 106386
loss: 1.1617841720581055,grad_norm: 0.9999999856617133, iteration: 106387
loss: 1.0386936664581299,grad_norm: 0.9999993155854134, iteration: 106388
loss: 1.1085950136184692,grad_norm: 0.9999994348532619, iteration: 106389
loss: 1.0142098665237427,grad_norm: 0.9132263212494324, iteration: 106390
loss: 1.0560897588729858,grad_norm: 0.9999995858903625, iteration: 106391
loss: 1.0052690505981445,grad_norm: 0.9499324703851556, iteration: 106392
loss: 1.1713967323303223,grad_norm: 0.9999995869023937, iteration: 106393
loss: 1.1108245849609375,grad_norm: 0.9999991374662368, iteration: 106394
loss: 1.0167335271835327,grad_norm: 0.9999990200345421, iteration: 106395
loss: 1.1466782093048096,grad_norm: 0.9999994382709104, iteration: 106396
loss: 1.031546950340271,grad_norm: 0.9999995323918889, iteration: 106397
loss: 1.005374789237976,grad_norm: 0.999999451713202, iteration: 106398
loss: 1.0149815082550049,grad_norm: 0.9999999027843488, iteration: 106399
loss: 1.0594542026519775,grad_norm: 0.999999495564275, iteration: 106400
loss: 1.0682072639465332,grad_norm: 0.9999993024336576, iteration: 106401
loss: 1.128836989402771,grad_norm: 0.9999994953535027, iteration: 106402
loss: 1.1366634368896484,grad_norm: 0.9999997165069374, iteration: 106403
loss: 1.061398983001709,grad_norm: 0.9999992764966807, iteration: 106404
loss: 1.1187936067581177,grad_norm: 0.9999992130584944, iteration: 106405
loss: 1.006032109260559,grad_norm: 0.9999991937651508, iteration: 106406
loss: 1.0750995874404907,grad_norm: 0.9999996946985705, iteration: 106407
loss: 1.208296537399292,grad_norm: 1.0000000139089635, iteration: 106408
loss: 1.025382161140442,grad_norm: 0.8773691580107585, iteration: 106409
loss: 1.2080445289611816,grad_norm: 0.9999997619707279, iteration: 106410
loss: 1.169075608253479,grad_norm: 0.9999999511968852, iteration: 106411
loss: 1.1150552034378052,grad_norm: 0.9999997010008136, iteration: 106412
loss: 1.0744198560714722,grad_norm: 0.9999990713487243, iteration: 106413
loss: 1.1026697158813477,grad_norm: 0.9999994097249332, iteration: 106414
loss: 1.1504451036453247,grad_norm: 0.9999997055702302, iteration: 106415
loss: 1.1573179960250854,grad_norm: 0.9999993759056901, iteration: 106416
loss: 1.0755537748336792,grad_norm: 0.9999994282026118, iteration: 106417
loss: 1.0144884586334229,grad_norm: 0.9999998371874632, iteration: 106418
loss: 1.0533822774887085,grad_norm: 0.9999994264673291, iteration: 106419
loss: 1.141987681388855,grad_norm: 0.9999992169329304, iteration: 106420
loss: 1.012122631072998,grad_norm: 0.9078794852496729, iteration: 106421
loss: 1.1263339519500732,grad_norm: 0.9999999653073114, iteration: 106422
loss: 1.0820673704147339,grad_norm: 0.9999996368032897, iteration: 106423
loss: 1.37760329246521,grad_norm: 1.000000002457298, iteration: 106424
loss: 1.0606169700622559,grad_norm: 0.9999999018848785, iteration: 106425
loss: 1.0450900793075562,grad_norm: 0.999999115624143, iteration: 106426
loss: 1.210752010345459,grad_norm: 0.9999996965063608, iteration: 106427
loss: 1.2223213911056519,grad_norm: 0.9999997440524396, iteration: 106428
loss: 1.2114399671554565,grad_norm: 0.99999987734651, iteration: 106429
loss: 1.136146903038025,grad_norm: 0.9999997611807409, iteration: 106430
loss: 1.0599533319473267,grad_norm: 0.9398370702359197, iteration: 106431
loss: 1.2387449741363525,grad_norm: 0.9999996750094925, iteration: 106432
loss: 1.248863935470581,grad_norm: 0.999999684981614, iteration: 106433
loss: 1.1007497310638428,grad_norm: 0.9999990429933888, iteration: 106434
loss: 1.117647647857666,grad_norm: 0.9999996775904265, iteration: 106435
loss: 1.1590070724487305,grad_norm: 0.9999997239356764, iteration: 106436
loss: 1.294130563735962,grad_norm: 1.000000139215195, iteration: 106437
loss: 1.1002434492111206,grad_norm: 0.9999992208574819, iteration: 106438
loss: 1.1267986297607422,grad_norm: 0.999999810029296, iteration: 106439
loss: 1.1180768013000488,grad_norm: 0.9999997770599228, iteration: 106440
loss: 0.9591109752655029,grad_norm: 0.8635431395015424, iteration: 106441
loss: 1.1344300508499146,grad_norm: 0.9999992716787562, iteration: 106442
loss: 1.039232611656189,grad_norm: 0.9999996968623106, iteration: 106443
loss: 1.3287707567214966,grad_norm: 0.9999998052490848, iteration: 106444
loss: 1.450369954109192,grad_norm: 0.9999998650848687, iteration: 106445
loss: 1.1376157999038696,grad_norm: 0.9999993160517221, iteration: 106446
loss: 1.0050047636032104,grad_norm: 0.9999995872925543, iteration: 106447
loss: 1.2896716594696045,grad_norm: 0.9999997343584487, iteration: 106448
loss: 1.0473015308380127,grad_norm: 0.9303856854481548, iteration: 106449
loss: 1.2284703254699707,grad_norm: 0.9999999589470335, iteration: 106450
loss: 1.1475632190704346,grad_norm: 0.9999997834341956, iteration: 106451
loss: 1.192348599433899,grad_norm: 0.9999999638882202, iteration: 106452
loss: 1.0653424263000488,grad_norm: 0.999999303532471, iteration: 106453
loss: 1.2241995334625244,grad_norm: 1.000000047407694, iteration: 106454
loss: 1.2851649522781372,grad_norm: 0.9999998941870384, iteration: 106455
loss: 1.1089576482772827,grad_norm: 0.9999997620075527, iteration: 106456
loss: 1.3201899528503418,grad_norm: 0.9999998506766195, iteration: 106457
loss: 1.0816481113433838,grad_norm: 0.9999993657560525, iteration: 106458
loss: 1.1086028814315796,grad_norm: 0.9999999806938277, iteration: 106459
loss: 1.21383798122406,grad_norm: 0.9999999791931019, iteration: 106460
loss: 1.2238216400146484,grad_norm: 0.9999993971693385, iteration: 106461
loss: 1.1304090023040771,grad_norm: 0.9999999305973251, iteration: 106462
loss: 1.103005051612854,grad_norm: 0.999999826135894, iteration: 106463
loss: 1.1370011568069458,grad_norm: 0.999999906488384, iteration: 106464
loss: 1.1675918102264404,grad_norm: 0.9999999338594476, iteration: 106465
loss: 1.0831756591796875,grad_norm: 0.9999999542177848, iteration: 106466
loss: 1.3708051443099976,grad_norm: 0.9999999485305514, iteration: 106467
loss: 1.045982837677002,grad_norm: 0.8878031940518547, iteration: 106468
loss: 1.2248512506484985,grad_norm: 0.9999998034708544, iteration: 106469
loss: 1.244796872138977,grad_norm: 0.9999995474707978, iteration: 106470
loss: 1.093643069267273,grad_norm: 0.99999977750357, iteration: 106471
loss: 1.227262020111084,grad_norm: 0.9999994630750826, iteration: 106472
loss: 1.0784335136413574,grad_norm: 0.9999991840742164, iteration: 106473
loss: 1.0477787256240845,grad_norm: 0.9999998445140816, iteration: 106474
loss: 1.0457206964492798,grad_norm: 0.999999401593572, iteration: 106475
loss: 1.2303284406661987,grad_norm: 0.99999996499273, iteration: 106476
loss: 1.1208219528198242,grad_norm: 0.9999996546594341, iteration: 106477
loss: 1.205426573753357,grad_norm: 0.9999999101734883, iteration: 106478
loss: 1.0689584016799927,grad_norm: 0.9999995137891514, iteration: 106479
loss: 1.074772596359253,grad_norm: 0.9004927119820063, iteration: 106480
loss: 1.1545350551605225,grad_norm: 0.9999996229454206, iteration: 106481
loss: 1.0632268190383911,grad_norm: 0.9999991621643446, iteration: 106482
loss: 1.04425847530365,grad_norm: 1.00000005128681, iteration: 106483
loss: 1.0330522060394287,grad_norm: 0.9999993096051307, iteration: 106484
loss: 1.0313408374786377,grad_norm: 0.9999991393985181, iteration: 106485
loss: 1.1346824169158936,grad_norm: 0.9999998576907658, iteration: 106486
loss: 1.0633244514465332,grad_norm: 0.9999993553746549, iteration: 106487
loss: 1.248561143875122,grad_norm: 0.9999998423453734, iteration: 106488
loss: 1.0864245891571045,grad_norm: 0.7753434657174434, iteration: 106489
loss: 1.2969661951065063,grad_norm: 0.9999992148013492, iteration: 106490
loss: 1.1330556869506836,grad_norm: 0.9999996137456577, iteration: 106491
loss: 1.1252481937408447,grad_norm: 0.9999995934584757, iteration: 106492
loss: 1.3124580383300781,grad_norm: 0.9999997542280669, iteration: 106493
loss: 1.416725754737854,grad_norm: 0.9999997106587689, iteration: 106494
loss: 1.2405190467834473,grad_norm: 0.9999997747276116, iteration: 106495
loss: 1.3679903745651245,grad_norm: 0.9999999910085464, iteration: 106496
loss: 1.123236894607544,grad_norm: 0.9999994815486449, iteration: 106497
loss: 1.3959472179412842,grad_norm: 0.9999999524789944, iteration: 106498
loss: 1.132620930671692,grad_norm: 0.9999992251174276, iteration: 106499
loss: 1.2282655239105225,grad_norm: 0.9999999427659333, iteration: 106500
loss: 1.149742603302002,grad_norm: 0.9999997168679781, iteration: 106501
loss: 1.125261664390564,grad_norm: 0.99999996857762, iteration: 106502
loss: 1.2703028917312622,grad_norm: 0.9999997907382298, iteration: 106503
loss: 1.4354556798934937,grad_norm: 0.9999999828655117, iteration: 106504
loss: 1.191797137260437,grad_norm: 0.9999997366539236, iteration: 106505
loss: 1.313597321510315,grad_norm: 0.9999995886174214, iteration: 106506
loss: 1.331019639968872,grad_norm: 0.9999996098009653, iteration: 106507
loss: 1.3380341529846191,grad_norm: 0.9999999274921687, iteration: 106508
loss: 1.2619004249572754,grad_norm: 0.9999999601549173, iteration: 106509
loss: 1.3077130317687988,grad_norm: 0.9999999886503015, iteration: 106510
loss: 1.2348021268844604,grad_norm: 0.9999992117966556, iteration: 106511
loss: 1.224810004234314,grad_norm: 0.9999998759824474, iteration: 106512
loss: 1.3695106506347656,grad_norm: 0.9999996823444932, iteration: 106513
loss: 1.2786012887954712,grad_norm: 0.9999998651335014, iteration: 106514
loss: 1.5263055562973022,grad_norm: 1.0000001028934193, iteration: 106515
loss: 1.4623688459396362,grad_norm: 0.9999998889543078, iteration: 106516
loss: 1.253382682800293,grad_norm: 0.9999994101056371, iteration: 106517
loss: 1.12870192527771,grad_norm: 0.9999990147971451, iteration: 106518
loss: 1.4270704984664917,grad_norm: 0.9999998881184979, iteration: 106519
loss: 1.0820295810699463,grad_norm: 0.9999998664757974, iteration: 106520
loss: 1.339901328086853,grad_norm: 0.9999995850885365, iteration: 106521
loss: 1.4528119564056396,grad_norm: 0.9999997861088293, iteration: 106522
loss: 1.3571919202804565,grad_norm: 0.9999997989995774, iteration: 106523
loss: 1.2778774499893188,grad_norm: 0.9999999072090814, iteration: 106524
loss: 1.320397138595581,grad_norm: 0.9999994606201903, iteration: 106525
loss: 1.21760094165802,grad_norm: 0.9999997427568352, iteration: 106526
loss: 1.2611608505249023,grad_norm: 0.9999995176819635, iteration: 106527
loss: 1.2617626190185547,grad_norm: 0.9999998888517528, iteration: 106528
loss: 1.4367722272872925,grad_norm: 0.999999651932866, iteration: 106529
loss: 1.175098180770874,grad_norm: 0.9999996775047324, iteration: 106530
loss: 1.397011637687683,grad_norm: 0.9999999471250394, iteration: 106531
loss: 1.223786473274231,grad_norm: 0.9999998002034273, iteration: 106532
loss: 1.1597018241882324,grad_norm: 0.9999998498882077, iteration: 106533
loss: 1.5388034582138062,grad_norm: 0.9999996355344241, iteration: 106534
loss: 1.1139545440673828,grad_norm: 0.9999998505547437, iteration: 106535
loss: 1.3515167236328125,grad_norm: 0.9999999653537621, iteration: 106536
loss: 1.3533759117126465,grad_norm: 0.9999998433758842, iteration: 106537
loss: 1.224984884262085,grad_norm: 1.000000124826477, iteration: 106538
loss: 1.4484062194824219,grad_norm: 0.9999999546149849, iteration: 106539
loss: 1.4109020233154297,grad_norm: 0.9999999934570976, iteration: 106540
loss: 1.3062591552734375,grad_norm: 0.9999999676035956, iteration: 106541
loss: 1.138479232788086,grad_norm: 0.9999996437070683, iteration: 106542
loss: 1.2404717206954956,grad_norm: 0.9999999296886999, iteration: 106543
loss: 1.2313364744186401,grad_norm: 0.9999997004346783, iteration: 106544
loss: 1.8712416887283325,grad_norm: 1.0000000100961437, iteration: 106545
loss: 1.400156855583191,grad_norm: 0.9999999353999971, iteration: 106546
loss: 1.2255761623382568,grad_norm: 0.9999999474983707, iteration: 106547
loss: 1.2201911211013794,grad_norm: 0.9999996725868572, iteration: 106548
loss: 1.1997309923171997,grad_norm: 0.9999999094800047, iteration: 106549
loss: 1.3146978616714478,grad_norm: 1.0000000029820033, iteration: 106550
loss: 1.4551597833633423,grad_norm: 0.999999863209965, iteration: 106551
loss: 1.444656491279602,grad_norm: 0.9999998547355333, iteration: 106552
loss: 1.2165322303771973,grad_norm: 0.9999998575123453, iteration: 106553
loss: 1.4877915382385254,grad_norm: 0.9999998481674687, iteration: 106554
loss: 1.3310552835464478,grad_norm: 0.9999997028705304, iteration: 106555
loss: 1.3647595643997192,grad_norm: 0.9999999409896716, iteration: 106556
loss: 1.311263084411621,grad_norm: 0.999999957021032, iteration: 106557
loss: 1.4349863529205322,grad_norm: 1.0000000380275562, iteration: 106558
loss: 1.9298990964889526,grad_norm: 0.9999998699463013, iteration: 106559
loss: 1.4049628973007202,grad_norm: 0.9999998690818624, iteration: 106560
loss: 1.3130038976669312,grad_norm: 1.00000002148761, iteration: 106561
loss: 1.3765226602554321,grad_norm: 1.0000000080706615, iteration: 106562
loss: 1.3882379531860352,grad_norm: 0.999999899136164, iteration: 106563
loss: 1.5713605880737305,grad_norm: 0.9999999115544537, iteration: 106564
loss: 1.527861475944519,grad_norm: 1.000000042714702, iteration: 106565
loss: 1.4119420051574707,grad_norm: 0.9999999757693242, iteration: 106566
loss: 1.9000180959701538,grad_norm: 0.9999998804587629, iteration: 106567
loss: 1.139974594116211,grad_norm: 0.9999998554081935, iteration: 106568
loss: 1.682837963104248,grad_norm: 0.9999999115489747, iteration: 106569
loss: 1.4108896255493164,grad_norm: 0.9999999019869421, iteration: 106570
loss: 1.651327133178711,grad_norm: 0.9999999171693359, iteration: 106571
loss: 1.3328657150268555,grad_norm: 0.999999883509487, iteration: 106572
loss: 1.5139490365982056,grad_norm: 0.9999999621215788, iteration: 106573
loss: 1.5250037908554077,grad_norm: 0.9999999456606617, iteration: 106574
loss: 1.618442177772522,grad_norm: 0.9999998421684225, iteration: 106575
loss: 1.7007523775100708,grad_norm: 1.0000000199894545, iteration: 106576
loss: 1.3457478284835815,grad_norm: 0.999999964430286, iteration: 106577
loss: 1.5887752771377563,grad_norm: 0.9999998310992011, iteration: 106578
loss: 1.9721553325653076,grad_norm: 1.0000000326240541, iteration: 106579
loss: 1.6008782386779785,grad_norm: 0.9999998920050949, iteration: 106580
loss: 2.00500226020813,grad_norm: 0.9999999152779312, iteration: 106581
loss: 1.3691976070404053,grad_norm: 0.9999999145843111, iteration: 106582
loss: 1.6830568313598633,grad_norm: 0.999999944126535, iteration: 106583
loss: 1.755682349205017,grad_norm: 0.9999999089213645, iteration: 106584
loss: 1.830487847328186,grad_norm: 0.99999988855473, iteration: 106585
loss: 1.5409939289093018,grad_norm: 0.9999999128460789, iteration: 106586
loss: 2.31118106842041,grad_norm: 1.0000000212738591, iteration: 106587
loss: 1.5655508041381836,grad_norm: 0.9999999541771551, iteration: 106588
loss: 1.3979753255844116,grad_norm: 0.9999998824183228, iteration: 106589
loss: 1.5964199304580688,grad_norm: 0.9999998587830784, iteration: 106590
loss: 1.522524356842041,grad_norm: 1.000000025185349, iteration: 106591
loss: 1.3641277551651,grad_norm: 1.0000000968589244, iteration: 106592
loss: 2.0944664478302,grad_norm: 0.9999999972522207, iteration: 106593
loss: 1.8848891258239746,grad_norm: 1.0000000747158986, iteration: 106594
loss: 1.6261916160583496,grad_norm: 1.0000000448819715, iteration: 106595
loss: 1.685523509979248,grad_norm: 0.9999999213694284, iteration: 106596
loss: 1.6707983016967773,grad_norm: 0.99999985124447, iteration: 106597
loss: 1.8045587539672852,grad_norm: 0.9999999899875995, iteration: 106598
loss: 1.9830341339111328,grad_norm: 0.9999999907420607, iteration: 106599
loss: 1.899877667427063,grad_norm: 1.0000000631022796, iteration: 106600
loss: 2.0621490478515625,grad_norm: 0.9999999784346102, iteration: 106601
loss: 1.8595755100250244,grad_norm: 0.9999999140151677, iteration: 106602
loss: 1.955540657043457,grad_norm: 1.000000038942961, iteration: 106603
loss: 2.0464189052581787,grad_norm: 1.0000000012103663, iteration: 106604
loss: 2.241447687149048,grad_norm: 0.9999999288688755, iteration: 106605
loss: 2.387202739715576,grad_norm: 0.9999999251357202, iteration: 106606
loss: 1.7711107730865479,grad_norm: 0.9999999831005072, iteration: 106607
loss: 2.0786051750183105,grad_norm: 0.9999999230192143, iteration: 106608
loss: 2.0491416454315186,grad_norm: 1.0000001013411264, iteration: 106609
loss: 1.7113386392593384,grad_norm: 1.0000000480192146, iteration: 106610
loss: 1.725119709968567,grad_norm: 0.999999924481583, iteration: 106611
loss: 1.7386530637741089,grad_norm: 0.9999998834680199, iteration: 106612
loss: 1.8919997215270996,grad_norm: 0.9999998803189175, iteration: 106613
loss: 1.6010783910751343,grad_norm: 1.0000000112974505, iteration: 106614
loss: 1.6187883615493774,grad_norm: 1.00000001219291, iteration: 106615
loss: 1.7909111976623535,grad_norm: 0.9999999799490856, iteration: 106616
loss: 1.9390368461608887,grad_norm: 0.999999955052713, iteration: 106617
loss: 1.9350930452346802,grad_norm: 1.0000000308934227, iteration: 106618
loss: 1.9271624088287354,grad_norm: 0.9999998951259422, iteration: 106619
loss: 2.046398401260376,grad_norm: 0.9999999213102344, iteration: 106620
loss: 1.9041322469711304,grad_norm: 1.000000045812222, iteration: 106621
loss: 1.639338493347168,grad_norm: 0.9999998747685949, iteration: 106622
loss: 1.6091196537017822,grad_norm: 0.9999999787915788, iteration: 106623
loss: 1.6908957958221436,grad_norm: 0.999999949067919, iteration: 106624
loss: 1.9455997943878174,grad_norm: 0.9999999651959665, iteration: 106625
loss: 1.8880693912506104,grad_norm: 0.9999998533485113, iteration: 106626
loss: 1.8337609767913818,grad_norm: 0.9999998981797898, iteration: 106627
loss: 1.8032937049865723,grad_norm: 1.000000001513362, iteration: 106628
loss: 1.680713176727295,grad_norm: 0.9999999874840598, iteration: 106629
loss: 1.516726016998291,grad_norm: 1.0000000599596464, iteration: 106630
loss: 1.781840205192566,grad_norm: 0.9999998926636209, iteration: 106631
loss: 1.7825162410736084,grad_norm: 0.9999999644374136, iteration: 106632
loss: 1.6253471374511719,grad_norm: 0.9999998978566356, iteration: 106633
loss: 1.7503365278244019,grad_norm: 0.999999989712372, iteration: 106634
loss: 1.5419687032699585,grad_norm: 0.9999999883110258, iteration: 106635
loss: 1.5432049036026,grad_norm: 0.9999999762997483, iteration: 106636
loss: 1.8900187015533447,grad_norm: 0.9999999440313551, iteration: 106637
loss: 1.3934072256088257,grad_norm: 0.9999997108813875, iteration: 106638
loss: 1.3107151985168457,grad_norm: 0.9999998981804454, iteration: 106639
loss: 1.6795910596847534,grad_norm: 1.0000000104729507, iteration: 106640
loss: 1.3988471031188965,grad_norm: 0.9999998192184248, iteration: 106641
loss: 1.3597403764724731,grad_norm: 1.0000000751431941, iteration: 106642
loss: 1.3024379014968872,grad_norm: 0.999999730201009, iteration: 106643
loss: 1.3310561180114746,grad_norm: 0.9999997997061966, iteration: 106644
loss: 1.3418687582015991,grad_norm: 0.9999999655635264, iteration: 106645
loss: 1.4963183403015137,grad_norm: 0.999999876926137, iteration: 106646
loss: 1.2042306661605835,grad_norm: 0.9999994185974465, iteration: 106647
loss: 1.350711464881897,grad_norm: 0.9999997973323196, iteration: 106648
loss: 1.4228339195251465,grad_norm: 0.999999583466558, iteration: 106649
loss: 1.2224029302597046,grad_norm: 0.999999888086654, iteration: 106650
loss: 1.3808300495147705,grad_norm: 0.9999998525864798, iteration: 106651
loss: 1.4835139513015747,grad_norm: 0.999999776711062, iteration: 106652
loss: 1.3733173608779907,grad_norm: 0.9999998617462152, iteration: 106653
loss: 1.3038502931594849,grad_norm: 0.9999995949413751, iteration: 106654
loss: 1.4169563055038452,grad_norm: 0.9999998888835583, iteration: 106655
loss: 1.532745361328125,grad_norm: 1.0000000222210594, iteration: 106656
loss: 1.2170193195343018,grad_norm: 0.9999999971373275, iteration: 106657
loss: 1.4313033819198608,grad_norm: 0.9999999851932873, iteration: 106658
loss: 1.3010470867156982,grad_norm: 0.9999997382838767, iteration: 106659
loss: 1.5089622735977173,grad_norm: 0.9999999940018567, iteration: 106660
loss: 1.3515739440917969,grad_norm: 0.9999996653452463, iteration: 106661
loss: 1.4049431085586548,grad_norm: 0.9999999192845948, iteration: 106662
loss: 1.087668776512146,grad_norm: 0.9644563600385374, iteration: 106663
loss: 1.3429815769195557,grad_norm: 0.9999999190279254, iteration: 106664
loss: 1.5976893901824951,grad_norm: 1.0000000210847306, iteration: 106665
loss: 1.3772687911987305,grad_norm: 0.9999999613058477, iteration: 106666
loss: 1.2698944807052612,grad_norm: 1.0000000563756493, iteration: 106667
loss: 1.3813104629516602,grad_norm: 0.9999999604464563, iteration: 106668
loss: 1.3023194074630737,grad_norm: 0.9999998851385336, iteration: 106669
loss: 1.3645107746124268,grad_norm: 0.9999999007347193, iteration: 106670
loss: 1.1975499391555786,grad_norm: 0.9999997573432033, iteration: 106671
loss: 1.1945616006851196,grad_norm: 0.999999700753351, iteration: 106672
loss: 1.1620267629623413,grad_norm: 0.9999996464532788, iteration: 106673
loss: 1.5347588062286377,grad_norm: 0.9999999888421311, iteration: 106674
loss: 1.3435473442077637,grad_norm: 0.9999994509323114, iteration: 106675
loss: 1.0919897556304932,grad_norm: 0.9999993204487734, iteration: 106676
loss: 1.1494404077529907,grad_norm: 0.999999597051415, iteration: 106677
loss: 1.330496072769165,grad_norm: 0.999999870816668, iteration: 106678
loss: 1.3076356649398804,grad_norm: 0.9999998717081118, iteration: 106679
loss: 1.2274045944213867,grad_norm: 0.999999812406947, iteration: 106680
loss: 1.352745771408081,grad_norm: 0.9999999435679866, iteration: 106681
loss: 1.4464054107666016,grad_norm: 0.9999999146659985, iteration: 106682
loss: 1.2855126857757568,grad_norm: 0.9999996602972443, iteration: 106683
loss: 1.2483049631118774,grad_norm: 0.9999995949506887, iteration: 106684
loss: 1.4470220804214478,grad_norm: 0.9999999063261845, iteration: 106685
loss: 1.3644967079162598,grad_norm: 0.9999997491213349, iteration: 106686
loss: 1.3645451068878174,grad_norm: 0.9999999061741994, iteration: 106687
loss: 1.2827943563461304,grad_norm: 0.9999998382126328, iteration: 106688
loss: 1.5026030540466309,grad_norm: 0.9999999455186416, iteration: 106689
loss: 1.2716456651687622,grad_norm: 0.9999999837684097, iteration: 106690
loss: 1.2922801971435547,grad_norm: 0.9999997800824345, iteration: 106691
loss: 1.4085569381713867,grad_norm: 1.00000008451408, iteration: 106692
loss: 1.2837467193603516,grad_norm: 1.0000000121875847, iteration: 106693
loss: 1.2267056703567505,grad_norm: 0.9999997646452307, iteration: 106694
loss: 1.209250807762146,grad_norm: 0.9999999583147242, iteration: 106695
loss: 1.3336223363876343,grad_norm: 0.9999998725818467, iteration: 106696
loss: 1.2896944284439087,grad_norm: 0.9999999209548498, iteration: 106697
loss: 1.1470202207565308,grad_norm: 0.9999997861810036, iteration: 106698
loss: 1.3226162195205688,grad_norm: 0.9999996827982267, iteration: 106699
loss: 1.3101847171783447,grad_norm: 0.9999997013775219, iteration: 106700
loss: 1.2153124809265137,grad_norm: 0.9999997514776994, iteration: 106701
loss: 1.3418903350830078,grad_norm: 0.9999998192211617, iteration: 106702
loss: 1.2583472728729248,grad_norm: 1.0000000492974535, iteration: 106703
loss: 1.2658870220184326,grad_norm: 0.9999997457147836, iteration: 106704
loss: 1.232739806175232,grad_norm: 0.999999946830766, iteration: 106705
loss: 1.2297719717025757,grad_norm: 0.9999996905334056, iteration: 106706
loss: 1.340208649635315,grad_norm: 0.999999686315122, iteration: 106707
loss: 1.400075078010559,grad_norm: 0.9999998855320174, iteration: 106708
loss: 1.2656745910644531,grad_norm: 0.9999998735565994, iteration: 106709
loss: 1.431615948677063,grad_norm: 0.9999995364758519, iteration: 106710
loss: 1.1981549263000488,grad_norm: 0.9999993234020786, iteration: 106711
loss: 1.348813533782959,grad_norm: 0.9999999744456329, iteration: 106712
loss: 1.2758533954620361,grad_norm: 0.999999890727595, iteration: 106713
loss: 1.4129726886749268,grad_norm: 0.9999994853208971, iteration: 106714
loss: 1.289489507675171,grad_norm: 0.9999998266035435, iteration: 106715
loss: 1.1935973167419434,grad_norm: 0.9999998715067766, iteration: 106716
loss: 1.2541168928146362,grad_norm: 0.999999591589252, iteration: 106717
loss: 1.463472843170166,grad_norm: 0.9999996435047573, iteration: 106718
loss: 1.316867709159851,grad_norm: 0.9999999719480532, iteration: 106719
loss: 1.3262773752212524,grad_norm: 0.9999997866517645, iteration: 106720
loss: 1.2511850595474243,grad_norm: 0.9999998044293301, iteration: 106721
loss: 1.3558231592178345,grad_norm: 0.9999999645907388, iteration: 106722
loss: 1.1726921796798706,grad_norm: 0.9999993826635597, iteration: 106723
loss: 1.3075906038284302,grad_norm: 0.9999999332639854, iteration: 106724
loss: 1.1564314365386963,grad_norm: 0.9999998807901431, iteration: 106725
loss: 1.334603190422058,grad_norm: 0.9999997832615783, iteration: 106726
loss: 1.2713762521743774,grad_norm: 0.9999999905002714, iteration: 106727
loss: 1.3657501935958862,grad_norm: 0.9999997206344018, iteration: 106728
loss: 1.6012523174285889,grad_norm: 1.0000000154751225, iteration: 106729
loss: 1.5755633115768433,grad_norm: 0.9999998616154518, iteration: 106730
loss: 1.2534030675888062,grad_norm: 0.9999998614458188, iteration: 106731
loss: 1.431355595588684,grad_norm: 0.9999999043220458, iteration: 106732
loss: 1.292600154876709,grad_norm: 0.9999997396938012, iteration: 106733
loss: 1.516408085823059,grad_norm: 1.000000055070634, iteration: 106734
loss: 1.430579662322998,grad_norm: 0.9999998684457371, iteration: 106735
loss: 1.3892943859100342,grad_norm: 0.9999998869036651, iteration: 106736
loss: 1.4638772010803223,grad_norm: 0.9999999439828707, iteration: 106737
loss: 1.1536533832550049,grad_norm: 0.9999998599716752, iteration: 106738
loss: 1.4167262315750122,grad_norm: 0.9999997839266787, iteration: 106739
loss: 1.440877079963684,grad_norm: 1.0000000349038436, iteration: 106740
loss: 1.559289574623108,grad_norm: 0.9999998509096991, iteration: 106741
loss: 1.4911744594573975,grad_norm: 0.9999998806382302, iteration: 106742
loss: 1.304317593574524,grad_norm: 1.000000026141188, iteration: 106743
loss: 1.2788983583450317,grad_norm: 0.9999999089799388, iteration: 106744
loss: 1.4694901704788208,grad_norm: 0.9999999891828797, iteration: 106745
loss: 1.5362632274627686,grad_norm: 1.0000000015851567, iteration: 106746
loss: 1.4022680521011353,grad_norm: 0.9999998811097941, iteration: 106747
loss: 1.5122393369674683,grad_norm: 0.9999999529628399, iteration: 106748
loss: 1.4218887090682983,grad_norm: 1.0000000747547617, iteration: 106749
loss: 1.4898356199264526,grad_norm: 0.999999971321035, iteration: 106750
loss: 1.1571711301803589,grad_norm: 0.9999996169080537, iteration: 106751
loss: 1.470331072807312,grad_norm: 0.9999999726545121, iteration: 106752
loss: 1.3763508796691895,grad_norm: 0.9999998796204016, iteration: 106753
loss: 1.555009365081787,grad_norm: 0.9999998933939294, iteration: 106754
loss: 1.2275310754776,grad_norm: 0.9999996306944448, iteration: 106755
loss: 1.2085946798324585,grad_norm: 0.9999994888087493, iteration: 106756
loss: 1.2564836740493774,grad_norm: 1.0000000880594735, iteration: 106757
loss: 1.3235411643981934,grad_norm: 0.9999999292916214, iteration: 106758
loss: 1.3116179704666138,grad_norm: 0.9999999735637219, iteration: 106759
loss: 1.5470329523086548,grad_norm: 1.0000001470758368, iteration: 106760
loss: 1.5899406671524048,grad_norm: 0.9999998352809524, iteration: 106761
loss: 1.3094096183776855,grad_norm: 0.9999998206882204, iteration: 106762
loss: 1.2798255681991577,grad_norm: 0.9999999520849469, iteration: 106763
loss: 1.1690279245376587,grad_norm: 0.9999998390163428, iteration: 106764
loss: 1.4819200038909912,grad_norm: 0.9999997727670346, iteration: 106765
loss: 1.2976762056350708,grad_norm: 0.99999959649041, iteration: 106766
loss: 1.4616413116455078,grad_norm: 0.9999998770648786, iteration: 106767
loss: 1.4177641868591309,grad_norm: 0.9999998900751906, iteration: 106768
loss: 1.2243064641952515,grad_norm: 0.9999998194777662, iteration: 106769
loss: 1.493186354637146,grad_norm: 0.9999998725868207, iteration: 106770
loss: 1.3110876083374023,grad_norm: 0.9999999224689865, iteration: 106771
loss: 1.4671093225479126,grad_norm: 0.9999999482613335, iteration: 106772
loss: 1.2558586597442627,grad_norm: 0.9999993050884681, iteration: 106773
loss: 1.342527151107788,grad_norm: 0.9999998114453184, iteration: 106774
loss: 1.4823031425476074,grad_norm: 0.9999998160735861, iteration: 106775
loss: 1.2538658380508423,grad_norm: 0.9999999599268874, iteration: 106776
loss: 1.3717411756515503,grad_norm: 0.9999998822511925, iteration: 106777
loss: 1.263445496559143,grad_norm: 0.9999996865141061, iteration: 106778
loss: 1.2371296882629395,grad_norm: 0.9999997264568179, iteration: 106779
loss: 1.2271528244018555,grad_norm: 0.9999998876535652, iteration: 106780
loss: 1.2128026485443115,grad_norm: 0.9999999264767077, iteration: 106781
loss: 1.5466057062149048,grad_norm: 0.9999999289101386, iteration: 106782
loss: 1.2335423231124878,grad_norm: 0.9999997086082043, iteration: 106783
loss: 1.374809980392456,grad_norm: 0.9999997709045406, iteration: 106784
loss: 1.2904456853866577,grad_norm: 0.9999998223512516, iteration: 106785
loss: 1.3552346229553223,grad_norm: 0.9999997633370421, iteration: 106786
loss: 1.4553844928741455,grad_norm: 0.9999999812837072, iteration: 106787
loss: 1.2671912908554077,grad_norm: 0.999999539842059, iteration: 106788
loss: 1.4928040504455566,grad_norm: 0.9999997631840409, iteration: 106789
loss: 1.283659815788269,grad_norm: 0.9999999910184278, iteration: 106790
loss: 1.3916761875152588,grad_norm: 0.9999998920708787, iteration: 106791
loss: 1.1330041885375977,grad_norm: 0.9999996431706971, iteration: 106792
loss: 1.058709740638733,grad_norm: 0.9999992152405314, iteration: 106793
loss: 1.3173904418945312,grad_norm: 0.9999996478110955, iteration: 106794
loss: 1.308899164199829,grad_norm: 0.9999997363181676, iteration: 106795
loss: 1.3336117267608643,grad_norm: 1.0000000150417205, iteration: 106796
loss: 1.2176010608673096,grad_norm: 0.9999992768816046, iteration: 106797
loss: 1.2316302061080933,grad_norm: 0.999999681624427, iteration: 106798
loss: 1.3773598670959473,grad_norm: 1.0000000480749256, iteration: 106799
loss: 1.4612091779708862,grad_norm: 0.9999999227470624, iteration: 106800
loss: 1.2263877391815186,grad_norm: 0.9999997853612621, iteration: 106801
loss: 1.4956492185592651,grad_norm: 0.9999998773755262, iteration: 106802
loss: 1.4743211269378662,grad_norm: 0.9999999427719491, iteration: 106803
loss: 1.402608871459961,grad_norm: 0.9999997369321303, iteration: 106804
loss: 1.7049269676208496,grad_norm: 1.0000000686859147, iteration: 106805
loss: 1.425408959388733,grad_norm: 0.9999997535092727, iteration: 106806
loss: 1.4592704772949219,grad_norm: 0.9999998009272671, iteration: 106807
loss: 1.4006679058074951,grad_norm: 0.9999998223906179, iteration: 106808
loss: 1.342644453048706,grad_norm: 0.9999998744252971, iteration: 106809
loss: 1.509981393814087,grad_norm: 0.999999989627231, iteration: 106810
loss: 1.3116850852966309,grad_norm: 0.9999998428802105, iteration: 106811
loss: 1.4002031087875366,grad_norm: 0.9999998347289575, iteration: 106812
loss: 1.4811344146728516,grad_norm: 0.9999998720713625, iteration: 106813
loss: 1.2087293863296509,grad_norm: 1.0000000122051118, iteration: 106814
loss: 1.3981345891952515,grad_norm: 0.9999999625906361, iteration: 106815
loss: 1.4359685182571411,grad_norm: 0.9999999947475643, iteration: 106816
loss: 1.7215813398361206,grad_norm: 0.9999998330330849, iteration: 106817
loss: 1.4356074333190918,grad_norm: 0.9999999341129375, iteration: 106818
loss: 1.2524021863937378,grad_norm: 0.9999999045236836, iteration: 106819
loss: 1.1865073442459106,grad_norm: 0.9999993735682366, iteration: 106820
loss: 1.4815105199813843,grad_norm: 0.9999999070019814, iteration: 106821
loss: 1.5181314945220947,grad_norm: 0.9999999821691806, iteration: 106822
loss: 1.4218850135803223,grad_norm: 0.9999996406552615, iteration: 106823
loss: 1.306964635848999,grad_norm: 0.9999999795983416, iteration: 106824
loss: 1.4148271083831787,grad_norm: 0.9999997640074415, iteration: 106825
loss: 1.3516331911087036,grad_norm: 0.999999868643598, iteration: 106826
loss: 1.4632792472839355,grad_norm: 0.9999996974822235, iteration: 106827
loss: 1.417526364326477,grad_norm: 0.9999999323197908, iteration: 106828
loss: 1.2974951267242432,grad_norm: 0.9999994928054013, iteration: 106829
loss: 1.3380047082901,grad_norm: 0.9999995880553759, iteration: 106830
loss: 1.2926948070526123,grad_norm: 0.9999994131866944, iteration: 106831
loss: 1.2706409692764282,grad_norm: 0.999999984074506, iteration: 106832
loss: 1.1892768144607544,grad_norm: 0.9999996324608034, iteration: 106833
loss: 1.2608177661895752,grad_norm: 0.9999999836753346, iteration: 106834
loss: 1.2428325414657593,grad_norm: 0.9999997502116882, iteration: 106835
loss: 1.5707170963287354,grad_norm: 1.0000000142773846, iteration: 106836
loss: 1.2905638217926025,grad_norm: 0.9999998872540166, iteration: 106837
loss: 1.2571823596954346,grad_norm: 0.9999996562315918, iteration: 106838
loss: 1.239273190498352,grad_norm: 0.9999998813228087, iteration: 106839
loss: 1.3721972703933716,grad_norm: 0.9999997938651695, iteration: 106840
loss: 1.4070792198181152,grad_norm: 0.9999999930468569, iteration: 106841
loss: 1.4419971704483032,grad_norm: 0.9999999268673189, iteration: 106842
loss: 1.2530570030212402,grad_norm: 0.9999996789707393, iteration: 106843
loss: 1.2430168390274048,grad_norm: 1.0000000520738581, iteration: 106844
loss: 1.524867057800293,grad_norm: 1.0000000293977949, iteration: 106845
loss: 1.613396406173706,grad_norm: 0.9999999286150666, iteration: 106846
loss: 1.4548434019088745,grad_norm: 0.9999996395223301, iteration: 106847
loss: 1.2272908687591553,grad_norm: 0.9999993799838452, iteration: 106848
loss: 1.1166354417800903,grad_norm: 0.999999305476725, iteration: 106849
loss: 1.2004743814468384,grad_norm: 0.9999995404509519, iteration: 106850
loss: 1.3072315454483032,grad_norm: 0.9999995138786526, iteration: 106851
loss: 1.3602161407470703,grad_norm: 0.9999999164449178, iteration: 106852
loss: 1.188991665840149,grad_norm: 0.9999996544667906, iteration: 106853
loss: 1.291070818901062,grad_norm: 0.9999997440997689, iteration: 106854
loss: 1.2287558317184448,grad_norm: 0.9999995178326202, iteration: 106855
loss: 1.3547366857528687,grad_norm: 0.9999998795529808, iteration: 106856
loss: 1.1935397386550903,grad_norm: 0.9999996394905821, iteration: 106857
loss: 1.1357274055480957,grad_norm: 0.9999999131252986, iteration: 106858
loss: 1.2677053213119507,grad_norm: 0.9999993072077692, iteration: 106859
loss: 1.348473072052002,grad_norm: 0.9999994764718663, iteration: 106860
loss: 1.2770005464553833,grad_norm: 0.9999999475203049, iteration: 106861
loss: 1.3314718008041382,grad_norm: 0.9999996203229367, iteration: 106862
loss: 1.176365613937378,grad_norm: 0.9999994586075809, iteration: 106863
loss: 1.2343775033950806,grad_norm: 0.9999992077893761, iteration: 106864
loss: 1.4118164777755737,grad_norm: 0.9999998377744231, iteration: 106865
loss: 1.4316383600234985,grad_norm: 0.9999998058667003, iteration: 106866
loss: 1.2365086078643799,grad_norm: 0.9999998514507229, iteration: 106867
loss: 1.1447879076004028,grad_norm: 0.9999992829263936, iteration: 106868
loss: 1.2901618480682373,grad_norm: 0.9999996450406462, iteration: 106869
loss: 1.185080647468567,grad_norm: 0.9999992622344642, iteration: 106870
loss: 1.1640543937683105,grad_norm: 0.9999996932323745, iteration: 106871
loss: 1.2376699447631836,grad_norm: 0.9999998372373514, iteration: 106872
loss: 1.2379021644592285,grad_norm: 0.9999998849628305, iteration: 106873
loss: 1.1407333612442017,grad_norm: 0.9999996323926806, iteration: 106874
loss: 1.1973360776901245,grad_norm: 1.000000039531762, iteration: 106875
loss: 1.4671329259872437,grad_norm: 0.9999999215481398, iteration: 106876
loss: 1.2290798425674438,grad_norm: 0.9999998335497654, iteration: 106877
loss: 1.2516536712646484,grad_norm: 0.9999997517016151, iteration: 106878
loss: 1.0464131832122803,grad_norm: 0.9999992514721795, iteration: 106879
loss: 1.2210993766784668,grad_norm: 0.9999999757060491, iteration: 106880
loss: 1.2291041612625122,grad_norm: 0.9999996890336982, iteration: 106881
loss: 1.2421817779541016,grad_norm: 1.0000000585027633, iteration: 106882
loss: 1.1962220668792725,grad_norm: 0.9999995544440374, iteration: 106883
loss: 1.1754000186920166,grad_norm: 0.9999999225626958, iteration: 106884
loss: 1.0830110311508179,grad_norm: 0.9999993946470516, iteration: 106885
loss: 1.4356276988983154,grad_norm: 0.9999999455795292, iteration: 106886
loss: 1.2115180492401123,grad_norm: 0.9999997398200535, iteration: 106887
loss: 1.1962621212005615,grad_norm: 0.9999997779223607, iteration: 106888
loss: 1.2634313106536865,grad_norm: 0.9999999068212833, iteration: 106889
loss: 1.2401787042617798,grad_norm: 0.9999999161572086, iteration: 106890
loss: 1.2137192487716675,grad_norm: 0.9999999884818203, iteration: 106891
loss: 1.284888744354248,grad_norm: 0.9999998661753916, iteration: 106892
loss: 1.2584398984909058,grad_norm: 0.9999998527051517, iteration: 106893
loss: 1.1808065176010132,grad_norm: 0.9999995087243729, iteration: 106894
loss: 1.307063341140747,grad_norm: 1.0000000039549417, iteration: 106895
loss: 1.067433476448059,grad_norm: 0.9999993349308063, iteration: 106896
loss: 1.1242512464523315,grad_norm: 0.9999998325757824, iteration: 106897
loss: 1.3104796409606934,grad_norm: 0.999999537404354, iteration: 106898
loss: 1.219723105430603,grad_norm: 0.9999998901348515, iteration: 106899
loss: 1.2173758745193481,grad_norm: 0.9999999549009705, iteration: 106900
loss: 1.2010962963104248,grad_norm: 0.9999999286250308, iteration: 106901
loss: 1.0958486795425415,grad_norm: 0.9999996911970981, iteration: 106902
loss: 1.1993179321289062,grad_norm: 0.9999994275884747, iteration: 106903
loss: 1.0492517948150635,grad_norm: 0.9999998996172993, iteration: 106904
loss: 1.0683175325393677,grad_norm: 0.9999992388314451, iteration: 106905
loss: 1.2795449495315552,grad_norm: 0.9999994669435106, iteration: 106906
loss: 1.1989768743515015,grad_norm: 0.9999995940249652, iteration: 106907
loss: 1.1514151096343994,grad_norm: 0.9999997190636143, iteration: 106908
loss: 1.1220682859420776,grad_norm: 0.9999995855052887, iteration: 106909
loss: 1.1209102869033813,grad_norm: 0.9999996715471171, iteration: 106910
loss: 1.2020434141159058,grad_norm: 0.9999998650961927, iteration: 106911
loss: 1.1319947242736816,grad_norm: 0.9999996675740335, iteration: 106912
loss: 1.1947752237319946,grad_norm: 0.9999998843288533, iteration: 106913
loss: 1.0992374420166016,grad_norm: 0.9999993314587637, iteration: 106914
loss: 1.0310105085372925,grad_norm: 0.9760182118853631, iteration: 106915
loss: 1.1128019094467163,grad_norm: 0.9999993184908449, iteration: 106916
loss: 1.2538800239562988,grad_norm: 0.9999998728776843, iteration: 106917
loss: 1.0877183675765991,grad_norm: 0.9999997669738494, iteration: 106918
loss: 1.3616079092025757,grad_norm: 1.0000000130274307, iteration: 106919
loss: 1.1351186037063599,grad_norm: 0.9999995006723688, iteration: 106920
loss: 1.1820294857025146,grad_norm: 0.999999815003112, iteration: 106921
loss: 1.1634347438812256,grad_norm: 0.9999999899974427, iteration: 106922
loss: 1.2671031951904297,grad_norm: 0.9999999839501094, iteration: 106923
loss: 1.2044121026992798,grad_norm: 0.9999999590469788, iteration: 106924
loss: 1.3299992084503174,grad_norm: 0.9999996285168997, iteration: 106925
loss: 1.2118678092956543,grad_norm: 0.9999993735031322, iteration: 106926
loss: 1.3086624145507812,grad_norm: 0.9999997997440365, iteration: 106927
loss: 1.1679850816726685,grad_norm: 0.9999995077357989, iteration: 106928
loss: 1.2427492141723633,grad_norm: 0.9999998031625197, iteration: 106929
loss: 1.0967075824737549,grad_norm: 0.9999997067349745, iteration: 106930
loss: 1.2189676761627197,grad_norm: 0.9999998811928227, iteration: 106931
loss: 1.2238813638687134,grad_norm: 0.9999998121393583, iteration: 106932
loss: 1.1031105518341064,grad_norm: 0.9999994209837971, iteration: 106933
loss: 1.0300472974777222,grad_norm: 0.9999996732479542, iteration: 106934
loss: 1.1846963167190552,grad_norm: 0.9999995290733356, iteration: 106935
loss: 1.0394351482391357,grad_norm: 0.9999991849898286, iteration: 106936
loss: 1.0595571994781494,grad_norm: 0.9999993103939532, iteration: 106937
loss: 1.1394147872924805,grad_norm: 0.999999392571408, iteration: 106938
loss: 1.0586676597595215,grad_norm: 0.9999992820208827, iteration: 106939
loss: 1.1761353015899658,grad_norm: 0.9999993361978652, iteration: 106940
loss: 1.2998000383377075,grad_norm: 0.9999996713384683, iteration: 106941
loss: 1.139910101890564,grad_norm: 0.9999992503486415, iteration: 106942
loss: 1.1606290340423584,grad_norm: 0.9999998220154941, iteration: 106943
loss: 1.1930615901947021,grad_norm: 0.9999997533530328, iteration: 106944
loss: 1.1068534851074219,grad_norm: 0.9999992118348914, iteration: 106945
loss: 1.1390736103057861,grad_norm: 0.9999997326287804, iteration: 106946
loss: 1.0773564577102661,grad_norm: 0.9999992253409645, iteration: 106947
loss: 1.0755071640014648,grad_norm: 0.9999992721824172, iteration: 106948
loss: 1.2261404991149902,grad_norm: 0.9999995770207937, iteration: 106949
loss: 1.1110092401504517,grad_norm: 0.9999999691047949, iteration: 106950
loss: 1.289102554321289,grad_norm: 0.9999999238628674, iteration: 106951
loss: 1.2495321035385132,grad_norm: 0.9999999702964476, iteration: 106952
loss: 1.107264757156372,grad_norm: 0.9999999864381078, iteration: 106953
loss: 1.1444085836410522,grad_norm: 0.9999995180398487, iteration: 106954
loss: 1.1851152181625366,grad_norm: 0.9999999730379646, iteration: 106955
loss: 1.0755106210708618,grad_norm: 0.9999998958627214, iteration: 106956
loss: 1.1788634061813354,grad_norm: 0.9999996415600646, iteration: 106957
loss: 1.045939326286316,grad_norm: 0.9399552951561978, iteration: 106958
loss: 1.262411117553711,grad_norm: 0.99999936307868, iteration: 106959
loss: 1.1069577932357788,grad_norm: 0.9999993308983163, iteration: 106960
loss: 1.255020260810852,grad_norm: 0.9999997166567101, iteration: 106961
loss: 1.2476844787597656,grad_norm: 0.9999998449045174, iteration: 106962
loss: 1.0706348419189453,grad_norm: 0.9999994766935361, iteration: 106963
loss: 1.1463812589645386,grad_norm: 0.9999993462813019, iteration: 106964
loss: 1.3219293355941772,grad_norm: 0.9999998268881282, iteration: 106965
loss: 1.1448974609375,grad_norm: 0.9999998735713063, iteration: 106966
loss: 1.208677053451538,grad_norm: 0.9999995600257102, iteration: 106967
loss: 1.2082064151763916,grad_norm: 0.9999997759975826, iteration: 106968
loss: 1.0545650720596313,grad_norm: 0.9999994203051545, iteration: 106969
loss: 1.084919810295105,grad_norm: 0.9999999741360361, iteration: 106970
loss: 1.1538262367248535,grad_norm: 0.9999992023232196, iteration: 106971
loss: 1.1421128511428833,grad_norm: 0.999999332492241, iteration: 106972
loss: 1.2337559461593628,grad_norm: 0.9999994431683026, iteration: 106973
loss: 1.2302401065826416,grad_norm: 0.9999999068053538, iteration: 106974
loss: 1.2495529651641846,grad_norm: 0.9999996224075094, iteration: 106975
loss: 1.355141043663025,grad_norm: 0.9999997439640517, iteration: 106976
loss: 1.3425244092941284,grad_norm: 0.9999997270898785, iteration: 106977
loss: 1.0907944440841675,grad_norm: 0.9999997414396173, iteration: 106978
loss: 1.1485449075698853,grad_norm: 1.0000000081733784, iteration: 106979
loss: 1.2321457862854004,grad_norm: 0.999999777457404, iteration: 106980
loss: 1.1288944482803345,grad_norm: 0.9999997155570168, iteration: 106981
loss: 1.176726222038269,grad_norm: 0.9999994979890251, iteration: 106982
loss: 1.2271840572357178,grad_norm: 0.9999996847404852, iteration: 106983
loss: 1.073760747909546,grad_norm: 0.9999990911025828, iteration: 106984
loss: 1.204744815826416,grad_norm: 0.9999996210711292, iteration: 106985
loss: 1.1325565576553345,grad_norm: 0.9999998631049237, iteration: 106986
loss: 1.0746393203735352,grad_norm: 0.9999993055956806, iteration: 106987
loss: 1.1110026836395264,grad_norm: 0.9999999378800314, iteration: 106988
loss: 1.3183083534240723,grad_norm: 0.9999998153848312, iteration: 106989
loss: 1.3003097772598267,grad_norm: 0.9999997525327732, iteration: 106990
loss: 1.0774691104888916,grad_norm: 0.9999990385470366, iteration: 106991
loss: 1.152547001838684,grad_norm: 0.99999955102344, iteration: 106992
loss: 1.156794548034668,grad_norm: 0.9999997063492059, iteration: 106993
loss: 1.0722185373306274,grad_norm: 0.9999990899945717, iteration: 106994
loss: 1.1273295879364014,grad_norm: 0.9999997708953529, iteration: 106995
loss: 1.048362374305725,grad_norm: 0.9999998663740308, iteration: 106996
loss: 1.0584453344345093,grad_norm: 0.9999997212289741, iteration: 106997
loss: 1.1501785516738892,grad_norm: 0.999999715136696, iteration: 106998
loss: 1.1748056411743164,grad_norm: 0.9999999956269964, iteration: 106999
loss: 1.3042373657226562,grad_norm: 0.9999997298196129, iteration: 107000
loss: 1.0102838277816772,grad_norm: 0.9999991169880506, iteration: 107001
loss: 1.153731107711792,grad_norm: 0.9999994651800072, iteration: 107002
loss: 1.2539478540420532,grad_norm: 0.9999999370490974, iteration: 107003
loss: 1.3002891540527344,grad_norm: 0.9999997828494884, iteration: 107004
loss: 1.2343626022338867,grad_norm: 0.9999996792765466, iteration: 107005
loss: 1.0767170190811157,grad_norm: 0.9999999218737631, iteration: 107006
loss: 1.2726101875305176,grad_norm: 0.9999998179621173, iteration: 107007
loss: 1.2821142673492432,grad_norm: 0.9999998995473691, iteration: 107008
loss: 1.0987895727157593,grad_norm: 0.9999998443947871, iteration: 107009
loss: 1.1190353631973267,grad_norm: 0.9999996955643876, iteration: 107010
loss: 1.0880597829818726,grad_norm: 0.9999995787409106, iteration: 107011
loss: 1.0887178182601929,grad_norm: 0.9999993388281988, iteration: 107012
loss: 1.1505365371704102,grad_norm: 0.9999993235320086, iteration: 107013
loss: 1.0567328929901123,grad_norm: 0.9999991759058922, iteration: 107014
loss: 1.2230191230773926,grad_norm: 0.9999995276631778, iteration: 107015
loss: 1.1198369264602661,grad_norm: 0.9999992898806768, iteration: 107016
loss: 1.011322259902954,grad_norm: 0.9999991234970469, iteration: 107017
loss: 1.1167677640914917,grad_norm: 0.9999998843844933, iteration: 107018
loss: 1.2372221946716309,grad_norm: 0.9999999610642086, iteration: 107019
loss: 1.0693800449371338,grad_norm: 0.9999996240850432, iteration: 107020
loss: 1.5142931938171387,grad_norm: 0.9999998351544388, iteration: 107021
loss: 1.3113806247711182,grad_norm: 1.000000010982159, iteration: 107022
loss: 1.1736477613449097,grad_norm: 0.9999994924640188, iteration: 107023
loss: 1.0657868385314941,grad_norm: 0.9999993668687981, iteration: 107024
loss: 1.0825566053390503,grad_norm: 0.9999995323598869, iteration: 107025
loss: 1.0492342710494995,grad_norm: 0.9999992842623883, iteration: 107026
loss: 1.3775628805160522,grad_norm: 0.9999997761922174, iteration: 107027
loss: 1.1348713636398315,grad_norm: 0.9999995182493024, iteration: 107028
loss: 1.1398849487304688,grad_norm: 0.9999998544418066, iteration: 107029
loss: 1.0641241073608398,grad_norm: 0.9999998570874621, iteration: 107030
loss: 1.0683590173721313,grad_norm: 0.999999566187534, iteration: 107031
loss: 1.1971521377563477,grad_norm: 0.9999995166227976, iteration: 107032
loss: 1.0933111906051636,grad_norm: 0.9999996428325981, iteration: 107033
loss: 1.0806385278701782,grad_norm: 0.9999999313789154, iteration: 107034
loss: 1.1008024215698242,grad_norm: 0.9999998348385871, iteration: 107035
loss: 1.1211942434310913,grad_norm: 0.9999997938338526, iteration: 107036
loss: 1.1853383779525757,grad_norm: 0.9999998480352713, iteration: 107037
loss: 1.1032447814941406,grad_norm: 0.9999992711200761, iteration: 107038
loss: 1.0200632810592651,grad_norm: 0.9999991201720261, iteration: 107039
loss: 1.0379688739776611,grad_norm: 0.9999992266544446, iteration: 107040
loss: 1.027112364768982,grad_norm: 0.9999995555539604, iteration: 107041
loss: 1.0372428894042969,grad_norm: 0.9999995762921708, iteration: 107042
loss: 1.064529538154602,grad_norm: 0.9999997700037525, iteration: 107043
loss: 1.0957715511322021,grad_norm: 0.999999920452232, iteration: 107044
loss: 1.007848858833313,grad_norm: 0.9999998784060419, iteration: 107045
loss: 1.0686655044555664,grad_norm: 0.9999992159616123, iteration: 107046
loss: 1.0157439708709717,grad_norm: 0.981537371815623, iteration: 107047
loss: 1.060553789138794,grad_norm: 0.9999993242398332, iteration: 107048
loss: 1.2974225282669067,grad_norm: 0.9999999928505219, iteration: 107049
loss: 1.1525282859802246,grad_norm: 0.9999993968627005, iteration: 107050
loss: 1.1209429502487183,grad_norm: 0.9999990666039511, iteration: 107051
loss: 1.1553122997283936,grad_norm: 0.9999999633892049, iteration: 107052
loss: 1.0552401542663574,grad_norm: 0.9999997636566278, iteration: 107053
loss: 1.0601401329040527,grad_norm: 0.9999991790935401, iteration: 107054
loss: 1.0447138547897339,grad_norm: 0.9999997618036397, iteration: 107055
loss: 1.147723913192749,grad_norm: 0.9999998036276752, iteration: 107056
loss: 0.9960519671440125,grad_norm: 0.9867759478101975, iteration: 107057
loss: 1.1890562772750854,grad_norm: 0.9999997611543796, iteration: 107058
loss: 1.019051432609558,grad_norm: 0.9999994708004183, iteration: 107059
loss: 1.1396586894989014,grad_norm: 0.9999995540466183, iteration: 107060
loss: 1.0024480819702148,grad_norm: 0.9999997934967415, iteration: 107061
loss: 1.0930147171020508,grad_norm: 0.9999994112484338, iteration: 107062
loss: 1.1287868022918701,grad_norm: 0.9999998795157322, iteration: 107063
loss: 1.0894888639450073,grad_norm: 0.9999996044876727, iteration: 107064
loss: 1.1251739263534546,grad_norm: 0.9999999087988707, iteration: 107065
loss: 1.0493746995925903,grad_norm: 0.9999991150467344, iteration: 107066
loss: 1.0311638116836548,grad_norm: 0.9999995162194226, iteration: 107067
loss: 1.0907518863677979,grad_norm: 0.9999990858893089, iteration: 107068
loss: 1.0513418912887573,grad_norm: 0.9999997658326115, iteration: 107069
loss: 1.034415364265442,grad_norm: 0.9999992800570693, iteration: 107070
loss: 1.1078128814697266,grad_norm: 0.9999999104050509, iteration: 107071
loss: 1.1334997415542603,grad_norm: 0.9999991408294747, iteration: 107072
loss: 1.0876660346984863,grad_norm: 0.9999991379645445, iteration: 107073
loss: 1.1531012058258057,grad_norm: 0.9999992700566741, iteration: 107074
loss: 1.3559030294418335,grad_norm: 0.999999989637636, iteration: 107075
loss: 1.0147885084152222,grad_norm: 0.9999993821580867, iteration: 107076
loss: 1.146275520324707,grad_norm: 0.9999997817700677, iteration: 107077
loss: 1.0787506103515625,grad_norm: 0.9999998059063592, iteration: 107078
loss: 1.0778567790985107,grad_norm: 0.9999993937222638, iteration: 107079
loss: 1.0922189950942993,grad_norm: 0.99999957148952, iteration: 107080
loss: 1.0654836893081665,grad_norm: 0.9999997627085548, iteration: 107081
loss: 1.0454022884368896,grad_norm: 0.9999990941645992, iteration: 107082
loss: 1.049177646636963,grad_norm: 0.9999993722323853, iteration: 107083
loss: 1.308218240737915,grad_norm: 0.9999998829409982, iteration: 107084
loss: 1.0338133573532104,grad_norm: 0.9999998000472643, iteration: 107085
loss: 1.1394991874694824,grad_norm: 0.9999996061480074, iteration: 107086
loss: 1.020081639289856,grad_norm: 0.9999991234069595, iteration: 107087
loss: 1.1141091585159302,grad_norm: 0.999999970727393, iteration: 107088
loss: 1.0555768013000488,grad_norm: 0.9999992533157329, iteration: 107089
loss: 1.0500900745391846,grad_norm: 0.999999810076576, iteration: 107090
loss: 1.0318151712417603,grad_norm: 0.917735471310216, iteration: 107091
loss: 1.059066891670227,grad_norm: 0.9999996090693203, iteration: 107092
loss: 1.0828912258148193,grad_norm: 0.9999992514887505, iteration: 107093
loss: 1.0087568759918213,grad_norm: 0.8996956766300346, iteration: 107094
loss: 1.1222442388534546,grad_norm: 0.9999998319916859, iteration: 107095
loss: 1.2191121578216553,grad_norm: 0.9999998977755316, iteration: 107096
loss: 1.0536035299301147,grad_norm: 0.9999996746427305, iteration: 107097
loss: 0.9990582466125488,grad_norm: 0.9999994638953581, iteration: 107098
loss: 1.1000615358352661,grad_norm: 0.9999996604448871, iteration: 107099
loss: 1.0571178197860718,grad_norm: 0.9999997065750115, iteration: 107100
loss: 1.05404794216156,grad_norm: 0.9999993173258582, iteration: 107101
loss: 1.013907551765442,grad_norm: 0.8326785146095442, iteration: 107102
loss: 1.0976736545562744,grad_norm: 0.9999992267590215, iteration: 107103
loss: 1.01241135597229,grad_norm: 0.9175932927941709, iteration: 107104
loss: 1.0101864337921143,grad_norm: 0.9500538824123159, iteration: 107105
loss: 1.0059233903884888,grad_norm: 0.8798899943912386, iteration: 107106
loss: 1.032604455947876,grad_norm: 0.9999991184814183, iteration: 107107
loss: 1.1410303115844727,grad_norm: 0.9999995455673183, iteration: 107108
loss: 1.068695306777954,grad_norm: 0.9999997265551781, iteration: 107109
loss: 1.0424325466156006,grad_norm: 0.9999994914796039, iteration: 107110
loss: 1.0570459365844727,grad_norm: 0.9999993265593018, iteration: 107111
loss: 0.9945318102836609,grad_norm: 0.8183356576676352, iteration: 107112
loss: 1.0439764261245728,grad_norm: 0.9999993208733058, iteration: 107113
loss: 1.0646251440048218,grad_norm: 0.9999999265703768, iteration: 107114
loss: 1.1557285785675049,grad_norm: 0.9999993498143813, iteration: 107115
loss: 1.1108373403549194,grad_norm: 0.9999992572201968, iteration: 107116
loss: 1.1069743633270264,grad_norm: 0.9999994399965225, iteration: 107117
loss: 1.2122480869293213,grad_norm: 0.9999993934995618, iteration: 107118
loss: 1.0330157279968262,grad_norm: 0.9999994429973529, iteration: 107119
loss: 1.1689327955245972,grad_norm: 0.9999998412257959, iteration: 107120
loss: 1.1189358234405518,grad_norm: 0.8855340736515264, iteration: 107121
loss: 1.1164934635162354,grad_norm: 0.9999999189767471, iteration: 107122
loss: 1.0572445392608643,grad_norm: 0.9999997321735774, iteration: 107123
loss: 1.01847505569458,grad_norm: 0.9518383493768588, iteration: 107124
loss: 1.0971574783325195,grad_norm: 0.999999891808994, iteration: 107125
loss: 1.0613933801651,grad_norm: 0.9999994467967058, iteration: 107126
loss: 1.0887478590011597,grad_norm: 0.9999990083554828, iteration: 107127
loss: 1.0945483446121216,grad_norm: 0.9999999734302761, iteration: 107128
loss: 1.043433427810669,grad_norm: 0.9999993607163811, iteration: 107129
loss: 1.0344791412353516,grad_norm: 0.9999993827009733, iteration: 107130
loss: 1.085473656654358,grad_norm: 0.9999991710846476, iteration: 107131
loss: 1.059980034828186,grad_norm: 0.9999997478423183, iteration: 107132
loss: 1.00612473487854,grad_norm: 0.9999999737657026, iteration: 107133
loss: 1.0423489809036255,grad_norm: 0.9025815206037016, iteration: 107134
loss: 1.0950733423233032,grad_norm: 0.9999994748165885, iteration: 107135
loss: 1.106882929801941,grad_norm: 0.9999993318360728, iteration: 107136
loss: 1.0103346109390259,grad_norm: 0.9999993069523424, iteration: 107137
loss: 1.0279330015182495,grad_norm: 0.7437859360264103, iteration: 107138
loss: 1.0312435626983643,grad_norm: 0.999999280414049, iteration: 107139
loss: 1.035808801651001,grad_norm: 0.9999995519140006, iteration: 107140
loss: 1.1202195882797241,grad_norm: 0.9999994273741433, iteration: 107141
loss: 1.1245348453521729,grad_norm: 0.9999994462707332, iteration: 107142
loss: 1.0579752922058105,grad_norm: 0.9999994644500232, iteration: 107143
loss: 1.0638083219528198,grad_norm: 1.0000000036559054, iteration: 107144
loss: 1.059316873550415,grad_norm: 0.999999401123861, iteration: 107145
loss: 1.0359307527542114,grad_norm: 0.999999430795771, iteration: 107146
loss: 1.0727198123931885,grad_norm: 0.9999995717055712, iteration: 107147
loss: 1.2447965145111084,grad_norm: 0.9999999964120354, iteration: 107148
loss: 1.0731685161590576,grad_norm: 0.9999992402085379, iteration: 107149
loss: 1.1007211208343506,grad_norm: 0.8862193561523701, iteration: 107150
loss: 1.0379761457443237,grad_norm: 0.9999991761739931, iteration: 107151
loss: 0.9935507774353027,grad_norm: 0.9999995741074572, iteration: 107152
loss: 1.0176496505737305,grad_norm: 0.9999996485251718, iteration: 107153
loss: 1.0701543092727661,grad_norm: 0.9999993061779643, iteration: 107154
loss: 1.002298355102539,grad_norm: 0.9999991978976933, iteration: 107155
loss: 1.0198471546173096,grad_norm: 0.9999989813597101, iteration: 107156
loss: 1.0497065782546997,grad_norm: 0.9999991643119492, iteration: 107157
loss: 1.0957493782043457,grad_norm: 0.9999995579125379, iteration: 107158
loss: 1.0683715343475342,grad_norm: 0.9999995308165098, iteration: 107159
loss: 1.0235916376113892,grad_norm: 0.999998947544136, iteration: 107160
loss: 1.105779767036438,grad_norm: 0.8910539501580822, iteration: 107161
loss: 1.0452903509140015,grad_norm: 0.9999996141220211, iteration: 107162
loss: 1.0458120107650757,grad_norm: 0.9511271533924054, iteration: 107163
loss: 1.0295617580413818,grad_norm: 0.9999991785497733, iteration: 107164
loss: 1.0083186626434326,grad_norm: 0.9429071119335461, iteration: 107165
loss: 1.0386157035827637,grad_norm: 0.9999994171545404, iteration: 107166
loss: 1.0218769311904907,grad_norm: 0.875193230694307, iteration: 107167
loss: 1.0727505683898926,grad_norm: 0.9999995125808607, iteration: 107168
loss: 1.0387613773345947,grad_norm: 0.9999993666046422, iteration: 107169
loss: 1.0719138383865356,grad_norm: 0.9472984630466273, iteration: 107170
loss: 1.0420212745666504,grad_norm: 0.999999076433576, iteration: 107171
loss: 1.0449579954147339,grad_norm: 0.9999991750041725, iteration: 107172
loss: 0.9885655045509338,grad_norm: 0.9675271842572735, iteration: 107173
loss: 1.212490439414978,grad_norm: 0.9999994382945592, iteration: 107174
loss: 1.0186134576797485,grad_norm: 0.999999658316931, iteration: 107175
loss: 1.0445233583450317,grad_norm: 0.9999992147253783, iteration: 107176
loss: 1.0908973217010498,grad_norm: 0.999999379894899, iteration: 107177
loss: 1.0041327476501465,grad_norm: 0.9999993207671334, iteration: 107178
loss: 1.0371754169464111,grad_norm: 0.9999990449535656, iteration: 107179
loss: 1.010793685913086,grad_norm: 0.999999251539786, iteration: 107180
loss: 1.0657069683074951,grad_norm: 0.999999911893778, iteration: 107181
loss: 1.0294010639190674,grad_norm: 0.9999992286154136, iteration: 107182
loss: 1.1786571741104126,grad_norm: 0.9999998279771298, iteration: 107183
loss: 1.0591888427734375,grad_norm: 0.9999993884080297, iteration: 107184
loss: 1.0140131711959839,grad_norm: 0.89233759963482, iteration: 107185
loss: 1.0029003620147705,grad_norm: 0.9999992835875297, iteration: 107186
loss: 1.0409435033798218,grad_norm: 0.8815886241411582, iteration: 107187
loss: 1.013310432434082,grad_norm: 0.8709260789003226, iteration: 107188
loss: 0.9799259305000305,grad_norm: 0.9999991312455591, iteration: 107189
loss: 1.0294233560562134,grad_norm: 0.9999997999896962, iteration: 107190
loss: 1.0919291973114014,grad_norm: 0.9999991903671263, iteration: 107191
loss: 1.1509612798690796,grad_norm: 0.9999998303864261, iteration: 107192
loss: 1.0765701532363892,grad_norm: 0.9999995911182785, iteration: 107193
loss: 1.0029654502868652,grad_norm: 0.7689329513388211, iteration: 107194
loss: 1.2029540538787842,grad_norm: 0.9999990756529435, iteration: 107195
loss: 1.0670359134674072,grad_norm: 0.999999259236462, iteration: 107196
loss: 1.2215617895126343,grad_norm: 0.9999993811906883, iteration: 107197
loss: 1.1441222429275513,grad_norm: 0.9999998286056128, iteration: 107198
loss: 1.03275465965271,grad_norm: 0.9999989608749763, iteration: 107199
loss: 1.1718372106552124,grad_norm: 0.9999996058894486, iteration: 107200
loss: 1.0715608596801758,grad_norm: 0.9999994478482939, iteration: 107201
loss: 1.0480226278305054,grad_norm: 0.9999992084060325, iteration: 107202
loss: 1.1320232152938843,grad_norm: 0.999999996173092, iteration: 107203
loss: 1.0383875370025635,grad_norm: 0.9999995413435363, iteration: 107204
loss: 1.0178933143615723,grad_norm: 0.8608093897194343, iteration: 107205
loss: 1.0672627687454224,grad_norm: 0.9999997083307561, iteration: 107206
loss: 1.0992200374603271,grad_norm: 0.9999992776087898, iteration: 107207
loss: 1.1481019258499146,grad_norm: 0.9999995417988994, iteration: 107208
loss: 1.0530993938446045,grad_norm: 0.9999996338395902, iteration: 107209
loss: 1.076106071472168,grad_norm: 0.9999999412490829, iteration: 107210
loss: 1.0183699131011963,grad_norm: 0.9999992293768606, iteration: 107211
loss: 1.0228573083877563,grad_norm: 0.9999990670229394, iteration: 107212
loss: 0.9983622431755066,grad_norm: 0.9344984135679814, iteration: 107213
loss: 1.0217931270599365,grad_norm: 0.9561367489444567, iteration: 107214
loss: 1.0045857429504395,grad_norm: 0.806512073474048, iteration: 107215
loss: 1.0344884395599365,grad_norm: 0.9999992489673597, iteration: 107216
loss: 1.248609185218811,grad_norm: 0.9999995520155404, iteration: 107217
loss: 1.082108974456787,grad_norm: 0.9999992616614476, iteration: 107218
loss: 1.0559419393539429,grad_norm: 0.9999991945938427, iteration: 107219
loss: 1.0907397270202637,grad_norm: 0.9999997854808627, iteration: 107220
loss: 0.9897887706756592,grad_norm: 0.9880299080225938, iteration: 107221
loss: 1.0238075256347656,grad_norm: 0.998970068525933, iteration: 107222
loss: 1.017383337020874,grad_norm: 0.9999993749728469, iteration: 107223
loss: 1.0668175220489502,grad_norm: 0.9999998837782519, iteration: 107224
loss: 1.1045637130737305,grad_norm: 0.9999991992950333, iteration: 107225
loss: 1.0479247570037842,grad_norm: 0.9999994179646271, iteration: 107226
loss: 1.0168057680130005,grad_norm: 0.9300111442459131, iteration: 107227
loss: 1.003553867340088,grad_norm: 0.9999992013970325, iteration: 107228
loss: 1.0172148942947388,grad_norm: 0.9999992435716896, iteration: 107229
loss: 1.0326541662216187,grad_norm: 0.9999998640042281, iteration: 107230
loss: 1.0163697004318237,grad_norm: 0.9999990492007711, iteration: 107231
loss: 1.0922638177871704,grad_norm: 0.9999990900300428, iteration: 107232
loss: 1.0908701419830322,grad_norm: 0.999999374924815, iteration: 107233
loss: 1.1066367626190186,grad_norm: 0.9999997636743662, iteration: 107234
loss: 1.0145535469055176,grad_norm: 0.9999991029774882, iteration: 107235
loss: 1.0367426872253418,grad_norm: 0.9999994747380401, iteration: 107236
loss: 0.9758106470108032,grad_norm: 0.9999990170559598, iteration: 107237
loss: 0.9920861124992371,grad_norm: 0.9870701590393667, iteration: 107238
loss: 1.0457276105880737,grad_norm: 0.9999995276253454, iteration: 107239
loss: 1.0626145601272583,grad_norm: 0.9999991423763448, iteration: 107240
loss: 1.051108956336975,grad_norm: 0.9999991446335103, iteration: 107241
loss: 1.1496658325195312,grad_norm: 0.999999750171772, iteration: 107242
loss: 1.0396382808685303,grad_norm: 0.9999991025181331, iteration: 107243
loss: 1.0097532272338867,grad_norm: 0.9435367745621728, iteration: 107244
loss: 1.0020400285720825,grad_norm: 0.9999990877700912, iteration: 107245
loss: 1.016998291015625,grad_norm: 0.9999991242525216, iteration: 107246
loss: 1.015485405921936,grad_norm: 0.8987715449365927, iteration: 107247
loss: 1.0344932079315186,grad_norm: 0.9851714362813929, iteration: 107248
loss: 1.0434699058532715,grad_norm: 0.9999993841331254, iteration: 107249
loss: 1.051425576210022,grad_norm: 0.9999994972564177, iteration: 107250
loss: 1.0161943435668945,grad_norm: 0.9999993786254244, iteration: 107251
loss: 1.014540195465088,grad_norm: 0.9999991373271473, iteration: 107252
loss: 1.0625463724136353,grad_norm: 0.999999817411576, iteration: 107253
loss: 1.1439768075942993,grad_norm: 0.9999998718074693, iteration: 107254
loss: 0.9701524972915649,grad_norm: 0.9999994277066601, iteration: 107255
loss: 1.0565335750579834,grad_norm: 1.0000000549995616, iteration: 107256
loss: 1.0458203554153442,grad_norm: 0.9999989291463439, iteration: 107257
loss: 1.09572172164917,grad_norm: 0.999999223948948, iteration: 107258
loss: 1.3104091882705688,grad_norm: 0.9999996969657169, iteration: 107259
loss: 1.0406603813171387,grad_norm: 0.9999991850833772, iteration: 107260
loss: 1.00590181350708,grad_norm: 0.9999990421559736, iteration: 107261
loss: 1.199212670326233,grad_norm: 0.9999995081564363, iteration: 107262
loss: 1.0925899744033813,grad_norm: 0.9999993364741511, iteration: 107263
loss: 1.0355567932128906,grad_norm: 0.9999991729822325, iteration: 107264
loss: 1.0154362916946411,grad_norm: 0.8167836336660778, iteration: 107265
loss: 1.0977107286453247,grad_norm: 0.9999998039293684, iteration: 107266
loss: 0.9778106808662415,grad_norm: 0.9999990521023943, iteration: 107267
loss: 1.0129207372665405,grad_norm: 0.9637887595344057, iteration: 107268
loss: 1.0779722929000854,grad_norm: 0.9999998716224481, iteration: 107269
loss: 1.0912771224975586,grad_norm: 0.9999999775971249, iteration: 107270
loss: 1.0493359565734863,grad_norm: 0.9587802635495049, iteration: 107271
loss: 1.1976311206817627,grad_norm: 0.9999992734672422, iteration: 107272
loss: 0.9998846054077148,grad_norm: 0.9999991657444663, iteration: 107273
loss: 1.0780417919158936,grad_norm: 0.999999336547193, iteration: 107274
loss: 1.1691884994506836,grad_norm: 0.9999998697721073, iteration: 107275
loss: 1.1474815607070923,grad_norm: 0.999999558586433, iteration: 107276
loss: 1.0206549167633057,grad_norm: 0.9999997679742775, iteration: 107277
loss: 1.049583077430725,grad_norm: 0.9999991978846445, iteration: 107278
loss: 1.0475574731826782,grad_norm: 0.883069992920325, iteration: 107279
loss: 1.1096622943878174,grad_norm: 0.9999997325137943, iteration: 107280
loss: 1.0497058629989624,grad_norm: 0.9999992587411111, iteration: 107281
loss: 1.0918989181518555,grad_norm: 0.9999991701970742, iteration: 107282
loss: 1.034014105796814,grad_norm: 0.9180962709525012, iteration: 107283
loss: 1.221054196357727,grad_norm: 0.9999992863867586, iteration: 107284
loss: 1.037755012512207,grad_norm: 0.99999952776204, iteration: 107285
loss: 1.1686593294143677,grad_norm: 0.9999992084006297, iteration: 107286
loss: 1.106225609779358,grad_norm: 1.0000000241330373, iteration: 107287
loss: 0.9953811764717102,grad_norm: 0.9657768956932418, iteration: 107288
loss: 1.0360407829284668,grad_norm: 0.9999991920844485, iteration: 107289
loss: 1.0737831592559814,grad_norm: 0.8916957430405708, iteration: 107290
loss: 1.0644038915634155,grad_norm: 1.0000000129009312, iteration: 107291
loss: 1.0684106349945068,grad_norm: 0.9809940349686476, iteration: 107292
loss: 1.1482524871826172,grad_norm: 0.9999991357312014, iteration: 107293
loss: 1.044075608253479,grad_norm: 0.9411171203771093, iteration: 107294
loss: 1.153485894203186,grad_norm: 0.9999998344720775, iteration: 107295
loss: 0.999626100063324,grad_norm: 0.8376108433108418, iteration: 107296
loss: 1.0607049465179443,grad_norm: 0.9999990866988151, iteration: 107297
loss: 1.1155781745910645,grad_norm: 0.9999992184937058, iteration: 107298
loss: 1.1704750061035156,grad_norm: 0.9999997708352428, iteration: 107299
loss: 1.0172570943832397,grad_norm: 0.9220756669670656, iteration: 107300
loss: 1.0240212678909302,grad_norm: 0.9236273081706682, iteration: 107301
loss: 1.0840110778808594,grad_norm: 0.9999996265430293, iteration: 107302
loss: 0.9764239192008972,grad_norm: 0.8465717768225102, iteration: 107303
loss: 0.998871922492981,grad_norm: 0.9267669726687057, iteration: 107304
loss: 1.0649631023406982,grad_norm: 0.9999991719606556, iteration: 107305
loss: 1.0853520631790161,grad_norm: 0.9999997199883941, iteration: 107306
loss: 1.0849106311798096,grad_norm: 0.9999993775568153, iteration: 107307
loss: 1.0583534240722656,grad_norm: 0.9999992773989999, iteration: 107308
loss: 1.1168476343154907,grad_norm: 0.9999993805823796, iteration: 107309
loss: 1.0560524463653564,grad_norm: 0.999999144479707, iteration: 107310
loss: 1.089501142501831,grad_norm: 0.9999992116550647, iteration: 107311
loss: 1.118794560432434,grad_norm: 0.9999994404295723, iteration: 107312
loss: 1.034685492515564,grad_norm: 0.8567189907233455, iteration: 107313
loss: 1.0736569166183472,grad_norm: 0.999999060801631, iteration: 107314
loss: 0.9943980574607849,grad_norm: 0.8330696093993909, iteration: 107315
loss: 1.0340795516967773,grad_norm: 0.9999991533365988, iteration: 107316
loss: 1.0469216108322144,grad_norm: 0.9372935652964406, iteration: 107317
loss: 1.0290825366973877,grad_norm: 0.9999994479616149, iteration: 107318
loss: 1.0273308753967285,grad_norm: 0.9999990745747626, iteration: 107319
loss: 1.004565715789795,grad_norm: 0.7987212953506502, iteration: 107320
loss: 1.0264257192611694,grad_norm: 0.9999999138731169, iteration: 107321
loss: 1.0150809288024902,grad_norm: 0.9969399867020063, iteration: 107322
loss: 1.129632830619812,grad_norm: 0.9999997429471678, iteration: 107323
loss: 1.034691572189331,grad_norm: 0.999999488688709, iteration: 107324
loss: 0.9892886877059937,grad_norm: 0.9436641237565517, iteration: 107325
loss: 1.1000142097473145,grad_norm: 0.9999994955003335, iteration: 107326
loss: 0.9962790608406067,grad_norm: 0.9980105177730633, iteration: 107327
loss: 1.0956190824508667,grad_norm: 0.9999992836986374, iteration: 107328
loss: 1.0799885988235474,grad_norm: 0.9999992999632356, iteration: 107329
loss: 1.0885554552078247,grad_norm: 0.9999996681986745, iteration: 107330
loss: 1.1094377040863037,grad_norm: 0.9999994906819287, iteration: 107331
loss: 1.057590365409851,grad_norm: 0.9261774844100741, iteration: 107332
loss: 1.0397340059280396,grad_norm: 0.9409205162916272, iteration: 107333
loss: 1.032313346862793,grad_norm: 0.9999997800898321, iteration: 107334
loss: 1.048041820526123,grad_norm: 0.9999992550937875, iteration: 107335
loss: 1.0269124507904053,grad_norm: 0.9999999362962706, iteration: 107336
loss: 1.0815842151641846,grad_norm: 0.9999999781162567, iteration: 107337
loss: 1.0707727670669556,grad_norm: 0.9999991904192425, iteration: 107338
loss: 1.049918532371521,grad_norm: 0.999999223425811, iteration: 107339
loss: 1.0640578269958496,grad_norm: 0.9999994767754191, iteration: 107340
loss: 1.1001574993133545,grad_norm: 0.99999967348878, iteration: 107341
loss: 0.980587899684906,grad_norm: 0.9023743322814436, iteration: 107342
loss: 1.1732425689697266,grad_norm: 0.9999998292394069, iteration: 107343
loss: 1.0773897171020508,grad_norm: 0.9999990327498505, iteration: 107344
loss: 1.061740756034851,grad_norm: 0.9999990957260512, iteration: 107345
loss: 1.0234177112579346,grad_norm: 0.9999999450380801, iteration: 107346
loss: 1.0067657232284546,grad_norm: 0.9999991657576053, iteration: 107347
loss: 1.2005531787872314,grad_norm: 0.9999992241260467, iteration: 107348
loss: 1.0140037536621094,grad_norm: 0.9999992331507778, iteration: 107349
loss: 1.1404368877410889,grad_norm: 0.9999998315771125, iteration: 107350
loss: 1.0387049913406372,grad_norm: 0.9327195984120128, iteration: 107351
loss: 1.0354337692260742,grad_norm: 0.9999997828500647, iteration: 107352
loss: 1.036435842514038,grad_norm: 0.9999997344402544, iteration: 107353
loss: 1.0132200717926025,grad_norm: 0.9999992597832441, iteration: 107354
loss: 1.0312514305114746,grad_norm: 0.9999996342863429, iteration: 107355
loss: 1.0831722021102905,grad_norm: 0.9999994014971054, iteration: 107356
loss: 1.1356278657913208,grad_norm: 0.9786940727767596, iteration: 107357
loss: 1.103505253791809,grad_norm: 0.9999998291938919, iteration: 107358
loss: 1.0067832469940186,grad_norm: 0.7173892782325757, iteration: 107359
loss: 0.9901939034461975,grad_norm: 0.9999990552161917, iteration: 107360
loss: 1.0465255975723267,grad_norm: 0.99999917985475, iteration: 107361
loss: 1.0346393585205078,grad_norm: 0.9999993217991557, iteration: 107362
loss: 1.0216152667999268,grad_norm: 0.9348799397330024, iteration: 107363
loss: 1.0122509002685547,grad_norm: 0.9999991943557769, iteration: 107364
loss: 1.0956931114196777,grad_norm: 0.9999998140785717, iteration: 107365
loss: 1.1587945222854614,grad_norm: 0.9999996839897173, iteration: 107366
loss: 0.9900675415992737,grad_norm: 0.9999990599741512, iteration: 107367
loss: 1.0751471519470215,grad_norm: 0.9999996235637234, iteration: 107368
loss: 1.0831843614578247,grad_norm: 0.999999582850281, iteration: 107369
loss: 0.9724636673927307,grad_norm: 0.7968281339804327, iteration: 107370
loss: 1.0009180307388306,grad_norm: 0.9835072064397753, iteration: 107371
loss: 1.032928228378296,grad_norm: 0.999999130067521, iteration: 107372
loss: 1.13792884349823,grad_norm: 0.9999996008870771, iteration: 107373
loss: 1.0195770263671875,grad_norm: 0.9012524652236878, iteration: 107374
loss: 1.003919243812561,grad_norm: 0.9999998719376432, iteration: 107375
loss: 1.0248429775238037,grad_norm: 0.9999998717868536, iteration: 107376
loss: 1.1396821737289429,grad_norm: 0.999999105202834, iteration: 107377
loss: 1.0207768678665161,grad_norm: 0.8350894139947944, iteration: 107378
loss: 1.0299129486083984,grad_norm: 0.9999992851799387, iteration: 107379
loss: 1.0850942134857178,grad_norm: 0.9999995280168322, iteration: 107380
loss: 1.0709223747253418,grad_norm: 0.9999994644625557, iteration: 107381
loss: 1.1419026851654053,grad_norm: 0.9999992420007819, iteration: 107382
loss: 1.0411381721496582,grad_norm: 0.9999992033077737, iteration: 107383
loss: 1.0251529216766357,grad_norm: 0.975665533283224, iteration: 107384
loss: 1.0333991050720215,grad_norm: 0.9999994500852277, iteration: 107385
loss: 1.0637884140014648,grad_norm: 0.9486190573044228, iteration: 107386
loss: 1.0199189186096191,grad_norm: 0.9352099905275271, iteration: 107387
loss: 1.0416359901428223,grad_norm: 0.815505445870209, iteration: 107388
loss: 1.0133943557739258,grad_norm: 0.9999997917537906, iteration: 107389
loss: 1.0946226119995117,grad_norm: 0.8858406464738853, iteration: 107390
loss: 1.038764238357544,grad_norm: 0.9999993986958544, iteration: 107391
loss: 1.0659644603729248,grad_norm: 0.9999990752910143, iteration: 107392
loss: 1.0284370183944702,grad_norm: 0.8693856067353632, iteration: 107393
loss: 1.097888469696045,grad_norm: 0.9999997014639156, iteration: 107394
loss: 1.0187238454818726,grad_norm: 0.9999997777910756, iteration: 107395
loss: 1.0241029262542725,grad_norm: 0.9999991844951657, iteration: 107396
loss: 1.0604944229125977,grad_norm: 0.9999997996041164, iteration: 107397
loss: 1.0819002389907837,grad_norm: 0.9999995688275304, iteration: 107398
loss: 1.0526942014694214,grad_norm: 0.9712946551207183, iteration: 107399
loss: 1.1066129207611084,grad_norm: 0.9999994647371933, iteration: 107400
loss: 1.1203699111938477,grad_norm: 0.9999998661103231, iteration: 107401
loss: 1.0276257991790771,grad_norm: 0.9999993593095955, iteration: 107402
loss: 1.1456847190856934,grad_norm: 0.9661410487624552, iteration: 107403
loss: 1.0258054733276367,grad_norm: 0.9999991341320115, iteration: 107404
loss: 1.0411560535430908,grad_norm: 0.9999994298713315, iteration: 107405
loss: 1.0881717205047607,grad_norm: 0.9999992759634747, iteration: 107406
loss: 1.0465517044067383,grad_norm: 0.9999992378740156, iteration: 107407
loss: 1.0474928617477417,grad_norm: 0.9999994405375721, iteration: 107408
loss: 1.1082185506820679,grad_norm: 0.9999999960411945, iteration: 107409
loss: 1.1117186546325684,grad_norm: 0.9999995127179919, iteration: 107410
loss: 1.0579973459243774,grad_norm: 0.9999991829642598, iteration: 107411
loss: 1.0890461206436157,grad_norm: 0.964700145765786, iteration: 107412
loss: 0.9944453835487366,grad_norm: 0.8927986198046504, iteration: 107413
loss: 1.007889986038208,grad_norm: 0.9431318679050488, iteration: 107414
loss: 1.0299699306488037,grad_norm: 0.9181589435275224, iteration: 107415
loss: 1.0251127481460571,grad_norm: 0.9999996720183504, iteration: 107416
loss: 1.1139695644378662,grad_norm: 0.9999998517831827, iteration: 107417
loss: 1.0839684009552002,grad_norm: 0.9999993372521586, iteration: 107418
loss: 1.000091314315796,grad_norm: 0.9999998516325439, iteration: 107419
loss: 1.038049340248108,grad_norm: 0.9999994092655393, iteration: 107420
loss: 1.0227246284484863,grad_norm: 0.9999992377223772, iteration: 107421
loss: 1.0299854278564453,grad_norm: 0.9999991932255026, iteration: 107422
loss: 1.0224277973175049,grad_norm: 0.852233004116526, iteration: 107423
loss: 1.030999779701233,grad_norm: 0.9999994815052485, iteration: 107424
loss: 0.9875856637954712,grad_norm: 0.9392313535051464, iteration: 107425
loss: 1.0940624475479126,grad_norm: 0.9999993876119225, iteration: 107426
loss: 1.045170545578003,grad_norm: 0.9999998570331536, iteration: 107427
loss: 1.069388508796692,grad_norm: 0.9999997060404335, iteration: 107428
loss: 0.9932563304901123,grad_norm: 0.8687266252496948, iteration: 107429
loss: 1.0162169933319092,grad_norm: 0.9999991092098596, iteration: 107430
loss: 1.1096426248550415,grad_norm: 0.99999940520382, iteration: 107431
loss: 1.0331686735153198,grad_norm: 0.9999995210796268, iteration: 107432
loss: 1.0216807126998901,grad_norm: 0.9999992481189941, iteration: 107433
loss: 1.071798324584961,grad_norm: 0.9999991810417482, iteration: 107434
loss: 1.0411745309829712,grad_norm: 0.9999990291783322, iteration: 107435
loss: 1.0654816627502441,grad_norm: 0.9999990909980561, iteration: 107436
loss: 1.0848890542984009,grad_norm: 0.9999998475404632, iteration: 107437
loss: 1.021351933479309,grad_norm: 0.794322550309685, iteration: 107438
loss: 1.05082106590271,grad_norm: 0.9999998944663575, iteration: 107439
loss: 1.0698508024215698,grad_norm: 0.9999993754519408, iteration: 107440
loss: 1.0122023820877075,grad_norm: 0.8681894885143212, iteration: 107441
loss: 1.0081297159194946,grad_norm: 0.9999990191491033, iteration: 107442
loss: 1.1045125722885132,grad_norm: 0.9999998052986432, iteration: 107443
loss: 1.0208454132080078,grad_norm: 0.8963100049670942, iteration: 107444
loss: 0.9721082448959351,grad_norm: 0.8079923807066886, iteration: 107445
loss: 1.0077906847000122,grad_norm: 0.9999990688392223, iteration: 107446
loss: 1.1017390489578247,grad_norm: 0.9999992331232478, iteration: 107447
loss: 1.183081865310669,grad_norm: 0.9999996331601811, iteration: 107448
loss: 1.0459165573120117,grad_norm: 0.8866519267466952, iteration: 107449
loss: 1.2108949422836304,grad_norm: 0.9999994965109961, iteration: 107450
loss: 1.0287833213806152,grad_norm: 0.9999990973435732, iteration: 107451
loss: 0.9928773045539856,grad_norm: 0.7578892632813539, iteration: 107452
loss: 1.014365315437317,grad_norm: 0.9999997527318035, iteration: 107453
loss: 1.1007333993911743,grad_norm: 0.9999994843283075, iteration: 107454
loss: 1.0870254039764404,grad_norm: 0.9999997881614923, iteration: 107455
loss: 1.0208399295806885,grad_norm: 0.999999201255615, iteration: 107456
loss: 1.0956443548202515,grad_norm: 0.9999993822348627, iteration: 107457
loss: 1.040097951889038,grad_norm: 0.9018975667046599, iteration: 107458
loss: 1.257033109664917,grad_norm: 0.9999998402992741, iteration: 107459
loss: 1.0287379026412964,grad_norm: 0.8345469738945008, iteration: 107460
loss: 1.0150574445724487,grad_norm: 0.9999991846035476, iteration: 107461
loss: 1.0611317157745361,grad_norm: 0.9999992948695864, iteration: 107462
loss: 1.0130211114883423,grad_norm: 0.9999990732622799, iteration: 107463
loss: 1.1984835863113403,grad_norm: 0.9999995561919706, iteration: 107464
loss: 1.0415242910385132,grad_norm: 0.9999996126367645, iteration: 107465
loss: 1.0259019136428833,grad_norm: 0.9471849124255668, iteration: 107466
loss: 1.0451289415359497,grad_norm: 0.9999999646343396, iteration: 107467
loss: 1.0582830905914307,grad_norm: 0.9999998618352511, iteration: 107468
loss: 1.1720366477966309,grad_norm: 0.9999996675272105, iteration: 107469
loss: 1.07392418384552,grad_norm: 0.9999996439266469, iteration: 107470
loss: 1.0883829593658447,grad_norm: 0.9999996123158501, iteration: 107471
loss: 1.0169309377670288,grad_norm: 0.9999991365801227, iteration: 107472
loss: 0.9961391687393188,grad_norm: 0.8785352418466651, iteration: 107473
loss: 1.055220365524292,grad_norm: 0.9999999253695064, iteration: 107474
loss: 1.1233201026916504,grad_norm: 0.999999238671193, iteration: 107475
loss: 1.1548701524734497,grad_norm: 0.9999993937028774, iteration: 107476
loss: 1.276201844215393,grad_norm: 0.9999998012045632, iteration: 107477
loss: 1.0819898843765259,grad_norm: 0.9999996552164514, iteration: 107478
loss: 1.0461554527282715,grad_norm: 0.9999994374465557, iteration: 107479
loss: 1.0022273063659668,grad_norm: 0.9504695030542719, iteration: 107480
loss: 1.061474323272705,grad_norm: 0.9999991091194445, iteration: 107481
loss: 1.2772889137268066,grad_norm: 0.999999855831577, iteration: 107482
loss: 1.1153697967529297,grad_norm: 0.999999744935209, iteration: 107483
loss: 1.0429508686065674,grad_norm: 0.9999997473796204, iteration: 107484
loss: 1.1558412313461304,grad_norm: 0.9999999618663922, iteration: 107485
loss: 1.1362522840499878,grad_norm: 0.9999994257447931, iteration: 107486
loss: 0.9920881986618042,grad_norm: 0.9999998602621103, iteration: 107487
loss: 1.0476588010787964,grad_norm: 0.9999996843767247, iteration: 107488
loss: 1.1497608423233032,grad_norm: 0.9999998432262995, iteration: 107489
loss: 1.0073530673980713,grad_norm: 0.9999998084451415, iteration: 107490
loss: 1.0213122367858887,grad_norm: 0.9999991040025441, iteration: 107491
loss: 0.9998658895492554,grad_norm: 0.8736788961063195, iteration: 107492
loss: 1.0228742361068726,grad_norm: 0.8093972607389363, iteration: 107493
loss: 0.979694664478302,grad_norm: 0.8846860943672876, iteration: 107494
loss: 1.018243670463562,grad_norm: 0.9999991351751492, iteration: 107495
loss: 1.012050986289978,grad_norm: 0.8569199298180429, iteration: 107496
loss: 1.0763580799102783,grad_norm: 0.9523609384344339, iteration: 107497
loss: 1.0051528215408325,grad_norm: 0.8636927760724208, iteration: 107498
loss: 1.0055720806121826,grad_norm: 0.99999923002254, iteration: 107499
loss: 1.0247070789337158,grad_norm: 0.9999995721742511, iteration: 107500
loss: 1.058851957321167,grad_norm: 0.8570839479551585, iteration: 107501
loss: 1.0202356576919556,grad_norm: 0.9999992849898252, iteration: 107502
loss: 1.0636134147644043,grad_norm: 0.9999992423239739, iteration: 107503
loss: 1.0051112174987793,grad_norm: 0.8917909744709448, iteration: 107504
loss: 1.0750492811203003,grad_norm: 0.9999995157018445, iteration: 107505
loss: 0.9984121918678284,grad_norm: 0.9999990491042381, iteration: 107506
loss: 1.0271761417388916,grad_norm: 0.9999992447500194, iteration: 107507
loss: 1.312070369720459,grad_norm: 0.9999993280359953, iteration: 107508
loss: 1.1299567222595215,grad_norm: 0.9999992562322104, iteration: 107509
loss: 1.0331367254257202,grad_norm: 0.999999543970234, iteration: 107510
loss: 1.0283215045928955,grad_norm: 0.80177439890175, iteration: 107511
loss: 1.1960636377334595,grad_norm: 0.9999993571387346, iteration: 107512
loss: 1.1861610412597656,grad_norm: 0.9999997303738669, iteration: 107513
loss: 1.0301527976989746,grad_norm: 0.9566604693688193, iteration: 107514
loss: 1.3146724700927734,grad_norm: 0.9999995830107428, iteration: 107515
loss: 1.1557241678237915,grad_norm: 0.999999131606661, iteration: 107516
loss: 1.0432319641113281,grad_norm: 0.9999990461408844, iteration: 107517
loss: 1.0509333610534668,grad_norm: 0.999999203277002, iteration: 107518
loss: 0.9744713306427002,grad_norm: 0.8466261385365478, iteration: 107519
loss: 1.196320652961731,grad_norm: 0.9999991473607489, iteration: 107520
loss: 1.1389386653900146,grad_norm: 0.999999372066685, iteration: 107521
loss: 1.0686602592468262,grad_norm: 0.9999994286151014, iteration: 107522
loss: 1.0734328031539917,grad_norm: 0.9999998902143958, iteration: 107523
loss: 1.049875259399414,grad_norm: 0.9999996409328109, iteration: 107524
loss: 1.0122400522232056,grad_norm: 0.9219353469020933, iteration: 107525
loss: 1.083994746208191,grad_norm: 0.9986369678985794, iteration: 107526
loss: 0.9821106195449829,grad_norm: 0.926922441758357, iteration: 107527
loss: 0.9794577956199646,grad_norm: 0.999998989532173, iteration: 107528
loss: 1.0447494983673096,grad_norm: 0.9999999635289225, iteration: 107529
loss: 1.125129222869873,grad_norm: 0.9999994684760716, iteration: 107530
loss: 0.9990870356559753,grad_norm: 0.8665795011137627, iteration: 107531
loss: 1.0201575756072998,grad_norm: 0.9999992233995888, iteration: 107532
loss: 1.0307519435882568,grad_norm: 0.9999997226062902, iteration: 107533
loss: 1.0204799175262451,grad_norm: 0.8913316235146456, iteration: 107534
loss: 1.030083417892456,grad_norm: 0.9999992378554632, iteration: 107535
loss: 1.0280067920684814,grad_norm: 0.9999991735833729, iteration: 107536
loss: 1.100681185722351,grad_norm: 0.9999996451454277, iteration: 107537
loss: 0.9615870714187622,grad_norm: 0.9550610536362549, iteration: 107538
loss: 1.0100079774856567,grad_norm: 0.8692737164273519, iteration: 107539
loss: 1.0432583093643188,grad_norm: 0.9999996782936765, iteration: 107540
loss: 1.2168946266174316,grad_norm: 0.9999997694406502, iteration: 107541
loss: 1.1018685102462769,grad_norm: 0.9999995089394409, iteration: 107542
loss: 1.0484999418258667,grad_norm: 0.9999996003432262, iteration: 107543
loss: 1.0646458864212036,grad_norm: 0.9096000276103239, iteration: 107544
loss: 1.037602186203003,grad_norm: 0.9999999544689699, iteration: 107545
loss: 1.031398057937622,grad_norm: 0.8941105927976498, iteration: 107546
loss: 1.0817152261734009,grad_norm: 0.9999992180694742, iteration: 107547
loss: 0.9693102240562439,grad_norm: 0.7476389607310089, iteration: 107548
loss: 1.261590838432312,grad_norm: 0.9999994382012369, iteration: 107549
loss: 1.1365031003952026,grad_norm: 0.999999579243935, iteration: 107550
loss: 0.9884653687477112,grad_norm: 0.9630084366528421, iteration: 107551
loss: 1.0067553520202637,grad_norm: 0.940037760746637, iteration: 107552
loss: 0.9854575991630554,grad_norm: 0.9999992089684185, iteration: 107553
loss: 1.0613511800765991,grad_norm: 0.9999998201801317, iteration: 107554
loss: 1.009168028831482,grad_norm: 0.9999991409591548, iteration: 107555
loss: 1.1544257402420044,grad_norm: 0.9999993935189767, iteration: 107556
loss: 1.0398718118667603,grad_norm: 0.8118638392042332, iteration: 107557
loss: 0.9634973406791687,grad_norm: 0.7654273717685618, iteration: 107558
loss: 1.1198711395263672,grad_norm: 0.9999992345090981, iteration: 107559
loss: 1.0331555604934692,grad_norm: 0.9718904371624536, iteration: 107560
loss: 1.0069810152053833,grad_norm: 0.9796978369153176, iteration: 107561
loss: 0.9786712527275085,grad_norm: 0.8812643966429329, iteration: 107562
loss: 1.1360461711883545,grad_norm: 0.999999145841106, iteration: 107563
loss: 0.9441685676574707,grad_norm: 0.9355495480188701, iteration: 107564
loss: 0.9919766187667847,grad_norm: 0.9999992575675616, iteration: 107565
loss: 1.0340888500213623,grad_norm: 0.8670678785517968, iteration: 107566
loss: 1.009201169013977,grad_norm: 0.9685493279074098, iteration: 107567
loss: 0.9966906905174255,grad_norm: 0.8728654178398902, iteration: 107568
loss: 1.0146888494491577,grad_norm: 0.9999991699882932, iteration: 107569
loss: 1.1121842861175537,grad_norm: 0.9945976715230836, iteration: 107570
loss: 1.107386589050293,grad_norm: 0.9999994991486651, iteration: 107571
loss: 1.0070199966430664,grad_norm: 0.9959096493669434, iteration: 107572
loss: 1.0063754320144653,grad_norm: 0.9999990592995358, iteration: 107573
loss: 1.021407127380371,grad_norm: 0.7418177332096839, iteration: 107574
loss: 1.0177136659622192,grad_norm: 0.9051905193748078, iteration: 107575
loss: 1.0383778810501099,grad_norm: 0.9836752175734997, iteration: 107576
loss: 0.9774097800254822,grad_norm: 0.8546377297772163, iteration: 107577
loss: 1.0055108070373535,grad_norm: 0.7874594793238346, iteration: 107578
loss: 1.1211268901824951,grad_norm: 0.9999999975055628, iteration: 107579
loss: 1.1298434734344482,grad_norm: 0.9999995156033995, iteration: 107580
loss: 1.04792320728302,grad_norm: 0.9999993201371568, iteration: 107581
loss: 1.0180171728134155,grad_norm: 0.9928589463315286, iteration: 107582
loss: 1.034130334854126,grad_norm: 0.9999997437958091, iteration: 107583
loss: 0.9903907775878906,grad_norm: 0.8511718523594196, iteration: 107584
loss: 1.103268027305603,grad_norm: 0.9999998541763347, iteration: 107585
loss: 1.0683757066726685,grad_norm: 0.8967128987560278, iteration: 107586
loss: 1.037922978401184,grad_norm: 0.9999990656118389, iteration: 107587
loss: 0.9963036775588989,grad_norm: 0.9332128904520315, iteration: 107588
loss: 1.0042831897735596,grad_norm: 0.8976246710380462, iteration: 107589
loss: 1.047494649887085,grad_norm: 0.9999990787569122, iteration: 107590
loss: 0.9496728181838989,grad_norm: 0.9122779863891615, iteration: 107591
loss: 1.0392283201217651,grad_norm: 0.9842326994650634, iteration: 107592
loss: 1.0069726705551147,grad_norm: 0.9354289533584357, iteration: 107593
loss: 1.01641845703125,grad_norm: 0.7389201622243855, iteration: 107594
loss: 1.0222545862197876,grad_norm: 0.9577082959404306, iteration: 107595
loss: 1.043429970741272,grad_norm: 0.9999991813482377, iteration: 107596
loss: 0.9698488712310791,grad_norm: 0.9852372762622569, iteration: 107597
loss: 1.0908952951431274,grad_norm: 0.9999991368199475, iteration: 107598
loss: 1.0741320848464966,grad_norm: 0.9999995391451529, iteration: 107599
loss: 1.0077003240585327,grad_norm: 0.999999138328124, iteration: 107600
loss: 1.0326180458068848,grad_norm: 0.9643143035937822, iteration: 107601
loss: 1.030094027519226,grad_norm: 0.9515531499454486, iteration: 107602
loss: 1.0451178550720215,grad_norm: 0.999999284423979, iteration: 107603
loss: 1.0097993612289429,grad_norm: 0.8732987828394385, iteration: 107604
loss: 1.0742968320846558,grad_norm: 0.9999996520871505, iteration: 107605
loss: 1.035506248474121,grad_norm: 0.9913179935071117, iteration: 107606
loss: 1.0078859329223633,grad_norm: 0.9730880829439897, iteration: 107607
loss: 1.0403594970703125,grad_norm: 0.9234511967184009, iteration: 107608
loss: 1.0106734037399292,grad_norm: 0.924613349612479, iteration: 107609
loss: 1.0627814531326294,grad_norm: 0.9999993298896324, iteration: 107610
loss: 0.9740496277809143,grad_norm: 0.999999550834988, iteration: 107611
loss: 0.981390118598938,grad_norm: 0.9999991673411383, iteration: 107612
loss: 1.0656949281692505,grad_norm: 0.9999990730136652, iteration: 107613
loss: 1.0822807550430298,grad_norm: 0.984308295741099, iteration: 107614
loss: 1.0206964015960693,grad_norm: 0.9999993006851389, iteration: 107615
loss: 1.0299724340438843,grad_norm: 0.9999991173645641, iteration: 107616
loss: 1.0875403881072998,grad_norm: 0.9999995429765894, iteration: 107617
loss: 1.0081008672714233,grad_norm: 0.9370180274037798, iteration: 107618
loss: 1.0189117193222046,grad_norm: 0.9999997250161332, iteration: 107619
loss: 1.0075714588165283,grad_norm: 0.9999992340192901, iteration: 107620
loss: 0.9887376427650452,grad_norm: 0.9297406996317508, iteration: 107621
loss: 0.9995412826538086,grad_norm: 0.8753525209615369, iteration: 107622
loss: 1.0232855081558228,grad_norm: 0.811428074934357, iteration: 107623
loss: 1.0648689270019531,grad_norm: 0.9999991763476066, iteration: 107624
loss: 1.013710379600525,grad_norm: 0.8318225983340983, iteration: 107625
loss: 1.023169755935669,grad_norm: 0.7742432709820953, iteration: 107626
loss: 1.0268322229385376,grad_norm: 0.9099514930970563, iteration: 107627
loss: 1.0163270235061646,grad_norm: 0.9999990743929227, iteration: 107628
loss: 1.0501049757003784,grad_norm: 0.9999994621838594, iteration: 107629
loss: 1.1827985048294067,grad_norm: 0.9999999329094917, iteration: 107630
loss: 1.0264980792999268,grad_norm: 0.973111087333658, iteration: 107631
loss: 1.0989816188812256,grad_norm: 0.9999996329247589, iteration: 107632
loss: 1.0916893482208252,grad_norm: 0.9999998853779057, iteration: 107633
loss: 1.0163220167160034,grad_norm: 0.7842211313787258, iteration: 107634
loss: 1.0560381412506104,grad_norm: 0.9999992241784585, iteration: 107635
loss: 1.097866415977478,grad_norm: 0.9143392591284397, iteration: 107636
loss: 1.0006541013717651,grad_norm: 0.8499579946272748, iteration: 107637
loss: 1.1776270866394043,grad_norm: 0.9999997138620034, iteration: 107638
loss: 1.0505508184432983,grad_norm: 0.9999998499341192, iteration: 107639
loss: 1.0064623355865479,grad_norm: 0.790844363122862, iteration: 107640
loss: 1.0298805236816406,grad_norm: 0.9999997369300708, iteration: 107641
loss: 0.9804337620735168,grad_norm: 0.8245066507841048, iteration: 107642
loss: 0.9660620093345642,grad_norm: 0.9828271618100071, iteration: 107643
loss: 0.9900070428848267,grad_norm: 0.8756727042467509, iteration: 107644
loss: 1.0393604040145874,grad_norm: 0.8761639449718049, iteration: 107645
loss: 1.0769007205963135,grad_norm: 0.9999990490009686, iteration: 107646
loss: 1.159620761871338,grad_norm: 0.9999993184042039, iteration: 107647
loss: 1.0798053741455078,grad_norm: 0.999999184082343, iteration: 107648
loss: 1.0621083974838257,grad_norm: 0.8038557278164293, iteration: 107649
loss: 0.9636679887771606,grad_norm: 0.8100456038433602, iteration: 107650
loss: 0.9789876937866211,grad_norm: 0.9628909091218911, iteration: 107651
loss: 1.1334596872329712,grad_norm: 0.9999999092822168, iteration: 107652
loss: 1.00624680519104,grad_norm: 0.8088616283178748, iteration: 107653
loss: 1.0700438022613525,grad_norm: 0.9999992888022684, iteration: 107654
loss: 1.1044739484786987,grad_norm: 0.999999229903048, iteration: 107655
loss: 1.0250725746154785,grad_norm: 0.9999991626179375, iteration: 107656
loss: 1.0315464735031128,grad_norm: 0.9132599924844503, iteration: 107657
loss: 1.0070537328720093,grad_norm: 0.9999991505782357, iteration: 107658
loss: 1.0188605785369873,grad_norm: 0.9999993795717526, iteration: 107659
loss: 1.1074045896530151,grad_norm: 0.9999999049462807, iteration: 107660
loss: 0.9888139963150024,grad_norm: 0.9999993459353801, iteration: 107661
loss: 1.0201518535614014,grad_norm: 0.8697037179891304, iteration: 107662
loss: 1.109569787979126,grad_norm: 0.9999990454355616, iteration: 107663
loss: 1.0461418628692627,grad_norm: 0.7545336897129394, iteration: 107664
loss: 1.068374514579773,grad_norm: 0.8693827946175016, iteration: 107665
loss: 1.0195063352584839,grad_norm: 1.0000000281115302, iteration: 107666
loss: 1.0447590351104736,grad_norm: 0.9999991763396371, iteration: 107667
loss: 0.9999067783355713,grad_norm: 0.9999991218588206, iteration: 107668
loss: 1.0292277336120605,grad_norm: 0.8391164356303018, iteration: 107669
loss: 1.0277349948883057,grad_norm: 0.9999991378029977, iteration: 107670
loss: 1.0228443145751953,grad_norm: 0.9521537088946886, iteration: 107671
loss: 1.1056374311447144,grad_norm: 0.9999996389536749, iteration: 107672
loss: 1.0178853273391724,grad_norm: 0.9812456812213777, iteration: 107673
loss: 0.9927664399147034,grad_norm: 0.9575675590795575, iteration: 107674
loss: 1.0323699712753296,grad_norm: 0.9999991938322256, iteration: 107675
loss: 1.0203280448913574,grad_norm: 0.8581124697280322, iteration: 107676
loss: 1.0051063299179077,grad_norm: 0.8929640104572634, iteration: 107677
loss: 1.0962754487991333,grad_norm: 0.9668947459457071, iteration: 107678
loss: 1.0502263307571411,grad_norm: 0.9122638437873691, iteration: 107679
loss: 1.0506420135498047,grad_norm: 0.999999059404718, iteration: 107680
loss: 0.9944086670875549,grad_norm: 0.9999994227401742, iteration: 107681
loss: 1.0344748497009277,grad_norm: 0.9856283828459796, iteration: 107682
loss: 1.0226482152938843,grad_norm: 0.9999995887911967, iteration: 107683
loss: 1.161124348640442,grad_norm: 0.9999996951988402, iteration: 107684
loss: 1.0533331632614136,grad_norm: 0.9999994981644437, iteration: 107685
loss: 0.9624784588813782,grad_norm: 0.9237794634031921, iteration: 107686
loss: 0.9891204237937927,grad_norm: 0.9999992236696569, iteration: 107687
loss: 1.0547410249710083,grad_norm: 0.9999999949927754, iteration: 107688
loss: 1.005251407623291,grad_norm: 0.8590059593788089, iteration: 107689
loss: 1.0066009759902954,grad_norm: 0.8573955214749077, iteration: 107690
loss: 0.9795777201652527,grad_norm: 0.7833246533263702, iteration: 107691
loss: 0.9798648357391357,grad_norm: 0.7325672221257288, iteration: 107692
loss: 1.0528407096862793,grad_norm: 0.999999752827163, iteration: 107693
loss: 0.9940548539161682,grad_norm: 0.9999994360831759, iteration: 107694
loss: 1.0026941299438477,grad_norm: 0.9142020055196676, iteration: 107695
loss: 1.0605237483978271,grad_norm: 0.9999997831862647, iteration: 107696
loss: 1.0668026208877563,grad_norm: 0.9869612996596389, iteration: 107697
loss: 1.0159252882003784,grad_norm: 0.9582738418103746, iteration: 107698
loss: 1.0084112882614136,grad_norm: 0.9152657478079707, iteration: 107699
loss: 1.0147253274917603,grad_norm: 0.900202427981673, iteration: 107700
loss: 1.049659013748169,grad_norm: 0.9999992482664586, iteration: 107701
loss: 1.0268560647964478,grad_norm: 0.8819975617935538, iteration: 107702
loss: 1.042038083076477,grad_norm: 0.8361757081368312, iteration: 107703
loss: 1.000827431678772,grad_norm: 0.7833745441574015, iteration: 107704
loss: 1.0539873838424683,grad_norm: 0.9999992017121591, iteration: 107705
loss: 0.9803736805915833,grad_norm: 0.9109593161417062, iteration: 107706
loss: 1.0354112386703491,grad_norm: 0.9999992254870793, iteration: 107707
loss: 1.0293076038360596,grad_norm: 0.9999997786164166, iteration: 107708
loss: 0.9861738681793213,grad_norm: 0.9140727688349672, iteration: 107709
loss: 1.1313661336898804,grad_norm: 0.9999996659613801, iteration: 107710
loss: 0.980400562286377,grad_norm: 0.972516692972323, iteration: 107711
loss: 1.050679087638855,grad_norm: 0.9999989737369951, iteration: 107712
loss: 1.0529141426086426,grad_norm: 0.9999990976970528, iteration: 107713
loss: 1.0382837057113647,grad_norm: 0.999999000190217, iteration: 107714
loss: 1.0354868173599243,grad_norm: 0.9999998537340264, iteration: 107715
loss: 1.0037760734558105,grad_norm: 0.9859618590778554, iteration: 107716
loss: 1.013259768486023,grad_norm: 0.9999994044131397, iteration: 107717
loss: 1.1142594814300537,grad_norm: 0.999999358753665, iteration: 107718
loss: 0.992094099521637,grad_norm: 0.880147062373849, iteration: 107719
loss: 1.0879571437835693,grad_norm: 0.9999991744273128, iteration: 107720
loss: 0.9716068506240845,grad_norm: 0.9489535898304956, iteration: 107721
loss: 0.9676411151885986,grad_norm: 0.9999991575126977, iteration: 107722
loss: 0.9939061999320984,grad_norm: 0.9999990476538932, iteration: 107723
loss: 1.0589760541915894,grad_norm: 0.9797293800491164, iteration: 107724
loss: 1.027816891670227,grad_norm: 0.9999991999199418, iteration: 107725
loss: 1.0077356100082397,grad_norm: 0.9586992785951155, iteration: 107726
loss: 1.0168428421020508,grad_norm: 0.9999990966321793, iteration: 107727
loss: 1.0874489545822144,grad_norm: 0.9999992209142424, iteration: 107728
loss: 0.9758760333061218,grad_norm: 0.8498034367235415, iteration: 107729
loss: 1.0191177129745483,grad_norm: 0.9999999422322059, iteration: 107730
loss: 1.0710134506225586,grad_norm: 0.9999995574472572, iteration: 107731
loss: 1.0504411458969116,grad_norm: 0.852177006323991, iteration: 107732
loss: 1.0678461790084839,grad_norm: 0.9109820992596721, iteration: 107733
loss: 1.0318660736083984,grad_norm: 0.9999994570547426, iteration: 107734
loss: 0.9939836263656616,grad_norm: 0.9999991668521634, iteration: 107735
loss: 1.0460572242736816,grad_norm: 0.9999993333444294, iteration: 107736
loss: 0.9715511202812195,grad_norm: 0.8632113126027646, iteration: 107737
loss: 1.0719568729400635,grad_norm: 0.9999996479010201, iteration: 107738
loss: 1.2546566724777222,grad_norm: 0.9999994979831915, iteration: 107739
loss: 1.0223886966705322,grad_norm: 0.932517973089743, iteration: 107740
loss: 1.0197441577911377,grad_norm: 0.7743432233302436, iteration: 107741
loss: 1.0403900146484375,grad_norm: 0.9365648390541595, iteration: 107742
loss: 1.0283831357955933,grad_norm: 0.8762754434085683, iteration: 107743
loss: 1.0099706649780273,grad_norm: 0.9999991211750795, iteration: 107744
loss: 0.9563735723495483,grad_norm: 0.9248978878871197, iteration: 107745
loss: 0.9920411109924316,grad_norm: 0.911273653035589, iteration: 107746
loss: 1.0262117385864258,grad_norm: 0.8645069075919876, iteration: 107747
loss: 1.0222547054290771,grad_norm: 0.9620760731863194, iteration: 107748
loss: 1.0053790807724,grad_norm: 0.9023365676454378, iteration: 107749
loss: 0.9759121537208557,grad_norm: 0.9469352551219675, iteration: 107750
loss: 1.0031838417053223,grad_norm: 0.8851238098224444, iteration: 107751
loss: 0.9735068678855896,grad_norm: 0.9044237887644092, iteration: 107752
loss: 1.016638994216919,grad_norm: 0.9999995181177221, iteration: 107753
loss: 1.1247371435165405,grad_norm: 0.9999995604715312, iteration: 107754
loss: 0.9925218224525452,grad_norm: 0.9999992123284349, iteration: 107755
loss: 1.0680540800094604,grad_norm: 0.9999992116396855, iteration: 107756
loss: 1.032872200012207,grad_norm: 0.9999991478267697, iteration: 107757
loss: 1.0077683925628662,grad_norm: 0.7530527850047641, iteration: 107758
loss: 1.0498273372650146,grad_norm: 0.9999993143723492, iteration: 107759
loss: 1.0256563425064087,grad_norm: 0.9288902620505406, iteration: 107760
loss: 1.0597172975540161,grad_norm: 0.9999998809790926, iteration: 107761
loss: 1.0111159086227417,grad_norm: 0.9873355740511195, iteration: 107762
loss: 1.0107567310333252,grad_norm: 0.7982181200943039, iteration: 107763
loss: 0.9945803284645081,grad_norm: 0.9999993706098345, iteration: 107764
loss: 1.066186547279358,grad_norm: 0.9999997409833629, iteration: 107765
loss: 1.1975839138031006,grad_norm: 0.9999996079017525, iteration: 107766
loss: 1.0376026630401611,grad_norm: 0.999999275519235, iteration: 107767
loss: 1.1008305549621582,grad_norm: 0.9999993401751789, iteration: 107768
loss: 0.975792646408081,grad_norm: 0.8696490045120338, iteration: 107769
loss: 1.0094469785690308,grad_norm: 0.805005168038242, iteration: 107770
loss: 0.9912809133529663,grad_norm: 0.9999990436735251, iteration: 107771
loss: 0.9851166009902954,grad_norm: 0.7709452038620067, iteration: 107772
loss: 1.036110520362854,grad_norm: 0.9799187505976741, iteration: 107773
loss: 1.0297350883483887,grad_norm: 0.9999990443267327, iteration: 107774
loss: 0.993048906326294,grad_norm: 0.9999997061450645, iteration: 107775
loss: 0.9820983409881592,grad_norm: 0.7408584995398503, iteration: 107776
loss: 1.0982509851455688,grad_norm: 0.9999999353839005, iteration: 107777
loss: 1.038276195526123,grad_norm: 0.9999992075803746, iteration: 107778
loss: 0.962562084197998,grad_norm: 0.8727581374100812, iteration: 107779
loss: 0.9942665100097656,grad_norm: 0.8147293332858565, iteration: 107780
loss: 1.045724630355835,grad_norm: 0.9999991647468698, iteration: 107781
loss: 1.023084044456482,grad_norm: 0.8036368934554304, iteration: 107782
loss: 1.01862370967865,grad_norm: 0.9302701083817325, iteration: 107783
loss: 1.183251142501831,grad_norm: 0.999999646169718, iteration: 107784
loss: 1.0692634582519531,grad_norm: 0.9999991955883368, iteration: 107785
loss: 1.0130770206451416,grad_norm: 0.7994814514649794, iteration: 107786
loss: 1.0238765478134155,grad_norm: 0.9999990677141599, iteration: 107787
loss: 1.0507256984710693,grad_norm: 0.9999991977371233, iteration: 107788
loss: 1.153307557106018,grad_norm: 0.9999998174977162, iteration: 107789
loss: 1.0078763961791992,grad_norm: 0.9999997525768278, iteration: 107790
loss: 1.0041000843048096,grad_norm: 0.9999998325068852, iteration: 107791
loss: 0.9972452521324158,grad_norm: 0.7493915147084602, iteration: 107792
loss: 1.0399296283721924,grad_norm: 0.9999995201868301, iteration: 107793
loss: 1.1611342430114746,grad_norm: 0.9999997504754811, iteration: 107794
loss: 1.0228410959243774,grad_norm: 0.9999990910794002, iteration: 107795
loss: 1.0177510976791382,grad_norm: 0.9377893570995618, iteration: 107796
loss: 0.9950769543647766,grad_norm: 0.8397383875857944, iteration: 107797
loss: 1.2008626461029053,grad_norm: 0.9999996287472945, iteration: 107798
loss: 1.0117928981781006,grad_norm: 0.7596629161595267, iteration: 107799
loss: 0.9958676695823669,grad_norm: 0.8118784076135674, iteration: 107800
loss: 1.0489652156829834,grad_norm: 0.9999991296715091, iteration: 107801
loss: 0.9922338128089905,grad_norm: 0.999999794506081, iteration: 107802
loss: 1.0913091897964478,grad_norm: 0.9999996112343541, iteration: 107803
loss: 1.008380651473999,grad_norm: 0.923413107458156, iteration: 107804
loss: 1.0263326168060303,grad_norm: 0.917428827112024, iteration: 107805
loss: 1.138480544090271,grad_norm: 0.9999996070054831, iteration: 107806
loss: 1.04683518409729,grad_norm: 0.9999990783670285, iteration: 107807
loss: 0.9774051308631897,grad_norm: 0.9269948156012203, iteration: 107808
loss: 1.032809853553772,grad_norm: 0.9999998014162078, iteration: 107809
loss: 1.0377973318099976,grad_norm: 0.9596726045660541, iteration: 107810
loss: 0.9906243681907654,grad_norm: 0.7939643457985988, iteration: 107811
loss: 0.9843135476112366,grad_norm: 0.8014343215516067, iteration: 107812
loss: 1.0228830575942993,grad_norm: 0.9999992944015172, iteration: 107813
loss: 1.015557050704956,grad_norm: 0.9999991442152086, iteration: 107814
loss: 0.981798529624939,grad_norm: 0.9831310693174211, iteration: 107815
loss: 0.968626856803894,grad_norm: 0.9999992054123552, iteration: 107816
loss: 1.175106167793274,grad_norm: 0.9999990704656394, iteration: 107817
loss: 1.0216892957687378,grad_norm: 0.9999996863213153, iteration: 107818
loss: 0.967696487903595,grad_norm: 0.9999989952453222, iteration: 107819
loss: 1.0966956615447998,grad_norm: 0.999999605242266, iteration: 107820
loss: 1.0900112390518188,grad_norm: 0.9999994712552056, iteration: 107821
loss: 1.014631748199463,grad_norm: 0.8186024186196535, iteration: 107822
loss: 1.0648990869522095,grad_norm: 0.9999991003834994, iteration: 107823
loss: 1.1147202253341675,grad_norm: 0.9999998110198818, iteration: 107824
loss: 1.0626047849655151,grad_norm: 0.9999993209144524, iteration: 107825
loss: 1.0076709985733032,grad_norm: 0.9999994172991773, iteration: 107826
loss: 1.0756862163543701,grad_norm: 0.9367978006462336, iteration: 107827
loss: 1.033607840538025,grad_norm: 0.8026906332450062, iteration: 107828
loss: 1.0266213417053223,grad_norm: 0.9999992016937225, iteration: 107829
loss: 1.0022125244140625,grad_norm: 0.999999627027929, iteration: 107830
loss: 1.062983751296997,grad_norm: 0.9999993848027602, iteration: 107831
loss: 1.007582187652588,grad_norm: 0.8579839598251314, iteration: 107832
loss: 0.984303891658783,grad_norm: 0.9999991499446487, iteration: 107833
loss: 1.0757979154586792,grad_norm: 0.9999999250926094, iteration: 107834
loss: 1.179071307182312,grad_norm: 0.9999993542664253, iteration: 107835
loss: 0.9927042722702026,grad_norm: 0.8949859479670963, iteration: 107836
loss: 1.017292857170105,grad_norm: 0.9999994358974128, iteration: 107837
loss: 1.0964913368225098,grad_norm: 0.9999990827534285, iteration: 107838
loss: 1.0357896089553833,grad_norm: 0.9999992347672108, iteration: 107839
loss: 1.004992127418518,grad_norm: 0.802286576793853, iteration: 107840
loss: 1.056955099105835,grad_norm: 0.9999995057936165, iteration: 107841
loss: 0.9828634262084961,grad_norm: 0.9999991016917867, iteration: 107842
loss: 0.9689750075340271,grad_norm: 0.9999994705751857, iteration: 107843
loss: 1.020675539970398,grad_norm: 0.9039908764597646, iteration: 107844
loss: 1.0239920616149902,grad_norm: 0.9999991574558948, iteration: 107845
loss: 1.0145076513290405,grad_norm: 0.999999111915433, iteration: 107846
loss: 1.011836051940918,grad_norm: 0.920584976404394, iteration: 107847
loss: 1.0402157306671143,grad_norm: 0.9999992481953781, iteration: 107848
loss: 1.0152769088745117,grad_norm: 0.9999994365844865, iteration: 107849
loss: 0.9869109988212585,grad_norm: 0.9335010218476427, iteration: 107850
loss: 1.2221076488494873,grad_norm: 0.9999999704313532, iteration: 107851
loss: 0.9741926193237305,grad_norm: 0.999999132381598, iteration: 107852
loss: 1.060916543006897,grad_norm: 0.9999992067546448, iteration: 107853
loss: 0.988935112953186,grad_norm: 0.9361205779547714, iteration: 107854
loss: 1.0747628211975098,grad_norm: 0.9999992975291563, iteration: 107855
loss: 1.070355772972107,grad_norm: 0.9999996958653111, iteration: 107856
loss: 0.9631145596504211,grad_norm: 0.9095349408554667, iteration: 107857
loss: 1.0037996768951416,grad_norm: 0.9999993435954069, iteration: 107858
loss: 1.018405795097351,grad_norm: 0.9999997996422864, iteration: 107859
loss: 0.9869887232780457,grad_norm: 0.9247720583751576, iteration: 107860
loss: 1.095354437828064,grad_norm: 0.999999558586762, iteration: 107861
loss: 1.02186918258667,grad_norm: 0.9598205516357643, iteration: 107862
loss: 1.0040425062179565,grad_norm: 0.9999992151471219, iteration: 107863
loss: 1.0595028400421143,grad_norm: 0.9999993944781247, iteration: 107864
loss: 1.0441375970840454,grad_norm: 0.9999995539721518, iteration: 107865
loss: 1.041631817817688,grad_norm: 0.999999119759389, iteration: 107866
loss: 1.0699596405029297,grad_norm: 0.7777963492397121, iteration: 107867
loss: 1.047707200050354,grad_norm: 0.9999997544408381, iteration: 107868
loss: 1.0873948335647583,grad_norm: 0.8638281422234517, iteration: 107869
loss: 1.0225191116333008,grad_norm: 0.864903421232651, iteration: 107870
loss: 1.0129715204238892,grad_norm: 0.7320580728856239, iteration: 107871
loss: 1.0427451133728027,grad_norm: 0.9999996154235055, iteration: 107872
loss: 1.031615972518921,grad_norm: 0.9999992661491226, iteration: 107873
loss: 1.0287278890609741,grad_norm: 0.9999992071473861, iteration: 107874
loss: 1.028860092163086,grad_norm: 0.733814882712364, iteration: 107875
loss: 1.0475691556930542,grad_norm: 0.999999145991763, iteration: 107876
loss: 1.041194200515747,grad_norm: 0.9999993490136858, iteration: 107877
loss: 1.0441508293151855,grad_norm: 0.9999990855999321, iteration: 107878
loss: 1.105182409286499,grad_norm: 0.9999996713179583, iteration: 107879
loss: 1.0596548318862915,grad_norm: 0.9999993307509728, iteration: 107880
loss: 1.0711578130722046,grad_norm: 0.9999996079146458, iteration: 107881
loss: 1.0162606239318848,grad_norm: 0.9999989913022926, iteration: 107882
loss: 1.0671859979629517,grad_norm: 0.9999998121019599, iteration: 107883
loss: 1.029532551765442,grad_norm: 0.9999992350656489, iteration: 107884
loss: 0.9897621870040894,grad_norm: 0.9999990706803995, iteration: 107885
loss: 1.0396307706832886,grad_norm: 0.9999996286000402, iteration: 107886
loss: 1.064753532409668,grad_norm: 0.99999918898794, iteration: 107887
loss: 1.0307092666625977,grad_norm: 0.8966413636381647, iteration: 107888
loss: 1.0134549140930176,grad_norm: 0.9999991531968231, iteration: 107889
loss: 0.9932401180267334,grad_norm: 0.9999991197642946, iteration: 107890
loss: 1.0751386880874634,grad_norm: 0.999999479977748, iteration: 107891
loss: 1.3697779178619385,grad_norm: 0.9999999863279957, iteration: 107892
loss: 0.9853761792182922,grad_norm: 0.8357996106794177, iteration: 107893
loss: 1.0280426740646362,grad_norm: 0.8596169657013001, iteration: 107894
loss: 1.0013142824172974,grad_norm: 0.8209754322011993, iteration: 107895
loss: 1.0021777153015137,grad_norm: 0.9999992989740074, iteration: 107896
loss: 1.0289568901062012,grad_norm: 0.875943467854394, iteration: 107897
loss: 1.0586787462234497,grad_norm: 0.9999996649064835, iteration: 107898
loss: 1.0285238027572632,grad_norm: 0.9999990535386718, iteration: 107899
loss: 1.0485962629318237,grad_norm: 0.8833944558971487, iteration: 107900
loss: 0.9935809373855591,grad_norm: 0.9300373018486171, iteration: 107901
loss: 1.0128941535949707,grad_norm: 0.8508245500890859, iteration: 107902
loss: 1.0141716003417969,grad_norm: 0.9999999865863239, iteration: 107903
loss: 1.027341365814209,grad_norm: 0.8672698318268223, iteration: 107904
loss: 1.0744421482086182,grad_norm: 0.9999996099362742, iteration: 107905
loss: 1.0321134328842163,grad_norm: 0.9999999325720721, iteration: 107906
loss: 1.027111291885376,grad_norm: 0.9999996295024614, iteration: 107907
loss: 1.0226277112960815,grad_norm: 0.9999998505222947, iteration: 107908
loss: 0.995423436164856,grad_norm: 0.9999989713986879, iteration: 107909
loss: 1.1091315746307373,grad_norm: 0.9999996419219721, iteration: 107910
loss: 1.0697565078735352,grad_norm: 0.9999997364600257, iteration: 107911
loss: 1.0464987754821777,grad_norm: 0.9999990332773024, iteration: 107912
loss: 1.1685712337493896,grad_norm: 0.9999997089464422, iteration: 107913
loss: 1.0464600324630737,grad_norm: 0.9999998991238506, iteration: 107914
loss: 0.9906102418899536,grad_norm: 0.7283969458528338, iteration: 107915
loss: 1.0018163919448853,grad_norm: 0.9009835938811379, iteration: 107916
loss: 1.1183627843856812,grad_norm: 0.999999916143291, iteration: 107917
loss: 0.9930793046951294,grad_norm: 0.9636326123805651, iteration: 107918
loss: 1.0799586772918701,grad_norm: 0.9999995466559247, iteration: 107919
loss: 1.1187381744384766,grad_norm: 0.9999997503111733, iteration: 107920
loss: 1.0154832601547241,grad_norm: 0.9999998363333152, iteration: 107921
loss: 0.9800211191177368,grad_norm: 0.8591157046246641, iteration: 107922
loss: 1.0439668893814087,grad_norm: 0.9999996348027348, iteration: 107923
loss: 1.0528877973556519,grad_norm: 0.7515609827471792, iteration: 107924
loss: 1.0445196628570557,grad_norm: 0.8111511829707766, iteration: 107925
loss: 1.1884236335754395,grad_norm: 0.9999996947945997, iteration: 107926
loss: 1.0937596559524536,grad_norm: 0.9849520195483175, iteration: 107927
loss: 1.1614813804626465,grad_norm: 0.9999999454230727, iteration: 107928
loss: 1.0460951328277588,grad_norm: 0.999999258398072, iteration: 107929
loss: 1.060807704925537,grad_norm: 0.9780387320941373, iteration: 107930
loss: 1.0618215799331665,grad_norm: 0.9999996023369476, iteration: 107931
loss: 0.9692680835723877,grad_norm: 0.9757845092358866, iteration: 107932
loss: 1.2615021467208862,grad_norm: 0.9999993485331407, iteration: 107933
loss: 1.0130784511566162,grad_norm: 0.9999993358247306, iteration: 107934
loss: 1.1281331777572632,grad_norm: 0.9999995397688274, iteration: 107935
loss: 1.0906542539596558,grad_norm: 0.9999993408091331, iteration: 107936
loss: 1.0796294212341309,grad_norm: 0.999999819572051, iteration: 107937
loss: 0.9887560606002808,grad_norm: 0.9489938516709592, iteration: 107938
loss: 1.047609567642212,grad_norm: 0.8878797210354133, iteration: 107939
loss: 0.9718134999275208,grad_norm: 0.9999990785751866, iteration: 107940
loss: 1.1972075700759888,grad_norm: 0.999999940485393, iteration: 107941
loss: 1.1282844543457031,grad_norm: 0.9999995845634227, iteration: 107942
loss: 0.9965724349021912,grad_norm: 0.9999996318639633, iteration: 107943
loss: 1.2589770555496216,grad_norm: 1.0000000067049246, iteration: 107944
loss: 1.0042634010314941,grad_norm: 0.9248804727403183, iteration: 107945
loss: 1.0452078580856323,grad_norm: 0.9999998918517923, iteration: 107946
loss: 1.0293208360671997,grad_norm: 0.9999990555169036, iteration: 107947
loss: 1.0122932195663452,grad_norm: 0.9999991403782327, iteration: 107948
loss: 1.0315288305282593,grad_norm: 0.9999991241145404, iteration: 107949
loss: 1.0458472967147827,grad_norm: 0.9999989876553669, iteration: 107950
loss: 1.016579508781433,grad_norm: 0.9867587778280812, iteration: 107951
loss: 1.1141246557235718,grad_norm: 0.9999997726061413, iteration: 107952
loss: 1.0336004495620728,grad_norm: 0.9999999186214159, iteration: 107953
loss: 1.0405017137527466,grad_norm: 0.9999992387814935, iteration: 107954
loss: 1.0390321016311646,grad_norm: 0.9433214190192614, iteration: 107955
loss: 1.0498908758163452,grad_norm: 0.9999995304785295, iteration: 107956
loss: 1.024562954902649,grad_norm: 0.9999999083473888, iteration: 107957
loss: 1.002497673034668,grad_norm: 0.8184747189128836, iteration: 107958
loss: 1.1401734352111816,grad_norm: 0.9372401058419843, iteration: 107959
loss: 1.0266380310058594,grad_norm: 0.9999995737077043, iteration: 107960
loss: 1.1479220390319824,grad_norm: 0.9999999443816838, iteration: 107961
loss: 1.0022740364074707,grad_norm: 0.9999992232326376, iteration: 107962
loss: 1.0927484035491943,grad_norm: 0.9848654928174431, iteration: 107963
loss: 1.0964521169662476,grad_norm: 0.8935876777590045, iteration: 107964
loss: 1.0701686143875122,grad_norm: 0.9999991469209655, iteration: 107965
loss: 1.0744882822036743,grad_norm: 0.9999996577733409, iteration: 107966
loss: 1.0238807201385498,grad_norm: 0.883604381449488, iteration: 107967
loss: 0.9795562028884888,grad_norm: 0.9999995744631032, iteration: 107968
loss: 0.9883160591125488,grad_norm: 0.9978250070426707, iteration: 107969
loss: 1.019715428352356,grad_norm: 0.999999715529497, iteration: 107970
loss: 1.0136123895645142,grad_norm: 0.9384342647266645, iteration: 107971
loss: 0.9907386898994446,grad_norm: 0.8961393643066874, iteration: 107972
loss: 1.1659631729125977,grad_norm: 0.9999992233894769, iteration: 107973
loss: 1.0287868976593018,grad_norm: 0.9999994056505631, iteration: 107974
loss: 1.0592273473739624,grad_norm: 0.9999996502366868, iteration: 107975
loss: 1.0236363410949707,grad_norm: 0.9999990798715697, iteration: 107976
loss: 1.1764825582504272,grad_norm: 0.9999998729961483, iteration: 107977
loss: 1.1184628009796143,grad_norm: 0.9999994513777456, iteration: 107978
loss: 1.0856703519821167,grad_norm: 0.9999990755586002, iteration: 107979
loss: 1.008340835571289,grad_norm: 0.9516742710596389, iteration: 107980
loss: 1.0479037761688232,grad_norm: 0.9999995183607545, iteration: 107981
loss: 1.145326852798462,grad_norm: 0.9999996616469532, iteration: 107982
loss: 1.0180027484893799,grad_norm: 0.9999992319039486, iteration: 107983
loss: 1.0647982358932495,grad_norm: 0.9999990318251784, iteration: 107984
loss: 1.0169398784637451,grad_norm: 0.9999990724127259, iteration: 107985
loss: 1.008325457572937,grad_norm: 0.8668470118570453, iteration: 107986
loss: 1.017641544342041,grad_norm: 0.9052962085504944, iteration: 107987
loss: 0.9931630492210388,grad_norm: 0.8876114000059196, iteration: 107988
loss: 1.034242868423462,grad_norm: 0.9999992274172141, iteration: 107989
loss: 1.002954363822937,grad_norm: 0.8294800960180313, iteration: 107990
loss: 1.1377158164978027,grad_norm: 0.9999998209279014, iteration: 107991
loss: 1.025607943534851,grad_norm: 0.8375311189955074, iteration: 107992
loss: 1.0394598245620728,grad_norm: 0.9999991439295739, iteration: 107993
loss: 1.0452501773834229,grad_norm: 0.999999883493843, iteration: 107994
loss: 1.2231889963150024,grad_norm: 0.9999994938086083, iteration: 107995
loss: 1.0014331340789795,grad_norm: 0.9999990693997405, iteration: 107996
loss: 1.128173828125,grad_norm: 0.9999994130831221, iteration: 107997
loss: 0.9823379516601562,grad_norm: 0.8660103853685545, iteration: 107998
loss: 1.0135172605514526,grad_norm: 0.9999998148791129, iteration: 107999
loss: 1.0997507572174072,grad_norm: 0.9999996122881166, iteration: 108000
loss: 0.9977162480354309,grad_norm: 0.9999991047883131, iteration: 108001
loss: 1.0106719732284546,grad_norm: 0.9999991524967723, iteration: 108002
loss: 1.0252585411071777,grad_norm: 0.9999991973640018, iteration: 108003
loss: 1.0190255641937256,grad_norm: 0.9438109452056009, iteration: 108004
loss: 1.074442744255066,grad_norm: 0.9999992713294779, iteration: 108005
loss: 0.9640796780586243,grad_norm: 0.8696019824690358, iteration: 108006
loss: 1.0303431749343872,grad_norm: 0.9999990769894389, iteration: 108007
loss: 0.9961939454078674,grad_norm: 0.8935749880281468, iteration: 108008
loss: 1.0289415121078491,grad_norm: 0.9999993450462035, iteration: 108009
loss: 1.0524152517318726,grad_norm: 1.0000000102212274, iteration: 108010
loss: 1.1315910816192627,grad_norm: 0.9999994687333144, iteration: 108011
loss: 1.0669615268707275,grad_norm: 0.9999994489775208, iteration: 108012
loss: 0.9981776475906372,grad_norm: 0.9999992276391557, iteration: 108013
loss: 1.000026822090149,grad_norm: 0.857182514181289, iteration: 108014
loss: 1.0504997968673706,grad_norm: 0.9999994023401002, iteration: 108015
loss: 1.0219303369522095,grad_norm: 0.9999990325227646, iteration: 108016
loss: 0.9902236461639404,grad_norm: 0.8003062650729325, iteration: 108017
loss: 0.9843088984489441,grad_norm: 0.8092798571623695, iteration: 108018
loss: 1.0331536531448364,grad_norm: 0.9999991224151598, iteration: 108019
loss: 1.0123482942581177,grad_norm: 0.8931054872696789, iteration: 108020
loss: 1.1045880317687988,grad_norm: 0.999999414120882, iteration: 108021
loss: 1.0016735792160034,grad_norm: 0.8344227493398365, iteration: 108022
loss: 1.0820889472961426,grad_norm: 0.9999999203349234, iteration: 108023
loss: 1.060492992401123,grad_norm: 0.9999992937707397, iteration: 108024
loss: 1.0663989782333374,grad_norm: 0.9999996648598434, iteration: 108025
loss: 1.0055522918701172,grad_norm: 1.0000000666540076, iteration: 108026
loss: 1.0713273286819458,grad_norm: 0.9999993651897341, iteration: 108027
loss: 1.0644466876983643,grad_norm: 0.9999998088812333, iteration: 108028
loss: 1.079582929611206,grad_norm: 0.999999073227708, iteration: 108029
loss: 1.0380610227584839,grad_norm: 0.9999989674290464, iteration: 108030
loss: 0.9975374937057495,grad_norm: 0.9999993263503382, iteration: 108031
loss: 0.9745856523513794,grad_norm: 0.7830502404858605, iteration: 108032
loss: 1.0036344528198242,grad_norm: 0.9236184859408029, iteration: 108033
loss: 1.4598881006240845,grad_norm: 0.9999997270186808, iteration: 108034
loss: 1.172107219696045,grad_norm: 0.9999991406825409, iteration: 108035
loss: 1.0294548273086548,grad_norm: 0.9835219836980826, iteration: 108036
loss: 1.0323290824890137,grad_norm: 0.9999998016112429, iteration: 108037
loss: 1.0367692708969116,grad_norm: 0.9999996295177738, iteration: 108038
loss: 1.058868169784546,grad_norm: 0.7979450751477347, iteration: 108039
loss: 1.0565098524093628,grad_norm: 0.9999999570616586, iteration: 108040
loss: 1.0104089975357056,grad_norm: 0.7853872551151475, iteration: 108041
loss: 1.0121244192123413,grad_norm: 0.8336634659410963, iteration: 108042
loss: 1.0317902565002441,grad_norm: 0.7850185305399552, iteration: 108043
loss: 0.997302234172821,grad_norm: 0.777211810368231, iteration: 108044
loss: 0.978337824344635,grad_norm: 0.8921222415353731, iteration: 108045
loss: 1.0386897325515747,grad_norm: 0.9999991190627807, iteration: 108046
loss: 1.0119391679763794,grad_norm: 0.873537619187233, iteration: 108047
loss: 1.0225181579589844,grad_norm: 0.8821565637965987, iteration: 108048
loss: 1.1141875982284546,grad_norm: 0.9999999784877581, iteration: 108049
loss: 0.989866316318512,grad_norm: 0.9999988256076474, iteration: 108050
loss: 1.051669955253601,grad_norm: 0.9999997088642754, iteration: 108051
loss: 1.0140315294265747,grad_norm: 0.9999993333499112, iteration: 108052
loss: 0.9573527574539185,grad_norm: 0.9017982281712585, iteration: 108053
loss: 1.021692156791687,grad_norm: 0.8920355442425045, iteration: 108054
loss: 1.024198293685913,grad_norm: 0.9970662920065444, iteration: 108055
loss: 1.017807960510254,grad_norm: 0.9999994785143148, iteration: 108056
loss: 1.0633606910705566,grad_norm: 0.9999999269301749, iteration: 108057
loss: 1.0237908363342285,grad_norm: 0.9006731631882812, iteration: 108058
loss: 1.005057692527771,grad_norm: 0.9999990425052707, iteration: 108059
loss: 1.05963933467865,grad_norm: 0.999999175839654, iteration: 108060
loss: 0.9594499468803406,grad_norm: 0.9141287618914778, iteration: 108061
loss: 1.1225855350494385,grad_norm: 0.9999991707055431, iteration: 108062
loss: 0.994472086429596,grad_norm: 0.9999994375148807, iteration: 108063
loss: 1.0200847387313843,grad_norm: 0.9356092093905095, iteration: 108064
loss: 0.9819715619087219,grad_norm: 0.9999990674702924, iteration: 108065
loss: 1.0888885259628296,grad_norm: 0.9999998956183047, iteration: 108066
loss: 0.9945612549781799,grad_norm: 0.9999992657679873, iteration: 108067
loss: 1.0041950941085815,grad_norm: 0.8622821646329161, iteration: 108068
loss: 1.0248993635177612,grad_norm: 0.9296262344462671, iteration: 108069
loss: 1.028253436088562,grad_norm: 0.9999991273491109, iteration: 108070
loss: 1.030280590057373,grad_norm: 0.931245715102688, iteration: 108071
loss: 1.0671213865280151,grad_norm: 0.9999998439879128, iteration: 108072
loss: 0.9536294937133789,grad_norm: 0.9726639835473535, iteration: 108073
loss: 1.013822078704834,grad_norm: 0.9999991186748528, iteration: 108074
loss: 0.9666978716850281,grad_norm: 0.8758205928354098, iteration: 108075
loss: 1.0134810209274292,grad_norm: 0.9999989497419294, iteration: 108076
loss: 1.0425819158554077,grad_norm: 0.9471794206811114, iteration: 108077
loss: 0.9899149537086487,grad_norm: 0.9999991750092069, iteration: 108078
loss: 1.07804274559021,grad_norm: 0.9999996074569444, iteration: 108079
loss: 1.0713801383972168,grad_norm: 0.9999992320903401, iteration: 108080
loss: 1.0328576564788818,grad_norm: 0.9999991100580534, iteration: 108081
loss: 1.038649320602417,grad_norm: 0.999999340464602, iteration: 108082
loss: 1.0087910890579224,grad_norm: 0.9999992791243973, iteration: 108083
loss: 1.0325007438659668,grad_norm: 0.7656354321181912, iteration: 108084
loss: 1.005921721458435,grad_norm: 0.7959118491468232, iteration: 108085
loss: 1.0245976448059082,grad_norm: 0.9792855003897355, iteration: 108086
loss: 1.0328925848007202,grad_norm: 0.9999995668260477, iteration: 108087
loss: 0.987812876701355,grad_norm: 0.8072801612191118, iteration: 108088
loss: 1.0013633966445923,grad_norm: 0.9999993263547902, iteration: 108089
loss: 1.0360140800476074,grad_norm: 0.9999991674852227, iteration: 108090
loss: 1.0150812864303589,grad_norm: 0.9999992339248626, iteration: 108091
loss: 0.9749361872673035,grad_norm: 0.7881722403443795, iteration: 108092
loss: 1.0558489561080933,grad_norm: 0.9999999416291794, iteration: 108093
loss: 1.0210715532302856,grad_norm: 0.8112674211970494, iteration: 108094
loss: 1.049453616142273,grad_norm: 0.9993899922077876, iteration: 108095
loss: 1.1073789596557617,grad_norm: 0.9999993035445454, iteration: 108096
loss: 0.9962210059165955,grad_norm: 0.9999991546340562, iteration: 108097
loss: 0.9954072833061218,grad_norm: 0.9999998995979346, iteration: 108098
loss: 1.154159665107727,grad_norm: 0.9999998161152834, iteration: 108099
loss: 0.9951187968254089,grad_norm: 0.8268767574809024, iteration: 108100
loss: 1.0588144063949585,grad_norm: 0.9999998150332121, iteration: 108101
loss: 1.03477942943573,grad_norm: 0.9999994953085892, iteration: 108102
loss: 0.9745954275131226,grad_norm: 0.9145675168219172, iteration: 108103
loss: 1.0464171171188354,grad_norm: 0.9999992291951564, iteration: 108104
loss: 1.0050369501113892,grad_norm: 0.9612007349761789, iteration: 108105
loss: 1.0464762449264526,grad_norm: 0.9999993660524173, iteration: 108106
loss: 1.150394082069397,grad_norm: 0.999999917425508, iteration: 108107
loss: 0.948876142501831,grad_norm: 0.9655033816072108, iteration: 108108
loss: 1.0032012462615967,grad_norm: 0.9932310696295814, iteration: 108109
loss: 1.0761810541152954,grad_norm: 0.9999997234980466, iteration: 108110
loss: 1.0544323921203613,grad_norm: 0.9999996882792578, iteration: 108111
loss: 1.0707638263702393,grad_norm: 0.9999992480411865, iteration: 108112
loss: 1.008144736289978,grad_norm: 0.9999994565567184, iteration: 108113
loss: 1.0576740503311157,grad_norm: 0.8955076414237855, iteration: 108114
loss: 1.020713448524475,grad_norm: 0.9999991502405438, iteration: 108115
loss: 1.0258686542510986,grad_norm: 0.8594728491752587, iteration: 108116
loss: 1.0227471590042114,grad_norm: 0.9109082946098632, iteration: 108117
loss: 1.0271871089935303,grad_norm: 0.9681998887097768, iteration: 108118
loss: 1.0106805562973022,grad_norm: 0.9818159100288407, iteration: 108119
loss: 1.0257823467254639,grad_norm: 0.9999994676547889, iteration: 108120
loss: 1.0448681116104126,grad_norm: 0.999999166572223, iteration: 108121
loss: 1.1264151334762573,grad_norm: 0.9999995353999197, iteration: 108122
loss: 0.9788092374801636,grad_norm: 0.9336893212190825, iteration: 108123
loss: 1.0070801973342896,grad_norm: 0.9635136802050138, iteration: 108124
loss: 1.1543402671813965,grad_norm: 0.9999997163921619, iteration: 108125
loss: 0.9998792409896851,grad_norm: 0.9202754522047986, iteration: 108126
loss: 1.0081404447555542,grad_norm: 0.9999996208328832, iteration: 108127
loss: 1.116652250289917,grad_norm: 0.9999990746383186, iteration: 108128
loss: 1.003894567489624,grad_norm: 0.9999991847287265, iteration: 108129
loss: 1.0107412338256836,grad_norm: 0.99999977809864, iteration: 108130
loss: 1.008544921875,grad_norm: 0.9999992979167402, iteration: 108131
loss: 0.9715396165847778,grad_norm: 0.9999990092366038, iteration: 108132
loss: 1.0091725587844849,grad_norm: 0.8865828175501096, iteration: 108133
loss: 1.0073610544204712,grad_norm: 0.9999990679370571, iteration: 108134
loss: 0.9644267559051514,grad_norm: 0.9999994018148473, iteration: 108135
loss: 1.083064079284668,grad_norm: 0.9999991920327079, iteration: 108136
loss: 1.0209749937057495,grad_norm: 0.9999993753308299, iteration: 108137
loss: 1.0015078783035278,grad_norm: 0.8508993455334453, iteration: 108138
loss: 0.9928479790687561,grad_norm: 0.8970727926444584, iteration: 108139
loss: 0.9706727862358093,grad_norm: 0.8653883333395663, iteration: 108140
loss: 1.0201600790023804,grad_norm: 0.999999067347739, iteration: 108141
loss: 1.0063738822937012,grad_norm: 0.9999992255555971, iteration: 108142
loss: 1.0858865976333618,grad_norm: 0.9999997027696618, iteration: 108143
loss: 1.002740740776062,grad_norm: 0.7953700018715658, iteration: 108144
loss: 1.0175100564956665,grad_norm: 0.9738879531595379, iteration: 108145
loss: 1.026585578918457,grad_norm: 0.9999995144952823, iteration: 108146
loss: 0.9937346577644348,grad_norm: 0.8415382117719137, iteration: 108147
loss: 1.0718326568603516,grad_norm: 0.999999623184026, iteration: 108148
loss: 1.1464931964874268,grad_norm: 0.999999998970749, iteration: 108149
loss: 1.0099958181381226,grad_norm: 0.9296160784071785, iteration: 108150
loss: 1.035204291343689,grad_norm: 0.8620379521725834, iteration: 108151
loss: 1.002803087234497,grad_norm: 0.8984079956713789, iteration: 108152
loss: 1.028961181640625,grad_norm: 0.9981250398203144, iteration: 108153
loss: 1.0293093919754028,grad_norm: 0.999999427169421, iteration: 108154
loss: 1.0367625951766968,grad_norm: 0.9999989808477776, iteration: 108155
loss: 0.9940304756164551,grad_norm: 0.9999990273176426, iteration: 108156
loss: 1.0206143856048584,grad_norm: 0.9084028316387464, iteration: 108157
loss: 0.9665191769599915,grad_norm: 0.9999992566469549, iteration: 108158
loss: 0.9800653457641602,grad_norm: 0.9999991274220685, iteration: 108159
loss: 1.0549951791763306,grad_norm: 0.9999997586316738, iteration: 108160
loss: 1.0140676498413086,grad_norm: 0.9999991134788425, iteration: 108161
loss: 1.0112968683242798,grad_norm: 0.8763545909388434, iteration: 108162
loss: 1.0443263053894043,grad_norm: 0.9999993135755344, iteration: 108163
loss: 1.1080315113067627,grad_norm: 0.9999993695785675, iteration: 108164
loss: 1.0795252323150635,grad_norm: 0.8894058740026705, iteration: 108165
loss: 1.0229136943817139,grad_norm: 0.999999234781998, iteration: 108166
loss: 1.025775671005249,grad_norm: 0.9999990230779645, iteration: 108167
loss: 1.0436569452285767,grad_norm: 0.7609676876859344, iteration: 108168
loss: 0.9892441034317017,grad_norm: 0.9999998286763929, iteration: 108169
loss: 1.002577304840088,grad_norm: 0.9075682748210923, iteration: 108170
loss: 1.0060063600540161,grad_norm: 0.9999989519003905, iteration: 108171
loss: 0.9607465863227844,grad_norm: 0.9999991153353995, iteration: 108172
loss: 1.0398881435394287,grad_norm: 0.9999993079002082, iteration: 108173
loss: 1.0273072719573975,grad_norm: 0.9572275041919855, iteration: 108174
loss: 1.096427321434021,grad_norm: 0.904169289667364, iteration: 108175
loss: 1.016601800918579,grad_norm: 0.9999992825142651, iteration: 108176
loss: 0.9833403825759888,grad_norm: 0.9999991981613134, iteration: 108177
loss: 1.0330567359924316,grad_norm: 0.9999996040660089, iteration: 108178
loss: 1.074462652206421,grad_norm: 0.9999991751651085, iteration: 108179
loss: 1.052243709564209,grad_norm: 0.9999992992925215, iteration: 108180
loss: 0.9938896298408508,grad_norm: 0.9999991199363868, iteration: 108181
loss: 0.9960020184516907,grad_norm: 0.9211368289273671, iteration: 108182
loss: 0.994803249835968,grad_norm: 0.885047199762477, iteration: 108183
loss: 0.9648138880729675,grad_norm: 0.7265119303792712, iteration: 108184
loss: 0.9955205917358398,grad_norm: 0.9313119749558381, iteration: 108185
loss: 1.047977328300476,grad_norm: 0.9999991669778479, iteration: 108186
loss: 0.99104905128479,grad_norm: 0.9999992931177295, iteration: 108187
loss: 1.011980652809143,grad_norm: 0.9999992002988812, iteration: 108188
loss: 0.9929837584495544,grad_norm: 0.9999999367378464, iteration: 108189
loss: 0.9939777255058289,grad_norm: 0.8883893532827927, iteration: 108190
loss: 1.0314428806304932,grad_norm: 0.9007175373013939, iteration: 108191
loss: 1.1222354173660278,grad_norm: 0.9999996143183351, iteration: 108192
loss: 0.9854923486709595,grad_norm: 0.9356921451536894, iteration: 108193
loss: 0.9788339734077454,grad_norm: 0.9253221058970303, iteration: 108194
loss: 1.1011747121810913,grad_norm: 0.999999474716436, iteration: 108195
loss: 0.9920347332954407,grad_norm: 0.9191310580272397, iteration: 108196
loss: 1.0517730712890625,grad_norm: 0.999999758760137, iteration: 108197
loss: 0.9967191219329834,grad_norm: 0.8345867078973254, iteration: 108198
loss: 1.0063923597335815,grad_norm: 0.9999993167456317, iteration: 108199
loss: 1.0130283832550049,grad_norm: 0.9999989690090068, iteration: 108200
loss: 1.0622857809066772,grad_norm: 0.9999991018485159, iteration: 108201
loss: 1.0636110305786133,grad_norm: 0.9999993398171546, iteration: 108202
loss: 1.0754845142364502,grad_norm: 0.9999995566392895, iteration: 108203
loss: 1.0249650478363037,grad_norm: 0.9999990885447686, iteration: 108204
loss: 1.0671284198760986,grad_norm: 0.9999997035028184, iteration: 108205
loss: 1.050837755203247,grad_norm: 0.9999991020267291, iteration: 108206
loss: 1.0837167501449585,grad_norm: 0.9999994081234241, iteration: 108207
loss: 0.9993889331817627,grad_norm: 0.8768992888356866, iteration: 108208
loss: 1.0201101303100586,grad_norm: 0.8495588043093301, iteration: 108209
loss: 0.9759095907211304,grad_norm: 0.9796176980233768, iteration: 108210
loss: 1.0088614225387573,grad_norm: 0.9465827024669524, iteration: 108211
loss: 0.9970976710319519,grad_norm: 0.9621228205170673, iteration: 108212
loss: 0.9873179197311401,grad_norm: 0.9750575527621649, iteration: 108213
loss: 1.098368763923645,grad_norm: 0.9999991404696671, iteration: 108214
loss: 1.0573391914367676,grad_norm: 0.9999992064315778, iteration: 108215
loss: 0.9894479513168335,grad_norm: 0.9443305850221454, iteration: 108216
loss: 0.9728105068206787,grad_norm: 0.9999991340557024, iteration: 108217
loss: 1.0372668504714966,grad_norm: 0.9010291712879175, iteration: 108218
loss: 1.079689383506775,grad_norm: 0.9999995075307725, iteration: 108219
loss: 1.052979826927185,grad_norm: 0.9999990180597413, iteration: 108220
loss: 0.9933739304542542,grad_norm: 0.9815210622487864, iteration: 108221
loss: 1.0986478328704834,grad_norm: 0.9999993762140439, iteration: 108222
loss: 1.058650016784668,grad_norm: 0.9999993297690569, iteration: 108223
loss: 1.0446560382843018,grad_norm: 0.9999993954488154, iteration: 108224
loss: 1.0907859802246094,grad_norm: 0.999999524093744, iteration: 108225
loss: 0.991741418838501,grad_norm: 0.9999992503470275, iteration: 108226
loss: 1.0232011079788208,grad_norm: 0.8347871824995413, iteration: 108227
loss: 1.0157092809677124,grad_norm: 0.8783927048970804, iteration: 108228
loss: 0.9712517261505127,grad_norm: 0.8834286165269831, iteration: 108229
loss: 1.0322988033294678,grad_norm: 0.9514845425424422, iteration: 108230
loss: 1.0689212083816528,grad_norm: 0.9999993131819123, iteration: 108231
loss: 1.0213100910186768,grad_norm: 0.9999991106137373, iteration: 108232
loss: 1.0264477729797363,grad_norm: 0.9999992104764781, iteration: 108233
loss: 1.0556275844573975,grad_norm: 0.9999992771156918, iteration: 108234
loss: 1.0246264934539795,grad_norm: 0.9999993336514891, iteration: 108235
loss: 0.9918524026870728,grad_norm: 0.999999087507669, iteration: 108236
loss: 1.0216526985168457,grad_norm: 0.9999998208858929, iteration: 108237
loss: 1.047861099243164,grad_norm: 0.9999991279760955, iteration: 108238
loss: 1.0045018196105957,grad_norm: 0.925893094320549, iteration: 108239
loss: 1.0280406475067139,grad_norm: 0.9999995072345393, iteration: 108240
loss: 1.080953598022461,grad_norm: 0.9999996800308603, iteration: 108241
loss: 1.0260343551635742,grad_norm: 0.9999993054127301, iteration: 108242
loss: 0.9806759357452393,grad_norm: 0.8769142604065553, iteration: 108243
loss: 1.0195552110671997,grad_norm: 0.9999990970844967, iteration: 108244
loss: 1.0033520460128784,grad_norm: 0.9999999824707356, iteration: 108245
loss: 1.000533103942871,grad_norm: 0.957884966787728, iteration: 108246
loss: 1.0294852256774902,grad_norm: 0.7828945346919253, iteration: 108247
loss: 1.008974313735962,grad_norm: 0.8945149473521041, iteration: 108248
loss: 0.9900261759757996,grad_norm: 0.9999993533077792, iteration: 108249
loss: 1.0548982620239258,grad_norm: 0.9999999633059118, iteration: 108250
loss: 0.9867874383926392,grad_norm: 0.9999992747793581, iteration: 108251
loss: 1.0204963684082031,grad_norm: 0.8945542727644329, iteration: 108252
loss: 1.0348701477050781,grad_norm: 0.7152146531381229, iteration: 108253
loss: 1.1951082944869995,grad_norm: 0.9999998188997303, iteration: 108254
loss: 0.9944065809249878,grad_norm: 0.8745906508312095, iteration: 108255
loss: 1.0054248571395874,grad_norm: 0.8750362420062375, iteration: 108256
loss: 1.1506208181381226,grad_norm: 0.9999995825905404, iteration: 108257
loss: 0.9823028445243835,grad_norm: 0.7973611942079716, iteration: 108258
loss: 1.0448471307754517,grad_norm: 0.9024662798970748, iteration: 108259
loss: 0.9847847819328308,grad_norm: 0.9855096422498262, iteration: 108260
loss: 0.9915524125099182,grad_norm: 0.8744566399547077, iteration: 108261
loss: 1.0190883874893188,grad_norm: 0.8650733072478308, iteration: 108262
loss: 1.0107638835906982,grad_norm: 0.9634469343924583, iteration: 108263
loss: 1.0681921243667603,grad_norm: 0.9999992845978762, iteration: 108264
loss: 1.0964714288711548,grad_norm: 0.8429617873690965, iteration: 108265
loss: 1.0318067073822021,grad_norm: 0.7981775376604308, iteration: 108266
loss: 1.1051591634750366,grad_norm: 0.999999676745208, iteration: 108267
loss: 1.0312323570251465,grad_norm: 0.9491657526623152, iteration: 108268
loss: 0.9961541891098022,grad_norm: 0.9999995354873212, iteration: 108269
loss: 0.9727671146392822,grad_norm: 0.9999999045529664, iteration: 108270
loss: 1.0902807712554932,grad_norm: 0.9999989829118986, iteration: 108271
loss: 0.9418384432792664,grad_norm: 0.9999990600640848, iteration: 108272
loss: 0.9937981367111206,grad_norm: 0.9999991099794984, iteration: 108273
loss: 0.9741614460945129,grad_norm: 0.8824020165247957, iteration: 108274
loss: 0.979682445526123,grad_norm: 0.9999989928431658, iteration: 108275
loss: 1.0070769786834717,grad_norm: 0.9999992453405995, iteration: 108276
loss: 1.06776762008667,grad_norm: 0.9999991726619425, iteration: 108277
loss: 1.0523532629013062,grad_norm: 0.9999996398398596, iteration: 108278
loss: 1.0104193687438965,grad_norm: 0.7447974402413683, iteration: 108279
loss: 1.012196660041809,grad_norm: 0.9999991626277002, iteration: 108280
loss: 1.050349473953247,grad_norm: 0.9999991993050066, iteration: 108281
loss: 0.9915288090705872,grad_norm: 0.8678998346844183, iteration: 108282
loss: 0.9903609156608582,grad_norm: 0.7802068866080509, iteration: 108283
loss: 1.001578688621521,grad_norm: 0.9999990960546442, iteration: 108284
loss: 1.0043059587478638,grad_norm: 0.8062664479372839, iteration: 108285
loss: 0.9928357601165771,grad_norm: 0.8126643888122551, iteration: 108286
loss: 0.9889584183692932,grad_norm: 0.944628956715491, iteration: 108287
loss: 0.9932036399841309,grad_norm: 0.9999991010401397, iteration: 108288
loss: 1.0574954748153687,grad_norm: 0.9265010248456802, iteration: 108289
loss: 0.9949008822441101,grad_norm: 0.9770404860230347, iteration: 108290
loss: 0.9953305125236511,grad_norm: 0.9999993831937151, iteration: 108291
loss: 1.0343116521835327,grad_norm: 0.9999995492830157, iteration: 108292
loss: 1.020138144493103,grad_norm: 0.9999992755960159, iteration: 108293
loss: 1.0554766654968262,grad_norm: 0.9999991148578925, iteration: 108294
loss: 1.1225378513336182,grad_norm: 0.9999994969042711, iteration: 108295
loss: 1.0085117816925049,grad_norm: 0.8987891895334365, iteration: 108296
loss: 0.9802068471908569,grad_norm: 0.8884442948374632, iteration: 108297
loss: 0.985650360584259,grad_norm: 0.9999991417502134, iteration: 108298
loss: 1.1096404790878296,grad_norm: 0.9999994026831335, iteration: 108299
loss: 1.0200430154800415,grad_norm: 0.99999921393089, iteration: 108300
loss: 1.0405653715133667,grad_norm: 0.9999991830725773, iteration: 108301
loss: 1.020033836364746,grad_norm: 0.8259442634880407, iteration: 108302
loss: 1.0121684074401855,grad_norm: 0.9742668773151555, iteration: 108303
loss: 1.0311594009399414,grad_norm: 0.9886958558447984, iteration: 108304
loss: 0.9457964301109314,grad_norm: 0.8501601317257929, iteration: 108305
loss: 1.055585265159607,grad_norm: 1.0000000136023985, iteration: 108306
loss: 1.0360146760940552,grad_norm: 0.999999118348963, iteration: 108307
loss: 0.9984950423240662,grad_norm: 0.8883266648948556, iteration: 108308
loss: 1.0131107568740845,grad_norm: 0.8489298653630798, iteration: 108309
loss: 1.011704444885254,grad_norm: 0.9717319914729639, iteration: 108310
loss: 1.0095951557159424,grad_norm: 0.9597059947230762, iteration: 108311
loss: 0.9933385252952576,grad_norm: 0.9999995436107721, iteration: 108312
loss: 0.9851410984992981,grad_norm: 0.9999991123457549, iteration: 108313
loss: 1.2397263050079346,grad_norm: 0.9999995078125576, iteration: 108314
loss: 1.0409632921218872,grad_norm: 0.9999991671236764, iteration: 108315
loss: 1.0132112503051758,grad_norm: 0.9518508200607877, iteration: 108316
loss: 1.0933418273925781,grad_norm: 0.9999997946442293, iteration: 108317
loss: 1.0359547138214111,grad_norm: 0.9027203385935484, iteration: 108318
loss: 1.10706627368927,grad_norm: 0.9999992390174748, iteration: 108319
loss: 1.0369988679885864,grad_norm: 0.9999993130308292, iteration: 108320
loss: 1.0741417407989502,grad_norm: 0.9999995159267944, iteration: 108321
loss: 1.0719363689422607,grad_norm: 0.816567877606418, iteration: 108322
loss: 1.0041487216949463,grad_norm: 0.9999993040256028, iteration: 108323
loss: 0.9877737164497375,grad_norm: 0.9999994063483504, iteration: 108324
loss: 1.0149762630462646,grad_norm: 0.999999534799317, iteration: 108325
loss: 1.0695772171020508,grad_norm: 0.9999998037354789, iteration: 108326
loss: 1.0383356809616089,grad_norm: 0.9999996254394226, iteration: 108327
loss: 1.0027903318405151,grad_norm: 0.8368239166488582, iteration: 108328
loss: 0.9916616678237915,grad_norm: 0.8034877790575545, iteration: 108329
loss: 1.0067100524902344,grad_norm: 0.9732986628328355, iteration: 108330
loss: 1.0507632493972778,grad_norm: 0.9999991579101827, iteration: 108331
loss: 1.0555802583694458,grad_norm: 0.999999163089012, iteration: 108332
loss: 0.9776622653007507,grad_norm: 0.8713711691220943, iteration: 108333
loss: 1.026822805404663,grad_norm: 0.8111423228728392, iteration: 108334
loss: 0.972150981426239,grad_norm: 0.792906822628914, iteration: 108335
loss: 0.9902845621109009,grad_norm: 0.8324021186183013, iteration: 108336
loss: 1.0009042024612427,grad_norm: 0.9999995009789995, iteration: 108337
loss: 1.019262433052063,grad_norm: 0.8866145390802961, iteration: 108338
loss: 1.024953842163086,grad_norm: 0.8783131124895066, iteration: 108339
loss: 1.069772720336914,grad_norm: 0.9999997407129051, iteration: 108340
loss: 1.080386757850647,grad_norm: 0.9999998374371171, iteration: 108341
loss: 1.0216578245162964,grad_norm: 0.9999995842990355, iteration: 108342
loss: 1.0390137434005737,grad_norm: 0.9999991893791041, iteration: 108343
loss: 1.0142204761505127,grad_norm: 0.7868699427445083, iteration: 108344
loss: 1.0546644926071167,grad_norm: 0.894592018619023, iteration: 108345
loss: 0.9703916311264038,grad_norm: 0.8820224837046208, iteration: 108346
loss: 1.0169774293899536,grad_norm: 0.8911340601516609, iteration: 108347
loss: 1.029929280281067,grad_norm: 0.9314165951534809, iteration: 108348
loss: 0.9821409583091736,grad_norm: 0.9225350843600568, iteration: 108349
loss: 1.0075498819351196,grad_norm: 0.980267735789188, iteration: 108350
loss: 0.9856998324394226,grad_norm: 0.7287458218004443, iteration: 108351
loss: 0.9821043610572815,grad_norm: 0.8278099798268869, iteration: 108352
loss: 0.9899065494537354,grad_norm: 0.8418448155482904, iteration: 108353
loss: 1.0192742347717285,grad_norm: 0.8676535307057854, iteration: 108354
loss: 1.0282883644104004,grad_norm: 0.8573524901810989, iteration: 108355
loss: 1.0991910696029663,grad_norm: 0.9999997107783448, iteration: 108356
loss: 1.084499716758728,grad_norm: 0.9774592837405842, iteration: 108357
loss: 1.0198537111282349,grad_norm: 0.7814621454700386, iteration: 108358
loss: 0.9868454933166504,grad_norm: 0.999999447056472, iteration: 108359
loss: 1.0143325328826904,grad_norm: 0.792271230746284, iteration: 108360
loss: 1.0452028512954712,grad_norm: 0.9999991121836307, iteration: 108361
loss: 1.0266566276550293,grad_norm: 0.9999991901302023, iteration: 108362
loss: 0.9939315915107727,grad_norm: 0.9999998948061064, iteration: 108363
loss: 0.979572057723999,grad_norm: 0.9749526303321173, iteration: 108364
loss: 0.9895744323730469,grad_norm: 0.8734320028601109, iteration: 108365
loss: 1.000694990158081,grad_norm: 0.9420157607702095, iteration: 108366
loss: 1.0021744966506958,grad_norm: 0.8235051297205135, iteration: 108367
loss: 1.0227712392807007,grad_norm: 0.9999990689034804, iteration: 108368
loss: 0.9755697846412659,grad_norm: 0.8193569821360649, iteration: 108369
loss: 0.9954979419708252,grad_norm: 0.9502918333714221, iteration: 108370
loss: 1.0058506727218628,grad_norm: 0.9181382978418767, iteration: 108371
loss: 1.0136111974716187,grad_norm: 0.9999991260728789, iteration: 108372
loss: 1.0366352796554565,grad_norm: 0.9999996243580745, iteration: 108373
loss: 1.0766115188598633,grad_norm: 0.9141320259528228, iteration: 108374
loss: 1.0246827602386475,grad_norm: 0.8307397569282463, iteration: 108375
loss: 1.0091687440872192,grad_norm: 0.8249989071445888, iteration: 108376
loss: 0.9981208443641663,grad_norm: 0.876222052141657, iteration: 108377
loss: 0.9847478270530701,grad_norm: 0.9999991700119767, iteration: 108378
loss: 1.023004412651062,grad_norm: 0.9999998970462808, iteration: 108379
loss: 1.0249748229980469,grad_norm: 0.8020548033203558, iteration: 108380
loss: 0.9698362946510315,grad_norm: 0.9663289647169926, iteration: 108381
loss: 1.0034016370773315,grad_norm: 0.8673518763025615, iteration: 108382
loss: 1.0227006673812866,grad_norm: 0.999999118507157, iteration: 108383
loss: 1.0186673402786255,grad_norm: 0.9999992706461402, iteration: 108384
loss: 0.9978055953979492,grad_norm: 0.8998802902707337, iteration: 108385
loss: 1.0010770559310913,grad_norm: 0.8427984795041956, iteration: 108386
loss: 1.0345447063446045,grad_norm: 0.9999998244567956, iteration: 108387
loss: 1.0053049325942993,grad_norm: 0.7087828076300837, iteration: 108388
loss: 1.0202052593231201,grad_norm: 0.9999996452409958, iteration: 108389
loss: 1.0399833917617798,grad_norm: 0.99999976265128, iteration: 108390
loss: 1.0633084774017334,grad_norm: 0.9999998143315577, iteration: 108391
loss: 1.0123100280761719,grad_norm: 0.8605990640494212, iteration: 108392
loss: 1.0425139665603638,grad_norm: 0.8731378199009613, iteration: 108393
loss: 1.0286030769348145,grad_norm: 0.9999993550822027, iteration: 108394
loss: 1.0284620523452759,grad_norm: 0.942750889373158, iteration: 108395
loss: 1.0247809886932373,grad_norm: 0.8956302763640747, iteration: 108396
loss: 0.9832297563552856,grad_norm: 0.8955946540723764, iteration: 108397
loss: 1.116978406906128,grad_norm: 0.9999995153385763, iteration: 108398
loss: 1.0041182041168213,grad_norm: 0.9999156782649867, iteration: 108399
loss: 1.0109609365463257,grad_norm: 0.9999991219492289, iteration: 108400
loss: 1.0415350198745728,grad_norm: 0.9999997283669506, iteration: 108401
loss: 0.9982749223709106,grad_norm: 0.8242757331207493, iteration: 108402
loss: 1.0386136770248413,grad_norm: 0.9999998409649407, iteration: 108403
loss: 1.0351487398147583,grad_norm: 0.9999997330958126, iteration: 108404
loss: 0.9932292699813843,grad_norm: 0.999999261503005, iteration: 108405
loss: 1.059706449508667,grad_norm: 0.9999994961457638, iteration: 108406
loss: 1.0530215501785278,grad_norm: 0.9999996277733076, iteration: 108407
loss: 1.0360950231552124,grad_norm: 0.9999997191553996, iteration: 108408
loss: 0.9893184304237366,grad_norm: 0.9046680662164833, iteration: 108409
loss: 0.9957292079925537,grad_norm: 0.9999989792678962, iteration: 108410
loss: 1.0264228582382202,grad_norm: 0.9012943098226036, iteration: 108411
loss: 0.9921121597290039,grad_norm: 0.9999991097180767, iteration: 108412
loss: 1.2886172533035278,grad_norm: 0.9999996727504672, iteration: 108413
loss: 1.066144585609436,grad_norm: 0.9223885049882035, iteration: 108414
loss: 1.090494990348816,grad_norm: 0.9999998996901847, iteration: 108415
loss: 1.11953604221344,grad_norm: 0.9999998281632957, iteration: 108416
loss: 1.0012840032577515,grad_norm: 0.8761801955186167, iteration: 108417
loss: 1.1378594636917114,grad_norm: 0.9999998804994471, iteration: 108418
loss: 0.9891357421875,grad_norm: 0.9999990246994475, iteration: 108419
loss: 1.0831273794174194,grad_norm: 0.9999992663383684, iteration: 108420
loss: 1.1230111122131348,grad_norm: 0.9999996906478603, iteration: 108421
loss: 0.9463343620300293,grad_norm: 0.8664532277776137, iteration: 108422
loss: 1.0541956424713135,grad_norm: 0.9211482174577341, iteration: 108423
loss: 1.112790584564209,grad_norm: 0.9883978520318812, iteration: 108424
loss: 1.0098052024841309,grad_norm: 0.9999995510122944, iteration: 108425
loss: 0.9740203619003296,grad_norm: 0.8732664080184019, iteration: 108426
loss: 1.0364919900894165,grad_norm: 0.9999993867496403, iteration: 108427
loss: 1.0339096784591675,grad_norm: 0.999999689912803, iteration: 108428
loss: 1.0199992656707764,grad_norm: 0.7944533696417242, iteration: 108429
loss: 1.0325459241867065,grad_norm: 0.9999991155211974, iteration: 108430
loss: 1.0588855743408203,grad_norm: 0.9999990695641273, iteration: 108431
loss: 1.0367827415466309,grad_norm: 0.9999990385889754, iteration: 108432
loss: 1.069093942642212,grad_norm: 0.9999994114843431, iteration: 108433
loss: 1.0883665084838867,grad_norm: 0.9999990429840524, iteration: 108434
loss: 0.979803740978241,grad_norm: 0.9645569227419929, iteration: 108435
loss: 1.3523372411727905,grad_norm: 0.9999998370805909, iteration: 108436
loss: 1.009126901626587,grad_norm: 0.9999992067508364, iteration: 108437
loss: 1.0626521110534668,grad_norm: 1.0000000651043555, iteration: 108438
loss: 0.9989891648292542,grad_norm: 0.9999993365064274, iteration: 108439
loss: 1.0324262380599976,grad_norm: 0.999999642875478, iteration: 108440
loss: 1.0106606483459473,grad_norm: 0.9213777741656576, iteration: 108441
loss: 1.1291801929473877,grad_norm: 0.9999996166619943, iteration: 108442
loss: 1.0452958345413208,grad_norm: 0.9999996536511755, iteration: 108443
loss: 1.0877398252487183,grad_norm: 0.9999992043392165, iteration: 108444
loss: 1.004717230796814,grad_norm: 0.9999989543428002, iteration: 108445
loss: 1.0553187131881714,grad_norm: 0.999999093968558, iteration: 108446
loss: 1.002726435661316,grad_norm: 0.9789720228462393, iteration: 108447
loss: 1.0476819276809692,grad_norm: 0.8604617006370087, iteration: 108448
loss: 1.0003612041473389,grad_norm: 0.8674216237240254, iteration: 108449
loss: 1.0736430883407593,grad_norm: 0.9999998362097764, iteration: 108450
loss: 0.9775258898735046,grad_norm: 0.9269926324451654, iteration: 108451
loss: 1.2062461376190186,grad_norm: 0.9999996952571785, iteration: 108452
loss: 0.9647946953773499,grad_norm: 0.9999989823477984, iteration: 108453
loss: 1.0925894975662231,grad_norm: 0.9999996006544447, iteration: 108454
loss: 1.0340673923492432,grad_norm: 0.8035648966321027, iteration: 108455
loss: 1.0343133211135864,grad_norm: 0.9120974696935508, iteration: 108456
loss: 1.0316263437271118,grad_norm: 0.9999996348159255, iteration: 108457
loss: 0.9954966306686401,grad_norm: 0.9726140685808095, iteration: 108458
loss: 0.9823745489120483,grad_norm: 0.9062292738149541, iteration: 108459
loss: 0.9984335899353027,grad_norm: 0.8132431385726784, iteration: 108460
loss: 1.050866723060608,grad_norm: 0.9315581754775175, iteration: 108461
loss: 1.0236246585845947,grad_norm: 0.9999995790694477, iteration: 108462
loss: 1.3624879121780396,grad_norm: 1.000000090484132, iteration: 108463
loss: 1.0276962518692017,grad_norm: 0.9999998552505927, iteration: 108464
loss: 1.0077100992202759,grad_norm: 0.9999994622834751, iteration: 108465
loss: 1.1115871667861938,grad_norm: 1.0000000197222039, iteration: 108466
loss: 1.0400276184082031,grad_norm: 0.9999990359341583, iteration: 108467
loss: 1.0865079164505005,grad_norm: 0.9999992253296516, iteration: 108468
loss: 1.2399835586547852,grad_norm: 0.999999774256042, iteration: 108469
loss: 1.0390064716339111,grad_norm: 0.9999994976265804, iteration: 108470
loss: 1.0602353811264038,grad_norm: 0.9999991081583386, iteration: 108471
loss: 1.083928108215332,grad_norm: 0.9999990748130645, iteration: 108472
loss: 1.1785024404525757,grad_norm: 0.9999993479505201, iteration: 108473
loss: 1.0653612613677979,grad_norm: 0.9999994616974873, iteration: 108474
loss: 1.0686360597610474,grad_norm: 0.999999905298186, iteration: 108475
loss: 1.0233054161071777,grad_norm: 0.9999992080333115, iteration: 108476
loss: 1.0487803220748901,grad_norm: 0.9999991041881248, iteration: 108477
loss: 1.011202335357666,grad_norm: 0.7945784798096954, iteration: 108478
loss: 0.985424280166626,grad_norm: 0.9115077508873085, iteration: 108479
loss: 1.0296489000320435,grad_norm: 0.999999496444412, iteration: 108480
loss: 1.0167882442474365,grad_norm: 0.9999991814709159, iteration: 108481
loss: 1.079822063446045,grad_norm: 0.9999991198209827, iteration: 108482
loss: 1.066854476928711,grad_norm: 0.9999991886912998, iteration: 108483
loss: 1.0134644508361816,grad_norm: 0.9999995625454999, iteration: 108484
loss: 1.196089267730713,grad_norm: 0.9999992507354779, iteration: 108485
loss: 0.9765915870666504,grad_norm: 0.9999994135049171, iteration: 108486
loss: 1.006173014640808,grad_norm: 0.7078335886360198, iteration: 108487
loss: 1.0169180631637573,grad_norm: 0.9211612309320891, iteration: 108488
loss: 1.0185095071792603,grad_norm: 0.9999999324149387, iteration: 108489
loss: 1.0437361001968384,grad_norm: 0.9957333451563002, iteration: 108490
loss: 1.0376638174057007,grad_norm: 0.9656371035736061, iteration: 108491
loss: 1.0158156156539917,grad_norm: 0.9999991718714137, iteration: 108492
loss: 1.0332579612731934,grad_norm: 0.8719725274724879, iteration: 108493
loss: 1.1050935983657837,grad_norm: 0.9999990947345487, iteration: 108494
loss: 1.0345038175582886,grad_norm: 0.9999991048032476, iteration: 108495
loss: 1.0067561864852905,grad_norm: 0.9399399038283236, iteration: 108496
loss: 0.996638298034668,grad_norm: 0.9999991624707365, iteration: 108497
loss: 1.0210200548171997,grad_norm: 0.9041986427930174, iteration: 108498
loss: 1.087910771369934,grad_norm: 0.8995550005278213, iteration: 108499
loss: 1.023134708404541,grad_norm: 0.9178771455426253, iteration: 108500
loss: 0.9742873311042786,grad_norm: 0.9999991030341986, iteration: 108501
loss: 0.9965270161628723,grad_norm: 0.9330910723650053, iteration: 108502
loss: 1.1030266284942627,grad_norm: 0.9999997142151552, iteration: 108503
loss: 1.2040857076644897,grad_norm: 0.9999998193604818, iteration: 108504
loss: 1.0165060758590698,grad_norm: 0.8514094515913612, iteration: 108505
loss: 1.1300747394561768,grad_norm: 0.9999997001362315, iteration: 108506
loss: 1.0646147727966309,grad_norm: 0.9999991169002668, iteration: 108507
loss: 1.0625897645950317,grad_norm: 0.999999618308796, iteration: 108508
loss: 1.0014585256576538,grad_norm: 0.9902256126013672, iteration: 108509
loss: 1.018537163734436,grad_norm: 0.9396939973704415, iteration: 108510
loss: 1.1272720098495483,grad_norm: 0.9999994000325323, iteration: 108511
loss: 1.2849410772323608,grad_norm: 0.9999998259652593, iteration: 108512
loss: 1.0484329462051392,grad_norm: 0.999999804929926, iteration: 108513
loss: 1.0688304901123047,grad_norm: 0.9999997532418154, iteration: 108514
loss: 1.1850957870483398,grad_norm: 0.9999996801475503, iteration: 108515
loss: 1.0545215606689453,grad_norm: 0.9999991727457337, iteration: 108516
loss: 1.1393003463745117,grad_norm: 0.9999994778814324, iteration: 108517
loss: 1.1655280590057373,grad_norm: 0.999999263381624, iteration: 108518
loss: 1.0240392684936523,grad_norm: 0.9999992544318835, iteration: 108519
loss: 1.042026400566101,grad_norm: 0.9999991226238665, iteration: 108520
loss: 1.0680831670761108,grad_norm: 0.9999998936720411, iteration: 108521
loss: 1.183933973312378,grad_norm: 0.9999995834811515, iteration: 108522
loss: 1.0590784549713135,grad_norm: 0.9560598599166544, iteration: 108523
loss: 1.0014442205429077,grad_norm: 0.9999990565383868, iteration: 108524
loss: 1.0068469047546387,grad_norm: 0.9999992606739633, iteration: 108525
loss: 1.0293712615966797,grad_norm: 0.9999993733593731, iteration: 108526
loss: 0.999269425868988,grad_norm: 0.9999998875081599, iteration: 108527
loss: 1.109045147895813,grad_norm: 1.0000000298596683, iteration: 108528
loss: 1.0669300556182861,grad_norm: 0.9767569915236327, iteration: 108529
loss: 1.0604710578918457,grad_norm: 0.9999998234472914, iteration: 108530
loss: 1.1399511098861694,grad_norm: 0.9999995805280202, iteration: 108531
loss: 1.0052841901779175,grad_norm: 0.857845705103261, iteration: 108532
loss: 0.9817848801612854,grad_norm: 0.9999991338508509, iteration: 108533
loss: 1.0982989072799683,grad_norm: 0.9999997890623828, iteration: 108534
loss: 0.9758982062339783,grad_norm: 0.9999991397284995, iteration: 108535
loss: 1.0522150993347168,grad_norm: 0.9999999047242802, iteration: 108536
loss: 1.0129518508911133,grad_norm: 0.9999990281282576, iteration: 108537
loss: 0.9647621512413025,grad_norm: 0.9999989433321389, iteration: 108538
loss: 1.0579073429107666,grad_norm: 0.9999998986875155, iteration: 108539
loss: 0.9921810626983643,grad_norm: 0.9999994122700293, iteration: 108540
loss: 0.9957172274589539,grad_norm: 0.9999993562618013, iteration: 108541
loss: 1.0368475914001465,grad_norm: 0.9373810180739298, iteration: 108542
loss: 1.0578978061676025,grad_norm: 0.999999272746375, iteration: 108543
loss: 1.1521601676940918,grad_norm: 0.9999996131681663, iteration: 108544
loss: 0.9923843741416931,grad_norm: 0.7353715072599055, iteration: 108545
loss: 1.1163665056228638,grad_norm: 0.9976390506789494, iteration: 108546
loss: 0.9811190366744995,grad_norm: 0.9999993301469877, iteration: 108547
loss: 1.015663743019104,grad_norm: 0.9999990414653378, iteration: 108548
loss: 1.0185792446136475,grad_norm: 0.9999994142073665, iteration: 108549
loss: 1.0556011199951172,grad_norm: 0.9999998327978459, iteration: 108550
loss: 1.0340608358383179,grad_norm: 0.9999995079949133, iteration: 108551
loss: 1.005339503288269,grad_norm: 0.9999990653693448, iteration: 108552
loss: 1.0496411323547363,grad_norm: 0.9162925262772985, iteration: 108553
loss: 1.0804435014724731,grad_norm: 0.9999999379260772, iteration: 108554
loss: 0.9925860166549683,grad_norm: 0.9999989858644817, iteration: 108555
loss: 1.0008443593978882,grad_norm: 0.9999994663130276, iteration: 108556
loss: 1.095774531364441,grad_norm: 1.0000000615530924, iteration: 108557
loss: 1.097561001777649,grad_norm: 0.9999993714782112, iteration: 108558
loss: 1.1014715433120728,grad_norm: 0.9999992673255081, iteration: 108559
loss: 1.0712658166885376,grad_norm: 0.9999998542886419, iteration: 108560
loss: 1.0971570014953613,grad_norm: 0.9999992064003076, iteration: 108561
loss: 1.0390456914901733,grad_norm: 0.9999997663456569, iteration: 108562
loss: 1.0310461521148682,grad_norm: 0.999999403750027, iteration: 108563
loss: 1.0150469541549683,grad_norm: 0.9999992512240081, iteration: 108564
loss: 1.1658737659454346,grad_norm: 0.9999994591437025, iteration: 108565
loss: 0.9745696783065796,grad_norm: 0.7759999027331934, iteration: 108566
loss: 1.035867691040039,grad_norm: 0.9999999098830029, iteration: 108567
loss: 1.1352334022521973,grad_norm: 0.9999994698233504, iteration: 108568
loss: 1.0139760971069336,grad_norm: 0.9999991886012656, iteration: 108569
loss: 1.0969955921173096,grad_norm: 0.9999998431658117, iteration: 108570
loss: 0.9898421168327332,grad_norm: 0.9999992324554017, iteration: 108571
loss: 1.051823377609253,grad_norm: 0.9999996500944849, iteration: 108572
loss: 1.01031494140625,grad_norm: 0.9999989919296884, iteration: 108573
loss: 1.0753469467163086,grad_norm: 0.9999997485350628, iteration: 108574
loss: 1.0763975381851196,grad_norm: 0.9999993912183515, iteration: 108575
loss: 1.0694243907928467,grad_norm: 0.9999997515415772, iteration: 108576
loss: 1.005744457244873,grad_norm: 0.7671907438809816, iteration: 108577
loss: 1.059903860092163,grad_norm: 0.999999216654192, iteration: 108578
loss: 1.1043622493743896,grad_norm: 0.9999993391534144, iteration: 108579
loss: 0.9766298532485962,grad_norm: 0.9999993811045954, iteration: 108580
loss: 1.0393602848052979,grad_norm: 0.9999992671441209, iteration: 108581
loss: 1.0230088233947754,grad_norm: 0.8478693201710436, iteration: 108582
loss: 1.1260827779769897,grad_norm: 0.9999998982103042, iteration: 108583
loss: 1.0253809690475464,grad_norm: 0.9999991688306568, iteration: 108584
loss: 1.083149790763855,grad_norm: 0.9999994154127947, iteration: 108585
loss: 0.9624886512756348,grad_norm: 0.9537431892479371, iteration: 108586
loss: 1.0090030431747437,grad_norm: 0.9299183843430587, iteration: 108587
loss: 1.0524566173553467,grad_norm: 0.9999998692530316, iteration: 108588
loss: 1.005143165588379,grad_norm: 0.9999991267451337, iteration: 108589
loss: 1.0390938520431519,grad_norm: 0.9828618954254029, iteration: 108590
loss: 1.056151032447815,grad_norm: 0.9794784599574282, iteration: 108591
loss: 1.0456209182739258,grad_norm: 0.9999991077255042, iteration: 108592
loss: 0.949846625328064,grad_norm: 0.9999992731400036, iteration: 108593
loss: 1.0055593252182007,grad_norm: 0.9115366103735081, iteration: 108594
loss: 1.000609278678894,grad_norm: 0.9999997054765637, iteration: 108595
loss: 1.0049290657043457,grad_norm: 0.9999991457329211, iteration: 108596
loss: 0.989326000213623,grad_norm: 0.8377802872358298, iteration: 108597
loss: 1.0120738744735718,grad_norm: 0.8091985037555433, iteration: 108598
loss: 1.0297527313232422,grad_norm: 0.9966209911229869, iteration: 108599
loss: 0.9936792850494385,grad_norm: 0.9999992150582219, iteration: 108600
loss: 1.0652055740356445,grad_norm: 0.9999994373802269, iteration: 108601
loss: 1.0226374864578247,grad_norm: 0.9447122696070689, iteration: 108602
loss: 1.0542818307876587,grad_norm: 0.999999556394, iteration: 108603
loss: 1.0764662027359009,grad_norm: 0.9999991914308763, iteration: 108604
loss: 1.0826274156570435,grad_norm: 0.9999992539446102, iteration: 108605
loss: 1.1099209785461426,grad_norm: 0.9999997528823877, iteration: 108606
loss: 1.0656205415725708,grad_norm: 0.715793595171187, iteration: 108607
loss: 1.0904127359390259,grad_norm: 0.9999994215808486, iteration: 108608
loss: 1.0180128812789917,grad_norm: 0.9999999470624725, iteration: 108609
loss: 1.019524335861206,grad_norm: 0.9999996281034325, iteration: 108610
loss: 1.0458614826202393,grad_norm: 0.9999992376116412, iteration: 108611
loss: 1.0274150371551514,grad_norm: 0.9123647506387411, iteration: 108612
loss: 1.0244534015655518,grad_norm: 0.9999991236027529, iteration: 108613
loss: 1.0734896659851074,grad_norm: 0.999999847643161, iteration: 108614
loss: 1.125796914100647,grad_norm: 0.9999993115143997, iteration: 108615
loss: 1.0331674814224243,grad_norm: 0.9999994269358091, iteration: 108616
loss: 1.1653738021850586,grad_norm: 0.9999999098016809, iteration: 108617
loss: 1.10304856300354,grad_norm: 0.9999993904092968, iteration: 108618
loss: 1.0294764041900635,grad_norm: 0.9999990541780432, iteration: 108619
loss: 1.110907793045044,grad_norm: 0.9999998104682887, iteration: 108620
loss: 1.0323596000671387,grad_norm: 0.9999994905553299, iteration: 108621
loss: 1.1721618175506592,grad_norm: 0.9999996547080535, iteration: 108622
loss: 1.0530595779418945,grad_norm: 0.9999995149594727, iteration: 108623
loss: 0.9889310598373413,grad_norm: 0.9840240195389333, iteration: 108624
loss: 1.0234205722808838,grad_norm: 0.7723216488182302, iteration: 108625
loss: 1.008589267730713,grad_norm: 0.9999992059635917, iteration: 108626
loss: 1.0176565647125244,grad_norm: 0.7625470828368626, iteration: 108627
loss: 0.9912466406822205,grad_norm: 0.9999991383873214, iteration: 108628
loss: 1.0036191940307617,grad_norm: 0.9999990502951647, iteration: 108629
loss: 0.9969503879547119,grad_norm: 0.8614177146844585, iteration: 108630
loss: 1.0492641925811768,grad_norm: 0.9999993184967856, iteration: 108631
loss: 1.0422255992889404,grad_norm: 0.9999995537311033, iteration: 108632
loss: 1.0269169807434082,grad_norm: 0.9999995051633396, iteration: 108633
loss: 1.0031707286834717,grad_norm: 0.9999991872813343, iteration: 108634
loss: 1.0575282573699951,grad_norm: 0.9999996526322391, iteration: 108635
loss: 0.9998883605003357,grad_norm: 0.9967486376803624, iteration: 108636
loss: 1.0171010494232178,grad_norm: 0.9999990984621624, iteration: 108637
loss: 1.149786353111267,grad_norm: 0.9999996957510707, iteration: 108638
loss: 1.096592903137207,grad_norm: 0.9999991142319867, iteration: 108639
loss: 1.0469398498535156,grad_norm: 0.9999996954527806, iteration: 108640
loss: 1.0421067476272583,grad_norm: 0.9999993745101865, iteration: 108641
loss: 1.1440194845199585,grad_norm: 0.9999997032618492, iteration: 108642
loss: 0.9987799525260925,grad_norm: 0.999999042126488, iteration: 108643
loss: 0.978375256061554,grad_norm: 0.953777466004564, iteration: 108644
loss: 1.0114046335220337,grad_norm: 0.9999991795803875, iteration: 108645
loss: 0.9955682158470154,grad_norm: 0.9848584228572974, iteration: 108646
loss: 0.9703967571258545,grad_norm: 0.9999991314820428, iteration: 108647
loss: 1.0004667043685913,grad_norm: 0.9999990753399197, iteration: 108648
loss: 1.0382590293884277,grad_norm: 0.9999990661288124, iteration: 108649
loss: 1.0014841556549072,grad_norm: 0.9999990933733521, iteration: 108650
loss: 1.0204856395721436,grad_norm: 0.999999264845757, iteration: 108651
loss: 1.0171138048171997,grad_norm: 0.9999993433533695, iteration: 108652
loss: 1.0330917835235596,grad_norm: 0.9999993197415298, iteration: 108653
loss: 0.9799723029136658,grad_norm: 0.8439831623892868, iteration: 108654
loss: 1.0315847396850586,grad_norm: 0.9999993797944222, iteration: 108655
loss: 0.9966152310371399,grad_norm: 0.9999991183887228, iteration: 108656
loss: 0.9614425897598267,grad_norm: 0.8889322959493798, iteration: 108657
loss: 1.0172805786132812,grad_norm: 0.8860071360719253, iteration: 108658
loss: 1.0021541118621826,grad_norm: 0.9999993168012743, iteration: 108659
loss: 1.093799114227295,grad_norm: 0.9999991216294551, iteration: 108660
loss: 1.0140804052352905,grad_norm: 0.817217158616438, iteration: 108661
loss: 0.9855882525444031,grad_norm: 0.8668205513253363, iteration: 108662
loss: 1.0295445919036865,grad_norm: 0.838563260280768, iteration: 108663
loss: 1.0365279912948608,grad_norm: 0.8840234022378116, iteration: 108664
loss: 0.9988235235214233,grad_norm: 0.999999305550775, iteration: 108665
loss: 0.9774019122123718,grad_norm: 0.9191656693212208, iteration: 108666
loss: 0.9795036911964417,grad_norm: 0.9047765765992851, iteration: 108667
loss: 1.007857322692871,grad_norm: 0.9999989978176969, iteration: 108668
loss: 1.0163335800170898,grad_norm: 0.9999991992961367, iteration: 108669
loss: 0.9989904761314392,grad_norm: 0.9242363832364483, iteration: 108670
loss: 1.0744714736938477,grad_norm: 0.9999994537933712, iteration: 108671
loss: 1.2163264751434326,grad_norm: 0.9999998858362185, iteration: 108672
loss: 1.0381745100021362,grad_norm: 0.9999995281432588, iteration: 108673
loss: 1.0300227403640747,grad_norm: 0.9738131919063633, iteration: 108674
loss: 1.1643829345703125,grad_norm: 0.9999997400604248, iteration: 108675
loss: 1.0430419445037842,grad_norm: 0.9999992046973311, iteration: 108676
loss: 1.0393187999725342,grad_norm: 0.8970412486791988, iteration: 108677
loss: 1.0338085889816284,grad_norm: 0.8263091123605857, iteration: 108678
loss: 1.0039088726043701,grad_norm: 0.9999991874339562, iteration: 108679
loss: 1.0863566398620605,grad_norm: 0.999999583952903, iteration: 108680
loss: 0.9953524470329285,grad_norm: 0.9358152909163078, iteration: 108681
loss: 1.0651580095291138,grad_norm: 0.999999379224761, iteration: 108682
loss: 1.0538721084594727,grad_norm: 0.9999993704174963, iteration: 108683
loss: 1.02180814743042,grad_norm: 0.9478094645344693, iteration: 108684
loss: 1.0484254360198975,grad_norm: 0.9999992438802324, iteration: 108685
loss: 1.1104872226715088,grad_norm: 0.9999998430380073, iteration: 108686
loss: 0.9908881187438965,grad_norm: 0.9999991449716535, iteration: 108687
loss: 1.0115560293197632,grad_norm: 0.8454345635495613, iteration: 108688
loss: 1.03921377658844,grad_norm: 0.9999991313707464, iteration: 108689
loss: 1.0113683938980103,grad_norm: 0.9897291325519476, iteration: 108690
loss: 1.0044763088226318,grad_norm: 0.9999991672691476, iteration: 108691
loss: 1.0314100980758667,grad_norm: 0.9999993345044637, iteration: 108692
loss: 1.0098036527633667,grad_norm: 0.9999993456571433, iteration: 108693
loss: 1.0388988256454468,grad_norm: 0.9415641587293366, iteration: 108694
loss: 1.0630226135253906,grad_norm: 0.9999997196224583, iteration: 108695
loss: 1.0260332822799683,grad_norm: 0.9999995224242185, iteration: 108696
loss: 1.006630778312683,grad_norm: 0.9278116004356728, iteration: 108697
loss: 1.022426962852478,grad_norm: 0.9999997106350265, iteration: 108698
loss: 1.1611305475234985,grad_norm: 1.000000014327522, iteration: 108699
loss: 1.1050519943237305,grad_norm: 0.9999998461951458, iteration: 108700
loss: 1.10584557056427,grad_norm: 0.9999998287053975, iteration: 108701
loss: 1.0674493312835693,grad_norm: 0.9999997134633213, iteration: 108702
loss: 1.0360424518585205,grad_norm: 0.999999287843663, iteration: 108703
loss: 1.2475336790084839,grad_norm: 0.9999998900826911, iteration: 108704
loss: 1.0020774602890015,grad_norm: 0.9999990294640358, iteration: 108705
loss: 0.9627946615219116,grad_norm: 0.9999991425495722, iteration: 108706
loss: 1.067700743675232,grad_norm: 0.9999997287530421, iteration: 108707
loss: 1.1221590042114258,grad_norm: 0.9999993900855362, iteration: 108708
loss: 1.0509734153747559,grad_norm: 0.9999994108371355, iteration: 108709
loss: 1.0464544296264648,grad_norm: 0.9999994414039536, iteration: 108710
loss: 1.058212161064148,grad_norm: 0.9999992116494926, iteration: 108711
loss: 1.0667511224746704,grad_norm: 0.9999994889655165, iteration: 108712
loss: 1.0174200534820557,grad_norm: 0.9999995123574201, iteration: 108713
loss: 1.0125732421875,grad_norm: 0.8018804744642595, iteration: 108714
loss: 1.091968297958374,grad_norm: 0.9999994235873014, iteration: 108715
loss: 1.0447571277618408,grad_norm: 0.8451185392716268, iteration: 108716
loss: 1.038515329360962,grad_norm: 0.9999992505254665, iteration: 108717
loss: 1.0489529371261597,grad_norm: 0.999999946904325, iteration: 108718
loss: 1.016953706741333,grad_norm: 0.9999999092450026, iteration: 108719
loss: 0.9795303344726562,grad_norm: 0.8563739961233172, iteration: 108720
loss: 1.0149272680282593,grad_norm: 0.9365190707640276, iteration: 108721
loss: 1.1328761577606201,grad_norm: 0.999999708804842, iteration: 108722
loss: 1.0261589288711548,grad_norm: 0.9999994683529828, iteration: 108723
loss: 1.0065146684646606,grad_norm: 0.9333383474310233, iteration: 108724
loss: 1.0125113725662231,grad_norm: 0.8939772656490933, iteration: 108725
loss: 1.0387877225875854,grad_norm: 0.9999998254462286, iteration: 108726
loss: 1.1076890230178833,grad_norm: 0.9999993581695128, iteration: 108727
loss: 1.0022408962249756,grad_norm: 0.896809003860799, iteration: 108728
loss: 0.9752665162086487,grad_norm: 0.9393028496695964, iteration: 108729
loss: 1.106335997581482,grad_norm: 0.9999998419906514, iteration: 108730
loss: 1.0050654411315918,grad_norm: 0.816861288043351, iteration: 108731
loss: 1.00111985206604,grad_norm: 0.8585770943816798, iteration: 108732
loss: 0.9888299703598022,grad_norm: 0.9999991463875991, iteration: 108733
loss: 1.0161815881729126,grad_norm: 0.9999992157157093, iteration: 108734
loss: 1.0267059803009033,grad_norm: 0.9999993163092991, iteration: 108735
loss: 1.1111925840377808,grad_norm: 0.9999996385182035, iteration: 108736
loss: 1.0011705160140991,grad_norm: 0.9568150549945281, iteration: 108737
loss: 1.0469969511032104,grad_norm: 0.9616511060921729, iteration: 108738
loss: 0.9765600562095642,grad_norm: 0.8883947979785088, iteration: 108739
loss: 1.074417233467102,grad_norm: 0.9999991281406527, iteration: 108740
loss: 1.0674077272415161,grad_norm: 0.9999998817749192, iteration: 108741
loss: 1.033409595489502,grad_norm: 0.9999992122348103, iteration: 108742
loss: 1.1005889177322388,grad_norm: 0.9999995751550714, iteration: 108743
loss: 1.0313315391540527,grad_norm: 0.9999989918738779, iteration: 108744
loss: 0.9423920512199402,grad_norm: 0.9854340724479498, iteration: 108745
loss: 0.9752141237258911,grad_norm: 0.9999990901903252, iteration: 108746
loss: 1.0098472833633423,grad_norm: 0.793421551661362, iteration: 108747
loss: 1.047977089881897,grad_norm: 0.9862921835253523, iteration: 108748
loss: 0.9529798030853271,grad_norm: 0.8031646094866556, iteration: 108749
loss: 1.0766255855560303,grad_norm: 0.9999997460437964, iteration: 108750
loss: 1.009883999824524,grad_norm: 0.9999990891336066, iteration: 108751
loss: 1.0355592966079712,grad_norm: 0.8610891573106979, iteration: 108752
loss: 1.330361008644104,grad_norm: 0.9999997384358881, iteration: 108753
loss: 0.9818028807640076,grad_norm: 0.8694353740215466, iteration: 108754
loss: 1.0176693201065063,grad_norm: 0.9999989941137037, iteration: 108755
loss: 1.0856668949127197,grad_norm: 0.9999992156128988, iteration: 108756
loss: 1.092054843902588,grad_norm: 1.0000000244342002, iteration: 108757
loss: 1.0381224155426025,grad_norm: 0.9999990845996837, iteration: 108758
loss: 1.043380856513977,grad_norm: 0.9999997590325814, iteration: 108759
loss: 0.9889206290245056,grad_norm: 0.9999999808780046, iteration: 108760
loss: 1.012239933013916,grad_norm: 0.9999991423674436, iteration: 108761
loss: 0.9855971336364746,grad_norm: 0.9999995772837108, iteration: 108762
loss: 1.004872441291809,grad_norm: 0.9960390028388929, iteration: 108763
loss: 1.1030032634735107,grad_norm: 0.9999998815524613, iteration: 108764
loss: 1.0569207668304443,grad_norm: 0.999999165566094, iteration: 108765
loss: 0.971921980381012,grad_norm: 0.9902048638810956, iteration: 108766
loss: 1.0191975831985474,grad_norm: 0.7925801276375891, iteration: 108767
loss: 1.1417540311813354,grad_norm: 0.9999998725495091, iteration: 108768
loss: 1.0109996795654297,grad_norm: 0.9999991508776777, iteration: 108769
loss: 0.9978976249694824,grad_norm: 0.9860925398811174, iteration: 108770
loss: 1.0070708990097046,grad_norm: 0.9470522164116174, iteration: 108771
loss: 0.9953657984733582,grad_norm: 0.9999991579369298, iteration: 108772
loss: 1.0692031383514404,grad_norm: 0.9999992104730936, iteration: 108773
loss: 1.053375244140625,grad_norm: 0.9999999011690065, iteration: 108774
loss: 1.11581552028656,grad_norm: 0.9999995301436198, iteration: 108775
loss: 1.0341864824295044,grad_norm: 0.9999998316428393, iteration: 108776
loss: 0.9976182579994202,grad_norm: 0.9999990577587632, iteration: 108777
loss: 0.990013837814331,grad_norm: 0.9189997956971991, iteration: 108778
loss: 1.0335144996643066,grad_norm: 0.8942938185345138, iteration: 108779
loss: 0.9763055443763733,grad_norm: 0.9733253984540561, iteration: 108780
loss: 1.0427833795547485,grad_norm: 0.9999992123077301, iteration: 108781
loss: 0.9737222194671631,grad_norm: 0.937447502561938, iteration: 108782
loss: 1.03048574924469,grad_norm: 0.999999879749849, iteration: 108783
loss: 0.9948176145553589,grad_norm: 0.8209181001173769, iteration: 108784
loss: 1.0799658298492432,grad_norm: 0.9999999620134752, iteration: 108785
loss: 1.0768224000930786,grad_norm: 0.9999998688948192, iteration: 108786
loss: 1.0668915510177612,grad_norm: 0.9999994315075371, iteration: 108787
loss: 1.0215730667114258,grad_norm: 0.8820003064645595, iteration: 108788
loss: 1.0283050537109375,grad_norm: 0.9999994125165963, iteration: 108789
loss: 0.9755433201789856,grad_norm: 0.8662391144640968, iteration: 108790
loss: 0.9699512124061584,grad_norm: 0.9618348084716456, iteration: 108791
loss: 1.0463378429412842,grad_norm: 0.8711298925111756, iteration: 108792
loss: 1.0205572843551636,grad_norm: 0.9999874007420388, iteration: 108793
loss: 1.0863956212997437,grad_norm: 0.9999991254555314, iteration: 108794
loss: 1.0324088335037231,grad_norm: 0.9693148813388708, iteration: 108795
loss: 1.0609607696533203,grad_norm: 0.9999991725475231, iteration: 108796
loss: 0.9568976759910583,grad_norm: 0.7793099938160787, iteration: 108797
loss: 0.9976155757904053,grad_norm: 0.8431055947421824, iteration: 108798
loss: 1.0264161825180054,grad_norm: 0.9999996410878441, iteration: 108799
loss: 0.980962872505188,grad_norm: 0.9999994541452316, iteration: 108800
loss: 1.021715521812439,grad_norm: 0.8482758517869882, iteration: 108801
loss: 1.0109013319015503,grad_norm: 0.9999996235688396, iteration: 108802
loss: 1.0209664106369019,grad_norm: 0.9554806557628865, iteration: 108803
loss: 1.0294766426086426,grad_norm: 0.9999995050295638, iteration: 108804
loss: 1.1014244556427002,grad_norm: 0.8726509835632766, iteration: 108805
loss: 0.983987033367157,grad_norm: 0.8869926630467101, iteration: 108806
loss: 1.0391844511032104,grad_norm: 0.9958902420317128, iteration: 108807
loss: 0.999841034412384,grad_norm: 0.9999992848523761, iteration: 108808
loss: 1.054652452468872,grad_norm: 0.9999998701593922, iteration: 108809
loss: 0.9771436452865601,grad_norm: 0.8744407818492097, iteration: 108810
loss: 1.0389714241027832,grad_norm: 0.999999607863899, iteration: 108811
loss: 0.9582544565200806,grad_norm: 0.964435459324993, iteration: 108812
loss: 1.0483921766281128,grad_norm: 0.9999990036051521, iteration: 108813
loss: 1.0329909324645996,grad_norm: 0.9531878617979188, iteration: 108814
loss: 0.9860970377922058,grad_norm: 0.9712414100647789, iteration: 108815
loss: 1.0060418844223022,grad_norm: 0.8774922784980265, iteration: 108816
loss: 1.0416783094406128,grad_norm: 0.9999996332592573, iteration: 108817
loss: 1.0186796188354492,grad_norm: 0.8902108633116191, iteration: 108818
loss: 1.0465162992477417,grad_norm: 0.9999992089134838, iteration: 108819
loss: 1.042150855064392,grad_norm: 0.8775964445588833, iteration: 108820
loss: 1.0045279264450073,grad_norm: 0.999999715607607, iteration: 108821
loss: 1.0185234546661377,grad_norm: 0.9999993058556976, iteration: 108822
loss: 1.137199878692627,grad_norm: 0.9999999599956038, iteration: 108823
loss: 1.0494303703308105,grad_norm: 0.9999999902945561, iteration: 108824
loss: 1.0403989553451538,grad_norm: 0.9511804454998414, iteration: 108825
loss: 1.017738938331604,grad_norm: 0.7843975494990094, iteration: 108826
loss: 0.9798173904418945,grad_norm: 0.9999989800475907, iteration: 108827
loss: 1.0092921257019043,grad_norm: 0.9999996425629737, iteration: 108828
loss: 1.0096194744110107,grad_norm: 0.8711167429280755, iteration: 108829
loss: 1.0056085586547852,grad_norm: 0.9392382277966835, iteration: 108830
loss: 0.9908099174499512,grad_norm: 0.9999990751367555, iteration: 108831
loss: 1.0181396007537842,grad_norm: 0.9999993020446462, iteration: 108832
loss: 1.045212984085083,grad_norm: 0.7871577291556401, iteration: 108833
loss: 1.0235488414764404,grad_norm: 0.9999990291659873, iteration: 108834
loss: 1.016787052154541,grad_norm: 0.9999996906806475, iteration: 108835
loss: 1.049180507659912,grad_norm: 0.9999997166648865, iteration: 108836
loss: 1.0715080499649048,grad_norm: 0.8431624960300749, iteration: 108837
loss: 1.0319278240203857,grad_norm: 0.9999994655394195, iteration: 108838
loss: 1.0273653268814087,grad_norm: 0.976956601524404, iteration: 108839
loss: 1.0062934160232544,grad_norm: 0.8735060136029463, iteration: 108840
loss: 1.0370584726333618,grad_norm: 0.9999993103210342, iteration: 108841
loss: 1.0086705684661865,grad_norm: 0.9999991801720672, iteration: 108842
loss: 1.0077550411224365,grad_norm: 0.8282312412349307, iteration: 108843
loss: 1.0086973905563354,grad_norm: 0.9189279883673936, iteration: 108844
loss: 0.9717097282409668,grad_norm: 0.9999991146884724, iteration: 108845
loss: 1.01445472240448,grad_norm: 0.9627664304468521, iteration: 108846
loss: 1.086429476737976,grad_norm: 0.999999775234202, iteration: 108847
loss: 1.024153709411621,grad_norm: 0.9999995290146838, iteration: 108848
loss: 1.058103084564209,grad_norm: 0.9999997441985627, iteration: 108849
loss: 0.9640717506408691,grad_norm: 0.9999991928837755, iteration: 108850
loss: 1.00919771194458,grad_norm: 0.9999993412272147, iteration: 108851
loss: 1.0989855527877808,grad_norm: 0.999999358925053, iteration: 108852
loss: 0.9755591154098511,grad_norm: 0.9746474280325382, iteration: 108853
loss: 1.0561374425888062,grad_norm: 0.9999999121074529, iteration: 108854
loss: 1.2669070959091187,grad_norm: 0.9999995535044357, iteration: 108855
loss: 1.0516437292099,grad_norm: 0.9999992604586434, iteration: 108856
loss: 0.9632850885391235,grad_norm: 0.9428169811786052, iteration: 108857
loss: 1.050940990447998,grad_norm: 0.9999998892367379, iteration: 108858
loss: 0.9965720772743225,grad_norm: 0.9342839144118226, iteration: 108859
loss: 0.971063494682312,grad_norm: 0.9127436289225314, iteration: 108860
loss: 1.0181865692138672,grad_norm: 0.8337461045751036, iteration: 108861
loss: 1.046769618988037,grad_norm: 0.9999997637975984, iteration: 108862
loss: 0.997236967086792,grad_norm: 0.9999991914493976, iteration: 108863
loss: 1.1021567583084106,grad_norm: 0.9999999033164225, iteration: 108864
loss: 1.0326478481292725,grad_norm: 0.656181352976316, iteration: 108865
loss: 0.9997014999389648,grad_norm: 0.8003118124443831, iteration: 108866
loss: 1.0002623796463013,grad_norm: 0.9999990138604025, iteration: 108867
loss: 1.0050374269485474,grad_norm: 0.9378206653122164, iteration: 108868
loss: 1.0142306089401245,grad_norm: 0.999999200002642, iteration: 108869
loss: 1.155142903327942,grad_norm: 0.9999996053867382, iteration: 108870
loss: 0.9586590528488159,grad_norm: 0.9999995694570042, iteration: 108871
loss: 1.1249357461929321,grad_norm: 0.9999997137979594, iteration: 108872
loss: 1.1032692193984985,grad_norm: 0.9999997631191914, iteration: 108873
loss: 1.0457818508148193,grad_norm: 0.9999997999967859, iteration: 108874
loss: 0.985148012638092,grad_norm: 0.9325198121913999, iteration: 108875
loss: 1.0328855514526367,grad_norm: 0.9999995085965824, iteration: 108876
loss: 1.0912269353866577,grad_norm: 0.9999997809722042, iteration: 108877
loss: 1.103737473487854,grad_norm: 0.9999997902607862, iteration: 108878
loss: 1.0355417728424072,grad_norm: 0.9491473351226396, iteration: 108879
loss: 0.9879428744316101,grad_norm: 0.9311395942953902, iteration: 108880
loss: 1.0467562675476074,grad_norm: 0.9999992350750971, iteration: 108881
loss: 1.0612324476242065,grad_norm: 0.9999994263371659, iteration: 108882
loss: 1.0927996635437012,grad_norm: 0.9999993112040969, iteration: 108883
loss: 0.9974578619003296,grad_norm: 0.9191150503028406, iteration: 108884
loss: 1.0134714841842651,grad_norm: 0.9999993106357594, iteration: 108885
loss: 1.0668835639953613,grad_norm: 0.999999125140393, iteration: 108886
loss: 1.0230811834335327,grad_norm: 0.962477543033996, iteration: 108887
loss: 1.1068648099899292,grad_norm: 0.9999996544209734, iteration: 108888
loss: 0.9937845468521118,grad_norm: 0.8327089987614966, iteration: 108889
loss: 1.1273603439331055,grad_norm: 0.9999994264317117, iteration: 108890
loss: 1.0662117004394531,grad_norm: 0.9999997408644915, iteration: 108891
loss: 0.9774612188339233,grad_norm: 0.9999996174361965, iteration: 108892
loss: 1.0304287672042847,grad_norm: 0.9999998138759383, iteration: 108893
loss: 1.0935146808624268,grad_norm: 0.9999997433930298, iteration: 108894
loss: 1.095472812652588,grad_norm: 0.9999995381538473, iteration: 108895
loss: 1.012787103652954,grad_norm: 0.8325796319137294, iteration: 108896
loss: 0.9631726145744324,grad_norm: 0.9999994572801242, iteration: 108897
loss: 1.0978453159332275,grad_norm: 0.999999913546232, iteration: 108898
loss: 1.1215121746063232,grad_norm: 0.9999996456305732, iteration: 108899
loss: 1.0570634603500366,grad_norm: 0.9999995733170147, iteration: 108900
loss: 1.1497324705123901,grad_norm: 0.9999998153638799, iteration: 108901
loss: 0.9912378787994385,grad_norm: 0.9999991751185833, iteration: 108902
loss: 1.0593425035476685,grad_norm: 0.9999998013446529, iteration: 108903
loss: 1.0648585557937622,grad_norm: 0.9999999323038605, iteration: 108904
loss: 1.0131062269210815,grad_norm: 0.9984694871547019, iteration: 108905
loss: 1.1786763668060303,grad_norm: 0.9999996461933564, iteration: 108906
loss: 1.025795817375183,grad_norm: 0.9999990787326067, iteration: 108907
loss: 1.0208150148391724,grad_norm: 0.9999993259168766, iteration: 108908
loss: 0.9921392202377319,grad_norm: 0.9999991576771364, iteration: 108909
loss: 1.0486409664154053,grad_norm: 0.999999921841245, iteration: 108910
loss: 0.988862931728363,grad_norm: 0.8394375915843807, iteration: 108911
loss: 1.095046877861023,grad_norm: 0.9043490178659144, iteration: 108912
loss: 1.2097481489181519,grad_norm: 0.9999997422571317, iteration: 108913
loss: 1.0142366886138916,grad_norm: 0.9999990271937416, iteration: 108914
loss: 1.0785161256790161,grad_norm: 1.0000000174976822, iteration: 108915
loss: 1.033235788345337,grad_norm: 0.9999991054253308, iteration: 108916
loss: 1.0207643508911133,grad_norm: 0.9999993474630322, iteration: 108917
loss: 0.9821158051490784,grad_norm: 0.9256706062874424, iteration: 108918
loss: 1.0317167043685913,grad_norm: 0.9999991276570842, iteration: 108919
loss: 1.0219277143478394,grad_norm: 0.945401559499812, iteration: 108920
loss: 1.0461963415145874,grad_norm: 0.9999994641820484, iteration: 108921
loss: 1.0160918235778809,grad_norm: 0.9196661558767272, iteration: 108922
loss: 1.1548161506652832,grad_norm: 0.999999679995968, iteration: 108923
loss: 0.9998347163200378,grad_norm: 0.8807723245809729, iteration: 108924
loss: 1.1067544221878052,grad_norm: 0.9999996089482963, iteration: 108925
loss: 1.0007750988006592,grad_norm: 0.8615639458265324, iteration: 108926
loss: 1.127112627029419,grad_norm: 0.9999999846772284, iteration: 108927
loss: 1.0717666149139404,grad_norm: 1.0000000061043173, iteration: 108928
loss: 1.1703670024871826,grad_norm: 0.999999603597804, iteration: 108929
loss: 1.129148244857788,grad_norm: 0.9999992622893409, iteration: 108930
loss: 1.0332145690917969,grad_norm: 0.9999992437163256, iteration: 108931
loss: 1.1842961311340332,grad_norm: 0.9999997762905504, iteration: 108932
loss: 1.0453870296478271,grad_norm: 0.999999864289223, iteration: 108933
loss: 0.9952024221420288,grad_norm: 0.8344050310020528, iteration: 108934
loss: 1.1244518756866455,grad_norm: 0.999999971910286, iteration: 108935
loss: 1.0596041679382324,grad_norm: 0.9999994912239215, iteration: 108936
loss: 1.0612447261810303,grad_norm: 0.9999999618898361, iteration: 108937
loss: 1.1888065338134766,grad_norm: 0.9999990928136798, iteration: 108938
loss: 0.9631645679473877,grad_norm: 0.9999991076267765, iteration: 108939
loss: 1.170651912689209,grad_norm: 0.9999991390636188, iteration: 108940
loss: 1.0815449953079224,grad_norm: 0.9999993934020369, iteration: 108941
loss: 1.0461432933807373,grad_norm: 0.9999991892091477, iteration: 108942
loss: 1.1600234508514404,grad_norm: 0.9999998519991461, iteration: 108943
loss: 1.0088034868240356,grad_norm: 0.8370465339053176, iteration: 108944
loss: 1.1177300214767456,grad_norm: 0.9999993488713598, iteration: 108945
loss: 1.074573040008545,grad_norm: 0.8954397868016619, iteration: 108946
loss: 1.0082792043685913,grad_norm: 0.8849038142992499, iteration: 108947
loss: 0.9941098093986511,grad_norm: 0.9999990048003533, iteration: 108948
loss: 1.1031136512756348,grad_norm: 0.9999999784460774, iteration: 108949
loss: 0.9753945469856262,grad_norm: 0.999999243212506, iteration: 108950
loss: 1.046932339668274,grad_norm: 0.8192718166775276, iteration: 108951
loss: 1.2964705228805542,grad_norm: 0.9999998960851957, iteration: 108952
loss: 1.0496398210525513,grad_norm: 0.9999991151913034, iteration: 108953
loss: 1.0518150329589844,grad_norm: 0.8470830855908249, iteration: 108954
loss: 1.0628985166549683,grad_norm: 0.9999991229455434, iteration: 108955
loss: 1.0649919509887695,grad_norm: 0.999999416177451, iteration: 108956
loss: 1.0313678979873657,grad_norm: 0.8029964565526461, iteration: 108957
loss: 1.067571997642517,grad_norm: 0.9999996667473985, iteration: 108958
loss: 1.0314953327178955,grad_norm: 0.9999998293097866, iteration: 108959
loss: 1.0396397113800049,grad_norm: 0.8001122231507166, iteration: 108960
loss: 1.0737171173095703,grad_norm: 0.978491444949731, iteration: 108961
loss: 1.0982939004898071,grad_norm: 0.9999997632672256, iteration: 108962
loss: 1.08295738697052,grad_norm: 0.9999995708277426, iteration: 108963
loss: 1.066022276878357,grad_norm: 0.9733187246304572, iteration: 108964
loss: 1.2621716260910034,grad_norm: 0.9999998572102203, iteration: 108965
loss: 1.1109602451324463,grad_norm: 0.9999998568779668, iteration: 108966
loss: 1.0834873914718628,grad_norm: 0.999999986989588, iteration: 108967
loss: 1.1236188411712646,grad_norm: 0.9999997447943284, iteration: 108968
loss: 1.0640902519226074,grad_norm: 0.9999993559590913, iteration: 108969
loss: 1.0910258293151855,grad_norm: 0.9999994811760093, iteration: 108970
loss: 1.0290250778198242,grad_norm: 0.9999990521614621, iteration: 108971
loss: 1.0338425636291504,grad_norm: 0.8842711731315352, iteration: 108972
loss: 1.1080498695373535,grad_norm: 0.9999992603813498, iteration: 108973
loss: 1.0420793294906616,grad_norm: 0.999999949222705, iteration: 108974
loss: 1.0131767988204956,grad_norm: 0.9330200650688969, iteration: 108975
loss: 1.0474580526351929,grad_norm: 0.9999992162973105, iteration: 108976
loss: 1.1434721946716309,grad_norm: 0.9999993914691874, iteration: 108977
loss: 1.003037452697754,grad_norm: 0.9623131133751582, iteration: 108978
loss: 1.1512364149093628,grad_norm: 0.8868601976901691, iteration: 108979
loss: 1.0036425590515137,grad_norm: 0.9999992312090561, iteration: 108980
loss: 0.9969973564147949,grad_norm: 0.9624517305058512, iteration: 108981
loss: 1.0372575521469116,grad_norm: 0.9999995254874827, iteration: 108982
loss: 1.0774390697479248,grad_norm: 0.9518540143407851, iteration: 108983
loss: 0.9849740266799927,grad_norm: 0.9184741663961096, iteration: 108984
loss: 1.0858030319213867,grad_norm: 0.9999997592161449, iteration: 108985
loss: 1.0825598239898682,grad_norm: 0.9999990412956493, iteration: 108986
loss: 1.0260814428329468,grad_norm: 0.769655659202229, iteration: 108987
loss: 1.013997197151184,grad_norm: 0.9999997771601432, iteration: 108988
loss: 1.0172820091247559,grad_norm: 0.8947294115117556, iteration: 108989
loss: 1.0800060033798218,grad_norm: 0.9999999260517919, iteration: 108990
loss: 1.0989258289337158,grad_norm: 0.9999999049399902, iteration: 108991
loss: 0.9582525491714478,grad_norm: 0.9489586602978737, iteration: 108992
loss: 1.0730003118515015,grad_norm: 0.9344902932254994, iteration: 108993
loss: 0.989639163017273,grad_norm: 0.8578399860742425, iteration: 108994
loss: 1.1933594942092896,grad_norm: 0.9999991122569861, iteration: 108995
loss: 1.0874778032302856,grad_norm: 0.9999998969340804, iteration: 108996
loss: 1.1008915901184082,grad_norm: 0.9999996903898649, iteration: 108997
loss: 1.0861961841583252,grad_norm: 0.9999998295401166, iteration: 108998
loss: 1.0231801271438599,grad_norm: 0.9087847352239867, iteration: 108999
loss: 1.0330923795700073,grad_norm: 0.9999995102772058, iteration: 109000
loss: 1.1231865882873535,grad_norm: 0.9999991146543479, iteration: 109001
loss: 1.0671731233596802,grad_norm: 0.9999995060917648, iteration: 109002
loss: 1.1830536127090454,grad_norm: 0.9999998846167942, iteration: 109003
loss: 1.0743768215179443,grad_norm: 0.9999991406990627, iteration: 109004
loss: 1.017458438873291,grad_norm: 0.9999991205243686, iteration: 109005
loss: 1.0744327306747437,grad_norm: 0.9999992279746669, iteration: 109006
loss: 0.9816548228263855,grad_norm: 0.9999990136647634, iteration: 109007
loss: 0.9937395453453064,grad_norm: 0.9999991180498041, iteration: 109008
loss: 1.0842348337173462,grad_norm: 0.9999990625471386, iteration: 109009
loss: 1.021233320236206,grad_norm: 0.9999993819431954, iteration: 109010
loss: 0.998989462852478,grad_norm: 0.9999996320863165, iteration: 109011
loss: 1.129809021949768,grad_norm: 0.999999720671797, iteration: 109012
loss: 1.0661481618881226,grad_norm: 0.9731546112275095, iteration: 109013
loss: 1.0436936616897583,grad_norm: 0.9999997322374482, iteration: 109014
loss: 1.0395478010177612,grad_norm: 0.9999997917360783, iteration: 109015
loss: 1.0433439016342163,grad_norm: 0.999999112495557, iteration: 109016
loss: 1.0358986854553223,grad_norm: 0.9999996040684866, iteration: 109017
loss: 0.9966937899589539,grad_norm: 0.9999991583115307, iteration: 109018
loss: 1.0024961233139038,grad_norm: 0.9999989930430191, iteration: 109019
loss: 1.1605048179626465,grad_norm: 0.999999135901571, iteration: 109020
loss: 1.1239299774169922,grad_norm: 1.000000002673752, iteration: 109021
loss: 0.9967498779296875,grad_norm: 0.9832356218850343, iteration: 109022
loss: 1.041408896446228,grad_norm: 0.9999991047078576, iteration: 109023
loss: 1.0864981412887573,grad_norm: 0.9999996952803478, iteration: 109024
loss: 1.0857563018798828,grad_norm: 0.9999993627006911, iteration: 109025
loss: 1.1603741645812988,grad_norm: 0.9999994426376448, iteration: 109026
loss: 1.1055916547775269,grad_norm: 0.9999998342919567, iteration: 109027
loss: 1.0406383275985718,grad_norm: 0.9999994005480324, iteration: 109028
loss: 1.0939302444458008,grad_norm: 0.9999995606256051, iteration: 109029
loss: 1.0012400150299072,grad_norm: 0.9999990527440628, iteration: 109030
loss: 1.0374021530151367,grad_norm: 0.9999991056332697, iteration: 109031
loss: 1.0517947673797607,grad_norm: 0.9973288681458076, iteration: 109032
loss: 1.0507084131240845,grad_norm: 0.9999992553843315, iteration: 109033
loss: 1.0451117753982544,grad_norm: 0.8286628642187281, iteration: 109034
loss: 1.082258939743042,grad_norm: 0.9999991814202432, iteration: 109035
loss: 1.0156183242797852,grad_norm: 0.9333028811567656, iteration: 109036
loss: 1.0208251476287842,grad_norm: 0.999999094840062, iteration: 109037
loss: 1.1466964483261108,grad_norm: 0.9999998253339457, iteration: 109038
loss: 0.9970877170562744,grad_norm: 0.9126552115067095, iteration: 109039
loss: 1.0528756380081177,grad_norm: 0.9999997809470674, iteration: 109040
loss: 1.0333117246627808,grad_norm: 0.999999724752229, iteration: 109041
loss: 1.0661745071411133,grad_norm: 0.9999991507905249, iteration: 109042
loss: 1.356392741203308,grad_norm: 0.9999995449962777, iteration: 109043
loss: 1.041709303855896,grad_norm: 0.9999996607194147, iteration: 109044
loss: 0.9925833344459534,grad_norm: 0.7576566917223191, iteration: 109045
loss: 1.035947561264038,grad_norm: 0.9999991888299211, iteration: 109046
loss: 1.0973565578460693,grad_norm: 0.8735546757989257, iteration: 109047
loss: 0.9962614178657532,grad_norm: 0.8680858472621308, iteration: 109048
loss: 1.11933434009552,grad_norm: 0.9999993896118494, iteration: 109049
loss: 1.0336048603057861,grad_norm: 0.9999993105947854, iteration: 109050
loss: 1.051023006439209,grad_norm: 0.9155961384628434, iteration: 109051
loss: 1.130586862564087,grad_norm: 0.9999998003715962, iteration: 109052
loss: 1.2647275924682617,grad_norm: 0.9999998725793698, iteration: 109053
loss: 1.0345414876937866,grad_norm: 0.9999992988736838, iteration: 109054
loss: 1.0485583543777466,grad_norm: 0.9999999706451143, iteration: 109055
loss: 1.0286765098571777,grad_norm: 0.9999999387181935, iteration: 109056
loss: 1.1318930387496948,grad_norm: 0.999999672081999, iteration: 109057
loss: 1.2752248048782349,grad_norm: 0.9999999661412785, iteration: 109058
loss: 1.030076026916504,grad_norm: 0.9881871459844147, iteration: 109059
loss: 1.1785871982574463,grad_norm: 0.9999998349049448, iteration: 109060
loss: 1.2974286079406738,grad_norm: 0.9999997514466056, iteration: 109061
loss: 1.157056450843811,grad_norm: 0.9999992943974919, iteration: 109062
loss: 1.1003206968307495,grad_norm: 0.9999990860653831, iteration: 109063
loss: 1.1233330965042114,grad_norm: 0.9999997190632955, iteration: 109064
loss: 1.1884605884552002,grad_norm: 0.9999998918782133, iteration: 109065
loss: 1.1566189527511597,grad_norm: 0.9999998705067032, iteration: 109066
loss: 1.2612760066986084,grad_norm: 0.999999406343806, iteration: 109067
loss: 1.1395057439804077,grad_norm: 0.9999996847086644, iteration: 109068
loss: 1.1133922338485718,grad_norm: 0.9999992607124599, iteration: 109069
loss: 1.1121156215667725,grad_norm: 0.9999995666375208, iteration: 109070
loss: 1.1837544441223145,grad_norm: 0.9999998539238926, iteration: 109071
loss: 1.2655115127563477,grad_norm: 0.9999995816397709, iteration: 109072
loss: 1.094517707824707,grad_norm: 0.9999998991662817, iteration: 109073
loss: 1.1062510013580322,grad_norm: 0.9999998817318788, iteration: 109074
loss: 1.2641685009002686,grad_norm: 0.9999997249678001, iteration: 109075
loss: 1.1577954292297363,grad_norm: 0.9999996291289873, iteration: 109076
loss: 1.228896141052246,grad_norm: 0.9999998419217734, iteration: 109077
loss: 1.0441051721572876,grad_norm: 0.9999997570631985, iteration: 109078
loss: 1.0927406549453735,grad_norm: 0.9999996894682528, iteration: 109079
loss: 1.0329973697662354,grad_norm: 0.9999997564902007, iteration: 109080
loss: 1.1120282411575317,grad_norm: 0.999999822521283, iteration: 109081
loss: 1.0927643775939941,grad_norm: 0.9999997788776098, iteration: 109082
loss: 1.0973447561264038,grad_norm: 0.9999992428317317, iteration: 109083
loss: 1.067299246788025,grad_norm: 0.9999998676297694, iteration: 109084
loss: 1.1090149879455566,grad_norm: 0.9999994713133087, iteration: 109085
loss: 1.0669499635696411,grad_norm: 0.9999998981943063, iteration: 109086
loss: 1.0236557722091675,grad_norm: 0.8600580472095852, iteration: 109087
loss: 0.984276294708252,grad_norm: 0.9999994202260993, iteration: 109088
loss: 1.0776680707931519,grad_norm: 0.9999991697034191, iteration: 109089
loss: 1.2073756456375122,grad_norm: 0.9999995683381591, iteration: 109090
loss: 1.0354697704315186,grad_norm: 0.999999128654296, iteration: 109091
loss: 1.0588470697402954,grad_norm: 0.8926373092093501, iteration: 109092
loss: 1.0266532897949219,grad_norm: 0.8769846064495344, iteration: 109093
loss: 1.0007567405700684,grad_norm: 0.8145018449899961, iteration: 109094
loss: 0.9796051979064941,grad_norm: 0.8249523731818406, iteration: 109095
loss: 1.1540230512619019,grad_norm: 0.9999998629153795, iteration: 109096
loss: 0.9881980419158936,grad_norm: 0.9692437903203308, iteration: 109097
loss: 1.0326478481292725,grad_norm: 0.8823253594282434, iteration: 109098
loss: 1.0388597249984741,grad_norm: 0.999999023610655, iteration: 109099
loss: 1.0556163787841797,grad_norm: 0.9999991260802772, iteration: 109100
loss: 1.0649536848068237,grad_norm: 0.9999993131333025, iteration: 109101
loss: 1.054822325706482,grad_norm: 0.9999994289092284, iteration: 109102
loss: 1.027483582496643,grad_norm: 0.8601628726915905, iteration: 109103
loss: 1.0921412706375122,grad_norm: 0.8382252408801213, iteration: 109104
loss: 0.9707187414169312,grad_norm: 0.99999939071723, iteration: 109105
loss: 0.984527587890625,grad_norm: 0.9588025388891054, iteration: 109106
loss: 1.0833208560943604,grad_norm: 0.9876765748186797, iteration: 109107
loss: 1.0457472801208496,grad_norm: 0.999999114542446, iteration: 109108
loss: 1.004542350769043,grad_norm: 0.9999992145804408, iteration: 109109
loss: 1.003838062286377,grad_norm: 0.9288630721965137, iteration: 109110
loss: 1.0538498163223267,grad_norm: 0.9999996530300967, iteration: 109111
loss: 1.1755675077438354,grad_norm: 0.9999994708683265, iteration: 109112
loss: 1.0406240224838257,grad_norm: 0.9999991819811781, iteration: 109113
loss: 1.0455073118209839,grad_norm: 0.9999991588744429, iteration: 109114
loss: 1.0005406141281128,grad_norm: 0.8594756099310686, iteration: 109115
loss: 0.9878424406051636,grad_norm: 0.9401170682429666, iteration: 109116
loss: 1.0054728984832764,grad_norm: 0.7705322218897285, iteration: 109117
loss: 1.1235049962997437,grad_norm: 0.999999734758232, iteration: 109118
loss: 1.000026822090149,grad_norm: 0.954505429668551, iteration: 109119
loss: 1.1163477897644043,grad_norm: 0.9999993224234268, iteration: 109120
loss: 0.9959986805915833,grad_norm: 0.9671318012626199, iteration: 109121
loss: 0.9860114455223083,grad_norm: 0.9351133232654977, iteration: 109122
loss: 1.0216985940933228,grad_norm: 0.9058080929079724, iteration: 109123
loss: 1.1831213235855103,grad_norm: 0.9999998967115581, iteration: 109124
loss: 1.0003002882003784,grad_norm: 0.9999993455913437, iteration: 109125
loss: 0.9955105781555176,grad_norm: 0.8677587166551098, iteration: 109126
loss: 1.0331741571426392,grad_norm: 0.9999991613730322, iteration: 109127
loss: 1.0155662298202515,grad_norm: 0.9999990990201962, iteration: 109128
loss: 1.0136795043945312,grad_norm: 0.9701810343043031, iteration: 109129
loss: 1.0139673948287964,grad_norm: 0.9999997434597483, iteration: 109130
loss: 1.0231685638427734,grad_norm: 0.8745112331124576, iteration: 109131
loss: 1.0263690948486328,grad_norm: 0.9999999434544492, iteration: 109132
loss: 1.0212434530258179,grad_norm: 0.9999991774621849, iteration: 109133
loss: 1.0024884939193726,grad_norm: 0.7522723547775734, iteration: 109134
loss: 1.020477294921875,grad_norm: 0.9999995210311365, iteration: 109135
loss: 0.9937631487846375,grad_norm: 0.9999993274562933, iteration: 109136
loss: 1.0451486110687256,grad_norm: 0.9999999135672326, iteration: 109137
loss: 1.0363507270812988,grad_norm: 0.9999999330098102, iteration: 109138
loss: 1.0120131969451904,grad_norm: 0.9999997402655442, iteration: 109139
loss: 1.0172697305679321,grad_norm: 0.863989412872079, iteration: 109140
loss: 1.0381916761398315,grad_norm: 0.8916541468272513, iteration: 109141
loss: 1.027462124824524,grad_norm: 0.9117085747992923, iteration: 109142
loss: 0.9856584072113037,grad_norm: 0.917729771523731, iteration: 109143
loss: 0.9784865379333496,grad_norm: 0.7543362376589259, iteration: 109144
loss: 1.00238037109375,grad_norm: 0.9999993727657268, iteration: 109145
loss: 1.0581538677215576,grad_norm: 0.999999237427021, iteration: 109146
loss: 1.075366735458374,grad_norm: 0.9999995693108535, iteration: 109147
loss: 0.9971923232078552,grad_norm: 0.8634006191651599, iteration: 109148
loss: 1.0145536661148071,grad_norm: 0.9450288526568962, iteration: 109149
loss: 1.0451451539993286,grad_norm: 0.999999274535538, iteration: 109150
loss: 1.0698251724243164,grad_norm: 0.9754673901026897, iteration: 109151
loss: 0.9956627488136292,grad_norm: 0.9999992542380215, iteration: 109152
loss: 1.1143935918807983,grad_norm: 0.9837070316069614, iteration: 109153
loss: 1.0072357654571533,grad_norm: 0.9999989969361315, iteration: 109154
loss: 1.0294767618179321,grad_norm: 0.9919545774005771, iteration: 109155
loss: 1.1088030338287354,grad_norm: 0.9999993572441355, iteration: 109156
loss: 1.000596046447754,grad_norm: 0.9999991176415998, iteration: 109157
loss: 1.0444291830062866,grad_norm: 0.9999998205477432, iteration: 109158
loss: 0.9792704582214355,grad_norm: 0.9999992855275595, iteration: 109159
loss: 1.0230032205581665,grad_norm: 0.9999993692215495, iteration: 109160
loss: 0.9663782715797424,grad_norm: 0.8969023860733634, iteration: 109161
loss: 1.0342895984649658,grad_norm: 0.9109068914155011, iteration: 109162
loss: 1.1243008375167847,grad_norm: 0.9999993800715619, iteration: 109163
loss: 1.0386114120483398,grad_norm: 0.999455660422468, iteration: 109164
loss: 1.0165575742721558,grad_norm: 0.9999992439286504, iteration: 109165
loss: 1.0538259744644165,grad_norm: 0.9999990504752428, iteration: 109166
loss: 0.993625283241272,grad_norm: 0.8346101891180399, iteration: 109167
loss: 1.0076814889907837,grad_norm: 0.7424555635296253, iteration: 109168
loss: 0.9789862036705017,grad_norm: 0.8854132216553068, iteration: 109169
loss: 1.0306453704833984,grad_norm: 0.999999149711364, iteration: 109170
loss: 0.988669216632843,grad_norm: 0.7913481008734601, iteration: 109171
loss: 1.0876795053482056,grad_norm: 0.8171029202449135, iteration: 109172
loss: 1.052414894104004,grad_norm: 0.9999996976532497, iteration: 109173
loss: 1.2608916759490967,grad_norm: 0.9999997388081299, iteration: 109174
loss: 0.9782266020774841,grad_norm: 0.9999997333585599, iteration: 109175
loss: 1.0363315343856812,grad_norm: 0.8038849483022695, iteration: 109176
loss: 1.0480990409851074,grad_norm: 0.9999993714558216, iteration: 109177
loss: 1.0566813945770264,grad_norm: 0.7455451521742907, iteration: 109178
loss: 0.9915871620178223,grad_norm: 0.9496741273704273, iteration: 109179
loss: 1.0152641534805298,grad_norm: 0.9161992805860741, iteration: 109180
loss: 1.016839861869812,grad_norm: 0.9630813933934022, iteration: 109181
loss: 1.0352226495742798,grad_norm: 0.9999997750992914, iteration: 109182
loss: 1.0489184856414795,grad_norm: 0.9999999034463135, iteration: 109183
loss: 1.040332555770874,grad_norm: 0.9999994686360908, iteration: 109184
loss: 1.0322856903076172,grad_norm: 0.9692962295749502, iteration: 109185
loss: 1.1002333164215088,grad_norm: 0.9999993385925875, iteration: 109186
loss: 1.01048743724823,grad_norm: 0.9101849935459272, iteration: 109187
loss: 1.054307222366333,grad_norm: 0.9999990608610043, iteration: 109188
loss: 1.0213541984558105,grad_norm: 0.9536036587882879, iteration: 109189
loss: 0.9908710718154907,grad_norm: 0.9999992700853951, iteration: 109190
loss: 1.0243977308273315,grad_norm: 0.7530435406108456, iteration: 109191
loss: 1.0184794664382935,grad_norm: 0.9999990746986114, iteration: 109192
loss: 0.999653697013855,grad_norm: 0.9029243219480538, iteration: 109193
loss: 0.9624640345573425,grad_norm: 0.9999990045793473, iteration: 109194
loss: 1.0667840242385864,grad_norm: 0.9999993014444731, iteration: 109195
loss: 1.096871018409729,grad_norm: 0.9999993563357472, iteration: 109196
loss: 1.068000316619873,grad_norm: 0.9999991897350736, iteration: 109197
loss: 1.0144191980361938,grad_norm: 0.9236717906121562, iteration: 109198
loss: 1.0149171352386475,grad_norm: 0.9999993678259026, iteration: 109199
loss: 1.2311487197875977,grad_norm: 0.9999996583463896, iteration: 109200
loss: 1.0866693258285522,grad_norm: 0.9267810336346665, iteration: 109201
loss: 1.067964792251587,grad_norm: 0.999999334636508, iteration: 109202
loss: 1.084854006767273,grad_norm: 0.9999992587493534, iteration: 109203
loss: 1.0076078176498413,grad_norm: 0.8011246453336377, iteration: 109204
loss: 1.0089099407196045,grad_norm: 0.9999998142045892, iteration: 109205
loss: 1.0146703720092773,grad_norm: 0.9866844070214842, iteration: 109206
loss: 1.009554147720337,grad_norm: 0.9999991099668492, iteration: 109207
loss: 1.049890398979187,grad_norm: 0.7629507257891727, iteration: 109208
loss: 0.9702284336090088,grad_norm: 0.8658134494925303, iteration: 109209
loss: 1.0183820724487305,grad_norm: 0.9999991184823052, iteration: 109210
loss: 1.0085312128067017,grad_norm: 0.9999994895764281, iteration: 109211
loss: 1.0548415184020996,grad_norm: 0.9999993260041383, iteration: 109212
loss: 1.0281606912612915,grad_norm: 0.9999994225444199, iteration: 109213
loss: 1.0233839750289917,grad_norm: 0.9999996183935597, iteration: 109214
loss: 0.9995306730270386,grad_norm: 0.8046096174136743, iteration: 109215
loss: 1.100710391998291,grad_norm: 0.9999991690042843, iteration: 109216
loss: 0.999397337436676,grad_norm: 0.9999991191633433, iteration: 109217
loss: 1.0498604774475098,grad_norm: 1.000000029176588, iteration: 109218
loss: 1.121250033378601,grad_norm: 0.9999994778172179, iteration: 109219
loss: 1.0110461711883545,grad_norm: 0.8876646351685561, iteration: 109220
loss: 1.0806041955947876,grad_norm: 0.9576893899717488, iteration: 109221
loss: 0.9951000213623047,grad_norm: 0.9999999354974657, iteration: 109222
loss: 1.0006706714630127,grad_norm: 0.9999998823928877, iteration: 109223
loss: 0.9905987977981567,grad_norm: 0.8532542215423647, iteration: 109224
loss: 1.0276132822036743,grad_norm: 0.9705917737985059, iteration: 109225
loss: 1.0007261037826538,grad_norm: 0.7953735948502059, iteration: 109226
loss: 1.016418218612671,grad_norm: 0.822178427499398, iteration: 109227
loss: 0.9831756353378296,grad_norm: 0.9999994845548107, iteration: 109228
loss: 0.9851276278495789,grad_norm: 0.999999047155818, iteration: 109229
loss: 1.0889291763305664,grad_norm: 0.9999999444208004, iteration: 109230
loss: 1.108588457107544,grad_norm: 0.9999998145281512, iteration: 109231
loss: 1.0060808658599854,grad_norm: 0.9999989688841685, iteration: 109232
loss: 1.0349606275558472,grad_norm: 0.999999629784883, iteration: 109233
loss: 1.027050495147705,grad_norm: 0.9999991226467748, iteration: 109234
loss: 1.1516952514648438,grad_norm: 0.9999991646417172, iteration: 109235
loss: 1.0608950853347778,grad_norm: 0.9999991815867884, iteration: 109236
loss: 1.0255677700042725,grad_norm: 0.9999991363949325, iteration: 109237
loss: 1.0461561679840088,grad_norm: 0.999999223580886, iteration: 109238
loss: 1.0262539386749268,grad_norm: 0.9999996504097486, iteration: 109239
loss: 1.0751084089279175,grad_norm: 0.9831234040023439, iteration: 109240
loss: 1.0986440181732178,grad_norm: 0.9999994097305195, iteration: 109241
loss: 1.0152238607406616,grad_norm: 0.9999998525976317, iteration: 109242
loss: 1.0361756086349487,grad_norm: 0.9999991486116379, iteration: 109243
loss: 1.1875914335250854,grad_norm: 0.9999995503740969, iteration: 109244
loss: 1.1615642309188843,grad_norm: 0.999999843461249, iteration: 109245
loss: 1.0694729089736938,grad_norm: 0.8659635014892857, iteration: 109246
loss: 1.1497294902801514,grad_norm: 0.9999994717382914, iteration: 109247
loss: 1.0767558813095093,grad_norm: 0.9597598034382882, iteration: 109248
loss: 1.2515723705291748,grad_norm: 0.9999997006270945, iteration: 109249
loss: 1.0771777629852295,grad_norm: 0.999999110700568, iteration: 109250
loss: 1.067508578300476,grad_norm: 0.9999991906727919, iteration: 109251
loss: 1.311636209487915,grad_norm: 0.9999999451085317, iteration: 109252
loss: 1.1225184202194214,grad_norm: 0.9999995761878693, iteration: 109253
loss: 1.0827409029006958,grad_norm: 0.859355186695297, iteration: 109254
loss: 1.0593476295471191,grad_norm: 0.9999996510240438, iteration: 109255
loss: 1.0332202911376953,grad_norm: 0.810711884411851, iteration: 109256
loss: 1.0868165493011475,grad_norm: 0.9999997090800448, iteration: 109257
loss: 1.2464289665222168,grad_norm: 0.9999995510122454, iteration: 109258
loss: 1.0593262910842896,grad_norm: 0.9999998650312053, iteration: 109259
loss: 0.993545413017273,grad_norm: 0.8062006665003223, iteration: 109260
loss: 1.114202857017517,grad_norm: 0.9999997331233074, iteration: 109261
loss: 1.074945330619812,grad_norm: 0.9718843052930413, iteration: 109262
loss: 1.1456024646759033,grad_norm: 0.9999994048776527, iteration: 109263
loss: 1.3154206275939941,grad_norm: 0.9999996074210766, iteration: 109264
loss: 1.244296669960022,grad_norm: 0.9999999794604179, iteration: 109265
loss: 1.099440097808838,grad_norm: 0.9999992080266097, iteration: 109266
loss: 1.1812540292739868,grad_norm: 0.9999996953342515, iteration: 109267
loss: 1.0407803058624268,grad_norm: 0.8415371863131185, iteration: 109268
loss: 1.1479612588882446,grad_norm: 0.9999998234116657, iteration: 109269
loss: 1.0731943845748901,grad_norm: 0.9999999134487395, iteration: 109270
loss: 0.9920035600662231,grad_norm: 0.9656826169171714, iteration: 109271
loss: 1.0905438661575317,grad_norm: 0.999999356722393, iteration: 109272
loss: 1.1936171054840088,grad_norm: 0.9999999613120808, iteration: 109273
loss: 1.0065951347351074,grad_norm: 0.9999994211074723, iteration: 109274
loss: 1.0177251100540161,grad_norm: 0.9999993389220445, iteration: 109275
loss: 1.0920120477676392,grad_norm: 0.9999994757699497, iteration: 109276
loss: 1.0138107538223267,grad_norm: 0.9999992909141229, iteration: 109277
loss: 1.1231298446655273,grad_norm: 0.9999994192005632, iteration: 109278
loss: 1.1586427688598633,grad_norm: 0.9999996168770143, iteration: 109279
loss: 1.136060357093811,grad_norm: 0.9999993224254412, iteration: 109280
loss: 1.2670656442642212,grad_norm: 0.9999994973860827, iteration: 109281
loss: 1.207263469696045,grad_norm: 0.9999999482506955, iteration: 109282
loss: 1.083258867263794,grad_norm: 0.9999995664872344, iteration: 109283
loss: 1.1421452760696411,grad_norm: 0.9999990211844092, iteration: 109284
loss: 1.1329066753387451,grad_norm: 0.9999999006062918, iteration: 109285
loss: 1.1279881000518799,grad_norm: 0.9999996013136973, iteration: 109286
loss: 1.146486759185791,grad_norm: 0.9999997555061373, iteration: 109287
loss: 1.0489780902862549,grad_norm: 0.9999991035209563, iteration: 109288
loss: 1.0662546157836914,grad_norm: 0.9999993006221416, iteration: 109289
loss: 1.0004065036773682,grad_norm: 0.8306246278846081, iteration: 109290
loss: 1.0271451473236084,grad_norm: 0.9999997546366943, iteration: 109291
loss: 1.046903371810913,grad_norm: 0.9999997339090624, iteration: 109292
loss: 1.0764753818511963,grad_norm: 0.9999990677681707, iteration: 109293
loss: 1.1137218475341797,grad_norm: 0.9999991917446467, iteration: 109294
loss: 1.1109925508499146,grad_norm: 0.9999994528503156, iteration: 109295
loss: 0.9985756278038025,grad_norm: 0.9999995327457585, iteration: 109296
loss: 1.0298175811767578,grad_norm: 0.999999161672931, iteration: 109297
loss: 1.083740234375,grad_norm: 0.9050394396877952, iteration: 109298
loss: 1.0458835363388062,grad_norm: 0.9999997644446215, iteration: 109299
loss: 1.0393166542053223,grad_norm: 0.9999995363741863, iteration: 109300
loss: 1.1497429609298706,grad_norm: 0.9999993930083447, iteration: 109301
loss: 1.0259872674942017,grad_norm: 0.999999120620309, iteration: 109302
loss: 1.042208194732666,grad_norm: 0.9999995532622592, iteration: 109303
loss: 1.091333270072937,grad_norm: 0.9999997783934164, iteration: 109304
loss: 1.0223548412322998,grad_norm: 0.8997188188658408, iteration: 109305
loss: 0.994556725025177,grad_norm: 0.9999991017718505, iteration: 109306
loss: 1.0252100229263306,grad_norm: 0.9842600027220425, iteration: 109307
loss: 1.0554015636444092,grad_norm: 0.9999993610744611, iteration: 109308
loss: 1.005284070968628,grad_norm: 0.876020969963163, iteration: 109309
loss: 1.0588736534118652,grad_norm: 0.9999992508841826, iteration: 109310
loss: 1.0120203495025635,grad_norm: 0.8619380290650553, iteration: 109311
loss: 1.0088210105895996,grad_norm: 0.9999993914201705, iteration: 109312
loss: 1.0460830926895142,grad_norm: 0.9999992499390066, iteration: 109313
loss: 1.0426528453826904,grad_norm: 0.9999996972262574, iteration: 109314
loss: 1.0570368766784668,grad_norm: 0.9383323025658447, iteration: 109315
loss: 1.0980620384216309,grad_norm: 0.9999992814063394, iteration: 109316
loss: 1.0453333854675293,grad_norm: 0.9999991388426792, iteration: 109317
loss: 1.022409439086914,grad_norm: 0.7579757249457074, iteration: 109318
loss: 1.0388405323028564,grad_norm: 0.9999991055670726, iteration: 109319
loss: 1.0966825485229492,grad_norm: 0.9999991366308632, iteration: 109320
loss: 1.1154786348342896,grad_norm: 0.999999681789195, iteration: 109321
loss: 1.0932514667510986,grad_norm: 0.9999992446816136, iteration: 109322
loss: 1.1646766662597656,grad_norm: 0.9999996092271292, iteration: 109323
loss: 0.9737658500671387,grad_norm: 0.855610363726917, iteration: 109324
loss: 1.050057053565979,grad_norm: 0.9999990519505193, iteration: 109325
loss: 1.0154987573623657,grad_norm: 0.999999099245897, iteration: 109326
loss: 0.9836707711219788,grad_norm: 0.872086496451133, iteration: 109327
loss: 1.1382564306259155,grad_norm: 0.9999992628188611, iteration: 109328
loss: 1.055809736251831,grad_norm: 0.9999995785919099, iteration: 109329
loss: 1.0682644844055176,grad_norm: 0.9999995155160718, iteration: 109330
loss: 1.0348072052001953,grad_norm: 0.9999996971729347, iteration: 109331
loss: 1.0265852212905884,grad_norm: 0.7680460199522523, iteration: 109332
loss: 1.269159197807312,grad_norm: 0.9999996127622949, iteration: 109333
loss: 1.061703085899353,grad_norm: 0.9999999054615056, iteration: 109334
loss: 1.0393136739730835,grad_norm: 0.9710508858313888, iteration: 109335
loss: 1.0064902305603027,grad_norm: 0.9526854838182454, iteration: 109336
loss: 0.9859767556190491,grad_norm: 0.9999990821657067, iteration: 109337
loss: 1.0248427391052246,grad_norm: 0.9999990927552208, iteration: 109338
loss: 1.0127679109573364,grad_norm: 0.9999998087992408, iteration: 109339
loss: 0.9536144733428955,grad_norm: 0.9379002371568114, iteration: 109340
loss: 0.9947031140327454,grad_norm: 0.9781283235259517, iteration: 109341
loss: 1.1392130851745605,grad_norm: 0.9999999346087569, iteration: 109342
loss: 1.0182183980941772,grad_norm: 0.9999992215014296, iteration: 109343
loss: 1.0347108840942383,grad_norm: 0.999999097283529, iteration: 109344
loss: 1.0236629247665405,grad_norm: 0.978957273434279, iteration: 109345
loss: 1.0367718935012817,grad_norm: 0.9999994720623137, iteration: 109346
loss: 1.0666325092315674,grad_norm: 0.9941019655286075, iteration: 109347
loss: 1.191597580909729,grad_norm: 0.9999995832545532, iteration: 109348
loss: 1.0038963556289673,grad_norm: 0.9999992276994516, iteration: 109349
loss: 1.0288575887680054,grad_norm: 0.9999993545395224, iteration: 109350
loss: 1.0253030061721802,grad_norm: 0.999999197590398, iteration: 109351
loss: 1.0891344547271729,grad_norm: 0.9999990860218478, iteration: 109352
loss: 1.0535900592803955,grad_norm: 0.9999994280292457, iteration: 109353
loss: 1.0849016904830933,grad_norm: 1.0000000247172023, iteration: 109354
loss: 1.1502782106399536,grad_norm: 0.9999990634625102, iteration: 109355
loss: 0.9569657444953918,grad_norm: 0.9091752053060494, iteration: 109356
loss: 1.0434824228286743,grad_norm: 0.999999376670261, iteration: 109357
loss: 1.1217466592788696,grad_norm: 0.9999999007742916, iteration: 109358
loss: 0.9766581058502197,grad_norm: 0.8705484640167185, iteration: 109359
loss: 1.06940758228302,grad_norm: 0.9999999631178867, iteration: 109360
loss: 1.0360546112060547,grad_norm: 0.9999991216226557, iteration: 109361
loss: 1.0360708236694336,grad_norm: 0.9999991152564671, iteration: 109362
loss: 1.0201905965805054,grad_norm: 0.9999999525590699, iteration: 109363
loss: 1.0204691886901855,grad_norm: 0.9999998187328032, iteration: 109364
loss: 1.0190309286117554,grad_norm: 0.9230402670623041, iteration: 109365
loss: 0.9871487617492676,grad_norm: 0.9999990379463993, iteration: 109366
loss: 1.0113588571548462,grad_norm: 0.9999991495707607, iteration: 109367
loss: 0.9841437935829163,grad_norm: 0.9808070789027148, iteration: 109368
loss: 1.0354392528533936,grad_norm: 0.9999995065391376, iteration: 109369
loss: 1.1617143154144287,grad_norm: 0.9999995532223716, iteration: 109370
loss: 1.1698805093765259,grad_norm: 0.9999998530656689, iteration: 109371
loss: 1.0550438165664673,grad_norm: 0.9091890484321283, iteration: 109372
loss: 0.9855570197105408,grad_norm: 0.9999998953188792, iteration: 109373
loss: 1.0024369955062866,grad_norm: 0.999999819728699, iteration: 109374
loss: 1.096194863319397,grad_norm: 0.9999989859670475, iteration: 109375
loss: 1.0135295391082764,grad_norm: 0.9999993349215155, iteration: 109376
loss: 1.0222491025924683,grad_norm: 0.999999942044872, iteration: 109377
loss: 1.0763752460479736,grad_norm: 0.9999996046045567, iteration: 109378
loss: 1.142964482307434,grad_norm: 0.9999999596707649, iteration: 109379
loss: 0.9884511828422546,grad_norm: 0.9999992994260065, iteration: 109380
loss: 1.0932966470718384,grad_norm: 0.9999998966554984, iteration: 109381
loss: 1.0280020236968994,grad_norm: 0.9999992091997758, iteration: 109382
loss: 0.9569759368896484,grad_norm: 0.8898302971383464, iteration: 109383
loss: 1.022971749305725,grad_norm: 0.8423378639491349, iteration: 109384
loss: 1.0624339580535889,grad_norm: 0.9999990661721184, iteration: 109385
loss: 0.9691451787948608,grad_norm: 0.9999990664819988, iteration: 109386
loss: 1.0910826921463013,grad_norm: 0.9999993224791354, iteration: 109387
loss: 1.0354678630828857,grad_norm: 0.9999990696394843, iteration: 109388
loss: 0.9731075167655945,grad_norm: 0.9999992139175872, iteration: 109389
loss: 0.9784079790115356,grad_norm: 0.9124183251207731, iteration: 109390
loss: 1.0344611406326294,grad_norm: 0.9999996089234772, iteration: 109391
loss: 1.0133569240570068,grad_norm: 0.9264465804370433, iteration: 109392
loss: 1.0212353467941284,grad_norm: 0.9999991696385298, iteration: 109393
loss: 1.0018283128738403,grad_norm: 0.9999994038572819, iteration: 109394
loss: 1.0120553970336914,grad_norm: 0.8730091011747526, iteration: 109395
loss: 0.9931458234786987,grad_norm: 0.9999992103374794, iteration: 109396
loss: 1.0260472297668457,grad_norm: 0.9411924649539362, iteration: 109397
loss: 1.0072656869888306,grad_norm: 0.8006380297415839, iteration: 109398
loss: 0.9735040664672852,grad_norm: 0.8381867694260333, iteration: 109399
loss: 1.012047290802002,grad_norm: 0.9999990750510781, iteration: 109400
loss: 1.0294079780578613,grad_norm: 0.9011472749238043, iteration: 109401
loss: 1.0038530826568604,grad_norm: 0.8804322229195005, iteration: 109402
loss: 1.0647157430648804,grad_norm: 0.9999999287389348, iteration: 109403
loss: 1.0292727947235107,grad_norm: 0.9999990771622974, iteration: 109404
loss: 1.0751193761825562,grad_norm: 0.9999998478456603, iteration: 109405
loss: 1.0579861402511597,grad_norm: 0.999999522876722, iteration: 109406
loss: 1.0696626901626587,grad_norm: 0.9999991231278589, iteration: 109407
loss: 1.0459295511245728,grad_norm: 0.9999992688499869, iteration: 109408
loss: 0.9476931691169739,grad_norm: 0.9999990683585364, iteration: 109409
loss: 1.0009604692459106,grad_norm: 0.8953634523515309, iteration: 109410
loss: 0.9942708611488342,grad_norm: 0.8886360315975211, iteration: 109411
loss: 1.018390417098999,grad_norm: 0.9644166773343315, iteration: 109412
loss: 1.0242754220962524,grad_norm: 0.8088885969887523, iteration: 109413
loss: 1.002699851989746,grad_norm: 0.9999990825412469, iteration: 109414
loss: 1.0093512535095215,grad_norm: 0.9999990907024641, iteration: 109415
loss: 0.9817045331001282,grad_norm: 0.9396779996809641, iteration: 109416
loss: 1.0069361925125122,grad_norm: 0.9999995666434469, iteration: 109417
loss: 1.0353585481643677,grad_norm: 0.9137176400446372, iteration: 109418
loss: 1.0589134693145752,grad_norm: 0.9999991605267523, iteration: 109419
loss: 1.0384604930877686,grad_norm: 0.9999995880511222, iteration: 109420
loss: 1.0248615741729736,grad_norm: 0.8363268489829143, iteration: 109421
loss: 1.010118007659912,grad_norm: 0.9999993917720821, iteration: 109422
loss: 0.9899398684501648,grad_norm: 0.9999998727925241, iteration: 109423
loss: 1.0247398614883423,grad_norm: 0.8323844854642993, iteration: 109424
loss: 1.0217912197113037,grad_norm: 0.9589873996301289, iteration: 109425
loss: 1.0477293729782104,grad_norm: 0.9999993769282693, iteration: 109426
loss: 0.9848939776420593,grad_norm: 0.9999991138247267, iteration: 109427
loss: 1.0170522928237915,grad_norm: 0.8093846855860759, iteration: 109428
loss: 1.007365345954895,grad_norm: 0.8852405865035098, iteration: 109429
loss: 1.0171008110046387,grad_norm: 0.8886090402939665, iteration: 109430
loss: 1.0224088430404663,grad_norm: 0.8170464475133854, iteration: 109431
loss: 1.0519654750823975,grad_norm: 0.9999994634813819, iteration: 109432
loss: 1.0434802770614624,grad_norm: 0.9999993810429116, iteration: 109433
loss: 1.0832836627960205,grad_norm: 0.9999998362989889, iteration: 109434
loss: 1.0057214498519897,grad_norm: 0.999999054901683, iteration: 109435
loss: 0.9702088832855225,grad_norm: 0.9999996880418499, iteration: 109436
loss: 0.9922275543212891,grad_norm: 0.8867663806225012, iteration: 109437
loss: 1.0881463289260864,grad_norm: 0.999999633465296, iteration: 109438
loss: 0.9976877570152283,grad_norm: 0.829034909876223, iteration: 109439
loss: 1.0262784957885742,grad_norm: 0.9999995698371908, iteration: 109440
loss: 1.181716799736023,grad_norm: 0.9999998782078738, iteration: 109441
loss: 0.9888535141944885,grad_norm: 0.9672640467071311, iteration: 109442
loss: 1.0580567121505737,grad_norm: 0.9999999476476565, iteration: 109443
loss: 1.0415925979614258,grad_norm: 0.9771938343229901, iteration: 109444
loss: 1.081863284111023,grad_norm: 0.9756901492610953, iteration: 109445
loss: 1.0188885927200317,grad_norm: 0.9999993942237069, iteration: 109446
loss: 0.951906681060791,grad_norm: 0.8994392889017544, iteration: 109447
loss: 1.0459606647491455,grad_norm: 0.9086300286933632, iteration: 109448
loss: 1.035575270652771,grad_norm: 0.9999992904534146, iteration: 109449
loss: 1.059580683708191,grad_norm: 0.9999996166928602, iteration: 109450
loss: 1.1007540225982666,grad_norm: 0.9999997505154086, iteration: 109451
loss: 0.9940612316131592,grad_norm: 0.848674942534083, iteration: 109452
loss: 1.027183175086975,grad_norm: 0.8417602322216265, iteration: 109453
loss: 1.0065491199493408,grad_norm: 0.8354313244292811, iteration: 109454
loss: 1.0183939933776855,grad_norm: 0.9999999543642829, iteration: 109455
loss: 1.0128716230392456,grad_norm: 0.9999994739820683, iteration: 109456
loss: 1.0946182012557983,grad_norm: 0.9999995092336562, iteration: 109457
loss: 1.026140570640564,grad_norm: 0.9999993998996054, iteration: 109458
loss: 1.052402138710022,grad_norm: 0.9999992363801558, iteration: 109459
loss: 1.0018713474273682,grad_norm: 0.9814401974895074, iteration: 109460
loss: 1.041025161743164,grad_norm: 0.9791821040754866, iteration: 109461
loss: 1.0017399787902832,grad_norm: 0.8861758683676706, iteration: 109462
loss: 1.0127936601638794,grad_norm: 0.9999990332364143, iteration: 109463
loss: 1.0097795724868774,grad_norm: 0.9999991039489509, iteration: 109464
loss: 1.031090259552002,grad_norm: 0.794875210898648, iteration: 109465
loss: 1.0101171731948853,grad_norm: 0.9999994380552844, iteration: 109466
loss: 1.0786776542663574,grad_norm: 0.999999122326628, iteration: 109467
loss: 0.9893993139266968,grad_norm: 0.9295221872892088, iteration: 109468
loss: 1.00153648853302,grad_norm: 0.9999994919378535, iteration: 109469
loss: 1.01297128200531,grad_norm: 0.9791867669759052, iteration: 109470
loss: 1.0339484214782715,grad_norm: 0.9999994626416262, iteration: 109471
loss: 1.1383068561553955,grad_norm: 0.9999999494680155, iteration: 109472
loss: 0.9902026653289795,grad_norm: 0.9999991809644816, iteration: 109473
loss: 1.050625205039978,grad_norm: 0.9357591593219837, iteration: 109474
loss: 1.0308347940444946,grad_norm: 0.9999992460020878, iteration: 109475
loss: 1.0489000082015991,grad_norm: 0.999999932710542, iteration: 109476
loss: 1.053128719329834,grad_norm: 0.9999995784747833, iteration: 109477
loss: 1.0926047563552856,grad_norm: 0.999999939272174, iteration: 109478
loss: 1.022802710533142,grad_norm: 0.9717797916970148, iteration: 109479
loss: 1.0181678533554077,grad_norm: 0.9999990159215315, iteration: 109480
loss: 1.001518964767456,grad_norm: 0.942231286340879, iteration: 109481
loss: 0.9862147569656372,grad_norm: 0.9284144415306887, iteration: 109482
loss: 1.0316736698150635,grad_norm: 0.8428551473604317, iteration: 109483
loss: 1.016616940498352,grad_norm: 0.8272903351758453, iteration: 109484
loss: 1.0387797355651855,grad_norm: 0.9999991274688211, iteration: 109485
loss: 0.9927902817726135,grad_norm: 0.9999990889428286, iteration: 109486
loss: 1.0162034034729004,grad_norm: 0.8642235951998212, iteration: 109487
loss: 1.0998705625534058,grad_norm: 0.9999999294910391, iteration: 109488
loss: 1.1311485767364502,grad_norm: 0.999999814725001, iteration: 109489
loss: 1.079441785812378,grad_norm: 0.9999998328877023, iteration: 109490
loss: 0.9861500859260559,grad_norm: 0.820761580528663, iteration: 109491
loss: 0.9977465867996216,grad_norm: 0.9999995044448987, iteration: 109492
loss: 1.1434736251831055,grad_norm: 0.9999996751364714, iteration: 109493
loss: 1.0442301034927368,grad_norm: 0.9999999924789039, iteration: 109494
loss: 0.9831334948539734,grad_norm: 0.8545912934813197, iteration: 109495
loss: 1.0383552312850952,grad_norm: 0.9999991628841692, iteration: 109496
loss: 1.0413931608200073,grad_norm: 1.0000000421965103, iteration: 109497
loss: 1.2180572748184204,grad_norm: 0.999999810487918, iteration: 109498
loss: 1.0076525211334229,grad_norm: 0.9999995230743923, iteration: 109499
loss: 1.0923248529434204,grad_norm: 0.9999995710803654, iteration: 109500
loss: 1.0134949684143066,grad_norm: 0.8091057435329905, iteration: 109501
loss: 1.0504320859909058,grad_norm: 0.9999994779114232, iteration: 109502
loss: 1.024969458580017,grad_norm: 0.9961219446686459, iteration: 109503
loss: 0.9829903244972229,grad_norm: 0.9999993139920286, iteration: 109504
loss: 1.088570475578308,grad_norm: 0.9999998468206895, iteration: 109505
loss: 1.0227015018463135,grad_norm: 0.9999992558887432, iteration: 109506
loss: 1.0202878713607788,grad_norm: 0.9923341392982601, iteration: 109507
loss: 1.0243221521377563,grad_norm: 0.9919204763810926, iteration: 109508
loss: 0.9965240955352783,grad_norm: 0.9317486776818432, iteration: 109509
loss: 1.1302003860473633,grad_norm: 0.9999996439885614, iteration: 109510
loss: 1.0610744953155518,grad_norm: 0.9999996668088053, iteration: 109511
loss: 1.223401665687561,grad_norm: 0.9999994490877787, iteration: 109512
loss: 0.9774317145347595,grad_norm: 0.9748742570891475, iteration: 109513
loss: 1.011536955833435,grad_norm: 0.8095488028090441, iteration: 109514
loss: 1.0124599933624268,grad_norm: 0.9830275131776584, iteration: 109515
loss: 1.0249428749084473,grad_norm: 0.9999995461608046, iteration: 109516
loss: 1.0857611894607544,grad_norm: 0.9999994893269369, iteration: 109517
loss: 1.0548994541168213,grad_norm: 0.9999995525223705, iteration: 109518
loss: 1.0806639194488525,grad_norm: 0.9999991629026915, iteration: 109519
loss: 1.0610100030899048,grad_norm: 0.9999999417020135, iteration: 109520
loss: 1.028349757194519,grad_norm: 0.9309885341623355, iteration: 109521
loss: 0.9901636838912964,grad_norm: 0.9999991652873496, iteration: 109522
loss: 0.9784989356994629,grad_norm: 0.7155506637044774, iteration: 109523
loss: 0.9907693266868591,grad_norm: 0.9894688009199942, iteration: 109524
loss: 1.2396148443222046,grad_norm: 0.9999998900944115, iteration: 109525
loss: 0.9919124841690063,grad_norm: 0.9338291400236467, iteration: 109526
loss: 1.188174843788147,grad_norm: 0.9999999974457848, iteration: 109527
loss: 1.00281822681427,grad_norm: 0.9999990364509023, iteration: 109528
loss: 0.9702290892601013,grad_norm: 0.9517574608196526, iteration: 109529
loss: 0.9987162351608276,grad_norm: 0.917866010557727, iteration: 109530
loss: 1.0435222387313843,grad_norm: 0.9999996250955474, iteration: 109531
loss: 1.0097832679748535,grad_norm: 0.8708950631063429, iteration: 109532
loss: 1.0147231817245483,grad_norm: 0.999999061948917, iteration: 109533
loss: 1.0028331279754639,grad_norm: 0.8814962638864542, iteration: 109534
loss: 0.9950678944587708,grad_norm: 0.9286207289922682, iteration: 109535
loss: 1.055130124092102,grad_norm: 0.999999141297741, iteration: 109536
loss: 0.995280385017395,grad_norm: 0.9068119633424723, iteration: 109537
loss: 1.0147361755371094,grad_norm: 0.9999996318240931, iteration: 109538
loss: 1.0040547847747803,grad_norm: 0.9397988433563095, iteration: 109539
loss: 1.2461557388305664,grad_norm: 0.9999997669122322, iteration: 109540
loss: 1.0659559965133667,grad_norm: 0.9717145283563191, iteration: 109541
loss: 1.0336817502975464,grad_norm: 0.9140306912217494, iteration: 109542
loss: 1.0829094648361206,grad_norm: 0.9948588573074374, iteration: 109543
loss: 1.05910062789917,grad_norm: 0.999999546451571, iteration: 109544
loss: 1.0386093854904175,grad_norm: 0.8610594389267505, iteration: 109545
loss: 0.9600576162338257,grad_norm: 0.834124220066181, iteration: 109546
loss: 1.0147926807403564,grad_norm: 0.8170585782178401, iteration: 109547
loss: 1.0190812349319458,grad_norm: 0.8631678418439755, iteration: 109548
loss: 0.9823570847511292,grad_norm: 0.9999992936604157, iteration: 109549
loss: 0.9997628927230835,grad_norm: 0.9999996508388601, iteration: 109550
loss: 1.0429222583770752,grad_norm: 0.9999991910327873, iteration: 109551
loss: 1.018499732017517,grad_norm: 0.999999666811919, iteration: 109552
loss: 0.978337824344635,grad_norm: 0.9999994676161482, iteration: 109553
loss: 1.0019034147262573,grad_norm: 0.9999991314439066, iteration: 109554
loss: 1.0147819519042969,grad_norm: 0.9999998961736049, iteration: 109555
loss: 1.0412081480026245,grad_norm: 0.9999993271842347, iteration: 109556
loss: 1.0595641136169434,grad_norm: 0.9999990951052549, iteration: 109557
loss: 1.0208851099014282,grad_norm: 0.9999996295329505, iteration: 109558
loss: 0.9830504655838013,grad_norm: 0.9217813876027888, iteration: 109559
loss: 1.0378021001815796,grad_norm: 0.9999994504250284, iteration: 109560
loss: 1.0457347631454468,grad_norm: 0.9906389208146363, iteration: 109561
loss: 1.0422710180282593,grad_norm: 0.8354741635305755, iteration: 109562
loss: 1.0073646306991577,grad_norm: 0.8171039202254731, iteration: 109563
loss: 1.1662440299987793,grad_norm: 0.9999994098127898, iteration: 109564
loss: 1.0225911140441895,grad_norm: 0.9626333676754587, iteration: 109565
loss: 0.9606180191040039,grad_norm: 0.9999989964064644, iteration: 109566
loss: 1.0110563039779663,grad_norm: 0.9999991856899794, iteration: 109567
loss: 1.0760939121246338,grad_norm: 0.9999992190364891, iteration: 109568
loss: 0.9908972978591919,grad_norm: 0.9999995506105246, iteration: 109569
loss: 1.0034655332565308,grad_norm: 0.9215121862694737, iteration: 109570
loss: 1.2042655944824219,grad_norm: 0.9999994823861633, iteration: 109571
loss: 1.151798129081726,grad_norm: 0.9999997422599006, iteration: 109572
loss: 1.085074543952942,grad_norm: 0.9999996260258589, iteration: 109573
loss: 1.0030783414840698,grad_norm: 0.9999996542767958, iteration: 109574
loss: 1.0061907768249512,grad_norm: 0.9999990449480836, iteration: 109575
loss: 1.0032154321670532,grad_norm: 0.8286492880232554, iteration: 109576
loss: 1.111228585243225,grad_norm: 0.9999998618415351, iteration: 109577
loss: 1.0558828115463257,grad_norm: 0.9999998933229576, iteration: 109578
loss: 1.0047614574432373,grad_norm: 0.9999997760577312, iteration: 109579
loss: 0.9846972227096558,grad_norm: 0.8670710643341288, iteration: 109580
loss: 1.0662660598754883,grad_norm: 0.9999997312130594, iteration: 109581
loss: 1.0709643363952637,grad_norm: 0.9999995380005744, iteration: 109582
loss: 0.9591103196144104,grad_norm: 0.9821858149183761, iteration: 109583
loss: 1.034915804862976,grad_norm: 0.9999992672816097, iteration: 109584
loss: 1.0417464971542358,grad_norm: 0.9999994886346547, iteration: 109585
loss: 1.0218758583068848,grad_norm: 0.7302243210442302, iteration: 109586
loss: 1.0069409608840942,grad_norm: 0.9999997131188639, iteration: 109587
loss: 1.0437374114990234,grad_norm: 0.8920147583608998, iteration: 109588
loss: 1.1164863109588623,grad_norm: 0.9999997473999354, iteration: 109589
loss: 1.0178965330123901,grad_norm: 0.8790065420644041, iteration: 109590
loss: 1.0418639183044434,grad_norm: 0.9999992800432217, iteration: 109591
loss: 0.9780156016349792,grad_norm: 0.9001808727582009, iteration: 109592
loss: 1.0251638889312744,grad_norm: 0.8892440842945726, iteration: 109593
loss: 1.0176488161087036,grad_norm: 0.8187400139489195, iteration: 109594
loss: 1.0286678075790405,grad_norm: 0.9999991062552354, iteration: 109595
loss: 1.0880628824234009,grad_norm: 0.9999994377370578, iteration: 109596
loss: 1.046217918395996,grad_norm: 0.9999990940600901, iteration: 109597
loss: 1.0894395112991333,grad_norm: 0.9999991425013236, iteration: 109598
loss: 1.015283465385437,grad_norm: 0.9635786711531615, iteration: 109599
loss: 1.0084059238433838,grad_norm: 0.8485376279665227, iteration: 109600
loss: 0.9946275949478149,grad_norm: 0.7926949846138988, iteration: 109601
loss: 1.020767331123352,grad_norm: 0.8638254909304129, iteration: 109602
loss: 1.1128054857254028,grad_norm: 0.9999993995227393, iteration: 109603
loss: 0.9979708194732666,grad_norm: 0.9632029255682423, iteration: 109604
loss: 1.04081392288208,grad_norm: 0.9999993227242306, iteration: 109605
loss: 1.0553828477859497,grad_norm: 0.9067656983799803, iteration: 109606
loss: 1.022985577583313,grad_norm: 0.8512206682622532, iteration: 109607
loss: 1.0949610471725464,grad_norm: 0.9999994902727549, iteration: 109608
loss: 1.0051825046539307,grad_norm: 0.8821257407305282, iteration: 109609
loss: 1.0098252296447754,grad_norm: 0.9999995691295323, iteration: 109610
loss: 1.0564286708831787,grad_norm: 0.9999998661185969, iteration: 109611
loss: 1.0695936679840088,grad_norm: 0.9999998949639916, iteration: 109612
loss: 1.0256096124649048,grad_norm: 0.9999990001055648, iteration: 109613
loss: 1.037571668624878,grad_norm: 0.9999995987501313, iteration: 109614
loss: 1.0153921842575073,grad_norm: 0.9151063030406605, iteration: 109615
loss: 0.9917412996292114,grad_norm: 0.9999995761972296, iteration: 109616
loss: 1.0319668054580688,grad_norm: 0.7793890141058551, iteration: 109617
loss: 0.997892439365387,grad_norm: 0.7634169125229631, iteration: 109618
loss: 1.0492876768112183,grad_norm: 0.9999994294428234, iteration: 109619
loss: 0.9948142766952515,grad_norm: 0.999999224670797, iteration: 109620
loss: 1.0162537097930908,grad_norm: 0.9999993418886046, iteration: 109621
loss: 0.9809758067131042,grad_norm: 0.7954804374995326, iteration: 109622
loss: 0.9728034734725952,grad_norm: 0.8004378482413531, iteration: 109623
loss: 1.0558521747589111,grad_norm: 0.9216347836057895, iteration: 109624
loss: 1.0251796245574951,grad_norm: 0.9554138749996771, iteration: 109625
loss: 1.0379921197891235,grad_norm: 0.9999998483784422, iteration: 109626
loss: 1.0035229921340942,grad_norm: 0.7506945388919525, iteration: 109627
loss: 1.1539583206176758,grad_norm: 0.9999996258075644, iteration: 109628
loss: 1.0313653945922852,grad_norm: 0.9999999275194578, iteration: 109629
loss: 0.9841192960739136,grad_norm: 0.9999999469802205, iteration: 109630
loss: 1.1529805660247803,grad_norm: 0.9999993643174653, iteration: 109631
loss: 0.9866169691085815,grad_norm: 0.9107319357296778, iteration: 109632
loss: 1.010911464691162,grad_norm: 0.7535442669267035, iteration: 109633
loss: 0.9961243271827698,grad_norm: 0.8670371391107686, iteration: 109634
loss: 0.9722377061843872,grad_norm: 0.8556537798051692, iteration: 109635
loss: 1.0285359621047974,grad_norm: 0.9999991827833568, iteration: 109636
loss: 0.9894756078720093,grad_norm: 0.999999063160163, iteration: 109637
loss: 1.0219913721084595,grad_norm: 0.9999992083154782, iteration: 109638
loss: 0.9864909052848816,grad_norm: 0.9999995201843558, iteration: 109639
loss: 1.023074746131897,grad_norm: 0.9190089125389667, iteration: 109640
loss: 1.0319347381591797,grad_norm: 0.9999991945675669, iteration: 109641
loss: 1.0812041759490967,grad_norm: 0.9999994878898021, iteration: 109642
loss: 1.0703561305999756,grad_norm: 0.9999998072965065, iteration: 109643
loss: 0.9999837875366211,grad_norm: 0.9999990480506219, iteration: 109644
loss: 1.042752742767334,grad_norm: 0.9082704614923627, iteration: 109645
loss: 1.0157428979873657,grad_norm: 0.8669976615689945, iteration: 109646
loss: 1.0391566753387451,grad_norm: 0.950127207360177, iteration: 109647
loss: 0.9899139404296875,grad_norm: 0.8194000307989613, iteration: 109648
loss: 1.0303757190704346,grad_norm: 0.8619472181470151, iteration: 109649
loss: 0.9975368976593018,grad_norm: 0.9711327694064383, iteration: 109650
loss: 1.0217746496200562,grad_norm: 0.9999992627423775, iteration: 109651
loss: 0.991998016834259,grad_norm: 0.9247534142921936, iteration: 109652
loss: 1.111756443977356,grad_norm: 0.9999996984405884, iteration: 109653
loss: 1.0556094646453857,grad_norm: 0.9999997604179081, iteration: 109654
loss: 1.0933864116668701,grad_norm: 0.9999994561055714, iteration: 109655
loss: 1.0241717100143433,grad_norm: 0.9678326636919906, iteration: 109656
loss: 1.0659016370773315,grad_norm: 0.8399274404491313, iteration: 109657
loss: 1.046578288078308,grad_norm: 0.9999997748638961, iteration: 109658
loss: 1.052070140838623,grad_norm: 0.9999990615010981, iteration: 109659
loss: 1.009379267692566,grad_norm: 0.9999990469623757, iteration: 109660
loss: 0.9964475035667419,grad_norm: 0.9075015830371417, iteration: 109661
loss: 1.0718997716903687,grad_norm: 0.9999997033240287, iteration: 109662
loss: 1.029128074645996,grad_norm: 0.9999994747362632, iteration: 109663
loss: 1.078551173210144,grad_norm: 0.9999990980059935, iteration: 109664
loss: 0.9951101541519165,grad_norm: 0.7945939594023693, iteration: 109665
loss: 1.013193130493164,grad_norm: 0.9079651421364957, iteration: 109666
loss: 1.0203802585601807,grad_norm: 0.9999998328690305, iteration: 109667
loss: 1.1168262958526611,grad_norm: 0.9955234584258886, iteration: 109668
loss: 1.004301905632019,grad_norm: 0.9455904283151029, iteration: 109669
loss: 1.0171977281570435,grad_norm: 0.9995349794575576, iteration: 109670
loss: 1.1593269109725952,grad_norm: 0.9999996326718921, iteration: 109671
loss: 1.0239331722259521,grad_norm: 0.9999991888470061, iteration: 109672
loss: 1.0320292711257935,grad_norm: 0.9999994548935707, iteration: 109673
loss: 1.0456781387329102,grad_norm: 0.999999829651806, iteration: 109674
loss: 1.147377848625183,grad_norm: 0.9999997869721714, iteration: 109675
loss: 1.0277247428894043,grad_norm: 0.9663813162280609, iteration: 109676
loss: 1.1167914867401123,grad_norm: 0.9999998472227658, iteration: 109677
loss: 0.9991876482963562,grad_norm: 0.9999997636816341, iteration: 109678
loss: 1.0069491863250732,grad_norm: 0.9999993118609367, iteration: 109679
loss: 1.0792087316513062,grad_norm: 0.9999992310572665, iteration: 109680
loss: 1.0135846138000488,grad_norm: 0.8622108887570621, iteration: 109681
loss: 1.0774155855178833,grad_norm: 0.9999991267285637, iteration: 109682
loss: 0.996161937713623,grad_norm: 0.9999991472888037, iteration: 109683
loss: 1.004516363143921,grad_norm: 0.9241568780120517, iteration: 109684
loss: 1.042243242263794,grad_norm: 0.9999997039003159, iteration: 109685
loss: 1.0593563318252563,grad_norm: 0.9999993309824313, iteration: 109686
loss: 1.0009143352508545,grad_norm: 0.924983599722699, iteration: 109687
loss: 1.1065231561660767,grad_norm: 0.9999994331538812, iteration: 109688
loss: 0.9824897050857544,grad_norm: 0.8219910435401383, iteration: 109689
loss: 0.9962294697761536,grad_norm: 0.999999079393608, iteration: 109690
loss: 0.9896854758262634,grad_norm: 0.999999701805077, iteration: 109691
loss: 1.1655280590057373,grad_norm: 0.9999998993559114, iteration: 109692
loss: 1.1269726753234863,grad_norm: 0.9999998117369019, iteration: 109693
loss: 0.9874923229217529,grad_norm: 0.9999998404103787, iteration: 109694
loss: 0.997603178024292,grad_norm: 0.9999990107109653, iteration: 109695
loss: 1.0244848728179932,grad_norm: 0.9734619432110928, iteration: 109696
loss: 1.0266311168670654,grad_norm: 0.9999993999452095, iteration: 109697
loss: 0.9894648790359497,grad_norm: 0.8160232902236969, iteration: 109698
loss: 0.9967073798179626,grad_norm: 0.9657446963835202, iteration: 109699
loss: 1.0209695100784302,grad_norm: 0.8900625617677927, iteration: 109700
loss: 1.0591133832931519,grad_norm: 0.9999999265969814, iteration: 109701
loss: 1.0163099765777588,grad_norm: 0.8652411472907956, iteration: 109702
loss: 1.0276165008544922,grad_norm: 0.7868489047232594, iteration: 109703
loss: 0.9584652781486511,grad_norm: 0.8432008516612464, iteration: 109704
loss: 1.0234389305114746,grad_norm: 0.9443594185388332, iteration: 109705
loss: 0.9856651425361633,grad_norm: 0.9999994598529063, iteration: 109706
loss: 1.0243655443191528,grad_norm: 0.9999996106604168, iteration: 109707
loss: 1.0559388399124146,grad_norm: 0.9999990580909459, iteration: 109708
loss: 1.0859285593032837,grad_norm: 0.9999992677175672, iteration: 109709
loss: 1.0659024715423584,grad_norm: 0.9999992989788602, iteration: 109710
loss: 0.9908471703529358,grad_norm: 0.999999713327648, iteration: 109711
loss: 1.0142688751220703,grad_norm: 0.8508908051674775, iteration: 109712
loss: 0.9887521266937256,grad_norm: 0.8223740206954475, iteration: 109713
loss: 1.3091884851455688,grad_norm: 0.9999997114888314, iteration: 109714
loss: 1.0715988874435425,grad_norm: 0.9901087154363283, iteration: 109715
loss: 1.0872907638549805,grad_norm: 0.9999994522622717, iteration: 109716
loss: 1.1076607704162598,grad_norm: 0.9999996111322836, iteration: 109717
loss: 1.0917243957519531,grad_norm: 0.999999382111906, iteration: 109718
loss: 1.1265820264816284,grad_norm: 0.9999994406121229, iteration: 109719
loss: 1.122342824935913,grad_norm: 0.9999996468021426, iteration: 109720
loss: 1.0367850065231323,grad_norm: 0.9375992823821782, iteration: 109721
loss: 1.0352516174316406,grad_norm: 0.9999993429371411, iteration: 109722
loss: 1.2728163003921509,grad_norm: 0.9999996069280068, iteration: 109723
loss: 1.0914260149002075,grad_norm: 0.9999992545723494, iteration: 109724
loss: 1.130112648010254,grad_norm: 0.9999994188072268, iteration: 109725
loss: 1.089859127998352,grad_norm: 0.860213662685918, iteration: 109726
loss: 1.1890475749969482,grad_norm: 0.9999994966854466, iteration: 109727
loss: 1.04319429397583,grad_norm: 0.9999992912772151, iteration: 109728
loss: 1.1272310018539429,grad_norm: 0.9999997910719505, iteration: 109729
loss: 1.0526098012924194,grad_norm: 0.9999992132265934, iteration: 109730
loss: 1.0204644203186035,grad_norm: 0.9069101968799919, iteration: 109731
loss: 1.0108084678649902,grad_norm: 0.9445387226465105, iteration: 109732
loss: 0.9872789978981018,grad_norm: 0.8344005703108963, iteration: 109733
loss: 1.055251121520996,grad_norm: 0.9999992254844337, iteration: 109734
loss: 1.000978946685791,grad_norm: 0.9999991976765378, iteration: 109735
loss: 1.016160488128662,grad_norm: 1.0000000765317865, iteration: 109736
loss: 1.007376790046692,grad_norm: 0.851038133323368, iteration: 109737
loss: 0.978249728679657,grad_norm: 0.866464676750256, iteration: 109738
loss: 1.0737065076828003,grad_norm: 0.9999995794211389, iteration: 109739
loss: 1.0999596118927002,grad_norm: 0.9999998276366417, iteration: 109740
loss: 1.0138297080993652,grad_norm: 0.9999991953714912, iteration: 109741
loss: 1.052930474281311,grad_norm: 0.9999995717468345, iteration: 109742
loss: 1.091238021850586,grad_norm: 0.999999739101278, iteration: 109743
loss: 1.3153696060180664,grad_norm: 0.9999993750276255, iteration: 109744
loss: 1.021252155303955,grad_norm: 0.8266518673099502, iteration: 109745
loss: 1.0211763381958008,grad_norm: 0.9821061962348292, iteration: 109746
loss: 1.0090839862823486,grad_norm: 0.9439880077732966, iteration: 109747
loss: 1.1619725227355957,grad_norm: 0.9999992901431615, iteration: 109748
loss: 1.089211344718933,grad_norm: 0.9999990742599402, iteration: 109749
loss: 1.0186229944229126,grad_norm: 1.0000000664898059, iteration: 109750
loss: 1.069791555404663,grad_norm: 0.999999840828879, iteration: 109751
loss: 1.0510677099227905,grad_norm: 0.9999997651786605, iteration: 109752
loss: 1.0385711193084717,grad_norm: 0.9999992605386149, iteration: 109753
loss: 1.0171247720718384,grad_norm: 0.8896270797436796, iteration: 109754
loss: 1.012464165687561,grad_norm: 0.9999991438551702, iteration: 109755
loss: 1.0572385787963867,grad_norm: 0.9999999137871941, iteration: 109756
loss: 1.0461558103561401,grad_norm: 0.9999997637700068, iteration: 109757
loss: 1.0404930114746094,grad_norm: 0.9999991405932245, iteration: 109758
loss: 1.019386887550354,grad_norm: 0.9999995468091674, iteration: 109759
loss: 1.02167546749115,grad_norm: 0.8958595564076253, iteration: 109760
loss: 1.025518536567688,grad_norm: 0.9587922085466267, iteration: 109761
loss: 1.0289498567581177,grad_norm: 0.9999992380298048, iteration: 109762
loss: 1.0498590469360352,grad_norm: 0.9478077598751287, iteration: 109763
loss: 1.1008222103118896,grad_norm: 0.99999943685826, iteration: 109764
loss: 1.053868055343628,grad_norm: 0.9999991483665447, iteration: 109765
loss: 1.0957640409469604,grad_norm: 0.9999991950276399, iteration: 109766
loss: 1.0423310995101929,grad_norm: 0.9999992336876452, iteration: 109767
loss: 1.0262821912765503,grad_norm: 0.9999997109787594, iteration: 109768
loss: 1.0051518678665161,grad_norm: 0.9999991288076374, iteration: 109769
loss: 1.0466358661651611,grad_norm: 0.8561260708158971, iteration: 109770
loss: 1.0625971555709839,grad_norm: 0.999999854416835, iteration: 109771
loss: 0.9933378100395203,grad_norm: 0.9096117550517087, iteration: 109772
loss: 1.0137196779251099,grad_norm: 0.9924587096072874, iteration: 109773
loss: 0.9957076907157898,grad_norm: 0.9497052110275869, iteration: 109774
loss: 0.9820336103439331,grad_norm: 0.9999990680434203, iteration: 109775
loss: 0.9827489852905273,grad_norm: 0.9767390501124735, iteration: 109776
loss: 1.0642167329788208,grad_norm: 0.9999999145269671, iteration: 109777
loss: 1.1058998107910156,grad_norm: 0.999999761735996, iteration: 109778
loss: 1.0225850343704224,grad_norm: 0.9147401052798194, iteration: 109779
loss: 1.0010366439819336,grad_norm: 0.9063378625465979, iteration: 109780
loss: 1.0234453678131104,grad_norm: 0.8622831208336172, iteration: 109781
loss: 1.045653223991394,grad_norm: 0.9999993557507522, iteration: 109782
loss: 1.032594084739685,grad_norm: 0.9999990290805555, iteration: 109783
loss: 0.9928156733512878,grad_norm: 0.9533312818071835, iteration: 109784
loss: 1.0008034706115723,grad_norm: 0.9999993667857364, iteration: 109785
loss: 1.167555809020996,grad_norm: 0.9999999919604395, iteration: 109786
loss: 1.0352072715759277,grad_norm: 0.999999493505447, iteration: 109787
loss: 0.9740768074989319,grad_norm: 0.9999991238034651, iteration: 109788
loss: 1.0123851299285889,grad_norm: 0.8733993598823355, iteration: 109789
loss: 1.1265850067138672,grad_norm: 0.9999992948625644, iteration: 109790
loss: 1.0509579181671143,grad_norm: 0.9999997027997615, iteration: 109791
loss: 1.0157502889633179,grad_norm: 0.8969116706624998, iteration: 109792
loss: 1.0035964250564575,grad_norm: 0.9999996425228885, iteration: 109793
loss: 0.9896366596221924,grad_norm: 0.9999992945353803, iteration: 109794
loss: 1.0549359321594238,grad_norm: 0.9999991633279869, iteration: 109795
loss: 1.033962368965149,grad_norm: 0.8687145519111078, iteration: 109796
loss: 1.1721616983413696,grad_norm: 0.9999995110854617, iteration: 109797
loss: 1.0428816080093384,grad_norm: 0.8919801264688119, iteration: 109798
loss: 0.9688528776168823,grad_norm: 0.9735168270104065, iteration: 109799
loss: 1.029178261756897,grad_norm: 0.9999990512718753, iteration: 109800
loss: 0.9633093476295471,grad_norm: 0.9999989866826144, iteration: 109801
loss: 1.0282024145126343,grad_norm: 0.999999192082653, iteration: 109802
loss: 1.0228757858276367,grad_norm: 0.9999990260884355, iteration: 109803
loss: 1.0688756704330444,grad_norm: 0.99999914340371, iteration: 109804
loss: 0.9943701028823853,grad_norm: 0.9037370312293306, iteration: 109805
loss: 1.0091155767440796,grad_norm: 0.9999991994904236, iteration: 109806
loss: 1.0745292901992798,grad_norm: 0.9999999369007072, iteration: 109807
loss: 1.0060951709747314,grad_norm: 0.7978959436475547, iteration: 109808
loss: 0.99933260679245,grad_norm: 0.8179629334905144, iteration: 109809
loss: 1.0083297491073608,grad_norm: 0.9999997285033049, iteration: 109810
loss: 1.0551354885101318,grad_norm: 0.9615854560926137, iteration: 109811
loss: 1.0994335412979126,grad_norm: 0.999999743989247, iteration: 109812
loss: 1.011762022972107,grad_norm: 0.9930037093009951, iteration: 109813
loss: 1.0809094905853271,grad_norm: 0.999999090427606, iteration: 109814
loss: 1.0054895877838135,grad_norm: 0.999999173712406, iteration: 109815
loss: 1.0961991548538208,grad_norm: 0.9999992684291124, iteration: 109816
loss: 0.983651340007782,grad_norm: 0.8695436188135975, iteration: 109817
loss: 0.9843734502792358,grad_norm: 0.9999991519343209, iteration: 109818
loss: 0.9813605546951294,grad_norm: 0.9999991746626585, iteration: 109819
loss: 0.9940272569656372,grad_norm: 0.999999760860782, iteration: 109820
loss: 0.9671599268913269,grad_norm: 0.8301948222434296, iteration: 109821
loss: 1.020232081413269,grad_norm: 0.8046368680892327, iteration: 109822
loss: 1.0133713483810425,grad_norm: 0.9999991217715338, iteration: 109823
loss: 1.1152764558792114,grad_norm: 0.9999993161518441, iteration: 109824
loss: 1.0495142936706543,grad_norm: 0.9999992740851115, iteration: 109825
loss: 1.0317516326904297,grad_norm: 0.9999991345455226, iteration: 109826
loss: 1.1067365407943726,grad_norm: 0.9999998645456192, iteration: 109827
loss: 1.0205656290054321,grad_norm: 0.9567568216665483, iteration: 109828
loss: 1.1108704805374146,grad_norm: 0.999999751954404, iteration: 109829
loss: 1.028617024421692,grad_norm: 0.9999993900566412, iteration: 109830
loss: 1.0483882427215576,grad_norm: 0.9925837136822976, iteration: 109831
loss: 1.0484918355941772,grad_norm: 0.9999994561847567, iteration: 109832
loss: 1.0421514511108398,grad_norm: 0.8919263100907837, iteration: 109833
loss: 1.0747289657592773,grad_norm: 0.8882089979038392, iteration: 109834
loss: 0.9840143322944641,grad_norm: 0.999999145101765, iteration: 109835
loss: 1.0186853408813477,grad_norm: 0.999999227617996, iteration: 109836
loss: 1.0076271295547485,grad_norm: 0.9956664873636797, iteration: 109837
loss: 1.045405387878418,grad_norm: 0.9999990453004399, iteration: 109838
loss: 0.9875551462173462,grad_norm: 0.9999996715149176, iteration: 109839
loss: 1.0851448774337769,grad_norm: 0.9999996318852556, iteration: 109840
loss: 0.9832199215888977,grad_norm: 0.8693270540558101, iteration: 109841
loss: 1.023237943649292,grad_norm: 0.9999996838925833, iteration: 109842
loss: 1.071756362915039,grad_norm: 0.9999993365521687, iteration: 109843
loss: 0.9552833437919617,grad_norm: 0.9372025921568666, iteration: 109844
loss: 1.003899097442627,grad_norm: 0.9999991550706204, iteration: 109845
loss: 0.9732505083084106,grad_norm: 0.9999991768470722, iteration: 109846
loss: 1.0276622772216797,grad_norm: 0.9999992982338629, iteration: 109847
loss: 1.0426769256591797,grad_norm: 0.9999997757286534, iteration: 109848
loss: 1.01687753200531,grad_norm: 0.9999996051288678, iteration: 109849
loss: 1.0565294027328491,grad_norm: 0.9025013059048166, iteration: 109850
loss: 0.9827304482460022,grad_norm: 0.9999991062508001, iteration: 109851
loss: 1.0057952404022217,grad_norm: 0.8718726792680984, iteration: 109852
loss: 1.0272934436798096,grad_norm: 0.999999222704627, iteration: 109853
loss: 0.9775207042694092,grad_norm: 0.8939250076932218, iteration: 109854
loss: 0.9669612050056458,grad_norm: 0.9999990581297982, iteration: 109855
loss: 1.0137077569961548,grad_norm: 0.9094166032530905, iteration: 109856
loss: 1.0444691181182861,grad_norm: 0.9999993057466938, iteration: 109857
loss: 1.014809489250183,grad_norm: 0.8160272412270834, iteration: 109858
loss: 1.0240952968597412,grad_norm: 0.9999996476810102, iteration: 109859
loss: 1.0122761726379395,grad_norm: 0.99999917029027, iteration: 109860
loss: 0.985862672328949,grad_norm: 0.9999999374005214, iteration: 109861
loss: 1.0346078872680664,grad_norm: 0.9999997164929606, iteration: 109862
loss: 1.0484312772750854,grad_norm: 0.9999992897478659, iteration: 109863
loss: 1.0177233219146729,grad_norm: 0.9999994514983036, iteration: 109864
loss: 1.074591040611267,grad_norm: 0.8585605326633806, iteration: 109865
loss: 0.995155930519104,grad_norm: 0.9558494689039483, iteration: 109866
loss: 1.0411739349365234,grad_norm: 0.9999991009955465, iteration: 109867
loss: 1.0249956846237183,grad_norm: 0.9999995344974926, iteration: 109868
loss: 1.1367223262786865,grad_norm: 0.9999998138740883, iteration: 109869
loss: 0.9943450093269348,grad_norm: 0.999999210696678, iteration: 109870
loss: 0.9969233274459839,grad_norm: 0.9555714474292789, iteration: 109871
loss: 1.0037331581115723,grad_norm: 0.8597272294770504, iteration: 109872
loss: 0.9979572892189026,grad_norm: 0.9999992951532372, iteration: 109873
loss: 1.0117806196212769,grad_norm: 0.9999995771564966, iteration: 109874
loss: 1.103020191192627,grad_norm: 0.99999922339888, iteration: 109875
loss: 1.0499563217163086,grad_norm: 0.9999993356699356, iteration: 109876
loss: 1.0731762647628784,grad_norm: 0.9999996806154549, iteration: 109877
loss: 1.048464059829712,grad_norm: 0.9857060595346299, iteration: 109878
loss: 1.0810208320617676,grad_norm: 0.9999995493946267, iteration: 109879
loss: 0.9838138222694397,grad_norm: 0.9999992224872578, iteration: 109880
loss: 0.9833011031150818,grad_norm: 0.9405107188652595, iteration: 109881
loss: 1.1845991611480713,grad_norm: 0.9999995673208086, iteration: 109882
loss: 0.9731870889663696,grad_norm: 0.7191287130789868, iteration: 109883
loss: 1.1151437759399414,grad_norm: 0.9999995952440357, iteration: 109884
loss: 1.0165363550186157,grad_norm: 0.9999991407149288, iteration: 109885
loss: 0.9965319633483887,grad_norm: 0.9999991936659186, iteration: 109886
loss: 1.0505235195159912,grad_norm: 0.8765035152591105, iteration: 109887
loss: 1.0984742641448975,grad_norm: 0.999999736741155, iteration: 109888
loss: 1.0191200971603394,grad_norm: 0.8752754004327936, iteration: 109889
loss: 1.0548864603042603,grad_norm: 0.9999997877246022, iteration: 109890
loss: 1.0966930389404297,grad_norm: 0.9999998254757823, iteration: 109891
loss: 1.0114699602127075,grad_norm: 0.8417458550914713, iteration: 109892
loss: 0.9811545014381409,grad_norm: 0.9233013200470044, iteration: 109893
loss: 1.0852569341659546,grad_norm: 0.9999998194815689, iteration: 109894
loss: 1.0984222888946533,grad_norm: 0.9999990467100124, iteration: 109895
loss: 1.0625592470169067,grad_norm: 0.999999283939423, iteration: 109896
loss: 1.0044761896133423,grad_norm: 0.9999991175654687, iteration: 109897
loss: 1.1460412740707397,grad_norm: 0.9999999249323108, iteration: 109898
loss: 1.0163010358810425,grad_norm: 0.8441335548902295, iteration: 109899
loss: 1.1271417140960693,grad_norm: 0.9999993378389895, iteration: 109900
loss: 1.0313631296157837,grad_norm: 0.9999992197402541, iteration: 109901
loss: 1.044651985168457,grad_norm: 0.999999395382534, iteration: 109902
loss: 1.0354814529418945,grad_norm: 0.8159358085072344, iteration: 109903
loss: 1.0006294250488281,grad_norm: 0.9266568680277312, iteration: 109904
loss: 1.1221046447753906,grad_norm: 0.9999992605536623, iteration: 109905
loss: 1.036615252494812,grad_norm: 0.9442965075256022, iteration: 109906
loss: 1.0149204730987549,grad_norm: 0.8662620853904625, iteration: 109907
loss: 1.0440807342529297,grad_norm: 0.9999996355294146, iteration: 109908
loss: 1.0826704502105713,grad_norm: 0.9999991723673936, iteration: 109909
loss: 1.060849905014038,grad_norm: 0.9999991571768259, iteration: 109910
loss: 1.020708680152893,grad_norm: 0.925553944170238, iteration: 109911
loss: 1.0048141479492188,grad_norm: 0.7601335224330175, iteration: 109912
loss: 1.0139532089233398,grad_norm: 0.9999990399097001, iteration: 109913
loss: 1.002252221107483,grad_norm: 0.9866037729023788, iteration: 109914
loss: 1.0152101516723633,grad_norm: 0.9999990874110001, iteration: 109915
loss: 1.1012463569641113,grad_norm: 0.9999991051563533, iteration: 109916
loss: 1.0312296152114868,grad_norm: 0.9708464654447819, iteration: 109917
loss: 1.029430866241455,grad_norm: 0.9999995860562979, iteration: 109918
loss: 1.0024044513702393,grad_norm: 0.9999990225672978, iteration: 109919
loss: 1.1818323135375977,grad_norm: 0.9999996015282487, iteration: 109920
loss: 1.0069656372070312,grad_norm: 0.9999993415447058, iteration: 109921
loss: 0.9882252216339111,grad_norm: 0.9197445917936732, iteration: 109922
loss: 1.0542834997177124,grad_norm: 0.9999992054364504, iteration: 109923
loss: 1.0240428447723389,grad_norm: 0.8702001956921154, iteration: 109924
loss: 1.0791943073272705,grad_norm: 0.8883724041694325, iteration: 109925
loss: 1.1094764471054077,grad_norm: 0.9999992931034719, iteration: 109926
loss: 1.0977084636688232,grad_norm: 0.9999994397347746, iteration: 109927
loss: 1.063438892364502,grad_norm: 0.9999998685621012, iteration: 109928
loss: 1.0505794286727905,grad_norm: 0.9696185881044703, iteration: 109929
loss: 1.0335358381271362,grad_norm: 0.8920644409888214, iteration: 109930
loss: 0.9987769722938538,grad_norm: 0.9954898136309185, iteration: 109931
loss: 1.085904598236084,grad_norm: 0.999999596105923, iteration: 109932
loss: 0.9887121319770813,grad_norm: 0.9385335241148599, iteration: 109933
loss: 0.9625792503356934,grad_norm: 0.9199694537219849, iteration: 109934
loss: 1.0903531312942505,grad_norm: 0.9999993370731032, iteration: 109935
loss: 1.0441548824310303,grad_norm: 0.9999995469248247, iteration: 109936
loss: 1.124224066734314,grad_norm: 0.9999994373333412, iteration: 109937
loss: 1.1468905210494995,grad_norm: 0.9999994630572454, iteration: 109938
loss: 1.0639678239822388,grad_norm: 0.9999991589371148, iteration: 109939
loss: 1.0608330965042114,grad_norm: 0.9371097068106107, iteration: 109940
loss: 1.1720057725906372,grad_norm: 1.0000000611673285, iteration: 109941
loss: 1.0323691368103027,grad_norm: 0.9999993818737195, iteration: 109942
loss: 0.9991539716720581,grad_norm: 0.9131584771857609, iteration: 109943
loss: 1.1169805526733398,grad_norm: 0.9999994224087342, iteration: 109944
loss: 1.0133448839187622,grad_norm: 0.9174511147484583, iteration: 109945
loss: 0.9814993143081665,grad_norm: 0.8638495095537385, iteration: 109946
loss: 1.0268863439559937,grad_norm: 0.9352428308035008, iteration: 109947
loss: 1.0489598512649536,grad_norm: 0.880342104199629, iteration: 109948
loss: 1.0387849807739258,grad_norm: 0.9300230959877502, iteration: 109949
loss: 0.9997745752334595,grad_norm: 0.9999990585206235, iteration: 109950
loss: 1.015369176864624,grad_norm: 0.9999999595008504, iteration: 109951
loss: 1.062088966369629,grad_norm: 0.9999999804039521, iteration: 109952
loss: 0.9992609024047852,grad_norm: 0.9152121609831583, iteration: 109953
loss: 0.9989275932312012,grad_norm: 0.9999990727469101, iteration: 109954
loss: 0.9975531697273254,grad_norm: 0.8047386372888944, iteration: 109955
loss: 1.025545597076416,grad_norm: 0.9712595932093904, iteration: 109956
loss: 1.1102113723754883,grad_norm: 0.9999992578181911, iteration: 109957
loss: 0.9780738949775696,grad_norm: 0.8996379992072971, iteration: 109958
loss: 1.0150578022003174,grad_norm: 0.9373084814613891, iteration: 109959
loss: 0.9989652037620544,grad_norm: 0.9574298727133188, iteration: 109960
loss: 1.0331047773361206,grad_norm: 0.8464227673307569, iteration: 109961
loss: 1.0377057790756226,grad_norm: 0.9298458661619091, iteration: 109962
loss: 1.0383962392807007,grad_norm: 0.9999990791883947, iteration: 109963
loss: 1.1706197261810303,grad_norm: 0.9999993756681208, iteration: 109964
loss: 0.996711015701294,grad_norm: 0.8810169797860793, iteration: 109965
loss: 0.9986096620559692,grad_norm: 0.8449020993403876, iteration: 109966
loss: 1.1398468017578125,grad_norm: 0.999999253057724, iteration: 109967
loss: 1.0094733238220215,grad_norm: 0.9999991361418943, iteration: 109968
loss: 1.054513692855835,grad_norm: 0.9999998396755858, iteration: 109969
loss: 1.0231261253356934,grad_norm: 0.8255676742231043, iteration: 109970
loss: 1.0964343547821045,grad_norm: 0.9999997695249565, iteration: 109971
loss: 1.0644248723983765,grad_norm: 0.9999998310197767, iteration: 109972
loss: 1.0464617013931274,grad_norm: 0.8539538079105827, iteration: 109973
loss: 0.9893996119499207,grad_norm: 0.8095062013840831, iteration: 109974
loss: 0.9859856963157654,grad_norm: 0.8550105950382828, iteration: 109975
loss: 1.0271626710891724,grad_norm: 0.8858943407698381, iteration: 109976
loss: 1.0253031253814697,grad_norm: 0.999999103185118, iteration: 109977
loss: 0.9870423674583435,grad_norm: 0.8936753376837728, iteration: 109978
loss: 0.9688859581947327,grad_norm: 0.9999991710953371, iteration: 109979
loss: 1.215847134590149,grad_norm: 0.9999991970406396, iteration: 109980
loss: 1.0374336242675781,grad_norm: 0.8973828296208288, iteration: 109981
loss: 0.9810346961021423,grad_norm: 0.999999644514392, iteration: 109982
loss: 1.0512948036193848,grad_norm: 0.8272987426414792, iteration: 109983
loss: 0.9889581203460693,grad_norm: 0.9999991606152747, iteration: 109984
loss: 1.092020034790039,grad_norm: 0.9999995780701296, iteration: 109985
loss: 1.002992033958435,grad_norm: 0.9209282160941494, iteration: 109986
loss: 1.0426632165908813,grad_norm: 0.9999993283295514, iteration: 109987
loss: 1.0410528182983398,grad_norm: 0.9999993795544502, iteration: 109988
loss: 1.0540815591812134,grad_norm: 0.9999992648213709, iteration: 109989
loss: 0.952646791934967,grad_norm: 0.994403890622207, iteration: 109990
loss: 1.006163477897644,grad_norm: 0.9999992057397975, iteration: 109991
loss: 1.010360836982727,grad_norm: 0.9568036759004517, iteration: 109992
loss: 1.039414882659912,grad_norm: 0.9999991146476274, iteration: 109993
loss: 1.0733616352081299,grad_norm: 0.9999998335229653, iteration: 109994
loss: 0.9864762425422668,grad_norm: 0.8222473232711878, iteration: 109995
loss: 0.9996934533119202,grad_norm: 0.7785607047386166, iteration: 109996
loss: 1.002309799194336,grad_norm: 0.9999990600273486, iteration: 109997
loss: 0.9970823526382446,grad_norm: 0.9999990919132322, iteration: 109998
loss: 0.9917018413543701,grad_norm: 0.8277974846114299, iteration: 109999
loss: 1.015836238861084,grad_norm: 0.7558939296304068, iteration: 110000
Evaluating at step 110000
{'val': 1.0099925007671118, 'test': 2.1452774936779617}
loss: 1.0333706140518188,grad_norm: 0.9999992576050933, iteration: 110001
loss: 1.0691121816635132,grad_norm: 0.9999999096732943, iteration: 110002
loss: 1.0577713251113892,grad_norm: 0.9999994738614493, iteration: 110003
loss: 1.1424474716186523,grad_norm: 0.9999994308757427, iteration: 110004
loss: 1.1655384302139282,grad_norm: 0.9999991831922276, iteration: 110005
loss: 1.120875597000122,grad_norm: 0.9999997300343041, iteration: 110006
loss: 0.9898188710212708,grad_norm: 0.9105708164327928, iteration: 110007
loss: 1.0103614330291748,grad_norm: 0.9999991954266149, iteration: 110008
loss: 1.0300122499465942,grad_norm: 0.9999993522586726, iteration: 110009
loss: 1.0107476711273193,grad_norm: 0.9999992347073104, iteration: 110010
loss: 1.0122839212417603,grad_norm: 0.844955333167571, iteration: 110011
loss: 1.0341144800186157,grad_norm: 0.9999999286150674, iteration: 110012
loss: 0.9990697503089905,grad_norm: 0.907541528468245, iteration: 110013
loss: 1.0091203451156616,grad_norm: 0.9999990699381003, iteration: 110014
loss: 0.9902905225753784,grad_norm: 0.9488906410257975, iteration: 110015
loss: 0.9795542359352112,grad_norm: 0.9999992090252802, iteration: 110016
loss: 1.044864296913147,grad_norm: 0.9999994303195961, iteration: 110017
loss: 1.0916086435317993,grad_norm: 0.9999997956452283, iteration: 110018
loss: 0.9941514134407043,grad_norm: 0.9999990928835779, iteration: 110019
loss: 1.0320347547531128,grad_norm: 0.9108761302992651, iteration: 110020
loss: 0.9944179058074951,grad_norm: 0.9999994495876089, iteration: 110021
loss: 1.0401809215545654,grad_norm: 0.9999992787283198, iteration: 110022
loss: 1.0186043977737427,grad_norm: 0.9999993043355428, iteration: 110023
loss: 1.0120195150375366,grad_norm: 0.9999990879015109, iteration: 110024
loss: 1.1523282527923584,grad_norm: 0.9999997081940538, iteration: 110025
loss: 0.9880138039588928,grad_norm: 0.8776260721061439, iteration: 110026
loss: 0.9860403537750244,grad_norm: 0.9999991505725325, iteration: 110027
loss: 1.0429651737213135,grad_norm: 0.9999993456662377, iteration: 110028
loss: 0.9967746138572693,grad_norm: 0.9999990951660639, iteration: 110029
loss: 1.0474787950515747,grad_norm: 0.8926000369907703, iteration: 110030
loss: 1.0665013790130615,grad_norm: 0.8932719750173095, iteration: 110031
loss: 1.033007025718689,grad_norm: 0.9999990937953596, iteration: 110032
loss: 1.579251766204834,grad_norm: 0.999999811099719, iteration: 110033
loss: 1.0040417909622192,grad_norm: 0.886137697297088, iteration: 110034
loss: 0.988304078578949,grad_norm: 0.9999990524884793, iteration: 110035
loss: 1.0214449167251587,grad_norm: 0.9923657769450523, iteration: 110036
loss: 1.0223815441131592,grad_norm: 0.7680002724733543, iteration: 110037
loss: 1.0864648818969727,grad_norm: 0.9999990354892098, iteration: 110038
loss: 1.0297154188156128,grad_norm: 0.9999994585622063, iteration: 110039
loss: 1.0424296855926514,grad_norm: 0.9999993953807658, iteration: 110040
loss: 1.040462613105774,grad_norm: 0.8088172843950522, iteration: 110041
loss: 1.0251445770263672,grad_norm: 0.8254315815586848, iteration: 110042
loss: 1.0238994359970093,grad_norm: 0.9999990413791255, iteration: 110043
loss: 0.9859622716903687,grad_norm: 0.9999990978646899, iteration: 110044
loss: 1.080787181854248,grad_norm: 0.9999992058674505, iteration: 110045
loss: 1.0089386701583862,grad_norm: 0.9999991304240858, iteration: 110046
loss: 1.0265932083129883,grad_norm: 0.9043158111307718, iteration: 110047
loss: 0.9857310056686401,grad_norm: 0.8608998304885058, iteration: 110048
loss: 1.049752116203308,grad_norm: 0.9999991035100912, iteration: 110049
loss: 1.0483647584915161,grad_norm: 0.9244086227266071, iteration: 110050
loss: 1.088098406791687,grad_norm: 0.9999991564002475, iteration: 110051
loss: 1.0100194215774536,grad_norm: 0.9999993379556362, iteration: 110052
loss: 1.122512698173523,grad_norm: 0.9999993807900396, iteration: 110053
loss: 1.0152342319488525,grad_norm: 0.8971671235581463, iteration: 110054
loss: 1.0149527788162231,grad_norm: 0.8708958300490507, iteration: 110055
loss: 1.0028506517410278,grad_norm: 0.9999990953971267, iteration: 110056
loss: 1.1302255392074585,grad_norm: 0.9999999321782816, iteration: 110057
loss: 1.0176424980163574,grad_norm: 0.9366690668924895, iteration: 110058
loss: 1.0165408849716187,grad_norm: 0.9999999198806601, iteration: 110059
loss: 1.0250725746154785,grad_norm: 0.8436469285683676, iteration: 110060
loss: 1.0419596433639526,grad_norm: 0.9999998254297418, iteration: 110061
loss: 1.1080268621444702,grad_norm: 0.9999992367375642, iteration: 110062
loss: 1.0699518918991089,grad_norm: 0.9999991689756693, iteration: 110063
loss: 1.01530921459198,grad_norm: 0.9999994794353521, iteration: 110064
loss: 1.1540967226028442,grad_norm: 0.9999998350914306, iteration: 110065
loss: 1.0420960187911987,grad_norm: 0.8298558799314434, iteration: 110066
loss: 1.089091181755066,grad_norm: 0.9999990815781025, iteration: 110067
loss: 1.0481724739074707,grad_norm: 0.999999437812624, iteration: 110068
loss: 1.0077061653137207,grad_norm: 0.999999890040802, iteration: 110069
loss: 1.0116139650344849,grad_norm: 0.9999992414938706, iteration: 110070
loss: 1.0136181116104126,grad_norm: 0.95708148247761, iteration: 110071
loss: 1.040159821510315,grad_norm: 0.999999810435834, iteration: 110072
loss: 1.0430008172988892,grad_norm: 0.9999992429949132, iteration: 110073
loss: 1.0284969806671143,grad_norm: 0.9999999985106799, iteration: 110074
loss: 1.0286909341812134,grad_norm: 0.9999992442536726, iteration: 110075
loss: 1.0064319372177124,grad_norm: 0.9586860434853173, iteration: 110076
loss: 0.983437180519104,grad_norm: 0.9591964841871506, iteration: 110077
loss: 1.0108357667922974,grad_norm: 0.9999997088718562, iteration: 110078
loss: 0.985345184803009,grad_norm: 0.8214312386194089, iteration: 110079
loss: 1.0335603952407837,grad_norm: 0.8790611443575178, iteration: 110080
loss: 1.0135314464569092,grad_norm: 0.8306936433428306, iteration: 110081
loss: 1.0311803817749023,grad_norm: 0.9597494605747294, iteration: 110082
loss: 1.0029661655426025,grad_norm: 0.9289925705070964, iteration: 110083
loss: 1.0114469528198242,grad_norm: 0.9999992290220536, iteration: 110084
loss: 1.0532052516937256,grad_norm: 0.9999991563212258, iteration: 110085
loss: 1.0780134201049805,grad_norm: 0.9999995568172103, iteration: 110086
loss: 1.006333589553833,grad_norm: 0.7859137871647085, iteration: 110087
loss: 0.9682828187942505,grad_norm: 0.999999098185937, iteration: 110088
loss: 1.0235143899917603,grad_norm: 0.9973839305951208, iteration: 110089
loss: 1.0683356523513794,grad_norm: 0.9999993314013573, iteration: 110090
loss: 0.9959837794303894,grad_norm: 0.8022101924722913, iteration: 110091
loss: 0.9969382286071777,grad_norm: 0.9379212483037234, iteration: 110092
loss: 0.9983172416687012,grad_norm: 0.855440071300768, iteration: 110093
loss: 1.052324891090393,grad_norm: 0.9999995604690296, iteration: 110094
loss: 1.0136187076568604,grad_norm: 0.8810099160330711, iteration: 110095
loss: 1.0021570920944214,grad_norm: 0.6609648615725106, iteration: 110096
loss: 1.0112216472625732,grad_norm: 0.9999998384845312, iteration: 110097
loss: 1.0094635486602783,grad_norm: 0.9763091384663521, iteration: 110098
loss: 1.0225858688354492,grad_norm: 0.9455115990352376, iteration: 110099
loss: 1.0141661167144775,grad_norm: 0.9379333972680063, iteration: 110100
loss: 1.1043087244033813,grad_norm: 0.9999996578937895, iteration: 110101
loss: 1.0226014852523804,grad_norm: 0.99999907827816, iteration: 110102
loss: 0.9977352023124695,grad_norm: 0.9999992150041085, iteration: 110103
loss: 0.9674746990203857,grad_norm: 0.8533053096606592, iteration: 110104
loss: 0.9530130624771118,grad_norm: 0.8744799123597099, iteration: 110105
loss: 0.9909327030181885,grad_norm: 0.9999994740812909, iteration: 110106
loss: 1.0679423809051514,grad_norm: 0.9999998038398936, iteration: 110107
loss: 0.9942321181297302,grad_norm: 0.9999995277908941, iteration: 110108
loss: 1.0593061447143555,grad_norm: 0.9999992054393824, iteration: 110109
loss: 1.073298454284668,grad_norm: 0.9999991823096179, iteration: 110110
loss: 1.0044702291488647,grad_norm: 0.8333414785409766, iteration: 110111
loss: 1.0050002336502075,grad_norm: 0.9999996311876304, iteration: 110112
loss: 0.9706619381904602,grad_norm: 0.9410855757218405, iteration: 110113
loss: 1.0784289836883545,grad_norm: 0.9999998458606594, iteration: 110114
loss: 1.0124456882476807,grad_norm: 0.8889778267631883, iteration: 110115
loss: 0.99936842918396,grad_norm: 0.925370180104999, iteration: 110116
loss: 0.9983413219451904,grad_norm: 0.999999387493735, iteration: 110117
loss: 1.0342198610305786,grad_norm: 0.8111376165001893, iteration: 110118
loss: 0.9902600049972534,grad_norm: 0.9999993136760487, iteration: 110119
loss: 0.9986908435821533,grad_norm: 0.8006609546689097, iteration: 110120
loss: 1.0228906869888306,grad_norm: 0.8823602423765113, iteration: 110121
loss: 1.0062905550003052,grad_norm: 0.8113886385007997, iteration: 110122
loss: 0.9943256974220276,grad_norm: 0.8764980253970234, iteration: 110123
loss: 1.0501469373703003,grad_norm: 0.9999990639270667, iteration: 110124
loss: 0.959253191947937,grad_norm: 0.8126801911702916, iteration: 110125
loss: 1.0230602025985718,grad_norm: 0.9999997739273022, iteration: 110126
loss: 1.021971344947815,grad_norm: 0.8759911996042876, iteration: 110127
loss: 0.9449903964996338,grad_norm: 0.942042186083665, iteration: 110128
loss: 1.010319471359253,grad_norm: 0.8988416114487758, iteration: 110129
loss: 1.0042169094085693,grad_norm: 0.9999995581481004, iteration: 110130
loss: 1.014198660850525,grad_norm: 0.9320875114253421, iteration: 110131
loss: 1.0347436666488647,grad_norm: 0.8078028876202293, iteration: 110132
loss: 1.0662041902542114,grad_norm: 0.9999993285956242, iteration: 110133
loss: 0.9836217164993286,grad_norm: 0.9624169529245434, iteration: 110134
loss: 1.0070900917053223,grad_norm: 0.9999990929502662, iteration: 110135
loss: 1.030239462852478,grad_norm: 0.9999990592631279, iteration: 110136
loss: 0.9959501028060913,grad_norm: 0.9999992826835624, iteration: 110137
loss: 0.9780259132385254,grad_norm: 0.9999991677334819, iteration: 110138
loss: 0.9724099636077881,grad_norm: 0.9999992780638306, iteration: 110139
loss: 1.0294032096862793,grad_norm: 0.9999990628619614, iteration: 110140
loss: 1.0328370332717896,grad_norm: 0.7495993062983255, iteration: 110141
loss: 1.0308711528778076,grad_norm: 0.9999996935318558, iteration: 110142
loss: 1.0119613409042358,grad_norm: 0.9999990688659812, iteration: 110143
loss: 1.1388797760009766,grad_norm: 0.9999994405651642, iteration: 110144
loss: 1.020601511001587,grad_norm: 0.9999995698283963, iteration: 110145
loss: 0.9703809022903442,grad_norm: 0.946239908229556, iteration: 110146
loss: 1.0269386768341064,grad_norm: 0.8561192822539783, iteration: 110147
loss: 1.0500258207321167,grad_norm: 0.9999995673008669, iteration: 110148
loss: 0.9839949011802673,grad_norm: 0.7791656887393429, iteration: 110149
loss: 1.007939338684082,grad_norm: 0.9180778879448839, iteration: 110150
loss: 0.9743856191635132,grad_norm: 0.8739394000624092, iteration: 110151
loss: 0.9966566562652588,grad_norm: 0.9999990621828926, iteration: 110152
loss: 1.0328342914581299,grad_norm: 0.8889572117919259, iteration: 110153
loss: 0.9846460819244385,grad_norm: 0.9999994594700315, iteration: 110154
loss: 1.013969898223877,grad_norm: 0.9793764020497899, iteration: 110155
loss: 0.973339855670929,grad_norm: 0.9999991646875346, iteration: 110156
loss: 1.0318248271942139,grad_norm: 0.9999990383675851, iteration: 110157
loss: 0.9622751474380493,grad_norm: 0.9100153378368547, iteration: 110158
loss: 1.0608445405960083,grad_norm: 0.9999992170831979, iteration: 110159
loss: 1.033738613128662,grad_norm: 0.9999992126670062, iteration: 110160
loss: 1.046838641166687,grad_norm: 0.9874543315359087, iteration: 110161
loss: 1.0061445236206055,grad_norm: 0.9999991156691662, iteration: 110162
loss: 1.1537392139434814,grad_norm: 0.9999993682139053, iteration: 110163
loss: 1.0357681512832642,grad_norm: 0.9839403687530115, iteration: 110164
loss: 0.9949131011962891,grad_norm: 0.967369257066914, iteration: 110165
loss: 1.0199081897735596,grad_norm: 0.9999992041049198, iteration: 110166
loss: 0.9946520924568176,grad_norm: 0.6972442764627731, iteration: 110167
loss: 1.1300369501113892,grad_norm: 0.9999995230729707, iteration: 110168
loss: 1.094523549079895,grad_norm: 0.9999991655073327, iteration: 110169
loss: 1.0225294828414917,grad_norm: 0.999999518345034, iteration: 110170
loss: 1.0056910514831543,grad_norm: 0.8008772510416668, iteration: 110171
loss: 1.0032577514648438,grad_norm: 0.8167669673590877, iteration: 110172
loss: 1.0703462362289429,grad_norm: 0.9999997378074621, iteration: 110173
loss: 1.0416473150253296,grad_norm: 0.9999996067431146, iteration: 110174
loss: 1.0074681043624878,grad_norm: 0.9999996545485687, iteration: 110175
loss: 1.0260393619537354,grad_norm: 0.9583261818983884, iteration: 110176
loss: 1.0018775463104248,grad_norm: 0.8854878242103444, iteration: 110177
loss: 1.006136178970337,grad_norm: 0.9999992898923205, iteration: 110178
loss: 0.9651214480400085,grad_norm: 0.9999992533769673, iteration: 110179
loss: 1.0225274562835693,grad_norm: 0.9999995414440074, iteration: 110180
loss: 1.017537236213684,grad_norm: 0.9837894049280216, iteration: 110181
loss: 0.991040050983429,grad_norm: 0.9999997011434867, iteration: 110182
loss: 1.0068625211715698,grad_norm: 0.8814256725162127, iteration: 110183
loss: 1.1002531051635742,grad_norm: 0.9999995758802033, iteration: 110184
loss: 0.9898508787155151,grad_norm: 0.889063155302685, iteration: 110185
loss: 1.0066273212432861,grad_norm: 0.999999901955406, iteration: 110186
loss: 1.006663203239441,grad_norm: 0.999999349506698, iteration: 110187
loss: 1.0318704843521118,grad_norm: 0.9311001230608453, iteration: 110188
loss: 0.985249936580658,grad_norm: 0.8249597541371785, iteration: 110189
loss: 1.0063369274139404,grad_norm: 0.9999990894254281, iteration: 110190
loss: 0.9722568392753601,grad_norm: 0.9033721824450266, iteration: 110191
loss: 1.0046501159667969,grad_norm: 0.9944786683457859, iteration: 110192
loss: 0.9709228277206421,grad_norm: 0.9993121351619527, iteration: 110193
loss: 1.023716926574707,grad_norm: 0.8430897507324269, iteration: 110194
loss: 1.0177359580993652,grad_norm: 0.9999990983018092, iteration: 110195
loss: 1.0163136720657349,grad_norm: 0.9999991078443234, iteration: 110196
loss: 0.9643639922142029,grad_norm: 0.9441345464711711, iteration: 110197
loss: 1.041603446006775,grad_norm: 0.8995199067491071, iteration: 110198
loss: 0.9862834811210632,grad_norm: 0.8342496826813259, iteration: 110199
loss: 1.0349839925765991,grad_norm: 0.999999788225411, iteration: 110200
loss: 1.0422770977020264,grad_norm: 0.9999997121520295, iteration: 110201
loss: 1.03220796585083,grad_norm: 0.9999992607076429, iteration: 110202
loss: 1.013221025466919,grad_norm: 0.9511437786719937, iteration: 110203
loss: 1.003443717956543,grad_norm: 0.9004668588705066, iteration: 110204
loss: 0.9932589530944824,grad_norm: 0.9999994427579466, iteration: 110205
loss: 1.0336284637451172,grad_norm: 0.830477498544695, iteration: 110206
loss: 1.0099689960479736,grad_norm: 0.85108808823766, iteration: 110207
loss: 0.9900977611541748,grad_norm: 0.818348593301993, iteration: 110208
loss: 1.0010210275650024,grad_norm: 0.9092788115140877, iteration: 110209
loss: 1.0381098985671997,grad_norm: 0.9999990529238815, iteration: 110210
loss: 0.9865044355392456,grad_norm: 0.7677279941850979, iteration: 110211
loss: 0.9584529399871826,grad_norm: 0.9999991749354306, iteration: 110212
loss: 1.0141305923461914,grad_norm: 0.7600882851061315, iteration: 110213
loss: 0.9985199570655823,grad_norm: 0.8501880567049728, iteration: 110214
loss: 1.017420768737793,grad_norm: 0.9999992320611579, iteration: 110215
loss: 0.9760174751281738,grad_norm: 0.9999989642257637, iteration: 110216
loss: 0.9965726733207703,grad_norm: 0.999999095457868, iteration: 110217
loss: 1.028275728225708,grad_norm: 0.8449419539405012, iteration: 110218
loss: 1.0050641298294067,grad_norm: 0.999999188931434, iteration: 110219
loss: 1.0281612873077393,grad_norm: 0.9999993028180891, iteration: 110220
loss: 1.049403190612793,grad_norm: 1.0000000353150864, iteration: 110221
loss: 1.0004544258117676,grad_norm: 0.7933689019748003, iteration: 110222
loss: 1.0259672403335571,grad_norm: 0.8252026174880271, iteration: 110223
loss: 1.0188026428222656,grad_norm: 0.9951608112488242, iteration: 110224
loss: 0.9787355065345764,grad_norm: 0.8663958807017563, iteration: 110225
loss: 1.0484918355941772,grad_norm: 0.9999995027059995, iteration: 110226
loss: 1.0071853399276733,grad_norm: 0.9628123358862356, iteration: 110227
loss: 1.020660400390625,grad_norm: 0.9569743423329343, iteration: 110228
loss: 1.022815465927124,grad_norm: 0.8369703630170227, iteration: 110229
loss: 1.0099983215332031,grad_norm: 0.9999990421905642, iteration: 110230
loss: 1.0349197387695312,grad_norm: 0.9999996919902404, iteration: 110231
loss: 0.9894774556159973,grad_norm: 0.7544573908112913, iteration: 110232
loss: 1.0119738578796387,grad_norm: 0.9999991207572565, iteration: 110233
loss: 0.9954408407211304,grad_norm: 0.9999994028076613, iteration: 110234
loss: 0.9406023621559143,grad_norm: 0.9999990383186933, iteration: 110235
loss: 1.0336705446243286,grad_norm: 0.9999990522623395, iteration: 110236
loss: 1.1630184650421143,grad_norm: 0.9999999205718867, iteration: 110237
loss: 0.9560340046882629,grad_norm: 0.999999139051124, iteration: 110238
loss: 1.0025088787078857,grad_norm: 0.9999992171257424, iteration: 110239
loss: 1.0015279054641724,grad_norm: 0.8334745133439055, iteration: 110240
loss: 1.0332462787628174,grad_norm: 0.8603140577174961, iteration: 110241
loss: 1.0015867948532104,grad_norm: 0.9996548367233783, iteration: 110242
loss: 0.9777345061302185,grad_norm: 0.7615781145136354, iteration: 110243
loss: 1.0751900672912598,grad_norm: 0.9999994859698528, iteration: 110244
loss: 0.9892956018447876,grad_norm: 0.9651259866009065, iteration: 110245
loss: 1.0215319395065308,grad_norm: 0.9999990904546708, iteration: 110246
loss: 1.0678136348724365,grad_norm: 0.9999995639432274, iteration: 110247
loss: 1.0000132322311401,grad_norm: 0.8248273823972674, iteration: 110248
loss: 1.004647970199585,grad_norm: 0.7497513686217991, iteration: 110249
loss: 1.0311695337295532,grad_norm: 0.999999240073567, iteration: 110250
loss: 0.9978307485580444,grad_norm: 0.9773731490057433, iteration: 110251
loss: 1.0380029678344727,grad_norm: 0.9999994538515319, iteration: 110252
loss: 1.0079994201660156,grad_norm: 0.9999991032380994, iteration: 110253
loss: 1.2076258659362793,grad_norm: 0.9999998330732708, iteration: 110254
loss: 1.042754888534546,grad_norm: 0.9999991142557747, iteration: 110255
loss: 0.9715182185173035,grad_norm: 0.8192206495957158, iteration: 110256
loss: 1.044140338897705,grad_norm: 0.8888081072876161, iteration: 110257
loss: 1.0349245071411133,grad_norm: 0.9053542421375474, iteration: 110258
loss: 0.9900668859481812,grad_norm: 0.8263328419695154, iteration: 110259
loss: 1.0242550373077393,grad_norm: 0.9999993330256972, iteration: 110260
loss: 1.0500181913375854,grad_norm: 0.9999997664847174, iteration: 110261
loss: 1.0218573808670044,grad_norm: 0.9999998055903209, iteration: 110262
loss: 1.0278077125549316,grad_norm: 0.9999990460224997, iteration: 110263
loss: 1.0020195245742798,grad_norm: 0.9999991713361708, iteration: 110264
loss: 0.9866225719451904,grad_norm: 0.9314857754828105, iteration: 110265
loss: 0.9753243327140808,grad_norm: 0.9804218849187457, iteration: 110266
loss: 1.0273617506027222,grad_norm: 0.946433011851056, iteration: 110267
loss: 0.999652087688446,grad_norm: 0.9853793749264392, iteration: 110268
loss: 1.0606942176818848,grad_norm: 0.9999999023529679, iteration: 110269
loss: 0.9747467041015625,grad_norm: 0.709218589072029, iteration: 110270
loss: 0.984851062297821,grad_norm: 0.9999991932402331, iteration: 110271
loss: 1.068831205368042,grad_norm: 0.8557638630116055, iteration: 110272
loss: 0.9875033497810364,grad_norm: 0.9999995841460002, iteration: 110273
loss: 0.9804912209510803,grad_norm: 0.9855897801926471, iteration: 110274
loss: 1.0808919668197632,grad_norm: 0.9999996548898328, iteration: 110275
loss: 1.0264698266983032,grad_norm: 0.9999998068387498, iteration: 110276
loss: 1.0089941024780273,grad_norm: 0.9007777939262602, iteration: 110277
loss: 0.9850907325744629,grad_norm: 0.9999989715179197, iteration: 110278
loss: 0.9653723835945129,grad_norm: 0.8508293310532203, iteration: 110279
loss: 0.9442893862724304,grad_norm: 0.999999131507638, iteration: 110280
loss: 1.0082329511642456,grad_norm: 0.8149580412960772, iteration: 110281
loss: 1.0025886297225952,grad_norm: 0.8878836466547324, iteration: 110282
loss: 1.054548978805542,grad_norm: 0.9999992765032911, iteration: 110283
loss: 1.0413833856582642,grad_norm: 0.9999992394799254, iteration: 110284
loss: 1.029412031173706,grad_norm: 0.9006053187341057, iteration: 110285
loss: 0.9725091457366943,grad_norm: 0.975136966055079, iteration: 110286
loss: 0.9905414581298828,grad_norm: 0.8798718900781028, iteration: 110287
loss: 0.995488703250885,grad_norm: 0.8445138001838993, iteration: 110288
loss: 0.9905002117156982,grad_norm: 0.9999993392689039, iteration: 110289
loss: 1.0872833728790283,grad_norm: 0.9999995861888175, iteration: 110290
loss: 1.0251041650772095,grad_norm: 0.9571156113872575, iteration: 110291
loss: 1.1393649578094482,grad_norm: 0.9999999523975738, iteration: 110292
loss: 1.0229036808013916,grad_norm: 0.804970431304469, iteration: 110293
loss: 0.9462801814079285,grad_norm: 0.9968820406065656, iteration: 110294
loss: 0.9982599020004272,grad_norm: 0.8428855493394837, iteration: 110295
loss: 0.9591140151023865,grad_norm: 0.7803060820533845, iteration: 110296
loss: 1.0048880577087402,grad_norm: 0.9163289408194707, iteration: 110297
loss: 0.9799752831459045,grad_norm: 0.8998651362875826, iteration: 110298
loss: 1.033776879310608,grad_norm: 0.9999992903697885, iteration: 110299
loss: 0.9865043759346008,grad_norm: 0.999999930179312, iteration: 110300
loss: 1.038896918296814,grad_norm: 0.9999998318260025, iteration: 110301
loss: 1.1135574579238892,grad_norm: 0.999999864900049, iteration: 110302
loss: 1.0453391075134277,grad_norm: 0.9423562287235255, iteration: 110303
loss: 1.0166288614273071,grad_norm: 0.837239689004231, iteration: 110304
loss: 1.101621389389038,grad_norm: 0.9999998340410016, iteration: 110305
loss: 1.0187886953353882,grad_norm: 0.9191263799578859, iteration: 110306
loss: 1.0057125091552734,grad_norm: 0.8402620093598838, iteration: 110307
loss: 1.0335279703140259,grad_norm: 0.760845494753856, iteration: 110308
loss: 1.0224179029464722,grad_norm: 0.9999989925578396, iteration: 110309
loss: 1.013595700263977,grad_norm: 0.9999994354872227, iteration: 110310
loss: 1.0243486166000366,grad_norm: 0.9999995612665784, iteration: 110311
loss: 0.9835377335548401,grad_norm: 0.9999990353509806, iteration: 110312
loss: 1.0122419595718384,grad_norm: 0.9999991230701324, iteration: 110313
loss: 0.9486013650894165,grad_norm: 0.9999991136480769, iteration: 110314
loss: 1.0295932292938232,grad_norm: 0.9999991816691103, iteration: 110315
loss: 1.0256497859954834,grad_norm: 0.9034482380704529, iteration: 110316
loss: 1.2057344913482666,grad_norm: 0.9999996946438404, iteration: 110317
loss: 0.9491750001907349,grad_norm: 0.883321085451776, iteration: 110318
loss: 0.9661829471588135,grad_norm: 0.9936056709700989, iteration: 110319
loss: 0.9862352013587952,grad_norm: 0.8307626170079488, iteration: 110320
loss: 1.031712532043457,grad_norm: 0.999999613947886, iteration: 110321
loss: 1.0455970764160156,grad_norm: 0.9324500782335808, iteration: 110322
loss: 1.0125553607940674,grad_norm: 0.9999993092545041, iteration: 110323
loss: 1.0194686651229858,grad_norm: 0.9116402315295875, iteration: 110324
loss: 1.0384559631347656,grad_norm: 0.8718999881274014, iteration: 110325
loss: 1.0012789964675903,grad_norm: 0.9066505050602176, iteration: 110326
loss: 0.9977446794509888,grad_norm: 0.963522619435166, iteration: 110327
loss: 1.0178148746490479,grad_norm: 0.9778667095700342, iteration: 110328
loss: 1.005467414855957,grad_norm: 0.9999998752780914, iteration: 110329
loss: 0.9818959832191467,grad_norm: 0.995874257934981, iteration: 110330
loss: 1.0683809518814087,grad_norm: 0.9999998061117558, iteration: 110331
loss: 1.0656960010528564,grad_norm: 0.9352481735949901, iteration: 110332
loss: 1.0754914283752441,grad_norm: 0.999999699537035, iteration: 110333
loss: 1.0213333368301392,grad_norm: 0.8631579845174486, iteration: 110334
loss: 0.993079423904419,grad_norm: 0.8990245434068667, iteration: 110335
loss: 1.021700382232666,grad_norm: 0.9854191946167563, iteration: 110336
loss: 1.0398906469345093,grad_norm: 0.9273457062921809, iteration: 110337
loss: 1.0101416110992432,grad_norm: 0.843857116643962, iteration: 110338
loss: 1.005889892578125,grad_norm: 0.8423638193488237, iteration: 110339
loss: 1.0176011323928833,grad_norm: 0.9999998720695698, iteration: 110340
loss: 1.0345115661621094,grad_norm: 0.9036871151236979, iteration: 110341
loss: 1.0073291063308716,grad_norm: 0.9999992109090803, iteration: 110342
loss: 0.9563590884208679,grad_norm: 0.9999991602222099, iteration: 110343
loss: 1.0490859746932983,grad_norm: 0.9498800217764416, iteration: 110344
loss: 1.012843370437622,grad_norm: 0.7774353423518185, iteration: 110345
loss: 1.0192588567733765,grad_norm: 0.841128763960091, iteration: 110346
loss: 1.0383867025375366,grad_norm: 0.9999993968479629, iteration: 110347
loss: 1.0525134801864624,grad_norm: 0.9999991324269223, iteration: 110348
loss: 1.0263208150863647,grad_norm: 0.9999990621258458, iteration: 110349
loss: 1.0513136386871338,grad_norm: 0.9999996553356352, iteration: 110350
loss: 0.9974642395973206,grad_norm: 0.83540782182856, iteration: 110351
loss: 0.9939042925834656,grad_norm: 0.9140531988351334, iteration: 110352
loss: 0.997890055179596,grad_norm: 0.9999990942628714, iteration: 110353
loss: 1.0432977676391602,grad_norm: 0.9067783146415574, iteration: 110354
loss: 0.9985332489013672,grad_norm: 0.9999996406574798, iteration: 110355
loss: 1.0315254926681519,grad_norm: 0.9999990352074991, iteration: 110356
loss: 0.9900107979774475,grad_norm: 0.8190773525043499, iteration: 110357
loss: 0.9797344207763672,grad_norm: 0.9999994463694992, iteration: 110358
loss: 1.0777214765548706,grad_norm: 0.9999996394254821, iteration: 110359
loss: 0.9948405623435974,grad_norm: 0.7860382872334641, iteration: 110360
loss: 1.0227893590927124,grad_norm: 0.9808003807791097, iteration: 110361
loss: 1.0076617002487183,grad_norm: 0.9999993044535493, iteration: 110362
loss: 1.0021886825561523,grad_norm: 0.9581576866953575, iteration: 110363
loss: 1.0056605339050293,grad_norm: 0.8713328331953905, iteration: 110364
loss: 1.0209527015686035,grad_norm: 0.924879839710272, iteration: 110365
loss: 1.0846492052078247,grad_norm: 0.999999159736286, iteration: 110366
loss: 1.0613430738449097,grad_norm: 0.9999993691931169, iteration: 110367
loss: 1.0038245916366577,grad_norm: 0.9159552020316276, iteration: 110368
loss: 1.0047615766525269,grad_norm: 0.9999991143119944, iteration: 110369
loss: 1.024935245513916,grad_norm: 0.9999991437418301, iteration: 110370
loss: 1.0748400688171387,grad_norm: 0.8347850048631132, iteration: 110371
loss: 0.9802733063697815,grad_norm: 0.9845534986151268, iteration: 110372
loss: 1.009883999824524,grad_norm: 0.9255580740267166, iteration: 110373
loss: 1.0692038536071777,grad_norm: 1.000000012917667, iteration: 110374
loss: 1.04203462600708,grad_norm: 0.9999998470823026, iteration: 110375
loss: 1.0036104917526245,grad_norm: 0.9961022880697072, iteration: 110376
loss: 0.9740709066390991,grad_norm: 0.8981749910661605, iteration: 110377
loss: 1.0013482570648193,grad_norm: 0.8577535944500422, iteration: 110378
loss: 1.1981843709945679,grad_norm: 0.9999995366202858, iteration: 110379
loss: 1.0305116176605225,grad_norm: 0.8706412923980986, iteration: 110380
loss: 0.9820528626441956,grad_norm: 0.8820935846654312, iteration: 110381
loss: 0.9576805233955383,grad_norm: 0.9999991179848229, iteration: 110382
loss: 1.0543721914291382,grad_norm: 0.7909679648094298, iteration: 110383
loss: 1.001390814781189,grad_norm: 0.8348151649520241, iteration: 110384
loss: 1.0274361371994019,grad_norm: 0.8443217700387873, iteration: 110385
loss: 1.054358959197998,grad_norm: 0.999999480795421, iteration: 110386
loss: 0.9768415689468384,grad_norm: 0.9999991910851318, iteration: 110387
loss: 1.0736738443374634,grad_norm: 0.9960559413865944, iteration: 110388
loss: 1.019020676612854,grad_norm: 0.9999995726988345, iteration: 110389
loss: 1.0204263925552368,grad_norm: 0.9999992643708205, iteration: 110390
loss: 1.021936297416687,grad_norm: 0.9999992183240668, iteration: 110391
loss: 0.9785169363021851,grad_norm: 0.8440645353125208, iteration: 110392
loss: 0.9959824085235596,grad_norm: 0.9031961185883111, iteration: 110393
loss: 1.0961250066757202,grad_norm: 0.9999999963822559, iteration: 110394
loss: 0.9982497692108154,grad_norm: 0.9999990990422728, iteration: 110395
loss: 1.1258145570755005,grad_norm: 0.9999992370828398, iteration: 110396
loss: 0.9884393811225891,grad_norm: 0.9999996420371293, iteration: 110397
loss: 1.0662208795547485,grad_norm: 0.7998437061107696, iteration: 110398
loss: 0.9990039467811584,grad_norm: 0.999999267963125, iteration: 110399
loss: 1.0516674518585205,grad_norm: 0.9211339244232267, iteration: 110400
loss: 1.0020447969436646,grad_norm: 0.8373938163786235, iteration: 110401
loss: 1.0480951070785522,grad_norm: 0.9296247679477083, iteration: 110402
loss: 1.017457127571106,grad_norm: 0.9999999375008551, iteration: 110403
loss: 1.0076614618301392,grad_norm: 0.9999993245643708, iteration: 110404
loss: 1.0606857538223267,grad_norm: 0.9999992645437515, iteration: 110405
loss: 1.0310555696487427,grad_norm: 0.9999994772880377, iteration: 110406
loss: 1.0092095136642456,grad_norm: 0.9999993139010231, iteration: 110407
loss: 0.9893319010734558,grad_norm: 0.9231391704667361, iteration: 110408
loss: 1.0226093530654907,grad_norm: 0.9999994277446597, iteration: 110409
loss: 0.998278021812439,grad_norm: 0.8683766353958448, iteration: 110410
loss: 1.2690505981445312,grad_norm: 0.9999998523841469, iteration: 110411
loss: 0.9887218475341797,grad_norm: 0.9999990530201568, iteration: 110412
loss: 0.9846124053001404,grad_norm: 0.9844287858948748, iteration: 110413
loss: 1.0428929328918457,grad_norm: 0.9999997763774805, iteration: 110414
loss: 0.9906261563301086,grad_norm: 0.999999125903368, iteration: 110415
loss: 0.9961803555488586,grad_norm: 0.9999989999616746, iteration: 110416
loss: 1.0237423181533813,grad_norm: 0.8445628004179576, iteration: 110417
loss: 0.9847596287727356,grad_norm: 0.9593614652011272, iteration: 110418
loss: 1.0486416816711426,grad_norm: 0.9999995974867062, iteration: 110419
loss: 1.036932349205017,grad_norm: 0.9999993671361163, iteration: 110420
loss: 1.0127578973770142,grad_norm: 0.9764871297460103, iteration: 110421
loss: 1.0143065452575684,grad_norm: 0.8489325101238193, iteration: 110422
loss: 1.0466042757034302,grad_norm: 0.8866601755269136, iteration: 110423
loss: 0.9755235910415649,grad_norm: 0.9999991588356869, iteration: 110424
loss: 1.0146327018737793,grad_norm: 0.999999153880526, iteration: 110425
loss: 0.9781503081321716,grad_norm: 0.9388786957097343, iteration: 110426
loss: 0.9961539506912231,grad_norm: 0.8507955121771638, iteration: 110427
loss: 0.9938660264015198,grad_norm: 0.9280614740965492, iteration: 110428
loss: 1.1675727367401123,grad_norm: 1.0000000110213323, iteration: 110429
loss: 1.078058123588562,grad_norm: 0.9999993026476351, iteration: 110430
loss: 1.0168696641921997,grad_norm: 0.8465751136299513, iteration: 110431
loss: 1.0682415962219238,grad_norm: 0.9999993777557322, iteration: 110432
loss: 1.005630373954773,grad_norm: 0.9999991423854376, iteration: 110433
loss: 0.9749118685722351,grad_norm: 0.916201135489826, iteration: 110434
loss: 1.055587649345398,grad_norm: 0.9999993491705857, iteration: 110435
loss: 1.0536428689956665,grad_norm: 0.9999995363206016, iteration: 110436
loss: 0.9782649278640747,grad_norm: 0.9329626203042883, iteration: 110437
loss: 0.969771146774292,grad_norm: 0.9999991351254802, iteration: 110438
loss: 1.0243703126907349,grad_norm: 0.9999996013219076, iteration: 110439
loss: 1.011692762374878,grad_norm: 0.7838249523506579, iteration: 110440
loss: 1.0198012590408325,grad_norm: 0.8225871509299845, iteration: 110441
loss: 1.1552623510360718,grad_norm: 0.9999999757045701, iteration: 110442
loss: 1.0682120323181152,grad_norm: 0.9999992107155092, iteration: 110443
loss: 1.0200927257537842,grad_norm: 0.9999991471961088, iteration: 110444
loss: 1.0132575035095215,grad_norm: 0.9999990178658114, iteration: 110445
loss: 0.9724656939506531,grad_norm: 0.9695822972114883, iteration: 110446
loss: 0.9809489846229553,grad_norm: 0.8438613912567747, iteration: 110447
loss: 0.9736112952232361,grad_norm: 0.8723674333766818, iteration: 110448
loss: 1.018333077430725,grad_norm: 0.8137078615778358, iteration: 110449
loss: 1.0163543224334717,grad_norm: 0.9999991945672488, iteration: 110450
loss: 0.9660311341285706,grad_norm: 0.9041380654488588, iteration: 110451
loss: 1.0295627117156982,grad_norm: 0.9999992919360547, iteration: 110452
loss: 1.0813181400299072,grad_norm: 1.00000002802089, iteration: 110453
loss: 1.0129759311676025,grad_norm: 0.8913172588267979, iteration: 110454
loss: 0.9990478157997131,grad_norm: 0.8008382662292671, iteration: 110455
loss: 1.0836485624313354,grad_norm: 0.9999994157916107, iteration: 110456
loss: 1.0039671659469604,grad_norm: 0.9999992379571142, iteration: 110457
loss: 1.0097044706344604,grad_norm: 0.9999991487685422, iteration: 110458
loss: 1.0003689527511597,grad_norm: 0.9999993290359984, iteration: 110459
loss: 1.0054978132247925,grad_norm: 0.9999991990262938, iteration: 110460
loss: 1.0451102256774902,grad_norm: 0.961537659481145, iteration: 110461
loss: 0.9697104096412659,grad_norm: 0.9999990489507009, iteration: 110462
loss: 1.0125995874404907,grad_norm: 0.8430714828786305, iteration: 110463
loss: 1.0796650648117065,grad_norm: 1.000000020487036, iteration: 110464
loss: 0.9732406735420227,grad_norm: 0.9999988966142459, iteration: 110465
loss: 1.0008728504180908,grad_norm: 0.8372179523787509, iteration: 110466
loss: 1.0067651271820068,grad_norm: 0.89293617413847, iteration: 110467
loss: 1.0160990953445435,grad_norm: 0.8492998841458704, iteration: 110468
loss: 1.0626513957977295,grad_norm: 0.9999994049330179, iteration: 110469
loss: 1.0231032371520996,grad_norm: 0.9999997410116527, iteration: 110470
loss: 1.044140338897705,grad_norm: 0.9999997131662522, iteration: 110471
loss: 1.029653549194336,grad_norm: 0.9735462230941552, iteration: 110472
loss: 1.0151822566986084,grad_norm: 0.9234766386489052, iteration: 110473
loss: 0.9915457963943481,grad_norm: 0.8129426510507468, iteration: 110474
loss: 0.9757899641990662,grad_norm: 0.9199204015082518, iteration: 110475
loss: 0.961329460144043,grad_norm: 0.8884959627578508, iteration: 110476
loss: 0.9908590912818909,grad_norm: 0.9999991217207783, iteration: 110477
loss: 1.0003405809402466,grad_norm: 0.7912946949749619, iteration: 110478
loss: 1.2488192319869995,grad_norm: 0.999999194642784, iteration: 110479
loss: 0.9792683720588684,grad_norm: 0.9999993992549786, iteration: 110480
loss: 0.9795547127723694,grad_norm: 0.9490664417101997, iteration: 110481
loss: 1.0269449949264526,grad_norm: 0.9999993251795944, iteration: 110482
loss: 0.9984736442565918,grad_norm: 0.8598645238874879, iteration: 110483
loss: 1.0131466388702393,grad_norm: 0.9999992141715613, iteration: 110484
loss: 1.0163826942443848,grad_norm: 0.7940276773728898, iteration: 110485
loss: 1.062013864517212,grad_norm: 0.9999992136098201, iteration: 110486
loss: 1.0309456586837769,grad_norm: 0.9208382429182427, iteration: 110487
loss: 1.029327392578125,grad_norm: 0.7812174816594574, iteration: 110488
loss: 0.9729892015457153,grad_norm: 0.8967255577059333, iteration: 110489
loss: 1.028576374053955,grad_norm: 0.9999998781294466, iteration: 110490
loss: 1.0122747421264648,grad_norm: 0.8516169897173568, iteration: 110491
loss: 1.0387639999389648,grad_norm: 0.9344010642131235, iteration: 110492
loss: 1.0416632890701294,grad_norm: 0.9999992708907873, iteration: 110493
loss: 1.0190731287002563,grad_norm: 0.9999995672311229, iteration: 110494
loss: 1.0969713926315308,grad_norm: 0.9752822466009256, iteration: 110495
loss: 1.0203789472579956,grad_norm: 0.8188543714748139, iteration: 110496
loss: 0.978149950504303,grad_norm: 0.8140464183438895, iteration: 110497
loss: 1.0324524641036987,grad_norm: 0.9121701556709159, iteration: 110498
loss: 1.0711524486541748,grad_norm: 0.9999991339155337, iteration: 110499
loss: 1.0191954374313354,grad_norm: 0.9461243187202518, iteration: 110500
loss: 1.0129835605621338,grad_norm: 0.9473118998244634, iteration: 110501
loss: 0.9932571649551392,grad_norm: 0.8038362576473295, iteration: 110502
loss: 1.0871797800064087,grad_norm: 0.9999994861496783, iteration: 110503
loss: 0.9781035780906677,grad_norm: 0.9543552319975016, iteration: 110504
loss: 1.000045657157898,grad_norm: 0.9999989958992734, iteration: 110505
loss: 0.9975292682647705,grad_norm: 0.9930130606436448, iteration: 110506
loss: 0.9780533909797668,grad_norm: 0.8578752936690901, iteration: 110507
loss: 1.01514732837677,grad_norm: 0.9999997214488611, iteration: 110508
loss: 1.0256590843200684,grad_norm: 0.9787531775039968, iteration: 110509
loss: 1.0246878862380981,grad_norm: 0.8481250587881557, iteration: 110510
loss: 1.0824601650238037,grad_norm: 0.9999990560108829, iteration: 110511
loss: 1.012869954109192,grad_norm: 0.9999994058982533, iteration: 110512
loss: 0.9815323352813721,grad_norm: 0.8000721001373612, iteration: 110513
loss: 1.0059499740600586,grad_norm: 0.8418865865914459, iteration: 110514
loss: 1.0319682359695435,grad_norm: 0.9999991814914525, iteration: 110515
loss: 1.0346060991287231,grad_norm: 0.9839083101366983, iteration: 110516
loss: 0.9544371962547302,grad_norm: 0.9999989923421243, iteration: 110517
loss: 1.0362712144851685,grad_norm: 0.8343984866668294, iteration: 110518
loss: 1.0681780576705933,grad_norm: 0.9999996388102821, iteration: 110519
loss: 0.9967360496520996,grad_norm: 0.9999992294890041, iteration: 110520
loss: 1.0096869468688965,grad_norm: 0.9999991212708083, iteration: 110521
loss: 1.0084474086761475,grad_norm: 0.8959729459007294, iteration: 110522
loss: 0.9944515228271484,grad_norm: 0.802774042894926, iteration: 110523
loss: 1.054976224899292,grad_norm: 0.9999992396505988, iteration: 110524
loss: 1.0762403011322021,grad_norm: 0.9999997269928901, iteration: 110525
loss: 0.9754689335823059,grad_norm: 0.9675031700063478, iteration: 110526
loss: 1.0134140253067017,grad_norm: 0.9999997991274109, iteration: 110527
loss: 1.0404143333435059,grad_norm: 0.9999992327226325, iteration: 110528
loss: 1.029158353805542,grad_norm: 0.8427820049589042, iteration: 110529
loss: 1.007093071937561,grad_norm: 0.9999993685386339, iteration: 110530
loss: 0.9885290861129761,grad_norm: 0.8127121433171245, iteration: 110531
loss: 1.012477993965149,grad_norm: 0.8486066207331954, iteration: 110532
loss: 0.9909513592720032,grad_norm: 0.9999991586687687, iteration: 110533
loss: 1.030838131904602,grad_norm: 0.9999990325872105, iteration: 110534
loss: 0.9772422313690186,grad_norm: 0.9999992933752871, iteration: 110535
loss: 1.0468376874923706,grad_norm: 0.9999990451540867, iteration: 110536
loss: 0.9537885189056396,grad_norm: 0.8114685783425926, iteration: 110537
loss: 0.9942439198493958,grad_norm: 0.9999996216386833, iteration: 110538
loss: 1.0323374271392822,grad_norm: 0.7108362056157046, iteration: 110539
loss: 1.0271613597869873,grad_norm: 0.9999991219577993, iteration: 110540
loss: 1.0488455295562744,grad_norm: 0.999999707289836, iteration: 110541
loss: 1.051539421081543,grad_norm: 0.9472793189529385, iteration: 110542
loss: 1.0106385946273804,grad_norm: 0.9999991119823389, iteration: 110543
loss: 0.9697621464729309,grad_norm: 0.9999991318462654, iteration: 110544
loss: 0.9817074537277222,grad_norm: 0.9999990290047235, iteration: 110545
loss: 0.997696578502655,grad_norm: 0.768083925209419, iteration: 110546
loss: 0.9734862446784973,grad_norm: 0.9567764675182421, iteration: 110547
loss: 1.061365008354187,grad_norm: 0.9999992126643695, iteration: 110548
loss: 0.997789204120636,grad_norm: 0.8758082526222631, iteration: 110549
loss: 1.0716803073883057,grad_norm: 0.9999993922453729, iteration: 110550
loss: 0.9760461449623108,grad_norm: 0.8871526519820956, iteration: 110551
loss: 0.9905499219894409,grad_norm: 0.9520082806282614, iteration: 110552
loss: 1.0743721723556519,grad_norm: 0.9999996461311248, iteration: 110553
loss: 1.000553846359253,grad_norm: 0.9560228920421668, iteration: 110554
loss: 1.0217266082763672,grad_norm: 0.9999991004471874, iteration: 110555
loss: 1.199264645576477,grad_norm: 0.9999999951658282, iteration: 110556
loss: 0.9878694415092468,grad_norm: 0.9999993832688534, iteration: 110557
loss: 1.1697629690170288,grad_norm: 0.9999999054821709, iteration: 110558
loss: 1.0094873905181885,grad_norm: 0.9694543860092687, iteration: 110559
loss: 1.0348414182662964,grad_norm: 0.9292266342388706, iteration: 110560
loss: 1.0260496139526367,grad_norm: 0.8430606489389418, iteration: 110561
loss: 1.0616084337234497,grad_norm: 0.9999999597662372, iteration: 110562
loss: 1.0012092590332031,grad_norm: 0.8397288141655209, iteration: 110563
loss: 1.0087515115737915,grad_norm: 0.9281454865141616, iteration: 110564
loss: 0.9903302788734436,grad_norm: 0.8707402136565995, iteration: 110565
loss: 1.0431628227233887,grad_norm: 0.939492011291664, iteration: 110566
loss: 1.0076210498809814,grad_norm: 0.9967950380009414, iteration: 110567
loss: 0.9720581769943237,grad_norm: 0.8890988980659825, iteration: 110568
loss: 1.1565855741500854,grad_norm: 0.9999997873416183, iteration: 110569
loss: 1.0996698141098022,grad_norm: 0.9074475020804055, iteration: 110570
loss: 0.9960572123527527,grad_norm: 0.9999990076918858, iteration: 110571
loss: 0.9986509680747986,grad_norm: 0.9999995946667579, iteration: 110572
loss: 0.9777092933654785,grad_norm: 0.9402741808438349, iteration: 110573
loss: 1.0851413011550903,grad_norm: 0.9790259472842795, iteration: 110574
loss: 0.9983385801315308,grad_norm: 0.9999991084366362, iteration: 110575
loss: 1.0294959545135498,grad_norm: 0.8608527133325162, iteration: 110576
loss: 1.0414493083953857,grad_norm: 0.9999996418182913, iteration: 110577
loss: 1.0446217060089111,grad_norm: 0.9999991567434382, iteration: 110578
loss: 0.9875654578208923,grad_norm: 0.8107822979465031, iteration: 110579
loss: 0.9941644668579102,grad_norm: 0.9512157338726887, iteration: 110580
loss: 0.9791334271430969,grad_norm: 0.9631631756012384, iteration: 110581
loss: 0.9821537733078003,grad_norm: 0.9999994579724566, iteration: 110582
loss: 1.121530532836914,grad_norm: 1.0000000040828347, iteration: 110583
loss: 0.999258279800415,grad_norm: 0.8743620521577526, iteration: 110584
loss: 1.0191792249679565,grad_norm: 0.8309010174894425, iteration: 110585
loss: 1.0316344499588013,grad_norm: 0.999999964857434, iteration: 110586
loss: 0.9955781102180481,grad_norm: 0.7688702737433514, iteration: 110587
loss: 1.049607753753662,grad_norm: 0.9999991766933403, iteration: 110588
loss: 1.0146702527999878,grad_norm: 0.999999124456209, iteration: 110589
loss: 1.0567705631256104,grad_norm: 0.9999996955504985, iteration: 110590
loss: 1.0140553712844849,grad_norm: 0.9999991361280371, iteration: 110591
loss: 1.0194014310836792,grad_norm: 0.9999994033074575, iteration: 110592
loss: 0.9693767428398132,grad_norm: 0.850905523048642, iteration: 110593
loss: 0.9704222679138184,grad_norm: 0.9999991956174222, iteration: 110594
loss: 1.124292254447937,grad_norm: 0.9999992825985048, iteration: 110595
loss: 1.0082865953445435,grad_norm: 0.8749280746365895, iteration: 110596
loss: 1.0616768598556519,grad_norm: 0.9999998473366987, iteration: 110597
loss: 1.0738165378570557,grad_norm: 0.9595670881909981, iteration: 110598
loss: 1.030457615852356,grad_norm: 0.9999999793726911, iteration: 110599
loss: 1.001305341720581,grad_norm: 0.8671158654767618, iteration: 110600
loss: 0.9616761207580566,grad_norm: 0.9999991204407833, iteration: 110601
loss: 1.0149646997451782,grad_norm: 0.9999990986390128, iteration: 110602
loss: 1.1100621223449707,grad_norm: 0.9999996328578092, iteration: 110603
loss: 0.9869275689125061,grad_norm: 0.8104072428923398, iteration: 110604
loss: 1.0118138790130615,grad_norm: 0.9663200574097464, iteration: 110605
loss: 1.061643362045288,grad_norm: 0.9661885222345854, iteration: 110606
loss: 0.9886302947998047,grad_norm: 0.8528379862261316, iteration: 110607
loss: 1.0479393005371094,grad_norm: 0.9999998546518774, iteration: 110608
loss: 1.0199354887008667,grad_norm: 0.9999993159550923, iteration: 110609
loss: 1.0183477401733398,grad_norm: 0.9999994902280956, iteration: 110610
loss: 1.091422438621521,grad_norm: 0.9999997692633398, iteration: 110611
loss: 1.0285636186599731,grad_norm: 0.9999991517294878, iteration: 110612
loss: 0.9990434050559998,grad_norm: 0.999999145235378, iteration: 110613
loss: 1.0568358898162842,grad_norm: 0.9999991376254519, iteration: 110614
loss: 1.0572264194488525,grad_norm: 0.999999486505696, iteration: 110615
loss: 1.0183969736099243,grad_norm: 0.999999485586299, iteration: 110616
loss: 1.007147192955017,grad_norm: 0.9999994574286686, iteration: 110617
loss: 0.9541166424751282,grad_norm: 0.921911013100841, iteration: 110618
loss: 0.9804429411888123,grad_norm: 0.9999993243090011, iteration: 110619
loss: 1.0438587665557861,grad_norm: 0.9191512113462997, iteration: 110620
loss: 1.0252951383590698,grad_norm: 0.8717260031631053, iteration: 110621
loss: 1.001054048538208,grad_norm: 0.8057326353416956, iteration: 110622
loss: 0.9854647517204285,grad_norm: 0.9999996236107442, iteration: 110623
loss: 0.9931327700614929,grad_norm: 0.9999992942141299, iteration: 110624
loss: 1.078429937362671,grad_norm: 0.9999999468306408, iteration: 110625
loss: 0.9871158599853516,grad_norm: 0.9999991447142434, iteration: 110626
loss: 1.0491523742675781,grad_norm: 0.9999996789168776, iteration: 110627
loss: 1.0470072031021118,grad_norm: 0.9999994249629444, iteration: 110628
loss: 1.0071865320205688,grad_norm: 0.9999996593373739, iteration: 110629
loss: 0.9681704640388489,grad_norm: 0.999999082204218, iteration: 110630
loss: 1.0137943029403687,grad_norm: 0.8246152960236114, iteration: 110631
loss: 1.0343083143234253,grad_norm: 0.9999998581412619, iteration: 110632
loss: 1.0246567726135254,grad_norm: 0.9373618688115823, iteration: 110633
loss: 1.0325663089752197,grad_norm: 0.9963075234597587, iteration: 110634
loss: 1.0728949308395386,grad_norm: 0.9999994372084353, iteration: 110635
loss: 1.023958683013916,grad_norm: 0.999999027510708, iteration: 110636
loss: 1.0195810794830322,grad_norm: 0.9711448892409735, iteration: 110637
loss: 1.0301328897476196,grad_norm: 0.8332519625212472, iteration: 110638
loss: 1.0091180801391602,grad_norm: 0.9154010362078361, iteration: 110639
loss: 1.0055691003799438,grad_norm: 0.9999998943922298, iteration: 110640
loss: 1.0003125667572021,grad_norm: 0.8459968556502189, iteration: 110641
loss: 1.0245352983474731,grad_norm: 0.9999991924362404, iteration: 110642
loss: 1.0042665004730225,grad_norm: 0.9999991092353206, iteration: 110643
loss: 1.012635350227356,grad_norm: 0.9999990715417847, iteration: 110644
loss: 0.9895821213722229,grad_norm: 0.9999990421555496, iteration: 110645
loss: 0.992469310760498,grad_norm: 0.9999993977897981, iteration: 110646
loss: 0.9875685572624207,grad_norm: 0.999998963853684, iteration: 110647
loss: 0.9825038313865662,grad_norm: 0.7268442585498294, iteration: 110648
loss: 1.0224967002868652,grad_norm: 0.9999991283632224, iteration: 110649
loss: 1.016985535621643,grad_norm: 0.9999992512011845, iteration: 110650
loss: 1.055837869644165,grad_norm: 0.9999995165658866, iteration: 110651
loss: 1.000156283378601,grad_norm: 0.999999129019917, iteration: 110652
loss: 1.3657766580581665,grad_norm: 1.0000000130978681, iteration: 110653
loss: 0.998917281627655,grad_norm: 0.8472161591429652, iteration: 110654
loss: 0.9858717322349548,grad_norm: 0.8484054229261521, iteration: 110655
loss: 1.011614441871643,grad_norm: 0.8304251083233513, iteration: 110656
loss: 1.0078727006912231,grad_norm: 0.8772767983269409, iteration: 110657
loss: 0.9921729564666748,grad_norm: 0.933130438540299, iteration: 110658
loss: 1.0210297107696533,grad_norm: 0.8609973145557555, iteration: 110659
loss: 0.9835285544395447,grad_norm: 0.8005581602767905, iteration: 110660
loss: 1.0007137060165405,grad_norm: 0.7677704342649148, iteration: 110661
loss: 1.053838849067688,grad_norm: 0.9999994867210146, iteration: 110662
loss: 0.9773508906364441,grad_norm: 0.9717701276543319, iteration: 110663
loss: 1.0004476308822632,grad_norm: 0.8164033731357826, iteration: 110664
loss: 1.0047574043273926,grad_norm: 0.9229369714670809, iteration: 110665
loss: 0.995144784450531,grad_norm: 0.9999990672266769, iteration: 110666
loss: 1.0279752016067505,grad_norm: 0.8760636640446265, iteration: 110667
loss: 1.0332002639770508,grad_norm: 0.8279682106247869, iteration: 110668
loss: 1.0019726753234863,grad_norm: 0.9999991076906414, iteration: 110669
loss: 1.0331422090530396,grad_norm: 0.9999994261895507, iteration: 110670
loss: 1.1502909660339355,grad_norm: 0.9999993131308252, iteration: 110671
loss: 1.0432535409927368,grad_norm: 0.9999999198070342, iteration: 110672
loss: 0.9999744296073914,grad_norm: 0.9999996466695363, iteration: 110673
loss: 1.023503303527832,grad_norm: 0.858217558000537, iteration: 110674
loss: 0.9805184602737427,grad_norm: 0.999999052108531, iteration: 110675
loss: 0.9670330882072449,grad_norm: 0.9999990364459705, iteration: 110676
loss: 0.99471515417099,grad_norm: 0.8087361750612937, iteration: 110677
loss: 1.0059024095535278,grad_norm: 0.9083581252414583, iteration: 110678
loss: 1.0147746801376343,grad_norm: 0.9999992755583043, iteration: 110679
loss: 1.2420663833618164,grad_norm: 0.99999969346925, iteration: 110680
loss: 1.0011073350906372,grad_norm: 0.763468548344061, iteration: 110681
loss: 1.125098705291748,grad_norm: 0.9999997145926904, iteration: 110682
loss: 0.9843950867652893,grad_norm: 0.7705046469573498, iteration: 110683
loss: 1.0241962671279907,grad_norm: 0.9999990819993426, iteration: 110684
loss: 1.0128504037857056,grad_norm: 0.9328857646383433, iteration: 110685
loss: 1.0397350788116455,grad_norm: 0.9999998834033055, iteration: 110686
loss: 1.020332932472229,grad_norm: 0.7774650329510984, iteration: 110687
loss: 0.9974778294563293,grad_norm: 0.8746398724791525, iteration: 110688
loss: 0.9845081567764282,grad_norm: 0.740365905079005, iteration: 110689
loss: 1.185219168663025,grad_norm: 1.0000000208386646, iteration: 110690
loss: 1.0204237699508667,grad_norm: 0.8591989342663714, iteration: 110691
loss: 1.0154889822006226,grad_norm: 0.9999990645540489, iteration: 110692
loss: 0.99617600440979,grad_norm: 0.8160874405954586, iteration: 110693
loss: 1.0188114643096924,grad_norm: 0.833616219586934, iteration: 110694
loss: 1.067617416381836,grad_norm: 0.9999990812064546, iteration: 110695
loss: 1.0359060764312744,grad_norm: 0.9999989500248072, iteration: 110696
loss: 0.9590179324150085,grad_norm: 0.874729056702592, iteration: 110697
loss: 0.975681483745575,grad_norm: 0.9645418989909618, iteration: 110698
loss: 0.9769253134727478,grad_norm: 0.999999687359323, iteration: 110699
loss: 1.0029889345169067,grad_norm: 0.899380932681682, iteration: 110700
loss: 1.0490329265594482,grad_norm: 0.9999990895233927, iteration: 110701
loss: 1.163366675376892,grad_norm: 0.9999991186692078, iteration: 110702
loss: 0.995751142501831,grad_norm: 0.8024430121447798, iteration: 110703
loss: 1.0024718046188354,grad_norm: 0.9012492378338718, iteration: 110704
loss: 1.0578300952911377,grad_norm: 0.9999997711368692, iteration: 110705
loss: 1.0331231355667114,grad_norm: 0.9999996026082822, iteration: 110706
loss: 1.0249041318893433,grad_norm: 0.9999991371218157, iteration: 110707
loss: 1.229705810546875,grad_norm: 0.999999522159493, iteration: 110708
loss: 1.106042742729187,grad_norm: 0.9999992057647175, iteration: 110709
loss: 0.976487398147583,grad_norm: 0.8983965963235571, iteration: 110710
loss: 1.031787395477295,grad_norm: 0.9999993103726564, iteration: 110711
loss: 1.0492663383483887,grad_norm: 0.9684376418862716, iteration: 110712
loss: 1.0832438468933105,grad_norm: 0.9999993622081196, iteration: 110713
loss: 1.0633543729782104,grad_norm: 0.9999990608151171, iteration: 110714
loss: 1.0120872259140015,grad_norm: 0.8954371486522724, iteration: 110715
loss: 1.2082685232162476,grad_norm: 0.9999997334931803, iteration: 110716
loss: 1.0353282690048218,grad_norm: 0.9999995167082488, iteration: 110717
loss: 1.0020744800567627,grad_norm: 0.9999993943406971, iteration: 110718
loss: 1.020068645477295,grad_norm: 0.9999999424203505, iteration: 110719
loss: 1.1114251613616943,grad_norm: 0.9999997415562065, iteration: 110720
loss: 1.0205844640731812,grad_norm: 0.9999991761340421, iteration: 110721
loss: 1.0875979661941528,grad_norm: 0.9999991517729686, iteration: 110722
loss: 0.9686521291732788,grad_norm: 0.9723197729014014, iteration: 110723
loss: 1.0434770584106445,grad_norm: 0.8574601132999963, iteration: 110724
loss: 0.9819353818893433,grad_norm: 0.9255000017490472, iteration: 110725
loss: 0.9737251400947571,grad_norm: 0.9999989819516749, iteration: 110726
loss: 1.0133655071258545,grad_norm: 0.9480546692882528, iteration: 110727
loss: 1.0554018020629883,grad_norm: 0.9734631781928619, iteration: 110728
loss: 1.0300506353378296,grad_norm: 0.9999991216638768, iteration: 110729
loss: 1.0676333904266357,grad_norm: 0.9999997691556234, iteration: 110730
loss: 1.0180658102035522,grad_norm: 0.9999991802263043, iteration: 110731
loss: 0.9991614818572998,grad_norm: 0.9999990899501365, iteration: 110732
loss: 1.0213266611099243,grad_norm: 0.9999997464398006, iteration: 110733
loss: 1.0986013412475586,grad_norm: 0.9999994742867294, iteration: 110734
loss: 1.0526405572891235,grad_norm: 0.9823556236422932, iteration: 110735
loss: 0.9525293111801147,grad_norm: 0.9305076563511001, iteration: 110736
loss: 1.004050374031067,grad_norm: 0.9999992868947891, iteration: 110737
loss: 1.0296026468276978,grad_norm: 0.999999076227804, iteration: 110738
loss: 1.1100260019302368,grad_norm: 0.9318133577717929, iteration: 110739
loss: 1.0018393993377686,grad_norm: 0.939453097683697, iteration: 110740
loss: 1.1308395862579346,grad_norm: 0.9999992271534589, iteration: 110741
loss: 1.0194114446640015,grad_norm: 0.9979405992646887, iteration: 110742
loss: 1.0175782442092896,grad_norm: 0.8296757719508631, iteration: 110743
loss: 1.1700992584228516,grad_norm: 0.9999992922840478, iteration: 110744
loss: 1.003738522529602,grad_norm: 0.9999991192206794, iteration: 110745
loss: 1.0193973779678345,grad_norm: 0.764730569233583, iteration: 110746
loss: 1.2011603116989136,grad_norm: 0.9999999178647334, iteration: 110747
loss: 1.01926851272583,grad_norm: 0.999999864140765, iteration: 110748
loss: 1.1049108505249023,grad_norm: 0.9657505243759071, iteration: 110749
loss: 1.1405274868011475,grad_norm: 0.999999842888091, iteration: 110750
loss: 1.0433533191680908,grad_norm: 0.9999997706880293, iteration: 110751
loss: 1.0632330179214478,grad_norm: 0.9999992450592324, iteration: 110752
loss: 1.0393407344818115,grad_norm: 0.9999995030186818, iteration: 110753
loss: 1.0365643501281738,grad_norm: 0.999999669513361, iteration: 110754
loss: 1.016828179359436,grad_norm: 0.8665223602028013, iteration: 110755
loss: 1.1069284677505493,grad_norm: 0.9999999564421113, iteration: 110756
loss: 1.1395955085754395,grad_norm: 0.9999995591686782, iteration: 110757
loss: 1.0576651096343994,grad_norm: 0.9999996421060421, iteration: 110758
loss: 1.2287923097610474,grad_norm: 0.9999996610428608, iteration: 110759
loss: 1.114675760269165,grad_norm: 0.9999992264063328, iteration: 110760
loss: 1.153266191482544,grad_norm: 0.9999998706920198, iteration: 110761
loss: 1.0570156574249268,grad_norm: 0.9999992466975333, iteration: 110762
loss: 1.2284963130950928,grad_norm: 0.9999994475096624, iteration: 110763
loss: 1.0395408868789673,grad_norm: 0.9999992819656441, iteration: 110764
loss: 1.0081164836883545,grad_norm: 0.9129841267171379, iteration: 110765
loss: 1.024886131286621,grad_norm: 0.9999997406456983, iteration: 110766
loss: 0.9999047517776489,grad_norm: 0.8764730684305237, iteration: 110767
loss: 1.0948741436004639,grad_norm: 0.9999990224680342, iteration: 110768
loss: 1.1732937097549438,grad_norm: 0.9999997823202473, iteration: 110769
loss: 1.1104978322982788,grad_norm: 0.9999998287183728, iteration: 110770
loss: 0.9794425368309021,grad_norm: 0.8906582484988894, iteration: 110771
loss: 1.1396431922912598,grad_norm: 0.9999998907559596, iteration: 110772
loss: 1.2981889247894287,grad_norm: 0.9999999248996319, iteration: 110773
loss: 1.0844916105270386,grad_norm: 0.9999997438617862, iteration: 110774
loss: 1.1134217977523804,grad_norm: 0.9999997996217715, iteration: 110775
loss: 1.0721255540847778,grad_norm: 1.0000000890733496, iteration: 110776
loss: 1.063055396080017,grad_norm: 0.9999995954954415, iteration: 110777
loss: 1.1359663009643555,grad_norm: 0.9999998583578886, iteration: 110778
loss: 1.0812889337539673,grad_norm: 0.9999990906174182, iteration: 110779
loss: 1.0800126791000366,grad_norm: 0.9999997129414391, iteration: 110780
loss: 1.0296486616134644,grad_norm: 0.823786983930188, iteration: 110781
loss: 0.9566967487335205,grad_norm: 0.9999991758759129, iteration: 110782
loss: 1.2401020526885986,grad_norm: 0.9999999863524818, iteration: 110783
loss: 1.1638545989990234,grad_norm: 0.9999994263479806, iteration: 110784
loss: 1.2050646543502808,grad_norm: 0.999999930930346, iteration: 110785
loss: 1.1826581954956055,grad_norm: 0.9999998097647658, iteration: 110786
loss: 1.0918692350387573,grad_norm: 0.999999352078691, iteration: 110787
loss: 1.202986240386963,grad_norm: 0.9999999278710654, iteration: 110788
loss: 1.087094783782959,grad_norm: 0.9999998410238782, iteration: 110789
loss: 1.2605974674224854,grad_norm: 0.9999999238158656, iteration: 110790
loss: 1.1382330656051636,grad_norm: 0.9999998935312044, iteration: 110791
loss: 1.1146303415298462,grad_norm: 0.9999996239460547, iteration: 110792
loss: 1.3156696557998657,grad_norm: 0.9999999157488064, iteration: 110793
loss: 1.0292582511901855,grad_norm: 0.9999996421660555, iteration: 110794
loss: 1.1500537395477295,grad_norm: 0.9999998442130601, iteration: 110795
loss: 1.1485689878463745,grad_norm: 1.0000000049863618, iteration: 110796
loss: 1.03260338306427,grad_norm: 0.9999997067650771, iteration: 110797
loss: 1.1083317995071411,grad_norm: 1.0000001058089345, iteration: 110798
loss: 1.0983178615570068,grad_norm: 0.99999938758718, iteration: 110799
loss: 1.114455223083496,grad_norm: 0.9999997166751088, iteration: 110800
loss: 0.9942483305931091,grad_norm: 0.9955168371822114, iteration: 110801
loss: 1.1432034969329834,grad_norm: 0.9999999086369956, iteration: 110802
loss: 1.2395589351654053,grad_norm: 0.9999999396384991, iteration: 110803
loss: 1.0826631784439087,grad_norm: 0.9999995016009952, iteration: 110804
loss: 1.057813048362732,grad_norm: 0.999999136511945, iteration: 110805
loss: 0.9925256967544556,grad_norm: 0.9999992204601673, iteration: 110806
loss: 1.0704189538955688,grad_norm: 0.9999998387215382, iteration: 110807
loss: 1.0973891019821167,grad_norm: 0.9999996941063484, iteration: 110808
loss: 1.2231847047805786,grad_norm: 0.9999997898617787, iteration: 110809
loss: 1.075709581375122,grad_norm: 0.9999991405300985, iteration: 110810
loss: 1.1000144481658936,grad_norm: 0.9999997861366463, iteration: 110811
loss: 1.1403833627700806,grad_norm: 0.9999997619941594, iteration: 110812
loss: 1.0446174144744873,grad_norm: 0.9999999798502446, iteration: 110813
loss: 1.0531197786331177,grad_norm: 0.9999994623351672, iteration: 110814
loss: 1.007703185081482,grad_norm: 0.999999581572546, iteration: 110815
loss: 1.0916204452514648,grad_norm: 0.9999999223887636, iteration: 110816
loss: 1.1119390726089478,grad_norm: 0.9999991227084561, iteration: 110817
loss: 1.0288864374160767,grad_norm: 0.8279038260449028, iteration: 110818
loss: 1.079930067062378,grad_norm: 0.9999997925298005, iteration: 110819
loss: 0.970196545124054,grad_norm: 0.9800498571973658, iteration: 110820
loss: 1.2368534803390503,grad_norm: 0.9999997155382775, iteration: 110821
loss: 1.09076988697052,grad_norm: 0.9999992612199262, iteration: 110822
loss: 1.1249011754989624,grad_norm: 1.000000019996545, iteration: 110823
loss: 1.047278881072998,grad_norm: 0.9999997876270357, iteration: 110824
loss: 1.1308810710906982,grad_norm: 0.9999997395700189, iteration: 110825
loss: 1.0154497623443604,grad_norm: 0.853120705148905, iteration: 110826
loss: 0.9817432165145874,grad_norm: 0.9678427310401347, iteration: 110827
loss: 1.1388522386550903,grad_norm: 0.9701822328232117, iteration: 110828
loss: 0.9844425916671753,grad_norm: 0.9999997039757953, iteration: 110829
loss: 1.0367681980133057,grad_norm: 0.888118205069298, iteration: 110830
loss: 0.9821321368217468,grad_norm: 0.9736273672089901, iteration: 110831
loss: 1.0370514392852783,grad_norm: 0.999999219251681, iteration: 110832
loss: 1.029378890991211,grad_norm: 0.9999990223177798, iteration: 110833
loss: 1.08663010597229,grad_norm: 0.9999994610430017, iteration: 110834
loss: 1.0259535312652588,grad_norm: 0.999999773080677, iteration: 110835
loss: 1.1546872854232788,grad_norm: 1.0000001122442448, iteration: 110836
loss: 1.1017427444458008,grad_norm: 0.9999997091481063, iteration: 110837
loss: 1.0826951265335083,grad_norm: 0.9999995344123529, iteration: 110838
loss: 1.3686819076538086,grad_norm: 0.9999998322586945, iteration: 110839
loss: 1.0172978639602661,grad_norm: 0.9999992469505463, iteration: 110840
loss: 1.0157493352890015,grad_norm: 0.9652287251820103, iteration: 110841
loss: 1.3852492570877075,grad_norm: 0.9999991900100574, iteration: 110842
loss: 1.1672109365463257,grad_norm: 0.9999996652213746, iteration: 110843
loss: 1.5929179191589355,grad_norm: 0.9999997682382905, iteration: 110844
loss: 1.1354621648788452,grad_norm: 0.9999998848241651, iteration: 110845
loss: 1.2946842908859253,grad_norm: 0.9999998776911143, iteration: 110846
loss: 1.1198396682739258,grad_norm: 0.999999518847955, iteration: 110847
loss: 1.2129837274551392,grad_norm: 0.999999867168493, iteration: 110848
loss: 1.3783502578735352,grad_norm: 0.9999999204273192, iteration: 110849
loss: 1.036346435546875,grad_norm: 0.9999994375868435, iteration: 110850
loss: 1.205733299255371,grad_norm: 0.9999995457693751, iteration: 110851
loss: 0.987408459186554,grad_norm: 0.9495346173428607, iteration: 110852
loss: 1.2778356075286865,grad_norm: 0.9999995834309435, iteration: 110853
loss: 1.554394245147705,grad_norm: 0.9999997107935009, iteration: 110854
loss: 1.2889198064804077,grad_norm: 0.9999993921804693, iteration: 110855
loss: 1.097388744354248,grad_norm: 0.9999995956666031, iteration: 110856
loss: 1.3149687051773071,grad_norm: 0.9999996149172141, iteration: 110857
loss: 1.1311531066894531,grad_norm: 1.0000000375347549, iteration: 110858
loss: 1.2332098484039307,grad_norm: 0.9999996825295001, iteration: 110859
loss: 1.0979257822036743,grad_norm: 0.9999998716093466, iteration: 110860
loss: 1.196858525276184,grad_norm: 0.9999996919829506, iteration: 110861
loss: 1.147549033164978,grad_norm: 0.9999996096114038, iteration: 110862
loss: 1.027524471282959,grad_norm: 0.9999990590296242, iteration: 110863
loss: 1.1202949285507202,grad_norm: 0.9999995955960579, iteration: 110864
loss: 1.0386673212051392,grad_norm: 0.9999996931577685, iteration: 110865
loss: 1.143001675605774,grad_norm: 0.9999990857406338, iteration: 110866
loss: 1.0977506637573242,grad_norm: 0.9999990256611275, iteration: 110867
loss: 1.201250433921814,grad_norm: 0.9999999920071382, iteration: 110868
loss: 1.1535362005233765,grad_norm: 0.9999995311322741, iteration: 110869
loss: 1.1094318628311157,grad_norm: 0.9999992417481123, iteration: 110870
loss: 1.0259581804275513,grad_norm: 0.999999749302963, iteration: 110871
loss: 1.0069218873977661,grad_norm: 0.999999723184697, iteration: 110872
loss: 1.1234439611434937,grad_norm: 0.9999995286088644, iteration: 110873
loss: 1.0639208555221558,grad_norm: 0.956078208068588, iteration: 110874
loss: 1.1635537147521973,grad_norm: 0.9999994513626441, iteration: 110875
loss: 1.1113182306289673,grad_norm: 0.9999996996843711, iteration: 110876
loss: 1.092949628829956,grad_norm: 0.9999991453935642, iteration: 110877
loss: 1.0549637079238892,grad_norm: 0.9999991639733284, iteration: 110878
loss: 1.0200409889221191,grad_norm: 0.9999993827267015, iteration: 110879
loss: 1.0315486192703247,grad_norm: 0.8914194353830951, iteration: 110880
loss: 1.0422192811965942,grad_norm: 0.9999992642853103, iteration: 110881
loss: 1.1485036611557007,grad_norm: 0.9999995136027561, iteration: 110882
loss: 1.0503709316253662,grad_norm: 0.9999999697952443, iteration: 110883
loss: 1.1998682022094727,grad_norm: 0.9999999332479402, iteration: 110884
loss: 1.1638766527175903,grad_norm: 0.9999994921753605, iteration: 110885
loss: 1.029261589050293,grad_norm: 0.9530207091873978, iteration: 110886
loss: 1.1586263179779053,grad_norm: 0.9999993733146053, iteration: 110887
loss: 1.0225964784622192,grad_norm: 0.9999992064409362, iteration: 110888
loss: 1.070406198501587,grad_norm: 0.9999993201912788, iteration: 110889
loss: 0.985954999923706,grad_norm: 0.999999060562181, iteration: 110890
loss: 1.2575008869171143,grad_norm: 0.9999995380015083, iteration: 110891
loss: 1.0479637384414673,grad_norm: 0.9999991410999923, iteration: 110892
loss: 1.298532247543335,grad_norm: 0.9999995804416046, iteration: 110893
loss: 1.1322323083877563,grad_norm: 0.9999999092076581, iteration: 110894
loss: 1.0544761419296265,grad_norm: 0.9057578382998658, iteration: 110895
loss: 0.9998438358306885,grad_norm: 0.9999992332227856, iteration: 110896
loss: 1.0785788297653198,grad_norm: 0.9999996870786445, iteration: 110897
loss: 1.1041598320007324,grad_norm: 0.9999997084738703, iteration: 110898
loss: 1.037997841835022,grad_norm: 0.9999996827701814, iteration: 110899
loss: 1.0433603525161743,grad_norm: 0.9999992785488299, iteration: 110900
loss: 1.2297935485839844,grad_norm: 0.9999997742974608, iteration: 110901
loss: 1.0485252141952515,grad_norm: 0.9954973874879751, iteration: 110902
loss: 1.1007663011550903,grad_norm: 0.9999993909686022, iteration: 110903
loss: 1.094109296798706,grad_norm: 0.9999993545034782, iteration: 110904
loss: 1.278554081916809,grad_norm: 0.9999997627698594, iteration: 110905
loss: 0.9842733144760132,grad_norm: 0.9999993303603176, iteration: 110906
loss: 1.0540105104446411,grad_norm: 0.9999995329612239, iteration: 110907
loss: 1.1638697385787964,grad_norm: 0.9999994267964192, iteration: 110908
loss: 1.106595516204834,grad_norm: 0.9999993319554257, iteration: 110909
loss: 1.082986831665039,grad_norm: 0.9999999752886586, iteration: 110910
loss: 0.9674355983734131,grad_norm: 0.8638794148174529, iteration: 110911
loss: 1.3747283220291138,grad_norm: 0.9999996761983136, iteration: 110912
loss: 1.2792047262191772,grad_norm: 0.9999996790181367, iteration: 110913
loss: 1.1175676584243774,grad_norm: 0.999999743351418, iteration: 110914
loss: 1.0898889303207397,grad_norm: 0.99999989917384, iteration: 110915
loss: 1.2029136419296265,grad_norm: 0.9999992117577613, iteration: 110916
loss: 0.9844383597373962,grad_norm: 0.9999995534285242, iteration: 110917
loss: 1.0399222373962402,grad_norm: 0.9999998590864505, iteration: 110918
loss: 1.0286970138549805,grad_norm: 0.9999995852781828, iteration: 110919
loss: 1.1572401523590088,grad_norm: 0.9999998678048236, iteration: 110920
loss: 1.0742480754852295,grad_norm: 0.9999999123276697, iteration: 110921
loss: 1.086003065109253,grad_norm: 0.9999993193798621, iteration: 110922
loss: 1.1574636697769165,grad_norm: 0.9999998443041084, iteration: 110923
loss: 1.17108154296875,grad_norm: 0.9999999605274074, iteration: 110924
loss: 1.0377073287963867,grad_norm: 0.9999992361812802, iteration: 110925
loss: 1.124449372291565,grad_norm: 0.9999995392562452, iteration: 110926
loss: 0.9892189502716064,grad_norm: 0.9999996228353338, iteration: 110927
loss: 1.2548270225524902,grad_norm: 0.9999991712411874, iteration: 110928
loss: 1.0464898347854614,grad_norm: 0.9999993221699767, iteration: 110929
loss: 1.0116791725158691,grad_norm: 0.9999996319034462, iteration: 110930
loss: 0.9956606030464172,grad_norm: 0.799843988732033, iteration: 110931
loss: 1.0660170316696167,grad_norm: 0.9999990996123553, iteration: 110932
loss: 1.183797001838684,grad_norm: 0.999999880665539, iteration: 110933
loss: 1.1027178764343262,grad_norm: 0.999999436907163, iteration: 110934
loss: 1.2946815490722656,grad_norm: 0.9999997991720947, iteration: 110935
loss: 0.9939848780632019,grad_norm: 0.7689674359357924, iteration: 110936
loss: 1.2177690267562866,grad_norm: 0.9999992872027413, iteration: 110937
loss: 1.1642661094665527,grad_norm: 0.999999162972771, iteration: 110938
loss: 1.0064796209335327,grad_norm: 0.9349774941241191, iteration: 110939
loss: 1.1449660062789917,grad_norm: 0.999999336987627, iteration: 110940
loss: 1.133548378944397,grad_norm: 0.9999995329134459, iteration: 110941
loss: 1.0746926069259644,grad_norm: 0.9999993979723265, iteration: 110942
loss: 1.1262259483337402,grad_norm: 0.9999992709838932, iteration: 110943
loss: 1.2381900548934937,grad_norm: 0.9999994734311327, iteration: 110944
loss: 1.0114425420761108,grad_norm: 0.9135973537213784, iteration: 110945
loss: 1.1220773458480835,grad_norm: 0.9999991099556615, iteration: 110946
loss: 1.144079566001892,grad_norm: 0.9999993502436478, iteration: 110947
loss: 1.085945963859558,grad_norm: 0.9999993237919058, iteration: 110948
loss: 1.0040291547775269,grad_norm: 0.9283991776510637, iteration: 110949
loss: 1.2748074531555176,grad_norm: 0.9999999473240214, iteration: 110950
loss: 1.0741575956344604,grad_norm: 0.9999992842204795, iteration: 110951
loss: 0.9932451248168945,grad_norm: 0.9548934815168563, iteration: 110952
loss: 1.095942497253418,grad_norm: 0.9999993185424507, iteration: 110953
loss: 1.4842058420181274,grad_norm: 0.9999999796800642, iteration: 110954
loss: 1.1880438327789307,grad_norm: 0.9999997877574386, iteration: 110955
loss: 1.0617480278015137,grad_norm: 0.9999991373782663, iteration: 110956
loss: 1.0670313835144043,grad_norm: 0.9999990421955803, iteration: 110957
loss: 1.019279956817627,grad_norm: 0.999999171211705, iteration: 110958
loss: 0.9995718598365784,grad_norm: 0.9999992309792093, iteration: 110959
loss: 1.0157076120376587,grad_norm: 0.9999993097276196, iteration: 110960
loss: 1.1180871725082397,grad_norm: 0.9999994529414777, iteration: 110961
loss: 1.0116103887557983,grad_norm: 0.999999453399556, iteration: 110962
loss: 1.0957372188568115,grad_norm: 0.9999992206611785, iteration: 110963
loss: 1.1654971837997437,grad_norm: 0.999999423274382, iteration: 110964
loss: 1.0229796171188354,grad_norm: 0.9582723032820016, iteration: 110965
loss: 0.9910418391227722,grad_norm: 0.9999995133093628, iteration: 110966
loss: 1.1593821048736572,grad_norm: 1.0000000281678638, iteration: 110967
loss: 1.1351126432418823,grad_norm: 0.9999996372379548, iteration: 110968
loss: 1.046593427658081,grad_norm: 0.9905883669007284, iteration: 110969
loss: 1.1602206230163574,grad_norm: 0.9999993422408808, iteration: 110970
loss: 1.0334584712982178,grad_norm: 0.9999991359129896, iteration: 110971
loss: 1.5331952571868896,grad_norm: 0.9999999872314788, iteration: 110972
loss: 0.9769393801689148,grad_norm: 0.9999991144956423, iteration: 110973
loss: 0.9721364974975586,grad_norm: 0.8804137825503283, iteration: 110974
loss: 1.137271523475647,grad_norm: 0.9999995946404125, iteration: 110975
loss: 1.0566529035568237,grad_norm: 0.9999999926362833, iteration: 110976
loss: 1.154414415359497,grad_norm: 0.999999373312349, iteration: 110977
loss: 1.0457872152328491,grad_norm: 0.9999990998573991, iteration: 110978
loss: 1.034622073173523,grad_norm: 0.786833801196554, iteration: 110979
loss: 1.031368613243103,grad_norm: 0.9999991432244536, iteration: 110980
loss: 1.0444762706756592,grad_norm: 1.0000000037967647, iteration: 110981
loss: 1.6034952402114868,grad_norm: 0.9999998580223817, iteration: 110982
loss: 1.030882716178894,grad_norm: 0.9999994418456154, iteration: 110983
loss: 1.105029821395874,grad_norm: 0.9999997519837893, iteration: 110984
loss: 1.0629559755325317,grad_norm: 0.9999992495689928, iteration: 110985
loss: 1.0742517709732056,grad_norm: 0.9999994454691301, iteration: 110986
loss: 1.0512901544570923,grad_norm: 0.9999990078722361, iteration: 110987
loss: 1.172393798828125,grad_norm: 0.999999982067477, iteration: 110988
loss: 1.1536250114440918,grad_norm: 0.9999999604617624, iteration: 110989
loss: 1.0560123920440674,grad_norm: 0.9999999035262666, iteration: 110990
loss: 1.027143120765686,grad_norm: 0.9999997537745955, iteration: 110991
loss: 1.1180177927017212,grad_norm: 0.9999995864977421, iteration: 110992
loss: 1.0173944234848022,grad_norm: 0.9049414995376877, iteration: 110993
loss: 1.1570574045181274,grad_norm: 0.9999994628094472, iteration: 110994
loss: 1.0804365873336792,grad_norm: 0.9999995735698465, iteration: 110995
loss: 1.009984016418457,grad_norm: 0.9087857205613059, iteration: 110996
loss: 1.120021104812622,grad_norm: 0.999999866146676, iteration: 110997
loss: 1.0955994129180908,grad_norm: 0.9145592461012, iteration: 110998
loss: 1.0931332111358643,grad_norm: 0.9999995343485967, iteration: 110999
loss: 1.055017113685608,grad_norm: 0.9999999133420111, iteration: 111000
loss: 1.058921217918396,grad_norm: 0.9462598152648719, iteration: 111001
loss: 1.3564822673797607,grad_norm: 0.999999663218699, iteration: 111002
loss: 1.4805026054382324,grad_norm: 1.000000061110776, iteration: 111003
loss: 1.0279743671417236,grad_norm: 0.999999064454771, iteration: 111004
loss: 1.0852693319320679,grad_norm: 0.9999991305809476, iteration: 111005
loss: 1.1427197456359863,grad_norm: 0.9999996599818126, iteration: 111006
loss: 1.0228562355041504,grad_norm: 0.999999311571728, iteration: 111007
loss: 1.0242602825164795,grad_norm: 0.9999992549062447, iteration: 111008
loss: 1.0140187740325928,grad_norm: 0.9999991930243699, iteration: 111009
loss: 1.0341242551803589,grad_norm: 0.9999990930802125, iteration: 111010
loss: 1.0834810733795166,grad_norm: 0.9999994336985782, iteration: 111011
loss: 1.0494232177734375,grad_norm: 0.9999996437896973, iteration: 111012
loss: 1.0412664413452148,grad_norm: 0.7620345286453262, iteration: 111013
loss: 1.0299594402313232,grad_norm: 0.9999990957103669, iteration: 111014
loss: 1.0622246265411377,grad_norm: 0.9237953985721427, iteration: 111015
loss: 1.3227030038833618,grad_norm: 0.9999999184564513, iteration: 111016
loss: 0.9967525601387024,grad_norm: 0.8500047860874543, iteration: 111017
loss: 1.0042285919189453,grad_norm: 0.9172283300622103, iteration: 111018
loss: 1.0179996490478516,grad_norm: 0.9999999438834529, iteration: 111019
loss: 1.005952000617981,grad_norm: 0.7802484990211247, iteration: 111020
loss: 1.1272127628326416,grad_norm: 0.9999999202299517, iteration: 111021
loss: 1.0480799674987793,grad_norm: 0.9999996277050736, iteration: 111022
loss: 1.011455774307251,grad_norm: 0.9806343252455854, iteration: 111023
loss: 1.0347437858581543,grad_norm: 0.85436316854056, iteration: 111024
loss: 1.1626089811325073,grad_norm: 0.9999993002650259, iteration: 111025
loss: 1.2628370523452759,grad_norm: 0.9999999918232834, iteration: 111026
loss: 0.997748076915741,grad_norm: 0.999999182331274, iteration: 111027
loss: 1.0173604488372803,grad_norm: 0.9999997004189088, iteration: 111028
loss: 1.075128436088562,grad_norm: 0.9184408918998495, iteration: 111029
loss: 1.0219382047653198,grad_norm: 0.9670170185416245, iteration: 111030
loss: 1.0844093561172485,grad_norm: 0.9999997719229909, iteration: 111031
loss: 1.0638437271118164,grad_norm: 0.999999370709555, iteration: 111032
loss: 1.0865639448165894,grad_norm: 0.9999992818614498, iteration: 111033
loss: 1.0178470611572266,grad_norm: 0.9999990892414127, iteration: 111034
loss: 1.1341845989227295,grad_norm: 0.9999994694238993, iteration: 111035
loss: 1.0930641889572144,grad_norm: 0.9999990178063218, iteration: 111036
loss: 1.0504399538040161,grad_norm: 0.9979900065581511, iteration: 111037
loss: 1.0024387836456299,grad_norm: 0.9999996050522686, iteration: 111038
loss: 1.1514230966567993,grad_norm: 0.9999999683379492, iteration: 111039
loss: 1.0730618238449097,grad_norm: 0.9999996713158879, iteration: 111040
loss: 1.0169265270233154,grad_norm: 0.9374009573174497, iteration: 111041
loss: 1.0935250520706177,grad_norm: 0.9999995494092132, iteration: 111042
loss: 1.033430576324463,grad_norm: 0.9999991168064516, iteration: 111043
loss: 1.0511354207992554,grad_norm: 0.9999997633039392, iteration: 111044
loss: 1.046350121498108,grad_norm: 0.9999991561102175, iteration: 111045
loss: 1.0093300342559814,grad_norm: 0.9999989947620582, iteration: 111046
loss: 1.0118762254714966,grad_norm: 0.8268902496135354, iteration: 111047
loss: 1.0116170644760132,grad_norm: 0.9999993312092709, iteration: 111048
loss: 1.1375871896743774,grad_norm: 0.9999998530486256, iteration: 111049
loss: 0.9932088851928711,grad_norm: 0.884219911038689, iteration: 111050
loss: 0.9916115403175354,grad_norm: 0.9410782955141247, iteration: 111051
loss: 1.1036462783813477,grad_norm: 0.9999991918272906, iteration: 111052
loss: 1.0179390907287598,grad_norm: 0.9490104994756989, iteration: 111053
loss: 1.0964282751083374,grad_norm: 0.9999996677995733, iteration: 111054
loss: 1.0323405265808105,grad_norm: 0.999999350837814, iteration: 111055
loss: 1.0074353218078613,grad_norm: 0.999999283325793, iteration: 111056
loss: 1.0776560306549072,grad_norm: 0.9999992269702217, iteration: 111057
loss: 1.027693271636963,grad_norm: 0.999999933400534, iteration: 111058
loss: 1.32589590549469,grad_norm: 0.9999997942894449, iteration: 111059
loss: 1.0043907165527344,grad_norm: 0.8872567780816942, iteration: 111060
loss: 1.0064197778701782,grad_norm: 0.777628629228483, iteration: 111061
loss: 0.9854745864868164,grad_norm: 0.9999990475849614, iteration: 111062
loss: 1.0145909786224365,grad_norm: 0.8482422913565366, iteration: 111063
loss: 1.0371100902557373,grad_norm: 0.9668743874983611, iteration: 111064
loss: 1.0655754804611206,grad_norm: 0.9999991201060316, iteration: 111065
loss: 1.0064504146575928,grad_norm: 0.7549144225870824, iteration: 111066
loss: 1.1137714385986328,grad_norm: 0.999999542957337, iteration: 111067
loss: 1.0137269496917725,grad_norm: 0.958594848350726, iteration: 111068
loss: 0.9926653504371643,grad_norm: 0.7377272599060356, iteration: 111069
loss: 1.1603387594223022,grad_norm: 0.9999999099501645, iteration: 111070
loss: 1.0474709272384644,grad_norm: 0.9999995070724574, iteration: 111071
loss: 0.9971606135368347,grad_norm: 0.9999991065155787, iteration: 111072
loss: 0.9953494668006897,grad_norm: 0.9999991972033022, iteration: 111073
loss: 0.9694298505783081,grad_norm: 0.8959683867970468, iteration: 111074
loss: 0.9824227690696716,grad_norm: 0.9745672829642104, iteration: 111075
loss: 1.1871259212493896,grad_norm: 0.9999992040094225, iteration: 111076
loss: 0.9944210052490234,grad_norm: 0.999999123545469, iteration: 111077
loss: 1.0454566478729248,grad_norm: 0.9999990605017492, iteration: 111078
loss: 0.9742896556854248,grad_norm: 0.7450671936913202, iteration: 111079
loss: 0.9978246688842773,grad_norm: 0.9070620075106549, iteration: 111080
loss: 1.0887073278427124,grad_norm: 0.9459524391667231, iteration: 111081
loss: 0.9898664951324463,grad_norm: 0.8513351211620083, iteration: 111082
loss: 1.034361481666565,grad_norm: 0.7909773928936709, iteration: 111083
loss: 1.072310209274292,grad_norm: 0.9999994244150834, iteration: 111084
loss: 1.0175377130508423,grad_norm: 0.9999992490593873, iteration: 111085
loss: 1.142608404159546,grad_norm: 0.9999994633384026, iteration: 111086
loss: 1.0125267505645752,grad_norm: 0.7822619103202394, iteration: 111087
loss: 1.01256263256073,grad_norm: 0.880054970390414, iteration: 111088
loss: 0.9955102801322937,grad_norm: 0.9999991504447957, iteration: 111089
loss: 1.1639777421951294,grad_norm: 0.999999106304952, iteration: 111090
loss: 1.024025321006775,grad_norm: 0.9999991181700294, iteration: 111091
loss: 1.0147842168807983,grad_norm: 0.9692060062679694, iteration: 111092
loss: 1.0306754112243652,grad_norm: 0.9999999773279719, iteration: 111093
loss: 1.0349957942962646,grad_norm: 0.999999089501031, iteration: 111094
loss: 1.0156373977661133,grad_norm: 0.7931242825609265, iteration: 111095
loss: 1.0752493143081665,grad_norm: 0.9999998863442369, iteration: 111096
loss: 0.978253185749054,grad_norm: 0.9030545232489399, iteration: 111097
loss: 1.0069130659103394,grad_norm: 0.9999994531005427, iteration: 111098
loss: 1.0276426076889038,grad_norm: 0.9999989872541076, iteration: 111099
loss: 1.0671703815460205,grad_norm: 0.999999087659905, iteration: 111100
loss: 1.1225017309188843,grad_norm: 0.9999997108236229, iteration: 111101
loss: 0.9947391152381897,grad_norm: 0.939502002109915, iteration: 111102
loss: 0.9801192879676819,grad_norm: 0.9245363331260413, iteration: 111103
loss: 1.1105307340621948,grad_norm: 1.000000046785219, iteration: 111104
loss: 1.035831093788147,grad_norm: 0.8771986325450517, iteration: 111105
loss: 1.1041390895843506,grad_norm: 0.9999990942035224, iteration: 111106
loss: 1.0578614473342896,grad_norm: 0.9999999514639901, iteration: 111107
loss: 1.0002782344818115,grad_norm: 0.9903037079242831, iteration: 111108
loss: 1.050416350364685,grad_norm: 0.9999990536694218, iteration: 111109
loss: 1.0693565607070923,grad_norm: 0.9999993152493669, iteration: 111110
loss: 1.1282857656478882,grad_norm: 0.9999991376798061, iteration: 111111
loss: 1.227678656578064,grad_norm: 0.9999999030596624, iteration: 111112
loss: 0.9576431512832642,grad_norm: 0.9535230496511022, iteration: 111113
loss: 1.0012598037719727,grad_norm: 0.9999992086453017, iteration: 111114
loss: 1.078849196434021,grad_norm: 0.9999991383690163, iteration: 111115
loss: 1.0854660272598267,grad_norm: 0.9999992326130429, iteration: 111116
loss: 1.0342774391174316,grad_norm: 0.9999996821497924, iteration: 111117
loss: 1.2214910984039307,grad_norm: 0.9999998024601141, iteration: 111118
loss: 1.0260647535324097,grad_norm: 0.9999992364706058, iteration: 111119
loss: 1.0317935943603516,grad_norm: 0.9999991483155519, iteration: 111120
loss: 1.019515872001648,grad_norm: 0.9999997922967162, iteration: 111121
loss: 1.0344123840332031,grad_norm: 0.9999998341770127, iteration: 111122
loss: 1.0136858224868774,grad_norm: 0.9999992243368144, iteration: 111123
loss: 1.0256644487380981,grad_norm: 0.9869180970853721, iteration: 111124
loss: 1.019850730895996,grad_norm: 0.9999991141366212, iteration: 111125
loss: 1.083088755607605,grad_norm: 0.8586635204669401, iteration: 111126
loss: 1.110731601715088,grad_norm: 0.9999995642018709, iteration: 111127
loss: 1.0228489637374878,grad_norm: 0.9999992833461643, iteration: 111128
loss: 1.055590271949768,grad_norm: 0.8385742426537987, iteration: 111129
loss: 1.0772593021392822,grad_norm: 0.8818774164107659, iteration: 111130
loss: 1.0059655904769897,grad_norm: 0.999999158696315, iteration: 111131
loss: 1.120273470878601,grad_norm: 0.9999992043074832, iteration: 111132
loss: 0.9880314469337463,grad_norm: 0.9999997966073173, iteration: 111133
loss: 1.3377131223678589,grad_norm: 0.9999998981040152, iteration: 111134
loss: 1.046297550201416,grad_norm: 0.8637877652993105, iteration: 111135
loss: 1.0246673822402954,grad_norm: 0.9999992093050116, iteration: 111136
loss: 1.0203663110733032,grad_norm: 0.9999992197698218, iteration: 111137
loss: 1.0001386404037476,grad_norm: 0.9173715379195723, iteration: 111138
loss: 1.0188490152359009,grad_norm: 0.8430835803542244, iteration: 111139
loss: 1.0862064361572266,grad_norm: 0.9999994380339553, iteration: 111140
loss: 1.093656063079834,grad_norm: 0.9999990604508312, iteration: 111141
loss: 1.076492190361023,grad_norm: 0.9999999469422298, iteration: 111142
loss: 1.0463390350341797,grad_norm: 0.9999996346768967, iteration: 111143
loss: 1.0474741458892822,grad_norm: 0.9999993352900974, iteration: 111144
loss: 0.9914736747741699,grad_norm: 0.7570245973456025, iteration: 111145
loss: 1.0223571062088013,grad_norm: 0.9237248427130779, iteration: 111146
loss: 1.0163401365280151,grad_norm: 0.9999992416383384, iteration: 111147
loss: 1.1690443754196167,grad_norm: 0.999999945815136, iteration: 111148
loss: 1.1790549755096436,grad_norm: 0.9999994973851469, iteration: 111149
loss: 1.0850354433059692,grad_norm: 0.9999992861811021, iteration: 111150
loss: 1.1469719409942627,grad_norm: 0.9999997561175906, iteration: 111151
loss: 0.9850994944572449,grad_norm: 0.8859488509552765, iteration: 111152
loss: 1.0952612161636353,grad_norm: 0.9999993737663834, iteration: 111153
loss: 0.9996356964111328,grad_norm: 0.9999991479468158, iteration: 111154
loss: 1.1742609739303589,grad_norm: 0.9999999049202742, iteration: 111155
loss: 1.049157738685608,grad_norm: 0.9999994237690829, iteration: 111156
loss: 1.1539316177368164,grad_norm: 0.9999993970577972, iteration: 111157
loss: 1.2202708721160889,grad_norm: 0.9999997696824535, iteration: 111158
loss: 1.000019907951355,grad_norm: 0.8506801751144831, iteration: 111159
loss: 1.0849765539169312,grad_norm: 0.999999429435303, iteration: 111160
loss: 1.26036536693573,grad_norm: 0.9999999144337676, iteration: 111161
loss: 1.082040548324585,grad_norm: 0.9999991236810107, iteration: 111162
loss: 0.9833683967590332,grad_norm: 0.9380189730161129, iteration: 111163
loss: 1.0795286893844604,grad_norm: 0.9999990520198826, iteration: 111164
loss: 1.1154773235321045,grad_norm: 0.9999994760695857, iteration: 111165
loss: 1.0157201290130615,grad_norm: 0.9999996429938961, iteration: 111166
loss: 1.3234233856201172,grad_norm: 0.9999995082355662, iteration: 111167
loss: 1.1240791082382202,grad_norm: 0.9999991311751303, iteration: 111168
loss: 1.07275390625,grad_norm: 0.9999994710753632, iteration: 111169
loss: 1.098662257194519,grad_norm: 0.9999992515092349, iteration: 111170
loss: 0.9923638701438904,grad_norm: 0.9999989576986156, iteration: 111171
loss: 1.0312432050704956,grad_norm: 0.9999991786306461, iteration: 111172
loss: 1.1322739124298096,grad_norm: 0.9999995385765147, iteration: 111173
loss: 1.2225421667099,grad_norm: 0.9999999112006943, iteration: 111174
loss: 1.0527997016906738,grad_norm: 0.9999994255313066, iteration: 111175
loss: 1.0814448595046997,grad_norm: 0.999999173905344, iteration: 111176
loss: 1.0329521894454956,grad_norm: 0.9737287456650183, iteration: 111177
loss: 1.1058789491653442,grad_norm: 0.9999993438903632, iteration: 111178
loss: 1.0681073665618896,grad_norm: 0.99999912113756, iteration: 111179
loss: 1.1476587057113647,grad_norm: 0.9999999390807637, iteration: 111180
loss: 1.2113494873046875,grad_norm: 0.9999998147606739, iteration: 111181
loss: 1.0475001335144043,grad_norm: 0.9999995033773568, iteration: 111182
loss: 1.150663137435913,grad_norm: 0.9999999247884854, iteration: 111183
loss: 1.0443462133407593,grad_norm: 0.918038336941314, iteration: 111184
loss: 1.058754563331604,grad_norm: 0.9999998216396395, iteration: 111185
loss: 1.2466542720794678,grad_norm: 0.9999998644203968, iteration: 111186
loss: 1.0839755535125732,grad_norm: 0.9999991014405035, iteration: 111187
loss: 1.2842532396316528,grad_norm: 0.9999999169928173, iteration: 111188
loss: 1.2289996147155762,grad_norm: 0.9999996283996164, iteration: 111189
loss: 1.0243700742721558,grad_norm: 0.9999990841279276, iteration: 111190
loss: 1.0846967697143555,grad_norm: 0.9999999404914772, iteration: 111191
loss: 1.0749198198318481,grad_norm: 0.9999995938761987, iteration: 111192
loss: 1.2809704542160034,grad_norm: 0.9999995438479384, iteration: 111193
loss: 1.1502265930175781,grad_norm: 0.9999991727055847, iteration: 111194
loss: 1.0593926906585693,grad_norm: 0.9999990605788228, iteration: 111195
loss: 1.3689303398132324,grad_norm: 0.9999999405723982, iteration: 111196
loss: 1.2316615581512451,grad_norm: 0.9999998851155937, iteration: 111197
loss: 1.1514616012573242,grad_norm: 0.9999994603028207, iteration: 111198
loss: 1.2459511756896973,grad_norm: 0.9999994782254945, iteration: 111199
loss: 1.270941972732544,grad_norm: 0.9999999205579152, iteration: 111200
loss: 1.2366182804107666,grad_norm: 0.9999994836212474, iteration: 111201
loss: 1.2783801555633545,grad_norm: 0.9999999670084686, iteration: 111202
loss: 1.1860545873641968,grad_norm: 0.9999999345302161, iteration: 111203
loss: 1.284353494644165,grad_norm: 0.9999995674085064, iteration: 111204
loss: 1.132243275642395,grad_norm: 0.9999993711874864, iteration: 111205
loss: 1.2486863136291504,grad_norm: 0.9999999742350845, iteration: 111206
loss: 1.3500871658325195,grad_norm: 0.9999997904540235, iteration: 111207
loss: 1.330255150794983,grad_norm: 0.999999980702067, iteration: 111208
loss: 1.0910955667495728,grad_norm: 0.9999994983715506, iteration: 111209
loss: 1.1517187356948853,grad_norm: 0.9999995094470343, iteration: 111210
loss: 1.5632480382919312,grad_norm: 1.000000075097171, iteration: 111211
loss: 1.2508596181869507,grad_norm: 0.9999999408073207, iteration: 111212
loss: 1.1840497255325317,grad_norm: 0.9999996810036015, iteration: 111213
loss: 1.1472288370132446,grad_norm: 0.9999991108138868, iteration: 111214
loss: 1.8228124380111694,grad_norm: 0.9999999846538125, iteration: 111215
loss: 1.1197739839553833,grad_norm: 0.9999993179762698, iteration: 111216
loss: 1.2276452779769897,grad_norm: 0.9999999316574861, iteration: 111217
loss: 1.7743735313415527,grad_norm: 0.999999920194399, iteration: 111218
loss: 1.5918943881988525,grad_norm: 1.0000000139572611, iteration: 111219
loss: 1.251274585723877,grad_norm: 0.9999996139575731, iteration: 111220
loss: 2.3282694816589355,grad_norm: 0.999999969637972, iteration: 111221
loss: 1.5985091924667358,grad_norm: 0.9999999303378135, iteration: 111222
loss: 1.3634799718856812,grad_norm: 0.9999999905098482, iteration: 111223
loss: 1.5988378524780273,grad_norm: 1.000000054291852, iteration: 111224
loss: 1.6029607057571411,grad_norm: 1.0000000409822278, iteration: 111225
loss: 1.8914248943328857,grad_norm: 1.0000000203828678, iteration: 111226
loss: 1.3636754751205444,grad_norm: 0.9999998451558388, iteration: 111227
loss: 1.4125455617904663,grad_norm: 0.9999997862305985, iteration: 111228
loss: 1.3476917743682861,grad_norm: 1.0000000197811076, iteration: 111229
loss: 1.4468799829483032,grad_norm: 0.9999997704990067, iteration: 111230
loss: 1.5638861656188965,grad_norm: 0.9999999492667391, iteration: 111231
loss: 1.3088188171386719,grad_norm: 0.9999998636370692, iteration: 111232
loss: 1.3138785362243652,grad_norm: 1.000000046898279, iteration: 111233
loss: 1.3800777196884155,grad_norm: 0.9999999535630136, iteration: 111234
loss: 1.6584781408309937,grad_norm: 1.0000000100685744, iteration: 111235
loss: 1.879865288734436,grad_norm: 0.9999999639700025, iteration: 111236
loss: 1.3088840246200562,grad_norm: 0.9999998332802732, iteration: 111237
loss: 1.2258559465408325,grad_norm: 0.9999998200100234, iteration: 111238
loss: 1.271013855934143,grad_norm: 0.9999997780265667, iteration: 111239
loss: 1.2080191373825073,grad_norm: 0.9999998254429648, iteration: 111240
loss: 1.0304055213928223,grad_norm: 0.9654343658171686, iteration: 111241
loss: 1.1736984252929688,grad_norm: 0.9999995726076127, iteration: 111242
loss: 1.1698253154754639,grad_norm: 0.9999995727463273, iteration: 111243
loss: 1.1725035905838013,grad_norm: 0.9999999064969176, iteration: 111244
loss: 1.1725951433181763,grad_norm: 0.999999533612763, iteration: 111245
loss: 1.2776365280151367,grad_norm: 0.9999999076673651, iteration: 111246
loss: 1.2684688568115234,grad_norm: 0.9999995797537138, iteration: 111247
loss: 1.1361336708068848,grad_norm: 0.9999998371787027, iteration: 111248
loss: 1.2166796922683716,grad_norm: 0.9999998382999512, iteration: 111249
loss: 1.0731714963912964,grad_norm: 0.978570961148454, iteration: 111250
loss: 1.1017042398452759,grad_norm: 0.9999994127377725, iteration: 111251
loss: 1.0968965291976929,grad_norm: 0.9999991047907453, iteration: 111252
loss: 1.049727439880371,grad_norm: 0.9999995782524174, iteration: 111253
loss: 1.056154489517212,grad_norm: 0.9999994033478043, iteration: 111254
loss: 1.1451014280319214,grad_norm: 0.9999998463636578, iteration: 111255
loss: 1.100581407546997,grad_norm: 0.999999499818115, iteration: 111256
loss: 1.0767192840576172,grad_norm: 0.999999288081155, iteration: 111257
loss: 1.1859997510910034,grad_norm: 0.9999992493028843, iteration: 111258
loss: 1.1261160373687744,grad_norm: 0.9999994717683364, iteration: 111259
loss: 1.052217721939087,grad_norm: 0.9999996923918826, iteration: 111260
loss: 1.0240885019302368,grad_norm: 0.9203325773632683, iteration: 111261
loss: 1.0245336294174194,grad_norm: 0.9999996343126477, iteration: 111262
loss: 1.074288249015808,grad_norm: 0.9999994508269808, iteration: 111263
loss: 1.0706769227981567,grad_norm: 0.9999990828928782, iteration: 111264
loss: 1.1241800785064697,grad_norm: 0.9999997798799168, iteration: 111265
loss: 1.015981674194336,grad_norm: 0.9999991332276152, iteration: 111266
loss: 1.09116530418396,grad_norm: 0.9999999205940268, iteration: 111267
loss: 1.03257417678833,grad_norm: 0.9999995287871509, iteration: 111268
loss: 1.1591204404830933,grad_norm: 0.9999994101000027, iteration: 111269
loss: 1.0197550058364868,grad_norm: 0.9999989813366519, iteration: 111270
loss: 1.0256038904190063,grad_norm: 0.9999991159703514, iteration: 111271
loss: 1.0290427207946777,grad_norm: 0.9999998196276465, iteration: 111272
loss: 1.0721549987792969,grad_norm: 1.0000000130763216, iteration: 111273
loss: 1.0219718217849731,grad_norm: 0.9999993415636323, iteration: 111274
loss: 0.969688355922699,grad_norm: 0.9999990630272666, iteration: 111275
loss: 1.2102640867233276,grad_norm: 0.9999998603701888, iteration: 111276
loss: 1.0055307149887085,grad_norm: 0.9395867989090484, iteration: 111277
loss: 0.9951292276382446,grad_norm: 0.6739609577214679, iteration: 111278
loss: 1.1901984214782715,grad_norm: 0.9999998010236397, iteration: 111279
loss: 0.9884204864501953,grad_norm: 0.9999990944947826, iteration: 111280
loss: 1.0563101768493652,grad_norm: 0.9999993826148427, iteration: 111281
loss: 1.0468101501464844,grad_norm: 0.9999995034729481, iteration: 111282
loss: 0.9995086789131165,grad_norm: 0.9103322225398582, iteration: 111283
loss: 1.0174989700317383,grad_norm: 0.9999992026508588, iteration: 111284
loss: 1.0399268865585327,grad_norm: 0.9996065788967635, iteration: 111285
loss: 0.9843034744262695,grad_norm: 0.8352093803862243, iteration: 111286
loss: 1.0467122793197632,grad_norm: 0.9999993520105813, iteration: 111287
loss: 1.100574254989624,grad_norm: 0.9999998631436777, iteration: 111288
loss: 1.1494895219802856,grad_norm: 0.9999999759506271, iteration: 111289
loss: 0.9943316578865051,grad_norm: 0.9999992535757616, iteration: 111290
loss: 1.0448172092437744,grad_norm: 0.9999995156128869, iteration: 111291
loss: 1.017970323562622,grad_norm: 0.9999990885968152, iteration: 111292
loss: 0.9972255229949951,grad_norm: 0.9999658555627329, iteration: 111293
loss: 0.9922878742218018,grad_norm: 0.8936954120696037, iteration: 111294
loss: 1.0439939498901367,grad_norm: 0.9999992976859692, iteration: 111295
loss: 1.0693796873092651,grad_norm: 0.9999995558443753, iteration: 111296
loss: 1.0000436305999756,grad_norm: 0.9999993986051562, iteration: 111297
loss: 1.0368385314941406,grad_norm: 0.9999990389171803, iteration: 111298
loss: 1.0848881006240845,grad_norm: 0.9999998818236426, iteration: 111299
loss: 1.1643050909042358,grad_norm: 0.9999994415299448, iteration: 111300
loss: 1.0782657861709595,grad_norm: 0.9999999169561885, iteration: 111301
loss: 1.0121972560882568,grad_norm: 0.9245347114790601, iteration: 111302
loss: 0.9704211354255676,grad_norm: 0.8451930764427384, iteration: 111303
loss: 1.0700207948684692,grad_norm: 0.9999995783178449, iteration: 111304
loss: 0.9776323437690735,grad_norm: 0.9379648810700731, iteration: 111305
loss: 0.9781277179718018,grad_norm: 0.90665330009715, iteration: 111306
loss: 1.039570689201355,grad_norm: 0.8436515380874346, iteration: 111307
loss: 1.042024850845337,grad_norm: 0.9999996554475159, iteration: 111308
loss: 1.2371951341629028,grad_norm: 0.9999999163647267, iteration: 111309
loss: 1.0186524391174316,grad_norm: 0.9999997699508475, iteration: 111310
loss: 0.9970991611480713,grad_norm: 0.9999990273875539, iteration: 111311
loss: 1.0310044288635254,grad_norm: 0.9245931533958457, iteration: 111312
loss: 1.067670464515686,grad_norm: 0.9999997356688362, iteration: 111313
loss: 1.103838562965393,grad_norm: 0.8968679016204462, iteration: 111314
loss: 1.163415551185608,grad_norm: 0.9999999260782592, iteration: 111315
loss: 0.9898815751075745,grad_norm: 0.9999997010573297, iteration: 111316
loss: 0.9813185334205627,grad_norm: 0.9999991344960644, iteration: 111317
loss: 1.0261913537979126,grad_norm: 0.9999994462908521, iteration: 111318
loss: 1.1321768760681152,grad_norm: 0.9999996156724935, iteration: 111319
loss: 0.9999594688415527,grad_norm: 0.8373059082823134, iteration: 111320
loss: 1.0315518379211426,grad_norm: 0.9999997202251127, iteration: 111321
loss: 1.074295163154602,grad_norm: 0.7883600298082395, iteration: 111322
loss: 1.0190619230270386,grad_norm: 0.9999989956672376, iteration: 111323
loss: 0.9897868037223816,grad_norm: 0.9999997517415922, iteration: 111324
loss: 1.1126004457473755,grad_norm: 0.9999991713236206, iteration: 111325
loss: 1.0058506727218628,grad_norm: 0.9999993066863923, iteration: 111326
loss: 1.0029585361480713,grad_norm: 0.9999995612763072, iteration: 111327
loss: 1.0176284313201904,grad_norm: 0.9999997930528011, iteration: 111328
loss: 1.0884422063827515,grad_norm: 0.9999990807556175, iteration: 111329
loss: 0.9862110018730164,grad_norm: 0.9999990993454018, iteration: 111330
loss: 1.0454909801483154,grad_norm: 0.9999992385650365, iteration: 111331
loss: 1.1239395141601562,grad_norm: 0.9999999537231443, iteration: 111332
loss: 1.075684905052185,grad_norm: 0.9999995042067388, iteration: 111333
loss: 1.0915398597717285,grad_norm: 0.9999991988723876, iteration: 111334
loss: 1.060532569885254,grad_norm: 0.9209341652282775, iteration: 111335
loss: 1.0108816623687744,grad_norm: 0.8292097692457292, iteration: 111336
loss: 1.0223594903945923,grad_norm: 0.9999997179079296, iteration: 111337
loss: 1.1075259447097778,grad_norm: 0.9999997121091796, iteration: 111338
loss: 1.1659091711044312,grad_norm: 0.999999851141512, iteration: 111339
loss: 1.0426313877105713,grad_norm: 0.999999257643068, iteration: 111340
loss: 1.053698182106018,grad_norm: 0.999999680176681, iteration: 111341
loss: 1.0869858264923096,grad_norm: 0.9999991927266154, iteration: 111342
loss: 1.0406057834625244,grad_norm: 0.9999990771664199, iteration: 111343
loss: 1.001634955406189,grad_norm: 0.9433524094436561, iteration: 111344
loss: 1.03298020362854,grad_norm: 0.9999992120131083, iteration: 111345
loss: 1.093395471572876,grad_norm: 0.9999994822214044, iteration: 111346
loss: 1.0274229049682617,grad_norm: 0.8697745904630795, iteration: 111347
loss: 1.056546926498413,grad_norm: 0.9999995305167283, iteration: 111348
loss: 1.0230811834335327,grad_norm: 0.9622214836661389, iteration: 111349
loss: 1.0135605335235596,grad_norm: 0.9506402902299963, iteration: 111350
loss: 1.1888777017593384,grad_norm: 0.9999997089534611, iteration: 111351
loss: 1.09809148311615,grad_norm: 0.9999994551696637, iteration: 111352
loss: 1.0426225662231445,grad_norm: 0.9999994559959412, iteration: 111353
loss: 1.0493412017822266,grad_norm: 0.9999996260437051, iteration: 111354
loss: 1.0279537439346313,grad_norm: 0.9597399943942503, iteration: 111355
loss: 1.1333636045455933,grad_norm: 0.9999992324512962, iteration: 111356
loss: 1.0602573156356812,grad_norm: 0.9999999952444093, iteration: 111357
loss: 1.1621416807174683,grad_norm: 0.9999994545614266, iteration: 111358
loss: 1.0145736932754517,grad_norm: 0.9123906395790342, iteration: 111359
loss: 1.1167093515396118,grad_norm: 0.8989852913790239, iteration: 111360
loss: 1.0083303451538086,grad_norm: 0.8670898898177555, iteration: 111361
loss: 1.0121477842330933,grad_norm: 0.8498160011443684, iteration: 111362
loss: 1.0964137315750122,grad_norm: 0.9999998718899663, iteration: 111363
loss: 1.1579524278640747,grad_norm: 0.999999831703886, iteration: 111364
loss: 1.0484898090362549,grad_norm: 0.9947262731374654, iteration: 111365
loss: 1.3089154958724976,grad_norm: 0.9999997881239407, iteration: 111366
loss: 1.1369760036468506,grad_norm: 0.9999995561024633, iteration: 111367
loss: 1.1054211854934692,grad_norm: 0.9999997957983745, iteration: 111368
loss: 1.0290592908859253,grad_norm: 0.999999863847266, iteration: 111369
loss: 1.0755289793014526,grad_norm: 0.8077352306706276, iteration: 111370
loss: 1.0858831405639648,grad_norm: 0.999999099818276, iteration: 111371
loss: 1.1576114892959595,grad_norm: 0.9999999521115598, iteration: 111372
loss: 1.0881460905075073,grad_norm: 0.9999993530234405, iteration: 111373
loss: 1.2153252363204956,grad_norm: 0.9999999022641455, iteration: 111374
loss: 1.0238455533981323,grad_norm: 0.9999990514806273, iteration: 111375
loss: 0.9992574453353882,grad_norm: 0.9999992735634575, iteration: 111376
loss: 1.1066890954971313,grad_norm: 0.9999991093890617, iteration: 111377
loss: 1.047561526298523,grad_norm: 0.9999994455669755, iteration: 111378
loss: 1.350607991218567,grad_norm: 0.9999995589700539, iteration: 111379
loss: 1.1307156085968018,grad_norm: 0.9999993118990005, iteration: 111380
loss: 1.2373601198196411,grad_norm: 0.9999999097681221, iteration: 111381
loss: 1.0609928369522095,grad_norm: 0.9999995216413033, iteration: 111382
loss: 1.073161244392395,grad_norm: 1.0000000736093486, iteration: 111383
loss: 1.134602665901184,grad_norm: 0.9999998589477146, iteration: 111384
loss: 1.2851392030715942,grad_norm: 0.9999996731203346, iteration: 111385
loss: 1.0457103252410889,grad_norm: 0.9999990740386343, iteration: 111386
loss: 1.0083507299423218,grad_norm: 0.7982648341744519, iteration: 111387
loss: 1.3957608938217163,grad_norm: 0.999999656729349, iteration: 111388
loss: 1.0318063497543335,grad_norm: 0.8724161939060991, iteration: 111389
loss: 1.1238367557525635,grad_norm: 0.9999994188390811, iteration: 111390
loss: 1.0864949226379395,grad_norm: 0.8553991209569344, iteration: 111391
loss: 1.4576842784881592,grad_norm: 0.9999998122374958, iteration: 111392
loss: 1.0410535335540771,grad_norm: 0.9999996867440757, iteration: 111393
loss: 1.2091058492660522,grad_norm: 0.999999897879653, iteration: 111394
loss: 1.1558513641357422,grad_norm: 0.9999998419782052, iteration: 111395
loss: 1.2244701385498047,grad_norm: 0.9999996221336533, iteration: 111396
loss: 1.1893683671951294,grad_norm: 0.9999999215982321, iteration: 111397
loss: 1.0937395095825195,grad_norm: 0.9999999678410039, iteration: 111398
loss: 1.516414761543274,grad_norm: 1.0000000506514324, iteration: 111399
loss: 1.1002384424209595,grad_norm: 1.0000000088826144, iteration: 111400
loss: 1.1263976097106934,grad_norm: 0.9999995804599408, iteration: 111401
loss: 1.112716555595398,grad_norm: 0.8542805381566358, iteration: 111402
loss: 1.1134332418441772,grad_norm: 0.9999990372595304, iteration: 111403
loss: 1.0105441808700562,grad_norm: 0.9999996684174841, iteration: 111404
loss: 1.3345627784729004,grad_norm: 1.0000001271924583, iteration: 111405
loss: 1.250396728515625,grad_norm: 1.000000053692113, iteration: 111406
loss: 1.2820229530334473,grad_norm: 0.9999999515078565, iteration: 111407
loss: 1.2274446487426758,grad_norm: 0.9999995253628784, iteration: 111408
loss: 1.323004961013794,grad_norm: 0.9999995675907604, iteration: 111409
loss: 1.0146034955978394,grad_norm: 0.9999995461239963, iteration: 111410
loss: 1.0831372737884521,grad_norm: 0.9999998256961115, iteration: 111411
loss: 1.4720550775527954,grad_norm: 0.9999996972289611, iteration: 111412
loss: 1.3226746320724487,grad_norm: 0.9999999254368284, iteration: 111413
loss: 1.2176445722579956,grad_norm: 0.9999999465691617, iteration: 111414
loss: 1.2124905586242676,grad_norm: 0.9999992235096914, iteration: 111415
loss: 1.3849670886993408,grad_norm: 0.9999999513862129, iteration: 111416
loss: 1.1431647539138794,grad_norm: 0.9999999305566354, iteration: 111417
loss: 1.0642915964126587,grad_norm: 0.9999991447373728, iteration: 111418
loss: 1.1632288694381714,grad_norm: 0.9999996191103723, iteration: 111419
loss: 1.044937014579773,grad_norm: 0.9526923122637013, iteration: 111420
loss: 1.1419503688812256,grad_norm: 1.0000000071814936, iteration: 111421
loss: 1.4557054042816162,grad_norm: 0.9999996047952696, iteration: 111422
loss: 1.0315581560134888,grad_norm: 0.9999996744826453, iteration: 111423
loss: 1.0723780393600464,grad_norm: 0.9999995461289743, iteration: 111424
loss: 1.1452727317810059,grad_norm: 1.0000000065714552, iteration: 111425
loss: 0.9907072186470032,grad_norm: 0.9114125396612635, iteration: 111426
loss: 1.302994728088379,grad_norm: 0.9999998282733753, iteration: 111427
loss: 1.0113813877105713,grad_norm: 0.9999994668851071, iteration: 111428
loss: 1.0646543502807617,grad_norm: 0.9999991398242443, iteration: 111429
loss: 1.1741859912872314,grad_norm: 0.9999994258565893, iteration: 111430
loss: 1.0574067831039429,grad_norm: 0.9673140859439348, iteration: 111431
loss: 1.1838877201080322,grad_norm: 0.9999998749049847, iteration: 111432
loss: 1.3289215564727783,grad_norm: 0.9999998403165061, iteration: 111433
loss: 1.0413897037506104,grad_norm: 0.9999993776379928, iteration: 111434
loss: 0.9829323887825012,grad_norm: 0.8811305520092879, iteration: 111435
loss: 1.0000486373901367,grad_norm: 0.9999992563706387, iteration: 111436
loss: 1.038697600364685,grad_norm: 0.9983006828263411, iteration: 111437
loss: 1.058871865272522,grad_norm: 0.9999997758848294, iteration: 111438
loss: 1.0652207136154175,grad_norm: 0.9999998349419166, iteration: 111439
loss: 1.0371294021606445,grad_norm: 0.961151868782638, iteration: 111440
loss: 1.0729811191558838,grad_norm: 0.9999996027578767, iteration: 111441
loss: 1.1284016370773315,grad_norm: 0.9999991850347419, iteration: 111442
loss: 1.3221644163131714,grad_norm: 0.9999999233735037, iteration: 111443
loss: 1.0883530378341675,grad_norm: 0.9999991922018394, iteration: 111444
loss: 1.0366489887237549,grad_norm: 0.9999991407870655, iteration: 111445
loss: 1.0881872177124023,grad_norm: 0.9999998191083216, iteration: 111446
loss: 1.0711493492126465,grad_norm: 0.9999999003149246, iteration: 111447
loss: 1.0988839864730835,grad_norm: 0.9999999152667135, iteration: 111448
loss: 1.0187784433364868,grad_norm: 0.8141476477035627, iteration: 111449
loss: 1.068671703338623,grad_norm: 0.9192997547311643, iteration: 111450
loss: 1.0232255458831787,grad_norm: 0.9999991849888381, iteration: 111451
loss: 1.193576693534851,grad_norm: 0.9999996294532545, iteration: 111452
loss: 1.019501805305481,grad_norm: 0.9747744307748721, iteration: 111453
loss: 1.1848407983779907,grad_norm: 0.9999998467620612, iteration: 111454
loss: 1.1159247159957886,grad_norm: 0.9999993525036814, iteration: 111455
loss: 0.9862232208251953,grad_norm: 0.9999992260437063, iteration: 111456
loss: 1.0609866380691528,grad_norm: 0.9999991635245611, iteration: 111457
loss: 1.040709137916565,grad_norm: 0.9999996507997251, iteration: 111458
loss: 1.0274722576141357,grad_norm: 0.9999994944610414, iteration: 111459
loss: 1.0310114622116089,grad_norm: 0.9999991903427898, iteration: 111460
loss: 1.0191336870193481,grad_norm: 0.9999993148048446, iteration: 111461
loss: 1.051063060760498,grad_norm: 0.9999993262531254, iteration: 111462
loss: 1.0417174100875854,grad_norm: 0.9861819499360132, iteration: 111463
loss: 1.0156108140945435,grad_norm: 0.9048574470061221, iteration: 111464
loss: 1.1642510890960693,grad_norm: 0.9999999944267588, iteration: 111465
loss: 1.0067931413650513,grad_norm: 0.9999994371835539, iteration: 111466
loss: 1.008724570274353,grad_norm: 0.9253550255629345, iteration: 111467
loss: 1.0459455251693726,grad_norm: 0.9999995838864832, iteration: 111468
loss: 1.0071008205413818,grad_norm: 0.9999994131660647, iteration: 111469
loss: 1.0376826524734497,grad_norm: 0.9999998467969311, iteration: 111470
loss: 1.207155704498291,grad_norm: 0.999999901804549, iteration: 111471
loss: 1.3135923147201538,grad_norm: 0.9999999437606598, iteration: 111472
loss: 0.9728320837020874,grad_norm: 0.9999990324448881, iteration: 111473
loss: 1.0231781005859375,grad_norm: 0.9999997604228811, iteration: 111474
loss: 1.0451630353927612,grad_norm: 1.0000000154537763, iteration: 111475
loss: 1.004875898361206,grad_norm: 0.919741795562554, iteration: 111476
loss: 1.0957883596420288,grad_norm: 0.9999999353598756, iteration: 111477
loss: 1.0654966831207275,grad_norm: 0.9999991791025233, iteration: 111478
loss: 1.1382193565368652,grad_norm: 0.9999998899147098, iteration: 111479
loss: 1.2183488607406616,grad_norm: 0.9999998194375721, iteration: 111480
loss: 0.9847307801246643,grad_norm: 0.9252515163649927, iteration: 111481
loss: 0.9822433590888977,grad_norm: 0.9999992913133432, iteration: 111482
loss: 0.9889971017837524,grad_norm: 0.9891173778545207, iteration: 111483
loss: 1.1348718404769897,grad_norm: 0.9999995411060847, iteration: 111484
loss: 1.1892975568771362,grad_norm: 0.9999999460550968, iteration: 111485
loss: 1.208592414855957,grad_norm: 0.9999997339267639, iteration: 111486
loss: 1.2166118621826172,grad_norm: 0.9999995066251484, iteration: 111487
loss: 1.1172956228256226,grad_norm: 0.9999995565525631, iteration: 111488
loss: 1.1025582551956177,grad_norm: 0.9999991147334443, iteration: 111489
loss: 1.0913103818893433,grad_norm: 0.9999998614422507, iteration: 111490
loss: 1.1997050046920776,grad_norm: 0.9999997928075482, iteration: 111491
loss: 1.056915521621704,grad_norm: 0.9999998983176089, iteration: 111492
loss: 1.0576212406158447,grad_norm: 0.9999992816648596, iteration: 111493
loss: 0.9768544435501099,grad_norm: 0.8455449555158157, iteration: 111494
loss: 1.2228904962539673,grad_norm: 0.9999998213002464, iteration: 111495
loss: 0.9803615808486938,grad_norm: 0.909249425121968, iteration: 111496
loss: 1.017360806465149,grad_norm: 0.802765404416514, iteration: 111497
loss: 1.0822237730026245,grad_norm: 0.9999991555402168, iteration: 111498
loss: 1.1688545942306519,grad_norm: 0.9999998496844693, iteration: 111499
loss: 1.1034283638000488,grad_norm: 0.9999999668427532, iteration: 111500
loss: 1.0515434741973877,grad_norm: 0.9999994002351908, iteration: 111501
loss: 1.0492397546768188,grad_norm: 0.9738525206051705, iteration: 111502
loss: 1.1221199035644531,grad_norm: 0.9999996224513373, iteration: 111503
loss: 1.135179042816162,grad_norm: 0.9999999262835351, iteration: 111504
loss: 1.1937839984893799,grad_norm: 0.9999995853190277, iteration: 111505
loss: 1.3098088502883911,grad_norm: 0.9999995826912169, iteration: 111506
loss: 1.0781004428863525,grad_norm: 0.9999997934051882, iteration: 111507
loss: 1.1063894033432007,grad_norm: 0.9999998094920443, iteration: 111508
loss: 1.3055499792099,grad_norm: 0.9999998627091121, iteration: 111509
loss: 1.1595381498336792,grad_norm: 0.9999997135824723, iteration: 111510
loss: 1.0425727367401123,grad_norm: 0.9999991608494587, iteration: 111511
loss: 1.11306893825531,grad_norm: 0.9999994541432481, iteration: 111512
loss: 1.1929996013641357,grad_norm: 0.9999997376412199, iteration: 111513
loss: 1.1263275146484375,grad_norm: 0.9999999787445288, iteration: 111514
loss: 1.052232265472412,grad_norm: 0.99999969033585, iteration: 111515
loss: 1.0432902574539185,grad_norm: 0.9999999757465675, iteration: 111516
loss: 1.0118935108184814,grad_norm: 0.8258065861613322, iteration: 111517
loss: 1.1657923460006714,grad_norm: 0.9999996727721284, iteration: 111518
loss: 1.595044493675232,grad_norm: 0.9999999901399469, iteration: 111519
loss: 1.1653777360916138,grad_norm: 0.999999483883877, iteration: 111520
loss: 1.4232935905456543,grad_norm: 0.9999998455731964, iteration: 111521
loss: 1.0689493417739868,grad_norm: 0.9999998559858755, iteration: 111522
loss: 1.0761282444000244,grad_norm: 0.9999992030287927, iteration: 111523
loss: 1.2986019849777222,grad_norm: 0.9999997788159126, iteration: 111524
loss: 1.0401443243026733,grad_norm: 0.9999992566535357, iteration: 111525
loss: 1.0938200950622559,grad_norm: 0.9999997284198091, iteration: 111526
loss: 1.2291522026062012,grad_norm: 0.9999991342586112, iteration: 111527
loss: 1.064626932144165,grad_norm: 0.9999992198166722, iteration: 111528
loss: 1.131279706954956,grad_norm: 0.9999999739938006, iteration: 111529
loss: 1.1366708278656006,grad_norm: 0.9070630917862872, iteration: 111530
loss: 1.0144399404525757,grad_norm: 0.8508578911685959, iteration: 111531
loss: 1.2039893865585327,grad_norm: 0.9999996190540739, iteration: 111532
loss: 1.5058650970458984,grad_norm: 0.9999998105547528, iteration: 111533
loss: 1.1447032690048218,grad_norm: 0.9999994126749868, iteration: 111534
loss: 1.1052546501159668,grad_norm: 0.9999993002505325, iteration: 111535
loss: 1.2355964183807373,grad_norm: 0.9999995625578701, iteration: 111536
loss: 1.0917596817016602,grad_norm: 0.9999994589646635, iteration: 111537
loss: 1.1003501415252686,grad_norm: 0.9999998329060975, iteration: 111538
loss: 1.0745118856430054,grad_norm: 0.9999998573573152, iteration: 111539
loss: 1.0642575025558472,grad_norm: 0.9999999064960469, iteration: 111540
loss: 1.3075629472732544,grad_norm: 0.9999995563534267, iteration: 111541
loss: 1.187142252922058,grad_norm: 0.999999800822852, iteration: 111542
loss: 1.424333930015564,grad_norm: 0.9999998535562317, iteration: 111543
loss: 1.211843490600586,grad_norm: 1.0000000543237753, iteration: 111544
loss: 1.284253478050232,grad_norm: 0.9999995094946441, iteration: 111545
loss: 0.9872494339942932,grad_norm: 0.9825801163288812, iteration: 111546
loss: 1.0934675931930542,grad_norm: 0.9999999859594095, iteration: 111547
loss: 1.263649582862854,grad_norm: 0.9999995780051347, iteration: 111548
loss: 1.253336787223816,grad_norm: 0.9999994374530056, iteration: 111549
loss: 1.5283676385879517,grad_norm: 0.9999999296437055, iteration: 111550
loss: 1.3706070184707642,grad_norm: 0.9999995708477041, iteration: 111551
loss: 1.39857017993927,grad_norm: 0.9999998459102906, iteration: 111552
loss: 1.377505898475647,grad_norm: 1.0000000169997114, iteration: 111553
loss: 1.3151686191558838,grad_norm: 0.9999999029970916, iteration: 111554
loss: 1.1874253749847412,grad_norm: 0.9999997605499256, iteration: 111555
loss: 0.9708629846572876,grad_norm: 0.9999990906955739, iteration: 111556
loss: 1.0401197671890259,grad_norm: 0.999999202572675, iteration: 111557
loss: 1.3102879524230957,grad_norm: 0.9999998525729253, iteration: 111558
loss: 1.0828129053115845,grad_norm: 0.9999993196955133, iteration: 111559
loss: 1.121269941329956,grad_norm: 0.9999997089973607, iteration: 111560
loss: 0.9805365204811096,grad_norm: 0.9352965154891824, iteration: 111561
loss: 1.1128547191619873,grad_norm: 1.000000001820126, iteration: 111562
loss: 1.0209826231002808,grad_norm: 0.9607132366879315, iteration: 111563
loss: 1.03566575050354,grad_norm: 0.9999995524958534, iteration: 111564
loss: 1.054045557975769,grad_norm: 0.9999999186445132, iteration: 111565
loss: 1.3319748640060425,grad_norm: 0.9999998052973411, iteration: 111566
loss: 1.0847270488739014,grad_norm: 0.9999993359417122, iteration: 111567
loss: 1.107540249824524,grad_norm: 0.9999996184934073, iteration: 111568
loss: 0.9859713912010193,grad_norm: 0.9642651521596995, iteration: 111569
loss: 1.014583945274353,grad_norm: 0.924492615391524, iteration: 111570
loss: 1.0894496440887451,grad_norm: 0.9999993520685405, iteration: 111571
loss: 1.2733755111694336,grad_norm: 0.9999998489851709, iteration: 111572
loss: 1.1077487468719482,grad_norm: 0.999999957595967, iteration: 111573
loss: 1.0324993133544922,grad_norm: 0.9112307842383126, iteration: 111574
loss: 1.0353654623031616,grad_norm: 0.9999998427364561, iteration: 111575
loss: 1.0442204475402832,grad_norm: 0.9999998942480687, iteration: 111576
loss: 1.0950764417648315,grad_norm: 0.9999997616198077, iteration: 111577
loss: 1.0568928718566895,grad_norm: 0.9999990671085006, iteration: 111578
loss: 1.0142953395843506,grad_norm: 0.8372636787264759, iteration: 111579
loss: 1.211495041847229,grad_norm: 0.9999996661960644, iteration: 111580
loss: 1.0165441036224365,grad_norm: 0.9999994276140429, iteration: 111581
loss: 1.0449929237365723,grad_norm: 0.9586600810322272, iteration: 111582
loss: 1.0193899869918823,grad_norm: 0.9999992650258269, iteration: 111583
loss: 1.0117517709732056,grad_norm: 0.9460807667101484, iteration: 111584
loss: 1.1175470352172852,grad_norm: 0.999999733971123, iteration: 111585
loss: 1.0664113759994507,grad_norm: 0.9999994902355145, iteration: 111586
loss: 1.3837671279907227,grad_norm: 0.9999999122303977, iteration: 111587
loss: 1.1863195896148682,grad_norm: 0.9999994522322414, iteration: 111588
loss: 1.0404560565948486,grad_norm: 0.9999998928428561, iteration: 111589
loss: 1.1426386833190918,grad_norm: 0.9999999248923132, iteration: 111590
loss: 1.231634259223938,grad_norm: 0.9999997786577781, iteration: 111591
loss: 1.1127123832702637,grad_norm: 0.9999999368585376, iteration: 111592
loss: 1.0727665424346924,grad_norm: 0.9999994968421945, iteration: 111593
loss: 1.1217055320739746,grad_norm: 0.9999996580042859, iteration: 111594
loss: 1.0302056074142456,grad_norm: 0.8506412634907513, iteration: 111595
loss: 1.345670461654663,grad_norm: 0.9999996446388675, iteration: 111596
loss: 1.129536747932434,grad_norm: 0.999999654529246, iteration: 111597
loss: 1.014986276626587,grad_norm: 0.9999992511230695, iteration: 111598
loss: 1.195924162864685,grad_norm: 0.9999999195820025, iteration: 111599
loss: 1.2355599403381348,grad_norm: 0.9999997396501803, iteration: 111600
loss: 1.2617850303649902,grad_norm: 0.9999999587865166, iteration: 111601
loss: 1.4237223863601685,grad_norm: 0.9999997270093554, iteration: 111602
loss: 1.182144284248352,grad_norm: 0.9999998039824002, iteration: 111603
loss: 1.3235434293746948,grad_norm: 0.999999765033088, iteration: 111604
loss: 1.1539016962051392,grad_norm: 0.9999995697597311, iteration: 111605
loss: 1.288797378540039,grad_norm: 0.9999998150397728, iteration: 111606
loss: 1.1262940168380737,grad_norm: 0.9999995718024772, iteration: 111607
loss: 1.0370498895645142,grad_norm: 0.9999993045393698, iteration: 111608
loss: 1.1167963743209839,grad_norm: 0.9999994071777005, iteration: 111609
loss: 1.117168664932251,grad_norm: 0.9999995954373538, iteration: 111610
loss: 1.1184817552566528,grad_norm: 0.999999673834126, iteration: 111611
loss: 1.1384905576705933,grad_norm: 0.9999990379677076, iteration: 111612
loss: 1.0610566139221191,grad_norm: 0.9999992309141705, iteration: 111613
loss: 1.204640507698059,grad_norm: 0.9999997543470753, iteration: 111614
loss: 1.2992908954620361,grad_norm: 0.9999997764196976, iteration: 111615
loss: 1.2296706438064575,grad_norm: 0.9999993000640788, iteration: 111616
loss: 1.1892091035842896,grad_norm: 0.9999993059180079, iteration: 111617
loss: 1.0038553476333618,grad_norm: 0.9999996803139084, iteration: 111618
loss: 1.0203777551651,grad_norm: 0.9693372549061378, iteration: 111619
loss: 1.2010588645935059,grad_norm: 0.9999997906267551, iteration: 111620
loss: 1.0116909742355347,grad_norm: 0.9999991090393346, iteration: 111621
loss: 1.0717594623565674,grad_norm: 0.9999991026368859, iteration: 111622
loss: 1.431731939315796,grad_norm: 1.0000000165699794, iteration: 111623
loss: 1.138017177581787,grad_norm: 1.0000000481865936, iteration: 111624
loss: 1.1657570600509644,grad_norm: 0.9999997384631018, iteration: 111625
loss: 1.007460355758667,grad_norm: 0.9999997870258577, iteration: 111626
loss: 1.0236567258834839,grad_norm: 0.9999992542783971, iteration: 111627
loss: 1.0454819202423096,grad_norm: 0.9999997791427703, iteration: 111628
loss: 1.0487889051437378,grad_norm: 0.9999991897752876, iteration: 111629
loss: 1.0516260862350464,grad_norm: 0.9999997567431954, iteration: 111630
loss: 1.0338894128799438,grad_norm: 0.9999991556114223, iteration: 111631
loss: 1.0280041694641113,grad_norm: 1.0000000361247254, iteration: 111632
loss: 1.1630234718322754,grad_norm: 0.9999997270862847, iteration: 111633
loss: 1.311734676361084,grad_norm: 0.9999999253988475, iteration: 111634
loss: 1.2748552560806274,grad_norm: 0.9999998956884875, iteration: 111635
loss: 1.0880769491195679,grad_norm: 0.9371503850550812, iteration: 111636
loss: 0.986973226070404,grad_norm: 0.7738435992441313, iteration: 111637
loss: 0.9862300753593445,grad_norm: 0.9337318360275911, iteration: 111638
loss: 1.1139036417007446,grad_norm: 0.9999998829161977, iteration: 111639
loss: 1.0197404623031616,grad_norm: 0.9999999010344681, iteration: 111640
loss: 1.0279531478881836,grad_norm: 0.999999027977314, iteration: 111641
loss: 1.0108685493469238,grad_norm: 0.9999993634028203, iteration: 111642
loss: 1.0312919616699219,grad_norm: 0.8571252574341319, iteration: 111643
loss: 0.9620571136474609,grad_norm: 0.9999996554211212, iteration: 111644
loss: 1.0414334535598755,grad_norm: 0.9999999284858319, iteration: 111645
loss: 1.2701846361160278,grad_norm: 0.99999993564725, iteration: 111646
loss: 1.0933997631072998,grad_norm: 0.9546581363901603, iteration: 111647
loss: 1.01654851436615,grad_norm: 0.9822987961231539, iteration: 111648
loss: 1.105904459953308,grad_norm: 0.9999998577680362, iteration: 111649
loss: 1.0338184833526611,grad_norm: 0.9999990795788758, iteration: 111650
loss: 1.2157617807388306,grad_norm: 0.9999997682392381, iteration: 111651
loss: 1.0321866273880005,grad_norm: 0.999999387801504, iteration: 111652
loss: 1.0565907955169678,grad_norm: 0.9999998540332125, iteration: 111653
loss: 0.9769490361213684,grad_norm: 0.9999991308873447, iteration: 111654
loss: 1.3118438720703125,grad_norm: 0.9999999276978301, iteration: 111655
loss: 1.0466581583023071,grad_norm: 0.9999992382336689, iteration: 111656
loss: 1.0226292610168457,grad_norm: 0.9999992159156653, iteration: 111657
loss: 1.158116102218628,grad_norm: 1.0000000468867152, iteration: 111658
loss: 1.0903899669647217,grad_norm: 0.9999995699938184, iteration: 111659
loss: 1.0473318099975586,grad_norm: 0.8419572021527945, iteration: 111660
loss: 1.2166005373001099,grad_norm: 0.9999999072961862, iteration: 111661
loss: 1.330580472946167,grad_norm: 0.9999997661986643, iteration: 111662
loss: 1.093416690826416,grad_norm: 0.9999994229525898, iteration: 111663
loss: 1.015213966369629,grad_norm: 0.9999992174993716, iteration: 111664
loss: 1.3855012655258179,grad_norm: 1.0000000449694442, iteration: 111665
loss: 1.2082520723342896,grad_norm: 0.9999996973181732, iteration: 111666
loss: 1.3238863945007324,grad_norm: 0.9999999365124208, iteration: 111667
loss: 0.9673289060592651,grad_norm: 0.9999990467391516, iteration: 111668
loss: 1.0247879028320312,grad_norm: 0.9999994750299013, iteration: 111669
loss: 1.0088598728179932,grad_norm: 0.9432538330762147, iteration: 111670
loss: 1.5407674312591553,grad_norm: 1.0000000244370961, iteration: 111671
loss: 1.1537871360778809,grad_norm: 0.9999998468405931, iteration: 111672
loss: 1.0275228023529053,grad_norm: 0.9999997982785922, iteration: 111673
loss: 1.0868172645568848,grad_norm: 0.9999995651033805, iteration: 111674
loss: 1.1461595296859741,grad_norm: 0.9999998235126643, iteration: 111675
loss: 1.1347384452819824,grad_norm: 0.9999998173086273, iteration: 111676
loss: 1.2144736051559448,grad_norm: 0.9999997538367206, iteration: 111677
loss: 1.1572751998901367,grad_norm: 0.9999992678715294, iteration: 111678
loss: 1.0303090810775757,grad_norm: 0.9885334370642409, iteration: 111679
loss: 1.029494285583496,grad_norm: 0.9999990865339944, iteration: 111680
loss: 1.124363660812378,grad_norm: 0.9999992384885594, iteration: 111681
loss: 1.3334496021270752,grad_norm: 0.9999999600995961, iteration: 111682
loss: 1.0288424491882324,grad_norm: 0.99999983117014, iteration: 111683
loss: 1.0953755378723145,grad_norm: 0.9999991514094834, iteration: 111684
loss: 1.271965503692627,grad_norm: 0.9999998762357536, iteration: 111685
loss: 1.1113266944885254,grad_norm: 0.9999997009301896, iteration: 111686
loss: 1.0801153182983398,grad_norm: 0.9999992958449743, iteration: 111687
loss: 1.0947105884552002,grad_norm: 0.9999993266860956, iteration: 111688
loss: 1.2618911266326904,grad_norm: 0.9999999612312007, iteration: 111689
loss: 1.129807710647583,grad_norm: 0.9999998911222905, iteration: 111690
loss: 1.2217837572097778,grad_norm: 0.9999996717153765, iteration: 111691
loss: 1.0388652086257935,grad_norm: 0.9749628229455016, iteration: 111692
loss: 1.038148045539856,grad_norm: 0.9220546599890721, iteration: 111693
loss: 1.0337904691696167,grad_norm: 0.9999998668254366, iteration: 111694
loss: 0.9737204313278198,grad_norm: 0.9999999687143774, iteration: 111695
loss: 1.2205619812011719,grad_norm: 0.9999999439858904, iteration: 111696
loss: 1.0541642904281616,grad_norm: 0.999999385269665, iteration: 111697
loss: 1.0370917320251465,grad_norm: 0.9999996692486605, iteration: 111698
loss: 1.0139459371566772,grad_norm: 0.99999940764, iteration: 111699
loss: 1.0440673828125,grad_norm: 0.9999994871829505, iteration: 111700
loss: 1.070813775062561,grad_norm: 0.9999990909594716, iteration: 111701
loss: 1.0640487670898438,grad_norm: 1.000000085851413, iteration: 111702
loss: 1.0053050518035889,grad_norm: 0.9474635349478756, iteration: 111703
loss: 1.0561084747314453,grad_norm: 0.9999997441427098, iteration: 111704
loss: 1.11678946018219,grad_norm: 0.9999995540259079, iteration: 111705
loss: 1.0168591737747192,grad_norm: 0.9999991605164896, iteration: 111706
loss: 1.043590784072876,grad_norm: 0.9999996866282144, iteration: 111707
loss: 1.2893009185791016,grad_norm: 1.0000000108332276, iteration: 111708
loss: 1.0524319410324097,grad_norm: 0.9999994080378531, iteration: 111709
loss: 1.156834363937378,grad_norm: 0.9999997987330492, iteration: 111710
loss: 1.041008710861206,grad_norm: 0.9999997905567264, iteration: 111711
loss: 0.9862263202667236,grad_norm: 0.999999225866003, iteration: 111712
loss: 1.1143726110458374,grad_norm: 0.9999996091602974, iteration: 111713
loss: 1.0685919523239136,grad_norm: 0.8767233754983355, iteration: 111714
loss: 1.3770803213119507,grad_norm: 0.9999996526751603, iteration: 111715
loss: 0.9871394634246826,grad_norm: 0.9999997951503062, iteration: 111716
loss: 1.086312174797058,grad_norm: 0.9999990455276646, iteration: 111717
loss: 1.0317049026489258,grad_norm: 0.9236298982885526, iteration: 111718
loss: 0.9945425391197205,grad_norm: 0.9999992922446361, iteration: 111719
loss: 1.0025954246520996,grad_norm: 0.8151556128991481, iteration: 111720
loss: 1.0957589149475098,grad_norm: 0.9999999612010995, iteration: 111721
loss: 1.215646505355835,grad_norm: 0.9999998859369376, iteration: 111722
loss: 1.0797582864761353,grad_norm: 0.9999992596566757, iteration: 111723
loss: 0.9934283494949341,grad_norm: 0.9999991555188378, iteration: 111724
loss: 1.051066279411316,grad_norm: 0.9999994429624975, iteration: 111725
loss: 1.0319794416427612,grad_norm: 0.9999998465034051, iteration: 111726
loss: 1.1789878606796265,grad_norm: 0.9999999128618785, iteration: 111727
loss: 0.9633696675300598,grad_norm: 0.9999992637786648, iteration: 111728
loss: 0.9998026490211487,grad_norm: 0.8383141695576806, iteration: 111729
loss: 0.9882093071937561,grad_norm: 0.9999995544819569, iteration: 111730
loss: 1.0887953042984009,grad_norm: 0.9999999581363792, iteration: 111731
loss: 1.1913650035858154,grad_norm: 0.9999997944380676, iteration: 111732
loss: 1.052242636680603,grad_norm: 0.9999995191745557, iteration: 111733
loss: 1.0710101127624512,grad_norm: 0.9056718922042296, iteration: 111734
loss: 1.0261856317520142,grad_norm: 0.9435468042031014, iteration: 111735
loss: 1.0394744873046875,grad_norm: 0.9999992736467314, iteration: 111736
loss: 0.9846118688583374,grad_norm: 0.9999992826495048, iteration: 111737
loss: 1.01577627658844,grad_norm: 0.9999989979673085, iteration: 111738
loss: 1.009735345840454,grad_norm: 0.9462280040957051, iteration: 111739
loss: 0.97514808177948,grad_norm: 0.9408544342126288, iteration: 111740
loss: 1.1377915143966675,grad_norm: 0.9999995423847658, iteration: 111741
loss: 0.9856337904930115,grad_norm: 0.999999241344838, iteration: 111742
loss: 1.0310653448104858,grad_norm: 0.8499281862570093, iteration: 111743
loss: 1.132938027381897,grad_norm: 0.9999996533872818, iteration: 111744
loss: 1.0786354541778564,grad_norm: 0.9999999385047815, iteration: 111745
loss: 1.0107122659683228,grad_norm: 0.9999994305491492, iteration: 111746
loss: 1.1674081087112427,grad_norm: 0.9999996688296021, iteration: 111747
loss: 0.9807462692260742,grad_norm: 0.7524406619324036, iteration: 111748
loss: 1.0398043394088745,grad_norm: 0.9999993277185759, iteration: 111749
loss: 1.000147819519043,grad_norm: 0.9999992086866116, iteration: 111750
loss: 1.013503074645996,grad_norm: 0.9999990348503397, iteration: 111751
loss: 1.0762114524841309,grad_norm: 0.9999996755973809, iteration: 111752
loss: 1.1357624530792236,grad_norm: 0.9999994615055717, iteration: 111753
loss: 1.0283339023590088,grad_norm: 0.9999998695743694, iteration: 111754
loss: 1.0886982679367065,grad_norm: 0.9999997621098089, iteration: 111755
loss: 1.0944162607192993,grad_norm: 0.9999990455974442, iteration: 111756
loss: 1.0966928005218506,grad_norm: 0.9999993806313799, iteration: 111757
loss: 1.0998470783233643,grad_norm: 0.9999992125219668, iteration: 111758
loss: 1.0399158000946045,grad_norm: 0.8516244756194615, iteration: 111759
loss: 1.0167728662490845,grad_norm: 0.9175198806881654, iteration: 111760
loss: 1.1271532773971558,grad_norm: 0.9999999777999639, iteration: 111761
loss: 1.05061936378479,grad_norm: 0.9999996867676407, iteration: 111762
loss: 1.0147027969360352,grad_norm: 0.8181645974973328, iteration: 111763
loss: 1.0072811841964722,grad_norm: 0.9999992097969034, iteration: 111764
loss: 0.9664298295974731,grad_norm: 0.8763813752623553, iteration: 111765
loss: 1.0033679008483887,grad_norm: 0.8964358572476796, iteration: 111766
loss: 1.0045915842056274,grad_norm: 0.8474980870699205, iteration: 111767
loss: 1.1201574802398682,grad_norm: 0.9999996706130081, iteration: 111768
loss: 0.987619161605835,grad_norm: 0.8375188110914275, iteration: 111769
loss: 0.9936677813529968,grad_norm: 0.9999996150154451, iteration: 111770
loss: 1.0591917037963867,grad_norm: 0.9999999493163163, iteration: 111771
loss: 1.1176384687423706,grad_norm: 0.9999991736852116, iteration: 111772
loss: 1.0176936388015747,grad_norm: 0.9999989546089049, iteration: 111773
loss: 1.007738471031189,grad_norm: 0.8073462493092964, iteration: 111774
loss: 1.0239617824554443,grad_norm: 0.9999990625882986, iteration: 111775
loss: 0.9757567644119263,grad_norm: 0.9202422877829951, iteration: 111776
loss: 1.0427078008651733,grad_norm: 0.9999997108748702, iteration: 111777
loss: 0.9805691838264465,grad_norm: 0.999999157525948, iteration: 111778
loss: 1.0744714736938477,grad_norm: 0.9999994662422882, iteration: 111779
loss: 1.035498023033142,grad_norm: 0.8203436644375797, iteration: 111780
loss: 1.055261254310608,grad_norm: 0.9999993505336382, iteration: 111781
loss: 1.18522310256958,grad_norm: 0.9999996524108498, iteration: 111782
loss: 1.0367274284362793,grad_norm: 1.0000000180514168, iteration: 111783
loss: 1.1015933752059937,grad_norm: 0.9999992859267264, iteration: 111784
loss: 1.0490809679031372,grad_norm: 0.9999995170527568, iteration: 111785
loss: 1.070088505744934,grad_norm: 0.9999996885465334, iteration: 111786
loss: 0.986909806728363,grad_norm: 0.8567776675430762, iteration: 111787
loss: 1.0184803009033203,grad_norm: 0.7399103576020609, iteration: 111788
loss: 1.000900149345398,grad_norm: 0.8074439770474379, iteration: 111789
loss: 0.9921739101409912,grad_norm: 0.9999991235371496, iteration: 111790
loss: 0.9597579836845398,grad_norm: 0.8897211408202529, iteration: 111791
loss: 1.020276665687561,grad_norm: 0.9999999090818347, iteration: 111792
loss: 1.0754201412200928,grad_norm: 0.9999993513458052, iteration: 111793
loss: 1.053524374961853,grad_norm: 0.9999993775922009, iteration: 111794
loss: 1.0292962789535522,grad_norm: 0.9999999146622279, iteration: 111795
loss: 1.1350356340408325,grad_norm: 0.9999997955991151, iteration: 111796
loss: 0.9690989255905151,grad_norm: 0.9999992245567367, iteration: 111797
loss: 1.0807634592056274,grad_norm: 0.9999990299252505, iteration: 111798
loss: 1.0699348449707031,grad_norm: 0.9999997026295347, iteration: 111799
loss: 1.0947444438934326,grad_norm: 0.9999997093512305, iteration: 111800
loss: 1.0692239999771118,grad_norm: 0.9999996631097314, iteration: 111801
loss: 1.0667136907577515,grad_norm: 0.999999258258269, iteration: 111802
loss: 0.9867546558380127,grad_norm: 0.7229757878589858, iteration: 111803
loss: 1.1078126430511475,grad_norm: 0.9999998526150491, iteration: 111804
loss: 1.0490100383758545,grad_norm: 1.0000000291166962, iteration: 111805
loss: 1.0718963146209717,grad_norm: 0.9999991931952658, iteration: 111806
loss: 1.0964808464050293,grad_norm: 0.9999998958660518, iteration: 111807
loss: 1.0094947814941406,grad_norm: 0.9787027219651078, iteration: 111808
loss: 1.4390826225280762,grad_norm: 0.9999998326544385, iteration: 111809
loss: 1.0428752899169922,grad_norm: 0.7092006727191555, iteration: 111810
loss: 1.065664529800415,grad_norm: 0.9999993461280141, iteration: 111811
loss: 1.0750110149383545,grad_norm: 0.9999991978818707, iteration: 111812
loss: 1.0695301294326782,grad_norm: 0.9999997254619982, iteration: 111813
loss: 1.1140005588531494,grad_norm: 0.9154757249244014, iteration: 111814
loss: 1.0368022918701172,grad_norm: 0.999999951801143, iteration: 111815
loss: 1.2152150869369507,grad_norm: 0.9999999453843145, iteration: 111816
loss: 1.0717030763626099,grad_norm: 0.9801139181144944, iteration: 111817
loss: 1.4397050142288208,grad_norm: 0.999999592275451, iteration: 111818
loss: 1.0299235582351685,grad_norm: 0.9086844580146567, iteration: 111819
loss: 1.056567668914795,grad_norm: 0.9999995411348455, iteration: 111820
loss: 1.2822543382644653,grad_norm: 0.9999998340288919, iteration: 111821
loss: 1.2007238864898682,grad_norm: 0.9999998102246092, iteration: 111822
loss: 1.1120131015777588,grad_norm: 0.9999998327658046, iteration: 111823
loss: 1.1963967084884644,grad_norm: 0.9999997094472975, iteration: 111824
loss: 1.02271568775177,grad_norm: 0.9999990267830848, iteration: 111825
loss: 1.2398542165756226,grad_norm: 0.9999998364676901, iteration: 111826
loss: 1.0454521179199219,grad_norm: 0.9999992563529136, iteration: 111827
loss: 0.9829025864601135,grad_norm: 0.9038839131289589, iteration: 111828
loss: 1.1726953983306885,grad_norm: 0.9999991576153255, iteration: 111829
loss: 1.2999221086502075,grad_norm: 0.9999998777910711, iteration: 111830
loss: 1.0467623472213745,grad_norm: 0.9583218386743791, iteration: 111831
loss: 1.0852758884429932,grad_norm: 1.000000009406049, iteration: 111832
loss: 1.09815514087677,grad_norm: 0.9999997596388818, iteration: 111833
loss: 1.0084953308105469,grad_norm: 0.9999993335115207, iteration: 111834
loss: 1.076843500137329,grad_norm: 0.9999993878000871, iteration: 111835
loss: 1.2662354707717896,grad_norm: 0.999999834025873, iteration: 111836
loss: 1.211773157119751,grad_norm: 0.9999999331826251, iteration: 111837
loss: 1.0153385400772095,grad_norm: 0.9999995372781443, iteration: 111838
loss: 1.0133126974105835,grad_norm: 0.9999997765496674, iteration: 111839
loss: 1.0483955144882202,grad_norm: 0.9999995593887101, iteration: 111840
loss: 0.9996962547302246,grad_norm: 0.9999993319740165, iteration: 111841
loss: 1.0114028453826904,grad_norm: 0.999999792406047, iteration: 111842
loss: 0.9974852800369263,grad_norm: 0.9920213454186748, iteration: 111843
loss: 1.2114677429199219,grad_norm: 0.9999999142634344, iteration: 111844
loss: 1.1474595069885254,grad_norm: 0.999999296035179, iteration: 111845
loss: 1.0178567171096802,grad_norm: 0.9999991852708712, iteration: 111846
loss: 1.190282940864563,grad_norm: 0.9999997521442159, iteration: 111847
loss: 1.0420506000518799,grad_norm: 0.9999991373006581, iteration: 111848
loss: 1.0353803634643555,grad_norm: 0.8539170627456049, iteration: 111849
loss: 1.1266155242919922,grad_norm: 0.9999990875746892, iteration: 111850
loss: 1.1153151988983154,grad_norm: 0.999999147926927, iteration: 111851
loss: 1.1732635498046875,grad_norm: 0.9999994699404401, iteration: 111852
loss: 1.1308060884475708,grad_norm: 0.9999992228170487, iteration: 111853
loss: 1.1446845531463623,grad_norm: 0.9999998467754102, iteration: 111854
loss: 1.1643743515014648,grad_norm: 0.9999997207695959, iteration: 111855
loss: 1.0270404815673828,grad_norm: 0.8517146147264735, iteration: 111856
loss: 1.1491539478302002,grad_norm: 0.9999997993655558, iteration: 111857
loss: 1.0770412683486938,grad_norm: 0.9999992442606347, iteration: 111858
loss: 1.2235954999923706,grad_norm: 0.9999997815239277, iteration: 111859
loss: 1.1555286645889282,grad_norm: 0.9999996660514123, iteration: 111860
loss: 1.0950360298156738,grad_norm: 0.999999179439058, iteration: 111861
loss: 1.0732340812683105,grad_norm: 0.9999997839285119, iteration: 111862
loss: 1.05158269405365,grad_norm: 0.9999991274662425, iteration: 111863
loss: 1.0350180864334106,grad_norm: 0.9999997310278607, iteration: 111864
loss: 1.0425959825515747,grad_norm: 0.9999990621410814, iteration: 111865
loss: 1.104998230934143,grad_norm: 0.9999998861608095, iteration: 111866
loss: 1.1060962677001953,grad_norm: 0.9999993474095, iteration: 111867
loss: 1.1189324855804443,grad_norm: 0.9999997387747207, iteration: 111868
loss: 0.9670893549919128,grad_norm: 0.9606443741631613, iteration: 111869
loss: 1.0446358919143677,grad_norm: 0.9999995768045795, iteration: 111870
loss: 1.0106674432754517,grad_norm: 0.7857181557728897, iteration: 111871
loss: 1.0238944292068481,grad_norm: 0.9263725642051713, iteration: 111872
loss: 1.0881596803665161,grad_norm: 0.9999993522156401, iteration: 111873
loss: 1.113793134689331,grad_norm: 0.9999997038769629, iteration: 111874
loss: 0.9899621605873108,grad_norm: 0.8478051659636099, iteration: 111875
loss: 1.129898190498352,grad_norm: 0.9999998914690879, iteration: 111876
loss: 1.131895899772644,grad_norm: 0.9999999133387565, iteration: 111877
loss: 0.9960544109344482,grad_norm: 0.8141587790176773, iteration: 111878
loss: 1.0553511381149292,grad_norm: 0.9999990892690657, iteration: 111879
loss: 1.0649288892745972,grad_norm: 0.9437125574602623, iteration: 111880
loss: 1.1397439241409302,grad_norm: 0.9999992684146972, iteration: 111881
loss: 1.2670656442642212,grad_norm: 0.9999995309965987, iteration: 111882
loss: 1.1104480028152466,grad_norm: 0.9999998018576077, iteration: 111883
loss: 1.1452419757843018,grad_norm: 0.9999992482376866, iteration: 111884
loss: 1.030318260192871,grad_norm: 0.985078601042948, iteration: 111885
loss: 1.034066081047058,grad_norm: 0.9999994609738065, iteration: 111886
loss: 1.0496419668197632,grad_norm: 0.9999999015409821, iteration: 111887
loss: 1.040954351425171,grad_norm: 0.8650636132679573, iteration: 111888
loss: 1.2043200731277466,grad_norm: 0.9999994660823194, iteration: 111889
loss: 1.0415257215499878,grad_norm: 0.9999997428149611, iteration: 111890
loss: 1.0888490676879883,grad_norm: 0.9999996453850286, iteration: 111891
loss: 1.1504311561584473,grad_norm: 0.9999992733255435, iteration: 111892
loss: 1.0284115076065063,grad_norm: 0.9999997337996773, iteration: 111893
loss: 0.989502489566803,grad_norm: 0.9999990547399403, iteration: 111894
loss: 1.0306884050369263,grad_norm: 0.7808337647988951, iteration: 111895
loss: 1.013423204421997,grad_norm: 0.9999990851488383, iteration: 111896
loss: 1.0920710563659668,grad_norm: 1.0000000074945576, iteration: 111897
loss: 1.2809523344039917,grad_norm: 0.9999998968872083, iteration: 111898
loss: 1.1054067611694336,grad_norm: 0.9999996406915241, iteration: 111899
loss: 1.0839059352874756,grad_norm: 0.9999990872933335, iteration: 111900
loss: 1.0073399543762207,grad_norm: 0.8396906405648109, iteration: 111901
loss: 1.0561888217926025,grad_norm: 0.9999996250596601, iteration: 111902
loss: 1.037745475769043,grad_norm: 0.9403555984123192, iteration: 111903
loss: 0.9860069751739502,grad_norm: 0.7703824531732099, iteration: 111904
loss: 1.0278334617614746,grad_norm: 0.999999496586634, iteration: 111905
loss: 1.0151865482330322,grad_norm: 0.9999990428568807, iteration: 111906
loss: 1.0598489046096802,grad_norm: 0.9999992598024574, iteration: 111907
loss: 1.047298550605774,grad_norm: 0.999999238538963, iteration: 111908
loss: 1.1028333902359009,grad_norm: 0.9999992547059767, iteration: 111909
loss: 1.1534620523452759,grad_norm: 0.9999999518034066, iteration: 111910
loss: 1.2391055822372437,grad_norm: 0.9999998575370257, iteration: 111911
loss: 1.1760799884796143,grad_norm: 0.9999998998249594, iteration: 111912
loss: 1.026713252067566,grad_norm: 0.9999997736478773, iteration: 111913
loss: 1.017421007156372,grad_norm: 0.9999998022292741, iteration: 111914
loss: 1.0510996580123901,grad_norm: 0.9999992628159393, iteration: 111915
loss: 1.0558298826217651,grad_norm: 0.9999993869447137, iteration: 111916
loss: 0.9984127283096313,grad_norm: 0.9063684866267411, iteration: 111917
loss: 1.0583176612854004,grad_norm: 0.9208246008010182, iteration: 111918
loss: 1.1703619956970215,grad_norm: 0.9999998450740318, iteration: 111919
loss: 1.0235177278518677,grad_norm: 0.9318236510346766, iteration: 111920
loss: 1.0461419820785522,grad_norm: 0.9764324277277305, iteration: 111921
loss: 1.0071074962615967,grad_norm: 0.8745944282617912, iteration: 111922
loss: 1.2079051733016968,grad_norm: 0.9999997449681878, iteration: 111923
loss: 1.1160117387771606,grad_norm: 0.9999997967207105, iteration: 111924
loss: 1.009792447090149,grad_norm: 0.9999991370769418, iteration: 111925
loss: 0.9753934144973755,grad_norm: 0.9999991392375497, iteration: 111926
loss: 1.036442518234253,grad_norm: 0.9999991406015684, iteration: 111927
loss: 1.0839002132415771,grad_norm: 0.9999992317858872, iteration: 111928
loss: 0.9897938370704651,grad_norm: 0.999999123539757, iteration: 111929
loss: 1.0170257091522217,grad_norm: 0.999999727960297, iteration: 111930
loss: 1.1112340688705444,grad_norm: 0.9999999389507583, iteration: 111931
loss: 1.1171518564224243,grad_norm: 0.9999999530434199, iteration: 111932
loss: 0.9824770092964172,grad_norm: 0.9999992518339669, iteration: 111933
loss: 1.0530266761779785,grad_norm: 0.8670327040221072, iteration: 111934
loss: 1.0157920122146606,grad_norm: 0.8578039835212269, iteration: 111935
loss: 1.0056875944137573,grad_norm: 0.9999991178676113, iteration: 111936
loss: 1.0076645612716675,grad_norm: 0.9999992395614252, iteration: 111937
loss: 1.0461822748184204,grad_norm: 0.9999993862919118, iteration: 111938
loss: 1.0315132141113281,grad_norm: 0.8917294383816586, iteration: 111939
loss: 1.0948936939239502,grad_norm: 0.9999991554831056, iteration: 111940
loss: 1.086739420890808,grad_norm: 0.9999994056622215, iteration: 111941
loss: 0.9750998616218567,grad_norm: 0.7525312517492675, iteration: 111942
loss: 1.0137945413589478,grad_norm: 0.9999992189636215, iteration: 111943
loss: 1.083572268486023,grad_norm: 0.9272269856415971, iteration: 111944
loss: 1.0352389812469482,grad_norm: 0.9999996993958092, iteration: 111945
loss: 1.0009702444076538,grad_norm: 0.9984035385279184, iteration: 111946
loss: 0.9822753071784973,grad_norm: 0.9999996400439726, iteration: 111947
loss: 1.1073466539382935,grad_norm: 0.9999992871448805, iteration: 111948
loss: 1.016875147819519,grad_norm: 0.9999990319822304, iteration: 111949
loss: 0.9635238647460938,grad_norm: 0.9812663053495765, iteration: 111950
loss: 1.0456806421279907,grad_norm: 0.999999102209046, iteration: 111951
loss: 1.1826558113098145,grad_norm: 0.999999928157465, iteration: 111952
loss: 1.0459342002868652,grad_norm: 0.841047710710746, iteration: 111953
loss: 1.1677610874176025,grad_norm: 0.9999992193981617, iteration: 111954
loss: 1.0720734596252441,grad_norm: 0.9999999652139374, iteration: 111955
loss: 1.0065691471099854,grad_norm: 0.9999991457299529, iteration: 111956
loss: 0.9257038235664368,grad_norm: 0.9191187768570901, iteration: 111957
loss: 0.9930831789970398,grad_norm: 0.9999991146693524, iteration: 111958
loss: 0.9947565197944641,grad_norm: 0.9976655528298802, iteration: 111959
loss: 1.0056856870651245,grad_norm: 0.9999992982125289, iteration: 111960
loss: 0.9968197345733643,grad_norm: 0.9999992269576568, iteration: 111961
loss: 1.0277633666992188,grad_norm: 0.8901623543567221, iteration: 111962
loss: 1.2806508541107178,grad_norm: 0.9999998244845053, iteration: 111963
loss: 0.9911415576934814,grad_norm: 0.8465686142434341, iteration: 111964
loss: 1.0694500207901,grad_norm: 0.8894641936010452, iteration: 111965
loss: 1.0442863702774048,grad_norm: 0.9999996976460949, iteration: 111966
loss: 1.124275803565979,grad_norm: 0.8692747924618175, iteration: 111967
loss: 1.0081827640533447,grad_norm: 0.9951821815172143, iteration: 111968
loss: 1.0648667812347412,grad_norm: 0.9999999342670212, iteration: 111969
loss: 1.098854899406433,grad_norm: 0.9999998291945058, iteration: 111970
loss: 0.9835212230682373,grad_norm: 0.9999991915087729, iteration: 111971
loss: 1.046234130859375,grad_norm: 1.0000000228786103, iteration: 111972
loss: 1.0446373224258423,grad_norm: 0.9999990804075104, iteration: 111973
loss: 1.1352237462997437,grad_norm: 0.9999997106518341, iteration: 111974
loss: 1.0718872547149658,grad_norm: 0.9999996603827805, iteration: 111975
loss: 0.9805139303207397,grad_norm: 0.9215822651185496, iteration: 111976
loss: 1.0177314281463623,grad_norm: 0.9999996112612276, iteration: 111977
loss: 1.0430471897125244,grad_norm: 0.9999993888370331, iteration: 111978
loss: 1.0096352100372314,grad_norm: 0.8575032879049381, iteration: 111979
loss: 1.0587084293365479,grad_norm: 0.9999997488979326, iteration: 111980
loss: 1.0684629678726196,grad_norm: 0.9999993623949529, iteration: 111981
loss: 1.03946852684021,grad_norm: 0.9999990882751517, iteration: 111982
loss: 0.9932671189308167,grad_norm: 0.9084159445858023, iteration: 111983
loss: 0.9956126809120178,grad_norm: 0.9999995023875803, iteration: 111984
loss: 1.0328818559646606,grad_norm: 0.9999995871187423, iteration: 111985
loss: 1.0441265106201172,grad_norm: 0.999999405235889, iteration: 111986
loss: 1.023210883140564,grad_norm: 0.9999992260696521, iteration: 111987
loss: 1.069223403930664,grad_norm: 0.9999996113085088, iteration: 111988
loss: 1.0831679105758667,grad_norm: 0.999999524398956, iteration: 111989
loss: 0.9953877329826355,grad_norm: 0.9224768783936281, iteration: 111990
loss: 1.0217689275741577,grad_norm: 0.873691435765156, iteration: 111991
loss: 1.031929612159729,grad_norm: 0.9999993589465863, iteration: 111992
loss: 1.0222632884979248,grad_norm: 0.8664225749664031, iteration: 111993
loss: 1.020713210105896,grad_norm: 0.9999995900773886, iteration: 111994
loss: 1.0733367204666138,grad_norm: 0.9999990928049232, iteration: 111995
loss: 1.035288691520691,grad_norm: 0.999999228405246, iteration: 111996
loss: 1.0394434928894043,grad_norm: 0.9999996092303949, iteration: 111997
loss: 0.9856154322624207,grad_norm: 0.8206933354245837, iteration: 111998
loss: 1.2886897325515747,grad_norm: 0.9999996809360623, iteration: 111999
loss: 1.0618624687194824,grad_norm: 0.999999746499192, iteration: 112000
loss: 1.1534727811813354,grad_norm: 0.9999992733182298, iteration: 112001
loss: 1.010880708694458,grad_norm: 0.89810042859832, iteration: 112002
loss: 1.0183073282241821,grad_norm: 0.9999991739372509, iteration: 112003
loss: 1.0302579402923584,grad_norm: 0.9999990575236358, iteration: 112004
loss: 1.0078407526016235,grad_norm: 0.9999991140112305, iteration: 112005
loss: 0.9958070516586304,grad_norm: 0.9999994225277894, iteration: 112006
loss: 1.0779483318328857,grad_norm: 1.0000000086388972, iteration: 112007
loss: 1.0069273710250854,grad_norm: 0.999999087739429, iteration: 112008
loss: 1.0011812448501587,grad_norm: 0.9112859908036385, iteration: 112009
loss: 1.0560170412063599,grad_norm: 0.999999144069972, iteration: 112010
loss: 1.015113115310669,grad_norm: 0.9623992804528351, iteration: 112011
loss: 1.1327019929885864,grad_norm: 0.9999997762671831, iteration: 112012
loss: 1.2183924913406372,grad_norm: 0.9999999819139334, iteration: 112013
loss: 1.1064058542251587,grad_norm: 0.9999999538579866, iteration: 112014
loss: 1.0304666757583618,grad_norm: 0.9999992098788707, iteration: 112015
loss: 1.1360284090042114,grad_norm: 0.9999997537537622, iteration: 112016
loss: 1.0482341051101685,grad_norm: 0.999999279759159, iteration: 112017
loss: 1.1422195434570312,grad_norm: 0.9999997142614107, iteration: 112018
loss: 0.9649593830108643,grad_norm: 0.9999994206105822, iteration: 112019
loss: 1.0438016653060913,grad_norm: 0.9999997302495697, iteration: 112020
loss: 1.0041066408157349,grad_norm: 0.9785036301464369, iteration: 112021
loss: 1.0389033555984497,grad_norm: 0.9999995534346938, iteration: 112022
loss: 1.0555870532989502,grad_norm: 0.894958832450676, iteration: 112023
loss: 1.0382318496704102,grad_norm: 0.9999992745710877, iteration: 112024
loss: 1.0007680654525757,grad_norm: 0.9999992066542128, iteration: 112025
loss: 0.9666752815246582,grad_norm: 0.9999990277777921, iteration: 112026
loss: 1.057841181755066,grad_norm: 0.9999996010835152, iteration: 112027
loss: 1.0376906394958496,grad_norm: 0.9999990555890872, iteration: 112028
loss: 1.053887128829956,grad_norm: 0.921295337601321, iteration: 112029
loss: 1.002625584602356,grad_norm: 0.999999015042302, iteration: 112030
loss: 1.0392125844955444,grad_norm: 0.9999991285025078, iteration: 112031
loss: 1.0623500347137451,grad_norm: 0.9999994384892308, iteration: 112032
loss: 1.0050015449523926,grad_norm: 0.780179574492752, iteration: 112033
loss: 1.0643818378448486,grad_norm: 0.9999992264048342, iteration: 112034
loss: 1.0087190866470337,grad_norm: 0.9704043310098562, iteration: 112035
loss: 1.0502586364746094,grad_norm: 0.9999999829647327, iteration: 112036
loss: 1.0447521209716797,grad_norm: 0.9999996254239043, iteration: 112037
loss: 0.9956931471824646,grad_norm: 0.9649724714090895, iteration: 112038
loss: 1.000640630722046,grad_norm: 0.9999998995621628, iteration: 112039
loss: 1.122309684753418,grad_norm: 0.9999994825098325, iteration: 112040
loss: 1.0020354986190796,grad_norm: 0.9999998888553758, iteration: 112041
loss: 1.1928284168243408,grad_norm: 0.9999997071606803, iteration: 112042
loss: 1.105606198310852,grad_norm: 0.9999991116132011, iteration: 112043
loss: 1.0593130588531494,grad_norm: 0.9999997696703053, iteration: 112044
loss: 0.992778480052948,grad_norm: 0.9093192277906161, iteration: 112045
loss: 1.0462309122085571,grad_norm: 0.9999992409238418, iteration: 112046
loss: 1.0458427667617798,grad_norm: 0.8360763947609536, iteration: 112047
loss: 0.9864479899406433,grad_norm: 0.9999993696289317, iteration: 112048
loss: 1.0921183824539185,grad_norm: 0.9999992470525211, iteration: 112049
loss: 1.0735843181610107,grad_norm: 0.999999292899781, iteration: 112050
loss: 1.046116828918457,grad_norm: 0.9999993990697801, iteration: 112051
loss: 0.9965611696243286,grad_norm: 0.9080629156123328, iteration: 112052
loss: 0.9569886326789856,grad_norm: 0.9549829674215464, iteration: 112053
loss: 1.1047310829162598,grad_norm: 0.9999994509683885, iteration: 112054
loss: 1.024675965309143,grad_norm: 0.9999992697441592, iteration: 112055
loss: 0.9766044616699219,grad_norm: 0.8409586777031326, iteration: 112056
loss: 1.0765434503555298,grad_norm: 0.9999994393844209, iteration: 112057
loss: 1.125658631324768,grad_norm: 0.9999993478161914, iteration: 112058
loss: 1.1686336994171143,grad_norm: 0.999999886914695, iteration: 112059
loss: 1.057411789894104,grad_norm: 0.9999991834707852, iteration: 112060
loss: 1.0556944608688354,grad_norm: 0.9013002706545017, iteration: 112061
loss: 1.168235182762146,grad_norm: 0.9999994912535792, iteration: 112062
loss: 1.0747599601745605,grad_norm: 0.9999992227154327, iteration: 112063
loss: 1.0123941898345947,grad_norm: 0.8780945450318945, iteration: 112064
loss: 1.1645910739898682,grad_norm: 0.9999998506298456, iteration: 112065
loss: 1.0268868207931519,grad_norm: 0.9999998415396424, iteration: 112066
loss: 0.9772262573242188,grad_norm: 0.9999999213308157, iteration: 112067
loss: 1.1619219779968262,grad_norm: 0.9999997900370227, iteration: 112068
loss: 1.1285122632980347,grad_norm: 0.9999992306644928, iteration: 112069
loss: 1.1143512725830078,grad_norm: 0.9999996162589686, iteration: 112070
loss: 1.1373317241668701,grad_norm: 0.9999996182972707, iteration: 112071
loss: 1.117532730102539,grad_norm: 0.9999998270234018, iteration: 112072
loss: 1.0925887823104858,grad_norm: 0.9999993917037796, iteration: 112073
loss: 1.0670263767242432,grad_norm: 0.9999999438522907, iteration: 112074
loss: 1.1179132461547852,grad_norm: 0.9999994753510427, iteration: 112075
loss: 1.0089442729949951,grad_norm: 0.911036560901592, iteration: 112076
loss: 1.0397586822509766,grad_norm: 0.9999990241920234, iteration: 112077
loss: 1.1980818510055542,grad_norm: 0.9999993330442816, iteration: 112078
loss: 1.0299478769302368,grad_norm: 0.9410254847575653, iteration: 112079
loss: 1.030776023864746,grad_norm: 0.9999993182186925, iteration: 112080
loss: 1.07217276096344,grad_norm: 0.9999993995833169, iteration: 112081
loss: 1.0936354398727417,grad_norm: 1.0000000084071872, iteration: 112082
loss: 0.9800598621368408,grad_norm: 0.9999989869265662, iteration: 112083
loss: 1.0510374307632446,grad_norm: 0.8789161605627213, iteration: 112084
loss: 1.0703119039535522,grad_norm: 1.0000001113461432, iteration: 112085
loss: 1.0489922761917114,grad_norm: 0.9999991652548631, iteration: 112086
loss: 1.1771632432937622,grad_norm: 1.000000011730759, iteration: 112087
loss: 1.0634870529174805,grad_norm: 0.9999998875023614, iteration: 112088
loss: 0.9670435190200806,grad_norm: 0.8351451833163624, iteration: 112089
loss: 1.1207572221755981,grad_norm: 0.9999998803071569, iteration: 112090
loss: 1.0310617685317993,grad_norm: 0.9566729414409415, iteration: 112091
loss: 1.1271209716796875,grad_norm: 0.9999994604278186, iteration: 112092
loss: 1.4527393579483032,grad_norm: 0.9999998244902647, iteration: 112093
loss: 1.0592987537384033,grad_norm: 0.9999990944186407, iteration: 112094
loss: 1.1469342708587646,grad_norm: 0.9999997007578579, iteration: 112095
loss: 1.00511634349823,grad_norm: 0.9999990691153514, iteration: 112096
loss: 1.1365665197372437,grad_norm: 0.9999997933400876, iteration: 112097
loss: 1.2146435976028442,grad_norm: 0.9999996155418278, iteration: 112098
loss: 1.240962266921997,grad_norm: 0.9999998618734895, iteration: 112099
loss: 1.0642441511154175,grad_norm: 0.9999997113137816, iteration: 112100
loss: 1.0665096044540405,grad_norm: 0.999999835371515, iteration: 112101
loss: 1.0233640670776367,grad_norm: 0.999999643158226, iteration: 112102
loss: 1.2616868019104004,grad_norm: 1.0000000431227931, iteration: 112103
loss: 1.1725564002990723,grad_norm: 0.9999996976739202, iteration: 112104
loss: 1.0961565971374512,grad_norm: 0.999999940007273, iteration: 112105
loss: 1.0291630029678345,grad_norm: 0.9999997226428983, iteration: 112106
loss: 1.2334672212600708,grad_norm: 0.999999767194721, iteration: 112107
loss: 1.0288370847702026,grad_norm: 0.9178071228481889, iteration: 112108
loss: 1.0838819742202759,grad_norm: 0.9999997079237142, iteration: 112109
loss: 1.045822024345398,grad_norm: 0.99999958088899, iteration: 112110
loss: 1.1091676950454712,grad_norm: 0.9999996183574801, iteration: 112111
loss: 0.9992178678512573,grad_norm: 0.9935548869013623, iteration: 112112
loss: 1.0442386865615845,grad_norm: 0.9999991886565915, iteration: 112113
loss: 1.0161066055297852,grad_norm: 0.9999991369942829, iteration: 112114
loss: 0.999936580657959,grad_norm: 0.9514659633752083, iteration: 112115
loss: 0.9950321316719055,grad_norm: 0.9292024731259341, iteration: 112116
loss: 1.0224429368972778,grad_norm: 0.9999991267831327, iteration: 112117
loss: 1.1854933500289917,grad_norm: 0.9999998549513998, iteration: 112118
loss: 1.2101985216140747,grad_norm: 1.000000043555384, iteration: 112119
loss: 1.0958077907562256,grad_norm: 0.9999992654041618, iteration: 112120
loss: 1.0443027019500732,grad_norm: 0.9999991634715352, iteration: 112121
loss: 1.0126079320907593,grad_norm: 0.9999990885873907, iteration: 112122
loss: 1.045119285583496,grad_norm: 0.999999763303375, iteration: 112123
loss: 1.0482944250106812,grad_norm: 0.999999026193831, iteration: 112124
loss: 0.9982845187187195,grad_norm: 0.9999991614882705, iteration: 112125
loss: 1.0866405963897705,grad_norm: 0.9999999486276313, iteration: 112126
loss: 1.0432628393173218,grad_norm: 0.9999994753423889, iteration: 112127
loss: 1.015638828277588,grad_norm: 0.9999992535475011, iteration: 112128
loss: 1.0421462059020996,grad_norm: 0.9999991308245061, iteration: 112129
loss: 1.1016349792480469,grad_norm: 0.9999997245222916, iteration: 112130
loss: 0.9608197212219238,grad_norm: 0.9689636441953494, iteration: 112131
loss: 0.9786429405212402,grad_norm: 0.7789475414436935, iteration: 112132
loss: 0.9973482489585876,grad_norm: 0.9999998307439227, iteration: 112133
loss: 0.9813281297683716,grad_norm: 0.9999991521505728, iteration: 112134
loss: 1.1490017175674438,grad_norm: 0.9999997970221475, iteration: 112135
loss: 1.205043077468872,grad_norm: 0.9999997464566501, iteration: 112136
loss: 1.0111291408538818,grad_norm: 0.9999998651469862, iteration: 112137
loss: 1.0184996128082275,grad_norm: 0.9999997832235132, iteration: 112138
loss: 0.9983649253845215,grad_norm: 0.9999993194267619, iteration: 112139
loss: 1.0107536315917969,grad_norm: 0.881092441543595, iteration: 112140
loss: 1.0617870092391968,grad_norm: 0.9322088910590117, iteration: 112141
loss: 1.057770848274231,grad_norm: 0.8522136617373044, iteration: 112142
loss: 1.0300614833831787,grad_norm: 0.9999996976654474, iteration: 112143
loss: 1.0413687229156494,grad_norm: 0.9999991318944083, iteration: 112144
loss: 1.0974622964859009,grad_norm: 0.9999994344873742, iteration: 112145
loss: 1.0621538162231445,grad_norm: 0.9999998928616579, iteration: 112146
loss: 1.0408517122268677,grad_norm: 0.9999998749917537, iteration: 112147
loss: 1.059343934059143,grad_norm: 0.9999990768409912, iteration: 112148
loss: 1.1475332975387573,grad_norm: 0.9999995623196843, iteration: 112149
loss: 1.2105494737625122,grad_norm: 0.9999997047550039, iteration: 112150
loss: 1.087051510810852,grad_norm: 0.9999991145140159, iteration: 112151
loss: 1.074397087097168,grad_norm: 0.9999991264248532, iteration: 112152
loss: 1.1487268209457397,grad_norm: 0.9999998533965276, iteration: 112153
loss: 1.1341495513916016,grad_norm: 0.9999998674077276, iteration: 112154
loss: 1.0698579549789429,grad_norm: 0.8921479382151395, iteration: 112155
loss: 1.031474232673645,grad_norm: 0.9999992746243758, iteration: 112156
loss: 0.9747874140739441,grad_norm: 0.9999991345318949, iteration: 112157
loss: 1.0254900455474854,grad_norm: 0.9268866304159848, iteration: 112158
loss: 0.9850655198097229,grad_norm: 0.9999991944536102, iteration: 112159
loss: 0.9775568246841431,grad_norm: 0.9999998302777998, iteration: 112160
loss: 1.1394401788711548,grad_norm: 0.9999998403127123, iteration: 112161
loss: 1.154620885848999,grad_norm: 0.9736509447421599, iteration: 112162
loss: 1.0961273908615112,grad_norm: 0.9999997851163369, iteration: 112163
loss: 1.0649276971817017,grad_norm: 0.9999990825189522, iteration: 112164
loss: 1.0475149154663086,grad_norm: 0.9999997538456882, iteration: 112165
loss: 1.0349335670471191,grad_norm: 0.9999997130817969, iteration: 112166
loss: 1.0205308198928833,grad_norm: 0.9451971526420246, iteration: 112167
loss: 1.0251233577728271,grad_norm: 0.9999993672483332, iteration: 112168
loss: 1.133997917175293,grad_norm: 1.0000000747774818, iteration: 112169
loss: 0.9726818203926086,grad_norm: 0.9999990372364256, iteration: 112170
loss: 0.99517822265625,grad_norm: 0.9999992323387848, iteration: 112171
loss: 1.0003252029418945,grad_norm: 0.9999989935676735, iteration: 112172
loss: 1.2876137495040894,grad_norm: 0.9999998850922741, iteration: 112173
loss: 0.9749413132667542,grad_norm: 0.9456678341362997, iteration: 112174
loss: 0.9787789583206177,grad_norm: 0.7696891985998494, iteration: 112175
loss: 1.0333411693572998,grad_norm: 0.9999994915819309, iteration: 112176
loss: 0.9982320070266724,grad_norm: 0.9999992795630076, iteration: 112177
loss: 1.1064338684082031,grad_norm: 0.9999997039273981, iteration: 112178
loss: 1.138518214225769,grad_norm: 0.999999750537503, iteration: 112179
loss: 1.060827374458313,grad_norm: 0.9182129793623235, iteration: 112180
loss: 1.0746955871582031,grad_norm: 0.9516574881974343, iteration: 112181
loss: 1.1505439281463623,grad_norm: 0.9999992357364237, iteration: 112182
loss: 1.0851858854293823,grad_norm: 0.9999992790987523, iteration: 112183
loss: 0.992307186126709,grad_norm: 0.9271555417922952, iteration: 112184
loss: 1.0802502632141113,grad_norm: 0.9999993400391247, iteration: 112185
loss: 1.0328607559204102,grad_norm: 0.9999989831725273, iteration: 112186
loss: 1.0534353256225586,grad_norm: 0.9999993318832456, iteration: 112187
loss: 1.0085541009902954,grad_norm: 0.8516892588937782, iteration: 112188
loss: 1.089518666267395,grad_norm: 0.9999993621371018, iteration: 112189
loss: 1.0381255149841309,grad_norm: 0.9999999705291313, iteration: 112190
loss: 1.011146903038025,grad_norm: 0.7622310520924811, iteration: 112191
loss: 0.9878959059715271,grad_norm: 0.999999264761616, iteration: 112192
loss: 1.0303772687911987,grad_norm: 0.9999991573768736, iteration: 112193
loss: 0.9614774584770203,grad_norm: 0.8524158766046006, iteration: 112194
loss: 1.0413728952407837,grad_norm: 0.999999293896763, iteration: 112195
loss: 1.0587457418441772,grad_norm: 0.9178862016027515, iteration: 112196
loss: 0.9502447247505188,grad_norm: 0.8968510786794067, iteration: 112197
loss: 0.9460908770561218,grad_norm: 0.9541387586205002, iteration: 112198
loss: 1.1418712139129639,grad_norm: 0.9999996242484144, iteration: 112199
loss: 1.0652025938034058,grad_norm: 0.9999992429211707, iteration: 112200
loss: 1.1935584545135498,grad_norm: 0.9999993930249971, iteration: 112201
loss: 1.0126488208770752,grad_norm: 0.961979700913001, iteration: 112202
loss: 1.038453459739685,grad_norm: 0.9999998267087374, iteration: 112203
loss: 1.0278043746948242,grad_norm: 0.9999994538968707, iteration: 112204
loss: 1.0475091934204102,grad_norm: 0.9999999087768564, iteration: 112205
loss: 1.0171009302139282,grad_norm: 0.9999994110537856, iteration: 112206
loss: 1.0228278636932373,grad_norm: 0.9999992248186419, iteration: 112207
loss: 0.9402952194213867,grad_norm: 0.9999990593538861, iteration: 112208
loss: 1.0436922311782837,grad_norm: 0.9920136602929674, iteration: 112209
loss: 0.9873241186141968,grad_norm: 0.8004753509282401, iteration: 112210
loss: 1.0460489988327026,grad_norm: 0.9999995411176165, iteration: 112211
loss: 1.0466099977493286,grad_norm: 0.9999993301738879, iteration: 112212
loss: 0.9910662770271301,grad_norm: 0.9114678239527675, iteration: 112213
loss: 1.0589476823806763,grad_norm: 0.9999993153332423, iteration: 112214
loss: 1.0388472080230713,grad_norm: 0.9909868328909835, iteration: 112215
loss: 1.029647946357727,grad_norm: 0.9999996637729708, iteration: 112216
loss: 1.0188652276992798,grad_norm: 0.9999990777163185, iteration: 112217
loss: 1.1788270473480225,grad_norm: 0.999999779283412, iteration: 112218
loss: 1.009360671043396,grad_norm: 0.8563259860599479, iteration: 112219
loss: 1.016917109489441,grad_norm: 0.9999991451663497, iteration: 112220
loss: 1.0965445041656494,grad_norm: 0.9999992892719194, iteration: 112221
loss: 1.023238182067871,grad_norm: 0.9999999698190529, iteration: 112222
loss: 0.996327817440033,grad_norm: 0.8325237281313144, iteration: 112223
loss: 1.0352346897125244,grad_norm: 0.9388480576064913, iteration: 112224
loss: 1.0636621713638306,grad_norm: 0.9999998827303933, iteration: 112225
loss: 1.1595243215560913,grad_norm: 0.999999834927113, iteration: 112226
loss: 1.0114134550094604,grad_norm: 0.9847034991439187, iteration: 112227
loss: 0.9821128249168396,grad_norm: 0.9804680108961394, iteration: 112228
loss: 1.093704342842102,grad_norm: 0.9102264807316273, iteration: 112229
loss: 1.0346269607543945,grad_norm: 0.9999991601506545, iteration: 112230
loss: 1.3482916355133057,grad_norm: 0.9999994819942857, iteration: 112231
loss: 1.100958228111267,grad_norm: 0.9999997768038295, iteration: 112232
loss: 1.0702576637268066,grad_norm: 0.9999998707995436, iteration: 112233
loss: 1.0516804456710815,grad_norm: 0.999999384850053, iteration: 112234
loss: 1.1057192087173462,grad_norm: 0.9999994994906807, iteration: 112235
loss: 1.0002555847167969,grad_norm: 0.9999999106434726, iteration: 112236
loss: 0.9933902025222778,grad_norm: 0.9999997424027345, iteration: 112237
loss: 1.0171446800231934,grad_norm: 0.99999892777018, iteration: 112238
loss: 1.0702794790267944,grad_norm: 0.9999999084588875, iteration: 112239
loss: 1.0243338346481323,grad_norm: 0.999999422570965, iteration: 112240
loss: 1.1548621654510498,grad_norm: 0.999999411390973, iteration: 112241
loss: 1.0698374509811401,grad_norm: 0.9999993369872685, iteration: 112242
loss: 0.9940645098686218,grad_norm: 0.8441067157131796, iteration: 112243
loss: 1.091320276260376,grad_norm: 0.9999998431994512, iteration: 112244
loss: 1.0723183155059814,grad_norm: 0.9999998721868727, iteration: 112245
loss: 1.0694090127944946,grad_norm: 0.999999910573871, iteration: 112246
loss: 1.0046182870864868,grad_norm: 0.8824458694560297, iteration: 112247
loss: 0.9986258149147034,grad_norm: 0.9999997282617098, iteration: 112248
loss: 1.0069726705551147,grad_norm: 0.9373683883559397, iteration: 112249
loss: 1.0048638582229614,grad_norm: 0.9287428708702908, iteration: 112250
loss: 0.9813746809959412,grad_norm: 0.9808123646985681, iteration: 112251
loss: 0.9675723314285278,grad_norm: 0.999999177263264, iteration: 112252
loss: 1.0560081005096436,grad_norm: 0.9999991179490317, iteration: 112253
loss: 1.1673963069915771,grad_norm: 1.000000061816123, iteration: 112254
loss: 1.0364067554473877,grad_norm: 0.9999992345335028, iteration: 112255
loss: 1.0282326936721802,grad_norm: 0.9290809525725484, iteration: 112256
loss: 1.0871206521987915,grad_norm: 0.8410746481182833, iteration: 112257
loss: 1.1034438610076904,grad_norm: 0.9999999655564271, iteration: 112258
loss: 0.9949204325675964,grad_norm: 0.90320576501155, iteration: 112259
loss: 1.0257997512817383,grad_norm: 0.9999991542904784, iteration: 112260
loss: 1.0517159700393677,grad_norm: 0.9999999903226875, iteration: 112261
loss: 1.0403614044189453,grad_norm: 0.9999995085250115, iteration: 112262
loss: 0.9984898567199707,grad_norm: 0.8689452278390752, iteration: 112263
loss: 1.0391706228256226,grad_norm: 0.9015481668324788, iteration: 112264
loss: 1.1326824426651,grad_norm: 0.9999996091308361, iteration: 112265
loss: 1.054471731185913,grad_norm: 0.9999997805761303, iteration: 112266
loss: 1.0152850151062012,grad_norm: 0.999999690790645, iteration: 112267
loss: 1.0143834352493286,grad_norm: 0.9999999603085512, iteration: 112268
loss: 1.029197335243225,grad_norm: 0.9999991426263223, iteration: 112269
loss: 1.0105067491531372,grad_norm: 0.9999992294255138, iteration: 112270
loss: 1.0891900062561035,grad_norm: 0.9999991486807882, iteration: 112271
loss: 1.0720558166503906,grad_norm: 0.9999993025875485, iteration: 112272
loss: 1.1129271984100342,grad_norm: 0.9999993397688246, iteration: 112273
loss: 1.0179294347763062,grad_norm: 0.8791456168323053, iteration: 112274
loss: 0.9948989748954773,grad_norm: 0.9611695169950544, iteration: 112275
loss: 0.9895954132080078,grad_norm: 0.8156091394047678, iteration: 112276
loss: 1.0278609991073608,grad_norm: 0.9999993689588128, iteration: 112277
loss: 1.042918086051941,grad_norm: 0.9999997969012504, iteration: 112278
loss: 0.9881870150566101,grad_norm: 0.89316933650317, iteration: 112279
loss: 1.0634138584136963,grad_norm: 0.9999996642753468, iteration: 112280
loss: 1.0323017835617065,grad_norm: 0.98483364471862, iteration: 112281
loss: 1.0624539852142334,grad_norm: 0.9999997237584857, iteration: 112282
loss: 1.0149645805358887,grad_norm: 0.9999991245597502, iteration: 112283
loss: 1.0746848583221436,grad_norm: 0.999999812650868, iteration: 112284
loss: 1.236248254776001,grad_norm: 0.9999993973513303, iteration: 112285
loss: 1.0773545503616333,grad_norm: 0.999999307829652, iteration: 112286
loss: 1.0313568115234375,grad_norm: 0.9999992362756671, iteration: 112287
loss: 1.053698182106018,grad_norm: 0.9999994363394241, iteration: 112288
loss: 0.9768270254135132,grad_norm: 0.9018471551631843, iteration: 112289
loss: 1.0487933158874512,grad_norm: 0.8393247192186915, iteration: 112290
loss: 1.0269575119018555,grad_norm: 0.9819989462088001, iteration: 112291
loss: 1.037558674812317,grad_norm: 0.9999989845340302, iteration: 112292
loss: 1.0316590070724487,grad_norm: 0.9999991695373835, iteration: 112293
loss: 1.031930685043335,grad_norm: 0.842120602346537, iteration: 112294
loss: 1.06816828250885,grad_norm: 0.9999996174876549, iteration: 112295
loss: 0.994924008846283,grad_norm: 0.9999993961605343, iteration: 112296
loss: 1.0009565353393555,grad_norm: 0.9999999134032944, iteration: 112297
loss: 1.0647939443588257,grad_norm: 0.7687846275171684, iteration: 112298
loss: 0.9615252614021301,grad_norm: 0.9702296099329223, iteration: 112299
loss: 1.0630196332931519,grad_norm: 0.999999841124541, iteration: 112300
loss: 0.9625334739685059,grad_norm: 0.9558907068036986, iteration: 112301
loss: 1.0885268449783325,grad_norm: 0.9999992272194972, iteration: 112302
loss: 1.0033143758773804,grad_norm: 0.9999995610814373, iteration: 112303
loss: 1.0241742134094238,grad_norm: 0.9999997612288171, iteration: 112304
loss: 1.0141704082489014,grad_norm: 0.9999992613271513, iteration: 112305
loss: 0.9910168051719666,grad_norm: 0.9999995355271096, iteration: 112306
loss: 0.9849498271942139,grad_norm: 0.9999989747198603, iteration: 112307
loss: 0.9967644810676575,grad_norm: 0.9999996533536857, iteration: 112308
loss: 1.0406577587127686,grad_norm: 0.9999991708799587, iteration: 112309
loss: 1.1256650686264038,grad_norm: 0.9999992335275598, iteration: 112310
loss: 1.0407991409301758,grad_norm: 0.9999991803720607, iteration: 112311
loss: 1.053610920906067,grad_norm: 0.968963922473274, iteration: 112312
loss: 1.0497570037841797,grad_norm: 0.9307552818157927, iteration: 112313
loss: 1.0292134284973145,grad_norm: 0.999999753650536, iteration: 112314
loss: 1.0041955709457397,grad_norm: 0.9999998804262691, iteration: 112315
loss: 0.9937740564346313,grad_norm: 0.999999842075531, iteration: 112316
loss: 0.9919877052307129,grad_norm: 0.999999659203444, iteration: 112317
loss: 1.0355948209762573,grad_norm: 0.8655371071831053, iteration: 112318
loss: 1.0117549896240234,grad_norm: 0.9999991377950183, iteration: 112319
loss: 1.0913385152816772,grad_norm: 0.9999994144226637, iteration: 112320
loss: 1.0289943218231201,grad_norm: 0.9999998019194097, iteration: 112321
loss: 0.9980336427688599,grad_norm: 0.9138241744012396, iteration: 112322
loss: 1.016924500465393,grad_norm: 0.9999995059425905, iteration: 112323
loss: 1.0979665517807007,grad_norm: 0.9999998667414656, iteration: 112324
loss: 0.9700313806533813,grad_norm: 0.9999995642111612, iteration: 112325
loss: 0.9979943037033081,grad_norm: 0.8423788907294059, iteration: 112326
loss: 0.984734058380127,grad_norm: 0.8613489596371461, iteration: 112327
loss: 1.1458790302276611,grad_norm: 0.999999680052137, iteration: 112328
loss: 1.1096867322921753,grad_norm: 0.9999996533045246, iteration: 112329
loss: 1.0497578382492065,grad_norm: 0.9999999163557153, iteration: 112330
loss: 1.069665551185608,grad_norm: 0.9999991514173413, iteration: 112331
loss: 1.0734986066818237,grad_norm: 0.9999990620598793, iteration: 112332
loss: 1.2960916757583618,grad_norm: 0.9999994747076473, iteration: 112333
loss: 0.9769161939620972,grad_norm: 0.8349342147474366, iteration: 112334
loss: 1.0664231777191162,grad_norm: 0.9999995493615543, iteration: 112335
loss: 1.1131529808044434,grad_norm: 0.9883864505788799, iteration: 112336
loss: 1.0324230194091797,grad_norm: 0.9999990402154494, iteration: 112337
loss: 0.988462507724762,grad_norm: 0.9999991743202717, iteration: 112338
loss: 1.1736787557601929,grad_norm: 0.9999993714438911, iteration: 112339
loss: 1.005287766456604,grad_norm: 0.9999993874054048, iteration: 112340
loss: 1.0656969547271729,grad_norm: 0.9999992307327527, iteration: 112341
loss: 0.9968987703323364,grad_norm: 0.8957108722362119, iteration: 112342
loss: 1.0753880739212036,grad_norm: 0.9999992414242017, iteration: 112343
loss: 1.0192419290542603,grad_norm: 0.9999993917016917, iteration: 112344
loss: 1.183119535446167,grad_norm: 0.999999907756192, iteration: 112345
loss: 1.023390769958496,grad_norm: 0.999999271218719, iteration: 112346
loss: 1.1091487407684326,grad_norm: 0.9999997742783632, iteration: 112347
loss: 1.124712347984314,grad_norm: 0.9999997079442247, iteration: 112348
loss: 0.9912077784538269,grad_norm: 0.9228209501809722, iteration: 112349
loss: 1.0098063945770264,grad_norm: 0.8521740008344109, iteration: 112350
loss: 1.0062410831451416,grad_norm: 0.9426524812847069, iteration: 112351
loss: 1.054762363433838,grad_norm: 0.9999991727889981, iteration: 112352
loss: 1.005315899848938,grad_norm: 0.9686305906908389, iteration: 112353
loss: 1.0374853610992432,grad_norm: 0.9999994235294686, iteration: 112354
loss: 1.0555251836776733,grad_norm: 0.9999990000508656, iteration: 112355
loss: 1.0025972127914429,grad_norm: 0.9999996059005766, iteration: 112356
loss: 0.9951006174087524,grad_norm: 0.8379568762236645, iteration: 112357
loss: 1.012359380722046,grad_norm: 0.9999991810589642, iteration: 112358
loss: 1.0122301578521729,grad_norm: 0.9999992428812194, iteration: 112359
loss: 1.085556983947754,grad_norm: 0.9999997792640644, iteration: 112360
loss: 1.1724203824996948,grad_norm: 0.999999885921813, iteration: 112361
loss: 1.0268683433532715,grad_norm: 0.9999999825187007, iteration: 112362
loss: 1.101094365119934,grad_norm: 0.999999988575735, iteration: 112363
loss: 1.0850751399993896,grad_norm: 0.9999993273755781, iteration: 112364
loss: 1.0401502847671509,grad_norm: 0.9999996777330723, iteration: 112365
loss: 1.1286333799362183,grad_norm: 0.9999996330206234, iteration: 112366
loss: 0.9904764294624329,grad_norm: 0.9999996315325083, iteration: 112367
loss: 1.0119941234588623,grad_norm: 0.9776656823091624, iteration: 112368
loss: 1.0963108539581299,grad_norm: 0.9999999333571161, iteration: 112369
loss: 1.0851447582244873,grad_norm: 0.902523773779165, iteration: 112370
loss: 1.0568552017211914,grad_norm: 0.9999994485785475, iteration: 112371
loss: 0.9966008067131042,grad_norm: 0.9999995398603517, iteration: 112372
loss: 1.071755290031433,grad_norm: 0.9999993941612685, iteration: 112373
loss: 1.0039457082748413,grad_norm: 0.9999995478424131, iteration: 112374
loss: 1.0166393518447876,grad_norm: 0.9999992099319976, iteration: 112375
loss: 0.9989464282989502,grad_norm: 0.9999996808504155, iteration: 112376
loss: 1.0141295194625854,grad_norm: 0.9746442502701689, iteration: 112377
loss: 1.0294095277786255,grad_norm: 0.8182770818303651, iteration: 112378
loss: 1.0837312936782837,grad_norm: 0.9999998565564838, iteration: 112379
loss: 1.0359604358673096,grad_norm: 0.999999465099901, iteration: 112380
loss: 1.1214160919189453,grad_norm: 0.9999992862344974, iteration: 112381
loss: 1.1098854541778564,grad_norm: 0.9999999949009571, iteration: 112382
loss: 1.1520805358886719,grad_norm: 0.9999998477861572, iteration: 112383
loss: 1.051043152809143,grad_norm: 0.9999992330530186, iteration: 112384
loss: 1.1552422046661377,grad_norm: 0.9999998019369003, iteration: 112385
loss: 1.0468652248382568,grad_norm: 0.9999997238226346, iteration: 112386
loss: 1.0304253101348877,grad_norm: 0.9999993043491308, iteration: 112387
loss: 1.037295937538147,grad_norm: 0.9999991859867844, iteration: 112388
loss: 1.06661057472229,grad_norm: 0.9999993312569996, iteration: 112389
loss: 1.2492948770523071,grad_norm: 0.999999791647501, iteration: 112390
loss: 1.041650414466858,grad_norm: 0.9474478099335598, iteration: 112391
loss: 1.0611568689346313,grad_norm: 0.9999991756664109, iteration: 112392
loss: 1.203813910484314,grad_norm: 0.9999998146093505, iteration: 112393
loss: 1.2084680795669556,grad_norm: 0.9999993626831515, iteration: 112394
loss: 1.1409615278244019,grad_norm: 0.999999736411854, iteration: 112395
loss: 1.2678775787353516,grad_norm: 1.0000000209411346, iteration: 112396
loss: 1.09543776512146,grad_norm: 0.9999998844593806, iteration: 112397
loss: 1.0415780544281006,grad_norm: 0.9999994866639399, iteration: 112398
loss: 1.0214638710021973,grad_norm: 0.999999870858416, iteration: 112399
loss: 1.0408942699432373,grad_norm: 0.9220421352807674, iteration: 112400
loss: 1.1502201557159424,grad_norm: 0.9999999479971726, iteration: 112401
loss: 1.0548856258392334,grad_norm: 0.999999144591086, iteration: 112402
loss: 1.0511184930801392,grad_norm: 0.9901430046581456, iteration: 112403
loss: 1.0450373888015747,grad_norm: 0.999999563860964, iteration: 112404
loss: 1.0628759860992432,grad_norm: 0.9999998302686771, iteration: 112405
loss: 1.01180899143219,grad_norm: 0.9999991337180723, iteration: 112406
loss: 1.015379786491394,grad_norm: 0.9999999228674923, iteration: 112407
loss: 1.096422553062439,grad_norm: 0.9999994355317132, iteration: 112408
loss: 0.9945885539054871,grad_norm: 0.7650700035208139, iteration: 112409
loss: 1.0360928773880005,grad_norm: 0.9999991176033862, iteration: 112410
loss: 1.0747336149215698,grad_norm: 0.9999991802217926, iteration: 112411
loss: 1.0455236434936523,grad_norm: 0.9999994135467017, iteration: 112412
loss: 1.0550785064697266,grad_norm: 0.9999992768496448, iteration: 112413
loss: 1.059713363647461,grad_norm: 0.9999999469594383, iteration: 112414
loss: 1.0511637926101685,grad_norm: 0.8464012009871602, iteration: 112415
loss: 0.9988132119178772,grad_norm: 0.9999991479382099, iteration: 112416
loss: 1.0700093507766724,grad_norm: 0.999999082916808, iteration: 112417
loss: 1.0639485120773315,grad_norm: 0.9999992301815682, iteration: 112418
loss: 1.0101758241653442,grad_norm: 0.9999991328498882, iteration: 112419
loss: 1.0993295907974243,grad_norm: 0.9999997023670769, iteration: 112420
loss: 1.0120782852172852,grad_norm: 0.8269998700017221, iteration: 112421
loss: 1.059255838394165,grad_norm: 0.9958643498105599, iteration: 112422
loss: 0.9718763828277588,grad_norm: 0.7744822107012516, iteration: 112423
loss: 1.1027475595474243,grad_norm: 0.8509610771746527, iteration: 112424
loss: 0.9772774577140808,grad_norm: 0.9540184417070049, iteration: 112425
loss: 1.001644253730774,grad_norm: 0.9466381350258533, iteration: 112426
loss: 1.0694166421890259,grad_norm: 0.9651366345976042, iteration: 112427
loss: 1.003467321395874,grad_norm: 0.8060465526285556, iteration: 112428
loss: 1.1062114238739014,grad_norm: 0.9999996837319126, iteration: 112429
loss: 1.0161429643630981,grad_norm: 0.8305810980666958, iteration: 112430
loss: 1.0046355724334717,grad_norm: 0.9999990841862694, iteration: 112431
loss: 1.0249477624893188,grad_norm: 0.999999250086685, iteration: 112432
loss: 0.9961152076721191,grad_norm: 0.9999993943330734, iteration: 112433
loss: 1.042206883430481,grad_norm: 0.9999995889368479, iteration: 112434
loss: 0.996715247631073,grad_norm: 0.8835020119091022, iteration: 112435
loss: 1.0035234689712524,grad_norm: 0.9841858563437341, iteration: 112436
loss: 1.0145397186279297,grad_norm: 0.9999991214541122, iteration: 112437
loss: 1.0962011814117432,grad_norm: 0.9999993686304036, iteration: 112438
loss: 1.0473606586456299,grad_norm: 0.9999992113869048, iteration: 112439
loss: 1.0167694091796875,grad_norm: 0.9999991736919278, iteration: 112440
loss: 1.0113059282302856,grad_norm: 0.9863168159169967, iteration: 112441
loss: 1.12063729763031,grad_norm: 0.9999995815888628, iteration: 112442
loss: 1.0023125410079956,grad_norm: 0.7913667261819974, iteration: 112443
loss: 1.04572594165802,grad_norm: 0.9036967569177531, iteration: 112444
loss: 1.0161432027816772,grad_norm: 0.9999991953211468, iteration: 112445
loss: 0.9997991323471069,grad_norm: 0.7890938252400748, iteration: 112446
loss: 0.9767779111862183,grad_norm: 0.8686208834128606, iteration: 112447
loss: 0.9797491431236267,grad_norm: 0.842685886472187, iteration: 112448
loss: 1.1390783786773682,grad_norm: 0.9999999710703861, iteration: 112449
loss: 1.0622613430023193,grad_norm: 0.8858591195289581, iteration: 112450
loss: 1.0397361516952515,grad_norm: 0.9999998355480159, iteration: 112451
loss: 1.0748355388641357,grad_norm: 0.9999993037467557, iteration: 112452
loss: 1.0006182193756104,grad_norm: 0.9224090803835651, iteration: 112453
loss: 1.021705150604248,grad_norm: 0.9519141231120152, iteration: 112454
loss: 0.9792585968971252,grad_norm: 0.9999989741966159, iteration: 112455
loss: 1.0459922552108765,grad_norm: 0.783403087973296, iteration: 112456
loss: 0.9960380792617798,grad_norm: 0.999999405777241, iteration: 112457
loss: 1.0058348178863525,grad_norm: 0.9999990569510415, iteration: 112458
loss: 1.0053414106369019,grad_norm: 0.9999991367611739, iteration: 112459
loss: 1.094841480255127,grad_norm: 0.9999989971012068, iteration: 112460
loss: 1.0280643701553345,grad_norm: 0.892330252123354, iteration: 112461
loss: 0.9972212314605713,grad_norm: 0.999999059992574, iteration: 112462
loss: 1.0443509817123413,grad_norm: 0.9021438117390151, iteration: 112463
loss: 0.953491747379303,grad_norm: 0.9323796160119718, iteration: 112464
loss: 1.0683294534683228,grad_norm: 0.9882779317653284, iteration: 112465
loss: 1.0257617235183716,grad_norm: 0.8449107494807101, iteration: 112466
loss: 0.9937497973442078,grad_norm: 0.9999991297614395, iteration: 112467
loss: 1.0536236763000488,grad_norm: 0.9999990955079463, iteration: 112468
loss: 1.0367172956466675,grad_norm: 0.8332942961727106, iteration: 112469
loss: 1.01957106590271,grad_norm: 0.9366106401277058, iteration: 112470
loss: 0.9552997946739197,grad_norm: 0.8705531887610293, iteration: 112471
loss: 1.0014299154281616,grad_norm: 0.7819069050841565, iteration: 112472
loss: 1.016196846961975,grad_norm: 0.9999999208753858, iteration: 112473
loss: 0.9860609173774719,grad_norm: 0.9660566650162801, iteration: 112474
loss: 1.0326799154281616,grad_norm: 0.9999997697035228, iteration: 112475
loss: 0.9906349778175354,grad_norm: 0.9999998042123358, iteration: 112476
loss: 1.060219645500183,grad_norm: 0.9999994198499301, iteration: 112477
loss: 1.074055552482605,grad_norm: 0.9999991309276971, iteration: 112478
loss: 1.0620392560958862,grad_norm: 0.9999990838373605, iteration: 112479
loss: 1.0388753414154053,grad_norm: 0.9999998171160488, iteration: 112480
loss: 1.1009100675582886,grad_norm: 0.9999995419263116, iteration: 112481
loss: 0.9963793158531189,grad_norm: 0.9999998251577217, iteration: 112482
loss: 1.0585349798202515,grad_norm: 0.9999994331480087, iteration: 112483
loss: 1.0252795219421387,grad_norm: 0.9335261427741197, iteration: 112484
loss: 1.0070786476135254,grad_norm: 0.9058961577556585, iteration: 112485
loss: 1.028559684753418,grad_norm: 0.8616349131053359, iteration: 112486
loss: 1.1013760566711426,grad_norm: 0.9999996047772335, iteration: 112487
loss: 1.0564460754394531,grad_norm: 0.9999992507822881, iteration: 112488
loss: 1.1309444904327393,grad_norm: 0.9999994710244919, iteration: 112489
loss: 1.0146260261535645,grad_norm: 0.9274758166178171, iteration: 112490
loss: 0.9946198463439941,grad_norm: 0.9999992656587763, iteration: 112491
loss: 1.0470799207687378,grad_norm: 0.9999998073644545, iteration: 112492
loss: 1.1683167219161987,grad_norm: 0.9999999229211035, iteration: 112493
loss: 1.0151268243789673,grad_norm: 0.9532918572652621, iteration: 112494
loss: 1.0210310220718384,grad_norm: 0.9666651780209743, iteration: 112495
loss: 1.0775562524795532,grad_norm: 0.9999997869437731, iteration: 112496
loss: 1.031827449798584,grad_norm: 0.9999994083492285, iteration: 112497
loss: 1.0619549751281738,grad_norm: 0.9999992587341426, iteration: 112498
loss: 1.050096869468689,grad_norm: 0.9999992108821719, iteration: 112499
loss: 0.9777653813362122,grad_norm: 0.887590716541186, iteration: 112500
loss: 1.003781795501709,grad_norm: 0.999999157232132, iteration: 112501
loss: 1.03946852684021,grad_norm: 0.9999991897875595, iteration: 112502
loss: 1.0139708518981934,grad_norm: 0.7402770186056512, iteration: 112503
loss: 1.0397417545318604,grad_norm: 0.999999235850329, iteration: 112504
loss: 1.0389187335968018,grad_norm: 0.8727964362832787, iteration: 112505
loss: 0.9983993172645569,grad_norm: 0.9999993111081256, iteration: 112506
loss: 1.0225257873535156,grad_norm: 0.9999992539197148, iteration: 112507
loss: 1.038159966468811,grad_norm: 0.7859386213113245, iteration: 112508
loss: 1.055770993232727,grad_norm: 0.9781348675762649, iteration: 112509
loss: 1.0366442203521729,grad_norm: 0.9999998047359547, iteration: 112510
loss: 1.025004506111145,grad_norm: 0.9999992721286451, iteration: 112511
loss: 0.9912976622581482,grad_norm: 0.8478806824632814, iteration: 112512
loss: 1.0146762132644653,grad_norm: 0.9999995185308879, iteration: 112513
loss: 1.0521539449691772,grad_norm: 0.9842378242558782, iteration: 112514
loss: 0.9542809128761292,grad_norm: 0.9817635647637843, iteration: 112515
loss: 1.0661711692810059,grad_norm: 0.9999990531192966, iteration: 112516
loss: 1.019089698791504,grad_norm: 1.0000000587279942, iteration: 112517
loss: 1.0318490266799927,grad_norm: 0.768807257184761, iteration: 112518
loss: 0.9921455979347229,grad_norm: 0.9275477600170002, iteration: 112519
loss: 1.0193387269973755,grad_norm: 0.9784538546802678, iteration: 112520
loss: 1.0424646139144897,grad_norm: 0.999999788403791, iteration: 112521
loss: 1.027998924255371,grad_norm: 0.9157700379750823, iteration: 112522
loss: 1.031241536140442,grad_norm: 0.9999991182244935, iteration: 112523
loss: 1.0910390615463257,grad_norm: 1.0000000407551048, iteration: 112524
loss: 1.0328679084777832,grad_norm: 0.9999991566116365, iteration: 112525
loss: 1.0353960990905762,grad_norm: 0.9999998885954556, iteration: 112526
loss: 1.0302855968475342,grad_norm: 0.999999446458646, iteration: 112527
loss: 1.0005801916122437,grad_norm: 0.8373600541758925, iteration: 112528
loss: 1.030106544494629,grad_norm: 0.999999067109813, iteration: 112529
loss: 0.9962797164916992,grad_norm: 0.9234786335607682, iteration: 112530
loss: 0.9855655431747437,grad_norm: 0.9292307429438874, iteration: 112531
loss: 1.0058870315551758,grad_norm: 0.9443929143941254, iteration: 112532
loss: 1.0000048875808716,grad_norm: 0.9999992321082348, iteration: 112533
loss: 1.259855031967163,grad_norm: 0.9999998451114521, iteration: 112534
loss: 1.0950957536697388,grad_norm: 0.9999997089146634, iteration: 112535
loss: 1.0108786821365356,grad_norm: 0.9999990975238233, iteration: 112536
loss: 1.0316853523254395,grad_norm: 0.9999993093958085, iteration: 112537
loss: 1.0583640336990356,grad_norm: 0.9999994668430537, iteration: 112538
loss: 0.9938789010047913,grad_norm: 0.8428323025153163, iteration: 112539
loss: 1.0286235809326172,grad_norm: 0.9131562036496365, iteration: 112540
loss: 1.0463544130325317,grad_norm: 0.9999995844701096, iteration: 112541
loss: 1.0564042329788208,grad_norm: 0.9517934798656468, iteration: 112542
loss: 0.9976652264595032,grad_norm: 0.8732823337290352, iteration: 112543
loss: 1.0344014167785645,grad_norm: 0.9999996063473653, iteration: 112544
loss: 1.0282808542251587,grad_norm: 0.8780366112972507, iteration: 112545
loss: 1.0262686014175415,grad_norm: 0.9999991367004358, iteration: 112546
loss: 1.0339183807373047,grad_norm: 0.9999993608808819, iteration: 112547
loss: 1.072371482849121,grad_norm: 0.9999990838037687, iteration: 112548
loss: 1.0128798484802246,grad_norm: 0.8612164769797519, iteration: 112549
loss: 0.9769518375396729,grad_norm: 0.9999989474138671, iteration: 112550
loss: 1.0010377168655396,grad_norm: 0.9999996590396978, iteration: 112551
loss: 1.0231443643569946,grad_norm: 0.8598241757328839, iteration: 112552
loss: 1.0099525451660156,grad_norm: 0.999999109094391, iteration: 112553
loss: 1.0025451183319092,grad_norm: 0.8424009822191774, iteration: 112554
loss: 1.0038641691207886,grad_norm: 0.999999511300967, iteration: 112555
loss: 0.9620562791824341,grad_norm: 0.9427958708182961, iteration: 112556
loss: 0.9672611355781555,grad_norm: 0.9999996550405231, iteration: 112557
loss: 0.9970165491104126,grad_norm: 0.999999412745988, iteration: 112558
loss: 1.261543869972229,grad_norm: 0.9999998338035367, iteration: 112559
loss: 1.207558512687683,grad_norm: 0.9999996097650361, iteration: 112560
loss: 1.0133126974105835,grad_norm: 0.9999990422928132, iteration: 112561
loss: 0.9674456119537354,grad_norm: 0.841048737313399, iteration: 112562
loss: 1.0368812084197998,grad_norm: 0.8922700345363719, iteration: 112563
loss: 1.019481897354126,grad_norm: 0.9999999257566718, iteration: 112564
loss: 0.9904322624206543,grad_norm: 0.9999995063678296, iteration: 112565
loss: 0.9835458397865295,grad_norm: 0.9999996937920336, iteration: 112566
loss: 0.9894909262657166,grad_norm: 0.9999991842872776, iteration: 112567
loss: 1.1904035806655884,grad_norm: 0.9999994627729183, iteration: 112568
loss: 0.9899892807006836,grad_norm: 0.9999994745052592, iteration: 112569
loss: 1.0356192588806152,grad_norm: 0.9999998220405052, iteration: 112570
loss: 1.0200355052947998,grad_norm: 0.9999990524418438, iteration: 112571
loss: 0.9992628693580627,grad_norm: 0.9086214594258869, iteration: 112572
loss: 0.972114086151123,grad_norm: 0.9999990419600335, iteration: 112573
loss: 1.036253571510315,grad_norm: 0.999999630838687, iteration: 112574
loss: 1.0556613206863403,grad_norm: 0.99999964599645, iteration: 112575
loss: 1.030348777770996,grad_norm: 0.9999991459918411, iteration: 112576
loss: 1.0903679132461548,grad_norm: 0.999999771423019, iteration: 112577
loss: 1.039231538772583,grad_norm: 0.9999996183732448, iteration: 112578
loss: 1.029564619064331,grad_norm: 0.9999993062933328, iteration: 112579
loss: 0.9891581535339355,grad_norm: 0.9999991137420587, iteration: 112580
loss: 1.0197696685791016,grad_norm: 0.8384947333964273, iteration: 112581
loss: 1.020975112915039,grad_norm: 0.9294383890473175, iteration: 112582
loss: 0.9811150431632996,grad_norm: 0.9999991614226353, iteration: 112583
loss: 0.993137538433075,grad_norm: 0.8847992116724922, iteration: 112584
loss: 0.9844492673873901,grad_norm: 0.9999991211902728, iteration: 112585
loss: 1.1274422407150269,grad_norm: 0.9999994798536936, iteration: 112586
loss: 1.078510046005249,grad_norm: 0.9999998849030729, iteration: 112587
loss: 1.0453839302062988,grad_norm: 0.9999992005629769, iteration: 112588
loss: 1.0455878973007202,grad_norm: 0.9933607392459255, iteration: 112589
loss: 1.0448285341262817,grad_norm: 0.9999991257461945, iteration: 112590
loss: 1.0361307859420776,grad_norm: 0.9999991469905485, iteration: 112591
loss: 1.0723671913146973,grad_norm: 0.9999992907202174, iteration: 112592
loss: 1.043601393699646,grad_norm: 0.9999992048874037, iteration: 112593
loss: 1.0178954601287842,grad_norm: 0.9014223673944325, iteration: 112594
loss: 1.0298779010772705,grad_norm: 0.999999111787906, iteration: 112595
loss: 1.0212960243225098,grad_norm: 0.9999990710982217, iteration: 112596
loss: 0.9764677882194519,grad_norm: 0.8846337300063094, iteration: 112597
loss: 0.9890987873077393,grad_norm: 0.8055222430701446, iteration: 112598
loss: 1.0193853378295898,grad_norm: 0.9999995939863163, iteration: 112599
loss: 1.0001310110092163,grad_norm: 0.8565854381556756, iteration: 112600
loss: 1.0288468599319458,grad_norm: 0.9999998196275244, iteration: 112601
loss: 1.0225234031677246,grad_norm: 0.8213757157887984, iteration: 112602
loss: 1.1035535335540771,grad_norm: 0.9999995017885079, iteration: 112603
loss: 1.045462727546692,grad_norm: 0.9999994328627879, iteration: 112604
loss: 1.1369613409042358,grad_norm: 0.9999995995604597, iteration: 112605
loss: 1.0433540344238281,grad_norm: 0.9999992769756179, iteration: 112606
loss: 1.0199533700942993,grad_norm: 0.9999996392807821, iteration: 112607
loss: 1.003582239151001,grad_norm: 0.9560131828927707, iteration: 112608
loss: 1.0737766027450562,grad_norm: 0.9999991427599136, iteration: 112609
loss: 1.0641382932662964,grad_norm: 0.999999670364085, iteration: 112610
loss: 0.9801567792892456,grad_norm: 0.9999990594983715, iteration: 112611
loss: 1.101430058479309,grad_norm: 0.9999990731228459, iteration: 112612
loss: 0.9993482828140259,grad_norm: 0.9999990869970495, iteration: 112613
loss: 1.0408374071121216,grad_norm: 0.9999993652426039, iteration: 112614
loss: 1.1114320755004883,grad_norm: 0.9999991188035308, iteration: 112615
loss: 1.0782535076141357,grad_norm: 0.9999991670049759, iteration: 112616
loss: 1.0618001222610474,grad_norm: 0.999999092372324, iteration: 112617
loss: 1.0362080335617065,grad_norm: 0.9999990739481268, iteration: 112618
loss: 0.9749832153320312,grad_norm: 0.9999994591695213, iteration: 112619
loss: 1.060333490371704,grad_norm: 0.9999999210055959, iteration: 112620
loss: 1.0035287141799927,grad_norm: 0.89089518081431, iteration: 112621
loss: 0.9906373620033264,grad_norm: 0.887865085691532, iteration: 112622
loss: 1.233139991760254,grad_norm: 1.0000000198810206, iteration: 112623
loss: 0.9907466173171997,grad_norm: 0.8442040869893649, iteration: 112624
loss: 0.970374584197998,grad_norm: 0.9999993838143709, iteration: 112625
loss: 1.0030750036239624,grad_norm: 0.9999995179462935, iteration: 112626
loss: 1.000190258026123,grad_norm: 0.9253905021294506, iteration: 112627
loss: 1.0001286268234253,grad_norm: 0.9805691377727613, iteration: 112628
loss: 1.0982325077056885,grad_norm: 0.9999994564655896, iteration: 112629
loss: 1.2193279266357422,grad_norm: 0.9999996060401357, iteration: 112630
loss: 1.0522429943084717,grad_norm: 0.9999998341526489, iteration: 112631
loss: 1.0317785739898682,grad_norm: 0.999999227969058, iteration: 112632
loss: 1.0060564279556274,grad_norm: 0.9999994546831641, iteration: 112633
loss: 1.0054126977920532,grad_norm: 0.9716185380893486, iteration: 112634
loss: 1.0652737617492676,grad_norm: 0.9999997618856603, iteration: 112635
loss: 1.0726686716079712,grad_norm: 0.9999992831451682, iteration: 112636
loss: 0.9699586629867554,grad_norm: 0.9999991729975678, iteration: 112637
loss: 1.0093709230422974,grad_norm: 0.9999993305029744, iteration: 112638
loss: 1.063477635383606,grad_norm: 0.9999991275553314, iteration: 112639
loss: 1.0192914009094238,grad_norm: 0.9681642691903328, iteration: 112640
loss: 1.0349982976913452,grad_norm: 0.999999475442374, iteration: 112641
loss: 1.0038986206054688,grad_norm: 0.9380394773139052, iteration: 112642
loss: 1.1243263483047485,grad_norm: 1.000000007552259, iteration: 112643
loss: 1.142035722732544,grad_norm: 0.9999994719341804, iteration: 112644
loss: 0.990178644657135,grad_norm: 0.9115603940823448, iteration: 112645
loss: 1.0349881649017334,grad_norm: 0.9999995961368928, iteration: 112646
loss: 1.0111159086227417,grad_norm: 0.999999182324334, iteration: 112647
loss: 1.027334213256836,grad_norm: 0.999999123230115, iteration: 112648
loss: 1.0461266040802002,grad_norm: 0.9999999507484453, iteration: 112649
loss: 1.0303089618682861,grad_norm: 0.9376677971288339, iteration: 112650
loss: 0.983882486820221,grad_norm: 0.8837875794825315, iteration: 112651
loss: 1.116782546043396,grad_norm: 0.9999999194793314, iteration: 112652
loss: 1.0198631286621094,grad_norm: 0.7352929574896294, iteration: 112653
loss: 1.0814026594161987,grad_norm: 0.9999994562751817, iteration: 112654
loss: 1.0780627727508545,grad_norm: 0.9999999673259212, iteration: 112655
loss: 1.1544946432113647,grad_norm: 0.9999991688206511, iteration: 112656
loss: 1.0529764890670776,grad_norm: 0.9999998312400952, iteration: 112657
loss: 1.1221174001693726,grad_norm: 0.9999999911983815, iteration: 112658
loss: 1.1147968769073486,grad_norm: 0.9999998849733969, iteration: 112659
loss: 1.037992000579834,grad_norm: 0.9999999293094987, iteration: 112660
loss: 1.1259478330612183,grad_norm: 0.9999997206639548, iteration: 112661
loss: 1.0299659967422485,grad_norm: 0.9999998399299985, iteration: 112662
loss: 1.036117672920227,grad_norm: 0.9999998663732759, iteration: 112663
loss: 1.1011539697647095,grad_norm: 0.9999998463012524, iteration: 112664
loss: 1.0271586179733276,grad_norm: 0.9999997647257179, iteration: 112665
loss: 1.0048686265945435,grad_norm: 0.8563787046467155, iteration: 112666
loss: 1.0177791118621826,grad_norm: 0.9999991281178908, iteration: 112667
loss: 1.0346161127090454,grad_norm: 0.9999999404288767, iteration: 112668
loss: 1.002892255783081,grad_norm: 0.9999995334171237, iteration: 112669
loss: 1.012318730354309,grad_norm: 0.9999991217809756, iteration: 112670
loss: 1.0953274965286255,grad_norm: 0.9999998112069763, iteration: 112671
loss: 0.9946708679199219,grad_norm: 0.9293834693410766, iteration: 112672
loss: 1.0204635858535767,grad_norm: 0.9999991031279302, iteration: 112673
loss: 1.0732848644256592,grad_norm: 0.9999996351149815, iteration: 112674
loss: 1.0548309087753296,grad_norm: 0.9999991889295596, iteration: 112675
loss: 1.1181895732879639,grad_norm: 0.9999999833674202, iteration: 112676
loss: 0.9758419990539551,grad_norm: 0.9737786327024025, iteration: 112677
loss: 0.9772204756736755,grad_norm: 0.8994384953505857, iteration: 112678
loss: 1.042852520942688,grad_norm: 0.999999228095106, iteration: 112679
loss: 1.0523639917373657,grad_norm: 0.9999993037948945, iteration: 112680
loss: 1.1040451526641846,grad_norm: 0.9999999572352017, iteration: 112681
loss: 1.1016013622283936,grad_norm: 0.9999998022734333, iteration: 112682
loss: 1.1606347560882568,grad_norm: 0.9999998219803664, iteration: 112683
loss: 0.9948116540908813,grad_norm: 0.9999993382119129, iteration: 112684
loss: 1.0280274152755737,grad_norm: 0.9567685659288354, iteration: 112685
loss: 1.0751895904541016,grad_norm: 0.9999995580047562, iteration: 112686
loss: 0.9923380613327026,grad_norm: 0.9273325228871673, iteration: 112687
loss: 1.1626877784729004,grad_norm: 0.9999997718948301, iteration: 112688
loss: 1.0473631620407104,grad_norm: 0.9999990785881774, iteration: 112689
loss: 0.9712761044502258,grad_norm: 0.8810174789637767, iteration: 112690
loss: 1.0109930038452148,grad_norm: 0.9438345483497798, iteration: 112691
loss: 1.1304219961166382,grad_norm: 0.9999995403936263, iteration: 112692
loss: 1.0390383005142212,grad_norm: 0.9532580918425357, iteration: 112693
loss: 1.0564521551132202,grad_norm: 0.9999997752296627, iteration: 112694
loss: 1.0152941942214966,grad_norm: 0.9999996804425347, iteration: 112695
loss: 1.0943973064422607,grad_norm: 0.9999997203385427, iteration: 112696
loss: 1.0327805280685425,grad_norm: 0.9999997673610552, iteration: 112697
loss: 1.1102945804595947,grad_norm: 0.9999995591281259, iteration: 112698
loss: 1.0180555582046509,grad_norm: 0.9390954896275898, iteration: 112699
loss: 1.0436809062957764,grad_norm: 0.9999998419489017, iteration: 112700
loss: 1.015740990638733,grad_norm: 0.9451264071589366, iteration: 112701
loss: 1.0954526662826538,grad_norm: 0.999999355540925, iteration: 112702
loss: 1.0228866338729858,grad_norm: 0.9999994988599196, iteration: 112703
loss: 1.0813590288162231,grad_norm: 0.9999990324415315, iteration: 112704
loss: 1.0921813249588013,grad_norm: 0.9999994134052143, iteration: 112705
loss: 1.0508748292922974,grad_norm: 0.9999994372446742, iteration: 112706
loss: 1.022903561592102,grad_norm: 0.999999160696599, iteration: 112707
loss: 1.05087149143219,grad_norm: 0.934008447113453, iteration: 112708
loss: 1.0267504453659058,grad_norm: 0.9999997214404166, iteration: 112709
loss: 1.0213992595672607,grad_norm: 0.9999991179720051, iteration: 112710
loss: 1.1928540468215942,grad_norm: 0.9999999450730688, iteration: 112711
loss: 1.0603551864624023,grad_norm: 0.9999999678400743, iteration: 112712
loss: 1.020116925239563,grad_norm: 0.9999991965977582, iteration: 112713
loss: 1.0672235488891602,grad_norm: 0.9999999157120371, iteration: 112714
loss: 1.1655082702636719,grad_norm: 0.9999996404744047, iteration: 112715
loss: 1.1310069561004639,grad_norm: 0.9999996863052119, iteration: 112716
loss: 1.0505722761154175,grad_norm: 0.9219820831748345, iteration: 112717
loss: 1.110307216644287,grad_norm: 0.999999945809856, iteration: 112718
loss: 1.1893243789672852,grad_norm: 0.9999990998249402, iteration: 112719
loss: 1.1200486421585083,grad_norm: 0.9999999835863811, iteration: 112720
loss: 0.9939092397689819,grad_norm: 0.8833209745374341, iteration: 112721
loss: 1.0634846687316895,grad_norm: 0.9424171641260377, iteration: 112722
loss: 1.0879805088043213,grad_norm: 0.999999457414415, iteration: 112723
loss: 1.0556538105010986,grad_norm: 0.9999994186161275, iteration: 112724
loss: 1.0309710502624512,grad_norm: 0.9999997327783193, iteration: 112725
loss: 1.0342750549316406,grad_norm: 0.9999996624523984, iteration: 112726
loss: 1.1437077522277832,grad_norm: 0.9999996508872172, iteration: 112727
loss: 1.0739728212356567,grad_norm: 0.9999996104470635, iteration: 112728
loss: 1.0318220853805542,grad_norm: 0.9999996238510025, iteration: 112729
loss: 1.1036485433578491,grad_norm: 0.9999995881666607, iteration: 112730
loss: 1.0001492500305176,grad_norm: 0.8652680891167599, iteration: 112731
loss: 1.0370371341705322,grad_norm: 0.9999998377338736, iteration: 112732
loss: 1.0452535152435303,grad_norm: 0.9999989965502032, iteration: 112733
loss: 1.067647099494934,grad_norm: 0.9999991220655723, iteration: 112734
loss: 1.0781930685043335,grad_norm: 0.9999992934128065, iteration: 112735
loss: 1.1465604305267334,grad_norm: 0.9999994755196414, iteration: 112736
loss: 1.0765541791915894,grad_norm: 0.9999996192386502, iteration: 112737
loss: 1.0374935865402222,grad_norm: 0.9999996141855519, iteration: 112738
loss: 1.0510272979736328,grad_norm: 0.9999997993917381, iteration: 112739
loss: 1.0102901458740234,grad_norm: 0.9185083801824334, iteration: 112740
loss: 0.9987890720367432,grad_norm: 0.9999992107690142, iteration: 112741
loss: 1.1075706481933594,grad_norm: 0.9999999764470247, iteration: 112742
loss: 1.0002201795578003,grad_norm: 0.9999991030325475, iteration: 112743
loss: 0.9964948892593384,grad_norm: 0.9999989994101383, iteration: 112744
loss: 1.060963749885559,grad_norm: 0.9999999517316539, iteration: 112745
loss: 1.0903522968292236,grad_norm: 0.9999991534721747, iteration: 112746
loss: 1.0585370063781738,grad_norm: 0.9999997422744261, iteration: 112747
loss: 1.0079635381698608,grad_norm: 0.8802030307988813, iteration: 112748
loss: 1.0848007202148438,grad_norm: 0.9999999401208784, iteration: 112749
loss: 1.039149522781372,grad_norm: 0.9999996771460155, iteration: 112750
loss: 1.2758954763412476,grad_norm: 0.999999231424945, iteration: 112751
loss: 1.048600196838379,grad_norm: 0.9999998166237207, iteration: 112752
loss: 1.1645339727401733,grad_norm: 0.9999998758673346, iteration: 112753
loss: 1.0176900625228882,grad_norm: 0.9999996662304551, iteration: 112754
loss: 1.0269439220428467,grad_norm: 0.9999992961020382, iteration: 112755
loss: 1.1489201784133911,grad_norm: 0.9999998698802486, iteration: 112756
loss: 1.0264801979064941,grad_norm: 0.9999991997792046, iteration: 112757
loss: 1.170976996421814,grad_norm: 0.9999997524226497, iteration: 112758
loss: 1.0529698133468628,grad_norm: 0.9999995232169112, iteration: 112759
loss: 0.9720824360847473,grad_norm: 0.909171016428127, iteration: 112760
loss: 1.0534472465515137,grad_norm: 0.9999997875725221, iteration: 112761
loss: 1.0676864385604858,grad_norm: 0.9999996158704593, iteration: 112762
loss: 1.2388875484466553,grad_norm: 0.9999999155201965, iteration: 112763
loss: 1.1573412418365479,grad_norm: 0.9999997837069525, iteration: 112764
loss: 1.1085968017578125,grad_norm: 0.9999998419062515, iteration: 112765
loss: 1.0264321565628052,grad_norm: 0.9999990742996132, iteration: 112766
loss: 1.1662911176681519,grad_norm: 0.9999998322661172, iteration: 112767
loss: 1.0835926532745361,grad_norm: 0.9244546260298576, iteration: 112768
loss: 1.0104721784591675,grad_norm: 0.8549505037388998, iteration: 112769
loss: 1.037007451057434,grad_norm: 0.9999992937727162, iteration: 112770
loss: 1.0745658874511719,grad_norm: 0.9999998827148237, iteration: 112771
loss: 1.0961161851882935,grad_norm: 0.9999991296113734, iteration: 112772
loss: 1.0847866535186768,grad_norm: 0.9999996795220855, iteration: 112773
loss: 1.1242321729660034,grad_norm: 0.99999936576292, iteration: 112774
loss: 1.050781011581421,grad_norm: 0.9999996174513502, iteration: 112775
loss: 1.2741338014602661,grad_norm: 0.9999996367208778, iteration: 112776
loss: 1.084973931312561,grad_norm: 0.9999998592090065, iteration: 112777
loss: 1.2973567247390747,grad_norm: 0.9999995244246609, iteration: 112778
loss: 1.138251543045044,grad_norm: 0.9999999508557831, iteration: 112779
loss: 1.370760440826416,grad_norm: 0.9999999695566026, iteration: 112780
loss: 1.119958519935608,grad_norm: 0.9999999369087107, iteration: 112781
loss: 1.0404812097549438,grad_norm: 0.9999992694567125, iteration: 112782
loss: 0.9739225506782532,grad_norm: 0.9999991365368532, iteration: 112783
loss: 1.1043044328689575,grad_norm: 0.999999681711611, iteration: 112784
loss: 1.009721040725708,grad_norm: 0.8770330017942284, iteration: 112785
loss: 1.0230729579925537,grad_norm: 0.9999992516847788, iteration: 112786
loss: 1.0523039102554321,grad_norm: 0.9957806360478223, iteration: 112787
loss: 1.1310803890228271,grad_norm: 0.9999993492679612, iteration: 112788
loss: 1.0158048868179321,grad_norm: 0.9999994047576438, iteration: 112789
loss: 1.118751883506775,grad_norm: 0.9999997761472162, iteration: 112790
loss: 1.0339076519012451,grad_norm: 0.999999957683082, iteration: 112791
loss: 1.0570365190505981,grad_norm: 0.9999999473051108, iteration: 112792
loss: 1.0577009916305542,grad_norm: 0.9999991783152219, iteration: 112793
loss: 1.0328054428100586,grad_norm: 0.9999999394279243, iteration: 112794
loss: 1.0808740854263306,grad_norm: 0.9999998933734429, iteration: 112795
loss: 1.0730239152908325,grad_norm: 0.9999995480779146, iteration: 112796
loss: 1.2001125812530518,grad_norm: 0.9999992705313012, iteration: 112797
loss: 0.9671154618263245,grad_norm: 0.9999991678133187, iteration: 112798
loss: 1.037528157234192,grad_norm: 0.9999998845099476, iteration: 112799
loss: 1.0622233152389526,grad_norm: 0.9330830396724624, iteration: 112800
loss: 1.052238941192627,grad_norm: 0.9999993415932172, iteration: 112801
loss: 0.9662303924560547,grad_norm: 0.8576759714992283, iteration: 112802
loss: 0.9832219481468201,grad_norm: 0.9999999164378118, iteration: 112803
loss: 1.0746814012527466,grad_norm: 0.9999991949852781, iteration: 112804
loss: 0.9765162467956543,grad_norm: 0.9999998799491041, iteration: 112805
loss: 1.1517964601516724,grad_norm: 0.9999997454438816, iteration: 112806
loss: 1.0895639657974243,grad_norm: 0.9999998621273197, iteration: 112807
loss: 1.3753702640533447,grad_norm: 0.9999998771409941, iteration: 112808
loss: 1.1075379848480225,grad_norm: 0.9999995447288975, iteration: 112809
loss: 1.1056054830551147,grad_norm: 0.9999992195407504, iteration: 112810
loss: 1.131211519241333,grad_norm: 0.9999995661084874, iteration: 112811
loss: 1.0178042650222778,grad_norm: 0.9999998710817918, iteration: 112812
loss: 1.0032379627227783,grad_norm: 0.8921693146359474, iteration: 112813
loss: 1.0424416065216064,grad_norm: 0.999999626985971, iteration: 112814
loss: 1.1833237409591675,grad_norm: 0.9999997950419751, iteration: 112815
loss: 1.0854527950286865,grad_norm: 0.9923580147416133, iteration: 112816
loss: 1.2038757801055908,grad_norm: 0.9999999356614087, iteration: 112817
loss: 1.1272333860397339,grad_norm: 0.9999995149342468, iteration: 112818
loss: 1.1728719472885132,grad_norm: 0.9999997848946809, iteration: 112819
loss: 1.0681883096694946,grad_norm: 0.9999991735741498, iteration: 112820
loss: 1.281082034111023,grad_norm: 0.9999996376257339, iteration: 112821
loss: 1.215383529663086,grad_norm: 0.9999993561044763, iteration: 112822
loss: 1.0568255186080933,grad_norm: 0.9701122399942212, iteration: 112823
loss: 1.0887117385864258,grad_norm: 0.9999995573147196, iteration: 112824
loss: 1.0065020322799683,grad_norm: 0.8494332299215933, iteration: 112825
loss: 1.1022915840148926,grad_norm: 0.9999991317438589, iteration: 112826
loss: 1.4370492696762085,grad_norm: 0.9999995693803237, iteration: 112827
loss: 1.1191015243530273,grad_norm: 0.9999991600131459, iteration: 112828
loss: 1.1345843076705933,grad_norm: 0.9999995550149708, iteration: 112829
loss: 1.062620759010315,grad_norm: 0.9999998895453729, iteration: 112830
loss: 1.12015962600708,grad_norm: 0.9999991935300971, iteration: 112831
loss: 1.1127095222473145,grad_norm: 0.9999998200708874, iteration: 112832
loss: 1.3794910907745361,grad_norm: 0.9999999215727139, iteration: 112833
loss: 1.1948683261871338,grad_norm: 0.9999992084914089, iteration: 112834
loss: 1.091984510421753,grad_norm: 0.9999996528094425, iteration: 112835
loss: 1.100233793258667,grad_norm: 0.9999998123521944, iteration: 112836
loss: 1.55659818649292,grad_norm: 0.999999858102141, iteration: 112837
loss: 1.4543300867080688,grad_norm: 0.9999998199851033, iteration: 112838
loss: 1.291792869567871,grad_norm: 0.9999996128737991, iteration: 112839
loss: 1.0772528648376465,grad_norm: 0.999999353716741, iteration: 112840
loss: 1.1336615085601807,grad_norm: 0.9999991116472161, iteration: 112841
loss: 1.069940209388733,grad_norm: 0.9999996899785126, iteration: 112842
loss: 0.9985816478729248,grad_norm: 0.9999995961668515, iteration: 112843
loss: 1.2452870607376099,grad_norm: 0.9999997080492982, iteration: 112844
loss: 1.2630088329315186,grad_norm: 0.9999996991219344, iteration: 112845
loss: 1.088957667350769,grad_norm: 0.9999994351056504, iteration: 112846
loss: 1.0846149921417236,grad_norm: 0.9999997128037477, iteration: 112847
loss: 1.0411945581436157,grad_norm: 0.9999996640705824, iteration: 112848
loss: 1.1116394996643066,grad_norm: 0.9999992122096522, iteration: 112849
loss: 1.09184730052948,grad_norm: 0.9999992139425344, iteration: 112850
loss: 1.131433367729187,grad_norm: 0.9999992382880266, iteration: 112851
loss: 1.0407460927963257,grad_norm: 0.9999994352293469, iteration: 112852
loss: 1.124886155128479,grad_norm: 0.9999992784086105, iteration: 112853
loss: 1.1543805599212646,grad_norm: 0.9999996758182859, iteration: 112854
loss: 1.3959251642227173,grad_norm: 0.9999999202294604, iteration: 112855
loss: 1.1246275901794434,grad_norm: 0.9999990022944227, iteration: 112856
loss: 1.0811161994934082,grad_norm: 0.9849371678961957, iteration: 112857
loss: 1.1043996810913086,grad_norm: 0.9999995922773329, iteration: 112858
loss: 1.363241195678711,grad_norm: 0.9999999378798755, iteration: 112859
loss: 1.1230486631393433,grad_norm: 0.9999997992243885, iteration: 112860
loss: 1.2508878707885742,grad_norm: 0.9999994052692475, iteration: 112861
loss: 1.274094820022583,grad_norm: 0.999999725524984, iteration: 112862
loss: 1.339003086090088,grad_norm: 0.9999996318242069, iteration: 112863
loss: 1.0495924949645996,grad_norm: 0.9999993609332689, iteration: 112864
loss: 1.0232375860214233,grad_norm: 0.8431375816612624, iteration: 112865
loss: 1.116365909576416,grad_norm: 0.9999995284582764, iteration: 112866
loss: 1.3790282011032104,grad_norm: 0.9999994271499958, iteration: 112867
loss: 1.4035927057266235,grad_norm: 0.99999974707181, iteration: 112868
loss: 1.242249846458435,grad_norm: 0.9999999599478526, iteration: 112869
loss: 1.08289635181427,grad_norm: 0.9999998209867217, iteration: 112870
loss: 1.340777039527893,grad_norm: 0.9999997607673557, iteration: 112871
loss: 1.0522626638412476,grad_norm: 0.9999991934574809, iteration: 112872
loss: 1.3462728261947632,grad_norm: 0.9999999194757692, iteration: 112873
loss: 1.4226964712142944,grad_norm: 1.0000000169120384, iteration: 112874
loss: 1.0295991897583008,grad_norm: 0.9999993416511245, iteration: 112875
loss: 1.0546455383300781,grad_norm: 0.9999993965206433, iteration: 112876
loss: 1.1719492673873901,grad_norm: 0.9999996753306836, iteration: 112877
loss: 1.0159896612167358,grad_norm: 0.9999995026960619, iteration: 112878
loss: 1.307376503944397,grad_norm: 0.9999999033284533, iteration: 112879
loss: 1.4371663331985474,grad_norm: 0.9999998063745189, iteration: 112880
loss: 1.1708767414093018,grad_norm: 0.9999993304819874, iteration: 112881
loss: 1.0675108432769775,grad_norm: 0.9999995452214908, iteration: 112882
loss: 1.0139641761779785,grad_norm: 0.9999994096261597, iteration: 112883
loss: 1.4451414346694946,grad_norm: 0.9999998643684469, iteration: 112884
loss: 1.4251556396484375,grad_norm: 0.9999998408633304, iteration: 112885
loss: 1.3333594799041748,grad_norm: 0.9999999814135622, iteration: 112886
loss: 1.1000117063522339,grad_norm: 0.999999685919012, iteration: 112887
loss: 1.1565271615982056,grad_norm: 0.9999996560457625, iteration: 112888
loss: 1.0356149673461914,grad_norm: 0.9999990905559222, iteration: 112889
loss: 1.5927644968032837,grad_norm: 0.9999998423372823, iteration: 112890
loss: 1.4162712097167969,grad_norm: 1.0000000503182767, iteration: 112891
loss: 1.0938128232955933,grad_norm: 0.9999993831955485, iteration: 112892
loss: 1.2989439964294434,grad_norm: 0.9999997861663648, iteration: 112893
loss: 1.1373366117477417,grad_norm: 0.9999999837815646, iteration: 112894
loss: 1.0613282918930054,grad_norm: 0.9999997837153769, iteration: 112895
loss: 1.7376742362976074,grad_norm: 0.9999999569863605, iteration: 112896
loss: 1.0876644849777222,grad_norm: 0.9999996546515186, iteration: 112897
loss: 1.3766040802001953,grad_norm: 0.99999985172992, iteration: 112898
loss: 1.0168566703796387,grad_norm: 0.9999994766501363, iteration: 112899
loss: 1.7640413045883179,grad_norm: 0.9999998447577862, iteration: 112900
loss: 1.086173176765442,grad_norm: 0.9999995313248821, iteration: 112901
loss: 1.511536955833435,grad_norm: 1.0000000518987193, iteration: 112902
loss: 1.36051344871521,grad_norm: 1.0000000213130775, iteration: 112903
loss: 1.107627511024475,grad_norm: 0.9999993478485005, iteration: 112904
loss: 1.037724256515503,grad_norm: 0.9999999917389378, iteration: 112905
loss: 1.0620001554489136,grad_norm: 0.9999993873727929, iteration: 112906
loss: 1.031274676322937,grad_norm: 0.999999459843093, iteration: 112907
loss: 1.1292881965637207,grad_norm: 0.9999998199805554, iteration: 112908
loss: 1.2207194566726685,grad_norm: 0.9999993826219302, iteration: 112909
loss: 1.0196400880813599,grad_norm: 0.9999990570986194, iteration: 112910
loss: 1.4432692527770996,grad_norm: 0.9999995902342366, iteration: 112911
loss: 1.2450594902038574,grad_norm: 0.9999998234297488, iteration: 112912
loss: 1.2320791482925415,grad_norm: 0.9999998557979449, iteration: 112913
loss: 1.009323000907898,grad_norm: 0.8346197140756633, iteration: 112914
loss: 1.1333277225494385,grad_norm: 0.9999992365225375, iteration: 112915
loss: 1.192244529724121,grad_norm: 0.999999482149207, iteration: 112916
loss: 1.0983128547668457,grad_norm: 0.9999994942541504, iteration: 112917
loss: 0.9974902868270874,grad_norm: 0.9245300682120197, iteration: 112918
loss: 1.1786621809005737,grad_norm: 0.9999994378026251, iteration: 112919
loss: 1.0281484127044678,grad_norm: 0.98864396033369, iteration: 112920
loss: 1.2608556747436523,grad_norm: 0.9999997757357462, iteration: 112921
loss: 0.9549742937088013,grad_norm: 0.8723795601111636, iteration: 112922
loss: 1.17710280418396,grad_norm: 0.9999998605654801, iteration: 112923
loss: 1.1391268968582153,grad_norm: 0.9999994313486554, iteration: 112924
loss: 1.3785109519958496,grad_norm: 0.999999916610738, iteration: 112925
loss: 1.2030833959579468,grad_norm: 0.9999998621739892, iteration: 112926
loss: 1.184036374092102,grad_norm: 0.9999991708508387, iteration: 112927
loss: 1.0519592761993408,grad_norm: 0.999999245557448, iteration: 112928
loss: 1.163373351097107,grad_norm: 0.9999994860308989, iteration: 112929
loss: 1.1812357902526855,grad_norm: 0.9999997430155739, iteration: 112930
loss: 1.083875060081482,grad_norm: 0.9999996845920305, iteration: 112931
loss: 1.0322444438934326,grad_norm: 0.9999993219855281, iteration: 112932
loss: 1.2723991870880127,grad_norm: 0.9999997467401848, iteration: 112933
loss: 1.2863329648971558,grad_norm: 0.9999997315100276, iteration: 112934
loss: 1.1503448486328125,grad_norm: 0.9999994398545277, iteration: 112935
loss: 1.0819405317306519,grad_norm: 0.9999999204593949, iteration: 112936
loss: 1.215681791305542,grad_norm: 0.9999997762222281, iteration: 112937
loss: 1.1137206554412842,grad_norm: 0.9999995340380301, iteration: 112938
loss: 1.0945203304290771,grad_norm: 0.9999999225482359, iteration: 112939
loss: 1.2437852621078491,grad_norm: 0.9999997421487005, iteration: 112940
loss: 1.070067286491394,grad_norm: 0.9999997637817369, iteration: 112941
loss: 1.164867639541626,grad_norm: 0.999999606034743, iteration: 112942
loss: 1.0177985429763794,grad_norm: 0.999999224293309, iteration: 112943
loss: 1.0754725933074951,grad_norm: 0.9999993385312321, iteration: 112944
loss: 1.04948890209198,grad_norm: 0.9999990809717959, iteration: 112945
loss: 1.2252193689346313,grad_norm: 0.9999998502827261, iteration: 112946
loss: 1.1108660697937012,grad_norm: 0.9999999340018852, iteration: 112947
loss: 1.0629349946975708,grad_norm: 0.9999991880730889, iteration: 112948
loss: 1.2875556945800781,grad_norm: 0.999999795554732, iteration: 112949
loss: 1.1875226497650146,grad_norm: 0.999999667288284, iteration: 112950
loss: 1.1873530149459839,grad_norm: 0.9999999397355096, iteration: 112951
loss: 1.0065054893493652,grad_norm: 0.8867386695825185, iteration: 112952
loss: 1.020861268043518,grad_norm: 0.8169235188099895, iteration: 112953
loss: 1.0314874649047852,grad_norm: 0.9999990833686542, iteration: 112954
loss: 0.9976853132247925,grad_norm: 0.9999992450088411, iteration: 112955
loss: 1.0756806135177612,grad_norm: 0.999999383438213, iteration: 112956
loss: 1.018914818763733,grad_norm: 0.9999992115435608, iteration: 112957
loss: 1.0852562189102173,grad_norm: 0.9999997429741556, iteration: 112958
loss: 1.034447431564331,grad_norm: 0.8778784671850645, iteration: 112959
loss: 1.0880900621414185,grad_norm: 0.9999998199444804, iteration: 112960
loss: 1.0322319269180298,grad_norm: 0.896256771072626, iteration: 112961
loss: 1.0082752704620361,grad_norm: 0.9999990855327691, iteration: 112962
loss: 1.029958724975586,grad_norm: 0.9999999452704067, iteration: 112963
loss: 1.112485647201538,grad_norm: 0.9999991429612474, iteration: 112964
loss: 1.00653874874115,grad_norm: 0.859755514059929, iteration: 112965
loss: 1.2148292064666748,grad_norm: 0.9999997865867656, iteration: 112966
loss: 1.0143755674362183,grad_norm: 0.8856850581688556, iteration: 112967
loss: 1.0213454961776733,grad_norm: 0.9999996346291636, iteration: 112968
loss: 1.0183156728744507,grad_norm: 0.8936569138439769, iteration: 112969
loss: 1.0518243312835693,grad_norm: 0.9999996725930551, iteration: 112970
loss: 1.085577130317688,grad_norm: 0.9999997490823667, iteration: 112971
loss: 1.0014711618423462,grad_norm: 0.8435167727121126, iteration: 112972
loss: 1.1001887321472168,grad_norm: 0.9999996149815792, iteration: 112973
loss: 0.9927451610565186,grad_norm: 0.9999998629539678, iteration: 112974
loss: 1.0171207189559937,grad_norm: 0.80923528223665, iteration: 112975
loss: 1.0019336938858032,grad_norm: 0.9999995706925309, iteration: 112976
loss: 1.0356887578964233,grad_norm: 0.9999996783410532, iteration: 112977
loss: 0.9973937273025513,grad_norm: 0.9328841927380076, iteration: 112978
loss: 1.0285000801086426,grad_norm: 0.9999992279457147, iteration: 112979
loss: 0.9913794994354248,grad_norm: 0.8549113663268155, iteration: 112980
loss: 1.0184547901153564,grad_norm: 0.9434405042352997, iteration: 112981
loss: 1.0854698419570923,grad_norm: 0.9999993653630658, iteration: 112982
loss: 0.9729447960853577,grad_norm: 0.9999997362237819, iteration: 112983
loss: 0.9759959578514099,grad_norm: 0.8702914644007692, iteration: 112984
loss: 0.9782457947731018,grad_norm: 0.9032243648364994, iteration: 112985
loss: 1.0094748735427856,grad_norm: 0.9999997054918225, iteration: 112986
loss: 0.9790641665458679,grad_norm: 0.9819408833383887, iteration: 112987
loss: 1.056588053703308,grad_norm: 0.9999994811401404, iteration: 112988
loss: 1.0246789455413818,grad_norm: 0.9487624453716658, iteration: 112989
loss: 0.992164134979248,grad_norm: 0.8532119618599686, iteration: 112990
loss: 1.0816744565963745,grad_norm: 0.9999996099317807, iteration: 112991
loss: 1.0910966396331787,grad_norm: 0.9999999607845179, iteration: 112992
loss: 1.0892245769500732,grad_norm: 0.9999995174686978, iteration: 112993
loss: 1.0292351245880127,grad_norm: 0.8667288514960347, iteration: 112994
loss: 1.0597625970840454,grad_norm: 0.9999991831319993, iteration: 112995
loss: 1.0282752513885498,grad_norm: 0.9999994998255496, iteration: 112996
loss: 1.0461920499801636,grad_norm: 0.9999996436945064, iteration: 112997
loss: 1.0310351848602295,grad_norm: 0.9999990082637072, iteration: 112998
loss: 1.1471201181411743,grad_norm: 0.999999952421741, iteration: 112999
loss: 1.037858009338379,grad_norm: 0.9999994891232148, iteration: 113000
loss: 1.0765187740325928,grad_norm: 0.999999155278122, iteration: 113001
loss: 0.9898400902748108,grad_norm: 0.8026920880108246, iteration: 113002
loss: 1.0090240240097046,grad_norm: 0.9999994253289566, iteration: 113003
loss: 0.9921867847442627,grad_norm: 0.9999991664494403, iteration: 113004
loss: 1.0240000486373901,grad_norm: 0.9999992449720225, iteration: 113005
loss: 1.0303099155426025,grad_norm: 0.9999994197004697, iteration: 113006
loss: 0.9846290946006775,grad_norm: 0.9999991416709627, iteration: 113007
loss: 1.0167927742004395,grad_norm: 0.9999998728225565, iteration: 113008
loss: 1.086719036102295,grad_norm: 0.9999996712757117, iteration: 113009
loss: 1.0142812728881836,grad_norm: 0.9999992658926792, iteration: 113010
loss: 1.0366872549057007,grad_norm: 0.9999995778605727, iteration: 113011
loss: 0.9896496534347534,grad_norm: 0.9626148515570153, iteration: 113012
loss: 1.2173652648925781,grad_norm: 0.99999953778205, iteration: 113013
loss: 0.9767976999282837,grad_norm: 0.9177278995436499, iteration: 113014
loss: 1.0466302633285522,grad_norm: 0.9999993935791068, iteration: 113015
loss: 1.0950418710708618,grad_norm: 0.9999993945612555, iteration: 113016
loss: 0.9917678833007812,grad_norm: 0.9999995238014788, iteration: 113017
loss: 0.9707772731781006,grad_norm: 0.9999990830761767, iteration: 113018
loss: 0.9962250590324402,grad_norm: 0.9999991123694323, iteration: 113019
loss: 1.0781916379928589,grad_norm: 0.9999999179847727, iteration: 113020
loss: 1.0414912700653076,grad_norm: 0.9999991843002403, iteration: 113021
loss: 1.1383516788482666,grad_norm: 0.9999997504028845, iteration: 113022
loss: 1.039279818534851,grad_norm: 0.8660220894078693, iteration: 113023
loss: 1.0582842826843262,grad_norm: 0.9999993621663317, iteration: 113024
loss: 1.00853431224823,grad_norm: 0.9999991417994841, iteration: 113025
loss: 1.007100224494934,grad_norm: 0.9893534970074442, iteration: 113026
loss: 1.0156383514404297,grad_norm: 0.999999609199765, iteration: 113027
loss: 1.0307685136795044,grad_norm: 0.9999992800496846, iteration: 113028
loss: 1.0356272459030151,grad_norm: 0.8376789103871287, iteration: 113029
loss: 1.026270866394043,grad_norm: 0.7681856018723949, iteration: 113030
loss: 1.012702226638794,grad_norm: 0.9521284342801574, iteration: 113031
loss: 1.130355954170227,grad_norm: 0.9999993294113384, iteration: 113032
loss: 1.0293642282485962,grad_norm: 0.9225945417299838, iteration: 113033
loss: 1.0286543369293213,grad_norm: 0.9999992147957153, iteration: 113034
loss: 1.0222806930541992,grad_norm: 0.9999998767124312, iteration: 113035
loss: 1.0704425573349,grad_norm: 0.9999994271885834, iteration: 113036
loss: 1.0527061223983765,grad_norm: 0.8826301979799621, iteration: 113037
loss: 1.2393447160720825,grad_norm: 0.9999997959135708, iteration: 113038
loss: 1.0331202745437622,grad_norm: 0.9477418436430104, iteration: 113039
loss: 1.0432072877883911,grad_norm: 0.9999991384253821, iteration: 113040
loss: 1.017930030822754,grad_norm: 0.9709600865178885, iteration: 113041
loss: 1.13139009475708,grad_norm: 0.9999997123460328, iteration: 113042
loss: 1.0209252834320068,grad_norm: 0.9999990682045438, iteration: 113043
loss: 1.0126829147338867,grad_norm: 0.9999991327568654, iteration: 113044
loss: 1.129211187362671,grad_norm: 0.9999996343203249, iteration: 113045
loss: 0.9990203380584717,grad_norm: 0.9999990262244772, iteration: 113046
loss: 1.0407416820526123,grad_norm: 0.9191893740263105, iteration: 113047
loss: 1.001356840133667,grad_norm: 0.8501193388288435, iteration: 113048
loss: 1.064732551574707,grad_norm: 0.9999994684874507, iteration: 113049
loss: 1.099234700202942,grad_norm: 0.9999996569670384, iteration: 113050
loss: 1.0054105520248413,grad_norm: 0.9999989886572629, iteration: 113051
loss: 1.2371224164962769,grad_norm: 0.9999992582637536, iteration: 113052
loss: 1.0120618343353271,grad_norm: 0.9999994959289379, iteration: 113053
loss: 1.1216118335723877,grad_norm: 0.9999998308226115, iteration: 113054
loss: 1.0017458200454712,grad_norm: 0.9051412092191652, iteration: 113055
loss: 1.0651469230651855,grad_norm: 0.9999994182889897, iteration: 113056
loss: 1.0188393592834473,grad_norm: 0.999999212421854, iteration: 113057
loss: 1.0373866558074951,grad_norm: 0.9999996177857834, iteration: 113058
loss: 1.0639225244522095,grad_norm: 0.9838963172763956, iteration: 113059
loss: 1.0454565286636353,grad_norm: 0.9999995694809166, iteration: 113060
loss: 0.9877159595489502,grad_norm: 0.8708656769553942, iteration: 113061
loss: 1.0205793380737305,grad_norm: 0.9433135166521238, iteration: 113062
loss: 1.0743273496627808,grad_norm: 0.999999308698427, iteration: 113063
loss: 1.0369876623153687,grad_norm: 0.9999995070997405, iteration: 113064
loss: 0.9976266026496887,grad_norm: 0.9999990838534507, iteration: 113065
loss: 1.0786820650100708,grad_norm: 0.999999333953118, iteration: 113066
loss: 1.0675499439239502,grad_norm: 0.9999995095149811, iteration: 113067
loss: 1.0449851751327515,grad_norm: 0.9999992498846493, iteration: 113068
loss: 1.0028290748596191,grad_norm: 0.9999992161647627, iteration: 113069
loss: 1.0593042373657227,grad_norm: 0.9999999031643704, iteration: 113070
loss: 1.054416537284851,grad_norm: 0.9999999397202362, iteration: 113071
loss: 0.9467874765396118,grad_norm: 0.7628286851882842, iteration: 113072
loss: 1.0078266859054565,grad_norm: 0.9999992626230637, iteration: 113073
loss: 1.0243457555770874,grad_norm: 0.9150339292751646, iteration: 113074
loss: 1.136405110359192,grad_norm: 0.9999998765149304, iteration: 113075
loss: 1.041768193244934,grad_norm: 0.9999993543057139, iteration: 113076
loss: 0.9609281420707703,grad_norm: 0.8937736819653735, iteration: 113077
loss: 1.0987801551818848,grad_norm: 0.9999996117841861, iteration: 113078
loss: 1.0766441822052002,grad_norm: 0.9278038466026721, iteration: 113079
loss: 1.036731243133545,grad_norm: 0.9999998401822712, iteration: 113080
loss: 1.0039515495300293,grad_norm: 0.9999990992759219, iteration: 113081
loss: 1.0338685512542725,grad_norm: 0.9999995931713048, iteration: 113082
loss: 0.9882659912109375,grad_norm: 0.9226330865504452, iteration: 113083
loss: 1.0356370210647583,grad_norm: 0.9999994351708273, iteration: 113084
loss: 0.9900425672531128,grad_norm: 0.7737374140130117, iteration: 113085
loss: 1.1265432834625244,grad_norm: 0.9999996899188859, iteration: 113086
loss: 1.0342741012573242,grad_norm: 0.9999991772754501, iteration: 113087
loss: 0.99055016040802,grad_norm: 0.8860379829148577, iteration: 113088
loss: 1.0231364965438843,grad_norm: 0.8588753129798149, iteration: 113089
loss: 1.0044432878494263,grad_norm: 0.9999992395361386, iteration: 113090
loss: 1.039492130279541,grad_norm: 0.9999995803856665, iteration: 113091
loss: 1.0848969221115112,grad_norm: 0.9999994647622534, iteration: 113092
loss: 0.9777956604957581,grad_norm: 0.8853585493537872, iteration: 113093
loss: 1.0081491470336914,grad_norm: 0.9999991121829563, iteration: 113094
loss: 0.9917019605636597,grad_norm: 0.8471934117245117, iteration: 113095
loss: 0.9959484934806824,grad_norm: 0.9712179820021208, iteration: 113096
loss: 1.0601829290390015,grad_norm: 0.9999996904662545, iteration: 113097
loss: 1.0858458280563354,grad_norm: 0.999999217344816, iteration: 113098
loss: 1.1496877670288086,grad_norm: 0.9999997876645136, iteration: 113099
loss: 1.1083612442016602,grad_norm: 0.9999991736760712, iteration: 113100
loss: 1.032912015914917,grad_norm: 0.9999992758082706, iteration: 113101
loss: 0.9777069091796875,grad_norm: 0.8201130542766994, iteration: 113102
loss: 1.0190876722335815,grad_norm: 0.9999994007467169, iteration: 113103
loss: 1.0243749618530273,grad_norm: 0.9999993622617389, iteration: 113104
loss: 1.0338796377182007,grad_norm: 0.9070801654914006, iteration: 113105
loss: 1.0341227054595947,grad_norm: 0.9999992643484646, iteration: 113106
loss: 0.9985445737838745,grad_norm: 0.9999991020294591, iteration: 113107
loss: 0.9912940263748169,grad_norm: 0.8774271211683179, iteration: 113108
loss: 1.099658489227295,grad_norm: 0.9999991061795355, iteration: 113109
loss: 1.0897361040115356,grad_norm: 0.9999991473154007, iteration: 113110
loss: 1.012036919593811,grad_norm: 0.9999990657638398, iteration: 113111
loss: 1.2473053932189941,grad_norm: 0.9999999195480925, iteration: 113112
loss: 1.0479803085327148,grad_norm: 0.999999405583155, iteration: 113113
loss: 0.9982394576072693,grad_norm: 0.9999991387843624, iteration: 113114
loss: 1.0563842058181763,grad_norm: 0.9999990116190679, iteration: 113115
loss: 1.0148831605911255,grad_norm: 0.8694036521131336, iteration: 113116
loss: 1.0702751874923706,grad_norm: 0.8382794548533312, iteration: 113117
loss: 1.088437795639038,grad_norm: 0.9999998286229914, iteration: 113118
loss: 0.9878510236740112,grad_norm: 0.7639129451381305, iteration: 113119
loss: 0.9734005331993103,grad_norm: 0.8443261089149126, iteration: 113120
loss: 1.0143424272537231,grad_norm: 0.9596799662207879, iteration: 113121
loss: 1.0236458778381348,grad_norm: 0.9475313804197842, iteration: 113122
loss: 1.0469926595687866,grad_norm: 0.9880423347792223, iteration: 113123
loss: 1.0483850240707397,grad_norm: 0.9999989774976907, iteration: 113124
loss: 1.0357054471969604,grad_norm: 0.9999990778326046, iteration: 113125
loss: 1.0173883438110352,grad_norm: 0.9999990564603076, iteration: 113126
loss: 0.9750974178314209,grad_norm: 0.7744452329568025, iteration: 113127
loss: 1.02839994430542,grad_norm: 0.9999991250409515, iteration: 113128
loss: 1.0350899696350098,grad_norm: 0.8303296009064245, iteration: 113129
loss: 1.0545461177825928,grad_norm: 0.9999996005168251, iteration: 113130
loss: 0.993259847164154,grad_norm: 0.9999994750756106, iteration: 113131
loss: 0.9608043432235718,grad_norm: 0.9999991441355498, iteration: 113132
loss: 1.0356837511062622,grad_norm: 0.9341650019998277, iteration: 113133
loss: 0.9799165725708008,grad_norm: 0.8594608490317466, iteration: 113134
loss: 1.1467376947402954,grad_norm: 0.9999996438963985, iteration: 113135
loss: 1.0885337591171265,grad_norm: 0.9999990301424354, iteration: 113136
loss: 0.9762848019599915,grad_norm: 0.82821390741137, iteration: 113137
loss: 0.9948287010192871,grad_norm: 0.9659061956742755, iteration: 113138
loss: 1.019723892211914,grad_norm: 0.999999043107093, iteration: 113139
loss: 1.001954197883606,grad_norm: 0.8800538594087355, iteration: 113140
loss: 1.0454691648483276,grad_norm: 0.814011468158644, iteration: 113141
loss: 1.053797960281372,grad_norm: 0.9999998885330111, iteration: 113142
loss: 1.0407519340515137,grad_norm: 0.9409477805372763, iteration: 113143
loss: 0.9787405729293823,grad_norm: 0.913559838948115, iteration: 113144
loss: 1.0406140089035034,grad_norm: 0.9999992237401196, iteration: 113145
loss: 1.0089187622070312,grad_norm: 0.9033695532111543, iteration: 113146
loss: 0.9805823564529419,grad_norm: 0.9999991438106628, iteration: 113147
loss: 1.1035157442092896,grad_norm: 0.9999992294648677, iteration: 113148
loss: 1.0554136037826538,grad_norm: 0.999999151528115, iteration: 113149
loss: 1.0188686847686768,grad_norm: 0.9137191488765778, iteration: 113150
loss: 0.999674379825592,grad_norm: 0.8949496894402772, iteration: 113151
loss: 1.0124808549880981,grad_norm: 0.8724588846230763, iteration: 113152
loss: 1.0139243602752686,grad_norm: 0.9999993935882338, iteration: 113153
loss: 0.9708479046821594,grad_norm: 0.9006724517403036, iteration: 113154
loss: 0.9857041239738464,grad_norm: 0.8231713078389149, iteration: 113155
loss: 0.98028165102005,grad_norm: 0.8063513821187269, iteration: 113156
loss: 0.9997233152389526,grad_norm: 1.000000019889889, iteration: 113157
loss: 0.9901784658432007,grad_norm: 0.9204230586869352, iteration: 113158
loss: 1.088631510734558,grad_norm: 0.9999996778345003, iteration: 113159
loss: 0.9927542209625244,grad_norm: 0.7353463493670169, iteration: 113160
loss: 1.002888560295105,grad_norm: 0.8986283995378506, iteration: 113161
loss: 0.9634407758712769,grad_norm: 0.9203665617953448, iteration: 113162
loss: 1.0650349855422974,grad_norm: 0.8835296998296318, iteration: 113163
loss: 1.0189918279647827,grad_norm: 0.9999996994496039, iteration: 113164
loss: 1.0325807332992554,grad_norm: 0.8401619407608167, iteration: 113165
loss: 1.048944354057312,grad_norm: 0.9999992757127012, iteration: 113166
loss: 0.9894528985023499,grad_norm: 0.7852831267806779, iteration: 113167
loss: 0.9940652847290039,grad_norm: 0.9774084179921565, iteration: 113168
loss: 0.9627014994621277,grad_norm: 0.8104630180380148, iteration: 113169
loss: 0.9481769800186157,grad_norm: 0.8669983863732974, iteration: 113170
loss: 1.031092643737793,grad_norm: 0.9999997659392957, iteration: 113171
loss: 1.0767797231674194,grad_norm: 0.8622727149220276, iteration: 113172
loss: 1.025052785873413,grad_norm: 0.9999995743976243, iteration: 113173
loss: 1.0290393829345703,grad_norm: 0.9418674697501077, iteration: 113174
loss: 1.0007975101470947,grad_norm: 0.8924646347071519, iteration: 113175
loss: 1.0141503810882568,grad_norm: 0.999999144985949, iteration: 113176
loss: 1.025910496711731,grad_norm: 0.9614813034936105, iteration: 113177
loss: 1.0974444150924683,grad_norm: 0.9999995943259228, iteration: 113178
loss: 1.039844274520874,grad_norm: 0.8948484241010253, iteration: 113179
loss: 1.0233349800109863,grad_norm: 0.841347942608683, iteration: 113180
loss: 1.0116428136825562,grad_norm: 0.9999992429357139, iteration: 113181
loss: 1.0226243734359741,grad_norm: 0.9772029059308527, iteration: 113182
loss: 1.01899254322052,grad_norm: 0.725159258091487, iteration: 113183
loss: 1.02589750289917,grad_norm: 0.9999992611459579, iteration: 113184
loss: 1.0509576797485352,grad_norm: 0.9999996811324627, iteration: 113185
loss: 1.005130648612976,grad_norm: 0.9679962754921642, iteration: 113186
loss: 1.058298945426941,grad_norm: 0.9999993098304113, iteration: 113187
loss: 1.0074647665023804,grad_norm: 0.9999996979034358, iteration: 113188
loss: 0.9781227111816406,grad_norm: 0.9999994374917188, iteration: 113189
loss: 0.9859135150909424,grad_norm: 0.8580246516259958, iteration: 113190
loss: 1.123500943183899,grad_norm: 0.9999996859548727, iteration: 113191
loss: 0.9920028448104858,grad_norm: 0.9086165876136221, iteration: 113192
loss: 1.0079957246780396,grad_norm: 0.9999990624624141, iteration: 113193
loss: 1.0418610572814941,grad_norm: 0.9999991039926063, iteration: 113194
loss: 0.9919431805610657,grad_norm: 0.9237860982158551, iteration: 113195
loss: 1.0457043647766113,grad_norm: 0.9999994871105933, iteration: 113196
loss: 0.9690470695495605,grad_norm: 0.8061851917071615, iteration: 113197
loss: 1.0354104042053223,grad_norm: 0.9999991381039588, iteration: 113198
loss: 0.986933708190918,grad_norm: 0.8738348015675492, iteration: 113199
loss: 0.9982219934463501,grad_norm: 0.9656396290037081, iteration: 113200
loss: 0.9944895505905151,grad_norm: 0.9999989843617179, iteration: 113201
loss: 1.0338988304138184,grad_norm: 0.9999992799944766, iteration: 113202
loss: 1.0456700325012207,grad_norm: 0.9999990683705914, iteration: 113203
loss: 1.0063467025756836,grad_norm: 0.9999990734047551, iteration: 113204
loss: 1.0051189661026,grad_norm: 0.999999105411206, iteration: 113205
loss: 1.0102075338363647,grad_norm: 0.8111849048407526, iteration: 113206
loss: 1.0386090278625488,grad_norm: 0.9999995936275003, iteration: 113207
loss: 0.9777494072914124,grad_norm: 0.9345192706552515, iteration: 113208
loss: 1.013489842414856,grad_norm: 0.999999225009829, iteration: 113209
loss: 0.987380862236023,grad_norm: 0.8357096660620372, iteration: 113210
loss: 1.0039489269256592,grad_norm: 0.9185005479320225, iteration: 113211
loss: 1.0148385763168335,grad_norm: 0.9999995999790776, iteration: 113212
loss: 1.000680685043335,grad_norm: 0.9999998934457653, iteration: 113213
loss: 0.9809777140617371,grad_norm: 0.9038532733777797, iteration: 113214
loss: 0.9741313457489014,grad_norm: 0.9999990188489832, iteration: 113215
loss: 0.9665539860725403,grad_norm: 0.8625766820101244, iteration: 113216
loss: 1.019977331161499,grad_norm: 0.9999992449691049, iteration: 113217
loss: 1.104101538658142,grad_norm: 0.9999993184785373, iteration: 113218
loss: 1.0513105392456055,grad_norm: 0.9999995554062148, iteration: 113219
loss: 1.0104278326034546,grad_norm: 0.8536335937527286, iteration: 113220
loss: 0.9688546657562256,grad_norm: 0.8128560429145052, iteration: 113221
loss: 1.0266425609588623,grad_norm: 0.8338337996389632, iteration: 113222
loss: 0.9889281988143921,grad_norm: 0.9999991523051275, iteration: 113223
loss: 0.9986662268638611,grad_norm: 0.9999989831350711, iteration: 113224
loss: 1.0090246200561523,grad_norm: 0.9178842246363854, iteration: 113225
loss: 1.0303562879562378,grad_norm: 0.8385600144899216, iteration: 113226
loss: 1.0072706937789917,grad_norm: 0.9586516973511355, iteration: 113227
loss: 1.005589485168457,grad_norm: 0.9827020492122875, iteration: 113228
loss: 1.00835382938385,grad_norm: 0.9999993117466287, iteration: 113229
loss: 1.1148751974105835,grad_norm: 0.9999996702980035, iteration: 113230
loss: 1.0906733274459839,grad_norm: 0.9999998448104972, iteration: 113231
loss: 1.0039377212524414,grad_norm: 0.9999991066666034, iteration: 113232
loss: 0.9847376346588135,grad_norm: 0.9999990484816642, iteration: 113233
loss: 1.1061809062957764,grad_norm: 0.999999647776652, iteration: 113234
loss: 1.0267547369003296,grad_norm: 0.999999158771125, iteration: 113235
loss: 1.0145317316055298,grad_norm: 0.9999994330432654, iteration: 113236
loss: 1.1130523681640625,grad_norm: 0.9999990488506272, iteration: 113237
loss: 1.0118025541305542,grad_norm: 0.9999994711710674, iteration: 113238
loss: 1.1260840892791748,grad_norm: 0.9999996050931276, iteration: 113239
loss: 0.9780315160751343,grad_norm: 0.8872665914604445, iteration: 113240
loss: 1.021515130996704,grad_norm: 0.9999997463844695, iteration: 113241
loss: 1.0025429725646973,grad_norm: 0.9999990982154078, iteration: 113242
loss: 1.0143871307373047,grad_norm: 0.9999992400507459, iteration: 113243
loss: 1.0261560678482056,grad_norm: 0.8034217066459373, iteration: 113244
loss: 1.022566795349121,grad_norm: 0.9999991532584493, iteration: 113245
loss: 1.0586001873016357,grad_norm: 0.9999997957007356, iteration: 113246
loss: 1.0472718477249146,grad_norm: 0.9999994704915722, iteration: 113247
loss: 0.9821851849555969,grad_norm: 0.9999991682673849, iteration: 113248
loss: 1.0599673986434937,grad_norm: 0.9196822612426837, iteration: 113249
loss: 0.9443137049674988,grad_norm: 0.8309674419635873, iteration: 113250
loss: 1.0205585956573486,grad_norm: 0.8109368158236878, iteration: 113251
loss: 0.981040894985199,grad_norm: 0.8398210933083929, iteration: 113252
loss: 1.019668459892273,grad_norm: 0.9999996166893491, iteration: 113253
loss: 1.0059340000152588,grad_norm: 0.8867094873400393, iteration: 113254
loss: 0.9677106738090515,grad_norm: 0.8697914803963913, iteration: 113255
loss: 1.0386806726455688,grad_norm: 0.9999991649561003, iteration: 113256
loss: 1.009116768836975,grad_norm: 0.9999996324128183, iteration: 113257
loss: 0.9988971948623657,grad_norm: 0.7657164737278384, iteration: 113258
loss: 1.0817787647247314,grad_norm: 1.000000034142852, iteration: 113259
loss: 1.0116320848464966,grad_norm: 0.8304351115266179, iteration: 113260
loss: 1.0014103651046753,grad_norm: 0.9999990863499572, iteration: 113261
loss: 1.0413730144500732,grad_norm: 0.9775281066249379, iteration: 113262
loss: 0.9972406625747681,grad_norm: 0.9999994531500369, iteration: 113263
loss: 1.0357400178909302,grad_norm: 0.9320183265069271, iteration: 113264
loss: 0.9676334261894226,grad_norm: 0.9680391766892825, iteration: 113265
loss: 0.9888663291931152,grad_norm: 0.9930922270260948, iteration: 113266
loss: 1.0137161016464233,grad_norm: 0.8167993283021057, iteration: 113267
loss: 0.9995197057723999,grad_norm: 0.9446956764308352, iteration: 113268
loss: 1.012112021446228,grad_norm: 0.8240440081500094, iteration: 113269
loss: 0.9946226477622986,grad_norm: 0.9904594229180678, iteration: 113270
loss: 1.0039209127426147,grad_norm: 0.9999990704760038, iteration: 113271
loss: 1.094226360321045,grad_norm: 0.9999991902029591, iteration: 113272
loss: 0.9656157493591309,grad_norm: 0.7148034486943162, iteration: 113273
loss: 0.9978761672973633,grad_norm: 0.9999990574444669, iteration: 113274
loss: 0.9764412641525269,grad_norm: 0.9649016493480506, iteration: 113275
loss: 1.0404398441314697,grad_norm: 0.8549918721843462, iteration: 113276
loss: 1.0498626232147217,grad_norm: 0.8506593813204506, iteration: 113277
loss: 1.0560870170593262,grad_norm: 0.9999994258241303, iteration: 113278
loss: 1.025385856628418,grad_norm: 0.9266351430425622, iteration: 113279
loss: 1.0356179475784302,grad_norm: 0.958906564138603, iteration: 113280
loss: 0.9943508505821228,grad_norm: 0.8163686646070916, iteration: 113281
loss: 1.1023718118667603,grad_norm: 0.9999993442428319, iteration: 113282
loss: 0.9841839075088501,grad_norm: 0.8466984089119449, iteration: 113283
loss: 1.1511646509170532,grad_norm: 0.9999997979959379, iteration: 113284
loss: 1.031983733177185,grad_norm: 0.8517903442691142, iteration: 113285
loss: 1.0297636985778809,grad_norm: 0.9999992350537478, iteration: 113286
loss: 1.0223028659820557,grad_norm: 0.9322361095769931, iteration: 113287
loss: 0.9964552521705627,grad_norm: 0.9486486634287873, iteration: 113288
loss: 0.989726185798645,grad_norm: 0.9175475398550372, iteration: 113289
loss: 1.0184475183486938,grad_norm: 0.9999990402705603, iteration: 113290
loss: 1.0269805192947388,grad_norm: 0.9999996300065278, iteration: 113291
loss: 1.0016556978225708,grad_norm: 0.9999995820343874, iteration: 113292
loss: 0.9977641701698303,grad_norm: 0.7845747645305748, iteration: 113293
loss: 1.0175291299819946,grad_norm: 0.9999995998534638, iteration: 113294
loss: 1.0378650426864624,grad_norm: 0.8499837232468581, iteration: 113295
loss: 1.069878101348877,grad_norm: 0.9999994377969011, iteration: 113296
loss: 1.0617518424987793,grad_norm: 0.9217822390004698, iteration: 113297
loss: 1.062345266342163,grad_norm: 0.9999994590941776, iteration: 113298
loss: 1.0252765417099,grad_norm: 0.9999991452349507, iteration: 113299
loss: 0.9972648024559021,grad_norm: 0.9793305494581152, iteration: 113300
loss: 0.9540311694145203,grad_norm: 0.8062617518857328, iteration: 113301
loss: 1.0054463148117065,grad_norm: 0.8279498636644929, iteration: 113302
loss: 0.9999918937683105,grad_norm: 0.8948380735324121, iteration: 113303
loss: 1.0902526378631592,grad_norm: 0.9999995026620717, iteration: 113304
loss: 1.0060756206512451,grad_norm: 0.925337401382799, iteration: 113305
loss: 1.0504626035690308,grad_norm: 0.983701511931396, iteration: 113306
loss: 0.9788373112678528,grad_norm: 0.9999991158501033, iteration: 113307
loss: 1.0049962997436523,grad_norm: 0.9999990769000757, iteration: 113308
loss: 1.0617948770523071,grad_norm: 0.9999992782754384, iteration: 113309
loss: 1.0164525508880615,grad_norm: 0.983838327871124, iteration: 113310
loss: 1.023164987564087,grad_norm: 0.999998939563699, iteration: 113311
loss: 0.9665992259979248,grad_norm: 0.9318939291924496, iteration: 113312
loss: 0.9852258563041687,grad_norm: 0.7781053520243923, iteration: 113313
loss: 0.9739457368850708,grad_norm: 0.9999990961761351, iteration: 113314
loss: 1.0506600141525269,grad_norm: 0.9999999034962427, iteration: 113315
loss: 0.9591359496116638,grad_norm: 0.9609579156555279, iteration: 113316
loss: 1.0214076042175293,grad_norm: 0.8601669830423029, iteration: 113317
loss: 1.0603868961334229,grad_norm: 0.9999996818441975, iteration: 113318
loss: 1.0009163618087769,grad_norm: 0.9999990983757706, iteration: 113319
loss: 0.9913260340690613,grad_norm: 0.9999993996307903, iteration: 113320
loss: 1.014855146408081,grad_norm: 0.9999991568424684, iteration: 113321
loss: 0.9916452765464783,grad_norm: 0.9999995621870504, iteration: 113322
loss: 1.0383766889572144,grad_norm: 0.999999351090677, iteration: 113323
loss: 0.9909231066703796,grad_norm: 0.9999992248327314, iteration: 113324
loss: 0.9541441798210144,grad_norm: 0.9999990694980462, iteration: 113325
loss: 0.9629855751991272,grad_norm: 0.9999989333708909, iteration: 113326
loss: 0.997140109539032,grad_norm: 0.9999990810066268, iteration: 113327
loss: 1.042901635169983,grad_norm: 0.9999999642078616, iteration: 113328
loss: 1.049386739730835,grad_norm: 0.9999992465392116, iteration: 113329
loss: 1.000930905342102,grad_norm: 0.8664046768393234, iteration: 113330
loss: 1.1122159957885742,grad_norm: 0.9999995163788307, iteration: 113331
loss: 1.0361011028289795,grad_norm: 0.9999996402292985, iteration: 113332
loss: 1.0081835985183716,grad_norm: 0.9999992798314277, iteration: 113333
loss: 1.0883195400238037,grad_norm: 0.9999993421782043, iteration: 113334
loss: 0.9843267202377319,grad_norm: 0.9999989376997496, iteration: 113335
loss: 1.0150209665298462,grad_norm: 0.8192851533849084, iteration: 113336
loss: 1.061755657196045,grad_norm: 0.788632167593931, iteration: 113337
loss: 1.148309350013733,grad_norm: 0.9999997737118129, iteration: 113338
loss: 0.9518260955810547,grad_norm: 0.8994978104721003, iteration: 113339
loss: 1.4429560899734497,grad_norm: 0.9999998611150479, iteration: 113340
loss: 0.9719834923744202,grad_norm: 0.999999155060369, iteration: 113341
loss: 0.9968059062957764,grad_norm: 0.9999990910530627, iteration: 113342
loss: 1.0986309051513672,grad_norm: 0.999999175956357, iteration: 113343
loss: 1.0015802383422852,grad_norm: 0.7403076958452067, iteration: 113344
loss: 1.1098980903625488,grad_norm: 0.9999998645715908, iteration: 113345
loss: 1.000529170036316,grad_norm: 0.8646244177830492, iteration: 113346
loss: 0.9981154799461365,grad_norm: 0.8206706036442328, iteration: 113347
loss: 0.9700967073440552,grad_norm: 0.7603631510327727, iteration: 113348
loss: 1.1052213907241821,grad_norm: 0.9999991303195848, iteration: 113349
loss: 1.0541088581085205,grad_norm: 0.9999990078486912, iteration: 113350
loss: 1.0157877206802368,grad_norm: 0.9603286771981895, iteration: 113351
loss: 0.9825783371925354,grad_norm: 0.9999996184285503, iteration: 113352
loss: 0.9690245389938354,grad_norm: 0.9006460387093731, iteration: 113353
loss: 1.0308477878570557,grad_norm: 0.8173293498144643, iteration: 113354
loss: 0.9955930709838867,grad_norm: 0.8240884892734899, iteration: 113355
loss: 1.0544542074203491,grad_norm: 0.9999995388575627, iteration: 113356
loss: 1.0122196674346924,grad_norm: 0.8767280875567675, iteration: 113357
loss: 1.011263132095337,grad_norm: 0.9507175902597644, iteration: 113358
loss: 0.9955999851226807,grad_norm: 0.9322538324030502, iteration: 113359
loss: 1.016443133354187,grad_norm: 0.9999991970203115, iteration: 113360
loss: 1.0080349445343018,grad_norm: 0.9999992842806643, iteration: 113361
loss: 1.0793172121047974,grad_norm: 0.9999992557969676, iteration: 113362
loss: 1.0156370401382446,grad_norm: 0.6977871963548274, iteration: 113363
loss: 1.0194687843322754,grad_norm: 0.7197109769711381, iteration: 113364
loss: 0.9706235527992249,grad_norm: 0.99999919474853, iteration: 113365
loss: 0.9739334583282471,grad_norm: 0.9999990351210992, iteration: 113366
loss: 1.0028266906738281,grad_norm: 0.9999991841233171, iteration: 113367
loss: 1.033584713935852,grad_norm: 0.9019206970925188, iteration: 113368
loss: 0.9916018843650818,grad_norm: 0.9848466926867654, iteration: 113369
loss: 1.0100566148757935,grad_norm: 0.9630066801093685, iteration: 113370
loss: 1.015930414199829,grad_norm: 0.9713188610413495, iteration: 113371
loss: 1.0344271659851074,grad_norm: 0.9999992589118845, iteration: 113372
loss: 0.9888997077941895,grad_norm: 0.8671453020660376, iteration: 113373
loss: 1.2023391723632812,grad_norm: 0.9999997052944022, iteration: 113374
loss: 1.0057305097579956,grad_norm: 0.9067987028480778, iteration: 113375
loss: 1.065901756286621,grad_norm: 0.9530124450966928, iteration: 113376
loss: 1.0584323406219482,grad_norm: 0.9999994976300144, iteration: 113377
loss: 1.0273476839065552,grad_norm: 0.9999990076253166, iteration: 113378
loss: 1.0273581743240356,grad_norm: 0.7215183450375338, iteration: 113379
loss: 1.0234276056289673,grad_norm: 0.8629920429978869, iteration: 113380
loss: 1.126247763633728,grad_norm: 0.9999997001318054, iteration: 113381
loss: 1.0238083600997925,grad_norm: 0.8673442986508922, iteration: 113382
loss: 1.03865647315979,grad_norm: 0.900223616931499, iteration: 113383
loss: 1.0056817531585693,grad_norm: 0.9999993847633669, iteration: 113384
loss: 1.0013748407363892,grad_norm: 0.9753732351249959, iteration: 113385
loss: 1.0073570013046265,grad_norm: 0.8204606635100694, iteration: 113386
loss: 1.368584156036377,grad_norm: 0.9999997480232495, iteration: 113387
loss: 1.0262178182601929,grad_norm: 0.9999991319592925, iteration: 113388
loss: 1.0318119525909424,grad_norm: 0.9803231290167634, iteration: 113389
loss: 1.1927354335784912,grad_norm: 0.999999418304654, iteration: 113390
loss: 1.078816533088684,grad_norm: 0.999999040644479, iteration: 113391
loss: 0.9816314578056335,grad_norm: 0.9999994317930303, iteration: 113392
loss: 1.2726203203201294,grad_norm: 0.9999999184083813, iteration: 113393
loss: 1.0547980070114136,grad_norm: 0.9999993364977573, iteration: 113394
loss: 1.0291709899902344,grad_norm: 0.9576930730829197, iteration: 113395
loss: 1.0548793077468872,grad_norm: 0.9246912339244137, iteration: 113396
loss: 1.0369960069656372,grad_norm: 0.912283970267845, iteration: 113397
loss: 1.0114890336990356,grad_norm: 0.9999992605414043, iteration: 113398
loss: 1.0673669576644897,grad_norm: 0.9999998946711094, iteration: 113399
loss: 1.02082097530365,grad_norm: 0.9999991596775066, iteration: 113400
loss: 1.0447176694869995,grad_norm: 0.9269468845731551, iteration: 113401
loss: 0.9427942633628845,grad_norm: 0.9999990609684414, iteration: 113402
loss: 1.0285309553146362,grad_norm: 0.7676824588553857, iteration: 113403
loss: 0.9823852777481079,grad_norm: 0.8781237419939938, iteration: 113404
loss: 1.0294204950332642,grad_norm: 0.9999990972906335, iteration: 113405
loss: 0.9838382601737976,grad_norm: 0.9999998933783294, iteration: 113406
loss: 1.0470967292785645,grad_norm: 0.9999994272024207, iteration: 113407
loss: 1.0089268684387207,grad_norm: 0.9041284547213085, iteration: 113408
loss: 0.9959434866905212,grad_norm: 0.9999990988247808, iteration: 113409
loss: 1.021614670753479,grad_norm: 0.9193505029227388, iteration: 113410
loss: 0.9861939549446106,grad_norm: 0.9069896680396824, iteration: 113411
loss: 1.012617826461792,grad_norm: 0.999999014137798, iteration: 113412
loss: 1.008638620376587,grad_norm: 0.9610372206576272, iteration: 113413
loss: 1.0019452571868896,grad_norm: 0.9999991272484957, iteration: 113414
loss: 1.0492364168167114,grad_norm: 0.9268063250073698, iteration: 113415
loss: 0.9891054034233093,grad_norm: 0.9999990628577686, iteration: 113416
loss: 1.0776506662368774,grad_norm: 0.9600852068766855, iteration: 113417
loss: 1.0159567594528198,grad_norm: 0.9999995113614893, iteration: 113418
loss: 1.0188617706298828,grad_norm: 0.8647114816151192, iteration: 113419
loss: 1.0055447816848755,grad_norm: 0.8543751480344697, iteration: 113420
loss: 1.0268296003341675,grad_norm: 0.9999991763987846, iteration: 113421
loss: 0.9947296977043152,grad_norm: 0.9999992545369881, iteration: 113422
loss: 1.0163531303405762,grad_norm: 0.9247198639457955, iteration: 113423
loss: 0.9913051724433899,grad_norm: 0.9032846288050256, iteration: 113424
loss: 1.009975790977478,grad_norm: 0.7650665837123325, iteration: 113425
loss: 1.0343791246414185,grad_norm: 0.9999997172409244, iteration: 113426
loss: 1.0207505226135254,grad_norm: 0.8268964375182222, iteration: 113427
loss: 0.9755149483680725,grad_norm: 0.9999991406248417, iteration: 113428
loss: 0.9757841229438782,grad_norm: 0.8498746438229817, iteration: 113429
loss: 1.0297614336013794,grad_norm: 0.9999993734176517, iteration: 113430
loss: 1.0044978857040405,grad_norm: 0.8487844053757038, iteration: 113431
loss: 1.030295729637146,grad_norm: 0.9591082485557956, iteration: 113432
loss: 1.0327796936035156,grad_norm: 0.9999991100601713, iteration: 113433
loss: 0.9685809016227722,grad_norm: 0.9999991387691312, iteration: 113434
loss: 0.9958789348602295,grad_norm: 0.9999991587440266, iteration: 113435
loss: 1.0365879535675049,grad_norm: 0.9999990980549407, iteration: 113436
loss: 1.0132501125335693,grad_norm: 0.999999070236246, iteration: 113437
loss: 1.0651187896728516,grad_norm: 0.9999991120161138, iteration: 113438
loss: 1.0284019708633423,grad_norm: 0.9095725724365842, iteration: 113439
loss: 0.9984533190727234,grad_norm: 0.9999989933589054, iteration: 113440
loss: 1.0514003038406372,grad_norm: 0.9999995831449394, iteration: 113441
loss: 1.0640478134155273,grad_norm: 0.9999998298773239, iteration: 113442
loss: 1.0145354270935059,grad_norm: 0.7972832760298403, iteration: 113443
loss: 1.0010101795196533,grad_norm: 0.8701903688156981, iteration: 113444
loss: 1.0125291347503662,grad_norm: 0.9999996800574061, iteration: 113445
loss: 1.0179517269134521,grad_norm: 0.9999991275418685, iteration: 113446
loss: 1.003196358680725,grad_norm: 0.9999993703318855, iteration: 113447
loss: 1.0647614002227783,grad_norm: 0.9484696759674793, iteration: 113448
loss: 0.9930068850517273,grad_norm: 0.9999991163402843, iteration: 113449
loss: 1.0653539896011353,grad_norm: 0.9999991144604544, iteration: 113450
loss: 0.9735170006752014,grad_norm: 0.8206411445348838, iteration: 113451
loss: 1.0960427522659302,grad_norm: 0.9999993545078076, iteration: 113452
loss: 1.0390173196792603,grad_norm: 0.9999996976470583, iteration: 113453
loss: 0.9880595207214355,grad_norm: 0.9999995177753556, iteration: 113454
loss: 0.9935377240180969,grad_norm: 0.9385259495769305, iteration: 113455
loss: 1.0174413919448853,grad_norm: 0.9519420287885109, iteration: 113456
loss: 1.015861988067627,grad_norm: 0.9999991198147684, iteration: 113457
loss: 0.9566730260848999,grad_norm: 0.9414248504957881, iteration: 113458
loss: 1.0138523578643799,grad_norm: 0.9999991270383515, iteration: 113459
loss: 1.1508309841156006,grad_norm: 0.999999898614348, iteration: 113460
loss: 1.093888521194458,grad_norm: 0.999999154155333, iteration: 113461
loss: 0.994758129119873,grad_norm: 0.8865108409157725, iteration: 113462
loss: 0.9804968237876892,grad_norm: 0.9403955674434497, iteration: 113463
loss: 0.9742283821105957,grad_norm: 0.9887654969490971, iteration: 113464
loss: 1.035393476486206,grad_norm: 0.8760161685047321, iteration: 113465
loss: 0.9883219599723816,grad_norm: 0.9999994317931077, iteration: 113466
loss: 1.0275304317474365,grad_norm: 0.9999991639346464, iteration: 113467
loss: 0.9739737510681152,grad_norm: 0.9999990049415401, iteration: 113468
loss: 0.9533180594444275,grad_norm: 0.9999989306634571, iteration: 113469
loss: 1.0442668199539185,grad_norm: 0.7071180153368183, iteration: 113470
loss: 1.0180299282073975,grad_norm: 0.8340899587890438, iteration: 113471
loss: 1.0616446733474731,grad_norm: 0.999999203634305, iteration: 113472
loss: 1.031383752822876,grad_norm: 0.9999999384623671, iteration: 113473
loss: 1.0350784063339233,grad_norm: 0.9999990495700811, iteration: 113474
loss: 1.0537196397781372,grad_norm: 0.9999991553042498, iteration: 113475
loss: 1.0068296194076538,grad_norm: 0.9066109629213635, iteration: 113476
loss: 1.0439478158950806,grad_norm: 0.9999991673886586, iteration: 113477
loss: 1.0472774505615234,grad_norm: 0.9999989631356225, iteration: 113478
loss: 0.9802680611610413,grad_norm: 0.9999991050530317, iteration: 113479
loss: 1.0114496946334839,grad_norm: 0.8079665130014128, iteration: 113480
loss: 0.9848025441169739,grad_norm: 0.999999323525344, iteration: 113481
loss: 1.0449767112731934,grad_norm: 0.9999994902840709, iteration: 113482
loss: 1.0145857334136963,grad_norm: 0.854918894551042, iteration: 113483
loss: 1.1094954013824463,grad_norm: 0.9999994460862586, iteration: 113484
loss: 0.9897797107696533,grad_norm: 0.8762650919764702, iteration: 113485
loss: 1.016501545906067,grad_norm: 0.949019142516895, iteration: 113486
loss: 0.9724686741828918,grad_norm: 0.9999991589910985, iteration: 113487
loss: 1.0342191457748413,grad_norm: 0.9999993565445519, iteration: 113488
loss: 1.0119917392730713,grad_norm: 0.9999990135661395, iteration: 113489
loss: 0.9952768683433533,grad_norm: 0.7578841719310938, iteration: 113490
loss: 1.000493049621582,grad_norm: 0.915482283552108, iteration: 113491
loss: 1.1216638088226318,grad_norm: 0.9999992709892851, iteration: 113492
loss: 1.057233452796936,grad_norm: 0.8804696960175676, iteration: 113493
loss: 1.1046180725097656,grad_norm: 0.9999994561185976, iteration: 113494
loss: 1.0225127935409546,grad_norm: 0.9553819541053307, iteration: 113495
loss: 1.0784679651260376,grad_norm: 0.9999998122806033, iteration: 113496
loss: 0.9842414855957031,grad_norm: 0.948223233469867, iteration: 113497
loss: 1.0255093574523926,grad_norm: 0.9999990277151427, iteration: 113498
loss: 1.0008323192596436,grad_norm: 0.8015011304580495, iteration: 113499
loss: 1.0058767795562744,grad_norm: 0.9265713457095966, iteration: 113500
loss: 0.988457441329956,grad_norm: 0.9878639611523903, iteration: 113501
loss: 1.0105518102645874,grad_norm: 0.9999992893053307, iteration: 113502
loss: 1.013671875,grad_norm: 0.8906179593585519, iteration: 113503
loss: 1.009405255317688,grad_norm: 0.971959139712297, iteration: 113504
loss: 0.9973446130752563,grad_norm: 1.000000002889238, iteration: 113505
loss: 1.0124822854995728,grad_norm: 0.9948504040028107, iteration: 113506
loss: 1.0287965536117554,grad_norm: 0.9999996571783022, iteration: 113507
loss: 1.0125136375427246,grad_norm: 0.9999993683143962, iteration: 113508
loss: 1.0408250093460083,grad_norm: 0.9999991110007018, iteration: 113509
loss: 0.9853184819221497,grad_norm: 0.9999995494306757, iteration: 113510
loss: 0.9970257878303528,grad_norm: 0.8298792708014411, iteration: 113511
loss: 1.0444425344467163,grad_norm: 0.9999997928756627, iteration: 113512
loss: 0.9899975657463074,grad_norm: 0.9999991368804988, iteration: 113513
loss: 0.9904561638832092,grad_norm: 0.8777384484941755, iteration: 113514
loss: 1.0403146743774414,grad_norm: 0.9999998627762845, iteration: 113515
loss: 1.1382427215576172,grad_norm: 0.9999999891371868, iteration: 113516
loss: 0.998793363571167,grad_norm: 0.8108219890354269, iteration: 113517
loss: 0.974376380443573,grad_norm: 0.8634547919222867, iteration: 113518
loss: 1.0212712287902832,grad_norm: 0.9147445286491223, iteration: 113519
loss: 0.9798535108566284,grad_norm: 0.8999798269091712, iteration: 113520
loss: 0.9973252415657043,grad_norm: 0.9018328012727583, iteration: 113521
loss: 0.9897180199623108,grad_norm: 0.8883056894525743, iteration: 113522
loss: 1.1161526441574097,grad_norm: 0.9999995430865602, iteration: 113523
loss: 1.0394662618637085,grad_norm: 0.902152234396903, iteration: 113524
loss: 0.9795441031455994,grad_norm: 0.8838727775741047, iteration: 113525
loss: 1.060302734375,grad_norm: 0.8886944839521268, iteration: 113526
loss: 1.226704478263855,grad_norm: 0.9999996421311954, iteration: 113527
loss: 0.9850011467933655,grad_norm: 0.9345406834403848, iteration: 113528
loss: 1.0475388765335083,grad_norm: 0.9605697235346048, iteration: 113529
loss: 0.9648371934890747,grad_norm: 0.8182930194893572, iteration: 113530
loss: 0.9777168035507202,grad_norm: 0.9076791365572604, iteration: 113531
loss: 0.9570085406303406,grad_norm: 0.8698976525000147, iteration: 113532
loss: 0.9774368405342102,grad_norm: 0.9999999734192625, iteration: 113533
loss: 1.0187920331954956,grad_norm: 0.9999993898036675, iteration: 113534
loss: 1.0104780197143555,grad_norm: 0.9378330305550876, iteration: 113535
loss: 0.9905121922492981,grad_norm: 0.9999989402959286, iteration: 113536
loss: 0.9643716216087341,grad_norm: 0.9999992938733858, iteration: 113537
loss: 0.9976255297660828,grad_norm: 0.8087649834402695, iteration: 113538
loss: 0.9800509214401245,grad_norm: 0.9805307749298895, iteration: 113539
loss: 1.0243780612945557,grad_norm: 0.9999999073219877, iteration: 113540
loss: 0.9953324198722839,grad_norm: 0.9920900357184124, iteration: 113541
loss: 1.0171529054641724,grad_norm: 0.9999993561538246, iteration: 113542
loss: 1.0192784070968628,grad_norm: 0.9999999786774267, iteration: 113543
loss: 1.0049971342086792,grad_norm: 0.963749794448355, iteration: 113544
loss: 1.020400047302246,grad_norm: 0.9796300499081224, iteration: 113545
loss: 1.0010937452316284,grad_norm: 0.8535771494334122, iteration: 113546
loss: 0.9946877360343933,grad_norm: 0.8322725177624507, iteration: 113547
loss: 0.9710502624511719,grad_norm: 0.8705845509117394, iteration: 113548
loss: 1.0224249362945557,grad_norm: 0.9999992792322886, iteration: 113549
loss: 1.0643504858016968,grad_norm: 0.9999995480912843, iteration: 113550
loss: 0.9917901158332825,grad_norm: 0.9155388193767029, iteration: 113551
loss: 1.02580988407135,grad_norm: 0.9999994471884922, iteration: 113552
loss: 1.0188279151916504,grad_norm: 0.9999993304596841, iteration: 113553
loss: 1.0255844593048096,grad_norm: 0.8245787686860997, iteration: 113554
loss: 1.0604411363601685,grad_norm: 0.9999994734787706, iteration: 113555
loss: 0.9680768251419067,grad_norm: 0.999998931706821, iteration: 113556
loss: 0.9836224317550659,grad_norm: 0.937438690615284, iteration: 113557
loss: 0.9956130981445312,grad_norm: 0.774784349088434, iteration: 113558
loss: 0.9917494654655457,grad_norm: 0.9999991798589362, iteration: 113559
loss: 0.9965999126434326,grad_norm: 0.9251041982005953, iteration: 113560
loss: 1.0605725049972534,grad_norm: 0.9999992419211754, iteration: 113561
loss: 1.2516505718231201,grad_norm: 0.999999617285351, iteration: 113562
loss: 1.0024867057800293,grad_norm: 0.9999993366556094, iteration: 113563
loss: 1.0033422708511353,grad_norm: 0.8460403780359583, iteration: 113564
loss: 1.0304733514785767,grad_norm: 0.8771399225662314, iteration: 113565
loss: 1.0520824193954468,grad_norm: 0.9999991663998838, iteration: 113566
loss: 0.9746015667915344,grad_norm: 0.9647227388274782, iteration: 113567
loss: 1.0291647911071777,grad_norm: 0.8272395121539436, iteration: 113568
loss: 0.9956091046333313,grad_norm: 0.9999991742552569, iteration: 113569
loss: 1.0262848138809204,grad_norm: 0.999999401515268, iteration: 113570
loss: 0.9811090230941772,grad_norm: 0.777360002560409, iteration: 113571
loss: 1.0044723749160767,grad_norm: 0.9999990470215528, iteration: 113572
loss: 1.0227988958358765,grad_norm: 0.9999997589658204, iteration: 113573
loss: 1.0005608797073364,grad_norm: 0.9711691533606829, iteration: 113574
loss: 1.006664514541626,grad_norm: 0.9269865437313947, iteration: 113575
loss: 1.026707410812378,grad_norm: 0.9999990720370141, iteration: 113576
loss: 0.9543713331222534,grad_norm: 0.8352159293931083, iteration: 113577
loss: 0.9645889401435852,grad_norm: 0.9999989948849265, iteration: 113578
loss: 0.9914534687995911,grad_norm: 0.9444018981624729, iteration: 113579
loss: 0.9806806445121765,grad_norm: 0.9450351909226062, iteration: 113580
loss: 1.0346070528030396,grad_norm: 0.9999995385057611, iteration: 113581
loss: 0.9734976887702942,grad_norm: 0.9999990719663694, iteration: 113582
loss: 1.0411062240600586,grad_norm: 0.9999993826507184, iteration: 113583
loss: 0.9682868719100952,grad_norm: 0.9999992654155976, iteration: 113584
loss: 1.029251217842102,grad_norm: 0.9906654502492166, iteration: 113585
loss: 1.0966063737869263,grad_norm: 0.9999992202727511, iteration: 113586
loss: 1.0905823707580566,grad_norm: 1.0000000282414385, iteration: 113587
loss: 1.021843671798706,grad_norm: 0.8840134826943868, iteration: 113588
loss: 1.0009092092514038,grad_norm: 0.9657402969141534, iteration: 113589
loss: 0.9595299959182739,grad_norm: 0.9999988904163059, iteration: 113590
loss: 0.9880996942520142,grad_norm: 0.9999995229969361, iteration: 113591
loss: 1.0217913389205933,grad_norm: 0.9817552450258211, iteration: 113592
loss: 1.0195138454437256,grad_norm: 0.9999990940858167, iteration: 113593
loss: 1.0108327865600586,grad_norm: 0.9999990444415169, iteration: 113594
loss: 0.9927670955657959,grad_norm: 0.9999996527831503, iteration: 113595
loss: 1.0040720701217651,grad_norm: 0.9003368883689116, iteration: 113596
loss: 1.0042527914047241,grad_norm: 0.9999988851305777, iteration: 113597
loss: 0.9755666255950928,grad_norm: 0.9449190072509351, iteration: 113598
loss: 1.0124461650848389,grad_norm: 0.9833629706270244, iteration: 113599
loss: 0.986520528793335,grad_norm: 0.8037900126699862, iteration: 113600
loss: 1.065146565437317,grad_norm: 0.9999991011177697, iteration: 113601
loss: 1.0103167295455933,grad_norm: 0.9155259251424601, iteration: 113602
loss: 1.0424309968948364,grad_norm: 0.920653463107152, iteration: 113603
loss: 1.066698431968689,grad_norm: 0.8425251342680045, iteration: 113604
loss: 1.0676363706588745,grad_norm: 0.999998973959756, iteration: 113605
loss: 1.0564918518066406,grad_norm: 0.9842161289915283, iteration: 113606
loss: 1.0223824977874756,grad_norm: 0.9999991710446378, iteration: 113607
loss: 1.0157842636108398,grad_norm: 0.9999990905942607, iteration: 113608
loss: 1.036215901374817,grad_norm: 0.9999999286691599, iteration: 113609
loss: 0.971951425075531,grad_norm: 0.8959701120172427, iteration: 113610
loss: 0.9564951062202454,grad_norm: 0.9022172857888447, iteration: 113611
loss: 1.010143756866455,grad_norm: 0.7746172290817425, iteration: 113612
loss: 1.0298219919204712,grad_norm: 0.9999994550378907, iteration: 113613
loss: 1.0020591020584106,grad_norm: 0.8451810850562916, iteration: 113614
loss: 0.9904334545135498,grad_norm: 0.9999994197532939, iteration: 113615
loss: 1.052706241607666,grad_norm: 0.9999990544501083, iteration: 113616
loss: 0.9914794564247131,grad_norm: 0.871911521301497, iteration: 113617
loss: 1.0498614311218262,grad_norm: 0.9999992851784388, iteration: 113618
loss: 1.0055344104766846,grad_norm: 0.9166094999154567, iteration: 113619
loss: 1.0209203958511353,grad_norm: 0.9017744209764934, iteration: 113620
loss: 0.9824290871620178,grad_norm: 0.8315912660009737, iteration: 113621
loss: 0.9860690832138062,grad_norm: 0.8258449793569411, iteration: 113622
loss: 1.0836634635925293,grad_norm: 0.8543791723445885, iteration: 113623
loss: 1.0192490816116333,grad_norm: 0.8132218906685162, iteration: 113624
loss: 0.9979946613311768,grad_norm: 0.9999991598951261, iteration: 113625
loss: 0.9585830569267273,grad_norm: 0.9999991722277028, iteration: 113626
loss: 1.0371795892715454,grad_norm: 0.9999992248011255, iteration: 113627
loss: 1.1264606714248657,grad_norm: 0.9999991754967094, iteration: 113628
loss: 1.0097395181655884,grad_norm: 0.7835429418073023, iteration: 113629
loss: 1.015679121017456,grad_norm: 0.9999988953105262, iteration: 113630
loss: 1.01896071434021,grad_norm: 0.8969713795409543, iteration: 113631
loss: 0.9839534163475037,grad_norm: 0.9999991816488274, iteration: 113632
loss: 1.0236999988555908,grad_norm: 0.9999990708400114, iteration: 113633
loss: 0.9738811254501343,grad_norm: 0.8417435388338902, iteration: 113634
loss: 0.9856413006782532,grad_norm: 0.8277607330274974, iteration: 113635
loss: 0.9531866312026978,grad_norm: 0.9491261619681846, iteration: 113636
loss: 1.0007606744766235,grad_norm: 0.9999997391846877, iteration: 113637
loss: 1.0594487190246582,grad_norm: 0.9999993975625432, iteration: 113638
loss: 0.983494758605957,grad_norm: 0.8508263505633693, iteration: 113639
loss: 1.0762639045715332,grad_norm: 0.9942462125337128, iteration: 113640
loss: 0.9791862964630127,grad_norm: 0.9999990845833008, iteration: 113641
loss: 0.9975093603134155,grad_norm: 0.9097366385712815, iteration: 113642
loss: 0.9737722873687744,grad_norm: 0.937359400776771, iteration: 113643
loss: 0.9924739599227905,grad_norm: 0.9999993430649148, iteration: 113644
loss: 1.0280183553695679,grad_norm: 0.9074324712503411, iteration: 113645
loss: 0.9976533055305481,grad_norm: 0.7443202334064999, iteration: 113646
loss: 0.9963999390602112,grad_norm: 0.8242509660009759, iteration: 113647
loss: 0.9694186449050903,grad_norm: 0.9999991434494656, iteration: 113648
loss: 1.15684175491333,grad_norm: 0.9999994098992279, iteration: 113649
loss: 0.9457366466522217,grad_norm: 0.9247850679772337, iteration: 113650
loss: 1.0465595722198486,grad_norm: 0.9999995516329729, iteration: 113651
loss: 1.0439388751983643,grad_norm: 0.9999993524588269, iteration: 113652
loss: 1.0558656454086304,grad_norm: 0.9999995974252451, iteration: 113653
loss: 1.002505898475647,grad_norm: 0.9999989910701367, iteration: 113654
loss: 0.9833270907402039,grad_norm: 0.9069844199734256, iteration: 113655
loss: 1.0140761137008667,grad_norm: 0.9999996866905785, iteration: 113656
loss: 1.01608407497406,grad_norm: 0.9999991595917644, iteration: 113657
loss: 0.991595983505249,grad_norm: 0.9999990084983466, iteration: 113658
loss: 1.0291587114334106,grad_norm: 0.9999995937564325, iteration: 113659
loss: 1.0068532228469849,grad_norm: 0.9880245342978974, iteration: 113660
loss: 1.040411114692688,grad_norm: 0.9999991283956863, iteration: 113661
loss: 0.9937378168106079,grad_norm: 0.9585970916677046, iteration: 113662
loss: 1.052898645401001,grad_norm: 0.9999993664965118, iteration: 113663
loss: 0.9928708076477051,grad_norm: 0.9173533158310639, iteration: 113664
loss: 1.0453087091445923,grad_norm: 0.910713551574081, iteration: 113665
loss: 1.0376076698303223,grad_norm: 0.9999990265187794, iteration: 113666
loss: 0.9778007864952087,grad_norm: 0.839185522201435, iteration: 113667
loss: 0.9973453879356384,grad_norm: 0.9999989827937288, iteration: 113668
loss: 1.0533311367034912,grad_norm: 0.8837201466177856, iteration: 113669
loss: 1.0310792922973633,grad_norm: 0.9999994989343909, iteration: 113670
loss: 1.0793856382369995,grad_norm: 0.999999031308327, iteration: 113671
loss: 0.958444356918335,grad_norm: 0.8831380005547322, iteration: 113672
loss: 1.0097825527191162,grad_norm: 0.7651751163006555, iteration: 113673
loss: 1.0004817247390747,grad_norm: 0.9498558531210786, iteration: 113674
loss: 1.0846197605133057,grad_norm: 0.9999991874281867, iteration: 113675
loss: 1.0044779777526855,grad_norm: 0.9634811465123451, iteration: 113676
loss: 0.9888439178466797,grad_norm: 0.9961108462192401, iteration: 113677
loss: 1.0347224473953247,grad_norm: 0.9414877282391892, iteration: 113678
loss: 1.0172287225723267,grad_norm: 0.9741856206657182, iteration: 113679
loss: 0.9989021420478821,grad_norm: 0.9084878517022658, iteration: 113680
loss: 0.9930750131607056,grad_norm: 0.8916629478024336, iteration: 113681
loss: 0.9859829545021057,grad_norm: 0.9243864758532686, iteration: 113682
loss: 1.0726252794265747,grad_norm: 0.9999998701166828, iteration: 113683
loss: 1.0168119668960571,grad_norm: 0.9816047551877874, iteration: 113684
loss: 0.9972624182701111,grad_norm: 0.9463334013954772, iteration: 113685
loss: 0.9520533084869385,grad_norm: 0.850472242721666, iteration: 113686
loss: 1.0531786680221558,grad_norm: 0.9999991622753951, iteration: 113687
loss: 1.076606273651123,grad_norm: 0.9999998379294148, iteration: 113688
loss: 1.0393152236938477,grad_norm: 0.9999991060790867, iteration: 113689
loss: 1.0125890970230103,grad_norm: 0.9999991135449082, iteration: 113690
loss: 0.9798649549484253,grad_norm: 0.9999996975470978, iteration: 113691
loss: 0.9763347506523132,grad_norm: 0.8417395856661811, iteration: 113692
loss: 1.0912878513336182,grad_norm: 0.9999994398539218, iteration: 113693
loss: 1.018835186958313,grad_norm: 0.9999992540342093, iteration: 113694
loss: 0.9884624481201172,grad_norm: 0.8916081795552039, iteration: 113695
loss: 0.972856879234314,grad_norm: 0.8211735295473614, iteration: 113696
loss: 1.0859040021896362,grad_norm: 0.9999991010286011, iteration: 113697
loss: 0.988554835319519,grad_norm: 0.7949186361963055, iteration: 113698
loss: 0.9583753347396851,grad_norm: 0.919524968941612, iteration: 113699
loss: 1.0385280847549438,grad_norm: 0.8332803001431573, iteration: 113700
loss: 1.0322405099868774,grad_norm: 0.9999998434609074, iteration: 113701
loss: 1.002686619758606,grad_norm: 0.829393855194972, iteration: 113702
loss: 1.000257968902588,grad_norm: 0.8312851143780712, iteration: 113703
loss: 0.9686256647109985,grad_norm: 0.9428898857259782, iteration: 113704
loss: 1.013607144355774,grad_norm: 0.8221662047911594, iteration: 113705
loss: 1.0372332334518433,grad_norm: 0.8124959232819037, iteration: 113706
loss: 1.0626164674758911,grad_norm: 0.999999860621692, iteration: 113707
loss: 1.053684115409851,grad_norm: 0.9999991750412038, iteration: 113708
loss: 1.0285407304763794,grad_norm: 0.9999994782081539, iteration: 113709
loss: 0.9754860401153564,grad_norm: 0.999999320438115, iteration: 113710
loss: 1.0321736335754395,grad_norm: 0.9926876297361672, iteration: 113711
loss: 1.008821964263916,grad_norm: 0.9999993057250076, iteration: 113712
loss: 1.0070008039474487,grad_norm: 0.9999992539277761, iteration: 113713
loss: 1.0072126388549805,grad_norm: 0.9999998571137211, iteration: 113714
loss: 1.0929152965545654,grad_norm: 0.9352248851187696, iteration: 113715
loss: 1.067217469215393,grad_norm: 0.9877526999056353, iteration: 113716
loss: 0.9494605660438538,grad_norm: 0.8977849117374718, iteration: 113717
loss: 1.0801571607589722,grad_norm: 0.9999998183357304, iteration: 113718
loss: 1.0786466598510742,grad_norm: 0.9999993864500251, iteration: 113719
loss: 1.010086178779602,grad_norm: 0.9287629898655139, iteration: 113720
loss: 1.0394580364227295,grad_norm: 0.9999997428949743, iteration: 113721
loss: 0.9620226621627808,grad_norm: 0.9999993610762279, iteration: 113722
loss: 1.0190351009368896,grad_norm: 0.9999994373063077, iteration: 113723
loss: 1.165876030921936,grad_norm: 0.9999999286662732, iteration: 113724
loss: 1.0058413743972778,grad_norm: 0.8251467961350714, iteration: 113725
loss: 1.0558338165283203,grad_norm: 1.000000037460666, iteration: 113726
loss: 1.0008617639541626,grad_norm: 0.9999999945005102, iteration: 113727
loss: 0.968308687210083,grad_norm: 0.961485557537903, iteration: 113728
loss: 1.090693473815918,grad_norm: 0.9999995770942907, iteration: 113729
loss: 1.0023255348205566,grad_norm: 0.9807454290180719, iteration: 113730
loss: 0.9978322386741638,grad_norm: 0.9999989497008054, iteration: 113731
loss: 1.1138888597488403,grad_norm: 0.9999991396556741, iteration: 113732
loss: 1.070116639137268,grad_norm: 0.9999996242314986, iteration: 113733
loss: 1.0503771305084229,grad_norm: 0.9999991144743813, iteration: 113734
loss: 1.0858064889907837,grad_norm: 0.9999997391617816, iteration: 113735
loss: 0.9668514728546143,grad_norm: 0.8712841991630581, iteration: 113736
loss: 1.0298455953598022,grad_norm: 0.8607835544825475, iteration: 113737
loss: 0.9952635765075684,grad_norm: 0.9999991070020274, iteration: 113738
loss: 1.0177627801895142,grad_norm: 0.7924801960816653, iteration: 113739
loss: 0.9836743474006653,grad_norm: 0.94567109028663, iteration: 113740
loss: 1.0283719301223755,grad_norm: 0.9999994129868423, iteration: 113741
loss: 1.0049480199813843,grad_norm: 0.9999994722208981, iteration: 113742
loss: 0.9959700703620911,grad_norm: 0.9627656589161517, iteration: 113743
loss: 0.9892717599868774,grad_norm: 0.8498583087754465, iteration: 113744
loss: 1.0518808364868164,grad_norm: 0.9999994537384417, iteration: 113745
loss: 0.9877124428749084,grad_norm: 0.9999993707696613, iteration: 113746
loss: 1.0623613595962524,grad_norm: 0.9763845735240854, iteration: 113747
loss: 1.0145820379257202,grad_norm: 0.8864479013819262, iteration: 113748
loss: 1.0608481168746948,grad_norm: 0.9999999248971947, iteration: 113749
loss: 1.0153149366378784,grad_norm: 0.9122824910695594, iteration: 113750
loss: 0.9957682490348816,grad_norm: 0.883468423653289, iteration: 113751
loss: 1.0259836912155151,grad_norm: 0.9999991623862623, iteration: 113752
loss: 1.0011173486709595,grad_norm: 0.9999990930433721, iteration: 113753
loss: 1.1296734809875488,grad_norm: 0.8782502028900667, iteration: 113754
loss: 1.1226260662078857,grad_norm: 0.9999996845991174, iteration: 113755
loss: 1.07643723487854,grad_norm: 0.999999335892148, iteration: 113756
loss: 1.1506222486495972,grad_norm: 0.9999998446383603, iteration: 113757
loss: 1.0567718744277954,grad_norm: 0.999999199803266, iteration: 113758
loss: 0.9742851257324219,grad_norm: 0.8253497808001323, iteration: 113759
loss: 0.996311604976654,grad_norm: 0.9999995167536496, iteration: 113760
loss: 1.22728431224823,grad_norm: 0.9999995601891764, iteration: 113761
loss: 1.1060324907302856,grad_norm: 0.9731887730906508, iteration: 113762
loss: 1.0250706672668457,grad_norm: 0.9999991901137167, iteration: 113763
loss: 1.0836833715438843,grad_norm: 0.999999071850086, iteration: 113764
loss: 1.0580403804779053,grad_norm: 0.9999993057764298, iteration: 113765
loss: 1.0165423154830933,grad_norm: 0.9999992952171349, iteration: 113766
loss: 0.9923942685127258,grad_norm: 0.9255541499837394, iteration: 113767
loss: 0.9839474558830261,grad_norm: 0.9999992072113442, iteration: 113768
loss: 1.04796302318573,grad_norm: 0.9999994464071758, iteration: 113769
loss: 1.0171058177947998,grad_norm: 0.9910793173607775, iteration: 113770
loss: 0.9675400257110596,grad_norm: 0.9999991900545291, iteration: 113771
loss: 1.0493173599243164,grad_norm: 0.9579358072161103, iteration: 113772
loss: 1.0555683374404907,grad_norm: 0.9999991198110454, iteration: 113773
loss: 1.054284930229187,grad_norm: 0.9999992612212367, iteration: 113774
loss: 1.0050331354141235,grad_norm: 0.7979461259753652, iteration: 113775
loss: 1.0333126783370972,grad_norm: 0.9999992956968782, iteration: 113776
loss: 1.1249067783355713,grad_norm: 0.9999999586738649, iteration: 113777
loss: 1.0335389375686646,grad_norm: 0.9602690647255673, iteration: 113778
loss: 0.9632185101509094,grad_norm: 0.999999135793744, iteration: 113779
loss: 0.9877269864082336,grad_norm: 0.9999991653015884, iteration: 113780
loss: 1.0095412731170654,grad_norm: 0.8572548126305043, iteration: 113781
loss: 1.0036529302597046,grad_norm: 0.8166397158230795, iteration: 113782
loss: 0.9848328828811646,grad_norm: 0.9999990283552825, iteration: 113783
loss: 1.1515474319458008,grad_norm: 0.9999999219035538, iteration: 113784
loss: 1.1238762140274048,grad_norm: 0.9999991324997806, iteration: 113785
loss: 0.9917240738868713,grad_norm: 0.8041393080127248, iteration: 113786
loss: 1.0377459526062012,grad_norm: 0.9640566950610465, iteration: 113787
loss: 0.9843300580978394,grad_norm: 0.999999062190127, iteration: 113788
loss: 1.0498288869857788,grad_norm: 0.9999991875351525, iteration: 113789
loss: 1.0310622453689575,grad_norm: 0.9999989356615693, iteration: 113790
loss: 1.0639926195144653,grad_norm: 0.9999996694075994, iteration: 113791
loss: 1.0373632907867432,grad_norm: 0.9999993830446279, iteration: 113792
loss: 0.9691003561019897,grad_norm: 0.9355501954046622, iteration: 113793
loss: 1.0372462272644043,grad_norm: 0.9754194035263063, iteration: 113794
loss: 1.016915202140808,grad_norm: 0.9999991309530711, iteration: 113795
loss: 1.0164629220962524,grad_norm: 0.9999990656783841, iteration: 113796
loss: 1.0275557041168213,grad_norm: 0.9999995448261811, iteration: 113797
loss: 0.9857261180877686,grad_norm: 0.9703982846885154, iteration: 113798
loss: 1.0755678415298462,grad_norm: 0.9999990268680541, iteration: 113799
loss: 1.0734975337982178,grad_norm: 0.9999998828900396, iteration: 113800
loss: 0.9808436036109924,grad_norm: 0.8696200313370731, iteration: 113801
loss: 1.0043115615844727,grad_norm: 0.8657852982421412, iteration: 113802
loss: 1.053335189819336,grad_norm: 0.9999995571922884, iteration: 113803
loss: 0.9874783158302307,grad_norm: 0.910910476253802, iteration: 113804
loss: 1.0282883644104004,grad_norm: 0.9762465704013602, iteration: 113805
loss: 0.9832607507705688,grad_norm: 0.9719239543669027, iteration: 113806
loss: 1.0474013090133667,grad_norm: 0.9999998291565636, iteration: 113807
loss: 1.0140557289123535,grad_norm: 0.879196803218006, iteration: 113808
loss: 1.0007394552230835,grad_norm: 0.9999990527482884, iteration: 113809
loss: 1.1029939651489258,grad_norm: 0.9999994262276465, iteration: 113810
loss: 1.1249858140945435,grad_norm: 0.999999425615284, iteration: 113811
loss: 1.040671706199646,grad_norm: 0.9999997497135823, iteration: 113812
loss: 0.9692047238349915,grad_norm: 0.8201173094209333, iteration: 113813
loss: 1.068584680557251,grad_norm: 0.9999993722255186, iteration: 113814
loss: 0.9627700448036194,grad_norm: 0.8745869327268262, iteration: 113815
loss: 1.0331213474273682,grad_norm: 0.9999991301333726, iteration: 113816
loss: 1.020279049873352,grad_norm: 0.8076221219256262, iteration: 113817
loss: 1.0504392385482788,grad_norm: 0.9927456239185952, iteration: 113818
loss: 1.0529955625534058,grad_norm: 0.9162483794936099, iteration: 113819
loss: 0.9852379560470581,grad_norm: 0.9999999487909433, iteration: 113820
loss: 1.0059995651245117,grad_norm: 0.9999991634966892, iteration: 113821
loss: 1.0220983028411865,grad_norm: 0.9095497975115102, iteration: 113822
loss: 1.0788484811782837,grad_norm: 0.999999220980221, iteration: 113823
loss: 1.0132341384887695,grad_norm: 0.9999990610455516, iteration: 113824
loss: 1.0657092332839966,grad_norm: 0.8892073107154153, iteration: 113825
loss: 1.06351900100708,grad_norm: 0.9664728226509297, iteration: 113826
loss: 1.0159169435501099,grad_norm: 0.9999998342676117, iteration: 113827
loss: 1.0694084167480469,grad_norm: 0.9999996560971369, iteration: 113828
loss: 1.0374162197113037,grad_norm: 0.9960971181082638, iteration: 113829
loss: 1.0082815885543823,grad_norm: 0.9273694657278929, iteration: 113830
loss: 1.0202891826629639,grad_norm: 0.9999992850325149, iteration: 113831
loss: 1.0402847528457642,grad_norm: 0.9999993125401921, iteration: 113832
loss: 0.9926010966300964,grad_norm: 0.8818138079292158, iteration: 113833
loss: 1.177198052406311,grad_norm: 0.9999997254160357, iteration: 113834
loss: 1.0665279626846313,grad_norm: 0.9999993795950463, iteration: 113835
loss: 0.9852968454360962,grad_norm: 0.9999991397835067, iteration: 113836
loss: 1.074473261833191,grad_norm: 0.8928414169840745, iteration: 113837
loss: 1.0682138204574585,grad_norm: 0.9999994615903042, iteration: 113838
loss: 0.9739716053009033,grad_norm: 0.7688945355847487, iteration: 113839
loss: 0.9912660121917725,grad_norm: 0.7646679833326175, iteration: 113840
loss: 1.0683602094650269,grad_norm: 0.9955153388678, iteration: 113841
loss: 1.0317342281341553,grad_norm: 0.9999992857001726, iteration: 113842
loss: 1.0419631004333496,grad_norm: 0.9039789491773875, iteration: 113843
loss: 1.084017038345337,grad_norm: 0.9999994453876708, iteration: 113844
loss: 1.0027081966400146,grad_norm: 0.881402590611175, iteration: 113845
loss: 1.031723976135254,grad_norm: 0.9829010905117465, iteration: 113846
loss: 1.119662880897522,grad_norm: 0.9999992513547578, iteration: 113847
loss: 1.1086145639419556,grad_norm: 0.9999994233501326, iteration: 113848
loss: 1.0493719577789307,grad_norm: 0.9999995485485208, iteration: 113849
loss: 1.0485237836837769,grad_norm: 0.9999998939234713, iteration: 113850
loss: 1.0705573558807373,grad_norm: 0.9509635466716697, iteration: 113851
loss: 1.1961663961410522,grad_norm: 0.9999999860722019, iteration: 113852
loss: 1.0217483043670654,grad_norm: 0.999999419594885, iteration: 113853
loss: 1.012332797050476,grad_norm: 0.9999990265775783, iteration: 113854
loss: 1.0337610244750977,grad_norm: 0.9999992204838792, iteration: 113855
loss: 1.0111584663391113,grad_norm: 0.9999991912527201, iteration: 113856
loss: 0.9883857369422913,grad_norm: 0.9230834446506664, iteration: 113857
loss: 0.9847065210342407,grad_norm: 0.9999991381579613, iteration: 113858
loss: 1.086654782295227,grad_norm: 0.9999998490432559, iteration: 113859
loss: 0.9756659269332886,grad_norm: 0.9999993332196041, iteration: 113860
loss: 0.9906303882598877,grad_norm: 0.9159866772174677, iteration: 113861
loss: 0.9953068494796753,grad_norm: 0.9217154191540732, iteration: 113862
loss: 1.0851292610168457,grad_norm: 0.8833840803398698, iteration: 113863
loss: 0.9975546002388,grad_norm: 0.925172714896505, iteration: 113864
loss: 1.097983717918396,grad_norm: 0.9999995959692326, iteration: 113865
loss: 1.0774699449539185,grad_norm: 0.9999995577535641, iteration: 113866
loss: 1.036555528640747,grad_norm: 0.9999998735898277, iteration: 113867
loss: 1.0331873893737793,grad_norm: 0.9999992348891472, iteration: 113868
loss: 1.0462526082992554,grad_norm: 0.9999997996844008, iteration: 113869
loss: 1.0093804597854614,grad_norm: 0.9999997059500337, iteration: 113870
loss: 1.023850440979004,grad_norm: 0.822355114986122, iteration: 113871
loss: 1.1187760829925537,grad_norm: 0.9999992535571549, iteration: 113872
loss: 0.9874199628829956,grad_norm: 0.9999990488210598, iteration: 113873
loss: 1.0263491868972778,grad_norm: 0.9999995024564837, iteration: 113874
loss: 1.0044864416122437,grad_norm: 0.9999991175037688, iteration: 113875
loss: 1.0705053806304932,grad_norm: 0.999999448057306, iteration: 113876
loss: 1.1254889965057373,grad_norm: 0.9999995185823325, iteration: 113877
loss: 1.0336768627166748,grad_norm: 0.9602526723258271, iteration: 113878
loss: 1.0761442184448242,grad_norm: 0.9999993505374414, iteration: 113879
loss: 1.056145191192627,grad_norm: 0.8924579789824445, iteration: 113880
loss: 1.0532761812210083,grad_norm: 0.9999992607083604, iteration: 113881
loss: 0.9558455944061279,grad_norm: 0.9999995007830912, iteration: 113882
loss: 1.0863630771636963,grad_norm: 0.999999180404843, iteration: 113883
loss: 1.024070382118225,grad_norm: 0.999999079660911, iteration: 113884
loss: 1.1023815870285034,grad_norm: 0.9999996121900547, iteration: 113885
loss: 1.0384608507156372,grad_norm: 0.9999999116094934, iteration: 113886
loss: 1.0038738250732422,grad_norm: 0.999999688756226, iteration: 113887
loss: 1.2518103122711182,grad_norm: 0.9999998618972611, iteration: 113888
loss: 1.0297406911849976,grad_norm: 0.9455139479662207, iteration: 113889
loss: 1.067469835281372,grad_norm: 0.999999320351509, iteration: 113890
loss: 0.9760048389434814,grad_norm: 0.9999990989259904, iteration: 113891
loss: 1.055997371673584,grad_norm: 0.9999994929867475, iteration: 113892
loss: 0.9973223209381104,grad_norm: 0.9999998072458985, iteration: 113893
loss: 1.1517637968063354,grad_norm: 0.9999993761146843, iteration: 113894
loss: 0.9721700549125671,grad_norm: 0.943351386839596, iteration: 113895
loss: 0.9722959995269775,grad_norm: 0.9007686077505461, iteration: 113896
loss: 1.0329279899597168,grad_norm: 0.9689268243077281, iteration: 113897
loss: 1.0338572263717651,grad_norm: 0.9999992437075952, iteration: 113898
loss: 1.0987392663955688,grad_norm: 0.9999995488628435, iteration: 113899
loss: 1.0483956336975098,grad_norm: 0.9999992310890173, iteration: 113900
loss: 1.0881574153900146,grad_norm: 0.9999996380586819, iteration: 113901
loss: 1.0175963640213013,grad_norm: 0.9999991671760091, iteration: 113902
loss: 1.024619698524475,grad_norm: 0.9765736860034968, iteration: 113903
loss: 1.0285934209823608,grad_norm: 0.9999990505120812, iteration: 113904
loss: 0.9663626551628113,grad_norm: 0.956510180219346, iteration: 113905
loss: 1.0174568891525269,grad_norm: 0.9999994217607066, iteration: 113906
loss: 1.036753535270691,grad_norm: 0.9999999641103066, iteration: 113907
loss: 1.1047154664993286,grad_norm: 0.999999327285375, iteration: 113908
loss: 1.0035593509674072,grad_norm: 0.9999991123457984, iteration: 113909
loss: 1.1547762155532837,grad_norm: 0.9999993426668974, iteration: 113910
loss: 0.9866999387741089,grad_norm: 0.9079551827220488, iteration: 113911
loss: 1.0077018737792969,grad_norm: 0.8339006073189275, iteration: 113912
loss: 1.2875112295150757,grad_norm: 1.0000000339166926, iteration: 113913
loss: 1.0197044610977173,grad_norm: 0.9999992701559938, iteration: 113914
loss: 1.0204917192459106,grad_norm: 0.9152968371932921, iteration: 113915
loss: 1.0508224964141846,grad_norm: 0.9999997758478404, iteration: 113916
loss: 0.9795259833335876,grad_norm: 0.9076529399515852, iteration: 113917
loss: 1.0352272987365723,grad_norm: 0.9999991089993489, iteration: 113918
loss: 1.079242467880249,grad_norm: 0.9999999034984612, iteration: 113919
loss: 0.9818089604377747,grad_norm: 0.8380030961942, iteration: 113920
loss: 1.0504450798034668,grad_norm: 0.9999990299224516, iteration: 113921
loss: 1.0008387565612793,grad_norm: 0.9999995453062228, iteration: 113922
loss: 1.0231186151504517,grad_norm: 0.8685241765288529, iteration: 113923
loss: 1.0083060264587402,grad_norm: 0.9999992681380824, iteration: 113924
loss: 0.99875807762146,grad_norm: 0.9497572450814477, iteration: 113925
loss: 1.0175071954727173,grad_norm: 0.9944750641303709, iteration: 113926
loss: 1.0885035991668701,grad_norm: 0.9999997467664, iteration: 113927
loss: 1.1124688386917114,grad_norm: 1.0000000168730059, iteration: 113928
loss: 0.9779057502746582,grad_norm: 0.9999999243622605, iteration: 113929
loss: 0.9765352606773376,grad_norm: 0.9529196590450566, iteration: 113930
loss: 0.9938175678253174,grad_norm: 0.9999991427389516, iteration: 113931
loss: 1.0796608924865723,grad_norm: 0.9999999070831174, iteration: 113932
loss: 1.0500409603118896,grad_norm: 0.9999996412635801, iteration: 113933
loss: 1.1776723861694336,grad_norm: 0.9999994190697987, iteration: 113934
loss: 0.9884729981422424,grad_norm: 0.9623310657432593, iteration: 113935
loss: 1.0895354747772217,grad_norm: 0.999999694727167, iteration: 113936
loss: 1.0344223976135254,grad_norm: 0.832406083761707, iteration: 113937
loss: 1.0679209232330322,grad_norm: 0.9999996774693359, iteration: 113938
loss: 0.9861956238746643,grad_norm: 0.8958058867185226, iteration: 113939
loss: 1.0902652740478516,grad_norm: 0.9999992377490831, iteration: 113940
loss: 0.9996205568313599,grad_norm: 0.9999990650433902, iteration: 113941
loss: 1.0174778699874878,grad_norm: 0.9051832552432151, iteration: 113942
loss: 1.053933024406433,grad_norm: 0.9163735632402435, iteration: 113943
loss: 1.0904555320739746,grad_norm: 0.9999993571356223, iteration: 113944
loss: 1.0664702653884888,grad_norm: 0.9999990758772268, iteration: 113945
loss: 0.9789867401123047,grad_norm: 0.9999990469833727, iteration: 113946
loss: 1.089481234550476,grad_norm: 0.9999999604700559, iteration: 113947
loss: 1.039732575416565,grad_norm: 0.9999995935551144, iteration: 113948
loss: 1.0128291845321655,grad_norm: 0.999999094762013, iteration: 113949
loss: 0.9654463529586792,grad_norm: 0.9999990108613721, iteration: 113950
loss: 0.9850536584854126,grad_norm: 0.9999991479576525, iteration: 113951
loss: 1.0645358562469482,grad_norm: 0.9999996377264266, iteration: 113952
loss: 1.0889968872070312,grad_norm: 0.999999980820939, iteration: 113953
loss: 1.0396265983581543,grad_norm: 0.9390542679431464, iteration: 113954
loss: 1.1384650468826294,grad_norm: 0.9999993612224324, iteration: 113955
loss: 1.1302759647369385,grad_norm: 0.999999380638231, iteration: 113956
loss: 1.044798731803894,grad_norm: 0.9999997719029238, iteration: 113957
loss: 1.0120761394500732,grad_norm: 0.9999992305824449, iteration: 113958
loss: 1.0119653940200806,grad_norm: 0.9322492516940645, iteration: 113959
loss: 1.0637186765670776,grad_norm: 0.999999299546999, iteration: 113960
loss: 1.2256265878677368,grad_norm: 0.9999997007977992, iteration: 113961
loss: 1.0401501655578613,grad_norm: 0.9999991513027294, iteration: 113962
loss: 1.0618889331817627,grad_norm: 0.9999995700729295, iteration: 113963
loss: 1.0965996980667114,grad_norm: 0.9999998810093929, iteration: 113964
loss: 1.180770993232727,grad_norm: 0.999999147608826, iteration: 113965
loss: 1.203234314918518,grad_norm: 0.9999999115610082, iteration: 113966
loss: 1.0503464937210083,grad_norm: 0.9563826771652173, iteration: 113967
loss: 1.0345284938812256,grad_norm: 0.9999993774828918, iteration: 113968
loss: 1.0218086242675781,grad_norm: 0.9999990968106712, iteration: 113969
loss: 1.3057941198349,grad_norm: 0.9999994198437594, iteration: 113970
loss: 1.2331318855285645,grad_norm: 0.9999999283918989, iteration: 113971
loss: 1.031209945678711,grad_norm: 0.9999997946693292, iteration: 113972
loss: 1.1772634983062744,grad_norm: 0.9999999634925676, iteration: 113973
loss: 1.193912148475647,grad_norm: 0.9999999908392208, iteration: 113974
loss: 1.1579289436340332,grad_norm: 0.9999999550378871, iteration: 113975
loss: 1.201648235321045,grad_norm: 0.99999985387511, iteration: 113976
loss: 1.3096885681152344,grad_norm: 0.9999997865121842, iteration: 113977
loss: 1.2604340314865112,grad_norm: 0.9999997819360138, iteration: 113978
loss: 1.0421178340911865,grad_norm: 0.9999992068772029, iteration: 113979
loss: 1.2261019945144653,grad_norm: 0.99999979736195, iteration: 113980
loss: 1.154398798942566,grad_norm: 0.9999995476027957, iteration: 113981
loss: 1.223641037940979,grad_norm: 0.9999998798510202, iteration: 113982
loss: 1.253255844116211,grad_norm: 0.99999988566694, iteration: 113983
loss: 1.2277112007141113,grad_norm: 0.9999998796227606, iteration: 113984
loss: 1.1019569635391235,grad_norm: 0.9999992379584757, iteration: 113985
loss: 1.0223394632339478,grad_norm: 0.9999993701524874, iteration: 113986
loss: 1.1837304830551147,grad_norm: 0.9999994316956897, iteration: 113987
loss: 1.1722877025604248,grad_norm: 0.9999995894952232, iteration: 113988
loss: 1.1465641260147095,grad_norm: 0.9999998954092619, iteration: 113989
loss: 1.1389453411102295,grad_norm: 0.9999997400565694, iteration: 113990
loss: 1.0156428813934326,grad_norm: 0.9999992184085885, iteration: 113991
loss: 1.0184212923049927,grad_norm: 0.9999990902177459, iteration: 113992
loss: 1.2301377058029175,grad_norm: 0.9999995799157044, iteration: 113993
loss: 0.9906092882156372,grad_norm: 0.8873755621908627, iteration: 113994
loss: 1.0869114398956299,grad_norm: 0.9999991934755716, iteration: 113995
loss: 1.0771716833114624,grad_norm: 0.9125744453011766, iteration: 113996
loss: 1.0919467210769653,grad_norm: 0.9999991033739921, iteration: 113997
loss: 1.0328724384307861,grad_norm: 0.9999999312121254, iteration: 113998
loss: 1.0392122268676758,grad_norm: 0.999999306978821, iteration: 113999
loss: 0.9810951352119446,grad_norm: 0.8748535983564201, iteration: 114000
loss: 1.0309035778045654,grad_norm: 0.9999995396272361, iteration: 114001
loss: 1.0026339292526245,grad_norm: 0.9090076460647892, iteration: 114002
loss: 1.0417749881744385,grad_norm: 0.9999995540710552, iteration: 114003
loss: 1.0237785577774048,grad_norm: 0.994461863329038, iteration: 114004
loss: 0.9973738193511963,grad_norm: 0.999743119484267, iteration: 114005
loss: 1.0468426942825317,grad_norm: 0.9999994158724546, iteration: 114006
loss: 1.0273621082305908,grad_norm: 0.9999998166819718, iteration: 114007
loss: 1.041109561920166,grad_norm: 0.9999993710874836, iteration: 114008
loss: 1.07827627658844,grad_norm: 0.9999996711755297, iteration: 114009
loss: 1.0778623819351196,grad_norm: 0.9999996373682344, iteration: 114010
loss: 1.068612813949585,grad_norm: 0.9999992866688964, iteration: 114011
loss: 0.9836755990982056,grad_norm: 0.9999990136623097, iteration: 114012
loss: 0.9931331276893616,grad_norm: 0.8915858167595668, iteration: 114013
loss: 0.9680516123771667,grad_norm: 0.9944777273811318, iteration: 114014
loss: 1.063970923423767,grad_norm: 0.9999990732608436, iteration: 114015
loss: 0.9909223914146423,grad_norm: 0.9999992634286485, iteration: 114016
loss: 1.0015575885772705,grad_norm: 0.9999991579148019, iteration: 114017
loss: 1.0048749446868896,grad_norm: 0.9999994301872732, iteration: 114018
loss: 1.0018329620361328,grad_norm: 0.9999991707709452, iteration: 114019
loss: 1.0320100784301758,grad_norm: 0.999999074302654, iteration: 114020
loss: 1.041666030883789,grad_norm: 0.8826702552609167, iteration: 114021
loss: 1.1061185598373413,grad_norm: 0.9999991890474035, iteration: 114022
loss: 1.0140057802200317,grad_norm: 0.9999995344833004, iteration: 114023
loss: 0.9812954664230347,grad_norm: 0.7924150666252667, iteration: 114024
loss: 1.023842692375183,grad_norm: 0.7493209177103175, iteration: 114025
loss: 1.07289719581604,grad_norm: 0.9999991552024016, iteration: 114026
loss: 1.0214197635650635,grad_norm: 0.9999992448550324, iteration: 114027
loss: 0.9751154184341431,grad_norm: 0.97934744018, iteration: 114028
loss: 0.988415002822876,grad_norm: 0.8778365359241201, iteration: 114029
loss: 1.0318567752838135,grad_norm: 0.8932570974438425, iteration: 114030
loss: 1.049679160118103,grad_norm: 0.9999994734256114, iteration: 114031
loss: 1.0566643476486206,grad_norm: 0.9999992895336098, iteration: 114032
loss: 0.9962315559387207,grad_norm: 0.9999992993559157, iteration: 114033
loss: 1.0315526723861694,grad_norm: 0.9999995739179324, iteration: 114034
loss: 1.1476110219955444,grad_norm: 0.9999997768709912, iteration: 114035
loss: 1.0545724630355835,grad_norm: 0.9999991067807826, iteration: 114036
loss: 1.0541284084320068,grad_norm: 0.999999106683163, iteration: 114037
loss: 1.0241793394088745,grad_norm: 0.8743322410531084, iteration: 114038
loss: 1.0508116483688354,grad_norm: 0.9999996889256277, iteration: 114039
loss: 1.0013277530670166,grad_norm: 0.9956705528621979, iteration: 114040
loss: 1.0760297775268555,grad_norm: 0.9999994356924713, iteration: 114041
loss: 1.041520118713379,grad_norm: 0.9999992448973276, iteration: 114042
loss: 1.0334105491638184,grad_norm: 0.9999992143316835, iteration: 114043
loss: 1.006966233253479,grad_norm: 0.9999994268851357, iteration: 114044
loss: 0.9978623986244202,grad_norm: 0.8922410813773194, iteration: 114045
loss: 1.0859616994857788,grad_norm: 0.9999994871332698, iteration: 114046
loss: 1.046727180480957,grad_norm: 0.9999993381452075, iteration: 114047
loss: 1.0440616607666016,grad_norm: 0.9999991483180153, iteration: 114048
loss: 1.0295889377593994,grad_norm: 0.9999995902314913, iteration: 114049
loss: 1.0246998071670532,grad_norm: 0.9999999252849748, iteration: 114050
loss: 1.1213332414627075,grad_norm: 0.9999998645275806, iteration: 114051
loss: 1.0265086889266968,grad_norm: 0.9999992648489372, iteration: 114052
loss: 0.9868277311325073,grad_norm: 0.8096055209800799, iteration: 114053
loss: 1.0488741397857666,grad_norm: 1.0000000871401429, iteration: 114054
loss: 1.0035711526870728,grad_norm: 1.0000000199924997, iteration: 114055
loss: 1.0063223838806152,grad_norm: 0.7484925474096601, iteration: 114056
loss: 1.0692200660705566,grad_norm: 0.9999999369764453, iteration: 114057
loss: 1.026057243347168,grad_norm: 0.9999995396463646, iteration: 114058
loss: 1.040015697479248,grad_norm: 0.9999996096911308, iteration: 114059
loss: 0.9996494650840759,grad_norm: 0.9999991947898633, iteration: 114060
loss: 1.0021061897277832,grad_norm: 0.8565901471565791, iteration: 114061
loss: 0.9856535196304321,grad_norm: 0.9560186360481191, iteration: 114062
loss: 1.1131789684295654,grad_norm: 0.999999992974256, iteration: 114063
loss: 0.9537286162376404,grad_norm: 0.9953396422752591, iteration: 114064
loss: 1.0340869426727295,grad_norm: 0.9860893973248235, iteration: 114065
loss: 1.0294150114059448,grad_norm: 0.9999993065058485, iteration: 114066
loss: 1.1562846899032593,grad_norm: 0.99999950355466, iteration: 114067
loss: 1.0732783079147339,grad_norm: 0.9999997507382269, iteration: 114068
loss: 1.0018553733825684,grad_norm: 0.9999990978107526, iteration: 114069
loss: 1.0195802450180054,grad_norm: 0.8288049791827413, iteration: 114070
loss: 0.9811846613883972,grad_norm: 0.9999997085392648, iteration: 114071
loss: 1.026881217956543,grad_norm: 0.9999993758356256, iteration: 114072
loss: 1.1748201847076416,grad_norm: 1.0000000566004774, iteration: 114073
loss: 1.0348414182662964,grad_norm: 0.9999990459911268, iteration: 114074
loss: 0.9857104420661926,grad_norm: 0.9999991386744771, iteration: 114075
loss: 1.16103196144104,grad_norm: 0.9999999369583826, iteration: 114076
loss: 1.041802167892456,grad_norm: 0.9999993358037098, iteration: 114077
loss: 1.040908694267273,grad_norm: 0.9660916139585223, iteration: 114078
loss: 1.0095106363296509,grad_norm: 0.9999995601088109, iteration: 114079
loss: 1.1897928714752197,grad_norm: 0.9999998639658786, iteration: 114080
loss: 1.0714645385742188,grad_norm: 0.9999993762280321, iteration: 114081
loss: 1.13774573802948,grad_norm: 0.9999999289508716, iteration: 114082
loss: 1.0678749084472656,grad_norm: 0.9999994096996366, iteration: 114083
loss: 1.1180158853530884,grad_norm: 0.9999998874753678, iteration: 114084
loss: 0.9782465696334839,grad_norm: 0.9085408521679955, iteration: 114085
loss: 1.0742270946502686,grad_norm: 0.9999991799993847, iteration: 114086
loss: 1.0681030750274658,grad_norm: 0.9999997413547875, iteration: 114087
loss: 1.0287370681762695,grad_norm: 0.9999995609708786, iteration: 114088
loss: 1.1613019704818726,grad_norm: 0.9999990341825846, iteration: 114089
loss: 1.0082751512527466,grad_norm: 0.9999989251252414, iteration: 114090
loss: 1.003900170326233,grad_norm: 0.7024829921588855, iteration: 114091
loss: 1.0530437231063843,grad_norm: 0.9999992301900967, iteration: 114092
loss: 1.0283896923065186,grad_norm: 0.9716637704427926, iteration: 114093
loss: 1.0983198881149292,grad_norm: 0.9999998666989307, iteration: 114094
loss: 1.0204555988311768,grad_norm: 0.999999958167249, iteration: 114095
loss: 1.1555486917495728,grad_norm: 0.9999998640246529, iteration: 114096
loss: 1.1004952192306519,grad_norm: 0.9999999304122613, iteration: 114097
loss: 1.0817452669143677,grad_norm: 0.9999993309473574, iteration: 114098
loss: 1.08668053150177,grad_norm: 0.9999997586253054, iteration: 114099
loss: 1.0100605487823486,grad_norm: 0.9999998589168728, iteration: 114100
loss: 1.0029562711715698,grad_norm: 0.9999992121848704, iteration: 114101
loss: 1.0226271152496338,grad_norm: 0.9462612819973404, iteration: 114102
loss: 1.1048777103424072,grad_norm: 0.9999999030803297, iteration: 114103
loss: 1.0033082962036133,grad_norm: 0.9999991630317525, iteration: 114104
loss: 1.0925253629684448,grad_norm: 0.9999998163510415, iteration: 114105
loss: 1.0962152481079102,grad_norm: 0.9999997790994574, iteration: 114106
loss: 1.1710296869277954,grad_norm: 0.9999998979531818, iteration: 114107
loss: 1.0284912586212158,grad_norm: 0.9999993000904536, iteration: 114108
loss: 1.075124979019165,grad_norm: 0.999999180657513, iteration: 114109
loss: 1.0999020338058472,grad_norm: 0.9999998777353409, iteration: 114110
loss: 1.0942919254302979,grad_norm: 0.9999992016403837, iteration: 114111
loss: 1.1437674760818481,grad_norm: 0.9999994536623534, iteration: 114112
loss: 1.1071996688842773,grad_norm: 0.9999990614026113, iteration: 114113
loss: 1.0679779052734375,grad_norm: 0.9999997094816536, iteration: 114114
loss: 1.0936185121536255,grad_norm: 0.9999992805572886, iteration: 114115
loss: 1.0366603136062622,grad_norm: 0.9999995457500225, iteration: 114116
loss: 1.0148348808288574,grad_norm: 0.9999999721007243, iteration: 114117
loss: 1.0589780807495117,grad_norm: 0.9999993375995025, iteration: 114118
loss: 1.0567625761032104,grad_norm: 0.9999992187537323, iteration: 114119
loss: 1.020262360572815,grad_norm: 0.8578349941128064, iteration: 114120
loss: 1.0799777507781982,grad_norm: 0.9999997756724232, iteration: 114121
loss: 1.0809178352355957,grad_norm: 0.9999991072680549, iteration: 114122
loss: 1.2463213205337524,grad_norm: 0.999999355211339, iteration: 114123
loss: 1.1262696981430054,grad_norm: 0.9999998344965453, iteration: 114124
loss: 1.4859846830368042,grad_norm: 0.9999999344796937, iteration: 114125
loss: 1.1480598449707031,grad_norm: 0.9999998296841477, iteration: 114126
loss: 1.0220824480056763,grad_norm: 0.9999996840082851, iteration: 114127
loss: 1.081629753112793,grad_norm: 0.9999993477776583, iteration: 114128
loss: 1.2600860595703125,grad_norm: 0.9999998126914922, iteration: 114129
loss: 1.0536067485809326,grad_norm: 0.9999994641419077, iteration: 114130
loss: 1.1790037155151367,grad_norm: 0.999999404242538, iteration: 114131
loss: 0.9941774010658264,grad_norm: 0.9999991541210176, iteration: 114132
loss: 1.079962134361267,grad_norm: 0.9999994433094576, iteration: 114133
loss: 1.3261497020721436,grad_norm: 0.9999998493811489, iteration: 114134
loss: 1.0848158597946167,grad_norm: 0.9999994749587418, iteration: 114135
loss: 0.9800171256065369,grad_norm: 0.9999991502910887, iteration: 114136
loss: 1.021287202835083,grad_norm: 0.9999995010209479, iteration: 114137
loss: 0.9937035441398621,grad_norm: 0.9999995762860544, iteration: 114138
loss: 1.045465111732483,grad_norm: 0.9999990507270821, iteration: 114139
loss: 1.0696083307266235,grad_norm: 0.9999996781013233, iteration: 114140
loss: 1.0667442083358765,grad_norm: 0.9999999285106065, iteration: 114141
loss: 1.2246429920196533,grad_norm: 0.9999998239368574, iteration: 114142
loss: 1.0176655054092407,grad_norm: 0.9349800513324465, iteration: 114143
loss: 1.3985849618911743,grad_norm: 0.9999998633845115, iteration: 114144
loss: 1.030816674232483,grad_norm: 0.9999998124229105, iteration: 114145
loss: 1.2022058963775635,grad_norm: 0.9999999439913395, iteration: 114146
loss: 1.0748608112335205,grad_norm: 0.9999999750666048, iteration: 114147
loss: 1.0047318935394287,grad_norm: 0.999999324229829, iteration: 114148
loss: 1.0994906425476074,grad_norm: 0.9999996194886308, iteration: 114149
loss: 1.1337974071502686,grad_norm: 0.9999996176454069, iteration: 114150
loss: 1.0227434635162354,grad_norm: 0.9999996808290923, iteration: 114151
loss: 1.028961181640625,grad_norm: 0.9999997609732411, iteration: 114152
loss: 1.0367399454116821,grad_norm: 0.9999996948925458, iteration: 114153
loss: 1.4989174604415894,grad_norm: 1.0000000008818375, iteration: 114154
loss: 1.2050315141677856,grad_norm: 0.99999993028648, iteration: 114155
loss: 1.0810067653656006,grad_norm: 0.9999996818255658, iteration: 114156
loss: 1.1890608072280884,grad_norm: 0.9999998812956148, iteration: 114157
loss: 1.0064705610275269,grad_norm: 0.9675839856278974, iteration: 114158
loss: 1.0225107669830322,grad_norm: 0.9999995819456265, iteration: 114159
loss: 0.9836959838867188,grad_norm: 0.8286365192989044, iteration: 114160
loss: 1.0629103183746338,grad_norm: 0.9999990147395826, iteration: 114161
loss: 1.0487021207809448,grad_norm: 0.9999999065189403, iteration: 114162
loss: 1.0822725296020508,grad_norm: 0.9999996777035483, iteration: 114163
loss: 1.0075284242630005,grad_norm: 0.999999095197398, iteration: 114164
loss: 1.0564968585968018,grad_norm: 0.9999997737558611, iteration: 114165
loss: 0.9706200361251831,grad_norm: 0.9468121248227414, iteration: 114166
loss: 1.009088397026062,grad_norm: 0.9999996667230409, iteration: 114167
loss: 1.006149172782898,grad_norm: 0.9999997158353751, iteration: 114168
loss: 1.0695282220840454,grad_norm: 0.9999997630526571, iteration: 114169
loss: 1.1171977519989014,grad_norm: 0.944473645981645, iteration: 114170
loss: 1.049272060394287,grad_norm: 0.9673486324749614, iteration: 114171
loss: 1.1156009435653687,grad_norm: 0.9999998342804447, iteration: 114172
loss: 0.9355910420417786,grad_norm: 0.9999991366473027, iteration: 114173
loss: 1.0403366088867188,grad_norm: 0.9999998431997668, iteration: 114174
loss: 1.0297510623931885,grad_norm: 0.9999995711697298, iteration: 114175
loss: 1.0653234720230103,grad_norm: 0.9999990159450624, iteration: 114176
loss: 1.0295264720916748,grad_norm: 0.9999999821825818, iteration: 114177
loss: 1.0303900241851807,grad_norm: 0.9999998632125136, iteration: 114178
loss: 1.0295464992523193,grad_norm: 0.8397306420104297, iteration: 114179
loss: 0.984670877456665,grad_norm: 0.9999991469292109, iteration: 114180
loss: 0.9769699573516846,grad_norm: 0.9516951691445912, iteration: 114181
loss: 1.1788580417633057,grad_norm: 0.9999999623310871, iteration: 114182
loss: 0.9883590936660767,grad_norm: 0.9999989799646049, iteration: 114183
loss: 1.0452505350112915,grad_norm: 0.9999990513055063, iteration: 114184
loss: 1.0805668830871582,grad_norm: 0.9999998368828917, iteration: 114185
loss: 1.0207602977752686,grad_norm: 0.9514173974496328, iteration: 114186
loss: 1.0136910676956177,grad_norm: 0.9999996722609371, iteration: 114187
loss: 1.0624884366989136,grad_norm: 0.9999995968689774, iteration: 114188
loss: 1.0005184412002563,grad_norm: 0.999999116404551, iteration: 114189
loss: 1.0802059173583984,grad_norm: 0.9999997592956036, iteration: 114190
loss: 1.0171005725860596,grad_norm: 0.9649054173087701, iteration: 114191
loss: 1.138176441192627,grad_norm: 0.9999995866268769, iteration: 114192
loss: 1.0214194059371948,grad_norm: 0.8543006861416091, iteration: 114193
loss: 1.1035654544830322,grad_norm: 0.9999997607206191, iteration: 114194
loss: 0.9940053820610046,grad_norm: 0.999998996322765, iteration: 114195
loss: 1.0279723405838013,grad_norm: 0.9999999257543528, iteration: 114196
loss: 0.9860078692436218,grad_norm: 0.8690789818654444, iteration: 114197
loss: 1.0868226289749146,grad_norm: 0.9999996382344205, iteration: 114198
loss: 1.00221586227417,grad_norm: 0.9999994230764476, iteration: 114199
loss: 1.0514183044433594,grad_norm: 0.9999997391127176, iteration: 114200
loss: 1.162940263748169,grad_norm: 0.9999996677504671, iteration: 114201
loss: 1.0288211107254028,grad_norm: 0.999999247749074, iteration: 114202
loss: 1.1881496906280518,grad_norm: 0.9999998131163069, iteration: 114203
loss: 1.032342791557312,grad_norm: 0.9999995413242818, iteration: 114204
loss: 1.388350248336792,grad_norm: 0.9999997989499877, iteration: 114205
loss: 0.9870766997337341,grad_norm: 0.8682170857951164, iteration: 114206
loss: 1.0069191455841064,grad_norm: 0.9999993837844604, iteration: 114207
loss: 1.0374478101730347,grad_norm: 0.9999997112891876, iteration: 114208
loss: 1.0112793445587158,grad_norm: 0.9999996715839062, iteration: 114209
loss: 1.0917692184448242,grad_norm: 0.999999474002986, iteration: 114210
loss: 0.9777940511703491,grad_norm: 0.9168063474647528, iteration: 114211
loss: 1.1531026363372803,grad_norm: 1.0000000388301247, iteration: 114212
loss: 1.0828135013580322,grad_norm: 0.9999991291469562, iteration: 114213
loss: 1.0012891292572021,grad_norm: 0.9999993779757751, iteration: 114214
loss: 1.1283764839172363,grad_norm: 0.9999990769661435, iteration: 114215
loss: 1.0702985525131226,grad_norm: 0.9999998475879185, iteration: 114216
loss: 1.0021761655807495,grad_norm: 0.999999051777304, iteration: 114217
loss: 0.9803330302238464,grad_norm: 0.7617777491735579, iteration: 114218
loss: 1.0655595064163208,grad_norm: 0.9482523096074754, iteration: 114219
loss: 1.0699397325515747,grad_norm: 0.9999991529534521, iteration: 114220
loss: 1.0918104648590088,grad_norm: 0.9999994237230762, iteration: 114221
loss: 1.0825492143630981,grad_norm: 0.9999997515348659, iteration: 114222
loss: 1.0940682888031006,grad_norm: 0.9999996847221413, iteration: 114223
loss: 1.2145801782608032,grad_norm: 0.9999998221327748, iteration: 114224
loss: 1.1571556329727173,grad_norm: 0.9999992403323444, iteration: 114225
loss: 1.0519386529922485,grad_norm: 0.9999994258522926, iteration: 114226
loss: 1.095863938331604,grad_norm: 0.9999994913625175, iteration: 114227
loss: 1.017078161239624,grad_norm: 0.9184984120472682, iteration: 114228
loss: 1.0541459321975708,grad_norm: 0.9999996831056821, iteration: 114229
loss: 1.0080974102020264,grad_norm: 0.9999990581574555, iteration: 114230
loss: 1.0742998123168945,grad_norm: 0.9999994540827392, iteration: 114231
loss: 0.9796038269996643,grad_norm: 0.9999992263525834, iteration: 114232
loss: 1.117446780204773,grad_norm: 0.9999995444561139, iteration: 114233
loss: 1.181177020072937,grad_norm: 0.9999996751288961, iteration: 114234
loss: 1.1469546556472778,grad_norm: 0.9999997849592833, iteration: 114235
loss: 1.010754942893982,grad_norm: 0.9999998592234453, iteration: 114236
loss: 1.0493179559707642,grad_norm: 0.9999996891391616, iteration: 114237
loss: 1.0300363302230835,grad_norm: 0.8766575393019884, iteration: 114238
loss: 1.171778917312622,grad_norm: 0.9999999618286571, iteration: 114239
loss: 1.202410101890564,grad_norm: 0.9999998699245778, iteration: 114240
loss: 1.0451850891113281,grad_norm: 0.9999997429303005, iteration: 114241
loss: 0.9868728518486023,grad_norm: 0.9999990007810932, iteration: 114242
loss: 1.2155083417892456,grad_norm: 0.9999994761595654, iteration: 114243
loss: 1.2389988899230957,grad_norm: 0.999999690389334, iteration: 114244
loss: 1.2020697593688965,grad_norm: 0.9999997899119956, iteration: 114245
loss: 1.1571835279464722,grad_norm: 0.9999998764259518, iteration: 114246
loss: 1.0457762479782104,grad_norm: 0.9999999610521553, iteration: 114247
loss: 1.0793399810791016,grad_norm: 0.9999998248324312, iteration: 114248
loss: 1.0723137855529785,grad_norm: 0.9999998964417811, iteration: 114249
loss: 1.0853464603424072,grad_norm: 0.9999991107510203, iteration: 114250
loss: 1.2089085578918457,grad_norm: 0.9999997304693894, iteration: 114251
loss: 1.085815668106079,grad_norm: 1.0000000127921143, iteration: 114252
loss: 1.0164594650268555,grad_norm: 0.9999992035456612, iteration: 114253
loss: 1.0584403276443481,grad_norm: 0.999999210979091, iteration: 114254
loss: 0.974991500377655,grad_norm: 0.9434354447853991, iteration: 114255
loss: 1.0208748579025269,grad_norm: 0.9999993301468707, iteration: 114256
loss: 1.0130579471588135,grad_norm: 0.9596619307885739, iteration: 114257
loss: 1.0521764755249023,grad_norm: 0.9350731721124413, iteration: 114258
loss: 1.221990704536438,grad_norm: 0.9999997322178783, iteration: 114259
loss: 1.012852668762207,grad_norm: 0.9999997053496853, iteration: 114260
loss: 1.0791759490966797,grad_norm: 0.9999991306667487, iteration: 114261
loss: 1.0407078266143799,grad_norm: 0.9999991873773703, iteration: 114262
loss: 1.3137800693511963,grad_norm: 0.9999997941098414, iteration: 114263
loss: 1.0460907220840454,grad_norm: 0.9746790858734466, iteration: 114264
loss: 1.1603894233703613,grad_norm: 0.9999997666210926, iteration: 114265
loss: 1.0314664840698242,grad_norm: 0.999999137505804, iteration: 114266
loss: 1.004509449005127,grad_norm: 0.9999992541963285, iteration: 114267
loss: 1.1264110803604126,grad_norm: 0.9999994520937334, iteration: 114268
loss: 1.1315661668777466,grad_norm: 0.9999993598039669, iteration: 114269
loss: 1.1870028972625732,grad_norm: 0.9999999130089804, iteration: 114270
loss: 1.1818827390670776,grad_norm: 0.9999998554948294, iteration: 114271
loss: 1.2592278718948364,grad_norm: 0.9999999417720421, iteration: 114272
loss: 0.9619868397712708,grad_norm: 0.9081626054836582, iteration: 114273
loss: 1.039633870124817,grad_norm: 0.9999990920263238, iteration: 114274
loss: 1.0197198390960693,grad_norm: 0.9999995631079495, iteration: 114275
loss: 0.9954678416252136,grad_norm: 0.9091164912891184, iteration: 114276
loss: 1.2735462188720703,grad_norm: 0.9999999238738635, iteration: 114277
loss: 1.0413509607315063,grad_norm: 0.9999992142132833, iteration: 114278
loss: 1.1009635925292969,grad_norm: 0.9999998166796016, iteration: 114279
loss: 1.1299937963485718,grad_norm: 0.9999998279100046, iteration: 114280
loss: 0.9594731330871582,grad_norm: 0.9999989816542331, iteration: 114281
loss: 1.0718952417373657,grad_norm: 0.9999995398505445, iteration: 114282
loss: 1.0526628494262695,grad_norm: 0.9999998296922881, iteration: 114283
loss: 1.0448964834213257,grad_norm: 0.9999991179302308, iteration: 114284
loss: 1.0144002437591553,grad_norm: 0.9369547056812054, iteration: 114285
loss: 1.704816460609436,grad_norm: 0.9999999113893103, iteration: 114286
loss: 1.0549784898757935,grad_norm: 0.9999999257628902, iteration: 114287
loss: 1.183722734451294,grad_norm: 0.9999998067630929, iteration: 114288
loss: 1.106968879699707,grad_norm: 0.999999992090733, iteration: 114289
loss: 1.0562514066696167,grad_norm: 0.9999999426428097, iteration: 114290
loss: 1.0093737840652466,grad_norm: 0.9999995860371641, iteration: 114291
loss: 1.048573613166809,grad_norm: 0.9054150136707152, iteration: 114292
loss: 0.9753159880638123,grad_norm: 0.9999990316700847, iteration: 114293
loss: 1.1408498287200928,grad_norm: 0.9999998547784313, iteration: 114294
loss: 1.306905746459961,grad_norm: 0.999999822652824, iteration: 114295
loss: 0.9885876178741455,grad_norm: 0.9999997148431725, iteration: 114296
loss: 1.0508302450180054,grad_norm: 0.9999995118914332, iteration: 114297
loss: 1.0025984048843384,grad_norm: 0.9254898356688388, iteration: 114298
loss: 1.7566779851913452,grad_norm: 0.9999999325120011, iteration: 114299
loss: 1.2312006950378418,grad_norm: 0.9999994626204796, iteration: 114300
loss: 1.1135883331298828,grad_norm: 0.999999277615878, iteration: 114301
loss: 1.0953699350357056,grad_norm: 0.9999992190731485, iteration: 114302
loss: 0.9759463667869568,grad_norm: 0.8981189783551122, iteration: 114303
loss: 1.1050525903701782,grad_norm: 0.9999997901604655, iteration: 114304
loss: 1.238981008529663,grad_norm: 0.9999995941668101, iteration: 114305
loss: 1.1501712799072266,grad_norm: 0.9999999319529128, iteration: 114306
loss: 1.4561502933502197,grad_norm: 0.999999937950467, iteration: 114307
loss: 1.0907301902770996,grad_norm: 0.9999999517811349, iteration: 114308
loss: 1.137566328048706,grad_norm: 0.9999992169105953, iteration: 114309
loss: 1.0406761169433594,grad_norm: 0.9855989161887556, iteration: 114310
loss: 1.2073884010314941,grad_norm: 0.9999996943122411, iteration: 114311
loss: 1.2745001316070557,grad_norm: 0.9999997708969443, iteration: 114312
loss: 1.0390243530273438,grad_norm: 0.9999998914546825, iteration: 114313
loss: 1.196742057800293,grad_norm: 0.999999556646701, iteration: 114314
loss: 0.9974181652069092,grad_norm: 0.9165667750265357, iteration: 114315
loss: 0.9886278510093689,grad_norm: 0.9912095734175368, iteration: 114316
loss: 0.9646731615066528,grad_norm: 0.8421251809940142, iteration: 114317
loss: 1.0162134170532227,grad_norm: 0.9999992271784435, iteration: 114318
loss: 1.0522876977920532,grad_norm: 0.9999992703194631, iteration: 114319
loss: 1.0042190551757812,grad_norm: 0.8405764410808819, iteration: 114320
loss: 1.2575064897537231,grad_norm: 0.9999997451467599, iteration: 114321
loss: 1.1777105331420898,grad_norm: 1.0000000070306825, iteration: 114322
loss: 0.9958406686782837,grad_norm: 0.9999998183698408, iteration: 114323
loss: 1.4306925535202026,grad_norm: 0.9999997701399549, iteration: 114324
loss: 1.252519965171814,grad_norm: 0.9999997485356554, iteration: 114325
loss: 1.0860927104949951,grad_norm: 0.9999992154314724, iteration: 114326
loss: 1.0123834609985352,grad_norm: 0.999999086665397, iteration: 114327
loss: 1.140244960784912,grad_norm: 0.9999999437654282, iteration: 114328
loss: 1.0761284828186035,grad_norm: 0.9999992979490959, iteration: 114329
loss: 1.0244637727737427,grad_norm: 0.9999998006333507, iteration: 114330
loss: 0.9762160778045654,grad_norm: 0.9999993905856462, iteration: 114331
loss: 1.1172447204589844,grad_norm: 0.9999992250922958, iteration: 114332
loss: 1.2536228895187378,grad_norm: 0.9999998498671652, iteration: 114333
loss: 1.1529748439788818,grad_norm: 0.9999999097674758, iteration: 114334
loss: 1.3259031772613525,grad_norm: 0.999999926370604, iteration: 114335
loss: 1.0697815418243408,grad_norm: 0.9999992558411749, iteration: 114336
loss: 1.218706488609314,grad_norm: 0.9999998976610177, iteration: 114337
loss: 1.0618271827697754,grad_norm: 0.9999993874907056, iteration: 114338
loss: 0.9867263436317444,grad_norm: 0.999999076392564, iteration: 114339
loss: 1.162380576133728,grad_norm: 0.9999998641992588, iteration: 114340
loss: 1.180288314819336,grad_norm: 0.9999991730839383, iteration: 114341
loss: 1.1872154474258423,grad_norm: 0.9999999281324268, iteration: 114342
loss: 1.2115179300308228,grad_norm: 0.9999996607490537, iteration: 114343
loss: 1.03778874874115,grad_norm: 0.9999992772518663, iteration: 114344
loss: 0.9773263335227966,grad_norm: 0.923077253862006, iteration: 114345
loss: 1.0989742279052734,grad_norm: 0.9999998831809586, iteration: 114346
loss: 1.0525189638137817,grad_norm: 0.9999992527129598, iteration: 114347
loss: 1.1863172054290771,grad_norm: 0.9999994073579593, iteration: 114348
loss: 1.2086787223815918,grad_norm: 0.999999821205094, iteration: 114349
loss: 0.9949473738670349,grad_norm: 0.9999999584047524, iteration: 114350
loss: 1.210028886795044,grad_norm: 0.9999997829039587, iteration: 114351
loss: 1.1947696208953857,grad_norm: 0.9999997661453364, iteration: 114352
loss: 1.2099387645721436,grad_norm: 0.9999999333121746, iteration: 114353
loss: 1.007012128829956,grad_norm: 0.9999990796910927, iteration: 114354
loss: 0.9742946028709412,grad_norm: 0.934069474874484, iteration: 114355
loss: 1.1932213306427002,grad_norm: 0.9999994866008848, iteration: 114356
loss: 1.1410762071609497,grad_norm: 0.9999999014196945, iteration: 114357
loss: 1.0672993659973145,grad_norm: 0.9999997711534845, iteration: 114358
loss: 1.1951005458831787,grad_norm: 0.9999994069074423, iteration: 114359
loss: 1.3944878578186035,grad_norm: 0.999999716667935, iteration: 114360
loss: 1.2807894945144653,grad_norm: 0.999999841800498, iteration: 114361
loss: 1.225380778312683,grad_norm: 0.999999701604153, iteration: 114362
loss: 1.404870867729187,grad_norm: 0.9999999467551645, iteration: 114363
loss: 1.2074517011642456,grad_norm: 0.9999995196849223, iteration: 114364
loss: 1.1926053762435913,grad_norm: 0.9999999822297291, iteration: 114365
loss: 1.2187538146972656,grad_norm: 1.00000010313943, iteration: 114366
loss: 1.2468984127044678,grad_norm: 0.9999998821365901, iteration: 114367
loss: 1.2286826372146606,grad_norm: 0.9999998126745707, iteration: 114368
loss: 1.2439076900482178,grad_norm: 0.9999999463688912, iteration: 114369
loss: 1.2632147073745728,grad_norm: 0.9999995775984185, iteration: 114370
loss: 1.1854740381240845,grad_norm: 0.9999999152461229, iteration: 114371
loss: 1.1992019414901733,grad_norm: 0.9999999013038462, iteration: 114372
loss: 1.2501025199890137,grad_norm: 0.9999994408115814, iteration: 114373
loss: 1.3552531003952026,grad_norm: 0.9999999530783931, iteration: 114374
loss: 1.0719431638717651,grad_norm: 0.9999999212355908, iteration: 114375
loss: 1.0848805904388428,grad_norm: 0.9999997281331615, iteration: 114376
loss: 1.6088365316390991,grad_norm: 0.9999998785525771, iteration: 114377
loss: 1.055649995803833,grad_norm: 0.9999997314362346, iteration: 114378
loss: 0.9860623478889465,grad_norm: 0.9999990292056397, iteration: 114379
loss: 1.0592471361160278,grad_norm: 0.9999991232807833, iteration: 114380
loss: 1.1393625736236572,grad_norm: 0.999999696256576, iteration: 114381
loss: 1.0266590118408203,grad_norm: 0.9893264188151097, iteration: 114382
loss: 1.0802663564682007,grad_norm: 0.9999991365276297, iteration: 114383
loss: 1.1656700372695923,grad_norm: 0.9999998238245956, iteration: 114384
loss: 1.0268436670303345,grad_norm: 0.9999991560858488, iteration: 114385
loss: 1.0345914363861084,grad_norm: 0.9999990750676033, iteration: 114386
loss: 0.9768763184547424,grad_norm: 0.8560787084044942, iteration: 114387
loss: 1.0025097131729126,grad_norm: 0.9999991455612209, iteration: 114388
loss: 1.1058841943740845,grad_norm: 0.9999998840112908, iteration: 114389
loss: 1.1927807331085205,grad_norm: 0.9999998856308742, iteration: 114390
loss: 1.2410452365875244,grad_norm: 0.9999997206850453, iteration: 114391
loss: 1.0751951932907104,grad_norm: 0.9999998253652866, iteration: 114392
loss: 1.1177117824554443,grad_norm: 0.9999997916838437, iteration: 114393
loss: 1.0296281576156616,grad_norm: 0.9999994475749859, iteration: 114394
loss: 1.090384840965271,grad_norm: 0.9999992590837999, iteration: 114395
loss: 1.0193358659744263,grad_norm: 0.9999994312667071, iteration: 114396
loss: 1.2445589303970337,grad_norm: 0.9999994649554589, iteration: 114397
loss: 1.067780613899231,grad_norm: 0.9955222688025438, iteration: 114398
loss: 1.1049309968948364,grad_norm: 0.9999992614067077, iteration: 114399
loss: 1.0930713415145874,grad_norm: 0.9999993700170559, iteration: 114400
loss: 1.2621768712997437,grad_norm: 0.9999993628385149, iteration: 114401
loss: 1.3051695823669434,grad_norm: 0.99999955127448, iteration: 114402
loss: 1.0065650939941406,grad_norm: 0.9999995700104329, iteration: 114403
loss: 1.0845472812652588,grad_norm: 0.9999995538959328, iteration: 114404
loss: 1.1805849075317383,grad_norm: 0.9999997551715293, iteration: 114405
loss: 1.3097470998764038,grad_norm: 0.9999997885366032, iteration: 114406
loss: 1.0670169591903687,grad_norm: 0.9459943999804229, iteration: 114407
loss: 1.2006241083145142,grad_norm: 0.9999999397716133, iteration: 114408
loss: 1.0739210844039917,grad_norm: 0.9999990810022285, iteration: 114409
loss: 1.0575560331344604,grad_norm: 0.8271521419460026, iteration: 114410
loss: 1.1027835607528687,grad_norm: 0.9999995715494928, iteration: 114411
loss: 1.078281283378601,grad_norm: 0.9999992331485033, iteration: 114412
loss: 1.2514692544937134,grad_norm: 0.999999626337701, iteration: 114413
loss: 1.054418683052063,grad_norm: 0.9999997871416662, iteration: 114414
loss: 1.0555355548858643,grad_norm: 0.9999993336232014, iteration: 114415
loss: 1.2091567516326904,grad_norm: 0.999999868739348, iteration: 114416
loss: 1.0198185443878174,grad_norm: 0.8794194436605502, iteration: 114417
loss: 0.9866812229156494,grad_norm: 0.9999990437553453, iteration: 114418
loss: 0.9834852814674377,grad_norm: 0.9999991106565728, iteration: 114419
loss: 1.0361794233322144,grad_norm: 0.9999995691530866, iteration: 114420
loss: 1.2838923931121826,grad_norm: 0.9999999273519268, iteration: 114421
loss: 1.1245274543762207,grad_norm: 0.9999998800886983, iteration: 114422
loss: 1.0210084915161133,grad_norm: 0.9108455192537068, iteration: 114423
loss: 1.0031005144119263,grad_norm: 0.9999998292679861, iteration: 114424
loss: 1.057557463645935,grad_norm: 0.9999991310281907, iteration: 114425
loss: 1.0405811071395874,grad_norm: 0.9554583472804387, iteration: 114426
loss: 1.0257368087768555,grad_norm: 0.9071010642451559, iteration: 114427
loss: 1.1884751319885254,grad_norm: 0.9999999371474763, iteration: 114428
loss: 0.9974956512451172,grad_norm: 0.884827355364152, iteration: 114429
loss: 0.9624972343444824,grad_norm: 0.9293327115790943, iteration: 114430
loss: 1.1708935499191284,grad_norm: 0.9999994757763284, iteration: 114431
loss: 1.3244798183441162,grad_norm: 0.999999685688339, iteration: 114432
loss: 1.018917202949524,grad_norm: 0.9999993020045433, iteration: 114433
loss: 1.163105845451355,grad_norm: 0.999999831512401, iteration: 114434
loss: 0.994174599647522,grad_norm: 0.9125553208520752, iteration: 114435
loss: 1.0753939151763916,grad_norm: 1.0000000080464813, iteration: 114436
loss: 1.0537923574447632,grad_norm: 0.9999998566713989, iteration: 114437
loss: 1.0008790493011475,grad_norm: 0.9999992452524294, iteration: 114438
loss: 1.0145143270492554,grad_norm: 0.8770493168642665, iteration: 114439
loss: 1.1519584655761719,grad_norm: 0.9999994422739943, iteration: 114440
loss: 1.0257710218429565,grad_norm: 0.9999996057395422, iteration: 114441
loss: 1.1511664390563965,grad_norm: 0.9999999098023978, iteration: 114442
loss: 1.0570229291915894,grad_norm: 0.9999997770416058, iteration: 114443
loss: 0.9912533164024353,grad_norm: 0.8062328227386747, iteration: 114444
loss: 1.0981541872024536,grad_norm: 0.9999996463258655, iteration: 114445
loss: 1.0173832178115845,grad_norm: 0.9371302810694285, iteration: 114446
loss: 1.1924240589141846,grad_norm: 0.9999999411648922, iteration: 114447
loss: 0.9982897043228149,grad_norm: 0.999999243811947, iteration: 114448
loss: 0.9850565791130066,grad_norm: 0.9999992990437389, iteration: 114449
loss: 1.1463661193847656,grad_norm: 0.9999998974919534, iteration: 114450
loss: 1.02606999874115,grad_norm: 0.9999997221842118, iteration: 114451
loss: 1.0366853475570679,grad_norm: 0.7777659045157419, iteration: 114452
loss: 1.0834712982177734,grad_norm: 0.9999996319451219, iteration: 114453
loss: 1.0539768934249878,grad_norm: 0.9999996195420485, iteration: 114454
loss: 1.0583170652389526,grad_norm: 0.9999994329014902, iteration: 114455
loss: 1.0237293243408203,grad_norm: 0.9999990544135734, iteration: 114456
loss: 1.0052446126937866,grad_norm: 0.9451745971787522, iteration: 114457
loss: 1.0157119035720825,grad_norm: 0.9999995208047959, iteration: 114458
loss: 1.3804309368133545,grad_norm: 0.9999998083956908, iteration: 114459
loss: 0.9889651536941528,grad_norm: 0.9999994648498783, iteration: 114460
loss: 1.0536214113235474,grad_norm: 0.9999991007184474, iteration: 114461
loss: 1.0192197561264038,grad_norm: 0.8436013579024312, iteration: 114462
loss: 1.1626988649368286,grad_norm: 0.9999995807821382, iteration: 114463
loss: 0.9969384074211121,grad_norm: 0.999999715138496, iteration: 114464
loss: 1.059125304222107,grad_norm: 0.9999996253538882, iteration: 114465
loss: 1.106788158416748,grad_norm: 0.9999991434455066, iteration: 114466
loss: 1.0124620199203491,grad_norm: 0.8129917418262493, iteration: 114467
loss: 0.9904005527496338,grad_norm: 0.9999997710052599, iteration: 114468
loss: 1.0482953786849976,grad_norm: 0.9999991100535585, iteration: 114469
loss: 1.104959487915039,grad_norm: 0.999999304877413, iteration: 114470
loss: 1.2450045347213745,grad_norm: 0.9999998625165876, iteration: 114471
loss: 1.182021141052246,grad_norm: 1.0000000152809012, iteration: 114472
loss: 1.1207720041275024,grad_norm: 0.9999998871305888, iteration: 114473
loss: 1.004684329032898,grad_norm: 0.9764883526893019, iteration: 114474
loss: 1.046743392944336,grad_norm: 0.9999994616037013, iteration: 114475
loss: 1.0575668811798096,grad_norm: 0.948890410247389, iteration: 114476
loss: 1.1459887027740479,grad_norm: 0.9999997621524074, iteration: 114477
loss: 0.9958338141441345,grad_norm: 0.9999989924723656, iteration: 114478
loss: 1.033784031867981,grad_norm: 0.9436015907022305, iteration: 114479
loss: 1.2495938539505005,grad_norm: 0.999999936388633, iteration: 114480
loss: 1.0070645809173584,grad_norm: 0.9999990902016851, iteration: 114481
loss: 1.0726070404052734,grad_norm: 0.9568070876528662, iteration: 114482
loss: 1.0086365938186646,grad_norm: 0.8274985939326676, iteration: 114483
loss: 1.0256577730178833,grad_norm: 0.9999993932676475, iteration: 114484
loss: 1.0556167364120483,grad_norm: 0.9999996683209139, iteration: 114485
loss: 1.085910677909851,grad_norm: 0.9999997278375815, iteration: 114486
loss: 0.987501323223114,grad_norm: 0.9999995842537357, iteration: 114487
loss: 1.0139057636260986,grad_norm: 0.999999299993174, iteration: 114488
loss: 1.0341686010360718,grad_norm: 0.9999996140162956, iteration: 114489
loss: 1.0458778142929077,grad_norm: 0.999999838617464, iteration: 114490
loss: 0.9633111357688904,grad_norm: 0.9999991834979699, iteration: 114491
loss: 1.0238374471664429,grad_norm: 0.709865641430964, iteration: 114492
loss: 1.0250624418258667,grad_norm: 0.8825948376116465, iteration: 114493
loss: 1.0452641248703003,grad_norm: 0.8510941929385885, iteration: 114494
loss: 1.0082343816757202,grad_norm: 0.9722931632353353, iteration: 114495
loss: 1.317649483680725,grad_norm: 0.999999287279058, iteration: 114496
loss: 1.2652127742767334,grad_norm: 0.9999992508109579, iteration: 114497
loss: 1.0975979566574097,grad_norm: 0.9999993093964766, iteration: 114498
loss: 1.154396891593933,grad_norm: 0.9999998486137992, iteration: 114499
loss: 1.5079501867294312,grad_norm: 0.9999999113599626, iteration: 114500
loss: 1.6125433444976807,grad_norm: 0.9999999878028673, iteration: 114501
loss: 1.0165513753890991,grad_norm: 0.9999991146234883, iteration: 114502
loss: 0.9776334166526794,grad_norm: 1.0000000210165945, iteration: 114503
loss: 1.0622351169586182,grad_norm: 0.901357957471808, iteration: 114504
loss: 1.1747628450393677,grad_norm: 0.9999998591334854, iteration: 114505
loss: 1.179323673248291,grad_norm: 0.9999999427161305, iteration: 114506
loss: 1.1159579753875732,grad_norm: 0.9999997977590072, iteration: 114507
loss: 1.178229808807373,grad_norm: 1.0000000277917043, iteration: 114508
loss: 1.1614314317703247,grad_norm: 0.99999973864984, iteration: 114509
loss: 1.0707353353500366,grad_norm: 0.999999327151469, iteration: 114510
loss: 1.1377393007278442,grad_norm: 0.9999996960567538, iteration: 114511
loss: 1.0173149108886719,grad_norm: 0.9999991766605614, iteration: 114512
loss: 1.1715025901794434,grad_norm: 0.9999999317670188, iteration: 114513
loss: 1.1626601219177246,grad_norm: 0.9999993969051187, iteration: 114514
loss: 1.1715084314346313,grad_norm: 0.9999998448305467, iteration: 114515
loss: 0.9943713545799255,grad_norm: 0.9999992487360956, iteration: 114516
loss: 1.0024759769439697,grad_norm: 0.9999990874590774, iteration: 114517
loss: 1.1250375509262085,grad_norm: 0.9999996113304126, iteration: 114518
loss: 1.154458999633789,grad_norm: 0.9999993438971995, iteration: 114519
loss: 1.1717875003814697,grad_norm: 0.9999998416650635, iteration: 114520
loss: 1.2692389488220215,grad_norm: 1.0000000647355243, iteration: 114521
loss: 1.1648149490356445,grad_norm: 0.9999999906759662, iteration: 114522
loss: 0.9865143299102783,grad_norm: 0.9999990982930917, iteration: 114523
loss: 1.011294960975647,grad_norm: 0.9999999270903428, iteration: 114524
loss: 1.070752501487732,grad_norm: 0.9511046470375952, iteration: 114525
loss: 1.0363874435424805,grad_norm: 0.9618639625755429, iteration: 114526
loss: 1.1269739866256714,grad_norm: 0.9999993754445733, iteration: 114527
loss: 1.0728250741958618,grad_norm: 0.9999998603380951, iteration: 114528
loss: 1.1038739681243896,grad_norm: 0.9999999104383545, iteration: 114529
loss: 1.0873600244522095,grad_norm: 0.9999997255378038, iteration: 114530
loss: 1.0044032335281372,grad_norm: 0.9999991390662545, iteration: 114531
loss: 1.18245267868042,grad_norm: 0.9999994792499136, iteration: 114532
loss: 1.1201841831207275,grad_norm: 0.9999998202223076, iteration: 114533
loss: 1.0203776359558105,grad_norm: 0.999999954590822, iteration: 114534
loss: 1.0804020166397095,grad_norm: 0.9999990381433648, iteration: 114535
loss: 1.099395751953125,grad_norm: 0.9999997152017291, iteration: 114536
loss: 1.052330732345581,grad_norm: 0.9783435808656603, iteration: 114537
loss: 1.1015503406524658,grad_norm: 0.999999581403627, iteration: 114538
loss: 1.0845677852630615,grad_norm: 0.9999999508094578, iteration: 114539
loss: 1.139765977859497,grad_norm: 0.9999999271964153, iteration: 114540
loss: 1.127734661102295,grad_norm: 0.9999992580742536, iteration: 114541
loss: 1.0848532915115356,grad_norm: 0.9999996887825958, iteration: 114542
loss: 1.2140249013900757,grad_norm: 0.999999375327315, iteration: 114543
loss: 1.1194673776626587,grad_norm: 0.9999996646453672, iteration: 114544
loss: 1.1164665222167969,grad_norm: 0.9999999093388354, iteration: 114545
loss: 1.117317795753479,grad_norm: 0.999999761487965, iteration: 114546
loss: 1.2092435359954834,grad_norm: 0.9999999069964424, iteration: 114547
loss: 1.124285101890564,grad_norm: 1.0000000012346588, iteration: 114548
loss: 1.0835176706314087,grad_norm: 1.0000000231534472, iteration: 114549
loss: 1.164223074913025,grad_norm: 0.9999998890029179, iteration: 114550
loss: 1.2068979740142822,grad_norm: 0.9999998872143435, iteration: 114551
loss: 1.1049339771270752,grad_norm: 0.9999996799238418, iteration: 114552
loss: 1.2128748893737793,grad_norm: 0.9999992941940962, iteration: 114553
loss: 1.0898123979568481,grad_norm: 0.999999890262008, iteration: 114554
loss: 1.0217933654785156,grad_norm: 0.9999991546695249, iteration: 114555
loss: 1.2066665887832642,grad_norm: 0.9999996760954701, iteration: 114556
loss: 1.0843592882156372,grad_norm: 0.9999997628499845, iteration: 114557
loss: 1.1348564624786377,grad_norm: 0.9999994213909623, iteration: 114558
loss: 1.1013418436050415,grad_norm: 0.999999546135937, iteration: 114559
loss: 1.4847981929779053,grad_norm: 0.9999999369939306, iteration: 114560
loss: 1.0794758796691895,grad_norm: 0.9999999542191301, iteration: 114561
loss: 1.2036956548690796,grad_norm: 0.9999998646080893, iteration: 114562
loss: 1.0683164596557617,grad_norm: 0.9999993558474978, iteration: 114563
loss: 1.3532335758209229,grad_norm: 0.9999998336978242, iteration: 114564
loss: 1.1276476383209229,grad_norm: 0.9796456687942634, iteration: 114565
loss: 1.156355381011963,grad_norm: 0.999999605918337, iteration: 114566
loss: 1.1696264743804932,grad_norm: 0.9999996755105646, iteration: 114567
loss: 1.2819061279296875,grad_norm: 0.9999998454806228, iteration: 114568
loss: 1.2117420434951782,grad_norm: 0.999999886945329, iteration: 114569
loss: 1.1195428371429443,grad_norm: 0.999999470559296, iteration: 114570
loss: 1.1301385164260864,grad_norm: 0.9999993951128673, iteration: 114571
loss: 1.0434346199035645,grad_norm: 0.9999995161022509, iteration: 114572
loss: 1.1895740032196045,grad_norm: 0.9999996854411844, iteration: 114573
loss: 1.2877180576324463,grad_norm: 0.9999994293132707, iteration: 114574
loss: 1.0909925699234009,grad_norm: 0.9999995028053761, iteration: 114575
loss: 1.4539703130722046,grad_norm: 0.9999999460390154, iteration: 114576
loss: 1.0179200172424316,grad_norm: 0.9999993136013584, iteration: 114577
loss: 1.0269566774368286,grad_norm: 0.9999991809876123, iteration: 114578
loss: 1.3698395490646362,grad_norm: 0.9999996949217446, iteration: 114579
loss: 1.0971006155014038,grad_norm: 0.9999998806191456, iteration: 114580
loss: 1.2165477275848389,grad_norm: 0.9999998444389564, iteration: 114581
loss: 1.0694605112075806,grad_norm: 0.9999995703989594, iteration: 114582
loss: 1.1601252555847168,grad_norm: 0.999999738129444, iteration: 114583
loss: 1.1199150085449219,grad_norm: 0.9999999264858125, iteration: 114584
loss: 1.3460019826889038,grad_norm: 0.9999996783417966, iteration: 114585
loss: 1.12123703956604,grad_norm: 0.9999997560357013, iteration: 114586
loss: 1.384159803390503,grad_norm: 0.9999998271862068, iteration: 114587
loss: 1.2198718786239624,grad_norm: 0.9999998922636286, iteration: 114588
loss: 1.1455533504486084,grad_norm: 0.9999992775631129, iteration: 114589
loss: 1.1188124418258667,grad_norm: 0.9999998622385475, iteration: 114590
loss: 1.3154879808425903,grad_norm: 0.9999996375787965, iteration: 114591
loss: 1.180162787437439,grad_norm: 0.9999993234854774, iteration: 114592
loss: 1.3462125062942505,grad_norm: 0.9999999204447242, iteration: 114593
loss: 1.1962732076644897,grad_norm: 0.9999998912422831, iteration: 114594
loss: 1.05308198928833,grad_norm: 0.9999990174583779, iteration: 114595
loss: 1.3605735301971436,grad_norm: 0.999999841735476, iteration: 114596
loss: 1.0702325105667114,grad_norm: 0.9999994294160179, iteration: 114597
loss: 1.1998591423034668,grad_norm: 0.9999996057623786, iteration: 114598
loss: 1.0750336647033691,grad_norm: 0.9999996095978609, iteration: 114599
loss: 1.4819265604019165,grad_norm: 0.9999997700652852, iteration: 114600
loss: 1.3964964151382446,grad_norm: 0.9999997928156469, iteration: 114601
loss: 1.3558968305587769,grad_norm: 0.9999997743270823, iteration: 114602
loss: 1.2678523063659668,grad_norm: 0.9999997821194728, iteration: 114603
loss: 1.3447984457015991,grad_norm: 0.9999994420667114, iteration: 114604
loss: 1.0651437044143677,grad_norm: 0.9999994086264845, iteration: 114605
loss: 1.1512912511825562,grad_norm: 0.9999999598905197, iteration: 114606
loss: 1.0808030366897583,grad_norm: 0.9999995439445151, iteration: 114607
loss: 1.2188266515731812,grad_norm: 0.9999999871055243, iteration: 114608
loss: 1.1819432973861694,grad_norm: 0.9999998857510441, iteration: 114609
loss: 0.9833047986030579,grad_norm: 0.9864811679611643, iteration: 114610
loss: 1.101923942565918,grad_norm: 0.9999995125421368, iteration: 114611
loss: 1.0948562622070312,grad_norm: 0.9999990208528915, iteration: 114612
loss: 1.1742805242538452,grad_norm: 0.9999994897621073, iteration: 114613
loss: 1.189668893814087,grad_norm: 0.9999999430923731, iteration: 114614
loss: 1.2327152490615845,grad_norm: 0.9999998026537916, iteration: 114615
loss: 1.15873122215271,grad_norm: 0.9999994949232375, iteration: 114616
loss: 1.1601542234420776,grad_norm: 0.9999998179517411, iteration: 114617
loss: 1.075377345085144,grad_norm: 0.999999362361087, iteration: 114618
loss: 1.0954543352127075,grad_norm: 0.999999834083317, iteration: 114619
loss: 1.1298245191574097,grad_norm: 0.9999995886617848, iteration: 114620
loss: 1.442282795906067,grad_norm: 0.9999997802308941, iteration: 114621
loss: 1.1047892570495605,grad_norm: 0.9999998995713077, iteration: 114622
loss: 1.0774375200271606,grad_norm: 0.9999997219625333, iteration: 114623
loss: 1.326136827468872,grad_norm: 0.999999902677584, iteration: 114624
loss: 1.1646031141281128,grad_norm: 0.9999996325028743, iteration: 114625
loss: 1.258624792098999,grad_norm: 0.9999999064790985, iteration: 114626
loss: 1.1257933378219604,grad_norm: 0.9999997027188173, iteration: 114627
loss: 1.188124179840088,grad_norm: 0.9999993048141218, iteration: 114628
loss: 1.0943163633346558,grad_norm: 0.9999997245437475, iteration: 114629
loss: 1.2779234647750854,grad_norm: 0.9999996259025319, iteration: 114630
loss: 1.0682132244110107,grad_norm: 0.9999995923947523, iteration: 114631
loss: 1.3920722007751465,grad_norm: 0.9999994649509037, iteration: 114632
loss: 1.2421700954437256,grad_norm: 0.9999999908415251, iteration: 114633
loss: 1.1965769529342651,grad_norm: 0.9999998314842282, iteration: 114634
loss: 1.0659046173095703,grad_norm: 0.9999998071855727, iteration: 114635
loss: 1.316300392150879,grad_norm: 0.9999998938207443, iteration: 114636
loss: 1.1462866067886353,grad_norm: 0.9999992500310173, iteration: 114637
loss: 1.0744036436080933,grad_norm: 0.9999998292935421, iteration: 114638
loss: 1.3945834636688232,grad_norm: 0.999999911492994, iteration: 114639
loss: 1.374914288520813,grad_norm: 0.9999997118526418, iteration: 114640
loss: 1.1466025114059448,grad_norm: 0.9999997912358978, iteration: 114641
loss: 1.1259347200393677,grad_norm: 0.9999998186949511, iteration: 114642
loss: 1.2872059345245361,grad_norm: 0.9999998420874687, iteration: 114643
loss: 1.1993203163146973,grad_norm: 0.9999998765544655, iteration: 114644
loss: 1.4998756647109985,grad_norm: 1.0000000460344174, iteration: 114645
loss: 1.4294509887695312,grad_norm: 1.0000000436282297, iteration: 114646
loss: 1.4867675304412842,grad_norm: 0.9999997005318582, iteration: 114647
loss: 1.139528751373291,grad_norm: 0.999999813079595, iteration: 114648
loss: 1.3853540420532227,grad_norm: 0.9999999587055379, iteration: 114649
loss: 1.1491302251815796,grad_norm: 0.999999756453653, iteration: 114650
loss: 1.0988132953643799,grad_norm: 0.9999992557491579, iteration: 114651
loss: 1.1099735498428345,grad_norm: 0.9999994959718892, iteration: 114652
loss: 1.2667442560195923,grad_norm: 0.9999997835138154, iteration: 114653
loss: 1.2933167219161987,grad_norm: 0.9999998707092639, iteration: 114654
loss: 1.0552836656570435,grad_norm: 0.9999994895414445, iteration: 114655
loss: 1.3125625848770142,grad_norm: 0.9999998052313981, iteration: 114656
loss: 1.0753613710403442,grad_norm: 0.999999165268203, iteration: 114657
loss: 1.1185225248336792,grad_norm: 0.9999998333107836, iteration: 114658
loss: 1.0770155191421509,grad_norm: 1.0000000390173944, iteration: 114659
loss: 1.1108756065368652,grad_norm: 0.9999992131293892, iteration: 114660
loss: 1.4503769874572754,grad_norm: 0.9999998971499922, iteration: 114661
loss: 1.2017775774002075,grad_norm: 0.9999999066083103, iteration: 114662
loss: 1.1593517065048218,grad_norm: 1.0000000333738825, iteration: 114663
loss: 1.2630342245101929,grad_norm: 0.9999999168430177, iteration: 114664
loss: 1.1766008138656616,grad_norm: 0.9999999197342351, iteration: 114665
loss: 1.424343228340149,grad_norm: 0.999999848432037, iteration: 114666
loss: 1.1923574209213257,grad_norm: 0.99999984542458, iteration: 114667
loss: 1.174045443534851,grad_norm: 0.999999723447046, iteration: 114668
loss: 1.332929015159607,grad_norm: 0.9999998637736489, iteration: 114669
loss: 1.2452460527420044,grad_norm: 0.999999860877939, iteration: 114670
loss: 1.4000329971313477,grad_norm: 0.9999997969353719, iteration: 114671
loss: 1.3080735206604004,grad_norm: 1.0000001373122474, iteration: 114672
loss: 1.579599142074585,grad_norm: 0.9999996375790348, iteration: 114673
loss: 1.410416841506958,grad_norm: 1.0000000486929546, iteration: 114674
loss: 1.528304934501648,grad_norm: 0.9999996940718353, iteration: 114675
loss: 1.4451829195022583,grad_norm: 0.9999999645492231, iteration: 114676
loss: 1.3517714738845825,grad_norm: 0.9999999251193074, iteration: 114677
loss: 1.3146730661392212,grad_norm: 0.9999999284326321, iteration: 114678
loss: 1.2548221349716187,grad_norm: 0.9999999493123253, iteration: 114679
loss: 1.3178874254226685,grad_norm: 0.9999998453176876, iteration: 114680
loss: 1.2183908224105835,grad_norm: 0.9999999512073665, iteration: 114681
loss: 1.4373514652252197,grad_norm: 0.9999996446574717, iteration: 114682
loss: 1.5155097246170044,grad_norm: 0.9999999231382168, iteration: 114683
loss: 1.1953145265579224,grad_norm: 0.9999994409648315, iteration: 114684
loss: 1.2836551666259766,grad_norm: 0.9999996831990673, iteration: 114685
loss: 1.2706815004348755,grad_norm: 0.9999995782482705, iteration: 114686
loss: 1.3342430591583252,grad_norm: 0.99999995413358, iteration: 114687
loss: 1.297079086303711,grad_norm: 0.9999999358599053, iteration: 114688
loss: 1.3650541305541992,grad_norm: 0.9999999438579892, iteration: 114689
loss: 1.203697919845581,grad_norm: 0.9999995894613011, iteration: 114690
loss: 1.242821216583252,grad_norm: 0.9999997384377229, iteration: 114691
loss: 1.338120937347412,grad_norm: 0.9999993416234898, iteration: 114692
loss: 1.2984728813171387,grad_norm: 0.9999999083794489, iteration: 114693
loss: 1.287358283996582,grad_norm: 0.9999999480500488, iteration: 114694
loss: 1.1892050504684448,grad_norm: 0.9999999327994354, iteration: 114695
loss: 1.1339555978775024,grad_norm: 0.9999994153032092, iteration: 114696
loss: 1.161509394645691,grad_norm: 0.9999998604277576, iteration: 114697
loss: 1.206575870513916,grad_norm: 0.9999996645163483, iteration: 114698
loss: 1.21441650390625,grad_norm: 0.9999993925453436, iteration: 114699
loss: 1.0636614561080933,grad_norm: 0.9999996833230612, iteration: 114700
loss: 1.081595540046692,grad_norm: 0.999999415913634, iteration: 114701
loss: 1.3463622331619263,grad_norm: 0.9999997994358751, iteration: 114702
loss: 1.3080333471298218,grad_norm: 0.9999994409090706, iteration: 114703
loss: 1.2950749397277832,grad_norm: 0.9999999126960021, iteration: 114704
loss: 1.3389588594436646,grad_norm: 0.9999998380798696, iteration: 114705
loss: 1.0930298566818237,grad_norm: 0.9999998577250733, iteration: 114706
loss: 1.18930184841156,grad_norm: 0.9999999219014087, iteration: 114707
loss: 1.2024868726730347,grad_norm: 0.9999999613923782, iteration: 114708
loss: 1.2058409452438354,grad_norm: 0.9999997281330361, iteration: 114709
loss: 1.174763798713684,grad_norm: 0.999999310280888, iteration: 114710
loss: 1.2068945169448853,grad_norm: 0.9999999560523031, iteration: 114711
loss: 1.1917301416397095,grad_norm: 0.9999994151470724, iteration: 114712
loss: 1.1285606622695923,grad_norm: 0.9999998227320607, iteration: 114713
loss: 1.2283003330230713,grad_norm: 0.9999998034467067, iteration: 114714
loss: 1.2990171909332275,grad_norm: 0.9999997495740462, iteration: 114715
loss: 1.20612633228302,grad_norm: 0.9999997241584337, iteration: 114716
loss: 1.191522479057312,grad_norm: 0.9999996076163279, iteration: 114717
loss: 1.2452340126037598,grad_norm: 0.9999996212126304, iteration: 114718
loss: 1.4607006311416626,grad_norm: 0.9999999535515874, iteration: 114719
loss: 1.1912684440612793,grad_norm: 0.9999994530641182, iteration: 114720
loss: 1.2881815433502197,grad_norm: 0.9999998116824225, iteration: 114721
loss: 1.1810181140899658,grad_norm: 0.9999993061245004, iteration: 114722
loss: 1.279205083847046,grad_norm: 1.0000000159842424, iteration: 114723
loss: 1.3515615463256836,grad_norm: 0.9999995970318429, iteration: 114724
loss: 1.4550000429153442,grad_norm: 0.9999998598618198, iteration: 114725
loss: 1.1348119974136353,grad_norm: 0.9999998780531096, iteration: 114726
loss: 1.1428282260894775,grad_norm: 0.9999996469319943, iteration: 114727
loss: 1.184959888458252,grad_norm: 0.9999995911902552, iteration: 114728
loss: 1.174802541732788,grad_norm: 0.9999996162852677, iteration: 114729
loss: 1.3213306665420532,grad_norm: 0.99999990689015, iteration: 114730
loss: 1.3853425979614258,grad_norm: 1.0000000565050773, iteration: 114731
loss: 1.1608186960220337,grad_norm: 0.9999993558327696, iteration: 114732
loss: 1.2847325801849365,grad_norm: 0.9999996976347595, iteration: 114733
loss: 1.2436366081237793,grad_norm: 0.9999992843009864, iteration: 114734
loss: 1.318493127822876,grad_norm: 0.9999998706680724, iteration: 114735
loss: 1.2366204261779785,grad_norm: 0.9999998715522412, iteration: 114736
loss: 1.2610586881637573,grad_norm: 0.9999999027279798, iteration: 114737
loss: 1.142996907234192,grad_norm: 0.9999999523167387, iteration: 114738
loss: 1.1535263061523438,grad_norm: 0.9999998298800716, iteration: 114739
loss: 1.1275458335876465,grad_norm: 0.9999999131185658, iteration: 114740
loss: 1.1107863187789917,grad_norm: 0.9999998761724708, iteration: 114741
loss: 1.103468894958496,grad_norm: 0.9999998241582199, iteration: 114742
loss: 1.23408043384552,grad_norm: 0.9999996665588656, iteration: 114743
loss: 1.1997658014297485,grad_norm: 0.9999996141002728, iteration: 114744
loss: 1.093373417854309,grad_norm: 0.9999997107661034, iteration: 114745
loss: 1.4538451433181763,grad_norm: 0.9999999163629916, iteration: 114746
loss: 1.2851523160934448,grad_norm: 0.9999999883225748, iteration: 114747
loss: 1.1196444034576416,grad_norm: 0.999999632501141, iteration: 114748
loss: 1.198997139930725,grad_norm: 0.9999993320658738, iteration: 114749
loss: 1.2832520008087158,grad_norm: 0.9999997788151659, iteration: 114750
loss: 1.1439266204833984,grad_norm: 0.9999993688239199, iteration: 114751
loss: 1.2204786539077759,grad_norm: 0.9999996094746081, iteration: 114752
loss: 1.168245553970337,grad_norm: 0.9999995346529592, iteration: 114753
loss: 1.1801059246063232,grad_norm: 0.9999999922623115, iteration: 114754
loss: 1.2309353351593018,grad_norm: 0.9999997843714008, iteration: 114755
loss: 1.3555151224136353,grad_norm: 1.0000000122887915, iteration: 114756
loss: 1.1160136461257935,grad_norm: 0.9999998974135483, iteration: 114757
loss: 1.3112130165100098,grad_norm: 0.9999999699555873, iteration: 114758
loss: 1.1237245798110962,grad_norm: 0.9999995436423221, iteration: 114759
loss: 1.168091893196106,grad_norm: 0.9999996717569305, iteration: 114760
loss: 1.309959053993225,grad_norm: 0.9999997906846672, iteration: 114761
loss: 1.125493049621582,grad_norm: 0.9999999323859583, iteration: 114762
loss: 1.3256419897079468,grad_norm: 0.9999997154756918, iteration: 114763
loss: 1.4815441370010376,grad_norm: 0.9999999016359139, iteration: 114764
loss: 1.1208257675170898,grad_norm: 0.9999997583378499, iteration: 114765
loss: 1.097493052482605,grad_norm: 0.9999998066982196, iteration: 114766
loss: 1.1832584142684937,grad_norm: 0.9999996146332759, iteration: 114767
loss: 1.3858555555343628,grad_norm: 1.000000079783545, iteration: 114768
loss: 1.0472999811172485,grad_norm: 0.9999999759525287, iteration: 114769
loss: 1.0537619590759277,grad_norm: 0.9999992533617635, iteration: 114770
loss: 1.3756089210510254,grad_norm: 0.999999697966319, iteration: 114771
loss: 1.2492345571517944,grad_norm: 0.9999993587289369, iteration: 114772
loss: 1.1761549711227417,grad_norm: 0.9999998179884986, iteration: 114773
loss: 1.5293025970458984,grad_norm: 0.9999998679822153, iteration: 114774
loss: 1.386918544769287,grad_norm: 0.9999995449665741, iteration: 114775
loss: 1.1041251420974731,grad_norm: 0.9999994404958511, iteration: 114776
loss: 1.3284602165222168,grad_norm: 0.9999997343581928, iteration: 114777
loss: 1.185070276260376,grad_norm: 0.9999997532205024, iteration: 114778
loss: 1.1301606893539429,grad_norm: 0.9999998148630757, iteration: 114779
loss: 1.2378603219985962,grad_norm: 0.9999997439257176, iteration: 114780
loss: 1.1982372999191284,grad_norm: 0.9999992929840829, iteration: 114781
loss: 1.3371597528457642,grad_norm: 0.9999997654788172, iteration: 114782
loss: 1.1517695188522339,grad_norm: 0.9999999386301052, iteration: 114783
loss: 1.0927592515945435,grad_norm: 0.9999991536499844, iteration: 114784
loss: 1.0762690305709839,grad_norm: 0.999999887517669, iteration: 114785
loss: 1.2361135482788086,grad_norm: 0.9999995200872092, iteration: 114786
loss: 1.224628210067749,grad_norm: 0.999999792356592, iteration: 114787
loss: 1.191664695739746,grad_norm: 0.9999995965445879, iteration: 114788
loss: 1.0489492416381836,grad_norm: 0.9999991331558213, iteration: 114789
loss: 1.2417762279510498,grad_norm: 0.9999999514937753, iteration: 114790
loss: 1.0909383296966553,grad_norm: 0.9999997450010548, iteration: 114791
loss: 1.1271394491195679,grad_norm: 0.9999996443101629, iteration: 114792
loss: 1.0975494384765625,grad_norm: 0.9999990114031383, iteration: 114793
loss: 1.2183946371078491,grad_norm: 0.9999998234997098, iteration: 114794
loss: 1.24741792678833,grad_norm: 0.9999995009805117, iteration: 114795
loss: 1.1841049194335938,grad_norm: 0.9999995687871248, iteration: 114796
loss: 1.205583930015564,grad_norm: 0.9999997104719187, iteration: 114797
loss: 1.077774167060852,grad_norm: 0.9999991328823504, iteration: 114798
loss: 1.1506544351577759,grad_norm: 0.9999994924224448, iteration: 114799
loss: 1.230993390083313,grad_norm: 0.9999997806968001, iteration: 114800
loss: 1.2425181865692139,grad_norm: 0.999999632569898, iteration: 114801
loss: 1.097436547279358,grad_norm: 0.9999996848840222, iteration: 114802
loss: 1.1462236642837524,grad_norm: 0.9999996345618377, iteration: 114803
loss: 1.1408334970474243,grad_norm: 0.9999992404662515, iteration: 114804
loss: 1.3512144088745117,grad_norm: 0.9999998335143352, iteration: 114805
loss: 1.142309546470642,grad_norm: 0.9999998235703418, iteration: 114806
loss: 1.0745060443878174,grad_norm: 0.9999996959850453, iteration: 114807
loss: 1.0805824995040894,grad_norm: 0.9999991836466484, iteration: 114808
loss: 1.1558459997177124,grad_norm: 0.9999991983220127, iteration: 114809
loss: 1.2776508331298828,grad_norm: 1.000000033485291, iteration: 114810
loss: 1.3015382289886475,grad_norm: 0.999999652140433, iteration: 114811
loss: 1.1815905570983887,grad_norm: 0.9999995306374101, iteration: 114812
loss: 1.3290406465530396,grad_norm: 0.9999999518245362, iteration: 114813
loss: 1.2667393684387207,grad_norm: 0.9999999275208729, iteration: 114814
loss: 1.0582987070083618,grad_norm: 0.9999995637313076, iteration: 114815
loss: 1.111369252204895,grad_norm: 0.9999993415685752, iteration: 114816
loss: 1.2877144813537598,grad_norm: 0.9999999910950493, iteration: 114817
loss: 1.3242229223251343,grad_norm: 0.9999996657890688, iteration: 114818
loss: 1.2041795253753662,grad_norm: 0.9999995901972603, iteration: 114819
loss: 1.3286789655685425,grad_norm: 0.9999997489739331, iteration: 114820
loss: 1.2343294620513916,grad_norm: 0.9999997429284299, iteration: 114821
loss: 1.299544095993042,grad_norm: 0.9999999423391621, iteration: 114822
loss: 1.1770564317703247,grad_norm: 1.0000000235772943, iteration: 114823
loss: 1.141934871673584,grad_norm: 0.9999998486085113, iteration: 114824
loss: 1.0805431604385376,grad_norm: 0.9999995174864958, iteration: 114825
loss: 1.2714886665344238,grad_norm: 1.0000000262207163, iteration: 114826
loss: 1.1766388416290283,grad_norm: 0.9999993071980008, iteration: 114827
loss: 1.3281184434890747,grad_norm: 0.9999999305685534, iteration: 114828
loss: 1.5405997037887573,grad_norm: 1.0000000212952345, iteration: 114829
loss: 1.2591973543167114,grad_norm: 0.9999996343907667, iteration: 114830
loss: 1.15696382522583,grad_norm: 0.9999996516405548, iteration: 114831
loss: 1.2684710025787354,grad_norm: 0.9999995895256575, iteration: 114832
loss: 1.2422683238983154,grad_norm: 0.9999996427360969, iteration: 114833
loss: 1.257773756980896,grad_norm: 0.9999997125444721, iteration: 114834
loss: 1.504707932472229,grad_norm: 0.9999999850807764, iteration: 114835
loss: 1.1565133333206177,grad_norm: 1.000000013215147, iteration: 114836
loss: 1.140076994895935,grad_norm: 0.9999989724606649, iteration: 114837
loss: 1.1624220609664917,grad_norm: 0.9999996310218199, iteration: 114838
loss: 1.0410181283950806,grad_norm: 0.9366010012057938, iteration: 114839
loss: 1.1447086334228516,grad_norm: 0.999999907749234, iteration: 114840
loss: 1.1240661144256592,grad_norm: 0.9999996920035795, iteration: 114841
loss: 1.045259952545166,grad_norm: 0.9999992499179492, iteration: 114842
loss: 1.1209862232208252,grad_norm: 0.9999999958151317, iteration: 114843
loss: 1.3642560243606567,grad_norm: 0.9999998065094855, iteration: 114844
loss: 1.1479573249816895,grad_norm: 0.9999997121907694, iteration: 114845
loss: 1.176551342010498,grad_norm: 0.9999998454733647, iteration: 114846
loss: 1.3948614597320557,grad_norm: 0.9999998485020047, iteration: 114847
loss: 1.036630392074585,grad_norm: 0.9999992287025454, iteration: 114848
loss: 1.0759114027023315,grad_norm: 0.999999361972304, iteration: 114849
loss: 1.3557989597320557,grad_norm: 0.9999995680902538, iteration: 114850
loss: 1.1791659593582153,grad_norm: 0.9999995967869945, iteration: 114851
loss: 1.1653075218200684,grad_norm: 0.9999998386849219, iteration: 114852
loss: 1.0572985410690308,grad_norm: 0.9999993977683369, iteration: 114853
loss: 1.0431324243545532,grad_norm: 0.9999991564963373, iteration: 114854
loss: 1.164379358291626,grad_norm: 0.9999999146719102, iteration: 114855
loss: 1.2943288087844849,grad_norm: 0.9999997919692546, iteration: 114856
loss: 1.1822714805603027,grad_norm: 0.9999998817151158, iteration: 114857
loss: 1.4934390783309937,grad_norm: 0.9999999739183898, iteration: 114858
loss: 1.219544768333435,grad_norm: 0.9999998709988036, iteration: 114859
loss: 1.0571094751358032,grad_norm: 0.9999998570787506, iteration: 114860
loss: 1.2765470743179321,grad_norm: 0.99999953505301, iteration: 114861
loss: 1.2627371549606323,grad_norm: 0.9999997812517206, iteration: 114862
loss: 1.3929221630096436,grad_norm: 0.9999997229145495, iteration: 114863
loss: 1.1591941118240356,grad_norm: 0.9999993013532751, iteration: 114864
loss: 1.1497595310211182,grad_norm: 0.9999994213021433, iteration: 114865
loss: 1.3601484298706055,grad_norm: 0.9999997307344319, iteration: 114866
loss: 1.2482635974884033,grad_norm: 0.9999998788631359, iteration: 114867
loss: 1.2248133420944214,grad_norm: 0.999999304890471, iteration: 114868
loss: 1.2448545694351196,grad_norm: 0.9999993377173096, iteration: 114869
loss: 1.5248483419418335,grad_norm: 0.9999998785849705, iteration: 114870
loss: 1.2233734130859375,grad_norm: 0.9999996406744398, iteration: 114871
loss: 1.3108707666397095,grad_norm: 0.9999998523378538, iteration: 114872
loss: 1.3048933744430542,grad_norm: 0.9999999085601491, iteration: 114873
loss: 1.2600823640823364,grad_norm: 0.9999993521639415, iteration: 114874
loss: 1.1051181554794312,grad_norm: 0.9999992556365427, iteration: 114875
loss: 1.3772969245910645,grad_norm: 1.0000001018797717, iteration: 114876
loss: 1.4434705972671509,grad_norm: 0.9999998653167017, iteration: 114877
loss: 1.232816457748413,grad_norm: 0.9999993244724934, iteration: 114878
loss: 1.3009556531906128,grad_norm: 0.999999891126699, iteration: 114879
loss: 1.665928840637207,grad_norm: 0.9999998520400497, iteration: 114880
loss: 1.505666732788086,grad_norm: 1.0000000253051005, iteration: 114881
loss: 1.4910825490951538,grad_norm: 0.9999999065396633, iteration: 114882
loss: 1.3036483526229858,grad_norm: 0.9999995817993522, iteration: 114883
loss: 1.2568329572677612,grad_norm: 0.9999993850166305, iteration: 114884
loss: 1.5238429307937622,grad_norm: 0.9999998488773165, iteration: 114885
loss: 1.207831621170044,grad_norm: 0.9999995577072491, iteration: 114886
loss: 1.3015162944793701,grad_norm: 0.9999994938522223, iteration: 114887
loss: 1.3842029571533203,grad_norm: 0.9999999218029394, iteration: 114888
loss: 1.263216257095337,grad_norm: 0.9999998459855907, iteration: 114889
loss: 1.5302070379257202,grad_norm: 0.9999999110982577, iteration: 114890
loss: 1.3821802139282227,grad_norm: 0.9999996590232391, iteration: 114891
loss: 1.4579793214797974,grad_norm: 0.9999999125677218, iteration: 114892
loss: 1.4584654569625854,grad_norm: 0.9999999245995789, iteration: 114893
loss: 1.4318262338638306,grad_norm: 0.9999998121822506, iteration: 114894
loss: 1.2477625608444214,grad_norm: 0.9999994346443971, iteration: 114895
loss: 1.6370819807052612,grad_norm: 0.9999999001608784, iteration: 114896
loss: 1.345389485359192,grad_norm: 0.9999998870229587, iteration: 114897
loss: 1.501947283744812,grad_norm: 0.9999999668664967, iteration: 114898
loss: 1.239625096321106,grad_norm: 0.9999995927910256, iteration: 114899
loss: 1.5029048919677734,grad_norm: 0.9999997339235229, iteration: 114900
loss: 1.4188679456710815,grad_norm: 0.9999998560786965, iteration: 114901
loss: 1.337279200553894,grad_norm: 1.0000000218292846, iteration: 114902
loss: 1.413948893547058,grad_norm: 0.9999998082218402, iteration: 114903
loss: 1.1910325288772583,grad_norm: 0.9999997747978961, iteration: 114904
loss: 1.2420215606689453,grad_norm: 0.9999998155932515, iteration: 114905
loss: 1.4874135255813599,grad_norm: 0.9999998491637395, iteration: 114906
loss: 1.4034548997879028,grad_norm: 0.9999998216167274, iteration: 114907
loss: 1.288428783416748,grad_norm: 0.999999807318858, iteration: 114908
loss: 1.2589517831802368,grad_norm: 0.999999926803997, iteration: 114909
loss: 1.188611626625061,grad_norm: 0.9999996746686842, iteration: 114910
loss: 1.3764725923538208,grad_norm: 0.9999998755136473, iteration: 114911
loss: 1.3240402936935425,grad_norm: 0.9999998038462037, iteration: 114912
loss: 1.2669341564178467,grad_norm: 0.9999994856258748, iteration: 114913
loss: 1.2882132530212402,grad_norm: 0.9999998724406443, iteration: 114914
loss: 1.5680954456329346,grad_norm: 0.9999999329651829, iteration: 114915
loss: 1.2852809429168701,grad_norm: 0.9999993932306498, iteration: 114916
loss: 1.124144196510315,grad_norm: 0.9999996048926705, iteration: 114917
loss: 1.3134081363677979,grad_norm: 0.9999999897072857, iteration: 114918
loss: 1.4779794216156006,grad_norm: 0.9999998221881023, iteration: 114919
loss: 1.273522138595581,grad_norm: 0.9999995005681025, iteration: 114920
loss: 1.4272563457489014,grad_norm: 0.9999998918602231, iteration: 114921
loss: 1.206827998161316,grad_norm: 0.9999999454455256, iteration: 114922
loss: 1.1466431617736816,grad_norm: 0.9999998138009933, iteration: 114923
loss: 1.344474196434021,grad_norm: 0.9999998212416917, iteration: 114924
loss: 1.4668848514556885,grad_norm: 0.9999996248622587, iteration: 114925
loss: 1.192156434059143,grad_norm: 0.9999997743943698, iteration: 114926
loss: 1.21970534324646,grad_norm: 0.9999996133830987, iteration: 114927
loss: 1.1162879467010498,grad_norm: 0.9999999685936155, iteration: 114928
loss: 1.4674879312515259,grad_norm: 0.9999999085733081, iteration: 114929
loss: 1.2310659885406494,grad_norm: 0.9999992675974323, iteration: 114930
loss: 1.3078776597976685,grad_norm: 1.0000000229570316, iteration: 114931
loss: 1.4196362495422363,grad_norm: 0.9999998735260963, iteration: 114932
loss: 1.3161380290985107,grad_norm: 0.9999995182538823, iteration: 114933
loss: 1.2514053583145142,grad_norm: 1.0000000947174605, iteration: 114934
loss: 1.1407115459442139,grad_norm: 0.9999997510059259, iteration: 114935
loss: 1.2225416898727417,grad_norm: 0.9999998516105246, iteration: 114936
loss: 1.3039391040802002,grad_norm: 0.9999999065269144, iteration: 114937
loss: 1.3544549942016602,grad_norm: 0.9999998824692375, iteration: 114938
loss: 1.17123281955719,grad_norm: 0.9999997248059125, iteration: 114939
loss: 1.2640684843063354,grad_norm: 0.9999999266235211, iteration: 114940
loss: 1.0926364660263062,grad_norm: 0.9999997765330316, iteration: 114941
loss: 1.2416870594024658,grad_norm: 0.9999995271501019, iteration: 114942
loss: 1.1959542036056519,grad_norm: 0.9999998607499123, iteration: 114943
loss: 1.3716059923171997,grad_norm: 0.999999923922711, iteration: 114944
loss: 1.2418787479400635,grad_norm: 0.9999996572460453, iteration: 114945
loss: 1.0381815433502197,grad_norm: 0.999999125639011, iteration: 114946
loss: 1.321158766746521,grad_norm: 0.9999999978112856, iteration: 114947
loss: 1.1366649866104126,grad_norm: 0.9999999629536531, iteration: 114948
loss: 1.1264814138412476,grad_norm: 0.9999992796205656, iteration: 114949
loss: 1.2617493867874146,grad_norm: 0.99999994522789, iteration: 114950
loss: 1.1086572408676147,grad_norm: 0.9999992971705178, iteration: 114951
loss: 1.306386947631836,grad_norm: 0.9999998997932997, iteration: 114952
loss: 1.2005329132080078,grad_norm: 0.9999998199519895, iteration: 114953
loss: 1.255507469177246,grad_norm: 0.9999995395222814, iteration: 114954
loss: 1.1468781232833862,grad_norm: 0.9999997705165161, iteration: 114955
loss: 1.1960606575012207,grad_norm: 0.9999999645365313, iteration: 114956
loss: 1.253746747970581,grad_norm: 0.9999994924811308, iteration: 114957
loss: 1.1799119710922241,grad_norm: 0.9999998284501129, iteration: 114958
loss: 1.2703242301940918,grad_norm: 0.9999999240679663, iteration: 114959
loss: 1.2654428482055664,grad_norm: 0.99999956883751, iteration: 114960
loss: 1.3002325296401978,grad_norm: 0.9999999297328018, iteration: 114961
loss: 1.2321698665618896,grad_norm: 0.9999998460997575, iteration: 114962
loss: 1.1304751634597778,grad_norm: 0.9999991992759086, iteration: 114963
loss: 1.464932918548584,grad_norm: 0.9999999977349112, iteration: 114964
loss: 1.1702615022659302,grad_norm: 0.9999994891538349, iteration: 114965
loss: 1.1195522546768188,grad_norm: 0.9999993540557393, iteration: 114966
loss: 1.2015126943588257,grad_norm: 0.9999998401250891, iteration: 114967
loss: 1.3123583793640137,grad_norm: 0.9999995298088442, iteration: 114968
loss: 1.22036874294281,grad_norm: 0.9999995303066795, iteration: 114969
loss: 1.1925463676452637,grad_norm: 0.9999992565957363, iteration: 114970
loss: 1.2116039991378784,grad_norm: 0.9999994448131506, iteration: 114971
loss: 1.2432693243026733,grad_norm: 0.9999997073820607, iteration: 114972
loss: 1.1336151361465454,grad_norm: 0.9999997804756298, iteration: 114973
loss: 1.1718968152999878,grad_norm: 0.99999991311398, iteration: 114974
loss: 1.1199183464050293,grad_norm: 0.9999991059577832, iteration: 114975
loss: 1.1538439989089966,grad_norm: 0.9999997450293857, iteration: 114976
loss: 1.169601559638977,grad_norm: 0.9999994898380348, iteration: 114977
loss: 1.248522162437439,grad_norm: 0.9999999028596014, iteration: 114978
loss: 1.2695379257202148,grad_norm: 0.9999998907777061, iteration: 114979
loss: 1.225704550743103,grad_norm: 0.9999998886224477, iteration: 114980
loss: 1.2198069095611572,grad_norm: 0.9999995276601186, iteration: 114981
loss: 1.6916780471801758,grad_norm: 0.9999999507219988, iteration: 114982
loss: 1.273675799369812,grad_norm: 0.999999628632409, iteration: 114983
loss: 1.358185887336731,grad_norm: 0.9999993856485099, iteration: 114984
loss: 1.2560440301895142,grad_norm: 0.9999997144196727, iteration: 114985
loss: 1.2105375528335571,grad_norm: 0.9999993606868194, iteration: 114986
loss: 1.4892479181289673,grad_norm: 0.9999999365942263, iteration: 114987
loss: 1.645190954208374,grad_norm: 0.9999999529228372, iteration: 114988
loss: 1.5815078020095825,grad_norm: 0.9999999834501873, iteration: 114989
loss: 1.2242709398269653,grad_norm: 0.9999996036723662, iteration: 114990
loss: 1.0777705907821655,grad_norm: 0.9999999349138655, iteration: 114991
loss: 1.4468945264816284,grad_norm: 0.999999605259673, iteration: 114992
loss: 1.258764386177063,grad_norm: 0.999999964290591, iteration: 114993
loss: 1.3139667510986328,grad_norm: 0.9999997544842442, iteration: 114994
loss: 1.3471980094909668,grad_norm: 0.9999997510858852, iteration: 114995
loss: 1.1641404628753662,grad_norm: 0.9999997514419017, iteration: 114996
loss: 1.7034674882888794,grad_norm: 0.9999999588719116, iteration: 114997
loss: 1.3763867616653442,grad_norm: 0.9999997070854068, iteration: 114998
loss: 1.4327362775802612,grad_norm: 0.9999995968851464, iteration: 114999
loss: 1.629652976989746,grad_norm: 0.9999999899671291, iteration: 115000
loss: 1.3450835943222046,grad_norm: 0.9999998586017924, iteration: 115001
loss: 1.6533294916152954,grad_norm: 0.9999999225691276, iteration: 115002
loss: 1.8193504810333252,grad_norm: 1.0000000163513676, iteration: 115003
loss: 1.3360850811004639,grad_norm: 0.9999999351760709, iteration: 115004
loss: 1.4576648473739624,grad_norm: 1.0000000422684625, iteration: 115005
loss: 1.547433614730835,grad_norm: 0.9999998184211442, iteration: 115006
loss: 1.2253532409667969,grad_norm: 0.9999996078686172, iteration: 115007
loss: 1.1495139598846436,grad_norm: 0.9999994238491435, iteration: 115008
loss: 1.2984578609466553,grad_norm: 0.9999995830488358, iteration: 115009
loss: 1.5404553413391113,grad_norm: 0.9999999052703071, iteration: 115010
loss: 1.2454487085342407,grad_norm: 0.9999996982220223, iteration: 115011
loss: 1.221757411956787,grad_norm: 0.9999999506470542, iteration: 115012
loss: 1.2658909559249878,grad_norm: 0.999999896871346, iteration: 115013
loss: 1.4395290613174438,grad_norm: 0.9999998166763202, iteration: 115014
loss: 1.4847064018249512,grad_norm: 0.9999996684061323, iteration: 115015
loss: 1.3063746690750122,grad_norm: 0.9999998888064016, iteration: 115016
loss: 1.7317125797271729,grad_norm: 1.0000000255623693, iteration: 115017
loss: 1.1981216669082642,grad_norm: 0.9999996551414918, iteration: 115018
loss: 1.5154656171798706,grad_norm: 0.9999997700766153, iteration: 115019
loss: 1.2480485439300537,grad_norm: 0.9999997150805036, iteration: 115020
loss: 1.3931379318237305,grad_norm: 0.9999999124670202, iteration: 115021
loss: 1.3252991437911987,grad_norm: 0.9999993063408832, iteration: 115022
loss: 1.576585292816162,grad_norm: 0.9999998545385023, iteration: 115023
loss: 1.7214690446853638,grad_norm: 0.9999997435922711, iteration: 115024
loss: 1.3710635900497437,grad_norm: 0.9999998719459685, iteration: 115025
loss: 1.2205841541290283,grad_norm: 0.999999632800142, iteration: 115026
loss: 1.6056523323059082,grad_norm: 0.9999995872380267, iteration: 115027
loss: 1.3053398132324219,grad_norm: 0.9999996452189861, iteration: 115028
loss: 1.2459712028503418,grad_norm: 0.9999998548396378, iteration: 115029
loss: 1.4245003461837769,grad_norm: 0.9999997137973654, iteration: 115030
loss: 1.254410982131958,grad_norm: 0.9999997356190632, iteration: 115031
loss: 1.3466507196426392,grad_norm: 0.9999999331723196, iteration: 115032
loss: 1.3675004243850708,grad_norm: 0.9999997296829425, iteration: 115033
loss: 1.2982227802276611,grad_norm: 0.9999996736865852, iteration: 115034
loss: 1.228410005569458,grad_norm: 1.0000000335168013, iteration: 115035
loss: 1.184416651725769,grad_norm: 0.9999996713676274, iteration: 115036
loss: 1.3062472343444824,grad_norm: 0.9999996271210291, iteration: 115037
loss: 1.2920331954956055,grad_norm: 0.9999999750618804, iteration: 115038
loss: 1.268079400062561,grad_norm: 0.999999634124081, iteration: 115039
loss: 1.2328872680664062,grad_norm: 0.999999847803328, iteration: 115040
loss: 1.3576807975769043,grad_norm: 0.9999999287044604, iteration: 115041
loss: 1.3233991861343384,grad_norm: 0.9999997817422552, iteration: 115042
loss: 1.368715763092041,grad_norm: 0.9999998160547957, iteration: 115043
loss: 1.4134340286254883,grad_norm: 0.9999998436884583, iteration: 115044
loss: 1.6993520259857178,grad_norm: 0.9999997168940721, iteration: 115045
loss: 1.3511539697647095,grad_norm: 0.9999998971983951, iteration: 115046
loss: 1.2885327339172363,grad_norm: 0.9999998991270977, iteration: 115047
loss: 1.3028528690338135,grad_norm: 0.9999999019431832, iteration: 115048
loss: 1.2862228155136108,grad_norm: 0.9999998399528661, iteration: 115049
loss: 1.2946577072143555,grad_norm: 0.9999999228274273, iteration: 115050
loss: 1.536857008934021,grad_norm: 0.9999998908422219, iteration: 115051
loss: 1.3271996974945068,grad_norm: 1.0000000163291527, iteration: 115052
loss: 1.4456485509872437,grad_norm: 0.9999999454169436, iteration: 115053
loss: 1.3475602865219116,grad_norm: 0.999999796693384, iteration: 115054
loss: 1.2767198085784912,grad_norm: 0.9999998077029925, iteration: 115055
loss: 1.4553970098495483,grad_norm: 0.9999998655882203, iteration: 115056
loss: 1.2133376598358154,grad_norm: 0.9999996865798274, iteration: 115057
loss: 1.2845755815505981,grad_norm: 0.9999992803071922, iteration: 115058
loss: 1.3917425870895386,grad_norm: 0.9999997409057387, iteration: 115059
loss: 1.2612241506576538,grad_norm: 0.9999999888598706, iteration: 115060
loss: 1.2372664213180542,grad_norm: 0.9999997716172117, iteration: 115061
loss: 1.4055256843566895,grad_norm: 0.9999995833712588, iteration: 115062
loss: 1.1824073791503906,grad_norm: 0.9999993470801641, iteration: 115063
loss: 1.3085033893585205,grad_norm: 0.9999995132719602, iteration: 115064
loss: 1.1571236848831177,grad_norm: 0.999999821704017, iteration: 115065
loss: 1.2222715616226196,grad_norm: 0.9999995947447566, iteration: 115066
loss: 1.1282892227172852,grad_norm: 0.999999288531283, iteration: 115067
loss: 1.2292851209640503,grad_norm: 0.9999995814974798, iteration: 115068
loss: 1.283713459968567,grad_norm: 0.9999999304096937, iteration: 115069
loss: 1.4816009998321533,grad_norm: 0.9999999418405956, iteration: 115070
loss: 1.2977405786514282,grad_norm: 0.999999967101258, iteration: 115071
loss: 1.463240146636963,grad_norm: 0.9999998531939089, iteration: 115072
loss: 1.365769386291504,grad_norm: 0.9999997158719954, iteration: 115073
loss: 1.469773292541504,grad_norm: 0.9999999575737237, iteration: 115074
loss: 1.3832080364227295,grad_norm: 1.0000000566392575, iteration: 115075
loss: 1.1426689624786377,grad_norm: 0.9999998720278951, iteration: 115076
loss: 1.2424724102020264,grad_norm: 0.9999998572498179, iteration: 115077
loss: 1.3843401670455933,grad_norm: 0.9999999650891579, iteration: 115078
loss: 1.2787535190582275,grad_norm: 0.9999998005053656, iteration: 115079
loss: 1.3274308443069458,grad_norm: 0.9999999698756686, iteration: 115080
loss: 1.3341631889343262,grad_norm: 1.000000018156204, iteration: 115081
loss: 1.4084371328353882,grad_norm: 0.9999998616238975, iteration: 115082
loss: 1.6197057962417603,grad_norm: 0.9999998450152513, iteration: 115083
loss: 1.0932719707489014,grad_norm: 0.9999993963520518, iteration: 115084
loss: 1.1678581237792969,grad_norm: 0.9999994783842668, iteration: 115085
loss: 1.2813397645950317,grad_norm: 0.9999996717425313, iteration: 115086
loss: 1.1984907388687134,grad_norm: 0.9999994991915975, iteration: 115087
loss: 1.1525182723999023,grad_norm: 0.9999995369819118, iteration: 115088
loss: 1.1884779930114746,grad_norm: 0.9999996281312531, iteration: 115089
loss: 1.2488207817077637,grad_norm: 0.9999997704312629, iteration: 115090
loss: 1.2550290822982788,grad_norm: 0.9999998108341439, iteration: 115091
loss: 1.2652289867401123,grad_norm: 0.9999999149684357, iteration: 115092
loss: 1.1920474767684937,grad_norm: 0.999999701279746, iteration: 115093
loss: 1.1613829135894775,grad_norm: 0.9999998943539888, iteration: 115094
loss: 1.210494041442871,grad_norm: 0.9999998648783572, iteration: 115095
loss: 1.1698375940322876,grad_norm: 0.9999996834028264, iteration: 115096
loss: 1.1261677742004395,grad_norm: 0.9999998577609314, iteration: 115097
loss: 1.2108083963394165,grad_norm: 0.9999998547647363, iteration: 115098
loss: 1.0303351879119873,grad_norm: 0.9999991148725483, iteration: 115099
loss: 1.2090071439743042,grad_norm: 0.999999867942972, iteration: 115100
loss: 1.2376954555511475,grad_norm: 0.9999996579764314, iteration: 115101
loss: 1.1099921464920044,grad_norm: 0.9999993842405539, iteration: 115102
loss: 1.1152712106704712,grad_norm: 1.0000000795044612, iteration: 115103
loss: 1.1592649221420288,grad_norm: 0.9999997871222948, iteration: 115104
loss: 1.1936743259429932,grad_norm: 0.9999997869607471, iteration: 115105
loss: 1.1609877347946167,grad_norm: 0.9999992720263888, iteration: 115106
loss: 1.2473852634429932,grad_norm: 0.9999996515351905, iteration: 115107
loss: 1.4923592805862427,grad_norm: 1.0000000347202223, iteration: 115108
loss: 1.1659836769104004,grad_norm: 0.9999997849759286, iteration: 115109
loss: 1.2120885848999023,grad_norm: 0.9999998870346304, iteration: 115110
loss: 1.1836950778961182,grad_norm: 0.9999993354825402, iteration: 115111
loss: 1.1616426706314087,grad_norm: 0.9999992994977965, iteration: 115112
loss: 1.3796180486679077,grad_norm: 0.9999995546218587, iteration: 115113
loss: 1.3098851442337036,grad_norm: 0.9999998647371295, iteration: 115114
loss: 1.2621515989303589,grad_norm: 0.999999701339794, iteration: 115115
loss: 1.096508502960205,grad_norm: 0.9999995802906056, iteration: 115116
loss: 1.202195167541504,grad_norm: 0.9999999726274118, iteration: 115117
loss: 1.530325174331665,grad_norm: 0.9999997746422794, iteration: 115118
loss: 1.2606582641601562,grad_norm: 0.9999996755864541, iteration: 115119
loss: 1.3028913736343384,grad_norm: 0.999999418795864, iteration: 115120
loss: 1.1503099203109741,grad_norm: 1.0000000204476136, iteration: 115121
loss: 1.1740472316741943,grad_norm: 0.9999997667273114, iteration: 115122
loss: 1.0837016105651855,grad_norm: 0.9999994488162963, iteration: 115123
loss: 1.2738784551620483,grad_norm: 0.9999997371169629, iteration: 115124
loss: 1.123481035232544,grad_norm: 0.9999990853093048, iteration: 115125
loss: 1.213390827178955,grad_norm: 0.9999995800828443, iteration: 115126
loss: 1.161557912826538,grad_norm: 0.9999994968061073, iteration: 115127
loss: 1.0989160537719727,grad_norm: 1.0000000035669752, iteration: 115128
loss: 1.0946581363677979,grad_norm: 0.9999996016130961, iteration: 115129
loss: 1.2412773370742798,grad_norm: 0.9999994564686073, iteration: 115130
loss: 1.117346167564392,grad_norm: 0.9999994465870677, iteration: 115131
loss: 1.2156087160110474,grad_norm: 0.9999999361417852, iteration: 115132
loss: 1.0770628452301025,grad_norm: 0.999999503152798, iteration: 115133
loss: 1.1968300342559814,grad_norm: 0.9999999353644363, iteration: 115134
loss: 1.1623116731643677,grad_norm: 0.999999392582374, iteration: 115135
loss: 1.1077388525009155,grad_norm: 0.9999998843344965, iteration: 115136
loss: 1.1505590677261353,grad_norm: 0.9999999551863049, iteration: 115137
loss: 1.200000286102295,grad_norm: 0.9999998118394552, iteration: 115138
loss: 1.0897866487503052,grad_norm: 0.9999994229219658, iteration: 115139
loss: 1.0764764547348022,grad_norm: 0.999999132543495, iteration: 115140
loss: 1.0715447664260864,grad_norm: 0.9999993143931525, iteration: 115141
loss: 1.0674619674682617,grad_norm: 0.9999993259267244, iteration: 115142
loss: 1.0474891662597656,grad_norm: 0.9999998306479163, iteration: 115143
loss: 1.0730756521224976,grad_norm: 0.9999992686929574, iteration: 115144
loss: 1.1418321132659912,grad_norm: 0.9999998855678686, iteration: 115145
loss: 1.0974059104919434,grad_norm: 0.9999992170241088, iteration: 115146
loss: 1.0730136632919312,grad_norm: 0.9999994577256632, iteration: 115147
loss: 1.0813668966293335,grad_norm: 0.9999991556416383, iteration: 115148
loss: 1.0696063041687012,grad_norm: 0.9999997037739374, iteration: 115149
loss: 1.0511773824691772,grad_norm: 0.9999995152487481, iteration: 115150
loss: 1.0845253467559814,grad_norm: 0.9999994111312167, iteration: 115151
loss: 1.356843113899231,grad_norm: 0.9999999331318085, iteration: 115152
loss: 1.1589933633804321,grad_norm: 0.999999516125597, iteration: 115153
loss: 1.1013134717941284,grad_norm: 0.9999995174727584, iteration: 115154
loss: 1.102309226989746,grad_norm: 0.9999998236294757, iteration: 115155
loss: 1.091030240058899,grad_norm: 0.9999998428283946, iteration: 115156
loss: 1.0619430541992188,grad_norm: 0.9999996808234162, iteration: 115157
loss: 1.2455458641052246,grad_norm: 0.9999998877780103, iteration: 115158
loss: 1.047968864440918,grad_norm: 0.9999996001356519, iteration: 115159
loss: 1.0939589738845825,grad_norm: 0.9999998566644668, iteration: 115160
loss: 1.0293536186218262,grad_norm: 0.9999995118266882, iteration: 115161
loss: 1.0715522766113281,grad_norm: 0.999999476637331, iteration: 115162
loss: 1.1165740489959717,grad_norm: 0.9999998186548483, iteration: 115163
loss: 1.1146138906478882,grad_norm: 0.9999998622632139, iteration: 115164
loss: 1.1414098739624023,grad_norm: 0.9999992721337896, iteration: 115165
loss: 1.0723762512207031,grad_norm: 0.9999992480339639, iteration: 115166
loss: 1.0622071027755737,grad_norm: 0.9999994591101544, iteration: 115167
loss: 1.0893429517745972,grad_norm: 0.9999995563167564, iteration: 115168
loss: 1.08069908618927,grad_norm: 0.9999997146884563, iteration: 115169
loss: 1.0619399547576904,grad_norm: 0.9999992101905001, iteration: 115170
loss: 1.2322964668273926,grad_norm: 0.9999998020512159, iteration: 115171
loss: 1.1536258459091187,grad_norm: 0.9999997271061806, iteration: 115172
loss: 1.3361284732818604,grad_norm: 0.9999996869786814, iteration: 115173
loss: 1.013175368309021,grad_norm: 0.9999994547956003, iteration: 115174
loss: 1.1269123554229736,grad_norm: 0.9999992174498736, iteration: 115175
loss: 1.2510731220245361,grad_norm: 1.0000000774246234, iteration: 115176
loss: 1.1613963842391968,grad_norm: 0.9999996662695597, iteration: 115177
loss: 1.2386258840560913,grad_norm: 0.9999995754816098, iteration: 115178
loss: 1.1920527219772339,grad_norm: 1.000000065490469, iteration: 115179
loss: 1.0459909439086914,grad_norm: 0.9999990061623676, iteration: 115180
loss: 1.501952052116394,grad_norm: 0.9999996960927573, iteration: 115181
loss: 1.237023949623108,grad_norm: 0.9999996151048358, iteration: 115182
loss: 1.155524492263794,grad_norm: 0.9999992317004318, iteration: 115183
loss: 1.2875555753707886,grad_norm: 0.999999841965592, iteration: 115184
loss: 1.3263994455337524,grad_norm: 0.9999998714011394, iteration: 115185
loss: 1.0303064584732056,grad_norm: 0.9999999456141343, iteration: 115186
loss: 1.1432934999465942,grad_norm: 0.9999990993915112, iteration: 115187
loss: 1.0843251943588257,grad_norm: 0.9999990302602132, iteration: 115188
loss: 1.3334424495697021,grad_norm: 0.9999996455619994, iteration: 115189
loss: 1.328209400177002,grad_norm: 0.9999998499642305, iteration: 115190
loss: 1.1245251893997192,grad_norm: 0.9999992792488073, iteration: 115191
loss: 1.0330959558486938,grad_norm: 0.8980627231538256, iteration: 115192
loss: 1.1338701248168945,grad_norm: 0.9999993145636007, iteration: 115193
loss: 1.0422286987304688,grad_norm: 0.9999995883930637, iteration: 115194
loss: 1.1529749631881714,grad_norm: 1.0000000089905603, iteration: 115195
loss: 1.0269211530685425,grad_norm: 1.00000003594913, iteration: 115196
loss: 1.191794991493225,grad_norm: 0.9999995986304335, iteration: 115197
loss: 1.147222638130188,grad_norm: 0.9999996923528949, iteration: 115198
loss: 1.1129170656204224,grad_norm: 0.9999991391621073, iteration: 115199
loss: 1.162874460220337,grad_norm: 1.0000000096203827, iteration: 115200
loss: 1.0553104877471924,grad_norm: 0.9999993415082772, iteration: 115201
loss: 1.419546127319336,grad_norm: 0.9999997547934472, iteration: 115202
loss: 1.0081757307052612,grad_norm: 0.9604870657602529, iteration: 115203
loss: 1.1239361763000488,grad_norm: 0.999999066227355, iteration: 115204
loss: 1.2588741779327393,grad_norm: 0.9999994764966216, iteration: 115205
loss: 1.221211552619934,grad_norm: 0.999999719324817, iteration: 115206
loss: 0.9977281093597412,grad_norm: 0.9999995171674533, iteration: 115207
loss: 1.0402802228927612,grad_norm: 0.9999996761158056, iteration: 115208
loss: 1.1430054903030396,grad_norm: 0.999999887832777, iteration: 115209
loss: 1.0700945854187012,grad_norm: 0.9999999535952613, iteration: 115210
loss: 1.1228469610214233,grad_norm: 0.999999326540821, iteration: 115211
loss: 1.376049280166626,grad_norm: 1.0000000277415617, iteration: 115212
loss: 1.104286789894104,grad_norm: 0.9999995088769069, iteration: 115213
loss: 1.1349729299545288,grad_norm: 0.9999995700908235, iteration: 115214
loss: 1.0527645349502563,grad_norm: 0.9999992553839908, iteration: 115215
loss: 0.9928992390632629,grad_norm: 0.9999998679760898, iteration: 115216
loss: 1.2184348106384277,grad_norm: 0.9999997071003042, iteration: 115217
loss: 1.126783847808838,grad_norm: 0.9999994619280318, iteration: 115218
loss: 1.1363228559494019,grad_norm: 0.9999993854054164, iteration: 115219
loss: 1.117058277130127,grad_norm: 0.9999999133670509, iteration: 115220
loss: 1.0750207901000977,grad_norm: 0.9999999471560514, iteration: 115221
loss: 1.0825793743133545,grad_norm: 0.9999998743139088, iteration: 115222
loss: 1.1299430131912231,grad_norm: 0.9999997274695839, iteration: 115223
loss: 1.1330585479736328,grad_norm: 0.9999993774077397, iteration: 115224
loss: 1.0756009817123413,grad_norm: 0.9999992151454855, iteration: 115225
loss: 1.3504228591918945,grad_norm: 0.9999999807977479, iteration: 115226
loss: 1.0968632698059082,grad_norm: 0.9999994293854229, iteration: 115227
loss: 1.0883283615112305,grad_norm: 0.999999636658406, iteration: 115228
loss: 1.1653289794921875,grad_norm: 0.9999994955296412, iteration: 115229
loss: 1.051703691482544,grad_norm: 0.9999992806816009, iteration: 115230
loss: 1.1104141473770142,grad_norm: 0.9999998441340229, iteration: 115231
loss: 1.1347016096115112,grad_norm: 0.9999991199121383, iteration: 115232
loss: 1.1595064401626587,grad_norm: 0.9999996667693559, iteration: 115233
loss: 1.3082627058029175,grad_norm: 0.9999999932356035, iteration: 115234
loss: 1.060890793800354,grad_norm: 0.9211803841991187, iteration: 115235
loss: 1.0209416151046753,grad_norm: 0.9999991744731714, iteration: 115236
loss: 1.0856928825378418,grad_norm: 0.999999917350302, iteration: 115237
loss: 1.0412565469741821,grad_norm: 0.9999990655470413, iteration: 115238
loss: 1.1275831460952759,grad_norm: 0.9999994799895202, iteration: 115239
loss: 1.1641554832458496,grad_norm: 0.9999994187832133, iteration: 115240
loss: 1.137113094329834,grad_norm: 0.9999994941215546, iteration: 115241
loss: 1.078627586364746,grad_norm: 0.7651985944225821, iteration: 115242
loss: 1.0815579891204834,grad_norm: 0.9999994590003375, iteration: 115243
loss: 1.0736606121063232,grad_norm: 0.9999996458784532, iteration: 115244
loss: 1.1115437746047974,grad_norm: 0.9999993543443926, iteration: 115245
loss: 1.0440162420272827,grad_norm: 0.9999993939618317, iteration: 115246
loss: 1.0441346168518066,grad_norm: 0.9999992301687428, iteration: 115247
loss: 1.1802847385406494,grad_norm: 0.999999800096446, iteration: 115248
loss: 1.0708786249160767,grad_norm: 0.9999999420380764, iteration: 115249
loss: 1.0644421577453613,grad_norm: 0.9999996541509238, iteration: 115250
loss: 1.1004517078399658,grad_norm: 0.9999993885561757, iteration: 115251
loss: 1.2410588264465332,grad_norm: 0.999999549363358, iteration: 115252
loss: 1.1308377981185913,grad_norm: 0.9999991027179886, iteration: 115253
loss: 1.1333178281784058,grad_norm: 0.999999376517115, iteration: 115254
loss: 1.136430263519287,grad_norm: 0.9999998794882984, iteration: 115255
loss: 1.0866141319274902,grad_norm: 0.9999996567324696, iteration: 115256
loss: 1.0116498470306396,grad_norm: 0.999999076608506, iteration: 115257
loss: 1.081841230392456,grad_norm: 0.9999995366719258, iteration: 115258
loss: 1.0913280248641968,grad_norm: 0.9797862645262215, iteration: 115259
loss: 1.3872897624969482,grad_norm: 0.999999645132759, iteration: 115260
loss: 1.080592393875122,grad_norm: 0.9999993945778684, iteration: 115261
loss: 1.0363118648529053,grad_norm: 0.9999992240730401, iteration: 115262
loss: 1.0987447500228882,grad_norm: 0.9999994606399917, iteration: 115263
loss: 1.163006067276001,grad_norm: 0.9999996618773506, iteration: 115264
loss: 1.1281120777130127,grad_norm: 0.9999991532339371, iteration: 115265
loss: 1.088065266609192,grad_norm: 0.9792693558342431, iteration: 115266
loss: 1.0465643405914307,grad_norm: 0.9999996790606919, iteration: 115267
loss: 1.1099275350570679,grad_norm: 0.9999997963567445, iteration: 115268
loss: 1.1383605003356934,grad_norm: 1.000000032733965, iteration: 115269
loss: 1.0580313205718994,grad_norm: 0.9999993066507605, iteration: 115270
loss: 1.1259514093399048,grad_norm: 0.9999996980108764, iteration: 115271
loss: 1.0198652744293213,grad_norm: 0.999999003624544, iteration: 115272
loss: 1.1639515161514282,grad_norm: 1.0000000245528986, iteration: 115273
loss: 1.094414472579956,grad_norm: 0.9999997311985115, iteration: 115274
loss: 1.0658665895462036,grad_norm: 0.9999994147836216, iteration: 115275
loss: 1.1382262706756592,grad_norm: 0.9999996401324126, iteration: 115276
loss: 1.0491759777069092,grad_norm: 0.9999992690466316, iteration: 115277
loss: 1.0549888610839844,grad_norm: 0.9999994153420342, iteration: 115278
loss: 0.9693933725357056,grad_norm: 0.9999996316051517, iteration: 115279
loss: 1.0592279434204102,grad_norm: 0.9999993007667181, iteration: 115280
loss: 1.1126227378845215,grad_norm: 0.9999992178354585, iteration: 115281
loss: 1.1107860803604126,grad_norm: 0.999999856907249, iteration: 115282
loss: 1.0460319519042969,grad_norm: 0.9999993664026988, iteration: 115283
loss: 1.0767335891723633,grad_norm: 0.9999990821758845, iteration: 115284
loss: 1.0955395698547363,grad_norm: 0.9999992545669978, iteration: 115285
loss: 1.063980221748352,grad_norm: 0.9999997327093697, iteration: 115286
loss: 1.049013376235962,grad_norm: 0.8103847617296882, iteration: 115287
loss: 1.058348298072815,grad_norm: 0.9999996893965163, iteration: 115288
loss: 1.1478993892669678,grad_norm: 0.9999998628972353, iteration: 115289
loss: 1.1073648929595947,grad_norm: 0.999999756726951, iteration: 115290
loss: 0.9736602902412415,grad_norm: 0.9776566467076965, iteration: 115291
loss: 1.01176118850708,grad_norm: 0.9999993135941033, iteration: 115292
loss: 1.1194828748703003,grad_norm: 0.999999123352653, iteration: 115293
loss: 1.1646614074707031,grad_norm: 0.9999997814422696, iteration: 115294
loss: 1.0117229223251343,grad_norm: 0.9999997341292416, iteration: 115295
loss: 1.0496950149536133,grad_norm: 0.9999996335651743, iteration: 115296
loss: 1.0657089948654175,grad_norm: 0.9999994657807484, iteration: 115297
loss: 1.0317858457565308,grad_norm: 0.9999997068109423, iteration: 115298
loss: 1.0293055772781372,grad_norm: 0.859657827583711, iteration: 115299
loss: 1.0492037534713745,grad_norm: 0.9999993767566918, iteration: 115300
loss: 1.0940765142440796,grad_norm: 0.9999998112267352, iteration: 115301
loss: 1.0590189695358276,grad_norm: 0.9601481591862658, iteration: 115302
loss: 1.0640921592712402,grad_norm: 0.9999995738310707, iteration: 115303
loss: 1.1022815704345703,grad_norm: 0.999999659369534, iteration: 115304
loss: 1.0789295434951782,grad_norm: 0.9999995638859411, iteration: 115305
loss: 1.0719914436340332,grad_norm: 0.9999994028072067, iteration: 115306
loss: 1.0669397115707397,grad_norm: 0.9999995468146833, iteration: 115307
loss: 1.0960807800292969,grad_norm: 0.9999992494251211, iteration: 115308
loss: 1.0050783157348633,grad_norm: 0.999999072632746, iteration: 115309
loss: 0.9897719025611877,grad_norm: 0.9999992860544806, iteration: 115310
loss: 1.1036068201065063,grad_norm: 0.9999993293558236, iteration: 115311
loss: 1.0687751770019531,grad_norm: 0.9999992100935478, iteration: 115312
loss: 1.070037841796875,grad_norm: 0.9999996571309312, iteration: 115313
loss: 1.1043214797973633,grad_norm: 0.999999473202229, iteration: 115314
loss: 1.0941717624664307,grad_norm: 0.9999999707398711, iteration: 115315
loss: 1.044338345527649,grad_norm: 0.9605619453738685, iteration: 115316
loss: 1.04990553855896,grad_norm: 0.9999998997958326, iteration: 115317
loss: 1.0903372764587402,grad_norm: 0.9999994000095908, iteration: 115318
loss: 1.0328527688980103,grad_norm: 0.9999992827383762, iteration: 115319
loss: 1.1218538284301758,grad_norm: 0.999999655066023, iteration: 115320
loss: 1.0595672130584717,grad_norm: 0.9999998281010638, iteration: 115321
loss: 1.0641505718231201,grad_norm: 0.9999998696378676, iteration: 115322
loss: 1.1434767246246338,grad_norm: 0.9999996235642881, iteration: 115323
loss: 1.0752357244491577,grad_norm: 0.8413235612246018, iteration: 115324
loss: 1.0471806526184082,grad_norm: 0.9999992345051877, iteration: 115325
loss: 1.0599794387817383,grad_norm: 0.9999998526073648, iteration: 115326
loss: 1.1263036727905273,grad_norm: 0.9999992044382119, iteration: 115327
loss: 1.0475307703018188,grad_norm: 0.9999998106892233, iteration: 115328
loss: 1.2375737428665161,grad_norm: 0.9999998830451337, iteration: 115329
loss: 1.1001683473587036,grad_norm: 0.9999999063510276, iteration: 115330
loss: 1.2075443267822266,grad_norm: 0.9999997705099718, iteration: 115331
loss: 1.097265601158142,grad_norm: 0.99999916241069, iteration: 115332
loss: 1.0869228839874268,grad_norm: 0.9999996212490727, iteration: 115333
loss: 1.0945485830307007,grad_norm: 0.999999852926948, iteration: 115334
loss: 1.0993701219558716,grad_norm: 0.999999598951485, iteration: 115335
loss: 1.0454940795898438,grad_norm: 0.9999995827275945, iteration: 115336
loss: 1.0850516557693481,grad_norm: 0.9999994304068279, iteration: 115337
loss: 1.1516999006271362,grad_norm: 0.9999999500601748, iteration: 115338
loss: 1.0721977949142456,grad_norm: 0.9999990356672755, iteration: 115339
loss: 1.0915002822875977,grad_norm: 0.9999994069119593, iteration: 115340
loss: 1.0871771574020386,grad_norm: 0.9999995751566827, iteration: 115341
loss: 1.1039048433303833,grad_norm: 0.9999998659384652, iteration: 115342
loss: 1.056593894958496,grad_norm: 0.9999993908473672, iteration: 115343
loss: 1.158976674079895,grad_norm: 0.9999993820001346, iteration: 115344
loss: 1.2083008289337158,grad_norm: 0.9999996347879504, iteration: 115345
loss: 1.150036334991455,grad_norm: 0.9999997780717169, iteration: 115346
loss: 1.1084319353103638,grad_norm: 0.9999999107838214, iteration: 115347
loss: 0.977157711982727,grad_norm: 0.8920062128531492, iteration: 115348
loss: 1.0722976922988892,grad_norm: 0.9999995261692272, iteration: 115349
loss: 1.1929939985275269,grad_norm: 0.9999998128512949, iteration: 115350
loss: 1.0464985370635986,grad_norm: 0.9202132165104098, iteration: 115351
loss: 1.0386241674423218,grad_norm: 0.9999996204714118, iteration: 115352
loss: 1.0518466234207153,grad_norm: 0.9999990865567762, iteration: 115353
loss: 1.057287335395813,grad_norm: 0.9302270995771735, iteration: 115354
loss: 1.1113487482070923,grad_norm: 0.9999996208258826, iteration: 115355
loss: 1.0911809206008911,grad_norm: 0.999999659090365, iteration: 115356
loss: 1.1982295513153076,grad_norm: 0.9999996630186596, iteration: 115357
loss: 1.2417933940887451,grad_norm: 0.9999998793413161, iteration: 115358
loss: 1.0971652269363403,grad_norm: 0.9999993184115931, iteration: 115359
loss: 1.1253767013549805,grad_norm: 0.9999994635946922, iteration: 115360
loss: 1.152427315711975,grad_norm: 0.9999994469572444, iteration: 115361
loss: 1.0128754377365112,grad_norm: 0.9999993386389044, iteration: 115362
loss: 1.186574101448059,grad_norm: 0.9999998710368463, iteration: 115363
loss: 1.097605586051941,grad_norm: 0.99999972225316, iteration: 115364
loss: 1.0474088191986084,grad_norm: 0.9999993698928523, iteration: 115365
loss: 1.091362714767456,grad_norm: 0.9999997999655331, iteration: 115366
loss: 1.2918623685836792,grad_norm: 0.999999716141459, iteration: 115367
loss: 1.04692542552948,grad_norm: 0.9999991603654099, iteration: 115368
loss: 1.0180981159210205,grad_norm: 0.8504869494892933, iteration: 115369
loss: 1.123262882232666,grad_norm: 0.9999991837751049, iteration: 115370
loss: 1.0618772506713867,grad_norm: 0.9999995552616425, iteration: 115371
loss: 1.1268800497055054,grad_norm: 0.9999998549692025, iteration: 115372
loss: 1.0688353776931763,grad_norm: 0.9999992505538088, iteration: 115373
loss: 1.1756771802902222,grad_norm: 0.9999998915442073, iteration: 115374
loss: 1.1297184228897095,grad_norm: 0.9999995382023689, iteration: 115375
loss: 1.188590168952942,grad_norm: 0.9999993743710917, iteration: 115376
loss: 1.0899653434753418,grad_norm: 0.9999996799079374, iteration: 115377
loss: 1.0398622751235962,grad_norm: 0.9999991150937526, iteration: 115378
loss: 1.2173705101013184,grad_norm: 0.9999996554476133, iteration: 115379
loss: 1.0019546747207642,grad_norm: 0.8728455119366915, iteration: 115380
loss: 1.2680116891860962,grad_norm: 0.9999999178758134, iteration: 115381
loss: 1.2237019538879395,grad_norm: 0.9999999543263587, iteration: 115382
loss: 1.126305103302002,grad_norm: 0.9999996118167628, iteration: 115383
loss: 1.0547000169754028,grad_norm: 0.8558151633733295, iteration: 115384
loss: 1.0971027612686157,grad_norm: 0.9999998209841002, iteration: 115385
loss: 1.0826647281646729,grad_norm: 0.9999993275473659, iteration: 115386
loss: 1.2064154148101807,grad_norm: 0.9999999285965471, iteration: 115387
loss: 1.0714385509490967,grad_norm: 0.9999998285878755, iteration: 115388
loss: 1.246801733970642,grad_norm: 0.9999997995665809, iteration: 115389
loss: 1.0560674667358398,grad_norm: 0.999999245024501, iteration: 115390
loss: 1.1379872560501099,grad_norm: 0.9999993219386242, iteration: 115391
loss: 1.1967307329177856,grad_norm: 0.9999996912330187, iteration: 115392
loss: 1.0764992237091064,grad_norm: 0.9999992301188845, iteration: 115393
loss: 1.04562246799469,grad_norm: 0.9063644468757814, iteration: 115394
loss: 1.1324752569198608,grad_norm: 0.9999992522021889, iteration: 115395
loss: 1.1964199542999268,grad_norm: 0.9999997600534171, iteration: 115396
loss: 1.0428249835968018,grad_norm: 0.999999061260872, iteration: 115397
loss: 1.1391769647598267,grad_norm: 0.9999994220271802, iteration: 115398
loss: 1.299838900566101,grad_norm: 0.99999961488846, iteration: 115399
loss: 1.112632393836975,grad_norm: 0.9999993020914848, iteration: 115400
loss: 1.0004186630249023,grad_norm: 0.9999991452400009, iteration: 115401
loss: 1.1592262983322144,grad_norm: 0.9999998700060327, iteration: 115402
loss: 1.018119215965271,grad_norm: 0.9999994762780166, iteration: 115403
loss: 1.0176305770874023,grad_norm: 0.9999998005167526, iteration: 115404
loss: 1.0451526641845703,grad_norm: 0.9454825667756436, iteration: 115405
loss: 1.0126374959945679,grad_norm: 0.999999334968028, iteration: 115406
loss: 1.178501009941101,grad_norm: 1.0000000507508282, iteration: 115407
loss: 1.0164992809295654,grad_norm: 0.9999990112175805, iteration: 115408
loss: 1.1042593717575073,grad_norm: 0.9999995666356013, iteration: 115409
loss: 1.073337435722351,grad_norm: 0.9999994821638344, iteration: 115410
loss: 1.1189696788787842,grad_norm: 0.999999190355865, iteration: 115411
loss: 1.331971526145935,grad_norm: 0.9999996729691963, iteration: 115412
loss: 1.3412697315216064,grad_norm: 0.9999998981860665, iteration: 115413
loss: 1.1600288152694702,grad_norm: 0.9999993703168845, iteration: 115414
loss: 1.1724252700805664,grad_norm: 0.9999997107411128, iteration: 115415
loss: 1.169870138168335,grad_norm: 0.9999998391443523, iteration: 115416
loss: 1.1479322910308838,grad_norm: 0.9999998281484154, iteration: 115417
loss: 1.1663271188735962,grad_norm: 0.9999997152057082, iteration: 115418
loss: 1.066437840461731,grad_norm: 0.9999995843257655, iteration: 115419
loss: 1.1817429065704346,grad_norm: 0.999999843326757, iteration: 115420
loss: 1.1264266967773438,grad_norm: 0.9999999437836616, iteration: 115421
loss: 1.1319125890731812,grad_norm: 0.9999997853413799, iteration: 115422
loss: 1.333864450454712,grad_norm: 0.9999998889305803, iteration: 115423
loss: 1.0981390476226807,grad_norm: 0.99999958360318, iteration: 115424
loss: 1.078983187675476,grad_norm: 0.9999996879828719, iteration: 115425
loss: 1.0422106981277466,grad_norm: 0.9999993385411405, iteration: 115426
loss: 0.9930657148361206,grad_norm: 0.92282210311085, iteration: 115427
loss: 1.0211130380630493,grad_norm: 0.9999991594146732, iteration: 115428
loss: 1.2845841646194458,grad_norm: 0.9999997737337527, iteration: 115429
loss: 1.07610023021698,grad_norm: 0.9999995509217947, iteration: 115430
loss: 1.3731499910354614,grad_norm: 1.0000000236952908, iteration: 115431
loss: 1.279032588005066,grad_norm: 0.9999999656341346, iteration: 115432
loss: 1.2359849214553833,grad_norm: 0.9999995792675533, iteration: 115433
loss: 1.1862962245941162,grad_norm: 0.9999998412411101, iteration: 115434
loss: 1.1947602033615112,grad_norm: 0.9999993953520722, iteration: 115435
loss: 1.2110316753387451,grad_norm: 0.9999998227270399, iteration: 115436
loss: 1.2396925687789917,grad_norm: 1.0000000390333066, iteration: 115437
loss: 1.1741598844528198,grad_norm: 0.9999992689900109, iteration: 115438
loss: 1.0219892263412476,grad_norm: 0.9999990977478453, iteration: 115439
loss: 1.0997451543807983,grad_norm: 0.9999995344808977, iteration: 115440
loss: 1.1111012697219849,grad_norm: 0.9999999378689136, iteration: 115441
loss: 1.042033076286316,grad_norm: 0.8912281666930112, iteration: 115442
loss: 1.1047641038894653,grad_norm: 0.9999994413330113, iteration: 115443
loss: 1.1182706356048584,grad_norm: 0.9999992594560491, iteration: 115444
loss: 1.0752145051956177,grad_norm: 0.9999996278695941, iteration: 115445
loss: 1.0268330574035645,grad_norm: 0.999999372611201, iteration: 115446
loss: 1.0643339157104492,grad_norm: 0.9999995016508632, iteration: 115447
loss: 1.16720712184906,grad_norm: 0.9999998568666466, iteration: 115448
loss: 1.0889707803726196,grad_norm: 0.9999995129370903, iteration: 115449
loss: 1.2147034406661987,grad_norm: 0.9999998390002134, iteration: 115450
loss: 1.0425355434417725,grad_norm: 0.9999996000444765, iteration: 115451
loss: 1.206953763961792,grad_norm: 0.9999995999645843, iteration: 115452
loss: 1.2092393636703491,grad_norm: 0.9999995174881919, iteration: 115453
loss: 1.0715395212173462,grad_norm: 0.9999996037313553, iteration: 115454
loss: 1.21172034740448,grad_norm: 0.9999993942852982, iteration: 115455
loss: 1.1706979274749756,grad_norm: 0.9999999056206812, iteration: 115456
loss: 1.0799150466918945,grad_norm: 0.999999783537139, iteration: 115457
loss: 1.1385420560836792,grad_norm: 0.9999997873387172, iteration: 115458
loss: 1.03337824344635,grad_norm: 0.9999991690456301, iteration: 115459
loss: 0.997421383857727,grad_norm: 0.9999992590995351, iteration: 115460
loss: 1.160589337348938,grad_norm: 0.9999994837054154, iteration: 115461
loss: 1.127113938331604,grad_norm: 0.9999991465865964, iteration: 115462
loss: 1.109151005744934,grad_norm: 0.9999996923294887, iteration: 115463
loss: 1.114317774772644,grad_norm: 0.9999994090818782, iteration: 115464
loss: 1.1201797723770142,grad_norm: 0.9999991776623246, iteration: 115465
loss: 1.2145277261734009,grad_norm: 0.999999856586994, iteration: 115466
loss: 1.0727108716964722,grad_norm: 0.999999181335145, iteration: 115467
loss: 1.0916246175765991,grad_norm: 0.999999078915832, iteration: 115468
loss: 1.0861008167266846,grad_norm: 0.9999993541887817, iteration: 115469
loss: 1.0217903852462769,grad_norm: 0.9999990295029371, iteration: 115470
loss: 1.135158896446228,grad_norm: 0.9999998494575113, iteration: 115471
loss: 1.1593323945999146,grad_norm: 0.9999996079677322, iteration: 115472
loss: 1.1510947942733765,grad_norm: 0.9999995171219015, iteration: 115473
loss: 1.1305664777755737,grad_norm: 0.999999373988196, iteration: 115474
loss: 1.3771007061004639,grad_norm: 1.0000000820810053, iteration: 115475
loss: 1.0541608333587646,grad_norm: 0.999999336490739, iteration: 115476
loss: 1.1430134773254395,grad_norm: 0.9999994924694303, iteration: 115477
loss: 1.056010127067566,grad_norm: 0.9999996137621514, iteration: 115478
loss: 1.0964572429656982,grad_norm: 1.0000000732379974, iteration: 115479
loss: 1.0649065971374512,grad_norm: 0.999999524331312, iteration: 115480
loss: 1.1650980710983276,grad_norm: 0.9999995543511568, iteration: 115481
loss: 1.4113513231277466,grad_norm: 0.9999998918886148, iteration: 115482
loss: 1.2082499265670776,grad_norm: 1.0000000298000558, iteration: 115483
loss: 1.038068175315857,grad_norm: 0.9999992721477695, iteration: 115484
loss: 1.2878808975219727,grad_norm: 0.9999999382315732, iteration: 115485
loss: 1.1820402145385742,grad_norm: 0.9999991466961083, iteration: 115486
loss: 1.173987627029419,grad_norm: 1.0000000299645833, iteration: 115487
loss: 1.0719959735870361,grad_norm: 0.9999996749094407, iteration: 115488
loss: 1.0426993370056152,grad_norm: 0.9999996870182286, iteration: 115489
loss: 1.07343590259552,grad_norm: 0.9999991021652322, iteration: 115490
loss: 1.0718133449554443,grad_norm: 0.9999996809821667, iteration: 115491
loss: 1.1332088708877563,grad_norm: 0.9999992868797056, iteration: 115492
loss: 1.0688551664352417,grad_norm: 0.9999990697506665, iteration: 115493
loss: 1.1561135053634644,grad_norm: 0.99999969041557, iteration: 115494
loss: 1.0974054336547852,grad_norm: 0.9999996840246713, iteration: 115495
loss: 1.244668960571289,grad_norm: 0.9999999279350691, iteration: 115496
loss: 1.1640421152114868,grad_norm: 0.9999996942535876, iteration: 115497
loss: 1.0768702030181885,grad_norm: 0.9999998893374502, iteration: 115498
loss: 0.9609206914901733,grad_norm: 0.9699556914337145, iteration: 115499
loss: 1.1053813695907593,grad_norm: 0.9999991611350537, iteration: 115500
loss: 1.0973588228225708,grad_norm: 0.9999993368511827, iteration: 115501
loss: 1.1417343616485596,grad_norm: 0.999999508950357, iteration: 115502
loss: 1.0076309442520142,grad_norm: 0.9999990810187815, iteration: 115503
loss: 1.0732381343841553,grad_norm: 0.9999998203242071, iteration: 115504
loss: 1.1906442642211914,grad_norm: 0.99999989215745, iteration: 115505
loss: 1.1712188720703125,grad_norm: 0.9999994263500597, iteration: 115506
loss: 1.0345410108566284,grad_norm: 0.9941922060013109, iteration: 115507
loss: 1.380388617515564,grad_norm: 0.9999995619973099, iteration: 115508
loss: 1.0270845890045166,grad_norm: 0.9999990843008254, iteration: 115509
loss: 1.0325744152069092,grad_norm: 0.9999990804010208, iteration: 115510
loss: 1.0006848573684692,grad_norm: 0.9999991087545868, iteration: 115511
loss: 1.0388774871826172,grad_norm: 0.9999990595515053, iteration: 115512
loss: 1.0959872007369995,grad_norm: 0.9999994588942266, iteration: 115513
loss: 1.1441913843154907,grad_norm: 0.9999993007099949, iteration: 115514
loss: 1.1066975593566895,grad_norm: 0.9999994816022081, iteration: 115515
loss: 1.1068661212921143,grad_norm: 0.9999991288344499, iteration: 115516
loss: 1.078896164894104,grad_norm: 0.9999990473255572, iteration: 115517
loss: 1.2572612762451172,grad_norm: 0.9999999229612766, iteration: 115518
loss: 1.0375118255615234,grad_norm: 0.9999995031831651, iteration: 115519
loss: 1.1428731679916382,grad_norm: 0.9999997895196455, iteration: 115520
loss: 1.0245059728622437,grad_norm: 0.9999991766979424, iteration: 115521
loss: 1.0661962032318115,grad_norm: 0.9999996518569081, iteration: 115522
loss: 1.0805422067642212,grad_norm: 0.9999992502470312, iteration: 115523
loss: 1.121915340423584,grad_norm: 0.9999998967788737, iteration: 115524
loss: 1.0588104724884033,grad_norm: 0.9999998418091487, iteration: 115525
loss: 1.228907823562622,grad_norm: 0.9999997424995463, iteration: 115526
loss: 1.0073586702346802,grad_norm: 0.9999991770721759, iteration: 115527
loss: 1.1382218599319458,grad_norm: 0.999999747264757, iteration: 115528
loss: 1.0277612209320068,grad_norm: 0.9999994327109, iteration: 115529
loss: 1.0939297676086426,grad_norm: 0.9999998953940739, iteration: 115530
loss: 1.0220797061920166,grad_norm: 0.9999994461139708, iteration: 115531
loss: 1.0389539003372192,grad_norm: 0.9573851480099546, iteration: 115532
loss: 1.1032419204711914,grad_norm: 0.9999994093489953, iteration: 115533
loss: 1.1042048931121826,grad_norm: 0.9999998917416139, iteration: 115534
loss: 1.231408715248108,grad_norm: 0.9999998890882028, iteration: 115535
loss: 1.1120182275772095,grad_norm: 0.9999997793589929, iteration: 115536
loss: 1.0596473217010498,grad_norm: 0.9999998691626169, iteration: 115537
loss: 1.096826434135437,grad_norm: 0.9999993878586511, iteration: 115538
loss: 1.1862425804138184,grad_norm: 0.999999893545167, iteration: 115539
loss: 1.0579545497894287,grad_norm: 0.999999045094573, iteration: 115540
loss: 1.0770277976989746,grad_norm: 0.9999996344005337, iteration: 115541
loss: 1.084641933441162,grad_norm: 0.999999863207285, iteration: 115542
loss: 1.1103006601333618,grad_norm: 0.9999999820127148, iteration: 115543
loss: 1.0908278226852417,grad_norm: 0.999999458889263, iteration: 115544
loss: 1.0083190202713013,grad_norm: 0.9262797877780021, iteration: 115545
loss: 1.0520092248916626,grad_norm: 0.999999311749474, iteration: 115546
loss: 1.0808581113815308,grad_norm: 0.9999992513651701, iteration: 115547
loss: 1.0412297248840332,grad_norm: 0.9999997047005997, iteration: 115548
loss: 1.2145676612854004,grad_norm: 0.9999998484183124, iteration: 115549
loss: 1.0599523782730103,grad_norm: 0.9999995601350149, iteration: 115550
loss: 1.1446841955184937,grad_norm: 0.9999998087244835, iteration: 115551
loss: 1.1751269102096558,grad_norm: 0.9999994900898221, iteration: 115552
loss: 1.0274510383605957,grad_norm: 0.9973723124411008, iteration: 115553
loss: 1.0224571228027344,grad_norm: 0.9874541692164692, iteration: 115554
loss: 1.140975832939148,grad_norm: 0.9999994222315784, iteration: 115555
loss: 1.012070655822754,grad_norm: 0.9999992115205512, iteration: 115556
loss: 1.0292954444885254,grad_norm: 0.9182222908126678, iteration: 115557
loss: 1.0348803997039795,grad_norm: 0.9372890913095293, iteration: 115558
loss: 1.079932689666748,grad_norm: 0.9999994066852123, iteration: 115559
loss: 1.0228798389434814,grad_norm: 0.9608936611489582, iteration: 115560
loss: 1.065734624862671,grad_norm: 0.9999999348426281, iteration: 115561
loss: 1.3312158584594727,grad_norm: 0.9999996893914318, iteration: 115562
loss: 0.9962059259414673,grad_norm: 0.9008470971192426, iteration: 115563
loss: 1.1069930791854858,grad_norm: 0.9999996641396303, iteration: 115564
loss: 1.2225229740142822,grad_norm: 0.9999994697801426, iteration: 115565
loss: 1.0953776836395264,grad_norm: 0.9484625703137913, iteration: 115566
loss: 1.2103699445724487,grad_norm: 0.9999997392110246, iteration: 115567
loss: 1.06831693649292,grad_norm: 0.9999999091861362, iteration: 115568
loss: 1.0543166399002075,grad_norm: 0.9999993598157338, iteration: 115569
loss: 1.143140196800232,grad_norm: 0.9999997531948625, iteration: 115570
loss: 1.1471329927444458,grad_norm: 0.9999999044168267, iteration: 115571
loss: 0.9904133081436157,grad_norm: 0.8255356661082294, iteration: 115572
loss: 1.0701899528503418,grad_norm: 0.9999990337713618, iteration: 115573
loss: 1.0359532833099365,grad_norm: 0.999999811536967, iteration: 115574
loss: 1.0416977405548096,grad_norm: 0.9999991448355002, iteration: 115575
loss: 1.1076126098632812,grad_norm: 0.9999999231788997, iteration: 115576
loss: 1.0195242166519165,grad_norm: 0.9999997265323577, iteration: 115577
loss: 1.0485706329345703,grad_norm: 0.999999578258187, iteration: 115578
loss: 1.1025185585021973,grad_norm: 0.9999992145021165, iteration: 115579
loss: 1.0963084697723389,grad_norm: 0.9999999647889075, iteration: 115580
loss: 1.021116852760315,grad_norm: 0.999999252804703, iteration: 115581
loss: 1.037574291229248,grad_norm: 0.9268682393244603, iteration: 115582
loss: 1.0683835744857788,grad_norm: 1.0000000091525705, iteration: 115583
loss: 1.118551254272461,grad_norm: 0.9999994759219265, iteration: 115584
loss: 1.017351508140564,grad_norm: 0.9999993973200368, iteration: 115585
loss: 1.0440672636032104,grad_norm: 0.9999991002732624, iteration: 115586
loss: 1.1679705381393433,grad_norm: 0.9999998294925083, iteration: 115587
loss: 1.185541033744812,grad_norm: 0.9999999503465397, iteration: 115588
loss: 1.1042169332504272,grad_norm: 0.9999997157471112, iteration: 115589
loss: 1.0960065126419067,grad_norm: 0.9999995842283083, iteration: 115590
loss: 0.9980608224868774,grad_norm: 0.9999995952415786, iteration: 115591
loss: 1.0717625617980957,grad_norm: 0.9999998542144924, iteration: 115592
loss: 1.0353635549545288,grad_norm: 0.9999990474491155, iteration: 115593
loss: 1.0919981002807617,grad_norm: 0.999999475823394, iteration: 115594
loss: 1.1000897884368896,grad_norm: 0.9999994693042248, iteration: 115595
loss: 1.1894112825393677,grad_norm: 0.9999995910552566, iteration: 115596
loss: 1.0661261081695557,grad_norm: 0.9999992268429336, iteration: 115597
loss: 1.1031486988067627,grad_norm: 0.9999999287358413, iteration: 115598
loss: 1.0736405849456787,grad_norm: 0.9999992092671959, iteration: 115599
loss: 1.0944952964782715,grad_norm: 0.9999991877877134, iteration: 115600
loss: 1.1246144771575928,grad_norm: 0.9999996617399056, iteration: 115601
loss: 1.144881010055542,grad_norm: 0.9999998868882316, iteration: 115602
loss: 1.047805666923523,grad_norm: 0.9999995003683487, iteration: 115603
loss: 1.068313479423523,grad_norm: 0.9999999352979526, iteration: 115604
loss: 1.079603672027588,grad_norm: 1.0000000111853733, iteration: 115605
loss: 1.036927580833435,grad_norm: 0.9999992272653347, iteration: 115606
loss: 1.0646576881408691,grad_norm: 0.9999997011481162, iteration: 115607
loss: 1.0597021579742432,grad_norm: 0.9999995337624399, iteration: 115608
loss: 1.0043468475341797,grad_norm: 0.9144242645036941, iteration: 115609
loss: 1.040651559829712,grad_norm: 0.9999992506750854, iteration: 115610
loss: 1.004589319229126,grad_norm: 0.9999991770028324, iteration: 115611
loss: 1.1195510625839233,grad_norm: 0.9999998653443974, iteration: 115612
loss: 1.0614060163497925,grad_norm: 0.9011163004865023, iteration: 115613
loss: 1.1282929182052612,grad_norm: 0.9999994270881231, iteration: 115614
loss: 1.0023070573806763,grad_norm: 0.9999994362767524, iteration: 115615
loss: 1.0885875225067139,grad_norm: 0.999999376341229, iteration: 115616
loss: 1.026551365852356,grad_norm: 0.9999992395373554, iteration: 115617
loss: 1.074630856513977,grad_norm: 0.9999993316271228, iteration: 115618
loss: 1.0579560995101929,grad_norm: 0.9999994111811357, iteration: 115619
loss: 1.0921175479888916,grad_norm: 0.9999999545786786, iteration: 115620
loss: 1.1276576519012451,grad_norm: 0.9999995949510978, iteration: 115621
loss: 1.0852123498916626,grad_norm: 0.9999994585690415, iteration: 115622
loss: 1.0880541801452637,grad_norm: 0.9999998411648647, iteration: 115623
loss: 1.1333136558532715,grad_norm: 0.9999998351564533, iteration: 115624
loss: 1.019641637802124,grad_norm: 0.8832212069331159, iteration: 115625
loss: 1.0908440351486206,grad_norm: 0.9999995979830568, iteration: 115626
loss: 1.0174236297607422,grad_norm: 0.9999992263874129, iteration: 115627
loss: 1.1576684713363647,grad_norm: 0.9999995043001798, iteration: 115628
loss: 1.0770041942596436,grad_norm: 0.9999991772685438, iteration: 115629
loss: 1.046159267425537,grad_norm: 0.9999994990722658, iteration: 115630
loss: 1.1160633563995361,grad_norm: 0.9999997861723112, iteration: 115631
loss: 1.0318149328231812,grad_norm: 0.9999994746970857, iteration: 115632
loss: 1.042955756187439,grad_norm: 0.9999995694410548, iteration: 115633
loss: 1.0362104177474976,grad_norm: 0.9999997601949651, iteration: 115634
loss: 1.1600978374481201,grad_norm: 0.9999996444856505, iteration: 115635
loss: 1.0557178258895874,grad_norm: 0.9999992599908002, iteration: 115636
loss: 1.003007173538208,grad_norm: 0.9999991512276777, iteration: 115637
loss: 1.0311996936798096,grad_norm: 0.9999994631180482, iteration: 115638
loss: 1.1382213830947876,grad_norm: 0.999999555174267, iteration: 115639
loss: 1.0625642538070679,grad_norm: 0.9999990262749747, iteration: 115640
loss: 1.1691967248916626,grad_norm: 0.9999995938334065, iteration: 115641
loss: 1.0726897716522217,grad_norm: 0.9999994024192947, iteration: 115642
loss: 1.1135625839233398,grad_norm: 0.9999996104133794, iteration: 115643
loss: 1.0472267866134644,grad_norm: 0.9999994705075874, iteration: 115644
loss: 1.0293046236038208,grad_norm: 0.9999992733258398, iteration: 115645
loss: 1.1409602165222168,grad_norm: 0.9999997538319253, iteration: 115646
loss: 1.1100902557373047,grad_norm: 0.9999996461871703, iteration: 115647
loss: 1.005661964416504,grad_norm: 0.999999577011581, iteration: 115648
loss: 1.1281980276107788,grad_norm: 0.9999993044888598, iteration: 115649
loss: 1.0668528079986572,grad_norm: 0.9999996574845911, iteration: 115650
loss: 1.1078636646270752,grad_norm: 0.9999995536294161, iteration: 115651
loss: 1.196847677230835,grad_norm: 0.9999996949776101, iteration: 115652
loss: 1.1676361560821533,grad_norm: 0.9999999771071766, iteration: 115653
loss: 1.089765191078186,grad_norm: 0.9999998913997626, iteration: 115654
loss: 1.009670615196228,grad_norm: 0.9999991715124156, iteration: 115655
loss: 1.06821870803833,grad_norm: 0.9999990857908198, iteration: 115656
loss: 1.055788516998291,grad_norm: 0.9999992782536308, iteration: 115657
loss: 1.0184047222137451,grad_norm: 0.9999994497001805, iteration: 115658
loss: 1.1186383962631226,grad_norm: 0.9999996506930341, iteration: 115659
loss: 1.0821483135223389,grad_norm: 0.9999991303507587, iteration: 115660
loss: 1.0797419548034668,grad_norm: 0.9999991787549227, iteration: 115661
loss: 1.0527105331420898,grad_norm: 0.9999999526208901, iteration: 115662
loss: 1.122408151626587,grad_norm: 0.9999994881008012, iteration: 115663
loss: 1.1134562492370605,grad_norm: 0.9999992917686502, iteration: 115664
loss: 1.1700072288513184,grad_norm: 0.9999998890997304, iteration: 115665
loss: 1.1519502401351929,grad_norm: 0.9999993891339394, iteration: 115666
loss: 1.0253320932388306,grad_norm: 0.9999994596087328, iteration: 115667
loss: 1.1056674718856812,grad_norm: 0.9999999654078445, iteration: 115668
loss: 1.0456783771514893,grad_norm: 0.9999997354598261, iteration: 115669
loss: 1.043529987335205,grad_norm: 0.9999992209208048, iteration: 115670
loss: 1.0482620000839233,grad_norm: 0.9999997649275622, iteration: 115671
loss: 1.0340055227279663,grad_norm: 0.9999993238081868, iteration: 115672
loss: 1.1050359010696411,grad_norm: 0.9999998223504052, iteration: 115673
loss: 1.040541648864746,grad_norm: 0.9999994160593476, iteration: 115674
loss: 0.9833162426948547,grad_norm: 0.9098922218391882, iteration: 115675
loss: 1.0806101560592651,grad_norm: 0.9999996654768236, iteration: 115676
loss: 1.0842533111572266,grad_norm: 0.9999991949109808, iteration: 115677
loss: 1.0541515350341797,grad_norm: 0.885816185654168, iteration: 115678
loss: 1.0265121459960938,grad_norm: 0.9999996066175987, iteration: 115679
loss: 1.0251272916793823,grad_norm: 0.8967267436516267, iteration: 115680
loss: 1.0331772565841675,grad_norm: 0.9999993884138132, iteration: 115681
loss: 1.0816696882247925,grad_norm: 0.9999993872465271, iteration: 115682
loss: 0.9834691286087036,grad_norm: 0.9518768281870279, iteration: 115683
loss: 1.1060739755630493,grad_norm: 0.999999970407228, iteration: 115684
loss: 1.035079836845398,grad_norm: 0.999999699039599, iteration: 115685
loss: 1.1627851724624634,grad_norm: 0.9999998359534904, iteration: 115686
loss: 1.124777913093567,grad_norm: 0.9999998711273232, iteration: 115687
loss: 1.0590312480926514,grad_norm: 0.9999994533547973, iteration: 115688
loss: 1.2669899463653564,grad_norm: 0.999999954700803, iteration: 115689
loss: 1.0326857566833496,grad_norm: 0.7718095365200359, iteration: 115690
loss: 1.0487300157546997,grad_norm: 0.9461266312084272, iteration: 115691
loss: 1.009822964668274,grad_norm: 0.999999514206404, iteration: 115692
loss: 1.0426806211471558,grad_norm: 0.9999993030864011, iteration: 115693
loss: 1.063679575920105,grad_norm: 0.9801809597919768, iteration: 115694
loss: 1.0806251764297485,grad_norm: 0.9999998058980781, iteration: 115695
loss: 1.061479926109314,grad_norm: 0.999999575099087, iteration: 115696
loss: 1.0724101066589355,grad_norm: 0.9999996337428496, iteration: 115697
loss: 1.0648483037948608,grad_norm: 0.9999995474130886, iteration: 115698
loss: 1.189923882484436,grad_norm: 0.9999996154854512, iteration: 115699
loss: 0.9891307353973389,grad_norm: 0.8292460262147417, iteration: 115700
loss: 1.04888916015625,grad_norm: 0.9999991697646453, iteration: 115701
loss: 1.0526483058929443,grad_norm: 0.9392119925276207, iteration: 115702
loss: 0.9962314367294312,grad_norm: 0.9999991032336677, iteration: 115703
loss: 1.0351464748382568,grad_norm: 0.9999997177671753, iteration: 115704
loss: 1.0286415815353394,grad_norm: 0.9999993287575948, iteration: 115705
loss: 1.0305004119873047,grad_norm: 0.999999865828423, iteration: 115706
loss: 1.0417064428329468,grad_norm: 0.9999991311171476, iteration: 115707
loss: 1.3010720014572144,grad_norm: 0.9999998158386483, iteration: 115708
loss: 1.037376880645752,grad_norm: 0.949758510325638, iteration: 115709
loss: 1.0866419076919556,grad_norm: 0.9999990763471729, iteration: 115710
loss: 1.1437153816223145,grad_norm: 0.9999995986951984, iteration: 115711
loss: 1.0410938262939453,grad_norm: 0.9999999976115865, iteration: 115712
loss: 1.2249313592910767,grad_norm: 0.9999998996155479, iteration: 115713
loss: 1.0483382940292358,grad_norm: 0.9999992587062919, iteration: 115714
loss: 1.1017934083938599,grad_norm: 0.9999996467051129, iteration: 115715
loss: 1.0256409645080566,grad_norm: 0.9999991822395533, iteration: 115716
loss: 1.1617579460144043,grad_norm: 0.9999998492485471, iteration: 115717
loss: 1.0230059623718262,grad_norm: 0.9999990410219726, iteration: 115718
loss: 1.0382267236709595,grad_norm: 0.999999496178564, iteration: 115719
loss: 1.1038600206375122,grad_norm: 0.9999995995004826, iteration: 115720
loss: 1.0511959791183472,grad_norm: 0.9999999194931302, iteration: 115721
loss: 1.190048336982727,grad_norm: 0.9999999198655144, iteration: 115722
loss: 1.1295403242111206,grad_norm: 0.9999998438000395, iteration: 115723
loss: 1.0029654502868652,grad_norm: 0.9999993200089684, iteration: 115724
loss: 1.0151910781860352,grad_norm: 0.9333095263927516, iteration: 115725
loss: 1.1016292572021484,grad_norm: 0.9999997179859865, iteration: 115726
loss: 1.0254427194595337,grad_norm: 0.8841149125876007, iteration: 115727
loss: 1.0304691791534424,grad_norm: 0.999999540827204, iteration: 115728
loss: 1.0623615980148315,grad_norm: 0.9999991132459439, iteration: 115729
loss: 1.1453089714050293,grad_norm: 0.9999997788729417, iteration: 115730
loss: 1.0979567766189575,grad_norm: 0.9999999643847333, iteration: 115731
loss: 1.027738332748413,grad_norm: 0.9999998671041433, iteration: 115732
loss: 1.0497602224349976,grad_norm: 0.9999996522087535, iteration: 115733
loss: 1.0491465330123901,grad_norm: 0.9999991768784313, iteration: 115734
loss: 1.0246102809906006,grad_norm: 0.9999995051264297, iteration: 115735
loss: 1.1468197107315063,grad_norm: 0.9999996439115008, iteration: 115736
loss: 1.1519300937652588,grad_norm: 0.9999997905698961, iteration: 115737
loss: 1.0749434232711792,grad_norm: 0.9999991282638165, iteration: 115738
loss: 1.0678468942642212,grad_norm: 0.9999998499600492, iteration: 115739
loss: 1.0239768028259277,grad_norm: 0.9999993517930548, iteration: 115740
loss: 1.1003822088241577,grad_norm: 0.9999996021936732, iteration: 115741
loss: 1.164027452468872,grad_norm: 0.999999683154004, iteration: 115742
loss: 1.2755662202835083,grad_norm: 0.9999999822309864, iteration: 115743
loss: 1.0551457405090332,grad_norm: 0.9999994592807836, iteration: 115744
loss: 1.0542113780975342,grad_norm: 0.9999996362283171, iteration: 115745
loss: 1.0804345607757568,grad_norm: 0.999999620816582, iteration: 115746
loss: 1.0411854982376099,grad_norm: 0.999999192586161, iteration: 115747
loss: 1.0623191595077515,grad_norm: 0.9999996548214961, iteration: 115748
loss: 1.1534072160720825,grad_norm: 0.9999992018588847, iteration: 115749
loss: 1.0615507364273071,grad_norm: 0.9999994206964689, iteration: 115750
loss: 1.1092898845672607,grad_norm: 0.9999996246756188, iteration: 115751
loss: 1.068106770515442,grad_norm: 0.9999992468806532, iteration: 115752
loss: 0.9936745166778564,grad_norm: 0.8311746585586857, iteration: 115753
loss: 1.0013163089752197,grad_norm: 0.9999995741866032, iteration: 115754
loss: 1.0434962511062622,grad_norm: 0.9999991722466142, iteration: 115755
loss: 1.1365734338760376,grad_norm: 0.9999993487606438, iteration: 115756
loss: 1.032781958580017,grad_norm: 0.9999995362139387, iteration: 115757
loss: 1.0510374307632446,grad_norm: 0.9999991899253213, iteration: 115758
loss: 1.0133676528930664,grad_norm: 0.8314173572211145, iteration: 115759
loss: 1.046359896659851,grad_norm: 0.9999991195230012, iteration: 115760
loss: 1.166500449180603,grad_norm: 0.9999996755178694, iteration: 115761
loss: 1.0517834424972534,grad_norm: 0.9999994388006089, iteration: 115762
loss: 1.1161205768585205,grad_norm: 0.9999999369534888, iteration: 115763
loss: 1.0797514915466309,grad_norm: 0.9999995546924264, iteration: 115764
loss: 1.0811021327972412,grad_norm: 0.9999994987484303, iteration: 115765
loss: 1.1190518140792847,grad_norm: 0.9999996402887512, iteration: 115766
loss: 1.009979248046875,grad_norm: 0.9999997752655272, iteration: 115767
loss: 1.177085280418396,grad_norm: 0.9999992373308744, iteration: 115768
loss: 1.081465244293213,grad_norm: 0.9999997253195331, iteration: 115769
loss: 1.0705825090408325,grad_norm: 0.9999999181953857, iteration: 115770
loss: 1.0902491807937622,grad_norm: 0.9999996422897226, iteration: 115771
loss: 1.0334014892578125,grad_norm: 0.9999997608799834, iteration: 115772
loss: 1.1249797344207764,grad_norm: 0.9999997963458207, iteration: 115773
loss: 1.130080223083496,grad_norm: 0.9999995592791335, iteration: 115774
loss: 0.9985503554344177,grad_norm: 0.9999993828512622, iteration: 115775
loss: 1.104567289352417,grad_norm: 0.9999994298244774, iteration: 115776
loss: 1.1237119436264038,grad_norm: 0.9999995243529635, iteration: 115777
loss: 1.0957480669021606,grad_norm: 0.9999998802074164, iteration: 115778
loss: 1.004093050956726,grad_norm: 0.9999992466710479, iteration: 115779
loss: 1.055055022239685,grad_norm: 0.9301835821452389, iteration: 115780
loss: 1.0564301013946533,grad_norm: 0.999999454190114, iteration: 115781
loss: 1.0658797025680542,grad_norm: 0.9999993780432781, iteration: 115782
loss: 1.0192698240280151,grad_norm: 0.9999993814004529, iteration: 115783
loss: 1.1018937826156616,grad_norm: 0.9999999172333359, iteration: 115784
loss: 1.1075429916381836,grad_norm: 0.9999997461917408, iteration: 115785
loss: 1.0776269435882568,grad_norm: 0.9999994643780684, iteration: 115786
loss: 1.1159626245498657,grad_norm: 0.9999990300525101, iteration: 115787
loss: 1.0366390943527222,grad_norm: 0.9999999202467942, iteration: 115788
loss: 1.0742669105529785,grad_norm: 0.8786179366437018, iteration: 115789
loss: 1.0139210224151611,grad_norm: 0.999999215830471, iteration: 115790
loss: 1.0834921598434448,grad_norm: 0.9999992018022863, iteration: 115791
loss: 0.9970532655715942,grad_norm: 0.9999992003240826, iteration: 115792
loss: 1.1020337343215942,grad_norm: 0.9999995583238026, iteration: 115793
loss: 1.0834033489227295,grad_norm: 0.999999691999653, iteration: 115794
loss: 1.2211495637893677,grad_norm: 0.9999996352098728, iteration: 115795
loss: 1.043410062789917,grad_norm: 0.999999707137573, iteration: 115796
loss: 1.0389224290847778,grad_norm: 0.9999996942767836, iteration: 115797
loss: 1.1840282678604126,grad_norm: 0.999999971213046, iteration: 115798
loss: 1.1998289823532104,grad_norm: 0.9999995440962416, iteration: 115799
loss: 1.0008760690689087,grad_norm: 0.9999995376773563, iteration: 115800
loss: 1.0719882249832153,grad_norm: 1.0000000519512302, iteration: 115801
loss: 1.0526763200759888,grad_norm: 0.9999992349869049, iteration: 115802
loss: 0.9926958084106445,grad_norm: 0.9999992554767978, iteration: 115803
loss: 1.0188086032867432,grad_norm: 0.9999994955254797, iteration: 115804
loss: 1.172628402709961,grad_norm: 0.9999994939024105, iteration: 115805
loss: 1.007716417312622,grad_norm: 0.9999996575573222, iteration: 115806
loss: 1.086110234260559,grad_norm: 1.0000000096049158, iteration: 115807
loss: 1.139883279800415,grad_norm: 0.9999999380984077, iteration: 115808
loss: 1.024780035018921,grad_norm: 0.999999038138204, iteration: 115809
loss: 1.0306026935577393,grad_norm: 0.9999993109333254, iteration: 115810
loss: 0.9974701404571533,grad_norm: 0.9999994435510831, iteration: 115811
loss: 1.146622657775879,grad_norm: 0.9999996736218031, iteration: 115812
loss: 1.126001000404358,grad_norm: 0.9999995296988216, iteration: 115813
loss: 1.0741103887557983,grad_norm: 0.9999996382699008, iteration: 115814
loss: 1.0078353881835938,grad_norm: 0.999999967796116, iteration: 115815
loss: 1.1182140111923218,grad_norm: 0.9999990264482191, iteration: 115816
loss: 1.0419952869415283,grad_norm: 0.9217752781081414, iteration: 115817
loss: 1.1086983680725098,grad_norm: 0.9999995432283338, iteration: 115818
loss: 0.975155770778656,grad_norm: 0.9999990880590347, iteration: 115819
loss: 1.0948485136032104,grad_norm: 0.8983097106221738, iteration: 115820
loss: 1.0280089378356934,grad_norm: 0.9354883746827279, iteration: 115821
loss: 1.013858437538147,grad_norm: 0.9164965041530793, iteration: 115822
loss: 1.0579490661621094,grad_norm: 0.99999923817286, iteration: 115823
loss: 1.0414972305297852,grad_norm: 0.9999999420641256, iteration: 115824
loss: 1.1313589811325073,grad_norm: 0.9999995225390438, iteration: 115825
loss: 1.0265241861343384,grad_norm: 0.9999992984765507, iteration: 115826
loss: 1.0373287200927734,grad_norm: 0.9999993788604187, iteration: 115827
loss: 1.0071271657943726,grad_norm: 0.9999992098809996, iteration: 115828
loss: 1.0106576681137085,grad_norm: 0.9999992218526716, iteration: 115829
loss: 1.0226225852966309,grad_norm: 0.9999990497521098, iteration: 115830
loss: 1.0802173614501953,grad_norm: 1.000000012234535, iteration: 115831
loss: 1.0555025339126587,grad_norm: 0.9999994850256607, iteration: 115832
loss: 1.020151138305664,grad_norm: 0.8351976554922494, iteration: 115833
loss: 1.0426346063613892,grad_norm: 0.9999993520729417, iteration: 115834
loss: 1.1030546426773071,grad_norm: 0.9999994526866492, iteration: 115835
loss: 1.0233235359191895,grad_norm: 0.8856316330939221, iteration: 115836
loss: 0.982573926448822,grad_norm: 0.9999989896950862, iteration: 115837
loss: 1.059556245803833,grad_norm: 0.9999991650169858, iteration: 115838
loss: 1.0935640335083008,grad_norm: 0.9999999460443493, iteration: 115839
loss: 1.0146197080612183,grad_norm: 0.999999158741764, iteration: 115840
loss: 1.0766525268554688,grad_norm: 0.9999996516173032, iteration: 115841
loss: 1.000349760055542,grad_norm: 0.8135861810672134, iteration: 115842
loss: 1.133436918258667,grad_norm: 0.9999997193233744, iteration: 115843
loss: 0.9994116425514221,grad_norm: 0.9999995388558174, iteration: 115844
loss: 1.0294438600540161,grad_norm: 0.9233191640219095, iteration: 115845
loss: 1.214158535003662,grad_norm: 0.9999996025432262, iteration: 115846
loss: 1.0480552911758423,grad_norm: 0.9999996307714937, iteration: 115847
loss: 1.0499359369277954,grad_norm: 0.9999995970796409, iteration: 115848
loss: 1.0335019826889038,grad_norm: 0.9999998106086951, iteration: 115849
loss: 1.1104789972305298,grad_norm: 0.9999997948131126, iteration: 115850
loss: 1.0653856992721558,grad_norm: 0.9999993749611129, iteration: 115851
loss: 1.057196855545044,grad_norm: 0.9999998582102465, iteration: 115852
loss: 1.3442459106445312,grad_norm: 0.9999997588936846, iteration: 115853
loss: 0.9830119609832764,grad_norm: 0.9999993033888814, iteration: 115854
loss: 1.0980342626571655,grad_norm: 0.9770233388766056, iteration: 115855
loss: 1.0315101146697998,grad_norm: 0.9999993978888074, iteration: 115856
loss: 1.073311448097229,grad_norm: 0.9999993390704541, iteration: 115857
loss: 0.997394859790802,grad_norm: 0.9859135324795495, iteration: 115858
loss: 1.003325343132019,grad_norm: 0.9999990932511043, iteration: 115859
loss: 1.081620454788208,grad_norm: 0.9999991439414909, iteration: 115860
loss: 1.0412131547927856,grad_norm: 0.9999993189903497, iteration: 115861
loss: 1.129845142364502,grad_norm: 0.9999993926448097, iteration: 115862
loss: 1.1086792945861816,grad_norm: 0.9999994097763452, iteration: 115863
loss: 1.0974773168563843,grad_norm: 0.999999286935463, iteration: 115864
loss: 1.1246321201324463,grad_norm: 0.9999997331843913, iteration: 115865
loss: 1.0092700719833374,grad_norm: 0.9999993479939647, iteration: 115866
loss: 1.027405858039856,grad_norm: 0.9999999194940287, iteration: 115867
loss: 1.022160530090332,grad_norm: 0.9999992480416324, iteration: 115868
loss: 1.0490479469299316,grad_norm: 0.935073391811049, iteration: 115869
loss: 1.071980357170105,grad_norm: 0.9999993242842048, iteration: 115870
loss: 1.0604839324951172,grad_norm: 0.9999996349734197, iteration: 115871
loss: 1.0504331588745117,grad_norm: 0.9999993018243318, iteration: 115872
loss: 1.0695608854293823,grad_norm: 0.9999993697385618, iteration: 115873
loss: 1.2502436637878418,grad_norm: 0.9999998991107255, iteration: 115874
loss: 1.0099507570266724,grad_norm: 0.9999999522686687, iteration: 115875
loss: 1.0410284996032715,grad_norm: 0.9432161384769572, iteration: 115876
loss: 1.0561636686325073,grad_norm: 0.9999991528559449, iteration: 115877
loss: 0.9997220039367676,grad_norm: 0.9785057099262604, iteration: 115878
loss: 1.0390325784683228,grad_norm: 0.8657048231921823, iteration: 115879
loss: 1.0135471820831299,grad_norm: 0.9674115020503474, iteration: 115880
loss: 1.080826997756958,grad_norm: 0.9999997061761576, iteration: 115881
loss: 1.0199635028839111,grad_norm: 0.9999992962718491, iteration: 115882
loss: 1.0421290397644043,grad_norm: 0.9999994406787919, iteration: 115883
loss: 1.025469183921814,grad_norm: 0.9864558693117191, iteration: 115884
loss: 1.0821812152862549,grad_norm: 0.9999993238717931, iteration: 115885
loss: 1.0295131206512451,grad_norm: 0.9999991414655012, iteration: 115886
loss: 1.0439003705978394,grad_norm: 0.9999992596035612, iteration: 115887
loss: 1.0093811750411987,grad_norm: 0.8476366853511695, iteration: 115888
loss: 1.025349497795105,grad_norm: 0.9999992662752345, iteration: 115889
loss: 1.1142380237579346,grad_norm: 0.9999996388068981, iteration: 115890
loss: 1.011696219444275,grad_norm: 0.9999994134172392, iteration: 115891
loss: 1.000486969947815,grad_norm: 0.9600740583624835, iteration: 115892
loss: 0.9921934008598328,grad_norm: 0.9729246659738139, iteration: 115893
loss: 1.03446364402771,grad_norm: 0.782953322843961, iteration: 115894
loss: 1.0374841690063477,grad_norm: 0.9999996448754567, iteration: 115895
loss: 1.0413228273391724,grad_norm: 0.9999991666888758, iteration: 115896
loss: 1.001219630241394,grad_norm: 0.9999992223906307, iteration: 115897
loss: 1.0793811082839966,grad_norm: 0.9999996122058886, iteration: 115898
loss: 1.0214077234268188,grad_norm: 0.9505017020578057, iteration: 115899
loss: 1.032455325126648,grad_norm: 0.9999998356916263, iteration: 115900
loss: 1.0553123950958252,grad_norm: 0.9999995285919592, iteration: 115901
loss: 1.004528284072876,grad_norm: 0.999999232196743, iteration: 115902
loss: 1.0947335958480835,grad_norm: 0.999999437523399, iteration: 115903
loss: 1.0256069898605347,grad_norm: 0.9999992171716725, iteration: 115904
loss: 1.020408034324646,grad_norm: 0.999999144319827, iteration: 115905
loss: 1.1119775772094727,grad_norm: 0.9999998090429338, iteration: 115906
loss: 1.037430763244629,grad_norm: 0.9999990434373881, iteration: 115907
loss: 1.0270471572875977,grad_norm: 0.9999997203552505, iteration: 115908
loss: 1.1250842809677124,grad_norm: 0.9999993506269013, iteration: 115909
loss: 1.0920724868774414,grad_norm: 0.9999998096481175, iteration: 115910
loss: 1.045019507408142,grad_norm: 0.9999998716713975, iteration: 115911
loss: 1.0245890617370605,grad_norm: 0.9999995301686472, iteration: 115912
loss: 1.2118825912475586,grad_norm: 0.999999229682297, iteration: 115913
loss: 1.019832730293274,grad_norm: 0.8474439031454462, iteration: 115914
loss: 0.977616012096405,grad_norm: 0.9586920303696773, iteration: 115915
loss: 1.0331162214279175,grad_norm: 0.9999991385886994, iteration: 115916
loss: 1.0185099840164185,grad_norm: 0.9190735317095517, iteration: 115917
loss: 1.0265376567840576,grad_norm: 1.0000000248407526, iteration: 115918
loss: 1.1101452112197876,grad_norm: 0.9999992719763174, iteration: 115919
loss: 1.06294584274292,grad_norm: 0.9999993786455089, iteration: 115920
loss: 1.0455856323242188,grad_norm: 0.9999993691620269, iteration: 115921
loss: 1.0788110494613647,grad_norm: 0.9999999762907745, iteration: 115922
loss: 1.0206080675125122,grad_norm: 0.9999990822049067, iteration: 115923
loss: 1.0053976774215698,grad_norm: 0.999999060502282, iteration: 115924
loss: 1.0548477172851562,grad_norm: 0.9999992247785402, iteration: 115925
loss: 1.069249153137207,grad_norm: 0.9999998752365826, iteration: 115926
loss: 1.0376582145690918,grad_norm: 0.9999994683427378, iteration: 115927
loss: 1.0050618648529053,grad_norm: 0.9999992076422636, iteration: 115928
loss: 1.1291728019714355,grad_norm: 0.999999761556413, iteration: 115929
loss: 1.0755219459533691,grad_norm: 0.999999271286445, iteration: 115930
loss: 1.1450310945510864,grad_norm: 0.9999998600566943, iteration: 115931
loss: 1.1445783376693726,grad_norm: 0.9999998839421258, iteration: 115932
loss: 1.01361083984375,grad_norm: 0.9999990724316435, iteration: 115933
loss: 1.0062835216522217,grad_norm: 0.9999996015427007, iteration: 115934
loss: 1.0115386247634888,grad_norm: 0.9999991866494407, iteration: 115935
loss: 1.0446244478225708,grad_norm: 0.9999994605037247, iteration: 115936
loss: 1.0597769021987915,grad_norm: 0.9999996069746757, iteration: 115937
loss: 1.0516690015792847,grad_norm: 0.9999993533019391, iteration: 115938
loss: 1.0182875394821167,grad_norm: 0.9340125000955146, iteration: 115939
loss: 1.0198907852172852,grad_norm: 0.999999552081997, iteration: 115940
loss: 1.057726502418518,grad_norm: 0.9999992475439927, iteration: 115941
loss: 1.023160457611084,grad_norm: 0.9999999898707581, iteration: 115942
loss: 1.0188186168670654,grad_norm: 0.9999991460754734, iteration: 115943
loss: 0.978429913520813,grad_norm: 0.8457353591016741, iteration: 115944
loss: 1.02266263961792,grad_norm: 0.9828648939463067, iteration: 115945
loss: 1.0565247535705566,grad_norm: 0.9999993599747693, iteration: 115946
loss: 0.9658510088920593,grad_norm: 0.9824308982957264, iteration: 115947
loss: 1.0453038215637207,grad_norm: 0.9999998901849881, iteration: 115948
loss: 1.039287805557251,grad_norm: 0.986756765526078, iteration: 115949
loss: 1.0511568784713745,grad_norm: 0.9999992870969985, iteration: 115950
loss: 1.0711956024169922,grad_norm: 0.9999993642513255, iteration: 115951
loss: 1.0162367820739746,grad_norm: 0.96792659576889, iteration: 115952
loss: 0.9824842810630798,grad_norm: 0.9999995416411149, iteration: 115953
loss: 1.0369948148727417,grad_norm: 0.9999998943997833, iteration: 115954
loss: 1.0362006425857544,grad_norm: 0.9999998553314524, iteration: 115955
loss: 1.0553038120269775,grad_norm: 0.9999991441102043, iteration: 115956
loss: 1.1242806911468506,grad_norm: 0.9999996041273692, iteration: 115957
loss: 1.0180084705352783,grad_norm: 0.9999993374618441, iteration: 115958
loss: 1.0439825057983398,grad_norm: 0.9999998037813718, iteration: 115959
loss: 0.9883033633232117,grad_norm: 0.7840921912833743, iteration: 115960
loss: 1.0489331483840942,grad_norm: 0.9999997089602639, iteration: 115961
loss: 1.118614912033081,grad_norm: 0.9999995604094453, iteration: 115962
loss: 1.0210646390914917,grad_norm: 0.9999993022711945, iteration: 115963
loss: 1.025485634803772,grad_norm: 0.9999994748415127, iteration: 115964
loss: 1.0092562437057495,grad_norm: 0.9999998804225744, iteration: 115965
loss: 0.9426926970481873,grad_norm: 0.9999990275602114, iteration: 115966
loss: 0.9987348318099976,grad_norm: 0.9999993770605412, iteration: 115967
loss: 1.072809100151062,grad_norm: 0.9999995928296536, iteration: 115968
loss: 1.0309813022613525,grad_norm: 0.9999992122945327, iteration: 115969
loss: 1.1177152395248413,grad_norm: 0.9999994459961283, iteration: 115970
loss: 1.0558161735534668,grad_norm: 0.937726776746389, iteration: 115971
loss: 1.0081363916397095,grad_norm: 0.9870978646978825, iteration: 115972
loss: 1.0246710777282715,grad_norm: 0.999999979076802, iteration: 115973
loss: 1.0333738327026367,grad_norm: 0.9999994298568382, iteration: 115974
loss: 1.0281542539596558,grad_norm: 0.9999991671818109, iteration: 115975
loss: 1.0294060707092285,grad_norm: 0.9999991739795006, iteration: 115976
loss: 1.125126600265503,grad_norm: 0.999999919466864, iteration: 115977
loss: 1.0647282600402832,grad_norm: 0.9999995671964129, iteration: 115978
loss: 1.0341989994049072,grad_norm: 0.9999994753511584, iteration: 115979
loss: 1.1330724954605103,grad_norm: 0.9999999196823262, iteration: 115980
loss: 1.0306217670440674,grad_norm: 0.99999976305915, iteration: 115981
loss: 1.1140598058700562,grad_norm: 0.9999994246072891, iteration: 115982
loss: 1.0155956745147705,grad_norm: 0.9999998004684082, iteration: 115983
loss: 1.0494053363800049,grad_norm: 0.999999824341638, iteration: 115984
loss: 0.9897510409355164,grad_norm: 0.9999992681672364, iteration: 115985
loss: 1.0616629123687744,grad_norm: 0.999999164265594, iteration: 115986
loss: 1.133896827697754,grad_norm: 0.999999689520458, iteration: 115987
loss: 1.1473571062088013,grad_norm: 0.9999995883578887, iteration: 115988
loss: 1.0131404399871826,grad_norm: 0.9999994725581984, iteration: 115989
loss: 1.0401297807693481,grad_norm: 0.9999993630605785, iteration: 115990
loss: 0.9999412894248962,grad_norm: 0.9999989800375745, iteration: 115991
loss: 1.0551490783691406,grad_norm: 0.9999992571163008, iteration: 115992
loss: 1.0551767349243164,grad_norm: 0.9999994962268107, iteration: 115993
loss: 1.0069109201431274,grad_norm: 0.8697041351781961, iteration: 115994
loss: 1.0163825750350952,grad_norm: 0.9999990550651451, iteration: 115995
loss: 1.0534274578094482,grad_norm: 0.942528388108905, iteration: 115996
loss: 1.0508803129196167,grad_norm: 0.9999991342262101, iteration: 115997
loss: 0.9883825182914734,grad_norm: 0.999999208578336, iteration: 115998
loss: 1.0141714811325073,grad_norm: 0.9999993051184195, iteration: 115999
loss: 1.0046080350875854,grad_norm: 0.9999993275021438, iteration: 116000
loss: 0.9885531663894653,grad_norm: 0.9508632310830979, iteration: 116001
loss: 1.130691647529602,grad_norm: 0.9999995759413987, iteration: 116002
loss: 1.0417157411575317,grad_norm: 0.9999990527487115, iteration: 116003
loss: 1.0011682510375977,grad_norm: 0.8166318400784837, iteration: 116004
loss: 1.039137840270996,grad_norm: 0.9999992132007507, iteration: 116005
loss: 1.0192582607269287,grad_norm: 0.8961945618747116, iteration: 116006
loss: 1.0035208463668823,grad_norm: 0.8901523123618832, iteration: 116007
loss: 0.9885212182998657,grad_norm: 0.9999996858256531, iteration: 116008
loss: 1.0699317455291748,grad_norm: 0.9999995112597816, iteration: 116009
loss: 1.0766229629516602,grad_norm: 0.9999995959466653, iteration: 116010
loss: 1.0288809537887573,grad_norm: 0.9999992162686664, iteration: 116011
loss: 1.0218534469604492,grad_norm: 0.9999996271913388, iteration: 116012
loss: 1.0047633647918701,grad_norm: 0.9999992952945208, iteration: 116013
loss: 1.026070475578308,grad_norm: 0.9999995577186029, iteration: 116014
loss: 1.1412886381149292,grad_norm: 0.9999997541460385, iteration: 116015
loss: 1.0491843223571777,grad_norm: 0.9999994152314798, iteration: 116016
loss: 1.0575034618377686,grad_norm: 0.9999996530780348, iteration: 116017
loss: 1.0405802726745605,grad_norm: 0.9999999030214076, iteration: 116018
loss: 1.0626721382141113,grad_norm: 0.9671447993312174, iteration: 116019
loss: 1.0401612520217896,grad_norm: 0.7980083627900693, iteration: 116020
loss: 1.0248878002166748,grad_norm: 0.9999992961847383, iteration: 116021
loss: 1.1698898077011108,grad_norm: 0.9999996380967772, iteration: 116022
loss: 1.0416315793991089,grad_norm: 0.999999276438098, iteration: 116023
loss: 1.1049115657806396,grad_norm: 0.9999998383737466, iteration: 116024
loss: 1.0400866270065308,grad_norm: 0.999999326127416, iteration: 116025
loss: 1.0558713674545288,grad_norm: 0.9999992078783383, iteration: 116026
loss: 1.029758095741272,grad_norm: 0.9999995995208751, iteration: 116027
loss: 1.0421124696731567,grad_norm: 0.9999991161358414, iteration: 116028
loss: 1.0088225603103638,grad_norm: 0.9999998706500031, iteration: 116029
loss: 1.0341888666152954,grad_norm: 0.9999992994885907, iteration: 116030
loss: 0.9815608859062195,grad_norm: 0.9999991099051063, iteration: 116031
loss: 1.0825624465942383,grad_norm: 0.9999996024646387, iteration: 116032
loss: 1.002495527267456,grad_norm: 0.9999991208768816, iteration: 116033
loss: 1.0963736772537231,grad_norm: 0.9999999051913966, iteration: 116034
loss: 1.0551716089248657,grad_norm: 0.9999996319728218, iteration: 116035
loss: 1.0176678895950317,grad_norm: 0.9999991004136283, iteration: 116036
loss: 1.0001848936080933,grad_norm: 0.9556549924291841, iteration: 116037
loss: 1.000570297241211,grad_norm: 0.9999990951304956, iteration: 116038
loss: 1.0961267948150635,grad_norm: 0.9999992916234304, iteration: 116039
loss: 1.0130244493484497,grad_norm: 0.9999990488789647, iteration: 116040
loss: 1.046099066734314,grad_norm: 0.9999996790307786, iteration: 116041
loss: 0.973122239112854,grad_norm: 0.9999989712303756, iteration: 116042
loss: 1.0305428504943848,grad_norm: 0.9165470805919684, iteration: 116043
loss: 1.0662710666656494,grad_norm: 0.9999992439923648, iteration: 116044
loss: 1.0037603378295898,grad_norm: 0.9999990412990605, iteration: 116045
loss: 1.05332350730896,grad_norm: 0.9999995405815766, iteration: 116046
loss: 0.9911853075027466,grad_norm: 0.999999172967359, iteration: 116047
loss: 0.9930754899978638,grad_norm: 0.9999990931620109, iteration: 116048
loss: 0.9799619913101196,grad_norm: 0.9999992012362251, iteration: 116049
loss: 1.0384955406188965,grad_norm: 0.9999995153081901, iteration: 116050
loss: 1.016135334968567,grad_norm: 0.9999998885416986, iteration: 116051
loss: 0.9701175689697266,grad_norm: 0.9072425288093908, iteration: 116052
loss: 0.9901162981987,grad_norm: 0.9999991308737148, iteration: 116053
loss: 1.0102545022964478,grad_norm: 0.9999991423763281, iteration: 116054
loss: 1.053805947303772,grad_norm: 0.9999992684468144, iteration: 116055
loss: 1.0785142183303833,grad_norm: 0.9999997266083536, iteration: 116056
loss: 1.0439401865005493,grad_norm: 0.9999996120347562, iteration: 116057
loss: 1.041121244430542,grad_norm: 0.9999992390469471, iteration: 116058
loss: 1.042176604270935,grad_norm: 0.9999994556849404, iteration: 116059
loss: 1.0309804677963257,grad_norm: 0.972278895028873, iteration: 116060
loss: 0.9728319644927979,grad_norm: 0.9999991432837159, iteration: 116061
loss: 0.9932880997657776,grad_norm: 0.9999993170335668, iteration: 116062
loss: 1.0118504762649536,grad_norm: 0.8944953979290691, iteration: 116063
loss: 1.132097601890564,grad_norm: 0.9999994558149413, iteration: 116064
loss: 1.0305721759796143,grad_norm: 0.9999990980009189, iteration: 116065
loss: 1.0308406352996826,grad_norm: 0.9999994110115941, iteration: 116066
loss: 0.9978818893432617,grad_norm: 0.9215764823340539, iteration: 116067
loss: 1.0624669790267944,grad_norm: 0.9999997427483241, iteration: 116068
loss: 0.971208930015564,grad_norm: 0.9999991327463614, iteration: 116069
loss: 0.9840638041496277,grad_norm: 0.9999994321719039, iteration: 116070
loss: 1.0345423221588135,grad_norm: 0.9595760862704817, iteration: 116071
loss: 1.0808615684509277,grad_norm: 0.9999993474632196, iteration: 116072
loss: 0.9942592978477478,grad_norm: 0.8472492134991229, iteration: 116073
loss: 1.0732123851776123,grad_norm: 0.9999992043852232, iteration: 116074
loss: 0.9882148504257202,grad_norm: 0.950194254068117, iteration: 116075
loss: 1.1247488260269165,grad_norm: 0.9999993891201282, iteration: 116076
loss: 1.0986380577087402,grad_norm: 0.999999873619519, iteration: 116077
loss: 0.9993648529052734,grad_norm: 0.8959736371299339, iteration: 116078
loss: 1.0496851205825806,grad_norm: 0.9999993622908563, iteration: 116079
loss: 0.9742431640625,grad_norm: 0.9999990674133092, iteration: 116080
loss: 1.0075162649154663,grad_norm: 0.9251376318669057, iteration: 116081
loss: 0.9971096515655518,grad_norm: 0.8374253201256444, iteration: 116082
loss: 1.0289623737335205,grad_norm: 0.9999990828640084, iteration: 116083
loss: 0.9875088930130005,grad_norm: 0.9999991024741377, iteration: 116084
loss: 0.9625757336616516,grad_norm: 0.9999991936358793, iteration: 116085
loss: 1.0720100402832031,grad_norm: 0.9999994935876843, iteration: 116086
loss: 1.0509401559829712,grad_norm: 0.9999991209378117, iteration: 116087
loss: 1.046934962272644,grad_norm: 0.9320109532316584, iteration: 116088
loss: 1.0500887632369995,grad_norm: 0.9999995096196284, iteration: 116089
loss: 1.0298871994018555,grad_norm: 0.9999996878094444, iteration: 116090
loss: 1.002759337425232,grad_norm: 0.9380963919016546, iteration: 116091
loss: 1.0794206857681274,grad_norm: 0.9999993460318338, iteration: 116092
loss: 1.181664228439331,grad_norm: 0.9999999245004384, iteration: 116093
loss: 1.0180246829986572,grad_norm: 0.9956789375074663, iteration: 116094
loss: 1.0745967626571655,grad_norm: 0.999999687209288, iteration: 116095
loss: 1.053503155708313,grad_norm: 0.9999994223352383, iteration: 116096
loss: 1.0791237354278564,grad_norm: 0.9999996615209971, iteration: 116097
loss: 0.9741796255111694,grad_norm: 0.9999991448365266, iteration: 116098
loss: 1.1099454164505005,grad_norm: 0.9999992387437919, iteration: 116099
loss: 1.0791664123535156,grad_norm: 0.9815095200140495, iteration: 116100
loss: 1.076655387878418,grad_norm: 0.999999750474189, iteration: 116101
loss: 1.114918828010559,grad_norm: 0.9999995510252407, iteration: 116102
loss: 1.0163614749908447,grad_norm: 0.9999994650039357, iteration: 116103
loss: 1.0031933784484863,grad_norm: 0.9420944858781297, iteration: 116104
loss: 1.0403542518615723,grad_norm: 0.9999998512060277, iteration: 116105
loss: 1.2102571725845337,grad_norm: 0.9999996027106923, iteration: 116106
loss: 0.9802675843238831,grad_norm: 0.9999994305262289, iteration: 116107
loss: 1.1075063943862915,grad_norm: 0.9999999245745308, iteration: 116108
loss: 1.0712954998016357,grad_norm: 0.9999993490483996, iteration: 116109
loss: 1.0582219362258911,grad_norm: 0.9586546578233962, iteration: 116110
loss: 1.1044524908065796,grad_norm: 0.9999991567581885, iteration: 116111
loss: 1.0683364868164062,grad_norm: 0.9999994743369875, iteration: 116112
loss: 1.144060730934143,grad_norm: 0.9999996875571244, iteration: 116113
loss: 1.0684328079223633,grad_norm: 0.9999995421838351, iteration: 116114
loss: 1.1689507961273193,grad_norm: 0.9999995083859956, iteration: 116115
loss: 1.035377860069275,grad_norm: 0.975612353278508, iteration: 116116
loss: 1.0390987396240234,grad_norm: 0.9999992713312661, iteration: 116117
loss: 1.0252078771591187,grad_norm: 0.9999997395484646, iteration: 116118
loss: 1.1030943393707275,grad_norm: 0.9999999925937324, iteration: 116119
loss: 1.1358473300933838,grad_norm: 0.999999747741584, iteration: 116120
loss: 1.0406363010406494,grad_norm: 0.9999998568286436, iteration: 116121
loss: 1.0291978120803833,grad_norm: 0.9999995100557962, iteration: 116122
loss: 1.062674880027771,grad_norm: 0.9999998346626283, iteration: 116123
loss: 1.1210500001907349,grad_norm: 0.9999994976190989, iteration: 116124
loss: 1.0028899908065796,grad_norm: 0.9700069551489847, iteration: 116125
loss: 1.0062237977981567,grad_norm: 0.9999990819613093, iteration: 116126
loss: 1.031381607055664,grad_norm: 0.9544712807161486, iteration: 116127
loss: 0.9878277778625488,grad_norm: 0.9099897209064977, iteration: 116128
loss: 1.0284432172775269,grad_norm: 0.9999998589239402, iteration: 116129
loss: 1.0018380880355835,grad_norm: 0.9999998000829688, iteration: 116130
loss: 1.0557339191436768,grad_norm: 0.999999413338003, iteration: 116131
loss: 1.026183843612671,grad_norm: 0.9482001133236581, iteration: 116132
loss: 1.0384714603424072,grad_norm: 0.9999993869521763, iteration: 116133
loss: 1.0249733924865723,grad_norm: 0.9940527704771608, iteration: 116134
loss: 0.9850003123283386,grad_norm: 0.9579302716461041, iteration: 116135
loss: 1.0264700651168823,grad_norm: 0.9999996177514848, iteration: 116136
loss: 0.9805803298950195,grad_norm: 0.999999054912779, iteration: 116137
loss: 1.0139926671981812,grad_norm: 0.9999991140622637, iteration: 116138
loss: 1.0214139223098755,grad_norm: 0.9999991568416479, iteration: 116139
loss: 0.9955688118934631,grad_norm: 0.845158492319569, iteration: 116140
loss: 0.9942101836204529,grad_norm: 0.8050153529270203, iteration: 116141
loss: 1.0349763631820679,grad_norm: 0.9999995199217552, iteration: 116142
loss: 1.1007261276245117,grad_norm: 0.999999851279095, iteration: 116143
loss: 1.0936955213546753,grad_norm: 0.9999999398849135, iteration: 116144
loss: 1.0161186456680298,grad_norm: 0.9999995131671945, iteration: 116145
loss: 1.040405511856079,grad_norm: 0.9999998905719424, iteration: 116146
loss: 1.0030760765075684,grad_norm: 0.9999991437651371, iteration: 116147
loss: 0.9906162023544312,grad_norm: 0.924370165811207, iteration: 116148
loss: 1.0228877067565918,grad_norm: 0.8614357376350567, iteration: 116149
loss: 1.0256327390670776,grad_norm: 0.999999395746987, iteration: 116150
loss: 1.0694917440414429,grad_norm: 0.9999997959902183, iteration: 116151
loss: 1.1449952125549316,grad_norm: 0.9999995895851856, iteration: 116152
loss: 1.0363690853118896,grad_norm: 0.9999998847196352, iteration: 116153
loss: 1.0918378829956055,grad_norm: 0.9999995645181964, iteration: 116154
loss: 1.1260968446731567,grad_norm: 0.9999996409261153, iteration: 116155
loss: 1.0091179609298706,grad_norm: 0.9999994311390205, iteration: 116156
loss: 1.0701254606246948,grad_norm: 0.9999997666098012, iteration: 116157
loss: 1.0166022777557373,grad_norm: 0.9999998888939235, iteration: 116158
loss: 1.0441786050796509,grad_norm: 0.8973954215289359, iteration: 116159
loss: 1.0224745273590088,grad_norm: 0.9999996319413337, iteration: 116160
loss: 1.0419692993164062,grad_norm: 0.9999991716720449, iteration: 116161
loss: 1.0920451879501343,grad_norm: 0.9999997904975583, iteration: 116162
loss: 0.9920698404312134,grad_norm: 0.8202856371603064, iteration: 116163
loss: 1.0251281261444092,grad_norm: 0.9999994696185588, iteration: 116164
loss: 0.9995052218437195,grad_norm: 0.999999044926775, iteration: 116165
loss: 1.039900541305542,grad_norm: 0.9999990611748362, iteration: 116166
loss: 1.0770326852798462,grad_norm: 0.9999998982050498, iteration: 116167
loss: 1.0270107984542847,grad_norm: 0.9999998466389604, iteration: 116168
loss: 1.0016402006149292,grad_norm: 0.9999993670597814, iteration: 116169
loss: 1.0141055583953857,grad_norm: 0.9999991861368014, iteration: 116170
loss: 1.013668417930603,grad_norm: 0.8545194832266267, iteration: 116171
loss: 1.0879970788955688,grad_norm: 0.9999997293695833, iteration: 116172
loss: 1.0404729843139648,grad_norm: 0.999999079918603, iteration: 116173
loss: 1.1350384950637817,grad_norm: 0.9999998824824151, iteration: 116174
loss: 1.0845434665679932,grad_norm: 0.9999998591258863, iteration: 116175
loss: 1.1178392171859741,grad_norm: 0.9053630081346824, iteration: 116176
loss: 0.9955883026123047,grad_norm: 0.7930544653631232, iteration: 116177
loss: 1.1338969469070435,grad_norm: 0.9999993701123766, iteration: 116178
loss: 1.0642063617706299,grad_norm: 0.9999996056172571, iteration: 116179
loss: 0.9724564552307129,grad_norm: 0.9999990405731455, iteration: 116180
loss: 1.0720103979110718,grad_norm: 0.9999999045436326, iteration: 116181
loss: 1.2799367904663086,grad_norm: 0.9999998347977885, iteration: 116182
loss: 1.1414895057678223,grad_norm: 0.9999996557791687, iteration: 116183
loss: 1.1119426488876343,grad_norm: 0.9999997719835055, iteration: 116184
loss: 1.0990222692489624,grad_norm: 0.9999990159793444, iteration: 116185
loss: 0.9929338097572327,grad_norm: 0.9580997471406723, iteration: 116186
loss: 1.0132811069488525,grad_norm: 0.9999993565626117, iteration: 116187
loss: 1.107945442199707,grad_norm: 0.9999993204849766, iteration: 116188
loss: 1.1513347625732422,grad_norm: 0.9999994479719357, iteration: 116189
loss: 1.0614014863967896,grad_norm: 0.9999995967134173, iteration: 116190
loss: 0.9544500708580017,grad_norm: 0.8348394213172878, iteration: 116191
loss: 1.0397378206253052,grad_norm: 0.9999998820247391, iteration: 116192
loss: 1.0334213972091675,grad_norm: 0.8580092965442376, iteration: 116193
loss: 1.0584466457366943,grad_norm: 0.9999994056737666, iteration: 116194
loss: 1.0558738708496094,grad_norm: 0.9999998244284839, iteration: 116195
loss: 1.0054025650024414,grad_norm: 0.8563321585577556, iteration: 116196
loss: 0.9944722652435303,grad_norm: 0.8092611958527035, iteration: 116197
loss: 1.0632599592208862,grad_norm: 0.9999993236589794, iteration: 116198
loss: 1.0176184177398682,grad_norm: 0.7936797475580256, iteration: 116199
loss: 1.0477845668792725,grad_norm: 0.9999990875097898, iteration: 116200
loss: 0.9366862177848816,grad_norm: 0.9902689805965023, iteration: 116201
loss: 0.9823288917541504,grad_norm: 0.9883954925767924, iteration: 116202
loss: 0.9818980097770691,grad_norm: 0.9999990339525772, iteration: 116203
loss: 1.0134968757629395,grad_norm: 0.9999998273095823, iteration: 116204
loss: 0.9670413136482239,grad_norm: 0.8929508277673759, iteration: 116205
loss: 1.0376449823379517,grad_norm: 0.9999997605230446, iteration: 116206
loss: 1.034509301185608,grad_norm: 0.9560470933039116, iteration: 116207
loss: 1.0573168992996216,grad_norm: 0.999999312887284, iteration: 116208
loss: 1.0620579719543457,grad_norm: 0.9999996711412175, iteration: 116209
loss: 1.0266242027282715,grad_norm: 0.9999993649721438, iteration: 116210
loss: 1.041508436203003,grad_norm: 0.999999147073501, iteration: 116211
loss: 0.9952853918075562,grad_norm: 0.9999993933341967, iteration: 116212
loss: 1.0072894096374512,grad_norm: 0.9059894706197241, iteration: 116213
loss: 0.9810399413108826,grad_norm: 0.9999995165320137, iteration: 116214
loss: 1.025277018547058,grad_norm: 0.9999991877681664, iteration: 116215
loss: 1.0309808254241943,grad_norm: 0.9999992899236165, iteration: 116216
loss: 0.9756642580032349,grad_norm: 0.8804657712384316, iteration: 116217
loss: 1.1053235530853271,grad_norm: 0.9999999385181655, iteration: 116218
loss: 1.0413357019424438,grad_norm: 0.9999994860702905, iteration: 116219
loss: 1.05398690700531,grad_norm: 0.9999993123680894, iteration: 116220
loss: 1.0446226596832275,grad_norm: 0.9999993396498353, iteration: 116221
loss: 0.9458774924278259,grad_norm: 0.9797312889933527, iteration: 116222
loss: 0.9780256152153015,grad_norm: 0.9999992914911578, iteration: 116223
loss: 1.0179141759872437,grad_norm: 0.9999992784233779, iteration: 116224
loss: 0.9989367723464966,grad_norm: 0.9999998513500803, iteration: 116225
loss: 0.9700853228569031,grad_norm: 0.9999989956620516, iteration: 116226
loss: 1.0364493131637573,grad_norm: 0.9999993120915117, iteration: 116227
loss: 1.0690431594848633,grad_norm: 0.9848501648561356, iteration: 116228
loss: 1.0581728219985962,grad_norm: 0.9999994309988955, iteration: 116229
loss: 1.027751088142395,grad_norm: 0.9502847472956967, iteration: 116230
loss: 0.9983547925949097,grad_norm: 0.9999993681470768, iteration: 116231
loss: 0.9929191470146179,grad_norm: 0.7232801604277657, iteration: 116232
loss: 1.0138493776321411,grad_norm: 0.775532976521079, iteration: 116233
loss: 1.02729070186615,grad_norm: 0.9093177954562327, iteration: 116234
loss: 1.038925051689148,grad_norm: 0.9999997183675553, iteration: 116235
loss: 0.9837797284126282,grad_norm: 0.9549679918437236, iteration: 116236
loss: 0.991231381893158,grad_norm: 0.9999999748407499, iteration: 116237
loss: 1.0352119207382202,grad_norm: 0.9999991300342214, iteration: 116238
loss: 1.0439256429672241,grad_norm: 0.9999994344507022, iteration: 116239
loss: 1.0116400718688965,grad_norm: 0.8578945370021388, iteration: 116240
loss: 1.0487080812454224,grad_norm: 0.9999990447959541, iteration: 116241
loss: 1.0908173322677612,grad_norm: 0.999999901267541, iteration: 116242
loss: 1.051101803779602,grad_norm: 0.9999994348229531, iteration: 116243
loss: 1.0028914213180542,grad_norm: 0.8485090116611442, iteration: 116244
loss: 1.0036596059799194,grad_norm: 0.9999991882916176, iteration: 116245
loss: 1.0357072353363037,grad_norm: 0.9999997558233159, iteration: 116246
loss: 0.9640927910804749,grad_norm: 0.9999991227701261, iteration: 116247
loss: 1.0293105840682983,grad_norm: 0.9999990407591663, iteration: 116248
loss: 1.0016857385635376,grad_norm: 0.9999995207196557, iteration: 116249
loss: 0.9937387704849243,grad_norm: 0.9893821999896891, iteration: 116250
loss: 1.0462664365768433,grad_norm: 0.9999997997275352, iteration: 116251
loss: 0.9999729990959167,grad_norm: 0.7999336074433946, iteration: 116252
loss: 0.9938808679580688,grad_norm: 0.8441445952052221, iteration: 116253
loss: 1.1418334245681763,grad_norm: 0.999999254439674, iteration: 116254
loss: 1.0280309915542603,grad_norm: 0.9999998473298599, iteration: 116255
loss: 1.0273526906967163,grad_norm: 0.9999991540758476, iteration: 116256
loss: 1.0432294607162476,grad_norm: 0.999999177819142, iteration: 116257
loss: 0.9568892121315002,grad_norm: 0.9999991383720823, iteration: 116258
loss: 1.0041474103927612,grad_norm: 0.9999991936780811, iteration: 116259
loss: 1.002996563911438,grad_norm: 0.9358944240796085, iteration: 116260
loss: 1.0104186534881592,grad_norm: 0.9999992105787214, iteration: 116261
loss: 1.0662510395050049,grad_norm: 0.9999993426521684, iteration: 116262
loss: 1.0075963735580444,grad_norm: 0.9999992659207949, iteration: 116263
loss: 1.0978974103927612,grad_norm: 0.9999998875969096, iteration: 116264
loss: 1.0209953784942627,grad_norm: 0.9999992923173564, iteration: 116265
loss: 1.0371201038360596,grad_norm: 0.9999993346318822, iteration: 116266
loss: 1.0178909301757812,grad_norm: 0.9999991872252277, iteration: 116267
loss: 1.0096737146377563,grad_norm: 0.9999990827994878, iteration: 116268
loss: 1.0342835187911987,grad_norm: 0.9999990779444815, iteration: 116269
loss: 1.0494613647460938,grad_norm: 0.9999995159870726, iteration: 116270
loss: 1.0674057006835938,grad_norm: 0.9999995568806753, iteration: 116271
loss: 1.035275936126709,grad_norm: 0.9255777737877505, iteration: 116272
loss: 1.015460729598999,grad_norm: 0.9999991284127238, iteration: 116273
loss: 1.0193790197372437,grad_norm: 0.9999999147231871, iteration: 116274
loss: 1.0198678970336914,grad_norm: 0.9999991243522816, iteration: 116275
loss: 1.025442123413086,grad_norm: 0.9999990733670834, iteration: 116276
loss: 1.0196802616119385,grad_norm: 0.8798940812327036, iteration: 116277
loss: 1.0075945854187012,grad_norm: 0.9999991477859106, iteration: 116278
loss: 0.9950556755065918,grad_norm: 0.9599989567059906, iteration: 116279
loss: 1.0769481658935547,grad_norm: 0.9999994361664191, iteration: 116280
loss: 0.9952661395072937,grad_norm: 0.8507753045865157, iteration: 116281
loss: 1.0129894018173218,grad_norm: 0.9999991788477236, iteration: 116282
loss: 1.0072251558303833,grad_norm: 0.9999999205642726, iteration: 116283
loss: 1.0312966108322144,grad_norm: 0.9999992476540015, iteration: 116284
loss: 1.0838295221328735,grad_norm: 0.9830935076851297, iteration: 116285
loss: 1.0655943155288696,grad_norm: 0.9698868763232592, iteration: 116286
loss: 0.9886249899864197,grad_norm: 0.9999991206513416, iteration: 116287
loss: 0.946194052696228,grad_norm: 0.8394451130975776, iteration: 116288
loss: 0.9974908828735352,grad_norm: 0.7551110984824122, iteration: 116289
loss: 1.0733634233474731,grad_norm: 0.9999990589231548, iteration: 116290
loss: 1.0442712306976318,grad_norm: 1.000000093013355, iteration: 116291
loss: 1.0048139095306396,grad_norm: 0.9999997237306364, iteration: 116292
loss: 1.0273467302322388,grad_norm: 0.9999992809080193, iteration: 116293
loss: 1.025902509689331,grad_norm: 0.9999991604871271, iteration: 116294
loss: 0.997565507888794,grad_norm: 0.999999210476986, iteration: 116295
loss: 0.9988783001899719,grad_norm: 0.9374841963745119, iteration: 116296
loss: 0.9816633462905884,grad_norm: 0.9999991517022011, iteration: 116297
loss: 0.997006356716156,grad_norm: 0.8841629262248726, iteration: 116298
loss: 1.088914155960083,grad_norm: 0.9999991548848574, iteration: 116299
loss: 1.0047322511672974,grad_norm: 0.9999989687374855, iteration: 116300
loss: 1.0346566438674927,grad_norm: 0.9999995108811641, iteration: 116301
loss: 1.0206652879714966,grad_norm: 0.9999998716086158, iteration: 116302
loss: 1.0074732303619385,grad_norm: 0.999999235337081, iteration: 116303
loss: 1.018983006477356,grad_norm: 0.9678210024242362, iteration: 116304
loss: 1.0026153326034546,grad_norm: 0.9094995214620886, iteration: 116305
loss: 1.022270917892456,grad_norm: 0.9999993610823479, iteration: 116306
loss: 1.0398362874984741,grad_norm: 0.9275711269163767, iteration: 116307
loss: 1.0086688995361328,grad_norm: 0.999999783111015, iteration: 116308
loss: 1.0037020444869995,grad_norm: 0.933531563061478, iteration: 116309
loss: 1.0053821802139282,grad_norm: 0.904061770931434, iteration: 116310
loss: 0.9624286890029907,grad_norm: 0.9695086900515408, iteration: 116311
loss: 1.052477240562439,grad_norm: 0.9999996686025027, iteration: 116312
loss: 1.0612250566482544,grad_norm: 0.9999995955458819, iteration: 116313
loss: 1.001285433769226,grad_norm: 0.9880241103391331, iteration: 116314
loss: 1.0971806049346924,grad_norm: 0.9999994163446048, iteration: 116315
loss: 0.9723637104034424,grad_norm: 0.8728918677029456, iteration: 116316
loss: 1.0207890272140503,grad_norm: 0.9999991448505791, iteration: 116317
loss: 1.013414978981018,grad_norm: 0.9999992565014657, iteration: 116318
loss: 0.9886969923973083,grad_norm: 0.8958983280787677, iteration: 116319
loss: 1.0188851356506348,grad_norm: 0.8561517359796236, iteration: 116320
loss: 1.0223332643508911,grad_norm: 0.9999996807316306, iteration: 116321
loss: 1.070875644683838,grad_norm: 0.9720246284283098, iteration: 116322
loss: 1.012199878692627,grad_norm: 0.9999994607521606, iteration: 116323
loss: 0.9752721786499023,grad_norm: 0.9999990595360565, iteration: 116324
loss: 1.1451524496078491,grad_norm: 0.9999991887807403, iteration: 116325
loss: 1.014329433441162,grad_norm: 0.9933963991095676, iteration: 116326
loss: 1.0757756233215332,grad_norm: 0.9999992657051947, iteration: 116327
loss: 0.9994124174118042,grad_norm: 0.9999990030343996, iteration: 116328
loss: 0.9963807463645935,grad_norm: 0.9999999176650881, iteration: 116329
loss: 1.0263944864273071,grad_norm: 0.999999811189353, iteration: 116330
loss: 1.0302603244781494,grad_norm: 0.9099479962197413, iteration: 116331
loss: 1.0660773515701294,grad_norm: 0.9999994080651955, iteration: 116332
loss: 1.0441901683807373,grad_norm: 0.832225599404385, iteration: 116333
loss: 1.0636675357818604,grad_norm: 0.9999997567978501, iteration: 116334
loss: 1.04031503200531,grad_norm: 0.9575815936666445, iteration: 116335
loss: 1.0395026206970215,grad_norm: 0.9999997160752128, iteration: 116336
loss: 0.994001567363739,grad_norm: 0.9999992998394442, iteration: 116337
loss: 1.0133463144302368,grad_norm: 0.8859120973575748, iteration: 116338
loss: 1.0265809297561646,grad_norm: 0.9812401660900191, iteration: 116339
loss: 1.0410501956939697,grad_norm: 0.9322875179302204, iteration: 116340
loss: 1.0588841438293457,grad_norm: 0.9999992150183262, iteration: 116341
loss: 1.0664006471633911,grad_norm: 0.9999990800058984, iteration: 116342
loss: 1.0274238586425781,grad_norm: 0.9999992769399737, iteration: 116343
loss: 1.0996088981628418,grad_norm: 0.9999995967257177, iteration: 116344
loss: 1.0091031789779663,grad_norm: 0.9999993784432764, iteration: 116345
loss: 1.09346604347229,grad_norm: 0.9999997566196316, iteration: 116346
loss: 0.9996823072433472,grad_norm: 0.8825051091191424, iteration: 116347
loss: 1.0602765083312988,grad_norm: 0.9999989610228677, iteration: 116348
loss: 1.04710054397583,grad_norm: 0.999999188434858, iteration: 116349
loss: 1.0019183158874512,grad_norm: 0.9101622408089178, iteration: 116350
loss: 1.001291036605835,grad_norm: 0.7462344765834924, iteration: 116351
loss: 1.0119338035583496,grad_norm: 0.8746326057304398, iteration: 116352
loss: 1.0552345514297485,grad_norm: 0.9999992947276897, iteration: 116353
loss: 1.005641222000122,grad_norm: 0.9683985555902841, iteration: 116354
loss: 1.0094263553619385,grad_norm: 0.9999995711247466, iteration: 116355
loss: 0.9741174578666687,grad_norm: 0.9999994036175116, iteration: 116356
loss: 1.0467746257781982,grad_norm: 0.9999990942425313, iteration: 116357
loss: 1.0855348110198975,grad_norm: 0.9999992974065132, iteration: 116358
loss: 0.9993887543678284,grad_norm: 0.9999993098567154, iteration: 116359
loss: 1.0069314241409302,grad_norm: 0.8956558102204726, iteration: 116360
loss: 1.0902339220046997,grad_norm: 0.9999999286247723, iteration: 116361
loss: 1.075748324394226,grad_norm: 0.9999999333021894, iteration: 116362
loss: 1.035779356956482,grad_norm: 0.9999998192920646, iteration: 116363
loss: 0.9616563320159912,grad_norm: 0.8720399994683119, iteration: 116364
loss: 1.0001280307769775,grad_norm: 0.9158701404394115, iteration: 116365
loss: 1.0565061569213867,grad_norm: 0.9999997355054988, iteration: 116366
loss: 1.0018157958984375,grad_norm: 0.9999991139475396, iteration: 116367
loss: 1.023389458656311,grad_norm: 0.9999992182117063, iteration: 116368
loss: 1.060433268547058,grad_norm: 0.8268014238515593, iteration: 116369
loss: 0.9730956554412842,grad_norm: 0.84748452355563, iteration: 116370
loss: 0.9850660562515259,grad_norm: 0.8105173210514564, iteration: 116371
loss: 0.9803837537765503,grad_norm: 0.9999990970700052, iteration: 116372
loss: 1.1334431171417236,grad_norm: 0.999999414577568, iteration: 116373
loss: 1.0366055965423584,grad_norm: 0.9538651211961737, iteration: 116374
loss: 0.9576632380485535,grad_norm: 0.8650969890825844, iteration: 116375
loss: 1.049525260925293,grad_norm: 0.9999998993362934, iteration: 116376
loss: 1.008468747138977,grad_norm: 0.8814778047255475, iteration: 116377
loss: 1.0153861045837402,grad_norm: 0.8824472002424151, iteration: 116378
loss: 1.0710110664367676,grad_norm: 0.9999998818011835, iteration: 116379
loss: 1.0415198802947998,grad_norm: 0.8710211626749008, iteration: 116380
loss: 1.029297947883606,grad_norm: 0.9999997145706868, iteration: 116381
loss: 0.9688318371772766,grad_norm: 0.9672261284674442, iteration: 116382
loss: 1.307486891746521,grad_norm: 0.9999999399525692, iteration: 116383
loss: 1.029358983039856,grad_norm: 0.9999995445934207, iteration: 116384
loss: 1.1670149564743042,grad_norm: 0.999999641739231, iteration: 116385
loss: 1.0211842060089111,grad_norm: 0.8964688803702588, iteration: 116386
loss: 1.1950559616088867,grad_norm: 0.9999994674462409, iteration: 116387
loss: 1.024734616279602,grad_norm: 0.9999997777287584, iteration: 116388
loss: 1.0417444705963135,grad_norm: 0.9999993909801415, iteration: 116389
loss: 1.328505039215088,grad_norm: 0.9999996649007427, iteration: 116390
loss: 1.0438753366470337,grad_norm: 0.9295820607323885, iteration: 116391
loss: 0.9483348727226257,grad_norm: 0.9497411785220016, iteration: 116392
loss: 1.1735137701034546,grad_norm: 0.9999994656246839, iteration: 116393
loss: 1.0125763416290283,grad_norm: 0.9874662246194087, iteration: 116394
loss: 1.082176923751831,grad_norm: 0.9999996649152062, iteration: 116395
loss: 1.0551378726959229,grad_norm: 0.9999995624298696, iteration: 116396
loss: 1.0635669231414795,grad_norm: 0.9999990242218859, iteration: 116397
loss: 0.9760112166404724,grad_norm: 0.7948292084923003, iteration: 116398
loss: 1.1084178686141968,grad_norm: 0.9999991954679889, iteration: 116399
loss: 0.9893704652786255,grad_norm: 0.9999990912832049, iteration: 116400
loss: 1.303619623184204,grad_norm: 0.9999998151839842, iteration: 116401
loss: 0.989589273929596,grad_norm: 0.9925296422571088, iteration: 116402
loss: 1.1521782875061035,grad_norm: 0.9999998563267208, iteration: 116403
loss: 1.1008038520812988,grad_norm: 0.9747971424990847, iteration: 116404
loss: 1.0713237524032593,grad_norm: 0.9999991312161368, iteration: 116405
loss: 1.0429530143737793,grad_norm: 0.9999991595550577, iteration: 116406
loss: 0.9885259866714478,grad_norm: 0.9999992194705472, iteration: 116407
loss: 1.0384589433670044,grad_norm: 0.9999996584199043, iteration: 116408
loss: 1.0332441329956055,grad_norm: 0.8354912071026438, iteration: 116409
loss: 0.996907651424408,grad_norm: 0.9462527655499221, iteration: 116410
loss: 1.0783783197402954,grad_norm: 0.9999995238587639, iteration: 116411
loss: 0.9858079552650452,grad_norm: 0.8278810526863244, iteration: 116412
loss: 1.1043182611465454,grad_norm: 0.9999993238341075, iteration: 116413
loss: 1.0089296102523804,grad_norm: 0.9201283301151019, iteration: 116414
loss: 1.148828387260437,grad_norm: 0.9999997442735064, iteration: 116415
loss: 1.1607930660247803,grad_norm: 0.9999999954360762, iteration: 116416
loss: 0.9869409203529358,grad_norm: 0.9999994594403129, iteration: 116417
loss: 0.9733285307884216,grad_norm: 0.8479561254607999, iteration: 116418
loss: 1.047296166419983,grad_norm: 0.9999997847401825, iteration: 116419
loss: 0.9944997429847717,grad_norm: 0.9494061144003488, iteration: 116420
loss: 1.039536714553833,grad_norm: 0.9999991435546833, iteration: 116421
loss: 1.0052165985107422,grad_norm: 0.9697794965736215, iteration: 116422
loss: 0.9702003002166748,grad_norm: 0.9999999693639404, iteration: 116423
loss: 1.0201528072357178,grad_norm: 0.9019732468643515, iteration: 116424
loss: 1.0413964986801147,grad_norm: 0.9999998205731192, iteration: 116425
loss: 1.0174134969711304,grad_norm: 0.8582412026900302, iteration: 116426
loss: 0.9929504990577698,grad_norm: 0.9999992942300828, iteration: 116427
loss: 0.999883770942688,grad_norm: 0.8813446932112928, iteration: 116428
loss: 1.029133915901184,grad_norm: 0.9999990533631054, iteration: 116429
loss: 0.9867143630981445,grad_norm: 0.9999993397239821, iteration: 116430
loss: 1.0883816480636597,grad_norm: 0.9999992445239673, iteration: 116431
loss: 1.031784176826477,grad_norm: 0.9999992385593035, iteration: 116432
loss: 1.0281496047973633,grad_norm: 0.9999991748333542, iteration: 116433
loss: 0.9910417199134827,grad_norm: 0.9999992484469469, iteration: 116434
loss: 1.0261523723602295,grad_norm: 0.999999919982928, iteration: 116435
loss: 1.0264497995376587,grad_norm: 0.9264305926908141, iteration: 116436
loss: 1.0272506475448608,grad_norm: 0.9999993034969843, iteration: 116437
loss: 1.008736252784729,grad_norm: 0.9999998242892892, iteration: 116438
loss: 1.0236965417861938,grad_norm: 0.9462551470577539, iteration: 116439
loss: 1.033382773399353,grad_norm: 0.999999989659611, iteration: 116440
loss: 0.9711431264877319,grad_norm: 0.8960326229932405, iteration: 116441
loss: 1.0341042280197144,grad_norm: 0.9999997755806661, iteration: 116442
loss: 1.0817970037460327,grad_norm: 0.9999998839603167, iteration: 116443
loss: 0.9948816299438477,grad_norm: 0.9583095465124272, iteration: 116444
loss: 0.9968181848526001,grad_norm: 0.9092781734992524, iteration: 116445
loss: 1.1350773572921753,grad_norm: 0.9999996854035812, iteration: 116446
loss: 1.0558500289916992,grad_norm: 0.9999999038697224, iteration: 116447
loss: 1.113466739654541,grad_norm: 0.9999990901546418, iteration: 116448
loss: 0.9933493137359619,grad_norm: 0.7215595429640745, iteration: 116449
loss: 0.9840031266212463,grad_norm: 0.9999995747810642, iteration: 116450
loss: 1.0333062410354614,grad_norm: 0.9999992137466299, iteration: 116451
loss: 1.001259207725525,grad_norm: 0.9369592174529723, iteration: 116452
loss: 1.0067477226257324,grad_norm: 0.8975908921687771, iteration: 116453
loss: 1.0436674356460571,grad_norm: 0.9999995271965635, iteration: 116454
loss: 1.051786184310913,grad_norm: 0.9999996589071735, iteration: 116455
loss: 1.005086898803711,grad_norm: 0.7006808320926078, iteration: 116456
loss: 1.0068740844726562,grad_norm: 0.9621152563623019, iteration: 116457
loss: 1.0164827108383179,grad_norm: 0.8511015056228024, iteration: 116458
loss: 1.041157603263855,grad_norm: 0.9999995378575846, iteration: 116459
loss: 1.0219961404800415,grad_norm: 0.999999236005447, iteration: 116460
loss: 0.9811121821403503,grad_norm: 0.9999991021707757, iteration: 116461
loss: 1.1498048305511475,grad_norm: 0.9999997152767002, iteration: 116462
loss: 1.2588995695114136,grad_norm: 0.9999998643190237, iteration: 116463
loss: 1.1223576068878174,grad_norm: 0.9999993397720423, iteration: 116464
loss: 1.0489654541015625,grad_norm: 0.9289364223880175, iteration: 116465
loss: 1.0630522966384888,grad_norm: 0.9999999215484909, iteration: 116466
loss: 1.123786449432373,grad_norm: 0.9999992523898005, iteration: 116467
loss: 1.014681339263916,grad_norm: 0.9999995890467456, iteration: 116468
loss: 1.0278043746948242,grad_norm: 0.9999992085091599, iteration: 116469
loss: 1.0182116031646729,grad_norm: 0.9999990953864503, iteration: 116470
loss: 1.000111699104309,grad_norm: 0.9227874656501627, iteration: 116471
loss: 1.0020335912704468,grad_norm: 0.9999990950084182, iteration: 116472
loss: 1.0175321102142334,grad_norm: 0.999999113789288, iteration: 116473
loss: 1.0047338008880615,grad_norm: 0.7905890561755267, iteration: 116474
loss: 1.0082556009292603,grad_norm: 0.9999991992192596, iteration: 116475
loss: 1.0157678127288818,grad_norm: 0.9047840432162054, iteration: 116476
loss: 1.016312837600708,grad_norm: 0.9999994039568472, iteration: 116477
loss: 1.0056710243225098,grad_norm: 0.9966567880061844, iteration: 116478
loss: 1.0178015232086182,grad_norm: 0.9061935004056656, iteration: 116479
loss: 1.0167571306228638,grad_norm: 0.9999993898223097, iteration: 116480
loss: 1.1463005542755127,grad_norm: 0.9999992156663405, iteration: 116481
loss: 0.9974453449249268,grad_norm: 0.8173600309132524, iteration: 116482
loss: 1.0132699012756348,grad_norm: 0.9999991155458531, iteration: 116483
loss: 1.0189353227615356,grad_norm: 0.9244785709920259, iteration: 116484
loss: 1.0149632692337036,grad_norm: 0.9999990735638726, iteration: 116485
loss: 0.9435054063796997,grad_norm: 0.9041914876918954, iteration: 116486
loss: 1.025768756866455,grad_norm: 0.9999995533441386, iteration: 116487
loss: 1.066202998161316,grad_norm: 0.9999994027551607, iteration: 116488
loss: 1.0900030136108398,grad_norm: 0.9345177589243414, iteration: 116489
loss: 0.9853904843330383,grad_norm: 0.7890014715867466, iteration: 116490
loss: 1.0495840311050415,grad_norm: 0.9745655830894904, iteration: 116491
loss: 1.0002319812774658,grad_norm: 0.9999996013212812, iteration: 116492
loss: 1.0124040842056274,grad_norm: 0.9508633144131464, iteration: 116493
loss: 1.0458699464797974,grad_norm: 0.9999997814102545, iteration: 116494
loss: 1.0328878164291382,grad_norm: 0.9999991501450736, iteration: 116495
loss: 1.0545486211776733,grad_norm: 0.9999994098455071, iteration: 116496
loss: 1.0310715436935425,grad_norm: 0.93038291178009, iteration: 116497
loss: 1.0365158319473267,grad_norm: 0.9999997342791485, iteration: 116498
loss: 0.9828569293022156,grad_norm: 0.7525203868832785, iteration: 116499
loss: 1.003728985786438,grad_norm: 0.9287368517624204, iteration: 116500
loss: 1.0110840797424316,grad_norm: 0.9782618540744387, iteration: 116501
loss: 1.0123027563095093,grad_norm: 0.8969939226447952, iteration: 116502
loss: 1.0235036611557007,grad_norm: 0.999999447829972, iteration: 116503
loss: 0.9795965552330017,grad_norm: 0.9999989512704892, iteration: 116504
loss: 0.9821125268936157,grad_norm: 0.908033255613613, iteration: 116505
loss: 1.0303354263305664,grad_norm: 0.9999989939732356, iteration: 116506
loss: 1.0064189434051514,grad_norm: 0.9999998895016535, iteration: 116507
loss: 0.9975795149803162,grad_norm: 0.9999995718802387, iteration: 116508
loss: 1.030236840248108,grad_norm: 0.999999496604206, iteration: 116509
loss: 1.0739988088607788,grad_norm: 0.9999992141430006, iteration: 116510
loss: 1.0074297189712524,grad_norm: 0.8300232061982856, iteration: 116511
loss: 1.0611079931259155,grad_norm: 0.8838620017809181, iteration: 116512
loss: 0.9912150502204895,grad_norm: 0.9494551566390805, iteration: 116513
loss: 1.0064228773117065,grad_norm: 0.9999993594673453, iteration: 116514
loss: 0.9886536002159119,grad_norm: 0.9831204895107027, iteration: 116515
loss: 1.015297532081604,grad_norm: 0.7535673216710688, iteration: 116516
loss: 1.02449369430542,grad_norm: 0.810314552413574, iteration: 116517
loss: 0.9865207672119141,grad_norm: 0.9553844078896699, iteration: 116518
loss: 0.9887109398841858,grad_norm: 0.9999992314818668, iteration: 116519
loss: 0.9734311699867249,grad_norm: 0.933317493270341, iteration: 116520
loss: 1.0172697305679321,grad_norm: 0.967050060233561, iteration: 116521
loss: 1.0534895658493042,grad_norm: 0.9999991903419906, iteration: 116522
loss: 0.9697574973106384,grad_norm: 0.9999992511850028, iteration: 116523
loss: 1.072649598121643,grad_norm: 0.9048863603678837, iteration: 116524
loss: 0.9963772892951965,grad_norm: 0.9999995222694923, iteration: 116525
loss: 1.0597931146621704,grad_norm: 0.999999862142014, iteration: 116526
loss: 0.9957237839698792,grad_norm: 0.9183833744334924, iteration: 116527
loss: 1.0110480785369873,grad_norm: 0.999999607098004, iteration: 116528
loss: 1.0559755563735962,grad_norm: 0.99999943830751, iteration: 116529
loss: 0.9995875954627991,grad_norm: 0.8274194566384816, iteration: 116530
loss: 1.016086459159851,grad_norm: 0.9999990946502892, iteration: 116531
loss: 1.0249220132827759,grad_norm: 0.891769301007127, iteration: 116532
loss: 0.9975984692573547,grad_norm: 0.9587532161278525, iteration: 116533
loss: 0.9898860454559326,grad_norm: 0.8925291242594842, iteration: 116534
loss: 1.0239733457565308,grad_norm: 0.9858936653361476, iteration: 116535
loss: 1.0211153030395508,grad_norm: 0.9999993134332493, iteration: 116536
loss: 1.0548824071884155,grad_norm: 0.9986938618549304, iteration: 116537
loss: 0.975721538066864,grad_norm: 0.8348691642391889, iteration: 116538
loss: 1.0840047597885132,grad_norm: 0.9999998564741174, iteration: 116539
loss: 1.0050874948501587,grad_norm: 0.9999990405813733, iteration: 116540
loss: 1.0361945629119873,grad_norm: 0.8499479824732064, iteration: 116541
loss: 0.9859251976013184,grad_norm: 0.9999994425844636, iteration: 116542
loss: 1.0829473733901978,grad_norm: 0.999999310530624, iteration: 116543
loss: 1.0077555179595947,grad_norm: 0.9999992269001183, iteration: 116544
loss: 1.0727344751358032,grad_norm: 0.9999989990758178, iteration: 116545
loss: 1.0182212591171265,grad_norm: 0.9217670334013486, iteration: 116546
loss: 1.1188708543777466,grad_norm: 0.9999992024267866, iteration: 116547
loss: 1.100503921508789,grad_norm: 0.9999998626042234, iteration: 116548
loss: 1.029078483581543,grad_norm: 0.9999992275220843, iteration: 116549
loss: 1.0371086597442627,grad_norm: 0.9999998042357764, iteration: 116550
loss: 1.0300692319869995,grad_norm: 0.9999992637316734, iteration: 116551
loss: 1.043931484222412,grad_norm: 0.8157852370733965, iteration: 116552
loss: 1.0145395994186401,grad_norm: 0.9999991390201018, iteration: 116553
loss: 1.0301306247711182,grad_norm: 0.9326582776579432, iteration: 116554
loss: 1.001654028892517,grad_norm: 0.9513004362995943, iteration: 116555
loss: 0.983366072177887,grad_norm: 0.8367662765095042, iteration: 116556
loss: 0.985096275806427,grad_norm: 0.9013208701829362, iteration: 116557
loss: 1.020954966545105,grad_norm: 0.9999993275537582, iteration: 116558
loss: 1.043776512145996,grad_norm: 0.9999993464482223, iteration: 116559
loss: 1.1427797079086304,grad_norm: 0.9999996376270396, iteration: 116560
loss: 1.0071858167648315,grad_norm: 0.9364027189930062, iteration: 116561
loss: 1.024347186088562,grad_norm: 0.9999996156897599, iteration: 116562
loss: 1.0276120901107788,grad_norm: 0.9999992492899588, iteration: 116563
loss: 1.0912014245986938,grad_norm: 0.9999993555339616, iteration: 116564
loss: 1.0073349475860596,grad_norm: 0.8214768598477172, iteration: 116565
loss: 1.0697760581970215,grad_norm: 0.9384800407893601, iteration: 116566
loss: 1.0755400657653809,grad_norm: 0.9999991625201033, iteration: 116567
loss: 0.9932329058647156,grad_norm: 0.801848904789603, iteration: 116568
loss: 1.0323952436447144,grad_norm: 0.9999993258699609, iteration: 116569
loss: 1.0078189373016357,grad_norm: 0.8614831830058186, iteration: 116570
loss: 1.0062414407730103,grad_norm: 0.9999991052524162, iteration: 116571
loss: 1.0372679233551025,grad_norm: 0.9999994698772245, iteration: 116572
loss: 0.9770106077194214,grad_norm: 0.8045700475843239, iteration: 116573
loss: 0.9929450154304504,grad_norm: 0.999999066707479, iteration: 116574
loss: 1.0048483610153198,grad_norm: 0.9999992464495009, iteration: 116575
loss: 1.0184768438339233,grad_norm: 0.7880151449159338, iteration: 116576
loss: 0.9960824847221375,grad_norm: 0.9999991945800314, iteration: 116577
loss: 1.0001996755599976,grad_norm: 0.8506002763082419, iteration: 116578
loss: 0.9921849966049194,grad_norm: 0.9775797914558589, iteration: 116579
loss: 0.9907209873199463,grad_norm: 0.8373028811046955, iteration: 116580
loss: 1.0974782705307007,grad_norm: 0.9999993862898167, iteration: 116581
loss: 1.032031774520874,grad_norm: 0.9999998011883013, iteration: 116582
loss: 1.034554362297058,grad_norm: 0.746447503872684, iteration: 116583
loss: 1.0219746828079224,grad_norm: 0.928873768478351, iteration: 116584
loss: 0.966245174407959,grad_norm: 0.8961900515252789, iteration: 116585
loss: 1.0169886350631714,grad_norm: 0.9999994025100726, iteration: 116586
loss: 1.0403739213943481,grad_norm: 0.9220459746393808, iteration: 116587
loss: 1.2992358207702637,grad_norm: 0.9999997861603355, iteration: 116588
loss: 1.0195696353912354,grad_norm: 0.9999991164168626, iteration: 116589
loss: 1.002962589263916,grad_norm: 0.7785872081421562, iteration: 116590
loss: 0.9628000855445862,grad_norm: 0.9775542974754439, iteration: 116591
loss: 1.0105890035629272,grad_norm: 0.8550416556095731, iteration: 116592
loss: 1.0148537158966064,grad_norm: 0.9999999304124748, iteration: 116593
loss: 1.054715871810913,grad_norm: 0.8971369196920551, iteration: 116594
loss: 1.013121485710144,grad_norm: 0.9999991119823103, iteration: 116595
loss: 1.0204663276672363,grad_norm: 0.9854709239076432, iteration: 116596
loss: 1.0513609647750854,grad_norm: 0.9581742157956582, iteration: 116597
loss: 1.0520979166030884,grad_norm: 0.9999992387484071, iteration: 116598
loss: 1.0262452363967896,grad_norm: 0.9614122431254005, iteration: 116599
loss: 0.9956832528114319,grad_norm: 0.9508942032243659, iteration: 116600
loss: 0.992889404296875,grad_norm: 0.9999990014760154, iteration: 116601
loss: 1.0453821420669556,grad_norm: 0.9999992112259037, iteration: 116602
loss: 1.1499272584915161,grad_norm: 0.999999618612968, iteration: 116603
loss: 1.2089372873306274,grad_norm: 0.9999997943885369, iteration: 116604
loss: 1.0303537845611572,grad_norm: 0.7796016118858284, iteration: 116605
loss: 1.0581685304641724,grad_norm: 0.8718849009031702, iteration: 116606
loss: 0.99715656042099,grad_norm: 0.9999990821907954, iteration: 116607
loss: 1.0014872550964355,grad_norm: 0.9999991324782141, iteration: 116608
loss: 1.0315946340560913,grad_norm: 0.99999989998081, iteration: 116609
loss: 1.010580062866211,grad_norm: 0.9280021636052564, iteration: 116610
loss: 1.0248619318008423,grad_norm: 0.8901230070531451, iteration: 116611
loss: 0.9967983961105347,grad_norm: 0.9999992831538529, iteration: 116612
loss: 1.0357264280319214,grad_norm: 0.9225574213277983, iteration: 116613
loss: 1.0800116062164307,grad_norm: 0.9999998795176045, iteration: 116614
loss: 1.038690209388733,grad_norm: 0.9999995001162357, iteration: 116615
loss: 1.030276894569397,grad_norm: 0.9500634843283585, iteration: 116616
loss: 0.9798926711082458,grad_norm: 0.8388350729896018, iteration: 116617
loss: 1.0064412355422974,grad_norm: 0.9934155713426749, iteration: 116618
loss: 0.9833759665489197,grad_norm: 0.8573499862097891, iteration: 116619
loss: 1.1866509914398193,grad_norm: 0.9999997215308266, iteration: 116620
loss: 0.9973141551017761,grad_norm: 0.8591620120297813, iteration: 116621
loss: 1.0271024703979492,grad_norm: 0.9999997876601843, iteration: 116622
loss: 1.070008397102356,grad_norm: 0.9999992175225705, iteration: 116623
loss: 1.006420612335205,grad_norm: 0.8228431100301674, iteration: 116624
loss: 1.0105284452438354,grad_norm: 0.9771788054948268, iteration: 116625
loss: 1.0862715244293213,grad_norm: 0.9999995549750674, iteration: 116626
loss: 0.978375256061554,grad_norm: 0.8770513596989504, iteration: 116627
loss: 1.0263705253601074,grad_norm: 0.8688951058420187, iteration: 116628
loss: 1.0506994724273682,grad_norm: 0.9999993237099163, iteration: 116629
loss: 0.9512485265731812,grad_norm: 0.9999992626642107, iteration: 116630
loss: 1.0214663743972778,grad_norm: 0.9809088145821238, iteration: 116631
loss: 0.9979608654975891,grad_norm: 0.9999992759334821, iteration: 116632
loss: 1.0099183320999146,grad_norm: 0.9999990660934915, iteration: 116633
loss: 0.999768078327179,grad_norm: 0.9213051831142227, iteration: 116634
loss: 1.0355836153030396,grad_norm: 0.9999994007230517, iteration: 116635
loss: 1.12809419631958,grad_norm: 0.9999999039770047, iteration: 116636
loss: 1.0660686492919922,grad_norm: 0.9999999298381091, iteration: 116637
loss: 1.0206122398376465,grad_norm: 0.9999993117390784, iteration: 116638
loss: 1.0012167692184448,grad_norm: 0.9999993832913712, iteration: 116639
loss: 1.0367387533187866,grad_norm: 0.9999994175613554, iteration: 116640
loss: 1.052977442741394,grad_norm: 0.9999991734190303, iteration: 116641
loss: 0.9974361658096313,grad_norm: 0.9999991785421924, iteration: 116642
loss: 1.0431760549545288,grad_norm: 0.999999766999886, iteration: 116643
loss: 1.0370017290115356,grad_norm: 0.9999995568074361, iteration: 116644
loss: 1.0415846109390259,grad_norm: 0.8532499518037481, iteration: 116645
loss: 0.9875141978263855,grad_norm: 0.952477258154216, iteration: 116646
loss: 1.0027064085006714,grad_norm: 0.9332524313141151, iteration: 116647
loss: 1.1214491128921509,grad_norm: 0.9999993848360442, iteration: 116648
loss: 1.0081371068954468,grad_norm: 0.9943796514750417, iteration: 116649
loss: 1.0183309316635132,grad_norm: 0.9999991444434917, iteration: 116650
loss: 1.0116980075836182,grad_norm: 0.9999991835466022, iteration: 116651
loss: 0.9666668772697449,grad_norm: 0.912290902171661, iteration: 116652
loss: 1.1060435771942139,grad_norm: 0.9480557314088649, iteration: 116653
loss: 1.0172632932662964,grad_norm: 0.8832786484310152, iteration: 116654
loss: 0.99817955493927,grad_norm: 0.9999994447762712, iteration: 116655
loss: 0.988128125667572,grad_norm: 0.9999990236914735, iteration: 116656
loss: 1.0560530424118042,grad_norm: 0.9999996246333633, iteration: 116657
loss: 1.1292630434036255,grad_norm: 0.9999995815662102, iteration: 116658
loss: 1.012438178062439,grad_norm: 0.999999189772775, iteration: 116659
loss: 1.0460035800933838,grad_norm: 0.9999996164227223, iteration: 116660
loss: 1.0290337800979614,grad_norm: 0.999999008222394, iteration: 116661
loss: 1.0191514492034912,grad_norm: 0.8736592446028966, iteration: 116662
loss: 1.000700831413269,grad_norm: 0.9347303786645885, iteration: 116663
loss: 1.0175318717956543,grad_norm: 0.8984582651319183, iteration: 116664
loss: 0.9835867881774902,grad_norm: 0.9243044526654468, iteration: 116665
loss: 1.0724101066589355,grad_norm: 0.9999999471246734, iteration: 116666
loss: 0.9708177447319031,grad_norm: 0.9863095668862157, iteration: 116667
loss: 0.9903570413589478,grad_norm: 0.8796644927018619, iteration: 116668
loss: 0.9746891856193542,grad_norm: 0.819004031607848, iteration: 116669
loss: 1.0156995058059692,grad_norm: 0.9999995384598164, iteration: 116670
loss: 0.9839138388633728,grad_norm: 0.9011666128606539, iteration: 116671
loss: 1.0780811309814453,grad_norm: 0.999999223978694, iteration: 116672
loss: 1.0356906652450562,grad_norm: 0.9999994958698823, iteration: 116673
loss: 1.0084869861602783,grad_norm: 0.9999995827391603, iteration: 116674
loss: 0.9827412962913513,grad_norm: 0.820791398581981, iteration: 116675
loss: 1.0258609056472778,grad_norm: 0.7842112184145013, iteration: 116676
loss: 1.0133179426193237,grad_norm: 0.8979750877999862, iteration: 116677
loss: 1.0336798429489136,grad_norm: 0.8896837070394369, iteration: 116678
loss: 1.0516682863235474,grad_norm: 0.9999996860480427, iteration: 116679
loss: 1.0848405361175537,grad_norm: 1.0000000079934943, iteration: 116680
loss: 1.0219383239746094,grad_norm: 0.9999992304502127, iteration: 116681
loss: 0.9935307502746582,grad_norm: 0.9999991210724058, iteration: 116682
loss: 1.0665239095687866,grad_norm: 0.9999994047939998, iteration: 116683
loss: 0.9617576599121094,grad_norm: 0.9497086790731805, iteration: 116684
loss: 1.0552151203155518,grad_norm: 0.9999996478814512, iteration: 116685
loss: 0.996188223361969,grad_norm: 0.7911928918098653, iteration: 116686
loss: 1.0113461017608643,grad_norm: 0.9999993547146278, iteration: 116687
loss: 1.0219275951385498,grad_norm: 0.9999990965433939, iteration: 116688
loss: 0.9894332885742188,grad_norm: 0.9999991340253683, iteration: 116689
loss: 1.024715542793274,grad_norm: 0.8837667862791719, iteration: 116690
loss: 0.9791846871376038,grad_norm: 0.9999991583216833, iteration: 116691
loss: 1.0425487756729126,grad_norm: 0.9999993861638652, iteration: 116692
loss: 0.9899778962135315,grad_norm: 0.9999990126037435, iteration: 116693
loss: 1.02639639377594,grad_norm: 0.9999992522726401, iteration: 116694
loss: 1.0925923585891724,grad_norm: 0.9999996879065638, iteration: 116695
loss: 1.0500025749206543,grad_norm: 0.9999995288626015, iteration: 116696
loss: 1.075891137123108,grad_norm: 0.9999992245545681, iteration: 116697
loss: 1.0478264093399048,grad_norm: 0.9999993930993558, iteration: 116698
loss: 1.0340102910995483,grad_norm: 0.9999992201700748, iteration: 116699
loss: 1.1668466329574585,grad_norm: 0.9999997897017462, iteration: 116700
loss: 1.0094503164291382,grad_norm: 0.9057708652629404, iteration: 116701
loss: 1.0047578811645508,grad_norm: 0.9999990925935689, iteration: 116702
loss: 1.0545017719268799,grad_norm: 0.9999991462640135, iteration: 116703
loss: 1.0447523593902588,grad_norm: 0.9999993159951084, iteration: 116704
loss: 1.0582594871520996,grad_norm: 0.9603178202320223, iteration: 116705
loss: 1.0563570261001587,grad_norm: 0.9999999298554392, iteration: 116706
loss: 1.2127894163131714,grad_norm: 0.9999994403629046, iteration: 116707
loss: 1.0246365070343018,grad_norm: 0.8342810315120999, iteration: 116708
loss: 1.171966552734375,grad_norm: 0.9999998483344829, iteration: 116709
loss: 1.0087872743606567,grad_norm: 0.9999991272706494, iteration: 116710
loss: 1.030580759048462,grad_norm: 0.9999991176849737, iteration: 116711
loss: 1.0086599588394165,grad_norm: 0.999999462369091, iteration: 116712
loss: 0.9870301485061646,grad_norm: 0.8674886214313031, iteration: 116713
loss: 1.0333019495010376,grad_norm: 0.7594480428822352, iteration: 116714
loss: 0.9951740503311157,grad_norm: 0.9999991949374538, iteration: 116715
loss: 1.077677845954895,grad_norm: 0.9999993670412799, iteration: 116716
loss: 1.0885636806488037,grad_norm: 0.9999991017728697, iteration: 116717
loss: 1.0221184492111206,grad_norm: 0.9999990441274037, iteration: 116718
loss: 0.9955597519874573,grad_norm: 0.9066173881089836, iteration: 116719
loss: 1.0341341495513916,grad_norm: 0.9999999908637788, iteration: 116720
loss: 1.0531686544418335,grad_norm: 0.999999511774041, iteration: 116721
loss: 0.9693147540092468,grad_norm: 0.8522441286542213, iteration: 116722
loss: 1.1279090642929077,grad_norm: 0.9999996565975118, iteration: 116723
loss: 1.0081050395965576,grad_norm: 0.9313588700780365, iteration: 116724
loss: 0.9844833612442017,grad_norm: 0.8584597867511686, iteration: 116725
loss: 1.0768085718154907,grad_norm: 0.9999997428251644, iteration: 116726
loss: 1.0510379076004028,grad_norm: 0.9999997009529553, iteration: 116727
loss: 1.0435878038406372,grad_norm: 0.9999990121120437, iteration: 116728
loss: 1.0053406953811646,grad_norm: 0.938054281874073, iteration: 116729
loss: 1.0359303951263428,grad_norm: 0.9999999948118056, iteration: 116730
loss: 1.0258159637451172,grad_norm: 0.9999993739026992, iteration: 116731
loss: 0.9780199527740479,grad_norm: 0.9477220307525924, iteration: 116732
loss: 0.9996888041496277,grad_norm: 0.9999991694798741, iteration: 116733
loss: 1.0032795667648315,grad_norm: 0.8138283971510812, iteration: 116734
loss: 1.0251882076263428,grad_norm: 0.9999995100237558, iteration: 116735
loss: 0.9913999438285828,grad_norm: 0.999999307150676, iteration: 116736
loss: 1.001037359237671,grad_norm: 0.9303590502250694, iteration: 116737
loss: 0.9991924166679382,grad_norm: 0.999999079436464, iteration: 116738
loss: 1.0275498628616333,grad_norm: 0.8013021141702618, iteration: 116739
loss: 1.0252639055252075,grad_norm: 0.9999994471349287, iteration: 116740
loss: 1.0350416898727417,grad_norm: 0.8944560998068658, iteration: 116741
loss: 1.1198281049728394,grad_norm: 0.9999998764972479, iteration: 116742
loss: 1.0547765493392944,grad_norm: 0.9999998652568769, iteration: 116743
loss: 0.9810810089111328,grad_norm: 0.9999992024957627, iteration: 116744
loss: 1.1052402257919312,grad_norm: 0.9999999572416972, iteration: 116745
loss: 0.9975278377532959,grad_norm: 0.9999990767734943, iteration: 116746
loss: 1.0530916452407837,grad_norm: 0.9999994305172909, iteration: 116747
loss: 0.9890238642692566,grad_norm: 0.9999991965549619, iteration: 116748
loss: 0.9728581309318542,grad_norm: 0.7219780897815052, iteration: 116749
loss: 1.028093695640564,grad_norm: 0.9999999697744889, iteration: 116750
loss: 1.0015685558319092,grad_norm: 1.0000000015640713, iteration: 116751
loss: 0.9903185367584229,grad_norm: 0.9999992431911602, iteration: 116752
loss: 1.0403926372528076,grad_norm: 0.9321118442791577, iteration: 116753
loss: 0.969327986240387,grad_norm: 0.9999991750840016, iteration: 116754
loss: 0.9886460900306702,grad_norm: 0.8874905382089693, iteration: 116755
loss: 1.0047494173049927,grad_norm: 0.979435967488263, iteration: 116756
loss: 1.095686674118042,grad_norm: 0.9999994068995296, iteration: 116757
loss: 1.0167385339736938,grad_norm: 0.9999998221997977, iteration: 116758
loss: 1.1296552419662476,grad_norm: 0.9999996869671602, iteration: 116759
loss: 0.9914217591285706,grad_norm: 0.9999990568003714, iteration: 116760
loss: 1.1074271202087402,grad_norm: 0.9999998655427611, iteration: 116761
loss: 1.0495482683181763,grad_norm: 0.9909128947074924, iteration: 116762
loss: 1.185929775238037,grad_norm: 1.0000000467692576, iteration: 116763
loss: 1.064162015914917,grad_norm: 0.9999998111097623, iteration: 116764
loss: 1.139931559562683,grad_norm: 0.9999997887891262, iteration: 116765
loss: 1.061981201171875,grad_norm: 0.9999997656656694, iteration: 116766
loss: 1.1856533288955688,grad_norm: 0.9999996591748663, iteration: 116767
loss: 1.0917489528656006,grad_norm: 0.9999991680440787, iteration: 116768
loss: 1.297803521156311,grad_norm: 0.9999998272555958, iteration: 116769
loss: 1.0664318799972534,grad_norm: 0.9999998649937417, iteration: 116770
loss: 1.171675682067871,grad_norm: 0.9999998789977914, iteration: 116771
loss: 1.1478809118270874,grad_norm: 0.9999997965958296, iteration: 116772
loss: 0.9873679280281067,grad_norm: 0.9999991872974718, iteration: 116773
loss: 1.015655517578125,grad_norm: 0.9999991394864644, iteration: 116774
loss: 1.1513445377349854,grad_norm: 0.9999991817001961, iteration: 116775
loss: 1.1174815893173218,grad_norm: 0.9999991997544604, iteration: 116776
loss: 1.1007518768310547,grad_norm: 0.9999997426292914, iteration: 116777
loss: 1.2763749361038208,grad_norm: 0.9999999099053228, iteration: 116778
loss: 1.1228041648864746,grad_norm: 0.9999996700163892, iteration: 116779
loss: 1.0839418172836304,grad_norm: 0.9999999114201849, iteration: 116780
loss: 1.2495828866958618,grad_norm: 1.0000000474356063, iteration: 116781
loss: 1.0807397365570068,grad_norm: 0.9999993681354044, iteration: 116782
loss: 1.0543289184570312,grad_norm: 0.999999010177246, iteration: 116783
loss: 0.953620195388794,grad_norm: 0.8584935723355455, iteration: 116784
loss: 0.9801816940307617,grad_norm: 0.9999997930533842, iteration: 116785
loss: 1.174343228340149,grad_norm: 0.9999995689205022, iteration: 116786
loss: 1.0646498203277588,grad_norm: 0.9999996861747021, iteration: 116787
loss: 1.002917766571045,grad_norm: 0.9139020369473323, iteration: 116788
loss: 0.9922980070114136,grad_norm: 0.9999994455505696, iteration: 116789
loss: 0.9922260642051697,grad_norm: 0.9999992478034253, iteration: 116790
loss: 1.0842087268829346,grad_norm: 0.9999997723114982, iteration: 116791
loss: 1.027904748916626,grad_norm: 0.9999994450022006, iteration: 116792
loss: 1.1117708683013916,grad_norm: 0.9999999509797396, iteration: 116793
loss: 0.9903513789176941,grad_norm: 0.9709627153123536, iteration: 116794
loss: 1.0009691715240479,grad_norm: 0.9099857527128591, iteration: 116795
loss: 0.983512818813324,grad_norm: 0.8911530878496384, iteration: 116796
loss: 0.9964757561683655,grad_norm: 0.9351251348671487, iteration: 116797
loss: 1.0512332916259766,grad_norm: 0.9999992196700987, iteration: 116798
loss: 1.0345348119735718,grad_norm: 0.9999992994943439, iteration: 116799
loss: 1.0218210220336914,grad_norm: 0.9999996091263139, iteration: 116800
loss: 1.046292781829834,grad_norm: 0.9999994375983939, iteration: 116801
loss: 1.0013788938522339,grad_norm: 0.7460023074750957, iteration: 116802
loss: 1.0674140453338623,grad_norm: 0.9999996946838022, iteration: 116803
loss: 1.0215299129486084,grad_norm: 0.9999991726320151, iteration: 116804
loss: 0.9968876838684082,grad_norm: 0.9999990560204054, iteration: 116805
loss: 1.0030899047851562,grad_norm: 0.9999991761640943, iteration: 116806
loss: 1.0215744972229004,grad_norm: 0.999999127437723, iteration: 116807
loss: 0.9995193481445312,grad_norm: 0.9999993845829096, iteration: 116808
loss: 1.0873327255249023,grad_norm: 0.9999999337051155, iteration: 116809
loss: 1.0670795440673828,grad_norm: 0.9999996242974473, iteration: 116810
loss: 1.0151288509368896,grad_norm: 0.9999992052003756, iteration: 116811
loss: 1.0067249536514282,grad_norm: 0.999999328269654, iteration: 116812
loss: 1.128761649131775,grad_norm: 0.9999998790366534, iteration: 116813
loss: 1.1378848552703857,grad_norm: 0.9999990665696267, iteration: 116814
loss: 1.0038106441497803,grad_norm: 0.9858367949069617, iteration: 116815
loss: 0.9772122502326965,grad_norm: 0.9999993993724798, iteration: 116816
loss: 1.0288755893707275,grad_norm: 0.999999092217652, iteration: 116817
loss: 1.0286002159118652,grad_norm: 0.9999997157216447, iteration: 116818
loss: 0.9704998135566711,grad_norm: 0.9127328846233628, iteration: 116819
loss: 1.1838600635528564,grad_norm: 0.9999998265825519, iteration: 116820
loss: 1.0162227153778076,grad_norm: 0.9292239908713024, iteration: 116821
loss: 1.1125359535217285,grad_norm: 0.9999994955277023, iteration: 116822
loss: 1.0185271501541138,grad_norm: 0.8266026343595126, iteration: 116823
loss: 1.0065101385116577,grad_norm: 0.9200134501615981, iteration: 116824
loss: 1.1494544744491577,grad_norm: 0.9999999149905737, iteration: 116825
loss: 1.0082441568374634,grad_norm: 0.9999997544489381, iteration: 116826
loss: 1.005311131477356,grad_norm: 0.9999990022981647, iteration: 116827
loss: 1.0725793838500977,grad_norm: 0.9999991105091873, iteration: 116828
loss: 1.0129133462905884,grad_norm: 0.9999994119299597, iteration: 116829
loss: 1.0969150066375732,grad_norm: 0.999999757103276, iteration: 116830
loss: 1.0414271354675293,grad_norm: 0.9999990323040402, iteration: 116831
loss: 1.050305724143982,grad_norm: 0.9999996104227566, iteration: 116832
loss: 1.014107584953308,grad_norm: 0.7926449629711596, iteration: 116833
loss: 0.9646337628364563,grad_norm: 0.9100468109452401, iteration: 116834
loss: 1.0023428201675415,grad_norm: 0.8749365496313792, iteration: 116835
loss: 1.0036081075668335,grad_norm: 0.8990939306689993, iteration: 116836
loss: 1.0087083578109741,grad_norm: 0.9999995515430712, iteration: 116837
loss: 1.1119321584701538,grad_norm: 0.999999304968097, iteration: 116838
loss: 0.9864138960838318,grad_norm: 0.9049030137165495, iteration: 116839
loss: 1.1664637327194214,grad_norm: 0.9999998620439864, iteration: 116840
loss: 1.0128517150878906,grad_norm: 0.999999175407876, iteration: 116841
loss: 1.009820818901062,grad_norm: 0.9999991816612407, iteration: 116842
loss: 1.0223865509033203,grad_norm: 0.9583330557061498, iteration: 116843
loss: 1.0359827280044556,grad_norm: 0.9528140109518265, iteration: 116844
loss: 1.0994946956634521,grad_norm: 0.9999999332807037, iteration: 116845
loss: 1.0241178274154663,grad_norm: 0.9286655530403427, iteration: 116846
loss: 1.002279281616211,grad_norm: 0.9999995115183752, iteration: 116847
loss: 1.0013625621795654,grad_norm: 0.9773451796344276, iteration: 116848
loss: 1.011497139930725,grad_norm: 0.9999990769578662, iteration: 116849
loss: 0.9948917031288147,grad_norm: 0.8540832526943615, iteration: 116850
loss: 0.9949310421943665,grad_norm: 0.9149354492907965, iteration: 116851
loss: 1.0368428230285645,grad_norm: 0.8447121053859545, iteration: 116852
loss: 1.0252740383148193,grad_norm: 0.9999990901621254, iteration: 116853
loss: 1.120884895324707,grad_norm: 0.9999998929024918, iteration: 116854
loss: 0.9857844710350037,grad_norm: 0.9999991849332106, iteration: 116855
loss: 1.029242992401123,grad_norm: 0.9999997311496781, iteration: 116856
loss: 1.005675196647644,grad_norm: 0.9197691257087359, iteration: 116857
loss: 1.0376392602920532,grad_norm: 0.9999995217183462, iteration: 116858
loss: 1.0051636695861816,grad_norm: 0.9274132444314718, iteration: 116859
loss: 0.9957389831542969,grad_norm: 0.8651932466430328, iteration: 116860
loss: 1.0182936191558838,grad_norm: 0.9999990482986142, iteration: 116861
loss: 1.028270959854126,grad_norm: 0.9659284856759724, iteration: 116862
loss: 1.0195717811584473,grad_norm: 0.9272621321313437, iteration: 116863
loss: 1.0450167655944824,grad_norm: 0.999999759095029, iteration: 116864
loss: 0.9749858379364014,grad_norm: 0.8800511508893571, iteration: 116865
loss: 1.0735174417495728,grad_norm: 0.9999995117342171, iteration: 116866
loss: 0.9683337211608887,grad_norm: 0.8423298740014811, iteration: 116867
loss: 1.0394312143325806,grad_norm: 0.9999989806090186, iteration: 116868
loss: 1.0531738996505737,grad_norm: 0.999999102572241, iteration: 116869
loss: 1.0025349855422974,grad_norm: 0.9999990773945742, iteration: 116870
loss: 1.048105001449585,grad_norm: 0.9999993992387314, iteration: 116871
loss: 1.0140570402145386,grad_norm: 0.9133395092081755, iteration: 116872
loss: 1.1059019565582275,grad_norm: 0.9999992681661855, iteration: 116873
loss: 1.070117712020874,grad_norm: 0.9999999151963134, iteration: 116874
loss: 1.0301823616027832,grad_norm: 0.9999989800726614, iteration: 116875
loss: 0.9586252570152283,grad_norm: 0.993018356835516, iteration: 116876
loss: 1.0521072149276733,grad_norm: 0.9999998428270349, iteration: 116877
loss: 1.0080987215042114,grad_norm: 0.9999992844830724, iteration: 116878
loss: 0.9742871522903442,grad_norm: 0.9735048340916435, iteration: 116879
loss: 0.9712449908256531,grad_norm: 0.9999991166035824, iteration: 116880
loss: 1.0059977769851685,grad_norm: 0.9999990488785954, iteration: 116881
loss: 0.9894289970397949,grad_norm: 0.9446823782635014, iteration: 116882
loss: 1.0424357652664185,grad_norm: 0.9999993489866972, iteration: 116883
loss: 1.0421003103256226,grad_norm: 0.9999990957028297, iteration: 116884
loss: 1.0214145183563232,grad_norm: 0.8485228229800988, iteration: 116885
loss: 0.9915255904197693,grad_norm: 0.9999991122526474, iteration: 116886
loss: 1.028723955154419,grad_norm: 0.9999991700152505, iteration: 116887
loss: 1.016530990600586,grad_norm: 0.880787123230442, iteration: 116888
loss: 1.0125346183776855,grad_norm: 0.9999991926567093, iteration: 116889
loss: 1.021567463874817,grad_norm: 0.8991851947348677, iteration: 116890
loss: 0.9879207015037537,grad_norm: 0.9999990311675161, iteration: 116891
loss: 0.9635016918182373,grad_norm: 0.9756850030361907, iteration: 116892
loss: 0.9946492314338684,grad_norm: 0.9999993486097286, iteration: 116893
loss: 1.117720365524292,grad_norm: 0.9999996843492382, iteration: 116894
loss: 1.002943515777588,grad_norm: 0.7818397537560742, iteration: 116895
loss: 1.0568735599517822,grad_norm: 0.9999992921192469, iteration: 116896
loss: 1.0820646286010742,grad_norm: 0.9999994790375168, iteration: 116897
loss: 1.0451159477233887,grad_norm: 0.9999991443160171, iteration: 116898
loss: 0.9817073345184326,grad_norm: 0.8833220985211817, iteration: 116899
loss: 1.0191388130187988,grad_norm: 0.9999992547467444, iteration: 116900
loss: 1.0330640077590942,grad_norm: 0.999999215818026, iteration: 116901
loss: 1.033853530883789,grad_norm: 0.7393750690912374, iteration: 116902
loss: 1.0460786819458008,grad_norm: 0.999999425867045, iteration: 116903
loss: 1.009061574935913,grad_norm: 0.9999990122991465, iteration: 116904
loss: 0.9823286533355713,grad_norm: 0.9999990761131283, iteration: 116905
loss: 1.013873815536499,grad_norm: 0.9999992790137614, iteration: 116906
loss: 1.0303525924682617,grad_norm: 0.836256206337408, iteration: 116907
loss: 0.9991044402122498,grad_norm: 0.8281404709822027, iteration: 116908
loss: 1.0810600519180298,grad_norm: 0.8960649509780896, iteration: 116909
loss: 1.0026261806488037,grad_norm: 0.9999990054865407, iteration: 116910
loss: 1.0530917644500732,grad_norm: 0.9676662342875826, iteration: 116911
loss: 1.0644865036010742,grad_norm: 0.9999994047756764, iteration: 116912
loss: 1.0606982707977295,grad_norm: 0.8293905756735604, iteration: 116913
loss: 0.9916860461235046,grad_norm: 0.9106776694648873, iteration: 116914
loss: 1.0274873971939087,grad_norm: 0.8200026206610475, iteration: 116915
loss: 1.0238066911697388,grad_norm: 0.8692480236580963, iteration: 116916
loss: 0.9795123934745789,grad_norm: 0.9999992254379544, iteration: 116917
loss: 0.9868206977844238,grad_norm: 0.8316119578217593, iteration: 116918
loss: 0.9950289130210876,grad_norm: 0.9607917489281117, iteration: 116919
loss: 1.048203945159912,grad_norm: 0.8722862862244642, iteration: 116920
loss: 1.055051565170288,grad_norm: 0.9999992493352488, iteration: 116921
loss: 1.2309788465499878,grad_norm: 0.9999999137702333, iteration: 116922
loss: 0.9945096969604492,grad_norm: 0.9999994444744221, iteration: 116923
loss: 1.0039194822311401,grad_norm: 0.9119893834935852, iteration: 116924
loss: 0.9858357906341553,grad_norm: 0.9999990902614788, iteration: 116925
loss: 1.0041896104812622,grad_norm: 0.8300387910946069, iteration: 116926
loss: 0.9839897751808167,grad_norm: 0.8560448792576253, iteration: 116927
loss: 1.0339761972427368,grad_norm: 0.8571963766112682, iteration: 116928
loss: 1.0012602806091309,grad_norm: 0.9999992267462445, iteration: 116929
loss: 0.9637635946273804,grad_norm: 0.810036687330263, iteration: 116930
loss: 0.9802015423774719,grad_norm: 0.959422807373475, iteration: 116931
loss: 0.9766780138015747,grad_norm: 0.999999065823621, iteration: 116932
loss: 1.021228313446045,grad_norm: 0.9083942540992521, iteration: 116933
loss: 0.9859110116958618,grad_norm: 0.8110976138673469, iteration: 116934
loss: 1.025617241859436,grad_norm: 0.8214844434592282, iteration: 116935
loss: 1.0355867147445679,grad_norm: 0.9999998589109461, iteration: 116936
loss: 0.9946436285972595,grad_norm: 0.9999991234667831, iteration: 116937
loss: 0.9809927344322205,grad_norm: 0.9999991513957958, iteration: 116938
loss: 1.0182551145553589,grad_norm: 0.955130289518682, iteration: 116939
loss: 1.0015594959259033,grad_norm: 0.9207881732990776, iteration: 116940
loss: 1.0167180299758911,grad_norm: 0.9098313457945597, iteration: 116941
loss: 0.9763579368591309,grad_norm: 0.9999999635028799, iteration: 116942
loss: 1.047015905380249,grad_norm: 0.9999990660323936, iteration: 116943
loss: 0.984112560749054,grad_norm: 0.9999993552358204, iteration: 116944
loss: 1.0026295185089111,grad_norm: 0.9398882482353944, iteration: 116945
loss: 1.0640175342559814,grad_norm: 0.9999992185501161, iteration: 116946
loss: 1.0403740406036377,grad_norm: 0.9999995010355142, iteration: 116947
loss: 1.0126858949661255,grad_norm: 0.9999993070998323, iteration: 116948
loss: 0.9840728044509888,grad_norm: 0.8913182442398293, iteration: 116949
loss: 1.0037883520126343,grad_norm: 0.9498612618044251, iteration: 116950
loss: 0.9880207777023315,grad_norm: 0.8174075898134313, iteration: 116951
loss: 0.9837432503700256,grad_norm: 0.7337741342759664, iteration: 116952
loss: 0.9779443144798279,grad_norm: 0.9999991440258923, iteration: 116953
loss: 1.0079652070999146,grad_norm: 0.8854116606006143, iteration: 116954
loss: 1.0122292041778564,grad_norm: 0.8779503856426508, iteration: 116955
loss: 1.032976746559143,grad_norm: 0.9999995444078957, iteration: 116956
loss: 1.0028011798858643,grad_norm: 0.9999994040274214, iteration: 116957
loss: 1.0058348178863525,grad_norm: 0.9999996651442203, iteration: 116958
loss: 1.0598342418670654,grad_norm: 0.9999994174544661, iteration: 116959
loss: 0.998830258846283,grad_norm: 0.9999990771065307, iteration: 116960
loss: 0.9999547004699707,grad_norm: 1.000000032860811, iteration: 116961
loss: 1.2014251947402954,grad_norm: 0.9999998474193058, iteration: 116962
loss: 0.975165843963623,grad_norm: 0.9999993990113962, iteration: 116963
loss: 0.9963782429695129,grad_norm: 0.9201784549357775, iteration: 116964
loss: 0.9678760170936584,grad_norm: 0.9999991048382953, iteration: 116965
loss: 0.9944658875465393,grad_norm: 0.834655764181024, iteration: 116966
loss: 1.0330591201782227,grad_norm: 0.9691901467451768, iteration: 116967
loss: 1.057862639427185,grad_norm: 0.8735179576856165, iteration: 116968
loss: 0.9885582327842712,grad_norm: 0.772105001976255, iteration: 116969
loss: 1.3878878355026245,grad_norm: 0.9999998845017051, iteration: 116970
loss: 1.0082122087478638,grad_norm: 0.8156062513341424, iteration: 116971
loss: 0.9956878423690796,grad_norm: 0.9999992470142702, iteration: 116972
loss: 1.022749900817871,grad_norm: 0.9999991205847368, iteration: 116973
loss: 1.0031579732894897,grad_norm: 0.8620584017941987, iteration: 116974
loss: 0.9826977849006653,grad_norm: 0.9106647429218504, iteration: 116975
loss: 1.1205894947052002,grad_norm: 0.9999993938049098, iteration: 116976
loss: 1.0158092975616455,grad_norm: 0.9999991101731415, iteration: 116977
loss: 1.0410332679748535,grad_norm: 0.9999994742855939, iteration: 116978
loss: 1.0443291664123535,grad_norm: 0.99999982907008, iteration: 116979
loss: 1.0310864448547363,grad_norm: 0.9999996614958212, iteration: 116980
loss: 1.0153391361236572,grad_norm: 0.9592721185100272, iteration: 116981
loss: 1.0091397762298584,grad_norm: 0.9952527455773776, iteration: 116982
loss: 0.9734073281288147,grad_norm: 0.8807808396727974, iteration: 116983
loss: 1.0871084928512573,grad_norm: 0.9999992661757156, iteration: 116984
loss: 1.0691847801208496,grad_norm: 0.9999992964223119, iteration: 116985
loss: 1.0545291900634766,grad_norm: 0.9663715588587984, iteration: 116986
loss: 1.0076013803482056,grad_norm: 0.974823432621229, iteration: 116987
loss: 0.9774190783500671,grad_norm: 0.7963896689731848, iteration: 116988
loss: 1.0150256156921387,grad_norm: 0.9999991833881097, iteration: 116989
loss: 1.0417211055755615,grad_norm: 0.9999991953763017, iteration: 116990
loss: 1.0339027643203735,grad_norm: 0.9797448647299355, iteration: 116991
loss: 0.9899953603744507,grad_norm: 0.8058266957318149, iteration: 116992
loss: 1.050230622291565,grad_norm: 0.9999997145301658, iteration: 116993
loss: 1.0303133726119995,grad_norm: 0.999999113733818, iteration: 116994
loss: 0.9938637018203735,grad_norm: 0.9250112853501485, iteration: 116995
loss: 1.0280309915542603,grad_norm: 0.9971105947361054, iteration: 116996
loss: 0.9978532195091248,grad_norm: 0.856807022440137, iteration: 116997
loss: 1.0428309440612793,grad_norm: 0.9999993110789446, iteration: 116998
loss: 1.011800765991211,grad_norm: 0.920703601915715, iteration: 116999
loss: 1.0680114030838013,grad_norm: 0.955500865791818, iteration: 117000
loss: 1.0225884914398193,grad_norm: 0.8152839830324495, iteration: 117001
loss: 0.997444212436676,grad_norm: 0.753265974804513, iteration: 117002
loss: 1.0015376806259155,grad_norm: 0.9999993018834771, iteration: 117003
loss: 1.019119381904602,grad_norm: 0.8692682884439759, iteration: 117004
loss: 0.9872539043426514,grad_norm: 0.9999991984233142, iteration: 117005
loss: 1.021133303642273,grad_norm: 0.9999993122379783, iteration: 117006
loss: 0.9463315010070801,grad_norm: 0.9999992479889089, iteration: 117007
loss: 0.999232292175293,grad_norm: 0.9536162874394117, iteration: 117008
loss: 0.9860699772834778,grad_norm: 0.8347040625446103, iteration: 117009
loss: 0.9656808972358704,grad_norm: 0.9999998470951298, iteration: 117010
loss: 1.0296157598495483,grad_norm: 0.9999991433122468, iteration: 117011
loss: 1.0028414726257324,grad_norm: 0.9149871766808887, iteration: 117012
loss: 1.0194025039672852,grad_norm: 0.9999990910244543, iteration: 117013
loss: 1.0052591562271118,grad_norm: 0.9999996936070924, iteration: 117014
loss: 0.9869494438171387,grad_norm: 0.9999991117536112, iteration: 117015
loss: 1.0851950645446777,grad_norm: 0.9999996224894664, iteration: 117016
loss: 1.205499529838562,grad_norm: 0.9999999175884955, iteration: 117017
loss: 1.0109269618988037,grad_norm: 0.8493764565973972, iteration: 117018
loss: 1.2184189558029175,grad_norm: 0.999999563499306, iteration: 117019
loss: 1.0130549669265747,grad_norm: 0.9565442226555291, iteration: 117020
loss: 1.0314042568206787,grad_norm: 0.9999996554395973, iteration: 117021
loss: 1.0319167375564575,grad_norm: 0.8536023564370075, iteration: 117022
loss: 1.2033137083053589,grad_norm: 0.999999577620571, iteration: 117023
loss: 1.0315231084823608,grad_norm: 0.9999991949942532, iteration: 117024
loss: 1.0274033546447754,grad_norm: 0.972669434656127, iteration: 117025
loss: 0.9940022826194763,grad_norm: 0.929942406819304, iteration: 117026
loss: 1.0671594142913818,grad_norm: 0.9323879555105377, iteration: 117027
loss: 1.322870135307312,grad_norm: 0.9999999129671499, iteration: 117028
loss: 1.049143671989441,grad_norm: 0.9341323152746142, iteration: 117029
loss: 0.9661357402801514,grad_norm: 0.9999991357543918, iteration: 117030
loss: 0.9697412252426147,grad_norm: 0.9258481501904001, iteration: 117031
loss: 1.0833851099014282,grad_norm: 0.9999999508356391, iteration: 117032
loss: 0.9926454424858093,grad_norm: 0.9999990805056782, iteration: 117033
loss: 1.037972331047058,grad_norm: 0.9999990930544985, iteration: 117034
loss: 1.0678083896636963,grad_norm: 0.9999991917211166, iteration: 117035
loss: 1.0916826725006104,grad_norm: 0.9999993216193331, iteration: 117036
loss: 1.143090844154358,grad_norm: 0.9999996583614494, iteration: 117037
loss: 1.0886136293411255,grad_norm: 0.9999999215903433, iteration: 117038
loss: 1.0674338340759277,grad_norm: 0.9999999258727492, iteration: 117039
loss: 1.1131045818328857,grad_norm: 0.9437950973311826, iteration: 117040
loss: 1.051261305809021,grad_norm: 0.9999991179456138, iteration: 117041
loss: 1.0910675525665283,grad_norm: 0.9999992595236498, iteration: 117042
loss: 1.0533546209335327,grad_norm: 0.9999992894627919, iteration: 117043
loss: 0.9914888739585876,grad_norm: 0.8367627828378353, iteration: 117044
loss: 1.0161722898483276,grad_norm: 0.9999992283222121, iteration: 117045
loss: 1.0148650407791138,grad_norm: 0.999999727967582, iteration: 117046
loss: 0.9940186738967896,grad_norm: 0.9918827263588209, iteration: 117047
loss: 1.0129289627075195,grad_norm: 0.9999990237770734, iteration: 117048
loss: 1.025795578956604,grad_norm: 0.9999993851789919, iteration: 117049
loss: 1.0162066221237183,grad_norm: 0.9999993129363612, iteration: 117050
loss: 1.0988428592681885,grad_norm: 0.999999282394861, iteration: 117051
loss: 1.0531282424926758,grad_norm: 0.9999998161119794, iteration: 117052
loss: 1.0535258054733276,grad_norm: 0.9999991867472018, iteration: 117053
loss: 1.0537492036819458,grad_norm: 0.8596038999800871, iteration: 117054
loss: 0.9972848892211914,grad_norm: 0.9400018100294139, iteration: 117055
loss: 1.0749963521957397,grad_norm: 0.9999992129128176, iteration: 117056
loss: 1.0037662982940674,grad_norm: 0.9999991902239148, iteration: 117057
loss: 1.035055160522461,grad_norm: 0.9999991215039502, iteration: 117058
loss: 1.077354907989502,grad_norm: 0.933478264715816, iteration: 117059
loss: 1.0814117193222046,grad_norm: 0.9999992680121388, iteration: 117060
loss: 1.029378056526184,grad_norm: 0.9999997650576958, iteration: 117061
loss: 1.0431041717529297,grad_norm: 0.9015273778887275, iteration: 117062
loss: 1.0029382705688477,grad_norm: 0.9999991587912482, iteration: 117063
loss: 0.9935269355773926,grad_norm: 0.938055374690969, iteration: 117064
loss: 1.051852822303772,grad_norm: 0.9999995083697509, iteration: 117065
loss: 1.0159677267074585,grad_norm: 0.9999992182152373, iteration: 117066
loss: 1.0892380475997925,grad_norm: 0.9999990430390554, iteration: 117067
loss: 1.0015641450881958,grad_norm: 0.8917561320401403, iteration: 117068
loss: 1.104669451713562,grad_norm: 0.9999997547691433, iteration: 117069
loss: 0.9999048709869385,grad_norm: 0.9999992486386403, iteration: 117070
loss: 1.1023004055023193,grad_norm: 0.9999992491691727, iteration: 117071
loss: 1.0257128477096558,grad_norm: 0.9999993062422555, iteration: 117072
loss: 1.0325554609298706,grad_norm: 0.9999994171614457, iteration: 117073
loss: 1.1404640674591064,grad_norm: 0.9999998353451576, iteration: 117074
loss: 0.9733847975730896,grad_norm: 0.99999950390654, iteration: 117075
loss: 1.0171914100646973,grad_norm: 0.9999998828551913, iteration: 117076
loss: 1.1204888820648193,grad_norm: 0.9999995402002693, iteration: 117077
loss: 1.0639883279800415,grad_norm: 0.871491640529152, iteration: 117078
loss: 1.060317873954773,grad_norm: 0.9999996310030173, iteration: 117079
loss: 1.028131365776062,grad_norm: 0.9999991634117239, iteration: 117080
loss: 0.9926590919494629,grad_norm: 0.9999991167288902, iteration: 117081
loss: 1.0127636194229126,grad_norm: 0.9999999069661378, iteration: 117082
loss: 1.020257592201233,grad_norm: 0.9815903305136868, iteration: 117083
loss: 1.0059614181518555,grad_norm: 0.9999992473993471, iteration: 117084
loss: 0.9887924194335938,grad_norm: 0.8894460464612127, iteration: 117085
loss: 1.005622148513794,grad_norm: 0.8675147917492937, iteration: 117086
loss: 1.0385475158691406,grad_norm: 0.9999992410471374, iteration: 117087
loss: 1.0651599168777466,grad_norm: 0.9999993354111865, iteration: 117088
loss: 1.0102838277816772,grad_norm: 0.9999992419380039, iteration: 117089
loss: 0.983978271484375,grad_norm: 0.9831628019423646, iteration: 117090
loss: 1.0884292125701904,grad_norm: 0.999999628389077, iteration: 117091
loss: 1.0065183639526367,grad_norm: 0.9999993304323094, iteration: 117092
loss: 1.0912752151489258,grad_norm: 0.9999996585312543, iteration: 117093
loss: 1.0604069232940674,grad_norm: 0.9999995030182997, iteration: 117094
loss: 1.042175531387329,grad_norm: 0.9999996597104103, iteration: 117095
loss: 0.9597474336624146,grad_norm: 0.8972273294842383, iteration: 117096
loss: 0.9921425580978394,grad_norm: 0.999999238600022, iteration: 117097
loss: 1.1036014556884766,grad_norm: 0.9999997010936174, iteration: 117098
loss: 0.9976227283477783,grad_norm: 0.9697564022246631, iteration: 117099
loss: 1.0090460777282715,grad_norm: 0.9010964670948921, iteration: 117100
loss: 1.0302304029464722,grad_norm: 0.999999030687412, iteration: 117101
loss: 0.9536059498786926,grad_norm: 0.9933945240231445, iteration: 117102
loss: 1.0675969123840332,grad_norm: 0.9999993203801767, iteration: 117103
loss: 0.9724380373954773,grad_norm: 0.9999989364395323, iteration: 117104
loss: 1.0028208494186401,grad_norm: 0.8610579522093458, iteration: 117105
loss: 1.0357617139816284,grad_norm: 0.9999992310553473, iteration: 117106
loss: 1.0298142433166504,grad_norm: 0.9999991672679702, iteration: 117107
loss: 1.0252782106399536,grad_norm: 0.9336536533367904, iteration: 117108
loss: 0.9657912850379944,grad_norm: 0.9999994482976702, iteration: 117109
loss: 1.0111335515975952,grad_norm: 0.8593082510953086, iteration: 117110
loss: 0.9820022583007812,grad_norm: 0.982658247017685, iteration: 117111
loss: 1.0115697383880615,grad_norm: 0.8522271222188076, iteration: 117112
loss: 1.0130467414855957,grad_norm: 0.9999992596726305, iteration: 117113
loss: 1.0550739765167236,grad_norm: 0.999999418766355, iteration: 117114
loss: 0.9745105504989624,grad_norm: 0.9838805482972424, iteration: 117115
loss: 1.0093903541564941,grad_norm: 0.9999995842889724, iteration: 117116
loss: 0.9980831146240234,grad_norm: 0.8130941550775963, iteration: 117117
loss: 0.9879757761955261,grad_norm: 0.999999839945244, iteration: 117118
loss: 1.0493606328964233,grad_norm: 0.9999989559901463, iteration: 117119
loss: 0.9931501746177673,grad_norm: 0.9721310815502103, iteration: 117120
loss: 1.0445514917373657,grad_norm: 0.9344642006076032, iteration: 117121
loss: 1.0498840808868408,grad_norm: 0.7263250702731373, iteration: 117122
loss: 0.9821866750717163,grad_norm: 0.8738214183911465, iteration: 117123
loss: 1.0210379362106323,grad_norm: 0.9983751985414001, iteration: 117124
loss: 1.100611686706543,grad_norm: 0.9999993028428301, iteration: 117125
loss: 1.0061390399932861,grad_norm: 0.9999990534013843, iteration: 117126
loss: 1.1005505323410034,grad_norm: 0.9999991471642933, iteration: 117127
loss: 1.013105869293213,grad_norm: 0.9999993004522346, iteration: 117128
loss: 1.0800292491912842,grad_norm: 0.9999993299231291, iteration: 117129
loss: 1.0654048919677734,grad_norm: 0.9999993242751579, iteration: 117130
loss: 1.0612387657165527,grad_norm: 0.831351518727898, iteration: 117131
loss: 0.9752920866012573,grad_norm: 0.913924650156347, iteration: 117132
loss: 1.0384845733642578,grad_norm: 0.9999990139843941, iteration: 117133
loss: 1.0526492595672607,grad_norm: 0.9999998055156186, iteration: 117134
loss: 1.0233657360076904,grad_norm: 0.9459195324302516, iteration: 117135
loss: 1.01374351978302,grad_norm: 0.9109620343677343, iteration: 117136
loss: 1.1158403158187866,grad_norm: 0.9999995178783172, iteration: 117137
loss: 1.0850563049316406,grad_norm: 0.9999991427304824, iteration: 117138
loss: 1.0836063623428345,grad_norm: 0.9064879950959209, iteration: 117139
loss: 1.0557528734207153,grad_norm: 0.999999474370064, iteration: 117140
loss: 1.0817368030548096,grad_norm: 0.9999998708550099, iteration: 117141
loss: 1.0151408910751343,grad_norm: 0.9999992211951113, iteration: 117142
loss: 1.082202672958374,grad_norm: 0.9998202501058862, iteration: 117143
loss: 1.0343084335327148,grad_norm: 0.9999996810774011, iteration: 117144
loss: 1.0545202493667603,grad_norm: 0.977115227639193, iteration: 117145
loss: 1.1789836883544922,grad_norm: 0.999999082393994, iteration: 117146
loss: 0.9877040386199951,grad_norm: 0.9385269221884356, iteration: 117147
loss: 1.0606863498687744,grad_norm: 0.9999997252799403, iteration: 117148
loss: 1.038662075996399,grad_norm: 0.9999991144988208, iteration: 117149
loss: 1.0432472229003906,grad_norm: 1.0000000943893534, iteration: 117150
loss: 1.0183289051055908,grad_norm: 0.9999996218424452, iteration: 117151
loss: 1.0067870616912842,grad_norm: 0.9999998305851693, iteration: 117152
loss: 0.9819352626800537,grad_norm: 1.0000000101234734, iteration: 117153
loss: 1.0383676290512085,grad_norm: 0.9999994819185744, iteration: 117154
loss: 1.0141123533248901,grad_norm: 0.9087516251114229, iteration: 117155
loss: 1.0531294345855713,grad_norm: 0.9999998805235815, iteration: 117156
loss: 0.9831701517105103,grad_norm: 0.9966952051745872, iteration: 117157
loss: 1.037415623664856,grad_norm: 0.9191400407173707, iteration: 117158
loss: 1.008256435394287,grad_norm: 0.9388195866160066, iteration: 117159
loss: 1.0293121337890625,grad_norm: 0.9999994985271319, iteration: 117160
loss: 1.0106052160263062,grad_norm: 0.8411947572798936, iteration: 117161
loss: 0.993816077709198,grad_norm: 0.999999725400345, iteration: 117162
loss: 1.0612949132919312,grad_norm: 0.907785930626142, iteration: 117163
loss: 1.0179204940795898,grad_norm: 0.8906435436141718, iteration: 117164
loss: 1.2181246280670166,grad_norm: 0.999999530846617, iteration: 117165
loss: 0.9812905788421631,grad_norm: 0.7477129701719537, iteration: 117166
loss: 1.0263004302978516,grad_norm: 0.9999997558563994, iteration: 117167
loss: 1.035995602607727,grad_norm: 0.9999993609262826, iteration: 117168
loss: 1.0852261781692505,grad_norm: 0.9999992304602462, iteration: 117169
loss: 0.9964523315429688,grad_norm: 0.9882203479815983, iteration: 117170
loss: 0.9912087321281433,grad_norm: 0.9919443887039247, iteration: 117171
loss: 1.0261482000350952,grad_norm: 0.9999991146125842, iteration: 117172
loss: 1.0237537622451782,grad_norm: 0.9999990826858818, iteration: 117173
loss: 1.0266588926315308,grad_norm: 0.999999627330083, iteration: 117174
loss: 1.0121946334838867,grad_norm: 0.9999991466108473, iteration: 117175
loss: 0.9885086417198181,grad_norm: 0.9999990453081444, iteration: 117176
loss: 0.9827138781547546,grad_norm: 0.9999996562926493, iteration: 117177
loss: 1.0094213485717773,grad_norm: 0.9999991020800527, iteration: 117178
loss: 1.0530767440795898,grad_norm: 0.999999629135652, iteration: 117179
loss: 1.0078480243682861,grad_norm: 0.9999998656878204, iteration: 117180
loss: 1.0323245525360107,grad_norm: 0.9150555931983106, iteration: 117181
loss: 0.9795826077461243,grad_norm: 0.7950898625837933, iteration: 117182
loss: 1.0239664316177368,grad_norm: 0.9617170398231286, iteration: 117183
loss: 1.0069276094436646,grad_norm: 0.9999995039095656, iteration: 117184
loss: 1.0863327980041504,grad_norm: 0.9999998949075721, iteration: 117185
loss: 1.0283944606781006,grad_norm: 0.9121802215602562, iteration: 117186
loss: 0.9771978855133057,grad_norm: 0.9999991486871622, iteration: 117187
loss: 1.0557866096496582,grad_norm: 0.999999244502352, iteration: 117188
loss: 1.018097162246704,grad_norm: 0.9321714557479298, iteration: 117189
loss: 1.0000262260437012,grad_norm: 0.7855779147713475, iteration: 117190
loss: 1.0025967359542847,grad_norm: 0.8988299666986671, iteration: 117191
loss: 0.9958242177963257,grad_norm: 0.8466824649218181, iteration: 117192
loss: 1.0032554864883423,grad_norm: 0.9181941042131618, iteration: 117193
loss: 1.0511301755905151,grad_norm: 0.9999998023072006, iteration: 117194
loss: 1.0162405967712402,grad_norm: 0.9999990805467163, iteration: 117195
loss: 0.9991157650947571,grad_norm: 0.9999990596914018, iteration: 117196
loss: 1.0197285413742065,grad_norm: 0.9999991373168097, iteration: 117197
loss: 1.022262454032898,grad_norm: 0.9867010273847916, iteration: 117198
loss: 1.0113581418991089,grad_norm: 0.9952934102854477, iteration: 117199
loss: 1.0027570724487305,grad_norm: 0.7879645834738657, iteration: 117200
loss: 1.0509912967681885,grad_norm: 0.9999999567187299, iteration: 117201
loss: 1.0563085079193115,grad_norm: 0.9342200595549482, iteration: 117202
loss: 1.0415034294128418,grad_norm: 0.9999998573636872, iteration: 117203
loss: 1.014694094657898,grad_norm: 0.9999990639875768, iteration: 117204
loss: 1.0200581550598145,grad_norm: 0.9999991993812956, iteration: 117205
loss: 1.014755129814148,grad_norm: 0.9999992380340925, iteration: 117206
loss: 0.9993835687637329,grad_norm: 0.999999422149347, iteration: 117207
loss: 1.0042659044265747,grad_norm: 0.999999347335391, iteration: 117208
loss: 1.0156569480895996,grad_norm: 0.8380833583256981, iteration: 117209
loss: 1.0309597253799438,grad_norm: 0.99999975759745, iteration: 117210
loss: 0.9786800146102905,grad_norm: 0.7573313360853448, iteration: 117211
loss: 1.002495527267456,grad_norm: 0.999999205017603, iteration: 117212
loss: 1.0699154138565063,grad_norm: 0.999999806356774, iteration: 117213
loss: 0.9670517444610596,grad_norm: 0.9999990530046662, iteration: 117214
loss: 0.986456573009491,grad_norm: 0.9999991126263222, iteration: 117215
loss: 1.019271969795227,grad_norm: 0.9478665786166754, iteration: 117216
loss: 0.9928930401802063,grad_norm: 0.8081591366359409, iteration: 117217
loss: 1.0312250852584839,grad_norm: 0.9338496266690283, iteration: 117218
loss: 1.0388988256454468,grad_norm: 0.9999994913855015, iteration: 117219
loss: 0.9937864542007446,grad_norm: 0.9999991325003598, iteration: 117220
loss: 0.9874966144561768,grad_norm: 0.9999991878863055, iteration: 117221
loss: 1.0153264999389648,grad_norm: 0.9179849114609777, iteration: 117222
loss: 0.9636056423187256,grad_norm: 0.8819437744029047, iteration: 117223
loss: 0.9704853892326355,grad_norm: 0.9742070972325895, iteration: 117224
loss: 1.0349311828613281,grad_norm: 0.9999994809538016, iteration: 117225
loss: 1.0478510856628418,grad_norm: 0.9999993123902634, iteration: 117226
loss: 1.0209875106811523,grad_norm: 0.7870008759058957, iteration: 117227
loss: 1.0369125604629517,grad_norm: 0.9999990955765675, iteration: 117228
loss: 1.00079345703125,grad_norm: 0.8285872948302522, iteration: 117229
loss: 0.9776224493980408,grad_norm: 0.8460284338120935, iteration: 117230
loss: 1.0116260051727295,grad_norm: 0.9186443604948773, iteration: 117231
loss: 1.0092437267303467,grad_norm: 0.999998997753277, iteration: 117232
loss: 1.0207602977752686,grad_norm: 0.7899649870380847, iteration: 117233
loss: 1.0203899145126343,grad_norm: 0.8535937039530896, iteration: 117234
loss: 0.9691644906997681,grad_norm: 0.9207592227106888, iteration: 117235
loss: 1.0071238279342651,grad_norm: 0.9711650758847085, iteration: 117236
loss: 0.9911555051803589,grad_norm: 0.9156762122735589, iteration: 117237
loss: 0.9523903727531433,grad_norm: 0.9306854542199108, iteration: 117238
loss: 1.166345477104187,grad_norm: 0.9999997631156066, iteration: 117239
loss: 1.0749599933624268,grad_norm: 0.9999999557962335, iteration: 117240
loss: 1.015688419342041,grad_norm: 0.9556045959816879, iteration: 117241
loss: 1.0336909294128418,grad_norm: 0.9141761499500365, iteration: 117242
loss: 1.0116636753082275,grad_norm: 0.8034893639431591, iteration: 117243
loss: 1.0127283334732056,grad_norm: 0.9999995978094629, iteration: 117244
loss: 1.0209730863571167,grad_norm: 0.8903527944541838, iteration: 117245
loss: 0.9634272456169128,grad_norm: 0.9509286778406754, iteration: 117246
loss: 1.023936152458191,grad_norm: 0.9999991514714902, iteration: 117247
loss: 1.0341323614120483,grad_norm: 0.9999990039457294, iteration: 117248
loss: 1.0023465156555176,grad_norm: 0.9525013437357819, iteration: 117249
loss: 1.0248606204986572,grad_norm: 0.9999999664724417, iteration: 117250
loss: 1.0277127027511597,grad_norm: 0.9161498174917339, iteration: 117251
loss: 1.0422366857528687,grad_norm: 0.9999999981385932, iteration: 117252
loss: 1.0039360523223877,grad_norm: 0.9644695606676884, iteration: 117253
loss: 0.9579882025718689,grad_norm: 0.9999991288732611, iteration: 117254
loss: 1.0770699977874756,grad_norm: 0.9999991446192451, iteration: 117255
loss: 1.1124926805496216,grad_norm: 0.9999991917776183, iteration: 117256
loss: 1.2237516641616821,grad_norm: 0.9999999045722253, iteration: 117257
loss: 0.9736312627792358,grad_norm: 0.9999991339285984, iteration: 117258
loss: 0.9902700781822205,grad_norm: 0.8306478576573593, iteration: 117259
loss: 1.0105456113815308,grad_norm: 0.9999996239976247, iteration: 117260
loss: 1.1311910152435303,grad_norm: 0.9999994767839375, iteration: 117261
loss: 1.112221121788025,grad_norm: 0.9999996158147504, iteration: 117262
loss: 0.9607018232345581,grad_norm: 0.9372888776873031, iteration: 117263
loss: 1.021003246307373,grad_norm: 0.9999991890746628, iteration: 117264
loss: 1.039216160774231,grad_norm: 0.999999253703783, iteration: 117265
loss: 1.052692174911499,grad_norm: 0.9999991259076255, iteration: 117266
loss: 1.010626196861267,grad_norm: 0.999999179679609, iteration: 117267
loss: 1.0283291339874268,grad_norm: 0.8013913512221752, iteration: 117268
loss: 0.9998663663864136,grad_norm: 0.8793338896731845, iteration: 117269
loss: 1.0166109800338745,grad_norm: 0.9236420514104524, iteration: 117270
loss: 1.056054711341858,grad_norm: 0.8725163206291617, iteration: 117271
loss: 1.0920960903167725,grad_norm: 0.9999998319105976, iteration: 117272
loss: 1.0556198358535767,grad_norm: 0.9999994520028291, iteration: 117273
loss: 1.2619643211364746,grad_norm: 0.9999997718165327, iteration: 117274
loss: 0.9931696653366089,grad_norm: 0.8680529471946111, iteration: 117275
loss: 0.9794568419456482,grad_norm: 0.9999991417410194, iteration: 117276
loss: 1.0871394872665405,grad_norm: 0.9999997899217331, iteration: 117277
loss: 1.0949904918670654,grad_norm: 0.999999483425383, iteration: 117278
loss: 1.3284687995910645,grad_norm: 0.9999995857894275, iteration: 117279
loss: 1.3801318407058716,grad_norm: 0.9999998915815185, iteration: 117280
loss: 1.1411590576171875,grad_norm: 0.9999996102862447, iteration: 117281
loss: 0.9926739931106567,grad_norm: 0.9999995097325994, iteration: 117282
loss: 1.0910732746124268,grad_norm: 0.8851101501881143, iteration: 117283
loss: 1.0093588829040527,grad_norm: 0.9999993020107553, iteration: 117284
loss: 1.1801668405532837,grad_norm: 0.9999998895147573, iteration: 117285
loss: 1.3737812042236328,grad_norm: 0.9999998165855997, iteration: 117286
loss: 1.3600006103515625,grad_norm: 0.9999997729624961, iteration: 117287
loss: 1.111585021018982,grad_norm: 0.9999990117068417, iteration: 117288
loss: 1.2128740549087524,grad_norm: 0.9999995911172953, iteration: 117289
loss: 1.0815142393112183,grad_norm: 0.9999993735445118, iteration: 117290
loss: 1.3077034950256348,grad_norm: 0.9999999342028724, iteration: 117291
loss: 1.0091052055358887,grad_norm: 0.9281004377686112, iteration: 117292
loss: 1.0176368951797485,grad_norm: 0.9999993354873064, iteration: 117293
loss: 0.9804961085319519,grad_norm: 0.8092395287605352, iteration: 117294
loss: 1.0151598453521729,grad_norm: 0.9999992060641975, iteration: 117295
loss: 1.1528335809707642,grad_norm: 0.9999993782433763, iteration: 117296
loss: 1.1958099603652954,grad_norm: 0.9999994459688245, iteration: 117297
loss: 1.097726821899414,grad_norm: 0.9999990975749748, iteration: 117298
loss: 1.0857915878295898,grad_norm: 0.9999991087131705, iteration: 117299
loss: 1.210945725440979,grad_norm: 0.9999993260456846, iteration: 117300
loss: 1.247828722000122,grad_norm: 0.9999996974749575, iteration: 117301
loss: 1.003172516822815,grad_norm: 0.9999991812424205, iteration: 117302
loss: 0.951962411403656,grad_norm: 0.7537682905580707, iteration: 117303
loss: 1.0967153310775757,grad_norm: 0.9999993301894048, iteration: 117304
loss: 1.120879054069519,grad_norm: 0.9999996680640482, iteration: 117305
loss: 1.110155701637268,grad_norm: 0.9999997241458344, iteration: 117306
loss: 1.175125002861023,grad_norm: 0.9999994792462032, iteration: 117307
loss: 1.0470435619354248,grad_norm: 0.9999991887709007, iteration: 117308
loss: 1.0311636924743652,grad_norm: 0.9999991205832993, iteration: 117309
loss: 1.0125267505645752,grad_norm: 0.9999991321921392, iteration: 117310
loss: 1.0297300815582275,grad_norm: 0.999041315132463, iteration: 117311
loss: 1.1335387229919434,grad_norm: 0.9999994936107459, iteration: 117312
loss: 0.9550561308860779,grad_norm: 0.9770378343625521, iteration: 117313
loss: 1.0056146383285522,grad_norm: 0.9305354207770301, iteration: 117314
loss: 1.0309431552886963,grad_norm: 0.9999992664525297, iteration: 117315
loss: 1.045491337776184,grad_norm: 0.999999638568312, iteration: 117316
loss: 1.1141060590744019,grad_norm: 0.9999999119747844, iteration: 117317
loss: 1.0703786611557007,grad_norm: 0.9999995517091131, iteration: 117318
loss: 1.0873225927352905,grad_norm: 0.9359615984731737, iteration: 117319
loss: 1.0148195028305054,grad_norm: 0.9999991106185397, iteration: 117320
loss: 1.0132776498794556,grad_norm: 0.9999996805242176, iteration: 117321
loss: 0.9901801347732544,grad_norm: 0.9999991948995909, iteration: 117322
loss: 1.0247347354888916,grad_norm: 0.9999992564015286, iteration: 117323
loss: 1.003670334815979,grad_norm: 0.9999991643160002, iteration: 117324
loss: 1.0686557292938232,grad_norm: 0.9999991450947379, iteration: 117325
loss: 1.010575771331787,grad_norm: 0.8416335795371521, iteration: 117326
loss: 1.040661096572876,grad_norm: 0.8970478710094167, iteration: 117327
loss: 0.9907180666923523,grad_norm: 0.7875654910451296, iteration: 117328
loss: 1.1287505626678467,grad_norm: 0.9999997307075419, iteration: 117329
loss: 1.0078412294387817,grad_norm: 0.9999990780156819, iteration: 117330
loss: 1.0142688751220703,grad_norm: 0.9999992653494469, iteration: 117331
loss: 1.0087757110595703,grad_norm: 0.9663914850450281, iteration: 117332
loss: 1.0129410028457642,grad_norm: 0.9999991733943986, iteration: 117333
loss: 0.9687079787254333,grad_norm: 0.8253772488366043, iteration: 117334
loss: 1.0162986516952515,grad_norm: 0.9453023725674841, iteration: 117335
loss: 1.0156306028366089,grad_norm: 0.9999995933941282, iteration: 117336
loss: 1.1173666715621948,grad_norm: 0.9999992542621853, iteration: 117337
loss: 1.0115559101104736,grad_norm: 0.9999990545607069, iteration: 117338
loss: 1.051897406578064,grad_norm: 0.9999991938884413, iteration: 117339
loss: 0.9950816035270691,grad_norm: 0.9946606555751653, iteration: 117340
loss: 1.033223032951355,grad_norm: 0.8744776533108967, iteration: 117341
loss: 1.0831992626190186,grad_norm: 0.9589559421074584, iteration: 117342
loss: 1.0870845317840576,grad_norm: 0.9999992263293159, iteration: 117343
loss: 1.0117378234863281,grad_norm: 0.8516239490278177, iteration: 117344
loss: 1.0010335445404053,grad_norm: 0.9014229471300895, iteration: 117345
loss: 1.1089835166931152,grad_norm: 0.999999422958858, iteration: 117346
loss: 1.266170620918274,grad_norm: 0.9999999123392147, iteration: 117347
loss: 0.9968351721763611,grad_norm: 0.99999908388995, iteration: 117348
loss: 0.9936832189559937,grad_norm: 0.7507297658908468, iteration: 117349
loss: 1.0640896558761597,grad_norm: 0.9999996066914218, iteration: 117350
loss: 0.9952784180641174,grad_norm: 0.9367249441408867, iteration: 117351
loss: 1.0393521785736084,grad_norm: 0.9999992741503253, iteration: 117352
loss: 1.0029889345169067,grad_norm: 0.9999992290633662, iteration: 117353
loss: 1.0319793224334717,grad_norm: 0.9999992016320647, iteration: 117354
loss: 0.9944161772727966,grad_norm: 0.9368441366906002, iteration: 117355
loss: 1.0236455202102661,grad_norm: 0.9940187606772996, iteration: 117356
loss: 1.008447289466858,grad_norm: 0.9164684967107908, iteration: 117357
loss: 0.9990738034248352,grad_norm: 0.9603626040529847, iteration: 117358
loss: 0.9976863861083984,grad_norm: 0.9000848358219787, iteration: 117359
loss: 1.005059003829956,grad_norm: 0.9999991877203983, iteration: 117360
loss: 1.0420423746109009,grad_norm: 0.9530528692869533, iteration: 117361
loss: 1.0481609106063843,grad_norm: 0.999998966273093, iteration: 117362
loss: 0.9826609492301941,grad_norm: 0.999999149699815, iteration: 117363
loss: 1.0734089612960815,grad_norm: 0.9999994895932249, iteration: 117364
loss: 1.0049625635147095,grad_norm: 0.8861905269636918, iteration: 117365
loss: 1.1690187454223633,grad_norm: 0.9999995696514117, iteration: 117366
loss: 1.0040379762649536,grad_norm: 0.9999991761763025, iteration: 117367
loss: 1.010815143585205,grad_norm: 0.9238190424817818, iteration: 117368
loss: 1.0123015642166138,grad_norm: 0.8509553218432391, iteration: 117369
loss: 0.9833753705024719,grad_norm: 0.9999991549788291, iteration: 117370
loss: 1.0269005298614502,grad_norm: 0.9999998817441668, iteration: 117371
loss: 1.042681336402893,grad_norm: 0.999999734047124, iteration: 117372
loss: 1.0243661403656006,grad_norm: 0.9999994076023502, iteration: 117373
loss: 0.9882326126098633,grad_norm: 0.8195970398505857, iteration: 117374
loss: 0.9739975929260254,grad_norm: 0.9334905728834184, iteration: 117375
loss: 1.0368634462356567,grad_norm: 0.9999990994197878, iteration: 117376
loss: 0.9849031567573547,grad_norm: 0.867988524639123, iteration: 117377
loss: 1.0855176448822021,grad_norm: 0.9999996642685217, iteration: 117378
loss: 0.9814308881759644,grad_norm: 0.9999999867048351, iteration: 117379
loss: 1.0331969261169434,grad_norm: 0.9635330520212233, iteration: 117380
loss: 1.0167466402053833,grad_norm: 0.8955872506899377, iteration: 117381
loss: 1.0072399377822876,grad_norm: 0.8276647199505099, iteration: 117382
loss: 1.0337656736373901,grad_norm: 0.9999995690450055, iteration: 117383
loss: 1.0141637325286865,grad_norm: 0.8374429475356794, iteration: 117384
loss: 0.986177384853363,grad_norm: 0.7697572685281423, iteration: 117385
loss: 1.0579725503921509,grad_norm: 0.9999997631686067, iteration: 117386
loss: 1.0649256706237793,grad_norm: 0.9999996719986636, iteration: 117387
loss: 1.0801453590393066,grad_norm: 0.9999998766885532, iteration: 117388
loss: 1.0033164024353027,grad_norm: 0.999999247601426, iteration: 117389
loss: 1.0306013822555542,grad_norm: 0.9999999010385455, iteration: 117390
loss: 0.9710827469825745,grad_norm: 0.8647234151181495, iteration: 117391
loss: 0.9542514085769653,grad_norm: 0.8689785908237274, iteration: 117392
loss: 0.9948962926864624,grad_norm: 0.7756154584419722, iteration: 117393
loss: 1.0078139305114746,grad_norm: 0.9674844571545346, iteration: 117394
loss: 1.0065101385116577,grad_norm: 0.9999996600446879, iteration: 117395
loss: 1.0281293392181396,grad_norm: 0.9757844724593434, iteration: 117396
loss: 1.032016634941101,grad_norm: 0.9999992511534991, iteration: 117397
loss: 0.9873923659324646,grad_norm: 0.9183775556292824, iteration: 117398
loss: 1.06693434715271,grad_norm: 0.8710593126194496, iteration: 117399
loss: 1.0172291994094849,grad_norm: 0.9506113914939887, iteration: 117400
loss: 1.079743504524231,grad_norm: 0.9999992648834921, iteration: 117401
loss: 0.9894129633903503,grad_norm: 0.7877495250981926, iteration: 117402
loss: 1.0213159322738647,grad_norm: 0.8920069892904532, iteration: 117403
loss: 1.009247899055481,grad_norm: 0.7482394209459619, iteration: 117404
loss: 1.0556751489639282,grad_norm: 0.9999991473702005, iteration: 117405
loss: 0.9929821491241455,grad_norm: 0.8214803788368106, iteration: 117406
loss: 1.0220789909362793,grad_norm: 0.9999996337664949, iteration: 117407
loss: 0.98308926820755,grad_norm: 0.8965642924487058, iteration: 117408
loss: 1.073209524154663,grad_norm: 0.9999999691768493, iteration: 117409
loss: 1.0448416471481323,grad_norm: 0.9999996421516161, iteration: 117410
loss: 1.0572751760482788,grad_norm: 0.999999630288344, iteration: 117411
loss: 0.9959593415260315,grad_norm: 0.8555769762463872, iteration: 117412
loss: 0.9939079880714417,grad_norm: 0.7559294869122889, iteration: 117413
loss: 1.029520034790039,grad_norm: 0.9999992956164863, iteration: 117414
loss: 1.016411542892456,grad_norm: 0.9999995558021787, iteration: 117415
loss: 1.0671141147613525,grad_norm: 0.9029176833723721, iteration: 117416
loss: 1.0108611583709717,grad_norm: 0.9999992500339311, iteration: 117417
loss: 1.0249041318893433,grad_norm: 0.9388928449922951, iteration: 117418
loss: 1.0504175424575806,grad_norm: 0.9999997143116484, iteration: 117419
loss: 1.1548943519592285,grad_norm: 0.9999995068620345, iteration: 117420
loss: 0.9974275827407837,grad_norm: 0.9665378528197672, iteration: 117421
loss: 1.0754700899124146,grad_norm: 0.999999531821167, iteration: 117422
loss: 1.0834945440292358,grad_norm: 0.9999997062098949, iteration: 117423
loss: 1.0140684843063354,grad_norm: 0.9883213144238433, iteration: 117424
loss: 1.0023585557937622,grad_norm: 0.9238446874165495, iteration: 117425
loss: 1.0533639192581177,grad_norm: 0.9999996357668879, iteration: 117426
loss: 1.16103196144104,grad_norm: 0.9999998556111116, iteration: 117427
loss: 1.009564757347107,grad_norm: 0.9364133014801903, iteration: 117428
loss: 1.019765019416809,grad_norm: 0.9999996121866035, iteration: 117429
loss: 1.0723764896392822,grad_norm: 0.9494242802409774, iteration: 117430
loss: 1.0289992094039917,grad_norm: 0.9999994389382596, iteration: 117431
loss: 0.9938063025474548,grad_norm: 0.9999992329702964, iteration: 117432
loss: 0.9892133474349976,grad_norm: 0.9999993709526152, iteration: 117433
loss: 1.0364893674850464,grad_norm: 0.9999990726787691, iteration: 117434
loss: 1.0902011394500732,grad_norm: 0.999999364697995, iteration: 117435
loss: 1.0198429822921753,grad_norm: 0.9960383512636547, iteration: 117436
loss: 1.0672204494476318,grad_norm: 0.99999992388843, iteration: 117437
loss: 1.010094165802002,grad_norm: 0.8962170946418104, iteration: 117438
loss: 1.002087950706482,grad_norm: 0.9955460689112965, iteration: 117439
loss: 1.0815339088439941,grad_norm: 0.9999990994149026, iteration: 117440
loss: 0.9938492178916931,grad_norm: 0.9999991557382151, iteration: 117441
loss: 1.0373069047927856,grad_norm: 0.9999996601316844, iteration: 117442
loss: 1.0081242322921753,grad_norm: 0.7695778068862256, iteration: 117443
loss: 1.0248405933380127,grad_norm: 0.9999991878747128, iteration: 117444
loss: 0.9514637589454651,grad_norm: 0.8820295937430299, iteration: 117445
loss: 1.0366145372390747,grad_norm: 0.9746316987242297, iteration: 117446
loss: 1.0421568155288696,grad_norm: 0.9999993940719217, iteration: 117447
loss: 1.024980902671814,grad_norm: 0.9999990770356864, iteration: 117448
loss: 1.1036677360534668,grad_norm: 0.9999998538990962, iteration: 117449
loss: 1.0258585214614868,grad_norm: 0.8977910611963513, iteration: 117450
loss: 0.9936177730560303,grad_norm: 0.9404478821930394, iteration: 117451
loss: 1.0440208911895752,grad_norm: 0.9999999000584977, iteration: 117452
loss: 0.9903135895729065,grad_norm: 0.9999990094744721, iteration: 117453
loss: 1.0022532939910889,grad_norm: 0.930855284774789, iteration: 117454
loss: 1.0258338451385498,grad_norm: 0.7880538514081848, iteration: 117455
loss: 1.0038293600082397,grad_norm: 0.8804276945270649, iteration: 117456
loss: 1.078751802444458,grad_norm: 0.9999992670666666, iteration: 117457
loss: 0.9996969103813171,grad_norm: 0.8854610987618735, iteration: 117458
loss: 1.0442447662353516,grad_norm: 0.9999993587517018, iteration: 117459
loss: 1.0380574464797974,grad_norm: 0.9999996244386897, iteration: 117460
loss: 1.0091516971588135,grad_norm: 0.9297590790724256, iteration: 117461
loss: 0.994465172290802,grad_norm: 0.9999990671460184, iteration: 117462
loss: 0.9587216377258301,grad_norm: 0.8306424011206079, iteration: 117463
loss: 1.0042954683303833,grad_norm: 0.7968383647963431, iteration: 117464
loss: 1.0743409395217896,grad_norm: 0.9670374751819468, iteration: 117465
loss: 1.0074654817581177,grad_norm: 0.9459668048571754, iteration: 117466
loss: 1.0067490339279175,grad_norm: 0.8005664806168131, iteration: 117467
loss: 1.03035569190979,grad_norm: 0.8696278228003862, iteration: 117468
loss: 1.1689956188201904,grad_norm: 0.9999997582681721, iteration: 117469
loss: 1.0141743421554565,grad_norm: 0.797109226884631, iteration: 117470
loss: 1.016697883605957,grad_norm: 0.9999991633958473, iteration: 117471
loss: 0.9966866374015808,grad_norm: 0.999999213701758, iteration: 117472
loss: 0.997937798500061,grad_norm: 0.9256359872908745, iteration: 117473
loss: 1.0666860342025757,grad_norm: 0.9999993682366577, iteration: 117474
loss: 1.0945472717285156,grad_norm: 0.9999989712844525, iteration: 117475
loss: 1.042659044265747,grad_norm: 0.9999992800563656, iteration: 117476
loss: 1.0221688747406006,grad_norm: 0.9999992618493336, iteration: 117477
loss: 1.0369809865951538,grad_norm: 0.9996501174460817, iteration: 117478
loss: 0.9995486736297607,grad_norm: 0.9382065532659306, iteration: 117479
loss: 1.0004304647445679,grad_norm: 0.9999990637696303, iteration: 117480
loss: 0.9844793677330017,grad_norm: 0.9999990492421238, iteration: 117481
loss: 1.1054681539535522,grad_norm: 0.9999995064700602, iteration: 117482
loss: 1.0148813724517822,grad_norm: 0.9999990307571727, iteration: 117483
loss: 0.9925413131713867,grad_norm: 0.8427464330019724, iteration: 117484
loss: 1.006109356880188,grad_norm: 0.9999992206338703, iteration: 117485
loss: 0.9989029765129089,grad_norm: 0.9535315953696692, iteration: 117486
loss: 1.0217342376708984,grad_norm: 0.8780951563789597, iteration: 117487
loss: 1.1094378232955933,grad_norm: 0.9999996799399639, iteration: 117488
loss: 1.020986557006836,grad_norm: 0.9999994575328593, iteration: 117489
loss: 1.0527209043502808,grad_norm: 0.8418614579270389, iteration: 117490
loss: 1.0262924432754517,grad_norm: 0.8498329872339757, iteration: 117491
loss: 1.0537676811218262,grad_norm: 0.9999995065793919, iteration: 117492
loss: 1.021204948425293,grad_norm: 0.7849602543420864, iteration: 117493
loss: 0.9863237142562866,grad_norm: 0.9678777891989839, iteration: 117494
loss: 1.0195152759552002,grad_norm: 0.9206883814294927, iteration: 117495
loss: 0.9883323311805725,grad_norm: 0.9223920489582742, iteration: 117496
loss: 1.0519447326660156,grad_norm: 0.9809333191542441, iteration: 117497
loss: 1.0038352012634277,grad_norm: 0.99999913427125, iteration: 117498
loss: 1.0885004997253418,grad_norm: 0.9999994996782741, iteration: 117499
loss: 0.9805652499198914,grad_norm: 0.7654644187814793, iteration: 117500
loss: 0.9847943782806396,grad_norm: 0.9999990951072488, iteration: 117501
loss: 1.026376485824585,grad_norm: 0.9999995222840086, iteration: 117502
loss: 1.030455231666565,grad_norm: 0.9999991832060489, iteration: 117503
loss: 1.0084619522094727,grad_norm: 0.7825379028287122, iteration: 117504
loss: 0.9623002409934998,grad_norm: 0.8157634183952971, iteration: 117505
loss: 1.0116519927978516,grad_norm: 0.8972061248580715, iteration: 117506
loss: 1.0317940711975098,grad_norm: 0.9999992243822732, iteration: 117507
loss: 1.006338357925415,grad_norm: 0.894609769819612, iteration: 117508
loss: 1.029272437095642,grad_norm: 0.9515911681562497, iteration: 117509
loss: 1.0055298805236816,grad_norm: 0.7946657232458625, iteration: 117510
loss: 1.0127543210983276,grad_norm: 0.9393905311799203, iteration: 117511
loss: 0.9860751628875732,grad_norm: 0.999688584733129, iteration: 117512
loss: 0.996834397315979,grad_norm: 0.9999992739103654, iteration: 117513
loss: 1.1185667514801025,grad_norm: 0.99999957653134, iteration: 117514
loss: 1.0941365957260132,grad_norm: 0.9999993019519575, iteration: 117515
loss: 0.9745916128158569,grad_norm: 0.999999123782913, iteration: 117516
loss: 1.0138416290283203,grad_norm: 0.8298982522886301, iteration: 117517
loss: 0.9972134232521057,grad_norm: 0.9739528248182866, iteration: 117518
loss: 1.015125036239624,grad_norm: 0.9998503387442995, iteration: 117519
loss: 1.0161726474761963,grad_norm: 0.8578220690221114, iteration: 117520
loss: 1.0523982048034668,grad_norm: 0.9999998494569727, iteration: 117521
loss: 0.9840197563171387,grad_norm: 0.7787601364439741, iteration: 117522
loss: 1.03485107421875,grad_norm: 0.9999994656076482, iteration: 117523
loss: 1.0028307437896729,grad_norm: 0.9646045925960726, iteration: 117524
loss: 1.008628249168396,grad_norm: 0.9042527436830687, iteration: 117525
loss: 1.0110290050506592,grad_norm: 0.8090447793480741, iteration: 117526
loss: 1.01300847530365,grad_norm: 0.7917164146232434, iteration: 117527
loss: 1.0066295862197876,grad_norm: 0.8257037995311759, iteration: 117528
loss: 0.9859347939491272,grad_norm: 0.999999025437479, iteration: 117529
loss: 1.0058720111846924,grad_norm: 0.9499984773087923, iteration: 117530
loss: 1.0390089750289917,grad_norm: 0.9999995340855428, iteration: 117531
loss: 0.9577066898345947,grad_norm: 0.9999992360201307, iteration: 117532
loss: 1.061933159828186,grad_norm: 0.9430219485017336, iteration: 117533
loss: 1.003400206565857,grad_norm: 0.8308828440784453, iteration: 117534
loss: 0.9968624114990234,grad_norm: 0.9999992431805651, iteration: 117535
loss: 0.9876853227615356,grad_norm: 0.960063791901371, iteration: 117536
loss: 1.031933069229126,grad_norm: 0.9999992159270865, iteration: 117537
loss: 0.9644526243209839,grad_norm: 0.9999991808727784, iteration: 117538
loss: 0.9842668771743774,grad_norm: 0.85070357383359, iteration: 117539
loss: 1.0369595289230347,grad_norm: 0.9999991884065436, iteration: 117540
loss: 1.0038968324661255,grad_norm: 0.9208569857577343, iteration: 117541
loss: 1.0292414426803589,grad_norm: 0.9457165764566673, iteration: 117542
loss: 0.9660316705703735,grad_norm: 0.9411274468494044, iteration: 117543
loss: 1.0495110750198364,grad_norm: 0.9999994851188455, iteration: 117544
loss: 0.9777630567550659,grad_norm: 0.9320578872131967, iteration: 117545
loss: 1.0312296152114868,grad_norm: 0.9999990990253043, iteration: 117546
loss: 1.0761659145355225,grad_norm: 0.9999992264270754, iteration: 117547
loss: 1.0108014345169067,grad_norm: 0.9999990706999816, iteration: 117548
loss: 1.0165586471557617,grad_norm: 0.9999992600228873, iteration: 117549
loss: 1.0152790546417236,grad_norm: 0.999999099272591, iteration: 117550
loss: 1.0332129001617432,grad_norm: 0.9999996522814869, iteration: 117551
loss: 1.0245331525802612,grad_norm: 0.9999992313600091, iteration: 117552
loss: 1.0102300643920898,grad_norm: 0.963994815413192, iteration: 117553
loss: 1.233471393585205,grad_norm: 0.9999995703461545, iteration: 117554
loss: 1.0178632736206055,grad_norm: 0.9999991738995408, iteration: 117555
loss: 0.9906607270240784,grad_norm: 0.9009993056307403, iteration: 117556
loss: 0.9779365062713623,grad_norm: 0.9223775486116794, iteration: 117557
loss: 1.0136405229568481,grad_norm: 0.8914325931302902, iteration: 117558
loss: 0.9860504269599915,grad_norm: 0.9999990591792346, iteration: 117559
loss: 1.0073444843292236,grad_norm: 0.9541839624395315, iteration: 117560
loss: 0.979798436164856,grad_norm: 0.7923300401595439, iteration: 117561
loss: 1.0016833543777466,grad_norm: 0.8400879867784755, iteration: 117562
loss: 0.9979110956192017,grad_norm: 0.770434064530175, iteration: 117563
loss: 1.0047414302825928,grad_norm: 0.9999990777735168, iteration: 117564
loss: 1.0093438625335693,grad_norm: 0.9346889738217848, iteration: 117565
loss: 0.9981229901313782,grad_norm: 0.9311136522233013, iteration: 117566
loss: 1.018929362297058,grad_norm: 0.9091483737743568, iteration: 117567
loss: 1.0310239791870117,grad_norm: 0.9680675636869113, iteration: 117568
loss: 1.068131446838379,grad_norm: 0.9999996418890148, iteration: 117569
loss: 1.0159128904342651,grad_norm: 0.9999990685723253, iteration: 117570
loss: 0.9911567568778992,grad_norm: 0.9999992214245752, iteration: 117571
loss: 1.0059260129928589,grad_norm: 0.9999996304625227, iteration: 117572
loss: 1.026312232017517,grad_norm: 0.9999989892363309, iteration: 117573
loss: 1.0082881450653076,grad_norm: 0.8562111229681874, iteration: 117574
loss: 1.0075119733810425,grad_norm: 0.8764950163351414, iteration: 117575
loss: 1.0188283920288086,grad_norm: 0.9999991700259383, iteration: 117576
loss: 1.0002400875091553,grad_norm: 0.9362719653835512, iteration: 117577
loss: 1.0258674621582031,grad_norm: 0.7706677043662373, iteration: 117578
loss: 1.0354435443878174,grad_norm: 0.7954435603116274, iteration: 117579
loss: 0.9992424249649048,grad_norm: 0.9223171896278359, iteration: 117580
loss: 0.9742330312728882,grad_norm: 0.8532060922450166, iteration: 117581
loss: 1.0020949840545654,grad_norm: 0.8486528730117958, iteration: 117582
loss: 0.9850316047668457,grad_norm: 0.9999996474405793, iteration: 117583
loss: 1.1664588451385498,grad_norm: 0.9999990990900067, iteration: 117584
loss: 1.0363566875457764,grad_norm: 0.9662476626144955, iteration: 117585
loss: 1.0082615613937378,grad_norm: 0.9999993700182096, iteration: 117586
loss: 0.9843055009841919,grad_norm: 0.9999991395897164, iteration: 117587
loss: 0.9700344204902649,grad_norm: 0.9999992665541794, iteration: 117588
loss: 1.0073713064193726,grad_norm: 0.9999991656133385, iteration: 117589
loss: 0.9927479028701782,grad_norm: 0.9999992012528905, iteration: 117590
loss: 1.0030657052993774,grad_norm: 0.8910607799491129, iteration: 117591
loss: 1.0151731967926025,grad_norm: 0.8257568564821792, iteration: 117592
loss: 0.9876756072044373,grad_norm: 0.9391864435427588, iteration: 117593
loss: 0.996964693069458,grad_norm: 0.9999999470294443, iteration: 117594
loss: 1.0133137702941895,grad_norm: 0.9999993035301757, iteration: 117595
loss: 1.027012825012207,grad_norm: 0.9999998433728581, iteration: 117596
loss: 1.0101532936096191,grad_norm: 0.86354517072133, iteration: 117597
loss: 1.033126950263977,grad_norm: 0.999999518782285, iteration: 117598
loss: 1.014983057975769,grad_norm: 0.7494381164596357, iteration: 117599
loss: 0.9994496703147888,grad_norm: 0.9520246176842061, iteration: 117600
loss: 0.9963017702102661,grad_norm: 0.9621937521794959, iteration: 117601
loss: 1.0091866254806519,grad_norm: 0.9891685295069731, iteration: 117602
loss: 1.0217703580856323,grad_norm: 0.9999993450818713, iteration: 117603
loss: 0.9990475177764893,grad_norm: 0.9341415469589339, iteration: 117604
loss: 0.9696089029312134,grad_norm: 0.9999996377962335, iteration: 117605
loss: 1.0061553716659546,grad_norm: 0.8765614511172908, iteration: 117606
loss: 1.0041048526763916,grad_norm: 0.9999992478338765, iteration: 117607
loss: 0.9720665216445923,grad_norm: 0.825058727222654, iteration: 117608
loss: 1.05522620677948,grad_norm: 0.9999994999891857, iteration: 117609
loss: 0.9835926294326782,grad_norm: 0.8653668430502363, iteration: 117610
loss: 1.057848334312439,grad_norm: 1.0000000157324058, iteration: 117611
loss: 0.9944636821746826,grad_norm: 0.8788668709802328, iteration: 117612
loss: 1.0467662811279297,grad_norm: 0.9999995288255673, iteration: 117613
loss: 1.0518755912780762,grad_norm: 0.9797863981875172, iteration: 117614
loss: 1.0105630159378052,grad_norm: 0.9999998386255217, iteration: 117615
loss: 0.9988689422607422,grad_norm: 0.9999995258605099, iteration: 117616
loss: 0.9995746612548828,grad_norm: 0.9677037055597879, iteration: 117617
loss: 1.087813377380371,grad_norm: 0.999999319602843, iteration: 117618
loss: 0.9790892004966736,grad_norm: 0.9978520163663183, iteration: 117619
loss: 1.052377700805664,grad_norm: 0.9999999140598614, iteration: 117620
loss: 0.9925492405891418,grad_norm: 0.9999989575814633, iteration: 117621
loss: 0.9776450395584106,grad_norm: 0.9212497882553067, iteration: 117622
loss: 0.9967675805091858,grad_norm: 0.9000566163712916, iteration: 117623
loss: 1.0167014598846436,grad_norm: 0.9908473870434138, iteration: 117624
loss: 1.0085960626602173,grad_norm: 0.9999992831634649, iteration: 117625
loss: 0.9969783425331116,grad_norm: 0.8086287194407525, iteration: 117626
loss: 1.0650280714035034,grad_norm: 0.9999993668500328, iteration: 117627
loss: 1.0174369812011719,grad_norm: 0.9999993223149665, iteration: 117628
loss: 1.0053297281265259,grad_norm: 0.9898647368623837, iteration: 117629
loss: 1.00143301486969,grad_norm: 0.9999992024400582, iteration: 117630
loss: 1.0774027109146118,grad_norm: 0.999999225677358, iteration: 117631
loss: 1.0206266641616821,grad_norm: 0.8630508915049866, iteration: 117632
loss: 1.0046298503875732,grad_norm: 0.9999992737631537, iteration: 117633
loss: 1.0313712358474731,grad_norm: 0.8809139086415138, iteration: 117634
loss: 0.9981865882873535,grad_norm: 0.9361782094181614, iteration: 117635
loss: 1.0268282890319824,grad_norm: 0.9999989071095032, iteration: 117636
loss: 1.1155987977981567,grad_norm: 0.9999996797619466, iteration: 117637
loss: 0.9898340702056885,grad_norm: 0.9999990521249873, iteration: 117638
loss: 0.9727751016616821,grad_norm: 0.9999995273571805, iteration: 117639
loss: 1.0218051671981812,grad_norm: 0.8744100823248695, iteration: 117640
loss: 0.9830302000045776,grad_norm: 0.9999990427510955, iteration: 117641
loss: 1.0460032224655151,grad_norm: 0.9999998196122588, iteration: 117642
loss: 1.0571355819702148,grad_norm: 0.9486663109935412, iteration: 117643
loss: 1.035179615020752,grad_norm: 0.9999992282379633, iteration: 117644
loss: 1.0315942764282227,grad_norm: 0.7903096798920433, iteration: 117645
loss: 1.013300895690918,grad_norm: 0.9425014348413485, iteration: 117646
loss: 1.0178066492080688,grad_norm: 0.9999994123729546, iteration: 117647
loss: 0.999849796295166,grad_norm: 0.9999991454563167, iteration: 117648
loss: 0.9406472444534302,grad_norm: 0.8855131735021451, iteration: 117649
loss: 0.9992183446884155,grad_norm: 0.930565163872836, iteration: 117650
loss: 1.0223884582519531,grad_norm: 0.9999994186103888, iteration: 117651
loss: 1.0054125785827637,grad_norm: 0.9366378994045492, iteration: 117652
loss: 1.0254226922988892,grad_norm: 0.9648451137160907, iteration: 117653
loss: 1.044011116027832,grad_norm: 0.9999996443687293, iteration: 117654
loss: 1.0513403415679932,grad_norm: 0.9999993815005552, iteration: 117655
loss: 1.0049606561660767,grad_norm: 0.9999990941624277, iteration: 117656
loss: 0.9791513085365295,grad_norm: 0.8108321707824617, iteration: 117657
loss: 1.0030838251113892,grad_norm: 0.8622874822184778, iteration: 117658
loss: 1.007627010345459,grad_norm: 0.9999991084058485, iteration: 117659
loss: 1.0251257419586182,grad_norm: 0.9999992652806673, iteration: 117660
loss: 1.0016452074050903,grad_norm: 0.8391865907285014, iteration: 117661
loss: 1.0378974676132202,grad_norm: 0.9999993770899388, iteration: 117662
loss: 1.0314921140670776,grad_norm: 0.9999990606842222, iteration: 117663
loss: 1.0487072467803955,grad_norm: 0.9999997724871277, iteration: 117664
loss: 1.0263313055038452,grad_norm: 0.9999994501969778, iteration: 117665
loss: 0.9926512837409973,grad_norm: 0.870344341387747, iteration: 117666
loss: 0.9929919838905334,grad_norm: 0.9944145897536362, iteration: 117667
loss: 1.0109729766845703,grad_norm: 0.9411579738366138, iteration: 117668
loss: 1.0102624893188477,grad_norm: 0.9373030347349208, iteration: 117669
loss: 1.107222080230713,grad_norm: 0.9999993976710703, iteration: 117670
loss: 0.9923609495162964,grad_norm: 0.8658954701812889, iteration: 117671
loss: 1.0537036657333374,grad_norm: 0.9999996408231756, iteration: 117672
loss: 1.013344645500183,grad_norm: 0.9999991670219275, iteration: 117673
loss: 1.0389683246612549,grad_norm: 0.9999994437886727, iteration: 117674
loss: 1.0179890394210815,grad_norm: 0.9761661561942527, iteration: 117675
loss: 0.9935543537139893,grad_norm: 0.999999071661257, iteration: 117676
loss: 1.0187233686447144,grad_norm: 0.9999989696917111, iteration: 117677
loss: 0.9476876258850098,grad_norm: 0.9999991476531324, iteration: 117678
loss: 1.0088660717010498,grad_norm: 0.835769444378672, iteration: 117679
loss: 0.9787375926971436,grad_norm: 0.9768231414510846, iteration: 117680
loss: 1.001816749572754,grad_norm: 0.9884719633744032, iteration: 117681
loss: 0.9946784377098083,grad_norm: 0.9999989948346442, iteration: 117682
loss: 1.042080044746399,grad_norm: 0.9999997345762723, iteration: 117683
loss: 1.0066907405853271,grad_norm: 0.8710715418707086, iteration: 117684
loss: 1.0099180936813354,grad_norm: 0.9999990458847643, iteration: 117685
loss: 0.9986142516136169,grad_norm: 0.999999138702093, iteration: 117686
loss: 1.0077168941497803,grad_norm: 0.9999992331993492, iteration: 117687
loss: 1.0685198307037354,grad_norm: 0.9999992115610105, iteration: 117688
loss: 1.0132114887237549,grad_norm: 0.9999991796626293, iteration: 117689
loss: 0.990512490272522,grad_norm: 0.8905040484017688, iteration: 117690
loss: 0.9987702965736389,grad_norm: 0.8745776911615414, iteration: 117691
loss: 1.0031968355178833,grad_norm: 0.9999990971929019, iteration: 117692
loss: 0.9995163679122925,grad_norm: 0.9999991844332652, iteration: 117693
loss: 1.0252387523651123,grad_norm: 0.9613261399627024, iteration: 117694
loss: 1.0237964391708374,grad_norm: 0.8107993717948881, iteration: 117695
loss: 0.9932997226715088,grad_norm: 0.9447434130639302, iteration: 117696
loss: 0.980975329875946,grad_norm: 0.8889620401256039, iteration: 117697
loss: 1.0092555284500122,grad_norm: 0.8708106832852218, iteration: 117698
loss: 1.036880373954773,grad_norm: 0.9113473368191173, iteration: 117699
loss: 1.046116828918457,grad_norm: 0.9999997895618679, iteration: 117700
loss: 1.0116841793060303,grad_norm: 0.9999991226985105, iteration: 117701
loss: 1.014022946357727,grad_norm: 0.9395427022430431, iteration: 117702
loss: 1.0136231184005737,grad_norm: 0.8720380539473407, iteration: 117703
loss: 1.065635085105896,grad_norm: 0.9999992458313395, iteration: 117704
loss: 1.0391775369644165,grad_norm: 0.9999998255245641, iteration: 117705
loss: 0.9726293087005615,grad_norm: 0.7784508334030886, iteration: 117706
loss: 0.9690894484519958,grad_norm: 0.8416839248299873, iteration: 117707
loss: 1.0295342206954956,grad_norm: 0.9142676661873493, iteration: 117708
loss: 0.9769728779792786,grad_norm: 0.8822007628104734, iteration: 117709
loss: 0.955171525478363,grad_norm: 0.9999991009038637, iteration: 117710
loss: 0.9754985570907593,grad_norm: 0.8732593380875158, iteration: 117711
loss: 1.0040262937545776,grad_norm: 0.7946244230168376, iteration: 117712
loss: 1.0125888586044312,grad_norm: 0.9159243913134297, iteration: 117713
loss: 1.0365746021270752,grad_norm: 0.986311313759408, iteration: 117714
loss: 1.0155850648880005,grad_norm: 0.879816392582777, iteration: 117715
loss: 0.9672009348869324,grad_norm: 0.9025085579860052, iteration: 117716
loss: 1.0489343404769897,grad_norm: 0.8232628098149077, iteration: 117717
loss: 0.9734553098678589,grad_norm: 0.9744966742722633, iteration: 117718
loss: 1.0433896780014038,grad_norm: 0.9999996005328134, iteration: 117719
loss: 1.0233999490737915,grad_norm: 0.9999992122338124, iteration: 117720
loss: 1.0101525783538818,grad_norm: 0.9999990344793958, iteration: 117721
loss: 0.9999517202377319,grad_norm: 0.8357753591217484, iteration: 117722
loss: 1.032626748085022,grad_norm: 0.8565580001789813, iteration: 117723
loss: 1.0652518272399902,grad_norm: 0.9999990821446592, iteration: 117724
loss: 0.9824976325035095,grad_norm: 0.8700589504557494, iteration: 117725
loss: 1.0407497882843018,grad_norm: 0.9999991903780386, iteration: 117726
loss: 1.0438063144683838,grad_norm: 0.9999998614965345, iteration: 117727
loss: 1.0332831144332886,grad_norm: 0.9999992063599571, iteration: 117728
loss: 1.0261386632919312,grad_norm: 0.8898969103748053, iteration: 117729
loss: 0.9819102883338928,grad_norm: 0.9038416305820328, iteration: 117730
loss: 0.9912668466567993,grad_norm: 0.9999993231565674, iteration: 117731
loss: 1.0195826292037964,grad_norm: 0.8983486286503926, iteration: 117732
loss: 0.9591475129127502,grad_norm: 0.981664677394874, iteration: 117733
loss: 0.982930600643158,grad_norm: 0.8728774392616052, iteration: 117734
loss: 1.0150712728500366,grad_norm: 0.9999993661057591, iteration: 117735
loss: 1.0882551670074463,grad_norm: 0.9065310342123352, iteration: 117736
loss: 1.0268718004226685,grad_norm: 0.9182551910184008, iteration: 117737
loss: 0.9685766696929932,grad_norm: 0.9928793847678563, iteration: 117738
loss: 1.0393712520599365,grad_norm: 0.9999993391942852, iteration: 117739
loss: 1.0020955801010132,grad_norm: 0.9464509168597968, iteration: 117740
loss: 0.9806588292121887,grad_norm: 0.8376933512494654, iteration: 117741
loss: 1.0475687980651855,grad_norm: 0.999999121732887, iteration: 117742
loss: 1.0119298696517944,grad_norm: 0.9971738631045465, iteration: 117743
loss: 1.0435551404953003,grad_norm: 0.9999995385607432, iteration: 117744
loss: 0.9911039471626282,grad_norm: 0.8867179543503235, iteration: 117745
loss: 1.0206620693206787,grad_norm: 0.8692844120336486, iteration: 117746
loss: 1.0238136053085327,grad_norm: 0.9982503930715123, iteration: 117747
loss: 1.0151917934417725,grad_norm: 0.963610527655447, iteration: 117748
loss: 1.0378167629241943,grad_norm: 0.9999996921218536, iteration: 117749
loss: 0.9884455800056458,grad_norm: 0.9638006406543224, iteration: 117750
loss: 0.9795755743980408,grad_norm: 0.9529915296846466, iteration: 117751
loss: 1.069899559020996,grad_norm: 0.9999998454475992, iteration: 117752
loss: 1.0804458856582642,grad_norm: 0.9999991680579031, iteration: 117753
loss: 1.0570160150527954,grad_norm: 0.9999999492045425, iteration: 117754
loss: 0.9873371124267578,grad_norm: 0.9999990201418105, iteration: 117755
loss: 1.0233372449874878,grad_norm: 0.9999990554073713, iteration: 117756
loss: 0.9992048740386963,grad_norm: 0.8370538476635637, iteration: 117757
loss: 1.1218887567520142,grad_norm: 0.9999994784126474, iteration: 117758
loss: 1.077406883239746,grad_norm: 0.9999995257681488, iteration: 117759
loss: 0.9931678175926208,grad_norm: 0.8733850225878357, iteration: 117760
loss: 1.0536102056503296,grad_norm: 0.9999991003701547, iteration: 117761
loss: 1.0172903537750244,grad_norm: 0.9999993165335818, iteration: 117762
loss: 1.037886142730713,grad_norm: 0.999999325246468, iteration: 117763
loss: 1.0225716829299927,grad_norm: 0.9999994164510979, iteration: 117764
loss: 0.9552437663078308,grad_norm: 0.9999990739574945, iteration: 117765
loss: 1.0101078748703003,grad_norm: 0.8315202584757782, iteration: 117766
loss: 1.054329752922058,grad_norm: 0.9999990485803967, iteration: 117767
loss: 0.9989227652549744,grad_norm: 0.8818147145963006, iteration: 117768
loss: 1.0570788383483887,grad_norm: 0.9999991699297914, iteration: 117769
loss: 0.9702783226966858,grad_norm: 0.9999990517041949, iteration: 117770
loss: 1.0165154933929443,grad_norm: 0.9364291833844007, iteration: 117771
loss: 1.0092657804489136,grad_norm: 0.8249503277089522, iteration: 117772
loss: 1.0129767656326294,grad_norm: 0.9999991704797412, iteration: 117773
loss: 1.015030026435852,grad_norm: 0.9999998703735204, iteration: 117774
loss: 1.0204918384552002,grad_norm: 0.9999993770652378, iteration: 117775
loss: 1.017648696899414,grad_norm: 0.8248124705797836, iteration: 117776
loss: 0.96080482006073,grad_norm: 0.9999991142363536, iteration: 117777
loss: 0.9999285340309143,grad_norm: 0.7862052018210114, iteration: 117778
loss: 1.0229018926620483,grad_norm: 0.9150757354349376, iteration: 117779
loss: 0.9598223567008972,grad_norm: 0.8640331703244223, iteration: 117780
loss: 0.9930840134620667,grad_norm: 0.9930015847904915, iteration: 117781
loss: 1.0190085172653198,grad_norm: 0.9285615773195887, iteration: 117782
loss: 1.0161375999450684,grad_norm: 0.9999990630844774, iteration: 117783
loss: 1.0248005390167236,grad_norm: 0.8067428780427757, iteration: 117784
loss: 0.9735487103462219,grad_norm: 0.8953957380134336, iteration: 117785
loss: 1.1084831953048706,grad_norm: 0.9999999220432569, iteration: 117786
loss: 0.9894189834594727,grad_norm: 0.9999992291773999, iteration: 117787
loss: 0.9912482500076294,grad_norm: 0.9999990879307431, iteration: 117788
loss: 0.9890467524528503,grad_norm: 0.9317206793740491, iteration: 117789
loss: 1.0341466665267944,grad_norm: 0.8346382637809422, iteration: 117790
loss: 1.0656795501708984,grad_norm: 0.9999997191473349, iteration: 117791
loss: 1.0995252132415771,grad_norm: 0.9999992800838019, iteration: 117792
loss: 0.9854529500007629,grad_norm: 0.8132394361140625, iteration: 117793
loss: 0.9853090643882751,grad_norm: 0.9301915567630088, iteration: 117794
loss: 0.9842157363891602,grad_norm: 0.9999991260939616, iteration: 117795
loss: 1.0168895721435547,grad_norm: 0.8057316483013425, iteration: 117796
loss: 1.0509872436523438,grad_norm: 0.9999993215380332, iteration: 117797
loss: 1.0380322933197021,grad_norm: 0.937182991146852, iteration: 117798
loss: 1.0338586568832397,grad_norm: 0.9999991159330509, iteration: 117799
loss: 1.0139644145965576,grad_norm: 0.874965362253453, iteration: 117800
loss: 0.9935718178749084,grad_norm: 0.9999990256239623, iteration: 117801
loss: 1.0051101446151733,grad_norm: 0.9999992038763966, iteration: 117802
loss: 1.017750859260559,grad_norm: 0.9999993896081545, iteration: 117803
loss: 1.0439075231552124,grad_norm: 0.9999993934723964, iteration: 117804
loss: 1.0090270042419434,grad_norm: 0.8807202364641801, iteration: 117805
loss: 1.216368317604065,grad_norm: 0.999999844322492, iteration: 117806
loss: 1.1012808084487915,grad_norm: 0.9470120798197438, iteration: 117807
loss: 0.9632554054260254,grad_norm: 0.874911240999977, iteration: 117808
loss: 1.0158190727233887,grad_norm: 0.9330772901936395, iteration: 117809
loss: 1.0742392539978027,grad_norm: 0.9486847960763923, iteration: 117810
loss: 1.013253092765808,grad_norm: 0.9216553834079474, iteration: 117811
loss: 1.0629398822784424,grad_norm: 0.9999993224631907, iteration: 117812
loss: 1.0652291774749756,grad_norm: 0.9999994735111947, iteration: 117813
loss: 1.0152157545089722,grad_norm: 0.9999995207775925, iteration: 117814
loss: 0.967638373374939,grad_norm: 0.869836416107342, iteration: 117815
loss: 1.0166288614273071,grad_norm: 0.9999991241296041, iteration: 117816
loss: 1.0104031562805176,grad_norm: 0.9999991189722109, iteration: 117817
loss: 1.0307294130325317,grad_norm: 0.9999990649499997, iteration: 117818
loss: 1.0013114213943481,grad_norm: 0.8648454926282956, iteration: 117819
loss: 0.9954487681388855,grad_norm: 0.7804379448586359, iteration: 117820
loss: 0.9960139393806458,grad_norm: 0.9473337910868544, iteration: 117821
loss: 1.0185335874557495,grad_norm: 0.9302559565966035, iteration: 117822
loss: 0.9947370886802673,grad_norm: 0.8107675216448688, iteration: 117823
loss: 1.0481200218200684,grad_norm: 0.9999990961083711, iteration: 117824
loss: 1.0087569952011108,grad_norm: 0.977251382759087, iteration: 117825
loss: 1.0252277851104736,grad_norm: 0.798632569748454, iteration: 117826
loss: 1.0063472986221313,grad_norm: 0.9608769489782797, iteration: 117827
loss: 1.0013998746871948,grad_norm: 0.8831468708412199, iteration: 117828
loss: 0.9884670972824097,grad_norm: 0.752543974477141, iteration: 117829
loss: 1.1039786338806152,grad_norm: 0.9999993915966151, iteration: 117830
loss: 1.0360822677612305,grad_norm: 0.9537088961802509, iteration: 117831
loss: 1.00058114528656,grad_norm: 0.9999989714531172, iteration: 117832
loss: 0.9939581155776978,grad_norm: 0.9375033586266439, iteration: 117833
loss: 1.0200083255767822,grad_norm: 0.8580403426267706, iteration: 117834
loss: 1.0369174480438232,grad_norm: 0.9344546435856176, iteration: 117835
loss: 1.124729037284851,grad_norm: 0.9999992222481158, iteration: 117836
loss: 1.019560694694519,grad_norm: 0.9484152341044796, iteration: 117837
loss: 1.0057590007781982,grad_norm: 0.9999991802368953, iteration: 117838
loss: 1.0655449628829956,grad_norm: 0.9999998211718666, iteration: 117839
loss: 1.0213934183120728,grad_norm: 0.9999989699948403, iteration: 117840
loss: 1.0022567510604858,grad_norm: 0.8255823728051537, iteration: 117841
loss: 1.0750517845153809,grad_norm: 0.9999991576060047, iteration: 117842
loss: 1.0114178657531738,grad_norm: 0.7915447699690797, iteration: 117843
loss: 0.9940173029899597,grad_norm: 0.9574772582315793, iteration: 117844
loss: 1.0104120969772339,grad_norm: 0.9999998323272807, iteration: 117845
loss: 0.9955546259880066,grad_norm: 0.999999050307274, iteration: 117846
loss: 1.1916284561157227,grad_norm: 0.9999995373692451, iteration: 117847
loss: 1.0763870477676392,grad_norm: 0.9999990690587044, iteration: 117848
loss: 0.9952899813652039,grad_norm: 0.9999991253198867, iteration: 117849
loss: 1.220918893814087,grad_norm: 0.9999995676774702, iteration: 117850
loss: 1.0044149160385132,grad_norm: 0.9999999825193164, iteration: 117851
loss: 1.0043641328811646,grad_norm: 0.9343721303836439, iteration: 117852
loss: 0.9921436309814453,grad_norm: 0.8811076246805145, iteration: 117853
loss: 1.0470002889633179,grad_norm: 0.9999991710248528, iteration: 117854
loss: 1.0720187425613403,grad_norm: 0.9999999226352038, iteration: 117855
loss: 1.0071890354156494,grad_norm: 0.9999992113009745, iteration: 117856
loss: 0.9856545329093933,grad_norm: 0.9999990910981349, iteration: 117857
loss: 1.0272026062011719,grad_norm: 0.9999991068991895, iteration: 117858
loss: 1.041179895401001,grad_norm: 0.9999991352064961, iteration: 117859
loss: 1.0066940784454346,grad_norm: 0.9999991968952844, iteration: 117860
loss: 1.0093051195144653,grad_norm: 0.9999996291471359, iteration: 117861
loss: 1.0139222145080566,grad_norm: 0.999999147943394, iteration: 117862
loss: 1.047369122505188,grad_norm: 0.9999995156438152, iteration: 117863
loss: 0.9988454580307007,grad_norm: 0.999999183198823, iteration: 117864
loss: 0.9959500432014465,grad_norm: 0.9999992577251127, iteration: 117865
loss: 0.9945995211601257,grad_norm: 0.9618251730821354, iteration: 117866
loss: 1.009204387664795,grad_norm: 0.999999159610338, iteration: 117867
loss: 1.0016127824783325,grad_norm: 0.9999991179265089, iteration: 117868
loss: 1.0066853761672974,grad_norm: 0.999999547451588, iteration: 117869
loss: 0.9794514179229736,grad_norm: 0.9212194700722005, iteration: 117870
loss: 0.9987550377845764,grad_norm: 0.8964094073523514, iteration: 117871
loss: 0.9843339920043945,grad_norm: 0.7162208342844227, iteration: 117872
loss: 0.9989085793495178,grad_norm: 0.885834944937312, iteration: 117873
loss: 0.9474409222602844,grad_norm: 0.8815368824334859, iteration: 117874
loss: 0.9817953705787659,grad_norm: 0.9624792514355149, iteration: 117875
loss: 1.0209003686904907,grad_norm: 0.999999213488442, iteration: 117876
loss: 0.991632342338562,grad_norm: 0.9530543914694954, iteration: 117877
loss: 1.016781210899353,grad_norm: 0.9999992541695947, iteration: 117878
loss: 0.9542135000228882,grad_norm: 0.80197377587847, iteration: 117879
loss: 0.9811316728591919,grad_norm: 0.9203669235948446, iteration: 117880
loss: 1.0159633159637451,grad_norm: 0.9994582679163989, iteration: 117881
loss: 0.9928415417671204,grad_norm: 0.7378049734295894, iteration: 117882
loss: 0.9808055758476257,grad_norm: 0.9890670684492024, iteration: 117883
loss: 1.0147053003311157,grad_norm: 0.7773156756817153, iteration: 117884
loss: 1.03135347366333,grad_norm: 0.9999991027066919, iteration: 117885
loss: 0.9908973574638367,grad_norm: 0.9679168205630997, iteration: 117886
loss: 1.0038800239562988,grad_norm: 0.9999990899925351, iteration: 117887
loss: 1.0027529001235962,grad_norm: 0.9999992913622513, iteration: 117888
loss: 1.0744949579238892,grad_norm: 0.9999994662995896, iteration: 117889
loss: 0.9697619676589966,grad_norm: 0.999999125697931, iteration: 117890
loss: 0.9936972260475159,grad_norm: 0.9091948575597263, iteration: 117891
loss: 1.0428991317749023,grad_norm: 0.7642648908170202, iteration: 117892
loss: 0.9884554147720337,grad_norm: 0.8840839321429401, iteration: 117893
loss: 1.022760272026062,grad_norm: 0.9397650991831873, iteration: 117894
loss: 1.0066606998443604,grad_norm: 0.9999991005225137, iteration: 117895
loss: 0.9863148927688599,grad_norm: 0.9340848961748064, iteration: 117896
loss: 0.9872247576713562,grad_norm: 0.9999999442340539, iteration: 117897
loss: 1.007638692855835,grad_norm: 0.9999990357806352, iteration: 117898
loss: 1.0409510135650635,grad_norm: 0.9999992905489269, iteration: 117899
loss: 0.992905855178833,grad_norm: 0.8703637640638273, iteration: 117900
loss: 1.0222597122192383,grad_norm: 0.9765005487183108, iteration: 117901
loss: 1.0295153856277466,grad_norm: 0.9999994988718409, iteration: 117902
loss: 0.9717687964439392,grad_norm: 0.8166042622584285, iteration: 117903
loss: 1.0106709003448486,grad_norm: 0.9999991609688978, iteration: 117904
loss: 0.9902459979057312,grad_norm: 0.8593465742365362, iteration: 117905
loss: 1.040778398513794,grad_norm: 0.999999354731161, iteration: 117906
loss: 0.9949060678482056,grad_norm: 0.9031106579533963, iteration: 117907
loss: 0.9472212791442871,grad_norm: 0.9999992468414076, iteration: 117908
loss: 1.06183660030365,grad_norm: 0.9999991010456389, iteration: 117909
loss: 1.0716617107391357,grad_norm: 0.9729720634894309, iteration: 117910
loss: 0.9975137710571289,grad_norm: 0.9999994389472104, iteration: 117911
loss: 1.0213837623596191,grad_norm: 0.8316029786587531, iteration: 117912
loss: 1.050257682800293,grad_norm: 0.999999486918005, iteration: 117913
loss: 1.0647811889648438,grad_norm: 0.7855662120378827, iteration: 117914
loss: 1.0186995267868042,grad_norm: 0.99999961440625, iteration: 117915
loss: 1.0199631452560425,grad_norm: 0.9999992194428222, iteration: 117916
loss: 0.9793704152107239,grad_norm: 0.8312014777910144, iteration: 117917
loss: 1.0061566829681396,grad_norm: 0.9999991573415756, iteration: 117918
loss: 1.101837158203125,grad_norm: 0.9999998907222473, iteration: 117919
loss: 0.9847259521484375,grad_norm: 0.99999923649028, iteration: 117920
loss: 1.0521219968795776,grad_norm: 0.9999992452127549, iteration: 117921
loss: 1.007388949394226,grad_norm: 0.8208699784519603, iteration: 117922
loss: 0.986015260219574,grad_norm: 0.8922588930688715, iteration: 117923
loss: 1.1416093111038208,grad_norm: 0.9999999408146416, iteration: 117924
loss: 0.986574649810791,grad_norm: 0.9999995124419508, iteration: 117925
loss: 0.9733927249908447,grad_norm: 0.8555292183507429, iteration: 117926
loss: 0.9918060302734375,grad_norm: 0.9778897332486328, iteration: 117927
loss: 1.0028979778289795,grad_norm: 0.9999992434314049, iteration: 117928
loss: 1.0021535158157349,grad_norm: 0.9999990877772278, iteration: 117929
loss: 1.0061365365982056,grad_norm: 0.9999992941548557, iteration: 117930
loss: 0.9934009313583374,grad_norm: 0.9999995360942747, iteration: 117931
loss: 1.0692641735076904,grad_norm: 0.9999992805540528, iteration: 117932
loss: 1.0140339136123657,grad_norm: 0.8802285636802086, iteration: 117933
loss: 0.9756094217300415,grad_norm: 0.8395007381243766, iteration: 117934
loss: 1.0118215084075928,grad_norm: 0.9999990889321421, iteration: 117935
loss: 1.0414220094680786,grad_norm: 0.9999989734235573, iteration: 117936
loss: 1.0999211072921753,grad_norm: 0.9999998929414404, iteration: 117937
loss: 0.98954176902771,grad_norm: 0.8620494941386633, iteration: 117938
loss: 1.0115104913711548,grad_norm: 0.9999997585874681, iteration: 117939
loss: 0.9856323003768921,grad_norm: 0.962856298054508, iteration: 117940
loss: 0.9803214073181152,grad_norm: 0.7208811682207573, iteration: 117941
loss: 0.9664376974105835,grad_norm: 0.8925990906277427, iteration: 117942
loss: 0.9706225991249084,grad_norm: 0.8644490823715812, iteration: 117943
loss: 1.004482388496399,grad_norm: 0.8110059975717228, iteration: 117944
loss: 1.0148411989212036,grad_norm: 0.9999995982282983, iteration: 117945
loss: 1.0096759796142578,grad_norm: 0.8386601934490788, iteration: 117946
loss: 1.0042797327041626,grad_norm: 0.9999991509851305, iteration: 117947
loss: 1.261330246925354,grad_norm: 0.9999998236793979, iteration: 117948
loss: 1.074593186378479,grad_norm: 0.999999061181211, iteration: 117949
loss: 1.029348611831665,grad_norm: 0.9565807835389163, iteration: 117950
loss: 1.0066417455673218,grad_norm: 0.9999991112434231, iteration: 117951
loss: 1.1109446287155151,grad_norm: 0.9999992616689235, iteration: 117952
loss: 0.9809800982475281,grad_norm: 0.8089804514704239, iteration: 117953
loss: 1.009534478187561,grad_norm: 0.9784289513063987, iteration: 117954
loss: 1.0171127319335938,grad_norm: 0.9999990370078145, iteration: 117955
loss: 1.0050467252731323,grad_norm: 0.9999991163230832, iteration: 117956
loss: 1.0208204984664917,grad_norm: 0.9340388920743365, iteration: 117957
loss: 1.0343908071517944,grad_norm: 0.9999991389541725, iteration: 117958
loss: 1.0563924312591553,grad_norm: 0.864639129680011, iteration: 117959
loss: 1.0012608766555786,grad_norm: 0.8016028286024343, iteration: 117960
loss: 1.0108387470245361,grad_norm: 0.9245561860661586, iteration: 117961
loss: 1.0234267711639404,grad_norm: 0.796213409211488, iteration: 117962
loss: 1.0658289194107056,grad_norm: 0.999999115900277, iteration: 117963
loss: 1.0471234321594238,grad_norm: 0.9999993326706991, iteration: 117964
loss: 0.97878497838974,grad_norm: 0.9999990939388748, iteration: 117965
loss: 0.9783299565315247,grad_norm: 0.8907844111697115, iteration: 117966
loss: 1.0208802223205566,grad_norm: 0.9999992109477546, iteration: 117967
loss: 1.0159611701965332,grad_norm: 0.9650108528701369, iteration: 117968
loss: 1.0558075904846191,grad_norm: 0.9999993118286169, iteration: 117969
loss: 1.0237623453140259,grad_norm: 0.7855491121746853, iteration: 117970
loss: 0.9849724173545837,grad_norm: 0.9999991758365702, iteration: 117971
loss: 1.0704182386398315,grad_norm: 0.9999992162794817, iteration: 117972
loss: 0.9930475354194641,grad_norm: 0.798388997745434, iteration: 117973
loss: 1.0001214742660522,grad_norm: 0.7682486595834302, iteration: 117974
loss: 0.9966576099395752,grad_norm: 0.9999992540734678, iteration: 117975
loss: 1.056670904159546,grad_norm: 0.9999994293567098, iteration: 117976
loss: 1.0128132104873657,grad_norm: 0.9999995570690853, iteration: 117977
loss: 1.031507968902588,grad_norm: 0.7936725525655834, iteration: 117978
loss: 1.030014157295227,grad_norm: 0.9999997349142432, iteration: 117979
loss: 1.013944387435913,grad_norm: 0.9999995931324351, iteration: 117980
loss: 0.9952564239501953,grad_norm: 0.9947487266684993, iteration: 117981
loss: 1.0025941133499146,grad_norm: 0.9999989518809701, iteration: 117982
loss: 1.0556585788726807,grad_norm: 0.9999993857302358, iteration: 117983
loss: 1.1942781209945679,grad_norm: 0.9999996981995464, iteration: 117984
loss: 1.033407211303711,grad_norm: 0.9457698537486833, iteration: 117985
loss: 1.035536766052246,grad_norm: 0.7948693253334601, iteration: 117986
loss: 1.1021430492401123,grad_norm: 0.9999998100554048, iteration: 117987
loss: 1.0696934461593628,grad_norm: 0.9999993873372358, iteration: 117988
loss: 1.0078872442245483,grad_norm: 0.9999992388541961, iteration: 117989
loss: 1.008292555809021,grad_norm: 0.9050703845051234, iteration: 117990
loss: 1.0073601007461548,grad_norm: 0.9999990475645884, iteration: 117991
loss: 0.9607239961624146,grad_norm: 0.837559623280526, iteration: 117992
loss: 0.9848169088363647,grad_norm: 0.9999994146480327, iteration: 117993
loss: 0.9982038736343384,grad_norm: 0.8044200518354657, iteration: 117994
loss: 1.0849356651306152,grad_norm: 0.9999993815952835, iteration: 117995
loss: 1.042853832244873,grad_norm: 0.8755257876261588, iteration: 117996
loss: 1.0754367113113403,grad_norm: 0.9292894232378495, iteration: 117997
loss: 1.089714527130127,grad_norm: 0.9999998311559687, iteration: 117998
loss: 1.143588662147522,grad_norm: 0.9999991692411098, iteration: 117999
loss: 1.0756089687347412,grad_norm: 0.9999997290527367, iteration: 118000
loss: 1.074855089187622,grad_norm: 0.9999992445184716, iteration: 118001
loss: 1.1469074487686157,grad_norm: 0.9906476591652414, iteration: 118002
loss: 1.2108315229415894,grad_norm: 0.9999998583841606, iteration: 118003
loss: 1.084466576576233,grad_norm: 0.9999992175609991, iteration: 118004
loss: 1.486966609954834,grad_norm: 0.9999998670110977, iteration: 118005
loss: 1.1169606447219849,grad_norm: 0.9999993863006371, iteration: 118006
loss: 1.1137516498565674,grad_norm: 0.9604874858211752, iteration: 118007
loss: 1.0352203845977783,grad_norm: 0.8988713314711849, iteration: 118008
loss: 1.1275922060012817,grad_norm: 0.9999996460484205, iteration: 118009
loss: 1.0865797996520996,grad_norm: 0.8810708562634403, iteration: 118010
loss: 1.1388360261917114,grad_norm: 0.9999999261404997, iteration: 118011
loss: 1.1616190671920776,grad_norm: 0.9999992334258845, iteration: 118012
loss: 1.0724211931228638,grad_norm: 0.9999994316007372, iteration: 118013
loss: 1.0633081197738647,grad_norm: 0.9999992838691072, iteration: 118014
loss: 0.9964857697486877,grad_norm: 0.899885139797684, iteration: 118015
loss: 1.0557284355163574,grad_norm: 0.9999993249681317, iteration: 118016
loss: 1.0141282081604004,grad_norm: 0.9924282519394468, iteration: 118017
loss: 1.0074076652526855,grad_norm: 0.9999992367596923, iteration: 118018
loss: 1.082927942276001,grad_norm: 0.9999993631102649, iteration: 118019
loss: 1.046554684638977,grad_norm: 0.999999417834113, iteration: 118020
loss: 1.0588500499725342,grad_norm: 0.9798708016066043, iteration: 118021
loss: 0.9892386198043823,grad_norm: 0.9123749167147447, iteration: 118022
loss: 1.0566661357879639,grad_norm: 0.9999994819654079, iteration: 118023
loss: 1.3627480268478394,grad_norm: 0.9999997965970915, iteration: 118024
loss: 1.2170556783676147,grad_norm: 0.9999992960715971, iteration: 118025
loss: 1.09464430809021,grad_norm: 0.9999996527299247, iteration: 118026
loss: 1.0163456201553345,grad_norm: 0.8783273085373259, iteration: 118027
loss: 1.004097580909729,grad_norm: 0.8049719601370121, iteration: 118028
loss: 1.0501381158828735,grad_norm: 0.9999995051074025, iteration: 118029
loss: 1.0215251445770264,grad_norm: 0.8723161678424703, iteration: 118030
loss: 0.9761520028114319,grad_norm: 0.999999121699894, iteration: 118031
loss: 1.0513781309127808,grad_norm: 0.9999994764536103, iteration: 118032
loss: 1.0781936645507812,grad_norm: 0.9999992752548615, iteration: 118033
loss: 0.996941864490509,grad_norm: 0.9044079067139826, iteration: 118034
loss: 1.0263724327087402,grad_norm: 0.9999995955376259, iteration: 118035
loss: 1.0825737714767456,grad_norm: 0.9999995683332505, iteration: 118036
loss: 1.0814496278762817,grad_norm: 1.000000061139405, iteration: 118037
loss: 1.0472581386566162,grad_norm: 0.9999999074048727, iteration: 118038
loss: 1.0574530363082886,grad_norm: 0.999999472654084, iteration: 118039
loss: 1.0540517568588257,grad_norm: 0.9999994125591563, iteration: 118040
loss: 1.00905442237854,grad_norm: 0.8438321706910509, iteration: 118041
loss: 1.0725040435791016,grad_norm: 0.9999999052074405, iteration: 118042
loss: 1.0809191465377808,grad_norm: 0.9999998923647033, iteration: 118043
loss: 1.0394384860992432,grad_norm: 0.8540772325079687, iteration: 118044
loss: 0.9879937767982483,grad_norm: 0.9813604627709858, iteration: 118045
loss: 1.0599271059036255,grad_norm: 0.9999998409710231, iteration: 118046
loss: 0.975604236125946,grad_norm: 0.9999991311691658, iteration: 118047
loss: 1.0259512662887573,grad_norm: 0.9999993253281447, iteration: 118048
loss: 1.0304667949676514,grad_norm: 0.9999995049549169, iteration: 118049
loss: 0.9806039333343506,grad_norm: 0.8996068528892017, iteration: 118050
loss: 1.086281418800354,grad_norm: 0.9185674373043845, iteration: 118051
loss: 1.0058226585388184,grad_norm: 0.8072771270736574, iteration: 118052
loss: 1.0686378479003906,grad_norm: 0.9999992581120654, iteration: 118053
loss: 1.2877984046936035,grad_norm: 0.999999814541576, iteration: 118054
loss: 0.9985951781272888,grad_norm: 0.8849754762632911, iteration: 118055
loss: 0.9590505957603455,grad_norm: 0.8030810071939276, iteration: 118056
loss: 1.0878692865371704,grad_norm: 0.9999996202973804, iteration: 118057
loss: 1.0767277479171753,grad_norm: 0.888084720324289, iteration: 118058
loss: 1.0191898345947266,grad_norm: 0.8464063102128376, iteration: 118059
loss: 1.124666690826416,grad_norm: 0.9999990728859366, iteration: 118060
loss: 0.9982584118843079,grad_norm: 0.6923130818202127, iteration: 118061
loss: 1.095139980316162,grad_norm: 0.9999992843754582, iteration: 118062
loss: 1.046828031539917,grad_norm: 0.9999992183447585, iteration: 118063
loss: 0.9860895276069641,grad_norm: 0.9999991595138, iteration: 118064
loss: 1.0358054637908936,grad_norm: 0.9999991623486684, iteration: 118065
loss: 0.9996974468231201,grad_norm: 0.9999996041332689, iteration: 118066
loss: 1.079689860343933,grad_norm: 0.9999995184746266, iteration: 118067
loss: 1.0037078857421875,grad_norm: 0.8972138826325983, iteration: 118068
loss: 1.0368400812149048,grad_norm: 0.9880214202650904, iteration: 118069
loss: 1.0674219131469727,grad_norm: 0.9999995392049744, iteration: 118070
loss: 1.1312541961669922,grad_norm: 0.9999994163672261, iteration: 118071
loss: 1.0333799123764038,grad_norm: 0.9999990345950249, iteration: 118072
loss: 1.0548585653305054,grad_norm: 0.9999991046270668, iteration: 118073
loss: 1.1500288248062134,grad_norm: 0.9999995391249049, iteration: 118074
loss: 0.991569459438324,grad_norm: 0.9999990002332564, iteration: 118075
loss: 1.0186489820480347,grad_norm: 0.9844815312738936, iteration: 118076
loss: 0.9759941101074219,grad_norm: 0.9524475942026673, iteration: 118077
loss: 0.9993736147880554,grad_norm: 0.9694480544705205, iteration: 118078
loss: 0.9740359783172607,grad_norm: 0.8805202340087591, iteration: 118079
loss: 1.0404411554336548,grad_norm: 0.9066082901051481, iteration: 118080
loss: 1.028942584991455,grad_norm: 0.9999999142567915, iteration: 118081
loss: 1.0126094818115234,grad_norm: 0.9999990590353843, iteration: 118082
loss: 1.056996464729309,grad_norm: 0.999999049892882, iteration: 118083
loss: 1.03477942943573,grad_norm: 0.9999991768332008, iteration: 118084
loss: 1.0166479349136353,grad_norm: 0.9198146328862828, iteration: 118085
loss: 1.102961540222168,grad_norm: 0.9999995520591994, iteration: 118086
loss: 1.0316108465194702,grad_norm: 0.9999991256664104, iteration: 118087
loss: 1.085013508796692,grad_norm: 0.9999992580408458, iteration: 118088
loss: 1.0311000347137451,grad_norm: 0.8796904644408972, iteration: 118089
loss: 1.0709304809570312,grad_norm: 0.957701542789031, iteration: 118090
loss: 0.9671174883842468,grad_norm: 0.932499668981033, iteration: 118091
loss: 0.966020941734314,grad_norm: 0.8783299821601867, iteration: 118092
loss: 0.9943819642066956,grad_norm: 0.9999998246304821, iteration: 118093
loss: 1.0191599130630493,grad_norm: 0.8047655419159139, iteration: 118094
loss: 0.9932864904403687,grad_norm: 0.8571221221769775, iteration: 118095
loss: 1.02451753616333,grad_norm: 0.9999994725836324, iteration: 118096
loss: 1.0067481994628906,grad_norm: 0.9999992767649456, iteration: 118097
loss: 0.9737551212310791,grad_norm: 0.9999993181076018, iteration: 118098
loss: 1.0477867126464844,grad_norm: 0.9999991607821364, iteration: 118099
loss: 1.032101035118103,grad_norm: 0.999999378148069, iteration: 118100
loss: 1.0653314590454102,grad_norm: 0.8097374251399533, iteration: 118101
loss: 1.067734718322754,grad_norm: 0.9999992457508657, iteration: 118102
loss: 1.007079005241394,grad_norm: 0.8753655275812036, iteration: 118103
loss: 0.9726824164390564,grad_norm: 0.9999989769680283, iteration: 118104
loss: 0.9994319081306458,grad_norm: 0.858018010403636, iteration: 118105
loss: 1.0372114181518555,grad_norm: 0.9797731042514728, iteration: 118106
loss: 1.0815387964248657,grad_norm: 0.9999997340430218, iteration: 118107
loss: 1.1144026517868042,grad_norm: 0.9999998066015406, iteration: 118108
loss: 0.980527937412262,grad_norm: 0.9999992221297372, iteration: 118109
loss: 1.006074070930481,grad_norm: 0.970293516500975, iteration: 118110
loss: 1.060085415840149,grad_norm: 0.999999123176529, iteration: 118111
loss: 1.0447405576705933,grad_norm: 0.9999990713719762, iteration: 118112
loss: 1.0595066547393799,grad_norm: 0.9999990496947162, iteration: 118113
loss: 1.0012259483337402,grad_norm: 0.9999998949981931, iteration: 118114
loss: 1.0109162330627441,grad_norm: 0.9999994932795029, iteration: 118115
loss: 1.0685867071151733,grad_norm: 0.999999303886187, iteration: 118116
loss: 1.0668859481811523,grad_norm: 0.8544483796112423, iteration: 118117
loss: 0.9898967742919922,grad_norm: 0.8004623674610031, iteration: 118118
loss: 1.0219391584396362,grad_norm: 0.9999993803759648, iteration: 118119
loss: 1.05988609790802,grad_norm: 0.8989094461824675, iteration: 118120
loss: 1.0082818269729614,grad_norm: 0.9999991570458593, iteration: 118121
loss: 1.086310863494873,grad_norm: 0.9999991535096655, iteration: 118122
loss: 0.9959776401519775,grad_norm: 0.9113833123424597, iteration: 118123
loss: 1.0281119346618652,grad_norm: 0.9999994035663384, iteration: 118124
loss: 1.0201753377914429,grad_norm: 0.9999998514955337, iteration: 118125
loss: 1.0128675699234009,grad_norm: 0.8438244300527844, iteration: 118126
loss: 1.0094228982925415,grad_norm: 0.9999991766105852, iteration: 118127
loss: 0.9859884977340698,grad_norm: 0.9999996809794829, iteration: 118128
loss: 1.0148589611053467,grad_norm: 0.8202827726299059, iteration: 118129
loss: 1.0515409708023071,grad_norm: 0.9999991577697916, iteration: 118130
loss: 1.0188475847244263,grad_norm: 0.9999998735738593, iteration: 118131
loss: 1.013933777809143,grad_norm: 0.8657442138055383, iteration: 118132
loss: 1.0073860883712769,grad_norm: 0.9999991320527326, iteration: 118133
loss: 1.0144580602645874,grad_norm: 0.885160384893857, iteration: 118134
loss: 0.9712806940078735,grad_norm: 0.9637791916402293, iteration: 118135
loss: 1.009536623954773,grad_norm: 0.9999994489159657, iteration: 118136
loss: 0.9979597330093384,grad_norm: 0.9999994076995711, iteration: 118137
loss: 1.005005121231079,grad_norm: 0.9999992055334436, iteration: 118138
loss: 1.0744823217391968,grad_norm: 0.9999990960076636, iteration: 118139
loss: 1.0224519968032837,grad_norm: 0.9999993429742376, iteration: 118140
loss: 1.0135655403137207,grad_norm: 0.9290590301643262, iteration: 118141
loss: 1.0018558502197266,grad_norm: 0.9910068200080392, iteration: 118142
loss: 1.0267887115478516,grad_norm: 0.9999992433843773, iteration: 118143
loss: 1.0269298553466797,grad_norm: 0.9999990697792245, iteration: 118144
loss: 0.9954090118408203,grad_norm: 0.9640266785377065, iteration: 118145
loss: 1.066064476966858,grad_norm: 0.9999994038504761, iteration: 118146
loss: 1.0305982828140259,grad_norm: 0.9999989827681149, iteration: 118147
loss: 1.094223141670227,grad_norm: 0.9999998003380188, iteration: 118148
loss: 0.962510347366333,grad_norm: 0.9999989544239255, iteration: 118149
loss: 1.0195966958999634,grad_norm: 0.8388548138722264, iteration: 118150
loss: 1.0113083124160767,grad_norm: 0.9598090591879112, iteration: 118151
loss: 1.036810278892517,grad_norm: 0.9999992340609986, iteration: 118152
loss: 1.033500075340271,grad_norm: 0.8043041457924878, iteration: 118153
loss: 0.9792584776878357,grad_norm: 0.8928399318963764, iteration: 118154
loss: 1.008399486541748,grad_norm: 0.9999993583981514, iteration: 118155
loss: 1.0746499300003052,grad_norm: 0.9999997911227079, iteration: 118156
loss: 1.0218334197998047,grad_norm: 0.8942821325575143, iteration: 118157
loss: 0.9925951361656189,grad_norm: 0.9999990744903884, iteration: 118158
loss: 0.9923566579818726,grad_norm: 0.8437257465474893, iteration: 118159
loss: 1.0464708805084229,grad_norm: 0.9999998767825591, iteration: 118160
loss: 1.012069821357727,grad_norm: 0.8629870289446031, iteration: 118161
loss: 1.0171529054641724,grad_norm: 0.9999991806694289, iteration: 118162
loss: 1.0467257499694824,grad_norm: 0.9176515205683786, iteration: 118163
loss: 1.0297526121139526,grad_norm: 0.999999984878755, iteration: 118164
loss: 0.9966450929641724,grad_norm: 0.9179984185235532, iteration: 118165
loss: 1.0606298446655273,grad_norm: 1.0000001015805753, iteration: 118166
loss: 0.9921655058860779,grad_norm: 0.9999992173248573, iteration: 118167
loss: 1.0303069353103638,grad_norm: 0.9999991526512281, iteration: 118168
loss: 0.9898865222930908,grad_norm: 0.9999991988822058, iteration: 118169
loss: 1.057793378829956,grad_norm: 0.9999992239424901, iteration: 118170
loss: 1.0344799757003784,grad_norm: 0.9999991273810489, iteration: 118171
loss: 1.0383015871047974,grad_norm: 0.9999996123455003, iteration: 118172
loss: 1.00267493724823,grad_norm: 0.7912810543418397, iteration: 118173
loss: 1.0121705532073975,grad_norm: 0.9999993437255643, iteration: 118174
loss: 1.0108366012573242,grad_norm: 0.8581584777687462, iteration: 118175
loss: 1.0015205144882202,grad_norm: 0.9979493999820715, iteration: 118176
loss: 0.97554612159729,grad_norm: 0.7771677003245888, iteration: 118177
loss: 0.9796137809753418,grad_norm: 0.9999998832172864, iteration: 118178
loss: 0.9893978834152222,grad_norm: 0.9337991805199222, iteration: 118179
loss: 1.0338331460952759,grad_norm: 0.9999993584568971, iteration: 118180
loss: 1.0342077016830444,grad_norm: 0.7742721077277994, iteration: 118181
loss: 1.0282599925994873,grad_norm: 0.9192301301196294, iteration: 118182
loss: 1.0231554508209229,grad_norm: 0.8404167278417719, iteration: 118183
loss: 0.9888536334037781,grad_norm: 0.9098679702798669, iteration: 118184
loss: 1.0546460151672363,grad_norm: 0.9999993280270603, iteration: 118185
loss: 1.0256617069244385,grad_norm: 0.9999994360403794, iteration: 118186
loss: 0.9912443161010742,grad_norm: 0.9157506831286655, iteration: 118187
loss: 1.0200839042663574,grad_norm: 0.9999993389667455, iteration: 118188
loss: 1.004185438156128,grad_norm: 0.8295252528290992, iteration: 118189
loss: 0.9965885281562805,grad_norm: 0.9243941131646295, iteration: 118190
loss: 1.0112476348876953,grad_norm: 0.9999993092529883, iteration: 118191
loss: 0.9797618985176086,grad_norm: 0.7578744009950612, iteration: 118192
loss: 0.9969057440757751,grad_norm: 0.9999991565998427, iteration: 118193
loss: 1.0167832374572754,grad_norm: 0.9999992017639061, iteration: 118194
loss: 1.001270055770874,grad_norm: 0.9401552598354427, iteration: 118195
loss: 1.023249626159668,grad_norm: 0.9999991109154248, iteration: 118196
loss: 1.013100028038025,grad_norm: 0.8700511828573644, iteration: 118197
loss: 1.090661883354187,grad_norm: 0.9999996537792, iteration: 118198
loss: 0.9981188178062439,grad_norm: 0.9706554114674384, iteration: 118199
loss: 1.026962399482727,grad_norm: 0.9222421870616748, iteration: 118200
loss: 1.0160080194473267,grad_norm: 0.811158412972858, iteration: 118201
loss: 0.9762570261955261,grad_norm: 0.9466670186923825, iteration: 118202
loss: 0.9626566767692566,grad_norm: 0.9999991779284175, iteration: 118203
loss: 0.984137773513794,grad_norm: 0.9256006714503676, iteration: 118204
loss: 1.0228420495986938,grad_norm: 0.9999998978016774, iteration: 118205
loss: 0.9856157898902893,grad_norm: 0.9999993727161987, iteration: 118206
loss: 0.9887259006500244,grad_norm: 0.9372541415546861, iteration: 118207
loss: 0.9725198745727539,grad_norm: 0.8545402181479794, iteration: 118208
loss: 1.0073572397232056,grad_norm: 0.999999045810728, iteration: 118209
loss: 1.0502400398254395,grad_norm: 0.9999992917298278, iteration: 118210
loss: 0.9477308988571167,grad_norm: 0.9999990383895162, iteration: 118211
loss: 1.0726438760757446,grad_norm: 0.9999992096172438, iteration: 118212
loss: 1.0179709196090698,grad_norm: 0.9999991793892875, iteration: 118213
loss: 1.0214266777038574,grad_norm: 0.9999994256256463, iteration: 118214
loss: 1.0094187259674072,grad_norm: 0.860197716326595, iteration: 118215
loss: 1.0082664489746094,grad_norm: 0.8375862884558792, iteration: 118216
loss: 1.0097157955169678,grad_norm: 0.9999990115453538, iteration: 118217
loss: 1.0036242008209229,grad_norm: 0.9727627342319571, iteration: 118218
loss: 1.058546781539917,grad_norm: 0.999999431189202, iteration: 118219
loss: 0.9771427512168884,grad_norm: 0.999999852594225, iteration: 118220
loss: 1.0822211503982544,grad_norm: 0.9999995866822032, iteration: 118221
loss: 1.0199506282806396,grad_norm: 0.9999990967227783, iteration: 118222
loss: 0.9866978526115417,grad_norm: 0.8638510298523385, iteration: 118223
loss: 1.0224875211715698,grad_norm: 0.999999848413686, iteration: 118224
loss: 1.0211106538772583,grad_norm: 0.8580822018456927, iteration: 118225
loss: 1.0634655952453613,grad_norm: 0.9999992744541482, iteration: 118226
loss: 0.9947201609611511,grad_norm: 0.9288662992834138, iteration: 118227
loss: 1.0846863985061646,grad_norm: 0.9999996526808201, iteration: 118228
loss: 1.0215553045272827,grad_norm: 0.9999992096223634, iteration: 118229
loss: 1.0070863962173462,grad_norm: 0.9999996967177196, iteration: 118230
loss: 1.0146499872207642,grad_norm: 0.9999991029135917, iteration: 118231
loss: 1.0323559045791626,grad_norm: 0.9999993865052277, iteration: 118232
loss: 1.010905385017395,grad_norm: 0.9999993322451538, iteration: 118233
loss: 1.0616495609283447,grad_norm: 0.9999996409656645, iteration: 118234
loss: 0.9965682625770569,grad_norm: 0.9195724691500317, iteration: 118235
loss: 1.0556963682174683,grad_norm: 0.9890446668681951, iteration: 118236
loss: 1.0215115547180176,grad_norm: 0.7921838593384566, iteration: 118237
loss: 1.0548579692840576,grad_norm: 0.9999996034958683, iteration: 118238
loss: 1.0758880376815796,grad_norm: 0.9068515306877079, iteration: 118239
loss: 1.0183868408203125,grad_norm: 0.9554746434300928, iteration: 118240
loss: 1.0539904832839966,grad_norm: 0.9999990501146724, iteration: 118241
loss: 0.9597842693328857,grad_norm: 0.9999990010068203, iteration: 118242
loss: 1.025810718536377,grad_norm: 0.8261900071888449, iteration: 118243
loss: 1.0465710163116455,grad_norm: 0.9999678974998621, iteration: 118244
loss: 0.9556323289871216,grad_norm: 0.8770214530698475, iteration: 118245
loss: 1.19785475730896,grad_norm: 0.999999899993091, iteration: 118246
loss: 1.0038326978683472,grad_norm: 0.9407015039957104, iteration: 118247
loss: 1.0419065952301025,grad_norm: 0.9999992842275498, iteration: 118248
loss: 1.021079659461975,grad_norm: 0.9999991901707029, iteration: 118249
loss: 0.9710803627967834,grad_norm: 0.9999990229606194, iteration: 118250
loss: 0.9949398636817932,grad_norm: 0.8857681746897339, iteration: 118251
loss: 1.019883632659912,grad_norm: 0.9999994564428073, iteration: 118252
loss: 1.0393483638763428,grad_norm: 0.9999994646255753, iteration: 118253
loss: 1.0075315237045288,grad_norm: 0.8358182126618091, iteration: 118254
loss: 1.0656013488769531,grad_norm: 0.9999998843629442, iteration: 118255
loss: 1.0215507745742798,grad_norm: 0.8557905136182886, iteration: 118256
loss: 1.0455952882766724,grad_norm: 0.9999993117474413, iteration: 118257
loss: 1.0297774076461792,grad_norm: 0.9999997521294202, iteration: 118258
loss: 1.0488909482955933,grad_norm: 0.9999991903222245, iteration: 118259
loss: 1.0252476930618286,grad_norm: 0.8708545596332868, iteration: 118260
loss: 1.2144875526428223,grad_norm: 1.000000023253979, iteration: 118261
loss: 1.026087760925293,grad_norm: 0.9999990408580345, iteration: 118262
loss: 0.987790048122406,grad_norm: 0.9999999379891122, iteration: 118263
loss: 1.0218442678451538,grad_norm: 0.9999992129088896, iteration: 118264
loss: 1.0103719234466553,grad_norm: 0.9999992121010388, iteration: 118265
loss: 1.1218127012252808,grad_norm: 1.0000000614822375, iteration: 118266
loss: 0.9883636236190796,grad_norm: 0.9999991372751666, iteration: 118267
loss: 1.0892082452774048,grad_norm: 0.9999995768215423, iteration: 118268
loss: 0.9862642884254456,grad_norm: 0.9999990807759945, iteration: 118269
loss: 1.0115360021591187,grad_norm: 0.7990514351636755, iteration: 118270
loss: 1.0068540573120117,grad_norm: 0.8585493145609472, iteration: 118271
loss: 1.024383544921875,grad_norm: 0.8442723487810803, iteration: 118272
loss: 1.0050195455551147,grad_norm: 0.9555018505896365, iteration: 118273
loss: 1.0317414999008179,grad_norm: 0.9999991198379181, iteration: 118274
loss: 0.9960224032402039,grad_norm: 0.8885414576536347, iteration: 118275
loss: 1.0114537477493286,grad_norm: 0.9999991050039753, iteration: 118276
loss: 1.0179821252822876,grad_norm: 0.9999991015748992, iteration: 118277
loss: 1.0626145601272583,grad_norm: 0.9999996803295147, iteration: 118278
loss: 1.0630933046340942,grad_norm: 0.9999996100895094, iteration: 118279
loss: 0.9751100540161133,grad_norm: 0.8897227316487728, iteration: 118280
loss: 0.9855831861495972,grad_norm: 0.9786360564008714, iteration: 118281
loss: 1.0621861219406128,grad_norm: 0.999999425090031, iteration: 118282
loss: 1.0250802040100098,grad_norm: 0.9999990845354053, iteration: 118283
loss: 0.9988527297973633,grad_norm: 0.9999990973713337, iteration: 118284
loss: 0.9844018220901489,grad_norm: 0.9999991015906741, iteration: 118285
loss: 0.9782079458236694,grad_norm: 0.9448923630270244, iteration: 118286
loss: 1.0549784898757935,grad_norm: 0.9999993584957299, iteration: 118287
loss: 1.0804672241210938,grad_norm: 0.99999989876432, iteration: 118288
loss: 0.9548917412757874,grad_norm: 0.9361359759310672, iteration: 118289
loss: 1.0274946689605713,grad_norm: 0.8582786892262478, iteration: 118290
loss: 1.0459089279174805,grad_norm: 0.9999992204426732, iteration: 118291
loss: 0.9987725019454956,grad_norm: 0.9898292024207854, iteration: 118292
loss: 1.0033093690872192,grad_norm: 0.9999994876650533, iteration: 118293
loss: 1.0145107507705688,grad_norm: 0.9498689649692476, iteration: 118294
loss: 1.0309410095214844,grad_norm: 0.9117204668592656, iteration: 118295
loss: 1.0175403356552124,grad_norm: 0.9920300627923377, iteration: 118296
loss: 0.9930987358093262,grad_norm: 0.9022934124547138, iteration: 118297
loss: 1.0813623666763306,grad_norm: 0.9999996912327018, iteration: 118298
loss: 1.0043270587921143,grad_norm: 0.9341284019960328, iteration: 118299
loss: 1.01195228099823,grad_norm: 0.9168549843268256, iteration: 118300
loss: 1.0083674192428589,grad_norm: 0.8147808618495739, iteration: 118301
loss: 1.0099776983261108,grad_norm: 0.9378718383710511, iteration: 118302
loss: 0.9961615800857544,grad_norm: 0.9011768868087936, iteration: 118303
loss: 1.095231056213379,grad_norm: 0.9999994699861366, iteration: 118304
loss: 0.9947422742843628,grad_norm: 0.9300331644673253, iteration: 118305
loss: 0.9591131806373596,grad_norm: 0.7346399146379877, iteration: 118306
loss: 0.990266740322113,grad_norm: 0.999999373158524, iteration: 118307
loss: 1.0023382902145386,grad_norm: 0.8948410454424681, iteration: 118308
loss: 0.9814149737358093,grad_norm: 0.9026110336753396, iteration: 118309
loss: 1.042414665222168,grad_norm: 0.8493214437301577, iteration: 118310
loss: 0.9976390600204468,grad_norm: 0.9999995501716717, iteration: 118311
loss: 1.0120021104812622,grad_norm: 0.9999993273866312, iteration: 118312
loss: 1.0122660398483276,grad_norm: 0.9297446988425786, iteration: 118313
loss: 1.035032868385315,grad_norm: 0.9999991097609625, iteration: 118314
loss: 1.0252107381820679,grad_norm: 0.9866009224970484, iteration: 118315
loss: 0.9897547364234924,grad_norm: 0.8143000498711969, iteration: 118316
loss: 1.0247408151626587,grad_norm: 0.8802138671644852, iteration: 118317
loss: 0.9813178777694702,grad_norm: 0.9999989098432562, iteration: 118318
loss: 1.0091921091079712,grad_norm: 0.9999993092450188, iteration: 118319
loss: 1.0159767866134644,grad_norm: 0.9999993552657004, iteration: 118320
loss: 1.0934253931045532,grad_norm: 0.8995531278321839, iteration: 118321
loss: 1.0227007865905762,grad_norm: 0.9999994246942313, iteration: 118322
loss: 0.9851852655410767,grad_norm: 0.937903705994473, iteration: 118323
loss: 1.0786268711090088,grad_norm: 0.9999990270893927, iteration: 118324
loss: 1.047529935836792,grad_norm: 0.9999994292963288, iteration: 118325
loss: 1.008622646331787,grad_norm: 0.997392314857686, iteration: 118326
loss: 1.0716018676757812,grad_norm: 0.999999865284055, iteration: 118327
loss: 1.1342967748641968,grad_norm: 0.9999993319658411, iteration: 118328
loss: 1.0258913040161133,grad_norm: 0.8750377003612603, iteration: 118329
loss: 1.013034701347351,grad_norm: 0.9327415108689474, iteration: 118330
loss: 1.0015413761138916,grad_norm: 0.9280933750347488, iteration: 118331
loss: 1.068184733390808,grad_norm: 0.9302124282946904, iteration: 118332
loss: 1.0431870222091675,grad_norm: 0.9999992711760518, iteration: 118333
loss: 1.0089690685272217,grad_norm: 0.9999991954882511, iteration: 118334
loss: 1.0191067457199097,grad_norm: 0.9103831224438018, iteration: 118335
loss: 0.9730942845344543,grad_norm: 0.7986794859291764, iteration: 118336
loss: 1.0288383960723877,grad_norm: 0.9999993385512147, iteration: 118337
loss: 1.0735690593719482,grad_norm: 0.9672839535056905, iteration: 118338
loss: 1.0195344686508179,grad_norm: 0.8159889831514952, iteration: 118339
loss: 1.0562572479248047,grad_norm: 0.999999169490049, iteration: 118340
loss: 0.996455729007721,grad_norm: 0.8753943934238696, iteration: 118341
loss: 1.0334298610687256,grad_norm: 0.9999992999276427, iteration: 118342
loss: 1.0359082221984863,grad_norm: 0.9999992945145568, iteration: 118343
loss: 1.0336577892303467,grad_norm: 0.8130635036694754, iteration: 118344
loss: 1.0049421787261963,grad_norm: 0.8549536540449516, iteration: 118345
loss: 1.0317742824554443,grad_norm: 0.9999993421649579, iteration: 118346
loss: 1.0236244201660156,grad_norm: 0.999999129406052, iteration: 118347
loss: 1.1347025632858276,grad_norm: 0.9999992952502574, iteration: 118348
loss: 0.9699426293373108,grad_norm: 0.8475199367612841, iteration: 118349
loss: 1.090860366821289,grad_norm: 0.9999994742400197, iteration: 118350
loss: 0.9980424642562866,grad_norm: 0.9999993145893513, iteration: 118351
loss: 1.0988417863845825,grad_norm: 1.0000000551967427, iteration: 118352
loss: 0.9917743802070618,grad_norm: 0.9999992783092053, iteration: 118353
loss: 1.027573585510254,grad_norm: 0.9882126575576398, iteration: 118354
loss: 1.0193418264389038,grad_norm: 0.9999998251705344, iteration: 118355
loss: 1.053652286529541,grad_norm: 0.999999367874627, iteration: 118356
loss: 1.07228684425354,grad_norm: 0.999999316470329, iteration: 118357
loss: 1.015019178390503,grad_norm: 0.8638600958347559, iteration: 118358
loss: 1.0112425088882446,grad_norm: 0.8241571716729952, iteration: 118359
loss: 1.0015664100646973,grad_norm: 0.9731208352713855, iteration: 118360
loss: 1.0196412801742554,grad_norm: 0.9999995610778316, iteration: 118361
loss: 1.053993582725525,grad_norm: 0.9999993819956571, iteration: 118362
loss: 1.0248734951019287,grad_norm: 0.9999992097099316, iteration: 118363
loss: 1.0410387516021729,grad_norm: 0.9415349350766818, iteration: 118364
loss: 1.028806209564209,grad_norm: 0.9999992740091794, iteration: 118365
loss: 1.0980148315429688,grad_norm: 0.9999992828823093, iteration: 118366
loss: 0.9939836859703064,grad_norm: 0.8190685577054307, iteration: 118367
loss: 1.0473077297210693,grad_norm: 0.9999990685043808, iteration: 118368
loss: 1.0333315134048462,grad_norm: 0.9999997594493222, iteration: 118369
loss: 1.0144891738891602,grad_norm: 0.9999988476598942, iteration: 118370
loss: 1.000378131866455,grad_norm: 0.7640807221754184, iteration: 118371
loss: 1.071651577949524,grad_norm: 0.8522797164341659, iteration: 118372
loss: 0.9971628189086914,grad_norm: 0.9063942929804344, iteration: 118373
loss: 1.0405280590057373,grad_norm: 0.9999994091792994, iteration: 118374
loss: 0.9883260130882263,grad_norm: 0.9799198876634215, iteration: 118375
loss: 1.1437749862670898,grad_norm: 0.9999995006056122, iteration: 118376
loss: 1.0225869417190552,grad_norm: 0.9999998952213578, iteration: 118377
loss: 1.0000135898590088,grad_norm: 0.9999994558838017, iteration: 118378
loss: 1.0319896936416626,grad_norm: 0.9999991972905836, iteration: 118379
loss: 1.0338757038116455,grad_norm: 0.9999999097581243, iteration: 118380
loss: 1.0101908445358276,grad_norm: 0.9999992602360446, iteration: 118381
loss: 1.0047259330749512,grad_norm: 0.9999993250418883, iteration: 118382
loss: 1.0251588821411133,grad_norm: 0.9999997824973443, iteration: 118383
loss: 1.0723501443862915,grad_norm: 0.9999998820318619, iteration: 118384
loss: 1.06357741355896,grad_norm: 0.9999995276374062, iteration: 118385
loss: 1.0357770919799805,grad_norm: 0.9999996503000705, iteration: 118386
loss: 0.9814023375511169,grad_norm: 0.8847231065711395, iteration: 118387
loss: 1.0013800859451294,grad_norm: 0.9999991019611679, iteration: 118388
loss: 1.0238041877746582,grad_norm: 0.9645929576606652, iteration: 118389
loss: 1.0079480409622192,grad_norm: 0.7167695113159365, iteration: 118390
loss: 0.9892092347145081,grad_norm: 0.9999991639495113, iteration: 118391
loss: 0.9782592058181763,grad_norm: 0.9999993131062687, iteration: 118392
loss: 1.0011860132217407,grad_norm: 0.9567982237895809, iteration: 118393
loss: 0.9929539561271667,grad_norm: 0.999999631704252, iteration: 118394
loss: 0.9967076778411865,grad_norm: 0.999999220031702, iteration: 118395
loss: 1.016297698020935,grad_norm: 0.8876062214209335, iteration: 118396
loss: 1.084725260734558,grad_norm: 0.895333453884506, iteration: 118397
loss: 1.019179344177246,grad_norm: 0.8915794199009366, iteration: 118398
loss: 0.9925402998924255,grad_norm: 0.9999996484384294, iteration: 118399
loss: 1.0034540891647339,grad_norm: 0.9999990081708392, iteration: 118400
loss: 1.0520589351654053,grad_norm: 0.9999993233060169, iteration: 118401
loss: 0.985679030418396,grad_norm: 0.9073874467883575, iteration: 118402
loss: 0.980925977230072,grad_norm: 0.9331903488339762, iteration: 118403
loss: 1.0125396251678467,grad_norm: 0.9999992195007028, iteration: 118404
loss: 1.0152554512023926,grad_norm: 0.8519642667531055, iteration: 118405
loss: 1.0185095071792603,grad_norm: 0.8149676208044989, iteration: 118406
loss: 1.1150504350662231,grad_norm: 0.9999998898034929, iteration: 118407
loss: 1.0508331060409546,grad_norm: 0.9999998501512761, iteration: 118408
loss: 1.0665123462677002,grad_norm: 0.9999999190188746, iteration: 118409
loss: 1.0092941522598267,grad_norm: 0.9999995656837369, iteration: 118410
loss: 1.0243020057678223,grad_norm: 0.9999993303478983, iteration: 118411
loss: 1.1709080934524536,grad_norm: 0.9999995481947882, iteration: 118412
loss: 0.9952540397644043,grad_norm: 0.9999997207893541, iteration: 118413
loss: 1.0485823154449463,grad_norm: 0.9999992811450151, iteration: 118414
loss: 0.9934348464012146,grad_norm: 0.821738560520848, iteration: 118415
loss: 1.004180669784546,grad_norm: 0.8376606915890868, iteration: 118416
loss: 1.009107232093811,grad_norm: 0.9616846309413839, iteration: 118417
loss: 1.011680006980896,grad_norm: 0.9797792002711693, iteration: 118418
loss: 0.9919988512992859,grad_norm: 0.9152750290233029, iteration: 118419
loss: 0.9923390746116638,grad_norm: 0.991928225302969, iteration: 118420
loss: 0.9902986884117126,grad_norm: 0.7807978851003362, iteration: 118421
loss: 1.0013151168823242,grad_norm: 0.9999990094540511, iteration: 118422
loss: 1.0934149026870728,grad_norm: 0.9999997439433586, iteration: 118423
loss: 0.9693167805671692,grad_norm: 0.9999997923000237, iteration: 118424
loss: 0.9851551651954651,grad_norm: 0.9999990861876141, iteration: 118425
loss: 1.1534968614578247,grad_norm: 0.9999993809945454, iteration: 118426
loss: 0.9933356046676636,grad_norm: 0.8570177879058934, iteration: 118427
loss: 1.0741407871246338,grad_norm: 0.9999991346091541, iteration: 118428
loss: 1.0435409545898438,grad_norm: 0.9876469840100213, iteration: 118429
loss: 1.073046326637268,grad_norm: 0.999999945871482, iteration: 118430
loss: 1.0510882139205933,grad_norm: 0.999999301237926, iteration: 118431
loss: 1.0140331983566284,grad_norm: 0.9999990302437934, iteration: 118432
loss: 1.0206438302993774,grad_norm: 0.9500730221880602, iteration: 118433
loss: 0.9870820641517639,grad_norm: 0.9498928865900788, iteration: 118434
loss: 1.0750292539596558,grad_norm: 0.9999998754549676, iteration: 118435
loss: 1.0051074028015137,grad_norm: 0.9999996485637301, iteration: 118436
loss: 0.9951050877571106,grad_norm: 0.8725749077326866, iteration: 118437
loss: 0.9933720827102661,grad_norm: 0.8926386946352268, iteration: 118438
loss: 1.2510716915130615,grad_norm: 0.9999999284362759, iteration: 118439
loss: 1.0136483907699585,grad_norm: 0.9237521287270649, iteration: 118440
loss: 1.047650694847107,grad_norm: 0.9565099456416823, iteration: 118441
loss: 1.1901768445968628,grad_norm: 0.9999998188740896, iteration: 118442
loss: 1.050997018814087,grad_norm: 0.9999991997449653, iteration: 118443
loss: 0.9737109541893005,grad_norm: 0.924764945463334, iteration: 118444
loss: 1.129517912864685,grad_norm: 0.9999997289570319, iteration: 118445
loss: 1.0007988214492798,grad_norm: 0.9999998805488347, iteration: 118446
loss: 1.1354236602783203,grad_norm: 0.9737354940221676, iteration: 118447
loss: 1.0144222974777222,grad_norm: 0.9843763185393369, iteration: 118448
loss: 1.0358636379241943,grad_norm: 0.9999997721551296, iteration: 118449
loss: 1.1321932077407837,grad_norm: 0.9999991898114213, iteration: 118450
loss: 1.1337459087371826,grad_norm: 0.9999994779321927, iteration: 118451
loss: 1.1497814655303955,grad_norm: 0.9999995300822078, iteration: 118452
loss: 1.0597119331359863,grad_norm: 0.9999991022197782, iteration: 118453
loss: 1.0157357454299927,grad_norm: 0.9999999180706022, iteration: 118454
loss: 1.1100374460220337,grad_norm: 0.999999155925755, iteration: 118455
loss: 1.0371547937393188,grad_norm: 0.9999992143250177, iteration: 118456
loss: 1.1147329807281494,grad_norm: 0.9999995001650852, iteration: 118457
loss: 1.1309460401535034,grad_norm: 0.9999994852402003, iteration: 118458
loss: 1.0080478191375732,grad_norm: 0.9106290408966957, iteration: 118459
loss: 1.1006715297698975,grad_norm: 0.9282796583348565, iteration: 118460
loss: 0.9786810278892517,grad_norm: 0.9999998452308289, iteration: 118461
loss: 1.0517257452011108,grad_norm: 0.9560912912408225, iteration: 118462
loss: 1.0249115228652954,grad_norm: 0.9999992049873736, iteration: 118463
loss: 1.0410001277923584,grad_norm: 0.9999991448106494, iteration: 118464
loss: 1.005537748336792,grad_norm: 0.9356605027082454, iteration: 118465
loss: 1.1774636507034302,grad_norm: 0.9999998832024014, iteration: 118466
loss: 1.0212444067001343,grad_norm: 0.9999991110418186, iteration: 118467
loss: 1.0913176536560059,grad_norm: 0.9999994355581395, iteration: 118468
loss: 1.1264957189559937,grad_norm: 0.9999994144587475, iteration: 118469
loss: 1.0514999628067017,grad_norm: 0.8367274774158218, iteration: 118470
loss: 1.2410584688186646,grad_norm: 0.99999991900288, iteration: 118471
loss: 1.0785022974014282,grad_norm: 0.9999996366588293, iteration: 118472
loss: 1.0198174715042114,grad_norm: 0.7720873478775556, iteration: 118473
loss: 1.0207666158676147,grad_norm: 0.9999991279833361, iteration: 118474
loss: 1.1205766201019287,grad_norm: 0.9999994256395354, iteration: 118475
loss: 1.063188076019287,grad_norm: 0.9999993811663007, iteration: 118476
loss: 1.0639288425445557,grad_norm: 0.8952258964862745, iteration: 118477
loss: 1.0932915210723877,grad_norm: 0.9999999367848649, iteration: 118478
loss: 0.9999281167984009,grad_norm: 0.8053787753804146, iteration: 118479
loss: 1.3668739795684814,grad_norm: 0.9999997897914319, iteration: 118480
loss: 1.008850336074829,grad_norm: 0.9999995942693407, iteration: 118481
loss: 1.0595333576202393,grad_norm: 0.9999995399877043, iteration: 118482
loss: 1.0272246599197388,grad_norm: 0.9999992058131727, iteration: 118483
loss: 1.085769534111023,grad_norm: 0.8627269442521583, iteration: 118484
loss: 0.9830315113067627,grad_norm: 0.99999917370924, iteration: 118485
loss: 1.0467582941055298,grad_norm: 0.7777308251753643, iteration: 118486
loss: 1.0566085577011108,grad_norm: 0.8538635426389406, iteration: 118487
loss: 1.0125696659088135,grad_norm: 0.8494995946790774, iteration: 118488
loss: 1.0664438009262085,grad_norm: 0.9570102043695782, iteration: 118489
loss: 1.1856354475021362,grad_norm: 1.0000000063301127, iteration: 118490
loss: 1.0764107704162598,grad_norm: 0.8112818961692625, iteration: 118491
loss: 1.006339430809021,grad_norm: 0.9999991084522591, iteration: 118492
loss: 0.9945353865623474,grad_norm: 0.7739776700764263, iteration: 118493
loss: 1.228536605834961,grad_norm: 0.9999997359542868, iteration: 118494
loss: 1.0023585557937622,grad_norm: 0.9999994401297702, iteration: 118495
loss: 1.084041714668274,grad_norm: 0.9999994080637835, iteration: 118496
loss: 1.0839591026306152,grad_norm: 0.9402219715944218, iteration: 118497
loss: 0.9879233241081238,grad_norm: 0.7925651594409548, iteration: 118498
loss: 1.0899584293365479,grad_norm: 0.948231913873655, iteration: 118499
loss: 1.0468506813049316,grad_norm: 0.8883834620048544, iteration: 118500
loss: 0.9924482703208923,grad_norm: 0.9999995175689111, iteration: 118501
loss: 1.026307225227356,grad_norm: 0.9999993489635965, iteration: 118502
loss: 1.0004650354385376,grad_norm: 0.8610930372704698, iteration: 118503
loss: 1.114522933959961,grad_norm: 0.9999993078978529, iteration: 118504
loss: 1.0721428394317627,grad_norm: 0.8363357212072616, iteration: 118505
loss: 1.038374423980713,grad_norm: 0.9999992180418411, iteration: 118506
loss: 1.013045310974121,grad_norm: 0.9999991958701242, iteration: 118507
loss: 1.0327229499816895,grad_norm: 0.906814219267467, iteration: 118508
loss: 0.9954277873039246,grad_norm: 0.9999990619429271, iteration: 118509
loss: 1.0134344100952148,grad_norm: 0.91844254744707, iteration: 118510
loss: 1.0087628364562988,grad_norm: 0.8934011462517066, iteration: 118511
loss: 1.0586731433868408,grad_norm: 0.9999994627528395, iteration: 118512
loss: 1.1053522825241089,grad_norm: 0.9999989881702288, iteration: 118513
loss: 1.0111594200134277,grad_norm: 0.9999991913329734, iteration: 118514
loss: 1.015589714050293,grad_norm: 0.9796789696809044, iteration: 118515
loss: 1.0592707395553589,grad_norm: 0.9597407641656208, iteration: 118516
loss: 1.0682063102722168,grad_norm: 0.9999998954270489, iteration: 118517
loss: 1.0176862478256226,grad_norm: 0.8710246261288425, iteration: 118518
loss: 1.0922868251800537,grad_norm: 0.9999993696873675, iteration: 118519
loss: 1.087360143661499,grad_norm: 0.9999991232149992, iteration: 118520
loss: 0.9755235314369202,grad_norm: 0.8831869889257383, iteration: 118521
loss: 1.002934455871582,grad_norm: 0.8442786165793825, iteration: 118522
loss: 1.0513954162597656,grad_norm: 0.9999996064240537, iteration: 118523
loss: 1.013411045074463,grad_norm: 0.9999992390577169, iteration: 118524
loss: 0.9923015236854553,grad_norm: 0.9648930328915207, iteration: 118525
loss: 1.0690687894821167,grad_norm: 0.9999992923216037, iteration: 118526
loss: 1.0482817888259888,grad_norm: 0.9999991299301492, iteration: 118527
loss: 1.0447818040847778,grad_norm: 0.9999992546146552, iteration: 118528
loss: 1.0622421503067017,grad_norm: 0.9999993363743157, iteration: 118529
loss: 1.0531831979751587,grad_norm: 0.9999997991807483, iteration: 118530
loss: 0.9916759133338928,grad_norm: 0.8142435964977685, iteration: 118531
loss: 1.050194501876831,grad_norm: 0.9270772995606966, iteration: 118532
loss: 1.0196216106414795,grad_norm: 0.8762292780700759, iteration: 118533
loss: 0.9673848152160645,grad_norm: 0.898521891835121, iteration: 118534
loss: 1.0099676847457886,grad_norm: 0.8918706406592808, iteration: 118535
loss: 1.0489109754562378,grad_norm: 0.9999999435122658, iteration: 118536
loss: 0.9969654083251953,grad_norm: 0.9999990962602442, iteration: 118537
loss: 1.0071223974227905,grad_norm: 0.8874958035892219, iteration: 118538
loss: 0.9626424908638,grad_norm: 0.9897066311961328, iteration: 118539
loss: 1.0212504863739014,grad_norm: 0.9999991999067313, iteration: 118540
loss: 1.0066334009170532,grad_norm: 0.9999994791165373, iteration: 118541
loss: 1.0761723518371582,grad_norm: 0.9999995939503876, iteration: 118542
loss: 1.0262633562088013,grad_norm: 0.9999991267401892, iteration: 118543
loss: 1.0380553007125854,grad_norm: 0.9999996588765833, iteration: 118544
loss: 1.0188539028167725,grad_norm: 0.7889826261131434, iteration: 118545
loss: 1.217674970626831,grad_norm: 0.9999998565308718, iteration: 118546
loss: 1.146131157875061,grad_norm: 0.9999997052417193, iteration: 118547
loss: 1.0424689054489136,grad_norm: 0.9180593188534514, iteration: 118548
loss: 1.0003362894058228,grad_norm: 0.9999999683416511, iteration: 118549
loss: 1.0010031461715698,grad_norm: 0.9999993600763792, iteration: 118550
loss: 1.0147205591201782,grad_norm: 0.9999990598281436, iteration: 118551
loss: 1.0769423246383667,grad_norm: 0.9999991032651157, iteration: 118552
loss: 1.0153230428695679,grad_norm: 0.9779030257753468, iteration: 118553
loss: 1.036067247390747,grad_norm: 0.9891422823556738, iteration: 118554
loss: 0.9802423715591431,grad_norm: 0.9222223453105034, iteration: 118555
loss: 1.1487445831298828,grad_norm: 0.9999994210011128, iteration: 118556
loss: 1.0121538639068604,grad_norm: 0.9999994232558577, iteration: 118557
loss: 1.0368061065673828,grad_norm: 0.9999991210430657, iteration: 118558
loss: 1.033585548400879,grad_norm: 0.8737943679489016, iteration: 118559
loss: 1.0321294069290161,grad_norm: 0.9999991624426009, iteration: 118560
loss: 1.0261740684509277,grad_norm: 0.8367893154919915, iteration: 118561
loss: 1.046157717704773,grad_norm: 0.9999992010689827, iteration: 118562
loss: 1.057403564453125,grad_norm: 0.9999999510137217, iteration: 118563
loss: 1.0039156675338745,grad_norm: 0.9999999547628199, iteration: 118564
loss: 1.1196287870407104,grad_norm: 0.9999999418025248, iteration: 118565
loss: 1.00117027759552,grad_norm: 0.9795312908852988, iteration: 118566
loss: 1.0504294633865356,grad_norm: 0.9999998965713869, iteration: 118567
loss: 1.0188287496566772,grad_norm: 0.9999993306150016, iteration: 118568
loss: 1.022780179977417,grad_norm: 0.794808789099296, iteration: 118569
loss: 0.9810492992401123,grad_norm: 0.9953038349225886, iteration: 118570
loss: 0.9944747090339661,grad_norm: 0.9112949670763261, iteration: 118571
loss: 0.9775278568267822,grad_norm: 0.9999993347494182, iteration: 118572
loss: 1.0451472997665405,grad_norm: 0.9999993383573907, iteration: 118573
loss: 1.2179688215255737,grad_norm: 0.9999995230823119, iteration: 118574
loss: 1.0755016803741455,grad_norm: 0.9999991544196954, iteration: 118575
loss: 1.0127865076065063,grad_norm: 0.8592679185805936, iteration: 118576
loss: 1.1617012023925781,grad_norm: 0.9999995465921728, iteration: 118577
loss: 1.0486783981323242,grad_norm: 0.9785797930410979, iteration: 118578
loss: 1.0461081266403198,grad_norm: 0.9999991229055492, iteration: 118579
loss: 1.0410101413726807,grad_norm: 0.9999995887217928, iteration: 118580
loss: 1.0281987190246582,grad_norm: 0.9999991421939132, iteration: 118581
loss: 1.0784498453140259,grad_norm: 0.9999995057968071, iteration: 118582
loss: 1.0288350582122803,grad_norm: 0.9999999100195203, iteration: 118583
loss: 0.9637147784233093,grad_norm: 0.9999990703806002, iteration: 118584
loss: 1.1020737886428833,grad_norm: 0.9999998870124645, iteration: 118585
loss: 0.9816003441810608,grad_norm: 0.9999990761986903, iteration: 118586
loss: 1.0554447174072266,grad_norm: 0.9999997406464611, iteration: 118587
loss: 1.0028901100158691,grad_norm: 0.9481327524248493, iteration: 118588
loss: 1.0245476961135864,grad_norm: 0.7999542036197205, iteration: 118589
loss: 1.0337605476379395,grad_norm: 0.7666316714596229, iteration: 118590
loss: 1.0316979885101318,grad_norm: 0.9999998611119788, iteration: 118591
loss: 1.0256977081298828,grad_norm: 0.9059984411785507, iteration: 118592
loss: 0.9956027865409851,grad_norm: 0.9999992265136514, iteration: 118593
loss: 0.9981874227523804,grad_norm: 0.9999990600203088, iteration: 118594
loss: 0.9959458112716675,grad_norm: 0.9999996486889995, iteration: 118595
loss: 1.1237967014312744,grad_norm: 0.9999998011405289, iteration: 118596
loss: 1.0129705667495728,grad_norm: 0.8339643716787145, iteration: 118597
loss: 1.0091488361358643,grad_norm: 0.8721608278456062, iteration: 118598
loss: 1.2770060300827026,grad_norm: 0.9999998393997391, iteration: 118599
loss: 1.0349842309951782,grad_norm: 0.9999993010601628, iteration: 118600
loss: 1.053571105003357,grad_norm: 0.9999991668180503, iteration: 118601
loss: 1.0638306140899658,grad_norm: 0.9999990558304035, iteration: 118602
loss: 1.0360068082809448,grad_norm: 0.9999991878907218, iteration: 118603
loss: 1.1235015392303467,grad_norm: 0.999999147223609, iteration: 118604
loss: 1.0186315774917603,grad_norm: 0.9338682079750453, iteration: 118605
loss: 1.0570895671844482,grad_norm: 0.9548852739773412, iteration: 118606
loss: 0.9900355339050293,grad_norm: 0.9999998071063438, iteration: 118607
loss: 0.9721395373344421,grad_norm: 0.9999990911566533, iteration: 118608
loss: 1.0005017518997192,grad_norm: 0.8041812546593109, iteration: 118609
loss: 0.9693955183029175,grad_norm: 0.9999993440234991, iteration: 118610
loss: 1.0297456979751587,grad_norm: 0.999999228986638, iteration: 118611
loss: 1.0802801847457886,grad_norm: 0.9999999532647229, iteration: 118612
loss: 1.2184282541275024,grad_norm: 0.9999997448382999, iteration: 118613
loss: 1.0815695524215698,grad_norm: 0.9999990320504356, iteration: 118614
loss: 1.1233775615692139,grad_norm: 0.9999998988116227, iteration: 118615
loss: 1.1443790197372437,grad_norm: 0.999999678902569, iteration: 118616
loss: 0.985605001449585,grad_norm: 0.904907586600866, iteration: 118617
loss: 1.1394306421279907,grad_norm: 1.0000000383575398, iteration: 118618
loss: 1.0635943412780762,grad_norm: 0.9561620761937303, iteration: 118619
loss: 1.1719331741333008,grad_norm: 0.9999992494605997, iteration: 118620
loss: 1.157726526260376,grad_norm: 0.9999995866845842, iteration: 118621
loss: 1.109054684638977,grad_norm: 0.9999998696555742, iteration: 118622
loss: 1.1251887083053589,grad_norm: 0.9999995192211037, iteration: 118623
loss: 1.1010103225708008,grad_norm: 1.0000000098892103, iteration: 118624
loss: 1.020666241645813,grad_norm: 0.9999996332504807, iteration: 118625
loss: 1.1587715148925781,grad_norm: 0.9999994389671616, iteration: 118626
loss: 1.0536000728607178,grad_norm: 0.9999997050491587, iteration: 118627
loss: 1.0111229419708252,grad_norm: 0.8571816974208821, iteration: 118628
loss: 1.0375128984451294,grad_norm: 0.9999997209948767, iteration: 118629
loss: 1.2326455116271973,grad_norm: 1.000000002364287, iteration: 118630
loss: 1.012256383895874,grad_norm: 0.9999992536154632, iteration: 118631
loss: 1.0747483968734741,grad_norm: 0.9999995229076699, iteration: 118632
loss: 1.130976676940918,grad_norm: 0.9999993395466262, iteration: 118633
loss: 1.0143531560897827,grad_norm: 0.9999996999426926, iteration: 118634
loss: 1.0975383520126343,grad_norm: 0.9999996367676836, iteration: 118635
loss: 1.0755615234375,grad_norm: 0.9999990427582195, iteration: 118636
loss: 1.0199562311172485,grad_norm: 0.9999994437178669, iteration: 118637
loss: 1.0520693063735962,grad_norm: 0.9999996825342864, iteration: 118638
loss: 1.0731602907180786,grad_norm: 0.9999997246391596, iteration: 118639
loss: 1.0280052423477173,grad_norm: 0.9999993751122427, iteration: 118640
loss: 1.0381529331207275,grad_norm: 0.9671511507282848, iteration: 118641
loss: 1.1296520233154297,grad_norm: 0.9999999011501466, iteration: 118642
loss: 1.0098824501037598,grad_norm: 0.9999996931201762, iteration: 118643
loss: 1.0434846878051758,grad_norm: 0.999999193753157, iteration: 118644
loss: 1.064996600151062,grad_norm: 0.9999992865600602, iteration: 118645
loss: 1.1823835372924805,grad_norm: 0.9999994106017339, iteration: 118646
loss: 1.1704665422439575,grad_norm: 0.9999998563457575, iteration: 118647
loss: 1.081475019454956,grad_norm: 0.9999996949004136, iteration: 118648
loss: 1.0432883501052856,grad_norm: 0.9999995846115345, iteration: 118649
loss: 1.0603524446487427,grad_norm: 0.9999997643571764, iteration: 118650
loss: 1.4885399341583252,grad_norm: 0.9999999019176361, iteration: 118651
loss: 1.1708496809005737,grad_norm: 0.9999993626422607, iteration: 118652
loss: 1.2724096775054932,grad_norm: 0.9999999393175742, iteration: 118653
loss: 1.083876371383667,grad_norm: 0.9999996402387246, iteration: 118654
loss: 1.2222744226455688,grad_norm: 0.9999999238629644, iteration: 118655
loss: 1.3213196992874146,grad_norm: 0.9999998402330338, iteration: 118656
loss: 1.0237327814102173,grad_norm: 0.9999993553948324, iteration: 118657
loss: 1.1088672876358032,grad_norm: 1.0000000023459077, iteration: 118658
loss: 1.123284101486206,grad_norm: 0.860892682506962, iteration: 118659
loss: 1.108682632446289,grad_norm: 0.9999999490614071, iteration: 118660
loss: 1.180121660232544,grad_norm: 0.9999998861476757, iteration: 118661
loss: 1.1260590553283691,grad_norm: 0.9999996206897335, iteration: 118662
loss: 1.0118680000305176,grad_norm: 0.9999996560687177, iteration: 118663
loss: 1.0589768886566162,grad_norm: 0.9999995217070182, iteration: 118664
loss: 1.2386839389801025,grad_norm: 0.9999997011234182, iteration: 118665
loss: 0.9874143600463867,grad_norm: 0.9999994277810814, iteration: 118666
loss: 1.1151623725891113,grad_norm: 0.9999999043831204, iteration: 118667
loss: 1.1011651754379272,grad_norm: 0.9999999719667934, iteration: 118668
loss: 1.1645041704177856,grad_norm: 0.9999998173454648, iteration: 118669
loss: 1.2102805376052856,grad_norm: 0.9999998025323835, iteration: 118670
loss: 1.2728089094161987,grad_norm: 0.9999998056682525, iteration: 118671
loss: 1.0717443227767944,grad_norm: 0.9999994691421595, iteration: 118672
loss: 1.0138214826583862,grad_norm: 0.9999990410487464, iteration: 118673
loss: 1.0790055990219116,grad_norm: 0.9999998050458161, iteration: 118674
loss: 1.0720850229263306,grad_norm: 0.9999995618162195, iteration: 118675
loss: 1.1832057237625122,grad_norm: 0.9999997488055328, iteration: 118676
loss: 1.2745267152786255,grad_norm: 0.9999997725930772, iteration: 118677
loss: 1.0086171627044678,grad_norm: 0.9999998636258026, iteration: 118678
loss: 1.0495147705078125,grad_norm: 0.9999997195516186, iteration: 118679
loss: 1.0892879962921143,grad_norm: 0.9999999084168826, iteration: 118680
loss: 1.1696715354919434,grad_norm: 0.9999998773408224, iteration: 118681
loss: 1.0448534488677979,grad_norm: 0.9999992159061291, iteration: 118682
loss: 1.0825073719024658,grad_norm: 0.9999997968567491, iteration: 118683
loss: 1.1618595123291016,grad_norm: 1.000000038880576, iteration: 118684
loss: 1.0314762592315674,grad_norm: 0.9999994363478296, iteration: 118685
loss: 1.1205452680587769,grad_norm: 0.9999999634601797, iteration: 118686
loss: 1.3117586374282837,grad_norm: 0.9999995543354269, iteration: 118687
loss: 1.022814393043518,grad_norm: 0.9999998466797132, iteration: 118688
loss: 1.0308245420455933,grad_norm: 0.9778919297764728, iteration: 118689
loss: 1.0694196224212646,grad_norm: 0.9999997191277019, iteration: 118690
loss: 1.1873416900634766,grad_norm: 0.9999995552450112, iteration: 118691
loss: 1.3256765604019165,grad_norm: 0.9999999996681808, iteration: 118692
loss: 1.1130658388137817,grad_norm: 0.9999993947413559, iteration: 118693
loss: 1.028513789176941,grad_norm: 0.9999992766114161, iteration: 118694
loss: 1.0723981857299805,grad_norm: 0.9999997248405119, iteration: 118695
loss: 1.0843573808670044,grad_norm: 0.9999998180192479, iteration: 118696
loss: 1.1839898824691772,grad_norm: 0.9999997592318707, iteration: 118697
loss: 1.1002486944198608,grad_norm: 0.9999990227313379, iteration: 118698
loss: 1.193156361579895,grad_norm: 0.9999995158100632, iteration: 118699
loss: 1.1487765312194824,grad_norm: 0.9999997619774396, iteration: 118700
loss: 1.0989502668380737,grad_norm: 0.9999997627709739, iteration: 118701
loss: 0.9587602019309998,grad_norm: 0.9999991784745251, iteration: 118702
loss: 1.1149331331253052,grad_norm: 0.9999992081174164, iteration: 118703
loss: 1.2950392961502075,grad_norm: 1.0000000389907318, iteration: 118704
loss: 0.9991989731788635,grad_norm: 0.9999994648975663, iteration: 118705
loss: 1.2148798704147339,grad_norm: 0.9999993721006596, iteration: 118706
loss: 1.0407559871673584,grad_norm: 0.9999994288060664, iteration: 118707
loss: 1.1585887670516968,grad_norm: 1.0000000234846191, iteration: 118708
loss: 1.0309351682662964,grad_norm: 0.8391529949711244, iteration: 118709
loss: 1.1924620866775513,grad_norm: 0.9999998462136022, iteration: 118710
loss: 1.235319972038269,grad_norm: 0.9999997815479433, iteration: 118711
loss: 1.0304893255233765,grad_norm: 0.9999997776896568, iteration: 118712
loss: 1.0793110132217407,grad_norm: 0.9999998415353957, iteration: 118713
loss: 0.9974393844604492,grad_norm: 0.9999992810872061, iteration: 118714
loss: 1.313530445098877,grad_norm: 0.9999999362085978, iteration: 118715
loss: 1.1546181440353394,grad_norm: 0.9999996568253745, iteration: 118716
loss: 1.0309393405914307,grad_norm: 0.9999998009235278, iteration: 118717
loss: 1.2512563467025757,grad_norm: 0.9999997681090051, iteration: 118718
loss: 1.1458351612091064,grad_norm: 0.9999997328126449, iteration: 118719
loss: 1.2296878099441528,grad_norm: 0.9999998041807849, iteration: 118720
loss: 1.0628986358642578,grad_norm: 0.9999996127905862, iteration: 118721
loss: 1.147360920906067,grad_norm: 0.9999997124798375, iteration: 118722
loss: 1.2585803270339966,grad_norm: 0.9999996480628415, iteration: 118723
loss: 1.1384384632110596,grad_norm: 0.9999996514696043, iteration: 118724
loss: 1.0322961807250977,grad_norm: 0.9999990577660767, iteration: 118725
loss: 1.2506473064422607,grad_norm: 0.9999997424740698, iteration: 118726
loss: 1.1808924674987793,grad_norm: 0.9999996862055716, iteration: 118727
loss: 1.0849249362945557,grad_norm: 0.9999999821733091, iteration: 118728
loss: 1.0536762475967407,grad_norm: 0.9999999641183662, iteration: 118729
loss: 1.1473817825317383,grad_norm: 0.9999999973652365, iteration: 118730
loss: 1.2246595621109009,grad_norm: 0.9999999271047655, iteration: 118731
loss: 1.1961251497268677,grad_norm: 1.0000000067560295, iteration: 118732
loss: 1.3054016828536987,grad_norm: 0.9999999488264195, iteration: 118733
loss: 1.4275315999984741,grad_norm: 0.9999999019418138, iteration: 118734
loss: 1.1931008100509644,grad_norm: 1.0000000611891495, iteration: 118735
loss: 1.3321805000305176,grad_norm: 1.0000000958727076, iteration: 118736
loss: 1.3324320316314697,grad_norm: 0.9999999771002601, iteration: 118737
loss: 1.2589162588119507,grad_norm: 0.9999997153283487, iteration: 118738
loss: 1.513860821723938,grad_norm: 0.9999999295513096, iteration: 118739
loss: 1.2046728134155273,grad_norm: 0.9999999257011608, iteration: 118740
loss: 1.5141946077346802,grad_norm: 0.9999999137063059, iteration: 118741
loss: 1.2801228761672974,grad_norm: 0.9999999807666847, iteration: 118742
loss: 1.3999114036560059,grad_norm: 1.0000000030515603, iteration: 118743
loss: 1.6418373584747314,grad_norm: 0.9999999421157777, iteration: 118744
loss: 1.4911863803863525,grad_norm: 0.9999999607552998, iteration: 118745
loss: 1.6329587697982788,grad_norm: 0.9999999339895611, iteration: 118746
loss: 1.4823442697525024,grad_norm: 1.0000000938454812, iteration: 118747
loss: 1.2372201681137085,grad_norm: 0.9999998942011107, iteration: 118748
loss: 1.4635528326034546,grad_norm: 0.9999999992612415, iteration: 118749
loss: 1.1678752899169922,grad_norm: 0.9999997675973018, iteration: 118750
loss: 1.1542069911956787,grad_norm: 0.9999999402992049, iteration: 118751
loss: 1.5516448020935059,grad_norm: 0.9999999817786726, iteration: 118752
loss: 1.4679969549179077,grad_norm: 0.999999996552686, iteration: 118753
loss: 1.2576087713241577,grad_norm: 0.9999999391776486, iteration: 118754
loss: 1.5109679698944092,grad_norm: 0.9999999728037956, iteration: 118755
loss: 1.2146856784820557,grad_norm: 0.9999998044955548, iteration: 118756
loss: 1.9190298318862915,grad_norm: 1.0000001007512669, iteration: 118757
loss: 1.401848316192627,grad_norm: 0.9999999308383206, iteration: 118758
loss: 1.675625205039978,grad_norm: 1.0000001083633958, iteration: 118759
loss: 1.510826826095581,grad_norm: 0.9999998924967277, iteration: 118760
loss: 1.376462459564209,grad_norm: 0.9999998624738321, iteration: 118761
loss: 1.3383924961090088,grad_norm: 0.9999998308266335, iteration: 118762
loss: 1.2427918910980225,grad_norm: 0.9999997899916501, iteration: 118763
loss: 1.1940690279006958,grad_norm: 0.9999996387573666, iteration: 118764
loss: 1.1863499879837036,grad_norm: 0.9999998542667218, iteration: 118765
loss: 1.3679176568984985,grad_norm: 0.9999998434969174, iteration: 118766
loss: 1.6079299449920654,grad_norm: 0.9999999551244078, iteration: 118767
loss: 1.4965555667877197,grad_norm: 0.999999970469982, iteration: 118768
loss: 1.3408936262130737,grad_norm: 0.9999999075195154, iteration: 118769
loss: 1.5874972343444824,grad_norm: 0.9999998838695655, iteration: 118770
loss: 1.3523449897766113,grad_norm: 0.9999998829416339, iteration: 118771
loss: 1.4884363412857056,grad_norm: 0.9999999819713206, iteration: 118772
loss: 1.8813282251358032,grad_norm: 0.9999998573827803, iteration: 118773
loss: 1.4535328149795532,grad_norm: 0.9999999116737931, iteration: 118774
loss: 1.5270984172821045,grad_norm: 1.0000000445197907, iteration: 118775
loss: 1.4179778099060059,grad_norm: 1.0000000192861735, iteration: 118776
loss: 1.5682814121246338,grad_norm: 0.9999998859370697, iteration: 118777
loss: 1.6312209367752075,grad_norm: 1.0000000182597415, iteration: 118778
loss: 1.4134284257888794,grad_norm: 0.9999998956729222, iteration: 118779
loss: 1.5755525827407837,grad_norm: 0.999999830375723, iteration: 118780
loss: 1.6574679613113403,grad_norm: 0.9999999032976059, iteration: 118781
loss: 1.4054497480392456,grad_norm: 0.9999999912459595, iteration: 118782
loss: 1.603568196296692,grad_norm: 1.000000026822752, iteration: 118783
loss: 1.777111530303955,grad_norm: 0.99999998906578, iteration: 118784
loss: 1.5744965076446533,grad_norm: 1.0000000522732218, iteration: 118785
loss: 1.5746413469314575,grad_norm: 0.999999863623495, iteration: 118786
loss: 1.5227727890014648,grad_norm: 0.999999970360269, iteration: 118787
loss: 1.8791712522506714,grad_norm: 0.9999999533103233, iteration: 118788
loss: 1.8498203754425049,grad_norm: 0.9999999582921606, iteration: 118789
loss: 1.6339244842529297,grad_norm: 0.9999999627258993, iteration: 118790
loss: 1.3701080083847046,grad_norm: 0.9999998232815748, iteration: 118791
loss: 1.612744927406311,grad_norm: 1.0000000124544248, iteration: 118792
loss: 1.5518943071365356,grad_norm: 1.0000000305773753, iteration: 118793
loss: 1.6544785499572754,grad_norm: 0.9999998953644371, iteration: 118794
loss: 2.011902332305908,grad_norm: 0.9999999293086779, iteration: 118795
loss: 1.7725551128387451,grad_norm: 0.9999998983617321, iteration: 118796
loss: 1.6112170219421387,grad_norm: 0.9999999473735619, iteration: 118797
loss: 1.5981082916259766,grad_norm: 1.0000000532947504, iteration: 118798
loss: 1.6917803287506104,grad_norm: 0.9999998942666443, iteration: 118799
loss: 1.7817658185958862,grad_norm: 0.9999998865445706, iteration: 118800
loss: 1.7589919567108154,grad_norm: 0.99999998038223, iteration: 118801
loss: 1.6409357786178589,grad_norm: 0.9999997910735179, iteration: 118802
loss: 1.6725364923477173,grad_norm: 0.9999999009127069, iteration: 118803
loss: 1.643491268157959,grad_norm: 0.9999999296571499, iteration: 118804
loss: 1.5145198106765747,grad_norm: 1.0000000212660725, iteration: 118805
loss: 1.8571807146072388,grad_norm: 0.9999999910420824, iteration: 118806
loss: 1.8876792192459106,grad_norm: 0.9999999577730877, iteration: 118807
loss: 1.8184869289398193,grad_norm: 1.0000000370083697, iteration: 118808
loss: 2.005537748336792,grad_norm: 0.9999999619482044, iteration: 118809
loss: 2.052642345428467,grad_norm: 0.9999999989733852, iteration: 118810
loss: 1.8130072355270386,grad_norm: 0.9999999625974162, iteration: 118811
loss: 1.8467283248901367,grad_norm: 1.00000006708934, iteration: 118812
loss: 2.219571590423584,grad_norm: 0.9999999571350745, iteration: 118813
loss: 1.6991078853607178,grad_norm: 0.9999999209341486, iteration: 118814
loss: 1.9970316886901855,grad_norm: 1.0000000156247997, iteration: 118815
loss: 1.6004629135131836,grad_norm: 0.999999986764501, iteration: 118816
loss: 2.0388245582580566,grad_norm: 0.9999999140085031, iteration: 118817
loss: 1.8127156496047974,grad_norm: 0.9999999841629533, iteration: 118818
loss: 2.0034422874450684,grad_norm: 0.9999999381657192, iteration: 118819
loss: 2.237572431564331,grad_norm: 0.999999890306081, iteration: 118820
loss: 2.109126329421997,grad_norm: 1.0000000311017752, iteration: 118821
loss: 2.3470606803894043,grad_norm: 0.9999999989055176, iteration: 118822
loss: 2.190765857696533,grad_norm: 1.0000000003502145, iteration: 118823
loss: 2.045609712600708,grad_norm: 0.9999999641987243, iteration: 118824
loss: 1.920823574066162,grad_norm: 1.000000029166574, iteration: 118825
loss: 1.8922374248504639,grad_norm: 1.000000003408405, iteration: 118826
loss: 2.15183687210083,grad_norm: 1.0000000062481187, iteration: 118827
loss: 1.7302166223526,grad_norm: 0.9999999009603455, iteration: 118828
loss: 1.7932919263839722,grad_norm: 1.0000000915807847, iteration: 118829
loss: 1.949821949005127,grad_norm: 1.0000000167721705, iteration: 118830
loss: 1.9173098802566528,grad_norm: 0.9999999135159421, iteration: 118831
loss: 2.09429669380188,grad_norm: 1.0000000565888743, iteration: 118832
loss: 1.969387173652649,grad_norm: 0.9999999388984504, iteration: 118833
loss: 2.40458083152771,grad_norm: 0.9999999709767189, iteration: 118834
loss: 1.8164329528808594,grad_norm: 1.0000000192902734, iteration: 118835
loss: 1.8856314420700073,grad_norm: 0.9999999696993487, iteration: 118836
loss: 2.028700590133667,grad_norm: 0.9999999660350657, iteration: 118837
loss: 2.2336716651916504,grad_norm: 1.0000000179728255, iteration: 118838
loss: 2.549314498901367,grad_norm: 1.0000000224325252, iteration: 118839
loss: 2.095439910888672,grad_norm: 1.0000000315967617, iteration: 118840
loss: 1.797253131866455,grad_norm: 1.0000000068901893, iteration: 118841
loss: 1.6594291925430298,grad_norm: 0.9999999843706024, iteration: 118842
loss: 1.9909685850143433,grad_norm: 1.0000000728872014, iteration: 118843
loss: 1.9529011249542236,grad_norm: 0.999999921631877, iteration: 118844
loss: 2.2781176567077637,grad_norm: 0.9999999364064486, iteration: 118845
loss: 2.004753589630127,grad_norm: 0.9999999451611886, iteration: 118846
loss: 1.7317849397659302,grad_norm: 0.9999998329449094, iteration: 118847
loss: 1.825347900390625,grad_norm: 0.9999999489642115, iteration: 118848
loss: 1.991489291191101,grad_norm: 1.0000000409708965, iteration: 118849
loss: 2.047680377960205,grad_norm: 0.9999999105802794, iteration: 118850
loss: 1.7555087804794312,grad_norm: 0.9999999929158643, iteration: 118851
loss: 1.724165916442871,grad_norm: 0.9999999710378046, iteration: 118852
loss: 1.7068779468536377,grad_norm: 0.99999990067823, iteration: 118853
loss: 1.9119904041290283,grad_norm: 0.9999999570327225, iteration: 118854
loss: 2.0666370391845703,grad_norm: 0.9999999488144838, iteration: 118855
loss: 1.9455695152282715,grad_norm: 0.9999999985624164, iteration: 118856
loss: 1.7666871547698975,grad_norm: 0.9999999382112461, iteration: 118857
loss: 2.059931755065918,grad_norm: 0.9999998830690693, iteration: 118858
loss: 1.9537321329116821,grad_norm: 1.0000000015668298, iteration: 118859
loss: 1.851538896560669,grad_norm: 0.9999999934027499, iteration: 118860
loss: 1.9247277975082397,grad_norm: 1.0000000576950736, iteration: 118861
loss: 1.927173137664795,grad_norm: 1.0000001181417175, iteration: 118862
loss: 1.790442705154419,grad_norm: 1.0000000580752704, iteration: 118863
loss: 1.8864444494247437,grad_norm: 0.9999999925318368, iteration: 118864
loss: 2.2779147624969482,grad_norm: 0.999999994711489, iteration: 118865
loss: 1.6813863515853882,grad_norm: 0.999999959889113, iteration: 118866
loss: 1.5919157266616821,grad_norm: 1.0000000300748166, iteration: 118867
loss: 1.5304964780807495,grad_norm: 1.0000000851851147, iteration: 118868
loss: 1.9040266275405884,grad_norm: 1.000000088615469, iteration: 118869
loss: 1.6666568517684937,grad_norm: 1.0000000139991172, iteration: 118870
loss: 1.7658170461654663,grad_norm: 0.9999999307627768, iteration: 118871
loss: 1.5882625579833984,grad_norm: 0.9999999784940983, iteration: 118872
loss: 1.6839780807495117,grad_norm: 0.999999858351601, iteration: 118873
loss: 1.634626865386963,grad_norm: 1.000000026971539, iteration: 118874
loss: 1.6017303466796875,grad_norm: 0.9999998653131902, iteration: 118875
loss: 1.9591602087020874,grad_norm: 1.00000000689262, iteration: 118876
loss: 1.677314043045044,grad_norm: 1.0000000728221319, iteration: 118877
loss: 1.90090811252594,grad_norm: 0.9999999488243021, iteration: 118878
loss: 2.3823862075805664,grad_norm: 0.9999999739324349, iteration: 118879
loss: 2.0264065265655518,grad_norm: 0.9999999492924518, iteration: 118880
loss: 1.9355164766311646,grad_norm: 0.9999998967090619, iteration: 118881
loss: 2.0071702003479004,grad_norm: 1.0000000082115488, iteration: 118882
loss: 2.2125473022460938,grad_norm: 1.0000000857158775, iteration: 118883
loss: 1.9051718711853027,grad_norm: 0.9999999256217884, iteration: 118884
loss: 1.828338623046875,grad_norm: 0.9999998799654778, iteration: 118885
loss: 1.7968639135360718,grad_norm: 1.000000010882859, iteration: 118886
loss: 2.0495119094848633,grad_norm: 1.0000000116860626, iteration: 118887
loss: 1.9703031778335571,grad_norm: 0.9999998885750264, iteration: 118888
loss: 2.1102116107940674,grad_norm: 1.000000010226905, iteration: 118889
loss: 1.9166651964187622,grad_norm: 1.000000002922642, iteration: 118890
loss: 2.065622091293335,grad_norm: 0.9999999575732637, iteration: 118891
loss: 2.4226343631744385,grad_norm: 0.9999999732556464, iteration: 118892
loss: 2.0823400020599365,grad_norm: 0.9999999068446632, iteration: 118893
loss: 2.2834525108337402,grad_norm: 1.0000000386639734, iteration: 118894
loss: 1.981134057044983,grad_norm: 0.999999938262029, iteration: 118895
loss: 1.8531824350357056,grad_norm: 0.9999999657963469, iteration: 118896
loss: 1.9467204809188843,grad_norm: 0.9999999455034048, iteration: 118897
loss: 1.8507426977157593,grad_norm: 1.0000000615912368, iteration: 118898
loss: 2.1397833824157715,grad_norm: 1.0000000491088243, iteration: 118899
loss: 1.9777805805206299,grad_norm: 1.000000005765454, iteration: 118900
loss: 1.764412760734558,grad_norm: 0.9999998631477133, iteration: 118901
loss: 1.8710813522338867,grad_norm: 0.9999998660340661, iteration: 118902
loss: 1.471579909324646,grad_norm: 1.0000000074001858, iteration: 118903
loss: 1.6913084983825684,grad_norm: 1.0000000540535734, iteration: 118904
loss: 2.1754021644592285,grad_norm: 0.9999999586006856, iteration: 118905
loss: 1.8839056491851807,grad_norm: 0.9999999950538808, iteration: 118906
loss: 1.8175833225250244,grad_norm: 0.9999999736933957, iteration: 118907
loss: 1.901837944984436,grad_norm: 0.9999999803206195, iteration: 118908
loss: 1.882283091545105,grad_norm: 0.9999998818765623, iteration: 118909
loss: 2.007349967956543,grad_norm: 1.0000000061610232, iteration: 118910
loss: 2.0796518325805664,grad_norm: 0.9999999570148965, iteration: 118911
loss: 1.6500787734985352,grad_norm: 1.0000000608493638, iteration: 118912
loss: 2.0062453746795654,grad_norm: 0.9999999630888817, iteration: 118913
loss: 1.7107759714126587,grad_norm: 0.9999999633266546, iteration: 118914
loss: 2.218317985534668,grad_norm: 1.0000000754480496, iteration: 118915
loss: 1.7313487529754639,grad_norm: 0.9999998629663442, iteration: 118916
loss: 2.101146697998047,grad_norm: 0.9999997689312854, iteration: 118917
loss: 2.0562384128570557,grad_norm: 1.0000000588776026, iteration: 118918
loss: 1.8092573881149292,grad_norm: 0.9999999025780889, iteration: 118919
loss: 1.7251262664794922,grad_norm: 0.9999999812739335, iteration: 118920
loss: 1.728634238243103,grad_norm: 0.9999999420766366, iteration: 118921
loss: 1.9267479181289673,grad_norm: 0.999999904522373, iteration: 118922
loss: 2.13504695892334,grad_norm: 1.0000000082952691, iteration: 118923
loss: 2.043455123901367,grad_norm: 0.9999999758318558, iteration: 118924
loss: 1.5644339323043823,grad_norm: 1.0000000080394833, iteration: 118925
loss: 1.8193856477737427,grad_norm: 0.9999999473640077, iteration: 118926
loss: 1.95201575756073,grad_norm: 0.9999999487490123, iteration: 118927
loss: 2.0859127044677734,grad_norm: 0.9999998791604923, iteration: 118928
loss: 1.8009916543960571,grad_norm: 0.9999999618057973, iteration: 118929
loss: 2.0260908603668213,grad_norm: 1.0000000799387065, iteration: 118930
loss: 1.909407615661621,grad_norm: 0.9999998662963513, iteration: 118931
loss: 1.8912346363067627,grad_norm: 0.9999999026086693, iteration: 118932
loss: 1.9333654642105103,grad_norm: 0.9999999829579165, iteration: 118933
loss: 1.619271993637085,grad_norm: 0.9999999902838603, iteration: 118934
loss: 1.5583839416503906,grad_norm: 0.999999967392216, iteration: 118935
loss: 2.1082518100738525,grad_norm: 1.0000000677053529, iteration: 118936
loss: 1.6880152225494385,grad_norm: 0.9999998650634301, iteration: 118937
loss: 1.726579189300537,grad_norm: 0.9999999973515492, iteration: 118938
loss: 1.8373212814331055,grad_norm: 0.9999999082646653, iteration: 118939
loss: 1.9674814939498901,grad_norm: 0.9999998492693982, iteration: 118940
loss: 1.9755833148956299,grad_norm: 1.0000000971926268, iteration: 118941
loss: 1.5861092805862427,grad_norm: 0.9999998641962923, iteration: 118942
loss: 1.814592719078064,grad_norm: 0.999999920220113, iteration: 118943
loss: 1.584722638130188,grad_norm: 0.9999999988966062, iteration: 118944
loss: 1.8807053565979004,grad_norm: 0.9999999206760923, iteration: 118945
loss: 1.9672191143035889,grad_norm: 0.9999999478177372, iteration: 118946
loss: 1.7305351495742798,grad_norm: 0.9999998841087503, iteration: 118947
loss: 1.6959832906723022,grad_norm: 0.9999999359338175, iteration: 118948
loss: 1.9058340787887573,grad_norm: 0.9999999534966215, iteration: 118949
loss: 1.9165253639221191,grad_norm: 0.9999999259322251, iteration: 118950
loss: 1.7210915088653564,grad_norm: 0.9999999215379376, iteration: 118951
loss: 1.789400577545166,grad_norm: 0.9999999655216936, iteration: 118952
loss: 1.751542091369629,grad_norm: 1.0000000204681823, iteration: 118953
loss: 1.753414273262024,grad_norm: 0.9999999679589997, iteration: 118954
loss: 2.152564287185669,grad_norm: 1.0000000015529809, iteration: 118955
loss: 1.5556738376617432,grad_norm: 0.9999998539229943, iteration: 118956
loss: 1.7819938659667969,grad_norm: 0.9999999632918571, iteration: 118957
loss: 1.9250856637954712,grad_norm: 0.9999999508103197, iteration: 118958
loss: 1.7425873279571533,grad_norm: 0.999999925528879, iteration: 118959
loss: 2.013995885848999,grad_norm: 0.9999999580408739, iteration: 118960
loss: 1.7015498876571655,grad_norm: 0.999999913806919, iteration: 118961
loss: 1.9926362037658691,grad_norm: 0.9999999051898003, iteration: 118962
loss: 2.0585803985595703,grad_norm: 0.9999999747809658, iteration: 118963
loss: 2.1941702365875244,grad_norm: 0.9999999921385383, iteration: 118964
loss: 2.0411038398742676,grad_norm: 1.0000000114992007, iteration: 118965
loss: 1.9264189004898071,grad_norm: 0.9999999319464131, iteration: 118966
loss: 2.31099009513855,grad_norm: 1.0000000628158232, iteration: 118967
loss: 2.0258758068084717,grad_norm: 0.9999999167012934, iteration: 118968
loss: 1.9585442543029785,grad_norm: 0.9999998509261008, iteration: 118969
loss: 1.4680359363555908,grad_norm: 0.9999999448805132, iteration: 118970
loss: 1.635094404220581,grad_norm: 0.9999999912685189, iteration: 118971
loss: 1.8018747568130493,grad_norm: 1.000000036313095, iteration: 118972
loss: 2.0967953205108643,grad_norm: 0.999999914098492, iteration: 118973
loss: 1.892139196395874,grad_norm: 1.0000000325399645, iteration: 118974
loss: 1.9189292192459106,grad_norm: 0.999999974998624, iteration: 118975
loss: 1.8548481464385986,grad_norm: 0.999999957059706, iteration: 118976
loss: 1.9493786096572876,grad_norm: 0.9999998751139391, iteration: 118977
loss: 1.7223854064941406,grad_norm: 0.9999998734535052, iteration: 118978
loss: 1.9106570482254028,grad_norm: 1.0000000806846019, iteration: 118979
loss: 1.679714560508728,grad_norm: 0.9999999624129593, iteration: 118980
loss: 2.2622787952423096,grad_norm: 1.000000030436775, iteration: 118981
loss: 1.9083267450332642,grad_norm: 0.9999997816159116, iteration: 118982
loss: 2.1329379081726074,grad_norm: 1.000000011884034, iteration: 118983
loss: 1.8867108821868896,grad_norm: 0.9999998800319971, iteration: 118984
loss: 1.873244285583496,grad_norm: 0.9999999920324675, iteration: 118985
loss: 2.034411907196045,grad_norm: 0.9999999531040717, iteration: 118986
loss: 2.0729470252990723,grad_norm: 0.9999999791811692, iteration: 118987
loss: 1.8231881856918335,grad_norm: 0.9999999075342805, iteration: 118988
loss: 1.7525588274002075,grad_norm: 1.0000000071613575, iteration: 118989
loss: 2.0503551959991455,grad_norm: 0.9999999414465031, iteration: 118990
loss: 1.7553144693374634,grad_norm: 0.9999998762851282, iteration: 118991
loss: 1.9963916540145874,grad_norm: 0.9999999491594066, iteration: 118992
loss: 2.3039350509643555,grad_norm: 0.9999998301125684, iteration: 118993
loss: 1.8325092792510986,grad_norm: 0.9999999644304266, iteration: 118994
loss: 1.5845296382904053,grad_norm: 1.0000000585866837, iteration: 118995
loss: 2.0857324600219727,grad_norm: 0.9999998504882838, iteration: 118996
loss: 2.2809078693389893,grad_norm: 0.9999999223463758, iteration: 118997
loss: 2.0067200660705566,grad_norm: 1.0000000332604344, iteration: 118998
loss: 2.0426251888275146,grad_norm: 0.9999999969757124, iteration: 118999
loss: 1.9222772121429443,grad_norm: 0.9999999703222072, iteration: 119000
loss: 1.6824963092803955,grad_norm: 0.999999936338977, iteration: 119001
loss: 1.7364404201507568,grad_norm: 0.9999999064619198, iteration: 119002
loss: 1.6851364374160767,grad_norm: 0.9999999626946048, iteration: 119003
loss: 2.1793787479400635,grad_norm: 0.999999997130099, iteration: 119004
loss: 2.073425531387329,grad_norm: 0.9999999471090784, iteration: 119005
loss: 1.8580515384674072,grad_norm: 1.0000000733172973, iteration: 119006
loss: 1.9828544855117798,grad_norm: 0.9999999102599092, iteration: 119007
loss: 1.907545566558838,grad_norm: 0.9999998611213488, iteration: 119008
loss: 2.160118341445923,grad_norm: 1.0000000367427817, iteration: 119009
loss: 1.898187518119812,grad_norm: 0.999999965941104, iteration: 119010
loss: 1.8410634994506836,grad_norm: 0.9999999547707722, iteration: 119011
loss: 1.9007210731506348,grad_norm: 0.9999999473548752, iteration: 119012
loss: 2.1647000312805176,grad_norm: 0.9999999060362276, iteration: 119013
loss: 2.161623954772949,grad_norm: 1.000000050744064, iteration: 119014
loss: 2.236266613006592,grad_norm: 0.9999999143073142, iteration: 119015
loss: 2.0011789798736572,grad_norm: 1.0000000080372575, iteration: 119016
loss: 1.7789078950881958,grad_norm: 1.00000002203852, iteration: 119017
loss: 1.7011933326721191,grad_norm: 1.0000000060026133, iteration: 119018
loss: 1.9080909490585327,grad_norm: 0.9999999208389664, iteration: 119019
loss: 1.977126955986023,grad_norm: 1.0000000616334495, iteration: 119020
loss: 2.280698299407959,grad_norm: 0.9999998943071466, iteration: 119021
loss: 2.0653960704803467,grad_norm: 1.000000057110573, iteration: 119022
loss: 1.949078917503357,grad_norm: 0.9999999150791833, iteration: 119023
loss: 2.2187860012054443,grad_norm: 0.999999949967246, iteration: 119024
loss: 1.83403480052948,grad_norm: 1.0000000020140032, iteration: 119025
loss: 2.252030611038208,grad_norm: 0.9999999356645537, iteration: 119026
loss: 2.102815866470337,grad_norm: 1.000000071398638, iteration: 119027
loss: 2.392019033432007,grad_norm: 1.0000000342705135, iteration: 119028
loss: 1.949184775352478,grad_norm: 0.9999999120055464, iteration: 119029
loss: 2.1094863414764404,grad_norm: 1.0000000026988274, iteration: 119030
loss: 2.12207293510437,grad_norm: 0.999999900295746, iteration: 119031
loss: 1.9268959760665894,grad_norm: 0.9999998792336736, iteration: 119032
loss: 1.956958532333374,grad_norm: 0.9999999721011837, iteration: 119033
loss: 1.8916422128677368,grad_norm: 0.9999999621994718, iteration: 119034
loss: 2.0807347297668457,grad_norm: 0.999999936813506, iteration: 119035
loss: 2.2887990474700928,grad_norm: 0.9999998296178482, iteration: 119036
loss: 2.1263742446899414,grad_norm: 0.9999999784320227, iteration: 119037
loss: 2.0214433670043945,grad_norm: 0.9999998101957456, iteration: 119038
loss: 2.086470127105713,grad_norm: 0.9999999095040075, iteration: 119039
loss: 2.50915789604187,grad_norm: 0.9999999171034881, iteration: 119040
loss: 2.0273327827453613,grad_norm: 0.9999999732071485, iteration: 119041
loss: 2.240884304046631,grad_norm: 0.999999964269246, iteration: 119042
loss: 1.9049172401428223,grad_norm: 1.0000000056420955, iteration: 119043
loss: 2.0175769329071045,grad_norm: 0.9999999891323096, iteration: 119044
loss: 1.9452593326568604,grad_norm: 0.9999999559849606, iteration: 119045
loss: 2.4073734283447266,grad_norm: 0.9999998965317429, iteration: 119046
loss: 1.9298676252365112,grad_norm: 0.9999999709292162, iteration: 119047
loss: 2.296374559402466,grad_norm: 1.0000000210457305, iteration: 119048
loss: 2.3331217765808105,grad_norm: 1.0000001237176293, iteration: 119049
loss: 1.8667820692062378,grad_norm: 0.9999998696136909, iteration: 119050
loss: 1.810611605644226,grad_norm: 0.9999998343042497, iteration: 119051
loss: 2.309368371963501,grad_norm: 0.9999999409321934, iteration: 119052
loss: 2.4557223320007324,grad_norm: 0.999999990861888, iteration: 119053
loss: 2.2012863159179688,grad_norm: 0.9999999160952905, iteration: 119054
loss: 2.5151960849761963,grad_norm: 1.000000006614509, iteration: 119055
loss: 2.3925018310546875,grad_norm: 0.9999998961610592, iteration: 119056
loss: 2.3977420330047607,grad_norm: 0.9999999086505836, iteration: 119057
loss: 2.020251750946045,grad_norm: 0.9999998840270033, iteration: 119058
loss: 2.3105783462524414,grad_norm: 1.0000000003473677, iteration: 119059
loss: 2.0609371662139893,grad_norm: 1.0000000020155355, iteration: 119060
loss: 2.352074146270752,grad_norm: 0.9999998711611163, iteration: 119061
loss: 2.020850658416748,grad_norm: 0.9999999101163097, iteration: 119062
loss: 2.331789493560791,grad_norm: 0.9999999553726235, iteration: 119063
loss: 1.9978036880493164,grad_norm: 1.0000000177414217, iteration: 119064
loss: 2.243608236312866,grad_norm: 1.0000000106980265, iteration: 119065
loss: 2.3276724815368652,grad_norm: 0.9999999836687512, iteration: 119066
loss: 2.2747931480407715,grad_norm: 0.999999927652864, iteration: 119067
loss: 1.8021457195281982,grad_norm: 0.9999999499197606, iteration: 119068
loss: 2.1378777027130127,grad_norm: 0.9999999383497208, iteration: 119069
loss: 1.8772083520889282,grad_norm: 0.9999998673203552, iteration: 119070
loss: 2.2804410457611084,grad_norm: 1.0000000016098705, iteration: 119071
loss: 1.7682558298110962,grad_norm: 0.9999998270832222, iteration: 119072
loss: 2.0678093433380127,grad_norm: 1.0000000078045646, iteration: 119073
loss: 1.9216713905334473,grad_norm: 0.9999998892663942, iteration: 119074
loss: 1.5520647764205933,grad_norm: 0.9999998895066011, iteration: 119075
loss: 1.893257975578308,grad_norm: 0.9999999362235404, iteration: 119076
loss: 1.8275855779647827,grad_norm: 0.9999999657955425, iteration: 119077
loss: 2.1310231685638428,grad_norm: 0.9999998542203975, iteration: 119078
loss: 1.7901808023452759,grad_norm: 0.9999999588318559, iteration: 119079
loss: 1.6645238399505615,grad_norm: 0.9999999238223846, iteration: 119080
loss: 2.198566436767578,grad_norm: 0.9999999183383342, iteration: 119081
loss: 2.0353753566741943,grad_norm: 0.9999999631698726, iteration: 119082
loss: 1.855489730834961,grad_norm: 1.0000000155651616, iteration: 119083
loss: 2.0239808559417725,grad_norm: 1.0000000464330456, iteration: 119084
loss: 2.130843162536621,grad_norm: 0.9999999578722426, iteration: 119085
loss: 2.3526580333709717,grad_norm: 0.9999999017652627, iteration: 119086
loss: 2.0395467281341553,grad_norm: 0.9999998887391776, iteration: 119087
loss: 1.7554101943969727,grad_norm: 0.9999999200087473, iteration: 119088
loss: 1.8164154291152954,grad_norm: 0.9999998525632553, iteration: 119089
loss: 1.9893282651901245,grad_norm: 1.0000000443358381, iteration: 119090
loss: 1.9627330303192139,grad_norm: 0.9999999216999002, iteration: 119091
loss: 2.0175583362579346,grad_norm: 0.9999999522358642, iteration: 119092
loss: 1.9417357444763184,grad_norm: 1.0000000077323674, iteration: 119093
loss: 1.9620400667190552,grad_norm: 0.999999935146339, iteration: 119094
loss: 1.8647947311401367,grad_norm: 0.9999998874885345, iteration: 119095
loss: 1.9184335470199585,grad_norm: 0.9999998543810696, iteration: 119096
loss: 1.8781983852386475,grad_norm: 0.9999999655664222, iteration: 119097
loss: 1.8861280679702759,grad_norm: 0.9999999482881549, iteration: 119098
loss: 1.7663108110427856,grad_norm: 1.0000000284736703, iteration: 119099
loss: 1.833618402481079,grad_norm: 0.9999998685116323, iteration: 119100
loss: 2.1020336151123047,grad_norm: 1.000000004137513, iteration: 119101
loss: 1.7402180433273315,grad_norm: 0.9999999577945606, iteration: 119102
loss: 1.6378711462020874,grad_norm: 0.9999998649176792, iteration: 119103
loss: 1.9523394107818604,grad_norm: 0.9999998407786478, iteration: 119104
loss: 2.123232126235962,grad_norm: 0.999999998053094, iteration: 119105
loss: 1.7367397546768188,grad_norm: 0.9999998925643857, iteration: 119106
loss: 1.7034721374511719,grad_norm: 0.9999998868163239, iteration: 119107
loss: 1.8737648725509644,grad_norm: 1.0000000376330027, iteration: 119108
loss: 1.8802882432937622,grad_norm: 0.9999999519410112, iteration: 119109
loss: 1.8353437185287476,grad_norm: 0.9999997446474868, iteration: 119110
loss: 1.8118869066238403,grad_norm: 0.9999999841738676, iteration: 119111
loss: 2.0100486278533936,grad_norm: 0.9999999268155136, iteration: 119112
loss: 1.7411001920700073,grad_norm: 0.9999999361311336, iteration: 119113
loss: 1.700810194015503,grad_norm: 1.0000000170622254, iteration: 119114
loss: 1.6939808130264282,grad_norm: 0.99999989609013, iteration: 119115
loss: 1.7400274276733398,grad_norm: 0.9999999062168388, iteration: 119116
loss: 1.5672967433929443,grad_norm: 0.9999999139567444, iteration: 119117
loss: 1.9031025171279907,grad_norm: 0.9999998956529997, iteration: 119118
loss: 1.8612710237503052,grad_norm: 0.9999998206415412, iteration: 119119
loss: 1.6988441944122314,grad_norm: 0.9999998926137146, iteration: 119120
loss: 2.008044481277466,grad_norm: 0.9999999414697347, iteration: 119121
loss: 1.9825760126113892,grad_norm: 0.9999999481663074, iteration: 119122
loss: 1.9332411289215088,grad_norm: 0.999999900320621, iteration: 119123
loss: 1.6837257146835327,grad_norm: 0.9999999973867016, iteration: 119124
loss: 1.9180817604064941,grad_norm: 0.9999999655567181, iteration: 119125
loss: 1.9251638650894165,grad_norm: 0.9999999576070399, iteration: 119126
loss: 1.5646203756332397,grad_norm: 0.9999999123238088, iteration: 119127
loss: 1.782993197441101,grad_norm: 0.9999998546743327, iteration: 119128
loss: 1.8179593086242676,grad_norm: 0.9999998115980082, iteration: 119129
loss: 2.1311254501342773,grad_norm: 0.9999999625850696, iteration: 119130
loss: 1.553682565689087,grad_norm: 1.000000082261236, iteration: 119131
loss: 1.6229770183563232,grad_norm: 0.9999999949624869, iteration: 119132
loss: 2.021024465560913,grad_norm: 0.9999999016276078, iteration: 119133
loss: 1.6173003911972046,grad_norm: 0.9999998554465493, iteration: 119134
loss: 1.6620879173278809,grad_norm: 0.9999999691678375, iteration: 119135
loss: 1.8041210174560547,grad_norm: 0.9999998728175146, iteration: 119136
loss: 1.3388484716415405,grad_norm: 0.9999999701012572, iteration: 119137
loss: 2.196377992630005,grad_norm: 0.9999998501467995, iteration: 119138
loss: 1.758313536643982,grad_norm: 1.000000118418819, iteration: 119139
loss: 1.8012864589691162,grad_norm: 0.999999862431831, iteration: 119140
loss: 1.8607230186462402,grad_norm: 0.9999999564044725, iteration: 119141
loss: 1.8494349718093872,grad_norm: 1.0000000640999243, iteration: 119142
loss: 2.1074156761169434,grad_norm: 1.0000000094967676, iteration: 119143
loss: 1.6757779121398926,grad_norm: 0.9999999677264046, iteration: 119144
loss: 1.9324311017990112,grad_norm: 1.0000000835349714, iteration: 119145
loss: 1.5719068050384521,grad_norm: 0.9999999471822529, iteration: 119146
loss: 1.5120303630828857,grad_norm: 0.9999999285003313, iteration: 119147
loss: 1.6203385591506958,grad_norm: 0.9999999050938692, iteration: 119148
loss: 1.5404136180877686,grad_norm: 1.0000000278000611, iteration: 119149
loss: 1.9641183614730835,grad_norm: 0.9999999554175268, iteration: 119150
loss: 1.591787338256836,grad_norm: 0.9999998720083921, iteration: 119151
loss: 1.7189608812332153,grad_norm: 0.9999998722376914, iteration: 119152
loss: 1.490522861480713,grad_norm: 0.9999999672861131, iteration: 119153
loss: 1.5061227083206177,grad_norm: 0.9999999443076499, iteration: 119154
loss: 1.3993903398513794,grad_norm: 0.9999999996672363, iteration: 119155
loss: 1.8877105712890625,grad_norm: 1.0000000014644632, iteration: 119156
loss: 1.5985078811645508,grad_norm: 0.9999999653780491, iteration: 119157
loss: 1.566150426864624,grad_norm: 0.9999999443220028, iteration: 119158
loss: 1.551375150680542,grad_norm: 0.9999997508095023, iteration: 119159
loss: 1.6133909225463867,grad_norm: 0.9999999413226727, iteration: 119160
loss: 1.7272213697433472,grad_norm: 0.9999998441688327, iteration: 119161
loss: 1.497542142868042,grad_norm: 0.9999998973029217, iteration: 119162
loss: 1.6607612371444702,grad_norm: 0.9999999194461392, iteration: 119163
loss: 1.558885931968689,grad_norm: 0.999999861049346, iteration: 119164
loss: 1.511518955230713,grad_norm: 0.9999999219925597, iteration: 119165
loss: 2.0782642364501953,grad_norm: 0.9999999749664766, iteration: 119166
loss: 2.0363872051239014,grad_norm: 0.9999999001735007, iteration: 119167
loss: 1.5459955930709839,grad_norm: 0.99999992330864, iteration: 119168
loss: 1.96933114528656,grad_norm: 0.9999999310121537, iteration: 119169
loss: 1.6614965200424194,grad_norm: 0.9999999209100627, iteration: 119170
loss: 1.535102367401123,grad_norm: 0.9999999385678134, iteration: 119171
loss: 1.9332287311553955,grad_norm: 0.999999971514781, iteration: 119172
loss: 1.862031102180481,grad_norm: 0.9999999232563482, iteration: 119173
loss: 1.7489382028579712,grad_norm: 0.9999999569088251, iteration: 119174
loss: 2.209153890609741,grad_norm: 0.9999999618070389, iteration: 119175
loss: 1.7704448699951172,grad_norm: 1.000000002247906, iteration: 119176
loss: 2.3222076892852783,grad_norm: 0.9999999592865435, iteration: 119177
loss: 1.7268823385238647,grad_norm: 0.9999999961439947, iteration: 119178
loss: 2.052989959716797,grad_norm: 0.9999999159951926, iteration: 119179
loss: 1.686453104019165,grad_norm: 1.000000025841535, iteration: 119180
loss: 1.8923654556274414,grad_norm: 1.0000000126352124, iteration: 119181
loss: 2.0198075771331787,grad_norm: 1.000000015722097, iteration: 119182
loss: 1.576836109161377,grad_norm: 0.9999999899504703, iteration: 119183
loss: 1.7154114246368408,grad_norm: 0.9999998963341874, iteration: 119184
loss: 1.8695701360702515,grad_norm: 0.9999999564573947, iteration: 119185
loss: 1.396875262260437,grad_norm: 0.9999998659431456, iteration: 119186
loss: 2.4848310947418213,grad_norm: 1.0000000605701176, iteration: 119187
loss: 1.8521591424942017,grad_norm: 0.9999998230509143, iteration: 119188
loss: 1.6576755046844482,grad_norm: 0.9999999433786678, iteration: 119189
loss: 1.846793293952942,grad_norm: 1.000000016402679, iteration: 119190
loss: 1.6359423398971558,grad_norm: 0.9999999904971294, iteration: 119191
loss: 1.7231425046920776,grad_norm: 1.0000000314790984, iteration: 119192
loss: 1.9362050294876099,grad_norm: 0.9999998843107518, iteration: 119193
loss: 1.8502249717712402,grad_norm: 1.000000030929858, iteration: 119194
loss: 1.6217795610427856,grad_norm: 0.999999888664309, iteration: 119195
loss: 1.8833439350128174,grad_norm: 0.9999999733576309, iteration: 119196
loss: 2.0199692249298096,grad_norm: 1.0000000263576356, iteration: 119197
loss: 2.068122148513794,grad_norm: 0.9999999767689954, iteration: 119198
loss: 1.680652141571045,grad_norm: 1.000000037927327, iteration: 119199
loss: 1.5686577558517456,grad_norm: 0.9999998824761838, iteration: 119200
loss: 1.7458434104919434,grad_norm: 1.0000000500347954, iteration: 119201
loss: 1.7756952047348022,grad_norm: 0.9999999266206945, iteration: 119202
loss: 1.9079281091690063,grad_norm: 1.000000054910796, iteration: 119203
loss: 1.945816159248352,grad_norm: 1.0000000983385615, iteration: 119204
loss: 1.9000086784362793,grad_norm: 0.9999998858193446, iteration: 119205
loss: 2.2071516513824463,grad_norm: 0.9999999011881185, iteration: 119206
loss: 2.4722697734832764,grad_norm: 0.9999999948744533, iteration: 119207
loss: 1.8099632263183594,grad_norm: 1.0000000475000346, iteration: 119208
loss: 1.7708686590194702,grad_norm: 0.9999998525094027, iteration: 119209
loss: 1.5633578300476074,grad_norm: 0.9999999806993877, iteration: 119210
loss: 1.740383267402649,grad_norm: 0.9999998819850495, iteration: 119211
loss: 1.526126742362976,grad_norm: 0.9999997036835628, iteration: 119212
loss: 2.310274362564087,grad_norm: 0.9999999803241213, iteration: 119213
loss: 1.9651284217834473,grad_norm: 0.9999999645820464, iteration: 119214
loss: 1.5733777284622192,grad_norm: 0.9999999141295768, iteration: 119215
loss: 1.9853161573410034,grad_norm: 0.9999998980257536, iteration: 119216
loss: 1.7266095876693726,grad_norm: 1.0000000221794147, iteration: 119217
loss: 2.141378879547119,grad_norm: 1.0000000116226044, iteration: 119218
loss: 2.176931619644165,grad_norm: 0.9999999759526039, iteration: 119219
loss: 1.4411848783493042,grad_norm: 0.9999998178886075, iteration: 119220
loss: 1.8664342164993286,grad_norm: 0.9999998807239572, iteration: 119221
loss: 1.6197471618652344,grad_norm: 0.999999886430032, iteration: 119222
loss: 1.8535019159317017,grad_norm: 0.9999999663657846, iteration: 119223
loss: 1.8373937606811523,grad_norm: 0.9999999764821781, iteration: 119224
loss: 1.7843717336654663,grad_norm: 0.9999999809586155, iteration: 119225
loss: 1.9855854511260986,grad_norm: 1.0000000187765605, iteration: 119226
loss: 1.934228777885437,grad_norm: 0.9999998686246638, iteration: 119227
loss: 1.8256845474243164,grad_norm: 0.9999998859231984, iteration: 119228
loss: 1.5190775394439697,grad_norm: 1.0000000561854063, iteration: 119229
loss: 1.8835220336914062,grad_norm: 0.9999999604433719, iteration: 119230
loss: 1.7277406454086304,grad_norm: 1.0000000027885583, iteration: 119231
loss: 2.2918946743011475,grad_norm: 0.9999998647967206, iteration: 119232
loss: 1.657759428024292,grad_norm: 0.9999999776339128, iteration: 119233
loss: 1.744613766670227,grad_norm: 0.9999999267183471, iteration: 119234
loss: 1.2851850986480713,grad_norm: 0.9999999002236253, iteration: 119235
loss: 1.4593809843063354,grad_norm: 0.9999999334904034, iteration: 119236
loss: 1.7626800537109375,grad_norm: 1.000000030183341, iteration: 119237
loss: 1.7115272283554077,grad_norm: 0.9999999399227778, iteration: 119238
loss: 1.64887535572052,grad_norm: 0.9999999731105611, iteration: 119239
loss: 1.6024632453918457,grad_norm: 0.9999999123645688, iteration: 119240
loss: 1.4926623106002808,grad_norm: 0.9999999047659065, iteration: 119241
loss: 1.6852946281433105,grad_norm: 1.0000000358382288, iteration: 119242
loss: 1.7123216390609741,grad_norm: 0.9999999993458458, iteration: 119243
loss: 1.944611668586731,grad_norm: 1.0000000334978874, iteration: 119244
loss: 1.9516627788543701,grad_norm: 0.9999998986203168, iteration: 119245
loss: 1.9290064573287964,grad_norm: 1.0000000038750998, iteration: 119246
loss: 1.696002721786499,grad_norm: 0.9999998730017984, iteration: 119247
loss: 1.8479973077774048,grad_norm: 0.9999999821290603, iteration: 119248
loss: 1.5232895612716675,grad_norm: 0.9999999521625869, iteration: 119249
loss: 1.6630005836486816,grad_norm: 0.999999884486154, iteration: 119250
loss: 2.2202179431915283,grad_norm: 1.000000058204265, iteration: 119251
loss: 1.8087940216064453,grad_norm: 0.9999999720395779, iteration: 119252
loss: 2.429400682449341,grad_norm: 1.0000000391736297, iteration: 119253
loss: 1.813069462776184,grad_norm: 1.0000000632757016, iteration: 119254
loss: 1.8995674848556519,grad_norm: 0.999999985604118, iteration: 119255
loss: 1.927014708518982,grad_norm: 0.9999999791282466, iteration: 119256
loss: 1.9437624216079712,grad_norm: 0.9999999499492667, iteration: 119257
loss: 2.0837037563323975,grad_norm: 1.0000000588221019, iteration: 119258
loss: 1.7154788970947266,grad_norm: 0.9999998661796141, iteration: 119259
loss: 1.7033493518829346,grad_norm: 0.9999999754408677, iteration: 119260
loss: 1.9445757865905762,grad_norm: 0.9999999946577892, iteration: 119261
loss: 1.8022291660308838,grad_norm: 0.9999998651143588, iteration: 119262
loss: 2.0382845401763916,grad_norm: 0.999999901256946, iteration: 119263
loss: 1.7534185647964478,grad_norm: 0.9999999148076261, iteration: 119264
loss: 1.8031129837036133,grad_norm: 0.9999998594796836, iteration: 119265
loss: 2.0286290645599365,grad_norm: 0.9999998788889741, iteration: 119266
loss: 1.85220205783844,grad_norm: 0.9999999683214732, iteration: 119267
loss: 1.43303644657135,grad_norm: 0.9999998760480059, iteration: 119268
loss: 2.064418315887451,grad_norm: 0.9999998333081883, iteration: 119269
loss: 1.9927140474319458,grad_norm: 0.9999998520980494, iteration: 119270
loss: 2.040286064147949,grad_norm: 0.9999999655613443, iteration: 119271
loss: 2.2436294555664062,grad_norm: 0.9999999274947271, iteration: 119272
loss: 2.2591822147369385,grad_norm: 1.0000000379995693, iteration: 119273
loss: 1.882102131843567,grad_norm: 0.9999999612564394, iteration: 119274
loss: 2.15773606300354,grad_norm: 0.9999998709968643, iteration: 119275
loss: 2.4429848194122314,grad_norm: 1.0000000222827707, iteration: 119276
loss: 2.0144710540771484,grad_norm: 0.9999998522903409, iteration: 119277
loss: 1.4350661039352417,grad_norm: 0.9999997504721403, iteration: 119278
loss: 1.5659654140472412,grad_norm: 0.9999999493580382, iteration: 119279
loss: 1.9368118047714233,grad_norm: 0.9999999417925601, iteration: 119280
loss: 2.5599892139434814,grad_norm: 0.9999999053800215, iteration: 119281
loss: 2.0907139778137207,grad_norm: 1.0000000194356364, iteration: 119282
loss: 1.4327398538589478,grad_norm: 0.9999999587845744, iteration: 119283
loss: 2.4914348125457764,grad_norm: 1.000000009145669, iteration: 119284
loss: 1.5621048212051392,grad_norm: 0.9999998638352499, iteration: 119285
loss: 1.7352551221847534,grad_norm: 0.9999999792112731, iteration: 119286
loss: 1.5408562421798706,grad_norm: 0.9999998731237575, iteration: 119287
loss: 1.599498987197876,grad_norm: 0.9999998158805966, iteration: 119288
loss: 2.070706844329834,grad_norm: 0.9999999421105621, iteration: 119289
loss: 2.5199062824249268,grad_norm: 1.0000000642554985, iteration: 119290
loss: 1.848330020904541,grad_norm: 0.9999999212474121, iteration: 119291
loss: 1.9716293811798096,grad_norm: 1.0000000109698863, iteration: 119292
loss: 1.9826748371124268,grad_norm: 0.999999904156428, iteration: 119293
loss: 1.8022812604904175,grad_norm: 1.0000000634359532, iteration: 119294
loss: 1.7493369579315186,grad_norm: 1.0000000135910851, iteration: 119295
loss: 1.9073866605758667,grad_norm: 0.9999998295907023, iteration: 119296
loss: 1.6824772357940674,grad_norm: 0.9999998741837379, iteration: 119297
loss: 2.3815133571624756,grad_norm: 1.0000000168689003, iteration: 119298
loss: 1.8264485597610474,grad_norm: 1.00000006158699, iteration: 119299
loss: 2.094618558883667,grad_norm: 0.9999999714373781, iteration: 119300
loss: 1.7904034852981567,grad_norm: 0.9999999337169394, iteration: 119301
loss: 1.904465913772583,grad_norm: 0.9999999396944007, iteration: 119302
loss: 1.6421438455581665,grad_norm: 0.999999893322819, iteration: 119303
loss: 1.6485190391540527,grad_norm: 0.9999999745406911, iteration: 119304
loss: 1.7532597780227661,grad_norm: 1.0000000066399122, iteration: 119305
loss: 2.2563817501068115,grad_norm: 0.9999999154282709, iteration: 119306
loss: 2.002180814743042,grad_norm: 0.9999999737070007, iteration: 119307
loss: 1.478615164756775,grad_norm: 1.000000012915953, iteration: 119308
loss: 2.4851903915405273,grad_norm: 0.9999999240137247, iteration: 119309
loss: 2.0718815326690674,grad_norm: 0.999999980197201, iteration: 119310
loss: 1.8981318473815918,grad_norm: 0.9999999433817535, iteration: 119311
loss: 1.7949867248535156,grad_norm: 1.0000000785199312, iteration: 119312
loss: 2.3130242824554443,grad_norm: 0.9999999393623092, iteration: 119313
loss: 1.7400163412094116,grad_norm: 0.9999999517478634, iteration: 119314
loss: 1.5197001695632935,grad_norm: 0.9999998646332701, iteration: 119315
loss: 1.406041145324707,grad_norm: 0.9999999467116266, iteration: 119316
loss: 1.612260103225708,grad_norm: 0.9999998719757858, iteration: 119317
loss: 2.023866653442383,grad_norm: 0.9999999929797264, iteration: 119318
loss: 1.6334519386291504,grad_norm: 0.9999999502509487, iteration: 119319
loss: 1.741745114326477,grad_norm: 0.9999999142081822, iteration: 119320
loss: 1.767075538635254,grad_norm: 0.9999998014017273, iteration: 119321
loss: 2.4062771797180176,grad_norm: 0.9999999484764639, iteration: 119322
loss: 1.4552475214004517,grad_norm: 0.999999872548603, iteration: 119323
loss: 1.930033802986145,grad_norm: 1.0000000152422093, iteration: 119324
loss: 1.675001859664917,grad_norm: 0.9999999208825687, iteration: 119325
loss: 1.3402047157287598,grad_norm: 0.9999996401651944, iteration: 119326
loss: 1.654105544090271,grad_norm: 0.9999999211803574, iteration: 119327
loss: 1.890581488609314,grad_norm: 0.9999999135801193, iteration: 119328
loss: 2.59955096244812,grad_norm: 1.0000000383207475, iteration: 119329
loss: 1.559309959411621,grad_norm: 0.9999998440609604, iteration: 119330
loss: 1.764778971672058,grad_norm: 0.9999998520778139, iteration: 119331
loss: 1.6835740804672241,grad_norm: 0.999999896086453, iteration: 119332
loss: 1.6206220388412476,grad_norm: 1.0000000075551347, iteration: 119333
loss: 1.595386266708374,grad_norm: 0.9999999497922253, iteration: 119334
loss: 1.3645521402359009,grad_norm: 0.9999999588625118, iteration: 119335
loss: 1.9520516395568848,grad_norm: 0.9999999069410795, iteration: 119336
loss: 1.9591808319091797,grad_norm: 0.9999999326073332, iteration: 119337
loss: 1.6574506759643555,grad_norm: 0.999999812760675, iteration: 119338
loss: 1.6737921237945557,grad_norm: 1.0000000527437671, iteration: 119339
loss: 1.4059131145477295,grad_norm: 0.9999998765179102, iteration: 119340
loss: 1.383470058441162,grad_norm: 1.0000000006079761, iteration: 119341
loss: 1.5310760736465454,grad_norm: 0.9999999536867611, iteration: 119342
loss: 1.6786261796951294,grad_norm: 0.9999999580183686, iteration: 119343
loss: 2.0753188133239746,grad_norm: 1.0000000549296844, iteration: 119344
loss: 2.0170493125915527,grad_norm: 0.9999999997053515, iteration: 119345
loss: 1.4932861328125,grad_norm: 0.9999999092926602, iteration: 119346
loss: 2.463966131210327,grad_norm: 0.9999999765938258, iteration: 119347
loss: 1.4793034791946411,grad_norm: 1.0000000324816296, iteration: 119348
loss: 1.844652771949768,grad_norm: 0.9999999712923047, iteration: 119349
loss: 1.550904393196106,grad_norm: 0.9999999013868832, iteration: 119350
loss: 1.428941249847412,grad_norm: 1.0000000012703478, iteration: 119351
loss: 1.2529022693634033,grad_norm: 0.9999998086921574, iteration: 119352
loss: 1.6202442646026611,grad_norm: 1.0000000356266285, iteration: 119353
loss: 1.3763928413391113,grad_norm: 0.9999999927211445, iteration: 119354
loss: 1.7089723348617554,grad_norm: 0.9999999728381256, iteration: 119355
loss: 1.3788210153579712,grad_norm: 0.9999998211169715, iteration: 119356
loss: 1.801662564277649,grad_norm: 1.0000000123130903, iteration: 119357
loss: 1.462784767150879,grad_norm: 0.9999997751273133, iteration: 119358
loss: 2.58369517326355,grad_norm: 1.0000000203357347, iteration: 119359
loss: 2.2526285648345947,grad_norm: 0.9999999476251954, iteration: 119360
loss: 1.2602379322052002,grad_norm: 1.000000014932874, iteration: 119361
loss: 1.558817744255066,grad_norm: 0.9999999056291189, iteration: 119362
loss: 1.420685887336731,grad_norm: 0.9999999555543345, iteration: 119363
loss: 1.5452330112457275,grad_norm: 1.0000000200045465, iteration: 119364
loss: 2.2473316192626953,grad_norm: 1.000000034405749, iteration: 119365
loss: 1.8246393203735352,grad_norm: 0.9999999741078828, iteration: 119366
loss: 1.4491493701934814,grad_norm: 0.999999919122413, iteration: 119367
loss: 1.5482205152511597,grad_norm: 0.9999999710816951, iteration: 119368
loss: 1.3853479623794556,grad_norm: 0.9999999542678192, iteration: 119369
loss: 1.5993571281433105,grad_norm: 0.9999999926907778, iteration: 119370
loss: 1.3925765752792358,grad_norm: 0.9999999466539563, iteration: 119371
loss: 1.8008745908737183,grad_norm: 1.0000000217683185, iteration: 119372
loss: 1.4523403644561768,grad_norm: 0.9999998280649588, iteration: 119373
loss: 1.3828181028366089,grad_norm: 0.9999999604482674, iteration: 119374
loss: 1.7747467756271362,grad_norm: 1.000000023275294, iteration: 119375
loss: 2.430068254470825,grad_norm: 1.000000008074266, iteration: 119376
loss: 1.5641218423843384,grad_norm: 1.0000000464877012, iteration: 119377
loss: 1.3706706762313843,grad_norm: 0.9999998967799838, iteration: 119378
loss: 1.444498062133789,grad_norm: 0.9999999333136524, iteration: 119379
loss: 1.9803557395935059,grad_norm: 1.0000000878478417, iteration: 119380
loss: 1.4478425979614258,grad_norm: 1.0000000364746615, iteration: 119381
loss: 1.7451914548873901,grad_norm: 0.9999999667454254, iteration: 119382
loss: 1.998369574546814,grad_norm: 0.9999999621381768, iteration: 119383
loss: 1.8188962936401367,grad_norm: 1.0000000371590216, iteration: 119384
loss: 1.6136672496795654,grad_norm: 0.9999999262424609, iteration: 119385
loss: 1.6184240579605103,grad_norm: 1.0000000116496193, iteration: 119386
loss: 1.8453618288040161,grad_norm: 0.9999999580813698, iteration: 119387
loss: 1.4746817350387573,grad_norm: 0.999999969777869, iteration: 119388
loss: 1.6350793838500977,grad_norm: 0.9999998958919974, iteration: 119389
loss: 1.5402642488479614,grad_norm: 0.999999884170464, iteration: 119390
loss: 1.3363772630691528,grad_norm: 0.9999999958585508, iteration: 119391
loss: 1.4959253072738647,grad_norm: 0.9999999471136727, iteration: 119392
loss: 1.5796586275100708,grad_norm: 0.9999998996079069, iteration: 119393
loss: 1.639968752861023,grad_norm: 1.0000000345676046, iteration: 119394
loss: 1.4907679557800293,grad_norm: 0.9999998656821464, iteration: 119395
loss: 1.5711820125579834,grad_norm: 1.0000000249195822, iteration: 119396
loss: 1.7357081174850464,grad_norm: 1.0000000342332778, iteration: 119397
loss: 1.5916904211044312,grad_norm: 0.9999999379182554, iteration: 119398
loss: 1.5475566387176514,grad_norm: 0.9999999547313518, iteration: 119399
loss: 1.709710955619812,grad_norm: 0.999999991517874, iteration: 119400
loss: 1.7498221397399902,grad_norm: 0.9999999229933515, iteration: 119401
loss: 1.3192825317382812,grad_norm: 0.9999999220363173, iteration: 119402
loss: 1.5680278539657593,grad_norm: 0.9999999718043602, iteration: 119403
loss: 1.6846774816513062,grad_norm: 1.0000000588458144, iteration: 119404
loss: 1.6445332765579224,grad_norm: 1.0000000179121498, iteration: 119405
loss: 1.55278480052948,grad_norm: 1.0000000021254036, iteration: 119406
loss: 1.4749737977981567,grad_norm: 0.9999999294745764, iteration: 119407
loss: 1.6981925964355469,grad_norm: 1.0000000447633526, iteration: 119408
loss: 1.283037781715393,grad_norm: 0.99999995593517, iteration: 119409
loss: 1.5121984481811523,grad_norm: 1.0000000414495733, iteration: 119410
loss: 1.6267006397247314,grad_norm: 1.0000000154116617, iteration: 119411
loss: 1.2736502885818481,grad_norm: 0.9999997926022245, iteration: 119412
loss: 1.4707928895950317,grad_norm: 0.9999999129111052, iteration: 119413
loss: 1.7916207313537598,grad_norm: 0.9999999749172812, iteration: 119414
loss: 1.290855884552002,grad_norm: 0.9999997333303979, iteration: 119415
loss: 2.025819778442383,grad_norm: 0.9999999526318694, iteration: 119416
loss: 1.271605134010315,grad_norm: 0.9999996999247179, iteration: 119417
loss: 1.2954115867614746,grad_norm: 0.9999998696722363, iteration: 119418
loss: 1.4714909791946411,grad_norm: 0.9999999779179591, iteration: 119419
loss: 1.72646963596344,grad_norm: 0.9999999648701969, iteration: 119420
loss: 1.463013768196106,grad_norm: 0.999999918630022, iteration: 119421
loss: 1.4852256774902344,grad_norm: 0.999999974123943, iteration: 119422
loss: 1.638838291168213,grad_norm: 0.9999999497626147, iteration: 119423
loss: 1.541216254234314,grad_norm: 0.9999998730603997, iteration: 119424
loss: 1.3669078350067139,grad_norm: 1.0000000905552462, iteration: 119425
loss: 1.3724311590194702,grad_norm: 0.9999999482590731, iteration: 119426
loss: 1.9779741764068604,grad_norm: 0.9999998354360625, iteration: 119427
loss: 1.4625710248947144,grad_norm: 0.9999999799601763, iteration: 119428
loss: 1.678123950958252,grad_norm: 0.999999999531954, iteration: 119429
loss: 1.7727183103561401,grad_norm: 0.9999997958374756, iteration: 119430
loss: 1.7436846494674683,grad_norm: 0.9999998934065231, iteration: 119431
loss: 1.8595727682113647,grad_norm: 0.9999999933790856, iteration: 119432
loss: 1.4039254188537598,grad_norm: 0.9999999635282179, iteration: 119433
loss: 1.8085260391235352,grad_norm: 0.9999999902663074, iteration: 119434
loss: 1.801031231880188,grad_norm: 0.999999819636027, iteration: 119435
loss: 1.5373129844665527,grad_norm: 0.9999998804001939, iteration: 119436
loss: 1.418643832206726,grad_norm: 0.999999941926031, iteration: 119437
loss: 1.3416818380355835,grad_norm: 0.99999994430288, iteration: 119438
loss: 1.640587568283081,grad_norm: 0.9999998399636373, iteration: 119439
loss: 1.5882539749145508,grad_norm: 0.9999998401292322, iteration: 119440
loss: 1.5282670259475708,grad_norm: 0.9999998740596466, iteration: 119441
loss: 1.8819432258605957,grad_norm: 0.9999999067938402, iteration: 119442
loss: 1.7014919519424438,grad_norm: 0.9999999481676413, iteration: 119443
loss: 1.3469972610473633,grad_norm: 0.9999999607891353, iteration: 119444
loss: 1.3062307834625244,grad_norm: 0.9999999849981684, iteration: 119445
loss: 1.927385687828064,grad_norm: 0.9999999748050586, iteration: 119446
loss: 1.5124824047088623,grad_norm: 0.9999999045884159, iteration: 119447
loss: 1.2431951761245728,grad_norm: 0.9999998187741577, iteration: 119448
loss: 1.4004836082458496,grad_norm: 1.000000104279851, iteration: 119449
loss: 1.337544322013855,grad_norm: 0.9999999529914964, iteration: 119450
loss: 1.5885447263717651,grad_norm: 1.0000000248749852, iteration: 119451
loss: 1.3537814617156982,grad_norm: 0.9999998234770909, iteration: 119452
loss: 1.4234533309936523,grad_norm: 0.9999999877033735, iteration: 119453
loss: 1.6161733865737915,grad_norm: 0.9999999349590148, iteration: 119454
loss: 1.3456437587738037,grad_norm: 0.9999999327578845, iteration: 119455
loss: 1.2916494607925415,grad_norm: 0.9999999106033781, iteration: 119456
loss: 1.5841116905212402,grad_norm: 0.9999999911531922, iteration: 119457
loss: 1.5371763706207275,grad_norm: 0.9999999233598534, iteration: 119458
loss: 1.865505576133728,grad_norm: 0.9999998183872173, iteration: 119459
loss: 1.242618203163147,grad_norm: 1.000000016749855, iteration: 119460
loss: 1.6684107780456543,grad_norm: 0.9999998816096021, iteration: 119461
loss: 1.2305564880371094,grad_norm: 0.9999999085273853, iteration: 119462
loss: 1.441961407661438,grad_norm: 0.9999999099367888, iteration: 119463
loss: 1.633774757385254,grad_norm: 0.9999999230793972, iteration: 119464
loss: 1.3465335369110107,grad_norm: 1.0000000101290254, iteration: 119465
loss: 1.6472172737121582,grad_norm: 0.9999999637047109, iteration: 119466
loss: 1.505383849143982,grad_norm: 0.9999997979690228, iteration: 119467
loss: 1.975959300994873,grad_norm: 1.0000000160818525, iteration: 119468
loss: 1.6586788892745972,grad_norm: 0.9999996928438305, iteration: 119469
loss: 2.1671271324157715,grad_norm: 0.9999999783576168, iteration: 119470
loss: 1.8696340322494507,grad_norm: 0.9999999079292663, iteration: 119471
loss: 1.41168212890625,grad_norm: 0.9999999719657264, iteration: 119472
loss: 1.2920719385147095,grad_norm: 0.999999894776412, iteration: 119473
loss: 1.9441989660263062,grad_norm: 0.9999999765920781, iteration: 119474
loss: 1.554669976234436,grad_norm: 0.9999998853173204, iteration: 119475
loss: 1.7464714050292969,grad_norm: 0.9999999179387605, iteration: 119476
loss: 1.6422498226165771,grad_norm: 1.0000000832022762, iteration: 119477
loss: 1.5789079666137695,grad_norm: 0.9999999063556103, iteration: 119478
loss: 1.572055459022522,grad_norm: 0.9999999781206059, iteration: 119479
loss: 1.709648609161377,grad_norm: 0.999999987770822, iteration: 119480
loss: 1.5077505111694336,grad_norm: 0.9999998803111168, iteration: 119481
loss: 1.2956883907318115,grad_norm: 0.9999999996562204, iteration: 119482
loss: 1.3940850496292114,grad_norm: 0.9999999676347338, iteration: 119483
loss: 1.7131130695343018,grad_norm: 0.9999998952620657, iteration: 119484
loss: 1.6710659265518188,grad_norm: 1.0000000313898256, iteration: 119485
loss: 1.340126633644104,grad_norm: 0.9999998096889244, iteration: 119486
loss: 1.3804676532745361,grad_norm: 0.9999998779411281, iteration: 119487
loss: 1.5305521488189697,grad_norm: 0.9999996342988535, iteration: 119488
loss: 1.170442819595337,grad_norm: 0.9999998960427553, iteration: 119489
loss: 1.4316116571426392,grad_norm: 0.9999999875733647, iteration: 119490
loss: 1.4649407863616943,grad_norm: 0.999999881092525, iteration: 119491
loss: 1.6449929475784302,grad_norm: 0.9999997645981256, iteration: 119492
loss: 1.5470693111419678,grad_norm: 0.9999999063943966, iteration: 119493
loss: 1.4281883239746094,grad_norm: 0.9999998652922202, iteration: 119494
loss: 1.5024964809417725,grad_norm: 0.9999999358171165, iteration: 119495
loss: 1.8811143636703491,grad_norm: 0.9999998980157353, iteration: 119496
loss: 1.3650925159454346,grad_norm: 0.9999998492582393, iteration: 119497
loss: 1.4011420011520386,grad_norm: 0.9999999565931286, iteration: 119498
loss: 1.5317453145980835,grad_norm: 0.9999998451884482, iteration: 119499
loss: 1.2569769620895386,grad_norm: 0.9999998865252141, iteration: 119500
loss: 1.432021141052246,grad_norm: 0.9999998802996534, iteration: 119501
loss: 1.3235437870025635,grad_norm: 0.9999998076488471, iteration: 119502
loss: 1.2534592151641846,grad_norm: 0.9999999117587817, iteration: 119503
loss: 1.473862886428833,grad_norm: 0.9999998253691127, iteration: 119504
loss: 1.5441739559173584,grad_norm: 1.0000000356616685, iteration: 119505
loss: 1.4505488872528076,grad_norm: 0.9999999757599439, iteration: 119506
loss: 1.3569306135177612,grad_norm: 0.9999999642347349, iteration: 119507
loss: 1.2158931493759155,grad_norm: 0.9999998076860629, iteration: 119508
loss: 1.5635331869125366,grad_norm: 0.9999998211497796, iteration: 119509
loss: 1.1244014501571655,grad_norm: 0.9999997469204311, iteration: 119510
loss: 1.258907437324524,grad_norm: 0.9999999762729268, iteration: 119511
loss: 1.5701701641082764,grad_norm: 0.9999999491066888, iteration: 119512
loss: 1.465551495552063,grad_norm: 0.9999998611604148, iteration: 119513
loss: 1.5173932313919067,grad_norm: 1.0000000000012474, iteration: 119514
loss: 1.272739052772522,grad_norm: 0.9999999634307939, iteration: 119515
loss: 1.779943823814392,grad_norm: 1.000000022701845, iteration: 119516
loss: 1.5711172819137573,grad_norm: 0.9999998749475039, iteration: 119517
loss: 1.200319528579712,grad_norm: 0.9999998936059358, iteration: 119518
loss: 1.3692994117736816,grad_norm: 0.999999990297665, iteration: 119519
loss: 1.1974592208862305,grad_norm: 0.9999998003550289, iteration: 119520
loss: 1.0709890127182007,grad_norm: 0.9999999016720824, iteration: 119521
loss: 1.2068240642547607,grad_norm: 0.999999972728313, iteration: 119522
loss: 1.3370589017868042,grad_norm: 0.9999998723134241, iteration: 119523
loss: 1.30416738986969,grad_norm: 0.9999999055583886, iteration: 119524
loss: 1.064461588859558,grad_norm: 0.9999996932367977, iteration: 119525
loss: 1.556926965713501,grad_norm: 0.9999999754195866, iteration: 119526
loss: 1.4435352087020874,grad_norm: 0.9999999362694779, iteration: 119527
loss: 1.0793852806091309,grad_norm: 0.9999996476244848, iteration: 119528
loss: 1.5921106338500977,grad_norm: 1.0000000213862286, iteration: 119529
loss: 1.4122374057769775,grad_norm: 0.9999999485824148, iteration: 119530
loss: 1.5596002340316772,grad_norm: 0.999999952912825, iteration: 119531
loss: 1.6048988103866577,grad_norm: 1.0000000201762427, iteration: 119532
loss: 1.5353450775146484,grad_norm: 0.9999998141704917, iteration: 119533
loss: 1.1568585634231567,grad_norm: 0.9999999445829767, iteration: 119534
loss: 1.5797111988067627,grad_norm: 0.9999999127731741, iteration: 119535
loss: 1.9690436124801636,grad_norm: 0.9999998706120772, iteration: 119536
loss: 1.177276849746704,grad_norm: 0.9999996862402415, iteration: 119537
loss: 1.4637658596038818,grad_norm: 0.9999999762042752, iteration: 119538
loss: 1.1958192586898804,grad_norm: 0.9999998144303982, iteration: 119539
loss: 1.2470219135284424,grad_norm: 0.9999995913478414, iteration: 119540
loss: 1.3473083972930908,grad_norm: 0.9999998824360071, iteration: 119541
loss: 1.256213665008545,grad_norm: 0.9999998654389656, iteration: 119542
loss: 1.7179207801818848,grad_norm: 1.0000000430821132, iteration: 119543
loss: 1.3150204420089722,grad_norm: 0.9999996941553101, iteration: 119544
loss: 1.3105747699737549,grad_norm: 0.9999998684898628, iteration: 119545
loss: 1.2093106508255005,grad_norm: 0.9999995646423613, iteration: 119546
loss: 1.2319267988204956,grad_norm: 0.9999998455880866, iteration: 119547
loss: 1.562095046043396,grad_norm: 0.9999997849523133, iteration: 119548
loss: 1.5092849731445312,grad_norm: 0.9999998187612736, iteration: 119549
loss: 1.3534581661224365,grad_norm: 0.9999999025626469, iteration: 119550
loss: 1.3360613584518433,grad_norm: 0.9999998766549207, iteration: 119551
loss: 1.1344693899154663,grad_norm: 0.9999999124379438, iteration: 119552
loss: 1.7257602214813232,grad_norm: 0.9999998731136727, iteration: 119553
loss: 1.353920340538025,grad_norm: 0.999999953246461, iteration: 119554
loss: 1.3356163501739502,grad_norm: 0.9999997134519025, iteration: 119555
loss: 1.4473912715911865,grad_norm: 0.9999997069012521, iteration: 119556
loss: 1.362326741218567,grad_norm: 0.9999999176906104, iteration: 119557
loss: 1.2865583896636963,grad_norm: 0.9999997086152963, iteration: 119558
loss: 1.2554590702056885,grad_norm: 0.9999999073888359, iteration: 119559
loss: 1.2363123893737793,grad_norm: 0.999999901560039, iteration: 119560
loss: 1.5861543416976929,grad_norm: 0.9999999071804072, iteration: 119561
loss: 1.2305907011032104,grad_norm: 0.9999998294045755, iteration: 119562
loss: 1.5206183195114136,grad_norm: 0.9999999571791186, iteration: 119563
loss: 1.3291711807250977,grad_norm: 0.9999999806547979, iteration: 119564
loss: 1.4633182287216187,grad_norm: 0.9999998606799605, iteration: 119565
loss: 1.491206407546997,grad_norm: 0.9999996574803759, iteration: 119566
loss: 1.5232383012771606,grad_norm: 1.000000020003825, iteration: 119567
loss: 1.3226324319839478,grad_norm: 0.9999997495403187, iteration: 119568
loss: 1.328784465789795,grad_norm: 0.9999994697693682, iteration: 119569
loss: 1.424591064453125,grad_norm: 0.9999999704040469, iteration: 119570
loss: 1.8335784673690796,grad_norm: 0.999999900334763, iteration: 119571
loss: 1.4113870859146118,grad_norm: 0.9999998679905664, iteration: 119572
loss: 1.446685552597046,grad_norm: 0.9999999124932406, iteration: 119573
loss: 1.5434017181396484,grad_norm: 0.9999999014229483, iteration: 119574
loss: 1.1061078310012817,grad_norm: 0.9999999136592157, iteration: 119575
loss: 1.48642897605896,grad_norm: 0.9999998779102756, iteration: 119576
loss: 1.262953281402588,grad_norm: 0.9999997648018826, iteration: 119577
loss: 1.2154864072799683,grad_norm: 0.9999999455577226, iteration: 119578
loss: 1.29350745677948,grad_norm: 0.9999998938678807, iteration: 119579
loss: 1.337788462638855,grad_norm: 0.9999997647959505, iteration: 119580
loss: 1.326781153678894,grad_norm: 0.9999995908607564, iteration: 119581
loss: 1.4296033382415771,grad_norm: 0.9999998565422391, iteration: 119582
loss: 1.7586374282836914,grad_norm: 0.9999998782668466, iteration: 119583
loss: 1.1944111585617065,grad_norm: 0.9999999737063229, iteration: 119584
loss: 1.6244055032730103,grad_norm: 0.9999999970800895, iteration: 119585
loss: 1.5057260990142822,grad_norm: 0.9999999618066799, iteration: 119586
loss: 1.4828869104385376,grad_norm: 0.9999999615016395, iteration: 119587
loss: 1.3091497421264648,grad_norm: 0.9999999226520874, iteration: 119588
loss: 1.2238166332244873,grad_norm: 0.9999998795672703, iteration: 119589
loss: 1.3458013534545898,grad_norm: 1.000000035924834, iteration: 119590
loss: 1.3278803825378418,grad_norm: 1.0000000139894794, iteration: 119591
loss: 1.2772481441497803,grad_norm: 0.9999999620721844, iteration: 119592
loss: 1.3841824531555176,grad_norm: 0.9999999518445548, iteration: 119593
loss: 1.4634813070297241,grad_norm: 0.9999998296673278, iteration: 119594
loss: 1.2505842447280884,grad_norm: 0.9999999936406854, iteration: 119595
loss: 1.2481372356414795,grad_norm: 0.9999996450223768, iteration: 119596
loss: 1.4169565439224243,grad_norm: 0.9999999132856524, iteration: 119597
loss: 1.3801664113998413,grad_norm: 0.9999999423606614, iteration: 119598
loss: 1.6612696647644043,grad_norm: 0.9999999701425764, iteration: 119599
loss: 1.3371188640594482,grad_norm: 0.9999997330501486, iteration: 119600
loss: 1.511687159538269,grad_norm: 0.9999999259715199, iteration: 119601
loss: 1.5232024192810059,grad_norm: 1.0000000214125113, iteration: 119602
loss: 1.294784665107727,grad_norm: 0.9999998054453133, iteration: 119603
loss: 1.5104461908340454,grad_norm: 0.9999999361792998, iteration: 119604
loss: 1.509960412979126,grad_norm: 1.0000000209828315, iteration: 119605
loss: 1.4750651121139526,grad_norm: 0.9999998062620684, iteration: 119606
loss: 1.351719617843628,grad_norm: 0.9999998261390595, iteration: 119607
loss: 1.4625219106674194,grad_norm: 0.9999999123714964, iteration: 119608
loss: 1.1274261474609375,grad_norm: 0.9999990923346359, iteration: 119609
loss: 1.3281916379928589,grad_norm: 1.0000000637427109, iteration: 119610
loss: 1.300498366355896,grad_norm: 0.9999999241030075, iteration: 119611
loss: 1.2326682806015015,grad_norm: 0.9999998437922745, iteration: 119612
loss: 1.3078557252883911,grad_norm: 0.9999999314624731, iteration: 119613
loss: 1.5318822860717773,grad_norm: 0.9999996510249464, iteration: 119614
loss: 1.3297662734985352,grad_norm: 0.9999998505465855, iteration: 119615
loss: 1.2181541919708252,grad_norm: 0.9999996582062017, iteration: 119616
loss: 1.2216532230377197,grad_norm: 0.9999999467806823, iteration: 119617
loss: 1.2266297340393066,grad_norm: 0.99999995552534, iteration: 119618
loss: 1.6075775623321533,grad_norm: 0.9999999080998516, iteration: 119619
loss: 1.3648911714553833,grad_norm: 0.9999999887069868, iteration: 119620
loss: 1.3138302564620972,grad_norm: 0.99999999223596, iteration: 119621
loss: 1.275694727897644,grad_norm: 0.9999998194655566, iteration: 119622
loss: 1.231543779373169,grad_norm: 0.9999995733132498, iteration: 119623
loss: 1.1213325262069702,grad_norm: 0.9999997406379614, iteration: 119624
loss: 1.4011507034301758,grad_norm: 0.9999998913425195, iteration: 119625
loss: 1.5515497922897339,grad_norm: 0.9999998919982823, iteration: 119626
loss: 1.3101669549942017,grad_norm: 0.999999929091622, iteration: 119627
loss: 1.2664896249771118,grad_norm: 0.9999998645869399, iteration: 119628
loss: 1.2702878713607788,grad_norm: 0.9999995868140069, iteration: 119629
loss: 1.220765233039856,grad_norm: 0.9999999454682305, iteration: 119630
loss: 1.2992238998413086,grad_norm: 0.9999998810263383, iteration: 119631
loss: 1.2270694971084595,grad_norm: 0.9999998927200691, iteration: 119632
loss: 1.4255048036575317,grad_norm: 0.9999998808885314, iteration: 119633
loss: 1.3221497535705566,grad_norm: 0.9999998052063769, iteration: 119634
loss: 1.3594449758529663,grad_norm: 0.9999998784049929, iteration: 119635
loss: 1.2432239055633545,grad_norm: 0.9999998561233382, iteration: 119636
loss: 1.4630709886550903,grad_norm: 1.0000000202686226, iteration: 119637
loss: 1.3364276885986328,grad_norm: 0.9999998891545822, iteration: 119638
loss: 1.219576358795166,grad_norm: 1.0000000709131511, iteration: 119639
loss: 1.2736842632293701,grad_norm: 0.9999997027075727, iteration: 119640
loss: 1.2530386447906494,grad_norm: 0.9999996245641063, iteration: 119641
loss: 1.2081599235534668,grad_norm: 0.9999997765603144, iteration: 119642
loss: 1.1731501817703247,grad_norm: 0.9999999591363308, iteration: 119643
loss: 1.418330430984497,grad_norm: 0.9999998649245514, iteration: 119644
loss: 1.3430843353271484,grad_norm: 0.9999998181861497, iteration: 119645
loss: 1.3695048093795776,grad_norm: 0.9999999443620208, iteration: 119646
loss: 1.2708549499511719,grad_norm: 0.9999999737304105, iteration: 119647
loss: 1.1076020002365112,grad_norm: 0.9999999726177666, iteration: 119648
loss: 1.2277878522872925,grad_norm: 0.9999997636646832, iteration: 119649
loss: 1.1397180557250977,grad_norm: 0.9999999571879586, iteration: 119650
loss: 1.245409369468689,grad_norm: 0.9999998781675652, iteration: 119651
loss: 1.212367296218872,grad_norm: 0.9999995166165373, iteration: 119652
loss: 1.1994731426239014,grad_norm: 0.9999998818541089, iteration: 119653
loss: 1.2762125730514526,grad_norm: 0.9999996838958306, iteration: 119654
loss: 1.3156408071517944,grad_norm: 0.9999999098525375, iteration: 119655
loss: 1.7177644968032837,grad_norm: 0.9999999473792754, iteration: 119656
loss: 1.1221227645874023,grad_norm: 0.999999704406089, iteration: 119657
loss: 1.426659345626831,grad_norm: 0.9999998385386094, iteration: 119658
loss: 1.2957029342651367,grad_norm: 0.9999999306602486, iteration: 119659
loss: 1.3918497562408447,grad_norm: 0.9999995477599312, iteration: 119660
loss: 1.3483493328094482,grad_norm: 0.9999996330515792, iteration: 119661
loss: 1.252375841140747,grad_norm: 0.9999999260311212, iteration: 119662
loss: 1.611148476600647,grad_norm: 0.9999998359220927, iteration: 119663
loss: 1.1853564977645874,grad_norm: 0.999999905461704, iteration: 119664
loss: 1.2757854461669922,grad_norm: 0.9999998186073348, iteration: 119665
loss: 1.4451268911361694,grad_norm: 0.9999999022713658, iteration: 119666
loss: 1.1568278074264526,grad_norm: 0.9999997575043995, iteration: 119667
loss: 1.4660204648971558,grad_norm: 0.9999999946190614, iteration: 119668
loss: 1.2739005088806152,grad_norm: 0.9999996673152228, iteration: 119669
loss: 1.4475115537643433,grad_norm: 0.9999999119764807, iteration: 119670
loss: 1.3082654476165771,grad_norm: 0.9999996392174288, iteration: 119671
loss: 1.5793992280960083,grad_norm: 0.9999998815090531, iteration: 119672
loss: 1.1526243686676025,grad_norm: 0.9999994630053474, iteration: 119673
loss: 1.1810846328735352,grad_norm: 0.9999995606754422, iteration: 119674
loss: 1.2067458629608154,grad_norm: 0.9999999396870884, iteration: 119675
loss: 1.350642204284668,grad_norm: 0.9999994553727487, iteration: 119676
loss: 1.4367507696151733,grad_norm: 0.9999999455241524, iteration: 119677
loss: 1.3996143341064453,grad_norm: 0.9999998703338612, iteration: 119678
loss: 1.2992767095565796,grad_norm: 0.9999999245344328, iteration: 119679
loss: 1.3972713947296143,grad_norm: 0.9999998989171678, iteration: 119680
loss: 1.3323293924331665,grad_norm: 0.999999965172218, iteration: 119681
loss: 1.6718906164169312,grad_norm: 1.0000000765256487, iteration: 119682
loss: 1.2039507627487183,grad_norm: 0.9999998606188205, iteration: 119683
loss: 1.4364665746688843,grad_norm: 0.9999999533414503, iteration: 119684
loss: 1.1093993186950684,grad_norm: 0.9999996709249857, iteration: 119685
loss: 1.31107759475708,grad_norm: 0.9999999011524278, iteration: 119686
loss: 1.1833782196044922,grad_norm: 0.999999865090419, iteration: 119687
loss: 1.3868337869644165,grad_norm: 0.9999996707039609, iteration: 119688
loss: 1.2155826091766357,grad_norm: 0.9999994965799195, iteration: 119689
loss: 1.2459179162979126,grad_norm: 0.9999998657905573, iteration: 119690
loss: 1.0766311883926392,grad_norm: 0.9999998309919059, iteration: 119691
loss: 1.1368433237075806,grad_norm: 0.9999999302982377, iteration: 119692
loss: 1.4767860174179077,grad_norm: 0.9999998384300417, iteration: 119693
loss: 1.2440943717956543,grad_norm: 0.999999948079388, iteration: 119694
loss: 1.2389534711837769,grad_norm: 0.9999997504131505, iteration: 119695
loss: 1.3159507513046265,grad_norm: 0.9999998643634994, iteration: 119696
loss: 1.3171021938323975,grad_norm: 0.9999995655846157, iteration: 119697
loss: 1.1436631679534912,grad_norm: 0.9999992458946483, iteration: 119698
loss: 1.3629571199417114,grad_norm: 0.9999998589679729, iteration: 119699
loss: 1.3253872394561768,grad_norm: 0.9999999088891848, iteration: 119700
loss: 1.2010899782180786,grad_norm: 0.9999998915319779, iteration: 119701
loss: 1.2377262115478516,grad_norm: 0.9999998358430963, iteration: 119702
loss: 1.2743051052093506,grad_norm: 0.9999993736522852, iteration: 119703
loss: 1.1012840270996094,grad_norm: 0.9999990185323239, iteration: 119704
loss: 1.169080138206482,grad_norm: 0.9999998735249886, iteration: 119705
loss: 1.2238004207611084,grad_norm: 0.999999631181734, iteration: 119706
loss: 1.1142743825912476,grad_norm: 0.9999992717194184, iteration: 119707
loss: 1.2137384414672852,grad_norm: 0.9999999899045593, iteration: 119708
loss: 1.1158124208450317,grad_norm: 0.999999925607045, iteration: 119709
loss: 1.2114893198013306,grad_norm: 0.9999999197695679, iteration: 119710
loss: 1.1294070482254028,grad_norm: 0.999999486186362, iteration: 119711
loss: 1.4460506439208984,grad_norm: 0.9999999240687342, iteration: 119712
loss: 1.4404577016830444,grad_norm: 0.9999999728323761, iteration: 119713
loss: 1.2715495824813843,grad_norm: 0.9999999327676093, iteration: 119714
loss: 1.4403064250946045,grad_norm: 0.9999999112365661, iteration: 119715
loss: 1.4067398309707642,grad_norm: 0.9999998910495456, iteration: 119716
loss: 1.2860453128814697,grad_norm: 0.9999999426009376, iteration: 119717
loss: 1.5295614004135132,grad_norm: 0.9999999648002761, iteration: 119718
loss: 1.1379797458648682,grad_norm: 0.9999995389771681, iteration: 119719
loss: 1.2904059886932373,grad_norm: 0.9999998776532318, iteration: 119720
loss: 1.1202504634857178,grad_norm: 0.9999999379502453, iteration: 119721
loss: 1.3541322946548462,grad_norm: 0.9999994512741301, iteration: 119722
loss: 1.306742548942566,grad_norm: 0.999999935157931, iteration: 119723
loss: 1.298538327217102,grad_norm: 0.9999992965470489, iteration: 119724
loss: 1.4869107007980347,grad_norm: 0.9999998303243484, iteration: 119725
loss: 1.226025104522705,grad_norm: 0.9999996604088303, iteration: 119726
loss: 1.1797518730163574,grad_norm: 0.999999399481471, iteration: 119727
loss: 1.5280448198318481,grad_norm: 0.999999791042437, iteration: 119728
loss: 1.3876110315322876,grad_norm: 0.9999996757344372, iteration: 119729
loss: 1.3776198625564575,grad_norm: 0.9999994888916642, iteration: 119730
loss: 1.2073465585708618,grad_norm: 0.999999591634507, iteration: 119731
loss: 1.7203128337860107,grad_norm: 0.9999997738196791, iteration: 119732
loss: 1.1363590955734253,grad_norm: 0.9999999419294014, iteration: 119733
loss: 1.4040464162826538,grad_norm: 0.9999998758826782, iteration: 119734
loss: 1.321088194847107,grad_norm: 0.9999996259687988, iteration: 119735
loss: 1.4092860221862793,grad_norm: 0.9999998244569118, iteration: 119736
loss: 1.3885732889175415,grad_norm: 1.00000007756885, iteration: 119737
loss: 1.3931888341903687,grad_norm: 0.9999998332269138, iteration: 119738
loss: 1.5837429761886597,grad_norm: 0.9999999146148184, iteration: 119739
loss: 1.3844387531280518,grad_norm: 0.999999949900212, iteration: 119740
loss: 1.2507661581039429,grad_norm: 0.9999998748462433, iteration: 119741
loss: 1.43701171875,grad_norm: 0.9999998130094196, iteration: 119742
loss: 1.2761719226837158,grad_norm: 0.9999998420876186, iteration: 119743
loss: 1.3278871774673462,grad_norm: 0.9999998813723431, iteration: 119744
loss: 1.3224921226501465,grad_norm: 1.0000000390543757, iteration: 119745
loss: 1.400010585784912,grad_norm: 0.9999997493998303, iteration: 119746
loss: 1.2733869552612305,grad_norm: 0.9999997580918769, iteration: 119747
loss: 1.2403302192687988,grad_norm: 0.9999999079275094, iteration: 119748
loss: 1.3888678550720215,grad_norm: 0.9999999352530706, iteration: 119749
loss: 1.2252790927886963,grad_norm: 0.9999997781340321, iteration: 119750
loss: 1.6615277528762817,grad_norm: 1.000000049090609, iteration: 119751
loss: 1.1514325141906738,grad_norm: 0.9999992970194416, iteration: 119752
loss: 1.163653016090393,grad_norm: 0.9999996375257727, iteration: 119753
loss: 1.2625654935836792,grad_norm: 0.9999999294798417, iteration: 119754
loss: 1.4776525497436523,grad_norm: 0.9999998596251037, iteration: 119755
loss: 1.2976843118667603,grad_norm: 1.000000058646564, iteration: 119756
loss: 1.305533528327942,grad_norm: 0.9999999571181309, iteration: 119757
loss: 1.2529093027114868,grad_norm: 0.9999997512212101, iteration: 119758
loss: 1.1295539140701294,grad_norm: 0.9999997432790565, iteration: 119759
loss: 1.1123589277267456,grad_norm: 0.9999992888918677, iteration: 119760
loss: 1.301592230796814,grad_norm: 1.0000000554098578, iteration: 119761
loss: 1.1375741958618164,grad_norm: 0.9999994392450882, iteration: 119762
loss: 1.4830522537231445,grad_norm: 0.9999998716282285, iteration: 119763
loss: 1.3887042999267578,grad_norm: 0.9999998256359819, iteration: 119764
loss: 1.2240824699401855,grad_norm: 0.9999999062420548, iteration: 119765
loss: 1.1484471559524536,grad_norm: 0.9999997408938103, iteration: 119766
loss: 1.468298316001892,grad_norm: 0.9999995381723022, iteration: 119767
loss: 1.7252037525177002,grad_norm: 0.9999998912604554, iteration: 119768
loss: 1.2239214181900024,grad_norm: 0.9999995444335953, iteration: 119769
loss: 1.52887761592865,grad_norm: 0.9999999826702692, iteration: 119770
loss: 1.1954277753829956,grad_norm: 0.9999997985640506, iteration: 119771
loss: 1.240593433380127,grad_norm: 0.9999994387883357, iteration: 119772
loss: 1.3706457614898682,grad_norm: 0.9999999487997091, iteration: 119773
loss: 1.1115514039993286,grad_norm: 1.000000049278497, iteration: 119774
loss: 1.2743430137634277,grad_norm: 0.9999999441537198, iteration: 119775
loss: 1.2665894031524658,grad_norm: 0.9999993688991439, iteration: 119776
loss: 1.2741199731826782,grad_norm: 0.9999999683891709, iteration: 119777
loss: 1.2452317476272583,grad_norm: 1.0000000948298138, iteration: 119778
loss: 1.2793792486190796,grad_norm: 0.9999998806603572, iteration: 119779
loss: 1.2093400955200195,grad_norm: 0.9999998827546392, iteration: 119780
loss: 1.3554563522338867,grad_norm: 0.9999999048024041, iteration: 119781
loss: 1.5384507179260254,grad_norm: 0.999999857111236, iteration: 119782
loss: 1.1201252937316895,grad_norm: 0.9999995554104073, iteration: 119783
loss: 1.2954570055007935,grad_norm: 0.9999999017636405, iteration: 119784
loss: 1.2855461835861206,grad_norm: 1.0000000100846096, iteration: 119785
loss: 1.1083101034164429,grad_norm: 0.9999993808013266, iteration: 119786
loss: 1.1894084215164185,grad_norm: 0.9999999259742087, iteration: 119787
loss: 1.0841902494430542,grad_norm: 0.9999998068069466, iteration: 119788
loss: 1.0379486083984375,grad_norm: 1.000000061397883, iteration: 119789
loss: 1.30941903591156,grad_norm: 0.9999996982423518, iteration: 119790
loss: 1.479042887687683,grad_norm: 0.999999794117274, iteration: 119791
loss: 1.1800358295440674,grad_norm: 0.9999992887720881, iteration: 119792
loss: 1.3291640281677246,grad_norm: 0.9999998992739312, iteration: 119793
loss: 1.2171732187271118,grad_norm: 0.9999999310554405, iteration: 119794
loss: 1.3215645551681519,grad_norm: 0.9999999141986659, iteration: 119795
loss: 1.0850518941879272,grad_norm: 0.9999993430917755, iteration: 119796
loss: 1.2128422260284424,grad_norm: 0.9999990422314063, iteration: 119797
loss: 1.3858578205108643,grad_norm: 1.000000017004135, iteration: 119798
loss: 1.187515139579773,grad_norm: 0.9999999418109129, iteration: 119799
loss: 1.1599549055099487,grad_norm: 0.9999999203032687, iteration: 119800
loss: 1.0991450548171997,grad_norm: 0.9999994989060308, iteration: 119801
loss: 1.1008470058441162,grad_norm: 0.9999997032562511, iteration: 119802
loss: 1.2347562313079834,grad_norm: 0.9999995323583836, iteration: 119803
loss: 1.0733858346939087,grad_norm: 0.9999996898224456, iteration: 119804
loss: 1.0278128385543823,grad_norm: 0.9999996118077793, iteration: 119805
loss: 1.1522712707519531,grad_norm: 1.0000000405704332, iteration: 119806
loss: 1.1322394609451294,grad_norm: 0.9999995201415546, iteration: 119807
loss: 1.2153748273849487,grad_norm: 0.99999952060478, iteration: 119808
loss: 1.357803463935852,grad_norm: 0.999999921635446, iteration: 119809
loss: 1.149040937423706,grad_norm: 0.9999993070941066, iteration: 119810
loss: 1.2987545728683472,grad_norm: 0.9999999407289252, iteration: 119811
loss: 1.1055201292037964,grad_norm: 0.9574951080600048, iteration: 119812
loss: 1.175591230392456,grad_norm: 0.9999995804570998, iteration: 119813
loss: 1.1048270463943481,grad_norm: 0.9999993144896178, iteration: 119814
loss: 1.1627253293991089,grad_norm: 0.9999999939517842, iteration: 119815
loss: 1.0736573934555054,grad_norm: 0.9999992528658199, iteration: 119816
loss: 1.2008055448532104,grad_norm: 0.9999999162828684, iteration: 119817
loss: 1.0810731649398804,grad_norm: 0.9999998886875545, iteration: 119818
loss: 1.1620450019836426,grad_norm: 0.9999996576670928, iteration: 119819
loss: 1.1000843048095703,grad_norm: 0.9999998376249656, iteration: 119820
loss: 1.2806789875030518,grad_norm: 0.9999998583314327, iteration: 119821
loss: 1.2296124696731567,grad_norm: 0.9999998131749108, iteration: 119822
loss: 1.0673823356628418,grad_norm: 0.9999999295858238, iteration: 119823
loss: 1.3330800533294678,grad_norm: 0.9999997781517552, iteration: 119824
loss: 1.1359695196151733,grad_norm: 1.0000000187082287, iteration: 119825
loss: 1.5105048418045044,grad_norm: 0.9999999948261505, iteration: 119826
loss: 1.5602283477783203,grad_norm: 0.9999999353597275, iteration: 119827
loss: 1.283621907234192,grad_norm: 0.9999996078313559, iteration: 119828
loss: 1.1377031803131104,grad_norm: 0.9999999893033319, iteration: 119829
loss: 1.0279102325439453,grad_norm: 0.9999994700461421, iteration: 119830
loss: 1.1213349103927612,grad_norm: 0.9999999481068358, iteration: 119831
loss: 1.314610242843628,grad_norm: 0.9999999867987655, iteration: 119832
loss: 1.3321943283081055,grad_norm: 0.9999999359340404, iteration: 119833
loss: 1.0778716802597046,grad_norm: 0.9999992686266439, iteration: 119834
loss: 1.1104943752288818,grad_norm: 0.9999992775743957, iteration: 119835
loss: 1.1876825094223022,grad_norm: 0.9999998218792981, iteration: 119836
loss: 1.115373969078064,grad_norm: 0.9999992290834058, iteration: 119837
loss: 1.258001685142517,grad_norm: 0.9999995893002975, iteration: 119838
loss: 1.038919448852539,grad_norm: 0.9999992049398644, iteration: 119839
loss: 1.1834121942520142,grad_norm: 0.9999995702883638, iteration: 119840
loss: 1.2238045930862427,grad_norm: 0.9999996412881162, iteration: 119841
loss: 1.1895158290863037,grad_norm: 0.9999998976931985, iteration: 119842
loss: 1.1339882612228394,grad_norm: 0.9999996403678514, iteration: 119843
loss: 1.2247763872146606,grad_norm: 1.0000000392522592, iteration: 119844
loss: 1.3765138387680054,grad_norm: 0.9999999111039154, iteration: 119845
loss: 1.0537596940994263,grad_norm: 0.9999993202173825, iteration: 119846
loss: 1.476405143737793,grad_norm: 0.9999998943066857, iteration: 119847
loss: 1.2679810523986816,grad_norm: 0.9999998773322294, iteration: 119848
loss: 1.2311625480651855,grad_norm: 0.9999992703100816, iteration: 119849
loss: 1.2246692180633545,grad_norm: 0.9999996369932388, iteration: 119850
loss: 1.1406620740890503,grad_norm: 0.9999997098863537, iteration: 119851
loss: 1.1482841968536377,grad_norm: 0.9999995676407749, iteration: 119852
loss: 1.1636079549789429,grad_norm: 0.9999993908140699, iteration: 119853
loss: 1.1241430044174194,grad_norm: 0.9999999200503367, iteration: 119854
loss: 1.322256326675415,grad_norm: 0.9999997471458846, iteration: 119855
loss: 1.1942119598388672,grad_norm: 1.0000000780129084, iteration: 119856
loss: 1.3187049627304077,grad_norm: 0.9999997513011065, iteration: 119857
loss: 1.1857973337173462,grad_norm: 0.9999993558848074, iteration: 119858
loss: 1.1336501836776733,grad_norm: 0.9999998523326997, iteration: 119859
loss: 1.4942666292190552,grad_norm: 0.99999999327927, iteration: 119860
loss: 1.064803123474121,grad_norm: 0.9999997749211316, iteration: 119861
loss: 1.1486080884933472,grad_norm: 0.9999999478942152, iteration: 119862
loss: 1.2261617183685303,grad_norm: 0.9999994636802937, iteration: 119863
loss: 1.3026878833770752,grad_norm: 0.9999992367499054, iteration: 119864
loss: 1.1086935997009277,grad_norm: 0.9999997131488122, iteration: 119865
loss: 1.168937087059021,grad_norm: 0.9999996199643643, iteration: 119866
loss: 1.0963246822357178,grad_norm: 0.9999998424221284, iteration: 119867
loss: 1.3548884391784668,grad_norm: 0.9999998336174494, iteration: 119868
loss: 1.0380184650421143,grad_norm: 0.9999994835951936, iteration: 119869
loss: 1.1777219772338867,grad_norm: 0.9999995231382539, iteration: 119870
loss: 1.266876459121704,grad_norm: 0.9999999535220366, iteration: 119871
loss: 1.1713309288024902,grad_norm: 0.9999990221927398, iteration: 119872
loss: 1.21101975440979,grad_norm: 0.9999996408462662, iteration: 119873
loss: 1.2188034057617188,grad_norm: 0.9999996187007547, iteration: 119874
loss: 1.1893091201782227,grad_norm: 0.999999445588968, iteration: 119875
loss: 1.3005173206329346,grad_norm: 0.9999998346267996, iteration: 119876
loss: 1.1244535446166992,grad_norm: 0.9999990922765772, iteration: 119877
loss: 1.1485244035720825,grad_norm: 0.99999922811548, iteration: 119878
loss: 1.0745452642440796,grad_norm: 0.999999766200951, iteration: 119879
loss: 1.1802445650100708,grad_norm: 0.9999996710008149, iteration: 119880
loss: 1.2625325918197632,grad_norm: 0.9999996754768513, iteration: 119881
loss: 1.2405307292938232,grad_norm: 0.9999998814880171, iteration: 119882
loss: 1.1259078979492188,grad_norm: 0.9999998230956962, iteration: 119883
loss: 1.1564396619796753,grad_norm: 0.9999995100144449, iteration: 119884
loss: 1.0729901790618896,grad_norm: 0.9999999890937175, iteration: 119885
loss: 1.207824468612671,grad_norm: 0.9999995808392291, iteration: 119886
loss: 1.3013144731521606,grad_norm: 0.9999998854976044, iteration: 119887
loss: 1.2623804807662964,grad_norm: 0.9999999645429284, iteration: 119888
loss: 1.1870851516723633,grad_norm: 0.9999996974077685, iteration: 119889
loss: 1.0605270862579346,grad_norm: 0.9999997507369711, iteration: 119890
loss: 1.174065351486206,grad_norm: 0.9999998769164817, iteration: 119891
loss: 1.140195608139038,grad_norm: 0.9999996879897514, iteration: 119892
loss: 1.1131221055984497,grad_norm: 0.9999997066096503, iteration: 119893
loss: 1.0313323736190796,grad_norm: 0.999999929192652, iteration: 119894
loss: 1.1269372701644897,grad_norm: 0.9999995887487994, iteration: 119895
loss: 1.1875293254852295,grad_norm: 0.9999993615363473, iteration: 119896
loss: 1.3943089246749878,grad_norm: 0.9999999135615257, iteration: 119897
loss: 1.0555723905563354,grad_norm: 0.9999993086044896, iteration: 119898
loss: 1.1215678453445435,grad_norm: 0.9999997533852474, iteration: 119899
loss: 1.136793851852417,grad_norm: 0.999999998716948, iteration: 119900
loss: 1.2695915699005127,grad_norm: 0.9999999424001986, iteration: 119901
loss: 1.2750390768051147,grad_norm: 0.9999997360212469, iteration: 119902
loss: 1.1670833826065063,grad_norm: 0.9999993864616271, iteration: 119903
loss: 1.2357628345489502,grad_norm: 0.9999996318150378, iteration: 119904
loss: 1.174659013748169,grad_norm: 0.9999996269981484, iteration: 119905
loss: 1.3617913722991943,grad_norm: 0.9999998326387257, iteration: 119906
loss: 1.1432677507400513,grad_norm: 0.9999998491912956, iteration: 119907
loss: 1.1751422882080078,grad_norm: 1.0000000051437472, iteration: 119908
loss: 1.0618937015533447,grad_norm: 0.9999992188794913, iteration: 119909
loss: 1.1525717973709106,grad_norm: 0.9999994108399479, iteration: 119910
loss: 1.0747932195663452,grad_norm: 0.9999995381503264, iteration: 119911
loss: 1.3909486532211304,grad_norm: 0.9999999471890386, iteration: 119912
loss: 1.1314351558685303,grad_norm: 0.9999996751151264, iteration: 119913
loss: 1.1748831272125244,grad_norm: 0.9999995177588024, iteration: 119914
loss: 1.0228872299194336,grad_norm: 0.9999997441549916, iteration: 119915
loss: 0.9871204495429993,grad_norm: 0.9512720085791374, iteration: 119916
loss: 1.032235026359558,grad_norm: 0.9999992835918069, iteration: 119917
loss: 1.0612258911132812,grad_norm: 0.999999396862924, iteration: 119918
loss: 1.028262734413147,grad_norm: 0.9999993708025245, iteration: 119919
loss: 1.084984540939331,grad_norm: 0.9999996596960932, iteration: 119920
loss: 1.10209059715271,grad_norm: 0.9999996507113481, iteration: 119921
loss: 1.4301884174346924,grad_norm: 0.9999997856877758, iteration: 119922
loss: 1.1247413158416748,grad_norm: 0.9999995823997213, iteration: 119923
loss: 1.2826298475265503,grad_norm: 0.9999997223735323, iteration: 119924
loss: 1.067506194114685,grad_norm: 0.9999995692414982, iteration: 119925
loss: 1.1307841539382935,grad_norm: 0.9999998688774467, iteration: 119926
loss: 1.1239547729492188,grad_norm: 0.9999998106055259, iteration: 119927
loss: 0.9936965703964233,grad_norm: 0.9999995494848135, iteration: 119928
loss: 1.1047066450119019,grad_norm: 0.9999992198603157, iteration: 119929
loss: 1.0370625257492065,grad_norm: 0.9999995860403321, iteration: 119930
loss: 1.1171685457229614,grad_norm: 0.9999997561184081, iteration: 119931
loss: 1.2272483110427856,grad_norm: 0.999999915066368, iteration: 119932
loss: 1.1360886096954346,grad_norm: 0.9999994856790788, iteration: 119933
loss: 1.0821884870529175,grad_norm: 0.9999997352135291, iteration: 119934
loss: 1.0951899290084839,grad_norm: 0.9999991660250581, iteration: 119935
loss: 1.158619999885559,grad_norm: 0.9999998408087797, iteration: 119936
loss: 1.2322642803192139,grad_norm: 0.9999997677260689, iteration: 119937
loss: 1.0182058811187744,grad_norm: 0.9999993488329478, iteration: 119938
loss: 1.061193823814392,grad_norm: 0.9999992706032128, iteration: 119939
loss: 1.0383007526397705,grad_norm: 1.00000011993945, iteration: 119940
loss: 1.2231577634811401,grad_norm: 0.9999993992125461, iteration: 119941
loss: 1.347244143486023,grad_norm: 0.9999998052320714, iteration: 119942
loss: 1.3033466339111328,grad_norm: 0.9999999219502295, iteration: 119943
loss: 1.1292740106582642,grad_norm: 0.9999997851131649, iteration: 119944
loss: 1.300829529762268,grad_norm: 0.999999599032061, iteration: 119945
loss: 1.1326406002044678,grad_norm: 0.9999995402958859, iteration: 119946
loss: 1.4064183235168457,grad_norm: 0.9999998511570994, iteration: 119947
loss: 1.1928857564926147,grad_norm: 0.9999994528623413, iteration: 119948
loss: 1.3650423288345337,grad_norm: 0.999999487070988, iteration: 119949
loss: 1.4921902418136597,grad_norm: 0.9999999986389256, iteration: 119950
loss: 1.3970710039138794,grad_norm: 0.99999984006868, iteration: 119951
loss: 1.2184721231460571,grad_norm: 1.0000000570674987, iteration: 119952
loss: 1.3255469799041748,grad_norm: 0.999999840076567, iteration: 119953
loss: 1.033768892288208,grad_norm: 0.9999995045402961, iteration: 119954
loss: 1.140237808227539,grad_norm: 0.9999998423651942, iteration: 119955
loss: 1.1752052307128906,grad_norm: 0.9999999739345728, iteration: 119956
loss: 1.123530387878418,grad_norm: 0.9999996433870424, iteration: 119957
loss: 1.0346319675445557,grad_norm: 0.9999995899777474, iteration: 119958
loss: 1.2170665264129639,grad_norm: 0.9999999333563474, iteration: 119959
loss: 1.0218126773834229,grad_norm: 0.9999994720191636, iteration: 119960
loss: 1.0282604694366455,grad_norm: 0.8643314215241064, iteration: 119961
loss: 1.1848047971725464,grad_norm: 0.9999998710534006, iteration: 119962
loss: 1.1029958724975586,grad_norm: 0.9999991879829544, iteration: 119963
loss: 1.0737651586532593,grad_norm: 0.9999991318289562, iteration: 119964
loss: 1.1121903657913208,grad_norm: 0.9999998484204071, iteration: 119965
loss: 0.9585444331169128,grad_norm: 0.9999994531074617, iteration: 119966
loss: 1.0293086767196655,grad_norm: 0.9999991507927519, iteration: 119967
loss: 1.045393943786621,grad_norm: 0.9999992215334428, iteration: 119968
loss: 1.1294617652893066,grad_norm: 0.9999994844551326, iteration: 119969
loss: 1.0094349384307861,grad_norm: 0.999999521244345, iteration: 119970
loss: 1.0244563817977905,grad_norm: 0.9999993022158447, iteration: 119971
loss: 1.1213740110397339,grad_norm: 0.9999992911171669, iteration: 119972
loss: 1.0297824144363403,grad_norm: 0.9999993450929137, iteration: 119973
loss: 1.0518783330917358,grad_norm: 0.9999997250094517, iteration: 119974
loss: 1.1505259275436401,grad_norm: 0.9999996200750214, iteration: 119975
loss: 1.1276376247406006,grad_norm: 1.0000000302837315, iteration: 119976
loss: 1.0599792003631592,grad_norm: 0.9999993889409053, iteration: 119977
loss: 1.1698726415634155,grad_norm: 0.9999999410317414, iteration: 119978
loss: 1.0202406644821167,grad_norm: 0.7767507516367865, iteration: 119979
loss: 1.0565475225448608,grad_norm: 0.9999998766659809, iteration: 119980
loss: 1.0682889223098755,grad_norm: 0.8401710568750158, iteration: 119981
loss: 1.1751750707626343,grad_norm: 0.9999993913624547, iteration: 119982
loss: 1.001241683959961,grad_norm: 0.8842527793813543, iteration: 119983
loss: 1.04115891456604,grad_norm: 0.9999996513514184, iteration: 119984
loss: 1.0015537738800049,grad_norm: 0.9495867409949573, iteration: 119985
loss: 1.03720223903656,grad_norm: 1.0000000130898519, iteration: 119986
loss: 1.148296594619751,grad_norm: 0.9999996394835734, iteration: 119987
loss: 1.0430315732955933,grad_norm: 0.9999992911362281, iteration: 119988
loss: 1.0745447874069214,grad_norm: 0.9999991641865743, iteration: 119989
loss: 0.9562174081802368,grad_norm: 0.9999999881441316, iteration: 119990
loss: 1.1077773571014404,grad_norm: 0.9999994532619907, iteration: 119991
loss: 1.068658471107483,grad_norm: 0.9999999463929203, iteration: 119992
loss: 1.088585615158081,grad_norm: 0.9999994328119558, iteration: 119993
loss: 1.103845477104187,grad_norm: 0.9999999255478049, iteration: 119994
loss: 1.1097021102905273,grad_norm: 0.9999991641170846, iteration: 119995
loss: 1.124803066253662,grad_norm: 0.9999998715023773, iteration: 119996
loss: 1.056637167930603,grad_norm: 0.9999991179538401, iteration: 119997
loss: 1.3149057626724243,grad_norm: 0.9999997948699826, iteration: 119998
loss: 1.025955319404602,grad_norm: 0.999999793171592, iteration: 119999
loss: 1.1055644750595093,grad_norm: 0.999999586043312, iteration: 120000
Evaluating at step 120000
{'val': 1.0258961897343397, 'test': 2.4925505101871184}
loss: 1.2638498544692993,grad_norm: 0.9999994560627623, iteration: 120001
loss: 1.1318846940994263,grad_norm: 0.9999998991321735, iteration: 120002
loss: 0.9897100329399109,grad_norm: 0.9999991799908681, iteration: 120003
loss: 1.20587158203125,grad_norm: 0.9999999073296739, iteration: 120004
loss: 1.1149195432662964,grad_norm: 0.9999997315733427, iteration: 120005
loss: 1.1125789880752563,grad_norm: 1.0000000135729665, iteration: 120006
loss: 1.1726878881454468,grad_norm: 0.9999999905185356, iteration: 120007
loss: 1.2142727375030518,grad_norm: 0.9999995266913542, iteration: 120008
loss: 1.1872235536575317,grad_norm: 0.999999965420068, iteration: 120009
loss: 1.2302547693252563,grad_norm: 0.999999518700231, iteration: 120010
loss: 1.1079312562942505,grad_norm: 0.9999993655423675, iteration: 120011
loss: 1.1186769008636475,grad_norm: 0.9999997157003903, iteration: 120012
loss: 1.1619937419891357,grad_norm: 0.9999998734805169, iteration: 120013
loss: 1.1233034133911133,grad_norm: 0.9999993558477797, iteration: 120014
loss: 1.3008842468261719,grad_norm: 0.9999995575062466, iteration: 120015
loss: 1.180332899093628,grad_norm: 0.9999995869682782, iteration: 120016
loss: 1.1953505277633667,grad_norm: 0.9999997908034467, iteration: 120017
loss: 1.05296790599823,grad_norm: 0.9999991676764315, iteration: 120018
loss: 1.0090237855911255,grad_norm: 1.000000006506838, iteration: 120019
loss: 1.2381247282028198,grad_norm: 0.9999998890522105, iteration: 120020
loss: 1.0750616788864136,grad_norm: 0.9999997039037685, iteration: 120021
loss: 1.18988037109375,grad_norm: 0.9999996719021327, iteration: 120022
loss: 1.179402470588684,grad_norm: 0.9999999094191627, iteration: 120023
loss: 1.0859485864639282,grad_norm: 0.9999993713790442, iteration: 120024
loss: 1.1562144756317139,grad_norm: 0.9999995441269747, iteration: 120025
loss: 1.0840559005737305,grad_norm: 0.9999994910573452, iteration: 120026
loss: 1.1944303512573242,grad_norm: 0.9999996029956372, iteration: 120027
loss: 1.0748910903930664,grad_norm: 0.9999995608884852, iteration: 120028
loss: 1.0112940073013306,grad_norm: 0.9999990993980286, iteration: 120029
loss: 1.2070538997650146,grad_norm: 0.9999998828078798, iteration: 120030
loss: 1.1161534786224365,grad_norm: 0.9999998513172955, iteration: 120031
loss: 1.0521821975708008,grad_norm: 0.9999995333934971, iteration: 120032
loss: 1.1121360063552856,grad_norm: 0.9999992537387717, iteration: 120033
loss: 1.0638178586959839,grad_norm: 0.9999994769344236, iteration: 120034
loss: 1.04615318775177,grad_norm: 0.9999998364849616, iteration: 120035
loss: 0.976081371307373,grad_norm: 0.923259860630569, iteration: 120036
loss: 1.0752460956573486,grad_norm: 0.9999993409668528, iteration: 120037
loss: 1.0397471189498901,grad_norm: 0.9999994999448413, iteration: 120038
loss: 1.0637540817260742,grad_norm: 0.9999990003640327, iteration: 120039
loss: 1.0644211769104004,grad_norm: 0.9999991713527246, iteration: 120040
loss: 0.9746598601341248,grad_norm: 0.8565094705552881, iteration: 120041
loss: 1.0088586807250977,grad_norm: 0.9573095866411331, iteration: 120042
loss: 0.9626014828681946,grad_norm: 0.8248614136507918, iteration: 120043
loss: 1.0743993520736694,grad_norm: 0.9999990501125507, iteration: 120044
loss: 1.1336864233016968,grad_norm: 0.9999999310436112, iteration: 120045
loss: 1.1581999063491821,grad_norm: 0.9999993424159614, iteration: 120046
loss: 1.107000708580017,grad_norm: 0.9999991565407402, iteration: 120047
loss: 1.057992935180664,grad_norm: 0.9999997743243421, iteration: 120048
loss: 1.0523052215576172,grad_norm: 0.9999991122762419, iteration: 120049
loss: 1.0588873624801636,grad_norm: 0.9738731911947703, iteration: 120050
loss: 1.0082764625549316,grad_norm: 0.9999994426872442, iteration: 120051
loss: 1.0095840692520142,grad_norm: 0.9880941362081505, iteration: 120052
loss: 1.0341236591339111,grad_norm: 0.9999993297939443, iteration: 120053
loss: 1.0331065654754639,grad_norm: 0.9999992057080273, iteration: 120054
loss: 0.9621913433074951,grad_norm: 0.9999990523371866, iteration: 120055
loss: 1.239617943763733,grad_norm: 0.9999998870543532, iteration: 120056
loss: 1.0512769222259521,grad_norm: 0.9999997679047065, iteration: 120057
loss: 1.0045629739761353,grad_norm: 0.9999991389318751, iteration: 120058
loss: 0.9976426959037781,grad_norm: 0.9999994151850696, iteration: 120059
loss: 1.2275731563568115,grad_norm: 0.9999997762001203, iteration: 120060
loss: 1.037885308265686,grad_norm: 0.999999327073175, iteration: 120061
loss: 1.044287085533142,grad_norm: 0.9999990775474007, iteration: 120062
loss: 1.0150688886642456,grad_norm: 0.9380438435732628, iteration: 120063
loss: 1.1011673212051392,grad_norm: 0.9999995094499718, iteration: 120064
loss: 1.0636966228485107,grad_norm: 0.999999117439567, iteration: 120065
loss: 1.023758053779602,grad_norm: 0.9999992689089383, iteration: 120066
loss: 1.072812795639038,grad_norm: 0.9999996133477845, iteration: 120067
loss: 1.0220372676849365,grad_norm: 0.9357524092497616, iteration: 120068
loss: 1.0050172805786133,grad_norm: 0.999999756419093, iteration: 120069
loss: 1.1641302108764648,grad_norm: 0.9999998741531211, iteration: 120070
loss: 1.0988785028457642,grad_norm: 0.9999998074937904, iteration: 120071
loss: 1.0403707027435303,grad_norm: 0.9999992129890777, iteration: 120072
loss: 1.013360857963562,grad_norm: 0.9999994219537535, iteration: 120073
loss: 1.030529260635376,grad_norm: 0.9999996734968184, iteration: 120074
loss: 1.09979248046875,grad_norm: 0.9999996033837266, iteration: 120075
loss: 1.025236964225769,grad_norm: 0.9999999753417556, iteration: 120076
loss: 1.0531888008117676,grad_norm: 0.9999994334373095, iteration: 120077
loss: 1.1349027156829834,grad_norm: 0.9999997948151929, iteration: 120078
loss: 1.1073564291000366,grad_norm: 0.9999995651428153, iteration: 120079
loss: 1.0170691013336182,grad_norm: 0.9999993884361909, iteration: 120080
loss: 1.1395736932754517,grad_norm: 0.9999999175753247, iteration: 120081
loss: 1.1179488897323608,grad_norm: 0.9999994446006707, iteration: 120082
loss: 1.03568434715271,grad_norm: 0.9999997864668488, iteration: 120083
loss: 1.0296581983566284,grad_norm: 0.9999991424742813, iteration: 120084
loss: 1.0197715759277344,grad_norm: 0.9999992029252581, iteration: 120085
loss: 1.048551082611084,grad_norm: 0.9999992618696514, iteration: 120086
loss: 1.1952975988388062,grad_norm: 0.9999996962283247, iteration: 120087
loss: 0.9866268634796143,grad_norm: 0.9999996614482501, iteration: 120088
loss: 0.9828251600265503,grad_norm: 0.8636523848582112, iteration: 120089
loss: 1.0136924982070923,grad_norm: 0.9999999633241926, iteration: 120090
loss: 1.075472354888916,grad_norm: 0.9999995567208791, iteration: 120091
loss: 1.0633254051208496,grad_norm: 0.9999991799415423, iteration: 120092
loss: 1.101241946220398,grad_norm: 0.9999998838957304, iteration: 120093
loss: 1.0363688468933105,grad_norm: 0.9999998887916965, iteration: 120094
loss: 1.0757776498794556,grad_norm: 0.9999997455162504, iteration: 120095
loss: 1.0355244874954224,grad_norm: 0.999999096890772, iteration: 120096
loss: 1.1632460355758667,grad_norm: 0.9113564854894082, iteration: 120097
loss: 1.1399261951446533,grad_norm: 0.9999998112160813, iteration: 120098
loss: 1.0752954483032227,grad_norm: 0.9999998530811647, iteration: 120099
loss: 1.0751510858535767,grad_norm: 0.9999995896803404, iteration: 120100
loss: 1.064600944519043,grad_norm: 0.9999994726329964, iteration: 120101
loss: 1.1359347105026245,grad_norm: 0.9999992949678791, iteration: 120102
loss: 1.022597074508667,grad_norm: 0.9725192133800065, iteration: 120103
loss: 1.0795871019363403,grad_norm: 0.9901650008663929, iteration: 120104
loss: 1.0688732862472534,grad_norm: 0.9999998074938427, iteration: 120105
loss: 1.0595703125,grad_norm: 0.9999992574263562, iteration: 120106
loss: 1.0295521020889282,grad_norm: 1.0000000351663387, iteration: 120107
loss: 1.0370012521743774,grad_norm: 0.7874466586716038, iteration: 120108
loss: 1.060329794883728,grad_norm: 0.9999992706125669, iteration: 120109
loss: 1.0430225133895874,grad_norm: 0.9999992557424183, iteration: 120110
loss: 1.0767173767089844,grad_norm: 0.9999991653419783, iteration: 120111
loss: 1.040706753730774,grad_norm: 0.9999995031216341, iteration: 120112
loss: 1.0549237728118896,grad_norm: 1.0000000740214472, iteration: 120113
loss: 1.1153439283370972,grad_norm: 0.9999996782763314, iteration: 120114
loss: 1.0247050523757935,grad_norm: 0.86500539685276, iteration: 120115
loss: 1.0259191989898682,grad_norm: 0.9999990111070302, iteration: 120116
loss: 1.0775344371795654,grad_norm: 0.9999997854978202, iteration: 120117
loss: 1.0087617635726929,grad_norm: 0.9999995005985266, iteration: 120118
loss: 1.0626890659332275,grad_norm: 0.9783121803751103, iteration: 120119
loss: 1.138168215751648,grad_norm: 0.9999995147949331, iteration: 120120
loss: 1.001612901687622,grad_norm: 0.8287370839906297, iteration: 120121
loss: 1.0966405868530273,grad_norm: 0.9999991516539083, iteration: 120122
loss: 1.1194342374801636,grad_norm: 0.9999997154307448, iteration: 120123
loss: 0.9953790903091431,grad_norm: 0.9999991374456374, iteration: 120124
loss: 1.1135778427124023,grad_norm: 0.9999994761376207, iteration: 120125
loss: 1.0199881792068481,grad_norm: 0.9961415392856937, iteration: 120126
loss: 1.1433731317520142,grad_norm: 0.9999996229751358, iteration: 120127
loss: 1.0640192031860352,grad_norm: 0.9999997174594258, iteration: 120128
loss: 1.0107158422470093,grad_norm: 0.999998978698311, iteration: 120129
loss: 1.0540170669555664,grad_norm: 0.999999531905199, iteration: 120130
loss: 1.0229884386062622,grad_norm: 0.8340517131388797, iteration: 120131
loss: 1.0290038585662842,grad_norm: 0.999999511031503, iteration: 120132
loss: 1.0788371562957764,grad_norm: 0.9999990637224864, iteration: 120133
loss: 1.087537407875061,grad_norm: 0.9999999351551361, iteration: 120134
loss: 0.9396764636039734,grad_norm: 0.9999989102185857, iteration: 120135
loss: 1.0749818086624146,grad_norm: 0.9999991165430957, iteration: 120136
loss: 1.1427710056304932,grad_norm: 0.9999999000948202, iteration: 120137
loss: 0.9996083974838257,grad_norm: 0.8762472431903126, iteration: 120138
loss: 1.0487879514694214,grad_norm: 0.9474722094699092, iteration: 120139
loss: 1.0350922346115112,grad_norm: 0.8798286319810478, iteration: 120140
loss: 1.214121699333191,grad_norm: 0.9999998420253825, iteration: 120141
loss: 1.095353603363037,grad_norm: 0.9999994976201732, iteration: 120142
loss: 1.008471131324768,grad_norm: 0.9999994734965102, iteration: 120143
loss: 1.1917872428894043,grad_norm: 0.9999998063974476, iteration: 120144
loss: 1.0419450998306274,grad_norm: 0.9999992090927059, iteration: 120145
loss: 1.078335165977478,grad_norm: 0.9999992437710595, iteration: 120146
loss: 1.0436739921569824,grad_norm: 0.999999107392486, iteration: 120147
loss: 0.9836472868919373,grad_norm: 0.9999996661718674, iteration: 120148
loss: 1.0173485279083252,grad_norm: 0.9999993354525573, iteration: 120149
loss: 1.0854086875915527,grad_norm: 0.9999991542797245, iteration: 120150
loss: 1.0936685800552368,grad_norm: 0.9999993867218584, iteration: 120151
loss: 1.063299298286438,grad_norm: 0.9999994305946759, iteration: 120152
loss: 1.0184613466262817,grad_norm: 0.9999995745533521, iteration: 120153
loss: 1.0256108045578003,grad_norm: 0.9999993072797329, iteration: 120154
loss: 1.0843489170074463,grad_norm: 0.9999990440323592, iteration: 120155
loss: 0.994713306427002,grad_norm: 0.9394348747924854, iteration: 120156
loss: 1.1080095767974854,grad_norm: 0.9999998665044955, iteration: 120157
loss: 1.0083155632019043,grad_norm: 0.9999998576788007, iteration: 120158
loss: 1.0248465538024902,grad_norm: 0.901820017359023, iteration: 120159
loss: 1.0712817907333374,grad_norm: 0.9999992860217934, iteration: 120160
loss: 1.0053787231445312,grad_norm: 0.8895339249386822, iteration: 120161
loss: 0.9995149374008179,grad_norm: 0.9477707730822005, iteration: 120162
loss: 1.1023205518722534,grad_norm: 0.9999994892932585, iteration: 120163
loss: 1.0416113138198853,grad_norm: 0.999999202395988, iteration: 120164
loss: 0.9934186935424805,grad_norm: 0.999999449955845, iteration: 120165
loss: 1.0206754207611084,grad_norm: 0.9706566712537091, iteration: 120166
loss: 1.089241623878479,grad_norm: 0.8504116953913964, iteration: 120167
loss: 1.0222864151000977,grad_norm: 0.9999992788634403, iteration: 120168
loss: 1.0997828245162964,grad_norm: 0.9999998130018098, iteration: 120169
loss: 1.0311535596847534,grad_norm: 0.8995103082674847, iteration: 120170
loss: 1.019319772720337,grad_norm: 0.9999993000928318, iteration: 120171
loss: 1.0178194046020508,grad_norm: 0.9999993365575476, iteration: 120172
loss: 1.025092363357544,grad_norm: 0.9999991064596478, iteration: 120173
loss: 1.0303757190704346,grad_norm: 0.8421195358429252, iteration: 120174
loss: 1.0597443580627441,grad_norm: 0.9516015713894712, iteration: 120175
loss: 1.0943660736083984,grad_norm: 0.8530028435549041, iteration: 120176
loss: 0.9648382663726807,grad_norm: 0.9015806372073616, iteration: 120177
loss: 0.9836590886116028,grad_norm: 0.9336396695214211, iteration: 120178
loss: 1.0239930152893066,grad_norm: 0.9999990376170402, iteration: 120179
loss: 1.01192307472229,grad_norm: 0.9278531595558047, iteration: 120180
loss: 0.9900839328765869,grad_norm: 0.9999998494148449, iteration: 120181
loss: 1.0654915571212769,grad_norm: 0.9999992855892618, iteration: 120182
loss: 1.0440490245819092,grad_norm: 0.999999775235533, iteration: 120183
loss: 1.0232930183410645,grad_norm: 0.9999994976026021, iteration: 120184
loss: 1.0328258275985718,grad_norm: 0.9999994427310581, iteration: 120185
loss: 0.9874945282936096,grad_norm: 0.8868070815286271, iteration: 120186
loss: 1.0416538715362549,grad_norm: 0.9999995844759411, iteration: 120187
loss: 0.9998505115509033,grad_norm: 0.7770845640152683, iteration: 120188
loss: 1.1485252380371094,grad_norm: 0.9999998855075775, iteration: 120189
loss: 1.0144104957580566,grad_norm: 0.9999993772926888, iteration: 120190
loss: 0.9740899205207825,grad_norm: 0.9999993711696881, iteration: 120191
loss: 1.0021183490753174,grad_norm: 0.9999991716772908, iteration: 120192
loss: 0.9888460040092468,grad_norm: 0.9414993445843024, iteration: 120193
loss: 1.0608750581741333,grad_norm: 0.8788152054339098, iteration: 120194
loss: 1.004549264907837,grad_norm: 0.8594656715577479, iteration: 120195
loss: 1.0418914556503296,grad_norm: 0.9999996561271438, iteration: 120196
loss: 1.0236884355545044,grad_norm: 0.9999994409142376, iteration: 120197
loss: 1.0527796745300293,grad_norm: 0.9999996850559725, iteration: 120198
loss: 1.077646255493164,grad_norm: 0.9999992925244868, iteration: 120199
loss: 1.0124636888504028,grad_norm: 0.9999991843315064, iteration: 120200
loss: 0.9937127828598022,grad_norm: 0.999999171309118, iteration: 120201
loss: 1.0298994779586792,grad_norm: 0.9999991040718871, iteration: 120202
loss: 1.0043435096740723,grad_norm: 0.999999019003078, iteration: 120203
loss: 1.085873007774353,grad_norm: 0.9999995161517399, iteration: 120204
loss: 1.0632423162460327,grad_norm: 0.9483421824565549, iteration: 120205
loss: 0.9962906241416931,grad_norm: 0.8295704471052969, iteration: 120206
loss: 1.0019876956939697,grad_norm: 0.9999996660538643, iteration: 120207
loss: 1.2609155178070068,grad_norm: 0.9999999634413956, iteration: 120208
loss: 1.0215718746185303,grad_norm: 0.9999995971750343, iteration: 120209
loss: 1.144758939743042,grad_norm: 0.9999991899148177, iteration: 120210
loss: 1.1525752544403076,grad_norm: 0.9999996745392205, iteration: 120211
loss: 1.0150855779647827,grad_norm: 0.9973463723465781, iteration: 120212
loss: 1.0414272546768188,grad_norm: 1.0000000316949456, iteration: 120213
loss: 1.023076057434082,grad_norm: 0.9388449145472026, iteration: 120214
loss: 1.0390385389328003,grad_norm: 0.9079025300175901, iteration: 120215
loss: 1.0530452728271484,grad_norm: 0.8195941708156642, iteration: 120216
loss: 0.9901247620582581,grad_norm: 0.8407025833507896, iteration: 120217
loss: 1.0733469724655151,grad_norm: 0.9999994698187563, iteration: 120218
loss: 1.0785185098648071,grad_norm: 0.999999225142301, iteration: 120219
loss: 1.067549467086792,grad_norm: 0.9999990698842947, iteration: 120220
loss: 1.0470141172409058,grad_norm: 0.8675459953854096, iteration: 120221
loss: 1.0084563493728638,grad_norm: 0.9999998768943466, iteration: 120222
loss: 1.2077937126159668,grad_norm: 0.9999997407099958, iteration: 120223
loss: 1.018902063369751,grad_norm: 0.9749663680387666, iteration: 120224
loss: 1.042466163635254,grad_norm: 0.8253173915990234, iteration: 120225
loss: 1.0951863527297974,grad_norm: 0.9999997439731798, iteration: 120226
loss: 1.1048569679260254,grad_norm: 0.9999992873867843, iteration: 120227
loss: 1.0723748207092285,grad_norm: 0.9999997278125343, iteration: 120228
loss: 1.1535857915878296,grad_norm: 0.9999999383498133, iteration: 120229
loss: 1.084930181503296,grad_norm: 0.9999999699973374, iteration: 120230
loss: 1.039923906326294,grad_norm: 0.9999994447489081, iteration: 120231
loss: 1.162458062171936,grad_norm: 0.9999998943630757, iteration: 120232
loss: 0.9919674396514893,grad_norm: 0.825458617673656, iteration: 120233
loss: 1.0241326093673706,grad_norm: 0.9628146273586955, iteration: 120234
loss: 1.0730456113815308,grad_norm: 0.9999995005020079, iteration: 120235
loss: 1.0226001739501953,grad_norm: 0.9999991959302905, iteration: 120236
loss: 1.0878973007202148,grad_norm: 0.9999996412041211, iteration: 120237
loss: 1.0776078701019287,grad_norm: 0.9999997509045231, iteration: 120238
loss: 1.044737458229065,grad_norm: 0.9999999805245825, iteration: 120239
loss: 1.1760550737380981,grad_norm: 0.9999993170819438, iteration: 120240
loss: 0.9809865951538086,grad_norm: 0.9674334233166512, iteration: 120241
loss: 1.1457692384719849,grad_norm: 0.9601751411088857, iteration: 120242
loss: 1.0087965726852417,grad_norm: 0.9999998028040716, iteration: 120243
loss: 0.9882755875587463,grad_norm: 0.9029739086624607, iteration: 120244
loss: 1.088923454284668,grad_norm: 0.9999994576196399, iteration: 120245
loss: 1.059617519378662,grad_norm: 0.9999999831134609, iteration: 120246
loss: 1.0152968168258667,grad_norm: 0.9999990383737255, iteration: 120247
loss: 1.0447185039520264,grad_norm: 0.9550910211231087, iteration: 120248
loss: 1.0270686149597168,grad_norm: 0.999999504905171, iteration: 120249
loss: 1.0192055702209473,grad_norm: 0.9999990961368548, iteration: 120250
loss: 1.037642002105713,grad_norm: 0.999999860736561, iteration: 120251
loss: 1.148300290107727,grad_norm: 0.9999997045447256, iteration: 120252
loss: 1.0054165124893188,grad_norm: 0.9999998895060895, iteration: 120253
loss: 1.0036637783050537,grad_norm: 0.9999999428358828, iteration: 120254
loss: 1.1168582439422607,grad_norm: 0.9999993640390594, iteration: 120255
loss: 0.9749597907066345,grad_norm: 0.9999997935342779, iteration: 120256
loss: 1.108999490737915,grad_norm: 0.9999993223791256, iteration: 120257
loss: 0.9788273572921753,grad_norm: 0.921475263114835, iteration: 120258
loss: 1.0350849628448486,grad_norm: 0.9999997008217236, iteration: 120259
loss: 1.0794603824615479,grad_norm: 0.9999998189586162, iteration: 120260
loss: 1.0042095184326172,grad_norm: 0.999999558818467, iteration: 120261
loss: 1.0325639247894287,grad_norm: 0.9999991244880285, iteration: 120262
loss: 1.003282070159912,grad_norm: 0.9999992818712463, iteration: 120263
loss: 1.0353559255599976,grad_norm: 0.9999994813011429, iteration: 120264
loss: 1.007190465927124,grad_norm: 0.9999998315528807, iteration: 120265
loss: 1.0459405183792114,grad_norm: 0.9999996736710832, iteration: 120266
loss: 1.0595264434814453,grad_norm: 0.9535096794089999, iteration: 120267
loss: 1.0148109197616577,grad_norm: 0.9999994090322097, iteration: 120268
loss: 1.0502903461456299,grad_norm: 0.9908883304491161, iteration: 120269
loss: 1.0233877897262573,grad_norm: 0.9999991836303481, iteration: 120270
loss: 1.0111685991287231,grad_norm: 0.9613198298958913, iteration: 120271
loss: 1.121552586555481,grad_norm: 1.0000000231176143, iteration: 120272
loss: 1.047607660293579,grad_norm: 0.9999995467821675, iteration: 120273
loss: 1.0687450170516968,grad_norm: 0.9999996206739186, iteration: 120274
loss: 1.1382629871368408,grad_norm: 0.9999998118948512, iteration: 120275
loss: 1.1418023109436035,grad_norm: 0.9999992885782345, iteration: 120276
loss: 1.0067248344421387,grad_norm: 0.9999996856475021, iteration: 120277
loss: 1.0819584131240845,grad_norm: 0.9999994023808624, iteration: 120278
loss: 1.0766552686691284,grad_norm: 0.8805384392708292, iteration: 120279
loss: 1.0918333530426025,grad_norm: 0.9999998410532448, iteration: 120280
loss: 1.0245970487594604,grad_norm: 0.9617482754085334, iteration: 120281
loss: 1.0100771188735962,grad_norm: 0.9999991515119819, iteration: 120282
loss: 1.1039612293243408,grad_norm: 0.9929673476918189, iteration: 120283
loss: 1.0116087198257446,grad_norm: 0.9712322647401493, iteration: 120284
loss: 1.1401402950286865,grad_norm: 0.9999997234354341, iteration: 120285
loss: 1.0355653762817383,grad_norm: 0.9999996877787618, iteration: 120286
loss: 0.9783793091773987,grad_norm: 0.9999991752495319, iteration: 120287
loss: 1.0886428356170654,grad_norm: 0.9355906226811799, iteration: 120288
loss: 0.9825945496559143,grad_norm: 0.9999991241656067, iteration: 120289
loss: 1.160016655921936,grad_norm: 0.9999998741105051, iteration: 120290
loss: 1.1365602016448975,grad_norm: 0.9999999685202081, iteration: 120291
loss: 0.9839619398117065,grad_norm: 0.9999992828531491, iteration: 120292
loss: 1.0218744277954102,grad_norm: 0.9999998560661412, iteration: 120293
loss: 1.0643742084503174,grad_norm: 0.9999994531691794, iteration: 120294
loss: 1.0128886699676514,grad_norm: 0.9092211342777973, iteration: 120295
loss: 1.0164573192596436,grad_norm: 0.9999997948817916, iteration: 120296
loss: 1.045939326286316,grad_norm: 0.9999995799334204, iteration: 120297
loss: 1.1702438592910767,grad_norm: 0.999999914318946, iteration: 120298
loss: 1.0111018419265747,grad_norm: 0.9999990697147041, iteration: 120299
loss: 1.04056715965271,grad_norm: 0.999999751616248, iteration: 120300
loss: 1.1059958934783936,grad_norm: 0.9999996551598134, iteration: 120301
loss: 1.0532755851745605,grad_norm: 0.9999997333513079, iteration: 120302
loss: 1.0356981754302979,grad_norm: 0.9999995846983838, iteration: 120303
loss: 1.1463598012924194,grad_norm: 0.9999998441325718, iteration: 120304
loss: 1.036413311958313,grad_norm: 0.9999990181387274, iteration: 120305
loss: 1.0730799436569214,grad_norm: 0.8732273597468044, iteration: 120306
loss: 1.0473791360855103,grad_norm: 0.9999991848978369, iteration: 120307
loss: 1.0483098030090332,grad_norm: 0.9999995429993774, iteration: 120308
loss: 0.9925392866134644,grad_norm: 0.9023055105969526, iteration: 120309
loss: 1.026120662689209,grad_norm: 0.8832987971573055, iteration: 120310
loss: 1.029498815536499,grad_norm: 0.9356670809922832, iteration: 120311
loss: 1.0727062225341797,grad_norm: 0.9999999400492665, iteration: 120312
loss: 1.0621172189712524,grad_norm: 0.9999994031212105, iteration: 120313
loss: 1.1411117315292358,grad_norm: 0.9999999509161837, iteration: 120314
loss: 1.0664175748825073,grad_norm: 0.9999996700087431, iteration: 120315
loss: 1.00018310546875,grad_norm: 0.8532974309772086, iteration: 120316
loss: 1.005051851272583,grad_norm: 0.9999998832917731, iteration: 120317
loss: 0.9809335470199585,grad_norm: 0.9666099007159349, iteration: 120318
loss: 0.9830576777458191,grad_norm: 0.9999995065175417, iteration: 120319
loss: 1.0388262271881104,grad_norm: 0.9999998306260011, iteration: 120320
loss: 1.0270664691925049,grad_norm: 0.9974595216530802, iteration: 120321
loss: 1.075590968132019,grad_norm: 0.9999995788803807, iteration: 120322
loss: 1.044057846069336,grad_norm: 0.8399754336269769, iteration: 120323
loss: 1.0969383716583252,grad_norm: 0.9999990895090822, iteration: 120324
loss: 1.0982383489608765,grad_norm: 0.9999996337542892, iteration: 120325
loss: 1.1188921928405762,grad_norm: 0.9999997389924411, iteration: 120326
loss: 1.0050169229507446,grad_norm: 0.7855428235643831, iteration: 120327
loss: 1.0558394193649292,grad_norm: 0.866218740917666, iteration: 120328
loss: 1.1141911745071411,grad_norm: 0.9999998638872026, iteration: 120329
loss: 0.9965584874153137,grad_norm: 0.9013775753201442, iteration: 120330
loss: 1.1065763235092163,grad_norm: 0.9999995551419393, iteration: 120331
loss: 1.066155195236206,grad_norm: 0.9999999420306843, iteration: 120332
loss: 1.1773719787597656,grad_norm: 0.9999995310126303, iteration: 120333
loss: 1.080081820487976,grad_norm: 0.8932989783775044, iteration: 120334
loss: 1.2339147329330444,grad_norm: 0.9999998138690516, iteration: 120335
loss: 1.1347817182540894,grad_norm: 0.9999996877535492, iteration: 120336
loss: 0.9892587661743164,grad_norm: 0.999999387092468, iteration: 120337
loss: 1.1462846994400024,grad_norm: 0.9999999353651007, iteration: 120338
loss: 1.0756487846374512,grad_norm: 0.9999998677759432, iteration: 120339
loss: 1.0606350898742676,grad_norm: 0.9999997072815534, iteration: 120340
loss: 1.0762971639633179,grad_norm: 0.999999676475337, iteration: 120341
loss: 1.0462286472320557,grad_norm: 0.9999991409768524, iteration: 120342
loss: 1.1100132465362549,grad_norm: 0.9999995207301031, iteration: 120343
loss: 1.1386632919311523,grad_norm: 0.9999994324940678, iteration: 120344
loss: 1.0525425672531128,grad_norm: 0.9999994240204965, iteration: 120345
loss: 1.033119797706604,grad_norm: 0.9972509617424111, iteration: 120346
loss: 1.0423650741577148,grad_norm: 0.9999996662747863, iteration: 120347
loss: 1.0185598134994507,grad_norm: 0.9999991409338428, iteration: 120348
loss: 0.9994264841079712,grad_norm: 0.9661919542353995, iteration: 120349
loss: 1.1288135051727295,grad_norm: 0.9999993624498299, iteration: 120350
loss: 1.0379295349121094,grad_norm: 0.9999998916479228, iteration: 120351
loss: 1.051065444946289,grad_norm: 0.9741492042239993, iteration: 120352
loss: 1.3636692762374878,grad_norm: 0.9999999058428511, iteration: 120353
loss: 1.0800061225891113,grad_norm: 0.9999999219266472, iteration: 120354
loss: 1.0277806520462036,grad_norm: 0.9999995488728719, iteration: 120355
loss: 1.1207586526870728,grad_norm: 0.9999996928837669, iteration: 120356
loss: 1.0713822841644287,grad_norm: 1.0000000434412057, iteration: 120357
loss: 1.080376386642456,grad_norm: 0.9999994618064362, iteration: 120358
loss: 1.0526554584503174,grad_norm: 0.9999997962530535, iteration: 120359
loss: 1.1554120779037476,grad_norm: 0.9999998689242873, iteration: 120360
loss: 0.9934303760528564,grad_norm: 0.8471193803140689, iteration: 120361
loss: 1.046975016593933,grad_norm: 0.9999994453574675, iteration: 120362
loss: 0.9543026089668274,grad_norm: 0.9999990337842685, iteration: 120363
loss: 1.1285090446472168,grad_norm: 0.9999991872469924, iteration: 120364
loss: 1.0437055826187134,grad_norm: 0.9020641016401556, iteration: 120365
loss: 1.2541632652282715,grad_norm: 0.9999999202816244, iteration: 120366
loss: 1.1401329040527344,grad_norm: 0.999999694667205, iteration: 120367
loss: 1.2099884748458862,grad_norm: 0.9999999495186156, iteration: 120368
loss: 1.0703974962234497,grad_norm: 1.0000000208477, iteration: 120369
loss: 1.11343514919281,grad_norm: 0.9999996867159797, iteration: 120370
loss: 0.9946268200874329,grad_norm: 0.999999233474271, iteration: 120371
loss: 1.03520667552948,grad_norm: 0.9999997502209842, iteration: 120372
loss: 1.0686886310577393,grad_norm: 0.9999998612222744, iteration: 120373
loss: 1.1659672260284424,grad_norm: 0.999999405245874, iteration: 120374
loss: 1.123626947402954,grad_norm: 0.9999997441543729, iteration: 120375
loss: 1.0865761041641235,grad_norm: 0.9999994870585008, iteration: 120376
loss: 0.9933390617370605,grad_norm: 0.7824159765156243, iteration: 120377
loss: 1.0804929733276367,grad_norm: 1.0000000552010975, iteration: 120378
loss: 1.0562293529510498,grad_norm: 0.8297291400083615, iteration: 120379
loss: 1.0889850854873657,grad_norm: 0.9594448031636721, iteration: 120380
loss: 1.1806217432022095,grad_norm: 0.9999995341280212, iteration: 120381
loss: 1.1260102987289429,grad_norm: 0.9999999522942274, iteration: 120382
loss: 1.0679068565368652,grad_norm: 0.999999898042732, iteration: 120383
loss: 1.018967866897583,grad_norm: 0.7816139852595778, iteration: 120384
loss: 0.9953510165214539,grad_norm: 0.9999993050116349, iteration: 120385
loss: 1.0262339115142822,grad_norm: 0.9999998322412142, iteration: 120386
loss: 1.0244613885879517,grad_norm: 0.9999991393901975, iteration: 120387
loss: 1.353137731552124,grad_norm: 0.9999998456541266, iteration: 120388
loss: 0.9717162847518921,grad_norm: 0.9999998122181151, iteration: 120389
loss: 1.033796787261963,grad_norm: 0.9999992794387688, iteration: 120390
loss: 1.0631893873214722,grad_norm: 0.9999995942374785, iteration: 120391
loss: 1.0074721574783325,grad_norm: 0.9999995116569845, iteration: 120392
loss: 1.052852749824524,grad_norm: 0.9999996811820234, iteration: 120393
loss: 1.000679612159729,grad_norm: 0.8298459230215014, iteration: 120394
loss: 1.131068229675293,grad_norm: 0.9999998542216356, iteration: 120395
loss: 1.021302580833435,grad_norm: 0.9999993190021907, iteration: 120396
loss: 1.0750079154968262,grad_norm: 0.99999978050313, iteration: 120397
loss: 1.0123592615127563,grad_norm: 0.8682090929122036, iteration: 120398
loss: 1.1344269514083862,grad_norm: 0.9999995518277489, iteration: 120399
loss: 1.041489839553833,grad_norm: 0.999999507594231, iteration: 120400
loss: 1.0386180877685547,grad_norm: 0.8472509305954448, iteration: 120401
loss: 0.9813125133514404,grad_norm: 0.9999991828630892, iteration: 120402
loss: 1.2416330575942993,grad_norm: 0.9999999707399808, iteration: 120403
loss: 1.104744553565979,grad_norm: 0.9999998099856037, iteration: 120404
loss: 1.0463682413101196,grad_norm: 0.9999992918063811, iteration: 120405
loss: 1.0225179195404053,grad_norm: 0.8948093806368446, iteration: 120406
loss: 1.0041131973266602,grad_norm: 0.9999994454375141, iteration: 120407
loss: 0.9891824722290039,grad_norm: 0.7095105014010851, iteration: 120408
loss: 0.973095178604126,grad_norm: 0.9292406408787761, iteration: 120409
loss: 1.0434117317199707,grad_norm: 0.9999990929367009, iteration: 120410
loss: 0.9787222146987915,grad_norm: 0.821564839873818, iteration: 120411
loss: 1.0271852016448975,grad_norm: 0.9999991418496051, iteration: 120412
loss: 1.0168395042419434,grad_norm: 0.9999990599040959, iteration: 120413
loss: 1.1059622764587402,grad_norm: 0.9999998765923962, iteration: 120414
loss: 1.0431849956512451,grad_norm: 0.9999997596018304, iteration: 120415
loss: 1.0729284286499023,grad_norm: 0.9999993055645696, iteration: 120416
loss: 1.0625709295272827,grad_norm: 0.9999998268412215, iteration: 120417
loss: 1.05319082736969,grad_norm: 0.999999550652292, iteration: 120418
loss: 1.1410164833068848,grad_norm: 0.9999999261941733, iteration: 120419
loss: 1.058749794960022,grad_norm: 0.9999994707849509, iteration: 120420
loss: 1.1291062831878662,grad_norm: 0.9999996553549316, iteration: 120421
loss: 1.0209811925888062,grad_norm: 0.9999997930270067, iteration: 120422
loss: 0.9945891499519348,grad_norm: 0.9999990026748511, iteration: 120423
loss: 1.1165908575057983,grad_norm: 0.9999999145157261, iteration: 120424
loss: 1.2809534072875977,grad_norm: 0.9999998875169791, iteration: 120425
loss: 1.126403570175171,grad_norm: 0.9999995641911975, iteration: 120426
loss: 1.0410789251327515,grad_norm: 0.9999997227044493, iteration: 120427
loss: 1.0473920106887817,grad_norm: 0.9999991615583934, iteration: 120428
loss: 1.0586227178573608,grad_norm: 1.0000000207705533, iteration: 120429
loss: 0.995877742767334,grad_norm: 0.9649151562578094, iteration: 120430
loss: 1.0178898572921753,grad_norm: 0.9999991177274583, iteration: 120431
loss: 1.1314005851745605,grad_norm: 0.999999961813417, iteration: 120432
loss: 1.029086947441101,grad_norm: 0.9999998609975326, iteration: 120433
loss: 1.0172491073608398,grad_norm: 0.9999991102173957, iteration: 120434
loss: 1.0238229036331177,grad_norm: 0.999999043287538, iteration: 120435
loss: 1.0582988262176514,grad_norm: 0.9999996718675768, iteration: 120436
loss: 1.0454860925674438,grad_norm: 0.9999996716915702, iteration: 120437
loss: 1.0755691528320312,grad_norm: 0.9999997123381358, iteration: 120438
loss: 1.0260449647903442,grad_norm: 0.9999992797097107, iteration: 120439
loss: 1.1089059114456177,grad_norm: 0.9999996806038479, iteration: 120440
loss: 1.1382784843444824,grad_norm: 0.9999998929516959, iteration: 120441
loss: 1.037989854812622,grad_norm: 0.9999991909640061, iteration: 120442
loss: 1.172990083694458,grad_norm: 0.9999998328040308, iteration: 120443
loss: 1.2960706949234009,grad_norm: 0.9999998229190833, iteration: 120444
loss: 1.0432004928588867,grad_norm: 0.9999997915508979, iteration: 120445
loss: 1.0619665384292603,grad_norm: 0.9999991398293341, iteration: 120446
loss: 1.0252389907836914,grad_norm: 0.8845513408836807, iteration: 120447
loss: 1.3305388689041138,grad_norm: 0.9999999240021391, iteration: 120448
loss: 1.236154556274414,grad_norm: 0.9999997837624648, iteration: 120449
loss: 1.1053247451782227,grad_norm: 0.9999998542827744, iteration: 120450
loss: 1.0681123733520508,grad_norm: 0.9999991460689062, iteration: 120451
loss: 1.0496673583984375,grad_norm: 0.9999999188218781, iteration: 120452
loss: 1.0280115604400635,grad_norm: 0.9999992324114835, iteration: 120453
loss: 0.9817920327186584,grad_norm: 0.9999991385557332, iteration: 120454
loss: 1.0331302881240845,grad_norm: 0.9999994900963811, iteration: 120455
loss: 1.0392130613327026,grad_norm: 0.9999993826353716, iteration: 120456
loss: 1.0319617986679077,grad_norm: 0.9999992747405803, iteration: 120457
loss: 0.972987174987793,grad_norm: 0.9999992660331488, iteration: 120458
loss: 1.0678796768188477,grad_norm: 0.9999995508432401, iteration: 120459
loss: 1.0011366605758667,grad_norm: 0.9163387696647323, iteration: 120460
loss: 1.130407452583313,grad_norm: 1.0000000838525833, iteration: 120461
loss: 1.03347647190094,grad_norm: 0.9999992832693535, iteration: 120462
loss: 1.0534262657165527,grad_norm: 0.9999997382699912, iteration: 120463
loss: 1.0426346063613892,grad_norm: 0.9999993781789989, iteration: 120464
loss: 0.9860313534736633,grad_norm: 0.889468106824105, iteration: 120465
loss: 0.9744249582290649,grad_norm: 0.9619166786951645, iteration: 120466
loss: 1.0790656805038452,grad_norm: 0.9999992195633015, iteration: 120467
loss: 1.000711441040039,grad_norm: 0.9999990251102704, iteration: 120468
loss: 1.0282280445098877,grad_norm: 0.7948753480492201, iteration: 120469
loss: 0.9810265302658081,grad_norm: 0.9999992210442012, iteration: 120470
loss: 1.0320111513137817,grad_norm: 0.9999993896688182, iteration: 120471
loss: 1.104140281677246,grad_norm: 0.9999995165875523, iteration: 120472
loss: 1.0388442277908325,grad_norm: 0.9999991134957688, iteration: 120473
loss: 1.004703402519226,grad_norm: 0.9999989172217258, iteration: 120474
loss: 1.2648699283599854,grad_norm: 0.9999999205043546, iteration: 120475
loss: 1.1156911849975586,grad_norm: 0.9999994587242642, iteration: 120476
loss: 1.1037636995315552,grad_norm: 0.9999990930212558, iteration: 120477
loss: 1.0725995302200317,grad_norm: 0.9999997522715137, iteration: 120478
loss: 1.0552459955215454,grad_norm: 0.9999990844237238, iteration: 120479
loss: 1.0369760990142822,grad_norm: 0.9999996024107756, iteration: 120480
loss: 1.0490716695785522,grad_norm: 0.9999997249683912, iteration: 120481
loss: 1.0237677097320557,grad_norm: 0.999999777568892, iteration: 120482
loss: 1.0414535999298096,grad_norm: 0.9999995376415582, iteration: 120483
loss: 1.02420175075531,grad_norm: 0.9999997348100601, iteration: 120484
loss: 1.1177406311035156,grad_norm: 0.9999996041374684, iteration: 120485
loss: 1.0716121196746826,grad_norm: 0.9999998570571113, iteration: 120486
loss: 1.1781904697418213,grad_norm: 0.999999850143648, iteration: 120487
loss: 1.354062557220459,grad_norm: 0.9999999742890999, iteration: 120488
loss: 1.1900721788406372,grad_norm: 0.9999999329622751, iteration: 120489
loss: 1.0615317821502686,grad_norm: 0.9999992557457169, iteration: 120490
loss: 0.9997069239616394,grad_norm: 0.891858013073626, iteration: 120491
loss: 1.2865562438964844,grad_norm: 0.9999998316054172, iteration: 120492
loss: 1.0860151052474976,grad_norm: 0.9999999251436805, iteration: 120493
loss: 1.0413471460342407,grad_norm: 0.999999208437246, iteration: 120494
loss: 1.0842013359069824,grad_norm: 0.9999996452932791, iteration: 120495
loss: 0.9748120307922363,grad_norm: 0.9999998090706227, iteration: 120496
loss: 1.1478708982467651,grad_norm: 0.9999997177167638, iteration: 120497
loss: 1.0244768857955933,grad_norm: 0.9999992668933356, iteration: 120498
loss: 1.0239166021347046,grad_norm: 0.8294660453775499, iteration: 120499
loss: 1.1241812705993652,grad_norm: 0.9999990920496392, iteration: 120500
loss: 1.0430972576141357,grad_norm: 0.9999993360530175, iteration: 120501
loss: 1.2763077020645142,grad_norm: 0.9999998925110242, iteration: 120502
loss: 1.057895302772522,grad_norm: 0.9999992867105747, iteration: 120503
loss: 0.9949471354484558,grad_norm: 0.9999992664615152, iteration: 120504
loss: 1.0585365295410156,grad_norm: 0.9999995881221962, iteration: 120505
loss: 1.0376298427581787,grad_norm: 0.9999997528106442, iteration: 120506
loss: 1.0329843759536743,grad_norm: 0.9999991978609869, iteration: 120507
loss: 1.0753746032714844,grad_norm: 0.999999934347005, iteration: 120508
loss: 0.99821537733078,grad_norm: 0.9449449589011288, iteration: 120509
loss: 1.1538630723953247,grad_norm: 0.9999995833092563, iteration: 120510
loss: 1.016509771347046,grad_norm: 0.9999996054151136, iteration: 120511
loss: 1.0144270658493042,grad_norm: 0.9999990712113823, iteration: 120512
loss: 1.1274231672286987,grad_norm: 0.9999995964548464, iteration: 120513
loss: 1.0604212284088135,grad_norm: 0.9999995045589224, iteration: 120514
loss: 1.1235021352767944,grad_norm: 0.9999995605191517, iteration: 120515
loss: 1.0464187860488892,grad_norm: 0.9999994262182906, iteration: 120516
loss: 1.060163974761963,grad_norm: 0.9999997751209599, iteration: 120517
loss: 1.1090768575668335,grad_norm: 0.9999999157383037, iteration: 120518
loss: 1.029589295387268,grad_norm: 0.9999990282017704, iteration: 120519
loss: 1.0766671895980835,grad_norm: 0.9999999850060778, iteration: 120520
loss: 1.0193161964416504,grad_norm: 0.9999991816598552, iteration: 120521
loss: 1.0357526540756226,grad_norm: 0.9999998915217371, iteration: 120522
loss: 1.014090657234192,grad_norm: 0.9999997245633696, iteration: 120523
loss: 1.1203961372375488,grad_norm: 0.9999996596135531, iteration: 120524
loss: 1.0565824508666992,grad_norm: 0.9999996832278351, iteration: 120525
loss: 1.1705385446548462,grad_norm: 0.9999999294722854, iteration: 120526
loss: 1.0999492406845093,grad_norm: 0.9999997263755764, iteration: 120527
loss: 1.3531614542007446,grad_norm: 0.9999998737924163, iteration: 120528
loss: 1.0634266138076782,grad_norm: 0.9999998830142341, iteration: 120529
loss: 1.0866103172302246,grad_norm: 1.0000000052530256, iteration: 120530
loss: 1.0824097394943237,grad_norm: 0.9999994590869549, iteration: 120531
loss: 1.0005418062210083,grad_norm: 0.7791541433925554, iteration: 120532
loss: 1.1326408386230469,grad_norm: 0.9999995598798496, iteration: 120533
loss: 1.1192266941070557,grad_norm: 0.9999996258354759, iteration: 120534
loss: 0.9984544515609741,grad_norm: 0.9999993665882639, iteration: 120535
loss: 1.1760660409927368,grad_norm: 0.9999996015753917, iteration: 120536
loss: 1.0363624095916748,grad_norm: 0.9890990061825838, iteration: 120537
loss: 1.0672117471694946,grad_norm: 0.9999991024366098, iteration: 120538
loss: 1.1052391529083252,grad_norm: 0.9999999165456621, iteration: 120539
loss: 1.0016839504241943,grad_norm: 0.9999992730458137, iteration: 120540
loss: 1.2548587322235107,grad_norm: 0.9999997696845314, iteration: 120541
loss: 1.0750069618225098,grad_norm: 0.999197645233744, iteration: 120542
loss: 1.128180980682373,grad_norm: 0.9999994345013643, iteration: 120543
loss: 1.0399110317230225,grad_norm: 0.9999992174001464, iteration: 120544
loss: 1.0386297702789307,grad_norm: 0.9999996542934891, iteration: 120545
loss: 1.0526461601257324,grad_norm: 0.9999990981343386, iteration: 120546
loss: 1.0320532321929932,grad_norm: 0.9999998684157188, iteration: 120547
loss: 1.1191496849060059,grad_norm: 0.9999998881187161, iteration: 120548
loss: 1.2550816535949707,grad_norm: 0.9999998372413375, iteration: 120549
loss: 1.079344391822815,grad_norm: 0.9999991055193341, iteration: 120550
loss: 1.063594102859497,grad_norm: 1.000000060216327, iteration: 120551
loss: 0.9896447658538818,grad_norm: 0.9999990795945782, iteration: 120552
loss: 1.0601273775100708,grad_norm: 0.9999999121287618, iteration: 120553
loss: 1.0732405185699463,grad_norm: 0.9999998965423802, iteration: 120554
loss: 1.0094412565231323,grad_norm: 0.9999991117946527, iteration: 120555
loss: 1.015709638595581,grad_norm: 0.8146441661097125, iteration: 120556
loss: 1.022753119468689,grad_norm: 0.9999996367037971, iteration: 120557
loss: 1.1344659328460693,grad_norm: 0.9999999150200025, iteration: 120558
loss: 1.1444989442825317,grad_norm: 0.9999999566079746, iteration: 120559
loss: 1.094431757926941,grad_norm: 0.999999301651391, iteration: 120560
loss: 1.0231777429580688,grad_norm: 0.9999993533858188, iteration: 120561
loss: 1.078805685043335,grad_norm: 0.9999998995289744, iteration: 120562
loss: 1.1709589958190918,grad_norm: 0.9999998291539514, iteration: 120563
loss: 1.0660918951034546,grad_norm: 0.9999997517293149, iteration: 120564
loss: 1.0956109762191772,grad_norm: 0.9999998026375043, iteration: 120565
loss: 1.1410287618637085,grad_norm: 0.9999999924263169, iteration: 120566
loss: 1.1548168659210205,grad_norm: 0.9999992088371341, iteration: 120567
loss: 1.2798476219177246,grad_norm: 0.999999893470311, iteration: 120568
loss: 0.9965342879295349,grad_norm: 0.9999998174786312, iteration: 120569
loss: 1.0020569562911987,grad_norm: 0.9999992530486763, iteration: 120570
loss: 1.1499841213226318,grad_norm: 0.9999994187056147, iteration: 120571
loss: 1.2911874055862427,grad_norm: 0.9999998119003648, iteration: 120572
loss: 1.2307637929916382,grad_norm: 0.9999998298288647, iteration: 120573
loss: 1.0685702562332153,grad_norm: 0.9999999638408168, iteration: 120574
loss: 1.0713733434677124,grad_norm: 0.9429131320384087, iteration: 120575
loss: 1.2144230604171753,grad_norm: 0.9999998003174742, iteration: 120576
loss: 1.1698611974716187,grad_norm: 0.9999999063356991, iteration: 120577
loss: 1.7459145784378052,grad_norm: 0.9999998676733234, iteration: 120578
loss: 1.140325903892517,grad_norm: 0.9999995442624732, iteration: 120579
loss: 1.0114153623580933,grad_norm: 0.9999995072326192, iteration: 120580
loss: 1.3059970140457153,grad_norm: 0.9999997943790081, iteration: 120581
loss: 1.0018384456634521,grad_norm: 0.9999997186481573, iteration: 120582
loss: 1.0515321493148804,grad_norm: 0.9999997700194305, iteration: 120583
loss: 1.0197402238845825,grad_norm: 0.9999998188745185, iteration: 120584
loss: 1.0217658281326294,grad_norm: 0.9999995781042672, iteration: 120585
loss: 1.0559732913970947,grad_norm: 0.9999995647200256, iteration: 120586
loss: 1.0994259119033813,grad_norm: 0.9999993502382469, iteration: 120587
loss: 1.1338188648223877,grad_norm: 0.9999993530830632, iteration: 120588
loss: 1.43044912815094,grad_norm: 0.9999999871637587, iteration: 120589
loss: 1.0406956672668457,grad_norm: 0.9899667964397036, iteration: 120590
loss: 1.3749319314956665,grad_norm: 0.9999999107326815, iteration: 120591
loss: 1.209296464920044,grad_norm: 0.9999995701904891, iteration: 120592
loss: 0.9851797819137573,grad_norm: 0.8385661625070095, iteration: 120593
loss: 1.136487603187561,grad_norm: 0.999999967518236, iteration: 120594
loss: 1.0445688962936401,grad_norm: 0.9999995036072176, iteration: 120595
loss: 1.0060425996780396,grad_norm: 0.9999992631539655, iteration: 120596
loss: 1.132166862487793,grad_norm: 0.9999992574216238, iteration: 120597
loss: 1.141343593597412,grad_norm: 0.9999995820654398, iteration: 120598
loss: 1.1004741191864014,grad_norm: 0.9999998670158297, iteration: 120599
loss: 1.1730782985687256,grad_norm: 0.9999997294555489, iteration: 120600
loss: 1.0500648021697998,grad_norm: 0.999999591479687, iteration: 120601
loss: 1.0064302682876587,grad_norm: 0.9999991985871672, iteration: 120602
loss: 1.4053508043289185,grad_norm: 0.9999997408726314, iteration: 120603
loss: 1.2410249710083008,grad_norm: 0.9999996619961696, iteration: 120604
loss: 1.1749186515808105,grad_norm: 0.999999854136769, iteration: 120605
loss: 1.0832130908966064,grad_norm: 0.9999993370392667, iteration: 120606
loss: 1.3732415437698364,grad_norm: 0.9999999559733828, iteration: 120607
loss: 1.089736819267273,grad_norm: 0.9999997285519749, iteration: 120608
loss: 1.0301176309585571,grad_norm: 0.9999995262069696, iteration: 120609
loss: 1.3727604150772095,grad_norm: 0.999999940481505, iteration: 120610
loss: 1.060483455657959,grad_norm: 0.9999992304984214, iteration: 120611
loss: 1.3271223306655884,grad_norm: 0.9999999081381168, iteration: 120612
loss: 1.1941152811050415,grad_norm: 0.9999998714902362, iteration: 120613
loss: 1.2143303155899048,grad_norm: 0.9999993361846677, iteration: 120614
loss: 1.2531620264053345,grad_norm: 0.999999931542119, iteration: 120615
loss: 1.4495171308517456,grad_norm: 0.9999997217875787, iteration: 120616
loss: 1.1181912422180176,grad_norm: 0.9999996672181537, iteration: 120617
loss: 1.0450273752212524,grad_norm: 0.9999993075795421, iteration: 120618
loss: 1.3050754070281982,grad_norm: 0.9999998727666682, iteration: 120619
loss: 1.0286492109298706,grad_norm: 0.9999993450127475, iteration: 120620
loss: 1.0922871828079224,grad_norm: 0.9999992909820341, iteration: 120621
loss: 1.0798951387405396,grad_norm: 0.9999992278477298, iteration: 120622
loss: 1.1621683835983276,grad_norm: 0.9999999348470748, iteration: 120623
loss: 1.1723833084106445,grad_norm: 0.9999995950475424, iteration: 120624
loss: 1.1409022808074951,grad_norm: 0.9999996684162401, iteration: 120625
loss: 1.0634937286376953,grad_norm: 0.9999994725037352, iteration: 120626
loss: 1.0383026599884033,grad_norm: 0.9999995917467441, iteration: 120627
loss: 1.0712350606918335,grad_norm: 0.9999996883767789, iteration: 120628
loss: 1.0229696035385132,grad_norm: 0.9999992804897899, iteration: 120629
loss: 1.1654189825057983,grad_norm: 0.9999998779541651, iteration: 120630
loss: 1.224527359008789,grad_norm: 0.9999997689672528, iteration: 120631
loss: 1.2102051973342896,grad_norm: 0.9999996589525362, iteration: 120632
loss: 1.1867032051086426,grad_norm: 1.00000008978674, iteration: 120633
loss: 1.1136150360107422,grad_norm: 0.9999999125944614, iteration: 120634
loss: 1.1736350059509277,grad_norm: 0.9999998813656483, iteration: 120635
loss: 1.2194992303848267,grad_norm: 0.999999655016888, iteration: 120636
loss: 1.0221611261367798,grad_norm: 0.9999995107308532, iteration: 120637
loss: 1.1536622047424316,grad_norm: 0.9999998101085178, iteration: 120638
loss: 1.1441779136657715,grad_norm: 0.9999998157455822, iteration: 120639
loss: 1.0424422025680542,grad_norm: 0.984510906566643, iteration: 120640
loss: 1.046592354774475,grad_norm: 0.9999994699504358, iteration: 120641
loss: 0.9911932945251465,grad_norm: 0.9999999320650965, iteration: 120642
loss: 1.0553206205368042,grad_norm: 0.9999997072037603, iteration: 120643
loss: 1.0506312847137451,grad_norm: 0.9999997006016039, iteration: 120644
loss: 1.0294663906097412,grad_norm: 0.9999991899842596, iteration: 120645
loss: 1.0476685762405396,grad_norm: 0.9999997284268741, iteration: 120646
loss: 1.332090139389038,grad_norm: 0.9999999024417365, iteration: 120647
loss: 1.0330382585525513,grad_norm: 0.9999998449723653, iteration: 120648
loss: 1.1004747152328491,grad_norm: 0.9999991632076529, iteration: 120649
loss: 1.0966390371322632,grad_norm: 0.999999882739684, iteration: 120650
loss: 1.2706208229064941,grad_norm: 0.9999996594861562, iteration: 120651
loss: 1.0876673460006714,grad_norm: 0.9999998472312485, iteration: 120652
loss: 1.1474404335021973,grad_norm: 0.9999992791277434, iteration: 120653
loss: 1.11433744430542,grad_norm: 0.9999991278573157, iteration: 120654
loss: 1.135421872138977,grad_norm: 0.9999996302410152, iteration: 120655
loss: 1.1169315576553345,grad_norm: 0.9999998261235101, iteration: 120656
loss: 1.326717734336853,grad_norm: 0.999999197867838, iteration: 120657
loss: 1.033789038658142,grad_norm: 0.9999992379667241, iteration: 120658
loss: 1.2172306776046753,grad_norm: 0.9999996271474451, iteration: 120659
loss: 1.0308088064193726,grad_norm: 0.9999991177522831, iteration: 120660
loss: 1.0341079235076904,grad_norm: 0.9999993978349694, iteration: 120661
loss: 1.0391994714736938,grad_norm: 0.99999981878148, iteration: 120662
loss: 1.1095101833343506,grad_norm: 0.9999999277073635, iteration: 120663
loss: 1.023821473121643,grad_norm: 0.8966525036915735, iteration: 120664
loss: 0.9744752645492554,grad_norm: 0.9999990634971314, iteration: 120665
loss: 1.170468807220459,grad_norm: 0.9999997464744013, iteration: 120666
loss: 1.1483365297317505,grad_norm: 0.9999996838327574, iteration: 120667
loss: 1.1716517210006714,grad_norm: 0.9999997177317357, iteration: 120668
loss: 1.1144344806671143,grad_norm: 0.9999998958434843, iteration: 120669
loss: 1.0334936380386353,grad_norm: 0.9999998414481738, iteration: 120670
loss: 1.1001648902893066,grad_norm: 0.9999996353512246, iteration: 120671
loss: 1.050004482269287,grad_norm: 0.9999995779253471, iteration: 120672
loss: 0.9726881384849548,grad_norm: 0.8193929456350185, iteration: 120673
loss: 0.9990463256835938,grad_norm: 0.9999990812090798, iteration: 120674
loss: 1.0142016410827637,grad_norm: 0.999999128373803, iteration: 120675
loss: 1.0312639474868774,grad_norm: 0.9484683444918806, iteration: 120676
loss: 1.073752999305725,grad_norm: 0.9999994270610087, iteration: 120677
loss: 1.06480872631073,grad_norm: 0.9999996089538186, iteration: 120678
loss: 1.031266212463379,grad_norm: 0.8997759589783778, iteration: 120679
loss: 1.0300487279891968,grad_norm: 0.9999996585975316, iteration: 120680
loss: 1.1708098649978638,grad_norm: 0.999999819819944, iteration: 120681
loss: 1.1908036470413208,grad_norm: 0.9999996292197837, iteration: 120682
loss: 1.0532152652740479,grad_norm: 0.9592535791342622, iteration: 120683
loss: 0.9756596088409424,grad_norm: 0.9999997698110066, iteration: 120684
loss: 0.9900293350219727,grad_norm: 0.9999999595758441, iteration: 120685
loss: 1.0709097385406494,grad_norm: 0.9999997503316504, iteration: 120686
loss: 1.0615510940551758,grad_norm: 0.9999996335825848, iteration: 120687
loss: 1.0866023302078247,grad_norm: 0.9999995663512131, iteration: 120688
loss: 1.1014635562896729,grad_norm: 0.9999995215938375, iteration: 120689
loss: 1.0248380899429321,grad_norm: 0.9140583234386902, iteration: 120690
loss: 0.9648320078849792,grad_norm: 0.9999991560828184, iteration: 120691
loss: 1.0769723653793335,grad_norm: 0.9999991226389291, iteration: 120692
loss: 1.049069881439209,grad_norm: 0.9999992226617048, iteration: 120693
loss: 1.0610995292663574,grad_norm: 0.9999992718832408, iteration: 120694
loss: 1.0681967735290527,grad_norm: 0.999999381240853, iteration: 120695
loss: 1.0999916791915894,grad_norm: 0.9999999520010056, iteration: 120696
loss: 1.106415033340454,grad_norm: 0.9999998623944742, iteration: 120697
loss: 1.0667670965194702,grad_norm: 0.9999996941918254, iteration: 120698
loss: 1.0994335412979126,grad_norm: 0.9999996178063825, iteration: 120699
loss: 1.0082780122756958,grad_norm: 0.9969273472510108, iteration: 120700
loss: 1.0400476455688477,grad_norm: 0.9999998511373437, iteration: 120701
loss: 1.2284959554672241,grad_norm: 0.999999730351529, iteration: 120702
loss: 1.0204510688781738,grad_norm: 0.9480358766531872, iteration: 120703
loss: 1.1279877424240112,grad_norm: 0.8857372712387847, iteration: 120704
loss: 1.0674463510513306,grad_norm: 0.9999999449775205, iteration: 120705
loss: 1.0316686630249023,grad_norm: 0.9999998743742493, iteration: 120706
loss: 0.983519971370697,grad_norm: 0.9211381967125484, iteration: 120707
loss: 1.1569808721542358,grad_norm: 0.9999995898526303, iteration: 120708
loss: 1.010322093963623,grad_norm: 0.9999995533457041, iteration: 120709
loss: 1.0517585277557373,grad_norm: 0.9999997669841871, iteration: 120710
loss: 1.0566034317016602,grad_norm: 0.9999992910088056, iteration: 120711
loss: 1.2778406143188477,grad_norm: 0.9999994111724066, iteration: 120712
loss: 1.079784631729126,grad_norm: 0.9999992948734409, iteration: 120713
loss: 1.0009044408798218,grad_norm: 0.9999990672445902, iteration: 120714
loss: 1.0319770574569702,grad_norm: 0.9999998100845742, iteration: 120715
loss: 1.073700189590454,grad_norm: 0.9999993856143921, iteration: 120716
loss: 1.0161621570587158,grad_norm: 0.8586077795862448, iteration: 120717
loss: 1.0081214904785156,grad_norm: 0.999999290724694, iteration: 120718
loss: 1.1041412353515625,grad_norm: 0.9999994143075616, iteration: 120719
loss: 1.0582678318023682,grad_norm: 0.9999999344486143, iteration: 120720
loss: 1.1236448287963867,grad_norm: 0.9999996852333185, iteration: 120721
loss: 1.0160685777664185,grad_norm: 0.9999991766568225, iteration: 120722
loss: 1.015022873878479,grad_norm: 0.999999197295383, iteration: 120723
loss: 1.0116949081420898,grad_norm: 0.9062323632787824, iteration: 120724
loss: 1.039440631866455,grad_norm: 0.9999997427086584, iteration: 120725
loss: 1.1800715923309326,grad_norm: 0.9999999018834265, iteration: 120726
loss: 1.0453823804855347,grad_norm: 0.999999785556661, iteration: 120727
loss: 0.9992707371711731,grad_norm: 0.9999992270251432, iteration: 120728
loss: 0.9785643219947815,grad_norm: 0.9999997387542324, iteration: 120729
loss: 1.100182294845581,grad_norm: 0.999999098483203, iteration: 120730
loss: 1.0151605606079102,grad_norm: 0.9999991295948706, iteration: 120731
loss: 1.0244107246398926,grad_norm: 0.8589585174893397, iteration: 120732
loss: 1.0680993795394897,grad_norm: 0.9999999538307646, iteration: 120733
loss: 1.0444778203964233,grad_norm: 0.9999992286670696, iteration: 120734
loss: 0.9886159300804138,grad_norm: 0.9055975626150206, iteration: 120735
loss: 1.0175540447235107,grad_norm: 0.9999991824386724, iteration: 120736
loss: 1.1201837062835693,grad_norm: 0.9999998912101479, iteration: 120737
loss: 1.037700891494751,grad_norm: 0.999999104340925, iteration: 120738
loss: 1.0803589820861816,grad_norm: 0.9999995287474458, iteration: 120739
loss: 1.0698097944259644,grad_norm: 0.9999998762487059, iteration: 120740
loss: 1.0732241868972778,grad_norm: 0.9483721906415398, iteration: 120741
loss: 0.995682418346405,grad_norm: 0.9999990036082521, iteration: 120742
loss: 1.2774238586425781,grad_norm: 0.999999967684063, iteration: 120743
loss: 1.0291550159454346,grad_norm: 0.9431334772296822, iteration: 120744
loss: 1.033828616142273,grad_norm: 0.9999995205277988, iteration: 120745
loss: 1.0585942268371582,grad_norm: 0.9999993029616339, iteration: 120746
loss: 1.0069172382354736,grad_norm: 0.8283038884921629, iteration: 120747
loss: 1.0707390308380127,grad_norm: 0.9999998223667972, iteration: 120748
loss: 1.057979941368103,grad_norm: 0.9999992836877575, iteration: 120749
loss: 1.0299617052078247,grad_norm: 0.9999995630382247, iteration: 120750
loss: 1.0521612167358398,grad_norm: 0.9999998166804547, iteration: 120751
loss: 0.9916265606880188,grad_norm: 0.9999996533094925, iteration: 120752
loss: 1.0572497844696045,grad_norm: 0.999999564276837, iteration: 120753
loss: 1.028128981590271,grad_norm: 0.8877899166038493, iteration: 120754
loss: 1.0337697267532349,grad_norm: 0.9999993422178932, iteration: 120755
loss: 1.053645133972168,grad_norm: 0.9999998183333048, iteration: 120756
loss: 1.0339314937591553,grad_norm: 0.9999992831471941, iteration: 120757
loss: 1.0681365728378296,grad_norm: 0.9999997670572536, iteration: 120758
loss: 1.1753157377243042,grad_norm: 0.9999995104022572, iteration: 120759
loss: 1.3212337493896484,grad_norm: 0.9999999541000102, iteration: 120760
loss: 1.017950415611267,grad_norm: 0.886632290509162, iteration: 120761
loss: 1.0357844829559326,grad_norm: 0.9999998441805349, iteration: 120762
loss: 1.0694483518600464,grad_norm: 0.9999992956627088, iteration: 120763
loss: 1.085066556930542,grad_norm: 0.9999994862240432, iteration: 120764
loss: 1.0228182077407837,grad_norm: 0.986566608888639, iteration: 120765
loss: 1.3384852409362793,grad_norm: 0.9999998140671404, iteration: 120766
loss: 1.0174726247787476,grad_norm: 0.9072421998284687, iteration: 120767
loss: 1.079620599746704,grad_norm: 0.999999735255708, iteration: 120768
loss: 1.0701135396957397,grad_norm: 0.8850977023113494, iteration: 120769
loss: 1.0259559154510498,grad_norm: 0.9999998632642086, iteration: 120770
loss: 1.1231633424758911,grad_norm: 0.9999993093581595, iteration: 120771
loss: 1.007989764213562,grad_norm: 0.8270758448523543, iteration: 120772
loss: 1.0721145868301392,grad_norm: 0.9999993557702287, iteration: 120773
loss: 1.0288337469100952,grad_norm: 0.9999993619412995, iteration: 120774
loss: 1.0238372087478638,grad_norm: 0.999999675299619, iteration: 120775
loss: 1.078136682510376,grad_norm: 0.9999992366726277, iteration: 120776
loss: 1.1512856483459473,grad_norm: 0.999999192093822, iteration: 120777
loss: 0.988501250743866,grad_norm: 0.9999989775351834, iteration: 120778
loss: 1.0149346590042114,grad_norm: 0.9999993008681393, iteration: 120779
loss: 1.257247805595398,grad_norm: 0.9999999033248248, iteration: 120780
loss: 1.0351423025131226,grad_norm: 0.9999999040110599, iteration: 120781
loss: 1.0746034383773804,grad_norm: 0.9999998028027317, iteration: 120782
loss: 0.9950080513954163,grad_norm: 0.8667735061867223, iteration: 120783
loss: 1.0976133346557617,grad_norm: 0.9999996330077837, iteration: 120784
loss: 1.049161672592163,grad_norm: 0.9999991952177496, iteration: 120785
loss: 0.9559103846549988,grad_norm: 0.8702418145080718, iteration: 120786
loss: 1.1267341375350952,grad_norm: 0.9999993645095002, iteration: 120787
loss: 1.0436840057373047,grad_norm: 0.9999997165602965, iteration: 120788
loss: 1.0748727321624756,grad_norm: 0.9999994088858131, iteration: 120789
loss: 1.0558570623397827,grad_norm: 0.999999175184071, iteration: 120790
loss: 1.003201961517334,grad_norm: 0.9999992686537087, iteration: 120791
loss: 1.0348825454711914,grad_norm: 0.9999998614025072, iteration: 120792
loss: 0.9666838645935059,grad_norm: 0.9999991377934151, iteration: 120793
loss: 1.0098495483398438,grad_norm: 0.9999997999507453, iteration: 120794
loss: 0.992692232131958,grad_norm: 0.9567278964773657, iteration: 120795
loss: 1.0406060218811035,grad_norm: 0.9999995334708693, iteration: 120796
loss: 1.0311342477798462,grad_norm: 0.9999995355374764, iteration: 120797
loss: 0.9913538098335266,grad_norm: 0.9999993525686485, iteration: 120798
loss: 1.0394470691680908,grad_norm: 0.9467760092861368, iteration: 120799
loss: 1.0150560140609741,grad_norm: 0.9791157232624639, iteration: 120800
loss: 1.044431209564209,grad_norm: 0.9999991912777049, iteration: 120801
loss: 1.0129523277282715,grad_norm: 0.9696763128554001, iteration: 120802
loss: 1.0551304817199707,grad_norm: 0.9885723559548995, iteration: 120803
loss: 1.048944115638733,grad_norm: 0.9999992647068687, iteration: 120804
loss: 1.0384230613708496,grad_norm: 0.9999995820483237, iteration: 120805
loss: 0.9933807253837585,grad_norm: 0.9999991479691721, iteration: 120806
loss: 1.0733799934387207,grad_norm: 0.9999990878754059, iteration: 120807
loss: 0.9917833805084229,grad_norm: 0.9999990467400259, iteration: 120808
loss: 1.046761393547058,grad_norm: 0.9999991752902007, iteration: 120809
loss: 1.0243762731552124,grad_norm: 0.999999102835547, iteration: 120810
loss: 1.0605823993682861,grad_norm: 0.9999993123182715, iteration: 120811
loss: 1.002307415008545,grad_norm: 0.9999995927790695, iteration: 120812
loss: 1.0510872602462769,grad_norm: 0.9999994364986137, iteration: 120813
loss: 0.9701782464981079,grad_norm: 0.7676001050835788, iteration: 120814
loss: 1.0241235494613647,grad_norm: 0.8218968066938774, iteration: 120815
loss: 1.041724681854248,grad_norm: 0.9999999039370686, iteration: 120816
loss: 1.0071574449539185,grad_norm: 0.853720294028145, iteration: 120817
loss: 1.0565555095672607,grad_norm: 0.9999995803676874, iteration: 120818
loss: 1.0263166427612305,grad_norm: 0.9999991498805413, iteration: 120819
loss: 1.0940589904785156,grad_norm: 0.9999992565668427, iteration: 120820
loss: 1.0043178796768188,grad_norm: 0.999999281465484, iteration: 120821
loss: 1.0101815462112427,grad_norm: 0.9986613715855556, iteration: 120822
loss: 1.0367470979690552,grad_norm: 0.9643242362373702, iteration: 120823
loss: 1.169601321220398,grad_norm: 0.9999996711618439, iteration: 120824
loss: 1.11784827709198,grad_norm: 0.9999998266115409, iteration: 120825
loss: 1.0110251903533936,grad_norm: 0.999999229548941, iteration: 120826
loss: 1.0931458473205566,grad_norm: 0.9999991817735594, iteration: 120827
loss: 1.007983684539795,grad_norm: 0.9999998383563131, iteration: 120828
loss: 1.150235891342163,grad_norm: 0.8478216002634285, iteration: 120829
loss: 1.0366536378860474,grad_norm: 0.9999992529904205, iteration: 120830
loss: 1.0218256711959839,grad_norm: 0.9999998322594196, iteration: 120831
loss: 1.1251213550567627,grad_norm: 0.9999995734296827, iteration: 120832
loss: 1.2739266157150269,grad_norm: 0.9999998777397775, iteration: 120833
loss: 1.0805389881134033,grad_norm: 0.999999818980349, iteration: 120834
loss: 1.108505129814148,grad_norm: 0.9999992115411088, iteration: 120835
loss: 0.9788686037063599,grad_norm: 0.8462076778519361, iteration: 120836
loss: 1.110306739807129,grad_norm: 0.9999994378445936, iteration: 120837
loss: 1.031459927558899,grad_norm: 0.9999991142472, iteration: 120838
loss: 1.0824226140975952,grad_norm: 0.9999991916321436, iteration: 120839
loss: 1.0921645164489746,grad_norm: 0.9999994268416654, iteration: 120840
loss: 1.1167700290679932,grad_norm: 0.9999989531446762, iteration: 120841
loss: 1.042815089225769,grad_norm: 0.9999990626429632, iteration: 120842
loss: 1.2579665184020996,grad_norm: 0.9999998183054919, iteration: 120843
loss: 1.1204988956451416,grad_norm: 0.9999998227483958, iteration: 120844
loss: 1.1612915992736816,grad_norm: 0.9999995441338665, iteration: 120845
loss: 1.2758878469467163,grad_norm: 0.9999998162366565, iteration: 120846
loss: 1.04530930519104,grad_norm: 0.9999995188000065, iteration: 120847
loss: 1.0039809942245483,grad_norm: 0.9999998728820647, iteration: 120848
loss: 1.0376356840133667,grad_norm: 0.9999999070824276, iteration: 120849
loss: 1.0915958881378174,grad_norm: 0.9999994293155399, iteration: 120850
loss: 1.082032561302185,grad_norm: 0.9999996272498548, iteration: 120851
loss: 1.0263586044311523,grad_norm: 0.9999995081426182, iteration: 120852
loss: 1.1097530126571655,grad_norm: 0.9999996101744094, iteration: 120853
loss: 1.024720311164856,grad_norm: 0.9999989458543334, iteration: 120854
loss: 1.1246317625045776,grad_norm: 0.9999995893567668, iteration: 120855
loss: 1.11720871925354,grad_norm: 0.9999994706781755, iteration: 120856
loss: 1.199532389640808,grad_norm: 1.0000000138983718, iteration: 120857
loss: 1.1153745651245117,grad_norm: 0.9999992582995575, iteration: 120858
loss: 1.0106934309005737,grad_norm: 0.9999992094533975, iteration: 120859
loss: 1.017674207687378,grad_norm: 0.9999998263157666, iteration: 120860
loss: 1.002328634262085,grad_norm: 0.9999990809355728, iteration: 120861
loss: 1.0614161491394043,grad_norm: 0.9999994199500019, iteration: 120862
loss: 1.0411341190338135,grad_norm: 0.9999990707664438, iteration: 120863
loss: 1.0534425973892212,grad_norm: 0.9999994577795259, iteration: 120864
loss: 1.1020721197128296,grad_norm: 0.9999993149229874, iteration: 120865
loss: 1.0208659172058105,grad_norm: 0.8592895071001883, iteration: 120866
loss: 1.038481593132019,grad_norm: 0.9999991180435001, iteration: 120867
loss: 1.0828346014022827,grad_norm: 0.9999992549285592, iteration: 120868
loss: 1.0842297077178955,grad_norm: 0.9999993090412002, iteration: 120869
loss: 1.041595458984375,grad_norm: 0.9999991127655112, iteration: 120870
loss: 1.1575021743774414,grad_norm: 0.999999996853822, iteration: 120871
loss: 1.030076503753662,grad_norm: 0.999999059211079, iteration: 120872
loss: 1.1182806491851807,grad_norm: 0.9999999132163873, iteration: 120873
loss: 1.047230839729309,grad_norm: 0.9999998633068867, iteration: 120874
loss: 1.0814769268035889,grad_norm: 0.9999989468575621, iteration: 120875
loss: 1.1087011098861694,grad_norm: 0.9999993412948248, iteration: 120876
loss: 1.0719306468963623,grad_norm: 0.999999690797354, iteration: 120877
loss: 1.1407545804977417,grad_norm: 0.9999997648076572, iteration: 120878
loss: 1.3249783515930176,grad_norm: 0.9999997066956153, iteration: 120879
loss: 1.0953341722488403,grad_norm: 0.9999996119180923, iteration: 120880
loss: 1.0314674377441406,grad_norm: 0.9999996232323302, iteration: 120881
loss: 1.0522525310516357,grad_norm: 0.9999997353366005, iteration: 120882
loss: 1.2064006328582764,grad_norm: 0.9999998983378577, iteration: 120883
loss: 1.0510164499282837,grad_norm: 0.9999994392197535, iteration: 120884
loss: 1.2806203365325928,grad_norm: 0.9999999480822525, iteration: 120885
loss: 1.0879178047180176,grad_norm: 0.9999999246754375, iteration: 120886
loss: 1.0123043060302734,grad_norm: 0.8020280974270664, iteration: 120887
loss: 1.1190139055252075,grad_norm: 0.9999999088541541, iteration: 120888
loss: 1.0472196340560913,grad_norm: 0.9999993191557028, iteration: 120889
loss: 1.238577127456665,grad_norm: 0.9999998920485417, iteration: 120890
loss: 1.0218698978424072,grad_norm: 0.905619201798035, iteration: 120891
loss: 1.1530736684799194,grad_norm: 0.9999993792254097, iteration: 120892
loss: 1.1252994537353516,grad_norm: 0.9999992617213465, iteration: 120893
loss: 1.0648040771484375,grad_norm: 0.9699861089959789, iteration: 120894
loss: 1.03498113155365,grad_norm: 0.9999991502571349, iteration: 120895
loss: 1.1911282539367676,grad_norm: 0.9999995674415564, iteration: 120896
loss: 1.0527467727661133,grad_norm: 0.9999992096255518, iteration: 120897
loss: 1.0205153226852417,grad_norm: 0.9999996813634852, iteration: 120898
loss: 1.0767338275909424,grad_norm: 0.9999993820100755, iteration: 120899
loss: 1.3378642797470093,grad_norm: 0.999999939347788, iteration: 120900
loss: 1.1202441453933716,grad_norm: 0.9999993615854158, iteration: 120901
loss: 1.1762466430664062,grad_norm: 0.999999934462735, iteration: 120902
loss: 1.0597060918807983,grad_norm: 0.9999992070215892, iteration: 120903
loss: 1.0769884586334229,grad_norm: 0.9999998814244435, iteration: 120904
loss: 1.3557413816452026,grad_norm: 0.9999997942926873, iteration: 120905
loss: 1.1264997720718384,grad_norm: 1.000000022666669, iteration: 120906
loss: 1.0989845991134644,grad_norm: 0.9999998980771081, iteration: 120907
loss: 1.3563083410263062,grad_norm: 0.9999998816058021, iteration: 120908
loss: 1.0308337211608887,grad_norm: 0.9999999075686369, iteration: 120909
loss: 1.111765742301941,grad_norm: 0.9999996974016124, iteration: 120910
loss: 1.1226403713226318,grad_norm: 0.9999995514264121, iteration: 120911
loss: 1.0758795738220215,grad_norm: 0.9999994274204181, iteration: 120912
loss: 0.9861860275268555,grad_norm: 0.9999992694324511, iteration: 120913
loss: 1.079668402671814,grad_norm: 0.9999991840894796, iteration: 120914
loss: 1.1034079790115356,grad_norm: 0.9999996223748205, iteration: 120915
loss: 1.0354350805282593,grad_norm: 0.9999995620414248, iteration: 120916
loss: 0.9907089471817017,grad_norm: 0.9999993370950537, iteration: 120917
loss: 1.0618934631347656,grad_norm: 0.9999990656099685, iteration: 120918
loss: 1.1156721115112305,grad_norm: 0.999999110306965, iteration: 120919
loss: 1.4772429466247559,grad_norm: 0.9999999040996529, iteration: 120920
loss: 1.20932137966156,grad_norm: 0.9999992839091715, iteration: 120921
loss: 1.0729930400848389,grad_norm: 0.9999996773897332, iteration: 120922
loss: 1.069050908088684,grad_norm: 0.9999995322975624, iteration: 120923
loss: 1.1214697360992432,grad_norm: 0.999999317299245, iteration: 120924
loss: 1.0541201829910278,grad_norm: 0.9999999101850088, iteration: 120925
loss: 1.1014082431793213,grad_norm: 0.9999993330392841, iteration: 120926
loss: 1.0461386442184448,grad_norm: 0.9999994314044011, iteration: 120927
loss: 1.0367573499679565,grad_norm: 0.9999991415773999, iteration: 120928
loss: 1.0513217449188232,grad_norm: 0.9999997586462018, iteration: 120929
loss: 1.2358214855194092,grad_norm: 0.9999998016886669, iteration: 120930
loss: 0.9879480600357056,grad_norm: 0.9817450338497403, iteration: 120931
loss: 1.1139261722564697,grad_norm: 0.9999998333820082, iteration: 120932
loss: 1.0198472738265991,grad_norm: 0.9999993582350706, iteration: 120933
loss: 1.283430814743042,grad_norm: 1.0000000399991276, iteration: 120934
loss: 1.0167384147644043,grad_norm: 0.9999992813863385, iteration: 120935
loss: 1.0126675367355347,grad_norm: 0.8761211776841098, iteration: 120936
loss: 1.1379181146621704,grad_norm: 0.9999996133940882, iteration: 120937
loss: 1.0474365949630737,grad_norm: 0.9999998855627996, iteration: 120938
loss: 1.0831310749053955,grad_norm: 0.9999992832822323, iteration: 120939
loss: 1.0823962688446045,grad_norm: 0.9999995451287708, iteration: 120940
loss: 0.9872857928276062,grad_norm: 0.9999992291699378, iteration: 120941
loss: 1.0874180793762207,grad_norm: 0.9999991238482082, iteration: 120942
loss: 1.0039366483688354,grad_norm: 0.9999989961769673, iteration: 120943
loss: 1.1841014623641968,grad_norm: 0.9999999198891242, iteration: 120944
loss: 0.9915462732315063,grad_norm: 0.9999997518927066, iteration: 120945
loss: 1.1222623586654663,grad_norm: 0.9999998893381824, iteration: 120946
loss: 0.9976592063903809,grad_norm: 0.9999998366959586, iteration: 120947
loss: 1.1370933055877686,grad_norm: 0.9999993021337583, iteration: 120948
loss: 1.0264194011688232,grad_norm: 0.8287999998714815, iteration: 120949
loss: 1.0615123510360718,grad_norm: 0.823465970989957, iteration: 120950
loss: 1.0872164964675903,grad_norm: 1.0000000072707993, iteration: 120951
loss: 1.1167408227920532,grad_norm: 0.9999991504598386, iteration: 120952
loss: 1.047692894935608,grad_norm: 0.9999995400388897, iteration: 120953
loss: 1.1109001636505127,grad_norm: 0.9999996906420028, iteration: 120954
loss: 1.1102954149246216,grad_norm: 0.9999992320132535, iteration: 120955
loss: 0.9876973628997803,grad_norm: 0.9999992369948489, iteration: 120956
loss: 1.007856845855713,grad_norm: 0.9999993581079063, iteration: 120957
loss: 0.9978955388069153,grad_norm: 0.9999999765282427, iteration: 120958
loss: 1.0553224086761475,grad_norm: 0.9999992639760396, iteration: 120959
loss: 1.1259833574295044,grad_norm: 0.9999996746491504, iteration: 120960
loss: 1.1531299352645874,grad_norm: 0.999999684046225, iteration: 120961
loss: 1.0275315046310425,grad_norm: 0.9999997062187272, iteration: 120962
loss: 1.0370081663131714,grad_norm: 0.9484950052653246, iteration: 120963
loss: 1.030385136604309,grad_norm: 0.999999439868195, iteration: 120964
loss: 1.060166597366333,grad_norm: 0.9043566538931455, iteration: 120965
loss: 1.0689150094985962,grad_norm: 0.9243530479930824, iteration: 120966
loss: 1.2108707427978516,grad_norm: 0.9999999032770545, iteration: 120967
loss: 1.0425275564193726,grad_norm: 0.9999993798327409, iteration: 120968
loss: 1.0646672248840332,grad_norm: 0.9999999443293293, iteration: 120969
loss: 1.0291355848312378,grad_norm: 0.9999999811208583, iteration: 120970
loss: 1.009645938873291,grad_norm: 0.9999993202337231, iteration: 120971
loss: 1.0441900491714478,grad_norm: 0.999999806852318, iteration: 120972
loss: 1.0833832025527954,grad_norm: 0.9999997176062737, iteration: 120973
loss: 1.088991641998291,grad_norm: 0.9999998155043214, iteration: 120974
loss: 1.0390304327011108,grad_norm: 0.9999992074381999, iteration: 120975
loss: 0.995561957359314,grad_norm: 0.9452419019032056, iteration: 120976
loss: 1.0729446411132812,grad_norm: 0.9999994245931485, iteration: 120977
loss: 1.143378496170044,grad_norm: 0.9999995627848074, iteration: 120978
loss: 1.0097225904464722,grad_norm: 0.9999996982543073, iteration: 120979
loss: 1.0533443689346313,grad_norm: 0.999999178001274, iteration: 120980
loss: 1.0968791246414185,grad_norm: 0.9999996850801209, iteration: 120981
loss: 0.9803404808044434,grad_norm: 0.8359679377030866, iteration: 120982
loss: 1.0564661026000977,grad_norm: 0.9999994489353865, iteration: 120983
loss: 1.1555888652801514,grad_norm: 0.9999999077175252, iteration: 120984
loss: 1.039287805557251,grad_norm: 0.9881549815699388, iteration: 120985
loss: 1.0049585103988647,grad_norm: 0.9391755774898461, iteration: 120986
loss: 1.0266114473342896,grad_norm: 0.7853049699291892, iteration: 120987
loss: 1.0081313848495483,grad_norm: 0.9250628981381485, iteration: 120988
loss: 1.1275525093078613,grad_norm: 0.9999996188986056, iteration: 120989
loss: 0.9857232570648193,grad_norm: 0.9999991747012578, iteration: 120990
loss: 1.0991835594177246,grad_norm: 0.9999999754187917, iteration: 120991
loss: 1.046553134918213,grad_norm: 0.9999990604543094, iteration: 120992
loss: 1.163092851638794,grad_norm: 0.9999995987459388, iteration: 120993
loss: 1.010542869567871,grad_norm: 0.9999992985082903, iteration: 120994
loss: 1.0007740259170532,grad_norm: 0.9999991746463839, iteration: 120995
loss: 1.0950218439102173,grad_norm: 0.9999994880592444, iteration: 120996
loss: 1.101788878440857,grad_norm: 0.999999883157514, iteration: 120997
loss: 0.9898044466972351,grad_norm: 0.9897914003930471, iteration: 120998
loss: 1.0088495016098022,grad_norm: 0.8053268275160772, iteration: 120999
loss: 1.0280539989471436,grad_norm: 0.9999992257861254, iteration: 121000
loss: 1.0770033597946167,grad_norm: 0.9999994172377611, iteration: 121001
loss: 1.0007144212722778,grad_norm: 0.8035444304178316, iteration: 121002
loss: 1.1374260187149048,grad_norm: 0.9999999768547864, iteration: 121003
loss: 0.9682853817939758,grad_norm: 0.9999998538109275, iteration: 121004
loss: 0.9967257380485535,grad_norm: 0.9999993545678494, iteration: 121005
loss: 1.0146645307540894,grad_norm: 0.9225561217806615, iteration: 121006
loss: 1.096671462059021,grad_norm: 0.9999990838565134, iteration: 121007
loss: 1.0639439821243286,grad_norm: 0.9999994183583057, iteration: 121008
loss: 1.0285072326660156,grad_norm: 0.9999998425833228, iteration: 121009
loss: 1.1003011465072632,grad_norm: 0.9999996119655873, iteration: 121010
loss: 0.9647113680839539,grad_norm: 0.7464741378703263, iteration: 121011
loss: 1.075332522392273,grad_norm: 0.8701410961880393, iteration: 121012
loss: 1.0082695484161377,grad_norm: 0.8914026771979454, iteration: 121013
loss: 1.0083376169204712,grad_norm: 0.9501380344244847, iteration: 121014
loss: 1.0580482482910156,grad_norm: 0.954526769260359, iteration: 121015
loss: 0.9912832975387573,grad_norm: 0.9999990526672365, iteration: 121016
loss: 1.072950005531311,grad_norm: 0.9999995285769528, iteration: 121017
loss: 1.0750130414962769,grad_norm: 1.0000000073797095, iteration: 121018
loss: 0.9738698601722717,grad_norm: 0.9999999840545839, iteration: 121019
loss: 1.0224906206130981,grad_norm: 0.9999993238149889, iteration: 121020
loss: 1.0727334022521973,grad_norm: 0.9429946172997552, iteration: 121021
loss: 1.0827009677886963,grad_norm: 0.9999993742834015, iteration: 121022
loss: 1.0289195775985718,grad_norm: 0.7179294155726136, iteration: 121023
loss: 1.1336655616760254,grad_norm: 0.9999991890092065, iteration: 121024
loss: 1.013321876525879,grad_norm: 0.9999993511553518, iteration: 121025
loss: 1.0350372791290283,grad_norm: 0.9999992181151037, iteration: 121026
loss: 1.045580506324768,grad_norm: 0.9999999375242778, iteration: 121027
loss: 1.0809946060180664,grad_norm: 0.9999995731690329, iteration: 121028
loss: 0.9867483377456665,grad_norm: 0.9999991221632549, iteration: 121029
loss: 1.0472686290740967,grad_norm: 0.9999990991755843, iteration: 121030
loss: 1.0427443981170654,grad_norm: 0.9999992587249102, iteration: 121031
loss: 0.9962891936302185,grad_norm: 0.9999999130881445, iteration: 121032
loss: 1.5579415559768677,grad_norm: 1.0000000715076751, iteration: 121033
loss: 1.0089256763458252,grad_norm: 0.9999996231645009, iteration: 121034
loss: 0.9906452894210815,grad_norm: 0.9999999470371742, iteration: 121035
loss: 1.059059977531433,grad_norm: 0.9999990657794371, iteration: 121036
loss: 1.1367063522338867,grad_norm: 0.9999994980103921, iteration: 121037
loss: 0.9945396184921265,grad_norm: 0.990556543762013, iteration: 121038
loss: 1.0189145803451538,grad_norm: 0.8858423966555758, iteration: 121039
loss: 1.03481924533844,grad_norm: 0.9999993690333047, iteration: 121040
loss: 1.059343695640564,grad_norm: 0.9999996327989028, iteration: 121041
loss: 1.046297311782837,grad_norm: 0.9999996315588543, iteration: 121042
loss: 1.0137437582015991,grad_norm: 0.9999991498604933, iteration: 121043
loss: 1.0053715705871582,grad_norm: 0.8933588950008329, iteration: 121044
loss: 1.0959562063217163,grad_norm: 0.9999998371284642, iteration: 121045
loss: 1.0608904361724854,grad_norm: 0.9999994463880684, iteration: 121046
loss: 1.0091745853424072,grad_norm: 0.9999990928108365, iteration: 121047
loss: 1.0300215482711792,grad_norm: 0.9147181475110544, iteration: 121048
loss: 1.077934980392456,grad_norm: 0.9999995454620725, iteration: 121049
loss: 1.0194047689437866,grad_norm: 0.9999991133805097, iteration: 121050
loss: 1.0077977180480957,grad_norm: 0.8090052508724765, iteration: 121051
loss: 1.0630059242248535,grad_norm: 0.9999999325976855, iteration: 121052
loss: 0.961898684501648,grad_norm: 0.9999990325123568, iteration: 121053
loss: 1.0747510194778442,grad_norm: 0.953263491935819, iteration: 121054
loss: 1.0332245826721191,grad_norm: 0.9999998570806212, iteration: 121055
loss: 1.023507833480835,grad_norm: 0.7401561710554314, iteration: 121056
loss: 0.9983574151992798,grad_norm: 0.7891171153332852, iteration: 121057
loss: 1.0296518802642822,grad_norm: 0.9755908398878452, iteration: 121058
loss: 1.0039503574371338,grad_norm: 0.7899487538760737, iteration: 121059
loss: 1.0381461381912231,grad_norm: 0.999999424298823, iteration: 121060
loss: 0.9513843655586243,grad_norm: 0.9999999701127588, iteration: 121061
loss: 0.9707120060920715,grad_norm: 0.9999991109576812, iteration: 121062
loss: 1.0378330945968628,grad_norm: 0.9999991920789204, iteration: 121063
loss: 0.9992256760597229,grad_norm: 0.9999999352129461, iteration: 121064
loss: 0.9618161916732788,grad_norm: 0.9999991629664248, iteration: 121065
loss: 1.0104005336761475,grad_norm: 0.9999992887245629, iteration: 121066
loss: 1.032867193222046,grad_norm: 0.7681243709666362, iteration: 121067
loss: 1.0988073348999023,grad_norm: 0.9520039147776864, iteration: 121068
loss: 1.0093438625335693,grad_norm: 0.79438009265394, iteration: 121069
loss: 1.0477982759475708,grad_norm: 0.9999995408769509, iteration: 121070
loss: 1.0483763217926025,grad_norm: 0.8715599305456361, iteration: 121071
loss: 0.9762755036354065,grad_norm: 0.9999991874214675, iteration: 121072
loss: 1.0049365758895874,grad_norm: 0.9999994119602557, iteration: 121073
loss: 1.0449535846710205,grad_norm: 0.9999995643620336, iteration: 121074
loss: 1.0124818086624146,grad_norm: 0.7888966676972538, iteration: 121075
loss: 1.204824686050415,grad_norm: 1.0000000331263377, iteration: 121076
loss: 0.9948512315750122,grad_norm: 0.9052538056061901, iteration: 121077
loss: 1.0955853462219238,grad_norm: 0.9999997446769153, iteration: 121078
loss: 1.0085444450378418,grad_norm: 0.9999992882275955, iteration: 121079
loss: 1.0220438241958618,grad_norm: 0.8081422772440524, iteration: 121080
loss: 0.9970583915710449,grad_norm: 0.962383151948096, iteration: 121081
loss: 0.944049060344696,grad_norm: 0.9271067558611513, iteration: 121082
loss: 1.04912269115448,grad_norm: 0.9999994346693487, iteration: 121083
loss: 1.0682532787322998,grad_norm: 0.99999958298688, iteration: 121084
loss: 1.0701618194580078,grad_norm: 0.9999998750991843, iteration: 121085
loss: 1.060300350189209,grad_norm: 0.999999484039391, iteration: 121086
loss: 1.02480149269104,grad_norm: 0.8874330377307953, iteration: 121087
loss: 0.9827060699462891,grad_norm: 0.8720994203108793, iteration: 121088
loss: 1.0640406608581543,grad_norm: 0.9321824590537314, iteration: 121089
loss: 1.1244242191314697,grad_norm: 0.9999994870302896, iteration: 121090
loss: 1.1006616353988647,grad_norm: 0.9999994983829699, iteration: 121091
loss: 0.9904823303222656,grad_norm: 0.9433158000664023, iteration: 121092
loss: 0.9828855991363525,grad_norm: 0.8115053361825484, iteration: 121093
loss: 0.9859273433685303,grad_norm: 0.9443101072996656, iteration: 121094
loss: 0.9866505861282349,grad_norm: 0.9999994233872349, iteration: 121095
loss: 1.076174259185791,grad_norm: 1.0000000259298192, iteration: 121096
loss: 0.9769665002822876,grad_norm: 0.9999992367952605, iteration: 121097
loss: 1.0049467086791992,grad_norm: 0.9999991229365427, iteration: 121098
loss: 1.0482484102249146,grad_norm: 0.9999994391754103, iteration: 121099
loss: 1.023785948753357,grad_norm: 0.999999352665687, iteration: 121100
loss: 0.9811489582061768,grad_norm: 0.8583589980745341, iteration: 121101
loss: 1.0298457145690918,grad_norm: 0.9999995613513298, iteration: 121102
loss: 1.0241490602493286,grad_norm: 0.9999997871880907, iteration: 121103
loss: 1.0433006286621094,grad_norm: 0.9999991961246192, iteration: 121104
loss: 1.0580984354019165,grad_norm: 0.8157257267361903, iteration: 121105
loss: 1.1189740896224976,grad_norm: 0.9999997337392146, iteration: 121106
loss: 1.0516527891159058,grad_norm: 0.9999994886748176, iteration: 121107
loss: 1.0676616430282593,grad_norm: 0.9999994151228964, iteration: 121108
loss: 0.9827893972396851,grad_norm: 0.9503252546941774, iteration: 121109
loss: 1.1121548414230347,grad_norm: 0.9999991577231265, iteration: 121110
loss: 1.0250612497329712,grad_norm: 0.9999990250138192, iteration: 121111
loss: 0.9899781346321106,grad_norm: 0.9999994870165481, iteration: 121112
loss: 1.0106943845748901,grad_norm: 0.7344865013440773, iteration: 121113
loss: 1.0528011322021484,grad_norm: 0.9999991624293958, iteration: 121114
loss: 1.015061616897583,grad_norm: 0.9999996003717018, iteration: 121115
loss: 1.072811245918274,grad_norm: 0.9999990084363999, iteration: 121116
loss: 0.9599491953849792,grad_norm: 0.9105190514681285, iteration: 121117
loss: 1.0422217845916748,grad_norm: 0.9457350816328793, iteration: 121118
loss: 1.033673644065857,grad_norm: 0.9999992621992181, iteration: 121119
loss: 0.9943419694900513,grad_norm: 0.9449825283796391, iteration: 121120
loss: 1.0042182207107544,grad_norm: 0.9440784803992881, iteration: 121121
loss: 1.0183759927749634,grad_norm: 0.9999991630144139, iteration: 121122
loss: 1.042888879776001,grad_norm: 0.9999993472609052, iteration: 121123
loss: 1.0415388345718384,grad_norm: 0.9999990675575824, iteration: 121124
loss: 0.9914839863777161,grad_norm: 0.9999992939741128, iteration: 121125
loss: 1.039817214012146,grad_norm: 0.9999990939955314, iteration: 121126
loss: 1.0173511505126953,grad_norm: 0.9999993409453743, iteration: 121127
loss: 1.0598413944244385,grad_norm: 0.9999996359778991, iteration: 121128
loss: 1.0039047002792358,grad_norm: 0.9610439958465739, iteration: 121129
loss: 1.0172759294509888,grad_norm: 0.9999997030311318, iteration: 121130
loss: 0.9669533967971802,grad_norm: 0.9999991077276483, iteration: 121131
loss: 0.9925151467323303,grad_norm: 0.9509580145714006, iteration: 121132
loss: 1.0271713733673096,grad_norm: 0.9875596701498716, iteration: 121133
loss: 0.9584276080131531,grad_norm: 0.9552622961419217, iteration: 121134
loss: 0.9969746470451355,grad_norm: 0.9999997095930864, iteration: 121135
loss: 1.0494544506072998,grad_norm: 0.9362431640197467, iteration: 121136
loss: 0.9666004180908203,grad_norm: 0.7554106461840067, iteration: 121137
loss: 0.9765673875808716,grad_norm: 0.8873435490874987, iteration: 121138
loss: 0.9553540349006653,grad_norm: 0.8651627697972866, iteration: 121139
loss: 1.1755391359329224,grad_norm: 0.9999991433394515, iteration: 121140
loss: 0.9896758198738098,grad_norm: 0.9999990541716148, iteration: 121141
loss: 1.0564932823181152,grad_norm: 0.9999993021638616, iteration: 121142
loss: 1.0193462371826172,grad_norm: 0.9999999309826973, iteration: 121143
loss: 0.9810090065002441,grad_norm: 0.8649056121344236, iteration: 121144
loss: 1.0537736415863037,grad_norm: 1.0000000556921074, iteration: 121145
loss: 1.0767021179199219,grad_norm: 0.942448665700063, iteration: 121146
loss: 1.0829991102218628,grad_norm: 0.9999996991529412, iteration: 121147
loss: 1.0301549434661865,grad_norm: 0.9999996671918244, iteration: 121148
loss: 1.0338948965072632,grad_norm: 0.9999995459317883, iteration: 121149
loss: 1.0514240264892578,grad_norm: 0.999999133215459, iteration: 121150
loss: 0.9922107458114624,grad_norm: 0.9999991251700305, iteration: 121151
loss: 0.976190447807312,grad_norm: 0.9999991636850862, iteration: 121152
loss: 1.112143635749817,grad_norm: 0.9999990879098236, iteration: 121153
loss: 1.1236974000930786,grad_norm: 0.9999992514500021, iteration: 121154
loss: 1.0198884010314941,grad_norm: 0.9082456499512874, iteration: 121155
loss: 1.1282440423965454,grad_norm: 0.9999991502656068, iteration: 121156
loss: 1.1099727153778076,grad_norm: 0.9999998283657002, iteration: 121157
loss: 1.0384817123413086,grad_norm: 0.9520393042794907, iteration: 121158
loss: 0.984320342540741,grad_norm: 0.9005964100549829, iteration: 121159
loss: 1.002978801727295,grad_norm: 0.8773455005269835, iteration: 121160
loss: 0.9689946174621582,grad_norm: 0.9999991570170865, iteration: 121161
loss: 1.0720216035842896,grad_norm: 0.9999997393757346, iteration: 121162
loss: 1.053972601890564,grad_norm: 0.9509405351412413, iteration: 121163
loss: 1.0083527565002441,grad_norm: 0.8988442054160203, iteration: 121164
loss: 1.0299599170684814,grad_norm: 0.999999651535869, iteration: 121165
loss: 1.0418527126312256,grad_norm: 0.8916821507373182, iteration: 121166
loss: 0.9964313507080078,grad_norm: 0.9080901113778423, iteration: 121167
loss: 1.07361900806427,grad_norm: 0.999999177700429, iteration: 121168
loss: 1.1433463096618652,grad_norm: 0.9999991444113631, iteration: 121169
loss: 1.0652347803115845,grad_norm: 1.0000000419135333, iteration: 121170
loss: 0.9867127537727356,grad_norm: 0.8118528070212737, iteration: 121171
loss: 1.0693625211715698,grad_norm: 0.747031623827914, iteration: 121172
loss: 1.0989562273025513,grad_norm: 0.9999991485294119, iteration: 121173
loss: 1.0047932863235474,grad_norm: 0.9605288562300832, iteration: 121174
loss: 1.0310341119766235,grad_norm: 0.9609029635216473, iteration: 121175
loss: 1.0059244632720947,grad_norm: 0.9999991489949527, iteration: 121176
loss: 1.0255204439163208,grad_norm: 0.9882966206640005, iteration: 121177
loss: 1.0046305656433105,grad_norm: 0.942387492787346, iteration: 121178
loss: 1.054017186164856,grad_norm: 0.9999995097372136, iteration: 121179
loss: 1.0242949724197388,grad_norm: 0.999999048574459, iteration: 121180
loss: 1.0199235677719116,grad_norm: 0.9999990788118963, iteration: 121181
loss: 1.049086093902588,grad_norm: 0.9999991192085531, iteration: 121182
loss: 0.9993131756782532,grad_norm: 0.9228857598676192, iteration: 121183
loss: 1.0319738388061523,grad_norm: 0.9548824526158392, iteration: 121184
loss: 0.9713351130485535,grad_norm: 0.9288476341181423, iteration: 121185
loss: 1.0165938138961792,grad_norm: 0.999999073027017, iteration: 121186
loss: 1.0167511701583862,grad_norm: 0.8968460490059392, iteration: 121187
loss: 1.0405513048171997,grad_norm: 0.9999999233190483, iteration: 121188
loss: 1.0547806024551392,grad_norm: 0.9999997822904292, iteration: 121189
loss: 1.037714958190918,grad_norm: 0.9999995706184536, iteration: 121190
loss: 1.050967812538147,grad_norm: 0.9477920106021598, iteration: 121191
loss: 1.1327120065689087,grad_norm: 0.9999999177234377, iteration: 121192
loss: 1.0071208477020264,grad_norm: 0.8553145625715721, iteration: 121193
loss: 1.5914480686187744,grad_norm: 0.9999997263336755, iteration: 121194
loss: 1.0269391536712646,grad_norm: 0.9392468442122209, iteration: 121195
loss: 1.163748860359192,grad_norm: 0.9999999696260068, iteration: 121196
loss: 1.030726671218872,grad_norm: 0.999999793457023, iteration: 121197
loss: 0.9942522644996643,grad_norm: 0.8992081100316808, iteration: 121198
loss: 0.99000084400177,grad_norm: 0.9999990565815673, iteration: 121199
loss: 0.9979367852210999,grad_norm: 0.8575397293566216, iteration: 121200
loss: 0.9930298924446106,grad_norm: 0.8308702518676979, iteration: 121201
loss: 1.01707124710083,grad_norm: 0.9999990586337598, iteration: 121202
loss: 1.0768688917160034,grad_norm: 0.9999991795120176, iteration: 121203
loss: 1.0173895359039307,grad_norm: 0.9481058789106581, iteration: 121204
loss: 0.9994881749153137,grad_norm: 0.999999061633756, iteration: 121205
loss: 1.022544264793396,grad_norm: 0.8481265425200424, iteration: 121206
loss: 0.9929270148277283,grad_norm: 0.8712964577752123, iteration: 121207
loss: 1.0397868156433105,grad_norm: 0.999999151811421, iteration: 121208
loss: 1.0172364711761475,grad_norm: 0.9999992872574108, iteration: 121209
loss: 1.1070613861083984,grad_norm: 0.9999991666280013, iteration: 121210
loss: 1.0154757499694824,grad_norm: 0.9999993001331483, iteration: 121211
loss: 1.1415939331054688,grad_norm: 0.9999991773417579, iteration: 121212
loss: 1.0031758546829224,grad_norm: 0.9891064467358032, iteration: 121213
loss: 1.0551449060440063,grad_norm: 0.9999992431959153, iteration: 121214
loss: 1.004043698310852,grad_norm: 0.9999994385729006, iteration: 121215
loss: 0.9823544025421143,grad_norm: 0.9501782047002902, iteration: 121216
loss: 0.9846658706665039,grad_norm: 0.8517064784811491, iteration: 121217
loss: 1.035820484161377,grad_norm: 0.9999991468533478, iteration: 121218
loss: 1.004294753074646,grad_norm: 0.8438594574897174, iteration: 121219
loss: 1.0022673606872559,grad_norm: 0.9136806506426498, iteration: 121220
loss: 1.050038456916809,grad_norm: 0.9999996194160446, iteration: 121221
loss: 0.9907474517822266,grad_norm: 0.990956685410975, iteration: 121222
loss: 0.9666407108306885,grad_norm: 0.9225708349315935, iteration: 121223
loss: 1.0656216144561768,grad_norm: 0.9999994935162683, iteration: 121224
loss: 1.0736892223358154,grad_norm: 0.9999997618115732, iteration: 121225
loss: 1.0350106954574585,grad_norm: 0.9999992215588305, iteration: 121226
loss: 0.9892333745956421,grad_norm: 0.9999989648777785, iteration: 121227
loss: 1.0648181438446045,grad_norm: 0.999999370627168, iteration: 121228
loss: 0.9696275591850281,grad_norm: 0.87661514041171, iteration: 121229
loss: 1.0452536344528198,grad_norm: 0.9999995780053869, iteration: 121230
loss: 1.0897718667984009,grad_norm: 0.9999997162059721, iteration: 121231
loss: 1.018902063369751,grad_norm: 0.760225639422257, iteration: 121232
loss: 1.1257504224777222,grad_norm: 0.9999993224637691, iteration: 121233
loss: 1.0579115152359009,grad_norm: 0.9999990498460353, iteration: 121234
loss: 1.0673675537109375,grad_norm: 0.9424946736855622, iteration: 121235
loss: 1.2036207914352417,grad_norm: 0.9999996254581244, iteration: 121236
loss: 1.0296704769134521,grad_norm: 0.9999989872381302, iteration: 121237
loss: 1.1232722997665405,grad_norm: 0.9999999115394649, iteration: 121238
loss: 1.0780844688415527,grad_norm: 0.9999995565876392, iteration: 121239
loss: 1.0373847484588623,grad_norm: 0.9999990878988673, iteration: 121240
loss: 1.06572687625885,grad_norm: 0.9999995475692001, iteration: 121241
loss: 1.096913456916809,grad_norm: 0.999999749380388, iteration: 121242
loss: 1.0369467735290527,grad_norm: 0.9999992257965045, iteration: 121243
loss: 1.1148377656936646,grad_norm: 0.9999994259936698, iteration: 121244
loss: 1.025380253791809,grad_norm: 0.99999910226723, iteration: 121245
loss: 0.996159553527832,grad_norm: 0.9475348326365953, iteration: 121246
loss: 1.0198136568069458,grad_norm: 0.9999996876364639, iteration: 121247
loss: 1.0844697952270508,grad_norm: 0.9999993720472404, iteration: 121248
loss: 1.1179277896881104,grad_norm: 0.9999998666561148, iteration: 121249
loss: 1.0197410583496094,grad_norm: 0.9184839928668245, iteration: 121250
loss: 1.0457149744033813,grad_norm: 0.9999996666616426, iteration: 121251
loss: 1.0355939865112305,grad_norm: 0.9999999583001619, iteration: 121252
loss: 0.9906667470932007,grad_norm: 0.8032874283351737, iteration: 121253
loss: 1.0006943941116333,grad_norm: 0.8871525352733344, iteration: 121254
loss: 0.9947838187217712,grad_norm: 0.9999990839564595, iteration: 121255
loss: 1.0602076053619385,grad_norm: 0.9999990939684119, iteration: 121256
loss: 1.1405609846115112,grad_norm: 0.9999993464197586, iteration: 121257
loss: 1.1001391410827637,grad_norm: 0.9999999186144053, iteration: 121258
loss: 1.141497015953064,grad_norm: 0.9999990088666096, iteration: 121259
loss: 1.0348269939422607,grad_norm: 0.9999994046110386, iteration: 121260
loss: 1.0700483322143555,grad_norm: 0.9999997478648037, iteration: 121261
loss: 1.0515778064727783,grad_norm: 0.9999992024971204, iteration: 121262
loss: 1.1591770648956299,grad_norm: 0.9999992613276693, iteration: 121263
loss: 1.0131099224090576,grad_norm: 0.9999990322651777, iteration: 121264
loss: 0.9944907426834106,grad_norm: 0.7542847771513959, iteration: 121265
loss: 1.043089747428894,grad_norm: 0.9999993863083371, iteration: 121266
loss: 1.0398310422897339,grad_norm: 0.9999996991773161, iteration: 121267
loss: 1.0832418203353882,grad_norm: 0.9999998562417409, iteration: 121268
loss: 1.1323639154434204,grad_norm: 1.0000000260768425, iteration: 121269
loss: 1.063860535621643,grad_norm: 0.9999990951007983, iteration: 121270
loss: 1.0521531105041504,grad_norm: 0.9999990785834392, iteration: 121271
loss: 1.0213404893875122,grad_norm: 0.8841685662167743, iteration: 121272
loss: 1.0874500274658203,grad_norm: 0.9999999454565525, iteration: 121273
loss: 1.0642478466033936,grad_norm: 0.9999992845614596, iteration: 121274
loss: 1.1804018020629883,grad_norm: 0.9999998005133032, iteration: 121275
loss: 1.2884070873260498,grad_norm: 0.9999998143592282, iteration: 121276
loss: 1.0690845251083374,grad_norm: 0.835368786281615, iteration: 121277
loss: 1.0178722143173218,grad_norm: 0.9999990831432973, iteration: 121278
loss: 1.0098791122436523,grad_norm: 0.9892049785894824, iteration: 121279
loss: 1.0146763324737549,grad_norm: 0.9385452429331831, iteration: 121280
loss: 1.2285337448120117,grad_norm: 0.9999997421813809, iteration: 121281
loss: 0.9908231496810913,grad_norm: 0.798761705690203, iteration: 121282
loss: 1.0614101886749268,grad_norm: 0.968593954384984, iteration: 121283
loss: 1.0128382444381714,grad_norm: 0.9999991992090306, iteration: 121284
loss: 1.0361541509628296,grad_norm: 0.9999997697246379, iteration: 121285
loss: 1.043258786201477,grad_norm: 0.9999998798750047, iteration: 121286
loss: 1.044182538986206,grad_norm: 0.9999997908588752, iteration: 121287
loss: 0.9999650120735168,grad_norm: 0.9999996307688331, iteration: 121288
loss: 1.0524262189865112,grad_norm: 0.9999998586458543, iteration: 121289
loss: 1.026233434677124,grad_norm: 0.9391250905474762, iteration: 121290
loss: 1.0255646705627441,grad_norm: 0.9999998536193931, iteration: 121291
loss: 1.003827452659607,grad_norm: 0.7893197892568262, iteration: 121292
loss: 0.9952995777130127,grad_norm: 0.9825236472379814, iteration: 121293
loss: 0.9994698762893677,grad_norm: 0.9999999106232224, iteration: 121294
loss: 1.007447600364685,grad_norm: 0.9688210390626575, iteration: 121295
loss: 1.0880601406097412,grad_norm: 0.9999995764665994, iteration: 121296
loss: 1.1785932779312134,grad_norm: 0.9999997118570423, iteration: 121297
loss: 1.0446242094039917,grad_norm: 0.9800669273443526, iteration: 121298
loss: 1.0175508260726929,grad_norm: 0.9541657733931947, iteration: 121299
loss: 1.0047075748443604,grad_norm: 0.8937307107673028, iteration: 121300
loss: 1.0055437088012695,grad_norm: 0.9999999459172673, iteration: 121301
loss: 1.0115243196487427,grad_norm: 0.9999994803136046, iteration: 121302
loss: 1.0397517681121826,grad_norm: 0.9999994603674445, iteration: 121303
loss: 1.0204275846481323,grad_norm: 0.90728933818992, iteration: 121304
loss: 1.0380237102508545,grad_norm: 0.9999998699070478, iteration: 121305
loss: 1.0202522277832031,grad_norm: 0.9999999439643039, iteration: 121306
loss: 1.105685830116272,grad_norm: 1.0000000205808546, iteration: 121307
loss: 1.00967538356781,grad_norm: 0.9999993986846683, iteration: 121308
loss: 1.0503976345062256,grad_norm: 0.893990809986555, iteration: 121309
loss: 0.9722248315811157,grad_norm: 0.8608261580761639, iteration: 121310
loss: 1.1069707870483398,grad_norm: 1.000000011633316, iteration: 121311
loss: 0.9774106740951538,grad_norm: 0.9179240864350632, iteration: 121312
loss: 1.003158450126648,grad_norm: 0.8956296569046984, iteration: 121313
loss: 1.0085811614990234,grad_norm: 0.7866925284765904, iteration: 121314
loss: 1.0317275524139404,grad_norm: 0.95333631724917, iteration: 121315
loss: 0.9654079079627991,grad_norm: 0.8238519291232034, iteration: 121316
loss: 1.07738196849823,grad_norm: 0.99999911646522, iteration: 121317
loss: 1.034105896949768,grad_norm: 0.9999991404975671, iteration: 121318
loss: 1.0684393644332886,grad_norm: 0.9999993725971388, iteration: 121319
loss: 1.0510320663452148,grad_norm: 1.000000026237362, iteration: 121320
loss: 1.0097376108169556,grad_norm: 0.9999991213577689, iteration: 121321
loss: 1.0101372003555298,grad_norm: 0.9999990785478279, iteration: 121322
loss: 1.2116930484771729,grad_norm: 0.9999994536981998, iteration: 121323
loss: 1.069826364517212,grad_norm: 0.9830171602475709, iteration: 121324
loss: 1.0010650157928467,grad_norm: 0.7730583485640315, iteration: 121325
loss: 1.1143746376037598,grad_norm: 0.9999996894100074, iteration: 121326
loss: 0.9701594710350037,grad_norm: 0.9147836036575391, iteration: 121327
loss: 0.9920740723609924,grad_norm: 0.9422854578689992, iteration: 121328
loss: 1.0976803302764893,grad_norm: 0.9999997099678102, iteration: 121329
loss: 1.0040919780731201,grad_norm: 0.9999993517432769, iteration: 121330
loss: 0.9767521619796753,grad_norm: 0.9191662673834846, iteration: 121331
loss: 1.005149245262146,grad_norm: 0.7474727849317039, iteration: 121332
loss: 1.0111368894577026,grad_norm: 0.9999994426942553, iteration: 121333
loss: 1.0401805639266968,grad_norm: 0.9999994694127274, iteration: 121334
loss: 1.118820309638977,grad_norm: 0.9999999715076088, iteration: 121335
loss: 1.1457356214523315,grad_norm: 0.9999998287583381, iteration: 121336
loss: 1.0216037034988403,grad_norm: 0.8882457616640629, iteration: 121337
loss: 1.1468305587768555,grad_norm: 0.9999997874635767, iteration: 121338
loss: 1.0195409059524536,grad_norm: 0.9631452961036936, iteration: 121339
loss: 1.0268540382385254,grad_norm: 0.9002105955689551, iteration: 121340
loss: 1.0110312700271606,grad_norm: 0.9999992232125642, iteration: 121341
loss: 1.1172091960906982,grad_norm: 0.9999993081840178, iteration: 121342
loss: 0.9794459342956543,grad_norm: 0.9492357738745439, iteration: 121343
loss: 1.2725526094436646,grad_norm: 0.999999835590017, iteration: 121344
loss: 1.0318681001663208,grad_norm: 0.8652689241422725, iteration: 121345
loss: 1.0287286043167114,grad_norm: 0.9890162043222136, iteration: 121346
loss: 1.225069522857666,grad_norm: 0.9999993922003434, iteration: 121347
loss: 1.0358103513717651,grad_norm: 0.9999989864074315, iteration: 121348
loss: 1.0641651153564453,grad_norm: 0.9999996318787575, iteration: 121349
loss: 1.1337635517120361,grad_norm: 0.9999996635474897, iteration: 121350
loss: 1.064972162246704,grad_norm: 0.9999992153232519, iteration: 121351
loss: 1.1335794925689697,grad_norm: 0.9999993860319266, iteration: 121352
loss: 1.2700964212417603,grad_norm: 0.9999999049174201, iteration: 121353
loss: 0.9723965525627136,grad_norm: 0.9099983190517116, iteration: 121354
loss: 1.0073530673980713,grad_norm: 0.9985040253272911, iteration: 121355
loss: 1.183794617652893,grad_norm: 0.9999992854510329, iteration: 121356
loss: 1.048492431640625,grad_norm: 0.9362586119626194, iteration: 121357
loss: 1.0082111358642578,grad_norm: 0.9999996666950425, iteration: 121358
loss: 1.097670316696167,grad_norm: 0.9999999051548499, iteration: 121359
loss: 1.0533862113952637,grad_norm: 0.9999996655627844, iteration: 121360
loss: 1.0162761211395264,grad_norm: 0.9999993377516384, iteration: 121361
loss: 0.9942196011543274,grad_norm: 0.9344146544854942, iteration: 121362
loss: 0.9581321477890015,grad_norm: 0.94166137832704, iteration: 121363
loss: 1.0888901948928833,grad_norm: 0.9999997586735521, iteration: 121364
loss: 1.0290534496307373,grad_norm: 0.7961792617181784, iteration: 121365
loss: 0.976191520690918,grad_norm: 0.9847133016389878, iteration: 121366
loss: 1.0426743030548096,grad_norm: 0.999999358308135, iteration: 121367
loss: 1.0603471994400024,grad_norm: 0.9999999234187437, iteration: 121368
loss: 0.9766294956207275,grad_norm: 0.8635147947706149, iteration: 121369
loss: 0.9736199975013733,grad_norm: 0.9999993313551835, iteration: 121370
loss: 0.9831392765045166,grad_norm: 0.9999997079907281, iteration: 121371
loss: 0.933028519153595,grad_norm: 0.9600941113432668, iteration: 121372
loss: 1.0382869243621826,grad_norm: 0.9999993513722721, iteration: 121373
loss: 1.0851643085479736,grad_norm: 0.9258249153074638, iteration: 121374
loss: 0.9932482242584229,grad_norm: 0.9664740240794663, iteration: 121375
loss: 0.995978057384491,grad_norm: 0.8216844284572812, iteration: 121376
loss: 1.0707770586013794,grad_norm: 0.9999995243350644, iteration: 121377
loss: 1.0215950012207031,grad_norm: 0.9999996123738744, iteration: 121378
loss: 0.9958011507987976,grad_norm: 0.9999999541033368, iteration: 121379
loss: 0.9698217511177063,grad_norm: 0.8121911632542594, iteration: 121380
loss: 0.9978387951850891,grad_norm: 0.9999996624115101, iteration: 121381
loss: 1.0226560831069946,grad_norm: 0.999999807955009, iteration: 121382
loss: 1.0559256076812744,grad_norm: 0.9999991372288042, iteration: 121383
loss: 1.1325693130493164,grad_norm: 0.9999997966635277, iteration: 121384
loss: 1.0331062078475952,grad_norm: 0.8819978409067901, iteration: 121385
loss: 1.092907190322876,grad_norm: 0.9999993145215872, iteration: 121386
loss: 1.0082820653915405,grad_norm: 0.9999991170265691, iteration: 121387
loss: 1.0074141025543213,grad_norm: 0.9313171761324551, iteration: 121388
loss: 0.9580584764480591,grad_norm: 0.8325632258123177, iteration: 121389
loss: 1.0346180200576782,grad_norm: 0.9999994063211186, iteration: 121390
loss: 0.9805039167404175,grad_norm: 0.8882426136067819, iteration: 121391
loss: 1.0561727285385132,grad_norm: 0.9999997656902426, iteration: 121392
loss: 1.009275197982788,grad_norm: 0.8174407055041287, iteration: 121393
loss: 1.105825662612915,grad_norm: 0.9999995827590221, iteration: 121394
loss: 1.0533015727996826,grad_norm: 1.0000000191252796, iteration: 121395
loss: 1.0048737525939941,grad_norm: 0.893705670943108, iteration: 121396
loss: 1.0546324253082275,grad_norm: 0.9999991683390582, iteration: 121397
loss: 1.1965858936309814,grad_norm: 0.9999999126034974, iteration: 121398
loss: 0.988326370716095,grad_norm: 0.856860555484485, iteration: 121399
loss: 0.9992645978927612,grad_norm: 0.9999990764656378, iteration: 121400
loss: 1.0597323179244995,grad_norm: 0.9999992145411045, iteration: 121401
loss: 1.0272904634475708,grad_norm: 0.9999998897229838, iteration: 121402
loss: 1.0323604345321655,grad_norm: 0.9999990841705675, iteration: 121403
loss: 1.0697404146194458,grad_norm: 0.9999993813903222, iteration: 121404
loss: 1.005542278289795,grad_norm: 0.9414210101503685, iteration: 121405
loss: 1.0286678075790405,grad_norm: 0.999999872510445, iteration: 121406
loss: 1.0374104976654053,grad_norm: 0.9640091709619485, iteration: 121407
loss: 0.9972635507583618,grad_norm: 0.956583269047686, iteration: 121408
loss: 0.9584386348724365,grad_norm: 0.9317208648891968, iteration: 121409
loss: 1.0406383275985718,grad_norm: 0.9508816055526332, iteration: 121410
loss: 1.2330025434494019,grad_norm: 0.9999994927524135, iteration: 121411
loss: 1.0133826732635498,grad_norm: 0.9999997455681596, iteration: 121412
loss: 1.080719232559204,grad_norm: 0.9999997178099879, iteration: 121413
loss: 1.0148296356201172,grad_norm: 0.9999994630295784, iteration: 121414
loss: 1.0197179317474365,grad_norm: 0.963964374740605, iteration: 121415
loss: 1.0077157020568848,grad_norm: 0.907527913389303, iteration: 121416
loss: 1.0297892093658447,grad_norm: 0.9999995259911197, iteration: 121417
loss: 1.2690805196762085,grad_norm: 0.9999997684702459, iteration: 121418
loss: 1.046481966972351,grad_norm: 0.9999996810331871, iteration: 121419
loss: 1.0257046222686768,grad_norm: 0.9999990604443953, iteration: 121420
loss: 0.9964680671691895,grad_norm: 0.8398653419743166, iteration: 121421
loss: 1.0195746421813965,grad_norm: 0.9141628412365117, iteration: 121422
loss: 1.0691083669662476,grad_norm: 0.9999991993871463, iteration: 121423
loss: 1.0024665594100952,grad_norm: 0.962078308039624, iteration: 121424
loss: 1.2598888874053955,grad_norm: 0.9999998062629903, iteration: 121425
loss: 0.980484664440155,grad_norm: 0.9999992185960067, iteration: 121426
loss: 0.9805019497871399,grad_norm: 0.8288987015617767, iteration: 121427
loss: 1.0629242658615112,grad_norm: 0.9999996292957057, iteration: 121428
loss: 1.1395987272262573,grad_norm: 0.9999993679650626, iteration: 121429
loss: 1.009575605392456,grad_norm: 0.9999992857347881, iteration: 121430
loss: 1.050687551498413,grad_norm: 0.999999820100607, iteration: 121431
loss: 1.1175453662872314,grad_norm: 0.9999998517288232, iteration: 121432
loss: 1.124955177307129,grad_norm: 0.9999999332627126, iteration: 121433
loss: 1.0285276174545288,grad_norm: 0.7715670017018289, iteration: 121434
loss: 1.0643136501312256,grad_norm: 0.9999993828664551, iteration: 121435
loss: 1.0645291805267334,grad_norm: 0.9999998983906502, iteration: 121436
loss: 1.0475783348083496,grad_norm: 0.9999992253998783, iteration: 121437
loss: 1.0530604124069214,grad_norm: 0.9999992179678859, iteration: 121438
loss: 0.9809160828590393,grad_norm: 0.8636963018363587, iteration: 121439
loss: 1.1082544326782227,grad_norm: 0.9999999770198076, iteration: 121440
loss: 1.1794559955596924,grad_norm: 0.9999994005270737, iteration: 121441
loss: 1.0264177322387695,grad_norm: 0.9582370948565797, iteration: 121442
loss: 0.9932186007499695,grad_norm: 0.9999993184305632, iteration: 121443
loss: 0.987626850605011,grad_norm: 0.9648957970638408, iteration: 121444
loss: 1.108656406402588,grad_norm: 0.9348368693107082, iteration: 121445
loss: 1.1108555793762207,grad_norm: 0.9999994353256666, iteration: 121446
loss: 1.0888217687606812,grad_norm: 0.9999996190727216, iteration: 121447
loss: 1.148232340812683,grad_norm: 0.9999996060422137, iteration: 121448
loss: 1.000334620475769,grad_norm: 0.7815493225005326, iteration: 121449
loss: 1.0376837253570557,grad_norm: 0.9557240358276063, iteration: 121450
loss: 1.0098174810409546,grad_norm: 0.9999992805507355, iteration: 121451
loss: 1.047423243522644,grad_norm: 0.9999998385663662, iteration: 121452
loss: 1.0689650774002075,grad_norm: 0.9999991247216382, iteration: 121453
loss: 1.033370018005371,grad_norm: 0.999999054905803, iteration: 121454
loss: 1.0404601097106934,grad_norm: 0.87050018785188, iteration: 121455
loss: 1.0639246702194214,grad_norm: 0.9999995870498835, iteration: 121456
loss: 1.0489405393600464,grad_norm: 0.9999997877376372, iteration: 121457
loss: 0.9963369965553284,grad_norm: 0.9160746334130597, iteration: 121458
loss: 0.9646880626678467,grad_norm: 0.8980059776331687, iteration: 121459
loss: 1.052552580833435,grad_norm: 0.9999992165367866, iteration: 121460
loss: 1.0383307933807373,grad_norm: 0.9999990001254232, iteration: 121461
loss: 1.0960257053375244,grad_norm: 0.9999991196658858, iteration: 121462
loss: 1.0786716938018799,grad_norm: 0.9999995597501002, iteration: 121463
loss: 1.0891871452331543,grad_norm: 0.999999630623271, iteration: 121464
loss: 1.0051363706588745,grad_norm: 0.9999993080445687, iteration: 121465
loss: 1.1015369892120361,grad_norm: 0.9999996465603213, iteration: 121466
loss: 1.051211953163147,grad_norm: 0.999999619565063, iteration: 121467
loss: 1.1314319372177124,grad_norm: 0.9999994085845711, iteration: 121468
loss: 1.1778554916381836,grad_norm: 0.9999997319761262, iteration: 121469
loss: 1.0554990768432617,grad_norm: 0.999999479928022, iteration: 121470
loss: 1.2063919305801392,grad_norm: 0.9999992796843684, iteration: 121471
loss: 1.1969051361083984,grad_norm: 0.9999998187943967, iteration: 121472
loss: 1.0021660327911377,grad_norm: 0.9999991283241236, iteration: 121473
loss: 1.038643479347229,grad_norm: 0.9999995449361823, iteration: 121474
loss: 1.0251916646957397,grad_norm: 0.9999992873555719, iteration: 121475
loss: 1.0639406442642212,grad_norm: 0.9999993519889023, iteration: 121476
loss: 0.9814090132713318,grad_norm: 0.8471077659931229, iteration: 121477
loss: 1.0655417442321777,grad_norm: 0.9999995631801739, iteration: 121478
loss: 1.0340874195098877,grad_norm: 0.999999416101001, iteration: 121479
loss: 0.999901294708252,grad_norm: 0.999999201188748, iteration: 121480
loss: 1.045103669166565,grad_norm: 0.938360639086912, iteration: 121481
loss: 1.15752375125885,grad_norm: 0.9999998491355024, iteration: 121482
loss: 1.0355442762374878,grad_norm: 0.934843432037776, iteration: 121483
loss: 0.9880563616752625,grad_norm: 0.9999993733136204, iteration: 121484
loss: 0.9839444160461426,grad_norm: 0.7624631146522087, iteration: 121485
loss: 1.0142312049865723,grad_norm: 0.9999992614528879, iteration: 121486
loss: 1.049593448638916,grad_norm: 0.9999990864582177, iteration: 121487
loss: 0.9863126873970032,grad_norm: 0.893379854955049, iteration: 121488
loss: 1.0337140560150146,grad_norm: 0.9999994377332586, iteration: 121489
loss: 0.9943065047264099,grad_norm: 0.9999991035514725, iteration: 121490
loss: 1.0107450485229492,grad_norm: 0.9976631522498248, iteration: 121491
loss: 1.0267698764801025,grad_norm: 0.9999996789955465, iteration: 121492
loss: 1.1512943506240845,grad_norm: 0.9999994380573638, iteration: 121493
loss: 1.3246158361434937,grad_norm: 0.9999995815748642, iteration: 121494
loss: 1.0308939218521118,grad_norm: 0.9999999345564352, iteration: 121495
loss: 1.0207706689834595,grad_norm: 0.999999465073174, iteration: 121496
loss: 1.0501151084899902,grad_norm: 0.9999994989736407, iteration: 121497
loss: 1.0974434614181519,grad_norm: 0.9722814744433923, iteration: 121498
loss: 1.0461920499801636,grad_norm: 0.9999991575554494, iteration: 121499
loss: 1.195490837097168,grad_norm: 0.9999999599443273, iteration: 121500
loss: 1.0184056758880615,grad_norm: 0.8839883797459162, iteration: 121501
loss: 1.1080536842346191,grad_norm: 0.9999998091397958, iteration: 121502
loss: 1.1048413515090942,grad_norm: 0.999999474443692, iteration: 121503
loss: 0.9928167462348938,grad_norm: 0.9837617851349822, iteration: 121504
loss: 1.0606886148452759,grad_norm: 0.9999992288754347, iteration: 121505
loss: 1.1033928394317627,grad_norm: 0.9999994951396777, iteration: 121506
loss: 0.9912570118904114,grad_norm: 0.8773731489942044, iteration: 121507
loss: 1.0278711318969727,grad_norm: 0.9201230952440643, iteration: 121508
loss: 1.0957809686660767,grad_norm: 0.9999997784050292, iteration: 121509
loss: 1.0092169046401978,grad_norm: 0.9999990882175338, iteration: 121510
loss: 1.257203459739685,grad_norm: 0.9999998264029135, iteration: 121511
loss: 1.034295678138733,grad_norm: 0.9999992761926659, iteration: 121512
loss: 0.9845478534698486,grad_norm: 0.9999990945145867, iteration: 121513
loss: 1.1447372436523438,grad_norm: 0.9999992194200832, iteration: 121514
loss: 1.0232057571411133,grad_norm: 0.9999991390171795, iteration: 121515
loss: 1.1570948362350464,grad_norm: 0.9999994752622269, iteration: 121516
loss: 1.0322867631912231,grad_norm: 0.9620895850379061, iteration: 121517
loss: 1.0165127515792847,grad_norm: 0.9999998339475015, iteration: 121518
loss: 1.0241416692733765,grad_norm: 0.9999991519488636, iteration: 121519
loss: 1.166190266609192,grad_norm: 0.9999996182606341, iteration: 121520
loss: 0.9852505922317505,grad_norm: 0.9999991258265274, iteration: 121521
loss: 1.036932110786438,grad_norm: 0.9999997075111078, iteration: 121522
loss: 0.9905266761779785,grad_norm: 0.9593212261498959, iteration: 121523
loss: 1.2293610572814941,grad_norm: 0.9999995178170971, iteration: 121524
loss: 0.9978233575820923,grad_norm: 0.7553142809192742, iteration: 121525
loss: 0.995521068572998,grad_norm: 0.9608445590454511, iteration: 121526
loss: 1.0496773719787598,grad_norm: 0.9999997941201862, iteration: 121527
loss: 0.9471617341041565,grad_norm: 0.8579646597184447, iteration: 121528
loss: 1.0808099508285522,grad_norm: 0.9999996255995993, iteration: 121529
loss: 1.0076024532318115,grad_norm: 0.9999998342587803, iteration: 121530
loss: 1.0597141981124878,grad_norm: 0.9999990699186192, iteration: 121531
loss: 1.0041974782943726,grad_norm: 0.8357613597670345, iteration: 121532
loss: 1.2464758157730103,grad_norm: 0.9999996693601592, iteration: 121533
loss: 1.020743489265442,grad_norm: 0.9999999351117791, iteration: 121534
loss: 0.9957883358001709,grad_norm: 0.971200165050834, iteration: 121535
loss: 1.0966944694519043,grad_norm: 0.9999995184901453, iteration: 121536
loss: 1.0054905414581299,grad_norm: 0.9999990954156703, iteration: 121537
loss: 0.9959189891815186,grad_norm: 0.9999991500882893, iteration: 121538
loss: 1.0514676570892334,grad_norm: 0.9999991871332536, iteration: 121539
loss: 1.0250308513641357,grad_norm: 0.9849726054081323, iteration: 121540
loss: 1.0487887859344482,grad_norm: 0.9999992471311155, iteration: 121541
loss: 1.0674357414245605,grad_norm: 0.9999992616074174, iteration: 121542
loss: 1.0484317541122437,grad_norm: 0.8637048765764807, iteration: 121543
loss: 1.0187121629714966,grad_norm: 0.999999071440868, iteration: 121544
loss: 1.0555518865585327,grad_norm: 0.9999996664765216, iteration: 121545
loss: 1.2866969108581543,grad_norm: 0.9999997034119786, iteration: 121546
loss: 1.1279126405715942,grad_norm: 0.9999995528398725, iteration: 121547
loss: 1.0403043031692505,grad_norm: 0.9999992055192467, iteration: 121548
loss: 1.0080424547195435,grad_norm: 0.9999991440300445, iteration: 121549
loss: 0.9980517029762268,grad_norm: 0.9499499684035873, iteration: 121550
loss: 1.1486918926239014,grad_norm: 0.9999997682541373, iteration: 121551
loss: 0.9521563053131104,grad_norm: 0.8335641329746635, iteration: 121552
loss: 1.0514240264892578,grad_norm: 0.9999999862619269, iteration: 121553
loss: 1.011268973350525,grad_norm: 0.8446186619716193, iteration: 121554
loss: 0.9815523028373718,grad_norm: 0.9999991448261943, iteration: 121555
loss: 1.0303043127059937,grad_norm: 0.9999991655825516, iteration: 121556
loss: 1.1293256282806396,grad_norm: 0.9999993694253516, iteration: 121557
loss: 1.081492304801941,grad_norm: 0.9999991139933959, iteration: 121558
loss: 1.0179654359817505,grad_norm: 0.9999996147610057, iteration: 121559
loss: 1.0191030502319336,grad_norm: 0.9531290958199761, iteration: 121560
loss: 1.1048840284347534,grad_norm: 0.9999993422959282, iteration: 121561
loss: 1.042344331741333,grad_norm: 0.9999990649300312, iteration: 121562
loss: 0.9851104617118835,grad_norm: 0.9999990751857286, iteration: 121563
loss: 0.9947832226753235,grad_norm: 0.9999990645441102, iteration: 121564
loss: 1.1256096363067627,grad_norm: 0.9999998853505406, iteration: 121565
loss: 1.1854161024093628,grad_norm: 0.999999448691136, iteration: 121566
loss: 1.0371249914169312,grad_norm: 0.9999991600015978, iteration: 121567
loss: 0.9966902732849121,grad_norm: 0.8527246320392871, iteration: 121568
loss: 1.0639452934265137,grad_norm: 0.9999993021096183, iteration: 121569
loss: 1.0464799404144287,grad_norm: 0.91639044997954, iteration: 121570
loss: 1.0043573379516602,grad_norm: 0.9999999019848855, iteration: 121571
loss: 1.0253273248672485,grad_norm: 0.9041559396712278, iteration: 121572
loss: 1.124569296836853,grad_norm: 0.9999993426620377, iteration: 121573
loss: 1.0686384439468384,grad_norm: 0.9999999042480394, iteration: 121574
loss: 1.0105043649673462,grad_norm: 0.9741668497620082, iteration: 121575
loss: 1.042441725730896,grad_norm: 0.9999993905963842, iteration: 121576
loss: 1.0419491529464722,grad_norm: 0.8869268112197558, iteration: 121577
loss: 1.0058706998825073,grad_norm: 0.9999991461408179, iteration: 121578
loss: 1.0342891216278076,grad_norm: 0.9999998204765802, iteration: 121579
loss: 0.9839020371437073,grad_norm: 0.9999998496149883, iteration: 121580
loss: 1.1193687915802002,grad_norm: 0.9999999684047337, iteration: 121581
loss: 1.185692310333252,grad_norm: 0.9999996821356945, iteration: 121582
loss: 1.141414761543274,grad_norm: 0.9999999498478315, iteration: 121583
loss: 0.9849544763565063,grad_norm: 0.9999990009902847, iteration: 121584
loss: 0.9963846206665039,grad_norm: 0.9999991330908156, iteration: 121585
loss: 1.050765872001648,grad_norm: 0.9999990265240243, iteration: 121586
loss: 1.0154813528060913,grad_norm: 0.9999990010277402, iteration: 121587
loss: 0.9886934161186218,grad_norm: 0.999999175693987, iteration: 121588
loss: 1.0996605157852173,grad_norm: 0.9020248866915531, iteration: 121589
loss: 1.1080204248428345,grad_norm: 0.9999995592601968, iteration: 121590
loss: 1.1439616680145264,grad_norm: 0.9999993498706188, iteration: 121591
loss: 1.0551091432571411,grad_norm: 0.998313586219145, iteration: 121592
loss: 1.003913402557373,grad_norm: 0.94699058373889, iteration: 121593
loss: 1.0558191537857056,grad_norm: 0.9999992122564701, iteration: 121594
loss: 1.1395454406738281,grad_norm: 0.999999568862126, iteration: 121595
loss: 1.2541239261627197,grad_norm: 0.9999995624132898, iteration: 121596
loss: 1.0137064456939697,grad_norm: 0.9999991239300137, iteration: 121597
loss: 1.1601452827453613,grad_norm: 0.9999996613090415, iteration: 121598
loss: 1.0374153852462769,grad_norm: 0.9999992303961067, iteration: 121599
loss: 1.229805588722229,grad_norm: 0.9999996763714892, iteration: 121600
loss: 0.963106632232666,grad_norm: 0.9999996182700154, iteration: 121601
loss: 1.0295052528381348,grad_norm: 0.999999688609557, iteration: 121602
loss: 1.0298880338668823,grad_norm: 0.9999997042412565, iteration: 121603
loss: 1.0535701513290405,grad_norm: 0.9999992293941944, iteration: 121604
loss: 1.0156278610229492,grad_norm: 0.7344340835099894, iteration: 121605
loss: 1.0852079391479492,grad_norm: 0.9999995104469632, iteration: 121606
loss: 1.019321322441101,grad_norm: 0.99999920328709, iteration: 121607
loss: 0.9924342036247253,grad_norm: 0.9999992317937825, iteration: 121608
loss: 1.1153675317764282,grad_norm: 0.9999999346059799, iteration: 121609
loss: 1.022094488143921,grad_norm: 0.9755357776564868, iteration: 121610
loss: 0.9987068772315979,grad_norm: 0.943521489906961, iteration: 121611
loss: 1.0863821506500244,grad_norm: 0.9999998419140512, iteration: 121612
loss: 1.2682512998580933,grad_norm: 0.9999999354534487, iteration: 121613
loss: 0.9901719689369202,grad_norm: 0.9999993062705699, iteration: 121614
loss: 1.1006648540496826,grad_norm: 0.9999996097967044, iteration: 121615
loss: 0.9570285081863403,grad_norm: 0.9999990843618947, iteration: 121616
loss: 1.0848466157913208,grad_norm: 0.9999996790682806, iteration: 121617
loss: 1.207556128501892,grad_norm: 0.9999996824224542, iteration: 121618
loss: 0.9817066788673401,grad_norm: 0.9813812966414734, iteration: 121619
loss: 0.9556489586830139,grad_norm: 0.959144033873433, iteration: 121620
loss: 1.0213874578475952,grad_norm: 0.9999998813496853, iteration: 121621
loss: 1.0294028520584106,grad_norm: 0.9830652079173644, iteration: 121622
loss: 0.9584838151931763,grad_norm: 0.9999995279758372, iteration: 121623
loss: 1.0072228908538818,grad_norm: 0.9999991351631022, iteration: 121624
loss: 1.1324400901794434,grad_norm: 0.9999995482193648, iteration: 121625
loss: 1.0367298126220703,grad_norm: 0.9035478420831411, iteration: 121626
loss: 1.017958641052246,grad_norm: 0.9731463727476755, iteration: 121627
loss: 1.103752613067627,grad_norm: 0.9999998307606051, iteration: 121628
loss: 0.9882875084877014,grad_norm: 0.84438841946526, iteration: 121629
loss: 1.0141228437423706,grad_norm: 0.9327334094794211, iteration: 121630
loss: 0.9802705645561218,grad_norm: 0.9999992884093576, iteration: 121631
loss: 1.04207181930542,grad_norm: 0.9999998573643197, iteration: 121632
loss: 1.0011141300201416,grad_norm: 0.9999998273802403, iteration: 121633
loss: 1.3453019857406616,grad_norm: 0.9999994803007072, iteration: 121634
loss: 0.9908838868141174,grad_norm: 0.8746233744568098, iteration: 121635
loss: 1.0002089738845825,grad_norm: 0.9999990498741471, iteration: 121636
loss: 1.1467987298965454,grad_norm: 0.9999994188433858, iteration: 121637
loss: 1.0195037126541138,grad_norm: 0.9999993090988899, iteration: 121638
loss: 1.0332497358322144,grad_norm: 0.9999997555526161, iteration: 121639
loss: 0.9832128286361694,grad_norm: 0.9842781387318756, iteration: 121640
loss: 1.0061349868774414,grad_norm: 0.9289764045648986, iteration: 121641
loss: 1.0208131074905396,grad_norm: 0.9999998211345271, iteration: 121642
loss: 1.019379734992981,grad_norm: 0.8257192961856882, iteration: 121643
loss: 1.1250534057617188,grad_norm: 0.9999997891403749, iteration: 121644
loss: 0.9950887560844421,grad_norm: 0.8546936524494276, iteration: 121645
loss: 1.0283221006393433,grad_norm: 0.9579398967407825, iteration: 121646
loss: 1.0099598169326782,grad_norm: 0.9999992392179882, iteration: 121647
loss: 1.0014851093292236,grad_norm: 0.9999992149975957, iteration: 121648
loss: 1.0166428089141846,grad_norm: 0.9999996047044255, iteration: 121649
loss: 0.9920400977134705,grad_norm: 0.9999992317683909, iteration: 121650
loss: 1.0628427267074585,grad_norm: 0.9999989906807212, iteration: 121651
loss: 0.970405101776123,grad_norm: 0.7862614931384133, iteration: 121652
loss: 0.9855285286903381,grad_norm: 0.8720341717238169, iteration: 121653
loss: 1.0970710515975952,grad_norm: 0.9999999630226576, iteration: 121654
loss: 1.0350441932678223,grad_norm: 0.9999993762483216, iteration: 121655
loss: 1.002649188041687,grad_norm: 0.8654333747216181, iteration: 121656
loss: 1.0574486255645752,grad_norm: 0.9999993050758034, iteration: 121657
loss: 1.0234053134918213,grad_norm: 0.9999990795890495, iteration: 121658
loss: 1.0983885526657104,grad_norm: 0.9999999750999068, iteration: 121659
loss: 1.049762487411499,grad_norm: 0.9999999025093763, iteration: 121660
loss: 0.9607845544815063,grad_norm: 0.9191554042413865, iteration: 121661
loss: 0.9856428503990173,grad_norm: 0.9999991312112582, iteration: 121662
loss: 0.9616854786872864,grad_norm: 0.9886948683043002, iteration: 121663
loss: 0.9950979351997375,grad_norm: 0.9999990401080231, iteration: 121664
loss: 0.9889503717422485,grad_norm: 0.9999991724943396, iteration: 121665
loss: 1.0457624197006226,grad_norm: 0.8977893270503816, iteration: 121666
loss: 1.1103062629699707,grad_norm: 0.9999997098790766, iteration: 121667
loss: 1.0399658679962158,grad_norm: 0.8066518795725622, iteration: 121668
loss: 1.0088995695114136,grad_norm: 0.8874246620438168, iteration: 121669
loss: 1.181784987449646,grad_norm: 0.9999998596804174, iteration: 121670
loss: 1.0815796852111816,grad_norm: 0.9999992292244247, iteration: 121671
loss: 1.0403971672058105,grad_norm: 0.8709206820380461, iteration: 121672
loss: 0.9683102369308472,grad_norm: 0.9999992528870972, iteration: 121673
loss: 1.0033316612243652,grad_norm: 0.9999992235610935, iteration: 121674
loss: 1.107934832572937,grad_norm: 0.9999994105135313, iteration: 121675
loss: 1.0337953567504883,grad_norm: 0.961799182789731, iteration: 121676
loss: 1.108804702758789,grad_norm: 0.9999997448710689, iteration: 121677
loss: 0.9972568154335022,grad_norm: 0.9999992120235391, iteration: 121678
loss: 1.0127979516983032,grad_norm: 0.9999998727473901, iteration: 121679
loss: 1.0226339101791382,grad_norm: 0.9999991759935127, iteration: 121680
loss: 0.986951470375061,grad_norm: 0.9999993722409438, iteration: 121681
loss: 1.0817855596542358,grad_norm: 0.9999996022849081, iteration: 121682
loss: 1.073128581047058,grad_norm: 0.9999999554629552, iteration: 121683
loss: 1.177749752998352,grad_norm: 0.9999997794918989, iteration: 121684
loss: 1.0996384620666504,grad_norm: 0.9999998948336627, iteration: 121685
loss: 1.0022469758987427,grad_norm: 0.9802625810837243, iteration: 121686
loss: 0.9932215809822083,grad_norm: 0.9309402103749659, iteration: 121687
loss: 1.061853289604187,grad_norm: 0.9999999192789156, iteration: 121688
loss: 1.1572312116622925,grad_norm: 0.9999995893104021, iteration: 121689
loss: 1.1530271768569946,grad_norm: 0.9999995432366556, iteration: 121690
loss: 1.1499096155166626,grad_norm: 0.999999675675723, iteration: 121691
loss: 1.0016015768051147,grad_norm: 0.9602566498651971, iteration: 121692
loss: 1.2505981922149658,grad_norm: 0.9999999614764837, iteration: 121693
loss: 0.990088701248169,grad_norm: 0.9999989994358027, iteration: 121694
loss: 1.0631513595581055,grad_norm: 0.9999997635890026, iteration: 121695
loss: 1.0075210332870483,grad_norm: 0.870674868181166, iteration: 121696
loss: 1.2899012565612793,grad_norm: 0.9999999291467999, iteration: 121697
loss: 0.9606389999389648,grad_norm: 0.9645255044200008, iteration: 121698
loss: 1.0424411296844482,grad_norm: 0.9999993968493862, iteration: 121699
loss: 1.178083896636963,grad_norm: 0.9999998625276232, iteration: 121700
loss: 1.0259337425231934,grad_norm: 0.9999997309414764, iteration: 121701
loss: 1.1690634489059448,grad_norm: 0.99999960436433, iteration: 121702
loss: 1.26640784740448,grad_norm: 0.9999993621985038, iteration: 121703
loss: 1.4129396677017212,grad_norm: 0.9999999930601479, iteration: 121704
loss: 1.095702886581421,grad_norm: 0.9999997289252713, iteration: 121705
loss: 1.0159714221954346,grad_norm: 0.9999993037645558, iteration: 121706
loss: 1.1883327960968018,grad_norm: 0.9999999266115899, iteration: 121707
loss: 0.9988037943840027,grad_norm: 0.9345448869569483, iteration: 121708
loss: 1.1623098850250244,grad_norm: 0.9999994133379639, iteration: 121709
loss: 0.9753212928771973,grad_norm: 0.8435586275306577, iteration: 121710
loss: 1.4636482000350952,grad_norm: 1.0000000220595342, iteration: 121711
loss: 1.2128459215164185,grad_norm: 0.9999999555858876, iteration: 121712
loss: 1.166810393333435,grad_norm: 0.9999997883022316, iteration: 121713
loss: 1.2796329259872437,grad_norm: 0.9999996315905547, iteration: 121714
loss: 1.118334412574768,grad_norm: 0.9999991025440318, iteration: 121715
loss: 1.1515085697174072,grad_norm: 0.9999994507020559, iteration: 121716
loss: 1.330855369567871,grad_norm: 0.999999773411981, iteration: 121717
loss: 1.2364170551300049,grad_norm: 0.9999996881598723, iteration: 121718
loss: 0.9901204705238342,grad_norm: 0.8744188306547432, iteration: 121719
loss: 1.0642051696777344,grad_norm: 0.999999194335259, iteration: 121720
loss: 1.117504358291626,grad_norm: 0.9999995901377888, iteration: 121721
loss: 1.2681925296783447,grad_norm: 0.9999996915296632, iteration: 121722
loss: 1.1978297233581543,grad_norm: 0.999999640337542, iteration: 121723
loss: 1.2034811973571777,grad_norm: 0.9999999458412462, iteration: 121724
loss: 1.410622000694275,grad_norm: 0.9999996929179881, iteration: 121725
loss: 1.0076837539672852,grad_norm: 0.9999991628084742, iteration: 121726
loss: 1.1679329872131348,grad_norm: 0.9999996272363866, iteration: 121727
loss: 1.1648237705230713,grad_norm: 1.0000000653046226, iteration: 121728
loss: 1.2633761167526245,grad_norm: 0.9999995501872819, iteration: 121729
loss: 1.1086176633834839,grad_norm: 0.9999998089543927, iteration: 121730
loss: 1.1848008632659912,grad_norm: 0.999999691070119, iteration: 121731
loss: 1.1566896438598633,grad_norm: 0.9999998596758667, iteration: 121732
loss: 1.0385684967041016,grad_norm: 0.9999993099145332, iteration: 121733
loss: 0.9957700371742249,grad_norm: 0.9092140185345846, iteration: 121734
loss: 1.1094045639038086,grad_norm: 0.9999998904730913, iteration: 121735
loss: 0.9843167066574097,grad_norm: 0.9999993288999776, iteration: 121736
loss: 1.0908325910568237,grad_norm: 0.9999999189487295, iteration: 121737
loss: 1.2070492506027222,grad_norm: 0.9999997238916624, iteration: 121738
loss: 1.143630027770996,grad_norm: 0.9999998933130968, iteration: 121739
loss: 1.0516568422317505,grad_norm: 0.9999998616121625, iteration: 121740
loss: 1.0766675472259521,grad_norm: 0.9999993604206336, iteration: 121741
loss: 1.053873896598816,grad_norm: 0.9999992916656932, iteration: 121742
loss: 1.2041280269622803,grad_norm: 0.9999998160507045, iteration: 121743
loss: 1.0339142084121704,grad_norm: 0.9999994154045015, iteration: 121744
loss: 1.0091112852096558,grad_norm: 0.9999994002327566, iteration: 121745
loss: 1.2195481061935425,grad_norm: 0.9999996208608716, iteration: 121746
loss: 1.0261650085449219,grad_norm: 0.922251897332856, iteration: 121747
loss: 0.9846696853637695,grad_norm: 0.932424658097297, iteration: 121748
loss: 1.076366662979126,grad_norm: 0.9999994441881809, iteration: 121749
loss: 1.0720213651657104,grad_norm: 0.9999995936770614, iteration: 121750
loss: 1.0594778060913086,grad_norm: 0.9999995165969175, iteration: 121751
loss: 1.0176528692245483,grad_norm: 0.9999993119304281, iteration: 121752
loss: 1.116613507270813,grad_norm: 0.9999992990386621, iteration: 121753
loss: 1.0871096849441528,grad_norm: 0.9999994717840406, iteration: 121754
loss: 1.0738195180892944,grad_norm: 0.999999536787289, iteration: 121755
loss: 1.071391224861145,grad_norm: 0.9999994550936939, iteration: 121756
loss: 1.097062587738037,grad_norm: 0.9999996090932037, iteration: 121757
loss: 1.07106351852417,grad_norm: 0.9999997673474412, iteration: 121758
loss: 1.0233262777328491,grad_norm: 0.9999992485252168, iteration: 121759
loss: 1.0715529918670654,grad_norm: 0.9999992240203045, iteration: 121760
loss: 1.0783941745758057,grad_norm: 0.9999999545307632, iteration: 121761
loss: 1.0681111812591553,grad_norm: 0.9921362166671216, iteration: 121762
loss: 1.0323749780654907,grad_norm: 0.9698707037685477, iteration: 121763
loss: 1.0702873468399048,grad_norm: 0.9999997649467369, iteration: 121764
loss: 1.0047317743301392,grad_norm: 0.9999998631961666, iteration: 121765
loss: 1.0258638858795166,grad_norm: 0.999999126874752, iteration: 121766
loss: 0.9754700660705566,grad_norm: 0.9999991598346404, iteration: 121767
loss: 1.0599905252456665,grad_norm: 0.9999993902525192, iteration: 121768
loss: 0.9995397329330444,grad_norm: 0.9060693478897623, iteration: 121769
loss: 1.06569242477417,grad_norm: 0.9999999618863046, iteration: 121770
loss: 1.126328706741333,grad_norm: 1.0000000064789127, iteration: 121771
loss: 1.0083495378494263,grad_norm: 0.9999994607236024, iteration: 121772
loss: 1.0829890966415405,grad_norm: 0.9999997861829074, iteration: 121773
loss: 1.0979125499725342,grad_norm: 0.9999998070261443, iteration: 121774
loss: 1.1599677801132202,grad_norm: 0.9999994861665601, iteration: 121775
loss: 1.0559769868850708,grad_norm: 0.9999991806521139, iteration: 121776
loss: 1.2189439535140991,grad_norm: 0.9999998779365209, iteration: 121777
loss: 1.155898094177246,grad_norm: 0.9999997422905538, iteration: 121778
loss: 1.229716420173645,grad_norm: 0.999999604093659, iteration: 121779
loss: 1.0150485038757324,grad_norm: 0.8194850379082265, iteration: 121780
loss: 1.1523092985153198,grad_norm: 0.9999995845287326, iteration: 121781
loss: 1.1441810131072998,grad_norm: 0.9999993709417612, iteration: 121782
loss: 1.2252421379089355,grad_norm: 0.9999998213119846, iteration: 121783
loss: 1.1124144792556763,grad_norm: 0.9999995116655307, iteration: 121784
loss: 1.0208948850631714,grad_norm: 0.9999998825281118, iteration: 121785
loss: 1.038649082183838,grad_norm: 0.9999998742381606, iteration: 121786
loss: 1.1352684497833252,grad_norm: 0.9999999267274181, iteration: 121787
loss: 1.2071200609207153,grad_norm: 0.9999996855467369, iteration: 121788
loss: 1.0877336263656616,grad_norm: 0.9999993704874366, iteration: 121789
loss: 1.1152344942092896,grad_norm: 1.0000000679208096, iteration: 121790
loss: 1.1403754949569702,grad_norm: 0.999999265628078, iteration: 121791
loss: 1.3013747930526733,grad_norm: 0.9999995986090623, iteration: 121792
loss: 0.971337080001831,grad_norm: 0.999999018588999, iteration: 121793
loss: 0.9948597550392151,grad_norm: 0.9279754904141168, iteration: 121794
loss: 1.1066073179244995,grad_norm: 0.9999998290044911, iteration: 121795
loss: 1.1421703100204468,grad_norm: 0.9999998309162504, iteration: 121796
loss: 1.0739954710006714,grad_norm: 0.9999994352101871, iteration: 121797
loss: 1.1146507263183594,grad_norm: 0.9999998785567313, iteration: 121798
loss: 1.1459801197052002,grad_norm: 0.9999998907114723, iteration: 121799
loss: 1.0098248720169067,grad_norm: 0.9999990023920741, iteration: 121800
loss: 0.9963991045951843,grad_norm: 0.9165131385741048, iteration: 121801
loss: 1.140691876411438,grad_norm: 0.9999995526187665, iteration: 121802
loss: 1.1598273515701294,grad_norm: 0.9999997908052535, iteration: 121803
loss: 1.0367863178253174,grad_norm: 0.9999999301316455, iteration: 121804
loss: 1.110722303390503,grad_norm: 0.9999995168863297, iteration: 121805
loss: 1.0668468475341797,grad_norm: 0.9999998305341266, iteration: 121806
loss: 1.1052347421646118,grad_norm: 0.9999997877127237, iteration: 121807
loss: 1.0341864824295044,grad_norm: 0.9999999165343979, iteration: 121808
loss: 1.0938091278076172,grad_norm: 0.9999991853613004, iteration: 121809
loss: 1.1219077110290527,grad_norm: 0.9999998430049515, iteration: 121810
loss: 1.024031639099121,grad_norm: 0.9999992878992866, iteration: 121811
loss: 1.1377533674240112,grad_norm: 0.9999996797401045, iteration: 121812
loss: 1.0102180242538452,grad_norm: 0.9783438974018688, iteration: 121813
loss: 1.109843134880066,grad_norm: 0.9999998214096666, iteration: 121814
loss: 1.1426658630371094,grad_norm: 0.9999998377310553, iteration: 121815
loss: 1.0295010805130005,grad_norm: 1.0000000072271726, iteration: 121816
loss: 1.0505456924438477,grad_norm: 0.999999243928112, iteration: 121817
loss: 1.154424786567688,grad_norm: 0.9999996713442871, iteration: 121818
loss: 1.3567136526107788,grad_norm: 0.9999998631576612, iteration: 121819
loss: 1.0575121641159058,grad_norm: 0.9999994855732164, iteration: 121820
loss: 1.0465071201324463,grad_norm: 0.999999985660319, iteration: 121821
loss: 1.0658364295959473,grad_norm: 0.9999995131185891, iteration: 121822
loss: 1.012964129447937,grad_norm: 0.9999991180053303, iteration: 121823
loss: 1.0277527570724487,grad_norm: 0.9999998445850545, iteration: 121824
loss: 1.211998701095581,grad_norm: 0.999999787717528, iteration: 121825
loss: 1.019470453262329,grad_norm: 0.9033863214145315, iteration: 121826
loss: 1.1059811115264893,grad_norm: 0.9999999756149636, iteration: 121827
loss: 1.3054643869400024,grad_norm: 0.9999998875600427, iteration: 121828
loss: 1.1097084283828735,grad_norm: 0.9999990763649009, iteration: 121829
loss: 0.9976053833961487,grad_norm: 0.9999990728891535, iteration: 121830
loss: 1.194455862045288,grad_norm: 0.9999992477376678, iteration: 121831
loss: 1.0209078788757324,grad_norm: 0.9999990575400117, iteration: 121832
loss: 1.0072101354599,grad_norm: 0.8880090040610705, iteration: 121833
loss: 0.9810046553611755,grad_norm: 0.9999991841391493, iteration: 121834
loss: 1.0245089530944824,grad_norm: 0.8195107058543424, iteration: 121835
loss: 0.9760358929634094,grad_norm: 0.8622519473695525, iteration: 121836
loss: 1.146337628364563,grad_norm: 0.9999995966371755, iteration: 121837
loss: 1.1165924072265625,grad_norm: 0.9999994506860642, iteration: 121838
loss: 1.0157928466796875,grad_norm: 0.9999992058222692, iteration: 121839
loss: 1.2555079460144043,grad_norm: 0.9999997048578128, iteration: 121840
loss: 1.298966884613037,grad_norm: 0.9999996449444875, iteration: 121841
loss: 1.0383063554763794,grad_norm: 0.9633977150456045, iteration: 121842
loss: 1.1111701726913452,grad_norm: 0.9999995695719218, iteration: 121843
loss: 1.133015751838684,grad_norm: 0.9999995266236136, iteration: 121844
loss: 1.0850603580474854,grad_norm: 0.9999992037492432, iteration: 121845
loss: 1.131806492805481,grad_norm: 0.9999999594159537, iteration: 121846
loss: 1.0592221021652222,grad_norm: 0.9999995114032241, iteration: 121847
loss: 1.0339688062667847,grad_norm: 0.9999996012008646, iteration: 121848
loss: 1.1343328952789307,grad_norm: 0.9999997499870211, iteration: 121849
loss: 1.1005398035049438,grad_norm: 0.9999996497253171, iteration: 121850
loss: 1.0481268167495728,grad_norm: 0.912132431589898, iteration: 121851
loss: 1.0290462970733643,grad_norm: 0.9999991502871106, iteration: 121852
loss: 0.9866648316383362,grad_norm: 0.9999995334859753, iteration: 121853
loss: 1.051727533340454,grad_norm: 0.979084059849362, iteration: 121854
loss: 1.0624607801437378,grad_norm: 0.999999579902632, iteration: 121855
loss: 0.98110431432724,grad_norm: 0.9999990777622835, iteration: 121856
loss: 1.1982976198196411,grad_norm: 0.9999993908259688, iteration: 121857
loss: 1.1266156435012817,grad_norm: 0.9999993913724615, iteration: 121858
loss: 1.1384872198104858,grad_norm: 0.9999995098078049, iteration: 121859
loss: 1.215574860572815,grad_norm: 0.9999999347761589, iteration: 121860
loss: 1.0568546056747437,grad_norm: 0.9999992546969152, iteration: 121861
loss: 1.1071527004241943,grad_norm: 0.9999998491823134, iteration: 121862
loss: 1.0695743560791016,grad_norm: 0.999999448382872, iteration: 121863
loss: 1.0716990232467651,grad_norm: 0.9999995003283221, iteration: 121864
loss: 1.077092170715332,grad_norm: 0.9999997867131081, iteration: 121865
loss: 1.1586809158325195,grad_norm: 0.9999998528908168, iteration: 121866
loss: 1.1585679054260254,grad_norm: 0.9999994625916854, iteration: 121867
loss: 1.0702139139175415,grad_norm: 0.9999991456616953, iteration: 121868
loss: 1.0970170497894287,grad_norm: 0.9999996397181719, iteration: 121869
loss: 0.9995667934417725,grad_norm: 0.9999993609533767, iteration: 121870
loss: 1.0867724418640137,grad_norm: 0.9999999535436086, iteration: 121871
loss: 1.019403338432312,grad_norm: 0.9904247316671416, iteration: 121872
loss: 1.269797682762146,grad_norm: 0.9999999275083018, iteration: 121873
loss: 1.0443488359451294,grad_norm: 0.9999990405512191, iteration: 121874
loss: 0.9820940494537354,grad_norm: 0.9999994725720854, iteration: 121875
loss: 1.039369821548462,grad_norm: 0.9999991555571963, iteration: 121876
loss: 1.0421782732009888,grad_norm: 0.9999992413004376, iteration: 121877
loss: 0.9931599497795105,grad_norm: 0.9999999179493172, iteration: 121878
loss: 1.0470088720321655,grad_norm: 0.9999999639574167, iteration: 121879
loss: 1.0136566162109375,grad_norm: 0.7673517291409083, iteration: 121880
loss: 1.2190693616867065,grad_norm: 0.9999995621097142, iteration: 121881
loss: 1.003402829170227,grad_norm: 0.9999995336117847, iteration: 121882
loss: 1.2543704509735107,grad_norm: 0.999999463719483, iteration: 121883
loss: 1.0359328985214233,grad_norm: 0.9999991913921108, iteration: 121884
loss: 1.121323823928833,grad_norm: 0.9999997809384944, iteration: 121885
loss: 1.083280086517334,grad_norm: 0.9999994783019465, iteration: 121886
loss: 1.1035441160202026,grad_norm: 0.9999995586730088, iteration: 121887
loss: 1.1406214237213135,grad_norm: 1.0000000148352477, iteration: 121888
loss: 1.1144245862960815,grad_norm: 0.9999996536504224, iteration: 121889
loss: 1.0937050580978394,grad_norm: 0.9999993918350182, iteration: 121890
loss: 1.059041142463684,grad_norm: 0.8798025568327762, iteration: 121891
loss: 1.2023694515228271,grad_norm: 0.9999996778832231, iteration: 121892
loss: 1.017768383026123,grad_norm: 0.9999999616635482, iteration: 121893
loss: 1.2382314205169678,grad_norm: 0.9999999291855953, iteration: 121894
loss: 1.106048822402954,grad_norm: 0.9999993189760327, iteration: 121895
loss: 0.9618787169456482,grad_norm: 0.9999994122408868, iteration: 121896
loss: 1.1381183862686157,grad_norm: 0.9999994147533083, iteration: 121897
loss: 1.0907785892486572,grad_norm: 0.9999992856892639, iteration: 121898
loss: 1.0275846719741821,grad_norm: 0.8318567976312093, iteration: 121899
loss: 1.1558465957641602,grad_norm: 0.999999808537161, iteration: 121900
loss: 1.118841528892517,grad_norm: 0.9999995029741617, iteration: 121901
loss: 1.02606201171875,grad_norm: 0.9152630068115573, iteration: 121902
loss: 1.0261836051940918,grad_norm: 0.9999992960731876, iteration: 121903
loss: 1.0880584716796875,grad_norm: 0.9999995135059744, iteration: 121904
loss: 1.0062059164047241,grad_norm: 0.9999997425308007, iteration: 121905
loss: 1.0536452531814575,grad_norm: 0.9999993441735013, iteration: 121906
loss: 1.1023021936416626,grad_norm: 1.0000000279726282, iteration: 121907
loss: 1.2422598600387573,grad_norm: 0.9999996092618761, iteration: 121908
loss: 1.0474134683609009,grad_norm: 0.9999994517634728, iteration: 121909
loss: 1.0997486114501953,grad_norm: 0.9999998013397757, iteration: 121910
loss: 0.9910870790481567,grad_norm: 0.9999991380470536, iteration: 121911
loss: 1.014634132385254,grad_norm: 0.9999996487078284, iteration: 121912
loss: 1.0800520181655884,grad_norm: 0.9999991362846475, iteration: 121913
loss: 1.1165120601654053,grad_norm: 0.9999996418865384, iteration: 121914
loss: 1.1736663579940796,grad_norm: 0.9999999399382868, iteration: 121915
loss: 1.1289986371994019,grad_norm: 0.9999998204344507, iteration: 121916
loss: 1.1561992168426514,grad_norm: 0.9999997706267472, iteration: 121917
loss: 1.1250665187835693,grad_norm: 0.9999994799576127, iteration: 121918
loss: 1.1570688486099243,grad_norm: 0.9999998680883869, iteration: 121919
loss: 1.0424693822860718,grad_norm: 0.9999997957288254, iteration: 121920
loss: 1.0836800336837769,grad_norm: 0.9999999618054497, iteration: 121921
loss: 1.138029932975769,grad_norm: 0.9999997828951847, iteration: 121922
loss: 1.079522728919983,grad_norm: 0.999999735589722, iteration: 121923
loss: 1.0719209909439087,grad_norm: 0.9999995226766721, iteration: 121924
loss: 1.4218310117721558,grad_norm: 0.999999544109415, iteration: 121925
loss: 1.387786865234375,grad_norm: 0.9999998575388648, iteration: 121926
loss: 1.1017804145812988,grad_norm: 0.9999999799633613, iteration: 121927
loss: 1.108015775680542,grad_norm: 0.9999998466917708, iteration: 121928
loss: 1.0553908348083496,grad_norm: 0.9999999189956524, iteration: 121929
loss: 1.1888995170593262,grad_norm: 0.999999876241367, iteration: 121930
loss: 1.0040323734283447,grad_norm: 0.9999990463172709, iteration: 121931
loss: 1.2833846807479858,grad_norm: 0.9999997209758889, iteration: 121932
loss: 1.0498443841934204,grad_norm: 0.9999998636355917, iteration: 121933
loss: 1.0352914333343506,grad_norm: 0.7955354519624208, iteration: 121934
loss: 1.1088504791259766,grad_norm: 0.9999998429357699, iteration: 121935
loss: 1.0058528184890747,grad_norm: 0.9999990877198883, iteration: 121936
loss: 1.0149723291397095,grad_norm: 0.9999999711265618, iteration: 121937
loss: 1.166714072227478,grad_norm: 0.9999998656138102, iteration: 121938
loss: 1.0280842781066895,grad_norm: 0.9999999188186648, iteration: 121939
loss: 1.1909574270248413,grad_norm: 0.9999999790218441, iteration: 121940
loss: 0.9966510534286499,grad_norm: 0.9226152703508246, iteration: 121941
loss: 1.1917113065719604,grad_norm: 0.9999998635140978, iteration: 121942
loss: 1.2364329099655151,grad_norm: 0.9999995346273041, iteration: 121943
loss: 1.0146445035934448,grad_norm: 0.9140444090983528, iteration: 121944
loss: 1.2179661989212036,grad_norm: 0.9999995555644303, iteration: 121945
loss: 1.032737135887146,grad_norm: 0.9999990212388825, iteration: 121946
loss: 1.0409820079803467,grad_norm: 0.9521157262698993, iteration: 121947
loss: 1.0958575010299683,grad_norm: 0.9999994475531776, iteration: 121948
loss: 1.0928232669830322,grad_norm: 0.9999998303675639, iteration: 121949
loss: 1.0438423156738281,grad_norm: 0.9999993067274786, iteration: 121950
loss: 1.190321922302246,grad_norm: 0.9999991907756491, iteration: 121951
loss: 1.3267589807510376,grad_norm: 0.9999998874828037, iteration: 121952
loss: 1.0704418420791626,grad_norm: 0.99999917143218, iteration: 121953
loss: 1.0510622262954712,grad_norm: 0.99999915292708, iteration: 121954
loss: 0.9983335733413696,grad_norm: 0.9999996352705137, iteration: 121955
loss: 1.2049379348754883,grad_norm: 0.9999999168577958, iteration: 121956
loss: 1.0202817916870117,grad_norm: 1.0000000108962066, iteration: 121957
loss: 1.1226825714111328,grad_norm: 0.9999998137026163, iteration: 121958
loss: 1.0530329942703247,grad_norm: 0.999999937714639, iteration: 121959
loss: 1.1488947868347168,grad_norm: 0.9999991070830591, iteration: 121960
loss: 1.1438528299331665,grad_norm: 0.9999997465385071, iteration: 121961
loss: 1.0842180252075195,grad_norm: 0.9999993121996279, iteration: 121962
loss: 1.4146047830581665,grad_norm: 0.9999998804870323, iteration: 121963
loss: 1.0614093542099,grad_norm: 0.9999998704018382, iteration: 121964
loss: 0.9617233276367188,grad_norm: 0.8989936994804072, iteration: 121965
loss: 1.0791871547698975,grad_norm: 0.8798333767995017, iteration: 121966
loss: 1.0448237657546997,grad_norm: 0.8996823996991761, iteration: 121967
loss: 1.3232791423797607,grad_norm: 0.9999999182321857, iteration: 121968
loss: 1.0239126682281494,grad_norm: 0.9999991387974275, iteration: 121969
loss: 1.0997153520584106,grad_norm: 0.9999992725608581, iteration: 121970
loss: 1.049203634262085,grad_norm: 0.9999998480434169, iteration: 121971
loss: 1.1019282341003418,grad_norm: 0.9999992062390924, iteration: 121972
loss: 0.9614485502243042,grad_norm: 0.9999998823415079, iteration: 121973
loss: 1.1201530694961548,grad_norm: 1.0000000590815532, iteration: 121974
loss: 1.0106232166290283,grad_norm: 0.9999994237954897, iteration: 121975
loss: 1.2255815267562866,grad_norm: 0.9999997859974346, iteration: 121976
loss: 1.0633749961853027,grad_norm: 0.9999998404263227, iteration: 121977
loss: 1.0286502838134766,grad_norm: 0.9999993987192416, iteration: 121978
loss: 0.9732728600502014,grad_norm: 0.9217726012057706, iteration: 121979
loss: 0.9923712015151978,grad_norm: 0.8355596972999736, iteration: 121980
loss: 1.0392251014709473,grad_norm: 0.9999993049841835, iteration: 121981
loss: 1.0044549703598022,grad_norm: 0.7382171576773434, iteration: 121982
loss: 1.0274007320404053,grad_norm: 0.9999998165247777, iteration: 121983
loss: 1.0563799142837524,grad_norm: 0.9999991376439044, iteration: 121984
loss: 1.1091805696487427,grad_norm: 0.9999997822292165, iteration: 121985
loss: 1.1103452444076538,grad_norm: 0.9999991614858016, iteration: 121986
loss: 1.18430757522583,grad_norm: 0.9999991057431016, iteration: 121987
loss: 1.144369125366211,grad_norm: 0.9999999069047775, iteration: 121988
loss: 1.067948341369629,grad_norm: 0.9999997911991536, iteration: 121989
loss: 1.0835039615631104,grad_norm: 0.9999999598676607, iteration: 121990
loss: 1.0401822328567505,grad_norm: 0.9999994078967981, iteration: 121991
loss: 1.1600048542022705,grad_norm: 0.9999994428741951, iteration: 121992
loss: 1.0587464570999146,grad_norm: 0.9999998426446909, iteration: 121993
loss: 1.0295114517211914,grad_norm: 0.8522455384561878, iteration: 121994
loss: 1.1357289552688599,grad_norm: 0.9999994756899802, iteration: 121995
loss: 1.0504381656646729,grad_norm: 0.9999994057289706, iteration: 121996
loss: 1.0125190019607544,grad_norm: 0.9999992048704829, iteration: 121997
loss: 1.1407313346862793,grad_norm: 0.9999997310077655, iteration: 121998
loss: 1.0600675344467163,grad_norm: 0.9999997418425823, iteration: 121999
loss: 0.9955966472625732,grad_norm: 0.8604621613677826, iteration: 122000
loss: 1.0325344800949097,grad_norm: 0.8735402906940201, iteration: 122001
loss: 1.0790746212005615,grad_norm: 0.9999994812864152, iteration: 122002
loss: 1.070370078086853,grad_norm: 0.9999992437246223, iteration: 122003
loss: 1.007169485092163,grad_norm: 0.9999991673499697, iteration: 122004
loss: 0.9940498471260071,grad_norm: 0.9999992062018382, iteration: 122005
loss: 1.0496506690979004,grad_norm: 0.9999996406793902, iteration: 122006
loss: 1.0014610290527344,grad_norm: 0.8579263504933705, iteration: 122007
loss: 1.000496745109558,grad_norm: 0.9936579864867601, iteration: 122008
loss: 0.9862304925918579,grad_norm: 0.8414612143255733, iteration: 122009
loss: 1.0286749601364136,grad_norm: 0.9999990596682343, iteration: 122010
loss: 0.9720185995101929,grad_norm: 0.9999990634207746, iteration: 122011
loss: 1.0963541269302368,grad_norm: 0.9999993787386984, iteration: 122012
loss: 1.047538161277771,grad_norm: 0.9999991141371722, iteration: 122013
loss: 1.0001863241195679,grad_norm: 0.9999994996717506, iteration: 122014
loss: 1.1957076787948608,grad_norm: 0.9999997837487518, iteration: 122015
loss: 0.9701393246650696,grad_norm: 0.9999992792605996, iteration: 122016
loss: 1.1419174671173096,grad_norm: 0.9999999633971378, iteration: 122017
loss: 1.0183062553405762,grad_norm: 0.9999990405547515, iteration: 122018
loss: 1.0320003032684326,grad_norm: 0.9999990798111231, iteration: 122019
loss: 1.0597703456878662,grad_norm: 1.0000000056601648, iteration: 122020
loss: 1.0311989784240723,grad_norm: 0.9999990796333971, iteration: 122021
loss: 0.9795731902122498,grad_norm: 0.8436739281573612, iteration: 122022
loss: 1.0896720886230469,grad_norm: 0.9999991796661318, iteration: 122023
loss: 0.9919935464859009,grad_norm: 0.9999989576012381, iteration: 122024
loss: 1.250064730644226,grad_norm: 0.999999805599172, iteration: 122025
loss: 1.088671088218689,grad_norm: 0.969840829773111, iteration: 122026
loss: 0.9362185597419739,grad_norm: 0.9999996804987887, iteration: 122027
loss: 1.0607101917266846,grad_norm: 0.9999999179417705, iteration: 122028
loss: 1.035639762878418,grad_norm: 0.8366268898844971, iteration: 122029
loss: 1.1652981042861938,grad_norm: 0.9999991565764873, iteration: 122030
loss: 1.1029196977615356,grad_norm: 0.999999324193932, iteration: 122031
loss: 1.020075798034668,grad_norm: 0.9999992570377155, iteration: 122032
loss: 0.9845753908157349,grad_norm: 0.9999991416621564, iteration: 122033
loss: 1.048453450202942,grad_norm: 0.8803727355597676, iteration: 122034
loss: 1.0018022060394287,grad_norm: 0.9999996214782618, iteration: 122035
loss: 1.0960959196090698,grad_norm: 0.999999396549015, iteration: 122036
loss: 0.9862582683563232,grad_norm: 0.7795029778488838, iteration: 122037
loss: 1.023166298866272,grad_norm: 0.9999994450983052, iteration: 122038
loss: 1.0307420492172241,grad_norm: 0.9999993412235191, iteration: 122039
loss: 1.0354812145233154,grad_norm: 0.9763972834640329, iteration: 122040
loss: 1.0722618103027344,grad_norm: 0.999999562304736, iteration: 122041
loss: 0.989682674407959,grad_norm: 0.9999994094081183, iteration: 122042
loss: 1.004830002784729,grad_norm: 0.9999990462050791, iteration: 122043
loss: 1.0474873781204224,grad_norm: 0.9999990621881425, iteration: 122044
loss: 1.0366781949996948,grad_norm: 0.9999990809699351, iteration: 122045
loss: 1.01553475856781,grad_norm: 0.9999991328649436, iteration: 122046
loss: 0.9544659852981567,grad_norm: 0.9999991962240674, iteration: 122047
loss: 1.0531120300292969,grad_norm: 0.9999999242749529, iteration: 122048
loss: 0.9768239259719849,grad_norm: 0.9374984601552369, iteration: 122049
loss: 1.0406969785690308,grad_norm: 0.9999991951299283, iteration: 122050
loss: 1.0211457014083862,grad_norm: 0.9999996751349388, iteration: 122051
loss: 1.0141916275024414,grad_norm: 0.9999997381763797, iteration: 122052
loss: 1.0683308839797974,grad_norm: 0.9999998574013402, iteration: 122053
loss: 1.0324809551239014,grad_norm: 0.842912173754999, iteration: 122054
loss: 0.9812921285629272,grad_norm: 0.9015331549151157, iteration: 122055
loss: 1.0135152339935303,grad_norm: 0.6800625927160806, iteration: 122056
loss: 1.0297900438308716,grad_norm: 0.9999994053778706, iteration: 122057
loss: 1.0640453100204468,grad_norm: 0.9999997651936077, iteration: 122058
loss: 1.0648528337478638,grad_norm: 0.9999998154153056, iteration: 122059
loss: 1.0245479345321655,grad_norm: 0.8829459556152428, iteration: 122060
loss: 1.030728816986084,grad_norm: 0.999999233799798, iteration: 122061
loss: 0.9613896608352661,grad_norm: 0.9523262739121888, iteration: 122062
loss: 1.0179097652435303,grad_norm: 0.9918803860231755, iteration: 122063
loss: 1.018755555152893,grad_norm: 0.8513874409009067, iteration: 122064
loss: 1.0023292303085327,grad_norm: 0.9901205389575741, iteration: 122065
loss: 1.0597187280654907,grad_norm: 0.9999990693198829, iteration: 122066
loss: 1.0589121580123901,grad_norm: 0.99999954766167, iteration: 122067
loss: 1.0504740476608276,grad_norm: 0.9999993407383035, iteration: 122068
loss: 1.0088605880737305,grad_norm: 0.9291283477321493, iteration: 122069
loss: 0.9831314086914062,grad_norm: 0.8031025029941888, iteration: 122070
loss: 1.0039197206497192,grad_norm: 0.9038082308178458, iteration: 122071
loss: 1.1556733846664429,grad_norm: 0.9999995772473412, iteration: 122072
loss: 0.9841206073760986,grad_norm: 0.8578903410248735, iteration: 122073
loss: 1.087056040763855,grad_norm: 0.9999997459737621, iteration: 122074
loss: 1.0511951446533203,grad_norm: 0.9999992921216015, iteration: 122075
loss: 1.1381515264511108,grad_norm: 0.9999995412572117, iteration: 122076
loss: 1.0258183479309082,grad_norm: 0.9999995770776469, iteration: 122077
loss: 1.1008814573287964,grad_norm: 0.9999993979978178, iteration: 122078
loss: 1.0138198137283325,grad_norm: 0.9501150058085028, iteration: 122079
loss: 1.0632237195968628,grad_norm: 0.999999392148803, iteration: 122080
loss: 1.0028859376907349,grad_norm: 0.8182128196466061, iteration: 122081
loss: 1.0565966367721558,grad_norm: 0.999999420083218, iteration: 122082
loss: 1.1119221448898315,grad_norm: 0.9999989696643375, iteration: 122083
loss: 1.2154086828231812,grad_norm: 0.9999998253449099, iteration: 122084
loss: 1.0522018671035767,grad_norm: 0.9999990778671772, iteration: 122085
loss: 0.9888729453086853,grad_norm: 0.9999992189848782, iteration: 122086
loss: 1.0511329174041748,grad_norm: 0.9999997136535677, iteration: 122087
loss: 1.0293240547180176,grad_norm: 0.9999998278948494, iteration: 122088
loss: 0.9793222546577454,grad_norm: 0.7939264972352109, iteration: 122089
loss: 0.987734854221344,grad_norm: 0.9999997983274619, iteration: 122090
loss: 1.0132184028625488,grad_norm: 0.9174949957595674, iteration: 122091
loss: 1.0662569999694824,grad_norm: 0.9999992577598811, iteration: 122092
loss: 0.9260804653167725,grad_norm: 0.9999990411170481, iteration: 122093
loss: 1.0865663290023804,grad_norm: 0.9999996910196673, iteration: 122094
loss: 1.0863157510757446,grad_norm: 0.9999998719189943, iteration: 122095
loss: 0.9872317314147949,grad_norm: 0.9968959977590655, iteration: 122096
loss: 0.9532172679901123,grad_norm: 0.9999999070761233, iteration: 122097
loss: 1.0401725769042969,grad_norm: 0.9999999852036637, iteration: 122098
loss: 0.9891989231109619,grad_norm: 0.9999990024451446, iteration: 122099
loss: 1.0047334432601929,grad_norm: 0.9999992382845975, iteration: 122100
loss: 1.1257580518722534,grad_norm: 0.9999998934434268, iteration: 122101
loss: 1.0256905555725098,grad_norm: 0.9999992386328281, iteration: 122102
loss: 1.2798904180526733,grad_norm: 0.9999998897604038, iteration: 122103
loss: 1.0755268335342407,grad_norm: 0.9999998786917874, iteration: 122104
loss: 1.0438710451126099,grad_norm: 0.9999996337594895, iteration: 122105
loss: 1.0693786144256592,grad_norm: 0.9877760483313965, iteration: 122106
loss: 1.055549144744873,grad_norm: 0.9999990035386936, iteration: 122107
loss: 1.415388822555542,grad_norm: 0.9999998851068992, iteration: 122108
loss: 1.1030974388122559,grad_norm: 0.9999999019073277, iteration: 122109
loss: 1.0549241304397583,grad_norm: 0.9999997840308064, iteration: 122110
loss: 1.2247843742370605,grad_norm: 0.9999998801906214, iteration: 122111
loss: 1.1133838891983032,grad_norm: 0.9999995094548763, iteration: 122112
loss: 1.2047597169876099,grad_norm: 0.9999993538361467, iteration: 122113
loss: 1.0013337135314941,grad_norm: 0.9999995721132532, iteration: 122114
loss: 1.0601015090942383,grad_norm: 0.9999991379050736, iteration: 122115
loss: 1.0092155933380127,grad_norm: 0.9087526065946028, iteration: 122116
loss: 1.2627161741256714,grad_norm: 0.9999999967430804, iteration: 122117
loss: 1.0770450830459595,grad_norm: 0.999999925968969, iteration: 122118
loss: 1.3620742559432983,grad_norm: 0.9999997809533641, iteration: 122119
loss: 1.029595136642456,grad_norm: 0.9999998118451981, iteration: 122120
loss: 1.0801525115966797,grad_norm: 0.999999938043605, iteration: 122121
loss: 1.2500345706939697,grad_norm: 0.999999977914232, iteration: 122122
loss: 1.010890245437622,grad_norm: 0.9999998556673878, iteration: 122123
loss: 1.6471447944641113,grad_norm: 0.9999999401401061, iteration: 122124
loss: 1.3049452304840088,grad_norm: 0.9999996498636532, iteration: 122125
loss: 1.0492005348205566,grad_norm: 0.9163372404593566, iteration: 122126
loss: 1.4790900945663452,grad_norm: 0.9999999491799084, iteration: 122127
loss: 1.069948434829712,grad_norm: 0.9999998934804383, iteration: 122128
loss: 1.1265321969985962,grad_norm: 0.9999999760357832, iteration: 122129
loss: 1.0436874628067017,grad_norm: 0.9999998282315169, iteration: 122130
loss: 0.9638171195983887,grad_norm: 0.9999992176479747, iteration: 122131
loss: 1.061055302619934,grad_norm: 0.9080214601735594, iteration: 122132
loss: 1.242462396621704,grad_norm: 0.9999999686494111, iteration: 122133
loss: 1.1266186237335205,grad_norm: 1.0000000193247762, iteration: 122134
loss: 1.1038899421691895,grad_norm: 0.999999417169289, iteration: 122135
loss: 1.0904591083526611,grad_norm: 0.9999998633662285, iteration: 122136
loss: 1.0435019731521606,grad_norm: 0.9999995956803023, iteration: 122137
loss: 1.0610475540161133,grad_norm: 0.9999992600761081, iteration: 122138
loss: 1.018137812614441,grad_norm: 0.9999990772532662, iteration: 122139
loss: 1.025999665260315,grad_norm: 0.999999030814453, iteration: 122140
loss: 1.0601115226745605,grad_norm: 0.9999992165045427, iteration: 122141
loss: 1.0355746746063232,grad_norm: 0.9999994167845738, iteration: 122142
loss: 1.0810166597366333,grad_norm: 0.9999993586173208, iteration: 122143
loss: 1.0148199796676636,grad_norm: 0.9999992298611821, iteration: 122144
loss: 1.1464215517044067,grad_norm: 0.9999998006303444, iteration: 122145
loss: 1.337911605834961,grad_norm: 0.9999996223621113, iteration: 122146
loss: 0.9881237149238586,grad_norm: 0.9999997850963713, iteration: 122147
loss: 1.0487712621688843,grad_norm: 0.9999994350639846, iteration: 122148
loss: 1.0161337852478027,grad_norm: 0.9999996258794449, iteration: 122149
loss: 1.0683742761611938,grad_norm: 0.999999335955219, iteration: 122150
loss: 1.0102585554122925,grad_norm: 0.9999990661864951, iteration: 122151
loss: 1.0689493417739868,grad_norm: 0.9999994591949005, iteration: 122152
loss: 0.9818586111068726,grad_norm: 0.9999994971300789, iteration: 122153
loss: 1.0862027406692505,grad_norm: 0.9999998743614842, iteration: 122154
loss: 1.0393937826156616,grad_norm: 0.9999999926972867, iteration: 122155
loss: 1.2100722789764404,grad_norm: 0.9999997587914138, iteration: 122156
loss: 1.014773964881897,grad_norm: 0.9350435971881754, iteration: 122157
loss: 0.9853770732879639,grad_norm: 0.7676573514273822, iteration: 122158
loss: 1.0147253274917603,grad_norm: 0.9999993022634209, iteration: 122159
loss: 1.0258010625839233,grad_norm: 0.8201190929728407, iteration: 122160
loss: 1.005452036857605,grad_norm: 0.9999995240003728, iteration: 122161
loss: 1.0123342275619507,grad_norm: 0.9611850959966979, iteration: 122162
loss: 1.0997501611709595,grad_norm: 0.9999994503578572, iteration: 122163
loss: 1.1504675149917603,grad_norm: 0.9999998475193351, iteration: 122164
loss: 1.0747960805892944,grad_norm: 0.9999998068803266, iteration: 122165
loss: 1.1157082319259644,grad_norm: 0.9999993264847622, iteration: 122166
loss: 0.9908788800239563,grad_norm: 0.9733841666329027, iteration: 122167
loss: 1.0525630712509155,grad_norm: 0.999999775133901, iteration: 122168
loss: 0.9775587916374207,grad_norm: 0.9722166283665356, iteration: 122169
loss: 1.043401837348938,grad_norm: 0.9999991158518332, iteration: 122170
loss: 1.0078970193862915,grad_norm: 0.8774356094008088, iteration: 122171
loss: 1.1109888553619385,grad_norm: 0.9999992701800261, iteration: 122172
loss: 1.0721538066864014,grad_norm: 0.9999999941258031, iteration: 122173
loss: 1.0144742727279663,grad_norm: 0.999999218988976, iteration: 122174
loss: 1.0146796703338623,grad_norm: 0.7872559611609078, iteration: 122175
loss: 1.0543253421783447,grad_norm: 0.9999995742334216, iteration: 122176
loss: 0.9829692840576172,grad_norm: 0.9999990883534028, iteration: 122177
loss: 1.145788311958313,grad_norm: 0.9999996179520644, iteration: 122178
loss: 0.9852146506309509,grad_norm: 0.8961387857882508, iteration: 122179
loss: 1.1233470439910889,grad_norm: 0.9999996625051842, iteration: 122180
loss: 1.042923092842102,grad_norm: 0.9999997463530781, iteration: 122181
loss: 1.0919909477233887,grad_norm: 0.999999112787068, iteration: 122182
loss: 1.0252803564071655,grad_norm: 0.8373496458155428, iteration: 122183
loss: 1.033048152923584,grad_norm: 0.9315960818864646, iteration: 122184
loss: 1.062782883644104,grad_norm: 0.9999994821228203, iteration: 122185
loss: 1.120184302330017,grad_norm: 0.9999994779931566, iteration: 122186
loss: 1.0100488662719727,grad_norm: 0.9359822362162321, iteration: 122187
loss: 1.0082980394363403,grad_norm: 0.9616540764400117, iteration: 122188
loss: 1.0033038854599,grad_norm: 0.994893543325628, iteration: 122189
loss: 1.018410086631775,grad_norm: 0.9999990785111806, iteration: 122190
loss: 1.0168498754501343,grad_norm: 0.8719829135797029, iteration: 122191
loss: 1.007517695426941,grad_norm: 0.9114565793972186, iteration: 122192
loss: 1.0877580642700195,grad_norm: 0.9999995340076941, iteration: 122193
loss: 1.066779613494873,grad_norm: 0.9999993083722315, iteration: 122194
loss: 0.9889417886734009,grad_norm: 0.8724057286005681, iteration: 122195
loss: 1.1583068370819092,grad_norm: 0.9999994469171525, iteration: 122196
loss: 0.9925184845924377,grad_norm: 0.8975606069068932, iteration: 122197
loss: 1.0038135051727295,grad_norm: 0.853423796708976, iteration: 122198
loss: 1.0027971267700195,grad_norm: 0.8505099124536426, iteration: 122199
loss: 0.9987956881523132,grad_norm: 0.9519306858137728, iteration: 122200
loss: 1.0678213834762573,grad_norm: 0.9999993392472225, iteration: 122201
loss: 1.0813034772872925,grad_norm: 0.9472186475342705, iteration: 122202
loss: 1.052899956703186,grad_norm: 0.8800750888222906, iteration: 122203
loss: 0.9852442741394043,grad_norm: 0.912061301904262, iteration: 122204
loss: 1.1782242059707642,grad_norm: 0.9999998662610681, iteration: 122205
loss: 1.0170366764068604,grad_norm: 0.9999991917481139, iteration: 122206
loss: 1.022020697593689,grad_norm: 0.9084598526745522, iteration: 122207
loss: 1.0412728786468506,grad_norm: 0.8593292323878258, iteration: 122208
loss: 1.0458406209945679,grad_norm: 0.9999990426530541, iteration: 122209
loss: 0.9603387117385864,grad_norm: 0.999999131473983, iteration: 122210
loss: 1.1488604545593262,grad_norm: 0.9999998800364687, iteration: 122211
loss: 1.027806282043457,grad_norm: 0.999999608749413, iteration: 122212
loss: 1.0077488422393799,grad_norm: 0.9999995417066053, iteration: 122213
loss: 1.0203758478164673,grad_norm: 0.9999993108966748, iteration: 122214
loss: 1.0139198303222656,grad_norm: 0.9999993774929905, iteration: 122215
loss: 1.046446681022644,grad_norm: 0.9573830093103288, iteration: 122216
loss: 1.0435349941253662,grad_norm: 0.9999998019161893, iteration: 122217
loss: 1.010867953300476,grad_norm: 0.9901195829364017, iteration: 122218
loss: 1.0207198858261108,grad_norm: 0.8573177618413919, iteration: 122219
loss: 1.063084602355957,grad_norm: 0.9999996054301591, iteration: 122220
loss: 1.0072578191757202,grad_norm: 0.8287914163314891, iteration: 122221
loss: 0.992209255695343,grad_norm: 0.9999992914818002, iteration: 122222
loss: 1.0294749736785889,grad_norm: 0.9566394424317387, iteration: 122223
loss: 0.9884724020957947,grad_norm: 0.7679734440026226, iteration: 122224
loss: 0.9867799282073975,grad_norm: 0.8734521533728279, iteration: 122225
loss: 1.0151300430297852,grad_norm: 0.9264472430049738, iteration: 122226
loss: 0.9787728786468506,grad_norm: 0.9999994385243136, iteration: 122227
loss: 1.030629277229309,grad_norm: 0.9999993029504408, iteration: 122228
loss: 1.0143033266067505,grad_norm: 0.8721374560781823, iteration: 122229
loss: 1.1383464336395264,grad_norm: 0.9999991781148244, iteration: 122230
loss: 1.0802857875823975,grad_norm: 0.9624864872236565, iteration: 122231
loss: 1.0491690635681152,grad_norm: 0.9999991706285801, iteration: 122232
loss: 1.023941159248352,grad_norm: 0.8343409497488612, iteration: 122233
loss: 1.0276962518692017,grad_norm: 0.7988007480157893, iteration: 122234
loss: 1.0737478733062744,grad_norm: 0.9999991002349385, iteration: 122235
loss: 1.1020723581314087,grad_norm: 0.9999990868109032, iteration: 122236
loss: 1.0995680093765259,grad_norm: 0.9999990656935863, iteration: 122237
loss: 1.026573896408081,grad_norm: 0.9841202548437693, iteration: 122238
loss: 1.0193451642990112,grad_norm: 0.9844053309150479, iteration: 122239
loss: 0.9706733226776123,grad_norm: 0.9999992990012879, iteration: 122240
loss: 1.1228359937667847,grad_norm: 0.9999998423401482, iteration: 122241
loss: 0.9661699533462524,grad_norm: 0.8334662318123587, iteration: 122242
loss: 0.9595544934272766,grad_norm: 0.9538269592048226, iteration: 122243
loss: 1.0416315793991089,grad_norm: 0.8473339582272708, iteration: 122244
loss: 1.0258636474609375,grad_norm: 0.9712304718624583, iteration: 122245
loss: 1.050601840019226,grad_norm: 0.9999997615867164, iteration: 122246
loss: 0.9901701211929321,grad_norm: 0.8986415529666929, iteration: 122247
loss: 0.9781193733215332,grad_norm: 0.9788008176412282, iteration: 122248
loss: 1.14227294921875,grad_norm: 0.9999994171892451, iteration: 122249
loss: 1.0737653970718384,grad_norm: 0.9787691620259, iteration: 122250
loss: 1.1243107318878174,grad_norm: 0.9999992176113028, iteration: 122251
loss: 0.9769412875175476,grad_norm: 0.932204656752741, iteration: 122252
loss: 1.02035653591156,grad_norm: 0.9999991269649188, iteration: 122253
loss: 1.0276753902435303,grad_norm: 0.9999995306220416, iteration: 122254
loss: 0.9736464619636536,grad_norm: 0.9257625768297515, iteration: 122255
loss: 1.0362741947174072,grad_norm: 0.9754955193479232, iteration: 122256
loss: 1.0081729888916016,grad_norm: 0.6895161954669017, iteration: 122257
loss: 1.0043431520462036,grad_norm: 0.8521372062111007, iteration: 122258
loss: 1.0362019538879395,grad_norm: 0.9999995720680551, iteration: 122259
loss: 0.9777835607528687,grad_norm: 0.9999991249913494, iteration: 122260
loss: 1.0110725164413452,grad_norm: 0.9999997392646134, iteration: 122261
loss: 1.0007368326187134,grad_norm: 0.705196723193136, iteration: 122262
loss: 1.1218520402908325,grad_norm: 0.9999990141167219, iteration: 122263
loss: 1.0085906982421875,grad_norm: 0.9999996313139909, iteration: 122264
loss: 1.0101208686828613,grad_norm: 0.9741749609692506, iteration: 122265
loss: 1.0369415283203125,grad_norm: 0.9999997087103328, iteration: 122266
loss: 1.0236111879348755,grad_norm: 0.9387193123625087, iteration: 122267
loss: 1.0227630138397217,grad_norm: 0.9179104741589242, iteration: 122268
loss: 1.135087251663208,grad_norm: 0.9999999253151047, iteration: 122269
loss: 0.990031361579895,grad_norm: 0.9999998602528214, iteration: 122270
loss: 1.1244733333587646,grad_norm: 0.9999997418501789, iteration: 122271
loss: 0.987019956111908,grad_norm: 0.8232961587009896, iteration: 122272
loss: 1.1189171075820923,grad_norm: 0.8696155886356438, iteration: 122273
loss: 1.0587599277496338,grad_norm: 0.879922733211715, iteration: 122274
loss: 1.0006389617919922,grad_norm: 0.9075299217328834, iteration: 122275
loss: 1.0173028707504272,grad_norm: 0.9999991940468645, iteration: 122276
loss: 0.9972010254859924,grad_norm: 0.9999996854605753, iteration: 122277
loss: 1.000301480293274,grad_norm: 0.8430937262687842, iteration: 122278
loss: 1.030388593673706,grad_norm: 0.9999991623845602, iteration: 122279
loss: 1.0677908658981323,grad_norm: 0.9999998541313465, iteration: 122280
loss: 1.010475516319275,grad_norm: 0.9999991326882766, iteration: 122281
loss: 1.036105751991272,grad_norm: 0.9068537594627767, iteration: 122282
loss: 0.9701829552650452,grad_norm: 0.8242562417375127, iteration: 122283
loss: 1.0079697370529175,grad_norm: 0.9999991671521652, iteration: 122284
loss: 1.1077040433883667,grad_norm: 0.9999995542136739, iteration: 122285
loss: 1.0039269924163818,grad_norm: 0.9999993044035688, iteration: 122286
loss: 1.0178923606872559,grad_norm: 0.9765997727785517, iteration: 122287
loss: 1.0411018133163452,grad_norm: 0.9999996590124687, iteration: 122288
loss: 1.152398705482483,grad_norm: 0.9073744110136681, iteration: 122289
loss: 1.130228042602539,grad_norm: 0.9999998524519692, iteration: 122290
loss: 1.1437315940856934,grad_norm: 0.9999999131411221, iteration: 122291
loss: 1.0034376382827759,grad_norm: 0.8600496389164883, iteration: 122292
loss: 0.9996688961982727,grad_norm: 0.8040696359774442, iteration: 122293
loss: 1.0267430543899536,grad_norm: 0.8343126038148198, iteration: 122294
loss: 1.1465766429901123,grad_norm: 0.9999998094580967, iteration: 122295
loss: 0.9986153244972229,grad_norm: 0.9999990360429372, iteration: 122296
loss: 1.2027008533477783,grad_norm: 0.9999999587936722, iteration: 122297
loss: 0.985564112663269,grad_norm: 0.836493931955045, iteration: 122298
loss: 1.018366813659668,grad_norm: 0.9399185925355429, iteration: 122299
loss: 0.9821823835372925,grad_norm: 0.962289435295136, iteration: 122300
loss: 1.0129178762435913,grad_norm: 0.8764259244802701, iteration: 122301
loss: 1.2142064571380615,grad_norm: 0.9999996208154244, iteration: 122302
loss: 1.0086034536361694,grad_norm: 0.9357363060754841, iteration: 122303
loss: 1.0208499431610107,grad_norm: 0.9999990950508829, iteration: 122304
loss: 1.1602778434753418,grad_norm: 0.9999992408457183, iteration: 122305
loss: 1.0244567394256592,grad_norm: 0.9999993789221051, iteration: 122306
loss: 1.0113064050674438,grad_norm: 0.9999998974219245, iteration: 122307
loss: 1.0342990159988403,grad_norm: 0.883537206448718, iteration: 122308
loss: 0.9853367209434509,grad_norm: 0.8730521887960606, iteration: 122309
loss: 0.9979313015937805,grad_norm: 0.9585214440265915, iteration: 122310
loss: 1.041090965270996,grad_norm: 0.9859517741333911, iteration: 122311
loss: 1.00557279586792,grad_norm: 0.9271333192399623, iteration: 122312
loss: 1.1000583171844482,grad_norm: 0.999999292525545, iteration: 122313
loss: 1.013230800628662,grad_norm: 0.9999994713519774, iteration: 122314
loss: 0.9836142063140869,grad_norm: 0.7396941637083287, iteration: 122315
loss: 1.0822288990020752,grad_norm: 0.9999998438047254, iteration: 122316
loss: 0.9791660308837891,grad_norm: 0.9137386003251089, iteration: 122317
loss: 1.100961685180664,grad_norm: 0.9999995168474115, iteration: 122318
loss: 1.0449438095092773,grad_norm: 0.9999989998716207, iteration: 122319
loss: 1.0068849325180054,grad_norm: 0.8838004346362823, iteration: 122320
loss: 1.2040518522262573,grad_norm: 0.9999998185514575, iteration: 122321
loss: 1.0469162464141846,grad_norm: 0.8885063412169237, iteration: 122322
loss: 0.9509223103523254,grad_norm: 0.999999240918015, iteration: 122323
loss: 0.956699550151825,grad_norm: 0.9352182291707057, iteration: 122324
loss: 1.1529313325881958,grad_norm: 0.9999992563857789, iteration: 122325
loss: 0.9910921454429626,grad_norm: 0.8036147520877953, iteration: 122326
loss: 1.0131174325942993,grad_norm: 0.7874333663606916, iteration: 122327
loss: 0.9662642478942871,grad_norm: 0.8369316216743372, iteration: 122328
loss: 1.0007261037826538,grad_norm: 0.8300037033049664, iteration: 122329
loss: 1.052429437637329,grad_norm: 0.8860890336026798, iteration: 122330
loss: 1.0333428382873535,grad_norm: 0.9999998611159979, iteration: 122331
loss: 1.0042123794555664,grad_norm: 0.8524604711356871, iteration: 122332
loss: 1.041517734527588,grad_norm: 0.9999997568160848, iteration: 122333
loss: 0.9849643111228943,grad_norm: 0.8794947395296232, iteration: 122334
loss: 0.967340350151062,grad_norm: 0.9544043515369425, iteration: 122335
loss: 1.0333197116851807,grad_norm: 0.999999077858422, iteration: 122336
loss: 0.99420565366745,grad_norm: 0.9068816011039692, iteration: 122337
loss: 0.9863339066505432,grad_norm: 0.9999994457928938, iteration: 122338
loss: 1.0129066705703735,grad_norm: 0.9375930109938715, iteration: 122339
loss: 1.0273765325546265,grad_norm: 0.9999990822529502, iteration: 122340
loss: 1.0200037956237793,grad_norm: 0.8884880237479751, iteration: 122341
loss: 1.0049351453781128,grad_norm: 0.999999091154324, iteration: 122342
loss: 1.0348079204559326,grad_norm: 0.9999991425797772, iteration: 122343
loss: 0.9633215069770813,grad_norm: 0.846908412085766, iteration: 122344
loss: 0.9649198055267334,grad_norm: 0.9996782577603617, iteration: 122345
loss: 0.9870413541793823,grad_norm: 0.8512634295140213, iteration: 122346
loss: 1.0157842636108398,grad_norm: 0.8108898592676023, iteration: 122347
loss: 0.9980788826942444,grad_norm: 0.9999990738088401, iteration: 122348
loss: 1.0062810182571411,grad_norm: 0.9098270270871087, iteration: 122349
loss: 1.041900873184204,grad_norm: 0.9574372827260292, iteration: 122350
loss: 1.0284473896026611,grad_norm: 0.99999939740793, iteration: 122351
loss: 1.0027472972869873,grad_norm: 0.9999990541236037, iteration: 122352
loss: 1.0039572715759277,grad_norm: 0.8679790457518664, iteration: 122353
loss: 0.9933262467384338,grad_norm: 0.8766910871664843, iteration: 122354
loss: 1.0386199951171875,grad_norm: 0.9999995406778857, iteration: 122355
loss: 0.9803714752197266,grad_norm: 0.9999989425480106, iteration: 122356
loss: 0.9885070323944092,grad_norm: 0.9539072018604227, iteration: 122357
loss: 0.9939724206924438,grad_norm: 0.8490045417171017, iteration: 122358
loss: 1.018566608428955,grad_norm: 0.9999991532254404, iteration: 122359
loss: 1.0651425123214722,grad_norm: 0.9999996527913319, iteration: 122360
loss: 0.9574807286262512,grad_norm: 0.9999998130602302, iteration: 122361
loss: 1.0365432500839233,grad_norm: 0.9999991713463598, iteration: 122362
loss: 0.9942571520805359,grad_norm: 0.8563508347297931, iteration: 122363
loss: 0.993130087852478,grad_norm: 0.8642105479234616, iteration: 122364
loss: 1.072310447692871,grad_norm: 0.9999998368397818, iteration: 122365
loss: 0.9910261631011963,grad_norm: 0.9999990274910234, iteration: 122366
loss: 1.1473186016082764,grad_norm: 0.999999885157181, iteration: 122367
loss: 0.95623779296875,grad_norm: 0.8854586314600278, iteration: 122368
loss: 1.1611506938934326,grad_norm: 0.999999977001749, iteration: 122369
loss: 1.0530260801315308,grad_norm: 0.9999991930694929, iteration: 122370
loss: 0.9670055508613586,grad_norm: 0.9999991880462737, iteration: 122371
loss: 1.017095685005188,grad_norm: 0.9999995957792801, iteration: 122372
loss: 0.985198974609375,grad_norm: 0.8188372800017708, iteration: 122373
loss: 0.9815738797187805,grad_norm: 0.8651400153203324, iteration: 122374
loss: 1.0945652723312378,grad_norm: 0.9999993727582895, iteration: 122375
loss: 1.0205239057540894,grad_norm: 0.9348378925652794, iteration: 122376
loss: 0.9918343424797058,grad_norm: 0.9671047728284037, iteration: 122377
loss: 1.034875512123108,grad_norm: 0.9999993092753783, iteration: 122378
loss: 1.0937680006027222,grad_norm: 1.000000049294173, iteration: 122379
loss: 1.0257850885391235,grad_norm: 0.863949996707392, iteration: 122380
loss: 1.0199403762817383,grad_norm: 0.9193212592722899, iteration: 122381
loss: 1.1000863313674927,grad_norm: 0.999999212960203, iteration: 122382
loss: 1.0136922597885132,grad_norm: 0.9859273882727864, iteration: 122383
loss: 1.0389823913574219,grad_norm: 0.9999997815814755, iteration: 122384
loss: 1.0328975915908813,grad_norm: 0.9999997475181526, iteration: 122385
loss: 1.0009392499923706,grad_norm: 0.9562463339059942, iteration: 122386
loss: 1.0018025636672974,grad_norm: 0.8742243351667237, iteration: 122387
loss: 1.007517695426941,grad_norm: 0.9999991860764789, iteration: 122388
loss: 1.0025529861450195,grad_norm: 0.999999710809776, iteration: 122389
loss: 0.9707808494567871,grad_norm: 0.7363115505036854, iteration: 122390
loss: 0.9862056970596313,grad_norm: 0.8431357023707257, iteration: 122391
loss: 0.9819124937057495,grad_norm: 0.9999990222221737, iteration: 122392
loss: 0.9735772609710693,grad_norm: 0.8450428559300849, iteration: 122393
loss: 1.036124348640442,grad_norm: 0.8480656489676047, iteration: 122394
loss: 1.125916838645935,grad_norm: 1.0000000359321408, iteration: 122395
loss: 1.0311064720153809,grad_norm: 0.6884910928906585, iteration: 122396
loss: 1.038717269897461,grad_norm: 0.9999990766161047, iteration: 122397
loss: 1.0574045181274414,grad_norm: 0.8310151508551823, iteration: 122398
loss: 1.0642988681793213,grad_norm: 0.8165382994023487, iteration: 122399
loss: 1.0330150127410889,grad_norm: 0.9999992280836455, iteration: 122400
loss: 0.9985463619232178,grad_norm: 0.8185198297803838, iteration: 122401
loss: 0.9974789619445801,grad_norm: 0.9999990424774147, iteration: 122402
loss: 1.0346215963363647,grad_norm: 0.9590864206375309, iteration: 122403
loss: 0.9977909922599792,grad_norm: 0.7428865021683335, iteration: 122404
loss: 1.1103318929672241,grad_norm: 0.9999997548875754, iteration: 122405
loss: 1.0258734226226807,grad_norm: 0.9456164947148784, iteration: 122406
loss: 1.01175057888031,grad_norm: 0.9241249501862073, iteration: 122407
loss: 1.02866530418396,grad_norm: 0.999999351345728, iteration: 122408
loss: 1.0452381372451782,grad_norm: 0.9017846707603481, iteration: 122409
loss: 1.0032408237457275,grad_norm: 0.9999996117336035, iteration: 122410
loss: 0.9818112254142761,grad_norm: 0.9290828667670277, iteration: 122411
loss: 1.0069787502288818,grad_norm: 0.9999989986167359, iteration: 122412
loss: 0.9922614097595215,grad_norm: 0.9999994491432235, iteration: 122413
loss: 1.0388307571411133,grad_norm: 0.9999992028857716, iteration: 122414
loss: 1.0367894172668457,grad_norm: 0.9999999585557644, iteration: 122415
loss: 1.0175625085830688,grad_norm: 0.9999991221874739, iteration: 122416
loss: 1.0770268440246582,grad_norm: 0.9999992492834633, iteration: 122417
loss: 1.0215346813201904,grad_norm: 0.9802595916791355, iteration: 122418
loss: 0.9577593803405762,grad_norm: 0.9765998028640769, iteration: 122419
loss: 0.9786061644554138,grad_norm: 0.8564396419652135, iteration: 122420
loss: 1.002927541732788,grad_norm: 0.9999991866072413, iteration: 122421
loss: 0.9921771287918091,grad_norm: 0.9999992212019652, iteration: 122422
loss: 0.9772611260414124,grad_norm: 0.7552326214467155, iteration: 122423
loss: 0.9982286691665649,grad_norm: 0.9999997785627038, iteration: 122424
loss: 1.066096544265747,grad_norm: 0.9999998626725486, iteration: 122425
loss: 1.065407395362854,grad_norm: 0.9999999610178372, iteration: 122426
loss: 1.0185836553573608,grad_norm: 0.8711082399413391, iteration: 122427
loss: 1.0477710962295532,grad_norm: 0.9999994897882762, iteration: 122428
loss: 1.060355544090271,grad_norm: 0.9999995230694424, iteration: 122429
loss: 1.1733620166778564,grad_norm: 0.9999996270561284, iteration: 122430
loss: 1.0105327367782593,grad_norm: 0.8385453093904944, iteration: 122431
loss: 1.0318650007247925,grad_norm: 0.9999990768968148, iteration: 122432
loss: 0.9971274137496948,grad_norm: 0.786789684937729, iteration: 122433
loss: 1.025496482849121,grad_norm: 0.9629315095987682, iteration: 122434
loss: 1.0222585201263428,grad_norm: 0.9999991355997088, iteration: 122435
loss: 1.0212438106536865,grad_norm: 0.9999993168088764, iteration: 122436
loss: 0.9758893847465515,grad_norm: 0.7801678553219943, iteration: 122437
loss: 1.0515937805175781,grad_norm: 0.9999992797769424, iteration: 122438
loss: 1.0614620447158813,grad_norm: 0.9362481788551299, iteration: 122439
loss: 1.0158977508544922,grad_norm: 0.9999991992475484, iteration: 122440
loss: 1.0672607421875,grad_norm: 0.9999993188625723, iteration: 122441
loss: 0.9854292869567871,grad_norm: 0.6940371526831776, iteration: 122442
loss: 0.9801586866378784,grad_norm: 0.9875785991247359, iteration: 122443
loss: 0.976653516292572,grad_norm: 0.8644621586450066, iteration: 122444
loss: 0.9403776526451111,grad_norm: 0.9285710139383754, iteration: 122445
loss: 0.9871810674667358,grad_norm: 0.9708400256358247, iteration: 122446
loss: 1.058112621307373,grad_norm: 0.9921467271932892, iteration: 122447
loss: 1.0671766996383667,grad_norm: 0.999999449594969, iteration: 122448
loss: 1.0407829284667969,grad_norm: 0.9137753874081782, iteration: 122449
loss: 0.9931029081344604,grad_norm: 0.7906267025171827, iteration: 122450
loss: 1.0279358625411987,grad_norm: 0.9999997428350802, iteration: 122451
loss: 1.0152539014816284,grad_norm: 0.8103715788968999, iteration: 122452
loss: 1.0760743618011475,grad_norm: 0.9999994885949179, iteration: 122453
loss: 1.00223970413208,grad_norm: 0.999999227246251, iteration: 122454
loss: 1.1421103477478027,grad_norm: 0.9999995395272289, iteration: 122455
loss: 1.003426432609558,grad_norm: 0.9145413557133556, iteration: 122456
loss: 1.1070501804351807,grad_norm: 0.9999991372916202, iteration: 122457
loss: 1.0129510164260864,grad_norm: 0.9237737578716181, iteration: 122458
loss: 0.9950074553489685,grad_norm: 0.9999991144112842, iteration: 122459
loss: 0.9723030924797058,grad_norm: 0.9999999100327228, iteration: 122460
loss: 1.0814435482025146,grad_norm: 0.999999020111849, iteration: 122461
loss: 1.0162875652313232,grad_norm: 0.9999991583423369, iteration: 122462
loss: 1.0351274013519287,grad_norm: 0.9999990203237529, iteration: 122463
loss: 1.0399302244186401,grad_norm: 0.9999999072624772, iteration: 122464
loss: 1.0206395387649536,grad_norm: 0.9381927529912474, iteration: 122465
loss: 1.029075264930725,grad_norm: 0.7997173430120593, iteration: 122466
loss: 0.9827731847763062,grad_norm: 0.9225088279017816, iteration: 122467
loss: 0.9909518957138062,grad_norm: 0.9240438485390823, iteration: 122468
loss: 0.975283682346344,grad_norm: 0.8971931983181684, iteration: 122469
loss: 0.9781495928764343,grad_norm: 0.8003851916733793, iteration: 122470
loss: 1.0241883993148804,grad_norm: 0.8892096164817577, iteration: 122471
loss: 1.0171167850494385,grad_norm: 0.9098909301528635, iteration: 122472
loss: 1.0683060884475708,grad_norm: 0.9405805962376798, iteration: 122473
loss: 0.9862119555473328,grad_norm: 0.8353921938824983, iteration: 122474
loss: 0.9909217357635498,grad_norm: 0.7594244524298183, iteration: 122475
loss: 1.1669100522994995,grad_norm: 0.9999997397375828, iteration: 122476
loss: 0.9801825881004333,grad_norm: 0.999999554310397, iteration: 122477
loss: 1.0327214002609253,grad_norm: 0.9999995098248169, iteration: 122478
loss: 1.122080683708191,grad_norm: 0.9999990299346501, iteration: 122479
loss: 1.048126459121704,grad_norm: 0.9999992252280758, iteration: 122480
loss: 1.0022542476654053,grad_norm: 0.9999992342435063, iteration: 122481
loss: 1.000781536102295,grad_norm: 0.8493343154209548, iteration: 122482
loss: 1.038277268409729,grad_norm: 0.9999990578059685, iteration: 122483
loss: 0.9811773896217346,grad_norm: 0.7904300795795566, iteration: 122484
loss: 1.0486067533493042,grad_norm: 0.8198242243462504, iteration: 122485
loss: 1.0676119327545166,grad_norm: 0.9999995709701747, iteration: 122486
loss: 1.0462448596954346,grad_norm: 0.8101557852883995, iteration: 122487
loss: 1.0085457563400269,grad_norm: 0.999999041433221, iteration: 122488
loss: 1.013358235359192,grad_norm: 0.9408348967991615, iteration: 122489
loss: 0.9912176728248596,grad_norm: 0.9999990341707045, iteration: 122490
loss: 0.9948482513427734,grad_norm: 0.9182360143687657, iteration: 122491
loss: 0.993556559085846,grad_norm: 0.8151756024714703, iteration: 122492
loss: 1.0039258003234863,grad_norm: 0.9490938838296429, iteration: 122493
loss: 1.01606023311615,grad_norm: 0.9999991925646319, iteration: 122494
loss: 1.1239683628082275,grad_norm: 0.9999995168677063, iteration: 122495
loss: 1.002417802810669,grad_norm: 0.7182374927737495, iteration: 122496
loss: 1.060573935508728,grad_norm: 0.9999997274574342, iteration: 122497
loss: 0.9829986691474915,grad_norm: 0.9999991174836876, iteration: 122498
loss: 0.9676725268363953,grad_norm: 0.872601964321572, iteration: 122499
loss: 1.0347166061401367,grad_norm: 0.8526317486484204, iteration: 122500
loss: 0.9774819016456604,grad_norm: 0.999999142697347, iteration: 122501
loss: 0.9810628890991211,grad_norm: 0.9204458146573272, iteration: 122502
loss: 1.0149327516555786,grad_norm: 0.9999993122604658, iteration: 122503
loss: 1.0204812288284302,grad_norm: 0.7977948604387101, iteration: 122504
loss: 1.0841574668884277,grad_norm: 0.9999990699698292, iteration: 122505
loss: 0.9923104643821716,grad_norm: 0.9999998548068217, iteration: 122506
loss: 0.9876708984375,grad_norm: 0.8103262418941812, iteration: 122507
loss: 1.0592584609985352,grad_norm: 0.9999991609753182, iteration: 122508
loss: 1.0073659420013428,grad_norm: 0.999999155524776, iteration: 122509
loss: 1.00341796875,grad_norm: 0.9999990058053425, iteration: 122510
loss: 1.0388883352279663,grad_norm: 0.9999993539928217, iteration: 122511
loss: 1.012284278869629,grad_norm: 0.7773429684570763, iteration: 122512
loss: 1.0332989692687988,grad_norm: 0.9874926815973364, iteration: 122513
loss: 1.000878930091858,grad_norm: 0.922391282131868, iteration: 122514
loss: 0.9876085519790649,grad_norm: 0.9289279134790925, iteration: 122515
loss: 0.9572737216949463,grad_norm: 0.9999991763838985, iteration: 122516
loss: 1.0553057193756104,grad_norm: 0.9999992685227286, iteration: 122517
loss: 1.0483424663543701,grad_norm: 0.9999998083517371, iteration: 122518
loss: 0.9842923879623413,grad_norm: 0.9824193211514711, iteration: 122519
loss: 1.0074735879898071,grad_norm: 0.9999991240146818, iteration: 122520
loss: 0.9974893927574158,grad_norm: 0.9753703309103948, iteration: 122521
loss: 1.021020531654358,grad_norm: 0.8884120494751916, iteration: 122522
loss: 1.0900869369506836,grad_norm: 0.9999994128678786, iteration: 122523
loss: 1.0550014972686768,grad_norm: 0.999999440053739, iteration: 122524
loss: 1.039207935333252,grad_norm: 0.999999166971828, iteration: 122525
loss: 1.0024768114089966,grad_norm: 0.9999990843617781, iteration: 122526
loss: 0.9744171500205994,grad_norm: 0.8638722839046424, iteration: 122527
loss: 1.0411229133605957,grad_norm: 0.9999992674999512, iteration: 122528
loss: 1.007430911064148,grad_norm: 0.9999992130573067, iteration: 122529
loss: 1.0233274698257446,grad_norm: 0.9999999305609228, iteration: 122530
loss: 1.027355432510376,grad_norm: 0.8685873545172668, iteration: 122531
loss: 0.993852972984314,grad_norm: 0.9999990318328442, iteration: 122532
loss: 0.9922011494636536,grad_norm: 0.9565882864008798, iteration: 122533
loss: 0.9812818765640259,grad_norm: 0.9751710910339157, iteration: 122534
loss: 1.023905873298645,grad_norm: 0.9741914595314879, iteration: 122535
loss: 1.0316780805587769,grad_norm: 0.9836039457711018, iteration: 122536
loss: 0.9972476363182068,grad_norm: 0.8668044356940536, iteration: 122537
loss: 1.0128549337387085,grad_norm: 0.9520601155288053, iteration: 122538
loss: 1.0077005624771118,grad_norm: 0.7836928446102815, iteration: 122539
loss: 1.0059489011764526,grad_norm: 0.8388129545761858, iteration: 122540
loss: 1.0302914381027222,grad_norm: 0.8803816539623434, iteration: 122541
loss: 1.0516961812973022,grad_norm: 0.9999995444678056, iteration: 122542
loss: 0.9952747225761414,grad_norm: 0.8629725176322336, iteration: 122543
loss: 1.0471394062042236,grad_norm: 0.9582796192358105, iteration: 122544
loss: 1.0443923473358154,grad_norm: 0.9999990862644649, iteration: 122545
loss: 0.9975658059120178,grad_norm: 0.9999992055787035, iteration: 122546
loss: 0.9917224645614624,grad_norm: 0.9943830993808239, iteration: 122547
loss: 1.0422844886779785,grad_norm: 0.9999990762624201, iteration: 122548
loss: 1.048638939857483,grad_norm: 0.9999991403853477, iteration: 122549
loss: 1.1091476678848267,grad_norm: 0.9999992607285922, iteration: 122550
loss: 1.0125521421432495,grad_norm: 0.923279454395137, iteration: 122551
loss: 1.0186200141906738,grad_norm: 0.9999996397319441, iteration: 122552
loss: 1.0173671245574951,grad_norm: 0.9999991159810594, iteration: 122553
loss: 1.014195203781128,grad_norm: 0.99999910673677, iteration: 122554
loss: 1.018960952758789,grad_norm: 0.7983992988709439, iteration: 122555
loss: 0.9867518544197083,grad_norm: 0.999999688160723, iteration: 122556
loss: 0.9647045731544495,grad_norm: 0.8407834181243397, iteration: 122557
loss: 1.0266640186309814,grad_norm: 0.9999991797260404, iteration: 122558
loss: 1.006988286972046,grad_norm: 0.9999991891475893, iteration: 122559
loss: 0.9777514338493347,grad_norm: 0.9165271242219462, iteration: 122560
loss: 0.9880853295326233,grad_norm: 0.9821431213853427, iteration: 122561
loss: 0.9695461392402649,grad_norm: 0.997236922471582, iteration: 122562
loss: 1.0203793048858643,grad_norm: 0.7790634072475293, iteration: 122563
loss: 1.1384485960006714,grad_norm: 0.9999998560668675, iteration: 122564
loss: 1.004041075706482,grad_norm: 0.8629034352770053, iteration: 122565
loss: 1.0318506956100464,grad_norm: 0.7912212033528575, iteration: 122566
loss: 1.037280797958374,grad_norm: 0.9287597288818386, iteration: 122567
loss: 1.0442330837249756,grad_norm: 0.8889057254348299, iteration: 122568
loss: 0.9662138223648071,grad_norm: 0.9876332125192303, iteration: 122569
loss: 0.9838683605194092,grad_norm: 0.9076514429501586, iteration: 122570
loss: 1.01206636428833,grad_norm: 0.8512479522557124, iteration: 122571
loss: 1.020332932472229,grad_norm: 0.9999994061945179, iteration: 122572
loss: 1.0025968551635742,grad_norm: 0.8285586959750891, iteration: 122573
loss: 1.0521808862686157,grad_norm: 0.9999989820692098, iteration: 122574
loss: 1.0614709854125977,grad_norm: 0.9999992095997885, iteration: 122575
loss: 0.9923121929168701,grad_norm: 0.7729926608614698, iteration: 122576
loss: 1.086493730545044,grad_norm: 0.9999997891863323, iteration: 122577
loss: 1.029930591583252,grad_norm: 0.9999997182565225, iteration: 122578
loss: 1.011274814605713,grad_norm: 0.9999990740101038, iteration: 122579
loss: 1.0123493671417236,grad_norm: 0.923195872575218, iteration: 122580
loss: 1.029771327972412,grad_norm: 0.952682780008967, iteration: 122581
loss: 1.007204294204712,grad_norm: 0.8417740378851319, iteration: 122582
loss: 1.0207295417785645,grad_norm: 0.9999995794603603, iteration: 122583
loss: 1.0626778602600098,grad_norm: 0.9999989846557382, iteration: 122584
loss: 1.1064674854278564,grad_norm: 0.9999992016057407, iteration: 122585
loss: 1.0388782024383545,grad_norm: 0.9999991750205267, iteration: 122586
loss: 1.021079182624817,grad_norm: 0.9999998005247985, iteration: 122587
loss: 1.0176891088485718,grad_norm: 0.9439325728805166, iteration: 122588
loss: 0.9965924620628357,grad_norm: 0.9773677220459324, iteration: 122589
loss: 1.0188055038452148,grad_norm: 0.9999991853483703, iteration: 122590
loss: 0.9946022033691406,grad_norm: 0.9285746524866493, iteration: 122591
loss: 0.9870328903198242,grad_norm: 0.9999989580463655, iteration: 122592
loss: 1.006209373474121,grad_norm: 0.8294186945011438, iteration: 122593
loss: 0.9750738143920898,grad_norm: 0.9604838664059663, iteration: 122594
loss: 0.9875863194465637,grad_norm: 0.9999990980303355, iteration: 122595
loss: 1.0110442638397217,grad_norm: 0.9986916392729148, iteration: 122596
loss: 1.0049554109573364,grad_norm: 0.9999996941103786, iteration: 122597
loss: 1.0021946430206299,grad_norm: 0.9373564283463162, iteration: 122598
loss: 1.0322624444961548,grad_norm: 0.9999993741400784, iteration: 122599
loss: 1.0141606330871582,grad_norm: 0.8707337160713551, iteration: 122600
loss: 0.9902553558349609,grad_norm: 0.9752308484041164, iteration: 122601
loss: 1.000770926475525,grad_norm: 0.8816917203239155, iteration: 122602
loss: 0.9986945390701294,grad_norm: 0.9567288862130261, iteration: 122603
loss: 0.9980669021606445,grad_norm: 0.879859828983293, iteration: 122604
loss: 1.0053787231445312,grad_norm: 0.9999993223845954, iteration: 122605
loss: 0.9847205281257629,grad_norm: 0.996159477442626, iteration: 122606
loss: 1.0408321619033813,grad_norm: 0.999999743361764, iteration: 122607
loss: 1.022327184677124,grad_norm: 0.8225935429006608, iteration: 122608
loss: 1.0083788633346558,grad_norm: 0.8989322319518809, iteration: 122609
loss: 1.0394777059555054,grad_norm: 0.9999990167157676, iteration: 122610
loss: 0.996551513671875,grad_norm: 0.9258125402245179, iteration: 122611
loss: 0.9875206351280212,grad_norm: 0.8819687855002935, iteration: 122612
loss: 1.0211941003799438,grad_norm: 0.9999991739318014, iteration: 122613
loss: 1.0265765190124512,grad_norm: 0.9999991568441379, iteration: 122614
loss: 0.964047908782959,grad_norm: 0.9097594328124862, iteration: 122615
loss: 1.0139751434326172,grad_norm: 0.8274022013062569, iteration: 122616
loss: 1.0961077213287354,grad_norm: 0.9999995014326353, iteration: 122617
loss: 1.0063724517822266,grad_norm: 0.9999996513535049, iteration: 122618
loss: 1.0146499872207642,grad_norm: 0.8010897624959458, iteration: 122619
loss: 0.9778870344161987,grad_norm: 0.9973609805284882, iteration: 122620
loss: 1.0413798093795776,grad_norm: 0.99999918357615, iteration: 122621
loss: 1.0032957792282104,grad_norm: 0.9676073863323588, iteration: 122622
loss: 1.0187523365020752,grad_norm: 0.9131589124859635, iteration: 122623
loss: 0.999525785446167,grad_norm: 0.8347472802504614, iteration: 122624
loss: 0.9801629781723022,grad_norm: 0.7709924968988238, iteration: 122625
loss: 0.9982132911682129,grad_norm: 0.9999994213557423, iteration: 122626
loss: 0.9923694133758545,grad_norm: 0.8966522798036471, iteration: 122627
loss: 0.9804063439369202,grad_norm: 0.9607299418867459, iteration: 122628
loss: 0.9856777191162109,grad_norm: 0.9999991563955428, iteration: 122629
loss: 1.012518048286438,grad_norm: 0.9999993127517883, iteration: 122630
loss: 1.0358084440231323,grad_norm: 0.9999990034304722, iteration: 122631
loss: 1.1222540140151978,grad_norm: 0.9999994841325494, iteration: 122632
loss: 1.0293728113174438,grad_norm: 0.9918769568364996, iteration: 122633
loss: 1.0175471305847168,grad_norm: 0.9999989765827632, iteration: 122634
loss: 1.025824785232544,grad_norm: 0.9999993172871859, iteration: 122635
loss: 0.9961985349655151,grad_norm: 0.9999991426422938, iteration: 122636
loss: 0.9967126846313477,grad_norm: 0.9322273659146036, iteration: 122637
loss: 0.9928953051567078,grad_norm: 0.9644094282104108, iteration: 122638
loss: 1.0038282871246338,grad_norm: 0.9410547717130004, iteration: 122639
loss: 1.025129795074463,grad_norm: 0.8374095592809976, iteration: 122640
loss: 1.0105589628219604,grad_norm: 0.8624665543456234, iteration: 122641
loss: 1.0040204524993896,grad_norm: 0.9999989445677921, iteration: 122642
loss: 0.9934805035591125,grad_norm: 0.9458165100102852, iteration: 122643
loss: 0.9904568195343018,grad_norm: 0.9263542226964042, iteration: 122644
loss: 0.9900168180465698,grad_norm: 0.9999998925277161, iteration: 122645
loss: 0.964721143245697,grad_norm: 0.8497849701046714, iteration: 122646
loss: 1.010097622871399,grad_norm: 0.9999990047792204, iteration: 122647
loss: 1.1103172302246094,grad_norm: 0.999999027390476, iteration: 122648
loss: 0.9664414525032043,grad_norm: 0.9999991621165479, iteration: 122649
loss: 0.9898618459701538,grad_norm: 0.9999991489334945, iteration: 122650
loss: 0.981457531452179,grad_norm: 0.816545512368524, iteration: 122651
loss: 1.0054415464401245,grad_norm: 0.9076584763774891, iteration: 122652
loss: 0.9852387309074402,grad_norm: 0.6977838088728248, iteration: 122653
loss: 1.0013630390167236,grad_norm: 0.9425110559088535, iteration: 122654
loss: 0.991256594657898,grad_norm: 0.9999997538257577, iteration: 122655
loss: 0.975817084312439,grad_norm: 0.9999988988563923, iteration: 122656
loss: 1.024914264678955,grad_norm: 0.9999990697256527, iteration: 122657
loss: 1.0153446197509766,grad_norm: 0.9999989906018678, iteration: 122658
loss: 1.0058155059814453,grad_norm: 0.7191016529437055, iteration: 122659
loss: 0.9838142395019531,grad_norm: 0.869877330598769, iteration: 122660
loss: 1.0566025972366333,grad_norm: 0.9999992561750629, iteration: 122661
loss: 0.965340256690979,grad_norm: 0.9267052598765073, iteration: 122662
loss: 0.9683834314346313,grad_norm: 0.9999991895028016, iteration: 122663
loss: 0.9819468259811401,grad_norm: 0.9999994359602915, iteration: 122664
loss: 0.9994956851005554,grad_norm: 0.8887944065953691, iteration: 122665
loss: 1.0604132413864136,grad_norm: 0.8414156087371758, iteration: 122666
loss: 1.0114034414291382,grad_norm: 0.9382114944028028, iteration: 122667
loss: 0.9839674830436707,grad_norm: 0.8169237492579225, iteration: 122668
loss: 1.0862137079238892,grad_norm: 0.9999995890276309, iteration: 122669
loss: 1.0565674304962158,grad_norm: 0.9999995778528822, iteration: 122670
loss: 1.010299801826477,grad_norm: 0.9543794872003302, iteration: 122671
loss: 1.042921543121338,grad_norm: 0.9999992220400165, iteration: 122672
loss: 0.9615774750709534,grad_norm: 0.935661842090721, iteration: 122673
loss: 0.9641508460044861,grad_norm: 0.8397112646467587, iteration: 122674
loss: 0.9749141335487366,grad_norm: 0.9188439768863647, iteration: 122675
loss: 1.083615779876709,grad_norm: 0.9999998480801306, iteration: 122676
loss: 0.9647747874259949,grad_norm: 0.9999991386384677, iteration: 122677
loss: 0.9990721344947815,grad_norm: 0.9999989762990591, iteration: 122678
loss: 1.0164982080459595,grad_norm: 0.9999992312068253, iteration: 122679
loss: 1.0380451679229736,grad_norm: 0.9999992617198592, iteration: 122680
loss: 0.9660457968711853,grad_norm: 0.9999996075152904, iteration: 122681
loss: 1.0562340021133423,grad_norm: 0.999999775841492, iteration: 122682
loss: 1.004677414894104,grad_norm: 0.8846968311095263, iteration: 122683
loss: 1.0734962224960327,grad_norm: 0.999999065289749, iteration: 122684
loss: 0.9742205739021301,grad_norm: 0.8217791808610201, iteration: 122685
loss: 1.0369672775268555,grad_norm: 0.8788170638254261, iteration: 122686
loss: 0.9924570322036743,grad_norm: 0.792212972192098, iteration: 122687
loss: 1.017717719078064,grad_norm: 0.9340116928110235, iteration: 122688
loss: 0.9927719831466675,grad_norm: 0.8143641011364372, iteration: 122689
loss: 1.22834050655365,grad_norm: 0.9999998737792338, iteration: 122690
loss: 1.009400725364685,grad_norm: 0.9999994601700143, iteration: 122691
loss: 0.993604302406311,grad_norm: 0.9692764859728282, iteration: 122692
loss: 0.9808449745178223,grad_norm: 0.8729582830523649, iteration: 122693
loss: 0.9932639002799988,grad_norm: 0.9188403577227253, iteration: 122694
loss: 0.9632261991500854,grad_norm: 0.9607873744445711, iteration: 122695
loss: 1.2309061288833618,grad_norm: 0.9999997738678025, iteration: 122696
loss: 1.0473030805587769,grad_norm: 0.9194804745561866, iteration: 122697
loss: 1.0975946187973022,grad_norm: 0.9999991159225858, iteration: 122698
loss: 0.9924657940864563,grad_norm: 0.9999993533680722, iteration: 122699
loss: 1.0333129167556763,grad_norm: 0.880392058312778, iteration: 122700
loss: 0.9963604807853699,grad_norm: 0.8209853944382081, iteration: 122701
loss: 1.0970616340637207,grad_norm: 0.9999992872576312, iteration: 122702
loss: 1.014407992362976,grad_norm: 0.9999990371158568, iteration: 122703
loss: 0.9727986454963684,grad_norm: 0.8955744643257036, iteration: 122704
loss: 1.0332989692687988,grad_norm: 0.8910833153553347, iteration: 122705
loss: 1.1026815176010132,grad_norm: 0.9999990731987641, iteration: 122706
loss: 1.0062651634216309,grad_norm: 0.9999998399169115, iteration: 122707
loss: 1.0311808586120605,grad_norm: 0.9999990256575103, iteration: 122708
loss: 0.9782175421714783,grad_norm: 0.8803783576082586, iteration: 122709
loss: 1.014544129371643,grad_norm: 0.9999992114027771, iteration: 122710
loss: 1.0129578113555908,grad_norm: 0.9174427248971241, iteration: 122711
loss: 1.0206921100616455,grad_norm: 0.9999994744555986, iteration: 122712
loss: 0.9818962216377258,grad_norm: 0.9999991276034886, iteration: 122713
loss: 1.0081571340560913,grad_norm: 0.9551985248495309, iteration: 122714
loss: 1.090040922164917,grad_norm: 0.9273421809810788, iteration: 122715
loss: 1.0135908126831055,grad_norm: 0.8669496722814938, iteration: 122716
loss: 0.9879822134971619,grad_norm: 0.984557495260911, iteration: 122717
loss: 1.0061299800872803,grad_norm: 0.9999999355458262, iteration: 122718
loss: 1.0011041164398193,grad_norm: 0.9999990986666676, iteration: 122719
loss: 1.1121413707733154,grad_norm: 0.9933539825820491, iteration: 122720
loss: 0.9676846861839294,grad_norm: 0.8152694276490827, iteration: 122721
loss: 0.9952893257141113,grad_norm: 0.9239336566736611, iteration: 122722
loss: 1.0179449319839478,grad_norm: 0.8484616269602107, iteration: 122723
loss: 0.9974705576896667,grad_norm: 0.9057486617373484, iteration: 122724
loss: 1.0621432065963745,grad_norm: 0.9999990914052493, iteration: 122725
loss: 0.9970085024833679,grad_norm: 0.9999992396801076, iteration: 122726
loss: 1.0150989294052124,grad_norm: 0.9754876009432515, iteration: 122727
loss: 1.006612777709961,grad_norm: 0.7899404703709693, iteration: 122728
loss: 1.0263571739196777,grad_norm: 0.8587154586191569, iteration: 122729
loss: 1.00423002243042,grad_norm: 0.9999996623854506, iteration: 122730
loss: 0.9945985674858093,grad_norm: 0.9545631387581428, iteration: 122731
loss: 0.9987967610359192,grad_norm: 0.9999991139351857, iteration: 122732
loss: 1.0186865329742432,grad_norm: 0.8587023705876237, iteration: 122733
loss: 0.9818814992904663,grad_norm: 0.8990103756478598, iteration: 122734
loss: 1.0041310787200928,grad_norm: 0.9999995658326539, iteration: 122735
loss: 1.0072567462921143,grad_norm: 0.887860439207977, iteration: 122736
loss: 0.9991644620895386,grad_norm: 0.791977297542853, iteration: 122737
loss: 1.042452096939087,grad_norm: 0.9999994735331167, iteration: 122738
loss: 0.9570186138153076,grad_norm: 0.8553208609207291, iteration: 122739
loss: 1.0058355331420898,grad_norm: 0.9999992675493031, iteration: 122740
loss: 0.9769214391708374,grad_norm: 0.9999990439918802, iteration: 122741
loss: 1.041245698928833,grad_norm: 0.9279682920029013, iteration: 122742
loss: 0.9701208472251892,grad_norm: 0.885360304694763, iteration: 122743
loss: 0.9972962737083435,grad_norm: 0.8719903920338999, iteration: 122744
loss: 1.0311516523361206,grad_norm: 0.8963893116443543, iteration: 122745
loss: 1.0264909267425537,grad_norm: 0.9999991324484135, iteration: 122746
loss: 1.024436354637146,grad_norm: 0.9160868469110607, iteration: 122747
loss: 0.9771249890327454,grad_norm: 0.9999990294808324, iteration: 122748
loss: 1.023695707321167,grad_norm: 0.9999997725822113, iteration: 122749
loss: 1.001091718673706,grad_norm: 0.7908919919329079, iteration: 122750
loss: 1.0069876909255981,grad_norm: 0.9999995415585184, iteration: 122751
loss: 1.0763990879058838,grad_norm: 0.9999993475842746, iteration: 122752
loss: 1.0193226337432861,grad_norm: 0.806406486839056, iteration: 122753
loss: 1.0370092391967773,grad_norm: 0.9421843443044432, iteration: 122754
loss: 0.9220308065414429,grad_norm: 0.9187800285014777, iteration: 122755
loss: 1.0031392574310303,grad_norm: 0.9999990798190705, iteration: 122756
loss: 1.0255361795425415,grad_norm: 0.9200295881391377, iteration: 122757
loss: 0.9979457855224609,grad_norm: 0.8582175911937902, iteration: 122758
loss: 0.9846414923667908,grad_norm: 0.8472987409534771, iteration: 122759
loss: 1.0108712911605835,grad_norm: 0.9475259367971839, iteration: 122760
loss: 1.0030925273895264,grad_norm: 0.9999994276327473, iteration: 122761
loss: 1.047281265258789,grad_norm: 0.7917838867756399, iteration: 122762
loss: 1.0021322965621948,grad_norm: 0.9999995205822996, iteration: 122763
loss: 1.0353602170944214,grad_norm: 0.8243107342409185, iteration: 122764
loss: 1.009915828704834,grad_norm: 0.9999993223680453, iteration: 122765
loss: 0.9671910405158997,grad_norm: 0.8817416120285617, iteration: 122766
loss: 1.0230188369750977,grad_norm: 0.9999995304225272, iteration: 122767
loss: 0.969799280166626,grad_norm: 0.7536867604032212, iteration: 122768
loss: 1.0180635452270508,grad_norm: 0.8815191084304075, iteration: 122769
loss: 1.00980806350708,grad_norm: 0.9104481034711344, iteration: 122770
loss: 1.0027867555618286,grad_norm: 0.8171622532320447, iteration: 122771
loss: 1.0525895357131958,grad_norm: 0.9999991649824055, iteration: 122772
loss: 1.029417872428894,grad_norm: 0.9999990965390515, iteration: 122773
loss: 0.9989085793495178,grad_norm: 0.9443264157940794, iteration: 122774
loss: 0.9971792697906494,grad_norm: 0.8796177277423167, iteration: 122775
loss: 0.998640239238739,grad_norm: 0.9999993787532947, iteration: 122776
loss: 1.0067216157913208,grad_norm: 0.8872600708952675, iteration: 122777
loss: 0.9595892429351807,grad_norm: 0.8696391902265194, iteration: 122778
loss: 1.0569196939468384,grad_norm: 0.9999992602739841, iteration: 122779
loss: 0.9686794877052307,grad_norm: 0.8675740475360552, iteration: 122780
loss: 0.9975831508636475,grad_norm: 0.7277116794460922, iteration: 122781
loss: 1.0081557035446167,grad_norm: 0.891580810036603, iteration: 122782
loss: 0.9948130249977112,grad_norm: 0.9999992537993371, iteration: 122783
loss: 1.0316083431243896,grad_norm: 0.9296129796477643, iteration: 122784
loss: 1.0236703157424927,grad_norm: 0.999999789461514, iteration: 122785
loss: 1.0279582738876343,grad_norm: 0.9999997865951251, iteration: 122786
loss: 1.0655944347381592,grad_norm: 0.8919936435499178, iteration: 122787
loss: 0.9996399879455566,grad_norm: 0.7898870235951422, iteration: 122788
loss: 1.0082454681396484,grad_norm: 0.9999991219635901, iteration: 122789
loss: 1.1276522874832153,grad_norm: 0.9999993559911169, iteration: 122790
loss: 1.0785551071166992,grad_norm: 0.9999996311391323, iteration: 122791
loss: 1.0347161293029785,grad_norm: 0.8839061854275475, iteration: 122792
loss: 1.0029759407043457,grad_norm: 0.9999991350018493, iteration: 122793
loss: 0.9773563742637634,grad_norm: 0.8910171558783595, iteration: 122794
loss: 0.9675043225288391,grad_norm: 0.9999993311860118, iteration: 122795
loss: 1.1016000509262085,grad_norm: 0.9999998295096458, iteration: 122796
loss: 1.004128098487854,grad_norm: 0.8612341431259762, iteration: 122797
loss: 0.993879497051239,grad_norm: 0.9563357961987042, iteration: 122798
loss: 1.017979621887207,grad_norm: 0.9999992244002945, iteration: 122799
loss: 1.0114001035690308,grad_norm: 0.7977442591050451, iteration: 122800
loss: 1.062511920928955,grad_norm: 0.9999993886228239, iteration: 122801
loss: 1.1017402410507202,grad_norm: 0.999999827543895, iteration: 122802
loss: 0.9861675500869751,grad_norm: 0.9006838383870718, iteration: 122803
loss: 1.0577274560928345,grad_norm: 0.9403202622620728, iteration: 122804
loss: 0.9922803640365601,grad_norm: 0.911281288688322, iteration: 122805
loss: 1.0316276550292969,grad_norm: 0.7679531672524508, iteration: 122806
loss: 1.0159106254577637,grad_norm: 0.816195383504512, iteration: 122807
loss: 0.9880255460739136,grad_norm: 0.7471415449739591, iteration: 122808
loss: 0.9839826822280884,grad_norm: 0.875048152080074, iteration: 122809
loss: 1.1694141626358032,grad_norm: 0.9999999443987843, iteration: 122810
loss: 1.0239890813827515,grad_norm: 0.9941668311468416, iteration: 122811
loss: 0.9995207786560059,grad_norm: 0.8281139880118372, iteration: 122812
loss: 1.0105311870574951,grad_norm: 0.9999990034135796, iteration: 122813
loss: 0.9779520630836487,grad_norm: 0.8954475493662595, iteration: 122814
loss: 0.986693263053894,grad_norm: 1.000000081353837, iteration: 122815
loss: 1.0159960985183716,grad_norm: 0.9999993785528737, iteration: 122816
loss: 1.0193217992782593,grad_norm: 0.9999997042358053, iteration: 122817
loss: 0.9973602294921875,grad_norm: 0.9209065045165113, iteration: 122818
loss: 0.9998942017555237,grad_norm: 0.9999991130477524, iteration: 122819
loss: 1.0090430974960327,grad_norm: 0.9596367021756719, iteration: 122820
loss: 0.9574711918830872,grad_norm: 0.9999990418554386, iteration: 122821
loss: 1.0321825742721558,grad_norm: 0.8537148721467115, iteration: 122822
loss: 1.0077317953109741,grad_norm: 0.8239233675944378, iteration: 122823
loss: 1.0009654760360718,grad_norm: 0.9999992961210854, iteration: 122824
loss: 1.0137747526168823,grad_norm: 0.9999992218418416, iteration: 122825
loss: 1.0127246379852295,grad_norm: 0.9999996922744759, iteration: 122826
loss: 0.9816610217094421,grad_norm: 0.9260293276842195, iteration: 122827
loss: 1.0234670639038086,grad_norm: 0.9265992487586717, iteration: 122828
loss: 1.025949478149414,grad_norm: 0.999999538344759, iteration: 122829
loss: 0.9931431412696838,grad_norm: 1.0000000696329692, iteration: 122830
loss: 0.9953100681304932,grad_norm: 0.8118936829461574, iteration: 122831
loss: 1.0162869691848755,grad_norm: 0.9999999224962236, iteration: 122832
loss: 0.9959964752197266,grad_norm: 0.8872318301769149, iteration: 122833
loss: 1.0575554370880127,grad_norm: 0.9999993484443505, iteration: 122834
loss: 0.9915512204170227,grad_norm: 0.9999990523042572, iteration: 122835
loss: 1.0705082416534424,grad_norm: 0.9999998715754765, iteration: 122836
loss: 1.0804611444473267,grad_norm: 0.8417485256782372, iteration: 122837
loss: 1.1283773183822632,grad_norm: 0.9999998782775184, iteration: 122838
loss: 1.0405290126800537,grad_norm: 0.9999997671400418, iteration: 122839
loss: 1.0227211713790894,grad_norm: 0.9999991902587055, iteration: 122840
loss: 1.008430004119873,grad_norm: 0.9999989760375998, iteration: 122841
loss: 1.0105284452438354,grad_norm: 0.9999993272560572, iteration: 122842
loss: 0.993040144443512,grad_norm: 0.9390792272456328, iteration: 122843
loss: 1.0332008600234985,grad_norm: 0.8815207382542656, iteration: 122844
loss: 1.0699119567871094,grad_norm: 0.9999993560943, iteration: 122845
loss: 1.0387682914733887,grad_norm: 0.9999991965575321, iteration: 122846
loss: 0.9988855123519897,grad_norm: 0.883360060823665, iteration: 122847
loss: 1.0083256959915161,grad_norm: 0.999998906411436, iteration: 122848
loss: 1.1703335046768188,grad_norm: 0.9999999335837996, iteration: 122849
loss: 1.0042760372161865,grad_norm: 0.9999990473247647, iteration: 122850
loss: 1.008994221687317,grad_norm: 0.9999993401784678, iteration: 122851
loss: 1.0204139947891235,grad_norm: 0.7703553900074886, iteration: 122852
loss: 1.0761622190475464,grad_norm: 0.9999992549618815, iteration: 122853
loss: 1.0127273797988892,grad_norm: 0.9955368831120269, iteration: 122854
loss: 1.0104700326919556,grad_norm: 0.9999990452906137, iteration: 122855
loss: 0.9845110774040222,grad_norm: 0.8279658408020047, iteration: 122856
loss: 1.0178745985031128,grad_norm: 0.9999991791074307, iteration: 122857
loss: 1.0009124279022217,grad_norm: 0.9999991282913178, iteration: 122858
loss: 1.036084771156311,grad_norm: 0.9999998747979187, iteration: 122859
loss: 1.0022494792938232,grad_norm: 0.9453543315461268, iteration: 122860
loss: 1.127676248550415,grad_norm: 0.9999997527679311, iteration: 122861
loss: 1.0106303691864014,grad_norm: 0.9163615003119079, iteration: 122862
loss: 1.0171629190444946,grad_norm: 0.8449778530379376, iteration: 122863
loss: 1.0471093654632568,grad_norm: 0.9999995154490953, iteration: 122864
loss: 1.1218196153640747,grad_norm: 0.9999997195950174, iteration: 122865
loss: 1.0050005912780762,grad_norm: 0.9529286649138627, iteration: 122866
loss: 1.0170363187789917,grad_norm: 0.9999992253661655, iteration: 122867
loss: 1.0483077764511108,grad_norm: 0.9999997004525397, iteration: 122868
loss: 1.0528897047042847,grad_norm: 0.9695675809272283, iteration: 122869
loss: 1.0297669172286987,grad_norm: 0.9783666186219095, iteration: 122870
loss: 1.165300965309143,grad_norm: 0.9999998324288247, iteration: 122871
loss: 1.037939429283142,grad_norm: 0.9999988926756282, iteration: 122872
loss: 1.0795786380767822,grad_norm: 0.999999593467086, iteration: 122873
loss: 1.037935495376587,grad_norm: 0.8352880192077551, iteration: 122874
loss: 1.0223389863967896,grad_norm: 0.9999994815113766, iteration: 122875
loss: 1.0132263898849487,grad_norm: 0.7660579647703327, iteration: 122876
loss: 1.1444239616394043,grad_norm: 0.999999425609236, iteration: 122877
loss: 1.1518018245697021,grad_norm: 0.999999735405777, iteration: 122878
loss: 1.0960723161697388,grad_norm: 0.9999992165052485, iteration: 122879
loss: 1.0457310676574707,grad_norm: 0.9875784765389072, iteration: 122880
loss: 1.0127735137939453,grad_norm: 0.9999991930984893, iteration: 122881
loss: 1.0948446989059448,grad_norm: 0.9999992101816098, iteration: 122882
loss: 1.0316838026046753,grad_norm: 0.8395337134933648, iteration: 122883
loss: 1.0063965320587158,grad_norm: 0.9999999390350903, iteration: 122884
loss: 0.9666907787322998,grad_norm: 0.866935651858668, iteration: 122885
loss: 0.9839621782302856,grad_norm: 0.9039192173077972, iteration: 122886
loss: 1.0265299081802368,grad_norm: 0.9999993165626395, iteration: 122887
loss: 1.101416826248169,grad_norm: 0.999999348505619, iteration: 122888
loss: 0.9953212141990662,grad_norm: 0.9088064588740901, iteration: 122889
loss: 1.0401197671890259,grad_norm: 0.9262725398490897, iteration: 122890
loss: 0.9559618234634399,grad_norm: 0.8216292885239425, iteration: 122891
loss: 0.9785701036453247,grad_norm: 0.9366114577974833, iteration: 122892
loss: 1.0137401819229126,grad_norm: 0.8462107915440924, iteration: 122893
loss: 0.9881008267402649,grad_norm: 0.8548825049372659, iteration: 122894
loss: 1.0172655582427979,grad_norm: 0.9694681664985949, iteration: 122895
loss: 0.9847270250320435,grad_norm: 0.8233064524093809, iteration: 122896
loss: 1.0382046699523926,grad_norm: 0.9858526838526965, iteration: 122897
loss: 1.0313735008239746,grad_norm: 0.9999996311370283, iteration: 122898
loss: 0.9803394079208374,grad_norm: 0.8712658312756933, iteration: 122899
loss: 1.1525567770004272,grad_norm: 0.9999998208311209, iteration: 122900
loss: 0.986203670501709,grad_norm: 0.999998977443554, iteration: 122901
loss: 1.0521434545516968,grad_norm: 0.9999991366274452, iteration: 122902
loss: 1.107762336730957,grad_norm: 0.999999883511327, iteration: 122903
loss: 1.0191658735275269,grad_norm: 0.999999207991242, iteration: 122904
loss: 1.116431474685669,grad_norm: 0.9999999546515856, iteration: 122905
loss: 1.046474575996399,grad_norm: 0.9999990140362714, iteration: 122906
loss: 1.0091465711593628,grad_norm: 0.8270741111353658, iteration: 122907
loss: 1.007939100265503,grad_norm: 0.9999991145351442, iteration: 122908
loss: 1.0326591730117798,grad_norm: 0.9999992675668115, iteration: 122909
loss: 0.9774274826049805,grad_norm: 0.8466180325534552, iteration: 122910
loss: 1.0492839813232422,grad_norm: 0.8940863787363099, iteration: 122911
loss: 0.9921827912330627,grad_norm: 0.9382334731763212, iteration: 122912
loss: 1.1144886016845703,grad_norm: 0.8710951147761518, iteration: 122913
loss: 0.9812275171279907,grad_norm: 0.8014823514259631, iteration: 122914
loss: 1.0031732320785522,grad_norm: 0.8765246637513715, iteration: 122915
loss: 0.9555507898330688,grad_norm: 0.9610348929320407, iteration: 122916
loss: 1.0257129669189453,grad_norm: 0.9999996872953809, iteration: 122917
loss: 0.9742544293403625,grad_norm: 0.9999991591308688, iteration: 122918
loss: 1.0195144414901733,grad_norm: 0.9999999015724437, iteration: 122919
loss: 1.065878987312317,grad_norm: 0.9999991385536757, iteration: 122920
loss: 1.0371968746185303,grad_norm: 0.9999995271751952, iteration: 122921
loss: 1.0192863941192627,grad_norm: 0.9999992507770623, iteration: 122922
loss: 1.016655445098877,grad_norm: 0.9999993539566491, iteration: 122923
loss: 1.0219320058822632,grad_norm: 0.9999990715689053, iteration: 122924
loss: 1.0324500799179077,grad_norm: 0.9999996761991696, iteration: 122925
loss: 1.0080562829971313,grad_norm: 0.9999993583849405, iteration: 122926
loss: 1.0023387670516968,grad_norm: 0.9639946303907656, iteration: 122927
loss: 1.0951427221298218,grad_norm: 0.9999997416346569, iteration: 122928
loss: 1.2067937850952148,grad_norm: 0.9999994249771114, iteration: 122929
loss: 0.9592660665512085,grad_norm: 0.9778482727985213, iteration: 122930
loss: 0.9944414496421814,grad_norm: 0.9290329463281544, iteration: 122931
loss: 1.0078356266021729,grad_norm: 0.8204976984666628, iteration: 122932
loss: 0.9793184995651245,grad_norm: 0.9161814443578089, iteration: 122933
loss: 1.0129081010818481,grad_norm: 0.9999994035230618, iteration: 122934
loss: 1.0095454454421997,grad_norm: 0.8188543277928568, iteration: 122935
loss: 1.0091625452041626,grad_norm: 0.999999107471091, iteration: 122936
loss: 0.993840754032135,grad_norm: 0.81972864826789, iteration: 122937
loss: 1.0337178707122803,grad_norm: 0.9999993135432192, iteration: 122938
loss: 1.0663628578186035,grad_norm: 0.9999994060450599, iteration: 122939
loss: 1.0751593112945557,grad_norm: 0.9999999258708482, iteration: 122940
loss: 1.0807888507843018,grad_norm: 0.9999994217352609, iteration: 122941
loss: 1.0186923742294312,grad_norm: 0.9999992030790791, iteration: 122942
loss: 1.0600545406341553,grad_norm: 0.9999993834482388, iteration: 122943
loss: 1.003037691116333,grad_norm: 0.8821085210397439, iteration: 122944
loss: 0.9931318163871765,grad_norm: 0.8958886257312041, iteration: 122945
loss: 1.0916129350662231,grad_norm: 0.9999998665399208, iteration: 122946
loss: 1.028760313987732,grad_norm: 0.999999908532253, iteration: 122947
loss: 1.1016160249710083,grad_norm: 0.9999994837143997, iteration: 122948
loss: 1.1877751350402832,grad_norm: 0.999999826774292, iteration: 122949
loss: 1.068142056465149,grad_norm: 0.999999498918948, iteration: 122950
loss: 1.0396640300750732,grad_norm: 0.9999992315644094, iteration: 122951
loss: 1.021201491355896,grad_norm: 0.8042688906868339, iteration: 122952
loss: 1.0014071464538574,grad_norm: 0.8627820108676156, iteration: 122953
loss: 1.0202745199203491,grad_norm: 0.7932913780967886, iteration: 122954
loss: 1.0024067163467407,grad_norm: 0.9999996109777597, iteration: 122955
loss: 1.12705659866333,grad_norm: 0.9999991109088681, iteration: 122956
loss: 1.0819634199142456,grad_norm: 0.9999995357881156, iteration: 122957
loss: 0.9937775135040283,grad_norm: 0.9999991379803543, iteration: 122958
loss: 1.1003872156143188,grad_norm: 0.9999994427878186, iteration: 122959
loss: 0.9779636263847351,grad_norm: 0.9999992810161769, iteration: 122960
loss: 0.9872556328773499,grad_norm: 0.7279872083832248, iteration: 122961
loss: 0.9822161197662354,grad_norm: 0.907609385113402, iteration: 122962
loss: 0.9919340014457703,grad_norm: 0.9999991683440019, iteration: 122963
loss: 1.0260684490203857,grad_norm: 0.9999992360811992, iteration: 122964
loss: 1.0386097431182861,grad_norm: 0.9355593763282778, iteration: 122965
loss: 1.1431889533996582,grad_norm: 0.9999998212776186, iteration: 122966
loss: 1.0817701816558838,grad_norm: 0.9999991337399775, iteration: 122967
loss: 1.0534182786941528,grad_norm: 0.9999998008936513, iteration: 122968
loss: 1.0434011220932007,grad_norm: 0.9999995643894717, iteration: 122969
loss: 0.9707340598106384,grad_norm: 0.7747356249454126, iteration: 122970
loss: 1.0064457654953003,grad_norm: 0.8044681925845353, iteration: 122971
loss: 1.053705096244812,grad_norm: 0.9999994619439335, iteration: 122972
loss: 1.038979172706604,grad_norm: 0.9999996544980665, iteration: 122973
loss: 1.0186398029327393,grad_norm: 0.9999994058695659, iteration: 122974
loss: 1.0380536317825317,grad_norm: 0.9999998872932, iteration: 122975
loss: 1.131291151046753,grad_norm: 0.9999999976361409, iteration: 122976
loss: 1.0042309761047363,grad_norm: 0.9999995001425094, iteration: 122977
loss: 1.056501030921936,grad_norm: 0.9999994966561003, iteration: 122978
loss: 1.02448570728302,grad_norm: 0.99999936484149, iteration: 122979
loss: 1.020724892616272,grad_norm: 0.9238488492231088, iteration: 122980
loss: 1.0728890895843506,grad_norm: 0.9999998897245431, iteration: 122981
loss: 1.098416805267334,grad_norm: 0.9999994799217533, iteration: 122982
loss: 0.9782713055610657,grad_norm: 0.9999990099536603, iteration: 122983
loss: 1.0247535705566406,grad_norm: 0.8386864131594213, iteration: 122984
loss: 1.0533506870269775,grad_norm: 0.9999991114064565, iteration: 122985
loss: 1.0405981540679932,grad_norm: 0.9999990493798128, iteration: 122986
loss: 0.9752170443534851,grad_norm: 0.8390996305880682, iteration: 122987
loss: 1.0687248706817627,grad_norm: 0.9999993392667235, iteration: 122988
loss: 1.095111608505249,grad_norm: 1.0000000432566902, iteration: 122989
loss: 1.107773780822754,grad_norm: 0.9999992732527382, iteration: 122990
loss: 1.0863457918167114,grad_norm: 0.999999122244917, iteration: 122991
loss: 1.037945032119751,grad_norm: 0.9999994768940148, iteration: 122992
loss: 1.2215712070465088,grad_norm: 0.9999999865750224, iteration: 122993
loss: 1.024857997894287,grad_norm: 0.9999991375008391, iteration: 122994
loss: 1.0066581964492798,grad_norm: 0.9465085793317146, iteration: 122995
loss: 1.2163692712783813,grad_norm: 0.9999993425471675, iteration: 122996
loss: 1.0283502340316772,grad_norm: 0.9999993760109291, iteration: 122997
loss: 1.0285601615905762,grad_norm: 0.999999294711562, iteration: 122998
loss: 0.9996863603591919,grad_norm: 0.9121670943975044, iteration: 122999
loss: 1.0245534181594849,grad_norm: 0.9251135443111648, iteration: 123000
loss: 1.0856467485427856,grad_norm: 0.9999998642153495, iteration: 123001
loss: 1.02907395362854,grad_norm: 0.9999993867500689, iteration: 123002
loss: 1.051417350769043,grad_norm: 0.9999999094284303, iteration: 123003
loss: 1.002703309059143,grad_norm: 0.9655306515513976, iteration: 123004
loss: 1.0547322034835815,grad_norm: 0.9999992032081255, iteration: 123005
loss: 0.9911655187606812,grad_norm: 0.859962273557941, iteration: 123006
loss: 1.0517433881759644,grad_norm: 0.8862072878223273, iteration: 123007
loss: 1.1304434537887573,grad_norm: 0.9999993861282268, iteration: 123008
loss: 1.0072062015533447,grad_norm: 0.9999991205453498, iteration: 123009
loss: 1.0276672840118408,grad_norm: 0.9999995753898753, iteration: 123010
loss: 1.0787769556045532,grad_norm: 0.9999991303125887, iteration: 123011
loss: 1.17399263381958,grad_norm: 0.9999994208929842, iteration: 123012
loss: 0.9446701407432556,grad_norm: 0.8962863417763351, iteration: 123013
loss: 1.163120150566101,grad_norm: 0.9999998361817867, iteration: 123014
loss: 1.0803160667419434,grad_norm: 0.9999991571433607, iteration: 123015
loss: 1.0370135307312012,grad_norm: 0.97497427848197, iteration: 123016
loss: 1.0818231105804443,grad_norm: 0.9400183177272151, iteration: 123017
loss: 1.072851538658142,grad_norm: 0.9992149771148432, iteration: 123018
loss: 0.9839662313461304,grad_norm: 0.8475309877449648, iteration: 123019
loss: 1.2674434185028076,grad_norm: 0.9999994934027632, iteration: 123020
loss: 1.0416368246078491,grad_norm: 0.999999456127991, iteration: 123021
loss: 1.1524454355239868,grad_norm: 0.9999998141221965, iteration: 123022
loss: 1.0102852582931519,grad_norm: 0.8227232800138923, iteration: 123023
loss: 1.1484168767929077,grad_norm: 0.9029389360828622, iteration: 123024
loss: 1.0671095848083496,grad_norm: 0.9999997111948011, iteration: 123025
loss: 0.9942891001701355,grad_norm: 0.9403694016335327, iteration: 123026
loss: 1.0862658023834229,grad_norm: 0.9999997181298066, iteration: 123027
loss: 1.0063084363937378,grad_norm: 0.999999192365046, iteration: 123028
loss: 1.0342583656311035,grad_norm: 0.9921035362356074, iteration: 123029
loss: 1.1961909532546997,grad_norm: 0.999999947029623, iteration: 123030
loss: 0.9799990653991699,grad_norm: 0.853838511739931, iteration: 123031
loss: 1.1164355278015137,grad_norm: 0.9999992883539036, iteration: 123032
loss: 0.9993612170219421,grad_norm: 0.9999992544567605, iteration: 123033
loss: 1.135360836982727,grad_norm: 0.9999997001382941, iteration: 123034
loss: 1.0638195276260376,grad_norm: 0.974769681168727, iteration: 123035
loss: 1.0188120603561401,grad_norm: 0.9999990218794128, iteration: 123036
loss: 1.0104196071624756,grad_norm: 0.999999309097533, iteration: 123037
loss: 0.9430097341537476,grad_norm: 0.9999991186313321, iteration: 123038
loss: 0.9917429089546204,grad_norm: 0.976474095893224, iteration: 123039
loss: 1.022692322731018,grad_norm: 0.9999997033035936, iteration: 123040
loss: 0.993690013885498,grad_norm: 0.9766832906375841, iteration: 123041
loss: 0.9696441292762756,grad_norm: 0.9760604109984843, iteration: 123042
loss: 0.981675386428833,grad_norm: 0.9999991793909602, iteration: 123043
loss: 1.0013229846954346,grad_norm: 0.9999990780454492, iteration: 123044
loss: 0.9823777675628662,grad_norm: 0.7628602532607356, iteration: 123045
loss: 0.9543814063072205,grad_norm: 0.9999990501573479, iteration: 123046
loss: 0.9695910215377808,grad_norm: 0.7607346819440646, iteration: 123047
loss: 1.0224612951278687,grad_norm: 1.0000000223396521, iteration: 123048
loss: 1.0037298202514648,grad_norm: 0.8148986618553183, iteration: 123049
loss: 1.2145977020263672,grad_norm: 0.9999991732408029, iteration: 123050
loss: 0.9938399791717529,grad_norm: 0.8831207634044715, iteration: 123051
loss: 0.9796472787857056,grad_norm: 0.8431427079808232, iteration: 123052
loss: 0.9772214293479919,grad_norm: 0.8169343443579798, iteration: 123053
loss: 0.9861879944801331,grad_norm: 0.823619182823465, iteration: 123054
loss: 0.9812924265861511,grad_norm: 0.9151227663410518, iteration: 123055
loss: 1.030822515487671,grad_norm: 0.9999991959672501, iteration: 123056
loss: 1.0325374603271484,grad_norm: 0.9657696712275403, iteration: 123057
loss: 1.014540433883667,grad_norm: 0.9999993532784117, iteration: 123058
loss: 1.0270594358444214,grad_norm: 0.9285491543445739, iteration: 123059
loss: 1.0186660289764404,grad_norm: 0.9999991087722679, iteration: 123060
loss: 0.9696812629699707,grad_norm: 0.8903707741693119, iteration: 123061
loss: 0.9768517017364502,grad_norm: 0.8394942616936066, iteration: 123062
loss: 1.005823016166687,grad_norm: 0.9999990836339995, iteration: 123063
loss: 1.0176235437393188,grad_norm: 0.8538740394233152, iteration: 123064
loss: 1.020272970199585,grad_norm: 0.9999998936545003, iteration: 123065
loss: 1.140788197517395,grad_norm: 0.9999991582114147, iteration: 123066
loss: 0.983288586139679,grad_norm: 0.9645372415730264, iteration: 123067
loss: 1.040715217590332,grad_norm: 0.9999995467633881, iteration: 123068
loss: 1.0088824033737183,grad_norm: 0.8800715039609132, iteration: 123069
loss: 0.9993752241134644,grad_norm: 0.9999990707492251, iteration: 123070
loss: 0.9813084602355957,grad_norm: 0.9265354214676675, iteration: 123071
loss: 0.9989039897918701,grad_norm: 0.9046444301698017, iteration: 123072
loss: 1.018104910850525,grad_norm: 0.8063644063672231, iteration: 123073
loss: 0.9796033501625061,grad_norm: 0.9999993369518012, iteration: 123074
loss: 0.9823755621910095,grad_norm: 0.9999991272560707, iteration: 123075
loss: 1.0939888954162598,grad_norm: 0.9999996020195067, iteration: 123076
loss: 1.083669900894165,grad_norm: 0.9999997117855007, iteration: 123077
loss: 0.9891753792762756,grad_norm: 0.7683573386851528, iteration: 123078
loss: 0.9946864247322083,grad_norm: 0.9999992948136506, iteration: 123079
loss: 1.0193474292755127,grad_norm: 0.8462125045927075, iteration: 123080
loss: 0.9708282947540283,grad_norm: 0.9051187359289328, iteration: 123081
loss: 1.0336915254592896,grad_norm: 0.8648730518870968, iteration: 123082
loss: 1.0249186754226685,grad_norm: 0.9999990639129896, iteration: 123083
loss: 1.0271226167678833,grad_norm: 0.9999991644548495, iteration: 123084
loss: 1.022110939025879,grad_norm: 0.9716173347939824, iteration: 123085
loss: 0.9967949390411377,grad_norm: 0.8341319628683972, iteration: 123086
loss: 0.9905897974967957,grad_norm: 0.8821467241509127, iteration: 123087
loss: 1.0039010047912598,grad_norm: 0.8264545106672262, iteration: 123088
loss: 1.0268924236297607,grad_norm: 0.999999829612999, iteration: 123089
loss: 1.008878469467163,grad_norm: 0.9258364877338191, iteration: 123090
loss: 1.0560672283172607,grad_norm: 0.9999997351589274, iteration: 123091
loss: 0.9984212517738342,grad_norm: 0.7283745715521921, iteration: 123092
loss: 1.0194885730743408,grad_norm: 0.9711666582892994, iteration: 123093
loss: 1.0200836658477783,grad_norm: 0.9999992876932278, iteration: 123094
loss: 1.1292568445205688,grad_norm: 0.9999995347112918, iteration: 123095
loss: 1.0245814323425293,grad_norm: 0.8865883163371192, iteration: 123096
loss: 1.024032711982727,grad_norm: 0.8468177385629304, iteration: 123097
loss: 1.0045015811920166,grad_norm: 0.9254696943286733, iteration: 123098
loss: 1.1790791749954224,grad_norm: 0.9999998982842401, iteration: 123099
loss: 1.0203560590744019,grad_norm: 0.9999991136515257, iteration: 123100
loss: 1.0015472173690796,grad_norm: 0.9999991228712277, iteration: 123101
loss: 0.9610467553138733,grad_norm: 0.889120107046777, iteration: 123102
loss: 1.014377474784851,grad_norm: 0.9999990747936823, iteration: 123103
loss: 0.9244142770767212,grad_norm: 0.7848512783755043, iteration: 123104
loss: 1.0115610361099243,grad_norm: 0.9999993160007331, iteration: 123105
loss: 1.043217658996582,grad_norm: 0.9547754272591515, iteration: 123106
loss: 1.0019822120666504,grad_norm: 0.8175155623261081, iteration: 123107
loss: 0.9725714921951294,grad_norm: 0.8702253454185028, iteration: 123108
loss: 1.1019240617752075,grad_norm: 0.9999995405066793, iteration: 123109
loss: 1.038112998008728,grad_norm: 0.9068503738686557, iteration: 123110
loss: 1.0536953210830688,grad_norm: 0.9999995490584846, iteration: 123111
loss: 1.0040966272354126,grad_norm: 0.9620122863316838, iteration: 123112
loss: 1.0111215114593506,grad_norm: 0.9507104349586835, iteration: 123113
loss: 1.008812665939331,grad_norm: 0.8342861313787763, iteration: 123114
loss: 0.9997881054878235,grad_norm: 0.9912611835286728, iteration: 123115
loss: 1.0019224882125854,grad_norm: 0.9483497879865481, iteration: 123116
loss: 0.9703211784362793,grad_norm: 0.8303724970256767, iteration: 123117
loss: 0.9973996877670288,grad_norm: 0.825438686693347, iteration: 123118
loss: 1.0515904426574707,grad_norm: 0.9999991956108062, iteration: 123119
loss: 1.015842080116272,grad_norm: 0.9212597541586458, iteration: 123120
loss: 0.9799347519874573,grad_norm: 0.9999990554888957, iteration: 123121
loss: 0.961553692817688,grad_norm: 0.8381501324106442, iteration: 123122
loss: 1.0066297054290771,grad_norm: 0.999999349788532, iteration: 123123
loss: 1.019658088684082,grad_norm: 0.9999995094116261, iteration: 123124
loss: 1.016576886177063,grad_norm: 0.9999997029089325, iteration: 123125
loss: 1.0053601264953613,grad_norm: 0.9999992174000879, iteration: 123126
loss: 0.9636246562004089,grad_norm: 0.8963585513049964, iteration: 123127
loss: 1.0504549741744995,grad_norm: 0.9183734187921857, iteration: 123128
loss: 1.044942021369934,grad_norm: 0.9999994325682057, iteration: 123129
loss: 0.9777315855026245,grad_norm: 0.836611571769658, iteration: 123130
loss: 0.9827162027359009,grad_norm: 0.894378124252885, iteration: 123131
loss: 0.9894927144050598,grad_norm: 0.971505340037832, iteration: 123132
loss: 1.0277948379516602,grad_norm: 0.999999114583331, iteration: 123133
loss: 1.0091071128845215,grad_norm: 0.9999992545671125, iteration: 123134
loss: 0.9932836294174194,grad_norm: 0.8537197366060266, iteration: 123135
loss: 0.9721231460571289,grad_norm: 0.737130629294086, iteration: 123136
loss: 0.9894847273826599,grad_norm: 0.6841886701285539, iteration: 123137
loss: 0.966491162776947,grad_norm: 0.7454975754706386, iteration: 123138
loss: 1.0106045007705688,grad_norm: 0.9618407628662873, iteration: 123139
loss: 0.9769214987754822,grad_norm: 0.9239094001442342, iteration: 123140
loss: 1.006604552268982,grad_norm: 0.9615459234127913, iteration: 123141
loss: 0.9619713425636292,grad_norm: 0.9377236887686228, iteration: 123142
loss: 1.0360380411148071,grad_norm: 0.9999991126535148, iteration: 123143
loss: 0.9971738457679749,grad_norm: 0.9107021260310982, iteration: 123144
loss: 0.9596761465072632,grad_norm: 0.7691337696681265, iteration: 123145
loss: 1.0041193962097168,grad_norm: 0.963635587348829, iteration: 123146
loss: 0.9963306188583374,grad_norm: 0.8918097509206157, iteration: 123147
loss: 0.9528552293777466,grad_norm: 0.922456051746182, iteration: 123148
loss: 0.989619255065918,grad_norm: 0.9999996049947595, iteration: 123149
loss: 0.96706223487854,grad_norm: 0.8269732157922859, iteration: 123150
loss: 1.0959726572036743,grad_norm: 0.9999996748485434, iteration: 123151
loss: 1.0121097564697266,grad_norm: 0.9999994473030707, iteration: 123152
loss: 1.0011318922042847,grad_norm: 0.7969046318151936, iteration: 123153
loss: 1.0347588062286377,grad_norm: 0.9999995600925179, iteration: 123154
loss: 1.0059564113616943,grad_norm: 0.9019234711467236, iteration: 123155
loss: 1.0522218942642212,grad_norm: 0.9999991859854945, iteration: 123156
loss: 1.077660083770752,grad_norm: 0.9999993700951857, iteration: 123157
loss: 1.0273451805114746,grad_norm: 0.9999993480487404, iteration: 123158
loss: 1.0210641622543335,grad_norm: 0.9056497205920657, iteration: 123159
loss: 1.0786962509155273,grad_norm: 0.9999998853667708, iteration: 123160
loss: 0.9304263591766357,grad_norm: 0.7825306239641584, iteration: 123161
loss: 0.9818357229232788,grad_norm: 0.9999990072750309, iteration: 123162
loss: 1.0447543859481812,grad_norm: 0.9111092425966131, iteration: 123163
loss: 1.106951355934143,grad_norm: 0.9999997692640137, iteration: 123164
loss: 0.9885188341140747,grad_norm: 0.8262935764168895, iteration: 123165
loss: 1.0660048723220825,grad_norm: 0.9999991904341692, iteration: 123166
loss: 1.0521557331085205,grad_norm: 0.9999992190920884, iteration: 123167
loss: 1.0740050077438354,grad_norm: 0.9999997504571079, iteration: 123168
loss: 1.0281065702438354,grad_norm: 0.9195353627028827, iteration: 123169
loss: 1.0191593170166016,grad_norm: 0.9999991982696315, iteration: 123170
loss: 1.061384677886963,grad_norm: 0.9999997116654452, iteration: 123171
loss: 0.9679760932922363,grad_norm: 0.9999991021010354, iteration: 123172
loss: 1.016937017440796,grad_norm: 0.9999992706704629, iteration: 123173
loss: 1.004528522491455,grad_norm: 0.8755101474045263, iteration: 123174
loss: 0.9837359189987183,grad_norm: 0.9999990251787836, iteration: 123175
loss: 1.0265476703643799,grad_norm: 0.8023399011157176, iteration: 123176
loss: 0.9696167707443237,grad_norm: 0.8260705743600898, iteration: 123177
loss: 1.0121954679489136,grad_norm: 0.8649947738035997, iteration: 123178
loss: 1.0754563808441162,grad_norm: 0.9999993246276668, iteration: 123179
loss: 0.9812616109848022,grad_norm: 0.9227078549958055, iteration: 123180
loss: 1.0099414587020874,grad_norm: 0.9999990325024203, iteration: 123181
loss: 1.112288475036621,grad_norm: 0.9999993410894324, iteration: 123182
loss: 1.0124074220657349,grad_norm: 0.9999998688329548, iteration: 123183
loss: 0.992212176322937,grad_norm: 0.873851882884842, iteration: 123184
loss: 1.0470545291900635,grad_norm: 0.9999992229216729, iteration: 123185
loss: 1.0891541242599487,grad_norm: 0.9999994844536506, iteration: 123186
loss: 1.0682600736618042,grad_norm: 0.9999990328005036, iteration: 123187
loss: 1.13437819480896,grad_norm: 0.9999999440576184, iteration: 123188
loss: 1.0330252647399902,grad_norm: 0.9993944397623589, iteration: 123189
loss: 0.9958903789520264,grad_norm: 0.7593776822429604, iteration: 123190
loss: 1.0640548467636108,grad_norm: 0.9503431676826954, iteration: 123191
loss: 1.0822184085845947,grad_norm: 0.9999998782009366, iteration: 123192
loss: 0.9857814908027649,grad_norm: 0.9556767591196957, iteration: 123193
loss: 1.4747040271759033,grad_norm: 0.9999998142634658, iteration: 123194
loss: 1.5212191343307495,grad_norm: 0.9999999896882077, iteration: 123195
loss: 1.0733898878097534,grad_norm: 0.9999998916883716, iteration: 123196
loss: 1.0469882488250732,grad_norm: 0.9999993068474411, iteration: 123197
loss: 1.0663765668869019,grad_norm: 0.8889439333433281, iteration: 123198
loss: 1.0694588422775269,grad_norm: 0.9999997271468231, iteration: 123199
loss: 1.0199494361877441,grad_norm: 0.9999999005977221, iteration: 123200
loss: 1.0219238996505737,grad_norm: 0.8711928944329497, iteration: 123201
loss: 1.119309663772583,grad_norm: 0.9999994035690835, iteration: 123202
loss: 0.9790988564491272,grad_norm: 0.979976491526245, iteration: 123203
loss: 1.0681407451629639,grad_norm: 0.9999996798645338, iteration: 123204
loss: 1.0725075006484985,grad_norm: 0.9999990504960332, iteration: 123205
loss: 1.0020527839660645,grad_norm: 0.7761426600191048, iteration: 123206
loss: 1.075313687324524,grad_norm: 0.9999993190544503, iteration: 123207
loss: 0.9762465357780457,grad_norm: 0.9179548425196061, iteration: 123208
loss: 1.051048994064331,grad_norm: 0.9999997508192028, iteration: 123209
loss: 1.4055896997451782,grad_norm: 0.9999997142517013, iteration: 123210
loss: 1.7426446676254272,grad_norm: 0.9999997260872165, iteration: 123211
loss: 1.166051983833313,grad_norm: 0.9999998977996122, iteration: 123212
loss: 1.0166910886764526,grad_norm: 0.8219646106273202, iteration: 123213
loss: 1.1192224025726318,grad_norm: 1.0000000031729497, iteration: 123214
loss: 1.568886637687683,grad_norm: 1.0000000035846195, iteration: 123215
loss: 1.0760246515274048,grad_norm: 0.9999991943078615, iteration: 123216
loss: 1.0910710096359253,grad_norm: 0.9999998939416457, iteration: 123217
loss: 0.992508053779602,grad_norm: 0.8171840063557458, iteration: 123218
loss: 1.184366226196289,grad_norm: 0.9999998254904306, iteration: 123219
loss: 0.9949314594268799,grad_norm: 0.9999992959875552, iteration: 123220
loss: 1.262863278388977,grad_norm: 0.9999999073148846, iteration: 123221
loss: 0.9883902668952942,grad_norm: 0.9999994575020105, iteration: 123222
loss: 1.0629451274871826,grad_norm: 0.9999990907514061, iteration: 123223
loss: 1.2342826128005981,grad_norm: 0.9999996220868247, iteration: 123224
loss: 1.2073140144348145,grad_norm: 0.9999998948774889, iteration: 123225
loss: 1.02705717086792,grad_norm: 0.8383363743221897, iteration: 123226
loss: 1.0646023750305176,grad_norm: 0.9999994806126167, iteration: 123227
loss: 1.0729881525039673,grad_norm: 0.999999669697404, iteration: 123228
loss: 1.2433840036392212,grad_norm: 0.9999994321825775, iteration: 123229
loss: 1.0957691669464111,grad_norm: 0.9999995374417644, iteration: 123230
loss: 1.0230941772460938,grad_norm: 0.9999991974397469, iteration: 123231
loss: 1.1383841037750244,grad_norm: 0.9999997862282831, iteration: 123232
loss: 1.1507411003112793,grad_norm: 0.9999998898165955, iteration: 123233
loss: 1.42402184009552,grad_norm: 0.9999999376565752, iteration: 123234
loss: 1.071937084197998,grad_norm: 0.9999990848641956, iteration: 123235
loss: 1.083118200302124,grad_norm: 0.9728015403255417, iteration: 123236
loss: 1.0418801307678223,grad_norm: 0.9999992891700756, iteration: 123237
loss: 1.016280174255371,grad_norm: 0.9999991733263995, iteration: 123238
loss: 1.0310018062591553,grad_norm: 0.7493535178323443, iteration: 123239
loss: 1.1679517030715942,grad_norm: 0.9999996157989365, iteration: 123240
loss: 0.99741530418396,grad_norm: 0.9442178641471202, iteration: 123241
loss: 1.2235829830169678,grad_norm: 0.999999776313737, iteration: 123242
loss: 1.0252554416656494,grad_norm: 0.817010413548607, iteration: 123243
loss: 1.0210984945297241,grad_norm: 0.9572452617953757, iteration: 123244
loss: 1.0290815830230713,grad_norm: 0.8730602031436815, iteration: 123245
loss: 1.0532257556915283,grad_norm: 0.999998981970206, iteration: 123246
loss: 1.0632919073104858,grad_norm: 0.9999994759350529, iteration: 123247
loss: 0.9953054189682007,grad_norm: 0.8487979525713206, iteration: 123248
loss: 1.0644731521606445,grad_norm: 0.999999844824463, iteration: 123249
loss: 1.0254366397857666,grad_norm: 0.9999992237588295, iteration: 123250
loss: 1.3931865692138672,grad_norm: 0.9999999245961547, iteration: 123251
loss: 1.122712254524231,grad_norm: 0.9999997879585293, iteration: 123252
loss: 1.0440102815628052,grad_norm: 0.9999991053878441, iteration: 123253
loss: 1.423359751701355,grad_norm: 0.999999755689307, iteration: 123254
loss: 1.0099849700927734,grad_norm: 0.9999996169970524, iteration: 123255
loss: 1.0616788864135742,grad_norm: 0.9999994757366523, iteration: 123256
loss: 1.0967648029327393,grad_norm: 0.9468519168634297, iteration: 123257
loss: 1.091160774230957,grad_norm: 0.8769028721120138, iteration: 123258
loss: 1.0495150089263916,grad_norm: 0.8617896248572487, iteration: 123259
loss: 1.074912667274475,grad_norm: 0.9999999581356299, iteration: 123260
loss: 1.3093916177749634,grad_norm: 0.9999998943419632, iteration: 123261
loss: 1.033170223236084,grad_norm: 0.947529020568799, iteration: 123262
loss: 1.2316298484802246,grad_norm: 0.9999995772381521, iteration: 123263
loss: 1.0904659032821655,grad_norm: 0.9999994992689472, iteration: 123264
loss: 1.0396020412445068,grad_norm: 0.9710935411755148, iteration: 123265
loss: 1.0173617601394653,grad_norm: 0.9999993689950697, iteration: 123266
loss: 1.0040758848190308,grad_norm: 0.9936563729142669, iteration: 123267
loss: 1.061364769935608,grad_norm: 0.9999992471403253, iteration: 123268
loss: 1.0930321216583252,grad_norm: 0.9999998091204361, iteration: 123269
loss: 1.0369040966033936,grad_norm: 0.9029634861671284, iteration: 123270
loss: 1.021464467048645,grad_norm: 0.9999991723784745, iteration: 123271
loss: 1.0117013454437256,grad_norm: 0.894799162930053, iteration: 123272
loss: 1.0250258445739746,grad_norm: 0.9999990832301658, iteration: 123273
loss: 1.0213985443115234,grad_norm: 0.9685775686859401, iteration: 123274
loss: 1.1062898635864258,grad_norm: 0.999999766092538, iteration: 123275
loss: 0.9944775700569153,grad_norm: 0.9999992024559379, iteration: 123276
loss: 1.2848451137542725,grad_norm: 0.9999997262311191, iteration: 123277
loss: 1.0935007333755493,grad_norm: 0.9999997710287934, iteration: 123278
loss: 1.0249992609024048,grad_norm: 0.991313895166726, iteration: 123279
loss: 1.0935609340667725,grad_norm: 0.9999996672628776, iteration: 123280
loss: 1.0402696132659912,grad_norm: 0.9010423538844368, iteration: 123281
loss: 1.1554948091506958,grad_norm: 0.9999999566327024, iteration: 123282
loss: 1.0969234704971313,grad_norm: 0.963248579805508, iteration: 123283
loss: 1.0849858522415161,grad_norm: 0.9999993849962636, iteration: 123284
loss: 1.0027295351028442,grad_norm: 0.999999160708643, iteration: 123285
loss: 1.0224430561065674,grad_norm: 0.9999991327614056, iteration: 123286
loss: 1.0807534456253052,grad_norm: 0.999999578022021, iteration: 123287
loss: 1.0596554279327393,grad_norm: 0.9999992868773846, iteration: 123288
loss: 1.0686993598937988,grad_norm: 0.9999990101854282, iteration: 123289
loss: 1.1512377262115479,grad_norm: 0.9999996370517042, iteration: 123290
loss: 1.0389834642410278,grad_norm: 0.9999995135875863, iteration: 123291
loss: 1.0886425971984863,grad_norm: 0.9999993754800967, iteration: 123292
loss: 1.2128692865371704,grad_norm: 0.9999997612864439, iteration: 123293
loss: 1.0462040901184082,grad_norm: 0.9300069436575353, iteration: 123294
loss: 1.0807017087936401,grad_norm: 0.9999992524822752, iteration: 123295
loss: 1.0709246397018433,grad_norm: 0.9999991119022467, iteration: 123296
loss: 1.068840742111206,grad_norm: 0.9999992913866705, iteration: 123297
loss: 1.0350645780563354,grad_norm: 0.9999991275428403, iteration: 123298
loss: 1.0484683513641357,grad_norm: 0.9999999044839991, iteration: 123299
loss: 1.241075873374939,grad_norm: 0.9999995663679759, iteration: 123300
loss: 1.1951947212219238,grad_norm: 0.9999999238076727, iteration: 123301
loss: 1.1050392389297485,grad_norm: 0.9999995102507546, iteration: 123302
loss: 1.1486986875534058,grad_norm: 0.9999992082718577, iteration: 123303
loss: 1.122543454170227,grad_norm: 0.999999149208009, iteration: 123304
loss: 1.0648865699768066,grad_norm: 0.9999994559062684, iteration: 123305
loss: 1.2551511526107788,grad_norm: 0.99999964394796, iteration: 123306
loss: 0.9844268560409546,grad_norm: 0.9161714831131613, iteration: 123307
loss: 1.012894868850708,grad_norm: 0.9999990324468501, iteration: 123308
loss: 1.1467669010162354,grad_norm: 0.999999855224136, iteration: 123309
loss: 1.0591057538986206,grad_norm: 0.9999999881754874, iteration: 123310
loss: 1.023767113685608,grad_norm: 0.7730146848854519, iteration: 123311
loss: 1.1442779302597046,grad_norm: 0.9999996263873662, iteration: 123312
loss: 1.0610920190811157,grad_norm: 0.9999994690754122, iteration: 123313
loss: 1.2887730598449707,grad_norm: 0.9999998245078507, iteration: 123314
loss: 1.186994194984436,grad_norm: 0.9999995601031175, iteration: 123315
loss: 1.0459388494491577,grad_norm: 0.9919582213427253, iteration: 123316
loss: 1.106283187866211,grad_norm: 0.999999809026745, iteration: 123317
loss: 0.995080292224884,grad_norm: 0.9344807676908214, iteration: 123318
loss: 1.0615594387054443,grad_norm: 0.9999997478665037, iteration: 123319
loss: 1.0852652788162231,grad_norm: 0.999999845465351, iteration: 123320
loss: 1.0487669706344604,grad_norm: 0.9999993859971242, iteration: 123321
loss: 1.1504544019699097,grad_norm: 0.9999999455587602, iteration: 123322
loss: 1.0530494451522827,grad_norm: 0.9999998254523862, iteration: 123323
loss: 1.0647391080856323,grad_norm: 0.9999995666272079, iteration: 123324
loss: 0.9742118716239929,grad_norm: 0.9999995169488051, iteration: 123325
loss: 0.9979972243309021,grad_norm: 0.952396229994239, iteration: 123326
loss: 0.9878914952278137,grad_norm: 0.9999991304785293, iteration: 123327
loss: 1.0068169832229614,grad_norm: 0.9999992661616175, iteration: 123328
loss: 1.1980818510055542,grad_norm: 0.9999997216338634, iteration: 123329
loss: 0.9898434281349182,grad_norm: 0.8920279885580878, iteration: 123330
loss: 1.0218032598495483,grad_norm: 0.999999836029844, iteration: 123331
loss: 1.0251152515411377,grad_norm: 0.9722440361310195, iteration: 123332
loss: 1.1095778942108154,grad_norm: 0.9999992763950647, iteration: 123333
loss: 1.0261667966842651,grad_norm: 0.7623175827650287, iteration: 123334
loss: 0.9745836853981018,grad_norm: 0.9999996541240431, iteration: 123335
loss: 1.4638590812683105,grad_norm: 0.999999931754885, iteration: 123336
loss: 1.0566281080245972,grad_norm: 0.9999991110189115, iteration: 123337
loss: 1.0145565271377563,grad_norm: 0.9999999061294713, iteration: 123338
loss: 1.050370693206787,grad_norm: 0.9999999723563722, iteration: 123339
loss: 1.056525707244873,grad_norm: 0.9999989887437586, iteration: 123340
loss: 0.9926607012748718,grad_norm: 0.8852386771239051, iteration: 123341
loss: 1.0774301290512085,grad_norm: 0.9999991323322891, iteration: 123342
loss: 1.0197439193725586,grad_norm: 0.9999994135199108, iteration: 123343
loss: 1.0692777633666992,grad_norm: 0.9999996792684528, iteration: 123344
loss: 1.0664516687393188,grad_norm: 0.9999996427305026, iteration: 123345
loss: 1.031178593635559,grad_norm: 0.9785899385385561, iteration: 123346
loss: 1.0294864177703857,grad_norm: 0.9793131734136497, iteration: 123347
loss: 1.0510928630828857,grad_norm: 0.9999989937457973, iteration: 123348
loss: 1.181758999824524,grad_norm: 0.9999991242130181, iteration: 123349
loss: 1.0277551412582397,grad_norm: 0.83071366823173, iteration: 123350
loss: 1.061468482017517,grad_norm: 0.9999991299181873, iteration: 123351
loss: 1.0203133821487427,grad_norm: 0.9999994369634487, iteration: 123352
loss: 1.0283528566360474,grad_norm: 0.9999994238049063, iteration: 123353
loss: 1.0189192295074463,grad_norm: 0.9999998747613036, iteration: 123354
loss: 1.0237631797790527,grad_norm: 0.9999995399636233, iteration: 123355
loss: 1.001507043838501,grad_norm: 0.9556499129789252, iteration: 123356
loss: 1.335868000984192,grad_norm: 0.9999997731355108, iteration: 123357
loss: 1.0091403722763062,grad_norm: 0.8779197097087267, iteration: 123358
loss: 1.142828345298767,grad_norm: 0.9999996306774045, iteration: 123359
loss: 1.025823950767517,grad_norm: 0.7296540990795156, iteration: 123360
loss: 1.0031423568725586,grad_norm: 0.7977517828635747, iteration: 123361
loss: 1.0364477634429932,grad_norm: 0.9999993876371172, iteration: 123362
loss: 0.9960485100746155,grad_norm: 0.8612473100095596, iteration: 123363
loss: 1.0380239486694336,grad_norm: 0.9999993607020695, iteration: 123364
loss: 0.9854154586791992,grad_norm: 0.9999991600990261, iteration: 123365
loss: 1.161978840827942,grad_norm: 0.9999996030768309, iteration: 123366
loss: 1.119006872177124,grad_norm: 0.9999998297054022, iteration: 123367
loss: 1.013216495513916,grad_norm: 0.9584817798890612, iteration: 123368
loss: 1.0833717584609985,grad_norm: 0.9999998796965182, iteration: 123369
loss: 1.1404004096984863,grad_norm: 0.999999875232275, iteration: 123370
loss: 1.0669047832489014,grad_norm: 0.999999853818307, iteration: 123371
loss: 1.2020599842071533,grad_norm: 0.999999831853748, iteration: 123372
loss: 0.9815024137496948,grad_norm: 0.9143643449940115, iteration: 123373
loss: 1.1211140155792236,grad_norm: 0.9999994962566185, iteration: 123374
loss: 0.9850011467933655,grad_norm: 0.8475625009309404, iteration: 123375
loss: 1.0881524085998535,grad_norm: 0.9999996193264884, iteration: 123376
loss: 1.0070468187332153,grad_norm: 0.9999996453677432, iteration: 123377
loss: 1.079766869544983,grad_norm: 0.999999881406153, iteration: 123378
loss: 1.0547845363616943,grad_norm: 0.9999998898390424, iteration: 123379
loss: 0.986045777797699,grad_norm: 0.8572414162175075, iteration: 123380
loss: 1.045319676399231,grad_norm: 0.9305648287991876, iteration: 123381
loss: 0.9637950658798218,grad_norm: 0.9174856749373838, iteration: 123382
loss: 0.9510588645935059,grad_norm: 0.999999561235188, iteration: 123383
loss: 1.2935497760772705,grad_norm: 0.9999996818294572, iteration: 123384
loss: 0.9762844443321228,grad_norm: 0.8984724759674928, iteration: 123385
loss: 0.9751657247543335,grad_norm: 0.9440266706957076, iteration: 123386
loss: 1.0724444389343262,grad_norm: 0.9999992744140401, iteration: 123387
loss: 1.0565966367721558,grad_norm: 0.9999993641418742, iteration: 123388
loss: 1.0412254333496094,grad_norm: 0.9999994291230172, iteration: 123389
loss: 1.2081139087677002,grad_norm: 0.9999998405350063, iteration: 123390
loss: 1.208005428314209,grad_norm: 0.9999995504926191, iteration: 123391
loss: 1.0136860609054565,grad_norm: 0.8637950031976055, iteration: 123392
loss: 0.9942334294319153,grad_norm: 0.8044345448755726, iteration: 123393
loss: 1.118355631828308,grad_norm: 0.9999998292734107, iteration: 123394
loss: 1.0191149711608887,grad_norm: 0.9999995725001993, iteration: 123395
loss: 1.0638797283172607,grad_norm: 0.9999992286877305, iteration: 123396
loss: 1.1374679803848267,grad_norm: 0.9999998421412705, iteration: 123397
loss: 1.0014244318008423,grad_norm: 0.9064622353103265, iteration: 123398
loss: 1.0720120668411255,grad_norm: 0.9627298106896321, iteration: 123399
loss: 1.0784695148468018,grad_norm: 0.9999994654292184, iteration: 123400
loss: 0.992704451084137,grad_norm: 0.7473210466276586, iteration: 123401
loss: 1.093790054321289,grad_norm: 0.9999999479658868, iteration: 123402
loss: 1.062754511833191,grad_norm: 0.9999991805214443, iteration: 123403
loss: 1.017667293548584,grad_norm: 0.9999993648579761, iteration: 123404
loss: 1.1096152067184448,grad_norm: 0.9999997338029238, iteration: 123405
loss: 1.0330991744995117,grad_norm: 0.9793160597664538, iteration: 123406
loss: 1.0522797107696533,grad_norm: 0.9999995695331919, iteration: 123407
loss: 1.1177780628204346,grad_norm: 0.9999997525414256, iteration: 123408
loss: 1.0013371706008911,grad_norm: 0.9999991255654412, iteration: 123409
loss: 0.9979652166366577,grad_norm: 0.9999993042709541, iteration: 123410
loss: 1.0197523832321167,grad_norm: 0.9373410917641106, iteration: 123411
loss: 1.0212440490722656,grad_norm: 0.9999991479899464, iteration: 123412
loss: 1.066939353942871,grad_norm: 0.9999992890435484, iteration: 123413
loss: 0.9922066330909729,grad_norm: 0.9999992074051042, iteration: 123414
loss: 0.995607316493988,grad_norm: 0.9153777674710084, iteration: 123415
loss: 1.1666275262832642,grad_norm: 0.9999993171203575, iteration: 123416
loss: 1.029119610786438,grad_norm: 0.9999992378625872, iteration: 123417
loss: 0.9950390458106995,grad_norm: 0.9920908217705968, iteration: 123418
loss: 0.9965400695800781,grad_norm: 0.8663140449126104, iteration: 123419
loss: 1.014098048210144,grad_norm: 0.9999999366377927, iteration: 123420
loss: 1.033345341682434,grad_norm: 0.9999990988662202, iteration: 123421
loss: 1.1539266109466553,grad_norm: 0.999999924550487, iteration: 123422
loss: 1.3029533624649048,grad_norm: 0.9999999028812905, iteration: 123423
loss: 0.9772298336029053,grad_norm: 0.9999992850961077, iteration: 123424
loss: 1.2125773429870605,grad_norm: 0.9999997055566148, iteration: 123425
loss: 1.002953290939331,grad_norm: 0.9999997737946998, iteration: 123426
loss: 1.0008846521377563,grad_norm: 0.883620562405828, iteration: 123427
loss: 1.0191986560821533,grad_norm: 0.8076612914339908, iteration: 123428
loss: 1.0641266107559204,grad_norm: 0.9999990576854727, iteration: 123429
loss: 1.0118924379348755,grad_norm: 0.9112887823998497, iteration: 123430
loss: 1.0063867568969727,grad_norm: 0.9999991209169663, iteration: 123431
loss: 0.9816583395004272,grad_norm: 0.8823916938681937, iteration: 123432
loss: 1.015275001525879,grad_norm: 0.9999991758652955, iteration: 123433
loss: 1.040035605430603,grad_norm: 0.8863281838979346, iteration: 123434
loss: 1.051337718963623,grad_norm: 0.9999995129397956, iteration: 123435
loss: 1.204428791999817,grad_norm: 0.9999999162369296, iteration: 123436
loss: 1.3531633615493774,grad_norm: 0.9999996599158762, iteration: 123437
loss: 1.0449390411376953,grad_norm: 0.999999128142284, iteration: 123438
loss: 1.0097544193267822,grad_norm: 0.8967536496057014, iteration: 123439
loss: 1.0929267406463623,grad_norm: 0.9999999597563779, iteration: 123440
loss: 0.9845265746116638,grad_norm: 0.965761490170494, iteration: 123441
loss: 1.0706672668457031,grad_norm: 0.9999993076344678, iteration: 123442
loss: 1.1158215999603271,grad_norm: 0.9999996281309304, iteration: 123443
loss: 1.0246533155441284,grad_norm: 0.9719242803503804, iteration: 123444
loss: 1.0043538808822632,grad_norm: 0.9999990169848877, iteration: 123445
loss: 0.9990056157112122,grad_norm: 0.7033925367451511, iteration: 123446
loss: 1.07991361618042,grad_norm: 0.9999997049161523, iteration: 123447
loss: 1.1662954092025757,grad_norm: 0.9999995589168854, iteration: 123448
loss: 1.0754343271255493,grad_norm: 0.9999996306561004, iteration: 123449
loss: 1.1262681484222412,grad_norm: 0.9999998869118536, iteration: 123450
loss: 0.9904565215110779,grad_norm: 0.8144460107373924, iteration: 123451
loss: 1.0112158060073853,grad_norm: 0.8115830919185757, iteration: 123452
loss: 1.0160698890686035,grad_norm: 0.9999997815201628, iteration: 123453
loss: 0.9863651394844055,grad_norm: 0.9491845069683983, iteration: 123454
loss: 1.0202245712280273,grad_norm: 1.000000034959421, iteration: 123455
loss: 0.955017626285553,grad_norm: 0.9217052932649132, iteration: 123456
loss: 0.9973106384277344,grad_norm: 0.9999990162077534, iteration: 123457
loss: 1.101088047027588,grad_norm: 0.9999996945542832, iteration: 123458
loss: 1.0223602056503296,grad_norm: 0.74922954438644, iteration: 123459
loss: 1.0567185878753662,grad_norm: 0.9999994337015405, iteration: 123460
loss: 1.0371750593185425,grad_norm: 0.9724246847179372, iteration: 123461
loss: 1.0307143926620483,grad_norm: 0.9999991744999415, iteration: 123462
loss: 0.9685313105583191,grad_norm: 0.9999994192998433, iteration: 123463
loss: 1.0605262517929077,grad_norm: 0.9999995552295662, iteration: 123464
loss: 1.0329493284225464,grad_norm: 0.9999998769444868, iteration: 123465
loss: 0.9813194870948792,grad_norm: 0.999999431728204, iteration: 123466
loss: 1.1203827857971191,grad_norm: 0.9999998418950055, iteration: 123467
loss: 1.0551496744155884,grad_norm: 0.9999993782959004, iteration: 123468
loss: 1.001838207244873,grad_norm: 0.7974770864904026, iteration: 123469
loss: 1.0049210786819458,grad_norm: 0.7883941643776977, iteration: 123470
loss: 1.067293643951416,grad_norm: 0.9999992229888173, iteration: 123471
loss: 0.9664521813392639,grad_norm: 0.9999992055387364, iteration: 123472
loss: 1.076724886894226,grad_norm: 0.9999990441062954, iteration: 123473
loss: 1.0218886137008667,grad_norm: 0.9999992527303629, iteration: 123474
loss: 1.0245075225830078,grad_norm: 0.9999996870367913, iteration: 123475
loss: 1.0706523656845093,grad_norm: 0.9999993567567386, iteration: 123476
loss: 1.0448962450027466,grad_norm: 0.9105494913571258, iteration: 123477
loss: 0.9945928454399109,grad_norm: 0.881407746540673, iteration: 123478
loss: 1.1801621913909912,grad_norm: 1.0000000195226872, iteration: 123479
loss: 0.9707714319229126,grad_norm: 0.9999999304461334, iteration: 123480
loss: 1.0291577577590942,grad_norm: 0.9999999091746455, iteration: 123481
loss: 1.023179531097412,grad_norm: 0.9531602512914508, iteration: 123482
loss: 1.0204336643218994,grad_norm: 0.9400440942286905, iteration: 123483
loss: 1.1763958930969238,grad_norm: 0.9999997280077406, iteration: 123484
loss: 1.0704939365386963,grad_norm: 0.9999993798070744, iteration: 123485
loss: 1.012896180152893,grad_norm: 0.9999992477320563, iteration: 123486
loss: 1.0119270086288452,grad_norm: 0.9999995475093402, iteration: 123487
loss: 0.9819896817207336,grad_norm: 0.8673473379304681, iteration: 123488
loss: 1.137917160987854,grad_norm: 0.999999329119881, iteration: 123489
loss: 1.1327784061431885,grad_norm: 0.9999998702055167, iteration: 123490
loss: 1.0204888582229614,grad_norm: 0.8836383601378126, iteration: 123491
loss: 1.0059643983840942,grad_norm: 0.8995174112497353, iteration: 123492
loss: 1.0509470701217651,grad_norm: 0.9999998989149615, iteration: 123493
loss: 0.9936567544937134,grad_norm: 0.9999994670198771, iteration: 123494
loss: 1.032230019569397,grad_norm: 0.9999998970336379, iteration: 123495
loss: 0.9701312780380249,grad_norm: 0.9999991295147522, iteration: 123496
loss: 1.0964932441711426,grad_norm: 0.9999998378943263, iteration: 123497
loss: 1.0408507585525513,grad_norm: 0.9999991949535256, iteration: 123498
loss: 1.0866491794586182,grad_norm: 0.9999994933867068, iteration: 123499
loss: 1.1198810338974,grad_norm: 0.9702013031281992, iteration: 123500
loss: 1.0185314416885376,grad_norm: 0.8232194182888755, iteration: 123501
loss: 1.0264652967453003,grad_norm: 0.9999991556225466, iteration: 123502
loss: 1.0085299015045166,grad_norm: 0.7735963777997168, iteration: 123503
loss: 1.0565848350524902,grad_norm: 0.999999861668029, iteration: 123504
loss: 1.1381444931030273,grad_norm: 0.9999997959561617, iteration: 123505
loss: 1.0116922855377197,grad_norm: 0.9292670891627945, iteration: 123506
loss: 1.0069284439086914,grad_norm: 0.9999993955288773, iteration: 123507
loss: 0.97392737865448,grad_norm: 0.982293752853978, iteration: 123508
loss: 0.9933770895004272,grad_norm: 0.9414096612821047, iteration: 123509
loss: 1.073508858680725,grad_norm: 0.9999992610206286, iteration: 123510
loss: 1.0323723554611206,grad_norm: 0.9037917782785244, iteration: 123511
loss: 1.1110268831253052,grad_norm: 0.9999997155651985, iteration: 123512
loss: 1.0086376667022705,grad_norm: 0.9999994898882887, iteration: 123513
loss: 1.1477614641189575,grad_norm: 0.9999997826855019, iteration: 123514
loss: 0.9810077548027039,grad_norm: 0.871199332202521, iteration: 123515
loss: 0.9748750329017639,grad_norm: 0.9999993394034382, iteration: 123516
loss: 1.03317129611969,grad_norm: 0.9999992477186678, iteration: 123517
loss: 1.1418572664260864,grad_norm: 0.999999632252311, iteration: 123518
loss: 1.0659819841384888,grad_norm: 0.9999990769858035, iteration: 123519
loss: 1.004647135734558,grad_norm: 0.8506815945126414, iteration: 123520
loss: 1.3153936862945557,grad_norm: 0.9999999537603216, iteration: 123521
loss: 1.0950597524642944,grad_norm: 0.9733748077107097, iteration: 123522
loss: 0.9807301759719849,grad_norm: 0.9999990844135385, iteration: 123523
loss: 1.031266450881958,grad_norm: 0.9999991446229369, iteration: 123524
loss: 1.0008987188339233,grad_norm: 0.9999999359554761, iteration: 123525
loss: 1.0073974132537842,grad_norm: 0.9999997508830075, iteration: 123526
loss: 1.043790340423584,grad_norm: 0.9999992398320549, iteration: 123527
loss: 1.0415706634521484,grad_norm: 0.9999993171092804, iteration: 123528
loss: 1.0247421264648438,grad_norm: 0.9999996667919517, iteration: 123529
loss: 1.101657748222351,grad_norm: 0.9999992092151048, iteration: 123530
loss: 1.0125739574432373,grad_norm: 0.8403688008239731, iteration: 123531
loss: 1.0445653200149536,grad_norm: 0.9999997513202159, iteration: 123532
loss: 1.0370904207229614,grad_norm: 0.99999924302153, iteration: 123533
loss: 1.146641731262207,grad_norm: 0.9999991213130741, iteration: 123534
loss: 1.056633472442627,grad_norm: 0.9396898361426519, iteration: 123535
loss: 1.0221574306488037,grad_norm: 0.9999993545107594, iteration: 123536
loss: 1.0370875597000122,grad_norm: 0.9999995311460012, iteration: 123537
loss: 1.0817307233810425,grad_norm: 0.9999997885351251, iteration: 123538
loss: 1.0673487186431885,grad_norm: 0.9999993101507266, iteration: 123539
loss: 0.985975444316864,grad_norm: 0.8729544182695899, iteration: 123540
loss: 0.9965577125549316,grad_norm: 0.8743541615581466, iteration: 123541
loss: 1.0408518314361572,grad_norm: 0.8818406249257352, iteration: 123542
loss: 1.080741286277771,grad_norm: 0.9999993449773619, iteration: 123543
loss: 1.1970469951629639,grad_norm: 0.9999990749888669, iteration: 123544
loss: 1.2949272394180298,grad_norm: 0.9999998326491635, iteration: 123545
loss: 0.9910092949867249,grad_norm: 0.999999626594489, iteration: 123546
loss: 1.0627397298812866,grad_norm: 0.9999998510905013, iteration: 123547
loss: 1.1369726657867432,grad_norm: 0.9999998654312318, iteration: 123548
loss: 1.3741285800933838,grad_norm: 0.9999998799267485, iteration: 123549
loss: 1.003412127494812,grad_norm: 0.999999125760882, iteration: 123550
loss: 1.1411436796188354,grad_norm: 0.9999997777939296, iteration: 123551
loss: 1.0226937532424927,grad_norm: 0.9999990870667861, iteration: 123552
loss: 1.0156546831130981,grad_norm: 0.999999054242194, iteration: 123553
loss: 1.3412249088287354,grad_norm: 0.9999998994635386, iteration: 123554
loss: 1.035279631614685,grad_norm: 0.9999991761131694, iteration: 123555
loss: 0.9875851273536682,grad_norm: 0.999999788281698, iteration: 123556
loss: 1.0175226926803589,grad_norm: 0.9999991652213814, iteration: 123557
loss: 0.9584165215492249,grad_norm: 0.9999992550933213, iteration: 123558
loss: 0.9969857931137085,grad_norm: 0.9999994374331094, iteration: 123559
loss: 1.106122612953186,grad_norm: 0.9999998372641566, iteration: 123560
loss: 1.0252920389175415,grad_norm: 0.8822033541117998, iteration: 123561
loss: 1.0190632343292236,grad_norm: 0.999999540738954, iteration: 123562
loss: 1.0189510583877563,grad_norm: 0.9999991814648167, iteration: 123563
loss: 1.0206918716430664,grad_norm: 0.9999995685837578, iteration: 123564
loss: 1.1145974397659302,grad_norm: 0.9999998799844472, iteration: 123565
loss: 1.034393310546875,grad_norm: 0.8930787251020212, iteration: 123566
loss: 1.1047559976577759,grad_norm: 0.9999994183963881, iteration: 123567
loss: 1.026286244392395,grad_norm: 0.999999893733657, iteration: 123568
loss: 1.0359253883361816,grad_norm: 0.9999991195787032, iteration: 123569
loss: 1.0088627338409424,grad_norm: 0.9999994811485816, iteration: 123570
loss: 1.0526959896087646,grad_norm: 0.999999089183545, iteration: 123571
loss: 1.3742166757583618,grad_norm: 0.9999999216157724, iteration: 123572
loss: 0.9935014247894287,grad_norm: 0.9465310287726821, iteration: 123573
loss: 1.2193093299865723,grad_norm: 0.9999994296769558, iteration: 123574
loss: 1.043248176574707,grad_norm: 0.9999994288844752, iteration: 123575
loss: 1.0248416662216187,grad_norm: 0.999999746469733, iteration: 123576
loss: 1.0364223718643188,grad_norm: 0.9999994578392908, iteration: 123577
loss: 1.0690025091171265,grad_norm: 0.999999373447373, iteration: 123578
loss: 1.0006426572799683,grad_norm: 0.9999999015456305, iteration: 123579
loss: 1.0323275327682495,grad_norm: 0.9999991413661876, iteration: 123580
loss: 0.984522819519043,grad_norm: 0.8986409244492284, iteration: 123581
loss: 1.1798032522201538,grad_norm: 0.9999996552132778, iteration: 123582
loss: 0.9876229763031006,grad_norm: 0.9999992373477817, iteration: 123583
loss: 0.9884582757949829,grad_norm: 0.980700412052212, iteration: 123584
loss: 1.0891271829605103,grad_norm: 0.99999892199804, iteration: 123585
loss: 0.9734641909599304,grad_norm: 0.7677681455384067, iteration: 123586
loss: 1.121358036994934,grad_norm: 0.9999999016446913, iteration: 123587
loss: 0.9829761981964111,grad_norm: 0.999999120118224, iteration: 123588
loss: 0.9743829369544983,grad_norm: 0.896006222365629, iteration: 123589
loss: 1.0200449228286743,grad_norm: 0.9037795304179336, iteration: 123590
loss: 1.1973999738693237,grad_norm: 0.9999997872649855, iteration: 123591
loss: 1.1270345449447632,grad_norm: 0.9999997470616878, iteration: 123592
loss: 1.02085280418396,grad_norm: 0.9864141759122289, iteration: 123593
loss: 1.1489018201828003,grad_norm: 0.9999990181167181, iteration: 123594
loss: 1.0747488737106323,grad_norm: 0.9999990762754558, iteration: 123595
loss: 1.1112715005874634,grad_norm: 0.9999997196586201, iteration: 123596
loss: 1.0181277990341187,grad_norm: 0.7580359473274305, iteration: 123597
loss: 0.9901553988456726,grad_norm: 0.8563697898194982, iteration: 123598
loss: 1.041029453277588,grad_norm: 0.9999992152762964, iteration: 123599
loss: 1.061224102973938,grad_norm: 0.9999998929223978, iteration: 123600
loss: 1.064052939414978,grad_norm: 0.9999990459778844, iteration: 123601
loss: 1.0001215934753418,grad_norm: 0.8190713816972249, iteration: 123602
loss: 1.0297291278839111,grad_norm: 0.9999989888901679, iteration: 123603
loss: 1.055680751800537,grad_norm: 0.9999990740648713, iteration: 123604
loss: 1.029484748840332,grad_norm: 0.9999994522663204, iteration: 123605
loss: 0.9902973771095276,grad_norm: 0.9999993906821725, iteration: 123606
loss: 1.0451736450195312,grad_norm: 0.9999999742121788, iteration: 123607
loss: 1.02525794506073,grad_norm: 0.8575587091855515, iteration: 123608
loss: 1.0174963474273682,grad_norm: 0.7895912568015908, iteration: 123609
loss: 1.045475721359253,grad_norm: 0.9999994515599775, iteration: 123610
loss: 1.0145153999328613,grad_norm: 0.9167610100474188, iteration: 123611
loss: 1.0771347284317017,grad_norm: 0.9999996120944379, iteration: 123612
loss: 1.0662566423416138,grad_norm: 0.9999999035424142, iteration: 123613
loss: 0.9823787808418274,grad_norm: 0.9999991245744085, iteration: 123614
loss: 1.2977932691574097,grad_norm: 0.9999999546884443, iteration: 123615
loss: 1.1065399646759033,grad_norm: 0.9999991479232849, iteration: 123616
loss: 0.9984845519065857,grad_norm: 0.9069444328889927, iteration: 123617
loss: 1.0288575887680054,grad_norm: 0.9513573419572221, iteration: 123618
loss: 1.168350100517273,grad_norm: 0.999999798770021, iteration: 123619
loss: 1.1540138721466064,grad_norm: 0.9999997103136857, iteration: 123620
loss: 0.9767273664474487,grad_norm: 0.8298411501715435, iteration: 123621
loss: 0.9717828035354614,grad_norm: 0.9999993918532994, iteration: 123622
loss: 1.0245497226715088,grad_norm: 0.858338701266048, iteration: 123623
loss: 1.062484622001648,grad_norm: 0.99999989553781, iteration: 123624
loss: 1.1033282279968262,grad_norm: 0.9999999760925977, iteration: 123625
loss: 1.0422090291976929,grad_norm: 0.9999996295416433, iteration: 123626
loss: 1.1749069690704346,grad_norm: 0.9999991312406106, iteration: 123627
loss: 0.974337637424469,grad_norm: 0.9887567519139785, iteration: 123628
loss: 1.0058276653289795,grad_norm: 0.7314103084687602, iteration: 123629
loss: 0.9978988766670227,grad_norm: 0.9999992867044123, iteration: 123630
loss: 1.0157463550567627,grad_norm: 0.9999991292351833, iteration: 123631
loss: 0.9742951989173889,grad_norm: 0.9999996095126132, iteration: 123632
loss: 1.039550542831421,grad_norm: 0.9999994334194259, iteration: 123633
loss: 1.0020458698272705,grad_norm: 0.999999101633459, iteration: 123634
loss: 0.9663913249969482,grad_norm: 0.8953095660368039, iteration: 123635
loss: 0.9982298016548157,grad_norm: 0.8045359621502961, iteration: 123636
loss: 1.0242433547973633,grad_norm: 0.7745100391812528, iteration: 123637
loss: 1.0467950105667114,grad_norm: 0.9999992140834693, iteration: 123638
loss: 0.9807731509208679,grad_norm: 0.9997905022202106, iteration: 123639
loss: 1.0051941871643066,grad_norm: 0.9999993087966637, iteration: 123640
loss: 1.173638939857483,grad_norm: 0.9999998029008276, iteration: 123641
loss: 1.0307860374450684,grad_norm: 0.8582277462618171, iteration: 123642
loss: 1.0784257650375366,grad_norm: 0.9999991526471526, iteration: 123643
loss: 0.9863435626029968,grad_norm: 0.833794508346401, iteration: 123644
loss: 0.9673221111297607,grad_norm: 0.8981504050839136, iteration: 123645
loss: 1.0629152059555054,grad_norm: 0.9999990540344204, iteration: 123646
loss: 1.013735055923462,grad_norm: 0.8501405617892211, iteration: 123647
loss: 1.1813030242919922,grad_norm: 0.9999991248565161, iteration: 123648
loss: 1.0181803703308105,grad_norm: 0.9680573817537352, iteration: 123649
loss: 1.057719111442566,grad_norm: 0.9999996661548411, iteration: 123650
loss: 1.0134776830673218,grad_norm: 0.9382583738803794, iteration: 123651
loss: 0.9940611124038696,grad_norm: 0.831274804167048, iteration: 123652
loss: 0.9979865550994873,grad_norm: 0.9999994883451049, iteration: 123653
loss: 1.0173896551132202,grad_norm: 0.8294061227796011, iteration: 123654
loss: 1.0467214584350586,grad_norm: 0.9999991681774592, iteration: 123655
loss: 0.9938721060752869,grad_norm: 0.9361478606673084, iteration: 123656
loss: 1.0637935400009155,grad_norm: 0.9999995891171934, iteration: 123657
loss: 1.0821324586868286,grad_norm: 0.999999214627963, iteration: 123658
loss: 1.0295660495758057,grad_norm: 0.9999998935365029, iteration: 123659
loss: 0.9930052757263184,grad_norm: 0.8805606603043594, iteration: 123660
loss: 1.026942491531372,grad_norm: 0.8760562778339805, iteration: 123661
loss: 1.0033040046691895,grad_norm: 0.8654827290552736, iteration: 123662
loss: 1.0640655755996704,grad_norm: 0.9999990603880686, iteration: 123663
loss: 1.1872260570526123,grad_norm: 0.9999995422902145, iteration: 123664
loss: 0.9923238158226013,grad_norm: 0.9999995325639868, iteration: 123665
loss: 0.9723321199417114,grad_norm: 0.999999338785342, iteration: 123666
loss: 1.100805640220642,grad_norm: 0.999754056286854, iteration: 123667
loss: 1.1198374032974243,grad_norm: 0.9094238486617187, iteration: 123668
loss: 1.0032238960266113,grad_norm: 0.792660176705845, iteration: 123669
loss: 1.0206546783447266,grad_norm: 0.7987886444755712, iteration: 123670
loss: 0.9885531067848206,grad_norm: 0.8608161108815972, iteration: 123671
loss: 1.0726664066314697,grad_norm: 0.9999990399841198, iteration: 123672
loss: 1.008597731590271,grad_norm: 0.9706763618051756, iteration: 123673
loss: 1.2193942070007324,grad_norm: 0.9999995624092001, iteration: 123674
loss: 1.0002754926681519,grad_norm: 0.8815517480543053, iteration: 123675
loss: 0.9680753946304321,grad_norm: 0.9306374204524284, iteration: 123676
loss: 1.0429607629776,grad_norm: 0.9484769853521233, iteration: 123677
loss: 1.0441683530807495,grad_norm: 0.8750906571302247, iteration: 123678
loss: 1.1370469331741333,grad_norm: 0.9999991627895367, iteration: 123679
loss: 1.0162239074707031,grad_norm: 0.9999992007999849, iteration: 123680
loss: 1.0004456043243408,grad_norm: 0.9999990136476754, iteration: 123681
loss: 1.0102112293243408,grad_norm: 0.9041338482308063, iteration: 123682
loss: 1.0943607091903687,grad_norm: 0.9999991500025024, iteration: 123683
loss: 1.033464789390564,grad_norm: 0.9809432757273026, iteration: 123684
loss: 0.9891537427902222,grad_norm: 0.9999990987279946, iteration: 123685
loss: 1.0952261686325073,grad_norm: 0.9999999096220479, iteration: 123686
loss: 1.0417885780334473,grad_norm: 0.9999994407207329, iteration: 123687
loss: 1.0311710834503174,grad_norm: 0.9722490485704683, iteration: 123688
loss: 1.0291755199432373,grad_norm: 0.9999992345888917, iteration: 123689
loss: 1.0201504230499268,grad_norm: 0.9673221470427104, iteration: 123690
loss: 0.9991066455841064,grad_norm: 0.8462064942721359, iteration: 123691
loss: 1.0150943994522095,grad_norm: 0.9999990747355653, iteration: 123692
loss: 1.034769058227539,grad_norm: 0.999999438209456, iteration: 123693
loss: 1.0064507722854614,grad_norm: 0.8924814169526196, iteration: 123694
loss: 1.0418440103530884,grad_norm: 0.9999996993921296, iteration: 123695
loss: 1.066293478012085,grad_norm: 0.9999992797790511, iteration: 123696
loss: 1.0241390466690063,grad_norm: 0.8373946775209381, iteration: 123697
loss: 1.004125952720642,grad_norm: 0.8860921445236384, iteration: 123698
loss: 1.0224417448043823,grad_norm: 0.9999991832994234, iteration: 123699
loss: 1.0646013021469116,grad_norm: 0.9999999499490689, iteration: 123700
loss: 1.0279150009155273,grad_norm: 0.9106237694711193, iteration: 123701
loss: 0.9944276809692383,grad_norm: 0.9999991329245499, iteration: 123702
loss: 0.9966368079185486,grad_norm: 0.8852248095320554, iteration: 123703
loss: 1.007948398590088,grad_norm: 0.9999991293842303, iteration: 123704
loss: 1.0166301727294922,grad_norm: 0.8865333480227335, iteration: 123705
loss: 1.032378911972046,grad_norm: 0.9999991395171653, iteration: 123706
loss: 1.0154502391815186,grad_norm: 0.9999999739594372, iteration: 123707
loss: 0.9720972776412964,grad_norm: 0.838983697963551, iteration: 123708
loss: 1.2843985557556152,grad_norm: 0.9999998567338116, iteration: 123709
loss: 0.9850607514381409,grad_norm: 0.9999990475348509, iteration: 123710
loss: 0.9993865489959717,grad_norm: 0.9915301946698392, iteration: 123711
loss: 1.0256059169769287,grad_norm: 0.9999994901132916, iteration: 123712
loss: 1.038199543952942,grad_norm: 0.8094729797227974, iteration: 123713
loss: 0.9935853481292725,grad_norm: 0.8211256936810333, iteration: 123714
loss: 0.9982089996337891,grad_norm: 0.999999008605453, iteration: 123715
loss: 1.0233595371246338,grad_norm: 0.9999994438406479, iteration: 123716
loss: 0.9957850575447083,grad_norm: 0.8857128873910225, iteration: 123717
loss: 1.2037509679794312,grad_norm: 0.9999997990582444, iteration: 123718
loss: 1.0387920141220093,grad_norm: 0.9999991709280885, iteration: 123719
loss: 0.9843652248382568,grad_norm: 0.9999990777034892, iteration: 123720
loss: 0.9876828193664551,grad_norm: 0.8170226658121681, iteration: 123721
loss: 0.9943681955337524,grad_norm: 0.9724279980906343, iteration: 123722
loss: 1.1287356615066528,grad_norm: 0.9999992184630788, iteration: 123723
loss: 1.06586492061615,grad_norm: 0.9999993400532667, iteration: 123724
loss: 1.0158339738845825,grad_norm: 0.9999996763879746, iteration: 123725
loss: 1.0474015474319458,grad_norm: 0.9999992489852205, iteration: 123726
loss: 0.9939938187599182,grad_norm: 0.9999989631681518, iteration: 123727
loss: 1.0449144840240479,grad_norm: 0.8950991307321521, iteration: 123728
loss: 1.0258939266204834,grad_norm: 0.8958336126112338, iteration: 123729
loss: 1.0076011419296265,grad_norm: 0.8518818424122451, iteration: 123730
loss: 0.9981116652488708,grad_norm: 0.9876706477886792, iteration: 123731
loss: 1.1270614862442017,grad_norm: 0.9999999263708012, iteration: 123732
loss: 1.016414761543274,grad_norm: 0.9053635416590109, iteration: 123733
loss: 1.0317575931549072,grad_norm: 0.9999999695994072, iteration: 123734
loss: 1.0323777198791504,grad_norm: 0.8918843253004477, iteration: 123735
loss: 1.0236552953720093,grad_norm: 0.9999991341306252, iteration: 123736
loss: 1.1508198976516724,grad_norm: 0.9999997527767187, iteration: 123737
loss: 1.0083826780319214,grad_norm: 0.878780859522027, iteration: 123738
loss: 1.047749638557434,grad_norm: 0.9999991063627157, iteration: 123739
loss: 1.0613449811935425,grad_norm: 0.9999995359979083, iteration: 123740
loss: 1.070446252822876,grad_norm: 0.8656451271192795, iteration: 123741
loss: 0.9855839014053345,grad_norm: 0.9999991609928829, iteration: 123742
loss: 0.9908541440963745,grad_norm: 0.9999991973180781, iteration: 123743
loss: 1.0417677164077759,grad_norm: 0.9999993739229668, iteration: 123744
loss: 1.0244461297988892,grad_norm: 0.9999991574074873, iteration: 123745
loss: 1.0667989253997803,grad_norm: 0.9999993440794558, iteration: 123746
loss: 1.0381141901016235,grad_norm: 0.9148078409952607, iteration: 123747
loss: 1.0440006256103516,grad_norm: 0.9999994836515593, iteration: 123748
loss: 1.0984344482421875,grad_norm: 0.9999990658542875, iteration: 123749
loss: 1.0510987043380737,grad_norm: 0.9999990871397119, iteration: 123750
loss: 0.9653763175010681,grad_norm: 0.9999990566138712, iteration: 123751
loss: 1.1509805917739868,grad_norm: 0.9999992517178697, iteration: 123752
loss: 0.9688012003898621,grad_norm: 0.7794668208534015, iteration: 123753
loss: 1.0468214750289917,grad_norm: 0.9999994952713916, iteration: 123754
loss: 0.9927912950515747,grad_norm: 0.8576365254883639, iteration: 123755
loss: 1.1897636651992798,grad_norm: 0.9999999143081838, iteration: 123756
loss: 1.1063958406448364,grad_norm: 0.9999999931929044, iteration: 123757
loss: 1.0589878559112549,grad_norm: 0.9999995364414447, iteration: 123758
loss: 1.1119884252548218,grad_norm: 0.9999993324561776, iteration: 123759
loss: 1.05754554271698,grad_norm: 0.9999996906250577, iteration: 123760
loss: 1.0266789197921753,grad_norm: 0.9999993807048667, iteration: 123761
loss: 1.0406148433685303,grad_norm: 0.9999991209708405, iteration: 123762
loss: 1.0090497732162476,grad_norm: 0.9999991794043822, iteration: 123763
loss: 1.1910631656646729,grad_norm: 0.9999999382514605, iteration: 123764
loss: 0.9711619019508362,grad_norm: 0.9999991488692963, iteration: 123765
loss: 1.0199065208435059,grad_norm: 0.9653317151670675, iteration: 123766
loss: 1.014278531074524,grad_norm: 0.7712587175127891, iteration: 123767
loss: 1.0105111598968506,grad_norm: 0.9999995035450963, iteration: 123768
loss: 1.1019811630249023,grad_norm: 0.9999998403285356, iteration: 123769
loss: 0.982671320438385,grad_norm: 0.9924824345416028, iteration: 123770
loss: 1.0312817096710205,grad_norm: 0.9999991022594221, iteration: 123771
loss: 1.004302978515625,grad_norm: 0.9999991244770042, iteration: 123772
loss: 1.001261591911316,grad_norm: 0.999999542045143, iteration: 123773
loss: 1.0761133432388306,grad_norm: 0.9999990918553581, iteration: 123774
loss: 1.0448172092437744,grad_norm: 0.9533120049983613, iteration: 123775
loss: 1.2181422710418701,grad_norm: 0.999999886662973, iteration: 123776
loss: 0.9910756945610046,grad_norm: 0.9999993001313513, iteration: 123777
loss: 1.1266595125198364,grad_norm: 0.9999996145286462, iteration: 123778
loss: 1.0233618021011353,grad_norm: 0.9999997358666899, iteration: 123779
loss: 1.082856297492981,grad_norm: 0.9999993621409, iteration: 123780
loss: 0.9973848462104797,grad_norm: 0.7786912482308205, iteration: 123781
loss: 0.9804527759552002,grad_norm: 0.8253678090778287, iteration: 123782
loss: 1.0609750747680664,grad_norm: 0.9999998768590354, iteration: 123783
loss: 0.9961836338043213,grad_norm: 0.7427587995828564, iteration: 123784
loss: 1.0074671506881714,grad_norm: 0.861949168943192, iteration: 123785
loss: 1.0919177532196045,grad_norm: 0.9606750657054723, iteration: 123786
loss: 1.0032366514205933,grad_norm: 0.9999990681537313, iteration: 123787
loss: 1.0349185466766357,grad_norm: 0.9008105264162117, iteration: 123788
loss: 1.0474586486816406,grad_norm: 0.9296019637668794, iteration: 123789
loss: 1.1507723331451416,grad_norm: 0.999999396380877, iteration: 123790
loss: 1.110696792602539,grad_norm: 0.9999990245737713, iteration: 123791
loss: 1.0102152824401855,grad_norm: 0.8214119009430562, iteration: 123792
loss: 0.9605368375778198,grad_norm: 0.8175149781966963, iteration: 123793
loss: 1.0979785919189453,grad_norm: 0.9999993071147362, iteration: 123794
loss: 1.073622465133667,grad_norm: 0.9999996605974895, iteration: 123795
loss: 1.2567185163497925,grad_norm: 0.9999997524125864, iteration: 123796
loss: 1.0370216369628906,grad_norm: 0.9999990766165601, iteration: 123797
loss: 1.0999598503112793,grad_norm: 0.9999994499900876, iteration: 123798
loss: 1.067685604095459,grad_norm: 0.9010736716950418, iteration: 123799
loss: 1.0691558122634888,grad_norm: 0.9999991435732982, iteration: 123800
loss: 1.094688892364502,grad_norm: 0.999999316812724, iteration: 123801
loss: 1.019137978553772,grad_norm: 0.99999929468626, iteration: 123802
loss: 0.9723600149154663,grad_norm: 0.9407136004910538, iteration: 123803
loss: 0.9902938008308411,grad_norm: 0.9254189908179888, iteration: 123804
loss: 1.0102970600128174,grad_norm: 0.9270469996352106, iteration: 123805
loss: 1.0178419351577759,grad_norm: 0.9246590837756276, iteration: 123806
loss: 1.0746705532073975,grad_norm: 0.8774772724542514, iteration: 123807
loss: 1.183328628540039,grad_norm: 0.9999994954070661, iteration: 123808
loss: 0.9575918316841125,grad_norm: 0.8939554665523836, iteration: 123809
loss: 1.0754899978637695,grad_norm: 0.9999990262657764, iteration: 123810
loss: 1.0118517875671387,grad_norm: 0.977710049218531, iteration: 123811
loss: 1.0062631368637085,grad_norm: 0.9999997907929856, iteration: 123812
loss: 1.0113856792449951,grad_norm: 0.9999990763366444, iteration: 123813
loss: 1.2323449850082397,grad_norm: 0.9951563619355379, iteration: 123814
loss: 1.1463422775268555,grad_norm: 0.9999991975758126, iteration: 123815
loss: 1.0752570629119873,grad_norm: 0.9999996100628895, iteration: 123816
loss: 0.9750207662582397,grad_norm: 0.8674699083948649, iteration: 123817
loss: 1.0595651865005493,grad_norm: 1.0000000580916562, iteration: 123818
loss: 1.0335298776626587,grad_norm: 0.9999991986559126, iteration: 123819
loss: 1.1772451400756836,grad_norm: 0.9999996007863207, iteration: 123820
loss: 1.035996675491333,grad_norm: 0.9999992816033042, iteration: 123821
loss: 1.313574194908142,grad_norm: 0.9999994611964418, iteration: 123822
loss: 1.0071091651916504,grad_norm: 0.9999991743075788, iteration: 123823
loss: 0.9784982800483704,grad_norm: 0.9543213897436348, iteration: 123824
loss: 1.0775617361068726,grad_norm: 0.9999991185441689, iteration: 123825
loss: 1.0251317024230957,grad_norm: 0.9999991878763298, iteration: 123826
loss: 1.2300174236297607,grad_norm: 0.9999994654178402, iteration: 123827
loss: 1.0541402101516724,grad_norm: 0.8798054826116627, iteration: 123828
loss: 0.9777681231498718,grad_norm: 0.9999991490424716, iteration: 123829
loss: 1.1703519821166992,grad_norm: 0.9999998324734601, iteration: 123830
loss: 1.0200917720794678,grad_norm: 0.9999997820733276, iteration: 123831
loss: 1.0577061176300049,grad_norm: 0.9999997017256016, iteration: 123832
loss: 1.1697067022323608,grad_norm: 0.9999998035587702, iteration: 123833
loss: 1.1988649368286133,grad_norm: 0.9999995822341256, iteration: 123834
loss: 1.029369831085205,grad_norm: 0.9999999380782812, iteration: 123835
loss: 0.986579418182373,grad_norm: 0.8421921522012848, iteration: 123836
loss: 0.988718569278717,grad_norm: 0.9999991106805303, iteration: 123837
loss: 0.9783740043640137,grad_norm: 0.9999994256512408, iteration: 123838
loss: 0.9953058362007141,grad_norm: 0.8685359436018135, iteration: 123839
loss: 1.0141226053237915,grad_norm: 0.8524361787695771, iteration: 123840
loss: 0.996231734752655,grad_norm: 0.9999992099754885, iteration: 123841
loss: 1.0463662147521973,grad_norm: 0.9880628781685955, iteration: 123842
loss: 1.0636348724365234,grad_norm: 0.837621368513789, iteration: 123843
loss: 1.0216447114944458,grad_norm: 0.8948435919677896, iteration: 123844
loss: 1.0905267000198364,grad_norm: 0.9999993814602669, iteration: 123845
loss: 1.077721118927002,grad_norm: 0.9999994714676276, iteration: 123846
loss: 1.0261216163635254,grad_norm: 0.9999991335577065, iteration: 123847
loss: 0.9864006638526917,grad_norm: 0.9999993235557614, iteration: 123848
loss: 0.9980819821357727,grad_norm: 0.9999994954790418, iteration: 123849
loss: 1.012827754020691,grad_norm: 0.8979919597538458, iteration: 123850
loss: 1.0045000314712524,grad_norm: 0.7801734954754774, iteration: 123851
loss: 1.0893934965133667,grad_norm: 0.9840062299451448, iteration: 123852
loss: 1.0227274894714355,grad_norm: 0.9999990714240243, iteration: 123853
loss: 1.1470118761062622,grad_norm: 0.9999995238999391, iteration: 123854
loss: 1.0345056056976318,grad_norm: 0.9999992127606397, iteration: 123855
loss: 1.0072704553604126,grad_norm: 0.9999998197457758, iteration: 123856
loss: 1.0454986095428467,grad_norm: 0.8770713568866847, iteration: 123857
loss: 0.9671449661254883,grad_norm: 0.9999991189806867, iteration: 123858
loss: 1.1569849252700806,grad_norm: 0.9999999969547522, iteration: 123859
loss: 0.9988464713096619,grad_norm: 0.7426417175934824, iteration: 123860
loss: 0.9854099154472351,grad_norm: 0.8265414689822411, iteration: 123861
loss: 0.9800199270248413,grad_norm: 0.9999991513047107, iteration: 123862
loss: 1.0694409608840942,grad_norm: 0.9999990049764563, iteration: 123863
loss: 1.057900071144104,grad_norm: 0.9999994295714576, iteration: 123864
loss: 1.042483925819397,grad_norm: 0.9999995386974558, iteration: 123865
loss: 1.0758346319198608,grad_norm: 0.999999797991392, iteration: 123866
loss: 1.0564383268356323,grad_norm: 0.9999990079452561, iteration: 123867
loss: 1.113375186920166,grad_norm: 0.999999911109378, iteration: 123868
loss: 1.0470620393753052,grad_norm: 0.999999235461525, iteration: 123869
loss: 1.0367575883865356,grad_norm: 0.9999996973120183, iteration: 123870
loss: 1.0372185707092285,grad_norm: 0.6926655201267312, iteration: 123871
loss: 0.9671630263328552,grad_norm: 0.9999997709879516, iteration: 123872
loss: 0.9754446744918823,grad_norm: 0.966949595588969, iteration: 123873
loss: 1.2101285457611084,grad_norm: 0.9999991844203467, iteration: 123874
loss: 1.079607367515564,grad_norm: 0.9999991946226111, iteration: 123875
loss: 1.003259539604187,grad_norm: 0.9609615625416412, iteration: 123876
loss: 0.9874110221862793,grad_norm: 0.8382797045211883, iteration: 123877
loss: 1.016343355178833,grad_norm: 0.8794717639039732, iteration: 123878
loss: 1.0225813388824463,grad_norm: 0.9999993739314262, iteration: 123879
loss: 1.0322991609573364,grad_norm: 0.999999547129447, iteration: 123880
loss: 1.02529776096344,grad_norm: 0.9999990370841759, iteration: 123881
loss: 1.0482538938522339,grad_norm: 0.9999989152335687, iteration: 123882
loss: 1.029205083847046,grad_norm: 0.8708678216989024, iteration: 123883
loss: 1.0286122560501099,grad_norm: 0.916761016850017, iteration: 123884
loss: 1.0515862703323364,grad_norm: 0.9999996541304921, iteration: 123885
loss: 1.1230814456939697,grad_norm: 0.9999996306841366, iteration: 123886
loss: 1.1357448101043701,grad_norm: 0.9999999585672853, iteration: 123887
loss: 1.0464708805084229,grad_norm: 0.9999993611747905, iteration: 123888
loss: 1.0062789916992188,grad_norm: 0.8154504264389554, iteration: 123889
loss: 1.0548251867294312,grad_norm: 0.9999992622874534, iteration: 123890
loss: 1.0572131872177124,grad_norm: 0.999999953176701, iteration: 123891
loss: 1.0046852827072144,grad_norm: 0.9304849104819194, iteration: 123892
loss: 1.0338469743728638,grad_norm: 0.9999989842640951, iteration: 123893
loss: 1.0684601068496704,grad_norm: 0.9999990599305629, iteration: 123894
loss: 1.0118581056594849,grad_norm: 0.7822434561659037, iteration: 123895
loss: 1.079571008682251,grad_norm: 0.999999447060831, iteration: 123896
loss: 1.0554531812667847,grad_norm: 0.9999991068737197, iteration: 123897
loss: 1.082689642906189,grad_norm: 0.9999996803635409, iteration: 123898
loss: 1.070776343345642,grad_norm: 0.9999991220842224, iteration: 123899
loss: 1.0393385887145996,grad_norm: 0.9999991076049679, iteration: 123900
loss: 1.0406829118728638,grad_norm: 0.9999990848997148, iteration: 123901
loss: 1.0012916326522827,grad_norm: 0.9536517122619015, iteration: 123902
loss: 0.9906704425811768,grad_norm: 0.9999992998898283, iteration: 123903
loss: 1.046269416809082,grad_norm: 0.9999997754108714, iteration: 123904
loss: 1.034782886505127,grad_norm: 0.9999989624821173, iteration: 123905
loss: 1.0563782453536987,grad_norm: 0.9230533853496495, iteration: 123906
loss: 1.0277007818222046,grad_norm: 0.999999439390216, iteration: 123907
loss: 1.0255643129348755,grad_norm: 0.9999989025382552, iteration: 123908
loss: 1.0336889028549194,grad_norm: 0.999999337679807, iteration: 123909
loss: 1.000429630279541,grad_norm: 0.999999085506567, iteration: 123910
loss: 1.285107135772705,grad_norm: 0.9999997195938354, iteration: 123911
loss: 0.9737889170646667,grad_norm: 0.9171747527131922, iteration: 123912
loss: 1.008091926574707,grad_norm: 0.9161683407091247, iteration: 123913
loss: 0.9719560742378235,grad_norm: 0.8949019015950087, iteration: 123914
loss: 0.989483118057251,grad_norm: 0.8745284645283271, iteration: 123915
loss: 1.101576805114746,grad_norm: 0.9335100813609839, iteration: 123916
loss: 1.0682597160339355,grad_norm: 0.9999991592220442, iteration: 123917
loss: 1.0364494323730469,grad_norm: 0.9338115218568885, iteration: 123918
loss: 1.1802619695663452,grad_norm: 0.9999995693703491, iteration: 123919
loss: 0.9832257628440857,grad_norm: 0.9999994973577111, iteration: 123920
loss: 1.045721173286438,grad_norm: 0.9999993821744588, iteration: 123921
loss: 1.058966040611267,grad_norm: 0.8609140904970574, iteration: 123922
loss: 1.0127521753311157,grad_norm: 0.9745539993493284, iteration: 123923
loss: 1.0251190662384033,grad_norm: 0.9999991952180887, iteration: 123924
loss: 1.0322740077972412,grad_norm: 0.9999990771919923, iteration: 123925
loss: 1.030824065208435,grad_norm: 0.9999992399468183, iteration: 123926
loss: 1.0477726459503174,grad_norm: 0.9999992107692487, iteration: 123927
loss: 1.0286898612976074,grad_norm: 0.8121905764952593, iteration: 123928
loss: 1.145650863647461,grad_norm: 0.9999994238284661, iteration: 123929
loss: 1.1916195154190063,grad_norm: 0.9999997984836106, iteration: 123930
loss: 1.002551794052124,grad_norm: 0.9999990327477759, iteration: 123931
loss: 0.9912809133529663,grad_norm: 0.7641895187775614, iteration: 123932
loss: 1.0527212619781494,grad_norm: 0.8873968253946943, iteration: 123933
loss: 1.0525617599487305,grad_norm: 0.999999107015652, iteration: 123934
loss: 1.0051233768463135,grad_norm: 0.9233105542355877, iteration: 123935
loss: 1.2488210201263428,grad_norm: 0.9999990310566539, iteration: 123936
loss: 1.0690498352050781,grad_norm: 0.999999245572566, iteration: 123937
loss: 1.2450233697891235,grad_norm: 0.9999990972003401, iteration: 123938
loss: 1.088919758796692,grad_norm: 0.9960867586754047, iteration: 123939
loss: 1.052276611328125,grad_norm: 0.9999993718043605, iteration: 123940
loss: 1.0635088682174683,grad_norm: 0.9999994196211424, iteration: 123941
loss: 1.0043460130691528,grad_norm: 0.9239376316169527, iteration: 123942
loss: 1.4305158853530884,grad_norm: 0.9999997536836917, iteration: 123943
loss: 1.026627540588379,grad_norm: 0.9999992029586121, iteration: 123944
loss: 0.9745668172836304,grad_norm: 0.9999991355754558, iteration: 123945
loss: 0.961762547492981,grad_norm: 0.9973174585318991, iteration: 123946
loss: 1.1506575345993042,grad_norm: 0.999999114563527, iteration: 123947
loss: 1.020171046257019,grad_norm: 0.8657306850256917, iteration: 123948
loss: 1.2001487016677856,grad_norm: 0.9999995258390241, iteration: 123949
loss: 1.1532021760940552,grad_norm: 0.9999997883712588, iteration: 123950
loss: 1.086085319519043,grad_norm: 0.999999610945927, iteration: 123951
loss: 1.4745241403579712,grad_norm: 0.9999995278114024, iteration: 123952
loss: 1.2196987867355347,grad_norm: 0.9999996928719486, iteration: 123953
loss: 0.9900374412536621,grad_norm: 0.93706243752163, iteration: 123954
loss: 0.9549753665924072,grad_norm: 0.9587181758858732, iteration: 123955
loss: 1.2123057842254639,grad_norm: 0.9999994240404857, iteration: 123956
loss: 1.2963252067565918,grad_norm: 1.0000000237289393, iteration: 123957
loss: 1.2280185222625732,grad_norm: 0.9999992909374098, iteration: 123958
loss: 0.9937559366226196,grad_norm: 0.9027045729078906, iteration: 123959
loss: 1.0178114175796509,grad_norm: 0.8516417135892059, iteration: 123960
loss: 0.9889228940010071,grad_norm: 0.9238007742858301, iteration: 123961
loss: 1.0300496816635132,grad_norm: 0.9999989814454808, iteration: 123962
loss: 1.0462605953216553,grad_norm: 0.8153422231128692, iteration: 123963
loss: 1.0103174448013306,grad_norm: 0.7099048807921403, iteration: 123964
loss: 1.2092212438583374,grad_norm: 0.9999992239446475, iteration: 123965
loss: 1.0537259578704834,grad_norm: 0.9999995638953291, iteration: 123966
loss: 1.3901070356369019,grad_norm: 0.9999995750368785, iteration: 123967
loss: 1.1488131284713745,grad_norm: 0.9999997370876919, iteration: 123968
loss: 1.1788978576660156,grad_norm: 0.9999993901928806, iteration: 123969
loss: 1.0266504287719727,grad_norm: 0.9644685370963011, iteration: 123970
loss: 1.2211580276489258,grad_norm: 0.9999993469759597, iteration: 123971
loss: 1.1487929821014404,grad_norm: 0.9999991178413247, iteration: 123972
loss: 1.424661636352539,grad_norm: 0.999999612401474, iteration: 123973
loss: 1.027260422706604,grad_norm: 0.9999990401686601, iteration: 123974
loss: 1.0259480476379395,grad_norm: 0.8626555153416633, iteration: 123975
loss: 1.0909886360168457,grad_norm: 0.9999997568125946, iteration: 123976
loss: 1.0351499319076538,grad_norm: 0.9764969905562104, iteration: 123977
loss: 1.0674784183502197,grad_norm: 0.999999159051056, iteration: 123978
loss: 1.046839714050293,grad_norm: 0.9999991630560828, iteration: 123979
loss: 1.134634017944336,grad_norm: 0.999999716067824, iteration: 123980
loss: 0.9721708297729492,grad_norm: 0.9999990832500896, iteration: 123981
loss: 1.2742656469345093,grad_norm: 0.9999998272473667, iteration: 123982
loss: 1.042620301246643,grad_norm: 0.9999994312081887, iteration: 123983
loss: 0.98944091796875,grad_norm: 0.8645848585689492, iteration: 123984
loss: 1.0622369050979614,grad_norm: 0.9999999916188143, iteration: 123985
loss: 0.9955941438674927,grad_norm: 0.9131694378729194, iteration: 123986
loss: 1.0112193822860718,grad_norm: 0.8543913304015363, iteration: 123987
loss: 1.1201379299163818,grad_norm: 0.9999998072356484, iteration: 123988
loss: 1.0102527141571045,grad_norm: 0.9999994486564934, iteration: 123989
loss: 1.067219614982605,grad_norm: 0.9999999619455543, iteration: 123990
loss: 1.2368347644805908,grad_norm: 0.9999993604200476, iteration: 123991
loss: 1.2695544958114624,grad_norm: 0.9999993070075763, iteration: 123992
loss: 0.9923563003540039,grad_norm: 0.9999993266834586, iteration: 123993
loss: 1.0902559757232666,grad_norm: 0.9999992798979592, iteration: 123994
loss: 1.0046145915985107,grad_norm: 0.7875347836073734, iteration: 123995
loss: 1.0763829946517944,grad_norm: 0.9999996862523128, iteration: 123996
loss: 0.975041389465332,grad_norm: 0.839281553595221, iteration: 123997
loss: 0.9851418733596802,grad_norm: 0.9999992965205193, iteration: 123998
loss: 1.204946756362915,grad_norm: 0.9999999905848873, iteration: 123999
loss: 1.0069714784622192,grad_norm: 0.9273196377822113, iteration: 124000
loss: 0.9914159774780273,grad_norm: 0.9716459530801994, iteration: 124001
loss: 1.014726161956787,grad_norm: 0.7858900491538934, iteration: 124002
loss: 0.9963518977165222,grad_norm: 0.9999990096463237, iteration: 124003
loss: 1.0090303421020508,grad_norm: 0.9621029360904382, iteration: 124004
loss: 1.0337783098220825,grad_norm: 0.8804193716119658, iteration: 124005
loss: 0.998424768447876,grad_norm: 0.7979912146753765, iteration: 124006
loss: 1.2016104459762573,grad_norm: 0.9999997743079652, iteration: 124007
loss: 1.1995688676834106,grad_norm: 0.9999994331910977, iteration: 124008
loss: 1.0443115234375,grad_norm: 0.9999995477691898, iteration: 124009
loss: 1.2011406421661377,grad_norm: 0.9999992138825102, iteration: 124010
loss: 1.0049246549606323,grad_norm: 0.9714936011916349, iteration: 124011
loss: 1.1434781551361084,grad_norm: 0.9999993534952115, iteration: 124012
loss: 1.0406917333602905,grad_norm: 0.9786462275269344, iteration: 124013
loss: 1.085658311843872,grad_norm: 0.9999994537024448, iteration: 124014
loss: 1.25239098072052,grad_norm: 0.9999995252181425, iteration: 124015
loss: 0.9871089458465576,grad_norm: 0.7530904151125701, iteration: 124016
loss: 1.1761337518692017,grad_norm: 0.999999371561065, iteration: 124017
loss: 1.0497268438339233,grad_norm: 0.9999993610618881, iteration: 124018
loss: 1.0558972358703613,grad_norm: 0.8646791448695207, iteration: 124019
loss: 0.9697631597518921,grad_norm: 0.9199957566949423, iteration: 124020
loss: 1.0347414016723633,grad_norm: 0.9885850706249442, iteration: 124021
loss: 1.4632484912872314,grad_norm: 0.9999994834506495, iteration: 124022
loss: 0.9750828742980957,grad_norm: 0.9999992345614364, iteration: 124023
loss: 1.0647940635681152,grad_norm: 0.8779264746778587, iteration: 124024
loss: 1.0056803226470947,grad_norm: 0.9417564796911357, iteration: 124025
loss: 1.0276433229446411,grad_norm: 0.7691823529349601, iteration: 124026
loss: 1.0569233894348145,grad_norm: 0.9794050288084795, iteration: 124027
loss: 1.0132043361663818,grad_norm: 0.9922009040500186, iteration: 124028
loss: 1.3446269035339355,grad_norm: 0.9999998766804088, iteration: 124029
loss: 1.168265461921692,grad_norm: 0.9999994045140387, iteration: 124030
loss: 1.1954323053359985,grad_norm: 0.9999991651303453, iteration: 124031
loss: 1.0784807205200195,grad_norm: 0.9999993094408101, iteration: 124032
loss: 1.1219964027404785,grad_norm: 0.9999998991091376, iteration: 124033
loss: 1.0070319175720215,grad_norm: 0.8720695154435659, iteration: 124034
loss: 1.1734422445297241,grad_norm: 0.9999995777869619, iteration: 124035
loss: 1.1784076690673828,grad_norm: 0.9999993122480056, iteration: 124036
loss: 1.0911098718643188,grad_norm: 0.9999999891077703, iteration: 124037
loss: 1.0172072649002075,grad_norm: 0.9132475584210036, iteration: 124038
loss: 1.1023104190826416,grad_norm: 0.999999818597214, iteration: 124039
loss: 0.9800218939781189,grad_norm: 0.8857125642103686, iteration: 124040
loss: 1.0534359216690063,grad_norm: 0.9999998812558405, iteration: 124041
loss: 1.078101634979248,grad_norm: 0.9999996665373426, iteration: 124042
loss: 1.0244011878967285,grad_norm: 0.7953851812306252, iteration: 124043
loss: 1.1881500482559204,grad_norm: 0.9999996628484398, iteration: 124044
loss: 1.0280927419662476,grad_norm: 0.9999998344284047, iteration: 124045
loss: 1.0064228773117065,grad_norm: 0.9999990917205885, iteration: 124046
loss: 1.2244493961334229,grad_norm: 0.9999998260588453, iteration: 124047
loss: 1.0785956382751465,grad_norm: 1.0000000624535432, iteration: 124048
loss: 1.0280396938323975,grad_norm: 0.9999992621683043, iteration: 124049
loss: 1.3311480283737183,grad_norm: 0.999999914658576, iteration: 124050
loss: 1.1448431015014648,grad_norm: 0.9999990004332124, iteration: 124051
loss: 1.1863232851028442,grad_norm: 0.9999992743067954, iteration: 124052
loss: 1.0019512176513672,grad_norm: 0.9054256989850256, iteration: 124053
loss: 0.9818849563598633,grad_norm: 0.8497565111903979, iteration: 124054
loss: 1.012012243270874,grad_norm: 0.9999994872326827, iteration: 124055
loss: 1.0698496103286743,grad_norm: 0.9999993232559378, iteration: 124056
loss: 1.1458122730255127,grad_norm: 0.9999997059983147, iteration: 124057
loss: 1.3831367492675781,grad_norm: 0.9999997190457123, iteration: 124058
loss: 1.0562728643417358,grad_norm: 0.9417311183553265, iteration: 124059
loss: 1.0248193740844727,grad_norm: 0.7841206145190825, iteration: 124060
loss: 0.9876497983932495,grad_norm: 0.9752439945859038, iteration: 124061
loss: 1.1989405155181885,grad_norm: 0.9999997590933427, iteration: 124062
loss: 1.1116236448287964,grad_norm: 0.9999993722340189, iteration: 124063
loss: 1.1165528297424316,grad_norm: 0.999999466742374, iteration: 124064
loss: 1.044169545173645,grad_norm: 0.9999990335286733, iteration: 124065
loss: 1.1719263792037964,grad_norm: 0.9999991276199801, iteration: 124066
loss: 1.0965287685394287,grad_norm: 0.9999997819781371, iteration: 124067
loss: 1.0701286792755127,grad_norm: 0.999999681734858, iteration: 124068
loss: 1.2772483825683594,grad_norm: 0.9999992338659, iteration: 124069
loss: 1.0616638660430908,grad_norm: 0.9245394482827672, iteration: 124070
loss: 1.1302543878555298,grad_norm: 0.9999994931928193, iteration: 124071
loss: 1.0148719549179077,grad_norm: 0.9999989631915663, iteration: 124072
loss: 1.0725500583648682,grad_norm: 0.9999990781210554, iteration: 124073
loss: 1.0915205478668213,grad_norm: 0.8492989093047465, iteration: 124074
loss: 1.204288363456726,grad_norm: 0.9999993527898556, iteration: 124075
loss: 1.0654425621032715,grad_norm: 0.9999997656228093, iteration: 124076
loss: 1.0298551321029663,grad_norm: 0.9999989644687814, iteration: 124077
loss: 1.0555434226989746,grad_norm: 0.9999994580753775, iteration: 124078
loss: 1.253836989402771,grad_norm: 0.9999994141084432, iteration: 124079
loss: 1.4917347431182861,grad_norm: 0.9999996077412081, iteration: 124080
loss: 1.00529944896698,grad_norm: 0.9999992456465588, iteration: 124081
loss: 1.0578608512878418,grad_norm: 0.9999990976059719, iteration: 124082
loss: 1.2502458095550537,grad_norm: 0.9999995300141277, iteration: 124083
loss: 1.0473322868347168,grad_norm: 0.9999998254014291, iteration: 124084
loss: 1.1804031133651733,grad_norm: 0.9999990206055486, iteration: 124085
loss: 1.23099946975708,grad_norm: 0.9999996535475002, iteration: 124086
loss: 1.0263395309448242,grad_norm: 0.9894380878727471, iteration: 124087
loss: 0.9814802408218384,grad_norm: 0.9999991823885653, iteration: 124088
loss: 1.0399609804153442,grad_norm: 0.9461378795529808, iteration: 124089
loss: 1.028300166130066,grad_norm: 0.9666208702394832, iteration: 124090
loss: 1.0327155590057373,grad_norm: 0.8793114348683468, iteration: 124091
loss: 1.0124348402023315,grad_norm: 0.9999993981127736, iteration: 124092
loss: 1.1507893800735474,grad_norm: 0.9999995268394819, iteration: 124093
loss: 1.008493423461914,grad_norm: 0.7516417538817379, iteration: 124094
loss: 1.2156118154525757,grad_norm: 0.9999998940236432, iteration: 124095
loss: 1.0602589845657349,grad_norm: 0.9999994683905323, iteration: 124096
loss: 1.0870212316513062,grad_norm: 0.9999996182335139, iteration: 124097
loss: 1.2053924798965454,grad_norm: 0.9999997155909839, iteration: 124098
loss: 1.0040552616119385,grad_norm: 0.9999990664694418, iteration: 124099
loss: 1.1774609088897705,grad_norm: 0.9999996799633809, iteration: 124100
loss: 1.212541937828064,grad_norm: 0.999999582705823, iteration: 124101
loss: 1.080611228942871,grad_norm: 0.9999994984293191, iteration: 124102
loss: 1.0148411989212036,grad_norm: 0.9999999098275207, iteration: 124103
loss: 1.0963493585586548,grad_norm: 0.9999990560585889, iteration: 124104
loss: 1.0210351943969727,grad_norm: 0.9810637078343291, iteration: 124105
loss: 1.0469943284988403,grad_norm: 0.9999994352819562, iteration: 124106
loss: 1.342153549194336,grad_norm: 0.9999998239039082, iteration: 124107
loss: 1.0994508266448975,grad_norm: 0.9999996539913812, iteration: 124108
loss: 1.1417686939239502,grad_norm: 0.9999992136386981, iteration: 124109
loss: 1.0114867687225342,grad_norm: 0.9999993154245739, iteration: 124110
loss: 1.2773391008377075,grad_norm: 0.99999965947862, iteration: 124111
loss: 1.1879938840866089,grad_norm: 0.9999996726689019, iteration: 124112
loss: 0.952880859375,grad_norm: 0.9999993199927191, iteration: 124113
loss: 1.0047402381896973,grad_norm: 0.999999154387624, iteration: 124114
loss: 0.9903373122215271,grad_norm: 0.8536676655323482, iteration: 124115
loss: 1.3135581016540527,grad_norm: 0.9999995923555973, iteration: 124116
loss: 1.0082637071609497,grad_norm: 0.9790055313743752, iteration: 124117
loss: 1.2665042877197266,grad_norm: 0.9999997409060473, iteration: 124118
loss: 1.17243492603302,grad_norm: 0.999999488321572, iteration: 124119
loss: 1.0164493322372437,grad_norm: 0.9589670748701534, iteration: 124120
loss: 1.1397473812103271,grad_norm: 0.9999995900627355, iteration: 124121
loss: 1.096826195716858,grad_norm: 0.9999994315341292, iteration: 124122
loss: 1.007842779159546,grad_norm: 0.9999992706496559, iteration: 124123
loss: 0.9926401972770691,grad_norm: 0.9999991787542675, iteration: 124124
loss: 1.1335021257400513,grad_norm: 0.9999998699060656, iteration: 124125
loss: 1.1568803787231445,grad_norm: 0.9999993051237649, iteration: 124126
loss: 1.0246106386184692,grad_norm: 0.9999990604303873, iteration: 124127
loss: 1.3220818042755127,grad_norm: 0.9999992370158619, iteration: 124128
loss: 1.0655344724655151,grad_norm: 0.9999991135523222, iteration: 124129
loss: 1.0107824802398682,grad_norm: 0.9999992225746721, iteration: 124130
loss: 1.011976718902588,grad_norm: 0.9999991967059733, iteration: 124131
loss: 0.9871662855148315,grad_norm: 0.9999990946349585, iteration: 124132
loss: 1.0334173440933228,grad_norm: 0.9999990506989257, iteration: 124133
loss: 1.1343317031860352,grad_norm: 0.9999991065578737, iteration: 124134
loss: 1.1509326696395874,grad_norm: 0.9999995128522885, iteration: 124135
loss: 1.0891040563583374,grad_norm: 0.9999996689359959, iteration: 124136
loss: 1.0038533210754395,grad_norm: 1.0000000355225742, iteration: 124137
loss: 1.02481210231781,grad_norm: 0.9999990632986535, iteration: 124138
loss: 1.1985024213790894,grad_norm: 0.9999995811317475, iteration: 124139
loss: 1.0158627033233643,grad_norm: 0.9999993453705971, iteration: 124140
loss: 1.1484968662261963,grad_norm: 0.9999991534284224, iteration: 124141
loss: 1.1379907131195068,grad_norm: 0.9999992736511362, iteration: 124142
loss: 0.9969950914382935,grad_norm: 0.9273068936200053, iteration: 124143
loss: 1.3352283239364624,grad_norm: 0.9999995626045128, iteration: 124144
loss: 1.126073956489563,grad_norm: 0.9999996322878821, iteration: 124145
loss: 1.143774151802063,grad_norm: 0.9999997780986718, iteration: 124146
loss: 1.2167489528656006,grad_norm: 0.999999279839523, iteration: 124147
loss: 1.1452124118804932,grad_norm: 0.9999992611988718, iteration: 124148
loss: 1.0106596946716309,grad_norm: 0.9999998953927784, iteration: 124149
loss: 0.9773059487342834,grad_norm: 0.8249626661195174, iteration: 124150
loss: 1.039978265762329,grad_norm: 0.9999993173893358, iteration: 124151
loss: 1.0140726566314697,grad_norm: 0.9999996911598986, iteration: 124152
loss: 1.167852520942688,grad_norm: 1.0000000125605863, iteration: 124153
loss: 1.04000985622406,grad_norm: 0.8739432005078522, iteration: 124154
loss: 1.156171441078186,grad_norm: 0.999999401615199, iteration: 124155
loss: 1.1885772943496704,grad_norm: 0.9999995458599265, iteration: 124156
loss: 1.0018465518951416,grad_norm: 0.9999994910526524, iteration: 124157
loss: 0.9777551293373108,grad_norm: 0.8261780534393662, iteration: 124158
loss: 1.0267398357391357,grad_norm: 0.9372398626536876, iteration: 124159
loss: 1.0146255493164062,grad_norm: 0.9999991275244577, iteration: 124160
loss: 1.1427979469299316,grad_norm: 0.9999994506443824, iteration: 124161
loss: 1.1703718900680542,grad_norm: 0.9999992953010562, iteration: 124162
loss: 1.1237798929214478,grad_norm: 0.9773530572682766, iteration: 124163
loss: 1.0157158374786377,grad_norm: 0.9999996709235238, iteration: 124164
loss: 1.3146624565124512,grad_norm: 0.999999311763201, iteration: 124165
loss: 1.140639066696167,grad_norm: 0.9999996931639679, iteration: 124166
loss: 1.024996042251587,grad_norm: 0.8507345673108976, iteration: 124167
loss: 1.0627931356430054,grad_norm: 0.9999998823657217, iteration: 124168
loss: 1.1809121370315552,grad_norm: 0.9999995912349927, iteration: 124169
loss: 1.1440577507019043,grad_norm: 0.9999998241652264, iteration: 124170
loss: 1.0101191997528076,grad_norm: 0.8331739491932858, iteration: 124171
loss: 1.1729633808135986,grad_norm: 0.9999998389826642, iteration: 124172
loss: 1.115892767906189,grad_norm: 0.999999209014098, iteration: 124173
loss: 1.1214661598205566,grad_norm: 0.9999994216462607, iteration: 124174
loss: 1.1375454664230347,grad_norm: 0.9999992383659596, iteration: 124175
loss: 1.0557749271392822,grad_norm: 0.9999994250497644, iteration: 124176
loss: 1.0579962730407715,grad_norm: 0.9999994308012556, iteration: 124177
loss: 1.0335479974746704,grad_norm: 0.9999995660429851, iteration: 124178
loss: 1.2838375568389893,grad_norm: 0.9999999066855126, iteration: 124179
loss: 0.9952886700630188,grad_norm: 0.9999990270718654, iteration: 124180
loss: 0.9918856620788574,grad_norm: 0.9295552549971732, iteration: 124181
loss: 0.9867034554481506,grad_norm: 1.000000035433481, iteration: 124182
loss: 1.0296066999435425,grad_norm: 0.9430582138097237, iteration: 124183
loss: 1.049103021621704,grad_norm: 0.9999992771321381, iteration: 124184
loss: 1.007431149482727,grad_norm: 0.9999993548901627, iteration: 124185
loss: 1.2186733484268188,grad_norm: 0.9999995037553007, iteration: 124186
loss: 1.1402679681777954,grad_norm: 0.9999999079429696, iteration: 124187
loss: 1.2122694253921509,grad_norm: 0.9999994644702964, iteration: 124188
loss: 1.030128002166748,grad_norm: 0.9999994028939448, iteration: 124189
loss: 1.2665150165557861,grad_norm: 0.9999996163453899, iteration: 124190
loss: 1.0590767860412598,grad_norm: 0.9999993891185833, iteration: 124191
loss: 0.9704633355140686,grad_norm: 0.9422205043507261, iteration: 124192
loss: 1.2878403663635254,grad_norm: 0.9999997738810125, iteration: 124193
loss: 1.0328574180603027,grad_norm: 0.999999013804039, iteration: 124194
loss: 1.02668035030365,grad_norm: 0.8502927758691975, iteration: 124195
loss: 1.0415692329406738,grad_norm: 0.9999991717994547, iteration: 124196
loss: 1.035986065864563,grad_norm: 0.999999236860448, iteration: 124197
loss: 1.1419414281845093,grad_norm: 0.9999996827559842, iteration: 124198
loss: 1.171066164970398,grad_norm: 0.9999994296432628, iteration: 124199
loss: 1.136413812637329,grad_norm: 0.9999994201149544, iteration: 124200
loss: 1.1687196493148804,grad_norm: 0.9999991782072805, iteration: 124201
loss: 1.0491929054260254,grad_norm: 0.9999995567894708, iteration: 124202
loss: 0.998346745967865,grad_norm: 0.9999990548637293, iteration: 124203
loss: 0.9859752655029297,grad_norm: 0.9914573565406007, iteration: 124204
loss: 0.9972952604293823,grad_norm: 0.9562950369311377, iteration: 124205
loss: 1.0632784366607666,grad_norm: 0.9999992321640855, iteration: 124206
loss: 1.098708987236023,grad_norm: 0.9999997359476439, iteration: 124207
loss: 1.0731686353683472,grad_norm: 0.9999989727304945, iteration: 124208
loss: 1.160643458366394,grad_norm: 0.9999993245777324, iteration: 124209
loss: 1.0699008703231812,grad_norm: 0.9999991603829638, iteration: 124210
loss: 1.0797057151794434,grad_norm: 0.9999991086061136, iteration: 124211
loss: 1.0343239307403564,grad_norm: 0.9999991232322197, iteration: 124212
loss: 1.144423484802246,grad_norm: 0.9999993193332581, iteration: 124213
loss: 1.003722906112671,grad_norm: 0.9999991167938767, iteration: 124214
loss: 1.168047308921814,grad_norm: 0.9999993213029532, iteration: 124215
loss: 1.023720145225525,grad_norm: 0.877267018815085, iteration: 124216
loss: 0.9652583003044128,grad_norm: 0.8540731988603649, iteration: 124217
loss: 1.096773386001587,grad_norm: 0.9956175663080666, iteration: 124218
loss: 1.1242934465408325,grad_norm: 0.9999996652483033, iteration: 124219
loss: 1.2396254539489746,grad_norm: 1.0000000164131162, iteration: 124220
loss: 1.0336154699325562,grad_norm: 0.9999992658949717, iteration: 124221
loss: 1.0764564275741577,grad_norm: 0.9999998482227987, iteration: 124222
loss: 1.2329860925674438,grad_norm: 0.9999995494319883, iteration: 124223
loss: 0.9825603365898132,grad_norm: 0.9140216985998038, iteration: 124224
loss: 1.008277416229248,grad_norm: 0.9999991937631107, iteration: 124225
loss: 1.3756400346755981,grad_norm: 0.9999996189203261, iteration: 124226
loss: 1.2046804428100586,grad_norm: 0.9999995947836852, iteration: 124227
loss: 1.0100815296173096,grad_norm: 0.8544802681961512, iteration: 124228
loss: 0.9929460883140564,grad_norm: 0.8063462152974872, iteration: 124229
loss: 0.9923774600028992,grad_norm: 0.9999994563048928, iteration: 124230
loss: 1.4291751384735107,grad_norm: 0.9999997895129155, iteration: 124231
loss: 1.197094440460205,grad_norm: 0.999999805490853, iteration: 124232
loss: 1.1290887594223022,grad_norm: 0.9999999141989466, iteration: 124233
loss: 1.4309309720993042,grad_norm: 0.9999995984564571, iteration: 124234
loss: 1.333688497543335,grad_norm: 0.9999997441052245, iteration: 124235
loss: 1.3900721073150635,grad_norm: 0.9999997297514485, iteration: 124236
loss: 1.2177457809448242,grad_norm: 0.999999620930506, iteration: 124237
loss: 1.3461737632751465,grad_norm: 0.99999956930831, iteration: 124238
loss: 0.9838567972183228,grad_norm: 0.794618892567196, iteration: 124239
loss: 1.2694555521011353,grad_norm: 0.9999997615915225, iteration: 124240
loss: 1.279399037361145,grad_norm: 0.9999991943119119, iteration: 124241
loss: 1.0218859910964966,grad_norm: 0.9999999014043776, iteration: 124242
loss: 1.3470728397369385,grad_norm: 0.9999996708187224, iteration: 124243
loss: 0.9717540740966797,grad_norm: 0.9318525924998798, iteration: 124244
loss: 1.3240936994552612,grad_norm: 0.9999994158850267, iteration: 124245
loss: 1.138143539428711,grad_norm: 0.9999993140637574, iteration: 124246
loss: 1.191938042640686,grad_norm: 0.9999992605904455, iteration: 124247
loss: 1.2995132207870483,grad_norm: 0.9999993564562749, iteration: 124248
loss: 1.1246660947799683,grad_norm: 0.9999993590407857, iteration: 124249
loss: 1.028563380241394,grad_norm: 0.9999993602736456, iteration: 124250
loss: 1.2717833518981934,grad_norm: 0.9999996133720966, iteration: 124251
loss: 1.1916494369506836,grad_norm: 0.9999992324877778, iteration: 124252
loss: 1.1510210037231445,grad_norm: 0.9999993033987924, iteration: 124253
loss: 1.0755573511123657,grad_norm: 0.9999997126629973, iteration: 124254
loss: 1.1414306163787842,grad_norm: 0.999999599706406, iteration: 124255
loss: 0.9959539771080017,grad_norm: 0.9999991087980907, iteration: 124256
loss: 1.1506016254425049,grad_norm: 0.9999998513370315, iteration: 124257
loss: 1.024236798286438,grad_norm: 0.9678203474424828, iteration: 124258
loss: 1.07118821144104,grad_norm: 0.9999995676722783, iteration: 124259
loss: 1.2851299047470093,grad_norm: 0.9999993324235034, iteration: 124260
loss: 1.010399580001831,grad_norm: 0.871633539144643, iteration: 124261
loss: 1.4651687145233154,grad_norm: 0.9999993285769171, iteration: 124262
loss: 1.3061408996582031,grad_norm: 0.9999995824070633, iteration: 124263
loss: 1.0550270080566406,grad_norm: 1.0000000024066436, iteration: 124264
loss: 1.0462199449539185,grad_norm: 0.9999993063702171, iteration: 124265
loss: 1.1974177360534668,grad_norm: 0.999999576901439, iteration: 124266
loss: 1.0227465629577637,grad_norm: 0.9999997653433683, iteration: 124267
loss: 1.2236742973327637,grad_norm: 0.9999997002609391, iteration: 124268
loss: 1.0265402793884277,grad_norm: 0.8631177766521803, iteration: 124269
loss: 1.0825884342193604,grad_norm: 0.999999909698399, iteration: 124270
loss: 0.9946348071098328,grad_norm: 0.9999995943132981, iteration: 124271
loss: 1.1573017835617065,grad_norm: 0.9999993871672089, iteration: 124272
loss: 1.0309761762619019,grad_norm: 0.9999998795812235, iteration: 124273
loss: 1.118011474609375,grad_norm: 0.9999994691048074, iteration: 124274
loss: 1.034186601638794,grad_norm: 0.9999999217006229, iteration: 124275
loss: 1.1685270071029663,grad_norm: 0.9999994180977426, iteration: 124276
loss: 1.1522935628890991,grad_norm: 0.9999996635187155, iteration: 124277
loss: 1.1447068452835083,grad_norm: 0.9999992329410334, iteration: 124278
loss: 1.0899509191513062,grad_norm: 0.9999995077008568, iteration: 124279
loss: 1.5306681394577026,grad_norm: 1.0000000782120646, iteration: 124280
loss: 1.4694278240203857,grad_norm: 0.9999998577656211, iteration: 124281
loss: 1.006702184677124,grad_norm: 0.9999990081099353, iteration: 124282
loss: 1.7305760383605957,grad_norm: 0.9999998285292628, iteration: 124283
loss: 1.032726764678955,grad_norm: 0.9999998943649775, iteration: 124284
loss: 1.2869744300842285,grad_norm: 0.9999999575990359, iteration: 124285
loss: 1.2930188179016113,grad_norm: 0.9999996789804767, iteration: 124286
loss: 1.06322181224823,grad_norm: 0.9999996555588387, iteration: 124287
loss: 1.5157674551010132,grad_norm: 0.9999996591392236, iteration: 124288
loss: 1.0187212228775024,grad_norm: 0.9999991766856583, iteration: 124289
loss: 1.3358250856399536,grad_norm: 0.9999998647109537, iteration: 124290
loss: 1.1665767431259155,grad_norm: 0.9999992205058865, iteration: 124291
loss: 1.3350560665130615,grad_norm: 0.9999996678674157, iteration: 124292
loss: 1.2535400390625,grad_norm: 0.999999822329723, iteration: 124293
loss: 1.1021872758865356,grad_norm: 0.994157458043536, iteration: 124294
loss: 1.0653103590011597,grad_norm: 0.9999994087082478, iteration: 124295
loss: 1.082499384880066,grad_norm: 0.814146620254515, iteration: 124296
loss: 1.0007437467575073,grad_norm: 0.7504668937268436, iteration: 124297
loss: 1.0131938457489014,grad_norm: 0.9999992878121629, iteration: 124298
loss: 1.1511346101760864,grad_norm: 0.9999999796200031, iteration: 124299
loss: 1.2660927772521973,grad_norm: 0.9999995365421941, iteration: 124300
loss: 1.1574918031692505,grad_norm: 0.9999992804457545, iteration: 124301
loss: 1.0531952381134033,grad_norm: 0.9435983203406303, iteration: 124302
loss: 1.142930507659912,grad_norm: 0.9999997740187988, iteration: 124303
loss: 1.0018339157104492,grad_norm: 0.9999991684532945, iteration: 124304
loss: 1.0077860355377197,grad_norm: 0.8578530315783792, iteration: 124305
loss: 1.1599087715148926,grad_norm: 0.9999998391102102, iteration: 124306
loss: 1.2282053232192993,grad_norm: 0.9999999672053528, iteration: 124307
loss: 1.4122077226638794,grad_norm: 0.9999996889621269, iteration: 124308
loss: 1.1044772863388062,grad_norm: 0.9999997060654414, iteration: 124309
loss: 1.4039820432662964,grad_norm: 1.0000000309750898, iteration: 124310
loss: 1.1539357900619507,grad_norm: 0.9999998436478419, iteration: 124311
loss: 1.0007367134094238,grad_norm: 0.9999999023760144, iteration: 124312
loss: 1.2433905601501465,grad_norm: 0.9999996983847367, iteration: 124313
loss: 1.2739917039871216,grad_norm: 0.9999997385379864, iteration: 124314
loss: 1.0008355379104614,grad_norm: 0.9517134338857831, iteration: 124315
loss: 1.0729353427886963,grad_norm: 0.999999332302392, iteration: 124316
loss: 1.0142295360565186,grad_norm: 0.839324047089151, iteration: 124317
loss: 1.0688962936401367,grad_norm: 0.9999993966657418, iteration: 124318
loss: 1.2613589763641357,grad_norm: 0.9999998149838469, iteration: 124319
loss: 1.1411374807357788,grad_norm: 0.999999155101186, iteration: 124320
loss: 1.2460038661956787,grad_norm: 0.9999992695083523, iteration: 124321
loss: 1.104019284248352,grad_norm: 0.9999999323746411, iteration: 124322
loss: 1.2419229745864868,grad_norm: 0.9999995285226243, iteration: 124323
loss: 1.0998066663742065,grad_norm: 0.9999993125316015, iteration: 124324
loss: 1.0879623889923096,grad_norm: 0.9999991980180758, iteration: 124325
loss: 1.0201770067214966,grad_norm: 0.9999990250898375, iteration: 124326
loss: 0.9990412592887878,grad_norm: 0.8992537434516735, iteration: 124327
loss: 1.0480403900146484,grad_norm: 0.9999998964926843, iteration: 124328
loss: 1.0746874809265137,grad_norm: 0.9999992184056021, iteration: 124329
loss: 1.1273391246795654,grad_norm: 0.9999996187828141, iteration: 124330
loss: 1.1234008073806763,grad_norm: 0.9999996150590449, iteration: 124331
loss: 1.0244817733764648,grad_norm: 0.9999998379102439, iteration: 124332
loss: 1.261155128479004,grad_norm: 1.0000000301571907, iteration: 124333
loss: 1.142264485359192,grad_norm: 0.8600925430378137, iteration: 124334
loss: 1.1550847291946411,grad_norm: 0.9999996154088195, iteration: 124335
loss: 1.0810940265655518,grad_norm: 0.9999997841658873, iteration: 124336
loss: 1.02934992313385,grad_norm: 0.9999991273353299, iteration: 124337
loss: 1.0241771936416626,grad_norm: 0.7998294586540515, iteration: 124338
loss: 1.1555488109588623,grad_norm: 0.9999996593555325, iteration: 124339
loss: 1.0966322422027588,grad_norm: 0.999999163204483, iteration: 124340
loss: 1.0752692222595215,grad_norm: 0.9999991962570788, iteration: 124341
loss: 0.9779624342918396,grad_norm: 0.999999461134066, iteration: 124342
loss: 1.0285956859588623,grad_norm: 0.9999993052682993, iteration: 124343
loss: 1.0459239482879639,grad_norm: 0.9999997902380086, iteration: 124344
loss: 1.0109905004501343,grad_norm: 0.9637611994361004, iteration: 124345
loss: 1.1684772968292236,grad_norm: 0.999999417922316, iteration: 124346
loss: 1.0085123777389526,grad_norm: 0.9788243579599066, iteration: 124347
loss: 1.1231426000595093,grad_norm: 0.9999995112478302, iteration: 124348
loss: 1.1893893480300903,grad_norm: 0.9999990854113685, iteration: 124349
loss: 0.9709187746047974,grad_norm: 0.7351447331388951, iteration: 124350
loss: 1.0919339656829834,grad_norm: 0.9999992890686378, iteration: 124351
loss: 1.0927561521530151,grad_norm: 0.9999996052985075, iteration: 124352
loss: 1.0015031099319458,grad_norm: 0.8088589531666838, iteration: 124353
loss: 0.9899155497550964,grad_norm: 0.8871036125645506, iteration: 124354
loss: 1.0939230918884277,grad_norm: 0.9999996634890427, iteration: 124355
loss: 0.9989808797836304,grad_norm: 0.8895910209077765, iteration: 124356
loss: 1.0166270732879639,grad_norm: 0.9999991044949523, iteration: 124357
loss: 1.1112103462219238,grad_norm: 0.9999992912624764, iteration: 124358
loss: 1.2435630559921265,grad_norm: 0.9999995929656815, iteration: 124359
loss: 0.9984738826751709,grad_norm: 0.9744174377532369, iteration: 124360
loss: 1.1476380825042725,grad_norm: 0.9999996815748992, iteration: 124361
loss: 0.9966425895690918,grad_norm: 0.8474239432800647, iteration: 124362
loss: 1.0764178037643433,grad_norm: 0.8962714445935402, iteration: 124363
loss: 1.0676206350326538,grad_norm: 0.9999995656909277, iteration: 124364
loss: 0.9772454500198364,grad_norm: 0.9999996809899749, iteration: 124365
loss: 1.0615800619125366,grad_norm: 0.9999996730782784, iteration: 124366
loss: 1.1748087406158447,grad_norm: 0.9999996650351376, iteration: 124367
loss: 1.053444266319275,grad_norm: 0.9999995881627257, iteration: 124368
loss: 1.351588487625122,grad_norm: 0.9999995524456422, iteration: 124369
loss: 1.08208429813385,grad_norm: 0.9999997103381681, iteration: 124370
loss: 1.0093257427215576,grad_norm: 0.9999992556965391, iteration: 124371
loss: 1.1110055446624756,grad_norm: 0.9999995438506684, iteration: 124372
loss: 0.9928092360496521,grad_norm: 0.8532994303600097, iteration: 124373
loss: 1.26250422000885,grad_norm: 0.9999997148635632, iteration: 124374
loss: 1.0407991409301758,grad_norm: 0.9999998172837985, iteration: 124375
loss: 1.1412371397018433,grad_norm: 0.9999994277649671, iteration: 124376
loss: 0.9831674695014954,grad_norm: 0.9999991601161046, iteration: 124377
loss: 0.9777575731277466,grad_norm: 0.9999989665790565, iteration: 124378
loss: 0.9715601801872253,grad_norm: 0.8846085708543012, iteration: 124379
loss: 1.0615819692611694,grad_norm: 0.9999991790774091, iteration: 124380
loss: 1.0016183853149414,grad_norm: 0.7914337628414548, iteration: 124381
loss: 1.0477855205535889,grad_norm: 0.9999991600092232, iteration: 124382
loss: 1.1084034442901611,grad_norm: 0.9999993682762941, iteration: 124383
loss: 1.0201594829559326,grad_norm: 0.9999994878553623, iteration: 124384
loss: 1.0245397090911865,grad_norm: 0.996753331342907, iteration: 124385
loss: 1.051279067993164,grad_norm: 0.9191221739918859, iteration: 124386
loss: 1.1327128410339355,grad_norm: 0.9999996364673496, iteration: 124387
loss: 1.147871494293213,grad_norm: 0.9999997679857155, iteration: 124388
loss: 1.0087559223175049,grad_norm: 0.9024999067331583, iteration: 124389
loss: 1.0115426778793335,grad_norm: 0.9091376436631232, iteration: 124390
loss: 1.155350685119629,grad_norm: 0.9999996738491161, iteration: 124391
loss: 1.1228290796279907,grad_norm: 0.9999990884009501, iteration: 124392
loss: 1.1266593933105469,grad_norm: 0.9999991432037693, iteration: 124393
loss: 1.468127965927124,grad_norm: 0.999999811605297, iteration: 124394
loss: 1.073391318321228,grad_norm: 0.9999993333969553, iteration: 124395
loss: 1.0073057413101196,grad_norm: 0.9932002274233543, iteration: 124396
loss: 0.9959974884986877,grad_norm: 0.9999996261126978, iteration: 124397
loss: 1.0967894792556763,grad_norm: 0.9999995394233432, iteration: 124398
loss: 1.1629167795181274,grad_norm: 0.9999999852740987, iteration: 124399
loss: 0.9923970699310303,grad_norm: 0.9375308545885593, iteration: 124400
loss: 1.0604006052017212,grad_norm: 0.9999996640182033, iteration: 124401
loss: 1.1639801263809204,grad_norm: 0.9999994730245736, iteration: 124402
loss: 1.0397237539291382,grad_norm: 0.9999999505366434, iteration: 124403
loss: 1.1040294170379639,grad_norm: 0.9999991580183216, iteration: 124404
loss: 1.0905531644821167,grad_norm: 0.9999996551149118, iteration: 124405
loss: 0.9507567286491394,grad_norm: 0.9091587860216489, iteration: 124406
loss: 1.2177886962890625,grad_norm: 0.9999992201898534, iteration: 124407
loss: 1.1598401069641113,grad_norm: 0.999999326679295, iteration: 124408
loss: 1.0054173469543457,grad_norm: 0.9999992122879701, iteration: 124409
loss: 1.1934255361557007,grad_norm: 0.9999999150283613, iteration: 124410
loss: 1.239077091217041,grad_norm: 0.9999997955541569, iteration: 124411
loss: 1.0578041076660156,grad_norm: 0.9999998744063453, iteration: 124412
loss: 1.0636874437332153,grad_norm: 0.9999993684307373, iteration: 124413
loss: 1.0899947881698608,grad_norm: 0.9999998254998894, iteration: 124414
loss: 1.0992430448532104,grad_norm: 0.9999997599902838, iteration: 124415
loss: 1.0316166877746582,grad_norm: 0.9712758114056361, iteration: 124416
loss: 0.9737440943717957,grad_norm: 0.8385292010535044, iteration: 124417
loss: 1.317858338356018,grad_norm: 0.9999995216723402, iteration: 124418
loss: 1.0010154247283936,grad_norm: 0.7918398297237594, iteration: 124419
loss: 1.0140646696090698,grad_norm: 0.9462999580637179, iteration: 124420
loss: 1.0403627157211304,grad_norm: 0.8960663835057008, iteration: 124421
loss: 1.0235971212387085,grad_norm: 0.999999248076034, iteration: 124422
loss: 1.0815272331237793,grad_norm: 0.9999998696160608, iteration: 124423
loss: 0.9815492630004883,grad_norm: 0.9999991448099531, iteration: 124424
loss: 1.104453206062317,grad_norm: 0.9999993303351736, iteration: 124425
loss: 1.0147995948791504,grad_norm: 0.9999995216020481, iteration: 124426
loss: 1.116373062133789,grad_norm: 0.9999995596394912, iteration: 124427
loss: 0.9660412073135376,grad_norm: 0.9997958539514089, iteration: 124428
loss: 1.0412195920944214,grad_norm: 0.9737561297929119, iteration: 124429
loss: 0.9709749221801758,grad_norm: 0.8878991531366899, iteration: 124430
loss: 1.1160708665847778,grad_norm: 0.9999991765318804, iteration: 124431
loss: 1.0024737119674683,grad_norm: 0.9999998142218546, iteration: 124432
loss: 0.9715380072593689,grad_norm: 0.9999990173251275, iteration: 124433
loss: 1.0132561922073364,grad_norm: 0.7493126539263084, iteration: 124434
loss: 1.1124058961868286,grad_norm: 0.9999993058034263, iteration: 124435
loss: 1.0359644889831543,grad_norm: 0.999999693979847, iteration: 124436
loss: 1.1656612157821655,grad_norm: 0.999999112218564, iteration: 124437
loss: 1.0620650053024292,grad_norm: 0.9999998563934017, iteration: 124438
loss: 0.9648292660713196,grad_norm: 0.999999423031531, iteration: 124439
loss: 1.112414836883545,grad_norm: 0.9617543048530208, iteration: 124440
loss: 1.0218958854675293,grad_norm: 0.9999997564598725, iteration: 124441
loss: 1.086492896080017,grad_norm: 0.9999992644271049, iteration: 124442
loss: 1.01943039894104,grad_norm: 0.999999683141273, iteration: 124443
loss: 1.3272507190704346,grad_norm: 0.9999997023736767, iteration: 124444
loss: 1.1538143157958984,grad_norm: 0.9999996862564492, iteration: 124445
loss: 1.141014575958252,grad_norm: 0.999999525983792, iteration: 124446
loss: 1.033798098564148,grad_norm: 0.9598697962579293, iteration: 124447
loss: 1.0815722942352295,grad_norm: 0.9999997557792861, iteration: 124448
loss: 1.2653440237045288,grad_norm: 0.9999998883926429, iteration: 124449
loss: 1.2757824659347534,grad_norm: 0.9999997533266024, iteration: 124450
loss: 1.1750555038452148,grad_norm: 1.000000020905596, iteration: 124451
loss: 1.172020435333252,grad_norm: 0.9999993511822353, iteration: 124452
loss: 1.1994733810424805,grad_norm: 0.999999892946448, iteration: 124453
loss: 1.18779456615448,grad_norm: 0.9999998186341066, iteration: 124454
loss: 1.0547975301742554,grad_norm: 0.9999996835438167, iteration: 124455
loss: 1.076856017112732,grad_norm: 0.9999991000784116, iteration: 124456
loss: 1.1705007553100586,grad_norm: 0.9999990600301343, iteration: 124457
loss: 1.073519229888916,grad_norm: 0.9999995800387035, iteration: 124458
loss: 1.1502048969268799,grad_norm: 0.9999993972222695, iteration: 124459
loss: 1.186923861503601,grad_norm: 0.9999993491499649, iteration: 124460
loss: 1.2051641941070557,grad_norm: 0.9999997334173149, iteration: 124461
loss: 1.0975923538208008,grad_norm: 0.9999991314770783, iteration: 124462
loss: 1.1777026653289795,grad_norm: 0.9999995680822007, iteration: 124463
loss: 1.4045538902282715,grad_norm: 0.999999730762207, iteration: 124464
loss: 1.1819732189178467,grad_norm: 0.9999995529925955, iteration: 124465
loss: 1.3062471151351929,grad_norm: 0.9999995670858058, iteration: 124466
loss: 1.3909342288970947,grad_norm: 0.9999997605902662, iteration: 124467
loss: 1.3178613185882568,grad_norm: 0.9999996078798393, iteration: 124468
loss: 1.284082055091858,grad_norm: 0.9999996153355518, iteration: 124469
loss: 1.0006901025772095,grad_norm: 0.99999951995725, iteration: 124470
loss: 1.1752110719680786,grad_norm: 0.9999995820277919, iteration: 124471
loss: 1.1246072053909302,grad_norm: 0.9999996877690006, iteration: 124472
loss: 1.28225839138031,grad_norm: 0.9999998796022743, iteration: 124473
loss: 1.245436668395996,grad_norm: 0.9999995540285658, iteration: 124474
loss: 1.1978999376296997,grad_norm: 0.9999999757974776, iteration: 124475
loss: 1.29355788230896,grad_norm: 0.9999997192901559, iteration: 124476
loss: 1.2533323764801025,grad_norm: 0.9999998195717793, iteration: 124477
loss: 1.0518845319747925,grad_norm: 0.9999990950049656, iteration: 124478
loss: 1.1223247051239014,grad_norm: 0.999999216447826, iteration: 124479
loss: 1.5264325141906738,grad_norm: 0.9999999104083577, iteration: 124480
loss: 1.1624220609664917,grad_norm: 0.999999339487982, iteration: 124481
loss: 1.1505674123764038,grad_norm: 0.9999993504847712, iteration: 124482
loss: 1.1048476696014404,grad_norm: 0.9999997854692965, iteration: 124483
loss: 1.2530784606933594,grad_norm: 0.9999998517671495, iteration: 124484
loss: 1.159878134727478,grad_norm: 0.9999995004207809, iteration: 124485
loss: 1.0339131355285645,grad_norm: 0.9999989922737389, iteration: 124486
loss: 1.325412631034851,grad_norm: 0.9999997441637076, iteration: 124487
loss: 1.1140745878219604,grad_norm: 0.9999998437389045, iteration: 124488
loss: 1.2460031509399414,grad_norm: 0.9999998371714662, iteration: 124489
loss: 1.3438286781311035,grad_norm: 0.9999999032006877, iteration: 124490
loss: 1.1155831813812256,grad_norm: 0.999999165609348, iteration: 124491
loss: 1.1881002187728882,grad_norm: 0.9999997499236026, iteration: 124492
loss: 1.2977724075317383,grad_norm: 0.9999996709476825, iteration: 124493
loss: 1.533004641532898,grad_norm: 0.9999998908833043, iteration: 124494
loss: 1.1722724437713623,grad_norm: 0.9999998180705919, iteration: 124495
loss: 1.1881848573684692,grad_norm: 0.9999999250332536, iteration: 124496
loss: 1.191374659538269,grad_norm: 0.9999999936965277, iteration: 124497
loss: 1.2957099676132202,grad_norm: 0.9999998242596234, iteration: 124498
loss: 1.240491509437561,grad_norm: 0.9999994515654623, iteration: 124499
loss: 1.183082103729248,grad_norm: 0.9999993142793827, iteration: 124500
loss: 1.1528363227844238,grad_norm: 0.9999994939609587, iteration: 124501
loss: 1.1819605827331543,grad_norm: 0.9999998484573012, iteration: 124502
loss: 1.1637953519821167,grad_norm: 0.9999998734705912, iteration: 124503
loss: 1.4030762910842896,grad_norm: 0.999999587161073, iteration: 124504
loss: 1.147201657295227,grad_norm: 0.9999999703754265, iteration: 124505
loss: 1.293415904045105,grad_norm: 0.9999994674676984, iteration: 124506
loss: 1.6044011116027832,grad_norm: 0.9999999597251202, iteration: 124507
loss: 1.291678786277771,grad_norm: 0.9999998970624172, iteration: 124508
loss: 1.3033463954925537,grad_norm: 0.9999996888482703, iteration: 124509
loss: 1.283818006515503,grad_norm: 0.9999998214041808, iteration: 124510
loss: 1.1827192306518555,grad_norm: 0.9999997871109023, iteration: 124511
loss: 1.2007598876953125,grad_norm: 0.9999997726368212, iteration: 124512
loss: 1.1544557809829712,grad_norm: 0.9999996731106275, iteration: 124513
loss: 1.1194632053375244,grad_norm: 0.9999993757989609, iteration: 124514
loss: 1.1035271883010864,grad_norm: 0.999999876548212, iteration: 124515
loss: 1.0618754625320435,grad_norm: 0.9999997883525754, iteration: 124516
loss: 1.294413685798645,grad_norm: 0.9999995778952651, iteration: 124517
loss: 1.0354440212249756,grad_norm: 0.9999993384405497, iteration: 124518
loss: 1.210138201713562,grad_norm: 0.999999819052605, iteration: 124519
loss: 1.1421773433685303,grad_norm: 0.9999998759965406, iteration: 124520
loss: 1.1891157627105713,grad_norm: 0.9999991797317005, iteration: 124521
loss: 1.0606051683425903,grad_norm: 0.9999999858700751, iteration: 124522
loss: 1.0984121561050415,grad_norm: 0.9999993698969089, iteration: 124523
loss: 1.1207579374313354,grad_norm: 0.9999995262125511, iteration: 124524
loss: 1.0576121807098389,grad_norm: 0.9999998082129578, iteration: 124525
loss: 1.1229970455169678,grad_norm: 0.9999995557623372, iteration: 124526
loss: 1.1316273212432861,grad_norm: 0.9999996176072692, iteration: 124527
loss: 1.0135234594345093,grad_norm: 0.999999540474203, iteration: 124528
loss: 1.0084909200668335,grad_norm: 0.8534784672881504, iteration: 124529
loss: 1.274318814277649,grad_norm: 0.9999998605969386, iteration: 124530
loss: 1.115182876586914,grad_norm: 0.9999997228028422, iteration: 124531
loss: 1.0959373712539673,grad_norm: 0.9999996776100246, iteration: 124532
loss: 1.0440210103988647,grad_norm: 0.9999993188104673, iteration: 124533
loss: 1.0965436697006226,grad_norm: 0.9999994125390437, iteration: 124534
loss: 1.1724693775177002,grad_norm: 0.9999996017855104, iteration: 124535
loss: 1.3008850812911987,grad_norm: 0.9999995849423218, iteration: 124536
loss: 1.029671311378479,grad_norm: 0.9999992510951382, iteration: 124537
loss: 1.0323090553283691,grad_norm: 0.999999827440306, iteration: 124538
loss: 1.2581899166107178,grad_norm: 0.9999992484906631, iteration: 124539
loss: 1.179529070854187,grad_norm: 0.9999993293256602, iteration: 124540
loss: 1.3382139205932617,grad_norm: 0.999999579388619, iteration: 124541
loss: 1.081203818321228,grad_norm: 0.9999995194249768, iteration: 124542
loss: 1.0114107131958008,grad_norm: 0.9999992622250878, iteration: 124543
loss: 1.015326976776123,grad_norm: 0.9999991817693962, iteration: 124544
loss: 1.111474633216858,grad_norm: 0.9791613481387017, iteration: 124545
loss: 1.0602747201919556,grad_norm: 0.9999999165547504, iteration: 124546
loss: 1.0397684574127197,grad_norm: 0.9999994928672798, iteration: 124547
loss: 1.0484132766723633,grad_norm: 0.9999994193566724, iteration: 124548
loss: 1.054947853088379,grad_norm: 0.9999995844754983, iteration: 124549
loss: 1.170668601989746,grad_norm: 0.9999992709696909, iteration: 124550
loss: 1.0810792446136475,grad_norm: 0.9999991684187564, iteration: 124551
loss: 1.0347936153411865,grad_norm: 0.9999993489791626, iteration: 124552
loss: 1.1024564504623413,grad_norm: 0.9999994744458347, iteration: 124553
loss: 1.1655690670013428,grad_norm: 0.9999997217209602, iteration: 124554
loss: 1.0732853412628174,grad_norm: 0.9999994532578773, iteration: 124555
loss: 1.018896222114563,grad_norm: 0.9999998920512991, iteration: 124556
loss: 1.0842947959899902,grad_norm: 0.9999998962771547, iteration: 124557
loss: 1.0945736169815063,grad_norm: 0.9999998265917865, iteration: 124558
loss: 1.061360478401184,grad_norm: 0.9999991133495845, iteration: 124559
loss: 1.1148626804351807,grad_norm: 0.9999993825112414, iteration: 124560
loss: 1.1583260297775269,grad_norm: 0.9999999582754997, iteration: 124561
loss: 1.096121072769165,grad_norm: 0.999999400474337, iteration: 124562
loss: 1.0405124425888062,grad_norm: 0.999999728230368, iteration: 124563
loss: 1.2892588376998901,grad_norm: 0.99999954693247, iteration: 124564
loss: 1.0124225616455078,grad_norm: 0.9999990577979896, iteration: 124565
loss: 1.1218653917312622,grad_norm: 0.9999996951477917, iteration: 124566
loss: 1.1982874870300293,grad_norm: 0.999999445004303, iteration: 124567
loss: 1.0505647659301758,grad_norm: 0.898719196218302, iteration: 124568
loss: 1.2220778465270996,grad_norm: 0.9999993100136265, iteration: 124569
loss: 1.3688117265701294,grad_norm: 0.9999996282270049, iteration: 124570
loss: 1.0127532482147217,grad_norm: 0.9999989448464547, iteration: 124571
loss: 1.0717159509658813,grad_norm: 0.9999991499630471, iteration: 124572
loss: 1.2327908277511597,grad_norm: 0.9999996941907896, iteration: 124573
loss: 1.1093220710754395,grad_norm: 0.9797155816655452, iteration: 124574
loss: 1.3258452415466309,grad_norm: 0.9999994513101235, iteration: 124575
loss: 1.1251715421676636,grad_norm: 0.9999993964750419, iteration: 124576
loss: 1.1346174478530884,grad_norm: 0.9999991542986035, iteration: 124577
loss: 1.2285100221633911,grad_norm: 0.9999998975606512, iteration: 124578
loss: 1.0380750894546509,grad_norm: 0.9773778641111074, iteration: 124579
loss: 1.2385411262512207,grad_norm: 0.9999995929720403, iteration: 124580
loss: 1.215656042098999,grad_norm: 0.9999997953279454, iteration: 124581
loss: 1.3534244298934937,grad_norm: 0.9999996443139239, iteration: 124582
loss: 1.1357829570770264,grad_norm: 0.9999997922137295, iteration: 124583
loss: 1.2268368005752563,grad_norm: 0.9999997402655815, iteration: 124584
loss: 1.0387126207351685,grad_norm: 0.9844937218040867, iteration: 124585
loss: 1.021431803703308,grad_norm: 0.966848228660709, iteration: 124586
loss: 1.1685576438903809,grad_norm: 0.99999968470175, iteration: 124587
loss: 1.0939494371414185,grad_norm: 0.9999994378486485, iteration: 124588
loss: 1.1222325563430786,grad_norm: 0.9999994233703414, iteration: 124589
loss: 1.0459344387054443,grad_norm: 0.9956461721250146, iteration: 124590
loss: 1.0628844499588013,grad_norm: 0.9999992567779172, iteration: 124591
loss: 1.0592559576034546,grad_norm: 0.9999993792581342, iteration: 124592
loss: 1.0515028238296509,grad_norm: 0.9999996423815353, iteration: 124593
loss: 1.1818702220916748,grad_norm: 0.9999995566323425, iteration: 124594
loss: 1.00996994972229,grad_norm: 0.8767442117326067, iteration: 124595
loss: 1.0337519645690918,grad_norm: 0.9999992079148159, iteration: 124596
loss: 1.311716914176941,grad_norm: 0.999999641850804, iteration: 124597
loss: 1.1616884469985962,grad_norm: 0.9999995326887305, iteration: 124598
loss: 1.3336260318756104,grad_norm: 0.999999681007289, iteration: 124599
loss: 1.0019145011901855,grad_norm: 0.9220612720729001, iteration: 124600
loss: 1.177367091178894,grad_norm: 0.9999996326030295, iteration: 124601
loss: 1.0075139999389648,grad_norm: 0.9625844530195107, iteration: 124602
loss: 1.1681798696517944,grad_norm: 0.9999991535654327, iteration: 124603
loss: 1.0740472078323364,grad_norm: 0.9999994728094657, iteration: 124604
loss: 1.192651629447937,grad_norm: 0.999999228811518, iteration: 124605
loss: 1.0609036684036255,grad_norm: 0.9999991659991954, iteration: 124606
loss: 1.2918260097503662,grad_norm: 1.0000000160914098, iteration: 124607
loss: 1.1206766366958618,grad_norm: 0.9999996558993703, iteration: 124608
loss: 1.0851004123687744,grad_norm: 0.999999274460586, iteration: 124609
loss: 1.0822627544403076,grad_norm: 0.999999275781901, iteration: 124610
loss: 1.0419613122940063,grad_norm: 0.999999475204948, iteration: 124611
loss: 1.0719588994979858,grad_norm: 0.8640046805390135, iteration: 124612
loss: 1.0417405366897583,grad_norm: 0.9999991770253156, iteration: 124613
loss: 1.0170468091964722,grad_norm: 0.9999997512900703, iteration: 124614
loss: 1.0921237468719482,grad_norm: 0.9999992654577052, iteration: 124615
loss: 1.0513721704483032,grad_norm: 0.9801181895195921, iteration: 124616
loss: 1.3003865480422974,grad_norm: 0.9999992477590303, iteration: 124617
loss: 1.3520742654800415,grad_norm: 0.9999997224533721, iteration: 124618
loss: 1.0165170431137085,grad_norm: 0.938387862831532, iteration: 124619
loss: 1.1566579341888428,grad_norm: 0.9999996721693272, iteration: 124620
loss: 1.0531494617462158,grad_norm: 0.9999997223005073, iteration: 124621
loss: 1.0307931900024414,grad_norm: 0.9999999084196942, iteration: 124622
loss: 1.0784097909927368,grad_norm: 1.0000000442389374, iteration: 124623
loss: 1.049487829208374,grad_norm: 0.9999995833969254, iteration: 124624
loss: 1.269546627998352,grad_norm: 0.9999993159770826, iteration: 124625
loss: 1.0674384832382202,grad_norm: 0.9999994017385002, iteration: 124626
loss: 1.0273644924163818,grad_norm: 0.999999042317025, iteration: 124627
loss: 0.9817696213722229,grad_norm: 0.9999996133348844, iteration: 124628
loss: 1.2830390930175781,grad_norm: 0.9999993143392568, iteration: 124629
loss: 1.236573338508606,grad_norm: 0.9999998529916734, iteration: 124630
loss: 1.1738237142562866,grad_norm: 0.9999999090545557, iteration: 124631
loss: 1.135048508644104,grad_norm: 0.9999992564403812, iteration: 124632
loss: 0.9981786012649536,grad_norm: 0.9999991703545026, iteration: 124633
loss: 1.0030161142349243,grad_norm: 0.9999992681682397, iteration: 124634
loss: 1.2196050882339478,grad_norm: 0.9999998488017013, iteration: 124635
loss: 1.0450003147125244,grad_norm: 0.9952667917736521, iteration: 124636
loss: 1.0799556970596313,grad_norm: 0.9999993150877048, iteration: 124637
loss: 1.0164761543273926,grad_norm: 0.9999991555741515, iteration: 124638
loss: 1.0236674547195435,grad_norm: 0.9999995582651765, iteration: 124639
loss: 1.004379153251648,grad_norm: 0.7809557662897719, iteration: 124640
loss: 0.9853477478027344,grad_norm: 0.87805204776675, iteration: 124641
loss: 1.0071570873260498,grad_norm: 0.9999990400323735, iteration: 124642
loss: 1.1192673444747925,grad_norm: 0.999999611098205, iteration: 124643
loss: 0.9975228309631348,grad_norm: 0.9484547289332541, iteration: 124644
loss: 1.1573525667190552,grad_norm: 0.9999991823110722, iteration: 124645
loss: 0.9972527027130127,grad_norm: 0.9945735443925683, iteration: 124646
loss: 1.1484214067459106,grad_norm: 0.9999993077244744, iteration: 124647
loss: 0.9527853727340698,grad_norm: 0.9186010909616981, iteration: 124648
loss: 1.0173577070236206,grad_norm: 0.8478273687793634, iteration: 124649
loss: 1.1394593715667725,grad_norm: 0.9999995945235667, iteration: 124650
loss: 1.1716018915176392,grad_norm: 0.999999386231482, iteration: 124651
loss: 1.1661262512207031,grad_norm: 0.9999995107103754, iteration: 124652
loss: 1.015880823135376,grad_norm: 0.8856921221080654, iteration: 124653
loss: 1.098192572593689,grad_norm: 0.7953148070281808, iteration: 124654
loss: 1.0173170566558838,grad_norm: 0.9999991419820461, iteration: 124655
loss: 1.0272489786148071,grad_norm: 0.9321704953532087, iteration: 124656
loss: 1.1146286725997925,grad_norm: 0.9999992432632548, iteration: 124657
loss: 1.020749568939209,grad_norm: 0.9999994874533727, iteration: 124658
loss: 0.9960623979568481,grad_norm: 0.7401257997952967, iteration: 124659
loss: 1.047028660774231,grad_norm: 0.9999994734984846, iteration: 124660
loss: 1.1540298461914062,grad_norm: 0.9999993176837304, iteration: 124661
loss: 1.0128982067108154,grad_norm: 0.9999995023706933, iteration: 124662
loss: 1.1902601718902588,grad_norm: 0.9999998315749469, iteration: 124663
loss: 1.1477984189987183,grad_norm: 0.9999992792234751, iteration: 124664
loss: 1.0848408937454224,grad_norm: 0.9999990412386353, iteration: 124665
loss: 1.2002297639846802,grad_norm: 0.9999993589290829, iteration: 124666
loss: 1.276063323020935,grad_norm: 0.9999999526183595, iteration: 124667
loss: 1.1108613014221191,grad_norm: 0.9999993839324968, iteration: 124668
loss: 0.9308714270591736,grad_norm: 0.9999990258722165, iteration: 124669
loss: 0.9728145003318787,grad_norm: 0.8078114431778195, iteration: 124670
loss: 1.0529731512069702,grad_norm: 0.9999994134143779, iteration: 124671
loss: 1.0032286643981934,grad_norm: 0.9620633708123449, iteration: 124672
loss: 1.067258596420288,grad_norm: 0.9999995111033702, iteration: 124673
loss: 0.9927676916122437,grad_norm: 0.8919950020865954, iteration: 124674
loss: 1.0619654655456543,grad_norm: 0.9999993303360657, iteration: 124675
loss: 1.1170611381530762,grad_norm: 0.9999999628487931, iteration: 124676
loss: 1.200642704963684,grad_norm: 0.9999993168112684, iteration: 124677
loss: 0.9948009848594666,grad_norm: 0.9999992646550915, iteration: 124678
loss: 1.1170151233673096,grad_norm: 0.999999136550213, iteration: 124679
loss: 0.9986442923545837,grad_norm: 0.7415297889703921, iteration: 124680
loss: 1.0354883670806885,grad_norm: 0.8481624323499959, iteration: 124681
loss: 1.0074361562728882,grad_norm: 0.777338472304217, iteration: 124682
loss: 1.1127327680587769,grad_norm: 0.9999993412211449, iteration: 124683
loss: 1.0437525510787964,grad_norm: 0.9999995209630883, iteration: 124684
loss: 1.1472960710525513,grad_norm: 0.9999994396681755, iteration: 124685
loss: 0.9695965647697449,grad_norm: 0.9999990927443129, iteration: 124686
loss: 1.1354414224624634,grad_norm: 0.9999996298354856, iteration: 124687
loss: 1.081336259841919,grad_norm: 0.999999301773366, iteration: 124688
loss: 1.1555330753326416,grad_norm: 0.9999994427570691, iteration: 124689
loss: 1.0341181755065918,grad_norm: 0.9999990224602336, iteration: 124690
loss: 1.1641266345977783,grad_norm: 0.9999998711972715, iteration: 124691
loss: 0.9886676073074341,grad_norm: 0.9989072649780035, iteration: 124692
loss: 1.1473438739776611,grad_norm: 0.9999996690664246, iteration: 124693
loss: 1.3405020236968994,grad_norm: 0.99999975243964, iteration: 124694
loss: 1.045823335647583,grad_norm: 0.999999865824124, iteration: 124695
loss: 1.1958873271942139,grad_norm: 0.9999996495965485, iteration: 124696
loss: 1.037634253501892,grad_norm: 0.9245686562734594, iteration: 124697
loss: 1.130812168121338,grad_norm: 0.9999993971891169, iteration: 124698
loss: 1.001057505607605,grad_norm: 0.9999991155708318, iteration: 124699
loss: 1.0076320171356201,grad_norm: 0.9999992003483812, iteration: 124700
loss: 1.0414706468582153,grad_norm: 0.9999994252879048, iteration: 124701
loss: 1.175124168395996,grad_norm: 0.9999996667222252, iteration: 124702
loss: 1.1112046241760254,grad_norm: 0.9999992366972519, iteration: 124703
loss: 1.0787715911865234,grad_norm: 0.9703041022784507, iteration: 124704
loss: 1.0288026332855225,grad_norm: 0.9999995510714536, iteration: 124705
loss: 1.0524604320526123,grad_norm: 0.9999996808465147, iteration: 124706
loss: 0.9764539003372192,grad_norm: 0.7084231014714919, iteration: 124707
loss: 1.1730682849884033,grad_norm: 0.9999993343544208, iteration: 124708
loss: 1.1299587488174438,grad_norm: 0.9999992961694161, iteration: 124709
loss: 1.1035451889038086,grad_norm: 0.9999992528946876, iteration: 124710
loss: 1.0964176654815674,grad_norm: 0.9999993247246626, iteration: 124711
loss: 0.9801957011222839,grad_norm: 0.9999993145734214, iteration: 124712
loss: 1.0084826946258545,grad_norm: 0.9608278557531255, iteration: 124713
loss: 1.0128954648971558,grad_norm: 0.9999996161467607, iteration: 124714
loss: 0.9952197670936584,grad_norm: 0.8958286687691565, iteration: 124715
loss: 1.023476243019104,grad_norm: 0.9774964852263611, iteration: 124716
loss: 1.0845861434936523,grad_norm: 0.9999998985599924, iteration: 124717
loss: 1.0242345333099365,grad_norm: 0.7971096833865335, iteration: 124718
loss: 1.0877265930175781,grad_norm: 0.9999998463587455, iteration: 124719
loss: 1.2365974187850952,grad_norm: 0.9999998328701628, iteration: 124720
loss: 1.1604945659637451,grad_norm: 0.9999996166220413, iteration: 124721
loss: 1.0534652471542358,grad_norm: 0.9999990574149458, iteration: 124722
loss: 1.2443093061447144,grad_norm: 0.9999997194754487, iteration: 124723
loss: 1.0020465850830078,grad_norm: 0.999999099772773, iteration: 124724
loss: 1.0636018514633179,grad_norm: 0.8336076103074126, iteration: 124725
loss: 0.9985116124153137,grad_norm: 0.923873571232573, iteration: 124726
loss: 1.03813636302948,grad_norm: 0.9999997678541036, iteration: 124727
loss: 1.0577425956726074,grad_norm: 0.9999999086994596, iteration: 124728
loss: 1.0857383012771606,grad_norm: 0.9999991206817823, iteration: 124729
loss: 0.9866893887519836,grad_norm: 0.9723575878064237, iteration: 124730
loss: 1.386274814605713,grad_norm: 0.9999999886635333, iteration: 124731
loss: 1.0167841911315918,grad_norm: 0.9999995416009284, iteration: 124732
loss: 0.9932105541229248,grad_norm: 0.9999992488837468, iteration: 124733
loss: 1.180120825767517,grad_norm: 0.9999995553612282, iteration: 124734
loss: 1.041854739189148,grad_norm: 0.9999996364697131, iteration: 124735
loss: 0.9823479652404785,grad_norm: 0.8453319202456877, iteration: 124736
loss: 1.1369493007659912,grad_norm: 0.9999999282811012, iteration: 124737
loss: 1.016919493675232,grad_norm: 0.9999995198550164, iteration: 124738
loss: 1.2089930772781372,grad_norm: 0.9999994031534051, iteration: 124739
loss: 1.0070629119873047,grad_norm: 0.999999363287263, iteration: 124740
loss: 0.970357358455658,grad_norm: 0.7585482253424615, iteration: 124741
loss: 0.9939784407615662,grad_norm: 0.852424767346943, iteration: 124742
loss: 1.3176523447036743,grad_norm: 0.999999290441439, iteration: 124743
loss: 1.1307519674301147,grad_norm: 0.9999994409361865, iteration: 124744
loss: 1.156057596206665,grad_norm: 0.9999990951334085, iteration: 124745
loss: 0.9966732859611511,grad_norm: 0.8592464411200553, iteration: 124746
loss: 1.0513631105422974,grad_norm: 0.9999995742757531, iteration: 124747
loss: 1.4812583923339844,grad_norm: 0.9999996365318516, iteration: 124748
loss: 1.119330883026123,grad_norm: 0.8639634623300607, iteration: 124749
loss: 1.2924240827560425,grad_norm: 0.9999999151382946, iteration: 124750
loss: 1.0783060789108276,grad_norm: 0.9999992483071659, iteration: 124751
loss: 1.052987813949585,grad_norm: 0.9999997139888994, iteration: 124752
loss: 1.0415440797805786,grad_norm: 0.9999993594250737, iteration: 124753
loss: 1.1178004741668701,grad_norm: 0.9999993626369237, iteration: 124754
loss: 1.0307973623275757,grad_norm: 0.9592383598089221, iteration: 124755
loss: 1.083299160003662,grad_norm: 0.999999329957071, iteration: 124756
loss: 1.3497618436813354,grad_norm: 0.9999998995868701, iteration: 124757
loss: 1.0909785032272339,grad_norm: 0.9999995602609346, iteration: 124758
loss: 1.0694645643234253,grad_norm: 0.9999998371807293, iteration: 124759
loss: 1.1889095306396484,grad_norm: 0.99999959178684, iteration: 124760
loss: 1.0427278280258179,grad_norm: 0.9999995643212582, iteration: 124761
loss: 1.0583597421646118,grad_norm: 0.9999998501690547, iteration: 124762
loss: 1.0837821960449219,grad_norm: 0.9449219113042241, iteration: 124763
loss: 1.0563174486160278,grad_norm: 0.9999998139755996, iteration: 124764
loss: 1.0418579578399658,grad_norm: 0.9999992300055106, iteration: 124765
loss: 1.030643105506897,grad_norm: 0.7931620172203953, iteration: 124766
loss: 1.0966618061065674,grad_norm: 0.9999995684436228, iteration: 124767
loss: 1.1069557666778564,grad_norm: 0.9999991579789885, iteration: 124768
loss: 0.9981240034103394,grad_norm: 0.9246456532905635, iteration: 124769
loss: 1.1706470251083374,grad_norm: 0.9999995178630624, iteration: 124770
loss: 1.0744801759719849,grad_norm: 0.9999993182340808, iteration: 124771
loss: 0.991456151008606,grad_norm: 0.8718178332514708, iteration: 124772
loss: 1.0745023488998413,grad_norm: 0.9999990914885432, iteration: 124773
loss: 1.0437285900115967,grad_norm: 0.999999670651956, iteration: 124774
loss: 1.1008121967315674,grad_norm: 0.9999993417689924, iteration: 124775
loss: 1.1105351448059082,grad_norm: 0.9999994850640288, iteration: 124776
loss: 1.144856333732605,grad_norm: 0.9999994599850442, iteration: 124777
loss: 1.0474021434783936,grad_norm: 0.9999991995470704, iteration: 124778
loss: 1.0317983627319336,grad_norm: 0.9829577117816576, iteration: 124779
loss: 1.0570943355560303,grad_norm: 0.9999990417389273, iteration: 124780
loss: 1.0036646127700806,grad_norm: 0.8215882117043484, iteration: 124781
loss: 1.0259689092636108,grad_norm: 0.9999998634352449, iteration: 124782
loss: 1.0146008729934692,grad_norm: 0.8840854484477825, iteration: 124783
loss: 1.0072184801101685,grad_norm: 0.9490677875089863, iteration: 124784
loss: 1.082037329673767,grad_norm: 0.9759727427525228, iteration: 124785
loss: 1.1558403968811035,grad_norm: 0.9999997574092714, iteration: 124786
loss: 1.0332671403884888,grad_norm: 0.9999995623726426, iteration: 124787
loss: 1.0107887983322144,grad_norm: 0.9123625388194685, iteration: 124788
loss: 1.3788965940475464,grad_norm: 0.999999639872046, iteration: 124789
loss: 1.148439645767212,grad_norm: 0.9999993966572085, iteration: 124790
loss: 1.0078316926956177,grad_norm: 0.9999993705458152, iteration: 124791
loss: 0.9863875508308411,grad_norm: 0.9999992354188295, iteration: 124792
loss: 1.1345151662826538,grad_norm: 0.9999995536835804, iteration: 124793
loss: 1.0606346130371094,grad_norm: 0.9999990979486112, iteration: 124794
loss: 1.1007392406463623,grad_norm: 0.9999999205475506, iteration: 124795
loss: 1.262100100517273,grad_norm: 0.9999998750397363, iteration: 124796
loss: 1.0042061805725098,grad_norm: 0.9999993548374938, iteration: 124797
loss: 0.9980173110961914,grad_norm: 0.9999999460625446, iteration: 124798
loss: 1.0199047327041626,grad_norm: 0.9999991162242585, iteration: 124799
loss: 1.0201537609100342,grad_norm: 0.8453097463043422, iteration: 124800
loss: 1.068630337715149,grad_norm: 0.9999994155482187, iteration: 124801
loss: 1.0596492290496826,grad_norm: 0.9999992651018298, iteration: 124802
loss: 1.198896884918213,grad_norm: 0.9999991611422351, iteration: 124803
loss: 1.0416173934936523,grad_norm: 0.9999999053779015, iteration: 124804
loss: 1.0061243772506714,grad_norm: 0.8942735830387215, iteration: 124805
loss: 1.030116319656372,grad_norm: 0.9999991719337392, iteration: 124806
loss: 0.9940001964569092,grad_norm: 0.8360398181848439, iteration: 124807
loss: 1.0739868879318237,grad_norm: 0.9999998290020983, iteration: 124808
loss: 1.006885051727295,grad_norm: 0.8230539364461827, iteration: 124809
loss: 0.9728706479072571,grad_norm: 0.9999996964060345, iteration: 124810
loss: 1.153432846069336,grad_norm: 0.9999998364518524, iteration: 124811
loss: 0.9804470539093018,grad_norm: 0.9999991611137368, iteration: 124812
loss: 1.010555386543274,grad_norm: 0.9999991192713752, iteration: 124813
loss: 0.9954535961151123,grad_norm: 0.9572489663778991, iteration: 124814
loss: 0.9481672644615173,grad_norm: 0.8148634999217935, iteration: 124815
loss: 1.0489916801452637,grad_norm: 0.9999995938463786, iteration: 124816
loss: 1.2047425508499146,grad_norm: 0.9999996708070295, iteration: 124817
loss: 1.0714906454086304,grad_norm: 0.9999995340285807, iteration: 124818
loss: 1.0282708406448364,grad_norm: 0.9999994378269927, iteration: 124819
loss: 1.1908599138259888,grad_norm: 0.9999997012047558, iteration: 124820
loss: 1.2089017629623413,grad_norm: 0.9999996066248634, iteration: 124821
loss: 1.0189021825790405,grad_norm: 0.9999991801836443, iteration: 124822
loss: 1.0570658445358276,grad_norm: 0.9442452083062987, iteration: 124823
loss: 1.0803457498550415,grad_norm: 0.9999999532887128, iteration: 124824
loss: 1.0337896347045898,grad_norm: 0.9999992756707553, iteration: 124825
loss: 0.9789246320724487,grad_norm: 0.8829400150445157, iteration: 124826
loss: 1.0837249755859375,grad_norm: 0.9999999362461746, iteration: 124827
loss: 1.1467221975326538,grad_norm: 0.9999997835002874, iteration: 124828
loss: 1.0056089162826538,grad_norm: 0.7893464723042218, iteration: 124829
loss: 1.1204025745391846,grad_norm: 0.9999994689109895, iteration: 124830
loss: 1.0487205982208252,grad_norm: 0.9999992586647031, iteration: 124831
loss: 1.1060959100723267,grad_norm: 0.9999997679198322, iteration: 124832
loss: 1.0530781745910645,grad_norm: 0.999999482027852, iteration: 124833
loss: 0.989168107509613,grad_norm: 0.9999991739925156, iteration: 124834
loss: 1.1252270936965942,grad_norm: 0.9999993250263387, iteration: 124835
loss: 1.05263090133667,grad_norm: 0.9999991160543158, iteration: 124836
loss: 1.2624872922897339,grad_norm: 0.9999998860373479, iteration: 124837
loss: 0.9945828318595886,grad_norm: 0.9855828219304775, iteration: 124838
loss: 0.9876454472541809,grad_norm: 0.9999992874071902, iteration: 124839
loss: 0.9975995421409607,grad_norm: 0.9999993050341426, iteration: 124840
loss: 1.0377196073532104,grad_norm: 0.9999990169256127, iteration: 124841
loss: 1.0609183311462402,grad_norm: 0.9999998018541517, iteration: 124842
loss: 1.0271823406219482,grad_norm: 0.9999998818900622, iteration: 124843
loss: 1.0820444822311401,grad_norm: 0.9999990596605585, iteration: 124844
loss: 1.014832615852356,grad_norm: 0.9655677324745415, iteration: 124845
loss: 0.9966195821762085,grad_norm: 0.9311681737585223, iteration: 124846
loss: 1.1865553855895996,grad_norm: 0.9999998700014158, iteration: 124847
loss: 1.0606356859207153,grad_norm: 0.9999990738930185, iteration: 124848
loss: 1.057151198387146,grad_norm: 0.9999998993131872, iteration: 124849
loss: 1.0116490125656128,grad_norm: 0.9461924668553353, iteration: 124850
loss: 1.221966028213501,grad_norm: 0.9999996690611649, iteration: 124851
loss: 0.9587414860725403,grad_norm: 0.6586016776716687, iteration: 124852
loss: 0.992546021938324,grad_norm: 0.8946841764896113, iteration: 124853
loss: 1.0786412954330444,grad_norm: 0.9999993417356832, iteration: 124854
loss: 0.9709643125534058,grad_norm: 0.9999996581035047, iteration: 124855
loss: 1.1243854761123657,grad_norm: 0.9999994547604061, iteration: 124856
loss: 1.083371639251709,grad_norm: 0.9999998492609518, iteration: 124857
loss: 1.0000959634780884,grad_norm: 0.999999721720582, iteration: 124858
loss: 0.952771008014679,grad_norm: 0.8187761453613154, iteration: 124859
loss: 1.153668999671936,grad_norm: 0.9999993893663885, iteration: 124860
loss: 1.0766446590423584,grad_norm: 0.9999993487059536, iteration: 124861
loss: 1.045192837715149,grad_norm: 0.9999995914606714, iteration: 124862
loss: 1.0844022035598755,grad_norm: 0.9337526789195523, iteration: 124863
loss: 0.9994279742240906,grad_norm: 0.9627875650304509, iteration: 124864
loss: 1.042681336402893,grad_norm: 0.9999994644181169, iteration: 124865
loss: 1.038591980934143,grad_norm: 0.9999991508283294, iteration: 124866
loss: 1.059887170791626,grad_norm: 0.9999995402768901, iteration: 124867
loss: 1.061085820198059,grad_norm: 0.99999918663729, iteration: 124868
loss: 0.9960082173347473,grad_norm: 0.9999996069819916, iteration: 124869
loss: 1.0573080778121948,grad_norm: 0.9999998872033827, iteration: 124870
loss: 1.1226674318313599,grad_norm: 0.9797377853111994, iteration: 124871
loss: 0.9999199509620667,grad_norm: 0.8327046477636718, iteration: 124872
loss: 1.0772746801376343,grad_norm: 0.9999998749332641, iteration: 124873
loss: 1.012697696685791,grad_norm: 0.9999991432222374, iteration: 124874
loss: 1.2389392852783203,grad_norm: 0.9999996499140339, iteration: 124875
loss: 1.1509944200515747,grad_norm: 1.0000000332718937, iteration: 124876
loss: 1.2082805633544922,grad_norm: 0.9999995223719269, iteration: 124877
loss: 1.1489202976226807,grad_norm: 0.9999999140696055, iteration: 124878
loss: 1.1052045822143555,grad_norm: 0.9999997787241299, iteration: 124879
loss: 1.0639631748199463,grad_norm: 0.9999998248889969, iteration: 124880
loss: 1.028546929359436,grad_norm: 0.999999134724381, iteration: 124881
loss: 1.1692787408828735,grad_norm: 0.9999997661466593, iteration: 124882
loss: 1.0007553100585938,grad_norm: 0.9999997691850724, iteration: 124883
loss: 1.0211944580078125,grad_norm: 0.9999992699831635, iteration: 124884
loss: 1.3744633197784424,grad_norm: 0.9999998374822205, iteration: 124885
loss: 0.9944546222686768,grad_norm: 0.9486744785875861, iteration: 124886
loss: 1.0590273141860962,grad_norm: 0.9999998453366751, iteration: 124887
loss: 1.0787012577056885,grad_norm: 0.9999996030699122, iteration: 124888
loss: 1.1958117485046387,grad_norm: 0.9999997941111091, iteration: 124889
loss: 1.061511516571045,grad_norm: 0.9656814105059118, iteration: 124890
loss: 1.1059188842773438,grad_norm: 0.9999991587127505, iteration: 124891
loss: 1.1391490697860718,grad_norm: 0.9999993359268782, iteration: 124892
loss: 1.038680076599121,grad_norm: 0.9999990511899919, iteration: 124893
loss: 0.988450825214386,grad_norm: 0.9384573242294014, iteration: 124894
loss: 1.0281894207000732,grad_norm: 0.958974352981737, iteration: 124895
loss: 1.1441044807434082,grad_norm: 0.9999996126500066, iteration: 124896
loss: 1.0820046663284302,grad_norm: 0.9999989790002743, iteration: 124897
loss: 1.1098484992980957,grad_norm: 0.9999991112748128, iteration: 124898
loss: 1.0904790163040161,grad_norm: 0.9999997145500584, iteration: 124899
loss: 0.992128312587738,grad_norm: 0.9999990794970322, iteration: 124900
loss: 1.0136624574661255,grad_norm: 0.9999991009274101, iteration: 124901
loss: 1.0741769075393677,grad_norm: 0.8727613666684649, iteration: 124902
loss: 1.0473861694335938,grad_norm: 0.9999999743296575, iteration: 124903
loss: 0.9970327019691467,grad_norm: 0.99999900408869, iteration: 124904
loss: 1.0833076238632202,grad_norm: 0.8707096510800229, iteration: 124905
loss: 1.1840826272964478,grad_norm: 0.9999994569546843, iteration: 124906
loss: 1.0164120197296143,grad_norm: 0.8833362192289966, iteration: 124907
loss: 0.998751163482666,grad_norm: 0.8276732666504267, iteration: 124908
loss: 0.9516549110412598,grad_norm: 0.9999998532648547, iteration: 124909
loss: 1.0251716375350952,grad_norm: 0.9999991579561358, iteration: 124910
loss: 1.01467764377594,grad_norm: 0.9999996659912889, iteration: 124911
loss: 1.0516126155853271,grad_norm: 0.9999992130829605, iteration: 124912
loss: 1.0789815187454224,grad_norm: 0.9130076220379888, iteration: 124913
loss: 1.0254909992218018,grad_norm: 0.8138575753259679, iteration: 124914
loss: 1.039728045463562,grad_norm: 0.9999993563620426, iteration: 124915
loss: 1.0129083395004272,grad_norm: 0.9999994688219279, iteration: 124916
loss: 1.073014497756958,grad_norm: 0.9999996040419382, iteration: 124917
loss: 0.9997958540916443,grad_norm: 0.9999992333467401, iteration: 124918
loss: 1.0242305994033813,grad_norm: 0.9999993975255825, iteration: 124919
loss: 1.0029956102371216,grad_norm: 0.8656518223524485, iteration: 124920
loss: 1.0364935398101807,grad_norm: 0.9999993211767634, iteration: 124921
loss: 1.0687274932861328,grad_norm: 0.9623601319312335, iteration: 124922
loss: 1.0156053304672241,grad_norm: 0.9999996489352829, iteration: 124923
loss: 1.025976300239563,grad_norm: 0.9999997291919259, iteration: 124924
loss: 1.0200843811035156,grad_norm: 0.9999995412952492, iteration: 124925
loss: 0.949761152267456,grad_norm: 0.9999991022970136, iteration: 124926
loss: 1.080697774887085,grad_norm: 0.9999997917050404, iteration: 124927
loss: 0.9462399482727051,grad_norm: 0.9999991731596335, iteration: 124928
loss: 1.0459764003753662,grad_norm: 0.9999994551154109, iteration: 124929
loss: 1.122315764427185,grad_norm: 0.9999997110770436, iteration: 124930
loss: 1.016110897064209,grad_norm: 0.8869506899276367, iteration: 124931
loss: 1.1021113395690918,grad_norm: 0.999999283019723, iteration: 124932
loss: 1.0248923301696777,grad_norm: 0.8915106729675957, iteration: 124933
loss: 1.0218093395233154,grad_norm: 0.9999991087254329, iteration: 124934
loss: 1.0477861166000366,grad_norm: 0.7497283572129833, iteration: 124935
loss: 1.0039362907409668,grad_norm: 0.9999990818046678, iteration: 124936
loss: 1.0139895677566528,grad_norm: 0.8275258963045817, iteration: 124937
loss: 1.1124099493026733,grad_norm: 0.9999994248232964, iteration: 124938
loss: 1.058769941329956,grad_norm: 0.9999997374117389, iteration: 124939
loss: 1.0195387601852417,grad_norm: 0.9999996269084671, iteration: 124940
loss: 0.9949549436569214,grad_norm: 0.8943964129794627, iteration: 124941
loss: 1.0421342849731445,grad_norm: 0.999999085998755, iteration: 124942
loss: 1.0185894966125488,grad_norm: 0.9999997902208948, iteration: 124943
loss: 0.978278636932373,grad_norm: 0.8415060463792755, iteration: 124944
loss: 1.0037163496017456,grad_norm: 0.9999995023232104, iteration: 124945
loss: 1.1349660158157349,grad_norm: 0.999999391538796, iteration: 124946
loss: 1.0941886901855469,grad_norm: 0.9999996839149342, iteration: 124947
loss: 1.0873165130615234,grad_norm: 0.9999992804530609, iteration: 124948
loss: 0.9902347326278687,grad_norm: 0.9999996345696196, iteration: 124949
loss: 1.0885977745056152,grad_norm: 0.9999998620656027, iteration: 124950
loss: 1.157833218574524,grad_norm: 0.9999994831612987, iteration: 124951
loss: 1.0337828397750854,grad_norm: 0.8689621047564917, iteration: 124952
loss: 1.0226836204528809,grad_norm: 0.999999670748148, iteration: 124953
loss: 1.0053565502166748,grad_norm: 0.9999991090665842, iteration: 124954
loss: 1.0877012014389038,grad_norm: 0.9999990045860625, iteration: 124955
loss: 0.998263418674469,grad_norm: 0.999999135442919, iteration: 124956
loss: 1.0491440296173096,grad_norm: 0.9999997542109417, iteration: 124957
loss: 1.0943793058395386,grad_norm: 0.9999999277947487, iteration: 124958
loss: 1.055951714515686,grad_norm: 0.9739368820466683, iteration: 124959
loss: 1.01948881149292,grad_norm: 0.99756161473881, iteration: 124960
loss: 1.113791823387146,grad_norm: 0.9627253613208487, iteration: 124961
loss: 1.035510778427124,grad_norm: 0.8372866593583969, iteration: 124962
loss: 0.9584359526634216,grad_norm: 0.9314159870534315, iteration: 124963
loss: 1.1334125995635986,grad_norm: 0.9999993385202313, iteration: 124964
loss: 1.0677324533462524,grad_norm: 0.9999999281950647, iteration: 124965
loss: 1.0488554239273071,grad_norm: 0.9999999906289353, iteration: 124966
loss: 1.0419011116027832,grad_norm: 0.9999996284397573, iteration: 124967
loss: 1.0603076219558716,grad_norm: 0.999999664772331, iteration: 124968
loss: 1.1179320812225342,grad_norm: 0.9999999128307453, iteration: 124969
loss: 1.0928454399108887,grad_norm: 0.9999993380576754, iteration: 124970
loss: 1.0263296365737915,grad_norm: 0.99999965722116, iteration: 124971
loss: 1.1925386190414429,grad_norm: 0.999999690944105, iteration: 124972
loss: 1.061971664428711,grad_norm: 0.9999996023788584, iteration: 124973
loss: 1.2143759727478027,grad_norm: 0.9999998328297834, iteration: 124974
loss: 0.9835588932037354,grad_norm: 0.9999991615273492, iteration: 124975
loss: 0.9704411029815674,grad_norm: 0.9999993904974239, iteration: 124976
loss: 0.9633668661117554,grad_norm: 0.8868480069047349, iteration: 124977
loss: 1.147817850112915,grad_norm: 0.9999999931392926, iteration: 124978
loss: 1.0658751726150513,grad_norm: 0.9999990999670267, iteration: 124979
loss: 1.0316952466964722,grad_norm: 0.8716233719810325, iteration: 124980
loss: 1.0430350303649902,grad_norm: 0.9999995023267495, iteration: 124981
loss: 0.9872975945472717,grad_norm: 0.713570698101601, iteration: 124982
loss: 1.0285499095916748,grad_norm: 0.917642198123519, iteration: 124983
loss: 1.1960327625274658,grad_norm: 0.9999993508677869, iteration: 124984
loss: 1.0758981704711914,grad_norm: 0.9999993751704406, iteration: 124985
loss: 0.996241569519043,grad_norm: 0.9999991228071327, iteration: 124986
loss: 1.0883359909057617,grad_norm: 0.9999995554491223, iteration: 124987
loss: 1.0051995515823364,grad_norm: 0.9453197214254069, iteration: 124988
loss: 1.2477678060531616,grad_norm: 0.9999998189090862, iteration: 124989
loss: 1.064508318901062,grad_norm: 0.9999996075738864, iteration: 124990
loss: 1.0241495370864868,grad_norm: 0.9999999489598537, iteration: 124991
loss: 1.0305546522140503,grad_norm: 0.9999996995591797, iteration: 124992
loss: 1.060657024383545,grad_norm: 0.99999925060946, iteration: 124993
loss: 1.0858432054519653,grad_norm: 0.9999993453426707, iteration: 124994
loss: 1.0059823989868164,grad_norm: 0.9999990962380889, iteration: 124995
loss: 1.0972551107406616,grad_norm: 0.9999995740066041, iteration: 124996
loss: 1.1237906217575073,grad_norm: 0.9999992631978757, iteration: 124997
loss: 1.0153100490570068,grad_norm: 0.9999995176801598, iteration: 124998
loss: 1.034584879875183,grad_norm: 0.9999998247330001, iteration: 124999
loss: 1.056801438331604,grad_norm: 0.9288362079263167, iteration: 125000
loss: 1.1808522939682007,grad_norm: 0.9999999332621213, iteration: 125001
loss: 1.2260061502456665,grad_norm: 0.9999999493038509, iteration: 125002
loss: 1.0817204713821411,grad_norm: 0.9999997037505386, iteration: 125003
loss: 1.052735447883606,grad_norm: 1.0000000475983841, iteration: 125004
loss: 1.1287823915481567,grad_norm: 0.9999997855496716, iteration: 125005
loss: 1.3183679580688477,grad_norm: 0.9999997094285068, iteration: 125006
loss: 1.2269207239151,grad_norm: 1.000000024158779, iteration: 125007
loss: 1.101454496383667,grad_norm: 0.9999994244547439, iteration: 125008
loss: 1.0454682111740112,grad_norm: 0.9999996160749334, iteration: 125009
loss: 1.2976853847503662,grad_norm: 0.9999993377080072, iteration: 125010
loss: 1.0095539093017578,grad_norm: 0.9999991657059797, iteration: 125011
loss: 1.0955569744110107,grad_norm: 0.9999999068740186, iteration: 125012
loss: 1.1156418323516846,grad_norm: 0.9999994710702967, iteration: 125013
loss: 1.2484134435653687,grad_norm: 0.9999999533210734, iteration: 125014
loss: 1.0410460233688354,grad_norm: 0.9999990986267393, iteration: 125015
loss: 1.0518298149108887,grad_norm: 0.9999998656143144, iteration: 125016
loss: 1.0729326009750366,grad_norm: 0.9999992790831554, iteration: 125017
loss: 1.0655245780944824,grad_norm: 0.9999993807336324, iteration: 125018
loss: 1.0662357807159424,grad_norm: 0.9999994582969434, iteration: 125019
loss: 1.0413321256637573,grad_norm: 0.8161306193521138, iteration: 125020
loss: 1.018734097480774,grad_norm: 0.9999997814598095, iteration: 125021
loss: 1.0492074489593506,grad_norm: 0.9999999553186713, iteration: 125022
loss: 1.1617653369903564,grad_norm: 0.9999996891340991, iteration: 125023
loss: 1.1040618419647217,grad_norm: 0.9999992483946258, iteration: 125024
loss: 1.0314632654190063,grad_norm: 0.9999991612583354, iteration: 125025
loss: 1.0414094924926758,grad_norm: 0.9999996955125452, iteration: 125026
loss: 1.0634124279022217,grad_norm: 0.9999992246414342, iteration: 125027
loss: 1.3535184860229492,grad_norm: 0.9999997648643155, iteration: 125028
loss: 0.9854132533073425,grad_norm: 0.9193470106585554, iteration: 125029
loss: 1.0911521911621094,grad_norm: 0.9999993137898118, iteration: 125030
loss: 1.1321218013763428,grad_norm: 0.999999739977887, iteration: 125031
loss: 1.0810737609863281,grad_norm: 0.9999992219875927, iteration: 125032
loss: 1.0394172668457031,grad_norm: 0.9999991190127829, iteration: 125033
loss: 1.350054383277893,grad_norm: 0.9999996376010131, iteration: 125034
loss: 0.9944760799407959,grad_norm: 0.9526634075063988, iteration: 125035
loss: 1.1724623441696167,grad_norm: 0.9999997693437453, iteration: 125036
loss: 1.0780220031738281,grad_norm: 0.9999990759472256, iteration: 125037
loss: 1.0717155933380127,grad_norm: 0.9999994853127475, iteration: 125038
loss: 1.2566832304000854,grad_norm: 0.9999998004377224, iteration: 125039
loss: 1.2403148412704468,grad_norm: 0.9999994904173111, iteration: 125040
loss: 1.0582462549209595,grad_norm: 0.9999989652538884, iteration: 125041
loss: 1.1708652973175049,grad_norm: 0.9999996762688622, iteration: 125042
loss: 1.0673198699951172,grad_norm: 0.9999991578133339, iteration: 125043
loss: 1.111297607421875,grad_norm: 0.999999743497377, iteration: 125044
loss: 1.0978118181228638,grad_norm: 0.8929183458845213, iteration: 125045
loss: 1.1542770862579346,grad_norm: 0.9999998998105409, iteration: 125046
loss: 1.0716675519943237,grad_norm: 0.9999994295559437, iteration: 125047
loss: 1.0399727821350098,grad_norm: 1.0000000023896014, iteration: 125048
loss: 0.978597104549408,grad_norm: 0.9999996681787049, iteration: 125049
loss: 1.2023954391479492,grad_norm: 0.99999970581223, iteration: 125050
loss: 1.0163943767547607,grad_norm: 0.9999993745256551, iteration: 125051
loss: 1.1172059774398804,grad_norm: 0.9999997690706043, iteration: 125052
loss: 0.9820553660392761,grad_norm: 0.8611336971158173, iteration: 125053
loss: 1.0391337871551514,grad_norm: 0.9999993519388419, iteration: 125054
loss: 1.080069661140442,grad_norm: 0.9999991780526454, iteration: 125055
loss: 0.9994065761566162,grad_norm: 0.8925040346297072, iteration: 125056
loss: 1.2736990451812744,grad_norm: 0.9999997787136433, iteration: 125057
loss: 1.094840407371521,grad_norm: 0.9999995196972178, iteration: 125058
loss: 0.9734178185462952,grad_norm: 0.7758150780470255, iteration: 125059
loss: 0.9954315423965454,grad_norm: 0.8754242095210883, iteration: 125060
loss: 1.1748340129852295,grad_norm: 0.9999998997492268, iteration: 125061
loss: 1.0990333557128906,grad_norm: 0.9999994225495504, iteration: 125062
loss: 0.975788950920105,grad_norm: 0.8415830017845769, iteration: 125063
loss: 1.0690220594406128,grad_norm: 0.9999994305417125, iteration: 125064
loss: 0.9882175922393799,grad_norm: 0.8015270538916418, iteration: 125065
loss: 1.127375841140747,grad_norm: 0.9999992211201747, iteration: 125066
loss: 1.0140618085861206,grad_norm: 0.9999996222776908, iteration: 125067
loss: 1.1187776327133179,grad_norm: 0.9818273834547834, iteration: 125068
loss: 1.1359374523162842,grad_norm: 0.999999019955658, iteration: 125069
loss: 1.1678563356399536,grad_norm: 0.9999995253040649, iteration: 125070
loss: 1.0385410785675049,grad_norm: 0.9999991052393192, iteration: 125071
loss: 0.9976715445518494,grad_norm: 0.9999992111099387, iteration: 125072
loss: 1.0186207294464111,grad_norm: 0.7861843125922213, iteration: 125073
loss: 1.0077296495437622,grad_norm: 0.9443348563327056, iteration: 125074
loss: 1.0404582023620605,grad_norm: 0.9452494029623421, iteration: 125075
loss: 1.0541194677352905,grad_norm: 0.9999999657706271, iteration: 125076
loss: 1.1304877996444702,grad_norm: 0.9999998874600947, iteration: 125077
loss: 1.0458225011825562,grad_norm: 0.999999381136162, iteration: 125078
loss: 1.187914490699768,grad_norm: 0.9999995524399741, iteration: 125079
loss: 1.121172547340393,grad_norm: 0.9999997241710854, iteration: 125080
loss: 1.0098493099212646,grad_norm: 0.7967366893531381, iteration: 125081
loss: 1.0128211975097656,grad_norm: 0.8163183930327911, iteration: 125082
loss: 1.012886643409729,grad_norm: 0.999999320761863, iteration: 125083
loss: 1.0031352043151855,grad_norm: 0.9819296712167297, iteration: 125084
loss: 1.1981353759765625,grad_norm: 0.9999997989236789, iteration: 125085
loss: 1.0029090642929077,grad_norm: 0.9630286230385389, iteration: 125086
loss: 1.1855932474136353,grad_norm: 0.999999650906561, iteration: 125087
loss: 1.0706771612167358,grad_norm: 0.999999863393338, iteration: 125088
loss: 1.139265537261963,grad_norm: 0.9999991825147494, iteration: 125089
loss: 1.0072298049926758,grad_norm: 0.8556816737909426, iteration: 125090
loss: 1.0987156629562378,grad_norm: 0.9999997771533218, iteration: 125091
loss: 1.016512155532837,grad_norm: 0.9999989820063445, iteration: 125092
loss: 1.07155179977417,grad_norm: 0.9999990180567931, iteration: 125093
loss: 1.0090608596801758,grad_norm: 0.7885211600567811, iteration: 125094
loss: 1.097679615020752,grad_norm: 0.9999993758905904, iteration: 125095
loss: 1.1883138418197632,grad_norm: 0.9999993239290463, iteration: 125096
loss: 1.0488755702972412,grad_norm: 0.9999993567368576, iteration: 125097
loss: 1.2936646938323975,grad_norm: 0.9999999000856464, iteration: 125098
loss: 1.036911129951477,grad_norm: 0.999999182596018, iteration: 125099
loss: 1.0715900659561157,grad_norm: 0.9999991985718998, iteration: 125100
loss: 1.0239182710647583,grad_norm: 0.9483500906177184, iteration: 125101
loss: 1.1845362186431885,grad_norm: 0.9999999868702919, iteration: 125102
loss: 1.244750738143921,grad_norm: 0.9999991562821617, iteration: 125103
loss: 1.2012348175048828,grad_norm: 0.9999997925616273, iteration: 125104
loss: 1.102510929107666,grad_norm: 0.9999995610557206, iteration: 125105
loss: 1.2041306495666504,grad_norm: 0.9999991832118452, iteration: 125106
loss: 1.0977351665496826,grad_norm: 0.8806444663146453, iteration: 125107
loss: 1.1162699460983276,grad_norm: 0.9999996222656692, iteration: 125108
loss: 1.0549019575119019,grad_norm: 0.961097462916788, iteration: 125109
loss: 1.2326425313949585,grad_norm: 0.9999994833986999, iteration: 125110
loss: 1.1834378242492676,grad_norm: 0.9999999898566905, iteration: 125111
loss: 1.0950453281402588,grad_norm: 0.9999997407749804, iteration: 125112
loss: 1.1536920070648193,grad_norm: 0.9999998528632841, iteration: 125113
loss: 1.1833192110061646,grad_norm: 0.999999456880342, iteration: 125114
loss: 1.0896427631378174,grad_norm: 0.9999992749388075, iteration: 125115
loss: 1.0244592428207397,grad_norm: 0.9999998290314459, iteration: 125116
loss: 1.180062174797058,grad_norm: 0.9999994770107343, iteration: 125117
loss: 1.2164366245269775,grad_norm: 0.9999994613780611, iteration: 125118
loss: 1.1647018194198608,grad_norm: 0.999999195038369, iteration: 125119
loss: 1.169345498085022,grad_norm: 0.9999993958138783, iteration: 125120
loss: 1.1262736320495605,grad_norm: 0.9999993898521714, iteration: 125121
loss: 1.134965181350708,grad_norm: 0.9999996650247613, iteration: 125122
loss: 1.0272845029830933,grad_norm: 0.9999992645959281, iteration: 125123
loss: 1.2605012655258179,grad_norm: 0.9999993434942755, iteration: 125124
loss: 1.165513515472412,grad_norm: 0.9999991241106014, iteration: 125125
loss: 1.0745161771774292,grad_norm: 0.9999994183093179, iteration: 125126
loss: 1.1653698682785034,grad_norm: 0.9999991789566933, iteration: 125127
loss: 1.1980966329574585,grad_norm: 0.9999998433692904, iteration: 125128
loss: 1.1878952980041504,grad_norm: 0.9999995056226764, iteration: 125129
loss: 1.2398748397827148,grad_norm: 0.9999999189643206, iteration: 125130
loss: 1.2249667644500732,grad_norm: 0.9999995018518433, iteration: 125131
loss: 1.1848368644714355,grad_norm: 0.9999999550132386, iteration: 125132
loss: 1.1280450820922852,grad_norm: 0.9999994067085427, iteration: 125133
loss: 1.0935784578323364,grad_norm: 0.9999996985130046, iteration: 125134
loss: 1.1888407468795776,grad_norm: 0.999999572651983, iteration: 125135
loss: 1.1338491439819336,grad_norm: 0.9999997379599573, iteration: 125136
loss: 1.2774581909179688,grad_norm: 0.9999999924320693, iteration: 125137
loss: 1.3055144548416138,grad_norm: 0.9999997235649347, iteration: 125138
loss: 1.287598729133606,grad_norm: 0.9999998448789037, iteration: 125139
loss: 1.2141212224960327,grad_norm: 0.9999993424735825, iteration: 125140
loss: 1.0398708581924438,grad_norm: 0.7951376271014767, iteration: 125141
loss: 1.1444783210754395,grad_norm: 0.9999999013929759, iteration: 125142
loss: 1.0668662786483765,grad_norm: 0.9999997221934613, iteration: 125143
loss: 1.214615821838379,grad_norm: 0.9999996231010612, iteration: 125144
loss: 1.2311272621154785,grad_norm: 0.9999996340852401, iteration: 125145
loss: 1.0605659484863281,grad_norm: 0.9999997817891433, iteration: 125146
loss: 1.0967861413955688,grad_norm: 0.9999993314633302, iteration: 125147
loss: 1.1109429597854614,grad_norm: 0.999999506252564, iteration: 125148
loss: 1.0310649871826172,grad_norm: 0.9999991776218328, iteration: 125149
loss: 1.0766397714614868,grad_norm: 0.9999993114503741, iteration: 125150
loss: 1.0556037425994873,grad_norm: 0.9999995194490762, iteration: 125151
loss: 1.0930290222167969,grad_norm: 0.9999998371664409, iteration: 125152
loss: 1.2569780349731445,grad_norm: 0.999999881397594, iteration: 125153
loss: 1.0189260244369507,grad_norm: 0.9999994157100244, iteration: 125154
loss: 1.0246762037277222,grad_norm: 0.816810936117355, iteration: 125155
loss: 1.3187313079833984,grad_norm: 0.999999625980248, iteration: 125156
loss: 1.098444938659668,grad_norm: 0.9999998360526786, iteration: 125157
loss: 1.3324421644210815,grad_norm: 0.9999998836520828, iteration: 125158
loss: 1.1964539289474487,grad_norm: 0.9999996370135062, iteration: 125159
loss: 1.0419892072677612,grad_norm: 0.9999990051477434, iteration: 125160
loss: 1.1639409065246582,grad_norm: 0.9999997824030014, iteration: 125161
loss: 1.146632432937622,grad_norm: 0.999999666899931, iteration: 125162
loss: 1.1017189025878906,grad_norm: 0.9999991060187094, iteration: 125163
loss: 1.2239010334014893,grad_norm: 0.9999992368106536, iteration: 125164
loss: 1.0941171646118164,grad_norm: 0.9999995654483868, iteration: 125165
loss: 1.118894100189209,grad_norm: 0.9999996057208537, iteration: 125166
loss: 1.131212830543518,grad_norm: 0.9999996578810835, iteration: 125167
loss: 1.0438194274902344,grad_norm: 0.9143078853947407, iteration: 125168
loss: 1.0597110986709595,grad_norm: 0.9999998936760258, iteration: 125169
loss: 1.174276351928711,grad_norm: 0.9999994440104651, iteration: 125170
loss: 1.0714597702026367,grad_norm: 0.9999990226236202, iteration: 125171
loss: 1.1201951503753662,grad_norm: 0.9999994063972838, iteration: 125172
loss: 1.0576393604278564,grad_norm: 0.999999284087121, iteration: 125173
loss: 1.0055502653121948,grad_norm: 0.999999510365905, iteration: 125174
loss: 1.0155402421951294,grad_norm: 0.9999993448782221, iteration: 125175
loss: 1.2335453033447266,grad_norm: 0.9999999304186065, iteration: 125176
loss: 1.15481698513031,grad_norm: 0.9999998210712525, iteration: 125177
loss: 1.1402835845947266,grad_norm: 0.9999993939383592, iteration: 125178
loss: 1.1443918943405151,grad_norm: 0.9999994348781641, iteration: 125179
loss: 1.0528827905654907,grad_norm: 0.9632698572844485, iteration: 125180
loss: 1.0337564945220947,grad_norm: 0.9999996395351796, iteration: 125181
loss: 1.2215532064437866,grad_norm: 0.999999471015653, iteration: 125182
loss: 1.173872947692871,grad_norm: 0.9999996439057158, iteration: 125183
loss: 1.1878204345703125,grad_norm: 0.9999993240221227, iteration: 125184
loss: 1.0337340831756592,grad_norm: 0.9999992355308247, iteration: 125185
loss: 1.084132194519043,grad_norm: 0.9999992884789431, iteration: 125186
loss: 1.016565203666687,grad_norm: 0.9999991781002542, iteration: 125187
loss: 1.1292704343795776,grad_norm: 0.9999995979521431, iteration: 125188
loss: 1.186153769493103,grad_norm: 0.9999996223453893, iteration: 125189
loss: 1.126286506652832,grad_norm: 0.9999997034627885, iteration: 125190
loss: 1.1139336824417114,grad_norm: 0.9999992339631387, iteration: 125191
loss: 1.1372721195220947,grad_norm: 0.9999989558616035, iteration: 125192
loss: 1.0117733478546143,grad_norm: 0.9999995673364624, iteration: 125193
loss: 1.055930733680725,grad_norm: 0.9999995569598812, iteration: 125194
loss: 1.0079655647277832,grad_norm: 0.8875233376081886, iteration: 125195
loss: 1.0725879669189453,grad_norm: 0.9999991123649132, iteration: 125196
loss: 1.0170743465423584,grad_norm: 0.9590935336529062, iteration: 125197
loss: 1.015450358390808,grad_norm: 0.9432147588617128, iteration: 125198
loss: 1.1290714740753174,grad_norm: 0.9999998737490389, iteration: 125199
loss: 1.0494685173034668,grad_norm: 0.9999996675660464, iteration: 125200
loss: 1.118151307106018,grad_norm: 0.9999996267675939, iteration: 125201
loss: 0.996999204158783,grad_norm: 0.9999998933745397, iteration: 125202
loss: 1.0402204990386963,grad_norm: 0.9379039957175029, iteration: 125203
loss: 1.0344423055648804,grad_norm: 0.9999992919022224, iteration: 125204
loss: 1.1563622951507568,grad_norm: 0.9999993642059766, iteration: 125205
loss: 1.0192956924438477,grad_norm: 0.9427691387504783, iteration: 125206
loss: 1.037658452987671,grad_norm: 0.9999997249576367, iteration: 125207
loss: 1.0430384874343872,grad_norm: 0.9999990970485383, iteration: 125208
loss: 1.0685518980026245,grad_norm: 0.9999992173195145, iteration: 125209
loss: 1.032378077507019,grad_norm: 0.9999993108324104, iteration: 125210
loss: 1.1500779390335083,grad_norm: 0.9999996039072268, iteration: 125211
loss: 1.1480404138565063,grad_norm: 0.9999995579534858, iteration: 125212
loss: 1.043815016746521,grad_norm: 0.999999145320103, iteration: 125213
loss: 1.083591103553772,grad_norm: 0.9999994967048358, iteration: 125214
loss: 1.0903698205947876,grad_norm: 0.9999995394114982, iteration: 125215
loss: 1.0540753602981567,grad_norm: 0.9999992763597292, iteration: 125216
loss: 1.153334617614746,grad_norm: 0.9999998493934539, iteration: 125217
loss: 1.0454694032669067,grad_norm: 0.9150448673351802, iteration: 125218
loss: 1.002333164215088,grad_norm: 0.9769794247329735, iteration: 125219
loss: 0.9908953309059143,grad_norm: 0.8895408182993991, iteration: 125220
loss: 1.0489332675933838,grad_norm: 0.9999994822463508, iteration: 125221
loss: 1.0200393199920654,grad_norm: 0.9999998332357729, iteration: 125222
loss: 1.0320568084716797,grad_norm: 0.9999990542219164, iteration: 125223
loss: 1.0041296482086182,grad_norm: 0.7358294582325845, iteration: 125224
loss: 0.9868221282958984,grad_norm: 0.9043622316603289, iteration: 125225
loss: 0.9703400135040283,grad_norm: 0.9999993741212084, iteration: 125226
loss: 1.0070518255233765,grad_norm: 0.8585117155022057, iteration: 125227
loss: 1.0090423822402954,grad_norm: 0.9999996605519713, iteration: 125228
loss: 1.0820759534835815,grad_norm: 0.9585775725063443, iteration: 125229
loss: 0.9790875911712646,grad_norm: 0.9166695863013867, iteration: 125230
loss: 1.158394455909729,grad_norm: 0.9999994058734538, iteration: 125231
loss: 1.0723141431808472,grad_norm: 0.9999991472843232, iteration: 125232
loss: 1.0140522718429565,grad_norm: 0.7921027753118322, iteration: 125233
loss: 1.0473922491073608,grad_norm: 0.9154233017222014, iteration: 125234
loss: 1.0300945043563843,grad_norm: 0.9999992896105101, iteration: 125235
loss: 1.1943550109863281,grad_norm: 0.9999998197397237, iteration: 125236
loss: 1.0695792436599731,grad_norm: 0.9999996681000313, iteration: 125237
loss: 1.0467872619628906,grad_norm: 0.9999995956157848, iteration: 125238
loss: 1.0306652784347534,grad_norm: 0.8589950534666839, iteration: 125239
loss: 1.1729226112365723,grad_norm: 0.9999993379507364, iteration: 125240
loss: 1.0186246633529663,grad_norm: 0.9999996805748255, iteration: 125241
loss: 1.0214793682098389,grad_norm: 0.9999991813653847, iteration: 125242
loss: 1.0237176418304443,grad_norm: 0.9999996793738627, iteration: 125243
loss: 1.0679810047149658,grad_norm: 0.999999103383116, iteration: 125244
loss: 1.1446572542190552,grad_norm: 0.9999996296106128, iteration: 125245
loss: 1.1070760488510132,grad_norm: 0.9999998892411712, iteration: 125246
loss: 0.9987781047821045,grad_norm: 0.7682314344836366, iteration: 125247
loss: 0.9592782855033875,grad_norm: 0.7892873147436679, iteration: 125248
loss: 1.0121232271194458,grad_norm: 0.9210196283339119, iteration: 125249
loss: 1.073008418083191,grad_norm: 0.9999997437961636, iteration: 125250
loss: 1.0618689060211182,grad_norm: 0.9999992384378931, iteration: 125251
loss: 1.2211954593658447,grad_norm: 0.9999995906007622, iteration: 125252
loss: 1.0183767080307007,grad_norm: 0.9999991809324851, iteration: 125253
loss: 1.1900842189788818,grad_norm: 0.9999999010671654, iteration: 125254
loss: 1.0016953945159912,grad_norm: 0.9984595427811966, iteration: 125255
loss: 1.0257505178451538,grad_norm: 0.9328784222739273, iteration: 125256
loss: 1.0219027996063232,grad_norm: 0.8586544745034939, iteration: 125257
loss: 0.9638552069664001,grad_norm: 0.9999991590480047, iteration: 125258
loss: 1.085750937461853,grad_norm: 0.999999897391546, iteration: 125259
loss: 0.9725663661956787,grad_norm: 0.9999991154083862, iteration: 125260
loss: 1.1022404432296753,grad_norm: 0.9999999306393101, iteration: 125261
loss: 1.0822505950927734,grad_norm: 0.9999999397489852, iteration: 125262
loss: 1.0723235607147217,grad_norm: 0.9379475940268308, iteration: 125263
loss: 1.0765572786331177,grad_norm: 0.9999998330577732, iteration: 125264
loss: 1.0934892892837524,grad_norm: 0.9999997089897924, iteration: 125265
loss: 1.1197235584259033,grad_norm: 0.9999992489314854, iteration: 125266
loss: 1.0470961332321167,grad_norm: 0.999999654041157, iteration: 125267
loss: 0.9846319556236267,grad_norm: 0.9760631789825621, iteration: 125268
loss: 1.0638010501861572,grad_norm: 0.9999992140220634, iteration: 125269
loss: 1.0817945003509521,grad_norm: 0.9999989921654507, iteration: 125270
loss: 1.0467727184295654,grad_norm: 0.9999991395960888, iteration: 125271
loss: 1.0979740619659424,grad_norm: 0.9999995901861586, iteration: 125272
loss: 1.0310152769088745,grad_norm: 0.9999991939059772, iteration: 125273
loss: 1.0950734615325928,grad_norm: 0.9999994757187454, iteration: 125274
loss: 1.08949875831604,grad_norm: 0.9999992171349296, iteration: 125275
loss: 1.0774141550064087,grad_norm: 0.9838393661095383, iteration: 125276
loss: 1.036543607711792,grad_norm: 0.9999991845083457, iteration: 125277
loss: 1.012342095375061,grad_norm: 0.999999092804171, iteration: 125278
loss: 1.119064450263977,grad_norm: 0.9999994437470724, iteration: 125279
loss: 1.076853632926941,grad_norm: 0.9999999279737012, iteration: 125280
loss: 1.1137861013412476,grad_norm: 0.9999995592608717, iteration: 125281
loss: 1.0967241525650024,grad_norm: 0.9999991642663413, iteration: 125282
loss: 0.9997108578681946,grad_norm: 0.9999990894500587, iteration: 125283
loss: 1.0264841318130493,grad_norm: 0.999999743819382, iteration: 125284
loss: 1.064012885093689,grad_norm: 0.999999230749782, iteration: 125285
loss: 1.1784600019454956,grad_norm: 0.9999997798208007, iteration: 125286
loss: 1.0042948722839355,grad_norm: 0.8619218507571936, iteration: 125287
loss: 1.0564920902252197,grad_norm: 0.9999992301620088, iteration: 125288
loss: 1.0333460569381714,grad_norm: 0.8477603998977319, iteration: 125289
loss: 1.145508885383606,grad_norm: 0.999999551321568, iteration: 125290
loss: 1.1238957643508911,grad_norm: 0.9999996228496428, iteration: 125291
loss: 1.0290921926498413,grad_norm: 0.9999993280290921, iteration: 125292
loss: 1.0539909601211548,grad_norm: 0.9999995597144875, iteration: 125293
loss: 1.1527013778686523,grad_norm: 0.9999997128708902, iteration: 125294
loss: 1.0727074146270752,grad_norm: 0.9999998350359686, iteration: 125295
loss: 1.0178980827331543,grad_norm: 0.9999991936009145, iteration: 125296
loss: 1.0369830131530762,grad_norm: 0.7947597144387463, iteration: 125297
loss: 1.0829297304153442,grad_norm: 0.9999994114192566, iteration: 125298
loss: 1.1073987483978271,grad_norm: 0.9999996680927201, iteration: 125299
loss: 0.9775182008743286,grad_norm: 0.9296001781467575, iteration: 125300
loss: 1.0760643482208252,grad_norm: 0.9999997935863563, iteration: 125301
loss: 0.9899401068687439,grad_norm: 0.8205807283010598, iteration: 125302
loss: 1.0252923965454102,grad_norm: 0.9125156909842004, iteration: 125303
loss: 1.0532692670822144,grad_norm: 0.9999997336806746, iteration: 125304
loss: 1.0155624151229858,grad_norm: 0.9999997030161822, iteration: 125305
loss: 1.158284068107605,grad_norm: 0.9999992740648024, iteration: 125306
loss: 1.0033811330795288,grad_norm: 0.9999997446598587, iteration: 125307
loss: 1.0371639728546143,grad_norm: 0.9999991286014633, iteration: 125308
loss: 1.0513652563095093,grad_norm: 0.9999995922742726, iteration: 125309
loss: 1.0326343774795532,grad_norm: 0.9999995924561891, iteration: 125310
loss: 1.0835777521133423,grad_norm: 0.9999995629703139, iteration: 125311
loss: 0.9745088219642639,grad_norm: 0.7263921340594234, iteration: 125312
loss: 1.048862099647522,grad_norm: 0.9999997231900867, iteration: 125313
loss: 1.1070785522460938,grad_norm: 0.999999080884172, iteration: 125314
loss: 1.0754109621047974,grad_norm: 0.999999242916002, iteration: 125315
loss: 1.0462523698806763,grad_norm: 0.9999995516201606, iteration: 125316
loss: 1.0330424308776855,grad_norm: 0.999999736465677, iteration: 125317
loss: 0.9936538338661194,grad_norm: 0.9999993422194364, iteration: 125318
loss: 0.9714235663414001,grad_norm: 0.8679913537574542, iteration: 125319
loss: 0.9897934794425964,grad_norm: 0.893269050239384, iteration: 125320
loss: 1.059120535850525,grad_norm: 0.9999994686978854, iteration: 125321
loss: 0.999005138874054,grad_norm: 0.9999991912238443, iteration: 125322
loss: 1.0092062950134277,grad_norm: 0.9999993350969334, iteration: 125323
loss: 1.071486473083496,grad_norm: 0.9999992589484832, iteration: 125324
loss: 1.1209193468093872,grad_norm: 0.9999997109716291, iteration: 125325
loss: 1.0064690113067627,grad_norm: 0.9999999095120522, iteration: 125326
loss: 1.080753207206726,grad_norm: 0.9999996205953057, iteration: 125327
loss: 1.0373724699020386,grad_norm: 0.9999994582481478, iteration: 125328
loss: 1.310445785522461,grad_norm: 0.9999992203893991, iteration: 125329
loss: 1.0995303392410278,grad_norm: 0.9999996327012911, iteration: 125330
loss: 1.0765131711959839,grad_norm: 0.886526432924292, iteration: 125331
loss: 1.1148533821105957,grad_norm: 0.9999990938929215, iteration: 125332
loss: 1.1041557788848877,grad_norm: 0.9999998653215072, iteration: 125333
loss: 1.015678882598877,grad_norm: 0.9999992017797639, iteration: 125334
loss: 1.0029913187026978,grad_norm: 0.8318860925076095, iteration: 125335
loss: 1.1118816137313843,grad_norm: 0.99999904688253, iteration: 125336
loss: 1.0038049221038818,grad_norm: 0.9058483423051761, iteration: 125337
loss: 1.1111435890197754,grad_norm: 0.9999994346651994, iteration: 125338
loss: 1.0362402200698853,grad_norm: 0.9999994104908632, iteration: 125339
loss: 1.041223406791687,grad_norm: 0.9794451953270251, iteration: 125340
loss: 0.9853306412696838,grad_norm: 0.8921054252348123, iteration: 125341
loss: 0.9969086647033691,grad_norm: 0.983135041341379, iteration: 125342
loss: 0.9812512397766113,grad_norm: 0.9999990883491003, iteration: 125343
loss: 1.2224187850952148,grad_norm: 0.9999991162962886, iteration: 125344
loss: 1.0498355627059937,grad_norm: 0.9999999450299135, iteration: 125345
loss: 1.0543429851531982,grad_norm: 0.9529378298106249, iteration: 125346
loss: 1.071101188659668,grad_norm: 0.9999990748277587, iteration: 125347
loss: 1.047613263130188,grad_norm: 0.9999997495090686, iteration: 125348
loss: 1.0501043796539307,grad_norm: 0.9999994888998128, iteration: 125349
loss: 1.1118113994598389,grad_norm: 0.9999993043936347, iteration: 125350
loss: 1.0272856950759888,grad_norm: 0.9999993012540974, iteration: 125351
loss: 1.084795594215393,grad_norm: 0.9999994771078964, iteration: 125352
loss: 1.0572521686553955,grad_norm: 0.9893909859959049, iteration: 125353
loss: 1.0263855457305908,grad_norm: 0.957056474092219, iteration: 125354
loss: 1.0302443504333496,grad_norm: 0.9999990020517004, iteration: 125355
loss: 1.0269078016281128,grad_norm: 0.9999999804628594, iteration: 125356
loss: 1.0305109024047852,grad_norm: 0.9999991177709378, iteration: 125357
loss: 1.067815899848938,grad_norm: 0.893619032426228, iteration: 125358
loss: 1.1347541809082031,grad_norm: 0.9999990897676342, iteration: 125359
loss: 0.9822389483451843,grad_norm: 0.9999991041324165, iteration: 125360
loss: 1.1108070611953735,grad_norm: 0.9999992030332217, iteration: 125361
loss: 1.1179393529891968,grad_norm: 0.9999996926956789, iteration: 125362
loss: 1.1940242052078247,grad_norm: 0.9999998357943765, iteration: 125363
loss: 1.0823304653167725,grad_norm: 0.9999997149166279, iteration: 125364
loss: 1.102157711982727,grad_norm: 0.9999995935107642, iteration: 125365
loss: 1.0217106342315674,grad_norm: 0.9999990735508761, iteration: 125366
loss: 1.0003948211669922,grad_norm: 0.9194472799283318, iteration: 125367
loss: 1.1508724689483643,grad_norm: 0.9999997295927138, iteration: 125368
loss: 1.0343549251556396,grad_norm: 0.9999991021692518, iteration: 125369
loss: 0.9474120140075684,grad_norm: 0.9999989979671697, iteration: 125370
loss: 1.0848701000213623,grad_norm: 0.9999994238577045, iteration: 125371
loss: 1.0216432809829712,grad_norm: 0.9999997453291498, iteration: 125372
loss: 1.087772011756897,grad_norm: 0.9999998741763797, iteration: 125373
loss: 1.0441309213638306,grad_norm: 0.9999993264065494, iteration: 125374
loss: 1.233199954032898,grad_norm: 0.9999994509491346, iteration: 125375
loss: 1.1215107440948486,grad_norm: 0.9999999987633599, iteration: 125376
loss: 1.0307177305221558,grad_norm: 0.9999992499865757, iteration: 125377
loss: 1.0460354089736938,grad_norm: 0.9999996945751716, iteration: 125378
loss: 1.144950270652771,grad_norm: 0.9999994590239397, iteration: 125379
loss: 1.0433546304702759,grad_norm: 0.9999990571090825, iteration: 125380
loss: 1.0588877201080322,grad_norm: 0.9999991485140539, iteration: 125381
loss: 1.1588116884231567,grad_norm: 0.9999998975977562, iteration: 125382
loss: 1.0605934858322144,grad_norm: 0.9999996347894419, iteration: 125383
loss: 1.0114152431488037,grad_norm: 0.9999992197560166, iteration: 125384
loss: 1.2118438482284546,grad_norm: 0.9999997041294606, iteration: 125385
loss: 1.0974684953689575,grad_norm: 0.9999997147467822, iteration: 125386
loss: 1.0731984376907349,grad_norm: 0.9999993144400966, iteration: 125387
loss: 1.0168602466583252,grad_norm: 0.9999997849545197, iteration: 125388
loss: 1.0968012809753418,grad_norm: 0.9999998187318817, iteration: 125389
loss: 1.0242717266082764,grad_norm: 0.9916519219972713, iteration: 125390
loss: 1.131189227104187,grad_norm: 1.0000000041081443, iteration: 125391
loss: 1.0704402923583984,grad_norm: 0.9999991024681332, iteration: 125392
loss: 1.0554533004760742,grad_norm: 0.9999991616422049, iteration: 125393
loss: 1.1928465366363525,grad_norm: 0.9999993388472529, iteration: 125394
loss: 1.0652096271514893,grad_norm: 0.9999990917929389, iteration: 125395
loss: 1.1199538707733154,grad_norm: 0.9999990290605709, iteration: 125396
loss: 1.1883652210235596,grad_norm: 1.0000000722833313, iteration: 125397
loss: 1.0841619968414307,grad_norm: 0.9999998849134462, iteration: 125398
loss: 1.184677004814148,grad_norm: 0.9999998899838218, iteration: 125399
loss: 1.067966341972351,grad_norm: 0.9999991235165624, iteration: 125400
loss: 1.0354710817337036,grad_norm: 0.9999996841622333, iteration: 125401
loss: 1.0516806840896606,grad_norm: 0.9999992038586912, iteration: 125402
loss: 1.1730690002441406,grad_norm: 0.9999999155231876, iteration: 125403
loss: 1.309995174407959,grad_norm: 0.999999859321223, iteration: 125404
loss: 1.0082521438598633,grad_norm: 0.9999997448361589, iteration: 125405
loss: 1.103783130645752,grad_norm: 0.9999994101117673, iteration: 125406
loss: 1.1405338048934937,grad_norm: 0.9999994199344255, iteration: 125407
loss: 1.0137696266174316,grad_norm: 0.9999995121202031, iteration: 125408
loss: 1.0209970474243164,grad_norm: 0.8836721071225228, iteration: 125409
loss: 1.0016697645187378,grad_norm: 0.9999998647238144, iteration: 125410
loss: 0.959471583366394,grad_norm: 0.9999990746741372, iteration: 125411
loss: 1.2017338275909424,grad_norm: 0.999999958518109, iteration: 125412
loss: 1.146716833114624,grad_norm: 0.9999993585182564, iteration: 125413
loss: 1.196808934211731,grad_norm: 0.9999999284350798, iteration: 125414
loss: 1.0721977949142456,grad_norm: 0.9999992928371255, iteration: 125415
loss: 1.06023371219635,grad_norm: 0.9999998679060259, iteration: 125416
loss: 0.9984835386276245,grad_norm: 0.7894934751781372, iteration: 125417
loss: 1.176347255706787,grad_norm: 0.9999999650246233, iteration: 125418
loss: 1.0984026193618774,grad_norm: 0.9706527188140106, iteration: 125419
loss: 1.1605337858200073,grad_norm: 0.99999997025336, iteration: 125420
loss: 1.0648199319839478,grad_norm: 0.9999997334131882, iteration: 125421
loss: 1.2569137811660767,grad_norm: 0.9999998615212695, iteration: 125422
loss: 0.9640178084373474,grad_norm: 0.9999999733099335, iteration: 125423
loss: 1.0590323209762573,grad_norm: 0.9999997029196612, iteration: 125424
loss: 1.0957188606262207,grad_norm: 0.9999994775706699, iteration: 125425
loss: 1.0780874490737915,grad_norm: 0.9999998012680135, iteration: 125426
loss: 1.2304128408432007,grad_norm: 0.9999999213062107, iteration: 125427
loss: 1.0827847719192505,grad_norm: 0.999999354292181, iteration: 125428
loss: 1.102479338645935,grad_norm: 0.999999839543069, iteration: 125429
loss: 1.0653760433197021,grad_norm: 0.9999992508432335, iteration: 125430
loss: 1.020528793334961,grad_norm: 0.9988705490047879, iteration: 125431
loss: 1.0566208362579346,grad_norm: 0.9999993121642642, iteration: 125432
loss: 1.2545480728149414,grad_norm: 0.9999997646409904, iteration: 125433
loss: 0.9934553503990173,grad_norm: 0.999999190496845, iteration: 125434
loss: 1.0283143520355225,grad_norm: 0.9999992371048693, iteration: 125435
loss: 1.183396816253662,grad_norm: 0.9999996696703237, iteration: 125436
loss: 1.103719711303711,grad_norm: 0.9999998527398535, iteration: 125437
loss: 0.9958610534667969,grad_norm: 0.9999997620544453, iteration: 125438
loss: 1.0110172033309937,grad_norm: 0.9999995743459784, iteration: 125439
loss: 1.0006719827651978,grad_norm: 0.9999998190020607, iteration: 125440
loss: 1.115130066871643,grad_norm: 0.9999998654530443, iteration: 125441
loss: 1.0586611032485962,grad_norm: 0.9999993647525944, iteration: 125442
loss: 1.124776005744934,grad_norm: 0.9999999188796554, iteration: 125443
loss: 1.0522699356079102,grad_norm: 0.9999991118267503, iteration: 125444
loss: 1.0604794025421143,grad_norm: 0.9999990725120969, iteration: 125445
loss: 1.0266444683074951,grad_norm: 0.9979474975186786, iteration: 125446
loss: 1.1977277994155884,grad_norm: 0.999999586790045, iteration: 125447
loss: 1.1431025266647339,grad_norm: 0.9999992469499264, iteration: 125448
loss: 1.1096464395523071,grad_norm: 0.89024077642817, iteration: 125449
loss: 0.9901794195175171,grad_norm: 0.9211550515241127, iteration: 125450
loss: 1.1126211881637573,grad_norm: 0.9999995287181741, iteration: 125451
loss: 1.0599461793899536,grad_norm: 0.9999999383258847, iteration: 125452
loss: 0.9659292697906494,grad_norm: 1.000000098859617, iteration: 125453
loss: 1.1266525983810425,grad_norm: 0.9999997137807946, iteration: 125454
loss: 1.0545008182525635,grad_norm: 0.999999039107322, iteration: 125455
loss: 1.0772531032562256,grad_norm: 0.9999992181293273, iteration: 125456
loss: 1.062348484992981,grad_norm: 0.9999997982448516, iteration: 125457
loss: 1.0492221117019653,grad_norm: 0.9999993981127357, iteration: 125458
loss: 1.0881526470184326,grad_norm: 0.9999991935634363, iteration: 125459
loss: 1.1132453680038452,grad_norm: 0.9999998099458327, iteration: 125460
loss: 1.0334985256195068,grad_norm: 0.852713683142121, iteration: 125461
loss: 0.9866907596588135,grad_norm: 0.9999989765909444, iteration: 125462
loss: 1.0417357683181763,grad_norm: 0.9999997811289667, iteration: 125463
loss: 1.037532091140747,grad_norm: 0.9999994057464792, iteration: 125464
loss: 1.1528232097625732,grad_norm: 0.9999998345601773, iteration: 125465
loss: 1.0110496282577515,grad_norm: 0.9999994958270348, iteration: 125466
loss: 1.1151095628738403,grad_norm: 0.9999996881741293, iteration: 125467
loss: 1.1218352317810059,grad_norm: 0.9999998107573912, iteration: 125468
loss: 1.4740121364593506,grad_norm: 0.999999988060581, iteration: 125469
loss: 1.183260440826416,grad_norm: 0.9999998681475614, iteration: 125470
loss: 1.2746413946151733,grad_norm: 0.9999995928795515, iteration: 125471
loss: 1.170393466949463,grad_norm: 1.0000000089653895, iteration: 125472
loss: 1.053259253501892,grad_norm: 0.9999998788362101, iteration: 125473
loss: 1.171142339706421,grad_norm: 0.9999998321204295, iteration: 125474
loss: 1.4962730407714844,grad_norm: 0.999999692931091, iteration: 125475
loss: 1.0904172658920288,grad_norm: 0.9999998295875082, iteration: 125476
loss: 0.9904215335845947,grad_norm: 0.8656554047184193, iteration: 125477
loss: 0.9893122315406799,grad_norm: 0.9999994152178452, iteration: 125478
loss: 1.0274854898452759,grad_norm: 0.999999752424047, iteration: 125479
loss: 1.0270895957946777,grad_norm: 0.8611512912222119, iteration: 125480
loss: 1.2973192930221558,grad_norm: 0.9999998796822461, iteration: 125481
loss: 1.0169241428375244,grad_norm: 0.9999993896667282, iteration: 125482
loss: 1.043786644935608,grad_norm: 0.959260649836608, iteration: 125483
loss: 1.0356993675231934,grad_norm: 0.9919639954159731, iteration: 125484
loss: 1.0095723867416382,grad_norm: 0.999999535260109, iteration: 125485
loss: 1.0083152055740356,grad_norm: 0.9999995916858838, iteration: 125486
loss: 1.0472713708877563,grad_norm: 0.9999997924226478, iteration: 125487
loss: 0.9940710663795471,grad_norm: 0.9999992007454359, iteration: 125488
loss: 1.0055721998214722,grad_norm: 0.8000238511629867, iteration: 125489
loss: 1.0162321329116821,grad_norm: 0.9999997788097582, iteration: 125490
loss: 0.9866629242897034,grad_norm: 0.9185821527545848, iteration: 125491
loss: 1.0491137504577637,grad_norm: 0.8614841837283838, iteration: 125492
loss: 1.0167793035507202,grad_norm: 0.8525721968030758, iteration: 125493
loss: 0.9867826700210571,grad_norm: 0.7175179825310942, iteration: 125494
loss: 1.1018624305725098,grad_norm: 0.9999999228914286, iteration: 125495
loss: 0.985385537147522,grad_norm: 0.9999990042050287, iteration: 125496
loss: 0.9815316796302795,grad_norm: 0.8779721347201794, iteration: 125497
loss: 1.0100525617599487,grad_norm: 0.965899806378813, iteration: 125498
loss: 1.035705804824829,grad_norm: 0.9999997254042886, iteration: 125499
loss: 0.9683095216751099,grad_norm: 0.8022845264408628, iteration: 125500
loss: 1.0192416906356812,grad_norm: 0.9640532883923582, iteration: 125501
loss: 1.114147663116455,grad_norm: 0.9999995210979906, iteration: 125502
loss: 1.0051707029342651,grad_norm: 0.8828064807924413, iteration: 125503
loss: 1.1085065603256226,grad_norm: 0.9999992328781995, iteration: 125504
loss: 1.033003568649292,grad_norm: 0.9748133966978726, iteration: 125505
loss: 0.978987455368042,grad_norm: 0.8601633429769897, iteration: 125506
loss: 1.0198136568069458,grad_norm: 0.9999992755553904, iteration: 125507
loss: 1.0303866863250732,grad_norm: 0.9999990949412142, iteration: 125508
loss: 0.9865272641181946,grad_norm: 0.8591388649413587, iteration: 125509
loss: 1.0335408449172974,grad_norm: 0.9455098300422058, iteration: 125510
loss: 0.9876078963279724,grad_norm: 0.9405912840096372, iteration: 125511
loss: 1.125370740890503,grad_norm: 0.9999991239461705, iteration: 125512
loss: 1.0364388227462769,grad_norm: 0.9876221454963365, iteration: 125513
loss: 1.0674850940704346,grad_norm: 0.9999991131261042, iteration: 125514
loss: 1.0047773122787476,grad_norm: 0.9417492201777089, iteration: 125515
loss: 1.0450974702835083,grad_norm: 0.999999083770235, iteration: 125516
loss: 1.03098726272583,grad_norm: 0.7889414868385715, iteration: 125517
loss: 1.0679118633270264,grad_norm: 0.9999991759398268, iteration: 125518
loss: 1.1271021366119385,grad_norm: 0.9999993789899324, iteration: 125519
loss: 1.0359588861465454,grad_norm: 0.782375441386374, iteration: 125520
loss: 0.9946738481521606,grad_norm: 0.9028043918915961, iteration: 125521
loss: 1.0101622343063354,grad_norm: 0.999999282486213, iteration: 125522
loss: 0.98789381980896,grad_norm: 0.9999990551169182, iteration: 125523
loss: 1.048836350440979,grad_norm: 0.9999996242957947, iteration: 125524
loss: 1.0344123840332031,grad_norm: 0.9667909410315539, iteration: 125525
loss: 1.0372260808944702,grad_norm: 0.999999209827451, iteration: 125526
loss: 1.0187619924545288,grad_norm: 0.9337866733103735, iteration: 125527
loss: 1.0419561862945557,grad_norm: 0.8312963091343998, iteration: 125528
loss: 1.0104963779449463,grad_norm: 0.9933716765480896, iteration: 125529
loss: 1.0297188758850098,grad_norm: 0.864044413360169, iteration: 125530
loss: 1.0385026931762695,grad_norm: 0.9715388446362008, iteration: 125531
loss: 1.0273114442825317,grad_norm: 0.9999998837468955, iteration: 125532
loss: 0.9888465404510498,grad_norm: 0.8360303671760159, iteration: 125533
loss: 1.0395073890686035,grad_norm: 0.7894786898183662, iteration: 125534
loss: 1.0087566375732422,grad_norm: 0.7619751341418273, iteration: 125535
loss: 1.0250215530395508,grad_norm: 0.805424555638455, iteration: 125536
loss: 1.0049532651901245,grad_norm: 0.9999991660105539, iteration: 125537
loss: 1.0101126432418823,grad_norm: 0.871676278155765, iteration: 125538
loss: 1.1902328729629517,grad_norm: 0.9999999347598661, iteration: 125539
loss: 1.0659385919570923,grad_norm: 0.9999993281280213, iteration: 125540
loss: 1.0676780939102173,grad_norm: 0.9999993925657069, iteration: 125541
loss: 1.0685001611709595,grad_norm: 0.8073117181957277, iteration: 125542
loss: 0.9938997030258179,grad_norm: 0.866898831887793, iteration: 125543
loss: 0.9613823890686035,grad_norm: 0.9999990798907568, iteration: 125544
loss: 1.0012431144714355,grad_norm: 0.9999990898671084, iteration: 125545
loss: 1.094909429550171,grad_norm: 0.9999998292372403, iteration: 125546
loss: 0.9945849180221558,grad_norm: 0.999999086066968, iteration: 125547
loss: 0.9847412109375,grad_norm: 0.9999994262444613, iteration: 125548
loss: 1.0460960865020752,grad_norm: 0.999999509209774, iteration: 125549
loss: 1.0286152362823486,grad_norm: 0.9999989852720078, iteration: 125550
loss: 0.9775545597076416,grad_norm: 0.8762780682616491, iteration: 125551
loss: 1.032441258430481,grad_norm: 0.9999991135373194, iteration: 125552
loss: 0.9523696303367615,grad_norm: 0.7560721163802268, iteration: 125553
loss: 1.0272256135940552,grad_norm: 0.9999999763938812, iteration: 125554
loss: 0.9767959713935852,grad_norm: 0.999999056765678, iteration: 125555
loss: 1.0593663454055786,grad_norm: 0.9999992528154421, iteration: 125556
loss: 1.030519962310791,grad_norm: 0.9999991575841165, iteration: 125557
loss: 1.1351662874221802,grad_norm: 0.9999993630564288, iteration: 125558
loss: 1.026155710220337,grad_norm: 0.9083140895940037, iteration: 125559
loss: 1.00640070438385,grad_norm: 0.9999992330912649, iteration: 125560
loss: 1.0086276531219482,grad_norm: 0.8520734540985279, iteration: 125561
loss: 1.0352641344070435,grad_norm: 0.9999992556810017, iteration: 125562
loss: 0.9917179942131042,grad_norm: 0.8212624101916915, iteration: 125563
loss: 0.9845476746559143,grad_norm: 0.9999992443502632, iteration: 125564
loss: 0.9753758907318115,grad_norm: 0.9773452476707134, iteration: 125565
loss: 1.0498393774032593,grad_norm: 0.9999997831890178, iteration: 125566
loss: 1.0369926691055298,grad_norm: 0.9082846453619696, iteration: 125567
loss: 1.0040271282196045,grad_norm: 0.9999998858136963, iteration: 125568
loss: 1.0134484767913818,grad_norm: 0.999999192374665, iteration: 125569
loss: 1.157718539237976,grad_norm: 0.9999996399541382, iteration: 125570
loss: 1.0780106782913208,grad_norm: 0.9999999338039158, iteration: 125571
loss: 1.0284929275512695,grad_norm: 0.9999995003766179, iteration: 125572
loss: 1.3087059259414673,grad_norm: 0.9999997685408132, iteration: 125573
loss: 0.9975935220718384,grad_norm: 0.9999999132498268, iteration: 125574
loss: 1.0173709392547607,grad_norm: 0.9999996998198275, iteration: 125575
loss: 0.9884869456291199,grad_norm: 0.9999992326521452, iteration: 125576
loss: 1.0236612558364868,grad_norm: 0.8630866686134598, iteration: 125577
loss: 1.013386607170105,grad_norm: 0.999999382029054, iteration: 125578
loss: 0.9733077883720398,grad_norm: 0.8933678299271353, iteration: 125579
loss: 1.1154544353485107,grad_norm: 0.9999992599882181, iteration: 125580
loss: 1.0982749462127686,grad_norm: 0.9999996298523166, iteration: 125581
loss: 0.9783727526664734,grad_norm: 0.914125016734376, iteration: 125582
loss: 1.0024856328964233,grad_norm: 0.9999991211092558, iteration: 125583
loss: 1.0174496173858643,grad_norm: 0.8927588816861869, iteration: 125584
loss: 1.027334213256836,grad_norm: 0.9999993051511147, iteration: 125585
loss: 1.0235249996185303,grad_norm: 0.9999990894163435, iteration: 125586
loss: 1.012092113494873,grad_norm: 0.999999566976519, iteration: 125587
loss: 1.1185503005981445,grad_norm: 0.9999995386465895, iteration: 125588
loss: 1.040235996246338,grad_norm: 0.8893118135687725, iteration: 125589
loss: 1.054736852645874,grad_norm: 0.9999998460946585, iteration: 125590
loss: 0.9915862083435059,grad_norm: 0.8919550865975467, iteration: 125591
loss: 1.1297342777252197,grad_norm: 0.9999998382060933, iteration: 125592
loss: 1.0458663702011108,grad_norm: 0.9999998642383198, iteration: 125593
loss: 1.0097846984863281,grad_norm: 0.7824102548255499, iteration: 125594
loss: 1.193520426750183,grad_norm: 0.9999999234343194, iteration: 125595
loss: 1.0485068559646606,grad_norm: 0.999999241329221, iteration: 125596
loss: 1.1007962226867676,grad_norm: 0.9999993264049002, iteration: 125597
loss: 1.0018250942230225,grad_norm: 0.8857866920157267, iteration: 125598
loss: 1.154752492904663,grad_norm: 0.9999995168870807, iteration: 125599
loss: 1.0159828662872314,grad_norm: 0.9999990490066717, iteration: 125600
loss: 0.9861220121383667,grad_norm: 0.999999935915603, iteration: 125601
loss: 1.0217875242233276,grad_norm: 0.9999994787030071, iteration: 125602
loss: 1.0176388025283813,grad_norm: 0.9999991098258937, iteration: 125603
loss: 0.9806345701217651,grad_norm: 0.9999998666477311, iteration: 125604
loss: 1.0069148540496826,grad_norm: 0.9999991642142194, iteration: 125605
loss: 1.049168586730957,grad_norm: 0.9999991605613463, iteration: 125606
loss: 1.12034273147583,grad_norm: 0.9999994496542574, iteration: 125607
loss: 1.020706295967102,grad_norm: 0.99999963842884, iteration: 125608
loss: 1.0028220415115356,grad_norm: 0.9999991698652118, iteration: 125609
loss: 1.002209186553955,grad_norm: 0.9999994266729991, iteration: 125610
loss: 0.998127281665802,grad_norm: 0.9999998887182803, iteration: 125611
loss: 1.0337246656417847,grad_norm: 0.9438431655304452, iteration: 125612
loss: 1.0454533100128174,grad_norm: 0.999999847428626, iteration: 125613
loss: 1.0237799882888794,grad_norm: 0.9999992776296533, iteration: 125614
loss: 1.0164650678634644,grad_norm: 0.8383247039223571, iteration: 125615
loss: 1.005731463432312,grad_norm: 0.9999993673163917, iteration: 125616
loss: 1.0103938579559326,grad_norm: 0.999999700842617, iteration: 125617
loss: 0.9803022146224976,grad_norm: 0.9999995722959705, iteration: 125618
loss: 1.0390781164169312,grad_norm: 0.999999329906438, iteration: 125619
loss: 0.9956499934196472,grad_norm: 0.9999999989044317, iteration: 125620
loss: 0.9705309867858887,grad_norm: 0.9696439230105823, iteration: 125621
loss: 0.9845852851867676,grad_norm: 0.9999999407342018, iteration: 125622
loss: 1.033684253692627,grad_norm: 0.999999163694775, iteration: 125623
loss: 1.0746607780456543,grad_norm: 0.9372348398037395, iteration: 125624
loss: 1.0393439531326294,grad_norm: 0.9999990095955688, iteration: 125625
loss: 1.057370901107788,grad_norm: 0.9999994935136443, iteration: 125626
loss: 0.9987449645996094,grad_norm: 0.9999996423488082, iteration: 125627
loss: 0.9989192485809326,grad_norm: 0.9999991683251683, iteration: 125628
loss: 0.9840192794799805,grad_norm: 0.9999992761694593, iteration: 125629
loss: 1.065659523010254,grad_norm: 0.8899392073564971, iteration: 125630
loss: 1.0630993843078613,grad_norm: 0.9999999202076686, iteration: 125631
loss: 1.0432878732681274,grad_norm: 1.0000000201522992, iteration: 125632
loss: 0.9854564666748047,grad_norm: 0.9999998966006055, iteration: 125633
loss: 0.9772912263870239,grad_norm: 0.8698893327560796, iteration: 125634
loss: 0.9956652522087097,grad_norm: 0.7192090931024601, iteration: 125635
loss: 1.0357967615127563,grad_norm: 0.891574594920856, iteration: 125636
loss: 1.0144118070602417,grad_norm: 0.9999991407596919, iteration: 125637
loss: 1.155413269996643,grad_norm: 0.9999994982173699, iteration: 125638
loss: 1.1149239540100098,grad_norm: 0.9999994498666629, iteration: 125639
loss: 1.054319143295288,grad_norm: 0.8375335352282007, iteration: 125640
loss: 1.0136607885360718,grad_norm: 0.9509590467389314, iteration: 125641
loss: 0.9784480333328247,grad_norm: 0.9052438563128992, iteration: 125642
loss: 1.0254182815551758,grad_norm: 0.999999692623515, iteration: 125643
loss: 1.0101444721221924,grad_norm: 0.8054734327792178, iteration: 125644
loss: 1.0207946300506592,grad_norm: 0.9955701923760585, iteration: 125645
loss: 0.9918036460876465,grad_norm: 0.7876422035539465, iteration: 125646
loss: 1.0092071294784546,grad_norm: 0.9388927561157376, iteration: 125647
loss: 0.9799297451972961,grad_norm: 0.8681333874729356, iteration: 125648
loss: 1.00274658203125,grad_norm: 0.9999992481073007, iteration: 125649
loss: 1.0101181268692017,grad_norm: 0.8244646423890286, iteration: 125650
loss: 1.077972173690796,grad_norm: 0.9999991389797772, iteration: 125651
loss: 1.035237193107605,grad_norm: 0.9999991143401356, iteration: 125652
loss: 1.0226372480392456,grad_norm: 0.8418029019143779, iteration: 125653
loss: 1.0782039165496826,grad_norm: 0.9999998385331738, iteration: 125654
loss: 1.0432535409927368,grad_norm: 0.9539850659286039, iteration: 125655
loss: 1.042540192604065,grad_norm: 0.9999993615736177, iteration: 125656
loss: 0.94573575258255,grad_norm: 0.9999994750247168, iteration: 125657
loss: 0.9934236407279968,grad_norm: 0.999999095959364, iteration: 125658
loss: 0.9724375605583191,grad_norm: 0.9999991036822096, iteration: 125659
loss: 1.0475163459777832,grad_norm: 0.9999992034477172, iteration: 125660
loss: 1.019661545753479,grad_norm: 0.9629411026244907, iteration: 125661
loss: 1.0168895721435547,grad_norm: 0.9999990845026747, iteration: 125662
loss: 1.0571844577789307,grad_norm: 0.999999458267323, iteration: 125663
loss: 0.952513575553894,grad_norm: 0.8299772476236921, iteration: 125664
loss: 1.036197543144226,grad_norm: 0.9999994525853776, iteration: 125665
loss: 1.028829574584961,grad_norm: 0.8238552239447208, iteration: 125666
loss: 1.0157337188720703,grad_norm: 0.9999995532943696, iteration: 125667
loss: 1.0533751249313354,grad_norm: 0.9999994055844724, iteration: 125668
loss: 1.004478931427002,grad_norm: 0.9999998947081291, iteration: 125669
loss: 1.0272243022918701,grad_norm: 0.9999995129073637, iteration: 125670
loss: 1.0206806659698486,grad_norm: 0.9595401556693031, iteration: 125671
loss: 0.9904661774635315,grad_norm: 0.7984418648802343, iteration: 125672
loss: 0.9966294765472412,grad_norm: 0.8951048342948238, iteration: 125673
loss: 1.0177141427993774,grad_norm: 0.8879455084584332, iteration: 125674
loss: 1.0267833471298218,grad_norm: 0.9090095123508739, iteration: 125675
loss: 1.1007691621780396,grad_norm: 0.999999257962855, iteration: 125676
loss: 0.9681147933006287,grad_norm: 0.9999992489205087, iteration: 125677
loss: 0.9724434018135071,grad_norm: 0.9612117891867364, iteration: 125678
loss: 1.0141105651855469,grad_norm: 0.8689119507454479, iteration: 125679
loss: 0.9891061782836914,grad_norm: 0.9999996541471766, iteration: 125680
loss: 1.0072327852249146,grad_norm: 0.9999989156940907, iteration: 125681
loss: 1.0477988719940186,grad_norm: 0.7428948429963822, iteration: 125682
loss: 1.022339105606079,grad_norm: 0.9974489791116891, iteration: 125683
loss: 1.0835412740707397,grad_norm: 0.9999999011035065, iteration: 125684
loss: 1.00479257106781,grad_norm: 0.8620431544780688, iteration: 125685
loss: 0.9898946285247803,grad_norm: 1.0000000596246932, iteration: 125686
loss: 0.9455710649490356,grad_norm: 0.839043384965956, iteration: 125687
loss: 1.0129574537277222,grad_norm: 0.7816780278187563, iteration: 125688
loss: 0.9912112951278687,grad_norm: 0.8870688472138556, iteration: 125689
loss: 1.0199881792068481,grad_norm: 0.9366281108420897, iteration: 125690
loss: 1.0330322980880737,grad_norm: 0.9999998272744021, iteration: 125691
loss: 0.9547221064567566,grad_norm: 0.9348297535586363, iteration: 125692
loss: 1.0638155937194824,grad_norm: 0.9999997841386608, iteration: 125693
loss: 0.9859877228736877,grad_norm: 0.9999992238456382, iteration: 125694
loss: 1.0179340839385986,grad_norm: 0.9999992395101331, iteration: 125695
loss: 0.9516081809997559,grad_norm: 0.9992064860225035, iteration: 125696
loss: 1.0447652339935303,grad_norm: 0.8455123922079917, iteration: 125697
loss: 1.0014928579330444,grad_norm: 0.9999991365567754, iteration: 125698
loss: 0.9767640829086304,grad_norm: 0.8272416550388855, iteration: 125699
loss: 1.0484883785247803,grad_norm: 0.9182311238691784, iteration: 125700
loss: 1.0329992771148682,grad_norm: 0.9999997248805171, iteration: 125701
loss: 1.0460189580917358,grad_norm: 0.8976788097609815, iteration: 125702
loss: 1.0331876277923584,grad_norm: 0.9999993171626272, iteration: 125703
loss: 0.9806824326515198,grad_norm: 0.9999997897064651, iteration: 125704
loss: 1.012292504310608,grad_norm: 0.9999992308128585, iteration: 125705
loss: 0.9909849166870117,grad_norm: 0.9999993782453914, iteration: 125706
loss: 0.9714174866676331,grad_norm: 0.8323826378637834, iteration: 125707
loss: 1.0139656066894531,grad_norm: 0.801746976077893, iteration: 125708
loss: 1.0057597160339355,grad_norm: 0.8466531546346535, iteration: 125709
loss: 1.003798246383667,grad_norm: 0.8029953569435294, iteration: 125710
loss: 1.0258457660675049,grad_norm: 0.9999998157620142, iteration: 125711
loss: 0.9814474582672119,grad_norm: 0.8065873685064903, iteration: 125712
loss: 1.01320481300354,grad_norm: 0.9999992168541683, iteration: 125713
loss: 0.9809296131134033,grad_norm: 0.8823936236721147, iteration: 125714
loss: 1.0196456909179688,grad_norm: 0.8589658404078475, iteration: 125715
loss: 1.0458039045333862,grad_norm: 0.9999998517232607, iteration: 125716
loss: 1.0524563789367676,grad_norm: 0.9999992727236368, iteration: 125717
loss: 1.0164960622787476,grad_norm: 0.9999991931404072, iteration: 125718
loss: 1.0012942552566528,grad_norm: 0.9757104532809534, iteration: 125719
loss: 1.0350499153137207,grad_norm: 0.9999992138429764, iteration: 125720
loss: 1.0659997463226318,grad_norm: 0.9999992906004389, iteration: 125721
loss: 1.039646029472351,grad_norm: 0.9999994264010601, iteration: 125722
loss: 1.0638585090637207,grad_norm: 0.9999993260294281, iteration: 125723
loss: 1.0754213333129883,grad_norm: 0.9999999051922757, iteration: 125724
loss: 1.0540711879730225,grad_norm: 0.999999852718483, iteration: 125725
loss: 1.0015851259231567,grad_norm: 0.7876729467238476, iteration: 125726
loss: 1.0210930109024048,grad_norm: 0.9999991109220446, iteration: 125727
loss: 1.054875373840332,grad_norm: 0.9999999529470537, iteration: 125728
loss: 0.991206169128418,grad_norm: 0.813584587331379, iteration: 125729
loss: 1.0167063474655151,grad_norm: 0.9413353836904631, iteration: 125730
loss: 1.0174959897994995,grad_norm: 0.9999992361124157, iteration: 125731
loss: 1.0228703022003174,grad_norm: 0.9725648519227397, iteration: 125732
loss: 1.138890266418457,grad_norm: 0.9999998435254772, iteration: 125733
loss: 1.1117627620697021,grad_norm: 0.9999991692744122, iteration: 125734
loss: 1.073616623878479,grad_norm: 0.9999994166520821, iteration: 125735
loss: 1.0594446659088135,grad_norm: 0.9484230579564531, iteration: 125736
loss: 0.9895400404930115,grad_norm: 0.8263681434041111, iteration: 125737
loss: 1.0072752237319946,grad_norm: 0.9999990688530236, iteration: 125738
loss: 1.0998133420944214,grad_norm: 0.9999991143635883, iteration: 125739
loss: 1.041383147239685,grad_norm: 0.9999994808341423, iteration: 125740
loss: 0.9842904210090637,grad_norm: 0.9999990543710747, iteration: 125741
loss: 1.0506317615509033,grad_norm: 0.9999991149341688, iteration: 125742
loss: 1.0121897459030151,grad_norm: 0.8446116700589624, iteration: 125743
loss: 0.9440791606903076,grad_norm: 0.9104339681101519, iteration: 125744
loss: 0.9881263375282288,grad_norm: 0.948509682263217, iteration: 125745
loss: 1.0110867023468018,grad_norm: 0.9999991892622196, iteration: 125746
loss: 1.1066997051239014,grad_norm: 0.9999993494023158, iteration: 125747
loss: 0.988940954208374,grad_norm: 0.9999990788572777, iteration: 125748
loss: 1.0080994367599487,grad_norm: 0.8147134212528866, iteration: 125749
loss: 0.9954487085342407,grad_norm: 0.9999997689941723, iteration: 125750
loss: 0.987564742565155,grad_norm: 0.9999990471280594, iteration: 125751
loss: 0.999005913734436,grad_norm: 0.7840884218154608, iteration: 125752
loss: 1.0228012800216675,grad_norm: 0.9999990184035134, iteration: 125753
loss: 1.0165923833847046,grad_norm: 0.99522208874956, iteration: 125754
loss: 1.005237340927124,grad_norm: 0.999999902352698, iteration: 125755
loss: 1.0024919509887695,grad_norm: 0.9999990251586567, iteration: 125756
loss: 0.9902166724205017,grad_norm: 0.9999997850835014, iteration: 125757
loss: 0.987554132938385,grad_norm: 0.9999992465397716, iteration: 125758
loss: 1.089041829109192,grad_norm: 0.9999997407421265, iteration: 125759
loss: 0.9780123829841614,grad_norm: 0.9999994977280637, iteration: 125760
loss: 1.023786187171936,grad_norm: 0.803193589378888, iteration: 125761
loss: 1.0254840850830078,grad_norm: 0.9999994266359926, iteration: 125762
loss: 0.9954487085342407,grad_norm: 0.9999991778239504, iteration: 125763
loss: 0.9785294532775879,grad_norm: 0.8154379918105974, iteration: 125764
loss: 0.9927787184715271,grad_norm: 0.8229101843962692, iteration: 125765
loss: 1.1025011539459229,grad_norm: 0.9999995798542809, iteration: 125766
loss: 0.9982313513755798,grad_norm: 0.8321590030199435, iteration: 125767
loss: 1.005491852760315,grad_norm: 0.8071231791881119, iteration: 125768
loss: 1.0050748586654663,grad_norm: 0.9597163293737041, iteration: 125769
loss: 0.9848163723945618,grad_norm: 0.9999994195825904, iteration: 125770
loss: 1.0454572439193726,grad_norm: 0.9999993347522697, iteration: 125771
loss: 0.9919649362564087,grad_norm: 1.0000000386257424, iteration: 125772
loss: 1.0063167810440063,grad_norm: 0.8859726665107968, iteration: 125773
loss: 1.0505658388137817,grad_norm: 0.9999996837082684, iteration: 125774
loss: 1.0678774118423462,grad_norm: 0.9999993988721892, iteration: 125775
loss: 0.9908609390258789,grad_norm: 0.7728369418108272, iteration: 125776
loss: 1.035213828086853,grad_norm: 0.9404570057005105, iteration: 125777
loss: 0.9941829442977905,grad_norm: 0.9999991087193049, iteration: 125778
loss: 1.0679875612258911,grad_norm: 0.9999992218491686, iteration: 125779
loss: 0.9930078983306885,grad_norm: 0.921714252376867, iteration: 125780
loss: 0.9699430465698242,grad_norm: 0.9809084610095001, iteration: 125781
loss: 0.9961614608764648,grad_norm: 0.9299752918162365, iteration: 125782
loss: 1.1079134941101074,grad_norm: 0.9999997520903282, iteration: 125783
loss: 0.9431129693984985,grad_norm: 0.8966955175156291, iteration: 125784
loss: 1.038964033126831,grad_norm: 0.9999995009705309, iteration: 125785
loss: 1.0632556676864624,grad_norm: 0.999999133774165, iteration: 125786
loss: 0.9686334729194641,grad_norm: 0.936049972294979, iteration: 125787
loss: 1.0251682996749878,grad_norm: 0.99999958908468, iteration: 125788
loss: 1.0330090522766113,grad_norm: 0.8777216349142212, iteration: 125789
loss: 1.0214442014694214,grad_norm: 0.8077531251381959, iteration: 125790
loss: 0.9871086478233337,grad_norm: 0.9162621588278481, iteration: 125791
loss: 1.060166358947754,grad_norm: 0.9999998761244925, iteration: 125792
loss: 1.0857982635498047,grad_norm: 0.911222538861671, iteration: 125793
loss: 0.9878281950950623,grad_norm: 0.8528598568663794, iteration: 125794
loss: 0.9655024409294128,grad_norm: 0.8980104758954478, iteration: 125795
loss: 1.0153697729110718,grad_norm: 0.999999130910754, iteration: 125796
loss: 1.0464112758636475,grad_norm: 0.9999991776306255, iteration: 125797
loss: 0.9523735046386719,grad_norm: 0.9999990223078021, iteration: 125798
loss: 1.0250217914581299,grad_norm: 0.9047647399205068, iteration: 125799
loss: 1.0137380361557007,grad_norm: 0.9144109745723286, iteration: 125800
loss: 1.0098426342010498,grad_norm: 0.9999990950813411, iteration: 125801
loss: 1.0253762006759644,grad_norm: 0.9999994837797433, iteration: 125802
loss: 1.022159218788147,grad_norm: 0.9999994299828793, iteration: 125803
loss: 1.023380994796753,grad_norm: 0.9999994016607385, iteration: 125804
loss: 1.0527228116989136,grad_norm: 0.999999070417947, iteration: 125805
loss: 1.0343945026397705,grad_norm: 0.9999992208145038, iteration: 125806
loss: 1.0109024047851562,grad_norm: 0.9999991596848691, iteration: 125807
loss: 0.971394956111908,grad_norm: 0.9999990851689091, iteration: 125808
loss: 1.0103288888931274,grad_norm: 0.9999990585108882, iteration: 125809
loss: 1.098935842514038,grad_norm: 0.9999992758980456, iteration: 125810
loss: 0.9862878918647766,grad_norm: 0.9999990168900784, iteration: 125811
loss: 1.0095994472503662,grad_norm: 0.9999997135014943, iteration: 125812
loss: 1.0154390335083008,grad_norm: 0.99999924214263, iteration: 125813
loss: 1.0365712642669678,grad_norm: 0.9999994223775353, iteration: 125814
loss: 1.1037088632583618,grad_norm: 0.9999994606779482, iteration: 125815
loss: 1.027666687965393,grad_norm: 0.7572302936302702, iteration: 125816
loss: 0.9652799963951111,grad_norm: 0.6924423863393503, iteration: 125817
loss: 1.010728120803833,grad_norm: 0.8621872758028765, iteration: 125818
loss: 1.1410033702850342,grad_norm: 0.9999991842156593, iteration: 125819
loss: 1.009608507156372,grad_norm: 0.6931951422398176, iteration: 125820
loss: 0.9904703497886658,grad_norm: 0.9505381824563708, iteration: 125821
loss: 1.0365166664123535,grad_norm: 0.9999990381168922, iteration: 125822
loss: 1.0244669914245605,grad_norm: 0.7350509794020196, iteration: 125823
loss: 0.9701161980628967,grad_norm: 0.999999592174555, iteration: 125824
loss: 1.107272744178772,grad_norm: 0.9999990098590541, iteration: 125825
loss: 1.0261270999908447,grad_norm: 0.999999662751864, iteration: 125826
loss: 1.0028475522994995,grad_norm: 0.8370903025161952, iteration: 125827
loss: 1.0260992050170898,grad_norm: 0.809430776880611, iteration: 125828
loss: 1.025559902191162,grad_norm: 0.9999992142093412, iteration: 125829
loss: 1.0857892036437988,grad_norm: 0.9999992688090639, iteration: 125830
loss: 1.0198848247528076,grad_norm: 0.7901987806796598, iteration: 125831
loss: 1.0394288301467896,grad_norm: 0.9949781104648454, iteration: 125832
loss: 0.9871615171432495,grad_norm: 0.9208953627889253, iteration: 125833
loss: 1.1644325256347656,grad_norm: 0.9999999626085677, iteration: 125834
loss: 1.0243233442306519,grad_norm: 0.9999993095913714, iteration: 125835
loss: 1.0539730787277222,grad_norm: 0.9999994026293502, iteration: 125836
loss: 1.19143807888031,grad_norm: 0.9999996786691489, iteration: 125837
loss: 1.058160424232483,grad_norm: 0.9999999837195982, iteration: 125838
loss: 1.1039791107177734,grad_norm: 0.9999991437573756, iteration: 125839
loss: 0.9905678033828735,grad_norm: 0.9999991829417553, iteration: 125840
loss: 0.9942997694015503,grad_norm: 0.6942844489296149, iteration: 125841
loss: 1.0374433994293213,grad_norm: 0.9999994164746765, iteration: 125842
loss: 1.0412129163742065,grad_norm: 0.9999998710065897, iteration: 125843
loss: 1.0408412218093872,grad_norm: 0.9999995735332822, iteration: 125844
loss: 1.0197662115097046,grad_norm: 0.8072328052454539, iteration: 125845
loss: 1.0023059844970703,grad_norm: 0.9999999243361097, iteration: 125846
loss: 1.0283198356628418,grad_norm: 0.9999991690739694, iteration: 125847
loss: 1.025904893875122,grad_norm: 0.9999991371712336, iteration: 125848
loss: 0.973415732383728,grad_norm: 0.853359109892842, iteration: 125849
loss: 0.9940586090087891,grad_norm: 0.8903130810626341, iteration: 125850
loss: 0.9979029893875122,grad_norm: 0.999999084714813, iteration: 125851
loss: 1.061178207397461,grad_norm: 0.9999995871663899, iteration: 125852
loss: 1.0117344856262207,grad_norm: 0.9082249957200297, iteration: 125853
loss: 1.0033717155456543,grad_norm: 0.9999991531820657, iteration: 125854
loss: 1.0470091104507446,grad_norm: 0.9999994310789306, iteration: 125855
loss: 1.2175275087356567,grad_norm: 0.999999884894036, iteration: 125856
loss: 1.0781699419021606,grad_norm: 0.9999995638030151, iteration: 125857
loss: 1.1011172533035278,grad_norm: 0.9999990737966649, iteration: 125858
loss: 1.0609080791473389,grad_norm: 0.7594284268767995, iteration: 125859
loss: 1.0233867168426514,grad_norm: 0.9999998464221215, iteration: 125860
loss: 1.012011170387268,grad_norm: 0.8479762387634596, iteration: 125861
loss: 0.9990459680557251,grad_norm: 0.7393318837325129, iteration: 125862
loss: 1.325620174407959,grad_norm: 0.9999997396117882, iteration: 125863
loss: 1.0222548246383667,grad_norm: 0.9999990715597916, iteration: 125864
loss: 1.0378966331481934,grad_norm: 0.999999768773356, iteration: 125865
loss: 1.0179837942123413,grad_norm: 0.8540946625689927, iteration: 125866
loss: 0.9785014986991882,grad_norm: 0.999999060943294, iteration: 125867
loss: 1.020928978919983,grad_norm: 0.9616080569060574, iteration: 125868
loss: 1.0198187828063965,grad_norm: 0.9999993401619726, iteration: 125869
loss: 1.098968744277954,grad_norm: 0.9999997370107674, iteration: 125870
loss: 1.0708591938018799,grad_norm: 0.9999998388163391, iteration: 125871
loss: 1.0188970565795898,grad_norm: 0.8891947363328846, iteration: 125872
loss: 1.0142455101013184,grad_norm: 0.7944956041697375, iteration: 125873
loss: 1.0395115613937378,grad_norm: 0.9999992159887927, iteration: 125874
loss: 1.0515830516815186,grad_norm: 0.9999990996359696, iteration: 125875
loss: 1.3359237909317017,grad_norm: 0.9999997273922424, iteration: 125876
loss: 1.0198463201522827,grad_norm: 0.8706605235240975, iteration: 125877
loss: 1.0361475944519043,grad_norm: 0.9999997874158075, iteration: 125878
loss: 0.982164204120636,grad_norm: 0.9694277726984702, iteration: 125879
loss: 1.061164140701294,grad_norm: 0.9999993862949921, iteration: 125880
loss: 1.0115251541137695,grad_norm: 0.9999994630880008, iteration: 125881
loss: 0.9892966151237488,grad_norm: 0.9861059573789437, iteration: 125882
loss: 0.9773805141448975,grad_norm: 0.9327702921742168, iteration: 125883
loss: 1.0781360864639282,grad_norm: 0.9999992180022312, iteration: 125884
loss: 1.0130817890167236,grad_norm: 0.8017488939688519, iteration: 125885
loss: 0.9778664708137512,grad_norm: 0.9999991561071014, iteration: 125886
loss: 0.9771395921707153,grad_norm: 0.89511018972709, iteration: 125887
loss: 1.029044508934021,grad_norm: 0.9999991062274901, iteration: 125888
loss: 1.0265531539916992,grad_norm: 0.8671446205186426, iteration: 125889
loss: 0.9954636096954346,grad_norm: 0.9671370669243032, iteration: 125890
loss: 0.9800013899803162,grad_norm: 0.9999991099032755, iteration: 125891
loss: 1.006658911705017,grad_norm: 0.8456835661949058, iteration: 125892
loss: 0.9751979112625122,grad_norm: 0.9999992717182793, iteration: 125893
loss: 0.9894925951957703,grad_norm: 0.8256136556090317, iteration: 125894
loss: 1.0010976791381836,grad_norm: 0.8561397091910666, iteration: 125895
loss: 1.0007472038269043,grad_norm: 0.9688056201354183, iteration: 125896
loss: 0.9812890291213989,grad_norm: 0.9999991767637934, iteration: 125897
loss: 1.0003516674041748,grad_norm: 0.7709659422953746, iteration: 125898
loss: 1.0230695009231567,grad_norm: 0.9999990547408498, iteration: 125899
loss: 1.1297270059585571,grad_norm: 0.9999997871101012, iteration: 125900
loss: 1.02204167842865,grad_norm: 0.9999995369538597, iteration: 125901
loss: 1.0741190910339355,grad_norm: 0.9999999957771903, iteration: 125902
loss: 1.0530731678009033,grad_norm: 0.8675621737418031, iteration: 125903
loss: 0.9709340333938599,grad_norm: 0.999999575499416, iteration: 125904
loss: 0.9947099089622498,grad_norm: 0.9999990461792512, iteration: 125905
loss: 1.0236812829971313,grad_norm: 0.9999992420757873, iteration: 125906
loss: 0.9671874642372131,grad_norm: 0.7557883473550076, iteration: 125907
loss: 1.0189532041549683,grad_norm: 0.8158895943447761, iteration: 125908
loss: 0.9615693688392639,grad_norm: 0.999999158882355, iteration: 125909
loss: 1.0484585762023926,grad_norm: 0.9999994233556694, iteration: 125910
loss: 1.0040150880813599,grad_norm: 0.8975897884470554, iteration: 125911
loss: 1.1072027683258057,grad_norm: 0.9999994732829003, iteration: 125912
loss: 1.0578292608261108,grad_norm: 0.9999996800550481, iteration: 125913
loss: 1.0148916244506836,grad_norm: 0.8697626978898649, iteration: 125914
loss: 1.0011906623840332,grad_norm: 0.7267186275792896, iteration: 125915
loss: 1.027054786682129,grad_norm: 0.7704482947729778, iteration: 125916
loss: 1.021092414855957,grad_norm: 0.8124764654444366, iteration: 125917
loss: 0.9630209803581238,grad_norm: 0.9999992705639461, iteration: 125918
loss: 0.9729712605476379,grad_norm: 0.9999997876261041, iteration: 125919
loss: 1.0058547258377075,grad_norm: 0.9999999842960158, iteration: 125920
loss: 1.0437493324279785,grad_norm: 0.9999998825156213, iteration: 125921
loss: 0.9981513023376465,grad_norm: 0.9999998269185144, iteration: 125922
loss: 1.1989097595214844,grad_norm: 0.9999996271056695, iteration: 125923
loss: 1.045006275177002,grad_norm: 0.9999995994221761, iteration: 125924
loss: 1.1127424240112305,grad_norm: 0.9999991514198263, iteration: 125925
loss: 1.0246953964233398,grad_norm: 0.8699081155791631, iteration: 125926
loss: 0.997479259967804,grad_norm: 0.978661311331522, iteration: 125927
loss: 1.0060380697250366,grad_norm: 0.999999887213918, iteration: 125928
loss: 0.9977084994316101,grad_norm: 0.9205967438516719, iteration: 125929
loss: 1.049741506576538,grad_norm: 0.9999994211192699, iteration: 125930
loss: 1.0810151100158691,grad_norm: 0.9999995328487495, iteration: 125931
loss: 1.1313674449920654,grad_norm: 0.9999998732414435, iteration: 125932
loss: 1.1466894149780273,grad_norm: 0.9999996182816472, iteration: 125933
loss: 0.9907317161560059,grad_norm: 0.9543512107272656, iteration: 125934
loss: 1.020197868347168,grad_norm: 0.9999992656276108, iteration: 125935
loss: 1.190454363822937,grad_norm: 0.9999993051572914, iteration: 125936
loss: 1.208254098892212,grad_norm: 0.9999994891902625, iteration: 125937
loss: 1.0349129438400269,grad_norm: 0.9999990711956981, iteration: 125938
loss: 1.0137306451797485,grad_norm: 0.9302876583132915, iteration: 125939
loss: 1.0543174743652344,grad_norm: 0.9999993249888375, iteration: 125940
loss: 1.0085428953170776,grad_norm: 0.9999994397506382, iteration: 125941
loss: 1.0056103467941284,grad_norm: 0.9999990906465464, iteration: 125942
loss: 1.0340875387191772,grad_norm: 0.9166891514797465, iteration: 125943
loss: 1.143648386001587,grad_norm: 0.9999999663220294, iteration: 125944
loss: 1.0928150415420532,grad_norm: 0.9260086971382986, iteration: 125945
loss: 1.015359878540039,grad_norm: 0.9999994521935778, iteration: 125946
loss: 1.0152500867843628,grad_norm: 0.9999992108749248, iteration: 125947
loss: 1.0418866872787476,grad_norm: 0.9999992649006528, iteration: 125948
loss: 1.0499985218048096,grad_norm: 0.9999997949736178, iteration: 125949
loss: 1.1074820756912231,grad_norm: 0.9999993025726672, iteration: 125950
loss: 1.0163226127624512,grad_norm: 0.8998280573744802, iteration: 125951
loss: 0.961346447467804,grad_norm: 0.9999993038815703, iteration: 125952
loss: 1.0319856405258179,grad_norm: 0.999999191973528, iteration: 125953
loss: 0.9904254078865051,grad_norm: 0.9999995110237283, iteration: 125954
loss: 1.0299699306488037,grad_norm: 0.9017940946826288, iteration: 125955
loss: 0.9981142282485962,grad_norm: 0.9999993966096009, iteration: 125956
loss: 1.0292553901672363,grad_norm: 0.9999992715304229, iteration: 125957
loss: 1.0230427980422974,grad_norm: 0.8366404012268992, iteration: 125958
loss: 1.0523958206176758,grad_norm: 0.9999998118868056, iteration: 125959
loss: 0.99024498462677,grad_norm: 0.9999991701107039, iteration: 125960
loss: 1.012434720993042,grad_norm: 0.8829727785991421, iteration: 125961
loss: 1.1150665283203125,grad_norm: 0.9999997068623516, iteration: 125962
loss: 0.9750463366508484,grad_norm: 0.999999008982872, iteration: 125963
loss: 1.0220240354537964,grad_norm: 0.7925402340283855, iteration: 125964
loss: 0.9866257309913635,grad_norm: 0.9168243974727812, iteration: 125965
loss: 1.100647211074829,grad_norm: 1.0000000772741118, iteration: 125966
loss: 1.0270310640335083,grad_norm: 0.9868204300825779, iteration: 125967
loss: 0.9803937077522278,grad_norm: 0.9999992292747167, iteration: 125968
loss: 1.0040099620819092,grad_norm: 0.8115583645086217, iteration: 125969
loss: 1.035388469696045,grad_norm: 0.9999993363757861, iteration: 125970
loss: 1.0509189367294312,grad_norm: 0.9999992417131213, iteration: 125971
loss: 0.9836164116859436,grad_norm: 0.9859529236702742, iteration: 125972
loss: 1.0775234699249268,grad_norm: 0.9999993361734036, iteration: 125973
loss: 1.0059665441513062,grad_norm: 0.9865631523266921, iteration: 125974
loss: 1.1051490306854248,grad_norm: 0.9999997674366438, iteration: 125975
loss: 1.02347731590271,grad_norm: 0.9999999243979919, iteration: 125976
loss: 0.9923508763313293,grad_norm: 0.9015303302637906, iteration: 125977
loss: 1.0352985858917236,grad_norm: 0.999999041302694, iteration: 125978
loss: 1.0897918939590454,grad_norm: 0.9999992104890675, iteration: 125979
loss: 1.0096333026885986,grad_norm: 0.9999995036855117, iteration: 125980
loss: 1.0261884927749634,grad_norm: 0.9999992745951062, iteration: 125981
loss: 1.0008456707000732,grad_norm: 0.7834180976699204, iteration: 125982
loss: 1.1632215976715088,grad_norm: 0.9999997643765571, iteration: 125983
loss: 1.0707916021347046,grad_norm: 0.9999997232876766, iteration: 125984
loss: 0.9997866749763489,grad_norm: 0.999998989054874, iteration: 125985
loss: 1.0131577253341675,grad_norm: 0.9999991281541648, iteration: 125986
loss: 0.9917271137237549,grad_norm: 0.8011369566795349, iteration: 125987
loss: 1.027367115020752,grad_norm: 0.8894534167119499, iteration: 125988
loss: 1.0674515962600708,grad_norm: 0.9999991711424848, iteration: 125989
loss: 1.020153522491455,grad_norm: 0.9153813973950441, iteration: 125990
loss: 1.042741298675537,grad_norm: 0.8234655921971419, iteration: 125991
loss: 1.0728814601898193,grad_norm: 0.9999995344005349, iteration: 125992
loss: 1.015887975692749,grad_norm: 0.7865552521064132, iteration: 125993
loss: 0.9767176508903503,grad_norm: 0.881645466056074, iteration: 125994
loss: 0.9957380294799805,grad_norm: 0.7979972433818698, iteration: 125995
loss: 1.0743299722671509,grad_norm: 0.9999998776084754, iteration: 125996
loss: 1.0400221347808838,grad_norm: 0.8960867449931789, iteration: 125997
loss: 1.0085642337799072,grad_norm: 0.9184632974840791, iteration: 125998
loss: 1.0517125129699707,grad_norm: 0.9735989722683509, iteration: 125999
loss: 0.9856640100479126,grad_norm: 0.8633083669381366, iteration: 126000
loss: 1.0298429727554321,grad_norm: 0.9999991179535704, iteration: 126001
loss: 1.003854751586914,grad_norm: 0.9983991803750623, iteration: 126002
loss: 1.0757505893707275,grad_norm: 0.9999999976107241, iteration: 126003
loss: 1.0591307878494263,grad_norm: 0.9037565506411569, iteration: 126004
loss: 1.0640872716903687,grad_norm: 0.9999998660956297, iteration: 126005
loss: 1.0344783067703247,grad_norm: 0.9999998786517006, iteration: 126006
loss: 1.0204919576644897,grad_norm: 0.9999993799653144, iteration: 126007
loss: 1.109471321105957,grad_norm: 0.9999997484972555, iteration: 126008
loss: 0.992526650428772,grad_norm: 0.9999994326358451, iteration: 126009
loss: 1.0498493909835815,grad_norm: 0.9723571454816542, iteration: 126010
loss: 1.016979455947876,grad_norm: 0.8141995470359782, iteration: 126011
loss: 1.011696457862854,grad_norm: 0.7097801154081466, iteration: 126012
loss: 0.9853886961936951,grad_norm: 0.8082066011327906, iteration: 126013
loss: 0.9770016074180603,grad_norm: 0.8655511534173427, iteration: 126014
loss: 1.1276969909667969,grad_norm: 0.9999997689522543, iteration: 126015
loss: 1.0323657989501953,grad_norm: 0.8429952502295768, iteration: 126016
loss: 1.0912103652954102,grad_norm: 0.9999992558851819, iteration: 126017
loss: 0.9861391186714172,grad_norm: 0.9823446642663581, iteration: 126018
loss: 1.0470911264419556,grad_norm: 0.9999989954254214, iteration: 126019
loss: 1.0084624290466309,grad_norm: 0.8384223870902551, iteration: 126020
loss: 1.0645209550857544,grad_norm: 0.9419965573550784, iteration: 126021
loss: 1.0367612838745117,grad_norm: 0.9999994290123926, iteration: 126022
loss: 1.0314918756484985,grad_norm: 0.9999994635325257, iteration: 126023
loss: 1.0790128707885742,grad_norm: 0.9999993662942036, iteration: 126024
loss: 0.9884199500083923,grad_norm: 0.9170333217762898, iteration: 126025
loss: 1.0109766721725464,grad_norm: 0.9999992429173615, iteration: 126026
loss: 1.0146678686141968,grad_norm: 0.9646818231819853, iteration: 126027
loss: 1.034785270690918,grad_norm: 0.9999997724316043, iteration: 126028
loss: 0.9774944186210632,grad_norm: 0.8659366251440844, iteration: 126029
loss: 1.032522439956665,grad_norm: 0.9999991249968389, iteration: 126030
loss: 1.0262043476104736,grad_norm: 0.9999991426437952, iteration: 126031
loss: 1.0388789176940918,grad_norm: 0.9999995998883865, iteration: 126032
loss: 0.991266667842865,grad_norm: 0.7655842297763934, iteration: 126033
loss: 0.9874550700187683,grad_norm: 0.9999991088083992, iteration: 126034
loss: 1.0523991584777832,grad_norm: 0.9999994185782021, iteration: 126035
loss: 1.011836051940918,grad_norm: 0.8614819735551053, iteration: 126036
loss: 1.003873586654663,grad_norm: 0.7524192379133383, iteration: 126037
loss: 1.0327911376953125,grad_norm: 1.0000000328407206, iteration: 126038
loss: 0.9931114912033081,grad_norm: 0.8970030513804811, iteration: 126039
loss: 1.0085272789001465,grad_norm: 0.8680573550039937, iteration: 126040
loss: 1.0093384981155396,grad_norm: 0.9999990870677958, iteration: 126041
loss: 1.0148969888687134,grad_norm: 0.8151825148649893, iteration: 126042
loss: 1.039861798286438,grad_norm: 0.9999992785309154, iteration: 126043
loss: 1.109312891960144,grad_norm: 0.9999994350820122, iteration: 126044
loss: 1.1300418376922607,grad_norm: 0.9999993945426773, iteration: 126045
loss: 1.0429388284683228,grad_norm: 0.9999997484792436, iteration: 126046
loss: 1.0045650005340576,grad_norm: 0.9032036532815791, iteration: 126047
loss: 0.9744942784309387,grad_norm: 0.9498814434959582, iteration: 126048
loss: 0.9959590435028076,grad_norm: 0.9999990241670124, iteration: 126049
loss: 0.9913243055343628,grad_norm: 0.9999990555782893, iteration: 126050
loss: 0.9835274815559387,grad_norm: 0.9999992237857076, iteration: 126051
loss: 1.0480130910873413,grad_norm: 0.9999998221522147, iteration: 126052
loss: 0.9979400038719177,grad_norm: 0.8891271462506648, iteration: 126053
loss: 1.0233999490737915,grad_norm: 0.9999997761166952, iteration: 126054
loss: 1.019183874130249,grad_norm: 0.9999998452629368, iteration: 126055
loss: 1.0434961318969727,grad_norm: 0.9999993498809129, iteration: 126056
loss: 1.0335354804992676,grad_norm: 0.9999994137523663, iteration: 126057
loss: 1.0168657302856445,grad_norm: 0.7610645104727456, iteration: 126058
loss: 0.9748751521110535,grad_norm: 0.8525699739858746, iteration: 126059
loss: 1.1477173566818237,grad_norm: 0.9999998003080055, iteration: 126060
loss: 1.0747864246368408,grad_norm: 0.9999992417694186, iteration: 126061
loss: 0.9864076375961304,grad_norm: 0.9999991331420556, iteration: 126062
loss: 0.9739771485328674,grad_norm: 0.9999991764176627, iteration: 126063
loss: 1.0358773469924927,grad_norm: 0.974005905442018, iteration: 126064
loss: 1.0387908220291138,grad_norm: 0.8038312036187243, iteration: 126065
loss: 1.035591721534729,grad_norm: 0.9999995135214725, iteration: 126066
loss: 1.0716100931167603,grad_norm: 0.9999991001950533, iteration: 126067
loss: 1.0285876989364624,grad_norm: 0.9999999514838589, iteration: 126068
loss: 1.264501690864563,grad_norm: 0.9999998384451468, iteration: 126069
loss: 1.0950926542282104,grad_norm: 0.9999994795241693, iteration: 126070
loss: 0.9884622693061829,grad_norm: 0.858670860326922, iteration: 126071
loss: 1.0078539848327637,grad_norm: 0.9566283940286918, iteration: 126072
loss: 1.0675537586212158,grad_norm: 0.9999995371402698, iteration: 126073
loss: 1.0762536525726318,grad_norm: 0.9999991116180542, iteration: 126074
loss: 0.9851803779602051,grad_norm: 0.8589681976687014, iteration: 126075
loss: 1.0331586599349976,grad_norm: 1.0000000166584728, iteration: 126076
loss: 0.9848135113716125,grad_norm: 0.9443857432232868, iteration: 126077
loss: 1.007123351097107,grad_norm: 0.8723897965065275, iteration: 126078
loss: 1.0249050855636597,grad_norm: 0.9999991724291818, iteration: 126079
loss: 0.9874848127365112,grad_norm: 0.8336126355023107, iteration: 126080
loss: 1.0119997262954712,grad_norm: 0.9999992354945404, iteration: 126081
loss: 1.0172767639160156,grad_norm: 0.9999994279959205, iteration: 126082
loss: 0.9878751039505005,grad_norm: 0.7632476382640915, iteration: 126083
loss: 1.115706443786621,grad_norm: 0.9999999300164794, iteration: 126084
loss: 0.9905064702033997,grad_norm: 0.89222031216148, iteration: 126085
loss: 1.049586534500122,grad_norm: 0.9999996240113369, iteration: 126086
loss: 1.0075032711029053,grad_norm: 0.9264253501655756, iteration: 126087
loss: 1.0337191820144653,grad_norm: 0.9999992494295321, iteration: 126088
loss: 1.1245543956756592,grad_norm: 0.9999992297249045, iteration: 126089
loss: 1.026262879371643,grad_norm: 0.92958254854283, iteration: 126090
loss: 1.135374665260315,grad_norm: 0.9999998439037043, iteration: 126091
loss: 1.030094027519226,grad_norm: 0.9999995586375524, iteration: 126092
loss: 1.0372397899627686,grad_norm: 0.9999997004019417, iteration: 126093
loss: 1.0617568492889404,grad_norm: 0.9999991677685979, iteration: 126094
loss: 0.9911889433860779,grad_norm: 0.999999338707824, iteration: 126095
loss: 1.0659024715423584,grad_norm: 0.9999992064813656, iteration: 126096
loss: 0.9652137160301208,grad_norm: 0.8959602629599561, iteration: 126097
loss: 0.9953115582466125,grad_norm: 0.9999990840451491, iteration: 126098
loss: 1.0068233013153076,grad_norm: 0.8796013467747024, iteration: 126099
loss: 1.0379517078399658,grad_norm: 0.9690829342854377, iteration: 126100
loss: 0.9970008134841919,grad_norm: 0.9175462920178112, iteration: 126101
loss: 0.9878643155097961,grad_norm: 0.8357894057159576, iteration: 126102
loss: 0.9971333146095276,grad_norm: 0.9483460055428898, iteration: 126103
loss: 1.0456418991088867,grad_norm: 0.8850872150028384, iteration: 126104
loss: 1.1955149173736572,grad_norm: 0.9999992916495231, iteration: 126105
loss: 1.0380007028579712,grad_norm: 0.965857016532332, iteration: 126106
loss: 1.0017086267471313,grad_norm: 0.9144949659458149, iteration: 126107
loss: 1.0002763271331787,grad_norm: 0.9118437368731998, iteration: 126108
loss: 1.0120673179626465,grad_norm: 0.9999991929359533, iteration: 126109
loss: 1.0029107332229614,grad_norm: 0.79357565059698, iteration: 126110
loss: 1.0217536687850952,grad_norm: 0.8150692533132274, iteration: 126111
loss: 1.0259337425231934,grad_norm: 0.9999994930111862, iteration: 126112
loss: 1.037392497062683,grad_norm: 0.8647880784603803, iteration: 126113
loss: 0.9692416191101074,grad_norm: 0.9999990925394625, iteration: 126114
loss: 1.0004992485046387,grad_norm: 0.966945933471302, iteration: 126115
loss: 1.1348097324371338,grad_norm: 0.9999993225711498, iteration: 126116
loss: 0.9949862957000732,grad_norm: 0.9999992437343825, iteration: 126117
loss: 0.9717677235603333,grad_norm: 0.8811662660796742, iteration: 126118
loss: 1.0224581956863403,grad_norm: 0.9999991455865862, iteration: 126119
loss: 1.0035511255264282,grad_norm: 0.79057173890643, iteration: 126120
loss: 1.0245224237442017,grad_norm: 0.999999363614003, iteration: 126121
loss: 1.0146640539169312,grad_norm: 0.9999995524477581, iteration: 126122
loss: 1.0013360977172852,grad_norm: 0.9999998749186352, iteration: 126123
loss: 0.9978209733963013,grad_norm: 0.9999990811443282, iteration: 126124
loss: 1.0099118947982788,grad_norm: 0.792762831545296, iteration: 126125
loss: 1.0407944917678833,grad_norm: 0.8622883055441262, iteration: 126126
loss: 1.006252408027649,grad_norm: 0.9999990296598351, iteration: 126127
loss: 0.9962416887283325,grad_norm: 0.7876126916655829, iteration: 126128
loss: 1.1479377746582031,grad_norm: 0.9999992201090749, iteration: 126129
loss: 1.0290637016296387,grad_norm: 0.9687172031168536, iteration: 126130
loss: 1.009405493736267,grad_norm: 0.9310876683077313, iteration: 126131
loss: 1.0615568161010742,grad_norm: 0.9605798842646638, iteration: 126132
loss: 1.0451633930206299,grad_norm: 0.9999996953359439, iteration: 126133
loss: 1.0095497369766235,grad_norm: 0.7908564311333434, iteration: 126134
loss: 1.0279268026351929,grad_norm: 0.9999992954225153, iteration: 126135
loss: 1.0149191617965698,grad_norm: 0.9243590919930921, iteration: 126136
loss: 1.0032281875610352,grad_norm: 0.9086615534394215, iteration: 126137
loss: 1.0318973064422607,grad_norm: 0.999999295299365, iteration: 126138
loss: 0.9815785884857178,grad_norm: 0.9641572026459703, iteration: 126139
loss: 1.031341791152954,grad_norm: 0.9750676692365241, iteration: 126140
loss: 1.0062470436096191,grad_norm: 0.8519396465029873, iteration: 126141
loss: 1.1764814853668213,grad_norm: 0.9999997527523256, iteration: 126142
loss: 1.1838271617889404,grad_norm: 0.9999999171687526, iteration: 126143
loss: 0.9809651970863342,grad_norm: 0.7696658157129189, iteration: 126144
loss: 1.135347604751587,grad_norm: 0.9999997611159275, iteration: 126145
loss: 1.0021228790283203,grad_norm: 0.8611614890857505, iteration: 126146
loss: 1.1071357727050781,grad_norm: 0.9999999748578043, iteration: 126147
loss: 1.0123391151428223,grad_norm: 0.9673573632425164, iteration: 126148
loss: 0.9951503872871399,grad_norm: 0.9999994163596223, iteration: 126149
loss: 1.0127782821655273,grad_norm: 0.9999998250428228, iteration: 126150
loss: 1.0037505626678467,grad_norm: 0.8906668417242831, iteration: 126151
loss: 1.1178207397460938,grad_norm: 0.9999996274109193, iteration: 126152
loss: 1.0867091417312622,grad_norm: 0.9999993409118743, iteration: 126153
loss: 1.2652645111083984,grad_norm: 0.999999985866338, iteration: 126154
loss: 1.1374762058258057,grad_norm: 0.9999998186243566, iteration: 126155
loss: 1.0565670728683472,grad_norm: 0.9999999490901476, iteration: 126156
loss: 1.0468449592590332,grad_norm: 0.904456667544665, iteration: 126157
loss: 0.9958188533782959,grad_norm: 0.9999996197454962, iteration: 126158
loss: 1.0262223482131958,grad_norm: 0.9999991050284062, iteration: 126159
loss: 1.0079995393753052,grad_norm: 0.8725220350490218, iteration: 126160
loss: 1.0155210494995117,grad_norm: 0.999999364353925, iteration: 126161
loss: 1.0482326745986938,grad_norm: 0.8673977304098703, iteration: 126162
loss: 1.0482741594314575,grad_norm: 0.9999995693811164, iteration: 126163
loss: 0.9811891317367554,grad_norm: 0.9999992081978077, iteration: 126164
loss: 1.0779563188552856,grad_norm: 0.999999222322174, iteration: 126165
loss: 0.9878641963005066,grad_norm: 0.9999990353802297, iteration: 126166
loss: 1.0084763765335083,grad_norm: 0.8880957785043714, iteration: 126167
loss: 1.056726098060608,grad_norm: 0.9999995219822567, iteration: 126168
loss: 1.0367225408554077,grad_norm: 0.9999991287849331, iteration: 126169
loss: 1.025259017944336,grad_norm: 0.900591819511372, iteration: 126170
loss: 1.017958402633667,grad_norm: 0.6921394074729411, iteration: 126171
loss: 1.0390520095825195,grad_norm: 0.9999994005872826, iteration: 126172
loss: 1.0015654563903809,grad_norm: 0.9999991321124704, iteration: 126173
loss: 1.1410664319992065,grad_norm: 0.9256732053963854, iteration: 126174
loss: 0.9965774416923523,grad_norm: 0.8218604444018225, iteration: 126175
loss: 0.9888833165168762,grad_norm: 0.9999990639786848, iteration: 126176
loss: 0.9790415167808533,grad_norm: 0.9999992793588878, iteration: 126177
loss: 1.0064406394958496,grad_norm: 0.8232250711484749, iteration: 126178
loss: 0.9876769781112671,grad_norm: 0.8452326823873053, iteration: 126179
loss: 1.0688552856445312,grad_norm: 0.9999999234606327, iteration: 126180
loss: 1.0490041971206665,grad_norm: 0.999999323882261, iteration: 126181
loss: 0.9988994002342224,grad_norm: 0.8310076632441968, iteration: 126182
loss: 0.9831358790397644,grad_norm: 0.9999993077602914, iteration: 126183
loss: 1.0166540145874023,grad_norm: 0.9071569734660505, iteration: 126184
loss: 0.9745165109634399,grad_norm: 0.9999998812735794, iteration: 126185
loss: 1.1759161949157715,grad_norm: 0.9999991064191889, iteration: 126186
loss: 1.0037111043930054,grad_norm: 0.9444951187533825, iteration: 126187
loss: 1.0784485340118408,grad_norm: 0.9999994135075808, iteration: 126188
loss: 1.0223625898361206,grad_norm: 0.9999998352849153, iteration: 126189
loss: 1.0099751949310303,grad_norm: 0.9999992107820372, iteration: 126190
loss: 0.9874379634857178,grad_norm: 0.921658479659576, iteration: 126191
loss: 0.9621069431304932,grad_norm: 0.7864455950547098, iteration: 126192
loss: 0.9914101958274841,grad_norm: 0.9999993373071138, iteration: 126193
loss: 1.0521169900894165,grad_norm: 0.9999991927735458, iteration: 126194
loss: 1.022315263748169,grad_norm: 0.999999851992848, iteration: 126195
loss: 1.008405089378357,grad_norm: 0.7672378179198062, iteration: 126196
loss: 1.055860996246338,grad_norm: 0.9999990361401517, iteration: 126197
loss: 1.0301402807235718,grad_norm: 0.9999991966506305, iteration: 126198
loss: 1.018798589706421,grad_norm: 0.9583996093258321, iteration: 126199
loss: 0.9987651109695435,grad_norm: 0.9999993805771261, iteration: 126200
loss: 0.9803533554077148,grad_norm: 0.9999995318102899, iteration: 126201
loss: 0.9886291027069092,grad_norm: 0.9999991263490821, iteration: 126202
loss: 1.0830512046813965,grad_norm: 0.9999990797160228, iteration: 126203
loss: 1.3359841108322144,grad_norm: 0.9999996750334165, iteration: 126204
loss: 0.9800284504890442,grad_norm: 0.9469617391874033, iteration: 126205
loss: 0.9801002740859985,grad_norm: 0.9999996730400926, iteration: 126206
loss: 1.010529637336731,grad_norm: 0.8359271455839555, iteration: 126207
loss: 1.0005271434783936,grad_norm: 0.8716073207769863, iteration: 126208
loss: 1.1263004541397095,grad_norm: 0.9793871077321737, iteration: 126209
loss: 1.059657335281372,grad_norm: 0.9999999156257539, iteration: 126210
loss: 0.9957652688026428,grad_norm: 0.9685400342077707, iteration: 126211
loss: 0.9906593561172485,grad_norm: 0.9966311010719695, iteration: 126212
loss: 1.0305110216140747,grad_norm: 0.8127296281920283, iteration: 126213
loss: 1.0181643962860107,grad_norm: 0.960065059652979, iteration: 126214
loss: 1.2407474517822266,grad_norm: 0.999999600758052, iteration: 126215
loss: 1.0100995302200317,grad_norm: 0.9239880076084864, iteration: 126216
loss: 1.0246810913085938,grad_norm: 0.9999992976078281, iteration: 126217
loss: 1.0244636535644531,grad_norm: 0.9206849674701638, iteration: 126218
loss: 0.9875312447547913,grad_norm: 0.9999998852480523, iteration: 126219
loss: 1.0276349782943726,grad_norm: 0.8577942136134465, iteration: 126220
loss: 1.0116990804672241,grad_norm: 0.9999993834644324, iteration: 126221
loss: 0.9620116353034973,grad_norm: 0.9171933768572993, iteration: 126222
loss: 1.0534557104110718,grad_norm: 0.8968558251238608, iteration: 126223
loss: 1.0316307544708252,grad_norm: 0.9495679440735462, iteration: 126224
loss: 1.039135217666626,grad_norm: 0.8512163093210805, iteration: 126225
loss: 1.018303394317627,grad_norm: 0.8413563757004869, iteration: 126226
loss: 1.0159318447113037,grad_norm: 0.9326522823377835, iteration: 126227
loss: 1.033193826675415,grad_norm: 0.999999823538879, iteration: 126228
loss: 1.056963324546814,grad_norm: 0.9912506710328544, iteration: 126229
loss: 1.233694314956665,grad_norm: 0.9999999252532861, iteration: 126230
loss: 1.0962494611740112,grad_norm: 0.9999996966265248, iteration: 126231
loss: 1.0646698474884033,grad_norm: 0.9810623802309182, iteration: 126232
loss: 1.0174925327301025,grad_norm: 0.9999992589021496, iteration: 126233
loss: 1.0417747497558594,grad_norm: 0.9981656397967003, iteration: 126234
loss: 1.0186409950256348,grad_norm: 0.8347912530391011, iteration: 126235
loss: 1.0390127897262573,grad_norm: 0.9999995689629014, iteration: 126236
loss: 1.0291861295700073,grad_norm: 0.9999991671128594, iteration: 126237
loss: 1.0970345735549927,grad_norm: 0.9999991825461642, iteration: 126238
loss: 1.0082148313522339,grad_norm: 0.758182457614801, iteration: 126239
loss: 1.0810493230819702,grad_norm: 0.9999991599143829, iteration: 126240
loss: 0.9772983193397522,grad_norm: 0.999999038159335, iteration: 126241
loss: 0.9686645269393921,grad_norm: 0.8714412274340302, iteration: 126242
loss: 1.014717936515808,grad_norm: 0.9443329621085156, iteration: 126243
loss: 1.003488540649414,grad_norm: 0.8845744535029016, iteration: 126244
loss: 0.9987648725509644,grad_norm: 0.9999992471416255, iteration: 126245
loss: 1.035467505455017,grad_norm: 0.9999995771413531, iteration: 126246
loss: 1.0252583026885986,grad_norm: 0.9999992531682033, iteration: 126247
loss: 1.120835304260254,grad_norm: 0.9999994877396247, iteration: 126248
loss: 1.029762625694275,grad_norm: 0.9999995105737794, iteration: 126249
loss: 1.066786766052246,grad_norm: 0.9999992859682948, iteration: 126250
loss: 1.0390511751174927,grad_norm: 0.9999991718050764, iteration: 126251
loss: 1.0025030374526978,grad_norm: 0.8156079519133913, iteration: 126252
loss: 1.0257713794708252,grad_norm: 0.9999993218101318, iteration: 126253
loss: 1.0538040399551392,grad_norm: 0.845551156466616, iteration: 126254
loss: 1.0333112478256226,grad_norm: 0.9999994061719872, iteration: 126255
loss: 1.0611839294433594,grad_norm: 0.9999992550591614, iteration: 126256
loss: 1.003717064857483,grad_norm: 0.9999998768078484, iteration: 126257
loss: 1.0765788555145264,grad_norm: 0.9999995867596605, iteration: 126258
loss: 1.0714136362075806,grad_norm: 0.9999991319391747, iteration: 126259
loss: 1.0504494905471802,grad_norm: 0.999999204989248, iteration: 126260
loss: 1.0120019912719727,grad_norm: 0.9999993218902646, iteration: 126261
loss: 1.030555248260498,grad_norm: 0.9999990809746531, iteration: 126262
loss: 1.1643422842025757,grad_norm: 0.9999994030549724, iteration: 126263
loss: 1.0252610445022583,grad_norm: 0.9316950090546811, iteration: 126264
loss: 1.1003204584121704,grad_norm: 0.926684292969151, iteration: 126265
loss: 1.0875967741012573,grad_norm: 0.9999992069822475, iteration: 126266
loss: 1.081912875175476,grad_norm: 0.9765957921408392, iteration: 126267
loss: 1.0037082433700562,grad_norm: 0.9999993138049407, iteration: 126268
loss: 1.0973422527313232,grad_norm: 0.9999994326935018, iteration: 126269
loss: 1.1760143041610718,grad_norm: 0.9999997705201797, iteration: 126270
loss: 0.9856159687042236,grad_norm: 0.9729722638512, iteration: 126271
loss: 1.0451619625091553,grad_norm: 0.9999991990155375, iteration: 126272
loss: 1.0454628467559814,grad_norm: 0.9534192303507497, iteration: 126273
loss: 1.0616756677627563,grad_norm: 0.9768870653021522, iteration: 126274
loss: 1.1634808778762817,grad_norm: 0.999999872395253, iteration: 126275
loss: 1.0670088529586792,grad_norm: 0.999999109362842, iteration: 126276
loss: 1.0650635957717896,grad_norm: 0.9003437591594656, iteration: 126277
loss: 1.0823243856430054,grad_norm: 0.9999993144673164, iteration: 126278
loss: 1.0010278224945068,grad_norm: 0.8841669647090649, iteration: 126279
loss: 1.209839105606079,grad_norm: 0.9999991452676923, iteration: 126280
loss: 1.2341066598892212,grad_norm: 0.9999991437460461, iteration: 126281
loss: 1.0975021123886108,grad_norm: 0.9999996189986384, iteration: 126282
loss: 0.993459939956665,grad_norm: 0.9999992493188201, iteration: 126283
loss: 1.0247347354888916,grad_norm: 0.8513753144601272, iteration: 126284
loss: 1.0408138036727905,grad_norm: 0.9999996689617677, iteration: 126285
loss: 1.0421944856643677,grad_norm: 0.9999997134109744, iteration: 126286
loss: 1.231423258781433,grad_norm: 0.9999994171750464, iteration: 126287
loss: 1.1556336879730225,grad_norm: 0.9999995174958959, iteration: 126288
loss: 1.0760550498962402,grad_norm: 0.999999083912475, iteration: 126289
loss: 1.1825708150863647,grad_norm: 0.9117029095819911, iteration: 126290
loss: 1.008101463317871,grad_norm: 0.999999081281676, iteration: 126291
loss: 1.048577904701233,grad_norm: 0.9999993879542443, iteration: 126292
loss: 1.0216275453567505,grad_norm: 0.8897189329872421, iteration: 126293
loss: 1.142117977142334,grad_norm: 0.9999999079346811, iteration: 126294
loss: 1.0840853452682495,grad_norm: 0.9449325258116712, iteration: 126295
loss: 1.0638670921325684,grad_norm: 0.9131041617719894, iteration: 126296
loss: 1.1024391651153564,grad_norm: 0.999999833490856, iteration: 126297
loss: 1.1844013929367065,grad_norm: 0.9999998303788314, iteration: 126298
loss: 1.1848987340927124,grad_norm: 0.9999998889101468, iteration: 126299
loss: 1.1248564720153809,grad_norm: 0.9999991311475621, iteration: 126300
loss: 1.0555299520492554,grad_norm: 0.8145324026266387, iteration: 126301
loss: 1.023202657699585,grad_norm: 0.9999990099339848, iteration: 126302
loss: 1.1182841062545776,grad_norm: 0.999999441281817, iteration: 126303
loss: 1.089447021484375,grad_norm: 0.9999993947878439, iteration: 126304
loss: 1.2841618061065674,grad_norm: 0.9999998741959972, iteration: 126305
loss: 1.0751447677612305,grad_norm: 0.9999991709846835, iteration: 126306
loss: 1.2560588121414185,grad_norm: 1.000000004355151, iteration: 126307
loss: 1.0527126789093018,grad_norm: 0.9999991811679897, iteration: 126308
loss: 1.2985200881958008,grad_norm: 0.9999998839854426, iteration: 126309
loss: 1.0219449996948242,grad_norm: 0.9999992005394813, iteration: 126310
loss: 1.157992959022522,grad_norm: 0.9999997834206475, iteration: 126311
loss: 1.233210802078247,grad_norm: 0.9999994908609119, iteration: 126312
loss: 1.054076910018921,grad_norm: 0.9999991993152625, iteration: 126313
loss: 1.1490100622177124,grad_norm: 0.9999997661845205, iteration: 126314
loss: 1.2908164262771606,grad_norm: 0.999999412043191, iteration: 126315
loss: 0.9873126149177551,grad_norm: 0.9565690053586705, iteration: 126316
loss: 1.328970193862915,grad_norm: 0.9999995197961115, iteration: 126317
loss: 1.2206147909164429,grad_norm: 0.9999995986749199, iteration: 126318
loss: 1.1356172561645508,grad_norm: 0.9999997106686143, iteration: 126319
loss: 1.3075380325317383,grad_norm: 0.9999994328585488, iteration: 126320
loss: 1.228911280632019,grad_norm: 0.9999998342585324, iteration: 126321
loss: 1.080723762512207,grad_norm: 0.9999993591206346, iteration: 126322
loss: 1.0847426652908325,grad_norm: 0.9999993072015727, iteration: 126323
loss: 1.168992042541504,grad_norm: 0.9999995168378587, iteration: 126324
loss: 1.0408508777618408,grad_norm: 0.9999990927164636, iteration: 126325
loss: 1.0625461339950562,grad_norm: 0.9761818929641011, iteration: 126326
loss: 1.2318910360336304,grad_norm: 0.9999999401580468, iteration: 126327
loss: 1.1329374313354492,grad_norm: 0.9999993548374501, iteration: 126328
loss: 1.0221569538116455,grad_norm: 0.8348306822386917, iteration: 126329
loss: 1.0469520092010498,grad_norm: 0.9433325255233895, iteration: 126330
loss: 1.1011027097702026,grad_norm: 0.9999991394185147, iteration: 126331
loss: 1.0025992393493652,grad_norm: 0.9490014523506622, iteration: 126332
loss: 1.0818864107131958,grad_norm: 0.9999998882056743, iteration: 126333
loss: 1.0148056745529175,grad_norm: 0.9999992928958794, iteration: 126334
loss: 1.0525879859924316,grad_norm: 0.8458678935018079, iteration: 126335
loss: 1.050993800163269,grad_norm: 0.9999994189737088, iteration: 126336
loss: 0.9906753897666931,grad_norm: 0.9999992628488689, iteration: 126337
loss: 0.9883581399917603,grad_norm: 0.999999294984219, iteration: 126338
loss: 1.1549559831619263,grad_norm: 0.9999991346036036, iteration: 126339
loss: 1.1614564657211304,grad_norm: 0.9999998482553091, iteration: 126340
loss: 1.0373713970184326,grad_norm: 0.9999992477261995, iteration: 126341
loss: 1.0696405172348022,grad_norm: 0.9946391833361794, iteration: 126342
loss: 0.9976754188537598,grad_norm: 0.9999995037778843, iteration: 126343
loss: 1.2602289915084839,grad_norm: 0.9999995683151404, iteration: 126344
loss: 1.0773913860321045,grad_norm: 0.9999996002326017, iteration: 126345
loss: 1.012488842010498,grad_norm: 0.8870538633095554, iteration: 126346
loss: 1.0828148126602173,grad_norm: 0.7738683259685929, iteration: 126347
loss: 1.3756486177444458,grad_norm: 0.9999999961417034, iteration: 126348
loss: 1.0449615716934204,grad_norm: 0.8434745682525073, iteration: 126349
loss: 1.0648187398910522,grad_norm: 0.8233124907571922, iteration: 126350
loss: 1.277464747428894,grad_norm: 0.9999997596049908, iteration: 126351
loss: 1.2706226110458374,grad_norm: 0.9999998021666475, iteration: 126352
loss: 1.016035556793213,grad_norm: 0.951373471362025, iteration: 126353
loss: 1.2025045156478882,grad_norm: 0.9999998490148037, iteration: 126354
loss: 1.1104590892791748,grad_norm: 0.999999832407741, iteration: 126355
loss: 1.0112454891204834,grad_norm: 0.9349386327018783, iteration: 126356
loss: 1.099518060684204,grad_norm: 0.9999992209984542, iteration: 126357
loss: 1.1548504829406738,grad_norm: 0.9999995722630028, iteration: 126358
loss: 1.1516928672790527,grad_norm: 0.9999997857634346, iteration: 126359
loss: 1.0845834016799927,grad_norm: 0.9999990964587384, iteration: 126360
loss: 1.0126570463180542,grad_norm: 0.9999992371672143, iteration: 126361
loss: 1.1172693967819214,grad_norm: 0.9999991268893711, iteration: 126362
loss: 1.1339449882507324,grad_norm: 0.9999997098594436, iteration: 126363
loss: 1.1214169263839722,grad_norm: 1.0000001380739072, iteration: 126364
loss: 1.2546476125717163,grad_norm: 0.9999998615697064, iteration: 126365
loss: 1.008622407913208,grad_norm: 0.999999074780543, iteration: 126366
loss: 1.0931947231292725,grad_norm: 0.9999996096139364, iteration: 126367
loss: 1.12698233127594,grad_norm: 0.9999998294636481, iteration: 126368
loss: 1.1628941297531128,grad_norm: 0.9999996170720111, iteration: 126369
loss: 1.0496845245361328,grad_norm: 0.999999204775448, iteration: 126370
loss: 1.3161425590515137,grad_norm: 0.9999997360506279, iteration: 126371
loss: 1.0716811418533325,grad_norm: 0.9999993106216629, iteration: 126372
loss: 1.2202796936035156,grad_norm: 0.9999998542771804, iteration: 126373
loss: 1.284354567527771,grad_norm: 0.9999998465805224, iteration: 126374
loss: 1.1040902137756348,grad_norm: 0.9999994636674354, iteration: 126375
loss: 1.100140929222107,grad_norm: 0.9999991799882197, iteration: 126376
loss: 1.115370750427246,grad_norm: 0.9999997995742553, iteration: 126377
loss: 1.2151424884796143,grad_norm: 0.9999999783164079, iteration: 126378
loss: 1.1282960176467896,grad_norm: 0.999999375388728, iteration: 126379
loss: 1.125573992729187,grad_norm: 0.999999708996409, iteration: 126380
loss: 1.2958163022994995,grad_norm: 0.9999998250644045, iteration: 126381
loss: 1.1281589269638062,grad_norm: 0.9999995899014259, iteration: 126382
loss: 1.1719106435775757,grad_norm: 0.9999997537899994, iteration: 126383
loss: 1.0946236848831177,grad_norm: 0.9999994388839334, iteration: 126384
loss: 1.0629000663757324,grad_norm: 0.9999994495098042, iteration: 126385
loss: 1.1489191055297852,grad_norm: 1.0000000412105794, iteration: 126386
loss: 1.4204106330871582,grad_norm: 0.9999996558383786, iteration: 126387
loss: 1.3775321245193481,grad_norm: 0.9999996861324263, iteration: 126388
loss: 1.1677004098892212,grad_norm: 0.99999952134433, iteration: 126389
loss: 1.0074878931045532,grad_norm: 0.999999023182541, iteration: 126390
loss: 1.069978952407837,grad_norm: 0.9999995828979115, iteration: 126391
loss: 0.9929976463317871,grad_norm: 0.9999991793650082, iteration: 126392
loss: 1.2001157999038696,grad_norm: 0.9999993304518779, iteration: 126393
loss: 1.006981611251831,grad_norm: 0.8885089777453865, iteration: 126394
loss: 1.032713770866394,grad_norm: 0.9999989784733813, iteration: 126395
loss: 0.9650149345397949,grad_norm: 0.9051890359741411, iteration: 126396
loss: 1.3602887392044067,grad_norm: 0.9999999494805679, iteration: 126397
loss: 1.079318881034851,grad_norm: 0.9999992721422215, iteration: 126398
loss: 1.115726351737976,grad_norm: 0.9999999169531221, iteration: 126399
loss: 1.1069495677947998,grad_norm: 0.9999992831076078, iteration: 126400
loss: 1.1887016296386719,grad_norm: 0.9999997739598145, iteration: 126401
loss: 1.0691721439361572,grad_norm: 0.9999996012158173, iteration: 126402
loss: 1.2350082397460938,grad_norm: 0.9999996831653735, iteration: 126403
loss: 1.097631573677063,grad_norm: 0.9999992578925438, iteration: 126404
loss: 1.114789366722107,grad_norm: 0.9999995434949717, iteration: 126405
loss: 1.0246676206588745,grad_norm: 0.9999998936478223, iteration: 126406
loss: 1.027103066444397,grad_norm: 0.9999996890537309, iteration: 126407
loss: 1.1172592639923096,grad_norm: 0.9999995400763293, iteration: 126408
loss: 1.1409591436386108,grad_norm: 0.999999866155296, iteration: 126409
loss: 1.2150459289550781,grad_norm: 0.9999998233552696, iteration: 126410
loss: 1.182847261428833,grad_norm: 0.9999998671343786, iteration: 126411
loss: 1.1319974660873413,grad_norm: 0.9999998333890853, iteration: 126412
loss: 1.0693150758743286,grad_norm: 0.999999584579924, iteration: 126413
loss: 1.1452792882919312,grad_norm: 0.9999998983856626, iteration: 126414
loss: 1.126844882965088,grad_norm: 0.9999996228701894, iteration: 126415
loss: 1.0354039669036865,grad_norm: 0.999999055860334, iteration: 126416
loss: 1.051658272743225,grad_norm: 0.9999995821098548, iteration: 126417
loss: 1.054488182067871,grad_norm: 0.9999996092967371, iteration: 126418
loss: 1.1073156595230103,grad_norm: 0.9999994810432173, iteration: 126419
loss: 1.0707356929779053,grad_norm: 0.999999632548506, iteration: 126420
loss: 0.9941946268081665,grad_norm: 0.999999213063289, iteration: 126421
loss: 1.0156441926956177,grad_norm: 0.9999996471507601, iteration: 126422
loss: 1.062313437461853,grad_norm: 0.9999993255591316, iteration: 126423
loss: 1.1106544733047485,grad_norm: 0.9999995620985287, iteration: 126424
loss: 1.0390907526016235,grad_norm: 0.9999991746192308, iteration: 126425
loss: 1.010604977607727,grad_norm: 0.9657346178557483, iteration: 126426
loss: 1.0626176595687866,grad_norm: 0.9999991152341988, iteration: 126427
loss: 1.0742080211639404,grad_norm: 0.999999153429104, iteration: 126428
loss: 1.2092493772506714,grad_norm: 0.999999507584269, iteration: 126429
loss: 1.0462021827697754,grad_norm: 0.9999998753633991, iteration: 126430
loss: 1.121659755706787,grad_norm: 0.9999998100085429, iteration: 126431
loss: 1.2687517404556274,grad_norm: 0.9999999283756872, iteration: 126432
loss: 1.013232707977295,grad_norm: 0.9999992241926586, iteration: 126433
loss: 1.0203983783721924,grad_norm: 0.8563799187363633, iteration: 126434
loss: 1.0076488256454468,grad_norm: 0.999999038184464, iteration: 126435
loss: 1.0114529132843018,grad_norm: 0.9999994653391507, iteration: 126436
loss: 1.0221335887908936,grad_norm: 0.9999994438116672, iteration: 126437
loss: 1.0255227088928223,grad_norm: 0.9999990699387873, iteration: 126438
loss: 1.0617845058441162,grad_norm: 0.9999993172601522, iteration: 126439
loss: 1.0482755899429321,grad_norm: 0.9999990973312495, iteration: 126440
loss: 1.12320077419281,grad_norm: 0.9999996377128714, iteration: 126441
loss: 1.3044244050979614,grad_norm: 0.9999999300244643, iteration: 126442
loss: 1.1005655527114868,grad_norm: 0.9999996844503933, iteration: 126443
loss: 0.9918021559715271,grad_norm: 0.867304233905645, iteration: 126444
loss: 0.9976727962493896,grad_norm: 0.9274637947788487, iteration: 126445
loss: 1.0854219198226929,grad_norm: 0.930905336074652, iteration: 126446
loss: 1.2617098093032837,grad_norm: 0.9999999170785435, iteration: 126447
loss: 1.045079231262207,grad_norm: 0.999999006836633, iteration: 126448
loss: 1.0182785987854004,grad_norm: 0.7792739484293109, iteration: 126449
loss: 1.0665545463562012,grad_norm: 0.999999097669875, iteration: 126450
loss: 1.0231326818466187,grad_norm: 0.8707354078946273, iteration: 126451
loss: 0.983821451663971,grad_norm: 0.9999990140252629, iteration: 126452
loss: 1.014396071434021,grad_norm: 0.9599380780963815, iteration: 126453
loss: 0.9784368872642517,grad_norm: 0.8103238790156406, iteration: 126454
loss: 1.055408000946045,grad_norm: 0.9999993841904733, iteration: 126455
loss: 0.9698890447616577,grad_norm: 0.8331485407772176, iteration: 126456
loss: 1.0038437843322754,grad_norm: 0.9999992183086028, iteration: 126457
loss: 0.978239119052887,grad_norm: 0.9999990539690605, iteration: 126458
loss: 1.0095477104187012,grad_norm: 1.0000000150690844, iteration: 126459
loss: 1.1161460876464844,grad_norm: 0.9999996491767301, iteration: 126460
loss: 1.036812424659729,grad_norm: 0.9560385522090544, iteration: 126461
loss: 0.9502184987068176,grad_norm: 0.9380620637270759, iteration: 126462
loss: 1.0282237529754639,grad_norm: 0.9999996854816839, iteration: 126463
loss: 1.0379295349121094,grad_norm: 0.8902275779139761, iteration: 126464
loss: 1.0047922134399414,grad_norm: 0.9999991115718819, iteration: 126465
loss: 1.0659204721450806,grad_norm: 0.999999394863761, iteration: 126466
loss: 1.068371057510376,grad_norm: 1.0000000164668295, iteration: 126467
loss: 1.0445489883422852,grad_norm: 0.9999993237406037, iteration: 126468
loss: 0.9984695911407471,grad_norm: 0.9999997044352688, iteration: 126469
loss: 1.0122790336608887,grad_norm: 0.9999996641676212, iteration: 126470
loss: 0.9975113272666931,grad_norm: 0.999999089509942, iteration: 126471
loss: 0.9886031150817871,grad_norm: 0.777816210947784, iteration: 126472
loss: 1.0966497659683228,grad_norm: 0.9999996176956834, iteration: 126473
loss: 0.9963030219078064,grad_norm: 0.8844120920593682, iteration: 126474
loss: 0.972046434879303,grad_norm: 0.9722767140793384, iteration: 126475
loss: 1.1066169738769531,grad_norm: 0.9999998765782123, iteration: 126476
loss: 1.0626674890518188,grad_norm: 0.9999995026604345, iteration: 126477
loss: 0.9996354579925537,grad_norm: 0.8445623074387051, iteration: 126478
loss: 1.0583440065383911,grad_norm: 0.916229751973556, iteration: 126479
loss: 1.0298616886138916,grad_norm: 0.8859812806801405, iteration: 126480
loss: 1.0488619804382324,grad_norm: 0.9999990863631971, iteration: 126481
loss: 1.0148446559906006,grad_norm: 0.7253381089406046, iteration: 126482
loss: 1.0894140005111694,grad_norm: 0.9999994673164969, iteration: 126483
loss: 0.995914876461029,grad_norm: 0.9999990053930427, iteration: 126484
loss: 1.034143328666687,grad_norm: 0.9999996428360146, iteration: 126485
loss: 1.0181657075881958,grad_norm: 0.9545541807270167, iteration: 126486
loss: 0.9955419301986694,grad_norm: 1.0000000208199291, iteration: 126487
loss: 0.9779598116874695,grad_norm: 0.9166803130121909, iteration: 126488
loss: 1.0006369352340698,grad_norm: 0.9999991361872264, iteration: 126489
loss: 1.1523807048797607,grad_norm: 0.9999993854408352, iteration: 126490
loss: 1.0334357023239136,grad_norm: 0.9999997292712229, iteration: 126491
loss: 1.0530880689620972,grad_norm: 0.9999992372056896, iteration: 126492
loss: 0.9690638184547424,grad_norm: 0.9999991738634059, iteration: 126493
loss: 0.9854639768600464,grad_norm: 0.7599748301009257, iteration: 126494
loss: 1.004960298538208,grad_norm: 0.9999990333507156, iteration: 126495
loss: 1.005940556526184,grad_norm: 0.800815149362009, iteration: 126496
loss: 1.0609042644500732,grad_norm: 0.9999999533998434, iteration: 126497
loss: 1.0602649450302124,grad_norm: 0.8542062171799016, iteration: 126498
loss: 1.0554111003875732,grad_norm: 0.8763089092229224, iteration: 126499
loss: 1.1603831052780151,grad_norm: 0.9999992522719228, iteration: 126500
loss: 1.0197983980178833,grad_norm: 0.9999992574097104, iteration: 126501
loss: 1.061763882637024,grad_norm: 0.9999998565680042, iteration: 126502
loss: 0.9760103821754456,grad_norm: 0.9999996862661668, iteration: 126503
loss: 1.0026439428329468,grad_norm: 0.9999991745969868, iteration: 126504
loss: 1.0654752254486084,grad_norm: 0.8761035279950611, iteration: 126505
loss: 1.104699730873108,grad_norm: 0.8584363105105547, iteration: 126506
loss: 1.0986844301223755,grad_norm: 0.999999115516348, iteration: 126507
loss: 1.0550884008407593,grad_norm: 0.9999994156775184, iteration: 126508
loss: 1.0386918783187866,grad_norm: 0.9999992058385941, iteration: 126509
loss: 1.0703012943267822,grad_norm: 0.9999993282095199, iteration: 126510
loss: 0.9986734390258789,grad_norm: 0.9999998456395052, iteration: 126511
loss: 1.0116963386535645,grad_norm: 0.9999996592716472, iteration: 126512
loss: 1.2005112171173096,grad_norm: 0.9999998773933578, iteration: 126513
loss: 1.42153000831604,grad_norm: 0.9999997616777139, iteration: 126514
loss: 1.04868745803833,grad_norm: 0.9999993726681996, iteration: 126515
loss: 1.0405035018920898,grad_norm: 0.9089636079916167, iteration: 126516
loss: 0.9970259070396423,grad_norm: 0.9999994390864471, iteration: 126517
loss: 1.1832940578460693,grad_norm: 0.9999999478733692, iteration: 126518
loss: 1.0873819589614868,grad_norm: 0.9999991578082882, iteration: 126519
loss: 1.1796635389328003,grad_norm: 0.9999996341542008, iteration: 126520
loss: 1.275766372680664,grad_norm: 0.9999996313516831, iteration: 126521
loss: 1.1539369821548462,grad_norm: 0.9999997063127514, iteration: 126522
loss: 1.2512483596801758,grad_norm: 0.9999998073913157, iteration: 126523
loss: 1.1720798015594482,grad_norm: 0.9999998347414248, iteration: 126524
loss: 1.2044397592544556,grad_norm: 0.9999998169513251, iteration: 126525
loss: 0.9907827377319336,grad_norm: 0.9999993724223719, iteration: 126526
loss: 1.295291543006897,grad_norm: 0.9999998792581875, iteration: 126527
loss: 1.2036892175674438,grad_norm: 0.9999999323724823, iteration: 126528
loss: 1.0809733867645264,grad_norm: 0.9999990813984418, iteration: 126529
loss: 1.5136011838912964,grad_norm: 0.9999998556213447, iteration: 126530
loss: 1.110449194908142,grad_norm: 0.9999997678584425, iteration: 126531
loss: 1.2962855100631714,grad_norm: 0.9999997975389655, iteration: 126532
loss: 1.0499119758605957,grad_norm: 0.9999998710594774, iteration: 126533
loss: 1.0271762609481812,grad_norm: 0.9999995212847934, iteration: 126534
loss: 1.161291480064392,grad_norm: 0.99999987878392, iteration: 126535
loss: 1.5126526355743408,grad_norm: 0.9999997764329975, iteration: 126536
loss: 1.041538953781128,grad_norm: 0.9287145637322233, iteration: 126537
loss: 1.052602767944336,grad_norm: 0.8897414569875763, iteration: 126538
loss: 1.1316946744918823,grad_norm: 0.9999998721437727, iteration: 126539
loss: 1.2038112878799438,grad_norm: 0.9999998863292789, iteration: 126540
loss: 1.0986250638961792,grad_norm: 0.9999991242291164, iteration: 126541
loss: 1.113811731338501,grad_norm: 0.9999997782448765, iteration: 126542
loss: 1.062286376953125,grad_norm: 0.9999991079310242, iteration: 126543
loss: 1.0878655910491943,grad_norm: 0.9999998794241141, iteration: 126544
loss: 1.1849873065948486,grad_norm: 0.9999999948917041, iteration: 126545
loss: 1.258529543876648,grad_norm: 0.9999997861062243, iteration: 126546
loss: 1.1732527017593384,grad_norm: 0.9999996556753094, iteration: 126547
loss: 1.1590468883514404,grad_norm: 0.99999974721879, iteration: 126548
loss: 1.263624906539917,grad_norm: 0.9999998954063982, iteration: 126549
loss: 1.2069423198699951,grad_norm: 0.9999996675819935, iteration: 126550
loss: 1.2195991277694702,grad_norm: 0.9999994725949969, iteration: 126551
loss: 1.0638740062713623,grad_norm: 0.9999996246584667, iteration: 126552
loss: 1.5466008186340332,grad_norm: 0.9999996466305389, iteration: 126553
loss: 1.7505756616592407,grad_norm: 0.9999998597669637, iteration: 126554
loss: 1.1714818477630615,grad_norm: 0.9999995842559243, iteration: 126555
loss: 1.0860304832458496,grad_norm: 0.9999991179286933, iteration: 126556
loss: 1.1226307153701782,grad_norm: 0.9999998702658687, iteration: 126557
loss: 1.2924487590789795,grad_norm: 0.9999996772074543, iteration: 126558
loss: 1.7403655052185059,grad_norm: 0.999999891971981, iteration: 126559
loss: 1.1402387619018555,grad_norm: 0.9999997400283029, iteration: 126560
loss: 1.4960495233535767,grad_norm: 0.9999996837670276, iteration: 126561
loss: 1.2580373287200928,grad_norm: 0.9999997036920194, iteration: 126562
loss: 1.4083884954452515,grad_norm: 0.9999999098398432, iteration: 126563
loss: 1.3825793266296387,grad_norm: 0.9999996302920511, iteration: 126564
loss: 1.4337232112884521,grad_norm: 0.999999985590717, iteration: 126565
loss: 1.1949104070663452,grad_norm: 0.9999995703936023, iteration: 126566
loss: 1.52474045753479,grad_norm: 0.9999998150065608, iteration: 126567
loss: 1.2013227939605713,grad_norm: 0.9999998892543914, iteration: 126568
loss: 1.348690390586853,grad_norm: 0.9999999119982378, iteration: 126569
loss: 1.3885931968688965,grad_norm: 0.9999999830804904, iteration: 126570
loss: 1.4512323141098022,grad_norm: 0.999999967782829, iteration: 126571
loss: 1.3023509979248047,grad_norm: 0.9999999182096415, iteration: 126572
loss: 1.4358292818069458,grad_norm: 0.9999998311667497, iteration: 126573
loss: 1.1045327186584473,grad_norm: 0.999999851099394, iteration: 126574
loss: 1.4856973886489868,grad_norm: 0.9999997311829691, iteration: 126575
loss: 1.1755026578903198,grad_norm: 0.9999994719689976, iteration: 126576
loss: 1.127703309059143,grad_norm: 0.9999994428647011, iteration: 126577
loss: 1.248854160308838,grad_norm: 0.9999997363391187, iteration: 126578
loss: 1.609775424003601,grad_norm: 0.9999999110757086, iteration: 126579
loss: 1.3068915605545044,grad_norm: 0.9999996982468259, iteration: 126580
loss: 1.3659685850143433,grad_norm: 0.9999996992849224, iteration: 126581
loss: 1.1927214860916138,grad_norm: 1.0000000200242327, iteration: 126582
loss: 1.3079625368118286,grad_norm: 0.9999999377406286, iteration: 126583
loss: 1.4043350219726562,grad_norm: 0.9999995281520981, iteration: 126584
loss: 1.404668927192688,grad_norm: 0.9999997692165955, iteration: 126585
loss: 1.3294757604599,grad_norm: 0.9999999339081299, iteration: 126586
loss: 1.2903051376342773,grad_norm: 0.999999599263365, iteration: 126587
loss: 1.3685311079025269,grad_norm: 0.999999902981996, iteration: 126588
loss: 1.2296360731124878,grad_norm: 0.9999995132887204, iteration: 126589
loss: 1.202391266822815,grad_norm: 0.9999999086638393, iteration: 126590
loss: 1.2970644235610962,grad_norm: 0.9999997565839336, iteration: 126591
loss: 1.3798569440841675,grad_norm: 0.9999997838407804, iteration: 126592
loss: 1.0703221559524536,grad_norm: 0.9999994944863464, iteration: 126593
loss: 1.1779252290725708,grad_norm: 0.9999998325492999, iteration: 126594
loss: 1.2590725421905518,grad_norm: 0.9999997253114024, iteration: 126595
loss: 1.3814359903335571,grad_norm: 0.9999999305638595, iteration: 126596
loss: 1.104471206665039,grad_norm: 0.9999997637464493, iteration: 126597
loss: 1.2582274675369263,grad_norm: 0.9999998243115313, iteration: 126598
loss: 1.113703727722168,grad_norm: 0.9999997707766125, iteration: 126599
loss: 1.3434282541275024,grad_norm: 0.999999855221382, iteration: 126600
loss: 1.099179983139038,grad_norm: 0.9999996732843411, iteration: 126601
loss: 1.326323390007019,grad_norm: 0.9999999896360515, iteration: 126602
loss: 1.299009084701538,grad_norm: 0.9999998945058306, iteration: 126603
loss: 1.2325236797332764,grad_norm: 0.9999997761947251, iteration: 126604
loss: 1.2714678049087524,grad_norm: 0.9999996349592143, iteration: 126605
loss: 1.3884103298187256,grad_norm: 0.9999999312019895, iteration: 126606
loss: 1.1400959491729736,grad_norm: 0.9999991594512715, iteration: 126607
loss: 1.1300221681594849,grad_norm: 0.9999998200265751, iteration: 126608
loss: 1.2170456647872925,grad_norm: 0.9999995679635246, iteration: 126609
loss: 1.050499677658081,grad_norm: 0.9999994118977714, iteration: 126610
loss: 1.0709148645401,grad_norm: 0.9999994313726183, iteration: 126611
loss: 1.07284414768219,grad_norm: 0.9999997357904342, iteration: 126612
loss: 1.0515087842941284,grad_norm: 0.9999990564839578, iteration: 126613
loss: 1.0665645599365234,grad_norm: 0.999999559118679, iteration: 126614
loss: 1.1927344799041748,grad_norm: 0.9999999171995969, iteration: 126615
loss: 1.086439847946167,grad_norm: 0.9999994477211418, iteration: 126616
loss: 1.1626724004745483,grad_norm: 0.9999999612230548, iteration: 126617
loss: 1.0585596561431885,grad_norm: 0.9999992281779271, iteration: 126618
loss: 1.0797470808029175,grad_norm: 0.9999994505593555, iteration: 126619
loss: 1.1096798181533813,grad_norm: 0.9999994768484907, iteration: 126620
loss: 1.297060251235962,grad_norm: 0.9999998258707903, iteration: 126621
loss: 1.1808682680130005,grad_norm: 0.9999994296797244, iteration: 126622
loss: 1.1734312772750854,grad_norm: 0.9999994696022313, iteration: 126623
loss: 1.0233670473098755,grad_norm: 0.9999992779862261, iteration: 126624
loss: 1.0874093770980835,grad_norm: 0.9999992585422245, iteration: 126625
loss: 1.1190171241760254,grad_norm: 0.9999996495838016, iteration: 126626
loss: 1.0862557888031006,grad_norm: 0.9999996197970197, iteration: 126627
loss: 1.0907062292099,grad_norm: 0.9999993850614313, iteration: 126628
loss: 1.0836200714111328,grad_norm: 0.9999992849625787, iteration: 126629
loss: 1.13851797580719,grad_norm: 0.9999997135154062, iteration: 126630
loss: 1.105838418006897,grad_norm: 0.9999996436375692, iteration: 126631
loss: 1.0981929302215576,grad_norm: 0.9999993159940433, iteration: 126632
loss: 1.354616641998291,grad_norm: 0.9999998412747431, iteration: 126633
loss: 1.0877727270126343,grad_norm: 0.9999991623356734, iteration: 126634
loss: 1.173546552658081,grad_norm: 0.9999998189637322, iteration: 126635
loss: 1.113718032836914,grad_norm: 0.9999998666175896, iteration: 126636
loss: 1.174365520477295,grad_norm: 0.9999996922696076, iteration: 126637
loss: 1.058535099029541,grad_norm: 0.9999991532512197, iteration: 126638
loss: 1.0875909328460693,grad_norm: 0.9999996221314885, iteration: 126639
loss: 1.1797682046890259,grad_norm: 0.999999691194944, iteration: 126640
loss: 1.223300576210022,grad_norm: 0.9999998354746423, iteration: 126641
loss: 1.3084876537322998,grad_norm: 0.9999998641019593, iteration: 126642
loss: 1.2973915338516235,grad_norm: 0.999999632720605, iteration: 126643
loss: 1.0533032417297363,grad_norm: 0.9999991475827782, iteration: 126644
loss: 1.134171962738037,grad_norm: 0.9999997874936819, iteration: 126645
loss: 1.0177085399627686,grad_norm: 0.9999992845606984, iteration: 126646
loss: 1.1408774852752686,grad_norm: 0.9999997185052415, iteration: 126647
loss: 1.0757534503936768,grad_norm: 0.9999994344412467, iteration: 126648
loss: 1.1714407205581665,grad_norm: 0.9999996192811093, iteration: 126649
loss: 1.262553095817566,grad_norm: 0.9999998390870628, iteration: 126650
loss: 1.0850008726119995,grad_norm: 0.9999994741848968, iteration: 126651
loss: 1.339079737663269,grad_norm: 0.999999648210127, iteration: 126652
loss: 1.0767490863800049,grad_norm: 0.9999997318101661, iteration: 126653
loss: 1.2035558223724365,grad_norm: 0.9999995357759468, iteration: 126654
loss: 1.0514506101608276,grad_norm: 0.999999684603303, iteration: 126655
loss: 1.0347486734390259,grad_norm: 0.9999998361205764, iteration: 126656
loss: 1.077528953552246,grad_norm: 0.9999999555710154, iteration: 126657
loss: 1.270221471786499,grad_norm: 0.999999656457462, iteration: 126658
loss: 1.113000512123108,grad_norm: 0.9999999310638594, iteration: 126659
loss: 1.058206558227539,grad_norm: 0.9999992484195142, iteration: 126660
loss: 1.1884725093841553,grad_norm: 0.999999428174048, iteration: 126661
loss: 1.0511590242385864,grad_norm: 0.9817416864469, iteration: 126662
loss: 1.0831890106201172,grad_norm: 0.9999996784671374, iteration: 126663
loss: 1.0806313753128052,grad_norm: 0.9999998078736974, iteration: 126664
loss: 1.2064450979232788,grad_norm: 0.999999467611388, iteration: 126665
loss: 1.1653976440429688,grad_norm: 0.9999996355617721, iteration: 126666
loss: 1.0163542032241821,grad_norm: 0.8498226455195813, iteration: 126667
loss: 1.121749758720398,grad_norm: 0.999999924188897, iteration: 126668
loss: 1.0638328790664673,grad_norm: 0.9999992680177175, iteration: 126669
loss: 1.1538645029067993,grad_norm: 0.9999997931701171, iteration: 126670
loss: 0.9882717132568359,grad_norm: 0.8528135634953048, iteration: 126671
loss: 1.1171481609344482,grad_norm: 0.9999996497638068, iteration: 126672
loss: 1.2303199768066406,grad_norm: 0.9999996907393132, iteration: 126673
loss: 1.1516597270965576,grad_norm: 0.9999996709917537, iteration: 126674
loss: 1.0471588373184204,grad_norm: 0.9999992707686645, iteration: 126675
loss: 1.2241898775100708,grad_norm: 0.9999997956548455, iteration: 126676
loss: 1.009686827659607,grad_norm: 0.9086681208496663, iteration: 126677
loss: 1.1944838762283325,grad_norm: 0.9999995137679013, iteration: 126678
loss: 1.0851211547851562,grad_norm: 0.9999998115164965, iteration: 126679
loss: 1.1384549140930176,grad_norm: 0.9999997539831414, iteration: 126680
loss: 1.3480859994888306,grad_norm: 0.9999998168853874, iteration: 126681
loss: 1.0440795421600342,grad_norm: 0.9999992771104511, iteration: 126682
loss: 1.0811982154846191,grad_norm: 0.9999997399994686, iteration: 126683
loss: 1.034635305404663,grad_norm: 0.9999991930966126, iteration: 126684
loss: 1.0126985311508179,grad_norm: 0.9999996756697588, iteration: 126685
loss: 1.1091012954711914,grad_norm: 0.9999998065389597, iteration: 126686
loss: 0.98187255859375,grad_norm: 0.9999993840466458, iteration: 126687
loss: 1.0284695625305176,grad_norm: 0.9999992597986512, iteration: 126688
loss: 1.1108927726745605,grad_norm: 0.9999998494628672, iteration: 126689
loss: 1.0591671466827393,grad_norm: 0.9999993855975139, iteration: 126690
loss: 1.152337670326233,grad_norm: 0.9999999631669737, iteration: 126691
loss: 1.0224902629852295,grad_norm: 0.9542533578566662, iteration: 126692
loss: 1.0270423889160156,grad_norm: 0.9999990978601024, iteration: 126693
loss: 1.0009808540344238,grad_norm: 0.9999996041798845, iteration: 126694
loss: 0.9731977581977844,grad_norm: 0.9999999183544344, iteration: 126695
loss: 1.0505118370056152,grad_norm: 0.9999990766287369, iteration: 126696
loss: 1.0728617906570435,grad_norm: 0.9999998761744278, iteration: 126697
loss: 1.0136457681655884,grad_norm: 0.9116185516293415, iteration: 126698
loss: 1.0206427574157715,grad_norm: 0.9999995791159637, iteration: 126699
loss: 1.0660449266433716,grad_norm: 0.9999990878377846, iteration: 126700
loss: 1.090153455734253,grad_norm: 0.9999994552636063, iteration: 126701
loss: 1.0401740074157715,grad_norm: 0.9691849294382756, iteration: 126702
loss: 1.0110867023468018,grad_norm: 0.9999996180567164, iteration: 126703
loss: 1.023443579673767,grad_norm: 0.9894448905637544, iteration: 126704
loss: 1.0145820379257202,grad_norm: 0.9999141780538671, iteration: 126705
loss: 1.092491865158081,grad_norm: 0.9999999725260883, iteration: 126706
loss: 1.0668244361877441,grad_norm: 0.9999994697854019, iteration: 126707
loss: 1.0207602977752686,grad_norm: 0.9999992147236197, iteration: 126708
loss: 1.0768741369247437,grad_norm: 0.9999995559117594, iteration: 126709
loss: 1.1537349224090576,grad_norm: 0.999999750003526, iteration: 126710
loss: 1.1353667974472046,grad_norm: 0.9999997086826444, iteration: 126711
loss: 1.0165232419967651,grad_norm: 0.8835932068081757, iteration: 126712
loss: 1.400599718093872,grad_norm: 0.999999918173033, iteration: 126713
loss: 1.127764105796814,grad_norm: 0.9999996364121087, iteration: 126714
loss: 0.9890279173851013,grad_norm: 0.9999990123507462, iteration: 126715
loss: 1.055203914642334,grad_norm: 0.9999991353031706, iteration: 126716
loss: 1.1315866708755493,grad_norm: 0.9999995045506469, iteration: 126717
loss: 1.2770034074783325,grad_norm: 1.000000072078997, iteration: 126718
loss: 0.9917288422584534,grad_norm: 0.7529651436326322, iteration: 126719
loss: 1.0268930196762085,grad_norm: 0.9999990667346798, iteration: 126720
loss: 0.9838539958000183,grad_norm: 0.9999992449602002, iteration: 126721
loss: 1.0167444944381714,grad_norm: 0.999999038941185, iteration: 126722
loss: 1.0929174423217773,grad_norm: 0.9999991531216721, iteration: 126723
loss: 1.008431315422058,grad_norm: 0.9999996465549064, iteration: 126724
loss: 1.0192207098007202,grad_norm: 0.9999990325770556, iteration: 126725
loss: 0.9930130839347839,grad_norm: 0.9999991216225108, iteration: 126726
loss: 1.1203142404556274,grad_norm: 0.999999153191533, iteration: 126727
loss: 1.0609017610549927,grad_norm: 0.9999999401791386, iteration: 126728
loss: 1.014254093170166,grad_norm: 0.9041775850790461, iteration: 126729
loss: 1.0177451372146606,grad_norm: 0.8890039867286404, iteration: 126730
loss: 1.0457245111465454,grad_norm: 0.9999992797636165, iteration: 126731
loss: 1.0084881782531738,grad_norm: 0.7736601088852875, iteration: 126732
loss: 1.0555254220962524,grad_norm: 0.999999188544261, iteration: 126733
loss: 1.1949961185455322,grad_norm: 0.9999991635030319, iteration: 126734
loss: 0.9941484332084656,grad_norm: 0.8415817916379404, iteration: 126735
loss: 1.1471415758132935,grad_norm: 0.9999996515781528, iteration: 126736
loss: 1.024402379989624,grad_norm: 0.999999030410616, iteration: 126737
loss: 1.0011037588119507,grad_norm: 0.7985984517427951, iteration: 126738
loss: 1.0017396211624146,grad_norm: 0.9999993359496133, iteration: 126739
loss: 1.0074505805969238,grad_norm: 0.9999999150958658, iteration: 126740
loss: 1.0326184034347534,grad_norm: 0.9999992685638701, iteration: 126741
loss: 1.0073763132095337,grad_norm: 0.7708594913799615, iteration: 126742
loss: 1.0630762577056885,grad_norm: 0.9999998674326387, iteration: 126743
loss: 1.0321314334869385,grad_norm: 0.9999995211789133, iteration: 126744
loss: 1.087339162826538,grad_norm: 0.9999992740865242, iteration: 126745
loss: 1.1071155071258545,grad_norm: 0.9999999360196735, iteration: 126746
loss: 1.128963589668274,grad_norm: 0.9999990157257941, iteration: 126747
loss: 0.9950602054595947,grad_norm: 0.9999998492716172, iteration: 126748
loss: 1.340818166732788,grad_norm: 0.9999993906687998, iteration: 126749
loss: 1.0059294700622559,grad_norm: 0.9999997658469537, iteration: 126750
loss: 1.0437949895858765,grad_norm: 0.9999994907465518, iteration: 126751
loss: 1.115904688835144,grad_norm: 0.9999998724287897, iteration: 126752
loss: 0.9978277683258057,grad_norm: 0.8975946569893153, iteration: 126753
loss: 0.9612842798233032,grad_norm: 0.9078118795390976, iteration: 126754
loss: 1.067987084388733,grad_norm: 0.9999992496767217, iteration: 126755
loss: 1.0269298553466797,grad_norm: 0.9807925756104497, iteration: 126756
loss: 1.058732509613037,grad_norm: 0.9706417856531899, iteration: 126757
loss: 0.9758405685424805,grad_norm: 0.9999990798599612, iteration: 126758
loss: 0.961621105670929,grad_norm: 0.8710313510471341, iteration: 126759
loss: 1.0083948373794556,grad_norm: 0.9289324751006854, iteration: 126760
loss: 1.106534719467163,grad_norm: 0.9999997914542313, iteration: 126761
loss: 1.0962995290756226,grad_norm: 0.9999993266040603, iteration: 126762
loss: 1.068884015083313,grad_norm: 0.9999998127640366, iteration: 126763
loss: 0.9983556866645813,grad_norm: 0.9999999895578182, iteration: 126764
loss: 0.9941151738166809,grad_norm: 0.9108361661326799, iteration: 126765
loss: 1.0442999601364136,grad_norm: 0.9999994327281373, iteration: 126766
loss: 1.0765862464904785,grad_norm: 0.9999999481534937, iteration: 126767
loss: 1.1239734888076782,grad_norm: 0.9999998836719215, iteration: 126768
loss: 1.1045187711715698,grad_norm: 0.9068654669322208, iteration: 126769
loss: 1.0083391666412354,grad_norm: 0.8426974325099273, iteration: 126770
loss: 1.0577958822250366,grad_norm: 0.9360880627873723, iteration: 126771
loss: 1.005717158317566,grad_norm: 0.9662626938725222, iteration: 126772
loss: 1.0107462406158447,grad_norm: 0.9999995189259043, iteration: 126773
loss: 1.0614454746246338,grad_norm: 0.9999998418708775, iteration: 126774
loss: 1.1044400930404663,grad_norm: 0.9999991035664786, iteration: 126775
loss: 1.1159881353378296,grad_norm: 0.9999998380213018, iteration: 126776
loss: 0.9961265325546265,grad_norm: 0.9999997233506496, iteration: 126777
loss: 0.9899725914001465,grad_norm: 0.8682501663871008, iteration: 126778
loss: 1.0111829042434692,grad_norm: 0.9999998422052784, iteration: 126779
loss: 1.069245457649231,grad_norm: 0.9999993439614834, iteration: 126780
loss: 1.0878784656524658,grad_norm: 0.9999993817816221, iteration: 126781
loss: 1.0096770524978638,grad_norm: 0.8555779960052246, iteration: 126782
loss: 1.0491094589233398,grad_norm: 0.9999990503927868, iteration: 126783
loss: 1.0832592248916626,grad_norm: 0.9999996908923787, iteration: 126784
loss: 1.0766547918319702,grad_norm: 0.9999997937998805, iteration: 126785
loss: 1.0953253507614136,grad_norm: 0.9999995237486466, iteration: 126786
loss: 1.0348092317581177,grad_norm: 0.9999997142983539, iteration: 126787
loss: 1.0545545816421509,grad_norm: 0.9999994817678565, iteration: 126788
loss: 1.0773632526397705,grad_norm: 0.9999995217109435, iteration: 126789
loss: 1.0358521938323975,grad_norm: 0.999999359001547, iteration: 126790
loss: 0.9981443881988525,grad_norm: 0.9999992911815622, iteration: 126791
loss: 1.0123448371887207,grad_norm: 0.9999999507758669, iteration: 126792
loss: 1.0040271282196045,grad_norm: 0.999999228380939, iteration: 126793
loss: 0.9833393692970276,grad_norm: 0.9577077155519503, iteration: 126794
loss: 1.0147875547409058,grad_norm: 0.9836873514242395, iteration: 126795
loss: 1.0170191526412964,grad_norm: 0.9999992088390182, iteration: 126796
loss: 1.0355066061019897,grad_norm: 0.9999992575774339, iteration: 126797
loss: 0.9950700402259827,grad_norm: 0.9999999305579025, iteration: 126798
loss: 1.0585789680480957,grad_norm: 0.9999991201694504, iteration: 126799
loss: 1.403544306755066,grad_norm: 0.9999995999843533, iteration: 126800
loss: 1.1000611782073975,grad_norm: 0.9999997074926076, iteration: 126801
loss: 0.9825052618980408,grad_norm: 1.0000000002432499, iteration: 126802
loss: 0.9586285948753357,grad_norm: 0.8582121772665351, iteration: 126803
loss: 1.032697319984436,grad_norm: 0.9999999159411946, iteration: 126804
loss: 1.075802206993103,grad_norm: 1.0000000086255674, iteration: 126805
loss: 1.0959128141403198,grad_norm: 0.9999996773632228, iteration: 126806
loss: 1.3611772060394287,grad_norm: 0.9999998920641388, iteration: 126807
loss: 1.3924106359481812,grad_norm: 1.0000000524746877, iteration: 126808
loss: 1.0600439310073853,grad_norm: 0.9999998500175935, iteration: 126809
loss: 1.1248375177383423,grad_norm: 0.99999952257883, iteration: 126810
loss: 0.9876344799995422,grad_norm: 0.9999990770726386, iteration: 126811
loss: 0.9957572817802429,grad_norm: 0.918514132675781, iteration: 126812
loss: 1.0224273204803467,grad_norm: 0.999999358938245, iteration: 126813
loss: 1.0556834936141968,grad_norm: 0.9999994228751279, iteration: 126814
loss: 1.0095579624176025,grad_norm: 0.8251157829134127, iteration: 126815
loss: 1.4390760660171509,grad_norm: 0.9999998556726912, iteration: 126816
loss: 0.9774231910705566,grad_norm: 0.9999993443537911, iteration: 126817
loss: 1.026096224784851,grad_norm: 0.9999994194302664, iteration: 126818
loss: 1.0876821279525757,grad_norm: 0.8820559499029872, iteration: 126819
loss: 1.0344047546386719,grad_norm: 0.9999999173227545, iteration: 126820
loss: 0.9619049429893494,grad_norm: 0.9999993395806156, iteration: 126821
loss: 1.0831228494644165,grad_norm: 0.9999994703918124, iteration: 126822
loss: 1.1027822494506836,grad_norm: 0.9999994694833163, iteration: 126823
loss: 0.9665504693984985,grad_norm: 0.9034346882719528, iteration: 126824
loss: 1.0353387594223022,grad_norm: 0.9999994441908471, iteration: 126825
loss: 1.1900039911270142,grad_norm: 0.9999998478712943, iteration: 126826
loss: 0.9875437617301941,grad_norm: 0.8845600433672975, iteration: 126827
loss: 1.011010766029358,grad_norm: 0.9999999376949004, iteration: 126828
loss: 0.9696803689002991,grad_norm: 0.9999992341426669, iteration: 126829
loss: 1.0818266868591309,grad_norm: 0.9999990313438952, iteration: 126830
loss: 0.9988703727722168,grad_norm: 0.9691041115857281, iteration: 126831
loss: 1.2063617706298828,grad_norm: 0.9999993487020232, iteration: 126832
loss: 0.9885808825492859,grad_norm: 0.7388549285453262, iteration: 126833
loss: 0.9700598120689392,grad_norm: 0.9999995426805981, iteration: 126834
loss: 1.0278791189193726,grad_norm: 0.9771185775252924, iteration: 126835
loss: 1.0028557777404785,grad_norm: 0.9999995657857428, iteration: 126836
loss: 1.1558436155319214,grad_norm: 0.9999996633515679, iteration: 126837
loss: 1.0947232246398926,grad_norm: 0.9535891832550354, iteration: 126838
loss: 1.1133722066879272,grad_norm: 0.9999992003350876, iteration: 126839
loss: 1.1370257139205933,grad_norm: 0.9999992354930918, iteration: 126840
loss: 1.0399518013000488,grad_norm: 0.9999994873093737, iteration: 126841
loss: 1.1218198537826538,grad_norm: 0.9999995172156994, iteration: 126842
loss: 1.048335075378418,grad_norm: 0.9999999375842616, iteration: 126843
loss: 1.1673824787139893,grad_norm: 0.999999559527759, iteration: 126844
loss: 1.1628468036651611,grad_norm: 0.9999998227043263, iteration: 126845
loss: 1.2293736934661865,grad_norm: 0.9999998393057921, iteration: 126846
loss: 1.217410683631897,grad_norm: 1.000000100133734, iteration: 126847
loss: 1.0488537549972534,grad_norm: 0.9999994413154227, iteration: 126848
loss: 1.1446462869644165,grad_norm: 0.9999998988311555, iteration: 126849
loss: 1.0176583528518677,grad_norm: 0.9999993533944662, iteration: 126850
loss: 1.0641971826553345,grad_norm: 0.9999991499407352, iteration: 126851
loss: 0.9938496947288513,grad_norm: 0.8421239775431444, iteration: 126852
loss: 0.9933871626853943,grad_norm: 0.9999990569913306, iteration: 126853
loss: 1.1041042804718018,grad_norm: 0.999999519651984, iteration: 126854
loss: 1.0435616970062256,grad_norm: 0.9999993820292548, iteration: 126855
loss: 1.0265110731124878,grad_norm: 0.9108844913221487, iteration: 126856
loss: 1.0339809656143188,grad_norm: 0.8058090614393134, iteration: 126857
loss: 1.0010402202606201,grad_norm: 0.9191536702634788, iteration: 126858
loss: 1.014562964439392,grad_norm: 0.8250551897576937, iteration: 126859
loss: 1.0305289030075073,grad_norm: 0.8366048978959023, iteration: 126860
loss: 1.0530412197113037,grad_norm: 0.999999197235441, iteration: 126861
loss: 1.006914496421814,grad_norm: 0.9999993764363505, iteration: 126862
loss: 1.071615219116211,grad_norm: 0.9999991855200778, iteration: 126863
loss: 0.989679753780365,grad_norm: 0.7924164146880688, iteration: 126864
loss: 1.0337988138198853,grad_norm: 0.9999994580666574, iteration: 126865
loss: 1.0150587558746338,grad_norm: 0.7969467528563162, iteration: 126866
loss: 1.029712200164795,grad_norm: 0.839434119344018, iteration: 126867
loss: 1.0122665166854858,grad_norm: 0.9999994378976336, iteration: 126868
loss: 1.0140398740768433,grad_norm: 0.9999992534829646, iteration: 126869
loss: 1.0530403852462769,grad_norm: 0.8381192051193602, iteration: 126870
loss: 1.0056575536727905,grad_norm: 0.9043463377905779, iteration: 126871
loss: 1.021475076675415,grad_norm: 0.7306924091145752, iteration: 126872
loss: 1.2060258388519287,grad_norm: 0.999999697807555, iteration: 126873
loss: 1.0253736972808838,grad_norm: 0.9130539176366663, iteration: 126874
loss: 1.1107730865478516,grad_norm: 0.9999999014814647, iteration: 126875
loss: 1.0262607336044312,grad_norm: 0.9999995426982409, iteration: 126876
loss: 1.0103563070297241,grad_norm: 0.9769822506780489, iteration: 126877
loss: 0.9998717904090881,grad_norm: 0.9999990419960854, iteration: 126878
loss: 0.9884054064750671,grad_norm: 0.7443835977398021, iteration: 126879
loss: 0.9577419757843018,grad_norm: 0.9999991530106044, iteration: 126880
loss: 0.9972395896911621,grad_norm: 0.8563138721893279, iteration: 126881
loss: 0.9793990254402161,grad_norm: 0.8399586083612331, iteration: 126882
loss: 1.0098570585250854,grad_norm: 0.8748715519478216, iteration: 126883
loss: 0.9811173677444458,grad_norm: 0.9893982271280791, iteration: 126884
loss: 1.0410808324813843,grad_norm: 0.9999998805486072, iteration: 126885
loss: 1.062487244606018,grad_norm: 0.8401259685160621, iteration: 126886
loss: 1.1130177974700928,grad_norm: 0.9999994473837734, iteration: 126887
loss: 1.109621524810791,grad_norm: 0.9999991573878623, iteration: 126888
loss: 1.0174514055252075,grad_norm: 0.7688661372568706, iteration: 126889
loss: 1.0493085384368896,grad_norm: 0.9999992426755372, iteration: 126890
loss: 1.4117751121520996,grad_norm: 0.9999995871620908, iteration: 126891
loss: 0.9630926251411438,grad_norm: 0.9999990695594924, iteration: 126892
loss: 0.9943187832832336,grad_norm: 0.9999990826035787, iteration: 126893
loss: 1.0221658945083618,grad_norm: 0.8629106120857204, iteration: 126894
loss: 1.1326195001602173,grad_norm: 0.9999992297602455, iteration: 126895
loss: 1.3320324420928955,grad_norm: 0.9999999152287435, iteration: 126896
loss: 1.0152400732040405,grad_norm: 0.8038882456503341, iteration: 126897
loss: 0.9670324921607971,grad_norm: 0.719958150276241, iteration: 126898
loss: 1.0153355598449707,grad_norm: 0.9999998389079224, iteration: 126899
loss: 1.010558843612671,grad_norm: 0.8113872128287059, iteration: 126900
loss: 1.0279730558395386,grad_norm: 0.9999991152174115, iteration: 126901
loss: 1.136982798576355,grad_norm: 0.9999995043149227, iteration: 126902
loss: 0.9952324032783508,grad_norm: 0.8886356215642375, iteration: 126903
loss: 1.0125036239624023,grad_norm: 0.999999921275013, iteration: 126904
loss: 1.0716489553451538,grad_norm: 0.9999999621880564, iteration: 126905
loss: 0.9742957353591919,grad_norm: 0.9999993718224486, iteration: 126906
loss: 1.0978307723999023,grad_norm: 0.9737233973478292, iteration: 126907
loss: 1.003751277923584,grad_norm: 0.9999994317497872, iteration: 126908
loss: 1.0475103855133057,grad_norm: 0.9999992878824768, iteration: 126909
loss: 1.0943236351013184,grad_norm: 0.9999995563228943, iteration: 126910
loss: 1.0111422538757324,grad_norm: 0.911116444390125, iteration: 126911
loss: 1.031020998954773,grad_norm: 0.9999991405730049, iteration: 126912
loss: 1.008628487586975,grad_norm: 0.8505045662677503, iteration: 126913
loss: 0.9971548318862915,grad_norm: 0.9999990641497081, iteration: 126914
loss: 1.008284091949463,grad_norm: 0.9999999254343453, iteration: 126915
loss: 1.1158108711242676,grad_norm: 0.8526016503947124, iteration: 126916
loss: 1.0319209098815918,grad_norm: 0.9999993644674362, iteration: 126917
loss: 0.9924524426460266,grad_norm: 0.9906861837779494, iteration: 126918
loss: 1.047072410583496,grad_norm: 0.9999998307611438, iteration: 126919
loss: 1.1218063831329346,grad_norm: 0.9999995939313067, iteration: 126920
loss: 1.0541484355926514,grad_norm: 0.9999997846094643, iteration: 126921
loss: 1.0198849439620972,grad_norm: 0.9999990424660375, iteration: 126922
loss: 1.0858707427978516,grad_norm: 0.9999998788524335, iteration: 126923
loss: 0.9812593460083008,grad_norm: 0.9999994572816325, iteration: 126924
loss: 1.027983546257019,grad_norm: 0.9317593166540611, iteration: 126925
loss: 0.9792413115501404,grad_norm: 0.9999996462431215, iteration: 126926
loss: 1.073756217956543,grad_norm: 0.9999993733400636, iteration: 126927
loss: 1.0600316524505615,grad_norm: 0.9999992293274546, iteration: 126928
loss: 1.026504397392273,grad_norm: 0.9999993758468589, iteration: 126929
loss: 1.1877249479293823,grad_norm: 0.999999393962366, iteration: 126930
loss: 1.0732189416885376,grad_norm: 0.872163395565259, iteration: 126931
loss: 1.0068806409835815,grad_norm: 0.9999996376020546, iteration: 126932
loss: 1.0085148811340332,grad_norm: 0.9999990322028454, iteration: 126933
loss: 1.0090619325637817,grad_norm: 0.9999996180578204, iteration: 126934
loss: 0.9927050471305847,grad_norm: 0.9999997307872771, iteration: 126935
loss: 1.2696175575256348,grad_norm: 0.9999995710561067, iteration: 126936
loss: 1.116481900215149,grad_norm: 0.9999995703279848, iteration: 126937
loss: 1.0999499559402466,grad_norm: 0.999999444786404, iteration: 126938
loss: 1.0324957370758057,grad_norm: 0.9999997173087178, iteration: 126939
loss: 1.294386625289917,grad_norm: 0.999999954207919, iteration: 126940
loss: 1.0446624755859375,grad_norm: 0.8409931112833844, iteration: 126941
loss: 1.0701106786727905,grad_norm: 0.9999991889083857, iteration: 126942
loss: 0.9652938842773438,grad_norm: 0.9999996451490523, iteration: 126943
loss: 1.284449577331543,grad_norm: 0.9999996975518917, iteration: 126944
loss: 1.0482025146484375,grad_norm: 0.999999335783142, iteration: 126945
loss: 1.0509049892425537,grad_norm: 0.9999998558482055, iteration: 126946
loss: 1.0303360223770142,grad_norm: 0.9637838381705366, iteration: 126947
loss: 1.1380525827407837,grad_norm: 0.9999996957743478, iteration: 126948
loss: 1.0082358121871948,grad_norm: 0.9999992493157263, iteration: 126949
loss: 1.1130048036575317,grad_norm: 0.9999996193303096, iteration: 126950
loss: 0.9985538721084595,grad_norm: 0.99999916752729, iteration: 126951
loss: 1.0356593132019043,grad_norm: 0.9999997130678815, iteration: 126952
loss: 1.052654504776001,grad_norm: 0.882560122056389, iteration: 126953
loss: 0.9942795038223267,grad_norm: 0.9134729073058248, iteration: 126954
loss: 0.991959810256958,grad_norm: 0.9999999025595233, iteration: 126955
loss: 1.0264583826065063,grad_norm: 0.8090729285972836, iteration: 126956
loss: 1.1012271642684937,grad_norm: 0.9999991235866734, iteration: 126957
loss: 1.0171564817428589,grad_norm: 0.9217773933210015, iteration: 126958
loss: 1.0382918119430542,grad_norm: 0.9999990448799786, iteration: 126959
loss: 1.0430175065994263,grad_norm: 0.9999998016943749, iteration: 126960
loss: 0.9914096593856812,grad_norm: 0.8497361239514702, iteration: 126961
loss: 1.300532341003418,grad_norm: 0.9999992465860318, iteration: 126962
loss: 1.0153560638427734,grad_norm: 0.9999989791940934, iteration: 126963
loss: 1.0201724767684937,grad_norm: 0.9999996282715949, iteration: 126964
loss: 1.2749626636505127,grad_norm: 0.9999999728726375, iteration: 126965
loss: 0.9893656373023987,grad_norm: 0.9097706697861382, iteration: 126966
loss: 1.0267670154571533,grad_norm: 0.9007585419696431, iteration: 126967
loss: 1.022678017616272,grad_norm: 0.9999996503263368, iteration: 126968
loss: 1.0680601596832275,grad_norm: 0.9999994493629566, iteration: 126969
loss: 1.0807290077209473,grad_norm: 0.9999992970141026, iteration: 126970
loss: 1.0880968570709229,grad_norm: 0.999999522673872, iteration: 126971
loss: 1.0569751262664795,grad_norm: 0.9999991288259573, iteration: 126972
loss: 1.0153510570526123,grad_norm: 0.8958979836931782, iteration: 126973
loss: 1.0350208282470703,grad_norm: 0.9867723242394987, iteration: 126974
loss: 1.0291707515716553,grad_norm: 0.9147947792856452, iteration: 126975
loss: 1.249123454093933,grad_norm: 0.9999993349259088, iteration: 126976
loss: 1.0509933233261108,grad_norm: 0.9999991201625945, iteration: 126977
loss: 1.0038807392120361,grad_norm: 0.999999115232772, iteration: 126978
loss: 1.032580018043518,grad_norm: 0.9999993030088411, iteration: 126979
loss: 0.9824643731117249,grad_norm: 0.9787111198955374, iteration: 126980
loss: 1.117473840713501,grad_norm: 0.9999998855353327, iteration: 126981
loss: 1.1229033470153809,grad_norm: 0.999999406281636, iteration: 126982
loss: 1.1327377557754517,grad_norm: 0.9999999664334875, iteration: 126983
loss: 1.288553237915039,grad_norm: 0.9999995673452463, iteration: 126984
loss: 0.9827110767364502,grad_norm: 0.95669884478159, iteration: 126985
loss: 1.2010726928710938,grad_norm: 0.9999996882442632, iteration: 126986
loss: 1.224158525466919,grad_norm: 0.9999995954338512, iteration: 126987
loss: 1.0476925373077393,grad_norm: 0.9999997462400236, iteration: 126988
loss: 0.9993481040000916,grad_norm: 0.803911291862809, iteration: 126989
loss: 1.0699734687805176,grad_norm: 0.9999992525545015, iteration: 126990
loss: 0.9917303919792175,grad_norm: 0.8644640805633391, iteration: 126991
loss: 0.9833005666732788,grad_norm: 0.9999991558689291, iteration: 126992
loss: 0.9859718084335327,grad_norm: 0.8481766694211399, iteration: 126993
loss: 1.0842201709747314,grad_norm: 0.9999991345822267, iteration: 126994
loss: 1.2583249807357788,grad_norm: 0.9999997442450523, iteration: 126995
loss: 1.1652649641036987,grad_norm: 0.9999990766087652, iteration: 126996
loss: 1.0072940587997437,grad_norm: 0.9565829750630106, iteration: 126997
loss: 0.9972716569900513,grad_norm: 0.7452518108901381, iteration: 126998
loss: 1.1839841604232788,grad_norm: 0.9999992410507736, iteration: 126999
loss: 1.03715181350708,grad_norm: 0.9999991424028828, iteration: 127000
loss: 1.2079542875289917,grad_norm: 0.9999994915879309, iteration: 127001
loss: 1.0395923852920532,grad_norm: 0.9999992800449935, iteration: 127002
loss: 1.0168343782424927,grad_norm: 0.9755110682433016, iteration: 127003
loss: 1.0058531761169434,grad_norm: 0.9999990539260504, iteration: 127004
loss: 1.0202516317367554,grad_norm: 0.9217540843189477, iteration: 127005
loss: 1.5249732732772827,grad_norm: 0.9999997831241833, iteration: 127006
loss: 1.0155067443847656,grad_norm: 0.9999991860550662, iteration: 127007
loss: 1.0291591882705688,grad_norm: 0.9999990980218205, iteration: 127008
loss: 1.027639627456665,grad_norm: 0.8643998878093073, iteration: 127009
loss: 0.9962001442909241,grad_norm: 0.8364959682064498, iteration: 127010
loss: 1.0001325607299805,grad_norm: 0.924678437348433, iteration: 127011
loss: 1.263553261756897,grad_norm: 0.999999276532291, iteration: 127012
loss: 1.3730740547180176,grad_norm: 0.9999994807865377, iteration: 127013
loss: 1.0549691915512085,grad_norm: 0.9999997678827738, iteration: 127014
loss: 1.355536937713623,grad_norm: 0.9999997456172027, iteration: 127015
loss: 1.0156527757644653,grad_norm: 0.934158445012945, iteration: 127016
loss: 1.0552828311920166,grad_norm: 0.9999991232244211, iteration: 127017
loss: 1.2008637189865112,grad_norm: 0.9999991135024214, iteration: 127018
loss: 0.9916155934333801,grad_norm: 0.9999992395842675, iteration: 127019
loss: 1.3228470087051392,grad_norm: 0.9999999096987714, iteration: 127020
loss: 1.0867650508880615,grad_norm: 0.9999994022985481, iteration: 127021
loss: 1.0204005241394043,grad_norm: 0.8600113169902631, iteration: 127022
loss: 1.009412407875061,grad_norm: 0.9350891905192389, iteration: 127023
loss: 1.0160356760025024,grad_norm: 0.9999993523575026, iteration: 127024
loss: 1.0042898654937744,grad_norm: 0.9999993990942206, iteration: 127025
loss: 1.0053616762161255,grad_norm: 0.9999998958233302, iteration: 127026
loss: 1.408645510673523,grad_norm: 0.999999726058496, iteration: 127027
loss: 1.1198810338974,grad_norm: 1.000000005474474, iteration: 127028
loss: 1.4148648977279663,grad_norm: 0.999999575796177, iteration: 127029
loss: 1.0457082986831665,grad_norm: 0.9999998426470796, iteration: 127030
loss: 1.0594500303268433,grad_norm: 0.9999996001557497, iteration: 127031
loss: 1.1039713621139526,grad_norm: 0.9999995540031725, iteration: 127032
loss: 1.200772762298584,grad_norm: 0.9999995072657952, iteration: 127033
loss: 1.134213924407959,grad_norm: 0.9999995879600233, iteration: 127034
loss: 1.2185578346252441,grad_norm: 0.9999998746629012, iteration: 127035
loss: 1.0256247520446777,grad_norm: 0.999999244148934, iteration: 127036
loss: 1.0232402086257935,grad_norm: 0.9999992895172253, iteration: 127037
loss: 1.0584737062454224,grad_norm: 0.9999992612043281, iteration: 127038
loss: 1.1678178310394287,grad_norm: 0.9999995703095199, iteration: 127039
loss: 1.2666715383529663,grad_norm: 0.9999995270535437, iteration: 127040
loss: 0.9647689461708069,grad_norm: 0.9138134088358959, iteration: 127041
loss: 1.0701696872711182,grad_norm: 0.9999991067994414, iteration: 127042
loss: 1.2989634275436401,grad_norm: 0.999999867278701, iteration: 127043
loss: 1.1303703784942627,grad_norm: 0.9999999934776502, iteration: 127044
loss: 1.0119775533676147,grad_norm: 0.9999993396497807, iteration: 127045
loss: 1.0099458694458008,grad_norm: 0.8276614728898257, iteration: 127046
loss: 1.1517536640167236,grad_norm: 0.910654430885909, iteration: 127047
loss: 1.1364625692367554,grad_norm: 0.9999998498566672, iteration: 127048
loss: 0.9810525178909302,grad_norm: 0.8455832409239524, iteration: 127049
loss: 1.3209367990493774,grad_norm: 0.9999997700255806, iteration: 127050
loss: 1.0010696649551392,grad_norm: 0.9999991077197039, iteration: 127051
loss: 1.2386565208435059,grad_norm: 0.9999998472629633, iteration: 127052
loss: 1.0216871500015259,grad_norm: 0.92729619001931, iteration: 127053
loss: 1.1497654914855957,grad_norm: 0.9999999513430802, iteration: 127054
loss: 1.2480688095092773,grad_norm: 0.9999992987286084, iteration: 127055
loss: 1.1293121576309204,grad_norm: 0.9999998896681793, iteration: 127056
loss: 1.1477231979370117,grad_norm: 0.9999992502361704, iteration: 127057
loss: 1.101196050643921,grad_norm: 0.9999995061659904, iteration: 127058
loss: 1.0028220415115356,grad_norm: 0.9552829215683815, iteration: 127059
loss: 1.1567713022232056,grad_norm: 0.999999347057591, iteration: 127060
loss: 1.0031455755233765,grad_norm: 0.9999996858572666, iteration: 127061
loss: 1.189959168434143,grad_norm: 0.9999994731731937, iteration: 127062
loss: 1.0907537937164307,grad_norm: 0.8483993864391233, iteration: 127063
loss: 1.0739060640335083,grad_norm: 0.9999999082006035, iteration: 127064
loss: 1.0736602544784546,grad_norm: 0.99999919379837, iteration: 127065
loss: 1.0813053846359253,grad_norm: 0.9999992801794072, iteration: 127066
loss: 1.199079155921936,grad_norm: 0.9999996070330786, iteration: 127067
loss: 1.1441584825515747,grad_norm: 0.999999765572698, iteration: 127068
loss: 1.1164721250534058,grad_norm: 0.9999994555035435, iteration: 127069
loss: 1.110257625579834,grad_norm: 0.9999997450276162, iteration: 127070
loss: 1.320696234703064,grad_norm: 0.9999998690607708, iteration: 127071
loss: 1.0378211736679077,grad_norm: 0.9999995665070814, iteration: 127072
loss: 1.0928117036819458,grad_norm: 0.9999996201480275, iteration: 127073
loss: 1.0499300956726074,grad_norm: 0.999999126814211, iteration: 127074
loss: 1.2364223003387451,grad_norm: 0.999999951505817, iteration: 127075
loss: 1.2823066711425781,grad_norm: 0.9999997688567263, iteration: 127076
loss: 1.07174813747406,grad_norm: 0.9999993807046152, iteration: 127077
loss: 1.167646884918213,grad_norm: 0.9999997662744768, iteration: 127078
loss: 1.116804838180542,grad_norm: 0.9999996473404039, iteration: 127079
loss: 1.0305390357971191,grad_norm: 0.9999990883609409, iteration: 127080
loss: 0.9950125813484192,grad_norm: 0.9999997501378243, iteration: 127081
loss: 1.102157711982727,grad_norm: 0.999999212833202, iteration: 127082
loss: 1.0995815992355347,grad_norm: 0.9999997762880126, iteration: 127083
loss: 1.0763543844223022,grad_norm: 0.9999991577520608, iteration: 127084
loss: 1.0879720449447632,grad_norm: 0.9999999282270274, iteration: 127085
loss: 1.0408532619476318,grad_norm: 0.9999997290223307, iteration: 127086
loss: 1.3236675262451172,grad_norm: 0.9999998737164417, iteration: 127087
loss: 1.1351728439331055,grad_norm: 0.9999999277338516, iteration: 127088
loss: 1.1352440118789673,grad_norm: 0.9999997873391633, iteration: 127089
loss: 1.118086814880371,grad_norm: 0.9999998832379176, iteration: 127090
loss: 1.0417283773422241,grad_norm: 0.9999998801674898, iteration: 127091
loss: 1.051186203956604,grad_norm: 0.9999991928070534, iteration: 127092
loss: 1.0272231101989746,grad_norm: 0.9999990516153091, iteration: 127093
loss: 1.0660433769226074,grad_norm: 0.9999994672641194, iteration: 127094
loss: 1.0723543167114258,grad_norm: 0.9999997353828345, iteration: 127095
loss: 1.1956820487976074,grad_norm: 0.9999999937322576, iteration: 127096
loss: 1.3140521049499512,grad_norm: 0.9999997785604557, iteration: 127097
loss: 1.0027828216552734,grad_norm: 0.9999998424965841, iteration: 127098
loss: 1.034822940826416,grad_norm: 0.9999997930795366, iteration: 127099
loss: 1.105239987373352,grad_norm: 0.9999991816597453, iteration: 127100
loss: 1.0561938285827637,grad_norm: 0.9999991761744594, iteration: 127101
loss: 1.152525544166565,grad_norm: 0.9999995500055272, iteration: 127102
loss: 1.087371587753296,grad_norm: 0.9999994765174345, iteration: 127103
loss: 1.0475225448608398,grad_norm: 0.9999993388755214, iteration: 127104
loss: 0.9847278594970703,grad_norm: 0.9999992760277469, iteration: 127105
loss: 1.0831727981567383,grad_norm: 0.9999990411017943, iteration: 127106
loss: 1.1277458667755127,grad_norm: 0.9999999448898912, iteration: 127107
loss: 1.110529899597168,grad_norm: 0.9999998461805196, iteration: 127108
loss: 1.1911207437515259,grad_norm: 0.999999946954868, iteration: 127109
loss: 1.0805667638778687,grad_norm: 0.911083999071116, iteration: 127110
loss: 1.0559666156768799,grad_norm: 0.9999998403199983, iteration: 127111
loss: 1.1010658740997314,grad_norm: 0.9999996942010063, iteration: 127112
loss: 1.2719013690948486,grad_norm: 0.9999999032682928, iteration: 127113
loss: 1.3882932662963867,grad_norm: 0.999999880169938, iteration: 127114
loss: 1.1307469606399536,grad_norm: 0.9999998544730558, iteration: 127115
loss: 1.192795753479004,grad_norm: 0.9999993644705731, iteration: 127116
loss: 1.3369251489639282,grad_norm: 0.9999998361390753, iteration: 127117
loss: 1.0531083345413208,grad_norm: 0.9999990538405665, iteration: 127118
loss: 1.1884764432907104,grad_norm: 0.999999929999407, iteration: 127119
loss: 1.2162530422210693,grad_norm: 0.9999995507179522, iteration: 127120
loss: 1.2015187740325928,grad_norm: 0.9999993613619204, iteration: 127121
loss: 1.634630560874939,grad_norm: 0.9999999571420828, iteration: 127122
loss: 1.2115564346313477,grad_norm: 0.9999998206446942, iteration: 127123
loss: 1.4620692729949951,grad_norm: 0.9999999003855182, iteration: 127124
loss: 1.4317857027053833,grad_norm: 0.9999999652762401, iteration: 127125
loss: 1.260551929473877,grad_norm: 0.9999999126527411, iteration: 127126
loss: 1.359057068824768,grad_norm: 0.9999998870354979, iteration: 127127
loss: 1.1864852905273438,grad_norm: 0.9999998302549998, iteration: 127128
loss: 1.6440849304199219,grad_norm: 0.9999998616692573, iteration: 127129
loss: 1.5858912467956543,grad_norm: 0.9999998116096783, iteration: 127130
loss: 1.472882628440857,grad_norm: 0.999999749391856, iteration: 127131
loss: 1.4900456666946411,grad_norm: 0.9999999731184245, iteration: 127132
loss: 1.351731777191162,grad_norm: 0.9999999687986207, iteration: 127133
loss: 1.4315794706344604,grad_norm: 0.9999998986575397, iteration: 127134
loss: 1.5092765092849731,grad_norm: 0.9999998928245186, iteration: 127135
loss: 1.4369372129440308,grad_norm: 0.9999999612785766, iteration: 127136
loss: 1.5134297609329224,grad_norm: 0.9999999065256042, iteration: 127137
loss: 1.4188799858093262,grad_norm: 0.9999999248116002, iteration: 127138
loss: 1.4751580953598022,grad_norm: 0.9999999439976164, iteration: 127139
loss: 1.9742995500564575,grad_norm: 0.9999998194615087, iteration: 127140
loss: 1.2391003370285034,grad_norm: 0.999999891189085, iteration: 127141
loss: 1.3537946939468384,grad_norm: 0.9999999357198018, iteration: 127142
loss: 2.2961785793304443,grad_norm: 0.9999999626509752, iteration: 127143
loss: 1.3422197103500366,grad_norm: 0.9999996778488396, iteration: 127144
loss: 1.4817348718643188,grad_norm: 0.9999999303828057, iteration: 127145
loss: 1.6651835441589355,grad_norm: 0.9999998964360499, iteration: 127146
loss: 1.731406331062317,grad_norm: 1.0000000010046874, iteration: 127147
loss: 1.8539576530456543,grad_norm: 0.9999998648922241, iteration: 127148
loss: 1.7838670015335083,grad_norm: 0.9999999920622474, iteration: 127149
loss: 1.9192092418670654,grad_norm: 0.9999998974286758, iteration: 127150
loss: 1.6333309412002563,grad_norm: 0.9999999765990587, iteration: 127151
loss: 2.0548038482666016,grad_norm: 0.9999998856098894, iteration: 127152
loss: 2.9215407371520996,grad_norm: 0.9999999343859295, iteration: 127153
loss: 2.0946574211120605,grad_norm: 1.000000055783018, iteration: 127154
loss: 1.968645453453064,grad_norm: 0.9999999777308862, iteration: 127155
loss: 2.2080442905426025,grad_norm: 0.9999999603724399, iteration: 127156
loss: 2.3498520851135254,grad_norm: 0.9999998881819424, iteration: 127157
loss: 2.30305814743042,grad_norm: 0.9999998851081113, iteration: 127158
loss: 1.4693962335586548,grad_norm: 0.9999996967320948, iteration: 127159
loss: 1.5643208026885986,grad_norm: 0.9999998528980876, iteration: 127160
loss: 2.0564796924591064,grad_norm: 0.9999999384300862, iteration: 127161
loss: 1.8823540210723877,grad_norm: 0.9999998165885949, iteration: 127162
loss: 1.9644147157669067,grad_norm: 0.9999999411602992, iteration: 127163
loss: 2.1675994396209717,grad_norm: 0.999999960594115, iteration: 127164
loss: 1.8131035566329956,grad_norm: 1.0000000195902718, iteration: 127165
loss: 2.1297059059143066,grad_norm: 0.9999999190788017, iteration: 127166
loss: 1.6633416414260864,grad_norm: 0.9999999202229025, iteration: 127167
loss: 1.919053077697754,grad_norm: 0.9999999137958646, iteration: 127168
loss: 1.397707223892212,grad_norm: 1.0000000124269945, iteration: 127169
loss: 2.030360460281372,grad_norm: 0.9999999362774127, iteration: 127170
loss: 1.7069590091705322,grad_norm: 1.0000000231539414, iteration: 127171
loss: 1.504863977432251,grad_norm: 0.999999931442179, iteration: 127172
loss: 1.9788446426391602,grad_norm: 0.9999998580063165, iteration: 127173
loss: 1.5727020502090454,grad_norm: 1.0000000462900271, iteration: 127174
loss: 1.7593426704406738,grad_norm: 1.000000071414435, iteration: 127175
loss: 1.6782640218734741,grad_norm: 1.0000000337238106, iteration: 127176
loss: 1.8608640432357788,grad_norm: 0.9999998954337791, iteration: 127177
loss: 1.5757704973220825,grad_norm: 0.9999998400749116, iteration: 127178
loss: 1.89708411693573,grad_norm: 0.9999998670470848, iteration: 127179
loss: 1.584685206413269,grad_norm: 0.9999999162558723, iteration: 127180
loss: 1.7356127500534058,grad_norm: 0.9999999879682482, iteration: 127181
loss: 1.6175817251205444,grad_norm: 0.9999998222991956, iteration: 127182
loss: 1.6554831266403198,grad_norm: 0.9999999526025649, iteration: 127183
loss: 1.4315396547317505,grad_norm: 0.9999999629732187, iteration: 127184
loss: 1.711362600326538,grad_norm: 0.9999997449023673, iteration: 127185
loss: 1.7898292541503906,grad_norm: 0.9999997692114505, iteration: 127186
loss: 1.8169264793395996,grad_norm: 0.999999984659779, iteration: 127187
loss: 1.6508255004882812,grad_norm: 0.9999999661940051, iteration: 127188
loss: 1.858830451965332,grad_norm: 0.9999999551006623, iteration: 127189
loss: 1.6453279256820679,grad_norm: 0.9999999244613059, iteration: 127190
loss: 1.950445532798767,grad_norm: 0.9999999327740263, iteration: 127191
loss: 1.4860831499099731,grad_norm: 0.9999999370550181, iteration: 127192
loss: 1.7692351341247559,grad_norm: 0.9999999072894467, iteration: 127193
loss: 2.1437275409698486,grad_norm: 1.000000078076724, iteration: 127194
loss: 1.6904019117355347,grad_norm: 0.9999998297554146, iteration: 127195
loss: 2.3886666297912598,grad_norm: 0.999999964431728, iteration: 127196
loss: 1.7683569192886353,grad_norm: 0.9999998825909221, iteration: 127197
loss: 1.6970746517181396,grad_norm: 0.9999999341851207, iteration: 127198
loss: 1.6168862581253052,grad_norm: 0.9999999064546816, iteration: 127199
loss: 1.8352465629577637,grad_norm: 0.9999999145941699, iteration: 127200
loss: 1.6430846452713013,grad_norm: 0.9999999278238458, iteration: 127201
loss: 1.9936937093734741,grad_norm: 0.9999999413734226, iteration: 127202
loss: 1.7815417051315308,grad_norm: 0.9999999654213843, iteration: 127203
loss: 1.5553392171859741,grad_norm: 0.99999980012506, iteration: 127204
loss: 1.6313575506210327,grad_norm: 0.9999998670436321, iteration: 127205
loss: 1.7789958715438843,grad_norm: 0.9999999809137624, iteration: 127206
loss: 1.6828755140304565,grad_norm: 1.000000064679932, iteration: 127207
loss: 1.5047194957733154,grad_norm: 0.9999999354064142, iteration: 127208
loss: 1.7521835565567017,grad_norm: 0.9999999454728169, iteration: 127209
loss: 1.9624816179275513,grad_norm: 0.9999999031802002, iteration: 127210
loss: 1.8237519264221191,grad_norm: 1.0000000662132078, iteration: 127211
loss: 1.92416512966156,grad_norm: 0.9999998023282275, iteration: 127212
loss: 1.8476303815841675,grad_norm: 0.9999999016179433, iteration: 127213
loss: 1.7205512523651123,grad_norm: 1.0000000839996137, iteration: 127214
loss: 1.8803330659866333,grad_norm: 0.9999999593545069, iteration: 127215
loss: 2.132277011871338,grad_norm: 0.9999999787984687, iteration: 127216
loss: 2.1423985958099365,grad_norm: 0.9999999435687899, iteration: 127217
loss: 1.5984597206115723,grad_norm: 0.999999878534636, iteration: 127218
loss: 1.7174149751663208,grad_norm: 1.0000000142987318, iteration: 127219
loss: 1.8686668872833252,grad_norm: 1.0000000134900582, iteration: 127220
loss: 1.8876359462738037,grad_norm: 0.999999892308409, iteration: 127221
loss: 1.8657845258712769,grad_norm: 0.9999996801352627, iteration: 127222
loss: 1.6923497915267944,grad_norm: 0.999999867494066, iteration: 127223
loss: 1.5529580116271973,grad_norm: 1.0000000189814424, iteration: 127224
loss: 1.718738317489624,grad_norm: 0.9999999143137286, iteration: 127225
loss: 1.7087745666503906,grad_norm: 0.9999998472182531, iteration: 127226
loss: 1.710447907447815,grad_norm: 0.9999998558929959, iteration: 127227
loss: 1.8045129776000977,grad_norm: 0.9999999299318008, iteration: 127228
loss: 1.6081035137176514,grad_norm: 0.999999875694938, iteration: 127229
loss: 1.7776553630828857,grad_norm: 0.9999998531177161, iteration: 127230
loss: 2.0888078212738037,grad_norm: 0.999999849945195, iteration: 127231
loss: 1.4122893810272217,grad_norm: 0.9999997731051448, iteration: 127232
loss: 1.7938159704208374,grad_norm: 0.9999998844451492, iteration: 127233
loss: 1.5817558765411377,grad_norm: 0.9999997904942374, iteration: 127234
loss: 1.7905467748641968,grad_norm: 1.0000000832841727, iteration: 127235
loss: 1.8434613943099976,grad_norm: 0.9999999139410385, iteration: 127236
loss: 1.6900230646133423,grad_norm: 1.0000001110460344, iteration: 127237
loss: 1.7632677555084229,grad_norm: 0.9999999602901586, iteration: 127238
loss: 1.6536540985107422,grad_norm: 0.999999939041364, iteration: 127239
loss: 2.0086281299591064,grad_norm: 0.9999999946554579, iteration: 127240
loss: 1.8482720851898193,grad_norm: 0.9999998501610315, iteration: 127241
loss: 1.6935480833053589,grad_norm: 0.9999999269421306, iteration: 127242
loss: 2.0097060203552246,grad_norm: 1.0000000240935125, iteration: 127243
loss: 1.899287223815918,grad_norm: 0.9999998908594157, iteration: 127244
loss: 1.6648328304290771,grad_norm: 0.9999998931474262, iteration: 127245
loss: 2.1290535926818848,grad_norm: 1.000000032052759, iteration: 127246
loss: 1.9332733154296875,grad_norm: 0.9999998788210797, iteration: 127247
loss: 1.9379428625106812,grad_norm: 0.999999954646743, iteration: 127248
loss: 1.5544795989990234,grad_norm: 0.9999999607516572, iteration: 127249
loss: 1.8385056257247925,grad_norm: 0.9999999466472143, iteration: 127250
loss: 1.6325608491897583,grad_norm: 0.9999999416007161, iteration: 127251
loss: 1.7689322233200073,grad_norm: 1.0000000265973719, iteration: 127252
loss: 1.5137920379638672,grad_norm: 0.9999998377631599, iteration: 127253
loss: 1.98686683177948,grad_norm: 0.9999999604949502, iteration: 127254
loss: 1.7231918573379517,grad_norm: 0.9999999001811454, iteration: 127255
loss: 2.0453035831451416,grad_norm: 0.9999998946826223, iteration: 127256
loss: 1.8594622611999512,grad_norm: 0.9999999259519784, iteration: 127257
loss: 1.696427822113037,grad_norm: 0.9999999091333285, iteration: 127258
loss: 1.937307596206665,grad_norm: 0.9999998490096496, iteration: 127259
loss: 1.963816523551941,grad_norm: 0.9999998737782053, iteration: 127260
loss: 1.960891604423523,grad_norm: 0.9999998800501362, iteration: 127261
loss: 1.8864954710006714,grad_norm: 1.0000000176467947, iteration: 127262
loss: 1.8001216650009155,grad_norm: 0.9999998435907486, iteration: 127263
loss: 1.6007623672485352,grad_norm: 0.999999935897231, iteration: 127264
loss: 2.1325018405914307,grad_norm: 0.9999999527919768, iteration: 127265
loss: 1.7056037187576294,grad_norm: 0.999999911834815, iteration: 127266
loss: 1.6130092144012451,grad_norm: 1.0000000526281165, iteration: 127267
loss: 1.814866304397583,grad_norm: 0.999999974027041, iteration: 127268
loss: 2.113539457321167,grad_norm: 0.9999999861884732, iteration: 127269
loss: 1.7761539220809937,grad_norm: 0.9999999234685694, iteration: 127270
loss: 1.8751784563064575,grad_norm: 0.9999998872527094, iteration: 127271
loss: 1.923752784729004,grad_norm: 0.9999999605402443, iteration: 127272
loss: 2.007578134536743,grad_norm: 0.9999999287230982, iteration: 127273
loss: 1.922573208808899,grad_norm: 0.9999999595964296, iteration: 127274
loss: 1.9437768459320068,grad_norm: 0.9999999486602088, iteration: 127275
loss: 1.6334033012390137,grad_norm: 0.9999999965854258, iteration: 127276
loss: 2.0805981159210205,grad_norm: 0.9999998692019131, iteration: 127277
loss: 1.7909877300262451,grad_norm: 0.9999999157604059, iteration: 127278
loss: 1.861109972000122,grad_norm: 0.9999999091181069, iteration: 127279
loss: 1.910445213317871,grad_norm: 1.0000000354183038, iteration: 127280
loss: 1.7126439809799194,grad_norm: 0.9999999577944346, iteration: 127281
loss: 2.142949104309082,grad_norm: 0.9999999380764103, iteration: 127282
loss: 1.521622657775879,grad_norm: 0.9999998996239292, iteration: 127283
loss: 1.8215218782424927,grad_norm: 0.9999999452505791, iteration: 127284
loss: 1.4892386198043823,grad_norm: 1.0000000105434141, iteration: 127285
loss: 1.7402478456497192,grad_norm: 0.9999998719548944, iteration: 127286
loss: 1.7337126731872559,grad_norm: 0.9999999390224251, iteration: 127287
loss: 1.8353968858718872,grad_norm: 0.9999998922098372, iteration: 127288
loss: 1.6837252378463745,grad_norm: 0.9999999125015222, iteration: 127289
loss: 1.712583303451538,grad_norm: 0.9999999191817827, iteration: 127290
loss: 1.7055156230926514,grad_norm: 0.9999998672140189, iteration: 127291
loss: 1.5189857482910156,grad_norm: 0.9999997864530228, iteration: 127292
loss: 1.9793634414672852,grad_norm: 0.9999999860164716, iteration: 127293
loss: 1.3623301982879639,grad_norm: 0.9999996820809465, iteration: 127294
loss: 1.8097585439682007,grad_norm: 0.9999999855522085, iteration: 127295
loss: 1.6089880466461182,grad_norm: 0.9999999880702327, iteration: 127296
loss: 1.3509784936904907,grad_norm: 0.999999944844468, iteration: 127297
loss: 1.33296537399292,grad_norm: 0.9999998998993609, iteration: 127298
loss: 1.7064741849899292,grad_norm: 0.9999997838138746, iteration: 127299
loss: 1.9654484987258911,grad_norm: 0.9999999953555477, iteration: 127300
loss: 1.484427809715271,grad_norm: 1.0000000186369122, iteration: 127301
loss: 1.5461503267288208,grad_norm: 1.0000000186012226, iteration: 127302
loss: 1.894700527191162,grad_norm: 1.0000000114874494, iteration: 127303
loss: 1.6765714883804321,grad_norm: 1.0000000056511278, iteration: 127304
loss: 1.2715175151824951,grad_norm: 0.9999997257931081, iteration: 127305
loss: 1.7539081573486328,grad_norm: 0.9999999351854132, iteration: 127306
loss: 1.8576103448867798,grad_norm: 0.9999999694604567, iteration: 127307
loss: 1.4914072751998901,grad_norm: 0.9999998171488687, iteration: 127308
loss: 1.5890458822250366,grad_norm: 0.9999999196153101, iteration: 127309
loss: 1.5157643556594849,grad_norm: 1.0000000098643427, iteration: 127310
loss: 1.449345350265503,grad_norm: 0.9999998286048567, iteration: 127311
loss: 1.8036013841629028,grad_norm: 0.9999998837786067, iteration: 127312
loss: 1.43711519241333,grad_norm: 0.999999949516372, iteration: 127313
loss: 1.5466814041137695,grad_norm: 0.9999998829025181, iteration: 127314
loss: 1.5467756986618042,grad_norm: 0.9999997089649085, iteration: 127315
loss: 1.6491197347640991,grad_norm: 1.0000000327280496, iteration: 127316
loss: 1.7333225011825562,grad_norm: 0.9999998801736999, iteration: 127317
loss: 1.4148515462875366,grad_norm: 0.9999999732507588, iteration: 127318
loss: 1.542353868484497,grad_norm: 0.9999998935353095, iteration: 127319
loss: 1.614145278930664,grad_norm: 0.9999999218849079, iteration: 127320
loss: 1.495597004890442,grad_norm: 0.9999999656446934, iteration: 127321
loss: 1.3884665966033936,grad_norm: 0.9999996999179626, iteration: 127322
loss: 1.6239829063415527,grad_norm: 0.9999999510178935, iteration: 127323
loss: 1.4991047382354736,grad_norm: 0.9999999994816536, iteration: 127324
loss: 1.6768989562988281,grad_norm: 0.9999999493153207, iteration: 127325
loss: 1.4793224334716797,grad_norm: 0.9999998465078913, iteration: 127326
loss: 1.6881823539733887,grad_norm: 0.999999994083295, iteration: 127327
loss: 1.163131594657898,grad_norm: 0.9999998288302603, iteration: 127328
loss: 1.3813462257385254,grad_norm: 0.9999998693666491, iteration: 127329
loss: 1.4221323728561401,grad_norm: 0.9999996967881698, iteration: 127330
loss: 1.6458306312561035,grad_norm: 0.9999999335473746, iteration: 127331
loss: 1.5795800685882568,grad_norm: 0.9999999745592251, iteration: 127332
loss: 1.3575423955917358,grad_norm: 0.9999999905388047, iteration: 127333
loss: 1.4858715534210205,grad_norm: 0.9999996881752192, iteration: 127334
loss: 1.4044578075408936,grad_norm: 0.9999997736053284, iteration: 127335
loss: 1.3884460926055908,grad_norm: 0.9999998019527692, iteration: 127336
loss: 1.528584599494934,grad_norm: 1.000000092530778, iteration: 127337
loss: 1.8508057594299316,grad_norm: 0.9999999049764198, iteration: 127338
loss: 1.612038493156433,grad_norm: 0.999999964932431, iteration: 127339
loss: 1.4528427124023438,grad_norm: 0.9999998587813275, iteration: 127340
loss: 1.732924461364746,grad_norm: 0.9999999394246463, iteration: 127341
loss: 1.621424913406372,grad_norm: 0.9999996729306095, iteration: 127342
loss: 1.4951236248016357,grad_norm: 0.9999998288841542, iteration: 127343
loss: 1.4279307126998901,grad_norm: 1.0000000114777685, iteration: 127344
loss: 1.385812520980835,grad_norm: 0.999999939884986, iteration: 127345
loss: 1.1867879629135132,grad_norm: 0.9999998627901187, iteration: 127346
loss: 1.5810338258743286,grad_norm: 0.9999999228124604, iteration: 127347
loss: 1.5123645067214966,grad_norm: 0.9999999578107853, iteration: 127348
loss: 1.5029929876327515,grad_norm: 1.0000000131145346, iteration: 127349
loss: 1.5950908660888672,grad_norm: 0.9999999394305187, iteration: 127350
loss: 1.5826784372329712,grad_norm: 0.9999998776058905, iteration: 127351
loss: 1.4990744590759277,grad_norm: 0.9999999332260147, iteration: 127352
loss: 1.6379235982894897,grad_norm: 0.9999998851041458, iteration: 127353
loss: 1.293576955795288,grad_norm: 0.9999998230333195, iteration: 127354
loss: 1.4642738103866577,grad_norm: 0.999999973350705, iteration: 127355
loss: 1.3187311887741089,grad_norm: 0.9999999489615322, iteration: 127356
loss: 1.336024284362793,grad_norm: 0.999999769011579, iteration: 127357
loss: 1.2877658605575562,grad_norm: 0.999999705414263, iteration: 127358
loss: 1.5277540683746338,grad_norm: 0.9999996789963964, iteration: 127359
loss: 1.368552327156067,grad_norm: 0.9999999317264625, iteration: 127360
loss: 1.378709316253662,grad_norm: 0.9999999644463639, iteration: 127361
loss: 1.4099204540252686,grad_norm: 0.9999998202665525, iteration: 127362
loss: 1.0763612985610962,grad_norm: 0.9999994790278254, iteration: 127363
loss: 1.2734909057617188,grad_norm: 0.9999999761080435, iteration: 127364
loss: 1.4610058069229126,grad_norm: 1.0000001096625872, iteration: 127365
loss: 1.297105073928833,grad_norm: 0.9999997805624077, iteration: 127366
loss: 1.2605230808258057,grad_norm: 1.000000021669351, iteration: 127367
loss: 1.253833293914795,grad_norm: 0.9999999452808778, iteration: 127368
loss: 1.2320722341537476,grad_norm: 0.9999993480431344, iteration: 127369
loss: 1.139585018157959,grad_norm: 0.999999504025206, iteration: 127370
loss: 1.1916744709014893,grad_norm: 0.9999994192110745, iteration: 127371
loss: 1.2327221632003784,grad_norm: 0.9999998649067632, iteration: 127372
loss: 1.2656053304672241,grad_norm: 0.9999997785812441, iteration: 127373
loss: 1.4526114463806152,grad_norm: 1.0000000409408016, iteration: 127374
loss: 1.2659821510314941,grad_norm: 0.9999993476991836, iteration: 127375
loss: 1.223939061164856,grad_norm: 0.9999995724512842, iteration: 127376
loss: 1.268314242362976,grad_norm: 0.9999998432340559, iteration: 127377
loss: 1.5962095260620117,grad_norm: 0.999999842579476, iteration: 127378
loss: 1.4737846851348877,grad_norm: 0.9999997584652711, iteration: 127379
loss: 1.6207195520401,grad_norm: 0.9999998572034459, iteration: 127380
loss: 1.1051141023635864,grad_norm: 0.9999995878002486, iteration: 127381
loss: 1.265446662902832,grad_norm: 1.0000000185225226, iteration: 127382
loss: 1.4622944593429565,grad_norm: 0.9999996773644044, iteration: 127383
loss: 1.2888849973678589,grad_norm: 1.0000000148095811, iteration: 127384
loss: 1.2997063398361206,grad_norm: 0.9999999329386379, iteration: 127385
loss: 1.3222264051437378,grad_norm: 0.9999998814701965, iteration: 127386
loss: 1.5586490631103516,grad_norm: 0.9999999656738797, iteration: 127387
loss: 1.229750394821167,grad_norm: 0.9999993462703178, iteration: 127388
loss: 1.5230354070663452,grad_norm: 0.9999997972947433, iteration: 127389
loss: 1.608140468597412,grad_norm: 0.9999998811909285, iteration: 127390
loss: 1.3361872434616089,grad_norm: 0.9999999201361611, iteration: 127391
loss: 1.3635551929473877,grad_norm: 0.9999997540752645, iteration: 127392
loss: 1.47173273563385,grad_norm: 0.9999998894641765, iteration: 127393
loss: 1.3786927461624146,grad_norm: 0.9999998701846675, iteration: 127394
loss: 1.4612669944763184,grad_norm: 0.9999998033795501, iteration: 127395
loss: 1.533868432044983,grad_norm: 0.9999998936471066, iteration: 127396
loss: 1.7056466341018677,grad_norm: 0.9999998796008113, iteration: 127397
loss: 1.311441421508789,grad_norm: 0.9999999591797979, iteration: 127398
loss: 1.50254487991333,grad_norm: 0.9999998700744515, iteration: 127399
loss: 1.5496008396148682,grad_norm: 0.9999998848590334, iteration: 127400
loss: 1.5159419775009155,grad_norm: 0.9999998474550839, iteration: 127401
loss: 1.3829666376113892,grad_norm: 0.999999852000318, iteration: 127402
loss: 1.3669904470443726,grad_norm: 0.9999997007481751, iteration: 127403
loss: 1.2476937770843506,grad_norm: 0.9999999364244199, iteration: 127404
loss: 1.4356263875961304,grad_norm: 0.9999999044091967, iteration: 127405
loss: 1.4575296640396118,grad_norm: 0.9999998239649285, iteration: 127406
loss: 1.2822248935699463,grad_norm: 0.9999996962668941, iteration: 127407
loss: 1.5927056074142456,grad_norm: 0.9999999346092984, iteration: 127408
loss: 1.3233224153518677,grad_norm: 0.9999997505710835, iteration: 127409
loss: 1.3957492113113403,grad_norm: 0.9999999646203391, iteration: 127410
loss: 1.3284807205200195,grad_norm: 0.9999998784242468, iteration: 127411
loss: 1.665724277496338,grad_norm: 0.9999998740101029, iteration: 127412
loss: 1.4135875701904297,grad_norm: 0.9999999719127804, iteration: 127413
loss: 1.5295242071151733,grad_norm: 0.9999999705746248, iteration: 127414
loss: 1.3327267169952393,grad_norm: 0.9999998349870729, iteration: 127415
loss: 1.3037177324295044,grad_norm: 0.9999997327379906, iteration: 127416
loss: 1.3025580644607544,grad_norm: 0.9999999732444859, iteration: 127417
loss: 1.5705798864364624,grad_norm: 0.9999999494020465, iteration: 127418
loss: 1.734956979751587,grad_norm: 0.9999997127682828, iteration: 127419
loss: 1.465116024017334,grad_norm: 0.9999995663510078, iteration: 127420
loss: 1.4958409070968628,grad_norm: 0.9999997098416078, iteration: 127421
loss: 1.3953866958618164,grad_norm: 0.999999842539778, iteration: 127422
loss: 1.7583941221237183,grad_norm: 0.9999999209211438, iteration: 127423
loss: 1.3089417219161987,grad_norm: 0.9999999006868472, iteration: 127424
loss: 1.3719438314437866,grad_norm: 0.9999998620736855, iteration: 127425
loss: 1.632598876953125,grad_norm: 0.999999854875067, iteration: 127426
loss: 1.0997744798660278,grad_norm: 0.999999824871741, iteration: 127427
loss: 1.3594787120819092,grad_norm: 0.9999999648815544, iteration: 127428
loss: 1.5471069812774658,grad_norm: 0.9999999172448291, iteration: 127429
loss: 1.344689130783081,grad_norm: 0.9999997986535446, iteration: 127430
loss: 1.4151722192764282,grad_norm: 0.9999997340782603, iteration: 127431
loss: 1.6927387714385986,grad_norm: 0.999999746640598, iteration: 127432
loss: 1.4813063144683838,grad_norm: 0.9999998657536174, iteration: 127433
loss: 1.520504355430603,grad_norm: 1.0000000520697399, iteration: 127434
loss: 1.4196895360946655,grad_norm: 0.9999998413213237, iteration: 127435
loss: 1.4591295719146729,grad_norm: 0.9999998270402086, iteration: 127436
loss: 1.524275302886963,grad_norm: 0.9999997031439256, iteration: 127437
loss: 1.9350910186767578,grad_norm: 0.9999998499639284, iteration: 127438
loss: 1.468153715133667,grad_norm: 0.9999999335667392, iteration: 127439
loss: 1.3133758306503296,grad_norm: 0.9999999514677709, iteration: 127440
loss: 1.5332863330841064,grad_norm: 0.9999998604703445, iteration: 127441
loss: 1.4012937545776367,grad_norm: 0.9999997341396606, iteration: 127442
loss: 1.5115071535110474,grad_norm: 0.999999943205794, iteration: 127443
loss: 1.6080207824707031,grad_norm: 0.9999997933626765, iteration: 127444
loss: 1.3688523769378662,grad_norm: 0.9999997079596676, iteration: 127445
loss: 1.5523165464401245,grad_norm: 1.0000000078491111, iteration: 127446
loss: 1.1964832544326782,grad_norm: 0.9999998726845403, iteration: 127447
loss: 1.400952696800232,grad_norm: 0.9999997979878773, iteration: 127448
loss: 1.2106884717941284,grad_norm: 0.9999995503423014, iteration: 127449
loss: 1.167514443397522,grad_norm: 0.9999998304315794, iteration: 127450
loss: 1.2885557413101196,grad_norm: 0.9999998598940739, iteration: 127451
loss: 1.0834875106811523,grad_norm: 0.9999994453529916, iteration: 127452
loss: 1.3364654779434204,grad_norm: 0.9999994469828848, iteration: 127453
loss: 1.1658053398132324,grad_norm: 0.9999998781656264, iteration: 127454
loss: 1.1670970916748047,grad_norm: 0.9999997312249024, iteration: 127455
loss: 1.0886348485946655,grad_norm: 0.9999996415551797, iteration: 127456
loss: 1.290313959121704,grad_norm: 0.9999999283444437, iteration: 127457
loss: 1.4185503721237183,grad_norm: 0.9999997557720008, iteration: 127458
loss: 1.1267383098602295,grad_norm: 0.9999997215042008, iteration: 127459
loss: 1.1714974641799927,grad_norm: 0.9999998239390717, iteration: 127460
loss: 1.2944031953811646,grad_norm: 0.9999998422970587, iteration: 127461
loss: 1.5473912954330444,grad_norm: 0.9999998834439797, iteration: 127462
loss: 1.676442265510559,grad_norm: 0.9999996468193396, iteration: 127463
loss: 1.5574146509170532,grad_norm: 0.9999999713246451, iteration: 127464
loss: 1.6929230690002441,grad_norm: 1.0000000089883923, iteration: 127465
loss: 1.5388096570968628,grad_norm: 1.0000000347833073, iteration: 127466
loss: 1.2722450494766235,grad_norm: 0.9999998335232222, iteration: 127467
loss: 1.299293875694275,grad_norm: 0.9999993041006375, iteration: 127468
loss: 1.4050421714782715,grad_norm: 0.9999999296695375, iteration: 127469
loss: 1.0789499282836914,grad_norm: 0.9999997541103621, iteration: 127470
loss: 1.6856375932693481,grad_norm: 0.9999996834451435, iteration: 127471
loss: 1.1471962928771973,grad_norm: 1.0000000144161567, iteration: 127472
loss: 1.50385582447052,grad_norm: 0.9999996999672144, iteration: 127473
loss: 1.5901304483413696,grad_norm: 0.9999998197642042, iteration: 127474
loss: 1.4671529531478882,grad_norm: 0.9999999298679052, iteration: 127475
loss: 1.5379467010498047,grad_norm: 0.9999997209695454, iteration: 127476
loss: 1.551831603050232,grad_norm: 0.9999996253476618, iteration: 127477
loss: 1.4736957550048828,grad_norm: 0.9999998723908883, iteration: 127478
loss: 1.694050669670105,grad_norm: 0.9999999444421056, iteration: 127479
loss: 1.6517728567123413,grad_norm: 1.0000000110883305, iteration: 127480
loss: 1.4234533309936523,grad_norm: 0.9999999843267465, iteration: 127481
loss: 1.3679875135421753,grad_norm: 0.9999995961642002, iteration: 127482
loss: 1.7666503190994263,grad_norm: 0.9999999265436555, iteration: 127483
loss: 1.3672653436660767,grad_norm: 0.9999998602582494, iteration: 127484
loss: 1.2992727756500244,grad_norm: 0.9999999576610445, iteration: 127485
loss: 1.2333592176437378,grad_norm: 0.9999998975302666, iteration: 127486
loss: 1.495114803314209,grad_norm: 0.9999997821311657, iteration: 127487
loss: 1.6249456405639648,grad_norm: 0.9999997834971447, iteration: 127488
loss: 2.1837692260742188,grad_norm: 0.9999998892192276, iteration: 127489
loss: 1.5651651620864868,grad_norm: 0.9999997540484429, iteration: 127490
loss: 1.4054235219955444,grad_norm: 0.9999999477006889, iteration: 127491
loss: 1.178703784942627,grad_norm: 0.9999998879122588, iteration: 127492
loss: 1.1583389043807983,grad_norm: 0.9999998680533203, iteration: 127493
loss: 1.4009325504302979,grad_norm: 0.9999999334598119, iteration: 127494
loss: 1.3245517015457153,grad_norm: 0.9999999674068195, iteration: 127495
loss: 1.3024036884307861,grad_norm: 0.9999998662969876, iteration: 127496
loss: 1.105607271194458,grad_norm: 0.9999997131203059, iteration: 127497
loss: 1.3294841051101685,grad_norm: 0.9999998772225048, iteration: 127498
loss: 1.1403971910476685,grad_norm: 0.9999998165576471, iteration: 127499
loss: 1.5921530723571777,grad_norm: 1.000000014511413, iteration: 127500
loss: 1.168196678161621,grad_norm: 0.9999994422634446, iteration: 127501
loss: 1.3645316362380981,grad_norm: 0.9999998022572836, iteration: 127502
loss: 1.1065983772277832,grad_norm: 1.000000034809008, iteration: 127503
loss: 1.1928943395614624,grad_norm: 0.9999996017903181, iteration: 127504
loss: 1.4246455430984497,grad_norm: 0.9999997577654712, iteration: 127505
loss: 1.3557003736495972,grad_norm: 0.9999997504394975, iteration: 127506
loss: 1.2534986734390259,grad_norm: 0.9999997262984868, iteration: 127507
loss: 1.4786040782928467,grad_norm: 0.9999997245678384, iteration: 127508
loss: 1.4989943504333496,grad_norm: 0.9999998483677721, iteration: 127509
loss: 1.0708324909210205,grad_norm: 0.9999996544961317, iteration: 127510
loss: 1.4855479001998901,grad_norm: 0.99999985686251, iteration: 127511
loss: 1.5530439615249634,grad_norm: 0.9999999619206218, iteration: 127512
loss: 1.3601313829421997,grad_norm: 1.0000000035282672, iteration: 127513
loss: 1.3602027893066406,grad_norm: 0.9999999509696759, iteration: 127514
loss: 1.5117436647415161,grad_norm: 1.0000000070414556, iteration: 127515
loss: 1.3782600164413452,grad_norm: 0.9999998322862463, iteration: 127516
loss: 1.7925726175308228,grad_norm: 0.99999980225502, iteration: 127517
loss: 1.3653032779693604,grad_norm: 0.9999998956817263, iteration: 127518
loss: 1.248377799987793,grad_norm: 0.9999995699463201, iteration: 127519
loss: 1.3062543869018555,grad_norm: 1.0000000388401664, iteration: 127520
loss: 1.0450568199157715,grad_norm: 0.9999998423295458, iteration: 127521
loss: 1.2871301174163818,grad_norm: 0.9999996693461971, iteration: 127522
loss: 1.3425391912460327,grad_norm: 1.000000054394262, iteration: 127523
loss: 1.0607352256774902,grad_norm: 0.9999992349470465, iteration: 127524
loss: 1.2028385400772095,grad_norm: 0.9999995865895447, iteration: 127525
loss: 1.270467758178711,grad_norm: 0.9999998912070469, iteration: 127526
loss: 1.3933720588684082,grad_norm: 0.999999978395554, iteration: 127527
loss: 1.332296371459961,grad_norm: 0.9999995228582995, iteration: 127528
loss: 1.2037996053695679,grad_norm: 0.9999995367489857, iteration: 127529
loss: 1.5602171421051025,grad_norm: 0.9999997994232617, iteration: 127530
loss: 1.215266466140747,grad_norm: 0.9999998373290864, iteration: 127531
loss: 1.214775562286377,grad_norm: 0.9999999342115664, iteration: 127532
loss: 1.160579800605774,grad_norm: 0.9999997752517875, iteration: 127533
loss: 1.522786259651184,grad_norm: 0.9999999377252914, iteration: 127534
loss: 1.0499929189682007,grad_norm: 0.9999993524657343, iteration: 127535
loss: 1.5455316305160522,grad_norm: 0.9999999897640518, iteration: 127536
loss: 1.2369165420532227,grad_norm: 0.9999998320609841, iteration: 127537
loss: 1.2405472993850708,grad_norm: 0.9999996589442544, iteration: 127538
loss: 1.2856696844100952,grad_norm: 0.9999996807621184, iteration: 127539
loss: 1.4566709995269775,grad_norm: 1.000000024149859, iteration: 127540
loss: 1.2781296968460083,grad_norm: 0.9999998387273359, iteration: 127541
loss: 1.2276047468185425,grad_norm: 0.9999998128758534, iteration: 127542
loss: 1.2900948524475098,grad_norm: 0.9999999362668804, iteration: 127543
loss: 1.4391738176345825,grad_norm: 0.9999998759122335, iteration: 127544
loss: 1.0009479522705078,grad_norm: 0.9999992547257261, iteration: 127545
loss: 1.0390031337738037,grad_norm: 1.000000057613725, iteration: 127546
loss: 1.5833038091659546,grad_norm: 0.9999999336535705, iteration: 127547
loss: 1.4162089824676514,grad_norm: 0.9999993965944844, iteration: 127548
loss: 1.3160816431045532,grad_norm: 0.9999997448598174, iteration: 127549
loss: 1.25648832321167,grad_norm: 0.999999621348036, iteration: 127550
loss: 1.4769972562789917,grad_norm: 0.9999998330868615, iteration: 127551
loss: 1.3267037868499756,grad_norm: 0.9999998789768076, iteration: 127552
loss: 1.0576751232147217,grad_norm: 0.9999991504923598, iteration: 127553
loss: 1.4467576742172241,grad_norm: 0.9999998708497825, iteration: 127554
loss: 1.3421021699905396,grad_norm: 0.9999996559305958, iteration: 127555
loss: 1.387803077697754,grad_norm: 0.9999999165689218, iteration: 127556
loss: 1.3443180322647095,grad_norm: 0.9999997765827546, iteration: 127557
loss: 1.2742127180099487,grad_norm: 1.0000000283823536, iteration: 127558
loss: 1.2801698446273804,grad_norm: 0.999999748375279, iteration: 127559
loss: 1.2887457609176636,grad_norm: 0.9999997159363221, iteration: 127560
loss: 1.3408640623092651,grad_norm: 0.9999995993239289, iteration: 127561
loss: 1.3596282005310059,grad_norm: 0.9999999184068419, iteration: 127562
loss: 1.105357050895691,grad_norm: 0.9999995288946384, iteration: 127563
loss: 1.5657892227172852,grad_norm: 0.9999999204995295, iteration: 127564
loss: 1.336684226989746,grad_norm: 1.0000000214155442, iteration: 127565
loss: 1.1631830930709839,grad_norm: 0.9999998098366315, iteration: 127566
loss: 1.2115179300308228,grad_norm: 0.9999997498959953, iteration: 127567
loss: 1.2744033336639404,grad_norm: 0.9999997717742963, iteration: 127568
loss: 1.3613892793655396,grad_norm: 0.9999994496881889, iteration: 127569
loss: 1.1844617128372192,grad_norm: 0.99999963550356, iteration: 127570
loss: 1.4000626802444458,grad_norm: 0.9999995855600802, iteration: 127571
loss: 1.4531365633010864,grad_norm: 0.9999997439326174, iteration: 127572
loss: 1.3399091958999634,grad_norm: 0.9999999632354771, iteration: 127573
loss: 1.3982138633728027,grad_norm: 0.9999998603364217, iteration: 127574
loss: 1.4003962278366089,grad_norm: 0.9999995290534944, iteration: 127575
loss: 1.4709312915802002,grad_norm: 0.9999999004353449, iteration: 127576
loss: 1.3813519477844238,grad_norm: 0.9999995661370908, iteration: 127577
loss: 1.3359220027923584,grad_norm: 0.9999999054516894, iteration: 127578
loss: 1.4282467365264893,grad_norm: 0.9999998579597822, iteration: 127579
loss: 1.1347143650054932,grad_norm: 0.9999998387770531, iteration: 127580
loss: 1.78917396068573,grad_norm: 1.0000000262003017, iteration: 127581
loss: 1.2183797359466553,grad_norm: 0.999999935403745, iteration: 127582
loss: 1.4027962684631348,grad_norm: 0.999999928084383, iteration: 127583
loss: 1.2501776218414307,grad_norm: 0.9999994424277783, iteration: 127584
loss: 1.7638624906539917,grad_norm: 0.9999998366010714, iteration: 127585
loss: 1.3152689933776855,grad_norm: 0.9999995672073356, iteration: 127586
loss: 1.4885445833206177,grad_norm: 0.9999998510948176, iteration: 127587
loss: 1.1993598937988281,grad_norm: 0.9999999479662135, iteration: 127588
loss: 1.4682552814483643,grad_norm: 1.0000000215526417, iteration: 127589
loss: 1.4659841060638428,grad_norm: 0.9999998872765034, iteration: 127590
loss: 1.5801293849945068,grad_norm: 0.9999996962247351, iteration: 127591
loss: 1.3073484897613525,grad_norm: 0.9999998721583355, iteration: 127592
loss: 1.2408173084259033,grad_norm: 0.9999997496410535, iteration: 127593
loss: 1.2196176052093506,grad_norm: 0.999999436870868, iteration: 127594
loss: 1.3942829370498657,grad_norm: 0.9999999254830232, iteration: 127595
loss: 1.4165544509887695,grad_norm: 1.000000064431893, iteration: 127596
loss: 1.2928305864334106,grad_norm: 0.9999999659497367, iteration: 127597
loss: 1.422144889831543,grad_norm: 0.9999998610149382, iteration: 127598
loss: 1.3940116167068481,grad_norm: 0.9999996054500581, iteration: 127599
loss: 1.2892999649047852,grad_norm: 0.9999997506354791, iteration: 127600
loss: 1.466091513633728,grad_norm: 0.9999995955774688, iteration: 127601
loss: 1.455092430114746,grad_norm: 0.999999715423435, iteration: 127602
loss: 1.3513542413711548,grad_norm: 0.9999999200135989, iteration: 127603
loss: 1.3494549989700317,grad_norm: 0.9999999688211978, iteration: 127604
loss: 1.6974669694900513,grad_norm: 0.9999999341206233, iteration: 127605
loss: 1.2361005544662476,grad_norm: 0.9999998893817598, iteration: 127606
loss: 1.3206349611282349,grad_norm: 0.9999999836581259, iteration: 127607
loss: 1.5693864822387695,grad_norm: 0.9999998343208758, iteration: 127608
loss: 1.4673430919647217,grad_norm: 0.9999998531751471, iteration: 127609
loss: 1.2708762884140015,grad_norm: 0.9999999222183429, iteration: 127610
loss: 1.1204248666763306,grad_norm: 0.9999998396075594, iteration: 127611
loss: 1.1072912216186523,grad_norm: 0.9999992644613502, iteration: 127612
loss: 1.539427638053894,grad_norm: 0.9999999747508709, iteration: 127613
loss: 1.378696084022522,grad_norm: 0.9999998411525185, iteration: 127614
loss: 1.3684219121932983,grad_norm: 0.9999997940695142, iteration: 127615
loss: 1.6620732545852661,grad_norm: 0.9999998797914802, iteration: 127616
loss: 1.4155582189559937,grad_norm: 0.9999996948684498, iteration: 127617
loss: 1.2840642929077148,grad_norm: 0.9999996446957391, iteration: 127618
loss: 1.4574116468429565,grad_norm: 0.9999999743810186, iteration: 127619
loss: 1.3564609289169312,grad_norm: 0.999999750221103, iteration: 127620
loss: 1.4413648843765259,grad_norm: 0.9999998884988763, iteration: 127621
loss: 1.1576006412506104,grad_norm: 0.9999999830423375, iteration: 127622
loss: 1.2358003854751587,grad_norm: 0.9999998219097719, iteration: 127623
loss: 1.1545501947402954,grad_norm: 0.9999997835616082, iteration: 127624
loss: 1.7432730197906494,grad_norm: 0.9999998276339096, iteration: 127625
loss: 1.5674537420272827,grad_norm: 0.9999999037387645, iteration: 127626
loss: 1.415698528289795,grad_norm: 0.9999998615450262, iteration: 127627
loss: 1.4013420343399048,grad_norm: 0.9999999651374052, iteration: 127628
loss: 1.0743749141693115,grad_norm: 0.9999995819725279, iteration: 127629
loss: 1.261198878288269,grad_norm: 0.9999995399367303, iteration: 127630
loss: 1.3088890314102173,grad_norm: 0.999999651982183, iteration: 127631
loss: 1.4504727125167847,grad_norm: 0.999999893327517, iteration: 127632
loss: 1.3664132356643677,grad_norm: 0.9999998966152258, iteration: 127633
loss: 1.6650664806365967,grad_norm: 0.9999999018043769, iteration: 127634
loss: 1.3318368196487427,grad_norm: 0.9999999896158197, iteration: 127635
loss: 1.5827407836914062,grad_norm: 0.9999998406974883, iteration: 127636
loss: 1.5354007482528687,grad_norm: 0.999999840947732, iteration: 127637
loss: 1.4384160041809082,grad_norm: 0.9999999973128857, iteration: 127638
loss: 1.6418473720550537,grad_norm: 0.999999956294385, iteration: 127639
loss: 1.5460292100906372,grad_norm: 1.0000000169713497, iteration: 127640
loss: 1.5580488443374634,grad_norm: 0.9999998041510233, iteration: 127641
loss: 1.3194681406021118,grad_norm: 1.000000004608091, iteration: 127642
loss: 1.4411571025848389,grad_norm: 0.999999895621291, iteration: 127643
loss: 1.593605399131775,grad_norm: 0.9999999932049554, iteration: 127644
loss: 1.4987163543701172,grad_norm: 0.9999999465613543, iteration: 127645
loss: 1.155211329460144,grad_norm: 0.9999999996990775, iteration: 127646
loss: 1.5490388870239258,grad_norm: 1.000000007107174, iteration: 127647
loss: 1.6484562158584595,grad_norm: 0.9999999665755083, iteration: 127648
loss: 1.5023337602615356,grad_norm: 0.9999997960864666, iteration: 127649
loss: 1.802257776260376,grad_norm: 0.9999998852527173, iteration: 127650
loss: 1.5192902088165283,grad_norm: 0.999999750151147, iteration: 127651
loss: 1.3576257228851318,grad_norm: 0.9999998789359925, iteration: 127652
loss: 1.6336581707000732,grad_norm: 0.9999998667678573, iteration: 127653
loss: 1.4538346529006958,grad_norm: 0.9999999072038132, iteration: 127654
loss: 1.9345237016677856,grad_norm: 1.0000000399196622, iteration: 127655
loss: 1.4566571712493896,grad_norm: 0.9999997729392212, iteration: 127656
loss: 1.3418278694152832,grad_norm: 0.9999997283660226, iteration: 127657
loss: 1.457279086112976,grad_norm: 0.9999997415614222, iteration: 127658
loss: 1.3328381776809692,grad_norm: 1.0000000326403569, iteration: 127659
loss: 1.8520582914352417,grad_norm: 1.000000019898572, iteration: 127660
loss: 1.2568790912628174,grad_norm: 0.9999996470410114, iteration: 127661
loss: 1.5740374326705933,grad_norm: 0.9999998593528688, iteration: 127662
loss: 1.4542016983032227,grad_norm: 0.9999999510148273, iteration: 127663
loss: 1.4711353778839111,grad_norm: 0.9999999390407276, iteration: 127664
loss: 1.3520652055740356,grad_norm: 0.9999999682744921, iteration: 127665
loss: 1.6376590728759766,grad_norm: 0.999999804825499, iteration: 127666
loss: 1.5038809776306152,grad_norm: 0.9999999607732364, iteration: 127667
loss: 1.5867429971694946,grad_norm: 0.9999998125536683, iteration: 127668
loss: 1.2958353757858276,grad_norm: 0.9999997452028899, iteration: 127669
loss: 1.3768283128738403,grad_norm: 0.9999997673364893, iteration: 127670
loss: 1.3176766633987427,grad_norm: 0.999999929832144, iteration: 127671
loss: 1.4701180458068848,grad_norm: 0.9999998426381108, iteration: 127672
loss: 1.4924912452697754,grad_norm: 0.9999998689039876, iteration: 127673
loss: 1.3749871253967285,grad_norm: 0.9999995402157665, iteration: 127674
loss: 1.525519609451294,grad_norm: 0.9999999656618698, iteration: 127675
loss: 1.2158857583999634,grad_norm: 1.000000070608318, iteration: 127676
loss: 1.5819154977798462,grad_norm: 0.9999998502137144, iteration: 127677
loss: 1.3870652914047241,grad_norm: 0.9999995197010434, iteration: 127678
loss: 1.241621494293213,grad_norm: 0.9999999528776844, iteration: 127679
loss: 1.4938318729400635,grad_norm: 1.000000062131925, iteration: 127680
loss: 1.1617047786712646,grad_norm: 0.9999999171879078, iteration: 127681
loss: 1.6119818687438965,grad_norm: 1.000000062813862, iteration: 127682
loss: 1.6007145643234253,grad_norm: 0.9999998098735601, iteration: 127683
loss: 1.3303430080413818,grad_norm: 0.9999997658744353, iteration: 127684
loss: 1.3786622285842896,grad_norm: 0.9999999227940398, iteration: 127685
loss: 1.3574728965759277,grad_norm: 0.9999994214268987, iteration: 127686
loss: 1.2601497173309326,grad_norm: 0.9999995451576968, iteration: 127687
loss: 1.356536865234375,grad_norm: 0.9999999104705164, iteration: 127688
loss: 1.3926842212677002,grad_norm: 0.9999997356131385, iteration: 127689
loss: 1.3866283893585205,grad_norm: 0.9999997833308321, iteration: 127690
loss: 1.6782925128936768,grad_norm: 1.0000000185847346, iteration: 127691
loss: 1.3203619718551636,grad_norm: 0.9999999265517344, iteration: 127692
loss: 1.526777744293213,grad_norm: 0.9999997045546325, iteration: 127693
loss: 1.1221556663513184,grad_norm: 0.9999994366559789, iteration: 127694
loss: 1.318988561630249,grad_norm: 0.9999997985196021, iteration: 127695
loss: 1.580579161643982,grad_norm: 0.9999997558698429, iteration: 127696
loss: 1.4061471223831177,grad_norm: 0.9999999419437153, iteration: 127697
loss: 1.504006266593933,grad_norm: 0.9999998351721329, iteration: 127698
loss: 1.35696542263031,grad_norm: 0.9999999939423725, iteration: 127699
loss: 1.3339778184890747,grad_norm: 0.9999999183326471, iteration: 127700
loss: 1.4821319580078125,grad_norm: 0.9999998641555041, iteration: 127701
loss: 1.4445291757583618,grad_norm: 0.9999999037919908, iteration: 127702
loss: 1.2756634950637817,grad_norm: 0.9999996553587358, iteration: 127703
loss: 1.6519273519515991,grad_norm: 0.9999999023600966, iteration: 127704
loss: 1.7024855613708496,grad_norm: 0.9999998073556863, iteration: 127705
loss: 1.458566665649414,grad_norm: 0.9999996120560125, iteration: 127706
loss: 1.474507451057434,grad_norm: 0.9999997675743101, iteration: 127707
loss: 1.466325044631958,grad_norm: 0.9999998315604126, iteration: 127708
loss: 1.2848421335220337,grad_norm: 0.9999995317809725, iteration: 127709
loss: 1.4420263767242432,grad_norm: 0.9999998484715521, iteration: 127710
loss: 1.1742490530014038,grad_norm: 0.9999998905546388, iteration: 127711
loss: 1.3094439506530762,grad_norm: 0.9999998549011068, iteration: 127712
loss: 1.55631685256958,grad_norm: 0.9999999280648948, iteration: 127713
loss: 1.5844180583953857,grad_norm: 0.9999998603237664, iteration: 127714
loss: 1.3008652925491333,grad_norm: 0.999999554257619, iteration: 127715
loss: 1.5143988132476807,grad_norm: 0.999999956688043, iteration: 127716
loss: 1.3158034086227417,grad_norm: 0.9999997405500357, iteration: 127717
loss: 1.436314582824707,grad_norm: 0.9999999471911335, iteration: 127718
loss: 1.3759386539459229,grad_norm: 0.9999995934962491, iteration: 127719
loss: 1.1739169359207153,grad_norm: 0.9999997826886078, iteration: 127720
loss: 1.5220941305160522,grad_norm: 1.0000000503289619, iteration: 127721
loss: 1.2364163398742676,grad_norm: 0.9999996916608741, iteration: 127722
loss: 1.594556212425232,grad_norm: 1.0000000257808868, iteration: 127723
loss: 1.7601280212402344,grad_norm: 0.9999998581116727, iteration: 127724
loss: 1.6382806301116943,grad_norm: 0.9999999907053412, iteration: 127725
loss: 1.490608811378479,grad_norm: 0.9999999717486513, iteration: 127726
loss: 1.741510272026062,grad_norm: 0.9999998830459894, iteration: 127727
loss: 1.6031545400619507,grad_norm: 0.9999999673332559, iteration: 127728
loss: 1.3754500150680542,grad_norm: 1.0000000089946524, iteration: 127729
loss: 1.5780586004257202,grad_norm: 0.9999998452625162, iteration: 127730
loss: 1.384339690208435,grad_norm: 0.9999998776823193, iteration: 127731
loss: 1.312785267829895,grad_norm: 0.9999999080082419, iteration: 127732
loss: 1.372644305229187,grad_norm: 0.9999998775971362, iteration: 127733
loss: 1.4169129133224487,grad_norm: 0.9999998295541825, iteration: 127734
loss: 1.3847055435180664,grad_norm: 0.9999998526579894, iteration: 127735
loss: 1.4829351902008057,grad_norm: 0.999999864106916, iteration: 127736
loss: 1.5581737756729126,grad_norm: 0.9999998982804392, iteration: 127737
loss: 1.4048360586166382,grad_norm: 0.9999997986308042, iteration: 127738
loss: 1.5635079145431519,grad_norm: 0.9999999537328492, iteration: 127739
loss: 1.3444868326187134,grad_norm: 1.0000000017366406, iteration: 127740
loss: 1.8622386455535889,grad_norm: 0.9999999870658131, iteration: 127741
loss: 1.4721689224243164,grad_norm: 0.9999998868221421, iteration: 127742
loss: 1.59565007686615,grad_norm: 0.9999999379810193, iteration: 127743
loss: 1.7779593467712402,grad_norm: 1.0000000378962293, iteration: 127744
loss: 1.6344305276870728,grad_norm: 0.9999999470805961, iteration: 127745
loss: 1.5457359552383423,grad_norm: 0.9999999915348831, iteration: 127746
loss: 1.6888319253921509,grad_norm: 0.9999999442679746, iteration: 127747
loss: 1.4860024452209473,grad_norm: 0.9999999640223828, iteration: 127748
loss: 1.252774715423584,grad_norm: 0.9999997415320759, iteration: 127749
loss: 1.5203261375427246,grad_norm: 0.9999998574068121, iteration: 127750
loss: 1.7698560953140259,grad_norm: 0.9999999341554188, iteration: 127751
loss: 2.0661606788635254,grad_norm: 1.0000000196780223, iteration: 127752
loss: 1.4986591339111328,grad_norm: 0.9999998143398656, iteration: 127753
loss: 1.483913540840149,grad_norm: 1.0000000335930392, iteration: 127754
loss: 1.5012028217315674,grad_norm: 0.9999998083245836, iteration: 127755
loss: 1.4398274421691895,grad_norm: 0.9999997908143542, iteration: 127756
loss: 1.56681489944458,grad_norm: 0.999999850920978, iteration: 127757
loss: 1.5163592100143433,grad_norm: 1.000000042521392, iteration: 127758
loss: 1.5118261575698853,grad_norm: 0.9999999381947338, iteration: 127759
loss: 1.3976819515228271,grad_norm: 0.9999998818716458, iteration: 127760
loss: 1.5722864866256714,grad_norm: 0.9999999983016182, iteration: 127761
loss: 2.112109422683716,grad_norm: 1.0000000884201001, iteration: 127762
loss: 1.475059151649475,grad_norm: 0.9999998652165386, iteration: 127763
loss: 1.6959186792373657,grad_norm: 0.9999999124436677, iteration: 127764
loss: 1.6530306339263916,grad_norm: 0.999999812159825, iteration: 127765
loss: 1.276686191558838,grad_norm: 0.9999994053215344, iteration: 127766
loss: 1.5671757459640503,grad_norm: 0.9999999838095088, iteration: 127767
loss: 1.4575245380401611,grad_norm: 0.9999993311868459, iteration: 127768
loss: 1.4178056716918945,grad_norm: 0.9999999238351938, iteration: 127769
loss: 1.3675223588943481,grad_norm: 0.9999999844822466, iteration: 127770
loss: 1.4540880918502808,grad_norm: 0.9999998190758097, iteration: 127771
loss: 1.3838496208190918,grad_norm: 0.9999997805633322, iteration: 127772
loss: 1.2386327981948853,grad_norm: 0.9999999589185878, iteration: 127773
loss: 1.461546540260315,grad_norm: 0.9999999836199535, iteration: 127774
loss: 1.4830665588378906,grad_norm: 0.9999999427906996, iteration: 127775
loss: 1.5300480127334595,grad_norm: 0.9999999770677029, iteration: 127776
loss: 1.3909778594970703,grad_norm: 0.9999998671481778, iteration: 127777
loss: 1.3505561351776123,grad_norm: 0.9999997301956841, iteration: 127778
loss: 1.6064579486846924,grad_norm: 0.9999999008936945, iteration: 127779
loss: 1.4428796768188477,grad_norm: 0.9999998702564349, iteration: 127780
loss: 1.3408665657043457,grad_norm: 0.9999997792853922, iteration: 127781
loss: 1.203303575515747,grad_norm: 0.9999997135191971, iteration: 127782
loss: 1.4898687601089478,grad_norm: 0.9999996728048648, iteration: 127783
loss: 1.2029746770858765,grad_norm: 0.9999996504049332, iteration: 127784
loss: 1.2669603824615479,grad_norm: 0.9999999389933666, iteration: 127785
loss: 1.496989369392395,grad_norm: 0.9999997026256554, iteration: 127786
loss: 1.2839869260787964,grad_norm: 0.9999998508918289, iteration: 127787
loss: 1.7203394174575806,grad_norm: 0.9999999549561327, iteration: 127788
loss: 1.6956813335418701,grad_norm: 0.999999986008796, iteration: 127789
loss: 1.3049328327178955,grad_norm: 0.9999995827371155, iteration: 127790
loss: 1.5626448392868042,grad_norm: 0.9999999334203575, iteration: 127791
loss: 1.3225524425506592,grad_norm: 0.9999996307810559, iteration: 127792
loss: 1.3555607795715332,grad_norm: 0.9999997030550789, iteration: 127793
loss: 1.3401650190353394,grad_norm: 1.0000000744109725, iteration: 127794
loss: 1.3269422054290771,grad_norm: 0.9999997847292952, iteration: 127795
loss: 1.1405287981033325,grad_norm: 1.0000000790827208, iteration: 127796
loss: 1.2441128492355347,grad_norm: 0.9999995795696436, iteration: 127797
loss: 1.3317822217941284,grad_norm: 0.9999998085505107, iteration: 127798
loss: 1.686160922050476,grad_norm: 0.9999999050269436, iteration: 127799
loss: 1.1525957584381104,grad_norm: 0.9999992108507009, iteration: 127800
loss: 1.4083322286605835,grad_norm: 0.9999998361077871, iteration: 127801
loss: 1.3784911632537842,grad_norm: 0.9999998546347997, iteration: 127802
loss: 1.2043439149856567,grad_norm: 0.9999999112415375, iteration: 127803
loss: 1.5636416673660278,grad_norm: 0.999999641458781, iteration: 127804
loss: 1.3359158039093018,grad_norm: 0.9999999133787938, iteration: 127805
loss: 1.0571258068084717,grad_norm: 0.9999994627866384, iteration: 127806
loss: 1.436415433883667,grad_norm: 0.999999913606718, iteration: 127807
loss: 1.7052090167999268,grad_norm: 0.9999999795326092, iteration: 127808
loss: 1.6794148683547974,grad_norm: 0.9999999180353574, iteration: 127809
loss: 1.4115456342697144,grad_norm: 0.9999999020864805, iteration: 127810
loss: 1.3336589336395264,grad_norm: 0.9999998872297364, iteration: 127811
loss: 1.449652075767517,grad_norm: 0.9999998287435087, iteration: 127812
loss: 1.2765437364578247,grad_norm: 0.999999859881407, iteration: 127813
loss: 1.629006266593933,grad_norm: 0.9999998937613098, iteration: 127814
loss: 1.1979268789291382,grad_norm: 0.9999993517266422, iteration: 127815
loss: 1.301055908203125,grad_norm: 0.9999998420510638, iteration: 127816
loss: 1.4306035041809082,grad_norm: 0.9999997866035534, iteration: 127817
loss: 1.4309660196304321,grad_norm: 0.9999998617935907, iteration: 127818
loss: 1.473029375076294,grad_norm: 0.9999998769486191, iteration: 127819
loss: 1.3468447923660278,grad_norm: 0.9999997918388757, iteration: 127820
loss: 1.34870183467865,grad_norm: 0.9999998978788016, iteration: 127821
loss: 1.2332566976547241,grad_norm: 0.9999996469785517, iteration: 127822
loss: 1.2814698219299316,grad_norm: 0.9999997823612715, iteration: 127823
loss: 1.381300926208496,grad_norm: 0.9999998153797955, iteration: 127824
loss: 1.2115660905838013,grad_norm: 0.999999838353973, iteration: 127825
loss: 1.2061299085617065,grad_norm: 1.0000000166732477, iteration: 127826
loss: 1.1986414194107056,grad_norm: 0.9999999265604942, iteration: 127827
loss: 1.2730164527893066,grad_norm: 0.9999997105776315, iteration: 127828
loss: 1.252289891242981,grad_norm: 0.9999996363581023, iteration: 127829
loss: 1.1583555936813354,grad_norm: 0.9999998977862754, iteration: 127830
loss: 1.269872784614563,grad_norm: 0.9999998855327932, iteration: 127831
loss: 1.206669807434082,grad_norm: 0.9999997022075706, iteration: 127832
loss: 1.1865878105163574,grad_norm: 0.9999999944648267, iteration: 127833
loss: 1.4226747751235962,grad_norm: 0.9999999593412665, iteration: 127834
loss: 1.4874571561813354,grad_norm: 0.9999999458097336, iteration: 127835
loss: 1.4077123403549194,grad_norm: 0.9999999499634873, iteration: 127836
loss: 1.4122856855392456,grad_norm: 0.9999998721947316, iteration: 127837
loss: 1.4069576263427734,grad_norm: 0.9999998784644137, iteration: 127838
loss: 1.3241595029830933,grad_norm: 0.9999998127468659, iteration: 127839
loss: 1.3704934120178223,grad_norm: 0.9999999044996218, iteration: 127840
loss: 1.468997836112976,grad_norm: 0.9999999201600732, iteration: 127841
loss: 1.099223017692566,grad_norm: 1.0000000189282214, iteration: 127842
loss: 1.1357070207595825,grad_norm: 0.9999999353235648, iteration: 127843
loss: 1.3988442420959473,grad_norm: 0.99999988150668, iteration: 127844
loss: 1.1956382989883423,grad_norm: 0.9999999702660979, iteration: 127845
loss: 1.2709970474243164,grad_norm: 0.9999999200651476, iteration: 127846
loss: 1.6894632577896118,grad_norm: 1.0000000522964938, iteration: 127847
loss: 1.1422311067581177,grad_norm: 0.9999999117144518, iteration: 127848
loss: 1.2557992935180664,grad_norm: 0.9999999672339225, iteration: 127849
loss: 1.3864787817001343,grad_norm: 0.9999999657792531, iteration: 127850
loss: 1.2122260332107544,grad_norm: 0.9999998118033069, iteration: 127851
loss: 1.1540940999984741,grad_norm: 0.9999998316910681, iteration: 127852
loss: 1.1127538681030273,grad_norm: 0.9999997148530771, iteration: 127853
loss: 1.1979718208312988,grad_norm: 0.9999999092610247, iteration: 127854
loss: 1.1818287372589111,grad_norm: 0.9999997026783064, iteration: 127855
loss: 1.378111481666565,grad_norm: 0.9999999720841958, iteration: 127856
loss: 1.4814201593399048,grad_norm: 0.9999998528866995, iteration: 127857
loss: 1.1937358379364014,grad_norm: 0.9999999934705529, iteration: 127858
loss: 1.2199088335037231,grad_norm: 0.9999995585846256, iteration: 127859
loss: 1.0189049243927002,grad_norm: 0.9999996010455928, iteration: 127860
loss: 1.1189271211624146,grad_norm: 0.9999998397527157, iteration: 127861
loss: 1.1662344932556152,grad_norm: 0.9999996907550918, iteration: 127862
loss: 1.0773683786392212,grad_norm: 0.9999999942008787, iteration: 127863
loss: 1.2360517978668213,grad_norm: 0.9999998444271552, iteration: 127864
loss: 1.170773983001709,grad_norm: 0.999999540603142, iteration: 127865
loss: 1.185328483581543,grad_norm: 1.0000000047333384, iteration: 127866
loss: 1.3340888023376465,grad_norm: 1.0000000694899123, iteration: 127867
loss: 1.2192411422729492,grad_norm: 0.9999994835876078, iteration: 127868
loss: 1.2441596984863281,grad_norm: 1.0000000084467973, iteration: 127869
loss: 1.3181923627853394,grad_norm: 0.9999998643410292, iteration: 127870
loss: 1.1441640853881836,grad_norm: 0.9999999396923477, iteration: 127871
loss: 1.3148547410964966,grad_norm: 1.0000000205429576, iteration: 127872
loss: 1.1643633842468262,grad_norm: 0.9999999873081333, iteration: 127873
loss: 1.3507391214370728,grad_norm: 0.9999995805265436, iteration: 127874
loss: 1.3587945699691772,grad_norm: 0.9999999378940393, iteration: 127875
loss: 1.1790220737457275,grad_norm: 0.9999998685931261, iteration: 127876
loss: 1.0630693435668945,grad_norm: 0.999999798619403, iteration: 127877
loss: 1.2687171697616577,grad_norm: 1.0000000721332054, iteration: 127878
loss: 1.3072853088378906,grad_norm: 0.9999999684341525, iteration: 127879
loss: 1.3087759017944336,grad_norm: 1.0000000665101734, iteration: 127880
loss: 1.3228799104690552,grad_norm: 0.9999998860264298, iteration: 127881
loss: 1.0762710571289062,grad_norm: 0.999999875620977, iteration: 127882
loss: 1.206869125366211,grad_norm: 0.9999999855708619, iteration: 127883
loss: 1.1329642534255981,grad_norm: 0.9999991298356607, iteration: 127884
loss: 1.2156769037246704,grad_norm: 0.999999871219544, iteration: 127885
loss: 1.097335934638977,grad_norm: 0.9999998489414635, iteration: 127886
loss: 1.1488651037216187,grad_norm: 0.9999998879196569, iteration: 127887
loss: 1.1788074970245361,grad_norm: 0.9999997653443029, iteration: 127888
loss: 1.127421259880066,grad_norm: 0.9999998868937906, iteration: 127889
loss: 1.2178562879562378,grad_norm: 0.9999996988953468, iteration: 127890
loss: 1.3178260326385498,grad_norm: 0.9999999613730888, iteration: 127891
loss: 1.4146126508712769,grad_norm: 0.9999999029989466, iteration: 127892
loss: 1.3500698804855347,grad_norm: 0.9999998601051764, iteration: 127893
loss: 1.3170976638793945,grad_norm: 0.9999999646652937, iteration: 127894
loss: 1.2334134578704834,grad_norm: 0.9999997382462981, iteration: 127895
loss: 1.1657730340957642,grad_norm: 0.9999999287679093, iteration: 127896
loss: 1.2394756078720093,grad_norm: 0.9999993692861545, iteration: 127897
loss: 1.5812020301818848,grad_norm: 0.9999998029073962, iteration: 127898
loss: 1.2011361122131348,grad_norm: 0.9999999921441906, iteration: 127899
loss: 1.2404539585113525,grad_norm: 0.9999993842778777, iteration: 127900
loss: 1.1316652297973633,grad_norm: 0.9999999621447608, iteration: 127901
loss: 1.2607144117355347,grad_norm: 0.9999998642537892, iteration: 127902
loss: 1.3233786821365356,grad_norm: 0.9999997651740308, iteration: 127903
loss: 1.4903475046157837,grad_norm: 0.9999999591026277, iteration: 127904
loss: 1.0295448303222656,grad_norm: 0.9999990314504115, iteration: 127905
loss: 1.0844515562057495,grad_norm: 0.999999885823938, iteration: 127906
loss: 1.304837942123413,grad_norm: 0.9999998705890853, iteration: 127907
loss: 1.3115304708480835,grad_norm: 0.9999999019783153, iteration: 127908
loss: 1.1633204221725464,grad_norm: 0.9999998507526415, iteration: 127909
loss: 1.305120825767517,grad_norm: 0.9999998850548486, iteration: 127910
loss: 1.2535827159881592,grad_norm: 0.9999999314374194, iteration: 127911
loss: 1.1324689388275146,grad_norm: 0.9999996852594618, iteration: 127912
loss: 1.3461328744888306,grad_norm: 0.9999998680678353, iteration: 127913
loss: 1.0640263557434082,grad_norm: 0.9999999549337364, iteration: 127914
loss: 1.3552595376968384,grad_norm: 0.9999998974962946, iteration: 127915
loss: 1.0937542915344238,grad_norm: 0.9999997728819453, iteration: 127916
loss: 1.2241065502166748,grad_norm: 0.9999999203040713, iteration: 127917
loss: 1.1514379978179932,grad_norm: 0.999999769072038, iteration: 127918
loss: 1.1284722089767456,grad_norm: 0.9999997654320308, iteration: 127919
loss: 1.4385292530059814,grad_norm: 0.9999998724108339, iteration: 127920
loss: 1.0406404733657837,grad_norm: 0.999999486208353, iteration: 127921
loss: 1.1027085781097412,grad_norm: 0.9999995609605337, iteration: 127922
loss: 1.2618739604949951,grad_norm: 0.9999996678188429, iteration: 127923
loss: 1.2981219291687012,grad_norm: 0.9999998068012214, iteration: 127924
loss: 1.2116246223449707,grad_norm: 0.9999998227483359, iteration: 127925
loss: 1.2027056217193604,grad_norm: 0.9999999316178291, iteration: 127926
loss: 1.119321346282959,grad_norm: 0.9999999028630313, iteration: 127927
loss: 1.044816255569458,grad_norm: 0.9999996529119209, iteration: 127928
loss: 1.1359919309616089,grad_norm: 1.000000029621909, iteration: 127929
loss: 1.2162985801696777,grad_norm: 0.9999996798467722, iteration: 127930
loss: 1.3667089939117432,grad_norm: 0.9999999726948643, iteration: 127931
loss: 1.1639479398727417,grad_norm: 0.9999999878964897, iteration: 127932
loss: 1.311525821685791,grad_norm: 0.999999849075316, iteration: 127933
loss: 1.155895471572876,grad_norm: 0.9999998320411894, iteration: 127934
loss: 1.314289927482605,grad_norm: 0.9999996827251406, iteration: 127935
loss: 1.1298484802246094,grad_norm: 0.9999999578946568, iteration: 127936
loss: 1.2904659509658813,grad_norm: 1.0000000429587956, iteration: 127937
loss: 1.3609669208526611,grad_norm: 0.9999999164796267, iteration: 127938
loss: 1.13799250125885,grad_norm: 0.9999997714557834, iteration: 127939
loss: 1.437410831451416,grad_norm: 0.999999898031225, iteration: 127940
loss: 1.1884998083114624,grad_norm: 0.9999994940274022, iteration: 127941
loss: 1.1575500965118408,grad_norm: 0.9999999813488177, iteration: 127942
loss: 1.14517080783844,grad_norm: 0.9999997661092753, iteration: 127943
loss: 1.3117190599441528,grad_norm: 0.9999998985423604, iteration: 127944
loss: 1.3749443292617798,grad_norm: 0.9999999392321366, iteration: 127945
loss: 1.2249937057495117,grad_norm: 0.9999998279384468, iteration: 127946
loss: 1.3878778219223022,grad_norm: 0.9999999093642312, iteration: 127947
loss: 1.3780109882354736,grad_norm: 0.9999998842599854, iteration: 127948
loss: 1.3318451642990112,grad_norm: 0.999999917923397, iteration: 127949
loss: 1.0905894041061401,grad_norm: 0.9999994065764911, iteration: 127950
loss: 1.2179008722305298,grad_norm: 0.999999976493603, iteration: 127951
loss: 1.2864882946014404,grad_norm: 0.9999998749511549, iteration: 127952
loss: 1.2208540439605713,grad_norm: 0.9999998944807652, iteration: 127953
loss: 1.1038647890090942,grad_norm: 0.9999995798336382, iteration: 127954
loss: 1.1840131282806396,grad_norm: 1.0000000599598937, iteration: 127955
loss: 1.0574342012405396,grad_norm: 0.9999996186233928, iteration: 127956
loss: 1.2990713119506836,grad_norm: 0.9999999340364073, iteration: 127957
loss: 1.154022216796875,grad_norm: 0.9999996595109737, iteration: 127958
loss: 1.368300199508667,grad_norm: 0.9999999315487901, iteration: 127959
loss: 1.3413504362106323,grad_norm: 0.9999999016123232, iteration: 127960
loss: 1.3672901391983032,grad_norm: 0.9999999222702322, iteration: 127961
loss: 1.511163592338562,grad_norm: 0.9999999167369491, iteration: 127962
loss: 1.4366596937179565,grad_norm: 0.9999998306463888, iteration: 127963
loss: 1.233888864517212,grad_norm: 0.9999999144152165, iteration: 127964
loss: 1.058103084564209,grad_norm: 0.9999997272627155, iteration: 127965
loss: 1.4634190797805786,grad_norm: 0.9999998916941, iteration: 127966
loss: 1.3756697177886963,grad_norm: 0.9999999765185181, iteration: 127967
loss: 1.0421500205993652,grad_norm: 0.9999998417428871, iteration: 127968
loss: 1.068376898765564,grad_norm: 0.9999997865478992, iteration: 127969
loss: 1.3088483810424805,grad_norm: 0.9999998797405258, iteration: 127970
loss: 1.6016249656677246,grad_norm: 0.9999999005971352, iteration: 127971
loss: 1.20301353931427,grad_norm: 0.9999998675118066, iteration: 127972
loss: 1.328942894935608,grad_norm: 0.9999998519140898, iteration: 127973
loss: 1.56729257106781,grad_norm: 1.0000000285500106, iteration: 127974
loss: 1.0596184730529785,grad_norm: 0.9999991350340467, iteration: 127975
loss: 1.148949384689331,grad_norm: 0.9999995478529632, iteration: 127976
loss: 1.2235801219940186,grad_norm: 0.999999952694081, iteration: 127977
loss: 1.3135108947753906,grad_norm: 1.0000000879283792, iteration: 127978
loss: 1.086418867111206,grad_norm: 0.9999997775174507, iteration: 127979
loss: 1.509636402130127,grad_norm: 0.9999999711812092, iteration: 127980
loss: 1.2355870008468628,grad_norm: 0.9999997035283891, iteration: 127981
loss: 1.162675142288208,grad_norm: 0.9999998825746331, iteration: 127982
loss: 1.3314541578292847,grad_norm: 0.9999999127904666, iteration: 127983
loss: 1.5270158052444458,grad_norm: 0.9999999303128106, iteration: 127984
loss: 1.2531074285507202,grad_norm: 0.9999999607701431, iteration: 127985
loss: 1.1120778322219849,grad_norm: 0.9999998300763349, iteration: 127986
loss: 1.2405085563659668,grad_norm: 0.9999996195746927, iteration: 127987
loss: 1.1705758571624756,grad_norm: 1.0000000422626734, iteration: 127988
loss: 1.1465649604797363,grad_norm: 0.9999998310166043, iteration: 127989
loss: 1.3007546663284302,grad_norm: 0.9999997708964443, iteration: 127990
loss: 1.3528491258621216,grad_norm: 0.9999997812153908, iteration: 127991
loss: 1.3328988552093506,grad_norm: 0.9999999628658681, iteration: 127992
loss: 1.261557936668396,grad_norm: 1.0000000842035899, iteration: 127993
loss: 1.3897147178649902,grad_norm: 0.9999999650612864, iteration: 127994
loss: 1.1054186820983887,grad_norm: 0.9999999167068144, iteration: 127995
loss: 1.2441400289535522,grad_norm: 0.9999993045056258, iteration: 127996
loss: 1.1177544593811035,grad_norm: 0.999999772700441, iteration: 127997
loss: 1.26815927028656,grad_norm: 0.9999999976294528, iteration: 127998
loss: 1.3103309869766235,grad_norm: 0.9999999021757248, iteration: 127999
loss: 1.2188912630081177,grad_norm: 0.9999999493926668, iteration: 128000
loss: 1.3529659509658813,grad_norm: 0.9999998860492353, iteration: 128001
loss: 1.3927862644195557,grad_norm: 0.9999996655886192, iteration: 128002
loss: 1.101341724395752,grad_norm: 0.9999999054547399, iteration: 128003
loss: 1.189393401145935,grad_norm: 0.9999999424032567, iteration: 128004
loss: 1.0330194234848022,grad_norm: 0.9999995553273419, iteration: 128005
loss: 1.4928247928619385,grad_norm: 1.0000000514455452, iteration: 128006
loss: 1.0867774486541748,grad_norm: 0.9999995650880803, iteration: 128007
loss: 1.0672965049743652,grad_norm: 0.9999995037214919, iteration: 128008
loss: 1.2793910503387451,grad_norm: 0.9999998531827953, iteration: 128009
loss: 1.1655243635177612,grad_norm: 1.0000000013332202, iteration: 128010
loss: 1.2068822383880615,grad_norm: 0.9999993008516098, iteration: 128011
loss: 0.9948471784591675,grad_norm: 0.9999993820898708, iteration: 128012
loss: 1.1746981143951416,grad_norm: 0.9999998645686122, iteration: 128013
loss: 1.1319345235824585,grad_norm: 0.9999999372438669, iteration: 128014
loss: 1.154640793800354,grad_norm: 0.9999996934541311, iteration: 128015
loss: 1.0943809747695923,grad_norm: 0.9999998918975251, iteration: 128016
loss: 1.2636561393737793,grad_norm: 0.9999998343819707, iteration: 128017
loss: 1.3536134958267212,grad_norm: 0.9999997951691387, iteration: 128018
loss: 1.2815970182418823,grad_norm: 0.9999999176931466, iteration: 128019
loss: 1.1466469764709473,grad_norm: 0.999999917753154, iteration: 128020
loss: 1.2657439708709717,grad_norm: 0.9999999783625702, iteration: 128021
loss: 1.0508352518081665,grad_norm: 0.9999995617530032, iteration: 128022
loss: 1.2408177852630615,grad_norm: 0.9999997734819194, iteration: 128023
loss: 1.1398152112960815,grad_norm: 0.999999871267643, iteration: 128024
loss: 1.4256023168563843,grad_norm: 0.9999998062459585, iteration: 128025
loss: 1.1925469636917114,grad_norm: 0.9999998493760974, iteration: 128026
loss: 1.3254709243774414,grad_norm: 1.0000000052411804, iteration: 128027
loss: 1.1621135473251343,grad_norm: 0.99999968180257, iteration: 128028
loss: 1.4308154582977295,grad_norm: 1.0000000415208166, iteration: 128029
loss: 1.0502967834472656,grad_norm: 0.9999999005434939, iteration: 128030
loss: 1.133535623550415,grad_norm: 0.9999999435055774, iteration: 128031
loss: 1.357975721359253,grad_norm: 0.999999868136788, iteration: 128032
loss: 1.332413911819458,grad_norm: 0.999999942635412, iteration: 128033
loss: 1.1971659660339355,grad_norm: 0.9999999233988078, iteration: 128034
loss: 1.172143816947937,grad_norm: 1.0000000246263805, iteration: 128035
loss: 1.196758508682251,grad_norm: 0.9999995362516717, iteration: 128036
loss: 1.0901188850402832,grad_norm: 0.9999996637815254, iteration: 128037
loss: 1.1936074495315552,grad_norm: 0.9999999144070304, iteration: 128038
loss: 1.323041319847107,grad_norm: 0.9999999903804361, iteration: 128039
loss: 1.0392661094665527,grad_norm: 0.9999991507352901, iteration: 128040
loss: 1.3405616283416748,grad_norm: 0.9999999100641527, iteration: 128041
loss: 1.1004307270050049,grad_norm: 0.9999991771411962, iteration: 128042
loss: 1.4966285228729248,grad_norm: 0.9999997964384513, iteration: 128043
loss: 1.1554958820343018,grad_norm: 0.9999997669485474, iteration: 128044
loss: 1.2798367738723755,grad_norm: 0.9999996748386426, iteration: 128045
loss: 1.115651249885559,grad_norm: 0.9999998591581828, iteration: 128046
loss: 1.0985606908798218,grad_norm: 0.9999993403717846, iteration: 128047
loss: 1.24122953414917,grad_norm: 1.0000000226026404, iteration: 128048
loss: 1.1177659034729004,grad_norm: 0.9999993349581061, iteration: 128049
loss: 1.4099081754684448,grad_norm: 0.9999999092486025, iteration: 128050
loss: 1.2228049039840698,grad_norm: 0.9999995648642305, iteration: 128051
loss: 1.0596026182174683,grad_norm: 0.9999996667464324, iteration: 128052
loss: 1.2565945386886597,grad_norm: 0.9999999703322645, iteration: 128053
loss: 1.1755870580673218,grad_norm: 0.9999997688241972, iteration: 128054
loss: 1.134394645690918,grad_norm: 0.9999999265440542, iteration: 128055
loss: 1.1799044609069824,grad_norm: 0.9999997478674182, iteration: 128056
loss: 1.5946526527404785,grad_norm: 0.9999999607755173, iteration: 128057
loss: 1.2187293767929077,grad_norm: 0.9999998366811311, iteration: 128058
loss: 1.1679627895355225,grad_norm: 0.9999999342695212, iteration: 128059
loss: 1.147133708000183,grad_norm: 0.99999998461482, iteration: 128060
loss: 1.3141865730285645,grad_norm: 0.9999999180738021, iteration: 128061
loss: 1.0167429447174072,grad_norm: 0.9999995884132625, iteration: 128062
loss: 1.16667640209198,grad_norm: 0.9999999073666583, iteration: 128063
loss: 1.1767786741256714,grad_norm: 0.9999997101071969, iteration: 128064
loss: 1.4523156881332397,grad_norm: 1.0000000754194012, iteration: 128065
loss: 1.0151125192642212,grad_norm: 0.9999996757798275, iteration: 128066
loss: 1.2774118185043335,grad_norm: 0.9999998046367168, iteration: 128067
loss: 1.0661453008651733,grad_norm: 0.9999993280035431, iteration: 128068
loss: 1.1839262247085571,grad_norm: 0.9999992896884959, iteration: 128069
loss: 1.1079943180084229,grad_norm: 0.9999998521460824, iteration: 128070
loss: 1.3181086778640747,grad_norm: 0.9999996126220169, iteration: 128071
loss: 1.3821516036987305,grad_norm: 0.9999999080696252, iteration: 128072
loss: 1.0990452766418457,grad_norm: 0.999999762273321, iteration: 128073
loss: 1.5236177444458008,grad_norm: 0.999999845564457, iteration: 128074
loss: 1.227065920829773,grad_norm: 0.9999996625159631, iteration: 128075
loss: 1.4749455451965332,grad_norm: 0.9999998005613825, iteration: 128076
loss: 0.9846136569976807,grad_norm: 0.9999991238671042, iteration: 128077
loss: 1.1101412773132324,grad_norm: 0.9999998441216029, iteration: 128078
loss: 1.086894154548645,grad_norm: 0.9999990897953778, iteration: 128079
loss: 1.2758413553237915,grad_norm: 0.9999996182311077, iteration: 128080
loss: 1.5163012742996216,grad_norm: 0.9999998482520851, iteration: 128081
loss: 1.1975725889205933,grad_norm: 1.0000000159984523, iteration: 128082
loss: 1.0308464765548706,grad_norm: 0.9368847744944644, iteration: 128083
loss: 1.0314606428146362,grad_norm: 0.9999991829652568, iteration: 128084
loss: 1.071747064590454,grad_norm: 0.9999998323466633, iteration: 128085
loss: 1.1231473684310913,grad_norm: 0.9999996288719818, iteration: 128086
loss: 1.8770896196365356,grad_norm: 0.9999997940200553, iteration: 128087
loss: 1.1036951541900635,grad_norm: 0.9999999155894157, iteration: 128088
loss: 1.1905986070632935,grad_norm: 0.99999988520939, iteration: 128089
loss: 1.2214301824569702,grad_norm: 0.9999998935658182, iteration: 128090
loss: 1.2068572044372559,grad_norm: 0.9999999102984419, iteration: 128091
loss: 0.992557942867279,grad_norm: 0.9999992127360545, iteration: 128092
loss: 1.1078529357910156,grad_norm: 0.9999998171112972, iteration: 128093
loss: 1.1229627132415771,grad_norm: 0.9999998062636533, iteration: 128094
loss: 1.1466659307479858,grad_norm: 0.9999998889233205, iteration: 128095
loss: 1.2516570091247559,grad_norm: 0.9999998125593345, iteration: 128096
loss: 1.1012033224105835,grad_norm: 0.9999994600035784, iteration: 128097
loss: 1.2289862632751465,grad_norm: 0.9999998006744245, iteration: 128098
loss: 1.0874484777450562,grad_norm: 0.9999999914282824, iteration: 128099
loss: 1.0993717908859253,grad_norm: 0.9999998307963892, iteration: 128100
loss: 1.1634459495544434,grad_norm: 0.9999992415858922, iteration: 128101
loss: 1.2645806074142456,grad_norm: 0.9999998934975569, iteration: 128102
loss: 1.0823456048965454,grad_norm: 0.999999794484721, iteration: 128103
loss: 1.2563050985336304,grad_norm: 0.9999998139908295, iteration: 128104
loss: 0.9963385462760925,grad_norm: 0.9999995398321483, iteration: 128105
loss: 1.278979778289795,grad_norm: 0.9999996454138764, iteration: 128106
loss: 1.1088753938674927,grad_norm: 0.9999998863717415, iteration: 128107
loss: 1.0875698328018188,grad_norm: 0.9999992890103959, iteration: 128108
loss: 1.294442057609558,grad_norm: 0.9999998638479428, iteration: 128109
loss: 1.244225025177002,grad_norm: 0.9999999122922014, iteration: 128110
loss: 1.0340933799743652,grad_norm: 0.9999999930792839, iteration: 128111
loss: 1.0738520622253418,grad_norm: 0.999999622655721, iteration: 128112
loss: 1.168118953704834,grad_norm: 0.999999936626956, iteration: 128113
loss: 0.9383226633071899,grad_norm: 0.8764834425270311, iteration: 128114
loss: 1.0915690660476685,grad_norm: 0.9999998177177737, iteration: 128115
loss: 1.0066502094268799,grad_norm: 0.7934442822821656, iteration: 128116
loss: 1.0633208751678467,grad_norm: 0.9999996327544066, iteration: 128117
loss: 1.0933717489242554,grad_norm: 0.9999999700160325, iteration: 128118
loss: 1.0747214555740356,grad_norm: 0.9999998292036915, iteration: 128119
loss: 1.0715155601501465,grad_norm: 0.9999998891799504, iteration: 128120
loss: 1.14188551902771,grad_norm: 0.9999994491154133, iteration: 128121
loss: 1.013874888420105,grad_norm: 0.8982524719740222, iteration: 128122
loss: 1.018730640411377,grad_norm: 0.9999999795670834, iteration: 128123
loss: 0.9761165976524353,grad_norm: 0.9999992301809884, iteration: 128124
loss: 1.0924072265625,grad_norm: 0.99999992216954, iteration: 128125
loss: 1.2321797609329224,grad_norm: 0.9999996916783898, iteration: 128126
loss: 1.0627741813659668,grad_norm: 0.9999999566174087, iteration: 128127
loss: 1.0712571144104004,grad_norm: 0.9999995237159773, iteration: 128128
loss: 1.011816143989563,grad_norm: 0.9999998699521051, iteration: 128129
loss: 1.1285088062286377,grad_norm: 0.999999921220541, iteration: 128130
loss: 1.1311453580856323,grad_norm: 0.9999998440361197, iteration: 128131
loss: 1.161978840827942,grad_norm: 0.9999998168970675, iteration: 128132
loss: 1.0453643798828125,grad_norm: 0.9999997850741629, iteration: 128133
loss: 1.0158412456512451,grad_norm: 0.9999992128943587, iteration: 128134
loss: 0.9777104258537292,grad_norm: 0.9999995257346496, iteration: 128135
loss: 0.9957054853439331,grad_norm: 0.9999993909590094, iteration: 128136
loss: 1.0456002950668335,grad_norm: 0.9999991567712715, iteration: 128137
loss: 1.0429378747940063,grad_norm: 0.9999993968212598, iteration: 128138
loss: 1.1492211818695068,grad_norm: 0.9999999013903069, iteration: 128139
loss: 1.0077035427093506,grad_norm: 0.9999998058637763, iteration: 128140
loss: 1.1720219850540161,grad_norm: 0.9999998068479183, iteration: 128141
loss: 1.093100905418396,grad_norm: 0.9999999149206548, iteration: 128142
loss: 1.1049377918243408,grad_norm: 0.9999998760584632, iteration: 128143
loss: 1.0631963014602661,grad_norm: 0.9999993541181204, iteration: 128144
loss: 1.2063318490982056,grad_norm: 0.9999999663830273, iteration: 128145
loss: 1.008470058441162,grad_norm: 0.9999991326826781, iteration: 128146
loss: 1.0784451961517334,grad_norm: 0.9999995519739048, iteration: 128147
loss: 1.112034559249878,grad_norm: 0.9999999226864517, iteration: 128148
loss: 0.9908279776573181,grad_norm: 0.9278878928712155, iteration: 128149
loss: 1.085207223892212,grad_norm: 0.9999995694977935, iteration: 128150
loss: 1.0822376012802124,grad_norm: 0.9999999822119601, iteration: 128151
loss: 1.0252430438995361,grad_norm: 0.9999992738469407, iteration: 128152
loss: 1.0610086917877197,grad_norm: 0.9999997877586165, iteration: 128153
loss: 1.1768125295639038,grad_norm: 0.9999996170608987, iteration: 128154
loss: 1.0875003337860107,grad_norm: 0.999999967390632, iteration: 128155
loss: 1.1611318588256836,grad_norm: 0.9999996500039774, iteration: 128156
loss: 1.043202519416809,grad_norm: 0.9999998674854951, iteration: 128157
loss: 0.9777691960334778,grad_norm: 0.7991802766473343, iteration: 128158
loss: 1.2205973863601685,grad_norm: 0.9999998183399009, iteration: 128159
loss: 1.1654906272888184,grad_norm: 0.9999995232328364, iteration: 128160
loss: 1.0215386152267456,grad_norm: 0.999999343059185, iteration: 128161
loss: 1.085410475730896,grad_norm: 0.9999999442517329, iteration: 128162
loss: 1.1536577939987183,grad_norm: 0.9999997588406389, iteration: 128163
loss: 1.1565757989883423,grad_norm: 0.9999998373961743, iteration: 128164
loss: 1.0291640758514404,grad_norm: 0.8246286847675708, iteration: 128165
loss: 0.9827089905738831,grad_norm: 0.9587541965719645, iteration: 128166
loss: 1.0163167715072632,grad_norm: 0.9999996552820991, iteration: 128167
loss: 1.035430669784546,grad_norm: 0.9999994778474983, iteration: 128168
loss: 1.0245611667633057,grad_norm: 0.9999997092875291, iteration: 128169
loss: 1.2718944549560547,grad_norm: 0.9999999123932671, iteration: 128170
loss: 1.072346568107605,grad_norm: 0.9570049996194894, iteration: 128171
loss: 1.0104453563690186,grad_norm: 0.9999996623309986, iteration: 128172
loss: 1.0295671224594116,grad_norm: 0.9999996368367458, iteration: 128173
loss: 0.9712135195732117,grad_norm: 0.9999989367496219, iteration: 128174
loss: 1.1167209148406982,grad_norm: 0.9999993174616247, iteration: 128175
loss: 0.9992422461509705,grad_norm: 0.8559242988282558, iteration: 128176
loss: 1.036493182182312,grad_norm: 0.9999996137749538, iteration: 128177
loss: 1.0815839767456055,grad_norm: 0.9999996743495654, iteration: 128178
loss: 1.1185131072998047,grad_norm: 0.9999999693552802, iteration: 128179
loss: 1.234026551246643,grad_norm: 0.9999998875640081, iteration: 128180
loss: 1.0046461820602417,grad_norm: 0.9999995167949799, iteration: 128181
loss: 1.073164701461792,grad_norm: 0.9999991555490936, iteration: 128182
loss: 1.0005533695220947,grad_norm: 0.8329311343984475, iteration: 128183
loss: 1.0145255327224731,grad_norm: 0.9999994126334991, iteration: 128184
loss: 1.070512056350708,grad_norm: 0.9999993158485825, iteration: 128185
loss: 1.034651279449463,grad_norm: 0.9999993476958774, iteration: 128186
loss: 1.2728960514068604,grad_norm: 0.9999997250949461, iteration: 128187
loss: 1.0325396060943604,grad_norm: 0.9999994476486384, iteration: 128188
loss: 1.0086336135864258,grad_norm: 0.9999999395084459, iteration: 128189
loss: 1.0508242845535278,grad_norm: 0.8143989099783574, iteration: 128190
loss: 1.015731692314148,grad_norm: 0.9999996249524526, iteration: 128191
loss: 1.0063636302947998,grad_norm: 0.9999991659605215, iteration: 128192
loss: 1.0045522451400757,grad_norm: 0.8210685497324276, iteration: 128193
loss: 1.1815431118011475,grad_norm: 0.9999995710932189, iteration: 128194
loss: 1.0550537109375,grad_norm: 0.9999997911774229, iteration: 128195
loss: 1.0613832473754883,grad_norm: 0.9040639481956616, iteration: 128196
loss: 1.0139813423156738,grad_norm: 0.9999997733323697, iteration: 128197
loss: 1.1545944213867188,grad_norm: 0.9999996958971923, iteration: 128198
loss: 0.9868258833885193,grad_norm: 0.9999991039044573, iteration: 128199
loss: 1.0546300411224365,grad_norm: 0.9999994806493855, iteration: 128200
loss: 1.0340782403945923,grad_norm: 0.9999997943533939, iteration: 128201
loss: 0.9762380123138428,grad_norm: 0.8951364442535122, iteration: 128202
loss: 0.9930205345153809,grad_norm: 0.9999990804765132, iteration: 128203
loss: 1.119174599647522,grad_norm: 0.9999999494628098, iteration: 128204
loss: 1.1527286767959595,grad_norm: 0.9999989922861682, iteration: 128205
loss: 1.1729631423950195,grad_norm: 0.999999464371339, iteration: 128206
loss: 1.0378345251083374,grad_norm: 0.9999996397125492, iteration: 128207
loss: 1.3886512517929077,grad_norm: 0.9999995110297742, iteration: 128208
loss: 0.9990345239639282,grad_norm: 0.9999991380687634, iteration: 128209
loss: 1.134313702583313,grad_norm: 0.999999593239022, iteration: 128210
loss: 0.9971486926078796,grad_norm: 0.9999995432156416, iteration: 128211
loss: 1.213692307472229,grad_norm: 0.999999863895042, iteration: 128212
loss: 1.197210669517517,grad_norm: 0.999999649598259, iteration: 128213
loss: 0.9805328249931335,grad_norm: 0.9971149022849363, iteration: 128214
loss: 1.0523573160171509,grad_norm: 0.9999996340258127, iteration: 128215
loss: 1.091352939605713,grad_norm: 0.9999998087797437, iteration: 128216
loss: 1.0866631269454956,grad_norm: 0.9999995187261985, iteration: 128217
loss: 1.1095296144485474,grad_norm: 0.9999991727237114, iteration: 128218
loss: 1.1297714710235596,grad_norm: 0.9999999166990795, iteration: 128219
loss: 1.0504953861236572,grad_norm: 0.999999235366352, iteration: 128220
loss: 1.0061893463134766,grad_norm: 1.0000000087009298, iteration: 128221
loss: 1.0084360837936401,grad_norm: 0.9516131956939351, iteration: 128222
loss: 1.0612660646438599,grad_norm: 0.9999998438868763, iteration: 128223
loss: 1.0237305164337158,grad_norm: 0.9999999441271672, iteration: 128224
loss: 1.0623283386230469,grad_norm: 0.999999929391401, iteration: 128225
loss: 1.1157784461975098,grad_norm: 0.9999998491973691, iteration: 128226
loss: 1.1591320037841797,grad_norm: 0.9999996909989414, iteration: 128227
loss: 1.0732438564300537,grad_norm: 0.999999492192414, iteration: 128228
loss: 0.9995138049125671,grad_norm: 0.9256331207913482, iteration: 128229
loss: 0.9851679801940918,grad_norm: 0.9999997552717288, iteration: 128230
loss: 1.023943543434143,grad_norm: 0.9999995622278678, iteration: 128231
loss: 1.0223006010055542,grad_norm: 0.9999997840148526, iteration: 128232
loss: 1.0894397497177124,grad_norm: 0.999999961669464, iteration: 128233
loss: 1.16501784324646,grad_norm: 0.9999995905987554, iteration: 128234
loss: 1.045398235321045,grad_norm: 0.9999995410819833, iteration: 128235
loss: 1.092860460281372,grad_norm: 0.9999993937301881, iteration: 128236
loss: 1.2413933277130127,grad_norm: 0.9999998292638469, iteration: 128237
loss: 1.203871250152588,grad_norm: 0.9999997168143485, iteration: 128238
loss: 1.132584571838379,grad_norm: 0.9999997860664955, iteration: 128239
loss: 1.1217635869979858,grad_norm: 0.9999998841952731, iteration: 128240
loss: 1.0305832624435425,grad_norm: 0.9999990652261845, iteration: 128241
loss: 1.0113714933395386,grad_norm: 0.9999992149564667, iteration: 128242
loss: 0.9434332847595215,grad_norm: 0.9999991366815719, iteration: 128243
loss: 1.0123471021652222,grad_norm: 0.9999998120135991, iteration: 128244
loss: 1.0151381492614746,grad_norm: 0.9999991014054872, iteration: 128245
loss: 1.1302083730697632,grad_norm: 0.9999995769217593, iteration: 128246
loss: 1.0389113426208496,grad_norm: 0.9999991929366837, iteration: 128247
loss: 1.0422587394714355,grad_norm: 0.9999997857842152, iteration: 128248
loss: 1.0160964727401733,grad_norm: 0.9324670180767625, iteration: 128249
loss: 1.1425665616989136,grad_norm: 0.9999997597878977, iteration: 128250
loss: 1.3084290027618408,grad_norm: 0.999999749596168, iteration: 128251
loss: 1.040663480758667,grad_norm: 0.9999997347223321, iteration: 128252
loss: 1.1355555057525635,grad_norm: 0.9999992047226285, iteration: 128253
loss: 1.1112087965011597,grad_norm: 0.9999999912501184, iteration: 128254
loss: 1.036656379699707,grad_norm: 0.9999991048976996, iteration: 128255
loss: 1.061479926109314,grad_norm: 0.999999924866301, iteration: 128256
loss: 1.159960150718689,grad_norm: 0.999999891217082, iteration: 128257
loss: 1.028652548789978,grad_norm: 0.9999994242274021, iteration: 128258
loss: 1.0941810607910156,grad_norm: 0.999999865893944, iteration: 128259
loss: 1.0467134714126587,grad_norm: 0.9999995892361241, iteration: 128260
loss: 1.159095287322998,grad_norm: 0.9999998367664859, iteration: 128261
loss: 1.2282121181488037,grad_norm: 0.9999998372426462, iteration: 128262
loss: 0.9751076698303223,grad_norm: 0.9999999695729355, iteration: 128263
loss: 1.2448163032531738,grad_norm: 0.9999995405954527, iteration: 128264
loss: 1.0395301580429077,grad_norm: 0.8189944767549777, iteration: 128265
loss: 1.060434103012085,grad_norm: 0.9999994705210002, iteration: 128266
loss: 1.0738914012908936,grad_norm: 0.9999996455175206, iteration: 128267
loss: 1.0896090269088745,grad_norm: 0.9999997034630332, iteration: 128268
loss: 1.0417864322662354,grad_norm: 0.9999989915407408, iteration: 128269
loss: 1.0381627082824707,grad_norm: 0.9999993898979977, iteration: 128270
loss: 1.0579677820205688,grad_norm: 0.999999349357277, iteration: 128271
loss: 1.0161943435668945,grad_norm: 0.9999998299928529, iteration: 128272
loss: 1.0827404260635376,grad_norm: 0.9999999492443122, iteration: 128273
loss: 1.0416040420532227,grad_norm: 0.9999991298724329, iteration: 128274
loss: 0.9792765974998474,grad_norm: 0.9173831919734657, iteration: 128275
loss: 0.986012876033783,grad_norm: 0.9308062362923772, iteration: 128276
loss: 1.1089026927947998,grad_norm: 0.9999996691831402, iteration: 128277
loss: 1.2606892585754395,grad_norm: 0.999999342233903, iteration: 128278
loss: 1.1139341592788696,grad_norm: 0.999999944449903, iteration: 128279
loss: 1.2440515756607056,grad_norm: 1.0000000097577615, iteration: 128280
loss: 1.0539460182189941,grad_norm: 0.9999994833690853, iteration: 128281
loss: 1.2054545879364014,grad_norm: 0.9999999239744307, iteration: 128282
loss: 1.0517468452453613,grad_norm: 0.999999759127666, iteration: 128283
loss: 1.1638238430023193,grad_norm: 0.9999999380795462, iteration: 128284
loss: 1.139752745628357,grad_norm: 0.9999993757078296, iteration: 128285
loss: 1.0631543397903442,grad_norm: 0.9999996118496531, iteration: 128286
loss: 1.1111918687820435,grad_norm: 0.9999996816333195, iteration: 128287
loss: 0.9650377631187439,grad_norm: 0.9999998588651704, iteration: 128288
loss: 1.0934536457061768,grad_norm: 0.9999995333351128, iteration: 128289
loss: 1.143892526626587,grad_norm: 0.9999999055475414, iteration: 128290
loss: 1.1570183038711548,grad_norm: 0.9999995446153518, iteration: 128291
loss: 1.122255802154541,grad_norm: 0.9999992651079903, iteration: 128292
loss: 1.1355438232421875,grad_norm: 0.9999992784150666, iteration: 128293
loss: 1.1043697595596313,grad_norm: 0.9999998829325559, iteration: 128294
loss: 1.0910569429397583,grad_norm: 0.9999995009555981, iteration: 128295
loss: 1.0800209045410156,grad_norm: 0.9999994245077038, iteration: 128296
loss: 1.0303994417190552,grad_norm: 0.9999995706495319, iteration: 128297
loss: 1.0076098442077637,grad_norm: 0.9650440494378391, iteration: 128298
loss: 1.060486912727356,grad_norm: 0.9999998512326116, iteration: 128299
loss: 1.1207994222640991,grad_norm: 0.999999434526022, iteration: 128300
loss: 1.023249864578247,grad_norm: 0.9999991241093744, iteration: 128301
loss: 1.134623408317566,grad_norm: 0.999999635798321, iteration: 128302
loss: 1.0199824571609497,grad_norm: 0.9999999160323148, iteration: 128303
loss: 1.07901132106781,grad_norm: 0.9999999047638207, iteration: 128304
loss: 1.0886417627334595,grad_norm: 0.9999998354683853, iteration: 128305
loss: 1.0670268535614014,grad_norm: 0.9999992233973803, iteration: 128306
loss: 1.0590465068817139,grad_norm: 0.9999997136673485, iteration: 128307
loss: 1.0316039323806763,grad_norm: 0.7818104470047958, iteration: 128308
loss: 1.1210172176361084,grad_norm: 0.999999622693963, iteration: 128309
loss: 1.127434492111206,grad_norm: 0.9999998722123037, iteration: 128310
loss: 1.07035493850708,grad_norm: 0.9688613500111266, iteration: 128311
loss: 1.105916976928711,grad_norm: 0.9999999488864655, iteration: 128312
loss: 1.0577583312988281,grad_norm: 0.8763316139768728, iteration: 128313
loss: 1.024972677230835,grad_norm: 0.9999996378752031, iteration: 128314
loss: 1.0816665887832642,grad_norm: 0.9999991044653358, iteration: 128315
loss: 1.0063538551330566,grad_norm: 0.8271380075276001, iteration: 128316
loss: 1.0547337532043457,grad_norm: 0.9999993103908263, iteration: 128317
loss: 1.019331693649292,grad_norm: 0.999999157207787, iteration: 128318
loss: 1.1097248792648315,grad_norm: 0.9999995984474044, iteration: 128319
loss: 1.2271740436553955,grad_norm: 0.9999999188891363, iteration: 128320
loss: 1.0449192523956299,grad_norm: 0.9999998534635642, iteration: 128321
loss: 1.035078763961792,grad_norm: 0.7710408272016269, iteration: 128322
loss: 1.0078717470169067,grad_norm: 0.9245810779791059, iteration: 128323
loss: 1.0540668964385986,grad_norm: 0.9999991938614858, iteration: 128324
loss: 1.036578893661499,grad_norm: 0.9999993622559757, iteration: 128325
loss: 1.126573085784912,grad_norm: 0.9999999938430832, iteration: 128326
loss: 1.0504094362258911,grad_norm: 0.9999995371540757, iteration: 128327
loss: 1.0155574083328247,grad_norm: 0.9999999055198546, iteration: 128328
loss: 1.252789855003357,grad_norm: 0.9999995447376665, iteration: 128329
loss: 1.0536574125289917,grad_norm: 0.9807965733746548, iteration: 128330
loss: 1.0319980382919312,grad_norm: 0.9999995543311632, iteration: 128331
loss: 1.0427510738372803,grad_norm: 0.9999990694974784, iteration: 128332
loss: 0.9763075709342957,grad_norm: 0.8892727007451787, iteration: 128333
loss: 1.0245031118392944,grad_norm: 0.9999999758740646, iteration: 128334
loss: 1.107596516609192,grad_norm: 0.9999998921847562, iteration: 128335
loss: 1.004167079925537,grad_norm: 0.9999994333264426, iteration: 128336
loss: 1.0875905752182007,grad_norm: 0.9999999185453816, iteration: 128337
loss: 1.011479139328003,grad_norm: 0.9999993702405441, iteration: 128338
loss: 1.0440601110458374,grad_norm: 0.9999996453506022, iteration: 128339
loss: 1.1409786939620972,grad_norm: 0.9999998436467604, iteration: 128340
loss: 1.0375969409942627,grad_norm: 0.9999990428143517, iteration: 128341
loss: 1.1312775611877441,grad_norm: 0.9999994140836271, iteration: 128342
loss: 1.024674415588379,grad_norm: 0.9999994795199406, iteration: 128343
loss: 1.051596999168396,grad_norm: 0.9999995047636772, iteration: 128344
loss: 1.0461682081222534,grad_norm: 0.9999991120577605, iteration: 128345
loss: 1.1085591316223145,grad_norm: 0.9999995558192853, iteration: 128346
loss: 1.0355618000030518,grad_norm: 0.9999991067522908, iteration: 128347
loss: 1.1395912170410156,grad_norm: 0.9999995088200343, iteration: 128348
loss: 1.4209814071655273,grad_norm: 0.9999998366246076, iteration: 128349
loss: 1.0367157459259033,grad_norm: 0.9999993213888693, iteration: 128350
loss: 1.172274112701416,grad_norm: 0.9999992766238586, iteration: 128351
loss: 1.0316293239593506,grad_norm: 0.9999991417744568, iteration: 128352
loss: 1.0462111234664917,grad_norm: 0.9999994542846037, iteration: 128353
loss: 1.004353642463684,grad_norm: 0.9116280888253, iteration: 128354
loss: 0.9944784045219421,grad_norm: 0.999999370373356, iteration: 128355
loss: 1.028023600578308,grad_norm: 0.9999994484190663, iteration: 128356
loss: 1.0312339067459106,grad_norm: 0.9999991493852536, iteration: 128357
loss: 1.0096495151519775,grad_norm: 0.8957720093743837, iteration: 128358
loss: 1.0211613178253174,grad_norm: 0.993810241377754, iteration: 128359
loss: 1.0604737997055054,grad_norm: 0.999999658286428, iteration: 128360
loss: 1.0397822856903076,grad_norm: 0.9999989963101035, iteration: 128361
loss: 1.0073646306991577,grad_norm: 0.9999999618184316, iteration: 128362
loss: 1.0384620428085327,grad_norm: 0.9999994326808868, iteration: 128363
loss: 1.0303391218185425,grad_norm: 0.9244183736041046, iteration: 128364
loss: 0.9979613423347473,grad_norm: 0.9999994837978837, iteration: 128365
loss: 1.0173754692077637,grad_norm: 0.9999997752334794, iteration: 128366
loss: 1.10587739944458,grad_norm: 0.9999989635918483, iteration: 128367
loss: 1.045039176940918,grad_norm: 0.999999160684434, iteration: 128368
loss: 1.0940518379211426,grad_norm: 0.9999996838507311, iteration: 128369
loss: 1.0738022327423096,grad_norm: 0.9999995758497947, iteration: 128370
loss: 1.1840016841888428,grad_norm: 0.9999996917272499, iteration: 128371
loss: 1.1047970056533813,grad_norm: 0.9999999540817284, iteration: 128372
loss: 1.0694714784622192,grad_norm: 0.9999998912124619, iteration: 128373
loss: 1.138810396194458,grad_norm: 0.9999996103737441, iteration: 128374
loss: 1.0159709453582764,grad_norm: 0.999999536735344, iteration: 128375
loss: 1.0821430683135986,grad_norm: 0.99999932467873, iteration: 128376
loss: 1.023868203163147,grad_norm: 0.9677211298210567, iteration: 128377
loss: 1.1043134927749634,grad_norm: 0.9999997920846562, iteration: 128378
loss: 1.419241189956665,grad_norm: 0.9999998048770214, iteration: 128379
loss: 1.0363175868988037,grad_norm: 0.9999993769714397, iteration: 128380
loss: 1.0964257717132568,grad_norm: 0.9999996120822973, iteration: 128381
loss: 1.0944162607192993,grad_norm: 0.9999992170854621, iteration: 128382
loss: 1.1276739835739136,grad_norm: 0.9999995305788846, iteration: 128383
loss: 1.2082409858703613,grad_norm: 0.9999995135953781, iteration: 128384
loss: 1.1953340768814087,grad_norm: 0.9999993863549942, iteration: 128385
loss: 1.008954405784607,grad_norm: 0.8734266714091512, iteration: 128386
loss: 1.0641907453536987,grad_norm: 0.9999998718889762, iteration: 128387
loss: 1.1256366968154907,grad_norm: 0.9999997307869691, iteration: 128388
loss: 1.041999340057373,grad_norm: 0.8758379894087706, iteration: 128389
loss: 1.0471934080123901,grad_norm: 0.9999995196406432, iteration: 128390
loss: 1.0746737718582153,grad_norm: 0.9999997868387925, iteration: 128391
loss: 1.0682165622711182,grad_norm: 0.9999994969378014, iteration: 128392
loss: 1.084578275680542,grad_norm: 0.99999963310666, iteration: 128393
loss: 1.064789056777954,grad_norm: 0.9999991475095694, iteration: 128394
loss: 1.1137592792510986,grad_norm: 0.9999998353779146, iteration: 128395
loss: 0.9905229806900024,grad_norm: 0.999999314082111, iteration: 128396
loss: 1.1245722770690918,grad_norm: 0.9999991601930482, iteration: 128397
loss: 1.243811011314392,grad_norm: 0.9999997212890531, iteration: 128398
loss: 1.3041048049926758,grad_norm: 0.9999994851582945, iteration: 128399
loss: 1.1127318143844604,grad_norm: 0.9999997248340682, iteration: 128400
loss: 1.0611543655395508,grad_norm: 0.9999991701239868, iteration: 128401
loss: 1.1428892612457275,grad_norm: 0.9999996913428694, iteration: 128402
loss: 1.0549685955047607,grad_norm: 0.9999994607971556, iteration: 128403
loss: 1.1395996809005737,grad_norm: 0.9999998715019995, iteration: 128404
loss: 1.0382319688796997,grad_norm: 0.9999995697717377, iteration: 128405
loss: 1.1090012788772583,grad_norm: 0.9999998409606484, iteration: 128406
loss: 1.0486570596694946,grad_norm: 0.9999994861213094, iteration: 128407
loss: 1.0848854780197144,grad_norm: 0.9999993618852414, iteration: 128408
loss: 1.1811461448669434,grad_norm: 0.9999999097096154, iteration: 128409
loss: 1.0314148664474487,grad_norm: 0.8155681198453654, iteration: 128410
loss: 1.0324859619140625,grad_norm: 0.9999996423079415, iteration: 128411
loss: 1.0703121423721313,grad_norm: 0.9999991066530481, iteration: 128412
loss: 1.0438215732574463,grad_norm: 0.9999994949805902, iteration: 128413
loss: 0.9798464179039001,grad_norm: 0.9069283999244655, iteration: 128414
loss: 1.0405086278915405,grad_norm: 0.9999994712528352, iteration: 128415
loss: 1.0473668575286865,grad_norm: 0.9999992576286605, iteration: 128416
loss: 1.0498251914978027,grad_norm: 0.9999995076170038, iteration: 128417
loss: 1.1054632663726807,grad_norm: 0.9999995900135327, iteration: 128418
loss: 1.1285507678985596,grad_norm: 1.000000048765764, iteration: 128419
loss: 1.0118426084518433,grad_norm: 0.9999990323222949, iteration: 128420
loss: 1.0459256172180176,grad_norm: 0.9999997675216848, iteration: 128421
loss: 1.0325733423233032,grad_norm: 0.9999997092343781, iteration: 128422
loss: 1.103964924812317,grad_norm: 0.9999998917276636, iteration: 128423
loss: 1.136491060256958,grad_norm: 0.9999999945738632, iteration: 128424
loss: 1.1041576862335205,grad_norm: 0.9999993067938908, iteration: 128425
loss: 1.1708738803863525,grad_norm: 0.9999996907602988, iteration: 128426
loss: 1.036483883857727,grad_norm: 0.8990774925876827, iteration: 128427
loss: 1.2699556350708008,grad_norm: 0.9999999543025977, iteration: 128428
loss: 1.0329453945159912,grad_norm: 0.999999004997412, iteration: 128429
loss: 1.077773094177246,grad_norm: 0.9999999050034957, iteration: 128430
loss: 1.173069953918457,grad_norm: 0.9999994380471428, iteration: 128431
loss: 1.0262302160263062,grad_norm: 0.8254651409489088, iteration: 128432
loss: 1.010037899017334,grad_norm: 0.7605781338975189, iteration: 128433
loss: 1.0238218307495117,grad_norm: 0.9999996668071668, iteration: 128434
loss: 1.077101707458496,grad_norm: 0.9999993235560483, iteration: 128435
loss: 0.9996228218078613,grad_norm: 0.999999112257936, iteration: 128436
loss: 1.0239683389663696,grad_norm: 0.9999999492630814, iteration: 128437
loss: 1.037460446357727,grad_norm: 0.9999997176161806, iteration: 128438
loss: 1.1591390371322632,grad_norm: 0.9999993812369267, iteration: 128439
loss: 1.1542677879333496,grad_norm: 0.9999999068785769, iteration: 128440
loss: 1.1861577033996582,grad_norm: 0.9999999270899252, iteration: 128441
loss: 1.0534284114837646,grad_norm: 1.000000025324062, iteration: 128442
loss: 1.0212479829788208,grad_norm: 0.9043728384633682, iteration: 128443
loss: 1.0851714611053467,grad_norm: 0.9999997334774001, iteration: 128444
loss: 1.099081039428711,grad_norm: 0.9999991698524641, iteration: 128445
loss: 1.083604097366333,grad_norm: 0.9999999003466661, iteration: 128446
loss: 1.013170599937439,grad_norm: 0.9375755979637838, iteration: 128447
loss: 1.1375128030776978,grad_norm: 0.9999996331378748, iteration: 128448
loss: 1.1019549369812012,grad_norm: 0.9999998653538118, iteration: 128449
loss: 1.053824543952942,grad_norm: 0.9828631786575137, iteration: 128450
loss: 0.9918504953384399,grad_norm: 0.9497033628004484, iteration: 128451
loss: 1.0673797130584717,grad_norm: 0.9999991948862756, iteration: 128452
loss: 1.051234245300293,grad_norm: 0.9999999726364149, iteration: 128453
loss: 1.0379667282104492,grad_norm: 0.9999993988896622, iteration: 128454
loss: 1.0384340286254883,grad_norm: 0.9999993212735208, iteration: 128455
loss: 1.1221590042114258,grad_norm: 0.999999401179185, iteration: 128456
loss: 1.0532307624816895,grad_norm: 0.9999991785449016, iteration: 128457
loss: 1.133408784866333,grad_norm: 0.9999997739024967, iteration: 128458
loss: 1.0629609823226929,grad_norm: 0.9999992723970818, iteration: 128459
loss: 1.0084152221679688,grad_norm: 0.9999999787739917, iteration: 128460
loss: 1.0460869073867798,grad_norm: 0.9999996754667101, iteration: 128461
loss: 1.2461296319961548,grad_norm: 0.9999999130844256, iteration: 128462
loss: 1.0026735067367554,grad_norm: 0.99999996077867, iteration: 128463
loss: 1.071077585220337,grad_norm: 0.9999996498071644, iteration: 128464
loss: 1.0821644067764282,grad_norm: 0.999999927373663, iteration: 128465
loss: 1.130319595336914,grad_norm: 0.9999993059160874, iteration: 128466
loss: 1.188184380531311,grad_norm: 1.0000000791288624, iteration: 128467
loss: 1.1459345817565918,grad_norm: 0.9999999135030435, iteration: 128468
loss: 1.1533139944076538,grad_norm: 0.9999998145485015, iteration: 128469
loss: 1.4720510244369507,grad_norm: 0.9999999277135871, iteration: 128470
loss: 1.1250252723693848,grad_norm: 0.9999992302563262, iteration: 128471
loss: 1.1686829328536987,grad_norm: 0.9999996468207288, iteration: 128472
loss: 1.0040123462677002,grad_norm: 0.9999991460045959, iteration: 128473
loss: 1.1399335861206055,grad_norm: 0.9999996706384163, iteration: 128474
loss: 1.0106559991836548,grad_norm: 0.7431710988789895, iteration: 128475
loss: 1.0362205505371094,grad_norm: 0.9999999608047395, iteration: 128476
loss: 1.0375949144363403,grad_norm: 0.9999991417832791, iteration: 128477
loss: 1.024347186088562,grad_norm: 0.9999994457033196, iteration: 128478
loss: 1.0362542867660522,grad_norm: 0.999999256796901, iteration: 128479
loss: 1.210880160331726,grad_norm: 0.9999998639403584, iteration: 128480
loss: 1.0542527437210083,grad_norm: 0.9277623652004391, iteration: 128481
loss: 1.0170828104019165,grad_norm: 0.9999990867931353, iteration: 128482
loss: 1.0796526670455933,grad_norm: 0.8855268737900032, iteration: 128483
loss: 1.0725950002670288,grad_norm: 0.9999995002600962, iteration: 128484
loss: 1.1226311922073364,grad_norm: 1.000000081204412, iteration: 128485
loss: 1.083559274673462,grad_norm: 0.9211140892969839, iteration: 128486
loss: 1.1261051893234253,grad_norm: 0.9999997655808897, iteration: 128487
loss: 1.0359500646591187,grad_norm: 0.9999992095592333, iteration: 128488
loss: 1.1637089252471924,grad_norm: 1.0000000103713527, iteration: 128489
loss: 1.1665507555007935,grad_norm: 0.9999994428569867, iteration: 128490
loss: 1.038771152496338,grad_norm: 0.9999990975593506, iteration: 128491
loss: 1.0069282054901123,grad_norm: 0.8572452729754423, iteration: 128492
loss: 1.0586305856704712,grad_norm: 0.9999997783868397, iteration: 128493
loss: 1.0659844875335693,grad_norm: 0.9999996074578877, iteration: 128494
loss: 1.0459144115447998,grad_norm: 0.8186002083237949, iteration: 128495
loss: 1.0218101739883423,grad_norm: 0.9999992949178571, iteration: 128496
loss: 1.0155476331710815,grad_norm: 0.9999995950412331, iteration: 128497
loss: 1.1669212579727173,grad_norm: 0.999999797171266, iteration: 128498
loss: 1.0472922325134277,grad_norm: 0.9999991119745688, iteration: 128499
loss: 1.0018211603164673,grad_norm: 0.9999992738103414, iteration: 128500
loss: 1.0537081956863403,grad_norm: 0.9999991903993944, iteration: 128501
loss: 1.0344356298446655,grad_norm: 0.9999990605645767, iteration: 128502
loss: 1.1004284620285034,grad_norm: 0.9999994922524367, iteration: 128503
loss: 1.138577938079834,grad_norm: 0.9999998968860923, iteration: 128504
loss: 1.0153237581253052,grad_norm: 0.9999990572612182, iteration: 128505
loss: 1.0829944610595703,grad_norm: 0.9999995115177706, iteration: 128506
loss: 0.9636013507843018,grad_norm: 0.9346374769953883, iteration: 128507
loss: 1.040955662727356,grad_norm: 0.999999842122329, iteration: 128508
loss: 1.0818132162094116,grad_norm: 0.9217068188460015, iteration: 128509
loss: 1.0636693239212036,grad_norm: 1.0000000831458378, iteration: 128510
loss: 1.039573311805725,grad_norm: 0.9860464596395099, iteration: 128511
loss: 1.1503334045410156,grad_norm: 0.9999997907161879, iteration: 128512
loss: 1.0712673664093018,grad_norm: 0.8740467882727181, iteration: 128513
loss: 0.9991186857223511,grad_norm: 0.9999995363067427, iteration: 128514
loss: 1.0295110940933228,grad_norm: 0.9999999815546158, iteration: 128515
loss: 1.0160728693008423,grad_norm: 0.999999080863159, iteration: 128516
loss: 1.0487844944000244,grad_norm: 0.9999996106232407, iteration: 128517
loss: 1.0646381378173828,grad_norm: 0.9999998631958394, iteration: 128518
loss: 1.2443188428878784,grad_norm: 0.9999996146094772, iteration: 128519
loss: 1.0377615690231323,grad_norm: 0.9096018226659542, iteration: 128520
loss: 1.0539588928222656,grad_norm: 0.9999998811137393, iteration: 128521
loss: 0.9935555458068848,grad_norm: 0.9999995043748455, iteration: 128522
loss: 1.0365387201309204,grad_norm: 0.9999990570945623, iteration: 128523
loss: 0.9903433918952942,grad_norm: 0.9137085338254797, iteration: 128524
loss: 1.0050276517868042,grad_norm: 0.9297326519557744, iteration: 128525
loss: 1.0095150470733643,grad_norm: 0.999999101768598, iteration: 128526
loss: 0.9757143259048462,grad_norm: 0.9999990861706821, iteration: 128527
loss: 1.0328857898712158,grad_norm: 0.9999994884908562, iteration: 128528
loss: 0.9757854342460632,grad_norm: 0.9109169973512216, iteration: 128529
loss: 1.033385992050171,grad_norm: 0.9310712519639022, iteration: 128530
loss: 1.0325102806091309,grad_norm: 0.9999992419609031, iteration: 128531
loss: 1.1008094549179077,grad_norm: 0.9999998783818052, iteration: 128532
loss: 1.1615551710128784,grad_norm: 0.9999995741639864, iteration: 128533
loss: 1.04323148727417,grad_norm: 0.9999993556054878, iteration: 128534
loss: 1.0704487562179565,grad_norm: 0.9999992110400794, iteration: 128535
loss: 1.041137456893921,grad_norm: 0.9999997251197489, iteration: 128536
loss: 1.1720701456069946,grad_norm: 0.9999998601778114, iteration: 128537
loss: 1.0689727067947388,grad_norm: 0.9999991035430296, iteration: 128538
loss: 1.0400784015655518,grad_norm: 0.9999992410397724, iteration: 128539
loss: 1.0826841592788696,grad_norm: 0.9999989802451585, iteration: 128540
loss: 0.9871969819068909,grad_norm: 0.8674730151285508, iteration: 128541
loss: 1.0581313371658325,grad_norm: 0.9999998698983388, iteration: 128542
loss: 1.0159637928009033,grad_norm: 0.7897605603136915, iteration: 128543
loss: 1.072819709777832,grad_norm: 0.9999993193437239, iteration: 128544
loss: 1.0899018049240112,grad_norm: 0.9999996528845702, iteration: 128545
loss: 1.090212345123291,grad_norm: 0.9999992065503471, iteration: 128546
loss: 1.1769827604293823,grad_norm: 0.9999992946101256, iteration: 128547
loss: 1.0977628231048584,grad_norm: 0.9999992980348936, iteration: 128548
loss: 1.1593987941741943,grad_norm: 0.9999995694057808, iteration: 128549
loss: 1.0155982971191406,grad_norm: 0.9999989900792128, iteration: 128550
loss: 1.0683858394622803,grad_norm: 0.9999990547749982, iteration: 128551
loss: 1.0844528675079346,grad_norm: 0.9999996253908668, iteration: 128552
loss: 1.023858904838562,grad_norm: 0.8806858583252277, iteration: 128553
loss: 0.9951545000076294,grad_norm: 0.8706230487971185, iteration: 128554
loss: 1.1138254404067993,grad_norm: 0.9999994936107544, iteration: 128555
loss: 1.0142635107040405,grad_norm: 0.9999994124984535, iteration: 128556
loss: 1.0195924043655396,grad_norm: 0.9999994879352022, iteration: 128557
loss: 1.0350837707519531,grad_norm: 0.9999998403094349, iteration: 128558
loss: 1.122225284576416,grad_norm: 0.9999995867414996, iteration: 128559
loss: 1.066149353981018,grad_norm: 0.9999997976783236, iteration: 128560
loss: 1.0694992542266846,grad_norm: 0.9999993280653724, iteration: 128561
loss: 1.0543925762176514,grad_norm: 0.9999998993887648, iteration: 128562
loss: 1.0032095909118652,grad_norm: 0.9999993277341346, iteration: 128563
loss: 1.0254220962524414,grad_norm: 0.999999757279114, iteration: 128564
loss: 1.0245052576065063,grad_norm: 0.877223547023404, iteration: 128565
loss: 1.0430189371109009,grad_norm: 0.8415167373273441, iteration: 128566
loss: 1.0144973993301392,grad_norm: 0.9999993046623491, iteration: 128567
loss: 0.9707744717597961,grad_norm: 0.9999999184922668, iteration: 128568
loss: 1.0629609823226929,grad_norm: 0.9999992164841691, iteration: 128569
loss: 0.9950335621833801,grad_norm: 0.9999998504319157, iteration: 128570
loss: 1.1345674991607666,grad_norm: 0.9999997400675297, iteration: 128571
loss: 0.9994579553604126,grad_norm: 0.9640331158648703, iteration: 128572
loss: 1.085310935974121,grad_norm: 0.9999997807124139, iteration: 128573
loss: 0.9474561214447021,grad_norm: 0.8575048444507595, iteration: 128574
loss: 1.0164562463760376,grad_norm: 0.999999040377714, iteration: 128575
loss: 1.1048604249954224,grad_norm: 0.9999998354889083, iteration: 128576
loss: 1.0619717836380005,grad_norm: 0.9999995953034438, iteration: 128577
loss: 1.092146873474121,grad_norm: 0.9999994569416234, iteration: 128578
loss: 1.0301340818405151,grad_norm: 0.874307684810733, iteration: 128579
loss: 1.0642778873443604,grad_norm: 0.9999994302855499, iteration: 128580
loss: 0.9815581440925598,grad_norm: 0.9428576585773507, iteration: 128581
loss: 0.9973604083061218,grad_norm: 0.9999991222216771, iteration: 128582
loss: 1.0171693563461304,grad_norm: 0.8373315975865817, iteration: 128583
loss: 1.1142035722732544,grad_norm: 0.9999991010567912, iteration: 128584
loss: 1.034457802772522,grad_norm: 0.9309169890570337, iteration: 128585
loss: 1.113282561302185,grad_norm: 0.9999994738595367, iteration: 128586
loss: 1.0300934314727783,grad_norm: 0.9999990807681213, iteration: 128587
loss: 1.0297658443450928,grad_norm: 0.9999992080263717, iteration: 128588
loss: 1.0611262321472168,grad_norm: 0.9999994447151318, iteration: 128589
loss: 1.1018487215042114,grad_norm: 0.9999994228400608, iteration: 128590
loss: 1.0781582593917847,grad_norm: 0.9999995376512756, iteration: 128591
loss: 1.0866243839263916,grad_norm: 0.9999992320516514, iteration: 128592
loss: 1.026423454284668,grad_norm: 0.9999993003189221, iteration: 128593
loss: 1.1229119300842285,grad_norm: 0.9999990801965243, iteration: 128594
loss: 1.055928111076355,grad_norm: 0.9999992798599197, iteration: 128595
loss: 1.1196430921554565,grad_norm: 0.9999993261414041, iteration: 128596
loss: 1.0583112239837646,grad_norm: 0.9999995218596404, iteration: 128597
loss: 1.0130785703659058,grad_norm: 0.9999989506412322, iteration: 128598
loss: 0.9887010455131531,grad_norm: 0.9124238058557463, iteration: 128599
loss: 1.1115038394927979,grad_norm: 0.9999998629874106, iteration: 128600
loss: 1.032907485961914,grad_norm: 0.9643687485197147, iteration: 128601
loss: 1.0194628238677979,grad_norm: 0.9999998217423259, iteration: 128602
loss: 1.1425496339797974,grad_norm: 0.9999998091807533, iteration: 128603
loss: 1.0465532541275024,grad_norm: 0.9016282974081723, iteration: 128604
loss: 1.0938328504562378,grad_norm: 0.9999996218944275, iteration: 128605
loss: 1.0490880012512207,grad_norm: 0.9999997389818084, iteration: 128606
loss: 1.009285807609558,grad_norm: 0.9561680843248033, iteration: 128607
loss: 1.065569519996643,grad_norm: 0.999999684498138, iteration: 128608
loss: 0.9823951125144958,grad_norm: 0.9999991451640161, iteration: 128609
loss: 1.0866167545318604,grad_norm: 0.8954486058414617, iteration: 128610
loss: 1.0598978996276855,grad_norm: 0.9999997010283589, iteration: 128611
loss: 1.011407732963562,grad_norm: 0.8032613220503729, iteration: 128612
loss: 1.068851351737976,grad_norm: 0.9999997618342178, iteration: 128613
loss: 1.093712568283081,grad_norm: 0.9999993257180408, iteration: 128614
loss: 1.112797498703003,grad_norm: 0.9999995082486062, iteration: 128615
loss: 1.022350788116455,grad_norm: 0.9053125613290324, iteration: 128616
loss: 1.0096075534820557,grad_norm: 0.9999994872725252, iteration: 128617
loss: 1.0703186988830566,grad_norm: 0.9999991579396579, iteration: 128618
loss: 0.9745193719863892,grad_norm: 0.9999994897121047, iteration: 128619
loss: 1.0346013307571411,grad_norm: 0.9999998819127455, iteration: 128620
loss: 1.0775046348571777,grad_norm: 0.9999993986129908, iteration: 128621
loss: 1.0195015668869019,grad_norm: 0.999999915744939, iteration: 128622
loss: 0.9925616383552551,grad_norm: 0.9999993336210434, iteration: 128623
loss: 1.0388944149017334,grad_norm: 0.9999993586438054, iteration: 128624
loss: 0.9980289936065674,grad_norm: 0.9999990816344075, iteration: 128625
loss: 1.0378715991973877,grad_norm: 0.9999997486430198, iteration: 128626
loss: 1.0051461458206177,grad_norm: 0.7878418810331241, iteration: 128627
loss: 1.1231482028961182,grad_norm: 0.9999993866683385, iteration: 128628
loss: 1.016493320465088,grad_norm: 0.7829447152752712, iteration: 128629
loss: 1.0550734996795654,grad_norm: 0.9999990369079614, iteration: 128630
loss: 1.03023099899292,grad_norm: 0.9999990871514175, iteration: 128631
loss: 1.0350359678268433,grad_norm: 0.9999994161809358, iteration: 128632
loss: 0.9705730676651001,grad_norm: 0.8483906012879859, iteration: 128633
loss: 1.0503313541412354,grad_norm: 0.9999993282786701, iteration: 128634
loss: 1.081156611442566,grad_norm: 0.9999995291724305, iteration: 128635
loss: 1.0234348773956299,grad_norm: 0.9999990048839923, iteration: 128636
loss: 1.0523629188537598,grad_norm: 0.9999994091379268, iteration: 128637
loss: 1.0024605989456177,grad_norm: 0.8243229511053607, iteration: 128638
loss: 1.07700514793396,grad_norm: 0.9141711706126753, iteration: 128639
loss: 1.07722008228302,grad_norm: 0.9999990605386467, iteration: 128640
loss: 0.9929518699645996,grad_norm: 0.9999994550007044, iteration: 128641
loss: 1.0391600131988525,grad_norm: 0.999999179273945, iteration: 128642
loss: 1.0241981744766235,grad_norm: 0.8779589189690791, iteration: 128643
loss: 1.017738699913025,grad_norm: 0.9009967577074021, iteration: 128644
loss: 1.0854053497314453,grad_norm: 0.999999269991706, iteration: 128645
loss: 1.0571988821029663,grad_norm: 0.9762443686236739, iteration: 128646
loss: 1.0161192417144775,grad_norm: 0.9999998197544201, iteration: 128647
loss: 1.0489373207092285,grad_norm: 0.9660893013900586, iteration: 128648
loss: 0.9815276861190796,grad_norm: 0.9999992426979689, iteration: 128649
loss: 1.0267127752304077,grad_norm: 0.9395496020529711, iteration: 128650
loss: 1.0375884771347046,grad_norm: 0.9244988864413147, iteration: 128651
loss: 1.027539849281311,grad_norm: 0.9999991818610547, iteration: 128652
loss: 1.0179942846298218,grad_norm: 0.9999999428322248, iteration: 128653
loss: 0.9868919253349304,grad_norm: 0.9177857434916314, iteration: 128654
loss: 1.0031330585479736,grad_norm: 0.999998960373524, iteration: 128655
loss: 1.0166326761245728,grad_norm: 0.9999990502222589, iteration: 128656
loss: 1.0193365812301636,grad_norm: 0.9999993900158239, iteration: 128657
loss: 1.0136723518371582,grad_norm: 0.8804698016666458, iteration: 128658
loss: 1.0408519506454468,grad_norm: 0.9999992660432402, iteration: 128659
loss: 1.0466924905776978,grad_norm: 0.9999993790164996, iteration: 128660
loss: 1.0700026750564575,grad_norm: 0.9999990573376799, iteration: 128661
loss: 1.0902742147445679,grad_norm: 0.9999990973701335, iteration: 128662
loss: 1.0537819862365723,grad_norm: 0.9999991063213999, iteration: 128663
loss: 1.0338279008865356,grad_norm: 1.0000000023721645, iteration: 128664
loss: 0.9943581819534302,grad_norm: 0.9650520367842426, iteration: 128665
loss: 1.1301612854003906,grad_norm: 0.999999568030395, iteration: 128666
loss: 1.0636440515518188,grad_norm: 0.9999998306281545, iteration: 128667
loss: 0.9941741824150085,grad_norm: 1.0000000269824114, iteration: 128668
loss: 1.0197128057479858,grad_norm: 0.9999990145519916, iteration: 128669
loss: 1.1041983366012573,grad_norm: 0.999999380800665, iteration: 128670
loss: 1.0053303241729736,grad_norm: 0.9999993238833316, iteration: 128671
loss: 1.0502309799194336,grad_norm: 0.9999993232547848, iteration: 128672
loss: 1.1056379079818726,grad_norm: 0.9999993794196028, iteration: 128673
loss: 1.107730746269226,grad_norm: 0.9999991446174058, iteration: 128674
loss: 0.9973999261856079,grad_norm: 0.8288545451816393, iteration: 128675
loss: 1.115294098854065,grad_norm: 0.9999994772660173, iteration: 128676
loss: 1.0727938413619995,grad_norm: 0.9999996782872723, iteration: 128677
loss: 1.1617974042892456,grad_norm: 0.9999994820696898, iteration: 128678
loss: 1.0409809350967407,grad_norm: 0.9999992459602233, iteration: 128679
loss: 1.0401321649551392,grad_norm: 0.9999992308679104, iteration: 128680
loss: 1.0720791816711426,grad_norm: 0.999999617308043, iteration: 128681
loss: 1.0305395126342773,grad_norm: 0.9999994748673521, iteration: 128682
loss: 1.0077050924301147,grad_norm: 0.7532388213800484, iteration: 128683
loss: 1.0650842189788818,grad_norm: 0.9481384131947197, iteration: 128684
loss: 1.1254801750183105,grad_norm: 0.9999998349243703, iteration: 128685
loss: 0.9938867688179016,grad_norm: 0.9999991004054839, iteration: 128686
loss: 1.1096173524856567,grad_norm: 0.9999997119236311, iteration: 128687
loss: 1.0470218658447266,grad_norm: 0.9999992147230402, iteration: 128688
loss: 1.0382450819015503,grad_norm: 0.9504160465389675, iteration: 128689
loss: 1.0085827112197876,grad_norm: 0.9999995073733348, iteration: 128690
loss: 0.993298351764679,grad_norm: 0.9320545534887917, iteration: 128691
loss: 1.0467404127120972,grad_norm: 1.000000041716625, iteration: 128692
loss: 0.9684295058250427,grad_norm: 0.894667485117468, iteration: 128693
loss: 1.0439542531967163,grad_norm: 0.9999993924452012, iteration: 128694
loss: 1.017859697341919,grad_norm: 0.9999990743587981, iteration: 128695
loss: 1.0539437532424927,grad_norm: 0.9999990728223338, iteration: 128696
loss: 1.0709753036499023,grad_norm: 0.8470454180486612, iteration: 128697
loss: 0.9814043641090393,grad_norm: 0.9999997758641006, iteration: 128698
loss: 1.0058270692825317,grad_norm: 0.9999992339343229, iteration: 128699
loss: 1.0469931364059448,grad_norm: 0.9999994860509965, iteration: 128700
loss: 1.0383472442626953,grad_norm: 0.9144927544566533, iteration: 128701
loss: 1.0797845125198364,grad_norm: 0.9999993520139286, iteration: 128702
loss: 0.9922779202461243,grad_norm: 0.9467931766025417, iteration: 128703
loss: 1.044613003730774,grad_norm: 0.999999158359763, iteration: 128704
loss: 1.126094102859497,grad_norm: 0.9999997728672457, iteration: 128705
loss: 1.0086480379104614,grad_norm: 0.9999990271187108, iteration: 128706
loss: 1.0172573328018188,grad_norm: 0.8039995703901177, iteration: 128707
loss: 1.0598148107528687,grad_norm: 0.9999998111558691, iteration: 128708
loss: 1.0490318536758423,grad_norm: 0.999999437792319, iteration: 128709
loss: 1.04930579662323,grad_norm: 0.9999995182362463, iteration: 128710
loss: 1.041853904724121,grad_norm: 0.9999993180047593, iteration: 128711
loss: 0.976012647151947,grad_norm: 0.9999991323253962, iteration: 128712
loss: 0.9822964668273926,grad_norm: 0.9999991845946218, iteration: 128713
loss: 0.9835826754570007,grad_norm: 0.9143844157096341, iteration: 128714
loss: 0.974249005317688,grad_norm: 0.806554759916878, iteration: 128715
loss: 1.0106627941131592,grad_norm: 0.99999930558877, iteration: 128716
loss: 1.081413745880127,grad_norm: 1.0000000167893397, iteration: 128717
loss: 1.0573365688323975,grad_norm: 0.9999999125465276, iteration: 128718
loss: 0.9913695454597473,grad_norm: 0.7772549068476474, iteration: 128719
loss: 0.9886941313743591,grad_norm: 0.9271013147542444, iteration: 128720
loss: 0.9808282256126404,grad_norm: 0.9999990971800523, iteration: 128721
loss: 1.0235347747802734,grad_norm: 0.9999995252881339, iteration: 128722
loss: 1.0280510187149048,grad_norm: 0.8712493251884575, iteration: 128723
loss: 1.079093098640442,grad_norm: 0.9999993293577613, iteration: 128724
loss: 0.9678213596343994,grad_norm: 0.8372170980433721, iteration: 128725
loss: 1.0108696222305298,grad_norm: 0.8203023593913759, iteration: 128726
loss: 0.99857497215271,grad_norm: 0.7674481615356986, iteration: 128727
loss: 1.007837176322937,grad_norm: 0.9999996954676296, iteration: 128728
loss: 0.9643521904945374,grad_norm: 0.8173997015215958, iteration: 128729
loss: 1.0122742652893066,grad_norm: 0.999999067737349, iteration: 128730
loss: 1.0024943351745605,grad_norm: 0.9999991196892506, iteration: 128731
loss: 1.0158008337020874,grad_norm: 0.9666372919559504, iteration: 128732
loss: 1.0869179964065552,grad_norm: 0.9999992179144875, iteration: 128733
loss: 0.9700133800506592,grad_norm: 0.9999994255663426, iteration: 128734
loss: 1.0564513206481934,grad_norm: 0.9999993893913237, iteration: 128735
loss: 1.0434720516204834,grad_norm: 0.9999990155108995, iteration: 128736
loss: 1.043364405632019,grad_norm: 0.8086943608758677, iteration: 128737
loss: 1.0039368867874146,grad_norm: 0.7692624238482446, iteration: 128738
loss: 1.0252114534378052,grad_norm: 0.999998933435003, iteration: 128739
loss: 0.9679203033447266,grad_norm: 0.999999858537097, iteration: 128740
loss: 0.992628812789917,grad_norm: 0.9178428125997394, iteration: 128741
loss: 1.001082420349121,grad_norm: 0.9741211011353751, iteration: 128742
loss: 1.02328622341156,grad_norm: 0.9999998343990573, iteration: 128743
loss: 1.0760780572891235,grad_norm: 0.9999996707768267, iteration: 128744
loss: 1.0251893997192383,grad_norm: 0.9999991058684278, iteration: 128745
loss: 1.0257638692855835,grad_norm: 0.9999992014519569, iteration: 128746
loss: 0.9953760504722595,grad_norm: 0.9105589142576392, iteration: 128747
loss: 1.0005078315734863,grad_norm: 0.9999993633670092, iteration: 128748
loss: 1.0053588151931763,grad_norm: 0.8720587541937643, iteration: 128749
loss: 0.9723157286643982,grad_norm: 0.9580194456119903, iteration: 128750
loss: 1.0040478706359863,grad_norm: 0.9999996029936219, iteration: 128751
loss: 0.9746119379997253,grad_norm: 0.9999992259096548, iteration: 128752
loss: 1.0305001735687256,grad_norm: 0.9999998610701812, iteration: 128753
loss: 1.0900105237960815,grad_norm: 0.9999997782245935, iteration: 128754
loss: 1.093666672706604,grad_norm: 0.9999998969911716, iteration: 128755
loss: 1.0190287828445435,grad_norm: 0.999999409944843, iteration: 128756
loss: 0.9891660213470459,grad_norm: 0.8660727819971259, iteration: 128757
loss: 1.0994502305984497,grad_norm: 0.9999993307435886, iteration: 128758
loss: 1.0976879596710205,grad_norm: 0.9999998421975054, iteration: 128759
loss: 1.0059019327163696,grad_norm: 0.9999996936513668, iteration: 128760
loss: 0.9761740565299988,grad_norm: 0.9999997398094093, iteration: 128761
loss: 1.0516291856765747,grad_norm: 0.9999996209379655, iteration: 128762
loss: 1.0546748638153076,grad_norm: 0.9999991312825668, iteration: 128763
loss: 1.0257781744003296,grad_norm: 0.9999995698516126, iteration: 128764
loss: 1.030287265777588,grad_norm: 0.9219769969112137, iteration: 128765
loss: 1.1035536527633667,grad_norm: 0.9999993502821043, iteration: 128766
loss: 1.0782371759414673,grad_norm: 0.9999997005539543, iteration: 128767
loss: 1.0428868532180786,grad_norm: 0.9999992846430866, iteration: 128768
loss: 1.019673228263855,grad_norm: 0.744828584027936, iteration: 128769
loss: 0.9757983684539795,grad_norm: 0.9999996821385998, iteration: 128770
loss: 0.9887987971305847,grad_norm: 0.8908645415853715, iteration: 128771
loss: 1.0881798267364502,grad_norm: 0.9999992002303213, iteration: 128772
loss: 1.030141830444336,grad_norm: 0.9999993152925687, iteration: 128773
loss: 1.0214580297470093,grad_norm: 0.8678150457727148, iteration: 128774
loss: 1.0260214805603027,grad_norm: 0.9999998270867283, iteration: 128775
loss: 1.0977128744125366,grad_norm: 0.9999994295556296, iteration: 128776
loss: 1.0665212869644165,grad_norm: 0.9999997047759628, iteration: 128777
loss: 1.01853609085083,grad_norm: 0.9999996515370082, iteration: 128778
loss: 0.9871113896369934,grad_norm: 0.9999997949967645, iteration: 128779
loss: 1.0321203470230103,grad_norm: 0.9999997297466946, iteration: 128780
loss: 1.094618320465088,grad_norm: 0.800683884547224, iteration: 128781
loss: 0.9944780468940735,grad_norm: 0.999998959439177, iteration: 128782
loss: 1.0121474266052246,grad_norm: 0.9337803392973387, iteration: 128783
loss: 1.0329399108886719,grad_norm: 0.9999998045851521, iteration: 128784
loss: 1.0403062105178833,grad_norm: 0.9747148479938104, iteration: 128785
loss: 1.14431893825531,grad_norm: 0.9999998452406195, iteration: 128786
loss: 0.9813868403434753,grad_norm: 0.9999990847118938, iteration: 128787
loss: 1.0178025960922241,grad_norm: 0.999587285033642, iteration: 128788
loss: 0.9841597080230713,grad_norm: 0.9999991722380651, iteration: 128789
loss: 1.1131672859191895,grad_norm: 0.9999998942393242, iteration: 128790
loss: 1.0484462976455688,grad_norm: 0.9999997257206181, iteration: 128791
loss: 0.9881937503814697,grad_norm: 0.8422603204190433, iteration: 128792
loss: 0.9868586659431458,grad_norm: 0.9999995316716742, iteration: 128793
loss: 1.0253766775131226,grad_norm: 0.9999994188415147, iteration: 128794
loss: 1.0830124616622925,grad_norm: 0.9999992884648655, iteration: 128795
loss: 1.2081115245819092,grad_norm: 0.9999993833381886, iteration: 128796
loss: 0.9847157001495361,grad_norm: 0.9841360263525334, iteration: 128797
loss: 1.0260344743728638,grad_norm: 0.9999995242332572, iteration: 128798
loss: 1.0805270671844482,grad_norm: 1.0000000073247413, iteration: 128799
loss: 0.9895809888839722,grad_norm: 0.999117724079022, iteration: 128800
loss: 1.051509976387024,grad_norm: 0.9999992821191618, iteration: 128801
loss: 1.0729060173034668,grad_norm: 0.9999999501254321, iteration: 128802
loss: 1.001187801361084,grad_norm: 0.7717929167333316, iteration: 128803
loss: 1.1162030696868896,grad_norm: 0.9999994744482553, iteration: 128804
loss: 0.9857373237609863,grad_norm: 0.9999992753817227, iteration: 128805
loss: 1.0625847578048706,grad_norm: 0.9999994025777911, iteration: 128806
loss: 1.1019788980484009,grad_norm: 0.8288074733338185, iteration: 128807
loss: 1.079716682434082,grad_norm: 0.9999994341291634, iteration: 128808
loss: 1.0362826585769653,grad_norm: 0.9999996434780382, iteration: 128809
loss: 1.0344792604446411,grad_norm: 0.9999996819335818, iteration: 128810
loss: 0.973727285861969,grad_norm: 0.9079249252461731, iteration: 128811
loss: 0.9910041093826294,grad_norm: 0.9999999716420217, iteration: 128812
loss: 0.9754760265350342,grad_norm: 0.9653730807913077, iteration: 128813
loss: 1.2159278392791748,grad_norm: 0.9999998855984311, iteration: 128814
loss: 1.0033788681030273,grad_norm: 0.8605182242762702, iteration: 128815
loss: 1.1517401933670044,grad_norm: 0.9999998270981529, iteration: 128816
loss: 1.0257960557937622,grad_norm: 0.9999998969044662, iteration: 128817
loss: 0.9958086609840393,grad_norm: 0.9999991578786419, iteration: 128818
loss: 1.0829441547393799,grad_norm: 0.9999993063913514, iteration: 128819
loss: 1.049175500869751,grad_norm: 0.9999990440647554, iteration: 128820
loss: 1.0010730028152466,grad_norm: 0.9999993650072735, iteration: 128821
loss: 1.0738505125045776,grad_norm: 0.9999999026933495, iteration: 128822
loss: 1.0110678672790527,grad_norm: 0.9674321217415105, iteration: 128823
loss: 1.043696403503418,grad_norm: 0.9097461427806566, iteration: 128824
loss: 0.9856183528900146,grad_norm: 0.8708795111717497, iteration: 128825
loss: 1.040196418762207,grad_norm: 0.9562584398187315, iteration: 128826
loss: 0.9739986062049866,grad_norm: 0.8954073294567708, iteration: 128827
loss: 1.1250602006912231,grad_norm: 0.9999991749234719, iteration: 128828
loss: 1.0160408020019531,grad_norm: 0.8655924390553936, iteration: 128829
loss: 1.0055004358291626,grad_norm: 0.999999354915111, iteration: 128830
loss: 1.0248833894729614,grad_norm: 0.9999991753292882, iteration: 128831
loss: 1.0277847051620483,grad_norm: 0.9999996001195676, iteration: 128832
loss: 1.1654924154281616,grad_norm: 0.9999991899856823, iteration: 128833
loss: 1.026002049446106,grad_norm: 0.9410214294041499, iteration: 128834
loss: 1.0785256624221802,grad_norm: 0.9872094544638347, iteration: 128835
loss: 1.0596606731414795,grad_norm: 0.9999996447051617, iteration: 128836
loss: 1.023873209953308,grad_norm: 0.9999994540633043, iteration: 128837
loss: 1.0834730863571167,grad_norm: 0.9999991138746214, iteration: 128838
loss: 0.9677324295043945,grad_norm: 0.9999990795555024, iteration: 128839
loss: 1.0185483694076538,grad_norm: 0.9999994595643756, iteration: 128840
loss: 1.0121277570724487,grad_norm: 0.9999989922636723, iteration: 128841
loss: 1.0078285932540894,grad_norm: 0.9999995300967653, iteration: 128842
loss: 1.0295690298080444,grad_norm: 0.9999994062528217, iteration: 128843
loss: 1.0311611890792847,grad_norm: 0.9999991652790077, iteration: 128844
loss: 1.0735622644424438,grad_norm: 0.8359215535824989, iteration: 128845
loss: 1.100199580192566,grad_norm: 0.999999903934509, iteration: 128846
loss: 1.0158119201660156,grad_norm: 0.9560269629022236, iteration: 128847
loss: 1.0117855072021484,grad_norm: 0.9999989692099718, iteration: 128848
loss: 1.0281716585159302,grad_norm: 0.9999990742674439, iteration: 128849
loss: 1.0443141460418701,grad_norm: 0.9999991192344323, iteration: 128850
loss: 1.0425615310668945,grad_norm: 0.9999999409565136, iteration: 128851
loss: 0.9579240679740906,grad_norm: 0.780435760052317, iteration: 128852
loss: 1.0027804374694824,grad_norm: 0.9999990475673534, iteration: 128853
loss: 1.023820400238037,grad_norm: 0.8210780089014785, iteration: 128854
loss: 1.0109467506408691,grad_norm: 0.8815745514722609, iteration: 128855
loss: 1.1742466688156128,grad_norm: 0.9999998666700277, iteration: 128856
loss: 1.041016936302185,grad_norm: 0.9999993039401373, iteration: 128857
loss: 1.0244050025939941,grad_norm: 0.869460839966204, iteration: 128858
loss: 0.9807559847831726,grad_norm: 0.7524193881489543, iteration: 128859
loss: 1.0648118257522583,grad_norm: 0.9999992899808259, iteration: 128860
loss: 1.075596570968628,grad_norm: 0.9999994421650564, iteration: 128861
loss: 1.0030003786087036,grad_norm: 0.8619226117263753, iteration: 128862
loss: 0.9837134480476379,grad_norm: 0.9219367100520053, iteration: 128863
loss: 1.0370582342147827,grad_norm: 0.9401786202051686, iteration: 128864
loss: 1.0022680759429932,grad_norm: 0.9999998278174429, iteration: 128865
loss: 1.0849058628082275,grad_norm: 0.9340070544638014, iteration: 128866
loss: 0.9680992960929871,grad_norm: 0.9999995298065487, iteration: 128867
loss: 1.0594203472137451,grad_norm: 0.9999996682298746, iteration: 128868
loss: 0.984578013420105,grad_norm: 0.8956726843868993, iteration: 128869
loss: 0.973361074924469,grad_norm: 0.9638316762963731, iteration: 128870
loss: 1.0190991163253784,grad_norm: 0.9999994838323831, iteration: 128871
loss: 1.0123695135116577,grad_norm: 0.8407967622093541, iteration: 128872
loss: 1.101226568222046,grad_norm: 0.9999991714144434, iteration: 128873
loss: 1.0697948932647705,grad_norm: 0.9999995920016724, iteration: 128874
loss: 1.061927318572998,grad_norm: 0.9999999399078308, iteration: 128875
loss: 1.0166783332824707,grad_norm: 0.9999991797066322, iteration: 128876
loss: 1.002047061920166,grad_norm: 0.8218360752886956, iteration: 128877
loss: 1.0477205514907837,grad_norm: 0.9999995867612734, iteration: 128878
loss: 1.0388685464859009,grad_norm: 0.8977314559173227, iteration: 128879
loss: 1.1154040098190308,grad_norm: 0.9999994605101691, iteration: 128880
loss: 1.036190390586853,grad_norm: 0.9999992902967314, iteration: 128881
loss: 1.115042805671692,grad_norm: 0.9999995253469175, iteration: 128882
loss: 1.0142899751663208,grad_norm: 0.7976637506952412, iteration: 128883
loss: 0.9955333471298218,grad_norm: 0.999999241011122, iteration: 128884
loss: 1.0090641975402832,grad_norm: 0.9999993602958791, iteration: 128885
loss: 1.0630766153335571,grad_norm: 0.9999993350916241, iteration: 128886
loss: 0.9686830639839172,grad_norm: 0.8275397766713426, iteration: 128887
loss: 1.0156677961349487,grad_norm: 0.9999990886241454, iteration: 128888
loss: 1.014413595199585,grad_norm: 0.999999120031942, iteration: 128889
loss: 1.1184182167053223,grad_norm: 0.9999996471394828, iteration: 128890
loss: 1.0197757482528687,grad_norm: 0.7443938572635103, iteration: 128891
loss: 1.012714147567749,grad_norm: 0.9999992521249884, iteration: 128892
loss: 1.0243769884109497,grad_norm: 0.9999992815353212, iteration: 128893
loss: 1.0581996440887451,grad_norm: 0.9999998126183063, iteration: 128894
loss: 1.0237016677856445,grad_norm: 0.8893568560225177, iteration: 128895
loss: 1.075411081314087,grad_norm: 0.9999999466425616, iteration: 128896
loss: 1.00129234790802,grad_norm: 0.7691073764490899, iteration: 128897
loss: 1.0393805503845215,grad_norm: 0.9999992418043933, iteration: 128898
loss: 0.9853846430778503,grad_norm: 0.9488462042199983, iteration: 128899
loss: 1.0321680307388306,grad_norm: 0.9708444887112796, iteration: 128900
loss: 1.0102183818817139,grad_norm: 0.9089264173587714, iteration: 128901
loss: 1.047709584236145,grad_norm: 0.9999991583793185, iteration: 128902
loss: 0.9988782405853271,grad_norm: 0.9999992905621651, iteration: 128903
loss: 0.9454596042633057,grad_norm: 0.9312412272833254, iteration: 128904
loss: 0.9905023574829102,grad_norm: 0.9999991402383424, iteration: 128905
loss: 0.9465040564537048,grad_norm: 0.8888652712638628, iteration: 128906
loss: 1.0029773712158203,grad_norm: 0.9999994552779126, iteration: 128907
loss: 1.0062686204910278,grad_norm: 0.857518168069378, iteration: 128908
loss: 1.024213194847107,grad_norm: 0.9575858128995891, iteration: 128909
loss: 1.0074912309646606,grad_norm: 0.9137716326860001, iteration: 128910
loss: 1.3003469705581665,grad_norm: 0.999999748939802, iteration: 128911
loss: 1.0064191818237305,grad_norm: 0.999999188047411, iteration: 128912
loss: 1.0275717973709106,grad_norm: 0.9999993286421959, iteration: 128913
loss: 1.0048547983169556,grad_norm: 0.9999991722036681, iteration: 128914
loss: 1.0491397380828857,grad_norm: 0.9999998337043726, iteration: 128915
loss: 1.0874031782150269,grad_norm: 0.999999040990037, iteration: 128916
loss: 1.0211546421051025,grad_norm: 0.9999991446547022, iteration: 128917
loss: 1.0590603351593018,grad_norm: 0.9999997272820517, iteration: 128918
loss: 1.1606180667877197,grad_norm: 0.9999996374088143, iteration: 128919
loss: 1.0590081214904785,grad_norm: 0.9999994016237063, iteration: 128920
loss: 0.98459792137146,grad_norm: 0.914113555113126, iteration: 128921
loss: 1.058618187904358,grad_norm: 0.9999989156208138, iteration: 128922
loss: 1.0228718519210815,grad_norm: 0.9569591860103611, iteration: 128923
loss: 1.0048763751983643,grad_norm: 0.8875832654684004, iteration: 128924
loss: 1.0473660230636597,grad_norm: 0.8513496985615207, iteration: 128925
loss: 1.1543753147125244,grad_norm: 0.9999993313728888, iteration: 128926
loss: 1.1327937841415405,grad_norm: 0.999999902375132, iteration: 128927
loss: 1.1630640029907227,grad_norm: 0.999999759506727, iteration: 128928
loss: 1.0162665843963623,grad_norm: 0.9236848858357174, iteration: 128929
loss: 1.009074091911316,grad_norm: 0.9999990400326809, iteration: 128930
loss: 0.969554603099823,grad_norm: 0.9999991651175131, iteration: 128931
loss: 1.0032397508621216,grad_norm: 0.9999991615679708, iteration: 128932
loss: 1.0115597248077393,grad_norm: 0.8829662270733045, iteration: 128933
loss: 1.047776222229004,grad_norm: 0.9999999116348366, iteration: 128934
loss: 0.9289714694023132,grad_norm: 0.9999995672980289, iteration: 128935
loss: 0.9923165440559387,grad_norm: 0.9999991223907402, iteration: 128936
loss: 1.1260744333267212,grad_norm: 0.9999997542804383, iteration: 128937
loss: 1.123138189315796,grad_norm: 0.9999998428094996, iteration: 128938
loss: 1.0187431573867798,grad_norm: 0.9999992022800382, iteration: 128939
loss: 1.0456840991973877,grad_norm: 0.8948938475394391, iteration: 128940
loss: 1.0091729164123535,grad_norm: 0.9999992645356303, iteration: 128941
loss: 1.0153194665908813,grad_norm: 0.9999998400027124, iteration: 128942
loss: 1.0024847984313965,grad_norm: 0.9999990478527375, iteration: 128943
loss: 1.0007678270339966,grad_norm: 0.9456698229802852, iteration: 128944
loss: 0.9620446562767029,grad_norm: 0.8969545348150513, iteration: 128945
loss: 1.03659188747406,grad_norm: 0.9999999232169131, iteration: 128946
loss: 1.0823179483413696,grad_norm: 0.8501970160207988, iteration: 128947
loss: 0.9678767919540405,grad_norm: 0.8524297290101368, iteration: 128948
loss: 0.9975765943527222,grad_norm: 0.9999989840365027, iteration: 128949
loss: 1.023005485534668,grad_norm: 0.999999016269528, iteration: 128950
loss: 1.0006749629974365,grad_norm: 0.9876475381706372, iteration: 128951
loss: 1.05745267868042,grad_norm: 0.9999999170599534, iteration: 128952
loss: 1.0723172426223755,grad_norm: 0.9999992147576404, iteration: 128953
loss: 1.031099796295166,grad_norm: 0.9999992429871353, iteration: 128954
loss: 1.012669563293457,grad_norm: 0.9999996986370686, iteration: 128955
loss: 0.9971004724502563,grad_norm: 0.890181870207897, iteration: 128956
loss: 1.0100551843643188,grad_norm: 0.9140099089411309, iteration: 128957
loss: 1.0185127258300781,grad_norm: 0.930512727726, iteration: 128958
loss: 0.9731478095054626,grad_norm: 0.9999991079664403, iteration: 128959
loss: 1.0092121362686157,grad_norm: 0.9999994518888049, iteration: 128960
loss: 0.9873966574668884,grad_norm: 0.9151362639025398, iteration: 128961
loss: 1.0293992757797241,grad_norm: 0.999999495906306, iteration: 128962
loss: 1.0102349519729614,grad_norm: 0.8840114613619395, iteration: 128963
loss: 0.9678335189819336,grad_norm: 0.9340908576104902, iteration: 128964
loss: 1.0071516036987305,grad_norm: 0.9089945365102967, iteration: 128965
loss: 1.085620403289795,grad_norm: 0.9999998320762294, iteration: 128966
loss: 1.0900282859802246,grad_norm: 0.9999991674044899, iteration: 128967
loss: 1.0301774740219116,grad_norm: 0.9999994772060866, iteration: 128968
loss: 1.1852272748947144,grad_norm: 0.9999994888873449, iteration: 128969
loss: 1.2944716215133667,grad_norm: 0.9999998177907226, iteration: 128970
loss: 1.0340778827667236,grad_norm: 0.9999994071622491, iteration: 128971
loss: 1.0134998559951782,grad_norm: 0.9915344304960312, iteration: 128972
loss: 1.0985071659088135,grad_norm: 0.999999904279453, iteration: 128973
loss: 0.9998974800109863,grad_norm: 0.8401879581020308, iteration: 128974
loss: 1.0062123537063599,grad_norm: 0.7711402185478472, iteration: 128975
loss: 1.0371155738830566,grad_norm: 0.9999992562834157, iteration: 128976
loss: 0.9798597693443298,grad_norm: 0.9936387592310557, iteration: 128977
loss: 1.0181012153625488,grad_norm: 0.9420408294530084, iteration: 128978
loss: 1.0500174760818481,grad_norm: 0.9999993368906116, iteration: 128979
loss: 0.9837431311607361,grad_norm: 0.7934280084591374, iteration: 128980
loss: 1.0141236782073975,grad_norm: 0.9999996283401104, iteration: 128981
loss: 1.0605924129486084,grad_norm: 0.9999997240161957, iteration: 128982
loss: 1.013746976852417,grad_norm: 0.9999990251550178, iteration: 128983
loss: 1.1094977855682373,grad_norm: 0.9999999741443949, iteration: 128984
loss: 1.0048717260360718,grad_norm: 0.8867787254220112, iteration: 128985
loss: 1.0721335411071777,grad_norm: 1.0000000228440102, iteration: 128986
loss: 1.0600417852401733,grad_norm: 0.9429235360968242, iteration: 128987
loss: 0.9880107045173645,grad_norm: 0.8736827581005273, iteration: 128988
loss: 0.9856482744216919,grad_norm: 0.999999029772691, iteration: 128989
loss: 1.0401555299758911,grad_norm: 0.9999994390404668, iteration: 128990
loss: 0.9902055859565735,grad_norm: 0.9999993943580908, iteration: 128991
loss: 0.982978105545044,grad_norm: 0.851583566705294, iteration: 128992
loss: 1.038495659828186,grad_norm: 0.9999992915129583, iteration: 128993
loss: 1.061185598373413,grad_norm: 0.9999998845451475, iteration: 128994
loss: 0.9990587830543518,grad_norm: 0.9999992618186614, iteration: 128995
loss: 1.0211126804351807,grad_norm: 0.6374975662454356, iteration: 128996
loss: 0.9883811473846436,grad_norm: 0.9165668274733436, iteration: 128997
loss: 0.989142894744873,grad_norm: 0.9999991808883353, iteration: 128998
loss: 1.0893586874008179,grad_norm: 0.9999996553758773, iteration: 128999
loss: 1.0412873029708862,grad_norm: 0.9999990753510077, iteration: 129000
loss: 1.0040363073349,grad_norm: 0.9999991087341067, iteration: 129001
loss: 1.075695276260376,grad_norm: 0.9999996332374987, iteration: 129002
loss: 1.007689356803894,grad_norm: 0.7997289383106381, iteration: 129003
loss: 0.9965589046478271,grad_norm: 0.9999998993217213, iteration: 129004
loss: 1.0017164945602417,grad_norm: 0.8890499301033705, iteration: 129005
loss: 1.0080549716949463,grad_norm: 0.8001334282631728, iteration: 129006
loss: 0.9839595556259155,grad_norm: 0.8569128357275265, iteration: 129007
loss: 1.0100271701812744,grad_norm: 0.9167496122140042, iteration: 129008
loss: 1.025098443031311,grad_norm: 0.9999991526596123, iteration: 129009
loss: 1.1084368228912354,grad_norm: 0.9999996366248093, iteration: 129010
loss: 0.9643312096595764,grad_norm: 0.9409010366030162, iteration: 129011
loss: 1.0325711965560913,grad_norm: 0.9999991711641854, iteration: 129012
loss: 0.9619303941726685,grad_norm: 0.9999990278170232, iteration: 129013
loss: 1.0762027502059937,grad_norm: 0.9999992635896349, iteration: 129014
loss: 1.0358692407608032,grad_norm: 0.9999991156196656, iteration: 129015
loss: 1.011364459991455,grad_norm: 0.9999994953359912, iteration: 129016
loss: 0.9802523255348206,grad_norm: 0.9999990134189398, iteration: 129017
loss: 1.018997073173523,grad_norm: 0.9999991466395427, iteration: 129018
loss: 0.9897637963294983,grad_norm: 0.9964618326972358, iteration: 129019
loss: 1.1455992460250854,grad_norm: 0.9999997785858957, iteration: 129020
loss: 1.0153454542160034,grad_norm: 0.9999993747440461, iteration: 129021
loss: 1.0669158697128296,grad_norm: 0.9999992532645597, iteration: 129022
loss: 1.0027366876602173,grad_norm: 0.9999990180158809, iteration: 129023
loss: 1.0088406801223755,grad_norm: 0.9999991994606541, iteration: 129024
loss: 1.0406091213226318,grad_norm: 0.9999989869856643, iteration: 129025
loss: 0.9775012731552124,grad_norm: 0.8741466394025221, iteration: 129026
loss: 1.0031012296676636,grad_norm: 0.9999992299940333, iteration: 129027
loss: 0.9963368773460388,grad_norm: 0.7905911965483758, iteration: 129028
loss: 0.9885410070419312,grad_norm: 0.9321878329038112, iteration: 129029
loss: 0.9934961795806885,grad_norm: 0.9999991712921376, iteration: 129030
loss: 0.9928417801856995,grad_norm: 0.9219888850025215, iteration: 129031
loss: 1.0388277769088745,grad_norm: 0.8531002731902428, iteration: 129032
loss: 0.9747648239135742,grad_norm: 0.9999990210793659, iteration: 129033
loss: 1.1550636291503906,grad_norm: 0.9999997547054845, iteration: 129034
loss: 1.0442748069763184,grad_norm: 0.9999998324375065, iteration: 129035
loss: 1.0121748447418213,grad_norm: 0.9999992221718667, iteration: 129036
loss: 0.99901282787323,grad_norm: 0.9999991909592889, iteration: 129037
loss: 0.9760896563529968,grad_norm: 0.7059754856357843, iteration: 129038
loss: 1.0150809288024902,grad_norm: 0.974495563840392, iteration: 129039
loss: 1.0163427591323853,grad_norm: 0.968922039588971, iteration: 129040
loss: 1.0046967267990112,grad_norm: 0.9102059851994183, iteration: 129041
loss: 0.99189692735672,grad_norm: 0.999999089483128, iteration: 129042
loss: 1.0155378580093384,grad_norm: 0.9999992541841625, iteration: 129043
loss: 0.9533990025520325,grad_norm: 0.8671404452657251, iteration: 129044
loss: 1.1050634384155273,grad_norm: 0.9999997881964283, iteration: 129045
loss: 1.0145938396453857,grad_norm: 0.9942317033950947, iteration: 129046
loss: 0.9582729339599609,grad_norm: 0.813068054894106, iteration: 129047
loss: 1.01523756980896,grad_norm: 0.7867385827161181, iteration: 129048
loss: 1.019855260848999,grad_norm: 0.7013077394747204, iteration: 129049
loss: 0.9830355048179626,grad_norm: 0.9999991494127242, iteration: 129050
loss: 0.96165531873703,grad_norm: 0.9023674408123854, iteration: 129051
loss: 1.1087135076522827,grad_norm: 0.9999994288774858, iteration: 129052
loss: 1.0097901821136475,grad_norm: 0.8947350879610607, iteration: 129053
loss: 1.3131533861160278,grad_norm: 0.9999998407996055, iteration: 129054
loss: 1.0180436372756958,grad_norm: 0.9999994476329613, iteration: 129055
loss: 1.0519877672195435,grad_norm: 0.9999991304974181, iteration: 129056
loss: 1.0841275453567505,grad_norm: 0.9999994652645563, iteration: 129057
loss: 1.0590375661849976,grad_norm: 0.9999991987343476, iteration: 129058
loss: 1.0429617166519165,grad_norm: 0.9999996323911732, iteration: 129059
loss: 1.088250994682312,grad_norm: 0.9999999456747339, iteration: 129060
loss: 1.0335618257522583,grad_norm: 0.8921509334356335, iteration: 129061
loss: 0.9669964909553528,grad_norm: 0.8262812584940834, iteration: 129062
loss: 1.01113760471344,grad_norm: 0.9999990279850084, iteration: 129063
loss: 1.0059329271316528,grad_norm: 0.9999993082080469, iteration: 129064
loss: 0.9915946125984192,grad_norm: 0.8470895498797696, iteration: 129065
loss: 1.0082972049713135,grad_norm: 0.9999995299740437, iteration: 129066
loss: 0.9835413694381714,grad_norm: 0.9999991036455641, iteration: 129067
loss: 1.056581974029541,grad_norm: 0.9999998994754336, iteration: 129068
loss: 1.0001194477081299,grad_norm: 0.9894014605845789, iteration: 129069
loss: 1.0223385095596313,grad_norm: 0.9999997859109199, iteration: 129070
loss: 0.9877808690071106,grad_norm: 0.9135926209065415, iteration: 129071
loss: 1.0064142942428589,grad_norm: 0.9999991366649014, iteration: 129072
loss: 0.9955812692642212,grad_norm: 0.9421281513183402, iteration: 129073
loss: 1.0591603517532349,grad_norm: 0.8592990440072106, iteration: 129074
loss: 1.0352767705917358,grad_norm: 0.9999998688236965, iteration: 129075
loss: 1.0586421489715576,grad_norm: 0.9999992164046888, iteration: 129076
loss: 1.0014902353286743,grad_norm: 0.7780540552590863, iteration: 129077
loss: 1.001761794090271,grad_norm: 0.890747116937556, iteration: 129078
loss: 0.9937845468521118,grad_norm: 0.9999991419593873, iteration: 129079
loss: 0.9815574288368225,grad_norm: 0.7696100128964191, iteration: 129080
loss: 0.9626280069351196,grad_norm: 0.7956286172214141, iteration: 129081
loss: 1.0280897617340088,grad_norm: 0.9999995903424559, iteration: 129082
loss: 0.9987654685974121,grad_norm: 0.999999828315241, iteration: 129083
loss: 1.0739248991012573,grad_norm: 0.999999401081407, iteration: 129084
loss: 1.0482639074325562,grad_norm: 0.999999132437788, iteration: 129085
loss: 1.005004644393921,grad_norm: 0.8757301463469499, iteration: 129086
loss: 0.9917091131210327,grad_norm: 0.8743749358591665, iteration: 129087
loss: 1.010436773300171,grad_norm: 0.9043678158493945, iteration: 129088
loss: 1.0638530254364014,grad_norm: 0.9425521019226776, iteration: 129089
loss: 0.9827536940574646,grad_norm: 0.9999996260512563, iteration: 129090
loss: 0.9968831539154053,grad_norm: 0.9731349801843886, iteration: 129091
loss: 1.0594316720962524,grad_norm: 0.9999995716329467, iteration: 129092
loss: 1.0439109802246094,grad_norm: 0.999999935451999, iteration: 129093
loss: 0.9721968770027161,grad_norm: 0.9999991222699166, iteration: 129094
loss: 1.00377357006073,grad_norm: 0.9999991701643306, iteration: 129095
loss: 0.9930160641670227,grad_norm: 0.8636966918755984, iteration: 129096
loss: 0.9960981607437134,grad_norm: 0.7396410353886738, iteration: 129097
loss: 1.0222042798995972,grad_norm: 0.7635015727498446, iteration: 129098
loss: 1.0285407304763794,grad_norm: 0.9999992643191081, iteration: 129099
loss: 0.9937065243721008,grad_norm: 0.8212006329084881, iteration: 129100
loss: 1.0552918910980225,grad_norm: 0.9859280311142692, iteration: 129101
loss: 1.0346665382385254,grad_norm: 0.9811235562142854, iteration: 129102
loss: 1.0393328666687012,grad_norm: 0.8597559525902603, iteration: 129103
loss: 1.0178163051605225,grad_norm: 0.9397327791514163, iteration: 129104
loss: 0.9997071623802185,grad_norm: 0.8155166164862881, iteration: 129105
loss: 0.9945089817047119,grad_norm: 0.9205238195915002, iteration: 129106
loss: 0.9604358673095703,grad_norm: 0.9999990222567415, iteration: 129107
loss: 1.0221363306045532,grad_norm: 0.971248961518279, iteration: 129108
loss: 0.99346923828125,grad_norm: 0.9616935331403081, iteration: 129109
loss: 0.9827041625976562,grad_norm: 0.7651292380683699, iteration: 129110
loss: 1.0413061380386353,grad_norm: 0.9999990670861054, iteration: 129111
loss: 0.9553495049476624,grad_norm: 0.9225046587704726, iteration: 129112
loss: 0.9817356467247009,grad_norm: 0.8465560079960284, iteration: 129113
loss: 1.058895468711853,grad_norm: 0.9999993467749067, iteration: 129114
loss: 0.9969174861907959,grad_norm: 0.9290737331463856, iteration: 129115
loss: 0.9973198771476746,grad_norm: 0.6863162737942416, iteration: 129116
loss: 1.0165833234786987,grad_norm: 0.6771294341152692, iteration: 129117
loss: 1.0322917699813843,grad_norm: 0.9878241302621336, iteration: 129118
loss: 1.0063272714614868,grad_norm: 0.9999994665760942, iteration: 129119
loss: 0.9977014064788818,grad_norm: 0.8464072888527887, iteration: 129120
loss: 1.0056884288787842,grad_norm: 0.7564765588666296, iteration: 129121
loss: 0.9913222193717957,grad_norm: 0.6880250012434622, iteration: 129122
loss: 0.9780332446098328,grad_norm: 0.9999997278774813, iteration: 129123
loss: 1.0003001689910889,grad_norm: 0.9600325865482267, iteration: 129124
loss: 1.0100295543670654,grad_norm: 0.837511614652952, iteration: 129125
loss: 1.002692461013794,grad_norm: 0.865690860946883, iteration: 129126
loss: 1.0202701091766357,grad_norm: 0.9999994417512593, iteration: 129127
loss: 1.2230479717254639,grad_norm: 0.9999994475986329, iteration: 129128
loss: 1.0927534103393555,grad_norm: 0.9999993400830527, iteration: 129129
loss: 1.0915106534957886,grad_norm: 0.9829998934351792, iteration: 129130
loss: 0.965302050113678,grad_norm: 0.9031361391892698, iteration: 129131
loss: 0.9864912033081055,grad_norm: 0.8781080293608347, iteration: 129132
loss: 0.9819294214248657,grad_norm: 0.8576052088512844, iteration: 129133
loss: 1.02639901638031,grad_norm: 0.9999991466930981, iteration: 129134
loss: 1.0251857042312622,grad_norm: 0.7613846138083491, iteration: 129135
loss: 0.9556293487548828,grad_norm: 0.9999990625856175, iteration: 129136
loss: 1.1177970170974731,grad_norm: 0.9516709141793476, iteration: 129137
loss: 1.1061570644378662,grad_norm: 0.9999999267794188, iteration: 129138
loss: 1.0306272506713867,grad_norm: 0.9976755719355372, iteration: 129139
loss: 1.0231854915618896,grad_norm: 0.866541418868425, iteration: 129140
loss: 0.9860862493515015,grad_norm: 0.8704337775140486, iteration: 129141
loss: 1.0446279048919678,grad_norm: 0.9999993127551783, iteration: 129142
loss: 0.9921546578407288,grad_norm: 0.7937675124405624, iteration: 129143
loss: 0.9795228838920593,grad_norm: 0.8488441581343863, iteration: 129144
loss: 1.0462355613708496,grad_norm: 0.9999998662328571, iteration: 129145
loss: 0.9423677325248718,grad_norm: 0.904003394524445, iteration: 129146
loss: 1.0302903652191162,grad_norm: 0.8134205237830566, iteration: 129147
loss: 1.0287530422210693,grad_norm: 0.9999989913128211, iteration: 129148
loss: 0.9932041168212891,grad_norm: 0.9999989985978878, iteration: 129149
loss: 1.0118330717086792,grad_norm: 0.869354380178594, iteration: 129150
loss: 0.9737348556518555,grad_norm: 0.9326624902543706, iteration: 129151
loss: 1.0153158903121948,grad_norm: 0.7948542233989621, iteration: 129152
loss: 1.1406205892562866,grad_norm: 0.9999994165466463, iteration: 129153
loss: 0.9534541964530945,grad_norm: 0.9242286394199576, iteration: 129154
loss: 0.9987545013427734,grad_norm: 0.9999991475228186, iteration: 129155
loss: 0.9901698231697083,grad_norm: 0.9999991459816024, iteration: 129156
loss: 1.0237221717834473,grad_norm: 0.9999991612614403, iteration: 129157
loss: 1.0001081228256226,grad_norm: 0.8750056293225978, iteration: 129158
loss: 1.0644431114196777,grad_norm: 0.9092177727054285, iteration: 129159
loss: 0.9945159554481506,grad_norm: 0.9463526075936467, iteration: 129160
loss: 0.9665660262107849,grad_norm: 0.9907884601182458, iteration: 129161
loss: 1.134933352470398,grad_norm: 0.9999990606873277, iteration: 129162
loss: 1.0057202577590942,grad_norm: 0.9590248228811931, iteration: 129163
loss: 1.004386305809021,grad_norm: 0.8554718759195886, iteration: 129164
loss: 1.0292258262634277,grad_norm: 0.9714514940367301, iteration: 129165
loss: 1.0402734279632568,grad_norm: 0.7590537953367634, iteration: 129166
loss: 1.0510809421539307,grad_norm: 0.8175309593996833, iteration: 129167
loss: 1.0221575498580933,grad_norm: 0.8799657124965607, iteration: 129168
loss: 1.0124778747558594,grad_norm: 0.8308699047136454, iteration: 129169
loss: 0.9944615960121155,grad_norm: 0.8440669880741027, iteration: 129170
loss: 1.1108250617980957,grad_norm: 0.9999995073682175, iteration: 129171
loss: 1.1203123331069946,grad_norm: 0.9999997069739187, iteration: 129172
loss: 1.030274748802185,grad_norm: 0.7061529494996436, iteration: 129173
loss: 0.9869826436042786,grad_norm: 0.8753921741213675, iteration: 129174
loss: 1.0345011949539185,grad_norm: 0.7728588676471737, iteration: 129175
loss: 1.0064740180969238,grad_norm: 0.9517104212872625, iteration: 129176
loss: 1.0558303594589233,grad_norm: 0.9999999074405937, iteration: 129177
loss: 0.9510464072227478,grad_norm: 0.9237065210676226, iteration: 129178
loss: 1.0316524505615234,grad_norm: 0.999999330248402, iteration: 129179
loss: 1.0056735277175903,grad_norm: 0.8394124142781251, iteration: 129180
loss: 0.9712685942649841,grad_norm: 0.9168824137917403, iteration: 129181
loss: 1.0642744302749634,grad_norm: 0.9999996120362974, iteration: 129182
loss: 0.9815272092819214,grad_norm: 0.9650286172474417, iteration: 129183
loss: 0.9759599566459656,grad_norm: 0.9999991052769248, iteration: 129184
loss: 0.9918392896652222,grad_norm: 0.9999998246179362, iteration: 129185
loss: 1.0130085945129395,grad_norm: 0.8105606923057135, iteration: 129186
loss: 1.0287715196609497,grad_norm: 0.9552440411262002, iteration: 129187
loss: 0.9897172451019287,grad_norm: 0.9999992683794944, iteration: 129188
loss: 1.1091090440750122,grad_norm: 0.9999996313216146, iteration: 129189
loss: 1.047857403755188,grad_norm: 0.9999996501093069, iteration: 129190
loss: 0.9779338240623474,grad_norm: 0.8094419247942105, iteration: 129191
loss: 0.988998532295227,grad_norm: 0.9999990393201839, iteration: 129192
loss: 0.9863393306732178,grad_norm: 0.9999990501546239, iteration: 129193
loss: 1.0010004043579102,grad_norm: 0.9999994762267159, iteration: 129194
loss: 1.0598186254501343,grad_norm: 0.9370749655878716, iteration: 129195
loss: 0.9662367701530457,grad_norm: 0.821730634747527, iteration: 129196
loss: 0.9665285348892212,grad_norm: 0.8239875719219684, iteration: 129197
loss: 0.999792754650116,grad_norm: 0.7812071946661913, iteration: 129198
loss: 1.018121361732483,grad_norm: 0.9753027225625284, iteration: 129199
loss: 1.0321093797683716,grad_norm: 0.999999220608815, iteration: 129200
loss: 1.0141571760177612,grad_norm: 0.8615859855338611, iteration: 129201
loss: 1.011985421180725,grad_norm: 0.9560012296830592, iteration: 129202
loss: 0.9890689849853516,grad_norm: 0.9999991701785883, iteration: 129203
loss: 1.0021271705627441,grad_norm: 0.8160474074549146, iteration: 129204
loss: 0.9707205891609192,grad_norm: 0.9078836447453033, iteration: 129205
loss: 1.0220363140106201,grad_norm: 0.9999997170056925, iteration: 129206
loss: 0.9899494647979736,grad_norm: 0.8074432377851005, iteration: 129207
loss: 1.0044927597045898,grad_norm: 0.8650970424003175, iteration: 129208
loss: 1.0678571462631226,grad_norm: 0.9999998311210411, iteration: 129209
loss: 1.0225099325180054,grad_norm: 0.7319275494974443, iteration: 129210
loss: 1.013858437538147,grad_norm: 0.9551385503154843, iteration: 129211
loss: 0.9757773280143738,grad_norm: 0.881494179812805, iteration: 129212
loss: 1.0965975522994995,grad_norm: 0.9999998465101978, iteration: 129213
loss: 1.039824366569519,grad_norm: 0.9999995950503549, iteration: 129214
loss: 1.0018200874328613,grad_norm: 0.8369640670994889, iteration: 129215
loss: 0.9756121039390564,grad_norm: 0.9740775617467009, iteration: 129216
loss: 1.0101494789123535,grad_norm: 0.9999991285101539, iteration: 129217
loss: 0.999457597732544,grad_norm: 0.849575790540867, iteration: 129218
loss: 0.9833860993385315,grad_norm: 0.9238837437474925, iteration: 129219
loss: 1.0057944059371948,grad_norm: 0.9062810636315428, iteration: 129220
loss: 1.0018340349197388,grad_norm: 0.7636346850013509, iteration: 129221
loss: 0.9982486963272095,grad_norm: 0.9321288336232111, iteration: 129222
loss: 0.9626431465148926,grad_norm: 0.870692937675959, iteration: 129223
loss: 0.9764826893806458,grad_norm: 0.8098073870193172, iteration: 129224
loss: 1.0028374195098877,grad_norm: 0.9999993526558448, iteration: 129225
loss: 0.9820116758346558,grad_norm: 0.8847395712578179, iteration: 129226
loss: 0.9834367632865906,grad_norm: 0.7052832595634911, iteration: 129227
loss: 0.9547499418258667,grad_norm: 0.9999990142823415, iteration: 129228
loss: 0.9860625267028809,grad_norm: 0.8154760511967639, iteration: 129229
loss: 1.0341843366622925,grad_norm: 0.7531941006346016, iteration: 129230
loss: 1.0033769607543945,grad_norm: 0.7398924417658314, iteration: 129231
loss: 0.9818042516708374,grad_norm: 0.815464629855931, iteration: 129232
loss: 1.010816216468811,grad_norm: 0.9438695923091017, iteration: 129233
loss: 1.009979009628296,grad_norm: 0.7746511606039211, iteration: 129234
loss: 1.0070925951004028,grad_norm: 0.8302320036938661, iteration: 129235
loss: 1.0121976137161255,grad_norm: 0.9656545201696382, iteration: 129236
loss: 1.0090469121932983,grad_norm: 0.8397599351160623, iteration: 129237
loss: 0.9772274494171143,grad_norm: 0.9688264687436501, iteration: 129238
loss: 0.9986486434936523,grad_norm: 0.9999991107590246, iteration: 129239
loss: 1.0186952352523804,grad_norm: 0.8527479167797488, iteration: 129240
loss: 0.9902870655059814,grad_norm: 0.7671382198442587, iteration: 129241
loss: 1.025320291519165,grad_norm: 0.9999990512194789, iteration: 129242
loss: 1.0641955137252808,grad_norm: 0.9999993458350113, iteration: 129243
loss: 0.9822195172309875,grad_norm: 0.9711866010934905, iteration: 129244
loss: 1.0448368787765503,grad_norm: 0.9999994197576171, iteration: 129245
loss: 0.9914964437484741,grad_norm: 0.9999991808800608, iteration: 129246
loss: 1.0889750719070435,grad_norm: 0.9706762711359614, iteration: 129247
loss: 1.0910266637802124,grad_norm: 0.9479080297905544, iteration: 129248
loss: 1.0393041372299194,grad_norm: 0.886428368538168, iteration: 129249
loss: 1.0468696355819702,grad_norm: 0.9999998442439993, iteration: 129250
loss: 1.0049453973770142,grad_norm: 0.8718127459104649, iteration: 129251
loss: 0.9793461561203003,grad_norm: 0.8499158763169853, iteration: 129252
loss: 0.9734563827514648,grad_norm: 0.7934288199028918, iteration: 129253
loss: 1.077683925628662,grad_norm: 0.7861779663242862, iteration: 129254
loss: 1.0080543756484985,grad_norm: 0.9999993684671705, iteration: 129255
loss: 0.9458728432655334,grad_norm: 0.9346549782998259, iteration: 129256
loss: 1.0347181558609009,grad_norm: 0.8921900740369952, iteration: 129257
loss: 0.953031063079834,grad_norm: 0.9137708605009985, iteration: 129258
loss: 1.0098941326141357,grad_norm: 0.8932072253922732, iteration: 129259
loss: 1.0597589015960693,grad_norm: 0.999999626354665, iteration: 129260
loss: 0.9796308875083923,grad_norm: 0.934630959310352, iteration: 129261
loss: 0.9698124527931213,grad_norm: 0.9026775089923458, iteration: 129262
loss: 1.0564886331558228,grad_norm: 0.9999998788702645, iteration: 129263
loss: 1.017072319984436,grad_norm: 0.9999994625922466, iteration: 129264
loss: 1.0047029256820679,grad_norm: 0.9995412591424017, iteration: 129265
loss: 1.0257349014282227,grad_norm: 0.9999992571173334, iteration: 129266
loss: 1.0180679559707642,grad_norm: 0.8353578305646953, iteration: 129267
loss: 1.014247179031372,grad_norm: 0.9999993540297569, iteration: 129268
loss: 0.9724175930023193,grad_norm: 0.9207222323007987, iteration: 129269
loss: 0.9778713583946228,grad_norm: 0.8462167719996853, iteration: 129270
loss: 1.037885069847107,grad_norm: 0.999999337875243, iteration: 129271
loss: 0.9634974598884583,grad_norm: 0.9999994071725041, iteration: 129272
loss: 1.0654047727584839,grad_norm: 0.9999999214848039, iteration: 129273
loss: 1.0075324773788452,grad_norm: 0.9999993803535487, iteration: 129274
loss: 1.0468971729278564,grad_norm: 0.9999992145411073, iteration: 129275
loss: 1.029597282409668,grad_norm: 0.9999997672414489, iteration: 129276
loss: 1.0302900075912476,grad_norm: 0.9999993025430844, iteration: 129277
loss: 1.0436220169067383,grad_norm: 0.958053238976443, iteration: 129278
loss: 1.0357964038848877,grad_norm: 0.9999993430967209, iteration: 129279
loss: 1.0336483716964722,grad_norm: 0.8120785521597711, iteration: 129280
loss: 0.9943401217460632,grad_norm: 0.914570657844123, iteration: 129281
loss: 0.9966002106666565,grad_norm: 0.9999999597540601, iteration: 129282
loss: 1.0333222150802612,grad_norm: 0.9999991795881129, iteration: 129283
loss: 1.0210169553756714,grad_norm: 0.9206031882713215, iteration: 129284
loss: 1.014477252960205,grad_norm: 0.9354857580014734, iteration: 129285
loss: 0.9639191627502441,grad_norm: 0.9453422449891054, iteration: 129286
loss: 1.0289576053619385,grad_norm: 0.8912474550540989, iteration: 129287
loss: 1.0792633295059204,grad_norm: 0.9999995194178648, iteration: 129288
loss: 1.0005121231079102,grad_norm: 0.9859152161604116, iteration: 129289
loss: 1.0385640859603882,grad_norm: 0.9710588878584321, iteration: 129290
loss: 0.9928813576698303,grad_norm: 0.8073643023684215, iteration: 129291
loss: 0.9912800192832947,grad_norm: 0.8479703958415287, iteration: 129292
loss: 1.0225944519042969,grad_norm: 0.8313407432928099, iteration: 129293
loss: 0.9909816384315491,grad_norm: 0.7268162107682944, iteration: 129294
loss: 0.9891019463539124,grad_norm: 0.9999993158885694, iteration: 129295
loss: 1.00571870803833,grad_norm: 0.9999999177442163, iteration: 129296
loss: 1.025970458984375,grad_norm: 0.8900695812758892, iteration: 129297
loss: 1.0305136442184448,grad_norm: 0.784744998091298, iteration: 129298
loss: 1.0057927370071411,grad_norm: 0.9840084608359989, iteration: 129299
loss: 1.0066839456558228,grad_norm: 0.8494468793186073, iteration: 129300
loss: 0.9835338592529297,grad_norm: 0.8881953371338632, iteration: 129301
loss: 1.0030630826950073,grad_norm: 0.9999991708539977, iteration: 129302
loss: 0.9801644086837769,grad_norm: 0.8523132673605887, iteration: 129303
loss: 1.0081596374511719,grad_norm: 0.9692795825765246, iteration: 129304
loss: 1.0019561052322388,grad_norm: 0.9598519999142434, iteration: 129305
loss: 1.0742448568344116,grad_norm: 0.9999995146505937, iteration: 129306
loss: 1.0131430625915527,grad_norm: 0.9999993515466423, iteration: 129307
loss: 1.013564109802246,grad_norm: 0.98775678252533, iteration: 129308
loss: 1.0718785524368286,grad_norm: 0.9999990423428644, iteration: 129309
loss: 1.015165090560913,grad_norm: 0.9999996088268865, iteration: 129310
loss: 0.9950025677680969,grad_norm: 0.8777835940439566, iteration: 129311
loss: 0.978736162185669,grad_norm: 0.9999992082310232, iteration: 129312
loss: 0.9979062080383301,grad_norm: 0.9292090940876391, iteration: 129313
loss: 0.9529008269309998,grad_norm: 0.8492380146490575, iteration: 129314
loss: 1.0248997211456299,grad_norm: 0.9462192352838881, iteration: 129315
loss: 1.0226638317108154,grad_norm: 0.9999996958262756, iteration: 129316
loss: 1.020576000213623,grad_norm: 0.8896963641205364, iteration: 129317
loss: 0.9927098751068115,grad_norm: 0.8065715272794609, iteration: 129318
loss: 0.9791324734687805,grad_norm: 0.7893403899670993, iteration: 129319
loss: 1.0030637979507446,grad_norm: 0.9134655152708929, iteration: 129320
loss: 1.021264672279358,grad_norm: 0.8956251613670866, iteration: 129321
loss: 1.00526762008667,grad_norm: 0.9622405391159823, iteration: 129322
loss: 1.0352975130081177,grad_norm: 0.9999999048781089, iteration: 129323
loss: 1.0109553337097168,grad_norm: 0.8658879713862883, iteration: 129324
loss: 1.0060724020004272,grad_norm: 0.7123267920754054, iteration: 129325
loss: 1.0142903327941895,grad_norm: 0.9685915535185535, iteration: 129326
loss: 1.0160698890686035,grad_norm: 0.8124679616375481, iteration: 129327
loss: 0.9398068189620972,grad_norm: 0.999999436596371, iteration: 129328
loss: 0.9822304248809814,grad_norm: 0.9999997773000722, iteration: 129329
loss: 0.9653083682060242,grad_norm: 0.9999991946828297, iteration: 129330
loss: 1.0018091201782227,grad_norm: 0.8024632370807809, iteration: 129331
loss: 1.0149507522583008,grad_norm: 0.9142387531956223, iteration: 129332
loss: 1.0346828699111938,grad_norm: 0.9999995499135311, iteration: 129333
loss: 1.0457216501235962,grad_norm: 0.9999991383975056, iteration: 129334
loss: 1.026716709136963,grad_norm: 0.8868106566759778, iteration: 129335
loss: 1.0125679969787598,grad_norm: 0.9999990708461832, iteration: 129336
loss: 0.9715835452079773,grad_norm: 0.8012181973162527, iteration: 129337
loss: 1.0537046194076538,grad_norm: 0.9999992888792076, iteration: 129338
loss: 0.9687329530715942,grad_norm: 0.9999996590961691, iteration: 129339
loss: 1.061629056930542,grad_norm: 0.9999991414025676, iteration: 129340
loss: 1.0150758028030396,grad_norm: 0.9778852947946904, iteration: 129341
loss: 1.0362449884414673,grad_norm: 0.9525978543986833, iteration: 129342
loss: 1.0353949069976807,grad_norm: 0.9999995326102125, iteration: 129343
loss: 1.0193190574645996,grad_norm: 0.9069248444155538, iteration: 129344
loss: 0.97505784034729,grad_norm: 0.8395469239311352, iteration: 129345
loss: 0.9869933128356934,grad_norm: 0.8956534049713065, iteration: 129346
loss: 0.9923335909843445,grad_norm: 0.82658548986332, iteration: 129347
loss: 0.9893783330917358,grad_norm: 0.8959497523290084, iteration: 129348
loss: 1.014445185661316,grad_norm: 0.9448211887845773, iteration: 129349
loss: 0.9686855673789978,grad_norm: 0.8898071691874334, iteration: 129350
loss: 1.0397807359695435,grad_norm: 0.800669635038279, iteration: 129351
loss: 1.0129075050354004,grad_norm: 0.9638979481965283, iteration: 129352
loss: 0.9999665021896362,grad_norm: 0.8310209336505453, iteration: 129353
loss: 1.0386492013931274,grad_norm: 0.9999998092697303, iteration: 129354
loss: 0.9692769050598145,grad_norm: 0.9999990948652873, iteration: 129355
loss: 1.1433230638504028,grad_norm: 0.9999997878671019, iteration: 129356
loss: 1.0233556032180786,grad_norm: 0.9999998427058308, iteration: 129357
loss: 1.0333995819091797,grad_norm: 0.9999991770199512, iteration: 129358
loss: 1.0273046493530273,grad_norm: 0.8308270238764223, iteration: 129359
loss: 0.9838710427284241,grad_norm: 0.7798700068540557, iteration: 129360
loss: 1.036954402923584,grad_norm: 0.8563863879611651, iteration: 129361
loss: 1.0220272541046143,grad_norm: 0.9999991720590246, iteration: 129362
loss: 1.0224254131317139,grad_norm: 0.9999991946353272, iteration: 129363
loss: 0.9861652851104736,grad_norm: 0.9999991726341264, iteration: 129364
loss: 0.9740712642669678,grad_norm: 0.9999991121004735, iteration: 129365
loss: 1.0235761404037476,grad_norm: 0.9999991766080825, iteration: 129366
loss: 1.1149866580963135,grad_norm: 0.9999998641244489, iteration: 129367
loss: 1.014833688735962,grad_norm: 0.8682971774678253, iteration: 129368
loss: 1.0103572607040405,grad_norm: 0.7934818922532192, iteration: 129369
loss: 0.9935823678970337,grad_norm: 0.9220029418231633, iteration: 129370
loss: 0.9629563093185425,grad_norm: 0.8918549508708057, iteration: 129371
loss: 1.0045099258422852,grad_norm: 0.9929031971417722, iteration: 129372
loss: 0.9940930008888245,grad_norm: 0.9439112756262094, iteration: 129373
loss: 0.9463062882423401,grad_norm: 0.8611627762992574, iteration: 129374
loss: 0.9838573336601257,grad_norm: 0.864399823514332, iteration: 129375
loss: 1.035406470298767,grad_norm: 0.9999990769538422, iteration: 129376
loss: 0.9994596242904663,grad_norm: 0.8508550426104926, iteration: 129377
loss: 0.9888955950737,grad_norm: 0.9014853149650576, iteration: 129378
loss: 1.0508018732070923,grad_norm: 0.999999407549036, iteration: 129379
loss: 1.063423752784729,grad_norm: 0.999999873842769, iteration: 129380
loss: 0.9921286702156067,grad_norm: 0.8112943271348053, iteration: 129381
loss: 0.9885942935943604,grad_norm: 0.9894351724264142, iteration: 129382
loss: 1.0286344289779663,grad_norm: 0.7888831465069496, iteration: 129383
loss: 0.9972621202468872,grad_norm: 0.8864421877117461, iteration: 129384
loss: 1.029579997062683,grad_norm: 0.999999217379601, iteration: 129385
loss: 1.0318657159805298,grad_norm: 0.9967826143516827, iteration: 129386
loss: 1.0969856977462769,grad_norm: 0.999999731900943, iteration: 129387
loss: 1.0476948022842407,grad_norm: 0.9999991721661837, iteration: 129388
loss: 1.0369131565093994,grad_norm: 0.8009252498759405, iteration: 129389
loss: 0.9872455596923828,grad_norm: 0.7855311065390895, iteration: 129390
loss: 0.9962050914764404,grad_norm: 0.8132569157983114, iteration: 129391
loss: 1.0608668327331543,grad_norm: 0.9829063487833553, iteration: 129392
loss: 0.9441976547241211,grad_norm: 0.8427479679051507, iteration: 129393
loss: 0.9853768944740295,grad_norm: 0.8554811689041637, iteration: 129394
loss: 0.9953207969665527,grad_norm: 0.7296628341394868, iteration: 129395
loss: 0.9601350426673889,grad_norm: 0.8842532980004605, iteration: 129396
loss: 0.9913545846939087,grad_norm: 0.9052398943531743, iteration: 129397
loss: 1.0205330848693848,grad_norm: 0.8357703035753317, iteration: 129398
loss: 0.9949618577957153,grad_norm: 0.7367939100162862, iteration: 129399
loss: 1.2667880058288574,grad_norm: 0.9999996396742922, iteration: 129400
loss: 1.022849678993225,grad_norm: 0.9576561537248156, iteration: 129401
loss: 1.0678800344467163,grad_norm: 0.9999998759767762, iteration: 129402
loss: 0.9901101589202881,grad_norm: 0.924894954621696, iteration: 129403
loss: 1.023978590965271,grad_norm: 0.9999992815296651, iteration: 129404
loss: 0.9922075867652893,grad_norm: 0.8677068571345062, iteration: 129405
loss: 1.0602669715881348,grad_norm: 1.0000000412029613, iteration: 129406
loss: 1.0171055793762207,grad_norm: 0.7615208224061357, iteration: 129407
loss: 0.9708302021026611,grad_norm: 0.8667354553642315, iteration: 129408
loss: 1.1111985445022583,grad_norm: 0.8491251074300507, iteration: 129409
loss: 1.0223463773727417,grad_norm: 0.9999992462729229, iteration: 129410
loss: 1.015885591506958,grad_norm: 0.7768157323035677, iteration: 129411
loss: 1.013077735900879,grad_norm: 0.9999994383410508, iteration: 129412
loss: 1.0122584104537964,grad_norm: 0.9999992099738485, iteration: 129413
loss: 1.0482455492019653,grad_norm: 0.9999998155076391, iteration: 129414
loss: 1.0341153144836426,grad_norm: 0.9999992632688527, iteration: 129415
loss: 1.0514440536499023,grad_norm: 0.9700612686675006, iteration: 129416
loss: 1.2898211479187012,grad_norm: 0.9999994175830659, iteration: 129417
loss: 0.9932899475097656,grad_norm: 0.8138336209213146, iteration: 129418
loss: 0.9891505241394043,grad_norm: 0.9999990081432212, iteration: 129419
loss: 1.0269047021865845,grad_norm: 0.9999990546099334, iteration: 129420
loss: 1.0368340015411377,grad_norm: 0.9999990985403864, iteration: 129421
loss: 1.023093581199646,grad_norm: 0.9999998973093284, iteration: 129422
loss: 1.0529539585113525,grad_norm: 0.9999993056632381, iteration: 129423
loss: 1.1071977615356445,grad_norm: 0.9999993790545637, iteration: 129424
loss: 1.0089945793151855,grad_norm: 0.7902570263380311, iteration: 129425
loss: 1.0142416954040527,grad_norm: 0.9999992621772777, iteration: 129426
loss: 1.0453959703445435,grad_norm: 0.999999137300746, iteration: 129427
loss: 1.006988286972046,grad_norm: 0.9999991000832817, iteration: 129428
loss: 1.064611554145813,grad_norm: 0.9999994998897881, iteration: 129429
loss: 1.0471965074539185,grad_norm: 0.9526800137809, iteration: 129430
loss: 1.0237500667572021,grad_norm: 0.8574498673703553, iteration: 129431
loss: 1.026503324508667,grad_norm: 0.9999994649913073, iteration: 129432
loss: 0.9570260643959045,grad_norm: 0.9999990620230768, iteration: 129433
loss: 1.0819978713989258,grad_norm: 0.9999997227607965, iteration: 129434
loss: 1.0155858993530273,grad_norm: 0.9730131708376677, iteration: 129435
loss: 0.9713836312294006,grad_norm: 0.9623910185324283, iteration: 129436
loss: 1.0233725309371948,grad_norm: 0.9999991965259615, iteration: 129437
loss: 0.9688842296600342,grad_norm: 0.9999995477531787, iteration: 129438
loss: 0.9976999759674072,grad_norm: 0.7770904246304314, iteration: 129439
loss: 1.0220577716827393,grad_norm: 0.9999998310110203, iteration: 129440
loss: 1.0178892612457275,grad_norm: 0.999999217449886, iteration: 129441
loss: 1.0224735736846924,grad_norm: 0.9999993809497364, iteration: 129442
loss: 0.9894468188285828,grad_norm: 0.9999991119144465, iteration: 129443
loss: 1.034205675125122,grad_norm: 0.9999993266761366, iteration: 129444
loss: 0.9968764185905457,grad_norm: 0.9999991839013763, iteration: 129445
loss: 0.9830090403556824,grad_norm: 0.9340193584708337, iteration: 129446
loss: 1.033109188079834,grad_norm: 0.9999995701880204, iteration: 129447
loss: 0.9813194870948792,grad_norm: 0.8038191922387141, iteration: 129448
loss: 1.0417344570159912,grad_norm: 0.9999991342730795, iteration: 129449
loss: 1.015235185623169,grad_norm: 0.9999991434165689, iteration: 129450
loss: 1.0254244804382324,grad_norm: 0.9999991028511812, iteration: 129451
loss: 0.9642464518547058,grad_norm: 0.930083938235595, iteration: 129452
loss: 1.046419382095337,grad_norm: 0.9999991058904811, iteration: 129453
loss: 0.9756472706794739,grad_norm: 0.7130685587024415, iteration: 129454
loss: 1.0853723287582397,grad_norm: 0.8443564436253899, iteration: 129455
loss: 1.0473737716674805,grad_norm: 0.9999992237786113, iteration: 129456
loss: 1.089081048965454,grad_norm: 0.9999998407362327, iteration: 129457
loss: 1.1052367687225342,grad_norm: 0.9999992868935135, iteration: 129458
loss: 1.021048903465271,grad_norm: 0.855616749475802, iteration: 129459
loss: 1.0669455528259277,grad_norm: 0.9999999043715291, iteration: 129460
loss: 1.029425024986267,grad_norm: 0.963883951929975, iteration: 129461
loss: 1.1568243503570557,grad_norm: 1.0000000577107222, iteration: 129462
loss: 0.9820154309272766,grad_norm: 0.9999990713874288, iteration: 129463
loss: 1.0335321426391602,grad_norm: 0.8758325443180528, iteration: 129464
loss: 0.9762772917747498,grad_norm: 0.9999990757046443, iteration: 129465
loss: 1.0164234638214111,grad_norm: 0.9999996840761125, iteration: 129466
loss: 1.0153313875198364,grad_norm: 0.9999998879866698, iteration: 129467
loss: 1.002417802810669,grad_norm: 0.8572004796928703, iteration: 129468
loss: 1.1347407102584839,grad_norm: 0.9999998112671374, iteration: 129469
loss: 1.05812406539917,grad_norm: 0.9999993686655329, iteration: 129470
loss: 1.04721999168396,grad_norm: 0.9999991410839848, iteration: 129471
loss: 1.0088748931884766,grad_norm: 0.9999994543569434, iteration: 129472
loss: 1.091999888420105,grad_norm: 0.9999996809086446, iteration: 129473
loss: 1.0850296020507812,grad_norm: 0.999999854414511, iteration: 129474
loss: 1.0546056032180786,grad_norm: 0.9999995328908748, iteration: 129475
loss: 1.0233783721923828,grad_norm: 0.9999992387543538, iteration: 129476
loss: 1.3381991386413574,grad_norm: 0.99999972370866, iteration: 129477
loss: 1.0272327661514282,grad_norm: 0.9999993989228935, iteration: 129478
loss: 1.03593909740448,grad_norm: 0.9999994366697013, iteration: 129479
loss: 1.0220168828964233,grad_norm: 0.9999992494812531, iteration: 129480
loss: 1.0082783699035645,grad_norm: 0.9999993122231724, iteration: 129481
loss: 1.0882915258407593,grad_norm: 0.9999995559768553, iteration: 129482
loss: 1.0611621141433716,grad_norm: 0.999999859287131, iteration: 129483
loss: 1.0114063024520874,grad_norm: 0.9999990168397446, iteration: 129484
loss: 1.0458855628967285,grad_norm: 0.9999995521571892, iteration: 129485
loss: 0.984091579914093,grad_norm: 0.9611695812933204, iteration: 129486
loss: 1.0680146217346191,grad_norm: 0.9293179488569258, iteration: 129487
loss: 1.0759832859039307,grad_norm: 0.9999996088571285, iteration: 129488
loss: 1.0308802127838135,grad_norm: 0.9999990637495977, iteration: 129489
loss: 1.025956630706787,grad_norm: 0.9999992847006598, iteration: 129490
loss: 1.0020033121109009,grad_norm: 0.9999999291191989, iteration: 129491
loss: 1.0456132888793945,grad_norm: 0.9999995185287397, iteration: 129492
loss: 1.0110585689544678,grad_norm: 0.9999991776049653, iteration: 129493
loss: 1.0328060388565063,grad_norm: 0.9999999567411423, iteration: 129494
loss: 1.0274739265441895,grad_norm: 0.9999991707358573, iteration: 129495
loss: 0.9871711134910583,grad_norm: 0.9185889890728024, iteration: 129496
loss: 1.143884301185608,grad_norm: 0.9999994205752202, iteration: 129497
loss: 0.9603679180145264,grad_norm: 0.7875129905085531, iteration: 129498
loss: 1.0325278043746948,grad_norm: 0.9999995385618428, iteration: 129499
loss: 0.9992734789848328,grad_norm: 0.7816850373260994, iteration: 129500
loss: 1.100474238395691,grad_norm: 0.9999991683813843, iteration: 129501
loss: 1.0000646114349365,grad_norm: 0.7205902086938343, iteration: 129502
loss: 1.0951886177062988,grad_norm: 0.999999337446899, iteration: 129503
loss: 1.0366616249084473,grad_norm: 0.9999993539466697, iteration: 129504
loss: 1.0994161367416382,grad_norm: 0.9999990835354958, iteration: 129505
loss: 1.0362215042114258,grad_norm: 0.938671724897582, iteration: 129506
loss: 1.0840444564819336,grad_norm: 0.9999991845641656, iteration: 129507
loss: 1.0399876832962036,grad_norm: 0.9999993332166315, iteration: 129508
loss: 1.0536330938339233,grad_norm: 0.9999992372534438, iteration: 129509
loss: 1.02687668800354,grad_norm: 0.882931943330546, iteration: 129510
loss: 1.0766521692276,grad_norm: 0.9999993070527986, iteration: 129511
loss: 1.0340603590011597,grad_norm: 0.9999994039902752, iteration: 129512
loss: 1.0506231784820557,grad_norm: 0.999999167174623, iteration: 129513
loss: 0.9935068488121033,grad_norm: 0.8197312282341767, iteration: 129514
loss: 1.1027400493621826,grad_norm: 0.9999994592248355, iteration: 129515
loss: 1.052356243133545,grad_norm: 0.9999992182507748, iteration: 129516
loss: 1.1351665258407593,grad_norm: 0.9268432678079581, iteration: 129517
loss: 1.020975947380066,grad_norm: 0.9999996078004817, iteration: 129518
loss: 1.0150797367095947,grad_norm: 0.9903160625758214, iteration: 129519
loss: 1.0102306604385376,grad_norm: 0.9999995163077467, iteration: 129520
loss: 1.0188286304473877,grad_norm: 0.9999996653281225, iteration: 129521
loss: 0.9703635573387146,grad_norm: 0.9379781715198305, iteration: 129522
loss: 1.0769460201263428,grad_norm: 0.9999993200247531, iteration: 129523
loss: 1.0537642240524292,grad_norm: 0.9999990557486682, iteration: 129524
loss: 1.1228238344192505,grad_norm: 0.9999993484211435, iteration: 129525
loss: 0.951731264591217,grad_norm: 0.999999141477804, iteration: 129526
loss: 1.0123083591461182,grad_norm: 0.8742191453442321, iteration: 129527
loss: 1.0307070016860962,grad_norm: 0.9572005446801486, iteration: 129528
loss: 0.9951654672622681,grad_norm: 0.7826871249949442, iteration: 129529
loss: 1.086105465888977,grad_norm: 0.9999997776471671, iteration: 129530
loss: 1.0680214166641235,grad_norm: 0.9999992271484496, iteration: 129531
loss: 0.9453237652778625,grad_norm: 0.833439368883436, iteration: 129532
loss: 1.0208836793899536,grad_norm: 0.9999996046514313, iteration: 129533
loss: 0.9909602403640747,grad_norm: 0.7741148315925653, iteration: 129534
loss: 0.9742139577865601,grad_norm: 0.9232793692457545, iteration: 129535
loss: 1.0140269994735718,grad_norm: 0.8021487315431544, iteration: 129536
loss: 1.0417258739471436,grad_norm: 0.9999998356486138, iteration: 129537
loss: 0.9887529015541077,grad_norm: 0.9999998225753255, iteration: 129538
loss: 0.9971089363098145,grad_norm: 0.9288166867471807, iteration: 129539
loss: 0.9801783561706543,grad_norm: 0.9510897339196629, iteration: 129540
loss: 1.010028600692749,grad_norm: 0.9999992526857922, iteration: 129541
loss: 1.0287643671035767,grad_norm: 0.9999997000423911, iteration: 129542
loss: 0.988057017326355,grad_norm: 0.9797614620990651, iteration: 129543
loss: 1.0100153684616089,grad_norm: 0.9999991448441439, iteration: 129544
loss: 1.033228874206543,grad_norm: 0.980423201185767, iteration: 129545
loss: 1.0388540029525757,grad_norm: 0.9999997133233484, iteration: 129546
loss: 0.9741485714912415,grad_norm: 0.6563944605220831, iteration: 129547
loss: 1.203885555267334,grad_norm: 0.9999998970758494, iteration: 129548
loss: 1.0112146139144897,grad_norm: 0.9179991171386719, iteration: 129549
loss: 1.0172739028930664,grad_norm: 0.7825648910402321, iteration: 129550
loss: 0.9994373321533203,grad_norm: 0.8150431101109612, iteration: 129551
loss: 0.9848682880401611,grad_norm: 0.9001168990643765, iteration: 129552
loss: 1.1110255718231201,grad_norm: 0.9999996282800566, iteration: 129553
loss: 1.11077880859375,grad_norm: 0.9999998660701676, iteration: 129554
loss: 1.0166162252426147,grad_norm: 0.9999993188897233, iteration: 129555
loss: 1.028890609741211,grad_norm: 0.9846194808078637, iteration: 129556
loss: 0.9664552807807922,grad_norm: 0.8949477646230464, iteration: 129557
loss: 1.0440855026245117,grad_norm: 0.999999829655504, iteration: 129558
loss: 1.035812497138977,grad_norm: 0.9999993372539413, iteration: 129559
loss: 1.0233664512634277,grad_norm: 0.977129993519644, iteration: 129560
loss: 1.1293041706085205,grad_norm: 0.9999995420819869, iteration: 129561
loss: 1.0247732400894165,grad_norm: 0.9999996129432212, iteration: 129562
loss: 1.06647527217865,grad_norm: 0.9999990057120409, iteration: 129563
loss: 1.028074026107788,grad_norm: 0.9999993907477517, iteration: 129564
loss: 0.9691236019134521,grad_norm: 0.9999991755179608, iteration: 129565
loss: 0.9985454082489014,grad_norm: 0.7785789949009749, iteration: 129566
loss: 1.0466020107269287,grad_norm: 0.9999998729086845, iteration: 129567
loss: 0.9608008861541748,grad_norm: 0.7236965347944354, iteration: 129568
loss: 0.9844104647636414,grad_norm: 0.7293891146384455, iteration: 129569
loss: 1.0079151391983032,grad_norm: 0.8390428197387433, iteration: 129570
loss: 1.0181695222854614,grad_norm: 0.9510195701689217, iteration: 129571
loss: 1.0490936040878296,grad_norm: 0.9999992332629081, iteration: 129572
loss: 0.9891195297241211,grad_norm: 0.999128220895252, iteration: 129573
loss: 1.0474215745925903,grad_norm: 0.99999932065442, iteration: 129574
loss: 1.0424778461456299,grad_norm: 0.981552001941747, iteration: 129575
loss: 1.0520288944244385,grad_norm: 0.9999995707774698, iteration: 129576
loss: 1.0071171522140503,grad_norm: 0.9999990923219141, iteration: 129577
loss: 1.013710618019104,grad_norm: 0.9999996662614196, iteration: 129578
loss: 1.0341808795928955,grad_norm: 0.9999996534649921, iteration: 129579
loss: 1.0074820518493652,grad_norm: 0.9206713140151788, iteration: 129580
loss: 1.0554797649383545,grad_norm: 0.9999998533211598, iteration: 129581
loss: 1.0683958530426025,grad_norm: 0.9999993248549951, iteration: 129582
loss: 1.0004115104675293,grad_norm: 0.8292104884845635, iteration: 129583
loss: 1.0069257020950317,grad_norm: 0.8245218899131636, iteration: 129584
loss: 1.1242904663085938,grad_norm: 0.9999996842783773, iteration: 129585
loss: 1.0472887754440308,grad_norm: 0.9999991547917063, iteration: 129586
loss: 1.0061932802200317,grad_norm: 0.9182781746324009, iteration: 129587
loss: 1.03115713596344,grad_norm: 0.9999992469008034, iteration: 129588
loss: 1.0034762620925903,grad_norm: 0.8084006806135834, iteration: 129589
loss: 1.009102463722229,grad_norm: 0.9999991690154618, iteration: 129590
loss: 1.0426373481750488,grad_norm: 0.9999989844217074, iteration: 129591
loss: 1.021360158920288,grad_norm: 0.9448908230030554, iteration: 129592
loss: 1.015627145767212,grad_norm: 0.7993688315832191, iteration: 129593
loss: 1.0436798334121704,grad_norm: 0.9999992746686508, iteration: 129594
loss: 0.9966991543769836,grad_norm: 0.9999991915785016, iteration: 129595
loss: 1.0095274448394775,grad_norm: 0.8385332705056192, iteration: 129596
loss: 1.032041311264038,grad_norm: 0.9999999994213559, iteration: 129597
loss: 1.035131812095642,grad_norm: 0.8660131590209968, iteration: 129598
loss: 1.0198818445205688,grad_norm: 0.8426922958648742, iteration: 129599
loss: 1.0403920412063599,grad_norm: 0.9999995148958175, iteration: 129600
loss: 1.0426666736602783,grad_norm: 0.9924946212761354, iteration: 129601
loss: 1.0552901029586792,grad_norm: 0.9999998999272399, iteration: 129602
loss: 1.0531771183013916,grad_norm: 0.9999995008459894, iteration: 129603
loss: 1.0120679140090942,grad_norm: 0.9999995450865858, iteration: 129604
loss: 0.9803110361099243,grad_norm: 0.999999507036274, iteration: 129605
loss: 1.0222227573394775,grad_norm: 0.9999996393760144, iteration: 129606
loss: 1.1089482307434082,grad_norm: 0.9999992354486793, iteration: 129607
loss: 1.0882110595703125,grad_norm: 0.9999991207614526, iteration: 129608
loss: 1.0121634006500244,grad_norm: 0.8462535981796424, iteration: 129609
loss: 1.027058482170105,grad_norm: 0.8005588054732918, iteration: 129610
loss: 1.0931882858276367,grad_norm: 0.9999996980098713, iteration: 129611
loss: 1.0271399021148682,grad_norm: 0.9765631758997884, iteration: 129612
loss: 1.0779552459716797,grad_norm: 0.8912854994095315, iteration: 129613
loss: 1.3543522357940674,grad_norm: 0.9999998218486572, iteration: 129614
loss: 0.9886191487312317,grad_norm: 0.9150351497553455, iteration: 129615
loss: 0.9819772839546204,grad_norm: 0.9203884694211444, iteration: 129616
loss: 0.9562518000602722,grad_norm: 0.9999991842232931, iteration: 129617
loss: 1.104449987411499,grad_norm: 0.9999995922429611, iteration: 129618
loss: 1.0810455083847046,grad_norm: 0.9999997905180664, iteration: 129619
loss: 1.0828144550323486,grad_norm: 0.9999994579374781, iteration: 129620
loss: 1.0118136405944824,grad_norm: 0.9999990791855232, iteration: 129621
loss: 1.0000436305999756,grad_norm: 0.9999991034840175, iteration: 129622
loss: 0.9570947289466858,grad_norm: 0.8953060777564447, iteration: 129623
loss: 1.1087899208068848,grad_norm: 0.9999996079606162, iteration: 129624
loss: 1.2021602392196655,grad_norm: 0.9999997725890276, iteration: 129625
loss: 1.031402349472046,grad_norm: 0.9999999637460506, iteration: 129626
loss: 0.9948753118515015,grad_norm: 0.8050488617716491, iteration: 129627
loss: 1.0439472198486328,grad_norm: 0.999999465049494, iteration: 129628
loss: 1.1095682382583618,grad_norm: 1.0000000249936893, iteration: 129629
loss: 1.0580693483352661,grad_norm: 0.9999994112248209, iteration: 129630
loss: 0.9631761908531189,grad_norm: 0.9999991037328279, iteration: 129631
loss: 0.9993441104888916,grad_norm: 0.9999993620089825, iteration: 129632
loss: 1.0318523645401,grad_norm: 0.9999997617553734, iteration: 129633
loss: 1.0388280153274536,grad_norm: 0.7997331633538952, iteration: 129634
loss: 1.0305006504058838,grad_norm: 0.999999540628801, iteration: 129635
loss: 1.0255799293518066,grad_norm: 0.9999997184745677, iteration: 129636
loss: 1.0332096815109253,grad_norm: 0.9636029695475821, iteration: 129637
loss: 1.0094338655471802,grad_norm: 0.7913684414532187, iteration: 129638
loss: 0.9882926940917969,grad_norm: 0.7954996079930109, iteration: 129639
loss: 1.0140876770019531,grad_norm: 0.9175038189873782, iteration: 129640
loss: 1.0770928859710693,grad_norm: 0.9999990561259904, iteration: 129641
loss: 0.9935885667800903,grad_norm: 0.8678607929569865, iteration: 129642
loss: 1.2632157802581787,grad_norm: 0.9999997121675548, iteration: 129643
loss: 1.0197392702102661,grad_norm: 0.9783395731509641, iteration: 129644
loss: 1.0465912818908691,grad_norm: 0.9999991469755227, iteration: 129645
loss: 1.0337976217269897,grad_norm: 0.9999992511691244, iteration: 129646
loss: 0.9798521995544434,grad_norm: 0.9710942953961204, iteration: 129647
loss: 1.0575339794158936,grad_norm: 0.9999994218805979, iteration: 129648
loss: 0.9550018310546875,grad_norm: 0.9251536892025172, iteration: 129649
loss: 0.9955892562866211,grad_norm: 0.999999556118297, iteration: 129650
loss: 1.0440855026245117,grad_norm: 0.9999989939020661, iteration: 129651
loss: 0.9693576097488403,grad_norm: 0.8848628549491593, iteration: 129652
loss: 0.9864671230316162,grad_norm: 0.9999991155972253, iteration: 129653
loss: 0.9903113842010498,grad_norm: 0.9999997096710384, iteration: 129654
loss: 1.0071536302566528,grad_norm: 0.9999990077469649, iteration: 129655
loss: 1.0272719860076904,grad_norm: 0.9999993988330493, iteration: 129656
loss: 1.022415041923523,grad_norm: 0.9999991596739058, iteration: 129657
loss: 1.0156750679016113,grad_norm: 0.9999992009844147, iteration: 129658
loss: 1.0122098922729492,grad_norm: 0.9999997808315152, iteration: 129659
loss: 1.0132522583007812,grad_norm: 0.93527475347026, iteration: 129660
loss: 1.1120457649230957,grad_norm: 0.9999999305669931, iteration: 129661
loss: 0.9948031306266785,grad_norm: 0.9999996611306861, iteration: 129662
loss: 1.0483351945877075,grad_norm: 0.9267961245323413, iteration: 129663
loss: 1.0392823219299316,grad_norm: 0.9999994482617245, iteration: 129664
loss: 1.0194787979125977,grad_norm: 0.9809521718341526, iteration: 129665
loss: 0.9543586373329163,grad_norm: 0.9349640705126228, iteration: 129666
loss: 0.9904232025146484,grad_norm: 0.7416730956927099, iteration: 129667
loss: 0.9953580498695374,grad_norm: 0.9999991407964279, iteration: 129668
loss: 0.9854884147644043,grad_norm: 0.9999990849572487, iteration: 129669
loss: 1.0409226417541504,grad_norm: 0.9999991021590563, iteration: 129670
loss: 1.0249139070510864,grad_norm: 0.9999996671144855, iteration: 129671
loss: 0.977086067199707,grad_norm: 0.8484875825244675, iteration: 129672
loss: 1.0038388967514038,grad_norm: 0.9999997107053068, iteration: 129673
loss: 1.0242687463760376,grad_norm: 0.8120357887501565, iteration: 129674
loss: 1.0053383111953735,grad_norm: 0.9999990431034754, iteration: 129675
loss: 1.04417085647583,grad_norm: 0.999999259810065, iteration: 129676
loss: 1.002027153968811,grad_norm: 0.9999996665726396, iteration: 129677
loss: 0.9919256567955017,grad_norm: 0.8403855423619176, iteration: 129678
loss: 0.9904033541679382,grad_norm: 0.7487504907593081, iteration: 129679
loss: 1.0218029022216797,grad_norm: 0.999999178982913, iteration: 129680
loss: 1.0413368940353394,grad_norm: 0.9999995011104903, iteration: 129681
loss: 1.004193663597107,grad_norm: 0.999999092935109, iteration: 129682
loss: 1.0006612539291382,grad_norm: 0.7692649366498928, iteration: 129683
loss: 1.008104920387268,grad_norm: 0.9999994920677633, iteration: 129684
loss: 1.1404635906219482,grad_norm: 0.9999998318905526, iteration: 129685
loss: 1.0206224918365479,grad_norm: 0.9999996577561092, iteration: 129686
loss: 1.001606822013855,grad_norm: 0.8529660884201339, iteration: 129687
loss: 1.094724178314209,grad_norm: 0.9999999603659935, iteration: 129688
loss: 1.0381394624710083,grad_norm: 0.999999423980561, iteration: 129689
loss: 1.0065480470657349,grad_norm: 0.9999994301014563, iteration: 129690
loss: 1.0012829303741455,grad_norm: 0.9601606169195901, iteration: 129691
loss: 1.0287437438964844,grad_norm: 0.9999993507177579, iteration: 129692
loss: 0.9745916128158569,grad_norm: 0.8270813990774676, iteration: 129693
loss: 1.178920030593872,grad_norm: 0.9999993712949133, iteration: 129694
loss: 1.0187923908233643,grad_norm: 0.9999993188899926, iteration: 129695
loss: 0.9777688384056091,grad_norm: 0.9680433416452385, iteration: 129696
loss: 0.9789605736732483,grad_norm: 0.9299046636732368, iteration: 129697
loss: 0.979831337928772,grad_norm: 0.7985728690644182, iteration: 129698
loss: 1.0823112726211548,grad_norm: 0.999999585718871, iteration: 129699
loss: 1.0136507749557495,grad_norm: 0.9999991895392263, iteration: 129700
loss: 1.0130194425582886,grad_norm: 0.9179001458885188, iteration: 129701
loss: 1.0127580165863037,grad_norm: 0.8446631156569967, iteration: 129702
loss: 1.0088320970535278,grad_norm: 0.9863444287245587, iteration: 129703
loss: 1.0303471088409424,grad_norm: 0.9999998981108256, iteration: 129704
loss: 1.0196521282196045,grad_norm: 0.9999990242639225, iteration: 129705
loss: 1.0242938995361328,grad_norm: 0.9129419925062354, iteration: 129706
loss: 0.9864370226860046,grad_norm: 0.931006972580757, iteration: 129707
loss: 0.9798210859298706,grad_norm: 0.9226201394496671, iteration: 129708
loss: 0.9870327711105347,grad_norm: 0.9553711208628685, iteration: 129709
loss: 1.033029556274414,grad_norm: 0.9999995930081862, iteration: 129710
loss: 0.9849126935005188,grad_norm: 0.9054096469113648, iteration: 129711
loss: 0.9782275557518005,grad_norm: 0.8937852029542933, iteration: 129712
loss: 0.9978848695755005,grad_norm: 0.8345521544048184, iteration: 129713
loss: 1.0592998266220093,grad_norm: 0.9999995783347522, iteration: 129714
loss: 1.058808445930481,grad_norm: 0.9455930940478642, iteration: 129715
loss: 0.9969146251678467,grad_norm: 0.999999001017369, iteration: 129716
loss: 1.0443717241287231,grad_norm: 0.9999992817222932, iteration: 129717
loss: 1.0359739065170288,grad_norm: 0.999999075687183, iteration: 129718
loss: 1.0395314693450928,grad_norm: 0.9999996497650643, iteration: 129719
loss: 0.9713312983512878,grad_norm: 0.9999996764118162, iteration: 129720
loss: 1.0248862504959106,grad_norm: 0.9999990547458381, iteration: 129721
loss: 0.9822942018508911,grad_norm: 0.8750409131601791, iteration: 129722
loss: 0.9921525716781616,grad_norm: 0.9999994186343768, iteration: 129723
loss: 1.0779893398284912,grad_norm: 0.9999990072223416, iteration: 129724
loss: 0.9783763289451599,grad_norm: 0.9999991217637141, iteration: 129725
loss: 0.9868907928466797,grad_norm: 0.8497321871325483, iteration: 129726
loss: 0.9980359077453613,grad_norm: 0.9999992551273473, iteration: 129727
loss: 0.9960284233093262,grad_norm: 0.928939782202219, iteration: 129728
loss: 1.069863200187683,grad_norm: 0.9710726562944678, iteration: 129729
loss: 1.015235424041748,grad_norm: 0.999999188346584, iteration: 129730
loss: 1.0022944211959839,grad_norm: 0.8659937640665453, iteration: 129731
loss: 1.1005926132202148,grad_norm: 0.9999998672158252, iteration: 129732
loss: 1.034422516822815,grad_norm: 0.9999998365523822, iteration: 129733
loss: 1.0665488243103027,grad_norm: 0.9999997285977938, iteration: 129734
loss: 0.9992371797561646,grad_norm: 0.9852837479313666, iteration: 129735
loss: 1.0720493793487549,grad_norm: 0.9999999027981252, iteration: 129736
loss: 0.9796989560127258,grad_norm: 0.7768422353734684, iteration: 129737
loss: 1.0255794525146484,grad_norm: 0.7908460869538265, iteration: 129738
loss: 1.009856939315796,grad_norm: 0.9999996822229635, iteration: 129739
loss: 1.0868943929672241,grad_norm: 0.9999991708818393, iteration: 129740
loss: 0.9963140487670898,grad_norm: 0.8620790372824345, iteration: 129741
loss: 1.066405177116394,grad_norm: 0.9999998953642134, iteration: 129742
loss: 0.9931628108024597,grad_norm: 0.8694073422562315, iteration: 129743
loss: 0.9954182505607605,grad_norm: 0.9975579019350872, iteration: 129744
loss: 1.0380836725234985,grad_norm: 0.7994205345059342, iteration: 129745
loss: 1.015937089920044,grad_norm: 0.9999991943001023, iteration: 129746
loss: 0.9950249195098877,grad_norm: 0.9895778416041463, iteration: 129747
loss: 1.0180519819259644,grad_norm: 0.9060288473443084, iteration: 129748
loss: 0.990296483039856,grad_norm: 0.8846766260320582, iteration: 129749
loss: 0.9842668175697327,grad_norm: 0.9999990668326231, iteration: 129750
loss: 1.0284671783447266,grad_norm: 0.92289904736553, iteration: 129751
loss: 1.0056110620498657,grad_norm: 0.8361908931470042, iteration: 129752
loss: 1.009946584701538,grad_norm: 0.8162326608211626, iteration: 129753
loss: 1.0424392223358154,grad_norm: 0.99999970526746, iteration: 129754
loss: 1.0003629922866821,grad_norm: 0.9509331926399078, iteration: 129755
loss: 1.0066077709197998,grad_norm: 0.7591838889440419, iteration: 129756
loss: 1.0341362953186035,grad_norm: 0.9059351079209605, iteration: 129757
loss: 0.9664685130119324,grad_norm: 0.9999989965942521, iteration: 129758
loss: 0.9842283725738525,grad_norm: 0.7699593685653672, iteration: 129759
loss: 1.0219544172286987,grad_norm: 0.9365782737512095, iteration: 129760
loss: 0.9732667803764343,grad_norm: 0.8290552007589631, iteration: 129761
loss: 1.0328773260116577,grad_norm: 0.9088439942957548, iteration: 129762
loss: 1.0309083461761475,grad_norm: 0.9999991303004673, iteration: 129763
loss: 0.9988235831260681,grad_norm: 0.9793835054356067, iteration: 129764
loss: 1.0084909200668335,grad_norm: 0.8644752087568888, iteration: 129765
loss: 1.027551531791687,grad_norm: 0.7733834467225488, iteration: 129766
loss: 1.0236847400665283,grad_norm: 0.908409041111837, iteration: 129767
loss: 1.0067106485366821,grad_norm: 0.8191227023377518, iteration: 129768
loss: 1.1589404344558716,grad_norm: 0.999999731734044, iteration: 129769
loss: 1.073009967803955,grad_norm: 0.9999998984453979, iteration: 129770
loss: 1.0190693140029907,grad_norm: 0.9999998401038578, iteration: 129771
loss: 1.0010353326797485,grad_norm: 0.9999990056192215, iteration: 129772
loss: 1.0242902040481567,grad_norm: 0.7711982565581075, iteration: 129773
loss: 0.9791655540466309,grad_norm: 0.999999502656931, iteration: 129774
loss: 1.0109058618545532,grad_norm: 0.9830459718295192, iteration: 129775
loss: 1.1555712223052979,grad_norm: 0.9999998744346046, iteration: 129776
loss: 0.9693050384521484,grad_norm: 0.9577821704291121, iteration: 129777
loss: 1.013149380683899,grad_norm: 0.8950003942663785, iteration: 129778
loss: 0.9982354640960693,grad_norm: 0.8920386347170641, iteration: 129779
loss: 1.0044246912002563,grad_norm: 0.8893440835136338, iteration: 129780
loss: 1.0089629888534546,grad_norm: 0.736370608906753, iteration: 129781
loss: 1.0101960897445679,grad_norm: 0.9947289036257798, iteration: 129782
loss: 1.013624906539917,grad_norm: 0.8047981987501229, iteration: 129783
loss: 0.979366660118103,grad_norm: 0.9999991963280925, iteration: 129784
loss: 1.0336320400238037,grad_norm: 0.9444353597246135, iteration: 129785
loss: 0.9966121912002563,grad_norm: 0.758909242698825, iteration: 129786
loss: 1.0027378797531128,grad_norm: 0.7346630817530763, iteration: 129787
loss: 1.0233876705169678,grad_norm: 0.9000717790900367, iteration: 129788
loss: 1.0235698223114014,grad_norm: 0.9752478124349251, iteration: 129789
loss: 0.9854782223701477,grad_norm: 0.769738946650304, iteration: 129790
loss: 1.0886858701705933,grad_norm: 0.999999430784852, iteration: 129791
loss: 1.0239558219909668,grad_norm: 0.8724438302840204, iteration: 129792
loss: 1.0426636934280396,grad_norm: 0.9187761264206196, iteration: 129793
loss: 0.9626721739768982,grad_norm: 0.8981111146134406, iteration: 129794
loss: 1.0272700786590576,grad_norm: 0.9087907456285732, iteration: 129795
loss: 1.0119545459747314,grad_norm: 0.8034296262427411, iteration: 129796
loss: 1.001386046409607,grad_norm: 0.7474089177568015, iteration: 129797
loss: 1.0081754922866821,grad_norm: 0.8205948062122574, iteration: 129798
loss: 0.962579607963562,grad_norm: 0.8333128147632402, iteration: 129799
loss: 0.9888685941696167,grad_norm: 0.9999992272331478, iteration: 129800
loss: 1.0105500221252441,grad_norm: 0.9999992002563294, iteration: 129801
loss: 1.0059360265731812,grad_norm: 0.7233233503264642, iteration: 129802
loss: 0.9865177273750305,grad_norm: 0.872618279192264, iteration: 129803
loss: 1.0159332752227783,grad_norm: 0.9999991672726302, iteration: 129804
loss: 1.0075302124023438,grad_norm: 0.9061644024088954, iteration: 129805
loss: 1.0041776895523071,grad_norm: 0.914467322910288, iteration: 129806
loss: 1.0098141431808472,grad_norm: 0.7498365853710152, iteration: 129807
loss: 1.0623489618301392,grad_norm: 0.9999998933648596, iteration: 129808
loss: 0.9604390859603882,grad_norm: 0.8737698427965599, iteration: 129809
loss: 1.1415362358093262,grad_norm: 0.9999992288271701, iteration: 129810
loss: 1.0147455930709839,grad_norm: 0.9417243552466236, iteration: 129811
loss: 0.9947074055671692,grad_norm: 0.9999990983259774, iteration: 129812
loss: 1.0467650890350342,grad_norm: 0.996645955549969, iteration: 129813
loss: 0.9911175966262817,grad_norm: 0.7747451543977877, iteration: 129814
loss: 0.9905193448066711,grad_norm: 0.8154761519518686, iteration: 129815
loss: 1.0254451036453247,grad_norm: 0.999999504744665, iteration: 129816
loss: 1.0831660032272339,grad_norm: 0.9893600108807767, iteration: 129817
loss: 1.0460771322250366,grad_norm: 0.999999618186996, iteration: 129818
loss: 1.0292991399765015,grad_norm: 0.9999997451441194, iteration: 129819
loss: 1.0067895650863647,grad_norm: 0.9999991510317475, iteration: 129820
loss: 1.0070130825042725,grad_norm: 0.8761309295297947, iteration: 129821
loss: 1.027895450592041,grad_norm: 0.9999994020973425, iteration: 129822
loss: 1.02493155002594,grad_norm: 0.9057399587378462, iteration: 129823
loss: 1.02950918674469,grad_norm: 0.9054339994690908, iteration: 129824
loss: 1.037781000137329,grad_norm: 0.999999138542269, iteration: 129825
loss: 1.0523825883865356,grad_norm: 0.9999995698156499, iteration: 129826
loss: 1.0963705778121948,grad_norm: 0.9999999836060982, iteration: 129827
loss: 0.9806618094444275,grad_norm: 0.9999991211800608, iteration: 129828
loss: 0.9843496680259705,grad_norm: 0.8385936324464408, iteration: 129829
loss: 1.0136387348175049,grad_norm: 0.9999993433415564, iteration: 129830
loss: 1.0141794681549072,grad_norm: 0.9999989868167926, iteration: 129831
loss: 1.0782470703125,grad_norm: 0.9999993365420005, iteration: 129832
loss: 1.0246715545654297,grad_norm: 0.9999997481308736, iteration: 129833
loss: 1.0213555097579956,grad_norm: 0.9999992147412792, iteration: 129834
loss: 0.9800425171852112,grad_norm: 0.8726400578194664, iteration: 129835
loss: 0.9815419316291809,grad_norm: 0.9994547945994028, iteration: 129836
loss: 1.015023946762085,grad_norm: 0.9999990749136768, iteration: 129837
loss: 0.9943631887435913,grad_norm: 0.8719227478228415, iteration: 129838
loss: 1.0028772354125977,grad_norm: 0.9081096747391193, iteration: 129839
loss: 1.0404051542282104,grad_norm: 0.9999990819759237, iteration: 129840
loss: 1.0535869598388672,grad_norm: 1.0000000067963715, iteration: 129841
loss: 0.9986581802368164,grad_norm: 0.8603108001698145, iteration: 129842
loss: 0.991412341594696,grad_norm: 0.9999992196616689, iteration: 129843
loss: 1.0240973234176636,grad_norm: 0.9999995600347784, iteration: 129844
loss: 1.023355484008789,grad_norm: 0.9107185174348064, iteration: 129845
loss: 0.9921202063560486,grad_norm: 0.999999589710343, iteration: 129846
loss: 1.006847620010376,grad_norm: 0.7864107631209916, iteration: 129847
loss: 1.0247029066085815,grad_norm: 0.8964115793929145, iteration: 129848
loss: 1.006501317024231,grad_norm: 0.8536369262705429, iteration: 129849
loss: 1.010945200920105,grad_norm: 0.8767710098484841, iteration: 129850
loss: 1.0127696990966797,grad_norm: 0.8065701449832796, iteration: 129851
loss: 0.9819303750991821,grad_norm: 0.942216541024461, iteration: 129852
loss: 1.0169973373413086,grad_norm: 0.8272026172402438, iteration: 129853
loss: 0.9705150723457336,grad_norm: 0.7144733981396337, iteration: 129854
loss: 0.982334554195404,grad_norm: 0.7435222129941581, iteration: 129855
loss: 1.0260040760040283,grad_norm: 0.9999992407613809, iteration: 129856
loss: 1.054904818534851,grad_norm: 0.9999996681780846, iteration: 129857
loss: 1.0178494453430176,grad_norm: 0.9519270740421786, iteration: 129858
loss: 1.023130178451538,grad_norm: 0.9761841379692364, iteration: 129859
loss: 1.0308642387390137,grad_norm: 0.9999991004328327, iteration: 129860
loss: 0.9915566444396973,grad_norm: 0.8789513264695316, iteration: 129861
loss: 0.9740164279937744,grad_norm: 0.999999407297083, iteration: 129862
loss: 0.973402202129364,grad_norm: 0.7393998080734099, iteration: 129863
loss: 1.1308248043060303,grad_norm: 0.9999997419139597, iteration: 129864
loss: 1.024107575416565,grad_norm: 0.9999999693650384, iteration: 129865
loss: 1.0034642219543457,grad_norm: 0.9568210278848427, iteration: 129866
loss: 1.0150458812713623,grad_norm: 0.9999997362032851, iteration: 129867
loss: 0.9735060334205627,grad_norm: 0.8661548391924556, iteration: 129868
loss: 0.9579894542694092,grad_norm: 0.7904258962302033, iteration: 129869
loss: 1.0014225244522095,grad_norm: 0.9999990587790551, iteration: 129870
loss: 1.1339911222457886,grad_norm: 0.9999998830698339, iteration: 129871
loss: 0.9576407670974731,grad_norm: 0.8359917960271382, iteration: 129872
loss: 1.0020382404327393,grad_norm: 0.7540746535815853, iteration: 129873
loss: 1.0132319927215576,grad_norm: 0.9999995128310111, iteration: 129874
loss: 1.044962763786316,grad_norm: 0.9999995383183772, iteration: 129875
loss: 1.0130614042282104,grad_norm: 0.9498886057157343, iteration: 129876
loss: 1.0035322904586792,grad_norm: 0.9999990360251654, iteration: 129877
loss: 1.0418964624404907,grad_norm: 0.9999994610598938, iteration: 129878
loss: 1.0430865287780762,grad_norm: 0.9999993854932117, iteration: 129879
loss: 1.0148122310638428,grad_norm: 0.9999994084289963, iteration: 129880
loss: 0.9935638904571533,grad_norm: 0.8542593742309674, iteration: 129881
loss: 1.0543923377990723,grad_norm: 0.9999992168449515, iteration: 129882
loss: 1.0119668245315552,grad_norm: 0.868987653341022, iteration: 129883
loss: 1.022193431854248,grad_norm: 0.9999992730480693, iteration: 129884
loss: 1.0422062873840332,grad_norm: 0.9999996253095507, iteration: 129885
loss: 1.0213818550109863,grad_norm: 0.9999991772862334, iteration: 129886
loss: 0.9742409586906433,grad_norm: 0.8555555972868234, iteration: 129887
loss: 1.1032222509384155,grad_norm: 0.9999994528550866, iteration: 129888
loss: 1.0103529691696167,grad_norm: 0.9697866017103861, iteration: 129889
loss: 1.0545499324798584,grad_norm: 0.9999998006235101, iteration: 129890
loss: 1.0072553157806396,grad_norm: 0.8909540132024644, iteration: 129891
loss: 1.0318466424942017,grad_norm: 0.9999994934646774, iteration: 129892
loss: 0.9978370666503906,grad_norm: 0.9999994379085594, iteration: 129893
loss: 1.0021424293518066,grad_norm: 0.8356593918025167, iteration: 129894
loss: 0.9996861815452576,grad_norm: 0.9999991277411082, iteration: 129895
loss: 0.9681759476661682,grad_norm: 0.9999992462463385, iteration: 129896
loss: 1.0144652128219604,grad_norm: 0.9999991381155151, iteration: 129897
loss: 1.0294338464736938,grad_norm: 0.9999991761547719, iteration: 129898
loss: 1.0626659393310547,grad_norm: 0.999999435531124, iteration: 129899
loss: 1.0161182880401611,grad_norm: 0.9946522894015382, iteration: 129900
loss: 0.991698682308197,grad_norm: 0.999999473855086, iteration: 129901
loss: 1.1134082078933716,grad_norm: 0.9999998124847788, iteration: 129902
loss: 0.9694795608520508,grad_norm: 0.9636667105133047, iteration: 129903
loss: 1.0608450174331665,grad_norm: 0.9999995347882485, iteration: 129904
loss: 1.0964281558990479,grad_norm: 0.9999996856398323, iteration: 129905
loss: 0.9713014364242554,grad_norm: 0.9446358660444313, iteration: 129906
loss: 1.013156533241272,grad_norm: 0.9999990351921351, iteration: 129907
loss: 1.017962098121643,grad_norm: 0.973065907801239, iteration: 129908
loss: 1.0184218883514404,grad_norm: 0.8538210191069718, iteration: 129909
loss: 1.1154683828353882,grad_norm: 0.9999992215065435, iteration: 129910
loss: 1.0215604305267334,grad_norm: 0.9999989612981145, iteration: 129911
loss: 0.9806005358695984,grad_norm: 0.8482716052868289, iteration: 129912
loss: 0.9536322355270386,grad_norm: 0.8205103886224765, iteration: 129913
loss: 1.0174850225448608,grad_norm: 0.999998984168945, iteration: 129914
loss: 1.0012776851654053,grad_norm: 0.9535912159003388, iteration: 129915
loss: 1.0833598375320435,grad_norm: 0.999999105002959, iteration: 129916
loss: 0.989951491355896,grad_norm: 0.9999998275758266, iteration: 129917
loss: 1.0019135475158691,grad_norm: 0.8077400475538502, iteration: 129918
loss: 0.9891173243522644,grad_norm: 0.9343314468528171, iteration: 129919
loss: 0.988342821598053,grad_norm: 0.9999990426585228, iteration: 129920
loss: 1.196067214012146,grad_norm: 0.9999998964868434, iteration: 129921
loss: 1.0017470121383667,grad_norm: 0.8875219174218598, iteration: 129922
loss: 1.0246299505233765,grad_norm: 0.9999991424535737, iteration: 129923
loss: 1.1140241622924805,grad_norm: 0.9999997364369709, iteration: 129924
loss: 1.020426630973816,grad_norm: 0.7539415234066675, iteration: 129925
loss: 0.9995850324630737,grad_norm: 0.8314234761757586, iteration: 129926
loss: 0.9730048179626465,grad_norm: 0.999999116760682, iteration: 129927
loss: 1.0012338161468506,grad_norm: 0.9999992137840336, iteration: 129928
loss: 1.0305066108703613,grad_norm: 0.8325791732316088, iteration: 129929
loss: 1.0275599956512451,grad_norm: 0.9999992176491707, iteration: 129930
loss: 1.0421044826507568,grad_norm: 0.9999992251997106, iteration: 129931
loss: 1.052095651626587,grad_norm: 0.9999996643192345, iteration: 129932
loss: 0.9642397165298462,grad_norm: 0.9498666969640508, iteration: 129933
loss: 1.0198745727539062,grad_norm: 0.896301069354581, iteration: 129934
loss: 1.0424569845199585,grad_norm: 0.8891354282120005, iteration: 129935
loss: 1.1266155242919922,grad_norm: 0.9999996861310297, iteration: 129936
loss: 1.0217070579528809,grad_norm: 0.9999992707167957, iteration: 129937
loss: 1.0042613744735718,grad_norm: 0.7683938788296448, iteration: 129938
loss: 1.0532922744750977,grad_norm: 0.9999989961664479, iteration: 129939
loss: 1.0135847330093384,grad_norm: 0.999999765914512, iteration: 129940
loss: 1.1123130321502686,grad_norm: 0.9999995516971966, iteration: 129941
loss: 1.0048810243606567,grad_norm: 0.7927027499232546, iteration: 129942
loss: 1.04628324508667,grad_norm: 0.9999991675592502, iteration: 129943
loss: 0.9866275787353516,grad_norm: 0.9902845212435706, iteration: 129944
loss: 1.028217077255249,grad_norm: 0.99999910135245, iteration: 129945
loss: 1.0089240074157715,grad_norm: 0.9999990603599364, iteration: 129946
loss: 1.0245379209518433,grad_norm: 0.7846400202539416, iteration: 129947
loss: 1.0419001579284668,grad_norm: 0.9999998316399062, iteration: 129948
loss: 1.0597206354141235,grad_norm: 0.8422489333242831, iteration: 129949
loss: 1.0572247505187988,grad_norm: 0.9999994664834625, iteration: 129950
loss: 0.9651194214820862,grad_norm: 0.8077487151429913, iteration: 129951
loss: 0.9833159446716309,grad_norm: 0.9403754871719942, iteration: 129952
loss: 1.0116902589797974,grad_norm: 0.9999991207337249, iteration: 129953
loss: 1.0013231039047241,grad_norm: 0.999999320953548, iteration: 129954
loss: 1.0344756841659546,grad_norm: 0.8227801495538644, iteration: 129955
loss: 1.170993447303772,grad_norm: 0.9999996120875259, iteration: 129956
loss: 1.0238978862762451,grad_norm: 0.8103145463737087, iteration: 129957
loss: 1.1222095489501953,grad_norm: 0.9999991985379078, iteration: 129958
loss: 1.0466480255126953,grad_norm: 0.8639943327431073, iteration: 129959
loss: 1.0210883617401123,grad_norm: 0.9999997925784391, iteration: 129960
loss: 1.0276564359664917,grad_norm: 0.9058758585587142, iteration: 129961
loss: 1.0142072439193726,grad_norm: 0.9794563197640673, iteration: 129962
loss: 0.9543442130088806,grad_norm: 0.9999990962141891, iteration: 129963
loss: 1.0388950109481812,grad_norm: 0.9999992969611381, iteration: 129964
loss: 1.0732862949371338,grad_norm: 0.9999994675246263, iteration: 129965
loss: 1.0337885618209839,grad_norm: 0.9999990568254103, iteration: 129966
loss: 0.987480103969574,grad_norm: 0.9999990691470154, iteration: 129967
loss: 1.0807418823242188,grad_norm: 0.9999996181172451, iteration: 129968
loss: 0.9939332008361816,grad_norm: 0.8486767488900607, iteration: 129969
loss: 0.9939064383506775,grad_norm: 0.9999991833015124, iteration: 129970
loss: 1.0254805088043213,grad_norm: 0.74725622847059, iteration: 129971
loss: 0.9992427229881287,grad_norm: 0.7908851216884549, iteration: 129972
loss: 1.110594630241394,grad_norm: 0.8357149360674966, iteration: 129973
loss: 1.0645582675933838,grad_norm: 0.9999993001644889, iteration: 129974
loss: 1.0135921239852905,grad_norm: 0.999999251276963, iteration: 129975
loss: 0.9888730645179749,grad_norm: 0.8490132702360198, iteration: 129976
loss: 1.0973560810089111,grad_norm: 0.9999998921012168, iteration: 129977
loss: 1.162934422492981,grad_norm: 0.999999547153218, iteration: 129978
loss: 0.9848001599311829,grad_norm: 0.7391497964092142, iteration: 129979
loss: 1.0077154636383057,grad_norm: 0.9999990648654836, iteration: 129980
loss: 1.0115872621536255,grad_norm: 0.971204171095349, iteration: 129981
loss: 1.0540684461593628,grad_norm: 0.9999994919915446, iteration: 129982
loss: 0.9943757653236389,grad_norm: 0.8599657138847315, iteration: 129983
loss: 1.012850284576416,grad_norm: 0.9999999079491455, iteration: 129984
loss: 0.9702795743942261,grad_norm: 0.9128958729905626, iteration: 129985
loss: 1.1829168796539307,grad_norm: 0.9999992780388021, iteration: 129986
loss: 0.9803982973098755,grad_norm: 0.793322591189431, iteration: 129987
loss: 0.993744969367981,grad_norm: 0.9999994391369219, iteration: 129988
loss: 1.0271116495132446,grad_norm: 0.9999996873609013, iteration: 129989
loss: 1.0118802785873413,grad_norm: 0.9999993007125412, iteration: 129990
loss: 1.06843900680542,grad_norm: 0.9999995435396446, iteration: 129991
loss: 1.059705376625061,grad_norm: 0.9999997849372447, iteration: 129992
loss: 1.0048530101776123,grad_norm: 0.9999994577881032, iteration: 129993
loss: 0.9953849911689758,grad_norm: 0.9166599120205975, iteration: 129994
loss: 0.9911348223686218,grad_norm: 0.9798939892313753, iteration: 129995
loss: 1.0253283977508545,grad_norm: 0.7256545655011978, iteration: 129996
loss: 0.9899983406066895,grad_norm: 0.9808694720712097, iteration: 129997
loss: 1.0418442487716675,grad_norm: 0.9999995768272072, iteration: 129998
loss: 1.0291951894760132,grad_norm: 0.9999990985454095, iteration: 129999
loss: 0.9688783884048462,grad_norm: 0.9103726114523601, iteration: 130000
Evaluating at step 130000
{'val': 0.9973107594996691, 'test': 2.7063197998193127}
loss: 1.0268934965133667,grad_norm: 0.8615277348313465, iteration: 130001
loss: 0.9892603158950806,grad_norm: 0.7328785986474934, iteration: 130002
loss: 1.0017162561416626,grad_norm: 0.8152709991342088, iteration: 130003
loss: 1.0160071849822998,grad_norm: 0.8084863876154023, iteration: 130004
loss: 1.004210114479065,grad_norm: 0.978613036717546, iteration: 130005
loss: 0.995306134223938,grad_norm: 0.9999991418803328, iteration: 130006
loss: 1.016297459602356,grad_norm: 0.9999992499179932, iteration: 130007
loss: 0.946698784828186,grad_norm: 0.9394733631148703, iteration: 130008
loss: 0.973872721195221,grad_norm: 0.8474924982179007, iteration: 130009
loss: 1.019776701927185,grad_norm: 0.9481936030917095, iteration: 130010
loss: 0.977475643157959,grad_norm: 0.999999391885553, iteration: 130011
loss: 1.005772590637207,grad_norm: 0.8287575939606047, iteration: 130012
loss: 0.9660012125968933,grad_norm: 0.9999995232353232, iteration: 130013
loss: 0.9878643751144409,grad_norm: 0.824567004652019, iteration: 130014
loss: 1.0130624771118164,grad_norm: 0.9185577564586133, iteration: 130015
loss: 1.0140314102172852,grad_norm: 0.9782901631568616, iteration: 130016
loss: 0.9992721676826477,grad_norm: 0.973889855276593, iteration: 130017
loss: 1.015753149986267,grad_norm: 0.999999110970466, iteration: 130018
loss: 1.3164477348327637,grad_norm: 0.9999996849825915, iteration: 130019
loss: 1.0026967525482178,grad_norm: 0.9999995282331614, iteration: 130020
loss: 1.0087754726409912,grad_norm: 0.8601005707147326, iteration: 130021
loss: 1.1123099327087402,grad_norm: 0.9999998369582745, iteration: 130022
loss: 1.0413105487823486,grad_norm: 0.9999998968198177, iteration: 130023
loss: 0.98129802942276,grad_norm: 0.9999990430974949, iteration: 130024
loss: 0.9915223121643066,grad_norm: 0.8025160312024378, iteration: 130025
loss: 1.0112916231155396,grad_norm: 0.8660409346412886, iteration: 130026
loss: 1.0232746601104736,grad_norm: 0.9219021281606751, iteration: 130027
loss: 1.0623775720596313,grad_norm: 0.8094203454868119, iteration: 130028
loss: 0.9951059222221375,grad_norm: 0.7882131685364391, iteration: 130029
loss: 1.0067893266677856,grad_norm: 0.9282533450161778, iteration: 130030
loss: 1.0043139457702637,grad_norm: 0.9999990161422049, iteration: 130031
loss: 1.0480282306671143,grad_norm: 0.9300335997679279, iteration: 130032
loss: 1.0204037427902222,grad_norm: 0.712511971584802, iteration: 130033
loss: 1.006290078163147,grad_norm: 0.9040372366239059, iteration: 130034
loss: 1.029125452041626,grad_norm: 0.9999996695001896, iteration: 130035
loss: 1.0479447841644287,grad_norm: 0.9999994237208667, iteration: 130036
loss: 1.0343241691589355,grad_norm: 0.9999992090523823, iteration: 130037
loss: 1.0012913942337036,grad_norm: 0.8587051916009153, iteration: 130038
loss: 0.9976259469985962,grad_norm: 0.7415863268200437, iteration: 130039
loss: 1.0096420049667358,grad_norm: 0.9999989717221073, iteration: 130040
loss: 1.016639232635498,grad_norm: 0.8640439691904799, iteration: 130041
loss: 1.0027916431427002,grad_norm: 0.863627472527896, iteration: 130042
loss: 0.9862145781517029,grad_norm: 0.999999225985581, iteration: 130043
loss: 1.0096089839935303,grad_norm: 0.9633768201216392, iteration: 130044
loss: 1.012691855430603,grad_norm: 0.9999993199296889, iteration: 130045
loss: 1.0375854969024658,grad_norm: 0.999999200012783, iteration: 130046
loss: 1.0129777193069458,grad_norm: 0.8580065865035207, iteration: 130047
loss: 0.9584928750991821,grad_norm: 0.9469609912368118, iteration: 130048
loss: 1.176997423171997,grad_norm: 0.9999998880006046, iteration: 130049
loss: 0.9886873364448547,grad_norm: 0.9295168284305698, iteration: 130050
loss: 0.9827186465263367,grad_norm: 0.9472936087776501, iteration: 130051
loss: 1.0607227087020874,grad_norm: 0.9999997941084676, iteration: 130052
loss: 1.0128173828125,grad_norm: 0.9576835431401102, iteration: 130053
loss: 1.0202804803848267,grad_norm: 0.999999544858762, iteration: 130054
loss: 1.0171533823013306,grad_norm: 0.9714437124307317, iteration: 130055
loss: 1.0280174016952515,grad_norm: 0.9999993081202844, iteration: 130056
loss: 1.0023843050003052,grad_norm: 0.7277915740726127, iteration: 130057
loss: 1.003178596496582,grad_norm: 0.8087379140529315, iteration: 130058
loss: 1.1020848751068115,grad_norm: 0.9999990658188183, iteration: 130059
loss: 1.0470337867736816,grad_norm: 0.9999992694505359, iteration: 130060
loss: 0.9947869181632996,grad_norm: 0.9999990887806699, iteration: 130061
loss: 0.9917944073677063,grad_norm: 0.7417489285529011, iteration: 130062
loss: 1.034420132637024,grad_norm: 0.9056430547430684, iteration: 130063
loss: 1.0659266710281372,grad_norm: 0.9999999659608976, iteration: 130064
loss: 1.0134650468826294,grad_norm: 0.8461439224384945, iteration: 130065
loss: 0.9986986517906189,grad_norm: 0.7978878468608869, iteration: 130066
loss: 1.016325831413269,grad_norm: 0.8019848045554167, iteration: 130067
loss: 0.9968016147613525,grad_norm: 0.8833336336566173, iteration: 130068
loss: 0.9999363422393799,grad_norm: 0.9999991928427117, iteration: 130069
loss: 1.0115830898284912,grad_norm: 0.9962429322294201, iteration: 130070
loss: 1.049678921699524,grad_norm: 0.891645275421438, iteration: 130071
loss: 0.976409912109375,grad_norm: 0.9999991084546982, iteration: 130072
loss: 0.99727463722229,grad_norm: 0.9550449733906891, iteration: 130073
loss: 0.9803325533866882,grad_norm: 0.9999992000276006, iteration: 130074
loss: 0.9705854058265686,grad_norm: 0.8843688124469862, iteration: 130075
loss: 1.0031973123550415,grad_norm: 0.783439602748941, iteration: 130076
loss: 1.0495928525924683,grad_norm: 0.9999997103847563, iteration: 130077
loss: 1.0185928344726562,grad_norm: 0.8803324344827167, iteration: 130078
loss: 1.0155786275863647,grad_norm: 0.8927050630115029, iteration: 130079
loss: 0.9936014413833618,grad_norm: 0.9999991615907697, iteration: 130080
loss: 1.055008888244629,grad_norm: 0.8804098539234323, iteration: 130081
loss: 1.018782138824463,grad_norm: 0.8544367091365767, iteration: 130082
loss: 1.0120199918746948,grad_norm: 0.8738723218021737, iteration: 130083
loss: 1.021435260772705,grad_norm: 0.9999991000813178, iteration: 130084
loss: 0.9850719571113586,grad_norm: 0.9631664954756001, iteration: 130085
loss: 1.0075808763504028,grad_norm: 0.9999990021924748, iteration: 130086
loss: 1.202368974685669,grad_norm: 0.9999998116885677, iteration: 130087
loss: 0.990904688835144,grad_norm: 0.9291545965159458, iteration: 130088
loss: 1.081479787826538,grad_norm: 0.9999990550378658, iteration: 130089
loss: 1.0233932733535767,grad_norm: 0.8622842362034825, iteration: 130090
loss: 1.0872470140457153,grad_norm: 0.9867689163626358, iteration: 130091
loss: 0.9990728497505188,grad_norm: 0.8813531222786354, iteration: 130092
loss: 1.0218771696090698,grad_norm: 0.9071851020135483, iteration: 130093
loss: 1.0223989486694336,grad_norm: 0.9052786657632419, iteration: 130094
loss: 1.012900948524475,grad_norm: 0.8160994678628415, iteration: 130095
loss: 1.0309438705444336,grad_norm: 0.8054744300737925, iteration: 130096
loss: 0.9622212052345276,grad_norm: 0.892417583314178, iteration: 130097
loss: 1.0133686065673828,grad_norm: 0.7654986325122367, iteration: 130098
loss: 1.0145717859268188,grad_norm: 0.9025272830846511, iteration: 130099
loss: 1.0254015922546387,grad_norm: 0.85914681896331, iteration: 130100
loss: 1.010892629623413,grad_norm: 0.8263857077226063, iteration: 130101
loss: 1.1012097597122192,grad_norm: 0.9999994869206574, iteration: 130102
loss: 1.0506361722946167,grad_norm: 0.9999992249057701, iteration: 130103
loss: 1.0837761163711548,grad_norm: 0.8324969490573094, iteration: 130104
loss: 0.9768385291099548,grad_norm: 0.8937113840874061, iteration: 130105
loss: 1.0093337297439575,grad_norm: 0.9999992601042582, iteration: 130106
loss: 1.0121582746505737,grad_norm: 0.8100504667457669, iteration: 130107
loss: 1.0339936017990112,grad_norm: 0.7747952284874771, iteration: 130108
loss: 1.0174790620803833,grad_norm: 0.999999941845234, iteration: 130109
loss: 1.0146217346191406,grad_norm: 0.9999991415987839, iteration: 130110
loss: 1.0150580406188965,grad_norm: 0.9999991421603941, iteration: 130111
loss: 1.1164278984069824,grad_norm: 0.9999998863203321, iteration: 130112
loss: 0.9713992476463318,grad_norm: 0.9999992305691903, iteration: 130113
loss: 1.0823352336883545,grad_norm: 0.9999999117971591, iteration: 130114
loss: 1.002277135848999,grad_norm: 0.9552154490464839, iteration: 130115
loss: 1.183713436126709,grad_norm: 0.999999926846498, iteration: 130116
loss: 0.9668781757354736,grad_norm: 0.999999043870771, iteration: 130117
loss: 0.9907341599464417,grad_norm: 0.72434118052294, iteration: 130118
loss: 0.9988355040550232,grad_norm: 0.9509926310714641, iteration: 130119
loss: 1.011749267578125,grad_norm: 0.9006320449955781, iteration: 130120
loss: 1.0194306373596191,grad_norm: 0.9999990356308612, iteration: 130121
loss: 0.9995862245559692,grad_norm: 0.9734001429911846, iteration: 130122
loss: 1.046586513519287,grad_norm: 0.9999998769821175, iteration: 130123
loss: 1.0127122402191162,grad_norm: 0.8883526842139516, iteration: 130124
loss: 0.9821497797966003,grad_norm: 0.9999990288710195, iteration: 130125
loss: 1.0382068157196045,grad_norm: 0.9999991175677251, iteration: 130126
loss: 1.033002257347107,grad_norm: 0.930625591378177, iteration: 130127
loss: 0.990781307220459,grad_norm: 0.8138135466153337, iteration: 130128
loss: 0.9743045568466187,grad_norm: 0.7370548800957869, iteration: 130129
loss: 0.9819025993347168,grad_norm: 0.9999994109728795, iteration: 130130
loss: 1.0094776153564453,grad_norm: 0.9203272224600199, iteration: 130131
loss: 0.9909059405326843,grad_norm: 0.9672027638948386, iteration: 130132
loss: 1.0060874223709106,grad_norm: 0.9559688243668052, iteration: 130133
loss: 0.9721671342849731,grad_norm: 0.9999992877324668, iteration: 130134
loss: 1.037035346031189,grad_norm: 0.9999997478477339, iteration: 130135
loss: 1.0433053970336914,grad_norm: 0.9999991405936063, iteration: 130136
loss: 0.9744017124176025,grad_norm: 0.8233628949342809, iteration: 130137
loss: 1.013710618019104,grad_norm: 0.9999993695862146, iteration: 130138
loss: 1.0953861474990845,grad_norm: 0.999999565518966, iteration: 130139
loss: 1.0678479671478271,grad_norm: 0.9999990332230922, iteration: 130140
loss: 1.0872762203216553,grad_norm: 0.9999994900865762, iteration: 130141
loss: 1.035860538482666,grad_norm: 0.9999994046242799, iteration: 130142
loss: 0.9986476898193359,grad_norm: 0.8477118733852614, iteration: 130143
loss: 1.0053199529647827,grad_norm: 0.9999990817273502, iteration: 130144
loss: 0.9992456436157227,grad_norm: 0.9718169924628106, iteration: 130145
loss: 1.008137583732605,grad_norm: 0.83727817678765, iteration: 130146
loss: 1.0062662363052368,grad_norm: 0.8017573971642531, iteration: 130147
loss: 1.0381816625595093,grad_norm: 0.9999996249047415, iteration: 130148
loss: 1.0547523498535156,grad_norm: 0.9999991943413465, iteration: 130149
loss: 1.0109323263168335,grad_norm: 0.7917573786514859, iteration: 130150
loss: 0.9853595495223999,grad_norm: 0.9240232119152051, iteration: 130151
loss: 1.046281099319458,grad_norm: 0.9999995439832684, iteration: 130152
loss: 1.0254594087600708,grad_norm: 0.9999994004124818, iteration: 130153
loss: 1.023940086364746,grad_norm: 0.7514532099299102, iteration: 130154
loss: 1.0439585447311401,grad_norm: 0.9474886590566824, iteration: 130155
loss: 1.0394784212112427,grad_norm: 0.9999993244844256, iteration: 130156
loss: 1.0012526512145996,grad_norm: 0.9999997103433279, iteration: 130157
loss: 1.0259184837341309,grad_norm: 0.9999995053397003, iteration: 130158
loss: 0.9953455328941345,grad_norm: 0.9999991281185119, iteration: 130159
loss: 1.0105342864990234,grad_norm: 0.7647326233056945, iteration: 130160
loss: 1.0465688705444336,grad_norm: 0.9999995569890309, iteration: 130161
loss: 1.019097089767456,grad_norm: 0.9484458779825088, iteration: 130162
loss: 1.0037766695022583,grad_norm: 0.9999991590671394, iteration: 130163
loss: 0.9930921196937561,grad_norm: 0.9999989844878371, iteration: 130164
loss: 1.0233769416809082,grad_norm: 0.9999992036554548, iteration: 130165
loss: 0.9877767562866211,grad_norm: 0.9144450126826603, iteration: 130166
loss: 1.02751624584198,grad_norm: 0.915305604769745, iteration: 130167
loss: 0.9853339195251465,grad_norm: 0.7939206947886637, iteration: 130168
loss: 0.9976150393486023,grad_norm: 0.9999991599467873, iteration: 130169
loss: 0.9793292880058289,grad_norm: 0.9942934783077112, iteration: 130170
loss: 0.9851261973381042,grad_norm: 0.901060969202215, iteration: 130171
loss: 0.9958505034446716,grad_norm: 0.9999991816764425, iteration: 130172
loss: 0.9745031595230103,grad_norm: 0.9999991977980228, iteration: 130173
loss: 0.9821903109550476,grad_norm: 0.9999996722168987, iteration: 130174
loss: 0.9948263764381409,grad_norm: 0.776613289259592, iteration: 130175
loss: 1.1075878143310547,grad_norm: 0.99999929357283, iteration: 130176
loss: 0.9976954460144043,grad_norm: 0.9303247537391056, iteration: 130177
loss: 1.3142346143722534,grad_norm: 0.9999999056774348, iteration: 130178
loss: 1.0624622106552124,grad_norm: 0.8030525222304771, iteration: 130179
loss: 1.0206029415130615,grad_norm: 0.9999995394661875, iteration: 130180
loss: 0.9636552333831787,grad_norm: 0.9752159314961448, iteration: 130181
loss: 1.0754228830337524,grad_norm: 0.9999994888649217, iteration: 130182
loss: 1.0712099075317383,grad_norm: 0.9999997479579961, iteration: 130183
loss: 1.015350580215454,grad_norm: 0.9999995139361498, iteration: 130184
loss: 1.0602701902389526,grad_norm: 0.9999993252611049, iteration: 130185
loss: 0.9777094125747681,grad_norm: 0.8419929537830056, iteration: 130186
loss: 1.022524118423462,grad_norm: 0.9999998361482192, iteration: 130187
loss: 1.01075279712677,grad_norm: 0.9999992241207591, iteration: 130188
loss: 0.9921627044677734,grad_norm: 0.8863354746667761, iteration: 130189
loss: 1.0523889064788818,grad_norm: 0.8913821539703249, iteration: 130190
loss: 1.012245774269104,grad_norm: 0.9910278429058835, iteration: 130191
loss: 0.989274799823761,grad_norm: 0.884352113761867, iteration: 130192
loss: 1.0668840408325195,grad_norm: 0.9999995541654648, iteration: 130193
loss: 0.98846834897995,grad_norm: 0.8241354553889291, iteration: 130194
loss: 0.9937870502471924,grad_norm: 0.9999990702980073, iteration: 130195
loss: 1.0910874605178833,grad_norm: 0.9999999514757649, iteration: 130196
loss: 0.9688197374343872,grad_norm: 0.9511387878755082, iteration: 130197
loss: 0.9971135258674622,grad_norm: 0.9999991159616923, iteration: 130198
loss: 1.0134665966033936,grad_norm: 0.7945701442465474, iteration: 130199
loss: 1.0146360397338867,grad_norm: 0.9111108069167022, iteration: 130200
loss: 0.9795134663581848,grad_norm: 0.7767581912724979, iteration: 130201
loss: 1.0304967164993286,grad_norm: 0.9191742839286808, iteration: 130202
loss: 1.0149894952774048,grad_norm: 0.9041564985194623, iteration: 130203
loss: 0.9992300271987915,grad_norm: 0.9999992106793207, iteration: 130204
loss: 1.0163564682006836,grad_norm: 0.8910231348678247, iteration: 130205
loss: 0.9990201592445374,grad_norm: 0.8726600881658367, iteration: 130206
loss: 1.0776853561401367,grad_norm: 0.9999991475235713, iteration: 130207
loss: 1.0600032806396484,grad_norm: 0.8585438320792608, iteration: 130208
loss: 1.0627946853637695,grad_norm: 0.999999913793589, iteration: 130209
loss: 0.9964497685432434,grad_norm: 0.7103345375571718, iteration: 130210
loss: 1.0091803073883057,grad_norm: 0.7544652495647084, iteration: 130211
loss: 0.9884738326072693,grad_norm: 0.8978787658512737, iteration: 130212
loss: 1.1940274238586426,grad_norm: 0.9999992654390346, iteration: 130213
loss: 0.9788035154342651,grad_norm: 0.9999990901771751, iteration: 130214
loss: 0.9940316677093506,grad_norm: 0.8713913941243009, iteration: 130215
loss: 1.0030282735824585,grad_norm: 0.9999993312801047, iteration: 130216
loss: 1.3827404975891113,grad_norm: 1.0000000217728946, iteration: 130217
loss: 0.9962645173072815,grad_norm: 0.9999990403812383, iteration: 130218
loss: 0.9805709719657898,grad_norm: 0.9999998595396463, iteration: 130219
loss: 0.9932090640068054,grad_norm: 0.8920491778123603, iteration: 130220
loss: 1.0091054439544678,grad_norm: 0.9238772406865138, iteration: 130221
loss: 0.9702432751655579,grad_norm: 0.9999990337229955, iteration: 130222
loss: 0.9972456693649292,grad_norm: 0.9453706469162532, iteration: 130223
loss: 1.006840705871582,grad_norm: 0.9345544877776008, iteration: 130224
loss: 0.9940182566642761,grad_norm: 0.9999998870710805, iteration: 130225
loss: 1.1163557767868042,grad_norm: 0.9278968436598293, iteration: 130226
loss: 1.0128717422485352,grad_norm: 0.99999898791025, iteration: 130227
loss: 1.1202008724212646,grad_norm: 0.9999992594692553, iteration: 130228
loss: 1.1997507810592651,grad_norm: 0.9999999298823944, iteration: 130229
loss: 1.0393036603927612,grad_norm: 0.9999993891648392, iteration: 130230
loss: 1.0711017847061157,grad_norm: 0.8841263899448049, iteration: 130231
loss: 1.0346291065216064,grad_norm: 0.9999992212439934, iteration: 130232
loss: 1.113444447517395,grad_norm: 0.9999994825121988, iteration: 130233
loss: 1.188661813735962,grad_norm: 0.9999995490345623, iteration: 130234
loss: 1.0417500734329224,grad_norm: 0.9999992077026844, iteration: 130235
loss: 1.2009704113006592,grad_norm: 0.9999997200088381, iteration: 130236
loss: 1.0701466798782349,grad_norm: 0.9999994389109728, iteration: 130237
loss: 1.0355513095855713,grad_norm: 0.8510665048799289, iteration: 130238
loss: 1.1670007705688477,grad_norm: 0.9999992021467523, iteration: 130239
loss: 1.0530420541763306,grad_norm: 0.8621795427594803, iteration: 130240
loss: 1.0673046112060547,grad_norm: 0.9999993524893867, iteration: 130241
loss: 1.067887783050537,grad_norm: 0.9999995570317896, iteration: 130242
loss: 1.2239376306533813,grad_norm: 0.9999994206006385, iteration: 130243
loss: 1.0454156398773193,grad_norm: 0.999999778598033, iteration: 130244
loss: 1.0172632932662964,grad_norm: 0.762902459720051, iteration: 130245
loss: 0.9726192355155945,grad_norm: 0.9999990046905677, iteration: 130246
loss: 1.0969047546386719,grad_norm: 0.9999993948040758, iteration: 130247
loss: 1.0057145357131958,grad_norm: 0.9999992028657847, iteration: 130248
loss: 1.2389196157455444,grad_norm: 0.9999996477943285, iteration: 130249
loss: 1.037786602973938,grad_norm: 0.9999992064138136, iteration: 130250
loss: 0.9720574617385864,grad_norm: 0.9999993851369818, iteration: 130251
loss: 0.970150887966156,grad_norm: 0.8647832250542752, iteration: 130252
loss: 1.0976084470748901,grad_norm: 0.9999996484368239, iteration: 130253
loss: 1.0008528232574463,grad_norm: 0.8804785816954995, iteration: 130254
loss: 1.2229857444763184,grad_norm: 1.0000000531390856, iteration: 130255
loss: 1.049282431602478,grad_norm: 0.9999995399369096, iteration: 130256
loss: 1.1200019121170044,grad_norm: 0.9999998913376632, iteration: 130257
loss: 1.0723212957382202,grad_norm: 0.9999996091558889, iteration: 130258
loss: 1.044894814491272,grad_norm: 0.9999995348738804, iteration: 130259
loss: 1.0050865411758423,grad_norm: 0.9999997045333212, iteration: 130260
loss: 0.9918944835662842,grad_norm: 0.9143583321121534, iteration: 130261
loss: 1.0117658376693726,grad_norm: 0.9070381359673978, iteration: 130262
loss: 1.1808220148086548,grad_norm: 0.9999994247047929, iteration: 130263
loss: 1.1274185180664062,grad_norm: 0.9999995844662236, iteration: 130264
loss: 1.008234977722168,grad_norm: 0.9999994262311909, iteration: 130265
loss: 0.9753796458244324,grad_norm: 0.999999280370446, iteration: 130266
loss: 1.0106867551803589,grad_norm: 0.9999992891579988, iteration: 130267
loss: 1.02679443359375,grad_norm: 0.9627167322954654, iteration: 130268
loss: 1.1000691652297974,grad_norm: 0.8965792151307287, iteration: 130269
loss: 0.96780925989151,grad_norm: 0.9999990761536401, iteration: 130270
loss: 1.119458556175232,grad_norm: 0.9999995878763622, iteration: 130271
loss: 1.0671569108963013,grad_norm: 0.8252009031150964, iteration: 130272
loss: 0.994760274887085,grad_norm: 0.8686132440727182, iteration: 130273
loss: 1.0723321437835693,grad_norm: 0.999999235460498, iteration: 130274
loss: 0.9790948629379272,grad_norm: 0.964776967754039, iteration: 130275
loss: 1.0023404359817505,grad_norm: 0.9999992050640191, iteration: 130276
loss: 1.1127835512161255,grad_norm: 0.99999986007705, iteration: 130277
loss: 1.0966194868087769,grad_norm: 0.9999993080982521, iteration: 130278
loss: 1.0024386644363403,grad_norm: 0.8050862334766257, iteration: 130279
loss: 0.997555136680603,grad_norm: 0.8879956779897716, iteration: 130280
loss: 1.0193756818771362,grad_norm: 0.9999996809483116, iteration: 130281
loss: 1.022628664970398,grad_norm: 0.913653575438213, iteration: 130282
loss: 1.0123268365859985,grad_norm: 0.99999899346809, iteration: 130283
loss: 1.1038306951522827,grad_norm: 0.9999991354945511, iteration: 130284
loss: 1.0380513668060303,grad_norm: 0.9674464802009217, iteration: 130285
loss: 1.0279051065444946,grad_norm: 0.9999991607383457, iteration: 130286
loss: 1.0204122066497803,grad_norm: 0.9206151894733268, iteration: 130287
loss: 1.007298231124878,grad_norm: 0.9999994813295898, iteration: 130288
loss: 1.077591061592102,grad_norm: 0.9999992221382922, iteration: 130289
loss: 1.0141353607177734,grad_norm: 0.9999990999222732, iteration: 130290
loss: 1.0427829027175903,grad_norm: 0.9999997482175709, iteration: 130291
loss: 1.0201002359390259,grad_norm: 0.9457718219207438, iteration: 130292
loss: 1.0493031740188599,grad_norm: 0.9999998990242374, iteration: 130293
loss: 1.0570940971374512,grad_norm: 0.9999994929545443, iteration: 130294
loss: 1.0284037590026855,grad_norm: 0.789364400374494, iteration: 130295
loss: 1.0692895650863647,grad_norm: 0.9999998607148822, iteration: 130296
loss: 1.0555309057235718,grad_norm: 0.9999995229509528, iteration: 130297
loss: 1.0120599269866943,grad_norm: 0.9148816845950513, iteration: 130298
loss: 0.9750276803970337,grad_norm: 0.7441175711151015, iteration: 130299
loss: 0.9995110034942627,grad_norm: 0.9824268306961321, iteration: 130300
loss: 0.9918566942214966,grad_norm: 0.8291655444837955, iteration: 130301
loss: 1.0276868343353271,grad_norm: 0.7424266139618877, iteration: 130302
loss: 0.9997999668121338,grad_norm: 0.9259255382539462, iteration: 130303
loss: 1.012934684753418,grad_norm: 0.8799886647772553, iteration: 130304
loss: 1.0235835313796997,grad_norm: 0.9999991062923163, iteration: 130305
loss: 0.9987622499465942,grad_norm: 0.9999991908201056, iteration: 130306
loss: 1.093011498451233,grad_norm: 0.9999993344172188, iteration: 130307
loss: 1.0104918479919434,grad_norm: 0.837598107170538, iteration: 130308
loss: 1.018763780593872,grad_norm: 0.9999991908214678, iteration: 130309
loss: 1.002587080001831,grad_norm: 0.7919666539350938, iteration: 130310
loss: 1.0072606801986694,grad_norm: 0.9999994676961018, iteration: 130311
loss: 0.9582915902137756,grad_norm: 0.8630984220167992, iteration: 130312
loss: 0.9844241738319397,grad_norm: 0.908508602199355, iteration: 130313
loss: 1.0140786170959473,grad_norm: 0.999999215019757, iteration: 130314
loss: 1.042770266532898,grad_norm: 0.999999333079547, iteration: 130315
loss: 1.0220857858657837,grad_norm: 0.9590807317284739, iteration: 130316
loss: 1.0480382442474365,grad_norm: 0.9999996077057972, iteration: 130317
loss: 1.0740796327590942,grad_norm: 0.8564303372549679, iteration: 130318
loss: 0.9991621375083923,grad_norm: 0.9999991569141548, iteration: 130319
loss: 0.9560030102729797,grad_norm: 0.9999991452914657, iteration: 130320
loss: 1.0297372341156006,grad_norm: 0.9677190581950675, iteration: 130321
loss: 0.9850780367851257,grad_norm: 0.9840516300558156, iteration: 130322
loss: 0.9961143136024475,grad_norm: 0.9541488584870417, iteration: 130323
loss: 1.026947021484375,grad_norm: 0.9999990253244389, iteration: 130324
loss: 1.194158911705017,grad_norm: 0.999999360480918, iteration: 130325
loss: 0.9365752935409546,grad_norm: 0.9390627685911718, iteration: 130326
loss: 1.1195095777511597,grad_norm: 0.9999999750438594, iteration: 130327
loss: 0.9861201643943787,grad_norm: 0.9177903036766485, iteration: 130328
loss: 0.992052435874939,grad_norm: 0.9500182603047699, iteration: 130329
loss: 1.0064668655395508,grad_norm: 0.9999998276278961, iteration: 130330
loss: 1.0047008991241455,grad_norm: 0.7762943054157208, iteration: 130331
loss: 1.0855605602264404,grad_norm: 0.9999997256435444, iteration: 130332
loss: 0.9727604985237122,grad_norm: 0.7502199093665298, iteration: 130333
loss: 0.967832088470459,grad_norm: 0.8168455659072917, iteration: 130334
loss: 1.0328798294067383,grad_norm: 0.8425026087841325, iteration: 130335
loss: 1.0189539194107056,grad_norm: 0.9323725294783198, iteration: 130336
loss: 1.0200382471084595,grad_norm: 0.9999991492477277, iteration: 130337
loss: 0.968856155872345,grad_norm: 0.8772191946380692, iteration: 130338
loss: 1.013500690460205,grad_norm: 0.9999992127977858, iteration: 130339
loss: 1.0169963836669922,grad_norm: 0.9999990844385651, iteration: 130340
loss: 0.9895318150520325,grad_norm: 0.9999992284566903, iteration: 130341
loss: 1.0297784805297852,grad_norm: 0.9880832197739231, iteration: 130342
loss: 1.0045868158340454,grad_norm: 0.9934533629479173, iteration: 130343
loss: 1.0137815475463867,grad_norm: 0.9029907321075467, iteration: 130344
loss: 0.9954410791397095,grad_norm: 0.8819466908746444, iteration: 130345
loss: 1.0031962394714355,grad_norm: 0.9999990523905263, iteration: 130346
loss: 0.9949097633361816,grad_norm: 0.8590458729046566, iteration: 130347
loss: 1.0394484996795654,grad_norm: 0.9999993290252738, iteration: 130348
loss: 0.9751684665679932,grad_norm: 0.8503583748959839, iteration: 130349
loss: 1.040616750717163,grad_norm: 0.8639630471772678, iteration: 130350
loss: 1.0202909708023071,grad_norm: 0.7930614116258978, iteration: 130351
loss: 1.1899714469909668,grad_norm: 0.9999995283014402, iteration: 130352
loss: 1.0040963888168335,grad_norm: 0.7623431228419555, iteration: 130353
loss: 0.9935248494148254,grad_norm: 0.9999993596864764, iteration: 130354
loss: 0.9903749227523804,grad_norm: 0.9523675196835591, iteration: 130355
loss: 0.9958659410476685,grad_norm: 0.9999991907923836, iteration: 130356
loss: 1.0166810750961304,grad_norm: 0.9387162366384092, iteration: 130357
loss: 1.0359225273132324,grad_norm: 0.9074761115720952, iteration: 130358
loss: 0.997485876083374,grad_norm: 0.9279042091502878, iteration: 130359
loss: 1.118990421295166,grad_norm: 0.999999471635602, iteration: 130360
loss: 1.0285695791244507,grad_norm: 0.9139752931488673, iteration: 130361
loss: 1.0481764078140259,grad_norm: 0.9999991475817543, iteration: 130362
loss: 1.005640983581543,grad_norm: 0.9340220466638864, iteration: 130363
loss: 0.9904085993766785,grad_norm: 0.7699228453329167, iteration: 130364
loss: 0.9981255531311035,grad_norm: 0.7718892186146397, iteration: 130365
loss: 0.9753061532974243,grad_norm: 0.9225091153632332, iteration: 130366
loss: 1.0156447887420654,grad_norm: 0.9740578318952825, iteration: 130367
loss: 1.052145004272461,grad_norm: 0.9999999798033746, iteration: 130368
loss: 0.9949337244033813,grad_norm: 0.9999990843195761, iteration: 130369
loss: 0.9703750014305115,grad_norm: 0.7924157378575616, iteration: 130370
loss: 1.017432689666748,grad_norm: 0.9543870210747613, iteration: 130371
loss: 1.0167672634124756,grad_norm: 0.9999991699165114, iteration: 130372
loss: 1.0298829078674316,grad_norm: 0.9999994580440313, iteration: 130373
loss: 0.9860051274299622,grad_norm: 0.9640359010884905, iteration: 130374
loss: 1.0410914421081543,grad_norm: 0.9999991857745818, iteration: 130375
loss: 1.0201780796051025,grad_norm: 0.8337705175401057, iteration: 130376
loss: 1.027748703956604,grad_norm: 0.9999991161807004, iteration: 130377
loss: 0.9787572026252747,grad_norm: 0.8095461587158321, iteration: 130378
loss: 0.9884324669837952,grad_norm: 0.8599291066316872, iteration: 130379
loss: 0.9979507923126221,grad_norm: 0.8904193948128363, iteration: 130380
loss: 0.99024498462677,grad_norm: 0.9038747152880026, iteration: 130381
loss: 1.0268193483352661,grad_norm: 0.8311860161342896, iteration: 130382
loss: 0.9952775239944458,grad_norm: 0.8658791817948446, iteration: 130383
loss: 0.9660922884941101,grad_norm: 0.9075190295167145, iteration: 130384
loss: 0.9760414958000183,grad_norm: 0.8555151472051372, iteration: 130385
loss: 0.9992598295211792,grad_norm: 0.9999991350664206, iteration: 130386
loss: 1.0194796323776245,grad_norm: 0.8936469295814339, iteration: 130387
loss: 1.0708048343658447,grad_norm: 0.9999991452932119, iteration: 130388
loss: 0.9854032397270203,grad_norm: 0.9999991515738769, iteration: 130389
loss: 1.081487774848938,grad_norm: 0.89799517552741, iteration: 130390
loss: 0.9880080223083496,grad_norm: 0.9557766592612391, iteration: 130391
loss: 1.0132057666778564,grad_norm: 0.7569198811886226, iteration: 130392
loss: 1.0210179090499878,grad_norm: 0.999999372709781, iteration: 130393
loss: 1.0351136922836304,grad_norm: 0.830435821013529, iteration: 130394
loss: 0.995337963104248,grad_norm: 0.9119608819459233, iteration: 130395
loss: 0.9869436621665955,grad_norm: 0.9999993851692277, iteration: 130396
loss: 1.023781418800354,grad_norm: 0.9999991233300232, iteration: 130397
loss: 1.0684765577316284,grad_norm: 0.9257677185057178, iteration: 130398
loss: 1.0155613422393799,grad_norm: 0.7657977514855057, iteration: 130399
loss: 1.0243092775344849,grad_norm: 0.7335049685174999, iteration: 130400
loss: 1.0575346946716309,grad_norm: 0.9999991931449104, iteration: 130401
loss: 1.1751623153686523,grad_norm: 0.9999996702849038, iteration: 130402
loss: 1.0852171182632446,grad_norm: 0.860498247186136, iteration: 130403
loss: 1.1424717903137207,grad_norm: 0.999999736063642, iteration: 130404
loss: 1.1313093900680542,grad_norm: 0.999999234235918, iteration: 130405
loss: 0.9999472498893738,grad_norm: 0.8158962778100592, iteration: 130406
loss: 1.03524911403656,grad_norm: 0.999999220489323, iteration: 130407
loss: 1.0098752975463867,grad_norm: 0.9999999829064444, iteration: 130408
loss: 0.9719985723495483,grad_norm: 0.9033047567704899, iteration: 130409
loss: 1.0783995389938354,grad_norm: 0.9999993044761377, iteration: 130410
loss: 1.144634485244751,grad_norm: 1.0000000325789473, iteration: 130411
loss: 1.1283950805664062,grad_norm: 0.9493335506707141, iteration: 130412
loss: 0.9886272549629211,grad_norm: 0.9004795914612944, iteration: 130413
loss: 0.9809896945953369,grad_norm: 0.828880249446543, iteration: 130414
loss: 1.0116326808929443,grad_norm: 0.9575009556145773, iteration: 130415
loss: 0.9714985489845276,grad_norm: 0.9722668694178412, iteration: 130416
loss: 1.001010537147522,grad_norm: 0.9593162688656229, iteration: 130417
loss: 1.026795506477356,grad_norm: 0.9999993107299129, iteration: 130418
loss: 0.9655671715736389,grad_norm: 0.8999367538195769, iteration: 130419
loss: 1.008166790008545,grad_norm: 0.9001658978300934, iteration: 130420
loss: 1.0164607763290405,grad_norm: 0.9999990648204028, iteration: 130421
loss: 0.9720878601074219,grad_norm: 0.797759191816456, iteration: 130422
loss: 1.056968092918396,grad_norm: 0.999999177034538, iteration: 130423
loss: 1.0335371494293213,grad_norm: 0.8162525780529403, iteration: 130424
loss: 0.9983950853347778,grad_norm: 0.7707049259912871, iteration: 130425
loss: 0.9757424592971802,grad_norm: 0.8803345652507267, iteration: 130426
loss: 1.0541566610336304,grad_norm: 0.9999990225460441, iteration: 130427
loss: 1.0202620029449463,grad_norm: 0.9030468538666458, iteration: 130428
loss: 1.0402518510818481,grad_norm: 0.9999991167275621, iteration: 130429
loss: 1.060667872428894,grad_norm: 0.9999994583522172, iteration: 130430
loss: 0.9933806657791138,grad_norm: 0.8435985147286181, iteration: 130431
loss: 1.0052666664123535,grad_norm: 0.9999997217442287, iteration: 130432
loss: 0.9979456663131714,grad_norm: 0.999999200535941, iteration: 130433
loss: 0.9722105264663696,grad_norm: 0.6717490004888315, iteration: 130434
loss: 0.9973355531692505,grad_norm: 0.7789670865298521, iteration: 130435
loss: 1.0511136054992676,grad_norm: 0.9999992623421134, iteration: 130436
loss: 1.0120141506195068,grad_norm: 0.9999991542245067, iteration: 130437
loss: 1.012882113456726,grad_norm: 0.9999997080904528, iteration: 130438
loss: 0.9741610288619995,grad_norm: 0.7263919723418882, iteration: 130439
loss: 0.9689773321151733,grad_norm: 0.9606649239285461, iteration: 130440
loss: 0.9986205697059631,grad_norm: 0.9999990262358023, iteration: 130441
loss: 1.019195318222046,grad_norm: 0.7579159412153413, iteration: 130442
loss: 1.0428612232208252,grad_norm: 0.9999995534336071, iteration: 130443
loss: 1.0262547731399536,grad_norm: 0.9999991492071751, iteration: 130444
loss: 1.1104791164398193,grad_norm: 0.9999993145723077, iteration: 130445
loss: 1.0118627548217773,grad_norm: 0.9713155071039646, iteration: 130446
loss: 0.9723371863365173,grad_norm: 0.9283520877461481, iteration: 130447
loss: 1.0181262493133545,grad_norm: 0.7309924257231469, iteration: 130448
loss: 1.1576963663101196,grad_norm: 0.9999996893731528, iteration: 130449
loss: 1.0531281232833862,grad_norm: 0.9760110205273285, iteration: 130450
loss: 1.0097742080688477,grad_norm: 0.9729700192568198, iteration: 130451
loss: 1.0605782270431519,grad_norm: 0.974787387594829, iteration: 130452
loss: 0.9964510798454285,grad_norm: 0.9949713499554076, iteration: 130453
loss: 1.0806037187576294,grad_norm: 0.9999998000810816, iteration: 130454
loss: 0.9845123291015625,grad_norm: 0.9999989848690086, iteration: 130455
loss: 1.080582857131958,grad_norm: 0.99999972868515, iteration: 130456
loss: 1.0558382272720337,grad_norm: 0.9999998002417588, iteration: 130457
loss: 1.0245187282562256,grad_norm: 0.9999991088811904, iteration: 130458
loss: 1.1121978759765625,grad_norm: 0.9999996799355545, iteration: 130459
loss: 1.024133563041687,grad_norm: 0.9999990433141019, iteration: 130460
loss: 1.0485066175460815,grad_norm: 0.8935684768542147, iteration: 130461
loss: 1.003893494606018,grad_norm: 0.7543476278874648, iteration: 130462
loss: 0.9957690238952637,grad_norm: 0.8119514519990179, iteration: 130463
loss: 0.9679393172264099,grad_norm: 0.8912606462060201, iteration: 130464
loss: 1.027510166168213,grad_norm: 0.8923176341813632, iteration: 130465
loss: 1.0113736391067505,grad_norm: 0.9999993790620815, iteration: 130466
loss: 1.0464451313018799,grad_norm: 0.9999994144981457, iteration: 130467
loss: 1.0073400735855103,grad_norm: 0.9999991397596535, iteration: 130468
loss: 1.0243079662322998,grad_norm: 0.9806569551676478, iteration: 130469
loss: 1.1544926166534424,grad_norm: 0.9691549471461393, iteration: 130470
loss: 1.0106946229934692,grad_norm: 0.9999990167013236, iteration: 130471
loss: 1.0692538022994995,grad_norm: 0.9999990179333322, iteration: 130472
loss: 0.9992272257804871,grad_norm: 0.9999996425153654, iteration: 130473
loss: 1.236264944076538,grad_norm: 0.9999997992050541, iteration: 130474
loss: 1.0452741384506226,grad_norm: 0.9999992050564002, iteration: 130475
loss: 1.0436404943466187,grad_norm: 0.9240843487424157, iteration: 130476
loss: 1.0281333923339844,grad_norm: 0.8706863483916993, iteration: 130477
loss: 1.0696866512298584,grad_norm: 0.9999993351812034, iteration: 130478
loss: 1.0026062726974487,grad_norm: 0.8409725297661503, iteration: 130479
loss: 0.9747354984283447,grad_norm: 0.8495617197181244, iteration: 130480
loss: 1.059446096420288,grad_norm: 0.9999992065576374, iteration: 130481
loss: 1.0301202535629272,grad_norm: 0.9999995341493559, iteration: 130482
loss: 1.0856019258499146,grad_norm: 0.9999990639863418, iteration: 130483
loss: 0.9786826372146606,grad_norm: 0.791768492666088, iteration: 130484
loss: 1.0179113149642944,grad_norm: 0.9809747268881812, iteration: 130485
loss: 0.9622926712036133,grad_norm: 0.9027185282451556, iteration: 130486
loss: 0.9845742583274841,grad_norm: 0.9999991777645924, iteration: 130487
loss: 1.0292298793792725,grad_norm: 0.9999991536838938, iteration: 130488
loss: 1.0284388065338135,grad_norm: 0.9999990931093601, iteration: 130489
loss: 0.9966161847114563,grad_norm: 0.8079132186764268, iteration: 130490
loss: 1.02851402759552,grad_norm: 0.9999996714599007, iteration: 130491
loss: 1.0056967735290527,grad_norm: 0.8224326533897444, iteration: 130492
loss: 1.0452057123184204,grad_norm: 0.9400795691466597, iteration: 130493
loss: 1.14561927318573,grad_norm: 0.999999286227238, iteration: 130494
loss: 0.9991419315338135,grad_norm: 0.9799691569618046, iteration: 130495
loss: 1.074223279953003,grad_norm: 0.999999889026141, iteration: 130496
loss: 1.1227023601531982,grad_norm: 0.9999993158880164, iteration: 130497
loss: 1.1814523935317993,grad_norm: 0.9999993417047331, iteration: 130498
loss: 1.0055207014083862,grad_norm: 1.0000000405182434, iteration: 130499
loss: 1.2180171012878418,grad_norm: 0.9999994548264025, iteration: 130500
loss: 0.9979225993156433,grad_norm: 0.9999997158502503, iteration: 130501
loss: 0.9504365921020508,grad_norm: 0.9310703484370154, iteration: 130502
loss: 1.2351795434951782,grad_norm: 0.9999998271374685, iteration: 130503
loss: 1.025974988937378,grad_norm: 0.8828578626568651, iteration: 130504
loss: 0.9978237152099609,grad_norm: 0.9999993075125557, iteration: 130505
loss: 1.0117634534835815,grad_norm: 0.9999991331759932, iteration: 130506
loss: 1.0182822942733765,grad_norm: 0.9999992801846196, iteration: 130507
loss: 1.109574794769287,grad_norm: 0.9999991255992041, iteration: 130508
loss: 1.2565534114837646,grad_norm: 0.9999995147916999, iteration: 130509
loss: 1.0111439228057861,grad_norm: 0.9999990825516889, iteration: 130510
loss: 0.9962824583053589,grad_norm: 0.8019153121854474, iteration: 130511
loss: 1.1512898206710815,grad_norm: 0.9999998846855155, iteration: 130512
loss: 1.0479705333709717,grad_norm: 0.999999096644613, iteration: 130513
loss: 1.1192057132720947,grad_norm: 0.9999997233283388, iteration: 130514
loss: 1.1149061918258667,grad_norm: 0.9999998649430427, iteration: 130515
loss: 1.0012445449829102,grad_norm: 0.9554182909724029, iteration: 130516
loss: 1.116736650466919,grad_norm: 0.9999996159747664, iteration: 130517
loss: 1.1235014200210571,grad_norm: 0.999999920927852, iteration: 130518
loss: 1.0774444341659546,grad_norm: 0.9999993301998124, iteration: 130519
loss: 1.0811725854873657,grad_norm: 0.9999992074265646, iteration: 130520
loss: 1.0534138679504395,grad_norm: 0.9999993079477464, iteration: 130521
loss: 0.9805070757865906,grad_norm: 0.9111825909825555, iteration: 130522
loss: 1.1331537961959839,grad_norm: 0.9999992837190858, iteration: 130523
loss: 1.0528409481048584,grad_norm: 0.9999999194870104, iteration: 130524
loss: 1.0796458721160889,grad_norm: 0.9999998690734871, iteration: 130525
loss: 1.0752485990524292,grad_norm: 0.9999992578946562, iteration: 130526
loss: 1.017142415046692,grad_norm: 0.9082824223353728, iteration: 130527
loss: 1.1947964429855347,grad_norm: 0.9999997474709048, iteration: 130528
loss: 1.0690739154815674,grad_norm: 0.99999936656786, iteration: 130529
loss: 1.1014416217803955,grad_norm: 0.9999995485589274, iteration: 130530
loss: 1.0849864482879639,grad_norm: 0.9999993654791933, iteration: 130531
loss: 1.1621652841567993,grad_norm: 0.9999997209503344, iteration: 130532
loss: 1.0692030191421509,grad_norm: 0.999999620504705, iteration: 130533
loss: 1.175005316734314,grad_norm: 0.9999999146179269, iteration: 130534
loss: 1.2845356464385986,grad_norm: 0.9999998490936941, iteration: 130535
loss: 1.0193177461624146,grad_norm: 0.9999993891936563, iteration: 130536
loss: 1.0155295133590698,grad_norm: 0.8844169023452958, iteration: 130537
loss: 1.0862113237380981,grad_norm: 0.9999991766788627, iteration: 130538
loss: 1.2562930583953857,grad_norm: 0.9999998060830554, iteration: 130539
loss: 1.1354148387908936,grad_norm: 0.9999995212318384, iteration: 130540
loss: 1.1188386678695679,grad_norm: 0.9999999402201586, iteration: 130541
loss: 1.1438058614730835,grad_norm: 0.9999991788204909, iteration: 130542
loss: 1.0679113864898682,grad_norm: 0.9999998396599119, iteration: 130543
loss: 1.0041916370391846,grad_norm: 0.8750931079109405, iteration: 130544
loss: 1.1810111999511719,grad_norm: 0.9999998340845996, iteration: 130545
loss: 0.99237060546875,grad_norm: 0.9999998999237792, iteration: 130546
loss: 1.1074469089508057,grad_norm: 0.999999296833422, iteration: 130547
loss: 1.0160309076309204,grad_norm: 0.9999998458560464, iteration: 130548
loss: 1.0910658836364746,grad_norm: 0.999999382119226, iteration: 130549
loss: 1.0478078126907349,grad_norm: 0.9999996332131258, iteration: 130550
loss: 1.074782371520996,grad_norm: 0.9533542953783414, iteration: 130551
loss: 1.2052266597747803,grad_norm: 0.9999999057894993, iteration: 130552
loss: 1.0341378450393677,grad_norm: 0.9999991459043885, iteration: 130553
loss: 1.0642704963684082,grad_norm: 0.9999992193353007, iteration: 130554
loss: 1.2618393898010254,grad_norm: 0.9999996911257454, iteration: 130555
loss: 1.218612790107727,grad_norm: 0.9999997265828834, iteration: 130556
loss: 1.0873537063598633,grad_norm: 0.9999991884933941, iteration: 130557
loss: 1.0179921388626099,grad_norm: 0.9999989720240168, iteration: 130558
loss: 1.1928379535675049,grad_norm: 1.0000000460716538, iteration: 130559
loss: 1.0285385847091675,grad_norm: 0.999999594257116, iteration: 130560
loss: 1.161336898803711,grad_norm: 0.9999996414308683, iteration: 130561
loss: 1.0302984714508057,grad_norm: 0.9984903758095119, iteration: 130562
loss: 1.1170092821121216,grad_norm: 0.9999997120211418, iteration: 130563
loss: 1.2136636972427368,grad_norm: 0.9999999381570275, iteration: 130564
loss: 1.0747212171554565,grad_norm: 0.9999992494249953, iteration: 130565
loss: 1.1358059644699097,grad_norm: 0.9999995453386855, iteration: 130566
loss: 1.0265761613845825,grad_norm: 0.9999997942168679, iteration: 130567
loss: 1.0573859214782715,grad_norm: 0.9999996380782724, iteration: 130568
loss: 1.070074439048767,grad_norm: 0.9999999194456947, iteration: 130569
loss: 1.0578052997589111,grad_norm: 0.9999993927162518, iteration: 130570
loss: 1.0818253755569458,grad_norm: 0.9999998461040288, iteration: 130571
loss: 1.1135659217834473,grad_norm: 0.9999996432452011, iteration: 130572
loss: 1.0770680904388428,grad_norm: 0.9999992673430892, iteration: 130573
loss: 1.1713088750839233,grad_norm: 0.9999999263506468, iteration: 130574
loss: 1.398939609527588,grad_norm: 0.9999995685075946, iteration: 130575
loss: 1.3101773262023926,grad_norm: 0.9999997574119995, iteration: 130576
loss: 1.2930306196212769,grad_norm: 0.9999998566162112, iteration: 130577
loss: 1.47543203830719,grad_norm: 0.9999999231313073, iteration: 130578
loss: 1.1863776445388794,grad_norm: 0.9999992507101444, iteration: 130579
loss: 1.5459023714065552,grad_norm: 0.9999997788840521, iteration: 130580
loss: 1.5438560247421265,grad_norm: 0.999999966975786, iteration: 130581
loss: 1.1151607036590576,grad_norm: 0.9999991440967189, iteration: 130582
loss: 1.183182716369629,grad_norm: 0.999999833105836, iteration: 130583
loss: 1.3484103679656982,grad_norm: 0.9999993839507103, iteration: 130584
loss: 1.2459112405776978,grad_norm: 0.9999999406249807, iteration: 130585
loss: 1.0330724716186523,grad_norm: 0.9999997399625246, iteration: 130586
loss: 1.1518635749816895,grad_norm: 0.9999996875712565, iteration: 130587
loss: 1.6124776601791382,grad_norm: 0.9999999688783543, iteration: 130588
loss: 1.6572428941726685,grad_norm: 0.9999996489761332, iteration: 130589
loss: 1.3402560949325562,grad_norm: 0.9999998913700602, iteration: 130590
loss: 1.2175599336624146,grad_norm: 0.9999999546923258, iteration: 130591
loss: 1.0453907251358032,grad_norm: 0.9999996919314331, iteration: 130592
loss: 1.086132526397705,grad_norm: 0.9999999582089669, iteration: 130593
loss: 1.0266095399856567,grad_norm: 1.0000000698030806, iteration: 130594
loss: 1.162441372871399,grad_norm: 0.9999990967207476, iteration: 130595
loss: 1.1939314603805542,grad_norm: 0.9999998494831722, iteration: 130596
loss: 1.0590285062789917,grad_norm: 0.9999996755548578, iteration: 130597
loss: 1.175642967224121,grad_norm: 0.9999999640148046, iteration: 130598
loss: 1.0570247173309326,grad_norm: 0.9999995279990415, iteration: 130599
loss: 1.0844736099243164,grad_norm: 0.9999993677747214, iteration: 130600
loss: 1.132575511932373,grad_norm: 0.9999996553071493, iteration: 130601
loss: 1.208932638168335,grad_norm: 0.9999995507831478, iteration: 130602
loss: 1.1679707765579224,grad_norm: 0.9999997338105946, iteration: 130603
loss: 1.093230128288269,grad_norm: 0.9999994611478578, iteration: 130604
loss: 1.1180870532989502,grad_norm: 0.9999996159114267, iteration: 130605
loss: 1.2656614780426025,grad_norm: 0.999999909774168, iteration: 130606
loss: 1.0205439329147339,grad_norm: 0.9999997884091208, iteration: 130607
loss: 1.0936861038208008,grad_norm: 0.9999994647438388, iteration: 130608
loss: 1.2045080661773682,grad_norm: 0.9999996415891419, iteration: 130609
loss: 1.4061847925186157,grad_norm: 0.9999994369737054, iteration: 130610
loss: 1.413474202156067,grad_norm: 0.9999994485131649, iteration: 130611
loss: 1.3434693813323975,grad_norm: 0.9999996392019427, iteration: 130612
loss: 1.0414962768554688,grad_norm: 0.9676663746968375, iteration: 130613
loss: 1.1285954713821411,grad_norm: 0.9999991705750358, iteration: 130614
loss: 1.2539854049682617,grad_norm: 0.9999996290940196, iteration: 130615
loss: 1.297795057296753,grad_norm: 0.9999997543494634, iteration: 130616
loss: 1.3173893690109253,grad_norm: 0.9999994700265149, iteration: 130617
loss: 1.1962463855743408,grad_norm: 0.9999999762529832, iteration: 130618
loss: 1.0469330549240112,grad_norm: 0.9999991935437411, iteration: 130619
loss: 1.107262134552002,grad_norm: 0.9999999342030212, iteration: 130620
loss: 1.0709757804870605,grad_norm: 0.9999992425015823, iteration: 130621
loss: 1.03572416305542,grad_norm: 0.9327198200644754, iteration: 130622
loss: 1.1538532972335815,grad_norm: 0.9999995897218097, iteration: 130623
loss: 1.123876929283142,grad_norm: 0.9999994027679474, iteration: 130624
loss: 1.0058643817901611,grad_norm: 0.8806662717994943, iteration: 130625
loss: 1.1206797361373901,grad_norm: 0.9999997116583365, iteration: 130626
loss: 1.0172535181045532,grad_norm: 0.9999991005134794, iteration: 130627
loss: 1.0683974027633667,grad_norm: 0.9999992361711252, iteration: 130628
loss: 1.280511498451233,grad_norm: 0.9999998588201839, iteration: 130629
loss: 1.0229154825210571,grad_norm: 0.9999991988734069, iteration: 130630
loss: 1.221421241760254,grad_norm: 0.9999998414200996, iteration: 130631
loss: 1.153056025505066,grad_norm: 0.9999993511664051, iteration: 130632
loss: 1.1744239330291748,grad_norm: 0.9999992181734868, iteration: 130633
loss: 1.0869280099868774,grad_norm: 0.9999996155269679, iteration: 130634
loss: 1.2122045755386353,grad_norm: 0.9999995086393326, iteration: 130635
loss: 1.134982943534851,grad_norm: 0.999999353783471, iteration: 130636
loss: 1.0720810890197754,grad_norm: 0.9999996801365849, iteration: 130637
loss: 1.1981028318405151,grad_norm: 0.9999995463236333, iteration: 130638
loss: 1.0242857933044434,grad_norm: 0.9999998624252104, iteration: 130639
loss: 1.043462872505188,grad_norm: 0.9999994287568271, iteration: 130640
loss: 1.1047219038009644,grad_norm: 0.9999995753306339, iteration: 130641
loss: 1.0749504566192627,grad_norm: 0.9999995590938621, iteration: 130642
loss: 1.040875792503357,grad_norm: 0.9999996750815481, iteration: 130643
loss: 0.9709745645523071,grad_norm: 0.9999991641745454, iteration: 130644
loss: 1.0575871467590332,grad_norm: 0.9999993084093716, iteration: 130645
loss: 1.2088757753372192,grad_norm: 0.9999995078946381, iteration: 130646
loss: 1.1440671682357788,grad_norm: 0.9999997960542363, iteration: 130647
loss: 1.0592684745788574,grad_norm: 0.9999994518630783, iteration: 130648
loss: 1.0704556703567505,grad_norm: 0.9999995765384244, iteration: 130649
loss: 1.1213206052780151,grad_norm: 0.99999930097997, iteration: 130650
loss: 1.231316089630127,grad_norm: 0.9999998794257785, iteration: 130651
loss: 1.1444188356399536,grad_norm: 0.9999992259638363, iteration: 130652
loss: 1.0685396194458008,grad_norm: 0.999999252825569, iteration: 130653
loss: 1.0006786584854126,grad_norm: 0.9999991763324781, iteration: 130654
loss: 1.0800307989120483,grad_norm: 0.9999995971514686, iteration: 130655
loss: 1.12250554561615,grad_norm: 0.9999996216747745, iteration: 130656
loss: 1.0141077041625977,grad_norm: 0.8976279416251024, iteration: 130657
loss: 1.1017036437988281,grad_norm: 0.9999997269256936, iteration: 130658
loss: 1.0097366571426392,grad_norm: 0.9999992933356328, iteration: 130659
loss: 1.006673812866211,grad_norm: 0.9999993187271813, iteration: 130660
loss: 1.0406018495559692,grad_norm: 0.9999997151909096, iteration: 130661
loss: 1.049296259880066,grad_norm: 0.8706773873159155, iteration: 130662
loss: 1.0205869674682617,grad_norm: 0.9999999883863557, iteration: 130663
loss: 1.0108633041381836,grad_norm: 0.9854485351756821, iteration: 130664
loss: 1.001229166984558,grad_norm: 0.9999993488672235, iteration: 130665
loss: 1.2043659687042236,grad_norm: 0.9999996286360388, iteration: 130666
loss: 0.9574792981147766,grad_norm: 0.8352395704974296, iteration: 130667
loss: 0.9772269129753113,grad_norm: 0.995305757377136, iteration: 130668
loss: 1.102218508720398,grad_norm: 0.9999990257564004, iteration: 130669
loss: 1.0630958080291748,grad_norm: 0.8428683814471527, iteration: 130670
loss: 1.0802756547927856,grad_norm: 0.9999991967788057, iteration: 130671
loss: 1.0417412519454956,grad_norm: 0.9999994035586731, iteration: 130672
loss: 1.0898102521896362,grad_norm: 0.9999991612685805, iteration: 130673
loss: 1.0712785720825195,grad_norm: 0.999999350408019, iteration: 130674
loss: 1.0393143892288208,grad_norm: 0.9999996252708071, iteration: 130675
loss: 1.027158260345459,grad_norm: 0.9154418399064473, iteration: 130676
loss: 1.0140630006790161,grad_norm: 0.9308212195802134, iteration: 130677
loss: 1.1931113004684448,grad_norm: 0.9999993277728495, iteration: 130678
loss: 1.091490626335144,grad_norm: 0.9999996666723012, iteration: 130679
loss: 1.0447814464569092,grad_norm: 0.9999990897995693, iteration: 130680
loss: 0.9621416330337524,grad_norm: 0.9999991631278234, iteration: 130681
loss: 1.000567078590393,grad_norm: 0.9938374187331322, iteration: 130682
loss: 1.0301628112792969,grad_norm: 0.9574782369036119, iteration: 130683
loss: 1.0005029439926147,grad_norm: 0.9999997944343307, iteration: 130684
loss: 1.2363132238388062,grad_norm: 0.9999996892902551, iteration: 130685
loss: 1.0804702043533325,grad_norm: 0.8943121088448216, iteration: 130686
loss: 1.0826278924942017,grad_norm: 0.9999999483224539, iteration: 130687
loss: 0.9933000206947327,grad_norm: 0.8926186346066342, iteration: 130688
loss: 1.0181996822357178,grad_norm: 0.9999999064523343, iteration: 130689
loss: 1.1086134910583496,grad_norm: 0.9999991247580592, iteration: 130690
loss: 1.1253360509872437,grad_norm: 0.9999997311197119, iteration: 130691
loss: 1.1316274404525757,grad_norm: 0.9999995542319762, iteration: 130692
loss: 1.0746800899505615,grad_norm: 0.9999996365663595, iteration: 130693
loss: 1.0764296054840088,grad_norm: 0.9999995336065839, iteration: 130694
loss: 1.1805596351623535,grad_norm: 1.0000000546556516, iteration: 130695
loss: 1.0073527097702026,grad_norm: 0.9999999757341439, iteration: 130696
loss: 1.0034465789794922,grad_norm: 0.9747327112886361, iteration: 130697
loss: 1.0724327564239502,grad_norm: 0.9999992340064288, iteration: 130698
loss: 1.016596794128418,grad_norm: 0.9999998509594262, iteration: 130699
loss: 1.053518533706665,grad_norm: 0.9999992691380899, iteration: 130700
loss: 1.1633185148239136,grad_norm: 1.0000000337289254, iteration: 130701
loss: 0.9966605305671692,grad_norm: 0.8470764318806387, iteration: 130702
loss: 1.0582072734832764,grad_norm: 0.8782127784645396, iteration: 130703
loss: 1.0152561664581299,grad_norm: 0.8923311643614383, iteration: 130704
loss: 1.1695140600204468,grad_norm: 0.9999995702896175, iteration: 130705
loss: 1.0964943170547485,grad_norm: 0.999999484475061, iteration: 130706
loss: 1.0013670921325684,grad_norm: 0.9331020870970794, iteration: 130707
loss: 1.0218842029571533,grad_norm: 0.9999991046723927, iteration: 130708
loss: 1.053813099861145,grad_norm: 0.9999996416758122, iteration: 130709
loss: 1.0136640071868896,grad_norm: 0.9999992661164243, iteration: 130710
loss: 1.0311003923416138,grad_norm: 0.9559911428211756, iteration: 130711
loss: 0.9756152629852295,grad_norm: 0.8497799717792118, iteration: 130712
loss: 1.028743028640747,grad_norm: 0.9828546567811437, iteration: 130713
loss: 0.9960174560546875,grad_norm: 0.9999994243380621, iteration: 130714
loss: 1.016746163368225,grad_norm: 0.9999994710929495, iteration: 130715
loss: 0.986234724521637,grad_norm: 0.8368569127740498, iteration: 130716
loss: 1.0521794557571411,grad_norm: 0.9999992266433348, iteration: 130717
loss: 1.1079732179641724,grad_norm: 0.9999993492314802, iteration: 130718
loss: 1.105023980140686,grad_norm: 0.9999995749685662, iteration: 130719
loss: 1.0154826641082764,grad_norm: 0.9999991218924845, iteration: 130720
loss: 1.0614491701126099,grad_norm: 0.999999499082505, iteration: 130721
loss: 1.108849048614502,grad_norm: 0.9999997063565246, iteration: 130722
loss: 0.9700839519500732,grad_norm: 0.9478040262090045, iteration: 130723
loss: 0.9872695803642273,grad_norm: 0.9234466358401094, iteration: 130724
loss: 0.9799323678016663,grad_norm: 0.7889974596692354, iteration: 130725
loss: 1.100239872932434,grad_norm: 0.9999991874010613, iteration: 130726
loss: 1.0532268285751343,grad_norm: 0.9999992963260277, iteration: 130727
loss: 1.0286269187927246,grad_norm: 0.9293517670835617, iteration: 130728
loss: 1.051071047782898,grad_norm: 0.9999991753326497, iteration: 130729
loss: 0.9943616986274719,grad_norm: 0.7793601932383583, iteration: 130730
loss: 1.0194486379623413,grad_norm: 0.8952146568330361, iteration: 130731
loss: 0.9938613176345825,grad_norm: 0.8430701323159784, iteration: 130732
loss: 1.0545001029968262,grad_norm: 0.999999415928015, iteration: 130733
loss: 0.9679809808731079,grad_norm: 0.9999997968264946, iteration: 130734
loss: 1.0179497003555298,grad_norm: 0.9999991282720436, iteration: 130735
loss: 1.0895729064941406,grad_norm: 0.9999998837420009, iteration: 130736
loss: 0.9733678698539734,grad_norm: 0.8782746093598901, iteration: 130737
loss: 0.9706040024757385,grad_norm: 0.7929681664466427, iteration: 130738
loss: 0.9597229361534119,grad_norm: 0.8574051651776503, iteration: 130739
loss: 1.010704517364502,grad_norm: 0.7304141715146362, iteration: 130740
loss: 1.052414894104004,grad_norm: 0.9496928463600759, iteration: 130741
loss: 1.071772813796997,grad_norm: 0.8741362527871875, iteration: 130742
loss: 0.9983816742897034,grad_norm: 0.9999990902597938, iteration: 130743
loss: 1.0846796035766602,grad_norm: 0.8287058377899589, iteration: 130744
loss: 1.014247179031372,grad_norm: 0.9999991200050338, iteration: 130745
loss: 1.0349750518798828,grad_norm: 0.9999990846200837, iteration: 130746
loss: 1.0528415441513062,grad_norm: 0.9999996316830622, iteration: 130747
loss: 1.0129700899124146,grad_norm: 0.9999996158741757, iteration: 130748
loss: 1.1002733707427979,grad_norm: 0.9999996037584925, iteration: 130749
loss: 0.9718747138977051,grad_norm: 0.9999991244330508, iteration: 130750
loss: 1.1654350757598877,grad_norm: 0.9999992148597757, iteration: 130751
loss: 0.9985058307647705,grad_norm: 0.9455545829722241, iteration: 130752
loss: 1.1455134153366089,grad_norm: 0.9999992191428035, iteration: 130753
loss: 1.0195956230163574,grad_norm: 0.9084373527733092, iteration: 130754
loss: 1.1308379173278809,grad_norm: 0.9999992603414366, iteration: 130755
loss: 1.060295820236206,grad_norm: 0.9999996232579559, iteration: 130756
loss: 1.0450985431671143,grad_norm: 0.9999991172001745, iteration: 130757
loss: 1.0409387350082397,grad_norm: 0.9999996548592812, iteration: 130758
loss: 0.9904605150222778,grad_norm: 0.9999991326981503, iteration: 130759
loss: 0.9946967959403992,grad_norm: 0.765005574550786, iteration: 130760
loss: 0.9938070178031921,grad_norm: 0.9999993965228439, iteration: 130761
loss: 0.9914308786392212,grad_norm: 0.8429397457692033, iteration: 130762
loss: 1.0081322193145752,grad_norm: 0.9999996369160052, iteration: 130763
loss: 1.0991181135177612,grad_norm: 0.9999991256612728, iteration: 130764
loss: 1.0961166620254517,grad_norm: 0.964915509153743, iteration: 130765
loss: 1.1349682807922363,grad_norm: 0.9999997831051702, iteration: 130766
loss: 1.0686064958572388,grad_norm: 0.9999991309985532, iteration: 130767
loss: 1.0377247333526611,grad_norm: 0.9999994116932865, iteration: 130768
loss: 1.1054235696792603,grad_norm: 0.9999993773096765, iteration: 130769
loss: 1.060227394104004,grad_norm: 0.9999994864792241, iteration: 130770
loss: 1.0409899950027466,grad_norm: 0.9466329675659134, iteration: 130771
loss: 1.006313681602478,grad_norm: 0.9999999863889644, iteration: 130772
loss: 0.994079053401947,grad_norm: 0.8490329028420451, iteration: 130773
loss: 1.012280821800232,grad_norm: 0.9999991023070014, iteration: 130774
loss: 1.0330477952957153,grad_norm: 0.9732849143669338, iteration: 130775
loss: 1.0668081045150757,grad_norm: 0.9999995594580432, iteration: 130776
loss: 1.0275843143463135,grad_norm: 0.8407305860502218, iteration: 130777
loss: 1.087328314781189,grad_norm: 0.999999017835389, iteration: 130778
loss: 1.083048701286316,grad_norm: 0.9999992905417037, iteration: 130779
loss: 1.0195739269256592,grad_norm: 0.9999990426446759, iteration: 130780
loss: 1.009393572807312,grad_norm: 0.7544687385445266, iteration: 130781
loss: 1.0715148448944092,grad_norm: 0.9999994466308285, iteration: 130782
loss: 1.0302196741104126,grad_norm: 0.9999992828559222, iteration: 130783
loss: 1.0977919101715088,grad_norm: 0.9999997242410151, iteration: 130784
loss: 0.9306732416152954,grad_norm: 0.9352008221308546, iteration: 130785
loss: 1.1069378852844238,grad_norm: 0.9999996399577165, iteration: 130786
loss: 1.0233763456344604,grad_norm: 0.9999998579368112, iteration: 130787
loss: 0.9866755604743958,grad_norm: 0.942840428374542, iteration: 130788
loss: 1.057580828666687,grad_norm: 0.9999993495392244, iteration: 130789
loss: 1.0124549865722656,grad_norm: 0.8366205264291349, iteration: 130790
loss: 1.013243317604065,grad_norm: 0.9999990701116851, iteration: 130791
loss: 1.0083413124084473,grad_norm: 0.9999990701744212, iteration: 130792
loss: 1.0706543922424316,grad_norm: 0.8710522913072989, iteration: 130793
loss: 1.0267503261566162,grad_norm: 0.9999998012015671, iteration: 130794
loss: 1.0441436767578125,grad_norm: 0.9999995062062893, iteration: 130795
loss: 1.03180730342865,grad_norm: 0.999999914455315, iteration: 130796
loss: 1.0096874237060547,grad_norm: 0.999999128838988, iteration: 130797
loss: 1.0256707668304443,grad_norm: 0.9825928064409576, iteration: 130798
loss: 1.1956429481506348,grad_norm: 0.9999999956478707, iteration: 130799
loss: 1.031484603881836,grad_norm: 0.9456173349899044, iteration: 130800
loss: 1.011807918548584,grad_norm: 0.9999989687896889, iteration: 130801
loss: 0.9974752068519592,grad_norm: 0.9328078098982979, iteration: 130802
loss: 1.0133332014083862,grad_norm: 0.9999992945350155, iteration: 130803
loss: 0.9949485659599304,grad_norm: 0.9575015924727319, iteration: 130804
loss: 1.059590220451355,grad_norm: 0.9999995184521078, iteration: 130805
loss: 0.9960194230079651,grad_norm: 0.9455352473116809, iteration: 130806
loss: 1.0541118383407593,grad_norm: 0.9999991725090545, iteration: 130807
loss: 0.9913454055786133,grad_norm: 0.999999005672902, iteration: 130808
loss: 0.9978899955749512,grad_norm: 0.9482828349764996, iteration: 130809
loss: 1.1086004972457886,grad_norm: 0.9999998787603568, iteration: 130810
loss: 1.2568434476852417,grad_norm: 0.9999998750796893, iteration: 130811
loss: 1.1638003587722778,grad_norm: 0.9999999381338851, iteration: 130812
loss: 1.057687520980835,grad_norm: 0.9999994641830073, iteration: 130813
loss: 1.1020441055297852,grad_norm: 0.9999994632316261, iteration: 130814
loss: 1.1112717390060425,grad_norm: 0.999999563713448, iteration: 130815
loss: 1.0504356622695923,grad_norm: 0.9569898044509887, iteration: 130816
loss: 1.068612813949585,grad_norm: 0.9999998201132981, iteration: 130817
loss: 0.9857657551765442,grad_norm: 0.9999993143649485, iteration: 130818
loss: 1.0489532947540283,grad_norm: 0.9999998017151596, iteration: 130819
loss: 0.9962919354438782,grad_norm: 0.9144896076892662, iteration: 130820
loss: 1.352205514907837,grad_norm: 0.9999996225728583, iteration: 130821
loss: 1.0402324199676514,grad_norm: 0.9999992644415718, iteration: 130822
loss: 1.0925984382629395,grad_norm: 0.999999433209142, iteration: 130823
loss: 1.107901930809021,grad_norm: 0.9999992672014129, iteration: 130824
loss: 1.153773546218872,grad_norm: 0.9999998589121333, iteration: 130825
loss: 1.0020169019699097,grad_norm: 0.9999993490216164, iteration: 130826
loss: 1.0536794662475586,grad_norm: 0.9999998980856116, iteration: 130827
loss: 1.004895567893982,grad_norm: 0.9999991705530127, iteration: 130828
loss: 1.000808596611023,grad_norm: 0.8939611155003471, iteration: 130829
loss: 1.0537936687469482,grad_norm: 0.8618003601256797, iteration: 130830
loss: 0.9769797921180725,grad_norm: 0.7848863925909259, iteration: 130831
loss: 0.9982128143310547,grad_norm: 0.8750531641143582, iteration: 130832
loss: 0.9827898144721985,grad_norm: 0.9999994154617089, iteration: 130833
loss: 1.0378869771957397,grad_norm: 0.999999272775414, iteration: 130834
loss: 1.0613247156143188,grad_norm: 0.9999994354965517, iteration: 130835
loss: 1.024472713470459,grad_norm: 0.999999629016614, iteration: 130836
loss: 1.0361213684082031,grad_norm: 0.999999756684813, iteration: 130837
loss: 1.0093728303909302,grad_norm: 0.9999992367077495, iteration: 130838
loss: 1.0002883672714233,grad_norm: 0.8214883968812188, iteration: 130839
loss: 1.1466649770736694,grad_norm: 0.9999999430356729, iteration: 130840
loss: 1.0053654909133911,grad_norm: 0.9255044607973845, iteration: 130841
loss: 1.0154732465744019,grad_norm: 0.8774287775019491, iteration: 130842
loss: 1.0058172941207886,grad_norm: 0.9768305715822905, iteration: 130843
loss: 1.118943214416504,grad_norm: 0.999999793843363, iteration: 130844
loss: 1.1781227588653564,grad_norm: 0.9999994766089534, iteration: 130845
loss: 1.0038493871688843,grad_norm: 0.8874137880911072, iteration: 130846
loss: 0.9910016059875488,grad_norm: 0.9330849631300348, iteration: 130847
loss: 0.9991931915283203,grad_norm: 0.999999177397561, iteration: 130848
loss: 1.039010763168335,grad_norm: 0.9999997624738838, iteration: 130849
loss: 1.0288405418395996,grad_norm: 0.9999999126034415, iteration: 130850
loss: 1.0085031986236572,grad_norm: 0.9999990863840359, iteration: 130851
loss: 1.0173532962799072,grad_norm: 0.9999995892856883, iteration: 130852
loss: 1.0298274755477905,grad_norm: 0.9904885365756193, iteration: 130853
loss: 1.0244582891464233,grad_norm: 0.9999994663622938, iteration: 130854
loss: 1.1013269424438477,grad_norm: 0.9999997687388358, iteration: 130855
loss: 1.0850554704666138,grad_norm: 0.9999993286033503, iteration: 130856
loss: 1.0812433958053589,grad_norm: 0.9999996977236548, iteration: 130857
loss: 0.935663640499115,grad_norm: 0.89066260917928, iteration: 130858
loss: 1.0340867042541504,grad_norm: 0.9999993336479832, iteration: 130859
loss: 1.0678294897079468,grad_norm: 0.9999990242986335, iteration: 130860
loss: 1.0201483964920044,grad_norm: 0.8166977217501402, iteration: 130861
loss: 0.9824724793434143,grad_norm: 0.9221082758323867, iteration: 130862
loss: 1.0741548538208008,grad_norm: 0.9999993590036086, iteration: 130863
loss: 0.968795120716095,grad_norm: 0.7143177653195558, iteration: 130864
loss: 1.0337733030319214,grad_norm: 0.9999998259923193, iteration: 130865
loss: 1.0250121355056763,grad_norm: 0.9999991280962707, iteration: 130866
loss: 1.0093978643417358,grad_norm: 0.8218890463300829, iteration: 130867
loss: 1.0146080255508423,grad_norm: 0.9999992549149573, iteration: 130868
loss: 1.0277928113937378,grad_norm: 0.9999990666118465, iteration: 130869
loss: 1.0227000713348389,grad_norm: 0.9999992116537155, iteration: 130870
loss: 1.0320590734481812,grad_norm: 0.9947146919842828, iteration: 130871
loss: 1.0714854001998901,grad_norm: 0.9999998521572182, iteration: 130872
loss: 1.0439811944961548,grad_norm: 0.9462720051349066, iteration: 130873
loss: 1.0182723999023438,grad_norm: 0.9999992043614077, iteration: 130874
loss: 1.1838685274124146,grad_norm: 0.9999998701601333, iteration: 130875
loss: 1.1322909593582153,grad_norm: 0.9999994576537546, iteration: 130876
loss: 1.0444486141204834,grad_norm: 0.9555281237274427, iteration: 130877
loss: 0.9899443984031677,grad_norm: 0.8645406080925541, iteration: 130878
loss: 1.1749345064163208,grad_norm: 0.9999996943239623, iteration: 130879
loss: 1.1638317108154297,grad_norm: 0.9999993879136542, iteration: 130880
loss: 1.1627618074417114,grad_norm: 0.9999994618301866, iteration: 130881
loss: 1.0515795946121216,grad_norm: 0.9999993840271112, iteration: 130882
loss: 1.1101186275482178,grad_norm: 0.9999995447828233, iteration: 130883
loss: 1.2457584142684937,grad_norm: 0.9999997765435239, iteration: 130884
loss: 1.1712298393249512,grad_norm: 0.999999664007904, iteration: 130885
loss: 1.0800933837890625,grad_norm: 0.9999991833775713, iteration: 130886
loss: 1.1090096235275269,grad_norm: 0.9999993203596447, iteration: 130887
loss: 1.1523466110229492,grad_norm: 0.9999991446696148, iteration: 130888
loss: 1.0401545763015747,grad_norm: 0.9999996630760808, iteration: 130889
loss: 1.078955888748169,grad_norm: 0.9999997265407236, iteration: 130890
loss: 1.020471215248108,grad_norm: 0.9480080380269076, iteration: 130891
loss: 1.0570083856582642,grad_norm: 0.9999996196616755, iteration: 130892
loss: 1.0121259689331055,grad_norm: 0.9533237923090851, iteration: 130893
loss: 1.2622593641281128,grad_norm: 0.9999997569568695, iteration: 130894
loss: 1.0578835010528564,grad_norm: 0.9999990932957546, iteration: 130895
loss: 0.9913466572761536,grad_norm: 0.9001512855172336, iteration: 130896
loss: 1.0665704011917114,grad_norm: 0.9999992427884421, iteration: 130897
loss: 1.0806487798690796,grad_norm: 0.9999994048069392, iteration: 130898
loss: 1.046994924545288,grad_norm: 0.9999993872895712, iteration: 130899
loss: 1.0973988771438599,grad_norm: 0.9999998297048265, iteration: 130900
loss: 1.1157969236373901,grad_norm: 0.9999999296052926, iteration: 130901
loss: 1.1070057153701782,grad_norm: 0.9999996904442855, iteration: 130902
loss: 1.0590529441833496,grad_norm: 0.9999990972933688, iteration: 130903
loss: 1.0181618928909302,grad_norm: 0.9999996017028994, iteration: 130904
loss: 0.992247998714447,grad_norm: 0.8907730724335986, iteration: 130905
loss: 1.0009198188781738,grad_norm: 0.8864258070491188, iteration: 130906
loss: 1.2483177185058594,grad_norm: 1.0000000828104496, iteration: 130907
loss: 1.0073504447937012,grad_norm: 0.806260523211293, iteration: 130908
loss: 1.0304901599884033,grad_norm: 0.9999994049241885, iteration: 130909
loss: 1.0458694696426392,grad_norm: 0.999999358756362, iteration: 130910
loss: 1.1093875169754028,grad_norm: 0.9999994206666746, iteration: 130911
loss: 1.0508733987808228,grad_norm: 0.9999997623146335, iteration: 130912
loss: 1.1046555042266846,grad_norm: 0.9999995411612507, iteration: 130913
loss: 1.1244171857833862,grad_norm: 0.999999935326417, iteration: 130914
loss: 1.0413569211959839,grad_norm: 0.9999991785916081, iteration: 130915
loss: 1.111627221107483,grad_norm: 0.9999992587626093, iteration: 130916
loss: 1.0395362377166748,grad_norm: 0.9999993528152078, iteration: 130917
loss: 1.0211617946624756,grad_norm: 0.9999997591875672, iteration: 130918
loss: 1.0371618270874023,grad_norm: 0.9999993105861456, iteration: 130919
loss: 1.031829833984375,grad_norm: 0.8820893434088025, iteration: 130920
loss: 1.0364673137664795,grad_norm: 0.9999990882990236, iteration: 130921
loss: 1.1573783159255981,grad_norm: 0.9999998983363687, iteration: 130922
loss: 1.02444326877594,grad_norm: 0.9815346399536413, iteration: 130923
loss: 1.0141981840133667,grad_norm: 0.9999996245077497, iteration: 130924
loss: 1.0890225172042847,grad_norm: 0.9999992558766808, iteration: 130925
loss: 1.070309042930603,grad_norm: 0.9999995269376941, iteration: 130926
loss: 1.1010035276412964,grad_norm: 0.9999997127207044, iteration: 130927
loss: 1.019370198249817,grad_norm: 0.8967198692103391, iteration: 130928
loss: 1.236445426940918,grad_norm: 0.9999991009193895, iteration: 130929
loss: 1.0873291492462158,grad_norm: 0.9999996674646624, iteration: 130930
loss: 1.0455918312072754,grad_norm: 0.9999991487649879, iteration: 130931
loss: 1.057270884513855,grad_norm: 0.9999995973835621, iteration: 130932
loss: 1.0697641372680664,grad_norm: 0.999999428913777, iteration: 130933
loss: 1.112177848815918,grad_norm: 0.9999990438431218, iteration: 130934
loss: 1.1428565979003906,grad_norm: 0.9999999518602197, iteration: 130935
loss: 1.0582228899002075,grad_norm: 0.9999995454781521, iteration: 130936
loss: 1.0705043077468872,grad_norm: 0.9999997375388451, iteration: 130937
loss: 1.0154311656951904,grad_norm: 0.9999998337954678, iteration: 130938
loss: 1.0310744047164917,grad_norm: 0.999999787671543, iteration: 130939
loss: 1.0265378952026367,grad_norm: 0.9999994268646606, iteration: 130940
loss: 1.0972365140914917,grad_norm: 0.9999995491878589, iteration: 130941
loss: 1.0325148105621338,grad_norm: 0.9999994139994198, iteration: 130942
loss: 1.1568055152893066,grad_norm: 0.9999998436834786, iteration: 130943
loss: 1.0480308532714844,grad_norm: 0.9999993883427761, iteration: 130944
loss: 1.1597237586975098,grad_norm: 0.9999997094438461, iteration: 130945
loss: 1.197308897972107,grad_norm: 0.9999998856789478, iteration: 130946
loss: 0.9978845119476318,grad_norm: 0.9999991324548302, iteration: 130947
loss: 1.1915894746780396,grad_norm: 0.9999999928846409, iteration: 130948
loss: 1.15536367893219,grad_norm: 0.9999995993288345, iteration: 130949
loss: 1.0674355030059814,grad_norm: 0.9999991889829474, iteration: 130950
loss: 1.0677218437194824,grad_norm: 0.999999289112611, iteration: 130951
loss: 1.0785201787948608,grad_norm: 1.0000000011340564, iteration: 130952
loss: 1.1762818098068237,grad_norm: 0.9999999501557713, iteration: 130953
loss: 1.2601178884506226,grad_norm: 0.9999998788734124, iteration: 130954
loss: 1.146466851234436,grad_norm: 0.999999951736205, iteration: 130955
loss: 1.0047593116760254,grad_norm: 0.8855555431836847, iteration: 130956
loss: 1.0932269096374512,grad_norm: 0.9999996660204477, iteration: 130957
loss: 1.0076091289520264,grad_norm: 0.9999994077465614, iteration: 130958
loss: 1.0764579772949219,grad_norm: 0.999999716891966, iteration: 130959
loss: 1.0705835819244385,grad_norm: 0.871766620735176, iteration: 130960
loss: 0.9828842282295227,grad_norm: 0.9027503752652688, iteration: 130961
loss: 1.0546329021453857,grad_norm: 0.9999998906404034, iteration: 130962
loss: 1.1775885820388794,grad_norm: 0.9999998560379322, iteration: 130963
loss: 1.038543462753296,grad_norm: 0.9999991434068443, iteration: 130964
loss: 1.0049127340316772,grad_norm: 0.849048188081362, iteration: 130965
loss: 0.9888277649879456,grad_norm: 0.9128655894686515, iteration: 130966
loss: 0.9702226519584656,grad_norm: 0.9999991206099816, iteration: 130967
loss: 1.0291811227798462,grad_norm: 0.8509973392878701, iteration: 130968
loss: 1.0289013385772705,grad_norm: 0.9999991170053826, iteration: 130969
loss: 1.0707730054855347,grad_norm: 0.9999992802360167, iteration: 130970
loss: 1.0052164793014526,grad_norm: 0.9999993462687375, iteration: 130971
loss: 1.0911599397659302,grad_norm: 0.9999991012306368, iteration: 130972
loss: 1.0146894454956055,grad_norm: 0.9388251138945422, iteration: 130973
loss: 1.1850985288619995,grad_norm: 0.9999995836609774, iteration: 130974
loss: 1.151942491531372,grad_norm: 0.9999995772216053, iteration: 130975
loss: 1.0065257549285889,grad_norm: 0.9041365738319684, iteration: 130976
loss: 1.064063310623169,grad_norm: 0.9999991159002181, iteration: 130977
loss: 1.0421860218048096,grad_norm: 0.9999999101122972, iteration: 130978
loss: 1.2595325708389282,grad_norm: 0.999999815561375, iteration: 130979
loss: 1.026723861694336,grad_norm: 0.9999991620354101, iteration: 130980
loss: 1.0182089805603027,grad_norm: 0.9999992212048204, iteration: 130981
loss: 1.0634593963623047,grad_norm: 0.9999989876366717, iteration: 130982
loss: 1.001038670539856,grad_norm: 0.9999997054882162, iteration: 130983
loss: 1.0049190521240234,grad_norm: 0.9999997682967406, iteration: 130984
loss: 1.1041420698165894,grad_norm: 0.9999993463531128, iteration: 130985
loss: 0.9929913878440857,grad_norm: 0.9999992044000536, iteration: 130986
loss: 1.0596470832824707,grad_norm: 0.9999991153292908, iteration: 130987
loss: 1.0099130868911743,grad_norm: 0.8346187122503386, iteration: 130988
loss: 1.0581871271133423,grad_norm: 0.9999992814567331, iteration: 130989
loss: 1.0831915140151978,grad_norm: 0.9999999018926461, iteration: 130990
loss: 1.0224943161010742,grad_norm: 0.9999998099583691, iteration: 130991
loss: 1.047234058380127,grad_norm: 0.9129424494531976, iteration: 130992
loss: 1.1538920402526855,grad_norm: 0.9999998406827733, iteration: 130993
loss: 1.036519169807434,grad_norm: 0.9999998686213143, iteration: 130994
loss: 1.1592905521392822,grad_norm: 0.9999994516021266, iteration: 130995
loss: 0.970030665397644,grad_norm: 0.9999990721199946, iteration: 130996
loss: 1.055391788482666,grad_norm: 0.9999991237343914, iteration: 130997
loss: 1.0347130298614502,grad_norm: 0.9999991957485206, iteration: 130998
loss: 1.0965036153793335,grad_norm: 0.9999993914808899, iteration: 130999
loss: 1.0736870765686035,grad_norm: 0.9999999071177174, iteration: 131000
loss: 1.114522933959961,grad_norm: 0.9999991695827415, iteration: 131001
loss: 1.079453468322754,grad_norm: 0.9999992021196998, iteration: 131002
loss: 1.2976634502410889,grad_norm: 0.99999945396638, iteration: 131003
loss: 1.06630539894104,grad_norm: 0.9999998314285924, iteration: 131004
loss: 1.0297083854675293,grad_norm: 0.999999160422097, iteration: 131005
loss: 1.0455540418624878,grad_norm: 0.9465316247761322, iteration: 131006
loss: 1.079938292503357,grad_norm: 0.9999998479819833, iteration: 131007
loss: 1.1043771505355835,grad_norm: 0.9999991719406002, iteration: 131008
loss: 1.1150646209716797,grad_norm: 0.9999995641557791, iteration: 131009
loss: 0.9836796522140503,grad_norm: 0.9999990820718851, iteration: 131010
loss: 1.04086434841156,grad_norm: 0.999999688338841, iteration: 131011
loss: 1.111246943473816,grad_norm: 0.9999998715563767, iteration: 131012
loss: 1.0497684478759766,grad_norm: 0.9999991555655022, iteration: 131013
loss: 1.0973461866378784,grad_norm: 0.9999999144757167, iteration: 131014
loss: 1.0053856372833252,grad_norm: 0.9999991027665359, iteration: 131015
loss: 1.039772868156433,grad_norm: 1.0000000004141856, iteration: 131016
loss: 1.0412793159484863,grad_norm: 0.9886237576672844, iteration: 131017
loss: 0.9792906641960144,grad_norm: 0.8199047935234873, iteration: 131018
loss: 1.0183969736099243,grad_norm: 0.8931259765664623, iteration: 131019
loss: 0.9908187985420227,grad_norm: 0.7241223439547598, iteration: 131020
loss: 1.0059758424758911,grad_norm: 0.999999794600402, iteration: 131021
loss: 1.0083742141723633,grad_norm: 0.9018927165779844, iteration: 131022
loss: 1.1978827714920044,grad_norm: 0.9999998883666713, iteration: 131023
loss: 1.0697195529937744,grad_norm: 0.999998999795671, iteration: 131024
loss: 1.0107210874557495,grad_norm: 0.9999998226364607, iteration: 131025
loss: 1.0763729810714722,grad_norm: 0.9999997184653481, iteration: 131026
loss: 1.16859769821167,grad_norm: 0.9999999245704733, iteration: 131027
loss: 1.0674512386322021,grad_norm: 0.9999991837591425, iteration: 131028
loss: 1.0650410652160645,grad_norm: 0.9999991677615182, iteration: 131029
loss: 1.0331217050552368,grad_norm: 0.9360439996710884, iteration: 131030
loss: 1.0002467632293701,grad_norm: 0.999999416423266, iteration: 131031
loss: 0.9751731157302856,grad_norm: 0.9457901041358525, iteration: 131032
loss: 1.0362173318862915,grad_norm: 0.9999999133986901, iteration: 131033
loss: 1.0666097402572632,grad_norm: 0.9999993257510522, iteration: 131034
loss: 1.0313329696655273,grad_norm: 0.9999993905889705, iteration: 131035
loss: 1.0183566808700562,grad_norm: 0.9922082290303526, iteration: 131036
loss: 1.0479117631912231,grad_norm: 0.9999991465014428, iteration: 131037
loss: 1.0235788822174072,grad_norm: 0.9999991441475369, iteration: 131038
loss: 0.954566478729248,grad_norm: 0.8005528534019732, iteration: 131039
loss: 1.0419191122055054,grad_norm: 0.9999993325253258, iteration: 131040
loss: 1.015397310256958,grad_norm: 0.7707127872158382, iteration: 131041
loss: 1.124996304512024,grad_norm: 0.9999992252299237, iteration: 131042
loss: 1.0166927576065063,grad_norm: 0.9999993379153881, iteration: 131043
loss: 1.2097879648208618,grad_norm: 0.9999998773594093, iteration: 131044
loss: 1.0379217863082886,grad_norm: 0.999999950116628, iteration: 131045
loss: 0.9944072961807251,grad_norm: 0.9985963388140612, iteration: 131046
loss: 1.0182000398635864,grad_norm: 0.999999150997351, iteration: 131047
loss: 1.0007398128509521,grad_norm: 0.9999989540112227, iteration: 131048
loss: 1.061047077178955,grad_norm: 0.9751873606395006, iteration: 131049
loss: 1.0099565982818604,grad_norm: 0.972310826414064, iteration: 131050
loss: 0.9708325266838074,grad_norm: 0.8187247074377059, iteration: 131051
loss: 0.9969574213027954,grad_norm: 0.9999993023391608, iteration: 131052
loss: 1.070020318031311,grad_norm: 0.9999994035081458, iteration: 131053
loss: 1.004269003868103,grad_norm: 0.9999991843706592, iteration: 131054
loss: 1.0017101764678955,grad_norm: 0.8252658132723467, iteration: 131055
loss: 1.0121442079544067,grad_norm: 0.8856336570620821, iteration: 131056
loss: 1.1105726957321167,grad_norm: 0.9999991371039686, iteration: 131057
loss: 1.0457175970077515,grad_norm: 0.9999991254022326, iteration: 131058
loss: 1.0248273611068726,grad_norm: 0.9553053349752736, iteration: 131059
loss: 1.0177174806594849,grad_norm: 0.99999919068869, iteration: 131060
loss: 0.9580210447311401,grad_norm: 0.9021750906244896, iteration: 131061
loss: 0.9909151792526245,grad_norm: 0.9095051548267249, iteration: 131062
loss: 1.01292884349823,grad_norm: 0.9999996921095317, iteration: 131063
loss: 0.9936668872833252,grad_norm: 0.9951209426955546, iteration: 131064
loss: 1.0071704387664795,grad_norm: 0.8801286490193199, iteration: 131065
loss: 1.0276257991790771,grad_norm: 0.938953038374484, iteration: 131066
loss: 1.049862265586853,grad_norm: 0.9999998803282949, iteration: 131067
loss: 0.9763050675392151,grad_norm: 0.9870670518489694, iteration: 131068
loss: 0.9848664402961731,grad_norm: 0.9999992710978016, iteration: 131069
loss: 0.980807900428772,grad_norm: 0.9999989985576183, iteration: 131070
loss: 0.965569257736206,grad_norm: 0.9955833652021647, iteration: 131071
loss: 0.9845878481864929,grad_norm: 0.9999991209655007, iteration: 131072
loss: 0.9807231426239014,grad_norm: 0.999999133876549, iteration: 131073
loss: 0.9789324402809143,grad_norm: 1.000000030673123, iteration: 131074
loss: 1.0185551643371582,grad_norm: 0.9999989451345536, iteration: 131075
loss: 1.0185880661010742,grad_norm: 0.8813082868561078, iteration: 131076
loss: 1.015020728111267,grad_norm: 0.7270635421828352, iteration: 131077
loss: 1.0312272310256958,grad_norm: 0.9222652531941778, iteration: 131078
loss: 1.0121489763259888,grad_norm: 0.9999992966199104, iteration: 131079
loss: 1.0909547805786133,grad_norm: 0.9999998708125175, iteration: 131080
loss: 1.013555884361267,grad_norm: 0.926319623533945, iteration: 131081
loss: 0.989199697971344,grad_norm: 0.9999990759787578, iteration: 131082
loss: 1.0674867630004883,grad_norm: 0.9999991920919878, iteration: 131083
loss: 1.0102745294570923,grad_norm: 0.999999934213634, iteration: 131084
loss: 1.0727053880691528,grad_norm: 0.9999999666765658, iteration: 131085
loss: 1.0410312414169312,grad_norm: 0.9999996981647055, iteration: 131086
loss: 1.0048712491989136,grad_norm: 0.9412036673875162, iteration: 131087
loss: 1.1215695142745972,grad_norm: 0.9999991007701312, iteration: 131088
loss: 0.9563388228416443,grad_norm: 0.9999990795193658, iteration: 131089
loss: 1.009558916091919,grad_norm: 0.9345661650130821, iteration: 131090
loss: 0.9886709451675415,grad_norm: 0.8457203607699617, iteration: 131091
loss: 1.0046054124832153,grad_norm: 0.8830841239215319, iteration: 131092
loss: 1.0849264860153198,grad_norm: 0.9999999810369812, iteration: 131093
loss: 1.0177050828933716,grad_norm: 0.975018293119824, iteration: 131094
loss: 1.0768213272094727,grad_norm: 0.9999999921626047, iteration: 131095
loss: 1.026140570640564,grad_norm: 0.9999990314015039, iteration: 131096
loss: 0.9807400107383728,grad_norm: 0.9980707788992114, iteration: 131097
loss: 1.0215102434158325,grad_norm: 0.8396150416029484, iteration: 131098
loss: 0.984679102897644,grad_norm: 0.9310032318282111, iteration: 131099
loss: 0.957389771938324,grad_norm: 0.7700472469467265, iteration: 131100
loss: 0.9881038665771484,grad_norm: 0.8739156095178735, iteration: 131101
loss: 1.0487730503082275,grad_norm: 0.888752496820047, iteration: 131102
loss: 1.079245924949646,grad_norm: 0.9999998745790204, iteration: 131103
loss: 0.9962526559829712,grad_norm: 0.943336244290627, iteration: 131104
loss: 1.0438425540924072,grad_norm: 0.9999991624791055, iteration: 131105
loss: 1.0023199319839478,grad_norm: 0.864002502469978, iteration: 131106
loss: 1.1679373979568481,grad_norm: 0.9999994901074344, iteration: 131107
loss: 1.1440155506134033,grad_norm: 0.9999994836876226, iteration: 131108
loss: 1.0882484912872314,grad_norm: 0.8064796465867397, iteration: 131109
loss: 0.99448162317276,grad_norm: 0.8591036411748854, iteration: 131110
loss: 0.9721125364303589,grad_norm: 0.9999992551176872, iteration: 131111
loss: 1.0810801982879639,grad_norm: 0.9999997494055846, iteration: 131112
loss: 1.091884732246399,grad_norm: 0.9999993410183947, iteration: 131113
loss: 1.0361582040786743,grad_norm: 0.804602702529192, iteration: 131114
loss: 1.0610066652297974,grad_norm: 0.9718414216871555, iteration: 131115
loss: 1.0589981079101562,grad_norm: 0.9999991963068593, iteration: 131116
loss: 1.0290902853012085,grad_norm: 0.9999994307989731, iteration: 131117
loss: 1.0232465267181396,grad_norm: 0.9999992843196737, iteration: 131118
loss: 1.0170681476593018,grad_norm: 0.752853954619675, iteration: 131119
loss: 0.9431114196777344,grad_norm: 0.9640271623940703, iteration: 131120
loss: 1.0128499269485474,grad_norm: 0.9831536353056212, iteration: 131121
loss: 0.9905398488044739,grad_norm: 0.7858066704346189, iteration: 131122
loss: 1.0159344673156738,grad_norm: 0.921247119938808, iteration: 131123
loss: 1.0534242391586304,grad_norm: 0.9057869949164787, iteration: 131124
loss: 1.004184365272522,grad_norm: 0.999999127874162, iteration: 131125
loss: 1.0455008745193481,grad_norm: 0.999999169642246, iteration: 131126
loss: 1.0052189826965332,grad_norm: 0.8096712958902388, iteration: 131127
loss: 0.9866528511047363,grad_norm: 0.970420826874973, iteration: 131128
loss: 1.1083459854125977,grad_norm: 0.9999990882224774, iteration: 131129
loss: 1.1296061277389526,grad_norm: 0.9999992009464688, iteration: 131130
loss: 1.004193902015686,grad_norm: 0.9999991241693278, iteration: 131131
loss: 1.1164698600769043,grad_norm: 0.9999997180509305, iteration: 131132
loss: 1.0068442821502686,grad_norm: 0.8191970088751999, iteration: 131133
loss: 1.114214301109314,grad_norm: 0.9999998318432667, iteration: 131134
loss: 0.9951309561729431,grad_norm: 0.8373074410151483, iteration: 131135
loss: 1.1113953590393066,grad_norm: 0.9940102987362632, iteration: 131136
loss: 1.0630171298980713,grad_norm: 0.9095278503354935, iteration: 131137
loss: 1.0286870002746582,grad_norm: 0.9999991360721935, iteration: 131138
loss: 1.2898335456848145,grad_norm: 0.9999999812637499, iteration: 131139
loss: 0.9983053803443909,grad_norm: 0.9999996378942619, iteration: 131140
loss: 0.9608442187309265,grad_norm: 0.9999993831393381, iteration: 131141
loss: 1.0494012832641602,grad_norm: 0.9702922243181754, iteration: 131142
loss: 1.0715534687042236,grad_norm: 0.994871835274366, iteration: 131143
loss: 0.9734688997268677,grad_norm: 0.9121283791620001, iteration: 131144
loss: 1.0184736251831055,grad_norm: 0.8647568090101273, iteration: 131145
loss: 1.0100106000900269,grad_norm: 0.9261738696046934, iteration: 131146
loss: 1.0709174871444702,grad_norm: 0.9999998327838436, iteration: 131147
loss: 0.9998216032981873,grad_norm: 0.9999989787769754, iteration: 131148
loss: 1.010789155960083,grad_norm: 0.9999995758561375, iteration: 131149
loss: 0.984244167804718,grad_norm: 0.9159728355616209, iteration: 131150
loss: 1.0147883892059326,grad_norm: 0.9999994970678331, iteration: 131151
loss: 1.013749361038208,grad_norm: 0.999999113436382, iteration: 131152
loss: 0.9961994290351868,grad_norm: 0.9966854272602278, iteration: 131153
loss: 1.0158125162124634,grad_norm: 0.7806268139029789, iteration: 131154
loss: 1.0283011198043823,grad_norm: 0.7914501409993577, iteration: 131155
loss: 1.023486614227295,grad_norm: 0.9999991115766942, iteration: 131156
loss: 1.547982096672058,grad_norm: 0.9999999361252656, iteration: 131157
loss: 0.9861974716186523,grad_norm: 0.9999991926361934, iteration: 131158
loss: 1.019695520401001,grad_norm: 0.9057184288608086, iteration: 131159
loss: 1.0326673984527588,grad_norm: 0.888935627420727, iteration: 131160
loss: 1.0016543865203857,grad_norm: 0.8210335852784949, iteration: 131161
loss: 0.9654383063316345,grad_norm: 0.9999990852234911, iteration: 131162
loss: 1.014647364616394,grad_norm: 0.7898429683569688, iteration: 131163
loss: 1.0326380729675293,grad_norm: 0.8091732566674616, iteration: 131164
loss: 1.032570481300354,grad_norm: 0.977543265629475, iteration: 131165
loss: 0.9997442364692688,grad_norm: 0.9305995509233012, iteration: 131166
loss: 1.0068416595458984,grad_norm: 0.999999287995157, iteration: 131167
loss: 0.9969267249107361,grad_norm: 0.8272152805343015, iteration: 131168
loss: 1.0397014617919922,grad_norm: 0.8531991365570141, iteration: 131169
loss: 0.9894120097160339,grad_norm: 0.9115305756990677, iteration: 131170
loss: 0.9997185468673706,grad_norm: 0.9302788709503369, iteration: 131171
loss: 1.0300166606903076,grad_norm: 0.9999998372625498, iteration: 131172
loss: 1.0163326263427734,grad_norm: 0.8345141714914588, iteration: 131173
loss: 1.0173805952072144,grad_norm: 0.9999992385754933, iteration: 131174
loss: 0.997822642326355,grad_norm: 0.9654746991478003, iteration: 131175
loss: 1.0053492784500122,grad_norm: 0.9999994314253059, iteration: 131176
loss: 1.109811782836914,grad_norm: 0.9999997728681831, iteration: 131177
loss: 1.045178771018982,grad_norm: 0.9999991545266593, iteration: 131178
loss: 0.9803983569145203,grad_norm: 0.9349581158938134, iteration: 131179
loss: 1.0108190774917603,grad_norm: 0.9999995519993803, iteration: 131180
loss: 1.0201096534729004,grad_norm: 0.9022632065556427, iteration: 131181
loss: 1.010635256767273,grad_norm: 0.8746508127491015, iteration: 131182
loss: 1.022463083267212,grad_norm: 0.8959542839851589, iteration: 131183
loss: 0.9856434464454651,grad_norm: 0.9999991287807808, iteration: 131184
loss: 0.9712515473365784,grad_norm: 0.8780098089221141, iteration: 131185
loss: 1.0412418842315674,grad_norm: 0.871504852839898, iteration: 131186
loss: 1.0040318965911865,grad_norm: 0.9999994223065231, iteration: 131187
loss: 1.0054396390914917,grad_norm: 0.999999849044221, iteration: 131188
loss: 0.9730846285820007,grad_norm: 0.7566666842714806, iteration: 131189
loss: 1.0212942361831665,grad_norm: 0.9999995565393284, iteration: 131190
loss: 1.073691725730896,grad_norm: 0.908414351697997, iteration: 131191
loss: 1.0661358833312988,grad_norm: 1.0000000085811092, iteration: 131192
loss: 0.9887485504150391,grad_norm: 0.8484631316062282, iteration: 131193
loss: 1.0299333333969116,grad_norm: 0.9640423712590577, iteration: 131194
loss: 1.0629569292068481,grad_norm: 0.9364549362513013, iteration: 131195
loss: 1.0557949542999268,grad_norm: 0.9514379795399402, iteration: 131196
loss: 1.0660645961761475,grad_norm: 0.8135092852932737, iteration: 131197
loss: 1.1012159585952759,grad_norm: 0.9999999910663296, iteration: 131198
loss: 1.0639702081680298,grad_norm: 0.9999995228149613, iteration: 131199
loss: 1.0290225744247437,grad_norm: 0.9999991536040984, iteration: 131200
loss: 0.9758440852165222,grad_norm: 0.9858492308861478, iteration: 131201
loss: 1.1150799989700317,grad_norm: 0.9999999729578328, iteration: 131202
loss: 1.1076146364212036,grad_norm: 0.9999994305033552, iteration: 131203
loss: 1.0004653930664062,grad_norm: 0.8642047276892254, iteration: 131204
loss: 1.0285111665725708,grad_norm: 0.9999994149565251, iteration: 131205
loss: 1.0207672119140625,grad_norm: 0.99999927934284, iteration: 131206
loss: 0.987975537776947,grad_norm: 0.7788925635944425, iteration: 131207
loss: 1.0761260986328125,grad_norm: 0.8852010814236125, iteration: 131208
loss: 1.0079655647277832,grad_norm: 0.8531432432891325, iteration: 131209
loss: 1.1485514640808105,grad_norm: 0.9999999893770619, iteration: 131210
loss: 1.0470592975616455,grad_norm: 0.9999996421369132, iteration: 131211
loss: 1.067873477935791,grad_norm: 0.9819273348194224, iteration: 131212
loss: 1.1218558549880981,grad_norm: 0.9522527626620053, iteration: 131213
loss: 1.0411198139190674,grad_norm: 0.9999994485939102, iteration: 131214
loss: 1.0319340229034424,grad_norm: 0.9999991179577762, iteration: 131215
loss: 1.0979788303375244,grad_norm: 0.9999995725023072, iteration: 131216
loss: 1.0144352912902832,grad_norm: 0.999999654614263, iteration: 131217
loss: 1.1033337116241455,grad_norm: 0.9999991841290388, iteration: 131218
loss: 1.0196577310562134,grad_norm: 0.8560661866901222, iteration: 131219
loss: 1.291593313217163,grad_norm: 0.9999999053157871, iteration: 131220
loss: 1.0522793531417847,grad_norm: 0.9999991245597478, iteration: 131221
loss: 1.033249020576477,grad_norm: 0.9999998069484576, iteration: 131222
loss: 1.0016772747039795,grad_norm: 0.9999998812095447, iteration: 131223
loss: 1.0777325630187988,grad_norm: 0.9999990074879276, iteration: 131224
loss: 1.0216494798660278,grad_norm: 0.9999996406147494, iteration: 131225
loss: 1.0846004486083984,grad_norm: 0.8623656126159556, iteration: 131226
loss: 1.112052083015442,grad_norm: 0.9999995844966597, iteration: 131227
loss: 1.0022565126419067,grad_norm: 0.9999991656721132, iteration: 131228
loss: 0.9968016743659973,grad_norm: 0.8728071226118869, iteration: 131229
loss: 0.9860281348228455,grad_norm: 0.9973114739269399, iteration: 131230
loss: 1.1205376386642456,grad_norm: 0.999999738600839, iteration: 131231
loss: 1.123235821723938,grad_norm: 0.9999999205896231, iteration: 131232
loss: 0.9777492880821228,grad_norm: 0.9971062516424944, iteration: 131233
loss: 1.0671651363372803,grad_norm: 0.9999993949605691, iteration: 131234
loss: 1.0217232704162598,grad_norm: 0.9999992174426471, iteration: 131235
loss: 1.0538330078125,grad_norm: 0.999999330988836, iteration: 131236
loss: 1.0003622770309448,grad_norm: 0.9999993555880621, iteration: 131237
loss: 1.02248215675354,grad_norm: 0.8671506434575623, iteration: 131238
loss: 1.0056602954864502,grad_norm: 0.9999994337768228, iteration: 131239
loss: 1.0248063802719116,grad_norm: 0.9999989710600842, iteration: 131240
loss: 0.9975216388702393,grad_norm: 0.9999996609286597, iteration: 131241
loss: 0.9711650013923645,grad_norm: 0.8643906739622947, iteration: 131242
loss: 1.0128778219223022,grad_norm: 0.9999999323033121, iteration: 131243
loss: 1.034282922744751,grad_norm: 0.9060544614903361, iteration: 131244
loss: 1.067581295967102,grad_norm: 0.9999994658591289, iteration: 131245
loss: 1.0147939920425415,grad_norm: 0.8635048962037816, iteration: 131246
loss: 1.0468419790267944,grad_norm: 0.9999991512729535, iteration: 131247
loss: 1.0558078289031982,grad_norm: 0.9999990801938797, iteration: 131248
loss: 0.9793144464492798,grad_norm: 0.7436714104388935, iteration: 131249
loss: 1.0828779935836792,grad_norm: 0.9999999533485433, iteration: 131250
loss: 1.0130614042282104,grad_norm: 0.9177964545394285, iteration: 131251
loss: 1.016668438911438,grad_norm: 0.9082849283530628, iteration: 131252
loss: 1.0311534404754639,grad_norm: 0.9855408682685459, iteration: 131253
loss: 1.1585965156555176,grad_norm: 0.9999989545236236, iteration: 131254
loss: 1.1204832792282104,grad_norm: 0.9999998336367181, iteration: 131255
loss: 0.9925149083137512,grad_norm: 0.9762826939608206, iteration: 131256
loss: 1.0856672525405884,grad_norm: 0.9999990296520159, iteration: 131257
loss: 1.0853649377822876,grad_norm: 0.9999992173915947, iteration: 131258
loss: 1.0157848596572876,grad_norm: 0.7901168727071582, iteration: 131259
loss: 1.0001237392425537,grad_norm: 0.834398169143156, iteration: 131260
loss: 1.0347721576690674,grad_norm: 0.9999992083501316, iteration: 131261
loss: 1.171561360359192,grad_norm: 0.9999991991257803, iteration: 131262
loss: 0.9980186820030212,grad_norm: 0.874494724085544, iteration: 131263
loss: 1.1484957933425903,grad_norm: 0.9999994887697514, iteration: 131264
loss: 0.9632325768470764,grad_norm: 0.802714577697936, iteration: 131265
loss: 0.9858037829399109,grad_norm: 0.9999991022585635, iteration: 131266
loss: 1.0321986675262451,grad_norm: 0.9296022665171173, iteration: 131267
loss: 1.069034457206726,grad_norm: 0.9999989747194098, iteration: 131268
loss: 0.9857101440429688,grad_norm: 0.8567086378664646, iteration: 131269
loss: 1.0303033590316772,grad_norm: 0.9999997560089913, iteration: 131270
loss: 1.0335208177566528,grad_norm: 0.9999998736527348, iteration: 131271
loss: 1.0616039037704468,grad_norm: 0.9999990774413323, iteration: 131272
loss: 1.123173713684082,grad_norm: 0.9999994290889188, iteration: 131273
loss: 1.0931895971298218,grad_norm: 0.9999997721601129, iteration: 131274
loss: 1.0932061672210693,grad_norm: 0.9999995779130357, iteration: 131275
loss: 0.9985445737838745,grad_norm: 0.9040483545744884, iteration: 131276
loss: 0.993155837059021,grad_norm: 0.8375578755332236, iteration: 131277
loss: 1.027237057685852,grad_norm: 0.9999991760900604, iteration: 131278
loss: 1.160857081413269,grad_norm: 0.999999295288319, iteration: 131279
loss: 1.0028523206710815,grad_norm: 0.9999998863471686, iteration: 131280
loss: 1.0094573497772217,grad_norm: 0.7644402130929975, iteration: 131281
loss: 0.965972900390625,grad_norm: 0.9999995396909263, iteration: 131282
loss: 0.9891735911369324,grad_norm: 0.8241375107459619, iteration: 131283
loss: 1.1518747806549072,grad_norm: 0.9999996263863283, iteration: 131284
loss: 1.03950035572052,grad_norm: 0.9999998287952623, iteration: 131285
loss: 1.0700018405914307,grad_norm: 0.9999991383367436, iteration: 131286
loss: 1.018898367881775,grad_norm: 0.9999998998882961, iteration: 131287
loss: 1.003298044204712,grad_norm: 0.8080652926509799, iteration: 131288
loss: 1.0339206457138062,grad_norm: 0.8500569922071197, iteration: 131289
loss: 1.262778878211975,grad_norm: 0.9999995380265532, iteration: 131290
loss: 1.197998046875,grad_norm: 0.9999994097414054, iteration: 131291
loss: 0.9769558906555176,grad_norm: 0.9301917616808884, iteration: 131292
loss: 1.0019580125808716,grad_norm: 0.9320862226651725, iteration: 131293
loss: 1.0158097743988037,grad_norm: 0.9204793069606202, iteration: 131294
loss: 1.0701931715011597,grad_norm: 0.9999994614317876, iteration: 131295
loss: 1.0269569158554077,grad_norm: 0.999999446103738, iteration: 131296
loss: 0.9994580745697021,grad_norm: 0.9999994195022957, iteration: 131297
loss: 1.0123558044433594,grad_norm: 0.9999994999940265, iteration: 131298
loss: 0.9776955842971802,grad_norm: 0.9112118594760299, iteration: 131299
loss: 0.9428166151046753,grad_norm: 0.8830232963413066, iteration: 131300
loss: 1.2268651723861694,grad_norm: 0.9999998504926062, iteration: 131301
loss: 1.0323054790496826,grad_norm: 0.9999993467230441, iteration: 131302
loss: 1.0134422779083252,grad_norm: 0.9999991438700442, iteration: 131303
loss: 0.9745121598243713,grad_norm: 0.9004547697797891, iteration: 131304
loss: 1.019127607345581,grad_norm: 0.7834032453175656, iteration: 131305
loss: 1.203355312347412,grad_norm: 0.9999992370406038, iteration: 131306
loss: 0.9981560707092285,grad_norm: 0.9280411550332357, iteration: 131307
loss: 1.0253419876098633,grad_norm: 0.9112564785562054, iteration: 131308
loss: 0.9894953370094299,grad_norm: 0.9278290381339646, iteration: 131309
loss: 1.0218349695205688,grad_norm: 0.8177334367943413, iteration: 131310
loss: 1.015727162361145,grad_norm: 0.8374133247110489, iteration: 131311
loss: 1.13479483127594,grad_norm: 0.9999997569360706, iteration: 131312
loss: 1.0353657007217407,grad_norm: 0.9893190855332358, iteration: 131313
loss: 0.9989850521087646,grad_norm: 0.895584377340654, iteration: 131314
loss: 0.9950072169303894,grad_norm: 0.9999992231748118, iteration: 131315
loss: 1.0327481031417847,grad_norm: 0.9519700633258646, iteration: 131316
loss: 0.9940584301948547,grad_norm: 0.9999991499437932, iteration: 131317
loss: 1.0200303792953491,grad_norm: 0.7895052763180386, iteration: 131318
loss: 1.05194091796875,grad_norm: 0.9999993213452968, iteration: 131319
loss: 1.0175385475158691,grad_norm: 0.9999996877945939, iteration: 131320
loss: 1.214545488357544,grad_norm: 0.9999998389026505, iteration: 131321
loss: 0.9939757585525513,grad_norm: 0.999999170261752, iteration: 131322
loss: 1.0067903995513916,grad_norm: 0.8857929135162518, iteration: 131323
loss: 0.9895933866500854,grad_norm: 0.9999991622160757, iteration: 131324
loss: 1.016294002532959,grad_norm: 0.9999992811903823, iteration: 131325
loss: 0.9872656464576721,grad_norm: 0.8868550051985336, iteration: 131326
loss: 1.015250563621521,grad_norm: 0.8090424710793228, iteration: 131327
loss: 0.9747048020362854,grad_norm: 0.9279419974300879, iteration: 131328
loss: 0.9758636951446533,grad_norm: 0.7630129066487769, iteration: 131329
loss: 1.057776927947998,grad_norm: 0.999999748972569, iteration: 131330
loss: 1.0081008672714233,grad_norm: 0.9467635822328658, iteration: 131331
loss: 1.0117429494857788,grad_norm: 0.9894863898983377, iteration: 131332
loss: 1.040819764137268,grad_norm: 0.9999999582559119, iteration: 131333
loss: 1.102326512336731,grad_norm: 0.9999993183365625, iteration: 131334
loss: 1.0259418487548828,grad_norm: 0.91099806271265, iteration: 131335
loss: 1.0824360847473145,grad_norm: 0.9999994884113933, iteration: 131336
loss: 1.0748411417007446,grad_norm: 0.9999992561595219, iteration: 131337
loss: 0.9914782047271729,grad_norm: 0.9060636930559364, iteration: 131338
loss: 1.0127378702163696,grad_norm: 0.9999990007161739, iteration: 131339
loss: 1.0416687726974487,grad_norm: 0.9999997460777563, iteration: 131340
loss: 1.0620388984680176,grad_norm: 0.9999998928001831, iteration: 131341
loss: 1.0418627262115479,grad_norm: 0.9999995564748806, iteration: 131342
loss: 1.0080198049545288,grad_norm: 0.7835049497508054, iteration: 131343
loss: 1.1416938304901123,grad_norm: 0.9999993179850998, iteration: 131344
loss: 1.06422758102417,grad_norm: 0.9999989275605231, iteration: 131345
loss: 1.0802247524261475,grad_norm: 0.999999560054173, iteration: 131346
loss: 1.1396892070770264,grad_norm: 0.9999992939532103, iteration: 131347
loss: 1.0455727577209473,grad_norm: 0.8365131297850703, iteration: 131348
loss: 1.0564275979995728,grad_norm: 0.999999563561467, iteration: 131349
loss: 1.01616370677948,grad_norm: 0.99999990665084, iteration: 131350
loss: 1.0936799049377441,grad_norm: 0.9999999284179057, iteration: 131351
loss: 1.017374873161316,grad_norm: 0.8750310455492329, iteration: 131352
loss: 1.0264344215393066,grad_norm: 0.9999990684433484, iteration: 131353
loss: 1.0698965787887573,grad_norm: 0.9999991165986013, iteration: 131354
loss: 1.0193852186203003,grad_norm: 0.9999990572835459, iteration: 131355
loss: 1.0792982578277588,grad_norm: 0.9999997307691986, iteration: 131356
loss: 1.063987374305725,grad_norm: 0.9999996123433368, iteration: 131357
loss: 1.0306978225708008,grad_norm: 0.9999993467988829, iteration: 131358
loss: 1.0485354661941528,grad_norm: 0.9999995197460785, iteration: 131359
loss: 1.0263760089874268,grad_norm: 0.9999996401215594, iteration: 131360
loss: 1.0063265562057495,grad_norm: 0.9081574457197211, iteration: 131361
loss: 1.059471607208252,grad_norm: 0.9999992778994368, iteration: 131362
loss: 1.0971659421920776,grad_norm: 0.9999992453178157, iteration: 131363
loss: 1.0084266662597656,grad_norm: 0.942602408011194, iteration: 131364
loss: 1.020474910736084,grad_norm: 0.9999996161725957, iteration: 131365
loss: 1.021150827407837,grad_norm: 0.9999989856825233, iteration: 131366
loss: 1.0228275060653687,grad_norm: 0.9999999102851437, iteration: 131367
loss: 1.0426725149154663,grad_norm: 0.9999990971545579, iteration: 131368
loss: 1.0251487493515015,grad_norm: 0.999999984351689, iteration: 131369
loss: 1.1406476497650146,grad_norm: 0.9999992992218202, iteration: 131370
loss: 1.0534484386444092,grad_norm: 0.9999992539110224, iteration: 131371
loss: 1.0177122354507446,grad_norm: 0.9999993251062778, iteration: 131372
loss: 1.0028823614120483,grad_norm: 0.7731803120258639, iteration: 131373
loss: 0.9877538084983826,grad_norm: 0.811501576114558, iteration: 131374
loss: 1.0390483140945435,grad_norm: 0.9999992816138091, iteration: 131375
loss: 1.034822940826416,grad_norm: 1.0000000024233562, iteration: 131376
loss: 1.0619269609451294,grad_norm: 0.9999991049543587, iteration: 131377
loss: 0.9836827516555786,grad_norm: 0.7335792190333679, iteration: 131378
loss: 1.3254705667495728,grad_norm: 0.9999998256686697, iteration: 131379
loss: 1.0914162397384644,grad_norm: 0.9999991525701016, iteration: 131380
loss: 1.040761947631836,grad_norm: 0.8232781493859468, iteration: 131381
loss: 1.0027093887329102,grad_norm: 0.9999990702565088, iteration: 131382
loss: 1.0288684368133545,grad_norm: 0.6859207500618058, iteration: 131383
loss: 1.0276495218276978,grad_norm: 0.8475559020167384, iteration: 131384
loss: 1.107712984085083,grad_norm: 0.999999712804973, iteration: 131385
loss: 1.0084190368652344,grad_norm: 0.8802187017645606, iteration: 131386
loss: 1.0887277126312256,grad_norm: 0.9999991146870487, iteration: 131387
loss: 1.0775762796401978,grad_norm: 0.9492457981957966, iteration: 131388
loss: 0.972573459148407,grad_norm: 0.9425629592292142, iteration: 131389
loss: 0.9863540530204773,grad_norm: 0.7772079198575153, iteration: 131390
loss: 1.0271064043045044,grad_norm: 0.980093696200828, iteration: 131391
loss: 1.080420970916748,grad_norm: 0.9999989387217301, iteration: 131392
loss: 0.9943140745162964,grad_norm: 0.7511038352000773, iteration: 131393
loss: 1.0794711112976074,grad_norm: 0.9837604803123483, iteration: 131394
loss: 0.9775669574737549,grad_norm: 0.9783257043471781, iteration: 131395
loss: 1.0671483278274536,grad_norm: 0.9999996582471758, iteration: 131396
loss: 1.0196177959442139,grad_norm: 0.9276365480844204, iteration: 131397
loss: 1.0003117322921753,grad_norm: 0.9999998837902646, iteration: 131398
loss: 0.9583228826522827,grad_norm: 0.890553832115629, iteration: 131399
loss: 0.9576696753501892,grad_norm: 0.9026437060420738, iteration: 131400
loss: 1.023951768875122,grad_norm: 0.999999196290688, iteration: 131401
loss: 1.0873154401779175,grad_norm: 0.9999993480165149, iteration: 131402
loss: 1.01287043094635,grad_norm: 0.8876308899268633, iteration: 131403
loss: 1.0363006591796875,grad_norm: 0.9999992667932539, iteration: 131404
loss: 1.0804458856582642,grad_norm: 0.9999998742629659, iteration: 131405
loss: 1.0405350923538208,grad_norm: 0.9999992460068091, iteration: 131406
loss: 1.0875962972640991,grad_norm: 0.921506816128583, iteration: 131407
loss: 1.0090032815933228,grad_norm: 0.9999993180228067, iteration: 131408
loss: 1.0606614351272583,grad_norm: 0.9999991198943238, iteration: 131409
loss: 0.9998177886009216,grad_norm: 0.8906905103611371, iteration: 131410
loss: 1.0024526119232178,grad_norm: 0.8290462098691667, iteration: 131411
loss: 1.0164594650268555,grad_norm: 0.9999994584208545, iteration: 131412
loss: 1.0604727268218994,grad_norm: 0.9999992279911041, iteration: 131413
loss: 1.147387981414795,grad_norm: 0.999999702877065, iteration: 131414
loss: 0.9406923055648804,grad_norm: 0.6915067298206244, iteration: 131415
loss: 1.0489119291305542,grad_norm: 0.9999989907385457, iteration: 131416
loss: 0.9844454526901245,grad_norm: 0.8888241816647834, iteration: 131417
loss: 1.0185728073120117,grad_norm: 0.8252344240799607, iteration: 131418
loss: 0.9872092604637146,grad_norm: 0.8863657847628184, iteration: 131419
loss: 1.0736662149429321,grad_norm: 0.9999998415638872, iteration: 131420
loss: 1.1081039905548096,grad_norm: 0.9999998152417462, iteration: 131421
loss: 1.0123791694641113,grad_norm: 0.9999992729932088, iteration: 131422
loss: 1.015774130821228,grad_norm: 0.7094061640968673, iteration: 131423
loss: 1.0246539115905762,grad_norm: 0.9999997060725019, iteration: 131424
loss: 1.0400136709213257,grad_norm: 0.9447138407241222, iteration: 131425
loss: 1.032699465751648,grad_norm: 0.9831704063017983, iteration: 131426
loss: 0.9739670753479004,grad_norm: 0.9978326715440148, iteration: 131427
loss: 1.0081373453140259,grad_norm: 0.9999998186780649, iteration: 131428
loss: 1.0768592357635498,grad_norm: 0.998863239442223, iteration: 131429
loss: 1.0612143278121948,grad_norm: 0.9999992168602414, iteration: 131430
loss: 1.0258769989013672,grad_norm: 0.8864961266317806, iteration: 131431
loss: 1.0158045291900635,grad_norm: 0.9268420462976289, iteration: 131432
loss: 1.025808572769165,grad_norm: 0.9999994278770739, iteration: 131433
loss: 1.041088581085205,grad_norm: 0.9525888547423448, iteration: 131434
loss: 1.025629997253418,grad_norm: 0.9999996878054723, iteration: 131435
loss: 1.0539968013763428,grad_norm: 0.9999990516759634, iteration: 131436
loss: 1.1297820806503296,grad_norm: 0.9999998273372532, iteration: 131437
loss: 1.0173728466033936,grad_norm: 0.875842539126083, iteration: 131438
loss: 1.0720407962799072,grad_norm: 0.9999991232639989, iteration: 131439
loss: 0.9326720237731934,grad_norm: 0.9087957303591566, iteration: 131440
loss: 1.0625959634780884,grad_norm: 0.9999998333599309, iteration: 131441
loss: 0.9814550876617432,grad_norm: 0.822130343324608, iteration: 131442
loss: 1.0482285022735596,grad_norm: 0.999999154953021, iteration: 131443
loss: 1.0405299663543701,grad_norm: 0.999999348546424, iteration: 131444
loss: 0.9986833930015564,grad_norm: 0.8199706900962775, iteration: 131445
loss: 1.0534124374389648,grad_norm: 0.9999993407809509, iteration: 131446
loss: 1.040265440940857,grad_norm: 0.902243512382545, iteration: 131447
loss: 1.0839366912841797,grad_norm: 0.9999995617777497, iteration: 131448
loss: 1.005007266998291,grad_norm: 0.9286928109277757, iteration: 131449
loss: 1.0154165029525757,grad_norm: 0.9999991120854158, iteration: 131450
loss: 0.9842583537101746,grad_norm: 0.9999990445870484, iteration: 131451
loss: 1.0079450607299805,grad_norm: 0.8971961673281517, iteration: 131452
loss: 1.0225870609283447,grad_norm: 0.9794096282603187, iteration: 131453
loss: 1.010332703590393,grad_norm: 0.9159416573113544, iteration: 131454
loss: 1.1177394390106201,grad_norm: 0.9999995416486043, iteration: 131455
loss: 1.0480796098709106,grad_norm: 0.9999994025094149, iteration: 131456
loss: 1.037585735321045,grad_norm: 0.9999989892131894, iteration: 131457
loss: 0.9503408670425415,grad_norm: 0.9999992951144805, iteration: 131458
loss: 1.0269731283187866,grad_norm: 0.9999995189621357, iteration: 131459
loss: 1.0541486740112305,grad_norm: 0.9999994092369828, iteration: 131460
loss: 1.0528993606567383,grad_norm: 0.9999996976435861, iteration: 131461
loss: 1.0022128820419312,grad_norm: 0.9999990466809485, iteration: 131462
loss: 1.0267701148986816,grad_norm: 0.8547840561352635, iteration: 131463
loss: 0.9763199090957642,grad_norm: 0.9395744132436649, iteration: 131464
loss: 0.9547831416130066,grad_norm: 0.762279510493744, iteration: 131465
loss: 1.0180587768554688,grad_norm: 0.9999994709244286, iteration: 131466
loss: 0.9919095039367676,grad_norm: 0.9999991500741635, iteration: 131467
loss: 0.997955858707428,grad_norm: 0.9999991327037464, iteration: 131468
loss: 1.0791906118392944,grad_norm: 0.9999998457684784, iteration: 131469
loss: 1.0085010528564453,grad_norm: 0.8442899745568573, iteration: 131470
loss: 1.1252630949020386,grad_norm: 0.999999158122366, iteration: 131471
loss: 1.0343306064605713,grad_norm: 0.9999991837444852, iteration: 131472
loss: 1.0037444829940796,grad_norm: 0.827812550520555, iteration: 131473
loss: 1.0309786796569824,grad_norm: 0.999999314661925, iteration: 131474
loss: 1.016405463218689,grad_norm: 0.8161697844216258, iteration: 131475
loss: 1.0368911027908325,grad_norm: 0.999999421856271, iteration: 131476
loss: 1.0006794929504395,grad_norm: 0.8147516456319349, iteration: 131477
loss: 1.00715172290802,grad_norm: 0.9119517044024708, iteration: 131478
loss: 1.0207589864730835,grad_norm: 0.7865202074519847, iteration: 131479
loss: 0.9793830513954163,grad_norm: 0.8471934918227937, iteration: 131480
loss: 0.999247133731842,grad_norm: 0.9999990764649845, iteration: 131481
loss: 1.023820400238037,grad_norm: 0.9888160554051, iteration: 131482
loss: 1.1749128103256226,grad_norm: 1.0000000002847211, iteration: 131483
loss: 1.0123366117477417,grad_norm: 0.9999990311937412, iteration: 131484
loss: 1.001390814781189,grad_norm: 0.8850367866479404, iteration: 131485
loss: 0.9971581697463989,grad_norm: 0.7744208700052794, iteration: 131486
loss: 1.0216294527053833,grad_norm: 0.999999847237143, iteration: 131487
loss: 1.0747336149215698,grad_norm: 0.9999992255396668, iteration: 131488
loss: 1.0416706800460815,grad_norm: 0.9999993216835014, iteration: 131489
loss: 0.9833409190177917,grad_norm: 0.8109901561184616, iteration: 131490
loss: 0.9990757703781128,grad_norm: 0.9346046397707056, iteration: 131491
loss: 1.0846682786941528,grad_norm: 0.999999102932837, iteration: 131492
loss: 1.008933663368225,grad_norm: 0.8033924476170119, iteration: 131493
loss: 1.1635135412216187,grad_norm: 0.9999994284317757, iteration: 131494
loss: 1.0489188432693481,grad_norm: 0.9558595115120312, iteration: 131495
loss: 1.0791103839874268,grad_norm: 0.9999995123428176, iteration: 131496
loss: 0.9821042418479919,grad_norm: 0.8597066455095963, iteration: 131497
loss: 1.0140295028686523,grad_norm: 0.999999433914024, iteration: 131498
loss: 0.9921200275421143,grad_norm: 0.7543407993785795, iteration: 131499
loss: 1.019329309463501,grad_norm: 0.9999990453355933, iteration: 131500
loss: 0.9906390905380249,grad_norm: 0.8753007563815377, iteration: 131501
loss: 1.0017930269241333,grad_norm: 0.835970581367388, iteration: 131502
loss: 1.0377546548843384,grad_norm: 0.999999004011878, iteration: 131503
loss: 1.019661784172058,grad_norm: 0.9175394911542997, iteration: 131504
loss: 0.9912505745887756,grad_norm: 0.9999991073313947, iteration: 131505
loss: 1.000175952911377,grad_norm: 0.9999999746454713, iteration: 131506
loss: 1.0191876888275146,grad_norm: 0.9999996693456263, iteration: 131507
loss: 1.0131160020828247,grad_norm: 0.8978531005377464, iteration: 131508
loss: 1.0224603414535522,grad_norm: 0.9999991651711309, iteration: 131509
loss: 1.0477771759033203,grad_norm: 0.9999993259778318, iteration: 131510
loss: 0.9738633036613464,grad_norm: 0.9999990372995998, iteration: 131511
loss: 0.998103678226471,grad_norm: 0.9999994761585227, iteration: 131512
loss: 1.0234265327453613,grad_norm: 0.9615997425691342, iteration: 131513
loss: 0.9836026430130005,grad_norm: 0.9799361414979842, iteration: 131514
loss: 0.9874271154403687,grad_norm: 0.9347215521378384, iteration: 131515
loss: 1.0103956460952759,grad_norm: 0.9999992274232513, iteration: 131516
loss: 0.9948294162750244,grad_norm: 0.9883416468430111, iteration: 131517
loss: 1.0544884204864502,grad_norm: 0.9999999395175856, iteration: 131518
loss: 0.9873544573783875,grad_norm: 0.9150819100889724, iteration: 131519
loss: 0.9950788021087646,grad_norm: 0.9559642067135212, iteration: 131520
loss: 1.0207774639129639,grad_norm: 0.7775743520749776, iteration: 131521
loss: 1.0569959878921509,grad_norm: 0.9999999950055286, iteration: 131522
loss: 1.0586295127868652,grad_norm: 0.998367015658643, iteration: 131523
loss: 1.0015562772750854,grad_norm: 0.9999993767462594, iteration: 131524
loss: 1.005971908569336,grad_norm: 0.7552681058455191, iteration: 131525
loss: 1.0281727313995361,grad_norm: 0.9999991102748406, iteration: 131526
loss: 1.0215516090393066,grad_norm: 0.8197695243119524, iteration: 131527
loss: 1.1283001899719238,grad_norm: 1.0000000092852945, iteration: 131528
loss: 1.0158803462982178,grad_norm: 0.8800142607996837, iteration: 131529
loss: 1.0209068059921265,grad_norm: 0.9999991803944979, iteration: 131530
loss: 1.001516580581665,grad_norm: 0.9498646201925257, iteration: 131531
loss: 0.9963926076889038,grad_norm: 0.9681628949547385, iteration: 131532
loss: 1.0091172456741333,grad_norm: 0.9387265946184815, iteration: 131533
loss: 1.030874252319336,grad_norm: 0.99999974393589, iteration: 131534
loss: 0.9609521627426147,grad_norm: 0.9639504920186053, iteration: 131535
loss: 0.9878429770469666,grad_norm: 0.9999993236965696, iteration: 131536
loss: 0.9950272440910339,grad_norm: 0.8696557173936413, iteration: 131537
loss: 0.9792929887771606,grad_norm: 0.7969045303024557, iteration: 131538
loss: 1.0021132230758667,grad_norm: 0.8662999698503838, iteration: 131539
loss: 1.1279289722442627,grad_norm: 0.9999994583425318, iteration: 131540
loss: 0.9874477386474609,grad_norm: 0.9999990860550252, iteration: 131541
loss: 1.054391860961914,grad_norm: 0.9999994300174654, iteration: 131542
loss: 1.0170819759368896,grad_norm: 0.999999202820413, iteration: 131543
loss: 0.990894079208374,grad_norm: 0.9018725858585315, iteration: 131544
loss: 1.072323203086853,grad_norm: 0.9999991537220017, iteration: 131545
loss: 1.0189156532287598,grad_norm: 0.9219457378864061, iteration: 131546
loss: 1.051037311553955,grad_norm: 0.9999995178463921, iteration: 131547
loss: 1.0185743570327759,grad_norm: 0.99999928024229, iteration: 131548
loss: 1.0259685516357422,grad_norm: 0.9999998769949399, iteration: 131549
loss: 1.1496418714523315,grad_norm: 0.9999998478021128, iteration: 131550
loss: 0.9780462980270386,grad_norm: 0.9999991754482777, iteration: 131551
loss: 1.0094521045684814,grad_norm: 0.9190749072245314, iteration: 131552
loss: 1.0527153015136719,grad_norm: 0.9999993884640503, iteration: 131553
loss: 1.0283035039901733,grad_norm: 0.8371899895338033, iteration: 131554
loss: 0.9974541068077087,grad_norm: 0.8686113901840624, iteration: 131555
loss: 0.9924541115760803,grad_norm: 0.9999990268362116, iteration: 131556
loss: 1.0326372385025024,grad_norm: 0.9999996630092377, iteration: 131557
loss: 1.0150392055511475,grad_norm: 0.8114745115031684, iteration: 131558
loss: 1.0240287780761719,grad_norm: 0.9999994128440424, iteration: 131559
loss: 1.162000060081482,grad_norm: 0.9999996338492267, iteration: 131560
loss: 0.9979107975959778,grad_norm: 0.85486804443302, iteration: 131561
loss: 1.0095585584640503,grad_norm: 0.9999990904890096, iteration: 131562
loss: 1.1198254823684692,grad_norm: 0.9999999503743043, iteration: 131563
loss: 1.0071933269500732,grad_norm: 0.9999994548432314, iteration: 131564
loss: 1.0358569622039795,grad_norm: 0.9999996554545085, iteration: 131565
loss: 1.0510075092315674,grad_norm: 0.9999998467840889, iteration: 131566
loss: 1.0448627471923828,grad_norm: 0.9999990690998506, iteration: 131567
loss: 1.124190330505371,grad_norm: 0.999999668928451, iteration: 131568
loss: 1.0482209920883179,grad_norm: 0.9999995939419687, iteration: 131569
loss: 1.0867701768875122,grad_norm: 0.9652852625165224, iteration: 131570
loss: 0.9629592895507812,grad_norm: 0.841451888312081, iteration: 131571
loss: 1.0103124380111694,grad_norm: 0.9999992307137422, iteration: 131572
loss: 1.049653172492981,grad_norm: 0.999999221787246, iteration: 131573
loss: 1.0487314462661743,grad_norm: 0.9999991214122611, iteration: 131574
loss: 0.9923057556152344,grad_norm: 0.9892490984466517, iteration: 131575
loss: 1.1113030910491943,grad_norm: 0.9999995764413593, iteration: 131576
loss: 0.9978104829788208,grad_norm: 0.8974217693835711, iteration: 131577
loss: 0.9995697140693665,grad_norm: 0.9999991224946598, iteration: 131578
loss: 1.072713851928711,grad_norm: 0.9999993687926492, iteration: 131579
loss: 0.9905117750167847,grad_norm: 0.9999991909580159, iteration: 131580
loss: 1.0880839824676514,grad_norm: 0.9999994276577387, iteration: 131581
loss: 1.0585569143295288,grad_norm: 0.9999994310396407, iteration: 131582
loss: 0.9905502200126648,grad_norm: 0.999999067209003, iteration: 131583
loss: 1.0616737604141235,grad_norm: 0.977469247454436, iteration: 131584
loss: 1.0315656661987305,grad_norm: 0.9999992950759582, iteration: 131585
loss: 0.9783257246017456,grad_norm: 0.9999990880888209, iteration: 131586
loss: 1.0934978723526,grad_norm: 0.9999995506292593, iteration: 131587
loss: 1.1122525930404663,grad_norm: 0.9045410912137206, iteration: 131588
loss: 1.0027787685394287,grad_norm: 0.9999992361622291, iteration: 131589
loss: 1.1270859241485596,grad_norm: 0.9999995499023697, iteration: 131590
loss: 0.9691979885101318,grad_norm: 0.9999991196354214, iteration: 131591
loss: 1.178653359413147,grad_norm: 0.9999998839463538, iteration: 131592
loss: 0.9968283176422119,grad_norm: 0.9999991713759315, iteration: 131593
loss: 1.0183634757995605,grad_norm: 0.9999991100794474, iteration: 131594
loss: 1.0667246580123901,grad_norm: 0.9999993512864834, iteration: 131595
loss: 1.176274061203003,grad_norm: 0.9999994631171589, iteration: 131596
loss: 1.066809058189392,grad_norm: 0.8869831711336716, iteration: 131597
loss: 1.0325548648834229,grad_norm: 0.9999990755036053, iteration: 131598
loss: 1.0618630647659302,grad_norm: 0.9999992102835602, iteration: 131599
loss: 1.1307867765426636,grad_norm: 0.9999998400663426, iteration: 131600
loss: 1.1126151084899902,grad_norm: 0.9999991463711785, iteration: 131601
loss: 1.0906059741973877,grad_norm: 0.9999994609796603, iteration: 131602
loss: 1.0223017930984497,grad_norm: 0.9999992373133006, iteration: 131603
loss: 1.0359301567077637,grad_norm: 0.8917551421025511, iteration: 131604
loss: 1.0173768997192383,grad_norm: 0.758571304607381, iteration: 131605
loss: 1.0217422246932983,grad_norm: 0.9999995723958875, iteration: 131606
loss: 0.9826153516769409,grad_norm: 0.9999998001077015, iteration: 131607
loss: 0.988685667514801,grad_norm: 0.9999995179685173, iteration: 131608
loss: 0.9464351534843445,grad_norm: 0.9999995535662345, iteration: 131609
loss: 1.0282450914382935,grad_norm: 0.999999568790135, iteration: 131610
loss: 0.9836394786834717,grad_norm: 0.8549931463611261, iteration: 131611
loss: 1.1262351274490356,grad_norm: 0.9999998009248976, iteration: 131612
loss: 1.018356442451477,grad_norm: 0.92499067498398, iteration: 131613
loss: 1.0302820205688477,grad_norm: 0.9999994734994391, iteration: 131614
loss: 1.0285555124282837,grad_norm: 0.6825715797751487, iteration: 131615
loss: 1.020113229751587,grad_norm: 0.7956878646731688, iteration: 131616
loss: 1.0089088678359985,grad_norm: 0.99999971648897, iteration: 131617
loss: 0.9942989945411682,grad_norm: 0.9999991534099442, iteration: 131618
loss: 1.0858689546585083,grad_norm: 0.9999992061666533, iteration: 131619
loss: 0.9984856247901917,grad_norm: 0.9999993444086696, iteration: 131620
loss: 1.012910008430481,grad_norm: 0.9999993143018098, iteration: 131621
loss: 0.9941207766532898,grad_norm: 0.9565838327128403, iteration: 131622
loss: 1.0283235311508179,grad_norm: 0.8676007908428803, iteration: 131623
loss: 0.9994531273841858,grad_norm: 0.9999998902435406, iteration: 131624
loss: 1.007499098777771,grad_norm: 0.9999994086178048, iteration: 131625
loss: 1.0106284618377686,grad_norm: 0.9999990621030127, iteration: 131626
loss: 1.018028974533081,grad_norm: 0.9999991652639384, iteration: 131627
loss: 1.0299501419067383,grad_norm: 0.8527257421530957, iteration: 131628
loss: 1.0573209524154663,grad_norm: 0.9999999393504693, iteration: 131629
loss: 0.9940100312232971,grad_norm: 0.9999993601229148, iteration: 131630
loss: 1.0051493644714355,grad_norm: 0.673650495406088, iteration: 131631
loss: 0.9705984592437744,grad_norm: 0.8526752515954344, iteration: 131632
loss: 1.0310614109039307,grad_norm: 0.9999993608069706, iteration: 131633
loss: 1.01846444606781,grad_norm: 0.9465750452972669, iteration: 131634
loss: 0.9914655685424805,grad_norm: 0.9436399333092619, iteration: 131635
loss: 1.0596144199371338,grad_norm: 0.8728799628337586, iteration: 131636
loss: 1.058285117149353,grad_norm: 0.9999991962398588, iteration: 131637
loss: 0.9708574414253235,grad_norm: 0.8987441164528756, iteration: 131638
loss: 0.9620038270950317,grad_norm: 0.8125253838106941, iteration: 131639
loss: 1.0083699226379395,grad_norm: 0.9999993774224686, iteration: 131640
loss: 1.0448626279830933,grad_norm: 0.9999998468476446, iteration: 131641
loss: 1.056464672088623,grad_norm: 0.9999994689087003, iteration: 131642
loss: 1.0894677639007568,grad_norm: 0.9999993705677086, iteration: 131643
loss: 1.0318888425827026,grad_norm: 0.9999992525057495, iteration: 131644
loss: 1.0341339111328125,grad_norm: 0.9704805199386936, iteration: 131645
loss: 1.0664793252944946,grad_norm: 0.9999993371729013, iteration: 131646
loss: 0.9692120552062988,grad_norm: 0.9185289644747782, iteration: 131647
loss: 1.1535571813583374,grad_norm: 0.9999992036861288, iteration: 131648
loss: 0.9753673672676086,grad_norm: 0.7882094751899671, iteration: 131649
loss: 1.0273650884628296,grad_norm: 0.9999997275517627, iteration: 131650
loss: 0.9857602715492249,grad_norm: 0.9999995313364379, iteration: 131651
loss: 0.9968770146369934,grad_norm: 0.8509513616045, iteration: 131652
loss: 1.0891296863555908,grad_norm: 0.9376505590419987, iteration: 131653
loss: 1.0879170894622803,grad_norm: 0.999999589151187, iteration: 131654
loss: 1.0030243396759033,grad_norm: 0.9231060593457152, iteration: 131655
loss: 1.0307886600494385,grad_norm: 0.9999997088243244, iteration: 131656
loss: 0.9877179861068726,grad_norm: 0.9999992511213349, iteration: 131657
loss: 1.0777555704116821,grad_norm: 0.9999998008216162, iteration: 131658
loss: 1.009911298751831,grad_norm: 0.965103489816461, iteration: 131659
loss: 1.0123389959335327,grad_norm: 0.9999991916197833, iteration: 131660
loss: 1.0255645513534546,grad_norm: 0.9999990348420882, iteration: 131661
loss: 1.0748203992843628,grad_norm: 0.9999992852847497, iteration: 131662
loss: 1.0904475450515747,grad_norm: 0.9999996816614726, iteration: 131663
loss: 0.9944997429847717,grad_norm: 0.9999989653360768, iteration: 131664
loss: 1.3375978469848633,grad_norm: 0.9999998259487476, iteration: 131665
loss: 0.9810774326324463,grad_norm: 0.8221209591621264, iteration: 131666
loss: 1.007849097251892,grad_norm: 0.8173497894731705, iteration: 131667
loss: 1.028357744216919,grad_norm: 0.9999997243668451, iteration: 131668
loss: 1.0318764448165894,grad_norm: 0.9999992285097115, iteration: 131669
loss: 1.042781949043274,grad_norm: 0.999999128022227, iteration: 131670
loss: 1.0094937086105347,grad_norm: 0.9999991258402693, iteration: 131671
loss: 0.9647639989852905,grad_norm: 0.9999997862789629, iteration: 131672
loss: 1.064140796661377,grad_norm: 0.9999995598494826, iteration: 131673
loss: 1.0260411500930786,grad_norm: 0.9999991242095473, iteration: 131674
loss: 1.00125253200531,grad_norm: 0.8594843165170276, iteration: 131675
loss: 1.0345715284347534,grad_norm: 0.999999470834374, iteration: 131676
loss: 1.0089242458343506,grad_norm: 0.8656692692949098, iteration: 131677
loss: 1.1163828372955322,grad_norm: 0.9999999550485889, iteration: 131678
loss: 1.1870648860931396,grad_norm: 0.9999994961715796, iteration: 131679
loss: 1.05527925491333,grad_norm: 0.9999995414337276, iteration: 131680
loss: 1.0805259943008423,grad_norm: 0.9999999425640884, iteration: 131681
loss: 1.0125577449798584,grad_norm: 0.9999998200039123, iteration: 131682
loss: 1.0086913108825684,grad_norm: 0.9999993735733094, iteration: 131683
loss: 1.0684967041015625,grad_norm: 0.9999994362180373, iteration: 131684
loss: 1.1061168909072876,grad_norm: 0.9999993507932515, iteration: 131685
loss: 1.0767282247543335,grad_norm: 0.8672493603243129, iteration: 131686
loss: 1.0243501663208008,grad_norm: 0.9999991365504103, iteration: 131687
loss: 0.99835205078125,grad_norm: 0.9999990719457849, iteration: 131688
loss: 1.0068342685699463,grad_norm: 0.9712494610202745, iteration: 131689
loss: 1.006887435913086,grad_norm: 0.8630644975750338, iteration: 131690
loss: 0.9721400737762451,grad_norm: 0.8499920036307624, iteration: 131691
loss: 1.0158603191375732,grad_norm: 0.9999991814584615, iteration: 131692
loss: 1.012444019317627,grad_norm: 0.9630164785451139, iteration: 131693
loss: 1.005679726600647,grad_norm: 0.8022082312198572, iteration: 131694
loss: 1.1773061752319336,grad_norm: 0.9999992256438712, iteration: 131695
loss: 1.0205368995666504,grad_norm: 0.9297892642507507, iteration: 131696
loss: 1.0370161533355713,grad_norm: 0.9999998796405565, iteration: 131697
loss: 1.0242162942886353,grad_norm: 0.9919881501631957, iteration: 131698
loss: 1.0216742753982544,grad_norm: 0.9999998696246136, iteration: 131699
loss: 1.0847100019454956,grad_norm: 0.9999998608814128, iteration: 131700
loss: 1.0107693672180176,grad_norm: 0.8503784895953471, iteration: 131701
loss: 1.0673432350158691,grad_norm: 0.9999994136424921, iteration: 131702
loss: 1.1473302841186523,grad_norm: 0.9999999715525985, iteration: 131703
loss: 1.026150107383728,grad_norm: 0.9999992441863623, iteration: 131704
loss: 1.2570823431015015,grad_norm: 0.9999998058291605, iteration: 131705
loss: 1.0398507118225098,grad_norm: 0.9902159455813785, iteration: 131706
loss: 1.0294725894927979,grad_norm: 0.9488461264609912, iteration: 131707
loss: 0.9897637963294983,grad_norm: 0.9030633724814451, iteration: 131708
loss: 1.0584166049957275,grad_norm: 0.9999997449831401, iteration: 131709
loss: 1.0436266660690308,grad_norm: 0.906778114887562, iteration: 131710
loss: 0.968570351600647,grad_norm: 0.9999990950031833, iteration: 131711
loss: 1.0709946155548096,grad_norm: 0.9999995430401668, iteration: 131712
loss: 0.970483124256134,grad_norm: 0.8809294872621833, iteration: 131713
loss: 1.0236968994140625,grad_norm: 0.8491553191754234, iteration: 131714
loss: 1.0788809061050415,grad_norm: 0.9999998695608459, iteration: 131715
loss: 1.0158178806304932,grad_norm: 0.9999994240637888, iteration: 131716
loss: 1.0078977346420288,grad_norm: 0.9999995291131225, iteration: 131717
loss: 1.088809609413147,grad_norm: 0.9999993151307393, iteration: 131718
loss: 1.0105098485946655,grad_norm: 0.9760995949782473, iteration: 131719
loss: 1.041344404220581,grad_norm: 0.9999993141944042, iteration: 131720
loss: 0.9914758801460266,grad_norm: 0.8287732540591383, iteration: 131721
loss: 1.0234624147415161,grad_norm: 0.7890501409137545, iteration: 131722
loss: 1.0212252140045166,grad_norm: 0.9999990075145238, iteration: 131723
loss: 1.0145082473754883,grad_norm: 0.9999999483228837, iteration: 131724
loss: 1.010269045829773,grad_norm: 0.7456228189301042, iteration: 131725
loss: 0.9945018291473389,grad_norm: 0.831902814950558, iteration: 131726
loss: 1.0355416536331177,grad_norm: 0.9999990783121777, iteration: 131727
loss: 0.9749683141708374,grad_norm: 0.9911017407442961, iteration: 131728
loss: 0.9889667630195618,grad_norm: 0.8459071875282369, iteration: 131729
loss: 1.034205436706543,grad_norm: 0.9999989230443884, iteration: 131730
loss: 0.9546366930007935,grad_norm: 0.9999994374445013, iteration: 131731
loss: 1.0698736906051636,grad_norm: 0.9999998247370306, iteration: 131732
loss: 1.0661072731018066,grad_norm: 0.9940831650206056, iteration: 131733
loss: 0.9869828224182129,grad_norm: 0.9073297633227358, iteration: 131734
loss: 0.9748501181602478,grad_norm: 0.9385135795286706, iteration: 131735
loss: 1.0451792478561401,grad_norm: 0.9999991476700278, iteration: 131736
loss: 0.998614490032196,grad_norm: 0.9126002778975555, iteration: 131737
loss: 1.0302003622055054,grad_norm: 0.9999996449662105, iteration: 131738
loss: 0.9990590214729309,grad_norm: 0.8408733280333227, iteration: 131739
loss: 1.006174087524414,grad_norm: 0.8507207210516815, iteration: 131740
loss: 0.9947019815444946,grad_norm: 0.8164217691204555, iteration: 131741
loss: 1.0109087228775024,grad_norm: 0.9999994003906038, iteration: 131742
loss: 0.953998327255249,grad_norm: 0.8933853575400293, iteration: 131743
loss: 1.0398457050323486,grad_norm: 0.970905734302731, iteration: 131744
loss: 1.0268735885620117,grad_norm: 0.999999214110564, iteration: 131745
loss: 0.9912159442901611,grad_norm: 0.9970402148537364, iteration: 131746
loss: 1.0026813745498657,grad_norm: 0.8324419896531999, iteration: 131747
loss: 1.0245776176452637,grad_norm: 0.99999942622749, iteration: 131748
loss: 0.9951392412185669,grad_norm: 0.8981756594287583, iteration: 131749
loss: 1.065788984298706,grad_norm: 0.9999998945842805, iteration: 131750
loss: 0.9920279383659363,grad_norm: 0.88681293842382, iteration: 131751
loss: 0.9938395619392395,grad_norm: 0.9028163387142724, iteration: 131752
loss: 0.9955082535743713,grad_norm: 0.854273373240326, iteration: 131753
loss: 0.9803179502487183,grad_norm: 0.9965402469955168, iteration: 131754
loss: 1.0171449184417725,grad_norm: 0.8597573716583284, iteration: 131755
loss: 0.9686400890350342,grad_norm: 0.923891645390822, iteration: 131756
loss: 1.0711203813552856,grad_norm: 0.999999308367653, iteration: 131757
loss: 1.1503924131393433,grad_norm: 0.9999995220332974, iteration: 131758
loss: 1.0458801984786987,grad_norm: 0.98442666865976, iteration: 131759
loss: 0.9564536809921265,grad_norm: 0.9748915063113845, iteration: 131760
loss: 1.0578914880752563,grad_norm: 0.9448441875945561, iteration: 131761
loss: 1.0079643726348877,grad_norm: 0.9531226558308249, iteration: 131762
loss: 1.0225664377212524,grad_norm: 0.9145551136938656, iteration: 131763
loss: 1.0231226682662964,grad_norm: 0.9426612325775953, iteration: 131764
loss: 1.0396952629089355,grad_norm: 0.999999393362288, iteration: 131765
loss: 0.9988858699798584,grad_norm: 0.8447216633223074, iteration: 131766
loss: 1.0119410753250122,grad_norm: 0.9010564260191948, iteration: 131767
loss: 0.974437415599823,grad_norm: 0.8951217221983085, iteration: 131768
loss: 1.0210245847702026,grad_norm: 0.9999993199602035, iteration: 131769
loss: 1.0163519382476807,grad_norm: 0.9163491000611579, iteration: 131770
loss: 1.1990222930908203,grad_norm: 0.9999996672951257, iteration: 131771
loss: 1.0157854557037354,grad_norm: 0.9999991296052907, iteration: 131772
loss: 1.0723114013671875,grad_norm: 0.9999992163390931, iteration: 131773
loss: 1.0021989345550537,grad_norm: 0.7831728103892838, iteration: 131774
loss: 1.0590771436691284,grad_norm: 0.9999990603842576, iteration: 131775
loss: 1.1909544467926025,grad_norm: 0.9999992035615426, iteration: 131776
loss: 0.9993228316307068,grad_norm: 0.9589974956748313, iteration: 131777
loss: 1.0151535272598267,grad_norm: 0.874668832293914, iteration: 131778
loss: 1.0563515424728394,grad_norm: 0.9999992757059306, iteration: 131779
loss: 1.1103689670562744,grad_norm: 0.9999996442257364, iteration: 131780
loss: 0.9880077242851257,grad_norm: 0.9145166791650178, iteration: 131781
loss: 1.026092767715454,grad_norm: 0.8284378556906865, iteration: 131782
loss: 1.0565978288650513,grad_norm: 0.9999999127692738, iteration: 131783
loss: 0.9973951578140259,grad_norm: 0.99999918691521, iteration: 131784
loss: 0.9770936965942383,grad_norm: 0.8472713135510822, iteration: 131785
loss: 0.9794724583625793,grad_norm: 0.8060109021017408, iteration: 131786
loss: 1.0054959058761597,grad_norm: 0.6430266969823265, iteration: 131787
loss: 1.1381090879440308,grad_norm: 0.9999997344964178, iteration: 131788
loss: 1.035513997077942,grad_norm: 1.0000000567988303, iteration: 131789
loss: 1.0269948244094849,grad_norm: 0.9999990535272423, iteration: 131790
loss: 0.9754618406295776,grad_norm: 0.8673371451157783, iteration: 131791
loss: 1.104492425918579,grad_norm: 0.9999997058542918, iteration: 131792
loss: 0.9933408498764038,grad_norm: 0.9507418905284288, iteration: 131793
loss: 1.0657182931900024,grad_norm: 0.9999994367316951, iteration: 131794
loss: 1.0230921506881714,grad_norm: 0.9521894236387664, iteration: 131795
loss: 1.0113542079925537,grad_norm: 0.8926824181225802, iteration: 131796
loss: 1.0070338249206543,grad_norm: 0.9166647890143852, iteration: 131797
loss: 1.1978013515472412,grad_norm: 0.9999999572579255, iteration: 131798
loss: 1.1524279117584229,grad_norm: 0.9999997086378288, iteration: 131799
loss: 1.0359959602355957,grad_norm: 0.9999997533472866, iteration: 131800
loss: 1.0444893836975098,grad_norm: 0.9999992110321946, iteration: 131801
loss: 1.0377198457717896,grad_norm: 0.9999994277446571, iteration: 131802
loss: 1.014253854751587,grad_norm: 0.9999992186367623, iteration: 131803
loss: 1.2286845445632935,grad_norm: 0.999999945259512, iteration: 131804
loss: 0.9873541593551636,grad_norm: 0.9624258116559474, iteration: 131805
loss: 0.9705564975738525,grad_norm: 0.9881884452170073, iteration: 131806
loss: 0.9673917889595032,grad_norm: 0.8547911144276674, iteration: 131807
loss: 1.1312758922576904,grad_norm: 0.9999999290496292, iteration: 131808
loss: 1.044968843460083,grad_norm: 0.9999993086168864, iteration: 131809
loss: 1.0072611570358276,grad_norm: 0.9999998428325548, iteration: 131810
loss: 1.0314480066299438,grad_norm: 0.999999082991359, iteration: 131811
loss: 1.2091851234436035,grad_norm: 0.9999999127912963, iteration: 131812
loss: 1.0151487588882446,grad_norm: 0.9672699664839637, iteration: 131813
loss: 1.042475700378418,grad_norm: 0.917400094490016, iteration: 131814
loss: 1.0115532875061035,grad_norm: 0.9999992844808511, iteration: 131815
loss: 1.0986223220825195,grad_norm: 0.9999999092567095, iteration: 131816
loss: 1.0308223962783813,grad_norm: 0.9999996116383089, iteration: 131817
loss: 1.0885660648345947,grad_norm: 0.999999737942866, iteration: 131818
loss: 1.0011259317398071,grad_norm: 0.8634979198340531, iteration: 131819
loss: 1.114028811454773,grad_norm: 0.9999995763922181, iteration: 131820
loss: 1.0096315145492554,grad_norm: 0.9999991106047602, iteration: 131821
loss: 1.034866213798523,grad_norm: 0.9999991406180552, iteration: 131822
loss: 1.223925232887268,grad_norm: 0.9999993059971218, iteration: 131823
loss: 1.0583255290985107,grad_norm: 0.9999998774557215, iteration: 131824
loss: 1.0294125080108643,grad_norm: 0.9089888288831306, iteration: 131825
loss: 0.9929917454719543,grad_norm: 1.000000055603457, iteration: 131826
loss: 1.0162873268127441,grad_norm: 0.9999993602660336, iteration: 131827
loss: 0.9993130564689636,grad_norm: 0.9999999107292695, iteration: 131828
loss: 1.0356993675231934,grad_norm: 0.9999995731427642, iteration: 131829
loss: 1.0450711250305176,grad_norm: 0.9999990709293459, iteration: 131830
loss: 1.0071032047271729,grad_norm: 0.9106657672566274, iteration: 131831
loss: 1.0194169282913208,grad_norm: 0.7828796917512645, iteration: 131832
loss: 1.0308971405029297,grad_norm: 0.9999996526675075, iteration: 131833
loss: 1.0784368515014648,grad_norm: 0.9999999124379391, iteration: 131834
loss: 1.0828993320465088,grad_norm: 0.9999993361274203, iteration: 131835
loss: 1.120619297027588,grad_norm: 0.9999993447402559, iteration: 131836
loss: 1.0169730186462402,grad_norm: 0.9999991544120507, iteration: 131837
loss: 1.0368659496307373,grad_norm: 0.9999993673113122, iteration: 131838
loss: 1.0206935405731201,grad_norm: 0.9999994829074873, iteration: 131839
loss: 1.04660165309906,grad_norm: 0.9999999732656816, iteration: 131840
loss: 1.015371322631836,grad_norm: 0.9938857906922558, iteration: 131841
loss: 1.0108388662338257,grad_norm: 0.9999996551841358, iteration: 131842
loss: 1.0326910018920898,grad_norm: 0.9999995532105376, iteration: 131843
loss: 1.069556713104248,grad_norm: 0.9999997601476609, iteration: 131844
loss: 0.9606248736381531,grad_norm: 0.8211449069679452, iteration: 131845
loss: 1.0316319465637207,grad_norm: 0.6393613419020826, iteration: 131846
loss: 1.02297842502594,grad_norm: 0.9999998238411759, iteration: 131847
loss: 1.018449306488037,grad_norm: 0.9999990897810731, iteration: 131848
loss: 1.0486021041870117,grad_norm: 0.9999997022737191, iteration: 131849
loss: 0.9916022419929504,grad_norm: 0.8213787937134664, iteration: 131850
loss: 1.0291633605957031,grad_norm: 0.9999991123448086, iteration: 131851
loss: 1.0882951021194458,grad_norm: 0.9999994132245849, iteration: 131852
loss: 1.013380765914917,grad_norm: 0.9999994690978232, iteration: 131853
loss: 0.9862750768661499,grad_norm: 0.8746595154715339, iteration: 131854
loss: 1.0128761529922485,grad_norm: 0.8123679858252888, iteration: 131855
loss: 0.9972777366638184,grad_norm: 0.9999995196603125, iteration: 131856
loss: 1.1663206815719604,grad_norm: 0.999999656263766, iteration: 131857
loss: 1.022691249847412,grad_norm: 0.8569344722949007, iteration: 131858
loss: 1.0018914937973022,grad_norm: 0.8710670640544436, iteration: 131859
loss: 1.116015076637268,grad_norm: 0.9999996260735311, iteration: 131860
loss: 1.026795744895935,grad_norm: 0.9999992624141811, iteration: 131861
loss: 1.0677176713943481,grad_norm: 0.9999992449965008, iteration: 131862
loss: 1.0198906660079956,grad_norm: 0.921506095785514, iteration: 131863
loss: 0.9598644375801086,grad_norm: 0.9999997202341873, iteration: 131864
loss: 0.9962827563285828,grad_norm: 0.9999991141176332, iteration: 131865
loss: 1.0602668523788452,grad_norm: 0.999999150418735, iteration: 131866
loss: 1.1654117107391357,grad_norm: 1.0000000060114496, iteration: 131867
loss: 0.993097186088562,grad_norm: 0.848932482828701, iteration: 131868
loss: 1.113102674484253,grad_norm: 0.9999997404271366, iteration: 131869
loss: 0.9966192841529846,grad_norm: 0.9999994502299163, iteration: 131870
loss: 1.038240909576416,grad_norm: 0.9999990744817523, iteration: 131871
loss: 1.057996392250061,grad_norm: 0.9999997891025297, iteration: 131872
loss: 1.019144058227539,grad_norm: 0.9999993115480981, iteration: 131873
loss: 1.0170542001724243,grad_norm: 0.7372315518421532, iteration: 131874
loss: 0.9525937438011169,grad_norm: 0.8598743238722719, iteration: 131875
loss: 1.0714255571365356,grad_norm: 0.9999994984527698, iteration: 131876
loss: 1.0056583881378174,grad_norm: 0.8424975207834483, iteration: 131877
loss: 1.0644645690917969,grad_norm: 0.9315894826679094, iteration: 131878
loss: 1.0118554830551147,grad_norm: 0.8908246465238883, iteration: 131879
loss: 1.025749921798706,grad_norm: 0.9999996635247484, iteration: 131880
loss: 0.9942618608474731,grad_norm: 0.880399085133325, iteration: 131881
loss: 1.06759512424469,grad_norm: 0.9999993282059393, iteration: 131882
loss: 0.990241289138794,grad_norm: 0.9133665035753894, iteration: 131883
loss: 0.9961995482444763,grad_norm: 0.979867672023333, iteration: 131884
loss: 1.0366328954696655,grad_norm: 0.8343131410304014, iteration: 131885
loss: 1.0471405982971191,grad_norm: 0.8971057338620089, iteration: 131886
loss: 1.0755298137664795,grad_norm: 0.9999994199036066, iteration: 131887
loss: 1.0330175161361694,grad_norm: 0.9955316447312876, iteration: 131888
loss: 1.0076526403427124,grad_norm: 0.8208884083540334, iteration: 131889
loss: 1.0048365592956543,grad_norm: 0.9705263039265684, iteration: 131890
loss: 1.080019474029541,grad_norm: 0.9999993148897734, iteration: 131891
loss: 0.9760823845863342,grad_norm: 0.869970754922452, iteration: 131892
loss: 0.9695289731025696,grad_norm: 0.8907059849145859, iteration: 131893
loss: 1.0162347555160522,grad_norm: 0.9999991995263132, iteration: 131894
loss: 0.9838064312934875,grad_norm: 0.8546756880893329, iteration: 131895
loss: 1.0161011219024658,grad_norm: 0.9999990654188233, iteration: 131896
loss: 1.0639419555664062,grad_norm: 0.9999992745414551, iteration: 131897
loss: 1.1368762254714966,grad_norm: 0.9999997104767046, iteration: 131898
loss: 1.0055636167526245,grad_norm: 0.9999991827409095, iteration: 131899
loss: 1.0046194791793823,grad_norm: 0.9557336819865194, iteration: 131900
loss: 1.0713167190551758,grad_norm: 0.9999993163839489, iteration: 131901
loss: 0.9948806762695312,grad_norm: 0.7714922135103799, iteration: 131902
loss: 0.9888648390769958,grad_norm: 0.8774175485009091, iteration: 131903
loss: 1.0846943855285645,grad_norm: 0.9999992186506232, iteration: 131904
loss: 1.0195845365524292,grad_norm: 0.93267352913215, iteration: 131905
loss: 1.0078845024108887,grad_norm: 0.9999992178104339, iteration: 131906
loss: 1.1422169208526611,grad_norm: 0.9999993162522738, iteration: 131907
loss: 1.0620156526565552,grad_norm: 0.9804435287065, iteration: 131908
loss: 1.1621623039245605,grad_norm: 0.999999665417651, iteration: 131909
loss: 1.0816792249679565,grad_norm: 0.9999990399667221, iteration: 131910
loss: 1.0545012950897217,grad_norm: 0.9999991379842281, iteration: 131911
loss: 1.0347657203674316,grad_norm: 0.9999991246915709, iteration: 131912
loss: 1.0749260187149048,grad_norm: 0.7551759975114094, iteration: 131913
loss: 1.0226277112960815,grad_norm: 0.9999991478916409, iteration: 131914
loss: 1.0636296272277832,grad_norm: 0.874208414153322, iteration: 131915
loss: 0.9672233462333679,grad_norm: 0.9579718163367685, iteration: 131916
loss: 1.0220426321029663,grad_norm: 0.7303623334902772, iteration: 131917
loss: 1.012896180152893,grad_norm: 0.9999992112585625, iteration: 131918
loss: 1.0099172592163086,grad_norm: 0.8305289896251695, iteration: 131919
loss: 1.034184217453003,grad_norm: 0.9999997270272902, iteration: 131920
loss: 1.0180429220199585,grad_norm: 0.9890412217328421, iteration: 131921
loss: 0.9914096593856812,grad_norm: 0.9567122682263977, iteration: 131922
loss: 1.047890305519104,grad_norm: 0.9999990386830202, iteration: 131923
loss: 0.9932929873466492,grad_norm: 0.999999190344835, iteration: 131924
loss: 1.1375885009765625,grad_norm: 0.9999997123619335, iteration: 131925
loss: 1.0233627557754517,grad_norm: 0.8575266404083535, iteration: 131926
loss: 1.1084383726119995,grad_norm: 0.8738083302372452, iteration: 131927
loss: 0.9994701147079468,grad_norm: 0.778867463729769, iteration: 131928
loss: 1.043628215789795,grad_norm: 0.944624611249943, iteration: 131929
loss: 0.9951987266540527,grad_norm: 0.9043912946102447, iteration: 131930
loss: 1.000083088874817,grad_norm: 0.8134140201769496, iteration: 131931
loss: 1.0689560174942017,grad_norm: 0.9999999446196123, iteration: 131932
loss: 0.9990838170051575,grad_norm: 0.9853475049265119, iteration: 131933
loss: 0.9826375842094421,grad_norm: 0.875089110002111, iteration: 131934
loss: 0.9641082286834717,grad_norm: 0.7693335877947859, iteration: 131935
loss: 1.0313457250595093,grad_norm: 0.815130910103992, iteration: 131936
loss: 1.0700739622116089,grad_norm: 0.9999995580584206, iteration: 131937
loss: 1.0134841203689575,grad_norm: 0.9356900658663881, iteration: 131938
loss: 1.0664299726486206,grad_norm: 0.999999383532711, iteration: 131939
loss: 1.0158240795135498,grad_norm: 0.9667446877717918, iteration: 131940
loss: 1.0195612907409668,grad_norm: 0.7886213723557264, iteration: 131941
loss: 1.046515941619873,grad_norm: 0.9999998025450145, iteration: 131942
loss: 1.0629419088363647,grad_norm: 0.9097647451028263, iteration: 131943
loss: 0.9887890219688416,grad_norm: 0.9999992892420526, iteration: 131944
loss: 1.151287317276001,grad_norm: 0.9999994577197908, iteration: 131945
loss: 1.0385806560516357,grad_norm: 0.9999997106455238, iteration: 131946
loss: 0.9418379664421082,grad_norm: 0.8875326882339135, iteration: 131947
loss: 0.9742016196250916,grad_norm: 0.8553574104173018, iteration: 131948
loss: 1.0809656381607056,grad_norm: 0.944479978890776, iteration: 131949
loss: 1.0163363218307495,grad_norm: 0.976707358944501, iteration: 131950
loss: 1.0224909782409668,grad_norm: 0.9999991844958513, iteration: 131951
loss: 1.0227677822113037,grad_norm: 0.9999992631963349, iteration: 131952
loss: 0.9804850816726685,grad_norm: 0.9999990281689655, iteration: 131953
loss: 0.9817289113998413,grad_norm: 0.9504868611973548, iteration: 131954
loss: 0.9908992648124695,grad_norm: 0.9083452350265113, iteration: 131955
loss: 1.0035042762756348,grad_norm: 0.8348381386070597, iteration: 131956
loss: 0.988158106803894,grad_norm: 0.9182960938493923, iteration: 131957
loss: 0.9328703284263611,grad_norm: 0.923511602630246, iteration: 131958
loss: 1.0180439949035645,grad_norm: 0.9999989297473925, iteration: 131959
loss: 1.0607080459594727,grad_norm: 0.9999996619314381, iteration: 131960
loss: 1.0033241510391235,grad_norm: 0.9999999519482571, iteration: 131961
loss: 1.0993319749832153,grad_norm: 0.995909923196767, iteration: 131962
loss: 1.1141663789749146,grad_norm: 1.0000000207611146, iteration: 131963
loss: 1.0780723094940186,grad_norm: 0.9999990477456058, iteration: 131964
loss: 1.0916651487350464,grad_norm: 0.9999996723543816, iteration: 131965
loss: 1.0863114595413208,grad_norm: 0.9999990399128226, iteration: 131966
loss: 1.0960643291473389,grad_norm: 0.9999994283160664, iteration: 131967
loss: 1.0594403743743896,grad_norm: 0.8424128440531459, iteration: 131968
loss: 1.0209615230560303,grad_norm: 0.999999184228519, iteration: 131969
loss: 1.1785873174667358,grad_norm: 0.999999873037544, iteration: 131970
loss: 1.0147954225540161,grad_norm: 0.8493533082248411, iteration: 131971
loss: 1.0253286361694336,grad_norm: 0.8293413057062459, iteration: 131972
loss: 1.0355807542800903,grad_norm: 0.9999999892325492, iteration: 131973
loss: 0.9738340973854065,grad_norm: 0.860983672457868, iteration: 131974
loss: 1.1447174549102783,grad_norm: 0.9999990201941298, iteration: 131975
loss: 1.0014241933822632,grad_norm: 0.8501683766173339, iteration: 131976
loss: 1.03132164478302,grad_norm: 0.9999993665167111, iteration: 131977
loss: 1.0124902725219727,grad_norm: 0.9193715671222398, iteration: 131978
loss: 0.9790365695953369,grad_norm: 0.8294241120878518, iteration: 131979
loss: 0.9956114292144775,grad_norm: 0.9999995426452604, iteration: 131980
loss: 0.9987687468528748,grad_norm: 0.9999992843231006, iteration: 131981
loss: 0.991819441318512,grad_norm: 0.9999991404090557, iteration: 131982
loss: 0.996090292930603,grad_norm: 0.8784920148119517, iteration: 131983
loss: 1.1732131242752075,grad_norm: 0.9999996699584923, iteration: 131984
loss: 1.025020718574524,grad_norm: 0.8957301515151231, iteration: 131985
loss: 1.0732483863830566,grad_norm: 0.9999994498077565, iteration: 131986
loss: 1.0600299835205078,grad_norm: 0.9999995321014342, iteration: 131987
loss: 1.0042170286178589,grad_norm: 0.8816437615746647, iteration: 131988
loss: 1.043694019317627,grad_norm: 0.999999134133483, iteration: 131989
loss: 1.042437195777893,grad_norm: 0.9999991969769669, iteration: 131990
loss: 1.0417033433914185,grad_norm: 0.9999994293416167, iteration: 131991
loss: 1.0537360906600952,grad_norm: 0.9999999214254113, iteration: 131992
loss: 1.014998435974121,grad_norm: 0.8587255845652875, iteration: 131993
loss: 0.989285945892334,grad_norm: 0.9999998301953954, iteration: 131994
loss: 0.9784651398658752,grad_norm: 0.9999992138668442, iteration: 131995
loss: 0.9806843400001526,grad_norm: 0.8732470246994444, iteration: 131996
loss: 1.0791245698928833,grad_norm: 0.999999759677307, iteration: 131997
loss: 1.0673431158065796,grad_norm: 0.9999994496236918, iteration: 131998
loss: 1.021316647529602,grad_norm: 0.7144042839200013, iteration: 131999
loss: 0.9983484148979187,grad_norm: 0.8335064354976871, iteration: 132000
loss: 1.0320872068405151,grad_norm: 0.9999994794688781, iteration: 132001
loss: 1.0320451259613037,grad_norm: 0.9999996989780713, iteration: 132002
loss: 0.9928473830223083,grad_norm: 0.8244274472079128, iteration: 132003
loss: 1.0342655181884766,grad_norm: 0.9779343219695324, iteration: 132004
loss: 1.0054733753204346,grad_norm: 0.8708283627167301, iteration: 132005
loss: 0.9855269193649292,grad_norm: 0.8766405312232366, iteration: 132006
loss: 1.021948218345642,grad_norm: 0.9999991904906289, iteration: 132007
loss: 0.9896423816680908,grad_norm: 0.9437593459123217, iteration: 132008
loss: 1.0430406332015991,grad_norm: 0.9999992693647475, iteration: 132009
loss: 1.0000648498535156,grad_norm: 0.7715252669628524, iteration: 132010
loss: 1.0685089826583862,grad_norm: 0.9999997132908727, iteration: 132011
loss: 1.0270435810089111,grad_norm: 0.9769324704884408, iteration: 132012
loss: 1.047874927520752,grad_norm: 0.9999994116897951, iteration: 132013
loss: 1.0066680908203125,grad_norm: 0.9999993317736011, iteration: 132014
loss: 1.045331597328186,grad_norm: 0.999999363860393, iteration: 132015
loss: 0.999572217464447,grad_norm: 0.7916084173253781, iteration: 132016
loss: 1.2263103723526,grad_norm: 0.9999997017191059, iteration: 132017
loss: 1.069946050643921,grad_norm: 0.8053299519523742, iteration: 132018
loss: 1.1311427354812622,grad_norm: 0.99999916331121, iteration: 132019
loss: 1.027259349822998,grad_norm: 0.9999996924119865, iteration: 132020
loss: 1.0058262348175049,grad_norm: 0.9330613132410988, iteration: 132021
loss: 1.0247498750686646,grad_norm: 0.999999211718521, iteration: 132022
loss: 1.0545791387557983,grad_norm: 0.9999996233703651, iteration: 132023
loss: 0.9757365584373474,grad_norm: 0.8862073709090493, iteration: 132024
loss: 1.1208115816116333,grad_norm: 0.9999993813174947, iteration: 132025
loss: 1.0454862117767334,grad_norm: 0.9999992163812064, iteration: 132026
loss: 1.0058187246322632,grad_norm: 0.999999747762975, iteration: 132027
loss: 1.040522575378418,grad_norm: 0.9999995021330391, iteration: 132028
loss: 0.9907653331756592,grad_norm: 0.999999612860852, iteration: 132029
loss: 0.9745602011680603,grad_norm: 0.9110537878184909, iteration: 132030
loss: 1.0047607421875,grad_norm: 0.9095041147354928, iteration: 132031
loss: 1.0294668674468994,grad_norm: 1.0000000439922363, iteration: 132032
loss: 1.0701571702957153,grad_norm: 0.9999990369171037, iteration: 132033
loss: 1.0053775310516357,grad_norm: 0.9999993483810108, iteration: 132034
loss: 0.9880435466766357,grad_norm: 0.9767475362014855, iteration: 132035
loss: 1.0255939960479736,grad_norm: 0.9999991941062045, iteration: 132036
loss: 0.9976711273193359,grad_norm: 0.9999990836418403, iteration: 132037
loss: 1.030112385749817,grad_norm: 0.9999997881641458, iteration: 132038
loss: 1.011644721031189,grad_norm: 0.919549102348954, iteration: 132039
loss: 1.0374237298965454,grad_norm: 0.999999244155325, iteration: 132040
loss: 1.0191386938095093,grad_norm: 0.9137314121671786, iteration: 132041
loss: 1.0057570934295654,grad_norm: 0.8449207455571112, iteration: 132042
loss: 1.0066349506378174,grad_norm: 0.9999992568708964, iteration: 132043
loss: 1.0086454153060913,grad_norm: 0.9999990979650601, iteration: 132044
loss: 1.0154119729995728,grad_norm: 0.8437870320837265, iteration: 132045
loss: 1.0445700883865356,grad_norm: 0.9290264820576932, iteration: 132046
loss: 1.1030336618423462,grad_norm: 0.99999989090931, iteration: 132047
loss: 0.9763389825820923,grad_norm: 0.9999990137665913, iteration: 132048
loss: 1.0521076917648315,grad_norm: 0.7505803390249917, iteration: 132049
loss: 1.0590412616729736,grad_norm: 0.9999994618666495, iteration: 132050
loss: 1.0237948894500732,grad_norm: 0.9999991373291474, iteration: 132051
loss: 0.9536073803901672,grad_norm: 0.8860259946711564, iteration: 132052
loss: 1.0515779256820679,grad_norm: 0.9999991879121984, iteration: 132053
loss: 0.9636142253875732,grad_norm: 0.9676542216341261, iteration: 132054
loss: 1.0316588878631592,grad_norm: 0.9999999717070438, iteration: 132055
loss: 0.9942362308502197,grad_norm: 0.9999993298250366, iteration: 132056
loss: 1.0185604095458984,grad_norm: 0.9999995485335661, iteration: 132057
loss: 1.0189855098724365,grad_norm: 0.9999995384304398, iteration: 132058
loss: 1.0608599185943604,grad_norm: 0.8488776395498727, iteration: 132059
loss: 1.0312557220458984,grad_norm: 0.9449797367596251, iteration: 132060
loss: 0.9819968342781067,grad_norm: 0.8536799041880176, iteration: 132061
loss: 1.0354615449905396,grad_norm: 0.9999991757193322, iteration: 132062
loss: 0.9986171126365662,grad_norm: 0.9215496597086572, iteration: 132063
loss: 0.9657945036888123,grad_norm: 0.9999989816050003, iteration: 132064
loss: 0.9962073564529419,grad_norm: 0.797512923527582, iteration: 132065
loss: 1.0548219680786133,grad_norm: 0.9010767649233782, iteration: 132066
loss: 0.9930245280265808,grad_norm: 0.9999990233072277, iteration: 132067
loss: 1.0253453254699707,grad_norm: 0.9999991325020182, iteration: 132068
loss: 0.9907649159431458,grad_norm: 0.9363076860590419, iteration: 132069
loss: 0.9869540929794312,grad_norm: 0.958894594900891, iteration: 132070
loss: 1.0707398653030396,grad_norm: 0.9999991212415237, iteration: 132071
loss: 1.0564265251159668,grad_norm: 0.9999994121591113, iteration: 132072
loss: 1.0145697593688965,grad_norm: 0.8173561041975849, iteration: 132073
loss: 1.0007284879684448,grad_norm: 0.999999225544961, iteration: 132074
loss: 1.066030502319336,grad_norm: 0.9999995190814978, iteration: 132075
loss: 1.0084084272384644,grad_norm: 0.7655548580812196, iteration: 132076
loss: 0.9758128523826599,grad_norm: 0.9851713718405428, iteration: 132077
loss: 0.9929095506668091,grad_norm: 0.9999991803035808, iteration: 132078
loss: 0.986686646938324,grad_norm: 0.870467325959613, iteration: 132079
loss: 1.0235681533813477,grad_norm: 0.9999992395402223, iteration: 132080
loss: 0.9857466816902161,grad_norm: 0.8794874893031845, iteration: 132081
loss: 0.9921725392341614,grad_norm: 0.9875154448982888, iteration: 132082
loss: 1.0001661777496338,grad_norm: 0.8703442195080547, iteration: 132083
loss: 1.0629140138626099,grad_norm: 0.9999999576366493, iteration: 132084
loss: 1.0780997276306152,grad_norm: 0.9999997594369893, iteration: 132085
loss: 0.9728856682777405,grad_norm: 0.9648337059442293, iteration: 132086
loss: 1.0209206342697144,grad_norm: 0.9999989936422101, iteration: 132087
loss: 1.0766621828079224,grad_norm: 0.8852698749801882, iteration: 132088
loss: 1.003322958946228,grad_norm: 0.7882754128583146, iteration: 132089
loss: 0.9771239161491394,grad_norm: 0.9999991103728454, iteration: 132090
loss: 1.0010944604873657,grad_norm: 0.8705227618543999, iteration: 132091
loss: 1.045886516571045,grad_norm: 0.999999033186783, iteration: 132092
loss: 1.0803834199905396,grad_norm: 0.8587371118929072, iteration: 132093
loss: 1.0550671815872192,grad_norm: 0.9999995452778248, iteration: 132094
loss: 0.9871508479118347,grad_norm: 0.9184284122516561, iteration: 132095
loss: 0.9939914345741272,grad_norm: 0.9394405084984898, iteration: 132096
loss: 1.0085583925247192,grad_norm: 0.999999360394735, iteration: 132097
loss: 0.966596245765686,grad_norm: 0.9999990923346838, iteration: 132098
loss: 0.9963987469673157,grad_norm: 0.9886572941582801, iteration: 132099
loss: 1.0320113897323608,grad_norm: 0.999999875473097, iteration: 132100
loss: 1.0105178356170654,grad_norm: 0.8982889923916807, iteration: 132101
loss: 0.9791328310966492,grad_norm: 0.8785626088875731, iteration: 132102
loss: 1.0843688249588013,grad_norm: 0.9999997036648143, iteration: 132103
loss: 0.9873227477073669,grad_norm: 0.8077323908441636, iteration: 132104
loss: 1.0074814558029175,grad_norm: 0.992705703959562, iteration: 132105
loss: 1.013944387435913,grad_norm: 0.9999990043543034, iteration: 132106
loss: 1.019914984703064,grad_norm: 0.9556933404962663, iteration: 132107
loss: 0.9918400645256042,grad_norm: 0.8662343244425692, iteration: 132108
loss: 0.9864265322685242,grad_norm: 0.8136280914356986, iteration: 132109
loss: 1.03581702709198,grad_norm: 0.87226522761073, iteration: 132110
loss: 0.9756359457969666,grad_norm: 0.9999993615708453, iteration: 132111
loss: 1.0452377796173096,grad_norm: 0.9999992271136795, iteration: 132112
loss: 1.024592399597168,grad_norm: 0.791338946064426, iteration: 132113
loss: 0.9986141324043274,grad_norm: 0.8152483168396157, iteration: 132114
loss: 1.0549474954605103,grad_norm: 0.999999188357052, iteration: 132115
loss: 1.075423240661621,grad_norm: 0.999999755768918, iteration: 132116
loss: 1.05876624584198,grad_norm: 0.9999994860540911, iteration: 132117
loss: 0.9783250689506531,grad_norm: 0.843263181335064, iteration: 132118
loss: 1.0594671964645386,grad_norm: 0.9999997500744818, iteration: 132119
loss: 1.0239455699920654,grad_norm: 0.9999993656471957, iteration: 132120
loss: 1.0235296487808228,grad_norm: 0.9999994569609714, iteration: 132121
loss: 0.9891919493675232,grad_norm: 0.9999993390020777, iteration: 132122
loss: 0.9932284355163574,grad_norm: 0.813443359251678, iteration: 132123
loss: 1.0664589405059814,grad_norm: 0.9999997120057935, iteration: 132124
loss: 1.0332516431808472,grad_norm: 0.9999994348183464, iteration: 132125
loss: 1.0156937837600708,grad_norm: 0.9999998983194336, iteration: 132126
loss: 0.9904260039329529,grad_norm: 0.9999989957260026, iteration: 132127
loss: 1.0812472105026245,grad_norm: 0.9999994348079538, iteration: 132128
loss: 1.054457187652588,grad_norm: 0.9999998295204707, iteration: 132129
loss: 1.09059739112854,grad_norm: 0.9999994815548034, iteration: 132130
loss: 0.9878816604614258,grad_norm: 0.9840714031586553, iteration: 132131
loss: 1.0221526622772217,grad_norm: 0.9999990869038177, iteration: 132132
loss: 1.0811495780944824,grad_norm: 0.9999991461350224, iteration: 132133
loss: 1.0497093200683594,grad_norm: 0.9999998746859524, iteration: 132134
loss: 1.051443099975586,grad_norm: 0.999999197103344, iteration: 132135
loss: 1.0032325983047485,grad_norm: 0.8633043492161422, iteration: 132136
loss: 1.054893136024475,grad_norm: 0.9999995597946024, iteration: 132137
loss: 1.01718008518219,grad_norm: 0.9999993968026571, iteration: 132138
loss: 1.044042706489563,grad_norm: 0.8989013278709683, iteration: 132139
loss: 0.9807662963867188,grad_norm: 0.9999991728664109, iteration: 132140
loss: 0.9829332828521729,grad_norm: 0.9999991399864181, iteration: 132141
loss: 1.067846417427063,grad_norm: 0.9999989914164146, iteration: 132142
loss: 1.0025166273117065,grad_norm: 0.9999995222432734, iteration: 132143
loss: 1.0743050575256348,grad_norm: 0.9999990471994131, iteration: 132144
loss: 1.0137206315994263,grad_norm: 0.8588290086237607, iteration: 132145
loss: 1.0747519731521606,grad_norm: 0.9999993561835289, iteration: 132146
loss: 1.0251859426498413,grad_norm: 0.9999993659434291, iteration: 132147
loss: 1.0192357301712036,grad_norm: 0.8795971480055493, iteration: 132148
loss: 1.0308970212936401,grad_norm: 0.8053985627055816, iteration: 132149
loss: 1.0237754583358765,grad_norm: 0.8853047800964186, iteration: 132150
loss: 0.9860458374023438,grad_norm: 0.8644967332420165, iteration: 132151
loss: 1.1146172285079956,grad_norm: 0.9999994716833643, iteration: 132152
loss: 0.9896523952484131,grad_norm: 0.7889431453703412, iteration: 132153
loss: 0.9713518023490906,grad_norm: 0.8232969896960198, iteration: 132154
loss: 1.0036346912384033,grad_norm: 0.9752077297350311, iteration: 132155
loss: 1.013600468635559,grad_norm: 0.7223734912684833, iteration: 132156
loss: 1.0057706832885742,grad_norm: 0.7861652419019861, iteration: 132157
loss: 0.992188036441803,grad_norm: 0.9999996634076014, iteration: 132158
loss: 1.134255051612854,grad_norm: 0.835826261669319, iteration: 132159
loss: 1.0804234743118286,grad_norm: 0.9999994506362538, iteration: 132160
loss: 1.0517462491989136,grad_norm: 0.9999996625777248, iteration: 132161
loss: 1.024365782737732,grad_norm: 0.9999994279600911, iteration: 132162
loss: 0.9993528723716736,grad_norm: 0.893465433845431, iteration: 132163
loss: 1.0241109132766724,grad_norm: 0.9999996230990481, iteration: 132164
loss: 1.0026167631149292,grad_norm: 0.8297234315667756, iteration: 132165
loss: 0.9773315191268921,grad_norm: 0.8096723965807597, iteration: 132166
loss: 0.968267023563385,grad_norm: 0.8316773408070004, iteration: 132167
loss: 1.009265422821045,grad_norm: 0.9999996214727288, iteration: 132168
loss: 0.9789519309997559,grad_norm: 0.8779303713649388, iteration: 132169
loss: 1.1788743734359741,grad_norm: 0.9999999372170895, iteration: 132170
loss: 1.0055574178695679,grad_norm: 0.9999997313466538, iteration: 132171
loss: 0.9850090146064758,grad_norm: 0.9999992195454199, iteration: 132172
loss: 1.2397761344909668,grad_norm: 0.9999993301419534, iteration: 132173
loss: 1.0354816913604736,grad_norm: 0.9999994982137457, iteration: 132174
loss: 1.0851948261260986,grad_norm: 0.9999996424389654, iteration: 132175
loss: 1.1235359907150269,grad_norm: 0.9999995997312537, iteration: 132176
loss: 1.0540595054626465,grad_norm: 0.9999995529456368, iteration: 132177
loss: 1.050359845161438,grad_norm: 0.9999997629994998, iteration: 132178
loss: 0.9911710619926453,grad_norm: 0.9999992301338049, iteration: 132179
loss: 1.1802493333816528,grad_norm: 0.9999996854511809, iteration: 132180
loss: 1.064116358757019,grad_norm: 0.9999994418936133, iteration: 132181
loss: 0.9771850109100342,grad_norm: 0.9999992937831618, iteration: 132182
loss: 1.0450977087020874,grad_norm: 0.9939727022883121, iteration: 132183
loss: 0.9769901037216187,grad_norm: 0.7066206623682438, iteration: 132184
loss: 0.9726292490959167,grad_norm: 0.9464228347304511, iteration: 132185
loss: 1.049730658531189,grad_norm: 0.9999991520013638, iteration: 132186
loss: 1.0950464010238647,grad_norm: 0.999999145999363, iteration: 132187
loss: 1.0108808279037476,grad_norm: 0.9999991105716859, iteration: 132188
loss: 1.018281102180481,grad_norm: 0.9999992550262274, iteration: 132189
loss: 1.0212599039077759,grad_norm: 0.881036112909867, iteration: 132190
loss: 1.0034664869308472,grad_norm: 0.9999991226328376, iteration: 132191
loss: 1.0426368713378906,grad_norm: 0.9999993761350163, iteration: 132192
loss: 1.147373080253601,grad_norm: 0.999999772641075, iteration: 132193
loss: 1.0057597160339355,grad_norm: 0.9999990883579117, iteration: 132194
loss: 1.1164392232894897,grad_norm: 0.999999467661966, iteration: 132195
loss: 1.0171643495559692,grad_norm: 0.9999999940603151, iteration: 132196
loss: 1.0514206886291504,grad_norm: 0.9999994817901805, iteration: 132197
loss: 1.1579654216766357,grad_norm: 1.0000000015331922, iteration: 132198
loss: 1.0182868242263794,grad_norm: 0.9999992895002531, iteration: 132199
loss: 1.0375036001205444,grad_norm: 0.9999992760357941, iteration: 132200
loss: 1.1382982730865479,grad_norm: 1.0000000074314976, iteration: 132201
loss: 1.0682282447814941,grad_norm: 0.9999990467773093, iteration: 132202
loss: 0.9977161884307861,grad_norm: 0.7506494291679556, iteration: 132203
loss: 1.0177305936813354,grad_norm: 0.9999991230417007, iteration: 132204
loss: 1.4195412397384644,grad_norm: 1.0000000736922192, iteration: 132205
loss: 1.0805940628051758,grad_norm: 0.9999998494781799, iteration: 132206
loss: 1.1285090446472168,grad_norm: 0.9999992986514227, iteration: 132207
loss: 1.0780315399169922,grad_norm: 0.9999993418432176, iteration: 132208
loss: 1.007574439048767,grad_norm: 0.9999992286129733, iteration: 132209
loss: 1.0359677076339722,grad_norm: 0.9999993383520411, iteration: 132210
loss: 0.9874569177627563,grad_norm: 0.8629321913950764, iteration: 132211
loss: 1.0184255838394165,grad_norm: 0.999999231610255, iteration: 132212
loss: 1.0279145240783691,grad_norm: 0.9999999121414629, iteration: 132213
loss: 1.0367432832717896,grad_norm: 0.9320416445391487, iteration: 132214
loss: 1.0353397130966187,grad_norm: 0.8829007015679579, iteration: 132215
loss: 1.0322498083114624,grad_norm: 0.7584485414989857, iteration: 132216
loss: 1.066430926322937,grad_norm: 0.9999991342328525, iteration: 132217
loss: 1.0385552644729614,grad_norm: 0.9174068144941698, iteration: 132218
loss: 0.9937600493431091,grad_norm: 0.9999990034793985, iteration: 132219
loss: 1.1191847324371338,grad_norm: 0.9999994890960702, iteration: 132220
loss: 1.0097795724868774,grad_norm: 0.9999991090638736, iteration: 132221
loss: 1.0002341270446777,grad_norm: 0.8758943880858914, iteration: 132222
loss: 1.0092188119888306,grad_norm: 0.8553679505785889, iteration: 132223
loss: 0.9734736084938049,grad_norm: 0.9999991799410634, iteration: 132224
loss: 1.0517919063568115,grad_norm: 0.8536448078941974, iteration: 132225
loss: 1.0286258459091187,grad_norm: 0.9999998719178683, iteration: 132226
loss: 0.9924378991127014,grad_norm: 0.9486490621326505, iteration: 132227
loss: 1.0657850503921509,grad_norm: 0.9999990511670587, iteration: 132228
loss: 1.02197265625,grad_norm: 0.9999997229158596, iteration: 132229
loss: 1.1756126880645752,grad_norm: 0.9999997233668232, iteration: 132230
loss: 1.1394189596176147,grad_norm: 0.9999997864717949, iteration: 132231
loss: 1.079357385635376,grad_norm: 0.9999997053042264, iteration: 132232
loss: 0.9938920140266418,grad_norm: 0.9999991489052644, iteration: 132233
loss: 0.9965806603431702,grad_norm: 0.9999992323381028, iteration: 132234
loss: 1.0532065629959106,grad_norm: 0.9999997313774963, iteration: 132235
loss: 0.9835715889930725,grad_norm: 0.7542436258828046, iteration: 132236
loss: 0.9956235885620117,grad_norm: 0.9830820333587206, iteration: 132237
loss: 1.1090508699417114,grad_norm: 0.9999998628445583, iteration: 132238
loss: 1.059564471244812,grad_norm: 0.9999998059930197, iteration: 132239
loss: 1.0075757503509521,grad_norm: 0.9545815567646193, iteration: 132240
loss: 1.1380645036697388,grad_norm: 0.9999996895838005, iteration: 132241
loss: 1.0022423267364502,grad_norm: 0.9363209881223203, iteration: 132242
loss: 1.1706194877624512,grad_norm: 0.9999998646291429, iteration: 132243
loss: 1.1266517639160156,grad_norm: 0.9999998204446849, iteration: 132244
loss: 1.0073559284210205,grad_norm: 0.9999992098608251, iteration: 132245
loss: 1.101884126663208,grad_norm: 0.9999992792300043, iteration: 132246
loss: 1.1673312187194824,grad_norm: 0.9999996780380204, iteration: 132247
loss: 1.1672266721725464,grad_norm: 0.9999994819764513, iteration: 132248
loss: 1.1206414699554443,grad_norm: 0.9999994750997793, iteration: 132249
loss: 0.9771820902824402,grad_norm: 0.9999992404495406, iteration: 132250
loss: 1.0807042121887207,grad_norm: 0.9999999983034769, iteration: 132251
loss: 1.0782725811004639,grad_norm: 0.9999991151349702, iteration: 132252
loss: 1.033286690711975,grad_norm: 0.9126845367992495, iteration: 132253
loss: 1.0601491928100586,grad_norm: 0.9999994032341938, iteration: 132254
loss: 1.0819189548492432,grad_norm: 0.8194584955390487, iteration: 132255
loss: 0.9854879379272461,grad_norm: 0.8473993191302713, iteration: 132256
loss: 1.1354542970657349,grad_norm: 0.9999999132602032, iteration: 132257
loss: 1.053290605545044,grad_norm: 0.9999992228249247, iteration: 132258
loss: 1.0212150812149048,grad_norm: 0.9999999114891142, iteration: 132259
loss: 1.2116286754608154,grad_norm: 1.0000000503347963, iteration: 132260
loss: 1.1693027019500732,grad_norm: 0.999999960192785, iteration: 132261
loss: 1.041522741317749,grad_norm: 0.9999990889265592, iteration: 132262
loss: 1.1459827423095703,grad_norm: 0.99999956875099, iteration: 132263
loss: 1.0575180053710938,grad_norm: 0.8745285454517001, iteration: 132264
loss: 1.1743648052215576,grad_norm: 1.0000000077512365, iteration: 132265
loss: 1.2317076921463013,grad_norm: 0.9999999072990462, iteration: 132266
loss: 1.0283411741256714,grad_norm: 0.9999989434850727, iteration: 132267
loss: 1.2337149381637573,grad_norm: 0.9999993784669315, iteration: 132268
loss: 1.2534312009811401,grad_norm: 0.9999995479254861, iteration: 132269
loss: 1.0711078643798828,grad_norm: 0.999999095393911, iteration: 132270
loss: 1.1283434629440308,grad_norm: 0.9999998254095788, iteration: 132271
loss: 1.071826696395874,grad_norm: 0.9926365332819115, iteration: 132272
loss: 1.1856826543807983,grad_norm: 0.9999992089958457, iteration: 132273
loss: 1.1139189004898071,grad_norm: 0.9999998089767209, iteration: 132274
loss: 1.118752121925354,grad_norm: 0.9999989239249575, iteration: 132275
loss: 1.0255529880523682,grad_norm: 0.9999994008699632, iteration: 132276
loss: 1.1138672828674316,grad_norm: 0.9999999661719429, iteration: 132277
loss: 1.188193678855896,grad_norm: 0.999999638438118, iteration: 132278
loss: 1.1865534782409668,grad_norm: 0.9999995595650661, iteration: 132279
loss: 1.1070826053619385,grad_norm: 0.9999998497707353, iteration: 132280
loss: 1.0984257459640503,grad_norm: 0.9999998540018497, iteration: 132281
loss: 1.0751320123672485,grad_norm: 0.999999489571656, iteration: 132282
loss: 1.3889081478118896,grad_norm: 0.9999996155643249, iteration: 132283
loss: 1.1960766315460205,grad_norm: 0.9999997821668503, iteration: 132284
loss: 1.028173565864563,grad_norm: 0.9589609766906851, iteration: 132285
loss: 1.451553225517273,grad_norm: 0.99999975058117, iteration: 132286
loss: 1.0069822072982788,grad_norm: 0.9999995813544573, iteration: 132287
loss: 1.031280517578125,grad_norm: 0.8486944658981241, iteration: 132288
loss: 1.0653574466705322,grad_norm: 0.9384698638328148, iteration: 132289
loss: 1.1866559982299805,grad_norm: 0.99999926146857, iteration: 132290
loss: 1.0789766311645508,grad_norm: 0.9999997598742526, iteration: 132291
loss: 1.3526816368103027,grad_norm: 0.9999998971030866, iteration: 132292
loss: 1.0172725915908813,grad_norm: 0.999999117980567, iteration: 132293
loss: 1.0155086517333984,grad_norm: 0.9949314640391783, iteration: 132294
loss: 1.0776615142822266,grad_norm: 0.9999996723589025, iteration: 132295
loss: 1.0549342632293701,grad_norm: 0.9418349768899874, iteration: 132296
loss: 1.0006375312805176,grad_norm: 0.9342910903453634, iteration: 132297
loss: 1.21640944480896,grad_norm: 0.9999997594363256, iteration: 132298
loss: 1.0595048666000366,grad_norm: 0.9999997415954627, iteration: 132299
loss: 1.0419538021087646,grad_norm: 0.8582571679950922, iteration: 132300
loss: 1.0394495725631714,grad_norm: 0.9999994713186382, iteration: 132301
loss: 1.1926478147506714,grad_norm: 0.9999997591541818, iteration: 132302
loss: 1.0732712745666504,grad_norm: 0.9999995729945903, iteration: 132303
loss: 1.0052777528762817,grad_norm: 0.9999992962004285, iteration: 132304
loss: 1.1000102758407593,grad_norm: 0.9999997382232003, iteration: 132305
loss: 1.100274682044983,grad_norm: 0.9999994666529387, iteration: 132306
loss: 1.0566810369491577,grad_norm: 0.9999993801564566, iteration: 132307
loss: 1.163916826248169,grad_norm: 0.9999993817677861, iteration: 132308
loss: 1.3317228555679321,grad_norm: 0.9999995933085514, iteration: 132309
loss: 1.047425389289856,grad_norm: 0.9999996163442022, iteration: 132310
loss: 1.0314555168151855,grad_norm: 0.9652127185676999, iteration: 132311
loss: 1.237107515335083,grad_norm: 0.9999999247531409, iteration: 132312
loss: 1.0405018329620361,grad_norm: 0.947331976891843, iteration: 132313
loss: 1.047215223312378,grad_norm: 0.999999067508346, iteration: 132314
loss: 1.0933811664581299,grad_norm: 0.981012923700579, iteration: 132315
loss: 1.2752665281295776,grad_norm: 0.9999998212695247, iteration: 132316
loss: 1.0579745769500732,grad_norm: 0.9999991411217309, iteration: 132317
loss: 1.1167240142822266,grad_norm: 0.8580836446480199, iteration: 132318
loss: 0.9935750365257263,grad_norm: 0.9999991163039029, iteration: 132319
loss: 1.0795966386795044,grad_norm: 0.9999992773808908, iteration: 132320
loss: 1.110904335975647,grad_norm: 0.9327226774121913, iteration: 132321
loss: 1.0700594186782837,grad_norm: 0.9114068381397364, iteration: 132322
loss: 1.045269250869751,grad_norm: 0.9685667687091234, iteration: 132323
loss: 1.0983935594558716,grad_norm: 0.9999996134289185, iteration: 132324
loss: 0.9953796863555908,grad_norm: 0.8494195006741108, iteration: 132325
loss: 1.186257243156433,grad_norm: 0.9999997549064528, iteration: 132326
loss: 1.0666476488113403,grad_norm: 0.9999990203740854, iteration: 132327
loss: 1.0297584533691406,grad_norm: 0.9572580588849466, iteration: 132328
loss: 0.9753450155258179,grad_norm: 0.9999997038691129, iteration: 132329
loss: 1.033212423324585,grad_norm: 0.9999991675599087, iteration: 132330
loss: 1.0069254636764526,grad_norm: 0.9999990729559038, iteration: 132331
loss: 0.9643619656562805,grad_norm: 0.999999291932564, iteration: 132332
loss: 1.0653232336044312,grad_norm: 0.9999990812914344, iteration: 132333
loss: 1.0513004064559937,grad_norm: 0.9117389156322385, iteration: 132334
loss: 0.9959467649459839,grad_norm: 0.7725856013220582, iteration: 132335
loss: 0.9802790880203247,grad_norm: 0.9999991976547195, iteration: 132336
loss: 1.069333791732788,grad_norm: 0.9999993288911656, iteration: 132337
loss: 1.0117714405059814,grad_norm: 0.9999993073108243, iteration: 132338
loss: 0.9858413338661194,grad_norm: 0.9462683860673547, iteration: 132339
loss: 1.0388970375061035,grad_norm: 0.8975954595600518, iteration: 132340
loss: 1.1238707304000854,grad_norm: 0.9999991985022644, iteration: 132341
loss: 1.053083896636963,grad_norm: 0.8824017642833518, iteration: 132342
loss: 0.9890520572662354,grad_norm: 0.8604542877787261, iteration: 132343
loss: 0.9894307255744934,grad_norm: 0.9972920488782383, iteration: 132344
loss: 0.9856480360031128,grad_norm: 0.999999098414909, iteration: 132345
loss: 0.9987767338752747,grad_norm: 0.9947732877210228, iteration: 132346
loss: 1.0257982015609741,grad_norm: 0.9999993906803353, iteration: 132347
loss: 1.0228523015975952,grad_norm: 0.9999992627890529, iteration: 132348
loss: 1.0618398189544678,grad_norm: 0.9999992892626178, iteration: 132349
loss: 1.0415430068969727,grad_norm: 0.9999990576365029, iteration: 132350
loss: 1.0760304927825928,grad_norm: 0.9999991864974257, iteration: 132351
loss: 1.1190900802612305,grad_norm: 0.8959673309315138, iteration: 132352
loss: 1.045196294784546,grad_norm: 0.999999174071827, iteration: 132353
loss: 1.0198240280151367,grad_norm: 1.0000000331144812, iteration: 132354
loss: 1.0342766046524048,grad_norm: 0.966164514842546, iteration: 132355
loss: 1.0513943433761597,grad_norm: 0.9375801336508756, iteration: 132356
loss: 0.9937645792961121,grad_norm: 0.9999996146462332, iteration: 132357
loss: 1.046717643737793,grad_norm: 0.9999993937722155, iteration: 132358
loss: 1.0329488515853882,grad_norm: 0.9582526543330988, iteration: 132359
loss: 1.2222083806991577,grad_norm: 1.000000023489494, iteration: 132360
loss: 1.0602165460586548,grad_norm: 0.9005526324143701, iteration: 132361
loss: 1.022801160812378,grad_norm: 0.9999993144871547, iteration: 132362
loss: 1.0976301431655884,grad_norm: 0.9999993432254515, iteration: 132363
loss: 1.033789873123169,grad_norm: 0.9657612134327436, iteration: 132364
loss: 1.0768389701843262,grad_norm: 0.9555393260666478, iteration: 132365
loss: 1.1300464868545532,grad_norm: 0.9650227348854924, iteration: 132366
loss: 1.0901925563812256,grad_norm: 0.9999996999971931, iteration: 132367
loss: 0.9882530570030212,grad_norm: 0.8663157223500056, iteration: 132368
loss: 1.0167286396026611,grad_norm: 0.9999991241188858, iteration: 132369
loss: 1.0282055139541626,grad_norm: 0.9999990600255937, iteration: 132370
loss: 0.9861826300621033,grad_norm: 0.9999992712131082, iteration: 132371
loss: 1.0248552560806274,grad_norm: 0.8682966068621497, iteration: 132372
loss: 0.95839524269104,grad_norm: 0.9825188209519273, iteration: 132373
loss: 1.129044532775879,grad_norm: 0.9999999476694447, iteration: 132374
loss: 1.0611404180526733,grad_norm: 0.9824679243647331, iteration: 132375
loss: 0.9829719066619873,grad_norm: 0.9258587620388007, iteration: 132376
loss: 1.0591330528259277,grad_norm: 0.9999992309344473, iteration: 132377
loss: 1.0559358596801758,grad_norm: 0.9114692895937082, iteration: 132378
loss: 1.0763298273086548,grad_norm: 0.9999991278996384, iteration: 132379
loss: 1.0265690088272095,grad_norm: 0.7851069354838348, iteration: 132380
loss: 1.0766931772232056,grad_norm: 0.9999998760761792, iteration: 132381
loss: 1.0089417695999146,grad_norm: 0.9052932614480597, iteration: 132382
loss: 1.0577102899551392,grad_norm: 0.9999994416196436, iteration: 132383
loss: 1.044196605682373,grad_norm: 0.9999991457570911, iteration: 132384
loss: 1.144982933998108,grad_norm: 0.9999995540047566, iteration: 132385
loss: 1.001621127128601,grad_norm: 0.872314668357078, iteration: 132386
loss: 1.0533397197723389,grad_norm: 0.999999264578676, iteration: 132387
loss: 0.9772322773933411,grad_norm: 0.8761734633972761, iteration: 132388
loss: 1.0404667854309082,grad_norm: 0.9603088993935359, iteration: 132389
loss: 1.0505858659744263,grad_norm: 0.7932331498331184, iteration: 132390
loss: 1.0699855089187622,grad_norm: 0.9999991409628785, iteration: 132391
loss: 1.0914461612701416,grad_norm: 0.9999993646968465, iteration: 132392
loss: 1.0735645294189453,grad_norm: 0.9601810996993213, iteration: 132393
loss: 0.9734961986541748,grad_norm: 0.8936712609355745, iteration: 132394
loss: 1.02237868309021,grad_norm: 0.9999994615779458, iteration: 132395
loss: 0.9965219497680664,grad_norm: 0.8780189601133968, iteration: 132396
loss: 1.045363426208496,grad_norm: 0.9999996657432327, iteration: 132397
loss: 1.0387110710144043,grad_norm: 0.9999994604262761, iteration: 132398
loss: 1.0099871158599854,grad_norm: 0.9999996618864916, iteration: 132399
loss: 1.0187265872955322,grad_norm: 0.9999998061922919, iteration: 132400
loss: 1.071014642715454,grad_norm: 0.9999998803723551, iteration: 132401
loss: 0.9909386038780212,grad_norm: 0.8526203511442515, iteration: 132402
loss: 1.0062031745910645,grad_norm: 0.980497864852145, iteration: 132403
loss: 1.0342460870742798,grad_norm: 0.7493212759545532, iteration: 132404
loss: 1.2252016067504883,grad_norm: 0.9999996691681361, iteration: 132405
loss: 0.9775629639625549,grad_norm: 0.9161654618844824, iteration: 132406
loss: 1.0517348051071167,grad_norm: 0.9999992539803988, iteration: 132407
loss: 1.0063576698303223,grad_norm: 0.9999991556274155, iteration: 132408
loss: 1.0199388265609741,grad_norm: 0.8775930867292677, iteration: 132409
loss: 1.0147924423217773,grad_norm: 0.9473114869129382, iteration: 132410
loss: 1.0169602632522583,grad_norm: 0.8545153184688474, iteration: 132411
loss: 1.0254377126693726,grad_norm: 0.9999998069315461, iteration: 132412
loss: 1.020910382270813,grad_norm: 0.99999916886656, iteration: 132413
loss: 1.048424243927002,grad_norm: 0.9656803219263006, iteration: 132414
loss: 1.0356446504592896,grad_norm: 0.9999998755587908, iteration: 132415
loss: 1.0577903985977173,grad_norm: 0.9999996459337255, iteration: 132416
loss: 1.0137633085250854,grad_norm: 0.9999991870870183, iteration: 132417
loss: 0.977750301361084,grad_norm: 0.8940415033968655, iteration: 132418
loss: 0.9891705513000488,grad_norm: 0.9170919542516027, iteration: 132419
loss: 1.0353518724441528,grad_norm: 0.7488907568978636, iteration: 132420
loss: 1.1158994436264038,grad_norm: 0.9999991086171328, iteration: 132421
loss: 1.049708604812622,grad_norm: 0.9108047607243859, iteration: 132422
loss: 0.9957593083381653,grad_norm: 0.9427893220965007, iteration: 132423
loss: 1.032637119293213,grad_norm: 0.950178707385776, iteration: 132424
loss: 0.9911222457885742,grad_norm: 0.9999996291098339, iteration: 132425
loss: 1.0137180089950562,grad_norm: 0.9999997596902466, iteration: 132426
loss: 1.0462217330932617,grad_norm: 0.9999996464938165, iteration: 132427
loss: 1.0464696884155273,grad_norm: 0.9999993905982597, iteration: 132428
loss: 1.1036792993545532,grad_norm: 0.9999996788033532, iteration: 132429
loss: 1.0066169500350952,grad_norm: 0.8617300853747497, iteration: 132430
loss: 1.1616625785827637,grad_norm: 0.9999992448440184, iteration: 132431
loss: 1.0220820903778076,grad_norm: 0.999999550289797, iteration: 132432
loss: 1.0198687314987183,grad_norm: 0.830176512412656, iteration: 132433
loss: 0.9974920153617859,grad_norm: 0.9169159375014204, iteration: 132434
loss: 1.0080780982971191,grad_norm: 0.9999997574821685, iteration: 132435
loss: 1.099660038948059,grad_norm: 0.9999999898532708, iteration: 132436
loss: 1.0081751346588135,grad_norm: 0.9999991848153841, iteration: 132437
loss: 0.9609293937683105,grad_norm: 0.9999992935647374, iteration: 132438
loss: 1.0147932767868042,grad_norm: 0.999999122105202, iteration: 132439
loss: 1.0433157682418823,grad_norm: 0.9200591055968771, iteration: 132440
loss: 1.0658899545669556,grad_norm: 0.9999991766508095, iteration: 132441
loss: 1.0197778940200806,grad_norm: 0.9999993187325346, iteration: 132442
loss: 0.9735126495361328,grad_norm: 0.811069748992191, iteration: 132443
loss: 1.0128209590911865,grad_norm: 0.8914823701508444, iteration: 132444
loss: 1.001086950302124,grad_norm: 0.9999992238668101, iteration: 132445
loss: 1.0001330375671387,grad_norm: 0.9999992259540265, iteration: 132446
loss: 1.0968027114868164,grad_norm: 0.9999998036394455, iteration: 132447
loss: 1.0126944780349731,grad_norm: 0.9058563619532572, iteration: 132448
loss: 1.075859785079956,grad_norm: 0.9999996231917261, iteration: 132449
loss: 1.0324838161468506,grad_norm: 0.9999996778445098, iteration: 132450
loss: 1.106930136680603,grad_norm: 0.9452342949733247, iteration: 132451
loss: 1.0542867183685303,grad_norm: 0.8708970624829515, iteration: 132452
loss: 1.0321825742721558,grad_norm: 0.9999992141175099, iteration: 132453
loss: 1.0184961557388306,grad_norm: 0.890343033959692, iteration: 132454
loss: 1.1782896518707275,grad_norm: 0.9445641127734896, iteration: 132455
loss: 1.1335442066192627,grad_norm: 0.9999999180419581, iteration: 132456
loss: 1.082001805305481,grad_norm: 0.9999998679500408, iteration: 132457
loss: 1.037277340888977,grad_norm: 0.9999995898066052, iteration: 132458
loss: 0.9669037461280823,grad_norm: 0.8026931081620267, iteration: 132459
loss: 1.0334035158157349,grad_norm: 0.9525932132508902, iteration: 132460
loss: 0.9822832345962524,grad_norm: 0.7593356468852269, iteration: 132461
loss: 1.044021487236023,grad_norm: 0.9586930587823381, iteration: 132462
loss: 1.0212430953979492,grad_norm: 0.9728685857210634, iteration: 132463
loss: 0.9946414828300476,grad_norm: 0.9455894311586905, iteration: 132464
loss: 1.0519053936004639,grad_norm: 0.9184385536697987, iteration: 132465
loss: 0.9736930131912231,grad_norm: 0.8053559671837645, iteration: 132466
loss: 1.02553129196167,grad_norm: 0.9999990793683677, iteration: 132467
loss: 1.0280191898345947,grad_norm: 0.9999990462638878, iteration: 132468
loss: 0.9866780638694763,grad_norm: 0.9678721953328373, iteration: 132469
loss: 1.0065709352493286,grad_norm: 0.8724338078633047, iteration: 132470
loss: 1.0431499481201172,grad_norm: 0.9999995593506045, iteration: 132471
loss: 1.0762163400650024,grad_norm: 0.9999994164328746, iteration: 132472
loss: 0.9867428541183472,grad_norm: 0.9999991434973536, iteration: 132473
loss: 0.993826150894165,grad_norm: 0.7751520250832148, iteration: 132474
loss: 0.9831749200820923,grad_norm: 0.9045960710074813, iteration: 132475
loss: 0.9512468576431274,grad_norm: 0.9999991877369738, iteration: 132476
loss: 1.170938491821289,grad_norm: 0.9999998694900515, iteration: 132477
loss: 1.0030826330184937,grad_norm: 0.9999999786094559, iteration: 132478
loss: 0.9976090788841248,grad_norm: 0.9999992047926858, iteration: 132479
loss: 1.0338375568389893,grad_norm: 0.9999998791712612, iteration: 132480
loss: 1.0581649541854858,grad_norm: 0.9999991978045877, iteration: 132481
loss: 1.0021928548812866,grad_norm: 0.883921569127733, iteration: 132482
loss: 1.0545603036880493,grad_norm: 0.9999995980473062, iteration: 132483
loss: 1.0266579389572144,grad_norm: 0.8192555205598497, iteration: 132484
loss: 1.0346605777740479,grad_norm: 0.999999614619071, iteration: 132485
loss: 1.0165315866470337,grad_norm: 0.9999993303970531, iteration: 132486
loss: 0.995046079158783,grad_norm: 0.8547065630573307, iteration: 132487
loss: 1.004288911819458,grad_norm: 0.9999992871487088, iteration: 132488
loss: 1.0881987810134888,grad_norm: 0.953127386929002, iteration: 132489
loss: 1.0235211849212646,grad_norm: 0.9999997746610605, iteration: 132490
loss: 0.9631505608558655,grad_norm: 0.8606196697573136, iteration: 132491
loss: 1.0256346464157104,grad_norm: 0.9999996823625962, iteration: 132492
loss: 0.9937371015548706,grad_norm: 0.7260093907105842, iteration: 132493
loss: 1.0393601655960083,grad_norm: 0.8551985862812604, iteration: 132494
loss: 1.010272741317749,grad_norm: 0.9999991122715624, iteration: 132495
loss: 0.9984107613563538,grad_norm: 0.9999992836040812, iteration: 132496
loss: 1.0181164741516113,grad_norm: 0.9999999401071112, iteration: 132497
loss: 1.0073570013046265,grad_norm: 0.9999991894697087, iteration: 132498
loss: 0.9909942150115967,grad_norm: 0.7841978059872674, iteration: 132499
loss: 0.9824140071868896,grad_norm: 0.999998965654475, iteration: 132500
loss: 1.041917324066162,grad_norm: 0.9999996007456093, iteration: 132501
loss: 1.017947793006897,grad_norm: 0.8465180397172889, iteration: 132502
loss: 1.0474194288253784,grad_norm: 0.999999107635379, iteration: 132503
loss: 0.9923333525657654,grad_norm: 0.8049888694276506, iteration: 132504
loss: 1.0133507251739502,grad_norm: 0.9999990772597114, iteration: 132505
loss: 0.9985124468803406,grad_norm: 0.9349234710092896, iteration: 132506
loss: 1.0239758491516113,grad_norm: 0.9999991401246666, iteration: 132507
loss: 0.9912664890289307,grad_norm: 0.8499770193365278, iteration: 132508
loss: 1.0382202863693237,grad_norm: 0.9999991447039289, iteration: 132509
loss: 1.028910756111145,grad_norm: 0.9999990962591048, iteration: 132510
loss: 0.9948682188987732,grad_norm: 0.818949731440593, iteration: 132511
loss: 1.0225913524627686,grad_norm: 0.7402036914565346, iteration: 132512
loss: 1.0292694568634033,grad_norm: 0.9999990855886268, iteration: 132513
loss: 0.9746586084365845,grad_norm: 0.9999990287890359, iteration: 132514
loss: 1.0355247259140015,grad_norm: 0.818240984007581, iteration: 132515
loss: 0.9765865206718445,grad_norm: 0.9238866888891691, iteration: 132516
loss: 0.9927996397018433,grad_norm: 0.8443732038768602, iteration: 132517
loss: 1.0105550289154053,grad_norm: 0.9251778260242547, iteration: 132518
loss: 1.0592955350875854,grad_norm: 0.9999995861009647, iteration: 132519
loss: 1.0654349327087402,grad_norm: 0.9999999577109795, iteration: 132520
loss: 0.9846287965774536,grad_norm: 0.8954869014931527, iteration: 132521
loss: 1.0437649488449097,grad_norm: 0.8464379615094059, iteration: 132522
loss: 0.9773243069648743,grad_norm: 0.8277154644419821, iteration: 132523
loss: 1.219275712966919,grad_norm: 0.9999999869603278, iteration: 132524
loss: 1.0268874168395996,grad_norm: 0.9999998907591194, iteration: 132525
loss: 1.0081571340560913,grad_norm: 0.9202402358833365, iteration: 132526
loss: 1.0238370895385742,grad_norm: 0.7892922462419258, iteration: 132527
loss: 1.008374810218811,grad_norm: 0.7699790062573879, iteration: 132528
loss: 1.0097228288650513,grad_norm: 0.9999990788337166, iteration: 132529
loss: 0.9857029914855957,grad_norm: 0.9900215399701375, iteration: 132530
loss: 1.0253815650939941,grad_norm: 0.7717417321679335, iteration: 132531
loss: 0.9692436456680298,grad_norm: 0.8802371806127324, iteration: 132532
loss: 1.0352364778518677,grad_norm: 0.9999989829264728, iteration: 132533
loss: 0.9736842513084412,grad_norm: 0.8629143084850334, iteration: 132534
loss: 0.9869210124015808,grad_norm: 0.9071077255754103, iteration: 132535
loss: 1.0750030279159546,grad_norm: 0.999999387528607, iteration: 132536
loss: 1.0308536291122437,grad_norm: 0.8275483688759614, iteration: 132537
loss: 0.9800426363945007,grad_norm: 0.8610407478284577, iteration: 132538
loss: 1.0065466165542603,grad_norm: 0.9436124165083997, iteration: 132539
loss: 0.9905062317848206,grad_norm: 0.7598684343024367, iteration: 132540
loss: 0.9778441786766052,grad_norm: 0.8863104643780254, iteration: 132541
loss: 0.9990204572677612,grad_norm: 0.9275034088115237, iteration: 132542
loss: 0.9974396228790283,grad_norm: 0.923226745111105, iteration: 132543
loss: 0.980665385723114,grad_norm: 0.9533166340335105, iteration: 132544
loss: 1.0222426652908325,grad_norm: 0.9999999257287129, iteration: 132545
loss: 0.9847105741500854,grad_norm: 0.9087210965623678, iteration: 132546
loss: 0.9609460234642029,grad_norm: 0.9999994660668043, iteration: 132547
loss: 1.0102025270462036,grad_norm: 0.8971480912319462, iteration: 132548
loss: 1.1473292112350464,grad_norm: 0.9268623868440333, iteration: 132549
loss: 1.087907314300537,grad_norm: 0.9999999758609067, iteration: 132550
loss: 1.108167290687561,grad_norm: 0.9999991237523597, iteration: 132551
loss: 1.1098737716674805,grad_norm: 0.9999994861598099, iteration: 132552
loss: 0.9652764797210693,grad_norm: 0.9541651360535675, iteration: 132553
loss: 1.0128289461135864,grad_norm: 0.8861315292680577, iteration: 132554
loss: 1.0444618463516235,grad_norm: 0.9874845461947229, iteration: 132555
loss: 1.0180617570877075,grad_norm: 0.9999989962416342, iteration: 132556
loss: 1.0008549690246582,grad_norm: 0.88779700727855, iteration: 132557
loss: 0.9874357581138611,grad_norm: 0.8423839911739466, iteration: 132558
loss: 0.9305312633514404,grad_norm: 0.9999990457170906, iteration: 132559
loss: 1.2009445428848267,grad_norm: 0.999999596902512, iteration: 132560
loss: 1.02069890499115,grad_norm: 0.9857024551954648, iteration: 132561
loss: 0.9836883544921875,grad_norm: 0.9999991821788676, iteration: 132562
loss: 1.0406638383865356,grad_norm: 0.9999995539034312, iteration: 132563
loss: 1.0123096704483032,grad_norm: 0.999999876622676, iteration: 132564
loss: 0.9991588592529297,grad_norm: 0.8143692719308678, iteration: 132565
loss: 1.0210239887237549,grad_norm: 0.9999992083011564, iteration: 132566
loss: 1.0932741165161133,grad_norm: 0.999999279892701, iteration: 132567
loss: 0.9868806600570679,grad_norm: 0.9709318177528125, iteration: 132568
loss: 1.1623371839523315,grad_norm: 0.9999999908214153, iteration: 132569
loss: 1.0780943632125854,grad_norm: 0.9999996535293062, iteration: 132570
loss: 1.0261142253875732,grad_norm: 0.972574920005915, iteration: 132571
loss: 0.9639207720756531,grad_norm: 0.8056084823379851, iteration: 132572
loss: 1.1263164281845093,grad_norm: 0.9999999204789998, iteration: 132573
loss: 1.1735410690307617,grad_norm: 0.9999995852155423, iteration: 132574
loss: 1.049106478691101,grad_norm: 0.9996591279333916, iteration: 132575
loss: 1.1462104320526123,grad_norm: 0.9999996743416939, iteration: 132576
loss: 1.0449084043502808,grad_norm: 0.999999811028659, iteration: 132577
loss: 1.0738065242767334,grad_norm: 0.9999993291791173, iteration: 132578
loss: 1.1412200927734375,grad_norm: 0.9999992439655204, iteration: 132579
loss: 1.1844273805618286,grad_norm: 1.000000008783888, iteration: 132580
loss: 1.0305622816085815,grad_norm: 0.9377812345772994, iteration: 132581
loss: 1.028481364250183,grad_norm: 0.9999991506784884, iteration: 132582
loss: 1.2330158948898315,grad_norm: 0.9999992462363113, iteration: 132583
loss: 0.992696225643158,grad_norm: 0.861754491717473, iteration: 132584
loss: 0.9946390986442566,grad_norm: 0.9343855980932244, iteration: 132585
loss: 1.0337305068969727,grad_norm: 0.9999996111189926, iteration: 132586
loss: 1.0076231956481934,grad_norm: 0.8557484040593939, iteration: 132587
loss: 1.0028995275497437,grad_norm: 0.7413871381292462, iteration: 132588
loss: 0.970294177532196,grad_norm: 0.8074693167988705, iteration: 132589
loss: 1.0465928316116333,grad_norm: 0.9999995686924887, iteration: 132590
loss: 0.996574878692627,grad_norm: 0.8464456155886498, iteration: 132591
loss: 1.0189772844314575,grad_norm: 0.9999992148386716, iteration: 132592
loss: 1.00654137134552,grad_norm: 0.8474989560707791, iteration: 132593
loss: 0.947555422782898,grad_norm: 0.9770261976457688, iteration: 132594
loss: 1.0162183046340942,grad_norm: 0.9999998087577885, iteration: 132595
loss: 0.9873529672622681,grad_norm: 0.812731615727982, iteration: 132596
loss: 1.0173033475875854,grad_norm: 0.8154975753772271, iteration: 132597
loss: 1.0254279375076294,grad_norm: 0.9999991460631545, iteration: 132598
loss: 1.0409525632858276,grad_norm: 0.731085350819501, iteration: 132599
loss: 1.0134958028793335,grad_norm: 0.9999992859987511, iteration: 132600
loss: 0.9761480689048767,grad_norm: 0.9133003352898438, iteration: 132601
loss: 1.0318503379821777,grad_norm: 0.8319702164573594, iteration: 132602
loss: 0.9628600478172302,grad_norm: 0.8958058804591049, iteration: 132603
loss: 1.007707118988037,grad_norm: 0.9999993652087223, iteration: 132604
loss: 1.0866868495941162,grad_norm: 0.970959947141859, iteration: 132605
loss: 1.1447676420211792,grad_norm: 0.9999992216446618, iteration: 132606
loss: 0.9923826456069946,grad_norm: 0.8871456001470183, iteration: 132607
loss: 0.9800360202789307,grad_norm: 0.8418365720643972, iteration: 132608
loss: 1.0420353412628174,grad_norm: 0.9999993559468987, iteration: 132609
loss: 1.0157824754714966,grad_norm: 0.9999991216188516, iteration: 132610
loss: 1.0557941198349,grad_norm: 0.8491046456358108, iteration: 132611
loss: 1.032586932182312,grad_norm: 0.9999991265412909, iteration: 132612
loss: 1.046064019203186,grad_norm: 0.8908263500926321, iteration: 132613
loss: 0.9863818287849426,grad_norm: 0.925034777521956, iteration: 132614
loss: 0.9879558086395264,grad_norm: 0.9999991298781293, iteration: 132615
loss: 1.0439070463180542,grad_norm: 0.9999998018702172, iteration: 132616
loss: 1.0124647617340088,grad_norm: 0.9999996182073507, iteration: 132617
loss: 1.003021478652954,grad_norm: 0.9098646389214454, iteration: 132618
loss: 1.02071213722229,grad_norm: 0.8924885741087036, iteration: 132619
loss: 1.0235267877578735,grad_norm: 0.9999996240350412, iteration: 132620
loss: 0.9651127457618713,grad_norm: 0.7937892739888754, iteration: 132621
loss: 1.0981615781784058,grad_norm: 0.9999991001886475, iteration: 132622
loss: 1.0377334356307983,grad_norm: 0.7928210490656438, iteration: 132623
loss: 1.0088003873825073,grad_norm: 0.9077833285459042, iteration: 132624
loss: 1.0707383155822754,grad_norm: 0.9999991986927391, iteration: 132625
loss: 1.0153571367263794,grad_norm: 0.9999990340111711, iteration: 132626
loss: 0.9836165904998779,grad_norm: 0.9999990875030951, iteration: 132627
loss: 1.002307415008545,grad_norm: 0.9518807031727126, iteration: 132628
loss: 1.0502221584320068,grad_norm: 0.9999998153576639, iteration: 132629
loss: 1.015317440032959,grad_norm: 0.999999012526906, iteration: 132630
loss: 1.0524146556854248,grad_norm: 0.8320440488122102, iteration: 132631
loss: 0.9910289645195007,grad_norm: 0.8285487615338863, iteration: 132632
loss: 1.0375550985336304,grad_norm: 0.9999991617427976, iteration: 132633
loss: 1.0157134532928467,grad_norm: 0.9999991361388775, iteration: 132634
loss: 0.9786837697029114,grad_norm: 0.9999991925677866, iteration: 132635
loss: 1.0191751718521118,grad_norm: 0.9999990922762293, iteration: 132636
loss: 1.0461938381195068,grad_norm: 0.8645481429843688, iteration: 132637
loss: 1.0042407512664795,grad_norm: 0.7551802668958619, iteration: 132638
loss: 1.0141510963439941,grad_norm: 0.9750412622324551, iteration: 132639
loss: 1.033464789390564,grad_norm: 0.9999991603415238, iteration: 132640
loss: 1.0147994756698608,grad_norm: 0.9888882231637544, iteration: 132641
loss: 1.0312923192977905,grad_norm: 0.9999993308083681, iteration: 132642
loss: 0.9797914624214172,grad_norm: 0.8981452015420832, iteration: 132643
loss: 1.0036931037902832,grad_norm: 0.7557647902430654, iteration: 132644
loss: 0.9883286356925964,grad_norm: 0.9999991981452849, iteration: 132645
loss: 1.0245239734649658,grad_norm: 0.999999902877669, iteration: 132646
loss: 0.9825190305709839,grad_norm: 0.8727034166298613, iteration: 132647
loss: 1.0434250831604004,grad_norm: 0.9999999929024388, iteration: 132648
loss: 1.0587680339813232,grad_norm: 0.9999991932328297, iteration: 132649
loss: 1.2535631656646729,grad_norm: 0.9999994893506821, iteration: 132650
loss: 1.0179766416549683,grad_norm: 0.9999993753921244, iteration: 132651
loss: 0.9931321740150452,grad_norm: 0.8714083523527254, iteration: 132652
loss: 1.0629608631134033,grad_norm: 0.9999997858361387, iteration: 132653
loss: 0.995782732963562,grad_norm: 0.9999992785082801, iteration: 132654
loss: 1.0582948923110962,grad_norm: 0.9999992496875293, iteration: 132655
loss: 1.0199650526046753,grad_norm: 0.9999999423766377, iteration: 132656
loss: 1.0824278593063354,grad_norm: 0.9999998936194163, iteration: 132657
loss: 1.0367012023925781,grad_norm: 0.9999997303203942, iteration: 132658
loss: 1.0965694189071655,grad_norm: 0.9999998723034752, iteration: 132659
loss: 1.0764623880386353,grad_norm: 0.9582738707976646, iteration: 132660
loss: 1.1163145303726196,grad_norm: 0.9999996632433771, iteration: 132661
loss: 0.9726207256317139,grad_norm: 0.8385300530270724, iteration: 132662
loss: 1.0817276239395142,grad_norm: 0.9999992043301774, iteration: 132663
loss: 1.0631837844848633,grad_norm: 0.9999992094876502, iteration: 132664
loss: 1.0706762075424194,grad_norm: 0.999999919304208, iteration: 132665
loss: 1.0332943201065063,grad_norm: 0.9999990512642137, iteration: 132666
loss: 1.1807538270950317,grad_norm: 0.9999999307963637, iteration: 132667
loss: 1.0013840198516846,grad_norm: 0.809146467290376, iteration: 132668
loss: 1.0450438261032104,grad_norm: 0.9999997202959122, iteration: 132669
loss: 1.052245020866394,grad_norm: 0.9999998672277979, iteration: 132670
loss: 1.0313982963562012,grad_norm: 0.9750649922279296, iteration: 132671
loss: 1.0183781385421753,grad_norm: 0.8850426980846274, iteration: 132672
loss: 1.0795165300369263,grad_norm: 0.969148123837375, iteration: 132673
loss: 0.971372127532959,grad_norm: 0.8097664645086462, iteration: 132674
loss: 1.0236949920654297,grad_norm: 0.8494405431460579, iteration: 132675
loss: 1.0473244190216064,grad_norm: 0.9044736088487387, iteration: 132676
loss: 0.9870433807373047,grad_norm: 0.8603482565134537, iteration: 132677
loss: 1.045770287513733,grad_norm: 0.9796031767380063, iteration: 132678
loss: 1.0203818082809448,grad_norm: 0.8996652994886208, iteration: 132679
loss: 1.0134466886520386,grad_norm: 0.9999991263789029, iteration: 132680
loss: 0.9997830390930176,grad_norm: 0.9999991090660172, iteration: 132681
loss: 0.9899402260780334,grad_norm: 0.9410760942812073, iteration: 132682
loss: 1.061771273612976,grad_norm: 0.9999997056008816, iteration: 132683
loss: 1.0172946453094482,grad_norm: 0.8696225017417128, iteration: 132684
loss: 1.0787640810012817,grad_norm: 0.8732620443642015, iteration: 132685
loss: 1.013063907623291,grad_norm: 0.9999991012206799, iteration: 132686
loss: 1.0630849599838257,grad_norm: 0.9999991847653468, iteration: 132687
loss: 1.004709005355835,grad_norm: 0.9999992986757864, iteration: 132688
loss: 1.0402607917785645,grad_norm: 0.9999998696235228, iteration: 132689
loss: 1.013454794883728,grad_norm: 0.9556214604536343, iteration: 132690
loss: 1.0429177284240723,grad_norm: 0.9999999676792256, iteration: 132691
loss: 0.9799816012382507,grad_norm: 0.999999278746274, iteration: 132692
loss: 1.0242139101028442,grad_norm: 0.98454545883656, iteration: 132693
loss: 0.9999104142189026,grad_norm: 0.8741037604463272, iteration: 132694
loss: 1.036339282989502,grad_norm: 0.9999991726279065, iteration: 132695
loss: 1.0270721912384033,grad_norm: 0.8556328084080937, iteration: 132696
loss: 0.9869246482849121,grad_norm: 0.9999991123786651, iteration: 132697
loss: 1.0088536739349365,grad_norm: 0.8366389750085109, iteration: 132698
loss: 0.9928485155105591,grad_norm: 0.9999991466299117, iteration: 132699
loss: 1.0864275693893433,grad_norm: 0.9999990548561913, iteration: 132700
loss: 1.1233669519424438,grad_norm: 0.9999992429386887, iteration: 132701
loss: 1.0030345916748047,grad_norm: 0.89391169289929, iteration: 132702
loss: 0.9953183531761169,grad_norm: 0.8895672610885766, iteration: 132703
loss: 1.023316502571106,grad_norm: 0.9999996011909432, iteration: 132704
loss: 1.0057673454284668,grad_norm: 0.9999990896311454, iteration: 132705
loss: 1.2138073444366455,grad_norm: 0.9999994687491032, iteration: 132706
loss: 1.174533486366272,grad_norm: 0.9999997164655372, iteration: 132707
loss: 1.0167596340179443,grad_norm: 0.9999996670258408, iteration: 132708
loss: 1.1614000797271729,grad_norm: 0.9999999972363169, iteration: 132709
loss: 1.0141215324401855,grad_norm: 0.9121351135073695, iteration: 132710
loss: 0.9810785055160522,grad_norm: 0.9660332470841175, iteration: 132711
loss: 0.9996634721755981,grad_norm: 0.8700572111096707, iteration: 132712
loss: 1.0433531999588013,grad_norm: 0.9999998428720694, iteration: 132713
loss: 1.0609678030014038,grad_norm: 0.8640578914196461, iteration: 132714
loss: 0.9941763281822205,grad_norm: 0.8457028238521221, iteration: 132715
loss: 0.9950426816940308,grad_norm: 0.9999990223451054, iteration: 132716
loss: 1.0705088376998901,grad_norm: 0.8699043291227382, iteration: 132717
loss: 1.0809903144836426,grad_norm: 0.9999992770872587, iteration: 132718
loss: 0.9636092185974121,grad_norm: 0.9290527096069784, iteration: 132719
loss: 1.0060839653015137,grad_norm: 0.9368574906266266, iteration: 132720
loss: 1.0066663026809692,grad_norm: 0.9402015200461491, iteration: 132721
loss: 1.10298490524292,grad_norm: 0.999999256889308, iteration: 132722
loss: 1.017471194267273,grad_norm: 0.9999991340522648, iteration: 132723
loss: 1.0803433656692505,grad_norm: 0.9999994513085173, iteration: 132724
loss: 1.0029722452163696,grad_norm: 0.9743167699216485, iteration: 132725
loss: 0.9786109328269958,grad_norm: 0.8299978701446382, iteration: 132726
loss: 1.0601110458374023,grad_norm: 0.99999941615989, iteration: 132727
loss: 1.0056698322296143,grad_norm: 0.7646121329424185, iteration: 132728
loss: 1.0432347059249878,grad_norm: 0.9488310634018072, iteration: 132729
loss: 1.0783672332763672,grad_norm: 0.9999991870109589, iteration: 132730
loss: 1.0302480459213257,grad_norm: 0.9999994278490664, iteration: 132731
loss: 0.9588145017623901,grad_norm: 0.9435272280823935, iteration: 132732
loss: 0.9530618190765381,grad_norm: 0.7050916619294985, iteration: 132733
loss: 1.1463316679000854,grad_norm: 1.0000000091220398, iteration: 132734
loss: 1.3477619886398315,grad_norm: 1.0000000083196465, iteration: 132735
loss: 1.0075294971466064,grad_norm: 0.9999997540160429, iteration: 132736
loss: 1.1082884073257446,grad_norm: 0.9999999763370778, iteration: 132737
loss: 1.1426092386245728,grad_norm: 0.9999997107398413, iteration: 132738
loss: 1.0121557712554932,grad_norm: 0.9999998622644061, iteration: 132739
loss: 1.01265287399292,grad_norm: 0.9261221139111736, iteration: 132740
loss: 1.0813581943511963,grad_norm: 0.9999998848473349, iteration: 132741
loss: 0.9970069527626038,grad_norm: 0.8243290135730985, iteration: 132742
loss: 1.046372652053833,grad_norm: 0.9999990611184939, iteration: 132743
loss: 1.0836703777313232,grad_norm: 0.9999991597425764, iteration: 132744
loss: 1.016950249671936,grad_norm: 0.8399714883942653, iteration: 132745
loss: 1.3550843000411987,grad_norm: 1.0000000276765708, iteration: 132746
loss: 1.0192368030548096,grad_norm: 0.9871376679734825, iteration: 132747
loss: 0.9948532581329346,grad_norm: 0.9999991667171352, iteration: 132748
loss: 0.959956705570221,grad_norm: 0.8845251637566939, iteration: 132749
loss: 0.9771973490715027,grad_norm: 0.9999991396803909, iteration: 132750
loss: 1.0248967409133911,grad_norm: 0.9999990512635085, iteration: 132751
loss: 0.9791631102561951,grad_norm: 0.8229556346298005, iteration: 132752
loss: 1.1016205549240112,grad_norm: 0.999999279697876, iteration: 132753
loss: 1.0966824293136597,grad_norm: 0.9999991262862662, iteration: 132754
loss: 0.988297164440155,grad_norm: 0.9999991871072378, iteration: 132755
loss: 1.2366811037063599,grad_norm: 0.9999997404568086, iteration: 132756
loss: 1.0223352909088135,grad_norm: 0.9202418247287991, iteration: 132757
loss: 0.992918074131012,grad_norm: 0.96157172465215, iteration: 132758
loss: 1.1076499223709106,grad_norm: 0.999999220711444, iteration: 132759
loss: 1.0092302560806274,grad_norm: 0.8783017939411639, iteration: 132760
loss: 0.9617910385131836,grad_norm: 0.8510474058781115, iteration: 132761
loss: 1.022917628288269,grad_norm: 0.8441078414949952, iteration: 132762
loss: 1.064293384552002,grad_norm: 0.9999994257694355, iteration: 132763
loss: 0.996365487575531,grad_norm: 0.9999997806628527, iteration: 132764
loss: 0.9816148281097412,grad_norm: 0.9999991271450257, iteration: 132765
loss: 1.0104014873504639,grad_norm: 0.9184746546376948, iteration: 132766
loss: 1.0473003387451172,grad_norm: 0.9999998350508434, iteration: 132767
loss: 1.0674278736114502,grad_norm: 0.9704041237744134, iteration: 132768
loss: 1.03670334815979,grad_norm: 0.991585613807423, iteration: 132769
loss: 1.0109260082244873,grad_norm: 0.9739733595542648, iteration: 132770
loss: 1.0012084245681763,grad_norm: 0.9999994561180843, iteration: 132771
loss: 0.9610249400138855,grad_norm: 0.8505546076336041, iteration: 132772
loss: 1.0631849765777588,grad_norm: 0.9999997515462546, iteration: 132773
loss: 0.9999336004257202,grad_norm: 0.9999993172815056, iteration: 132774
loss: 1.0628776550292969,grad_norm: 0.9999995110708912, iteration: 132775
loss: 1.0950244665145874,grad_norm: 0.9999998154121321, iteration: 132776
loss: 1.0084751844406128,grad_norm: 0.8569241306844955, iteration: 132777
loss: 1.141942024230957,grad_norm: 0.9999999463619642, iteration: 132778
loss: 1.0952197313308716,grad_norm: 0.9999996292089258, iteration: 132779
loss: 1.122740387916565,grad_norm: 0.9999995143702319, iteration: 132780
loss: 0.9677491188049316,grad_norm: 0.8375669894633669, iteration: 132781
loss: 1.0125017166137695,grad_norm: 0.8665057333384388, iteration: 132782
loss: 1.1229119300842285,grad_norm: 0.9999999941415351, iteration: 132783
loss: 0.9792203307151794,grad_norm: 0.8310512428616315, iteration: 132784
loss: 1.0087320804595947,grad_norm: 0.893487817632264, iteration: 132785
loss: 0.9995338320732117,grad_norm: 0.8676044841208519, iteration: 132786
loss: 1.0753474235534668,grad_norm: 0.9999995981648307, iteration: 132787
loss: 1.0264562368392944,grad_norm: 0.9999993052806656, iteration: 132788
loss: 1.0315688848495483,grad_norm: 0.9999995150294883, iteration: 132789
loss: 1.0151549577713013,grad_norm: 0.908904264031074, iteration: 132790
loss: 1.1916518211364746,grad_norm: 0.9999998054101281, iteration: 132791
loss: 0.9636003375053406,grad_norm: 0.8262374417414381, iteration: 132792
loss: 1.0637736320495605,grad_norm: 0.9999998924117359, iteration: 132793
loss: 1.072874903678894,grad_norm: 0.9999999417881392, iteration: 132794
loss: 1.0972620248794556,grad_norm: 0.9999994388469698, iteration: 132795
loss: 1.0686419010162354,grad_norm: 0.9999994255407046, iteration: 132796
loss: 1.0769474506378174,grad_norm: 0.9999997980002389, iteration: 132797
loss: 1.007135033607483,grad_norm: 0.9999993207346376, iteration: 132798
loss: 0.9697321057319641,grad_norm: 0.9999993561246858, iteration: 132799
loss: 1.0826289653778076,grad_norm: 0.9999994896677087, iteration: 132800
loss: 1.0390853881835938,grad_norm: 0.9999990357428892, iteration: 132801
loss: 1.165939450263977,grad_norm: 0.9999998087248311, iteration: 132802
loss: 0.9848316311836243,grad_norm: 0.8761680039936718, iteration: 132803
loss: 1.036206603050232,grad_norm: 0.9127502106634394, iteration: 132804
loss: 1.020480990409851,grad_norm: 0.8798473871848752, iteration: 132805
loss: 1.042155385017395,grad_norm: 0.9999997424083648, iteration: 132806
loss: 1.0730831623077393,grad_norm: 0.999999456813549, iteration: 132807
loss: 1.0377134084701538,grad_norm: 0.9999994508754599, iteration: 132808
loss: 1.041152834892273,grad_norm: 0.9999999568074963, iteration: 132809
loss: 1.006935954093933,grad_norm: 0.9999991052913889, iteration: 132810
loss: 1.013971209526062,grad_norm: 0.9999990562461669, iteration: 132811
loss: 0.9886876940727234,grad_norm: 0.9999990699964109, iteration: 132812
loss: 0.9953707456588745,grad_norm: 0.9999991842988757, iteration: 132813
loss: 0.9745094180107117,grad_norm: 0.9999991595077888, iteration: 132814
loss: 0.9565827250480652,grad_norm: 0.9166522789985606, iteration: 132815
loss: 1.0198177099227905,grad_norm: 0.8102210895994346, iteration: 132816
loss: 1.0274847745895386,grad_norm: 0.9999998892837691, iteration: 132817
loss: 1.017927885055542,grad_norm: 0.9999994390674751, iteration: 132818
loss: 1.1529972553253174,grad_norm: 0.9999999005642507, iteration: 132819
loss: 1.0009546279907227,grad_norm: 0.9999993523443244, iteration: 132820
loss: 1.0378979444503784,grad_norm: 0.9999991824249737, iteration: 132821
loss: 1.0226527452468872,grad_norm: 0.9999992202511696, iteration: 132822
loss: 1.047905445098877,grad_norm: 0.9999992064981533, iteration: 132823
loss: 1.024979591369629,grad_norm: 0.8322165153571076, iteration: 132824
loss: 1.0394258499145508,grad_norm: 0.999999066811031, iteration: 132825
loss: 0.9981371760368347,grad_norm: 0.7786398627093779, iteration: 132826
loss: 0.9740931987762451,grad_norm: 0.999999142721632, iteration: 132827
loss: 0.987825334072113,grad_norm: 0.9999994913503982, iteration: 132828
loss: 0.9918714165687561,grad_norm: 0.8484767384093025, iteration: 132829
loss: 1.1164405345916748,grad_norm: 1.0000000593129688, iteration: 132830
loss: 0.9994481801986694,grad_norm: 0.9999996624163268, iteration: 132831
loss: 0.9550829529762268,grad_norm: 0.8342383216037014, iteration: 132832
loss: 1.0745583772659302,grad_norm: 0.9999996032406204, iteration: 132833
loss: 0.9783162474632263,grad_norm: 0.9999993468600168, iteration: 132834
loss: 1.0087494850158691,grad_norm: 0.9999995350955967, iteration: 132835
loss: 1.1303935050964355,grad_norm: 0.9999998112791368, iteration: 132836
loss: 1.015084147453308,grad_norm: 0.8233194551665189, iteration: 132837
loss: 1.114983081817627,grad_norm: 1.00000002048759, iteration: 132838
loss: 1.049553394317627,grad_norm: 0.9999990455113194, iteration: 132839
loss: 1.0489617586135864,grad_norm: 0.999999624647117, iteration: 132840
loss: 1.0762182474136353,grad_norm: 0.9999993915704729, iteration: 132841
loss: 1.1034703254699707,grad_norm: 0.9999998639568132, iteration: 132842
loss: 1.296802282333374,grad_norm: 0.9999999030329503, iteration: 132843
loss: 1.0235081911087036,grad_norm: 0.9999996938441198, iteration: 132844
loss: 1.0814043283462524,grad_norm: 0.9999991111645512, iteration: 132845
loss: 1.0033239126205444,grad_norm: 0.9999994709675115, iteration: 132846
loss: 1.0658987760543823,grad_norm: 0.9999996894227906, iteration: 132847
loss: 0.9757329225540161,grad_norm: 0.8936657474173815, iteration: 132848
loss: 1.0020359754562378,grad_norm: 0.9999992083769883, iteration: 132849
loss: 1.0494903326034546,grad_norm: 0.7676752543372588, iteration: 132850
loss: 1.0008163452148438,grad_norm: 0.8971160843808677, iteration: 132851
loss: 0.9927502274513245,grad_norm: 0.6777570982385925, iteration: 132852
loss: 1.0372263193130493,grad_norm: 0.9999997515095881, iteration: 132853
loss: 1.030486822128296,grad_norm: 0.9999996610463743, iteration: 132854
loss: 1.0241714715957642,grad_norm: 0.9999990686229223, iteration: 132855
loss: 1.0576833486557007,grad_norm: 0.9999992385706536, iteration: 132856
loss: 1.0116796493530273,grad_norm: 0.941466301808365, iteration: 132857
loss: 0.9727309346199036,grad_norm: 0.7367776719615144, iteration: 132858
loss: 1.1065704822540283,grad_norm: 0.9999992915190783, iteration: 132859
loss: 0.9875805974006653,grad_norm: 0.9581025763598637, iteration: 132860
loss: 1.1013702154159546,grad_norm: 0.9999997023301687, iteration: 132861
loss: 1.0165153741836548,grad_norm: 0.9999991606789381, iteration: 132862
loss: 1.0217349529266357,grad_norm: 0.9999999025429738, iteration: 132863
loss: 1.0410728454589844,grad_norm: 0.9999990895847614, iteration: 132864
loss: 1.036173939704895,grad_norm: 0.9263879467073791, iteration: 132865
loss: 1.1483154296875,grad_norm: 0.9999998444502025, iteration: 132866
loss: 0.9984503388404846,grad_norm: 0.8922120001650489, iteration: 132867
loss: 1.0284883975982666,grad_norm: 0.8880949594108676, iteration: 132868
loss: 1.0409489870071411,grad_norm: 0.9999991114057528, iteration: 132869
loss: 1.022847056388855,grad_norm: 0.9999992237166216, iteration: 132870
loss: 1.020938754081726,grad_norm: 0.9667302766369917, iteration: 132871
loss: 1.087006688117981,grad_norm: 0.9999991378430315, iteration: 132872
loss: 1.0269780158996582,grad_norm: 0.9355617168578707, iteration: 132873
loss: 1.0322461128234863,grad_norm: 0.813535714054318, iteration: 132874
loss: 1.0183639526367188,grad_norm: 0.8974868359248795, iteration: 132875
loss: 1.0275919437408447,grad_norm: 0.9999999425692817, iteration: 132876
loss: 0.9953996539115906,grad_norm: 0.904569905179568, iteration: 132877
loss: 0.9711989760398865,grad_norm: 0.9542383439127814, iteration: 132878
loss: 0.9733154773712158,grad_norm: 0.8056883795611508, iteration: 132879
loss: 1.0101760625839233,grad_norm: 0.9999990439149143, iteration: 132880
loss: 0.9939690828323364,grad_norm: 0.9999993335976464, iteration: 132881
loss: 1.0076426267623901,grad_norm: 0.8490378039000579, iteration: 132882
loss: 0.9950634837150574,grad_norm: 0.8118159429673903, iteration: 132883
loss: 1.0373668670654297,grad_norm: 0.9999992123609958, iteration: 132884
loss: 0.9909716844558716,grad_norm: 0.8924014796389997, iteration: 132885
loss: 1.1389868259429932,grad_norm: 0.9999993531028766, iteration: 132886
loss: 1.0489556789398193,grad_norm: 0.9999991357869604, iteration: 132887
loss: 1.0314476490020752,grad_norm: 0.9999991762570072, iteration: 132888
loss: 0.9871372580528259,grad_norm: 0.9407848467928214, iteration: 132889
loss: 1.018568515777588,grad_norm: 0.999999804375115, iteration: 132890
loss: 1.0160515308380127,grad_norm: 0.9999994978674849, iteration: 132891
loss: 1.0625219345092773,grad_norm: 0.9999999139702431, iteration: 132892
loss: 1.007142186164856,grad_norm: 0.7751853475763081, iteration: 132893
loss: 1.1082050800323486,grad_norm: 0.9999999012470118, iteration: 132894
loss: 0.9816222786903381,grad_norm: 0.8242792380863235, iteration: 132895
loss: 1.0069724321365356,grad_norm: 0.8726083168448191, iteration: 132896
loss: 1.049357533454895,grad_norm: 0.9999999413538505, iteration: 132897
loss: 1.0588974952697754,grad_norm: 0.9660730142292673, iteration: 132898
loss: 0.9801740050315857,grad_norm: 0.999999068072099, iteration: 132899
loss: 1.0847256183624268,grad_norm: 0.9999993253983324, iteration: 132900
loss: 1.0495657920837402,grad_norm: 0.9999992126451435, iteration: 132901
loss: 1.0240671634674072,grad_norm: 0.8893557141043921, iteration: 132902
loss: 1.0306569337844849,grad_norm: 0.9999998584112952, iteration: 132903
loss: 1.010238528251648,grad_norm: 0.999999727277278, iteration: 132904
loss: 0.9812785387039185,grad_norm: 0.7934354767622662, iteration: 132905
loss: 0.9985697865486145,grad_norm: 0.9999990107012523, iteration: 132906
loss: 1.0040096044540405,grad_norm: 0.9999992787003853, iteration: 132907
loss: 0.9831608533859253,grad_norm: 0.8773505229873588, iteration: 132908
loss: 1.049085259437561,grad_norm: 0.9999992676586299, iteration: 132909
loss: 1.0833793878555298,grad_norm: 0.9238137302132994, iteration: 132910
loss: 1.1468126773834229,grad_norm: 0.9999997726342624, iteration: 132911
loss: 1.0053969621658325,grad_norm: 0.8532103214356598, iteration: 132912
loss: 1.0144585371017456,grad_norm: 0.8287945381794362, iteration: 132913
loss: 1.0280064344406128,grad_norm: 0.9999998204980386, iteration: 132914
loss: 1.0615806579589844,grad_norm: 0.9999991755912645, iteration: 132915
loss: 1.016305923461914,grad_norm: 0.8572983434056313, iteration: 132916
loss: 1.0742486715316772,grad_norm: 0.9999992938680367, iteration: 132917
loss: 0.991395115852356,grad_norm: 0.8305396357586812, iteration: 132918
loss: 1.0493992567062378,grad_norm: 0.8533894988444001, iteration: 132919
loss: 1.0721824169158936,grad_norm: 0.9999997118327695, iteration: 132920
loss: 1.0385165214538574,grad_norm: 0.9999999610593795, iteration: 132921
loss: 1.111728310585022,grad_norm: 0.9999997479650715, iteration: 132922
loss: 1.0850143432617188,grad_norm: 0.9999995002444015, iteration: 132923
loss: 1.1231307983398438,grad_norm: 0.9999993430012603, iteration: 132924
loss: 1.0631390810012817,grad_norm: 0.9999996740357765, iteration: 132925
loss: 1.0076032876968384,grad_norm: 0.9999993377698272, iteration: 132926
loss: 1.2358072996139526,grad_norm: 0.9999994844898493, iteration: 132927
loss: 1.048887014389038,grad_norm: 0.9999999961078525, iteration: 132928
loss: 0.9664092063903809,grad_norm: 0.9377122330972014, iteration: 132929
loss: 1.0415605306625366,grad_norm: 0.9999995041636321, iteration: 132930
loss: 1.0174717903137207,grad_norm: 0.9999996710070737, iteration: 132931
loss: 1.0670098066329956,grad_norm: 0.7610485736504621, iteration: 132932
loss: 1.0215052366256714,grad_norm: 0.95891341616487, iteration: 132933
loss: 1.1431740522384644,grad_norm: 0.9999995819074509, iteration: 132934
loss: 1.0298688411712646,grad_norm: 0.8180995109576417, iteration: 132935
loss: 0.9872894287109375,grad_norm: 0.9819930012733197, iteration: 132936
loss: 1.0057342052459717,grad_norm: 0.9999989750777266, iteration: 132937
loss: 1.0409835577011108,grad_norm: 0.9999992081600777, iteration: 132938
loss: 0.9809885025024414,grad_norm: 0.8895921883187273, iteration: 132939
loss: 0.9997157454490662,grad_norm: 0.9999999571367909, iteration: 132940
loss: 1.0190480947494507,grad_norm: 0.8339419301679568, iteration: 132941
loss: 1.0292176008224487,grad_norm: 0.7511008996796792, iteration: 132942
loss: 1.0086671113967896,grad_norm: 0.8943221796866148, iteration: 132943
loss: 0.9936997294425964,grad_norm: 0.9074253096495392, iteration: 132944
loss: 0.9845800995826721,grad_norm: 0.9080260643733169, iteration: 132945
loss: 0.9981465339660645,grad_norm: 0.8895554887101614, iteration: 132946
loss: 1.0032597780227661,grad_norm: 0.9999991997639662, iteration: 132947
loss: 1.008548378944397,grad_norm: 0.7324754360565009, iteration: 132948
loss: 1.0499136447906494,grad_norm: 0.9999999786457745, iteration: 132949
loss: 1.0154625177383423,grad_norm: 0.9999990574312119, iteration: 132950
loss: 0.9842451214790344,grad_norm: 0.7715670555341239, iteration: 132951
loss: 1.0568277835845947,grad_norm: 0.99999972005698, iteration: 132952
loss: 0.9839587807655334,grad_norm: 0.881647186901181, iteration: 132953
loss: 1.0065349340438843,grad_norm: 0.9999993287687883, iteration: 132954
loss: 0.9907675385475159,grad_norm: 0.9999994574745931, iteration: 132955
loss: 0.9886181354522705,grad_norm: 0.9999990169246835, iteration: 132956
loss: 0.9831154346466064,grad_norm: 0.9999994830335692, iteration: 132957
loss: 1.0140385627746582,grad_norm: 0.9999989420262511, iteration: 132958
loss: 1.0651288032531738,grad_norm: 0.9999999304618293, iteration: 132959
loss: 1.0304315090179443,grad_norm: 0.9999999080476133, iteration: 132960
loss: 0.9853679537773132,grad_norm: 0.9999992811701425, iteration: 132961
loss: 1.025813102722168,grad_norm: 0.9768776706425154, iteration: 132962
loss: 1.034035563468933,grad_norm: 0.9999995339652367, iteration: 132963
loss: 0.9487193822860718,grad_norm: 0.9999992609680104, iteration: 132964
loss: 1.1392481327056885,grad_norm: 0.9999999646976712, iteration: 132965
loss: 0.9654847383499146,grad_norm: 0.9358576487511243, iteration: 132966
loss: 1.0814570188522339,grad_norm: 0.9999994003268885, iteration: 132967
loss: 1.0250489711761475,grad_norm: 0.9311367492501873, iteration: 132968
loss: 0.997866690158844,grad_norm: 0.9999996000677872, iteration: 132969
loss: 0.9571373462677002,grad_norm: 0.8248296819652373, iteration: 132970
loss: 1.0416189432144165,grad_norm: 0.9999994994413659, iteration: 132971
loss: 0.9878250360488892,grad_norm: 0.999999639240842, iteration: 132972
loss: 1.0150706768035889,grad_norm: 0.9999996016906708, iteration: 132973
loss: 1.046401023864746,grad_norm: 0.9999995115298568, iteration: 132974
loss: 1.0636518001556396,grad_norm: 0.9999993970681811, iteration: 132975
loss: 0.9726846218109131,grad_norm: 0.9632038387316162, iteration: 132976
loss: 0.9950698614120483,grad_norm: 0.8585254106690552, iteration: 132977
loss: 1.0289719104766846,grad_norm: 0.8868037979700367, iteration: 132978
loss: 0.9712979793548584,grad_norm: 0.9422771140977996, iteration: 132979
loss: 1.003714919090271,grad_norm: 0.7422048559574522, iteration: 132980
loss: 1.0077952146530151,grad_norm: 0.7950931352085329, iteration: 132981
loss: 0.9855166077613831,grad_norm: 0.9570780715779279, iteration: 132982
loss: 1.0054701566696167,grad_norm: 0.7568703005638523, iteration: 132983
loss: 1.0197986364364624,grad_norm: 0.9434862801321624, iteration: 132984
loss: 0.9666360020637512,grad_norm: 0.8696368407773774, iteration: 132985
loss: 1.0649480819702148,grad_norm: 0.9999995838532877, iteration: 132986
loss: 0.9563474059104919,grad_norm: 0.8408674330862257, iteration: 132987
loss: 0.9766603112220764,grad_norm: 0.8087044858414681, iteration: 132988
loss: 1.0591143369674683,grad_norm: 0.9999992585716154, iteration: 132989
loss: 0.9730687141418457,grad_norm: 0.9147115506256203, iteration: 132990
loss: 0.982688844203949,grad_norm: 0.8712498817800695, iteration: 132991
loss: 1.0388857126235962,grad_norm: 0.9306487328611229, iteration: 132992
loss: 1.0855270624160767,grad_norm: 0.9999999066080365, iteration: 132993
loss: 0.9624868035316467,grad_norm: 0.849287564609203, iteration: 132994
loss: 1.0077571868896484,grad_norm: 0.818135551631259, iteration: 132995
loss: 1.143304705619812,grad_norm: 0.9999997416897173, iteration: 132996
loss: 1.0085731744766235,grad_norm: 0.9999994778876488, iteration: 132997
loss: 1.0216583013534546,grad_norm: 0.9217230495630663, iteration: 132998
loss: 0.9723876714706421,grad_norm: 0.9257972558693085, iteration: 132999
loss: 1.0078212022781372,grad_norm: 0.726197400140371, iteration: 133000
loss: 1.001596450805664,grad_norm: 0.9492736954278055, iteration: 133001
loss: 0.9968850016593933,grad_norm: 0.8970394339600755, iteration: 133002
loss: 0.9651986360549927,grad_norm: 0.999999209559034, iteration: 133003
loss: 0.9919487237930298,grad_norm: 0.9999991206756289, iteration: 133004
loss: 1.0309836864471436,grad_norm: 0.8593565733965914, iteration: 133005
loss: 0.9892571568489075,grad_norm: 0.9999992557205568, iteration: 133006
loss: 1.0521084070205688,grad_norm: 0.9999993443878059, iteration: 133007
loss: 1.020126461982727,grad_norm: 0.9999989256844916, iteration: 133008
loss: 1.0318794250488281,grad_norm: 0.9167421177707674, iteration: 133009
loss: 1.0223824977874756,grad_norm: 0.9999998534441169, iteration: 133010
loss: 1.0434824228286743,grad_norm: 0.9999996901203966, iteration: 133011
loss: 1.063585877418518,grad_norm: 0.9999990299698416, iteration: 133012
loss: 1.0939550399780273,grad_norm: 0.9999997925580036, iteration: 133013
loss: 1.066917896270752,grad_norm: 0.999999255414912, iteration: 133014
loss: 1.017654538154602,grad_norm: 0.8470877545745932, iteration: 133015
loss: 1.0226906538009644,grad_norm: 0.9999996307637027, iteration: 133016
loss: 0.9630993604660034,grad_norm: 0.944058693470499, iteration: 133017
loss: 1.0341473817825317,grad_norm: 0.9999998452096891, iteration: 133018
loss: 1.0111840963363647,grad_norm: 0.9601687945541242, iteration: 133019
loss: 0.9893341660499573,grad_norm: 0.9223945484256088, iteration: 133020
loss: 1.0157278776168823,grad_norm: 0.9999998708597296, iteration: 133021
loss: 0.9996004104614258,grad_norm: 0.9358262885118263, iteration: 133022
loss: 1.0259301662445068,grad_norm: 0.9199532537375535, iteration: 133023
loss: 1.0107386112213135,grad_norm: 0.9999989983924515, iteration: 133024
loss: 0.9916917681694031,grad_norm: 0.8937296049210347, iteration: 133025
loss: 0.9936609268188477,grad_norm: 0.9999992131590455, iteration: 133026
loss: 1.0036295652389526,grad_norm: 0.9999991756319055, iteration: 133027
loss: 0.9970899820327759,grad_norm: 0.8297433733771161, iteration: 133028
loss: 1.2108595371246338,grad_norm: 0.9999997351827714, iteration: 133029
loss: 0.9998608231544495,grad_norm: 0.9526064709204609, iteration: 133030
loss: 1.0668948888778687,grad_norm: 0.9999996178088426, iteration: 133031
loss: 1.019666075706482,grad_norm: 0.9999990857977968, iteration: 133032
loss: 0.9536731839179993,grad_norm: 0.7456019094878369, iteration: 133033
loss: 1.0338091850280762,grad_norm: 0.8526754117520229, iteration: 133034
loss: 1.0834547281265259,grad_norm: 0.9999994770514755, iteration: 133035
loss: 1.0417633056640625,grad_norm: 0.9400597788763198, iteration: 133036
loss: 1.0260779857635498,grad_norm: 0.9751778417876419, iteration: 133037
loss: 1.0255054235458374,grad_norm: 0.999999200477375, iteration: 133038
loss: 0.9341796636581421,grad_norm: 0.8796017353792748, iteration: 133039
loss: 0.9859271049499512,grad_norm: 0.8536593731566208, iteration: 133040
loss: 1.0151993036270142,grad_norm: 0.8961918373235824, iteration: 133041
loss: 1.0024346113204956,grad_norm: 0.8723399174234746, iteration: 133042
loss: 0.9813330173492432,grad_norm: 0.9182588098614499, iteration: 133043
loss: 0.9990114569664001,grad_norm: 0.9418377913319108, iteration: 133044
loss: 1.0088179111480713,grad_norm: 0.9999993836612391, iteration: 133045
loss: 1.0185201168060303,grad_norm: 0.9462106652557277, iteration: 133046
loss: 1.222062110900879,grad_norm: 0.9999996854646721, iteration: 133047
loss: 0.9746130108833313,grad_norm: 0.9705412287081626, iteration: 133048
loss: 0.9881248474121094,grad_norm: 0.9017884050718322, iteration: 133049
loss: 0.9762002825737,grad_norm: 0.9999997671624958, iteration: 133050
loss: 0.9944424033164978,grad_norm: 0.6964835333960115, iteration: 133051
loss: 1.0221916437149048,grad_norm: 0.9877841942162089, iteration: 133052
loss: 0.972629189491272,grad_norm: 0.9999990027020854, iteration: 133053
loss: 1.1749533414840698,grad_norm: 0.9999996713075451, iteration: 133054
loss: 1.011048436164856,grad_norm: 0.8526374928110136, iteration: 133055
loss: 0.9895089864730835,grad_norm: 0.8994264015497694, iteration: 133056
loss: 1.028139591217041,grad_norm: 0.9999993411894248, iteration: 133057
loss: 0.959378182888031,grad_norm: 0.9604159127901194, iteration: 133058
loss: 1.0015357732772827,grad_norm: 0.8372948293553534, iteration: 133059
loss: 1.0199154615402222,grad_norm: 0.999999119652816, iteration: 133060
loss: 1.1126435995101929,grad_norm: 0.9999994713743363, iteration: 133061
loss: 1.0208412408828735,grad_norm: 0.99970996140978, iteration: 133062
loss: 1.0424882173538208,grad_norm: 0.9999992365069641, iteration: 133063
loss: 1.0308096408843994,grad_norm: 0.9999991209523061, iteration: 133064
loss: 1.0254948139190674,grad_norm: 0.9999991380305727, iteration: 133065
loss: 1.0816301107406616,grad_norm: 0.9999996143955486, iteration: 133066
loss: 0.9636856913566589,grad_norm: 0.7421654396374022, iteration: 133067
loss: 0.9886758923530579,grad_norm: 0.8826273450149535, iteration: 133068
loss: 1.0336021184921265,grad_norm: 0.9999990639083055, iteration: 133069
loss: 0.9852792620658875,grad_norm: 0.8159519848144787, iteration: 133070
loss: 1.1280012130737305,grad_norm: 0.9999998961483608, iteration: 133071
loss: 1.0565072298049927,grad_norm: 0.9999994776963117, iteration: 133072
loss: 0.9853249192237854,grad_norm: 0.8371603727124707, iteration: 133073
loss: 1.021012306213379,grad_norm: 0.970398060199261, iteration: 133074
loss: 1.002732753753662,grad_norm: 0.9999993716477211, iteration: 133075
loss: 0.9986129403114319,grad_norm: 0.8665131454864641, iteration: 133076
loss: 1.0331592559814453,grad_norm: 0.9999989722952021, iteration: 133077
loss: 0.9612851142883301,grad_norm: 0.9999992478020029, iteration: 133078
loss: 1.0063139200210571,grad_norm: 0.7210454718349456, iteration: 133079
loss: 0.9803241491317749,grad_norm: 0.9219311025876411, iteration: 133080
loss: 0.9829826951026917,grad_norm: 0.9999991958619577, iteration: 133081
loss: 1.084357738494873,grad_norm: 0.9999998337345556, iteration: 133082
loss: 0.9705908298492432,grad_norm: 0.859151082484446, iteration: 133083
loss: 1.2115845680236816,grad_norm: 0.9999993946663265, iteration: 133084
loss: 1.0389024019241333,grad_norm: 0.8923098212298983, iteration: 133085
loss: 1.0134013891220093,grad_norm: 0.7489043574015029, iteration: 133086
loss: 1.0237354040145874,grad_norm: 0.7903110399315457, iteration: 133087
loss: 1.0224376916885376,grad_norm: 0.9999992520405743, iteration: 133088
loss: 1.0391579866409302,grad_norm: 0.9999989778032946, iteration: 133089
loss: 1.0372811555862427,grad_norm: 0.9999991079059783, iteration: 133090
loss: 1.224164366722107,grad_norm: 0.9999991708236581, iteration: 133091
loss: 1.0225311517715454,grad_norm: 0.7758186748432394, iteration: 133092
loss: 1.0519436597824097,grad_norm: 0.9999996286702627, iteration: 133093
loss: 1.0051707029342651,grad_norm: 0.9999994178417031, iteration: 133094
loss: 1.111068844795227,grad_norm: 0.9999990838606352, iteration: 133095
loss: 0.9822887182235718,grad_norm: 0.8417336138360749, iteration: 133096
loss: 1.0099437236785889,grad_norm: 0.9757965030238479, iteration: 133097
loss: 1.0486242771148682,grad_norm: 0.9999989869617727, iteration: 133098
loss: 1.0117919445037842,grad_norm: 0.9999998444080898, iteration: 133099
loss: 1.018615961074829,grad_norm: 0.8519714022678507, iteration: 133100
loss: 1.0319135189056396,grad_norm: 0.9999991566528149, iteration: 133101
loss: 1.0215249061584473,grad_norm: 0.9999994718475884, iteration: 133102
loss: 1.008689045906067,grad_norm: 0.9999998203382224, iteration: 133103
loss: 0.981974720954895,grad_norm: 0.9999993553589426, iteration: 133104
loss: 0.969946026802063,grad_norm: 0.7243835543920195, iteration: 133105
loss: 1.016724944114685,grad_norm: 0.9999992440916166, iteration: 133106
loss: 1.1597037315368652,grad_norm: 0.999999681656598, iteration: 133107
loss: 1.001079797744751,grad_norm: 0.9729344868445663, iteration: 133108
loss: 1.0420652627944946,grad_norm: 0.9999990894636251, iteration: 133109
loss: 1.134901762008667,grad_norm: 0.9999998517625858, iteration: 133110
loss: 1.098046064376831,grad_norm: 0.9999999145041973, iteration: 133111
loss: 1.0126533508300781,grad_norm: 0.8174021039426569, iteration: 133112
loss: 1.112357497215271,grad_norm: 0.9999991710826556, iteration: 133113
loss: 1.2032203674316406,grad_norm: 0.9999991484905869, iteration: 133114
loss: 1.0456314086914062,grad_norm: 0.9539465473055564, iteration: 133115
loss: 1.157235026359558,grad_norm: 0.9999992765387576, iteration: 133116
loss: 1.065958023071289,grad_norm: 0.9999991067726997, iteration: 133117
loss: 0.9932088255882263,grad_norm: 0.888740030188445, iteration: 133118
loss: 1.0034995079040527,grad_norm: 0.8011493989918889, iteration: 133119
loss: 1.0301878452301025,grad_norm: 0.951172263401553, iteration: 133120
loss: 1.0039222240447998,grad_norm: 0.7928297293455111, iteration: 133121
loss: 1.138609528541565,grad_norm: 0.9999993230758483, iteration: 133122
loss: 1.0264445543289185,grad_norm: 0.7300396129030466, iteration: 133123
loss: 1.0134328603744507,grad_norm: 0.764977504357889, iteration: 133124
loss: 1.0973458290100098,grad_norm: 0.9686238497356041, iteration: 133125
loss: 1.0009348392486572,grad_norm: 0.8272065698388328, iteration: 133126
loss: 1.0273864269256592,grad_norm: 0.9428075524985808, iteration: 133127
loss: 0.986865758895874,grad_norm: 0.9999990824647326, iteration: 133128
loss: 0.9868316650390625,grad_norm: 0.9368320115622987, iteration: 133129
loss: 1.0236380100250244,grad_norm: 0.8774391172409021, iteration: 133130
loss: 0.9956232309341431,grad_norm: 0.9449357021860266, iteration: 133131
loss: 1.0220695734024048,grad_norm: 0.8633573991169881, iteration: 133132
loss: 1.065524935722351,grad_norm: 0.9999993231310702, iteration: 133133
loss: 1.009455680847168,grad_norm: 0.9999992080175056, iteration: 133134
loss: 1.0647118091583252,grad_norm: 0.9999992842615013, iteration: 133135
loss: 1.0191688537597656,grad_norm: 0.9999999069207458, iteration: 133136
loss: 1.0060794353485107,grad_norm: 0.9999996146217914, iteration: 133137
loss: 1.039862871170044,grad_norm: 0.9999991725385243, iteration: 133138
loss: 0.9884732961654663,grad_norm: 0.9923761710781919, iteration: 133139
loss: 1.044219970703125,grad_norm: 0.999999339163547, iteration: 133140
loss: 0.983194887638092,grad_norm: 0.723772760824675, iteration: 133141
loss: 1.0352940559387207,grad_norm: 0.9999999612669452, iteration: 133142
loss: 0.9810783863067627,grad_norm: 0.9999990995652698, iteration: 133143
loss: 1.0904483795166016,grad_norm: 0.9999992575588191, iteration: 133144
loss: 1.0005857944488525,grad_norm: 0.8512867864901748, iteration: 133145
loss: 1.0537723302841187,grad_norm: 0.9999994031370133, iteration: 133146
loss: 1.0540250539779663,grad_norm: 0.9999991526130374, iteration: 133147
loss: 1.072696328163147,grad_norm: 0.9999995388748876, iteration: 133148
loss: 1.030860185623169,grad_norm: 0.9999992712775626, iteration: 133149
loss: 1.0602283477783203,grad_norm: 0.8791424105252391, iteration: 133150
loss: 1.0254641771316528,grad_norm: 0.9999992875639633, iteration: 133151
loss: 1.0448215007781982,grad_norm: 0.958080586331552, iteration: 133152
loss: 0.9894384741783142,grad_norm: 0.9706850011235266, iteration: 133153
loss: 1.1692707538604736,grad_norm: 0.9999998928109087, iteration: 133154
loss: 1.0671159029006958,grad_norm: 0.9999991460039596, iteration: 133155
loss: 1.0123096704483032,grad_norm: 0.7472712818244948, iteration: 133156
loss: 1.0354315042495728,grad_norm: 0.9308880946600273, iteration: 133157
loss: 1.1252992153167725,grad_norm: 0.9999990825202031, iteration: 133158
loss: 1.0591357946395874,grad_norm: 0.885890866873437, iteration: 133159
loss: 1.1274399757385254,grad_norm: 0.9999996227616178, iteration: 133160
loss: 1.092012643814087,grad_norm: 0.99999981611964, iteration: 133161
loss: 0.9957937598228455,grad_norm: 0.8366437410191175, iteration: 133162
loss: 1.033751130104065,grad_norm: 0.9999998707622711, iteration: 133163
loss: 1.381091594696045,grad_norm: 0.9999995051770004, iteration: 133164
loss: 1.0294551849365234,grad_norm: 0.9959015818040159, iteration: 133165
loss: 1.013939380645752,grad_norm: 0.9999991394699467, iteration: 133166
loss: 1.0965187549591064,grad_norm: 0.9999992152475785, iteration: 133167
loss: 1.176212191581726,grad_norm: 0.9999998760391495, iteration: 133168
loss: 1.1805390119552612,grad_norm: 0.9999999241115585, iteration: 133169
loss: 1.0109210014343262,grad_norm: 0.9999993772518431, iteration: 133170
loss: 1.101548194885254,grad_norm: 0.9999997307068067, iteration: 133171
loss: 1.0697970390319824,grad_norm: 0.9999995040752953, iteration: 133172
loss: 1.1621077060699463,grad_norm: 0.9999994336188368, iteration: 133173
loss: 1.6704089641571045,grad_norm: 0.9999999473460728, iteration: 133174
loss: 1.5873054265975952,grad_norm: 0.9999999906193584, iteration: 133175
loss: 1.2469549179077148,grad_norm: 1.0000000446158157, iteration: 133176
loss: 1.2007652521133423,grad_norm: 0.9757739233773204, iteration: 133177
loss: 1.4006115198135376,grad_norm: 0.9999992799727458, iteration: 133178
loss: 1.2509222030639648,grad_norm: 0.9999999076242743, iteration: 133179
loss: 1.2558146715164185,grad_norm: 0.9999993173857511, iteration: 133180
loss: 1.2281213998794556,grad_norm: 0.9999994845484176, iteration: 133181
loss: 1.081366777420044,grad_norm: 0.9999992741545665, iteration: 133182
loss: 1.1496155261993408,grad_norm: 0.999999436783877, iteration: 133183
loss: 1.207971215248108,grad_norm: 0.9999998189477335, iteration: 133184
loss: 1.3363502025604248,grad_norm: 0.9999998370622173, iteration: 133185
loss: 1.3860596418380737,grad_norm: 0.9999998768885843, iteration: 133186
loss: 1.157875657081604,grad_norm: 0.9999993303760378, iteration: 133187
loss: 1.4252827167510986,grad_norm: 0.9999997348001731, iteration: 133188
loss: 1.1440846920013428,grad_norm: 0.9999997816699195, iteration: 133189
loss: 1.4131584167480469,grad_norm: 0.9999998492960487, iteration: 133190
loss: 1.5267903804779053,grad_norm: 0.9999998507199457, iteration: 133191
loss: 1.3276877403259277,grad_norm: 0.9999998704650016, iteration: 133192
loss: 1.398207664489746,grad_norm: 0.9999999044378755, iteration: 133193
loss: 1.1993680000305176,grad_norm: 0.9999999067836806, iteration: 133194
loss: 1.1971274614334106,grad_norm: 0.9999998814100721, iteration: 133195
loss: 1.2575957775115967,grad_norm: 0.9999993055827175, iteration: 133196
loss: 1.1371718645095825,grad_norm: 0.9999995647033485, iteration: 133197
loss: 1.3834270238876343,grad_norm: 0.9999998115209249, iteration: 133198
loss: 1.1167360544204712,grad_norm: 0.9999994007534212, iteration: 133199
loss: 1.2404166460037231,grad_norm: 0.9999995526765524, iteration: 133200
loss: 1.5161845684051514,grad_norm: 0.9999998631060042, iteration: 133201
loss: 1.2279717922210693,grad_norm: 0.9999998160355925, iteration: 133202
loss: 1.2535885572433472,grad_norm: 0.9999994955991064, iteration: 133203
loss: 1.0262882709503174,grad_norm: 0.9999992224078534, iteration: 133204
loss: 1.0933334827423096,grad_norm: 0.9999994249073418, iteration: 133205
loss: 1.1636788845062256,grad_norm: 0.9999995409968263, iteration: 133206
loss: 1.0356800556182861,grad_norm: 0.9999994746931008, iteration: 133207
loss: 1.316833734512329,grad_norm: 0.9999997642363669, iteration: 133208
loss: 1.4181126356124878,grad_norm: 0.9999999162492953, iteration: 133209
loss: 1.105697512626648,grad_norm: 0.9999996994719149, iteration: 133210
loss: 1.2176098823547363,grad_norm: 0.9999998242131646, iteration: 133211
loss: 1.027146816253662,grad_norm: 0.99999915105288, iteration: 133212
loss: 1.1682182550430298,grad_norm: 0.9999995480541688, iteration: 133213
loss: 1.279660940170288,grad_norm: 0.9999998627309985, iteration: 133214
loss: 1.2084881067276,grad_norm: 1.0000000156643507, iteration: 133215
loss: 1.1622735261917114,grad_norm: 1.0000001504360496, iteration: 133216
loss: 1.1547119617462158,grad_norm: 0.9999993185704062, iteration: 133217
loss: 1.1231679916381836,grad_norm: 0.9999995246522675, iteration: 133218
loss: 1.2594645023345947,grad_norm: 0.9999998283419915, iteration: 133219
loss: 1.430713176727295,grad_norm: 0.9999997868713999, iteration: 133220
loss: 1.3793449401855469,grad_norm: 0.9999995589641342, iteration: 133221
loss: 1.2765944004058838,grad_norm: 0.9999995441718036, iteration: 133222
loss: 1.4120028018951416,grad_norm: 0.9999995121350846, iteration: 133223
loss: 1.182301640510559,grad_norm: 0.9999992932038663, iteration: 133224
loss: 1.1937037706375122,grad_norm: 0.9999997615287429, iteration: 133225
loss: 1.3510433435440063,grad_norm: 0.9999998380073947, iteration: 133226
loss: 1.0360156297683716,grad_norm: 0.9999991534866611, iteration: 133227
loss: 1.3366820812225342,grad_norm: 0.9999996129331925, iteration: 133228
loss: 1.2213799953460693,grad_norm: 0.9999996227265132, iteration: 133229
loss: 1.0264699459075928,grad_norm: 0.9765349372942637, iteration: 133230
loss: 1.183858036994934,grad_norm: 0.9999995832034309, iteration: 133231
loss: 1.1096279621124268,grad_norm: 0.9999992298732708, iteration: 133232
loss: 1.1127504110336304,grad_norm: 0.9999994876121071, iteration: 133233
loss: 1.2349745035171509,grad_norm: 0.9999998613890463, iteration: 133234
loss: 1.0801200866699219,grad_norm: 0.9999991971339909, iteration: 133235
loss: 1.2866324186325073,grad_norm: 0.999999839525983, iteration: 133236
loss: 1.2878177165985107,grad_norm: 0.9999997583631386, iteration: 133237
loss: 1.2251925468444824,grad_norm: 0.9999995300248238, iteration: 133238
loss: 1.1728321313858032,grad_norm: 0.9999997028021058, iteration: 133239
loss: 1.3007664680480957,grad_norm: 0.9999999323513304, iteration: 133240
loss: 1.126377820968628,grad_norm: 0.9999992441629824, iteration: 133241
loss: 1.085289478302002,grad_norm: 0.99999947591155, iteration: 133242
loss: 1.146421194076538,grad_norm: 0.9999994010327947, iteration: 133243
loss: 1.0504515171051025,grad_norm: 0.999999243614221, iteration: 133244
loss: 1.169036865234375,grad_norm: 0.9999991980305969, iteration: 133245
loss: 1.1656275987625122,grad_norm: 0.9999998899368662, iteration: 133246
loss: 1.1644728183746338,grad_norm: 0.9999997961918163, iteration: 133247
loss: 1.1613757610321045,grad_norm: 0.9999996638871641, iteration: 133248
loss: 1.224648356437683,grad_norm: 0.9999998496044873, iteration: 133249
loss: 1.3292087316513062,grad_norm: 0.9999995433836412, iteration: 133250
loss: 1.1732170581817627,grad_norm: 0.9999997082389238, iteration: 133251
loss: 1.3260143995285034,grad_norm: 0.9999997957302117, iteration: 133252
loss: 1.1327199935913086,grad_norm: 0.9999994572481118, iteration: 133253
loss: 1.1242530345916748,grad_norm: 0.9999994645372347, iteration: 133254
loss: 1.2227673530578613,grad_norm: 0.999999765946429, iteration: 133255
loss: 1.1884502172470093,grad_norm: 0.9999996701291769, iteration: 133256
loss: 1.1077148914337158,grad_norm: 0.9999993799347501, iteration: 133257
loss: 1.3299909830093384,grad_norm: 0.9999994261883426, iteration: 133258
loss: 1.0640113353729248,grad_norm: 0.9999991533842997, iteration: 133259
loss: 1.2895365953445435,grad_norm: 1.000000086100656, iteration: 133260
loss: 1.1177308559417725,grad_norm: 0.9999990316101439, iteration: 133261
loss: 1.1423193216323853,grad_norm: 0.999999606784332, iteration: 133262
loss: 1.1450852155685425,grad_norm: 0.9999993483693002, iteration: 133263
loss: 1.0610884428024292,grad_norm: 0.9999996479957325, iteration: 133264
loss: 1.1083053350448608,grad_norm: 0.9999995788098938, iteration: 133265
loss: 1.1425904035568237,grad_norm: 0.9999991642926314, iteration: 133266
loss: 1.1386198997497559,grad_norm: 0.9999993810973012, iteration: 133267
loss: 1.1510769128799438,grad_norm: 0.9999999608908653, iteration: 133268
loss: 1.1312885284423828,grad_norm: 0.9999993663954171, iteration: 133269
loss: 1.1126564741134644,grad_norm: 0.9999990301350898, iteration: 133270
loss: 1.1294537782669067,grad_norm: 0.999999073976222, iteration: 133271
loss: 1.2249902486801147,grad_norm: 0.9999999390598081, iteration: 133272
loss: 1.1792289018630981,grad_norm: 0.9999999062565869, iteration: 133273
loss: 1.0552310943603516,grad_norm: 0.9999999217339124, iteration: 133274
loss: 1.0379366874694824,grad_norm: 0.9999991794140978, iteration: 133275
loss: 1.0200960636138916,grad_norm: 0.9999990620124102, iteration: 133276
loss: 1.1579482555389404,grad_norm: 1.000000017864812, iteration: 133277
loss: 1.2392699718475342,grad_norm: 0.9999994177391688, iteration: 133278
loss: 1.061938762664795,grad_norm: 0.9999992788899691, iteration: 133279
loss: 1.0096290111541748,grad_norm: 0.6758471385586657, iteration: 133280
loss: 1.2640362977981567,grad_norm: 0.9999994600918589, iteration: 133281
loss: 1.120841383934021,grad_norm: 0.9999992946991025, iteration: 133282
loss: 1.0719501972198486,grad_norm: 0.9999994710141925, iteration: 133283
loss: 1.1079533100128174,grad_norm: 0.9999991767142533, iteration: 133284
loss: 1.1706715822219849,grad_norm: 0.999999330804801, iteration: 133285
loss: 1.2031115293502808,grad_norm: 0.9999993413210211, iteration: 133286
loss: 1.392261028289795,grad_norm: 0.9999998374940574, iteration: 133287
loss: 1.166411280632019,grad_norm: 0.9999999400445339, iteration: 133288
loss: 1.0523693561553955,grad_norm: 0.9999991579936026, iteration: 133289
loss: 1.270172119140625,grad_norm: 0.9999998952821266, iteration: 133290
loss: 1.0946338176727295,grad_norm: 1.0000000742884492, iteration: 133291
loss: 1.1108449697494507,grad_norm: 0.999999853715649, iteration: 133292
loss: 1.0685125589370728,grad_norm: 0.9999993850400832, iteration: 133293
loss: 1.0937645435333252,grad_norm: 0.9999993735813495, iteration: 133294
loss: 1.3187966346740723,grad_norm: 0.9999998163206781, iteration: 133295
loss: 1.3507437705993652,grad_norm: 0.9999999255061507, iteration: 133296
loss: 1.0953559875488281,grad_norm: 0.9999993148698929, iteration: 133297
loss: 1.0884634256362915,grad_norm: 0.999999188588184, iteration: 133298
loss: 1.1920809745788574,grad_norm: 0.9999993410210226, iteration: 133299
loss: 1.0680612325668335,grad_norm: 0.999999893674815, iteration: 133300
loss: 1.1701934337615967,grad_norm: 0.9999998307738799, iteration: 133301
loss: 1.05461847782135,grad_norm: 0.9999994644042169, iteration: 133302
loss: 1.1176644563674927,grad_norm: 0.9999993002518774, iteration: 133303
loss: 0.996661365032196,grad_norm: 0.920881690286781, iteration: 133304
loss: 1.2504775524139404,grad_norm: 0.9999990087088841, iteration: 133305
loss: 1.18111252784729,grad_norm: 1.0000000177078046, iteration: 133306
loss: 1.077850580215454,grad_norm: 0.9999993606235879, iteration: 133307
loss: 1.1629782915115356,grad_norm: 0.9999994521304316, iteration: 133308
loss: 1.2024964094161987,grad_norm: 0.9999998860631742, iteration: 133309
loss: 1.0724233388900757,grad_norm: 0.9769963563469313, iteration: 133310
loss: 1.0691169500350952,grad_norm: 0.9901249512332877, iteration: 133311
loss: 1.294216513633728,grad_norm: 0.9999999846744696, iteration: 133312
loss: 1.0361171960830688,grad_norm: 0.9999991601569586, iteration: 133313
loss: 1.1443684101104736,grad_norm: 0.9999994637121136, iteration: 133314
loss: 1.1573328971862793,grad_norm: 0.9999995296579576, iteration: 133315
loss: 1.2733500003814697,grad_norm: 0.9999997174809394, iteration: 133316
loss: 1.1544352769851685,grad_norm: 0.9999992176412924, iteration: 133317
loss: 1.0594453811645508,grad_norm: 0.9999997830251223, iteration: 133318
loss: 1.0161012411117554,grad_norm: 0.9336093240695552, iteration: 133319
loss: 1.0814130306243896,grad_norm: 0.9999990678747465, iteration: 133320
loss: 1.1522084474563599,grad_norm: 0.9999992437475894, iteration: 133321
loss: 1.0216938257217407,grad_norm: 0.9999993997476009, iteration: 133322
loss: 1.1062469482421875,grad_norm: 0.9999996497447033, iteration: 133323
loss: 1.286577582359314,grad_norm: 0.9999995675434845, iteration: 133324
loss: 1.0155318975448608,grad_norm: 0.9999992708018862, iteration: 133325
loss: 1.0469467639923096,grad_norm: 0.7854906104976008, iteration: 133326
loss: 1.132813572883606,grad_norm: 1.000000026259703, iteration: 133327
loss: 1.1267682313919067,grad_norm: 0.9999999003289572, iteration: 133328
loss: 1.3102256059646606,grad_norm: 0.9999998487850903, iteration: 133329
loss: 1.0899441242218018,grad_norm: 0.9999997774252685, iteration: 133330
loss: 1.3389064073562622,grad_norm: 0.9999996872384427, iteration: 133331
loss: 1.0493271350860596,grad_norm: 0.9999992074430346, iteration: 133332
loss: 1.0818321704864502,grad_norm: 0.9999993143685177, iteration: 133333
loss: 1.0417896509170532,grad_norm: 0.9999996195931445, iteration: 133334
loss: 1.0073509216308594,grad_norm: 0.9034645452307702, iteration: 133335
loss: 1.0388911962509155,grad_norm: 0.999999395558047, iteration: 133336
loss: 1.0769559144973755,grad_norm: 0.9999998826996824, iteration: 133337
loss: 1.1783440113067627,grad_norm: 0.9999996322611158, iteration: 133338
loss: 1.0715676546096802,grad_norm: 0.9999995678389221, iteration: 133339
loss: 1.1377240419387817,grad_norm: 0.9999998780957412, iteration: 133340
loss: 1.208449125289917,grad_norm: 0.999999497854847, iteration: 133341
loss: 1.0602247714996338,grad_norm: 0.9999992145127923, iteration: 133342
loss: 1.247726321220398,grad_norm: 0.9999998025298941, iteration: 133343
loss: 1.1002647876739502,grad_norm: 0.9999991547717754, iteration: 133344
loss: 1.2175759077072144,grad_norm: 0.9999998765874109, iteration: 133345
loss: 1.1351889371871948,grad_norm: 0.9999999765897778, iteration: 133346
loss: 1.036165714263916,grad_norm: 0.9999997526874728, iteration: 133347
loss: 1.0102572441101074,grad_norm: 0.8090629323014665, iteration: 133348
loss: 1.1214134693145752,grad_norm: 0.999999320119077, iteration: 133349
loss: 1.0694761276245117,grad_norm: 0.9999994720503231, iteration: 133350
loss: 1.260613203048706,grad_norm: 0.999999413114553, iteration: 133351
loss: 1.1085633039474487,grad_norm: 0.9999993377268176, iteration: 133352
loss: 1.0787160396575928,grad_norm: 0.9999991806807268, iteration: 133353
loss: 0.9980810880661011,grad_norm: 0.9373521422476527, iteration: 133354
loss: 1.06294846534729,grad_norm: 0.9999997894333489, iteration: 133355
loss: 1.011565089225769,grad_norm: 0.9999990289605147, iteration: 133356
loss: 1.2157856225967407,grad_norm: 0.999999927298271, iteration: 133357
loss: 1.0605854988098145,grad_norm: 0.9999992192248934, iteration: 133358
loss: 1.0670900344848633,grad_norm: 0.9999994596051347, iteration: 133359
loss: 1.0802717208862305,grad_norm: 0.9999993992942021, iteration: 133360
loss: 1.0817316770553589,grad_norm: 0.9999998690602013, iteration: 133361
loss: 1.0412064790725708,grad_norm: 0.999999026230312, iteration: 133362
loss: 1.0398125648498535,grad_norm: 0.914723605123743, iteration: 133363
loss: 1.0931110382080078,grad_norm: 0.9999996665952562, iteration: 133364
loss: 1.025660514831543,grad_norm: 0.8921703088399232, iteration: 133365
loss: 1.048045039176941,grad_norm: 0.9366784004811557, iteration: 133366
loss: 1.0285371541976929,grad_norm: 0.8828591189440602, iteration: 133367
loss: 1.056098222732544,grad_norm: 0.9930322440531534, iteration: 133368
loss: 1.121610403060913,grad_norm: 0.9999998816353091, iteration: 133369
loss: 1.0644454956054688,grad_norm: 0.999999149780064, iteration: 133370
loss: 1.0642977952957153,grad_norm: 0.9999992081957292, iteration: 133371
loss: 1.0411015748977661,grad_norm: 0.9553993239041791, iteration: 133372
loss: 1.1748234033584595,grad_norm: 1.0000000184965723, iteration: 133373
loss: 1.1767393350601196,grad_norm: 0.9999994532882449, iteration: 133374
loss: 1.3437066078186035,grad_norm: 0.9999998650652872, iteration: 133375
loss: 1.122036337852478,grad_norm: 0.9999993762512985, iteration: 133376
loss: 1.019822359085083,grad_norm: 0.9999994748118568, iteration: 133377
loss: 1.244691014289856,grad_norm: 0.9999998313934111, iteration: 133378
loss: 1.305134892463684,grad_norm: 0.9999999879790394, iteration: 133379
loss: 1.0850244760513306,grad_norm: 0.9999992587354283, iteration: 133380
loss: 1.0508934259414673,grad_norm: 0.9999998773755829, iteration: 133381
loss: 1.0559643507003784,grad_norm: 0.9999998078316114, iteration: 133382
loss: 0.980067253112793,grad_norm: 0.8878509353900942, iteration: 133383
loss: 1.1155284643173218,grad_norm: 0.9999991192342114, iteration: 133384
loss: 1.2503472566604614,grad_norm: 1.000000036904274, iteration: 133385
loss: 1.4455626010894775,grad_norm: 0.9999999729979124, iteration: 133386
loss: 1.4354205131530762,grad_norm: 0.9999999843060797, iteration: 133387
loss: 1.118140697479248,grad_norm: 0.9999997563885692, iteration: 133388
loss: 1.1855040788650513,grad_norm: 0.9999998831665922, iteration: 133389
loss: 1.1199672222137451,grad_norm: 0.9999994458462055, iteration: 133390
loss: 1.0716909170150757,grad_norm: 0.9999999700397707, iteration: 133391
loss: 1.179953932762146,grad_norm: 0.9999996538812079, iteration: 133392
loss: 1.4599288702011108,grad_norm: 0.999999960539468, iteration: 133393
loss: 1.1979478597640991,grad_norm: 0.999999991339081, iteration: 133394
loss: 1.039109468460083,grad_norm: 0.9999997375021634, iteration: 133395
loss: 1.1190564632415771,grad_norm: 0.9999997643156013, iteration: 133396
loss: 1.1373648643493652,grad_norm: 0.9999996755315137, iteration: 133397
loss: 1.200339436531067,grad_norm: 0.9999999036871268, iteration: 133398
loss: 1.3895057439804077,grad_norm: 0.999999724907925, iteration: 133399
loss: 1.1051708459854126,grad_norm: 0.9999994540173021, iteration: 133400
loss: 1.0562554597854614,grad_norm: 0.9999994780944709, iteration: 133401
loss: 1.127444863319397,grad_norm: 0.9999994074428477, iteration: 133402
loss: 1.0726884603500366,grad_norm: 0.9999994655002018, iteration: 133403
loss: 1.163891077041626,grad_norm: 0.9999992486965544, iteration: 133404
loss: 1.1497108936309814,grad_norm: 0.9999997543078668, iteration: 133405
loss: 1.3025028705596924,grad_norm: 0.9999996383941059, iteration: 133406
loss: 1.205775499343872,grad_norm: 0.9999992073454519, iteration: 133407
loss: 1.1857705116271973,grad_norm: 0.9999995477187376, iteration: 133408
loss: 1.0905662775039673,grad_norm: 0.99999923220174, iteration: 133409
loss: 1.1087961196899414,grad_norm: 0.9999990836191187, iteration: 133410
loss: 1.1459848880767822,grad_norm: 0.999999891437353, iteration: 133411
loss: 1.3349440097808838,grad_norm: 0.9999995713703259, iteration: 133412
loss: 1.0638599395751953,grad_norm: 0.999999004678175, iteration: 133413
loss: 1.2189265489578247,grad_norm: 0.9999992768919387, iteration: 133414
loss: 1.092571496963501,grad_norm: 0.9999997146195405, iteration: 133415
loss: 1.181056022644043,grad_norm: 0.9999998450316522, iteration: 133416
loss: 1.077432632446289,grad_norm: 0.9999993105970579, iteration: 133417
loss: 1.051736831665039,grad_norm: 0.9999993861500187, iteration: 133418
loss: 1.134350061416626,grad_norm: 0.999999519452614, iteration: 133419
loss: 1.0514464378356934,grad_norm: 0.9999991718205755, iteration: 133420
loss: 1.218443512916565,grad_norm: 0.9999997763336691, iteration: 133421
loss: 1.0283007621765137,grad_norm: 0.9999999556362384, iteration: 133422
loss: 1.0030051469802856,grad_norm: 0.8627027426218161, iteration: 133423
loss: 1.1762975454330444,grad_norm: 0.9999995741643224, iteration: 133424
loss: 1.039704442024231,grad_norm: 0.9999994240215512, iteration: 133425
loss: 1.0574817657470703,grad_norm: 0.9999991779136069, iteration: 133426
loss: 1.098024606704712,grad_norm: 0.9999994676293728, iteration: 133427
loss: 1.2189100980758667,grad_norm: 0.9999995457144365, iteration: 133428
loss: 1.1425367593765259,grad_norm: 0.9999997064829765, iteration: 133429
loss: 1.192349910736084,grad_norm: 0.9999997496133746, iteration: 133430
loss: 1.0840984582901,grad_norm: 0.9999996926853452, iteration: 133431
loss: 1.0774425268173218,grad_norm: 0.9999998657403255, iteration: 133432
loss: 1.0814898014068604,grad_norm: 0.9999997138542465, iteration: 133433
loss: 1.1620681285858154,grad_norm: 0.9999992725432839, iteration: 133434
loss: 1.078818917274475,grad_norm: 0.999999631252248, iteration: 133435
loss: 1.1231898069381714,grad_norm: 0.9999998460924006, iteration: 133436
loss: 1.227353572845459,grad_norm: 0.9999996897202486, iteration: 133437
loss: 1.0873279571533203,grad_norm: 0.9999999645586681, iteration: 133438
loss: 1.0585569143295288,grad_norm: 0.9999993470216438, iteration: 133439
loss: 1.06298828125,grad_norm: 0.9999997491740711, iteration: 133440
loss: 1.053933024406433,grad_norm: 0.9999992998896517, iteration: 133441
loss: 1.0698349475860596,grad_norm: 0.8321231956064042, iteration: 133442
loss: 1.0862972736358643,grad_norm: 0.999999623850413, iteration: 133443
loss: 1.0188486576080322,grad_norm: 0.9381451486464761, iteration: 133444
loss: 1.046349048614502,grad_norm: 0.9999993083793703, iteration: 133445
loss: 1.1982110738754272,grad_norm: 0.9999992258898335, iteration: 133446
loss: 0.9959278106689453,grad_norm: 0.9999989596131507, iteration: 133447
loss: 1.0700546503067017,grad_norm: 0.9999992420036193, iteration: 133448
loss: 1.1034656763076782,grad_norm: 0.9999990686298417, iteration: 133449
loss: 1.0490401983261108,grad_norm: 0.9999991647764234, iteration: 133450
loss: 1.1339489221572876,grad_norm: 0.9999996443332723, iteration: 133451
loss: 1.1442642211914062,grad_norm: 0.999999660401972, iteration: 133452
loss: 1.0167713165283203,grad_norm: 0.9999992121276636, iteration: 133453
loss: 1.0871856212615967,grad_norm: 0.999999085501488, iteration: 133454
loss: 1.1467655897140503,grad_norm: 0.9999999134670214, iteration: 133455
loss: 1.1600385904312134,grad_norm: 0.9999997500837681, iteration: 133456
loss: 1.0281256437301636,grad_norm: 0.999999840927004, iteration: 133457
loss: 1.2966818809509277,grad_norm: 0.9999994684178212, iteration: 133458
loss: 1.128702163696289,grad_norm: 0.9999998912603506, iteration: 133459
loss: 1.292651891708374,grad_norm: 0.9999994636586224, iteration: 133460
loss: 1.0352396965026855,grad_norm: 0.9862993114427498, iteration: 133461
loss: 1.1725057363510132,grad_norm: 0.9999997764033848, iteration: 133462
loss: 1.0703240633010864,grad_norm: 0.9999995763776627, iteration: 133463
loss: 1.086013913154602,grad_norm: 0.9999996967282593, iteration: 133464
loss: 1.0645568370819092,grad_norm: 0.9999997067326517, iteration: 133465
loss: 1.2527170181274414,grad_norm: 0.9999999231181959, iteration: 133466
loss: 1.1036994457244873,grad_norm: 0.9999998607488633, iteration: 133467
loss: 1.1186219453811646,grad_norm: 0.9999991914979401, iteration: 133468
loss: 1.0338836908340454,grad_norm: 0.9999992992048448, iteration: 133469
loss: 0.9939824342727661,grad_norm: 0.999999521235695, iteration: 133470
loss: 1.0921111106872559,grad_norm: 0.9999995936521473, iteration: 133471
loss: 1.0642848014831543,grad_norm: 0.9999993749958734, iteration: 133472
loss: 1.0750006437301636,grad_norm: 0.9999994651159035, iteration: 133473
loss: 1.0782291889190674,grad_norm: 0.9999999833231078, iteration: 133474
loss: 0.9937011003494263,grad_norm: 0.9772767601719873, iteration: 133475
loss: 1.1169248819351196,grad_norm: 0.9999995943129005, iteration: 133476
loss: 1.0719655752182007,grad_norm: 0.9999991194070458, iteration: 133477
loss: 1.1275724172592163,grad_norm: 1.0000000065991665, iteration: 133478
loss: 1.1940468549728394,grad_norm: 0.9999996037816903, iteration: 133479
loss: 1.0970735549926758,grad_norm: 0.9999994120517932, iteration: 133480
loss: 1.1483761072158813,grad_norm: 0.9999995240088501, iteration: 133481
loss: 1.080946922302246,grad_norm: 0.9999998724804565, iteration: 133482
loss: 1.1414505243301392,grad_norm: 0.9999990845189458, iteration: 133483
loss: 1.2921165227890015,grad_norm: 0.999999863889822, iteration: 133484
loss: 1.3705034255981445,grad_norm: 0.999999718548293, iteration: 133485
loss: 1.3938500881195068,grad_norm: 0.9999994588609615, iteration: 133486
loss: 1.4051165580749512,grad_norm: 0.9999993489315833, iteration: 133487
loss: 1.531403660774231,grad_norm: 0.999999681599015, iteration: 133488
loss: 1.4806923866271973,grad_norm: 0.9999998163945066, iteration: 133489
loss: 1.4695756435394287,grad_norm: 0.9999997352257349, iteration: 133490
loss: 1.4012279510498047,grad_norm: 0.9999999404025418, iteration: 133491
loss: 1.3602505922317505,grad_norm: 0.9999991920282222, iteration: 133492
loss: 1.487242341041565,grad_norm: 0.9999997300567169, iteration: 133493
loss: 1.4734246730804443,grad_norm: 0.9999999233003498, iteration: 133494
loss: 1.4035676717758179,grad_norm: 0.9999995072722765, iteration: 133495
loss: 1.478101372718811,grad_norm: 0.9999996850693281, iteration: 133496
loss: 1.5991193056106567,grad_norm: 0.9999999330078736, iteration: 133497
loss: 1.3927576541900635,grad_norm: 0.9999996392620637, iteration: 133498
loss: 1.2526017427444458,grad_norm: 0.9999994684608716, iteration: 133499
loss: 1.179517149925232,grad_norm: 0.9999998050422275, iteration: 133500
loss: 1.1242259740829468,grad_norm: 0.9999992471714714, iteration: 133501
loss: 1.0843855142593384,grad_norm: 1.0000000156153779, iteration: 133502
loss: 1.1022650003433228,grad_norm: 0.9999996594430094, iteration: 133503
loss: 1.2232372760772705,grad_norm: 0.9999997521341276, iteration: 133504
loss: 1.0590265989303589,grad_norm: 0.9999991224780742, iteration: 133505
loss: 1.1505333185195923,grad_norm: 0.9999997035817982, iteration: 133506
loss: 1.1541842222213745,grad_norm: 0.9999997284723653, iteration: 133507
loss: 1.0907447338104248,grad_norm: 0.9531270178657515, iteration: 133508
loss: 1.0726279020309448,grad_norm: 0.9999992199828651, iteration: 133509
loss: 1.1093759536743164,grad_norm: 0.9999995484243818, iteration: 133510
loss: 1.160630226135254,grad_norm: 0.9999999138555921, iteration: 133511
loss: 1.0856049060821533,grad_norm: 0.9999993092105959, iteration: 133512
loss: 1.0986747741699219,grad_norm: 0.9999999879873872, iteration: 133513
loss: 1.0017122030258179,grad_norm: 0.9999995385824028, iteration: 133514
loss: 1.1216312646865845,grad_norm: 0.999999707216141, iteration: 133515
loss: 1.0066704750061035,grad_norm: 0.9999996190965302, iteration: 133516
loss: 1.019881010055542,grad_norm: 0.9999991486108806, iteration: 133517
loss: 1.1165231466293335,grad_norm: 0.9999998348760111, iteration: 133518
loss: 1.0167375802993774,grad_norm: 0.9999992239061359, iteration: 133519
loss: 1.0982238054275513,grad_norm: 0.9999998924056258, iteration: 133520
loss: 1.0985206365585327,grad_norm: 0.9999998914246219, iteration: 133521
loss: 1.0112913846969604,grad_norm: 0.9999996928286652, iteration: 133522
loss: 1.300783395767212,grad_norm: 0.9999999103416678, iteration: 133523
loss: 1.1310654878616333,grad_norm: 0.9999992849662914, iteration: 133524
loss: 1.045226812362671,grad_norm: 0.999999802756287, iteration: 133525
loss: 1.0675283670425415,grad_norm: 0.9847058596927272, iteration: 133526
loss: 1.065035343170166,grad_norm: 0.9999995294601458, iteration: 133527
loss: 1.1121946573257446,grad_norm: 0.9999998950947433, iteration: 133528
loss: 1.0128326416015625,grad_norm: 0.9999996164504746, iteration: 133529
loss: 1.1178241968154907,grad_norm: 0.9999993117860629, iteration: 133530
loss: 1.1349363327026367,grad_norm: 0.9999999032867163, iteration: 133531
loss: 1.134676218032837,grad_norm: 0.9999998039059502, iteration: 133532
loss: 1.1044968366622925,grad_norm: 0.9999999500986738, iteration: 133533
loss: 1.161734700202942,grad_norm: 0.9999994484248718, iteration: 133534
loss: 1.014032006263733,grad_norm: 0.9999992388600646, iteration: 133535
loss: 0.9949126839637756,grad_norm: 0.9999994586341957, iteration: 133536
loss: 1.1821966171264648,grad_norm: 0.9999998619292852, iteration: 133537
loss: 1.1341713666915894,grad_norm: 0.9999999035898793, iteration: 133538
loss: 1.2416272163391113,grad_norm: 1.0000000583586794, iteration: 133539
loss: 1.1204861402511597,grad_norm: 0.9491581792487565, iteration: 133540
loss: 0.963672935962677,grad_norm: 0.9999990601062815, iteration: 133541
loss: 1.053848147392273,grad_norm: 0.9725410483223494, iteration: 133542
loss: 1.0267170667648315,grad_norm: 0.8580860878314421, iteration: 133543
loss: 1.0149458646774292,grad_norm: 0.9999992619328465, iteration: 133544
loss: 1.0128796100616455,grad_norm: 0.9999996460526189, iteration: 133545
loss: 1.0281164646148682,grad_norm: 0.9999996440852412, iteration: 133546
loss: 1.0771496295928955,grad_norm: 0.9999999144077216, iteration: 133547
loss: 0.9988825917243958,grad_norm: 0.9932973463205299, iteration: 133548
loss: 1.1098346710205078,grad_norm: 0.9999998561569745, iteration: 133549
loss: 1.0180578231811523,grad_norm: 0.939482243433683, iteration: 133550
loss: 1.0752092599868774,grad_norm: 0.9999992870195671, iteration: 133551
loss: 1.0756630897521973,grad_norm: 0.9999992441388622, iteration: 133552
loss: 1.0415520668029785,grad_norm: 0.8718136028131387, iteration: 133553
loss: 1.011287808418274,grad_norm: 0.9462637796740542, iteration: 133554
loss: 1.0473780632019043,grad_norm: 0.9999990715370236, iteration: 133555
loss: 1.1425987482070923,grad_norm: 0.9999992492624534, iteration: 133556
loss: 1.0000909566879272,grad_norm: 0.999999467645862, iteration: 133557
loss: 1.0280702114105225,grad_norm: 0.8172413030492803, iteration: 133558
loss: 1.1222589015960693,grad_norm: 0.9999993273926715, iteration: 133559
loss: 1.0088015794754028,grad_norm: 0.9233136587941089, iteration: 133560
loss: 1.0282269716262817,grad_norm: 0.999999321377921, iteration: 133561
loss: 1.0238300561904907,grad_norm: 0.9999992736137674, iteration: 133562
loss: 1.021201252937317,grad_norm: 0.7715012247858547, iteration: 133563
loss: 1.0501480102539062,grad_norm: 0.9514721631136919, iteration: 133564
loss: 0.9717664122581482,grad_norm: 0.7788794070088532, iteration: 133565
loss: 1.004030704498291,grad_norm: 0.999999122540571, iteration: 133566
loss: 1.046788215637207,grad_norm: 0.9616880023504298, iteration: 133567
loss: 1.0270392894744873,grad_norm: 0.9999991687858438, iteration: 133568
loss: 1.0395827293395996,grad_norm: 0.9999994798368227, iteration: 133569
loss: 0.993489146232605,grad_norm: 0.8932047374959013, iteration: 133570
loss: 1.0071390867233276,grad_norm: 0.9999998707915304, iteration: 133571
loss: 1.0364590883255005,grad_norm: 0.9999995976003331, iteration: 133572
loss: 1.0479905605316162,grad_norm: 0.9999990629379647, iteration: 133573
loss: 1.0211340188980103,grad_norm: 0.9972038339837979, iteration: 133574
loss: 1.0237336158752441,grad_norm: 0.9999999380564883, iteration: 133575
loss: 1.1075841188430786,grad_norm: 0.9999997519496565, iteration: 133576
loss: 0.9944677948951721,grad_norm: 0.9360962769469987, iteration: 133577
loss: 1.0074880123138428,grad_norm: 0.9999998042998842, iteration: 133578
loss: 1.021561622619629,grad_norm: 0.9447670411481284, iteration: 133579
loss: 1.0519771575927734,grad_norm: 0.9118246656996918, iteration: 133580
loss: 1.1193069219589233,grad_norm: 0.9999993991385255, iteration: 133581
loss: 1.0352314710617065,grad_norm: 0.9999992770741343, iteration: 133582
loss: 0.979423999786377,grad_norm: 0.8203876900739162, iteration: 133583
loss: 1.058565616607666,grad_norm: 0.9999996040833765, iteration: 133584
loss: 1.2281179428100586,grad_norm: 0.9999999234692736, iteration: 133585
loss: 1.0175446271896362,grad_norm: 0.9999996132823962, iteration: 133586
loss: 1.1326842308044434,grad_norm: 0.9999998486142218, iteration: 133587
loss: 0.9989503026008606,grad_norm: 0.8894179751553888, iteration: 133588
loss: 1.0430163145065308,grad_norm: 0.757715241410011, iteration: 133589
loss: 0.9905642867088318,grad_norm: 0.9999994980542805, iteration: 133590
loss: 1.029431939125061,grad_norm: 0.9595031069079207, iteration: 133591
loss: 0.9844806790351868,grad_norm: 0.8279029341602027, iteration: 133592
loss: 0.9811859130859375,grad_norm: 0.9176242227364616, iteration: 133593
loss: 1.1041157245635986,grad_norm: 0.9999998679266006, iteration: 133594
loss: 0.9989721775054932,grad_norm: 0.9314498950901773, iteration: 133595
loss: 0.9836037755012512,grad_norm: 0.8663978861130142, iteration: 133596
loss: 1.0264484882354736,grad_norm: 0.9999990077501247, iteration: 133597
loss: 0.9916638731956482,grad_norm: 0.974530023668206, iteration: 133598
loss: 1.1099019050598145,grad_norm: 0.9999998692638088, iteration: 133599
loss: 1.0297203063964844,grad_norm: 0.9542381850113347, iteration: 133600
loss: 1.0295417308807373,grad_norm: 0.8559546752009146, iteration: 133601
loss: 1.004311442375183,grad_norm: 0.9999992061261935, iteration: 133602
loss: 1.0298737287521362,grad_norm: 0.9609377549174326, iteration: 133603
loss: 1.0683832168579102,grad_norm: 0.9999992847931152, iteration: 133604
loss: 1.0188556909561157,grad_norm: 0.9999994439990365, iteration: 133605
loss: 0.9900860786437988,grad_norm: 0.8254026720694441, iteration: 133606
loss: 1.0544731616973877,grad_norm: 0.9999991438449625, iteration: 133607
loss: 1.0028636455535889,grad_norm: 0.7795939381140817, iteration: 133608
loss: 1.0207414627075195,grad_norm: 0.9999995113637544, iteration: 133609
loss: 1.0106199979782104,grad_norm: 0.9257116597298952, iteration: 133610
loss: 0.9972617626190186,grad_norm: 0.8532312845580389, iteration: 133611
loss: 1.0062437057495117,grad_norm: 0.9299318465971302, iteration: 133612
loss: 1.04920494556427,grad_norm: 0.9538146661958105, iteration: 133613
loss: 1.0091419219970703,grad_norm: 0.9999992234288724, iteration: 133614
loss: 1.0141277313232422,grad_norm: 0.9999996642266131, iteration: 133615
loss: 1.133623719215393,grad_norm: 0.9999991334854721, iteration: 133616
loss: 0.9592419862747192,grad_norm: 0.9522963425619604, iteration: 133617
loss: 1.04271399974823,grad_norm: 0.9999999704502224, iteration: 133618
loss: 1.007544755935669,grad_norm: 0.9999997862917614, iteration: 133619
loss: 1.063270092010498,grad_norm: 0.8826506849916491, iteration: 133620
loss: 1.203492522239685,grad_norm: 0.9999998454066241, iteration: 133621
loss: 1.0717359781265259,grad_norm: 0.9999997810621619, iteration: 133622
loss: 1.037689208984375,grad_norm: 0.9999993046669107, iteration: 133623
loss: 1.0185502767562866,grad_norm: 0.9999999315047998, iteration: 133624
loss: 1.017279863357544,grad_norm: 0.9999992110869951, iteration: 133625
loss: 1.0342615842819214,grad_norm: 0.9999990570138594, iteration: 133626
loss: 0.9920772910118103,grad_norm: 0.9999999436361435, iteration: 133627
loss: 1.035346269607544,grad_norm: 0.9999995179738042, iteration: 133628
loss: 1.0800423622131348,grad_norm: 0.9999992939244732, iteration: 133629
loss: 1.0819802284240723,grad_norm: 0.9999995162487887, iteration: 133630
loss: 1.0000741481781006,grad_norm: 0.9557164922855452, iteration: 133631
loss: 1.0526657104492188,grad_norm: 0.9999993382438829, iteration: 133632
loss: 1.0407953262329102,grad_norm: 0.8449729523835107, iteration: 133633
loss: 1.0678032636642456,grad_norm: 0.9999990525964537, iteration: 133634
loss: 1.073712944984436,grad_norm: 0.9999992887520478, iteration: 133635
loss: 0.99578458070755,grad_norm: 0.8918763594153597, iteration: 133636
loss: 0.9672935009002686,grad_norm: 0.8104817388672262, iteration: 133637
loss: 1.1177181005477905,grad_norm: 0.9999993051368051, iteration: 133638
loss: 0.9617419242858887,grad_norm: 0.9360967108711065, iteration: 133639
loss: 1.0360366106033325,grad_norm: 0.9999995790850439, iteration: 133640
loss: 0.9906097650527954,grad_norm: 0.9999998577034985, iteration: 133641
loss: 0.9746046662330627,grad_norm: 0.7135052540421464, iteration: 133642
loss: 1.0765745639801025,grad_norm: 0.9999996273682535, iteration: 133643
loss: 1.143651008605957,grad_norm: 1.000000006230743, iteration: 133644
loss: 1.0511101484298706,grad_norm: 0.9999991470848352, iteration: 133645
loss: 1.246668815612793,grad_norm: 0.9999997833746497, iteration: 133646
loss: 1.0279920101165771,grad_norm: 0.999999798072861, iteration: 133647
loss: 1.0137152671813965,grad_norm: 0.9999999361043961, iteration: 133648
loss: 1.0953853130340576,grad_norm: 0.9999998150314027, iteration: 133649
loss: 0.9895374178886414,grad_norm: 0.9840634036975322, iteration: 133650
loss: 1.20316743850708,grad_norm: 0.9999995793601204, iteration: 133651
loss: 1.001660704612732,grad_norm: 0.9999991886670139, iteration: 133652
loss: 0.9847121834754944,grad_norm: 0.9958722471507415, iteration: 133653
loss: 1.1023359298706055,grad_norm: 0.9999998270699384, iteration: 133654
loss: 1.088798999786377,grad_norm: 0.9413807448699278, iteration: 133655
loss: 0.9697609543800354,grad_norm: 0.9999993464170727, iteration: 133656
loss: 1.0253525972366333,grad_norm: 0.999999023716261, iteration: 133657
loss: 1.0602394342422485,grad_norm: 0.9999992509970145, iteration: 133658
loss: 1.0099821090698242,grad_norm: 0.9999994284291256, iteration: 133659
loss: 1.063356637954712,grad_norm: 0.9408837181124878, iteration: 133660
loss: 1.083796501159668,grad_norm: 0.9999998158078701, iteration: 133661
loss: 1.1265729665756226,grad_norm: 0.9999996019598394, iteration: 133662
loss: 0.9744992852210999,grad_norm: 0.9853423702432096, iteration: 133663
loss: 1.0822099447250366,grad_norm: 0.9999996204519124, iteration: 133664
loss: 1.009270191192627,grad_norm: 0.9999991101692322, iteration: 133665
loss: 1.0218933820724487,grad_norm: 0.9999995255678207, iteration: 133666
loss: 1.0061819553375244,grad_norm: 0.9999990582137016, iteration: 133667
loss: 1.0141961574554443,grad_norm: 0.999999021762552, iteration: 133668
loss: 1.0878021717071533,grad_norm: 0.999998964655935, iteration: 133669
loss: 1.023880124092102,grad_norm: 0.9707945497023109, iteration: 133670
loss: 1.1039917469024658,grad_norm: 0.999999559233541, iteration: 133671
loss: 0.9929754734039307,grad_norm: 0.8597001314723085, iteration: 133672
loss: 0.9984098672866821,grad_norm: 0.9999991223573786, iteration: 133673
loss: 1.1469147205352783,grad_norm: 0.999999726317808, iteration: 133674
loss: 1.0801894664764404,grad_norm: 0.999999389944072, iteration: 133675
loss: 1.0599662065505981,grad_norm: 0.9999994275758488, iteration: 133676
loss: 0.9664230942726135,grad_norm: 0.8480460250027234, iteration: 133677
loss: 1.098239779472351,grad_norm: 0.9999993381646569, iteration: 133678
loss: 1.0552595853805542,grad_norm: 0.9476788917932322, iteration: 133679
loss: 1.0013537406921387,grad_norm: 0.8491258047898874, iteration: 133680
loss: 1.1380870342254639,grad_norm: 0.9999991621983253, iteration: 133681
loss: 1.065394401550293,grad_norm: 0.9999997531685283, iteration: 133682
loss: 1.0623304843902588,grad_norm: 0.8347312247926097, iteration: 133683
loss: 1.0129307508468628,grad_norm: 0.9999994020228614, iteration: 133684
loss: 1.0603327751159668,grad_norm: 0.9999994448905742, iteration: 133685
loss: 0.9738467931747437,grad_norm: 0.9999993626107504, iteration: 133686
loss: 0.9805107712745667,grad_norm: 0.866319796214109, iteration: 133687
loss: 1.0403518676757812,grad_norm: 0.9999994013292816, iteration: 133688
loss: 1.016602873802185,grad_norm: 0.9750160079184638, iteration: 133689
loss: 1.0060479640960693,grad_norm: 0.9999998332209867, iteration: 133690
loss: 1.0448135137557983,grad_norm: 0.9999995269905395, iteration: 133691
loss: 1.0222855806350708,grad_norm: 0.9480270873062246, iteration: 133692
loss: 0.972412645816803,grad_norm: 0.8727350697445191, iteration: 133693
loss: 1.0309604406356812,grad_norm: 0.7506720760568698, iteration: 133694
loss: 1.0789141654968262,grad_norm: 0.9999992216006377, iteration: 133695
loss: 0.971762478351593,grad_norm: 0.9051783327528928, iteration: 133696
loss: 1.1576271057128906,grad_norm: 0.9999998698259588, iteration: 133697
loss: 1.076670527458191,grad_norm: 0.8746410638618247, iteration: 133698
loss: 1.1013466119766235,grad_norm: 0.9999999495673813, iteration: 133699
loss: 1.065567135810852,grad_norm: 0.9999994241905923, iteration: 133700
loss: 1.2877731323242188,grad_norm: 0.9999992752758945, iteration: 133701
loss: 1.02202308177948,grad_norm: 0.7979199982503546, iteration: 133702
loss: 1.0053107738494873,grad_norm: 0.9999999361856453, iteration: 133703
loss: 1.0313266515731812,grad_norm: 0.8605707085552952, iteration: 133704
loss: 1.1202160120010376,grad_norm: 0.9999994922288191, iteration: 133705
loss: 1.0793633460998535,grad_norm: 0.8679671109678246, iteration: 133706
loss: 1.0443180799484253,grad_norm: 0.999999213380995, iteration: 133707
loss: 1.0091267824172974,grad_norm: 0.9308000530472711, iteration: 133708
loss: 0.9983751177787781,grad_norm: 0.9999992699922512, iteration: 133709
loss: 0.9842721819877625,grad_norm: 0.9287882107422952, iteration: 133710
loss: 1.252355933189392,grad_norm: 0.9999996826638442, iteration: 133711
loss: 1.0350810289382935,grad_norm: 0.9999999256545281, iteration: 133712
loss: 1.0175392627716064,grad_norm: 0.8270219970122393, iteration: 133713
loss: 1.0218144655227661,grad_norm: 0.9999991649540768, iteration: 133714
loss: 1.0860583782196045,grad_norm: 0.9999994986321146, iteration: 133715
loss: 1.060113549232483,grad_norm: 0.9999995933084344, iteration: 133716
loss: 1.0494384765625,grad_norm: 0.9999999543360776, iteration: 133717
loss: 1.0562726259231567,grad_norm: 0.9999990630608371, iteration: 133718
loss: 1.0378485918045044,grad_norm: 0.9999997666585544, iteration: 133719
loss: 1.0387314558029175,grad_norm: 0.8562874802174922, iteration: 133720
loss: 1.02303946018219,grad_norm: 0.9999994671295601, iteration: 133721
loss: 1.02065110206604,grad_norm: 0.9999992438357403, iteration: 133722
loss: 1.0829442739486694,grad_norm: 0.9999994518332624, iteration: 133723
loss: 1.0955640077590942,grad_norm: 0.9999995730366479, iteration: 133724
loss: 1.032970905303955,grad_norm: 0.9999992812859748, iteration: 133725
loss: 1.0519194602966309,grad_norm: 0.9999991157610738, iteration: 133726
loss: 1.147168517112732,grad_norm: 0.9999995997040889, iteration: 133727
loss: 1.0359915494918823,grad_norm: 0.9999992261440176, iteration: 133728
loss: 0.9958699345588684,grad_norm: 0.9999997142449725, iteration: 133729
loss: 1.089464783668518,grad_norm: 0.9999992526764764, iteration: 133730
loss: 1.23332679271698,grad_norm: 0.9999998446378325, iteration: 133731
loss: 1.160759687423706,grad_norm: 0.9999993803898171, iteration: 133732
loss: 1.023442029953003,grad_norm: 0.9999995558466328, iteration: 133733
loss: 1.1187186241149902,grad_norm: 0.9999999133852903, iteration: 133734
loss: 1.0062780380249023,grad_norm: 0.8688933148741562, iteration: 133735
loss: 0.9875971674919128,grad_norm: 0.9079101247747884, iteration: 133736
loss: 1.1314358711242676,grad_norm: 0.9999998588902368, iteration: 133737
loss: 1.0281964540481567,grad_norm: 0.9999991560829997, iteration: 133738
loss: 1.019446611404419,grad_norm: 0.9476827277624003, iteration: 133739
loss: 1.0518279075622559,grad_norm: 0.9999997409945697, iteration: 133740
loss: 1.164007544517517,grad_norm: 0.999999621118408, iteration: 133741
loss: 0.9991914629936218,grad_norm: 0.81204134956953, iteration: 133742
loss: 1.1167418956756592,grad_norm: 0.9999992963223605, iteration: 133743
loss: 1.0063669681549072,grad_norm: 0.9999992723235781, iteration: 133744
loss: 1.029849886894226,grad_norm: 0.9980050514810789, iteration: 133745
loss: 1.0374568700790405,grad_norm: 0.9999996479593977, iteration: 133746
loss: 1.0562465190887451,grad_norm: 0.9999994550677745, iteration: 133747
loss: 1.070981502532959,grad_norm: 0.9999999727854851, iteration: 133748
loss: 1.1011923551559448,grad_norm: 0.9999998005792061, iteration: 133749
loss: 1.0372241735458374,grad_norm: 0.9999992315906696, iteration: 133750
loss: 1.0726712942123413,grad_norm: 0.9999992104347201, iteration: 133751
loss: 1.0598608255386353,grad_norm: 0.9999999981438925, iteration: 133752
loss: 1.016686201095581,grad_norm: 0.9999991120344391, iteration: 133753
loss: 1.0637253522872925,grad_norm: 0.99999986513633, iteration: 133754
loss: 0.9824793934822083,grad_norm: 0.9999992865473073, iteration: 133755
loss: 0.993191123008728,grad_norm: 0.9090359207571492, iteration: 133756
loss: 1.063405156135559,grad_norm: 0.999999297273433, iteration: 133757
loss: 1.025558352470398,grad_norm: 0.9999990307937547, iteration: 133758
loss: 0.9683857560157776,grad_norm: 0.999999203470564, iteration: 133759
loss: 1.0977784395217896,grad_norm: 0.9999992194157566, iteration: 133760
loss: 1.160040020942688,grad_norm: 0.9999996985599771, iteration: 133761
loss: 0.9958410263061523,grad_norm: 0.9075483800845381, iteration: 133762
loss: 1.2075810432434082,grad_norm: 0.9999995898957281, iteration: 133763
loss: 1.036908507347107,grad_norm: 0.9809035820630222, iteration: 133764
loss: 1.0509849786758423,grad_norm: 0.9999991747462927, iteration: 133765
loss: 1.0343385934829712,grad_norm: 0.9999991354621965, iteration: 133766
loss: 1.131350040435791,grad_norm: 0.999999945177387, iteration: 133767
loss: 1.0269347429275513,grad_norm: 0.9966582014779802, iteration: 133768
loss: 1.125649333000183,grad_norm: 0.9999998862474501, iteration: 133769
loss: 0.9890387654304504,grad_norm: 0.9999992288872811, iteration: 133770
loss: 1.0233027935028076,grad_norm: 0.9999996640331168, iteration: 133771
loss: 0.99573814868927,grad_norm: 0.9999991818894767, iteration: 133772
loss: 1.0602693557739258,grad_norm: 0.9999992159741132, iteration: 133773
loss: 1.1291347742080688,grad_norm: 0.9982451639157186, iteration: 133774
loss: 1.107877492904663,grad_norm: 0.9999995107634185, iteration: 133775
loss: 1.0493911504745483,grad_norm: 0.9999998128464092, iteration: 133776
loss: 0.9927546381950378,grad_norm: 0.9999990654684381, iteration: 133777
loss: 1.0327852964401245,grad_norm: 0.9899427574696951, iteration: 133778
loss: 1.0586296319961548,grad_norm: 0.9999999644932666, iteration: 133779
loss: 1.090850830078125,grad_norm: 0.9999993611148356, iteration: 133780
loss: 1.0157408714294434,grad_norm: 0.9999998728012236, iteration: 133781
loss: 1.009492039680481,grad_norm: 0.9428623300115535, iteration: 133782
loss: 1.0191266536712646,grad_norm: 0.9779376906888129, iteration: 133783
loss: 0.9899382591247559,grad_norm: 0.9759939994146352, iteration: 133784
loss: 1.0740174055099487,grad_norm: 0.9999991639264515, iteration: 133785
loss: 0.9966585040092468,grad_norm: 0.9999990663631312, iteration: 133786
loss: 1.0230401754379272,grad_norm: 0.8952365921915263, iteration: 133787
loss: 1.078262448310852,grad_norm: 0.9999999381659262, iteration: 133788
loss: 1.0222796201705933,grad_norm: 0.999999776687824, iteration: 133789
loss: 1.0458897352218628,grad_norm: 0.9999995126116472, iteration: 133790
loss: 1.0385371446609497,grad_norm: 0.9999999064826411, iteration: 133791
loss: 1.0148365497589111,grad_norm: 0.9999999334106766, iteration: 133792
loss: 1.0114772319793701,grad_norm: 0.8997514570341492, iteration: 133793
loss: 1.0538568496704102,grad_norm: 0.9999992471541961, iteration: 133794
loss: 1.0273607969284058,grad_norm: 0.8194135761146454, iteration: 133795
loss: 1.0069230794906616,grad_norm: 0.8168067128731672, iteration: 133796
loss: 1.1039544343948364,grad_norm: 0.9999998928463857, iteration: 133797
loss: 1.122453212738037,grad_norm: 0.9999994838647982, iteration: 133798
loss: 0.9758725762367249,grad_norm: 0.8390587496066022, iteration: 133799
loss: 0.9670069217681885,grad_norm: 0.801113274977889, iteration: 133800
loss: 1.0359654426574707,grad_norm: 0.8770384164998719, iteration: 133801
loss: 1.021512746810913,grad_norm: 0.9999992189404573, iteration: 133802
loss: 1.0241378545761108,grad_norm: 0.9999990744435583, iteration: 133803
loss: 0.9435850381851196,grad_norm: 0.8868443592248882, iteration: 133804
loss: 0.9876354932785034,grad_norm: 0.8858551840852023, iteration: 133805
loss: 1.008831262588501,grad_norm: 0.9999992117139341, iteration: 133806
loss: 1.021564245223999,grad_norm: 0.9789600623575027, iteration: 133807
loss: 1.035935878753662,grad_norm: 0.999999265461312, iteration: 133808
loss: 1.0362099409103394,grad_norm: 0.9999996016813397, iteration: 133809
loss: 1.019944667816162,grad_norm: 0.9999998987703731, iteration: 133810
loss: 0.9737995862960815,grad_norm: 0.816559951475699, iteration: 133811
loss: 0.9884645938873291,grad_norm: 0.743468888723697, iteration: 133812
loss: 0.9662907123565674,grad_norm: 0.9651861954969624, iteration: 133813
loss: 1.0300047397613525,grad_norm: 0.9999994735434368, iteration: 133814
loss: 0.9852874279022217,grad_norm: 0.8961722372370625, iteration: 133815
loss: 1.0347899198532104,grad_norm: 0.9999991534358743, iteration: 133816
loss: 1.1016744375228882,grad_norm: 0.9999996887503629, iteration: 133817
loss: 1.0595779418945312,grad_norm: 0.99999976246864, iteration: 133818
loss: 0.9817927479743958,grad_norm: 0.8324810834383205, iteration: 133819
loss: 1.0516563653945923,grad_norm: 0.9999991946800055, iteration: 133820
loss: 1.0397839546203613,grad_norm: 0.9999989905506774, iteration: 133821
loss: 0.9810834527015686,grad_norm: 0.8310599694629287, iteration: 133822
loss: 1.1268161535263062,grad_norm: 0.9999998006693467, iteration: 133823
loss: 1.0583876371383667,grad_norm: 0.9999993143917261, iteration: 133824
loss: 0.9848872423171997,grad_norm: 0.9999993010772292, iteration: 133825
loss: 1.0274772644042969,grad_norm: 0.9629830685183535, iteration: 133826
loss: 1.0570074319839478,grad_norm: 0.8175190664464147, iteration: 133827
loss: 1.1520596742630005,grad_norm: 0.999999862566352, iteration: 133828
loss: 0.9519422650337219,grad_norm: 0.8762946244543469, iteration: 133829
loss: 1.03830087184906,grad_norm: 0.9999997200775851, iteration: 133830
loss: 0.9676620960235596,grad_norm: 0.9399907510655635, iteration: 133831
loss: 0.9848363995552063,grad_norm: 0.9392514858659844, iteration: 133832
loss: 1.1206004619598389,grad_norm: 0.9999993144942895, iteration: 133833
loss: 1.2867536544799805,grad_norm: 0.9999997291584016, iteration: 133834
loss: 1.0097873210906982,grad_norm: 0.8514586408885991, iteration: 133835
loss: 1.0532681941986084,grad_norm: 0.9999994141493888, iteration: 133836
loss: 1.034267544746399,grad_norm: 0.9999999065618607, iteration: 133837
loss: 1.043955683708191,grad_norm: 0.9999991294639916, iteration: 133838
loss: 1.0589097738265991,grad_norm: 0.9999994250491628, iteration: 133839
loss: 0.9694491624832153,grad_norm: 0.9383191928843906, iteration: 133840
loss: 1.0395090579986572,grad_norm: 0.999999607748354, iteration: 133841
loss: 1.044486403465271,grad_norm: 0.9999996884047659, iteration: 133842
loss: 1.12256920337677,grad_norm: 0.999999334144002, iteration: 133843
loss: 1.0053526163101196,grad_norm: 0.8531539881848593, iteration: 133844
loss: 0.9985091090202332,grad_norm: 0.7861322939519624, iteration: 133845
loss: 1.0063070058822632,grad_norm: 0.9999991479606825, iteration: 133846
loss: 1.1379718780517578,grad_norm: 0.9999995949566413, iteration: 133847
loss: 1.0186163187026978,grad_norm: 0.999999115426563, iteration: 133848
loss: 1.0612142086029053,grad_norm: 0.9999994038045946, iteration: 133849
loss: 1.063327670097351,grad_norm: 0.9999996553468159, iteration: 133850
loss: 0.9916543960571289,grad_norm: 0.8109642104529944, iteration: 133851
loss: 1.0354037284851074,grad_norm: 0.9999995398043207, iteration: 133852
loss: 1.1078460216522217,grad_norm: 0.9999999009389744, iteration: 133853
loss: 0.9768974781036377,grad_norm: 0.7389163333453385, iteration: 133854
loss: 1.0837829113006592,grad_norm: 0.999999405937346, iteration: 133855
loss: 0.9718770980834961,grad_norm: 0.8421937368840939, iteration: 133856
loss: 1.142172932624817,grad_norm: 0.9999994033204026, iteration: 133857
loss: 1.0111125707626343,grad_norm: 0.7648197655645944, iteration: 133858
loss: 1.1555150747299194,grad_norm: 0.9999992032317769, iteration: 133859
loss: 1.0082263946533203,grad_norm: 0.9999996619498654, iteration: 133860
loss: 0.9823947548866272,grad_norm: 0.9999991286903036, iteration: 133861
loss: 1.0148861408233643,grad_norm: 0.9999998927397399, iteration: 133862
loss: 1.0751593112945557,grad_norm: 0.9999993015724676, iteration: 133863
loss: 1.0106728076934814,grad_norm: 0.9142690950433254, iteration: 133864
loss: 1.1316837072372437,grad_norm: 0.9999998713702614, iteration: 133865
loss: 1.0163501501083374,grad_norm: 0.9999998530241356, iteration: 133866
loss: 0.9637907147407532,grad_norm: 0.9999994377925341, iteration: 133867
loss: 1.020120620727539,grad_norm: 0.9999990840254054, iteration: 133868
loss: 1.1960066556930542,grad_norm: 0.9999992373636744, iteration: 133869
loss: 1.1225727796554565,grad_norm: 0.9999991636855677, iteration: 133870
loss: 0.9917623996734619,grad_norm: 0.8668881284816791, iteration: 133871
loss: 1.132185935974121,grad_norm: 0.9999999113394501, iteration: 133872
loss: 1.0882540941238403,grad_norm: 0.9999992193317049, iteration: 133873
loss: 1.0710265636444092,grad_norm: 0.9999993007681274, iteration: 133874
loss: 0.9930374026298523,grad_norm: 0.9353230220896398, iteration: 133875
loss: 1.021212100982666,grad_norm: 0.9999994959924204, iteration: 133876
loss: 1.0442533493041992,grad_norm: 0.9999991992414611, iteration: 133877
loss: 1.1466548442840576,grad_norm: 0.9999996118644593, iteration: 133878
loss: 1.0638643503189087,grad_norm: 0.9999990224258791, iteration: 133879
loss: 1.0274500846862793,grad_norm: 0.9999991283381872, iteration: 133880
loss: 1.0661662817001343,grad_norm: 0.9999999492829115, iteration: 133881
loss: 1.0710657835006714,grad_norm: 0.9999993745918421, iteration: 133882
loss: 1.0002118349075317,grad_norm: 0.8509067812729856, iteration: 133883
loss: 1.097077488899231,grad_norm: 0.9999992393485625, iteration: 133884
loss: 1.1228185892105103,grad_norm: 0.9999997028827534, iteration: 133885
loss: 1.013579249382019,grad_norm: 1.0000000120002184, iteration: 133886
loss: 1.0361320972442627,grad_norm: 0.9999997177597806, iteration: 133887
loss: 1.0539984703063965,grad_norm: 0.9999998784227675, iteration: 133888
loss: 1.1254608631134033,grad_norm: 0.9999997320999617, iteration: 133889
loss: 1.2168532609939575,grad_norm: 0.999999909312064, iteration: 133890
loss: 1.1786630153656006,grad_norm: 0.9999997674513752, iteration: 133891
loss: 1.1649545431137085,grad_norm: 0.9999993281495136, iteration: 133892
loss: 1.0521231889724731,grad_norm: 0.9999990917815682, iteration: 133893
loss: 1.0043525695800781,grad_norm: 0.7959219253616171, iteration: 133894
loss: 1.0704952478408813,grad_norm: 0.9999990457685041, iteration: 133895
loss: 1.0704635381698608,grad_norm: 0.9999998524217122, iteration: 133896
loss: 1.0608049631118774,grad_norm: 0.9999991750909094, iteration: 133897
loss: 1.2288901805877686,grad_norm: 0.9999995982238721, iteration: 133898
loss: 1.0957554578781128,grad_norm: 0.9999999047489079, iteration: 133899
loss: 1.1237156391143799,grad_norm: 0.9999996629594181, iteration: 133900
loss: 1.1338579654693604,grad_norm: 0.99999990770168, iteration: 133901
loss: 1.0163527727127075,grad_norm: 0.9381970546013534, iteration: 133902
loss: 1.0083988904953003,grad_norm: 0.8491347909536723, iteration: 133903
loss: 0.9691089987754822,grad_norm: 0.9773967287871497, iteration: 133904
loss: 0.9881299734115601,grad_norm: 0.953948239131996, iteration: 133905
loss: 1.0822075605392456,grad_norm: 0.9999991546908439, iteration: 133906
loss: 1.0216360092163086,grad_norm: 0.8718386000036351, iteration: 133907
loss: 1.045198917388916,grad_norm: 0.9002281893203427, iteration: 133908
loss: 1.0512927770614624,grad_norm: 0.8136046192992391, iteration: 133909
loss: 1.0647324323654175,grad_norm: 0.9999996457652259, iteration: 133910
loss: 1.0224212408065796,grad_norm: 0.8479154944672116, iteration: 133911
loss: 1.1156260967254639,grad_norm: 0.9999995826542764, iteration: 133912
loss: 1.2554292678833008,grad_norm: 0.9999999404829841, iteration: 133913
loss: 1.0445821285247803,grad_norm: 0.9999991178029465, iteration: 133914
loss: 1.0464317798614502,grad_norm: 0.9999997684861656, iteration: 133915
loss: 1.046830177307129,grad_norm: 0.9999995035234188, iteration: 133916
loss: 1.1351679563522339,grad_norm: 1.0000000636687723, iteration: 133917
loss: 1.017952799797058,grad_norm: 0.9999992195891146, iteration: 133918
loss: 1.1628669500350952,grad_norm: 0.9999997850300705, iteration: 133919
loss: 1.057357907295227,grad_norm: 0.9999995091515977, iteration: 133920
loss: 1.0347399711608887,grad_norm: 0.9999999061459017, iteration: 133921
loss: 1.0304193496704102,grad_norm: 0.9999996281157999, iteration: 133922
loss: 1.0492067337036133,grad_norm: 0.9999992517435697, iteration: 133923
loss: 1.0865776538848877,grad_norm: 0.999999109502225, iteration: 133924
loss: 1.032495141029358,grad_norm: 0.8068717686678273, iteration: 133925
loss: 1.1480965614318848,grad_norm: 0.9999998370319865, iteration: 133926
loss: 0.9920375347137451,grad_norm: 0.8522963657564082, iteration: 133927
loss: 1.0064271688461304,grad_norm: 0.999999031483249, iteration: 133928
loss: 1.0435051918029785,grad_norm: 0.9999998332614316, iteration: 133929
loss: 1.044804573059082,grad_norm: 0.9999995432494153, iteration: 133930
loss: 1.430443286895752,grad_norm: 1.000000017981057, iteration: 133931
loss: 1.028995394706726,grad_norm: 0.8331824851410007, iteration: 133932
loss: 1.0259177684783936,grad_norm: 0.7884533868232534, iteration: 133933
loss: 0.9593406319618225,grad_norm: 0.9564747019375228, iteration: 133934
loss: 1.050223708152771,grad_norm: 0.7653629301818519, iteration: 133935
loss: 1.0230045318603516,grad_norm: 0.8115727530396584, iteration: 133936
loss: 1.0685003995895386,grad_norm: 0.9712862263053643, iteration: 133937
loss: 1.007157802581787,grad_norm: 0.9999992864148577, iteration: 133938
loss: 1.1272186040878296,grad_norm: 0.9999999013342362, iteration: 133939
loss: 1.1194990873336792,grad_norm: 0.9999999024198549, iteration: 133940
loss: 1.0470811128616333,grad_norm: 0.9999992478322564, iteration: 133941
loss: 1.0960323810577393,grad_norm: 0.9999998577323796, iteration: 133942
loss: 1.0447818040847778,grad_norm: 0.9999997394895643, iteration: 133943
loss: 1.0328636169433594,grad_norm: 0.9999991467492588, iteration: 133944
loss: 1.0823402404785156,grad_norm: 0.9999991780100324, iteration: 133945
loss: 1.0035091638565063,grad_norm: 0.9932149082662254, iteration: 133946
loss: 1.1817034482955933,grad_norm: 0.9999993197454243, iteration: 133947
loss: 1.064804196357727,grad_norm: 0.9999998493175551, iteration: 133948
loss: 1.0088601112365723,grad_norm: 0.9358263708293897, iteration: 133949
loss: 1.219117283821106,grad_norm: 0.9999998179022652, iteration: 133950
loss: 1.0492470264434814,grad_norm: 0.9999997389493525, iteration: 133951
loss: 1.0271046161651611,grad_norm: 0.9999992388407242, iteration: 133952
loss: 1.0913301706314087,grad_norm: 0.9999993605591464, iteration: 133953
loss: 1.073866367340088,grad_norm: 0.9999996843788302, iteration: 133954
loss: 1.024258017539978,grad_norm: 0.8813203461946061, iteration: 133955
loss: 1.000646948814392,grad_norm: 0.9999993572641382, iteration: 133956
loss: 1.030051350593567,grad_norm: 0.9999991649480946, iteration: 133957
loss: 1.209926962852478,grad_norm: 0.9999997231308962, iteration: 133958
loss: 1.1051623821258545,grad_norm: 0.9999998118287104, iteration: 133959
loss: 1.1386833190917969,grad_norm: 0.9999997880445133, iteration: 133960
loss: 1.1408458948135376,grad_norm: 0.9999999621660277, iteration: 133961
loss: 1.0456593036651611,grad_norm: 0.9999990059548826, iteration: 133962
loss: 1.006683111190796,grad_norm: 0.8757231394447514, iteration: 133963
loss: 0.9759237766265869,grad_norm: 0.974561673571228, iteration: 133964
loss: 1.132498860359192,grad_norm: 0.9999998125166926, iteration: 133965
loss: 1.0134055614471436,grad_norm: 0.9999991015592269, iteration: 133966
loss: 1.0462228059768677,grad_norm: 0.9999994628790583, iteration: 133967
loss: 0.9962303042411804,grad_norm: 0.797411105420447, iteration: 133968
loss: 0.9976250529289246,grad_norm: 0.9999995022636912, iteration: 133969
loss: 0.9847192168235779,grad_norm: 0.965291545270426, iteration: 133970
loss: 1.0474737882614136,grad_norm: 0.9999994082044988, iteration: 133971
loss: 1.0233707427978516,grad_norm: 0.9999998191512067, iteration: 133972
loss: 1.0180777311325073,grad_norm: 0.9999997574336635, iteration: 133973
loss: 0.9880669116973877,grad_norm: 0.7782665446729605, iteration: 133974
loss: 0.9835925102233887,grad_norm: 0.8241634674481805, iteration: 133975
loss: 1.0574510097503662,grad_norm: 0.9999993328904774, iteration: 133976
loss: 1.1281514167785645,grad_norm: 1.000000035007551, iteration: 133977
loss: 0.9937968850135803,grad_norm: 0.9967626343580297, iteration: 133978
loss: 1.0341887474060059,grad_norm: 0.999999201015139, iteration: 133979
loss: 1.082640290260315,grad_norm: 0.9999989816973878, iteration: 133980
loss: 1.0727263689041138,grad_norm: 0.9999991316711174, iteration: 133981
loss: 1.01575767993927,grad_norm: 0.9999997743741982, iteration: 133982
loss: 1.0392733812332153,grad_norm: 0.9418143205226719, iteration: 133983
loss: 1.026809811592102,grad_norm: 0.8837942378250407, iteration: 133984
loss: 1.0180227756500244,grad_norm: 0.7994421471760494, iteration: 133985
loss: 1.0270451307296753,grad_norm: 0.9999992718828047, iteration: 133986
loss: 0.9998270273208618,grad_norm: 0.9999990960152345, iteration: 133987
loss: 1.055034875869751,grad_norm: 0.9999991688099156, iteration: 133988
loss: 1.0148173570632935,grad_norm: 0.8969493461751535, iteration: 133989
loss: 1.0282282829284668,grad_norm: 0.9027941965736956, iteration: 133990
loss: 1.022492527961731,grad_norm: 0.7876344628304128, iteration: 133991
loss: 1.107218861579895,grad_norm: 0.9999991286979526, iteration: 133992
loss: 1.049189805984497,grad_norm: 0.9999994733163213, iteration: 133993
loss: 0.989938497543335,grad_norm: 0.9862753930571906, iteration: 133994
loss: 1.067549705505371,grad_norm: 0.9227862504379333, iteration: 133995
loss: 0.9706301093101501,grad_norm: 0.8814297276440273, iteration: 133996
loss: 1.062661051750183,grad_norm: 0.9999996036929565, iteration: 133997
loss: 1.0136399269104004,grad_norm: 0.8039449104318798, iteration: 133998
loss: 1.0534358024597168,grad_norm: 0.999999299143519, iteration: 133999
loss: 1.064935564994812,grad_norm: 0.999999203791842, iteration: 134000
loss: 1.0239183902740479,grad_norm: 0.9999995878531476, iteration: 134001
loss: 0.9964797496795654,grad_norm: 0.8638475914641806, iteration: 134002
loss: 1.027519941329956,grad_norm: 0.9999994117557847, iteration: 134003
loss: 0.9972480535507202,grad_norm: 0.8506744483033656, iteration: 134004
loss: 1.1405718326568604,grad_norm: 0.9999991819921772, iteration: 134005
loss: 0.9882655143737793,grad_norm: 0.9480440373975554, iteration: 134006
loss: 0.9742643237113953,grad_norm: 0.8767572470338733, iteration: 134007
loss: 1.0666824579238892,grad_norm: 0.9999995629477719, iteration: 134008
loss: 1.0156793594360352,grad_norm: 0.8939954585127675, iteration: 134009
loss: 1.0030598640441895,grad_norm: 0.9999991023420657, iteration: 134010
loss: 1.0423526763916016,grad_norm: 0.9999991252746601, iteration: 134011
loss: 1.0261369943618774,grad_norm: 0.99999952887643, iteration: 134012
loss: 1.0153720378875732,grad_norm: 0.9999996440821466, iteration: 134013
loss: 1.0496727228164673,grad_norm: 0.9999991934705325, iteration: 134014
loss: 1.0119835138320923,grad_norm: 0.9325049152836186, iteration: 134015
loss: 1.0245105028152466,grad_norm: 0.9924201015803036, iteration: 134016
loss: 1.1058216094970703,grad_norm: 0.8917491826706443, iteration: 134017
loss: 1.0610008239746094,grad_norm: 0.9999991887367817, iteration: 134018
loss: 0.9736738801002502,grad_norm: 0.9999989869998048, iteration: 134019
loss: 1.0970251560211182,grad_norm: 0.9999998440289254, iteration: 134020
loss: 0.9978126883506775,grad_norm: 0.9999992424268793, iteration: 134021
loss: 0.9967155456542969,grad_norm: 0.7986702768645567, iteration: 134022
loss: 1.0200008153915405,grad_norm: 0.999999247904528, iteration: 134023
loss: 0.9964649677276611,grad_norm: 0.9683663833709186, iteration: 134024
loss: 1.071088433265686,grad_norm: 0.9258959393785571, iteration: 134025
loss: 1.0195980072021484,grad_norm: 0.9999998759766292, iteration: 134026
loss: 0.9978663921356201,grad_norm: 0.999999066960932, iteration: 134027
loss: 0.9908269643783569,grad_norm: 0.7155003976474226, iteration: 134028
loss: 0.9936055541038513,grad_norm: 0.9607342878179734, iteration: 134029
loss: 1.0417696237564087,grad_norm: 0.9999999281380019, iteration: 134030
loss: 1.0581774711608887,grad_norm: 0.9999990164600402, iteration: 134031
loss: 1.0115814208984375,grad_norm: 0.8846472888668063, iteration: 134032
loss: 1.0104410648345947,grad_norm: 0.8235149471858172, iteration: 134033
loss: 1.1147828102111816,grad_norm: 0.9900191415975494, iteration: 134034
loss: 1.0239704847335815,grad_norm: 0.9268269780968732, iteration: 134035
loss: 1.0238754749298096,grad_norm: 0.8195871457569961, iteration: 134036
loss: 0.9909397959709167,grad_norm: 0.9999991136904111, iteration: 134037
loss: 0.9942640662193298,grad_norm: 0.8367268510258035, iteration: 134038
loss: 1.0044537782669067,grad_norm: 0.9999989218871103, iteration: 134039
loss: 1.0145169496536255,grad_norm: 0.9223078449003472, iteration: 134040
loss: 1.051813006401062,grad_norm: 0.9999997454152088, iteration: 134041
loss: 1.0153477191925049,grad_norm: 0.7027943449740225, iteration: 134042
loss: 1.0059062242507935,grad_norm: 0.892256706730892, iteration: 134043
loss: 1.056140661239624,grad_norm: 0.9999992502372967, iteration: 134044
loss: 1.0423305034637451,grad_norm: 0.8674255028965896, iteration: 134045
loss: 1.0017945766448975,grad_norm: 0.9041816029615182, iteration: 134046
loss: 1.0144962072372437,grad_norm: 0.9625893083720929, iteration: 134047
loss: 1.0378153324127197,grad_norm: 0.9640373113669743, iteration: 134048
loss: 1.072539210319519,grad_norm: 0.8805149820770156, iteration: 134049
loss: 1.0298397541046143,grad_norm: 0.8150633655261318, iteration: 134050
loss: 1.0404243469238281,grad_norm: 0.9008306959101474, iteration: 134051
loss: 1.03485107421875,grad_norm: 0.6859197901874383, iteration: 134052
loss: 1.0902073383331299,grad_norm: 0.9999991565399771, iteration: 134053
loss: 1.0440526008605957,grad_norm: 0.9999997612360836, iteration: 134054
loss: 0.9846686124801636,grad_norm: 0.7957190465462667, iteration: 134055
loss: 1.0097802877426147,grad_norm: 0.9261138052485258, iteration: 134056
loss: 1.0214428901672363,grad_norm: 0.9999990573813626, iteration: 134057
loss: 0.9744656682014465,grad_norm: 0.9937758325183724, iteration: 134058
loss: 1.0086655616760254,grad_norm: 0.9999989556688773, iteration: 134059
loss: 1.0002191066741943,grad_norm: 0.8114265942050385, iteration: 134060
loss: 0.983326256275177,grad_norm: 0.8894228440900337, iteration: 134061
loss: 1.0209394693374634,grad_norm: 0.9999992189348574, iteration: 134062
loss: 0.979347288608551,grad_norm: 0.8379871900487175, iteration: 134063
loss: 0.9613863825798035,grad_norm: 0.8929756007298955, iteration: 134064
loss: 0.9603431224822998,grad_norm: 0.9999994857713158, iteration: 134065
loss: 1.0279366970062256,grad_norm: 0.9609921702210286, iteration: 134066
loss: 1.0541832447052002,grad_norm: 0.9999990436468424, iteration: 134067
loss: 1.0000548362731934,grad_norm: 0.8963661449793504, iteration: 134068
loss: 0.9781225323677063,grad_norm: 0.8565933085881867, iteration: 134069
loss: 1.0297536849975586,grad_norm: 0.975491174541051, iteration: 134070
loss: 1.0458080768585205,grad_norm: 0.9530906255027578, iteration: 134071
loss: 1.0032633543014526,grad_norm: 0.9010378301424481, iteration: 134072
loss: 1.0093739032745361,grad_norm: 0.9999995343847001, iteration: 134073
loss: 1.003007173538208,grad_norm: 0.8505334987463257, iteration: 134074
loss: 1.0212934017181396,grad_norm: 0.7974972221158964, iteration: 134075
loss: 1.0437891483306885,grad_norm: 0.9999997222767613, iteration: 134076
loss: 0.9905792474746704,grad_norm: 0.7529314276657001, iteration: 134077
loss: 1.0180926322937012,grad_norm: 0.9999992141187529, iteration: 134078
loss: 1.0101714134216309,grad_norm: 0.9999989824393356, iteration: 134079
loss: 1.0147749185562134,grad_norm: 0.9999993464925744, iteration: 134080
loss: 1.0161585807800293,grad_norm: 0.9056464738677673, iteration: 134081
loss: 1.0061007738113403,grad_norm: 0.9390185124747416, iteration: 134082
loss: 1.0550788640975952,grad_norm: 0.9999990160582889, iteration: 134083
loss: 1.0399514436721802,grad_norm: 0.8887343432147342, iteration: 134084
loss: 0.9979552030563354,grad_norm: 0.9999991448653244, iteration: 134085
loss: 0.9635098576545715,grad_norm: 0.9498782788693672, iteration: 134086
loss: 0.9488447308540344,grad_norm: 0.841162571750134, iteration: 134087
loss: 1.0200525522232056,grad_norm: 0.888765909534728, iteration: 134088
loss: 1.0825408697128296,grad_norm: 0.9999999855117735, iteration: 134089
loss: 1.0166224241256714,grad_norm: 0.9999993053873976, iteration: 134090
loss: 0.9809650182723999,grad_norm: 0.8229605198406919, iteration: 134091
loss: 0.9922797679901123,grad_norm: 0.9999991840184689, iteration: 134092
loss: 0.985829770565033,grad_norm: 0.9999991460897237, iteration: 134093
loss: 1.0144344568252563,grad_norm: 0.9999999341834414, iteration: 134094
loss: 1.0076411962509155,grad_norm: 0.9999998456477656, iteration: 134095
loss: 0.9961796402931213,grad_norm: 0.8757738190219373, iteration: 134096
loss: 1.042169213294983,grad_norm: 0.9999991294979992, iteration: 134097
loss: 1.027124047279358,grad_norm: 0.942717012463529, iteration: 134098
loss: 0.985241174697876,grad_norm: 0.8142499090117187, iteration: 134099
loss: 0.9627448320388794,grad_norm: 0.9897180771561489, iteration: 134100
loss: 1.0116374492645264,grad_norm: 0.9212551572455863, iteration: 134101
loss: 1.0021418333053589,grad_norm: 0.8272911830739261, iteration: 134102
loss: 1.0545895099639893,grad_norm: 0.9999992806500472, iteration: 134103
loss: 1.0102249383926392,grad_norm: 0.8296418176124978, iteration: 134104
loss: 1.0087497234344482,grad_norm: 0.9999995660744616, iteration: 134105
loss: 0.9867487549781799,grad_norm: 0.7524312003867561, iteration: 134106
loss: 1.0205848217010498,grad_norm: 0.6803019767895897, iteration: 134107
loss: 1.00905442237854,grad_norm: 0.8193251901701644, iteration: 134108
loss: 1.0495845079421997,grad_norm: 0.9999992134342763, iteration: 134109
loss: 0.9890301823616028,grad_norm: 0.9999991584896295, iteration: 134110
loss: 0.9977936148643494,grad_norm: 0.9999990794680992, iteration: 134111
loss: 1.1137570142745972,grad_norm: 0.9999997293392692, iteration: 134112
loss: 0.9759300947189331,grad_norm: 0.8863737941116652, iteration: 134113
loss: 0.9886765480041504,grad_norm: 0.9999991843059449, iteration: 134114
loss: 1.05678391456604,grad_norm: 0.9999994878888486, iteration: 134115
loss: 1.0060594081878662,grad_norm: 0.9999997352915108, iteration: 134116
loss: 1.0145275592803955,grad_norm: 0.9743434371421239, iteration: 134117
loss: 0.9972449541091919,grad_norm: 0.9999994427164448, iteration: 134118
loss: 1.0191869735717773,grad_norm: 0.8889894042978337, iteration: 134119
loss: 1.0149266719818115,grad_norm: 0.9999991132074791, iteration: 134120
loss: 1.0458909273147583,grad_norm: 0.9999993649780285, iteration: 134121
loss: 1.0104740858078003,grad_norm: 0.8645698934547851, iteration: 134122
loss: 1.042417287826538,grad_norm: 0.9999993389412718, iteration: 134123
loss: 0.9942243695259094,grad_norm: 0.999998981897789, iteration: 134124
loss: 0.9824048280715942,grad_norm: 0.9999991137912518, iteration: 134125
loss: 1.0632268190383911,grad_norm: 0.9999994675404819, iteration: 134126
loss: 1.0756663084030151,grad_norm: 0.999999758502849, iteration: 134127
loss: 0.9933406710624695,grad_norm: 0.9518917220542477, iteration: 134128
loss: 1.0135674476623535,grad_norm: 0.9999991425537891, iteration: 134129
loss: 0.9680614471435547,grad_norm: 0.7926341503607948, iteration: 134130
loss: 1.0026274919509888,grad_norm: 0.7528029654871339, iteration: 134131
loss: 1.1619220972061157,grad_norm: 0.9999998927892295, iteration: 134132
loss: 1.0090590715408325,grad_norm: 0.9999990619355204, iteration: 134133
loss: 1.0703428983688354,grad_norm: 0.9882016191555455, iteration: 134134
loss: 0.9651284217834473,grad_norm: 0.8724321452294231, iteration: 134135
loss: 0.9826392531394958,grad_norm: 0.9867930233822373, iteration: 134136
loss: 1.0098323822021484,grad_norm: 0.9999991413419074, iteration: 134137
loss: 1.0734535455703735,grad_norm: 0.9999999952420096, iteration: 134138
loss: 1.043463110923767,grad_norm: 0.8719132528694542, iteration: 134139
loss: 1.0620156526565552,grad_norm: 0.9999993906274671, iteration: 134140
loss: 0.9832746386528015,grad_norm: 0.9298451530845651, iteration: 134141
loss: 1.0205570459365845,grad_norm: 0.9443422340443647, iteration: 134142
loss: 1.0186549425125122,grad_norm: 0.9999991192188014, iteration: 134143
loss: 1.0164560079574585,grad_norm: 0.999999693664818, iteration: 134144
loss: 0.9994170665740967,grad_norm: 0.9487981508384674, iteration: 134145
loss: 1.0183236598968506,grad_norm: 0.8115959163265093, iteration: 134146
loss: 1.0394017696380615,grad_norm: 0.811864621345487, iteration: 134147
loss: 0.9610804915428162,grad_norm: 0.959604437962545, iteration: 134148
loss: 1.0214377641677856,grad_norm: 0.9446632496112365, iteration: 134149
loss: 0.9965699911117554,grad_norm: 0.9610304450265841, iteration: 134150
loss: 1.0402511358261108,grad_norm: 0.9999995056959357, iteration: 134151
loss: 1.0381183624267578,grad_norm: 0.9431669038158914, iteration: 134152
loss: 1.0551751852035522,grad_norm: 0.9999995748862711, iteration: 134153
loss: 1.0046576261520386,grad_norm: 0.7624882914451887, iteration: 134154
loss: 1.0190987586975098,grad_norm: 0.8571267094324321, iteration: 134155
loss: 1.002756953239441,grad_norm: 0.9999996694565644, iteration: 134156
loss: 1.0149364471435547,grad_norm: 0.9749189308852214, iteration: 134157
loss: 1.0163456201553345,grad_norm: 0.9999992566056771, iteration: 134158
loss: 0.9909604787826538,grad_norm: 0.999999410767763, iteration: 134159
loss: 1.0102853775024414,grad_norm: 0.7550842339057267, iteration: 134160
loss: 0.97832852602005,grad_norm: 0.8531626762272037, iteration: 134161
loss: 0.9655519723892212,grad_norm: 0.9508781491118926, iteration: 134162
loss: 1.012706995010376,grad_norm: 0.9162175949917455, iteration: 134163
loss: 1.104508638381958,grad_norm: 0.9999992444094163, iteration: 134164
loss: 1.0288063287734985,grad_norm: 0.9203075135241546, iteration: 134165
loss: 1.0372287034988403,grad_norm: 0.9999998633705754, iteration: 134166
loss: 1.0130590200424194,grad_norm: 0.9999991008968016, iteration: 134167
loss: 0.9962568879127502,grad_norm: 0.8918515148685735, iteration: 134168
loss: 1.0245909690856934,grad_norm: 0.9999993184502923, iteration: 134169
loss: 1.0032870769500732,grad_norm: 0.8994412215492205, iteration: 134170
loss: 1.0477325916290283,grad_norm: 0.9999994040274751, iteration: 134171
loss: 1.0033581256866455,grad_norm: 0.9999990128670594, iteration: 134172
loss: 0.9978924989700317,grad_norm: 0.7903464257206869, iteration: 134173
loss: 1.0034918785095215,grad_norm: 0.760056458946582, iteration: 134174
loss: 0.9809713959693909,grad_norm: 0.9999990631119645, iteration: 134175
loss: 0.972939670085907,grad_norm: 0.7875304010470738, iteration: 134176
loss: 1.0065908432006836,grad_norm: 0.9999991774277569, iteration: 134177
loss: 1.131239652633667,grad_norm: 0.9999998816672437, iteration: 134178
loss: 1.0146543979644775,grad_norm: 0.9999992568209701, iteration: 134179
loss: 0.9974638819694519,grad_norm: 0.9182038906577225, iteration: 134180
loss: 0.9867874979972839,grad_norm: 0.766709303233546, iteration: 134181
loss: 1.0027447938919067,grad_norm: 0.9405479599717883, iteration: 134182
loss: 1.0789474248886108,grad_norm: 0.9999992981763111, iteration: 134183
loss: 1.0083242654800415,grad_norm: 0.8485949198814664, iteration: 134184
loss: 1.0347836017608643,grad_norm: 0.9999990658499893, iteration: 134185
loss: 1.1192020177841187,grad_norm: 0.9999996427498774, iteration: 134186
loss: 0.960050106048584,grad_norm: 0.9999990236276639, iteration: 134187
loss: 1.0097795724868774,grad_norm: 0.9999992604284347, iteration: 134188
loss: 1.0451732873916626,grad_norm: 0.9999991052219611, iteration: 134189
loss: 1.011799931526184,grad_norm: 0.9130837897424797, iteration: 134190
loss: 0.9788601398468018,grad_norm: 0.9999990959283355, iteration: 134191
loss: 1.0623409748077393,grad_norm: 0.9453485484512352, iteration: 134192
loss: 0.9850764870643616,grad_norm: 0.906524442076524, iteration: 134193
loss: 0.9861382842063904,grad_norm: 0.999999914240927, iteration: 134194
loss: 1.0356156826019287,grad_norm: 0.999999866689306, iteration: 134195
loss: 1.0021822452545166,grad_norm: 0.8293492434071843, iteration: 134196
loss: 0.9989927411079407,grad_norm: 0.8699135176524999, iteration: 134197
loss: 1.2051420211791992,grad_norm: 0.9516889554731172, iteration: 134198
loss: 0.9926818609237671,grad_norm: 0.9236627468998551, iteration: 134199
loss: 1.051179051399231,grad_norm: 0.9999993070824794, iteration: 134200
loss: 0.9871805906295776,grad_norm: 0.9123239056299219, iteration: 134201
loss: 1.020548939704895,grad_norm: 0.8999449029504113, iteration: 134202
loss: 0.9746336936950684,grad_norm: 0.8941252005638913, iteration: 134203
loss: 1.0348402261734009,grad_norm: 0.8642499344348872, iteration: 134204
loss: 0.9720309376716614,grad_norm: 0.9999991091380024, iteration: 134205
loss: 0.9805578589439392,grad_norm: 0.9456705355889359, iteration: 134206
loss: 1.0033752918243408,grad_norm: 0.9999991157749855, iteration: 134207
loss: 1.0045236349105835,grad_norm: 0.8169859541013428, iteration: 134208
loss: 1.0481946468353271,grad_norm: 0.9999996855506202, iteration: 134209
loss: 1.02909255027771,grad_norm: 0.9999996213695856, iteration: 134210
loss: 1.0020604133605957,grad_norm: 0.7394225063316139, iteration: 134211
loss: 1.028399109840393,grad_norm: 0.8112833969260205, iteration: 134212
loss: 1.0026237964630127,grad_norm: 0.7955874801600438, iteration: 134213
loss: 1.0313302278518677,grad_norm: 0.9999990989835341, iteration: 134214
loss: 1.0163006782531738,grad_norm: 0.8525121331073774, iteration: 134215
loss: 0.9871262907981873,grad_norm: 0.7725715881041525, iteration: 134216
loss: 1.0863465070724487,grad_norm: 0.9999992386075436, iteration: 134217
loss: 0.9782600998878479,grad_norm: 0.858942144581832, iteration: 134218
loss: 1.0128239393234253,grad_norm: 0.7825604785777894, iteration: 134219
loss: 1.0275156497955322,grad_norm: 0.9999996324986224, iteration: 134220
loss: 0.9743603467941284,grad_norm: 0.8137010786613116, iteration: 134221
loss: 0.9982842803001404,grad_norm: 0.931198929967722, iteration: 134222
loss: 0.9723796248435974,grad_norm: 0.8135636025597013, iteration: 134223
loss: 1.0804235935211182,grad_norm: 0.9999991059091391, iteration: 134224
loss: 0.9982197284698486,grad_norm: 0.8345204660590285, iteration: 134225
loss: 1.023766279220581,grad_norm: 0.9536794744553216, iteration: 134226
loss: 1.0146504640579224,grad_norm: 0.8619669628268444, iteration: 134227
loss: 1.0483965873718262,grad_norm: 0.9999995835034668, iteration: 134228
loss: 1.0204252004623413,grad_norm: 0.9999995787345558, iteration: 134229
loss: 1.0290383100509644,grad_norm: 0.913072338789452, iteration: 134230
loss: 1.0328752994537354,grad_norm: 0.999999602535348, iteration: 134231
loss: 1.0030065774917603,grad_norm: 0.9472601668822259, iteration: 134232
loss: 1.0056272745132446,grad_norm: 0.876212374012181, iteration: 134233
loss: 1.0406911373138428,grad_norm: 0.9999995954620721, iteration: 134234
loss: 1.0587573051452637,grad_norm: 0.9999992600178798, iteration: 134235
loss: 1.0933585166931152,grad_norm: 0.9999998116555602, iteration: 134236
loss: 0.9969563484191895,grad_norm: 0.7282886491623923, iteration: 134237
loss: 1.0311702489852905,grad_norm: 0.9999996925128842, iteration: 134238
loss: 1.0062884092330933,grad_norm: 0.9999991982507859, iteration: 134239
loss: 1.0120936632156372,grad_norm: 0.9999989448995555, iteration: 134240
loss: 1.0426281690597534,grad_norm: 0.9999994014560577, iteration: 134241
loss: 0.9709720015525818,grad_norm: 0.9999992095791452, iteration: 134242
loss: 1.0203338861465454,grad_norm: 0.9999993577129244, iteration: 134243
loss: 1.1417994499206543,grad_norm: 0.9999995901807324, iteration: 134244
loss: 0.9955470561981201,grad_norm: 0.9999997833516615, iteration: 134245
loss: 0.9989111423492432,grad_norm: 0.9926564741921691, iteration: 134246
loss: 0.9850824475288391,grad_norm: 0.8656163573239544, iteration: 134247
loss: 1.0893641710281372,grad_norm: 0.946697332474292, iteration: 134248
loss: 0.9997897148132324,grad_norm: 0.9999993897182959, iteration: 134249
loss: 1.02544367313385,grad_norm: 0.9489792628756646, iteration: 134250
loss: 1.0046191215515137,grad_norm: 0.9999991077452431, iteration: 134251
loss: 0.9873713850975037,grad_norm: 0.7856536830532741, iteration: 134252
loss: 1.006654977798462,grad_norm: 0.9258351430062369, iteration: 134253
loss: 1.0056135654449463,grad_norm: 0.877943998365794, iteration: 134254
loss: 1.0097867250442505,grad_norm: 0.8153728433349082, iteration: 134255
loss: 1.1224486827850342,grad_norm: 0.9999994225893208, iteration: 134256
loss: 1.000590205192566,grad_norm: 0.9999990929708783, iteration: 134257
loss: 1.1304962635040283,grad_norm: 0.9999995959928777, iteration: 134258
loss: 0.9992179870605469,grad_norm: 0.9999992325878332, iteration: 134259
loss: 1.0015980005264282,grad_norm: 0.7999011928911038, iteration: 134260
loss: 1.0016562938690186,grad_norm: 0.9999997591389138, iteration: 134261
loss: 0.9782050251960754,grad_norm: 0.8957930251347211, iteration: 134262
loss: 1.057690143585205,grad_norm: 0.931487325322277, iteration: 134263
loss: 0.9954018592834473,grad_norm: 0.9409860862799198, iteration: 134264
loss: 0.9838336706161499,grad_norm: 0.9999990464046046, iteration: 134265
loss: 0.9829390048980713,grad_norm: 0.9842845006268837, iteration: 134266
loss: 1.0219227075576782,grad_norm: 0.9999997032106892, iteration: 134267
loss: 1.0193663835525513,grad_norm: 0.999999796612438, iteration: 134268
loss: 1.0215981006622314,grad_norm: 0.9248758566415107, iteration: 134269
loss: 0.99750816822052,grad_norm: 0.7897912593066964, iteration: 134270
loss: 0.990013062953949,grad_norm: 0.816571548363172, iteration: 134271
loss: 1.0695972442626953,grad_norm: 0.9999996726635508, iteration: 134272
loss: 1.0146504640579224,grad_norm: 0.9999990709356762, iteration: 134273
loss: 1.0332499742507935,grad_norm: 0.9663832581734376, iteration: 134274
loss: 0.9757688045501709,grad_norm: 0.8406374896355401, iteration: 134275
loss: 0.982020378112793,grad_norm: 0.873442264460967, iteration: 134276
loss: 1.0174192190170288,grad_norm: 0.8412526697714864, iteration: 134277
loss: 0.9613431692123413,grad_norm: 0.8693231150786225, iteration: 134278
loss: 1.0021600723266602,grad_norm: 0.810893124613427, iteration: 134279
loss: 0.9959415793418884,grad_norm: 0.8111013559257365, iteration: 134280
loss: 1.007418155670166,grad_norm: 0.9144624638506951, iteration: 134281
loss: 1.0092025995254517,grad_norm: 0.9999990506485414, iteration: 134282
loss: 0.9727444648742676,grad_norm: 0.9733900517335249, iteration: 134283
loss: 1.0130817890167236,grad_norm: 0.9837932219275045, iteration: 134284
loss: 1.010236144065857,grad_norm: 0.9053302221460947, iteration: 134285
loss: 0.9896630048751831,grad_norm: 0.8522612342406607, iteration: 134286
loss: 1.0334913730621338,grad_norm: 0.9999998813452495, iteration: 134287
loss: 1.0118821859359741,grad_norm: 0.9145274147005623, iteration: 134288
loss: 1.1647558212280273,grad_norm: 0.9999992298807102, iteration: 134289
loss: 1.047914981842041,grad_norm: 0.8885285127883806, iteration: 134290
loss: 1.0992695093154907,grad_norm: 0.9999991869175104, iteration: 134291
loss: 0.994011640548706,grad_norm: 0.9999991771719987, iteration: 134292
loss: 1.0160958766937256,grad_norm: 0.9550356894690435, iteration: 134293
loss: 1.0693477392196655,grad_norm: 0.80171625126019, iteration: 134294
loss: 1.0810753107070923,grad_norm: 0.9999990515557644, iteration: 134295
loss: 1.002668857574463,grad_norm: 0.9999999386623201, iteration: 134296
loss: 0.968752384185791,grad_norm: 0.9999997210421299, iteration: 134297
loss: 1.0148905515670776,grad_norm: 0.9999995443717666, iteration: 134298
loss: 1.0240297317504883,grad_norm: 0.9999991844900921, iteration: 134299
loss: 0.9963105916976929,grad_norm: 0.991736609802834, iteration: 134300
loss: 1.0201349258422852,grad_norm: 0.7688474515896877, iteration: 134301
loss: 1.0349653959274292,grad_norm: 0.9880234370127375, iteration: 134302
loss: 1.134372353553772,grad_norm: 0.9999999220373456, iteration: 134303
loss: 1.0303552150726318,grad_norm: 0.9999993568492415, iteration: 134304
loss: 1.0157604217529297,grad_norm: 0.7464919578047463, iteration: 134305
loss: 1.0420249700546265,grad_norm: 0.9999993339954675, iteration: 134306
loss: 0.9710562825202942,grad_norm: 0.8726004392045107, iteration: 134307
loss: 0.9984700679779053,grad_norm: 0.9999991365997379, iteration: 134308
loss: 1.0756129026412964,grad_norm: 0.999999165660824, iteration: 134309
loss: 1.069566011428833,grad_norm: 0.9999997271051796, iteration: 134310
loss: 1.0749619007110596,grad_norm: 0.9783634865048096, iteration: 134311
loss: 0.9823249578475952,grad_norm: 0.9999997896436319, iteration: 134312
loss: 1.0034726858139038,grad_norm: 0.9414672638686375, iteration: 134313
loss: 0.9974636435508728,grad_norm: 0.916709745951996, iteration: 134314
loss: 1.0103402137756348,grad_norm: 0.9999990856595646, iteration: 134315
loss: 1.41403067111969,grad_norm: 0.9999998302604535, iteration: 134316
loss: 0.9710617065429688,grad_norm: 0.8270997194211742, iteration: 134317
loss: 1.0166223049163818,grad_norm: 0.9105650345163494, iteration: 134318
loss: 1.0500330924987793,grad_norm: 0.9999993402637318, iteration: 134319
loss: 0.9901108741760254,grad_norm: 0.7978430180216374, iteration: 134320
loss: 1.0143682956695557,grad_norm: 0.7786389387821404, iteration: 134321
loss: 0.9551817178726196,grad_norm: 0.9280547396456242, iteration: 134322
loss: 1.081132173538208,grad_norm: 0.9999996670271114, iteration: 134323
loss: 1.3615487813949585,grad_norm: 0.9999999517274718, iteration: 134324
loss: 1.109910488128662,grad_norm: 0.9999995675803831, iteration: 134325
loss: 1.036998987197876,grad_norm: 0.9999993739634306, iteration: 134326
loss: 1.014519214630127,grad_norm: 0.8534130847834016, iteration: 134327
loss: 1.0164011716842651,grad_norm: 0.9999991914433339, iteration: 134328
loss: 1.0235400199890137,grad_norm: 0.9276076772259645, iteration: 134329
loss: 1.0263584852218628,grad_norm: 0.9999995552072851, iteration: 134330
loss: 0.9884613156318665,grad_norm: 0.9999996141493438, iteration: 134331
loss: 0.9848710894584656,grad_norm: 0.7852408611883415, iteration: 134332
loss: 1.007883906364441,grad_norm: 0.7598821455788028, iteration: 134333
loss: 1.0021204948425293,grad_norm: 0.880334072474165, iteration: 134334
loss: 1.3334168195724487,grad_norm: 0.9999993834039119, iteration: 134335
loss: 1.0781841278076172,grad_norm: 0.9999995908802753, iteration: 134336
loss: 1.048862338066101,grad_norm: 0.9999999039340656, iteration: 134337
loss: 1.0269328355789185,grad_norm: 0.8798091482111551, iteration: 134338
loss: 1.0707746744155884,grad_norm: 0.9357452024849072, iteration: 134339
loss: 1.0667693614959717,grad_norm: 0.9999993734351734, iteration: 134340
loss: 1.0522143840789795,grad_norm: 0.9999993701015264, iteration: 134341
loss: 1.1044604778289795,grad_norm: 0.9999997767042149, iteration: 134342
loss: 1.0118941068649292,grad_norm: 0.9274737120623948, iteration: 134343
loss: 1.078054428100586,grad_norm: 0.9999992122280947, iteration: 134344
loss: 1.0145328044891357,grad_norm: 0.999999117071796, iteration: 134345
loss: 1.1058871746063232,grad_norm: 0.9999999771243422, iteration: 134346
loss: 0.9532409310340881,grad_norm: 0.8057952963187328, iteration: 134347
loss: 1.0056103467941284,grad_norm: 0.7859390707906327, iteration: 134348
loss: 1.0184698104858398,grad_norm: 0.7792780433054688, iteration: 134349
loss: 0.9830332398414612,grad_norm: 0.8188546285525193, iteration: 134350
loss: 0.9575778245925903,grad_norm: 0.8936529340575374, iteration: 134351
loss: 1.027550458908081,grad_norm: 0.9999995482989099, iteration: 134352
loss: 1.0141016244888306,grad_norm: 0.8920325329887676, iteration: 134353
loss: 0.9743710160255432,grad_norm: 0.8547561106336989, iteration: 134354
loss: 0.9750174283981323,grad_norm: 0.9284863074836057, iteration: 134355
loss: 0.9962801933288574,grad_norm: 0.7927572477443368, iteration: 134356
loss: 0.9811654686927795,grad_norm: 0.8344018136772471, iteration: 134357
loss: 1.004461646080017,grad_norm: 0.8802166162561496, iteration: 134358
loss: 0.9955056309700012,grad_norm: 0.9999992197466309, iteration: 134359
loss: 1.004776120185852,grad_norm: 0.8072213723336139, iteration: 134360
loss: 0.9982553124427795,grad_norm: 0.954186031728719, iteration: 134361
loss: 1.0400965213775635,grad_norm: 0.9999992064471113, iteration: 134362
loss: 1.0143065452575684,grad_norm: 0.9999993546742676, iteration: 134363
loss: 1.0215851068496704,grad_norm: 0.9993653165459018, iteration: 134364
loss: 0.9831414818763733,grad_norm: 0.8920993555556409, iteration: 134365
loss: 0.9884329438209534,grad_norm: 0.9999991547784939, iteration: 134366
loss: 1.0418951511383057,grad_norm: 0.9999993725027594, iteration: 134367
loss: 0.990421712398529,grad_norm: 0.8955458497738464, iteration: 134368
loss: 0.9963763952255249,grad_norm: 0.8937171929792197, iteration: 134369
loss: 1.0544317960739136,grad_norm: 0.9999993991903467, iteration: 134370
loss: 1.0001782178878784,grad_norm: 0.8364130129928772, iteration: 134371
loss: 0.9860422015190125,grad_norm: 0.8249687523658451, iteration: 134372
loss: 1.0699666738510132,grad_norm: 0.950956580547576, iteration: 134373
loss: 0.9822648167610168,grad_norm: 0.9999990119336807, iteration: 134374
loss: 1.0123313665390015,grad_norm: 0.8327697354126808, iteration: 134375
loss: 1.1205861568450928,grad_norm: 0.999999126856072, iteration: 134376
loss: 0.9956116080284119,grad_norm: 0.8217480566665952, iteration: 134377
loss: 0.9770693778991699,grad_norm: 0.8431100323404714, iteration: 134378
loss: 0.9931954741477966,grad_norm: 0.7944868725647442, iteration: 134379
loss: 0.9910602569580078,grad_norm: 0.9999999113155273, iteration: 134380
loss: 1.005905032157898,grad_norm: 0.8019663414333882, iteration: 134381
loss: 0.9854384660720825,grad_norm: 0.9379344593288591, iteration: 134382
loss: 0.9966862201690674,grad_norm: 0.9127471328520328, iteration: 134383
loss: 1.0153813362121582,grad_norm: 0.7398269307702726, iteration: 134384
loss: 0.990766704082489,grad_norm: 0.9027607988612464, iteration: 134385
loss: 1.074007272720337,grad_norm: 0.8035038119802465, iteration: 134386
loss: 1.0248284339904785,grad_norm: 0.8495241093643353, iteration: 134387
loss: 1.2070932388305664,grad_norm: 0.9999992358052293, iteration: 134388
loss: 1.0076518058776855,grad_norm: 0.999999797656186, iteration: 134389
loss: 1.0051910877227783,grad_norm: 0.9999991562406343, iteration: 134390
loss: 1.1042479276657104,grad_norm: 0.9999991540559169, iteration: 134391
loss: 1.008244276046753,grad_norm: 0.9999993313931687, iteration: 134392
loss: 1.0088255405426025,grad_norm: 0.9999996882957426, iteration: 134393
loss: 1.08528733253479,grad_norm: 0.9999992921299486, iteration: 134394
loss: 0.9556589126586914,grad_norm: 0.9367947962144648, iteration: 134395
loss: 1.0364627838134766,grad_norm: 0.9184360292610532, iteration: 134396
loss: 0.9937727451324463,grad_norm: 0.999999194429283, iteration: 134397
loss: 1.0297460556030273,grad_norm: 0.9325356398504718, iteration: 134398
loss: 1.183590292930603,grad_norm: 0.9999999317517752, iteration: 134399
loss: 0.9777816534042358,grad_norm: 0.8349102221208806, iteration: 134400
loss: 1.0547370910644531,grad_norm: 0.8739832572955527, iteration: 134401
loss: 1.034123420715332,grad_norm: 0.9999992248967244, iteration: 134402
loss: 1.0164531469345093,grad_norm: 0.7178850459630987, iteration: 134403
loss: 1.064186692237854,grad_norm: 0.9999996519480311, iteration: 134404
loss: 0.9796648621559143,grad_norm: 0.9154380909677597, iteration: 134405
loss: 1.0206549167633057,grad_norm: 0.9999992132388691, iteration: 134406
loss: 1.0268065929412842,grad_norm: 0.8980394196012067, iteration: 134407
loss: 0.9927036166191101,grad_norm: 0.8665064798654303, iteration: 134408
loss: 0.980029284954071,grad_norm: 0.7402085818736373, iteration: 134409
loss: 1.075218915939331,grad_norm: 0.9396195759611743, iteration: 134410
loss: 1.0027716159820557,grad_norm: 0.9999990742840782, iteration: 134411
loss: 0.9979246854782104,grad_norm: 0.9892193168768058, iteration: 134412
loss: 1.095546007156372,grad_norm: 0.9287745797239937, iteration: 134413
loss: 1.0407112836837769,grad_norm: 0.888029540503629, iteration: 134414
loss: 1.0651003122329712,grad_norm: 0.9999993845986231, iteration: 134415
loss: 1.0426666736602783,grad_norm: 0.9999994167298633, iteration: 134416
loss: 1.023637056350708,grad_norm: 0.8477002895426067, iteration: 134417
loss: 1.0047043561935425,grad_norm: 0.9242484231003024, iteration: 134418
loss: 1.028738021850586,grad_norm: 0.9999992035181957, iteration: 134419
loss: 1.3155615329742432,grad_norm: 0.9999997617478027, iteration: 134420
loss: 1.0386664867401123,grad_norm: 0.9999989817940619, iteration: 134421
loss: 1.014511227607727,grad_norm: 0.9999989480279344, iteration: 134422
loss: 1.0024398565292358,grad_norm: 0.9999991717870408, iteration: 134423
loss: 1.0171868801116943,grad_norm: 0.9999990801954887, iteration: 134424
loss: 1.0819181203842163,grad_norm: 0.9999990276380967, iteration: 134425
loss: 1.0023082494735718,grad_norm: 0.8964204727775094, iteration: 134426
loss: 0.9956345558166504,grad_norm: 0.8866151779546801, iteration: 134427
loss: 0.9814824461936951,grad_norm: 0.9711171199336358, iteration: 134428
loss: 0.9836212992668152,grad_norm: 0.7855848696049198, iteration: 134429
loss: 0.9971585869789124,grad_norm: 0.8723732741676, iteration: 134430
loss: 1.003349781036377,grad_norm: 0.870556429017553, iteration: 134431
loss: 1.1397089958190918,grad_norm: 0.999999765325816, iteration: 134432
loss: 1.0261077880859375,grad_norm: 0.9999991874295283, iteration: 134433
loss: 1.0030142068862915,grad_norm: 0.7545115764494076, iteration: 134434
loss: 1.0091878175735474,grad_norm: 0.8958209813302264, iteration: 134435
loss: 1.0197652578353882,grad_norm: 0.9999996671729813, iteration: 134436
loss: 1.0048377513885498,grad_norm: 0.9777703463807722, iteration: 134437
loss: 0.9896584749221802,grad_norm: 0.9999999485036613, iteration: 134438
loss: 1.031766414642334,grad_norm: 0.9999994076915873, iteration: 134439
loss: 1.0011919736862183,grad_norm: 0.9692632944225715, iteration: 134440
loss: 1.030194878578186,grad_norm: 0.9999991171192504, iteration: 134441
loss: 1.0784955024719238,grad_norm: 0.9999997201918678, iteration: 134442
loss: 1.1226807832717896,grad_norm: 0.9999998952521061, iteration: 134443
loss: 1.0099608898162842,grad_norm: 0.9430364626808548, iteration: 134444
loss: 0.9959722757339478,grad_norm: 0.9670789462265, iteration: 134445
loss: 1.0476293563842773,grad_norm: 0.8010564522701041, iteration: 134446
loss: 1.0734455585479736,grad_norm: 0.999999138703334, iteration: 134447
loss: 0.9909359216690063,grad_norm: 0.8494169755301545, iteration: 134448
loss: 0.9906324744224548,grad_norm: 0.9999991957536306, iteration: 134449
loss: 1.0269758701324463,grad_norm: 0.8910689942880459, iteration: 134450
loss: 1.021779179573059,grad_norm: 0.9564999595930703, iteration: 134451
loss: 0.9997661113739014,grad_norm: 0.9230210190171457, iteration: 134452
loss: 0.9865711331367493,grad_norm: 0.8728431502253194, iteration: 134453
loss: 1.0257960557937622,grad_norm: 0.9999994085557978, iteration: 134454
loss: 0.9817498922348022,grad_norm: 0.7720560509959697, iteration: 134455
loss: 0.9770105481147766,grad_norm: 0.9123975080712754, iteration: 134456
loss: 0.9466106295585632,grad_norm: 0.962399614014455, iteration: 134457
loss: 1.020912528038025,grad_norm: 0.8551544110552088, iteration: 134458
loss: 0.9884801506996155,grad_norm: 0.8529695365198494, iteration: 134459
loss: 1.015391230583191,grad_norm: 0.9999990646033381, iteration: 134460
loss: 1.0468026399612427,grad_norm: 0.8926735249006528, iteration: 134461
loss: 0.975926399230957,grad_norm: 0.825591682272788, iteration: 134462
loss: 1.0555648803710938,grad_norm: 0.8296065481705893, iteration: 134463
loss: 0.9983941316604614,grad_norm: 0.9064715283076007, iteration: 134464
loss: 1.1660500764846802,grad_norm: 0.9999994805720785, iteration: 134465
loss: 1.0187572240829468,grad_norm: 0.8596340573150314, iteration: 134466
loss: 1.018246054649353,grad_norm: 0.999999818404631, iteration: 134467
loss: 1.016491413116455,grad_norm: 0.9999991984600594, iteration: 134468
loss: 1.0095787048339844,grad_norm: 0.852693357340994, iteration: 134469
loss: 0.9858503341674805,grad_norm: 0.9999996114165329, iteration: 134470
loss: 0.9850215911865234,grad_norm: 0.8565498197552355, iteration: 134471
loss: 0.9513223171234131,grad_norm: 0.9999991823114156, iteration: 134472
loss: 1.0020647048950195,grad_norm: 0.9999998948694081, iteration: 134473
loss: 1.0184283256530762,grad_norm: 0.999999450674549, iteration: 134474
loss: 1.037465214729309,grad_norm: 0.891065598123483, iteration: 134475
loss: 0.9694592952728271,grad_norm: 0.887519945449716, iteration: 134476
loss: 0.9661087393760681,grad_norm: 0.9737374764361294, iteration: 134477
loss: 0.9891219139099121,grad_norm: 0.8805996500707512, iteration: 134478
loss: 1.0019956827163696,grad_norm: 0.9964708695528962, iteration: 134479
loss: 1.0018757581710815,grad_norm: 0.9296477014472546, iteration: 134480
loss: 1.022699236869812,grad_norm: 0.9999994980730853, iteration: 134481
loss: 1.009897232055664,grad_norm: 0.8365395243095815, iteration: 134482
loss: 1.059533953666687,grad_norm: 0.9999993297128018, iteration: 134483
loss: 0.9667840600013733,grad_norm: 0.9999990656892063, iteration: 134484
loss: 0.9964174628257751,grad_norm: 0.7618939865438606, iteration: 134485
loss: 0.9991847276687622,grad_norm: 0.8424258379086227, iteration: 134486
loss: 0.9706880450248718,grad_norm: 0.8096975280119695, iteration: 134487
loss: 1.0093421936035156,grad_norm: 0.8265377307703385, iteration: 134488
loss: 0.9882224798202515,grad_norm: 0.8300188387639575, iteration: 134489
loss: 0.9871130585670471,grad_norm: 0.9999993782733376, iteration: 134490
loss: 0.9982901215553284,grad_norm: 0.9999991441737892, iteration: 134491
loss: 1.0359644889831543,grad_norm: 0.8779570542531304, iteration: 134492
loss: 1.0126842260360718,grad_norm: 0.9297156127160082, iteration: 134493
loss: 1.0338890552520752,grad_norm: 0.7769867995208415, iteration: 134494
loss: 1.0348715782165527,grad_norm: 0.9999997853087955, iteration: 134495
loss: 1.047866702079773,grad_norm: 0.7994807055639948, iteration: 134496
loss: 1.0053579807281494,grad_norm: 0.8382120278856132, iteration: 134497
loss: 1.028734803199768,grad_norm: 0.9999991617834003, iteration: 134498
loss: 1.001688838005066,grad_norm: 0.8603625592763392, iteration: 134499
loss: 0.9686300754547119,grad_norm: 0.9999994225162451, iteration: 134500
loss: 0.9968644976615906,grad_norm: 0.9146108395325161, iteration: 134501
loss: 0.9925233721733093,grad_norm: 0.8757906417552453, iteration: 134502
loss: 1.0963492393493652,grad_norm: 0.9999998659573583, iteration: 134503
loss: 0.9782117605209351,grad_norm: 0.999999119625752, iteration: 134504
loss: 1.0417546033859253,grad_norm: 0.9212111577397804, iteration: 134505
loss: 1.0285321474075317,grad_norm: 0.9999999655324497, iteration: 134506
loss: 1.1128566265106201,grad_norm: 0.9999990713517718, iteration: 134507
loss: 0.9974819421768188,grad_norm: 0.9999991272305756, iteration: 134508
loss: 0.995261549949646,grad_norm: 0.8179140384653836, iteration: 134509
loss: 1.1890835762023926,grad_norm: 0.9999997152035262, iteration: 134510
loss: 0.9726468324661255,grad_norm: 0.818232747361651, iteration: 134511
loss: 1.0182405710220337,grad_norm: 0.9999992955838727, iteration: 134512
loss: 0.9954186081886292,grad_norm: 0.8536312696566781, iteration: 134513
loss: 1.0311328172683716,grad_norm: 0.8348916065513178, iteration: 134514
loss: 1.0058941841125488,grad_norm: 0.7897946960980206, iteration: 134515
loss: 0.9933067560195923,grad_norm: 0.8313895593319431, iteration: 134516
loss: 1.046451449394226,grad_norm: 0.9497607069517656, iteration: 134517
loss: 1.1102532148361206,grad_norm: 0.9999990856194065, iteration: 134518
loss: 1.0650910139083862,grad_norm: 0.8444849436435474, iteration: 134519
loss: 1.0249134302139282,grad_norm: 0.8507097228552617, iteration: 134520
loss: 0.9832179546356201,grad_norm: 0.999999081976317, iteration: 134521
loss: 1.0584501028060913,grad_norm: 0.8859867088097341, iteration: 134522
loss: 0.9955198764801025,grad_norm: 0.9969916184522488, iteration: 134523
loss: 0.9794426560401917,grad_norm: 0.9999992690020655, iteration: 134524
loss: 1.025739073753357,grad_norm: 0.9181167569003041, iteration: 134525
loss: 1.0159231424331665,grad_norm: 0.8298621738814906, iteration: 134526
loss: 0.9936285018920898,grad_norm: 0.7598225017082632, iteration: 134527
loss: 1.040244460105896,grad_norm: 0.9447297759508484, iteration: 134528
loss: 0.9973592162132263,grad_norm: 0.9999994660057833, iteration: 134529
loss: 0.9850429892539978,grad_norm: 0.9404032531491068, iteration: 134530
loss: 0.9738978743553162,grad_norm: 0.9608994806457344, iteration: 134531
loss: 1.0599303245544434,grad_norm: 0.9999999993464276, iteration: 134532
loss: 0.9901690483093262,grad_norm: 0.999998860137302, iteration: 134533
loss: 0.990015983581543,grad_norm: 0.9093691817897952, iteration: 134534
loss: 1.0406008958816528,grad_norm: 0.9999992308334923, iteration: 134535
loss: 1.0080296993255615,grad_norm: 0.9227272578337924, iteration: 134536
loss: 0.9946732521057129,grad_norm: 0.8336034443929624, iteration: 134537
loss: 1.0123279094696045,grad_norm: 0.9165127516176628, iteration: 134538
loss: 0.9957718849182129,grad_norm: 0.7773791940042859, iteration: 134539
loss: 1.0336531400680542,grad_norm: 0.9999992944546734, iteration: 134540
loss: 1.0172133445739746,grad_norm: 0.8930902436207397, iteration: 134541
loss: 0.9772371053695679,grad_norm: 0.9384716833213445, iteration: 134542
loss: 1.0163747072219849,grad_norm: 0.9999990662000041, iteration: 134543
loss: 0.9899715781211853,grad_norm: 0.9999992495286945, iteration: 134544
loss: 1.02505362033844,grad_norm: 0.9225515925906459, iteration: 134545
loss: 0.9772835969924927,grad_norm: 0.6927498296193076, iteration: 134546
loss: 1.0092408657073975,grad_norm: 0.8029130201747047, iteration: 134547
loss: 1.000999927520752,grad_norm: 0.9999991010801466, iteration: 134548
loss: 0.9900632500648499,grad_norm: 0.9999996083724885, iteration: 134549
loss: 1.186233401298523,grad_norm: 0.9999995015980985, iteration: 134550
loss: 1.0842710733413696,grad_norm: 0.9999990841224167, iteration: 134551
loss: 0.9699808359146118,grad_norm: 0.8398481823077645, iteration: 134552
loss: 1.020330786705017,grad_norm: 0.9999992155633196, iteration: 134553
loss: 1.075470209121704,grad_norm: 0.9999994335274869, iteration: 134554
loss: 1.0594909191131592,grad_norm: 0.9999993108380099, iteration: 134555
loss: 1.0866444110870361,grad_norm: 0.9971665639314359, iteration: 134556
loss: 0.9938436150550842,grad_norm: 0.9133774151785007, iteration: 134557
loss: 0.9877225756645203,grad_norm: 0.8360439821193372, iteration: 134558
loss: 0.9876260161399841,grad_norm: 0.7963535915792702, iteration: 134559
loss: 1.0275590419769287,grad_norm: 0.9999989633605145, iteration: 134560
loss: 1.0482732057571411,grad_norm: 0.9999992609695463, iteration: 134561
loss: 1.0021718740463257,grad_norm: 0.9999990217553283, iteration: 134562
loss: 0.9590397477149963,grad_norm: 0.9999991260602618, iteration: 134563
loss: 1.0306404829025269,grad_norm: 0.999999221028508, iteration: 134564
loss: 1.0282970666885376,grad_norm: 0.9937886445470037, iteration: 134565
loss: 1.001205325126648,grad_norm: 0.9175190727445469, iteration: 134566
loss: 1.0027390718460083,grad_norm: 0.7963590241191071, iteration: 134567
loss: 0.9875380396842957,grad_norm: 0.7549426781141548, iteration: 134568
loss: 0.999462902545929,grad_norm: 0.7762729130786195, iteration: 134569
loss: 1.0804290771484375,grad_norm: 0.9999995368639801, iteration: 134570
loss: 1.0298885107040405,grad_norm: 0.8384088618175689, iteration: 134571
loss: 1.0695741176605225,grad_norm: 0.9999991059378427, iteration: 134572
loss: 1.0349500179290771,grad_norm: 0.9999995757008203, iteration: 134573
loss: 1.0214637517929077,grad_norm: 0.9999992674967357, iteration: 134574
loss: 1.0210942029953003,grad_norm: 0.9999991651145184, iteration: 134575
loss: 1.0176849365234375,grad_norm: 0.9999998295687433, iteration: 134576
loss: 1.0204519033432007,grad_norm: 0.8439262541542203, iteration: 134577
loss: 1.1013391017913818,grad_norm: 0.9999993275985138, iteration: 134578
loss: 0.963336169719696,grad_norm: 0.9829724202263638, iteration: 134579
loss: 0.9949708580970764,grad_norm: 0.796063141150049, iteration: 134580
loss: 1.064216136932373,grad_norm: 0.9999999596546345, iteration: 134581
loss: 0.9807383418083191,grad_norm: 0.8831541353825141, iteration: 134582
loss: 0.9879258275032043,grad_norm: 0.8678836162196871, iteration: 134583
loss: 1.0851092338562012,grad_norm: 0.9999996524747222, iteration: 134584
loss: 1.0333298444747925,grad_norm: 0.9924017075831162, iteration: 134585
loss: 0.9677684307098389,grad_norm: 0.9999991158819207, iteration: 134586
loss: 1.0125572681427002,grad_norm: 0.7962814941821095, iteration: 134587
loss: 1.038637638092041,grad_norm: 0.9999996074023714, iteration: 134588
loss: 1.0643028020858765,grad_norm: 0.9999995318465943, iteration: 134589
loss: 1.0036755800247192,grad_norm: 0.7035959574013396, iteration: 134590
loss: 1.032553791999817,grad_norm: 0.8385651883436277, iteration: 134591
loss: 0.993069052696228,grad_norm: 0.9999992406699985, iteration: 134592
loss: 1.0087465047836304,grad_norm: 0.9428260200352492, iteration: 134593
loss: 1.037137746810913,grad_norm: 0.9999996233373614, iteration: 134594
loss: 0.9819098711013794,grad_norm: 0.7755756324330517, iteration: 134595
loss: 1.0186653137207031,grad_norm: 0.9426457937576013, iteration: 134596
loss: 1.0003080368041992,grad_norm: 0.8147739802044447, iteration: 134597
loss: 0.9998275637626648,grad_norm: 0.8947948232271113, iteration: 134598
loss: 0.9601842761039734,grad_norm: 0.9816157567964161, iteration: 134599
loss: 0.99733966588974,grad_norm: 0.8496038634972789, iteration: 134600
loss: 0.9922308921813965,grad_norm: 0.8991119312118263, iteration: 134601
loss: 0.9976568818092346,grad_norm: 0.7110524560095011, iteration: 134602
loss: 1.024519443511963,grad_norm: 0.890251817178514, iteration: 134603
loss: 0.9845564961433411,grad_norm: 0.9999990872016976, iteration: 134604
loss: 0.9972006678581238,grad_norm: 0.7752972095868063, iteration: 134605
loss: 1.011937141418457,grad_norm: 0.9999989274225437, iteration: 134606
loss: 0.9749796390533447,grad_norm: 0.9807157385231433, iteration: 134607
loss: 0.9736056327819824,grad_norm: 0.867036761473807, iteration: 134608
loss: 1.043207049369812,grad_norm: 0.9999990552112232, iteration: 134609
loss: 0.9913337230682373,grad_norm: 0.999999051541057, iteration: 134610
loss: 1.019200325012207,grad_norm: 0.9999997340009483, iteration: 134611
loss: 1.0357518196105957,grad_norm: 0.7937622432814073, iteration: 134612
loss: 0.977536141872406,grad_norm: 0.7896260950949823, iteration: 134613
loss: 0.9998531937599182,grad_norm: 0.8914331102218539, iteration: 134614
loss: 0.9992395639419556,grad_norm: 0.7778618673963257, iteration: 134615
loss: 1.002547025680542,grad_norm: 0.9999991572263054, iteration: 134616
loss: 1.052649974822998,grad_norm: 0.9999996820323154, iteration: 134617
loss: 1.0026460886001587,grad_norm: 0.9999990069599674, iteration: 134618
loss: 1.0097780227661133,grad_norm: 0.9603182407443315, iteration: 134619
loss: 1.0454497337341309,grad_norm: 0.8529410256459231, iteration: 134620
loss: 1.0622684955596924,grad_norm: 0.999999487060797, iteration: 134621
loss: 1.015871524810791,grad_norm: 0.9421805054389774, iteration: 134622
loss: 0.966548502445221,grad_norm: 0.8129678560747028, iteration: 134623
loss: 1.0001323223114014,grad_norm: 0.763883516857387, iteration: 134624
loss: 1.0217293500900269,grad_norm: 0.9426512294573373, iteration: 134625
loss: 0.9742658734321594,grad_norm: 0.8855118604437302, iteration: 134626
loss: 1.0308796167373657,grad_norm: 0.9999992548860672, iteration: 134627
loss: 1.0153754949569702,grad_norm: 0.8773086894588722, iteration: 134628
loss: 0.9822275638580322,grad_norm: 0.9999990495280803, iteration: 134629
loss: 0.9989262819290161,grad_norm: 0.9073167833090903, iteration: 134630
loss: 0.9751908183097839,grad_norm: 0.8853036325387177, iteration: 134631
loss: 0.9789751172065735,grad_norm: 0.9650357821910789, iteration: 134632
loss: 1.0492968559265137,grad_norm: 0.8146899243641973, iteration: 134633
loss: 1.0072847604751587,grad_norm: 0.8489755677362373, iteration: 134634
loss: 1.0401040315628052,grad_norm: 0.999999377079434, iteration: 134635
loss: 1.0047385692596436,grad_norm: 0.9999989828999366, iteration: 134636
loss: 1.0747039318084717,grad_norm: 0.9999997682621606, iteration: 134637
loss: 1.0302642583847046,grad_norm: 0.9999995766274968, iteration: 134638
loss: 1.0340067148208618,grad_norm: 0.9999992414958963, iteration: 134639
loss: 1.0142964124679565,grad_norm: 0.867118304984803, iteration: 134640
loss: 0.9294581413269043,grad_norm: 0.8896854424674487, iteration: 134641
loss: 1.0237394571304321,grad_norm: 0.9999991870369294, iteration: 134642
loss: 0.9452962875366211,grad_norm: 0.7706254824872288, iteration: 134643
loss: 1.0184217691421509,grad_norm: 0.9333540719233349, iteration: 134644
loss: 0.9838393330574036,grad_norm: 0.8315485517687453, iteration: 134645
loss: 1.0182815790176392,grad_norm: 0.9999992151863137, iteration: 134646
loss: 0.9982278347015381,grad_norm: 0.9352521222181227, iteration: 134647
loss: 1.0202573537826538,grad_norm: 0.8326960566623688, iteration: 134648
loss: 1.042192816734314,grad_norm: 0.9999992743308146, iteration: 134649
loss: 0.997454047203064,grad_norm: 0.9999991901056667, iteration: 134650
loss: 1.0843405723571777,grad_norm: 0.9999994218264036, iteration: 134651
loss: 0.9814181327819824,grad_norm: 0.8925144248010409, iteration: 134652
loss: 0.9619892239570618,grad_norm: 0.8137542478656573, iteration: 134653
loss: 1.0034360885620117,grad_norm: 0.7702141856447716, iteration: 134654
loss: 1.0295438766479492,grad_norm: 0.999999888480223, iteration: 134655
loss: 1.067816972732544,grad_norm: 0.9999993365917861, iteration: 134656
loss: 1.022918939590454,grad_norm: 0.8802437994671738, iteration: 134657
loss: 0.9657121300697327,grad_norm: 0.9095672709435483, iteration: 134658
loss: 1.0239436626434326,grad_norm: 0.8750201758594196, iteration: 134659
loss: 0.9845288395881653,grad_norm: 0.8431112631680099, iteration: 134660
loss: 1.0003666877746582,grad_norm: 0.996409117251258, iteration: 134661
loss: 1.0207245349884033,grad_norm: 0.9999991120621688, iteration: 134662
loss: 1.0311297178268433,grad_norm: 0.9999993125108058, iteration: 134663
loss: 0.966779351234436,grad_norm: 0.925691714026593, iteration: 134664
loss: 0.9998764991760254,grad_norm: 0.8630565316843373, iteration: 134665
loss: 0.9754645228385925,grad_norm: 0.7739071030254923, iteration: 134666
loss: 0.9846011400222778,grad_norm: 0.9559325066772916, iteration: 134667
loss: 0.9806444048881531,grad_norm: 0.9160359318120257, iteration: 134668
loss: 0.9934435486793518,grad_norm: 0.8639235248578054, iteration: 134669
loss: 0.9947730302810669,grad_norm: 0.6890376939994437, iteration: 134670
loss: 0.9845341444015503,grad_norm: 0.9999990405312001, iteration: 134671
loss: 0.9974241852760315,grad_norm: 0.9999991736788136, iteration: 134672
loss: 1.0200270414352417,grad_norm: 0.9449161775401098, iteration: 134673
loss: 1.0796259641647339,grad_norm: 0.999999376716876, iteration: 134674
loss: 1.0268123149871826,grad_norm: 0.9999994666477201, iteration: 134675
loss: 1.0905226469039917,grad_norm: 0.9999992064490631, iteration: 134676
loss: 1.11894953250885,grad_norm: 0.9999991818386373, iteration: 134677
loss: 1.0066914558410645,grad_norm: 0.9824278501051988, iteration: 134678
loss: 0.9835423827171326,grad_norm: 0.9999991462608251, iteration: 134679
loss: 1.0446420907974243,grad_norm: 0.7781884629813125, iteration: 134680
loss: 0.9473549127578735,grad_norm: 0.8390926803888484, iteration: 134681
loss: 1.0208479166030884,grad_norm: 0.9999993768920559, iteration: 134682
loss: 1.0216819047927856,grad_norm: 0.851624586373554, iteration: 134683
loss: 1.002344012260437,grad_norm: 0.9079231642925666, iteration: 134684
loss: 0.9553712010383606,grad_norm: 0.807922899685986, iteration: 134685
loss: 0.9920012950897217,grad_norm: 0.999999376656015, iteration: 134686
loss: 1.0013352632522583,grad_norm: 0.9102228263764321, iteration: 134687
loss: 1.019285798072815,grad_norm: 0.9369031493119774, iteration: 134688
loss: 1.0128767490386963,grad_norm: 0.8108786135050529, iteration: 134689
loss: 1.0416792631149292,grad_norm: 0.7925159519889974, iteration: 134690
loss: 0.9967293739318848,grad_norm: 0.8984136192555345, iteration: 134691
loss: 0.9957097768783569,grad_norm: 0.8556410800577522, iteration: 134692
loss: 1.0098509788513184,grad_norm: 0.8331801837761429, iteration: 134693
loss: 1.0274667739868164,grad_norm: 0.930560985529985, iteration: 134694
loss: 1.010347604751587,grad_norm: 0.9906069853053968, iteration: 134695
loss: 1.0354639291763306,grad_norm: 0.9771621525950037, iteration: 134696
loss: 1.0067087411880493,grad_norm: 0.9590984143034499, iteration: 134697
loss: 1.0599925518035889,grad_norm: 0.9608041258005307, iteration: 134698
loss: 0.9812818169593811,grad_norm: 0.889007657067531, iteration: 134699
loss: 0.9799638986587524,grad_norm: 0.934953926882359, iteration: 134700
loss: 1.009947657585144,grad_norm: 0.7621611780229499, iteration: 134701
loss: 1.0030659437179565,grad_norm: 0.9999989931877442, iteration: 134702
loss: 1.0023610591888428,grad_norm: 0.8121358103226533, iteration: 134703
loss: 1.0534859895706177,grad_norm: 0.9999991375896565, iteration: 134704
loss: 1.0261703729629517,grad_norm: 0.9346224956863668, iteration: 134705
loss: 1.0240052938461304,grad_norm: 0.9999990796367763, iteration: 134706
loss: 1.0039788484573364,grad_norm: 0.9737230543944144, iteration: 134707
loss: 1.0347237586975098,grad_norm: 0.9741503120965384, iteration: 134708
loss: 1.0120272636413574,grad_norm: 0.8918746017606713, iteration: 134709
loss: 1.0459786653518677,grad_norm: 0.8351920457290204, iteration: 134710
loss: 0.979341447353363,grad_norm: 0.8992606479334763, iteration: 134711
loss: 0.9857946634292603,grad_norm: 0.9295291095838177, iteration: 134712
loss: 1.0081390142440796,grad_norm: 0.9999992566841189, iteration: 134713
loss: 1.0048253536224365,grad_norm: 0.899673891347926, iteration: 134714
loss: 1.0493625402450562,grad_norm: 0.9981960516557392, iteration: 134715
loss: 0.9929307103157043,grad_norm: 0.8607087354522965, iteration: 134716
loss: 1.0755693912506104,grad_norm: 0.9999998788738841, iteration: 134717
loss: 1.0055181980133057,grad_norm: 0.846685061148655, iteration: 134718
loss: 0.997691810131073,grad_norm: 0.9999995212304739, iteration: 134719
loss: 0.9917634129524231,grad_norm: 0.9605894259575758, iteration: 134720
loss: 0.9527876377105713,grad_norm: 0.8398421590183165, iteration: 134721
loss: 0.9953809976577759,grad_norm: 0.9999998892220129, iteration: 134722
loss: 1.0457470417022705,grad_norm: 0.9999993007525282, iteration: 134723
loss: 1.0379359722137451,grad_norm: 0.9999994159257057, iteration: 134724
loss: 0.975902259349823,grad_norm: 0.8743513254119499, iteration: 134725
loss: 1.0518907308578491,grad_norm: 0.9999999071109312, iteration: 134726
loss: 1.0497537851333618,grad_norm: 0.9999991389901646, iteration: 134727
loss: 1.0505380630493164,grad_norm: 0.9999991275871071, iteration: 134728
loss: 0.996850848197937,grad_norm: 0.9151476760756891, iteration: 134729
loss: 0.9690330624580383,grad_norm: 0.9611308662118802, iteration: 134730
loss: 1.0690408945083618,grad_norm: 0.9917484779969082, iteration: 134731
loss: 1.0833851099014282,grad_norm: 0.9999994348108563, iteration: 134732
loss: 1.0524637699127197,grad_norm: 0.8486864513839851, iteration: 134733
loss: 0.9748374819755554,grad_norm: 0.8520887410466899, iteration: 134734
loss: 1.0690897703170776,grad_norm: 0.99999902110022, iteration: 134735
loss: 1.010202169418335,grad_norm: 0.9999995428219944, iteration: 134736
loss: 1.1576354503631592,grad_norm: 0.9999995123903271, iteration: 134737
loss: 1.0075350999832153,grad_norm: 0.8983334381932706, iteration: 134738
loss: 1.0641521215438843,grad_norm: 0.9999995822020833, iteration: 134739
loss: 1.1309618949890137,grad_norm: 0.9999991589045087, iteration: 134740
loss: 1.0394147634506226,grad_norm: 0.9999996810024299, iteration: 134741
loss: 1.0971704721450806,grad_norm: 0.9999992263681229, iteration: 134742
loss: 1.0008561611175537,grad_norm: 0.9999997655454216, iteration: 134743
loss: 1.008711576461792,grad_norm: 0.9999998468397105, iteration: 134744
loss: 1.0241003036499023,grad_norm: 0.9999991384834825, iteration: 134745
loss: 1.044307827949524,grad_norm: 0.9999995525425912, iteration: 134746
loss: 1.139853835105896,grad_norm: 0.9999998149283973, iteration: 134747
loss: 1.1129906177520752,grad_norm: 0.9999998605380203, iteration: 134748
loss: 1.0347353219985962,grad_norm: 0.9999995576139994, iteration: 134749
loss: 1.0434068441390991,grad_norm: 0.9353709377709939, iteration: 134750
loss: 0.9616158604621887,grad_norm: 0.9146156714156295, iteration: 134751
loss: 1.0942317247390747,grad_norm: 0.9999992413912687, iteration: 134752
loss: 1.0762678384780884,grad_norm: 0.9999993940723644, iteration: 134753
loss: 1.0571914911270142,grad_norm: 0.9999991739785443, iteration: 134754
loss: 1.0080304145812988,grad_norm: 0.8066362564110902, iteration: 134755
loss: 1.0586477518081665,grad_norm: 0.9999990466799159, iteration: 134756
loss: 1.1013778448104858,grad_norm: 0.9999995101827978, iteration: 134757
loss: 1.027045726776123,grad_norm: 0.8680321486767549, iteration: 134758
loss: 1.0376842021942139,grad_norm: 0.9999995541495733, iteration: 134759
loss: 1.1014680862426758,grad_norm: 0.9962370921539588, iteration: 134760
loss: 0.9852346181869507,grad_norm: 0.9999994747864057, iteration: 134761
loss: 1.042928695678711,grad_norm: 0.9999996969471509, iteration: 134762
loss: 1.043154001235962,grad_norm: 0.9999991364850728, iteration: 134763
loss: 1.029114842414856,grad_norm: 0.9288354560941577, iteration: 134764
loss: 1.0117995738983154,grad_norm: 0.9999995695269809, iteration: 134765
loss: 0.9904330968856812,grad_norm: 0.7979339614957123, iteration: 134766
loss: 1.1020923852920532,grad_norm: 0.999999392413127, iteration: 134767
loss: 1.0147218704223633,grad_norm: 0.8548592780227499, iteration: 134768
loss: 1.0572947263717651,grad_norm: 0.9999991716711828, iteration: 134769
loss: 1.0170806646347046,grad_norm: 0.9999993655314972, iteration: 134770
loss: 1.021243691444397,grad_norm: 0.9798044162372664, iteration: 134771
loss: 1.098854660987854,grad_norm: 0.99999931990526, iteration: 134772
loss: 1.1183903217315674,grad_norm: 0.9999999005279189, iteration: 134773
loss: 1.0048574209213257,grad_norm: 0.9999991907604727, iteration: 134774
loss: 1.0444250106811523,grad_norm: 0.9999999380124518, iteration: 134775
loss: 0.9816821813583374,grad_norm: 0.9999990003346191, iteration: 134776
loss: 1.0293679237365723,grad_norm: 0.9999991985818465, iteration: 134777
loss: 1.04729163646698,grad_norm: 0.9999992712336655, iteration: 134778
loss: 1.1107593774795532,grad_norm: 0.9999996815314018, iteration: 134779
loss: 1.1105552911758423,grad_norm: 0.9999999233758213, iteration: 134780
loss: 1.0608699321746826,grad_norm: 0.999999146283183, iteration: 134781
loss: 1.1640541553497314,grad_norm: 0.9999993259836352, iteration: 134782
loss: 1.149356484413147,grad_norm: 0.9999992571654086, iteration: 134783
loss: 1.1700823307037354,grad_norm: 0.9999994372608639, iteration: 134784
loss: 1.217883586883545,grad_norm: 1.0000000516229226, iteration: 134785
loss: 1.2665562629699707,grad_norm: 0.9999999129002949, iteration: 134786
loss: 1.047841191291809,grad_norm: 0.9999992945035333, iteration: 134787
loss: 1.14402174949646,grad_norm: 0.9999994299483704, iteration: 134788
loss: 0.9574927687644958,grad_norm: 0.9999994370989311, iteration: 134789
loss: 1.2521735429763794,grad_norm: 0.9999999544070974, iteration: 134790
loss: 1.1196496486663818,grad_norm: 0.9999996995859881, iteration: 134791
loss: 1.169453740119934,grad_norm: 0.9999998018720573, iteration: 134792
loss: 1.0327082872390747,grad_norm: 0.9999991081666547, iteration: 134793
loss: 1.0693702697753906,grad_norm: 0.9999997965426031, iteration: 134794
loss: 1.0257493257522583,grad_norm: 0.9999990642466215, iteration: 134795
loss: 1.0576269626617432,grad_norm: 0.9799180575012817, iteration: 134796
loss: 1.0697119235992432,grad_norm: 0.9999996298127238, iteration: 134797
loss: 1.2691303491592407,grad_norm: 0.9999998483178911, iteration: 134798
loss: 0.9901259541511536,grad_norm: 0.9999992296142793, iteration: 134799
loss: 1.1757068634033203,grad_norm: 0.9999999145799755, iteration: 134800
loss: 1.0306950807571411,grad_norm: 0.9999998692971594, iteration: 134801
loss: 1.0779855251312256,grad_norm: 0.9026117988189879, iteration: 134802
loss: 1.3204693794250488,grad_norm: 0.9999999614117794, iteration: 134803
loss: 1.231611728668213,grad_norm: 1.0000000862729523, iteration: 134804
loss: 1.0935800075531006,grad_norm: 0.9999997291006061, iteration: 134805
loss: 1.286229133605957,grad_norm: 0.9999995121712939, iteration: 134806
loss: 1.160994291305542,grad_norm: 0.9999998484191478, iteration: 134807
loss: 1.0847622156143188,grad_norm: 0.9999998116371247, iteration: 134808
loss: 1.1253163814544678,grad_norm: 0.9999999987462813, iteration: 134809
loss: 1.079633355140686,grad_norm: 0.9999996669141824, iteration: 134810
loss: 1.0156604051589966,grad_norm: 0.9999994146240001, iteration: 134811
loss: 0.9877676963806152,grad_norm: 0.9999993321614705, iteration: 134812
loss: 1.1892660856246948,grad_norm: 0.9999999254069919, iteration: 134813
loss: 1.081224799156189,grad_norm: 0.9999992193419023, iteration: 134814
loss: 1.0234888792037964,grad_norm: 0.8996544960978945, iteration: 134815
loss: 1.0796558856964111,grad_norm: 0.9999997265069294, iteration: 134816
loss: 1.2600740194320679,grad_norm: 0.9999998429773347, iteration: 134817
loss: 1.0743709802627563,grad_norm: 0.9999990825724686, iteration: 134818
loss: 1.1103941202163696,grad_norm: 0.9999991303618321, iteration: 134819
loss: 1.0109105110168457,grad_norm: 0.999999345299411, iteration: 134820
loss: 1.1163597106933594,grad_norm: 0.9390057397642683, iteration: 134821
loss: 1.0066845417022705,grad_norm: 0.9999995788506777, iteration: 134822
loss: 1.0356645584106445,grad_norm: 0.9999999270441674, iteration: 134823
loss: 1.048776388168335,grad_norm: 0.9999992620681114, iteration: 134824
loss: 1.1237437725067139,grad_norm: 0.9999995376220646, iteration: 134825
loss: 1.093467354774475,grad_norm: 0.9999995712648394, iteration: 134826
loss: 1.1511836051940918,grad_norm: 0.9999999104686227, iteration: 134827
loss: 1.0131531953811646,grad_norm: 0.9999999242438637, iteration: 134828
loss: 1.046363353729248,grad_norm: 0.9999991249957517, iteration: 134829
loss: 1.041377305984497,grad_norm: 0.9999995381464433, iteration: 134830
loss: 1.0208264589309692,grad_norm: 0.999999214277251, iteration: 134831
loss: 1.054133653640747,grad_norm: 0.9999991098095198, iteration: 134832
loss: 1.1729551553726196,grad_norm: 0.9999995625702394, iteration: 134833
loss: 1.0489511489868164,grad_norm: 0.9999995143996048, iteration: 134834
loss: 1.1647504568099976,grad_norm: 0.9999996996032291, iteration: 134835
loss: 1.1412540674209595,grad_norm: 0.9999998753580401, iteration: 134836
loss: 1.0555506944656372,grad_norm: 0.9999989382999197, iteration: 134837
loss: 0.963124692440033,grad_norm: 0.9999992337713187, iteration: 134838
loss: 1.1557170152664185,grad_norm: 0.9999995094351708, iteration: 134839
loss: 0.986690878868103,grad_norm: 0.9999990599985391, iteration: 134840
loss: 1.0862101316452026,grad_norm: 0.9999995201728794, iteration: 134841
loss: 1.0413873195648193,grad_norm: 0.9999991333807747, iteration: 134842
loss: 1.0155597925186157,grad_norm: 0.9999995219651312, iteration: 134843
loss: 0.9628551602363586,grad_norm: 0.8736680286190963, iteration: 134844
loss: 1.1935694217681885,grad_norm: 0.9999998943642004, iteration: 134845
loss: 0.9917673468589783,grad_norm: 0.7495363799853685, iteration: 134846
loss: 1.1064746379852295,grad_norm: 0.9999993118292553, iteration: 134847
loss: 1.0899336338043213,grad_norm: 0.9999996858746414, iteration: 134848
loss: 1.1307570934295654,grad_norm: 0.9999996124266739, iteration: 134849
loss: 1.0436128377914429,grad_norm: 0.9999991664500543, iteration: 134850
loss: 1.0390210151672363,grad_norm: 0.9999993403996552, iteration: 134851
loss: 1.0466208457946777,grad_norm: 0.9999994804740668, iteration: 134852
loss: 1.0079307556152344,grad_norm: 0.999999183478664, iteration: 134853
loss: 0.9464343786239624,grad_norm: 0.9999995762719752, iteration: 134854
loss: 1.01183021068573,grad_norm: 0.9208934947573134, iteration: 134855
loss: 1.1179707050323486,grad_norm: 0.999999956162397, iteration: 134856
loss: 1.024519920349121,grad_norm: 0.9999993734504182, iteration: 134857
loss: 0.9940661191940308,grad_norm: 0.9999990937831459, iteration: 134858
loss: 1.0501065254211426,grad_norm: 0.999999830379991, iteration: 134859
loss: 1.0364207029342651,grad_norm: 0.9166094110684564, iteration: 134860
loss: 1.0349355936050415,grad_norm: 0.9999998464736013, iteration: 134861
loss: 1.0792672634124756,grad_norm: 0.9999991946376219, iteration: 134862
loss: 1.078662395477295,grad_norm: 0.99999958083512, iteration: 134863
loss: 1.038625717163086,grad_norm: 0.8897747481473479, iteration: 134864
loss: 1.020958662033081,grad_norm: 0.9999996858699876, iteration: 134865
loss: 1.0026130676269531,grad_norm: 0.9999999367863156, iteration: 134866
loss: 1.0038384199142456,grad_norm: 0.7587395894830076, iteration: 134867
loss: 1.094264030456543,grad_norm: 0.9999998357692093, iteration: 134868
loss: 0.9708370566368103,grad_norm: 0.9999989854163248, iteration: 134869
loss: 0.9851603507995605,grad_norm: 0.9999994653082814, iteration: 134870
loss: 1.04608154296875,grad_norm: 0.9999994900475235, iteration: 134871
loss: 0.9988553524017334,grad_norm: 0.9500198525091513, iteration: 134872
loss: 1.0868213176727295,grad_norm: 0.999999167114363, iteration: 134873
loss: 0.9992641806602478,grad_norm: 0.9999990685824571, iteration: 134874
loss: 1.0033756494522095,grad_norm: 0.9999992973426368, iteration: 134875
loss: 0.9884027242660522,grad_norm: 0.9515765364019104, iteration: 134876
loss: 1.3505536317825317,grad_norm: 0.9999998309464854, iteration: 134877
loss: 1.0503686666488647,grad_norm: 0.9999992471302603, iteration: 134878
loss: 0.9912490844726562,grad_norm: 0.9999995695541354, iteration: 134879
loss: 1.0548220872879028,grad_norm: 0.9999992281838532, iteration: 134880
loss: 1.143180012702942,grad_norm: 0.9999996089941108, iteration: 134881
loss: 1.129080057144165,grad_norm: 0.9999992279916423, iteration: 134882
loss: 1.0016639232635498,grad_norm: 0.9241259286871287, iteration: 134883
loss: 0.9831109046936035,grad_norm: 0.9037898936350891, iteration: 134884
loss: 1.0932226181030273,grad_norm: 0.9999991485599665, iteration: 134885
loss: 1.020898699760437,grad_norm: 0.999999143422748, iteration: 134886
loss: 0.9938570857048035,grad_norm: 0.9999991575813015, iteration: 134887
loss: 0.9861666560173035,grad_norm: 0.9221982586283131, iteration: 134888
loss: 1.0120742321014404,grad_norm: 0.9344109533192809, iteration: 134889
loss: 0.986431896686554,grad_norm: 0.8225657290857217, iteration: 134890
loss: 1.0609667301177979,grad_norm: 0.9128450422491861, iteration: 134891
loss: 1.1055965423583984,grad_norm: 0.9999999370457303, iteration: 134892
loss: 0.9877583384513855,grad_norm: 0.9999990116203498, iteration: 134893
loss: 0.9979995489120483,grad_norm: 0.9999996939472796, iteration: 134894
loss: 1.0479357242584229,grad_norm: 0.9999998763313147, iteration: 134895
loss: 1.0125668048858643,grad_norm: 0.9999991587463786, iteration: 134896
loss: 1.0564305782318115,grad_norm: 0.9999993973426919, iteration: 134897
loss: 0.9992742538452148,grad_norm: 0.999999802357593, iteration: 134898
loss: 0.9816022515296936,grad_norm: 0.7676702326051692, iteration: 134899
loss: 0.9920502305030823,grad_norm: 0.9180383546631569, iteration: 134900
loss: 0.9623825550079346,grad_norm: 0.999999470083404, iteration: 134901
loss: 0.9904534220695496,grad_norm: 0.9012081925123329, iteration: 134902
loss: 1.429731845855713,grad_norm: 0.9999999095020071, iteration: 134903
loss: 0.9872077107429504,grad_norm: 0.9999991611694614, iteration: 134904
loss: 1.000983715057373,grad_norm: 0.9999990144581916, iteration: 134905
loss: 1.0339020490646362,grad_norm: 0.9999992486100708, iteration: 134906
loss: 1.0564900636672974,grad_norm: 0.9999992717454869, iteration: 134907
loss: 0.9759134650230408,grad_norm: 0.8701956700858966, iteration: 134908
loss: 1.1933655738830566,grad_norm: 1.0000000018219626, iteration: 134909
loss: 0.9864441752433777,grad_norm: 0.8352314007732741, iteration: 134910
loss: 1.108811616897583,grad_norm: 0.999999956964788, iteration: 134911
loss: 1.029822587966919,grad_norm: 0.7549244528543098, iteration: 134912
loss: 1.0227289199829102,grad_norm: 0.9164117589043703, iteration: 134913
loss: 1.0262190103530884,grad_norm: 0.9089901723785037, iteration: 134914
loss: 1.065299391746521,grad_norm: 0.9999991613031659, iteration: 134915
loss: 0.9891707301139832,grad_norm: 0.8444655858543488, iteration: 134916
loss: 1.0001227855682373,grad_norm: 0.8558635412700282, iteration: 134917
loss: 1.0443007946014404,grad_norm: 0.9999994012208365, iteration: 134918
loss: 1.0095279216766357,grad_norm: 0.8643763266337265, iteration: 134919
loss: 1.016824722290039,grad_norm: 0.9999993045340371, iteration: 134920
loss: 1.0259439945220947,grad_norm: 0.8177815381697683, iteration: 134921
loss: 1.04372239112854,grad_norm: 0.9999996508806515, iteration: 134922
loss: 0.9969078302383423,grad_norm: 0.8042805544856411, iteration: 134923
loss: 1.0126612186431885,grad_norm: 0.7470282922731029, iteration: 134924
loss: 0.9986568689346313,grad_norm: 0.9999995017000967, iteration: 134925
loss: 1.023969054222107,grad_norm: 0.9999990579595207, iteration: 134926
loss: 1.0142099857330322,grad_norm: 0.999998934357504, iteration: 134927
loss: 0.9907923340797424,grad_norm: 0.8011401382883843, iteration: 134928
loss: 1.0315401554107666,grad_norm: 0.8491133450007191, iteration: 134929
loss: 1.1338891983032227,grad_norm: 0.9999991723426924, iteration: 134930
loss: 1.0180703401565552,grad_norm: 0.9999990740487761, iteration: 134931
loss: 0.9995432496070862,grad_norm: 0.976293204420118, iteration: 134932
loss: 0.9493435621261597,grad_norm: 0.885338115591342, iteration: 134933
loss: 0.9642539024353027,grad_norm: 0.8754379330481817, iteration: 134934
loss: 0.9779295325279236,grad_norm: 0.9999994927327663, iteration: 134935
loss: 1.031321406364441,grad_norm: 0.9999991642946756, iteration: 134936
loss: 0.9625111222267151,grad_norm: 0.9225586845610185, iteration: 134937
loss: 1.002622127532959,grad_norm: 0.8741248289597375, iteration: 134938
loss: 1.0408198833465576,grad_norm: 0.9670827306193185, iteration: 134939
loss: 0.9757965207099915,grad_norm: 0.9999990953213012, iteration: 134940
loss: 1.0675383806228638,grad_norm: 1.0000000743813708, iteration: 134941
loss: 1.0507593154907227,grad_norm: 0.9999993029511762, iteration: 134942
loss: 0.9760766625404358,grad_norm: 0.8104129816927345, iteration: 134943
loss: 0.9678067564964294,grad_norm: 0.806544621615874, iteration: 134944
loss: 1.0647529363632202,grad_norm: 1.000000029895361, iteration: 134945
loss: 1.0075289011001587,grad_norm: 0.9645307631327791, iteration: 134946
loss: 1.0044457912445068,grad_norm: 0.8884814503588153, iteration: 134947
loss: 1.0130397081375122,grad_norm: 0.769912808772039, iteration: 134948
loss: 1.000712513923645,grad_norm: 0.9999998429416924, iteration: 134949
loss: 1.018173336982727,grad_norm: 0.9999990685282922, iteration: 134950
loss: 1.0247520208358765,grad_norm: 1.0000000227548616, iteration: 134951
loss: 1.0234687328338623,grad_norm: 0.9999996751410548, iteration: 134952
loss: 0.9994766116142273,grad_norm: 0.9999996560279993, iteration: 134953
loss: 0.9720701575279236,grad_norm: 0.9999989924332486, iteration: 134954
loss: 1.027408242225647,grad_norm: 0.9999993878036498, iteration: 134955
loss: 1.008917212486267,grad_norm: 0.9999996743913737, iteration: 134956
loss: 0.9796012043952942,grad_norm: 0.9139155580908223, iteration: 134957
loss: 1.0750677585601807,grad_norm: 0.9420775243509913, iteration: 134958
loss: 1.044777512550354,grad_norm: 0.9999999287956484, iteration: 134959
loss: 1.0009607076644897,grad_norm: 0.999999111274962, iteration: 134960
loss: 1.0203965902328491,grad_norm: 0.999999510975414, iteration: 134961
loss: 1.040144920349121,grad_norm: 0.9891556198409149, iteration: 134962
loss: 1.002860188484192,grad_norm: 0.9999999190119296, iteration: 134963
loss: 1.0136470794677734,grad_norm: 0.9999998324622571, iteration: 134964
loss: 0.9929488897323608,grad_norm: 0.8924002580566482, iteration: 134965
loss: 1.1388332843780518,grad_norm: 0.999999501739788, iteration: 134966
loss: 1.0461339950561523,grad_norm: 0.9999992263622927, iteration: 134967
loss: 1.0297579765319824,grad_norm: 0.9932398967372883, iteration: 134968
loss: 1.0069907903671265,grad_norm: 0.7527485769953429, iteration: 134969
loss: 1.0165437459945679,grad_norm: 0.964531784440081, iteration: 134970
loss: 1.0867528915405273,grad_norm: 1.0000000280162715, iteration: 134971
loss: 0.9710853099822998,grad_norm: 0.9196497226849278, iteration: 134972
loss: 0.9884437322616577,grad_norm: 0.9413239912798633, iteration: 134973
loss: 0.9984405040740967,grad_norm: 0.9934369979647842, iteration: 134974
loss: 1.0130178928375244,grad_norm: 0.9999992150746461, iteration: 134975
loss: 1.0520118474960327,grad_norm: 0.9999993630105907, iteration: 134976
loss: 1.0099066495895386,grad_norm: 0.8211764982702762, iteration: 134977
loss: 1.09687077999115,grad_norm: 0.9999992619589578, iteration: 134978
loss: 1.0835908651351929,grad_norm: 0.9999997236619128, iteration: 134979
loss: 1.0317473411560059,grad_norm: 0.9999995622706783, iteration: 134980
loss: 1.0046660900115967,grad_norm: 0.8789646785142406, iteration: 134981
loss: 1.098705530166626,grad_norm: 0.9999994151983617, iteration: 134982
loss: 1.0128552913665771,grad_norm: 0.9999993185068795, iteration: 134983
loss: 0.9971593022346497,grad_norm: 0.8393031298072726, iteration: 134984
loss: 1.0359489917755127,grad_norm: 0.9999998033328865, iteration: 134985
loss: 1.089158058166504,grad_norm: 0.9999994516907907, iteration: 134986
loss: 1.0050219297409058,grad_norm: 0.9999998319353146, iteration: 134987
loss: 1.0033775568008423,grad_norm: 0.9999991917579167, iteration: 134988
loss: 1.0121532678604126,grad_norm: 0.9698077135697213, iteration: 134989
loss: 1.0380703210830688,grad_norm: 0.9971238450522838, iteration: 134990
loss: 0.9854028820991516,grad_norm: 0.6816867733162791, iteration: 134991
loss: 1.035784125328064,grad_norm: 0.999999798678476, iteration: 134992
loss: 0.9865306615829468,grad_norm: 0.9999998915750892, iteration: 134993
loss: 1.0285660028457642,grad_norm: 0.9999994444759139, iteration: 134994
loss: 1.1369870901107788,grad_norm: 0.9999993191510546, iteration: 134995
loss: 1.0643481016159058,grad_norm: 0.9999990484663054, iteration: 134996
loss: 1.07216477394104,grad_norm: 0.999999818836146, iteration: 134997
loss: 1.1131764650344849,grad_norm: 0.9999998768438133, iteration: 134998
loss: 1.245920181274414,grad_norm: 0.9999998811872831, iteration: 134999
loss: 1.159781813621521,grad_norm: 0.9717903694855587, iteration: 135000
loss: 1.1582444906234741,grad_norm: 0.999999849475682, iteration: 135001
loss: 1.1571308374404907,grad_norm: 0.9999996571442149, iteration: 135002
loss: 1.0237956047058105,grad_norm: 0.8890805441681111, iteration: 135003
loss: 1.0203425884246826,grad_norm: 0.9376821169771168, iteration: 135004
loss: 1.0008457899093628,grad_norm: 0.8974025831902708, iteration: 135005
loss: 1.0346931219100952,grad_norm: 0.9999991002560404, iteration: 135006
loss: 1.0321365594863892,grad_norm: 0.9999998834031234, iteration: 135007
loss: 1.0438848733901978,grad_norm: 0.8243741953064109, iteration: 135008
loss: 1.0010113716125488,grad_norm: 0.9999992559856523, iteration: 135009
loss: 1.0030816793441772,grad_norm: 0.9999994289165969, iteration: 135010
loss: 1.009385347366333,grad_norm: 0.7791533080320108, iteration: 135011
loss: 1.109767198562622,grad_norm: 0.9999996096292812, iteration: 135012
loss: 0.9897604584693909,grad_norm: 0.8299226640754482, iteration: 135013
loss: 1.092930793762207,grad_norm: 0.9999997004478685, iteration: 135014
loss: 1.0246292352676392,grad_norm: 0.9999996215962542, iteration: 135015
loss: 1.0245602130889893,grad_norm: 0.9999996515950065, iteration: 135016
loss: 1.018858551979065,grad_norm: 0.856691749613906, iteration: 135017
loss: 1.0004591941833496,grad_norm: 0.9166034056782715, iteration: 135018
loss: 0.9887146353721619,grad_norm: 0.7299898853020832, iteration: 135019
loss: 1.0149364471435547,grad_norm: 0.9999990413889369, iteration: 135020
loss: 1.0105897188186646,grad_norm: 0.9999991200742833, iteration: 135021
loss: 1.0193536281585693,grad_norm: 0.8404337112701832, iteration: 135022
loss: 1.0284943580627441,grad_norm: 0.9999996212428257, iteration: 135023
loss: 1.0132763385772705,grad_norm: 0.889318554116199, iteration: 135024
loss: 1.0001693964004517,grad_norm: 0.8875939018327855, iteration: 135025
loss: 0.9905632138252258,grad_norm: 0.763389114482608, iteration: 135026
loss: 0.9892079830169678,grad_norm: 0.8069780896917683, iteration: 135027
loss: 0.9952791929244995,grad_norm: 0.8817083012750755, iteration: 135028
loss: 1.013226866722107,grad_norm: 0.8583575347074213, iteration: 135029
loss: 0.9887530207633972,grad_norm: 0.8933866356152018, iteration: 135030
loss: 0.9938603043556213,grad_norm: 0.8403356745878827, iteration: 135031
loss: 0.976361095905304,grad_norm: 0.9368450802974521, iteration: 135032
loss: 0.9894751310348511,grad_norm: 0.8746312460044389, iteration: 135033
loss: 0.9991078972816467,grad_norm: 0.9999998906135966, iteration: 135034
loss: 1.0788612365722656,grad_norm: 0.9053186420506363, iteration: 135035
loss: 0.9521352648735046,grad_norm: 0.9380913081059024, iteration: 135036
loss: 1.0010194778442383,grad_norm: 0.9724198699231685, iteration: 135037
loss: 1.0661380290985107,grad_norm: 0.9999994296379884, iteration: 135038
loss: 1.0063600540161133,grad_norm: 0.8339473786021551, iteration: 135039
loss: 1.0142298936843872,grad_norm: 0.7996809586829655, iteration: 135040
loss: 1.0356624126434326,grad_norm: 0.9999991798327623, iteration: 135041
loss: 1.0061414241790771,grad_norm: 0.9907648286129589, iteration: 135042
loss: 1.0205042362213135,grad_norm: 0.9054254232261637, iteration: 135043
loss: 0.9799217581748962,grad_norm: 0.9999991471162665, iteration: 135044
loss: 1.079793930053711,grad_norm: 0.9999995080872651, iteration: 135045
loss: 1.0089610815048218,grad_norm: 0.7272448879583734, iteration: 135046
loss: 0.999136745929718,grad_norm: 0.9999989515492133, iteration: 135047
loss: 0.9731345176696777,grad_norm: 0.9999993269636741, iteration: 135048
loss: 1.0328946113586426,grad_norm: 0.9999992894792926, iteration: 135049
loss: 1.0049502849578857,grad_norm: 0.7087203259089345, iteration: 135050
loss: 0.958145797252655,grad_norm: 0.8756502463042806, iteration: 135051
loss: 1.0915753841400146,grad_norm: 0.9999996855124769, iteration: 135052
loss: 1.08864164352417,grad_norm: 0.9999998073497159, iteration: 135053
loss: 1.0200103521347046,grad_norm: 0.9143520415034763, iteration: 135054
loss: 0.992206335067749,grad_norm: 0.9610424289072729, iteration: 135055
loss: 0.9782497882843018,grad_norm: 0.9999990974598398, iteration: 135056
loss: 0.9528263211250305,grad_norm: 0.9102078721338842, iteration: 135057
loss: 1.16975736618042,grad_norm: 0.999999983507794, iteration: 135058
loss: 1.0075454711914062,grad_norm: 0.9999996697071518, iteration: 135059
loss: 0.9928240180015564,grad_norm: 0.9999991381827309, iteration: 135060
loss: 1.0369207859039307,grad_norm: 0.9999992991322713, iteration: 135061
loss: 1.0115560293197632,grad_norm: 0.9999992087843775, iteration: 135062
loss: 1.0349317789077759,grad_norm: 0.9605991988517629, iteration: 135063
loss: 1.0014280080795288,grad_norm: 0.7784396463292775, iteration: 135064
loss: 1.0097264051437378,grad_norm: 0.9999995415582491, iteration: 135065
loss: 0.9494042992591858,grad_norm: 0.8407418800808591, iteration: 135066
loss: 1.004035472869873,grad_norm: 0.8764820827640704, iteration: 135067
loss: 0.9986644387245178,grad_norm: 0.9750299372033971, iteration: 135068
loss: 1.0495250225067139,grad_norm: 0.9999991766317454, iteration: 135069
loss: 0.9672814011573792,grad_norm: 0.8965988042894398, iteration: 135070
loss: 1.0134140253067017,grad_norm: 0.8430346578012939, iteration: 135071
loss: 1.0406612157821655,grad_norm: 0.99999973536506, iteration: 135072
loss: 0.9874454736709595,grad_norm: 0.7839782459154089, iteration: 135073
loss: 1.0232070684432983,grad_norm: 0.9197457656863468, iteration: 135074
loss: 0.9707337617874146,grad_norm: 0.8950174165792799, iteration: 135075
loss: 1.011399507522583,grad_norm: 0.9999992231346981, iteration: 135076
loss: 0.9802913665771484,grad_norm: 0.9999994181798654, iteration: 135077
loss: 0.9702432751655579,grad_norm: 0.8687474400853593, iteration: 135078
loss: 1.053780198097229,grad_norm: 0.9999995321168493, iteration: 135079
loss: 1.0133748054504395,grad_norm: 0.9999991039404885, iteration: 135080
loss: 1.0250500440597534,grad_norm: 0.99999969503401, iteration: 135081
loss: 1.1614896059036255,grad_norm: 0.9701302952179114, iteration: 135082
loss: 1.1445204019546509,grad_norm: 0.9999996466540818, iteration: 135083
loss: 1.0945829153060913,grad_norm: 0.999999827720757, iteration: 135084
loss: 1.0605344772338867,grad_norm: 0.9999998578713426, iteration: 135085
loss: 1.0185753107070923,grad_norm: 0.9999991600966449, iteration: 135086
loss: 1.0006992816925049,grad_norm: 0.7593787789934521, iteration: 135087
loss: 0.9658036828041077,grad_norm: 0.8636724960699818, iteration: 135088
loss: 1.0100212097167969,grad_norm: 0.9113815706672073, iteration: 135089
loss: 1.0649839639663696,grad_norm: 0.9999993282947325, iteration: 135090
loss: 0.9944664835929871,grad_norm: 0.9172557732721696, iteration: 135091
loss: 0.9816070199012756,grad_norm: 0.8370934079361618, iteration: 135092
loss: 0.983430802822113,grad_norm: 0.9999996464314665, iteration: 135093
loss: 1.0247607231140137,grad_norm: 0.9999997331989378, iteration: 135094
loss: 0.9875651597976685,grad_norm: 0.8482010886884992, iteration: 135095
loss: 1.044073462486267,grad_norm: 0.8766049729214656, iteration: 135096
loss: 0.9875776767730713,grad_norm: 0.8600516785109218, iteration: 135097
loss: 1.0218485593795776,grad_norm: 0.9999999505460986, iteration: 135098
loss: 1.0611257553100586,grad_norm: 0.8930408996146211, iteration: 135099
loss: 0.9825529456138611,grad_norm: 0.9777524780721382, iteration: 135100
loss: 1.008366584777832,grad_norm: 0.9999990578865982, iteration: 135101
loss: 0.9890764355659485,grad_norm: 0.9021919039570424, iteration: 135102
loss: 1.0360236167907715,grad_norm: 0.7710045693200864, iteration: 135103
loss: 0.9971070885658264,grad_norm: 0.9569051425004297, iteration: 135104
loss: 0.9994367957115173,grad_norm: 0.8878852246331209, iteration: 135105
loss: 1.1891087293624878,grad_norm: 0.9999992494944991, iteration: 135106
loss: 1.0141719579696655,grad_norm: 0.872402199573179, iteration: 135107
loss: 0.9884757399559021,grad_norm: 0.8970090431794095, iteration: 135108
loss: 0.9715425372123718,grad_norm: 0.9621829238991634, iteration: 135109
loss: 0.9761014580726624,grad_norm: 0.7622348332364048, iteration: 135110
loss: 0.963120698928833,grad_norm: 0.9999993265955682, iteration: 135111
loss: 1.0688755512237549,grad_norm: 0.7008111635945469, iteration: 135112
loss: 0.9953442215919495,grad_norm: 0.8421148553067959, iteration: 135113
loss: 1.0084753036499023,grad_norm: 0.968918136786802, iteration: 135114
loss: 1.0100089311599731,grad_norm: 0.8785078964431359, iteration: 135115
loss: 1.003443717956543,grad_norm: 0.8139263816742572, iteration: 135116
loss: 0.9587047100067139,grad_norm: 0.9999996209781533, iteration: 135117
loss: 1.0170772075653076,grad_norm: 0.7738460275017507, iteration: 135118
loss: 0.9903972148895264,grad_norm: 0.9999991937970775, iteration: 135119
loss: 1.097937822341919,grad_norm: 0.9999996433228334, iteration: 135120
loss: 0.9923788905143738,grad_norm: 0.8797684558077975, iteration: 135121
loss: 1.0233492851257324,grad_norm: 0.9832647637619636, iteration: 135122
loss: 1.0044084787368774,grad_norm: 0.750288614892753, iteration: 135123
loss: 1.0115559101104736,grad_norm: 0.8715998262838909, iteration: 135124
loss: 0.9852612614631653,grad_norm: 0.885621618442287, iteration: 135125
loss: 1.01522696018219,grad_norm: 0.8770615551999139, iteration: 135126
loss: 0.9946959018707275,grad_norm: 0.9028274119635951, iteration: 135127
loss: 0.9722168445587158,grad_norm: 0.9202713455642031, iteration: 135128
loss: 1.034166693687439,grad_norm: 1.0000000263797693, iteration: 135129
loss: 1.0288350582122803,grad_norm: 0.9577327974168691, iteration: 135130
loss: 1.0266327857971191,grad_norm: 0.9999998274281493, iteration: 135131
loss: 0.9904153943061829,grad_norm: 0.9141266299983678, iteration: 135132
loss: 1.0486222505569458,grad_norm: 0.9999996128260501, iteration: 135133
loss: 0.9787443280220032,grad_norm: 0.9559426284316754, iteration: 135134
loss: 1.0303939580917358,grad_norm: 0.9318702133045667, iteration: 135135
loss: 1.0405447483062744,grad_norm: 0.8532275196612611, iteration: 135136
loss: 1.0135841369628906,grad_norm: 0.9999993016871733, iteration: 135137
loss: 1.0598187446594238,grad_norm: 0.926260162440238, iteration: 135138
loss: 1.0200897455215454,grad_norm: 0.8806752025763415, iteration: 135139
loss: 1.0065504312515259,grad_norm: 0.9999990055121946, iteration: 135140
loss: 0.9930784106254578,grad_norm: 0.7822492434204938, iteration: 135141
loss: 0.9784092903137207,grad_norm: 0.7877299193332068, iteration: 135142
loss: 1.0150110721588135,grad_norm: 0.9999990424204079, iteration: 135143
loss: 0.981472909450531,grad_norm: 0.8391821487174167, iteration: 135144
loss: 1.078719973564148,grad_norm: 0.9999990924717919, iteration: 135145
loss: 0.9999974966049194,grad_norm: 0.999999057061876, iteration: 135146
loss: 0.9975034594535828,grad_norm: 0.838924006401638, iteration: 135147
loss: 1.0162127017974854,grad_norm: 0.8388693345938083, iteration: 135148
loss: 0.9918907880783081,grad_norm: 0.9999992067729018, iteration: 135149
loss: 1.0281623601913452,grad_norm: 0.8738320094648986, iteration: 135150
loss: 1.015792727470398,grad_norm: 0.8334929806415656, iteration: 135151
loss: 1.0156594514846802,grad_norm: 0.8221328922086081, iteration: 135152
loss: 0.9863646030426025,grad_norm: 0.7549615286912731, iteration: 135153
loss: 0.992502748966217,grad_norm: 0.8927768910222785, iteration: 135154
loss: 1.0206539630889893,grad_norm: 0.9999992804202124, iteration: 135155
loss: 1.000327706336975,grad_norm: 0.7807773466483137, iteration: 135156
loss: 1.0135838985443115,grad_norm: 0.9999998472004468, iteration: 135157
loss: 0.9931433796882629,grad_norm: 0.8753747396211268, iteration: 135158
loss: 1.065976619720459,grad_norm: 0.8916379456808262, iteration: 135159
loss: 0.9907877445220947,grad_norm: 0.8991381818053252, iteration: 135160
loss: 1.012992024421692,grad_norm: 0.9731959688159595, iteration: 135161
loss: 1.0133166313171387,grad_norm: 0.8230839237452016, iteration: 135162
loss: 1.027700424194336,grad_norm: 0.9999992560097449, iteration: 135163
loss: 1.0309220552444458,grad_norm: 0.8946844993454305, iteration: 135164
loss: 1.0048812627792358,grad_norm: 0.9999995299185671, iteration: 135165
loss: 1.0572561025619507,grad_norm: 0.880238157376485, iteration: 135166
loss: 0.9883783459663391,grad_norm: 0.8480344895899934, iteration: 135167
loss: 1.0214250087738037,grad_norm: 0.8609110275759041, iteration: 135168
loss: 1.0121691226959229,grad_norm: 0.9499097559177124, iteration: 135169
loss: 1.0353717803955078,grad_norm: 0.7484591581783211, iteration: 135170
loss: 0.9982839822769165,grad_norm: 0.9310248054664187, iteration: 135171
loss: 1.0683830976486206,grad_norm: 0.99999973968164, iteration: 135172
loss: 1.0258893966674805,grad_norm: 0.8627699536640108, iteration: 135173
loss: 1.0402723550796509,grad_norm: 0.9619289720655985, iteration: 135174
loss: 1.0063109397888184,grad_norm: 0.9999994558295326, iteration: 135175
loss: 1.0040380954742432,grad_norm: 0.999999144241358, iteration: 135176
loss: 1.0094685554504395,grad_norm: 0.8386527096145868, iteration: 135177
loss: 1.0340811014175415,grad_norm: 0.9999990955048843, iteration: 135178
loss: 1.0112215280532837,grad_norm: 0.9039456776482805, iteration: 135179
loss: 0.9815578460693359,grad_norm: 0.8310365239012018, iteration: 135180
loss: 0.991421639919281,grad_norm: 0.9509542664961973, iteration: 135181
loss: 1.017574429512024,grad_norm: 0.7714437556977584, iteration: 135182
loss: 1.0105293989181519,grad_norm: 0.8731009332777538, iteration: 135183
loss: 1.0015703439712524,grad_norm: 0.9111012300083774, iteration: 135184
loss: 1.1158138513565063,grad_norm: 0.9999997029309804, iteration: 135185
loss: 0.9804378747940063,grad_norm: 0.8799243460914377, iteration: 135186
loss: 1.0484141111373901,grad_norm: 0.9147530573877414, iteration: 135187
loss: 0.9722350239753723,grad_norm: 0.9291082682210207, iteration: 135188
loss: 1.0223450660705566,grad_norm: 0.8386021396036757, iteration: 135189
loss: 1.020897388458252,grad_norm: 0.8090511873139418, iteration: 135190
loss: 0.9751194715499878,grad_norm: 0.7351797600098013, iteration: 135191
loss: 1.056053638458252,grad_norm: 0.9999990896287404, iteration: 135192
loss: 1.0079083442687988,grad_norm: 0.9999992126450673, iteration: 135193
loss: 1.0611385107040405,grad_norm: 0.999999688622809, iteration: 135194
loss: 1.0005238056182861,grad_norm: 0.7709054731464747, iteration: 135195
loss: 1.0477875471115112,grad_norm: 0.9999992809625665, iteration: 135196
loss: 0.9993229508399963,grad_norm: 0.788640552774365, iteration: 135197
loss: 0.9743762612342834,grad_norm: 0.9218983906585512, iteration: 135198
loss: 1.0162400007247925,grad_norm: 0.9999990599317077, iteration: 135199
loss: 0.9973400831222534,grad_norm: 0.9096558543798914, iteration: 135200
loss: 1.1175156831741333,grad_norm: 0.9999995196293934, iteration: 135201
loss: 1.018863558769226,grad_norm: 0.7555323186347103, iteration: 135202
loss: 1.0451072454452515,grad_norm: 0.99999965062814, iteration: 135203
loss: 1.0550920963287354,grad_norm: 0.9999995927123921, iteration: 135204
loss: 0.9845073819160461,grad_norm: 0.7637282070953758, iteration: 135205
loss: 0.992494523525238,grad_norm: 0.7733653525389338, iteration: 135206
loss: 1.000796914100647,grad_norm: 0.9999992286375567, iteration: 135207
loss: 1.0198441743850708,grad_norm: 0.9999992558399452, iteration: 135208
loss: 1.0045355558395386,grad_norm: 0.991115067820807, iteration: 135209
loss: 1.0009912252426147,grad_norm: 0.8916239655972754, iteration: 135210
loss: 1.026745080947876,grad_norm: 0.885438090337606, iteration: 135211
loss: 0.9669541716575623,grad_norm: 0.9875717662453484, iteration: 135212
loss: 1.0151286125183105,grad_norm: 0.8533353300015589, iteration: 135213
loss: 1.0887446403503418,grad_norm: 0.8802799159218246, iteration: 135214
loss: 0.9890540838241577,grad_norm: 0.9999994858892363, iteration: 135215
loss: 1.0054408311843872,grad_norm: 0.9999998116445247, iteration: 135216
loss: 0.99876469373703,grad_norm: 0.7165302777303864, iteration: 135217
loss: 0.9892393946647644,grad_norm: 0.9125249143185816, iteration: 135218
loss: 1.0016772747039795,grad_norm: 0.9999991897549044, iteration: 135219
loss: 1.0095120668411255,grad_norm: 0.9627914984589424, iteration: 135220
loss: 1.019906759262085,grad_norm: 0.9999991530445034, iteration: 135221
loss: 1.0014317035675049,grad_norm: 0.9999989737406292, iteration: 135222
loss: 1.10768723487854,grad_norm: 0.9999999100015753, iteration: 135223
loss: 1.0437434911727905,grad_norm: 0.8026570062459433, iteration: 135224
loss: 1.0457226037979126,grad_norm: 0.9999993695354128, iteration: 135225
loss: 1.0009942054748535,grad_norm: 0.9999994742288943, iteration: 135226
loss: 0.9954282641410828,grad_norm: 0.7503322193474179, iteration: 135227
loss: 0.9737030863761902,grad_norm: 0.8853690528633898, iteration: 135228
loss: 0.99530428647995,grad_norm: 0.8849641244596906, iteration: 135229
loss: 1.0270190238952637,grad_norm: 0.9999998872885659, iteration: 135230
loss: 1.017775058746338,grad_norm: 0.9999996444599661, iteration: 135231
loss: 1.0057123899459839,grad_norm: 0.791156144165237, iteration: 135232
loss: 1.0004969835281372,grad_norm: 0.9947082293416333, iteration: 135233
loss: 1.0005830526351929,grad_norm: 0.8726195449871806, iteration: 135234
loss: 1.0070034265518188,grad_norm: 0.9605484566072239, iteration: 135235
loss: 0.9754777550697327,grad_norm: 0.9063502216691373, iteration: 135236
loss: 0.9850305318832397,grad_norm: 0.8732757528205735, iteration: 135237
loss: 0.9663126468658447,grad_norm: 0.9235051926465313, iteration: 135238
loss: 1.012747049331665,grad_norm: 0.9999997561484141, iteration: 135239
loss: 1.014517068862915,grad_norm: 0.9999991824897246, iteration: 135240
loss: 1.057449221611023,grad_norm: 0.9999994657884848, iteration: 135241
loss: 1.0126413106918335,grad_norm: 0.9999997828109439, iteration: 135242
loss: 1.0079331398010254,grad_norm: 0.9214331909079184, iteration: 135243
loss: 0.9792502522468567,grad_norm: 0.7229011307386761, iteration: 135244
loss: 1.0007449388504028,grad_norm: 0.8311628628776385, iteration: 135245
loss: 1.0156984329223633,grad_norm: 0.9999993377231686, iteration: 135246
loss: 0.9837623238563538,grad_norm: 0.9999996766035978, iteration: 135247
loss: 0.9625359773635864,grad_norm: 0.9718519178158986, iteration: 135248
loss: 1.0384368896484375,grad_norm: 0.9999998366483187, iteration: 135249
loss: 0.9911315441131592,grad_norm: 0.966348533100866, iteration: 135250
loss: 1.0014525651931763,grad_norm: 0.9999991434374804, iteration: 135251
loss: 0.9946905374526978,grad_norm: 0.7855743284018978, iteration: 135252
loss: 0.9551628828048706,grad_norm: 0.8646484046725788, iteration: 135253
loss: 0.9986650347709656,grad_norm: 0.9999989662684794, iteration: 135254
loss: 1.0347603559494019,grad_norm: 0.9999998567458173, iteration: 135255
loss: 0.9985557794570923,grad_norm: 0.7804196988345045, iteration: 135256
loss: 1.0299221277236938,grad_norm: 0.9091159855347263, iteration: 135257
loss: 0.9822003245353699,grad_norm: 0.9630297041214075, iteration: 135258
loss: 0.9888087511062622,grad_norm: 0.8248434226230941, iteration: 135259
loss: 0.9694643020629883,grad_norm: 0.9999991380616277, iteration: 135260
loss: 0.9975100159645081,grad_norm: 0.8912249782681877, iteration: 135261
loss: 0.9825596213340759,grad_norm: 0.9975331109607334, iteration: 135262
loss: 0.9919437170028687,grad_norm: 0.9158341896142203, iteration: 135263
loss: 1.0633465051651,grad_norm: 0.9999993199122389, iteration: 135264
loss: 0.9969548583030701,grad_norm: 0.7861824484989737, iteration: 135265
loss: 1.0010156631469727,grad_norm: 0.9373303198140959, iteration: 135266
loss: 1.0030944347381592,grad_norm: 0.758970719883745, iteration: 135267
loss: 1.0401620864868164,grad_norm: 0.8042900538443601, iteration: 135268
loss: 1.1700146198272705,grad_norm: 0.9999997117746361, iteration: 135269
loss: 1.019623875617981,grad_norm: 0.9999989956073327, iteration: 135270
loss: 1.0323758125305176,grad_norm: 0.938494473052481, iteration: 135271
loss: 1.027626395225525,grad_norm: 0.9999992154152142, iteration: 135272
loss: 0.9826849699020386,grad_norm: 0.9999997658059623, iteration: 135273
loss: 1.0205494165420532,grad_norm: 0.9999991375422349, iteration: 135274
loss: 1.0567131042480469,grad_norm: 0.9177126894147757, iteration: 135275
loss: 1.042111873626709,grad_norm: 0.9999997536879985, iteration: 135276
loss: 0.9915922284126282,grad_norm: 0.8668970474178779, iteration: 135277
loss: 1.0068856477737427,grad_norm: 0.921978644889704, iteration: 135278
loss: 0.9694227576255798,grad_norm: 0.835936517336302, iteration: 135279
loss: 1.002102017402649,grad_norm: 0.9999992797086421, iteration: 135280
loss: 1.023844599723816,grad_norm: 0.9056064188433631, iteration: 135281
loss: 0.971494197845459,grad_norm: 0.8457097721957698, iteration: 135282
loss: 0.9953362345695496,grad_norm: 0.887088898547197, iteration: 135283
loss: 1.024437427520752,grad_norm: 0.9999990118017549, iteration: 135284
loss: 1.0193368196487427,grad_norm: 0.9243865608878093, iteration: 135285
loss: 0.9993504881858826,grad_norm: 0.8247527650503904, iteration: 135286
loss: 0.9897897243499756,grad_norm: 0.7768510556580142, iteration: 135287
loss: 1.0041232109069824,grad_norm: 0.999999773270979, iteration: 135288
loss: 1.022052526473999,grad_norm: 0.8966438447234572, iteration: 135289
loss: 0.9669773578643799,grad_norm: 0.7398819933041936, iteration: 135290
loss: 0.9798910021781921,grad_norm: 0.8886183864138093, iteration: 135291
loss: 1.0053809881210327,grad_norm: 0.9999997755976529, iteration: 135292
loss: 1.0532565116882324,grad_norm: 0.9999994807211049, iteration: 135293
loss: 1.0184675455093384,grad_norm: 0.9999989933476048, iteration: 135294
loss: 0.9976863265037537,grad_norm: 0.8178426371695172, iteration: 135295
loss: 1.0048142671585083,grad_norm: 0.8391580345686572, iteration: 135296
loss: 0.994686484336853,grad_norm: 0.7639658680968071, iteration: 135297
loss: 0.9974503517150879,grad_norm: 0.9550919432528667, iteration: 135298
loss: 0.95639967918396,grad_norm: 0.9999991250428519, iteration: 135299
loss: 0.9790982604026794,grad_norm: 0.7551844951752681, iteration: 135300
loss: 1.00373375415802,grad_norm: 0.7322272069791331, iteration: 135301
loss: 1.0016448497772217,grad_norm: 0.884623913823069, iteration: 135302
loss: 0.9794939160346985,grad_norm: 0.8079987650288682, iteration: 135303
loss: 0.99897301197052,grad_norm: 0.831415022340654, iteration: 135304
loss: 1.0036643743515015,grad_norm: 0.9311529773279924, iteration: 135305
loss: 1.0003938674926758,grad_norm: 0.7740665885013475, iteration: 135306
loss: 1.0121411085128784,grad_norm: 0.9154177066630174, iteration: 135307
loss: 0.9741833806037903,grad_norm: 0.9999991231553017, iteration: 135308
loss: 1.0285049676895142,grad_norm: 0.8358434355065749, iteration: 135309
loss: 1.0246648788452148,grad_norm: 0.9999991583079375, iteration: 135310
loss: 0.9913936257362366,grad_norm: 0.948080846304473, iteration: 135311
loss: 1.0243782997131348,grad_norm: 0.8841860231608326, iteration: 135312
loss: 1.0883845090866089,grad_norm: 0.9999993309719044, iteration: 135313
loss: 0.9659854769706726,grad_norm: 0.9055070588115713, iteration: 135314
loss: 1.0067929029464722,grad_norm: 0.9999991521474735, iteration: 135315
loss: 0.9922183156013489,grad_norm: 0.8137864032577147, iteration: 135316
loss: 1.0054014921188354,grad_norm: 0.8485882071579608, iteration: 135317
loss: 0.9835295081138611,grad_norm: 0.9999991942958392, iteration: 135318
loss: 1.0070436000823975,grad_norm: 0.8842240266137148, iteration: 135319
loss: 1.0099437236785889,grad_norm: 0.7447232536774915, iteration: 135320
loss: 1.0099705457687378,grad_norm: 0.8483430482832606, iteration: 135321
loss: 0.9852796792984009,grad_norm: 0.8461141826534962, iteration: 135322
loss: 1.0019413232803345,grad_norm: 0.999999208750691, iteration: 135323
loss: 0.9959760904312134,grad_norm: 0.9999992552401196, iteration: 135324
loss: 0.9716334342956543,grad_norm: 0.7581422690928593, iteration: 135325
loss: 1.0065809488296509,grad_norm: 0.9999989917900757, iteration: 135326
loss: 1.0133962631225586,grad_norm: 0.8332266117631333, iteration: 135327
loss: 0.9888766407966614,grad_norm: 0.999999005724126, iteration: 135328
loss: 0.985859751701355,grad_norm: 0.8813320110733603, iteration: 135329
loss: 0.9671485424041748,grad_norm: 0.928706911227054, iteration: 135330
loss: 1.0103998184204102,grad_norm: 0.8931938442252799, iteration: 135331
loss: 1.0333209037780762,grad_norm: 0.9605218117472942, iteration: 135332
loss: 1.0105977058410645,grad_norm: 0.8531314109953168, iteration: 135333
loss: 1.0455636978149414,grad_norm: 0.9999996641691096, iteration: 135334
loss: 1.0521212816238403,grad_norm: 0.8937170409137937, iteration: 135335
loss: 0.9867643117904663,grad_norm: 0.8877579756011733, iteration: 135336
loss: 0.9762482643127441,grad_norm: 0.9893572760891086, iteration: 135337
loss: 1.0078526735305786,grad_norm: 0.9999990029954672, iteration: 135338
loss: 1.037588357925415,grad_norm: 0.8201793618924739, iteration: 135339
loss: 1.0255693197250366,grad_norm: 0.8722066620300619, iteration: 135340
loss: 1.038756012916565,grad_norm: 0.9999994036609194, iteration: 135341
loss: 1.0134638547897339,grad_norm: 0.7210824145756279, iteration: 135342
loss: 0.9920167922973633,grad_norm: 0.9504712089270142, iteration: 135343
loss: 0.9591526389122009,grad_norm: 0.9331657919182554, iteration: 135344
loss: 0.996001660823822,grad_norm: 0.8919118581781186, iteration: 135345
loss: 0.9810340404510498,grad_norm: 0.9999991540059059, iteration: 135346
loss: 1.0380934476852417,grad_norm: 0.7708918900718263, iteration: 135347
loss: 1.0618802309036255,grad_norm: 0.9999998305381665, iteration: 135348
loss: 0.9922351241111755,grad_norm: 0.7749220520997063, iteration: 135349
loss: 0.9877930283546448,grad_norm: 0.9999990493201902, iteration: 135350
loss: 1.036631464958191,grad_norm: 0.7468755475274428, iteration: 135351
loss: 1.024667501449585,grad_norm: 0.9938126136671197, iteration: 135352
loss: 0.9961079955101013,grad_norm: 0.9669096821463069, iteration: 135353
loss: 0.9704349637031555,grad_norm: 0.9435456944573772, iteration: 135354
loss: 0.977677583694458,grad_norm: 0.8514104328702009, iteration: 135355
loss: 0.9907292127609253,grad_norm: 0.9216762679119375, iteration: 135356
loss: 1.005326747894287,grad_norm: 0.9999990519848526, iteration: 135357
loss: 0.9926292896270752,grad_norm: 0.7522363853848063, iteration: 135358
loss: 0.9598067402839661,grad_norm: 0.8748671054583429, iteration: 135359
loss: 1.0131465196609497,grad_norm: 0.9851618138415481, iteration: 135360
loss: 0.9749449491500854,grad_norm: 0.8494596832571071, iteration: 135361
loss: 0.9723088145256042,grad_norm: 0.9431707497989282, iteration: 135362
loss: 0.9821560382843018,grad_norm: 0.8853786061721486, iteration: 135363
loss: 0.9830728769302368,grad_norm: 0.999999027759202, iteration: 135364
loss: 1.0373789072036743,grad_norm: 0.9775753400408086, iteration: 135365
loss: 1.0197097063064575,grad_norm: 0.9999990675403679, iteration: 135366
loss: 1.01702082157135,grad_norm: 0.999999649435165, iteration: 135367
loss: 1.11322021484375,grad_norm: 0.9999999252544686, iteration: 135368
loss: 1.0135825872421265,grad_norm: 0.9373462663744598, iteration: 135369
loss: 1.010353922843933,grad_norm: 0.8611797445689107, iteration: 135370
loss: 0.9751431345939636,grad_norm: 0.8630705851368582, iteration: 135371
loss: 1.008860468864441,grad_norm: 0.9163494981761183, iteration: 135372
loss: 0.9849244356155396,grad_norm: 0.8737953347843771, iteration: 135373
loss: 0.9992357492446899,grad_norm: 0.999999107702958, iteration: 135374
loss: 1.0143671035766602,grad_norm: 0.9999998114733488, iteration: 135375
loss: 0.9775528311729431,grad_norm: 0.9077614947448245, iteration: 135376
loss: 0.975132167339325,grad_norm: 0.9999990695322994, iteration: 135377
loss: 1.0202826261520386,grad_norm: 0.8894398882332758, iteration: 135378
loss: 1.0296919345855713,grad_norm: 0.8524584455170964, iteration: 135379
loss: 0.9638699889183044,grad_norm: 0.8044084038390761, iteration: 135380
loss: 1.014829158782959,grad_norm: 0.899325723786926, iteration: 135381
loss: 1.0095751285552979,grad_norm: 0.7894174756347722, iteration: 135382
loss: 1.0134063959121704,grad_norm: 0.999999648906191, iteration: 135383
loss: 0.9711918234825134,grad_norm: 0.8898092841606442, iteration: 135384
loss: 0.9712278246879578,grad_norm: 0.8536061291124158, iteration: 135385
loss: 1.0237101316452026,grad_norm: 0.8225484721424192, iteration: 135386
loss: 0.9997538924217224,grad_norm: 0.888845111226327, iteration: 135387
loss: 1.0082309246063232,grad_norm: 0.9999991803624281, iteration: 135388
loss: 1.0144597291946411,grad_norm: 0.9640525871404042, iteration: 135389
loss: 1.0093183517456055,grad_norm: 0.9503715070728845, iteration: 135390
loss: 1.017053246498108,grad_norm: 0.9999993742549153, iteration: 135391
loss: 1.0500876903533936,grad_norm: 0.9718515820060246, iteration: 135392
loss: 0.9932987689971924,grad_norm: 0.9999999392151285, iteration: 135393
loss: 1.0089737176895142,grad_norm: 0.8814477921183691, iteration: 135394
loss: 1.0808638334274292,grad_norm: 0.9999992931566053, iteration: 135395
loss: 0.9817861318588257,grad_norm: 0.8590283736204932, iteration: 135396
loss: 0.9906165599822998,grad_norm: 0.911288674244055, iteration: 135397
loss: 0.998077392578125,grad_norm: 0.8788051654669372, iteration: 135398
loss: 1.0032762289047241,grad_norm: 0.8908751729554587, iteration: 135399
loss: 0.9983146786689758,grad_norm: 0.9999997174235624, iteration: 135400
loss: 1.0745365619659424,grad_norm: 0.9999999316507272, iteration: 135401
loss: 1.0306750535964966,grad_norm: 0.8992626023978846, iteration: 135402
loss: 0.9910363554954529,grad_norm: 0.9999992422466736, iteration: 135403
loss: 0.9708149433135986,grad_norm: 0.947640186545646, iteration: 135404
loss: 0.9744923710823059,grad_norm: 0.9713161889416367, iteration: 135405
loss: 1.0384972095489502,grad_norm: 0.9999990078933291, iteration: 135406
loss: 0.988023042678833,grad_norm: 0.9100105225560621, iteration: 135407
loss: 1.0409961938858032,grad_norm: 0.999999525646117, iteration: 135408
loss: 1.0447596311569214,grad_norm: 0.9211168297998236, iteration: 135409
loss: 0.9757694005966187,grad_norm: 0.9999991114399875, iteration: 135410
loss: 0.9660733938217163,grad_norm: 0.678332166126493, iteration: 135411
loss: 1.02461576461792,grad_norm: 0.8841739638848145, iteration: 135412
loss: 0.9981963038444519,grad_norm: 0.9074714414009603, iteration: 135413
loss: 0.9938246011734009,grad_norm: 0.8436315480499789, iteration: 135414
loss: 0.9912248253822327,grad_norm: 0.999999145152401, iteration: 135415
loss: 1.0049668550491333,grad_norm: 0.6801326599155287, iteration: 135416
loss: 1.0335712432861328,grad_norm: 0.8121686631052983, iteration: 135417
loss: 0.9837872385978699,grad_norm: 0.9999992836796158, iteration: 135418
loss: 1.0029058456420898,grad_norm: 0.9999991883116769, iteration: 135419
loss: 0.9791383743286133,grad_norm: 0.8175591182026567, iteration: 135420
loss: 1.0204957723617554,grad_norm: 0.8192288998946352, iteration: 135421
loss: 1.0212128162384033,grad_norm: 0.9999991633181855, iteration: 135422
loss: 1.0037851333618164,grad_norm: 0.9224451048119088, iteration: 135423
loss: 0.9852025508880615,grad_norm: 0.8777539835582615, iteration: 135424
loss: 1.047609806060791,grad_norm: 0.9999995120847534, iteration: 135425
loss: 1.0239871740341187,grad_norm: 0.9999997204255979, iteration: 135426
loss: 0.9940703511238098,grad_norm: 0.8694428231671602, iteration: 135427
loss: 1.0251330137252808,grad_norm: 0.9999993137254484, iteration: 135428
loss: 0.9869539737701416,grad_norm: 0.8556777676502307, iteration: 135429
loss: 1.0127785205841064,grad_norm: 0.9999989561712863, iteration: 135430
loss: 0.9874545931816101,grad_norm: 0.713929552177704, iteration: 135431
loss: 1.0074033737182617,grad_norm: 0.9136387580174177, iteration: 135432
loss: 1.0061694383621216,grad_norm: 0.8963732214219153, iteration: 135433
loss: 0.9797906279563904,grad_norm: 0.8701087982067934, iteration: 135434
loss: 1.025495171546936,grad_norm: 0.9435194108349569, iteration: 135435
loss: 1.0277959108352661,grad_norm: 0.8483627808145793, iteration: 135436
loss: 1.028469443321228,grad_norm: 0.7103136735652287, iteration: 135437
loss: 1.0163612365722656,grad_norm: 0.7702117455877394, iteration: 135438
loss: 0.9592283368110657,grad_norm: 0.8816647563353168, iteration: 135439
loss: 1.0305466651916504,grad_norm: 0.9999991538349291, iteration: 135440
loss: 0.9896267056465149,grad_norm: 0.9459118177483493, iteration: 135441
loss: 0.9819350838661194,grad_norm: 0.9666109437608963, iteration: 135442
loss: 0.9946970343589783,grad_norm: 0.9916107309693442, iteration: 135443
loss: 1.008315086364746,grad_norm: 0.8216526008073707, iteration: 135444
loss: 0.9755479693412781,grad_norm: 0.7331764068839305, iteration: 135445
loss: 1.054049015045166,grad_norm: 0.8402547549075754, iteration: 135446
loss: 0.9323200583457947,grad_norm: 0.9999989707383734, iteration: 135447
loss: 1.0699169635772705,grad_norm: 0.9999991242587862, iteration: 135448
loss: 0.997588574886322,grad_norm: 0.86651441179136, iteration: 135449
loss: 0.9649457335472107,grad_norm: 0.9476287221114542, iteration: 135450
loss: 0.9730163812637329,grad_norm: 0.833416553072419, iteration: 135451
loss: 1.0073161125183105,grad_norm: 0.8829517885449941, iteration: 135452
loss: 1.018729567527771,grad_norm: 0.7455062004349639, iteration: 135453
loss: 1.0912127494812012,grad_norm: 0.7557422581586172, iteration: 135454
loss: 0.9924965500831604,grad_norm: 0.8285559526266346, iteration: 135455
loss: 1.0164618492126465,grad_norm: 0.9999993363871171, iteration: 135456
loss: 0.9855627417564392,grad_norm: 0.9382281141021855, iteration: 135457
loss: 1.0096458196640015,grad_norm: 0.9999997325230908, iteration: 135458
loss: 1.0480413436889648,grad_norm: 0.7470718654033949, iteration: 135459
loss: 1.1138689517974854,grad_norm: 0.9999992760196944, iteration: 135460
loss: 1.0225027799606323,grad_norm: 0.8776908173026348, iteration: 135461
loss: 1.020682692527771,grad_norm: 0.8307714763723977, iteration: 135462
loss: 0.9951826930046082,grad_norm: 0.9999990264531801, iteration: 135463
loss: 1.0143307447433472,grad_norm: 0.9999991256538523, iteration: 135464
loss: 1.0098296403884888,grad_norm: 0.999999146643419, iteration: 135465
loss: 1.0725924968719482,grad_norm: 0.9999991836891134, iteration: 135466
loss: 0.9881110191345215,grad_norm: 0.9999990959263952, iteration: 135467
loss: 0.9843804240226746,grad_norm: 0.9249315755746996, iteration: 135468
loss: 1.0395950078964233,grad_norm: 0.8688774104681056, iteration: 135469
loss: 1.0163849592208862,grad_norm: 0.9999574361246033, iteration: 135470
loss: 1.046781301498413,grad_norm: 0.999999282783833, iteration: 135471
loss: 1.0387262105941772,grad_norm: 0.9642144443382336, iteration: 135472
loss: 1.0051918029785156,grad_norm: 0.9999992553622642, iteration: 135473
loss: 1.025033950805664,grad_norm: 0.9999992089376322, iteration: 135474
loss: 1.074271321296692,grad_norm: 0.8574535332893491, iteration: 135475
loss: 0.9895981550216675,grad_norm: 0.9999994868493932, iteration: 135476
loss: 1.0367381572723389,grad_norm: 0.7840969523091152, iteration: 135477
loss: 1.0028209686279297,grad_norm: 0.8930694639142891, iteration: 135478
loss: 1.008742332458496,grad_norm: 0.8297173941975652, iteration: 135479
loss: 0.9830149412155151,grad_norm: 0.8204040734084073, iteration: 135480
loss: 1.0332287549972534,grad_norm: 0.9673680523073794, iteration: 135481
loss: 1.009462833404541,grad_norm: 0.7093515595287666, iteration: 135482
loss: 1.0198348760604858,grad_norm: 0.9999991702539388, iteration: 135483
loss: 0.9422826766967773,grad_norm: 0.808910163213856, iteration: 135484
loss: 0.9949391484260559,grad_norm: 0.7522713327281225, iteration: 135485
loss: 1.0015850067138672,grad_norm: 0.999999046305605, iteration: 135486
loss: 0.9929766654968262,grad_norm: 0.894708196696811, iteration: 135487
loss: 1.00668466091156,grad_norm: 0.9999997109509605, iteration: 135488
loss: 1.0029311180114746,grad_norm: 0.9490625404596202, iteration: 135489
loss: 1.0802377462387085,grad_norm: 0.9999991258642857, iteration: 135490
loss: 1.0274136066436768,grad_norm: 0.7898158575448064, iteration: 135491
loss: 0.9943771362304688,grad_norm: 0.9999992415139213, iteration: 135492
loss: 1.02825129032135,grad_norm: 0.7506139361353402, iteration: 135493
loss: 1.0129600763320923,grad_norm: 0.8782973971472562, iteration: 135494
loss: 1.0054452419281006,grad_norm: 0.8675422670625559, iteration: 135495
loss: 1.0222840309143066,grad_norm: 0.9412080836149067, iteration: 135496
loss: 1.0186644792556763,grad_norm: 0.7741870502049901, iteration: 135497
loss: 1.0046707391738892,grad_norm: 0.8865697856374675, iteration: 135498
loss: 0.9962309002876282,grad_norm: 0.9651275359074192, iteration: 135499
loss: 1.0094012022018433,grad_norm: 0.9999998058732359, iteration: 135500
loss: 1.0397683382034302,grad_norm: 0.9999990131785819, iteration: 135501
loss: 1.0040451288223267,grad_norm: 0.7499840350925415, iteration: 135502
loss: 0.9509700536727905,grad_norm: 0.8942082662074158, iteration: 135503
loss: 1.032982587814331,grad_norm: 0.9999989724423787, iteration: 135504
loss: 1.0007431507110596,grad_norm: 0.9281751653911089, iteration: 135505
loss: 1.0034071207046509,grad_norm: 0.9963839464496801, iteration: 135506
loss: 0.9872937798500061,grad_norm: 0.9568320124486799, iteration: 135507
loss: 1.0475068092346191,grad_norm: 0.9999998660078026, iteration: 135508
loss: 1.0237033367156982,grad_norm: 0.9999999858283828, iteration: 135509
loss: 1.036075234413147,grad_norm: 0.9999992238506182, iteration: 135510
loss: 0.9741180539131165,grad_norm: 0.7756653789318644, iteration: 135511
loss: 0.9907981753349304,grad_norm: 0.9487096177346622, iteration: 135512
loss: 1.0103235244750977,grad_norm: 0.7572073889317886, iteration: 135513
loss: 1.064300775527954,grad_norm: 0.9999991483713108, iteration: 135514
loss: 0.987190842628479,grad_norm: 0.9659216759824371, iteration: 135515
loss: 1.0215721130371094,grad_norm: 0.7599901126922544, iteration: 135516
loss: 0.9687903523445129,grad_norm: 0.9381467388910585, iteration: 135517
loss: 1.0181368589401245,grad_norm: 0.8441138902013853, iteration: 135518
loss: 1.024290680885315,grad_norm: 0.8091601535355868, iteration: 135519
loss: 1.014479160308838,grad_norm: 0.9999994554222872, iteration: 135520
loss: 1.0072928667068481,grad_norm: 0.7916115893761104, iteration: 135521
loss: 1.0148831605911255,grad_norm: 0.9134614925282494, iteration: 135522
loss: 1.033636212348938,grad_norm: 0.9999990482177563, iteration: 135523
loss: 0.9942752122879028,grad_norm: 0.8573395584902497, iteration: 135524
loss: 0.958766758441925,grad_norm: 0.85527136167421, iteration: 135525
loss: 0.9969504475593567,grad_norm: 0.9999993245163411, iteration: 135526
loss: 1.174094796180725,grad_norm: 0.9999996877863022, iteration: 135527
loss: 1.0296142101287842,grad_norm: 0.8805354763041218, iteration: 135528
loss: 0.9892448782920837,grad_norm: 0.9541973075747746, iteration: 135529
loss: 1.0430561304092407,grad_norm: 0.8806520601221822, iteration: 135530
loss: 1.012582540512085,grad_norm: 0.8586469461302765, iteration: 135531
loss: 1.1093578338623047,grad_norm: 0.9999995248930669, iteration: 135532
loss: 0.9544235467910767,grad_norm: 0.8318160402775343, iteration: 135533
loss: 1.0474718809127808,grad_norm: 0.9999990748736907, iteration: 135534
loss: 0.9792038202285767,grad_norm: 0.9999990137616145, iteration: 135535
loss: 1.0049827098846436,grad_norm: 0.9196827008986102, iteration: 135536
loss: 1.2216397523880005,grad_norm: 0.9999993299007623, iteration: 135537
loss: 1.0125269889831543,grad_norm: 0.8843164328699353, iteration: 135538
loss: 0.9976073503494263,grad_norm: 0.8875653825414329, iteration: 135539
loss: 1.0380206108093262,grad_norm: 0.9999993991553121, iteration: 135540
loss: 1.029116153717041,grad_norm: 0.8181692757167051, iteration: 135541
loss: 1.0834221839904785,grad_norm: 0.8143962321239919, iteration: 135542
loss: 0.9806112051010132,grad_norm: 0.9999992718270247, iteration: 135543
loss: 1.088542103767395,grad_norm: 0.9999993485715475, iteration: 135544
loss: 1.003562092781067,grad_norm: 0.8167697316106185, iteration: 135545
loss: 0.9770886301994324,grad_norm: 0.9494445910000259, iteration: 135546
loss: 0.9764071702957153,grad_norm: 0.9999996428278839, iteration: 135547
loss: 1.0117052793502808,grad_norm: 0.9611475386279581, iteration: 135548
loss: 1.0435519218444824,grad_norm: 0.999999262529832, iteration: 135549
loss: 0.9956095218658447,grad_norm: 0.9999991752115909, iteration: 135550
loss: 1.0926371812820435,grad_norm: 0.9999999713777983, iteration: 135551
loss: 1.1255192756652832,grad_norm: 0.9999999795120965, iteration: 135552
loss: 1.0636451244354248,grad_norm: 0.9999991320720527, iteration: 135553
loss: 1.0837063789367676,grad_norm: 0.9257750053688231, iteration: 135554
loss: 1.030929684638977,grad_norm: 0.95355368354171, iteration: 135555
loss: 0.9904736876487732,grad_norm: 0.7721808044332794, iteration: 135556
loss: 1.0887689590454102,grad_norm: 0.9999993423073683, iteration: 135557
loss: 0.991173505783081,grad_norm: 0.9999995980988574, iteration: 135558
loss: 1.0026779174804688,grad_norm: 0.9184463436966631, iteration: 135559
loss: 1.012256145477295,grad_norm: 0.8431936688602184, iteration: 135560
loss: 1.0189220905303955,grad_norm: 0.9753041884164666, iteration: 135561
loss: 1.007674217224121,grad_norm: 0.8747830185536896, iteration: 135562
loss: 1.0063302516937256,grad_norm: 0.7750061071040653, iteration: 135563
loss: 1.029895544052124,grad_norm: 0.9999999199443554, iteration: 135564
loss: 1.0961644649505615,grad_norm: 0.9790057836846331, iteration: 135565
loss: 0.9665257334709167,grad_norm: 0.8650083476372271, iteration: 135566
loss: 1.0076936483383179,grad_norm: 0.8117186114835806, iteration: 135567
loss: 1.0079439878463745,grad_norm: 0.8499983421595275, iteration: 135568
loss: 0.9813101887702942,grad_norm: 0.7568363068976258, iteration: 135569
loss: 1.0044381618499756,grad_norm: 0.9155274258158147, iteration: 135570
loss: 1.0010087490081787,grad_norm: 0.8925156595216026, iteration: 135571
loss: 1.0676392316818237,grad_norm: 0.9551995153718696, iteration: 135572
loss: 1.0051857233047485,grad_norm: 0.8788873181710034, iteration: 135573
loss: 0.9975924491882324,grad_norm: 0.9745357272556758, iteration: 135574
loss: 1.075929045677185,grad_norm: 0.9999994177559733, iteration: 135575
loss: 1.0207700729370117,grad_norm: 0.9999989986545345, iteration: 135576
loss: 1.0178123712539673,grad_norm: 0.999999118134026, iteration: 135577
loss: 1.104748249053955,grad_norm: 0.9999992389652322, iteration: 135578
loss: 1.0586425065994263,grad_norm: 0.9999998404829212, iteration: 135579
loss: 0.9971559047698975,grad_norm: 0.8300690477200171, iteration: 135580
loss: 1.034037709236145,grad_norm: 0.9999998022024221, iteration: 135581
loss: 1.0212215185165405,grad_norm: 1.0000000118342953, iteration: 135582
loss: 1.0850189924240112,grad_norm: 0.9999993328187909, iteration: 135583
loss: 1.0116685628890991,grad_norm: 0.8606749096525622, iteration: 135584
loss: 1.0489648580551147,grad_norm: 0.9999996915839959, iteration: 135585
loss: 1.0351899862289429,grad_norm: 0.9999990259304506, iteration: 135586
loss: 0.9755976796150208,grad_norm: 0.9999999395529238, iteration: 135587
loss: 1.0031590461730957,grad_norm: 0.9092535119150619, iteration: 135588
loss: 1.0616322755813599,grad_norm: 0.99999943490268, iteration: 135589
loss: 0.9801765084266663,grad_norm: 0.8312627372119051, iteration: 135590
loss: 0.9816586971282959,grad_norm: 0.8767611172054717, iteration: 135591
loss: 1.011702299118042,grad_norm: 0.8136736800963189, iteration: 135592
loss: 1.0118416547775269,grad_norm: 0.910109005286037, iteration: 135593
loss: 1.0245195627212524,grad_norm: 0.9999991359975126, iteration: 135594
loss: 0.9761652946472168,grad_norm: 0.9999993132110686, iteration: 135595
loss: 1.0288313627243042,grad_norm: 1.0000000812141303, iteration: 135596
loss: 1.0249199867248535,grad_norm: 0.9999993525299444, iteration: 135597
loss: 0.9966197609901428,grad_norm: 0.8463887301090728, iteration: 135598
loss: 1.039271354675293,grad_norm: 0.9999996515416373, iteration: 135599
loss: 0.9777568578720093,grad_norm: 0.9999991751944965, iteration: 135600
loss: 1.0141918659210205,grad_norm: 0.9981549660177784, iteration: 135601
loss: 0.9931875467300415,grad_norm: 0.7604880198402163, iteration: 135602
loss: 1.026718020439148,grad_norm: 0.8231992343721688, iteration: 135603
loss: 1.027400255203247,grad_norm: 0.9618846105874783, iteration: 135604
loss: 1.0360620021820068,grad_norm: 0.9999992851740837, iteration: 135605
loss: 0.9697434306144714,grad_norm: 0.7131265906842976, iteration: 135606
loss: 1.010913372039795,grad_norm: 0.8729739144981539, iteration: 135607
loss: 1.0288439989089966,grad_norm: 0.9999990529811476, iteration: 135608
loss: 1.138667345046997,grad_norm: 0.9999993460622941, iteration: 135609
loss: 1.0635840892791748,grad_norm: 0.9999995954963834, iteration: 135610
loss: 1.0131208896636963,grad_norm: 0.9999990128652502, iteration: 135611
loss: 1.0180200338363647,grad_norm: 0.7535827394382199, iteration: 135612
loss: 1.0472748279571533,grad_norm: 0.7495778451485146, iteration: 135613
loss: 1.0600312948226929,grad_norm: 0.8956494709933985, iteration: 135614
loss: 0.9833377599716187,grad_norm: 0.9999991561299126, iteration: 135615
loss: 0.9963287711143494,grad_norm: 0.9684344879994234, iteration: 135616
loss: 1.0363006591796875,grad_norm: 0.8800522053702012, iteration: 135617
loss: 1.0017818212509155,grad_norm: 0.8016402647021327, iteration: 135618
loss: 0.9962145090103149,grad_norm: 0.9092091646798286, iteration: 135619
loss: 1.0339592695236206,grad_norm: 0.9126993705719547, iteration: 135620
loss: 1.0030673742294312,grad_norm: 0.9535663503154147, iteration: 135621
loss: 0.9791560769081116,grad_norm: 0.9456437962923541, iteration: 135622
loss: 0.9691445827484131,grad_norm: 0.8736182328726947, iteration: 135623
loss: 1.0520282983779907,grad_norm: 0.9999991707757018, iteration: 135624
loss: 1.0203310251235962,grad_norm: 0.8374220450215986, iteration: 135625
loss: 0.9831692576408386,grad_norm: 0.7850731580015013, iteration: 135626
loss: 1.0200287103652954,grad_norm: 0.8350850755406087, iteration: 135627
loss: 1.026888132095337,grad_norm: 0.9999994534310759, iteration: 135628
loss: 1.0010720491409302,grad_norm: 0.9290840591215634, iteration: 135629
loss: 1.0933337211608887,grad_norm: 0.7230980733044281, iteration: 135630
loss: 1.021406650543213,grad_norm: 0.7856754392034663, iteration: 135631
loss: 1.0113192796707153,grad_norm: 0.999999073063399, iteration: 135632
loss: 0.9590047001838684,grad_norm: 0.8606117075835111, iteration: 135633
loss: 0.9827643036842346,grad_norm: 0.9024962833420918, iteration: 135634
loss: 1.0107874870300293,grad_norm: 0.9999994805724741, iteration: 135635
loss: 1.0359052419662476,grad_norm: 0.9999997438361714, iteration: 135636
loss: 1.0722657442092896,grad_norm: 0.9290069688986868, iteration: 135637
loss: 0.985813319683075,grad_norm: 0.9758693433399152, iteration: 135638
loss: 0.9879991412162781,grad_norm: 0.8439744268179057, iteration: 135639
loss: 1.1551570892333984,grad_norm: 0.9999994307675992, iteration: 135640
loss: 0.9968771934509277,grad_norm: 0.8453969590017255, iteration: 135641
loss: 0.9922499060630798,grad_norm: 0.8201238029431018, iteration: 135642
loss: 1.0003178119659424,grad_norm: 0.9999998861943209, iteration: 135643
loss: 0.9902629256248474,grad_norm: 0.7697518859556218, iteration: 135644
loss: 0.9958479404449463,grad_norm: 0.999999075123015, iteration: 135645
loss: 0.988865852355957,grad_norm: 0.9636658311842105, iteration: 135646
loss: 0.9579393863677979,grad_norm: 0.9051710872500356, iteration: 135647
loss: 1.0143649578094482,grad_norm: 0.7960991654434563, iteration: 135648
loss: 1.001644492149353,grad_norm: 0.85882634971567, iteration: 135649
loss: 0.9769608378410339,grad_norm: 0.9999990577683916, iteration: 135650
loss: 0.9882917404174805,grad_norm: 0.7759044091652605, iteration: 135651
loss: 0.9742019176483154,grad_norm: 0.8989456123672454, iteration: 135652
loss: 1.004695177078247,grad_norm: 0.9999991422431933, iteration: 135653
loss: 1.017136812210083,grad_norm: 0.9431472740606727, iteration: 135654
loss: 0.9670455455780029,grad_norm: 0.74178515926008, iteration: 135655
loss: 1.0276374816894531,grad_norm: 0.9999996737742298, iteration: 135656
loss: 1.012675166130066,grad_norm: 0.9659495217571215, iteration: 135657
loss: 1.0569061040878296,grad_norm: 0.999999236274127, iteration: 135658
loss: 1.0184909105300903,grad_norm: 0.9159553494538377, iteration: 135659
loss: 1.2762032747268677,grad_norm: 0.9999996501632256, iteration: 135660
loss: 1.0058008432388306,grad_norm: 0.9464618045951664, iteration: 135661
loss: 0.9655835628509521,grad_norm: 0.7735571272917312, iteration: 135662
loss: 0.9221420288085938,grad_norm: 0.8505647357574968, iteration: 135663
loss: 1.0093027353286743,grad_norm: 0.9999991173149448, iteration: 135664
loss: 1.0998879671096802,grad_norm: 0.9999997505287177, iteration: 135665
loss: 1.0119458436965942,grad_norm: 0.8275668332289721, iteration: 135666
loss: 1.0803394317626953,grad_norm: 0.9999990825650528, iteration: 135667
loss: 1.0231938362121582,grad_norm: 0.8155979245559069, iteration: 135668
loss: 1.0069621801376343,grad_norm: 0.9999994197529843, iteration: 135669
loss: 1.0281428098678589,grad_norm: 0.9999988230738612, iteration: 135670
loss: 1.001372218132019,grad_norm: 0.8956887819676883, iteration: 135671
loss: 1.0169786214828491,grad_norm: 0.9999995235505991, iteration: 135672
loss: 1.0903103351593018,grad_norm: 0.9999991296325081, iteration: 135673
loss: 0.9603070020675659,grad_norm: 0.9243782238377174, iteration: 135674
loss: 0.9674879908561707,grad_norm: 0.7811251744383972, iteration: 135675
loss: 1.1031545400619507,grad_norm: 0.9999991477591441, iteration: 135676
loss: 1.1197913885116577,grad_norm: 0.9999995912936975, iteration: 135677
loss: 1.0259499549865723,grad_norm: 0.9999995298727758, iteration: 135678
loss: 1.0344172716140747,grad_norm: 0.8591969247269268, iteration: 135679
loss: 0.9944136738777161,grad_norm: 0.999999127845757, iteration: 135680
loss: 1.004797101020813,grad_norm: 0.9537756589324253, iteration: 135681
loss: 1.0236800909042358,grad_norm: 0.8440715601572101, iteration: 135682
loss: 1.1549550294876099,grad_norm: 0.9999992112112025, iteration: 135683
loss: 1.0081093311309814,grad_norm: 0.8440600757975907, iteration: 135684
loss: 1.0089459419250488,grad_norm: 0.9038412713924462, iteration: 135685
loss: 1.005500078201294,grad_norm: 0.9215831025711838, iteration: 135686
loss: 1.031334400177002,grad_norm: 0.9999993580219378, iteration: 135687
loss: 1.0443401336669922,grad_norm: 0.8919262758836836, iteration: 135688
loss: 1.0278714895248413,grad_norm: 0.9999998760931863, iteration: 135689
loss: 1.01211416721344,grad_norm: 0.9999991175964958, iteration: 135690
loss: 1.0553685426712036,grad_norm: 0.9191679384532666, iteration: 135691
loss: 0.9797120094299316,grad_norm: 0.9428750005042463, iteration: 135692
loss: 0.9942634701728821,grad_norm: 0.999999280569312, iteration: 135693
loss: 1.061193823814392,grad_norm: 0.9999990719810866, iteration: 135694
loss: 0.9819712042808533,grad_norm: 0.9510880627131137, iteration: 135695
loss: 1.007952094078064,grad_norm: 0.7257069762702203, iteration: 135696
loss: 1.0284913778305054,grad_norm: 0.9999994270765881, iteration: 135697
loss: 1.0095516443252563,grad_norm: 0.8142317425814659, iteration: 135698
loss: 1.0110657215118408,grad_norm: 0.8920979752782301, iteration: 135699
loss: 1.0077441930770874,grad_norm: 0.781765090168899, iteration: 135700
loss: 1.0016604661941528,grad_norm: 0.9999996841893664, iteration: 135701
loss: 1.0184708833694458,grad_norm: 0.9999995894801875, iteration: 135702
loss: 0.9899784922599792,grad_norm: 0.8527126189851455, iteration: 135703
loss: 0.9818928837776184,grad_norm: 0.8668781925327881, iteration: 135704
loss: 0.9715319275856018,grad_norm: 0.9999994404964986, iteration: 135705
loss: 1.0055056810379028,grad_norm: 0.9049059222232617, iteration: 135706
loss: 1.0048469305038452,grad_norm: 0.8530333790096898, iteration: 135707
loss: 0.9782916307449341,grad_norm: 0.835577992068347, iteration: 135708
loss: 1.0423991680145264,grad_norm: 0.9829647066640097, iteration: 135709
loss: 1.0464142560958862,grad_norm: 0.9999990546677621, iteration: 135710
loss: 1.0140492916107178,grad_norm: 0.9999993907167601, iteration: 135711
loss: 0.9935766458511353,grad_norm: 0.9235630243346087, iteration: 135712
loss: 1.0027694702148438,grad_norm: 0.9734416754426263, iteration: 135713
loss: 1.0327094793319702,grad_norm: 0.9999991544547713, iteration: 135714
loss: 1.0056055784225464,grad_norm: 0.999999255027713, iteration: 135715
loss: 1.030094027519226,grad_norm: 0.9999994140912654, iteration: 135716
loss: 1.0170074701309204,grad_norm: 0.9551971026585993, iteration: 135717
loss: 0.9950762391090393,grad_norm: 0.8898939770020162, iteration: 135718
loss: 1.0240345001220703,grad_norm: 0.9999993929772556, iteration: 135719
loss: 0.9950944185256958,grad_norm: 0.9396807940663245, iteration: 135720
loss: 0.9848839044570923,grad_norm: 0.8356672647281941, iteration: 135721
loss: 1.0155895948410034,grad_norm: 0.8851832018789172, iteration: 135722
loss: 0.9725733995437622,grad_norm: 0.8587043785196575, iteration: 135723
loss: 1.0109341144561768,grad_norm: 0.9307370186369947, iteration: 135724
loss: 1.03117036819458,grad_norm: 0.9999993114213922, iteration: 135725
loss: 0.9866790771484375,grad_norm: 0.7110691692172723, iteration: 135726
loss: 1.0229073762893677,grad_norm: 0.9999992752084731, iteration: 135727
loss: 0.9745314717292786,grad_norm: 0.8320465186219966, iteration: 135728
loss: 1.0241276025772095,grad_norm: 0.999999130201064, iteration: 135729
loss: 1.0173730850219727,grad_norm: 0.926078864814551, iteration: 135730
loss: 1.0477516651153564,grad_norm: 0.9999990756498133, iteration: 135731
loss: 1.024856686592102,grad_norm: 0.9999996489321823, iteration: 135732
loss: 1.0430861711502075,grad_norm: 0.9999991720051294, iteration: 135733
loss: 0.9734784960746765,grad_norm: 0.900398119676564, iteration: 135734
loss: 1.0080360174179077,grad_norm: 0.999999721460208, iteration: 135735
loss: 1.008738398551941,grad_norm: 0.8228262651887324, iteration: 135736
loss: 1.0538623332977295,grad_norm: 0.9999992898800619, iteration: 135737
loss: 0.9568189382553101,grad_norm: 0.8532389836311262, iteration: 135738
loss: 0.9957097172737122,grad_norm: 0.9999997003680143, iteration: 135739
loss: 1.0480108261108398,grad_norm: 0.9999999109146579, iteration: 135740
loss: 1.0104187726974487,grad_norm: 0.9543952323749251, iteration: 135741
loss: 1.0193053483963013,grad_norm: 0.9999991128161532, iteration: 135742
loss: 1.0976275205612183,grad_norm: 0.9999991915495416, iteration: 135743
loss: 1.0556743144989014,grad_norm: 0.9999998156658821, iteration: 135744
loss: 1.0271645784378052,grad_norm: 0.9999997333067645, iteration: 135745
loss: 1.008596420288086,grad_norm: 0.9999991499940517, iteration: 135746
loss: 0.9930187463760376,grad_norm: 0.885432985218552, iteration: 135747
loss: 1.0581402778625488,grad_norm: 0.9999993279919075, iteration: 135748
loss: 0.9877702593803406,grad_norm: 0.9595111688488344, iteration: 135749
loss: 0.9675721526145935,grad_norm: 0.9346718544516995, iteration: 135750
loss: 0.9930722117424011,grad_norm: 0.94467547887524, iteration: 135751
loss: 0.9399023652076721,grad_norm: 0.7729994489230647, iteration: 135752
loss: 1.0502276420593262,grad_norm: 0.999999893000984, iteration: 135753
loss: 1.0435304641723633,grad_norm: 0.9999992704125633, iteration: 135754
loss: 0.9936532378196716,grad_norm: 0.9999998948815461, iteration: 135755
loss: 1.0804976224899292,grad_norm: 0.9999991480742408, iteration: 135756
loss: 0.9564342498779297,grad_norm: 0.9965849614554669, iteration: 135757
loss: 1.015144944190979,grad_norm: 0.9999996457761877, iteration: 135758
loss: 1.0088090896606445,grad_norm: 0.9999995784123845, iteration: 135759
loss: 1.013525366783142,grad_norm: 0.9999992633394394, iteration: 135760
loss: 1.0498905181884766,grad_norm: 0.9006712170528748, iteration: 135761
loss: 0.9810678958892822,grad_norm: 0.8988437257030303, iteration: 135762
loss: 0.9883208870887756,grad_norm: 0.8842452585960825, iteration: 135763
loss: 1.067861795425415,grad_norm: 0.9999990979186621, iteration: 135764
loss: 1.0587812662124634,grad_norm: 0.9999993702524154, iteration: 135765
loss: 0.9869028329849243,grad_norm: 0.8962208648196782, iteration: 135766
loss: 0.9944096207618713,grad_norm: 0.9595764871903225, iteration: 135767
loss: 1.0220201015472412,grad_norm: 0.8152743434579158, iteration: 135768
loss: 0.9780369400978088,grad_norm: 0.93840273661566, iteration: 135769
loss: 1.0096365213394165,grad_norm: 0.9433958296854914, iteration: 135770
loss: 0.9997298121452332,grad_norm: 0.7986383513980866, iteration: 135771
loss: 0.99431973695755,grad_norm: 0.9999992181801584, iteration: 135772
loss: 1.0052062273025513,grad_norm: 0.8206448389892601, iteration: 135773
loss: 1.016771674156189,grad_norm: 0.9999998636313088, iteration: 135774
loss: 0.988864541053772,grad_norm: 0.9999995973989237, iteration: 135775
loss: 1.0832910537719727,grad_norm: 0.9999992706050642, iteration: 135776
loss: 0.976405680179596,grad_norm: 0.9391401502725859, iteration: 135777
loss: 1.0141093730926514,grad_norm: 0.9999995330185867, iteration: 135778
loss: 1.0049335956573486,grad_norm: 0.9999990770339385, iteration: 135779
loss: 1.0670655965805054,grad_norm: 0.9999993871743631, iteration: 135780
loss: 1.0807446241378784,grad_norm: 0.9999994388726153, iteration: 135781
loss: 1.0643824338912964,grad_norm: 0.9999991019345297, iteration: 135782
loss: 1.0243698358535767,grad_norm: 0.9999995372134614, iteration: 135783
loss: 1.0488137006759644,grad_norm: 0.9999991268217737, iteration: 135784
loss: 1.0036739110946655,grad_norm: 0.9083260948142639, iteration: 135785
loss: 0.9645891189575195,grad_norm: 0.9188828615772229, iteration: 135786
loss: 1.0158895254135132,grad_norm: 0.9999992777313933, iteration: 135787
loss: 1.062970519065857,grad_norm: 0.9999998715206827, iteration: 135788
loss: 1.045699954032898,grad_norm: 0.999999889759085, iteration: 135789
loss: 1.1233179569244385,grad_norm: 0.9999996758941628, iteration: 135790
loss: 1.0301237106323242,grad_norm: 0.999999377003132, iteration: 135791
loss: 1.234108567237854,grad_norm: 0.9999997325722985, iteration: 135792
loss: 1.0306594371795654,grad_norm: 0.9999993346485194, iteration: 135793
loss: 1.0834063291549683,grad_norm: 0.9999997843256773, iteration: 135794
loss: 1.1874886751174927,grad_norm: 0.9999998501455187, iteration: 135795
loss: 1.254284143447876,grad_norm: 0.9999999595723841, iteration: 135796
loss: 1.4636662006378174,grad_norm: 0.9999998423320159, iteration: 135797
loss: 1.5057867765426636,grad_norm: 0.9999999121969403, iteration: 135798
loss: 1.2059712409973145,grad_norm: 0.9999994177530618, iteration: 135799
loss: 1.3025020360946655,grad_norm: 0.999999886068156, iteration: 135800
loss: 1.203282356262207,grad_norm: 0.9999998421313406, iteration: 135801
loss: 1.2055362462997437,grad_norm: 0.9999998672413118, iteration: 135802
loss: 1.1239910125732422,grad_norm: 0.9999999019034516, iteration: 135803
loss: 1.4178152084350586,grad_norm: 0.9999998645705074, iteration: 135804
loss: 1.2487232685089111,grad_norm: 0.9999998500701306, iteration: 135805
loss: 1.3861011266708374,grad_norm: 1.0000000937171163, iteration: 135806
loss: 1.3399890661239624,grad_norm: 0.9999997240648865, iteration: 135807
loss: 1.1910220384597778,grad_norm: 1.0000001344432636, iteration: 135808
loss: 1.2023849487304688,grad_norm: 0.9999998896695026, iteration: 135809
loss: 1.0356382131576538,grad_norm: 0.9999991112889473, iteration: 135810
loss: 1.1745198965072632,grad_norm: 0.9999996374542215, iteration: 135811
loss: 1.0690722465515137,grad_norm: 0.999999363942296, iteration: 135812
loss: 1.0203696489334106,grad_norm: 0.9999996739826745, iteration: 135813
loss: 1.0736104249954224,grad_norm: 0.9999997596099528, iteration: 135814
loss: 1.0383695363998413,grad_norm: 0.9999996440058336, iteration: 135815
loss: 1.1541554927825928,grad_norm: 1.000000076986351, iteration: 135816
loss: 1.1841583251953125,grad_norm: 0.999999649116716, iteration: 135817
loss: 1.0644299983978271,grad_norm: 0.9999996031351512, iteration: 135818
loss: 1.0609468221664429,grad_norm: 0.999999675602505, iteration: 135819
loss: 0.9981686472892761,grad_norm: 0.9999994593282301, iteration: 135820
loss: 1.181889295578003,grad_norm: 0.9999999117571016, iteration: 135821
loss: 1.1174544095993042,grad_norm: 0.9999996280146786, iteration: 135822
loss: 1.1558715105056763,grad_norm: 0.9999993864526343, iteration: 135823
loss: 1.0065950155258179,grad_norm: 0.9999992522683091, iteration: 135824
loss: 1.0471200942993164,grad_norm: 0.9999994472117807, iteration: 135825
loss: 1.024085521697998,grad_norm: 0.9745539366863895, iteration: 135826
loss: 1.0139954090118408,grad_norm: 0.9999991831537606, iteration: 135827
loss: 1.173397183418274,grad_norm: 0.9999996679491853, iteration: 135828
loss: 1.1061742305755615,grad_norm: 0.8531631801005416, iteration: 135829
loss: 1.0406360626220703,grad_norm: 0.9999994929221241, iteration: 135830
loss: 1.097021460533142,grad_norm: 0.8578516811116804, iteration: 135831
loss: 1.0381287336349487,grad_norm: 0.9999999178953047, iteration: 135832
loss: 1.033678412437439,grad_norm: 0.9999995331181886, iteration: 135833
loss: 1.0254672765731812,grad_norm: 0.9106833299104172, iteration: 135834
loss: 1.070811152458191,grad_norm: 0.9999997807660692, iteration: 135835
loss: 0.9935312867164612,grad_norm: 0.999999029370403, iteration: 135836
loss: 0.9976156949996948,grad_norm: 0.9999996351683971, iteration: 135837
loss: 1.075384259223938,grad_norm: 0.9999999513946481, iteration: 135838
loss: 1.1023443937301636,grad_norm: 0.9999997167815813, iteration: 135839
loss: 1.034920334815979,grad_norm: 0.9999994444868836, iteration: 135840
loss: 1.20454740524292,grad_norm: 0.99999976346242, iteration: 135841
loss: 0.9882259368896484,grad_norm: 0.988978522983778, iteration: 135842
loss: 0.9990343451499939,grad_norm: 0.9999998214305652, iteration: 135843
loss: 1.0238380432128906,grad_norm: 0.9999999245382063, iteration: 135844
loss: 1.0071897506713867,grad_norm: 0.9661776936644733, iteration: 135845
loss: 0.987317681312561,grad_norm: 0.9999991981359556, iteration: 135846
loss: 1.091528296470642,grad_norm: 0.9999991515754963, iteration: 135847
loss: 1.027087688446045,grad_norm: 0.9999991082552978, iteration: 135848
loss: 1.0196456909179688,grad_norm: 0.9999997006725242, iteration: 135849
loss: 0.9906245470046997,grad_norm: 0.8544847653311162, iteration: 135850
loss: 1.0487232208251953,grad_norm: 0.9233833523320245, iteration: 135851
loss: 1.0876824855804443,grad_norm: 0.9999994335956139, iteration: 135852
loss: 1.035578966140747,grad_norm: 0.9999993755813936, iteration: 135853
loss: 1.0350130796432495,grad_norm: 0.9999991065042574, iteration: 135854
loss: 1.0768224000930786,grad_norm: 0.9999996415901797, iteration: 135855
loss: 1.0374006032943726,grad_norm: 0.999999166617638, iteration: 135856
loss: 1.1258320808410645,grad_norm: 0.9999990729184388, iteration: 135857
loss: 1.0238748788833618,grad_norm: 0.9999990586759149, iteration: 135858
loss: 1.0321201086044312,grad_norm: 0.9714720086478107, iteration: 135859
loss: 0.9962726831436157,grad_norm: 0.8009460069671314, iteration: 135860
loss: 0.9960864782333374,grad_norm: 0.8794547406564673, iteration: 135861
loss: 1.0138474702835083,grad_norm: 0.9215042156240894, iteration: 135862
loss: 0.9544631838798523,grad_norm: 0.8126069500380342, iteration: 135863
loss: 1.0124289989471436,grad_norm: 0.7955215634145777, iteration: 135864
loss: 1.018324613571167,grad_norm: 0.7924583856743128, iteration: 135865
loss: 0.9969735741615295,grad_norm: 0.8231025087962442, iteration: 135866
loss: 1.0034071207046509,grad_norm: 0.9999990337134378, iteration: 135867
loss: 1.0140529870986938,grad_norm: 0.846294803128611, iteration: 135868
loss: 0.9584592580795288,grad_norm: 0.9999989721865816, iteration: 135869
loss: 1.0823549032211304,grad_norm: 0.9999993741027742, iteration: 135870
loss: 1.0041821002960205,grad_norm: 0.953668128065111, iteration: 135871
loss: 1.0443758964538574,grad_norm: 0.9999990222868419, iteration: 135872
loss: 1.0886412858963013,grad_norm: 0.9999990230268763, iteration: 135873
loss: 1.0517247915267944,grad_norm: 0.9781463813584332, iteration: 135874
loss: 1.0660442113876343,grad_norm: 0.999999926892061, iteration: 135875
loss: 1.0952911376953125,grad_norm: 0.9999991317302926, iteration: 135876
loss: 0.9728615283966064,grad_norm: 0.9999993686750309, iteration: 135877
loss: 1.0109519958496094,grad_norm: 0.9578143116124248, iteration: 135878
loss: 1.1138359308242798,grad_norm: 0.9999996967436396, iteration: 135879
loss: 1.0318747758865356,grad_norm: 0.7007492630135949, iteration: 135880
loss: 1.035336971282959,grad_norm: 0.999999144849195, iteration: 135881
loss: 1.0241785049438477,grad_norm: 0.9999990813126154, iteration: 135882
loss: 0.969403088092804,grad_norm: 0.912844315450279, iteration: 135883
loss: 0.9737755060195923,grad_norm: 0.9999995701964518, iteration: 135884
loss: 1.028275966644287,grad_norm: 0.9999995640440961, iteration: 135885
loss: 1.015633463859558,grad_norm: 0.912980565146484, iteration: 135886
loss: 1.035703182220459,grad_norm: 1.000000023340089, iteration: 135887
loss: 1.2317942380905151,grad_norm: 0.9999997671856459, iteration: 135888
loss: 1.0249452590942383,grad_norm: 0.9999997323447, iteration: 135889
loss: 1.0609225034713745,grad_norm: 0.9999994689737849, iteration: 135890
loss: 1.0239214897155762,grad_norm: 0.9999994126344166, iteration: 135891
loss: 1.100885033607483,grad_norm: 0.9999993993646517, iteration: 135892
loss: 1.0082422494888306,grad_norm: 0.999999323793336, iteration: 135893
loss: 1.0124880075454712,grad_norm: 0.9921580764461613, iteration: 135894
loss: 1.042588472366333,grad_norm: 0.92162305797716, iteration: 135895
loss: 1.0829757452011108,grad_norm: 0.9996591816344873, iteration: 135896
loss: 1.0393340587615967,grad_norm: 0.9999999502620934, iteration: 135897
loss: 1.0355770587921143,grad_norm: 0.8566979551424101, iteration: 135898
loss: 1.005383014678955,grad_norm: 0.9999998325733743, iteration: 135899
loss: 0.9777790904045105,grad_norm: 0.9999993137021381, iteration: 135900
loss: 1.009101390838623,grad_norm: 0.9999992805187623, iteration: 135901
loss: 1.0002596378326416,grad_norm: 0.9999990223377954, iteration: 135902
loss: 0.9948332905769348,grad_norm: 0.9622879921869154, iteration: 135903
loss: 1.0908533334732056,grad_norm: 0.9999999387360332, iteration: 135904
loss: 0.9993749856948853,grad_norm: 0.9531830004780353, iteration: 135905
loss: 0.9822563529014587,grad_norm: 0.9999996591027872, iteration: 135906
loss: 1.097968578338623,grad_norm: 0.9999995620089676, iteration: 135907
loss: 1.1951359510421753,grad_norm: 0.9999999818047257, iteration: 135908
loss: 1.071357250213623,grad_norm: 0.9999996047675213, iteration: 135909
loss: 1.1390386819839478,grad_norm: 0.9999999876477564, iteration: 135910
loss: 1.160132884979248,grad_norm: 0.9999999380578651, iteration: 135911
loss: 1.0224450826644897,grad_norm: 0.9999989653213203, iteration: 135912
loss: 1.0964032411575317,grad_norm: 0.9999999019464224, iteration: 135913
loss: 1.194563388824463,grad_norm: 0.9999994475503526, iteration: 135914
loss: 1.054391622543335,grad_norm: 0.9999996728619153, iteration: 135915
loss: 1.1010807752609253,grad_norm: 0.9999992629416925, iteration: 135916
loss: 1.1443487405776978,grad_norm: 0.9999996461073503, iteration: 135917
loss: 1.0944584608078003,grad_norm: 0.9999997847513782, iteration: 135918
loss: 1.2555079460144043,grad_norm: 0.9999998615693052, iteration: 135919
loss: 1.0652276277542114,grad_norm: 0.9999999893011021, iteration: 135920
loss: 1.2853398323059082,grad_norm: 0.9999998375952502, iteration: 135921
loss: 1.2238438129425049,grad_norm: 0.9999996836747327, iteration: 135922
loss: 1.2574225664138794,grad_norm: 0.9999993496783971, iteration: 135923
loss: 1.1138360500335693,grad_norm: 0.9999994900087724, iteration: 135924
loss: 1.2405232191085815,grad_norm: 0.999999708300974, iteration: 135925
loss: 1.1242834329605103,grad_norm: 0.9999997411600514, iteration: 135926
loss: 1.0509033203125,grad_norm: 0.9025855808244825, iteration: 135927
loss: 1.1620771884918213,grad_norm: 0.9999999826550782, iteration: 135928
loss: 1.0494197607040405,grad_norm: 0.9999993551135579, iteration: 135929
loss: 1.0617260932922363,grad_norm: 0.999999633439267, iteration: 135930
loss: 1.0876682996749878,grad_norm: 0.9999996862029586, iteration: 135931
loss: 1.0881356000900269,grad_norm: 0.9999999069779157, iteration: 135932
loss: 1.2301146984100342,grad_norm: 1.0000000742572646, iteration: 135933
loss: 0.9758262038230896,grad_norm: 0.967693567765528, iteration: 135934
loss: 1.142398476600647,grad_norm: 0.9999989493295679, iteration: 135935
loss: 1.0572803020477295,grad_norm: 0.8732786167528993, iteration: 135936
loss: 0.9973084926605225,grad_norm: 0.940299724268148, iteration: 135937
loss: 1.0970029830932617,grad_norm: 0.9999997978842896, iteration: 135938
loss: 1.0924667119979858,grad_norm: 0.9999999674901977, iteration: 135939
loss: 1.0392968654632568,grad_norm: 0.9999991175627889, iteration: 135940
loss: 1.0987088680267334,grad_norm: 0.9999999208951622, iteration: 135941
loss: 1.2412294149398804,grad_norm: 0.9999999385195258, iteration: 135942
loss: 1.1703474521636963,grad_norm: 0.9999997566937641, iteration: 135943
loss: 1.0033960342407227,grad_norm: 0.9999996921408266, iteration: 135944
loss: 1.1134382486343384,grad_norm: 0.9999997537289402, iteration: 135945
loss: 1.0087512731552124,grad_norm: 0.9999990674850645, iteration: 135946
loss: 1.2448173761367798,grad_norm: 0.9999997292398614, iteration: 135947
loss: 1.0567116737365723,grad_norm: 0.999999321118731, iteration: 135948
loss: 1.127289891242981,grad_norm: 0.9999992824275519, iteration: 135949
loss: 1.0524362325668335,grad_norm: 0.9999993413856528, iteration: 135950
loss: 1.0439279079437256,grad_norm: 0.9999996409339823, iteration: 135951
loss: 1.0810610055923462,grad_norm: 0.9999993897131632, iteration: 135952
loss: 1.1692569255828857,grad_norm: 0.9999998329866535, iteration: 135953
loss: 1.3819375038146973,grad_norm: 0.9999998699764059, iteration: 135954
loss: 1.3164165019989014,grad_norm: 0.9999999285373659, iteration: 135955
loss: 1.1221153736114502,grad_norm: 0.9999999774141967, iteration: 135956
loss: 1.2969679832458496,grad_norm: 0.9999999067328615, iteration: 135957
loss: 1.2738028764724731,grad_norm: 0.9999999171870466, iteration: 135958
loss: 1.2647795677185059,grad_norm: 0.9999994733175414, iteration: 135959
loss: 1.3018975257873535,grad_norm: 0.9999997006716509, iteration: 135960
loss: 1.4599615335464478,grad_norm: 0.9999999184058922, iteration: 135961
loss: 1.2736787796020508,grad_norm: 0.9999999194884607, iteration: 135962
loss: 1.007997989654541,grad_norm: 0.9999999050997703, iteration: 135963
loss: 1.1290003061294556,grad_norm: 0.9999997053523656, iteration: 135964
loss: 1.119484782218933,grad_norm: 0.9999996892250476, iteration: 135965
loss: 1.1370614767074585,grad_norm: 0.9999995979415641, iteration: 135966
loss: 1.1801050901412964,grad_norm: 0.999999717286639, iteration: 135967
loss: 1.028085470199585,grad_norm: 0.9999993840615005, iteration: 135968
loss: 1.1838314533233643,grad_norm: 0.9999999327436203, iteration: 135969
loss: 1.1625981330871582,grad_norm: 0.9999998128184462, iteration: 135970
loss: 1.202472448348999,grad_norm: 0.9999995864098004, iteration: 135971
loss: 1.1981760263442993,grad_norm: 0.9999998718665539, iteration: 135972
loss: 1.1052148342132568,grad_norm: 0.9999998491578141, iteration: 135973
loss: 1.1720423698425293,grad_norm: 0.9999998848724742, iteration: 135974
loss: 1.0975373983383179,grad_norm: 0.9999995664959501, iteration: 135975
loss: 1.1081217527389526,grad_norm: 0.9999998108702944, iteration: 135976
loss: 1.1659090518951416,grad_norm: 0.999999840266046, iteration: 135977
loss: 0.9578478336334229,grad_norm: 0.9999990434620414, iteration: 135978
loss: 1.112638235092163,grad_norm: 0.9999992058888708, iteration: 135979
loss: 1.061611533164978,grad_norm: 0.9999990547182435, iteration: 135980
loss: 1.1067923307418823,grad_norm: 0.9999999959046836, iteration: 135981
loss: 1.0611507892608643,grad_norm: 0.9999998766653487, iteration: 135982
loss: 1.0388802289962769,grad_norm: 0.9999996360841783, iteration: 135983
loss: 1.0926241874694824,grad_norm: 0.9999991565257473, iteration: 135984
loss: 0.9683090448379517,grad_norm: 0.9749732663467476, iteration: 135985
loss: 1.4691215753555298,grad_norm: 0.9999997973904008, iteration: 135986
loss: 1.0356054306030273,grad_norm: 0.8283939887705841, iteration: 135987
loss: 1.1171824932098389,grad_norm: 0.9999991297021843, iteration: 135988
loss: 1.2038581371307373,grad_norm: 0.9999998227585684, iteration: 135989
loss: 1.025833010673523,grad_norm: 0.941321124088022, iteration: 135990
loss: 1.0082149505615234,grad_norm: 0.8689847933767252, iteration: 135991
loss: 1.0152459144592285,grad_norm: 0.9999992780918574, iteration: 135992
loss: 1.048581600189209,grad_norm: 0.999999966668988, iteration: 135993
loss: 1.1178088188171387,grad_norm: 0.9999995575949797, iteration: 135994
loss: 1.0995913743972778,grad_norm: 0.9999995891035729, iteration: 135995
loss: 1.029779076576233,grad_norm: 0.8373271809124311, iteration: 135996
loss: 1.0572396516799927,grad_norm: 0.999999439401416, iteration: 135997
loss: 1.0817970037460327,grad_norm: 1.0000000309253403, iteration: 135998
loss: 1.0317052602767944,grad_norm: 0.9999991353028636, iteration: 135999
loss: 1.0594435930252075,grad_norm: 1.0000000357411798, iteration: 136000
loss: 1.2063878774642944,grad_norm: 0.999999566986316, iteration: 136001
loss: 1.0808428525924683,grad_norm: 0.9999995394720365, iteration: 136002
loss: 1.0076266527175903,grad_norm: 0.9999995076377524, iteration: 136003
loss: 1.2117629051208496,grad_norm: 0.9999997499953506, iteration: 136004
loss: 1.0835829973220825,grad_norm: 0.9999998882752414, iteration: 136005
loss: 1.1183230876922607,grad_norm: 0.999999909570899, iteration: 136006
loss: 1.0147671699523926,grad_norm: 0.9999993095948766, iteration: 136007
loss: 1.0930050611495972,grad_norm: 0.9999996060095477, iteration: 136008
loss: 1.1582369804382324,grad_norm: 0.9999996873946112, iteration: 136009
loss: 1.0498734712600708,grad_norm: 0.9999991358493863, iteration: 136010
loss: 1.1008087396621704,grad_norm: 0.9999998471413121, iteration: 136011
loss: 1.0595585107803345,grad_norm: 0.9999993726076263, iteration: 136012
loss: 1.1193597316741943,grad_norm: 0.9999998014699758, iteration: 136013
loss: 1.0822935104370117,grad_norm: 0.9999998455107195, iteration: 136014
loss: 1.0755237340927124,grad_norm: 0.9999994574033725, iteration: 136015
loss: 0.9704498052597046,grad_norm: 0.8366716191646195, iteration: 136016
loss: 1.1016147136688232,grad_norm: 0.9999991939823561, iteration: 136017
loss: 1.015783429145813,grad_norm: 0.9999994428344667, iteration: 136018
loss: 1.0906381607055664,grad_norm: 0.9999999574962658, iteration: 136019
loss: 1.428666114807129,grad_norm: 1.0000000049171776, iteration: 136020
loss: 1.1406229734420776,grad_norm: 0.9999999344668761, iteration: 136021
loss: 1.1323803663253784,grad_norm: 0.9999997871974214, iteration: 136022
loss: 1.0723259449005127,grad_norm: 0.9541933038518259, iteration: 136023
loss: 1.0895003080368042,grad_norm: 0.9999992072475165, iteration: 136024
loss: 1.12586510181427,grad_norm: 0.999999746993581, iteration: 136025
loss: 0.9910282492637634,grad_norm: 0.9999993268227789, iteration: 136026
loss: 1.1013779640197754,grad_norm: 0.9999997716937923, iteration: 136027
loss: 1.0704702138900757,grad_norm: 0.9999991470384693, iteration: 136028
loss: 0.9685579538345337,grad_norm: 0.9090909881886599, iteration: 136029
loss: 1.0583808422088623,grad_norm: 0.9999992954844221, iteration: 136030
loss: 0.9863955974578857,grad_norm: 0.9999990569553393, iteration: 136031
loss: 1.2487027645111084,grad_norm: 0.999999935616026, iteration: 136032
loss: 1.0546343326568604,grad_norm: 0.9999993287258362, iteration: 136033
loss: 1.466643214225769,grad_norm: 0.9999997024041625, iteration: 136034
loss: 0.9490779042243958,grad_norm: 0.907862208410617, iteration: 136035
loss: 1.004174828529358,grad_norm: 0.9999992349665069, iteration: 136036
loss: 1.0678702592849731,grad_norm: 0.9999993980734847, iteration: 136037
loss: 0.966797947883606,grad_norm: 0.8615524593954657, iteration: 136038
loss: 1.1150426864624023,grad_norm: 0.999999343600393, iteration: 136039
loss: 1.03141188621521,grad_norm: 0.9999998654285939, iteration: 136040
loss: 1.0335100889205933,grad_norm: 0.9999991394800084, iteration: 136041
loss: 0.9902204275131226,grad_norm: 0.9999998865003344, iteration: 136042
loss: 1.0158289670944214,grad_norm: 0.8719748862661195, iteration: 136043
loss: 0.979159414768219,grad_norm: 0.8809365338942414, iteration: 136044
loss: 1.0290436744689941,grad_norm: 0.9999996594065498, iteration: 136045
loss: 1.1852033138275146,grad_norm: 0.9999994684136454, iteration: 136046
loss: 1.0911351442337036,grad_norm: 0.9999998667015536, iteration: 136047
loss: 1.1022778749465942,grad_norm: 0.9999992570925985, iteration: 136048
loss: 1.0325034856796265,grad_norm: 0.9923025433118419, iteration: 136049
loss: 1.0490831136703491,grad_norm: 0.9356792151846853, iteration: 136050
loss: 1.094143033027649,grad_norm: 0.8828161517068271, iteration: 136051
loss: 1.0610158443450928,grad_norm: 0.9999992162489336, iteration: 136052
loss: 1.0555546283721924,grad_norm: 0.9999990634904139, iteration: 136053
loss: 0.9932500720024109,grad_norm: 0.8317527097503074, iteration: 136054
loss: 0.9954351186752319,grad_norm: 0.8367423178617593, iteration: 136055
loss: 1.0049760341644287,grad_norm: 0.9999995901806451, iteration: 136056
loss: 1.0412647724151611,grad_norm: 0.9999992920548695, iteration: 136057
loss: 1.1236311197280884,grad_norm: 0.9999999215787824, iteration: 136058
loss: 1.2131634950637817,grad_norm: 0.9999998221546762, iteration: 136059
loss: 1.0117143392562866,grad_norm: 0.9999990493428224, iteration: 136060
loss: 1.005372166633606,grad_norm: 0.8079093866689179, iteration: 136061
loss: 1.063560128211975,grad_norm: 0.9999993797318991, iteration: 136062
loss: 1.1990126371383667,grad_norm: 0.9999996235454885, iteration: 136063
loss: 0.9857874512672424,grad_norm: 0.9999994187584589, iteration: 136064
loss: 0.9991664290428162,grad_norm: 0.9266548662975015, iteration: 136065
loss: 1.1019026041030884,grad_norm: 0.999999478808042, iteration: 136066
loss: 1.0488170385360718,grad_norm: 0.9999989874050857, iteration: 136067
loss: 1.0573365688323975,grad_norm: 0.9999995273665523, iteration: 136068
loss: 1.0836576223373413,grad_norm: 0.9999994183303338, iteration: 136069
loss: 1.1859564781188965,grad_norm: 0.9999995696016464, iteration: 136070
loss: 0.9819011092185974,grad_norm: 0.6970141482203529, iteration: 136071
loss: 1.0638707876205444,grad_norm: 0.9999991484392409, iteration: 136072
loss: 1.0675709247589111,grad_norm: 0.9999992542331002, iteration: 136073
loss: 1.0748769044876099,grad_norm: 0.999999738625009, iteration: 136074
loss: 1.0924850702285767,grad_norm: 0.9999998477853117, iteration: 136075
loss: 1.0303277969360352,grad_norm: 0.9999990723817211, iteration: 136076
loss: 1.0790660381317139,grad_norm: 0.9999998861561105, iteration: 136077
loss: 1.0335136651992798,grad_norm: 0.9301047395602426, iteration: 136078
loss: 1.091288685798645,grad_norm: 0.9999998651914674, iteration: 136079
loss: 1.0670267343521118,grad_norm: 0.9999996002681106, iteration: 136080
loss: 1.008949875831604,grad_norm: 0.9371212190503364, iteration: 136081
loss: 1.063882827758789,grad_norm: 0.9999998208266195, iteration: 136082
loss: 1.0539294481277466,grad_norm: 0.9472841353410435, iteration: 136083
loss: 1.139937400817871,grad_norm: 0.9999998931022448, iteration: 136084
loss: 1.1509122848510742,grad_norm: 0.9999990448301175, iteration: 136085
loss: 0.9909713864326477,grad_norm: 0.7601143250693522, iteration: 136086
loss: 1.0219279527664185,grad_norm: 0.9387171907293598, iteration: 136087
loss: 1.043694019317627,grad_norm: 0.9999990376565936, iteration: 136088
loss: 0.9618507027626038,grad_norm: 0.9697553573796617, iteration: 136089
loss: 1.285725474357605,grad_norm: 0.9999999526750296, iteration: 136090
loss: 1.1112041473388672,grad_norm: 0.999999406330847, iteration: 136091
loss: 1.078116774559021,grad_norm: 0.9999999353037703, iteration: 136092
loss: 1.0771299600601196,grad_norm: 0.999999625503197, iteration: 136093
loss: 1.1641751527786255,grad_norm: 0.9999996137538022, iteration: 136094
loss: 1.1856229305267334,grad_norm: 0.9999998914539465, iteration: 136095
loss: 1.0895824432373047,grad_norm: 0.9999991600080886, iteration: 136096
loss: 0.989180326461792,grad_norm: 0.9585155156348718, iteration: 136097
loss: 1.04642915725708,grad_norm: 0.7639456901726459, iteration: 136098
loss: 1.0241907835006714,grad_norm: 0.9487126592763098, iteration: 136099
loss: 1.074716329574585,grad_norm: 0.8145054251711824, iteration: 136100
loss: 1.0455577373504639,grad_norm: 0.8790573340930119, iteration: 136101
loss: 1.0063623189926147,grad_norm: 0.7730256099719678, iteration: 136102
loss: 1.0524311065673828,grad_norm: 0.9999991015614138, iteration: 136103
loss: 1.1871154308319092,grad_norm: 0.9999997207581695, iteration: 136104
loss: 1.0187172889709473,grad_norm: 0.7773956211452541, iteration: 136105
loss: 0.9547816514968872,grad_norm: 0.8133135895236439, iteration: 136106
loss: 1.0507711172103882,grad_norm: 0.9999993193557517, iteration: 136107
loss: 0.9962603449821472,grad_norm: 0.9999991813511931, iteration: 136108
loss: 1.0768547058105469,grad_norm: 0.9999992966907153, iteration: 136109
loss: 1.0301313400268555,grad_norm: 0.9999997441548825, iteration: 136110
loss: 1.0466296672821045,grad_norm: 0.9999991995242051, iteration: 136111
loss: 1.0162392854690552,grad_norm: 0.8025720551353496, iteration: 136112
loss: 1.0679056644439697,grad_norm: 0.9726536640313015, iteration: 136113
loss: 1.2859376668930054,grad_norm: 0.9999998633923735, iteration: 136114
loss: 1.11021089553833,grad_norm: 0.9999994101665847, iteration: 136115
loss: 1.0173617601394653,grad_norm: 0.9491543270657994, iteration: 136116
loss: 1.0041985511779785,grad_norm: 0.9999997663391268, iteration: 136117
loss: 1.0096478462219238,grad_norm: 0.9675816120045727, iteration: 136118
loss: 1.0347001552581787,grad_norm: 0.9560386140954202, iteration: 136119
loss: 0.9850890040397644,grad_norm: 0.9817935829071361, iteration: 136120
loss: 1.0038375854492188,grad_norm: 0.9999992409868858, iteration: 136121
loss: 1.0216331481933594,grad_norm: 0.9999998333499985, iteration: 136122
loss: 0.9762662649154663,grad_norm: 0.9999989868972196, iteration: 136123
loss: 1.0553467273712158,grad_norm: 0.9999991391763039, iteration: 136124
loss: 1.0837185382843018,grad_norm: 0.9999995056948616, iteration: 136125
loss: 0.9549297094345093,grad_norm: 0.9999990779789429, iteration: 136126
loss: 1.0550354719161987,grad_norm: 0.8254018899203783, iteration: 136127
loss: 1.0228970050811768,grad_norm: 0.9999995749727881, iteration: 136128
loss: 1.0113059282302856,grad_norm: 0.999999319348706, iteration: 136129
loss: 1.0420678853988647,grad_norm: 0.9999992141675524, iteration: 136130
loss: 1.143308162689209,grad_norm: 0.9999991237182926, iteration: 136131
loss: 1.015642762184143,grad_norm: 0.9999998391740436, iteration: 136132
loss: 0.9853021502494812,grad_norm: 0.9999991435978557, iteration: 136133
loss: 1.0338364839553833,grad_norm: 0.9136493833284133, iteration: 136134
loss: 1.01229727268219,grad_norm: 0.8399421704488239, iteration: 136135
loss: 1.0145655870437622,grad_norm: 0.8559795936697131, iteration: 136136
loss: 1.0704765319824219,grad_norm: 0.9999990411441227, iteration: 136137
loss: 0.9988048076629639,grad_norm: 0.9999992585621718, iteration: 136138
loss: 1.0768985748291016,grad_norm: 0.9999995356413213, iteration: 136139
loss: 1.0180180072784424,grad_norm: 0.7638480524370359, iteration: 136140
loss: 1.0703965425491333,grad_norm: 0.9999990571479923, iteration: 136141
loss: 0.9622084498405457,grad_norm: 0.9287161402560872, iteration: 136142
loss: 1.0588934421539307,grad_norm: 0.999999362130284, iteration: 136143
loss: 1.0054320096969604,grad_norm: 0.8393205683530391, iteration: 136144
loss: 1.011247992515564,grad_norm: 0.9999991426109142, iteration: 136145
loss: 0.990841805934906,grad_norm: 0.9999999059939562, iteration: 136146
loss: 1.0514975786209106,grad_norm: 0.9999992285672014, iteration: 136147
loss: 0.990361213684082,grad_norm: 0.897314171698844, iteration: 136148
loss: 1.0777735710144043,grad_norm: 0.894722630955842, iteration: 136149
loss: 0.9680051803588867,grad_norm: 0.8707052762160701, iteration: 136150
loss: 1.011269211769104,grad_norm: 0.7704936378446711, iteration: 136151
loss: 1.1008005142211914,grad_norm: 0.9999990641932349, iteration: 136152
loss: 1.0631028413772583,grad_norm: 0.999999590630907, iteration: 136153
loss: 1.0069160461425781,grad_norm: 0.8414624147905247, iteration: 136154
loss: 1.04946768283844,grad_norm: 0.9999991426134746, iteration: 136155
loss: 0.9991876482963562,grad_norm: 0.8039896142942381, iteration: 136156
loss: 1.1207832098007202,grad_norm: 0.9999993536116949, iteration: 136157
loss: 1.0266697406768799,grad_norm: 0.9999993104503634, iteration: 136158
loss: 1.068410038948059,grad_norm: 0.9999998696699307, iteration: 136159
loss: 1.0732203722000122,grad_norm: 0.999999843558901, iteration: 136160
loss: 1.0528112649917603,grad_norm: 0.9422459149223491, iteration: 136161
loss: 0.9740262627601624,grad_norm: 0.8732709803142258, iteration: 136162
loss: 0.9711272716522217,grad_norm: 0.9748939518046283, iteration: 136163
loss: 0.9882047772407532,grad_norm: 0.8467815639829175, iteration: 136164
loss: 1.0334689617156982,grad_norm: 0.8620603416473189, iteration: 136165
loss: 1.0883749723434448,grad_norm: 0.9999996721782883, iteration: 136166
loss: 1.0430333614349365,grad_norm: 0.9182924797489781, iteration: 136167
loss: 1.0100467205047607,grad_norm: 0.9756931454681721, iteration: 136168
loss: 0.9940580725669861,grad_norm: 0.9085207067946119, iteration: 136169
loss: 1.0114260911941528,grad_norm: 0.8991093354132342, iteration: 136170
loss: 0.9965820908546448,grad_norm: 0.8038347832314426, iteration: 136171
loss: 1.0240222215652466,grad_norm: 0.9136970720609581, iteration: 136172
loss: 1.0066267251968384,grad_norm: 0.9999991339458739, iteration: 136173
loss: 1.0237094163894653,grad_norm: 0.9334250582799527, iteration: 136174
loss: 1.06069815158844,grad_norm: 0.9999991637962786, iteration: 136175
loss: 1.054993748664856,grad_norm: 0.9999996511266824, iteration: 136176
loss: 1.0018458366394043,grad_norm: 0.9802742914684658, iteration: 136177
loss: 0.9886632561683655,grad_norm: 0.7881396580306905, iteration: 136178
loss: 1.0010730028152466,grad_norm: 0.9999994590673745, iteration: 136179
loss: 1.0022281408309937,grad_norm: 0.9999997805655513, iteration: 136180
loss: 1.1424325704574585,grad_norm: 0.9999996285013678, iteration: 136181
loss: 1.0582516193389893,grad_norm: 0.9999998317677379, iteration: 136182
loss: 0.9975987672805786,grad_norm: 0.9927037142376542, iteration: 136183
loss: 0.9467867612838745,grad_norm: 0.7887628149387886, iteration: 136184
loss: 1.0230474472045898,grad_norm: 0.9999991722914684, iteration: 136185
loss: 1.0797929763793945,grad_norm: 0.9999997200914864, iteration: 136186
loss: 0.9350593686103821,grad_norm: 0.9999991388161036, iteration: 136187
loss: 0.9990339875221252,grad_norm: 0.8774760520214929, iteration: 136188
loss: 1.368124008178711,grad_norm: 0.9999997682880308, iteration: 136189
loss: 0.9829800724983215,grad_norm: 0.9937781458451453, iteration: 136190
loss: 1.1008880138397217,grad_norm: 0.9999994007034337, iteration: 136191
loss: 0.9833055734634399,grad_norm: 0.9999995707477705, iteration: 136192
loss: 1.0033894777297974,grad_norm: 0.8703491876378121, iteration: 136193
loss: 0.9721197485923767,grad_norm: 0.9182802069430536, iteration: 136194
loss: 0.9787107110023499,grad_norm: 0.801011957550853, iteration: 136195
loss: 1.055313229560852,grad_norm: 0.9054894445008473, iteration: 136196
loss: 0.9973694682121277,grad_norm: 0.9999990537246926, iteration: 136197
loss: 1.0088692903518677,grad_norm: 0.9999997002933392, iteration: 136198
loss: 1.0095531940460205,grad_norm: 0.9999994301365744, iteration: 136199
loss: 1.372138261795044,grad_norm: 0.9999994978246447, iteration: 136200
loss: 1.0055299997329712,grad_norm: 0.9034103030761279, iteration: 136201
loss: 1.3107966184616089,grad_norm: 0.9999994548309055, iteration: 136202
loss: 1.1043676137924194,grad_norm: 0.9999992654823994, iteration: 136203
loss: 1.1186591386795044,grad_norm: 0.9999992074779738, iteration: 136204
loss: 1.0390372276306152,grad_norm: 0.9999996332973601, iteration: 136205
loss: 1.0725500583648682,grad_norm: 0.9999997652045368, iteration: 136206
loss: 1.134749174118042,grad_norm: 1.0000000116949996, iteration: 136207
loss: 1.055151343345642,grad_norm: 0.9999992771953391, iteration: 136208
loss: 1.1156028509140015,grad_norm: 0.9999994397658959, iteration: 136209
loss: 1.0637331008911133,grad_norm: 0.9999997410094476, iteration: 136210
loss: 1.0571305751800537,grad_norm: 0.9999992020764054, iteration: 136211
loss: 1.0233101844787598,grad_norm: 0.999999029360931, iteration: 136212
loss: 0.9958241581916809,grad_norm: 0.8533426243118886, iteration: 136213
loss: 1.0174401998519897,grad_norm: 0.9985498259735299, iteration: 136214
loss: 1.1456509828567505,grad_norm: 0.9999999004523372, iteration: 136215
loss: 1.1741995811462402,grad_norm: 0.999999664953608, iteration: 136216
loss: 1.0358643531799316,grad_norm: 0.949805836501916, iteration: 136217
loss: 1.0576398372650146,grad_norm: 0.7588498010502308, iteration: 136218
loss: 1.0415219068527222,grad_norm: 0.8025462501698172, iteration: 136219
loss: 1.007614016532898,grad_norm: 0.9093132208044616, iteration: 136220
loss: 1.0646566152572632,grad_norm: 0.9999997487648103, iteration: 136221
loss: 1.0298283100128174,grad_norm: 0.9999992506317331, iteration: 136222
loss: 1.0172840356826782,grad_norm: 0.9999998449112935, iteration: 136223
loss: 1.1222203969955444,grad_norm: 0.9999992508667672, iteration: 136224
loss: 1.0310460329055786,grad_norm: 0.9999990395054165, iteration: 136225
loss: 1.036015510559082,grad_norm: 0.9999997833079932, iteration: 136226
loss: 1.0237289667129517,grad_norm: 0.9999991676770548, iteration: 136227
loss: 1.0754252672195435,grad_norm: 0.9999998523038611, iteration: 136228
loss: 0.9915207028388977,grad_norm: 0.9999998641348926, iteration: 136229
loss: 1.2883964776992798,grad_norm: 0.9999998372585402, iteration: 136230
loss: 0.9916678667068481,grad_norm: 0.7433087330016336, iteration: 136231
loss: 1.0645952224731445,grad_norm: 0.9999998083538545, iteration: 136232
loss: 1.3276768922805786,grad_norm: 0.9999998214751863, iteration: 136233
loss: 1.3551222085952759,grad_norm: 0.9999996750303759, iteration: 136234
loss: 1.0828591585159302,grad_norm: 0.999999386877196, iteration: 136235
loss: 1.1006700992584229,grad_norm: 0.9999991762458688, iteration: 136236
loss: 1.0494810342788696,grad_norm: 0.9999997140567111, iteration: 136237
loss: 1.161346197128296,grad_norm: 0.9999996921917538, iteration: 136238
loss: 1.207211971282959,grad_norm: 0.9999999186248346, iteration: 136239
loss: 1.1252018213272095,grad_norm: 0.9999996925237682, iteration: 136240
loss: 1.1013561487197876,grad_norm: 0.9999996793157105, iteration: 136241
loss: 1.1866074800491333,grad_norm: 0.999999747254, iteration: 136242
loss: 0.9945390820503235,grad_norm: 0.9999991108362736, iteration: 136243
loss: 1.109876275062561,grad_norm: 0.9999996345807086, iteration: 136244
loss: 1.0772091150283813,grad_norm: 0.9006649943461092, iteration: 136245
loss: 1.0946483612060547,grad_norm: 0.8878703153754105, iteration: 136246
loss: 1.027004361152649,grad_norm: 0.9999991489125104, iteration: 136247
loss: 1.0737541913986206,grad_norm: 0.9999998534309691, iteration: 136248
loss: 0.9819093346595764,grad_norm: 0.9999993057194837, iteration: 136249
loss: 0.9858086109161377,grad_norm: 0.9999990310042224, iteration: 136250
loss: 1.1972620487213135,grad_norm: 0.9999997434728939, iteration: 136251
loss: 1.1111963987350464,grad_norm: 0.9999994440620016, iteration: 136252
loss: 1.2455497980117798,grad_norm: 0.999999748216809, iteration: 136253
loss: 1.0305277109146118,grad_norm: 0.9999991432903979, iteration: 136254
loss: 0.9887508749961853,grad_norm: 0.9999997219408233, iteration: 136255
loss: 1.1085911989212036,grad_norm: 0.9999997068806675, iteration: 136256
loss: 1.056558609008789,grad_norm: 0.999999785446245, iteration: 136257
loss: 1.0484979152679443,grad_norm: 0.999999636275389, iteration: 136258
loss: 0.9955980181694031,grad_norm: 0.9999996713188517, iteration: 136259
loss: 1.1547843217849731,grad_norm: 0.9999991267666165, iteration: 136260
loss: 1.1779495477676392,grad_norm: 0.9999995870304683, iteration: 136261
loss: 1.03861665725708,grad_norm: 0.9999991078511323, iteration: 136262
loss: 1.1217938661575317,grad_norm: 0.9999991547125421, iteration: 136263
loss: 1.294853687286377,grad_norm: 0.9999994534385622, iteration: 136264
loss: 1.7163207530975342,grad_norm: 1.0000000255311425, iteration: 136265
loss: 1.126705527305603,grad_norm: 0.9999993502851126, iteration: 136266
loss: 1.086211919784546,grad_norm: 0.9126341003501457, iteration: 136267
loss: 1.2403855323791504,grad_norm: 0.999999870593571, iteration: 136268
loss: 1.1207530498504639,grad_norm: 0.9999999442871392, iteration: 136269
loss: 1.2273938655853271,grad_norm: 0.9999998102626325, iteration: 136270
loss: 0.995941162109375,grad_norm: 0.9999990848217478, iteration: 136271
loss: 1.194693922996521,grad_norm: 0.9999998494081558, iteration: 136272
loss: 1.088696002960205,grad_norm: 0.9999997970324549, iteration: 136273
loss: 1.082635521888733,grad_norm: 0.9999997881050665, iteration: 136274
loss: 1.3592209815979004,grad_norm: 0.9999998737360314, iteration: 136275
loss: 1.0981404781341553,grad_norm: 0.999999015667457, iteration: 136276
loss: 1.0766334533691406,grad_norm: 0.9999998524560115, iteration: 136277
loss: 1.0623104572296143,grad_norm: 0.9999993970553889, iteration: 136278
loss: 0.9935360550880432,grad_norm: 0.9999992255746716, iteration: 136279
loss: 1.0269999504089355,grad_norm: 0.9256767032559682, iteration: 136280
loss: 1.1792280673980713,grad_norm: 0.9999993532987775, iteration: 136281
loss: 1.450871467590332,grad_norm: 0.9999999226920789, iteration: 136282
loss: 1.1196932792663574,grad_norm: 0.9999993496768009, iteration: 136283
loss: 1.087003231048584,grad_norm: 0.9999994307081751, iteration: 136284
loss: 1.1907789707183838,grad_norm: 0.9999997614730455, iteration: 136285
loss: 1.2898063659667969,grad_norm: 0.9999998310343651, iteration: 136286
loss: 1.2553106546401978,grad_norm: 0.999999603842742, iteration: 136287
loss: 1.2899049520492554,grad_norm: 0.9999998213947461, iteration: 136288
loss: 1.1154158115386963,grad_norm: 0.999999876998785, iteration: 136289
loss: 1.191840648651123,grad_norm: 0.9999993068282986, iteration: 136290
loss: 1.0725654363632202,grad_norm: 0.999999545815886, iteration: 136291
loss: 1.2739183902740479,grad_norm: 0.9999998993346884, iteration: 136292
loss: 1.1521233320236206,grad_norm: 0.9999998072963043, iteration: 136293
loss: 1.195014238357544,grad_norm: 0.9999996080026804, iteration: 136294
loss: 1.2049657106399536,grad_norm: 0.9999997353210751, iteration: 136295
loss: 1.1690998077392578,grad_norm: 0.9999996650593852, iteration: 136296
loss: 1.061303734779358,grad_norm: 0.9999990612777181, iteration: 136297
loss: 1.0815962553024292,grad_norm: 0.9999995475644989, iteration: 136298
loss: 1.149202823638916,grad_norm: 0.999999935537754, iteration: 136299
loss: 1.0718189477920532,grad_norm: 0.9999998159436273, iteration: 136300
loss: 1.0542694330215454,grad_norm: 0.8085897288445848, iteration: 136301
loss: 1.0092535018920898,grad_norm: 0.9115811572595379, iteration: 136302
loss: 1.169854998588562,grad_norm: 0.9999997387014854, iteration: 136303
loss: 1.1942120790481567,grad_norm: 0.9999996149274811, iteration: 136304
loss: 1.022691249847412,grad_norm: 0.8168166474015335, iteration: 136305
loss: 1.1599310636520386,grad_norm: 0.9999997588928243, iteration: 136306
loss: 1.0138505697250366,grad_norm: 0.9999993679094265, iteration: 136307
loss: 1.2254362106323242,grad_norm: 0.9999999520274971, iteration: 136308
loss: 1.0441114902496338,grad_norm: 0.8862740936574036, iteration: 136309
loss: 1.0466830730438232,grad_norm: 0.9630285863639944, iteration: 136310
loss: 1.0626375675201416,grad_norm: 0.9999991176195512, iteration: 136311
loss: 1.2584601640701294,grad_norm: 0.9999997740611433, iteration: 136312
loss: 1.0166682004928589,grad_norm: 0.999999826063808, iteration: 136313
loss: 1.0683242082595825,grad_norm: 0.9999996539266016, iteration: 136314
loss: 1.0671521425247192,grad_norm: 0.9999999902054043, iteration: 136315
loss: 0.9928522706031799,grad_norm: 0.9999990747789361, iteration: 136316
loss: 1.0017518997192383,grad_norm: 0.9019624275215141, iteration: 136317
loss: 1.0730502605438232,grad_norm: 0.9999993494559495, iteration: 136318
loss: 1.0016920566558838,grad_norm: 0.9999991092552225, iteration: 136319
loss: 1.022486925125122,grad_norm: 0.9999997875551243, iteration: 136320
loss: 1.0894321203231812,grad_norm: 0.9999996830306193, iteration: 136321
loss: 1.1476292610168457,grad_norm: 0.9999995425772416, iteration: 136322
loss: 1.0531573295593262,grad_norm: 0.9999994817204987, iteration: 136323
loss: 1.0384174585342407,grad_norm: 0.8257316327068582, iteration: 136324
loss: 0.9915106296539307,grad_norm: 0.9221716029155784, iteration: 136325
loss: 1.1491564512252808,grad_norm: 0.999999543948394, iteration: 136326
loss: 1.0343046188354492,grad_norm: 0.9999992297742367, iteration: 136327
loss: 1.042884349822998,grad_norm: 0.9999994361275504, iteration: 136328
loss: 1.056716799736023,grad_norm: 0.9999998222981071, iteration: 136329
loss: 1.0497294664382935,grad_norm: 0.9999998311485216, iteration: 136330
loss: 1.1317368745803833,grad_norm: 0.999999423568188, iteration: 136331
loss: 1.0611685514450073,grad_norm: 0.8476374601861142, iteration: 136332
loss: 1.0499193668365479,grad_norm: 0.9999991434295481, iteration: 136333
loss: 1.2330105304718018,grad_norm: 0.9999993322341035, iteration: 136334
loss: 1.0396711826324463,grad_norm: 0.9999998657849949, iteration: 136335
loss: 1.1366217136383057,grad_norm: 0.9999995775824266, iteration: 136336
loss: 1.021056890487671,grad_norm: 0.9999997557269454, iteration: 136337
loss: 1.223658561706543,grad_norm: 0.9999997683553158, iteration: 136338
loss: 1.166448712348938,grad_norm: 1.000000039972719, iteration: 136339
loss: 1.0695534944534302,grad_norm: 0.999999631842775, iteration: 136340
loss: 1.2751787900924683,grad_norm: 0.9999997893744106, iteration: 136341
loss: 1.0809842348098755,grad_norm: 0.9999991342225955, iteration: 136342
loss: 1.1425601243972778,grad_norm: 0.9999995060654842, iteration: 136343
loss: 1.3170769214630127,grad_norm: 0.9999998687571413, iteration: 136344
loss: 1.043967604637146,grad_norm: 0.9999990768636938, iteration: 136345
loss: 1.1036841869354248,grad_norm: 0.9999998506456153, iteration: 136346
loss: 1.1808823347091675,grad_norm: 0.999999680396693, iteration: 136347
loss: 1.3140143156051636,grad_norm: 0.9999997085053915, iteration: 136348
loss: 1.073244571685791,grad_norm: 0.9999992916337122, iteration: 136349
loss: 1.5513972043991089,grad_norm: 1.0000000195771899, iteration: 136350
loss: 1.3745349645614624,grad_norm: 0.999999966623577, iteration: 136351
loss: 1.5179623365402222,grad_norm: 0.999999859248617, iteration: 136352
loss: 1.7316302061080933,grad_norm: 0.9999998584596073, iteration: 136353
loss: 1.0871338844299316,grad_norm: 0.999999937272809, iteration: 136354
loss: 1.3778516054153442,grad_norm: 0.9999999100019378, iteration: 136355
loss: 1.4383082389831543,grad_norm: 0.9999997480582176, iteration: 136356
loss: 1.9638359546661377,grad_norm: 1.0000000291611755, iteration: 136357
loss: 1.0883948802947998,grad_norm: 0.9999992038202944, iteration: 136358
loss: 1.234639048576355,grad_norm: 0.999999932246933, iteration: 136359
loss: 2.1667158603668213,grad_norm: 0.9999999836140208, iteration: 136360
loss: 1.4940239191055298,grad_norm: 0.9999996618167658, iteration: 136361
loss: 1.755744218826294,grad_norm: 0.9999999246513122, iteration: 136362
loss: 1.672429084777832,grad_norm: 0.9999999993465277, iteration: 136363
loss: 1.8088107109069824,grad_norm: 1.000000088791033, iteration: 136364
loss: 1.7929062843322754,grad_norm: 0.9999999562633323, iteration: 136365
loss: 1.6965540647506714,grad_norm: 0.9999997302245236, iteration: 136366
loss: 1.2401022911071777,grad_norm: 0.9999999703915521, iteration: 136367
loss: 1.6633918285369873,grad_norm: 0.999999873774638, iteration: 136368
loss: 1.3536570072174072,grad_norm: 0.9999998730829881, iteration: 136369
loss: 1.3133697509765625,grad_norm: 0.9999999654640949, iteration: 136370
loss: 1.517163872718811,grad_norm: 0.9999998576892268, iteration: 136371
loss: 1.516315221786499,grad_norm: 0.9999997499793915, iteration: 136372
loss: 1.5211199522018433,grad_norm: 0.999999857773736, iteration: 136373
loss: 1.1499782800674438,grad_norm: 0.9999997628561306, iteration: 136374
loss: 1.5524120330810547,grad_norm: 0.9999998804380584, iteration: 136375
loss: 1.0229928493499756,grad_norm: 0.9999990682329405, iteration: 136376
loss: 1.5279284715652466,grad_norm: 0.9999999864195138, iteration: 136377
loss: 1.4091382026672363,grad_norm: 0.9999998766128595, iteration: 136378
loss: 1.1055822372436523,grad_norm: 0.9999994314879826, iteration: 136379
loss: 1.1602482795715332,grad_norm: 0.9999996023278949, iteration: 136380
loss: 1.3568865060806274,grad_norm: 0.999999756477688, iteration: 136381
loss: 1.3511468172073364,grad_norm: 0.9999999001225065, iteration: 136382
loss: 1.2399075031280518,grad_norm: 0.9999999255654358, iteration: 136383
loss: 1.2608580589294434,grad_norm: 0.9999999016234504, iteration: 136384
loss: 1.5331933498382568,grad_norm: 0.9999996209707545, iteration: 136385
loss: 1.1443471908569336,grad_norm: 0.9999997628122053, iteration: 136386
loss: 1.2552634477615356,grad_norm: 0.9999997617498997, iteration: 136387
loss: 1.2930406332015991,grad_norm: 0.9999999882708689, iteration: 136388
loss: 1.1740068197250366,grad_norm: 0.9999998389426404, iteration: 136389
loss: 1.0816144943237305,grad_norm: 0.9999992643812264, iteration: 136390
loss: 1.1659302711486816,grad_norm: 0.9999998465476648, iteration: 136391
loss: 1.4069583415985107,grad_norm: 0.9999999592941315, iteration: 136392
loss: 1.2858655452728271,grad_norm: 0.9999994600498523, iteration: 136393
loss: 1.1530979871749878,grad_norm: 1.0000000108664986, iteration: 136394
loss: 1.0526167154312134,grad_norm: 0.9999998997104886, iteration: 136395
loss: 1.171624779701233,grad_norm: 0.9999997564929681, iteration: 136396
loss: 1.094517707824707,grad_norm: 0.9999998063120699, iteration: 136397
loss: 1.227866768836975,grad_norm: 0.9999990497135091, iteration: 136398
loss: 1.2098994255065918,grad_norm: 0.9999998569534136, iteration: 136399
loss: 1.1317437887191772,grad_norm: 0.9999996301492642, iteration: 136400
loss: 1.0893526077270508,grad_norm: 0.9999996998139358, iteration: 136401
loss: 1.022339940071106,grad_norm: 0.9999998036792707, iteration: 136402
loss: 1.208228349685669,grad_norm: 0.9999994851762005, iteration: 136403
loss: 1.1279865503311157,grad_norm: 0.9999994452625135, iteration: 136404
loss: 1.0800871849060059,grad_norm: 0.9999997215664167, iteration: 136405
loss: 1.2702176570892334,grad_norm: 0.9999995659977061, iteration: 136406
loss: 1.1459980010986328,grad_norm: 0.9999999651092022, iteration: 136407
loss: 1.2952460050582886,grad_norm: 0.9999998357903361, iteration: 136408
loss: 1.2613381147384644,grad_norm: 0.9999998388831536, iteration: 136409
loss: 1.5830646753311157,grad_norm: 0.9999999714843186, iteration: 136410
loss: 1.207904577255249,grad_norm: 0.9999995844356361, iteration: 136411
loss: 1.3498929738998413,grad_norm: 0.9999996809207815, iteration: 136412
loss: 1.1526378393173218,grad_norm: 1.0000000334116161, iteration: 136413
loss: 1.1471456289291382,grad_norm: 0.9999994320640986, iteration: 136414
loss: 1.3917737007141113,grad_norm: 0.9999997684234885, iteration: 136415
loss: 1.3025068044662476,grad_norm: 0.999999227916878, iteration: 136416
loss: 1.3429863452911377,grad_norm: 0.9999998569926568, iteration: 136417
loss: 1.3045202493667603,grad_norm: 0.9999994413702719, iteration: 136418
loss: 1.0343559980392456,grad_norm: 0.9310850837823652, iteration: 136419
loss: 1.164089560508728,grad_norm: 0.9999997804101363, iteration: 136420
loss: 1.0834215879440308,grad_norm: 0.9999997874904664, iteration: 136421
loss: 1.3153572082519531,grad_norm: 0.999999526649807, iteration: 136422
loss: 1.1361422538757324,grad_norm: 0.9999996836850552, iteration: 136423
loss: 1.290898323059082,grad_norm: 1.0000000698074167, iteration: 136424
loss: 1.0999006032943726,grad_norm: 0.9999995638701973, iteration: 136425
loss: 1.1240854263305664,grad_norm: 0.9999996255343084, iteration: 136426
loss: 1.2234106063842773,grad_norm: 0.9999999220630157, iteration: 136427
loss: 1.1433961391448975,grad_norm: 0.9999999274517793, iteration: 136428
loss: 1.1180790662765503,grad_norm: 0.9999991538763143, iteration: 136429
loss: 1.1537436246871948,grad_norm: 0.9999998219582663, iteration: 136430
loss: 1.1946591138839722,grad_norm: 0.9999998977561043, iteration: 136431
loss: 1.3753279447555542,grad_norm: 0.9999999165630893, iteration: 136432
loss: 1.1534371376037598,grad_norm: 0.999999942350777, iteration: 136433
loss: 1.2164794206619263,grad_norm: 0.999999951942527, iteration: 136434
loss: 1.3104267120361328,grad_norm: 0.9999994224408233, iteration: 136435
loss: 1.324331521987915,grad_norm: 0.9999998227096499, iteration: 136436
loss: 1.307605504989624,grad_norm: 0.9999997118785696, iteration: 136437
loss: 1.2645518779754639,grad_norm: 0.9999996581919677, iteration: 136438
loss: 1.4281401634216309,grad_norm: 0.9999996935404123, iteration: 136439
loss: 1.3582773208618164,grad_norm: 0.9999998770198725, iteration: 136440
loss: 1.5876845121383667,grad_norm: 1.0000000723698366, iteration: 136441
loss: 1.2006100416183472,grad_norm: 0.9999996937047791, iteration: 136442
loss: 1.3346256017684937,grad_norm: 0.9999999876518593, iteration: 136443
loss: 1.2409424781799316,grad_norm: 0.9999997248957584, iteration: 136444
loss: 1.4833719730377197,grad_norm: 0.9999998933228912, iteration: 136445
loss: 1.548871397972107,grad_norm: 1.000000049896504, iteration: 136446
loss: 1.4347420930862427,grad_norm: 0.9999997827697317, iteration: 136447
loss: 1.8663396835327148,grad_norm: 0.9999998694074055, iteration: 136448
loss: 1.3212347030639648,grad_norm: 0.9999998438296511, iteration: 136449
loss: 1.6889632940292358,grad_norm: 0.9999998948899921, iteration: 136450
loss: 1.2767395973205566,grad_norm: 0.9999996601225222, iteration: 136451
loss: 1.5466070175170898,grad_norm: 0.9999999509850118, iteration: 136452
loss: 1.796982765197754,grad_norm: 0.9999998529867064, iteration: 136453
loss: 1.3048065900802612,grad_norm: 0.9999997906906097, iteration: 136454
loss: 1.5197125673294067,grad_norm: 0.9999999168210014, iteration: 136455
loss: 1.5120905637741089,grad_norm: 0.9999999407561774, iteration: 136456
loss: 1.2409930229187012,grad_norm: 0.9999995036915227, iteration: 136457
loss: 1.73545241355896,grad_norm: 1.0000000076105082, iteration: 136458
loss: 1.610310435295105,grad_norm: 0.9999997418243224, iteration: 136459
loss: 1.7911204099655151,grad_norm: 0.9999999910518862, iteration: 136460
loss: 1.6050409078598022,grad_norm: 0.9999996639228745, iteration: 136461
loss: 1.6590394973754883,grad_norm: 0.999999941241315, iteration: 136462
loss: 1.5628398656845093,grad_norm: 0.9999999453298043, iteration: 136463
loss: 1.3550282716751099,grad_norm: 0.9999998870872789, iteration: 136464
loss: 1.2818554639816284,grad_norm: 0.9999999526432185, iteration: 136465
loss: 1.3051726818084717,grad_norm: 0.9999999133126709, iteration: 136466
loss: 1.2016825675964355,grad_norm: 0.999999359736027, iteration: 136467
loss: 1.4856983423233032,grad_norm: 0.9999998009316142, iteration: 136468
loss: 1.339463233947754,grad_norm: 0.9999998781378607, iteration: 136469
loss: 1.4297975301742554,grad_norm: 0.9999998214941027, iteration: 136470
loss: 1.4517784118652344,grad_norm: 0.9999997221285731, iteration: 136471
loss: 1.3523781299591064,grad_norm: 0.9999996917328707, iteration: 136472
loss: 1.2780852317810059,grad_norm: 0.9999995150187767, iteration: 136473
loss: 1.4185733795166016,grad_norm: 0.9999997414013968, iteration: 136474
loss: 1.4901363849639893,grad_norm: 0.9999997824089686, iteration: 136475
loss: 1.3274807929992676,grad_norm: 0.9999996444178689, iteration: 136476
loss: 1.199674367904663,grad_norm: 0.9999995778006986, iteration: 136477
loss: 1.1975985765457153,grad_norm: 0.9999995024081332, iteration: 136478
loss: 1.3931559324264526,grad_norm: 0.9999999424335423, iteration: 136479
loss: 1.393958568572998,grad_norm: 0.999999798883083, iteration: 136480
loss: 1.5417039394378662,grad_norm: 0.9999997475249428, iteration: 136481
loss: 1.2934328317642212,grad_norm: 0.999999965993395, iteration: 136482
loss: 1.73512601852417,grad_norm: 0.9999997250084021, iteration: 136483
loss: 1.4543551206588745,grad_norm: 0.9999997866711652, iteration: 136484
loss: 1.30331552028656,grad_norm: 0.9999994471189735, iteration: 136485
loss: 1.36503005027771,grad_norm: 0.9999995629293306, iteration: 136486
loss: 1.3625428676605225,grad_norm: 0.9999997034664205, iteration: 136487
loss: 1.2218965291976929,grad_norm: 0.99999946853669, iteration: 136488
loss: 1.2539217472076416,grad_norm: 0.9999997160099584, iteration: 136489
loss: 1.2562206983566284,grad_norm: 0.9999999084925576, iteration: 136490
loss: 1.4529094696044922,grad_norm: 0.9999996475918549, iteration: 136491
loss: 1.4418443441390991,grad_norm: 0.9999996805276116, iteration: 136492
loss: 1.3317837715148926,grad_norm: 0.9999997065541016, iteration: 136493
loss: 1.1990083456039429,grad_norm: 0.9999996589717633, iteration: 136494
loss: 1.4505656957626343,grad_norm: 0.9999998777170783, iteration: 136495
loss: 1.215236783027649,grad_norm: 0.9999997146286652, iteration: 136496
loss: 1.348484992980957,grad_norm: 0.9999998838732991, iteration: 136497
loss: 1.5884599685668945,grad_norm: 0.9999998898571035, iteration: 136498
loss: 1.5708132982254028,grad_norm: 0.9999999499437763, iteration: 136499
loss: 1.461096167564392,grad_norm: 0.9999997597665722, iteration: 136500
loss: 1.2868332862854004,grad_norm: 0.999999922459317, iteration: 136501
loss: 1.2989170551300049,grad_norm: 0.9999996455548437, iteration: 136502
loss: 1.492769479751587,grad_norm: 1.000000033442524, iteration: 136503
loss: 1.208088994026184,grad_norm: 0.9999998031680568, iteration: 136504
loss: 1.3447812795639038,grad_norm: 0.9999999130352418, iteration: 136505
loss: 1.27299964427948,grad_norm: 0.9999995161483131, iteration: 136506
loss: 1.3714728355407715,grad_norm: 1.0000000382727137, iteration: 136507
loss: 1.676214337348938,grad_norm: 0.9999999673371734, iteration: 136508
loss: 1.3592960834503174,grad_norm: 0.9999999238792019, iteration: 136509
loss: 1.5897555351257324,grad_norm: 0.9999999295003187, iteration: 136510
loss: 1.310593843460083,grad_norm: 0.9999999062557615, iteration: 136511
loss: 1.4959461688995361,grad_norm: 0.99999972447437, iteration: 136512
loss: 1.264374017715454,grad_norm: 0.9999995719962823, iteration: 136513
loss: 1.5402872562408447,grad_norm: 1.0000000377557252, iteration: 136514
loss: 1.2504253387451172,grad_norm: 0.9999996581257781, iteration: 136515
loss: 1.246791124343872,grad_norm: 0.9999997839804805, iteration: 136516
loss: 1.4355249404907227,grad_norm: 0.9999998540799778, iteration: 136517
loss: 1.2435482740402222,grad_norm: 0.9999999447257539, iteration: 136518
loss: 1.3888847827911377,grad_norm: 0.9999999884729939, iteration: 136519
loss: 1.1938756704330444,grad_norm: 0.9999994037260985, iteration: 136520
loss: 1.1890857219696045,grad_norm: 0.9999999025507499, iteration: 136521
loss: 1.6175974607467651,grad_norm: 0.9999998391785235, iteration: 136522
loss: 1.3441461324691772,grad_norm: 0.9999996792002014, iteration: 136523
loss: 1.2488911151885986,grad_norm: 0.9999997483370874, iteration: 136524
loss: 1.598279595375061,grad_norm: 0.9999997245852588, iteration: 136525
loss: 1.1281734704971313,grad_norm: 0.9999992875810523, iteration: 136526
loss: 1.3542747497558594,grad_norm: 0.9999998250905873, iteration: 136527
loss: 1.5545543432235718,grad_norm: 0.9999999809596094, iteration: 136528
loss: 1.2818119525909424,grad_norm: 0.9999996491264175, iteration: 136529
loss: 1.360414981842041,grad_norm: 0.9999997532679132, iteration: 136530
loss: 1.5270204544067383,grad_norm: 0.9999997594542984, iteration: 136531
loss: 1.3977198600769043,grad_norm: 0.9999998238828971, iteration: 136532
loss: 1.5507597923278809,grad_norm: 0.9999999174115023, iteration: 136533
loss: 1.528601050376892,grad_norm: 0.9999996508115536, iteration: 136534
loss: 1.4505091905593872,grad_norm: 0.9999994463024363, iteration: 136535
loss: 1.2644225358963013,grad_norm: 0.9999993489023806, iteration: 136536
loss: 1.270156979560852,grad_norm: 0.9999995732063242, iteration: 136537
loss: 1.361771821975708,grad_norm: 0.9999997150251734, iteration: 136538
loss: 1.192941427230835,grad_norm: 0.999999855824488, iteration: 136539
loss: 1.4474260807037354,grad_norm: 0.999999625009117, iteration: 136540
loss: 1.4864739179611206,grad_norm: 0.9999999190777782, iteration: 136541
loss: 1.4651051759719849,grad_norm: 0.9999996054739765, iteration: 136542
loss: 1.4078185558319092,grad_norm: 0.9999996259789103, iteration: 136543
loss: 1.223172664642334,grad_norm: 0.9999999158654469, iteration: 136544
loss: 1.2281746864318848,grad_norm: 0.9999992661152356, iteration: 136545
loss: 1.354658603668213,grad_norm: 0.9999998129190758, iteration: 136546
loss: 1.291799545288086,grad_norm: 0.9999997946537018, iteration: 136547
loss: 1.3380569219589233,grad_norm: 1.0000000280218158, iteration: 136548
loss: 1.2346744537353516,grad_norm: 0.9999999557665631, iteration: 136549
loss: 1.3230044841766357,grad_norm: 0.9999999368536765, iteration: 136550
loss: 1.5431731939315796,grad_norm: 0.9999998823224107, iteration: 136551
loss: 1.2216849327087402,grad_norm: 0.9999997498661612, iteration: 136552
loss: 1.220134973526001,grad_norm: 0.9999998939683116, iteration: 136553
loss: 1.1731303930282593,grad_norm: 0.9999995618857298, iteration: 136554
loss: 1.2956593036651611,grad_norm: 0.999999816645634, iteration: 136555
loss: 1.3677818775177002,grad_norm: 0.9999998026749195, iteration: 136556
loss: 1.1179121732711792,grad_norm: 0.99999992738735, iteration: 136557
loss: 1.3325742483139038,grad_norm: 0.9999995797803151, iteration: 136558
loss: 1.1478004455566406,grad_norm: 0.9999995507242988, iteration: 136559
loss: 1.0862089395523071,grad_norm: 0.9999993572807888, iteration: 136560
loss: 1.4422006607055664,grad_norm: 0.9999999363801763, iteration: 136561
loss: 1.191243290901184,grad_norm: 0.9999996612785726, iteration: 136562
loss: 1.1389851570129395,grad_norm: 0.9616754686325271, iteration: 136563
loss: 1.2808176279067993,grad_norm: 0.9999998313026857, iteration: 136564
loss: 1.1316874027252197,grad_norm: 0.9999995980577963, iteration: 136565
loss: 1.2470455169677734,grad_norm: 0.999999510026873, iteration: 136566
loss: 1.1812423467636108,grad_norm: 0.9999998598858211, iteration: 136567
loss: 1.187227725982666,grad_norm: 0.9999993630300047, iteration: 136568
loss: 1.1925910711288452,grad_norm: 0.9999993567022915, iteration: 136569
loss: 1.179307460784912,grad_norm: 0.9999994893546588, iteration: 136570
loss: 1.193972110748291,grad_norm: 0.9999996307468758, iteration: 136571
loss: 1.1959145069122314,grad_norm: 0.9999995528100402, iteration: 136572
loss: 1.1861447095870972,grad_norm: 0.9999994721986077, iteration: 136573
loss: 1.230437994003296,grad_norm: 0.9999997720780747, iteration: 136574
loss: 1.2286508083343506,grad_norm: 0.9999991427649054, iteration: 136575
loss: 1.985409140586853,grad_norm: 0.9999999449998844, iteration: 136576
loss: 1.0934081077575684,grad_norm: 0.9356427654623511, iteration: 136577
loss: 1.0195603370666504,grad_norm: 0.9999999193449418, iteration: 136578
loss: 1.1830165386199951,grad_norm: 0.9999997896880042, iteration: 136579
loss: 1.1534098386764526,grad_norm: 0.999999859629361, iteration: 136580
loss: 1.6063958406448364,grad_norm: 0.9999999435430875, iteration: 136581
loss: 1.173311710357666,grad_norm: 0.9999998436116163, iteration: 136582
loss: 1.185517430305481,grad_norm: 1.0000000272443579, iteration: 136583
loss: 1.1025222539901733,grad_norm: 0.9999995208740741, iteration: 136584
loss: 1.1583656072616577,grad_norm: 0.9999993206829151, iteration: 136585
loss: 1.1050766706466675,grad_norm: 0.9999993737546343, iteration: 136586
loss: 1.1414412260055542,grad_norm: 0.9999998191495445, iteration: 136587
loss: 0.9916148781776428,grad_norm: 0.9999999514324357, iteration: 136588
loss: 1.020226240158081,grad_norm: 0.9999994718784398, iteration: 136589
loss: 1.0826938152313232,grad_norm: 0.9999994950481793, iteration: 136590
loss: 1.040428638458252,grad_norm: 0.9999998959130347, iteration: 136591
loss: 1.0602123737335205,grad_norm: 0.9999995141993469, iteration: 136592
loss: 0.9584259986877441,grad_norm: 1.0000000049440503, iteration: 136593
loss: 1.1851511001586914,grad_norm: 0.9999996379632176, iteration: 136594
loss: 1.1253215074539185,grad_norm: 0.9999998379444103, iteration: 136595
loss: 1.123318076133728,grad_norm: 0.9999999197074516, iteration: 136596
loss: 1.0216258764266968,grad_norm: 0.9999994425066654, iteration: 136597
loss: 1.1330004930496216,grad_norm: 0.9999999538815227, iteration: 136598
loss: 1.0723278522491455,grad_norm: 1.000000044882584, iteration: 136599
loss: 1.2335418462753296,grad_norm: 0.9999997829459506, iteration: 136600
loss: 1.132962703704834,grad_norm: 0.9999996452827562, iteration: 136601
loss: 1.1611065864562988,grad_norm: 0.9999998892502172, iteration: 136602
loss: 1.154078722000122,grad_norm: 0.9999994154861522, iteration: 136603
loss: 1.1471936702728271,grad_norm: 0.9999997843140295, iteration: 136604
loss: 0.984630286693573,grad_norm: 0.9999991663043538, iteration: 136605
loss: 1.1019357442855835,grad_norm: 0.9999998715469461, iteration: 136606
loss: 1.1810871362686157,grad_norm: 0.9999998568968601, iteration: 136607
loss: 1.1151689291000366,grad_norm: 0.9999999400771051, iteration: 136608
loss: 1.1808905601501465,grad_norm: 0.9999997614691553, iteration: 136609
loss: 1.120263934135437,grad_norm: 1.0000000240586127, iteration: 136610
loss: 1.4561322927474976,grad_norm: 0.9999999439772683, iteration: 136611
loss: 1.0897877216339111,grad_norm: 0.9999992089804522, iteration: 136612
loss: 1.4132639169692993,grad_norm: 1.0000000256472397, iteration: 136613
loss: 1.067384958267212,grad_norm: 0.9999992058563226, iteration: 136614
loss: 1.1053205728530884,grad_norm: 0.9999998240422565, iteration: 136615
loss: 1.0275771617889404,grad_norm: 0.9999994921633637, iteration: 136616
loss: 1.0214033126831055,grad_norm: 0.9999991463707785, iteration: 136617
loss: 1.0720140933990479,grad_norm: 0.999999736337598, iteration: 136618
loss: 1.072665810585022,grad_norm: 1.000000071903423, iteration: 136619
loss: 1.0877017974853516,grad_norm: 0.9999998929609244, iteration: 136620
loss: 1.0849430561065674,grad_norm: 0.8607364097167381, iteration: 136621
loss: 1.1296671628952026,grad_norm: 0.9999999193588386, iteration: 136622
loss: 1.1432994604110718,grad_norm: 0.9999999238134736, iteration: 136623
loss: 1.2326337099075317,grad_norm: 0.9999997545705699, iteration: 136624
loss: 1.1494158506393433,grad_norm: 0.999999825748292, iteration: 136625
loss: 1.1059459447860718,grad_norm: 0.99999959266784, iteration: 136626
loss: 1.1120171546936035,grad_norm: 0.9999998971953825, iteration: 136627
loss: 1.1552497148513794,grad_norm: 0.9999995803917759, iteration: 136628
loss: 1.147295355796814,grad_norm: 0.9999996845530403, iteration: 136629
loss: 1.1854419708251953,grad_norm: 0.9999999607098079, iteration: 136630
loss: 1.0078048706054688,grad_norm: 0.9999999964047767, iteration: 136631
loss: 1.0589405298233032,grad_norm: 0.9999999369236513, iteration: 136632
loss: 1.0231132507324219,grad_norm: 1.0000000275793341, iteration: 136633
loss: 1.187374472618103,grad_norm: 0.999999588393426, iteration: 136634
loss: 1.0711113214492798,grad_norm: 0.9999997536433415, iteration: 136635
loss: 1.1745697259902954,grad_norm: 0.9999999336141362, iteration: 136636
loss: 1.1158525943756104,grad_norm: 1.0000000182648656, iteration: 136637
loss: 1.0420972108840942,grad_norm: 0.9999994838245932, iteration: 136638
loss: 1.0251333713531494,grad_norm: 1.0000000679415133, iteration: 136639
loss: 1.101513147354126,grad_norm: 0.9999998495747753, iteration: 136640
loss: 1.0546369552612305,grad_norm: 0.9999992137817367, iteration: 136641
loss: 1.180472493171692,grad_norm: 1.000000001008993, iteration: 136642
loss: 1.056321620941162,grad_norm: 0.9999996806938007, iteration: 136643
loss: 1.0894337892532349,grad_norm: 0.9999994476144457, iteration: 136644
loss: 1.0579057931900024,grad_norm: 0.9999999539465558, iteration: 136645
loss: 1.4681035280227661,grad_norm: 1.000000087661439, iteration: 136646
loss: 1.0573257207870483,grad_norm: 0.9999995751928531, iteration: 136647
loss: 1.0879464149475098,grad_norm: 0.9999992216099004, iteration: 136648
loss: 1.1296358108520508,grad_norm: 0.9999999182703014, iteration: 136649
loss: 1.3610470294952393,grad_norm: 0.9999998742272591, iteration: 136650
loss: 1.064987301826477,grad_norm: 0.9999995717139509, iteration: 136651
loss: 1.034293293952942,grad_norm: 0.9999993301041081, iteration: 136652
loss: 1.011928677558899,grad_norm: 0.7953167197845701, iteration: 136653
loss: 1.0049818754196167,grad_norm: 0.999999697743723, iteration: 136654
loss: 1.4930745363235474,grad_norm: 0.9999999505562991, iteration: 136655
loss: 1.0018280744552612,grad_norm: 0.7502212397765105, iteration: 136656
loss: 1.480217695236206,grad_norm: 0.9999997557693551, iteration: 136657
loss: 0.9955518841743469,grad_norm: 0.999999453988374, iteration: 136658
loss: 1.1004619598388672,grad_norm: 0.9999999248625762, iteration: 136659
loss: 1.0115374326705933,grad_norm: 0.9714218227823367, iteration: 136660
loss: 1.0769548416137695,grad_norm: 0.9999994301072025, iteration: 136661
loss: 1.044730544090271,grad_norm: 0.9999994574104004, iteration: 136662
loss: 1.100917100906372,grad_norm: 0.999999896258572, iteration: 136663
loss: 1.408897042274475,grad_norm: 0.9999999864159195, iteration: 136664
loss: 1.0741208791732788,grad_norm: 0.9999998332614048, iteration: 136665
loss: 1.0510754585266113,grad_norm: 0.7603384276145928, iteration: 136666
loss: 1.1010066270828247,grad_norm: 0.9999999528409469, iteration: 136667
loss: 1.0119926929473877,grad_norm: 0.9999992420909677, iteration: 136668
loss: 1.0347611904144287,grad_norm: 0.9948788573810353, iteration: 136669
loss: 1.3038657903671265,grad_norm: 0.9999999179435526, iteration: 136670
loss: 1.0952056646347046,grad_norm: 0.999999685001018, iteration: 136671
loss: 1.0652250051498413,grad_norm: 0.9999991641887007, iteration: 136672
loss: 0.9618260264396667,grad_norm: 0.9175043561526529, iteration: 136673
loss: 1.1690706014633179,grad_norm: 0.9999996222228724, iteration: 136674
loss: 1.0367010831832886,grad_norm: 0.9999999369118063, iteration: 136675
loss: 1.0426783561706543,grad_norm: 0.9999994794003686, iteration: 136676
loss: 1.0538911819458008,grad_norm: 0.999999944200015, iteration: 136677
loss: 1.0326402187347412,grad_norm: 0.9999990249163573, iteration: 136678
loss: 1.1242088079452515,grad_norm: 0.9999995655446675, iteration: 136679
loss: 1.1046737432479858,grad_norm: 0.9999994894592001, iteration: 136680
loss: 0.9798550605773926,grad_norm: 0.9999991297866684, iteration: 136681
loss: 1.1565989255905151,grad_norm: 0.9999999534124544, iteration: 136682
loss: 1.1064215898513794,grad_norm: 0.9999995074409431, iteration: 136683
loss: 1.0807511806488037,grad_norm: 0.9999993117924852, iteration: 136684
loss: 1.1182273626327515,grad_norm: 0.999999212198772, iteration: 136685
loss: 1.0357805490493774,grad_norm: 0.9260818227967745, iteration: 136686
loss: 1.0405703783035278,grad_norm: 0.9999996304927022, iteration: 136687
loss: 1.0061644315719604,grad_norm: 0.8774601127162786, iteration: 136688
loss: 1.1041914224624634,grad_norm: 0.9999996300470566, iteration: 136689
loss: 1.1013940572738647,grad_norm: 0.9999997537842308, iteration: 136690
loss: 1.1052309274673462,grad_norm: 0.9999990662922632, iteration: 136691
loss: 1.0659782886505127,grad_norm: 0.999999902321347, iteration: 136692
loss: 1.0018311738967896,grad_norm: 0.8330331635979034, iteration: 136693
loss: 1.040524959564209,grad_norm: 0.9291909587590084, iteration: 136694
loss: 1.1136125326156616,grad_norm: 0.9999995668881033, iteration: 136695
loss: 1.0496584177017212,grad_norm: 0.9999999104834865, iteration: 136696
loss: 1.1725188493728638,grad_norm: 0.9999998212584974, iteration: 136697
loss: 1.0351520776748657,grad_norm: 0.7545064578236157, iteration: 136698
loss: 0.9642164707183838,grad_norm: 0.8893373554050381, iteration: 136699
loss: 1.041701316833496,grad_norm: 0.9999994266672023, iteration: 136700
loss: 1.3287984132766724,grad_norm: 0.9999998618311315, iteration: 136701
loss: 1.0668060779571533,grad_norm: 0.9999990838950699, iteration: 136702
loss: 1.0144639015197754,grad_norm: 0.9999992535904552, iteration: 136703
loss: 0.9972427487373352,grad_norm: 0.9999999158729302, iteration: 136704
loss: 1.1264348030090332,grad_norm: 0.9999991811211194, iteration: 136705
loss: 1.0510001182556152,grad_norm: 0.9999996738545601, iteration: 136706
loss: 1.030789852142334,grad_norm: 0.9999991646912232, iteration: 136707
loss: 1.063915729522705,grad_norm: 0.9999994070086615, iteration: 136708
loss: 1.0340261459350586,grad_norm: 0.909473618577871, iteration: 136709
loss: 1.00404953956604,grad_norm: 0.9999990878673507, iteration: 136710
loss: 1.151585340499878,grad_norm: 0.9999992098927953, iteration: 136711
loss: 0.9605067372322083,grad_norm: 0.7830050020345946, iteration: 136712
loss: 1.1840095520019531,grad_norm: 0.9999999131296752, iteration: 136713
loss: 1.0249905586242676,grad_norm: 0.9999990519575092, iteration: 136714
loss: 1.0711777210235596,grad_norm: 0.9999999975271971, iteration: 136715
loss: 0.9903974533081055,grad_norm: 0.8091970071757953, iteration: 136716
loss: 1.0481818914413452,grad_norm: 0.9999992998025489, iteration: 136717
loss: 1.0257524251937866,grad_norm: 0.9999995756727093, iteration: 136718
loss: 1.0734602212905884,grad_norm: 0.9999997413760899, iteration: 136719
loss: 1.1130414009094238,grad_norm: 0.9999998119810913, iteration: 136720
loss: 1.149381399154663,grad_norm: 0.999999393360576, iteration: 136721
loss: 1.03383207321167,grad_norm: 0.9999997394025893, iteration: 136722
loss: 1.2355670928955078,grad_norm: 0.9999999228948425, iteration: 136723
loss: 1.0175286531448364,grad_norm: 0.9238909600745343, iteration: 136724
loss: 1.0963759422302246,grad_norm: 0.9999998698692618, iteration: 136725
loss: 1.0006967782974243,grad_norm: 0.9999997005788419, iteration: 136726
loss: 1.2659893035888672,grad_norm: 0.9999997404945055, iteration: 136727
loss: 1.1039637327194214,grad_norm: 0.999999444606727, iteration: 136728
loss: 1.0060203075408936,grad_norm: 0.9999990567688349, iteration: 136729
loss: 1.0453120470046997,grad_norm: 0.9391759045904026, iteration: 136730
loss: 1.0129553079605103,grad_norm: 0.896564034074554, iteration: 136731
loss: 1.0478308200836182,grad_norm: 0.9999994566695407, iteration: 136732
loss: 1.0574958324432373,grad_norm: 0.9999994984158224, iteration: 136733
loss: 1.024848222732544,grad_norm: 0.9999992285120403, iteration: 136734
loss: 0.9873924851417542,grad_norm: 0.9999995856552729, iteration: 136735
loss: 1.1116578578948975,grad_norm: 0.9999996266470212, iteration: 136736
loss: 1.1187328100204468,grad_norm: 0.9999998801442852, iteration: 136737
loss: 1.0850210189819336,grad_norm: 0.9999992515944929, iteration: 136738
loss: 1.070618748664856,grad_norm: 0.999999952670663, iteration: 136739
loss: 1.0483781099319458,grad_norm: 0.9999995082786579, iteration: 136740
loss: 1.1308979988098145,grad_norm: 0.9999998960245412, iteration: 136741
loss: 1.040439486503601,grad_norm: 0.9999999019310922, iteration: 136742
loss: 1.0212661027908325,grad_norm: 0.949071947830518, iteration: 136743
loss: 1.0750319957733154,grad_norm: 0.999999681949874, iteration: 136744
loss: 1.0341891050338745,grad_norm: 0.999999544293491, iteration: 136745
loss: 1.0220820903778076,grad_norm: 0.8890652261801008, iteration: 136746
loss: 1.0241584777832031,grad_norm: 0.9999993420373717, iteration: 136747
loss: 1.1124029159545898,grad_norm: 0.9999995555213402, iteration: 136748
loss: 1.0439122915267944,grad_norm: 0.999999128803307, iteration: 136749
loss: 1.0075774192810059,grad_norm: 0.9999998984238814, iteration: 136750
loss: 1.1607283353805542,grad_norm: 0.9999999540972325, iteration: 136751
loss: 1.1358414888381958,grad_norm: 0.999999715530887, iteration: 136752
loss: 1.0373555421829224,grad_norm: 0.9999994867361076, iteration: 136753
loss: 1.0638970136642456,grad_norm: 0.9999999121153547, iteration: 136754
loss: 1.026084065437317,grad_norm: 0.99999918990462, iteration: 136755
loss: 1.5000265836715698,grad_norm: 0.9999999237434019, iteration: 136756
loss: 1.4801512956619263,grad_norm: 1.0000000653149974, iteration: 136757
loss: 1.4309419393539429,grad_norm: 0.9999999493978212, iteration: 136758
loss: 1.2688190937042236,grad_norm: 1.0000000202236958, iteration: 136759
loss: 1.0344966650009155,grad_norm: 0.9999999672253614, iteration: 136760
loss: 1.0602052211761475,grad_norm: 0.9999995743833653, iteration: 136761
loss: 1.2921960353851318,grad_norm: 0.9999997418603271, iteration: 136762
loss: 1.3146047592163086,grad_norm: 0.9999998650541039, iteration: 136763
loss: 1.026425838470459,grad_norm: 0.9999998176070789, iteration: 136764
loss: 1.1468102931976318,grad_norm: 0.9999995875611296, iteration: 136765
loss: 1.2039567232131958,grad_norm: 0.9999998755935113, iteration: 136766
loss: 1.577354907989502,grad_norm: 1.0000000077225177, iteration: 136767
loss: 1.0110372304916382,grad_norm: 0.9999998301865104, iteration: 136768
loss: 1.4313023090362549,grad_norm: 0.9999996888668602, iteration: 136769
loss: 1.8862882852554321,grad_norm: 1.0000000851705426, iteration: 136770
loss: 1.2194418907165527,grad_norm: 0.9999998105394623, iteration: 136771
loss: 1.0032931566238403,grad_norm: 0.9999995920442033, iteration: 136772
loss: 1.0880192518234253,grad_norm: 0.9999996939844359, iteration: 136773
loss: 1.0419914722442627,grad_norm: 0.9999999073399817, iteration: 136774
loss: 1.386055588722229,grad_norm: 0.9999999086582676, iteration: 136775
loss: 1.536023497581482,grad_norm: 0.999999945407789, iteration: 136776
loss: 1.555989146232605,grad_norm: 0.9999999280127183, iteration: 136777
loss: 1.276471734046936,grad_norm: 0.9999998476034768, iteration: 136778
loss: 1.4063950777053833,grad_norm: 0.9999998328066992, iteration: 136779
loss: 1.2059217691421509,grad_norm: 0.9999997339404315, iteration: 136780
loss: 1.2798227071762085,grad_norm: 0.9999998893542593, iteration: 136781
loss: 1.47807776927948,grad_norm: 0.9999996702349225, iteration: 136782
loss: 1.5025534629821777,grad_norm: 0.9999999453330536, iteration: 136783
loss: 1.2218009233474731,grad_norm: 0.999999930131505, iteration: 136784
loss: 1.3664454221725464,grad_norm: 0.9999999643526924, iteration: 136785
loss: 1.376511812210083,grad_norm: 1.0000000092250791, iteration: 136786
loss: 1.6541141271591187,grad_norm: 1.000000010473487, iteration: 136787
loss: 1.428486704826355,grad_norm: 1.0000000110542024, iteration: 136788
loss: 1.3321589231491089,grad_norm: 0.9999998854054598, iteration: 136789
loss: 1.4539881944656372,grad_norm: 1.00000000111366, iteration: 136790
loss: 1.40223228931427,grad_norm: 1.000000065028679, iteration: 136791
loss: 1.1950163841247559,grad_norm: 0.9999998431029782, iteration: 136792
loss: 1.5425115823745728,grad_norm: 0.9999999179413425, iteration: 136793
loss: 1.505853295326233,grad_norm: 0.999999948764824, iteration: 136794
loss: 1.6308863162994385,grad_norm: 0.9999999978863493, iteration: 136795
loss: 1.6027252674102783,grad_norm: 1.0000000538884612, iteration: 136796
loss: 1.3880821466445923,grad_norm: 0.9999999632253623, iteration: 136797
loss: 1.419981837272644,grad_norm: 0.9999999087742213, iteration: 136798
loss: 1.6635364294052124,grad_norm: 0.9999999640494907, iteration: 136799
loss: 1.625448226928711,grad_norm: 0.9999999026532551, iteration: 136800
loss: 1.4275918006896973,grad_norm: 0.999999947628353, iteration: 136801
loss: 1.7576096057891846,grad_norm: 0.9999999513582094, iteration: 136802
loss: 1.45449960231781,grad_norm: 0.9999998387122551, iteration: 136803
loss: 1.5500712394714355,grad_norm: 0.9999999981040743, iteration: 136804
loss: 1.610640048980713,grad_norm: 0.9999999833104792, iteration: 136805
loss: 1.3947041034698486,grad_norm: 0.9999998915551193, iteration: 136806
loss: 1.3408565521240234,grad_norm: 0.9999999241454929, iteration: 136807
loss: 1.4164278507232666,grad_norm: 1.0000000585516546, iteration: 136808
loss: 1.362970232963562,grad_norm: 0.9999998271141716, iteration: 136809
loss: 1.5871249437332153,grad_norm: 1.000000049296865, iteration: 136810
loss: 1.2709510326385498,grad_norm: 0.999999851866155, iteration: 136811
loss: 1.2334712743759155,grad_norm: 0.999999922539167, iteration: 136812
loss: 1.325936198234558,grad_norm: 1.0000000632637156, iteration: 136813
loss: 1.567238211631775,grad_norm: 0.9999998572579673, iteration: 136814
loss: 1.1880136728286743,grad_norm: 0.999999697595445, iteration: 136815
loss: 1.1152280569076538,grad_norm: 0.9999992726686289, iteration: 136816
loss: 1.1086459159851074,grad_norm: 0.9999999557034673, iteration: 136817
loss: 1.0445895195007324,grad_norm: 0.9999995599895058, iteration: 136818
loss: 1.1365145444869995,grad_norm: 1.0000000547465293, iteration: 136819
loss: 1.1236263513565063,grad_norm: 0.9999998218476017, iteration: 136820
loss: 1.185001015663147,grad_norm: 0.9999999140661148, iteration: 136821
loss: 1.1222758293151855,grad_norm: 0.9999994994489416, iteration: 136822
loss: 1.2736479043960571,grad_norm: 0.9999998516567621, iteration: 136823
loss: 1.3257715702056885,grad_norm: 0.999999796344269, iteration: 136824
loss: 1.3963019847869873,grad_norm: 0.9999998389769359, iteration: 136825
loss: 1.2829458713531494,grad_norm: 1.0000000096142672, iteration: 136826
loss: 1.14727783203125,grad_norm: 0.9999997787675957, iteration: 136827
loss: 1.1525588035583496,grad_norm: 0.9999996148119848, iteration: 136828
loss: 1.190987229347229,grad_norm: 0.9999996033446354, iteration: 136829
loss: 1.189393162727356,grad_norm: 0.9999997513716097, iteration: 136830
loss: 1.3682076930999756,grad_norm: 0.9999997169257822, iteration: 136831
loss: 1.0436148643493652,grad_norm: 0.9999993415568341, iteration: 136832
loss: 1.169613003730774,grad_norm: 0.9999999806437462, iteration: 136833
loss: 1.219411849975586,grad_norm: 0.9999997915532263, iteration: 136834
loss: 1.1926405429840088,grad_norm: 0.9999995461687494, iteration: 136835
loss: 1.1119740009307861,grad_norm: 1.000000106661759, iteration: 136836
loss: 1.0911743640899658,grad_norm: 0.9999994372553042, iteration: 136837
loss: 1.1076632738113403,grad_norm: 0.999999935971687, iteration: 136838
loss: 1.0278483629226685,grad_norm: 0.9999998729894755, iteration: 136839
loss: 0.9984177350997925,grad_norm: 0.9999994220116912, iteration: 136840
loss: 1.1968353986740112,grad_norm: 0.9999999323598455, iteration: 136841
loss: 1.4619847536087036,grad_norm: 0.9999999580576275, iteration: 136842
loss: 1.1006767749786377,grad_norm: 0.9999999492109634, iteration: 136843
loss: 1.1686149835586548,grad_norm: 0.9999995142761979, iteration: 136844
loss: 1.118614673614502,grad_norm: 0.9999997325595968, iteration: 136845
loss: 1.23405122756958,grad_norm: 0.999999926228307, iteration: 136846
loss: 1.047919750213623,grad_norm: 0.9999999816213262, iteration: 136847
loss: 1.0550692081451416,grad_norm: 1.0000000064231631, iteration: 136848
loss: 0.9898330569267273,grad_norm: 0.9999994029116205, iteration: 136849
loss: 1.2206193208694458,grad_norm: 0.9999998193168493, iteration: 136850
loss: 1.1328517198562622,grad_norm: 0.9999998008988998, iteration: 136851
loss: 1.06259024143219,grad_norm: 0.9999998636458208, iteration: 136852
loss: 1.25050687789917,grad_norm: 0.9999999825270429, iteration: 136853
loss: 1.1807923316955566,grad_norm: 1.0000000445226969, iteration: 136854
loss: 1.073913812637329,grad_norm: 0.9999999126047199, iteration: 136855
loss: 1.1165392398834229,grad_norm: 0.9999996924606461, iteration: 136856
loss: 1.1140161752700806,grad_norm: 0.9999998707749503, iteration: 136857
loss: 1.0366886854171753,grad_norm: 0.9819910097161719, iteration: 136858
loss: 1.0707979202270508,grad_norm: 0.9999995929790463, iteration: 136859
loss: 1.074415922164917,grad_norm: 0.9999996726564712, iteration: 136860
loss: 1.0868703126907349,grad_norm: 0.9999996430628446, iteration: 136861
loss: 1.1725175380706787,grad_norm: 0.9999992947023297, iteration: 136862
loss: 1.1709555387496948,grad_norm: 0.9999995249483488, iteration: 136863
loss: 1.2288891077041626,grad_norm: 0.9999999100090485, iteration: 136864
loss: 1.164269208908081,grad_norm: 0.9999998098499141, iteration: 136865
loss: 1.215925931930542,grad_norm: 0.9999999217807855, iteration: 136866
loss: 1.2069205045700073,grad_norm: 0.9999999281927499, iteration: 136867
loss: 1.0776201486587524,grad_norm: 0.9999998465039546, iteration: 136868
loss: 1.0411405563354492,grad_norm: 0.9999996383261543, iteration: 136869
loss: 1.1756961345672607,grad_norm: 0.999999979911375, iteration: 136870
loss: 1.3004668951034546,grad_norm: 1.0000000412162804, iteration: 136871
loss: 1.1920334100723267,grad_norm: 0.999999864481979, iteration: 136872
loss: 1.2452644109725952,grad_norm: 0.9999999335487172, iteration: 136873
loss: 1.117531657218933,grad_norm: 0.9999997749969671, iteration: 136874
loss: 1.3774815797805786,grad_norm: 0.9999998376745243, iteration: 136875
loss: 1.1334971189498901,grad_norm: 0.9999998332415788, iteration: 136876
loss: 1.1673027276992798,grad_norm: 0.9999999311809059, iteration: 136877
loss: 1.5891382694244385,grad_norm: 0.9999998869841721, iteration: 136878
loss: 1.2242838144302368,grad_norm: 0.9999998702246559, iteration: 136879
loss: 1.282596230506897,grad_norm: 0.9999997646855138, iteration: 136880
loss: 1.2444119453430176,grad_norm: 0.9999998495408241, iteration: 136881
loss: 1.238811731338501,grad_norm: 0.9999998660976265, iteration: 136882
loss: 1.3146111965179443,grad_norm: 1.0000000588607425, iteration: 136883
loss: 1.1175804138183594,grad_norm: 0.999999990925135, iteration: 136884
loss: 1.3564879894256592,grad_norm: 1.0000000078440476, iteration: 136885
loss: 1.3579776287078857,grad_norm: 0.999999807957615, iteration: 136886
loss: 1.2405630350112915,grad_norm: 0.9999999376255375, iteration: 136887
loss: 1.2011046409606934,grad_norm: 0.999999977988108, iteration: 136888
loss: 1.1657977104187012,grad_norm: 0.9999999180458833, iteration: 136889
loss: 1.1014896631240845,grad_norm: 0.9999999027897282, iteration: 136890
loss: 1.166670322418213,grad_norm: 0.9999999951494761, iteration: 136891
loss: 1.30753493309021,grad_norm: 0.9999999940154677, iteration: 136892
loss: 1.2838441133499146,grad_norm: 0.9999998051616792, iteration: 136893
loss: 1.1323802471160889,grad_norm: 0.9999998891174311, iteration: 136894
loss: 1.2417370080947876,grad_norm: 0.9999997311034707, iteration: 136895
loss: 1.1369860172271729,grad_norm: 0.9999998686288981, iteration: 136896
loss: 1.1757926940917969,grad_norm: 1.000000050226664, iteration: 136897
loss: 1.3136619329452515,grad_norm: 0.9999999409948439, iteration: 136898
loss: 1.360826849937439,grad_norm: 0.9999999153650883, iteration: 136899
loss: 1.2802153825759888,grad_norm: 0.9999999489644973, iteration: 136900
loss: 1.2638463973999023,grad_norm: 0.9999998549665934, iteration: 136901
loss: 1.2737983465194702,grad_norm: 0.9999997404121226, iteration: 136902
loss: 1.1994829177856445,grad_norm: 0.9999999685843377, iteration: 136903
loss: 1.2237086296081543,grad_norm: 0.9999996820699831, iteration: 136904
loss: 1.2949415445327759,grad_norm: 0.9999999156562152, iteration: 136905
loss: 1.1792737245559692,grad_norm: 0.9999997784852841, iteration: 136906
loss: 1.056890606880188,grad_norm: 0.999999782826651, iteration: 136907
loss: 1.19595468044281,grad_norm: 1.0000000324477847, iteration: 136908
loss: 1.40652334690094,grad_norm: 0.9999999575403111, iteration: 136909
loss: 1.1463160514831543,grad_norm: 0.9999994029499484, iteration: 136910
loss: 1.217536211013794,grad_norm: 0.9999997962617921, iteration: 136911
loss: 1.1361348628997803,grad_norm: 0.9999997479823305, iteration: 136912
loss: 1.401281714439392,grad_norm: 0.999999872750165, iteration: 136913
loss: 1.1921230554580688,grad_norm: 0.9999999460635061, iteration: 136914
loss: 1.083178997039795,grad_norm: 0.9999998274721089, iteration: 136915
loss: 1.1109741926193237,grad_norm: 0.9999998064166937, iteration: 136916
loss: 1.1186240911483765,grad_norm: 0.9999998062368944, iteration: 136917
loss: 1.1278430223464966,grad_norm: 0.9999997988051135, iteration: 136918
loss: 1.1806719303131104,grad_norm: 0.9999997022866293, iteration: 136919
loss: 1.159124493598938,grad_norm: 0.9999998547697535, iteration: 136920
loss: 1.2712844610214233,grad_norm: 0.9999999293781544, iteration: 136921
loss: 1.1664665937423706,grad_norm: 0.999999714369905, iteration: 136922
loss: 1.1167962551116943,grad_norm: 1.0000000240542744, iteration: 136923
loss: 1.200089693069458,grad_norm: 0.9999999252792411, iteration: 136924
loss: 1.1571460962295532,grad_norm: 0.9999999442609325, iteration: 136925
loss: 1.1067274808883667,grad_norm: 0.9999999971554077, iteration: 136926
loss: 1.1232458353042603,grad_norm: 0.9999999694572469, iteration: 136927
loss: 1.2074192762374878,grad_norm: 0.999999694283134, iteration: 136928
loss: 1.1841137409210205,grad_norm: 1.0000000798457616, iteration: 136929
loss: 1.2594680786132812,grad_norm: 0.9999999617294298, iteration: 136930
loss: 1.4049828052520752,grad_norm: 0.9999999588476364, iteration: 136931
loss: 1.1167049407958984,grad_norm: 0.9999999917322205, iteration: 136932
loss: 1.101896047592163,grad_norm: 0.999999784313764, iteration: 136933
loss: 1.2161437273025513,grad_norm: 0.9999996839915003, iteration: 136934
loss: 1.0733147859573364,grad_norm: 0.9999999943439861, iteration: 136935
loss: 1.086623191833496,grad_norm: 0.9999998076143827, iteration: 136936
loss: 1.0866100788116455,grad_norm: 0.9999991690427461, iteration: 136937
loss: 1.0785725116729736,grad_norm: 0.9999996159987338, iteration: 136938
loss: 1.0445080995559692,grad_norm: 0.953578631189879, iteration: 136939
loss: 1.0710691213607788,grad_norm: 0.9999991805852038, iteration: 136940
loss: 1.0480741262435913,grad_norm: 0.9025019929710012, iteration: 136941
loss: 1.1443928480148315,grad_norm: 0.99999978599167, iteration: 136942
loss: 1.124875545501709,grad_norm: 0.999999856964043, iteration: 136943
loss: 1.0216745138168335,grad_norm: 0.9781414012883746, iteration: 136944
loss: 1.1310423612594604,grad_norm: 0.9999998836596576, iteration: 136945
loss: 1.1509324312210083,grad_norm: 0.9999998626497111, iteration: 136946
loss: 1.0782625675201416,grad_norm: 0.9999998624968306, iteration: 136947
loss: 1.1366671323776245,grad_norm: 0.9999992941727734, iteration: 136948
loss: 1.0036464929580688,grad_norm: 0.8692716851749887, iteration: 136949
loss: 1.0583508014678955,grad_norm: 0.999999975225435, iteration: 136950
loss: 1.0416843891143799,grad_norm: 0.9999998561469676, iteration: 136951
loss: 1.0135993957519531,grad_norm: 0.9999992281187173, iteration: 136952
loss: 1.0459390878677368,grad_norm: 0.9999996659366999, iteration: 136953
loss: 1.0409278869628906,grad_norm: 0.9999994327495185, iteration: 136954
loss: 1.0555423498153687,grad_norm: 0.9999996967031914, iteration: 136955
loss: 1.110036849975586,grad_norm: 0.9999998244821939, iteration: 136956
loss: 1.1102328300476074,grad_norm: 0.9999998672906593, iteration: 136957
loss: 1.0331875085830688,grad_norm: 0.9999998549347118, iteration: 136958
loss: 1.0765057802200317,grad_norm: 0.9999998938146689, iteration: 136959
loss: 1.0862441062927246,grad_norm: 0.9999998389284693, iteration: 136960
loss: 1.134046196937561,grad_norm: 0.9999996143937318, iteration: 136961
loss: 1.1946790218353271,grad_norm: 0.9999999363446254, iteration: 136962
loss: 1.0665647983551025,grad_norm: 0.9999991391543015, iteration: 136963
loss: 1.1153366565704346,grad_norm: 0.999999900383998, iteration: 136964
loss: 1.1554596424102783,grad_norm: 0.9999997943954602, iteration: 136965
loss: 1.0367923974990845,grad_norm: 0.999999730683191, iteration: 136966
loss: 1.1437605619430542,grad_norm: 0.9999998492905431, iteration: 136967
loss: 1.1208690404891968,grad_norm: 0.99999950694028, iteration: 136968
loss: 1.153220772743225,grad_norm: 1.0000000757569805, iteration: 136969
loss: 1.1732561588287354,grad_norm: 0.9999998761769122, iteration: 136970
loss: 1.1200615167617798,grad_norm: 0.9999995363277214, iteration: 136971
loss: 1.086077332496643,grad_norm: 0.9999998252659179, iteration: 136972
loss: 1.069095253944397,grad_norm: 0.9999996938311171, iteration: 136973
loss: 1.022696614265442,grad_norm: 0.9999999522695684, iteration: 136974
loss: 1.1589677333831787,grad_norm: 0.9999999300462008, iteration: 136975
loss: 1.1250574588775635,grad_norm: 0.9999993455305108, iteration: 136976
loss: 1.2284612655639648,grad_norm: 0.9999998439965101, iteration: 136977
loss: 1.1237941980361938,grad_norm: 0.9999998306415308, iteration: 136978
loss: 1.01382315158844,grad_norm: 0.9065264416660711, iteration: 136979
loss: 1.1510409116744995,grad_norm: 0.999999715741883, iteration: 136980
loss: 1.075181245803833,grad_norm: 0.839776389866871, iteration: 136981
loss: 1.109686255455017,grad_norm: 0.9999996526010801, iteration: 136982
loss: 1.0807474851608276,grad_norm: 0.999999115735242, iteration: 136983
loss: 0.9989394545555115,grad_norm: 0.9191109411333925, iteration: 136984
loss: 1.0105234384536743,grad_norm: 0.9999997788339058, iteration: 136985
loss: 1.3286666870117188,grad_norm: 0.9999994240542489, iteration: 136986
loss: 1.288297176361084,grad_norm: 0.9999998210830896, iteration: 136987
loss: 1.2506706714630127,grad_norm: 1.0000000201964043, iteration: 136988
loss: 0.9858055710792542,grad_norm: 0.8401716336858301, iteration: 136989
loss: 1.0664063692092896,grad_norm: 0.9999993429233706, iteration: 136990
loss: 1.1118762493133545,grad_norm: 0.9999997876246751, iteration: 136991
loss: 1.1191318035125732,grad_norm: 0.9999996673744322, iteration: 136992
loss: 0.9751035571098328,grad_norm: 0.9055893968667391, iteration: 136993
loss: 1.1472291946411133,grad_norm: 0.9999997733521401, iteration: 136994
loss: 1.104265570640564,grad_norm: 0.99999911700021, iteration: 136995
loss: 1.2536455392837524,grad_norm: 0.9999997885327128, iteration: 136996
loss: 1.1037018299102783,grad_norm: 0.9999999070520027, iteration: 136997
loss: 1.1883794069290161,grad_norm: 0.9999996195906123, iteration: 136998
loss: 1.0366286039352417,grad_norm: 0.9999991255383051, iteration: 136999
loss: 1.0409035682678223,grad_norm: 0.9999999469915608, iteration: 137000
loss: 1.0547268390655518,grad_norm: 0.9999996578308766, iteration: 137001
loss: 1.1116327047348022,grad_norm: 0.9999995396883433, iteration: 137002
loss: 1.0032989978790283,grad_norm: 0.9999999379300231, iteration: 137003
loss: 1.0068103075027466,grad_norm: 0.9999995524901506, iteration: 137004
loss: 1.0449796915054321,grad_norm: 0.8920987278001009, iteration: 137005
loss: 1.0997192859649658,grad_norm: 0.9999996756918664, iteration: 137006
loss: 1.0043792724609375,grad_norm: 1.0000000149452768, iteration: 137007
loss: 1.1041914224624634,grad_norm: 0.9999999209218637, iteration: 137008
loss: 1.0470935106277466,grad_norm: 0.99999988702277, iteration: 137009
loss: 1.0712164640426636,grad_norm: 0.999999538541578, iteration: 137010
loss: 0.9976483583450317,grad_norm: 0.9999991025285087, iteration: 137011
loss: 1.0172741413116455,grad_norm: 0.9999994010144099, iteration: 137012
loss: 1.0387245416641235,grad_norm: 0.9999992071617233, iteration: 137013
loss: 1.069589376449585,grad_norm: 0.9999995719727278, iteration: 137014
loss: 0.9930316805839539,grad_norm: 0.8300230068060309, iteration: 137015
loss: 1.0380643606185913,grad_norm: 0.9932007100511936, iteration: 137016
loss: 1.077049970626831,grad_norm: 0.999999504601228, iteration: 137017
loss: 1.0908923149108887,grad_norm: 0.9999994175206627, iteration: 137018
loss: 1.067237138748169,grad_norm: 0.921480265533136, iteration: 137019
loss: 1.2313016653060913,grad_norm: 0.9999998754116212, iteration: 137020
loss: 1.0092365741729736,grad_norm: 0.87622585002562, iteration: 137021
loss: 1.0100224018096924,grad_norm: 0.9999993044703834, iteration: 137022
loss: 1.1056206226348877,grad_norm: 0.9999991549319277, iteration: 137023
loss: 0.9993691444396973,grad_norm: 0.7835792860765082, iteration: 137024
loss: 1.015849232673645,grad_norm: 0.999999223601891, iteration: 137025
loss: 1.0604369640350342,grad_norm: 0.9999998390252594, iteration: 137026
loss: 1.1440685987472534,grad_norm: 0.9999998972150571, iteration: 137027
loss: 1.0939416885375977,grad_norm: 0.9999996135887188, iteration: 137028
loss: 1.0075534582138062,grad_norm: 0.9349242067989886, iteration: 137029
loss: 1.0726145505905151,grad_norm: 0.9999998845588493, iteration: 137030
loss: 1.0117243528366089,grad_norm: 0.9515584218067056, iteration: 137031
loss: 1.0260258913040161,grad_norm: 0.9999998608585406, iteration: 137032
loss: 1.0327147245407104,grad_norm: 0.9451424708915, iteration: 137033
loss: 1.0393320322036743,grad_norm: 0.9999998206951876, iteration: 137034
loss: 1.009738564491272,grad_norm: 0.9999999390103825, iteration: 137035
loss: 1.0227910280227661,grad_norm: 0.9999995139882859, iteration: 137036
loss: 1.0265060663223267,grad_norm: 0.9999998018661967, iteration: 137037
loss: 1.053939700126648,grad_norm: 0.9999995005281506, iteration: 137038
loss: 1.1523301601409912,grad_norm: 0.9999998969522991, iteration: 137039
loss: 1.064692735671997,grad_norm: 0.9999997317543018, iteration: 137040
loss: 1.1222054958343506,grad_norm: 0.9999995240559363, iteration: 137041
loss: 0.9584345817565918,grad_norm: 0.8726176818892264, iteration: 137042
loss: 1.0666731595993042,grad_norm: 0.9999993128832231, iteration: 137043
loss: 1.1236183643341064,grad_norm: 0.999999446598389, iteration: 137044
loss: 1.0571202039718628,grad_norm: 0.9999999913696379, iteration: 137045
loss: 1.045743465423584,grad_norm: 0.999999676532553, iteration: 137046
loss: 1.024713158607483,grad_norm: 0.9999995381401475, iteration: 137047
loss: 1.064711332321167,grad_norm: 0.9999998025314244, iteration: 137048
loss: 1.1159433126449585,grad_norm: 1.0000000083760536, iteration: 137049
loss: 1.0281531810760498,grad_norm: 0.999999227286615, iteration: 137050
loss: 1.0236574411392212,grad_norm: 0.9999993061737462, iteration: 137051
loss: 1.0655982494354248,grad_norm: 0.9999999900864385, iteration: 137052
loss: 0.9809892177581787,grad_norm: 0.9999993568510379, iteration: 137053
loss: 0.9873415231704712,grad_norm: 0.9195034411271414, iteration: 137054
loss: 1.3583106994628906,grad_norm: 0.9999997252392018, iteration: 137055
loss: 1.0516771078109741,grad_norm: 0.9999997362335249, iteration: 137056
loss: 1.0061691999435425,grad_norm: 0.9999990516239817, iteration: 137057
loss: 1.0117430686950684,grad_norm: 0.9905673693126706, iteration: 137058
loss: 1.090367078781128,grad_norm: 0.9999996179802433, iteration: 137059
loss: 0.9843568205833435,grad_norm: 0.9098053088111673, iteration: 137060
loss: 1.0009349584579468,grad_norm: 0.9999990827840508, iteration: 137061
loss: 1.0349124670028687,grad_norm: 0.9999999361895243, iteration: 137062
loss: 1.0652649402618408,grad_norm: 0.8344166357728154, iteration: 137063
loss: 1.2455053329467773,grad_norm: 0.9999998319998719, iteration: 137064
loss: 1.1682424545288086,grad_norm: 0.999999743020414, iteration: 137065
loss: 1.2150629758834839,grad_norm: 0.9999996929956514, iteration: 137066
loss: 1.012495756149292,grad_norm: 0.8645765962137215, iteration: 137067
loss: 1.063859462738037,grad_norm: 0.9999991705129067, iteration: 137068
loss: 1.076727032661438,grad_norm: 0.9999998150167454, iteration: 137069
loss: 1.0171467065811157,grad_norm: 0.9999998639091222, iteration: 137070
loss: 1.2176175117492676,grad_norm: 0.9999994636939875, iteration: 137071
loss: 1.0747196674346924,grad_norm: 0.9999994143378051, iteration: 137072
loss: 1.1553860902786255,grad_norm: 0.999999919061152, iteration: 137073
loss: 1.2145925760269165,grad_norm: 0.999999817290962, iteration: 137074
loss: 0.9967837929725647,grad_norm: 0.9583307664815238, iteration: 137075
loss: 1.025400161743164,grad_norm: 0.8233332713727547, iteration: 137076
loss: 1.1134263277053833,grad_norm: 0.9999998746611496, iteration: 137077
loss: 1.0230274200439453,grad_norm: 0.9999996405992836, iteration: 137078
loss: 1.0510146617889404,grad_norm: 0.8789648058807515, iteration: 137079
loss: 1.122231125831604,grad_norm: 0.9999993511387034, iteration: 137080
loss: 1.0880342721939087,grad_norm: 0.9999992668998682, iteration: 137081
loss: 1.0682326555252075,grad_norm: 0.999999591621519, iteration: 137082
loss: 1.0101094245910645,grad_norm: 0.8265365766589846, iteration: 137083
loss: 1.059108853340149,grad_norm: 0.9999991009106629, iteration: 137084
loss: 1.1052659749984741,grad_norm: 0.9999998025937845, iteration: 137085
loss: 1.0908958911895752,grad_norm: 0.9999991676322586, iteration: 137086
loss: 1.0594004392623901,grad_norm: 0.9999996324456578, iteration: 137087
loss: 1.1101315021514893,grad_norm: 0.9999995977834379, iteration: 137088
loss: 1.1206437349319458,grad_norm: 0.9999990991787494, iteration: 137089
loss: 1.0580086708068848,grad_norm: 0.9188201434379398, iteration: 137090
loss: 1.057387113571167,grad_norm: 0.9999995872656601, iteration: 137091
loss: 1.0718268156051636,grad_norm: 0.9999997947262971, iteration: 137092
loss: 1.0796809196472168,grad_norm: 0.9999996958857712, iteration: 137093
loss: 1.0333809852600098,grad_norm: 0.9999997105220564, iteration: 137094
loss: 1.0513787269592285,grad_norm: 0.9999991329121711, iteration: 137095
loss: 1.0649123191833496,grad_norm: 0.9999997123136708, iteration: 137096
loss: 1.0321367979049683,grad_norm: 0.9999995891232638, iteration: 137097
loss: 1.1860946416854858,grad_norm: 0.9999998689703664, iteration: 137098
loss: 1.0206841230392456,grad_norm: 0.999999602287968, iteration: 137099
loss: 1.174617052078247,grad_norm: 0.9999995387636542, iteration: 137100
loss: 0.9991605877876282,grad_norm: 0.9999995441898076, iteration: 137101
loss: 0.9875850081443787,grad_norm: 0.9999997889079164, iteration: 137102
loss: 1.0802470445632935,grad_norm: 0.9999992529037546, iteration: 137103
loss: 1.2355833053588867,grad_norm: 0.9999998643043445, iteration: 137104
loss: 1.0629940032958984,grad_norm: 0.8515785739197856, iteration: 137105
loss: 1.2232774496078491,grad_norm: 0.9999998603750474, iteration: 137106
loss: 1.1429917812347412,grad_norm: 0.8491699084523795, iteration: 137107
loss: 1.0475815534591675,grad_norm: 0.9999995653016664, iteration: 137108
loss: 1.0422457456588745,grad_norm: 0.9999994642296643, iteration: 137109
loss: 1.0492926836013794,grad_norm: 0.999999029542449, iteration: 137110
loss: 1.102229356765747,grad_norm: 0.9999996070656771, iteration: 137111
loss: 1.1870152950286865,grad_norm: 0.9999998685784266, iteration: 137112
loss: 1.2245573997497559,grad_norm: 0.999999906512326, iteration: 137113
loss: 1.1583094596862793,grad_norm: 0.9999997570766835, iteration: 137114
loss: 0.9996110796928406,grad_norm: 0.999999140448853, iteration: 137115
loss: 1.027963399887085,grad_norm: 0.9999992752126902, iteration: 137116
loss: 1.0553408861160278,grad_norm: 0.9999998532993997, iteration: 137117
loss: 1.0256210565567017,grad_norm: 0.9999994211842568, iteration: 137118
loss: 1.0530592203140259,grad_norm: 0.8206027912423388, iteration: 137119
loss: 1.0738338232040405,grad_norm: 0.9999999080875703, iteration: 137120
loss: 1.011983871459961,grad_norm: 0.999999210118964, iteration: 137121
loss: 1.101502776145935,grad_norm: 0.9999992747964781, iteration: 137122
loss: 1.0096780061721802,grad_norm: 0.7571583434069334, iteration: 137123
loss: 1.0057693719863892,grad_norm: 0.9999991048580109, iteration: 137124
loss: 1.2091724872589111,grad_norm: 0.9999999290302631, iteration: 137125
loss: 1.0373810529708862,grad_norm: 0.9999992174292763, iteration: 137126
loss: 1.0024125576019287,grad_norm: 0.9999992966151316, iteration: 137127
loss: 1.0012719631195068,grad_norm: 0.999999195065622, iteration: 137128
loss: 0.9984788298606873,grad_norm: 0.9999993445757784, iteration: 137129
loss: 1.0199189186096191,grad_norm: 0.99999942035306, iteration: 137130
loss: 0.9609591364860535,grad_norm: 0.9999991635678936, iteration: 137131
loss: 1.0582557916641235,grad_norm: 0.9999998328891907, iteration: 137132
loss: 1.0033422708511353,grad_norm: 0.9999994155948057, iteration: 137133
loss: 1.0159132480621338,grad_norm: 0.9999994701145362, iteration: 137134
loss: 1.1407248973846436,grad_norm: 0.9999995577089541, iteration: 137135
loss: 1.0962461233139038,grad_norm: 0.9999996161321926, iteration: 137136
loss: 1.1598732471466064,grad_norm: 0.9999994070848401, iteration: 137137
loss: 1.0120779275894165,grad_norm: 0.9999992296862094, iteration: 137138
loss: 1.0627213716506958,grad_norm: 0.9082963636768114, iteration: 137139
loss: 1.0099546909332275,grad_norm: 0.9604642519475364, iteration: 137140
loss: 0.9787917733192444,grad_norm: 0.9246927401336688, iteration: 137141
loss: 1.0629431009292603,grad_norm: 0.9999995499652299, iteration: 137142
loss: 0.9752082824707031,grad_norm: 0.9999999175984002, iteration: 137143
loss: 1.0779473781585693,grad_norm: 0.9999992888520148, iteration: 137144
loss: 1.1800429821014404,grad_norm: 0.9999998856985447, iteration: 137145
loss: 1.2328436374664307,grad_norm: 0.9999997717779697, iteration: 137146
loss: 1.0564671754837036,grad_norm: 0.9999999741013814, iteration: 137147
loss: 1.13983952999115,grad_norm: 0.9999998334763807, iteration: 137148
loss: 1.1393924951553345,grad_norm: 0.9999995745201576, iteration: 137149
loss: 1.0869262218475342,grad_norm: 0.999999130370644, iteration: 137150
loss: 1.1273082494735718,grad_norm: 0.9999999041117416, iteration: 137151
loss: 1.0411725044250488,grad_norm: 0.9999991773021505, iteration: 137152
loss: 1.0590381622314453,grad_norm: 0.9575004561392195, iteration: 137153
loss: 1.0030665397644043,grad_norm: 0.965674570154838, iteration: 137154
loss: 1.1624361276626587,grad_norm: 0.9999997102398976, iteration: 137155
loss: 1.0246784687042236,grad_norm: 0.9999991775905248, iteration: 137156
loss: 1.0703089237213135,grad_norm: 0.9999999062346373, iteration: 137157
loss: 0.9767017364501953,grad_norm: 0.9999994104760981, iteration: 137158
loss: 1.02912175655365,grad_norm: 0.9999995260971618, iteration: 137159
loss: 0.9857948422431946,grad_norm: 0.8849348814687598, iteration: 137160
loss: 1.0490808486938477,grad_norm: 0.9999992286985254, iteration: 137161
loss: 1.1267750263214111,grad_norm: 0.9999999520658075, iteration: 137162
loss: 1.03988516330719,grad_norm: 0.9999995130049952, iteration: 137163
loss: 1.0220904350280762,grad_norm: 0.9999997514081307, iteration: 137164
loss: 0.9822994470596313,grad_norm: 0.8780583374961894, iteration: 137165
loss: 0.9607188701629639,grad_norm: 0.8891107995222433, iteration: 137166
loss: 0.9937572479248047,grad_norm: 0.999999844625541, iteration: 137167
loss: 1.0208319425582886,grad_norm: 0.9929363397978741, iteration: 137168
loss: 1.1119650602340698,grad_norm: 0.9999992474589672, iteration: 137169
loss: 0.9680064916610718,grad_norm: 0.9999998907785901, iteration: 137170
loss: 1.0230180025100708,grad_norm: 0.9999997022457143, iteration: 137171
loss: 1.0032354593276978,grad_norm: 0.9999991095535286, iteration: 137172
loss: 1.1575052738189697,grad_norm: 0.999999790619982, iteration: 137173
loss: 1.0201078653335571,grad_norm: 0.9999992499405864, iteration: 137174
loss: 1.2416938543319702,grad_norm: 0.9999998242673037, iteration: 137175
loss: 1.2282218933105469,grad_norm: 1.0000000352960499, iteration: 137176
loss: 1.1085618734359741,grad_norm: 0.9999990947855012, iteration: 137177
loss: 0.9836241602897644,grad_norm: 0.9999996733200388, iteration: 137178
loss: 1.0065339803695679,grad_norm: 0.9999994240004404, iteration: 137179
loss: 1.0701854228973389,grad_norm: 1.0000000272035596, iteration: 137180
loss: 1.0426992177963257,grad_norm: 0.9999997390371188, iteration: 137181
loss: 1.046369194984436,grad_norm: 0.9999994125214945, iteration: 137182
loss: 1.004202127456665,grad_norm: 0.9228983504923315, iteration: 137183
loss: 1.048603892326355,grad_norm: 0.8908283868733446, iteration: 137184
loss: 0.985712468624115,grad_norm: 0.8500422422340534, iteration: 137185
loss: 1.0157064199447632,grad_norm: 0.8546559745784802, iteration: 137186
loss: 0.9980620741844177,grad_norm: 0.9999991521677595, iteration: 137187
loss: 1.0014657974243164,grad_norm: 0.999999735440819, iteration: 137188
loss: 1.021389365196228,grad_norm: 0.9999990410029486, iteration: 137189
loss: 1.0119913816452026,grad_norm: 0.9999994131110369, iteration: 137190
loss: 1.1464977264404297,grad_norm: 0.9999991960043637, iteration: 137191
loss: 0.992728054523468,grad_norm: 0.7927518908956092, iteration: 137192
loss: 1.1135591268539429,grad_norm: 0.9999992052889157, iteration: 137193
loss: 1.3203279972076416,grad_norm: 0.9999999393353837, iteration: 137194
loss: 1.0903801918029785,grad_norm: 0.9999991937090795, iteration: 137195
loss: 1.0390119552612305,grad_norm: 0.9999991012263904, iteration: 137196
loss: 1.0143985748291016,grad_norm: 0.9999996892921776, iteration: 137197
loss: 1.0645389556884766,grad_norm: 0.9999992886054713, iteration: 137198
loss: 0.9857864379882812,grad_norm: 0.9999999969060032, iteration: 137199
loss: 0.994355320930481,grad_norm: 0.9641173666817419, iteration: 137200
loss: 1.016068696975708,grad_norm: 0.9999998015365621, iteration: 137201
loss: 1.0294581651687622,grad_norm: 0.8064420193513621, iteration: 137202
loss: 1.0726864337921143,grad_norm: 0.9999993104578558, iteration: 137203
loss: 1.0346318483352661,grad_norm: 0.9999990514272764, iteration: 137204
loss: 0.9705461263656616,grad_norm: 0.8370792875442036, iteration: 137205
loss: 0.9977342486381531,grad_norm: 0.9129611415154275, iteration: 137206
loss: 0.9982928037643433,grad_norm: 0.9999991835193437, iteration: 137207
loss: 1.0175824165344238,grad_norm: 0.9545914642378811, iteration: 137208
loss: 1.0737268924713135,grad_norm: 0.9999993698496223, iteration: 137209
loss: 1.0323281288146973,grad_norm: 0.9999995827396592, iteration: 137210
loss: 0.9852173328399658,grad_norm: 0.9999991638347514, iteration: 137211
loss: 1.1496919393539429,grad_norm: 0.999999821517221, iteration: 137212
loss: 1.0103412866592407,grad_norm: 0.9999991549627683, iteration: 137213
loss: 1.0777610540390015,grad_norm: 0.9999996974072257, iteration: 137214
loss: 1.1113017797470093,grad_norm: 0.9999992390214587, iteration: 137215
loss: 1.143386721611023,grad_norm: 0.9999996506976474, iteration: 137216
loss: 1.0205272436141968,grad_norm: 0.9999992101900483, iteration: 137217
loss: 1.043578863143921,grad_norm: 0.9999998556893028, iteration: 137218
loss: 1.0540603399276733,grad_norm: 0.9999996247373374, iteration: 137219
loss: 1.0152143239974976,grad_norm: 0.8535676239376979, iteration: 137220
loss: 1.0250134468078613,grad_norm: 0.8732814925419543, iteration: 137221
loss: 1.1247951984405518,grad_norm: 0.9999997163144058, iteration: 137222
loss: 1.1373302936553955,grad_norm: 0.999999189638232, iteration: 137223
loss: 1.0193603038787842,grad_norm: 0.9850553634360598, iteration: 137224
loss: 0.9654601812362671,grad_norm: 0.8692431305150903, iteration: 137225
loss: 1.0503449440002441,grad_norm: 0.9446545463636264, iteration: 137226
loss: 1.0812582969665527,grad_norm: 0.9268693936664121, iteration: 137227
loss: 1.0322074890136719,grad_norm: 0.9172760353060095, iteration: 137228
loss: 0.9887447953224182,grad_norm: 0.9432447367831833, iteration: 137229
loss: 1.0717113018035889,grad_norm: 0.9813846489301588, iteration: 137230
loss: 0.9687283039093018,grad_norm: 0.8896203443506198, iteration: 137231
loss: 1.0204073190689087,grad_norm: 0.9999992533223281, iteration: 137232
loss: 1.0055497884750366,grad_norm: 0.9999991277644193, iteration: 137233
loss: 1.0360585451126099,grad_norm: 0.986498841959203, iteration: 137234
loss: 1.0728203058242798,grad_norm: 0.9999999281730673, iteration: 137235
loss: 1.1582757234573364,grad_norm: 0.9999995649968532, iteration: 137236
loss: 1.0330766439437866,grad_norm: 0.9999997832365406, iteration: 137237
loss: 1.0375767946243286,grad_norm: 0.9999996531636385, iteration: 137238
loss: 0.9881965517997742,grad_norm: 0.8578261360539715, iteration: 137239
loss: 0.9932065606117249,grad_norm: 0.9999998532665096, iteration: 137240
loss: 1.068950891494751,grad_norm: 0.9999991623606644, iteration: 137241
loss: 1.0185507535934448,grad_norm: 0.999999141382564, iteration: 137242
loss: 1.0303694009780884,grad_norm: 0.9831202144732633, iteration: 137243
loss: 0.9874322414398193,grad_norm: 0.861802322538685, iteration: 137244
loss: 1.0678515434265137,grad_norm: 0.8519574165145688, iteration: 137245
loss: 1.1880192756652832,grad_norm: 0.999999710099058, iteration: 137246
loss: 0.9897370934486389,grad_norm: 0.9999990414370327, iteration: 137247
loss: 1.0188711881637573,grad_norm: 0.9999991325188505, iteration: 137248
loss: 0.9789111614227295,grad_norm: 0.9999989881951895, iteration: 137249
loss: 1.0010292530059814,grad_norm: 0.8515656543090194, iteration: 137250
loss: 1.0392895936965942,grad_norm: 0.8173113498961407, iteration: 137251
loss: 0.9787328243255615,grad_norm: 0.9028184216331488, iteration: 137252
loss: 1.1162222623825073,grad_norm: 0.9999992626270222, iteration: 137253
loss: 1.017479658126831,grad_norm: 0.9881062078205625, iteration: 137254
loss: 1.0271505117416382,grad_norm: 0.9999991154387454, iteration: 137255
loss: 1.05868399143219,grad_norm: 0.8886718255918877, iteration: 137256
loss: 1.0132412910461426,grad_norm: 0.8846381418720667, iteration: 137257
loss: 1.118431568145752,grad_norm: 0.9999997878432632, iteration: 137258
loss: 1.0201486349105835,grad_norm: 0.9384982120439458, iteration: 137259
loss: 1.1522741317749023,grad_norm: 0.9999991854216899, iteration: 137260
loss: 1.0450960397720337,grad_norm: 0.9999996976253991, iteration: 137261
loss: 0.9955645203590393,grad_norm: 0.9999998504677767, iteration: 137262
loss: 1.1139236688613892,grad_norm: 0.999999994333872, iteration: 137263
loss: 1.0747252702713013,grad_norm: 0.9999990709883636, iteration: 137264
loss: 1.0503841638565063,grad_norm: 0.9999994391095702, iteration: 137265
loss: 1.0684642791748047,grad_norm: 0.9999998229465507, iteration: 137266
loss: 1.0082941055297852,grad_norm: 0.999999510791154, iteration: 137267
loss: 1.0179076194763184,grad_norm: 0.9999990789732431, iteration: 137268
loss: 0.993790864944458,grad_norm: 0.7795089099782422, iteration: 137269
loss: 1.0826880931854248,grad_norm: 0.9999993941143854, iteration: 137270
loss: 1.070128083229065,grad_norm: 0.9999991867025672, iteration: 137271
loss: 0.9991208910942078,grad_norm: 0.9999999364713986, iteration: 137272
loss: 1.0412403345108032,grad_norm: 0.7867778366211464, iteration: 137273
loss: 1.0575282573699951,grad_norm: 0.8205450376322694, iteration: 137274
loss: 1.0797228813171387,grad_norm: 0.9999992490106255, iteration: 137275
loss: 0.9979971051216125,grad_norm: 0.9780920570145272, iteration: 137276
loss: 1.1122477054595947,grad_norm: 0.9999990286722884, iteration: 137277
loss: 0.9402542114257812,grad_norm: 0.8915409362850362, iteration: 137278
loss: 0.96297287940979,grad_norm: 0.7594831102954748, iteration: 137279
loss: 1.089704990386963,grad_norm: 0.9479175269479287, iteration: 137280
loss: 1.0162136554718018,grad_norm: 0.9999992763005012, iteration: 137281
loss: 1.0325241088867188,grad_norm: 0.9950934046006821, iteration: 137282
loss: 1.009237289428711,grad_norm: 0.9999991689700276, iteration: 137283
loss: 0.996486246585846,grad_norm: 0.6980974507896212, iteration: 137284
loss: 1.1774389743804932,grad_norm: 0.9999996495150772, iteration: 137285
loss: 1.0100547075271606,grad_norm: 0.9999993050139826, iteration: 137286
loss: 1.0206245183944702,grad_norm: 0.8162801973781568, iteration: 137287
loss: 1.074766755104065,grad_norm: 0.9999990871825596, iteration: 137288
loss: 1.0568299293518066,grad_norm: 0.9999996743145135, iteration: 137289
loss: 1.0582464933395386,grad_norm: 0.9999991875582691, iteration: 137290
loss: 1.0307281017303467,grad_norm: 0.9999990453002277, iteration: 137291
loss: 1.0674159526824951,grad_norm: 0.9999997039938443, iteration: 137292
loss: 1.0814604759216309,grad_norm: 0.9999994280314598, iteration: 137293
loss: 1.0584806203842163,grad_norm: 0.9999991045606474, iteration: 137294
loss: 1.0423874855041504,grad_norm: 0.9999993251040307, iteration: 137295
loss: 1.004372477531433,grad_norm: 0.778582692797579, iteration: 137296
loss: 1.025876760482788,grad_norm: 0.9999998850938755, iteration: 137297
loss: 0.9869157671928406,grad_norm: 0.9999990884585698, iteration: 137298
loss: 1.0315848588943481,grad_norm: 0.9620199083102259, iteration: 137299
loss: 1.12527334690094,grad_norm: 0.9999998005050931, iteration: 137300
loss: 0.9818897247314453,grad_norm: 0.9183073339188453, iteration: 137301
loss: 1.0595301389694214,grad_norm: 0.9999995871775181, iteration: 137302
loss: 1.1337130069732666,grad_norm: 0.9999999120222812, iteration: 137303
loss: 1.0456655025482178,grad_norm: 0.9999990878216812, iteration: 137304
loss: 1.0638571977615356,grad_norm: 0.9637619343208865, iteration: 137305
loss: 0.9991611242294312,grad_norm: 0.897479117833241, iteration: 137306
loss: 1.309516429901123,grad_norm: 0.9999999442053151, iteration: 137307
loss: 1.118510127067566,grad_norm: 0.9999993154318259, iteration: 137308
loss: 1.0016082525253296,grad_norm: 0.887482111209289, iteration: 137309
loss: 1.107860803604126,grad_norm: 0.9999995361152847, iteration: 137310
loss: 1.0481759309768677,grad_norm: 0.9999998648261417, iteration: 137311
loss: 1.0284836292266846,grad_norm: 0.9999998738500974, iteration: 137312
loss: 1.0132876634597778,grad_norm: 0.9177506357662855, iteration: 137313
loss: 1.0622037649154663,grad_norm: 0.999999659832719, iteration: 137314
loss: 1.0219210386276245,grad_norm: 0.9941415470563906, iteration: 137315
loss: 1.0281975269317627,grad_norm: 0.9999990964744224, iteration: 137316
loss: 1.086577296257019,grad_norm: 0.9999990741981959, iteration: 137317
loss: 1.0147902965545654,grad_norm: 0.9999994396554209, iteration: 137318
loss: 1.0616223812103271,grad_norm: 0.9999994426769194, iteration: 137319
loss: 0.99019455909729,grad_norm: 0.8412702353481903, iteration: 137320
loss: 1.1537894010543823,grad_norm: 0.9999998708166045, iteration: 137321
loss: 1.0116711854934692,grad_norm: 0.9999991677975151, iteration: 137322
loss: 0.9804776906967163,grad_norm: 0.8802739448453145, iteration: 137323
loss: 0.9903034567832947,grad_norm: 0.9064756795652252, iteration: 137324
loss: 0.9874693751335144,grad_norm: 0.999999110953529, iteration: 137325
loss: 1.1375248432159424,grad_norm: 0.9999996415430517, iteration: 137326
loss: 1.0531258583068848,grad_norm: 0.9449478705919087, iteration: 137327
loss: 1.0034984350204468,grad_norm: 0.9949313521034809, iteration: 137328
loss: 0.9916028380393982,grad_norm: 0.8880699172869647, iteration: 137329
loss: 1.0098222494125366,grad_norm: 0.8555118895647504, iteration: 137330
loss: 0.9928725957870483,grad_norm: 0.9930815975844027, iteration: 137331
loss: 1.0307793617248535,grad_norm: 0.9644698948429686, iteration: 137332
loss: 1.0082414150238037,grad_norm: 0.9999992912284783, iteration: 137333
loss: 1.0976765155792236,grad_norm: 0.9999999487624546, iteration: 137334
loss: 0.9890462160110474,grad_norm: 0.8583913411560189, iteration: 137335
loss: 1.0845774412155151,grad_norm: 0.9366454035008148, iteration: 137336
loss: 1.1231752634048462,grad_norm: 0.9999996338974978, iteration: 137337
loss: 1.043336272239685,grad_norm: 0.8921556205623729, iteration: 137338
loss: 1.0017242431640625,grad_norm: 0.9999990218409697, iteration: 137339
loss: 1.0399552583694458,grad_norm: 0.9999995421804693, iteration: 137340
loss: 1.040071964263916,grad_norm: 0.9999993626934518, iteration: 137341
loss: 0.9908163547515869,grad_norm: 0.999999619809815, iteration: 137342
loss: 1.000914216041565,grad_norm: 0.9999992621951681, iteration: 137343
loss: 1.0142755508422852,grad_norm: 0.7703544906888016, iteration: 137344
loss: 1.0554115772247314,grad_norm: 0.9471308818750627, iteration: 137345
loss: 1.0365973711013794,grad_norm: 0.9999991687156177, iteration: 137346
loss: 1.0094021558761597,grad_norm: 0.8904995982228802, iteration: 137347
loss: 1.2448832988739014,grad_norm: 0.9999993164558565, iteration: 137348
loss: 1.00152587890625,grad_norm: 0.7194698530676538, iteration: 137349
loss: 1.0587453842163086,grad_norm: 0.9999998831223151, iteration: 137350
loss: 1.101319432258606,grad_norm: 0.999999420834773, iteration: 137351
loss: 1.0405700206756592,grad_norm: 0.9999994943041013, iteration: 137352
loss: 0.9615933299064636,grad_norm: 0.9717257157865455, iteration: 137353
loss: 1.0258498191833496,grad_norm: 0.9851250661068239, iteration: 137354
loss: 1.0692167282104492,grad_norm: 0.9999998530959421, iteration: 137355
loss: 1.0875507593154907,grad_norm: 0.9999993331101614, iteration: 137356
loss: 1.1397331953048706,grad_norm: 0.9999998852290395, iteration: 137357
loss: 1.1521865129470825,grad_norm: 0.9999992102980096, iteration: 137358
loss: 1.0796566009521484,grad_norm: 0.999999569897189, iteration: 137359
loss: 1.0053080320358276,grad_norm: 0.8778677153988739, iteration: 137360
loss: 1.2290902137756348,grad_norm: 0.9999995394964569, iteration: 137361
loss: 1.2421983480453491,grad_norm: 0.9999997187007296, iteration: 137362
loss: 1.0576504468917847,grad_norm: 0.7121169184813781, iteration: 137363
loss: 1.0453782081604004,grad_norm: 0.999999090074318, iteration: 137364
loss: 1.0822088718414307,grad_norm: 0.9999996993645711, iteration: 137365
loss: 1.093135118484497,grad_norm: 0.9999992402202675, iteration: 137366
loss: 1.0501450300216675,grad_norm: 0.9999991587553487, iteration: 137367
loss: 1.0297602415084839,grad_norm: 0.8539213052382032, iteration: 137368
loss: 1.0125365257263184,grad_norm: 0.6984711240998382, iteration: 137369
loss: 1.0286058187484741,grad_norm: 0.8800514975503276, iteration: 137370
loss: 1.0025625228881836,grad_norm: 0.99228281821138, iteration: 137371
loss: 0.9921671748161316,grad_norm: 0.8779113783600276, iteration: 137372
loss: 0.9851882457733154,grad_norm: 0.9999994748397587, iteration: 137373
loss: 1.0328344106674194,grad_norm: 0.999999225665395, iteration: 137374
loss: 1.05215585231781,grad_norm: 0.9999997118535761, iteration: 137375
loss: 1.005732774734497,grad_norm: 0.9594462169990412, iteration: 137376
loss: 1.0502955913543701,grad_norm: 0.9999993167657467, iteration: 137377
loss: 1.052552580833435,grad_norm: 0.999999834731261, iteration: 137378
loss: 1.1757454872131348,grad_norm: 0.9999998993607928, iteration: 137379
loss: 0.9887357950210571,grad_norm: 0.9009919302068394, iteration: 137380
loss: 1.0595128536224365,grad_norm: 0.9999995708543877, iteration: 137381
loss: 1.0551637411117554,grad_norm: 0.9999991021181409, iteration: 137382
loss: 1.0716123580932617,grad_norm: 0.999999076807324, iteration: 137383
loss: 1.0060126781463623,grad_norm: 0.8432872454070232, iteration: 137384
loss: 1.0315901041030884,grad_norm: 0.999999439641454, iteration: 137385
loss: 1.103918433189392,grad_norm: 0.9999999803823987, iteration: 137386
loss: 1.0405668020248413,grad_norm: 0.8759879235281686, iteration: 137387
loss: 0.9716859459877014,grad_norm: 0.8473794025268759, iteration: 137388
loss: 1.106619119644165,grad_norm: 0.9999999762886187, iteration: 137389
loss: 1.0402357578277588,grad_norm: 0.9999996795321157, iteration: 137390
loss: 0.9926562309265137,grad_norm: 0.9292830944880334, iteration: 137391
loss: 1.0607668161392212,grad_norm: 0.9999996845979274, iteration: 137392
loss: 1.0267829895019531,grad_norm: 0.9999993408971402, iteration: 137393
loss: 1.0334101915359497,grad_norm: 0.9999990990803107, iteration: 137394
loss: 0.9803900122642517,grad_norm: 0.848711613152077, iteration: 137395
loss: 0.9845446944236755,grad_norm: 0.8323735263568861, iteration: 137396
loss: 1.011201024055481,grad_norm: 0.8695719711609187, iteration: 137397
loss: 1.0894030332565308,grad_norm: 0.9999991911713916, iteration: 137398
loss: 1.020522117614746,grad_norm: 0.7954833035342428, iteration: 137399
loss: 1.0349035263061523,grad_norm: 0.9360330189430157, iteration: 137400
loss: 1.0503534078598022,grad_norm: 0.9999998574204011, iteration: 137401
loss: 1.028804063796997,grad_norm: 0.9935038158635399, iteration: 137402
loss: 0.9745703935623169,grad_norm: 0.8009373809713259, iteration: 137403
loss: 1.0165984630584717,grad_norm: 0.9999990525139254, iteration: 137404
loss: 1.0551658868789673,grad_norm: 0.8713485075555139, iteration: 137405
loss: 1.10176682472229,grad_norm: 0.9999996721442878, iteration: 137406
loss: 1.0531426668167114,grad_norm: 0.9999998979024732, iteration: 137407
loss: 1.0832310914993286,grad_norm: 0.9715183979294267, iteration: 137408
loss: 1.0037528276443481,grad_norm: 0.8405228955276578, iteration: 137409
loss: 1.0154109001159668,grad_norm: 0.9617346189674483, iteration: 137410
loss: 1.0246459245681763,grad_norm: 0.8138873489139307, iteration: 137411
loss: 1.0432597398757935,grad_norm: 0.9999998631561521, iteration: 137412
loss: 0.9779502153396606,grad_norm: 0.9999991469112837, iteration: 137413
loss: 0.985012948513031,grad_norm: 0.9611641198355239, iteration: 137414
loss: 1.060664176940918,grad_norm: 0.9999995922161502, iteration: 137415
loss: 1.074453592300415,grad_norm: 0.9999998857825103, iteration: 137416
loss: 1.0958611965179443,grad_norm: 1.0000000627812666, iteration: 137417
loss: 1.082808256149292,grad_norm: 0.9435811252984256, iteration: 137418
loss: 0.9919368028640747,grad_norm: 0.7778962516688784, iteration: 137419
loss: 0.9646863341331482,grad_norm: 0.9999990925176183, iteration: 137420
loss: 0.9990366101264954,grad_norm: 0.8586507916043853, iteration: 137421
loss: 1.0244425535202026,grad_norm: 0.9999991231350452, iteration: 137422
loss: 1.0218069553375244,grad_norm: 0.9999995540211265, iteration: 137423
loss: 0.9941008687019348,grad_norm: 0.8948859990505719, iteration: 137424
loss: 0.9912559986114502,grad_norm: 0.8720970318089671, iteration: 137425
loss: 1.0534743070602417,grad_norm: 0.7880139688992032, iteration: 137426
loss: 1.196310043334961,grad_norm: 0.9999993227442061, iteration: 137427
loss: 1.0267621278762817,grad_norm: 0.8177231863398738, iteration: 137428
loss: 1.0886763334274292,grad_norm: 0.9999991467969724, iteration: 137429
loss: 1.0177958011627197,grad_norm: 0.7889717900157686, iteration: 137430
loss: 1.1215955018997192,grad_norm: 0.9999998107920774, iteration: 137431
loss: 1.0930484533309937,grad_norm: 0.9999995037811101, iteration: 137432
loss: 0.9859058856964111,grad_norm: 0.913220225047913, iteration: 137433
loss: 1.0741276741027832,grad_norm: 0.9999990925217127, iteration: 137434
loss: 1.020890235900879,grad_norm: 0.9999993946300252, iteration: 137435
loss: 1.047358751296997,grad_norm: 0.9475527066656187, iteration: 137436
loss: 1.4178465604782104,grad_norm: 0.9999998985387725, iteration: 137437
loss: 0.9825042486190796,grad_norm: 0.8422353188816959, iteration: 137438
loss: 1.1060737371444702,grad_norm: 0.999999720025449, iteration: 137439
loss: 0.9899754524230957,grad_norm: 0.8677453272991276, iteration: 137440
loss: 0.9907616376876831,grad_norm: 0.830298419201547, iteration: 137441
loss: 1.0935934782028198,grad_norm: 0.9999994439281258, iteration: 137442
loss: 1.1091245412826538,grad_norm: 0.9999996004681175, iteration: 137443
loss: 1.0580168962478638,grad_norm: 0.9999999906975072, iteration: 137444
loss: 0.9913966655731201,grad_norm: 0.9581792445545865, iteration: 137445
loss: 1.0289982557296753,grad_norm: 0.9999997940590474, iteration: 137446
loss: 1.0902466773986816,grad_norm: 0.9999998227923115, iteration: 137447
loss: 1.0598423480987549,grad_norm: 0.9999991947590192, iteration: 137448
loss: 1.0712313652038574,grad_norm: 0.9999993553736654, iteration: 137449
loss: 1.0467060804367065,grad_norm: 0.9999997881533148, iteration: 137450
loss: 1.0371863842010498,grad_norm: 0.9001751309364571, iteration: 137451
loss: 1.0157221555709839,grad_norm: 0.9575331806378506, iteration: 137452
loss: 1.024509072303772,grad_norm: 0.8518495399858128, iteration: 137453
loss: 1.098760724067688,grad_norm: 0.9999996578529403, iteration: 137454
loss: 1.0555412769317627,grad_norm: 1.0000000842298817, iteration: 137455
loss: 1.0203664302825928,grad_norm: 0.7952253266418756, iteration: 137456
loss: 1.017896056175232,grad_norm: 0.9999990580134317, iteration: 137457
loss: 1.0226701498031616,grad_norm: 0.8801478975539296, iteration: 137458
loss: 0.9979572892189026,grad_norm: 0.744502090247936, iteration: 137459
loss: 1.0786502361297607,grad_norm: 0.9999994926023301, iteration: 137460
loss: 1.0204174518585205,grad_norm: 0.9366625959603465, iteration: 137461
loss: 1.0694371461868286,grad_norm: 0.9999991989445917, iteration: 137462
loss: 1.1619616746902466,grad_norm: 0.9999993543137896, iteration: 137463
loss: 1.017879843711853,grad_norm: 0.9999996179492767, iteration: 137464
loss: 1.1371220350265503,grad_norm: 0.9999997437440143, iteration: 137465
loss: 1.1332650184631348,grad_norm: 0.9999992600253464, iteration: 137466
loss: 1.0394624471664429,grad_norm: 0.9999992859542147, iteration: 137467
loss: 1.0263744592666626,grad_norm: 0.9556650619651849, iteration: 137468
loss: 1.0064727067947388,grad_norm: 0.9999992855725502, iteration: 137469
loss: 1.0364196300506592,grad_norm: 0.9999993659773535, iteration: 137470
loss: 0.9630468487739563,grad_norm: 0.9057074498816876, iteration: 137471
loss: 1.1086783409118652,grad_norm: 0.9999996293065158, iteration: 137472
loss: 1.1669021844863892,grad_norm: 0.9999996368328793, iteration: 137473
loss: 0.9678046703338623,grad_norm: 0.8900793260071925, iteration: 137474
loss: 1.2314796447753906,grad_norm: 0.9999999054564676, iteration: 137475
loss: 0.9744894504547119,grad_norm: 0.9999993593224148, iteration: 137476
loss: 1.0763120651245117,grad_norm: 0.9999993309112011, iteration: 137477
loss: 1.0238416194915771,grad_norm: 0.9999999646862013, iteration: 137478
loss: 1.381410837173462,grad_norm: 0.9999999752993548, iteration: 137479
loss: 1.0950086116790771,grad_norm: 0.9999994801023051, iteration: 137480
loss: 1.0632344484329224,grad_norm: 0.9999990296549894, iteration: 137481
loss: 1.0241628885269165,grad_norm: 0.9999991999299452, iteration: 137482
loss: 1.1657124757766724,grad_norm: 0.999999140940131, iteration: 137483
loss: 1.0650173425674438,grad_norm: 0.9999999385993066, iteration: 137484
loss: 1.0504791736602783,grad_norm: 0.9999995804668025, iteration: 137485
loss: 1.070884108543396,grad_norm: 0.9999992233238628, iteration: 137486
loss: 1.0059326887130737,grad_norm: 0.9999991633954405, iteration: 137487
loss: 0.999198317527771,grad_norm: 0.9999995106780061, iteration: 137488
loss: 0.9805701375007629,grad_norm: 0.9999992245141065, iteration: 137489
loss: 0.9899348616600037,grad_norm: 0.9765291969372117, iteration: 137490
loss: 0.9922240376472473,grad_norm: 0.8846179754413181, iteration: 137491
loss: 1.012548804283142,grad_norm: 0.757664372713943, iteration: 137492
loss: 1.0248469114303589,grad_norm: 0.9999996075412836, iteration: 137493
loss: 1.1115000247955322,grad_norm: 0.9999995440359072, iteration: 137494
loss: 1.0301272869110107,grad_norm: 0.9999991847317737, iteration: 137495
loss: 1.0227147340774536,grad_norm: 0.9999992743438436, iteration: 137496
loss: 1.0288289785385132,grad_norm: 0.9999996282034126, iteration: 137497
loss: 1.004194974899292,grad_norm: 0.6783393252674526, iteration: 137498
loss: 1.0347883701324463,grad_norm: 0.8868660565377827, iteration: 137499
loss: 1.2244833707809448,grad_norm: 0.9999999181837776, iteration: 137500
loss: 1.0653040409088135,grad_norm: 0.9999994388879897, iteration: 137501
loss: 1.0407646894454956,grad_norm: 0.899538876748353, iteration: 137502
loss: 1.0493249893188477,grad_norm: 0.9999996409787696, iteration: 137503
loss: 0.9466805458068848,grad_norm: 0.9300269700726032, iteration: 137504
loss: 1.0198261737823486,grad_norm: 0.9999998229086079, iteration: 137505
loss: 1.1967837810516357,grad_norm: 0.9999994884093684, iteration: 137506
loss: 1.0644636154174805,grad_norm: 0.9999991228278161, iteration: 137507
loss: 1.0936909914016724,grad_norm: 0.9999993531301938, iteration: 137508
loss: 1.0023245811462402,grad_norm: 0.9999993764788592, iteration: 137509
loss: 1.0047366619110107,grad_norm: 0.9367014997643475, iteration: 137510
loss: 1.0796643495559692,grad_norm: 0.999999270887323, iteration: 137511
loss: 1.0063940286636353,grad_norm: 0.9422752767856096, iteration: 137512
loss: 1.0617979764938354,grad_norm: 0.9999993428840825, iteration: 137513
loss: 1.055999755859375,grad_norm: 0.9999992114699254, iteration: 137514
loss: 1.0709278583526611,grad_norm: 0.9999999585825954, iteration: 137515
loss: 1.1526495218276978,grad_norm: 0.9999997402804248, iteration: 137516
loss: 1.05905282497406,grad_norm: 0.99999958323717, iteration: 137517
loss: 1.0412646532058716,grad_norm: 0.9999995722981785, iteration: 137518
loss: 1.0393705368041992,grad_norm: 0.9999998507927603, iteration: 137519
loss: 1.017468810081482,grad_norm: 0.9999999829496722, iteration: 137520
loss: 1.504754662513733,grad_norm: 0.9999999560181172, iteration: 137521
loss: 1.0965498685836792,grad_norm: 0.9999998918564816, iteration: 137522
loss: 1.0098543167114258,grad_norm: 0.9999992849457844, iteration: 137523
loss: 1.1255396604537964,grad_norm: 0.9999999672359664, iteration: 137524
loss: 0.9793365597724915,grad_norm: 1.000000124596371, iteration: 137525
loss: 1.0899977684020996,grad_norm: 0.9999998224802565, iteration: 137526
loss: 1.034205436706543,grad_norm: 0.9536184735070125, iteration: 137527
loss: 1.0417293310165405,grad_norm: 0.8815415143195431, iteration: 137528
loss: 1.0357798337936401,grad_norm: 0.8660972906576788, iteration: 137529
loss: 1.0071749687194824,grad_norm: 0.9999990004382755, iteration: 137530
loss: 0.9674750566482544,grad_norm: 0.7758693334625618, iteration: 137531
loss: 0.9982900023460388,grad_norm: 0.9243157266293472, iteration: 137532
loss: 1.0617727041244507,grad_norm: 0.9999993399542799, iteration: 137533
loss: 1.0183826684951782,grad_norm: 0.999998976041053, iteration: 137534
loss: 0.9782035946846008,grad_norm: 0.9999991304740612, iteration: 137535
loss: 1.1524335145950317,grad_norm: 0.999999888000226, iteration: 137536
loss: 0.9985476732254028,grad_norm: 0.8882176625863326, iteration: 137537
loss: 1.0659995079040527,grad_norm: 0.999999918154554, iteration: 137538
loss: 0.999744713306427,grad_norm: 0.9999992918634265, iteration: 137539
loss: 1.1420230865478516,grad_norm: 0.999999952940712, iteration: 137540
loss: 1.0019683837890625,grad_norm: 0.9268280802365017, iteration: 137541
loss: 1.0906952619552612,grad_norm: 0.9999994038802879, iteration: 137542
loss: 0.9859955906867981,grad_norm: 0.999999664264768, iteration: 137543
loss: 1.1212971210479736,grad_norm: 0.9999993840976132, iteration: 137544
loss: 1.0958871841430664,grad_norm: 0.9999998074227718, iteration: 137545
loss: 1.1378085613250732,grad_norm: 0.9999991678152291, iteration: 137546
loss: 1.2150700092315674,grad_norm: 0.9999996967858729, iteration: 137547
loss: 1.0524616241455078,grad_norm: 0.9999992812215517, iteration: 137548
loss: 1.2020164728164673,grad_norm: 0.9999994899524756, iteration: 137549
loss: 1.0110760927200317,grad_norm: 0.8335801804109586, iteration: 137550
loss: 1.0621153116226196,grad_norm: 0.999999442060126, iteration: 137551
loss: 1.1580356359481812,grad_norm: 0.9999993639605171, iteration: 137552
loss: 1.0591320991516113,grad_norm: 0.9999994922876017, iteration: 137553
loss: 1.042963981628418,grad_norm: 0.8417622712049895, iteration: 137554
loss: 1.0309784412384033,grad_norm: 0.9241613282559793, iteration: 137555
loss: 1.0527364015579224,grad_norm: 0.9233577948985133, iteration: 137556
loss: 0.9936465620994568,grad_norm: 0.999999643466538, iteration: 137557
loss: 1.0300425291061401,grad_norm: 0.9999998472046066, iteration: 137558
loss: 1.0215868949890137,grad_norm: 0.9999994534639767, iteration: 137559
loss: 0.9712881445884705,grad_norm: 0.9999990364357959, iteration: 137560
loss: 1.0191200971603394,grad_norm: 0.9999992194932713, iteration: 137561
loss: 1.1006267070770264,grad_norm: 0.9999998395032016, iteration: 137562
loss: 0.9612711668014526,grad_norm: 0.9999990988785911, iteration: 137563
loss: 1.0824270248413086,grad_norm: 0.9999996910682192, iteration: 137564
loss: 0.9856014251708984,grad_norm: 0.9999989639023776, iteration: 137565
loss: 1.0978361368179321,grad_norm: 0.9999993108497316, iteration: 137566
loss: 1.047102451324463,grad_norm: 0.9727775181077054, iteration: 137567
loss: 1.1726181507110596,grad_norm: 0.9999996486989746, iteration: 137568
loss: 1.0238056182861328,grad_norm: 0.9999998589934629, iteration: 137569
loss: 1.170049786567688,grad_norm: 0.9999999079234658, iteration: 137570
loss: 1.0419251918792725,grad_norm: 0.8966647182484365, iteration: 137571
loss: 1.0125175714492798,grad_norm: 0.925725222910961, iteration: 137572
loss: 1.0273362398147583,grad_norm: 0.999999187444693, iteration: 137573
loss: 1.0025584697723389,grad_norm: 0.9935849185104582, iteration: 137574
loss: 1.0296560525894165,grad_norm: 0.9999998390504425, iteration: 137575
loss: 1.0179443359375,grad_norm: 0.9999992517464352, iteration: 137576
loss: 1.2126396894454956,grad_norm: 0.9999999864747092, iteration: 137577
loss: 1.0444908142089844,grad_norm: 0.8568324294145527, iteration: 137578
loss: 1.0070098638534546,grad_norm: 0.7911193113308055, iteration: 137579
loss: 1.0458714962005615,grad_norm: 0.9999991761567194, iteration: 137580
loss: 0.9932452440261841,grad_norm: 0.9692534429769869, iteration: 137581
loss: 1.032379150390625,grad_norm: 0.9999997222913488, iteration: 137582
loss: 1.0209752321243286,grad_norm: 0.9999998961073149, iteration: 137583
loss: 1.1890065670013428,grad_norm: 0.999999472053922, iteration: 137584
loss: 1.037833333015442,grad_norm: 0.9999995410744137, iteration: 137585
loss: 1.0162670612335205,grad_norm: 0.9001628283438058, iteration: 137586
loss: 1.06281316280365,grad_norm: 0.9502202030357825, iteration: 137587
loss: 1.0227341651916504,grad_norm: 0.8872979310390523, iteration: 137588
loss: 1.0898500680923462,grad_norm: 0.9999994839188612, iteration: 137589
loss: 1.0156497955322266,grad_norm: 0.9999992539841907, iteration: 137590
loss: 1.0495851039886475,grad_norm: 0.9527713788439655, iteration: 137591
loss: 1.0094753503799438,grad_norm: 0.9999994761209438, iteration: 137592
loss: 1.088119387626648,grad_norm: 0.9999995917319181, iteration: 137593
loss: 1.096890926361084,grad_norm: 0.9999997367549674, iteration: 137594
loss: 0.9856991171836853,grad_norm: 0.9693607375117097, iteration: 137595
loss: 1.1430288553237915,grad_norm: 0.9999993078968781, iteration: 137596
loss: 1.053200125694275,grad_norm: 0.9999992462756713, iteration: 137597
loss: 1.06329345703125,grad_norm: 0.9999996053905744, iteration: 137598
loss: 1.0340393781661987,grad_norm: 0.9253164495912852, iteration: 137599
loss: 1.0312505960464478,grad_norm: 0.9999994313837912, iteration: 137600
loss: 0.9943662881851196,grad_norm: 0.9699672466459434, iteration: 137601
loss: 0.9527787566184998,grad_norm: 0.9999995977811976, iteration: 137602
loss: 1.0840541124343872,grad_norm: 0.9999996355757609, iteration: 137603
loss: 1.0844476222991943,grad_norm: 0.9983538475563339, iteration: 137604
loss: 0.9942958354949951,grad_norm: 0.8960332224768046, iteration: 137605
loss: 0.9954707026481628,grad_norm: 0.7935330112237419, iteration: 137606
loss: 1.0459767580032349,grad_norm: 0.999999066829237, iteration: 137607
loss: 1.0333677530288696,grad_norm: 0.9999993566319075, iteration: 137608
loss: 1.0797955989837646,grad_norm: 0.9999992851154794, iteration: 137609
loss: 1.0174235105514526,grad_norm: 0.9584642087309259, iteration: 137610
loss: 1.017608404159546,grad_norm: 0.7805881357685116, iteration: 137611
loss: 0.9721331000328064,grad_norm: 0.999999067863643, iteration: 137612
loss: 0.9902424812316895,grad_norm: 0.9853142929063237, iteration: 137613
loss: 1.0004607439041138,grad_norm: 0.8024877497562793, iteration: 137614
loss: 1.0242763757705688,grad_norm: 0.9278085193794108, iteration: 137615
loss: 0.9816707372665405,grad_norm: 0.9424259261115249, iteration: 137616
loss: 1.0576629638671875,grad_norm: 0.9999992725263102, iteration: 137617
loss: 0.9674676656723022,grad_norm: 0.9111765357737512, iteration: 137618
loss: 1.074780821800232,grad_norm: 0.9999992178018572, iteration: 137619
loss: 1.0315009355545044,grad_norm: 0.9085507332612118, iteration: 137620
loss: 0.9954851269721985,grad_norm: 0.9999994705502989, iteration: 137621
loss: 1.0188276767730713,grad_norm: 0.999998996300695, iteration: 137622
loss: 0.9781842827796936,grad_norm: 0.9999995041475614, iteration: 137623
loss: 0.9798721075057983,grad_norm: 0.9101215635138425, iteration: 137624
loss: 1.080490231513977,grad_norm: 0.9999996915553834, iteration: 137625
loss: 1.0075052976608276,grad_norm: 0.9235996151399376, iteration: 137626
loss: 1.150532841682434,grad_norm: 0.9999997763105434, iteration: 137627
loss: 1.3474352359771729,grad_norm: 0.9999999494557801, iteration: 137628
loss: 0.9985666275024414,grad_norm: 0.9258285682339832, iteration: 137629
loss: 1.011940598487854,grad_norm: 0.9999989837474661, iteration: 137630
loss: 0.9744904041290283,grad_norm: 0.8938873486555355, iteration: 137631
loss: 1.0892183780670166,grad_norm: 0.9999992313558107, iteration: 137632
loss: 0.9779241681098938,grad_norm: 0.7794000243274933, iteration: 137633
loss: 1.162111759185791,grad_norm: 0.9999995253656955, iteration: 137634
loss: 1.0733293294906616,grad_norm: 0.9999999564009157, iteration: 137635
loss: 1.1183104515075684,grad_norm: 0.9999998242276571, iteration: 137636
loss: 1.0326261520385742,grad_norm: 0.9999997590129879, iteration: 137637
loss: 0.9899978637695312,grad_norm: 0.968064439693314, iteration: 137638
loss: 0.9756638407707214,grad_norm: 0.8341928538434962, iteration: 137639
loss: 1.0050239562988281,grad_norm: 0.9999994707894744, iteration: 137640
loss: 0.9823529720306396,grad_norm: 0.999999111785269, iteration: 137641
loss: 0.9547505378723145,grad_norm: 0.8223511726752205, iteration: 137642
loss: 1.0028319358825684,grad_norm: 0.9999990347386762, iteration: 137643
loss: 1.037160038948059,grad_norm: 0.9455190568608183, iteration: 137644
loss: 1.0860884189605713,grad_norm: 0.9999998349441425, iteration: 137645
loss: 0.9732476472854614,grad_norm: 0.9981044418386139, iteration: 137646
loss: 1.0723683834075928,grad_norm: 0.9999991102497235, iteration: 137647
loss: 0.98160320520401,grad_norm: 0.9928549349664145, iteration: 137648
loss: 0.9842668771743774,grad_norm: 0.971683304711141, iteration: 137649
loss: 0.9532752633094788,grad_norm: 0.9999990243458486, iteration: 137650
loss: 1.0286483764648438,grad_norm: 0.8597103757256239, iteration: 137651
loss: 0.9323055744171143,grad_norm: 0.9479961240764713, iteration: 137652
loss: 1.0293394327163696,grad_norm: 0.8967228708686059, iteration: 137653
loss: 1.0046710968017578,grad_norm: 0.9118656212136452, iteration: 137654
loss: 1.049836277961731,grad_norm: 0.9999990576922551, iteration: 137655
loss: 1.0818933248519897,grad_norm: 0.9999996090087188, iteration: 137656
loss: 1.063552975654602,grad_norm: 0.999999330863253, iteration: 137657
loss: 1.0351139307022095,grad_norm: 0.9999993846054611, iteration: 137658
loss: 0.9753345251083374,grad_norm: 0.8726066853784825, iteration: 137659
loss: 0.9880601763725281,grad_norm: 0.8538740427924789, iteration: 137660
loss: 1.0799367427825928,grad_norm: 0.9999993348800446, iteration: 137661
loss: 1.0051144361495972,grad_norm: 0.9999989807880953, iteration: 137662
loss: 1.081757664680481,grad_norm: 0.999999169980915, iteration: 137663
loss: 1.0121703147888184,grad_norm: 0.9509892652523534, iteration: 137664
loss: 1.0011345148086548,grad_norm: 0.9628450236014529, iteration: 137665
loss: 1.0014450550079346,grad_norm: 0.8985876573257815, iteration: 137666
loss: 0.9917421936988831,grad_norm: 0.856812877039678, iteration: 137667
loss: 1.025779128074646,grad_norm: 0.9999996120337744, iteration: 137668
loss: 1.0523232221603394,grad_norm: 0.9999995769452872, iteration: 137669
loss: 0.992986261844635,grad_norm: 0.8442884919241954, iteration: 137670
loss: 1.0381476879119873,grad_norm: 0.8381875680254878, iteration: 137671
loss: 1.0370570421218872,grad_norm: 0.9999998400869154, iteration: 137672
loss: 1.0039379596710205,grad_norm: 0.7555582404072586, iteration: 137673
loss: 0.9697880148887634,grad_norm: 0.9249634012454416, iteration: 137674
loss: 1.105502724647522,grad_norm: 0.9999998683006203, iteration: 137675
loss: 1.0245780944824219,grad_norm: 0.79997700899684, iteration: 137676
loss: 1.054294228553772,grad_norm: 0.9999991421913255, iteration: 137677
loss: 0.9939324259757996,grad_norm: 0.8633458139715057, iteration: 137678
loss: 1.2184489965438843,grad_norm: 0.9999994588240754, iteration: 137679
loss: 1.3312097787857056,grad_norm: 0.9999994394029852, iteration: 137680
loss: 0.9884609580039978,grad_norm: 0.9226323539710504, iteration: 137681
loss: 1.0883735418319702,grad_norm: 0.9999998928187779, iteration: 137682
loss: 1.0874470472335815,grad_norm: 0.9999993720979841, iteration: 137683
loss: 0.9559007287025452,grad_norm: 0.952225797660921, iteration: 137684
loss: 1.1348278522491455,grad_norm: 0.9999998759738717, iteration: 137685
loss: 1.0232020616531372,grad_norm: 0.9999992444162992, iteration: 137686
loss: 1.1178396940231323,grad_norm: 0.9999999653461604, iteration: 137687
loss: 1.0138298273086548,grad_norm: 0.999999133366294, iteration: 137688
loss: 1.0138578414916992,grad_norm: 0.9304302431459415, iteration: 137689
loss: 1.064098596572876,grad_norm: 0.9899844220688119, iteration: 137690
loss: 1.0427659749984741,grad_norm: 0.9999995798360066, iteration: 137691
loss: 1.015226125717163,grad_norm: 0.9939879630952165, iteration: 137692
loss: 1.0304620265960693,grad_norm: 0.9407545545605807, iteration: 137693
loss: 1.0254250764846802,grad_norm: 0.9999994974944831, iteration: 137694
loss: 0.9997072815895081,grad_norm: 0.9999990994840139, iteration: 137695
loss: 1.037235140800476,grad_norm: 0.9999996585673774, iteration: 137696
loss: 0.9790841937065125,grad_norm: 0.9426015206638576, iteration: 137697
loss: 1.0237245559692383,grad_norm: 0.9999991697218109, iteration: 137698
loss: 1.0810176134109497,grad_norm: 0.9999992643059019, iteration: 137699
loss: 1.0202858448028564,grad_norm: 0.999872902898766, iteration: 137700
loss: 1.0351605415344238,grad_norm: 0.9999999190614589, iteration: 137701
loss: 1.0079185962677002,grad_norm: 0.9999994373959538, iteration: 137702
loss: 1.1219145059585571,grad_norm: 0.9999992850601751, iteration: 137703
loss: 0.9981501698493958,grad_norm: 0.9999994268930159, iteration: 137704
loss: 1.028214931488037,grad_norm: 0.9999992127613239, iteration: 137705
loss: 1.0701441764831543,grad_norm: 0.9999995212126758, iteration: 137706
loss: 1.2114218473434448,grad_norm: 0.9999991385278474, iteration: 137707
loss: 1.1448770761489868,grad_norm: 0.9999994041827985, iteration: 137708
loss: 1.0278551578521729,grad_norm: 0.9999997166283248, iteration: 137709
loss: 1.094141960144043,grad_norm: 0.8880955144562714, iteration: 137710
loss: 0.9708489775657654,grad_norm: 0.9438856312666319, iteration: 137711
loss: 1.2083954811096191,grad_norm: 1.0000000452431783, iteration: 137712
loss: 1.0528674125671387,grad_norm: 0.9999996157371928, iteration: 137713
loss: 1.022432565689087,grad_norm: 0.7887168797660621, iteration: 137714
loss: 1.0615954399108887,grad_norm: 0.904641425113636, iteration: 137715
loss: 1.0041892528533936,grad_norm: 0.999998969145522, iteration: 137716
loss: 1.0178773403167725,grad_norm: 0.9999994637276296, iteration: 137717
loss: 1.0458300113677979,grad_norm: 0.9999993944194534, iteration: 137718
loss: 1.0398350954055786,grad_norm: 0.999999070401891, iteration: 137719
loss: 1.0242925882339478,grad_norm: 0.9385637153130268, iteration: 137720
loss: 0.980950117111206,grad_norm: 0.890751621679825, iteration: 137721
loss: 1.0416691303253174,grad_norm: 0.9999994753094786, iteration: 137722
loss: 1.0471842288970947,grad_norm: 0.9999998346400492, iteration: 137723
loss: 1.1732192039489746,grad_norm: 0.9999997567627403, iteration: 137724
loss: 1.0740959644317627,grad_norm: 0.9999995111512143, iteration: 137725
loss: 1.1961649656295776,grad_norm: 0.9999997194838949, iteration: 137726
loss: 1.0408413410186768,grad_norm: 0.9999998326495179, iteration: 137727
loss: 1.018797516822815,grad_norm: 0.999999836022887, iteration: 137728
loss: 1.092350721359253,grad_norm: 0.9999996038175643, iteration: 137729
loss: 1.0190273523330688,grad_norm: 0.9999996152898332, iteration: 137730
loss: 1.1463580131530762,grad_norm: 0.9999996288517856, iteration: 137731
loss: 1.0104444026947021,grad_norm: 0.9999991466769186, iteration: 137732
loss: 1.0699458122253418,grad_norm: 0.9999996219077614, iteration: 137733
loss: 1.1857891082763672,grad_norm: 0.9999995482309175, iteration: 137734
loss: 1.0441035032272339,grad_norm: 0.9999993376746947, iteration: 137735
loss: 1.1380329132080078,grad_norm: 0.9999994105241304, iteration: 137736
loss: 0.9930235147476196,grad_norm: 0.7470392974101523, iteration: 137737
loss: 1.0683791637420654,grad_norm: 1.000000038239266, iteration: 137738
loss: 1.2293469905853271,grad_norm: 0.9999992948581246, iteration: 137739
loss: 0.9533382058143616,grad_norm: 0.9999997419198613, iteration: 137740
loss: 1.0075219869613647,grad_norm: 0.9999991702014276, iteration: 137741
loss: 0.9928120374679565,grad_norm: 0.8543249339856437, iteration: 137742
loss: 1.0074870586395264,grad_norm: 0.8513920636542032, iteration: 137743
loss: 1.0612293481826782,grad_norm: 0.9999996987746403, iteration: 137744
loss: 1.0078023672103882,grad_norm: 0.9999990536578048, iteration: 137745
loss: 1.0400278568267822,grad_norm: 0.9999994400498551, iteration: 137746
loss: 0.9906603097915649,grad_norm: 0.8772958665539006, iteration: 137747
loss: 1.114283561706543,grad_norm: 0.9999998970506052, iteration: 137748
loss: 1.0169299840927124,grad_norm: 0.9999994457779503, iteration: 137749
loss: 1.0838706493377686,grad_norm: 0.9999998426427954, iteration: 137750
loss: 1.0872726440429688,grad_norm: 0.9999998275134833, iteration: 137751
loss: 1.0107436180114746,grad_norm: 0.7773882526005432, iteration: 137752
loss: 1.047196865081787,grad_norm: 0.9999996652961884, iteration: 137753
loss: 1.0191212892532349,grad_norm: 0.9999990695989914, iteration: 137754
loss: 1.1117146015167236,grad_norm: 0.9999992982240075, iteration: 137755
loss: 1.0059833526611328,grad_norm: 0.9999994164454918, iteration: 137756
loss: 1.0002676248550415,grad_norm: 0.8794496286958544, iteration: 137757
loss: 1.1270686388015747,grad_norm: 0.9999996496561402, iteration: 137758
loss: 1.0841844081878662,grad_norm: 0.9999998502850247, iteration: 137759
loss: 0.9952741861343384,grad_norm: 0.9481521289475617, iteration: 137760
loss: 1.110805869102478,grad_norm: 0.99999997187134, iteration: 137761
loss: 0.987932562828064,grad_norm: 0.9999990027825247, iteration: 137762
loss: 1.026227355003357,grad_norm: 0.8483631032200687, iteration: 137763
loss: 1.2204993963241577,grad_norm: 0.9999999239850834, iteration: 137764
loss: 1.151544213294983,grad_norm: 0.9999996874774607, iteration: 137765
loss: 1.128619909286499,grad_norm: 0.9999999603050603, iteration: 137766
loss: 1.0443204641342163,grad_norm: 0.9999992626197521, iteration: 137767
loss: 1.1956723928451538,grad_norm: 0.9999995382635218, iteration: 137768
loss: 1.0115609169006348,grad_norm: 0.9705448088152849, iteration: 137769
loss: 1.0832933187484741,grad_norm: 1.0000000258400168, iteration: 137770
loss: 1.1786143779754639,grad_norm: 0.999999401325339, iteration: 137771
loss: 1.044380784034729,grad_norm: 0.999999593506686, iteration: 137772
loss: 1.0435587167739868,grad_norm: 0.9999993859660288, iteration: 137773
loss: 1.07645845413208,grad_norm: 0.9999993699206365, iteration: 137774
loss: 1.0255695581436157,grad_norm: 0.8410858968601752, iteration: 137775
loss: 0.9882599711418152,grad_norm: 0.9651116556226574, iteration: 137776
loss: 0.9698445200920105,grad_norm: 0.8963755036532869, iteration: 137777
loss: 1.2098615169525146,grad_norm: 1.0000000256583124, iteration: 137778
loss: 1.003565788269043,grad_norm: 0.9553329260838883, iteration: 137779
loss: 1.0177524089813232,grad_norm: 0.9999990896300266, iteration: 137780
loss: 1.0119751691818237,grad_norm: 0.9999998988188151, iteration: 137781
loss: 1.1074684858322144,grad_norm: 0.9999997175445228, iteration: 137782
loss: 1.2917048931121826,grad_norm: 0.9999999938128702, iteration: 137783
loss: 1.0302716493606567,grad_norm: 0.7879999406676159, iteration: 137784
loss: 1.0894767045974731,grad_norm: 0.9999992106420491, iteration: 137785
loss: 1.0200880765914917,grad_norm: 0.999999131088478, iteration: 137786
loss: 1.0413891077041626,grad_norm: 0.999999401268433, iteration: 137787
loss: 1.0671658515930176,grad_norm: 0.9999993194055637, iteration: 137788
loss: 1.010054588317871,grad_norm: 0.9999994054714642, iteration: 137789
loss: 1.0163733959197998,grad_norm: 0.9798628776805193, iteration: 137790
loss: 1.171463131904602,grad_norm: 0.9999994479825867, iteration: 137791
loss: 1.0568628311157227,grad_norm: 0.9999992856275326, iteration: 137792
loss: 1.013350248336792,grad_norm: 0.9999997103531186, iteration: 137793
loss: 1.261025071144104,grad_norm: 0.9999997599450714, iteration: 137794
loss: 1.1064879894256592,grad_norm: 0.9999996791284664, iteration: 137795
loss: 0.9998267292976379,grad_norm: 0.9782898526435053, iteration: 137796
loss: 1.06522536277771,grad_norm: 0.9999994080792942, iteration: 137797
loss: 1.2515863180160522,grad_norm: 0.9999996452778025, iteration: 137798
loss: 1.08890962600708,grad_norm: 0.9999998299653176, iteration: 137799
loss: 1.1174670457839966,grad_norm: 0.999999292692605, iteration: 137800
loss: 1.086355447769165,grad_norm: 0.9999999045244973, iteration: 137801
loss: 0.9810004234313965,grad_norm: 0.8054935829235235, iteration: 137802
loss: 1.0744750499725342,grad_norm: 0.9999992101228871, iteration: 137803
loss: 1.0732232332229614,grad_norm: 0.9999995529348045, iteration: 137804
loss: 1.1184653043746948,grad_norm: 0.9999997184438194, iteration: 137805
loss: 1.0477783679962158,grad_norm: 0.9999995322692867, iteration: 137806
loss: 1.0639209747314453,grad_norm: 0.9999995540110695, iteration: 137807
loss: 0.9958171844482422,grad_norm: 0.9999991575635419, iteration: 137808
loss: 1.0137860774993896,grad_norm: 0.9605630555578373, iteration: 137809
loss: 1.0724462270736694,grad_norm: 0.9999994592324091, iteration: 137810
loss: 1.0195790529251099,grad_norm: 0.9999994324909226, iteration: 137811
loss: 1.3514609336853027,grad_norm: 0.9999999715900626, iteration: 137812
loss: 1.0097861289978027,grad_norm: 0.9999994560265109, iteration: 137813
loss: 1.047540545463562,grad_norm: 0.9999999589948635, iteration: 137814
loss: 1.069486379623413,grad_norm: 0.9999991834212123, iteration: 137815
loss: 1.0635175704956055,grad_norm: 0.9999995688580183, iteration: 137816
loss: 1.1296074390411377,grad_norm: 0.999999968075717, iteration: 137817
loss: 1.0483955144882202,grad_norm: 0.9999998005565147, iteration: 137818
loss: 1.208715558052063,grad_norm: 0.9999999575987314, iteration: 137819
loss: 1.0387746095657349,grad_norm: 0.9999994692603839, iteration: 137820
loss: 1.001753330230713,grad_norm: 1.0000000288065245, iteration: 137821
loss: 1.0994480848312378,grad_norm: 0.9999998378125959, iteration: 137822
loss: 1.0591598749160767,grad_norm: 0.9999996949080734, iteration: 137823
loss: 1.105541467666626,grad_norm: 0.9999993927752381, iteration: 137824
loss: 1.0536850690841675,grad_norm: 0.9999991414995566, iteration: 137825
loss: 1.0521965026855469,grad_norm: 0.9999996021227414, iteration: 137826
loss: 1.0279144048690796,grad_norm: 0.9999990647922451, iteration: 137827
loss: 1.0113424062728882,grad_norm: 0.9999995544923176, iteration: 137828
loss: 1.0968948602676392,grad_norm: 0.9999992572315184, iteration: 137829
loss: 1.138487696647644,grad_norm: 0.9999999442724046, iteration: 137830
loss: 1.1086217164993286,grad_norm: 0.9999998514440428, iteration: 137831
loss: 1.0812386274337769,grad_norm: 0.9999989379574074, iteration: 137832
loss: 1.0362789630889893,grad_norm: 0.9999992100552451, iteration: 137833
loss: 1.0050095319747925,grad_norm: 0.9023330236119774, iteration: 137834
loss: 1.013443946838379,grad_norm: 0.9612715337287296, iteration: 137835
loss: 1.1586703062057495,grad_norm: 0.9999995699728451, iteration: 137836
loss: 1.1429955959320068,grad_norm: 0.9999991529136271, iteration: 137837
loss: 1.1248077154159546,grad_norm: 0.9999991833913243, iteration: 137838
loss: 1.0199187994003296,grad_norm: 0.999999503262805, iteration: 137839
loss: 1.2226039171218872,grad_norm: 0.999999789192309, iteration: 137840
loss: 1.0738728046417236,grad_norm: 0.9999993430502524, iteration: 137841
loss: 1.0686837434768677,grad_norm: 0.9999992206379569, iteration: 137842
loss: 1.040257215499878,grad_norm: 0.8397705841573851, iteration: 137843
loss: 1.0452467203140259,grad_norm: 0.9999996264499863, iteration: 137844
loss: 1.057288408279419,grad_norm: 0.9999993350907265, iteration: 137845
loss: 1.0639805793762207,grad_norm: 0.9999992333682205, iteration: 137846
loss: 1.1312633752822876,grad_norm: 0.9999995291477733, iteration: 137847
loss: 1.0095843076705933,grad_norm: 0.9999990794751518, iteration: 137848
loss: 1.1200727224349976,grad_norm: 0.9999995775605551, iteration: 137849
loss: 1.0711939334869385,grad_norm: 0.9999994117281618, iteration: 137850
loss: 1.0836374759674072,grad_norm: 0.9999998301703105, iteration: 137851
loss: 0.967437744140625,grad_norm: 0.8329973936099048, iteration: 137852
loss: 1.0430699586868286,grad_norm: 0.9999996982203933, iteration: 137853
loss: 1.0100359916687012,grad_norm: 0.9999997982215725, iteration: 137854
loss: 1.011661410331726,grad_norm: 0.9103993325774312, iteration: 137855
loss: 0.9743462800979614,grad_norm: 0.8956829385300297, iteration: 137856
loss: 1.1824445724487305,grad_norm: 1.0000000026674645, iteration: 137857
loss: 1.0579084157943726,grad_norm: 0.9999992392828819, iteration: 137858
loss: 1.0118792057037354,grad_norm: 0.9999991509834277, iteration: 137859
loss: 1.0490914583206177,grad_norm: 0.999999951877456, iteration: 137860
loss: 1.059573769569397,grad_norm: 0.9999991513675197, iteration: 137861
loss: 1.1109678745269775,grad_norm: 0.9999996994180925, iteration: 137862
loss: 1.1289405822753906,grad_norm: 0.9999993264469399, iteration: 137863
loss: 1.1101776361465454,grad_norm: 0.9999995168073674, iteration: 137864
loss: 1.1510225534439087,grad_norm: 0.9999996743898032, iteration: 137865
loss: 1.024748682975769,grad_norm: 0.9999999152636215, iteration: 137866
loss: 1.0193698406219482,grad_norm: 0.9248447948190086, iteration: 137867
loss: 1.0876015424728394,grad_norm: 0.9999993290647412, iteration: 137868
loss: 1.0089002847671509,grad_norm: 0.9148969840518488, iteration: 137869
loss: 1.1143860816955566,grad_norm: 0.9999992088091805, iteration: 137870
loss: 0.9948832988739014,grad_norm: 0.9999991745272171, iteration: 137871
loss: 1.0360870361328125,grad_norm: 0.9999995677122592, iteration: 137872
loss: 1.0245888233184814,grad_norm: 0.7826308882295114, iteration: 137873
loss: 0.9748175144195557,grad_norm: 0.9327368238487829, iteration: 137874
loss: 0.9941038489341736,grad_norm: 0.9999991955931418, iteration: 137875
loss: 0.9949550032615662,grad_norm: 0.9671107009805189, iteration: 137876
loss: 1.056915521621704,grad_norm: 0.9999993926745304, iteration: 137877
loss: 1.0111243724822998,grad_norm: 0.8594681387561465, iteration: 137878
loss: 1.1288363933563232,grad_norm: 0.9999999461899973, iteration: 137879
loss: 0.9961310029029846,grad_norm: 0.9982240255635625, iteration: 137880
loss: 1.0010757446289062,grad_norm: 0.9125426107946274, iteration: 137881
loss: 1.101702332496643,grad_norm: 0.9999997356322494, iteration: 137882
loss: 0.9864745736122131,grad_norm: 0.9999991389241704, iteration: 137883
loss: 1.096842646598816,grad_norm: 0.9999999312684555, iteration: 137884
loss: 1.0762516260147095,grad_norm: 0.9999996594274868, iteration: 137885
loss: 1.0216584205627441,grad_norm: 0.814175658183774, iteration: 137886
loss: 1.064507246017456,grad_norm: 0.9999998783891552, iteration: 137887
loss: 1.0308876037597656,grad_norm: 0.9999990256176224, iteration: 137888
loss: 1.0642296075820923,grad_norm: 0.9635437584334644, iteration: 137889
loss: 1.0443530082702637,grad_norm: 0.9999990866371115, iteration: 137890
loss: 1.0699902772903442,grad_norm: 0.9999995131672601, iteration: 137891
loss: 1.0081727504730225,grad_norm: 0.9062858110917633, iteration: 137892
loss: 1.1079511642456055,grad_norm: 0.9999999159302834, iteration: 137893
loss: 1.0629878044128418,grad_norm: 0.9999992603180522, iteration: 137894
loss: 0.9645327925682068,grad_norm: 0.999999088845199, iteration: 137895
loss: 1.0192426443099976,grad_norm: 0.9975488544982374, iteration: 137896
loss: 1.053897738456726,grad_norm: 0.9999993996231654, iteration: 137897
loss: 1.0861314535140991,grad_norm: 0.9999989874247186, iteration: 137898
loss: 1.0565228462219238,grad_norm: 0.9999998579200015, iteration: 137899
loss: 1.0508675575256348,grad_norm: 0.9999990933906395, iteration: 137900
loss: 1.00409996509552,grad_norm: 0.873824178827562, iteration: 137901
loss: 1.130105972290039,grad_norm: 0.9999998781468836, iteration: 137902
loss: 1.2341889142990112,grad_norm: 0.9999999220813399, iteration: 137903
loss: 0.9768427014350891,grad_norm: 0.9838390958417241, iteration: 137904
loss: 1.0118908882141113,grad_norm: 0.9999991047265573, iteration: 137905
loss: 1.0055171251296997,grad_norm: 0.9999991989580485, iteration: 137906
loss: 1.082473635673523,grad_norm: 0.9999990949277009, iteration: 137907
loss: 1.0636510848999023,grad_norm: 0.9981851195332395, iteration: 137908
loss: 1.0704624652862549,grad_norm: 0.9999992756212018, iteration: 137909
loss: 1.1092382669448853,grad_norm: 0.9999999237014883, iteration: 137910
loss: 1.0322438478469849,grad_norm: 0.9612339245472883, iteration: 137911
loss: 1.020616888999939,grad_norm: 0.9999998421473986, iteration: 137912
loss: 1.0339717864990234,grad_norm: 0.9999992532969268, iteration: 137913
loss: 1.123020052909851,grad_norm: 0.9999999609527654, iteration: 137914
loss: 1.0386735200881958,grad_norm: 0.9999996263314526, iteration: 137915
loss: 1.167138695716858,grad_norm: 0.9999997124397932, iteration: 137916
loss: 1.0585262775421143,grad_norm: 0.9999997827604409, iteration: 137917
loss: 0.9638914465904236,grad_norm: 0.9032847986545747, iteration: 137918
loss: 1.2011184692382812,grad_norm: 0.9999999187563593, iteration: 137919
loss: 1.0959962606430054,grad_norm: 0.9999999571852692, iteration: 137920
loss: 0.9911009669303894,grad_norm: 0.999999059603072, iteration: 137921
loss: 1.2090044021606445,grad_norm: 0.9999998840549352, iteration: 137922
loss: 1.1644494533538818,grad_norm: 0.9999995173593057, iteration: 137923
loss: 1.1207245588302612,grad_norm: 0.9999998717717893, iteration: 137924
loss: 0.9806183576583862,grad_norm: 0.8910604573164396, iteration: 137925
loss: 1.1338093280792236,grad_norm: 0.9999993629543279, iteration: 137926
loss: 1.0956811904907227,grad_norm: 0.9999997381256879, iteration: 137927
loss: 0.9986007213592529,grad_norm: 0.999999467052633, iteration: 137928
loss: 0.9642854928970337,grad_norm: 0.7836872292005949, iteration: 137929
loss: 1.2359278202056885,grad_norm: 0.9999993009569168, iteration: 137930
loss: 1.0950204133987427,grad_norm: 0.9999990963072243, iteration: 137931
loss: 1.0885359048843384,grad_norm: 0.9999993279813515, iteration: 137932
loss: 1.0307871103286743,grad_norm: 0.9999996626204419, iteration: 137933
loss: 1.2311736345291138,grad_norm: 0.999999744769463, iteration: 137934
loss: 1.0589698553085327,grad_norm: 0.8436920135970997, iteration: 137935
loss: 0.9984163045883179,grad_norm: 0.8387551003528481, iteration: 137936
loss: 1.3252568244934082,grad_norm: 0.9999998289975695, iteration: 137937
loss: 1.1082053184509277,grad_norm: 0.9999991455906689, iteration: 137938
loss: 1.106980800628662,grad_norm: 0.9999991620372324, iteration: 137939
loss: 1.159805178642273,grad_norm: 0.999999652054658, iteration: 137940
loss: 0.9898589253425598,grad_norm: 0.9488428014560943, iteration: 137941
loss: 1.0928747653961182,grad_norm: 0.9173882518073827, iteration: 137942
loss: 1.0795713663101196,grad_norm: 0.9999991253758737, iteration: 137943
loss: 1.037540316581726,grad_norm: 0.9999993185925782, iteration: 137944
loss: 1.0144543647766113,grad_norm: 0.9999991352978174, iteration: 137945
loss: 1.0551819801330566,grad_norm: 0.999999393840411, iteration: 137946
loss: 1.0062494277954102,grad_norm: 0.8397840570909331, iteration: 137947
loss: 1.0984382629394531,grad_norm: 0.9999997953280512, iteration: 137948
loss: 1.0154796838760376,grad_norm: 0.7628271838826848, iteration: 137949
loss: 1.017602562904358,grad_norm: 0.8639042466261546, iteration: 137950
loss: 1.024595856666565,grad_norm: 0.9999997766699867, iteration: 137951
loss: 1.0429115295410156,grad_norm: 0.942236491803912, iteration: 137952
loss: 1.102151870727539,grad_norm: 0.9999991887088396, iteration: 137953
loss: 1.1158151626586914,grad_norm: 0.999999885749347, iteration: 137954
loss: 1.0330291986465454,grad_norm: 0.9999996039894847, iteration: 137955
loss: 0.998748779296875,grad_norm: 0.9999995307041863, iteration: 137956
loss: 1.1273168325424194,grad_norm: 0.9534758960947634, iteration: 137957
loss: 1.2671631574630737,grad_norm: 0.9999994987850728, iteration: 137958
loss: 1.0432846546173096,grad_norm: 0.9950553018801959, iteration: 137959
loss: 1.0179129838943481,grad_norm: 0.999999451128149, iteration: 137960
loss: 1.1165754795074463,grad_norm: 0.9999992235525724, iteration: 137961
loss: 1.0349950790405273,grad_norm: 0.9999994374660593, iteration: 137962
loss: 1.1162394285202026,grad_norm: 0.9999995829334931, iteration: 137963
loss: 1.0271159410476685,grad_norm: 0.9999991980780711, iteration: 137964
loss: 0.9861292839050293,grad_norm: 0.9999990539910938, iteration: 137965
loss: 1.0394078493118286,grad_norm: 0.99999936557165, iteration: 137966
loss: 1.1103334426879883,grad_norm: 0.9999992495284271, iteration: 137967
loss: 1.0118151903152466,grad_norm: 0.8854316265341203, iteration: 137968
loss: 1.0045275688171387,grad_norm: 0.8492417857491807, iteration: 137969
loss: 1.176601529121399,grad_norm: 0.9999992852422177, iteration: 137970
loss: 1.1707818508148193,grad_norm: 0.999999854944608, iteration: 137971
loss: 1.0740411281585693,grad_norm: 0.9999994818399704, iteration: 137972
loss: 1.151194453239441,grad_norm: 0.9999997706821155, iteration: 137973
loss: 1.0079283714294434,grad_norm: 0.9999990801823473, iteration: 137974
loss: 1.0480778217315674,grad_norm: 0.9999994740241788, iteration: 137975
loss: 1.0299993753433228,grad_norm: 0.9999990603232433, iteration: 137976
loss: 1.0030186176300049,grad_norm: 0.8021287393631145, iteration: 137977
loss: 1.0796422958374023,grad_norm: 0.9999992460010344, iteration: 137978
loss: 0.999509334564209,grad_norm: 0.999999198874074, iteration: 137979
loss: 1.0436302423477173,grad_norm: 0.9272232473651073, iteration: 137980
loss: 1.0020358562469482,grad_norm: 0.7939437038495937, iteration: 137981
loss: 1.0744386911392212,grad_norm: 0.999999473496312, iteration: 137982
loss: 1.017891764640808,grad_norm: 0.9999999071605258, iteration: 137983
loss: 1.0585347414016724,grad_norm: 0.890054003529796, iteration: 137984
loss: 1.008163571357727,grad_norm: 0.8308620738094771, iteration: 137985
loss: 0.9765322804450989,grad_norm: 0.9999997831836244, iteration: 137986
loss: 0.985774576663971,grad_norm: 0.8005999507134819, iteration: 137987
loss: 1.128200650215149,grad_norm: 0.9999996179640525, iteration: 137988
loss: 0.9993759393692017,grad_norm: 0.9999991545730399, iteration: 137989
loss: 1.0815134048461914,grad_norm: 0.9999996725597843, iteration: 137990
loss: 0.9989135265350342,grad_norm: 0.7801218259259951, iteration: 137991
loss: 1.2086156606674194,grad_norm: 0.9999992209162095, iteration: 137992
loss: 1.0842244625091553,grad_norm: 0.9999997947624737, iteration: 137993
loss: 1.0053045749664307,grad_norm: 0.8950696197649752, iteration: 137994
loss: 1.0688934326171875,grad_norm: 0.9878100197160572, iteration: 137995
loss: 0.9995288848876953,grad_norm: 0.9999996291283139, iteration: 137996
loss: 1.016278862953186,grad_norm: 0.7947674767383006, iteration: 137997
loss: 0.9879273772239685,grad_norm: 0.897178466141419, iteration: 137998
loss: 0.9885405898094177,grad_norm: 0.800909497059186, iteration: 137999
loss: 1.0405069589614868,grad_norm: 0.9999990890227188, iteration: 138000
loss: 1.2086483240127563,grad_norm: 0.9999998072759393, iteration: 138001
loss: 1.0680441856384277,grad_norm: 0.9999995862704787, iteration: 138002
loss: 1.0537666082382202,grad_norm: 0.9999992335673056, iteration: 138003
loss: 1.0299538373947144,grad_norm: 0.9999998615622937, iteration: 138004
loss: 1.0762431621551514,grad_norm: 0.9695415917892864, iteration: 138005
loss: 1.232025146484375,grad_norm: 0.9999992758134878, iteration: 138006
loss: 1.0552659034729004,grad_norm: 0.8990183214137815, iteration: 138007
loss: 1.0269911289215088,grad_norm: 0.9999992848887628, iteration: 138008
loss: 1.0881845951080322,grad_norm: 0.9999997467741185, iteration: 138009
loss: 1.055946946144104,grad_norm: 0.9999999376838876, iteration: 138010
loss: 1.0184578895568848,grad_norm: 0.8994009171659253, iteration: 138011
loss: 1.1154179573059082,grad_norm: 0.9999996547317288, iteration: 138012
loss: 1.076269268989563,grad_norm: 0.9999998250492047, iteration: 138013
loss: 1.1005698442459106,grad_norm: 0.9999996031729452, iteration: 138014
loss: 1.0773897171020508,grad_norm: 0.9999994949458162, iteration: 138015
loss: 1.0971133708953857,grad_norm: 0.9999998999409065, iteration: 138016
loss: 1.0295078754425049,grad_norm: 0.8854362357625964, iteration: 138017
loss: 1.076805830001831,grad_norm: 0.9632466459162086, iteration: 138018
loss: 1.038694977760315,grad_norm: 0.9999990926416167, iteration: 138019
loss: 1.1823432445526123,grad_norm: 0.9999997704803952, iteration: 138020
loss: 0.9853484034538269,grad_norm: 0.9999995633853652, iteration: 138021
loss: 1.0731027126312256,grad_norm: 0.9999998144042087, iteration: 138022
loss: 1.0299121141433716,grad_norm: 0.9850516138957577, iteration: 138023
loss: 1.1239222288131714,grad_norm: 0.9999995213480087, iteration: 138024
loss: 1.0771100521087646,grad_norm: 0.9999997271912074, iteration: 138025
loss: 1.1701018810272217,grad_norm: 0.9999998534109247, iteration: 138026
loss: 1.0126467943191528,grad_norm: 0.9999993146938914, iteration: 138027
loss: 1.0252115726470947,grad_norm: 0.9035587747042206, iteration: 138028
loss: 0.9453089237213135,grad_norm: 0.9992154737283051, iteration: 138029
loss: 1.0052976608276367,grad_norm: 0.9999990373155471, iteration: 138030
loss: 1.0448646545410156,grad_norm: 0.9999995771127884, iteration: 138031
loss: 1.038762092590332,grad_norm: 0.9999990640265497, iteration: 138032
loss: 1.020963191986084,grad_norm: 0.9999989817381365, iteration: 138033
loss: 1.008795976638794,grad_norm: 0.7756308534286817, iteration: 138034
loss: 1.0016354322433472,grad_norm: 0.8812214609026486, iteration: 138035
loss: 1.1265685558319092,grad_norm: 0.9999995827012023, iteration: 138036
loss: 1.0500292778015137,grad_norm: 0.9999996568544861, iteration: 138037
loss: 1.127221941947937,grad_norm: 0.9999997254083576, iteration: 138038
loss: 1.0540728569030762,grad_norm: 0.999999284071671, iteration: 138039
loss: 1.0290911197662354,grad_norm: 0.9893347651078894, iteration: 138040
loss: 1.0219390392303467,grad_norm: 0.7441438408519436, iteration: 138041
loss: 1.0228161811828613,grad_norm: 0.9999996111805303, iteration: 138042
loss: 1.0885874032974243,grad_norm: 0.999999789112691, iteration: 138043
loss: 1.0885438919067383,grad_norm: 0.9999998579790194, iteration: 138044
loss: 1.0057008266448975,grad_norm: 0.9573466425948896, iteration: 138045
loss: 1.026548981666565,grad_norm: 0.999999364989612, iteration: 138046
loss: 1.210167407989502,grad_norm: 0.999999806091458, iteration: 138047
loss: 1.1533015966415405,grad_norm: 0.9999993564974218, iteration: 138048
loss: 1.0433547496795654,grad_norm: 0.9999992067302798, iteration: 138049
loss: 1.1406816244125366,grad_norm: 0.999999271341669, iteration: 138050
loss: 0.9871491193771362,grad_norm: 0.7689449449290696, iteration: 138051
loss: 1.0170166492462158,grad_norm: 0.999999346449767, iteration: 138052
loss: 1.0841931104660034,grad_norm: 0.9999993356878057, iteration: 138053
loss: 1.0348527431488037,grad_norm: 0.999999017479337, iteration: 138054
loss: 1.0347579717636108,grad_norm: 0.9999994677545806, iteration: 138055
loss: 1.088293194770813,grad_norm: 0.9999992303724002, iteration: 138056
loss: 1.012597680091858,grad_norm: 0.999999321020491, iteration: 138057
loss: 1.0781769752502441,grad_norm: 0.999999939811109, iteration: 138058
loss: 1.0753945112228394,grad_norm: 0.9999995005728507, iteration: 138059
loss: 1.136489987373352,grad_norm: 0.999999961679534, iteration: 138060
loss: 1.0358537435531616,grad_norm: 0.9807070369005517, iteration: 138061
loss: 1.060041069984436,grad_norm: 0.9999995855383954, iteration: 138062
loss: 1.1792590618133545,grad_norm: 0.9999998962895017, iteration: 138063
loss: 1.0114240646362305,grad_norm: 0.9999998607645925, iteration: 138064
loss: 1.0995877981185913,grad_norm: 0.9999995071848055, iteration: 138065
loss: 0.9943278431892395,grad_norm: 0.8828655038679795, iteration: 138066
loss: 0.9742098450660706,grad_norm: 0.8697537725237153, iteration: 138067
loss: 1.2244195938110352,grad_norm: 0.9999998361554638, iteration: 138068
loss: 1.0182576179504395,grad_norm: 0.9999996540465833, iteration: 138069
loss: 1.069764494895935,grad_norm: 0.9737816248448942, iteration: 138070
loss: 1.0870469808578491,grad_norm: 0.9999998554423675, iteration: 138071
loss: 1.0565345287322998,grad_norm: 0.9999996141575024, iteration: 138072
loss: 0.9821170568466187,grad_norm: 0.8544051836001905, iteration: 138073
loss: 1.0319103002548218,grad_norm: 0.9999998402367205, iteration: 138074
loss: 1.0930310487747192,grad_norm: 0.9999993477316017, iteration: 138075
loss: 1.0097850561141968,grad_norm: 0.9999991357814341, iteration: 138076
loss: 1.1135650873184204,grad_norm: 0.9999992795550713, iteration: 138077
loss: 1.016615867614746,grad_norm: 0.8106112512002112, iteration: 138078
loss: 0.9934028387069702,grad_norm: 0.9999994469072817, iteration: 138079
loss: 0.9986000657081604,grad_norm: 0.8979143610232013, iteration: 138080
loss: 1.315719723701477,grad_norm: 0.9999998564269444, iteration: 138081
loss: 1.1360691785812378,grad_norm: 0.9999996887579311, iteration: 138082
loss: 1.1724917888641357,grad_norm: 0.9999999760936782, iteration: 138083
loss: 1.0018391609191895,grad_norm: 0.9999993728586862, iteration: 138084
loss: 1.147826075553894,grad_norm: 0.9999992882272829, iteration: 138085
loss: 1.0663450956344604,grad_norm: 0.9999999098217236, iteration: 138086
loss: 1.034665584564209,grad_norm: 0.9999991779707184, iteration: 138087
loss: 1.012891411781311,grad_norm: 0.9999997108401735, iteration: 138088
loss: 1.112091064453125,grad_norm: 0.9999996746391455, iteration: 138089
loss: 1.0052980184555054,grad_norm: 0.8263826909265362, iteration: 138090
loss: 1.0531408786773682,grad_norm: 0.9999991933794816, iteration: 138091
loss: 1.0966236591339111,grad_norm: 0.9999992655165895, iteration: 138092
loss: 1.0223979949951172,grad_norm: 0.9918126847080103, iteration: 138093
loss: 0.983144223690033,grad_norm: 0.8813240831558199, iteration: 138094
loss: 0.9983369708061218,grad_norm: 0.843256574890767, iteration: 138095
loss: 1.047775387763977,grad_norm: 0.9999995119504712, iteration: 138096
loss: 1.2468465566635132,grad_norm: 0.9999999465772637, iteration: 138097
loss: 1.1165910959243774,grad_norm: 0.999999967599731, iteration: 138098
loss: 1.1901072263717651,grad_norm: 0.9999998676009512, iteration: 138099
loss: 1.0213210582733154,grad_norm: 0.868915465615184, iteration: 138100
loss: 1.0106556415557861,grad_norm: 0.8763748026657788, iteration: 138101
loss: 1.152891755104065,grad_norm: 0.9999995465854182, iteration: 138102
loss: 1.1165412664413452,grad_norm: 0.992708871230431, iteration: 138103
loss: 1.0872528553009033,grad_norm: 0.999999902130026, iteration: 138104
loss: 1.048996925354004,grad_norm: 0.9999996066593343, iteration: 138105
loss: 1.2033008337020874,grad_norm: 0.9999999584858812, iteration: 138106
loss: 1.0694445371627808,grad_norm: 0.9999997469247063, iteration: 138107
loss: 1.027122974395752,grad_norm: 0.939564448457232, iteration: 138108
loss: 1.0803802013397217,grad_norm: 0.9999993023384032, iteration: 138109
loss: 1.139599084854126,grad_norm: 0.9999995672610611, iteration: 138110
loss: 1.177545428276062,grad_norm: 0.9999997047372053, iteration: 138111
loss: 1.0708733797073364,grad_norm: 0.9999993602994323, iteration: 138112
loss: 1.1616268157958984,grad_norm: 0.9999993565071249, iteration: 138113
loss: 1.144380807876587,grad_norm: 0.9999999803936641, iteration: 138114
loss: 1.0275100469589233,grad_norm: 0.9999994181373653, iteration: 138115
loss: 1.3514219522476196,grad_norm: 0.9999998175835549, iteration: 138116
loss: 1.0312097072601318,grad_norm: 0.9073931868763425, iteration: 138117
loss: 1.14157235622406,grad_norm: 0.9999991883421904, iteration: 138118
loss: 1.196912407875061,grad_norm: 0.9999998599249983, iteration: 138119
loss: 1.1200612783432007,grad_norm: 0.9999998339601712, iteration: 138120
loss: 1.0743829011917114,grad_norm: 0.9843472862122781, iteration: 138121
loss: 1.1619876623153687,grad_norm: 0.9999997282739325, iteration: 138122
loss: 1.0195006132125854,grad_norm: 0.9146761788408928, iteration: 138123
loss: 1.0488210916519165,grad_norm: 0.9999991750458771, iteration: 138124
loss: 1.0559684038162231,grad_norm: 0.972353396017071, iteration: 138125
loss: 1.1004726886749268,grad_norm: 0.9999999998152262, iteration: 138126
loss: 1.0703728199005127,grad_norm: 0.9999994527473022, iteration: 138127
loss: 1.1535630226135254,grad_norm: 0.9999993991929264, iteration: 138128
loss: 0.9914387464523315,grad_norm: 0.9999993272824061, iteration: 138129
loss: 1.070539951324463,grad_norm: 0.9999994985235893, iteration: 138130
loss: 1.0687299966812134,grad_norm: 0.9999994090598928, iteration: 138131
loss: 1.0144909620285034,grad_norm: 0.9999994187504087, iteration: 138132
loss: 1.1012554168701172,grad_norm: 0.9999999504425386, iteration: 138133
loss: 1.115117073059082,grad_norm: 0.8451311910871236, iteration: 138134
loss: 1.074524164199829,grad_norm: 0.9999998629584123, iteration: 138135
loss: 1.0172878503799438,grad_norm: 0.9999998229631908, iteration: 138136
loss: 1.0093913078308105,grad_norm: 0.8849683236151852, iteration: 138137
loss: 1.1842083930969238,grad_norm: 0.9999997746684068, iteration: 138138
loss: 1.0497196912765503,grad_norm: 0.8211342407100696, iteration: 138139
loss: 1.1603245735168457,grad_norm: 0.999999565005569, iteration: 138140
loss: 1.0576248168945312,grad_norm: 0.9999999037924268, iteration: 138141
loss: 1.0227577686309814,grad_norm: 0.92222662923203, iteration: 138142
loss: 1.0656694173812866,grad_norm: 0.8341506399850044, iteration: 138143
loss: 1.382822871208191,grad_norm: 0.9999998613811756, iteration: 138144
loss: 1.0415644645690918,grad_norm: 0.9999999750687149, iteration: 138145
loss: 1.0434523820877075,grad_norm: 0.999999118394142, iteration: 138146
loss: 1.0453921556472778,grad_norm: 0.8941953861860195, iteration: 138147
loss: 1.0130287408828735,grad_norm: 0.9999990171229108, iteration: 138148
loss: 1.21172034740448,grad_norm: 0.9999998287390626, iteration: 138149
loss: 1.0473361015319824,grad_norm: 0.847623904089111, iteration: 138150
loss: 1.1631594896316528,grad_norm: 0.9999999229567291, iteration: 138151
loss: 1.2094160318374634,grad_norm: 0.9999997275961683, iteration: 138152
loss: 1.347262978553772,grad_norm: 0.999999842195588, iteration: 138153
loss: 1.0747485160827637,grad_norm: 0.9999995653119507, iteration: 138154
loss: 1.064632773399353,grad_norm: 0.9999998182869366, iteration: 138155
loss: 1.084299087524414,grad_norm: 0.9999991317083248, iteration: 138156
loss: 1.1341452598571777,grad_norm: 0.9999998289774074, iteration: 138157
loss: 1.264628291130066,grad_norm: 0.999999881910845, iteration: 138158
loss: 1.017417073249817,grad_norm: 0.999999471962158, iteration: 138159
loss: 1.2025176286697388,grad_norm: 0.9999997708132234, iteration: 138160
loss: 1.088902473449707,grad_norm: 0.999999981112644, iteration: 138161
loss: 1.0998985767364502,grad_norm: 0.9999993056891026, iteration: 138162
loss: 1.0242489576339722,grad_norm: 0.9999999052489503, iteration: 138163
loss: 0.9934622049331665,grad_norm: 1.0000000375043392, iteration: 138164
loss: 1.1263450384140015,grad_norm: 0.9999995908493672, iteration: 138165
loss: 1.11953866481781,grad_norm: 0.9999996981904192, iteration: 138166
loss: 1.0207290649414062,grad_norm: 0.8781190823090963, iteration: 138167
loss: 1.1358648538589478,grad_norm: 0.9999994708714135, iteration: 138168
loss: 1.2882182598114014,grad_norm: 0.9999999957786061, iteration: 138169
loss: 1.0544712543487549,grad_norm: 0.9999995799467195, iteration: 138170
loss: 0.9653968811035156,grad_norm: 0.999999015147002, iteration: 138171
loss: 1.1717207431793213,grad_norm: 0.9999998513126963, iteration: 138172
loss: 1.1239807605743408,grad_norm: 0.9999998043067219, iteration: 138173
loss: 1.1685107946395874,grad_norm: 0.9999995010999171, iteration: 138174
loss: 1.1107782125473022,grad_norm: 0.9999998383649371, iteration: 138175
loss: 1.028701901435852,grad_norm: 0.8102626741004396, iteration: 138176
loss: 1.0167577266693115,grad_norm: 0.9999998747502671, iteration: 138177
loss: 1.1153316497802734,grad_norm: 0.9999996647569412, iteration: 138178
loss: 0.9918326735496521,grad_norm: 0.7632620952856427, iteration: 138179
loss: 1.029691219329834,grad_norm: 0.9999992806038903, iteration: 138180
loss: 1.2764045000076294,grad_norm: 0.9999998616029994, iteration: 138181
loss: 1.0420902967453003,grad_norm: 0.9999999196151063, iteration: 138182
loss: 1.0149859189987183,grad_norm: 0.8555216514729033, iteration: 138183
loss: 1.0531103610992432,grad_norm: 0.999999046852541, iteration: 138184
loss: 1.0660505294799805,grad_norm: 0.9999991978014979, iteration: 138185
loss: 0.99332594871521,grad_norm: 0.9999993697514167, iteration: 138186
loss: 1.1296201944351196,grad_norm: 1.0000000110804568, iteration: 138187
loss: 1.0782670974731445,grad_norm: 0.9156319509299538, iteration: 138188
loss: 1.0354923009872437,grad_norm: 0.9999990340479328, iteration: 138189
loss: 1.0317987203598022,grad_norm: 0.999999089367561, iteration: 138190
loss: 1.0659221410751343,grad_norm: 0.9999991798891724, iteration: 138191
loss: 0.9798665642738342,grad_norm: 0.8706856634073077, iteration: 138192
loss: 0.9764100909233093,grad_norm: 0.9999994191898913, iteration: 138193
loss: 1.1234261989593506,grad_norm: 0.9779365177588466, iteration: 138194
loss: 1.0659306049346924,grad_norm: 0.9999995829787482, iteration: 138195
loss: 1.018894076347351,grad_norm: 0.743084878114057, iteration: 138196
loss: 1.0733224153518677,grad_norm: 0.9999995593733476, iteration: 138197
loss: 1.1958996057510376,grad_norm: 0.999999509085186, iteration: 138198
loss: 1.0997933149337769,grad_norm: 0.9999991728678657, iteration: 138199
loss: 1.0455217361450195,grad_norm: 0.8724204571954539, iteration: 138200
loss: 1.0270384550094604,grad_norm: 0.9999995845715055, iteration: 138201
loss: 0.9996570348739624,grad_norm: 0.9999993702049802, iteration: 138202
loss: 1.0680506229400635,grad_norm: 0.9999993168436394, iteration: 138203
loss: 1.0044370889663696,grad_norm: 0.9999994242623376, iteration: 138204
loss: 1.1907216310501099,grad_norm: 0.9999998239467931, iteration: 138205
loss: 1.015507459640503,grad_norm: 0.9999995998284734, iteration: 138206
loss: 1.0034425258636475,grad_norm: 0.9999991333353614, iteration: 138207
loss: 1.1310598850250244,grad_norm: 1.0000000170569736, iteration: 138208
loss: 1.0221331119537354,grad_norm: 0.9999992807149815, iteration: 138209
loss: 1.0407381057739258,grad_norm: 0.9999993297949769, iteration: 138210
loss: 1.4964720010757446,grad_norm: 0.9999996266466766, iteration: 138211
loss: 0.9503008723258972,grad_norm: 0.963906437698206, iteration: 138212
loss: 1.0216481685638428,grad_norm: 0.9999990758866886, iteration: 138213
loss: 1.1026208400726318,grad_norm: 0.9999999032403758, iteration: 138214
loss: 1.062423825263977,grad_norm: 0.9999997035907503, iteration: 138215
loss: 1.2721344232559204,grad_norm: 0.9999997035359511, iteration: 138216
loss: 1.0563783645629883,grad_norm: 0.9999999094852678, iteration: 138217
loss: 0.9890336990356445,grad_norm: 0.8881782541101343, iteration: 138218
loss: 1.0413919687271118,grad_norm: 0.9999995334977176, iteration: 138219
loss: 1.1168535947799683,grad_norm: 0.9999990423460994, iteration: 138220
loss: 1.2437756061553955,grad_norm: 1.0000000227456305, iteration: 138221
loss: 1.0187718868255615,grad_norm: 0.8675969560603121, iteration: 138222
loss: 1.027875542640686,grad_norm: 0.9999996274146926, iteration: 138223
loss: 1.0786656141281128,grad_norm: 0.9999997564223939, iteration: 138224
loss: 0.9606565237045288,grad_norm: 0.8877946487234177, iteration: 138225
loss: 1.2035136222839355,grad_norm: 1.0000000797653568, iteration: 138226
loss: 0.9780109524726868,grad_norm: 0.9808906157125653, iteration: 138227
loss: 1.1125160455703735,grad_norm: 0.9999994198840353, iteration: 138228
loss: 1.2013431787490845,grad_norm: 0.9999999109233796, iteration: 138229
loss: 1.082892656326294,grad_norm: 0.9999992511598229, iteration: 138230
loss: 1.0112011432647705,grad_norm: 0.8753810033142398, iteration: 138231
loss: 1.191172480583191,grad_norm: 0.9999998125370576, iteration: 138232
loss: 1.0647128820419312,grad_norm: 0.9999992015568386, iteration: 138233
loss: 1.05156409740448,grad_norm: 0.9999992218432169, iteration: 138234
loss: 1.0096182823181152,grad_norm: 0.9609734306118053, iteration: 138235
loss: 1.0829530954360962,grad_norm: 1.0000000702009397, iteration: 138236
loss: 1.0106228590011597,grad_norm: 0.9207923469582475, iteration: 138237
loss: 1.007544994354248,grad_norm: 0.9999998382918953, iteration: 138238
loss: 1.1949492692947388,grad_norm: 0.9999996491311718, iteration: 138239
loss: 0.9939862489700317,grad_norm: 0.9999990223476372, iteration: 138240
loss: 1.105308175086975,grad_norm: 0.9999996295544136, iteration: 138241
loss: 0.9837033748626709,grad_norm: 0.931413876468088, iteration: 138242
loss: 1.0829017162322998,grad_norm: 0.9999996249928902, iteration: 138243
loss: 1.0030879974365234,grad_norm: 0.9999992733453267, iteration: 138244
loss: 1.0862367153167725,grad_norm: 0.9999997435289628, iteration: 138245
loss: 1.0621203184127808,grad_norm: 0.9999993612600695, iteration: 138246
loss: 0.9704111218452454,grad_norm: 0.9198828659114995, iteration: 138247
loss: 1.0356552600860596,grad_norm: 0.9999994532008573, iteration: 138248
loss: 1.2791013717651367,grad_norm: 0.9999999663145729, iteration: 138249
loss: 0.9792230725288391,grad_norm: 0.9506659816279039, iteration: 138250
loss: 1.0285567045211792,grad_norm: 0.9601677866286459, iteration: 138251
loss: 1.0244269371032715,grad_norm: 0.9999990454014898, iteration: 138252
loss: 1.007322072982788,grad_norm: 0.9999995640801446, iteration: 138253
loss: 1.086178183555603,grad_norm: 0.999999487993433, iteration: 138254
loss: 1.4973320960998535,grad_norm: 0.999999826263085, iteration: 138255
loss: 1.23635995388031,grad_norm: 0.9999998372510002, iteration: 138256
loss: 1.0319218635559082,grad_norm: 0.9027438884166684, iteration: 138257
loss: 1.0017679929733276,grad_norm: 0.9999994088592288, iteration: 138258
loss: 1.1014060974121094,grad_norm: 0.9999995329566485, iteration: 138259
loss: 1.204079031944275,grad_norm: 0.9999993635310631, iteration: 138260
loss: 0.993634045124054,grad_norm: 0.9999991801611375, iteration: 138261
loss: 1.0456719398498535,grad_norm: 0.9999991000013397, iteration: 138262
loss: 1.0393258333206177,grad_norm: 0.9999990263290538, iteration: 138263
loss: 1.300977349281311,grad_norm: 0.9999996520373371, iteration: 138264
loss: 1.496850609779358,grad_norm: 0.9999999054513464, iteration: 138265
loss: 1.0315085649490356,grad_norm: 0.9999994057029417, iteration: 138266
loss: 1.2027533054351807,grad_norm: 0.9999998820724348, iteration: 138267
loss: 1.1011847257614136,grad_norm: 0.9999999270660036, iteration: 138268
loss: 1.1782705783843994,grad_norm: 1.0000000049395181, iteration: 138269
loss: 1.1003965139389038,grad_norm: 0.9300346497985491, iteration: 138270
loss: 1.0202852487564087,grad_norm: 0.9218474444298661, iteration: 138271
loss: 1.0222938060760498,grad_norm: 0.9999991298049019, iteration: 138272
loss: 1.0550256967544556,grad_norm: 0.9999993859745744, iteration: 138273
loss: 1.1383614540100098,grad_norm: 0.9999998020316875, iteration: 138274
loss: 1.041698932647705,grad_norm: 0.9999992622076962, iteration: 138275
loss: 1.1002713441848755,grad_norm: 0.9999995082736143, iteration: 138276
loss: 1.081586480140686,grad_norm: 0.9999994635917204, iteration: 138277
loss: 1.124438762664795,grad_norm: 0.9999999348197053, iteration: 138278
loss: 0.9765589237213135,grad_norm: 0.782785056758879, iteration: 138279
loss: 1.0296053886413574,grad_norm: 0.9999992793123724, iteration: 138280
loss: 1.0482276678085327,grad_norm: 0.9999994047189488, iteration: 138281
loss: 1.093001365661621,grad_norm: 0.9906315055694062, iteration: 138282
loss: 1.0008479356765747,grad_norm: 0.8567771567138613, iteration: 138283
loss: 1.0201948881149292,grad_norm: 0.949586343038668, iteration: 138284
loss: 1.0557160377502441,grad_norm: 0.9999992614576328, iteration: 138285
loss: 0.9861279129981995,grad_norm: 0.8006571365918755, iteration: 138286
loss: 1.1430656909942627,grad_norm: 0.9999998594030627, iteration: 138287
loss: 1.061015009880066,grad_norm: 1.0000000170774779, iteration: 138288
loss: 1.0176732540130615,grad_norm: 0.9999990951818416, iteration: 138289
loss: 1.0969632863998413,grad_norm: 0.9999995092740562, iteration: 138290
loss: 0.9817933440208435,grad_norm: 0.999999673215333, iteration: 138291
loss: 1.0605072975158691,grad_norm: 0.9999998553788173, iteration: 138292
loss: 1.1526415348052979,grad_norm: 0.9018350757189864, iteration: 138293
loss: 1.0332468748092651,grad_norm: 0.873550798955567, iteration: 138294
loss: 1.1211191415786743,grad_norm: 0.9601610387601874, iteration: 138295
loss: 1.0695881843566895,grad_norm: 0.9999995748947377, iteration: 138296
loss: 0.9736846089363098,grad_norm: 0.9768910715278483, iteration: 138297
loss: 1.001521110534668,grad_norm: 0.8799118963639128, iteration: 138298
loss: 0.9615353941917419,grad_norm: 0.9118821060134527, iteration: 138299
loss: 0.9474626183509827,grad_norm: 0.828680468415494, iteration: 138300
loss: 1.015665054321289,grad_norm: 0.8690079442221045, iteration: 138301
loss: 0.9955845475196838,grad_norm: 0.999999337940682, iteration: 138302
loss: 0.9825459718704224,grad_norm: 0.9999991962916445, iteration: 138303
loss: 1.0220240354537964,grad_norm: 0.9999995090686126, iteration: 138304
loss: 1.0002723932266235,grad_norm: 0.8632753624354933, iteration: 138305
loss: 1.21439528465271,grad_norm: 0.9999999379382842, iteration: 138306
loss: 0.9772922992706299,grad_norm: 0.8409960107165466, iteration: 138307
loss: 1.0151169300079346,grad_norm: 0.8526334386966383, iteration: 138308
loss: 1.055801272392273,grad_norm: 0.9999991039957521, iteration: 138309
loss: 1.0580204725265503,grad_norm: 0.9999995740168117, iteration: 138310
loss: 1.041581153869629,grad_norm: 0.9999994470697482, iteration: 138311
loss: 1.0600883960723877,grad_norm: 0.9999992594722124, iteration: 138312
loss: 1.1664007902145386,grad_norm: 0.9999998814808129, iteration: 138313
loss: 1.0082120895385742,grad_norm: 0.9999990450910535, iteration: 138314
loss: 1.2580429315567017,grad_norm: 0.9999994457312974, iteration: 138315
loss: 1.0697336196899414,grad_norm: 0.9999991910389195, iteration: 138316
loss: 0.9935317635536194,grad_norm: 0.999999121348722, iteration: 138317
loss: 0.9892868399620056,grad_norm: 0.9386444100784473, iteration: 138318
loss: 1.1564106941223145,grad_norm: 0.9999999294333773, iteration: 138319
loss: 1.4392714500427246,grad_norm: 0.9999994363144693, iteration: 138320
loss: 1.1112293004989624,grad_norm: 0.9999995316320562, iteration: 138321
loss: 1.1695771217346191,grad_norm: 0.9999999586604114, iteration: 138322
loss: 1.003879427909851,grad_norm: 0.9999999042123866, iteration: 138323
loss: 1.0852985382080078,grad_norm: 1.000000061497339, iteration: 138324
loss: 1.009218454360962,grad_norm: 0.9999990454976148, iteration: 138325
loss: 1.0502471923828125,grad_norm: 0.9503076530039635, iteration: 138326
loss: 1.0768784284591675,grad_norm: 0.9999993112689041, iteration: 138327
loss: 1.0786752700805664,grad_norm: 0.9999993842542367, iteration: 138328
loss: 1.0289766788482666,grad_norm: 0.999999288362119, iteration: 138329
loss: 0.990510880947113,grad_norm: 0.8208024276240892, iteration: 138330
loss: 1.2377538681030273,grad_norm: 0.9999995193845873, iteration: 138331
loss: 1.0418429374694824,grad_norm: 0.9999999092590902, iteration: 138332
loss: 1.132277488708496,grad_norm: 0.942075147017413, iteration: 138333
loss: 1.001757264137268,grad_norm: 0.9436704511767913, iteration: 138334
loss: 1.1468987464904785,grad_norm: 1.0000000283855444, iteration: 138335
loss: 1.065248966217041,grad_norm: 0.9999997026859715, iteration: 138336
loss: 1.0260145664215088,grad_norm: 0.8146459485371748, iteration: 138337
loss: 0.9841820001602173,grad_norm: 0.999999100487115, iteration: 138338
loss: 1.0152486562728882,grad_norm: 0.9999994892358255, iteration: 138339
loss: 1.1570290327072144,grad_norm: 0.9999995473390113, iteration: 138340
loss: 1.0983589887619019,grad_norm: 0.9999998368189166, iteration: 138341
loss: 1.1014214754104614,grad_norm: 0.9999997722233311, iteration: 138342
loss: 1.0995420217514038,grad_norm: 0.9999991852161416, iteration: 138343
loss: 1.1180126667022705,grad_norm: 0.9999998534341327, iteration: 138344
loss: 1.2203654050827026,grad_norm: 0.99999920814856, iteration: 138345
loss: 1.055964469909668,grad_norm: 1.000000002681293, iteration: 138346
loss: 1.109135627746582,grad_norm: 0.9999992224808629, iteration: 138347
loss: 1.0998461246490479,grad_norm: 0.9999993308741989, iteration: 138348
loss: 0.9808421730995178,grad_norm: 0.9740793782697119, iteration: 138349
loss: 1.0091819763183594,grad_norm: 0.8420430795864337, iteration: 138350
loss: 1.1443036794662476,grad_norm: 0.9999996143950762, iteration: 138351
loss: 1.0862987041473389,grad_norm: 0.999999723184783, iteration: 138352
loss: 1.1136126518249512,grad_norm: 0.9999991401088449, iteration: 138353
loss: 0.9972546696662903,grad_norm: 0.9999994557020093, iteration: 138354
loss: 1.1012938022613525,grad_norm: 0.9999990768357059, iteration: 138355
loss: 1.0854253768920898,grad_norm: 0.9999995881134134, iteration: 138356
loss: 0.9749863743782043,grad_norm: 0.9999992413946126, iteration: 138357
loss: 1.1469918489456177,grad_norm: 0.9875112711967582, iteration: 138358
loss: 1.022639513015747,grad_norm: 0.9999990850743945, iteration: 138359
loss: 1.1130179166793823,grad_norm: 0.9999999923259275, iteration: 138360
loss: 1.0579355955123901,grad_norm: 0.9999997388073006, iteration: 138361
loss: 1.0985971689224243,grad_norm: 0.999999914529364, iteration: 138362
loss: 1.2280253171920776,grad_norm: 0.9999993103444005, iteration: 138363
loss: 1.061503291130066,grad_norm: 0.9999994987522061, iteration: 138364
loss: 1.0761408805847168,grad_norm: 0.9999999017068874, iteration: 138365
loss: 1.0266481637954712,grad_norm: 0.9999996750326494, iteration: 138366
loss: 1.0052810907363892,grad_norm: 0.9999998264737713, iteration: 138367
loss: 1.1980490684509277,grad_norm: 0.9999999046812404, iteration: 138368
loss: 1.226162075996399,grad_norm: 0.9999998781226821, iteration: 138369
loss: 1.0736608505249023,grad_norm: 0.9999999011283253, iteration: 138370
loss: 1.080085277557373,grad_norm: 0.9999991211379794, iteration: 138371
loss: 1.0392497777938843,grad_norm: 0.9999993939563315, iteration: 138372
loss: 1.0142844915390015,grad_norm: 0.9297726030582237, iteration: 138373
loss: 1.0217231512069702,grad_norm: 0.9999991311282573, iteration: 138374
loss: 1.1138230562210083,grad_norm: 0.9999992924908364, iteration: 138375
loss: 1.1490716934204102,grad_norm: 0.9999999111577301, iteration: 138376
loss: 1.132910132408142,grad_norm: 1.0000000281271233, iteration: 138377
loss: 1.0125768184661865,grad_norm: 0.9999992239362123, iteration: 138378
loss: 1.0440117120742798,grad_norm: 0.9573552517276214, iteration: 138379
loss: 1.0815379619598389,grad_norm: 0.9967129440751407, iteration: 138380
loss: 1.0818169116973877,grad_norm: 0.9999996569558323, iteration: 138381
loss: 1.0406776666641235,grad_norm: 0.9999990918069408, iteration: 138382
loss: 1.1171165704727173,grad_norm: 0.9999994525284458, iteration: 138383
loss: 1.2485898733139038,grad_norm: 1.0000000172074404, iteration: 138384
loss: 1.0824562311172485,grad_norm: 0.9999998355498809, iteration: 138385
loss: 1.0631572008132935,grad_norm: 0.9999990604819421, iteration: 138386
loss: 1.027754783630371,grad_norm: 0.9999991195872338, iteration: 138387
loss: 0.9869308471679688,grad_norm: 0.848759746608132, iteration: 138388
loss: 1.0166449546813965,grad_norm: 0.993782511773438, iteration: 138389
loss: 1.3621923923492432,grad_norm: 0.9999998315336864, iteration: 138390
loss: 1.0668020248413086,grad_norm: 0.9999994290578029, iteration: 138391
loss: 1.1548349857330322,grad_norm: 0.9999998354698239, iteration: 138392
loss: 0.9912917017936707,grad_norm: 0.9567534830742258, iteration: 138393
loss: 0.9878493547439575,grad_norm: 0.9170775547979446, iteration: 138394
loss: 1.0432603359222412,grad_norm: 0.9999991870919657, iteration: 138395
loss: 1.0222378969192505,grad_norm: 0.9616179070541384, iteration: 138396
loss: 1.0721839666366577,grad_norm: 0.9405953899289959, iteration: 138397
loss: 1.0519603490829468,grad_norm: 0.9999993144854187, iteration: 138398
loss: 1.0827685594558716,grad_norm: 0.9999999777247495, iteration: 138399
loss: 1.0480085611343384,grad_norm: 0.9999992434856724, iteration: 138400
loss: 1.1484638452529907,grad_norm: 0.9999999543998394, iteration: 138401
loss: 1.2003706693649292,grad_norm: 1.0000001056486445, iteration: 138402
loss: 1.0938447713851929,grad_norm: 0.9999998716346162, iteration: 138403
loss: 1.0594732761383057,grad_norm: 0.99999991518651, iteration: 138404
loss: 1.139992594718933,grad_norm: 0.999999853452878, iteration: 138405
loss: 1.059542179107666,grad_norm: 0.9999998403908097, iteration: 138406
loss: 1.1431022882461548,grad_norm: 0.9999990327534904, iteration: 138407
loss: 1.2428284883499146,grad_norm: 0.9999998247014146, iteration: 138408
loss: 1.037752628326416,grad_norm: 0.999999205329795, iteration: 138409
loss: 1.1922591924667358,grad_norm: 0.9999997686841375, iteration: 138410
loss: 1.0687199831008911,grad_norm: 0.9999995392789217, iteration: 138411
loss: 0.9881085753440857,grad_norm: 0.8805147848812134, iteration: 138412
loss: 1.0537642240524292,grad_norm: 0.9999997629635762, iteration: 138413
loss: 1.150184988975525,grad_norm: 0.9999996338214305, iteration: 138414
loss: 1.270200252532959,grad_norm: 0.9999997417480312, iteration: 138415
loss: 1.0366710424423218,grad_norm: 0.8781974239504946, iteration: 138416
loss: 1.2533148527145386,grad_norm: 0.9999995082167579, iteration: 138417
loss: 1.31258225440979,grad_norm: 0.9999999419911891, iteration: 138418
loss: 1.3012592792510986,grad_norm: 1.0000000272708274, iteration: 138419
loss: 1.1098629236221313,grad_norm: 0.9999998774434132, iteration: 138420
loss: 1.0248156785964966,grad_norm: 0.999999497126763, iteration: 138421
loss: 1.0513807535171509,grad_norm: 0.9999995458422365, iteration: 138422
loss: 1.0792733430862427,grad_norm: 0.9999997497751151, iteration: 138423
loss: 1.0593212842941284,grad_norm: 0.9944696997738305, iteration: 138424
loss: 1.2441996335983276,grad_norm: 0.9999998153222999, iteration: 138425
loss: 1.0593533515930176,grad_norm: 0.999999607878179, iteration: 138426
loss: 1.1234517097473145,grad_norm: 0.9999995351690678, iteration: 138427
loss: 0.9997187256813049,grad_norm: 0.9999993025961286, iteration: 138428
loss: 1.0980652570724487,grad_norm: 0.9999999378898516, iteration: 138429
loss: 1.0414942502975464,grad_norm: 0.862410146784494, iteration: 138430
loss: 1.0916560888290405,grad_norm: 0.9999991039367704, iteration: 138431
loss: 1.0156264305114746,grad_norm: 0.7669328366747176, iteration: 138432
loss: 1.0068331956863403,grad_norm: 0.7939783214475337, iteration: 138433
loss: 1.1711153984069824,grad_norm: 0.9999995355126509, iteration: 138434
loss: 1.0694972276687622,grad_norm: 0.9999994553703767, iteration: 138435
loss: 1.025308609008789,grad_norm: 0.9999990656709388, iteration: 138436
loss: 1.1107293367385864,grad_norm: 0.9999995755634061, iteration: 138437
loss: 1.3419924974441528,grad_norm: 0.9999996131356956, iteration: 138438
loss: 1.1906687021255493,grad_norm: 0.9999996749627981, iteration: 138439
loss: 1.0809392929077148,grad_norm: 0.9999994874090613, iteration: 138440
loss: 0.992673397064209,grad_norm: 0.924048281409572, iteration: 138441
loss: 0.9832435250282288,grad_norm: 0.9999994796456199, iteration: 138442
loss: 1.1117305755615234,grad_norm: 0.999999526751942, iteration: 138443
loss: 1.1127393245697021,grad_norm: 0.9999995362896063, iteration: 138444
loss: 1.0102275609970093,grad_norm: 0.9522453994762525, iteration: 138445
loss: 1.040093183517456,grad_norm: 0.9999995035194511, iteration: 138446
loss: 1.0996829271316528,grad_norm: 0.9999993131220657, iteration: 138447
loss: 1.2156511545181274,grad_norm: 1.0000000447212052, iteration: 138448
loss: 1.192941665649414,grad_norm: 0.9999998933172461, iteration: 138449
loss: 1.3509658575057983,grad_norm: 0.9999997670090391, iteration: 138450
loss: 1.0324366092681885,grad_norm: 0.969383270039172, iteration: 138451
loss: 0.9799016714096069,grad_norm: 0.8911974082489214, iteration: 138452
loss: 1.0538651943206787,grad_norm: 0.9999995618078613, iteration: 138453
loss: 0.9883344173431396,grad_norm: 0.9301074626157523, iteration: 138454
loss: 1.060018539428711,grad_norm: 0.9999993722176831, iteration: 138455
loss: 1.0667774677276611,grad_norm: 0.9999994432852427, iteration: 138456
loss: 1.1303462982177734,grad_norm: 0.9999998135729278, iteration: 138457
loss: 1.250622034072876,grad_norm: 0.9999999043136543, iteration: 138458
loss: 1.071232795715332,grad_norm: 0.9999998779216186, iteration: 138459
loss: 1.0013573169708252,grad_norm: 0.8326649773422138, iteration: 138460
loss: 1.0632412433624268,grad_norm: 0.9999990901464478, iteration: 138461
loss: 1.0421247482299805,grad_norm: 0.921090202729635, iteration: 138462
loss: 1.0338468551635742,grad_norm: 0.8349826992907067, iteration: 138463
loss: 1.22523832321167,grad_norm: 0.9999993185199676, iteration: 138464
loss: 1.1036450862884521,grad_norm: 0.999999454113583, iteration: 138465
loss: 1.0954031944274902,grad_norm: 0.9999996753106716, iteration: 138466
loss: 1.2179971933364868,grad_norm: 0.9999998868289345, iteration: 138467
loss: 1.1020996570587158,grad_norm: 0.9999998603345331, iteration: 138468
loss: 1.0556615591049194,grad_norm: 0.999999322063878, iteration: 138469
loss: 1.0406227111816406,grad_norm: 1.0000000508271834, iteration: 138470
loss: 1.0928345918655396,grad_norm: 0.9999996335381246, iteration: 138471
loss: 1.0360407829284668,grad_norm: 0.999999887140663, iteration: 138472
loss: 1.11573326587677,grad_norm: 0.9999998594039033, iteration: 138473
loss: 1.060211420059204,grad_norm: 0.9999994572315772, iteration: 138474
loss: 1.1722177267074585,grad_norm: 0.9999996181233516, iteration: 138475
loss: 1.2589360475540161,grad_norm: 0.999999845366925, iteration: 138476
loss: 1.052752137184143,grad_norm: 0.9999996264139691, iteration: 138477
loss: 1.1122169494628906,grad_norm: 0.9999998855138816, iteration: 138478
loss: 1.3691800832748413,grad_norm: 0.999999757881604, iteration: 138479
loss: 1.0187952518463135,grad_norm: 0.9613809170394025, iteration: 138480
loss: 1.2728580236434937,grad_norm: 0.9999998134608595, iteration: 138481
loss: 1.0576847791671753,grad_norm: 0.9999997845943553, iteration: 138482
loss: 0.9681402444839478,grad_norm: 0.9999993380000022, iteration: 138483
loss: 1.004699468612671,grad_norm: 0.9821365381406761, iteration: 138484
loss: 1.0820144414901733,grad_norm: 0.9999999982902206, iteration: 138485
loss: 1.0476139783859253,grad_norm: 0.9999997458365609, iteration: 138486
loss: 1.0903443098068237,grad_norm: 0.9999996199692941, iteration: 138487
loss: 1.1322987079620361,grad_norm: 0.9999996631707936, iteration: 138488
loss: 1.1926708221435547,grad_norm: 1.0000000943672338, iteration: 138489
loss: 1.1553155183792114,grad_norm: 0.9999999472917054, iteration: 138490
loss: 1.0960142612457275,grad_norm: 1.0000000025137015, iteration: 138491
loss: 1.1528550386428833,grad_norm: 0.999999666877951, iteration: 138492
loss: 1.0660109519958496,grad_norm: 0.9999996681911437, iteration: 138493
loss: 1.0633611679077148,grad_norm: 0.9999998470759155, iteration: 138494
loss: 1.3550971746444702,grad_norm: 0.9999999950362256, iteration: 138495
loss: 1.2696235179901123,grad_norm: 0.9999998468123452, iteration: 138496
loss: 1.036750316619873,grad_norm: 0.9999994631763086, iteration: 138497
loss: 1.057929277420044,grad_norm: 0.9999994202917392, iteration: 138498
loss: 1.3235950469970703,grad_norm: 0.9999999166786104, iteration: 138499
loss: 1.2348958253860474,grad_norm: 0.9999998295387369, iteration: 138500
loss: 1.2850309610366821,grad_norm: 0.9999997994402087, iteration: 138501
loss: 1.1266833543777466,grad_norm: 0.9999998169626932, iteration: 138502
loss: 1.1690843105316162,grad_norm: 0.9999999726586235, iteration: 138503
loss: 1.311541199684143,grad_norm: 0.999999871131447, iteration: 138504
loss: 1.184815526008606,grad_norm: 0.999999780491261, iteration: 138505
loss: 1.1487653255462646,grad_norm: 0.999999778306827, iteration: 138506
loss: 1.3654838800430298,grad_norm: 0.9999996014120832, iteration: 138507
loss: 1.141730546951294,grad_norm: 0.9999993736739978, iteration: 138508
loss: 1.2150702476501465,grad_norm: 0.9999995779709191, iteration: 138509
loss: 1.1452349424362183,grad_norm: 0.999999891463737, iteration: 138510
loss: 1.1282647848129272,grad_norm: 0.9999998258136735, iteration: 138511
loss: 1.4339532852172852,grad_norm: 0.9999998951647636, iteration: 138512
loss: 1.2212274074554443,grad_norm: 0.9999996621475609, iteration: 138513
loss: 0.9649362564086914,grad_norm: 0.9999991785972147, iteration: 138514
loss: 1.1007461547851562,grad_norm: 0.9999996707836453, iteration: 138515
loss: 1.1189161539077759,grad_norm: 0.9999999876514288, iteration: 138516
loss: 1.260684847831726,grad_norm: 0.9999999326590774, iteration: 138517
loss: 1.016235113143921,grad_norm: 0.9999996877139302, iteration: 138518
loss: 1.1688578128814697,grad_norm: 1.0000000157599123, iteration: 138519
loss: 1.0979779958724976,grad_norm: 0.9999992098055174, iteration: 138520
loss: 1.0575947761535645,grad_norm: 0.9999996413184484, iteration: 138521
loss: 1.2360855340957642,grad_norm: 0.9999998809743362, iteration: 138522
loss: 1.4058018922805786,grad_norm: 0.9999999394188195, iteration: 138523
loss: 1.309230923652649,grad_norm: 0.9999995639238279, iteration: 138524
loss: 1.0123562812805176,grad_norm: 0.9435544741904027, iteration: 138525
loss: 1.1749907732009888,grad_norm: 0.9999998230401859, iteration: 138526
loss: 1.2966583967208862,grad_norm: 0.9999999858611268, iteration: 138527
loss: 1.2049319744110107,grad_norm: 0.9999999252954879, iteration: 138528
loss: 1.3900401592254639,grad_norm: 0.9999998953013969, iteration: 138529
loss: 1.1743344068527222,grad_norm: 0.9999996712592146, iteration: 138530
loss: 1.5038206577301025,grad_norm: 1.0000000503597724, iteration: 138531
loss: 1.210693120956421,grad_norm: 0.9999999051897677, iteration: 138532
loss: 1.3148082494735718,grad_norm: 0.9999999058945573, iteration: 138533
loss: 1.1908515691757202,grad_norm: 0.9999993693490868, iteration: 138534
loss: 1.1462807655334473,grad_norm: 0.9999994437127585, iteration: 138535
loss: 1.245396375656128,grad_norm: 0.9999999893393275, iteration: 138536
loss: 1.402616024017334,grad_norm: 0.9999999926280498, iteration: 138537
loss: 1.4127148389816284,grad_norm: 1.0000000334334693, iteration: 138538
loss: 1.270894169807434,grad_norm: 0.9999993987022816, iteration: 138539
loss: 1.4076206684112549,grad_norm: 0.9999997733832561, iteration: 138540
loss: 1.260236382484436,grad_norm: 0.9999995678925058, iteration: 138541
loss: 1.1818643808364868,grad_norm: 0.999999957966258, iteration: 138542
loss: 1.3521556854248047,grad_norm: 0.9999997269190551, iteration: 138543
loss: 1.9693745374679565,grad_norm: 0.9999998938319953, iteration: 138544
loss: 1.3439174890518188,grad_norm: 0.999999891009429, iteration: 138545
loss: 1.3401962518692017,grad_norm: 0.9999997760022337, iteration: 138546
loss: 1.5467278957366943,grad_norm: 0.9999999099433052, iteration: 138547
loss: 1.5398885011672974,grad_norm: 0.9999998928897385, iteration: 138548
loss: 1.4776114225387573,grad_norm: 0.999999918734411, iteration: 138549
loss: 1.2684727907180786,grad_norm: 0.9999999224978906, iteration: 138550
loss: 1.39475417137146,grad_norm: 0.9999999649074972, iteration: 138551
loss: 1.682027816772461,grad_norm: 0.9999999805154974, iteration: 138552
loss: 1.8622159957885742,grad_norm: 0.9999998756864192, iteration: 138553
loss: 1.631738543510437,grad_norm: 0.9999998024613245, iteration: 138554
loss: 1.5519554615020752,grad_norm: 0.9999998425959011, iteration: 138555
loss: 1.4515682458877563,grad_norm: 0.9999998892047653, iteration: 138556
loss: 1.9999725818634033,grad_norm: 0.9999998220689787, iteration: 138557
loss: 1.5915392637252808,grad_norm: 0.9999999690867979, iteration: 138558
loss: 2.017730951309204,grad_norm: 0.9999999213611159, iteration: 138559
loss: 1.7739496231079102,grad_norm: 0.9999999516130437, iteration: 138560
loss: 1.4202102422714233,grad_norm: 0.9999999462968574, iteration: 138561
loss: 1.61832857131958,grad_norm: 0.9999999418264554, iteration: 138562
loss: 1.5234768390655518,grad_norm: 0.9999999568788682, iteration: 138563
loss: 1.9048490524291992,grad_norm: 0.9999998717694765, iteration: 138564
loss: 1.511839747428894,grad_norm: 0.9999999622741462, iteration: 138565
loss: 1.7796372175216675,grad_norm: 1.0000000464366776, iteration: 138566
loss: 1.939823865890503,grad_norm: 0.9999999707009154, iteration: 138567
loss: 1.5890194177627563,grad_norm: 0.9999999820174477, iteration: 138568
loss: 1.5642633438110352,grad_norm: 0.9999998167504629, iteration: 138569
loss: 1.3631298542022705,grad_norm: 0.9999999937750331, iteration: 138570
loss: 1.7492961883544922,grad_norm: 0.9999999357798991, iteration: 138571
loss: 1.4982086420059204,grad_norm: 0.9999998770739169, iteration: 138572
loss: 1.3943088054656982,grad_norm: 0.9999999217122181, iteration: 138573
loss: 1.519539475440979,grad_norm: 1.0000000391012565, iteration: 138574
loss: 1.6658624410629272,grad_norm: 0.9999999532346201, iteration: 138575
loss: 2.012122392654419,grad_norm: 0.999999866213671, iteration: 138576
loss: 1.8414686918258667,grad_norm: 0.9999998839356732, iteration: 138577
loss: 1.6126857995986938,grad_norm: 0.999999863026566, iteration: 138578
loss: 1.747721552848816,grad_norm: 0.9999999511579313, iteration: 138579
loss: 1.6231528520584106,grad_norm: 0.9999998919614691, iteration: 138580
loss: 1.8304179906845093,grad_norm: 1.0000000241752753, iteration: 138581
loss: 1.6435822248458862,grad_norm: 0.9999999716515039, iteration: 138582
loss: 1.5199733972549438,grad_norm: 1.0000000128062476, iteration: 138583
loss: 1.4502978324890137,grad_norm: 0.9999997941485848, iteration: 138584
loss: 1.809674859046936,grad_norm: 0.9999999703300113, iteration: 138585
loss: 1.4692332744598389,grad_norm: 0.9999998128595532, iteration: 138586
loss: 1.5929089784622192,grad_norm: 0.9999999131785086, iteration: 138587
loss: 1.818529725074768,grad_norm: 0.9999999313590907, iteration: 138588
loss: 1.5233229398727417,grad_norm: 0.9999997364809533, iteration: 138589
loss: 1.7685248851776123,grad_norm: 0.9999999865809427, iteration: 138590
loss: 1.3822274208068848,grad_norm: 0.9999998178465226, iteration: 138591
loss: 1.543634057044983,grad_norm: 0.999999863327903, iteration: 138592
loss: 1.8264787197113037,grad_norm: 0.9999998496689331, iteration: 138593
loss: 1.47540283203125,grad_norm: 0.9999997481499721, iteration: 138594
loss: 1.6641157865524292,grad_norm: 0.9999999762056241, iteration: 138595
loss: 1.317286729812622,grad_norm: 0.9999998903923996, iteration: 138596
loss: 1.3131799697875977,grad_norm: 0.9999999782044089, iteration: 138597
loss: 1.3790127038955688,grad_norm: 0.9999997662649771, iteration: 138598
loss: 1.5740314722061157,grad_norm: 0.9999999885782217, iteration: 138599
loss: 1.7577658891677856,grad_norm: 0.9999998205856656, iteration: 138600
loss: 1.4939823150634766,grad_norm: 0.9999999045251637, iteration: 138601
loss: 1.3292114734649658,grad_norm: 0.9999998546024138, iteration: 138602
loss: 1.3668981790542603,grad_norm: 0.999999906263489, iteration: 138603
loss: 1.4210165739059448,grad_norm: 0.9999998741189718, iteration: 138604
loss: 1.3049689531326294,grad_norm: 0.9999999764336329, iteration: 138605
loss: 1.3610543012619019,grad_norm: 0.9999999266001853, iteration: 138606
loss: 1.4627447128295898,grad_norm: 0.9999998994319117, iteration: 138607
loss: 1.4838465452194214,grad_norm: 0.9999998592056527, iteration: 138608
loss: 1.4230313301086426,grad_norm: 1.000000022343971, iteration: 138609
loss: 1.6939388513565063,grad_norm: 0.9999999166832072, iteration: 138610
loss: 1.3634049892425537,grad_norm: 0.9999996053163598, iteration: 138611
loss: 1.3743535280227661,grad_norm: 0.9999999389897334, iteration: 138612
loss: 1.7285493612289429,grad_norm: 1.0000000300426772, iteration: 138613
loss: 1.5778162479400635,grad_norm: 0.9999997053910443, iteration: 138614
loss: 1.6325373649597168,grad_norm: 0.9999998706245686, iteration: 138615
loss: 1.4803510904312134,grad_norm: 0.999999927404262, iteration: 138616
loss: 1.4388818740844727,grad_norm: 0.999999903925608, iteration: 138617
loss: 1.439043641090393,grad_norm: 0.9999998580350319, iteration: 138618
loss: 1.2959030866622925,grad_norm: 0.9999999197665038, iteration: 138619
loss: 1.5217586755752563,grad_norm: 0.9999999755607734, iteration: 138620
loss: 1.8472546339035034,grad_norm: 1.000000028427183, iteration: 138621
loss: 1.6531904935836792,grad_norm: 1.0000000333897427, iteration: 138622
loss: 1.6787621974945068,grad_norm: 0.9999998897844502, iteration: 138623
loss: 1.3458759784698486,grad_norm: 0.9999998778031995, iteration: 138624
loss: 1.39438796043396,grad_norm: 1.0000000242400222, iteration: 138625
loss: 1.8385759592056274,grad_norm: 0.9999999075846006, iteration: 138626
loss: 1.3205654621124268,grad_norm: 0.99999999608623, iteration: 138627
loss: 1.4498064517974854,grad_norm: 0.9999998974426851, iteration: 138628
loss: 1.6137702465057373,grad_norm: 1.000000012776417, iteration: 138629
loss: 1.4658253192901611,grad_norm: 0.9999999903701396, iteration: 138630
loss: 1.6947230100631714,grad_norm: 0.9999999267593238, iteration: 138631
loss: 1.4932491779327393,grad_norm: 0.999999810790781, iteration: 138632
loss: 1.9692304134368896,grad_norm: 0.999999840013763, iteration: 138633
loss: 1.6378445625305176,grad_norm: 0.9999999705622504, iteration: 138634
loss: 1.8656874895095825,grad_norm: 1.0000000220767757, iteration: 138635
loss: 1.9622219800949097,grad_norm: 0.9999999713303857, iteration: 138636
loss: 1.5288739204406738,grad_norm: 0.999999868436023, iteration: 138637
loss: 1.3053494691848755,grad_norm: 0.9999998374333352, iteration: 138638
loss: 1.578222632408142,grad_norm: 0.9999998073092383, iteration: 138639
loss: 1.6732299327850342,grad_norm: 0.9999998572324247, iteration: 138640
loss: 1.6820822954177856,grad_norm: 0.9999999458144738, iteration: 138641
loss: 1.6333765983581543,grad_norm: 1.0000000053289284, iteration: 138642
loss: 1.5245141983032227,grad_norm: 0.9999999544048062, iteration: 138643
loss: 1.4911729097366333,grad_norm: 0.9999997807301534, iteration: 138644
loss: 1.4724584817886353,grad_norm: 0.9999999186628398, iteration: 138645
loss: 1.4849756956100464,grad_norm: 1.0000000316303932, iteration: 138646
loss: 1.4351633787155151,grad_norm: 1.0000000598536478, iteration: 138647
loss: 1.4000575542449951,grad_norm: 1.0000000375061784, iteration: 138648
loss: 1.4510793685913086,grad_norm: 0.999999970970305, iteration: 138649
loss: 1.5035285949707031,grad_norm: 0.9999997444434402, iteration: 138650
loss: 1.5589853525161743,grad_norm: 0.999999901218102, iteration: 138651
loss: 1.7809747457504272,grad_norm: 0.9999999050749258, iteration: 138652
loss: 1.4294151067733765,grad_norm: 0.9999998542377262, iteration: 138653
loss: 1.7627063989639282,grad_norm: 0.9999999044204634, iteration: 138654
loss: 1.40298593044281,grad_norm: 0.999999901733307, iteration: 138655
loss: 1.5225958824157715,grad_norm: 0.9999995828801895, iteration: 138656
loss: 1.2931205034255981,grad_norm: 0.9999999286508113, iteration: 138657
loss: 1.7264035940170288,grad_norm: 0.9999998007665591, iteration: 138658
loss: 1.2617268562316895,grad_norm: 0.9999999010462199, iteration: 138659
loss: 1.8367482423782349,grad_norm: 0.9999998937276409, iteration: 138660
loss: 1.1053227186203003,grad_norm: 0.99999964478743, iteration: 138661
loss: 1.5020031929016113,grad_norm: 0.9999999936668509, iteration: 138662
loss: 1.352842092514038,grad_norm: 0.9999997612407667, iteration: 138663
loss: 1.9734529256820679,grad_norm: 1.0000000769066697, iteration: 138664
loss: 1.4177916049957275,grad_norm: 1.0000000862952014, iteration: 138665
loss: 1.5637683868408203,grad_norm: 1.0000000583078186, iteration: 138666
loss: 1.4903380870819092,grad_norm: 0.9999998552039007, iteration: 138667
loss: 1.19307541847229,grad_norm: 0.9999998704974249, iteration: 138668
loss: 1.437859058380127,grad_norm: 0.9999999731445355, iteration: 138669
loss: 1.435056447982788,grad_norm: 0.9999999192261072, iteration: 138670
loss: 1.712870717048645,grad_norm: 0.9999999049034815, iteration: 138671
loss: 1.2659457921981812,grad_norm: 0.9999993725945646, iteration: 138672
loss: 1.1917057037353516,grad_norm: 0.9999996005138894, iteration: 138673
loss: 1.5125689506530762,grad_norm: 0.9999998997910249, iteration: 138674
loss: 1.4650583267211914,grad_norm: 1.0000000069163988, iteration: 138675
loss: 1.7862002849578857,grad_norm: 0.9999999869969672, iteration: 138676
loss: 1.3387234210968018,grad_norm: 0.9999999379779594, iteration: 138677
loss: 1.7344880104064941,grad_norm: 0.999999936148371, iteration: 138678
loss: 1.4744187593460083,grad_norm: 0.9999999396544305, iteration: 138679
loss: 1.6573779582977295,grad_norm: 0.9999999344763165, iteration: 138680
loss: 1.7557373046875,grad_norm: 0.9999999174006774, iteration: 138681
loss: 1.4514219760894775,grad_norm: 0.9999997542862912, iteration: 138682
loss: 1.8231189250946045,grad_norm: 1.0000000156045672, iteration: 138683
loss: 1.420730471611023,grad_norm: 0.9999999133873724, iteration: 138684
loss: 1.365224838256836,grad_norm: 0.9999998941159937, iteration: 138685
loss: 1.5106648206710815,grad_norm: 0.9999999228343194, iteration: 138686
loss: 1.5453376770019531,grad_norm: 0.9999999151985346, iteration: 138687
loss: 1.6012417078018188,grad_norm: 0.9999996580738022, iteration: 138688
loss: 1.3085293769836426,grad_norm: 0.999999558681155, iteration: 138689
loss: 1.7203150987625122,grad_norm: 1.0000000019953108, iteration: 138690
loss: 1.2856985330581665,grad_norm: 0.9999999109199873, iteration: 138691
loss: 1.6854861974716187,grad_norm: 0.9999999373017678, iteration: 138692
loss: 1.4607830047607422,grad_norm: 0.999999859566236, iteration: 138693
loss: 1.162932276725769,grad_norm: 0.9999997232496204, iteration: 138694
loss: 1.6534724235534668,grad_norm: 0.9999999374105072, iteration: 138695
loss: 1.4865320920944214,grad_norm: 0.9999999526557721, iteration: 138696
loss: 1.4398925304412842,grad_norm: 0.9999997463582526, iteration: 138697
loss: 1.4011127948760986,grad_norm: 0.999999835488771, iteration: 138698
loss: 1.5136698484420776,grad_norm: 1.0000000119540917, iteration: 138699
loss: 1.751574158668518,grad_norm: 0.9999999871585585, iteration: 138700
loss: 1.6080855131149292,grad_norm: 0.9999999666594593, iteration: 138701
loss: 1.466538429260254,grad_norm: 1.0000000942988267, iteration: 138702
loss: 1.709757685661316,grad_norm: 1.0000000096988333, iteration: 138703
loss: 1.4900754690170288,grad_norm: 0.9999999655793487, iteration: 138704
loss: 1.4619247913360596,grad_norm: 0.9999999082793114, iteration: 138705
loss: 1.3226161003112793,grad_norm: 0.9999996689780701, iteration: 138706
loss: 1.637496829032898,grad_norm: 0.9999999395508384, iteration: 138707
loss: 1.7347949743270874,grad_norm: 0.9999997900563052, iteration: 138708
loss: 1.247116208076477,grad_norm: 0.9999998334047852, iteration: 138709
loss: 1.8175256252288818,grad_norm: 1.0000000079890379, iteration: 138710
loss: 1.3688024282455444,grad_norm: 0.9999998614684609, iteration: 138711
loss: 1.2739923000335693,grad_norm: 0.9999998201193451, iteration: 138712
loss: 1.8095974922180176,grad_norm: 1.0000000974052856, iteration: 138713
loss: 1.367756724357605,grad_norm: 0.9999999790838247, iteration: 138714
loss: 1.529913306236267,grad_norm: 0.999999976723788, iteration: 138715
loss: 1.300899624824524,grad_norm: 0.9999998502501211, iteration: 138716
loss: 1.2591636180877686,grad_norm: 0.9999999736364915, iteration: 138717
loss: 1.3486100435256958,grad_norm: 0.9999998776013986, iteration: 138718
loss: 1.5172244310379028,grad_norm: 0.9999998805682729, iteration: 138719
loss: 1.4851728677749634,grad_norm: 0.9999999742252484, iteration: 138720
loss: 1.4273059368133545,grad_norm: 0.9999997907917808, iteration: 138721
loss: 1.1918309926986694,grad_norm: 0.9999997948075705, iteration: 138722
loss: 1.1484571695327759,grad_norm: 0.9999998574385045, iteration: 138723
loss: 1.2380709648132324,grad_norm: 0.9999999798691207, iteration: 138724
loss: 1.143017292022705,grad_norm: 0.999999473513086, iteration: 138725
loss: 1.2323468923568726,grad_norm: 0.9999997056091844, iteration: 138726
loss: 1.2537734508514404,grad_norm: 0.9999998062300659, iteration: 138727
loss: 1.5724875926971436,grad_norm: 0.9999999206216328, iteration: 138728
loss: 1.380201816558838,grad_norm: 0.9999999843471259, iteration: 138729
loss: 1.2910341024398804,grad_norm: 1.0000000198259782, iteration: 138730
loss: 1.2327113151550293,grad_norm: 0.9999998120338757, iteration: 138731
loss: 1.2465053796768188,grad_norm: 0.9999999031255543, iteration: 138732
loss: 1.3579120635986328,grad_norm: 0.9999999252601313, iteration: 138733
loss: 1.0780537128448486,grad_norm: 0.9999991373466021, iteration: 138734
loss: 1.2858214378356934,grad_norm: 0.999999917794111, iteration: 138735
loss: 1.114162564277649,grad_norm: 0.9999995475725001, iteration: 138736
loss: 1.32402503490448,grad_norm: 0.9999998499397164, iteration: 138737
loss: 1.3285326957702637,grad_norm: 0.999999515879677, iteration: 138738
loss: 1.3467309474945068,grad_norm: 0.9999999063940201, iteration: 138739
loss: 1.3181915283203125,grad_norm: 0.9999999843820279, iteration: 138740
loss: 1.458017110824585,grad_norm: 0.9999998794566619, iteration: 138741
loss: 1.3442927598953247,grad_norm: 1.0000000377787848, iteration: 138742
loss: 1.2036949396133423,grad_norm: 0.9999999430644791, iteration: 138743
loss: 1.2336170673370361,grad_norm: 0.9999997910467733, iteration: 138744
loss: 1.4769662618637085,grad_norm: 0.9999998682982787, iteration: 138745
loss: 1.1920746564865112,grad_norm: 0.9999999627630786, iteration: 138746
loss: 1.1219987869262695,grad_norm: 0.9999997400694255, iteration: 138747
loss: 1.2639011144638062,grad_norm: 0.9999997768125831, iteration: 138748
loss: 1.5933523178100586,grad_norm: 0.9999998745824863, iteration: 138749
loss: 1.284267544746399,grad_norm: 0.9999999723591605, iteration: 138750
loss: 1.291267991065979,grad_norm: 0.9999998308041937, iteration: 138751
loss: 1.268434762954712,grad_norm: 0.9999998711953298, iteration: 138752
loss: 1.0891587734222412,grad_norm: 0.9999998737586951, iteration: 138753
loss: 1.402783751487732,grad_norm: 1.0000000247534748, iteration: 138754
loss: 1.2449181079864502,grad_norm: 1.0000000520324646, iteration: 138755
loss: 1.197424292564392,grad_norm: 0.9999999302774429, iteration: 138756
loss: 1.3069653511047363,grad_norm: 0.9999996617028366, iteration: 138757
loss: 1.3917577266693115,grad_norm: 1.0000000340755952, iteration: 138758
loss: 1.5976686477661133,grad_norm: 0.9999999168075753, iteration: 138759
loss: 1.4370449781417847,grad_norm: 0.9999999753700116, iteration: 138760
loss: 1.4613786935806274,grad_norm: 0.9999999591736931, iteration: 138761
loss: 1.268675684928894,grad_norm: 0.9999998487487234, iteration: 138762
loss: 1.477445363998413,grad_norm: 0.9999997573915822, iteration: 138763
loss: 1.3376145362854004,grad_norm: 0.9999998180339502, iteration: 138764
loss: 1.5210386514663696,grad_norm: 0.9999999274210721, iteration: 138765
loss: 1.597306251525879,grad_norm: 0.9999998838750809, iteration: 138766
loss: 1.4602738618850708,grad_norm: 1.000000038934143, iteration: 138767
loss: 1.209606409072876,grad_norm: 0.9999997149900279, iteration: 138768
loss: 1.2880831956863403,grad_norm: 0.9999998486631327, iteration: 138769
loss: 1.312231421470642,grad_norm: 0.9999997375371366, iteration: 138770
loss: 1.3117492198944092,grad_norm: 0.9999997951412821, iteration: 138771
loss: 1.2975361347198486,grad_norm: 0.9999999736486821, iteration: 138772
loss: 1.3918133974075317,grad_norm: 0.9999998743484041, iteration: 138773
loss: 1.2935370206832886,grad_norm: 0.9999994427856586, iteration: 138774
loss: 1.2111808061599731,grad_norm: 0.999999881880108, iteration: 138775
loss: 1.2050243616104126,grad_norm: 0.9999999422519156, iteration: 138776
loss: 1.2744156122207642,grad_norm: 0.9999999755500043, iteration: 138777
loss: 1.5790834426879883,grad_norm: 0.9999999209600887, iteration: 138778
loss: 1.2132421731948853,grad_norm: 0.9999997429456536, iteration: 138779
loss: 1.4320069551467896,grad_norm: 0.9999998553761645, iteration: 138780
loss: 1.1509243249893188,grad_norm: 0.9999995590816626, iteration: 138781
loss: 1.2690125703811646,grad_norm: 0.9999996844480333, iteration: 138782
loss: 1.1639230251312256,grad_norm: 0.9999997029831587, iteration: 138783
loss: 1.2595291137695312,grad_norm: 0.9999997006226166, iteration: 138784
loss: 1.320040225982666,grad_norm: 0.9999999649323694, iteration: 138785
loss: 1.4222557544708252,grad_norm: 0.9999999484391481, iteration: 138786
loss: 1.3330615758895874,grad_norm: 0.9999997336635034, iteration: 138787
loss: 1.2961722612380981,grad_norm: 0.9999997362859422, iteration: 138788
loss: 1.5260040760040283,grad_norm: 0.9999999163642915, iteration: 138789
loss: 1.3540740013122559,grad_norm: 0.9999996005170629, iteration: 138790
loss: 1.2306958436965942,grad_norm: 0.9999997134165088, iteration: 138791
loss: 1.4239217042922974,grad_norm: 0.999999919214926, iteration: 138792
loss: 1.509549856185913,grad_norm: 1.0000000126065145, iteration: 138793
loss: 1.3052140474319458,grad_norm: 1.0000000230209345, iteration: 138794
loss: 1.129791259765625,grad_norm: 0.9999990271429725, iteration: 138795
loss: 1.2812504768371582,grad_norm: 0.9999998130973091, iteration: 138796
loss: 1.2787702083587646,grad_norm: 0.9999996396024524, iteration: 138797
loss: 1.382804036140442,grad_norm: 0.9999998598647313, iteration: 138798
loss: 1.3526270389556885,grad_norm: 0.9999999008728541, iteration: 138799
loss: 1.1751729249954224,grad_norm: 0.9999999103244528, iteration: 138800
loss: 1.4195853471755981,grad_norm: 0.9999998590954855, iteration: 138801
loss: 1.7357227802276611,grad_norm: 0.9999999971933506, iteration: 138802
loss: 1.4661390781402588,grad_norm: 0.9999996591023604, iteration: 138803
loss: 1.4650304317474365,grad_norm: 0.9999997222726827, iteration: 138804
loss: 1.4731254577636719,grad_norm: 0.9999997701121094, iteration: 138805
loss: 1.446629524230957,grad_norm: 0.9999998941734605, iteration: 138806
loss: 1.2906370162963867,grad_norm: 0.9999998491150048, iteration: 138807
loss: 1.3847862482070923,grad_norm: 1.0000000778771403, iteration: 138808
loss: 1.4411314725875854,grad_norm: 0.9999998321142819, iteration: 138809
loss: 1.5090681314468384,grad_norm: 0.9999999183036161, iteration: 138810
loss: 1.3777353763580322,grad_norm: 1.0000000150204098, iteration: 138811
loss: 1.3073979616165161,grad_norm: 0.9999999648206366, iteration: 138812
loss: 1.4440691471099854,grad_norm: 0.9999998903984967, iteration: 138813
loss: 1.1795860528945923,grad_norm: 0.999999895091484, iteration: 138814
loss: 1.516693115234375,grad_norm: 0.9999998318585627, iteration: 138815
loss: 1.4228811264038086,grad_norm: 1.0000000525273671, iteration: 138816
loss: 1.5456494092941284,grad_norm: 1.0000000219707392, iteration: 138817
loss: 1.4571318626403809,grad_norm: 0.9999999011535201, iteration: 138818
loss: 1.4832664728164673,grad_norm: 0.9999999694298385, iteration: 138819
loss: 1.5805842876434326,grad_norm: 1.0000000226094206, iteration: 138820
loss: 1.4805115461349487,grad_norm: 0.9999999079813098, iteration: 138821
loss: 1.5823285579681396,grad_norm: 0.9999999790590394, iteration: 138822
loss: 1.4559653997421265,grad_norm: 0.9999996601133273, iteration: 138823
loss: 1.4161601066589355,grad_norm: 1.0000000588012479, iteration: 138824
loss: 1.5769234895706177,grad_norm: 0.9999999004224421, iteration: 138825
loss: 1.447243094444275,grad_norm: 0.9999999492113022, iteration: 138826
loss: 1.3205033540725708,grad_norm: 0.9999998122500696, iteration: 138827
loss: 1.2364940643310547,grad_norm: 0.9999997542194621, iteration: 138828
loss: 1.416061282157898,grad_norm: 0.9999998771999554, iteration: 138829
loss: 1.2453478574752808,grad_norm: 0.9999999911518198, iteration: 138830
loss: 1.2786134481430054,grad_norm: 0.9999999612155128, iteration: 138831
loss: 1.3982962369918823,grad_norm: 0.9999999096526945, iteration: 138832
loss: 1.4265532493591309,grad_norm: 0.9999999175890433, iteration: 138833
loss: 1.4254237413406372,grad_norm: 0.9999999740519261, iteration: 138834
loss: 1.2167043685913086,grad_norm: 0.9999995374908811, iteration: 138835
loss: 1.5470480918884277,grad_norm: 0.999999945738923, iteration: 138836
loss: 1.4525582790374756,grad_norm: 0.9999997138898449, iteration: 138837
loss: 1.6888670921325684,grad_norm: 0.9999998687282592, iteration: 138838
loss: 1.7185924053192139,grad_norm: 0.9999998584352636, iteration: 138839
loss: 1.4231163263320923,grad_norm: 0.9999998971496922, iteration: 138840
loss: 1.14371919631958,grad_norm: 0.9999993283117149, iteration: 138841
loss: 1.2040278911590576,grad_norm: 1.0000000554182065, iteration: 138842
loss: 1.288435459136963,grad_norm: 0.9999999402487424, iteration: 138843
loss: 1.3588334321975708,grad_norm: 0.9999999310645606, iteration: 138844
loss: 1.1981831789016724,grad_norm: 0.9999999567802379, iteration: 138845
loss: 1.3333252668380737,grad_norm: 0.9999999332384812, iteration: 138846
loss: 1.2582135200500488,grad_norm: 0.9999992659657977, iteration: 138847
loss: 1.274011492729187,grad_norm: 0.9999996350507421, iteration: 138848
loss: 1.3438401222229004,grad_norm: 0.9999998676859249, iteration: 138849
loss: 1.2446508407592773,grad_norm: 0.9999996036936166, iteration: 138850
loss: 1.2055535316467285,grad_norm: 0.9999997827394999, iteration: 138851
loss: 1.4126452207565308,grad_norm: 0.9999999054957482, iteration: 138852
loss: 1.2942265272140503,grad_norm: 0.9999998138996801, iteration: 138853
loss: 1.2507609128952026,grad_norm: 0.9999999588118705, iteration: 138854
loss: 1.3944721221923828,grad_norm: 0.999999809900213, iteration: 138855
loss: 1.4553515911102295,grad_norm: 0.9999998066690692, iteration: 138856
loss: 1.1858376264572144,grad_norm: 0.9999997319970512, iteration: 138857
loss: 1.3640258312225342,grad_norm: 0.9999998856772897, iteration: 138858
loss: 1.228291392326355,grad_norm: 0.9999999242089315, iteration: 138859
loss: 1.2933474779129028,grad_norm: 0.9999998681931381, iteration: 138860
loss: 1.1222774982452393,grad_norm: 0.9999997567769618, iteration: 138861
loss: 1.3583401441574097,grad_norm: 0.9999998210470763, iteration: 138862
loss: 1.4364820718765259,grad_norm: 0.9999999902256442, iteration: 138863
loss: 1.243313193321228,grad_norm: 0.9999996608149018, iteration: 138864
loss: 1.380228877067566,grad_norm: 0.9999998357077294, iteration: 138865
loss: 1.1667087078094482,grad_norm: 0.9999996590643605, iteration: 138866
loss: 1.673821210861206,grad_norm: 0.9999999156554192, iteration: 138867
loss: 1.565222978591919,grad_norm: 0.9999998860488412, iteration: 138868
loss: 1.4291727542877197,grad_norm: 1.0000001146313982, iteration: 138869
loss: 1.5596681833267212,grad_norm: 0.9999999075135726, iteration: 138870
loss: 1.237512230873108,grad_norm: 0.999999883912497, iteration: 138871
loss: 1.3254326581954956,grad_norm: 0.9999996090950909, iteration: 138872
loss: 1.2955801486968994,grad_norm: 0.999999824684962, iteration: 138873
loss: 1.2515273094177246,grad_norm: 0.9999997325433254, iteration: 138874
loss: 1.379814863204956,grad_norm: 1.0000000205876736, iteration: 138875
loss: 1.1889164447784424,grad_norm: 0.9999996187359819, iteration: 138876
loss: 1.2395154237747192,grad_norm: 0.9999997791587919, iteration: 138877
loss: 1.4562227725982666,grad_norm: 0.9999997724047919, iteration: 138878
loss: 1.3746535778045654,grad_norm: 0.9999995360363515, iteration: 138879
loss: 1.3958693742752075,grad_norm: 1.000000080931534, iteration: 138880
loss: 1.1935793161392212,grad_norm: 0.9999995724871745, iteration: 138881
loss: 1.4893890619277954,grad_norm: 0.9999999694514055, iteration: 138882
loss: 1.280776858329773,grad_norm: 0.9999999194294934, iteration: 138883
loss: 1.4165092706680298,grad_norm: 0.9999999016945006, iteration: 138884
loss: 1.2668025493621826,grad_norm: 0.9999998819710645, iteration: 138885
loss: 1.6174873113632202,grad_norm: 0.9999998939696316, iteration: 138886
loss: 1.9842814207077026,grad_norm: 0.999999870891241, iteration: 138887
loss: 1.6216572523117065,grad_norm: 0.9999998341355469, iteration: 138888
loss: 1.6080403327941895,grad_norm: 0.9999999833812723, iteration: 138889
loss: 1.2658662796020508,grad_norm: 0.9999997789598182, iteration: 138890
loss: 1.8128259181976318,grad_norm: 1.000000059561404, iteration: 138891
loss: 1.1431745290756226,grad_norm: 0.9999995404575978, iteration: 138892
loss: 1.3121936321258545,grad_norm: 1.0000000083836948, iteration: 138893
loss: 1.645588755607605,grad_norm: 0.9999999642730429, iteration: 138894
loss: 1.4959442615509033,grad_norm: 0.9999999036393387, iteration: 138895
loss: 1.5886425971984863,grad_norm: 0.9999999317084403, iteration: 138896
loss: 1.3163564205169678,grad_norm: 0.999999990287868, iteration: 138897
loss: 1.3288233280181885,grad_norm: 1.0000000491388719, iteration: 138898
loss: 1.539060115814209,grad_norm: 0.9999999883807121, iteration: 138899
loss: 1.414394736289978,grad_norm: 0.999999921234907, iteration: 138900
loss: 1.5916595458984375,grad_norm: 1.0000000241943483, iteration: 138901
loss: 1.5523872375488281,grad_norm: 0.9999999573180891, iteration: 138902
loss: 1.3903708457946777,grad_norm: 0.9999998317143626, iteration: 138903
loss: 1.3933398723602295,grad_norm: 0.9999998161984422, iteration: 138904
loss: 1.4809443950653076,grad_norm: 0.9999999316428343, iteration: 138905
loss: 1.8596043586730957,grad_norm: 0.9999999940026263, iteration: 138906
loss: 1.547515869140625,grad_norm: 0.9999997687350194, iteration: 138907
loss: 1.2449073791503906,grad_norm: 0.9999996459707432, iteration: 138908
loss: 1.295647144317627,grad_norm: 0.999999964060229, iteration: 138909
loss: 1.2747339010238647,grad_norm: 0.9999996805828995, iteration: 138910
loss: 1.2785639762878418,grad_norm: 0.9999996767122279, iteration: 138911
loss: 1.1800810098648071,grad_norm: 0.999999510420487, iteration: 138912
loss: 1.3912752866744995,grad_norm: 0.9999999476706497, iteration: 138913
loss: 1.3571394681930542,grad_norm: 0.9999998596694262, iteration: 138914
loss: 1.5418370962142944,grad_norm: 0.9999998460087438, iteration: 138915
loss: 1.376443862915039,grad_norm: 0.9999998738899488, iteration: 138916
loss: 1.3452714681625366,grad_norm: 0.9999999475042977, iteration: 138917
loss: 1.4238160848617554,grad_norm: 0.999999932284954, iteration: 138918
loss: 1.5442575216293335,grad_norm: 0.9999998354068232, iteration: 138919
loss: 1.1433357000350952,grad_norm: 0.9999992867472467, iteration: 138920
loss: 1.1448678970336914,grad_norm: 0.9999997479835471, iteration: 138921
loss: 1.2957314252853394,grad_norm: 0.9999999832785819, iteration: 138922
loss: 1.1824097633361816,grad_norm: 0.9999995631957328, iteration: 138923
loss: 1.278850793838501,grad_norm: 0.9999998398261998, iteration: 138924
loss: 1.1098113059997559,grad_norm: 0.9999991120944794, iteration: 138925
loss: 1.1531184911727905,grad_norm: 0.9999996320725953, iteration: 138926
loss: 1.054848313331604,grad_norm: 0.9999991930760178, iteration: 138927
loss: 1.2113893032073975,grad_norm: 0.9999993657444065, iteration: 138928
loss: 1.1853880882263184,grad_norm: 0.9999999239863104, iteration: 138929
loss: 1.1373319625854492,grad_norm: 0.9999995556353317, iteration: 138930
loss: 1.1562620401382446,grad_norm: 0.9999997976027497, iteration: 138931
loss: 1.4680113792419434,grad_norm: 0.9999999052717423, iteration: 138932
loss: 1.357029676437378,grad_norm: 0.9999999552128388, iteration: 138933
loss: 1.1543598175048828,grad_norm: 0.9999997690075403, iteration: 138934
loss: 1.3610265254974365,grad_norm: 0.9999999034775877, iteration: 138935
loss: 1.6062612533569336,grad_norm: 0.9999999828853045, iteration: 138936
loss: 1.2356923818588257,grad_norm: 0.9999998749197696, iteration: 138937
loss: 1.1131126880645752,grad_norm: 0.9999999531222734, iteration: 138938
loss: 1.167565107345581,grad_norm: 0.9999999703289474, iteration: 138939
loss: 1.3246852159500122,grad_norm: 0.9999998789248585, iteration: 138940
loss: 1.1476911306381226,grad_norm: 0.9999992653363865, iteration: 138941
loss: 1.2190173864364624,grad_norm: 0.9999996283329118, iteration: 138942
loss: 1.1955393552780151,grad_norm: 1.000000037483896, iteration: 138943
loss: 1.1491590738296509,grad_norm: 0.9999999590244332, iteration: 138944
loss: 1.2247483730316162,grad_norm: 1.0000000168798309, iteration: 138945
loss: 1.277241826057434,grad_norm: 0.9999995096457683, iteration: 138946
loss: 1.0596815347671509,grad_norm: 0.9999999310670052, iteration: 138947
loss: 1.0040621757507324,grad_norm: 0.9999997318127063, iteration: 138948
loss: 1.204622507095337,grad_norm: 0.9999999446963672, iteration: 138949
loss: 1.1776998043060303,grad_norm: 1.0000000641852433, iteration: 138950
loss: 1.2220065593719482,grad_norm: 0.9999998220716465, iteration: 138951
loss: 1.2403565645217896,grad_norm: 0.9999999080321951, iteration: 138952
loss: 1.4098361730575562,grad_norm: 0.9999997179990575, iteration: 138953
loss: 1.2010507583618164,grad_norm: 0.9999999319454539, iteration: 138954
loss: 1.1501158475875854,grad_norm: 0.9999992462049204, iteration: 138955
loss: 1.3381682634353638,grad_norm: 0.9999996925614205, iteration: 138956
loss: 1.3155361413955688,grad_norm: 1.0000001089963024, iteration: 138957
loss: 1.3231899738311768,grad_norm: 0.999999827030938, iteration: 138958
loss: 1.2823446989059448,grad_norm: 1.0000000016289883, iteration: 138959
loss: 1.1416696310043335,grad_norm: 0.999999674156896, iteration: 138960
loss: 1.2330394983291626,grad_norm: 1.000000032886009, iteration: 138961
loss: 1.2986620664596558,grad_norm: 0.9999997388654818, iteration: 138962
loss: 1.4115219116210938,grad_norm: 0.999999849320542, iteration: 138963
loss: 1.2687410116195679,grad_norm: 0.9999998099429904, iteration: 138964
loss: 1.30904221534729,grad_norm: 0.9999998782005558, iteration: 138965
loss: 1.144377589225769,grad_norm: 0.9999993966640693, iteration: 138966
loss: 1.3735171556472778,grad_norm: 0.9999998705506673, iteration: 138967
loss: 1.3526365756988525,grad_norm: 0.9999999319691177, iteration: 138968
loss: 1.2640684843063354,grad_norm: 0.9999994581660399, iteration: 138969
loss: 1.059354305267334,grad_norm: 0.9999990429835214, iteration: 138970
loss: 1.1202709674835205,grad_norm: 0.9999995028398189, iteration: 138971
loss: 1.1011130809783936,grad_norm: 0.9999992992493734, iteration: 138972
loss: 1.1113369464874268,grad_norm: 0.999999950480582, iteration: 138973
loss: 1.1476025581359863,grad_norm: 0.9999999245114696, iteration: 138974
loss: 1.4675910472869873,grad_norm: 0.9999998021459164, iteration: 138975
loss: 1.0692825317382812,grad_norm: 0.9999992515791905, iteration: 138976
loss: 1.1328463554382324,grad_norm: 0.9999999137746872, iteration: 138977
loss: 1.1244808435440063,grad_norm: 0.9999995928806802, iteration: 138978
loss: 1.1649737358093262,grad_norm: 0.999999788675394, iteration: 138979
loss: 1.0849688053131104,grad_norm: 0.9999999254569234, iteration: 138980
loss: 1.2239466905593872,grad_norm: 0.9999998896944691, iteration: 138981
loss: 1.5903759002685547,grad_norm: 0.9999999940693596, iteration: 138982
loss: 1.3689640760421753,grad_norm: 0.9999997576388141, iteration: 138983
loss: 1.4193288087844849,grad_norm: 1.000000055311241, iteration: 138984
loss: 1.0943087339401245,grad_norm: 0.9999999847756419, iteration: 138985
loss: 1.3356822729110718,grad_norm: 0.9999997314929605, iteration: 138986
loss: 1.2619556188583374,grad_norm: 1.0000000410933025, iteration: 138987
loss: 1.2870702743530273,grad_norm: 0.9999995431391637, iteration: 138988
loss: 1.4981352090835571,grad_norm: 0.9999999102649882, iteration: 138989
loss: 1.119011402130127,grad_norm: 0.9999997995680396, iteration: 138990
loss: 1.0743165016174316,grad_norm: 0.9565974349958692, iteration: 138991
loss: 1.0583349466323853,grad_norm: 0.999999893779412, iteration: 138992
loss: 1.2195895910263062,grad_norm: 1.000000030734678, iteration: 138993
loss: 1.3700587749481201,grad_norm: 0.9999997025341422, iteration: 138994
loss: 1.192994236946106,grad_norm: 0.9999996560322962, iteration: 138995
loss: 1.3825100660324097,grad_norm: 0.9999999651124609, iteration: 138996
loss: 1.2237857580184937,grad_norm: 0.9999999144721817, iteration: 138997
loss: 1.2629070281982422,grad_norm: 0.9999998511569674, iteration: 138998
loss: 1.2740440368652344,grad_norm: 0.9999998820445873, iteration: 138999
loss: 1.3996031284332275,grad_norm: 0.9999999792664322, iteration: 139000
loss: 1.2179899215698242,grad_norm: 1.0000000334427845, iteration: 139001
loss: 1.1262891292572021,grad_norm: 1.000000000140816, iteration: 139002
loss: 1.1376686096191406,grad_norm: 0.9999999037026036, iteration: 139003
loss: 1.214020013809204,grad_norm: 0.9999998196888648, iteration: 139004
loss: 1.191408634185791,grad_norm: 0.9999997905870789, iteration: 139005
loss: 1.288813829421997,grad_norm: 0.9999993582763183, iteration: 139006
loss: 1.1289135217666626,grad_norm: 0.9999994038391604, iteration: 139007
loss: 1.1209877729415894,grad_norm: 0.9999996501755929, iteration: 139008
loss: 1.2909892797470093,grad_norm: 0.9999998562697906, iteration: 139009
loss: 1.146862268447876,grad_norm: 0.9999994311955325, iteration: 139010
loss: 1.0773853063583374,grad_norm: 0.9999991365247795, iteration: 139011
loss: 1.2591814994812012,grad_norm: 0.9999999510590828, iteration: 139012
loss: 1.388518214225769,grad_norm: 0.9999999030917712, iteration: 139013
loss: 1.6492929458618164,grad_norm: 1.0000000161783444, iteration: 139014
loss: 1.8716912269592285,grad_norm: 0.9999999537523844, iteration: 139015
loss: 1.1703104972839355,grad_norm: 0.9999996596363099, iteration: 139016
loss: 1.4998140335083008,grad_norm: 0.9999999274436037, iteration: 139017
loss: 1.4137225151062012,grad_norm: 0.9999998468816845, iteration: 139018
loss: 1.2877912521362305,grad_norm: 0.9999997459698157, iteration: 139019
loss: 1.2254773378372192,grad_norm: 0.9999999744705887, iteration: 139020
loss: 1.5825316905975342,grad_norm: 0.9999999515734749, iteration: 139021
loss: 1.2555873394012451,grad_norm: 0.9999997965226681, iteration: 139022
loss: 1.262502670288086,grad_norm: 0.9999997392972971, iteration: 139023
loss: 1.3003122806549072,grad_norm: 0.9999998921169863, iteration: 139024
loss: 1.4785513877868652,grad_norm: 0.9999999457820684, iteration: 139025
loss: 1.3538109064102173,grad_norm: 0.9999999939769167, iteration: 139026
loss: 1.3959447145462036,grad_norm: 0.9999999020465309, iteration: 139027
loss: 1.1867303848266602,grad_norm: 0.9999999293326405, iteration: 139028
loss: 1.1532182693481445,grad_norm: 0.9999996883575466, iteration: 139029
loss: 1.2773211002349854,grad_norm: 0.9999998108597057, iteration: 139030
loss: 1.2786844968795776,grad_norm: 0.9999999075173926, iteration: 139031
loss: 1.2765581607818604,grad_norm: 0.9999999059134209, iteration: 139032
loss: 1.596550703048706,grad_norm: 0.9999999893218285, iteration: 139033
loss: 1.1019326448440552,grad_norm: 0.999999726453485, iteration: 139034
loss: 1.2566251754760742,grad_norm: 0.9999997703661601, iteration: 139035
loss: 1.133485198020935,grad_norm: 0.9999993937524077, iteration: 139036
loss: 1.1127427816390991,grad_norm: 1.000000070535127, iteration: 139037
loss: 1.486484408378601,grad_norm: 0.9999997522587912, iteration: 139038
loss: 1.1648225784301758,grad_norm: 0.9999996202519865, iteration: 139039
loss: 1.193497657775879,grad_norm: 0.9999999994469022, iteration: 139040
loss: 1.2735047340393066,grad_norm: 0.9999997750432725, iteration: 139041
loss: 1.274457573890686,grad_norm: 0.9999997149743289, iteration: 139042
loss: 1.0932832956314087,grad_norm: 0.9999993714102895, iteration: 139043
loss: 1.1601964235305786,grad_norm: 1.0000000049937718, iteration: 139044
loss: 1.4312673807144165,grad_norm: 0.9999996892212422, iteration: 139045
loss: 1.2219268083572388,grad_norm: 0.9999994075874375, iteration: 139046
loss: 1.300131916999817,grad_norm: 0.999999933036209, iteration: 139047
loss: 1.4360448122024536,grad_norm: 0.9999999571728541, iteration: 139048
loss: 1.229635238647461,grad_norm: 0.9999998924126046, iteration: 139049
loss: 1.254741907119751,grad_norm: 0.9999998341393381, iteration: 139050
loss: 1.0865724086761475,grad_norm: 0.9999999070886463, iteration: 139051
loss: 1.3263736963272095,grad_norm: 0.9999998679222333, iteration: 139052
loss: 1.419083595275879,grad_norm: 0.9999999766337817, iteration: 139053
loss: 1.577359676361084,grad_norm: 0.9999998220277586, iteration: 139054
loss: 1.550475001335144,grad_norm: 1.0000001348296501, iteration: 139055
loss: 1.18842351436615,grad_norm: 0.9999999312299472, iteration: 139056
loss: 1.149307131767273,grad_norm: 0.9999998693473658, iteration: 139057
loss: 1.3639401197433472,grad_norm: 0.9999998861388677, iteration: 139058
loss: 1.4292861223220825,grad_norm: 0.9999999422784451, iteration: 139059
loss: 1.168073058128357,grad_norm: 0.9999994816975926, iteration: 139060
loss: 1.2921161651611328,grad_norm: 0.9999998605998436, iteration: 139061
loss: 1.0882395505905151,grad_norm: 0.9999998374793053, iteration: 139062
loss: 1.2001323699951172,grad_norm: 0.9999999591153834, iteration: 139063
loss: 1.4017635583877563,grad_norm: 0.9999999089361244, iteration: 139064
loss: 1.4065781831741333,grad_norm: 0.9999999533924194, iteration: 139065
loss: 1.1844151020050049,grad_norm: 0.9999998549043604, iteration: 139066
loss: 1.2812496423721313,grad_norm: 0.999999940984578, iteration: 139067
loss: 1.3902913331985474,grad_norm: 0.9999999411095886, iteration: 139068
loss: 1.1981825828552246,grad_norm: 0.999999625618788, iteration: 139069
loss: 1.3831326961517334,grad_norm: 0.9999999905350704, iteration: 139070
loss: 1.375603199005127,grad_norm: 0.9999999655892801, iteration: 139071
loss: 1.2113884687423706,grad_norm: 0.9999998316950579, iteration: 139072
loss: 1.1618200540542603,grad_norm: 0.9999996564551702, iteration: 139073
loss: 1.2187538146972656,grad_norm: 0.9999997361590582, iteration: 139074
loss: 1.064740777015686,grad_norm: 0.9999992625229621, iteration: 139075
loss: 1.292860746383667,grad_norm: 1.0000000168095178, iteration: 139076
loss: 1.5868464708328247,grad_norm: 1.0000000237875606, iteration: 139077
loss: 1.3214325904846191,grad_norm: 1.000000076577873, iteration: 139078
loss: 1.3716071844100952,grad_norm: 0.9999998848124276, iteration: 139079
loss: 1.4649120569229126,grad_norm: 0.9999999190348255, iteration: 139080
loss: 1.1127654314041138,grad_norm: 0.9999995484474551, iteration: 139081
loss: 1.3474400043487549,grad_norm: 0.9999997831583362, iteration: 139082
loss: 1.856489658355713,grad_norm: 0.9999999032796434, iteration: 139083
loss: 1.0389258861541748,grad_norm: 0.9999989950212992, iteration: 139084
loss: 1.3895955085754395,grad_norm: 0.9999999264353698, iteration: 139085
loss: 1.255582332611084,grad_norm: 0.9999998822287319, iteration: 139086
loss: 1.2861372232437134,grad_norm: 0.9999999150601985, iteration: 139087
loss: 1.5612313747406006,grad_norm: 0.9999999414365068, iteration: 139088
loss: 1.3827482461929321,grad_norm: 0.9999999403423631, iteration: 139089
loss: 1.428213119506836,grad_norm: 0.9999998664565695, iteration: 139090
loss: 1.3687233924865723,grad_norm: 0.9999999975380661, iteration: 139091
loss: 1.5872933864593506,grad_norm: 0.9999999835347521, iteration: 139092
loss: 1.3141062259674072,grad_norm: 0.9999998405720822, iteration: 139093
loss: 1.0499393939971924,grad_norm: 0.9999993229020497, iteration: 139094
loss: 1.1683385372161865,grad_norm: 0.9999998599104084, iteration: 139095
loss: 1.2966879606246948,grad_norm: 0.9999996377412056, iteration: 139096
loss: 1.2676647901535034,grad_norm: 0.9999998439045718, iteration: 139097
loss: 1.1156359910964966,grad_norm: 0.9999993671570487, iteration: 139098
loss: 1.063754916191101,grad_norm: 0.9999995688027932, iteration: 139099
loss: 1.1695921421051025,grad_norm: 0.9999998074600099, iteration: 139100
loss: 1.388095736503601,grad_norm: 0.9999999973650826, iteration: 139101
loss: 1.1055837869644165,grad_norm: 0.9999999073296815, iteration: 139102
loss: 1.3241087198257446,grad_norm: 1.0000000336919004, iteration: 139103
loss: 1.1287643909454346,grad_norm: 1.0000000724909652, iteration: 139104
loss: 1.2113606929779053,grad_norm: 0.999999800497401, iteration: 139105
loss: 1.1807087659835815,grad_norm: 0.9999994465380506, iteration: 139106
loss: 1.1534385681152344,grad_norm: 0.999999937930982, iteration: 139107
loss: 1.6036477088928223,grad_norm: 0.9999997405436016, iteration: 139108
loss: 1.223593831062317,grad_norm: 0.9999998269071442, iteration: 139109
loss: 1.2445178031921387,grad_norm: 0.9999999469606611, iteration: 139110
loss: 1.4993820190429688,grad_norm: 0.9999998666933954, iteration: 139111
loss: 1.299638032913208,grad_norm: 1.0000000564073197, iteration: 139112
loss: 1.115456223487854,grad_norm: 0.9999997552458834, iteration: 139113
loss: 1.0405207872390747,grad_norm: 0.9999991366130289, iteration: 139114
loss: 1.0884747505187988,grad_norm: 0.9999996164814594, iteration: 139115
loss: 1.3322944641113281,grad_norm: 0.9999999445109035, iteration: 139116
loss: 1.3137474060058594,grad_norm: 0.9999999348760413, iteration: 139117
loss: 1.054075002670288,grad_norm: 0.9999992544688001, iteration: 139118
loss: 1.2167998552322388,grad_norm: 0.9999997859799347, iteration: 139119
loss: 1.2339390516281128,grad_norm: 0.9999997993436085, iteration: 139120
loss: 1.0213373899459839,grad_norm: 0.9999995699101899, iteration: 139121
loss: 1.1738923788070679,grad_norm: 0.9999994890942282, iteration: 139122
loss: 1.2985866069793701,grad_norm: 0.9999996730627349, iteration: 139123
loss: 1.2496298551559448,grad_norm: 0.9999997591187934, iteration: 139124
loss: 1.2423417568206787,grad_norm: 0.9999998486532167, iteration: 139125
loss: 1.2376731634140015,grad_norm: 0.9999999343579243, iteration: 139126
loss: 1.256019949913025,grad_norm: 1.000000005278418, iteration: 139127
loss: 1.4051403999328613,grad_norm: 0.9999998595419565, iteration: 139128
loss: 1.6056839227676392,grad_norm: 0.9999998278617476, iteration: 139129
loss: 1.1430072784423828,grad_norm: 0.9999996116985156, iteration: 139130
loss: 1.2756026983261108,grad_norm: 0.9999999038868909, iteration: 139131
loss: 1.2812438011169434,grad_norm: 0.9999999104782855, iteration: 139132
loss: 1.2502385377883911,grad_norm: 0.999999928448586, iteration: 139133
loss: 1.4290357828140259,grad_norm: 0.9999999510500288, iteration: 139134
loss: 1.2240155935287476,grad_norm: 1.0000000284471189, iteration: 139135
loss: 1.2772258520126343,grad_norm: 1.0000000092171575, iteration: 139136
loss: 1.2833809852600098,grad_norm: 0.9999997170196924, iteration: 139137
loss: 1.2969286441802979,grad_norm: 0.9999996070105371, iteration: 139138
loss: 1.1957550048828125,grad_norm: 0.9999999182425781, iteration: 139139
loss: 1.3839596509933472,grad_norm: 0.9999999494134821, iteration: 139140
loss: 1.2372829914093018,grad_norm: 0.9999998944190488, iteration: 139141
loss: 1.1941193342208862,grad_norm: 0.9999995777303631, iteration: 139142
loss: 1.3437267541885376,grad_norm: 1.0000000620904819, iteration: 139143
loss: 1.374031901359558,grad_norm: 0.9999998432895801, iteration: 139144
loss: 1.4616748094558716,grad_norm: 0.9999999351775288, iteration: 139145
loss: 1.4419070482254028,grad_norm: 0.9999999093192996, iteration: 139146
loss: 1.3587074279785156,grad_norm: 0.9999996936498137, iteration: 139147
loss: 1.2173831462860107,grad_norm: 0.9999998223070445, iteration: 139148
loss: 1.0979210138320923,grad_norm: 0.9999995608920746, iteration: 139149
loss: 1.1750224828720093,grad_norm: 0.9999999791878527, iteration: 139150
loss: 1.2087712287902832,grad_norm: 0.9999994877462702, iteration: 139151
loss: 1.1205034255981445,grad_norm: 0.9999997807411185, iteration: 139152
loss: 1.162580132484436,grad_norm: 0.9999992734714388, iteration: 139153
loss: 1.1478650569915771,grad_norm: 0.999999721267919, iteration: 139154
loss: 1.353796124458313,grad_norm: 1.0000000077105498, iteration: 139155
loss: 1.4184143543243408,grad_norm: 0.9999999596723449, iteration: 139156
loss: 1.3311734199523926,grad_norm: 0.9999999065949875, iteration: 139157
loss: 1.167361855506897,grad_norm: 0.9999996527832056, iteration: 139158
loss: 1.2158281803131104,grad_norm: 0.9999997562011167, iteration: 139159
loss: 1.482067584991455,grad_norm: 0.9999998789824825, iteration: 139160
loss: 1.5022666454315186,grad_norm: 0.9999998945149411, iteration: 139161
loss: 1.3003253936767578,grad_norm: 0.9999997223774142, iteration: 139162
loss: 1.045903205871582,grad_norm: 0.9999996031382953, iteration: 139163
loss: 1.162493348121643,grad_norm: 0.9999996964178731, iteration: 139164
loss: 1.197005271911621,grad_norm: 0.9999997211840306, iteration: 139165
loss: 1.2686653137207031,grad_norm: 0.9999994483084641, iteration: 139166
loss: 1.5017169713974,grad_norm: 0.9999998461644231, iteration: 139167
loss: 1.227340817451477,grad_norm: 0.9999997701967183, iteration: 139168
loss: 1.2363938093185425,grad_norm: 0.9999999847064633, iteration: 139169
loss: 1.3251923322677612,grad_norm: 1.0000000046343098, iteration: 139170
loss: 1.181559681892395,grad_norm: 0.9999995710547878, iteration: 139171
loss: 1.1420985460281372,grad_norm: 0.9999997308440028, iteration: 139172
loss: 0.9849645495414734,grad_norm: 0.9999991158427334, iteration: 139173
loss: 1.1842387914657593,grad_norm: 0.9999999139988744, iteration: 139174
loss: 1.3003863096237183,grad_norm: 0.9999998730444831, iteration: 139175
loss: 1.2650326490402222,grad_norm: 0.9999999969640435, iteration: 139176
loss: 1.1464258432388306,grad_norm: 0.9999993436361382, iteration: 139177
loss: 1.4746274948120117,grad_norm: 0.9999998499700427, iteration: 139178
loss: 1.3285757303237915,grad_norm: 0.9999996014829359, iteration: 139179
loss: 1.1019611358642578,grad_norm: 0.9999996794897772, iteration: 139180
loss: 1.4956134557724,grad_norm: 0.9999997830760174, iteration: 139181
loss: 1.418380618095398,grad_norm: 0.9999998177487933, iteration: 139182
loss: 1.1674745082855225,grad_norm: 0.9999998708209109, iteration: 139183
loss: 1.238041877746582,grad_norm: 0.9999996878253167, iteration: 139184
loss: 1.2301839590072632,grad_norm: 0.9999997581868089, iteration: 139185
loss: 1.1852076053619385,grad_norm: 0.9999998780944418, iteration: 139186
loss: 1.1941620111465454,grad_norm: 0.9999996604837194, iteration: 139187
loss: 1.1622962951660156,grad_norm: 0.9999995870222572, iteration: 139188
loss: 1.3761650323867798,grad_norm: 0.9999998989281411, iteration: 139189
loss: 1.0632721185684204,grad_norm: 0.9999995827633971, iteration: 139190
loss: 1.208580493927002,grad_norm: 0.9999995883714158, iteration: 139191
loss: 1.2764960527420044,grad_norm: 0.999999934442626, iteration: 139192
loss: 1.1879472732543945,grad_norm: 0.9999996315091997, iteration: 139193
loss: 1.130115270614624,grad_norm: 0.9999994145860617, iteration: 139194
loss: 1.2433501482009888,grad_norm: 0.9999998429886758, iteration: 139195
loss: 1.2845041751861572,grad_norm: 0.9999997646549881, iteration: 139196
loss: 1.2829686403274536,grad_norm: 0.9999998649028928, iteration: 139197
loss: 1.1427161693572998,grad_norm: 0.9999996450135616, iteration: 139198
loss: 1.0679224729537964,grad_norm: 0.9999996479221409, iteration: 139199
loss: 1.4080970287322998,grad_norm: 0.999999888152934, iteration: 139200
loss: 1.214869499206543,grad_norm: 0.9999998040081476, iteration: 139201
loss: 1.258345603942871,grad_norm: 0.9999995647004816, iteration: 139202
loss: 1.319380283355713,grad_norm: 0.9999999422028029, iteration: 139203
loss: 1.2002668380737305,grad_norm: 0.9999998576397892, iteration: 139204
loss: 1.2577226161956787,grad_norm: 0.9999999572324366, iteration: 139205
loss: 1.219264268875122,grad_norm: 0.9999999021272946, iteration: 139206
loss: 1.2256256341934204,grad_norm: 0.9999999607674089, iteration: 139207
loss: 1.3467957973480225,grad_norm: 0.9999998889276676, iteration: 139208
loss: 1.159807562828064,grad_norm: 0.9999995164409025, iteration: 139209
loss: 1.2589057683944702,grad_norm: 0.9999998518536882, iteration: 139210
loss: 1.364465594291687,grad_norm: 0.9999995753647457, iteration: 139211
loss: 1.1567044258117676,grad_norm: 0.9999997004043405, iteration: 139212
loss: 1.1314115524291992,grad_norm: 0.9999994581437928, iteration: 139213
loss: 1.057855486869812,grad_norm: 0.9999999031731114, iteration: 139214
loss: 1.2804592847824097,grad_norm: 0.9999999857726037, iteration: 139215
loss: 1.2473950386047363,grad_norm: 0.9999997124178146, iteration: 139216
loss: 1.3248940706253052,grad_norm: 0.9999999852856598, iteration: 139217
loss: 1.4049158096313477,grad_norm: 0.9999999883758606, iteration: 139218
loss: 1.4249248504638672,grad_norm: 0.999999835124649, iteration: 139219
loss: 1.2371355295181274,grad_norm: 0.9999994363619648, iteration: 139220
loss: 1.3043227195739746,grad_norm: 0.9999999291747701, iteration: 139221
loss: 1.410775065422058,grad_norm: 0.999999811129139, iteration: 139222
loss: 1.3448801040649414,grad_norm: 0.9999999452396859, iteration: 139223
loss: 1.167372465133667,grad_norm: 1.0000000150228114, iteration: 139224
loss: 1.3438161611557007,grad_norm: 0.9999998462960644, iteration: 139225
loss: 1.1215124130249023,grad_norm: 0.9999998457437629, iteration: 139226
loss: 1.139661192893982,grad_norm: 0.999999860455486, iteration: 139227
loss: 1.20350182056427,grad_norm: 0.9999996254512328, iteration: 139228
loss: 1.0233285427093506,grad_norm: 0.9206563446511004, iteration: 139229
loss: 1.3181190490722656,grad_norm: 0.9999997858125481, iteration: 139230
loss: 1.4387571811676025,grad_norm: 0.9999998677537304, iteration: 139231
loss: 1.3965483903884888,grad_norm: 0.9999999155130284, iteration: 139232
loss: 1.305873990058899,grad_norm: 0.9999999951746679, iteration: 139233
loss: 1.1243956089019775,grad_norm: 0.9999999841218263, iteration: 139234
loss: 1.2212939262390137,grad_norm: 0.9999998082499825, iteration: 139235
loss: 1.2398091554641724,grad_norm: 1.0000001154578742, iteration: 139236
loss: 1.3534281253814697,grad_norm: 0.9999996326768378, iteration: 139237
loss: 1.1252882480621338,grad_norm: 0.9999999079947679, iteration: 139238
loss: 1.255787968635559,grad_norm: 0.9999998933721235, iteration: 139239
loss: 1.19065260887146,grad_norm: 0.999999935489895, iteration: 139240
loss: 1.3424869775772095,grad_norm: 0.9999999888820006, iteration: 139241
loss: 1.3897665739059448,grad_norm: 0.9999998587952059, iteration: 139242
loss: 1.2649908065795898,grad_norm: 0.9999993575077469, iteration: 139243
loss: 1.1082922220230103,grad_norm: 0.9999994366827742, iteration: 139244
loss: 1.203202724456787,grad_norm: 0.9999997929188544, iteration: 139245
loss: 1.3566255569458008,grad_norm: 1.0000000163299254, iteration: 139246
loss: 1.1529072523117065,grad_norm: 0.9999997087236199, iteration: 139247
loss: 1.3343585729599,grad_norm: 0.999999907833308, iteration: 139248
loss: 1.1626849174499512,grad_norm: 1.0000000669080247, iteration: 139249
loss: 1.2010284662246704,grad_norm: 0.9999998652133716, iteration: 139250
loss: 1.2197788953781128,grad_norm: 0.9999999213617133, iteration: 139251
loss: 1.2214046716690063,grad_norm: 0.9999999921398334, iteration: 139252
loss: 1.3290563821792603,grad_norm: 0.9999999908988862, iteration: 139253
loss: 1.129097580909729,grad_norm: 0.9999995124363409, iteration: 139254
loss: 1.2591696977615356,grad_norm: 0.999999490719903, iteration: 139255
loss: 1.2304482460021973,grad_norm: 0.9999998592385174, iteration: 139256
loss: 1.0870510339736938,grad_norm: 0.9999998264772868, iteration: 139257
loss: 1.5112048387527466,grad_norm: 0.9999999242785925, iteration: 139258
loss: 1.1132889986038208,grad_norm: 0.9999999632243527, iteration: 139259
loss: 1.3254663944244385,grad_norm: 0.9999998758642958, iteration: 139260
loss: 1.1731032133102417,grad_norm: 0.999999886897982, iteration: 139261
loss: 1.433384895324707,grad_norm: 0.9999998959408855, iteration: 139262
loss: 1.1054048538208008,grad_norm: 0.9999993449182525, iteration: 139263
loss: 1.2108043432235718,grad_norm: 0.9999997698157, iteration: 139264
loss: 1.1626801490783691,grad_norm: 0.9999997111708328, iteration: 139265
loss: 1.1616941690444946,grad_norm: 0.9999996321527693, iteration: 139266
loss: 1.065277338027954,grad_norm: 0.9999998925104261, iteration: 139267
loss: 1.107775330543518,grad_norm: 0.9999995418249648, iteration: 139268
loss: 1.161697506904602,grad_norm: 0.9999994063207674, iteration: 139269
loss: 1.2100971937179565,grad_norm: 0.9999996674559081, iteration: 139270
loss: 1.244326114654541,grad_norm: 0.9999999525759117, iteration: 139271
loss: 1.2755253314971924,grad_norm: 0.9999997141951027, iteration: 139272
loss: 1.1712552309036255,grad_norm: 0.9999998570245004, iteration: 139273
loss: 1.07828688621521,grad_norm: 0.9999995289092987, iteration: 139274
loss: 1.3121205568313599,grad_norm: 0.9999998267007684, iteration: 139275
loss: 1.3581013679504395,grad_norm: 0.9999997439015442, iteration: 139276
loss: 1.0992817878723145,grad_norm: 0.9999995418388261, iteration: 139277
loss: 1.0793262720108032,grad_norm: 0.9999995777515747, iteration: 139278
loss: 1.403137445449829,grad_norm: 1.0000000316514863, iteration: 139279
loss: 1.0351959466934204,grad_norm: 0.999999389372878, iteration: 139280
loss: 1.2139341831207275,grad_norm: 0.9999997890144569, iteration: 139281
loss: 1.166575312614441,grad_norm: 0.9999997471462894, iteration: 139282
loss: 1.3922674655914307,grad_norm: 0.9999994980910353, iteration: 139283
loss: 1.3065357208251953,grad_norm: 0.999999905046201, iteration: 139284
loss: 1.2283611297607422,grad_norm: 0.9999998452414207, iteration: 139285
loss: 1.4532582759857178,grad_norm: 0.999999852226412, iteration: 139286
loss: 1.1352226734161377,grad_norm: 0.9999999286830126, iteration: 139287
loss: 1.2367771863937378,grad_norm: 0.9999998799315266, iteration: 139288
loss: 1.2445613145828247,grad_norm: 0.9999998310281502, iteration: 139289
loss: 1.098023772239685,grad_norm: 0.9999995672647539, iteration: 139290
loss: 1.3930960893630981,grad_norm: 0.999999900785581, iteration: 139291
loss: 1.3776766061782837,grad_norm: 0.9999998568555049, iteration: 139292
loss: 1.2050024271011353,grad_norm: 0.9999998316971866, iteration: 139293
loss: 1.2029926776885986,grad_norm: 0.999999828209207, iteration: 139294
loss: 1.14827299118042,grad_norm: 0.9999999312673216, iteration: 139295
loss: 1.3041608333587646,grad_norm: 0.9999998151153344, iteration: 139296
loss: 1.2576889991760254,grad_norm: 0.9999995040008186, iteration: 139297
loss: 1.1806867122650146,grad_norm: 0.9999998740170637, iteration: 139298
loss: 1.1475350856781006,grad_norm: 0.9999998660041445, iteration: 139299
loss: 1.0690357685089111,grad_norm: 1.0000000756099208, iteration: 139300
loss: 1.2557176351547241,grad_norm: 0.9999995868972041, iteration: 139301
loss: 1.062180757522583,grad_norm: 0.9999994186272096, iteration: 139302
loss: 1.1713664531707764,grad_norm: 0.9999996928442704, iteration: 139303
loss: 1.492932915687561,grad_norm: 1.0000000271526597, iteration: 139304
loss: 1.312516212463379,grad_norm: 0.9999999964651697, iteration: 139305
loss: 1.1331942081451416,grad_norm: 0.999999847890455, iteration: 139306
loss: 1.112436056137085,grad_norm: 0.9999999180818134, iteration: 139307
loss: 1.26331627368927,grad_norm: 0.9999999185677956, iteration: 139308
loss: 1.2186442613601685,grad_norm: 0.9999999009986897, iteration: 139309
loss: 1.341713547706604,grad_norm: 0.9999998387493819, iteration: 139310
loss: 1.2607252597808838,grad_norm: 0.9999996358380658, iteration: 139311
loss: 1.2899680137634277,grad_norm: 0.9999997096711019, iteration: 139312
loss: 1.3376103639602661,grad_norm: 0.9999999823171619, iteration: 139313
loss: 1.0866267681121826,grad_norm: 0.9999998243281645, iteration: 139314
loss: 1.0433731079101562,grad_norm: 0.9999992136604486, iteration: 139315
loss: 1.164715051651001,grad_norm: 0.9999994877878834, iteration: 139316
loss: 1.1694673299789429,grad_norm: 0.9999995464065031, iteration: 139317
loss: 1.179302453994751,grad_norm: 0.9999998302092912, iteration: 139318
loss: 1.2019081115722656,grad_norm: 0.9999998824974633, iteration: 139319
loss: 1.3068640232086182,grad_norm: 0.9999999621387273, iteration: 139320
loss: 1.1838304996490479,grad_norm: 0.9999999719436168, iteration: 139321
loss: 1.175025463104248,grad_norm: 0.999999484440567, iteration: 139322
loss: 1.3949263095855713,grad_norm: 0.9999996821044876, iteration: 139323
loss: 1.3893659114837646,grad_norm: 0.9999998418861936, iteration: 139324
loss: 1.189577341079712,grad_norm: 0.9999997826221393, iteration: 139325
loss: 1.284855842590332,grad_norm: 0.9999995936343424, iteration: 139326
loss: 1.0326842069625854,grad_norm: 0.9999993233541132, iteration: 139327
loss: 1.2364585399627686,grad_norm: 0.9999997241120331, iteration: 139328
loss: 1.1846795082092285,grad_norm: 0.9999995511890235, iteration: 139329
loss: 1.415030837059021,grad_norm: 1.0000000199691412, iteration: 139330
loss: 1.2490315437316895,grad_norm: 0.9999997366585894, iteration: 139331
loss: 1.1764007806777954,grad_norm: 0.9999995660221093, iteration: 139332
loss: 1.2058323621749878,grad_norm: 0.9999998529636511, iteration: 139333
loss: 1.3023324012756348,grad_norm: 0.9999999503762856, iteration: 139334
loss: 1.397657871246338,grad_norm: 0.9999997789014865, iteration: 139335
loss: 1.2602750062942505,grad_norm: 0.9999999815550664, iteration: 139336
loss: 1.3542633056640625,grad_norm: 1.0000000343898718, iteration: 139337
loss: 1.3153996467590332,grad_norm: 0.9999999810236837, iteration: 139338
loss: 1.245625615119934,grad_norm: 0.9999999850922441, iteration: 139339
loss: 1.1459612846374512,grad_norm: 0.9999997020742685, iteration: 139340
loss: 1.2487658262252808,grad_norm: 0.9999999466770697, iteration: 139341
loss: 1.3585227727890015,grad_norm: 0.9999998720003849, iteration: 139342
loss: 1.4969253540039062,grad_norm: 0.9999999684546815, iteration: 139343
loss: 1.3592519760131836,grad_norm: 0.999999761556904, iteration: 139344
loss: 1.311180830001831,grad_norm: 0.9999998796831608, iteration: 139345
loss: 1.2608991861343384,grad_norm: 0.9999999184392884, iteration: 139346
loss: 1.2726984024047852,grad_norm: 0.9999998847241007, iteration: 139347
loss: 1.3573552370071411,grad_norm: 1.0000000103264095, iteration: 139348
loss: 1.4023336172103882,grad_norm: 0.99999976825222, iteration: 139349
loss: 1.370766282081604,grad_norm: 0.999999730099864, iteration: 139350
loss: 1.507054328918457,grad_norm: 0.9999999246349526, iteration: 139351
loss: 1.1355253458023071,grad_norm: 0.9999994957926296, iteration: 139352
loss: 1.4277992248535156,grad_norm: 0.99999986954038, iteration: 139353
loss: 1.1235921382904053,grad_norm: 0.9999999373575047, iteration: 139354
loss: 1.383638858795166,grad_norm: 0.9999996724872703, iteration: 139355
loss: 1.430062174797058,grad_norm: 1.0000000141438357, iteration: 139356
loss: 1.3362488746643066,grad_norm: 0.9999998144474236, iteration: 139357
loss: 1.2749353647232056,grad_norm: 0.9999998526901095, iteration: 139358
loss: 1.4670729637145996,grad_norm: 0.9999999472890636, iteration: 139359
loss: 1.5918477773666382,grad_norm: 0.9999998976935895, iteration: 139360
loss: 1.1883845329284668,grad_norm: 0.9999996733531867, iteration: 139361
loss: 1.2356191873550415,grad_norm: 0.9999997449615096, iteration: 139362
loss: 1.156186580657959,grad_norm: 0.9999997598183472, iteration: 139363
loss: 1.395979404449463,grad_norm: 0.9999999207732894, iteration: 139364
loss: 1.2746880054473877,grad_norm: 0.9999998978220124, iteration: 139365
loss: 1.2256947755813599,grad_norm: 0.9999996157299184, iteration: 139366
loss: 1.2235329151153564,grad_norm: 0.9999998947081128, iteration: 139367
loss: 1.349689245223999,grad_norm: 0.9999997201111532, iteration: 139368
loss: 1.2921079397201538,grad_norm: 0.9999996669621957, iteration: 139369
loss: 1.3230750560760498,grad_norm: 1.0000000430189306, iteration: 139370
loss: 1.2896113395690918,grad_norm: 0.9999995895806701, iteration: 139371
loss: 1.2032599449157715,grad_norm: 0.9999999775123823, iteration: 139372
loss: 1.4301748275756836,grad_norm: 0.9999999041376425, iteration: 139373
loss: 1.292084813117981,grad_norm: 0.9999995625851724, iteration: 139374
loss: 1.4073436260223389,grad_norm: 1.0000000134090636, iteration: 139375
loss: 1.2925432920455933,grad_norm: 0.9999997693455146, iteration: 139376
loss: 1.4658963680267334,grad_norm: 0.9999999201891007, iteration: 139377
loss: 1.810235857963562,grad_norm: 0.9999999495784799, iteration: 139378
loss: 1.3734934329986572,grad_norm: 0.9999999532263392, iteration: 139379
loss: 1.2401636838912964,grad_norm: 0.9999999553938965, iteration: 139380
loss: 1.5547672510147095,grad_norm: 1.0000000968976337, iteration: 139381
loss: 1.3851138353347778,grad_norm: 0.9999999965943407, iteration: 139382
loss: 1.442185878753662,grad_norm: 0.9999999456798744, iteration: 139383
loss: 1.448222279548645,grad_norm: 0.9999999682080023, iteration: 139384
loss: 1.186417818069458,grad_norm: 0.9999997921860303, iteration: 139385
loss: 1.4958254098892212,grad_norm: 0.9999998112924146, iteration: 139386
loss: 1.5292903184890747,grad_norm: 0.9999999713643828, iteration: 139387
loss: 1.207688331604004,grad_norm: 0.9999997477888953, iteration: 139388
loss: 1.4205594062805176,grad_norm: 0.9999999474954138, iteration: 139389
loss: 1.4866186380386353,grad_norm: 1.0000000323850209, iteration: 139390
loss: 1.2741639614105225,grad_norm: 0.9999999310395624, iteration: 139391
loss: 1.3394962549209595,grad_norm: 0.999999983354508, iteration: 139392
loss: 1.3536467552185059,grad_norm: 0.9999998614328883, iteration: 139393
loss: 1.496169090270996,grad_norm: 1.0000000210731113, iteration: 139394
loss: 1.2078871726989746,grad_norm: 0.9999997705483574, iteration: 139395
loss: 1.5921281576156616,grad_norm: 1.0000000653195655, iteration: 139396
loss: 1.390872836112976,grad_norm: 0.9999998272059604, iteration: 139397
loss: 1.3168573379516602,grad_norm: 1.000000069995053, iteration: 139398
loss: 1.3625423908233643,grad_norm: 0.9999999329143291, iteration: 139399
loss: 1.5187149047851562,grad_norm: 0.9999999779653509, iteration: 139400
loss: 1.4339019060134888,grad_norm: 0.9999999711288662, iteration: 139401
loss: 1.622309684753418,grad_norm: 0.9999999351264331, iteration: 139402
loss: 1.313400387763977,grad_norm: 0.9999998897361472, iteration: 139403
loss: 1.3626813888549805,grad_norm: 0.999999922625931, iteration: 139404
loss: 1.4656540155410767,grad_norm: 1.0000000033789818, iteration: 139405
loss: 1.3315595388412476,grad_norm: 0.9999998903885319, iteration: 139406
loss: 1.452616572380066,grad_norm: 0.9999999738835955, iteration: 139407
loss: 1.726188063621521,grad_norm: 0.9999999883566202, iteration: 139408
loss: 1.4536720514297485,grad_norm: 0.9999999200866113, iteration: 139409
loss: 1.3054803609848022,grad_norm: 0.9999997913759747, iteration: 139410
loss: 1.6844242811203003,grad_norm: 0.9999998946528943, iteration: 139411
loss: 1.6217520236968994,grad_norm: 0.9999999299742853, iteration: 139412
loss: 1.6851966381072998,grad_norm: 0.9999998893705774, iteration: 139413
loss: 1.1892532110214233,grad_norm: 0.9999997731485277, iteration: 139414
loss: 1.548089861869812,grad_norm: 1.0000000294461822, iteration: 139415
loss: 1.2849619388580322,grad_norm: 0.9999999742305415, iteration: 139416
loss: 1.263566255569458,grad_norm: 0.9999998378429137, iteration: 139417
loss: 1.0917154550552368,grad_norm: 0.999999717405259, iteration: 139418
loss: 1.3783721923828125,grad_norm: 0.9999998742398779, iteration: 139419
loss: 1.2935421466827393,grad_norm: 0.9999999912990383, iteration: 139420
loss: 1.447022795677185,grad_norm: 0.9999999563390479, iteration: 139421
loss: 1.80720055103302,grad_norm: 1.0000000140820597, iteration: 139422
loss: 1.1577258110046387,grad_norm: 0.9999996623237949, iteration: 139423
loss: 1.321473479270935,grad_norm: 0.9999998456329722, iteration: 139424
loss: 1.478926658630371,grad_norm: 0.9999999821826264, iteration: 139425
loss: 1.459924340248108,grad_norm: 1.0000000001454026, iteration: 139426
loss: 1.6387920379638672,grad_norm: 0.9999999928990949, iteration: 139427
loss: 1.3027805089950562,grad_norm: 0.9999999591602082, iteration: 139428
loss: 1.3111666440963745,grad_norm: 0.9999999591529342, iteration: 139429
loss: 1.6299495697021484,grad_norm: 0.9999999995048311, iteration: 139430
loss: 1.3066972494125366,grad_norm: 1.000000004462341, iteration: 139431
loss: 1.244887113571167,grad_norm: 0.9999996680536738, iteration: 139432
loss: 1.3365697860717773,grad_norm: 0.9999998258351994, iteration: 139433
loss: 1.8343701362609863,grad_norm: 0.9999999511075641, iteration: 139434
loss: 1.800958275794983,grad_norm: 0.9999999037550712, iteration: 139435
loss: 1.7175980806350708,grad_norm: 0.9999999451719568, iteration: 139436
loss: 1.203912377357483,grad_norm: 0.9999999422521534, iteration: 139437
loss: 1.3469771146774292,grad_norm: 0.9999998811812215, iteration: 139438
loss: 1.742319107055664,grad_norm: 1.0000000283926342, iteration: 139439
loss: 1.3908864259719849,grad_norm: 0.9999999083990032, iteration: 139440
loss: 1.550316572189331,grad_norm: 1.0000000097020978, iteration: 139441
loss: 1.6662243604660034,grad_norm: 1.0000000523024215, iteration: 139442
loss: 1.337508201599121,grad_norm: 0.9999999272852179, iteration: 139443
loss: 1.5207325220108032,grad_norm: 0.9999999426664107, iteration: 139444
loss: 1.2566558122634888,grad_norm: 0.9999996375021483, iteration: 139445
loss: 1.5587410926818848,grad_norm: 0.9999999736609536, iteration: 139446
loss: 1.3535305261611938,grad_norm: 0.9999998484765296, iteration: 139447
loss: 1.3118259906768799,grad_norm: 0.9999999501024512, iteration: 139448
loss: 1.1773147583007812,grad_norm: 0.9999999401343563, iteration: 139449
loss: 1.385217308998108,grad_norm: 1.000000021767316, iteration: 139450
loss: 1.33707857131958,grad_norm: 1.000000043338728, iteration: 139451
loss: 1.4261202812194824,grad_norm: 1.0000000445024673, iteration: 139452
loss: 1.2212687730789185,grad_norm: 0.9999999932111957, iteration: 139453
loss: 1.6505482196807861,grad_norm: 0.999999957334494, iteration: 139454
loss: 1.2966235876083374,grad_norm: 1.0000000280790633, iteration: 139455
loss: 1.3628435134887695,grad_norm: 1.0000000031985052, iteration: 139456
loss: 1.8280411958694458,grad_norm: 0.9999999880324902, iteration: 139457
loss: 1.4125412702560425,grad_norm: 0.9999999581373309, iteration: 139458
loss: 1.5823131799697876,grad_norm: 0.9999999092372555, iteration: 139459
loss: 1.6071643829345703,grad_norm: 0.9999999615060203, iteration: 139460
loss: 1.407577395439148,grad_norm: 0.9999999715671719, iteration: 139461
loss: 1.2888389825820923,grad_norm: 0.9999999889528733, iteration: 139462
loss: 1.248130440711975,grad_norm: 0.9999999870941395, iteration: 139463
loss: 1.4004236459732056,grad_norm: 0.9999999030065309, iteration: 139464
loss: 1.4687472581863403,grad_norm: 0.9999999442055569, iteration: 139465
loss: 1.1806367635726929,grad_norm: 0.9999999345545122, iteration: 139466
loss: 1.4146682024002075,grad_norm: 0.999999884535372, iteration: 139467
loss: 1.31382155418396,grad_norm: 0.9999998307125104, iteration: 139468
loss: 1.2461600303649902,grad_norm: 0.9999999657875835, iteration: 139469
loss: 1.4898436069488525,grad_norm: 1.0000000386960815, iteration: 139470
loss: 1.3910504579544067,grad_norm: 0.9999999472027653, iteration: 139471
loss: 1.3889896869659424,grad_norm: 1.000000007046503, iteration: 139472
loss: 1.53976309299469,grad_norm: 0.9999999280686475, iteration: 139473
loss: 1.4610259532928467,grad_norm: 1.000000033451657, iteration: 139474
loss: 1.2622454166412354,grad_norm: 0.9999998745189754, iteration: 139475
loss: 1.2074055671691895,grad_norm: 0.9999998869519976, iteration: 139476
loss: 1.491188883781433,grad_norm: 1.0000000501714716, iteration: 139477
loss: 1.2309842109680176,grad_norm: 0.9999999325386515, iteration: 139478
loss: 1.4308865070343018,grad_norm: 0.9999999881002409, iteration: 139479
loss: 1.4401601552963257,grad_norm: 0.9999999913780956, iteration: 139480
loss: 1.2100603580474854,grad_norm: 0.9999998627232899, iteration: 139481
loss: 1.2610782384872437,grad_norm: 0.9999999523690417, iteration: 139482
loss: 1.4921468496322632,grad_norm: 0.999999959372934, iteration: 139483
loss: 1.607115387916565,grad_norm: 0.9999999808779639, iteration: 139484
loss: 1.4466301202774048,grad_norm: 0.9999999475981945, iteration: 139485
loss: 1.3551987409591675,grad_norm: 0.9999998390749554, iteration: 139486
loss: 1.5444949865341187,grad_norm: 0.9999998776326382, iteration: 139487
loss: 1.1749589443206787,grad_norm: 0.9999998008015667, iteration: 139488
loss: 1.2583788633346558,grad_norm: 0.9999999762455234, iteration: 139489
loss: 1.3106682300567627,grad_norm: 0.9999997909109987, iteration: 139490
loss: 1.5207113027572632,grad_norm: 0.9999999756531188, iteration: 139491
loss: 1.3383334875106812,grad_norm: 1.0000000038950776, iteration: 139492
loss: 1.835453748703003,grad_norm: 0.9999999950323035, iteration: 139493
loss: 1.4351898431777954,grad_norm: 0.9999997957918653, iteration: 139494
loss: 1.2170411348342896,grad_norm: 0.9999998883662317, iteration: 139495
loss: 1.3403791189193726,grad_norm: 0.9999999901155217, iteration: 139496
loss: 1.532362937927246,grad_norm: 0.9999998219307715, iteration: 139497
loss: 1.3711668252944946,grad_norm: 0.9999999359391863, iteration: 139498
loss: 1.2828866243362427,grad_norm: 1.0000000346347946, iteration: 139499
loss: 1.2793793678283691,grad_norm: 0.9999999038920021, iteration: 139500
loss: 1.3701132535934448,grad_norm: 0.9999999477849886, iteration: 139501
loss: 1.1601239442825317,grad_norm: 0.9999997255284493, iteration: 139502
loss: 1.714948296546936,grad_norm: 0.9999998821429266, iteration: 139503
loss: 1.447946548461914,grad_norm: 0.9999997865536252, iteration: 139504
loss: 1.4428777694702148,grad_norm: 0.9999999190538126, iteration: 139505
loss: 2.0355541706085205,grad_norm: 0.9999999320644654, iteration: 139506
loss: 1.3365103006362915,grad_norm: 1.0000000525646064, iteration: 139507
loss: 1.2426692247390747,grad_norm: 0.9999998558065886, iteration: 139508
loss: 1.557280421257019,grad_norm: 0.9999998076620736, iteration: 139509
loss: 1.3644460439682007,grad_norm: 0.9999999953308232, iteration: 139510
loss: 1.2351981401443481,grad_norm: 0.9999993204445616, iteration: 139511
loss: 1.4591456651687622,grad_norm: 0.9999999827499074, iteration: 139512
loss: 1.3646667003631592,grad_norm: 0.9999999336198332, iteration: 139513
loss: 1.6805580854415894,grad_norm: 0.9999999251409208, iteration: 139514
loss: 1.30937922000885,grad_norm: 0.999999917194674, iteration: 139515
loss: 1.3419297933578491,grad_norm: 1.000000049982455, iteration: 139516
loss: 1.2334681749343872,grad_norm: 0.9999999021554017, iteration: 139517
loss: 1.4975334405899048,grad_norm: 0.9999999834250474, iteration: 139518
loss: 1.2071789503097534,grad_norm: 0.9999998714507636, iteration: 139519
loss: 1.227442979812622,grad_norm: 0.9999999301998622, iteration: 139520
loss: 1.3878743648529053,grad_norm: 0.9999998863275007, iteration: 139521
loss: 1.3081303834915161,grad_norm: 0.9999998596423919, iteration: 139522
loss: 1.1490743160247803,grad_norm: 0.999999649258514, iteration: 139523
loss: 1.4381622076034546,grad_norm: 0.9999999552551967, iteration: 139524
loss: 1.3839796781539917,grad_norm: 0.9999998711034265, iteration: 139525
loss: 1.2038792371749878,grad_norm: 0.9999999404704711, iteration: 139526
loss: 1.4135533571243286,grad_norm: 1.0000000415754622, iteration: 139527
loss: 1.69284188747406,grad_norm: 0.9999999725590516, iteration: 139528
loss: 1.3351043462753296,grad_norm: 0.9999998037266883, iteration: 139529
loss: 1.3991888761520386,grad_norm: 1.0000000361650196, iteration: 139530
loss: 1.5414848327636719,grad_norm: 0.999999821168574, iteration: 139531
loss: 1.5681054592132568,grad_norm: 0.9999998901177196, iteration: 139532
loss: 1.265784502029419,grad_norm: 0.9999998478813243, iteration: 139533
loss: 1.1764791011810303,grad_norm: 0.9999997950741532, iteration: 139534
loss: 1.113938331604004,grad_norm: 0.9999995508612357, iteration: 139535
loss: 1.2352949380874634,grad_norm: 0.9999997053177767, iteration: 139536
loss: 1.1624189615249634,grad_norm: 0.9999999726478738, iteration: 139537
loss: 1.39028000831604,grad_norm: 0.9999998882685583, iteration: 139538
loss: 1.3114889860153198,grad_norm: 0.9999999596672584, iteration: 139539
loss: 1.5227982997894287,grad_norm: 0.9999999606281983, iteration: 139540
loss: 1.4391279220581055,grad_norm: 0.9999998520049731, iteration: 139541
loss: 1.5969696044921875,grad_norm: 0.9999998777186822, iteration: 139542
loss: 1.262776255607605,grad_norm: 0.9999998281598014, iteration: 139543
loss: 1.4230023622512817,grad_norm: 1.000000043367196, iteration: 139544
loss: 1.5938278436660767,grad_norm: 1.0000000554088821, iteration: 139545
loss: 1.4630175828933716,grad_norm: 0.9999998814029794, iteration: 139546
loss: 1.3791074752807617,grad_norm: 0.9999997817818813, iteration: 139547
loss: 1.1827452182769775,grad_norm: 0.9999999412619186, iteration: 139548
loss: 1.1401338577270508,grad_norm: 0.9999999582102983, iteration: 139549
loss: 1.171258807182312,grad_norm: 0.9999998322161917, iteration: 139550
loss: 1.3518177270889282,grad_norm: 1.0000000101463786, iteration: 139551
loss: 1.387082576751709,grad_norm: 0.9999999614349221, iteration: 139552
loss: 1.4168756008148193,grad_norm: 1.0000000844513506, iteration: 139553
loss: 1.2355256080627441,grad_norm: 0.9999996023435028, iteration: 139554
loss: 1.2118722200393677,grad_norm: 0.9999998846121231, iteration: 139555
loss: 1.3819652795791626,grad_norm: 0.9999998286275533, iteration: 139556
loss: 1.2172954082489014,grad_norm: 0.9999998793888468, iteration: 139557
loss: 1.6416330337524414,grad_norm: 0.9999999083338322, iteration: 139558
loss: 1.5246273279190063,grad_norm: 1.0000000107363562, iteration: 139559
loss: 1.464771032333374,grad_norm: 0.9999998845760807, iteration: 139560
loss: 1.7044386863708496,grad_norm: 1.0000000017766053, iteration: 139561
loss: 1.2994822263717651,grad_norm: 0.99999982971318, iteration: 139562
loss: 1.3741682767868042,grad_norm: 0.9999999021729304, iteration: 139563
loss: 1.3508052825927734,grad_norm: 0.9999998643222122, iteration: 139564
loss: 1.2830520868301392,grad_norm: 0.9999998408669982, iteration: 139565
loss: 1.3181357383728027,grad_norm: 0.9999998881004816, iteration: 139566
loss: 1.2603247165679932,grad_norm: 0.9999998921249915, iteration: 139567
loss: 1.301658272743225,grad_norm: 0.999999775024325, iteration: 139568
loss: 1.480947732925415,grad_norm: 0.9999998599424172, iteration: 139569
loss: 1.327206015586853,grad_norm: 1.0000000051723703, iteration: 139570
loss: 1.4460716247558594,grad_norm: 0.9999998992810931, iteration: 139571
loss: 1.6102687120437622,grad_norm: 0.9999997868120063, iteration: 139572
loss: 1.458949089050293,grad_norm: 0.9999999457396447, iteration: 139573
loss: 1.4273680448532104,grad_norm: 0.9999999878675083, iteration: 139574
loss: 1.3647302389144897,grad_norm: 0.9999999783368976, iteration: 139575
loss: 1.3750399351119995,grad_norm: 0.9999998949898963, iteration: 139576
loss: 1.1100236177444458,grad_norm: 0.9999998357131697, iteration: 139577
loss: 1.1423338651657104,grad_norm: 0.9999998824150864, iteration: 139578
loss: 1.4958648681640625,grad_norm: 1.0000000108938119, iteration: 139579
loss: 1.4922301769256592,grad_norm: 0.9999999108751823, iteration: 139580
loss: 1.3428285121917725,grad_norm: 1.0000000503027309, iteration: 139581
loss: 1.382128119468689,grad_norm: 0.9999999550666663, iteration: 139582
loss: 1.4381932020187378,grad_norm: 0.9999999708806049, iteration: 139583
loss: 1.313955307006836,grad_norm: 1.0000000070488353, iteration: 139584
loss: 1.4961401224136353,grad_norm: 0.9999999177854674, iteration: 139585
loss: 1.424150824546814,grad_norm: 0.999999975048949, iteration: 139586
loss: 1.0885341167449951,grad_norm: 0.9999993719404134, iteration: 139587
loss: 1.201436161994934,grad_norm: 1.0000000307356467, iteration: 139588
loss: 1.3239082098007202,grad_norm: 0.9999999429957414, iteration: 139589
loss: 1.4213091135025024,grad_norm: 0.9999998138334659, iteration: 139590
loss: 1.2862122058868408,grad_norm: 0.9999998280139755, iteration: 139591
loss: 1.6381512880325317,grad_norm: 0.9999998365912003, iteration: 139592
loss: 1.4510568380355835,grad_norm: 1.000000059532739, iteration: 139593
loss: 1.3270233869552612,grad_norm: 0.9999998988376849, iteration: 139594
loss: 1.2097439765930176,grad_norm: 0.9999999750614071, iteration: 139595
loss: 1.3959583044052124,grad_norm: 0.9999998603954446, iteration: 139596
loss: 1.6714699268341064,grad_norm: 0.9999999144086648, iteration: 139597
loss: 1.4703850746154785,grad_norm: 0.9999998760805909, iteration: 139598
loss: 1.2494158744812012,grad_norm: 0.9999999868337567, iteration: 139599
loss: 1.5693068504333496,grad_norm: 1.000000097216704, iteration: 139600
loss: 1.3620386123657227,grad_norm: 1.000000035434616, iteration: 139601
loss: 1.2213964462280273,grad_norm: 0.9999998977325765, iteration: 139602
loss: 1.3487125635147095,grad_norm: 0.9999997814250376, iteration: 139603
loss: 1.3538578748703003,grad_norm: 1.0000000045531163, iteration: 139604
loss: 1.553551197052002,grad_norm: 0.999999887777911, iteration: 139605
loss: 1.3538990020751953,grad_norm: 1.0000000086332639, iteration: 139606
loss: 1.7525521516799927,grad_norm: 0.9999999952363281, iteration: 139607
loss: 1.1302775144577026,grad_norm: 1.00000008663633, iteration: 139608
loss: 1.4618805646896362,grad_norm: 1.0000000168400043, iteration: 139609
loss: 1.2307443618774414,grad_norm: 0.9999999475735781, iteration: 139610
loss: 1.3254692554473877,grad_norm: 0.9999999866122377, iteration: 139611
loss: 1.5001564025878906,grad_norm: 0.9999999402797272, iteration: 139612
loss: 1.158022403717041,grad_norm: 0.999999995004036, iteration: 139613
loss: 1.6226640939712524,grad_norm: 0.999999921027181, iteration: 139614
loss: 1.2969250679016113,grad_norm: 0.9999998365212621, iteration: 139615
loss: 1.4669610261917114,grad_norm: 0.9999998699165371, iteration: 139616
loss: 1.2694412469863892,grad_norm: 0.999999786839108, iteration: 139617
loss: 1.1794359683990479,grad_norm: 1.0000000824221444, iteration: 139618
loss: 1.7036542892456055,grad_norm: 0.9999999207906228, iteration: 139619
loss: 1.531825065612793,grad_norm: 0.999999832547584, iteration: 139620
loss: 1.452030897140503,grad_norm: 0.9999998891588282, iteration: 139621
loss: 1.523506999015808,grad_norm: 1.00000006597258, iteration: 139622
loss: 1.4887813329696655,grad_norm: 0.9999999146420161, iteration: 139623
loss: 1.9176768064498901,grad_norm: 1.000000009869819, iteration: 139624
loss: 1.5940784215927124,grad_norm: 0.9999999953740396, iteration: 139625
loss: 1.7069311141967773,grad_norm: 1.0000000292943374, iteration: 139626
loss: 1.3405828475952148,grad_norm: 0.9999999475189235, iteration: 139627
loss: 1.6111090183258057,grad_norm: 0.9999998872286601, iteration: 139628
loss: 1.7869880199432373,grad_norm: 0.9999999902001419, iteration: 139629
loss: 1.274749517440796,grad_norm: 0.9999997780321951, iteration: 139630
loss: 1.3201611042022705,grad_norm: 0.9999998662869992, iteration: 139631
loss: 1.3772923946380615,grad_norm: 0.9999999316633639, iteration: 139632
loss: 1.6278232336044312,grad_norm: 0.9999999939676246, iteration: 139633
loss: 1.6642130613327026,grad_norm: 0.9999998785445157, iteration: 139634
loss: 1.3811336755752563,grad_norm: 0.9999998966539608, iteration: 139635
loss: 1.3201308250427246,grad_norm: 0.9999998874755047, iteration: 139636
loss: 1.344887375831604,grad_norm: 0.9999998379034073, iteration: 139637
loss: 1.5843234062194824,grad_norm: 0.9999998902031842, iteration: 139638
loss: 1.2211806774139404,grad_norm: 0.9999996853467247, iteration: 139639
loss: 1.2762460708618164,grad_norm: 0.9999999383150369, iteration: 139640
loss: 1.2326420545578003,grad_norm: 0.9999999311814477, iteration: 139641
loss: 1.303721308708191,grad_norm: 0.9999998915455413, iteration: 139642
loss: 1.1267443895339966,grad_norm: 1.0000000466154235, iteration: 139643
loss: 1.155303716659546,grad_norm: 1.00000000024907, iteration: 139644
loss: 1.1732734441757202,grad_norm: 0.9999998254526067, iteration: 139645
loss: 1.3800427913665771,grad_norm: 0.9999999973727787, iteration: 139646
loss: 1.397940993309021,grad_norm: 0.9999997142317648, iteration: 139647
loss: 1.1509623527526855,grad_norm: 0.9999999262034489, iteration: 139648
loss: 1.2100547552108765,grad_norm: 0.9999997236221138, iteration: 139649
loss: 1.268068790435791,grad_norm: 0.9999998758401725, iteration: 139650
loss: 1.6659750938415527,grad_norm: 0.9999999907608608, iteration: 139651
loss: 1.250070571899414,grad_norm: 0.999999984128613, iteration: 139652
loss: 1.3281179666519165,grad_norm: 0.999999884596361, iteration: 139653
loss: 1.4751559495925903,grad_norm: 0.9999999376881917, iteration: 139654
loss: 1.2695791721343994,grad_norm: 0.9999998341403046, iteration: 139655
loss: 1.167975664138794,grad_norm: 0.9999998879452116, iteration: 139656
loss: 1.9443695545196533,grad_norm: 0.9999999454523576, iteration: 139657
loss: 1.249800205230713,grad_norm: 0.9999999375576157, iteration: 139658
loss: 1.1371238231658936,grad_norm: 0.9999999059050976, iteration: 139659
loss: 1.1243585348129272,grad_norm: 0.9999999928543113, iteration: 139660
loss: 1.386928677558899,grad_norm: 1.000000048911951, iteration: 139661
loss: 1.1694895029067993,grad_norm: 0.9999999519354906, iteration: 139662
loss: 1.4060956239700317,grad_norm: 0.9999998675949048, iteration: 139663
loss: 1.3506858348846436,grad_norm: 0.9999998880362796, iteration: 139664
loss: 1.1740119457244873,grad_norm: 0.9999999199987519, iteration: 139665
loss: 1.2704424858093262,grad_norm: 0.9999999920189302, iteration: 139666
loss: 1.1596652269363403,grad_norm: 0.9999998687881272, iteration: 139667
loss: 1.342966914176941,grad_norm: 0.99999976490312, iteration: 139668
loss: 1.2826318740844727,grad_norm: 0.9999996700316247, iteration: 139669
loss: 1.154511570930481,grad_norm: 0.9999994312517966, iteration: 139670
loss: 1.1265195608139038,grad_norm: 0.9999995770985232, iteration: 139671
loss: 1.4321602582931519,grad_norm: 0.9999998733314683, iteration: 139672
loss: 1.5651499032974243,grad_norm: 1.0000000029902354, iteration: 139673
loss: 1.155703067779541,grad_norm: 0.9999999169270453, iteration: 139674
loss: 1.2923789024353027,grad_norm: 1.0000000497757147, iteration: 139675
loss: 1.8080772161483765,grad_norm: 0.9999999356812257, iteration: 139676
loss: 1.139015793800354,grad_norm: 0.999999906684419, iteration: 139677
loss: 1.2890275716781616,grad_norm: 0.9999997912430773, iteration: 139678
loss: 1.4298099279403687,grad_norm: 0.9999997622412786, iteration: 139679
loss: 1.1087857484817505,grad_norm: 0.9999998997858187, iteration: 139680
loss: 1.2012555599212646,grad_norm: 0.9999998237475852, iteration: 139681
loss: 1.2696852684020996,grad_norm: 1.0000000083899805, iteration: 139682
loss: 1.3919012546539307,grad_norm: 0.9999999083393731, iteration: 139683
loss: 1.451646089553833,grad_norm: 0.9999999413968343, iteration: 139684
loss: 1.1667871475219727,grad_norm: 0.9999996070099209, iteration: 139685
loss: 1.2591928243637085,grad_norm: 0.9999997204101428, iteration: 139686
loss: 1.4440995454788208,grad_norm: 0.9999997810495901, iteration: 139687
loss: 1.3834501504898071,grad_norm: 0.9999998936671523, iteration: 139688
loss: 1.0550458431243896,grad_norm: 0.9999999040825246, iteration: 139689
loss: 1.3975591659545898,grad_norm: 0.9999999181170446, iteration: 139690
loss: 1.2987926006317139,grad_norm: 0.9999997485988504, iteration: 139691
loss: 1.4715182781219482,grad_norm: 0.9999999035629327, iteration: 139692
loss: 1.163013219833374,grad_norm: 0.9999997960608441, iteration: 139693
loss: 1.2635295391082764,grad_norm: 0.9999999768316721, iteration: 139694
loss: 1.2979984283447266,grad_norm: 0.9999999653893615, iteration: 139695
loss: 1.1811017990112305,grad_norm: 1.0000000191351952, iteration: 139696
loss: 1.3460463285446167,grad_norm: 0.9999999174608281, iteration: 139697
loss: 1.0941383838653564,grad_norm: 0.9999998043918376, iteration: 139698
loss: 1.176891565322876,grad_norm: 0.999999939452103, iteration: 139699
loss: 1.1894742250442505,grad_norm: 0.9999997882011339, iteration: 139700
loss: 1.3546686172485352,grad_norm: 0.9999999705377693, iteration: 139701
loss: 1.1418238878250122,grad_norm: 0.9999997052954568, iteration: 139702
loss: 1.4213083982467651,grad_norm: 0.9999999920888885, iteration: 139703
loss: 1.3062119483947754,grad_norm: 0.999999716419668, iteration: 139704
loss: 1.1967970132827759,grad_norm: 0.9999998521893321, iteration: 139705
loss: 1.4435573816299438,grad_norm: 0.9999999538122774, iteration: 139706
loss: 1.1610056161880493,grad_norm: 0.9999998574192178, iteration: 139707
loss: 1.2656151056289673,grad_norm: 1.0000000177460588, iteration: 139708
loss: 1.2770918607711792,grad_norm: 0.999999961769976, iteration: 139709
loss: 1.6066290140151978,grad_norm: 0.9999999515526085, iteration: 139710
loss: 1.1767799854278564,grad_norm: 1.0000001285049673, iteration: 139711
loss: 1.2151685953140259,grad_norm: 0.9999997995696497, iteration: 139712
loss: 1.0925037860870361,grad_norm: 0.9999997512721219, iteration: 139713
loss: 1.103554129600525,grad_norm: 0.9999999013935422, iteration: 139714
loss: 1.516925573348999,grad_norm: 0.9999997662507957, iteration: 139715
loss: 1.3159347772598267,grad_norm: 0.9999997976015427, iteration: 139716
loss: 1.4955803155899048,grad_norm: 0.9999999153844598, iteration: 139717
loss: 1.2864071130752563,grad_norm: 0.9999999393322813, iteration: 139718
loss: 1.3398245573043823,grad_norm: 0.9999998616764842, iteration: 139719
loss: 1.204667091369629,grad_norm: 1.0000000461030836, iteration: 139720
loss: 1.3026236295700073,grad_norm: 0.999999833587276, iteration: 139721
loss: 1.6117448806762695,grad_norm: 1.000000062679605, iteration: 139722
loss: 1.4802168607711792,grad_norm: 1.0000000136223592, iteration: 139723
loss: 1.2733197212219238,grad_norm: 0.9999999241511802, iteration: 139724
loss: 1.4025354385375977,grad_norm: 0.9999999492181654, iteration: 139725
loss: 1.2109462022781372,grad_norm: 0.9999998284354996, iteration: 139726
loss: 1.0752665996551514,grad_norm: 0.9999995327238305, iteration: 139727
loss: 1.5543992519378662,grad_norm: 1.0000000674345493, iteration: 139728
loss: 1.4828577041625977,grad_norm: 0.9999998312129486, iteration: 139729
loss: 1.591732382774353,grad_norm: 1.000000010641601, iteration: 139730
loss: 1.2293241024017334,grad_norm: 0.9999998890688778, iteration: 139731
loss: 1.375840425491333,grad_norm: 0.9999999590110719, iteration: 139732
loss: 1.1720387935638428,grad_norm: 0.9999999845896514, iteration: 139733
loss: 1.5584148168563843,grad_norm: 0.9999999083926503, iteration: 139734
loss: 1.1425774097442627,grad_norm: 0.9999997082879758, iteration: 139735
loss: 1.2603801488876343,grad_norm: 0.9999999718710367, iteration: 139736
loss: 1.376538634300232,grad_norm: 0.9999999719260351, iteration: 139737
loss: 1.4033513069152832,grad_norm: 1.0000000473483268, iteration: 139738
loss: 1.2281382083892822,grad_norm: 0.99999991001295, iteration: 139739
loss: 1.3682186603546143,grad_norm: 0.9999999034861425, iteration: 139740
loss: 1.4309252500534058,grad_norm: 1.0000000202394934, iteration: 139741
loss: 1.2862868309020996,grad_norm: 1.0000000328272176, iteration: 139742
loss: 1.3868632316589355,grad_norm: 0.9999999525442321, iteration: 139743
loss: 1.2525200843811035,grad_norm: 0.999999985093306, iteration: 139744
loss: 1.4849897623062134,grad_norm: 0.9999997558533823, iteration: 139745
loss: 1.647937536239624,grad_norm: 0.9999998131595954, iteration: 139746
loss: 1.5543320178985596,grad_norm: 0.9999999292424788, iteration: 139747
loss: 1.2976231575012207,grad_norm: 0.9999998206021952, iteration: 139748
loss: 1.4347842931747437,grad_norm: 0.9999996140380578, iteration: 139749
loss: 1.4059618711471558,grad_norm: 0.999999918732509, iteration: 139750
loss: 1.2509907484054565,grad_norm: 0.9999999076364975, iteration: 139751
loss: 2.0846736431121826,grad_norm: 0.9999999166822388, iteration: 139752
loss: 1.3808780908584595,grad_norm: 1.0000000653084877, iteration: 139753
loss: 2.0782408714294434,grad_norm: 0.9999999224602273, iteration: 139754
loss: 1.3784617185592651,grad_norm: 0.9999999508641831, iteration: 139755
loss: 1.4254788160324097,grad_norm: 1.0000000149429313, iteration: 139756
loss: 1.437842845916748,grad_norm: 0.9999999198896631, iteration: 139757
loss: 1.4937962293624878,grad_norm: 0.9999997384735386, iteration: 139758
loss: 1.4817856550216675,grad_norm: 0.9999999652832597, iteration: 139759
loss: 1.8462716341018677,grad_norm: 1.0000000022980704, iteration: 139760
loss: 1.3815261125564575,grad_norm: 0.9999998806635195, iteration: 139761
loss: 1.285783290863037,grad_norm: 0.9999998433550141, iteration: 139762
loss: 1.7702938318252563,grad_norm: 1.000000049446216, iteration: 139763
loss: 1.3218209743499756,grad_norm: 1.000000008429681, iteration: 139764
loss: 1.5615888833999634,grad_norm: 0.99999987639332, iteration: 139765
loss: 1.6955089569091797,grad_norm: 1.0000000159195543, iteration: 139766
loss: 1.3177191019058228,grad_norm: 0.9999999136038201, iteration: 139767
loss: 1.4448564052581787,grad_norm: 0.9999998580012316, iteration: 139768
loss: 1.6209698915481567,grad_norm: 1.0000000335998966, iteration: 139769
loss: 1.3017910718917847,grad_norm: 1.0000000170043863, iteration: 139770
loss: 1.273470401763916,grad_norm: 0.9999999352475395, iteration: 139771
loss: 1.4231356382369995,grad_norm: 0.999999737390884, iteration: 139772
loss: 1.3410992622375488,grad_norm: 1.000000014361233, iteration: 139773
loss: 1.4873605966567993,grad_norm: 0.9999999858951162, iteration: 139774
loss: 1.499712347984314,grad_norm: 0.9999999329684436, iteration: 139775
loss: 1.205773115158081,grad_norm: 0.9999998259110402, iteration: 139776
loss: 1.30357027053833,grad_norm: 1.0000000864580838, iteration: 139777
loss: 1.3218166828155518,grad_norm: 0.9999999429875641, iteration: 139778
loss: 1.9042742252349854,grad_norm: 0.9999999372762958, iteration: 139779
loss: 1.474547266960144,grad_norm: 0.9999997023340671, iteration: 139780
loss: 1.785760760307312,grad_norm: 0.9999999172596281, iteration: 139781
loss: 1.4382238388061523,grad_norm: 0.9999999081417864, iteration: 139782
loss: 1.8178006410598755,grad_norm: 0.9999999751437034, iteration: 139783
loss: 1.708272099494934,grad_norm: 1.0000000273080767, iteration: 139784
loss: 1.1610801219940186,grad_norm: 0.9999998354336366, iteration: 139785
loss: 1.7889333963394165,grad_norm: 0.9999999851530597, iteration: 139786
loss: 1.214176058769226,grad_norm: 0.9999999463470631, iteration: 139787
loss: 1.7198772430419922,grad_norm: 0.9999999864978522, iteration: 139788
loss: 1.5618488788604736,grad_norm: 1.000000023447991, iteration: 139789
loss: 1.41385018825531,grad_norm: 0.9999999048917492, iteration: 139790
loss: 1.3717375993728638,grad_norm: 0.9999996084214549, iteration: 139791
loss: 1.3087999820709229,grad_norm: 0.9999999232574747, iteration: 139792
loss: 1.211227536201477,grad_norm: 0.9999995523740682, iteration: 139793
loss: 1.2035285234451294,grad_norm: 0.9999999144704477, iteration: 139794
loss: 1.408643364906311,grad_norm: 1.0000000439595922, iteration: 139795
loss: 1.3472329378128052,grad_norm: 0.9999998764598022, iteration: 139796
loss: 1.3477751016616821,grad_norm: 0.9999998791050837, iteration: 139797
loss: 1.5826106071472168,grad_norm: 1.0000000017039847, iteration: 139798
loss: 1.575882077217102,grad_norm: 0.9999999087677173, iteration: 139799
loss: 1.2376139163970947,grad_norm: 0.9999999312239027, iteration: 139800
loss: 1.8513648509979248,grad_norm: 1.0000000182659767, iteration: 139801
loss: 1.223931074142456,grad_norm: 0.9999998893030176, iteration: 139802
loss: 1.604500412940979,grad_norm: 0.9999998557781342, iteration: 139803
loss: 1.7243590354919434,grad_norm: 1.0000000606590995, iteration: 139804
loss: 1.64893639087677,grad_norm: 0.9999999938831571, iteration: 139805
loss: 1.7423453330993652,grad_norm: 0.9999999227970627, iteration: 139806
loss: 1.4180275201797485,grad_norm: 0.9999998207782738, iteration: 139807
loss: 1.507145643234253,grad_norm: 0.9999998439008536, iteration: 139808
loss: 1.419913649559021,grad_norm: 0.9999998833108215, iteration: 139809
loss: 1.36543869972229,grad_norm: 0.9999999654953325, iteration: 139810
loss: 1.2639714479446411,grad_norm: 0.9999998923285296, iteration: 139811
loss: 1.4893555641174316,grad_norm: 0.9999998884449536, iteration: 139812
loss: 1.1595966815948486,grad_norm: 0.9999993859238803, iteration: 139813
loss: 1.454206109046936,grad_norm: 1.0000000455051592, iteration: 139814
loss: 1.2672818899154663,grad_norm: 0.999999930123308, iteration: 139815
loss: 1.3231730461120605,grad_norm: 0.9999997245407118, iteration: 139816
loss: 1.299821376800537,grad_norm: 0.9999999170610324, iteration: 139817
loss: 1.3983337879180908,grad_norm: 0.999999945929027, iteration: 139818
loss: 1.3111203908920288,grad_norm: 1.0000000820406818, iteration: 139819
loss: 1.1024653911590576,grad_norm: 0.9999997887743258, iteration: 139820
loss: 1.5080660581588745,grad_norm: 0.9999999396011076, iteration: 139821
loss: 1.3343371152877808,grad_norm: 0.9999997894199181, iteration: 139822
loss: 1.1993496417999268,grad_norm: 0.9999996670561881, iteration: 139823
loss: 1.1731196641921997,grad_norm: 0.9999998255386047, iteration: 139824
loss: 1.1224446296691895,grad_norm: 0.9999993822125356, iteration: 139825
loss: 1.4830893278121948,grad_norm: 0.9999999995137413, iteration: 139826
loss: 1.2430665493011475,grad_norm: 0.9999998368169206, iteration: 139827
loss: 1.7737444639205933,grad_norm: 0.9999999353330876, iteration: 139828
loss: 1.2825103998184204,grad_norm: 0.9999996931918408, iteration: 139829
loss: 1.3518147468566895,grad_norm: 0.9999999053201611, iteration: 139830
loss: 1.183570384979248,grad_norm: 0.9999997429004882, iteration: 139831
loss: 1.2941184043884277,grad_norm: 0.9999996181437677, iteration: 139832
loss: 1.479474425315857,grad_norm: 0.9999996511390375, iteration: 139833
loss: 1.747572422027588,grad_norm: 0.9999999393378891, iteration: 139834
loss: 1.4651669263839722,grad_norm: 0.9999999277073186, iteration: 139835
loss: 1.3914259672164917,grad_norm: 0.9999998285953119, iteration: 139836
loss: 1.4357887506484985,grad_norm: 0.9999999036852963, iteration: 139837
loss: 1.262130856513977,grad_norm: 0.9999999558177144, iteration: 139838
loss: 1.5391391515731812,grad_norm: 0.999999944812996, iteration: 139839
loss: 1.3948098421096802,grad_norm: 1.0000000390701167, iteration: 139840
loss: 1.1907564401626587,grad_norm: 0.999999861069509, iteration: 139841
loss: 1.398238182067871,grad_norm: 0.9999999389837683, iteration: 139842
loss: 1.0110265016555786,grad_norm: 0.999999296824253, iteration: 139843
loss: 1.4535279273986816,grad_norm: 1.000000010652883, iteration: 139844
loss: 1.248212456703186,grad_norm: 0.9999999210168586, iteration: 139845
loss: 1.4277652502059937,grad_norm: 0.9999999647898019, iteration: 139846
loss: 1.4106985330581665,grad_norm: 0.9999998187989005, iteration: 139847
loss: 1.3457585573196411,grad_norm: 0.999999736763923, iteration: 139848
loss: 1.5288398265838623,grad_norm: 0.9999995945787649, iteration: 139849
loss: 1.2700297832489014,grad_norm: 0.9999997164981869, iteration: 139850
loss: 1.400144338607788,grad_norm: 0.9999998340516432, iteration: 139851
loss: 1.2159299850463867,grad_norm: 0.9999999202971275, iteration: 139852
loss: 1.3049668073654175,grad_norm: 1.0000000833217637, iteration: 139853
loss: 1.3020185232162476,grad_norm: 0.9999999176621279, iteration: 139854
loss: 1.3039472103118896,grad_norm: 0.9999998663498366, iteration: 139855
loss: 1.2210007905960083,grad_norm: 0.999999807810301, iteration: 139856
loss: 1.3605629205703735,grad_norm: 0.9999999889092485, iteration: 139857
loss: 1.4557551145553589,grad_norm: 0.9999999486243686, iteration: 139858
loss: 1.6951946020126343,grad_norm: 0.999999918611668, iteration: 139859
loss: 1.2726643085479736,grad_norm: 0.9999999703571171, iteration: 139860
loss: 1.4019298553466797,grad_norm: 0.9999996778676578, iteration: 139861
loss: 1.2656546831130981,grad_norm: 0.9999998742941921, iteration: 139862
loss: 1.3926031589508057,grad_norm: 0.9999998135368358, iteration: 139863
loss: 1.206102728843689,grad_norm: 0.9999998864835645, iteration: 139864
loss: 1.5655078887939453,grad_norm: 1.0000000198255623, iteration: 139865
loss: 1.2626904249191284,grad_norm: 1.0000000058835896, iteration: 139866
loss: 1.1339071989059448,grad_norm: 0.9999996649519339, iteration: 139867
loss: 1.2054353952407837,grad_norm: 0.9999999283412512, iteration: 139868
loss: 1.2124431133270264,grad_norm: 1.0000000224626155, iteration: 139869
loss: 1.3414883613586426,grad_norm: 1.0000000540539646, iteration: 139870
loss: 1.3828238248825073,grad_norm: 0.9999999372643124, iteration: 139871
loss: 1.4865174293518066,grad_norm: 0.9999999344981945, iteration: 139872
loss: 1.4829155206680298,grad_norm: 0.9999999517845611, iteration: 139873
loss: 1.4400895833969116,grad_norm: 0.9999999425659531, iteration: 139874
loss: 1.2378021478652954,grad_norm: 0.999999925803292, iteration: 139875
loss: 1.3547497987747192,grad_norm: 0.9999999304886222, iteration: 139876
loss: 1.1597272157669067,grad_norm: 0.9999996389378508, iteration: 139877
loss: 1.234826922416687,grad_norm: 0.9999999605162638, iteration: 139878
loss: 1.1911543607711792,grad_norm: 0.9999999362483841, iteration: 139879
loss: 1.158769965171814,grad_norm: 0.9999999824299056, iteration: 139880
loss: 1.3914917707443237,grad_norm: 0.9999997702952071, iteration: 139881
loss: 1.4201383590698242,grad_norm: 0.9999998087676589, iteration: 139882
loss: 1.3223763704299927,grad_norm: 0.9999997971262903, iteration: 139883
loss: 1.2866721153259277,grad_norm: 0.9999999848166122, iteration: 139884
loss: 1.2427008152008057,grad_norm: 0.9999999865897959, iteration: 139885
loss: 1.5467002391815186,grad_norm: 0.9999999095019912, iteration: 139886
loss: 1.2859766483306885,grad_norm: 0.9999998013241513, iteration: 139887
loss: 1.3638875484466553,grad_norm: 0.9999999297822941, iteration: 139888
loss: 1.4928125143051147,grad_norm: 0.9999999029996339, iteration: 139889
loss: 1.3470747470855713,grad_norm: 0.9999999679239144, iteration: 139890
loss: 1.2366833686828613,grad_norm: 0.9999997404813181, iteration: 139891
loss: 1.199873685836792,grad_norm: 0.9999997800092977, iteration: 139892
loss: 1.4969998598098755,grad_norm: 0.9999998588185436, iteration: 139893
loss: 1.522116780281067,grad_norm: 0.9999999048240437, iteration: 139894
loss: 1.4148114919662476,grad_norm: 0.9999999117314042, iteration: 139895
loss: 1.3834697008132935,grad_norm: 0.9999999063287242, iteration: 139896
loss: 1.2610318660736084,grad_norm: 0.9999996443030722, iteration: 139897
loss: 1.3367727994918823,grad_norm: 0.9999996394701018, iteration: 139898
loss: 1.4559764862060547,grad_norm: 0.9999999304835892, iteration: 139899
loss: 1.171755075454712,grad_norm: 0.9999997520193865, iteration: 139900
loss: 1.2602571249008179,grad_norm: 0.9999999786797796, iteration: 139901
loss: 1.216530442237854,grad_norm: 0.9999997282152174, iteration: 139902
loss: 1.2645732164382935,grad_norm: 0.9999998873174297, iteration: 139903
loss: 1.2929294109344482,grad_norm: 0.9999999554212, iteration: 139904
loss: 1.4046920537948608,grad_norm: 0.9999999142919184, iteration: 139905
loss: 1.2847604751586914,grad_norm: 0.9999999340384069, iteration: 139906
loss: 1.1788465976715088,grad_norm: 0.99999955886725, iteration: 139907
loss: 1.1974698305130005,grad_norm: 0.9999998489934212, iteration: 139908
loss: 1.3742964267730713,grad_norm: 0.9999998793876012, iteration: 139909
loss: 1.2346769571304321,grad_norm: 0.999999515184018, iteration: 139910
loss: 1.2772363424301147,grad_norm: 0.999999884925684, iteration: 139911
loss: 1.3738529682159424,grad_norm: 0.9999998568744496, iteration: 139912
loss: 1.2196755409240723,grad_norm: 0.9999998426894309, iteration: 139913
loss: 1.2023555040359497,grad_norm: 0.999999823609188, iteration: 139914
loss: 1.3323948383331299,grad_norm: 0.9999997652294194, iteration: 139915
loss: 1.1353062391281128,grad_norm: 0.9999997465916269, iteration: 139916
loss: 1.2557549476623535,grad_norm: 0.9999998115887345, iteration: 139917
loss: 1.1908998489379883,grad_norm: 0.9999995320595917, iteration: 139918
loss: 1.235326886177063,grad_norm: 0.9999999887508035, iteration: 139919
loss: 1.3072783946990967,grad_norm: 0.9999998888816829, iteration: 139920
loss: 1.3560352325439453,grad_norm: 0.999999805302441, iteration: 139921
loss: 1.316788911819458,grad_norm: 0.9999997677386324, iteration: 139922
loss: 1.6274627447128296,grad_norm: 0.9999999666549001, iteration: 139923
loss: 1.54042649269104,grad_norm: 0.9999998998949519, iteration: 139924
loss: 1.238458514213562,grad_norm: 0.9999998626502982, iteration: 139925
loss: 1.5078914165496826,grad_norm: 0.9999998382513033, iteration: 139926
loss: 1.464460015296936,grad_norm: 0.9999998368803132, iteration: 139927
loss: 1.2554429769515991,grad_norm: 0.9999998424304417, iteration: 139928
loss: 1.1985383033752441,grad_norm: 0.9999999236962427, iteration: 139929
loss: 1.4901723861694336,grad_norm: 0.9999998484794755, iteration: 139930
loss: 1.342458724975586,grad_norm: 0.9999999133487659, iteration: 139931
loss: 1.3239994049072266,grad_norm: 0.9999995985448714, iteration: 139932
loss: 1.500952124595642,grad_norm: 0.9999999368999489, iteration: 139933
loss: 1.1793118715286255,grad_norm: 0.9999998215128798, iteration: 139934
loss: 1.1851134300231934,grad_norm: 0.9999998926664475, iteration: 139935
loss: 1.7755297422409058,grad_norm: 1.0000000910328501, iteration: 139936
loss: 1.3662241697311401,grad_norm: 1.0000000245412461, iteration: 139937
loss: 1.115838646888733,grad_norm: 0.9999995199653369, iteration: 139938
loss: 1.6018873453140259,grad_norm: 1.0000000669992202, iteration: 139939
loss: 1.5811272859573364,grad_norm: 1.0000000468237236, iteration: 139940
loss: 1.5402944087982178,grad_norm: 1.0000000380226148, iteration: 139941
loss: 1.5171927213668823,grad_norm: 1.0000000642965294, iteration: 139942
loss: 1.417379379272461,grad_norm: 0.999999776419629, iteration: 139943
loss: 1.3634114265441895,grad_norm: 0.9999998763968678, iteration: 139944
loss: 2.0011849403381348,grad_norm: 0.9999998263549896, iteration: 139945
loss: 1.4330270290374756,grad_norm: 0.9999999121605906, iteration: 139946
loss: 1.2381415367126465,grad_norm: 0.9999998381373346, iteration: 139947
loss: 1.2391166687011719,grad_norm: 0.9999997941599821, iteration: 139948
loss: 1.3965575695037842,grad_norm: 0.9999997549938208, iteration: 139949
loss: 1.2496012449264526,grad_norm: 0.9999999556620759, iteration: 139950
loss: 1.3301860094070435,grad_norm: 0.9999997422198889, iteration: 139951
loss: 1.59458589553833,grad_norm: 0.9999995593927122, iteration: 139952
loss: 1.1972304582595825,grad_norm: 0.9999999732352653, iteration: 139953
loss: 1.4094328880310059,grad_norm: 1.000000004082629, iteration: 139954
loss: 1.6992346048355103,grad_norm: 1.0000000076931463, iteration: 139955
loss: 1.1789233684539795,grad_norm: 0.9999999918169836, iteration: 139956
loss: 1.4040895700454712,grad_norm: 0.9999998247149173, iteration: 139957
loss: 1.156600832939148,grad_norm: 0.9999998709609793, iteration: 139958
loss: 1.1513583660125732,grad_norm: 1.0000000175258017, iteration: 139959
loss: 1.5144548416137695,grad_norm: 0.9999999842985798, iteration: 139960
loss: 1.5255680084228516,grad_norm: 0.9999999436239534, iteration: 139961
loss: 1.5013643503189087,grad_norm: 0.9999998746239942, iteration: 139962
loss: 1.2634799480438232,grad_norm: 0.9999998152675493, iteration: 139963
loss: 1.6445751190185547,grad_norm: 0.9999999896324053, iteration: 139964
loss: 1.4441152811050415,grad_norm: 0.9999999854521007, iteration: 139965
loss: 1.5438917875289917,grad_norm: 0.9999999377086262, iteration: 139966
loss: 1.4353820085525513,grad_norm: 1.0000000388453387, iteration: 139967
loss: 1.2865651845932007,grad_norm: 0.9999999098299757, iteration: 139968
loss: 1.355353593826294,grad_norm: 0.9999998113438943, iteration: 139969
loss: 1.2988309860229492,grad_norm: 0.999999852223915, iteration: 139970
loss: 1.4232029914855957,grad_norm: 0.9999999664514024, iteration: 139971
loss: 1.6393849849700928,grad_norm: 1.000000030770024, iteration: 139972
loss: 1.3298524618148804,grad_norm: 0.9999999709109413, iteration: 139973
loss: 1.6465017795562744,grad_norm: 0.9999998765741298, iteration: 139974
loss: 1.3073770999908447,grad_norm: 0.9999999086826025, iteration: 139975
loss: 1.5362083911895752,grad_norm: 0.9999999241207792, iteration: 139976
loss: 1.3857022523880005,grad_norm: 0.9999998423426114, iteration: 139977
loss: 1.4311375617980957,grad_norm: 0.9999995585065905, iteration: 139978
loss: 1.3196797370910645,grad_norm: 0.9999999431167125, iteration: 139979
loss: 1.3866856098175049,grad_norm: 0.9999999246382478, iteration: 139980
loss: 1.4112203121185303,grad_norm: 0.9999998726834445, iteration: 139981
loss: 1.2496402263641357,grad_norm: 0.9999996634683457, iteration: 139982
loss: 1.1627076864242554,grad_norm: 0.9999996751809119, iteration: 139983
loss: 1.2336146831512451,grad_norm: 0.9999998878987384, iteration: 139984
loss: 1.1265558004379272,grad_norm: 0.9999998309767305, iteration: 139985
loss: 1.3825187683105469,grad_norm: 0.9999998171205987, iteration: 139986
loss: 1.4461747407913208,grad_norm: 0.9999998780353601, iteration: 139987
loss: 1.3449673652648926,grad_norm: 0.9999998145606498, iteration: 139988
loss: 1.589049220085144,grad_norm: 0.9999999128523901, iteration: 139989
loss: 1.349387764930725,grad_norm: 1.0000000510354818, iteration: 139990
loss: 1.3967928886413574,grad_norm: 0.9999996612942031, iteration: 139991
loss: 1.3523918390274048,grad_norm: 0.9999999341493586, iteration: 139992
loss: 1.2268966436386108,grad_norm: 0.9999999358556546, iteration: 139993
loss: 1.4046703577041626,grad_norm: 1.0000000069297108, iteration: 139994
loss: 1.4187544584274292,grad_norm: 0.9999999896777828, iteration: 139995
loss: 1.167314052581787,grad_norm: 1.0000000223979115, iteration: 139996
loss: 1.303720235824585,grad_norm: 0.9999997888483542, iteration: 139997
loss: 1.4854295253753662,grad_norm: 0.9999998449779975, iteration: 139998
loss: 1.3352420330047607,grad_norm: 0.9999998373082917, iteration: 139999
loss: 1.179065227508545,grad_norm: 0.9999999060828503, iteration: 140000
Evaluating at step 140000
{'val': 1.2717985101044178, 'test': 2.4352680486612046}
loss: 1.3189924955368042,grad_norm: 0.999999761198701, iteration: 140001
loss: 1.1249639987945557,grad_norm: 0.9999996878104666, iteration: 140002
loss: 1.3847483396530151,grad_norm: 0.9999999622938052, iteration: 140003
loss: 1.404031753540039,grad_norm: 0.9999997975862187, iteration: 140004
loss: 1.450348973274231,grad_norm: 1.000000004549167, iteration: 140005
loss: 1.6048378944396973,grad_norm: 0.9999999471758043, iteration: 140006
loss: 1.2244428396224976,grad_norm: 0.9999994767347956, iteration: 140007
loss: 1.2786140441894531,grad_norm: 0.9999998691208137, iteration: 140008
loss: 1.3601281642913818,grad_norm: 0.9999996652286525, iteration: 140009
loss: 1.3377885818481445,grad_norm: 0.9999998373112369, iteration: 140010
loss: 1.2948988676071167,grad_norm: 0.9999999759353184, iteration: 140011
loss: 1.6164213418960571,grad_norm: 0.999999876222826, iteration: 140012
loss: 1.5056400299072266,grad_norm: 0.9999999416216748, iteration: 140013
loss: 1.554945945739746,grad_norm: 0.9999999829290718, iteration: 140014
loss: 1.2987878322601318,grad_norm: 0.9999998741486903, iteration: 140015
loss: 1.2809005975723267,grad_norm: 0.9999998519637274, iteration: 140016
loss: 1.2072484493255615,grad_norm: 0.9999998924152101, iteration: 140017
loss: 1.5104016065597534,grad_norm: 0.9999999346065548, iteration: 140018
loss: 1.1215330362319946,grad_norm: 0.9999993348624505, iteration: 140019
loss: 1.265127182006836,grad_norm: 0.9999999289678982, iteration: 140020
loss: 1.4354192018508911,grad_norm: 1.000000040818761, iteration: 140021
loss: 1.5998841524124146,grad_norm: 0.9999998699175616, iteration: 140022
loss: 1.1969407796859741,grad_norm: 0.9999999086155265, iteration: 140023
loss: 1.2956827878952026,grad_norm: 0.9999998706858447, iteration: 140024
loss: 1.3451881408691406,grad_norm: 0.9999998268064595, iteration: 140025
loss: 1.2847095727920532,grad_norm: 0.9999999738494759, iteration: 140026
loss: 1.300841212272644,grad_norm: 0.9999998970833581, iteration: 140027
loss: 1.4955804347991943,grad_norm: 0.9999999028237315, iteration: 140028
loss: 1.3318802118301392,grad_norm: 0.9999998690158591, iteration: 140029
loss: 1.425791621208191,grad_norm: 1.0000000463720746, iteration: 140030
loss: 1.410378098487854,grad_norm: 0.9999999792050454, iteration: 140031
loss: 1.6153357028961182,grad_norm: 0.9999999024091356, iteration: 140032
loss: 1.096977710723877,grad_norm: 0.9999996764593012, iteration: 140033
loss: 1.2508888244628906,grad_norm: 0.9999996457486543, iteration: 140034
loss: 1.2053791284561157,grad_norm: 0.9999999012626085, iteration: 140035
loss: 1.3246643543243408,grad_norm: 0.9999998122991066, iteration: 140036
loss: 1.386605143547058,grad_norm: 0.9999998647684427, iteration: 140037
loss: 1.3013321161270142,grad_norm: 0.9999999491221678, iteration: 140038
loss: 1.2453283071517944,grad_norm: 0.9999998320368821, iteration: 140039
loss: 1.1138455867767334,grad_norm: 0.9999997659558437, iteration: 140040
loss: 1.1894184350967407,grad_norm: 0.9999998898692103, iteration: 140041
loss: 1.4821197986602783,grad_norm: 0.999999873611014, iteration: 140042
loss: 1.5005316734313965,grad_norm: 0.9999998931313322, iteration: 140043
loss: 1.3971501588821411,grad_norm: 0.9999999024349362, iteration: 140044
loss: 1.232433795928955,grad_norm: 0.9999999484809964, iteration: 140045
loss: 1.4015040397644043,grad_norm: 0.9999998947667347, iteration: 140046
loss: 1.185157060623169,grad_norm: 0.9999999116207445, iteration: 140047
loss: 1.334005355834961,grad_norm: 0.9999996976481617, iteration: 140048
loss: 1.3300096988677979,grad_norm: 0.9999999456158999, iteration: 140049
loss: 1.0753694772720337,grad_norm: 0.9999999670640438, iteration: 140050
loss: 1.1234506368637085,grad_norm: 0.999999652500711, iteration: 140051
loss: 1.3190362453460693,grad_norm: 0.9999997466026229, iteration: 140052
loss: 1.3325114250183105,grad_norm: 0.9999999838366562, iteration: 140053
loss: 1.273268461227417,grad_norm: 0.9999998017431654, iteration: 140054
loss: 1.4315361976623535,grad_norm: 1.000000093538757, iteration: 140055
loss: 1.3739893436431885,grad_norm: 0.9999998069035578, iteration: 140056
loss: 1.1755766868591309,grad_norm: 0.9999998292285689, iteration: 140057
loss: 1.4666380882263184,grad_norm: 0.9999999311361274, iteration: 140058
loss: 1.135879397392273,grad_norm: 0.999999488543634, iteration: 140059
loss: 1.2299851179122925,grad_norm: 0.9999999304732267, iteration: 140060
loss: 1.4747655391693115,grad_norm: 0.9999998445482263, iteration: 140061
loss: 1.2781100273132324,grad_norm: 0.999999833568566, iteration: 140062
loss: 1.173134446144104,grad_norm: 0.9999999505290794, iteration: 140063
loss: 1.2656062841415405,grad_norm: 0.9999998696064436, iteration: 140064
loss: 1.3397200107574463,grad_norm: 1.0000000722291602, iteration: 140065
loss: 1.1596345901489258,grad_norm: 0.9999997960476765, iteration: 140066
loss: 1.331648588180542,grad_norm: 0.9999999748831694, iteration: 140067
loss: 1.0806686878204346,grad_norm: 0.9999994334768892, iteration: 140068
loss: 1.51210618019104,grad_norm: 0.9999998949595762, iteration: 140069
loss: 1.3430094718933105,grad_norm: 1.0000000385064585, iteration: 140070
loss: 1.2282006740570068,grad_norm: 0.99999960188374, iteration: 140071
loss: 1.2085884809494019,grad_norm: 0.9999998392990894, iteration: 140072
loss: 1.7817070484161377,grad_norm: 0.9999999579366465, iteration: 140073
loss: 1.2847822904586792,grad_norm: 0.9999998553485145, iteration: 140074
loss: 1.2321457862854004,grad_norm: 0.9999999813502042, iteration: 140075
loss: 1.0826188325881958,grad_norm: 0.9204609837409772, iteration: 140076
loss: 1.2298383712768555,grad_norm: 0.9999996491453923, iteration: 140077
loss: 1.186235785484314,grad_norm: 0.9999998664235387, iteration: 140078
loss: 1.417985200881958,grad_norm: 0.9999996640211003, iteration: 140079
loss: 1.1778385639190674,grad_norm: 0.9999998354925463, iteration: 140080
loss: 1.7297663688659668,grad_norm: 0.9999999767301005, iteration: 140081
loss: 1.2041913270950317,grad_norm: 0.9999998490615138, iteration: 140082
loss: 1.4033232927322388,grad_norm: 0.9999998219024355, iteration: 140083
loss: 1.4947091341018677,grad_norm: 0.9999998378133093, iteration: 140084
loss: 1.4206634759902954,grad_norm: 0.9999997661821896, iteration: 140085
loss: 1.337027907371521,grad_norm: 0.9999997524442247, iteration: 140086
loss: 1.175013780593872,grad_norm: 0.9999997429285507, iteration: 140087
loss: 1.0972779989242554,grad_norm: 0.9999999511133958, iteration: 140088
loss: 1.5386914014816284,grad_norm: 0.9999998651581711, iteration: 140089
loss: 1.146668553352356,grad_norm: 0.9999996903953963, iteration: 140090
loss: 1.2444766759872437,grad_norm: 0.9999997223764555, iteration: 140091
loss: 1.2898919582366943,grad_norm: 0.9999998048527046, iteration: 140092
loss: 1.134308934211731,grad_norm: 0.9999998548764057, iteration: 140093
loss: 1.3589617013931274,grad_norm: 0.9999998935662368, iteration: 140094
loss: 1.2699182033538818,grad_norm: 0.9999998317876985, iteration: 140095
loss: 1.0844165086746216,grad_norm: 0.9999997837668803, iteration: 140096
loss: 1.7220665216445923,grad_norm: 0.9999998680137099, iteration: 140097
loss: 1.3603731393814087,grad_norm: 0.9999999299826482, iteration: 140098
loss: 1.3787897825241089,grad_norm: 0.999999875616467, iteration: 140099
loss: 1.2266333103179932,grad_norm: 1.0000000312164976, iteration: 140100
loss: 1.279921054840088,grad_norm: 0.9999999241231405, iteration: 140101
loss: 1.5456279516220093,grad_norm: 0.9999998616528513, iteration: 140102
loss: 1.3680477142333984,grad_norm: 0.9999998410132309, iteration: 140103
loss: 1.0905201435089111,grad_norm: 0.9999997240186251, iteration: 140104
loss: 1.6382914781570435,grad_norm: 0.9999998784130011, iteration: 140105
loss: 1.6941550970077515,grad_norm: 0.9999998499178572, iteration: 140106
loss: 1.2230371236801147,grad_norm: 1.0000000066828887, iteration: 140107
loss: 1.4046982526779175,grad_norm: 0.9999998144843465, iteration: 140108
loss: 1.5498727560043335,grad_norm: 0.9999999588690078, iteration: 140109
loss: 1.339469075202942,grad_norm: 0.9999996921635211, iteration: 140110
loss: 1.382826566696167,grad_norm: 0.9999997191157073, iteration: 140111
loss: 1.3809386491775513,grad_norm: 0.999999947140579, iteration: 140112
loss: 1.5867966413497925,grad_norm: 0.9999998981763317, iteration: 140113
loss: 1.0960592031478882,grad_norm: 0.9999997570166052, iteration: 140114
loss: 1.3589097261428833,grad_norm: 0.9999998687569001, iteration: 140115
loss: 1.2960610389709473,grad_norm: 0.9999998398772436, iteration: 140116
loss: 1.0964022874832153,grad_norm: 0.9999994734665085, iteration: 140117
loss: 1.4447418451309204,grad_norm: 0.9999997428965429, iteration: 140118
loss: 1.2782326936721802,grad_norm: 0.9999999444098303, iteration: 140119
loss: 1.2414603233337402,grad_norm: 0.9999998670299495, iteration: 140120
loss: 1.3012139797210693,grad_norm: 0.9999999109056894, iteration: 140121
loss: 1.101793646812439,grad_norm: 0.9999995672838115, iteration: 140122
loss: 1.3574739694595337,grad_norm: 0.9999996389936298, iteration: 140123
loss: 1.2370635271072388,grad_norm: 0.999999743190473, iteration: 140124
loss: 1.276228427886963,grad_norm: 0.9999999228248302, iteration: 140125
loss: 1.1905763149261475,grad_norm: 0.9999994239959907, iteration: 140126
loss: 1.0769853591918945,grad_norm: 0.9999993626679714, iteration: 140127
loss: 1.2325198650360107,grad_norm: 0.9999997794258723, iteration: 140128
loss: 1.310912013053894,grad_norm: 0.9999997448946175, iteration: 140129
loss: 1.3109394311904907,grad_norm: 0.9999998200217867, iteration: 140130
loss: 1.3597588539123535,grad_norm: 0.9999998302669224, iteration: 140131
loss: 1.1052424907684326,grad_norm: 0.9999997225756455, iteration: 140132
loss: 1.6205717325210571,grad_norm: 0.9999998273403143, iteration: 140133
loss: 1.3937619924545288,grad_norm: 0.9999999328760178, iteration: 140134
loss: 1.1758785247802734,grad_norm: 0.9999998905778088, iteration: 140135
loss: 1.2139976024627686,grad_norm: 0.9999998740124985, iteration: 140136
loss: 1.2878745794296265,grad_norm: 0.9999995949608262, iteration: 140137
loss: 1.2143383026123047,grad_norm: 0.9999997842251246, iteration: 140138
loss: 1.1087985038757324,grad_norm: 0.9999993151828709, iteration: 140139
loss: 1.1191296577453613,grad_norm: 0.999999568789504, iteration: 140140
loss: 1.3594727516174316,grad_norm: 0.9999999852878759, iteration: 140141
loss: 1.2128002643585205,grad_norm: 1.000000039151825, iteration: 140142
loss: 1.2154005765914917,grad_norm: 0.9999999264993806, iteration: 140143
loss: 1.241895079612732,grad_norm: 0.999999872748348, iteration: 140144
loss: 1.4088422060012817,grad_norm: 0.9999999486454314, iteration: 140145
loss: 1.247406005859375,grad_norm: 0.9999998096069903, iteration: 140146
loss: 1.1597753763198853,grad_norm: 0.9999992421969883, iteration: 140147
loss: 1.162548303604126,grad_norm: 0.9999998376557444, iteration: 140148
loss: 1.1782299280166626,grad_norm: 0.9999997830546679, iteration: 140149
loss: 1.0739549398422241,grad_norm: 0.9999998375991926, iteration: 140150
loss: 1.5194292068481445,grad_norm: 0.999999630903969, iteration: 140151
loss: 1.1002891063690186,grad_norm: 0.9999996564357676, iteration: 140152
loss: 1.2520415782928467,grad_norm: 0.9999998842217295, iteration: 140153
loss: 1.354662299156189,grad_norm: 0.9999996019818831, iteration: 140154
loss: 1.4896634817123413,grad_norm: 0.9999998138242846, iteration: 140155
loss: 1.3702911138534546,grad_norm: 0.9999998756443355, iteration: 140156
loss: 1.5088746547698975,grad_norm: 0.9999999072117673, iteration: 140157
loss: 1.2094014883041382,grad_norm: 0.9999998844820189, iteration: 140158
loss: 1.2962563037872314,grad_norm: 0.999999797322512, iteration: 140159
loss: 1.1112990379333496,grad_norm: 0.9999999131738587, iteration: 140160
loss: 1.1625728607177734,grad_norm: 0.9999995601125719, iteration: 140161
loss: 1.0374093055725098,grad_norm: 0.9999992268075566, iteration: 140162
loss: 1.0022612810134888,grad_norm: 0.999999808017291, iteration: 140163
loss: 1.2926899194717407,grad_norm: 0.9999998944874886, iteration: 140164
loss: 1.1860727071762085,grad_norm: 0.9999995645275663, iteration: 140165
loss: 1.290505290031433,grad_norm: 1.0000000066434576, iteration: 140166
loss: 1.125292181968689,grad_norm: 0.9999997995630403, iteration: 140167
loss: 1.2619746923446655,grad_norm: 0.9999999759129248, iteration: 140168
loss: 1.1996523141860962,grad_norm: 0.9999997901894627, iteration: 140169
loss: 1.153646469116211,grad_norm: 0.9999999339902058, iteration: 140170
loss: 1.3556175231933594,grad_norm: 0.9999998577990546, iteration: 140171
loss: 1.1346944570541382,grad_norm: 0.999999822943169, iteration: 140172
loss: 1.0614641904830933,grad_norm: 0.9999993321854064, iteration: 140173
loss: 1.2249081134796143,grad_norm: 0.9999999223398093, iteration: 140174
loss: 1.1833844184875488,grad_norm: 0.9999996547076337, iteration: 140175
loss: 1.2135580778121948,grad_norm: 0.9999996446720804, iteration: 140176
loss: 1.1256897449493408,grad_norm: 0.9999996411285328, iteration: 140177
loss: 1.5340230464935303,grad_norm: 0.9999999801033155, iteration: 140178
loss: 1.5998809337615967,grad_norm: 1.0000000252065013, iteration: 140179
loss: 1.2664515972137451,grad_norm: 1.0000000409104206, iteration: 140180
loss: 1.077736735343933,grad_norm: 0.9999994168613842, iteration: 140181
loss: 1.1247780323028564,grad_norm: 0.9999994228818628, iteration: 140182
loss: 1.200090765953064,grad_norm: 0.9999996813439299, iteration: 140183
loss: 1.13922917842865,grad_norm: 0.9999998062160255, iteration: 140184
loss: 1.1632245779037476,grad_norm: 0.999999628590384, iteration: 140185
loss: 1.075316309928894,grad_norm: 0.9999999514857358, iteration: 140186
loss: 1.1336121559143066,grad_norm: 0.9999992147720905, iteration: 140187
loss: 1.2761666774749756,grad_norm: 1.000000021600618, iteration: 140188
loss: 1.2433911561965942,grad_norm: 0.9999998385175356, iteration: 140189
loss: 1.1817288398742676,grad_norm: 0.9999998773897238, iteration: 140190
loss: 1.260118007659912,grad_norm: 0.999999852808454, iteration: 140191
loss: 1.2246978282928467,grad_norm: 0.999999993633388, iteration: 140192
loss: 1.2989336252212524,grad_norm: 0.9999999492130695, iteration: 140193
loss: 1.1000226736068726,grad_norm: 0.9999998704040235, iteration: 140194
loss: 1.255419373512268,grad_norm: 0.9999996788035951, iteration: 140195
loss: 1.2107808589935303,grad_norm: 0.9999998684924356, iteration: 140196
loss: 1.2587162256240845,grad_norm: 0.9999998624444215, iteration: 140197
loss: 1.1149088144302368,grad_norm: 0.999999820903953, iteration: 140198
loss: 1.3341878652572632,grad_norm: 0.999999542466195, iteration: 140199
loss: 1.1110864877700806,grad_norm: 0.9999994259392644, iteration: 140200
loss: 1.1529326438903809,grad_norm: 0.9999996799896697, iteration: 140201
loss: 1.1349390745162964,grad_norm: 0.9999996124180331, iteration: 140202
loss: 1.2139054536819458,grad_norm: 0.9999999480443306, iteration: 140203
loss: 1.2194721698760986,grad_norm: 0.9999998256756438, iteration: 140204
loss: 1.3806142807006836,grad_norm: 0.9999999364971677, iteration: 140205
loss: 1.2543216943740845,grad_norm: 0.9999998083717958, iteration: 140206
loss: 1.1789309978485107,grad_norm: 0.9999997987259243, iteration: 140207
loss: 1.1984294652938843,grad_norm: 0.9999995785928031, iteration: 140208
loss: 1.2398216724395752,grad_norm: 0.9999998898424676, iteration: 140209
loss: 1.2508517503738403,grad_norm: 0.9999997654367644, iteration: 140210
loss: 1.159906029701233,grad_norm: 0.9999994795949121, iteration: 140211
loss: 1.2214953899383545,grad_norm: 0.9999996541956527, iteration: 140212
loss: 1.0645087957382202,grad_norm: 0.9999995667002929, iteration: 140213
loss: 1.1931285858154297,grad_norm: 0.9999998473701555, iteration: 140214
loss: 1.2906253337860107,grad_norm: 0.9999999758177949, iteration: 140215
loss: 1.0329298973083496,grad_norm: 0.9999997775565908, iteration: 140216
loss: 1.3757598400115967,grad_norm: 0.9999998421839508, iteration: 140217
loss: 1.1601614952087402,grad_norm: 0.9999999252403616, iteration: 140218
loss: 1.272072672843933,grad_norm: 0.999999884987057, iteration: 140219
loss: 1.6505465507507324,grad_norm: 0.9999997618652546, iteration: 140220
loss: 1.0942409038543701,grad_norm: 1.000000001296651, iteration: 140221
loss: 1.205628752708435,grad_norm: 0.9999994228189136, iteration: 140222
loss: 1.3358447551727295,grad_norm: 0.9999998412705848, iteration: 140223
loss: 1.354321002960205,grad_norm: 0.9999999503912907, iteration: 140224
loss: 1.6544642448425293,grad_norm: 1.0000000301681593, iteration: 140225
loss: 1.1492254734039307,grad_norm: 0.9999999386035063, iteration: 140226
loss: 1.2836920022964478,grad_norm: 0.9999998566722923, iteration: 140227
loss: 1.4128315448760986,grad_norm: 0.9999999807983986, iteration: 140228
loss: 1.2295362949371338,grad_norm: 0.9999998097432032, iteration: 140229
loss: 1.2744596004486084,grad_norm: 0.9999996894616737, iteration: 140230
loss: 1.2735556364059448,grad_norm: 0.9999998521458989, iteration: 140231
loss: 1.1380304098129272,grad_norm: 1.0000000164281555, iteration: 140232
loss: 1.1966872215270996,grad_norm: 0.9999995353163994, iteration: 140233
loss: 1.202667474746704,grad_norm: 0.999999925893946, iteration: 140234
loss: 1.0904693603515625,grad_norm: 0.9999999394508056, iteration: 140235
loss: 1.2462059259414673,grad_norm: 0.9999996791266327, iteration: 140236
loss: 1.1257455348968506,grad_norm: 0.9999995022286544, iteration: 140237
loss: 1.1744388341903687,grad_norm: 1.0000000458669926, iteration: 140238
loss: 1.2051805257797241,grad_norm: 0.9999995477740989, iteration: 140239
loss: 1.3395308256149292,grad_norm: 0.999999892042999, iteration: 140240
loss: 1.2181833982467651,grad_norm: 0.9999999971623345, iteration: 140241
loss: 1.3065775632858276,grad_norm: 0.9999997060639444, iteration: 140242
loss: 1.1509283781051636,grad_norm: 1.0000000189651401, iteration: 140243
loss: 1.079123616218567,grad_norm: 0.9999996525623637, iteration: 140244
loss: 1.1402074098587036,grad_norm: 0.999999437756551, iteration: 140245
loss: 1.1138755083084106,grad_norm: 0.9999997521755499, iteration: 140246
loss: 1.4518523216247559,grad_norm: 0.9999998740711402, iteration: 140247
loss: 1.1359975337982178,grad_norm: 0.9999995508387567, iteration: 140248
loss: 1.2213469743728638,grad_norm: 0.9999999770910871, iteration: 140249
loss: 1.4488531351089478,grad_norm: 0.9999999187200185, iteration: 140250
loss: 1.1237971782684326,grad_norm: 0.9999997800118583, iteration: 140251
loss: 1.2149326801300049,grad_norm: 0.9999996771696747, iteration: 140252
loss: 1.1895838975906372,grad_norm: 0.9999997904492052, iteration: 140253
loss: 1.2587584257125854,grad_norm: 0.9999996506098234, iteration: 140254
loss: 1.0478270053863525,grad_norm: 0.9999999783676139, iteration: 140255
loss: 1.4034252166748047,grad_norm: 0.999999896217118, iteration: 140256
loss: 1.0839678049087524,grad_norm: 0.9999992037016866, iteration: 140257
loss: 1.1021631956100464,grad_norm: 0.9999995628327901, iteration: 140258
loss: 1.1387909650802612,grad_norm: 0.9999996385559385, iteration: 140259
loss: 1.0489391088485718,grad_norm: 0.999999662403685, iteration: 140260
loss: 1.0633488893508911,grad_norm: 0.9999999749787818, iteration: 140261
loss: 1.037868618965149,grad_norm: 0.9999990074341194, iteration: 140262
loss: 1.1286559104919434,grad_norm: 0.9999997690841267, iteration: 140263
loss: 1.1766626834869385,grad_norm: 0.9999993082810072, iteration: 140264
loss: 1.0399982929229736,grad_norm: 0.9999991619324978, iteration: 140265
loss: 1.1235480308532715,grad_norm: 0.9999994916583265, iteration: 140266
loss: 1.2320196628570557,grad_norm: 0.999999444995903, iteration: 140267
loss: 1.0920734405517578,grad_norm: 0.9999992947096173, iteration: 140268
loss: 1.3326537609100342,grad_norm: 0.9999998334926203, iteration: 140269
loss: 1.125705599784851,grad_norm: 1.0000000020643476, iteration: 140270
loss: 1.1018165349960327,grad_norm: 0.9999999937196342, iteration: 140271
loss: 1.0312899351119995,grad_norm: 0.9999994344135946, iteration: 140272
loss: 1.148180603981018,grad_norm: 0.9999996390262038, iteration: 140273
loss: 1.2260977029800415,grad_norm: 0.9999998822150169, iteration: 140274
loss: 1.00381600856781,grad_norm: 0.9999995642228661, iteration: 140275
loss: 1.2201272249221802,grad_norm: 0.9999999715605677, iteration: 140276
loss: 1.1222820281982422,grad_norm: 0.9999996799135908, iteration: 140277
loss: 1.145522952079773,grad_norm: 0.9999994617623047, iteration: 140278
loss: 1.1219574213027954,grad_norm: 0.9999998454101545, iteration: 140279
loss: 1.0238500833511353,grad_norm: 0.9999996158335933, iteration: 140280
loss: 1.3020877838134766,grad_norm: 0.9999997298196496, iteration: 140281
loss: 1.0850805044174194,grad_norm: 0.9999997563546126, iteration: 140282
loss: 1.2000664472579956,grad_norm: 0.9999999490572901, iteration: 140283
loss: 1.0528942346572876,grad_norm: 0.9999992461996116, iteration: 140284
loss: 1.1665431261062622,grad_norm: 0.9999998421005794, iteration: 140285
loss: 1.1684647798538208,grad_norm: 0.9999998353236464, iteration: 140286
loss: 1.1932847499847412,grad_norm: 0.9999996519201785, iteration: 140287
loss: 1.1727352142333984,grad_norm: 0.9999999308377666, iteration: 140288
loss: 1.2256206274032593,grad_norm: 0.9999997468742209, iteration: 140289
loss: 1.0538097620010376,grad_norm: 0.9999993788506025, iteration: 140290
loss: 1.146149754524231,grad_norm: 0.9999998988830467, iteration: 140291
loss: 1.2863361835479736,grad_norm: 0.9999999889116343, iteration: 140292
loss: 1.0996564626693726,grad_norm: 0.9999999773881366, iteration: 140293
loss: 0.9961370229721069,grad_norm: 0.9999994220937826, iteration: 140294
loss: 1.3434146642684937,grad_norm: 0.9999998967924418, iteration: 140295
loss: 1.1993316411972046,grad_norm: 0.999999803569091, iteration: 140296
loss: 1.276739478111267,grad_norm: 0.9999993592526852, iteration: 140297
loss: 1.0454621315002441,grad_norm: 0.9999998659535252, iteration: 140298
loss: 1.0881094932556152,grad_norm: 0.999999952117884, iteration: 140299
loss: 1.3370254039764404,grad_norm: 0.9999998982540675, iteration: 140300
loss: 1.1002980470657349,grad_norm: 0.9999996320499378, iteration: 140301
loss: 1.0970823764801025,grad_norm: 0.9999997266168392, iteration: 140302
loss: 1.3513470888137817,grad_norm: 0.9999998733615079, iteration: 140303
loss: 1.337693214416504,grad_norm: 0.9999998805780641, iteration: 140304
loss: 1.0316435098648071,grad_norm: 0.934147392665696, iteration: 140305
loss: 1.2009860277175903,grad_norm: 0.9999999221373372, iteration: 140306
loss: 1.1929187774658203,grad_norm: 0.9999998948714476, iteration: 140307
loss: 1.188698172569275,grad_norm: 0.9999995188022852, iteration: 140308
loss: 1.0935232639312744,grad_norm: 0.9999991345483775, iteration: 140309
loss: 1.2980960607528687,grad_norm: 0.9999996369398443, iteration: 140310
loss: 1.2359087467193604,grad_norm: 0.9999998774650384, iteration: 140311
loss: 1.2481831312179565,grad_norm: 0.9999999541356313, iteration: 140312
loss: 1.0892951488494873,grad_norm: 0.999999669420965, iteration: 140313
loss: 1.290543556213379,grad_norm: 0.9999997481173566, iteration: 140314
loss: 1.3894827365875244,grad_norm: 0.9999998846499782, iteration: 140315
loss: 1.0385308265686035,grad_norm: 1.0000000577673005, iteration: 140316
loss: 1.0373401641845703,grad_norm: 0.9999990523189004, iteration: 140317
loss: 1.2330518960952759,grad_norm: 0.999999928546129, iteration: 140318
loss: 1.1699955463409424,grad_norm: 0.9999996822002876, iteration: 140319
loss: 1.134294033050537,grad_norm: 0.999999664155204, iteration: 140320
loss: 1.0678225755691528,grad_norm: 0.9999993468254417, iteration: 140321
loss: 1.3848646879196167,grad_norm: 0.9999999141540127, iteration: 140322
loss: 1.2406669855117798,grad_norm: 1.0000001035102037, iteration: 140323
loss: 1.3712108135223389,grad_norm: 0.9999999111676021, iteration: 140324
loss: 1.221797227859497,grad_norm: 0.9999999143446863, iteration: 140325
loss: 1.1399610042572021,grad_norm: 0.9999994050591117, iteration: 140326
loss: 1.328757643699646,grad_norm: 0.9999998359776663, iteration: 140327
loss: 1.1816625595092773,grad_norm: 0.9999998499649055, iteration: 140328
loss: 1.1680424213409424,grad_norm: 0.9999995584125423, iteration: 140329
loss: 1.3111268281936646,grad_norm: 0.9999997852898375, iteration: 140330
loss: 1.0966081619262695,grad_norm: 0.9999996619144675, iteration: 140331
loss: 1.2294881343841553,grad_norm: 1.0000000927395671, iteration: 140332
loss: 1.2458089590072632,grad_norm: 1.0000000009078782, iteration: 140333
loss: 1.0852094888687134,grad_norm: 0.9999994177141598, iteration: 140334
loss: 1.1613136529922485,grad_norm: 0.9999999185087319, iteration: 140335
loss: 1.1768265962600708,grad_norm: 0.9999999834135839, iteration: 140336
loss: 1.150309681892395,grad_norm: 0.9999999847058988, iteration: 140337
loss: 1.1125638484954834,grad_norm: 0.9999996442175716, iteration: 140338
loss: 1.326336145401001,grad_norm: 0.9999999592685238, iteration: 140339
loss: 1.3471527099609375,grad_norm: 0.9999999278432007, iteration: 140340
loss: 1.2304152250289917,grad_norm: 0.9999996685102789, iteration: 140341
loss: 1.0481761693954468,grad_norm: 0.9999993128101629, iteration: 140342
loss: 1.1477442979812622,grad_norm: 0.9999997375226206, iteration: 140343
loss: 1.2076082229614258,grad_norm: 0.9999996887826457, iteration: 140344
loss: 1.1348477602005005,grad_norm: 0.9999998512111534, iteration: 140345
loss: 1.0379873514175415,grad_norm: 0.999999320732953, iteration: 140346
loss: 1.1676570177078247,grad_norm: 0.9999998004120678, iteration: 140347
loss: 1.1553950309753418,grad_norm: 0.999999657927167, iteration: 140348
loss: 1.29193115234375,grad_norm: 0.9999998796892641, iteration: 140349
loss: 1.1885110139846802,grad_norm: 0.9999998488346474, iteration: 140350
loss: 1.3955012559890747,grad_norm: 0.999999785646979, iteration: 140351
loss: 1.0327861309051514,grad_norm: 0.9999998345303408, iteration: 140352
loss: 1.3533704280853271,grad_norm: 0.9999999946403884, iteration: 140353
loss: 1.0764293670654297,grad_norm: 1.0000000402000322, iteration: 140354
loss: 1.202419400215149,grad_norm: 0.9999998766093295, iteration: 140355
loss: 1.0964405536651611,grad_norm: 0.9999998807322418, iteration: 140356
loss: 1.065155029296875,grad_norm: 0.9999992744413625, iteration: 140357
loss: 1.046005368232727,grad_norm: 0.9999995389142354, iteration: 140358
loss: 1.1791458129882812,grad_norm: 0.9999996874971011, iteration: 140359
loss: 1.2664315700531006,grad_norm: 0.9999999828438352, iteration: 140360
loss: 1.1452094316482544,grad_norm: 0.9999997095284697, iteration: 140361
loss: 1.0831632614135742,grad_norm: 0.9999997957689784, iteration: 140362
loss: 1.1755515336990356,grad_norm: 0.9999995891696759, iteration: 140363
loss: 1.1091783046722412,grad_norm: 0.9999997098367042, iteration: 140364
loss: 1.2186343669891357,grad_norm: 0.9999998818646535, iteration: 140365
loss: 1.2166976928710938,grad_norm: 0.9999998274554041, iteration: 140366
loss: 1.3526074886322021,grad_norm: 1.0000000648326652, iteration: 140367
loss: 1.0472326278686523,grad_norm: 0.9999993938772644, iteration: 140368
loss: 1.1173344850540161,grad_norm: 0.9999998157791717, iteration: 140369
loss: 1.2852050065994263,grad_norm: 0.9999998190655536, iteration: 140370
loss: 1.2123013734817505,grad_norm: 0.999999916337362, iteration: 140371
loss: 1.1859142780303955,grad_norm: 1.000000024725028, iteration: 140372
loss: 1.157098650932312,grad_norm: 0.9999995641517726, iteration: 140373
loss: 1.3286207914352417,grad_norm: 0.9999998931134734, iteration: 140374
loss: 1.070110559463501,grad_norm: 0.9999999050488927, iteration: 140375
loss: 1.2819933891296387,grad_norm: 0.9999998624755989, iteration: 140376
loss: 1.2233279943466187,grad_norm: 0.9999997791339329, iteration: 140377
loss: 1.2945992946624756,grad_norm: 0.9999997449366771, iteration: 140378
loss: 1.1648496389389038,grad_norm: 0.9999999752696893, iteration: 140379
loss: 1.126936912536621,grad_norm: 0.9999997111430318, iteration: 140380
loss: 1.039104700088501,grad_norm: 0.9999991702967691, iteration: 140381
loss: 1.0627081394195557,grad_norm: 0.9999993944416126, iteration: 140382
loss: 1.135326623916626,grad_norm: 0.9999999359097064, iteration: 140383
loss: 1.1462314128875732,grad_norm: 0.9999998021655585, iteration: 140384
loss: 1.1720054149627686,grad_norm: 1.0000000297359672, iteration: 140385
loss: 1.1341838836669922,grad_norm: 0.9999996483564499, iteration: 140386
loss: 1.1951050758361816,grad_norm: 0.9999998465273799, iteration: 140387
loss: 1.1750614643096924,grad_norm: 0.9999996668106337, iteration: 140388
loss: 1.4760112762451172,grad_norm: 0.999999752788738, iteration: 140389
loss: 1.093809962272644,grad_norm: 0.9999996483683151, iteration: 140390
loss: 1.1911855936050415,grad_norm: 0.9999999085254776, iteration: 140391
loss: 1.1431533098220825,grad_norm: 0.9999999632183327, iteration: 140392
loss: 1.1298468112945557,grad_norm: 0.9999999568416467, iteration: 140393
loss: 1.1252139806747437,grad_norm: 0.999999902371053, iteration: 140394
loss: 1.2870566844940186,grad_norm: 0.9999998271586337, iteration: 140395
loss: 1.0646882057189941,grad_norm: 0.9999993570487008, iteration: 140396
loss: 1.1295875310897827,grad_norm: 1.000000075998267, iteration: 140397
loss: 1.128985047340393,grad_norm: 0.9999999104908429, iteration: 140398
loss: 1.107759952545166,grad_norm: 0.9999995421213635, iteration: 140399
loss: 1.0697016716003418,grad_norm: 0.9999997740938608, iteration: 140400
loss: 1.0608761310577393,grad_norm: 0.9999990867762234, iteration: 140401
loss: 1.2851706743240356,grad_norm: 0.9999998690128903, iteration: 140402
loss: 1.36713707447052,grad_norm: 0.9999994777173574, iteration: 140403
loss: 1.045325517654419,grad_norm: 0.9999992989159537, iteration: 140404
loss: 1.0891822576522827,grad_norm: 0.9999991568194011, iteration: 140405
loss: 1.0665113925933838,grad_norm: 0.9999998081916235, iteration: 140406
loss: 1.1550816297531128,grad_norm: 0.9999998419461225, iteration: 140407
loss: 1.136229395866394,grad_norm: 0.9999998119727553, iteration: 140408
loss: 1.1352332830429077,grad_norm: 0.9999999464938546, iteration: 140409
loss: 1.105157494544983,grad_norm: 0.999999502439191, iteration: 140410
loss: 1.0711172819137573,grad_norm: 0.9999998419703213, iteration: 140411
loss: 1.2283250093460083,grad_norm: 0.9999997608422425, iteration: 140412
loss: 1.12516188621521,grad_norm: 0.9999995981902032, iteration: 140413
loss: 1.169541835784912,grad_norm: 0.9999999413139727, iteration: 140414
loss: 1.2370043992996216,grad_norm: 0.9999998623865792, iteration: 140415
loss: 1.0238323211669922,grad_norm: 0.9999998565345359, iteration: 140416
loss: 1.1722890138626099,grad_norm: 0.9999997879558302, iteration: 140417
loss: 1.1805568933486938,grad_norm: 0.9999996818799451, iteration: 140418
loss: 1.272736668586731,grad_norm: 0.9999997189477666, iteration: 140419
loss: 1.275158166885376,grad_norm: 0.9999998931717922, iteration: 140420
loss: 1.0343326330184937,grad_norm: 0.9999990827524896, iteration: 140421
loss: 1.1123723983764648,grad_norm: 0.9999998167847719, iteration: 140422
loss: 1.1981035470962524,grad_norm: 0.9999999128554123, iteration: 140423
loss: 1.117698311805725,grad_norm: 0.9999998503196958, iteration: 140424
loss: 1.2982170581817627,grad_norm: 0.9999999033375471, iteration: 140425
loss: 1.1295498609542847,grad_norm: 0.999999830902718, iteration: 140426
loss: 1.0921149253845215,grad_norm: 0.9999995918072992, iteration: 140427
loss: 1.0764620304107666,grad_norm: 0.9999995082472151, iteration: 140428
loss: 1.0399634838104248,grad_norm: 0.9999997345495161, iteration: 140429
loss: 1.3350756168365479,grad_norm: 1.0000000066129697, iteration: 140430
loss: 1.1381182670593262,grad_norm: 0.9999999272121056, iteration: 140431
loss: 1.2801523208618164,grad_norm: 0.9999999322639023, iteration: 140432
loss: 0.9826210141181946,grad_norm: 0.9999996513062694, iteration: 140433
loss: 1.2364720106124878,grad_norm: 0.9999997758707254, iteration: 140434
loss: 1.1261993646621704,grad_norm: 0.9999999202605803, iteration: 140435
loss: 1.2038121223449707,grad_norm: 1.0000000235015778, iteration: 140436
loss: 1.2497143745422363,grad_norm: 0.9999998426820845, iteration: 140437
loss: 1.204763412475586,grad_norm: 0.9999999187173534, iteration: 140438
loss: 1.318975567817688,grad_norm: 0.999999992193706, iteration: 140439
loss: 1.1761428117752075,grad_norm: 0.9999997642497844, iteration: 140440
loss: 1.3153642416000366,grad_norm: 0.9999998470737952, iteration: 140441
loss: 1.0956907272338867,grad_norm: 0.9999998173936425, iteration: 140442
loss: 1.354655385017395,grad_norm: 0.9999998832313852, iteration: 140443
loss: 1.0008891820907593,grad_norm: 0.9246141303035997, iteration: 140444
loss: 1.1789220571517944,grad_norm: 0.9999998546918789, iteration: 140445
loss: 1.0901178121566772,grad_norm: 0.999999923929931, iteration: 140446
loss: 1.093532919883728,grad_norm: 0.999999365969525, iteration: 140447
loss: 0.9991194605827332,grad_norm: 0.8381968504465305, iteration: 140448
loss: 1.1369208097457886,grad_norm: 0.9999993199680923, iteration: 140449
loss: 1.1575555801391602,grad_norm: 0.9999996400925257, iteration: 140450
loss: 1.2348048686981201,grad_norm: 1.0000000068584411, iteration: 140451
loss: 1.2465740442276,grad_norm: 0.9999998841521673, iteration: 140452
loss: 1.2987664937973022,grad_norm: 0.9999995114616003, iteration: 140453
loss: 1.2794218063354492,grad_norm: 0.9999997649761255, iteration: 140454
loss: 1.414204716682434,grad_norm: 0.9999999610124765, iteration: 140455
loss: 1.1741828918457031,grad_norm: 0.9999999378112607, iteration: 140456
loss: 1.1227805614471436,grad_norm: 0.9999997768329992, iteration: 140457
loss: 1.0601425170898438,grad_norm: 0.9999997866300921, iteration: 140458
loss: 1.212217092514038,grad_norm: 0.9999997091462018, iteration: 140459
loss: 1.0026135444641113,grad_norm: 0.9999993363694859, iteration: 140460
loss: 1.1474262475967407,grad_norm: 0.9999998948818647, iteration: 140461
loss: 1.1791757345199585,grad_norm: 0.9999998608122931, iteration: 140462
loss: 1.2380677461624146,grad_norm: 0.9999999638183332, iteration: 140463
loss: 1.0884242057800293,grad_norm: 0.9999996059731736, iteration: 140464
loss: 1.1859492063522339,grad_norm: 0.9999995110595727, iteration: 140465
loss: 1.2885351181030273,grad_norm: 0.9999997557705121, iteration: 140466
loss: 1.2271220684051514,grad_norm: 0.9999998927395082, iteration: 140467
loss: 1.2909092903137207,grad_norm: 0.9999998909977593, iteration: 140468
loss: 1.1890259981155396,grad_norm: 0.9999999147516639, iteration: 140469
loss: 1.3206864595413208,grad_norm: 0.999999879595249, iteration: 140470
loss: 1.1547592878341675,grad_norm: 0.9999993160237247, iteration: 140471
loss: 1.0764224529266357,grad_norm: 0.9999992665934385, iteration: 140472
loss: 1.128489375114441,grad_norm: 0.9999997727820125, iteration: 140473
loss: 1.2136746644973755,grad_norm: 0.9999998364382484, iteration: 140474
loss: 1.065132737159729,grad_norm: 0.9999996702661474, iteration: 140475
loss: 0.9982587099075317,grad_norm: 0.9999990313203314, iteration: 140476
loss: 0.9837456941604614,grad_norm: 0.9999990192675471, iteration: 140477
loss: 1.186197280883789,grad_norm: 0.9999996662157951, iteration: 140478
loss: 1.0719356536865234,grad_norm: 0.9999992359284037, iteration: 140479
loss: 1.0897982120513916,grad_norm: 0.9999995060075373, iteration: 140480
loss: 1.1096116304397583,grad_norm: 0.999999512567094, iteration: 140481
loss: 1.0849170684814453,grad_norm: 0.9999992876041874, iteration: 140482
loss: 1.1642603874206543,grad_norm: 0.9999999800471333, iteration: 140483
loss: 1.1531447172164917,grad_norm: 0.9999998563358635, iteration: 140484
loss: 1.0384660959243774,grad_norm: 0.9999991997224991, iteration: 140485
loss: 1.2448779344558716,grad_norm: 0.9999996994393874, iteration: 140486
loss: 1.1945453882217407,grad_norm: 0.9999997923567566, iteration: 140487
loss: 1.2798463106155396,grad_norm: 0.9999995334083753, iteration: 140488
loss: 1.1778569221496582,grad_norm: 0.9999997278688888, iteration: 140489
loss: 1.1014631986618042,grad_norm: 0.9999997273357953, iteration: 140490
loss: 1.1482702493667603,grad_norm: 0.9999994176170333, iteration: 140491
loss: 1.172529697418213,grad_norm: 1.0000000156866324, iteration: 140492
loss: 1.061995029449463,grad_norm: 0.9999995063974605, iteration: 140493
loss: 1.1999753713607788,grad_norm: 1.0000000003126157, iteration: 140494
loss: 1.17442786693573,grad_norm: 0.9999998872719309, iteration: 140495
loss: 1.047865867614746,grad_norm: 0.9148197197297797, iteration: 140496
loss: 1.128821849822998,grad_norm: 0.9999999554506184, iteration: 140497
loss: 1.1933715343475342,grad_norm: 0.9999999607021351, iteration: 140498
loss: 1.3280481100082397,grad_norm: 0.9999998843517932, iteration: 140499
loss: 1.4245308637619019,grad_norm: 0.9999999813877058, iteration: 140500
loss: 1.1804426908493042,grad_norm: 0.9999994221534637, iteration: 140501
loss: 1.130407452583313,grad_norm: 1.000000017880362, iteration: 140502
loss: 1.3677620887756348,grad_norm: 0.9999996293689947, iteration: 140503
loss: 1.1177892684936523,grad_norm: 0.9999992236517535, iteration: 140504
loss: 0.9731343984603882,grad_norm: 0.9999998899408815, iteration: 140505
loss: 1.3831686973571777,grad_norm: 0.9999995115676539, iteration: 140506
loss: 1.4526816606521606,grad_norm: 0.9999999361813664, iteration: 140507
loss: 1.453537106513977,grad_norm: 0.9999997742084206, iteration: 140508
loss: 1.356331706047058,grad_norm: 1.000000007771735, iteration: 140509
loss: 1.0545411109924316,grad_norm: 0.9999998497726424, iteration: 140510
loss: 1.3361965417861938,grad_norm: 0.9999999160264001, iteration: 140511
loss: 1.3777446746826172,grad_norm: 1.0000000631556198, iteration: 140512
loss: 1.1941120624542236,grad_norm: 0.999999919218292, iteration: 140513
loss: 1.3750327825546265,grad_norm: 0.9999998852695501, iteration: 140514
loss: 1.213308572769165,grad_norm: 0.9999995253333814, iteration: 140515
loss: 1.1733648777008057,grad_norm: 0.9999998852268539, iteration: 140516
loss: 1.1787078380584717,grad_norm: 0.9999998839943084, iteration: 140517
loss: 1.0344712734222412,grad_norm: 0.9999991137824068, iteration: 140518
loss: 1.3094168901443481,grad_norm: 1.0000000070233186, iteration: 140519
loss: 1.1445884704589844,grad_norm: 0.9999999095102534, iteration: 140520
loss: 1.2029088735580444,grad_norm: 1.0000000696593745, iteration: 140521
loss: 1.368528127670288,grad_norm: 0.9999998893469758, iteration: 140522
loss: 1.3214473724365234,grad_norm: 0.9999998240109895, iteration: 140523
loss: 1.2119861841201782,grad_norm: 0.999999882901775, iteration: 140524
loss: 1.1171756982803345,grad_norm: 0.99999963844051, iteration: 140525
loss: 1.1567051410675049,grad_norm: 0.999999827731129, iteration: 140526
loss: 1.3404678106307983,grad_norm: 0.9999996722372839, iteration: 140527
loss: 1.3881045579910278,grad_norm: 0.9999999365263607, iteration: 140528
loss: 1.2234281301498413,grad_norm: 0.9999995183538105, iteration: 140529
loss: 1.1895840167999268,grad_norm: 0.999999917829299, iteration: 140530
loss: 1.1828081607818604,grad_norm: 0.9999994080024708, iteration: 140531
loss: 1.1242696046829224,grad_norm: 0.9999998481714728, iteration: 140532
loss: 1.4162757396697998,grad_norm: 0.9999998821514384, iteration: 140533
loss: 1.21125066280365,grad_norm: 0.9999999414350574, iteration: 140534
loss: 1.236512303352356,grad_norm: 0.999999890611067, iteration: 140535
loss: 1.0360968112945557,grad_norm: 0.9999994373341358, iteration: 140536
loss: 1.3551422357559204,grad_norm: 0.9999999839536804, iteration: 140537
loss: 1.1672486066818237,grad_norm: 0.9999994370717306, iteration: 140538
loss: 1.1023632287979126,grad_norm: 0.9999998123263136, iteration: 140539
loss: 1.3766385316848755,grad_norm: 1.000000059183979, iteration: 140540
loss: 1.1952346563339233,grad_norm: 0.9999999094525676, iteration: 140541
loss: 1.2281818389892578,grad_norm: 0.9999998458199219, iteration: 140542
loss: 1.202351450920105,grad_norm: 0.9999998473866245, iteration: 140543
loss: 1.1840707063674927,grad_norm: 0.9999998377256173, iteration: 140544
loss: 1.2274272441864014,grad_norm: 0.9999998272612778, iteration: 140545
loss: 1.2979825735092163,grad_norm: 0.9999999937498171, iteration: 140546
loss: 1.3440866470336914,grad_norm: 1.0000000160296905, iteration: 140547
loss: 1.3726963996887207,grad_norm: 0.9999999807251179, iteration: 140548
loss: 1.0035319328308105,grad_norm: 0.9999993691530141, iteration: 140549
loss: 1.2610080242156982,grad_norm: 0.9999996945547363, iteration: 140550
loss: 1.732350468635559,grad_norm: 0.9999998991487408, iteration: 140551
loss: 1.1728090047836304,grad_norm: 0.9999999540372699, iteration: 140552
loss: 1.1550625562667847,grad_norm: 0.9999998638920875, iteration: 140553
loss: 1.1846052408218384,grad_norm: 0.9999996132658661, iteration: 140554
loss: 1.2972691059112549,grad_norm: 0.999999858115467, iteration: 140555
loss: 1.194715142250061,grad_norm: 0.9999996563023169, iteration: 140556
loss: 1.3238364458084106,grad_norm: 0.999999853109094, iteration: 140557
loss: 1.268845558166504,grad_norm: 0.9999995140383043, iteration: 140558
loss: 1.0666005611419678,grad_norm: 0.9999997804918705, iteration: 140559
loss: 1.3664957284927368,grad_norm: 0.9999999597390778, iteration: 140560
loss: 1.1341453790664673,grad_norm: 0.9999999813108723, iteration: 140561
loss: 1.1930221319198608,grad_norm: 0.99999998501404, iteration: 140562
loss: 1.0766823291778564,grad_norm: 0.99999976881813, iteration: 140563
loss: 1.150681495666504,grad_norm: 0.9999999731824267, iteration: 140564
loss: 1.0972048044204712,grad_norm: 0.9999999402911646, iteration: 140565
loss: 1.0629972219467163,grad_norm: 0.948513579466344, iteration: 140566
loss: 1.1369863748550415,grad_norm: 0.999999482982816, iteration: 140567
loss: 1.2770277261734009,grad_norm: 0.99999981409507, iteration: 140568
loss: 1.1681914329528809,grad_norm: 0.9999998592856165, iteration: 140569
loss: 1.2013540267944336,grad_norm: 0.999999634334702, iteration: 140570
loss: 1.1388285160064697,grad_norm: 0.9999999992406526, iteration: 140571
loss: 1.0839924812316895,grad_norm: 0.999999113256087, iteration: 140572
loss: 1.0187455415725708,grad_norm: 0.9999996837930591, iteration: 140573
loss: 1.187612533569336,grad_norm: 0.9999996201908063, iteration: 140574
loss: 1.0470335483551025,grad_norm: 0.9999998458932857, iteration: 140575
loss: 1.0735036134719849,grad_norm: 0.9999995999983872, iteration: 140576
loss: 1.0494680404663086,grad_norm: 0.999999438814608, iteration: 140577
loss: 1.1153329610824585,grad_norm: 0.9999998861713179, iteration: 140578
loss: 1.0902348756790161,grad_norm: 0.999999880845882, iteration: 140579
loss: 1.0329163074493408,grad_norm: 0.9999998821786396, iteration: 140580
loss: 1.211499571800232,grad_norm: 0.9999997950629054, iteration: 140581
loss: 1.211521029472351,grad_norm: 0.999999717990929, iteration: 140582
loss: 1.1752158403396606,grad_norm: 0.999999374536861, iteration: 140583
loss: 1.2842767238616943,grad_norm: 0.9999996644231736, iteration: 140584
loss: 1.1356698274612427,grad_norm: 0.9999997997889156, iteration: 140585
loss: 1.0359364748001099,grad_norm: 0.9999996348588597, iteration: 140586
loss: 1.2024433612823486,grad_norm: 0.9999998866419618, iteration: 140587
loss: 1.274210810661316,grad_norm: 0.9999999410845355, iteration: 140588
loss: 1.191680669784546,grad_norm: 0.9999998446001451, iteration: 140589
loss: 1.2147983312606812,grad_norm: 0.999999859113497, iteration: 140590
loss: 1.1188390254974365,grad_norm: 0.9999998460781417, iteration: 140591
loss: 1.1830542087554932,grad_norm: 0.9999997623823683, iteration: 140592
loss: 1.150704264640808,grad_norm: 0.9999998347443674, iteration: 140593
loss: 1.2532179355621338,grad_norm: 0.9999998486132127, iteration: 140594
loss: 1.0131224393844604,grad_norm: 0.9999991560688021, iteration: 140595
loss: 1.025758147239685,grad_norm: 0.971377443190572, iteration: 140596
loss: 1.2742670774459839,grad_norm: 0.9999998175491251, iteration: 140597
loss: 1.145971655845642,grad_norm: 0.9999999260134708, iteration: 140598
loss: 1.0651103258132935,grad_norm: 0.999999286902391, iteration: 140599
loss: 1.1990631818771362,grad_norm: 0.9999996475787688, iteration: 140600
loss: 1.1768797636032104,grad_norm: 0.9999993707407527, iteration: 140601
loss: 1.0265053510665894,grad_norm: 0.999999191648453, iteration: 140602
loss: 1.2467719316482544,grad_norm: 0.9999996935649141, iteration: 140603
loss: 1.0526924133300781,grad_norm: 0.877644320847367, iteration: 140604
loss: 1.4093034267425537,grad_norm: 0.9999998402661175, iteration: 140605
loss: 1.0531519651412964,grad_norm: 0.99999976891442, iteration: 140606
loss: 1.1845669746398926,grad_norm: 0.9999995167440924, iteration: 140607
loss: 1.2141127586364746,grad_norm: 0.9999996842313869, iteration: 140608
loss: 1.1278566122055054,grad_norm: 0.9999996909979993, iteration: 140609
loss: 1.2884424924850464,grad_norm: 0.9999998962638598, iteration: 140610
loss: 1.185504674911499,grad_norm: 0.9999998976307385, iteration: 140611
loss: 1.1888115406036377,grad_norm: 0.9999999003024426, iteration: 140612
loss: 1.1268627643585205,grad_norm: 0.9999998533507183, iteration: 140613
loss: 1.1158796548843384,grad_norm: 0.9999998561163772, iteration: 140614
loss: 1.1829255819320679,grad_norm: 0.9999993768545489, iteration: 140615
loss: 1.2193422317504883,grad_norm: 0.9999999276735482, iteration: 140616
loss: 1.07858145236969,grad_norm: 0.9999997072852899, iteration: 140617
loss: 1.0533884763717651,grad_norm: 0.9999995431853631, iteration: 140618
loss: 1.1283804178237915,grad_norm: 0.9999997702670815, iteration: 140619
loss: 1.2951807975769043,grad_norm: 0.9999998515416054, iteration: 140620
loss: 1.2424379587173462,grad_norm: 0.9999997432825548, iteration: 140621
loss: 1.2632704973220825,grad_norm: 0.9999998206743452, iteration: 140622
loss: 1.286838412284851,grad_norm: 0.9999997411620664, iteration: 140623
loss: 1.2458209991455078,grad_norm: 0.9999998167403352, iteration: 140624
loss: 1.1243386268615723,grad_norm: 0.9999995209007686, iteration: 140625
loss: 1.3360034227371216,grad_norm: 0.9999999382770131, iteration: 140626
loss: 1.282059669494629,grad_norm: 0.9999998824375753, iteration: 140627
loss: 1.1085362434387207,grad_norm: 0.9999998594511488, iteration: 140628
loss: 1.131749153137207,grad_norm: 1.000000040022243, iteration: 140629
loss: 1.0743976831436157,grad_norm: 0.9999991884083786, iteration: 140630
loss: 1.2768876552581787,grad_norm: 0.9999997144625545, iteration: 140631
loss: 1.0531898736953735,grad_norm: 0.9999999561046454, iteration: 140632
loss: 1.1296309232711792,grad_norm: 0.9999998789094418, iteration: 140633
loss: 1.1485884189605713,grad_norm: 0.9999995151563337, iteration: 140634
loss: 1.1730633974075317,grad_norm: 0.999999652700707, iteration: 140635
loss: 1.4235109090805054,grad_norm: 0.9999996861890549, iteration: 140636
loss: 1.2167932987213135,grad_norm: 0.9999996173806012, iteration: 140637
loss: 1.1874125003814697,grad_norm: 0.9999997854726477, iteration: 140638
loss: 1.1490236520767212,grad_norm: 0.9999994818846167, iteration: 140639
loss: 1.3645074367523193,grad_norm: 0.999999851741132, iteration: 140640
loss: 1.235086441040039,grad_norm: 0.9999997845585602, iteration: 140641
loss: 1.235306978225708,grad_norm: 1.0000000141002268, iteration: 140642
loss: 1.1766457557678223,grad_norm: 0.9999996166298013, iteration: 140643
loss: 1.1356672048568726,grad_norm: 0.9999996511765102, iteration: 140644
loss: 1.200427532196045,grad_norm: 0.9999998798277218, iteration: 140645
loss: 1.0402865409851074,grad_norm: 0.9999999355362883, iteration: 140646
loss: 1.2952200174331665,grad_norm: 0.9999999600529609, iteration: 140647
loss: 1.1395056247711182,grad_norm: 0.9999995470625057, iteration: 140648
loss: 1.2903159856796265,grad_norm: 1.0000000209860036, iteration: 140649
loss: 1.045408010482788,grad_norm: 0.9999992514563534, iteration: 140650
loss: 1.2137202024459839,grad_norm: 0.9999999711357006, iteration: 140651
loss: 1.0410606861114502,grad_norm: 0.9999994022734209, iteration: 140652
loss: 1.137618899345398,grad_norm: 0.9999998033449483, iteration: 140653
loss: 1.21047043800354,grad_norm: 0.9999998330012346, iteration: 140654
loss: 1.116454005241394,grad_norm: 0.9999996061733539, iteration: 140655
loss: 1.1538681983947754,grad_norm: 0.9999998868295493, iteration: 140656
loss: 1.0300122499465942,grad_norm: 0.9999995546711424, iteration: 140657
loss: 1.1740952730178833,grad_norm: 0.9999993032990152, iteration: 140658
loss: 1.0450791120529175,grad_norm: 0.999999679542003, iteration: 140659
loss: 1.0421407222747803,grad_norm: 0.9999999557911502, iteration: 140660
loss: 1.1581248044967651,grad_norm: 0.9999997866989572, iteration: 140661
loss: 1.0701048374176025,grad_norm: 0.9999995458808406, iteration: 140662
loss: 1.2630378007888794,grad_norm: 0.9999996962488171, iteration: 140663
loss: 1.0859025716781616,grad_norm: 0.9999994959196564, iteration: 140664
loss: 1.4119157791137695,grad_norm: 0.9999997915192623, iteration: 140665
loss: 1.1497814655303955,grad_norm: 0.9999999183544583, iteration: 140666
loss: 1.098863959312439,grad_norm: 0.830145856520474, iteration: 140667
loss: 1.0661295652389526,grad_norm: 0.9999994015296383, iteration: 140668
loss: 1.2789900302886963,grad_norm: 0.9999995402091512, iteration: 140669
loss: 1.029657006263733,grad_norm: 0.9999998505207031, iteration: 140670
loss: 1.203698754310608,grad_norm: 0.9999999775177089, iteration: 140671
loss: 1.216955542564392,grad_norm: 0.9999998476738409, iteration: 140672
loss: 1.162514567375183,grad_norm: 0.9999999933642189, iteration: 140673
loss: 1.0173012018203735,grad_norm: 0.9999996520347024, iteration: 140674
loss: 1.2624633312225342,grad_norm: 0.99999972676835, iteration: 140675
loss: 1.2149269580841064,grad_norm: 0.9999999415122551, iteration: 140676
loss: 1.039054036140442,grad_norm: 0.9999988976573324, iteration: 140677
loss: 1.1848043203353882,grad_norm: 0.9999993682463388, iteration: 140678
loss: 1.1941882371902466,grad_norm: 1.0000000014754202, iteration: 140679
loss: 1.1820788383483887,grad_norm: 0.9999993979013649, iteration: 140680
loss: 1.1803412437438965,grad_norm: 0.9999993932469784, iteration: 140681
loss: 1.198926568031311,grad_norm: 0.99999956925554, iteration: 140682
loss: 1.2723586559295654,grad_norm: 0.9999998896761262, iteration: 140683
loss: 1.1730283498764038,grad_norm: 0.9999998544774867, iteration: 140684
loss: 1.1660919189453125,grad_norm: 0.999999899476028, iteration: 140685
loss: 1.2036069631576538,grad_norm: 0.9999994733736355, iteration: 140686
loss: 1.2335190773010254,grad_norm: 0.9999995277539572, iteration: 140687
loss: 1.202492594718933,grad_norm: 0.999999665888796, iteration: 140688
loss: 1.0945894718170166,grad_norm: 0.9999994699804885, iteration: 140689
loss: 1.2279040813446045,grad_norm: 0.9999997913370798, iteration: 140690
loss: 1.1629716157913208,grad_norm: 0.9999997124943188, iteration: 140691
loss: 1.0338364839553833,grad_norm: 0.9501859950684812, iteration: 140692
loss: 1.1418111324310303,grad_norm: 0.9999998995455436, iteration: 140693
loss: 1.1770542860031128,grad_norm: 0.9999998998371467, iteration: 140694
loss: 1.0600931644439697,grad_norm: 0.9999993885164752, iteration: 140695
loss: 1.075271487236023,grad_norm: 0.9999997202948903, iteration: 140696
loss: 1.070167899131775,grad_norm: 0.9999995104581237, iteration: 140697
loss: 1.212855339050293,grad_norm: 1.0000000554417279, iteration: 140698
loss: 1.1064066886901855,grad_norm: 0.9999999948568231, iteration: 140699
loss: 1.036866545677185,grad_norm: 0.9999996927520524, iteration: 140700
loss: 1.1255594491958618,grad_norm: 0.9999998116511607, iteration: 140701
loss: 1.1058728694915771,grad_norm: 0.999999715180744, iteration: 140702
loss: 1.0884768962860107,grad_norm: 0.999999918398853, iteration: 140703
loss: 1.2675551176071167,grad_norm: 1.0000000216509632, iteration: 140704
loss: 1.2848918437957764,grad_norm: 0.9999998720709614, iteration: 140705
loss: 1.1658776998519897,grad_norm: 0.9999998854013016, iteration: 140706
loss: 0.9947718977928162,grad_norm: 0.9938679091694425, iteration: 140707
loss: 1.210616946220398,grad_norm: 0.9999998450286811, iteration: 140708
loss: 1.1186444759368896,grad_norm: 0.9999994831982215, iteration: 140709
loss: 1.1290948390960693,grad_norm: 0.9999994974257599, iteration: 140710
loss: 1.0894817113876343,grad_norm: 0.9999998579660774, iteration: 140711
loss: 1.1529825925827026,grad_norm: 0.9999994437968531, iteration: 140712
loss: 1.0984188318252563,grad_norm: 0.9999995678852824, iteration: 140713
loss: 1.1262706518173218,grad_norm: 0.9999992499050792, iteration: 140714
loss: 1.3925507068634033,grad_norm: 1.0000000117592094, iteration: 140715
loss: 1.016168475151062,grad_norm: 0.9999994145691894, iteration: 140716
loss: 1.2071985006332397,grad_norm: 0.9999995740101328, iteration: 140717
loss: 1.0351496934890747,grad_norm: 0.9999995408052769, iteration: 140718
loss: 1.2882448434829712,grad_norm: 0.9999996902068277, iteration: 140719
loss: 1.082335352897644,grad_norm: 0.9999997594375438, iteration: 140720
loss: 1.3667285442352295,grad_norm: 0.9999999317450776, iteration: 140721
loss: 1.0642732381820679,grad_norm: 0.9999998630439995, iteration: 140722
loss: 1.0563595294952393,grad_norm: 0.9999999851451811, iteration: 140723
loss: 1.121256947517395,grad_norm: 0.9999998284575808, iteration: 140724
loss: 1.1172456741333008,grad_norm: 0.9999993273455209, iteration: 140725
loss: 1.3320016860961914,grad_norm: 0.9999999932959217, iteration: 140726
loss: 1.1605124473571777,grad_norm: 0.9999995013221836, iteration: 140727
loss: 1.0090444087982178,grad_norm: 0.9999993481066989, iteration: 140728
loss: 1.0861127376556396,grad_norm: 0.9999998201795341, iteration: 140729
loss: 1.1088188886642456,grad_norm: 0.9999996487884766, iteration: 140730
loss: 1.170446515083313,grad_norm: 0.9999991569165244, iteration: 140731
loss: 1.128095269203186,grad_norm: 0.9999994913976964, iteration: 140732
loss: 1.2408167123794556,grad_norm: 0.9999998110646461, iteration: 140733
loss: 1.3451929092407227,grad_norm: 0.9999997514562909, iteration: 140734
loss: 1.1815413236618042,grad_norm: 0.9999995992787625, iteration: 140735
loss: 1.1715515851974487,grad_norm: 0.9999994631559219, iteration: 140736
loss: 1.0982838869094849,grad_norm: 0.9999998833530737, iteration: 140737
loss: 1.3106672763824463,grad_norm: 0.9999997982039879, iteration: 140738
loss: 1.0642610788345337,grad_norm: 0.9999997040998885, iteration: 140739
loss: 1.511794924736023,grad_norm: 0.9999999832201321, iteration: 140740
loss: 1.0942596197128296,grad_norm: 0.9999996965333504, iteration: 140741
loss: 1.1747897863388062,grad_norm: 0.9999997960059286, iteration: 140742
loss: 1.2388997077941895,grad_norm: 0.9999997955448892, iteration: 140743
loss: 1.1017167568206787,grad_norm: 0.9999999165860661, iteration: 140744
loss: 1.099562168121338,grad_norm: 0.9999997037658227, iteration: 140745
loss: 1.0688486099243164,grad_norm: 0.9999995003335056, iteration: 140746
loss: 1.0819995403289795,grad_norm: 0.9999999321631113, iteration: 140747
loss: 1.1207921504974365,grad_norm: 0.9999996460927766, iteration: 140748
loss: 1.0952413082122803,grad_norm: 0.9999997094577123, iteration: 140749
loss: 1.1073769330978394,grad_norm: 0.999999863831943, iteration: 140750
loss: 1.1072945594787598,grad_norm: 0.9999997678201713, iteration: 140751
loss: 1.2197798490524292,grad_norm: 0.9999998207690888, iteration: 140752
loss: 1.0230340957641602,grad_norm: 0.9999990987799898, iteration: 140753
loss: 1.1084822416305542,grad_norm: 0.9999998464110389, iteration: 140754
loss: 1.081777811050415,grad_norm: 0.9999998883399741, iteration: 140755
loss: 1.079156517982483,grad_norm: 0.9999997253578672, iteration: 140756
loss: 1.1465070247650146,grad_norm: 0.9999995623646778, iteration: 140757
loss: 0.9981187582015991,grad_norm: 0.999999306778726, iteration: 140758
loss: 1.1332224607467651,grad_norm: 0.9999995527703803, iteration: 140759
loss: 1.15468168258667,grad_norm: 0.9999999370075361, iteration: 140760
loss: 1.1422287225723267,grad_norm: 0.9999999234468537, iteration: 140761
loss: 1.1014294624328613,grad_norm: 0.9999996405534182, iteration: 140762
loss: 1.1672929525375366,grad_norm: 0.9999995371954024, iteration: 140763
loss: 1.1108149290084839,grad_norm: 0.9999995425215995, iteration: 140764
loss: 0.9923758506774902,grad_norm: 0.9999990710861467, iteration: 140765
loss: 1.2380361557006836,grad_norm: 0.9999996625285975, iteration: 140766
loss: 0.9771507978439331,grad_norm: 0.9999990600677922, iteration: 140767
loss: 1.0749518871307373,grad_norm: 0.9999994709246635, iteration: 140768
loss: 1.0911024808883667,grad_norm: 0.9999994727528638, iteration: 140769
loss: 1.1149749755859375,grad_norm: 0.9999992490266641, iteration: 140770
loss: 1.190144658088684,grad_norm: 0.9999997692040153, iteration: 140771
loss: 1.0379102230072021,grad_norm: 0.8672222659477247, iteration: 140772
loss: 1.028947114944458,grad_norm: 0.9999998418726586, iteration: 140773
loss: 1.1289641857147217,grad_norm: 0.9999994011527182, iteration: 140774
loss: 1.0040700435638428,grad_norm: 0.9999994075918502, iteration: 140775
loss: 1.2163360118865967,grad_norm: 0.9999997964094609, iteration: 140776
loss: 1.0880383253097534,grad_norm: 0.9999992196175794, iteration: 140777
loss: 1.1505872011184692,grad_norm: 0.9999998210909665, iteration: 140778
loss: 1.0991460084915161,grad_norm: 0.9999996925634104, iteration: 140779
loss: 1.2068718671798706,grad_norm: 1.000000024860695, iteration: 140780
loss: 1.0820612907409668,grad_norm: 0.9999996781045537, iteration: 140781
loss: 1.027113914489746,grad_norm: 0.9999994956818422, iteration: 140782
loss: 1.1891391277313232,grad_norm: 0.9999997647569303, iteration: 140783
loss: 1.0735886096954346,grad_norm: 0.9999997814920346, iteration: 140784
loss: 1.207036018371582,grad_norm: 1.0000000351891618, iteration: 140785
loss: 1.0488933324813843,grad_norm: 0.9999997106427563, iteration: 140786
loss: 1.0436683893203735,grad_norm: 0.9999993791589613, iteration: 140787
loss: 1.3468046188354492,grad_norm: 1.0000000059297607, iteration: 140788
loss: 1.1192047595977783,grad_norm: 1.0000000285432384, iteration: 140789
loss: 1.2996240854263306,grad_norm: 0.9999997813527167, iteration: 140790
loss: 1.1046804189682007,grad_norm: 0.9999992274783456, iteration: 140791
loss: 1.2328294515609741,grad_norm: 0.9999994241430586, iteration: 140792
loss: 1.0725477933883667,grad_norm: 0.9999993580007488, iteration: 140793
loss: 1.0490291118621826,grad_norm: 0.9999998515603257, iteration: 140794
loss: 1.08596670627594,grad_norm: 1.0000000224714287, iteration: 140795
loss: 1.1578940153121948,grad_norm: 0.9999996552121863, iteration: 140796
loss: 1.1615606546401978,grad_norm: 0.9999994104255919, iteration: 140797
loss: 1.203830361366272,grad_norm: 0.9999999044906901, iteration: 140798
loss: 1.0040907859802246,grad_norm: 0.8833940240921724, iteration: 140799
loss: 1.3987137079238892,grad_norm: 0.9999998923740511, iteration: 140800
loss: 1.183496117591858,grad_norm: 0.9999998626337733, iteration: 140801
loss: 1.2450602054595947,grad_norm: 1.0000000391205766, iteration: 140802
loss: 1.093416690826416,grad_norm: 0.99999973225719, iteration: 140803
loss: 1.3311642408370972,grad_norm: 0.9999997556295971, iteration: 140804
loss: 1.1794933080673218,grad_norm: 0.999999466194608, iteration: 140805
loss: 1.21981680393219,grad_norm: 0.9999997195545067, iteration: 140806
loss: 1.1123077869415283,grad_norm: 0.9999998937111361, iteration: 140807
loss: 1.1923576593399048,grad_norm: 0.9999998049915757, iteration: 140808
loss: 1.0506738424301147,grad_norm: 0.8337200042899904, iteration: 140809
loss: 1.044765830039978,grad_norm: 0.9999996973784463, iteration: 140810
loss: 1.1029266119003296,grad_norm: 0.999999389663725, iteration: 140811
loss: 1.1374529600143433,grad_norm: 1.000000060331272, iteration: 140812
loss: 1.0456904172897339,grad_norm: 0.9999993567329674, iteration: 140813
loss: 1.116880178451538,grad_norm: 0.9999998953759147, iteration: 140814
loss: 1.1312004327774048,grad_norm: 0.9999993500071065, iteration: 140815
loss: 1.064200758934021,grad_norm: 0.9999997951482397, iteration: 140816
loss: 1.0335427522659302,grad_norm: 0.9999991062049536, iteration: 140817
loss: 1.0019785165786743,grad_norm: 0.9004059241689566, iteration: 140818
loss: 1.2510265111923218,grad_norm: 0.9999999117949171, iteration: 140819
loss: 1.1084109544754028,grad_norm: 0.9999998165119671, iteration: 140820
loss: 1.3515541553497314,grad_norm: 1.000000063681719, iteration: 140821
loss: 1.03030264377594,grad_norm: 0.9999998247968077, iteration: 140822
loss: 1.1580380201339722,grad_norm: 0.9999996579595454, iteration: 140823
loss: 1.0275485515594482,grad_norm: 0.9999992631786382, iteration: 140824
loss: 1.1599702835083008,grad_norm: 0.9999999388636593, iteration: 140825
loss: 1.1327197551727295,grad_norm: 0.9999995500233558, iteration: 140826
loss: 1.039837121963501,grad_norm: 0.999999380316048, iteration: 140827
loss: 1.1284055709838867,grad_norm: 0.9999997563907354, iteration: 140828
loss: 1.1421282291412354,grad_norm: 0.9999999749687878, iteration: 140829
loss: 1.0853056907653809,grad_norm: 0.9999998737843337, iteration: 140830
loss: 1.029176115989685,grad_norm: 0.9999997961170637, iteration: 140831
loss: 1.1883436441421509,grad_norm: 0.9999999669169058, iteration: 140832
loss: 1.2078900337219238,grad_norm: 0.999999716854547, iteration: 140833
loss: 1.1711288690567017,grad_norm: 0.9999995858921832, iteration: 140834
loss: 1.2286711931228638,grad_norm: 0.9999998233698066, iteration: 140835
loss: 1.106361985206604,grad_norm: 0.9999997627028514, iteration: 140836
loss: 1.062159538269043,grad_norm: 0.9999991884387415, iteration: 140837
loss: 1.0951050519943237,grad_norm: 0.9999995917665062, iteration: 140838
loss: 1.0659595727920532,grad_norm: 0.9999997626808808, iteration: 140839
loss: 1.0461194515228271,grad_norm: 0.9999997701143991, iteration: 140840
loss: 1.142405390739441,grad_norm: 0.9999996662858307, iteration: 140841
loss: 1.078755259513855,grad_norm: 0.9999992891669809, iteration: 140842
loss: 1.023215889930725,grad_norm: 0.9220150900296673, iteration: 140843
loss: 1.0708348751068115,grad_norm: 0.9999998793393106, iteration: 140844
loss: 1.1248748302459717,grad_norm: 0.9999997928960781, iteration: 140845
loss: 1.032777190208435,grad_norm: 0.999999913160743, iteration: 140846
loss: 1.1569268703460693,grad_norm: 0.9999998617649448, iteration: 140847
loss: 1.1645746231079102,grad_norm: 0.999999541304531, iteration: 140848
loss: 1.0308562517166138,grad_norm: 0.9999997317139141, iteration: 140849
loss: 1.1117609739303589,grad_norm: 0.9999993029747418, iteration: 140850
loss: 1.1645441055297852,grad_norm: 0.999999917468098, iteration: 140851
loss: 1.115462064743042,grad_norm: 0.9999997131535382, iteration: 140852
loss: 1.0161514282226562,grad_norm: 0.9999992097444518, iteration: 140853
loss: 1.2031381130218506,grad_norm: 0.9999998376284283, iteration: 140854
loss: 1.0690816640853882,grad_norm: 0.9999995275512282, iteration: 140855
loss: 1.0470983982086182,grad_norm: 0.9999991963340926, iteration: 140856
loss: 1.3392369747161865,grad_norm: 0.9999997775152405, iteration: 140857
loss: 1.3939884901046753,grad_norm: 0.9999997868531513, iteration: 140858
loss: 1.2147444486618042,grad_norm: 0.9999998761334449, iteration: 140859
loss: 1.1713021993637085,grad_norm: 0.9999999537215448, iteration: 140860
loss: 1.168026089668274,grad_norm: 0.999999981596989, iteration: 140861
loss: 1.3440346717834473,grad_norm: 0.999999943452745, iteration: 140862
loss: 1.3574012517929077,grad_norm: 0.9999996727349493, iteration: 140863
loss: 1.1767820119857788,grad_norm: 0.9999997361722188, iteration: 140864
loss: 1.0030897855758667,grad_norm: 0.9999997665477305, iteration: 140865
loss: 1.1647695302963257,grad_norm: 0.9999992959903666, iteration: 140866
loss: 1.075972318649292,grad_norm: 0.9999995712309534, iteration: 140867
loss: 1.1887563467025757,grad_norm: 0.9999999834961149, iteration: 140868
loss: 1.1792045831680298,grad_norm: 0.9999998668203225, iteration: 140869
loss: 0.9872685670852661,grad_norm: 0.964966370579303, iteration: 140870
loss: 1.1088693141937256,grad_norm: 0.9999999104322118, iteration: 140871
loss: 1.0846495628356934,grad_norm: 0.999999732998661, iteration: 140872
loss: 1.1591848134994507,grad_norm: 0.9999998547650979, iteration: 140873
loss: 1.0576893091201782,grad_norm: 1.0000000050945526, iteration: 140874
loss: 1.0920358896255493,grad_norm: 0.9999999996802516, iteration: 140875
loss: 1.144844889640808,grad_norm: 0.9999995713860329, iteration: 140876
loss: 1.3015031814575195,grad_norm: 0.999999573267379, iteration: 140877
loss: 1.2259745597839355,grad_norm: 0.9999999815988784, iteration: 140878
loss: 1.2081166505813599,grad_norm: 0.9999997021647078, iteration: 140879
loss: 1.0381810665130615,grad_norm: 0.9999997696236732, iteration: 140880
loss: 1.0538289546966553,grad_norm: 0.9999995368678852, iteration: 140881
loss: 1.1040114164352417,grad_norm: 0.9999992902360553, iteration: 140882
loss: 1.0968854427337646,grad_norm: 0.9999998623596013, iteration: 140883
loss: 1.0592349767684937,grad_norm: 0.9999995819312749, iteration: 140884
loss: 1.3136857748031616,grad_norm: 0.9999997564418885, iteration: 140885
loss: 1.153141975402832,grad_norm: 0.9999999292005656, iteration: 140886
loss: 1.133388638496399,grad_norm: 0.9999997072150378, iteration: 140887
loss: 1.1114342212677002,grad_norm: 1.0000000589113458, iteration: 140888
loss: 1.2742584943771362,grad_norm: 0.9999999052648337, iteration: 140889
loss: 1.1696192026138306,grad_norm: 0.9999996109971457, iteration: 140890
loss: 1.0691845417022705,grad_norm: 0.9999998216746753, iteration: 140891
loss: 1.0529879331588745,grad_norm: 0.999999385771766, iteration: 140892
loss: 1.0986566543579102,grad_norm: 0.9999997895363549, iteration: 140893
loss: 1.1529486179351807,grad_norm: 0.9999999524794944, iteration: 140894
loss: 1.1480920314788818,grad_norm: 0.9999996560801505, iteration: 140895
loss: 1.1277090311050415,grad_norm: 0.9999997773514608, iteration: 140896
loss: 1.0193606615066528,grad_norm: 0.999999493495409, iteration: 140897
loss: 1.0453474521636963,grad_norm: 0.8913367909661178, iteration: 140898
loss: 1.1785105466842651,grad_norm: 0.9999994779195611, iteration: 140899
loss: 1.1761646270751953,grad_norm: 0.9999999007199712, iteration: 140900
loss: 1.2000384330749512,grad_norm: 0.9999999048429061, iteration: 140901
loss: 1.0787663459777832,grad_norm: 0.9999995516519768, iteration: 140902
loss: 1.1400275230407715,grad_norm: 0.999999529081857, iteration: 140903
loss: 1.058244228363037,grad_norm: 0.9999998185202332, iteration: 140904
loss: 1.1489909887313843,grad_norm: 0.9999999370352731, iteration: 140905
loss: 1.1170909404754639,grad_norm: 1.0000000328394367, iteration: 140906
loss: 1.1323025226593018,grad_norm: 0.9999995568567215, iteration: 140907
loss: 1.2271313667297363,grad_norm: 0.9999999658548855, iteration: 140908
loss: 1.0263617038726807,grad_norm: 0.9999995966258935, iteration: 140909
loss: 1.2496203184127808,grad_norm: 0.9999997073928102, iteration: 140910
loss: 1.076244831085205,grad_norm: 0.9999998499446435, iteration: 140911
loss: 1.3481640815734863,grad_norm: 0.9999998548037977, iteration: 140912
loss: 1.1676069498062134,grad_norm: 0.9999997598579684, iteration: 140913
loss: 1.076932430267334,grad_norm: 0.9999992726804992, iteration: 140914
loss: 1.1162443161010742,grad_norm: 0.9999991306386129, iteration: 140915
loss: 1.0643900632858276,grad_norm: 0.9999994494841105, iteration: 140916
loss: 1.2356451749801636,grad_norm: 0.999999933451231, iteration: 140917
loss: 1.2370502948760986,grad_norm: 0.9999998229075489, iteration: 140918
loss: 1.0462541580200195,grad_norm: 0.9733093543151895, iteration: 140919
loss: 1.0318189859390259,grad_norm: 0.9999995526532885, iteration: 140920
loss: 1.1255722045898438,grad_norm: 0.9999997901323651, iteration: 140921
loss: 1.4543161392211914,grad_norm: 1.0000000504140025, iteration: 140922
loss: 1.1191850900650024,grad_norm: 0.9999997960857273, iteration: 140923
loss: 1.1097997426986694,grad_norm: 0.9999994347734915, iteration: 140924
loss: 1.1226986646652222,grad_norm: 0.9999997444939426, iteration: 140925
loss: 1.090794563293457,grad_norm: 0.9999998817241758, iteration: 140926
loss: 1.200263500213623,grad_norm: 0.9999998567165639, iteration: 140927
loss: 1.1067010164260864,grad_norm: 0.9999997676320956, iteration: 140928
loss: 0.9995548725128174,grad_norm: 0.9999998450159694, iteration: 140929
loss: 1.1006048917770386,grad_norm: 0.9999992442888546, iteration: 140930
loss: 1.0357277393341064,grad_norm: 0.9999992906123891, iteration: 140931
loss: 1.0172631740570068,grad_norm: 0.9999996273042693, iteration: 140932
loss: 1.201554775238037,grad_norm: 0.9999996663359829, iteration: 140933
loss: 1.0183476209640503,grad_norm: 0.9999999002952069, iteration: 140934
loss: 1.0914942026138306,grad_norm: 0.9999999157897097, iteration: 140935
loss: 1.077317714691162,grad_norm: 0.9999998295689368, iteration: 140936
loss: 1.136918067932129,grad_norm: 0.9999999104656254, iteration: 140937
loss: 1.1308156251907349,grad_norm: 0.9999998484981658, iteration: 140938
loss: 1.0558608770370483,grad_norm: 0.9999994074746689, iteration: 140939
loss: 1.0595654249191284,grad_norm: 0.9999991027329485, iteration: 140940
loss: 1.029057264328003,grad_norm: 0.9999997092363628, iteration: 140941
loss: 1.142232060432434,grad_norm: 0.9999998760553247, iteration: 140942
loss: 1.0986878871917725,grad_norm: 0.9999996749407603, iteration: 140943
loss: 1.0984491109848022,grad_norm: 0.9999993180736261, iteration: 140944
loss: 1.124153733253479,grad_norm: 0.9999998807879973, iteration: 140945
loss: 1.0953469276428223,grad_norm: 0.9999998415014545, iteration: 140946
loss: 1.184938907623291,grad_norm: 0.9999999696798434, iteration: 140947
loss: 1.1798535585403442,grad_norm: 0.9999995638483281, iteration: 140948
loss: 1.0512537956237793,grad_norm: 0.9999990649674609, iteration: 140949
loss: 1.084939956665039,grad_norm: 0.9999999191548908, iteration: 140950
loss: 1.40590500831604,grad_norm: 1.000000057118517, iteration: 140951
loss: 0.9904870390892029,grad_norm: 0.9999991454899493, iteration: 140952
loss: 1.1303006410598755,grad_norm: 0.9999990569999685, iteration: 140953
loss: 1.181939721107483,grad_norm: 0.9999997313357438, iteration: 140954
loss: 1.094976782798767,grad_norm: 0.9999998638833469, iteration: 140955
loss: 1.0231008529663086,grad_norm: 0.9999995481672255, iteration: 140956
loss: 1.3641607761383057,grad_norm: 1.0000000238773699, iteration: 140957
loss: 1.142340898513794,grad_norm: 0.9999994081891288, iteration: 140958
loss: 1.2444761991500854,grad_norm: 1.0000000021562192, iteration: 140959
loss: 1.0412003993988037,grad_norm: 0.9999997610086133, iteration: 140960
loss: 1.2979844808578491,grad_norm: 0.9999998861986189, iteration: 140961
loss: 1.16411292552948,grad_norm: 0.9999995425792956, iteration: 140962
loss: 1.004296898841858,grad_norm: 0.9999995531989629, iteration: 140963
loss: 1.3311278820037842,grad_norm: 1.0000000257717523, iteration: 140964
loss: 1.1407853364944458,grad_norm: 0.9999994249140853, iteration: 140965
loss: 1.0553967952728271,grad_norm: 0.9999991829754538, iteration: 140966
loss: 1.41298246383667,grad_norm: 0.9999998295195676, iteration: 140967
loss: 1.1223466396331787,grad_norm: 0.9999996426700143, iteration: 140968
loss: 1.1609807014465332,grad_norm: 0.9999996382955115, iteration: 140969
loss: 1.037822961807251,grad_norm: 0.9999997701595997, iteration: 140970
loss: 1.0518946647644043,grad_norm: 0.9999996996263312, iteration: 140971
loss: 1.2835906744003296,grad_norm: 0.9999999311979301, iteration: 140972
loss: 1.1305428743362427,grad_norm: 0.999999886281438, iteration: 140973
loss: 1.3167470693588257,grad_norm: 0.9999996701571782, iteration: 140974
loss: 1.1896246671676636,grad_norm: 0.9999995321453853, iteration: 140975
loss: 1.1866815090179443,grad_norm: 0.9999998037152521, iteration: 140976
loss: 1.0232079029083252,grad_norm: 0.9999992220563145, iteration: 140977
loss: 1.2383102178573608,grad_norm: 0.9999997910285894, iteration: 140978
loss: 1.1811749935150146,grad_norm: 0.9999994419144871, iteration: 140979
loss: 1.1749955415725708,grad_norm: 0.999999196991276, iteration: 140980
loss: 1.0815402269363403,grad_norm: 0.9999998924800894, iteration: 140981
loss: 0.9985328912734985,grad_norm: 0.9999998514352225, iteration: 140982
loss: 1.087992548942566,grad_norm: 0.9999998902255518, iteration: 140983
loss: 1.0713251829147339,grad_norm: 0.9999999602043992, iteration: 140984
loss: 0.9668382406234741,grad_norm: 0.9786136166150644, iteration: 140985
loss: 1.060719609260559,grad_norm: 0.9999995073820521, iteration: 140986
loss: 1.0436629056930542,grad_norm: 0.999999147960288, iteration: 140987
loss: 1.1247150897979736,grad_norm: 0.9999990753268254, iteration: 140988
loss: 1.2534703016281128,grad_norm: 0.9999999594033686, iteration: 140989
loss: 1.0769447088241577,grad_norm: 0.9999995910919495, iteration: 140990
loss: 1.057496190071106,grad_norm: 0.9999990062330121, iteration: 140991
loss: 1.3353700637817383,grad_norm: 0.9999999027260951, iteration: 140992
loss: 1.0685025453567505,grad_norm: 0.9999993652200934, iteration: 140993
loss: 1.2256399393081665,grad_norm: 0.9999998181115765, iteration: 140994
loss: 1.1026840209960938,grad_norm: 0.9999994443465434, iteration: 140995
loss: 1.1103190183639526,grad_norm: 0.9999997378846665, iteration: 140996
loss: 1.0886270999908447,grad_norm: 0.9999999546175167, iteration: 140997
loss: 1.0830883979797363,grad_norm: 0.9999994020936591, iteration: 140998
loss: 0.9821084141731262,grad_norm: 0.9956939673485071, iteration: 140999
loss: 1.119011402130127,grad_norm: 0.9999999092288242, iteration: 141000
loss: 1.160873293876648,grad_norm: 0.9999993522574219, iteration: 141001
loss: 1.0345302820205688,grad_norm: 0.9999991104905487, iteration: 141002
loss: 1.1562793254852295,grad_norm: 0.9999996634603583, iteration: 141003
loss: 1.0117958784103394,grad_norm: 0.9999989250653138, iteration: 141004
loss: 1.0130213499069214,grad_norm: 0.9999995702807319, iteration: 141005
loss: 1.1117326021194458,grad_norm: 0.999999151726423, iteration: 141006
loss: 1.1750423908233643,grad_norm: 0.9999994556293321, iteration: 141007
loss: 1.091877818107605,grad_norm: 0.999999812791971, iteration: 141008
loss: 1.020344614982605,grad_norm: 0.8188841316773142, iteration: 141009
loss: 1.0736668109893799,grad_norm: 0.9999995785322725, iteration: 141010
loss: 1.1552748680114746,grad_norm: 0.9999999607707587, iteration: 141011
loss: 1.0398069620132446,grad_norm: 0.9999998007482294, iteration: 141012
loss: 0.9909302592277527,grad_norm: 0.999999571892384, iteration: 141013
loss: 1.1763863563537598,grad_norm: 0.9999997560489933, iteration: 141014
loss: 1.1070544719696045,grad_norm: 0.9999994981984864, iteration: 141015
loss: 1.0677666664123535,grad_norm: 0.9999993307497513, iteration: 141016
loss: 1.0660887956619263,grad_norm: 0.9999994930856473, iteration: 141017
loss: 1.1046372652053833,grad_norm: 0.9999991183606214, iteration: 141018
loss: 1.3106974363327026,grad_norm: 0.9999998876923268, iteration: 141019
loss: 1.1170302629470825,grad_norm: 0.9999991984110264, iteration: 141020
loss: 1.1764509677886963,grad_norm: 0.9999995414827149, iteration: 141021
loss: 1.0525833368301392,grad_norm: 0.999999113604242, iteration: 141022
loss: 1.1884552240371704,grad_norm: 0.9999997512373626, iteration: 141023
loss: 1.1263443231582642,grad_norm: 0.999999622629116, iteration: 141024
loss: 1.1311153173446655,grad_norm: 0.9999999303740542, iteration: 141025
loss: 1.0495864152908325,grad_norm: 0.9999993366068071, iteration: 141026
loss: 1.0273895263671875,grad_norm: 0.8721931220671219, iteration: 141027
loss: 1.0223758220672607,grad_norm: 0.862681241449213, iteration: 141028
loss: 1.0413089990615845,grad_norm: 0.9999990233241937, iteration: 141029
loss: 0.9886184334754944,grad_norm: 0.83170995520231, iteration: 141030
loss: 1.3675988912582397,grad_norm: 0.9999998955046779, iteration: 141031
loss: 1.0843467712402344,grad_norm: 0.9999999603121077, iteration: 141032
loss: 1.1161214113235474,grad_norm: 0.9999993419577705, iteration: 141033
loss: 1.0337568521499634,grad_norm: 0.9999994799697551, iteration: 141034
loss: 1.0830563306808472,grad_norm: 0.9999992111434497, iteration: 141035
loss: 1.0321097373962402,grad_norm: 0.9999996524604401, iteration: 141036
loss: 0.9518786072731018,grad_norm: 0.8289220134209332, iteration: 141037
loss: 1.1436697244644165,grad_norm: 0.9999993383365993, iteration: 141038
loss: 1.0724279880523682,grad_norm: 0.9999994824646233, iteration: 141039
loss: 1.3148728609085083,grad_norm: 0.9999998057009785, iteration: 141040
loss: 1.0297363996505737,grad_norm: 0.9999990944680703, iteration: 141041
loss: 1.0409033298492432,grad_norm: 0.9999995519217351, iteration: 141042
loss: 1.2084107398986816,grad_norm: 0.9999998776405742, iteration: 141043
loss: 1.0251243114471436,grad_norm: 0.9999995913535248, iteration: 141044
loss: 1.1305270195007324,grad_norm: 0.9999994881351879, iteration: 141045
loss: 1.0107812881469727,grad_norm: 0.9999992946935392, iteration: 141046
loss: 1.1668813228607178,grad_norm: 0.9999996283566054, iteration: 141047
loss: 1.0080606937408447,grad_norm: 0.999999273933756, iteration: 141048
loss: 1.0920132398605347,grad_norm: 0.9999997057894396, iteration: 141049
loss: 1.0049101114273071,grad_norm: 0.8697052813839375, iteration: 141050
loss: 1.024481177330017,grad_norm: 0.9999997864244287, iteration: 141051
loss: 1.0408421754837036,grad_norm: 0.9999993732636717, iteration: 141052
loss: 1.233140468597412,grad_norm: 0.9999996096526533, iteration: 141053
loss: 1.0910135507583618,grad_norm: 0.9999998691650748, iteration: 141054
loss: 1.0182795524597168,grad_norm: 0.9999991702625001, iteration: 141055
loss: 1.0690505504608154,grad_norm: 0.9999995408671926, iteration: 141056
loss: 1.1067579984664917,grad_norm: 0.9999997898275474, iteration: 141057
loss: 1.0109069347381592,grad_norm: 0.9999995137237974, iteration: 141058
loss: 1.3502469062805176,grad_norm: 0.9999995388286034, iteration: 141059
loss: 0.9926149249076843,grad_norm: 0.852404594151864, iteration: 141060
loss: 1.1659612655639648,grad_norm: 0.9914992047444949, iteration: 141061
loss: 1.2116644382476807,grad_norm: 0.9999996727728864, iteration: 141062
loss: 0.9643125534057617,grad_norm: 0.9188084434718835, iteration: 141063
loss: 0.9985665082931519,grad_norm: 0.9501503389024403, iteration: 141064
loss: 1.1048128604888916,grad_norm: 0.9999991211135997, iteration: 141065
loss: 1.0084296464920044,grad_norm: 0.930864753284076, iteration: 141066
loss: 1.0960700511932373,grad_norm: 0.9999995753221094, iteration: 141067
loss: 1.0530108213424683,grad_norm: 0.9999995303605418, iteration: 141068
loss: 1.0945028066635132,grad_norm: 0.9999998919413091, iteration: 141069
loss: 1.1660319566726685,grad_norm: 0.9999998852543608, iteration: 141070
loss: 1.049463152885437,grad_norm: 0.9999994824459035, iteration: 141071
loss: 1.1702021360397339,grad_norm: 0.9999996911245896, iteration: 141072
loss: 1.0845240354537964,grad_norm: 0.9530952615418539, iteration: 141073
loss: 1.0771434307098389,grad_norm: 0.9999999096554864, iteration: 141074
loss: 1.2348933219909668,grad_norm: 0.9999997195565595, iteration: 141075
loss: 1.2800480127334595,grad_norm: 0.9999995916711623, iteration: 141076
loss: 0.9862169623374939,grad_norm: 0.9121550977181984, iteration: 141077
loss: 1.0483909845352173,grad_norm: 0.9999999091655022, iteration: 141078
loss: 1.0167858600616455,grad_norm: 0.9359712549144206, iteration: 141079
loss: 1.0609225034713745,grad_norm: 0.9999993007505684, iteration: 141080
loss: 1.0883711576461792,grad_norm: 0.9999994139942389, iteration: 141081
loss: 1.048051118850708,grad_norm: 0.9999993899768259, iteration: 141082
loss: 1.1096643209457397,grad_norm: 0.9999997326554538, iteration: 141083
loss: 1.0641230344772339,grad_norm: 0.9999992891807324, iteration: 141084
loss: 1.0980929136276245,grad_norm: 0.9999995637193547, iteration: 141085
loss: 1.0203933715820312,grad_norm: 0.9210519613460996, iteration: 141086
loss: 1.0913053750991821,grad_norm: 0.9999996147806771, iteration: 141087
loss: 1.297805905342102,grad_norm: 0.9999997216650954, iteration: 141088
loss: 1.3265371322631836,grad_norm: 0.9999998789656134, iteration: 141089
loss: 1.275469422340393,grad_norm: 0.9999999848929838, iteration: 141090
loss: 1.171411156654358,grad_norm: 0.9999993971600302, iteration: 141091
loss: 1.0286179780960083,grad_norm: 0.9433277059508567, iteration: 141092
loss: 1.0116111040115356,grad_norm: 0.9999996790854052, iteration: 141093
loss: 1.117639422416687,grad_norm: 0.999999863317108, iteration: 141094
loss: 1.0583617687225342,grad_norm: 0.9999993230138323, iteration: 141095
loss: 1.1849113702774048,grad_norm: 1.0000000004319363, iteration: 141096
loss: 1.0783143043518066,grad_norm: 0.9999996829870025, iteration: 141097
loss: 1.0718333721160889,grad_norm: 0.9999997404639932, iteration: 141098
loss: 1.1154624223709106,grad_norm: 0.9999994141655656, iteration: 141099
loss: 1.1759876012802124,grad_norm: 0.9999995606733356, iteration: 141100
loss: 1.0471632480621338,grad_norm: 0.9999995102420374, iteration: 141101
loss: 1.0506469011306763,grad_norm: 0.9999998960042431, iteration: 141102
loss: 0.9881964921951294,grad_norm: 0.9999995458533956, iteration: 141103
loss: 1.1174075603485107,grad_norm: 1.0000000233265864, iteration: 141104
loss: 1.0256710052490234,grad_norm: 0.9645537663114968, iteration: 141105
loss: 1.0412826538085938,grad_norm: 0.9999993378969143, iteration: 141106
loss: 1.0788377523422241,grad_norm: 0.9440496753009041, iteration: 141107
loss: 1.008116602897644,grad_norm: 0.9999998660631065, iteration: 141108
loss: 1.1001275777816772,grad_norm: 0.9999998815821515, iteration: 141109
loss: 1.0472644567489624,grad_norm: 0.9999995085286967, iteration: 141110
loss: 1.0983790159225464,grad_norm: 0.9999995558616317, iteration: 141111
loss: 1.035715937614441,grad_norm: 0.9999999468021415, iteration: 141112
loss: 1.069101333618164,grad_norm: 0.9999992589077853, iteration: 141113
loss: 1.205873727798462,grad_norm: 0.9999997742894648, iteration: 141114
loss: 1.010417103767395,grad_norm: 0.999999073363662, iteration: 141115
loss: 1.0053707361221313,grad_norm: 0.9123304989111983, iteration: 141116
loss: 1.041690707206726,grad_norm: 0.9999992465637795, iteration: 141117
loss: 1.021185278892517,grad_norm: 0.9999993004899637, iteration: 141118
loss: 1.0399771928787231,grad_norm: 0.9999996451553272, iteration: 141119
loss: 1.1309821605682373,grad_norm: 0.9999993603916881, iteration: 141120
loss: 1.0551891326904297,grad_norm: 0.9999993514216609, iteration: 141121
loss: 1.1927621364593506,grad_norm: 0.9999999165250699, iteration: 141122
loss: 1.0609064102172852,grad_norm: 0.9999994704611787, iteration: 141123
loss: 1.095687747001648,grad_norm: 0.9999998315409722, iteration: 141124
loss: 1.1743186712265015,grad_norm: 0.9999999002775376, iteration: 141125
loss: 1.266568899154663,grad_norm: 0.9999999005172286, iteration: 141126
loss: 1.2046725749969482,grad_norm: 0.9999994096415855, iteration: 141127
loss: 1.139161229133606,grad_norm: 0.9999993098505252, iteration: 141128
loss: 1.2222497463226318,grad_norm: 0.9999998680460275, iteration: 141129
loss: 1.1045132875442505,grad_norm: 0.9999999151323197, iteration: 141130
loss: 1.139127254486084,grad_norm: 0.9999992319989393, iteration: 141131
loss: 1.043704628944397,grad_norm: 0.9999993281656512, iteration: 141132
loss: 1.2294758558273315,grad_norm: 1.0000000728099547, iteration: 141133
loss: 1.0535482168197632,grad_norm: 0.9999995216025532, iteration: 141134
loss: 1.050881266593933,grad_norm: 0.9045262533796272, iteration: 141135
loss: 0.9695805907249451,grad_norm: 0.9999992869912603, iteration: 141136
loss: 1.046539545059204,grad_norm: 0.9999990988023283, iteration: 141137
loss: 1.038734793663025,grad_norm: 0.9999997329769409, iteration: 141138
loss: 1.223275899887085,grad_norm: 0.9999995154891212, iteration: 141139
loss: 1.0022143125534058,grad_norm: 0.8841777362081417, iteration: 141140
loss: 1.0731391906738281,grad_norm: 0.9999992014301539, iteration: 141141
loss: 1.1132234334945679,grad_norm: 0.9999996864788464, iteration: 141142
loss: 1.123796820640564,grad_norm: 0.9999995567492516, iteration: 141143
loss: 1.098067045211792,grad_norm: 0.9999994576700393, iteration: 141144
loss: 1.1020889282226562,grad_norm: 0.9999997197497471, iteration: 141145
loss: 1.2268805503845215,grad_norm: 0.999999851858411, iteration: 141146
loss: 0.9989950656890869,grad_norm: 0.9999992532245797, iteration: 141147
loss: 1.0901535749435425,grad_norm: 0.9999999047173911, iteration: 141148
loss: 0.9752479195594788,grad_norm: 0.9093516911728984, iteration: 141149
loss: 1.0137016773223877,grad_norm: 0.9999993667255068, iteration: 141150
loss: 1.0172499418258667,grad_norm: 0.9999992615682238, iteration: 141151
loss: 1.0619542598724365,grad_norm: 0.9999994799119083, iteration: 141152
loss: 0.9861742854118347,grad_norm: 0.9999991367995957, iteration: 141153
loss: 1.0745759010314941,grad_norm: 0.9026661160967325, iteration: 141154
loss: 1.0340420007705688,grad_norm: 0.9999999499423179, iteration: 141155
loss: 1.045992374420166,grad_norm: 0.9999993140335066, iteration: 141156
loss: 1.0820887088775635,grad_norm: 0.9999996076458353, iteration: 141157
loss: 1.0512937307357788,grad_norm: 0.9999990957817022, iteration: 141158
loss: 1.0821163654327393,grad_norm: 0.9999998057559939, iteration: 141159
loss: 1.065759301185608,grad_norm: 0.9999998895873037, iteration: 141160
loss: 0.982552170753479,grad_norm: 0.8352575780075918, iteration: 141161
loss: 1.0790618658065796,grad_norm: 0.9999998984736308, iteration: 141162
loss: 1.1583483219146729,grad_norm: 0.9999997316220597, iteration: 141163
loss: 1.10014808177948,grad_norm: 0.9999999031327046, iteration: 141164
loss: 1.3050518035888672,grad_norm: 0.9999999637100914, iteration: 141165
loss: 0.9728421568870544,grad_norm: 0.9999992843494461, iteration: 141166
loss: 1.0168533325195312,grad_norm: 0.9999988975049399, iteration: 141167
loss: 1.0350486040115356,grad_norm: 0.9999999381927852, iteration: 141168
loss: 1.5496947765350342,grad_norm: 0.9999999217817963, iteration: 141169
loss: 1.0322821140289307,grad_norm: 0.9999993669339102, iteration: 141170
loss: 1.1748310327529907,grad_norm: 0.9999994348293904, iteration: 141171
loss: 0.9677694439888,grad_norm: 0.8384683002427947, iteration: 141172
loss: 1.3280614614486694,grad_norm: 0.9999997674563763, iteration: 141173
loss: 1.1674944162368774,grad_norm: 0.9999992652750436, iteration: 141174
loss: 1.0980958938598633,grad_norm: 0.9999997305907423, iteration: 141175
loss: 1.221889853477478,grad_norm: 0.9999998028359754, iteration: 141176
loss: 1.0792056322097778,grad_norm: 0.9999992598114863, iteration: 141177
loss: 1.0971673727035522,grad_norm: 0.9999995879654053, iteration: 141178
loss: 1.2240760326385498,grad_norm: 0.9999998785767066, iteration: 141179
loss: 1.071480631828308,grad_norm: 0.9999999131584448, iteration: 141180
loss: 1.2335330247879028,grad_norm: 0.9999999151088914, iteration: 141181
loss: 1.1922001838684082,grad_norm: 0.9999999150844032, iteration: 141182
loss: 1.065077543258667,grad_norm: 0.9999992860897324, iteration: 141183
loss: 1.249057412147522,grad_norm: 0.9999998125778653, iteration: 141184
loss: 1.210121512413025,grad_norm: 0.9999998694358982, iteration: 141185
loss: 1.2039287090301514,grad_norm: 0.9999996957334258, iteration: 141186
loss: 1.1032975912094116,grad_norm: 0.9999998693600304, iteration: 141187
loss: 1.3562190532684326,grad_norm: 0.9999995175068432, iteration: 141188
loss: 1.2392849922180176,grad_norm: 0.999999931807119, iteration: 141189
loss: 1.1237422227859497,grad_norm: 0.9999997677926844, iteration: 141190
loss: 1.0032517910003662,grad_norm: 0.9999999867884611, iteration: 141191
loss: 1.214686632156372,grad_norm: 0.9999998040735293, iteration: 141192
loss: 1.1372562646865845,grad_norm: 0.9999993427117639, iteration: 141193
loss: 1.138547420501709,grad_norm: 0.999999251237904, iteration: 141194
loss: 1.0493265390396118,grad_norm: 0.9999992967687078, iteration: 141195
loss: 1.1411668062210083,grad_norm: 1.000000062723098, iteration: 141196
loss: 1.099287748336792,grad_norm: 0.9999998596985005, iteration: 141197
loss: 1.0741593837738037,grad_norm: 0.9999994518020255, iteration: 141198
loss: 1.0139737129211426,grad_norm: 0.9999998116942959, iteration: 141199
loss: 1.21406090259552,grad_norm: 0.9999997065962829, iteration: 141200
loss: 0.9706053137779236,grad_norm: 0.9999999577865754, iteration: 141201
loss: 1.1780569553375244,grad_norm: 0.9999996449465625, iteration: 141202
loss: 1.272903561592102,grad_norm: 0.9999999497680818, iteration: 141203
loss: 1.1215035915374756,grad_norm: 0.9999995284314659, iteration: 141204
loss: 1.0482205152511597,grad_norm: 0.9999997081529801, iteration: 141205
loss: 1.135031819343567,grad_norm: 0.9999997469473088, iteration: 141206
loss: 1.1159945726394653,grad_norm: 0.9999999286137988, iteration: 141207
loss: 1.1786381006240845,grad_norm: 0.9999999775123752, iteration: 141208
loss: 1.1707777976989746,grad_norm: 0.9999994477866104, iteration: 141209
loss: 0.9906821250915527,grad_norm: 0.9685624801922988, iteration: 141210
loss: 1.1107922792434692,grad_norm: 0.9999991735508982, iteration: 141211
loss: 1.0266886949539185,grad_norm: 0.999999186633996, iteration: 141212
loss: 1.1296190023422241,grad_norm: 0.9999996452373612, iteration: 141213
loss: 1.0794042348861694,grad_norm: 0.9999993125883703, iteration: 141214
loss: 1.1703919172286987,grad_norm: 0.9999995886117697, iteration: 141215
loss: 1.1690548658370972,grad_norm: 0.9999998127858717, iteration: 141216
loss: 1.0716149806976318,grad_norm: 0.8692998996377931, iteration: 141217
loss: 1.039102554321289,grad_norm: 0.9999994840392644, iteration: 141218
loss: 0.9999780654907227,grad_norm: 0.9999995824186468, iteration: 141219
loss: 1.0949621200561523,grad_norm: 0.9999997355972602, iteration: 141220
loss: 1.106215000152588,grad_norm: 0.9999998907282132, iteration: 141221
loss: 1.044228196144104,grad_norm: 0.9999993133452907, iteration: 141222
loss: 1.0750025510787964,grad_norm: 0.999999722440789, iteration: 141223
loss: 1.1173999309539795,grad_norm: 0.9999998978771866, iteration: 141224
loss: 1.0551568269729614,grad_norm: 0.9999997401233611, iteration: 141225
loss: 1.1201173067092896,grad_norm: 0.9999999084526046, iteration: 141226
loss: 1.097205400466919,grad_norm: 0.9999999357315751, iteration: 141227
loss: 1.1375161409378052,grad_norm: 0.9999997339788906, iteration: 141228
loss: 1.1043404340744019,grad_norm: 0.9999996163441484, iteration: 141229
loss: 1.220402479171753,grad_norm: 0.9999998756021005, iteration: 141230
loss: 1.1992062330245972,grad_norm: 0.9999999438585204, iteration: 141231
loss: 1.2514698505401611,grad_norm: 0.9999998371145978, iteration: 141232
loss: 1.046064853668213,grad_norm: 0.999999130573394, iteration: 141233
loss: 1.0992274284362793,grad_norm: 0.9999998955933771, iteration: 141234
loss: 1.0405012369155884,grad_norm: 0.9999991091569298, iteration: 141235
loss: 1.1198511123657227,grad_norm: 0.9999996405402153, iteration: 141236
loss: 1.0162818431854248,grad_norm: 0.9999994350715258, iteration: 141237
loss: 1.2242766618728638,grad_norm: 0.9999996198630919, iteration: 141238
loss: 1.120449423789978,grad_norm: 0.9999989599209015, iteration: 141239
loss: 1.1136384010314941,grad_norm: 0.999999974641734, iteration: 141240
loss: 1.087161660194397,grad_norm: 0.9999997700938247, iteration: 141241
loss: 1.005910873413086,grad_norm: 0.8501888429334679, iteration: 141242
loss: 1.0437785387039185,grad_norm: 0.9999996941612783, iteration: 141243
loss: 1.0804733037948608,grad_norm: 0.9999997325486167, iteration: 141244
loss: 1.0328346490859985,grad_norm: 0.9999992022092704, iteration: 141245
loss: 1.1422255039215088,grad_norm: 0.9999993716593998, iteration: 141246
loss: 1.1603703498840332,grad_norm: 0.9999999114672817, iteration: 141247
loss: 1.0415596961975098,grad_norm: 0.9999995843530504, iteration: 141248
loss: 1.0819005966186523,grad_norm: 0.9999994098747563, iteration: 141249
loss: 1.3006607294082642,grad_norm: 0.9999999014816513, iteration: 141250
loss: 1.1064190864562988,grad_norm: 0.9999993095444973, iteration: 141251
loss: 1.0143992900848389,grad_norm: 0.9999994285312932, iteration: 141252
loss: 1.1844611167907715,grad_norm: 0.9999999659589315, iteration: 141253
loss: 1.1282638311386108,grad_norm: 0.9999994143627099, iteration: 141254
loss: 1.029552698135376,grad_norm: 0.9999999868464854, iteration: 141255
loss: 1.0007555484771729,grad_norm: 0.9999991756437769, iteration: 141256
loss: 1.0099669694900513,grad_norm: 0.9081034273865641, iteration: 141257
loss: 1.21223783493042,grad_norm: 0.9999998537704583, iteration: 141258
loss: 1.0390450954437256,grad_norm: 0.9417586981799002, iteration: 141259
loss: 1.1018421649932861,grad_norm: 0.9999997826886738, iteration: 141260
loss: 1.0625613927841187,grad_norm: 0.9999993088743944, iteration: 141261
loss: 0.9674468040466309,grad_norm: 0.9999998453703942, iteration: 141262
loss: 1.2684134244918823,grad_norm: 0.9999999010310509, iteration: 141263
loss: 1.0257357358932495,grad_norm: 0.8590409565284136, iteration: 141264
loss: 1.0716058015823364,grad_norm: 0.9999997848868509, iteration: 141265
loss: 1.0237557888031006,grad_norm: 0.929297963367294, iteration: 141266
loss: 0.9839625954627991,grad_norm: 0.9999993559642333, iteration: 141267
loss: 1.102405309677124,grad_norm: 0.9999994215452713, iteration: 141268
loss: 1.0433447360992432,grad_norm: 0.9999996973839475, iteration: 141269
loss: 1.0498853921890259,grad_norm: 0.9999995455353592, iteration: 141270
loss: 1.145497441291809,grad_norm: 0.9999998155940777, iteration: 141271
loss: 1.0437604188919067,grad_norm: 0.9999993130161933, iteration: 141272
loss: 1.0927846431732178,grad_norm: 0.9999989823538452, iteration: 141273
loss: 1.1188960075378418,grad_norm: 0.9999999564709176, iteration: 141274
loss: 1.0168284177780151,grad_norm: 0.9999993456280347, iteration: 141275
loss: 1.1239876747131348,grad_norm: 0.9999999120178396, iteration: 141276
loss: 1.1642777919769287,grad_norm: 0.9999997185170102, iteration: 141277
loss: 0.9881961941719055,grad_norm: 0.905986578897708, iteration: 141278
loss: 1.1283982992172241,grad_norm: 0.9999999554213412, iteration: 141279
loss: 1.1003466844558716,grad_norm: 0.9999996938150789, iteration: 141280
loss: 1.258760690689087,grad_norm: 0.9999996973472965, iteration: 141281
loss: 1.0332638025283813,grad_norm: 0.9999994666642348, iteration: 141282
loss: 1.0336064100265503,grad_norm: 0.9999996226208949, iteration: 141283
loss: 1.1072757244110107,grad_norm: 0.9999996846156326, iteration: 141284
loss: 1.051893711090088,grad_norm: 0.999999773926787, iteration: 141285
loss: 1.0908313989639282,grad_norm: 0.9999998955008856, iteration: 141286
loss: 1.0151455402374268,grad_norm: 0.9999993639051237, iteration: 141287
loss: 1.153467059135437,grad_norm: 0.9999998288402555, iteration: 141288
loss: 1.0836492776870728,grad_norm: 0.9999997935538053, iteration: 141289
loss: 1.0432928800582886,grad_norm: 1.0000000168411758, iteration: 141290
loss: 1.0033836364746094,grad_norm: 0.9999995911699122, iteration: 141291
loss: 1.1145961284637451,grad_norm: 0.9999998853714304, iteration: 141292
loss: 1.0341777801513672,grad_norm: 0.9999990109690459, iteration: 141293
loss: 1.1577736139297485,grad_norm: 0.9999991935056172, iteration: 141294
loss: 1.0873241424560547,grad_norm: 0.9999992335668743, iteration: 141295
loss: 1.105697512626648,grad_norm: 0.9999999430874298, iteration: 141296
loss: 1.0606601238250732,grad_norm: 0.9999991139997844, iteration: 141297
loss: 1.0722826719284058,grad_norm: 0.9999993833203361, iteration: 141298
loss: 1.0570310354232788,grad_norm: 0.9999995834028084, iteration: 141299
loss: 1.129980444908142,grad_norm: 0.9999995730914328, iteration: 141300
loss: 1.047675609588623,grad_norm: 0.9999997269005173, iteration: 141301
loss: 1.0881130695343018,grad_norm: 0.9999992528301266, iteration: 141302
loss: 1.071066975593567,grad_norm: 0.9999991510505697, iteration: 141303
loss: 1.1440645456314087,grad_norm: 1.0000000205554895, iteration: 141304
loss: 0.9824891686439514,grad_norm: 0.951899497195855, iteration: 141305
loss: 1.1043137311935425,grad_norm: 0.9999997434467116, iteration: 141306
loss: 1.066444993019104,grad_norm: 0.9999993901568802, iteration: 141307
loss: 1.0468096733093262,grad_norm: 0.9999994581999947, iteration: 141308
loss: 1.0415301322937012,grad_norm: 0.9999992281076338, iteration: 141309
loss: 0.9917882680892944,grad_norm: 0.9999992680648528, iteration: 141310
loss: 1.0003985166549683,grad_norm: 0.9982913027105211, iteration: 141311
loss: 1.1346464157104492,grad_norm: 0.9999998748663264, iteration: 141312
loss: 1.0944240093231201,grad_norm: 0.9999994535036154, iteration: 141313
loss: 1.2745708227157593,grad_norm: 0.9999994759900265, iteration: 141314
loss: 1.0433799028396606,grad_norm: 0.9999992263230327, iteration: 141315
loss: 1.1767492294311523,grad_norm: 0.9999993147020022, iteration: 141316
loss: 1.0589790344238281,grad_norm: 0.9999996473097383, iteration: 141317
loss: 1.3729051351547241,grad_norm: 0.9999994815552884, iteration: 141318
loss: 1.121163249015808,grad_norm: 0.9999993546818936, iteration: 141319
loss: 1.1587947607040405,grad_norm: 0.9999992006098043, iteration: 141320
loss: 1.1158860921859741,grad_norm: 0.9999995166597763, iteration: 141321
loss: 1.4203557968139648,grad_norm: 0.9999998010450218, iteration: 141322
loss: 1.0924628973007202,grad_norm: 0.9999994975178865, iteration: 141323
loss: 1.0425916910171509,grad_norm: 0.9600583950927034, iteration: 141324
loss: 1.1665371656417847,grad_norm: 0.9999997781377679, iteration: 141325
loss: 1.0684725046157837,grad_norm: 0.999999304297349, iteration: 141326
loss: 1.01491379737854,grad_norm: 0.8805342486419245, iteration: 141327
loss: 1.1419048309326172,grad_norm: 0.9999998821794489, iteration: 141328
loss: 1.1378062963485718,grad_norm: 0.9999993101192726, iteration: 141329
loss: 1.2589125633239746,grad_norm: 0.999999903246733, iteration: 141330
loss: 1.009006142616272,grad_norm: 0.9212026822283145, iteration: 141331
loss: 1.1532456874847412,grad_norm: 0.99999933657675, iteration: 141332
loss: 1.0246312618255615,grad_norm: 0.9607644394417477, iteration: 141333
loss: 1.1861302852630615,grad_norm: 0.9999993294899793, iteration: 141334
loss: 1.0671288967132568,grad_norm: 0.9999994881398987, iteration: 141335
loss: 1.2596924304962158,grad_norm: 0.999999992194051, iteration: 141336
loss: 1.3858699798583984,grad_norm: 0.999999709804757, iteration: 141337
loss: 1.2436683177947998,grad_norm: 1.0000000380845995, iteration: 141338
loss: 1.0942331552505493,grad_norm: 0.9999993377465103, iteration: 141339
loss: 1.0764333009719849,grad_norm: 0.9999993456885343, iteration: 141340
loss: 1.0359300374984741,grad_norm: 0.926570286598818, iteration: 141341
loss: 1.171431303024292,grad_norm: 0.9999994334962197, iteration: 141342
loss: 1.0881938934326172,grad_norm: 0.9999998669967555, iteration: 141343
loss: 1.2532954216003418,grad_norm: 0.9999998883104135, iteration: 141344
loss: 1.0227128267288208,grad_norm: 0.999999984325786, iteration: 141345
loss: 1.0746657848358154,grad_norm: 0.9999999314918512, iteration: 141346
loss: 1.1694217920303345,grad_norm: 0.9999993236978113, iteration: 141347
loss: 1.0512640476226807,grad_norm: 0.9999994024463054, iteration: 141348
loss: 0.9808927774429321,grad_norm: 0.9999998967564072, iteration: 141349
loss: 1.2672014236450195,grad_norm: 0.9999999413049446, iteration: 141350
loss: 1.0498367547988892,grad_norm: 0.9999999266706802, iteration: 141351
loss: 1.061221957206726,grad_norm: 0.9999992220016666, iteration: 141352
loss: 1.032046914100647,grad_norm: 0.95149653849493, iteration: 141353
loss: 1.1073627471923828,grad_norm: 0.9999993441875519, iteration: 141354
loss: 1.1252745389938354,grad_norm: 0.9999995966016777, iteration: 141355
loss: 1.1947883367538452,grad_norm: 0.9999995233461861, iteration: 141356
loss: 1.0332133769989014,grad_norm: 0.9999994847335334, iteration: 141357
loss: 1.1096857786178589,grad_norm: 0.9999996042342597, iteration: 141358
loss: 1.1214561462402344,grad_norm: 0.9999997985433585, iteration: 141359
loss: 1.0244896411895752,grad_norm: 0.9866043332107902, iteration: 141360
loss: 1.1153312921524048,grad_norm: 0.9999996860671784, iteration: 141361
loss: 1.0343166589736938,grad_norm: 0.9999996259551741, iteration: 141362
loss: 1.036973476409912,grad_norm: 0.9999994586675142, iteration: 141363
loss: 1.2247849702835083,grad_norm: 0.9999995580025375, iteration: 141364
loss: 1.1535521745681763,grad_norm: 0.9999998777897979, iteration: 141365
loss: 1.1733887195587158,grad_norm: 0.9999994248134606, iteration: 141366
loss: 1.1567888259887695,grad_norm: 0.9999992927874877, iteration: 141367
loss: 1.0836375951766968,grad_norm: 0.9999998952686195, iteration: 141368
loss: 1.0705782175064087,grad_norm: 0.9999996189155443, iteration: 141369
loss: 1.1128065586090088,grad_norm: 0.9999997050788099, iteration: 141370
loss: 1.0349795818328857,grad_norm: 0.9999993532586265, iteration: 141371
loss: 1.0819777250289917,grad_norm: 0.9999994460720266, iteration: 141372
loss: 1.0467740297317505,grad_norm: 0.9999991159228362, iteration: 141373
loss: 1.0471827983856201,grad_norm: 0.9333055807242899, iteration: 141374
loss: 1.055883526802063,grad_norm: 0.9999991157327582, iteration: 141375
loss: 1.2058842182159424,grad_norm: 0.9999998875901076, iteration: 141376
loss: 1.0429019927978516,grad_norm: 0.9382276361938925, iteration: 141377
loss: 1.0152555704116821,grad_norm: 0.9999990812562667, iteration: 141378
loss: 1.0792587995529175,grad_norm: 0.9999991795599693, iteration: 141379
loss: 1.108932614326477,grad_norm: 0.999999603671563, iteration: 141380
loss: 1.0494135618209839,grad_norm: 0.9999992407640638, iteration: 141381
loss: 1.0413035154342651,grad_norm: 0.9999991137550815, iteration: 141382
loss: 1.1911274194717407,grad_norm: 0.9999997406282308, iteration: 141383
loss: 1.1061400175094604,grad_norm: 0.9999996088020341, iteration: 141384
loss: 1.1293652057647705,grad_norm: 0.999999890906244, iteration: 141385
loss: 1.0670174360275269,grad_norm: 0.9146732819568957, iteration: 141386
loss: 1.0556402206420898,grad_norm: 1.0000000472720951, iteration: 141387
loss: 0.9655890464782715,grad_norm: 0.9333756873629544, iteration: 141388
loss: 0.9885841608047485,grad_norm: 0.8954500299369149, iteration: 141389
loss: 1.0658849477767944,grad_norm: 0.9999993278125523, iteration: 141390
loss: 1.1397716999053955,grad_norm: 0.9999998082796401, iteration: 141391
loss: 1.14405357837677,grad_norm: 0.9999996755404175, iteration: 141392
loss: 1.0411710739135742,grad_norm: 0.9382549474754568, iteration: 141393
loss: 1.0564814805984497,grad_norm: 0.9999995144603293, iteration: 141394
loss: 1.116025686264038,grad_norm: 0.9999998751232162, iteration: 141395
loss: 1.1183226108551025,grad_norm: 0.9999995561758692, iteration: 141396
loss: 1.3139829635620117,grad_norm: 0.9999998822704294, iteration: 141397
loss: 1.0492832660675049,grad_norm: 0.9999992701425527, iteration: 141398
loss: 1.0963748693466187,grad_norm: 0.9999998506905949, iteration: 141399
loss: 1.152580976486206,grad_norm: 0.9999998655118931, iteration: 141400
loss: 1.020556926727295,grad_norm: 0.9999998814018941, iteration: 141401
loss: 1.1047996282577515,grad_norm: 0.9999998916470227, iteration: 141402
loss: 1.211785912513733,grad_norm: 0.9999994035089061, iteration: 141403
loss: 1.286591649055481,grad_norm: 0.999999739881194, iteration: 141404
loss: 1.0401506423950195,grad_norm: 0.9999991971282405, iteration: 141405
loss: 1.229758620262146,grad_norm: 0.9999998771168274, iteration: 141406
loss: 1.1025887727737427,grad_norm: 0.9999992688051336, iteration: 141407
loss: 1.2455605268478394,grad_norm: 0.9999999003601909, iteration: 141408
loss: 1.2019565105438232,grad_norm: 0.9999998953469983, iteration: 141409
loss: 1.0930463075637817,grad_norm: 0.9999997933849605, iteration: 141410
loss: 1.1152024269104004,grad_norm: 0.9999998766001266, iteration: 141411
loss: 1.1116297245025635,grad_norm: 0.9999997062114352, iteration: 141412
loss: 1.3659281730651855,grad_norm: 0.9999999687857555, iteration: 141413
loss: 1.2189438343048096,grad_norm: 0.9999998780070964, iteration: 141414
loss: 1.0645618438720703,grad_norm: 0.9999991860805426, iteration: 141415
loss: 1.0980225801467896,grad_norm: 0.999999918071732, iteration: 141416
loss: 1.0701963901519775,grad_norm: 0.9999991498513213, iteration: 141417
loss: 1.0392744541168213,grad_norm: 0.9999995183090409, iteration: 141418
loss: 1.0046710968017578,grad_norm: 0.9999995726407502, iteration: 141419
loss: 1.1312087774276733,grad_norm: 0.9999998554610616, iteration: 141420
loss: 1.052460789680481,grad_norm: 0.9999996458873911, iteration: 141421
loss: 1.1212587356567383,grad_norm: 0.999999789011346, iteration: 141422
loss: 1.075639247894287,grad_norm: 0.9999997085209581, iteration: 141423
loss: 1.0794333219528198,grad_norm: 0.999999758540503, iteration: 141424
loss: 1.2964136600494385,grad_norm: 0.999999605872886, iteration: 141425
loss: 1.0319161415100098,grad_norm: 0.9953742749100851, iteration: 141426
loss: 1.2639628648757935,grad_norm: 0.9999993956095092, iteration: 141427
loss: 1.018923282623291,grad_norm: 0.9999992931967873, iteration: 141428
loss: 1.1354212760925293,grad_norm: 0.9999998965494417, iteration: 141429
loss: 1.1189939975738525,grad_norm: 0.9999999576705801, iteration: 141430
loss: 1.0538811683654785,grad_norm: 0.9999999414374885, iteration: 141431
loss: 1.1144790649414062,grad_norm: 0.9999998728071757, iteration: 141432
loss: 1.5103486776351929,grad_norm: 0.9999999986434857, iteration: 141433
loss: 1.1591345071792603,grad_norm: 0.9999997996244951, iteration: 141434
loss: 1.0667191743850708,grad_norm: 0.9999996619832323, iteration: 141435
loss: 0.9843724966049194,grad_norm: 0.9999990888813263, iteration: 141436
loss: 1.107053279876709,grad_norm: 0.9999999874108821, iteration: 141437
loss: 1.1611348390579224,grad_norm: 0.9999994903368974, iteration: 141438
loss: 1.206565499305725,grad_norm: 0.9999995399363292, iteration: 141439
loss: 1.1302341222763062,grad_norm: 0.8657854595336179, iteration: 141440
loss: 1.1144227981567383,grad_norm: 0.9999998388675441, iteration: 141441
loss: 1.0546857118606567,grad_norm: 0.9999990559788535, iteration: 141442
loss: 1.1895900964736938,grad_norm: 0.9999995720293248, iteration: 141443
loss: 1.1211782693862915,grad_norm: 0.9999998660569811, iteration: 141444
loss: 1.0845359563827515,grad_norm: 0.9999994444546303, iteration: 141445
loss: 1.2659611701965332,grad_norm: 0.9999994969834815, iteration: 141446
loss: 1.1303788423538208,grad_norm: 0.9999995824008753, iteration: 141447
loss: 1.1209511756896973,grad_norm: 0.9999999159301354, iteration: 141448
loss: 1.2477545738220215,grad_norm: 0.9999997679211337, iteration: 141449
loss: 1.2416915893554688,grad_norm: 0.999999788131635, iteration: 141450
loss: 1.0442100763320923,grad_norm: 0.9999998117760408, iteration: 141451
loss: 1.1229764223098755,grad_norm: 0.9999994636280525, iteration: 141452
loss: 1.036882996559143,grad_norm: 0.9999992056710234, iteration: 141453
loss: 1.1265923976898193,grad_norm: 0.9999997645961399, iteration: 141454
loss: 1.1262918710708618,grad_norm: 0.9999990994109511, iteration: 141455
loss: 1.0555235147476196,grad_norm: 0.9212987243628765, iteration: 141456
loss: 1.128126859664917,grad_norm: 0.9999996302871074, iteration: 141457
loss: 1.362668752670288,grad_norm: 0.99999990713074, iteration: 141458
loss: 1.0551146268844604,grad_norm: 0.9999998253895727, iteration: 141459
loss: 1.245388388633728,grad_norm: 0.9999995562882734, iteration: 141460
loss: 1.127027988433838,grad_norm: 0.9999999223572101, iteration: 141461
loss: 1.127288818359375,grad_norm: 0.999999796017763, iteration: 141462
loss: 1.0960544347763062,grad_norm: 0.9999993666312836, iteration: 141463
loss: 1.0461502075195312,grad_norm: 0.9999993083661706, iteration: 141464
loss: 1.233685851097107,grad_norm: 0.9999998500566374, iteration: 141465
loss: 1.142904281616211,grad_norm: 0.9999997824042719, iteration: 141466
loss: 1.3743669986724854,grad_norm: 1.0000000347970053, iteration: 141467
loss: 1.1650034189224243,grad_norm: 0.9999999408397172, iteration: 141468
loss: 1.1106077432632446,grad_norm: 0.9999993696148959, iteration: 141469
loss: 1.2852563858032227,grad_norm: 0.9999999668731261, iteration: 141470
loss: 1.0120431184768677,grad_norm: 0.9999995630582269, iteration: 141471
loss: 1.121023178100586,grad_norm: 0.9999998041490039, iteration: 141472
loss: 1.249150037765503,grad_norm: 0.9999993706973267, iteration: 141473
loss: 1.1423996686935425,grad_norm: 1.000000044711333, iteration: 141474
loss: 1.1835159063339233,grad_norm: 0.9999998922284443, iteration: 141475
loss: 1.1178174018859863,grad_norm: 0.9999998653069504, iteration: 141476
loss: 1.1134436130523682,grad_norm: 0.9999996123428122, iteration: 141477
loss: 1.3058756589889526,grad_norm: 0.9999999209169425, iteration: 141478
loss: 1.1858879327774048,grad_norm: 0.9999999922554327, iteration: 141479
loss: 1.2413123846054077,grad_norm: 0.9999999916179036, iteration: 141480
loss: 1.109167218208313,grad_norm: 0.9999996218294601, iteration: 141481
loss: 1.1323741674423218,grad_norm: 0.9999999486094873, iteration: 141482
loss: 1.2098084688186646,grad_norm: 0.9999999768975559, iteration: 141483
loss: 1.2077785730361938,grad_norm: 0.9999999740109522, iteration: 141484
loss: 1.1306110620498657,grad_norm: 0.9999998130961101, iteration: 141485
loss: 1.1822115182876587,grad_norm: 0.999999952857592, iteration: 141486
loss: 1.058205485343933,grad_norm: 1.0000000135105838, iteration: 141487
loss: 1.1982409954071045,grad_norm: 0.9999998498993372, iteration: 141488
loss: 1.0396454334259033,grad_norm: 0.9999994307224923, iteration: 141489
loss: 1.1968783140182495,grad_norm: 0.9999997774443625, iteration: 141490
loss: 1.079856514930725,grad_norm: 0.9999996434115893, iteration: 141491
loss: 1.0908966064453125,grad_norm: 0.9999995869947086, iteration: 141492
loss: 1.1005935668945312,grad_norm: 0.9999990701479508, iteration: 141493
loss: 1.198364496231079,grad_norm: 0.9999998775915002, iteration: 141494
loss: 1.2505651712417603,grad_norm: 0.999999787846135, iteration: 141495
loss: 1.093225359916687,grad_norm: 0.9999995554269833, iteration: 141496
loss: 1.19662344455719,grad_norm: 0.9999998814302551, iteration: 141497
loss: 1.0507586002349854,grad_norm: 0.9999993630351612, iteration: 141498
loss: 1.1776939630508423,grad_norm: 0.9999995032206431, iteration: 141499
loss: 1.1265898942947388,grad_norm: 0.9999994796720438, iteration: 141500
loss: 1.1206865310668945,grad_norm: 0.999999177363431, iteration: 141501
loss: 1.0324194431304932,grad_norm: 0.8303437521703944, iteration: 141502
loss: 1.306679129600525,grad_norm: 0.9999998252275895, iteration: 141503
loss: 1.054145336151123,grad_norm: 0.9999993069890372, iteration: 141504
loss: 1.0548697710037231,grad_norm: 0.9999998912728084, iteration: 141505
loss: 1.1494033336639404,grad_norm: 0.9999993423072654, iteration: 141506
loss: 1.079046368598938,grad_norm: 0.999999953253431, iteration: 141507
loss: 1.0528370141983032,grad_norm: 0.999999212776095, iteration: 141508
loss: 1.1561938524246216,grad_norm: 0.9999998007986157, iteration: 141509
loss: 1.0399738550186157,grad_norm: 0.9595760232316538, iteration: 141510
loss: 1.1938480138778687,grad_norm: 0.99999985727576, iteration: 141511
loss: 1.1292835474014282,grad_norm: 0.99999978311648, iteration: 141512
loss: 1.1041479110717773,grad_norm: 0.999999928892826, iteration: 141513
loss: 1.0091238021850586,grad_norm: 0.9999992219497216, iteration: 141514
loss: 1.11154305934906,grad_norm: 0.9999994730268581, iteration: 141515
loss: 1.0246728658676147,grad_norm: 0.9179370923858173, iteration: 141516
loss: 1.0634533166885376,grad_norm: 0.9999995464152349, iteration: 141517
loss: 1.15537428855896,grad_norm: 0.9999995752105254, iteration: 141518
loss: 1.268391489982605,grad_norm: 1.0000000128101592, iteration: 141519
loss: 1.04428231716156,grad_norm: 0.97580824186707, iteration: 141520
loss: 1.2010881900787354,grad_norm: 0.9999993949672665, iteration: 141521
loss: 1.186633586883545,grad_norm: 0.9999997997170723, iteration: 141522
loss: 1.147281289100647,grad_norm: 0.9999998332865946, iteration: 141523
loss: 1.1083279848098755,grad_norm: 0.9999998088416107, iteration: 141524
loss: 1.130312442779541,grad_norm: 0.9999996308435184, iteration: 141525
loss: 1.0590837001800537,grad_norm: 0.9999998972246595, iteration: 141526
loss: 1.2419785261154175,grad_norm: 0.999999806749521, iteration: 141527
loss: 1.144909143447876,grad_norm: 0.9999996954097337, iteration: 141528
loss: 1.0524191856384277,grad_norm: 0.9999992109166314, iteration: 141529
loss: 1.122641682624817,grad_norm: 0.9999996711104979, iteration: 141530
loss: 1.2495654821395874,grad_norm: 0.9999998660212994, iteration: 141531
loss: 1.084928274154663,grad_norm: 0.9999997969304453, iteration: 141532
loss: 1.092630386352539,grad_norm: 0.9999998159183588, iteration: 141533
loss: 1.0986948013305664,grad_norm: 0.9999996036806978, iteration: 141534
loss: 1.044852375984192,grad_norm: 0.9999994433167205, iteration: 141535
loss: 1.1548815965652466,grad_norm: 0.9999999076992706, iteration: 141536
loss: 1.079993486404419,grad_norm: 0.9999993298956321, iteration: 141537
loss: 1.0826680660247803,grad_norm: 0.953862009658639, iteration: 141538
loss: 0.9830969572067261,grad_norm: 0.9999992756950569, iteration: 141539
loss: 1.1657823324203491,grad_norm: 0.9999997151268483, iteration: 141540
loss: 1.065869927406311,grad_norm: 0.9999994234265088, iteration: 141541
loss: 1.2471752166748047,grad_norm: 0.999999356503213, iteration: 141542
loss: 1.1450034379959106,grad_norm: 0.9999995041186033, iteration: 141543
loss: 1.1188520193099976,grad_norm: 0.9999989982335395, iteration: 141544
loss: 1.0694997310638428,grad_norm: 0.9999998705383062, iteration: 141545
loss: 1.1375324726104736,grad_norm: 0.9999997692195051, iteration: 141546
loss: 1.368160605430603,grad_norm: 0.9999997878953398, iteration: 141547
loss: 1.0911920070648193,grad_norm: 0.9999996031116122, iteration: 141548
loss: 1.1016520261764526,grad_norm: 0.9999998944544971, iteration: 141549
loss: 1.0624226331710815,grad_norm: 0.9999997621852958, iteration: 141550
loss: 1.123749852180481,grad_norm: 0.9999995625361575, iteration: 141551
loss: 1.0565917491912842,grad_norm: 0.9999996881250595, iteration: 141552
loss: 1.0173002481460571,grad_norm: 0.981685426355572, iteration: 141553
loss: 1.0787827968597412,grad_norm: 0.9999991980876018, iteration: 141554
loss: 1.0569547414779663,grad_norm: 0.9999997265665831, iteration: 141555
loss: 1.0663634538650513,grad_norm: 0.9999994312262361, iteration: 141556
loss: 1.0742411613464355,grad_norm: 0.9999997463636374, iteration: 141557
loss: 1.0353344678878784,grad_norm: 0.9999996738468232, iteration: 141558
loss: 0.9913508892059326,grad_norm: 0.9999996221925088, iteration: 141559
loss: 0.9743096828460693,grad_norm: 0.774111471435368, iteration: 141560
loss: 1.0825117826461792,grad_norm: 0.9999999067130172, iteration: 141561
loss: 1.0398414134979248,grad_norm: 0.999999276843569, iteration: 141562
loss: 1.0552775859832764,grad_norm: 0.999999873474402, iteration: 141563
loss: 1.1637158393859863,grad_norm: 0.9999998365210996, iteration: 141564
loss: 1.1742708683013916,grad_norm: 0.9999997990403777, iteration: 141565
loss: 1.2006399631500244,grad_norm: 0.9999996361178652, iteration: 141566
loss: 1.0746431350708008,grad_norm: 0.9999997433633416, iteration: 141567
loss: 1.1319687366485596,grad_norm: 0.9999995015290928, iteration: 141568
loss: 0.9772499799728394,grad_norm: 0.9999990979955325, iteration: 141569
loss: 1.0220879316329956,grad_norm: 0.9999997568291434, iteration: 141570
loss: 1.2166777849197388,grad_norm: 0.9999998011857417, iteration: 141571
loss: 1.0115669965744019,grad_norm: 0.9999991395489687, iteration: 141572
loss: 1.1311397552490234,grad_norm: 0.9999989972826677, iteration: 141573
loss: 1.0653852224349976,grad_norm: 0.9999992302213346, iteration: 141574
loss: 1.0834627151489258,grad_norm: 0.999999543744322, iteration: 141575
loss: 1.0474892854690552,grad_norm: 0.9999994408001197, iteration: 141576
loss: 0.9742133617401123,grad_norm: 0.9789304292633777, iteration: 141577
loss: 1.266607642173767,grad_norm: 0.9999999057254096, iteration: 141578
loss: 1.156082034111023,grad_norm: 0.9999998978496721, iteration: 141579
loss: 1.1542270183563232,grad_norm: 0.9999996896062264, iteration: 141580
loss: 1.0485279560089111,grad_norm: 0.9999993575771953, iteration: 141581
loss: 1.066048264503479,grad_norm: 0.9999996913919894, iteration: 141582
loss: 1.043811321258545,grad_norm: 0.9999990562220314, iteration: 141583
loss: 1.1532407999038696,grad_norm: 0.9999998516318761, iteration: 141584
loss: 1.2250014543533325,grad_norm: 0.9999998921805349, iteration: 141585
loss: 1.006262183189392,grad_norm: 0.9069733392411331, iteration: 141586
loss: 1.0452467203140259,grad_norm: 0.9999998933289954, iteration: 141587
loss: 0.9767640829086304,grad_norm: 0.8834790295881894, iteration: 141588
loss: 1.1663265228271484,grad_norm: 0.9999999092489192, iteration: 141589
loss: 1.0863252878189087,grad_norm: 0.9999994739732734, iteration: 141590
loss: 1.0638346672058105,grad_norm: 0.9999991656964694, iteration: 141591
loss: 1.0277466773986816,grad_norm: 0.8506463854050014, iteration: 141592
loss: 1.0427879095077515,grad_norm: 0.9999992275995468, iteration: 141593
loss: 1.1625899076461792,grad_norm: 0.9999997761852023, iteration: 141594
loss: 1.0921002626419067,grad_norm: 0.9999991064282983, iteration: 141595
loss: 1.1266653537750244,grad_norm: 0.9999997797084449, iteration: 141596
loss: 1.0913747549057007,grad_norm: 0.9999999267368146, iteration: 141597
loss: 1.1454659700393677,grad_norm: 0.9999999394124414, iteration: 141598
loss: 1.1323843002319336,grad_norm: 0.9999998524056378, iteration: 141599
loss: 1.0081168413162231,grad_norm: 0.9999991724394244, iteration: 141600
loss: 1.0194340944290161,grad_norm: 0.9999998563761584, iteration: 141601
loss: 1.1103378534317017,grad_norm: 0.9999996279309, iteration: 141602
loss: 1.0681750774383545,grad_norm: 0.9999994977552555, iteration: 141603
loss: 1.0182344913482666,grad_norm: 0.9999991263278628, iteration: 141604
loss: 1.1105667352676392,grad_norm: 0.9999997041216948, iteration: 141605
loss: 1.2843881845474243,grad_norm: 0.9999997144091892, iteration: 141606
loss: 1.0351840257644653,grad_norm: 0.9999993913051907, iteration: 141607
loss: 1.0156385898590088,grad_norm: 0.9999992921619223, iteration: 141608
loss: 1.0211472511291504,grad_norm: 0.9410951421935898, iteration: 141609
loss: 1.0615386962890625,grad_norm: 0.9999998149020155, iteration: 141610
loss: 1.109898328781128,grad_norm: 0.9999994653592339, iteration: 141611
loss: 1.098855972290039,grad_norm: 0.9999998701925809, iteration: 141612
loss: 1.05177903175354,grad_norm: 0.9999996287794354, iteration: 141613
loss: 1.151290774345398,grad_norm: 0.9999997614534613, iteration: 141614
loss: 1.2200812101364136,grad_norm: 0.9999996911621384, iteration: 141615
loss: 1.0915441513061523,grad_norm: 0.9999992663037309, iteration: 141616
loss: 1.0409901142120361,grad_norm: 0.9999990314433985, iteration: 141617
loss: 1.061214566230774,grad_norm: 0.9999998383894699, iteration: 141618
loss: 1.0905613899230957,grad_norm: 0.9999992484846344, iteration: 141619
loss: 1.1547640562057495,grad_norm: 0.9999997769543478, iteration: 141620
loss: 1.0486981868743896,grad_norm: 0.999999647394279, iteration: 141621
loss: 1.195258378982544,grad_norm: 0.999999990271846, iteration: 141622
loss: 1.117051601409912,grad_norm: 0.9999994456595739, iteration: 141623
loss: 1.0370454788208008,grad_norm: 1.0000000768192598, iteration: 141624
loss: 0.9911125898361206,grad_norm: 0.9999998468075515, iteration: 141625
loss: 1.1749600172042847,grad_norm: 0.9999997199051783, iteration: 141626
loss: 1.0471302270889282,grad_norm: 0.9999990475415167, iteration: 141627
loss: 1.0528132915496826,grad_norm: 0.9999997353391723, iteration: 141628
loss: 1.0922815799713135,grad_norm: 0.9999996448971451, iteration: 141629
loss: 1.0892261266708374,grad_norm: 0.9999995883132865, iteration: 141630
loss: 1.2476359605789185,grad_norm: 0.9999999609555397, iteration: 141631
loss: 1.0161157846450806,grad_norm: 0.9999990279210801, iteration: 141632
loss: 1.1042349338531494,grad_norm: 0.9999993355866372, iteration: 141633
loss: 1.4626935720443726,grad_norm: 0.999999854677288, iteration: 141634
loss: 1.015777826309204,grad_norm: 0.9999991077000093, iteration: 141635
loss: 1.0495496988296509,grad_norm: 0.9999991351578439, iteration: 141636
loss: 1.1542986631393433,grad_norm: 0.99999988231308, iteration: 141637
loss: 1.112859845161438,grad_norm: 0.9999997052233764, iteration: 141638
loss: 1.0256677865982056,grad_norm: 0.9999994060214338, iteration: 141639
loss: 1.0739121437072754,grad_norm: 0.9999992557015983, iteration: 141640
loss: 1.023212194442749,grad_norm: 0.9628294152221424, iteration: 141641
loss: 1.0940628051757812,grad_norm: 0.999999542018564, iteration: 141642
loss: 1.0043917894363403,grad_norm: 0.9312754323125882, iteration: 141643
loss: 1.1000005006790161,grad_norm: 0.9999996546080702, iteration: 141644
loss: 1.2002991437911987,grad_norm: 1.000000075516635, iteration: 141645
loss: 1.0039730072021484,grad_norm: 0.9999994325879499, iteration: 141646
loss: 1.1098456382751465,grad_norm: 0.999999848400467, iteration: 141647
loss: 1.0877047777175903,grad_norm: 0.999999632202976, iteration: 141648
loss: 1.3106882572174072,grad_norm: 1.0000000645276652, iteration: 141649
loss: 1.0473886728286743,grad_norm: 0.9999992679400256, iteration: 141650
loss: 1.0578356981277466,grad_norm: 0.9999991438489827, iteration: 141651
loss: 0.9904976487159729,grad_norm: 0.9999992861496084, iteration: 141652
loss: 1.0559459924697876,grad_norm: 0.999999643188435, iteration: 141653
loss: 1.015940546989441,grad_norm: 0.9188016338175696, iteration: 141654
loss: 1.2820467948913574,grad_norm: 0.9999998889901749, iteration: 141655
loss: 1.1180943250656128,grad_norm: 0.9999991291868006, iteration: 141656
loss: 1.0620925426483154,grad_norm: 0.9999991459854036, iteration: 141657
loss: 1.1555196046829224,grad_norm: 0.9999999499207248, iteration: 141658
loss: 1.16604483127594,grad_norm: 0.99999980815973, iteration: 141659
loss: 0.9822836518287659,grad_norm: 0.9999996857057568, iteration: 141660
loss: 1.18296480178833,grad_norm: 0.999999837555343, iteration: 141661
loss: 1.0535659790039062,grad_norm: 0.9999993365621312, iteration: 141662
loss: 1.0961602926254272,grad_norm: 0.9999998534700512, iteration: 141663
loss: 1.2423410415649414,grad_norm: 0.9999999436692901, iteration: 141664
loss: 1.174067735671997,grad_norm: 0.9999998802829213, iteration: 141665
loss: 1.0904268026351929,grad_norm: 0.9999997817295159, iteration: 141666
loss: 1.09406578540802,grad_norm: 0.9999997493028595, iteration: 141667
loss: 1.1874741315841675,grad_norm: 0.9999995009444317, iteration: 141668
loss: 1.0296669006347656,grad_norm: 0.9590255709721255, iteration: 141669
loss: 1.1296998262405396,grad_norm: 0.9999992838952046, iteration: 141670
loss: 1.2927449941635132,grad_norm: 0.9999996153470556, iteration: 141671
loss: 1.1590075492858887,grad_norm: 0.999999648028816, iteration: 141672
loss: 1.1794333457946777,grad_norm: 0.9999998414490885, iteration: 141673
loss: 1.0610812902450562,grad_norm: 0.9999996912146155, iteration: 141674
loss: 1.0813096761703491,grad_norm: 0.999999932457475, iteration: 141675
loss: 1.0302231311798096,grad_norm: 0.9513085591669969, iteration: 141676
loss: 1.2428748607635498,grad_norm: 0.9999993686054379, iteration: 141677
loss: 1.0723822116851807,grad_norm: 0.9999996835878003, iteration: 141678
loss: 1.0626916885375977,grad_norm: 0.9999998468007906, iteration: 141679
loss: 1.1854366064071655,grad_norm: 0.9999992145807, iteration: 141680
loss: 1.2054917812347412,grad_norm: 0.9999997967288533, iteration: 141681
loss: 1.1118435859680176,grad_norm: 0.999999568906545, iteration: 141682
loss: 1.055364727973938,grad_norm: 0.9999999260968778, iteration: 141683
loss: 1.0754008293151855,grad_norm: 0.9999997609848795, iteration: 141684
loss: 1.0665628910064697,grad_norm: 0.9999997657309163, iteration: 141685
loss: 1.0334333181381226,grad_norm: 0.999999876844796, iteration: 141686
loss: 1.096054196357727,grad_norm: 0.9999992944403994, iteration: 141687
loss: 1.042893648147583,grad_norm: 0.9999995963879315, iteration: 141688
loss: 1.1563557386398315,grad_norm: 0.9999995338982968, iteration: 141689
loss: 1.054189920425415,grad_norm: 0.9999999280973677, iteration: 141690
loss: 1.0738545656204224,grad_norm: 0.9999994310315481, iteration: 141691
loss: 1.0720008611679077,grad_norm: 0.999999959345432, iteration: 141692
loss: 1.0610677003860474,grad_norm: 0.9999999636912605, iteration: 141693
loss: 1.1839993000030518,grad_norm: 0.9999997001878252, iteration: 141694
loss: 1.056273341178894,grad_norm: 0.9999994055690914, iteration: 141695
loss: 1.228505253791809,grad_norm: 0.9999999141403026, iteration: 141696
loss: 1.1999033689498901,grad_norm: 0.9999993511588077, iteration: 141697
loss: 0.9820118546485901,grad_norm: 0.9462164408268646, iteration: 141698
loss: 1.2134363651275635,grad_norm: 0.9999994573475071, iteration: 141699
loss: 1.005177617073059,grad_norm: 0.999999153342891, iteration: 141700
loss: 0.991509199142456,grad_norm: 0.9914055634529069, iteration: 141701
loss: 1.0835390090942383,grad_norm: 0.9999990413370993, iteration: 141702
loss: 1.0536136627197266,grad_norm: 0.999999386255976, iteration: 141703
loss: 1.0046029090881348,grad_norm: 0.8693383228611622, iteration: 141704
loss: 1.0304797887802124,grad_norm: 0.8655261664241025, iteration: 141705
loss: 1.1876591444015503,grad_norm: 0.999999322195062, iteration: 141706
loss: 1.0451351404190063,grad_norm: 0.9999990405350624, iteration: 141707
loss: 1.0091369152069092,grad_norm: 0.768843979283479, iteration: 141708
loss: 0.9859933257102966,grad_norm: 0.999999119702458, iteration: 141709
loss: 1.170844316482544,grad_norm: 0.9999996189977066, iteration: 141710
loss: 1.1390657424926758,grad_norm: 0.9999996240822228, iteration: 141711
loss: 1.2130763530731201,grad_norm: 0.9999997399179092, iteration: 141712
loss: 1.0759626626968384,grad_norm: 0.9999999104167963, iteration: 141713
loss: 1.0476346015930176,grad_norm: 0.999999279455634, iteration: 141714
loss: 1.174025535583496,grad_norm: 0.9999998302910357, iteration: 141715
loss: 1.047266960144043,grad_norm: 0.9999993126675325, iteration: 141716
loss: 1.0801844596862793,grad_norm: 0.99999989747087, iteration: 141717
loss: 1.1527255773544312,grad_norm: 0.999999391337796, iteration: 141718
loss: 1.046947717666626,grad_norm: 0.9999990586135029, iteration: 141719
loss: 1.066227674484253,grad_norm: 0.9999991412254726, iteration: 141720
loss: 1.054409384727478,grad_norm: 0.999999576707834, iteration: 141721
loss: 1.0072799921035767,grad_norm: 0.9152072269483565, iteration: 141722
loss: 1.0785080194473267,grad_norm: 0.9999994686187051, iteration: 141723
loss: 1.208540678024292,grad_norm: 0.9999992984389554, iteration: 141724
loss: 1.0174920558929443,grad_norm: 0.9999997245717315, iteration: 141725
loss: 1.1053801774978638,grad_norm: 0.9999996405210879, iteration: 141726
loss: 1.0823832750320435,grad_norm: 0.9999994597644427, iteration: 141727
loss: 1.12615168094635,grad_norm: 0.9999994430457861, iteration: 141728
loss: 0.9888730645179749,grad_norm: 0.9130744761921626, iteration: 141729
loss: 1.2011685371398926,grad_norm: 0.9999991062554544, iteration: 141730
loss: 1.1696391105651855,grad_norm: 0.999999896467627, iteration: 141731
loss: 1.0144587755203247,grad_norm: 0.9288436910469393, iteration: 141732
loss: 0.9730220437049866,grad_norm: 0.9359486141423865, iteration: 141733
loss: 1.218544840812683,grad_norm: 0.9999997760749448, iteration: 141734
loss: 1.0407639741897583,grad_norm: 0.999999202811154, iteration: 141735
loss: 1.0391020774841309,grad_norm: 0.9999993424275141, iteration: 141736
loss: 1.052274465560913,grad_norm: 0.9999997888813483, iteration: 141737
loss: 1.0397465229034424,grad_norm: 0.9999997561601068, iteration: 141738
loss: 1.0341888666152954,grad_norm: 0.9999991162321312, iteration: 141739
loss: 1.0923174619674683,grad_norm: 0.9999992500598374, iteration: 141740
loss: 1.0521453619003296,grad_norm: 0.9999993986311368, iteration: 141741
loss: 1.0570425987243652,grad_norm: 0.9999990794708052, iteration: 141742
loss: 1.149856448173523,grad_norm: 0.9999998078344756, iteration: 141743
loss: 1.2437238693237305,grad_norm: 0.999999827508257, iteration: 141744
loss: 0.955726683139801,grad_norm: 0.9999991137712205, iteration: 141745
loss: 1.0377604961395264,grad_norm: 0.999999671522365, iteration: 141746
loss: 1.0453839302062988,grad_norm: 0.9999991430110603, iteration: 141747
loss: 1.0989503860473633,grad_norm: 0.9999997737598, iteration: 141748
loss: 1.1911423206329346,grad_norm: 0.999999937211919, iteration: 141749
loss: 1.2252687215805054,grad_norm: 0.9999994439227504, iteration: 141750
loss: 1.0353080034255981,grad_norm: 0.9462157006942659, iteration: 141751
loss: 1.1260939836502075,grad_norm: 0.9999995744411152, iteration: 141752
loss: 1.106486201286316,grad_norm: 0.9999994583430375, iteration: 141753
loss: 1.1008070707321167,grad_norm: 0.9999996176335054, iteration: 141754
loss: 1.2090767621994019,grad_norm: 0.9999997019560602, iteration: 141755
loss: 0.9938591122627258,grad_norm: 0.9999996652573393, iteration: 141756
loss: 1.1071628332138062,grad_norm: 0.9999998101941179, iteration: 141757
loss: 1.2484148740768433,grad_norm: 0.9999992786699777, iteration: 141758
loss: 1.2008485794067383,grad_norm: 0.9999996055599323, iteration: 141759
loss: 1.064713478088379,grad_norm: 0.9999994140256524, iteration: 141760
loss: 1.043609380722046,grad_norm: 0.9999990481663457, iteration: 141761
loss: 1.1683101654052734,grad_norm: 0.9999998800876556, iteration: 141762
loss: 1.1189044713974,grad_norm: 0.9999996156869444, iteration: 141763
loss: 1.2123432159423828,grad_norm: 0.9999998687114118, iteration: 141764
loss: 1.2218873500823975,grad_norm: 0.9999999499437757, iteration: 141765
loss: 1.050681233406067,grad_norm: 0.999999624056561, iteration: 141766
loss: 1.0160818099975586,grad_norm: 0.9999993022037686, iteration: 141767
loss: 1.081592321395874,grad_norm: 0.9999993653333566, iteration: 141768
loss: 1.060063123703003,grad_norm: 0.9999996462716322, iteration: 141769
loss: 1.0280815362930298,grad_norm: 0.9999993307511564, iteration: 141770
loss: 1.0844039916992188,grad_norm: 0.9999991447151132, iteration: 141771
loss: 1.078122615814209,grad_norm: 0.9999997493396428, iteration: 141772
loss: 1.1238644123077393,grad_norm: 0.9999995343953172, iteration: 141773
loss: 1.0290508270263672,grad_norm: 0.9479394978949798, iteration: 141774
loss: 1.1352181434631348,grad_norm: 0.9999997472660275, iteration: 141775
loss: 1.0231181383132935,grad_norm: 0.9999994477999309, iteration: 141776
loss: 0.9635357856750488,grad_norm: 0.7411011439336652, iteration: 141777
loss: 1.156579852104187,grad_norm: 0.9999990517651408, iteration: 141778
loss: 1.023183822631836,grad_norm: 0.9486883728568608, iteration: 141779
loss: 1.0091599225997925,grad_norm: 0.9999994886276745, iteration: 141780
loss: 1.0297898054122925,grad_norm: 0.9999992389966501, iteration: 141781
loss: 1.0630476474761963,grad_norm: 0.9999993876864424, iteration: 141782
loss: 1.023147702217102,grad_norm: 0.9999990951376982, iteration: 141783
loss: 1.0243183374404907,grad_norm: 0.9999998848612742, iteration: 141784
loss: 1.0505602359771729,grad_norm: 0.999999924696763, iteration: 141785
loss: 0.9693573117256165,grad_norm: 0.867295521961395, iteration: 141786
loss: 1.1219189167022705,grad_norm: 0.9999997645434742, iteration: 141787
loss: 0.9593777060508728,grad_norm: 0.9592080413059936, iteration: 141788
loss: 1.0645856857299805,grad_norm: 0.9999991509415588, iteration: 141789
loss: 1.130149245262146,grad_norm: 0.9999992781493338, iteration: 141790
loss: 1.0442160367965698,grad_norm: 0.9999995224666648, iteration: 141791
loss: 1.0719376802444458,grad_norm: 0.9999996665573032, iteration: 141792
loss: 1.139294147491455,grad_norm: 0.9999996160553843, iteration: 141793
loss: 1.064611554145813,grad_norm: 0.9999992510653349, iteration: 141794
loss: 1.0483992099761963,grad_norm: 0.9999994267209291, iteration: 141795
loss: 1.1019829511642456,grad_norm: 0.9999999685696094, iteration: 141796
loss: 0.9759486317634583,grad_norm: 0.8409549754235969, iteration: 141797
loss: 1.0517886877059937,grad_norm: 0.9999998421652035, iteration: 141798
loss: 1.0768159627914429,grad_norm: 0.9999998574163839, iteration: 141799
loss: 1.1162049770355225,grad_norm: 1.000000040622515, iteration: 141800
loss: 1.066935658454895,grad_norm: 0.9999991885756307, iteration: 141801
loss: 1.0701605081558228,grad_norm: 0.9999997914864812, iteration: 141802
loss: 1.0516164302825928,grad_norm: 0.9999999291213317, iteration: 141803
loss: 1.20772123336792,grad_norm: 0.9999998130839922, iteration: 141804
loss: 1.048266887664795,grad_norm: 0.922209099588153, iteration: 141805
loss: 1.0666502714157104,grad_norm: 0.9999992362883309, iteration: 141806
loss: 1.196213960647583,grad_norm: 0.9999999115592886, iteration: 141807
loss: 1.1592272520065308,grad_norm: 0.9999999367444142, iteration: 141808
loss: 1.0336512327194214,grad_norm: 0.9999998568750946, iteration: 141809
loss: 1.090448260307312,grad_norm: 1.0000000234176145, iteration: 141810
loss: 1.2272887229919434,grad_norm: 0.9999999763738844, iteration: 141811
loss: 1.080628514289856,grad_norm: 0.9999990129185262, iteration: 141812
loss: 1.10952627658844,grad_norm: 0.9999992736749536, iteration: 141813
loss: 1.1973347663879395,grad_norm: 0.9999998661820001, iteration: 141814
loss: 1.0828675031661987,grad_norm: 0.9373686937241468, iteration: 141815
loss: 1.0490500926971436,grad_norm: 0.9999993894107349, iteration: 141816
loss: 1.0703243017196655,grad_norm: 0.9999994255567604, iteration: 141817
loss: 0.9579662680625916,grad_norm: 0.9999997489608978, iteration: 141818
loss: 1.069756031036377,grad_norm: 0.999999392625081, iteration: 141819
loss: 1.0844104290008545,grad_norm: 0.999999310080073, iteration: 141820
loss: 1.060952067375183,grad_norm: 0.9999991363744513, iteration: 141821
loss: 1.0963962078094482,grad_norm: 0.9999999785348198, iteration: 141822
loss: 0.9652566909790039,grad_norm: 0.8710212611081567, iteration: 141823
loss: 1.0498348474502563,grad_norm: 0.999999899181624, iteration: 141824
loss: 1.0987598896026611,grad_norm: 0.9999998971524651, iteration: 141825
loss: 1.121977686882019,grad_norm: 0.9999990500695691, iteration: 141826
loss: 1.1148091554641724,grad_norm: 0.9999998493736779, iteration: 141827
loss: 1.0269570350646973,grad_norm: 0.9999996690055143, iteration: 141828
loss: 1.0200425386428833,grad_norm: 0.999999178292919, iteration: 141829
loss: 1.183478832244873,grad_norm: 0.9999994124594767, iteration: 141830
loss: 1.2204290628433228,grad_norm: 0.9999999000959513, iteration: 141831
loss: 1.0351561307907104,grad_norm: 0.9999991284154145, iteration: 141832
loss: 1.1122021675109863,grad_norm: 0.9999999283503838, iteration: 141833
loss: 1.1435564756393433,grad_norm: 0.9999998028729145, iteration: 141834
loss: 1.1344114542007446,grad_norm: 1.0000000155562354, iteration: 141835
loss: 1.107116460800171,grad_norm: 0.9999998499320966, iteration: 141836
loss: 1.005480170249939,grad_norm: 0.9999996075862079, iteration: 141837
loss: 1.038403868675232,grad_norm: 0.9999992871511123, iteration: 141838
loss: 1.0852429866790771,grad_norm: 0.9999992355592352, iteration: 141839
loss: 1.0985362529754639,grad_norm: 0.99999977537358, iteration: 141840
loss: 1.197447419166565,grad_norm: 0.9999999027873084, iteration: 141841
loss: 1.2490036487579346,grad_norm: 0.9999997584578746, iteration: 141842
loss: 1.0990118980407715,grad_norm: 1.0000000659783357, iteration: 141843
loss: 1.0659328699111938,grad_norm: 0.9999996295419236, iteration: 141844
loss: 1.1294562816619873,grad_norm: 0.9999995320856849, iteration: 141845
loss: 1.1160868406295776,grad_norm: 0.9999993903836711, iteration: 141846
loss: 1.1023374795913696,grad_norm: 0.9999993023811118, iteration: 141847
loss: 1.0434650182724,grad_norm: 0.9999992210692985, iteration: 141848
loss: 1.04820716381073,grad_norm: 0.9999998812580939, iteration: 141849
loss: 1.1200379133224487,grad_norm: 0.9999997311731292, iteration: 141850
loss: 1.0359365940093994,grad_norm: 0.9999996625609624, iteration: 141851
loss: 1.1225076913833618,grad_norm: 0.9999992667914063, iteration: 141852
loss: 1.1202136278152466,grad_norm: 0.9999999266930981, iteration: 141853
loss: 1.0291333198547363,grad_norm: 0.9999996262183524, iteration: 141854
loss: 1.0216306447982788,grad_norm: 0.999999172560851, iteration: 141855
loss: 1.109039545059204,grad_norm: 0.9999999304949149, iteration: 141856
loss: 1.2116111516952515,grad_norm: 0.9999998594722342, iteration: 141857
loss: 1.0080664157867432,grad_norm: 0.9999994541988646, iteration: 141858
loss: 1.1248626708984375,grad_norm: 0.999999686000323, iteration: 141859
loss: 1.082854151725769,grad_norm: 0.9999994876753457, iteration: 141860
loss: 1.0038859844207764,grad_norm: 0.8858030498643803, iteration: 141861
loss: 1.019486665725708,grad_norm: 0.8624772314994142, iteration: 141862
loss: 1.018988847732544,grad_norm: 0.9999992290767794, iteration: 141863
loss: 1.147618055343628,grad_norm: 0.9999995689240881, iteration: 141864
loss: 1.0419670343399048,grad_norm: 0.9999991556629731, iteration: 141865
loss: 1.0564732551574707,grad_norm: 0.9999993576467762, iteration: 141866
loss: 1.1861375570297241,grad_norm: 0.9999993853398284, iteration: 141867
loss: 1.0187660455703735,grad_norm: 0.999999135823249, iteration: 141868
loss: 1.058920979499817,grad_norm: 0.9401560787473973, iteration: 141869
loss: 1.0005449056625366,grad_norm: 0.8718415378660884, iteration: 141870
loss: 1.0243111848831177,grad_norm: 0.9999991421487058, iteration: 141871
loss: 1.0851696729660034,grad_norm: 0.9999992171379369, iteration: 141872
loss: 0.9741030931472778,grad_norm: 0.7852501840531831, iteration: 141873
loss: 1.0733894109725952,grad_norm: 0.999999627591963, iteration: 141874
loss: 1.0645817518234253,grad_norm: 0.9999994867705425, iteration: 141875
loss: 1.0954859256744385,grad_norm: 0.9999994560802457, iteration: 141876
loss: 1.0464123487472534,grad_norm: 0.9999997534264996, iteration: 141877
loss: 1.0625174045562744,grad_norm: 0.9999998764967305, iteration: 141878
loss: 1.146660566329956,grad_norm: 1.0000000059519674, iteration: 141879
loss: 1.1798555850982666,grad_norm: 0.9999995888155245, iteration: 141880
loss: 1.01644766330719,grad_norm: 0.9709478593422527, iteration: 141881
loss: 0.9956898093223572,grad_norm: 0.8462331631359019, iteration: 141882
loss: 1.068662166595459,grad_norm: 0.87573937810801, iteration: 141883
loss: 1.2323428392410278,grad_norm: 0.9999998171013901, iteration: 141884
loss: 1.0179731845855713,grad_norm: 0.8189571404934548, iteration: 141885
loss: 1.0786794424057007,grad_norm: 0.9999991128097572, iteration: 141886
loss: 1.087179183959961,grad_norm: 0.9999989584505519, iteration: 141887
loss: 1.0243594646453857,grad_norm: 0.999999372615677, iteration: 141888
loss: 1.0204137563705444,grad_norm: 0.9999994475218669, iteration: 141889
loss: 1.0958352088928223,grad_norm: 0.9999999396887493, iteration: 141890
loss: 1.0702438354492188,grad_norm: 0.9999990420540824, iteration: 141891
loss: 1.2480067014694214,grad_norm: 0.9999998140491773, iteration: 141892
loss: 0.9680326581001282,grad_norm: 0.7939682592958948, iteration: 141893
loss: 1.0268008708953857,grad_norm: 0.9053294584402803, iteration: 141894
loss: 1.2211984395980835,grad_norm: 0.9999997363755613, iteration: 141895
loss: 1.032228708267212,grad_norm: 0.9222667394475996, iteration: 141896
loss: 1.0371426343917847,grad_norm: 0.9999992544361875, iteration: 141897
loss: 1.0683382749557495,grad_norm: 0.9999994387420771, iteration: 141898
loss: 0.9635679125785828,grad_norm: 0.8484635590472973, iteration: 141899
loss: 1.0369439125061035,grad_norm: 0.99999949943377, iteration: 141900
loss: 1.0218592882156372,grad_norm: 0.7611702342119171, iteration: 141901
loss: 1.007711410522461,grad_norm: 0.999999713628168, iteration: 141902
loss: 0.9845076203346252,grad_norm: 0.9999991165768245, iteration: 141903
loss: 1.1517633199691772,grad_norm: 0.9999998983820424, iteration: 141904
loss: 1.1592680215835571,grad_norm: 0.9999999306197992, iteration: 141905
loss: 1.0865468978881836,grad_norm: 0.9999990878321695, iteration: 141906
loss: 1.0550001859664917,grad_norm: 0.8211681514189263, iteration: 141907
loss: 1.1322910785675049,grad_norm: 0.92695900051019, iteration: 141908
loss: 1.062474012374878,grad_norm: 0.9352975308513709, iteration: 141909
loss: 1.0562132596969604,grad_norm: 0.9999992153593391, iteration: 141910
loss: 1.0258959531784058,grad_norm: 0.9999990277306375, iteration: 141911
loss: 1.1059867143630981,grad_norm: 0.9999996797837484, iteration: 141912
loss: 1.0625196695327759,grad_norm: 0.999999533070205, iteration: 141913
loss: 1.118598222732544,grad_norm: 0.999999417649111, iteration: 141914
loss: 1.0797991752624512,grad_norm: 0.9999997005737443, iteration: 141915
loss: 1.0231059789657593,grad_norm: 0.835313201863489, iteration: 141916
loss: 1.0661696195602417,grad_norm: 0.999999167325162, iteration: 141917
loss: 1.0164799690246582,grad_norm: 0.9999995160146816, iteration: 141918
loss: 1.0422683954238892,grad_norm: 0.9999996438967191, iteration: 141919
loss: 1.0685172080993652,grad_norm: 0.9999994430412579, iteration: 141920
loss: 0.996453046798706,grad_norm: 0.9999997620323472, iteration: 141921
loss: 1.243798017501831,grad_norm: 0.9999998523135845, iteration: 141922
loss: 1.074318528175354,grad_norm: 0.9999997883241578, iteration: 141923
loss: 1.0517712831497192,grad_norm: 0.9999995297915499, iteration: 141924
loss: 0.973741888999939,grad_norm: 0.9999997351544996, iteration: 141925
loss: 0.9978593587875366,grad_norm: 0.9999994185210261, iteration: 141926
loss: 1.0014925003051758,grad_norm: 0.8957644769901807, iteration: 141927
loss: 1.011006236076355,grad_norm: 0.9999998198674435, iteration: 141928
loss: 1.1483818292617798,grad_norm: 0.9999998884214435, iteration: 141929
loss: 1.0220990180969238,grad_norm: 0.9999993720146433, iteration: 141930
loss: 1.0746558904647827,grad_norm: 0.9999998964263997, iteration: 141931
loss: 1.0404133796691895,grad_norm: 0.9999997279721535, iteration: 141932
loss: 1.130622386932373,grad_norm: 0.9999993392847545, iteration: 141933
loss: 1.0311633348464966,grad_norm: 0.9999992656713484, iteration: 141934
loss: 1.0789260864257812,grad_norm: 0.8377862778627482, iteration: 141935
loss: 1.0847063064575195,grad_norm: 0.9999995054358504, iteration: 141936
loss: 1.0124437808990479,grad_norm: 0.9597916271918979, iteration: 141937
loss: 1.1483447551727295,grad_norm: 0.9999997143216915, iteration: 141938
loss: 1.0391523838043213,grad_norm: 0.9999996889507766, iteration: 141939
loss: 1.001794695854187,grad_norm: 0.9999991128492355, iteration: 141940
loss: 1.0305050611495972,grad_norm: 0.875208939130622, iteration: 141941
loss: 1.0742170810699463,grad_norm: 0.999999472519872, iteration: 141942
loss: 0.9984852075576782,grad_norm: 0.9844039200902183, iteration: 141943
loss: 1.0974986553192139,grad_norm: 0.9999990757208729, iteration: 141944
loss: 0.9824090600013733,grad_norm: 0.8965279084640234, iteration: 141945
loss: 1.043440341949463,grad_norm: 0.9999997064256494, iteration: 141946
loss: 1.0150039196014404,grad_norm: 0.8591278333403835, iteration: 141947
loss: 1.0675418376922607,grad_norm: 0.9009431708068721, iteration: 141948
loss: 1.0214225053787231,grad_norm: 0.9517731112516284, iteration: 141949
loss: 0.997776210308075,grad_norm: 0.9999992658139886, iteration: 141950
loss: 1.0248315334320068,grad_norm: 0.9999991155103334, iteration: 141951
loss: 1.0368860960006714,grad_norm: 0.8277946525299796, iteration: 141952
loss: 1.0119857788085938,grad_norm: 0.8877117765548639, iteration: 141953
loss: 1.142277479171753,grad_norm: 0.999999694524817, iteration: 141954
loss: 1.0702990293502808,grad_norm: 0.9999992131771506, iteration: 141955
loss: 0.9887142181396484,grad_norm: 0.7623195007512733, iteration: 141956
loss: 1.0487854480743408,grad_norm: 0.9999993474337038, iteration: 141957
loss: 1.047364592552185,grad_norm: 0.999999656960491, iteration: 141958
loss: 1.3581643104553223,grad_norm: 0.999999746560777, iteration: 141959
loss: 1.0895617008209229,grad_norm: 0.9999994971630882, iteration: 141960
loss: 1.0311318635940552,grad_norm: 0.9999991472152568, iteration: 141961
loss: 1.0936192274093628,grad_norm: 0.9999999184211714, iteration: 141962
loss: 1.0370184183120728,grad_norm: 0.9671618543498138, iteration: 141963
loss: 1.0356510877609253,grad_norm: 0.9999993686911054, iteration: 141964
loss: 1.0205844640731812,grad_norm: 0.9999993643622662, iteration: 141965
loss: 1.0940380096435547,grad_norm: 0.9999990408595523, iteration: 141966
loss: 1.1447298526763916,grad_norm: 0.9999995883069958, iteration: 141967
loss: 0.9983928799629211,grad_norm: 0.9999991349649181, iteration: 141968
loss: 1.031030535697937,grad_norm: 0.9999998013302532, iteration: 141969
loss: 0.9847537875175476,grad_norm: 0.9999990765854097, iteration: 141970
loss: 0.9972244501113892,grad_norm: 0.9999991626079036, iteration: 141971
loss: 1.0127962827682495,grad_norm: 0.9434028440041685, iteration: 141972
loss: 1.0369296073913574,grad_norm: 0.9999991703954167, iteration: 141973
loss: 1.0856363773345947,grad_norm: 0.9999999444629137, iteration: 141974
loss: 1.0523788928985596,grad_norm: 0.9999994916945374, iteration: 141975
loss: 1.2278743982315063,grad_norm: 0.9999999722715972, iteration: 141976
loss: 1.0203280448913574,grad_norm: 0.7937873228889093, iteration: 141977
loss: 1.0475280284881592,grad_norm: 0.9999992126830324, iteration: 141978
loss: 1.008596420288086,grad_norm: 0.850196207016354, iteration: 141979
loss: 1.0498220920562744,grad_norm: 0.9999996727749663, iteration: 141980
loss: 1.0548828840255737,grad_norm: 0.972504937162414, iteration: 141981
loss: 1.0526623725891113,grad_norm: 0.9982045255540222, iteration: 141982
loss: 1.074843406677246,grad_norm: 0.9059814995608427, iteration: 141983
loss: 1.0992918014526367,grad_norm: 0.9999995363229849, iteration: 141984
loss: 0.9709679484367371,grad_norm: 0.9999993869900619, iteration: 141985
loss: 0.9413968324661255,grad_norm: 0.9999991126409026, iteration: 141986
loss: 1.0235466957092285,grad_norm: 0.9262640401787293, iteration: 141987
loss: 1.0576972961425781,grad_norm: 0.9999990893164838, iteration: 141988
loss: 1.0553632974624634,grad_norm: 0.9999993295586247, iteration: 141989
loss: 1.005035400390625,grad_norm: 0.8213318541523522, iteration: 141990
loss: 1.0604811906814575,grad_norm: 0.9999993612010721, iteration: 141991
loss: 1.0336395502090454,grad_norm: 0.9999991712399672, iteration: 141992
loss: 1.0921854972839355,grad_norm: 0.9999997475219191, iteration: 141993
loss: 0.9789525866508484,grad_norm: 0.7460066274919682, iteration: 141994
loss: 1.0120630264282227,grad_norm: 0.9999998616905957, iteration: 141995
loss: 1.0724608898162842,grad_norm: 0.999999171129975, iteration: 141996
loss: 1.0192224979400635,grad_norm: 0.9999990784967066, iteration: 141997
loss: 1.1099406480789185,grad_norm: 0.9999992409512813, iteration: 141998
loss: 0.9985079765319824,grad_norm: 0.862790739469284, iteration: 141999
loss: 1.0530797243118286,grad_norm: 0.9999994536197835, iteration: 142000
loss: 1.0566328763961792,grad_norm: 0.9761787787046596, iteration: 142001
loss: 1.0256434679031372,grad_norm: 0.9510237907549771, iteration: 142002
loss: 1.0420790910720825,grad_norm: 0.9524436358948042, iteration: 142003
loss: 1.0661488771438599,grad_norm: 0.9999995849172975, iteration: 142004
loss: 1.062103271484375,grad_norm: 0.9999992683587389, iteration: 142005
loss: 1.0231654644012451,grad_norm: 0.9999990607463078, iteration: 142006
loss: 1.0589936971664429,grad_norm: 0.9999995279743832, iteration: 142007
loss: 1.1283570528030396,grad_norm: 0.9999995730552065, iteration: 142008
loss: 1.0008474588394165,grad_norm: 0.9999992834194337, iteration: 142009
loss: 1.105441927909851,grad_norm: 0.9999999394129276, iteration: 142010
loss: 1.1061322689056396,grad_norm: 0.9999999323322423, iteration: 142011
loss: 1.038007140159607,grad_norm: 0.9999994831485591, iteration: 142012
loss: 0.9999498128890991,grad_norm: 0.9605483600951468, iteration: 142013
loss: 1.0472831726074219,grad_norm: 0.9091231277465636, iteration: 142014
loss: 0.954272985458374,grad_norm: 0.9249080165486417, iteration: 142015
loss: 1.0069636106491089,grad_norm: 0.8454424846712255, iteration: 142016
loss: 1.144214153289795,grad_norm: 0.9999994838760509, iteration: 142017
loss: 1.0167375802993774,grad_norm: 0.7186431405850626, iteration: 142018
loss: 1.1323840618133545,grad_norm: 0.9999996034408359, iteration: 142019
loss: 0.9933721423149109,grad_norm: 0.8832269015801916, iteration: 142020
loss: 1.1161551475524902,grad_norm: 0.9999994165068816, iteration: 142021
loss: 0.9663509726524353,grad_norm: 0.9999992065069645, iteration: 142022
loss: 1.0115553140640259,grad_norm: 0.9999993987457055, iteration: 142023
loss: 1.051969289779663,grad_norm: 0.9276705122159682, iteration: 142024
loss: 1.1085907220840454,grad_norm: 0.9999993321151724, iteration: 142025
loss: 1.189933180809021,grad_norm: 0.9999999140841265, iteration: 142026
loss: 1.121927261352539,grad_norm: 0.9999992148278811, iteration: 142027
loss: 1.0158134698867798,grad_norm: 0.9999990526509239, iteration: 142028
loss: 1.0227495431900024,grad_norm: 0.9999996942492404, iteration: 142029
loss: 1.0965981483459473,grad_norm: 0.9474710282373418, iteration: 142030
loss: 1.010620355606079,grad_norm: 0.75514236551844, iteration: 142031
loss: 1.059221625328064,grad_norm: 0.9999993953762121, iteration: 142032
loss: 1.0389134883880615,grad_norm: 0.9999990880450087, iteration: 142033
loss: 1.0685067176818848,grad_norm: 0.9999998324608803, iteration: 142034
loss: 1.0695825815200806,grad_norm: 0.9999993719492124, iteration: 142035
loss: 1.0777405500411987,grad_norm: 0.9999995076374723, iteration: 142036
loss: 1.0212886333465576,grad_norm: 0.9227055484599668, iteration: 142037
loss: 1.0000017881393433,grad_norm: 0.9999992928181408, iteration: 142038
loss: 0.9900187253952026,grad_norm: 0.9487076834913928, iteration: 142039
loss: 1.0005003213882446,grad_norm: 0.7847642634906418, iteration: 142040
loss: 1.042709469795227,grad_norm: 0.9989422880748532, iteration: 142041
loss: 1.026318907737732,grad_norm: 0.9999999545597091, iteration: 142042
loss: 1.1753901243209839,grad_norm: 1.0000000667418842, iteration: 142043
loss: 1.0450026988983154,grad_norm: 0.9999998983614005, iteration: 142044
loss: 1.0646477937698364,grad_norm: 0.9999991685217307, iteration: 142045
loss: 1.1189873218536377,grad_norm: 0.9999996230361794, iteration: 142046
loss: 1.0841947793960571,grad_norm: 0.9999995097686191, iteration: 142047
loss: 0.9908501505851746,grad_norm: 0.9999992300581394, iteration: 142048
loss: 1.0857359170913696,grad_norm: 0.8306459139169254, iteration: 142049
loss: 1.097590446472168,grad_norm: 0.9999995451398543, iteration: 142050
loss: 1.1704274415969849,grad_norm: 0.9999998974919688, iteration: 142051
loss: 1.070294737815857,grad_norm: 0.9759297314674752, iteration: 142052
loss: 1.0421233177185059,grad_norm: 0.9999996944036527, iteration: 142053
loss: 1.1523494720458984,grad_norm: 1.0000000408148184, iteration: 142054
loss: 1.0375500917434692,grad_norm: 0.9546536960415616, iteration: 142055
loss: 0.9974619746208191,grad_norm: 0.9910924059965236, iteration: 142056
loss: 1.0235114097595215,grad_norm: 0.9775494682399553, iteration: 142057
loss: 1.112345814704895,grad_norm: 0.9999998031494611, iteration: 142058
loss: 1.159896969795227,grad_norm: 0.9999992904537242, iteration: 142059
loss: 1.158912181854248,grad_norm: 0.9999996855103047, iteration: 142060
loss: 1.0162240266799927,grad_norm: 0.9187967561597803, iteration: 142061
loss: 1.0502849817276,grad_norm: 0.9215095905213444, iteration: 142062
loss: 1.0204569101333618,grad_norm: 0.9999991953062066, iteration: 142063
loss: 0.9912403225898743,grad_norm: 0.9999996480446723, iteration: 142064
loss: 1.0986130237579346,grad_norm: 0.9999991003833535, iteration: 142065
loss: 1.0765806436538696,grad_norm: 0.999999627935908, iteration: 142066
loss: 1.0000905990600586,grad_norm: 0.8573818467711845, iteration: 142067
loss: 0.9871256351470947,grad_norm: 0.9999993160333928, iteration: 142068
loss: 1.0548512935638428,grad_norm: 0.9999997751905091, iteration: 142069
loss: 0.9820634722709656,grad_norm: 0.9218490370340187, iteration: 142070
loss: 1.0181620121002197,grad_norm: 0.8940689374155794, iteration: 142071
loss: 1.0450807809829712,grad_norm: 0.9415543746033671, iteration: 142072
loss: 0.989017903804779,grad_norm: 0.8742772348601439, iteration: 142073
loss: 0.9922837615013123,grad_norm: 0.8580156618412748, iteration: 142074
loss: 1.0380885601043701,grad_norm: 0.9999997508490025, iteration: 142075
loss: 1.047316312789917,grad_norm: 0.9999996117630712, iteration: 142076
loss: 1.1169670820236206,grad_norm: 0.9999995340577362, iteration: 142077
loss: 1.1554065942764282,grad_norm: 0.9999997717264526, iteration: 142078
loss: 1.0326389074325562,grad_norm: 0.9999993102435233, iteration: 142079
loss: 1.1877998113632202,grad_norm: 0.9999999219464003, iteration: 142080
loss: 1.0180044174194336,grad_norm: 0.9158888406580946, iteration: 142081
loss: 1.0525574684143066,grad_norm: 0.9999992555414778, iteration: 142082
loss: 1.0349647998809814,grad_norm: 0.999999869401855, iteration: 142083
loss: 1.035837173461914,grad_norm: 0.9595313025891331, iteration: 142084
loss: 1.0363506078720093,grad_norm: 0.9999992917175543, iteration: 142085
loss: 1.2370835542678833,grad_norm: 0.9999995694518828, iteration: 142086
loss: 1.0831081867218018,grad_norm: 0.9999999064332649, iteration: 142087
loss: 0.980036199092865,grad_norm: 0.7004686730724925, iteration: 142088
loss: 1.0799757242202759,grad_norm: 0.9999994110118889, iteration: 142089
loss: 1.0756940841674805,grad_norm: 0.9999999309678511, iteration: 142090
loss: 1.1210414171218872,grad_norm: 0.9999998901354963, iteration: 142091
loss: 1.1412134170532227,grad_norm: 0.9999995778197728, iteration: 142092
loss: 1.096069097518921,grad_norm: 0.9999997777387167, iteration: 142093
loss: 0.9811318516731262,grad_norm: 0.9999989828796573, iteration: 142094
loss: 1.0720932483673096,grad_norm: 0.9999991893208908, iteration: 142095
loss: 1.126541018486023,grad_norm: 0.9999998885533976, iteration: 142096
loss: 1.0753639936447144,grad_norm: 0.9999996558882158, iteration: 142097
loss: 1.0496488809585571,grad_norm: 0.9999996222152319, iteration: 142098
loss: 1.0102766752243042,grad_norm: 0.9999997243542517, iteration: 142099
loss: 1.0645573139190674,grad_norm: 0.9999991022093782, iteration: 142100
loss: 1.0576597452163696,grad_norm: 0.9999991452294904, iteration: 142101
loss: 1.0056672096252441,grad_norm: 0.9999990926832657, iteration: 142102
loss: 1.0307646989822388,grad_norm: 0.9999998884128428, iteration: 142103
loss: 1.2838155031204224,grad_norm: 0.9999998813138367, iteration: 142104
loss: 1.1221436262130737,grad_norm: 0.9999998189523936, iteration: 142105
loss: 1.1652036905288696,grad_norm: 0.999999859860146, iteration: 142106
loss: 1.1428232192993164,grad_norm: 0.9999993893160847, iteration: 142107
loss: 1.1385127305984497,grad_norm: 0.9999995187282011, iteration: 142108
loss: 0.96567702293396,grad_norm: 0.9354288822576444, iteration: 142109
loss: 1.0307954549789429,grad_norm: 0.999999834440221, iteration: 142110
loss: 1.0877406597137451,grad_norm: 0.9999999300280233, iteration: 142111
loss: 1.0087964534759521,grad_norm: 0.9040539075724996, iteration: 142112
loss: 1.0038905143737793,grad_norm: 0.9830728299966447, iteration: 142113
loss: 0.9947715401649475,grad_norm: 0.9999992742290545, iteration: 142114
loss: 0.9765371084213257,grad_norm: 0.9918378525293161, iteration: 142115
loss: 0.9739139080047607,grad_norm: 0.8930611263974089, iteration: 142116
loss: 1.0533400774002075,grad_norm: 0.9999993631410787, iteration: 142117
loss: 1.018356442451477,grad_norm: 0.8119665329464161, iteration: 142118
loss: 1.1171647310256958,grad_norm: 0.9999997480550721, iteration: 142119
loss: 1.0863027572631836,grad_norm: 0.9999995704407849, iteration: 142120
loss: 1.0589956045150757,grad_norm: 0.9999995761731373, iteration: 142121
loss: 1.0942860841751099,grad_norm: 0.9999999571971745, iteration: 142122
loss: 1.072809100151062,grad_norm: 0.9999990945809586, iteration: 142123
loss: 1.0876023769378662,grad_norm: 0.9999997960178459, iteration: 142124
loss: 1.010237216949463,grad_norm: 0.8746979833131363, iteration: 142125
loss: 1.0962902307510376,grad_norm: 0.9999991653502515, iteration: 142126
loss: 1.033829689025879,grad_norm: 1.0000000053480593, iteration: 142127
loss: 0.9609898924827576,grad_norm: 0.9999994499774758, iteration: 142128
loss: 1.0302165746688843,grad_norm: 0.8119973900771595, iteration: 142129
loss: 1.003388524055481,grad_norm: 0.9999990846177353, iteration: 142130
loss: 1.0669465065002441,grad_norm: 0.9999995266987527, iteration: 142131
loss: 1.053802728652954,grad_norm: 0.9630266013294008, iteration: 142132
loss: 1.1006361246109009,grad_norm: 0.9999997889912093, iteration: 142133
loss: 1.117393136024475,grad_norm: 0.9999999864413487, iteration: 142134
loss: 1.0361524820327759,grad_norm: 0.9847951752604247, iteration: 142135
loss: 0.956643283367157,grad_norm: 0.9193067334222014, iteration: 142136
loss: 1.0348358154296875,grad_norm: 0.7594386908888894, iteration: 142137
loss: 1.0537139177322388,grad_norm: 0.9999995947756706, iteration: 142138
loss: 0.9750398397445679,grad_norm: 0.9999998384436793, iteration: 142139
loss: 1.1391205787658691,grad_norm: 0.9999998157023507, iteration: 142140
loss: 0.9888046383857727,grad_norm: 0.864668586221927, iteration: 142141
loss: 1.0411369800567627,grad_norm: 0.9923990380045933, iteration: 142142
loss: 1.0119670629501343,grad_norm: 0.9715872668036549, iteration: 142143
loss: 1.0435845851898193,grad_norm: 0.9999990552608633, iteration: 142144
loss: 1.0428098440170288,grad_norm: 0.9999995415792434, iteration: 142145
loss: 1.1105488538742065,grad_norm: 0.999999803597992, iteration: 142146
loss: 0.9966384172439575,grad_norm: 0.9999991576754734, iteration: 142147
loss: 0.9951813220977783,grad_norm: 0.8973228107143287, iteration: 142148
loss: 0.9980780482292175,grad_norm: 0.9542572009506004, iteration: 142149
loss: 1.243822693824768,grad_norm: 0.9999999343926821, iteration: 142150
loss: 0.997515082359314,grad_norm: 0.9999993734115596, iteration: 142151
loss: 1.023897647857666,grad_norm: 0.8451707659453358, iteration: 142152
loss: 1.0320042371749878,grad_norm: 0.9999992339565312, iteration: 142153
loss: 1.0568170547485352,grad_norm: 0.99999949062487, iteration: 142154
loss: 1.0395523309707642,grad_norm: 0.8038909756755944, iteration: 142155
loss: 0.9651281237602234,grad_norm: 0.7266076584092304, iteration: 142156
loss: 1.0841054916381836,grad_norm: 0.9999994326563243, iteration: 142157
loss: 1.034839391708374,grad_norm: 0.9999999044625922, iteration: 142158
loss: 1.078553318977356,grad_norm: 0.9999995239555391, iteration: 142159
loss: 1.1692395210266113,grad_norm: 1.000000018653368, iteration: 142160
loss: 0.9884184002876282,grad_norm: 0.9234647694517729, iteration: 142161
loss: 1.0816723108291626,grad_norm: 0.9999996074936712, iteration: 142162
loss: 1.0644676685333252,grad_norm: 0.9999994521337323, iteration: 142163
loss: 1.0046510696411133,grad_norm: 0.9999998570816157, iteration: 142164
loss: 1.2396368980407715,grad_norm: 0.9999999457947459, iteration: 142165
loss: 1.124585747718811,grad_norm: 0.9999993827057807, iteration: 142166
loss: 1.032239556312561,grad_norm: 0.9999994317434698, iteration: 142167
loss: 1.0227906703948975,grad_norm: 0.8069480134628613, iteration: 142168
loss: 1.0262115001678467,grad_norm: 0.9999995776257152, iteration: 142169
loss: 1.0857831239700317,grad_norm: 0.9999998466580104, iteration: 142170
loss: 0.9677451848983765,grad_norm: 0.9086078280133673, iteration: 142171
loss: 1.1218572854995728,grad_norm: 0.9999993461923673, iteration: 142172
loss: 1.022127389907837,grad_norm: 0.9999998053687524, iteration: 142173
loss: 0.9631972312927246,grad_norm: 0.8103744620986008, iteration: 142174
loss: 1.0016543865203857,grad_norm: 0.7935923476793062, iteration: 142175
loss: 1.0193291902542114,grad_norm: 0.9999991820891895, iteration: 142176
loss: 1.0368460416793823,grad_norm: 0.9999991617270526, iteration: 142177
loss: 1.0743968486785889,grad_norm: 0.9999998908681066, iteration: 142178
loss: 1.031241774559021,grad_norm: 0.9999997353586365, iteration: 142179
loss: 1.0881125926971436,grad_norm: 0.9999990995672745, iteration: 142180
loss: 1.0072741508483887,grad_norm: 0.8655070724497855, iteration: 142181
loss: 0.9947391748428345,grad_norm: 0.9999990601105222, iteration: 142182
loss: 1.0271539688110352,grad_norm: 0.9999996371752063, iteration: 142183
loss: 1.0393067598342896,grad_norm: 0.9999992953255858, iteration: 142184
loss: 1.029626488685608,grad_norm: 0.9999993105256892, iteration: 142185
loss: 1.1206165552139282,grad_norm: 0.9999997638567548, iteration: 142186
loss: 1.0414632558822632,grad_norm: 0.9999990272175447, iteration: 142187
loss: 1.0738714933395386,grad_norm: 0.9999996331627048, iteration: 142188
loss: 1.031999111175537,grad_norm: 0.9999993546494509, iteration: 142189
loss: 1.1045112609863281,grad_norm: 0.99999997010895, iteration: 142190
loss: 0.9816807508468628,grad_norm: 0.9360261382620249, iteration: 142191
loss: 1.1633895635604858,grad_norm: 0.9999997321154862, iteration: 142192
loss: 1.0222430229187012,grad_norm: 0.9999990745494589, iteration: 142193
loss: 1.0588185787200928,grad_norm: 0.9999997790423418, iteration: 142194
loss: 1.1490250825881958,grad_norm: 0.9999996544751124, iteration: 142195
loss: 1.0597217082977295,grad_norm: 0.9999995172364738, iteration: 142196
loss: 1.0557246208190918,grad_norm: 0.9999996367407537, iteration: 142197
loss: 1.0157456398010254,grad_norm: 0.881041060622845, iteration: 142198
loss: 0.9712806940078735,grad_norm: 0.778936311577285, iteration: 142199
loss: 0.9816390872001648,grad_norm: 0.9999991767141952, iteration: 142200
loss: 1.0507429838180542,grad_norm: 0.9999998334708369, iteration: 142201
loss: 0.9926642775535583,grad_norm: 0.9999997242967538, iteration: 142202
loss: 1.0469648838043213,grad_norm: 0.9999995146420846, iteration: 142203
loss: 1.0300633907318115,grad_norm: 0.9999992684742495, iteration: 142204
loss: 1.0962269306182861,grad_norm: 0.999999227067745, iteration: 142205
loss: 0.9929460883140564,grad_norm: 0.9999991323949065, iteration: 142206
loss: 1.1879479885101318,grad_norm: 0.999999707693574, iteration: 142207
loss: 1.0786553621292114,grad_norm: 0.9999993914679662, iteration: 142208
loss: 0.9907039999961853,grad_norm: 0.8286186368437402, iteration: 142209
loss: 1.0166351795196533,grad_norm: 0.9561870686590431, iteration: 142210
loss: 1.0533193349838257,grad_norm: 0.9999993458754933, iteration: 142211
loss: 1.0552815198898315,grad_norm: 0.9999998966756973, iteration: 142212
loss: 1.0449057817459106,grad_norm: 0.9999994005808266, iteration: 142213
loss: 0.9805909991264343,grad_norm: 0.9999989978305492, iteration: 142214
loss: 1.0409563779830933,grad_norm: 0.9999994029338544, iteration: 142215
loss: 1.0618922710418701,grad_norm: 0.9999994055113804, iteration: 142216
loss: 1.2250316143035889,grad_norm: 0.9999998723490624, iteration: 142217
loss: 1.0698235034942627,grad_norm: 0.9999998028330521, iteration: 142218
loss: 1.05209481716156,grad_norm: 0.9999991372421443, iteration: 142219
loss: 1.0423943996429443,grad_norm: 0.9999993993584657, iteration: 142220
loss: 1.0530266761779785,grad_norm: 0.999999719705809, iteration: 142221
loss: 1.0542200803756714,grad_norm: 0.9999997819854054, iteration: 142222
loss: 1.0095151662826538,grad_norm: 0.9999990129430648, iteration: 142223
loss: 1.115850806236267,grad_norm: 0.9999995562823886, iteration: 142224
loss: 1.0173828601837158,grad_norm: 0.9999999269455905, iteration: 142225
loss: 1.0165749788284302,grad_norm: 0.9999995548673194, iteration: 142226
loss: 1.0394835472106934,grad_norm: 0.9974841170533678, iteration: 142227
loss: 1.4110572338104248,grad_norm: 0.9999997700404702, iteration: 142228
loss: 1.1999404430389404,grad_norm: 0.9999998613170284, iteration: 142229
loss: 1.105205774307251,grad_norm: 0.9999998592195303, iteration: 142230
loss: 1.0703564882278442,grad_norm: 0.9382850450668058, iteration: 142231
loss: 1.0124232769012451,grad_norm: 0.9999993376607488, iteration: 142232
loss: 1.050479769706726,grad_norm: 0.9999999191630212, iteration: 142233
loss: 0.9420095086097717,grad_norm: 0.9999990063061044, iteration: 142234
loss: 1.0454833507537842,grad_norm: 0.8842233898817705, iteration: 142235
loss: 0.9769797325134277,grad_norm: 0.9666171462415504, iteration: 142236
loss: 1.0358127355575562,grad_norm: 0.9999992793847704, iteration: 142237
loss: 1.0420079231262207,grad_norm: 0.999999288023143, iteration: 142238
loss: 0.9762092232704163,grad_norm: 0.8372955980717945, iteration: 142239
loss: 1.0617051124572754,grad_norm: 0.9999991228344859, iteration: 142240
loss: 1.127193570137024,grad_norm: 0.9999999091494147, iteration: 142241
loss: 1.0388590097427368,grad_norm: 0.9999992476564287, iteration: 142242
loss: 1.0229517221450806,grad_norm: 0.9999997058975313, iteration: 142243
loss: 1.1858127117156982,grad_norm: 0.9999996952153309, iteration: 142244
loss: 1.0156934261322021,grad_norm: 0.9999990531913036, iteration: 142245
loss: 1.021146297454834,grad_norm: 0.9999991292747977, iteration: 142246
loss: 1.0195845365524292,grad_norm: 0.9999997545625559, iteration: 142247
loss: 1.0289037227630615,grad_norm: 0.9280273203333288, iteration: 142248
loss: 1.0747326612472534,grad_norm: 0.9999994251194172, iteration: 142249
loss: 1.066102147102356,grad_norm: 0.9999997260415815, iteration: 142250
loss: 1.128649353981018,grad_norm: 0.9999997373669549, iteration: 142251
loss: 1.0679688453674316,grad_norm: 0.9491003049883362, iteration: 142252
loss: 0.9593671560287476,grad_norm: 0.9999989734064492, iteration: 142253
loss: 1.1635572910308838,grad_norm: 0.9999996980553624, iteration: 142254
loss: 0.9758653044700623,grad_norm: 0.7860525722120967, iteration: 142255
loss: 1.0858066082000732,grad_norm: 0.9999994293099048, iteration: 142256
loss: 1.0427311658859253,grad_norm: 0.9999994508771881, iteration: 142257
loss: 1.03461754322052,grad_norm: 0.99999991538943, iteration: 142258
loss: 1.0595461130142212,grad_norm: 0.9999994834268018, iteration: 142259
loss: 1.0294685363769531,grad_norm: 0.9517653596481152, iteration: 142260
loss: 1.0928845405578613,grad_norm: 0.999999295623983, iteration: 142261
loss: 1.1137281656265259,grad_norm: 0.9999997746629752, iteration: 142262
loss: 1.0391788482666016,grad_norm: 0.999999047725549, iteration: 142263
loss: 1.0076929330825806,grad_norm: 0.8762320853930816, iteration: 142264
loss: 1.0249285697937012,grad_norm: 0.8797018229413278, iteration: 142265
loss: 0.9842028617858887,grad_norm: 0.8780569048529647, iteration: 142266
loss: 1.1394141912460327,grad_norm: 0.9999996480428646, iteration: 142267
loss: 1.0151311159133911,grad_norm: 0.9430064836190477, iteration: 142268
loss: 0.9985714554786682,grad_norm: 0.7727367555508238, iteration: 142269
loss: 1.0066230297088623,grad_norm: 0.9999989982644572, iteration: 142270
loss: 1.1416947841644287,grad_norm: 0.9999998460088125, iteration: 142271
loss: 1.2277302742004395,grad_norm: 0.9999996288457459, iteration: 142272
loss: 1.0520737171173096,grad_norm: 0.9999991232277726, iteration: 142273
loss: 0.9601787328720093,grad_norm: 0.9999992155959285, iteration: 142274
loss: 1.0438355207443237,grad_norm: 0.9999997114509284, iteration: 142275
loss: 1.101606845855713,grad_norm: 0.9999994410564306, iteration: 142276
loss: 1.001660704612732,grad_norm: 0.9999998692334272, iteration: 142277
loss: 1.0062321424484253,grad_norm: 0.9999990391924619, iteration: 142278
loss: 1.0055975914001465,grad_norm: 0.9999999477118356, iteration: 142279
loss: 1.146619200706482,grad_norm: 0.99999952064648, iteration: 142280
loss: 1.0902880430221558,grad_norm: 0.9589478876543605, iteration: 142281
loss: 1.178039789199829,grad_norm: 0.9999999878097258, iteration: 142282
loss: 1.4303447008132935,grad_norm: 0.9999997925289036, iteration: 142283
loss: 1.0828441381454468,grad_norm: 0.9782697296313247, iteration: 142284
loss: 1.0435290336608887,grad_norm: 0.9999998736504117, iteration: 142285
loss: 1.0022666454315186,grad_norm: 0.9999992531473334, iteration: 142286
loss: 1.142521619796753,grad_norm: 0.9999994754577318, iteration: 142287
loss: 1.0463120937347412,grad_norm: 0.9999998303737564, iteration: 142288
loss: 1.0816900730133057,grad_norm: 0.9999995457933027, iteration: 142289
loss: 1.0843257904052734,grad_norm: 0.9999994747925784, iteration: 142290
loss: 1.0194429159164429,grad_norm: 0.9999992305062138, iteration: 142291
loss: 1.0667312145233154,grad_norm: 0.9999993328157225, iteration: 142292
loss: 1.0057382583618164,grad_norm: 0.9999992775245333, iteration: 142293
loss: 1.1940053701400757,grad_norm: 0.9999998739488792, iteration: 142294
loss: 1.0500152111053467,grad_norm: 0.9646398031545437, iteration: 142295
loss: 1.2277809381484985,grad_norm: 0.9999994421149252, iteration: 142296
loss: 1.1302295923233032,grad_norm: 0.9999996221138584, iteration: 142297
loss: 1.0379732847213745,grad_norm: 0.9999992560398923, iteration: 142298
loss: 1.1876505613327026,grad_norm: 0.999999680912963, iteration: 142299
loss: 0.9930742979049683,grad_norm: 0.9999994894848789, iteration: 142300
loss: 1.213228702545166,grad_norm: 0.9999996620145207, iteration: 142301
loss: 1.0231740474700928,grad_norm: 0.8507139414478385, iteration: 142302
loss: 1.0805624723434448,grad_norm: 0.9999993092577483, iteration: 142303
loss: 1.0207260847091675,grad_norm: 0.8511746612709729, iteration: 142304
loss: 0.9920269846916199,grad_norm: 0.8611166670520664, iteration: 142305
loss: 1.2073333263397217,grad_norm: 0.9999998324201247, iteration: 142306
loss: 1.1149919033050537,grad_norm: 1.0000000205804758, iteration: 142307
loss: 1.0213514566421509,grad_norm: 0.9189438116256174, iteration: 142308
loss: 1.2789236307144165,grad_norm: 0.9999999552461126, iteration: 142309
loss: 1.0246981382369995,grad_norm: 0.9999998484385997, iteration: 142310
loss: 1.0477105379104614,grad_norm: 0.9999991894760789, iteration: 142311
loss: 1.1890251636505127,grad_norm: 0.9999999395671514, iteration: 142312
loss: 1.0653902292251587,grad_norm: 0.9999995954878662, iteration: 142313
loss: 1.1696743965148926,grad_norm: 0.9999993756367752, iteration: 142314
loss: 1.2527159452438354,grad_norm: 0.9999995928288948, iteration: 142315
loss: 1.016999363899231,grad_norm: 0.8341385348456309, iteration: 142316
loss: 0.9849516749382019,grad_norm: 0.9999999688891285, iteration: 142317
loss: 1.0363585948944092,grad_norm: 0.9999997378654331, iteration: 142318
loss: 1.0323207378387451,grad_norm: 0.9999992288755557, iteration: 142319
loss: 1.0383880138397217,grad_norm: 0.9804664545398093, iteration: 142320
loss: 1.0176316499710083,grad_norm: 0.966874162465958, iteration: 142321
loss: 1.101736068725586,grad_norm: 0.9999994750394889, iteration: 142322
loss: 1.0570576190948486,grad_norm: 0.9999993514254065, iteration: 142323
loss: 1.0190002918243408,grad_norm: 0.9999991724174081, iteration: 142324
loss: 1.080600380897522,grad_norm: 0.9999992158086636, iteration: 142325
loss: 0.9974837303161621,grad_norm: 0.7543549568735726, iteration: 142326
loss: 1.0546374320983887,grad_norm: 0.9999996447159087, iteration: 142327
loss: 1.0367770195007324,grad_norm: 0.8633347843620391, iteration: 142328
loss: 1.1135554313659668,grad_norm: 0.9999993519632109, iteration: 142329
loss: 1.041855812072754,grad_norm: 0.8197804226274139, iteration: 142330
loss: 1.0991127490997314,grad_norm: 0.9999996024635722, iteration: 142331
loss: 1.1232540607452393,grad_norm: 0.9999995862685959, iteration: 142332
loss: 1.0487734079360962,grad_norm: 0.9999995336437337, iteration: 142333
loss: 1.0484758615493774,grad_norm: 0.999999200708305, iteration: 142334
loss: 1.0023386478424072,grad_norm: 0.999999352292293, iteration: 142335
loss: 1.0518884658813477,grad_norm: 0.999999301016822, iteration: 142336
loss: 1.0007197856903076,grad_norm: 0.8085385283826793, iteration: 142337
loss: 1.058950662612915,grad_norm: 0.99999936238909, iteration: 142338
loss: 1.0482652187347412,grad_norm: 0.9999992629945009, iteration: 142339
loss: 1.0343623161315918,grad_norm: 0.9999994498427701, iteration: 142340
loss: 1.0681982040405273,grad_norm: 0.9999997088516553, iteration: 142341
loss: 1.0328081846237183,grad_norm: 0.9999991643164149, iteration: 142342
loss: 1.0478981733322144,grad_norm: 0.9999991392046107, iteration: 142343
loss: 1.0036571025848389,grad_norm: 0.9999995991706776, iteration: 142344
loss: 1.0015075206756592,grad_norm: 0.9013492188420864, iteration: 142345
loss: 1.0874066352844238,grad_norm: 0.9999996938115135, iteration: 142346
loss: 1.0684747695922852,grad_norm: 0.999999459914711, iteration: 142347
loss: 1.0211265087127686,grad_norm: 0.999999068139841, iteration: 142348
loss: 0.9799591302871704,grad_norm: 0.9999995096969733, iteration: 142349
loss: 1.120240330696106,grad_norm: 0.9999993541991982, iteration: 142350
loss: 0.989292323589325,grad_norm: 0.9895261745286658, iteration: 142351
loss: 0.9864696264266968,grad_norm: 0.9999994972074857, iteration: 142352
loss: 1.0422555208206177,grad_norm: 0.9999998903801897, iteration: 142353
loss: 1.0303630828857422,grad_norm: 0.9999993698402851, iteration: 142354
loss: 1.151845932006836,grad_norm: 1.0000000273107452, iteration: 142355
loss: 0.9943277835845947,grad_norm: 0.9999995869478007, iteration: 142356
loss: 0.975169837474823,grad_norm: 0.8067555990261046, iteration: 142357
loss: 1.0019575357437134,grad_norm: 0.9338065867163755, iteration: 142358
loss: 1.1012223958969116,grad_norm: 0.9999994024729164, iteration: 142359
loss: 1.147406816482544,grad_norm: 0.9999991511588278, iteration: 142360
loss: 1.2588927745819092,grad_norm: 0.9999998398403249, iteration: 142361
loss: 0.9960007071495056,grad_norm: 0.9646993976325188, iteration: 142362
loss: 1.1739124059677124,grad_norm: 0.9999991655711657, iteration: 142363
loss: 1.2231945991516113,grad_norm: 0.9999996649402246, iteration: 142364
loss: 1.0794142484664917,grad_norm: 0.9999997619076683, iteration: 142365
loss: 1.0769151449203491,grad_norm: 0.9999993143610568, iteration: 142366
loss: 1.0467422008514404,grad_norm: 0.8717485294002053, iteration: 142367
loss: 1.0495038032531738,grad_norm: 0.9999991142220908, iteration: 142368
loss: 1.1203476190567017,grad_norm: 0.9999999988041187, iteration: 142369
loss: 1.0101295709609985,grad_norm: 0.9873773420365827, iteration: 142370
loss: 0.9983649849891663,grad_norm: 0.8924951218091021, iteration: 142371
loss: 1.2701460123062134,grad_norm: 0.9999999614201925, iteration: 142372
loss: 1.014797329902649,grad_norm: 0.9255632377994334, iteration: 142373
loss: 1.0392640829086304,grad_norm: 0.984947026387376, iteration: 142374
loss: 1.0727900266647339,grad_norm: 0.9999994280229708, iteration: 142375
loss: 1.081965446472168,grad_norm: 0.9999998625585373, iteration: 142376
loss: 1.083738923072815,grad_norm: 0.9999993261047141, iteration: 142377
loss: 1.1333717107772827,grad_norm: 0.9999996642133999, iteration: 142378
loss: 1.0138293504714966,grad_norm: 0.9999995168123267, iteration: 142379
loss: 1.079871416091919,grad_norm: 0.999999156734518, iteration: 142380
loss: 1.16981840133667,grad_norm: 0.9999999007196119, iteration: 142381
loss: 1.0460509061813354,grad_norm: 0.9215213087803273, iteration: 142382
loss: 1.1447018384933472,grad_norm: 0.9999990391024695, iteration: 142383
loss: 1.0421595573425293,grad_norm: 0.9999994015055459, iteration: 142384
loss: 1.1353099346160889,grad_norm: 0.9999999559673808, iteration: 142385
loss: 1.0234429836273193,grad_norm: 0.999999922419404, iteration: 142386
loss: 1.0441988706588745,grad_norm: 0.9999997998702889, iteration: 142387
loss: 1.0552828311920166,grad_norm: 0.9999997257455323, iteration: 142388
loss: 1.0359739065170288,grad_norm: 0.9999995002770072, iteration: 142389
loss: 1.0933470726013184,grad_norm: 0.9999996297670241, iteration: 142390
loss: 1.1285150051116943,grad_norm: 0.9999998603527137, iteration: 142391
loss: 0.9973960518836975,grad_norm: 0.9999999876187757, iteration: 142392
loss: 1.0756800174713135,grad_norm: 0.9999997615705914, iteration: 142393
loss: 0.9737301468849182,grad_norm: 0.7964589420072052, iteration: 142394
loss: 1.02219557762146,grad_norm: 0.8609855317668329, iteration: 142395
loss: 1.038081169128418,grad_norm: 1.0000000678261425, iteration: 142396
loss: 1.1139148473739624,grad_norm: 0.9999991852022908, iteration: 142397
loss: 1.0239239931106567,grad_norm: 0.9999991327077552, iteration: 142398
loss: 1.0633951425552368,grad_norm: 0.9999995803551271, iteration: 142399
loss: 1.0969994068145752,grad_norm: 0.9999994578493249, iteration: 142400
loss: 1.0479832887649536,grad_norm: 0.999999323730876, iteration: 142401
loss: 0.984016478061676,grad_norm: 0.9999989689780171, iteration: 142402
loss: 0.962036669254303,grad_norm: 0.9967347732119505, iteration: 142403
loss: 1.0006039142608643,grad_norm: 0.9999993441683698, iteration: 142404
loss: 1.2439788579940796,grad_norm: 0.9999995215790143, iteration: 142405
loss: 1.0410715341567993,grad_norm: 0.8535018036967041, iteration: 142406
loss: 1.0193802118301392,grad_norm: 0.9928361479300444, iteration: 142407
loss: 1.0229467153549194,grad_norm: 0.8098384088717023, iteration: 142408
loss: 1.0245921611785889,grad_norm: 0.9999997307210046, iteration: 142409
loss: 1.0056123733520508,grad_norm: 0.8873231786872771, iteration: 142410
loss: 1.0474578142166138,grad_norm: 0.9999996606471402, iteration: 142411
loss: 0.9663702249526978,grad_norm: 0.9999991557683432, iteration: 142412
loss: 1.100565791130066,grad_norm: 0.9999998742570579, iteration: 142413
loss: 1.0143120288848877,grad_norm: 0.9999992874216583, iteration: 142414
loss: 1.1063261032104492,grad_norm: 0.9999998609799589, iteration: 142415
loss: 1.0248676538467407,grad_norm: 0.9999993162197223, iteration: 142416
loss: 1.057694673538208,grad_norm: 0.999999870178001, iteration: 142417
loss: 1.170507788658142,grad_norm: 0.9999995231647603, iteration: 142418
loss: 1.111265778541565,grad_norm: 0.9999996833183759, iteration: 142419
loss: 1.0502548217773438,grad_norm: 0.9999995847869948, iteration: 142420
loss: 1.0489388704299927,grad_norm: 0.8307061823857876, iteration: 142421
loss: 1.1017632484436035,grad_norm: 0.9999997689053882, iteration: 142422
loss: 1.067647099494934,grad_norm: 0.9999991591900822, iteration: 142423
loss: 0.9631142616271973,grad_norm: 0.8898438190784314, iteration: 142424
loss: 1.0796692371368408,grad_norm: 0.9999994369682506, iteration: 142425
loss: 1.03055739402771,grad_norm: 0.9999990729621036, iteration: 142426
loss: 1.0825942754745483,grad_norm: 0.9999999132289985, iteration: 142427
loss: 1.0266817808151245,grad_norm: 0.9999992203934964, iteration: 142428
loss: 0.9899033904075623,grad_norm: 0.8797401028847446, iteration: 142429
loss: 0.9886980056762695,grad_norm: 0.9906112862926834, iteration: 142430
loss: 1.050898790359497,grad_norm: 0.9999998227368057, iteration: 142431
loss: 1.2204957008361816,grad_norm: 0.9999999411739318, iteration: 142432
loss: 0.9915915131568909,grad_norm: 0.9461228752031781, iteration: 142433
loss: 1.0640430450439453,grad_norm: 0.9999991513211848, iteration: 142434
loss: 1.0015223026275635,grad_norm: 0.9999999592554325, iteration: 142435
loss: 1.027769923210144,grad_norm: 0.9987669574811274, iteration: 142436
loss: 1.1821244955062866,grad_norm: 0.9999994907977426, iteration: 142437
loss: 1.0712369680404663,grad_norm: 0.9999999908086634, iteration: 142438
loss: 0.9898251295089722,grad_norm: 0.9999990540019454, iteration: 142439
loss: 1.1677249670028687,grad_norm: 0.9999996112399551, iteration: 142440
loss: 1.0337210893630981,grad_norm: 0.7650820671898804, iteration: 142441
loss: 1.0503462553024292,grad_norm: 0.9999990529987732, iteration: 142442
loss: 1.0327823162078857,grad_norm: 0.9999997262412663, iteration: 142443
loss: 0.9976269602775574,grad_norm: 0.9999997854913273, iteration: 142444
loss: 1.0941989421844482,grad_norm: 1.0000000463687213, iteration: 142445
loss: 1.0873416662216187,grad_norm: 0.9999996276933475, iteration: 142446
loss: 1.0585778951644897,grad_norm: 0.9999997964833776, iteration: 142447
loss: 1.0153201818466187,grad_norm: 0.9999996852542962, iteration: 142448
loss: 1.0823183059692383,grad_norm: 0.9999998403719091, iteration: 142449
loss: 1.0622674226760864,grad_norm: 0.9999999538683164, iteration: 142450
loss: 1.2090626955032349,grad_norm: 0.9999996720127002, iteration: 142451
loss: 1.030508279800415,grad_norm: 0.8623443600252926, iteration: 142452
loss: 1.0017433166503906,grad_norm: 0.8640807560442264, iteration: 142453
loss: 1.2596700191497803,grad_norm: 0.9999995191072738, iteration: 142454
loss: 1.085909128189087,grad_norm: 0.873133394235262, iteration: 142455
loss: 1.0648260116577148,grad_norm: 0.9999998082228135, iteration: 142456
loss: 1.2090511322021484,grad_norm: 0.9999992014777195, iteration: 142457
loss: 1.017574429512024,grad_norm: 0.9999998254903648, iteration: 142458
loss: 1.1975468397140503,grad_norm: 0.9999995823844148, iteration: 142459
loss: 1.0832172632217407,grad_norm: 0.9999991690568085, iteration: 142460
loss: 1.0323896408081055,grad_norm: 0.9298664337168858, iteration: 142461
loss: 1.0653496980667114,grad_norm: 0.9999999960865597, iteration: 142462
loss: 0.9906885027885437,grad_norm: 0.999999345991892, iteration: 142463
loss: 1.1031274795532227,grad_norm: 0.9999993408718403, iteration: 142464
loss: 1.0817385911941528,grad_norm: 0.9999997809984394, iteration: 142465
loss: 1.0360565185546875,grad_norm: 0.9999994148064493, iteration: 142466
loss: 1.137834906578064,grad_norm: 0.9999992108371022, iteration: 142467
loss: 1.0119918584823608,grad_norm: 0.9999989756645792, iteration: 142468
loss: 0.9586781859397888,grad_norm: 0.9135148413977934, iteration: 142469
loss: 1.0250802040100098,grad_norm: 0.9197993903384049, iteration: 142470
loss: 1.0092357397079468,grad_norm: 0.9377306125320285, iteration: 142471
loss: 1.0163617134094238,grad_norm: 0.999999876931483, iteration: 142472
loss: 1.1093186140060425,grad_norm: 0.9999994916961166, iteration: 142473
loss: 1.0235713720321655,grad_norm: 0.9999998835190127, iteration: 142474
loss: 1.1177542209625244,grad_norm: 0.9999998830382708, iteration: 142475
loss: 0.9873679280281067,grad_norm: 0.8109637788263139, iteration: 142476
loss: 1.0435208082199097,grad_norm: 0.9999993748598017, iteration: 142477
loss: 0.989168643951416,grad_norm: 0.8231229994024738, iteration: 142478
loss: 1.1403475999832153,grad_norm: 0.9999995971945238, iteration: 142479
loss: 1.1267991065979004,grad_norm: 1.000000093429694, iteration: 142480
loss: 1.165944218635559,grad_norm: 0.9999998636153394, iteration: 142481
loss: 1.078731656074524,grad_norm: 0.9999995816726971, iteration: 142482
loss: 1.028806447982788,grad_norm: 0.9999998824700178, iteration: 142483
loss: 1.0681467056274414,grad_norm: 0.9999995015716914, iteration: 142484
loss: 1.0530149936676025,grad_norm: 0.9999991406439177, iteration: 142485
loss: 0.9759415984153748,grad_norm: 0.932781840373971, iteration: 142486
loss: 1.067720651626587,grad_norm: 0.9999990563817652, iteration: 142487
loss: 1.2037104368209839,grad_norm: 1.0000000433664042, iteration: 142488
loss: 1.0283530950546265,grad_norm: 0.9058290484626114, iteration: 142489
loss: 1.018390417098999,grad_norm: 0.7544726964821225, iteration: 142490
loss: 1.1782164573669434,grad_norm: 0.9999997927705783, iteration: 142491
loss: 1.052051067352295,grad_norm: 0.9999996378734681, iteration: 142492
loss: 1.077353596687317,grad_norm: 0.8413913119154829, iteration: 142493
loss: 1.1171373128890991,grad_norm: 0.9999995984729644, iteration: 142494
loss: 1.0440223217010498,grad_norm: 0.9999997669811171, iteration: 142495
loss: 1.081724762916565,grad_norm: 0.9999999214830273, iteration: 142496
loss: 1.0699951648712158,grad_norm: 0.9865363559570853, iteration: 142497
loss: 1.0470912456512451,grad_norm: 0.9999993458643051, iteration: 142498
loss: 1.103823184967041,grad_norm: 0.9999992816998304, iteration: 142499
loss: 1.0338736772537231,grad_norm: 0.9999994542825316, iteration: 142500
loss: 1.0221340656280518,grad_norm: 0.9999995120961876, iteration: 142501
loss: 1.0552107095718384,grad_norm: 0.9999999641495143, iteration: 142502
loss: 1.122660517692566,grad_norm: 0.9999994613916307, iteration: 142503
loss: 1.0653177499771118,grad_norm: 0.9999998926161536, iteration: 142504
loss: 0.9913276433944702,grad_norm: 0.9999997561382566, iteration: 142505
loss: 1.0137863159179688,grad_norm: 0.9999990523127218, iteration: 142506
loss: 1.0680800676345825,grad_norm: 0.9999999246055469, iteration: 142507
loss: 1.0340811014175415,grad_norm: 0.9999990922786868, iteration: 142508
loss: 1.191002607345581,grad_norm: 0.9999997022150382, iteration: 142509
loss: 1.0686426162719727,grad_norm: 0.9999989916655495, iteration: 142510
loss: 0.9929918646812439,grad_norm: 0.9999992835184106, iteration: 142511
loss: 1.1116297245025635,grad_norm: 0.99999911768236, iteration: 142512
loss: 0.9967087507247925,grad_norm: 0.8587540890374066, iteration: 142513
loss: 1.036818504333496,grad_norm: 0.9999993544920156, iteration: 142514
loss: 1.0571889877319336,grad_norm: 0.9999992535459326, iteration: 142515
loss: 0.9944066405296326,grad_norm: 0.963148341705979, iteration: 142516
loss: 1.0329326391220093,grad_norm: 0.9999995591181, iteration: 142517
loss: 1.1062206029891968,grad_norm: 0.9999999077689048, iteration: 142518
loss: 0.9940044283866882,grad_norm: 0.8579922186058322, iteration: 142519
loss: 1.2016042470932007,grad_norm: 0.9999993951528856, iteration: 142520
loss: 1.038112759590149,grad_norm: 0.9999990965986014, iteration: 142521
loss: 1.0689902305603027,grad_norm: 0.8878985002678146, iteration: 142522
loss: 1.0085511207580566,grad_norm: 0.9999991609399099, iteration: 142523
loss: 1.1911005973815918,grad_norm: 0.9999996844544898, iteration: 142524
loss: 1.0348387956619263,grad_norm: 0.999999178223933, iteration: 142525
loss: 1.0893187522888184,grad_norm: 0.9999993006639138, iteration: 142526
loss: 1.062837839126587,grad_norm: 0.999999627445925, iteration: 142527
loss: 0.999280571937561,grad_norm: 0.6918316565108845, iteration: 142528
loss: 1.0188980102539062,grad_norm: 0.999999705384247, iteration: 142529
loss: 1.0045219659805298,grad_norm: 0.9136127364655311, iteration: 142530
loss: 1.0348987579345703,grad_norm: 0.9999989731277906, iteration: 142531
loss: 1.170052170753479,grad_norm: 1.0000000138240333, iteration: 142532
loss: 1.2618346214294434,grad_norm: 0.9999992377903598, iteration: 142533
loss: 1.1357747316360474,grad_norm: 0.9999996079002732, iteration: 142534
loss: 1.0883973836898804,grad_norm: 0.9999994349427844, iteration: 142535
loss: 1.4056352376937866,grad_norm: 1.0000001237047489, iteration: 142536
loss: 0.9772850275039673,grad_norm: 0.999999567818676, iteration: 142537
loss: 1.1703497171401978,grad_norm: 0.9999992079644804, iteration: 142538
loss: 1.0904711484909058,grad_norm: 0.9999999358405605, iteration: 142539
loss: 1.1257885694503784,grad_norm: 0.9999992303663916, iteration: 142540
loss: 1.3155936002731323,grad_norm: 0.999999993263053, iteration: 142541
loss: 1.0794695615768433,grad_norm: 0.9999994330349621, iteration: 142542
loss: 1.082713007926941,grad_norm: 0.9999998637685279, iteration: 142543
loss: 1.076601266860962,grad_norm: 0.9999992636468007, iteration: 142544
loss: 1.0270915031433105,grad_norm: 0.8192630901112496, iteration: 142545
loss: 1.1159225702285767,grad_norm: 0.9999997580351975, iteration: 142546
loss: 1.096487045288086,grad_norm: 0.9999998787494333, iteration: 142547
loss: 1.0400917530059814,grad_norm: 0.9999997558861051, iteration: 142548
loss: 1.0159578323364258,grad_norm: 1.0000000126907103, iteration: 142549
loss: 1.0508326292037964,grad_norm: 0.9740988621450335, iteration: 142550
loss: 1.071410059928894,grad_norm: 0.9999999423124519, iteration: 142551
loss: 1.3147094249725342,grad_norm: 0.9999998369234111, iteration: 142552
loss: 1.0531545877456665,grad_norm: 0.9999991687378416, iteration: 142553
loss: 1.0850175619125366,grad_norm: 0.9999994232451976, iteration: 142554
loss: 1.0343332290649414,grad_norm: 0.9816145423820438, iteration: 142555
loss: 1.147292971611023,grad_norm: 0.9999999187081046, iteration: 142556
loss: 1.1407796144485474,grad_norm: 0.9999999849421881, iteration: 142557
loss: 1.0371166467666626,grad_norm: 0.9999994902338035, iteration: 142558
loss: 1.045351505279541,grad_norm: 0.9969192588753097, iteration: 142559
loss: 1.0613542795181274,grad_norm: 0.9999999655493083, iteration: 142560
loss: 1.0305792093276978,grad_norm: 0.9999992610607745, iteration: 142561
loss: 1.0355548858642578,grad_norm: 0.8791839524788401, iteration: 142562
loss: 1.0132339000701904,grad_norm: 0.9999992452541173, iteration: 142563
loss: 1.0427945852279663,grad_norm: 0.9999995595798088, iteration: 142564
loss: 1.001623511314392,grad_norm: 0.999999807799096, iteration: 142565
loss: 1.1061629056930542,grad_norm: 0.9999997122645603, iteration: 142566
loss: 1.114915370941162,grad_norm: 0.9999992417096342, iteration: 142567
loss: 1.0675463676452637,grad_norm: 0.9999997791139011, iteration: 142568
loss: 1.001538634300232,grad_norm: 0.87412795052646, iteration: 142569
loss: 1.0398722887039185,grad_norm: 0.9424337261287092, iteration: 142570
loss: 1.000929355621338,grad_norm: 0.9999995057541436, iteration: 142571
loss: 0.9779609441757202,grad_norm: 0.9999993028658688, iteration: 142572
loss: 1.0865628719329834,grad_norm: 0.9999991056943452, iteration: 142573
loss: 1.0295255184173584,grad_norm: 0.9999998942051306, iteration: 142574
loss: 1.047175645828247,grad_norm: 0.9999991105282917, iteration: 142575
loss: 1.050997257232666,grad_norm: 0.9999995544570339, iteration: 142576
loss: 1.0128960609436035,grad_norm: 0.9999992729342119, iteration: 142577
loss: 1.1067488193511963,grad_norm: 0.9999990240544582, iteration: 142578
loss: 1.2016130685806274,grad_norm: 0.999999907489881, iteration: 142579
loss: 0.9947463274002075,grad_norm: 0.849500975053258, iteration: 142580
loss: 1.0307493209838867,grad_norm: 0.9999995338905663, iteration: 142581
loss: 1.1109455823898315,grad_norm: 0.9872667023985109, iteration: 142582
loss: 1.0009207725524902,grad_norm: 0.9999995635365628, iteration: 142583
loss: 1.095644474029541,grad_norm: 1.0000000058673706, iteration: 142584
loss: 1.04949152469635,grad_norm: 0.9999996501177962, iteration: 142585
loss: 1.0051071643829346,grad_norm: 0.7814815217460631, iteration: 142586
loss: 1.0120619535446167,grad_norm: 0.9999990805725502, iteration: 142587
loss: 0.9883703589439392,grad_norm: 0.7965733577406893, iteration: 142588
loss: 1.0076967477798462,grad_norm: 0.9999997507649622, iteration: 142589
loss: 1.0460259914398193,grad_norm: 0.9569501922044499, iteration: 142590
loss: 1.0743334293365479,grad_norm: 0.9999998136291437, iteration: 142591
loss: 0.9945006966590881,grad_norm: 0.8456009059783397, iteration: 142592
loss: 0.9666933417320251,grad_norm: 0.8054446867675258, iteration: 142593
loss: 1.0095787048339844,grad_norm: 0.8480643161813662, iteration: 142594
loss: 1.0695277452468872,grad_norm: 0.9999997781728279, iteration: 142595
loss: 0.9855890274047852,grad_norm: 0.9999992406367648, iteration: 142596
loss: 1.0212663412094116,grad_norm: 0.9999999793894222, iteration: 142597
loss: 1.0373276472091675,grad_norm: 0.9999992472200574, iteration: 142598
loss: 1.029894232749939,grad_norm: 0.9999998887631109, iteration: 142599
loss: 0.9950530529022217,grad_norm: 0.9003256540541223, iteration: 142600
loss: 1.1615442037582397,grad_norm: 0.9999994885834662, iteration: 142601
loss: 1.0697687864303589,grad_norm: 0.9999998957727699, iteration: 142602
loss: 1.0388340950012207,grad_norm: 0.9156741680497069, iteration: 142603
loss: 1.0081921815872192,grad_norm: 0.8474548564323074, iteration: 142604
loss: 1.0164769887924194,grad_norm: 0.9999994575125691, iteration: 142605
loss: 1.015691876411438,grad_norm: 0.7934005318139691, iteration: 142606
loss: 0.9907747507095337,grad_norm: 0.8456946556044231, iteration: 142607
loss: 0.9942386746406555,grad_norm: 0.9160920370160122, iteration: 142608
loss: 1.1224621534347534,grad_norm: 0.9999993759933871, iteration: 142609
loss: 1.0347501039505005,grad_norm: 0.8765329985270842, iteration: 142610
loss: 1.0146840810775757,grad_norm: 0.9999990750519461, iteration: 142611
loss: 1.1091018915176392,grad_norm: 0.9999993580062702, iteration: 142612
loss: 0.970206081867218,grad_norm: 0.9349265264570699, iteration: 142613
loss: 1.0156594514846802,grad_norm: 0.9999991140653478, iteration: 142614
loss: 0.9998712539672852,grad_norm: 0.8503725659868958, iteration: 142615
loss: 0.9932922720909119,grad_norm: 0.9637390684444355, iteration: 142616
loss: 1.0550159215927124,grad_norm: 0.9999998675649584, iteration: 142617
loss: 1.0249110460281372,grad_norm: 0.9999990722264188, iteration: 142618
loss: 1.0818918943405151,grad_norm: 0.9999993700886902, iteration: 142619
loss: 1.1081866025924683,grad_norm: 0.9999994314095624, iteration: 142620
loss: 1.0403650999069214,grad_norm: 0.9999995377671214, iteration: 142621
loss: 1.004828929901123,grad_norm: 0.9999994375503082, iteration: 142622
loss: 1.0837830305099487,grad_norm: 0.9999998707931661, iteration: 142623
loss: 0.9970792531967163,grad_norm: 0.8237383202438421, iteration: 142624
loss: 1.1417053937911987,grad_norm: 1.000000037914209, iteration: 142625
loss: 1.0264034271240234,grad_norm: 0.999999134906025, iteration: 142626
loss: 0.9502729177474976,grad_norm: 0.9999990103015683, iteration: 142627
loss: 1.1090317964553833,grad_norm: 0.999999588296232, iteration: 142628
loss: 1.159744143486023,grad_norm: 1.0000000511483842, iteration: 142629
loss: 1.084999680519104,grad_norm: 0.9999993022133997, iteration: 142630
loss: 1.0673595666885376,grad_norm: 0.9999996661058983, iteration: 142631
loss: 1.1629387140274048,grad_norm: 0.9999999082672325, iteration: 142632
loss: 0.9959285855293274,grad_norm: 0.9943123903466368, iteration: 142633
loss: 1.1805225610733032,grad_norm: 0.9999999187302003, iteration: 142634
loss: 1.0244981050491333,grad_norm: 0.9999991910371832, iteration: 142635
loss: 1.0390129089355469,grad_norm: 0.8716990920979006, iteration: 142636
loss: 1.1286486387252808,grad_norm: 0.9999999256039669, iteration: 142637
loss: 1.058846354484558,grad_norm: 0.8426280238609852, iteration: 142638
loss: 1.0600941181182861,grad_norm: 0.9999997352631188, iteration: 142639
loss: 1.0063488483428955,grad_norm: 0.9209924009257638, iteration: 142640
loss: 1.0515462160110474,grad_norm: 0.9999992313915579, iteration: 142641
loss: 1.0383743047714233,grad_norm: 0.9150289102068672, iteration: 142642
loss: 1.0550140142440796,grad_norm: 0.9734657639992622, iteration: 142643
loss: 1.1019845008850098,grad_norm: 0.9999990510817572, iteration: 142644
loss: 1.080808401107788,grad_norm: 0.999999204775169, iteration: 142645
loss: 1.0956547260284424,grad_norm: 0.9999992417138484, iteration: 142646
loss: 1.0682978630065918,grad_norm: 0.9999996106663375, iteration: 142647
loss: 1.0043190717697144,grad_norm: 0.9999991726233388, iteration: 142648
loss: 0.9648243188858032,grad_norm: 0.8497394186258934, iteration: 142649
loss: 1.1240837574005127,grad_norm: 0.9999996811645929, iteration: 142650
loss: 1.0534025430679321,grad_norm: 0.9999992226036272, iteration: 142651
loss: 1.0564734935760498,grad_norm: 0.999999278730743, iteration: 142652
loss: 1.0319925546646118,grad_norm: 0.835872524366048, iteration: 142653
loss: 0.9931972026824951,grad_norm: 0.9694003526330148, iteration: 142654
loss: 1.0782711505889893,grad_norm: 0.9999990271029288, iteration: 142655
loss: 1.0273187160491943,grad_norm: 0.9839259324814902, iteration: 142656
loss: 1.087217092514038,grad_norm: 0.9999991785596839, iteration: 142657
loss: 1.0158922672271729,grad_norm: 0.9999998841827659, iteration: 142658
loss: 1.0615562200546265,grad_norm: 0.9999993714006262, iteration: 142659
loss: 1.021498203277588,grad_norm: 0.9999998327672452, iteration: 142660
loss: 1.1003774404525757,grad_norm: 0.9999996366839613, iteration: 142661
loss: 1.0404369831085205,grad_norm: 0.9222930488373149, iteration: 142662
loss: 1.0368973016738892,grad_norm: 0.9999997710512739, iteration: 142663
loss: 1.0673800706863403,grad_norm: 0.9999991477911189, iteration: 142664
loss: 1.2163825035095215,grad_norm: 0.9999995245773567, iteration: 142665
loss: 1.051831603050232,grad_norm: 0.999999490853333, iteration: 142666
loss: 0.9857274293899536,grad_norm: 0.9296246841888972, iteration: 142667
loss: 1.007820963859558,grad_norm: 0.9999989924833658, iteration: 142668
loss: 1.0899274349212646,grad_norm: 0.9380019171353388, iteration: 142669
loss: 1.2685935497283936,grad_norm: 0.9999996558834906, iteration: 142670
loss: 1.0145522356033325,grad_norm: 0.999999703383666, iteration: 142671
loss: 1.0303549766540527,grad_norm: 0.9613737414170823, iteration: 142672
loss: 1.1586450338363647,grad_norm: 0.9999995985670469, iteration: 142673
loss: 1.0073323249816895,grad_norm: 0.9999991258631739, iteration: 142674
loss: 1.139148235321045,grad_norm: 0.999999109975604, iteration: 142675
loss: 1.021019458770752,grad_norm: 0.9999996646177884, iteration: 142676
loss: 1.008843183517456,grad_norm: 0.8646426694223852, iteration: 142677
loss: 1.058639645576477,grad_norm: 0.9999996653638973, iteration: 142678
loss: 1.0435407161712646,grad_norm: 0.9999990887022334, iteration: 142679
loss: 1.0567924976348877,grad_norm: 0.9166713002111578, iteration: 142680
loss: 1.000098705291748,grad_norm: 0.9443724073178571, iteration: 142681
loss: 1.1279394626617432,grad_norm: 1.0000000197474983, iteration: 142682
loss: 1.0234792232513428,grad_norm: 0.9999993243308106, iteration: 142683
loss: 1.080530047416687,grad_norm: 0.9999998075182771, iteration: 142684
loss: 1.042341947555542,grad_norm: 0.8418342362069104, iteration: 142685
loss: 0.973502516746521,grad_norm: 0.8025326391357672, iteration: 142686
loss: 1.01786208152771,grad_norm: 0.9251727325273233, iteration: 142687
loss: 0.9837196469306946,grad_norm: 0.8580898778838385, iteration: 142688
loss: 1.1615766286849976,grad_norm: 0.9999995706989347, iteration: 142689
loss: 1.0920641422271729,grad_norm: 0.9999999128955406, iteration: 142690
loss: 1.1684749126434326,grad_norm: 0.9999994682200475, iteration: 142691
loss: 0.9669455885887146,grad_norm: 0.9999991598053346, iteration: 142692
loss: 1.008718729019165,grad_norm: 0.9746265540233485, iteration: 142693
loss: 1.0827081203460693,grad_norm: 0.9999994399803886, iteration: 142694
loss: 0.992634117603302,grad_norm: 0.7377376961982421, iteration: 142695
loss: 1.0172663927078247,grad_norm: 0.8534145185411554, iteration: 142696
loss: 1.0739412307739258,grad_norm: 0.9999993788733771, iteration: 142697
loss: 1.0019127130508423,grad_norm: 0.8351471285661322, iteration: 142698
loss: 1.0674961805343628,grad_norm: 0.9999993068994895, iteration: 142699
loss: 1.0729022026062012,grad_norm: 0.9999998246586584, iteration: 142700
loss: 0.927556037902832,grad_norm: 0.920413605493265, iteration: 142701
loss: 1.0637714862823486,grad_norm: 0.8702521854583599, iteration: 142702
loss: 1.0213046073913574,grad_norm: 0.9999992466983352, iteration: 142703
loss: 0.9891631007194519,grad_norm: 0.9999990899103318, iteration: 142704
loss: 1.0271813869476318,grad_norm: 0.9999992873372938, iteration: 142705
loss: 1.1007190942764282,grad_norm: 0.9999990311010921, iteration: 142706
loss: 1.0685452222824097,grad_norm: 0.9999991743483801, iteration: 142707
loss: 1.014991044998169,grad_norm: 0.9999992543535852, iteration: 142708
loss: 1.0041472911834717,grad_norm: 0.8919044349345825, iteration: 142709
loss: 1.0927469730377197,grad_norm: 0.9999993269442411, iteration: 142710
loss: 1.0005182027816772,grad_norm: 0.9999989997456915, iteration: 142711
loss: 1.0406609773635864,grad_norm: 0.9709941216794512, iteration: 142712
loss: 0.979990541934967,grad_norm: 0.9999994260706336, iteration: 142713
loss: 1.0455830097198486,grad_norm: 0.9999993004453016, iteration: 142714
loss: 1.0078246593475342,grad_norm: 0.8626509851211439, iteration: 142715
loss: 1.0439443588256836,grad_norm: 0.9999997536543657, iteration: 142716
loss: 0.975199818611145,grad_norm: 0.8249149650520524, iteration: 142717
loss: 1.0582549571990967,grad_norm: 0.9999992552413529, iteration: 142718
loss: 1.091508150100708,grad_norm: 0.9999999178995727, iteration: 142719
loss: 1.1591050624847412,grad_norm: 0.9999994938043794, iteration: 142720
loss: 1.0433670282363892,grad_norm: 0.9999998605186374, iteration: 142721
loss: 0.987324059009552,grad_norm: 0.7823359426452083, iteration: 142722
loss: 1.0506172180175781,grad_norm: 0.9999998854423009, iteration: 142723
loss: 0.9906319379806519,grad_norm: 0.9999991676256564, iteration: 142724
loss: 0.9853700399398804,grad_norm: 1.000000001029688, iteration: 142725
loss: 1.0634490251541138,grad_norm: 0.885219777685774, iteration: 142726
loss: 0.9895610809326172,grad_norm: 0.8725441072107439, iteration: 142727
loss: 1.1172734498977661,grad_norm: 0.9999994554073199, iteration: 142728
loss: 1.0627914667129517,grad_norm: 0.999999514579774, iteration: 142729
loss: 1.0990804433822632,grad_norm: 0.9999992511924563, iteration: 142730
loss: 1.0110299587249756,grad_norm: 0.9999995871990522, iteration: 142731
loss: 0.9943302273750305,grad_norm: 0.8782375148169976, iteration: 142732
loss: 1.0088181495666504,grad_norm: 0.9527585459876269, iteration: 142733
loss: 1.135803461074829,grad_norm: 0.9999992058905818, iteration: 142734
loss: 1.0236393213272095,grad_norm: 0.9999994941920477, iteration: 142735
loss: 1.0337601900100708,grad_norm: 0.9999996976163853, iteration: 142736
loss: 1.1310206651687622,grad_norm: 0.999999544761652, iteration: 142737
loss: 0.9934885501861572,grad_norm: 0.9999990034500351, iteration: 142738
loss: 1.0337028503417969,grad_norm: 0.999999861634054, iteration: 142739
loss: 1.0859241485595703,grad_norm: 0.9999994330517474, iteration: 142740
loss: 1.0070157051086426,grad_norm: 0.9211234351130827, iteration: 142741
loss: 1.178756594657898,grad_norm: 0.9999995692981286, iteration: 142742
loss: 1.003976583480835,grad_norm: 0.9502851549139699, iteration: 142743
loss: 1.0064146518707275,grad_norm: 0.9999996367709546, iteration: 142744
loss: 1.1743491888046265,grad_norm: 0.9999999980649039, iteration: 142745
loss: 1.018485188484192,grad_norm: 0.9999998543924175, iteration: 142746
loss: 1.1063063144683838,grad_norm: 1.000000039739566, iteration: 142747
loss: 1.0839399099349976,grad_norm: 0.9999993882996886, iteration: 142748
loss: 1.0385278463363647,grad_norm: 0.9999991568680762, iteration: 142749
loss: 1.054539442062378,grad_norm: 0.9837948954041318, iteration: 142750
loss: 1.1069856882095337,grad_norm: 0.9609232106413239, iteration: 142751
loss: 1.1245474815368652,grad_norm: 0.9999997266065459, iteration: 142752
loss: 1.163277506828308,grad_norm: 0.999999738976561, iteration: 142753
loss: 1.034819483757019,grad_norm: 0.9999990406810405, iteration: 142754
loss: 1.2288646697998047,grad_norm: 0.9999996224002893, iteration: 142755
loss: 0.9591065049171448,grad_norm: 0.8587545883343807, iteration: 142756
loss: 0.9925330877304077,grad_norm: 0.9999998581772318, iteration: 142757
loss: 0.9619253873825073,grad_norm: 0.9999991701736389, iteration: 142758
loss: 1.1357121467590332,grad_norm: 0.9999992430017367, iteration: 142759
loss: 1.0539931058883667,grad_norm: 0.9999993088283445, iteration: 142760
loss: 1.0444731712341309,grad_norm: 0.8205981621870094, iteration: 142761
loss: 1.1605552434921265,grad_norm: 0.9999997360650411, iteration: 142762
loss: 1.1448110342025757,grad_norm: 0.9999993209360462, iteration: 142763
loss: 0.9946504235267639,grad_norm: 0.9428598412200588, iteration: 142764
loss: 1.1542304754257202,grad_norm: 0.9999998201761303, iteration: 142765
loss: 1.008551001548767,grad_norm: 0.9999991332718915, iteration: 142766
loss: 1.126219391822815,grad_norm: 0.999999654328785, iteration: 142767
loss: 1.1115697622299194,grad_norm: 0.9813322626425872, iteration: 142768
loss: 1.138732671737671,grad_norm: 0.9999996104830949, iteration: 142769
loss: 1.071272373199463,grad_norm: 0.9999998576904434, iteration: 142770
loss: 1.046093463897705,grad_norm: 0.9999998335419933, iteration: 142771
loss: 1.0013175010681152,grad_norm: 0.9999996677396054, iteration: 142772
loss: 1.0637054443359375,grad_norm: 0.9999990021067733, iteration: 142773
loss: 1.0885207653045654,grad_norm: 0.9999993771598864, iteration: 142774
loss: 1.2639702558517456,grad_norm: 0.9999994945549889, iteration: 142775
loss: 1.0196037292480469,grad_norm: 0.7462108027655521, iteration: 142776
loss: 1.0231459140777588,grad_norm: 0.914431436088007, iteration: 142777
loss: 1.0047413110733032,grad_norm: 0.8047574602559022, iteration: 142778
loss: 1.1613593101501465,grad_norm: 0.9999997375131844, iteration: 142779
loss: 1.0748012065887451,grad_norm: 0.9999998034989837, iteration: 142780
loss: 1.0481723546981812,grad_norm: 0.9999995236302477, iteration: 142781
loss: 1.0142979621887207,grad_norm: 0.9999992139114763, iteration: 142782
loss: 1.0589476823806763,grad_norm: 0.9977453252284723, iteration: 142783
loss: 1.1643651723861694,grad_norm: 0.9999993572166576, iteration: 142784
loss: 1.0437376499176025,grad_norm: 0.9999993833983407, iteration: 142785
loss: 1.0543469190597534,grad_norm: 0.9999996548731662, iteration: 142786
loss: 1.076567530632019,grad_norm: 0.999999669995525, iteration: 142787
loss: 1.1219191551208496,grad_norm: 0.999999937073696, iteration: 142788
loss: 1.0498305559158325,grad_norm: 0.9999994124534833, iteration: 142789
loss: 1.0664056539535522,grad_norm: 0.9999999353384343, iteration: 142790
loss: 0.9883235096931458,grad_norm: 0.9999990685145684, iteration: 142791
loss: 1.0374178886413574,grad_norm: 0.9999993401181787, iteration: 142792
loss: 1.0147011280059814,grad_norm: 0.9999990569250052, iteration: 142793
loss: 1.0020581483840942,grad_norm: 0.9203937149716679, iteration: 142794
loss: 1.0339549779891968,grad_norm: 0.9999997709589156, iteration: 142795
loss: 1.118849515914917,grad_norm: 0.9999996312992806, iteration: 142796
loss: 1.0513793230056763,grad_norm: 0.9999998519651796, iteration: 142797
loss: 1.060723900794983,grad_norm: 0.9999996718082355, iteration: 142798
loss: 1.0212095975875854,grad_norm: 0.9999996072451769, iteration: 142799
loss: 1.144399642944336,grad_norm: 0.9999995527379462, iteration: 142800
loss: 1.1729613542556763,grad_norm: 0.9257978200037575, iteration: 142801
loss: 1.164650321006775,grad_norm: 0.9999991225971177, iteration: 142802
loss: 1.0306428670883179,grad_norm: 0.9999996537754259, iteration: 142803
loss: 1.0133174657821655,grad_norm: 0.9510541429136706, iteration: 142804
loss: 1.0766369104385376,grad_norm: 0.9999996949469604, iteration: 142805
loss: 1.254654049873352,grad_norm: 0.9999997901814979, iteration: 142806
loss: 1.0875121355056763,grad_norm: 0.9999997281852587, iteration: 142807
loss: 1.0575777292251587,grad_norm: 0.937618743260512, iteration: 142808
loss: 1.0097450017929077,grad_norm: 0.7813141107445946, iteration: 142809
loss: 0.9516845345497131,grad_norm: 0.9793273990778149, iteration: 142810
loss: 1.055945873260498,grad_norm: 0.9999992913720005, iteration: 142811
loss: 1.111301302909851,grad_norm: 0.9999994736223689, iteration: 142812
loss: 1.0302032232284546,grad_norm: 0.98447809663186, iteration: 142813
loss: 0.9974334239959717,grad_norm: 0.9999995776579635, iteration: 142814
loss: 1.0744130611419678,grad_norm: 0.9999992132906218, iteration: 142815
loss: 1.0632328987121582,grad_norm: 0.9999997268746353, iteration: 142816
loss: 1.1952091455459595,grad_norm: 0.9999999702110097, iteration: 142817
loss: 0.9824387431144714,grad_norm: 0.7790740664712915, iteration: 142818
loss: 1.0046374797821045,grad_norm: 0.9999992438495482, iteration: 142819
loss: 1.058605670928955,grad_norm: 1.000000023294701, iteration: 142820
loss: 1.0907931327819824,grad_norm: 0.9999992437202734, iteration: 142821
loss: 0.9822672605514526,grad_norm: 0.9999995139436882, iteration: 142822
loss: 1.0526043176651,grad_norm: 0.9999999529073086, iteration: 142823
loss: 1.0324488878250122,grad_norm: 0.9999994692808806, iteration: 142824
loss: 0.9914867877960205,grad_norm: 0.7323190951990015, iteration: 142825
loss: 1.00521719455719,grad_norm: 0.8674841439063458, iteration: 142826
loss: 1.014098048210144,grad_norm: 0.9999992292704857, iteration: 142827
loss: 1.0725274085998535,grad_norm: 0.9999995786268892, iteration: 142828
loss: 1.0838894844055176,grad_norm: 0.9999995953215429, iteration: 142829
loss: 1.0290688276290894,grad_norm: 0.9849494692096735, iteration: 142830
loss: 1.0264618396759033,grad_norm: 0.8467562453453658, iteration: 142831
loss: 0.9827050566673279,grad_norm: 0.8597623568638476, iteration: 142832
loss: 0.9725977182388306,grad_norm: 0.9415110463557654, iteration: 142833
loss: 1.0366805791854858,grad_norm: 0.999999810747899, iteration: 142834
loss: 1.085038423538208,grad_norm: 0.9999993004230412, iteration: 142835
loss: 1.161379098892212,grad_norm: 0.9999999009841524, iteration: 142836
loss: 1.0108518600463867,grad_norm: 0.9697444301654679, iteration: 142837
loss: 1.094834327697754,grad_norm: 0.9999994932598787, iteration: 142838
loss: 1.0866813659667969,grad_norm: 0.9999999308646518, iteration: 142839
loss: 1.0863856077194214,grad_norm: 0.9899743037456947, iteration: 142840
loss: 0.950581431388855,grad_norm: 0.8878189150384602, iteration: 142841
loss: 0.995281994342804,grad_norm: 0.7942268666432055, iteration: 142842
loss: 1.0658775568008423,grad_norm: 0.999999848407668, iteration: 142843
loss: 1.094208836555481,grad_norm: 0.9136179473742773, iteration: 142844
loss: 1.0169020891189575,grad_norm: 0.9999991562263338, iteration: 142845
loss: 1.0089031457901,grad_norm: 0.8749675421196961, iteration: 142846
loss: 1.0302742719650269,grad_norm: 0.7911263952457761, iteration: 142847
loss: 1.1201860904693604,grad_norm: 0.9999991721215339, iteration: 142848
loss: 1.0522979497909546,grad_norm: 0.999999722520793, iteration: 142849
loss: 1.0214548110961914,grad_norm: 0.9999991095781524, iteration: 142850
loss: 0.9926785230636597,grad_norm: 0.9999990344784147, iteration: 142851
loss: 1.118101716041565,grad_norm: 0.9999994650484474, iteration: 142852
loss: 0.9979772567749023,grad_norm: 1.0000000361212027, iteration: 142853
loss: 1.0229252576828003,grad_norm: 0.9999997632753599, iteration: 142854
loss: 1.1773591041564941,grad_norm: 0.9999999651976459, iteration: 142855
loss: 0.993803083896637,grad_norm: 0.9867278608630083, iteration: 142856
loss: 0.9682903289794922,grad_norm: 0.8530117661064884, iteration: 142857
loss: 1.0015064477920532,grad_norm: 0.9999998785247194, iteration: 142858
loss: 0.9927117824554443,grad_norm: 0.9108308003977053, iteration: 142859
loss: 1.0635344982147217,grad_norm: 0.8593832935495707, iteration: 142860
loss: 0.9910019636154175,grad_norm: 0.9217435302059176, iteration: 142861
loss: 1.0561127662658691,grad_norm: 0.9999992705683498, iteration: 142862
loss: 1.0018095970153809,grad_norm: 0.8211445393238779, iteration: 142863
loss: 1.0411776304244995,grad_norm: 0.999999347951193, iteration: 142864
loss: 1.0295298099517822,grad_norm: 0.896301156054385, iteration: 142865
loss: 1.0699998140335083,grad_norm: 0.8774020867276746, iteration: 142866
loss: 1.0713716745376587,grad_norm: 0.9400346510242596, iteration: 142867
loss: 1.0929311513900757,grad_norm: 0.8510590184534079, iteration: 142868
loss: 1.023076057434082,grad_norm: 0.8015252567775865, iteration: 142869
loss: 0.9804652333259583,grad_norm: 0.841061048648352, iteration: 142870
loss: 1.0989998579025269,grad_norm: 0.9999995660158458, iteration: 142871
loss: 1.084645390510559,grad_norm: 0.9999996387625691, iteration: 142872
loss: 0.9986568093299866,grad_norm: 0.9571996145452442, iteration: 142873
loss: 1.022414207458496,grad_norm: 0.99999981979648, iteration: 142874
loss: 1.0706446170806885,grad_norm: 0.9999994918712388, iteration: 142875
loss: 1.1150972843170166,grad_norm: 0.9999999648004785, iteration: 142876
loss: 1.1061339378356934,grad_norm: 0.9999997690074411, iteration: 142877
loss: 1.0519987344741821,grad_norm: 0.9999991946391346, iteration: 142878
loss: 1.032108187675476,grad_norm: 0.9125301708180427, iteration: 142879
loss: 0.9921028017997742,grad_norm: 0.9999997758235349, iteration: 142880
loss: 1.0038442611694336,grad_norm: 0.9999990938842857, iteration: 142881
loss: 0.9813033938407898,grad_norm: 0.999999609409263, iteration: 142882
loss: 1.0440208911895752,grad_norm: 0.9999998044362313, iteration: 142883
loss: 1.0284956693649292,grad_norm: 0.7904340403322826, iteration: 142884
loss: 0.9412031769752502,grad_norm: 0.9042834801653767, iteration: 142885
loss: 1.0418620109558105,grad_norm: 0.9999991498011727, iteration: 142886
loss: 1.0150820016860962,grad_norm: 0.7128328407741474, iteration: 142887
loss: 1.0494569540023804,grad_norm: 0.9999999106450972, iteration: 142888
loss: 0.9834192395210266,grad_norm: 0.8684944330254117, iteration: 142889
loss: 0.9968234896659851,grad_norm: 0.8614227395264958, iteration: 142890
loss: 1.0747575759887695,grad_norm: 0.9999990147292407, iteration: 142891
loss: 0.9686229228973389,grad_norm: 0.8943915628869733, iteration: 142892
loss: 1.0501761436462402,grad_norm: 0.9999994349336724, iteration: 142893
loss: 1.0977600812911987,grad_norm: 0.9999996734235156, iteration: 142894
loss: 1.0461187362670898,grad_norm: 0.9999997890503255, iteration: 142895
loss: 1.0246714353561401,grad_norm: 0.999999557694247, iteration: 142896
loss: 0.9770111441612244,grad_norm: 0.995210835413451, iteration: 142897
loss: 1.0434813499450684,grad_norm: 0.9257730076623274, iteration: 142898
loss: 0.9892020225524902,grad_norm: 0.9032741155093268, iteration: 142899
loss: 1.1363650560379028,grad_norm: 0.9999998936893265, iteration: 142900
loss: 1.0027782917022705,grad_norm: 0.999998900238869, iteration: 142901
loss: 1.0118036270141602,grad_norm: 0.9999993592498899, iteration: 142902
loss: 1.2935144901275635,grad_norm: 0.9999998604679728, iteration: 142903
loss: 0.9751465320587158,grad_norm: 0.8082407260347748, iteration: 142904
loss: 1.0154461860656738,grad_norm: 0.9301406146656973, iteration: 142905
loss: 0.9986752271652222,grad_norm: 0.9999996004218779, iteration: 142906
loss: 1.1212877035140991,grad_norm: 0.9999990466976522, iteration: 142907
loss: 1.103606104850769,grad_norm: 0.9999994753677359, iteration: 142908
loss: 1.2444658279418945,grad_norm: 0.9999997826453512, iteration: 142909
loss: 1.013467788696289,grad_norm: 0.9999994373001725, iteration: 142910
loss: 0.9712201356887817,grad_norm: 0.9999989367390951, iteration: 142911
loss: 1.0844347476959229,grad_norm: 0.9999998734231491, iteration: 142912
loss: 1.0199298858642578,grad_norm: 0.9999994377373299, iteration: 142913
loss: 1.0128496885299683,grad_norm: 0.9999992388643366, iteration: 142914
loss: 0.9969576597213745,grad_norm: 0.9999996407610073, iteration: 142915
loss: 1.0188050270080566,grad_norm: 0.9999991946695745, iteration: 142916
loss: 1.117974877357483,grad_norm: 0.9999992350650616, iteration: 142917
loss: 0.9945929050445557,grad_norm: 0.8133426680281403, iteration: 142918
loss: 1.0840736627578735,grad_norm: 0.9999992431327925, iteration: 142919
loss: 1.0962883234024048,grad_norm: 0.999999350785636, iteration: 142920
loss: 1.039515733718872,grad_norm: 0.9999999620070191, iteration: 142921
loss: 0.9897821545600891,grad_norm: 0.9999992043417956, iteration: 142922
loss: 1.0704717636108398,grad_norm: 0.9999998644335133, iteration: 142923
loss: 0.9589159488677979,grad_norm: 0.8465551202217794, iteration: 142924
loss: 0.9723464250564575,grad_norm: 0.9999999754621599, iteration: 142925
loss: 1.0239551067352295,grad_norm: 0.8913762634771545, iteration: 142926
loss: 1.0794193744659424,grad_norm: 0.9171716001138287, iteration: 142927
loss: 1.0423524379730225,grad_norm: 0.9999999073016984, iteration: 142928
loss: 1.0378303527832031,grad_norm: 0.7606601668273585, iteration: 142929
loss: 0.9744353294372559,grad_norm: 0.7467277805532924, iteration: 142930
loss: 1.0528476238250732,grad_norm: 0.9999991759217767, iteration: 142931
loss: 1.046776533126831,grad_norm: 0.9999997007227583, iteration: 142932
loss: 1.2234838008880615,grad_norm: 0.9999991623088695, iteration: 142933
loss: 1.0708414316177368,grad_norm: 0.9999998829716933, iteration: 142934
loss: 1.0329434871673584,grad_norm: 0.9999991868300273, iteration: 142935
loss: 1.1087348461151123,grad_norm: 0.9999996256374152, iteration: 142936
loss: 1.057371973991394,grad_norm: 0.9999993342257972, iteration: 142937
loss: 1.0798100233078003,grad_norm: 0.9999996588477764, iteration: 142938
loss: 1.0614241361618042,grad_norm: 0.999999694687863, iteration: 142939
loss: 1.004135012626648,grad_norm: 0.9999993498130656, iteration: 142940
loss: 0.9531780481338501,grad_norm: 0.9307912261381966, iteration: 142941
loss: 1.0493922233581543,grad_norm: 0.9999994485664118, iteration: 142942
loss: 1.0576878786087036,grad_norm: 0.9894900725541632, iteration: 142943
loss: 0.9962684512138367,grad_norm: 0.7306662702859363, iteration: 142944
loss: 0.9979491829872131,grad_norm: 0.794515993114993, iteration: 142945
loss: 1.0171984434127808,grad_norm: 0.9999991153868721, iteration: 142946
loss: 1.0840272903442383,grad_norm: 0.9241280194307813, iteration: 142947
loss: 1.2323962450027466,grad_norm: 0.9999998494433715, iteration: 142948
loss: 1.0021309852600098,grad_norm: 0.9999997209499909, iteration: 142949
loss: 1.0863667726516724,grad_norm: 0.9999990320716446, iteration: 142950
loss: 1.0125939846038818,grad_norm: 0.9999998650429316, iteration: 142951
loss: 1.0179009437561035,grad_norm: 0.9999992683695367, iteration: 142952
loss: 1.0697880983352661,grad_norm: 0.9999998548183315, iteration: 142953
loss: 1.020442247390747,grad_norm: 0.9999992907190222, iteration: 142954
loss: 1.037492036819458,grad_norm: 0.9999994275684861, iteration: 142955
loss: 1.0903838872909546,grad_norm: 0.9999999434090042, iteration: 142956
loss: 0.9374409914016724,grad_norm: 0.9556205020989925, iteration: 142957
loss: 0.9334099292755127,grad_norm: 0.8971284762322773, iteration: 142958
loss: 1.0171364545822144,grad_norm: 0.9999994617841225, iteration: 142959
loss: 0.9915415048599243,grad_norm: 0.960518466697461, iteration: 142960
loss: 1.0824086666107178,grad_norm: 0.9999998857149515, iteration: 142961
loss: 0.9807762503623962,grad_norm: 0.9731418775228026, iteration: 142962
loss: 0.9776325225830078,grad_norm: 0.8501607974863801, iteration: 142963
loss: 0.9742068648338318,grad_norm: 0.9999998662465159, iteration: 142964
loss: 1.0308462381362915,grad_norm: 0.8579197887321667, iteration: 142965
loss: 1.0793323516845703,grad_norm: 0.999999177700333, iteration: 142966
loss: 1.0569629669189453,grad_norm: 0.9999997057070802, iteration: 142967
loss: 1.0129040479660034,grad_norm: 1.000000022636278, iteration: 142968
loss: 1.0293405055999756,grad_norm: 0.9999993704195054, iteration: 142969
loss: 0.9802699089050293,grad_norm: 0.9999994569571812, iteration: 142970
loss: 0.9838675856590271,grad_norm: 0.8384400628488141, iteration: 142971
loss: 0.9992040395736694,grad_norm: 0.919905714798863, iteration: 142972
loss: 1.0219435691833496,grad_norm: 0.9999990199482736, iteration: 142973
loss: 1.0821627378463745,grad_norm: 0.9999996762742441, iteration: 142974
loss: 1.092122197151184,grad_norm: 0.9999997353566513, iteration: 142975
loss: 1.2435634136199951,grad_norm: 0.9999997966884141, iteration: 142976
loss: 1.0363086462020874,grad_norm: 0.9999991472065536, iteration: 142977
loss: 1.0337038040161133,grad_norm: 0.9999992496066449, iteration: 142978
loss: 1.0235551595687866,grad_norm: 0.8671646896047146, iteration: 142979
loss: 1.0467290878295898,grad_norm: 0.9999994741036006, iteration: 142980
loss: 1.0116255283355713,grad_norm: 0.8080540303530892, iteration: 142981
loss: 1.0035972595214844,grad_norm: 0.9999991729518999, iteration: 142982
loss: 1.0027329921722412,grad_norm: 0.9999992288523104, iteration: 142983
loss: 1.1648920774459839,grad_norm: 0.9999997449577007, iteration: 142984
loss: 1.014512300491333,grad_norm: 0.9367692274387974, iteration: 142985
loss: 1.0179662704467773,grad_norm: 0.9999991549700793, iteration: 142986
loss: 0.9913684725761414,grad_norm: 0.9012626353299534, iteration: 142987
loss: 1.0395383834838867,grad_norm: 0.9999999065531064, iteration: 142988
loss: 0.9884552955627441,grad_norm: 0.9999991631446495, iteration: 142989
loss: 1.0453184843063354,grad_norm: 0.9999991093857475, iteration: 142990
loss: 0.9946507811546326,grad_norm: 0.7202803300689253, iteration: 142991
loss: 1.0225874185562134,grad_norm: 0.9999992218511585, iteration: 142992
loss: 1.0490344762802124,grad_norm: 0.9999993540840526, iteration: 142993
loss: 1.069832444190979,grad_norm: 0.9999997295364386, iteration: 142994
loss: 0.9833205342292786,grad_norm: 0.9939240984582511, iteration: 142995
loss: 1.0346180200576782,grad_norm: 0.9999991377476132, iteration: 142996
loss: 1.268304705619812,grad_norm: 0.9999999094222136, iteration: 142997
loss: 1.162588357925415,grad_norm: 0.9999996366883037, iteration: 142998
loss: 1.0226629972457886,grad_norm: 0.8070060820443784, iteration: 142999
loss: 1.031374216079712,grad_norm: 0.9999990980120024, iteration: 143000
loss: 1.0227174758911133,grad_norm: 0.9999994034394657, iteration: 143001
loss: 1.0067676305770874,grad_norm: 0.7605672538867715, iteration: 143002
loss: 0.9819207787513733,grad_norm: 0.9999992905202922, iteration: 143003
loss: 1.0326699018478394,grad_norm: 0.9536856270168065, iteration: 143004
loss: 0.9937843680381775,grad_norm: 0.6492098066337467, iteration: 143005
loss: 1.0793875455856323,grad_norm: 0.9999994684709892, iteration: 143006
loss: 1.0049504041671753,grad_norm: 0.82118212756828, iteration: 143007
loss: 1.0849039554595947,grad_norm: 0.9203341675655675, iteration: 143008
loss: 1.0734508037567139,grad_norm: 0.9999998417329393, iteration: 143009
loss: 1.0111700296401978,grad_norm: 0.9999992538957199, iteration: 143010
loss: 0.993537962436676,grad_norm: 0.9999993423699769, iteration: 143011
loss: 1.0787534713745117,grad_norm: 0.9999997171927131, iteration: 143012
loss: 1.0330772399902344,grad_norm: 0.9999991795429569, iteration: 143013
loss: 1.0472725629806519,grad_norm: 0.9999997866789192, iteration: 143014
loss: 1.0211926698684692,grad_norm: 0.9074088522235205, iteration: 143015
loss: 0.9908198714256287,grad_norm: 0.850463207642753, iteration: 143016
loss: 0.965981662273407,grad_norm: 0.9999990269245754, iteration: 143017
loss: 1.0377126932144165,grad_norm: 0.9999998876248327, iteration: 143018
loss: 1.0146864652633667,grad_norm: 0.9999995922132625, iteration: 143019
loss: 1.0037466287612915,grad_norm: 0.7945640122651454, iteration: 143020
loss: 0.9699665904045105,grad_norm: 0.9878536636320646, iteration: 143021
loss: 1.0454226732254028,grad_norm: 0.9999992015176716, iteration: 143022
loss: 1.1104415655136108,grad_norm: 0.9999996868183066, iteration: 143023
loss: 0.9976204633712769,grad_norm: 0.9999998359755161, iteration: 143024
loss: 1.0791441202163696,grad_norm: 0.9164996409334935, iteration: 143025
loss: 1.104515790939331,grad_norm: 0.9999994059770828, iteration: 143026
loss: 1.0407145023345947,grad_norm: 0.9999995998344337, iteration: 143027
loss: 1.0604761838912964,grad_norm: 0.9999990408841074, iteration: 143028
loss: 0.9985843896865845,grad_norm: 0.8703555656546064, iteration: 143029
loss: 1.0067652463912964,grad_norm: 0.9075883479227513, iteration: 143030
loss: 1.0200711488723755,grad_norm: 0.9999989952230695, iteration: 143031
loss: 1.0578266382217407,grad_norm: 0.9999991804775887, iteration: 143032
loss: 0.9628872275352478,grad_norm: 0.8491252036803127, iteration: 143033
loss: 1.0138750076293945,grad_norm: 0.9214682353588782, iteration: 143034
loss: 1.0270971059799194,grad_norm: 0.8352646089712222, iteration: 143035
loss: 1.1270509958267212,grad_norm: 0.9999993456388142, iteration: 143036
loss: 1.0233752727508545,grad_norm: 0.9999992420891015, iteration: 143037
loss: 1.011010766029358,grad_norm: 0.8904512810468262, iteration: 143038
loss: 0.9937205910682678,grad_norm: 0.7575901431283942, iteration: 143039
loss: 0.9922432899475098,grad_norm: 0.9087604866850082, iteration: 143040
loss: 0.973375141620636,grad_norm: 0.8481621268612993, iteration: 143041
loss: 1.0256962776184082,grad_norm: 0.9999994019006868, iteration: 143042
loss: 1.013643741607666,grad_norm: 0.8619189599056338, iteration: 143043
loss: 1.0452250242233276,grad_norm: 0.999998978907278, iteration: 143044
loss: 0.9567745923995972,grad_norm: 0.9337988044648146, iteration: 143045
loss: 1.0396775007247925,grad_norm: 0.9999993836678305, iteration: 143046
loss: 0.9954303503036499,grad_norm: 0.9999993007165637, iteration: 143047
loss: 0.9979574084281921,grad_norm: 0.8044960292193878, iteration: 143048
loss: 1.051829218864441,grad_norm: 0.9999992789626815, iteration: 143049
loss: 0.987705409526825,grad_norm: 0.8305229845924609, iteration: 143050
loss: 0.9849199652671814,grad_norm: 0.9999997292627052, iteration: 143051
loss: 0.9682566523551941,grad_norm: 0.9371889637112913, iteration: 143052
loss: 1.017613172531128,grad_norm: 0.9999990378514032, iteration: 143053
loss: 0.9877800345420837,grad_norm: 0.9999990594513218, iteration: 143054
loss: 1.022963285446167,grad_norm: 0.9999995321453446, iteration: 143055
loss: 0.997864305973053,grad_norm: 0.9087538147752446, iteration: 143056
loss: 1.0043772459030151,grad_norm: 0.8181474327905671, iteration: 143057
loss: 0.9761912226676941,grad_norm: 0.8647967866163582, iteration: 143058
loss: 1.0288152694702148,grad_norm: 0.8633147713573671, iteration: 143059
loss: 0.9751080274581909,grad_norm: 0.7734117721518445, iteration: 143060
loss: 1.0229648351669312,grad_norm: 0.946917946234544, iteration: 143061
loss: 1.0804036855697632,grad_norm: 0.9909508576392365, iteration: 143062
loss: 0.999411940574646,grad_norm: 0.9687816671165612, iteration: 143063
loss: 1.0322253704071045,grad_norm: 0.9999999114514785, iteration: 143064
loss: 1.0159190893173218,grad_norm: 0.9999992886881754, iteration: 143065
loss: 1.050990104675293,grad_norm: 0.9999999357551538, iteration: 143066
loss: 1.0120410919189453,grad_norm: 0.9999995790781686, iteration: 143067
loss: 0.9671875238418579,grad_norm: 0.919986825503492, iteration: 143068
loss: 1.01488196849823,grad_norm: 0.9999994486576513, iteration: 143069
loss: 0.9961819648742676,grad_norm: 0.9999993773384418, iteration: 143070
loss: 1.0744014978408813,grad_norm: 0.9140223059939853, iteration: 143071
loss: 1.0415343046188354,grad_norm: 0.999999282924057, iteration: 143072
loss: 0.9701242446899414,grad_norm: 0.8363031240468685, iteration: 143073
loss: 1.039413332939148,grad_norm: 0.8890447490643573, iteration: 143074
loss: 0.9521588683128357,grad_norm: 0.8606809496866823, iteration: 143075
loss: 1.0392804145812988,grad_norm: 0.9999991534646933, iteration: 143076
loss: 1.0245436429977417,grad_norm: 0.8089717366607633, iteration: 143077
loss: 1.0216865539550781,grad_norm: 0.9511448485914964, iteration: 143078
loss: 1.0027990341186523,grad_norm: 0.7762636252458411, iteration: 143079
loss: 1.0720993280410767,grad_norm: 0.9999993729748929, iteration: 143080
loss: 1.0090794563293457,grad_norm: 0.9972099301554654, iteration: 143081
loss: 1.1566883325576782,grad_norm: 0.9999996302159458, iteration: 143082
loss: 0.987781286239624,grad_norm: 0.9999991992050208, iteration: 143083
loss: 1.027653694152832,grad_norm: 0.9999993975082726, iteration: 143084
loss: 1.0522198677062988,grad_norm: 0.9999992559145471, iteration: 143085
loss: 0.995229184627533,grad_norm: 0.9999991648533106, iteration: 143086
loss: 1.1966578960418701,grad_norm: 0.9999992249969266, iteration: 143087
loss: 1.0125689506530762,grad_norm: 0.866544775895791, iteration: 143088
loss: 1.0240840911865234,grad_norm: 0.860901700011062, iteration: 143089
loss: 1.15823233127594,grad_norm: 0.9999997799529988, iteration: 143090
loss: 1.0445691347122192,grad_norm: 0.9999998173376502, iteration: 143091
loss: 1.0223850011825562,grad_norm: 0.999999664587211, iteration: 143092
loss: 1.0486024618148804,grad_norm: 0.9999991935180884, iteration: 143093
loss: 1.0036096572875977,grad_norm: 0.9999991442733781, iteration: 143094
loss: 0.9991645812988281,grad_norm: 0.999999789090722, iteration: 143095
loss: 1.0318788290023804,grad_norm: 0.9999992194200881, iteration: 143096
loss: 1.1769253015518188,grad_norm: 0.9999995536400782, iteration: 143097
loss: 1.0440291166305542,grad_norm: 0.9999995978606967, iteration: 143098
loss: 1.0450280904769897,grad_norm: 0.9999997193788716, iteration: 143099
loss: 1.0849262475967407,grad_norm: 0.9999990489262098, iteration: 143100
loss: 0.9920560121536255,grad_norm: 0.8393539876846102, iteration: 143101
loss: 1.0838886499404907,grad_norm: 0.9999995263508297, iteration: 143102
loss: 0.9876118302345276,grad_norm: 0.9599973103175409, iteration: 143103
loss: 0.9849106669425964,grad_norm: 0.9553851530679308, iteration: 143104
loss: 1.0549850463867188,grad_norm: 0.9999991159812323, iteration: 143105
loss: 1.0313416719436646,grad_norm: 0.9999992906176483, iteration: 143106
loss: 1.0098586082458496,grad_norm: 0.9999995354723208, iteration: 143107
loss: 1.048105239868164,grad_norm: 0.9999992473673751, iteration: 143108
loss: 0.9587136507034302,grad_norm: 0.8720139057324888, iteration: 143109
loss: 1.0111541748046875,grad_norm: 0.9999993872336542, iteration: 143110
loss: 1.04701828956604,grad_norm: 0.9999991394482807, iteration: 143111
loss: 1.0316556692123413,grad_norm: 0.9999992645506197, iteration: 143112
loss: 1.028487205505371,grad_norm: 0.7509016594416567, iteration: 143113
loss: 0.999913215637207,grad_norm: 0.8269331699016963, iteration: 143114
loss: 1.0051705837249756,grad_norm: 0.9254802557315268, iteration: 143115
loss: 1.1053224802017212,grad_norm: 1.0000000441115, iteration: 143116
loss: 1.0197334289550781,grad_norm: 0.917053975896553, iteration: 143117
loss: 0.999052107334137,grad_norm: 0.8905371065448484, iteration: 143118
loss: 1.0719000101089478,grad_norm: 0.9999991151203282, iteration: 143119
loss: 1.130095362663269,grad_norm: 0.9999999320349913, iteration: 143120
loss: 1.0414713621139526,grad_norm: 0.9999990800120958, iteration: 143121
loss: 1.001115322113037,grad_norm: 0.9999989864857962, iteration: 143122
loss: 1.1870689392089844,grad_norm: 0.999999841908464, iteration: 143123
loss: 0.984194815158844,grad_norm: 0.7931098990231502, iteration: 143124
loss: 1.0605703592300415,grad_norm: 0.9873937192805431, iteration: 143125
loss: 0.999237596988678,grad_norm: 0.9999992466222128, iteration: 143126
loss: 1.0099900960922241,grad_norm: 0.9459464205001291, iteration: 143127
loss: 1.0477285385131836,grad_norm: 0.999999122728703, iteration: 143128
loss: 1.006784439086914,grad_norm: 0.9208992514062061, iteration: 143129
loss: 1.0055129528045654,grad_norm: 0.9161861051670027, iteration: 143130
loss: 0.9849973917007446,grad_norm: 0.999999264951179, iteration: 143131
loss: 1.0611611604690552,grad_norm: 0.7159057199221746, iteration: 143132
loss: 0.9547756910324097,grad_norm: 0.9745443024819925, iteration: 143133
loss: 1.1215177774429321,grad_norm: 0.9999996365734696, iteration: 143134
loss: 1.1346946954727173,grad_norm: 0.9999993450014677, iteration: 143135
loss: 1.0209439992904663,grad_norm: 0.9999991036295276, iteration: 143136
loss: 0.996627926826477,grad_norm: 0.9377295280207029, iteration: 143137
loss: 1.0021116733551025,grad_norm: 0.9211857405576384, iteration: 143138
loss: 0.9833037853240967,grad_norm: 0.8569761580680787, iteration: 143139
loss: 0.9806873798370361,grad_norm: 0.8160743869865734, iteration: 143140
loss: 0.969632089138031,grad_norm: 0.9999992090074062, iteration: 143141
loss: 1.011297345161438,grad_norm: 0.9999989706918211, iteration: 143142
loss: 1.0197926759719849,grad_norm: 0.8497075287828049, iteration: 143143
loss: 0.981046199798584,grad_norm: 0.8986857625197888, iteration: 143144
loss: 0.9875622391700745,grad_norm: 0.9294019150955241, iteration: 143145
loss: 1.1437246799468994,grad_norm: 0.9999993467512631, iteration: 143146
loss: 0.9591115117073059,grad_norm: 0.9927358089532684, iteration: 143147
loss: 1.0817574262619019,grad_norm: 0.9999994798018973, iteration: 143148
loss: 0.9989193677902222,grad_norm: 0.9983383461416874, iteration: 143149
loss: 1.0233887434005737,grad_norm: 0.9220795448848144, iteration: 143150
loss: 1.0630626678466797,grad_norm: 0.9999994025121269, iteration: 143151
loss: 1.0058767795562744,grad_norm: 0.8805237206937767, iteration: 143152
loss: 1.0526854991912842,grad_norm: 0.9999992838530104, iteration: 143153
loss: 1.0863405466079712,grad_norm: 0.9999996414451812, iteration: 143154
loss: 0.9820986390113831,grad_norm: 0.9459535896761196, iteration: 143155
loss: 1.0069711208343506,grad_norm: 0.8025211834875888, iteration: 143156
loss: 1.0285701751708984,grad_norm: 0.9999998693277725, iteration: 143157
loss: 1.0993438959121704,grad_norm: 0.9999990775827556, iteration: 143158
loss: 1.1827155351638794,grad_norm: 0.9999997769407022, iteration: 143159
loss: 0.9837718605995178,grad_norm: 0.7300308367668501, iteration: 143160
loss: 1.0185245275497437,grad_norm: 0.8325827031963113, iteration: 143161
loss: 0.9827606678009033,grad_norm: 0.9999990814017826, iteration: 143162
loss: 1.0067521333694458,grad_norm: 0.8956826772988409, iteration: 143163
loss: 0.9719847440719604,grad_norm: 0.8871938082566511, iteration: 143164
loss: 1.0069607496261597,grad_norm: 0.9084024119987265, iteration: 143165
loss: 0.9897940158843994,grad_norm: 0.9999992106554547, iteration: 143166
loss: 1.0557631254196167,grad_norm: 0.9539253950245016, iteration: 143167
loss: 1.025482177734375,grad_norm: 0.9603338306153205, iteration: 143168
loss: 1.0344181060791016,grad_norm: 0.9591798207516586, iteration: 143169
loss: 0.9957622289657593,grad_norm: 0.8627505909637085, iteration: 143170
loss: 1.008018136024475,grad_norm: 0.8165999971864529, iteration: 143171
loss: 1.035132884979248,grad_norm: 0.8180636471619963, iteration: 143172
loss: 1.0437952280044556,grad_norm: 0.921763227551739, iteration: 143173
loss: 1.0700962543487549,grad_norm: 0.9070342730092366, iteration: 143174
loss: 1.0007389783859253,grad_norm: 0.7388119026630393, iteration: 143175
loss: 1.0199030637741089,grad_norm: 0.9999995884946036, iteration: 143176
loss: 0.9876139760017395,grad_norm: 0.9999991692410454, iteration: 143177
loss: 1.0113290548324585,grad_norm: 0.9999991777687288, iteration: 143178
loss: 1.03262460231781,grad_norm: 0.9633555978820088, iteration: 143179
loss: 1.055138349533081,grad_norm: 0.9999991905420604, iteration: 143180
loss: 1.0220420360565186,grad_norm: 0.9529398753830274, iteration: 143181
loss: 1.0731794834136963,grad_norm: 0.9999998770858475, iteration: 143182
loss: 1.0197991132736206,grad_norm: 0.858773869108485, iteration: 143183
loss: 1.0113314390182495,grad_norm: 0.9999993693710757, iteration: 143184
loss: 1.0748945474624634,grad_norm: 0.999999123307904, iteration: 143185
loss: 1.0573058128356934,grad_norm: 0.9999996112986831, iteration: 143186
loss: 1.0543804168701172,grad_norm: 0.9999999429313228, iteration: 143187
loss: 1.0480765104293823,grad_norm: 0.9999991048249229, iteration: 143188
loss: 0.9934841990470886,grad_norm: 0.7859150990595065, iteration: 143189
loss: 1.0675103664398193,grad_norm: 0.9999990576696577, iteration: 143190
loss: 0.981053352355957,grad_norm: 0.8746403997303391, iteration: 143191
loss: 0.992592453956604,grad_norm: 0.9999993732903054, iteration: 143192
loss: 0.9948010444641113,grad_norm: 0.8142224189101857, iteration: 143193
loss: 0.9938727021217346,grad_norm: 0.9641936073619389, iteration: 143194
loss: 1.0326536893844604,grad_norm: 0.859612027248472, iteration: 143195
loss: 1.0215098857879639,grad_norm: 0.9520759891235692, iteration: 143196
loss: 1.0700924396514893,grad_norm: 0.99999999304104, iteration: 143197
loss: 1.008017659187317,grad_norm: 0.8454905533635121, iteration: 143198
loss: 1.016477346420288,grad_norm: 0.9999995102739987, iteration: 143199
loss: 1.0089997053146362,grad_norm: 0.8044759594584138, iteration: 143200
loss: 0.9972156286239624,grad_norm: 0.99999986029553, iteration: 143201
loss: 0.9988170266151428,grad_norm: 0.9427936455181084, iteration: 143202
loss: 1.0365946292877197,grad_norm: 0.7683553468112855, iteration: 143203
loss: 0.9976547956466675,grad_norm: 0.979416121032093, iteration: 143204
loss: 1.067457914352417,grad_norm: 0.9999997130095803, iteration: 143205
loss: 1.0990451574325562,grad_norm: 0.9999997344514583, iteration: 143206
loss: 0.9736786484718323,grad_norm: 0.8221971205888843, iteration: 143207
loss: 1.0538493394851685,grad_norm: 0.9999999115540565, iteration: 143208
loss: 0.9835137128829956,grad_norm: 0.8474080117391456, iteration: 143209
loss: 1.0039550065994263,grad_norm: 0.9076403194698246, iteration: 143210
loss: 1.01655113697052,grad_norm: 0.959364712301183, iteration: 143211
loss: 1.0128748416900635,grad_norm: 0.9999992726876409, iteration: 143212
loss: 1.017828106880188,grad_norm: 0.9999996514296544, iteration: 143213
loss: 1.0360732078552246,grad_norm: 0.7838862635026931, iteration: 143214
loss: 1.0004918575286865,grad_norm: 0.7533395290835783, iteration: 143215
loss: 1.0296459197998047,grad_norm: 0.9306578923023675, iteration: 143216
loss: 1.0226047039031982,grad_norm: 0.9204497690446674, iteration: 143217
loss: 1.0017050504684448,grad_norm: 0.8651029367365897, iteration: 143218
loss: 1.1588383913040161,grad_norm: 0.9999994532039875, iteration: 143219
loss: 1.0349963903427124,grad_norm: 0.9999994484475367, iteration: 143220
loss: 0.9907389283180237,grad_norm: 0.9766328115464309, iteration: 143221
loss: 0.9889941215515137,grad_norm: 0.6835361703311403, iteration: 143222
loss: 0.9996767044067383,grad_norm: 0.8783732832996325, iteration: 143223
loss: 1.2596322298049927,grad_norm: 0.9999997818915444, iteration: 143224
loss: 0.9949999451637268,grad_norm: 0.8931637033766153, iteration: 143225
loss: 0.9913513660430908,grad_norm: 0.9115335706378661, iteration: 143226
loss: 0.9715800285339355,grad_norm: 0.9250735796309235, iteration: 143227
loss: 0.9988045692443848,grad_norm: 0.9977322778375368, iteration: 143228
loss: 1.0276755094528198,grad_norm: 0.9999994612266265, iteration: 143229
loss: 1.031557321548462,grad_norm: 0.8558765080001538, iteration: 143230
loss: 1.0016543865203857,grad_norm: 0.9999992820096135, iteration: 143231
loss: 1.0945560932159424,grad_norm: 0.999999219103445, iteration: 143232
loss: 1.0274436473846436,grad_norm: 0.9999994298350552, iteration: 143233
loss: 0.9764062762260437,grad_norm: 0.7271760397865125, iteration: 143234
loss: 1.0461816787719727,grad_norm: 0.9999995234938247, iteration: 143235
loss: 1.013511300086975,grad_norm: 0.9999995248624686, iteration: 143236
loss: 0.9867377877235413,grad_norm: 0.9999999743892863, iteration: 143237
loss: 1.0063725709915161,grad_norm: 0.999999209821592, iteration: 143238
loss: 1.0357046127319336,grad_norm: 0.9065332394343848, iteration: 143239
loss: 1.1679078340530396,grad_norm: 0.9999997354687535, iteration: 143240
loss: 0.9886363744735718,grad_norm: 0.9957080631173066, iteration: 143241
loss: 0.9801050424575806,grad_norm: 0.9999993832491086, iteration: 143242
loss: 0.9774370789527893,grad_norm: 0.9999990666034656, iteration: 143243
loss: 1.0319924354553223,grad_norm: 0.9999999150792325, iteration: 143244
loss: 1.051309585571289,grad_norm: 0.8205037982452178, iteration: 143245
loss: 1.0393824577331543,grad_norm: 0.9999994779791795, iteration: 143246
loss: 1.0474615097045898,grad_norm: 0.9999992479849407, iteration: 143247
loss: 0.9850471019744873,grad_norm: 0.76764475497439, iteration: 143248
loss: 1.025179386138916,grad_norm: 0.999999144219618, iteration: 143249
loss: 0.994875967502594,grad_norm: 0.9696081798362395, iteration: 143250
loss: 0.9490551948547363,grad_norm: 0.9999990437925594, iteration: 143251
loss: 0.98106449842453,grad_norm: 0.7468921321723138, iteration: 143252
loss: 1.0177010297775269,grad_norm: 0.9999992718839547, iteration: 143253
loss: 0.9922184348106384,grad_norm: 0.8442371171034988, iteration: 143254
loss: 0.9966371059417725,grad_norm: 0.9999990814203344, iteration: 143255
loss: 1.0008231401443481,grad_norm: 0.8669479553366755, iteration: 143256
loss: 1.0177037715911865,grad_norm: 0.8404380463125728, iteration: 143257
loss: 1.0411244630813599,grad_norm: 0.9842416617309709, iteration: 143258
loss: 1.0600191354751587,grad_norm: 0.9105693955472962, iteration: 143259
loss: 0.9926313757896423,grad_norm: 0.7740530894917113, iteration: 143260
loss: 1.0641752481460571,grad_norm: 0.9999990937931484, iteration: 143261
loss: 0.987159013748169,grad_norm: 0.9158965396892617, iteration: 143262
loss: 0.9825999736785889,grad_norm: 0.9844044186752342, iteration: 143263
loss: 1.0545387268066406,grad_norm: 0.9999999024029798, iteration: 143264
loss: 1.0155439376831055,grad_norm: 0.9999993490671351, iteration: 143265
loss: 1.018879771232605,grad_norm: 0.9711630990888075, iteration: 143266
loss: 1.0089777708053589,grad_norm: 0.8581953770846987, iteration: 143267
loss: 1.0248231887817383,grad_norm: 0.9999991072776145, iteration: 143268
loss: 0.9888118505477905,grad_norm: 0.9835367753585355, iteration: 143269
loss: 1.0357136726379395,grad_norm: 0.938957256105443, iteration: 143270
loss: 0.9992501735687256,grad_norm: 0.9999993863116263, iteration: 143271
loss: 1.0032116174697876,grad_norm: 0.8852793485172694, iteration: 143272
loss: 1.0406640768051147,grad_norm: 0.999999683522217, iteration: 143273
loss: 1.097601056098938,grad_norm: 0.9999994483966089, iteration: 143274
loss: 1.067857265472412,grad_norm: 0.9999999317149739, iteration: 143275
loss: 1.0215439796447754,grad_norm: 0.7383421315936993, iteration: 143276
loss: 0.974542498588562,grad_norm: 0.9999999731651067, iteration: 143277
loss: 0.976493775844574,grad_norm: 0.8576040338428974, iteration: 143278
loss: 1.1323038339614868,grad_norm: 0.999999297167914, iteration: 143279
loss: 0.996465802192688,grad_norm: 0.923492482987068, iteration: 143280
loss: 1.0047873258590698,grad_norm: 0.7870622416152131, iteration: 143281
loss: 1.0277801752090454,grad_norm: 0.8428113496630261, iteration: 143282
loss: 1.0594931840896606,grad_norm: 0.9999991620138768, iteration: 143283
loss: 1.0121161937713623,grad_norm: 0.9999991308333401, iteration: 143284
loss: 1.0250980854034424,grad_norm: 0.9999993790609039, iteration: 143285
loss: 1.0478254556655884,grad_norm: 0.9999992937942468, iteration: 143286
loss: 1.0165200233459473,grad_norm: 0.8939132986377756, iteration: 143287
loss: 1.0130813121795654,grad_norm: 0.9999992584142462, iteration: 143288
loss: 1.0180402994155884,grad_norm: 0.9999991850495769, iteration: 143289
loss: 1.0403636693954468,grad_norm: 0.8718364407828684, iteration: 143290
loss: 0.9731351733207703,grad_norm: 0.894011489838642, iteration: 143291
loss: 1.0377600193023682,grad_norm: 0.800382089652289, iteration: 143292
loss: 1.0093263387680054,grad_norm: 0.9999990627035851, iteration: 143293
loss: 1.0052577257156372,grad_norm: 0.7886558634207567, iteration: 143294
loss: 1.0191181898117065,grad_norm: 0.8663844588882788, iteration: 143295
loss: 1.03493070602417,grad_norm: 0.9999991332822761, iteration: 143296
loss: 0.9878114461898804,grad_norm: 0.9999997869175177, iteration: 143297
loss: 0.9886959791183472,grad_norm: 0.9617531245082448, iteration: 143298
loss: 1.0080312490463257,grad_norm: 0.7257961159663243, iteration: 143299
loss: 1.0318232774734497,grad_norm: 0.9999990982267439, iteration: 143300
loss: 1.0186673402786255,grad_norm: 0.8745960763930956, iteration: 143301
loss: 0.9862020015716553,grad_norm: 0.8202414183041111, iteration: 143302
loss: 0.9431809782981873,grad_norm: 0.862653028887297, iteration: 143303
loss: 0.9949676394462585,grad_norm: 0.9205175006428556, iteration: 143304
loss: 1.0629098415374756,grad_norm: 0.9999992539216433, iteration: 143305
loss: 0.9960676431655884,grad_norm: 0.999999203030551, iteration: 143306
loss: 1.0196936130523682,grad_norm: 0.8771686853995785, iteration: 143307
loss: 1.0342000722885132,grad_norm: 0.8245584304479031, iteration: 143308
loss: 1.0143544673919678,grad_norm: 0.8886666832658007, iteration: 143309
loss: 0.9926607012748718,grad_norm: 0.8872818669075794, iteration: 143310
loss: 1.0252249240875244,grad_norm: 0.7675848471318204, iteration: 143311
loss: 0.9823104739189148,grad_norm: 0.8460086068067884, iteration: 143312
loss: 1.0773009061813354,grad_norm: 0.9932956308795542, iteration: 143313
loss: 0.9964356422424316,grad_norm: 0.9571848563301592, iteration: 143314
loss: 1.0109964609146118,grad_norm: 0.6924463766601598, iteration: 143315
loss: 1.0405429601669312,grad_norm: 0.8561125460081355, iteration: 143316
loss: 1.0048437118530273,grad_norm: 0.8149601575419705, iteration: 143317
loss: 0.990612268447876,grad_norm: 0.9048153382419044, iteration: 143318
loss: 0.9983901381492615,grad_norm: 0.9999991213795026, iteration: 143319
loss: 1.0250414609909058,grad_norm: 0.9999991419890578, iteration: 143320
loss: 1.0878782272338867,grad_norm: 0.9246024815539089, iteration: 143321
loss: 0.9866262078285217,grad_norm: 0.8653046932271637, iteration: 143322
loss: 1.0427614450454712,grad_norm: 0.999999866750369, iteration: 143323
loss: 0.9714630246162415,grad_norm: 0.8919391344466028, iteration: 143324
loss: 1.028483510017395,grad_norm: 0.927687780389376, iteration: 143325
loss: 0.9509133696556091,grad_norm: 0.9463919450200844, iteration: 143326
loss: 0.9676678776741028,grad_norm: 0.757458880811564, iteration: 143327
loss: 1.0681549310684204,grad_norm: 0.9999993292799496, iteration: 143328
loss: 1.010509967803955,grad_norm: 0.9999991046727346, iteration: 143329
loss: 1.0322258472442627,grad_norm: 0.9999990106575614, iteration: 143330
loss: 0.9951653480529785,grad_norm: 0.8369618698775264, iteration: 143331
loss: 1.0447827577590942,grad_norm: 0.9999993697754567, iteration: 143332
loss: 1.0013879537582397,grad_norm: 0.9144818973635875, iteration: 143333
loss: 0.982242226600647,grad_norm: 0.9999991285465583, iteration: 143334
loss: 1.0051991939544678,grad_norm: 0.9110360403088004, iteration: 143335
loss: 0.9810077548027039,grad_norm: 0.7578778708651565, iteration: 143336
loss: 1.0591961145401,grad_norm: 0.9999993105135421, iteration: 143337
loss: 1.0127065181732178,grad_norm: 0.8303653351844141, iteration: 143338
loss: 1.0157991647720337,grad_norm: 0.7823317562635282, iteration: 143339
loss: 1.0069632530212402,grad_norm: 0.9999993102888592, iteration: 143340
loss: 0.9945873618125916,grad_norm: 0.9999990599691234, iteration: 143341
loss: 0.9897393584251404,grad_norm: 0.9999991418628097, iteration: 143342
loss: 1.002580165863037,grad_norm: 0.9999997249306314, iteration: 143343
loss: 1.0786375999450684,grad_norm: 0.9999994958372085, iteration: 143344
loss: 0.9913986921310425,grad_norm: 0.8344828083083329, iteration: 143345
loss: 1.0210320949554443,grad_norm: 0.8172790638487569, iteration: 143346
loss: 1.006170392036438,grad_norm: 0.9999994991752704, iteration: 143347
loss: 0.9842461347579956,grad_norm: 0.9289397975839818, iteration: 143348
loss: 0.9850178360939026,grad_norm: 0.7618559745911183, iteration: 143349
loss: 1.0351150035858154,grad_norm: 0.999999037043847, iteration: 143350
loss: 1.0008448362350464,grad_norm: 0.9915919104661319, iteration: 143351
loss: 1.0139673948287964,grad_norm: 0.8470409742710321, iteration: 143352
loss: 1.0296157598495483,grad_norm: 0.9999991147058304, iteration: 143353
loss: 1.0887737274169922,grad_norm: 0.9999998898669924, iteration: 143354
loss: 1.35276198387146,grad_norm: 0.9999994968015601, iteration: 143355
loss: 0.9867780804634094,grad_norm: 0.882539848011741, iteration: 143356
loss: 0.9614565968513489,grad_norm: 0.7589561318792424, iteration: 143357
loss: 1.0332216024398804,grad_norm: 0.9999990860624214, iteration: 143358
loss: 1.0457937717437744,grad_norm: 0.9999992772003364, iteration: 143359
loss: 1.0407581329345703,grad_norm: 0.9999993709443287, iteration: 143360
loss: 1.022230625152588,grad_norm: 0.8160009538717448, iteration: 143361
loss: 0.9943053722381592,grad_norm: 0.9999992359209389, iteration: 143362
loss: 1.0796986818313599,grad_norm: 0.999999827956226, iteration: 143363
loss: 1.0910463333129883,grad_norm: 0.9999997105199779, iteration: 143364
loss: 1.0676579475402832,grad_norm: 0.9999994123972231, iteration: 143365
loss: 1.0124685764312744,grad_norm: 0.9649890330825219, iteration: 143366
loss: 1.02176833152771,grad_norm: 0.9900075707459527, iteration: 143367
loss: 0.9909592866897583,grad_norm: 0.9999994222302352, iteration: 143368
loss: 1.0404661893844604,grad_norm: 0.9999997226865172, iteration: 143369
loss: 0.958747386932373,grad_norm: 0.9110295707988612, iteration: 143370
loss: 1.078832983970642,grad_norm: 0.9282812522454497, iteration: 143371
loss: 0.9992194175720215,grad_norm: 0.8624168064993988, iteration: 143372
loss: 0.9591034054756165,grad_norm: 0.9999990889728362, iteration: 143373
loss: 1.0409305095672607,grad_norm: 0.999999217675191, iteration: 143374
loss: 0.9820638298988342,grad_norm: 0.9999991540433759, iteration: 143375
loss: 1.0105139017105103,grad_norm: 0.8911265936053674, iteration: 143376
loss: 1.0246460437774658,grad_norm: 0.9999990367006418, iteration: 143377
loss: 1.0288604497909546,grad_norm: 0.999999902843045, iteration: 143378
loss: 1.0985517501831055,grad_norm: 0.9999992248445291, iteration: 143379
loss: 1.1662647724151611,grad_norm: 0.9999993872708722, iteration: 143380
loss: 1.0014241933822632,grad_norm: 0.7842878018247441, iteration: 143381
loss: 0.998769998550415,grad_norm: 0.9462625039164356, iteration: 143382
loss: 0.9747633337974548,grad_norm: 0.8438643027366373, iteration: 143383
loss: 1.0624940395355225,grad_norm: 0.8971972676370196, iteration: 143384
loss: 1.071250319480896,grad_norm: 0.9999993420169484, iteration: 143385
loss: 0.9904189705848694,grad_norm: 0.9080211196190715, iteration: 143386
loss: 0.9901474714279175,grad_norm: 0.8687042880138687, iteration: 143387
loss: 1.0008569955825806,grad_norm: 0.878319367746277, iteration: 143388
loss: 1.1447396278381348,grad_norm: 0.9999996999947612, iteration: 143389
loss: 1.0409880876541138,grad_norm: 0.9999995988513304, iteration: 143390
loss: 1.0516245365142822,grad_norm: 0.9596446452677071, iteration: 143391
loss: 1.0195950269699097,grad_norm: 0.9999997659291187, iteration: 143392
loss: 1.0526542663574219,grad_norm: 0.999999226148376, iteration: 143393
loss: 0.9869953393936157,grad_norm: 0.83806133086239, iteration: 143394
loss: 1.0486958026885986,grad_norm: 0.9999990840305506, iteration: 143395
loss: 1.0788805484771729,grad_norm: 0.9999992579394179, iteration: 143396
loss: 0.9865062236785889,grad_norm: 0.8046562021486938, iteration: 143397
loss: 1.0490992069244385,grad_norm: 0.9322383582509761, iteration: 143398
loss: 1.0816320180892944,grad_norm: 0.999999236673686, iteration: 143399
loss: 1.0011602640151978,grad_norm: 0.9999997256548964, iteration: 143400
loss: 1.0031291246414185,grad_norm: 0.9375073048019987, iteration: 143401
loss: 1.0209629535675049,grad_norm: 0.9999991978669406, iteration: 143402
loss: 1.0154463052749634,grad_norm: 0.8416494707261688, iteration: 143403
loss: 1.0355141162872314,grad_norm: 0.9999995428616356, iteration: 143404
loss: 1.0680655241012573,grad_norm: 0.9999993739530768, iteration: 143405
loss: 0.9609613418579102,grad_norm: 0.9999989651244416, iteration: 143406
loss: 1.0349628925323486,grad_norm: 0.7878850659686973, iteration: 143407
loss: 1.0127989053726196,grad_norm: 0.9999992417245667, iteration: 143408
loss: 0.9711272716522217,grad_norm: 0.9999991377573556, iteration: 143409
loss: 1.0100302696228027,grad_norm: 0.9388200762094515, iteration: 143410
loss: 1.1346882581710815,grad_norm: 0.9999999361411809, iteration: 143411
loss: 1.0584429502487183,grad_norm: 0.832928925760251, iteration: 143412
loss: 0.9994133114814758,grad_norm: 0.9904618487720371, iteration: 143413
loss: 1.0198583602905273,grad_norm: 0.9999993065940825, iteration: 143414
loss: 1.1157108545303345,grad_norm: 0.8716328882378659, iteration: 143415
loss: 1.0633434057235718,grad_norm: 1.0000001209403226, iteration: 143416
loss: 1.0109690427780151,grad_norm: 0.9366453004526276, iteration: 143417
loss: 0.9871615767478943,grad_norm: 0.8661749251511375, iteration: 143418
loss: 0.9908175468444824,grad_norm: 0.9611743423893369, iteration: 143419
loss: 1.0093824863433838,grad_norm: 0.762959788551521, iteration: 143420
loss: 1.0506848096847534,grad_norm: 0.9999991882580673, iteration: 143421
loss: 1.047768473625183,grad_norm: 0.9999994819680867, iteration: 143422
loss: 1.0255603790283203,grad_norm: 0.9999991745391039, iteration: 143423
loss: 1.0183031558990479,grad_norm: 0.8339377476332529, iteration: 143424
loss: 1.0631383657455444,grad_norm: 0.9999993258306487, iteration: 143425
loss: 1.0117183923721313,grad_norm: 0.94949265492702, iteration: 143426
loss: 1.036356806755066,grad_norm: 0.9999991305279754, iteration: 143427
loss: 0.9952152371406555,grad_norm: 0.9999995251355656, iteration: 143428
loss: 1.0250145196914673,grad_norm: 0.9999998003411238, iteration: 143429
loss: 1.129988431930542,grad_norm: 0.9999994075780708, iteration: 143430
loss: 1.013960599899292,grad_norm: 0.9057837260867982, iteration: 143431
loss: 1.0633059740066528,grad_norm: 0.9501363656265076, iteration: 143432
loss: 1.0190008878707886,grad_norm: 0.9999997864309317, iteration: 143433
loss: 1.0938881635665894,grad_norm: 0.9949222766649354, iteration: 143434
loss: 1.0067535638809204,grad_norm: 0.9760847814932898, iteration: 143435
loss: 0.9962656497955322,grad_norm: 0.8458931460255134, iteration: 143436
loss: 0.9767672419548035,grad_norm: 0.9999998833478654, iteration: 143437
loss: 1.0761507749557495,grad_norm: 0.9999991001692843, iteration: 143438
loss: 1.101946473121643,grad_norm: 0.9999997328495834, iteration: 143439
loss: 1.055747628211975,grad_norm: 0.999999510626345, iteration: 143440
loss: 1.0848619937896729,grad_norm: 0.9563723937534315, iteration: 143441
loss: 1.0738362073898315,grad_norm: 0.9506000355238956, iteration: 143442
loss: 1.0543533563613892,grad_norm: 0.9999991258131846, iteration: 143443
loss: 1.0340256690979004,grad_norm: 0.8695465139197058, iteration: 143444
loss: 1.1736540794372559,grad_norm: 0.9999996044487984, iteration: 143445
loss: 1.0346752405166626,grad_norm: 0.9999991564982624, iteration: 143446
loss: 1.0370439291000366,grad_norm: 0.999999716464691, iteration: 143447
loss: 1.1803077459335327,grad_norm: 0.9999994253583337, iteration: 143448
loss: 1.0037572383880615,grad_norm: 0.999999356595546, iteration: 143449
loss: 1.1864553689956665,grad_norm: 0.9999999306825125, iteration: 143450
loss: 1.074454426765442,grad_norm: 0.9999992067352057, iteration: 143451
loss: 1.077181339263916,grad_norm: 0.9999992183553118, iteration: 143452
loss: 1.044114351272583,grad_norm: 0.7942260417540278, iteration: 143453
loss: 1.0268665552139282,grad_norm: 0.9671676563243315, iteration: 143454
loss: 1.012596607208252,grad_norm: 0.9999999120807217, iteration: 143455
loss: 1.001546859741211,grad_norm: 0.8757582450175644, iteration: 143456
loss: 0.9961719512939453,grad_norm: 0.9999991355413702, iteration: 143457
loss: 1.0554274320602417,grad_norm: 0.9761746208549541, iteration: 143458
loss: 0.9862211346626282,grad_norm: 0.9999992690009432, iteration: 143459
loss: 0.9792786836624146,grad_norm: 0.9999989603702641, iteration: 143460
loss: 0.994146466255188,grad_norm: 0.8833509298357092, iteration: 143461
loss: 1.035695195198059,grad_norm: 0.9035767316295351, iteration: 143462
loss: 1.0587126016616821,grad_norm: 0.9761618302948721, iteration: 143463
loss: 1.0074059963226318,grad_norm: 0.9999991598303648, iteration: 143464
loss: 1.0626964569091797,grad_norm: 0.9999997752630304, iteration: 143465
loss: 0.9776862859725952,grad_norm: 0.9698941182473253, iteration: 143466
loss: 1.070383071899414,grad_norm: 0.853624457488484, iteration: 143467
loss: 1.0085915327072144,grad_norm: 0.8265309355736805, iteration: 143468
loss: 1.0131213665008545,grad_norm: 0.9999991912611677, iteration: 143469
loss: 1.1640664339065552,grad_norm: 0.9999991994832369, iteration: 143470
loss: 1.0498871803283691,grad_norm: 0.9999993217450927, iteration: 143471
loss: 0.9670412540435791,grad_norm: 0.8082892732828, iteration: 143472
loss: 1.0106909275054932,grad_norm: 0.9999991771614202, iteration: 143473
loss: 1.0347636938095093,grad_norm: 0.9999992790869922, iteration: 143474
loss: 1.030005693435669,grad_norm: 0.9251331450325553, iteration: 143475
loss: 1.0729337930679321,grad_norm: 0.9999997392173712, iteration: 143476
loss: 1.0237070322036743,grad_norm: 0.9999994322467854, iteration: 143477
loss: 1.134480357170105,grad_norm: 0.9999993039397898, iteration: 143478
loss: 1.058842420578003,grad_norm: 0.9999996383331942, iteration: 143479
loss: 1.152216911315918,grad_norm: 0.999999558740815, iteration: 143480
loss: 1.0063964128494263,grad_norm: 0.999999468817498, iteration: 143481
loss: 1.0728954076766968,grad_norm: 0.999999581821279, iteration: 143482
loss: 1.042995810508728,grad_norm: 0.9999992239126785, iteration: 143483
loss: 1.0122382640838623,grad_norm: 0.967756735205126, iteration: 143484
loss: 1.0310781002044678,grad_norm: 0.9999996291804022, iteration: 143485
loss: 1.0570714473724365,grad_norm: 0.9257255631676102, iteration: 143486
loss: 1.0741609334945679,grad_norm: 0.8731909385608507, iteration: 143487
loss: 1.0487644672393799,grad_norm: 0.9999997413966708, iteration: 143488
loss: 1.0508224964141846,grad_norm: 0.9977498972009806, iteration: 143489
loss: 0.968849778175354,grad_norm: 0.9999991270127218, iteration: 143490
loss: 1.0685441493988037,grad_norm: 0.9999990260838849, iteration: 143491
loss: 0.9887001514434814,grad_norm: 0.9999996894355583, iteration: 143492
loss: 1.060332179069519,grad_norm: 0.9999994761504499, iteration: 143493
loss: 1.1831258535385132,grad_norm: 0.999999591662511, iteration: 143494
loss: 1.054589033126831,grad_norm: 0.9999992165957254, iteration: 143495
loss: 1.0594528913497925,grad_norm: 0.9999994114431332, iteration: 143496
loss: 1.0231444835662842,grad_norm: 0.999998965808124, iteration: 143497
loss: 1.0541471242904663,grad_norm: 0.8719073591066485, iteration: 143498
loss: 1.0368890762329102,grad_norm: 0.9999990199300876, iteration: 143499
loss: 0.9869127869606018,grad_norm: 0.9999991542427574, iteration: 143500
loss: 1.016655683517456,grad_norm: 0.9999993125138827, iteration: 143501
loss: 1.0425571203231812,grad_norm: 0.9999993752345352, iteration: 143502
loss: 1.125184416770935,grad_norm: 0.9999990917801341, iteration: 143503
loss: 1.1604453325271606,grad_norm: 0.8965263527290772, iteration: 143504
loss: 0.9708739519119263,grad_norm: 0.9999993887868294, iteration: 143505
loss: 1.0063925981521606,grad_norm: 0.999999880336233, iteration: 143506
loss: 1.0902363061904907,grad_norm: 0.9999990657033968, iteration: 143507
loss: 1.0580261945724487,grad_norm: 0.999999173646536, iteration: 143508
loss: 1.0282154083251953,grad_norm: 0.9030804911432644, iteration: 143509
loss: 0.972930908203125,grad_norm: 0.9368832913594841, iteration: 143510
loss: 1.0036696195602417,grad_norm: 0.8897296384716331, iteration: 143511
loss: 1.0222744941711426,grad_norm: 0.9014784628592019, iteration: 143512
loss: 1.0976557731628418,grad_norm: 0.9999995898157018, iteration: 143513
loss: 1.0540343523025513,grad_norm: 0.855630929848427, iteration: 143514
loss: 1.0146170854568481,grad_norm: 0.9999992844905398, iteration: 143515
loss: 1.0374573469161987,grad_norm: 0.9411507213421904, iteration: 143516
loss: 1.0062029361724854,grad_norm: 0.8329314115195932, iteration: 143517
loss: 0.9615035057067871,grad_norm: 0.8671318577006932, iteration: 143518
loss: 0.990021288394928,grad_norm: 0.8402814356817234, iteration: 143519
loss: 0.9931845664978027,grad_norm: 0.7983815822639775, iteration: 143520
loss: 1.031749963760376,grad_norm: 0.9999991732658657, iteration: 143521
loss: 1.0132843255996704,grad_norm: 0.7953123380800451, iteration: 143522
loss: 0.9858698844909668,grad_norm: 0.9061464414024212, iteration: 143523
loss: 1.026159405708313,grad_norm: 0.7342861114375915, iteration: 143524
loss: 0.9961877465248108,grad_norm: 0.999999393114117, iteration: 143525
loss: 1.0735684633255005,grad_norm: 0.9999997806413214, iteration: 143526
loss: 0.9758838415145874,grad_norm: 0.8580284035185063, iteration: 143527
loss: 0.9795408844947815,grad_norm: 0.849741547983399, iteration: 143528
loss: 1.0198572874069214,grad_norm: 0.9999990063248004, iteration: 143529
loss: 1.006256103515625,grad_norm: 0.9999992282392741, iteration: 143530
loss: 1.0167959928512573,grad_norm: 0.8406550635905649, iteration: 143531
loss: 1.0242105722427368,grad_norm: 0.9999998035992037, iteration: 143532
loss: 1.0662438869476318,grad_norm: 0.999999357542581, iteration: 143533
loss: 1.0545510053634644,grad_norm: 0.9999997090459772, iteration: 143534
loss: 1.0384759902954102,grad_norm: 0.7909831541092021, iteration: 143535
loss: 1.0061931610107422,grad_norm: 0.798828080277142, iteration: 143536
loss: 1.0011825561523438,grad_norm: 0.9351396231402224, iteration: 143537
loss: 1.0556598901748657,grad_norm: 0.9999996564148997, iteration: 143538
loss: 1.0061321258544922,grad_norm: 0.7921985770492284, iteration: 143539
loss: 1.0265127420425415,grad_norm: 0.9999996679809436, iteration: 143540
loss: 1.0990424156188965,grad_norm: 0.99999956724723, iteration: 143541
loss: 1.0067650079727173,grad_norm: 0.8845802437535437, iteration: 143542
loss: 0.9877784252166748,grad_norm: 0.9999995497372193, iteration: 143543
loss: 1.0243107080459595,grad_norm: 0.763935332733009, iteration: 143544
loss: 1.0072678327560425,grad_norm: 0.8441862309528817, iteration: 143545
loss: 1.008195400238037,grad_norm: 0.9999994432261043, iteration: 143546
loss: 1.1204190254211426,grad_norm: 0.9999990932493111, iteration: 143547
loss: 1.136946201324463,grad_norm: 0.9999999483425145, iteration: 143548
loss: 0.9910081624984741,grad_norm: 0.9999991909868065, iteration: 143549
loss: 1.002168893814087,grad_norm: 0.9999999149269578, iteration: 143550
loss: 1.03736412525177,grad_norm: 0.9490699141180506, iteration: 143551
loss: 0.9788411855697632,grad_norm: 0.8453173526669624, iteration: 143552
loss: 0.9761674404144287,grad_norm: 0.9119225787901355, iteration: 143553
loss: 1.0109632015228271,grad_norm: 0.919573645143492, iteration: 143554
loss: 0.9740903973579407,grad_norm: 0.9258580925541524, iteration: 143555
loss: 1.01365065574646,grad_norm: 0.9999989927047911, iteration: 143556
loss: 1.087941288948059,grad_norm: 0.99999983129254, iteration: 143557
loss: 0.9889647364616394,grad_norm: 0.8719619933446602, iteration: 143558
loss: 1.0208826065063477,grad_norm: 0.9999992686773795, iteration: 143559
loss: 0.9988632202148438,grad_norm: 0.8110983807528788, iteration: 143560
loss: 1.0164729356765747,grad_norm: 0.9999994399280797, iteration: 143561
loss: 1.0288723707199097,grad_norm: 0.8956504229294476, iteration: 143562
loss: 1.0070524215698242,grad_norm: 0.877660548993536, iteration: 143563
loss: 1.1405620574951172,grad_norm: 0.9999996984943582, iteration: 143564
loss: 1.0037896633148193,grad_norm: 0.9999993560576086, iteration: 143565
loss: 0.9959703087806702,grad_norm: 0.9999992286235955, iteration: 143566
loss: 1.0515490770339966,grad_norm: 0.6659115266362144, iteration: 143567
loss: 1.029137372970581,grad_norm: 0.909341781076393, iteration: 143568
loss: 1.018681526184082,grad_norm: 0.8922883729852294, iteration: 143569
loss: 0.9794505834579468,grad_norm: 0.8345493424168219, iteration: 143570
loss: 1.0585987567901611,grad_norm: 0.9999995076931615, iteration: 143571
loss: 1.028849720954895,grad_norm: 0.7825474503772762, iteration: 143572
loss: 1.0225884914398193,grad_norm: 0.8223835880689575, iteration: 143573
loss: 1.0182815790176392,grad_norm: 0.7412626994986017, iteration: 143574
loss: 0.9991015791893005,grad_norm: 0.995511930132071, iteration: 143575
loss: 0.9880969524383545,grad_norm: 0.8037419671734821, iteration: 143576
loss: 0.9780701994895935,grad_norm: 0.9999994284027925, iteration: 143577
loss: 0.982865035533905,grad_norm: 0.897101438322202, iteration: 143578
loss: 1.0292932987213135,grad_norm: 0.9999990380775184, iteration: 143579
loss: 1.0010188817977905,grad_norm: 0.9999993871390443, iteration: 143580
loss: 0.9980395436286926,grad_norm: 0.9126116897709791, iteration: 143581
loss: 1.0348950624465942,grad_norm: 0.9999993755121431, iteration: 143582
loss: 0.998361349105835,grad_norm: 0.8121215234898923, iteration: 143583
loss: 0.9986338019371033,grad_norm: 0.9254440500898795, iteration: 143584
loss: 1.0304242372512817,grad_norm: 0.9236094508601463, iteration: 143585
loss: 1.0972384214401245,grad_norm: 0.9999998624457234, iteration: 143586
loss: 1.036149024963379,grad_norm: 0.826995702323121, iteration: 143587
loss: 1.0315015316009521,grad_norm: 0.9999998391577983, iteration: 143588
loss: 1.0291166305541992,grad_norm: 0.7879061275125898, iteration: 143589
loss: 0.9994583129882812,grad_norm: 0.8943016288033643, iteration: 143590
loss: 1.0137271881103516,grad_norm: 0.9531230128256702, iteration: 143591
loss: 1.0239554643630981,grad_norm: 0.9897457404168779, iteration: 143592
loss: 1.0346040725708008,grad_norm: 0.9767443665037896, iteration: 143593
loss: 1.0048573017120361,grad_norm: 0.9223462635196689, iteration: 143594
loss: 1.0129342079162598,grad_norm: 0.8522441782254986, iteration: 143595
loss: 0.9843091368675232,grad_norm: 0.6728775816200175, iteration: 143596
loss: 1.0242081880569458,grad_norm: 0.9999992795707271, iteration: 143597
loss: 1.0077191591262817,grad_norm: 0.8331132044561317, iteration: 143598
loss: 0.991457998752594,grad_norm: 0.7632898511378932, iteration: 143599
loss: 1.0648248195648193,grad_norm: 0.999999481406896, iteration: 143600
loss: 0.9911232590675354,grad_norm: 0.9999999719947501, iteration: 143601
loss: 1.0403907299041748,grad_norm: 0.9999996080369191, iteration: 143602
loss: 0.9983852505683899,grad_norm: 0.7503463841669743, iteration: 143603
loss: 1.0047030448913574,grad_norm: 0.9999990555390355, iteration: 143604
loss: 1.0128625631332397,grad_norm: 0.8215299590631749, iteration: 143605
loss: 1.0543549060821533,grad_norm: 0.9999994260537854, iteration: 143606
loss: 1.026209831237793,grad_norm: 0.9999997242930438, iteration: 143607
loss: 0.976199209690094,grad_norm: 0.7572997774819112, iteration: 143608
loss: 1.057324767112732,grad_norm: 0.999999768324114, iteration: 143609
loss: 0.9980520606040955,grad_norm: 0.9049303028050465, iteration: 143610
loss: 0.9672709107398987,grad_norm: 0.9999998825787331, iteration: 143611
loss: 1.057645559310913,grad_norm: 0.9999997648918473, iteration: 143612
loss: 0.9836475849151611,grad_norm: 0.8439477437794621, iteration: 143613
loss: 1.0331696271896362,grad_norm: 0.9999998413024788, iteration: 143614
loss: 0.9831759929656982,grad_norm: 0.9123137451786265, iteration: 143615
loss: 1.017622470855713,grad_norm: 0.8130833380660656, iteration: 143616
loss: 0.9901569485664368,grad_norm: 0.7896493976474996, iteration: 143617
loss: 1.013824462890625,grad_norm: 0.9999992130886087, iteration: 143618
loss: 1.0241259336471558,grad_norm: 0.9999991537784114, iteration: 143619
loss: 1.0308282375335693,grad_norm: 0.688215940201694, iteration: 143620
loss: 0.977351188659668,grad_norm: 0.999999046652068, iteration: 143621
loss: 1.0119314193725586,grad_norm: 0.9755567598216194, iteration: 143622
loss: 1.0120168924331665,grad_norm: 0.8194468113310835, iteration: 143623
loss: 1.0180044174194336,grad_norm: 0.9999996576592401, iteration: 143624
loss: 1.0423020124435425,grad_norm: 1.0000000919198255, iteration: 143625
loss: 0.9686813950538635,grad_norm: 0.9999990813634103, iteration: 143626
loss: 1.0168853998184204,grad_norm: 0.8433181361063026, iteration: 143627
loss: 1.011034369468689,grad_norm: 0.8980187173862195, iteration: 143628
loss: 1.0882614850997925,grad_norm: 0.9999991684876229, iteration: 143629
loss: 1.0179548263549805,grad_norm: 0.9999994558631399, iteration: 143630
loss: 1.022734522819519,grad_norm: 0.999999598892087, iteration: 143631
loss: 1.0352751016616821,grad_norm: 0.999999199728009, iteration: 143632
loss: 0.9924921989440918,grad_norm: 0.9999991469004479, iteration: 143633
loss: 0.9667685031890869,grad_norm: 0.8034964280870092, iteration: 143634
loss: 1.0334632396697998,grad_norm: 0.999999149023189, iteration: 143635
loss: 1.0064551830291748,grad_norm: 0.8060624643142147, iteration: 143636
loss: 1.0068556070327759,grad_norm: 0.9999990959667124, iteration: 143637
loss: 0.9528998136520386,grad_norm: 0.7564642529279219, iteration: 143638
loss: 1.0549405813217163,grad_norm: 0.9999997382975375, iteration: 143639
loss: 1.0173321962356567,grad_norm: 0.827618980202305, iteration: 143640
loss: 1.034525752067566,grad_norm: 0.9999999112579425, iteration: 143641
loss: 0.9953490495681763,grad_norm: 0.9999996090518539, iteration: 143642
loss: 0.9827150106430054,grad_norm: 0.9089465943205244, iteration: 143643
loss: 1.0007250308990479,grad_norm: 0.9999996871790504, iteration: 143644
loss: 0.9588150382041931,grad_norm: 0.9876434100694735, iteration: 143645
loss: 1.0232940912246704,grad_norm: 0.999999743784682, iteration: 143646
loss: 1.1371402740478516,grad_norm: 0.9999992160575911, iteration: 143647
loss: 1.0255082845687866,grad_norm: 0.8259908501021329, iteration: 143648
loss: 1.0269044637680054,grad_norm: 0.838624432393187, iteration: 143649
loss: 0.9751048684120178,grad_norm: 0.9100566346090223, iteration: 143650
loss: 0.9963700771331787,grad_norm: 0.9999990965998742, iteration: 143651
loss: 0.9960604310035706,grad_norm: 0.9205022977987978, iteration: 143652
loss: 1.0074641704559326,grad_norm: 0.9549252870475873, iteration: 143653
loss: 1.0218191146850586,grad_norm: 0.8207772282815003, iteration: 143654
loss: 1.0986472368240356,grad_norm: 0.8269379967679781, iteration: 143655
loss: 1.0700610876083374,grad_norm: 0.9999999590922726, iteration: 143656
loss: 1.0404280424118042,grad_norm: 0.9999996759567006, iteration: 143657
loss: 1.0229688882827759,grad_norm: 0.9999990833751641, iteration: 143658
loss: 1.0241559743881226,grad_norm: 0.8102892463413445, iteration: 143659
loss: 1.0335289239883423,grad_norm: 0.9999993143883498, iteration: 143660
loss: 1.0334689617156982,grad_norm: 0.9999992491096111, iteration: 143661
loss: 0.9947527647018433,grad_norm: 0.8441052862535322, iteration: 143662
